Commit 2d7a880c authored by 段英荣

Merge branch 'similar_sort' into 'master'

Similar sort

See merge request !153
parents 236b7237 e51ef4f3
...@@ -5,6 +5,8 @@ from __future__ import unicode_literals, print_function, absolute_import ...@@ -5,6 +5,8 @@ from __future__ import unicode_literals, print_function, absolute_import
import six import six
import random import random
from django.db import models from django.db import models
import logging
import traceback
class ITableChunk(object): class ITableChunk(object):
...@@ -147,13 +149,25 @@ class TableSlicerChunk(ITableChunk): ...@@ -147,13 +149,25 @@ class TableSlicerChunk(ITableChunk):
class TableSlicer(object): class TableSlicer(object):
def __init__(self, queryset, chunk_size=None, chunk_count=None, sep_list=None): def __init__(self, queryset, chunk_size=None, chunk_count=None, sep_list=None):
try:
logging.info("duan add,before assert queryset")
assert isinstance(queryset, models.QuerySet) assert isinstance(queryset, models.QuerySet)
logging.info("duan add,before assert chunk_size")
assert chunk_size is None or isinstance(chunk_size, six.integer_types) assert chunk_size is None or isinstance(chunk_size, six.integer_types)
logging.info("duan add,before assert chunk_count")
assert chunk_count is None or isinstance(chunk_count, six.integer_types) assert chunk_count is None or isinstance(chunk_count, six.integer_types)
logging.info("duan add,before assert sep_list")
assert sep_list is None or isinstance(sep_list, list) assert sep_list is None or isinstance(sep_list, list)
logging.info("duan add,before assert chunk_size")
assert (chunk_size is not None) + (chunk_count is not None) + (sep_list is not None) == 1 assert (chunk_size is not None) + (chunk_count is not None) + (sep_list is not None) == 1
logging.info("duan add,after assert chunk_size")
logging.info("duan add,sep_list:%s" % str(sep_list))
if sep_list is not None: if sep_list is not None:
sep_list = list(sep_list) sep_list = list(sep_list)
else: else:
...@@ -166,17 +180,24 @@ class TableSlicer(object): ...@@ -166,17 +180,24 @@ class TableSlicer(object):
for index in index_list for index in index_list
] ]
logging.info("duan add,queryset count:%d" % count)
self._model = queryset.model self._model = queryset.model
self._query = queryset.query self._query = queryset.query
self._sep_list = [None] + sep_list + [None] self._sep_list = [None] + sep_list + [None]
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
def chunks(self): def chunks(self):
try:
reversed_sep_list = list(reversed(self._sep_list)) reversed_sep_list = list(reversed(self._sep_list))
logging.info("duan add,reversed_sep_list:%d" % (len(self._sep_list) - 1))
for i in range(len(self._sep_list) - 1): for i in range(len(self._sep_list) - 1):
pk_start = reversed_sep_list[i+1] pk_start = reversed_sep_list[i + 1]
pk_stop = reversed_sep_list[i] pk_stop = reversed_sep_list[i]
yield TableSlicerChunk(model=self._model, query=self._query, pk_start=pk_start, pk_stop=pk_stop) yield TableSlicerChunk(model=self._model, query=self._query, pk_start=pk_start, pk_stop=pk_stop)
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
class TableStreamingSlicer(object): class TableStreamingSlicer(object):
......
...@@ -240,24 +240,24 @@ class TopicUtils(object): ...@@ -240,24 +240,24 @@ class TopicUtils(object):
{"term": {"has_image":True}}, {"term": {"has_image":True}},
{"term": {"is_online": True}}, {"term": {"is_online": True}},
{"term": {"is_deleted": False}} {"term": {"is_deleted": False}}
],
"should": [
{
"bool":{
"must":[
{"term":{"has_image":True}},
{"term": {"has_video": False}}
] ]
} # "should": [
},{ # {
"bool":{ # "bool":{
"must":{ # "must":[
"term":{"has_video":True} # {"term":{"has_image":True}},
} # {"term": {"has_video": False}}
} # ]
} # }
], # },{
"minimum_should_match":1 # "bool":{
# "must":{
# "term":{"has_video":True}
# }
# }
# }
# ],
# "minimum_should_match":1
} }
}, },
"score_mode": "sum", "score_mode": "sum",
......
...@@ -116,8 +116,10 @@ class Command(BaseCommand): ...@@ -116,8 +116,10 @@ class Command(BaseCommand):
type_info = get_type_info_map()[type_name] type_info = get_type_info_map()[type_name]
query_set = type_info.queryset query_set = type_info.queryset
logging.info("before TableSlicer")
slicer = TableSlicer(queryset=query_set, chunk_size=type_info.bulk_insert_chunk_size) slicer = TableSlicer(queryset=query_set, chunk_size=type_info.bulk_insert_chunk_size)
for chunk in slicer.chunks(): for chunk in slicer.chunks():
logging.info("in chunks....")
job = Job( job = Job(
sub_index_name=type_name, sub_index_name=type_name,
type_name=type_name, type_name=type_name,
......
#!/usr/bin/env python #!/usr/bin/env python
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
from __future__ import unicode_literals, absolute_import, print_function
from django.conf import settings from django.conf import settings
from django.core.management.base import BaseCommand, CommandError from django.core.management.base import BaseCommand, CommandError
...@@ -54,7 +55,7 @@ class Topic(models.Model): ...@@ -54,7 +55,7 @@ class Topic(models.Model):
Group, verbose_name=u"关联的小组", related_name=u"group_topics", null=True, blank=True, default=None, Group, verbose_name=u"关联的小组", related_name=u"group_topics", null=True, blank=True, default=None,
on_delete=models.CASCADE) on_delete=models.CASCADE)
user_id = models.IntegerField(verbose_name=u'用户ID') user_id = models.IntegerField(verbose_name=u'用户ID')
has_video = models.IntegerField(verbose_name=u'是否是视频日记') has_video = models.BooleanField(verbose_name=u'是否是视频日记')
drop_score = models.IntegerField(verbose_name=u'人工赋分', default=0) drop_score = models.IntegerField(verbose_name=u'人工赋分', default=0)
description = models.CharField(verbose_name=u'日记本描述', max_length=200) description = models.CharField(verbose_name=u'日记本描述', max_length=200)
content = models.CharField(verbose_name=u'日记本内容', max_length=1000) content = models.CharField(verbose_name=u'日记本内容', max_length=1000)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment