Commit 2d7a880c authored by 段英荣

Merge branch 'similar_sort' into 'master'

Similar sort

See merge request !153
parents 236b7237 e51ef4f3
......@@ -5,6 +5,8 @@ from __future__ import unicode_literals, print_function, absolute_import
import six
import random
from django.db import models
import logging
import traceback
class ITableChunk(object):
......@@ -147,36 +149,55 @@ class TableSlicerChunk(ITableChunk):
class TableSlicer(object):
    """Cut a queryset into consecutive primary-key ranges.

    Exactly one of ``chunk_size``, ``chunk_count`` or ``sep_list`` selects how
    the separators between ranges are obtained:

    * ``sep_list``    -- the separator primary keys are supplied by the caller;
    * ``chunk_size``  -- a separator is read every ``chunk_size`` rows of the
      queryset ordered by ``pk``;
    * ``chunk_count`` -- like ``chunk_size``, with the size derived from the
      row count (``count // chunk_count``, never less than 1).

    Construction is best-effort: any failure is logged with its traceback and
    the slicer is left empty, so ``chunks()`` then yields nothing.
    """

    def __init__(self, queryset, chunk_size=None, chunk_count=None, sep_list=None):
        """Validate the arguments and compute the separator list.

        :param queryset: ``models.QuerySet`` to slice.
        :param chunk_size: optional integer number of rows per range.
        :param chunk_count: optional integer number of ranges wanted.
        :param sep_list: optional ``list`` of separator primary keys.
        """
        # Start from a well-defined empty state so that a failure below never
        # leaves the instance without the attributes chunks() reads.
        self._model = None
        self._query = None
        self._sep_list = []
        try:
            self._check_arguments(queryset, chunk_size, chunk_count, sep_list)
            logging.info("duan add,sep_list:%s", sep_list)

            if sep_list is not None:
                # Copy so later changes to the caller's list do not leak in.
                sep_list = list(sep_list)
            else:
                count = queryset.count()
                logging.info("duan add,queryset count:%d", count)
                ordered_pks = queryset.order_by('pk').values_list('pk', flat=True)
                sep_list = [
                    ordered_pks[offset]
                    for offset in self._chunk_offsets(count, chunk_size, chunk_count)
                ]

            self._model = queryset.model
            self._query = queryset.query
            # The None sentinels mark the open-ended first and last ranges.
            self._sep_list = [None] + sep_list + [None]
        except Exception:
            # Only Exception: KeyboardInterrupt / SystemExit must propagate.
            logging.error("catch exception,err_msg:%s", traceback.format_exc())

    @staticmethod
    def _check_arguments(queryset, chunk_size, chunk_count, sep_list):
        """Raise TypeError / ValueError when the constructor arguments are invalid."""
        # Explicit raises instead of assert, which is stripped under ``-O``.
        if not isinstance(queryset, models.QuerySet):
            raise TypeError("queryset must be a models.QuerySet")
        if chunk_size is not None and not isinstance(chunk_size, six.integer_types):
            raise TypeError("chunk_size must be an integer or None")
        if chunk_count is not None and not isinstance(chunk_count, six.integer_types):
            raise TypeError("chunk_count must be an integer or None")
        if sep_list is not None and not isinstance(sep_list, list):
            raise TypeError("sep_list must be a list or None")
        given = (chunk_size is not None) + (chunk_count is not None) + (sep_list is not None)
        if given != 1:
            raise ValueError("exactly one of chunk_size, chunk_count, sep_list is required")

    @staticmethod
    def _chunk_offsets(count, chunk_size, chunk_count):
        """Return the row offsets (0, size, 2*size, ...) at which separators are read."""
        if chunk_size is None:
            # Floor division keeps the step an int on Python 3; the lower bound
            # of 1 avoids range() failing on a zero step when count < chunk_count.
            chunk_size = max(count // chunk_count, 1)
        return list(range(0, count, chunk_size))

    def _pk_ranges(self):
        """Return the (pk_start, pk_stop) pairs, highest range first."""
        descending = list(reversed(self._sep_list))
        # Each separator is the stop of one range and the start of the next one.
        return [(pk_start, pk_stop) for pk_stop, pk_start in zip(descending, descending[1:])]

    def chunks(self):
        """Yield one ``TableSlicerChunk`` per primary-key range, highest range first.

        Errors raised while building a chunk are logged and end the iteration.
        """
        pk_ranges = self._pk_ranges()
        logging.info("duan add,reversed_sep_list:%d", len(pk_ranges))
        try:
            for pk_start, pk_stop in pk_ranges:
                yield TableSlicerChunk(model=self._model, query=self._query, pk_start=pk_start, pk_stop=pk_stop)
        except Exception:
            # Not a bare except: GeneratorExit (raised at the yield when the
            # consumer stops early) must pass through without being logged.
            logging.error("catch exception,err_msg:%s", traceback.format_exc())
class TableStreamingSlicer(object):
......
......@@ -240,24 +240,24 @@ class TopicUtils(object):
{"term": {"has_image":True}},
{"term": {"is_online": True}},
{"term": {"is_deleted": False}}
],
"should": [
{
"bool":{
"must":[
{"term":{"has_image":True}},
{"term": {"has_video": False}}
]
}
},{
"bool":{
"must":{
"term":{"has_video":True}
}
}
}
],
"minimum_should_match":1
]
# "should": [
# {
# "bool":{
# "must":[
# {"term":{"has_image":True}},
# {"term": {"has_video": False}}
# ]
# }
# },{
# "bool":{
# "must":{
# "term":{"has_video":True}
# }
# }
# }
# ],
# "minimum_should_match":1
}
},
"score_mode": "sum",
......
......@@ -116,8 +116,10 @@ class Command(BaseCommand):
type_info = get_type_info_map()[type_name]
query_set = type_info.queryset
logging.info("before TableSlicer")
slicer = TableSlicer(queryset=query_set, chunk_size=type_info.bulk_insert_chunk_size)
for chunk in slicer.chunks():
logging.info("in chunks....")
job = Job(
sub_index_name=type_name,
type_name=type_name,
......
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import unicode_literals, absolute_import, print_function
from django.conf import settings
from django.core.management.base import BaseCommand, CommandError
......@@ -54,7 +55,7 @@ class Topic(models.Model):
Group, verbose_name=u"关联的小组", related_name=u"group_topics", null=True, blank=True, default=None,
on_delete=models.CASCADE)
user_id = models.IntegerField(verbose_name=u'用户ID')
has_video = models.IntegerField(verbose_name=u'是否是视频日记')
has_video = models.BooleanField(verbose_name=u'是否是视频日记')
drop_score = models.IntegerField(verbose_name=u'人工赋分', default=0)
description = models.CharField(verbose_name=u'日记本描述', max_length=200)
content = models.CharField(verbose_name=u'日记本内容', max_length=1000)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment