Merge branch 'similar_sort' into 'master'

modify. See merge request !152.

Merge branch 'similar_sort' into 'master'
modify. See merge request !152.
236b7237 · 段英荣 · 91891810 · d964c416 · 236b7237 · 236b7237
Commit 236b7237 authored Mar 11, 2019 by 段英荣
7 changed files
--- a/linucb/views/linucb.py
+++ b/linucb/views/linucb.py
@@ -59,9 +59,17 @@ class LinUCB:
            AaI_tmp = np.array(Aa_list)
            theta_tmp = np.array(theta_list)

-            art_max = tag_list[np.argmax(np.dot(xaT, theta_tmp) + cls.alpha * np.sqrt(np.dot(np.dot(xaT, AaI_tmp), xa)))]
+            top_tag_list_len = len(tag_list)/3
+            top_np_ind = np.argpartition(np.dot(xaT, theta_tmp) + cls.alpha * np.sqrt(np.dot(np.dot(xaT, AaI_tmp), xa)), -top_tag_list_len)[-top_tag_list_len:]

-            return [int(art_max)]
+            top_tag_list = list()
+            top_np_list = top_np_ind.tolist()
+            for tag_id in top_np_list:
+                top_tag_list.append(tag_id)
+
+            #art_max = tag_list[np.argmax(np.dot(xaT, theta_tmp) + cls.alpha * np.sqrt(np.dot(np.dot(xaT, AaI_tmp), xa)))]
+
+            return top_tag_list
        except:
            logging.error("catch exception,err_msg:%s" % traceback.format_exc())
            return []

--- a/search/utils/topic.py
+++ b/search/utils/topic.py
@@ -153,10 +153,6 @@ class TopicUtils(object):

                user_tag_list = result_dict["hits"][0]["_source"]["tag_list"]

-                # attention_user_id_term_list = cls.___get_should_term_list(attention_user_id_list,field_name="user_id")
-                # pick_user_id_term_list = cls.___get_should_term_list(pick_user_id_list,field_name="user_id")
-                # same_group_user_id_term_list = cls.___get_should_term_list(same_group_id_list,field_name="user_id")
-
            q = dict()
            q["query"] = dict()

@@ -244,7 +240,24 @@ class TopicUtils(object):
                            {"term": {"has_image":True}},
                            {"term": {"is_online": True}},
                            {"term": {"is_deleted": False}}
-                        ]
+                        ],
+                        "should": [
+                            {
+                                "bool":{
+                                    "must":[
+                                        {"term":{"has_image":True}},
+                                        {"term": {"has_video": False}}
+                                    ]
+                                }
+                            },{
+                                "bool":{
+                                    "must":{
+                                        "term":{"has_video":True}
+                                    }
+                                }
+                            }
+                        ],
+                        "minimum_should_match":1
                    }
                },
                "score_mode": "sum",

--- a/search/views/topic.py
+++ b/search/views/topic.py
@@ -130,92 +130,6 @@ def get_home_recommend_topic_ids(user_id,device_id,offset,size,query=None,query_
        logging.error("catch exception,err_msg:%s" % traceback.format_exc())
        return []

-"""
-def get_home_recommend_topic_ids(user_id,device_id,offset,size,query=None,query_type=TopicPageType.HOME_RECOMMEND):
-    try:
-        if query is None:
-            if user_id == -1:
-                redis_key = "physical:home_recommend" + ":user_id:" + str(user_id) + ":device_id:" + device_id + ":query_type:" + str(query_type)
-            else:
-                redis_key = "physical:home_recommend" + ":user_id:" + str(user_id) + ":query_type:" + str(query_type)
-        else:
-            if user_id == -1:
-                redis_key = "physical:home_query" + ":user_id:" + str(user_id) + ":device_id:" + device_id + ":query:" + str(query) + ":query_type:" + str(query_type)
-            else:
-                redis_key = "physical:home_query" + ":user_id:" + str(user_id) + ":query:" + str(query) + ":query_type:" + str(query_type)
-
-        redis_field_list = [b'last_offset_num', b'unread_topic_id']
-        for page_id in range(0,offset,size):
-            redis_field_list.append(str(page_id))
-        redis_field_val_list = redis_client.hmget(redis_key,redis_field_list)
-
-        last_offset_num = int(redis_field_val_list[0]) if redis_field_val_list[0] else -1
-        recommend_topic_ids = []
-
-        topic_id_dict = TopicUtils.get_recommend_topic_ids(user_id, offset, size*size,query,query_type=query_type)
-        have_read_group_id_set = set()
-        unread_topic_id_dict = dict()
-        have_read_topic_id_set = set()
-
-        if redis_field_val_list[1] and offset>0:
-            if (user_id>0 and offset==last_offset_num) or user_id==-1:
-                ori_unread_topic_id_dict = json.loads(redis_field_val_list[1])
-                if len(ori_unread_topic_id_dict) > 0:
-                    topic_id_dict.update(ori_unread_topic_id_dict)
-
-                for have_read_item in redis_field_val_list[2:]:
-                    if have_read_item:
-                        have_read_topic_id_set=have_read_topic_id_set.union(json.loads(have_read_item))
-
-        # 当前页小组数量
-        cur_page_group_num = 0
-        # 当前页用户数量
-        cur_page_user_num = 0
-
-        for topic_id in topic_id_dict:
-            if topic_id_dict[topic_id] in have_read_group_id_set:
-                unread_topic_id_dict[topic_id] = topic_id_dict[topic_id]
-            else:
-                if topic_id not in have_read_topic_id_set:
-                    if isinstance(topic_id_dict[topic_id],int) and topic_id_dict[topic_id]>0 and cur_page_group_num<(size*0.9):
-                        have_read_group_id_set.add(topic_id_dict[topic_id])
-                        have_read_topic_id_set.add(topic_id)
-                        cur_page_group_num += 1
-                        recommend_topic_ids.append(topic_id)
-                    elif topic_id_dict[topic_id] and cur_page_user_num<(size*0.1):
-                        cur_page_user_num += 1
-                        recommend_topic_ids.append(topic_id)
-                    else:
-                        unread_topic_id_dict[topic_id] = topic_id_dict[topic_id]
-
-                    if len(recommend_topic_ids) >= size:
-                        break
-
-
-        offi_unread_topic_id_dict = dict()
-        if len(recommend_topic_ids) < size and len(unread_topic_id_dict)>0:
-            for unread_topic_id in unread_topic_id_dict:
-                if len(recommend_topic_ids)<size:
-                    recommend_topic_ids.append(unread_topic_id)
-                else:
-                    offi_unread_topic_id_dict[unread_topic_id] = unread_topic_id_dict[unread_topic_id]
-
-        redis_dict = {
-            "unread_topic_id":json.dumps(offi_unread_topic_id_dict),
-            "last_offset_num":offset+size,
-            offset: json.dumps(recommend_topic_ids)
-        }
-        redis_client.hmset(redis_key,redis_dict)
-        # 每个session key保存15分钟
-        redis_client.expire(redis_key,15*60)
-
-        return recommend_topic_ids
-    except:
-        logging.error("catch exception,err_msg:%s" % traceback.format_exc())
-        return []
-
-"""
-
 @bind("physical/search/query_tag_id_by_topic")
 def query_tag_id_by_topic(offset=0,size=10,topic_id_list=[],user_id=-1):
    try:

--- a/trans2es/mapping/topic.json
+++ b/trans2es/mapping/topic.json
@@ -14,13 +14,13 @@
    "group_id":{"type":"long"}, //所在组ID
    "tag_list":{"type":"long"},//标签属性
    "edit_tag_list":{"type":"long"},//编辑标签
-
    "tag_name_list":{"type":"text","analyzer":"gm_default_index","search_analyzer":"gm_default_index"},
    "share_num":{"type":"long"},
    "pick_id_list":{"type":"long"},
    "offline_score":{"type":"double"},//离线算分
    "manual_score":{"type":"double"},//人工赋分
    "has_image":{"type":"boolean"},//是否有图
+    "has_video":{"type":"boolean"},//是否是视频
    "create_time":{"type":"date", "format":"date_time_no_millis"},
    "update_time":{"type":"date", "format":"date_time_no_millis"},
    "create_time_val":{"type":"long"},

--- a/trans2es/models/topic.py
+++ b/trans2es/models/topic.py
@@ -54,6 +54,7 @@ class Topic(models.Model):
        Group, verbose_name=u"关联的小组", related_name=u"group_topics", null=True, blank=True, default=None,
        on_delete=models.CASCADE)
    user_id = models.IntegerField(verbose_name=u'用户ID')
+    has_video = models.IntegerField(verbose_name=u'是否是视频日记')
    drop_score = models.IntegerField(verbose_name=u'人工赋分', default=0)
    description = models.CharField(verbose_name=u'日记本描述', max_length=200)
    content = models.CharField(verbose_name=u'日记本内容', max_length=1000)

--- a/trans2es/type_info.py
+++ b/trans2es/type_info.py
@@ -185,43 +185,48 @@ class TypeInfo(object):
        )

    def insert_table_chunk(self, sub_index_name, table_chunk, es=None):
-
-        start_clock = time.clock()
-        start_time = time.time()
-
-        instance_list = list(table_chunk)
-
-        stage_1_time = time.time()
-
-        data_list = self.bulk_get_data(instance_list)
-
-        stage_2_time = time.time()
-
-        es_result = ESPerform.es_helpers_bulk(
-            es_cli=es,
-            data_list=data_list,
-            sub_index_name=sub_index_name,
-            auto_create_index=True
-        )
-
-        stage_3_time = time.time()
-        end_clock = time.clock()
-
-        return ('{datetime} {index_prefix} {type_name:10s} {pk_start:>15s} {pk_stop:>15s} {count:5d} '
-                '{stage_1_duration:6.3f} {stage_2_duration:6.3f} {stage_3_duration:6.3f} {clock_duration:6.3f} '
-                '{response}').format(
-            datetime=datetime.datetime.now().strftime('%Y-%m-%dT%H:%M:%S.%f'),
-            index_prefix=sub_index_name,
-            type_name=self.name,
-            pk_start=repr(table_chunk.get_pk_start()),
-            pk_stop=repr(table_chunk.get_pk_stop()),
-            count=len(instance_list),
-            stage_1_duration=stage_1_time - start_time,
-            stage_2_duration=stage_2_time - stage_1_time,
-            stage_3_duration=stage_3_time - stage_2_time,
-            clock_duration=end_clock - start_clock,
-            response=es_result,
-        )
+        try:
+            start_clock = time.clock()
+            start_time = time.time()
+
+            instance_list = list(table_chunk)
+
+            stage_1_time = time.time()
+
+            data_list = self.bulk_get_data(instance_list)
+
+            stage_2_time = time.time()
+
+            es_result = ESPerform.es_helpers_bulk(
+                es_cli=es,
+                data_list=data_list,
+                sub_index_name=sub_index_name,
+                auto_create_index=True
+            )
+
+            logging.info("es_helpers_bulk,sub_index_name:%s,data_list len:%d" % (sub_index_name,len(data_list)))
+
+            stage_3_time = time.time()
+            end_clock = time.clock()
+
+            return ('{datetime} {index_prefix} {type_name:10s} {pk_start:>15s} {pk_stop:>15s} {count:5d} '
+                    '{stage_1_duration:6.3f} {stage_2_duration:6.3f} {stage_3_duration:6.3f} {clock_duration:6.3f} '
+                    '{response}').format(
+                datetime=datetime.datetime.now().strftime('%Y-%m-%dT%H:%M:%S.%f'),
+                index_prefix=sub_index_name,
+                type_name=self.name,
+                pk_start=repr(table_chunk.get_pk_start()),
+                pk_stop=repr(table_chunk.get_pk_stop()),
+                count=len(instance_list),
+                stage_1_duration=stage_1_time - start_time,
+                stage_2_duration=stage_2_time - stage_1_time,
+                stage_3_duration=stage_3_time - stage_2_time,
+                clock_duration=end_clock - start_clock,
+                response=es_result,
+            )
+        except:
+            logging.error("catch exception,err_msg:%s" % traceback.format_exc())
+            return None


 _get_type_info_map_result = None

--- a/trans2es/utils/topic_transfer.py
+++ b/trans2es/utils/topic_transfer.py
@@ -57,6 +57,7 @@ class TopicTransfer(object):

            res["manual_score"] = instance.drop_score
            res["has_image"] = instance.topic_has_image()
+            res["has_video"] = instance.has_video
            res["language_type"] = instance.language_type

            # # 片假名