Merge branch 'gyz' into 'master'

lin新回答保量，lin旧回答排序(30天内的ctr+点赞率) See merge request !498

Merge branch 'gyz' into 'master'
lin新回答保量，lin旧回答排序(30天内的ctr+点赞率) See merge request !498
221dce25 · 段英荣 · 564bccf4 · e57109d6 · 221dce25 · 221dce25
Commit 221dce25 authored Sep 26, 2019 by 段英荣
Hide whitespace changes
Inline Side-by-side

Showing 3 changed files with 159 additions and 27 deletions

es.py libs/es.py +69 -0

topic.py search/views/topic.py +48 -27

trans2es_data2es_parallel.py trans2es/management/commands/trans2es_data2es_parallel.py +42 -0

No files found.
--- a/libs/es.py
+++ b/libs/es.py
@@ -401,6 +401,7 @@ class ESPerform(object):
                                    {"term": {"content_level": 6}},
                                    {"term": {"is_online": True}},
                                    {"term": {"is_deleted": False}},
+                                    {"term": {"is_new_topic": False}},
                                    {"terms": {"tag_list": tag_id}}
                                ]
                            }
@@ -414,6 +415,15 @@ class ESPerform(object):
                    "include": ["id", "user_id", "latest_reply_time"]
                },
                "sort": [
+                    {
+                        "_script": {
+                            "order": "desc",
+                            "script": {
+                                "inline": "doc['topic_ctr_30'].value+doc['like_rate_30'].value"
+                            },
+                            "type": "number"
+                        }
+                    },
                    {"latest_reply_time": {"order": "desc"}},
                    {"create_time_val": {"order": "desc"}},
                    {"language_type": {"order": "asc"}},
@@ -450,6 +460,65 @@ class ESPerform(object):
            logging.info("linucb_tag_id_list_2_same_tagset_ids_2_topics_detail:" + str(topic_id_dict_latest_reply_time))
            return topic_id_list, topic_id_dict
+        except:
+            logging.error("catch exception,err_msg:%s" % traceback.format_exc())
+            return list(), dict()
+    @classmethod
+    def get_tag_new_topic_list(cls, tag_id, have_read_topic_id_list, size=10):
+        """
+        :remark: fetch ids of "new" topics (is_new_topic is True) that carry any of the given tags.
+                 Only topics with content_level 6 that are online and not deleted are matched,
+                 hits are collapsed on user_id, and the search goes to the
+                 "topic-high-star" sub index with routing "6".
+        :param tag_id: iterable of tag ids; a topic has to match at least one of them
+        :param have_read_topic_id_list: topic ids to exclude (added as a must_not clause when non-empty)
+        :param size: number of hits requested from the search
+        :return: list of topic ids; an empty list when any exception is raised
+        """
+        try:
+            # One weight-1 function per requested tag; score_mode "sum" adds up
+            # the weights of the functions whose filter matches a topic.
+            tag_weight_functions = [
+                {"filter": {"term": {"tag_list": one_tag}}, "weight": 1}
+                for one_tag in tag_id
+            ]
+            filter_clauses = [
+                {"term": {"content_level": 6}},
+                {"term": {"is_online": True}},
+                {"term": {"is_deleted": False}},
+                {"term": {"is_new_topic": True}},
+                {"terms": {"tag_list": tag_id}},
+            ]
+            bool_query = {"must": filter_clauses}
+            # Skip topics that were already shown to this user.
+            if len(have_read_topic_id_list) > 0:
+                bool_query["must_not"] = {"terms": {"id": have_read_topic_id_list}}
+            search_body = {
+                "query": {
+                    "function_score": {
+                        "query": {"bool": bool_query},
+                        "boost_mode": "sum",
+                        "score_mode": "sum",
+                        "functions": tag_weight_functions,
+                    }
+                },
+                "_source": {"include": ["id", "user_id"]},
+                # Explicit ordering: most recently replied / created first.
+                "sort": [
+                    {"latest_reply_time": {"order": "desc"}},
+                    {"create_time_val": {"order": "desc"}},
+                    {"language_type": {"order": "asc"}},
+                ],
+                "collapse": {"field": "user_id"},
+            }
+            es_client = ESPerform.get_cli()
+            search_result = ESPerform.get_search_results(es_client, sub_index_name="topic-high-star",
+                                                         query_body=search_body,
+                                                         offset=0, size=size, routing="6")
+            return [hit["_source"]["id"] for hit in search_result["hits"]]
        except:
            logging.error("catch exception,err_msg:%s" % traceback.format_exc())
            return list()

--- a/search/views/topic.py
+++ b/search/views/topic.py
@@ -8,7 +8,6 @@ import traceback
 import json
 from search.utils.topic import TopicUtils
 from trans2es.models.topic import TopicHomeRecommend
-from libs.es import ESPerform
 from libs.cache import redis_client
 from search.utils.common import *
 from libs.es import ESPerform
@@ -17,7 +16,6 @@ from libs.tools import get_have_read_lin_pictorial_id_list
 import datetime
 from trans2es.models.tag import Tag
 from search.views.tag import get_same_tagset_ids
-from linucb.views.collect_data import CollectData
 from linucb.views.linucb import LinUCB
 from alpha_types.physical.enum import STRATEGY_TYPE
@@ -55,7 +53,7 @@ def get_discover_page_topic_ids(user_id, device_id, size, query_type=TopicPageTy
        return []
-def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, size, query=None,
+def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, underexposure_lin_topic_count, size, query=None,
                                 query_type=TopicPageType.HOME_RECOMMEND, promote_topic_list=[], disable_collpase=False,
                                 usefulrecall=-1, useful_tag_list=[], has_score=False, gray_list=[]):
    try:
@@ -110,6 +108,7 @@ def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, size, query
        recommend_topic_user_list = list()
        attention_tag_list = list()
        recommend_topic_list = list()
+        underexposure_lin_topic_ids = list()
        if query is None:
            if user_id != -1:
                # 有用标签召回
@@ -125,11 +124,29 @@ def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, size, query
                    have_read_topic_id_list.extend(useful_topic_id_list)
                # linucb 推荐帖子
+                linucb_recommend_tags = list()
                if STRATEGY_TYPE.CTR_GRAY in gray_list:
                    topic_recommend_redis_key = "ctr_physical:linucb:topic_recommend:device_id:" + str(device_id)
+                    linucb_recommend_tags_key = "ctr_physical:linucb:tag_recommend:device_id:" + str(device_id)
+                    linucb_recommend_tags = redis_client.get(linucb_recommend_tags_key)
+                    if linucb_recommend_tags:
+                        linucb_recommend_tags = json.loads(linucb_recommend_tags)
                else:
                    topic_recommend_redis_key = "physical:linucb:topic_recommend:device_id:" + str(device_id)
+                    linucb_recommend_tags_key = "physical:linucb:tag_recommend:device_id:" + str(device_id)
+                    linucb_recommend_tags = redis_client.get(linucb_recommend_tags_key)
+                    if linucb_recommend_tags:
+                        linucb_recommend_tags = json.loads(linucb_recommend_tags)
+                # linucb 推荐新帖子
+                if linucb_recommend_tags:
+                    linucb_recommend_tags_set_tags = get_same_tagset_ids(linucb_recommend_tags)
+                    underexposure_lin_topic_ids = ESPerform.get_tag_new_topic_list(linucb_recommend_tags_set_tags, have_read_topic_id_list, underexposure_lin_topic_count)
+                    size = size - len(underexposure_lin_topic_ids)
+                    have_read_topic_id_list.extend(underexposure_lin_topic_ids)
+                    redis_client.publish("new_topic_impression", json.dumps(underexposure_lin_topic_ids))
+                # linucb 推荐老帖子
                recommend_topic_dict = redis_client.hgetall(topic_recommend_redis_key)
                linucb_recommend_topic_id_list = list()
                recommend_topic_list = list()
@@ -236,26 +253,26 @@ def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, size, query
        ret_list = rank_topic_id_list if query is None else ret_data_list
        if usefulrecall != -1:
            if has_score:
-                return recommend_topic_list, ret_list, useful_topic_id_list, rank_topic_score
+                return underexposure_lin_topic_ids, recommend_topic_list, ret_list, useful_topic_id_list, rank_topic_score
            else:
-                return recommend_topic_list, ret_list, useful_topic_id_list
+                return underexposure_lin_topic_ids, recommend_topic_list, ret_list, useful_topic_id_list
        else:
            if has_score:
-                return recommend_topic_list, ret_list, rank_topic_score
+                return underexposure_lin_topic_ids, recommend_topic_list, ret_list, rank_topic_score
            else:
-                return recommend_topic_list, ret_list
+                return underexposure_lin_topic_ids, recommend_topic_list, ret_list
    except:
        logging.error("catch exception,err_msg:%s" % traceback.format_exc())
        if usefulrecall != -1:
            if has_score:
-                return [], [], [], []
+                return [], [], [], [], []
            else:
-                return [], [], []
+                return [], [], [], []
        else:
            if has_score:
-                return [], [], []
+                return [], [], [], []
            else:
-                return [], []
+                return [], [], []
 def get_home_recommend_pictorial_ids(user_id=-1, device_id="", size=4, gray_list=[]):
@@ -384,9 +401,8 @@ def query_tag_id_by_topic(offset=0, size=10, topic_id_list=[], user_id=-1):
 @bind("physical/search/home_recommend")
-def home_recommend(device_id="", user_id=-1, offset=0, lin_pictorial_count=4, size=10,
+def home_recommend(device_id="", user_id=-1, offset=0, underexposure_lin_topic_count =4, lin_pictorial_count=4, size=10, query_type=TopicPageType.HOME_RECOMMEND,
-                   query_type=TopicPageType.HOME_RECOMMEND,
+                   promote_topic_list=[], usefulrecall=-1, useful_tag_list=[], is_debug=False,gray_list=[]):
-                   promote_topic_list=[], usefulrecall=-1, useful_tag_list=[], is_debug=False, gray_list=[]):
    """
    :remark:首页推荐，目前只推荐日记
    :param session_id:
@@ -432,9 +448,10 @@ def home_recommend(device_id="", user_id=-1, offset=0, lin_pictorial_count=4, si
                size = size - len(recommend_pictorial_ids)
                if is_debug:
                    has_score = True
-                    recommend_topic_ids, rank_topic_ids, useful_topic_ids, rank_topic_score = get_home_recommend_topic_ids(
+                    underexposure_lin_topic_ids, recommend_topic_ids, rank_topic_ids, useful_topic_ids, rank_topic_score = get_home_recommend_topic_ids(
                        user_id, device_id,
                        tag_id=0, offset=0,
+                        underexposure_lin_topic_count=underexposure_lin_topic_count,
                        size=size,
                        query_type=query_type,
                        promote_topic_list=promote_topic_list,
@@ -443,10 +460,11 @@ def home_recommend(device_id="", user_id=-1, offset=0, lin_pictorial_count=4, si
                else:
                    has_score = False
-                    recommend_topic_ids, rank_topic_ids, useful_topic_ids = get_home_recommend_topic_ids(user_id,
+                    underexposure_lin_topic_ids, recommend_topic_ids, rank_topic_ids, useful_topic_ids = get_home_recommend_topic_ids(user_id,
                                                                                                         device_id,
                                                                                                         tag_id=0,
                                                                                                         offset=0,
+                                                                                                         underexposure_lin_topic_count=underexposure_lin_topic_count,
                                                                                                         size=size,
                                                                                                         query_type=query_type,
                                                                                                         promote_topic_list=promote_topic_list,
@@ -455,12 +473,12 @@ def home_recommend(device_id="", user_id=-1, offset=0, lin_pictorial_count=4, si
                                                                                                         has_score=has_score,
                                                                                                         gray_list=gray_list)
                if not is_debug:
-                    return {"linucb_topic_ids": recommend_topic_ids, "rank_topic_ids": rank_topic_ids,
+                    return {"underexposure_lin_topic_ids": underexposure_lin_topic_ids, "linucb_topic_ids": recommend_topic_ids, "rank_topic_ids": rank_topic_ids,
                            "useful_topic_ids": useful_topic_ids, "linucb_pictorial_ids": recommend_pictorial_ids}
                else:
                    results = get_topic_and_pictorial_detail_for_debug(device_id, recommend_topic_ids, rank_topic_ids,
                                                                       recommend_pictorial_ids, rank_topic_score)
-                    return {"linucb_topic_ids": recommend_topic_ids, "rank_topic_ids": rank_topic_ids,
+                    return {"underexposure_lin_topic_ids": underexposure_lin_topic_ids, "linucb_topic_ids": recommend_topic_ids, "rank_topic_ids": rank_topic_ids,
                            "useful_topic_ids": useful_topic_ids, "linucb_pictorial_ids": recommend_pictorial_ids,
                            "debug_model_data": results}
@@ -470,10 +488,11 @@ def home_recommend(device_id="", user_id=-1, offset=0, lin_pictorial_count=4, si
                size = size - len(recommend_pictorial_ids)
                if is_debug:
                    has_score = True
-                    recommend_topic_ids, rank_topic_ids, rank_topic_score = get_home_recommend_topic_ids(user_id,
+                    underexposure_lin_topic_ids, recommend_topic_ids, rank_topic_ids, rank_topic_score = get_home_recommend_topic_ids(user_id,
                                                                                                         device_id,
                                                                                                         tag_id=0,
                                                                                                         offset=0,
+                                                                                                         underexposure_lin_topic_count=underexposure_lin_topic_count,
                                                                                                         size=size,
                                                                                                         query_type=query_type,
                                                                                                         promote_topic_list=promote_topic_list,
@@ -482,28 +501,30 @@ def home_recommend(device_id="", user_id=-1, offset=0, lin_pictorial_count=4, si
                else:
                    has_score = False
-                    recommend_topic_ids, rank_topic_ids = get_home_recommend_topic_ids(user_id, device_id, tag_id=0,
+                    underexposure_lin_topic_ids, recommend_topic_ids, rank_topic_ids = get_home_recommend_topic_ids(user_id, device_id, tag_id=0,
-                                                                                       offset=0, size=size,
+                                                                                       offset=0,
+                                                                                       underexposure_lin_topic_count=underexposure_lin_topic_count,
+                                                                                       size=size,
                                                                                       query_type=query_type,
                                                                                       promote_topic_list=promote_topic_list,
                                                                                       has_score=has_score,
                                                                                       gray_list=gray_list)
                if not is_debug:
-                    return {"linucb_topic_ids": recommend_topic_ids, "rank_topic_ids": rank_topic_ids,
+                    return {"underexposure_lin_topic_ids": underexposure_lin_topic_ids, "linucb_topic_ids": recommend_topic_ids, "rank_topic_ids": rank_topic_ids,
                            "linucb_pictorial_ids": recommend_pictorial_ids}
                else:
                    results = get_topic_and_pictorial_detail_for_debug(device_id, recommend_topic_ids, rank_topic_ids,
                                                                       recommend_pictorial_ids, rank_topic_score)
-                    return {"linucb_topic_ids": recommend_topic_ids, "rank_topic_ids": rank_topic_ids,
+                    return {"underexposure_lin_topic_ids": underexposure_lin_topic_ids, "linucb_topic_ids": recommend_topic_ids, "rank_topic_ids": rank_topic_ids,
                            "linucb_pictorial_ids": recommend_pictorial_ids, "debug_model_data": results}
    except:
        logging.error("catch exception,err_msg:%s" % traceback.format_exc())
        if usefulrecall != -1:
-            return {"linucb_topic_ids": [], "rank_topic_ids": [], "useful_topic_ids": [], "linucb_pictorial_ids": [],
+            return {"underexposure_lin_topic_ids": [], "linucb_topic_ids": [], "rank_topic_ids": [], "useful_topic_ids": [],"linucb_pictorial_ids": [], "debug_model_data": {}}
-                    "debug_model_data": {}}
        else:
-            return {"linucb_topic_ids": [], "rank_topic_ids": [], "linucb_pictorial_ids": [], "debug_model_data": {}}
+            return {"underexposure_lin_topic_ids": [], "linucb_topic_ids": [], "rank_topic_ids": [], "linucb_pictorial_ids": [],"debug_model_data": {}}
 @bind("physical/search/discover_page")

--- a/trans2es/management/commands/trans2es_data2es_parallel.py
+++ b/trans2es/management/commands/trans2es_data2es_parallel.py
@@ -28,10 +28,13 @@ from trans2es.models.topic import Topic,ActionSumAboutTopic
 from search.utils.common import *
 from linucb.views.collect_data import CollectData
 from injection.data_sync.tasks import sync_user_similar_score
+import datetime
 from trans2es.models.tag import Tag
 from libs.cache import redis_client
 from trans2es.models.tag import TopicTag
+from libs.error import logging_exception
+from trans2es.models.portrait_stat import LikeDeviceTagStat
@@ -215,6 +218,42 @@ class Command(BaseCommand):
            logging.error("catch exception,err_msg:%s" % traceback.format_exc())
+    def sub_redis_new_topic_ids(self):
+        """
+        :remark: subscribe to the "new_topic_impression" redis channel and count impressions per topic.
+                 Every message is a JSON list of topic ids. The counters are stored as one JSON
+                 object under the redis key "all_new_topic_impression_count_key". When a topic's
+                 counter goes above 100 the topic is dropped from the counters and a
+                 LikeDeviceTagStat row (is_new_topic=0, topic_ctr_30=0.0, like_rate_30=0.0)
+                 is bulk-created for it in settings.MASTER_DB_NAME.
+                 The method blocks on the subscription; any exception is logged and ends it.
+        :return: None
+        """
+        try:
+            ps = redis_client.pubsub()
+            ps.subscribe("new_topic_impression")
+            all_new_topic_impression_count_key = "all_new_topic_impression_count_key"
+            for item in ps.listen():
+                # listen() also yields subscription bookkeeping events; only real messages carry data.
+                if item['type'] != 'message':
+                    continue
+                new_topic_ids = json.loads(item["data"])
+                # The counter key may not exist yet (e.g. before the first message has been
+                # stored); GET then returns None, which json.loads() would reject with a
+                # TypeError and stop the subscriber. Start from an empty counter instead.
+                raw_impression_count = redis_client.get(all_new_topic_impression_count_key)
+                all_new_topic_impression_count = json.loads(raw_impression_count) if raw_impression_count else {}
+                insert_topic_ids = []
+                for topic in new_topic_ids:
+                    # Keys of the stored JSON object are strings.
+                    topic = str(topic)
+                    impression_count = all_new_topic_impression_count.get(topic, 0) + 1
+                    if impression_count > 100:
+                        insert_topic_ids.append(int(topic))
+                        all_new_topic_impression_count.pop(topic, None)
+                    else:
+                        all_new_topic_impression_count[topic] = impression_count
+                if insert_topic_ids:
+                    # One timestamp for the whole batch so create_time == update_time.
+                    now = datetime.datetime.today()
+                    insert_list = [
+                        LikeDeviceTagStat(create_time=now,
+                                          update_time=now,
+                                          topic_id=topic, is_new_topic=0, topic_ctr_30=0.0, like_rate_30=0.0)
+                        for topic in insert_topic_ids
+                    ]
+                    LikeDeviceTagStat.objects.using(settings.MASTER_DB_NAME).bulk_create(insert_list)
+                    logging.info("impressions count gt 100 topic ids" + str(insert_topic_ids))
+                json_all_new_topic_impression_count = json.dumps(all_new_topic_impression_count)
+                logging.info("all_new_topic_impression_count" + str(all_new_topic_impression_count))
+                redis_client.set(all_new_topic_impression_count_key, json_all_new_topic_impression_count)
+        except:
+            logging_exception()
+            logging.error("redis new topic sub error!")
    def handle(self, *args, **options):
        try:
            type_name_list = get_type_info_map().keys()
@@ -241,5 +280,8 @@ class Command(BaseCommand):
            if len(options["sync_type"]) and options["sync_type"]=="tagname":
                self.sync_tag_collecction_name_set()
+            if len(options["sync_type"]) and options["sync_type"] == "new_topic_sub":
+                self.sub_redis_new_topic_ids()
        except:
            logging.error("catch exception,err_msg:%s" % traceback.format_exc())