Commit 221dce25 authored by 段英荣

Merge branch 'gyz' into 'master'

lin新回答保量,lin旧回答排序(30天内的ctr+点赞率)

See merge request !498
parents 564bccf4 e57109d6
...@@ -401,6 +401,7 @@ class ESPerform(object): ...@@ -401,6 +401,7 @@ class ESPerform(object):
{"term": {"content_level": 6}}, {"term": {"content_level": 6}},
{"term": {"is_online": True}}, {"term": {"is_online": True}},
{"term": {"is_deleted": False}}, {"term": {"is_deleted": False}},
{"term": {"is_new_topic": False}},
{"terms": {"tag_list": tag_id}} {"terms": {"tag_list": tag_id}}
] ]
} }
...@@ -414,6 +415,15 @@ class ESPerform(object): ...@@ -414,6 +415,15 @@ class ESPerform(object):
"include": ["id", "user_id", "latest_reply_time"] "include": ["id", "user_id", "latest_reply_time"]
}, },
"sort": [ "sort": [
{
"_script": {
"order": "desc",
"script": {
"inline": "doc['topic_ctr_30'].value+doc['like_rate_30'].value"
},
"type": "number"
}
},
{"latest_reply_time": {"order": "desc"}}, {"latest_reply_time": {"order": "desc"}},
{"create_time_val": {"order": "desc"}}, {"create_time_val": {"order": "desc"}},
{"language_type": {"order": "asc"}}, {"language_type": {"order": "asc"}},
...@@ -450,6 +460,65 @@ class ESPerform(object): ...@@ -450,6 +460,65 @@ class ESPerform(object):
logging.info("linucb_tag_id_list_2_same_tagset_ids_2_topics_detail:" + str(topic_id_dict_latest_reply_time)) logging.info("linucb_tag_id_list_2_same_tagset_ids_2_topics_detail:" + str(topic_id_dict_latest_reply_time))
return topic_id_list, topic_id_dict return topic_id_list, topic_id_dict
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return list(), dict()
@classmethod
def get_tag_new_topic_list(cls, tag_id, have_read_topic_id_list, size=10):
    """
    :remark: fetch ids of "new" topics that carry at least one of the given tags.

    Searches the ``topic-high-star`` sub-index (routing ``"6"``) for documents with
    ``content_level`` 6, ``is_online`` true, ``is_deleted`` false and ``is_new_topic`` true
    whose ``tag_list`` contains any id from ``tag_id``. Hits are sorted by
    ``latest_reply_time`` desc, ``create_time_val`` desc, ``language_type`` asc and
    collapsed on ``user_id``.

    :param tag_id: iterable of tag ids matched against ``tag_list``
    :param have_read_topic_id_list: topic ids excluded from the result via ``must_not``
    :param size: maximum number of hits requested from the search
    :return: list of topic ids; an empty list when ``tag_id`` is empty or any error occurs
    """
    try:
        tag_ids = list(tag_id)
        if not tag_ids:
            # An empty ``terms`` filter cannot match any document, so skip the search round trip.
            return list()

        # One weight-1 scoring function per requested tag.
        # NOTE(review): the explicit "sort" below does not reference _score, so these
        # weights presumably do not influence the final ordering - confirm the intent.
        functions_list = [
            {
                "filter": {"term": {"tag_list": one_tag_id}},
                "weight": 1
            }
            for one_tag_id in tag_ids
        ]

        q = {
            "query": {
                "function_score": {
                    "query": {
                        "bool": {
                            "must": [
                                {"term": {"content_level": 6}},
                                {"term": {"is_online": True}},
                                {"term": {"is_deleted": False}},
                                {"term": {"is_new_topic": True}},
                                {"terms": {"tag_list": tag_ids}}
                            ]
                        }
                    },
                    "boost_mode": "sum",
                    "score_mode": "sum",
                    "functions": functions_list
                }
            },
            "_source": {
                "include": ["id", "user_id"]
            },
            "sort": [
                {"latest_reply_time": {"order": "desc"}},
                {"create_time_val": {"order": "desc"}},
                {"language_type": {"order": "asc"}},
            ],
            "collapse": {
                "field": "user_id"
            }
        }

        # Only attach the exclusion clause when there is something to exclude.
        if have_read_topic_id_list:
            q["query"]["function_score"]["query"]["bool"]["must_not"] = {
                "terms": {
                    "id": have_read_topic_id_list
                }
            }

        result_dict = ESPerform.get_search_results(ESPerform.get_cli(), sub_index_name="topic-high-star",
                                                   query_body=q,
                                                   offset=0, size=size, routing="6")
        return [item["_source"]["id"] for item in result_dict["hits"]]
    except Exception:
        # Best-effort recall: log the failure and let the caller continue with no new topics.
        logging.error("catch exception,err_msg:%s" % traceback.format_exc())
        return list()
......
...@@ -8,7 +8,6 @@ import traceback ...@@ -8,7 +8,6 @@ import traceback
import json import json
from search.utils.topic import TopicUtils from search.utils.topic import TopicUtils
from trans2es.models.topic import TopicHomeRecommend from trans2es.models.topic import TopicHomeRecommend
from libs.es import ESPerform
from libs.cache import redis_client from libs.cache import redis_client
from search.utils.common import * from search.utils.common import *
from libs.es import ESPerform from libs.es import ESPerform
...@@ -17,7 +16,6 @@ from libs.tools import get_have_read_lin_pictorial_id_list ...@@ -17,7 +16,6 @@ from libs.tools import get_have_read_lin_pictorial_id_list
import datetime import datetime
from trans2es.models.tag import Tag from trans2es.models.tag import Tag
from search.views.tag import get_same_tagset_ids from search.views.tag import get_same_tagset_ids
from linucb.views.collect_data import CollectData
from linucb.views.linucb import LinUCB from linucb.views.linucb import LinUCB
from alpha_types.physical.enum import STRATEGY_TYPE from alpha_types.physical.enum import STRATEGY_TYPE
...@@ -55,7 +53,7 @@ def get_discover_page_topic_ids(user_id, device_id, size, query_type=TopicPageTy ...@@ -55,7 +53,7 @@ def get_discover_page_topic_ids(user_id, device_id, size, query_type=TopicPageTy
return [] return []
def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, size, query=None, def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, underexposure_lin_topic_count, size, query=None,
query_type=TopicPageType.HOME_RECOMMEND, promote_topic_list=[], disable_collpase=False, query_type=TopicPageType.HOME_RECOMMEND, promote_topic_list=[], disable_collpase=False,
usefulrecall=-1, useful_tag_list=[], has_score=False, gray_list=[]): usefulrecall=-1, useful_tag_list=[], has_score=False, gray_list=[]):
try: try:
...@@ -110,6 +108,7 @@ def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, size, query ...@@ -110,6 +108,7 @@ def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, size, query
recommend_topic_user_list = list() recommend_topic_user_list = list()
attention_tag_list = list() attention_tag_list = list()
recommend_topic_list = list() recommend_topic_list = list()
underexposure_lin_topic_ids = list()
if query is None: if query is None:
if user_id != -1: if user_id != -1:
# 有用标签召回 # 有用标签召回
...@@ -125,11 +124,29 @@ def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, size, query ...@@ -125,11 +124,29 @@ def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, size, query
have_read_topic_id_list.extend(useful_topic_id_list) have_read_topic_id_list.extend(useful_topic_id_list)
# linucb 推荐帖子 # linucb 推荐帖子
linucb_recommend_tags = list()
if STRATEGY_TYPE.CTR_GRAY in gray_list: if STRATEGY_TYPE.CTR_GRAY in gray_list:
topic_recommend_redis_key = "ctr_physical:linucb:topic_recommend:device_id:" + str(device_id) topic_recommend_redis_key = "ctr_physical:linucb:topic_recommend:device_id:" + str(device_id)
linucb_recommend_tags_key = "ctr_physical:linucb:tag_recommend:device_id:" + str(device_id)
linucb_recommend_tags = redis_client.get(linucb_recommend_tags_key)
if linucb_recommend_tags:
linucb_recommend_tags = json.loads(linucb_recommend_tags)
else: else:
topic_recommend_redis_key = "physical:linucb:topic_recommend:device_id:" + str(device_id) topic_recommend_redis_key = "physical:linucb:topic_recommend:device_id:" + str(device_id)
linucb_recommend_tags_key = "physical:linucb:tag_recommend:device_id:" + str(device_id)
linucb_recommend_tags = redis_client.get(linucb_recommend_tags_key)
if linucb_recommend_tags:
linucb_recommend_tags = json.loads(linucb_recommend_tags)
# linucb 推荐新帖子
if linucb_recommend_tags:
linucb_recommend_tags_set_tags = get_same_tagset_ids(linucb_recommend_tags)
underexposure_lin_topic_ids = ESPerform.get_tag_new_topic_list(linucb_recommend_tags_set_tags, have_read_topic_id_list, underexposure_lin_topic_count)
size = size - len(underexposure_lin_topic_ids)
have_read_topic_id_list.extend(underexposure_lin_topic_ids)
redis_client.publish("new_topic_impression", json.dumps(underexposure_lin_topic_ids))
# linucb 推荐老帖子
recommend_topic_dict = redis_client.hgetall(topic_recommend_redis_key) recommend_topic_dict = redis_client.hgetall(topic_recommend_redis_key)
linucb_recommend_topic_id_list = list() linucb_recommend_topic_id_list = list()
recommend_topic_list = list() recommend_topic_list = list()
...@@ -236,26 +253,26 @@ def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, size, query ...@@ -236,26 +253,26 @@ def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, size, query
ret_list = rank_topic_id_list if query is None else ret_data_list ret_list = rank_topic_id_list if query is None else ret_data_list
if usefulrecall != -1: if usefulrecall != -1:
if has_score: if has_score:
return recommend_topic_list, ret_list, useful_topic_id_list, rank_topic_score return underexposure_lin_topic_ids, recommend_topic_list, ret_list, useful_topic_id_list, rank_topic_score
else: else:
return recommend_topic_list, ret_list, useful_topic_id_list return underexposure_lin_topic_ids, recommend_topic_list, ret_list, useful_topic_id_list
else: else:
if has_score: if has_score:
return recommend_topic_list, ret_list, rank_topic_score return underexposure_lin_topic_ids, recommend_topic_list, ret_list, rank_topic_score
else: else:
return recommend_topic_list, ret_list return underexposure_lin_topic_ids, recommend_topic_list, ret_list
except: except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc()) logging.error("catch exception,err_msg:%s" % traceback.format_exc())
if usefulrecall != -1: if usefulrecall != -1:
if has_score: if has_score:
return [], [], [], [] return [], [], [], [], []
else: else:
return [], [], [] return [], [], [], []
else: else:
if has_score: if has_score:
return [], [], [] return [], [], [], []
else: else:
return [], [] return [], [], []
def get_home_recommend_pictorial_ids(user_id=-1, device_id="", size=4, gray_list=[]): def get_home_recommend_pictorial_ids(user_id=-1, device_id="", size=4, gray_list=[]):
...@@ -384,9 +401,8 @@ def query_tag_id_by_topic(offset=0, size=10, topic_id_list=[], user_id=-1): ...@@ -384,9 +401,8 @@ def query_tag_id_by_topic(offset=0, size=10, topic_id_list=[], user_id=-1):
@bind("physical/search/home_recommend") @bind("physical/search/home_recommend")
def home_recommend(device_id="", user_id=-1, offset=0, lin_pictorial_count=4, size=10, def home_recommend(device_id="", user_id=-1, offset=0, underexposure_lin_topic_count =4, lin_pictorial_count=4, size=10, query_type=TopicPageType.HOME_RECOMMEND,
query_type=TopicPageType.HOME_RECOMMEND, promote_topic_list=[], usefulrecall=-1, useful_tag_list=[], is_debug=False,gray_list=[]):
promote_topic_list=[], usefulrecall=-1, useful_tag_list=[], is_debug=False, gray_list=[]):
""" """
:remark:首页推荐,目前只推荐日记 :remark:首页推荐,目前只推荐日记
:param session_id: :param session_id:
...@@ -432,9 +448,10 @@ def home_recommend(device_id="", user_id=-1, offset=0, lin_pictorial_count=4, si ...@@ -432,9 +448,10 @@ def home_recommend(device_id="", user_id=-1, offset=0, lin_pictorial_count=4, si
size = size - len(recommend_pictorial_ids) size = size - len(recommend_pictorial_ids)
if is_debug: if is_debug:
has_score = True has_score = True
recommend_topic_ids, rank_topic_ids, useful_topic_ids, rank_topic_score = get_home_recommend_topic_ids( underexposure_lin_topic_ids, recommend_topic_ids, rank_topic_ids, useful_topic_ids, rank_topic_score = get_home_recommend_topic_ids(
user_id, device_id, user_id, device_id,
tag_id=0, offset=0, tag_id=0, offset=0,
underexposure_lin_topic_count=underexposure_lin_topic_count,
size=size, size=size,
query_type=query_type, query_type=query_type,
promote_topic_list=promote_topic_list, promote_topic_list=promote_topic_list,
...@@ -443,10 +460,11 @@ def home_recommend(device_id="", user_id=-1, offset=0, lin_pictorial_count=4, si ...@@ -443,10 +460,11 @@ def home_recommend(device_id="", user_id=-1, offset=0, lin_pictorial_count=4, si
else: else:
has_score = False has_score = False
recommend_topic_ids, rank_topic_ids, useful_topic_ids = get_home_recommend_topic_ids(user_id, underexposure_lin_topic_ids, recommend_topic_ids, rank_topic_ids, useful_topic_ids = get_home_recommend_topic_ids(user_id,
device_id, device_id,
tag_id=0, tag_id=0,
offset=0, offset=0,
underexposure_lin_topic_count=underexposure_lin_topic_count,
size=size, size=size,
query_type=query_type, query_type=query_type,
promote_topic_list=promote_topic_list, promote_topic_list=promote_topic_list,
...@@ -455,12 +473,12 @@ def home_recommend(device_id="", user_id=-1, offset=0, lin_pictorial_count=4, si ...@@ -455,12 +473,12 @@ def home_recommend(device_id="", user_id=-1, offset=0, lin_pictorial_count=4, si
has_score=has_score, has_score=has_score,
gray_list=gray_list) gray_list=gray_list)
if not is_debug: if not is_debug:
return {"linucb_topic_ids": recommend_topic_ids, "rank_topic_ids": rank_topic_ids, return {"underexposure_lin_topic_ids": underexposure_lin_topic_ids, "linucb_topic_ids": recommend_topic_ids, "rank_topic_ids": rank_topic_ids,
"useful_topic_ids": useful_topic_ids, "linucb_pictorial_ids": recommend_pictorial_ids} "useful_topic_ids": useful_topic_ids, "linucb_pictorial_ids": recommend_pictorial_ids}
else: else:
results = get_topic_and_pictorial_detail_for_debug(device_id, recommend_topic_ids, rank_topic_ids, results = get_topic_and_pictorial_detail_for_debug(device_id, recommend_topic_ids, rank_topic_ids,
recommend_pictorial_ids, rank_topic_score) recommend_pictorial_ids, rank_topic_score)
return {"linucb_topic_ids": recommend_topic_ids, "rank_topic_ids": rank_topic_ids, return {"underexposure_lin_topic_ids": underexposure_lin_topic_ids, "linucb_topic_ids": recommend_topic_ids, "rank_topic_ids": rank_topic_ids,
"useful_topic_ids": useful_topic_ids, "linucb_pictorial_ids": recommend_pictorial_ids, "useful_topic_ids": useful_topic_ids, "linucb_pictorial_ids": recommend_pictorial_ids,
"debug_model_data": results} "debug_model_data": results}
...@@ -470,10 +488,11 @@ def home_recommend(device_id="", user_id=-1, offset=0, lin_pictorial_count=4, si ...@@ -470,10 +488,11 @@ def home_recommend(device_id="", user_id=-1, offset=0, lin_pictorial_count=4, si
size = size - len(recommend_pictorial_ids) size = size - len(recommend_pictorial_ids)
if is_debug: if is_debug:
has_score = True has_score = True
recommend_topic_ids, rank_topic_ids, rank_topic_score = get_home_recommend_topic_ids(user_id, underexposure_lin_topic_ids, recommend_topic_ids, rank_topic_ids, rank_topic_score = get_home_recommend_topic_ids(user_id,
device_id, device_id,
tag_id=0, tag_id=0,
offset=0, offset=0,
underexposure_lin_topic_count=underexposure_lin_topic_count,
size=size, size=size,
query_type=query_type, query_type=query_type,
promote_topic_list=promote_topic_list, promote_topic_list=promote_topic_list,
...@@ -482,28 +501,30 @@ def home_recommend(device_id="", user_id=-1, offset=0, lin_pictorial_count=4, si ...@@ -482,28 +501,30 @@ def home_recommend(device_id="", user_id=-1, offset=0, lin_pictorial_count=4, si
else: else:
has_score = False has_score = False
recommend_topic_ids, rank_topic_ids = get_home_recommend_topic_ids(user_id, device_id, tag_id=0, underexposure_lin_topic_ids, recommend_topic_ids, rank_topic_ids = get_home_recommend_topic_ids(user_id, device_id, tag_id=0,
offset=0, size=size, offset=0,
underexposure_lin_topic_count=underexposure_lin_topic_count,
size=size,
query_type=query_type, query_type=query_type,
promote_topic_list=promote_topic_list, promote_topic_list=promote_topic_list,
has_score=has_score, has_score=has_score,
gray_list=gray_list) gray_list=gray_list)
if not is_debug: if not is_debug:
return {"linucb_topic_ids": recommend_topic_ids, "rank_topic_ids": rank_topic_ids, return {"underexposure_lin_topic_ids": underexposure_lin_topic_ids, "linucb_topic_ids": recommend_topic_ids, "rank_topic_ids": rank_topic_ids,
"linucb_pictorial_ids": recommend_pictorial_ids} "linucb_pictorial_ids": recommend_pictorial_ids}
else: else:
results = get_topic_and_pictorial_detail_for_debug(device_id, recommend_topic_ids, rank_topic_ids, results = get_topic_and_pictorial_detail_for_debug(device_id, recommend_topic_ids, rank_topic_ids,
recommend_pictorial_ids, rank_topic_score) recommend_pictorial_ids, rank_topic_score)
return {"linucb_topic_ids": recommend_topic_ids, "rank_topic_ids": rank_topic_ids, return {"underexposure_lin_topic_ids": underexposure_lin_topic_ids, "linucb_topic_ids": recommend_topic_ids, "rank_topic_ids": rank_topic_ids,
"linucb_pictorial_ids": recommend_pictorial_ids, "debug_model_data": results} "linucb_pictorial_ids": recommend_pictorial_ids, "debug_model_data": results}
except: except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc()) logging.error("catch exception,err_msg:%s" % traceback.format_exc())
if usefulrecall != -1: if usefulrecall != -1:
return {"linucb_topic_ids": [], "rank_topic_ids": [], "useful_topic_ids": [], "linucb_pictorial_ids": [], return {"underexposure_lin_topic_ids": [], "linucb_topic_ids": [], "rank_topic_ids": [], "useful_topic_ids": [],"linucb_pictorial_ids": [], "debug_model_data": {}}
"debug_model_data": {}}
else: else:
return {"linucb_topic_ids": [], "rank_topic_ids": [], "linucb_pictorial_ids": [], "debug_model_data": {}} return {"underexposure_lin_topic_ids": [], "linucb_topic_ids": [], "rank_topic_ids": [], "linucb_pictorial_ids": [],"debug_model_data": {}}
@bind("physical/search/discover_page") @bind("physical/search/discover_page")
......
...@@ -28,10 +28,13 @@ from trans2es.models.topic import Topic,ActionSumAboutTopic ...@@ -28,10 +28,13 @@ from trans2es.models.topic import Topic,ActionSumAboutTopic
from search.utils.common import * from search.utils.common import *
from linucb.views.collect_data import CollectData from linucb.views.collect_data import CollectData
from injection.data_sync.tasks import sync_user_similar_score from injection.data_sync.tasks import sync_user_similar_score
import datetime
from trans2es.models.tag import Tag from trans2es.models.tag import Tag
from libs.cache import redis_client from libs.cache import redis_client
from trans2es.models.tag import TopicTag from trans2es.models.tag import TopicTag
from libs.error import logging_exception
from trans2es.models.portrait_stat import LikeDeviceTagStat
...@@ -215,6 +218,42 @@ class Command(BaseCommand): ...@@ -215,6 +218,42 @@ class Command(BaseCommand):
logging.error("catch exception,err_msg:%s" % traceback.format_exc()) logging.error("catch exception,err_msg:%s" % traceback.format_exc())
def sub_redis_new_topic_ids(self):
    """
    :remark: consume "new_topic_impression" messages and keep per-topic impression counts.

    Each message payload is parsed as a JSON list of topic ids. The counts live as a single
    JSON object under the redis key ``all_new_topic_impression_count_key``. Once a topic's
    count exceeds 100, a ``LikeDeviceTagStat`` row is created for it (``is_new_topic=0``,
    ``topic_ctr_30`` and ``like_rate_30`` set to 0.0) and the topic is removed from the counter.

    This method blocks on ``ps.listen()``. A failure while handling one message is logged
    and the loop continues with the next message; a failure of the subscription itself is
    logged and ends the method.

    :return: None
    """
    impression_threshold = 100
    all_new_topic_impression_count_key = "all_new_topic_impression_count_key"
    try:
        ps = redis_client.pubsub()
        ps.subscribe("new_topic_impression")
        for item in ps.listen():
            # Skip subscribe confirmations and other non-payload events.
            if item['type'] != 'message':
                continue
            try:
                new_topic_ids = json.loads(item["data"])

                # The counter key is absent until the first write; start from an empty
                # counter instead of failing on json.loads(None).
                raw_counts = redis_client.get(all_new_topic_impression_count_key)
                all_new_topic_impression_count = json.loads(raw_counts) if raw_counts else {}

                insert_topic_ids = []
                for topic in new_topic_ids:
                    # JSON object keys are strings, so normalise ids before the lookup.
                    topic = str(topic)
                    current_count = all_new_topic_impression_count.get(topic, 0) + 1
                    if current_count > impression_threshold:
                        insert_topic_ids.append(int(topic))
                        all_new_topic_impression_count.pop(topic, None)
                    else:
                        all_new_topic_impression_count[topic] = current_count

                if insert_topic_ids:
                    # Use one timestamp so create_time and update_time agree for every row.
                    now = datetime.datetime.today()
                    insert_list = [
                        LikeDeviceTagStat(create_time=now,
                                          update_time=now,
                                          topic_id=topic_id, is_new_topic=0, topic_ctr_30=0.0, like_rate_30=0.0)
                        for topic_id in insert_topic_ids
                    ]
                    LikeDeviceTagStat.objects.using(settings.MASTER_DB_NAME).bulk_create(insert_list)
                    logging.info("impressions count gt 100 topic ids" + str(insert_topic_ids))

                # Persist the counter only after the insert succeeded, so a failed insert is
                # retried the next time one of those topics is seen.
                json_all_new_topic_impression_count = json.dumps(all_new_topic_impression_count)
                logging.info("all_new_topic_impression_count" + str(all_new_topic_impression_count))
                redis_client.set(all_new_topic_impression_count_key, json_all_new_topic_impression_count)
            except Exception:
                # One malformed message or transient failure must not stop the subscriber.
                logging_exception()
                logging.error("redis new topic sub error!")
    except Exception:
        logging_exception()
        logging.error("redis new topic sub error!")
def handle(self, *args, **options): def handle(self, *args, **options):
try: try:
type_name_list = get_type_info_map().keys() type_name_list = get_type_info_map().keys()
...@@ -241,5 +280,8 @@ class Command(BaseCommand): ...@@ -241,5 +280,8 @@ class Command(BaseCommand):
if len(options["sync_type"]) and options["sync_type"]=="tagname": if len(options["sync_type"]) and options["sync_type"]=="tagname":
self.sync_tag_collecction_name_set() self.sync_tag_collecction_name_set()
if len(options["sync_type"]) and options["sync_type"] == "new_topic_sub":
self.sub_redis_new_topic_ids()
except: except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc()) logging.error("catch exception,err_msg:%s" % traceback.format_exc())
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment