Commit 8aa90ee9 authored by 段英荣

modify

parent cf209f71
...@@ -9,6 +9,9 @@ import json ...@@ -9,6 +9,9 @@ import json
from trans2es.models.tag import TopicTag from trans2es.models.tag import TopicTag
import traceback import traceback
from django.conf import settings from django.conf import settings
from libs.es import ESPerform
from search.utils.common import *
class KafkaManager(object): class KafkaManager(object):
consumser_obj = None consumser_obj = None
...@@ -28,6 +31,8 @@ class CollectData(object): ...@@ -28,6 +31,8 @@ class CollectData(object):
def __init__(self): def __init__(self):
self.linucb_matrix_redis_prefix = "physical:linucb:device_id:" self.linucb_matrix_redis_prefix = "physical:linucb:device_id:"
self.linucb_recommend_redis_prefix = "physical:linucb:tag_recommend:device_id:" self.linucb_recommend_redis_prefix = "physical:linucb:tag_recommend:device_id:"
self.linucb_recommend_topic_id_prefix = "physical:linucb:topic_recommend:device_id:"
self.tag_topic_id_redis_prefix = "physical:tag_id:topic_id_list:"
# 默认 # 默认
self.user_feature = [0,1] self.user_feature = [0,1]
...@@ -44,8 +49,38 @@ class CollectData(object): ...@@ -44,8 +49,38 @@ class CollectData(object):
logging.error("catch exception,err_msg:%s" % traceback.format_exc()) logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return dict() return dict()
def get_tag_topic_list(self, tag_id):
    """Return the ids of online, non-deleted topics tagged with ``tag_id``.

    Searches the "topic-high-star" sub-index through ``ESPerform``,
    requesting offset 0 / size 5000, sorted by ``create_time_val``
    descending and then ``language_type`` ascending. Only the ``id``
    source field is requested.

    :param tag_id: tag identifier matched against each topic's ``tag_list``.
    :return: list of topic ids in search order; an empty list if the
        search call or the result parsing raises.
    """
    try:
        q = {
            "query": {
                "bool": {
                    "must": [
                        {"term": {"is_online": True}},
                        {"term": {"is_deleted": False}},
                        {"term": {"tag_list": tag_id}},
                    ]
                }
            },
            "_source": {
                "include": ["id"]
            },
            "sort": [
                {"create_time_val": {"order": "desc"}},
                {"language_type": {"order": "asc"}},
            ],
        }
        result_dict = ESPerform.get_search_results(
            ESPerform.get_cli(), sub_index_name="topic-high-star",
            query_body=q, offset=0, size=5000)
        return [item["_source"]["id"] for item in result_dict["hits"]]
    except Exception:
        # Best effort: log and fall back to an empty list. Narrowed from a
        # bare ``except:`` so SystemExit / KeyboardInterrupt still propagate.
        logging.error("catch exception,err_msg:%s" % traceback.format_exc())
        return list()
def update_recommend_tag_list(self, device_id,user_feature=None): def update_recommend_tag_list(self, device_id,user_feature=None):
try: try:
recommend_tag_set = set()
recommend_tag_list = list() recommend_tag_list = list()
recommend_tag_dict = dict() recommend_tag_dict = dict()
redis_linucb_tag_data_dict = self._get_user_linucb_info(device_id) redis_linucb_tag_data_dict = self._get_user_linucb_info(device_id)
...@@ -54,15 +89,46 @@ class CollectData(object): ...@@ -54,15 +89,46 @@ class CollectData(object):
LinUCB.init_device_id_linucb_info(redis_client, self.linucb_matrix_redis_prefix,device_id,recommend_tag_list) LinUCB.init_device_id_linucb_info(redis_client, self.linucb_matrix_redis_prefix,device_id,recommend_tag_list)
else: else:
user_feature = user_feature if user_feature else self.user_feature user_feature = user_feature if user_feature else self.user_feature
recommend_tag_dict = LinUCB.linucb_recommend_tag(device_id,redis_linucb_tag_data_dict,user_feature,list(redis_linucb_tag_data_dict.keys())) (recommend_tag_dict,recommend_tag_set) = LinUCB.linucb_recommend_tag(device_id,redis_linucb_tag_data_dict,user_feature,list(redis_linucb_tag_data_dict.keys()))
logging.info("duan add,device_id:%s,recommend_tag_list:%s" % (str(device_id), str(recommend_tag_list)))
if len(recommend_tag_dict) > 0: if len(recommend_tag_dict) > 0:
recommend_tag_list = list(recommend_tag_set)
tag_recommend_redis_key = self.linucb_recommend_redis_prefix + str(device_id) tag_recommend_redis_key = self.linucb_recommend_redis_prefix + str(device_id)
redis_client.set(tag_recommend_redis_key, json.dumps(list(recommend_tag_dict.keys()))) redis_client.set(tag_recommend_redis_key, json.dumps(recommend_tag_list))
# Todo:设置过期时间,调研set是否支持 # Todo:设置过期时间,调研set是否支持
redis_client.expire(tag_recommend_redis_key, 7*24*60*60) redis_client.expire(tag_recommend_redis_key, 7*24*60*60)
redis_key = "physical:home_recommend" + ":device_id:" + device_id + ":query_type:" + str(TopicPageType.HOME_RECOMMEND)
have_read_topic_id_list = list()
redis_field_list = [b'have_read_topic_list']
redis_field_val_list = redis_client.hmget(redis_key, redis_field_list)
if redis_field_val_list[0]:
have_read_topic_id_list = list(json.loads(redis_field_val_list[0]))
recommend_topic_id_list = list()
for index in range(0,1000):
for tag_id in recommend_tag_list[0:5]:
redis_tag_id_key = self.tag_topic_id_redis_prefix + str(tag_id)
redis_tag_id_data = redis_client.get(redis_tag_id_key)
tag_topic_id_list = json.loads(redis_tag_id_data) if redis_tag_id_data else []
if not redis_tag_id_data:
tag_topic_id_list = self.get_tag_topic_list(tag_id)
redis_client.set(redis_tag_id_key,json.dumps(tag_topic_id_list))
redis_client.expire(redis_tag_id_key,1*24*60*60)
for topic_id in tag_topic_id_list:
if topic_id not in have_read_topic_id_list:
recommend_topic_id_list.append(topic_id)
break
topic_recommend_redis_key = self.linucb_recommend_topic_id_prefix + str(device_id)
redis_data_dict = {
"data": json.dumps(recommend_topic_id_list),
"cursor":0
}
redis_client.hmset(topic_recommend_redis_key,redis_data_dict)
return True return True
except: except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc()) logging.error("catch exception,err_msg:%s" % traceback.format_exc())
......
...@@ -14,7 +14,7 @@ from django.conf import settings ...@@ -14,7 +14,7 @@ from django.conf import settings
class LinUCB: class LinUCB:
d = 2 d = 2
alpha = 0.1 alpha = 0.01
r1 = 10 r1 = 10
r0 = -0.1 r0 = -0.1
default_tag_list = list() default_tag_list = list()
...@@ -70,6 +70,7 @@ class LinUCB: ...@@ -70,6 +70,7 @@ class LinUCB:
#art_max = tag_list[np.argmax(np.dot(xaT, theta_tmp) + cls.alpha * np.sqrt(np.dot(np.dot(xaT, AaI_tmp), xa)))] #art_max = tag_list[np.argmax(np.dot(xaT, theta_tmp) + cls.alpha * np.sqrt(np.dot(np.dot(xaT, AaI_tmp), xa)))]
top_tag_set = set()
top_tag_dict = dict() top_tag_dict = dict()
np_score_list = list() np_score_list = list()
np_score_dict = dict() np_score_dict = dict()
...@@ -87,17 +88,18 @@ class LinUCB: ...@@ -87,17 +88,18 @@ class LinUCB:
for top_score_index in np_score_dict[top_score]: for top_score_index in np_score_dict[top_score]:
tag_id = str(tag_list[top_score_index], encoding="utf-8") tag_id = str(tag_list[top_score_index], encoding="utf-8")
top_tag_dict[tag_id] = top_score top_tag_dict[tag_id] = top_score
if len(top_tag_dict) >= 10: top_tag_set.add(tag_id)
if len(top_tag_dict) >= 20:
break break
if len(top_tag_dict) >= 10: if len(top_tag_dict) >= 20:
break break
logging.info("duan add,device_id:%s,sorted_np_score_list:%s,np_score_dict:%s" % (str(device_id), str(sorted_np_score_list), str(np_score_dict))) logging.info("duan add,device_id:%s,sorted_np_score_list:%s,np_score_dict:%s" % (str(device_id), str(sorted_np_score_list), str(np_score_dict)))
return top_tag_dict return (top_tag_dict,top_tag_set)
except: except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc()) logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return {} return ({},())
@classmethod @classmethod
def init_device_id_linucb_info(cls, redis_cli,redis_prefix, device_id, tag_list): def init_device_id_linucb_info(cls, redis_cli,redis_prefix, device_id, tag_list):
......
...@@ -165,7 +165,7 @@ class TopicUtils(object): ...@@ -165,7 +165,7 @@ class TopicUtils(object):
"language_type": 1 "language_type": 1
} }
}, },
"weight": 4 "weight": 6
}, },
{ {
"gauss": { "gauss": {
...@@ -224,25 +224,25 @@ class TopicUtils(object): ...@@ -224,25 +224,25 @@ class TopicUtils(object):
"weight": 1 "weight": 1
} }
) )
if len(recommend_tag_list)>0: # if len(recommend_tag_list)>0:
if len(recommend_tag_list)>1: # if len(recommend_tag_list)>1:
functions_list += [ # functions_list += [
{ # {
"filter": {"term": {"tag_list": recommend_tag_list[0]}}, # "filter": {"term": {"tag_list": recommend_tag_list[0]}},
"weight": 4 # "weight": 4
}, # },
{ # {
"filter": {"terms": {"tag_list": recommend_tag_list[1:]}}, # "filter": {"terms": {"tag_list": recommend_tag_list[1:]}},
"weight": 3 # "weight": 3
} # }
] # ]
else: # else:
functions_list.append( # functions_list.append(
{ # {
"filter": {"terms": {"tag_list": recommend_tag_list}}, # "filter": {"terms": {"tag_list": recommend_tag_list}},
"weight": 3 # "weight": 3
} # }
) # )
# for tag_id in recommend_tag_dict: # for tag_id in recommend_tag_dict:
# functions_list.append( # functions_list.append(
...@@ -252,13 +252,13 @@ class TopicUtils(object): ...@@ -252,13 +252,13 @@ class TopicUtils(object):
# } # }
# ) # )
low_content_level = 4 if query_type == TopicPageType.FIND_PAGE else 3 # low_content_level = 4 if query_type == TopicPageType.FIND_PAGE else 3
query_function_score = { query_function_score = {
"query": { "query": {
"bool": { "bool": {
"filter": [ "filter": [
# {"range": {"content_level": {"gte": low_content_level, "lte": 5}}}, # {"range": {"content_level": {"gte": low_content_level, "lte": 5}}},
{"term": {"has_image":True}}, # {"term": {"has_image":True}},
{"term": {"is_online": True}}, {"term": {"is_online": True}},
{"term": {"is_deleted": False}} {"term": {"is_deleted": False}}
], ],
......
...@@ -46,11 +46,14 @@ def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, size, query ...@@ -46,11 +46,14 @@ def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, size, query
query_type=TopicPageType.HOME_RECOMMEND): query_type=TopicPageType.HOME_RECOMMEND):
try: try:
if query is None: if query is None:
redis_key = "physical:home_recommend" + ":user_id:" + str( # redis_key = "physical:home_recommend" + ":user_id:" + str(
user_id) + ":device_id:" + device_id + ":query_type:" + str(query_type) # user_id) + ":device_id:" + device_id + ":query_type:" + str(query_type)
redis_key = "physical:home_recommend" + ":device_id:" + device_id + ":query_type:" + str(query_type)
else: else:
redis_key = "physical:home_query" + ":user_id:" + str( # redis_key = "physical:home_query" + ":user_id:" + str(
user_id) + ":device_id:" + device_id + ":query:" + str(query) + ":query_type:" + str(query_type) # user_id) + ":device_id:" + device_id + ":query:" + str(query) + ":query_type:" + str(query_type)
redis_key = "physical:home_query" + ":device_id:" + device_id + ":query:" + str(query) + ":query_type:" + str(query_type)
redis_field_list = [b'have_read_topic_list'] redis_field_list = [b'have_read_topic_list']
redis_field_val_list = redis_client.hmget(redis_key, redis_field_list) redis_field_val_list = redis_client.hmget(redis_key, redis_field_list)
...@@ -63,12 +66,14 @@ def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, size, query ...@@ -63,12 +66,14 @@ def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, size, query
# if tag_recommend_val: # if tag_recommend_val:
# recommend_tag_dict = json.loads(str(tag_recommend_val, encoding="utf-8")) # recommend_tag_dict = json.loads(str(tag_recommend_val, encoding="utf-8"))
recommend_tag_list = list() recommend_topic_list=list()
tag_recommend_val = redis_client.get(tag_recommend_redis_key) recommend_tag_dict = redis_client.hgetall(tag_recommend_redis_key)
if tag_recommend_val: if "data" in recommend_tag_dict:
recommend_tag_list = json.loads(str(tag_recommend_val, encoding="utf-8")) recommend_topic_id_list = json.loads(recommend_tag_dict["data"])
cursor = recommend_tag_dict["cursor"]
if len(recommend_topic_id_list) > (cursor+5):
recommend_topic_list = recommend_topic_id_list[cursor:cursor+5]
recommend_topic_ids = []
have_read_topic_id_list = list() have_read_topic_id_list = list()
if redis_field_val_list[0]: if redis_field_val_list[0]:
...@@ -83,10 +88,11 @@ def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, size, query ...@@ -83,10 +88,11 @@ def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, size, query
user_similar_score_redis_list = json.loads( user_similar_score_redis_list = json.loads(
redis_user_similar_score_redis_val) if redis_user_similar_score_redis_val else [] redis_user_similar_score_redis_val) if redis_user_similar_score_redis_val else []
size = size-len(recommend_topic_list)
topic_id_list = TopicUtils.get_recommend_topic_ids(user_id=user_id, tag_id=tag_id, offset=offset, size=size, topic_id_list = TopicUtils.get_recommend_topic_ids(user_id=user_id, tag_id=tag_id, offset=offset, size=size,
single_size=size,query=query, query_type=query_type, single_size=size,query=query, query_type=query_type,
filter_topic_id_list=have_read_topic_id_list, filter_topic_id_list=have_read_topic_id_list,
recommend_tag_list=recommend_tag_list, recommend_tag_list=recommend_topic_list,
user_similar_score_list=user_similar_score_redis_list,index_type="topic-high-star") user_similar_score_list=user_similar_score_redis_list,index_type="topic-high-star")
have_read_group_id_set = set() have_read_group_id_set = set()
have_read_user_id_set = set() have_read_user_id_set = set()
...@@ -128,6 +134,7 @@ def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, size, query ...@@ -128,6 +134,7 @@ def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, size, query
# else: # else:
# break # break
topic_id_list = recommend_topic_list.extend(topic_id_list)
have_read_topic_id_list.extend(topic_id_list) have_read_topic_id_list.extend(topic_id_list)
if len(have_read_topic_id_list) > 30000: if len(have_read_topic_id_list) > 30000:
cut_len = len(have_read_topic_id_list)-30000 cut_len = len(have_read_topic_id_list)-30000
......
...@@ -11,6 +11,7 @@ ...@@ -11,6 +11,7 @@
"content":{"type":"text","analyzer":"gm_default_index","search_analyzer":"gm_default_index"}, "content":{"type":"text","analyzer":"gm_default_index","search_analyzer":"gm_default_index"},
"content_level":{"type":"text"}, "content_level":{"type":"text"},
"user_id":{"type":"long"}, "user_id":{"type":"long"},
"user_nick_name":{"type":"text","analyzer":"gm_default_index","search_analyzer":"gm_default_index"},//帖子用户名
"group_id":{"type":"long"}, //所在组ID "group_id":{"type":"long"}, //所在组ID
"tag_list":{"type":"long"},//标签属性 "tag_list":{"type":"long"},//标签属性
"edit_tag_list":{"type":"long"},//编辑标签 "edit_tag_list":{"type":"long"},//编辑标签
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment