Commit 8aa90ee9 authored by 段英荣

modify

parent cf209f71
......@@ -9,6 +9,9 @@ import json
from trans2es.models.tag import TopicTag
import traceback
from django.conf import settings
from libs.es import ESPerform
from search.utils.common import *
class KafkaManager(object):
consumser_obj = None
......@@ -28,6 +31,8 @@ class CollectData(object):
def __init__(self):
self.linucb_matrix_redis_prefix = "physical:linucb:device_id:"
self.linucb_recommend_redis_prefix = "physical:linucb:tag_recommend:device_id:"
self.linucb_recommend_topic_id_prefix = "physical:linucb:topic_recommend:device_id:"
self.tag_topic_id_redis_prefix = "physical:tag_id:topic_id_list:"
# 默认
self.user_feature = [0,1]
......@@ -44,8 +49,38 @@ class CollectData(object):
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return dict()
def get_tag_topic_list(self, tag_id):
    """Return the ids of topics tagged with ``tag_id`` from the high-star index.

    The Elasticsearch query keeps only documents where ``is_online`` is true,
    ``is_deleted`` is false and ``tag_list`` contains ``tag_id``.  Only the
    ``id`` source field is requested.  Results are ordered by
    ``create_time_val`` descending, then ``language_type`` ascending, and the
    search asks for at most 5000 hits starting at offset 0.

    :param tag_id: tag identifier matched against the ``tag_list`` field.
    :return: list of the ``id`` values of the hits, in result order; an
        empty list if anything goes wrong (the traceback is logged).
    """
    query_body = {
        "query": {
            "bool": {
                "must": [
                    {"term": {"is_online": True}},
                    {"term": {"is_deleted": False}},
                    {"term": {"tag_list": tag_id}},
                ]
            }
        },
        # Only the topic id is needed by callers, so skip the rest of _source.
        "_source": {
            "include": ["id"]
        },
        "sort": [
            {"create_time_val": {"order": "desc"}},
            {"language_type": {"order": "asc"}},
        ],
    }
    try:
        result_dict = ESPerform.get_search_results(
            ESPerform.get_cli(),
            sub_index_name="topic-high-star",
            query_body=query_body,
            offset=0,
            size=5000,
        )
        return [item["_source"]["id"] for item in result_dict["hits"]]
    except Exception:
        # Best-effort lookup: log and fall back to "no topics".  Catching
        # Exception (not a bare except) lets KeyboardInterrupt/SystemExit
        # propagate instead of being silently swallowed.
        logging.error("catch exception,err_msg:%s" % traceback.format_exc())
        return list()
def update_recommend_tag_list(self, device_id,user_feature=None):
try:
recommend_tag_set = set()
recommend_tag_list = list()
recommend_tag_dict = dict()
redis_linucb_tag_data_dict = self._get_user_linucb_info(device_id)
......@@ -54,15 +89,46 @@ class CollectData(object):
LinUCB.init_device_id_linucb_info(redis_client, self.linucb_matrix_redis_prefix,device_id,recommend_tag_list)
else:
user_feature = user_feature if user_feature else self.user_feature
recommend_tag_dict = LinUCB.linucb_recommend_tag(device_id,redis_linucb_tag_data_dict,user_feature,list(redis_linucb_tag_data_dict.keys()))
(recommend_tag_dict,recommend_tag_set) = LinUCB.linucb_recommend_tag(device_id,redis_linucb_tag_data_dict,user_feature,list(redis_linucb_tag_data_dict.keys()))
logging.info("duan add,device_id:%s,recommend_tag_list:%s" % (str(device_id), str(recommend_tag_list)))
if len(recommend_tag_dict) > 0:
recommend_tag_list = list(recommend_tag_set)
tag_recommend_redis_key = self.linucb_recommend_redis_prefix + str(device_id)
redis_client.set(tag_recommend_redis_key, json.dumps(list(recommend_tag_dict.keys())))
redis_client.set(tag_recommend_redis_key, json.dumps(recommend_tag_list))
# Todo:设置过期时间,调研set是否支持
redis_client.expire(tag_recommend_redis_key, 7*24*60*60)
redis_key = "physical:home_recommend" + ":device_id:" + device_id + ":query_type:" + str(TopicPageType.HOME_RECOMMEND)
have_read_topic_id_list = list()
redis_field_list = [b'have_read_topic_list']
redis_field_val_list = redis_client.hmget(redis_key, redis_field_list)
if redis_field_val_list[0]:
have_read_topic_id_list = list(json.loads(redis_field_val_list[0]))
recommend_topic_id_list = list()
for index in range(0,1000):
for tag_id in recommend_tag_list[0:5]:
redis_tag_id_key = self.tag_topic_id_redis_prefix + str(tag_id)
redis_tag_id_data = redis_client.get(redis_tag_id_key)
tag_topic_id_list = json.loads(redis_tag_id_data) if redis_tag_id_data else []
if not redis_tag_id_data:
tag_topic_id_list = self.get_tag_topic_list(tag_id)
redis_client.set(redis_tag_id_key,json.dumps(tag_topic_id_list))
redis_client.expire(redis_tag_id_key,1*24*60*60)
for topic_id in tag_topic_id_list:
if topic_id not in have_read_topic_id_list:
recommend_topic_id_list.append(topic_id)
break
topic_recommend_redis_key = self.linucb_recommend_topic_id_prefix + str(device_id)
redis_data_dict = {
"data": json.dumps(recommend_topic_id_list),
"cursor":0
}
redis_client.hmset(topic_recommend_redis_key,redis_data_dict)
return True
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
......
......@@ -14,7 +14,7 @@ from django.conf import settings
class LinUCB:
d = 2
alpha = 0.1
alpha = 0.01
r1 = 10
r0 = -0.1
default_tag_list = list()
......@@ -70,6 +70,7 @@ class LinUCB:
#art_max = tag_list[np.argmax(np.dot(xaT, theta_tmp) + cls.alpha * np.sqrt(np.dot(np.dot(xaT, AaI_tmp), xa)))]
top_tag_set = set()
top_tag_dict = dict()
np_score_list = list()
np_score_dict = dict()
......@@ -87,17 +88,18 @@ class LinUCB:
for top_score_index in np_score_dict[top_score]:
tag_id = str(tag_list[top_score_index], encoding="utf-8")
top_tag_dict[tag_id] = top_score
if len(top_tag_dict) >= 10:
top_tag_set.add(tag_id)
if len(top_tag_dict) >= 20:
break
if len(top_tag_dict) >= 10:
if len(top_tag_dict) >= 20:
break
logging.info("duan add,device_id:%s,sorted_np_score_list:%s,np_score_dict:%s" % (str(device_id), str(sorted_np_score_list), str(np_score_dict)))
return top_tag_dict
return (top_tag_dict,top_tag_set)
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return {}
return ({},())
@classmethod
def init_device_id_linucb_info(cls, redis_cli,redis_prefix, device_id, tag_list):
......
......@@ -165,7 +165,7 @@ class TopicUtils(object):
"language_type": 1
}
},
"weight": 4
"weight": 6
},
{
"gauss": {
......@@ -224,25 +224,25 @@ class TopicUtils(object):
"weight": 1
}
)
if len(recommend_tag_list)>0:
if len(recommend_tag_list)>1:
functions_list += [
{
"filter": {"term": {"tag_list": recommend_tag_list[0]}},
"weight": 4
},
{
"filter": {"terms": {"tag_list": recommend_tag_list[1:]}},
"weight": 3
}
]
else:
functions_list.append(
{
"filter": {"terms": {"tag_list": recommend_tag_list}},
"weight": 3
}
)
# if len(recommend_tag_list)>0:
# if len(recommend_tag_list)>1:
# functions_list += [
# {
# "filter": {"term": {"tag_list": recommend_tag_list[0]}},
# "weight": 4
# },
# {
# "filter": {"terms": {"tag_list": recommend_tag_list[1:]}},
# "weight": 3
# }
# ]
# else:
# functions_list.append(
# {
# "filter": {"terms": {"tag_list": recommend_tag_list}},
# "weight": 3
# }
# )
# for tag_id in recommend_tag_dict:
# functions_list.append(
......@@ -252,13 +252,13 @@ class TopicUtils(object):
# }
# )
low_content_level = 4 if query_type == TopicPageType.FIND_PAGE else 3
# low_content_level = 4 if query_type == TopicPageType.FIND_PAGE else 3
query_function_score = {
"query": {
"bool": {
"filter": [
# {"range": {"content_level": {"gte": low_content_level, "lte": 5}}},
{"term": {"has_image":True}},
# {"term": {"has_image":True}},
{"term": {"is_online": True}},
{"term": {"is_deleted": False}}
],
......
......@@ -46,11 +46,14 @@ def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, size, query
query_type=TopicPageType.HOME_RECOMMEND):
try:
if query is None:
redis_key = "physical:home_recommend" + ":user_id:" + str(
user_id) + ":device_id:" + device_id + ":query_type:" + str(query_type)
# redis_key = "physical:home_recommend" + ":user_id:" + str(
# user_id) + ":device_id:" + device_id + ":query_type:" + str(query_type)
redis_key = "physical:home_recommend" + ":device_id:" + device_id + ":query_type:" + str(query_type)
else:
redis_key = "physical:home_query" + ":user_id:" + str(
user_id) + ":device_id:" + device_id + ":query:" + str(query) + ":query_type:" + str(query_type)
# redis_key = "physical:home_query" + ":user_id:" + str(
# user_id) + ":device_id:" + device_id + ":query:" + str(query) + ":query_type:" + str(query_type)
redis_key = "physical:home_query" + ":device_id:" + device_id + ":query:" + str(query) + ":query_type:" + str(query_type)
redis_field_list = [b'have_read_topic_list']
redis_field_val_list = redis_client.hmget(redis_key, redis_field_list)
......@@ -63,12 +66,14 @@ def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, size, query
# if tag_recommend_val:
# recommend_tag_dict = json.loads(str(tag_recommend_val, encoding="utf-8"))
recommend_tag_list = list()
tag_recommend_val = redis_client.get(tag_recommend_redis_key)
if tag_recommend_val:
recommend_tag_list = json.loads(str(tag_recommend_val, encoding="utf-8"))
recommend_topic_list=list()
recommend_tag_dict = redis_client.hgetall(tag_recommend_redis_key)
if "data" in recommend_tag_dict:
recommend_topic_id_list = json.loads(recommend_tag_dict["data"])
cursor = recommend_tag_dict["cursor"]
if len(recommend_topic_id_list) > (cursor+5):
recommend_topic_list = recommend_topic_id_list[cursor:cursor+5]
recommend_topic_ids = []
have_read_topic_id_list = list()
if redis_field_val_list[0]:
......@@ -83,10 +88,11 @@ def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, size, query
user_similar_score_redis_list = json.loads(
redis_user_similar_score_redis_val) if redis_user_similar_score_redis_val else []
size = size-len(recommend_topic_list)
topic_id_list = TopicUtils.get_recommend_topic_ids(user_id=user_id, tag_id=tag_id, offset=offset, size=size,
single_size=size,query=query, query_type=query_type,
filter_topic_id_list=have_read_topic_id_list,
recommend_tag_list=recommend_tag_list,
recommend_tag_list=recommend_topic_list,
user_similar_score_list=user_similar_score_redis_list,index_type="topic-high-star")
have_read_group_id_set = set()
have_read_user_id_set = set()
......@@ -128,6 +134,7 @@ def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, size, query
# else:
# break
topic_id_list = recommend_topic_list.extend(topic_id_list)
have_read_topic_id_list.extend(topic_id_list)
if len(have_read_topic_id_list) > 30000:
cut_len = len(have_read_topic_id_list)-30000
......
......@@ -11,6 +11,7 @@
"content":{"type":"text","analyzer":"gm_default_index","search_analyzer":"gm_default_index"},
"content_level":{"type":"text"},
"user_id":{"type":"long"},
"user_nick_name":{"type":"text","analyzer":"gm_default_index","search_analyzer":"gm_default_index"},//帖子用户名
"group_id":{"type":"long"}, //所在组ID
"tag_list":{"type":"long"},//标签属性
"edit_tag_list":{"type":"long"},//编辑标签
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment