Commit e3387b7b authored by 段英荣

Merge branch 'master' into 'test'

Master

See merge request !216
parents 0ad7f0ca cf209f71
...@@ -47,18 +47,19 @@ class CollectData(object): ...@@ -47,18 +47,19 @@ class CollectData(object):
def update_recommend_tag_list(self, device_id,user_feature=None): def update_recommend_tag_list(self, device_id,user_feature=None):
try: try:
recommend_tag_list = list() recommend_tag_list = list()
recommend_tag_dict = dict()
redis_linucb_tag_data_dict = self._get_user_linucb_info(device_id) redis_linucb_tag_data_dict = self._get_user_linucb_info(device_id)
if len(redis_linucb_tag_data_dict) == 0: if len(redis_linucb_tag_data_dict) == 0:
recommend_tag_list = LinUCB.get_default_tag_list() recommend_tag_list = LinUCB.get_default_tag_list()
LinUCB.init_device_id_linucb_info(redis_client, self.linucb_matrix_redis_prefix,device_id,recommend_tag_list) LinUCB.init_device_id_linucb_info(redis_client, self.linucb_matrix_redis_prefix,device_id,recommend_tag_list)
else: else:
user_feature = user_feature if user_feature else self.user_feature user_feature = user_feature if user_feature else self.user_feature
recommend_tag_list = LinUCB.linucb_recommend_tag(device_id,redis_linucb_tag_data_dict,user_feature,list(redis_linucb_tag_data_dict.keys())) recommend_tag_dict = LinUCB.linucb_recommend_tag(device_id,redis_linucb_tag_data_dict,user_feature,list(redis_linucb_tag_data_dict.keys()))
logging.info("duan add,device_id:%s,recommend_tag_list:%s" % (str(device_id), str(recommend_tag_list))) logging.info("duan add,device_id:%s,recommend_tag_list:%s" % (str(device_id), str(recommend_tag_list)))
if len(recommend_tag_list) > 0: if len(recommend_tag_dict) > 0:
tag_recommend_redis_key = self.linucb_recommend_redis_prefix + str(device_id) tag_recommend_redis_key = self.linucb_recommend_redis_prefix + str(device_id)
redis_client.set(tag_recommend_redis_key, json.dumps(recommend_tag_list)) redis_client.set(tag_recommend_redis_key, json.dumps(list(recommend_tag_dict.keys())))
# Todo:设置过期时间,调研set是否支持 # Todo:设置过期时间,调研set是否支持
redis_client.expire(tag_recommend_redis_key, 7*24*60*60) redis_client.expire(tag_recommend_redis_key, 7*24*60*60)
......
...@@ -14,9 +14,9 @@ from django.conf import settings ...@@ -14,9 +14,9 @@ from django.conf import settings
class LinUCB: class LinUCB:
d = 2 d = 2
alpha = 0.25 alpha = 0.1
r1 = 1 r1 = 10
r0 = -0.5 r0 = -0.1
default_tag_list = list() default_tag_list = list()
@classmethod @classmethod
...@@ -70,7 +70,7 @@ class LinUCB: ...@@ -70,7 +70,7 @@ class LinUCB:
#art_max = tag_list[np.argmax(np.dot(xaT, theta_tmp) + cls.alpha * np.sqrt(np.dot(np.dot(xaT, AaI_tmp), xa)))] #art_max = tag_list[np.argmax(np.dot(xaT, theta_tmp) + cls.alpha * np.sqrt(np.dot(np.dot(xaT, AaI_tmp), xa)))]
top_tag_set = set() top_tag_dict = dict()
np_score_list = list() np_score_list = list()
np_score_dict = dict() np_score_dict = dict()
...@@ -85,18 +85,19 @@ class LinUCB: ...@@ -85,18 +85,19 @@ class LinUCB:
sorted_np_score_list = sorted(np_score_list,reverse=True) sorted_np_score_list = sorted(np_score_list,reverse=True)
for top_score in sorted_np_score_list: for top_score in sorted_np_score_list:
for top_score_index in np_score_dict[top_score]: for top_score_index in np_score_dict[top_score]:
top_tag_set.add(str(tag_list[top_score_index], encoding="utf-8")) tag_id = str(tag_list[top_score_index], encoding="utf-8")
if len(top_tag_set) >= 10: top_tag_dict[tag_id] = top_score
if len(top_tag_dict) >= 10:
break break
if len(top_tag_set) >= 10: if len(top_tag_dict) >= 10:
break break
logging.info("duan add,device_id:%s,sorted_np_score_list:%s,np_score_dict:%s" % (str(device_id), str(sorted_np_score_list), str(np_score_dict))) logging.info("duan add,device_id:%s,sorted_np_score_list:%s,np_score_dict:%s" % (str(device_id), str(sorted_np_score_list), str(np_score_dict)))
return list(top_tag_set) return top_tag_dict
except: except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc()) logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return [] return {}
@classmethod @classmethod
def init_device_id_linucb_info(cls, redis_cli,redis_prefix, device_id, tag_list): def init_device_id_linucb_info(cls, redis_cli,redis_prefix, device_id, tag_list):
......
...@@ -134,7 +134,7 @@ class TopicUtils(object): ...@@ -134,7 +134,7 @@ class TopicUtils(object):
""" """
try: try:
attention_user_id_list = list() attention_user_id_list = list()
pick_user_id_list = list() # pick_user_id_list = list()
# same_group_id_list = list() # same_group_id_list = list()
user_tag_list = list() user_tag_list = list()
...@@ -145,8 +145,8 @@ class TopicUtils(object): ...@@ -145,8 +145,8 @@ class TopicUtils(object):
attention_user_info_list = result_dict["hits"][0]["_source"]["attention_user_id_list"] attention_user_info_list = result_dict["hits"][0]["_source"]["attention_user_id_list"]
attention_user_id_list = [item["user_id"] for item in attention_user_info_list] attention_user_id_list = [item["user_id"] for item in attention_user_info_list]
pick_user_info_list = result_dict["hits"][0]["_source"]["pick_user_id_list"] # pick_user_info_list = result_dict["hits"][0]["_source"]["pick_user_id_list"]
pick_user_id_list = [item["user_id"] for item in pick_user_info_list] # pick_user_id_list = [item["user_id"] for item in pick_user_info_list]
# same_pictorial_user_info_list = result_dict["hits"][0]["_source"]["same_pictorial_user_id_list"] # same_pictorial_user_info_list = result_dict["hits"][0]["_source"]["same_pictorial_user_id_list"]
# #
...@@ -165,22 +165,22 @@ class TopicUtils(object): ...@@ -165,22 +165,22 @@ class TopicUtils(object):
"language_type": 1 "language_type": 1
} }
}, },
"weight": 3 "weight": 4
}, },
{ {
"linear": { "gauss": {
"create_time": { "create_time": {
"scale": "1d", "scale": "1d",
"decay": 0.99 "decay": 0.99
} }
}, },
"weight": 500 "weight": 5
} }
] ]
if len(user_similar_score_list) > 0: if len(user_similar_score_list) > 0:
for item in user_similar_score_list[:100]: for item in user_similar_score_list[:100]:
score_item = 3 * 10 * item[1] score_item = 2 + item[1]
functions_list.append( functions_list.append(
{ {
"filter": {"bool": { "filter": {"bool": {
...@@ -197,14 +197,14 @@ class TopicUtils(object): ...@@ -197,14 +197,14 @@ class TopicUtils(object):
"weight": 3, "weight": 3,
} }
) )
if len(pick_user_id_list) > 0: # if len(pick_user_id_list) > 0:
functions_list.append( # functions_list.append(
{ # {
"filter": {"bool": { # "filter": {"bool": {
"should": {"terms": {"user_id": pick_user_id_list}}}}, # "should": {"terms": {"user_id": pick_user_id_list}}}},
"weight": 2 # "weight": 2
} # }
) # )
# if len(same_pictorial_id_list) > 0: # if len(same_pictorial_id_list) > 0:
# functions_list.append( # functions_list.append(
...@@ -224,15 +224,34 @@ class TopicUtils(object): ...@@ -224,15 +224,34 @@ class TopicUtils(object):
"weight": 1 "weight": 1
} }
) )
if len(recommend_tag_list) > 0: if len(recommend_tag_list)>0:
if len(recommend_tag_list)>1:
functions_list += [
{
"filter": {"term": {"tag_list": recommend_tag_list[0]}},
"weight": 4
},
{
"filter": {"terms": {"tag_list": recommend_tag_list[1:]}},
"weight": 3
}
]
else:
functions_list.append( functions_list.append(
{ {
"filter": {"bool": { "filter": {"terms": {"tag_list": recommend_tag_list}},
"should": {"terms": {"edit_tag_list": recommend_tag_list}}}},
"weight": 3 "weight": 3
} }
) )
# for tag_id in recommend_tag_dict:
# functions_list.append(
# {
# "filter": {"term": {"tag_list": tag_id}},
# "weight": recommend_tag_dict[tag_id]
# }
# )
low_content_level = 4 if query_type == TopicPageType.FIND_PAGE else 3 low_content_level = 4 if query_type == TopicPageType.FIND_PAGE else 3
query_function_score = { query_function_score = {
"query": { "query": {
...@@ -319,7 +338,13 @@ class TopicUtils(object): ...@@ -319,7 +338,13 @@ class TopicUtils(object):
}, },
"order": "desc" "order": "desc"
} }
} },
# {
# "offline_score":{
# "order": "desc"
# }
# },
"_score"
] ]
result_dict = ESPerform.get_search_results(ESPerform.get_cli(), sub_index_name=index_type, query_body=q, result_dict = ESPerform.get_search_results(ESPerform.get_cli(), sub_index_name=index_type, query_body=q,
offset=offset, size=size) offset=offset, size=size)
......
...@@ -57,7 +57,13 @@ def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, size, query ...@@ -57,7 +57,13 @@ def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, size, query
tag_recommend_redis_key = "physical:linucb:tag_recommend:device_id:" + str(device_id) tag_recommend_redis_key = "physical:linucb:tag_recommend:device_id:" + str(device_id)
recommend_tag_list = []
# recommend_tag_dict = dict()
# tag_recommend_val = redis_client.get(tag_recommend_redis_key)
# if tag_recommend_val:
# recommend_tag_dict = json.loads(str(tag_recommend_val, encoding="utf-8"))
recommend_tag_list = list()
tag_recommend_val = redis_client.get(tag_recommend_redis_key) tag_recommend_val = redis_client.get(tag_recommend_redis_key)
if tag_recommend_val: if tag_recommend_val:
recommend_tag_list = json.loads(str(tag_recommend_val, encoding="utf-8")) recommend_tag_list = json.loads(str(tag_recommend_val, encoding="utf-8"))
...@@ -123,15 +129,15 @@ def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, size, query ...@@ -123,15 +129,15 @@ def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, size, query
# break # break
have_read_topic_id_list.extend(topic_id_list) have_read_topic_id_list.extend(topic_id_list)
if len(have_read_topic_id_list) > 5000: if len(have_read_topic_id_list) > 30000:
cut_len = len(have_read_topic_id_list)-5000 cut_len = len(have_read_topic_id_list)-30000
have_read_topic_id_list = have_read_topic_id_list[cut_len:] have_read_topic_id_list = have_read_topic_id_list[cut_len:]
redis_dict = { redis_dict = {
"have_read_topic_list": json.dumps(have_read_topic_id_list), "have_read_topic_list": json.dumps(have_read_topic_id_list),
} }
redis_client.hmset(redis_key, redis_dict) redis_client.hmset(redis_key, redis_dict)
# 每个session key保存15分钟 # 每个session key保存15分钟
redis_client.expire(redis_key, 60 * 60 * 24 * 3) redis_client.expire(redis_key, 60 * 60 * 24 * 30)
return topic_id_list return topic_id_list
except: except:
......
...@@ -15,7 +15,8 @@ class UserExtra(models.Model): ...@@ -15,7 +15,8 @@ class UserExtra(models.Model):
db_table="user_extra" db_table="user_extra"
id = models.IntegerField(verbose_name="主键ID",primary_key=True) id = models.IntegerField(verbose_name="主键ID",primary_key=True)
user_id = models.BigIntegerField(verbose_name=u"用户ID") user_id = models.CharField(verbose_name=u"用户ID",max_length=100)
is_shadow = models.BooleanField(verbose_name=u"是否是马甲账户") is_shadow = models.BooleanField(verbose_name=u"是否是马甲账户")
is_online = models.BooleanField(verbose_name=u"是否上线") is_online = models.BooleanField(verbose_name=u"是否上线")
is_recommend = models.BooleanField(verbose_name=u"是否推荐") is_recommend = models.BooleanField(verbose_name=u"是否推荐")
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment