Commit e3387b7b authored by 段英荣

Merge branch 'master' into 'test'

Master

See merge request !216
parents 0ad7f0ca cf209f71
......@@ -47,18 +47,19 @@ class CollectData(object):
def update_recommend_tag_list(self, device_id,user_feature=None):
try:
recommend_tag_list = list()
recommend_tag_dict = dict()
redis_linucb_tag_data_dict = self._get_user_linucb_info(device_id)
if len(redis_linucb_tag_data_dict) == 0:
recommend_tag_list = LinUCB.get_default_tag_list()
LinUCB.init_device_id_linucb_info(redis_client, self.linucb_matrix_redis_prefix,device_id,recommend_tag_list)
else:
user_feature = user_feature if user_feature else self.user_feature
recommend_tag_list = LinUCB.linucb_recommend_tag(device_id,redis_linucb_tag_data_dict,user_feature,list(redis_linucb_tag_data_dict.keys()))
recommend_tag_dict = LinUCB.linucb_recommend_tag(device_id,redis_linucb_tag_data_dict,user_feature,list(redis_linucb_tag_data_dict.keys()))
logging.info("duan add,device_id:%s,recommend_tag_list:%s" % (str(device_id), str(recommend_tag_list)))
if len(recommend_tag_list) > 0:
if len(recommend_tag_dict) > 0:
tag_recommend_redis_key = self.linucb_recommend_redis_prefix + str(device_id)
redis_client.set(tag_recommend_redis_key, json.dumps(recommend_tag_list))
redis_client.set(tag_recommend_redis_key, json.dumps(list(recommend_tag_dict.keys())))
# TODO: set an expiry time; investigate whether set() supports it directly
redis_client.expire(tag_recommend_redis_key, 7*24*60*60)
......
......@@ -14,9 +14,9 @@ from django.conf import settings
class LinUCB:
d = 2
alpha = 0.25
r1 = 1
r0 = -0.5
alpha = 0.1
r1 = 10
r0 = -0.1
default_tag_list = list()
@classmethod
......@@ -70,7 +70,7 @@ class LinUCB:
#art_max = tag_list[np.argmax(np.dot(xaT, theta_tmp) + cls.alpha * np.sqrt(np.dot(np.dot(xaT, AaI_tmp), xa)))]
top_tag_set = set()
top_tag_dict = dict()
np_score_list = list()
np_score_dict = dict()
......@@ -85,18 +85,19 @@ class LinUCB:
sorted_np_score_list = sorted(np_score_list,reverse=True)
for top_score in sorted_np_score_list:
for top_score_index in np_score_dict[top_score]:
top_tag_set.add(str(tag_list[top_score_index], encoding="utf-8"))
if len(top_tag_set) >= 10:
tag_id = str(tag_list[top_score_index], encoding="utf-8")
top_tag_dict[tag_id] = top_score
if len(top_tag_dict) >= 10:
break
if len(top_tag_set) >= 10:
if len(top_tag_dict) >= 10:
break
logging.info("duan add,device_id:%s,sorted_np_score_list:%s,np_score_dict:%s" % (str(device_id), str(sorted_np_score_list), str(np_score_dict)))
return list(top_tag_set)
return top_tag_dict
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return []
return {}
@classmethod
def init_device_id_linucb_info(cls, redis_cli,redis_prefix, device_id, tag_list):
......
......@@ -134,7 +134,7 @@ class TopicUtils(object):
"""
try:
attention_user_id_list = list()
pick_user_id_list = list()
# pick_user_id_list = list()
# same_group_id_list = list()
user_tag_list = list()
......@@ -145,8 +145,8 @@ class TopicUtils(object):
attention_user_info_list = result_dict["hits"][0]["_source"]["attention_user_id_list"]
attention_user_id_list = [item["user_id"] for item in attention_user_info_list]
pick_user_info_list = result_dict["hits"][0]["_source"]["pick_user_id_list"]
pick_user_id_list = [item["user_id"] for item in pick_user_info_list]
# pick_user_info_list = result_dict["hits"][0]["_source"]["pick_user_id_list"]
# pick_user_id_list = [item["user_id"] for item in pick_user_info_list]
# same_pictorial_user_info_list = result_dict["hits"][0]["_source"]["same_pictorial_user_id_list"]
#
......@@ -165,22 +165,22 @@ class TopicUtils(object):
"language_type": 1
}
},
"weight": 3
"weight": 4
},
{
"linear": {
"gauss": {
"create_time": {
"scale": "1d",
"decay": 0.99
}
},
"weight": 500
"weight": 5
}
]
if len(user_similar_score_list) > 0:
for item in user_similar_score_list[:100]:
score_item = 3 * 10 * item[1]
score_item = 2 + item[1]
functions_list.append(
{
"filter": {"bool": {
......@@ -197,14 +197,14 @@ class TopicUtils(object):
"weight": 3,
}
)
if len(pick_user_id_list) > 0:
functions_list.append(
{
"filter": {"bool": {
"should": {"terms": {"user_id": pick_user_id_list}}}},
"weight": 2
}
)
# if len(pick_user_id_list) > 0:
# functions_list.append(
# {
# "filter": {"bool": {
# "should": {"terms": {"user_id": pick_user_id_list}}}},
# "weight": 2
# }
# )
# if len(same_pictorial_id_list) > 0:
# functions_list.append(
......@@ -224,14 +224,33 @@ class TopicUtils(object):
"weight": 1
}
)
if len(recommend_tag_list) > 0:
functions_list.append(
{
"filter": {"bool": {
"should": {"terms": {"edit_tag_list": recommend_tag_list}}}},
"weight": 3
}
)
if len(recommend_tag_list)>0:
if len(recommend_tag_list)>1:
functions_list += [
{
"filter": {"term": {"tag_list": recommend_tag_list[0]}},
"weight": 4
},
{
"filter": {"terms": {"tag_list": recommend_tag_list[1:]}},
"weight": 3
}
]
else:
functions_list.append(
{
"filter": {"terms": {"tag_list": recommend_tag_list}},
"weight": 3
}
)
# for tag_id in recommend_tag_dict:
# functions_list.append(
# {
# "filter": {"term": {"tag_list": tag_id}},
# "weight": recommend_tag_dict[tag_id]
# }
# )
low_content_level = 4 if query_type == TopicPageType.FIND_PAGE else 3
query_function_score = {
......@@ -319,7 +338,13 @@ class TopicUtils(object):
},
"order": "desc"
}
}
},
# {
# "offline_score":{
# "order": "desc"
# }
# },
"_score"
]
result_dict = ESPerform.get_search_results(ESPerform.get_cli(), sub_index_name=index_type, query_body=q,
offset=offset, size=size)
......
......@@ -57,7 +57,13 @@ def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, size, query
tag_recommend_redis_key = "physical:linucb:tag_recommend:device_id:" + str(device_id)
recommend_tag_list = []
# recommend_tag_dict = dict()
# tag_recommend_val = redis_client.get(tag_recommend_redis_key)
# if tag_recommend_val:
# recommend_tag_dict = json.loads(str(tag_recommend_val, encoding="utf-8"))
recommend_tag_list = list()
tag_recommend_val = redis_client.get(tag_recommend_redis_key)
if tag_recommend_val:
recommend_tag_list = json.loads(str(tag_recommend_val, encoding="utf-8"))
......@@ -123,15 +129,15 @@ def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, size, query
# break
have_read_topic_id_list.extend(topic_id_list)
if len(have_read_topic_id_list) > 5000:
cut_len = len(have_read_topic_id_list)-5000
if len(have_read_topic_id_list) > 30000:
cut_len = len(have_read_topic_id_list)-30000
have_read_topic_id_list = have_read_topic_id_list[cut_len:]
redis_dict = {
"have_read_topic_list": json.dumps(have_read_topic_id_list),
}
redis_client.hmset(redis_key, redis_dict)
# Expire the session key after the TTL set below (counted in days, not 15 minutes)
redis_client.expire(redis_key, 60 * 60 * 24 * 3)
redis_client.expire(redis_key, 60 * 60 * 24 * 30)
return topic_id_list
except:
......
......@@ -15,7 +15,8 @@ class UserExtra(models.Model):
db_table="user_extra"
id = models.IntegerField(verbose_name="主键ID",primary_key=True)
user_id = models.BigIntegerField(verbose_name=u"用户ID")
user_id = models.CharField(verbose_name=u"用户ID",max_length=100)
is_shadow = models.BooleanField(verbose_name=u"是否是马甲账户")
is_online = models.BooleanField(verbose_name=u"是否上线")
is_recommend = models.BooleanField(verbose_name=u"是否推荐")
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment