Commit a5ccccd8 authored by 段英荣's avatar 段英荣

modify huabao bug

parent 877571ac
......@@ -11,22 +11,26 @@ import traceback
from libs.cache import redis_client
from trans2es.models.face_user_contrast_similar import FaceUserContrastSimilar,UserSimilarScore
import json
from linucb.utils.register_user_tag import RegisterUserTag
@shared_task
def write_to_es(es_type, pk_list, use_batch_query_set=False):
try:
pk_list = list(frozenset(pk_list))
type_info_map = get_type_info_map()
type_info = type_info_map[es_type]
logging.info("consume es_type:%s" % str(es_type))
type_info.insert_table_by_pk_list(
sub_index_name=es_type,
pk_list=pk_list,
use_batch_query_set=use_batch_query_set,
es=ESPerform.get_cli()
)
if es_type == "register_user_tag":
RegisterUserTag.get_register_user_tag(pk_list)
else:
type_info_map = get_type_info_map()
type_info = type_info_map[es_type]
logging.info("consume es_type:%s" % str(es_type))
type_info.insert_table_by_pk_list(
sub_index_name=es_type,
pk_list=pk_list,
use_batch_query_set=use_batch_query_set,
es=ESPerform.get_cli()
)
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
......
......@@ -28,54 +28,64 @@ class RegisterUserTag(object):
linucb_device_id_register_tag_topic_id_prefix = "physical:linucb:register_tag_topic_recommend:device_id:"
linucb_user_id_register_tag_topic_id_prefix = "physical:linucb:register_tag_topic_recommend:user_id:"
linucb_register_user_tag_key = "physical:linucb:register_user_tag_info"
@classmethod
def get_register_user_tag(cls,pk_list):
try:
user_id_set = set()
# user_id_set = set()
user_id_dict = dict()
query_results = AccountUserTag.objects.filter(pk__in=pk_list)
for item in query_results:
tag_id = item.tag_id
user_id = item.user
if user_id not in user_id_set:
user_id_set.add(user_id)
user_tag_list = AccountUserTag.objects.filter(user=user_id).values_list("tag_id",flat=True)
have_read_topic_id_list = Tools.get_have_read_topic_id_list(-1, user_id,
TopicPageType.HOME_RECOMMEND)
recommend_topic_id_list = list()
cycle_num = int(10000/len(user_tag_list))
for index in range(0,cycle_num):
for tag_id in user_tag_list:
redis_tag_id_key = cls.tag_topic_id_redis_prefix + str(tag_id)
redis_tag_id_data = redis_client.get(redis_tag_id_key)
tag_topic_id_list = json.loads(redis_tag_id_data) if redis_tag_id_data else []
if not redis_tag_id_data:
tag_topic_id_list = ESPerform.get_tag_topic_list(tag_id)
redis_client.set(redis_tag_id_key,json.dumps(tag_topic_id_list))
redis_client.expire(redis_tag_id_key,1*24*60*60)
if len(tag_topic_id_list)>index:
for topic_id in tag_topic_id_list[index:]:
if topic_id not in have_read_topic_id_list and topic_id not in recommend_topic_id_list:
recommend_topic_id_list.append(topic_id)
break
redis_register_tag_topic_data = {
"data": json.dumps(recommend_topic_id_list),
"cursor": 0
}
redis_client.hmset(cls.linucb_user_id_register_tag_topic_id_prefix,redis_register_tag_topic_data)
redis_client.expire(cls.linucb_user_id_register_tag_topic_id_prefix,30*24*60*60)
if user_id not in user_id_dict:
user_id_dict[user_id] = set()
user_id_dict[user_id].add(tag_id)
topic_recommend_redis_key = cls.linucb_user_id_recommend_topic_id_prefix + str(user_id)
redis_data_dict = {
"data": json.dumps(recommend_topic_id_list),
"cursor":0
}
redis_client.hmset(topic_recommend_redis_key,redis_data_dict)
redis_client.expire(topic_recommend_redis_key,30*24*60*60)
for user_id in user_id_dict:
redis_client.hset(cls.linucb_register_user_tag_key, user_id, json.dumps(list(user_id_dict[user_id])))
# if user_id not in user_id_set:
# user_id_set.add(user_id)
#
# user_tag_list = AccountUserTag.objects.filter(user=user_id).values_list("tag_id",flat=True)
#
# have_read_topic_id_list = Tools.get_have_read_topic_id_list(-1, user_id,
# TopicPageType.HOME_RECOMMEND)
# recommend_topic_id_list = list()
# cycle_num = int(10000/len(user_tag_list))
# for index in range(0,cycle_num):
# for tag_id in user_tag_list:
# redis_tag_id_key = cls.tag_topic_id_redis_prefix + str(tag_id)
# redis_tag_id_data = redis_client.get(redis_tag_id_key)
# tag_topic_id_list = json.loads(redis_tag_id_data) if redis_tag_id_data else []
# if not redis_tag_id_data:
# tag_topic_id_list = ESPerform.get_tag_topic_list(tag_id)
# redis_client.set(redis_tag_id_key,json.dumps(tag_topic_id_list))
# redis_client.expire(redis_tag_id_key,1*24*60*60)
#
# if len(tag_topic_id_list)>index:
# for topic_id in tag_topic_id_list[index:]:
# if topic_id not in have_read_topic_id_list and topic_id not in recommend_topic_id_list:
# recommend_topic_id_list.append(topic_id)
# break
#
# redis_register_tag_topic_data = {
# "data": json.dumps(recommend_topic_id_list),
# "cursor": 0
# }
# redis_client.hmset(cls.linucb_user_id_register_tag_topic_id_prefix,redis_register_tag_topic_data)
# redis_client.expire(cls.linucb_user_id_register_tag_topic_id_prefix,30*24*60*60)
#
# topic_recommend_redis_key = cls.linucb_user_id_recommend_topic_id_prefix + str(user_id)
# redis_data_dict = {
# "data": json.dumps(recommend_topic_id_list),
# "cursor":0
# }
# redis_client.hmset(topic_recommend_redis_key,redis_data_dict)
# redis_client.expire(topic_recommend_redis_key,30*24*60*60)
#
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
......@@ -50,21 +50,21 @@ class CollectData(object):
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return dict()
def update_recommend_tag_list(self, device_id,user_feature=None):
def update_recommend_tag_list(self, device_id,user_feature=None,user_id=None):
try:
recommend_tag_set = set()
recommend_tag_list = list()
recommend_tag_dict = dict()
redis_linucb_tag_data_dict = self._get_user_linucb_info(device_id)
if len(redis_linucb_tag_data_dict) == 0:
recommend_tag_list = LinUCB.get_default_tag_list()
recommend_tag_list = LinUCB.get_default_tag_list(user_id)
LinUCB.init_device_id_linucb_info(redis_client, self.linucb_matrix_redis_prefix,device_id,recommend_tag_list)
else:
user_feature = user_feature if user_feature else self.user_feature
(recommend_tag_dict,recommend_tag_set) = LinUCB.linucb_recommend_tag(device_id,redis_linucb_tag_data_dict,user_feature,list(redis_linucb_tag_data_dict.keys()))
recommend_tag_list = list(recommend_tag_dict.keys())
if len(recommend_tag_dict) > 0:
recommend_tag_list = list(recommend_tag_set)
if len(recommend_tag_list) > 0:
tag_recommend_redis_key = self.linucb_recommend_redis_prefix + str(device_id)
redis_client.set(tag_recommend_redis_key, json.dumps(recommend_tag_list))
# Todo:设置过期时间,调研set是否支持
......@@ -131,6 +131,7 @@ class CollectData(object):
if "type" in raw_val_dict and "on_click_feed_topic_card" == raw_val_dict["type"]:
topic_id = raw_val_dict["params"]["business_id"] or raw_val_dict["params"]["topic_id"]
device_id = raw_val_dict["device"]["device_id"]
user_id = raw_val_dict["user_id"] if "user_id" in raw_val_dict else None
logging.info("consume topic_id:%s,device_id:%s" % (str(topic_id), str(device_id)))
......@@ -151,7 +152,7 @@ class CollectData(object):
self.update_user_linucb_tag_info(reward, device_id, tag_id, user_feature)
# 更新该用户的推荐tag数据,放在 更新完成user tag行为信息之后
self.update_recommend_tag_list(device_id, user_feature)
self.update_recommend_tag_list(device_id, user_feature, user_id)
elif "type" in raw_val_dict and "page_precise_exposure" == raw_val_dict["type"]:
if isinstance(raw_val_dict["params"]["exposure_cards"],str):
exposure_cards_list = json.loads(raw_val_dict["params"]["exposure_cards"])
......@@ -160,6 +161,7 @@ class CollectData(object):
else:
exposure_cards_list = list()
device_id = raw_val_dict["device"]["device_id"]
user_id = raw_val_dict["user_id"] if "user_id" in raw_val_dict else None
exposure_topic_id_list = list()
for item in exposure_cards_list:
......@@ -193,7 +195,7 @@ class CollectData(object):
self.update_user_linucb_tag_info(reward, device_id, tag_id, user_feature)
# 更新该用户的推荐tag数据,放在 更新完成user tag行为信息之后
self.update_recommend_tag_list(device_id, user_feature)
self.update_recommend_tag_list(device_id, user_feature, user_id)
else:
logging.warning("unknown type msg:%s" % raw_val_dict.get("type", "missing type"))
except:
......
......@@ -20,11 +20,16 @@ class LinUCB:
default_tag_list = list()
@classmethod
def get_default_tag_list(cls):
def get_default_tag_list(cls,user_id):
try:
if len(cls.default_tag_list) == 0:
cls.default_tag_list = Tag.objects.using(settings.SLAVE_DB_NAME).filter(is_online=True,collection=1).values_list("id",flat=True)[0:100]
if user_id:
redis_tag_data = redis_client.hget("physical:linucb:register_user_tag_info", user_id)
cls.default_tag_list = json.loads(redis_tag_data) if redis_tag_data else []
if len(cls.default_tag_list) == 0:
cls.default_tag_list = Tag.objects.using(settings.SLAVE_DB_NAME).filter(is_online=True,collection=1).values_list("id",flat=True)[0:100]
return cls.default_tag_list
except:
......
......@@ -80,7 +80,7 @@ class Pictorial(models.Model):
for topic_id in topic_id_list:
topic_id_object = Topic.objects.filter(id=int(topic_id)).first()
if topic_id_object and topic_id_object.is_online and topic_id_object.content_level in [0,3,4,5]:
if topic_id_object and topic_id_object.is_online and int(topic_id_object.content_level) in [0,3,4,5]:
effective_num += 1
if effective_num >= 5:
ret = True
......
......@@ -197,14 +197,14 @@ class Topic(models.Model):
elif self.content_level == '3':
offline_score += 2.0
exposure_count = ActionSumAboutTopic.objects.using(settings.SLAVE_DB_NAME).filter(topic_id=self.id, data_type=1).count()
click_count = ActionSumAboutTopic.objects.using(settings.SLAVE_DB_NAME).filter(topic_id=self.id, data_type=2).count()
uv_num = ActionSumAboutTopic.objects.using(settings.SLAVE_DB_NAME).filter(topic_id=self.id, data_type=3).count()
if exposure_count > 0:
offline_score += click_count / exposure_count
if uv_num > 0:
offline_score += (self.vote_num / uv_num + self.reply_num / uv_num)
# exposure_count = ActionSumAboutTopic.objects.using(settings.SLAVE_DB_NAME).filter(topic_id=self.id, data_type=1).count()
# click_count = ActionSumAboutTopic.objects.using(settings.SLAVE_DB_NAME).filter(topic_id=self.id, data_type=2).count()
# uv_num = ActionSumAboutTopic.objects.using(settings.SLAVE_DB_NAME).filter(topic_id=self.id, data_type=3).count()
#
# if exposure_count > 0:
# offline_score += click_count / exposure_count
# if uv_num > 0:
# offline_score += (self.vote_num / uv_num + self.reply_num / uv_num)
"""
1:马甲账号是否对总分降权?
......@@ -247,7 +247,6 @@ class PictorialTopic(models.Model):
pictorial_id = models.BigIntegerField(verbose_name=u'画报ID')
topic_id = models.BigIntegerField(verbose_name=u'帖子ID')
is_online = models.BooleanField(verbose_name=u"是否有效", default=True)
is_online = models.BooleanField(verbose_name=u'是否上线')
is_deleted = models.BooleanField(verbose_name=u'是否删除')
......
......@@ -37,7 +37,7 @@ class PictorialTransfer(object):
res["tag_id"] = tag_id
res["tag_name"] = instance.get_tag_by_name(tag_id)
res["topic_id_list"] =instance.get_topic_id()
#res["effective"] = instance.get_effective(res["topic_id_list"])
res["effective"] = instance.get_effective(res["topic_id_list"])
return res
except:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment