Commit ef69f2cc authored by 段英荣

Merge branch 'master' into 'test'

Master

See merge request !289
parents 4526073e f5f2c418
...@@ -20,6 +20,8 @@ def write_to_es(es_type, pk_list, use_batch_query_set=False): ...@@ -20,6 +20,8 @@ def write_to_es(es_type, pk_list, use_batch_query_set=False):
if es_type == "register_user_tag": if es_type == "register_user_tag":
RegisterUserTag.get_register_user_tag(pk_list) RegisterUserTag.get_register_user_tag(pk_list)
elif es_type == "attention_user_tag":
RegisterUserTag.get_user_attention_tag(pk_list)
else: else:
type_info_map = get_type_info_map() type_info_map = get_type_info_map()
type_info = type_info_map[es_type] type_info = type_info_map[es_type]
......
...@@ -264,7 +264,7 @@ class ESPerform(object): ...@@ -264,7 +264,7 @@ class ESPerform(object):
return True return True
@classmethod @classmethod
def get_tag_topic_list(cls,tag_id,have_read_topic_id_list): def get_tag_topic_list(cls,tag_id,have_read_topic_id_list,size=100):
try: try:
functions_list = list() functions_list = list()
for id in tag_id: for id in tag_id:
...@@ -274,13 +274,27 @@ class ESPerform(object): ...@@ -274,13 +274,27 @@ class ESPerform(object):
"weight": 1 "weight": 1
} }
) )
functions_list += [
{
"filter": {"term": {"content_level": 6}},
"weight": 6000
},
{
"filter": {"term": {"content_level": 5}},
"weight": 5000
},
{
"filter": {"term": {"content_level": 4}},
"weight": 4000
}
]
q = { q = {
"query":{ "query":{
"function_score":{ "function_score":{
"query": { "query": {
"bool": { "bool": {
"must": [ "must": [
{"range": {"content_level": {"gte": 3, "lte": 5}}}, {"range": {"content_level": {"gte": 4, "lte": 6}}},
{"term": {"is_online": True}}, {"term": {"is_online": True}},
{"term": {"is_deleted": False}}, {"term": {"is_deleted": False}},
{"terms": {"tag_list": tag_id}} {"terms": {"tag_list": tag_id}}
...@@ -308,7 +322,7 @@ class ESPerform(object): ...@@ -308,7 +322,7 @@ class ESPerform(object):
} }
} }
result_dict = ESPerform.get_search_results(ESPerform.get_cli(), sub_index_name="topic", query_body=q, result_dict = ESPerform.get_search_results(ESPerform.get_cli(), sub_index_name="topic", query_body=q,
offset=0, size=100,routing="3,4,5") offset=0, size=size,routing="4,5,6")
topic_id_list = [item["_source"]["id"] for item in result_dict["hits"]] topic_id_list = [item["_source"]["id"] for item in result_dict["hits"]]
logging.info("topic_id_list:%s"%str(topic_id_list)) logging.info("topic_id_list:%s"%str(topic_id_list))
......
...@@ -25,10 +25,10 @@ def tzlc(dt, truncate_to_sec=True): ...@@ -25,10 +25,10 @@ def tzlc(dt, truncate_to_sec=True):
def get_have_read_topic_id_list(device_id,user_id,query_type): def get_have_read_topic_id_list(device_id,user_id,query_type):
try: try:
if user_id==-1: if user_id and int(user_id)>0:
redis_key = "physical:home_recommend" + ":device_id:" + str(device_id) + ":query_type:" + str(query_type)
else:
redis_key = "physical:home_recommend" + ":user_id:" + str(user_id) + ":query_type:" + str(query_type) redis_key = "physical:home_recommend" + ":user_id:" + str(user_id) + ":query_type:" + str(query_type)
else:
redis_key = "physical:home_recommend" + ":device_id:" + str(device_id) + ":query_type:" + str(query_type)
have_read_topic_id_list = list() have_read_topic_id_list = list()
......
...@@ -9,7 +9,7 @@ import traceback ...@@ -9,7 +9,7 @@ import traceback
import json import json
import pickle import pickle
from django.conf import settings from django.conf import settings
from trans2es.models.tag import AccountUserTag from trans2es.models.tag import AccountUserTag,CommunityTagFollow
from libs.es import ESPerform from libs.es import ESPerform
import libs.tools as Tools import libs.tools as Tools
from search.utils.common import * from search.utils.common import *
...@@ -30,8 +30,40 @@ class RegisterUserTag(object): ...@@ -30,8 +30,40 @@ class RegisterUserTag(object):
linucb_user_id_register_tag_topic_id_prefix = "physical:linucb:register_tag_topic_recommend:user_id:" linucb_user_id_register_tag_topic_id_prefix = "physical:linucb:register_tag_topic_recommend:user_id:"
linucb_register_user_tag_key = "physical:linucb:register_user_tag_info" linucb_register_user_tag_key = "physical:linucb:register_user_tag_info"
@classmethod
def get_user_attention_tag(cls, pk_list):
    """
    Merge the tags that users follow into the per-user tag hash kept in Redis.

    :remark For every online, non-deleted CommunityTagFollow row selected by
        pk_list, all tag ids currently followed by that row's user are unioned
        with the JSON list already stored for the user in the
        ``linucb_register_user_tag_key`` hash, and the de-duplicated result is
        written back to the same hash field.
    :param pk_list: primary keys of the CommunityTagFollow rows to process
    :return: None; any failure is logged and not re-raised
    """
    try:
        user_id_dict = dict()
        query_results = CommunityTagFollow.objects.filter(pk__in=pk_list, is_deleted=False, is_online=True)
        for item in query_results:
            user_id = item.user_id
            # Several follow rows may belong to the same user; the per-user
            # tag queryset is identical for all of them, so build it once.
            if user_id in user_id_dict:
                continue
            user_id_dict[user_id] = CommunityTagFollow.objects.filter(
                user=user_id, is_deleted=False, is_online=True).values_list("tag_id", flat=True)

        for user_id, user_tag_list in user_id_dict.items():
            redis_user_tag_id_data = redis_client.hget(cls.linucb_register_user_tag_key, user_id)
            # A missing hash field means nothing has been stored for this user yet.
            redis_user_tag_id_list = json.loads(redis_user_tag_id_data) if redis_user_tag_id_data else []
            redis_user_tag_id_list.extend(user_tag_list)
            # set() drops tag ids that were already present in Redis.
            redis_client.hset(cls.linucb_register_user_tag_key, user_id,
                              json.dumps(list(set(redis_user_tag_id_list))))
    except Exception:
        # Best-effort sync: log the traceback, but let KeyboardInterrupt and
        # SystemExit propagate instead of being swallowed by a bare except.
        logging.error("catch exception,err_msg:%s" % traceback.format_exc())
@classmethod @classmethod
def get_register_user_tag(cls,pk_list): def get_register_user_tag(cls,pk_list):
"""
:remark 用户注册时选的标签
:param pk_list:
:return:
"""
try: try:
# user_id_set = set() # user_id_set = set()
user_id_dict = dict() user_id_dict = dict()
...@@ -43,7 +75,10 @@ class RegisterUserTag(object): ...@@ -43,7 +75,10 @@ class RegisterUserTag(object):
user_id_dict[user_id] = user_tag_list user_id_dict[user_id] = user_tag_list
for user_id in user_id_dict: for user_id in user_id_dict:
redis_client.hset(cls.linucb_register_user_tag_key, user_id, json.dumps(list(user_id_dict[user_id]))) redis_user_tag_id_data = redis_client.hget(cls.linucb_register_user_tag_key, user_id)
redis_user_tag_id_list = json.loads(redis_user_tag_id_data) if redis_user_tag_id_data else []
redis_user_tag_id_list.extend(user_id_dict[user_id])
redis_client.hset(cls.linucb_register_user_tag_key, user_id, json.dumps(list(set(redis_user_tag_id_list))))
# if user_id not in user_id_set: # if user_id not in user_id_set:
# user_id_set.add(user_id) # user_id_set.add(user_id)
......
...@@ -52,11 +52,8 @@ class CollectData(object): ...@@ -52,11 +52,8 @@ class CollectData(object):
logging.error("catch exception,err_msg:%s" % traceback.format_exc()) logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return dict() return dict()
def update_recommend_tag_list(self, device_id,user_feature=None,user_id=None,click_topic_tag_list=None): def update_recommend_tag_list(self, device_id,user_feature=None,user_id=None,click_topic_tag_list=None,new_user_click_tag_list = []):
try: try:
recommend_tag_set = set()
recommend_tag_list = list()
recommend_tag_dict = dict()
redis_linucb_tag_data_dict = self._get_user_linucb_info(device_id) redis_linucb_tag_data_dict = self._get_user_linucb_info(device_id)
if len(redis_linucb_tag_data_dict) == 0: if len(redis_linucb_tag_data_dict) == 0:
recommend_tag_list = LinUCB.get_default_tag_list(user_id) recommend_tag_list = LinUCB.get_default_tag_list(user_id)
...@@ -72,9 +69,8 @@ class CollectData(object): ...@@ -72,9 +69,8 @@ class CollectData(object):
# Todo:设置过期时间,调研set是否支持 # Todo:设置过期时间,调研set是否支持
redis_client.expire(tag_recommend_redis_key, 7*24*60*60) redis_client.expire(tag_recommend_redis_key, 7*24*60*60)
have_read_topic_id_list = Tools.get_have_read_topic_id_list(device_id,-1,TopicPageType.HOME_RECOMMEND) have_read_topic_id_list = Tools.get_have_read_topic_id_list(device_id,user_id,TopicPageType.HOME_RECOMMEND)
promote_recommend_topic_id_list = list() promote_recommend_topic_id_list = TopicHomeRecommend.objects.using(settings.SLAVE_DB_NAME).filter(is_online=1).values_list("topic_id",flat=True)
promote_recommend_topic_id_list = TopicHomeRecommend.objects.using(settings.SLAVE_DB_NAME).filter(is_online=1).values_list("topic_id")
have_read_topic_id_list.extend(promote_recommend_topic_id_list) have_read_topic_id_list.extend(promote_recommend_topic_id_list)
recommend_topic_id_list = list() recommend_topic_id_list = list()
...@@ -83,22 +79,17 @@ class CollectData(object): ...@@ -83,22 +79,17 @@ class CollectData(object):
if click_topic_tag_list: if click_topic_tag_list:
if len(click_topic_tag_list)>0: if len(click_topic_tag_list)>0:
recommend_topic_id_list_click = ESPerform.get_tag_topic_list(click_topic_tag_list, recommend_topic_id_list_click = ESPerform.get_tag_topic_list(click_topic_tag_list,
have_read_topic_id_list) have_read_topic_id_list,size=2)
if len(recommend_topic_id_list_click) > 0: if len(recommend_topic_id_list_click) > 0:
num = min(len(recommend_topic_id_list_click), 2) recommend_topic_id_list.extend(recommend_topic_id_list_click)
logging.info("recommend_topic_id_list:%s" % (str(num))) have_read_topic_id_list.extend(recommend_topic_id_list)
for i in range(0,num): click_recommend_redis_key = self.click_recommend_redis_key_prefix + str(device_id)
recommend_topic_id_list.append(recommend_topic_id_list_click[i]) click_redis_data_dict = {
have_read_topic_id_list.extend(recommend_topic_id_list) "data": json.dumps(recommend_topic_id_list),
click_recommend_redis_key = self.click_recommend_redis_key_prefix + str(device_id) "cursor": 0
click_redis_data_dict = { }
"data": json.dumps(recommend_topic_id_list), redis_client.hmset(click_recommend_redis_key, click_redis_data_dict)
"cursor": 0
}
redis_client.hmset(click_recommend_redis_key, click_redis_data_dict)
total_topic_list = list()
tag_topic_id_list = list()
tag_id_list = recommend_tag_list[0:100] tag_id_list = recommend_tag_list[0:100]
topic_recommend_redis_key = self.linucb_recommend_topic_id_prefix + str(device_id) topic_recommend_redis_key = self.linucb_recommend_topic_id_prefix + str(device_id)
...@@ -112,9 +103,16 @@ class CollectData(object): ...@@ -112,9 +103,16 @@ class CollectData(object):
if len(recommend_topic_id_list)==0 and cursor==0 and len(redis_topic_list)>0: if len(recommend_topic_id_list)==0 and cursor==0 and len(redis_topic_list)>0:
have_read_topic_id_list.extend(redis_topic_list[:2]) have_read_topic_id_list.extend(redis_topic_list[:2])
tag_topic_id_list = ESPerform.get_tag_topic_list(tag_id_list,have_read_topic_id_list) if len(tag_id_list) > 0:
if len(new_user_click_tag_list)>0:
tag_topic_id_list = ESPerform.get_tag_topic_list(new_user_click_tag_list, have_read_topic_id_list)
logging.warning("tag_topic_id_list:%s" % str(new_user_click_tag_list))
logging.warning("tag_id_list:%s" % str(tag_id_list))
else:
tag_topic_id_list = ESPerform.get_tag_topic_list(tag_id_list,have_read_topic_id_list)
if len(recommend_topic_id_list)>0: if len(recommend_topic_id_list)>0 or len(new_user_click_tag_list) > 0:
tag_topic_id_list = recommend_topic_id_list + tag_topic_id_list tag_topic_id_list = recommend_topic_id_list + tag_topic_id_list
redis_data_dict = { redis_data_dict = {
"data": json.dumps(tag_topic_id_list), "data": json.dumps(tag_topic_id_list),
...@@ -165,8 +163,8 @@ class CollectData(object): ...@@ -165,8 +163,8 @@ class CollectData(object):
tag_list = list() tag_list = list()
click_topic_tag_list = list() click_topic_tag_list = list()
collection_tag_sql_query_results = TopicTag.objects.using(settings.SLAVE_DB_NAME).filter(topic_id=topic_id).values_list("tag_id","is_online","is_collection") collection_tag_sql_query_results = TopicTag.objects.using(settings.SLAVE_DB_NAME).filter(topic_id=topic_id).values_list("tag_id","is_online","is_collection")
if len(collection_tag_sql_query_results)>0: # if len(collection_tag_sql_query_results)>0:
for tag_id,is_online,is_collection in collection_tag_sql_query_results: for tag_id,is_online,is_collection in collection_tag_sql_query_results:
if is_online and is_collection == 1: if is_online and is_collection == 1:
click_topic_tag_list.append(tag_id) click_topic_tag_list.append(tag_id)
...@@ -189,8 +187,8 @@ class CollectData(object): ...@@ -189,8 +187,8 @@ class CollectData(object):
reward = 1 if is_click or is_vote else 0 reward = 1 if is_click or is_vote else 0
logging.info("positive tag_list,device_id:%s,topic_id:%s,tag_list:%s" % ( logging.info("positive tag_list,device_id:%s,topic_id:%s,tag_list:%s" % (
str(device_id), str(topic_id), str(tag_list))) str(device_id), str(topic_id), str(click_topic_tag_list)))
for tag_id in tag_list: for tag_id in click_topic_tag_list:
self.update_user_linucb_tag_info(reward, device_id, tag_id, user_feature) self.update_user_linucb_tag_info(reward, device_id, tag_id, user_feature)
# 更新该用户的推荐tag数据,放在 更新完成user tag行为信息之后 # 更新该用户的推荐tag数据,放在 更新完成user tag行为信息之后
...@@ -204,7 +202,7 @@ class CollectData(object): ...@@ -204,7 +202,7 @@ class CollectData(object):
exposure_cards_list = list() exposure_cards_list = list()
device_id = raw_val_dict["device"]["device_id"] device_id = raw_val_dict["device"]["device_id"]
user_id = raw_val_dict["user_id"] if "user_id" in raw_val_dict else None user_id = raw_val_dict["user_id"] if "user_id" in raw_val_dict else None
logging.warning("type msg:%s" % raw_val_dict.get("type"))
exposure_topic_id_list = list() exposure_topic_id_list = list()
for item in exposure_cards_list: for item in exposure_cards_list:
if "card_id" not in item: if "card_id" not in item:
...@@ -217,14 +215,17 @@ class CollectData(object): ...@@ -217,14 +215,17 @@ class CollectData(object):
topic_tag_id_dict = dict() topic_tag_id_dict = dict()
tag_list = list() tag_list = list()
exposure_sql_query_results = TopicTag.objects.using(settings.SLAVE_DB_NAME).filter(topic_id__in=exposure_topic_id_list).values_list("topic_id","tag_id","is_online") exposure_sql_query_results = TopicTag.objects.using(settings.SLAVE_DB_NAME).filter(topic_id__in=exposure_topic_id_list).values_list("topic_id","tag_id","is_online","is_collection")
for topic_id,tag_id,is_online in exposure_sql_query_results: # if len(exposure_sql_query_results)>0:
for topic_id,tag_id,is_online,is_collection in exposure_sql_query_results:
if is_online and is_collection == 1:
tag_list.append(tag_id)
if is_online: if is_online:
# tag_sql_query_results = Tag.objects.using(settings.SLAVE_DB_NAME).filter( tag_sql_query_results = Tag.objects.using(settings.SLAVE_DB_NAME).filter(
# id=tag_id).values_list("id", "collection", "is_ai") id=tag_id).values_list("id", "collection", "is_ai")
# for id, collection, is_ai in tag_sql_query_results: for id, collection, is_ai in tag_sql_query_results:
# if collection == 1 or is_ai == 1: if (is_ai == 1) and id not in tag_list:
tag_list.append(tag_id) tag_list.append(id)
if topic_id not in topic_tag_id_dict: if topic_id not in topic_tag_id_dict:
topic_tag_id_dict[topic_id] = list() topic_tag_id_dict[topic_id] = list()
...@@ -242,6 +243,31 @@ class CollectData(object): ...@@ -242,6 +243,31 @@ class CollectData(object):
# 更新该用户的推荐tag数据,放在 更新完成user tag行为信息之后 # 更新该用户的推荐tag数据,放在 更新完成user tag行为信息之后
self.update_recommend_tag_list(device_id, user_feature, user_id) self.update_recommend_tag_list(device_id, user_feature, user_id)
elif "type" in raw_val_dict and "interest_choice_click_next" == raw_val_dict["type"]:
if isinstance(raw_val_dict["params"]["tagid_list"],str):
tagid_list = json.loads(raw_val_dict["params"]["tagid_list"])
elif isinstance(raw_val_dict["params"]["tagid_list"],list):
tagid_list = raw_val_dict["params"]["tagid_list"]
else:
tagid_list = list()
logging.warning("unknown type msg:%s" % raw_val_dict.get("type", "missing type"))
logging.info(
"consume click topic_id:%s,device_id:%s" % (
str(tagid_list), str(device_id)))
device_id = raw_val_dict["device"]["device_id"]
user_id = raw_val_dict["user_id"] if "user_id" in raw_val_dict else None
# if len(exposure_sql_query_results)>0:
if len(tagid_list) > 0:
is_click = 1
is_vote = 0
reward = 1 if is_click or is_vote else 0
for tag_id in tagid_list:
self.update_user_linucb_tag_info(reward, device_id, tag_id, user_feature)
# 更新该用户的推荐tag数据,放在 更新完成user tag行为信息之后
self.update_recommend_tag_list(device_id, user_feature, user_id,new_user_click_tag_list=tagid_list)
else: else:
logging.warning("unknown type msg:%s" % raw_val_dict.get("type", "missing type")) logging.warning("unknown type msg:%s" % raw_val_dict.get("type", "missing type"))
except: except:
......
...@@ -124,7 +124,6 @@ class TopicUtils(object): ...@@ -124,7 +124,6 @@ class TopicUtils(object):
filter_topic_id_list=[],test_score=False,must_topic_id_list=[],recommend_tag_list=[], filter_topic_id_list=[],test_score=False,must_topic_id_list=[],recommend_tag_list=[],
user_similar_score_list=[],index_type="topic",routing=None,attention_tag_list=[]): user_similar_score_list=[],index_type="topic",routing=None,attention_tag_list=[]):
""" """
:需增加打散逻辑
:remark:获取首页推荐帖子列表 :remark:获取首页推荐帖子列表
:param user_id: :param user_id:
:param offset: :param offset:
...@@ -197,25 +196,6 @@ class TopicUtils(object): ...@@ -197,25 +196,6 @@ class TopicUtils(object):
"weight": 30, "weight": 30,
} }
) )
# if len(pick_user_id_list) > 0:
# functions_list.append(
# {
# "filter": {"bool": {
# "should": {"terms": {"user_id": pick_user_id_list}}}},
# "weight": 2
# }
# )
# if len(same_pictorial_id_list) > 0:
# functions_list.append(
# {
# "filter": {"bool": {
# "should": {"terms": {"user_id": same_pictorial_id_list}}}},
# "weight": 1
# }
# )
# query_tag_term_list = cls.___get_should_term_list(user_tag_list)
if len(attention_tag_list) > 0: if len(attention_tag_list) > 0:
functions_list.append( functions_list.append(
{ {
...@@ -224,40 +204,11 @@ class TopicUtils(object): ...@@ -224,40 +204,11 @@ class TopicUtils(object):
"weight": 100 "weight": 100
} }
) )
# if len(recommend_tag_list)>0:
# if len(recommend_tag_list)>1:
# functions_list += [
# {
# "filter": {"term": {"tag_list": recommend_tag_list[0]}},
# "weight": 4
# },
# {
# "filter": {"terms": {"tag_list": recommend_tag_list[1:]}},
# "weight": 3
# }
# ]
# else:
# functions_list.append(
# {
# "filter": {"terms": {"tag_list": recommend_tag_list}},
# "weight": 3
# }
# )
# for tag_id in recommend_tag_dict:
# functions_list.append(
# {
# "filter": {"term": {"tag_list": tag_id}},
# "weight": recommend_tag_dict[tag_id]
# }
# )
# low_content_level = 4 if query_type == TopicPageType.FIND_PAGE else 3
query_function_score = { query_function_score = {
"query": { "query": {
"bool": { "bool": {
"filter": [ "filter": [
{"range": {"content_level": {"gte": 4, "lte": 5}}}, {"range": {"content_level": {"gte": 4, "lte": 6}}},
# {"term": {"has_image":True}}, # {"term": {"has_image":True}},
{"term": {"is_online": True}}, {"term": {"is_online": True}},
{"term": {"is_deleted": False}} {"term": {"is_deleted": False}}
...@@ -292,11 +243,9 @@ class TopicUtils(object): ...@@ -292,11 +243,9 @@ class TopicUtils(object):
} }
} }
if len(filter_topic_id_list) > 0: if len(filter_topic_id_list) > 0:
query_function_score["query"]["bool"]["must_not"] = { query_function_score["query"]["bool"]["must_not"] = [
"terms": { {"terms":{"id":filter_topic_id_list}}
"id": filter_topic_id_list ]
}
}
if query is not None: # 搜索帖子 if query is not None: # 搜索帖子
multi_fields = { multi_fields = {
...@@ -318,7 +267,15 @@ class TopicUtils(object): ...@@ -318,7 +267,15 @@ class TopicUtils(object):
{"term": {"tag_list": tag_id}} {"term": {"tag_list": tag_id}}
] ]
query_function_score["query"]["bool"]["minimum_should_match"] = 1 query_function_score["query"]["bool"]["minimum_should_match"] = 1
else:
if "must_not" in query_function_score["query"]["bool"]:
query_function_score["query"]["bool"]["must_not"] += [
{"term": {"is_operation_home_recommend": True}}
]
else:
query_function_score["query"]["bool"]["must_not"] = [
{"term": {"is_operation_home_recommend": True}}
]
q["query"]["function_score"] = query_function_score q["query"]["function_score"] = query_function_score
q["collapse"] = { q["collapse"] = {
"field": "user_id" "field": "user_id"
...@@ -350,37 +307,9 @@ class TopicUtils(object): ...@@ -350,37 +307,9 @@ class TopicUtils(object):
offset=offset, size=size,routing=routing) offset=offset, size=size,routing=routing)
topic_id_list = list() topic_id_list = list()
same_group_id_set = set()
same_user_id_set = set()
for item in result_dict["hits"]: for item in result_dict["hits"]:
topic_id_list.append(item["_source"]["id"]) topic_id_list.append(item["_source"]["id"])
# for item in result_dict["hits"]:
# if item["_source"]["group_id"]>0 and item["_source"]["group_id"] not in same_group_id_set:
# same_group_id_set.add(item["_source"]["id"])
# topic_id_list.append(item["_source"]["id"])
# else:
# same_group_id_set.add(item["_source"]["id"])
#
# if item["_source"]["user_id"] not in same_user_id_set:
# same_user_id_set.add(item["_source"]["id"])
# topic_id_list.append(item["_source"]["id"])
# else:
# same_user_id_set.add(item["_source"]["id"])
#
# if len(topic_id_list) >= single_size:
# break
#
# if len(topic_id_list) < single_size:
# for topic_id in same_group_id_set:
# topic_id_list.append(topic_id)
# if len(topic_id_list)>=single_size:
# break
# for topic_id in same_user_id_set:
# topic_id_list.append(topic_id)
# if len(topic_id_list)>=single_size:
# break
logging.info("topic_id_list:%s,attention_tag_list%s" % (str(topic_id_list),str(attention_tag_list)))
return topic_id_list return topic_id_list
except: except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc()) logging.error("catch exception,err_msg:%s" % traceback.format_exc())
......
...@@ -134,10 +134,10 @@ def choice_push_tag(device_id, user_id): ...@@ -134,10 +134,10 @@ def choice_push_tag(device_id, user_id):
if len(unread_tag_list)>0: if len(unread_tag_list)>0:
for tag_id in unread_tag_list: for tag_id in unread_tag_list:
valid_tag_topic_num = TopicTag.objects.filter(tag_id=tag_id,is_online=True).count() valid_tag_topic_num = TopicTag.objects.filter(tag_id=tag_id,is_online=True).count()
if valid_tag_topic_num>100: if valid_tag_topic_num>0:
ret_tag_set.add(tag_id) ret_tag_set.add(tag_id)
redis_push_tag_dict[tag_id] = now_sec redis_push_tag_dict[tag_id] = now_sec
if len(ret_tag_set)>=2: if len(ret_tag_set)>=1:
break break
redis_client.set(redis_push_tag_key, json.dumps(redis_push_tag_dict)) redis_client.set(redis_push_tag_key, json.dumps(redis_push_tag_dict))
......
...@@ -30,7 +30,7 @@ def get_discover_page_topic_ids(user_id, device_id, size, query_type=TopicPageTy ...@@ -30,7 +30,7 @@ def get_discover_page_topic_ids(user_id, device_id, size, query_type=TopicPageTy
recommend_topic_ids = TopicUtils.get_recommend_topic_ids(user_id=user_id, tag_id=0, offset=0, size=size,single_size=size, recommend_topic_ids = TopicUtils.get_recommend_topic_ids(user_id=user_id, tag_id=0, offset=0, size=size,single_size=size,
query_type=query_type, query_type=query_type,
filter_topic_id_list=have_read_topic_id_list,index_type="topic",routing="4,5") filter_topic_id_list=have_read_topic_id_list,index_type="topic",routing="4,5,6")
have_read_topic_id_list.extend(recommend_topic_ids) have_read_topic_id_list.extend(recommend_topic_ids)
...@@ -63,46 +63,14 @@ def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, size, query ...@@ -63,46 +63,14 @@ def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, size, query
redis_field_list = [b'have_read_topic_list'] redis_field_list = [b'have_read_topic_list']
redis_field_val_list = redis_client.hmget(redis_key, redis_field_list) redis_field_val_list = redis_client.hmget(redis_key, redis_field_list)
topic_recommend_redis_key = "physical:linucb:topic_recommend:device_id:" + str(device_id) # 获取已读帖子
# click_recommend_redis_key = "physical:click_recommend:device_id:" + str(device_id)
# recommend_tag_dict = dict()
# tag_recommend_val = redis_client.get(tag_recommend_redis_key)
# if tag_recommend_val:
# recommend_tag_dict = json.loads(str(tag_recommend_val, encoding="utf-8"))
recommend_topic_list=list()
recommend_topic_dict = redis_client.hgetall(topic_recommend_redis_key)
if b"data" in recommend_topic_dict:
recommend_topic_id_list = json.loads(recommend_topic_dict[b"data"])
cursor = int(str(recommend_topic_dict[b"cursor"], encoding="utf-8"))
newcursor = cursor + 6
if len(recommend_topic_id_list) > newcursor:
recommend_topic_list = recommend_topic_id_list[cursor:newcursor]
redis_client.hset(topic_recommend_redis_key,"cursor",newcursor)
# click_recommend_topic_id_list = list()
# click_recommend_topic_list = list()
#
# click_recommend_topic_dict = redis_client.hgetall(click_recommend_redis_key)
# if b"data" in click_recommend_topic_dict:
# click_recommend_topic_id_list = json.loads(click_recommend_topic_dict[b"data"])
# cursor = int(str(click_recommend_topic_dict[b"cursor"], encoding="utf-8"))
# newcursor = cursor + 2
# if newcursor < 4 and len(click_recommend_topic_id_list) ==2:
# for i in range(0,2):
# click_recommend_topic_list.append(click_recommend_topic_id_list[i])
# redis_client.hset(click_recommend_redis_key, "cursor", newcursor)
# combine_recommend_topic_list_tmp = click_recommend_topic_list.extend(recommend_topic_list)
# combine_recommend_topic_list = combine_recommend_topic_list_tmp[0:5]
have_read_topic_id_list = list() have_read_topic_id_list = list()
if redis_field_val_list[0]: if redis_field_val_list[0]:
if query is None: if query is None:
have_read_topic_id_list = list(json.loads(redis_field_val_list[0])) have_read_topic_id_list = list(json.loads(redis_field_val_list[0]))
else: else:
if offset>0: if offset>0: # 首次搜索时不需要过滤已读
have_read_topic_id_list = list(json.loads(redis_field_val_list[0])) have_read_topic_id_list = list(json.loads(redis_field_val_list[0]))
user_similar_score_redis_key = "physical:user_similar_score:user_id:" + str(user_id) user_similar_score_redis_key = "physical:user_similar_score:user_id:" + str(user_id)
...@@ -110,31 +78,45 @@ def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, size, query ...@@ -110,31 +78,45 @@ def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, size, query
user_similar_score_redis_list = json.loads( user_similar_score_redis_list = json.loads(
redis_user_similar_score_redis_val) if redis_user_similar_score_redis_val else [] redis_user_similar_score_redis_val) if redis_user_similar_score_redis_val else []
attention_tag_list = list()
recommend_topic_list = list()
if query is None:
# linucb 推荐帖子
topic_recommend_redis_key = "physical:linucb:topic_recommend:device_id:" + str(device_id)
recommend_topic_dict = redis_client.hgetall(topic_recommend_redis_key)
if b"data" in recommend_topic_dict:
recommend_topic_id_list = json.loads(recommend_topic_dict[b"data"])
# 推荐帖子是强插的,要保证推荐帖子不在已读里
recommend_topic_id_list = list(set(recommend_topic_id_list) - set(have_read_topic_id_list))
cursor = int(str(recommend_topic_dict[b"cursor"], encoding="utf-8"))
newcursor = cursor + 6
if len(recommend_topic_id_list) > newcursor:
recommend_topic_list = recommend_topic_id_list[cursor:newcursor]
redis_client.hset(topic_recommend_redis_key, "cursor", newcursor)
# 用户关注标签
redis_tag_data = redis_client.hget("physical:linucb:register_user_tag_info", user_id)
attention_tag_list = json.loads(redis_tag_data) if redis_tag_data else []
if len(recommend_topic_list)>0:
size = size-len(recommend_topic_list)
have_read_topic_id_list.extend(recommend_topic_list)
# have_read_topic_id_list_add_promote = list()
# have_read_topic_id_list_add_promote.extend(have_read_topic_id_list)
# promote_recommend_topic_id_list = TopicHomeRecommend.objects.using(settings.SLAVE_DB_NAME).filter(
# is_online=1).values_list("topic_id",flat=True)
#
# for topic_id in promote_recommend_topic_id_list:
# have_read_topic_id_list_add_promote.append(topic_id)
redis_tag_data = redis_client.hget("physical:linucb:register_user_tag_info", user_id)
attention_tag_list = json.loads(redis_tag_data) if redis_tag_data else []
logging.info("attention_tag_list:%s"%(str(attention_tag_list)))
if len(recommend_topic_list)>0:
size = size-len(recommend_topic_list)
have_read_topic_id_list.extend(recommend_topic_list)
have_read_topic_id_list_add_promote = list()
promote_recommend_topic_id_list = list()
have_read_topic_id_list_add_promote.extend(have_read_topic_id_list)
promote_recommend_topic_id_list = TopicHomeRecommend.objects.using(settings.SLAVE_DB_NAME).filter(
is_online=1).values_list("topic_id",flat=True)
if len(promote_recommend_topic_id_list)>0:
for topic_id in promote_recommend_topic_id_list:
have_read_topic_id_list_add_promote.append(topic_id)
topic_id_list = list() topic_id_list = list()
rank_topic_id_list = TopicUtils.get_recommend_topic_ids(user_id=user_id, tag_id=tag_id, offset=0, size=size,
rank_topic_id_list = TopicUtils.get_recommend_topic_ids(user_id=user_id, tag_id=tag_id, offset=offset, size=size,
single_size=size,query=query, query_type=query_type, single_size=size,query=query, query_type=query_type,
filter_topic_id_list=have_read_topic_id_list_add_promote, filter_topic_id_list=have_read_topic_id_list,
recommend_tag_list=recommend_topic_list, user_similar_score_list=user_similar_score_redis_list,index_type="topic",routing="4,5,6",attention_tag_list=attention_tag_list)
user_similar_score_list=user_similar_score_redis_list,index_type="topic",routing="4,5",attention_tag_list=attention_tag_list)
if (len(recommend_topic_list) == 6): if len(recommend_topic_list) == 6 and query is None:
if (size < 11): if (size < 11):
topic_id_list.extend(rank_topic_id_list[0:3]) topic_id_list.extend(rank_topic_id_list[0:3])
topic_id_list.extend(recommend_topic_list[0:3]) topic_id_list.extend(recommend_topic_list[0:3])
...@@ -148,49 +130,6 @@ def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, size, query ...@@ -148,49 +130,6 @@ def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, size, query
else: else:
topic_id_list.extend(rank_topic_id_list) topic_id_list.extend(rank_topic_id_list)
have_read_group_id_set = set()
have_read_user_id_set = set()
unread_topic_id_dict = dict()
logging.info("attention_tag_list:%s"%(str(topic_id_list)))
# # 当前页小组数量
# cur_page_group_num = 0
# # 当前页用户数量
# cur_page_user_num = 0
#
# for topic_id in topic_id_dict:
# if topic_id_dict[topic_id][0] in have_read_group_id_set or topic_id_dict[topic_id][
# 1] in have_read_user_id_set:
# unread_topic_id_dict[topic_id] = topic_id_dict[topic_id]
# else:
# if isinstance(topic_id_dict[topic_id][0], int) and topic_id_dict[topic_id][
# 0] > 0 and cur_page_group_num < (size * 0.9):
# have_read_group_id_set.add(topic_id_dict[topic_id][0])
# have_read_user_id_set.add(topic_id_dict[topic_id][1])
# have_read_topic_id_list.append(topic_id)
# cur_page_group_num += 1
# recommend_topic_ids.append(topic_id)
# elif topic_id_dict[topic_id] and cur_page_user_num < (size * 0.1):
# have_read_user_id_set.add(topic_id_dict[topic_id][1])
# cur_page_user_num += 1
# recommend_topic_ids.append(topic_id)
# have_read_topic_id_list.append(topic_id)
# else:
# unread_topic_id_dict[topic_id] = topic_id_dict[topic_id]
#
# if len(recommend_topic_ids) >= size:
# break
# if len(recommend_topic_ids) < size and len(unread_topic_id_dict) > 0:
# for unread_topic_id in unread_topic_id_dict:
# if len(recommend_topic_ids) < size:
# recommend_topic_ids.append(unread_topic_id)
# have_read_topic_id_list.append(unread_topic_id)
# else:
# break
# topic_id_list.extend(recommend_topic_list)
# recommend_topic_list.extend(topic_id_list)
have_read_topic_id_list.extend(topic_id_list) have_read_topic_id_list.extend(topic_id_list)
if len(have_read_topic_id_list) > 30000: if len(have_read_topic_id_list) > 30000:
cut_len = len(have_read_topic_id_list)-30000 cut_len = len(have_read_topic_id_list)-30000
...@@ -305,7 +244,7 @@ def home_query(device_id="", tag_id=-1, user_id=-1, query="", offset=0, size=10) ...@@ -305,7 +244,7 @@ def home_query(device_id="", tag_id=-1, user_id=-1, query="", offset=0, size=10)
if not isinstance(device_id, str): if not isinstance(device_id, str):
device_id = "" device_id = ""
recommend_topic_ids = get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, size, query) recommend_topic_ids = get_home_recommend_topic_ids(user_id, device_id, tag_id, offset=offset, size=size, query=query)
return {"recommend_topic_ids": recommend_topic_ids} return {"recommend_topic_ids": recommend_topic_ids}
except: except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc()) logging.error("catch exception,err_msg:%s" % traceback.format_exc())
...@@ -339,7 +278,7 @@ def topic_detail_page_recommend(device_id="", user_id=-1, topic_id=-1, topic_pic ...@@ -339,7 +278,7 @@ def topic_detail_page_recommend(device_id="", user_id=-1, topic_id=-1, topic_pic
result_list = TopicUtils.get_topic_detail_recommend_list(user_id, topic_id, topic_tag_list, topic_pictorial_id, result_list = TopicUtils.get_topic_detail_recommend_list(user_id, topic_id, topic_tag_list, topic_pictorial_id,
topic_user_id, filter_topic_user_id, topic_user_id, filter_topic_user_id,
have_read_topic_list, offset, size, es_cli_obj,index_type="topic",routing="4,5") have_read_topic_list, offset, size, es_cli_obj,index_type="topic",routing="4,5,6")
recommend_topic_ids_list = list() recommend_topic_ids_list = list()
if len(result_list) > 0: if len(result_list) > 0:
recommend_topic_ids_list = [item["_source"]["id"] for item in result_list] recommend_topic_ids_list = [item["_source"]["id"] for item in result_list]
...@@ -414,7 +353,7 @@ def query_topic_by_user_similarity(topic_similarity_score_dict, offset=0, size=1 ...@@ -414,7 +353,7 @@ def query_topic_by_user_similarity(topic_similarity_score_dict, offset=0, size=1
must_topic_id_list = list(topic_similarity_score_dict.keys()) must_topic_id_list = list(topic_similarity_score_dict.keys())
topic_id_list = TopicUtils.get_recommend_topic_ids(tag_id=0, user_id=-1, offset=offset, size=size,single_size=size, topic_id_list = TopicUtils.get_recommend_topic_ids(tag_id=0, user_id=-1, offset=offset, size=size,single_size=size,
must_topic_id_list=must_topic_id_list,index_type="topic",routing="4,5") must_topic_id_list=must_topic_id_list,index_type="topic",routing="4,5,6")
return {"recommend_topic_ids": topic_id_list} return {"recommend_topic_ids": topic_id_list}
except: except:
......
...@@ -51,6 +51,7 @@ ...@@ -51,6 +51,7 @@
"analyzer": "gm_default_index", "analyzer": "gm_default_index",
"search_analyzer": "gm_default_index" "search_analyzer": "gm_default_index"
}, },
"is_excellent":{"type": "long"} "is_excellent":{"type": "long"},
"is_operation_home_recommend": {"type": "boolean"} //是否首页运营推荐
} }
} }
...@@ -51,6 +51,7 @@ ...@@ -51,6 +51,7 @@
"analyzer": "gm_default_index", "analyzer": "gm_default_index",
"search_analyzer": "gm_default_index" "search_analyzer": "gm_default_index"
}, },
"is_excellent":{"type": "long"} "is_excellent":{"type": "long"},
"is_operation_home_recommend": {"type": "boolean"} //是否首页运营推荐
} }
} }
{ {
"dynamic":"strict", "dynamic":"strict",
"_routing": {"required": true},
"properties": { "properties": {
"id":{"type":"long"}, "id":{"type":"long"},
"is_online":{"type":"boolean"},//上线 "is_online":{"type":"boolean"},//上线
"is_deleted":{"type":"boolean"}, "is_deleted":{"type":"boolean"},
"vote_num":{"type":"long"}, "vote_num":{"type":"long"},
"total_vote_num":{"type":"long","default":0}, "total_vote_num":{"type":"long"},
"reply_num":{"type":"long"}, "reply_num":{"type":"long"},
"name":{"type":"text","analyzer":"gm_default_index","search_analyzer":"gm_default_index"}, "name":{"type":"text","analyzer":"gm_default_index","search_analyzer":"gm_default_index"},
"description":{"type":"text","analyzer":"gm_default_index","search_analyzer":"gm_default_index"}, "description":{"type":"text","analyzer":"gm_default_index","search_analyzer":"gm_default_index"},
...@@ -50,6 +51,7 @@ ...@@ -50,6 +51,7 @@
"analyzer": "gm_default_index", "analyzer": "gm_default_index",
"search_analyzer": "gm_default_index" "search_analyzer": "gm_default_index"
}, },
"is_excellent":{"type": "long"} "is_excellent":{"type": "long"},
"is_operation_home_recommend": {"type": "boolean"} //是否首页运营推荐
} }
} }
...@@ -51,6 +51,7 @@ ...@@ -51,6 +51,7 @@
"analyzer": "gm_default_index", "analyzer": "gm_default_index",
"search_analyzer": "gm_default_index" "search_analyzer": "gm_default_index"
}, },
"is_excellent":{"type": "long"} "is_excellent":{"type": "long"},
"is_operation_home_recommend": {"type": "boolean"} //是否首页运营推荐
} }
} }
...@@ -197,8 +197,8 @@ class Topic(models.Model): ...@@ -197,8 +197,8 @@ class Topic(models.Model):
offline_score += 6.0 offline_score += 6.0
elif self.content_level == '4': elif self.content_level == '4':
offline_score += 5.0 offline_score += 5.0
elif self.content_level == '3': elif self.content_level == '6':
offline_score += 2.0 offline_score += 100.0
# exposure_count = ActionSumAboutTopic.objects.using(settings.SLAVE_DB_NAME).filter(topic_id=self.id, data_type=1).count() # exposure_count = ActionSumAboutTopic.objects.using(settings.SLAVE_DB_NAME).filter(topic_id=self.id, data_type=1).count()
# click_count = ActionSumAboutTopic.objects.using(settings.SLAVE_DB_NAME).filter(topic_id=self.id, data_type=2).count() # click_count = ActionSumAboutTopic.objects.using(settings.SLAVE_DB_NAME).filter(topic_id=self.id, data_type=2).count()
...@@ -289,4 +289,5 @@ class TopicHomeRecommend(models.Model): ...@@ -289,4 +289,5 @@ class TopicHomeRecommend(models.Model):
db_table = "topic_home_recommend" db_table = "topic_home_recommend"
id = models.IntegerField(verbose_name=u"id",primary_key=True) id = models.IntegerField(verbose_name=u"id",primary_key=True)
topic_id = models.IntegerField(verbose_name=u"帖子ID") topic_id = models.IntegerField(verbose_name=u"帖子ID")
is_online = models.BooleanField(verbose_name=u'是否上线') is_online = models.BooleanField(verbose_name=u'是否上线')
\ No newline at end of file is_deleted = models.BooleanField(verbose_name=u'是否删除')
...@@ -9,7 +9,7 @@ import time ...@@ -9,7 +9,7 @@ import time
import re import re
import datetime import datetime
from trans2es.models.user import User from trans2es.models.user import User
from trans2es.models.topic import ExcellentTopic from trans2es.models.topic import ExcellentTopic,TopicHomeRecommend
class TopicTransfer(object): class TopicTransfer(object):
...@@ -121,6 +121,11 @@ class TopicTransfer(object): ...@@ -121,6 +121,11 @@ class TopicTransfer(object):
else: else:
res["is_excellent"] = 0 res["is_excellent"] = 0
res["is_operation_home_recommend"] = False
operation_home_recommend = TopicHomeRecommend.objects.filter(topic_id=instance.id).first()
if operation_home_recommend and operation_home_recommend.is_online and not operation_home_recommend.is_deleted:
res["is_operation_home_recommend"] = True
logging.info("test topic transfer time cost,time0:%d,time1:%d,time2:%d,time3:%d,time4:%d" % (time0,time1,time2,time3,time4)) logging.info("test topic transfer time cost,time0:%d,time1:%d,time2:%d,time3:%d,time4:%d" % (time0,time1,time2,time3,time4))
return res return res
except: except:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment