Commit ac6c6ee7 authored by Kai

Merge branch 'master' into hk

parents 5c539fd7 24529ca5
......@@ -20,6 +20,8 @@ def write_to_es(es_type, pk_list, use_batch_query_set=False):
if es_type == "register_user_tag":
RegisterUserTag.get_register_user_tag(pk_list)
elif es_type == "attention_user_tag":
RegisterUserTag.get_user_attention_tag(pk_list)
else:
type_info_map = get_type_info_map()
type_info = type_info_map[es_type]
......
......@@ -264,7 +264,7 @@ class ESPerform(object):
return True
@classmethod
def get_tag_topic_list(cls,tag_id,have_read_topic_id_list):
def get_tag_topic_list(cls,tag_id,have_read_topic_id_list,size=100):
try:
functions_list = list()
for id in tag_id:
......@@ -274,13 +274,27 @@ class ESPerform(object):
"weight": 1
}
)
functions_list += [
{
"filter": {"term": {"content_level": 6}},
"weight": 6000
},
{
"filter": {"term": {"content_level": 5}},
"weight": 5000
},
{
"filter": {"term": {"content_level": 4}},
"weight": 4000
}
]
q = {
"query":{
"function_score":{
"query": {
"bool": {
"must": [
{"range": {"content_level": {"gte": 3, "lte": 5}}},
{"range": {"content_level": {"gte": 4, "lte": 6}}},
{"term": {"is_online": True}},
{"term": {"is_deleted": False}},
{"terms": {"tag_list": tag_id}}
......@@ -308,7 +322,7 @@ class ESPerform(object):
}
}
result_dict = ESPerform.get_search_results(ESPerform.get_cli(), sub_index_name="topic", query_body=q,
offset=0, size=100,routing="3,4,5")
offset=0, size=size,routing="4,5,6")
topic_id_list = [item["_source"]["id"] for item in result_dict["hits"]]
logging.info("topic_id_list:%s"%str(topic_id_list))
......
......@@ -9,7 +9,7 @@ import traceback
import json
import pickle
from django.conf import settings
from trans2es.models.tag import AccountUserTag
from trans2es.models.tag import AccountUserTag,CommunityTagFollow
from libs.es import ESPerform
import libs.tools as Tools
from search.utils.common import *
......@@ -30,8 +30,40 @@ class RegisterUserTag(object):
linucb_user_id_register_tag_topic_id_prefix = "physical:linucb:register_tag_topic_recommend:user_id:"
linucb_register_user_tag_key = "physical:linucb:register_user_tag_info"
@classmethod
def get_user_attention_tag(cls, pk_list):
    """
    :remark Merge the tags followed by each affected user into that user's
            entry of the redis hash ``cls.linucb_register_user_tag_key``.
            The stored list is only ever extended (union with what is already
            cached), so tags are never removed from the entry here.
    :param pk_list: primary keys of the CommunityTagFollow rows to process;
                    only rows that are online and not deleted are considered
    :return: None. Errors are logged and not re-raised.
    """
    try:
        user_id_dict = dict()

        query_results = CommunityTagFollow.objects.filter(pk__in=pk_list,is_deleted=False,is_online=True)
        for item in query_results:
            user_id = item.user_id
            # Several follow rows may belong to the same user; build that
            # user's tag query once instead of once per row.
            if user_id in user_id_dict:
                continue
            user_id_dict[user_id] = CommunityTagFollow.objects.filter(user=user_id,is_deleted=False,is_online=True).values_list("tag_id", flat=True)

        for user_id, user_tag_list in user_id_dict.items():
            # Start from whatever tag list is already cached for this user.
            redis_user_tag_id_data = redis_client.hget(cls.linucb_register_user_tag_key, user_id)
            redis_user_tag_id_list = json.loads(redis_user_tag_id_data) if redis_user_tag_id_data else []
            redis_user_tag_id_list.extend(user_tag_list)
            # set() drops duplicates between the cached and the freshly read tags.
            redis_client.hset(cls.linucb_register_user_tag_key, user_id, json.dumps(list(set(redis_user_tag_id_list))))
    except Exception:
        # Best-effort sync: log the failure, but let KeyboardInterrupt and
        # SystemExit propagate instead of swallowing them.
        logging.error("catch exception,err_msg:%s" % traceback.format_exc())
@classmethod
def get_register_user_tag(cls,pk_list):
"""
:remark 用户注册时选的标签
:param pk_list:
:return:
"""
try:
# user_id_set = set()
user_id_dict = dict()
......@@ -43,7 +75,10 @@ class RegisterUserTag(object):
user_id_dict[user_id] = user_tag_list
for user_id in user_id_dict:
redis_client.hset(cls.linucb_register_user_tag_key, user_id, json.dumps(list(user_id_dict[user_id])))
redis_user_tag_id_data = redis_client.hget(cls.linucb_register_user_tag_key, user_id)
redis_user_tag_id_list = json.loads(redis_user_tag_id_data) if redis_user_tag_id_data else []
redis_user_tag_id_list.extend(user_id_dict[user_id])
redis_client.hset(cls.linucb_register_user_tag_key, user_id, json.dumps(list(set(redis_user_tag_id_list))))
# if user_id not in user_id_set:
# user_id_set.add(user_id)
......
......@@ -54,9 +54,6 @@ class CollectData(object):
def update_recommend_tag_list(self, device_id,user_feature=None,user_id=None,click_topic_tag_list=None):
try:
recommend_tag_set = set()
recommend_tag_list = list()
recommend_tag_dict = dict()
redis_linucb_tag_data_dict = self._get_user_linucb_info(device_id)
if len(redis_linucb_tag_data_dict) == 0:
recommend_tag_list = LinUCB.get_default_tag_list(user_id)
......@@ -82,22 +79,17 @@ class CollectData(object):
if click_topic_tag_list:
if len(click_topic_tag_list)>0:
recommend_topic_id_list_click = ESPerform.get_tag_topic_list(click_topic_tag_list,
have_read_topic_id_list)
have_read_topic_id_list,size=2)
if len(recommend_topic_id_list_click) > 0:
num = min(len(recommend_topic_id_list_click), 2)
logging.info("recommend_topic_id_list:%s" % (str(num)))
for i in range(0,num):
recommend_topic_id_list.append(recommend_topic_id_list_click[i])
have_read_topic_id_list.extend(recommend_topic_id_list)
click_recommend_redis_key = self.click_recommend_redis_key_prefix + str(device_id)
click_redis_data_dict = {
"data": json.dumps(recommend_topic_id_list),
"cursor": 0
}
redis_client.hmset(click_recommend_redis_key, click_redis_data_dict)
total_topic_list = list()
tag_topic_id_list = list()
recommend_topic_id_list.extend(recommend_topic_id_list_click)
have_read_topic_id_list.extend(recommend_topic_id_list)
click_recommend_redis_key = self.click_recommend_redis_key_prefix + str(device_id)
click_redis_data_dict = {
"data": json.dumps(recommend_topic_id_list),
"cursor": 0
}
redis_client.hmset(click_recommend_redis_key, click_redis_data_dict)
tag_id_list = recommend_tag_list[0:100]
topic_recommend_redis_key = self.linucb_recommend_topic_id_prefix + str(device_id)
......@@ -164,8 +156,8 @@ class CollectData(object):
tag_list = list()
click_topic_tag_list = list()
collection_tag_sql_query_results = TopicTag.objects.using(settings.SLAVE_DB_NAME).filter(topic_id=topic_id).values_list("tag_id","is_online","is_collection")
if len(collection_tag_sql_query_results)>0:
for tag_id,is_online,is_collection in collection_tag_sql_query_results:
# if len(collection_tag_sql_query_results)>0:
for tag_id,is_online,is_collection in collection_tag_sql_query_results:
if is_online and is_collection == 1:
click_topic_tag_list.append(tag_id)
......@@ -217,8 +209,8 @@ class CollectData(object):
topic_tag_id_dict = dict()
tag_list = list()
exposure_sql_query_results = TopicTag.objects.using(settings.SLAVE_DB_NAME).filter(topic_id__in=exposure_topic_id_list).values_list("topic_id","tag_id","is_online","is_collection")
if len(exposure_sql_query_results)>0:
for topic_id,tag_id,is_online,is_collection in exposure_sql_query_results:
# if len(exposure_sql_query_results)>0:
for topic_id,tag_id,is_online,is_collection in exposure_sql_query_results:
if is_online and is_collection == 1:
tag_list.append(tag_id)
if is_online:
......
......@@ -196,25 +196,6 @@ class TopicUtils(object):
"weight": 30,
}
)
# if len(pick_user_id_list) > 0:
# functions_list.append(
# {
# "filter": {"bool": {
# "should": {"terms": {"user_id": pick_user_id_list}}}},
# "weight": 2
# }
# )
# if len(same_pictorial_id_list) > 0:
# functions_list.append(
# {
# "filter": {"bool": {
# "should": {"terms": {"user_id": same_pictorial_id_list}}}},
# "weight": 1
# }
# )
# query_tag_term_list = cls.___get_should_term_list(user_tag_list)
if len(attention_tag_list) > 0:
functions_list.append(
{
......@@ -223,40 +204,11 @@ class TopicUtils(object):
"weight": 100
}
)
# if len(recommend_tag_list)>0:
# if len(recommend_tag_list)>1:
# functions_list += [
# {
# "filter": {"term": {"tag_list": recommend_tag_list[0]}},
# "weight": 4
# },
# {
# "filter": {"terms": {"tag_list": recommend_tag_list[1:]}},
# "weight": 3
# }
# ]
# else:
# functions_list.append(
# {
# "filter": {"terms": {"tag_list": recommend_tag_list}},
# "weight": 3
# }
# )
# for tag_id in recommend_tag_dict:
# functions_list.append(
# {
# "filter": {"term": {"tag_list": tag_id}},
# "weight": recommend_tag_dict[tag_id]
# }
# )
# low_content_level = 4 if query_type == TopicPageType.FIND_PAGE else 3
query_function_score = {
"query": {
"bool": {
"filter": [
{"range": {"content_level": {"gte": 4, "lte": 5}}},
{"range": {"content_level": {"gte": 4, "lte": 6}}},
# {"term": {"has_image":True}},
{"term": {"is_online": True}},
{"term": {"is_deleted": False}}
......@@ -291,11 +243,9 @@ class TopicUtils(object):
}
}
if len(filter_topic_id_list) > 0:
query_function_score["query"]["bool"]["must_not"] = {
"terms": {
"id": filter_topic_id_list
}
}
query_function_score["query"]["bool"]["must_not"] = [
{"terms":{"id":filter_topic_id_list}}
]
if query is not None: # 搜索帖子
multi_fields = {
......@@ -317,7 +267,15 @@ class TopicUtils(object):
{"term": {"tag_list": tag_id}}
]
query_function_score["query"]["bool"]["minimum_should_match"] = 1
else:
if "must_not" in query_function_score["query"]["bool"]:
query_function_score["query"]["bool"]["must_not"] += [
{"term": {"is_operation_home_recommend": True}}
]
else:
query_function_score["query"]["bool"]["must_not"] = [
{"term": {"is_operation_home_recommend": True}}
]
q["query"]["function_score"] = query_function_score
q["collapse"] = {
"field": "user_id"
......
......@@ -30,7 +30,7 @@ def get_discover_page_topic_ids(user_id, device_id, size, query_type=TopicPageTy
recommend_topic_ids = TopicUtils.get_recommend_topic_ids(user_id=user_id, tag_id=0, offset=0, size=size,single_size=size,
query_type=query_type,
filter_topic_id_list=have_read_topic_id_list,index_type="topic",routing="4,5")
filter_topic_id_list=have_read_topic_id_list,index_type="topic",routing="4,5,6")
have_read_topic_id_list.extend(recommend_topic_ids)
......@@ -63,25 +63,14 @@ def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, size, query
redis_field_list = [b'have_read_topic_list']
redis_field_val_list = redis_client.hmget(redis_key, redis_field_list)
topic_recommend_redis_key = "physical:linucb:topic_recommend:device_id:" + str(device_id)
recommend_topic_list=list()
recommend_topic_dict = redis_client.hgetall(topic_recommend_redis_key)
if b"data" in recommend_topic_dict:
recommend_topic_id_list = json.loads(recommend_topic_dict[b"data"])
cursor = int(str(recommend_topic_dict[b"cursor"], encoding="utf-8"))
newcursor = cursor + 6
if len(recommend_topic_id_list) > newcursor:
recommend_topic_list = recommend_topic_id_list[cursor:newcursor]
redis_client.hset(topic_recommend_redis_key,"cursor",newcursor)
# 获取已读帖子
have_read_topic_id_list = list()
if redis_field_val_list[0]:
if query is None:
have_read_topic_id_list = list(json.loads(redis_field_val_list[0]))
else:
if offset>0:
if offset>0: # 首次搜索时不需要过滤已读
have_read_topic_id_list = list(json.loads(redis_field_val_list[0]))
user_similar_score_redis_key = "physical:user_similar_score:user_id:" + str(user_id)
......@@ -89,30 +78,44 @@ def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, size, query
user_similar_score_redis_list = json.loads(
redis_user_similar_score_redis_val) if redis_user_similar_score_redis_val else []
redis_tag_data = redis_client.hget("physical:linucb:register_user_tag_info", user_id)
attention_tag_list = json.loads(redis_tag_data) if redis_tag_data else []
logging.info("attention_tag_list:%s"%(str(attention_tag_list)))
if len(recommend_topic_list)>0:
size = size-len(recommend_topic_list)
have_read_topic_id_list.extend(recommend_topic_list)
have_read_topic_id_list_add_promote = list()
have_read_topic_id_list_add_promote.extend(have_read_topic_id_list)
promote_recommend_topic_id_list = TopicHomeRecommend.objects.using(settings.SLAVE_DB_NAME).filter(
is_online=1).values_list("topic_id",flat=True)
for topic_id in promote_recommend_topic_id_list:
have_read_topic_id_list_add_promote.append(topic_id)
recommend_topic_list = list()
if query is None:
# linucb 推荐帖子
topic_recommend_redis_key = "physical:linucb:topic_recommend:device_id:" + str(device_id)
recommend_topic_dict = redis_client.hgetall(topic_recommend_redis_key)
if b"data" in recommend_topic_dict:
recommend_topic_id_list = json.loads(recommend_topic_dict[b"data"])
# 推荐帖子是强插的,要保证推荐帖子不在已读里
recommend_topic_id_list = list(set(recommend_topic_id_list) - set(have_read_topic_id_list))
cursor = int(str(recommend_topic_dict[b"cursor"], encoding="utf-8"))
newcursor = cursor + 6
if len(recommend_topic_id_list) > newcursor:
recommend_topic_list = recommend_topic_id_list[cursor:newcursor]
redis_client.hset(topic_recommend_redis_key, "cursor", newcursor)
# 用户关注标签
redis_tag_data = redis_client.hget("physical:linucb:register_user_tag_info", user_id)
attention_tag_list = json.loads(redis_tag_data) if redis_tag_data else []
if len(recommend_topic_list)>0:
size = size-len(recommend_topic_list)
have_read_topic_id_list.extend(recommend_topic_list)
# have_read_topic_id_list_add_promote = list()
# have_read_topic_id_list_add_promote.extend(have_read_topic_id_list)
# promote_recommend_topic_id_list = TopicHomeRecommend.objects.using(settings.SLAVE_DB_NAME).filter(
# is_online=1).values_list("topic_id",flat=True)
#
# for topic_id in promote_recommend_topic_id_list:
# have_read_topic_id_list_add_promote.append(topic_id)
topic_id_list = list()
rank_topic_id_list = TopicUtils.get_recommend_topic_ids(user_id=user_id, tag_id=tag_id, offset=offset, size=size,
single_size=size,query=query, query_type=query_type,
filter_topic_id_list=have_read_topic_id_list_add_promote,
recommend_tag_list=recommend_topic_list,
user_similar_score_list=user_similar_score_redis_list,index_type="topic",routing="4,5",attention_tag_list=attention_tag_list)
filter_topic_id_list=have_read_topic_id_list,
user_similar_score_list=user_similar_score_redis_list,index_type="topic",routing="4,5,6",attention_tag_list=attention_tag_list)
if (len(recommend_topic_list) == 6):
if len(recommend_topic_list) == 6 and query is None:
if (size < 11):
topic_id_list.extend(rank_topic_id_list[0:3])
topic_id_list.extend(recommend_topic_list[0:3])
......@@ -126,8 +129,6 @@ def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, size, query
else:
topic_id_list.extend(rank_topic_id_list)
logging.info("attention_tag_list:%s"%(str(topic_id_list)))
have_read_topic_id_list.extend(topic_id_list)
if len(have_read_topic_id_list) > 30000:
cut_len = len(have_read_topic_id_list)-30000
......@@ -276,7 +277,7 @@ def topic_detail_page_recommend(device_id="", user_id=-1, topic_id=-1, topic_pic
result_list = TopicUtils.get_topic_detail_recommend_list(user_id, topic_id, topic_tag_list, topic_pictorial_id,
topic_user_id, filter_topic_user_id,
have_read_topic_list, offset, size, es_cli_obj,index_type="topic",routing="4,5")
have_read_topic_list, offset, size, es_cli_obj,index_type="topic",routing="4,5,6")
recommend_topic_ids_list = list()
if len(result_list) > 0:
recommend_topic_ids_list = [item["_source"]["id"] for item in result_list]
......@@ -351,7 +352,7 @@ def query_topic_by_user_similarity(topic_similarity_score_dict, offset=0, size=1
must_topic_id_list = list(topic_similarity_score_dict.keys())
topic_id_list = TopicUtils.get_recommend_topic_ids(tag_id=0, user_id=-1, offset=offset, size=size,single_size=size,
must_topic_id_list=must_topic_id_list,index_type="topic",routing="4,5")
must_topic_id_list=must_topic_id_list,index_type="topic",routing="4,5,6")
return {"recommend_topic_ids": topic_id_list}
except:
......
......@@ -51,6 +51,7 @@
"analyzer": "gm_default_index",
"search_analyzer": "gm_default_index"
},
"is_excellent":{"type": "long"}
"is_excellent":{"type": "long"},
"is_operation_home_recommend": {"type": "boolean"} //是否首页运营推荐
}
}
......@@ -51,6 +51,7 @@
"analyzer": "gm_default_index",
"search_analyzer": "gm_default_index"
},
"is_excellent":{"type": "long"}
"is_excellent":{"type": "long"},
"is_operation_home_recommend": {"type": "boolean"} //是否首页运营推荐
}
}
{
"dynamic":"strict",
"_routing": {"required": true},
"properties": {
"id":{"type":"long"},
"is_online":{"type":"boolean"},//上线
"is_deleted":{"type":"boolean"},
"vote_num":{"type":"long"},
"total_vote_num":{"type":"long","default":0},
"total_vote_num":{"type":"long"},
"reply_num":{"type":"long"},
"name":{"type":"text","analyzer":"gm_default_index","search_analyzer":"gm_default_index"},
"description":{"type":"text","analyzer":"gm_default_index","search_analyzer":"gm_default_index"},
......@@ -49,7 +50,8 @@
"type": "text",
"analyzer": "gm_default_index",
"search_analyzer": "gm_default_index"
}
},
"is_excellent":{"type": "long"},
"is_operation_home_recommend": {"type": "boolean"} //是否首页运营推荐
}
}
......@@ -51,6 +51,7 @@
"analyzer": "gm_default_index",
"search_analyzer": "gm_default_index"
},
"is_excellent":{"type": "long"}
"is_excellent":{"type": "long"},
"is_operation_home_recommend": {"type": "boolean"} //是否首页运营推荐
}
}
......@@ -197,8 +197,8 @@ class Topic(models.Model):
offline_score += 6.0
elif self.content_level == '4':
offline_score += 5.0
elif self.content_level == '3':
offline_score += 2.0
elif self.content_level == '6':
offline_score += 100.0
# exposure_count = ActionSumAboutTopic.objects.using(settings.SLAVE_DB_NAME).filter(topic_id=self.id, data_type=1).count()
# click_count = ActionSumAboutTopic.objects.using(settings.SLAVE_DB_NAME).filter(topic_id=self.id, data_type=2).count()
......@@ -289,4 +289,5 @@ class TopicHomeRecommend(models.Model):
db_table = "topic_home_recommend"
id = models.IntegerField(verbose_name=u"id",primary_key=True)
topic_id = models.IntegerField(verbose_name=u"帖子ID")
is_online = models.BooleanField(verbose_name=u'是否上线')
\ No newline at end of file
is_online = models.BooleanField(verbose_name=u'是否上线')
is_deleted = models.BooleanField(verbose_name=u'是否删除')
......@@ -9,7 +9,7 @@ import time
import re
import datetime
from trans2es.models.user import User
from trans2es.models.topic import ExcellentTopic
from trans2es.models.topic import ExcellentTopic,TopicHomeRecommend
class TopicTransfer(object):
......@@ -121,6 +121,11 @@ class TopicTransfer(object):
else:
res["is_excellent"] = 0
res["is_operation_home_recommend"] = False
operation_home_recommend = TopicHomeRecommend.objects.filter(topic_id=instance.id).first()
if operation_home_recommend and operation_home_recommend.is_online and not operation_home_recommend.is_deleted:
res["is_operation_home_recommend"] = True
logging.info("test topic transfer time cost,time0:%d,time1:%d,time2:%d,time3:%d,time4:%d" % (time0,time1,time2,time3,time4))
return res
except:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment