Commit 0fc63a16 authored by Kai

merge master

parents 2b312dd5 e043da6c
...@@ -20,6 +20,8 @@ def write_to_es(es_type, pk_list, use_batch_query_set=False): ...@@ -20,6 +20,8 @@ def write_to_es(es_type, pk_list, use_batch_query_set=False):
if es_type == "register_user_tag": if es_type == "register_user_tag":
RegisterUserTag.get_register_user_tag(pk_list) RegisterUserTag.get_register_user_tag(pk_list)
elif es_type == "attention_user_tag":
RegisterUserTag.get_user_attention_tag(pk_list)
else: else:
type_info_map = get_type_info_map() type_info_map = get_type_info_map()
type_info = type_info_map[es_type] type_info = type_info_map[es_type]
......
...@@ -264,7 +264,7 @@ class ESPerform(object): ...@@ -264,7 +264,7 @@ class ESPerform(object):
return True return True
@classmethod @classmethod
def get_tag_topic_list(cls,tag_id,have_read_topic_id_list): def get_tag_topic_list(cls,tag_id,have_read_topic_id_list,size=100):
try: try:
functions_list = list() functions_list = list()
for id in tag_id: for id in tag_id:
...@@ -274,13 +274,27 @@ class ESPerform(object): ...@@ -274,13 +274,27 @@ class ESPerform(object):
"weight": 1 "weight": 1
} }
) )
functions_list += [
{
"filter": {"term": {"content_level": 6}},
"weight": 6000
},
{
"filter": {"term": {"content_level": 5}},
"weight": 5000
},
{
"filter": {"term": {"content_level": 4}},
"weight": 4000
}
]
q = { q = {
"query":{ "query":{
"function_score":{ "function_score":{
"query": { "query": {
"bool": { "bool": {
"must": [ "must": [
{"range": {"content_level": {"gte": 3, "lte": 5}}}, {"range": {"content_level": {"gte": 4, "lte": 6}}},
{"term": {"is_online": True}}, {"term": {"is_online": True}},
{"term": {"is_deleted": False}}, {"term": {"is_deleted": False}},
{"terms": {"tag_list": tag_id}} {"terms": {"tag_list": tag_id}}
...@@ -308,7 +322,7 @@ class ESPerform(object): ...@@ -308,7 +322,7 @@ class ESPerform(object):
} }
} }
result_dict = ESPerform.get_search_results(ESPerform.get_cli(), sub_index_name="topic", query_body=q, result_dict = ESPerform.get_search_results(ESPerform.get_cli(), sub_index_name="topic", query_body=q,
offset=0, size=100,routing="3,4,5") offset=0, size=size,routing="4,5,6")
topic_id_list = [item["_source"]["id"] for item in result_dict["hits"]] topic_id_list = [item["_source"]["id"] for item in result_dict["hits"]]
logging.info("topic_id_list:%s"%str(topic_id_list)) logging.info("topic_id_list:%s"%str(topic_id_list))
......
...@@ -25,7 +25,7 @@ def tzlc(dt, truncate_to_sec=True): ...@@ -25,7 +25,7 @@ def tzlc(dt, truncate_to_sec=True):
def get_have_read_topic_id_list(device_id,user_id,query_type): def get_have_read_topic_id_list(device_id,user_id,query_type):
try: try:
if user_id and user_id>0: if user_id and int(user_id)>0:
redis_key = "physical:home_recommend" + ":user_id:" + str(user_id) + ":query_type:" + str(query_type) redis_key = "physical:home_recommend" + ":user_id:" + str(user_id) + ":query_type:" + str(query_type)
else: else:
redis_key = "physical:home_recommend" + ":device_id:" + str(device_id) + ":query_type:" + str(query_type) redis_key = "physical:home_recommend" + ":device_id:" + str(device_id) + ":query_type:" + str(query_type)
......
...@@ -9,7 +9,7 @@ import traceback ...@@ -9,7 +9,7 @@ import traceback
import json import json
import pickle import pickle
from django.conf import settings from django.conf import settings
from trans2es.models.tag import AccountUserTag from trans2es.models.tag import AccountUserTag,CommunityTagFollow
from libs.es import ESPerform from libs.es import ESPerform
import libs.tools as Tools import libs.tools as Tools
from search.utils.common import * from search.utils.common import *
...@@ -30,8 +30,40 @@ class RegisterUserTag(object): ...@@ -30,8 +30,40 @@ class RegisterUserTag(object):
linucb_user_id_register_tag_topic_id_prefix = "physical:linucb:register_tag_topic_recommend:user_id:" linucb_user_id_register_tag_topic_id_prefix = "physical:linucb:register_tag_topic_recommend:user_id:"
linucb_register_user_tag_key = "physical:linucb:register_user_tag_info" linucb_register_user_tag_key = "physical:linucb:register_user_tag_info"
@classmethod
def get_user_attention_tag(cls, pk_list):
    """
    Merge the tags each affected user currently follows into Redis.

    For every online, non-deleted ``CommunityTagFollow`` row whose primary
    key is in ``pk_list``, the owning user's full set of online,
    non-deleted followed tag ids is loaded and unioned with whatever tag
    id list is already stored for that user in the
    ``linucb_register_user_tag_key`` Redis hash.

    :param pk_list: primary keys of ``CommunityTagFollow`` rows to process
    :return: None; failures are logged and swallowed (best-effort sync)
    """
    try:
        user_id_dict = dict()
        query_results = CommunityTagFollow.objects.filter(
            pk__in=pk_list, is_deleted=False, is_online=True)
        for item in query_results:
            user_id = item.user_id
            # Several follow rows can belong to the same user; the per-user
            # tag query returns the same data each time, so run it once.
            if user_id in user_id_dict:
                continue
            user_id_dict[user_id] = list(
                CommunityTagFollow.objects.filter(
                    user=user_id, is_deleted=False, is_online=True
                ).values_list("tag_id", flat=True))

        for user_id, followed_tag_ids in user_id_dict.items():
            redis_user_tag_id_data = redis_client.hget(cls.linucb_register_user_tag_key, user_id)
            redis_user_tag_id_list = json.loads(redis_user_tag_id_data) if redis_user_tag_id_data else []
            redis_user_tag_id_list.extend(followed_tag_ids)
            # Store the union without duplicates.
            redis_client.hset(cls.linucb_register_user_tag_key, user_id,
                              json.dumps(list(set(redis_user_tag_id_list))))
    except Exception:
        # Narrowed from a bare ``except:`` so SystemExit / KeyboardInterrupt
        # still propagate; ordinary errors remain logged, not raised.
        logging.error("catch exception,err_msg:%s" % traceback.format_exc())
@classmethod @classmethod
def get_register_user_tag(cls,pk_list): def get_register_user_tag(cls,pk_list):
"""
:remark 用户注册时选的标签
:param pk_list:
:return:
"""
try: try:
# user_id_set = set() # user_id_set = set()
user_id_dict = dict() user_id_dict = dict()
...@@ -43,7 +75,10 @@ class RegisterUserTag(object): ...@@ -43,7 +75,10 @@ class RegisterUserTag(object):
user_id_dict[user_id] = user_tag_list user_id_dict[user_id] = user_tag_list
for user_id in user_id_dict: for user_id in user_id_dict:
redis_client.hset(cls.linucb_register_user_tag_key, user_id, json.dumps(list(user_id_dict[user_id]))) redis_user_tag_id_data = redis_client.hget(cls.linucb_register_user_tag_key, user_id)
redis_user_tag_id_list = json.loads(redis_user_tag_id_data) if redis_user_tag_id_data else []
redis_user_tag_id_list.extend(user_id_dict[user_id])
redis_client.hset(cls.linucb_register_user_tag_key, user_id, json.dumps(list(set(redis_user_tag_id_list))))
# if user_id not in user_id_set: # if user_id not in user_id_set:
# user_id_set.add(user_id) # user_id_set.add(user_id)
......
...@@ -52,11 +52,8 @@ class CollectData(object): ...@@ -52,11 +52,8 @@ class CollectData(object):
logging.error("catch exception,err_msg:%s" % traceback.format_exc()) logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return dict() return dict()
def update_recommend_tag_list(self, device_id,user_feature=None,user_id=None,click_topic_tag_list=None): def update_recommend_tag_list(self, device_id,user_feature=None,user_id=None,click_topic_tag_list=None,new_user_click_tag_list = []):
try: try:
recommend_tag_set = set()
recommend_tag_list = list()
recommend_tag_dict = dict()
redis_linucb_tag_data_dict = self._get_user_linucb_info(device_id) redis_linucb_tag_data_dict = self._get_user_linucb_info(device_id)
if len(redis_linucb_tag_data_dict) == 0: if len(redis_linucb_tag_data_dict) == 0:
recommend_tag_list = LinUCB.get_default_tag_list(user_id) recommend_tag_list = LinUCB.get_default_tag_list(user_id)
...@@ -82,22 +79,17 @@ class CollectData(object): ...@@ -82,22 +79,17 @@ class CollectData(object):
if click_topic_tag_list: if click_topic_tag_list:
if len(click_topic_tag_list)>0: if len(click_topic_tag_list)>0:
recommend_topic_id_list_click = ESPerform.get_tag_topic_list(click_topic_tag_list, recommend_topic_id_list_click = ESPerform.get_tag_topic_list(click_topic_tag_list,
have_read_topic_id_list) have_read_topic_id_list,size=2)
if len(recommend_topic_id_list_click) > 0: if len(recommend_topic_id_list_click) > 0:
num = min(len(recommend_topic_id_list_click), 2) recommend_topic_id_list.extend(recommend_topic_id_list_click)
logging.info("recommend_topic_id_list:%s" % (str(num))) have_read_topic_id_list.extend(recommend_topic_id_list)
for i in range(0,num): click_recommend_redis_key = self.click_recommend_redis_key_prefix + str(device_id)
recommend_topic_id_list.append(recommend_topic_id_list_click[i]) click_redis_data_dict = {
have_read_topic_id_list.extend(recommend_topic_id_list) "data": json.dumps(recommend_topic_id_list),
click_recommend_redis_key = self.click_recommend_redis_key_prefix + str(device_id) "cursor": 0
click_redis_data_dict = { }
"data": json.dumps(recommend_topic_id_list), redis_client.hmset(click_recommend_redis_key, click_redis_data_dict)
"cursor": 0
}
redis_client.hmset(click_recommend_redis_key, click_redis_data_dict)
total_topic_list = list()
tag_topic_id_list = list()
tag_id_list = recommend_tag_list[0:100] tag_id_list = recommend_tag_list[0:100]
topic_recommend_redis_key = self.linucb_recommend_topic_id_prefix + str(device_id) topic_recommend_redis_key = self.linucb_recommend_topic_id_prefix + str(device_id)
...@@ -111,9 +103,16 @@ class CollectData(object): ...@@ -111,9 +103,16 @@ class CollectData(object):
if len(recommend_topic_id_list)==0 and cursor==0 and len(redis_topic_list)>0: if len(recommend_topic_id_list)==0 and cursor==0 and len(redis_topic_list)>0:
have_read_topic_id_list.extend(redis_topic_list[:2]) have_read_topic_id_list.extend(redis_topic_list[:2])
tag_topic_id_list = ESPerform.get_tag_topic_list(tag_id_list,have_read_topic_id_list) if len(tag_id_list) > 0:
if len(new_user_click_tag_list)>0:
tag_topic_id_list = ESPerform.get_tag_topic_list(new_user_click_tag_list, have_read_topic_id_list)
logging.warning("tag_topic_id_list:%s" % str(new_user_click_tag_list))
logging.warning("tag_id_list:%s" % str(tag_id_list))
else:
tag_topic_id_list = ESPerform.get_tag_topic_list(tag_id_list,have_read_topic_id_list)
if len(recommend_topic_id_list)>0: if len(recommend_topic_id_list)>0 or len(new_user_click_tag_list) > 0:
tag_topic_id_list = recommend_topic_id_list + tag_topic_id_list tag_topic_id_list = recommend_topic_id_list + tag_topic_id_list
redis_data_dict = { redis_data_dict = {
"data": json.dumps(tag_topic_id_list), "data": json.dumps(tag_topic_id_list),
...@@ -164,8 +163,8 @@ class CollectData(object): ...@@ -164,8 +163,8 @@ class CollectData(object):
tag_list = list() tag_list = list()
click_topic_tag_list = list() click_topic_tag_list = list()
collection_tag_sql_query_results = TopicTag.objects.using(settings.SLAVE_DB_NAME).filter(topic_id=topic_id).values_list("tag_id","is_online","is_collection") collection_tag_sql_query_results = TopicTag.objects.using(settings.SLAVE_DB_NAME).filter(topic_id=topic_id).values_list("tag_id","is_online","is_collection")
if len(collection_tag_sql_query_results)>0: # if len(collection_tag_sql_query_results)>0:
for tag_id,is_online,is_collection in collection_tag_sql_query_results: for tag_id,is_online,is_collection in collection_tag_sql_query_results:
if is_online and is_collection == 1: if is_online and is_collection == 1:
click_topic_tag_list.append(tag_id) click_topic_tag_list.append(tag_id)
...@@ -203,7 +202,7 @@ class CollectData(object): ...@@ -203,7 +202,7 @@ class CollectData(object):
exposure_cards_list = list() exposure_cards_list = list()
device_id = raw_val_dict["device"]["device_id"] device_id = raw_val_dict["device"]["device_id"]
user_id = raw_val_dict["user_id"] if "user_id" in raw_val_dict else None user_id = raw_val_dict["user_id"] if "user_id" in raw_val_dict else None
logging.warning("type msg:%s" % raw_val_dict.get("type"))
exposure_topic_id_list = list() exposure_topic_id_list = list()
for item in exposure_cards_list: for item in exposure_cards_list:
if "card_id" not in item: if "card_id" not in item:
...@@ -217,14 +216,14 @@ class CollectData(object): ...@@ -217,14 +216,14 @@ class CollectData(object):
topic_tag_id_dict = dict() topic_tag_id_dict = dict()
tag_list = list() tag_list = list()
exposure_sql_query_results = TopicTag.objects.using(settings.SLAVE_DB_NAME).filter(topic_id__in=exposure_topic_id_list).values_list("topic_id","tag_id","is_online","is_collection") exposure_sql_query_results = TopicTag.objects.using(settings.SLAVE_DB_NAME).filter(topic_id__in=exposure_topic_id_list).values_list("topic_id","tag_id","is_online","is_collection")
# if len(exposure_sql_query_results)>0:
for topic_id,tag_id,is_online,is_collection in exposure_sql_query_results: for topic_id,tag_id,is_online,is_collection in exposure_sql_query_results:
if is_online and is_collection ==1: if is_online and is_collection == 1:
tag_list.append(tag_id) tag_list.append(tag_id)
if is_online: if is_online:
tag_sql_query_results = Tag.objects.using(settings.SLAVE_DB_NAME).filter( tag_sql_query_results = Tag.objects.using(settings.SLAVE_DB_NAME).filter(
id=tag_id).values_list("id", "is_ai") id=tag_id).values_list("id", "collection", "is_ai")
for id, is_ai in tag_sql_query_results: for id, collection, is_ai in tag_sql_query_results:
if (is_ai == 1) and id not in tag_list: if (is_ai == 1) and id not in tag_list:
tag_list.append(id) tag_list.append(id)
...@@ -244,6 +243,31 @@ class CollectData(object): ...@@ -244,6 +243,31 @@ class CollectData(object):
# 更新该用户的推荐tag数据,放在 更新完成user tag行为信息之后 # 更新该用户的推荐tag数据,放在 更新完成user tag行为信息之后
self.update_recommend_tag_list(device_id, user_feature, user_id) self.update_recommend_tag_list(device_id, user_feature, user_id)
elif "type" in raw_val_dict and "interest_choice_click_next" == raw_val_dict["type"]:
if isinstance(raw_val_dict["params"]["tagid_list"],str):
tagid_list = json.loads(raw_val_dict["params"]["tagid_list"])
elif isinstance(raw_val_dict["params"]["tagid_list"],list):
tagid_list = raw_val_dict["params"]["tagid_list"]
else:
tagid_list = list()
logging.warning("unknown type msg:%s" % raw_val_dict.get("type", "missing type"))
logging.info(
"consume click topic_id:%s,device_id:%s" % (
str(tagid_list), str(device_id)))
device_id = raw_val_dict["device"]["device_id"]
user_id = raw_val_dict["user_id"] if "user_id" in raw_val_dict else None
# if len(exposure_sql_query_results)>0:
if len(tagid_list) > 0:
is_click = 1
is_vote = 0
reward = 1 if is_click or is_vote else 0
for tag_id in tagid_list:
self.update_user_linucb_tag_info(reward, device_id, tag_id, user_feature)
# 更新该用户的推荐tag数据,放在 更新完成user tag行为信息之后
self.update_recommend_tag_list(device_id, user_feature, user_id,new_user_click_tag_list=tagid_list)
else: else:
logging.warning("unknown type msg:%s" % raw_val_dict.get("type", "missing type")) logging.warning("unknown type msg:%s" % raw_val_dict.get("type", "missing type"))
except: except:
......
...@@ -225,9 +225,10 @@ def pictorial_topic_sort(pictorial_id=-1, offset=0, size=10): ...@@ -225,9 +225,10 @@ def pictorial_topic_sort(pictorial_id=-1, offset=0, size=10):
} }
}, },
"sort":[ "sort":[
{"total_vote_num":{"order":"desc"}}, {"total_vote_num": {"order": "desc"}},
{"create_time": {"order": "desc"}} {"create_time": {"order": "desc"}}
] ]
} }
pict_pictorial_ids_list =[] pict_pictorial_ids_list =[]
# 获取es链接对象 # 获取es链接对象
......
...@@ -118,8 +118,8 @@ def choice_push_tag(device_id, user_id): ...@@ -118,8 +118,8 @@ def choice_push_tag(device_id, user_id):
redis_push_tag_dict.popitem(tag_id) redis_push_tag_dict.popitem(tag_id)
redis_push_tag_list = list(redis_push_tag_dict.keys()) redis_push_tag_list = list(redis_push_tag_dict.keys())
account_user_tag_list = AccountUserTag.objects.filter(user=user_id).values_list("tag_id",flat=True) account_user_tag_list = list(AccountUserTag.objects.filter(user=user_id).values_list("tag_id",flat=True))
community_tag_follow_list = CommunityTagFollow.objects.filter(user_id=user_id).values_list("tag_id",flat=True) community_tag_follow_list = list(CommunityTagFollow.objects.filter(user_id=user_id).values_list("tag_id",flat=True))
linucb_recommend_redis_prefix = "physical:linucb:tag_recommend:device_id:" linucb_recommend_redis_prefix = "physical:linucb:tag_recommend:device_id:"
tag_recommend_redis_key = linucb_recommend_redis_prefix + str(device_id) tag_recommend_redis_key = linucb_recommend_redis_prefix + str(device_id)
...@@ -134,10 +134,10 @@ def choice_push_tag(device_id, user_id): ...@@ -134,10 +134,10 @@ def choice_push_tag(device_id, user_id):
if len(unread_tag_list)>0: if len(unread_tag_list)>0:
for tag_id in unread_tag_list: for tag_id in unread_tag_list:
valid_tag_topic_num = TopicTag.objects.filter(tag_id=tag_id,is_online=True).count() valid_tag_topic_num = TopicTag.objects.filter(tag_id=tag_id,is_online=True).count()
if valid_tag_topic_num>100: if valid_tag_topic_num>0:
ret_tag_set.add(tag_id) ret_tag_set.add(tag_id)
redis_push_tag_dict[tag_id] = now_sec redis_push_tag_dict[tag_id] = now_sec
if len(ret_tag_set)>=2: if len(ret_tag_set)>=1:
break break
redis_client.set(redis_push_tag_key, json.dumps(redis_push_tag_dict)) redis_client.set(redis_push_tag_key, json.dumps(redis_push_tag_dict))
......
This diff is collapsed.
...@@ -51,6 +51,7 @@ ...@@ -51,6 +51,7 @@
"analyzer": "gm_default_index", "analyzer": "gm_default_index",
"search_analyzer": "gm_default_index" "search_analyzer": "gm_default_index"
}, },
"is_excellent":{"type": "long"} "is_excellent":{"type": "long"},
"is_operation_home_recommend": {"type": "boolean"} //是否首页运营推荐
} }
} }
...@@ -51,6 +51,7 @@ ...@@ -51,6 +51,7 @@
"analyzer": "gm_default_index", "analyzer": "gm_default_index",
"search_analyzer": "gm_default_index" "search_analyzer": "gm_default_index"
}, },
"is_excellent":{"type": "long"} "is_excellent":{"type": "long"},
"is_operation_home_recommend": {"type": "boolean"} //是否首页运营推荐
} }
} }
{ {
"dynamic":"strict", "dynamic":"strict",
"_routing": {"required": true},
"properties": { "properties": {
"id":{"type":"long"}, "id":{"type":"long"},
"is_online":{"type":"boolean"},//上线 "is_online":{"type":"boolean"},//上线
"is_deleted":{"type":"boolean"}, "is_deleted":{"type":"boolean"},
"vote_num":{"type":"long"}, "vote_num":{"type":"long"},
"total_vote_num":{"type":"long","default":0}, "total_vote_num":{"type":"long"},
"reply_num":{"type":"long"}, "reply_num":{"type":"long"},
"name":{"type":"text","analyzer":"gm_default_index","search_analyzer":"gm_default_index"}, "name":{"type":"text","analyzer":"gm_default_index","search_analyzer":"gm_default_index"},
"description":{"type":"text","analyzer":"gm_default_index","search_analyzer":"gm_default_index"}, "description":{"type":"text","analyzer":"gm_default_index","search_analyzer":"gm_default_index"},
...@@ -49,7 +50,8 @@ ...@@ -49,7 +50,8 @@
"type": "text", "type": "text",
"analyzer": "gm_default_index", "analyzer": "gm_default_index",
"search_analyzer": "gm_default_index" "search_analyzer": "gm_default_index"
} },
"is_excellent":{"type": "long"},
"is_operation_home_recommend": {"type": "boolean"} //是否首页运营推荐
} }
} }
...@@ -51,6 +51,7 @@ ...@@ -51,6 +51,7 @@
"analyzer": "gm_default_index", "analyzer": "gm_default_index",
"search_analyzer": "gm_default_index" "search_analyzer": "gm_default_index"
}, },
"is_excellent":{"type": "long"} "is_excellent":{"type": "long"},
"is_operation_home_recommend": {"type": "boolean"} //是否首页运营推荐
} }
} }
...@@ -197,8 +197,8 @@ class Topic(models.Model): ...@@ -197,8 +197,8 @@ class Topic(models.Model):
offline_score += 6.0 offline_score += 6.0
elif self.content_level == '4': elif self.content_level == '4':
offline_score += 5.0 offline_score += 5.0
elif self.content_level == '3': elif self.content_level == '6':
offline_score += 2.0 offline_score += 100.0
# exposure_count = ActionSumAboutTopic.objects.using(settings.SLAVE_DB_NAME).filter(topic_id=self.id, data_type=1).count() # exposure_count = ActionSumAboutTopic.objects.using(settings.SLAVE_DB_NAME).filter(topic_id=self.id, data_type=1).count()
# click_count = ActionSumAboutTopic.objects.using(settings.SLAVE_DB_NAME).filter(topic_id=self.id, data_type=2).count() # click_count = ActionSumAboutTopic.objects.using(settings.SLAVE_DB_NAME).filter(topic_id=self.id, data_type=2).count()
...@@ -289,4 +289,5 @@ class TopicHomeRecommend(models.Model): ...@@ -289,4 +289,5 @@ class TopicHomeRecommend(models.Model):
db_table = "topic_home_recommend" db_table = "topic_home_recommend"
id = models.IntegerField(verbose_name=u"id",primary_key=True) id = models.IntegerField(verbose_name=u"id",primary_key=True)
topic_id = models.IntegerField(verbose_name=u"帖子ID") topic_id = models.IntegerField(verbose_name=u"帖子ID")
is_online = models.BooleanField(verbose_name=u'是否上线') is_online = models.BooleanField(verbose_name=u'是否上线')
\ No newline at end of file is_deleted = models.BooleanField(verbose_name=u'是否删除')
...@@ -9,7 +9,7 @@ import time ...@@ -9,7 +9,7 @@ import time
import re import re
import datetime import datetime
from trans2es.models.user import User from trans2es.models.user import User
from trans2es.models.topic import ExcellentTopic from trans2es.models.topic import ExcellentTopic,TopicHomeRecommend
class TopicTransfer(object): class TopicTransfer(object):
...@@ -121,6 +121,11 @@ class TopicTransfer(object): ...@@ -121,6 +121,11 @@ class TopicTransfer(object):
else: else:
res["is_excellent"] = 0 res["is_excellent"] = 0
res["is_operation_home_recommend"] = False
operation_home_recommend = TopicHomeRecommend.objects.filter(topic_id=instance.id).first()
if operation_home_recommend and operation_home_recommend.is_online and not operation_home_recommend.is_deleted:
res["is_operation_home_recommend"] = True
logging.info("test topic transfer time cost,time0:%d,time1:%d,time2:%d,time3:%d,time4:%d" % (time0,time1,time2,time3,time4)) logging.info("test topic transfer time cost,time0:%d,time1:%d,time2:%d,time3:%d,time4:%d" % (time0,time1,time2,time3,time4))
return res return res
except: except:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment