Commit bfb3c319 authored by lixiaofang's avatar lixiaofang

Merge branch 'toop' into test

parents 0ad7f0ca 42453f10
......@@ -154,14 +154,26 @@ class ESPerform(object):
cls.put_index_mapping(es_cli,sub_index_name)
bulk_actions = []
for data in data_list:
bulk_actions.append({
'_op_type': 'index',
'_index': official_index_name,
'_type': doc_type,
'_id': data['id'],
'_source': data,
})
if sub_index_name=="topic":
for data in data_list:
bulk_actions.append({
'_op_type': 'index',
'_index': official_index_name,
'_type': doc_type,
'_id': data['id'],
'_source': data,
'routing': data["content_level"]
})
else:
for data in data_list:
bulk_actions.append({
'_op_type': 'index',
'_index': official_index_name,
'_type': doc_type,
'_id': data['id'],
'_source': data,
})
elasticsearch.helpers.bulk(es_cli,bulk_actions)
return True
......
......@@ -9,6 +9,9 @@ import json
from trans2es.models.tag import TopicTag
import traceback
from django.conf import settings
from libs.es import ESPerform
from search.utils.common import *
class KafkaManager(object):
consumser_obj = None
......@@ -28,6 +31,8 @@ class CollectData(object):
def __init__(self):
self.linucb_matrix_redis_prefix = "physical:linucb:device_id:"
self.linucb_recommend_redis_prefix = "physical:linucb:tag_recommend:device_id:"
self.linucb_recommend_topic_id_prefix = "physical:linucb:topic_recommend:device_id:"
self.tag_topic_id_redis_prefix = "physical:tag_id:topic_id_list:"
# 默认
self.user_feature = [0,1]
......@@ -44,24 +49,88 @@ class CollectData(object):
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return dict()
def get_tag_topic_list(self,tag_id):
    """Return the ids of topics carrying ``tag_id`` from the "topic-high-star" index.

    Only documents that are online and not deleted are matched. Hits are
    sorted by ``create_time_val`` descending, then ``language_type``
    ascending, and at most 5000 are requested.

    :param tag_id: tag id matched against the ``tag_list`` field.
    :return: list of the ``id`` values of the hits; an empty list when
        anything raises (the traceback is logged).
    """
    try:
        # All three conditions must hold for a document to match.
        must_clauses = [
            {"term":{"is_online": True}},
            {"term":{"is_deleted": False}},
            {"term":{"tag_list":tag_id}}
        ]
        sort_rules = [
            {"create_time_val":{"order":"desc"}},
            {"language_type":{"order":"asc"}},
        ]
        query_body = {
            "query":{"bool":{"must":must_clauses}},
            # Only the id is needed, so restrict the returned source.
            "_source":{"include":["id"]},
            "sort":sort_rules
        }
        es_cli = ESPerform.get_cli()
        search_result = ESPerform.get_search_results(es_cli, sub_index_name="topic-high-star",
                                                     query_body=query_body, offset=0, size=5000)
        found_topic_ids = list()
        for hit in search_result["hits"]:
            found_topic_ids.append(hit["_source"]["id"])
        return found_topic_ids
    except:
        logging.error("catch exception,err_msg:%s" % traceback.format_exc())
        return list()
def update_recommend_tag_list(self, device_id,user_feature=None):
try:
recommend_tag_set = set()
recommend_tag_list = list()
recommend_tag_dict = dict()
redis_linucb_tag_data_dict = self._get_user_linucb_info(device_id)
if len(redis_linucb_tag_data_dict) == 0:
recommend_tag_list = LinUCB.get_default_tag_list()
LinUCB.init_device_id_linucb_info(redis_client, self.linucb_matrix_redis_prefix,device_id,recommend_tag_list)
else:
user_feature = user_feature if user_feature else self.user_feature
recommend_tag_list = LinUCB.linucb_recommend_tag(device_id,redis_linucb_tag_data_dict,user_feature,list(redis_linucb_tag_data_dict.keys()))
(recommend_tag_dict,recommend_tag_set) = LinUCB.linucb_recommend_tag(device_id,redis_linucb_tag_data_dict,user_feature,list(redis_linucb_tag_data_dict.keys()))
logging.info("duan add,device_id:%s,recommend_tag_list:%s" % (str(device_id), str(recommend_tag_list)))
if len(recommend_tag_list) > 0:
if len(recommend_tag_dict) > 0:
recommend_tag_list = list(recommend_tag_set)
tag_recommend_redis_key = self.linucb_recommend_redis_prefix + str(device_id)
redis_client.set(tag_recommend_redis_key, json.dumps(recommend_tag_list))
# Todo:设置过期时间,调研set是否支持
redis_client.expire(tag_recommend_redis_key, 7*24*60*60)
redis_key = "physical:home_recommend" + ":device_id:" + device_id + ":query_type:" + str(TopicPageType.HOME_RECOMMEND)
have_read_topic_id_list = list()
redis_field_list = [b'have_read_topic_list']
redis_field_val_list = redis_client.hmget(redis_key, redis_field_list)
if redis_field_val_list[0]:
have_read_topic_id_list = list(json.loads(redis_field_val_list[0]))
recommend_topic_id_list = list()
for index in range(0,1000):
for tag_id in recommend_tag_list[0:5]:
redis_tag_id_key = self.tag_topic_id_redis_prefix + str(tag_id)
redis_tag_id_data = redis_client.get(redis_tag_id_key)
tag_topic_id_list = json.loads(redis_tag_id_data) if redis_tag_id_data else []
if not redis_tag_id_data:
tag_topic_id_list = self.get_tag_topic_list(tag_id)
redis_client.set(redis_tag_id_key,json.dumps(tag_topic_id_list))
redis_client.expire(redis_tag_id_key,1*24*60*60)
if len(tag_topic_id_list)>index:
for topic_id in tag_topic_id_list[index:]:
if topic_id not in have_read_topic_id_list and topic_id not in recommend_topic_id_list:
recommend_topic_id_list.append(topic_id)
break
topic_recommend_redis_key = self.linucb_recommend_topic_id_prefix + str(device_id)
redis_data_dict = {
"data": json.dumps(recommend_topic_id_list),
"cursor":0
}
redis_client.hmset(topic_recommend_redis_key,redis_data_dict)
return True
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
......
......@@ -14,9 +14,9 @@ from django.conf import settings
class LinUCB:
d = 2
alpha = 0.25
r1 = 1
r0 = -0.5
alpha = 0.01
r1 = 10
r0 = -0.1
default_tag_list = list()
@classmethod
......@@ -71,6 +71,7 @@ class LinUCB:
top_tag_set = set()
top_tag_dict = dict()
np_score_list = list()
np_score_dict = dict()
......@@ -85,18 +86,20 @@ class LinUCB:
sorted_np_score_list = sorted(np_score_list,reverse=True)
for top_score in sorted_np_score_list:
for top_score_index in np_score_dict[top_score]:
top_tag_set.add(str(tag_list[top_score_index], encoding="utf-8"))
if len(top_tag_set) >= 10:
tag_id = str(tag_list[top_score_index], encoding="utf-8")
top_tag_dict[tag_id] = top_score
top_tag_set.add(tag_id)
if len(top_tag_dict) >= 20:
break
if len(top_tag_set) >= 10:
if len(top_tag_dict) >= 20:
break
logging.info("duan add,device_id:%s,sorted_np_score_list:%s,np_score_dict:%s" % (str(device_id), str(sorted_np_score_list), str(np_score_dict)))
return list(top_tag_set)
return (top_tag_dict,top_tag_set)
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return []
return ({},())
@classmethod
def init_device_id_linucb_info(cls, redis_cli,redis_prefix, device_id, tag_list):
......
......@@ -134,7 +134,7 @@ class TopicUtils(object):
"""
try:
attention_user_id_list = list()
pick_user_id_list = list()
# pick_user_id_list = list()
# same_group_id_list = list()
user_tag_list = list()
......@@ -145,8 +145,8 @@ class TopicUtils(object):
attention_user_info_list = result_dict["hits"][0]["_source"]["attention_user_id_list"]
attention_user_id_list = [item["user_id"] for item in attention_user_info_list]
pick_user_info_list = result_dict["hits"][0]["_source"]["pick_user_id_list"]
pick_user_id_list = [item["user_id"] for item in pick_user_info_list]
# pick_user_info_list = result_dict["hits"][0]["_source"]["pick_user_id_list"]
# pick_user_id_list = [item["user_id"] for item in pick_user_info_list]
# same_pictorial_user_info_list = result_dict["hits"][0]["_source"]["same_pictorial_user_id_list"]
#
......@@ -165,22 +165,22 @@ class TopicUtils(object):
"language_type": 1
}
},
"weight": 3
"weight": 6
},
{
"linear": {
"gauss": {
"create_time": {
"scale": "1d",
"decay": 0.99
}
},
"weight": 500
"weight": 5
}
]
if len(user_similar_score_list) > 0:
for item in user_similar_score_list[:100]:
score_item = 3 * 10 * item[1]
score_item = 2 + item[1]
functions_list.append(
{
"filter": {"bool": {
......@@ -197,14 +197,14 @@ class TopicUtils(object):
"weight": 3,
}
)
if len(pick_user_id_list) > 0:
functions_list.append(
{
"filter": {"bool": {
"should": {"terms": {"user_id": pick_user_id_list}}}},
"weight": 2
}
)
# if len(pick_user_id_list) > 0:
# functions_list.append(
# {
# "filter": {"bool": {
# "should": {"terms": {"user_id": pick_user_id_list}}}},
# "weight": 2
# }
# )
# if len(same_pictorial_id_list) > 0:
# functions_list.append(
......@@ -224,22 +224,41 @@ class TopicUtils(object):
"weight": 1
}
)
if len(recommend_tag_list) > 0:
functions_list.append(
{
"filter": {"bool": {
"should": {"terms": {"edit_tag_list": recommend_tag_list}}}},
"weight": 3
}
)
# if len(recommend_tag_list)>0:
# if len(recommend_tag_list)>1:
# functions_list += [
# {
# "filter": {"term": {"tag_list": recommend_tag_list[0]}},
# "weight": 4
# },
# {
# "filter": {"terms": {"tag_list": recommend_tag_list[1:]}},
# "weight": 3
# }
# ]
# else:
# functions_list.append(
# {
# "filter": {"terms": {"tag_list": recommend_tag_list}},
# "weight": 3
# }
# )
# for tag_id in recommend_tag_dict:
# functions_list.append(
# {
# "filter": {"term": {"tag_list": tag_id}},
# "weight": recommend_tag_dict[tag_id]
# }
# )
low_content_level = 4 if query_type == TopicPageType.FIND_PAGE else 3
# low_content_level = 4 if query_type == TopicPageType.FIND_PAGE else 3
query_function_score = {
"query": {
"bool": {
"filter": [
# {"range": {"content_level": {"gte": low_content_level, "lte": 5}}},
{"term": {"has_image":True}},
# {"term": {"has_image":True}},
{"term": {"is_online": True}},
{"term": {"is_deleted": False}}
],
......@@ -319,7 +338,13 @@ class TopicUtils(object):
},
"order": "desc"
}
}
},
# {
# "offline_score":{
# "order": "desc"
# }
# },
"_score"
]
result_dict = ESPerform.get_search_results(ESPerform.get_cli(), sub_index_name=index_type, query_body=q,
offset=offset, size=size)
......@@ -722,6 +747,18 @@ class TopicUtils(object):
"order": "desc"
},
})
elif sort_by == TOPIC_SEARCH_SORT.VOTE_NUM_AEC:
sort_rule.append({
"total_vote_num": {
"order": "asc"
},
})
elif sort_by == TOPIC_SEARCH_SORT.VOTE_NUM_DESC:
sort_rule.append({
"total_vote_num": {
"order": "desc"
},
})
return sort_rule
......
......@@ -46,23 +46,36 @@ def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, size, query
query_type=TopicPageType.HOME_RECOMMEND):
try:
if query is None:
redis_key = "physical:home_recommend" + ":user_id:" + str(
user_id) + ":device_id:" + device_id + ":query_type:" + str(query_type)
# redis_key = "physical:home_recommend" + ":user_id:" + str(
# user_id) + ":device_id:" + device_id + ":query_type:" + str(query_type)
redis_key = "physical:home_recommend" + ":device_id:" + device_id + ":query_type:" + str(query_type)
else:
redis_key = "physical:home_query" + ":user_id:" + str(
user_id) + ":device_id:" + device_id + ":query:" + str(query) + ":query_type:" + str(query_type)
# redis_key = "physical:home_query" + ":user_id:" + str(
# user_id) + ":device_id:" + device_id + ":query:" + str(query) + ":query_type:" + str(query_type)
redis_key = "physical:home_query" + ":device_id:" + device_id + ":query:" + str(query) + ":query_type:" + str(query_type)
redis_field_list = [b'have_read_topic_list']
redis_field_val_list = redis_client.hmget(redis_key, redis_field_list)
tag_recommend_redis_key = "physical:linucb:tag_recommend:device_id:" + str(device_id)
topic_recommend_redis_key = "physical:linucb:topic_recommend:device_id:" + str(device_id)
# recommend_tag_dict = dict()
# tag_recommend_val = redis_client.get(tag_recommend_redis_key)
# if tag_recommend_val:
# recommend_tag_dict = json.loads(str(tag_recommend_val, encoding="utf-8"))
recommend_tag_list = []
tag_recommend_val = redis_client.get(tag_recommend_redis_key)
if tag_recommend_val:
recommend_tag_list = json.loads(str(tag_recommend_val, encoding="utf-8"))
recommend_topic_list=list()
recommend_topic_dict = redis_client.hgetall(topic_recommend_redis_key)
if b"data" in recommend_topic_dict:
recommend_topic_id_list = json.loads(recommend_topic_dict[b"data"])
cursor = int(str(recommend_topic_dict[b"cursor"], encoding="utf-8"))
newcursor = cursor + 5
if len(recommend_topic_id_list) > newcursor:
recommend_topic_list = recommend_topic_id_list[cursor:newcursor]
redis_client.hset(topic_recommend_redis_key,"cursor",newcursor)
recommend_topic_ids = []
have_read_topic_id_list = list()
if redis_field_val_list[0]:
......@@ -77,10 +90,11 @@ def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, size, query
user_similar_score_redis_list = json.loads(
redis_user_similar_score_redis_val) if redis_user_similar_score_redis_val else []
size = size-len(recommend_topic_list)
topic_id_list = TopicUtils.get_recommend_topic_ids(user_id=user_id, tag_id=tag_id, offset=offset, size=size,
single_size=size,query=query, query_type=query_type,
filter_topic_id_list=have_read_topic_id_list,
recommend_tag_list=recommend_tag_list,
recommend_tag_list=recommend_topic_list,
user_similar_score_list=user_similar_score_redis_list,index_type="topic-high-star")
have_read_group_id_set = set()
have_read_user_id_set = set()
......@@ -122,18 +136,19 @@ def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, size, query
# else:
# break
have_read_topic_id_list.extend(topic_id_list)
if len(have_read_topic_id_list) > 5000:
cut_len = len(have_read_topic_id_list)-5000
recommend_topic_list.extend(topic_id_list)
have_read_topic_id_list.extend(recommend_topic_list)
if len(have_read_topic_id_list) > 30000:
cut_len = len(have_read_topic_id_list)-30000
have_read_topic_id_list = have_read_topic_id_list[cut_len:]
redis_dict = {
"have_read_topic_list": json.dumps(have_read_topic_id_list),
}
redis_client.hmset(redis_key, redis_dict)
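# Expire this Redis key; the TTL is the value passed to expire() below (days, not the 15 minutes this note used to claim)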
redis_client.expire(redis_key, 60 * 60 * 24 * 3)
redis_client.expire(redis_key, 60 * 60 * 24 * 30)
return topic_id_list
return recommend_topic_list
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return []
......
......@@ -11,6 +11,7 @@
"content":{"type":"text","analyzer":"gm_default_index","search_analyzer":"gm_default_index"},
"content_level":{"type":"text"},
"user_id":{"type":"long"},
"user_nick_name":{"type":"text","analyzer":"gm_default_index","search_analyzer":"gm_default_index"},//帖子用户名
"group_id":{"type":"long"}, //所在组ID
"tag_list":{"type":"long"},//标签属性
"edit_tag_list":{"type":"long"},//编辑标签
......@@ -50,4 +51,4 @@
}
}
}
\ No newline at end of file
}
{
"dynamic":"strict",
"properties": {
"id":{"type":"long"},
"is_online":{"type":"boolean"},// online flag
"is_deleted":{"type":"boolean"},
"vote_num":{"type":"long"},
"reply_num":{"type":"long"},
"name":{"type":"text","analyzer":"gm_default_index","search_analyzer":"gm_default_index"},
"description":{"type":"text","analyzer":"gm_default_index","search_analyzer":"gm_default_index"},
"content":{"type":"text","analyzer":"gm_default_index","search_analyzer":"gm_default_index"},
"content_level":{"type":"text"},
"user_id":{"type":"long"},
"user_nick_name":{"type":"text","analyzer":"gm_default_index","search_analyzer":"gm_default_index"},// nick name of the topic's author
"group_id":{"type":"long"}, // ID of the group the topic belongs to
"tag_list":{"type":"long"},// tag attributes
"edit_tag_list":{"type":"long"},// edit tags
"tag_name_list":{"type":"text","analyzer":"gm_default_index","search_analyzer":"gm_default_index"},
"share_num":{"type":"long"},
"pick_id_list":{"type":"long"},
"offline_score":{"type":"double"},// offline-computed score
"manual_score":{"type":"double"},// manually assigned score
"has_image":{"type":"boolean"},// whether the topic has an image
"has_video":{"type":"boolean"},// whether the topic is a video
"create_time":{"type":"date", "format":"date_time_no_millis"},
"update_time":{"type":"date", "format":"date_time_no_millis"},
"create_time_val":{"type":"long"},
"update_time_val":{"type":"long"},
"language_type":{"type":"long"},
"is_shadow": {"type": "boolean"},
"is_recommend": {"type": "boolean"},
"is_complaint": {"type": "boolean"}, // whether the topic has been reported
"virtual_content_level":{"type": "text"},
"like_num_crawl": {"type": "long"}, // crawled like count
"comment_num_crawl": {"type": "long"}, // crawled comment count
"is_crawl": {"type": "boolean"},
"platform": {"type": "long"},
"platform_id": {"type": "long"},
"drop_score":{"type": "double"}, // manual score deduction
"sort_score":{"type": "double"}, // sort score
"pictorial_id":{"type": "long"}, // ID of the group the topic belongs to
"pictorial_name":{ // name of the group the topic belongs to
"type": "text",
"analyzer": "gm_default_index",
"search_analyzer": "gm_default_index"
}
}
}
......@@ -5,12 +5,14 @@
"is_online":{"type":"boolean"},//上线
"is_deleted":{"type":"boolean"},
"vote_num":{"type":"long"},
"total_vote_num":{"type":"long"},
"reply_num":{"type":"long"},
"name":{"type":"text","analyzer":"gm_default_index","search_analyzer":"gm_default_index"},
"description":{"type":"text","analyzer":"gm_default_index","search_analyzer":"gm_default_index"},
"content":{"type":"text","analyzer":"gm_default_index","search_analyzer":"gm_default_index"},
"content_level":{"type":"text"},
"user_id":{"type":"long"},
"user_nick_name":{"type":"text","analyzer":"gm_default_index","search_analyzer":"gm_default_index"},//帖子用户名
"group_id":{"type":"long"}, //所在组ID
"tag_list":{"type":"long"},//标签属性
"edit_tag_list":{"type":"long"},//编辑标签
......
......@@ -82,6 +82,14 @@ class Topic(models.Model):
platform = models.IntegerField(verbose_name=u'平台来源', choices=GRAP_PLATFORM, default=GRAP_PLATFORM.ALPHA)
platform_id = models.BigIntegerField(verbose_name='用平台ID', null=True)
def get_virtual_vote_num(self):
    """Return the virtual vote count stored in ``TopicExtra`` for this topic.

    :return: ``virtual_vote_num`` of the ``TopicExtra`` row whose
        ``topic_id`` equals this topic's id; 0 when no such row exists or
        the lookup fails (the failure is logged).
    """
    import logging
    import traceback

    try:
        # Fix: the model manager is ``objects``. The previous ``TopicExtra.object``
        # raised AttributeError on every call, which the bare except swallowed,
        # so this method always returned 0.
        # filter().first() returns None for a missing row instead of raising,
        # and tolerates duplicate rows since topic_id is indexed but not unique.
        topic_extra = TopicExtra.objects.filter(topic_id=self.id).first()
        if topic_extra is None:
            return 0
        return topic_extra.virtual_vote_num
    except Exception:
        # Stay best-effort for callers, but log so real failures are visible.
        logging.error("catch exception,err_msg:%s" % traceback.format_exc())
        return 0
def get_pictorial_id(self):
try:
pictorial_id_list =[]
......@@ -238,3 +246,16 @@ class PictorialTopic(models.Model):
is_online = models.BooleanField(verbose_name=u"是否有效", default=True)
is_online = models.BooleanField(verbose_name=u'是否上线')
is_deleted = models.BooleanField(verbose_name=u'是否删除')
class TopicExtra(models.Model):
"""Extra information related to a topic (table ``topic_extra``)."""
class Meta:
verbose_name = '帖子额外信息'
app_label = 'community'
db_table = 'topic_extra'
# Primary key of the row.
id = models.IntegerField(verbose_name=u'ID', primary_key=True)
# ID of the topic this row belongs to (indexed for lookups by topic).
topic_id = models.IntegerField(verbose_name=u"帖子ID",db_index=True)
# Virtual vote count of the topic.
virtual_vote_num = models.IntegerField(verbose_name="帖子虚拟点赞")
......@@ -34,6 +34,16 @@ class User(models.Model):
create_time = models.DateTimeField(verbose_name=u'创建时间', default=datetime.datetime.fromtimestamp(0))
update_time = models.DateTimeField(verbose_name=u'更新时间', default=datetime.datetime.fromtimestamp(0))
@classmethod
def get_user_nick_name(cls,user_id):
    """Return the nick name of the user with the given ``user_id``.

    The lookup runs against the database named by ``settings.SLAVE_DB_NAME``.

    :param user_id: value matched against the ``user_id`` column.
    :return: the stored ``nick_name`` of the first matching row; an empty
        string when no row matches or when the query raises (the traceback
        is logged in the latter case).
    """
    try:
        row = cls.objects.using(settings.SLAVE_DB_NAME).filter(user_id=user_id).values_list("nick_name").first()
        # first() returns None when no user matches. Handle that explicitly
        # rather than letting ``row[0]`` raise TypeError and log a traceback
        # for an ordinary "no such user" case.
        if row is None:
            return ""
        return row[0]
    except Exception:
        logging.error("catch exception,err_msg:%s" % traceback.format_exc())
        return ""
def get_is_recommend_flag(self):
is_shadow = False
is_recommend = False
......
......@@ -15,7 +15,8 @@ class UserExtra(models.Model):
db_table="user_extra"
id = models.IntegerField(verbose_name="主键ID",primary_key=True)
user_id = models.BigIntegerField(verbose_name=u"用户ID")
user_id = models.CharField(verbose_name=u"用户ID",max_length=100)
is_shadow = models.BooleanField(verbose_name=u"是否是马甲账户")
is_online = models.BooleanField(verbose_name=u"是否上线")
is_recommend = models.BooleanField(verbose_name=u"是否推荐")
......
......@@ -274,6 +274,16 @@ def get_type_info_map():
return _get_type_info_map_result
type_info_list = [
TypeInfo(
name='topic-star-routing',
type='topic-star-routing',
model=topic.Topic,
query_deferred=lambda: topic.Topic.objects.all().query,
get_data_func=TopicTransfer.get_topic_data,
bulk_insert_chunk_size=100,
round_insert_chunk_size=5,
round_insert_period=2,
),
TypeInfo(
name='topic-high-star', # >=4星日记
type='topic-high-star',
......
......@@ -8,7 +8,7 @@ from libs.tools import tzlc
import time
import re
import datetime
from trans2es.models.user import User
class TopicTransfer(object):
......@@ -27,6 +27,7 @@ class TopicTransfer(object):
res["content"] = instance.content
res["content_level"] = instance.content_level
res["user_id"] = instance.user_id
res["user_nick_name"] = User.get_user_nick_name(instance.user_id)
if instance.group:
res["group_id"] = instance.group.id
......@@ -108,6 +109,8 @@ class TopicTransfer(object):
res["update_time"] = tzlc_update_time
res["update_time_val"] = int(time.mktime(tzlc_update_time.timetuple()))
res["total_vote_num"] = instance.get_virtual_vote_num() + instance.vote_num
logging.info("test topic transfer time cost,time0:%d,time1:%d,time2:%d,time3:%d,time4:%d" % (time0,time1,time2,time3,time4))
return res
except:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment