Commit b216b90c authored by 段英荣

Merge branch 'test' into 'master'

# Conflicts:
#   search/views/topic.py
parents ed24f39e e6f5a172
......@@ -24,5 +24,29 @@
<option name="ITERATION_ELEMENTS_WRAPPING" value="chop_down_if_not_single" />
</formatting-settings>
</DBN-SQL>
<DBN-PSQL>
<case-options enabled="false">
<option name="KEYWORD_CASE" value="lower" />
<option name="FUNCTION_CASE" value="lower" />
<option name="PARAMETER_CASE" value="lower" />
<option name="DATATYPE_CASE" value="lower" />
<option name="OBJECT_CASE" value="preserve" />
</case-options>
<formatting-settings enabled="false" />
</DBN-PSQL>
<DBN-SQL>
<case-options enabled="false">
<option name="KEYWORD_CASE" value="lower" />
<option name="FUNCTION_CASE" value="lower" />
<option name="PARAMETER_CASE" value="lower" />
<option name="DATATYPE_CASE" value="lower" />
<option name="OBJECT_CASE" value="preserve" />
</case-options>
<formatting-settings enabled="false">
<option name="STATEMENT_SPACING" value="one_line" />
<option name="CLAUSE_CHOP_DOWN" value="chop_down_if_statement_long" />
<option name="ITERATION_ELEMENTS_WRAPPING" value="chop_down_if_not_single" />
</formatting-settings>
</DBN-SQL>
</code_scheme>
</component>
\ No newline at end of file
......@@ -120,9 +120,11 @@ class TopicUtils(object):
return {}
@classmethod
def get_recommend_topic_ids(cls,user_id,tag_id,offset,size,single_size,query=None,query_type=TopicPageType.FIND_PAGE,
filter_topic_id_list=[],test_score=False,must_topic_id_list=[],recommend_tag_list=[],
user_similar_score_list=[],index_type="topic",routing=None,attention_tag_list=[],linucb_user_id_list = [],disable_collpase=False):
def get_recommend_topic_ids(cls, user_id, tag_id, offset, size, single_size, query=None,
query_type=TopicPageType.FIND_PAGE,
filter_topic_id_list=[], test_score=False, must_topic_id_list=[], recommend_tag_list=[],
user_similar_score_list=[], index_type="topic", routing=None, attention_tag_list=[],
current_topic_id=-1, topic_tag_list=[], topic_user_id=-1):
"""
:remark:获取首页推荐帖子列表
:param user_id:
......@@ -158,14 +160,14 @@ class TopicUtils(object):
q["query"] = dict()
functions_list = [
# {
# "filter": {
# "term": {
# "language_type": 1
# }
# },
# "weight": 60
# },
{
"filter": {
"term": {
"language_type": 1
}
},
"weight": 60
},
{
"gauss": {
"create_time": {
......@@ -175,18 +177,18 @@ class TopicUtils(object):
},
"weight": 60
},
# {
# "filter": {
# "constant_score":{
# "filter":{
# "term": {
# "content_level": 6
# }
# }
# }
# },
# "weight": 600
# }
{
"filter": {
"constant_score": {
"filter": {
"term": {
"content_level": 6
}
}
}
},
"weight": 600
}
]
# if len(user_similar_score_list) > 0:
......@@ -216,7 +218,25 @@ class TopicUtils(object):
"weight": 100
}
)
if current_topic_id != -1:
if len(topic_tag_list) > 0:
functions_list.append(
{
"filter": {"bool": {
"should": {"terms": {"tag_list": topic_tag_list}}}},
"weight": 2000
}
)
if topic_user_id != -1:
functions_list.append(
{
"filter": {"bool": {
"should": {"term": {"user_id": topic_user_id}}}},
"weight": 1500
}
)
query_function_score = {
"query": {
"bool": {
......@@ -242,10 +262,7 @@ class TopicUtils(object):
}
}
],
"minimum_should_match": 1,
"must_not": [
{"term": {"is_history":True}}
]
"minimum_should_match": 1
}
},
"score_mode": "sum",
......@@ -260,18 +277,14 @@ class TopicUtils(object):
}
if len(filter_topic_id_list) > 0:
query_function_score["query"]["bool"]["must_not"] = [
{"terms":{"id":filter_topic_id_list}}
]
if len(linucb_user_id_list)>0:
if "must_not" in query_function_score["query"]["bool"]:
query_function_score["query"]["bool"]["must_not"] += [
{"terms": {"user_id": linucb_user_id_list}}
]
else:
query_function_score["query"]["bool"]["must_not"] = [
{"terms": {"user_id": linucb_user_id_list}}
{"terms": {"id": filter_topic_id_list}}
]
if current_topic_id != -1:
query_function_score["query"]["bool"]["must_not"] = [{
"term": {
"id": current_topic_id
}
}]
if query is not None: # 搜索帖子
multi_fields = {
'description': 200,
......@@ -289,8 +302,7 @@ class TopicUtils(object):
query_function_score["query"]["bool"]["should"] = [
{'multi_match': multi_match},
{"term": {"tag_list": tag_id}},
{"term": {"user_nick_name_pre": query.lower()}}
{"term": {"tag_list": tag_id}}
]
query_function_score["query"]["bool"]["minimum_should_match"] = 1
else:
......@@ -303,7 +315,6 @@ class TopicUtils(object):
{"term": {"is_operation_home_recommend": True}}
]
q["query"]["function_score"] = query_function_score
if not disable_collpase:
q["collapse"] = {
"field": "user_id"
}
......@@ -325,18 +336,18 @@ class TopicUtils(object):
# }
# },
{
"offline_score": {
"_score": {
"order": "desc"
}
},
{
"_score": {
"offline_score": {
"order": "desc"
}
}
]
result_dict = ESPerform.get_search_results(ESPerform.get_cli(), sub_index_name=index_type, query_body=q,
offset=offset, size=size,routing=routing)
offset=offset, size=size, routing=routing)
topic_id_list = list()
......@@ -349,7 +360,8 @@ class TopicUtils(object):
@classmethod
def get_topic_detail_recommend_list(cls, user_id, topic_id, topic_tag_list, topic_pictorial_id, topic_user_id,
filter_topic_user_id, have_read_topic_list, offset, size, es_cli_obj=None,index_type="topic",routing=None):
filter_topic_user_id, have_read_topic_list, offset, size, es_cli_obj=None,
index_type="topic", routing=None):
"""
:remark 帖子详情页推荐列表,缺少按时间衰减
:param user_id:
......@@ -426,7 +438,7 @@ class TopicUtils(object):
}
result_dict = ESPerform.get_search_results(es_cli_obj, sub_index_name=index_type, query_body=q,
offset=offset, size=size,routing=routing)
offset=offset, size=size, routing=routing)
return result_dict["hits"]
except:
......@@ -434,9 +446,10 @@ class TopicUtils(object):
return []
@classmethod
def top_get_topic_detail_recommend_list(cls, user_id, topic_id,have_read_topic_list, size, es_cli_obj=None,
index_type="topic", routing=None,collection_topic_tag_list = [],topic_tag_list = [],
topic_user_id =-1):
def top_get_topic_detail_recommend_list(cls, user_id, topic_id, have_read_topic_list, size, es_cli_obj=None,
index_type="topic", routing=None, collection_topic_tag_list=[],
topic_tag_list=[],
topic_user_id=-1):
"""
:remark 帖子详情页推荐列表,缺少按时间衰减
:param user_id:
......@@ -480,7 +493,7 @@ class TopicUtils(object):
# "weight": 5000
# }
# )
if len(topic_tag_list) != 0 or topic_user_id!= -1:
if len(topic_tag_list) != 0 or topic_user_id != -1:
query_function_score = {
"query": {
"bool": {
......@@ -548,7 +561,8 @@ class TopicUtils(object):
# {"create_time": {"order": "desc"}}
# ]
result_dict = ESPerform.get_search_results(es_cli_obj, sub_index_name=index_type, query_body=q, size=size, routing=routing)
result_dict = ESPerform.get_search_results(es_cli_obj, sub_index_name=index_type, query_body=q, size=size,
routing=routing)
topic_id_list = list()
for item in result_dict["hits"]:
......@@ -691,7 +705,7 @@ class TopicUtils(object):
{"term": {"is_deleted": False}},
]
logging.info("get filters:%s"%filters)
logging.info("get filters:%s" % filters)
if not filters:
return f
......@@ -703,7 +717,6 @@ class TopicUtils(object):
if v in (None, '', []):
continue
if k in ["create_time_gte", "create_time_lte"]:
if k == "create_time_gte":
......@@ -797,7 +810,10 @@ class TopicUtils(object):
return nf
for k, v in nfilters.items():
pass
if k == "is_his":
nf.append({
"term": {k: v},
})
return nf
......@@ -897,13 +913,14 @@ class TopicUtils(object):
return ([],0)
@classmethod
def business_topic_ids(cls, filters, nfilters, sorts_by, offset=0, size=10, index_name="topic", filter_online=True):
def business_topic_id(cls, filters, nfilters, sorts_by, offset=0, size=10, index_name="topic", filter_online=True):
must = cls.process_filters(filters, filter_online=filter_online)
query = ''
for k, v in filters.items():
if k == "content":
query = filters[k]
q = {}
q["query"] = {
"function_score": {
......@@ -918,7 +935,7 @@ class TopicUtils(object):
}],
"query": {
"multi_match": {
"fields":["content"],
"fields": ["content"],
"type": "cross_fields",
"operator": "and",
"query": query
......@@ -926,6 +943,7 @@ class TopicUtils(object):
}
}
}
}
if query == '':
q["query"] = {
......@@ -933,6 +951,7 @@ class TopicUtils(object):
"must": must,
"must_not": cls.process_nfilters(nfilters),
}
}
if sorts_by:
......@@ -956,3 +975,76 @@ class TopicUtils(object):
"hits": [],
"total_count": 0
}
@classmethod
def business_topic_ids(cls, filters, nfilters, sorts_by, offset=0, size=10, index_name="topic", filter_online=True):
    """Run a bool-filtered topic search and return the raw hits with the total count.

    The query is a ``bool`` query whose ``must`` clauses come from
    ``cls.business_filters`` and whose ``must_not`` clauses come from
    ``cls.process_nfilters``.

    :param filters: mapping of field name -> value, passed to ``cls.business_filters``.
    :param nfilters: negative filters, passed to ``cls.process_nfilters``.
    :param sorts_by: sort specification; when truthy it is passed to
        ``cls.process_sort`` and a non-empty result is attached as ``q["sort"]``.
    :param offset: forwarded to ``ESPerform.get_search_results`` as ``offset``.
    :param size: forwarded to ``ESPerform.get_search_results`` as ``size``.
    :param index_name: forwarded as ``sub_index_name``.
    :param filter_online: forwarded to ``cls.business_filters``.
    :return: ``{"hits": [...], "total_count": int}``; on any search error the
        error is logged and ``{"hits": [], "total_count": 0}`` is returned.
    """
    must = cls.business_filters(filters, filter_online=filter_online)
    q = {
        "query": {
            "bool": {
                "must": must,
                "must_not": cls.process_nfilters(nfilters),
            }
        }
    }

    if sorts_by:
        sorts = cls.process_sort(sorts_by)
        if sorts:
            q["sort"] = sorts

    try:
        result_dict = ESPerform.get_search_results(
            ESPerform.get_cli(), sub_index_name=index_name,
            query_body=q, offset=offset, size=size
        )
        return {
            "hits": result_dict["hits"],
            "total_count": result_dict["total_count"]
        }
    except Exception:
        # Best-effort search: log and fall back to an empty result.
        # `Exception` (not a bare `except:`) so that SystemExit and
        # KeyboardInterrupt still propagate.
        logging.error("catch exception,err_msg:%s" % traceback.format_exc())
        return {
            "hits": [],
            "total_count": 0
        }
@classmethod
def business_filters(cls, filters, filter_online=True):
    """Build the list of ``must`` clauses from a filter mapping.

    :param filters: mapping of field name -> value. Entries whose value is
        ``None``, ``''`` or ``[]`` are ignored.
    :param filter_online: when true (and ``filters`` is non-empty) an
        ``is_online == True`` term clause is added.
    :return: list of clause dicts. It always starts with the
        ``is_deleted == False`` term clause. ``content`` and
        ``virtual_content_level`` become ``match`` clauses; every other key
        becomes a ``term`` clause.
    """
    logging.info("get filters:%s", filters)

    clauses = [
        {"term": {"is_deleted": False}},
    ]
    if not filters:
        # NOTE: with no filters the is_online clause is intentionally left
        # out as well, exactly as the original code did (early return
        # precedes the filter_online check).
        return clauses

    if filter_online:
        clauses.append({"term": {"is_online": True}})

    # Fields queried with "match"; all remaining fields use "term".
    match_fields = ("content", "virtual_content_level")

    for k, v in filters.items():
        if v in (None, '', []):
            continue
        clause_type = "match" if k in match_fields else "term"
        clauses.append({clause_type: {k: v}})

    return clauses
......@@ -15,7 +15,6 @@ from libs.es import ESPerform
from django.conf import settings
def get_discover_page_topic_ids(user_id, device_id, size, query_type=TopicPageType.FIND_PAGE):
try:
if user_id == -1:
......@@ -28,10 +27,11 @@ def get_discover_page_topic_ids(user_id, device_id, size, query_type=TopicPageTy
have_read_topic_id_list = json.loads(redis_field_val_list[0]) if redis_field_val_list[0] else []
recommend_topic_ids = TopicUtils.get_recommend_topic_ids(user_id=user_id, tag_id=0, offset=0, size=size,single_size=size,
recommend_topic_ids = TopicUtils.get_recommend_topic_ids(user_id=user_id, tag_id=0, offset=0, size=size,
single_size=size,
query_type=query_type,
filter_topic_id_list=have_read_topic_id_list,index_type="topic",routing="4,5,6")
filter_topic_id_list=have_read_topic_id_list,
index_type="topic", routing="4,5,6")
have_read_topic_id_list.extend(recommend_topic_ids)
redis_dict = {
......@@ -46,19 +46,21 @@ def get_discover_page_topic_ids(user_id, device_id, size, query_type=TopicPageTy
def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, size, query=None,
query_type=TopicPageType.HOME_RECOMMEND,promote_topic_list = [],disable_collpase=False):
query_type=TopicPageType.HOME_RECOMMEND, promote_topic_list=[],
disable_collpase=False):
try:
if query is None:
if user_id>0:
if user_id > 0:
redis_key = "physical:home_recommend" + ":user_id:" + str(user_id) + ":query_type:" + str(query_type)
else:
redis_key = "physical:home_recommend" + ":device_id:" + device_id + ":query_type:" + str(query_type)
else:
if user_id>0:
redis_key = "physical:home_query" + ":user_id:" + str(user_id) + ":query:" + str(query) + ":query_type:" + str(query_type)
if user_id > 0:
redis_key = "physical:home_query" + ":user_id:" + str(user_id) + ":query:" + str(
query) + ":query_type:" + str(query_type)
else:
redis_key = "physical:home_query" + ":device_id:" + device_id + ":query:" + str(query) + ":query_type:" + str(query_type)
redis_key = "physical:home_query" + ":device_id:" + device_id + ":query:" + str(
query) + ":query_type:" + str(query_type)
redis_field_list = [b'have_read_topic_list']
redis_field_val_list = redis_client.hmget(redis_key, redis_field_list)
......@@ -70,7 +72,7 @@ def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, size, query
if query is None:
have_read_topic_id_list = list(json.loads(redis_field_val_list[0]))
else:
if offset>0: # 首次搜索时不需要过滤已读
if offset > 0: # 首次搜索时不需要过滤已读
have_read_topic_id_list = list(json.loads(redis_field_val_list[0]))
# user_similar_score_redis_key = "physical:user_similar_score:user_id:" + str(user_id)
......@@ -89,7 +91,6 @@ def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, size, query
if b"data" in recommend_topic_dict:
recommend_topic_id_list = json.loads(recommend_topic_dict[b"data"])
# 推荐帖子是强插的,要保证推荐帖子不在已读里
# recommend_topic_id_list = list(set(recommend_topic_id_list) - set(have_read_topic_id_list))
cursor = int(str(recommend_topic_dict[b"cursor"], encoding="utf-8"))
......@@ -106,8 +107,8 @@ def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, size, query
# 用户关注标签
redis_tag_data = redis_client.hget("physical:linucb:register_user_tag_info", user_id)
attention_tag_list = json.loads(redis_tag_data) if redis_tag_data else []
if len(recommend_topic_list)>0:
size = size-len(recommend_topic_list)
if len(recommend_topic_list) > 0:
size = size - len(recommend_topic_list)
have_read_topic_id_list.extend(recommend_topic_list)
# have_read_topic_id_list_add_promote = list()
......@@ -120,9 +121,12 @@ def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, size, query
have_read_topic_id_list.extend(promote_topic_list)
topic_id_list = list()
rank_topic_id_list = TopicUtils.get_recommend_topic_ids(user_id=user_id, tag_id=tag_id, offset=0, size=size,
single_size=size,query=query, query_type=query_type,
single_size=size, query=query, query_type=query_type,
filter_topic_id_list=have_read_topic_id_list,
index_type="topic-high-star",routing="4,5,6",attention_tag_list=attention_tag_list,linucb_user_id_list=recommend_topic_user_list,disable_collpase=disable_collpase)
index_type="topic-high-star", routing="4,5,6",
attention_tag_list=attention_tag_list,
linucb_user_id_list=recommend_topic_user_list,
disable_collpase=disable_collpase)
# if len(recommend_topic_list) == 6 and query is None:
# if (size < 11):
......@@ -140,7 +144,7 @@ def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, size, query
have_read_topic_id_list.extend(rank_topic_id_list)
if len(have_read_topic_id_list) > 30000:
cut_len = len(have_read_topic_id_list)-30000
cut_len = len(have_read_topic_id_list) - 30000
have_read_topic_id_list = have_read_topic_id_list[cut_len:]
redis_dict = {
"have_read_topic_list": json.dumps(have_read_topic_id_list),
......@@ -149,10 +153,10 @@ def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, size, query
# 每个session key保存60天
redis_client.expire(redis_key, 60 * 60 * 24 * 60)
return recommend_topic_list,rank_topic_id_list
return recommend_topic_list, rank_topic_id_list
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return [],[]
return [], []
@bind("physical/search/query_tag_id_by_topic")
......@@ -165,7 +169,8 @@ def query_tag_id_by_topic(offset=0, size=10, topic_id_list=[], user_id=-1):
@bind("physical/search/home_recommend")
def home_recommend(device_id="", user_id=-1, offset=0, size=10, query_type=TopicPageType.HOME_RECOMMEND,promote_topic_list=[]):
def home_recommend(device_id="", user_id=-1, offset=0, size=10, query_type=TopicPageType.HOME_RECOMMEND,
promote_topic_list=[]):
"""
:remark:首页推荐,目前只推荐日记
:param session_id:
......@@ -197,15 +202,17 @@ def home_recommend(device_id="", user_id=-1, offset=0, size=10, query_type=Topic
have_read_topic_id_list = list(json.loads(redis_field_val_list[0]))
if len(have_read_topic_id_list) > offset:
recommend_topic_ids = have_read_topic_id_list[offset:offset+size]
recommend_topic_ids = have_read_topic_id_list[offset:offset + size]
else:
recommend_topic_ids = have_read_topic_id_list[0:size]
else:
recommend_topic_ids,rank_topic_ids = get_home_recommend_topic_ids(user_id, device_id, tag_id=0, offset=0, size=size,
query_type=query_type,promote_topic_list=promote_topic_list)
recommend_topic_ids, rank_topic_ids = get_home_recommend_topic_ids(user_id, device_id, tag_id=0, offset=0,
size=size,
query_type=query_type,
promote_topic_list=promote_topic_list)
return {"linucb_topic_ids": recommend_topic_ids,"rank_topic_ids":rank_topic_ids}
return {"linucb_topic_ids": recommend_topic_ids, "rank_topic_ids": rank_topic_ids}
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return {"linucb_topic_ids": [],"rank_topic_ids":[]}
......@@ -252,11 +259,12 @@ def home_query(device_id="", tag_id=-1, user_id=-1, query="", offset=0, size=10)
if not isinstance(device_id, str):
device_id = ""
recommend_topic_list, rank_topic_id_list = get_home_recommend_topic_ids(user_id, device_id, tag_id, offset=offset, size=size, query=query)
if len(rank_topic_id_list)>0 and len(rank_topic_id_list)<size:
recommend_topic_list, rank_topic_id_list = get_home_recommend_topic_ids(user_id, device_id, tag_id,
offset=offset, size=size, query=query)
if len(rank_topic_id_list) > 0 and len(rank_topic_id_list) < size:
recommend_topic_list, rank_topic_id_list = get_home_recommend_topic_ids(user_id, device_id, tag_id,
offset=offset, size=size,
query=query,disable_collpase=True)
query=query, disable_collpase=True)
return {"recommend_topic_ids": rank_topic_id_list}
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
......@@ -278,9 +286,9 @@ def topic_detail_page_recommend(device_id="", user_id=-1, topic_id=-1, topic_pic
if not isinstance(user_id, int):
user_id = -1
if user_id > 0:
redis_key = "physical:topic_detail_recommend" + ":user_id:" + str(user_id) + "topic_id:"+str(topic_id)
redis_key = "physical:topic_detail_recommend" + ":user_id:" + str(user_id) + "topic_id:" + str(topic_id)
else:
redis_key = "physical:topic_detail_recommend" + ":device_id:" + device_id + "topic_id:"+str(topic_id)
redis_key = "physical:topic_detail_recommend" + ":device_id:" + device_id + "topic_id:" + str(topic_id)
if int(offset) == 0:
have_read_topic_list = list()
# redis_dict = {
......@@ -293,7 +301,7 @@ def topic_detail_page_recommend(device_id="", user_id=-1, topic_id=-1, topic_pic
have_read_topic_list = list()
redis_field_list = [b'have_read_topic_list']
have_read_topic_redis_data = redis_client.hmget(redis_key,redis_field_list)
have_read_topic_redis_data = redis_client.hmget(redis_key, redis_field_list)
have_read_topic_list = json.loads(have_read_topic_redis_data[0]) if have_read_topic_redis_data[0] else []
es_cli_obj = ESPerform.get_cli()
have_read_topic_list.append(topic_id)
......@@ -301,18 +309,23 @@ def topic_detail_page_recommend(device_id="", user_id=-1, topic_id=-1, topic_pic
topic_tag_result = list()
result = list()
if len(topic_tag_list) != 0:
topic_tag_result = TopicUtils.top_get_topic_detail_recommend_list(user_id,topic_id,have_read_topic_list,size,es_cli_obj,
index_type="topic",routing="3,4,5,6",topic_tag_list = topic_tag_list)
topic_tag_result = TopicUtils.top_get_topic_detail_recommend_list(user_id, topic_id, have_read_topic_list,
size, es_cli_obj,
index_type="topic", routing="3,4,5,6",
topic_tag_list=topic_tag_list)
topic_tag_size = len(topic_tag_result)
have_read_topic_list.extend(topic_tag_result)
else:
topic_tag_size = 0
if topic_tag_size <size:
if topic_tag_size < size:
size = size - topic_tag_size
if topic_user_id != -1:
topic_user_result = TopicUtils.top_get_topic_detail_recommend_list(user_id, topic_id, have_read_topic_list,
topic_user_result = TopicUtils.top_get_topic_detail_recommend_list(user_id, topic_id,
have_read_topic_list,
size, es_cli_obj,
index_type="topic", routing="3,4,5,6",topic_user_id = topic_user_id
index_type="topic",
routing="3,4,5,6",
topic_user_id=topic_user_id
)
topic_user_size = len(topic_user_result)
have_read_topic_list.extend(topic_user_result)
......@@ -400,7 +413,8 @@ def topic_search(filters, nfilters=None, sorts_by=None, offset=0, size=10):
try:
(topic_id_list,total_count) = TopicUtils.list_topic_ids(filters=filters, nfilters=nfilters,
sorts_by=sorts_by, offset=offset, size=size)
logging.info("get result_list:%s" % result_list)
topic_ids = [item["_source"]["id"] for item in result_list["hits"]]
return {
"topic_ids": topic_id_list,
"total_count": total_count
......@@ -422,8 +436,10 @@ def query_topic_by_user_similarity(topic_similarity_score_dict, offset=0, size=1
try:
must_topic_id_list = list(topic_similarity_score_dict.keys())
topic_id_list = TopicUtils.get_recommend_topic_ids(tag_id=0, user_id=-1, offset=offset, size=size,single_size=size,
must_topic_id_list=must_topic_id_list,index_type="topic",routing="4,5,6")
topic_id_list = TopicUtils.get_recommend_topic_ids(tag_id=0, user_id=-1, offset=offset, size=size,
single_size=size,
must_topic_id_list=must_topic_id_list, index_type="topic",
routing="4,5,6")
return {"recommend_topic_ids": topic_id_list}
except:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment