Commit 8a73118a authored by lixiaofang

修改hera后台的搜索逻辑

parent 56f1a2ba
...@@ -12,6 +12,7 @@ from .common import TopicDocumentField ...@@ -12,6 +12,7 @@ from .common import TopicDocumentField
from search.utils.common import * from search.utils.common import *
from trans2es.models.pictorial import PictorialTopics from trans2es.models.pictorial import PictorialTopics
class TopicUtils(object): class TopicUtils(object):
@classmethod @classmethod
...@@ -120,9 +121,11 @@ class TopicUtils(object): ...@@ -120,9 +121,11 @@ class TopicUtils(object):
return {} return {}
@classmethod @classmethod
def get_recommend_topic_ids(cls,user_id,tag_id,offset,size,single_size,query=None,query_type=TopicPageType.HOME_RECOMMEND, def get_recommend_topic_ids(cls, user_id, tag_id, offset, size, single_size, query=None,
filter_topic_id_list=[],test_score=False,must_topic_id_list=[],recommend_tag_list=[], query_type=TopicPageType.HOME_RECOMMEND,
user_similar_score_list=[],index_type="topic",routing=None,attention_tag_list=[],linucb_user_id_list = [],disable_collpase=False): filter_topic_id_list=[], test_score=False, must_topic_id_list=[], recommend_tag_list=[],
user_similar_score_list=[], index_type="topic", routing=None, attention_tag_list=[],
linucb_user_id_list=[], disable_collpase=False):
""" """
:remark:获取首页推荐帖子列表 :remark:获取首页推荐帖子列表
:param user_id: :param user_id:
...@@ -244,7 +247,7 @@ class TopicUtils(object): ...@@ -244,7 +247,7 @@ class TopicUtils(object):
], ],
"minimum_should_match": 1, "minimum_should_match": 1,
"must_not": [ "must_not": [
{"term": {"is_history":True}} {"term": {"is_history": True}}
] ]
} }
}, },
...@@ -260,10 +263,10 @@ class TopicUtils(object): ...@@ -260,10 +263,10 @@ class TopicUtils(object):
} }
if len(filter_topic_id_list) > 0: if len(filter_topic_id_list) > 0:
query_function_score["query"]["bool"]["must_not"] = [ query_function_score["query"]["bool"]["must_not"] = [
{"terms":{"id":filter_topic_id_list}} {"terms": {"id": filter_topic_id_list}}
] ]
if len(linucb_user_id_list)>0: if len(linucb_user_id_list) > 0:
if "must_not" in query_function_score["query"]["bool"]: if "must_not" in query_function_score["query"]["bool"]:
query_function_score["query"]["bool"]["must_not"] += [ query_function_score["query"]["bool"]["must_not"] += [
{"terms": {"user_id": linucb_user_id_list}} {"terms": {"user_id": linucb_user_id_list}}
...@@ -294,7 +297,7 @@ class TopicUtils(object): ...@@ -294,7 +297,7 @@ class TopicUtils(object):
] ]
query_function_score["query"]["bool"]["minimum_should_match"] = 1 query_function_score["query"]["bool"]["minimum_should_match"] = 1
query_function_score["query"]["bool"]["filter"].append( query_function_score["query"]["bool"]["filter"].append(
{"range": {"content_level": {"gte":4,"lte":6}}} {"range": {"content_level": {"gte": 4, "lte": 6}}}
) )
else: else:
if "must_not" in query_function_score["query"]["bool"]: if "must_not" in query_function_score["query"]["bool"]:
...@@ -347,7 +350,7 @@ class TopicUtils(object): ...@@ -347,7 +350,7 @@ class TopicUtils(object):
} }
] ]
result_dict = ESPerform.get_search_results(ESPerform.get_cli(), sub_index_name=index_type, query_body=q, result_dict = ESPerform.get_search_results(ESPerform.get_cli(), sub_index_name=index_type, query_body=q,
offset=offset, size=size,routing=routing) offset=offset, size=size, routing=routing)
topic_id_list = list() topic_id_list = list()
...@@ -360,7 +363,8 @@ class TopicUtils(object): ...@@ -360,7 +363,8 @@ class TopicUtils(object):
@classmethod @classmethod
def get_topic_detail_recommend_list(cls, user_id, topic_id, topic_tag_list, topic_pictorial_id, topic_user_id, def get_topic_detail_recommend_list(cls, user_id, topic_id, topic_tag_list, topic_pictorial_id, topic_user_id,
filter_topic_user_id, have_read_topic_list, offset, size, es_cli_obj=None,index_type="topic",routing=None): filter_topic_user_id, have_read_topic_list, offset, size, es_cli_obj=None,
index_type="topic", routing=None):
""" """
:remark 帖子详情页推荐列表,缺少按时间衰减 :remark 帖子详情页推荐列表,缺少按时间衰减
:param user_id: :param user_id:
...@@ -437,7 +441,7 @@ class TopicUtils(object): ...@@ -437,7 +441,7 @@ class TopicUtils(object):
} }
result_dict = ESPerform.get_search_results(es_cli_obj, sub_index_name=index_type, query_body=q, result_dict = ESPerform.get_search_results(es_cli_obj, sub_index_name=index_type, query_body=q,
offset=offset, size=size,routing=routing) offset=offset, size=size, routing=routing)
return result_dict["hits"] return result_dict["hits"]
except: except:
...@@ -445,9 +449,10 @@ class TopicUtils(object): ...@@ -445,9 +449,10 @@ class TopicUtils(object):
return [] return []
@classmethod @classmethod
def top_get_topic_detail_recommend_list(cls, user_id, topic_id,have_read_topic_list, size, es_cli_obj=None, def top_get_topic_detail_recommend_list(cls, user_id, topic_id, have_read_topic_list, size, es_cli_obj=None,
index_type="topic", routing=None,collection_topic_tag_list = [],topic_tag_list = [], index_type="topic", routing=None, collection_topic_tag_list=[],
topic_user_id =-1): topic_tag_list=[],
topic_user_id=-1):
""" """
:remark 帖子详情页推荐列表,缺少按时间衰减 :remark 帖子详情页推荐列表,缺少按时间衰减
:param user_id: :param user_id:
...@@ -491,26 +496,26 @@ class TopicUtils(object): ...@@ -491,26 +496,26 @@ class TopicUtils(object):
# "weight": 5000 # "weight": 5000
# } # }
# ) # )
if len(topic_tag_list) != 0 or topic_user_id!= -1: if len(topic_tag_list) != 0 or topic_user_id != -1:
query_function_score = { query_function_score = {
"query": { "query": {
"bool": { "bool": {
"must": [ "must": [
{"range": {"content_level": {"gte": 3, "lte": 6}}}, {"range": {"content_level": {"gte": 3, "lte": 6}}},
{"term": {"is_online": True}}, {"term": {"is_online": True}},
{"term": {"is_deleted": False}} {"term": {"is_deleted": False}}
], ],
"must_not": { "must_not": {
"terms": { "terms": {
"id": have_read_topic_list "id": have_read_topic_list
}
} }
} }
} },
}, "score_mode": "sum",
"score_mode": "sum", "boost_mode": "sum",
"boost_mode": "sum", "functions": functions_list
"functions": functions_list }
}
else: else:
query_function_score = { query_function_score = {
"query": { "query": {
...@@ -546,9 +551,9 @@ class TopicUtils(object): ...@@ -546,9 +551,9 @@ class TopicUtils(object):
} }
q["query"]["function_score"] = query_function_score q["query"]["function_score"] = query_function_score
if topic_user_id == -1: if topic_user_id == -1:
q["collapse"] = { q["collapse"] = {
"field": "user_id" "field": "user_id"
} }
q["_source"] = { q["_source"] = {
"includes": ["id", "pictorial_id", "user_id", "_score"] "includes": ["id", "pictorial_id", "user_id", "_score"]
} }
...@@ -559,7 +564,8 @@ class TopicUtils(object): ...@@ -559,7 +564,8 @@ class TopicUtils(object):
# {"create_time": {"order": "desc"}} # {"create_time": {"order": "desc"}}
# ] # ]
result_dict = ESPerform.get_search_results(es_cli_obj, sub_index_name=index_type, query_body=q, size=size, routing=routing) result_dict = ESPerform.get_search_results(es_cli_obj, sub_index_name=index_type, query_body=q, size=size,
routing=routing)
topic_id_list = list() topic_id_list = list()
for item in result_dict["hits"]: for item in result_dict["hits"]:
...@@ -702,7 +708,7 @@ class TopicUtils(object): ...@@ -702,7 +708,7 @@ class TopicUtils(object):
{"term": {"is_deleted": False}}, {"term": {"is_deleted": False}},
] ]
logging.info("get filters:%s"%filters) logging.info("get filters:%s" % filters)
if not filters: if not filters:
return f return f
...@@ -892,21 +898,23 @@ class TopicUtils(object): ...@@ -892,21 +898,23 @@ class TopicUtils(object):
query_body=q, offset=offset, size=size query_body=q, offset=offset, size=size
) )
if len(result_dict["hits"])>0: if len(result_dict["hits"]) > 0:
topic_id_list = [item["_source"]["id"] for item in result_dict["hits"]] topic_id_list = [item["_source"]["id"] for item in result_dict["hits"]]
return (topic_id_list,result_dict["total_count"]) return (topic_id_list, result_dict["total_count"])
elif offset==0 and "pictorial_id" in filters: # 防止帖子同步延迟,画报详情页为空 elif offset == 0 and "pictorial_id" in filters: # 防止帖子同步延迟,画报详情页为空
pictorial_id = int(filters["pictorial_id"]) pictorial_id = int(filters["pictorial_id"])
topic_id_list = list(PictorialTopics.objects.filter(pictorial_id=pictorial_id,is_online=True,is_deleted=False).values_list("topic_id", flat=True)[offset:size]) topic_id_list = list(PictorialTopics.objects.filter(pictorial_id=pictorial_id, is_online=True,
is_deleted=False).values_list("topic_id",
flat=True)[
offset:size])
return (topic_id_list,len(topic_id_list)) return (topic_id_list, len(topic_id_list))
else: else:
return ([], 0) return ([], 0)
except: except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc()) logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return ([],0) return ([], 0)
@classmethod @classmethod
def business_topic_ids(cls, filters, nfilters, sorts_by, offset=0, size=10, index_name="topic", filter_online=True): def business_topic_ids(cls, filters, nfilters, sorts_by, offset=0, size=10, index_name="topic", filter_online=True):
...@@ -919,6 +927,7 @@ class TopicUtils(object): ...@@ -919,6 +927,7 @@ class TopicUtils(object):
"must_not": cls.process_nfilters(nfilters), "must_not": cls.process_nfilters(nfilters),
} }
} }
} }
if sorts_by: if sorts_by:
...@@ -966,7 +975,12 @@ class TopicUtils(object): ...@@ -966,7 +975,12 @@ class TopicUtils(object):
if k == "content": if k == "content":
f.append({ f.append({
"match": {k: v} "multi_match": {
"fields": ["content"],
"type": "cross_fields",
"operator": "and",
"query": v
}
}) })
elif k == "virtual_content_level": elif k == "virtual_content_level":
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment