Commit f938078e authored by 高雅喆

Merge branch 'master' into gyz_es

parents 940d063a cb772743
...@@ -12,6 +12,7 @@ from search.utils.common import * ...@@ -12,6 +12,7 @@ from search.utils.common import *
from trans2es.models.pictorial import PictorialTopics from trans2es.models.pictorial import PictorialTopics
from libs.cache import redis_client from libs.cache import redis_client
class TopicUtils(object): class TopicUtils(object):
@classmethod @classmethod
...@@ -320,7 +321,7 @@ class TopicUtils(object): ...@@ -320,7 +321,7 @@ class TopicUtils(object):
'type': 'best_fields', 'type': 'best_fields',
'operator': 'or', 'operator': 'or',
'fields': ["content", "tag_name_list"], 'fields': ["content", "tag_name_list"],
"analyzer":"gm_default_index" "analyzer": "gm_default_index"
} }
query_function_score["boost_mode"] = "replace" query_function_score["boost_mode"] = "replace"
...@@ -335,13 +336,14 @@ class TopicUtils(object): ...@@ -335,13 +336,14 @@ class TopicUtils(object):
{"range": {"content_level": {"gte": 3, "lte": 6}}} {"range": {"content_level": {"gte": 3, "lte": 6}}}
) )
collection_redis_key_name="physical:official_tag_name_set" collection_redis_key_name = "physical:official_tag_name_set"
collect_tag_name_set=set() collect_tag_name_set = set()
body = { body = {
'text': query, 'text': query,
'analyzer': "gm_default_search" 'analyzer': "gm_default_search"
} }
analyze_res = ESPerform.get_analyze_results(es_cli=ESPerform.get_cli(), sub_index_name="topic",query_body=body) analyze_res = ESPerform.get_analyze_results(es_cli=ESPerform.get_cli(), sub_index_name="topic",
query_body=body)
for item in analyze_res["tokens"]: for item in analyze_res["tokens"]:
token_word = item["token"] token_word = item["token"]
# is_member = redis_client.sismember(collection_redis_key_name, token_word) # is_member = redis_client.sismember(collection_redis_key_name, token_word)
...@@ -357,10 +359,10 @@ class TopicUtils(object): ...@@ -357,10 +359,10 @@ class TopicUtils(object):
} }
functions_list += [ functions_list += [
{ {
"weight":10, "weight": 10,
"filter":{ "filter": {
"term":{ "term": {
"language_type":1 "language_type": 1
} }
} }
}, },
...@@ -434,9 +436,9 @@ class TopicUtils(object): ...@@ -434,9 +436,9 @@ class TopicUtils(object):
topic_id_list.append(item["_source"]["id"]) topic_id_list.append(item["_source"]["id"])
if has_score: if has_score:
return topic_id_list,ret_data_list,topic_score_list return topic_id_list, ret_data_list, topic_score_list
else: else:
return topic_id_list,ret_data_list return topic_id_list, ret_data_list
except: except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc()) logging.error("catch exception,err_msg:%s" % traceback.format_exc())
if has_score: if has_score:
...@@ -444,7 +446,6 @@ class TopicUtils(object): ...@@ -444,7 +446,6 @@ class TopicUtils(object):
else: else:
return list(), list() return list(), list()
@classmethod @classmethod
def userful_tag_topic_list(cls, user_id, have_read_topic_list, size, def userful_tag_topic_list(cls, user_id, have_read_topic_list, size,
index_type="topic-high-star", routing=None, useful_tag_list=[]): index_type="topic-high-star", routing=None, useful_tag_list=[]):
...@@ -528,7 +529,6 @@ class TopicUtils(object): ...@@ -528,7 +529,6 @@ class TopicUtils(object):
logging.error("catch exception,err_msg:%s" % traceback.format_exc()) logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return [] return []
@classmethod @classmethod
def get_linucb_topic_info_for_debug(cls, size, def get_linucb_topic_info_for_debug(cls, size,
index_type="topic-high-star", routing=None, linucb_topic_list=[]): index_type="topic-high-star", routing=None, linucb_topic_list=[]):
...@@ -546,22 +546,22 @@ class TopicUtils(object): ...@@ -546,22 +546,22 @@ class TopicUtils(object):
} }
} }
q["_source"] = { q["_source"] = {
"includes": ["id","content_level","edit_tag_list"] "includes": ["id", "content_level", "edit_tag_list"]
} }
result_dict = ESPerform.get_search_results(es_cli_obj, sub_index_name=index_type, query_body=q, result_dict = ESPerform.get_search_results(es_cli_obj, sub_index_name=index_type, query_body=q,
size=size, size=size,
routing="6") routing="6")
topic_id_dict = dict() topic_id_dict = dict()
for item in result_dict["hits"]: for item in result_dict["hits"]:
topic_id_dict.update({item["_source"]["id"]:{"content_level":item["_source"]["content_level"],"edit_tag_list":item["_source"]["edit_tag_list"]}}) topic_id_dict.update({item["_source"]["id"]: {"content_level": item["_source"]["content_level"],
"edit_tag_list": item["_source"]["edit_tag_list"]}})
return topic_id_dict return topic_id_dict
except: except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc()) logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return [] return []
@classmethod @classmethod
def get_linucb_pictorial_info_for_debug(cls,size,linucb_pictorial_list = []): def get_linucb_pictorial_info_for_debug(cls, size, linucb_pictorial_list=[]):
try: try:
q = { q = {
"query": { "query": {
...@@ -593,8 +593,6 @@ class TopicUtils(object): ...@@ -593,8 +593,6 @@ class TopicUtils(object):
logging.error("catch exception,err_msg:%s" % traceback.format_exc()) logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return list() return list()
@classmethod @classmethod
def get_topic_detail_recommend_list(cls, user_id, topic_id, topic_tag_list, topic_pictorial_id, topic_user_id, def get_topic_detail_recommend_list(cls, user_id, topic_id, topic_tag_list, topic_pictorial_id, topic_user_id,
filter_topic_user_id, have_read_topic_list, offset, size, es_cli_obj=None, filter_topic_user_id, have_read_topic_list, offset, size, es_cli_obj=None,
...@@ -1294,7 +1292,20 @@ class TopicUtils(object): ...@@ -1294,7 +1292,20 @@ class TopicUtils(object):
"query": v "query": v
} }
}) })
elif k == "is_shadow":
if v == 0:
f.append({
"term": {
"is_shadow": False
}
})
else:
f.append({
"term": {
"is_shadow": True
}
})
elif k == "virtual_content_level": elif k == "virtual_content_level":
f.append({ f.append({
"match": {k: v} "match": {k: v}
......
...@@ -420,7 +420,8 @@ def pictorial_topic_sort(pictorial_id=-1, offset=0, size=10, sort_type=PICTORIAL ...@@ -420,7 +420,8 @@ def pictorial_topic_sort(pictorial_id=-1, offset=0, size=10, sort_type=PICTORIAL
pict_pictorial_ids_list = [] pict_pictorial_ids_list = []
# 获取es链接对象 # 获取es链接对象
es_cli_obj = ESPerform.get_cli() es_cli_obj = ESPerform.get_cli()
result_dict = ESPerform.get_search_results(es_cli=es_cli_obj, sub_index_name="mv-alpha-topic-prod-190905001", query_body=q, offset=offset, size=size,if_official_index_name=True) result_dict = ESPerform.get_search_results(es_cli_obj, "topic", query_body=q, offset=offset, size=size)
# result_dict = ESPerform.get_search_results(es_cli=es_cli_obj, sub_index_name="mv-alpha-topic-prod-190905001", query_body=q, offset=offset, size=size,if_official_index_name=True)
# logging.info("get pictorial_topic_sort res:%s" % result_dict) # logging.info("get pictorial_topic_sort res:%s" % result_dict)
......
...@@ -53,7 +53,7 @@ def get_discover_page_topic_ids(user_id, device_id, size, query_type=TopicPageTy ...@@ -53,7 +53,7 @@ def get_discover_page_topic_ids(user_id, device_id, size, query_type=TopicPageTy
return [] return []
def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, underexposure_lin_topic_count, size, query=None, def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, underexposure_lin_topic_count=0, size=0, query=None,
query_type=TopicPageType.HOME_RECOMMEND, promote_topic_list=[], disable_collpase=False, query_type=TopicPageType.HOME_RECOMMEND, promote_topic_list=[], disable_collpase=False,
usefulrecall=-1, useful_tag_list=[], has_score=False, gray_list=[]): usefulrecall=-1, useful_tag_list=[], has_score=False, gray_list=[]):
try: try:
...@@ -141,6 +141,7 @@ def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, underexposu ...@@ -141,6 +141,7 @@ def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, underexposu
# linucb 推荐新帖子 # linucb 推荐新帖子
if linucb_recommend_tags: if linucb_recommend_tags:
linucb_recommend_tags_set_tags = get_same_tagset_ids(linucb_recommend_tags) linucb_recommend_tags_set_tags = get_same_tagset_ids(linucb_recommend_tags)
if underexposure_lin_topic_count:
underexposure_lin_topic_ids = ESPerform.get_tag_new_topic_list(linucb_recommend_tags_set_tags, have_read_topic_id_list, underexposure_lin_topic_count) underexposure_lin_topic_ids = ESPerform.get_tag_new_topic_list(linucb_recommend_tags_set_tags, have_read_topic_id_list, underexposure_lin_topic_count)
size = size - len(underexposure_lin_topic_ids) size = size - len(underexposure_lin_topic_ids)
have_read_topic_id_list.extend(underexposure_lin_topic_ids) have_read_topic_id_list.extend(underexposure_lin_topic_ids)
...@@ -570,10 +571,10 @@ def home_query(device_id="", tag_id=-1, user_id=-1, query="", offset=0, size=10, ...@@ -570,10 +571,10 @@ def home_query(device_id="", tag_id=-1, user_id=-1, query="", offset=0, size=10,
if not isinstance(device_id, str): if not isinstance(device_id, str):
device_id = "" device_id = ""
recommend_topic_list, rank_topic_id_list = get_home_recommend_topic_ids(user_id, device_id, tag_id, unexposure_lin_topic, recommend_topic_list, rank_topic_id_list = get_home_recommend_topic_ids(user_id, device_id, tag_id,
offset=offset, size=size, query=query) offset=offset, size=size, query=query)
if len(rank_topic_id_list) > 0 and len(rank_topic_id_list) < size: if len(rank_topic_id_list) > 0 and len(rank_topic_id_list) < size:
recommend_topic_list, rank_topic_id_list = get_home_recommend_topic_ids(user_id, device_id, tag_id, unexposure_lin_topic, recommend_topic_list, rank_topic_id_list = get_home_recommend_topic_ids(user_id, device_id, tag_id,
offset=offset, size=size, offset=offset, size=size,
query=query, disable_collpase=True) query=query, disable_collpase=True)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment