#!/usr/bin/env python # -*- coding: utf-8 -*- import logging import traceback import json from alpha_types.venus import TOPIC_SEARCH_SORT from search.views.tag import get_same_tagset_ids from libs.es import ESPerform from .common import TopicDocumentField from search.utils.common import * from trans2es.models.pictorial import PictorialTopics from libs.cache import redis_client class TopicUtils(object): @classmethod def get_related_user_info(cls, user_id, offset=0, size=10): """ :remark:获取指定用户相关用户列表 :param user_id: :param offset: :param size: :return: """ try: q = dict() q["query"] = { "term": { "user_id": user_id } } q["_source"] = { "include": ["tag_list", "attention_user_id_list", "pick_user_id_list", "same_pictorial_user_id_list"] } result_dict = ESPerform.get_search_results(ESPerform.get_cli(), "user", q, offset, size) return result_dict except: logging.error("catch exception,err_msg:%s" % traceback.format_exc()) return {"total_count": 0, "hits": []} @classmethod def analyze_related_user_id_list(cls, related_user_id_list): """ :remark:获取指定用户关联的 用户列表 :param related_user_id_list: :return: """ try: chinese_user_id_list = list() japan_user_id_list = list() korea_user_id_list = list() for item in related_user_id_list: if item["country_id"] == 0: chinese_user_id_list.append(item["user_id"]) elif item["country_id"] == 1: japan_user_id_list.append(item["user_id"]) elif item["country_id"] == 2: korea_user_id_list.append(item["user_id"]) return (chinese_user_id_list, japan_user_id_list, korea_user_id_list) except: logging.error("catch exception,err_msg:%s" % traceback.format_exc()) return ([], [], []) @classmethod def refresh_redis_hash_data(cls, redis_cli, redis_key, redis_data_dict): try: redis_cli.hmset(redis_key, redis_data_dict) return True except: logging.error("catch exception,err_msg:%s" % traceback.format_exc()) return False @classmethod def ___get_should_term_list(cls, ori_list, field_name="tag_list"): try: should_term_list = list() for term_id in ori_list: term_dict = { "term": { field_name: {"value": term_id} } } should_term_list.append(term_dict) return should_term_list except: logging.error("catch exception,err_msg:%s" % traceback.format_exc()) return [] @classmethod def get_topic_tag_info(cls, offset, size, topic_id_list, user_id): try: q = { "query": { "terms": { "id": topic_id_list } }, "_source": { "includes": ["id", "pictorial_id", "offline_score", "user_id", "edit_tag_list"] } } result_dict = ESPerform.get_search_results(ESPerform.get_cli(), sub_index_name="topic", query_body=q, offset=offset, size=size) topic_id_dict = dict() for item in result_dict["hits"]: if "edit_tag_list" in item["_source"]: topic_id_dict[str(item["_source"]["id"])] = item["_source"]["edit_tag_list"] else: topic_id_dict[str(item["_source"]["id"])] = list() return topic_id_dict except: logging.error("catch exception,err_msg:%s" % traceback.format_exc()) return {} @classmethod def get_recommend_topic_ids(cls, user_id, tag_id, offset, size, single_size, query=None, query_type=TopicPageType.HOME_RECOMMEND, filter_topic_id_list=[], test_score=False, must_topic_id_list=[], recommend_tag_list=[], user_similar_score_list=[], index_type="topic", routing=None, attention_tag_list=[], linucb_user_id_list=[], disable_collpase=False, has_score=False): """ :remark:获取首页推荐回答列表 :param user_id: :param offset: :param size: :param is_first_time: :return: """ try: attention_user_id_list = list() # pick_user_id_list = list() # same_group_id_list = list() user_tag_list = list() if filter_topic_id_list == None: filter_topic_id_list = [] result_dict = TopicUtils.get_related_user_info(user_id, 0, 1) if len(result_dict["hits"]) == 0: logging.warning("not find user_id:%d in es!" % int(user_id)) else: attention_user_info_list = result_dict["hits"][0]["_source"]["attention_user_id_list"] attention_user_id_list = [item["user_id"] for item in attention_user_info_list] # pick_user_info_list = result_dict["hits"][0]["_source"]["pick_user_id_list"] # pick_user_id_list = [item["user_id"] for item in pick_user_info_list] # same_pictorial_user_info_list = result_dict["hits"][0]["_source"]["same_pictorial_user_id_list"] # # same_pictorial_id_list = [item["user_id"] for item in same_pictorial_user_info_list] # same_pictorial_id_list = same_pictorial_id_list[:100] user_tag_list = result_dict["hits"][0]["_source"]["tag_list"] q = dict() ret_data_list = list() topic_id_list = list() topic_score_list = list() q["query"] = dict() functions_list = list() query_function_score = { "query": { "bool": { "filter": [ {"term": {"is_online": True}}, {"term": {"is_deleted": False}} ], "should": [ { "bool": { "must": [ {"term": {"has_image": True}}, {"term": {"has_video": False}} ] } }, { "bool": { "must": { "term": {"has_video": True} } } } ], "minimum_should_match": 1, "must_not": [ {"term": {"is_history": True}} ] } }, "score_mode": "sum", "boost_mode": "sum", "functions": functions_list } if len(must_topic_id_list) > 0: query_function_score["query"]["bool"]["must"] = { "terms": { "id": must_topic_id_list } } if len(filter_topic_id_list) > 0: query_function_score["query"]["bool"]["must_not"] = [ {"terms": {"id": filter_topic_id_list}} ] if len(linucb_user_id_list) > 0: if "must_not" in query_function_score["query"]["bool"]: query_function_score["query"]["bool"]["must_not"] += [ {"terms": {"user_id": linucb_user_id_list}} ] else: query_function_score["query"]["bool"]["must_not"] = [ {"terms": {"user_id": linucb_user_id_list}} ] q["_source"] = { "includes": ["id", "highlight", "description"] } if query is None: if "must_not" in query_function_score["query"]["bool"]: query_function_score["query"]["bool"]["must_not"] += [ {"term": {"is_operation_home_recommend": True}} ] else: query_function_score["query"]["bool"]["must_not"] = [ {"term": {"is_operation_home_recommend": True}} ] if query_type == TopicPageType.FIND_PAGE: query_function_score["query"]["bool"]["filter"].append( {"range": {"content_level": {"gte": 4, "lte": 6}}} ) else: query_function_score["query"]["bool"]["filter"].append( {"term": {"content_level": 6}} ) # if user_id and user_id > 0: # redis_key_prefix = "physical:user_similar:participant_user_id:" # similar_redis_key = redis_key_prefix + str(user_id) # redis_user_similar_data = redis_client.get(similar_redis_key) # user_similar_list = json.loads(redis_user_similar_data) if redis_user_similar_data else [] # if len(user_similar_list) > 0: # functions_list.extend(user_similar_list) functions_list.append( { "gauss": { "create_time": { "scale": "1d", "decay": 0.99 } }, "weight": 60 } ) if len(attention_user_id_list) > 0: functions_list.append( { "filter": {"constant_score": {"filter": {"terms": {"user_id": attention_user_id_list}}}}, "weight": 100, } ) if len(attention_tag_list) > 0: functions_list.append( { "filter": {"bool": { "should": {"terms": {"tag_list": attention_tag_list}}}}, "weight": 100 } ) query_function_score["functions"] = functions_list q["query"]["function_score"] = query_function_score if not disable_collpase: q["collapse"] = { "field": "user_id" } q["sort"] = [ # { # "_script": { # "type": "number", # "script": { # "lang": "expression", # "source": "_score+doc['offline_score']" # # "lang":"painless", # # "source":"_score+params._source.offline_score" # }, # "order": "desc" # } # }, # { # "offline_score": { # "order": "desc" # } # }, { "_script": { "order": "desc", "script": { "inline": "10*doc['topic_ctr_30'].value+doc['like_rate_30'].value+2*doc['topic_ctr_all'].value+doc['like_rate_all'].value" }, "type": "number" } }, { "_score": { "order": "desc" } } ] result_dict = ESPerform.get_search_results(ESPerform.get_cli(), sub_index_name=index_type, query_body=q, offset=offset, size=size, routing=routing) for item in result_dict["hits"]: topic_id_list.append(item["_source"]["id"]) if has_score: topic_score_list.append(item["_score"]) else: multi_match = { 'query': query, 'type': 'best_fields', 'operator': 'or', 'fields': ["content", "tag_name_list"], "analyzer": "gm_default_index" } query_function_score["boost_mode"] = "replace" same_tagset_ids = get_same_tagset_ids(tag_id) query_function_score["query"]["bool"]["should"] = [ {'multi_match': multi_match}, {"terms": {"tag_list": same_tagset_ids}}, {"term": {"user_nick_name_pre": query.lower()}} ] query_function_score["query"]["bool"]["minimum_should_match"] = 1 query_function_score["query"]["bool"]["filter"].append( {"range": {"content_level": {"gte": 3, "lte": 6}}} ) collection_redis_key_name = "physical:official_tag_name_set" collect_tag_name_set = set() body = { 'text': query, 'analyzer': "gm_default_search" } analyze_res = ESPerform.get_analyze_results(es_cli=ESPerform.get_cli(), sub_index_name="topic", query_body=body) for item in analyze_res["tokens"]: token_word = item["token"] # is_member = redis_client.sismember(collection_redis_key_name, token_word) # if is_member: collect_tag_name_set.add(token_word) query_fields = ["content", "tag_name_list"] multi_match = { 'query': query, 'type': 'best_fields', 'operator': 'and', 'fields': query_fields, } functions_list += [ { "weight": 10, "filter": { "term": { "language_type": 1 } } }, { "weight": 1000, "filter": { "bool": { "minimum_should_match": 1, "should": [ {'match_phrase': {"content": query}}, {'match_phrase': {"tag_name_list": query}}, # {'multi_match': multi_match}, {"terms": {"tag_list": same_tagset_ids}}, {"term": {"user_nick_name_pre": query.lower()}} ] } } } ] for query_item in collect_tag_name_set: for field_item in query_fields: term_dict = { "filter": { "term": { field_item: query_item } }, "weight": 2 } functions_list.append(term_dict) query_function_score["functions"] = functions_list q["query"]["function_score"] = query_function_score q["sort"] = [ # { # "_script": { # "type": "number", # "script": { # "lang": "expression", # "source": "_score+doc['offline_score']" # # "lang":"painless", # # "source":"_score+params._source.offline_score" # }, # "order": "desc" # } # }, { "_score": { "order": "desc" } }, { "latest_reply_time": { "order": "desc" } }, { "offline_score": { "order": "desc" } } ] q["highlight"] = ESPerform.get_highlight(["content"]) result_dict = ESPerform.get_search_results(ESPerform.get_cli(), sub_index_name=index_type, query_body=q, offset=offset, size=size, routing=routing) for item in result_dict["hits"]: ret_data_list.append({"id": item["_source"]["id"], "highlight": item.get("highlight", {})}) topic_id_list.append(item["_source"]["id"]) if has_score: return topic_id_list, ret_data_list, topic_score_list else: return topic_id_list, ret_data_list except: logging.error("catch exception,err_msg:%s" % traceback.format_exc()) if has_score: return list(), list(), list() else: return list(), list() @classmethod def userful_tag_topic_list(cls, user_id, have_read_topic_list, size, index_type="topic-high-star", routing=None, useful_tag_list=[]): """ :remark 帖子详情页推荐列表,缺少按时间衰减 :param user_id: :param topic_tag_list: :param topic_group_id: :param topic_user_id: :param offset: :param size: :return: """ try: es_cli_obj = ESPerform.get_cli() # useful_tag_list = list() # q = dict() # q["query"] = { # "term": { # "user_id": user_id # } # } # # q["_source"] = { # "include": ["useful_tag_list"] # } # result_dict = ESPerform.get_search_results(es_cli_obj, "user", q, 0, 1) # if len(result_dict["hits"]) == 0: # logging.warning("not find user_id:%d in es!" % int(user_id)) # else: # useful_tag_list = result_dict["hits"][0]["_source"]["useful_tag_list"] if len(useful_tag_list) == 0: return [] else: q = dict() q["query"] = { "bool": { "must": [ {"term": {"is_online": True}}, {"term": {"is_deleted": False}}, {"terms": {"useful_tag_list": useful_tag_list}}, {"term": {"content_level": 6}} ], "must_not": { "terms": { "id": have_read_topic_list } } } } # logging.warning("topic_tag_list:%s"%str(topic_tag_list)) # query_function_score = { # "query": { # "bool": { # "must": [ # {"term": {"is_online": True}}, # {"term": {"is_deleted": False}}, # {"terms": {"tag_list": useful_tag_list}} # ], # "must_not": { # "terms": { # "id": have_read_topic_list # } # } # } # } # } q["_source"] = { "includes": ["id"] } result_dict = ESPerform.get_search_results(es_cli_obj, sub_index_name=index_type, query_body=q, size=size, routing=routing) topic_id_list = list() for item in result_dict["hits"]: topic_id_list.append(item["_source"]["id"]) return topic_id_list except: logging.error("catch exception,err_msg:%s" % traceback.format_exc()) return [] @classmethod def get_linucb_topic_info_for_debug(cls, size, index_type="topic-high-star", routing=None, linucb_topic_list=[]): try: es_cli_obj = ESPerform.get_cli() if len(linucb_topic_list) == 0: return {} else: q = dict() q["query"] = { "bool": { "must": [ {"terms": {"id": linucb_topic_list}} ] } } q["_source"] = { "includes": ["id", "content_level", "edit_tag_list"] } result_dict = ESPerform.get_search_results(es_cli_obj, sub_index_name=index_type, query_body=q, size=size, routing="6") topic_id_dict = dict() for item in result_dict["hits"]: topic_id_dict.update({item["_source"]["id"]: {"content_level": item["_source"]["content_level"], "edit_tag_list": item["_source"]["edit_tag_list"]}}) return topic_id_dict except: logging.error("catch exception,err_msg:%s" % traceback.format_exc()) return [] @classmethod def get_linucb_pictorial_info_for_debug(cls, size, linucb_pictorial_list=[]): try: q = { "query": { "function_score": { "query": { "bool": { "must": [ {"terms": {"id": linucb_pictorial_list}} ] } }, "boost_mode": "sum", "score_mode": "sum", } }, "_source": { "include": ["id", "edit_tag_id"] } } result_dict = ESPerform.get_search_results(ESPerform.get_cli(), sub_index_name="pictorial", query_body=q, offset=0, size=size) pictorial_id_dict = dict() for item in result_dict["hits"]: pictorial_id_dict.update({item["_source"]["id"]: {"edit_tag_list": item["_source"]["edit_tag_id"]}}) return pictorial_id_dict except: logging.error("catch exception,err_msg:%s" % traceback.format_exc()) return list() @classmethod def get_topic_detail_recommend_list(cls, user_id, topic_id, topic_tag_list, topic_pictorial_id, topic_user_id, filter_topic_user_id, have_read_topic_list, offset, size, es_cli_obj=None, index_type="topic", routing=None): """ :remark 帖子详情页推荐列表,缺少按时间衰减 :param user_id: :param topic_tag_list: :param topic_group_id: :param topic_user_id: :param offset: :param size: :return: """ try: if not es_cli_obj: es_cli_obj = ESPerform.get_cli() q = dict() q["query"] = dict() functions_list = [ { "filter": {"term": { "user_id": topic_user_id}}, "weight": 1000 }, { "linear": { "create_time": { "scale": "1d", "decay": 0.5 } } } ] if isinstance(topic_pictorial_id, int) and topic_pictorial_id > 0: functions_list.append( { "filter": {"term": { "pictorial_id": topic_pictorial_id}}, "weight": 1, } ) have_read_topic_list.append(topic_id) query_function_score = { "query": { "bool": { "must": [ {"range": {"content_level": {"gte": 4, "lte": 6}}}, {"term": {"is_online": True}}, {"term": {"is_deleted": False}} ], "must_not": { "terms": { "id": have_read_topic_list } } } }, "score_mode": "sum", "boost_mode": "sum", "functions": functions_list } if filter_topic_user_id: query_function_score["query"]["bool"]["must"].append({"term": {"user_id": topic_user_id}}) if len(topic_tag_list) > 0: query_function_score["query"]["bool"]["should"] = { "terms": { "tag_list": topic_tag_list } } q["query"]["function_score"] = query_function_score q["_source"] = { "includes": ["id", "pictorial_id", "user_id", "_score"] } result_dict = ESPerform.get_search_results(es_cli_obj, sub_index_name=index_type, query_body=q, offset=offset, size=size, routing=routing) return result_dict["hits"] except: logging.error("catch exception,err_msg:%s" % traceback.format_exc()) return [] @classmethod def top_get_topic_detail_recommend_list(cls, user_id, topic_id, have_read_topic_list, size, es_cli_obj=None, index_type="topic", routing=None, collection_topic_tag_list=[], topic_tag_list=[], topic_user_id=-1): """ :remark 帖子详情页推荐列表,缺少按时间衰减 :param user_id: :param topic_tag_list: :param topic_group_id: :param topic_user_id: :param offset: :param size: :return: """ try: if not es_cli_obj: es_cli_obj = ESPerform.get_cli() q = dict() q["query"] = dict() # logging.warning("topic_tag_list:%s"%str(topic_tag_list)) functions_list = [ { "linear": { "create_time": { "scale": "1d", "decay": 0.5 } } } ] # if len(topic_tag_list) > 0: # functions_list.append( # { # "filter": {"bool": { # "should": {"terms": {"tag_list": topic_tag_list}}}}, # "weight": 5000 # } # ) # if topic_user_id != -1: # functions_list.append( # { # "filter": {"bool": { # "should": {"term": {"user_id": topic_user_id}}}}, # "weight": 5000 # } # ) if len(topic_tag_list) != 0 or topic_user_id != -1: query_function_score = { "query": { "bool": { "must": [ {"range": {"content_level": {"gte": 3, "lte": 6}}}, {"term": {"is_online": True}}, {"term": {"is_deleted": False}} ], "must_not": { "terms": { "id": have_read_topic_list } } } }, "score_mode": "sum", "boost_mode": "sum", "functions": functions_list } else: query_function_score = { "query": { "bool": { "must": [ {"range": {"content_level": {"gte": 4, "lte": 6}}}, {"term": {"is_online": True}}, {"term": {"is_deleted": False}} ], "must_not": { "terms": { "id": have_read_topic_list } } } }, "score_mode": "sum", "boost_mode": "sum", "functions": functions_list } if len(topic_tag_list) > 0: query_function_score["query"]["bool"]["filter"] = { "terms": { "edit_tag_list": topic_tag_list } } if topic_user_id != -1: query_function_score["query"]["bool"]["filter"] = { "term": { "user_id": topic_user_id } } q["query"]["function_score"] = query_function_score if topic_user_id == -1: q["collapse"] = { "field": "user_id" } q["_source"] = { "includes": ["id", "pictorial_id", "user_id", "_score"] } # "includes": ["id", "pictorial_id", "user_id", "_score", "create_time", "content_level"] q['sort'] = [ {"latest_reply_time": {"order": "desc"}}, # {"create_time": {"order": "desc"}} ] result_dict = ESPerform.get_search_results(es_cli_obj, sub_index_name=index_type, query_body=q, size=size, routing=routing) topic_id_list = list() for item in result_dict["hits"]: topic_id_list.append(item["_source"]["id"]) return topic_id_list except: logging.error("catch exception,err_msg:%s" % traceback.format_exc()) return [] @classmethod def get_topic_tag_id_list(cls, topic_id, es_cli_obj=None): """ :remark 获取帖子标签列表 :param topic_id: :return: """ try: if not es_cli_obj: es_cli_obj = ESPerform.get_cli() q = dict() q["query"] = { "term": { "id": topic_id } } q["_source"] = { "includes": [TopicDocumentField.TAG_LIST] } result_dict = ESPerform.get_search_results(es_cli_obj, sub_index_name="topic", query_body=q, size=1) tag_id_list = [] if len(result_dict["hits"]) > 0: tag_id_list = result_dict["hits"][0]["_source"][TopicDocumentField.TAG_LIST] return tag_id_list except: logging.error("catch exception,err_msg:%s" % traceback.format_exc()) return list() @classmethod def get_tag_aggregation_topic_id_list(cls, user_id, tag_id, offset, size): try: attention_user_id_list = list() pick_user_id_list = list() result_dict = TopicUtils.get_related_user_info(user_id, 0, 1) if len(result_dict["hits"]) == 0: logging.warning("not find user_id:%d in es!" % int(user_id)) else: attention_user_info_list = result_dict["hits"][0]["_source"]["attention_user_id_list"] attention_user_id_list = [item["user_id"] for item in attention_user_info_list] pick_user_info_list = result_dict["hits"][0]["_source"]["pick_user_id_list"] pick_user_id_list = [item["user_id"] for item in pick_user_info_list] functions_list = [ { "linear": { "create_time": { "scale": "1d", "decay": 0.5 } } } ] if len(attention_user_id_list) > 0: functions_list.append( { "filter": {"bool": { "should": {"terms": {"user_id": attention_user_id_list}}}}, "weight": 3, } ) if len(pick_user_id_list) > 0: functions_list.append( { "filter": {"bool": { "should": {"terms": {"user_id": pick_user_id_list}}}}, "weight": 2 } ) query_function_score = { "query": { "bool": { "must": [ # {"range": {"content_level": {"gte": 3, "lte": 5}}}, {"term": {"is_online": True}}, {"term": {"is_deleted": False}}, {"term": {"tag_list": tag_id}} ], "must_not": [ {"terms": {"content_level": [1, 2]}} ] } }, "score_mode": "sum", "boost_mode": "sum", "functions": functions_list } q = dict() q["query"] = { "function_score": query_function_score } q["_source"] = { "includes": ["id", "pictorial_id", "user_id", "_score", "offline_score", "manual_score"] } q["sort"] = [ { "_script": { "type": "number", "script": { "lang": "expression", "source": "_score+doc['offline_score']+doc['manual_score']" # "lang":"painless", # "source":"_score+params._source.offline_score+params._source.manual_score" }, "order": "desc" } } ] result_dict = ESPerform.get_search_results(ESPerform.get_cli(), sub_index_name="topic", query_body=q, offset=offset, size=size) return result_dict["hits"] except: logging.error("catch exception,err_msg:%s" % traceback.format_exc()) return list() @classmethod def process_filters(cls, filters, filter_online=True): """处理过滤器部分。""" f = [ {"term": {"is_deleted": False}}, ] logging.info("get filters:%s" % filters) if not filters: return f for k, v in filters.items(): if k == "is_complaint": f.append({ "term": {k: v}, }) if v in (None, '', []): continue if k in ["create_time_gte", "create_time_lte"]: if k == "create_time_gte": op = "gte" elif k == "create_time_lte": op = "lte" f.append({ "range": { "create_time_val": { op: v, } } }) elif k in ["id_gte", "id_lte"]: if k == "id_gte": op = "gte" elif k == "id_lte": op = "lte" f.append({ "range": { "id": { op: v, } } }) elif k.endswith("__exclude"): filed = k[:-5] op = "lt" f.append({ "range": { filed: { op: v, } } }) op = "gt" f.append({ "range": { filed: { op: v, } } }) elif k.endswith("__gte") or k.endswith("__lte") or k.endswith("__gt") or k.endswith("__lt"): if k.endswith("__gte"): op = "gte" filed = k[:-5] elif k.endswith("__lte"): op = "lte" filed = k[:-5] elif k.endswith("__gt"): op = "gt" filed = k[:-4] elif k.endswith("__lt"): op = "lt" filed = k[:-4] f.append({ "range": { filed: { op: v, } } }) else: if isinstance(v, list): f.append({ "terms": {k: v}, }) else: f.append({ "term": {k: v}, }) if filter_online: f.append({"term": {"is_online": True}}) return f @classmethod def process_nfilters(cls, nfilters): """处理过滤器部分。""" nf = [] if not nfilters: return nf for k, v in nfilters.items(): pass return nf @classmethod def process_sort(cls, sorts_by, pictorial_id): """处理排序部分。""" sort_rule = [] if isinstance(sorts_by, int): if sorts_by == TOPIC_SEARCH_SORT.VOTE_NUM: sort_rule.append({ "vote_num": { "order": "desc" }, "update_time": { "order": "desc" }, }) if sorts_by == TOPIC_SEARCH_SORT.TOPIC_ADD_TIME: sort_rule.append({ "related_billboard.topic_add_createtime": { "order": "desc", "nested_path": "related_billboard", "nested_filter": { "term": { "related_billboard.pictorial_id": pictorial_id } } }, }) elif isinstance(sorts_by, list): for sort_by in sorts_by: if sort_by == TOPIC_SEARCH_SORT.ID_AEC: sort_rule.append({ "id": { "order": "asc" }, }) elif sort_by == TOPIC_SEARCH_SORT.ID_DESC: sort_rule.append({ "id": { "order": "desc" }, }) elif sort_by == TOPIC_SEARCH_SORT.SCORE_AEC: sort_rule.append({ "sort_score": { "order": "asc" }, }) elif sort_by == TOPIC_SEARCH_SORT.SCORE_DESC: sort_rule.append({ "sort_score": { "order": "desc" }, }) elif sort_by == TOPIC_SEARCH_SORT.VOTE_NUM_AEC: sort_rule.append({ "total_vote_num": { "order": "asc" }, }) elif sort_by == TOPIC_SEARCH_SORT.VOTE_NUM_DESC: sort_rule.append({ "total_vote_num": { "order": "desc" }, }) elif sort_by == TOPIC_SEARCH_SORT.REAL_VOTE_AEC: sort_rule.append({ "related_billboard.real_vote_cnt": { "order": "asc", "nested_path": "related_billboard", "missing": "_last", "nested_filter": { "term": { "related_billboard.pictorial_id": pictorial_id } } }, }) elif sort_by == TOPIC_SEARCH_SORT.REAL_VOTE_DESC: sort_rule.append({ "related_billboard.real_vote_cnt": { "order": "desc", "nested_path": "related_billboard", # "missing": "_last", "nested_filter": { "term": { "related_billboard.pictorial_id": pictorial_id } } }, }) elif sort_by == TOPIC_SEARCH_SORT.VIRT_VOTE_AEC: sort_rule.append({ "related_billboard.virt_vote_cnt": { "order": "asc", "nested_path": "related_billboard", # "missing": "_last", "nested_filter": { "term": { "related_billboard.pictorial_id": pictorial_id } } }, }) elif sort_by == TOPIC_SEARCH_SORT.VIRT_VOTE_DESC: sort_rule.append({ "related_billboard.virt_vote_cnt": { "order": "desc", "nested_path": "related_billboard", # "missing": "_last", "nested_filter": { "term": { "related_billboard.pictorial_id": pictorial_id } } }, }) logging.info("get picotirial:%s" % sort_rule) return sort_rule @classmethod def list_topic_ids(cls, filters, nfilters, sorts_by, offset=0, size=10, index_name="topic", filter_online=True): try: must = cls.process_filters(filters, filter_online=filter_online) q = { "query": { "bool": { "must": must, "must_not": cls.process_nfilters(nfilters), } } } if 'pictorial_id' in filters.keys(): if sorts_by: sorts = cls.process_sort(sorts_by, filters["pictorial_id"]) if sorts: q["sort"] = sorts else: if sorts_by: sorts = cls.process_sort(sorts_by, pictorial_id=None) if sorts: q["sort"] = sorts result_dict = ESPerform.get_search_results( ESPerform.get_cli(), sub_index_name=index_name, query_body=q, offset=offset, size=size ) if len(result_dict["hits"]) > 0: topic_id_list = [item["_source"]["id"] for item in result_dict["hits"]] return (topic_id_list, result_dict["total_count"]) elif offset == 0 and "pictorial_id" in filters: # 防止帖子同步延迟,画报详情页为空 pictorial_id = int(filters["pictorial_id"]) topic_id_list = list(PictorialTopics.objects.filter(pictorial_id=pictorial_id, is_online=True, is_deleted=False).values_list("topic_id", flat=True)[ offset:size]) return (topic_id_list, len(topic_id_list)) else: return ([], 0) except: logging.error("catch exception,err_msg:%s" % traceback.format_exc()) return ([], 0) @classmethod def business_topic_ids(cls, filters, nfilters, sorts_by, offset=0, size=10, index_name="topic", filter_online=True): must = cls.business_filters(filters, filter_online=filter_online) q = { "query": { "bool": { "must": must, "must_not": cls.process_nfilters(nfilters), } } } if 'pictorial_id' in filters.keys(): if sorts_by: sorts = cls.process_sort(sorts_by, filters["pictorial_id"]) if sorts: q["sort"] = sorts else: if sorts_by: sorts = cls.process_sort(sorts_by, pictorial_id=None) if sorts: q["sort"] = sorts try: result_dict = ESPerform.get_search_results( ESPerform.get_cli(), sub_index_name=index_name, query_body=q, offset=offset, size=size ) return { "hits": result_dict["hits"], "total_count": result_dict["total_count"] } except: logging.error("catch exception,err_msg:%s" % traceback.format_exc()) return { "hits": [], "total_count": 0 } @classmethod def business_filters(cls, filters, filter_online=True): """处理过滤器部分。""" logging.info("get filters:%s" % filters) f = [ {"term": {"is_deleted": False}}, ] if not filters: return f if filter_online: f.append({"term": {"is_online": True}}) for k, v in filters.items(): if v in (None, '', []): continue if k == "content": f.append({ "multi_match": { "fields": ["content"], "type": "cross_fields", "operator": "and", "query": v } }) elif k == "is_shadow": if v == 0: f.append({ "term": { "is_shadow": False } }) else: f.append({ "term": { "is_shadow": True } }) elif k == "virtual_content_level": f.append({ "match": {k: v} }) elif k in ["create_time_gte", "create_time_lte"]: if k == "create_time_gte": op = "gte" elif k == "create_time_lte": op = "lte" f.append({ "range": { "create_time_val": { op: v, } } }) elif k == "drop_score": if v == "0": f.append({ "term": {k: v} }) else: f.append({ "range": { "drop_score": { "gte": v, } } }) elif k == "is_kol": f.append({ "term": { "user_is_kol": True } }) elif k == "is_edit": f.append({ "term": { "user_is_edit": True } }) # elif k == "pictorial_id": # f.append({ # "nested": { # "path": "related_billboard", # "query": { # "bool": { # "must": [ # { # "term": { # "related_billboard.pictorial_id": v # } # } # ] # } # } # } # }) else: if isinstance(v, list): f.append({ "terms": {k: v}, }) else: f.append({ "term": {k: v}, }) return f