#!/usr/bin/env python
# -*- coding: utf-8 -*-

import os
import logging
import traceback
import re
import json

from elasticsearch import Elasticsearch
import elasticsearch.helpers
from django.conf import settings


class ESPerform(object):
    cli_obj = None
    cli_info_list = settings.ES_INFO_LIST
    index_prefix = settings.ES_INDEX_PREFIX

    @classmethod
    def get_cli(cls, cli_info=None):
        try:
            init_args = {
                'sniff_on_start': False,
                'sniff_on_connection_fail': False,
            }
            es_cli_info = cli_info if cli_info else cls.cli_info_list
            cls.cli_obj = Elasticsearch(hosts=es_cli_info, **init_args)
            return cls.cli_obj
        except Exception:
            logging.error("catch exception,err_msg:%s" % traceback.format_exc())
            return None

    @classmethod
    def get_official_index_name(cls, sub_index_name, index_flag=None):
        """
        :remark: get the official es index name
        :param sub_index_name:
        :param index_flag:
        :return:
        """
        try:
            assert (index_flag in [None, "read", "write"])
            official_index_name = cls.index_prefix + "-" + sub_index_name
            if index_flag:
                official_index_name += "-" + index_flag
            return official_index_name
        except Exception:
            logging.error("catch exception,err_msg:%s" % traceback.format_exc())
            return None

    @classmethod
    def __load_mapping(cls, doc_type):
        try:
            mapping_file_path = os.path.join(
                os.path.dirname(__file__),
                '..', 'trans2es', 'mapping', '%s.json' % (doc_type,))
            mapping = ''
            with open(mapping_file_path, 'r') as f:
                for line in f:
                    # strip "//" comments so the file parses as plain JSON
                    mapping += re.sub(r'//.*$', '', line)
            mapping = json.loads(mapping)
            return mapping
        except Exception:
            logging.error("catch exception,err_msg:%s" % traceback.format_exc())
            return None

    @classmethod
    def create_index(cls, es_cli, sub_index_name):
        """
        :remark: create the es index and its read/write aliases
        :param sub_index_name:
        :return:
        """
        try:
            assert (es_cli is not None)
            official_index_name = cls.get_official_index_name(sub_index_name)
            index_exist = es_cli.indices.exists(official_index_name)
            if not index_exist:
                es_cli.indices.create(official_index_name)
                read_alias_name = cls.get_official_index_name(sub_index_name, "read")
                es_cli.indices.put_alias(official_index_name, read_alias_name)
                write_alias_name = cls.get_official_index_name(sub_index_name, "write")
                es_cli.indices.put_alias(official_index_name, write_alias_name)
            return True
        except Exception:
            logging.error("catch exception,err_msg:%s" % traceback.format_exc())
            return False

    @classmethod
    def put_index_mapping(cls, es_cli, sub_index_name, mapping_type="_doc", force_sync=False):
        """
        :remark: put the index mapping
        :param es_cli:
        :param sub_index_name:
        :param mapping_type:
        :return:
        """
        try:
            assert (es_cli is not None)
            write_alias_name = cls.get_official_index_name(sub_index_name, "write")
            index_exist = es_cli.indices.exists(write_alias_name)
            if not index_exist and not force_sync:
                return False
            mapping_dict = cls.__load_mapping(sub_index_name)
            es_cli.indices.put_mapping(index=write_alias_name, body=mapping_dict, doc_type=mapping_type)
            return True
        except Exception:
            logging.error("catch exception,err_msg:%s" % traceback.format_exc())
            return False

    @classmethod
    def put_indices_template(cls, es_cli, template_file_name, template_name):
        """
        :remark: put the index template
        :param es_cli:
        :param template_file_name:
        :param template_name:
        :return:
        """
        try:
            assert (es_cli is not None)
            mapping_dict = cls.__load_mapping(template_file_name)
            es_cli.indices.put_template(name=template_name, body=mapping_dict)
            return True
        except Exception:
            logging.error("catch exception,err_msg:%s" % traceback.format_exc())
            return False

    @classmethod
    def es_helpers_bulk(cls, es_cli, data_list, sub_index_name, auto_create_index=False, doc_type="_doc"):
        try:
            assert (es_cli is not None)
            official_index_name = cls.get_official_index_name(sub_index_name, "write")
            index_exists = es_cli.indices.exists(official_index_name)
            if not index_exists:
                if not auto_create_index:
                    logging.error("index:%s is not existing,bulk data error!" % official_index_name)
                    return False
                else:
                    cls.create_index(es_cli, sub_index_name)
                    cls.put_index_mapping(es_cli, sub_index_name)

            bulk_actions = []
            if sub_index_name in ("topic", "topic-star-routing", "topic-high-star"):
                # topic indices are routed by content_level
                for data in data_list:
                    if data:
                        bulk_actions.append({
                            '_op_type': 'index',
                            '_index': official_index_name,
                            '_type': doc_type,
                            '_id': data['id'],
                            '_source': data,
                            'routing': data["content_level"]
                        })
            else:
                for data in data_list:
                    if data:
                        bulk_actions.append({
                            '_op_type': 'index',
                            '_index': official_index_name,
                            '_type': doc_type,
                            '_id': data['id'],
                            '_source': data,
                        })
            elasticsearch.helpers.bulk(es_cli, bulk_actions)
            return True
        except Exception:
            logging.error("catch exception,err_msg:%s" % traceback.format_exc())
            return False
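
    # Usage sketch (hedged, not from the original code): bulk-index documents
    # through the "write" alias. Assumes Django settings are loaded and every
    # document carries the "id" field (plus "content_level" for topic indices)
    # that es_helpers_bulk reads; the payload below is purely illustrative.
    #
    #   cli = ESPerform.get_cli()
    #   docs = [{"id": 1, "content_level": 5, "is_online": True}]
    #   ESPerform.es_helpers_bulk(cli, docs, "topic", auto_create_index=True)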
    @classmethod
    def get_search_results(cls, es_cli, sub_index_name, query_body, offset=0, size=10,
                           auto_create_index=False, doc_type="_doc", aggregations_query=False,
                           is_suggest_request=False, batch_search=False, routing=None,
                           if_official_index_name=False):
        try:
            assert (es_cli is not None)
            if if_official_index_name:
                official_index_name = sub_index_name
            else:
                official_index_name = cls.get_official_index_name(sub_index_name, "read")
            index_exists = es_cli.indices.exists(official_index_name)
            if not index_exists:
                if not auto_create_index:
                    logging.error("index:%s is not existing,get_search_results error!" % official_index_name)
                    return None
                else:
                    cls.create_index(es_cli, sub_index_name)
                    cls.put_index_mapping(es_cli, sub_index_name)

            logging.info("duan add,query_body:%s" % str(query_body).encode("utf-8"))

            if not batch_search:
                if not routing:
                    res = es_cli.search(index=official_index_name, doc_type=doc_type,
                                        body=query_body, from_=offset, size=size)
                else:
                    res = es_cli.search(index=official_index_name, doc_type=doc_type,
                                        body=query_body, from_=offset, size=size, routing=routing)
                if is_suggest_request:
                    return res
                else:
                    result_dict = {
                        "total_count": res["hits"]["total"],
                        "hits": res["hits"]["hits"]
                    }
                    if aggregations_query:
                        result_dict["aggregations"] = res["aggregations"]
                    return result_dict
            else:
                res = es_cli.msearch(body=query_body, index=official_index_name, doc_type=doc_type)
                logging.info("duan add,msearch res:%s" % str(res))
                return res
        except Exception:
            logging.error("catch exception,err_msg:%s" % traceback.format_exc())
            return {"total_count": 0, "hits": []}

    @classmethod
    def get_analyze_results(cls, es_cli, sub_index_name, query_body):
        try:
            assert (es_cli is not None)
            official_index_name = cls.get_official_index_name(sub_index_name, "read")
            index_exists = es_cli.indices.exists(official_index_name)
            if not index_exists:
                logging.error("index:%s is not existing,get_analyze_results error!" % official_index_name)
                return None

            res = es_cli.indices.analyze(index=official_index_name, body=query_body)
            return res
        except Exception:
            logging.error("catch exception,err_msg:%s" % traceback.format_exc())
            return None
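
    # Usage sketch (hedged): a match_all query against the "topic" read alias;
    # the query body is an illustrative assumption, not taken from callers of
    # this module.
    #
    #   body = {"query": {"match_all": {}}}
    #   res = ESPerform.get_search_results(ESPerform.get_cli(), "topic", body, size=5)
    #   if res:
    #       print(res["total_count"], len(res["hits"]))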
    @classmethod
    def if_es_node_load_high(cls, es_cli):
        try:
            assert (es_cli is not None)
            high_num = 0
            es_nodes_list = list()
            es_nodes_ori_info = es_cli.cat.nodes()
            es_nodes_info_list = es_nodes_ori_info.split("\n")
            for item in es_nodes_info_list:
                try:
                    item_list = item.split(" ")
                    if len(item_list) == 11:
                        cpu_load = item_list[4]
                    elif len(item_list) == 10:
                        cpu_load = item_list[3]
                    else:
                        continue
                    int_cpu_load = int(cpu_load)
                    if int_cpu_load > 60:
                        high_num += 1
                    es_nodes_list.append(int_cpu_load)
                except Exception:
                    logging.error("catch exception,item:%s,err_msg:%s" % (str(item), traceback.format_exc()))
                    return True

            if high_num > 3:
                logging.info("check es_nodes_load high,cpu load:%s,ori_cpu_info:%s" % (
                    str(es_nodes_list), str(es_nodes_info_list)))
                return True
            else:
                return False
        except Exception:
            logging.error("catch exception,err_msg:%s" % traceback.format_exc())
            return True

    @classmethod
    def get_tag_topic_list(cls, tag_id, have_read_topic_id_list, size=100):
        try:
            functions_list = list()
            # for id in tag_id:
            #     functions_list.append(
            #         {
            #             "filter": {"term": {"tag_list": id}},
            #             "weight": 1
            #         }
            #     )
            functions_list += [
                {
                    "filter": {
                        "constant_score": {
                            "filter": {"term": {"content_level": 6}}
                        }
                    },
                    "weight": 60
                },
                {
                    "filter": {
                        "constant_score": {
                            "filter": {"term": {"content_level": 5}}
                        }
                    },
                    "weight": 50
                },
                {
                    "filter": {
                        "constant_score": {
                            "filter": {"term": {"content_level": 4}}
                        }
                    },
                    "weight": 40
                }
            ]
            q = {
                "query": {
                    "function_score": {
                        "query": {
                            "bool": {
                                "must": [
                                    {"range": {"content_level": {"gte": 4, "lte": 6}}},
                                    {"term": {"is_online": True}},
                                    {"term": {"is_deleted": False}},
                                    {"terms": {"tag_list": tag_id}}
                                ]
                            }
                        },
                        "boost_mode": "sum",
                        "score_mode": "sum",
                        "functions": functions_list
                    }
                },
                "_source": {
                    "include": ["id"]
                },
                "sort": [
                    {"_score": {"order": "desc"}},
                    {"create_time_val": {"order": "desc"}},
                    # {"language_type": {"order": "asc"}},
                ]
            }
            if len(have_read_topic_id_list) > 0:
                q["query"]["function_score"]["query"]["bool"]["must_not"] = {
                    "terms": {
                        "id": have_read_topic_id_list
                    }
                }
            result_dict = ESPerform.get_search_results(ESPerform.get_cli(), sub_index_name="topic",
                                                       query_body=q, offset=0, size=size,
                                                       routing="4,5,6")
            topic_id_list = [item["_source"]["id"] for item in result_dict["hits"]]
            logging.info("topic_id_list:%s" % str(topic_id_list))
            return topic_id_list
        except Exception:
            logging.error("catch exception,err_msg:%s" % traceback.format_exc())
            return list()

    @classmethod
    def get_tag_topic_list_dict(cls, tag_id, have_read_topic_id_list, size=100):
        try:
            functions_list = list()
            for id in tag_id:
                functions_list.append(
                    {
                        "filter": {"term": {"tag_list": id}},
                        "weight": 1
                    }
                )
            # functions_list += [
            #     {
            #         "filter": {"term": {"content_level": 6}},
            #         "weight": 6000
            #     },
            #     {
            #         "filter": {"term": {"content_level": 5}},
            #         "weight": 5000
            #     },
            #     {
            #         "filter": {"term": {"content_level": 4}},
            #         "weight": 4000
            #     }
            # ]
            q = {
                "query": {
                    "function_score": {
                        "query": {
                            "bool": {
                                "must": [
                                    {"term": {"content_level": 6}},
                                    {"term": {"is_online": True}},
                                    {"term": {"is_deleted": False}},
                                    {"term": {"is_new_topic": False}},
                                    {"terms": {"tag_list": tag_id}}
                                ]
                            }
                        },
                        "boost_mode": "sum",
                        "score_mode": "sum",
                        "functions": functions_list
                    }
                },
                "_source": {
                    "include": ["id", "user_id", "latest_reply_time", "topic_ctr_30",
                                "topic_ctr_all", "like_rate_30", "like_rate_all"]
                },
                "sort": [
                    {
                        "_script": {
                            "order": "desc",
                            "script": {
                                "inline": "10*doc['topic_ctr_30'].value+doc['like_rate_30'].value+2*doc['topic_ctr_all'].value+doc['like_rate_all'].value"
                            },
                            "type": "number"
                        }
                    },
                    {"latest_reply_time": {"order": "desc"}},
                    {"create_time_val": {"order": "desc"}},
                    {"language_type": {"order": "asc"}},
                ],
                "collapse": {
                    "field": "user_id"
                }
            }
            if len(have_read_topic_id_list) > 0:
                q["query"]["function_score"]["query"]["bool"]["must_not"] = {
                    "terms": {
                        "id": have_read_topic_id_list
                    }
                }
            result_dict = ESPerform.get_search_results(ESPerform.get_cli(), sub_index_name="topic-high-star",
                                                       query_body=q, offset=0, size=size, routing="6")
            topic_id_list = [item["_source"]["id"] for item in result_dict["hits"]]
            # logging.info("topic_id_list:%s" % str(topic_id_list))
            # topic_id_dict = [{str(item["_source"]["id"]): item["_source"]["user_id"]} for item in result_dict["hits"]]
            topic_id_dict = dict()
            for item in result_dict["hits"]:
                topic_id_dict[str(item["_source"]["id"])] = item["_source"]["user_id"]

            logging.info("get_tag_topic_list_dict:gyz" + str(q) + str(result_dict))
            return topic_id_list, topic_id_dict
        except Exception:
            logging.error("catch exception,err_msg:%s" % traceback.format_exc())
            return list(), dict()
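
    # Usage sketch (hedged): rank level 4-6 topics for a tag set while
    # excluding already-read ids; the tag and topic ids are placeholders.
    #
    #   ids = ESPerform.get_tag_topic_list([12, 34], have_read_topic_id_list=[56], size=20)
    #   ids, id_to_user = ESPerform.get_tag_topic_list_dict([12, 34], [], size=20)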
"10*doc['topic_ctr_30'].value+doc['like_rate_30'].value+2*doc['topic_ctr_all'].value+doc['like_rate_all'].value" }, "type": "number" } }, {"latest_reply_time": {"order": "desc"}}, {"create_time_val": {"order": "desc"}}, {"language_type": {"order": "asc"}}, ], "collapse": { "field": "user_id" } } if len(have_read_topic_id_list) > 0: q["query"]["function_score"]["query"]["bool"]["must_not"] = { "terms": { "id": have_read_topic_id_list } } result_dict = ESPerform.get_search_results(ESPerform.get_cli(), sub_index_name="topic-high-star", query_body=q, offset=0, size=size, routing="6") topic_id_list = [item["_source"]["id"] for item in result_dict["hits"]] # logging.info("topic_id_list:%s" % str(topic_id_list)) # topic_id_dict = [{str(item["_source"]["id"]):item["_source"]["user_id"]} for item in result_dict["hits"]] topic_id_dict = dict() for item in result_dict["hits"]: topic_id_dict[str(item["_source"]["id"])] = item["_source"]["user_id"] logging.info("get_tag_topic_list_dict:gyz" + str(q) + str(result_dict)) return topic_id_list, topic_id_dict except: logging.error("catch exception,err_msg:%s" % traceback.format_exc()) return list(), dict() @classmethod def get_tag_new_topic_list(cls, tag_id, have_read_topic_id_list, size=10): try: functions_list = list() for id in tag_id: functions_list.append( { "filter": {"term": {"tag_list": id}}, "weight": 1 } ) q = { "query": { "function_score": { "query": { "bool": { "must": [ {"term": {"content_level": 6}}, {"term": {"is_online": True}}, {"term": {"is_deleted": False}}, {"term": {"is_new_topic": True}}, {"terms": {"tag_list": tag_id}} ] } }, "boost_mode": "sum", "score_mode": "sum", "functions": functions_list } }, "_source": { "include": ["id", "user_id"] }, "sort": [ {"latest_reply_time": {"order": "desc"}}, {"create_time_val": {"order": "desc"}}, {"language_type": {"order": "asc"}}, ], "collapse": { "field": "user_id" } } if len(have_read_topic_id_list) > 0: q["query"]["function_score"]["query"]["bool"]["must_not"] = { "terms": { "id": have_read_topic_id_list } } result_dict = ESPerform.get_search_results(ESPerform.get_cli(), sub_index_name="topic-high-star", query_body=q, offset=0, size=size, routing="6") topic_id_list = [item["_source"]["id"] for item in result_dict["hits"]] return topic_id_list except: logging.error("catch exception,err_msg:%s" % traceback.format_exc()) return list() @classmethod def get_tag_pictorial_id_list(cls, tag_id, have_read_lin_pictorial_id_list, size=100): try: q = { "query": { "function_score": { "query": { "bool": { "must": [ {"term": {"is_online": True}}, {"term": {"is_deleted": False}}, {"terms": {"edit_tag_id": tag_id}}, {"term": {"is_cover": True}}, {"term": {"is_recommend": True}} ] } }, "boost_mode": "sum", "score_mode": "sum", } }, "_source": { "include": ["id", "real_user_activate_time", "create_time", "pictorial_ctr_30", "pictorial_ctr_all", "like_rate_30", "like_rate_all"] }, "sort": [ { "_script": { "order": "desc", "script": { "inline": "10*doc['pictorial_ctr_30'].value+10*doc['like_rate_30'].value+3*doc['pictorial_ctr_all'].value+2*doc['like_rate_all'].value" }, "type": "number" } }, {"real_user_activate_time": {"order": "desc"}}, {"create_time": {"order": "desc"}}, ], } if len(have_read_lin_pictorial_id_list) > 0: q["query"]["function_score"]["query"]["bool"]["must_not"] = { "terms": { "id": have_read_lin_pictorial_id_list } } result_dict = ESPerform.get_search_results(ESPerform.get_cli(), sub_index_name="pictorial", query_body=q, offset=0, size=size) pictorial_id_list = [item["_source"]["id"] for item 
in result_dict["hits"]] logging.info("get_tag_pictorial_id_list:gyz" + str(q) + str(result_dict)) return pictorial_id_list except: logging.error("catch exception,err_msg:%s" % traceback.format_exc()) return list() @classmethod def get_highlight(cls, fields=[]): field_highlight = { 'fields': {k: {} for k in fields}, 'pre_tags': ['<%s>' % 'ems'], 'post_tags': ['' % 'ems'] } return field_highlight