#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Elasticsearch helper class (``ESPerform``) plus an index comparison script.

The class wraps the handful of Elasticsearch client calls used by this file
(index/alias creation, mapping upload, bulk indexing, search, analyze and a
node CPU-load check).  Every helper logs the traceback and returns a sentinel
value instead of raising.

The statements after the class run at import time: they read documents from
the ``pictorial`` index, look each one up by id in the
``mv-alpha-pictorial-test-190711901`` index and print every compared field
whose value differs between the two.
"""

import os
import sys
import logging
import traceback
import os.path
import re
import json

from elasticsearch import Elasticsearch
import elasticsearch.helpers

from django.conf import settings

ES_INFO_LIST = [
    {
        "host": "172.21.40.14",
        "port": 9200
    }
]

ES_INDEX_PREFIX = "gm-dbmw"


class ESPerform(object):
    """Namespace of classmethod helpers around an Elasticsearch client."""

    # Most recently created client (set by get_cli).
    cli_obj = None
    # Default host list used when get_cli is called without cli_info.
    cli_info_list = ES_INFO_LIST
    # Prefix prepended to every sub index name.
    index_prefix = ES_INDEX_PREFIX

    @classmethod
    def get_cli(cls, cli_info=None):
        """
        :remark: build an Elasticsearch client with sniffing disabled and
                 remember it in ``cls.cli_obj``
        :param cli_info: host list passed as ``hosts``; falls back to
                         ``cls.cli_info_list`` when falsy
        :return: the client, or None if construction failed
        """
        try:
            init_args = {
                'sniff_on_start': False,
                'sniff_on_connection_fail': False,
            }
            es_cli_info = cli_info if cli_info else cls.cli_info_list
            cls.cli_obj = Elasticsearch(hosts=es_cli_info, **init_args)
            return cls.cli_obj
        except Exception:
            logging.error("catch exception,err_msg:%s" % traceback.format_exc())
            return None

    @classmethod
    def get_official_index_name(cls, sub_index_name, index_flag=None):
        """
        :remark: get official es index name, ``<index_prefix>-<sub_index_name>``
        :param sub_index_name: index name without the prefix
        :param index_flag: None, "read" or "write"; see the note in the body,
                           the value is currently ignored
        :return: the index name, or None on error
        """
        try:
            # NOTE(review): the argument is unconditionally reset here, so the
            # "-read"/"-write" suffix below is never appended and every caller
            # gets the bare index name.  This looks like a temporary override;
            # it is kept as-is because removing it changes every index name
            # this module talks to -- confirm before deleting.
            index_flag = None

            assert (index_flag in [None, "read", "write"])
            official_index_name = cls.index_prefix + "-" + sub_index_name
            if index_flag:
                official_index_name += "-" + index_flag

            return official_index_name
        except Exception:
            logging.error("catch exception,err_msg:%s" % traceback.format_exc())
            return None

    @classmethod
    def __load_mapping(cls, doc_type):
        """
        :remark: load ``../trans2es/mapping/<doc_type>.json`` (relative to
                 this file) after stripping ``//`` comments
        :param doc_type: mapping file name without the ``.json`` extension
        :return: the parsed JSON, or None on error
        """
        try:
            mapping_file_path = os.path.join(
                os.path.dirname(__file__),
                '..', 'trans2es', 'mapping', '%s.json' % (doc_type,))
            with open(mapping_file_path, 'r') as f:
                # Strip comments.  The pattern removes everything from the
                # first "//" to the end of the line, including a "//" that
                # sits inside a JSON string value.
                mapping = ''.join(re.sub(r'//.*$', '', line) for line in f)
            return json.loads(mapping)
        except Exception:
            logging.error("catch exception,err_msg:%s" % traceback.format_exc())
            return None

    @classmethod
    def create_index(cls, es_cli, sub_index_name):
        """
        :remark: create es index,alias index
        :param es_cli: Elasticsearch client
        :param sub_index_name: index name without the prefix
        :return: True on success (including "index already exists"),
                 False on error
        """
        try:
            assert (es_cli is not None)

            official_index_name = cls.get_official_index_name(sub_index_name)
            index_exist = es_cli.indices.exists(official_index_name)
            if not index_exist:
                es_cli.indices.create(official_index_name)
                read_alias_name = cls.get_official_index_name(sub_index_name, "read")
                es_cli.indices.put_alias(official_index_name, read_alias_name)

                write_alias_name = cls.get_official_index_name(sub_index_name, "write")
                es_cli.indices.put_alias(official_index_name, write_alias_name)

            return True
        except Exception:
            logging.error("catch exception,err_msg:%s" % traceback.format_exc())
            return False

    @classmethod
    def put_index_mapping(cls, es_cli, sub_index_name, mapping_type="_doc", force_sync=False):
        """
        :remark: put index mapping
        :param es_cli: Elasticsearch client
        :param sub_index_name: index name without the prefix; also the name
                               of the mapping file that is loaded
        :param mapping_type: doc type passed to ``put_mapping``
        :param force_sync: when True, upload the mapping even if the existence
                           check for the target index fails
        :return: True on success, False if the index is missing (and
                 force_sync is False) or on error
        """
        try:
            assert (es_cli is not None)

            write_alias_name = cls.get_official_index_name(sub_index_name, "write")
            index_exist = es_cli.indices.exists(write_alias_name)
            if not index_exist and not force_sync:
                return False

            mapping_dict = cls.__load_mapping(sub_index_name)
            es_cli.indices.put_mapping(index=write_alias_name, body=mapping_dict, doc_type=mapping_type)

            return True
        except Exception:
            logging.error("catch exception,err_msg:%s" % traceback.format_exc())
            return False

    @classmethod
    def put_indices_template(cls, es_cli, template_file_name, template_name):
        """
        :remark: put index template
        :param es_cli: Elasticsearch client
        :param template_file_name: mapping file name (without ``.json``)
                                   holding the template body
        :param template_name: name the template is stored under
        :return: True on success, False on error
        """
        try:
            assert (es_cli is not None)

            mapping_dict = cls.__load_mapping(template_file_name)
            es_cli.indices.put_template(name=template_name, body=mapping_dict)

            return True
        except Exception:
            logging.error("catch exception,err_msg:%s" % traceback.format_exc())
            return False

    @classmethod
    def es_helpers_bulk(cls, es_cli, data_list, sub_index_name, auto_create_index=False, doc_type="_doc"):
        """
        :remark: bulk-index every truthy item of ``data_list``, using each
                 item's ``id`` as the document id
        :param es_cli: Elasticsearch client
        :param data_list: iterable of dicts, each with an ``id`` key
        :param sub_index_name: index name without the prefix
        :param auto_create_index: create the index and its mapping when the
                                  index does not exist
        :param doc_type: document type written to ``_type``
        :return: True on success, False if the index is missing (and
                 auto_create_index is False) or on error
        """
        try:
            assert (es_cli is not None)

            # This one index name is used verbatim, without the prefix.
            official_index_name = sub_index_name
            if sub_index_name != "mv-alpha-tag-test-190711901":
                official_index_name = cls.get_official_index_name(sub_index_name, "write")

            index_exists = es_cli.indices.exists(official_index_name)
            logging.info("get index_exists:%s" % index_exists)
            if not index_exists:
                if not auto_create_index:
                    logging.error("index:%s is not existing,bulk data error!" % official_index_name)
                    return False
                else:
                    cls.create_index(es_cli, sub_index_name)
                    cls.put_index_mapping(es_cli, sub_index_name)

            bulk_actions = [
                {
                    '_op_type': 'index',
                    '_index': official_index_name,
                    '_type': doc_type,
                    '_id': data['id'],
                    '_source': data,
                }
                for data in data_list
                if data
            ]
            elasticsearch.helpers.bulk(es_cli, bulk_actions)

            return True
        except Exception:
            logging.error("catch exception,err_msg:%s" % traceback.format_exc())
            return False

    @classmethod
    def get_search_results(cls, es_cli, sub_index_name, query_body, offset=0, size=10,
                           auto_create_index=False, doc_type="_doc", aggregations_query=False,
                           is_suggest_request=False, batch_search=False, routing=None):
        """
        :remark: run ``search`` (or ``msearch`` when batch_search is set)
        :param es_cli: Elasticsearch client
        :param sub_index_name: index name without the prefix
        :param query_body: request body
        :param offset: ``from_`` value of the search
        :param size: ``size`` value of the search
        :param auto_create_index: create the index and its mapping when the
                                  index does not exist
        :param doc_type: document type passed to the client
        :param aggregations_query: copy ``res["aggregations"]`` into the result
        :param is_suggest_request: return the raw response of ``search``
        :param batch_search: use ``msearch`` and return its raw response
        :param routing: routing value, only sent when truthy
        :return: raw response for suggest/batch requests, otherwise a dict
                 with ``total_count`` and ``hits`` (plus ``aggregations``).
                 NOTE: returns None when the index is missing and
                 auto_create_index is False, but ``{"total_count": 0,
                 "hits": []}`` on any exception -- callers must handle both.
        """
        try:
            assert (es_cli is not None)

            # This one index name is used verbatim, without the prefix.
            official_index_name = sub_index_name
            if sub_index_name != "mv-alpha-pictorial-test-190711901":
                official_index_name = cls.get_official_index_name(sub_index_name, "read")

            index_exists = es_cli.indices.exists(official_index_name)
            if not index_exists:
                if not auto_create_index:
                    logging.error("index:%s is not existing,get_search_results error!" % official_index_name)
                    return None
                else:
                    cls.create_index(es_cli, sub_index_name)
                    cls.put_index_mapping(es_cli, sub_index_name)

            logging.info("duan add,query_body:%s" % str(query_body).encode("utf-8"))

            if batch_search:
                res = es_cli.msearch(body=query_body, index=official_index_name, doc_type=doc_type)
                logging.info("duan add,msearch res:%s" % str(res))
                return res

            search_kwargs = {
                "index": official_index_name,
                "doc_type": doc_type,
                "body": query_body,
                "from_": offset,
                "size": size,
            }
            # Only send the routing parameter when one was supplied.
            if routing:
                search_kwargs["routing"] = routing
            res = es_cli.search(**search_kwargs)

            if is_suggest_request:
                return res

            result_dict = {
                "total_count": res["hits"]["total"],
                "hits": res["hits"]["hits"]
            }
            if aggregations_query:
                result_dict["aggregations"] = res["aggregations"]
            return result_dict
        except Exception:
            logging.error("catch exception,err_msg:%s" % traceback.format_exc())
            return {"total_count": 0, "hits": []}

    @classmethod
    def get_analyze_results(cls, es_cli, sub_index_name, query_body):
        """
        :remark: call the analyze API on an index; ``sub_index_name`` is used
                 verbatim (no prefix is added)
        :param es_cli: Elasticsearch client
        :param sub_index_name: full index name
        :param query_body: analyze request body
        :return: the analyze response, or None if the index is missing or on
                 error
        """
        try:
            assert (es_cli is not None)

            index_exists = es_cli.indices.exists(sub_index_name)
            if not index_exists:
                logging.error("index:%s is not existing,get_search_results error!" % sub_index_name)
                return None

            res = es_cli.indices.analyze(index=sub_index_name, body=query_body)
            return res
        except Exception:
            logging.error("catch exception,err_msg:%s" % traceback.format_exc())
            return None

    @classmethod
    def if_es_node_load_high(cls, es_cli):
        """
        :remark: parse the text returned by ``cat.nodes()`` and report whether
                 more than 3 nodes have a CPU value above 60
        :param es_cli: Elasticsearch client
        :return: True when the load is considered high, and also True when
                 the output cannot be fetched or a row cannot be parsed;
                 False otherwise
        """
        try:
            assert (es_cli is not None)

            high_num = 0
            es_nodes_list = list()
            es_nodes_ori_info = es_cli.cat.nodes()
            es_nodes_info_list = es_nodes_ori_info.split("\n")
            for item in es_nodes_info_list:
                try:
                    # Rows are split on single spaces; only rows with 11 or
                    # 10 tokens are understood, anything else is skipped.
                    item_list = item.split(" ")
                    if len(item_list) == 11:
                        cpu_load = item_list[4]
                    elif len(item_list) == 10:
                        cpu_load = item_list[3]
                    else:
                        continue
                    int_cpu_load = int(cpu_load)
                    if int_cpu_load > 60:
                        high_num += 1
                    es_nodes_list.append(int_cpu_load)
                except Exception:
                    logging.error("catch exception,item:%s,err_msg:%s" % (str(item), traceback.format_exc()))
                    return True

            if high_num > 3:
                logging.info("check es_nodes_load high,cpu load:%s,ori_cpu_info:%s" % (
                    str(es_nodes_list), str(es_nodes_info_list)))
                return True
            else:
                return False
        except Exception:
            logging.error("catch exception,err_msg:%s" % traceback.format_exc())
            return True


# Fields compared between the old and the new document, in report order.
_COMPARED_FIELDS = (
    "is_online",
    "is_deleted",
    "is_recommend",
    "name",
    "description",
    "topic_num",
    "creator_id",
    "icon",
    "high_quality_topic_num",
    "create_time",
    "update_time",
    "tag_id",
    "tag_name",
    "topic_id_list",
    "effective",
    "offline_score",
    "is_default",
    "is_cover",
    "topic_vote_number",
    "activity_join",
)


def _print_field_differences(old_source, new_source):
    """Print old value, new value and a ``-----<field>-----`` marker for
    every compared field whose values differ.

    Raises KeyError if either document lacks one of the compared fields.
    """
    for field in _COMPARED_FIELDS:
        if old_source[field] != new_source[field]:
            print(old_source[field])
            print(new_source[field])
            print("-----%s-----" % field)


# First fetch a batch of documents from the old index.
# One client is created here and reused for every lookup below.
es_cli_obj = ESPerform.get_cli()

q = {}
q["query"] = {
    "bool": {
        "must": [
            {"term": {"is_online": True}},
            {"term": {"is_deleted": False}},
            {"range": {"update_time": {"gte": "2019-05-09T00:00:00+00:00"}}}
        ]
    }
}
result_dict = ESPerform.get_search_results(es_cli_obj, "pictorial", q, 0, 1000)
result_dict_ids = list()  # unused; kept so the module-level name still exists

# get_search_results returns None when the index does not exist; treat that
# as "no hits" instead of failing on the subscript.
old_hits = result_dict["hits"] if result_dict else []
for old_item in old_hits:
    old_source = old_item["_source"]
    old_id = old_source["id"]

    # Look the same document up in the new index.
    q = {}
    q["query"] = {
        "bool": {
            "must": [
                {"term": {"id": old_id}},
                {"term": {"is_online": True}}
            ]
        }
    }
    result_dict_test = ESPerform.get_search_results(es_cli_obj, "mv-alpha-pictorial-test-190711901", q, 0, 1)
    new_hits = result_dict_test["hits"] if result_dict_test else []
    for new_item in new_hits:
        new_source = new_item["_source"]
        new_id = new_source["id"]
        print(new_id)
        print("-----id-----")
        _print_field_differences(old_source, new_source)