# -*- coding:UTF-8 -*-
# @Time  : 2020/9/11 13:51
# @File  : func_from_es_get_article.py
# @email : litao@igengmei.com
# @author : litao
"""Elasticsearch helpers that count devices and articles matching a tag."""

from elasticsearch import Elasticsearch

# Cache of article counts already fetched by get_es_article_num().
# Maps tag_name -> {tag_name: (answer, tractate, diary, total)}.
# NOTE(review): the cache is keyed by tag name only, although the counts are
# computed per (tag_type, tag_name). A name that appears under two tag types
# re-uses the first result. The key is left unchanged here because this is a
# module-level global that other modules may read — confirm before changing.
exists_es_dic = {}

es = Elasticsearch([
    {
        'host': '172.16.31.17',
        'port': 9200,
    },
    {
        'host': '172.16.31.11',
        'port': 9200,
    },
])

# Nested document paths searched by get_device_num_from_es(); each one is
# matched on its "<path>.name" field.
_DEVICE_NESTED_PATHS = (
    "first_demands",
    "second_demands",
    "first_solutions",
    "second_solutions",
    "first_positions",
    "second_positions",
    "projects",
)

# Tag types (document fields) supported by get_es_article_num(). A tuple, so
# it can safely be used as a default argument value.
_ARTICLE_TAG_TYPES = (
    "first_demands",
    "second_demands",
    "first_solutions",
    "second_solutions",
    "positions",
    "second_positions",
    "tags_v3",
)

# (index, doc_type, fixed "must" filters) for every content source, in the
# order the counts appear in the result tuple: answer, tractate, diary.
# The filter dicts are shared between calls and must be treated as read-only.
_ARTICLE_SOURCES = (
    (
        'gm-dbmw-answer-read',
        'answer',
        (
            {"term": {"is_online": True}},
            {"terms": {"content_level": [6, 5, 4, 3.5, 3]}},
            {"range": {"content_length": {"gte": 30}}},
        ),
    ),
    (
        'gm-dbmw-tractate-read',
        'tractate',
        (
            {"term": {"is_online": True}},
            {"terms": {"content_level": [6, 5, 4, 3.5, 3]}},
        ),
    ),
    (
        'gm-dbmw-diary-read',
        'diary',
        (
            {"term": {"is_online": True}},
            {"term": {"has_cover": True}},
            {"term": {"is_sink": False}},
            {"term": {"has_after_cover": True}},
            {"term": {"has_before_cover": True}},
            {"range": {"content_level": {"gte": "3"}}},
            {"term": {"content_simi_bol_show": 0}},
        ),
    ),
)


def get_device_num_from_es(word):
    """Return the number of documents in ``gm-dbmw-device`` that match *word*.

    A document matches when *word* equals the ``name`` of at least one entry
    under any of the nested paths in ``_DEVICE_NESTED_PATHS``.

    :param word: tag name to look up.
    :return: ``hits.total`` of the search response.
    """
    # One nested "terms" clause per path; "minimum_should_match": 1 below
    # makes a hit on any single path sufficient.
    nested_clauses = [
        {
            "nested": {
                "path": path,
                "query": {
                    "bool": {
                        "must": [
                            {"terms": {path + ".name": [word]}}
                        ]
                    }
                },
            }
        }
        for path in _DEVICE_NESTED_PATHS
    ]
    results = es.search(
        index='gm-dbmw-device',
        doc_type='doc',
        timeout='10s',
        size=0,  # only the hit count is needed, not the documents
        body={
            "query": {
                "bool": {
                    "should": nested_clauses,
                    "minimum_should_match": 1,
                }
            }
        },
    )
    # NOTE(review): assumes hits.total is a plain number, as in the
    # pre-7.x response format — confirm against the cluster version.
    device_num = results["hits"]["total"]
    return device_num


def _count_tagged_docs(index, doc_type, filters, tag_type, tag_name):
    """Count documents in *index* that pass *filters* and carry the tag."""
    # Build a fresh "must" list so the shared filter tuple is never modified.
    must = list(filters)
    must.append({"term": {tag_type: tag_name}})
    body = {
        "query": {
            "bool": {
                "minimum_should_match": 1,
                "should": [],
                "must": must,
            }
        },
    }
    results = es.search(
        index=index,
        doc_type=doc_type,
        timeout='10s',
        size=0,  # only the hit count is needed, not the documents
        body=body,
    )
    return results["hits"]["total"]


def get_es_article_num(tag_dict, allow_tag=_ARTICLE_TAG_TYPES):
    """Count answers, tractates and diaries for every tag in *tag_dict*.

    Results are cached in the module-level ``exists_es_dic`` (keyed by tag
    name), so a tag name seen before is not queried again.

    :param tag_dict: mapping ``tag_type -> iterable of tag names``.
    :param allow_tag: tag types to include in the result; ``None`` means all
        supported types. Tag types in *tag_dict* that are not allowed (or not
        supported) are skipped without querying Elasticsearch.
    :return: mapping ``tag_type -> list`` of
        ``{tag_name: (answer_content_num, tractate_content_num,
        diary_content_num, total_num)}``, one entry per tag name.
    """
    if allow_tag is None:
        allow_tag = _ARTICLE_TAG_TYPES

    article_dict = {
        key: [] for key in _ARTICLE_TAG_TYPES if key in allow_tag
    }

    for tag_type, tag_names in tag_dict.items():
        # Previously a filtered-out or unknown tag type raised KeyError, and
        # only after its queries had already been executed.
        if tag_type not in article_dict:
            continue

        for tag_name in tag_names:
            if tag_name in exists_es_dic:
                article_dict[tag_type].append(exists_es_dic[tag_name])
                continue

            answer_content_num, tractate_content_num, diary_content_num = [
                _count_tagged_docs(index, doc_type, filters,
                                   tag_type, tag_name)
                for index, doc_type, filters in _ARTICLE_SOURCES
            ]
            total_num = (answer_content_num + tractate_content_num
                         + diary_content_num)

            data_dic = {
                tag_name: (answer_content_num, tractate_content_num,
                           diary_content_num, total_num)
            }
            exists_es_dic[tag_name] = data_dic
            article_dict[tag_type].append(data_dic)

    return article_dict