# func_from_es_get_article.py 11.2 KB
# -*- coding:UTF-8 -*-
# @Time  : 2020/9/11 13:51
# @File  : func_from_es_get_article.py
# @email : litao@igengmei.com
# @author : litao
from elasticsearch import Elasticsearch
# Module-level memo of per-tag count results already fetched by
# get_es_article_num, so repeated lookups can skip the Elasticsearch queries.
exists_es_dic = dict()

# Shared Elasticsearch client used by every query helper in this module.
es = Elasticsearch([
    dict(host='172.16.31.17', port=9200),
    dict(host='172.16.31.11', port=9200),
])


def get_device_num_from_es(word):
    """Return the hit total of ``gm-dbmw-device`` documents tagged with *word*.

    A document matches when at least one of its nested tag collections
    (demands, solutions, positions, projects) has an entry whose ``name``
    equals *word*.

    :param word: tag name to look for in each nested ``<path>.name`` field.
    :return: ``results["hits"]["total"]`` exactly as reported by the search.
    """
    # Every nested collection is queried the same way; only the path differs.
    nested_paths = (
        "first_demands",
        "second_demands",
        "first_solutions",
        "second_solutions",
        "first_positions",
        "second_positions",
        "projects",
    )
    should_clauses = [
        {
            "nested": {
                "path": path,
                "query": {
                    "bool": {
                        "must": [
                            {"terms": {path + ".name": [word]}},
                        ]
                    }
                },
            }
        }
        for path in nested_paths
    ]
    query_body = {
        "query": {
            "bool": {
                "should": should_clauses,
                "minimum_should_match": 1,
            }
        }
    }

    # size=0: only the hit total is read from the response below.
    response = es.search(
        index='gm-dbmw-device',
        doc_type='doc',
        timeout='10s',
        size=0,
        body=query_body,
    )
    return response["hits"]["total"]


def _count_online_docs(index, doc_type, must_clauses):
    """Run a size-0 bool search on *index* and return its ``hits.total``."""
    body = {
        "query": {
            "bool": {
                "minimum_should_match": 1,
                "should": [],
                "must": must_clauses,
            }
        },
    }
    results = es.search(
        index=index,
        doc_type=doc_type,
        timeout='10s',
        size=0,
        body=body,
    )
    return results["hits"]["total"]


def get_es_article_num(tag_dict, allow_tag=("first_demands", "second_demands", "first_solutions",
                                            "second_solutions", "positions", "second_positions",
                                            "tags_v3")):
    """Count answer, tractate and diary content for every tag in *tag_dict*.

    :param tag_dict: mapping of tag type (the Elasticsearch field that is
        filtered on) to an iterable of tag names.
    :param allow_tag: tag types to keep in the result; only used for
        membership tests.
    :return: dict mapping each allowed tag type to a list of
        ``{tag_name: (answer_content_num, tractate_content_num,
        diary_content_num, total_num)}`` entries, one per tag name processed.

    Tag types in *tag_dict* that are not part of the result (unknown, or
    excluded by *allow_tag*) are skipped without querying; previously they
    raised ``KeyError``, in the uncached case only after the searches had
    already run.

    Results are memoised in the module-level ``exists_es_dic`` under
    ``(tag_type, tag_name)``. The counts depend on which field the name is
    matched against, so keying on the name alone could return the counts of
    a different tag type.
    """
    known_tag_types = (
        "first_demands",
        "second_demands",
        "first_solutions",
        "second_solutions",
        "positions",
        "second_positions",
        "tags_v3",
    )
    article_dict = {tag_type: [] for tag_type in known_tag_types if tag_type in allow_tag}

    for tag_type, tag_names in tag_dict.items():
        if tag_type not in article_dict:
            # Nowhere to store the result: skip before spending any queries.
            continue
        bucket = article_dict[tag_type]

        for tag_name in tag_names:
            cache_key = (tag_type, tag_name)
            if cache_key in exists_es_dic:
                bucket.append(exists_es_dic[cache_key])
                continue

            tag_filter = {"term": {tag_type: tag_name}}

            # Answers
            answer_content_num = _count_online_docs(
                'gm-dbmw-answer-read',
                'answer',
                [
                    {"term": {"is_online": True}},
                    {"terms": {"content_level": [6, 5, 4, 3.5, 3]}},
                    {"range": {"content_length": {"gte": 30}}},
                    tag_filter,
                ],
            )

            # Tractates
            tractate_content_num = _count_online_docs(
                'gm-dbmw-tractate-read',
                'tractate',
                [
                    {"term": {"is_online": True}},
                    {"terms": {"content_level": [6, 5, 4, 3.5, 3]}},
                    tag_filter,
                ],
            )

            # Diaries
            diary_content_num = _count_online_docs(
                'gm-dbmw-diary-read',
                'diary',
                [
                    {"term": {"is_online": True}},
                    {"term": {"has_cover": True}},
                    {"term": {"is_sink": False}},
                    {"term": {"has_after_cover": True}},
                    {"term": {"has_before_cover": True}},
                    {"range": {"content_level": {"gte": "3"}}},
                    {"term": {"content_simi_bol_show": 0}},
                    tag_filter,
                ],
            )

            total_num = answer_content_num + tractate_content_num + diary_content_num
            data_dic = {tag_name: (answer_content_num, tractate_content_num, diary_content_num, total_num)}
            exists_es_dic[cache_key] = data_dic
            bucket.append(data_dic)

    return article_dict