# -*- coding:UTF-8 -*-
# @Time  : 2020/9/9 10:07
# @File  : portary_div_exposure.py
# @email : litao@igengmei.com
# @author : litao
import json
import traceback

import redis
import pymysql
from elasticsearch import Elasticsearch
from meta_base_code.utils.func_get_pv_card_id import get_card_id

# Module-level Redis client objects, created when this module is imported.
# In the code visible in this file only ``redis_client2`` is actually used
# (it holds the ``doris:user_portrait:tag3:device_id:*`` keys read below).
# NOTE(review): the passwords are embedded in these URLs; consider loading
# them from configuration or the environment instead of source control.
redis_client = redis.StrictRedis.from_url("redis://:ReDis!GmTx*0aN6@172.16.40.133:6379")
redis_client2 = redis.StrictRedis.from_url("redis://:ReDis!GmTx*0aN9@172.16.40.173:6379")
redis_client3 = redis.StrictRedis.from_url("redis://:ReDis!GmTx*0aN12@172.16.40.164:6379")
redis_client4 = redis.StrictRedis.from_url("redis://:XfkMCCdWDIU%ls$h@172.16.50.145:6379")

# Shared Elasticsearch client used by every ES query in this file; it is
# configured with two hosts on port 9200.
es = Elasticsearch([
    {
        'host': '172.16.31.17',
        'port': 9200,
    }, {
        'host': '172.16.31.11',
        'port': 9200,
    }])


def user_portrait_scan_info():
    """Yield the tag3 portrait of every device whose key is found in Redis.

    Walks all keys matching ``doris:user_portrait:tag3:device_id:*`` on
    ``redis_client2`` with SCAN (batch hint of 3000) and, for each key,
    yields ``get_user_portrait_tag3_from_redis(device_id)``.

    Progress ("round: N") is printed per SCAN call and summary counters are
    printed once the scan completes.

    Yields:
        dict: whatever ``get_user_portrait_tag3_from_redis`` returns for the
        device.

    Any exception raised while scanning is printed and ends the iteration
    early instead of propagating (best-effort scan).
    """
    try:
        scan_round = 0
        all_count = 0
        # Reported in the summary below; nothing in this function increments
        # these two counters.
        empty_count = 0
        just_projects_count = 0
        prefix = "doris:user_portrait:tag3:device_id:"
        keys = prefix + "*"
        cur = 0
        while True:
            # SCAN returns a batch of keys on every call, including the very
            # first one, so each batch is consumed before the cursor is
            # checked. A returned cursor of 0 means the iteration is complete.
            cur, results = redis_client2.scan(cur, keys, 3000)
            scan_round += 1
            print("round: " + str(scan_round))
            for key in results:
                key = str(key, "utf-8")
                # Strip the known prefix rather than splitting on ":" so a
                # device id that itself contains a colon is kept intact.
                device_id = key[len(prefix):]
                all_count += 1
                yield get_user_portrait_tag3_from_redis(device_id)
            if cur == 0:
                break

        print("all count: " + str(all_count))
        print("empty portrait: " + str(empty_count))
        print("just projects portrait: " + str(just_projects_count))
    except Exception as e:
        print(e)


def get_user_portrait_tag3_redis_key(device_id):
    """Return the Redis key under which the tag3 portrait of ``device_id`` is stored."""
    return f"doris:user_portrait:tag3:device_id:{device_id!s}"


def get_user_portrait_tag3_from_redis(device_id, limit_score=0):
    """Load a device's tag3 portrait from Redis as ranked lists of tag names.

    The portrait is read from ``redis_client2`` as a JSON object whose
    fields each map a tag name to a score. For every field the tags whose
    score is at least ``limit_score`` are returned, highest score first.

    Args:
        device_id: Device identifier used to build the Redis key.
        limit_score: Minimum score (inclusive) a tag needs to be kept.

    Returns:
        dict: field name -> list of tag names, for the fields
        ``first_demands``, ``second_demands``, ``first_solutions``,
        ``second_solutions``, ``first_positions``, ``second_positions``,
        ``projects`` and ``anecdote_tags``. Fields missing from the stored
        portrait yield an empty list. An empty dict is returned when the
        key does not exist.
    """
    def items_gt_score(d):
        # Rank on the numeric value so that scores stored as strings are not
        # ordered lexicographically (the threshold test already uses float()).
        ranked = sorted(d.items(), key=lambda item: float(item[1]), reverse=True)
        return [tag for tag, score in ranked if float(score) >= limit_score]

    portrait_key = get_user_portrait_tag3_redis_key(device_id)
    # A single GET replaces the EXISTS + GET pair: one round trip, and no
    # window in which the key can disappear between the two commands.
    raw_portrait = redis_client2.get(portrait_key)
    if raw_portrait is None:
        return {}

    user_portrait = json.loads(raw_portrait)
    portrait_fields = (
        "first_demands",     # first-level demands
        "second_demands",    # second-level demands
        "first_solutions",   # first-level solutions
        "second_solutions",  # second-level solutions
        "first_positions",   # first-level positions
        "second_positions",  # second-level positions
        "projects",          # projects
        "anecdote_tags",     # gossip / anecdote tags
    )
    return {
        field: items_gt_score(user_portrait.get(field, {}))
        for field in portrait_fields
    }


def get_channel_tags_info():
    """Read every tag from ``api_tag_3_0`` and group the names by category.

    Rows are fetched through ``get_data_by_mysql`` and each row's
    ``tag_type`` decides which list its ``name`` is appended to. Rows with
    any other ``tag_type`` (for example 29, channels) are ignored.

    NOTE(review): the earlier docstring listed
    ``tag_ids: [416, 432, 421, 423, 275, 582]`` without explanation.

    Returns:
        dict: category -> list of tag names, with the keys
        ``first_demands``, ``second_demands``, ``first_solutions``,
        ``second_solutions``, ``first_positions``, ``second_positions``
        and ``projects`` (in that order).
    """
    # tag_type value in api_tag_3_0 -> key in the returned dict.
    # Insertion order here also fixes the key order of the result.
    category_by_tag_type = {
        19: "first_demands",
        20: "second_demands",
        18: "first_solutions",
        16: "second_solutions",
        21: "first_positions",
        22: "second_positions",
        1: "projects",
    }
    grouped = {category: [] for category in category_by_tag_type.values()}

    query = "SELECT name, tag_type from api_tag_3_0"
    rows = get_data_by_mysql("172.16.30.141", 3306, "zx_str", "ZXueX58pStrage", "zhengxing", query)
    for row in rows:
        tag_name = row.get("name", "")
        category = category_by_tag_type.get(row.get("tag_type", -1))
        if category is not None:
            grouped[category].append(tag_name)
    return grouped


def get_device_num_from_es(word):
    """Return the hit total for ``word`` across the device index's tag fields.

    Searches the ``gm-dbmw-device`` index for documents in which ``word``
    equals the ``name`` of an entry in at least one of the nested tag
    fields (demands, solutions, positions, projects).

    Args:
        word: Tag name to look for.

    Returns:
        The ``hits.total`` value of the Elasticsearch response.
    """
    nested_paths = (
        "first_demands",
        "second_demands",
        "first_solutions",
        "second_solutions",
        "first_positions",
        "second_positions",
        "projects",
    )
    # One nested "terms" clause per tag field; a document matches when any
    # single clause matches (minimum_should_match = 1).
    should_clauses = [
        {
            "nested": {
                "path": path,
                "query": {
                    "bool": {
                        "must": [
                            {"terms": {path + ".name": [word]}}
                        ]
                    }
                }
            }
        }
        for path in nested_paths
    ]
    # size=0: only the total is needed, not the matching documents.
    response = es.search(
        index='gm-dbmw-device',
        doc_type='doc',
        timeout='10s',
        size=0,
        body={
            "query": {
                "bool": {
                    "should": should_clauses,
                    "minimum_should_match": 1
                }
            }
        }
    )
    return response["hits"]["total"]


def get_es_article_num(tag_dict):
    """Count the answers, tractates and diaries that carry each tag.

    For every tag name in ``tag_dict`` three Elasticsearch searches are
    run (answer, tractate, diary index, in that order), each combining the
    index's fixed quality filters with a ``term`` filter
    ``{tag_type: tag_name}``. The per-tag result is printed as it is
    produced.

    Args:
        tag_dict: Mapping of tag category (used as the ES field name) to a
            list of tag names. Categories must be among the keys of the
            returned dict.

    Returns:
        dict: category -> list of single-entry dicts
        ``{tag_name: (answer_num, tractate_num, diary_num, total_num)}``.
    """
    def count_hits(index, doc_type, must_clauses):
        # size=0: only hits.total is read, no documents are fetched.
        # NOTE(review): "should" is empty while minimum_should_match is 1;
        # confirm this matches as intended on the target Elasticsearch version.
        response = es.search(
            index=index,
            doc_type=doc_type,
            timeout='10s',
            size=0,
            body={
                "query": {
                    "bool": {
                        "minimum_should_match": 1,
                        "should": [],
                        "must": must_clauses,
                    }
                }
            }
        )
        return response["hits"]["total"]

    article_dict = {
        category: []
        for category in (
            "first_demands",
            "second_demands",
            "first_solutions",
            "second_solutions",
            "first_positions",
            "second_positions",
            "projects",
        )
    }

    for tag_type, tag_names in tag_dict.items():
        for tag_name in tag_names:
            # Answers: online, good content level, at least 30 characters long.
            answer_content_num = count_hits(
                'gm-dbmw-answer-read',
                'answer',
                [
                    {"term": {"is_online": True}},
                    {"terms": {"content_level": [6, 5, 4, 3.5, 3]}},
                    {"range": {"content_length": {"gte": 30}}},
                    {"term": {tag_type: tag_name}},
                ],
            )

            # Tractates: online with a good content level.
            tractate_content_num = count_hits(
                'gm-dbmw-tractate-read',
                'tractate',
                [
                    {"term": {"is_online": True}},
                    {"terms": {"content_level": [6, 5, 4, 3.5, 3]}},
                    {"term": {tag_type: tag_name}},
                ],
            )

            # Diaries: online, not sunk, with cover / before / after images,
            # content level >= 3 and content_simi_bol_show == 0.
            diary_content_num = count_hits(
                'gm-dbmw-diary-read',
                'diary',
                [
                    {"term": {"is_online": True}},
                    {"term": {"has_cover": True}},
                    {"term": {"is_sink": False}},
                    {"term": {"has_after_cover": True}},
                    {"term": {"has_before_cover": True}},
                    {"range": {"content_level": {"gte": "3"}}},
                    {"term": {"content_simi_bol_show": 0}},
                    {"term": {tag_type: tag_name}},
                ],
            )

            total_num = answer_content_num + tractate_content_num + diary_content_num
            data_dic = {tag_name: (answer_content_num, tractate_content_num, diary_content_num, total_num)}
            print(data_dic)
            article_dict[tag_type].append(data_dic)

    return article_dict


def get_data_by_mysql(host, port, user, passwd, db, sql):
    """Run ``sql`` against a MySQL database and return every row.

    Args:
        host: MySQL server host.
        port: MySQL server port.
        user: Login user name.
        passwd: Login password.
        db: Name of the database to use.
        sql: SQL statement to execute.

    Returns:
        The result of ``cursor.fetchall()``; rows are dicts because a
        ``DictCursor`` is used. If anything fails, the formatted traceback
        is printed and returned as a ``str`` instead of raising, so callers
        should check the type of the result before iterating it.
    """
    conn = None
    try:
        conn = pymysql.connect(host=host, port=port, user=user, passwd=passwd, db=db,
                               cursorclass=pymysql.cursors.DictCursor)
        with conn.cursor() as cursor:
            cursor.execute(sql)
            return cursor.fetchall()
    except Exception:
        # Kept for backward compatibility: errors are reported through the
        # return value. KeyboardInterrupt / SystemExit are no longer swallowed.
        error_text = traceback.format_exc()
        print("error2_user_portrait", error_text)
        return error_text
    finally:
        # Close the connection on every path, including failed queries.
        if conn is not None:
            conn.close()


def from_id_get_tag(card_id_dict):
    """Count how often each tag appears on the given cards.

    Every card is fetched from Elasticsearch with ``es.get_source`` and the
    values of its tag fields (demands, solutions, positions, projects) are
    tallied. Each fetched document is printed.

    Args:
        card_id_dict: Mapping of card type (``"diary"``, ``"qa"`` or
            ``"user_post"``) to an iterable of card ids. Card types other
            than these three are skipped.

    Returns:
        dict: tag -> number of occurrences across all fetched cards
        (the first occurrence counts as 1).
    """
    # card type -> (Elasticsearch index, doc_type)
    es_location_by_card_type = {
        "diary": ('gm-dbmw-diary-read', 'diary'),
        "qa": ('gm-dbmw-answer-read', 'answer'),
        "user_post": ('gm-dbmw-tractate-read', 'tractate'),
    }
    tag_fields = (
        "first_demands",
        "second_demands",
        "first_solutions",
        "second_solutions",
        "first_positions",
        "second_positions",
        "projects",
    )

    query_count = {}
    for card_type, card_ids in card_id_dict.items():
        location = es_location_by_card_type.get(card_type)
        if location is None:
            # Unknown card type: there is no index to look it up in, so skip
            # it rather than querying some other type's index.
            continue
        index, doc_type = location
        for card_id in card_ids:
            res = es.get_source(index, card_id, doc_type=doc_type)
            print(res)
            for field in tag_fields:
                # A missing or null field contributes no tags.
                words = res.get(field) or []
                if isinstance(words, str):
                    # A bare string would otherwise be iterated character
                    # by character.
                    words = [words]
                for word in words:
                    query_count[word] = query_count.get(word, 0) + 1
    return query_count



def parse_data():
    """Run the tag / exposure analysis steps in sequence.

    Fetches all tags, counts the content available for each tag, fetches
    the ids of the exposed cards and counts the tags found on those cards.

    NOTE(review): the two computed results are neither returned nor stored
    here; the only visible output is what the called functions print.
    """
    # Fetch every tag, grouped by category.
    all_tags = get_channel_tags_info()
    print(all_tags)
    # Count the diaries, posts and answers that match each tag.
    article_num_dict = get_es_article_num(all_tags)
    # Fetch the ids of the cards that were exposed.
    card_id_dict = get_card_id()
    # Count the tags carried by the exposed cards.
    word_count_exposure = from_id_get_tag(card_id_dict)

# Script entry point: run the analysis only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    parse_data()
