from django.conf import settings
from gm_types.gaia import DIARY_ORDER_TYPE
from libs.es import tzlc, get_es, es_query, es_index_adapt, es_indices_analyze, get_highlight_query_analyzer
from libs.debug import pretty_json
from extend.models.gaia import WordRel
import traceback
import logging
from libs.cache import redis_client
import json
from libs.neo4j import Neo4jManager
import datetime
from libs.error import logging_exception

logger = logging.getLogger(__name__)


def generate_sorting(sort_type=DIARY_ORDER_TYPE.DEFAULT, sort_params={}, recommend_detail=False):
    """Build the Elasticsearch ``sort`` list used for diary queries.

    Every result starts with two criteria: ``is_sink`` ascending, then the
    ``sort_diary-sink-by-org`` script descending. For the ``RECOMMEND`` and
    ``FEED`` sort types the ``sort_diary-recommend`` script and one or two
    score fields are appended; any other sort type gets the base list only.

    :param sort_type: a ``DIARY_ORDER_TYPE`` value selecting the extra criteria.
    :param sort_params: mapping read for ``user_city_tag_id``; ``-1`` is sent to
        the script when the key is absent.
    :param recommend_detail: only consulted for ``RECOMMEND``; when it equals
        ``True`` the ``_score`` criterion is placed before ``recommend_score``.
    :return: a new list of sort clauses.
    """

    def by_field_desc(field_name):
        # Plain "sort by this field, highest first" clause.
        return {field_name: {"order": "desc"}}

    # Diaries flagged as sunk go last, followed by the organisation-level sink script.
    ordering = [
        {'is_sink': {'order': 'asc'}},
        {
            '_script': {
                'lang': settings.ES_SCRIPT_LANG,
                'script_file': 'sort_diary-sink-by-org',
                'type': 'number',
                'order': 'desc',
            }
        },
    ]

    wants_recommend = sort_type == DIARY_ORDER_TYPE.RECOMMEND
    wants_feed = (not wants_recommend) and sort_type == DIARY_ORDER_TYPE.FEED
    if not (wants_recommend or wants_feed):
        return ordering

    # RECOMMEND and FEED share the same scripted criterion.
    ordering.append({
        '_script': {
            'lang': settings.ES_SCRIPT_LANG,
            'script_file': 'sort_diary-recommend',
            'type': 'number',
            'params': {
                'user_city_tag_id': sort_params.get('user_city_tag_id', -1),
            },
            'order': 'desc',
            '_cache': True,
        }
    })

    if wants_feed:
        ordering.append(by_field_desc("feed_score"))
    elif recommend_detail == True:  # equality (not truthiness) kept on purpose
        ordering.append(by_field_desc("_score"))
        ordering.append(by_field_desc("recommend_score"))
    else:
        ordering.append(by_field_desc("recommend_score"))

    return ordering


# 日记帖过滤器
def generate_diary_note_filter(filters={}):
    """Translate diary-note filter options into Elasticsearch ``terms`` clauses.

    Only two keys are recognised, and only when their value is a non-empty
    ``list``:

    * ``raw_tag_ids`` -> ``terms`` on the ``tag_ids`` field
    * ``tag_ids``     -> ``terms`` on the ``closure_tag_ids`` field

    :param filters: mapping of filter name to value; a falsy value yields ``[]``.
    :return: list of clauses, in the iteration order of ``filters``.
    """
    # filter key -> index field the values are matched against
    target_field = {
        'raw_tag_ids': 'tag_ids',
        'tag_ids': 'closure_tag_ids',
    }

    if not filters:
        return []

    return [
        {'terms': {target_field[name]: values}}
        for name, values in filters.items()
        if name in target_field and isinstance(values, list) and values
    ]


def generate_filter(filters={}):
    """Build the list of ``must`` filter clauses for a diary query.

    Two clauses are always present: ``is_online`` must be ``True`` and
    ``normal_topic_count`` must be greater than 0. Recognised keys of
    ``filters`` each add one further clause on the field of the same name;
    unrecognised keys are ignored.

    :param filters: mapping of field name to the value to filter on.
    :return: a new list of Elasticsearch filter clauses.
    """
    # field name -> clause type ("terms" matches against a list of values,
    # "term" against a single value)
    clause_type = {
        "closure_tag_ids": "terms",
        "is_sink": "term",
        "has_before_cover": "term",
        "has_after_cover": "term",
        "content_level_is_good": "term",
        "content_level": "terms",
    }

    clauses = [
        {'term': {'is_online': True}},  # only online diaries
        {'range': {'normal_topic_count': {'gt': 0}}},  # at least one normal related topic
    ]
    clauses.extend(
        {clause_type[field]: {field: value}}
        for field, value in filters.items()
        if field in clause_type
    )
    return clauses


def generate_racall(query):
    """Build the ``multi_match`` recall clause for a free-text diary query.

    The query is matched as ``cross_fields`` with the ``and`` operator over a
    fixed set of fields, each carrying a ``field^boost`` weight.

    :param query: the text to search for.
    :return: ``{'multi_match': {...}}`` ready to be used as a query clause.
    """
    # (field, boost) pairs; order is kept in the generated ``fields`` list.
    boosted_fields = (
        ('tags', 6),
        ('doctor.name', 4),
        ('doctor.hospital.name', 3),
        ('doctor.hospital.officer_name', 3),
        ('user.last_name', 2),
        ('service.name', 1),
    )

    return {
        'multi_match': {
            'query': query,
            'type': 'cross_fields',
            'operator': 'and',
            'fields': ['%s^%d' % pair for pair in boosted_fields],
        }
    }


def recall_diary(query, offset, size, filters={}, sort_type=DIARY_ORDER_TYPE.DEFAULT, sort_params={}, fields=[],
                 get_query=False, have_read_diary=[], noarea_tags=[], two_type_tags=[], san_type_tags=[], all_tags=[]):
    """Recall diaries from the ``diary`` index, or only build the query body.

    :param query: free-text query; when falsy no text clause is added.
    :param offset: paging offset.
    :param size: page size, capped at ``settings.COUNT_LIMIT``.
    :param filters: passed to ``generate_filter`` to build the ``must`` filters.
    :param sort_type: passed to ``generate_sorting``.
    :param sort_params: passed to ``generate_sorting``.
    :param fields: ``_source`` fields to return; ``id`` and ``hospital.id`` are
        always added. The caller's list is not modified.
    :param get_query: when it equals ``True`` the query body is returned
        instead of being executed.
    :param have_read_diary: diary ids excluded via ``must_not`` (``get_query`` only).
    :param noarea_tags: values for a ``should`` clause on ``tags`` (``get_query`` only).
    :param two_type_tags: ``closure_tag_ids`` values scored with weight 500
        (``get_query`` only).
    :param san_type_tags: ``closure_tag_ids`` values scored with weight 1000
        (``get_query`` only).
    :param all_tags: values for a ``should`` clause on ``closure_tag_ids``
        (``get_query`` only).
    :return: the query body (``dict``) when ``get_query`` equals ``True``,
        otherwise the list of ``_source`` dicts of the hits.
    """
    size = min(size, settings.COUNT_LIMIT)

    filtered = {}
    if query:
        filtered['query'] = generate_racall(query)
    filtered['filter'] = {
        "bool": {
            "must": generate_filter(filters)
        }
    }

    # Work on a copy: appending to ``fields`` itself would mutate the shared
    # default list (and the caller's list) across calls.
    source_fields = list(fields)
    for required_field in ("id", "hospital.id"):
        if required_field not in source_fields:
            source_fields.append(required_field)

    q = {
        "_source": {
            "include": source_fields
        },
        "query": {
            "filtered": filtered
        },
        "sort": generate_sorting(sort_type, sort_params)
    }

    if get_query == True:  # equality kept so existing callers behave the same
        q["from"] = offset
        q["size"] = size

        bool_filter = filtered["filter"]["bool"]
        should_clauses = []
        bool_filter["should"] = should_clauses
        # NOTE(review): with no tag arguments ``should`` stays empty while
        # ``minimum_should_match`` is 1 -- confirm this is the intended query.
        bool_filter["minimum_should_match"] = 1

        if have_read_diary:
            bool_filter["must_not"] = {
                "terms": {
                    "id": list(set(have_read_diary))
                }
            }
        if noarea_tags:
            should_clauses.append({"terms": {"tags": noarea_tags}})
        if all_tags:
            should_clauses.append({"terms": {"closure_tag_ids": all_tags}})

        if not (san_type_tags or two_type_tags):
            return q

        # Score documents by which tag group they match; the scores are summed
        # and replace the score of the wrapped query.
        function_list = []
        if san_type_tags:
            function_list.append({
                "filter": {"terms": {"closure_tag_ids": san_type_tags}},
                "weight": 1000
            })
        if two_type_tags:
            function_list.append({
                "filter": {"terms": {"closure_tag_ids": two_type_tags}},
                "weight": 500
            })

        # NOTE(review): this body carries no "from"/"size" keys, exactly as
        # before -- confirm the caller supplies paging itself.
        return {
            "query": {
                "function_score": {
                    "functions": function_list,
                    "query": q["query"],
                    "boost_mode": "replace",
                    "score_mode": "sum",
                }
            },
            "sort": generate_sorting(sort_type, sort_params, recommend_detail=True),
            "_source": q["_source"],
        }

    res = es_query('diary', q, offset, size)
    return [hit["_source"] for hit in res["hits"]["hits"]]


def get_ori_query(query=None, device_id=""):
    """Derive stop-word-stripped variants and graph-related words for a query.

    For each stop word found in ``query``: if the query starts with it, the
    remainder after the stop word is kept; if it occurs later, the text before
    it is kept. Related words are then looked up through
    ``Neo4jManager.neo4j_get_related_word_by_graph`` (up to ``need_size=50``).

    :param query: the search text; when falsy both results are empty.
    :param device_id: currently unused.
    :return: ``(stripped_queries, graph_related_words)`` as two lists; both are
        empty if any error occurs (the error is logged, not raised).
    """
    try:
        ori_query_set = set()
        graph_related_word_set = set()

        if query:
            # NOTE(review): the second stop word starts with the first, so a
            # query beginning with it yields two different remainders, and a
            # query equal to a stop word yields "" -- confirm this is intended.
            for stop_word in ("手术", "手术法"):
                find_index = query.find(stop_word)
                if find_index == 0:
                    ori_query_set.add(query[len(stop_word):])
                elif find_index > 0:
                    ori_query_set.add(query[:find_index])

            graph_related_word_set = Neo4jManager.neo4j_get_related_word_by_graph(
                graph_obj=Neo4jManager.neo4j_get_graph_obj(), query_word=query,
                graph_related_word_set=graph_related_word_set, need_size=50, redis_client=redis_client)

        return list(ori_query_set), list(graph_related_word_set)
    except Exception:
        # Best effort: log and fall back to empty results. ``Exception`` rather
        # than a bare ``except`` so interpreter-exit signals still propagate.
        logger.error("catch exception,err_msg:%s", traceback.format_exc())
        return [], []


def get_wordrel_set(query=None):
    """Load the keyword set and the synonyms of ``query`` from Redis.

    Reads the JSON value stored under ``doris:total_keywords`` and the JSON
    value stored for ``query`` in the ``doris:key_word_synonym`` hash.

    :param query: the word whose synonyms are looked up.
    :return: ``(all_keyword_set, synonym_list)``; ``query`` itself is appended
        to ``synonym_list``. On any error the failure is logged and
        ``(set(), [])`` is returned instead.
    """
    try:
        all_word_rel_key = "doris:total_keywords"
        key_word_synonym_key = "doris:key_word_synonym"

        redis_data = redis_client.get(all_word_rel_key)
        all_keyword_set = set(json.loads(redis_data)) if redis_data else set()

        redis_synonym_data = redis_client.hget(key_word_synonym_key, query)
        synonym_list = json.loads(redis_synonym_data) if redis_synonym_data else []
        synonym_list.append(query)

        return all_keyword_set, synonym_list
    except Exception:
        # Best effort: log and fall back to empty results. ``Exception`` rather
        # than a bare ``except`` so interpreter-exit signals still propagate.
        logger.error("catch exception,err_msg:%s", traceback.format_exc())
        return set(), list()


def wordrel_some_list(analyze_res, wordrel_set, query):
    """Split analyzer tokens of a query into the term groups used for matching.

    :param analyze_res: analyzer response; ``analyze_res["tokens"]`` is read as
        an iterable of dicts that each carry a ``"token"`` entry.
    :param wordrel_set: container of key words; tokens found in it count as
        key-word hits.
    :param query: the original query text.
    :return: a 4-tuple

        1. all terms: ``query`` plus every token (deduplicated, unordered),
        2. whole-query match terms: always ``[query]``,
        3. the tokens present in ``wordrel_set`` (deduplicated, unordered),
        4. those hit tokens concatenated in token order (duplicates kept).

        On any error the failure is logged and ``([], [], [], "")`` is returned.
    """
    try:
        tokens = [item["token"] for item in analyze_res["tokens"]]

        # All terms: the whole query plus every analyzer token.
        total_query_term_set = {query}
        total_query_term_set.update(tokens)

        # Tokens that are known key words, in their original order.
        hit_tokens = [token for token in tokens if token in wordrel_set]

        return (list(total_query_term_set), [query], list(set(hit_tokens)),
                "".join(hit_tokens))
    except Exception:
        logging.error("catch exception,err_msg:%s", traceback.format_exc())
        # The fourth slot is a string on success, so it is one on failure too.
        return ([], [], [], "")


def get_cpc_diary(query=None, old_tag_id_list=list(), device_id="", city_id=-1, total_match_query_dict=None,
                  cpc_diary_num=2):
    """
    Fetch the CPC diary ids to show for a device in a city.

    Candidate diaries and their scores come from the Redis hash
    ``device_ecpm_diary:device_id:<device_id>:city_id:<city_id>``. Candidates
    are narrowed through Elasticsearch (online diaries matching the text query
    or, without a text query, the given tags) and the best-scored ones are
    returned. When a text query produced results, a per-word counter hash is
    incremented and set to expire at the end of the current day.

    :param query: search text; used to build a match clause when
        ``total_match_query_dict`` is not given.
    :param old_tag_id_list: tag ids matched against ``closure_tag_ids``; only
        used when ``query`` is None.
    :param device_id: device identifier, part of the Redis key.
    :param city_id: city identifier, part of the Redis key and the counter field.
    :param total_match_query_dict: ready-made Elasticsearch match clause.
    :param cpc_diary_num: maximum number of diary ids to return (at least one
        id is returned whenever something matched).
    :return: list of diary ids ordered by score, highest first; an empty list
        when nothing matched or any error occurred (errors are logged).
    """
    try:
        redis_key_name = "device_ecpm_diary:device_id:" + device_id + ":city_id:" + str(city_id)
        raw_scores = redis_client.hgetall(redis_key_name)
        # Hash keys may arrive as bytes or (with response decoding on) as str.
        score_by_diary_id = {
            (diary_id.decode('utf-8') if isinstance(diary_id, (bytes, bytearray)) else str(diary_id)): score
            for diary_id, score in raw_scores.items()
        }

        cpc_diary_list = list()
        if score_by_diary_id:
            if total_match_query_dict is None and query is not None:
                multi_fields = {
                    'tags': 8,
                    'doctor.name': 4,
                    'doctor.hospital.name': 3,
                    'doctor.hospital.officer_name': 3,
                    'user.last_name': 2,
                    'service.name': 1
                }
                total_match_query_dict = {
                    "multi_match": {
                        "query": query,
                        "fields": ['^'.join((k, str(v))) for (k, v) in multi_fields.items()],
                        "analyzer": "gm_default_index",
                        "operator": "and",
                        "type": "best_fields"
                    }
                }

            must_clauses = [
                {"term": {"is_online": True}},
                {"terms": {"id": list(score_by_diary_id.keys())}},
            ]
            # Add the match clause exactly once, and never as a literal None
            # (which made the tag-only request an invalid query).
            if total_match_query_dict is not None:
                must_clauses.insert(0, total_match_query_dict)
            if query is None and old_tag_id_list:
                must_clauses.append({"terms": {"closure_tag_ids": old_tag_id_list}})

            q = {
                "query": {
                    "bool": {
                        "must": must_clauses
                    }
                },
                "_source": {
                    "include": ["id"]
                }
            }

            es = get_es()
            index = es_index_adapt(
                index_prefix=settings.ES_INDEX_PREFIX,
                doc_type='diary',
                rw='read'
            )
            res = es.search(
                index=index,
                doc_type='diary',
                timeout=settings.ES_SEARCH_TIMEOUT,
                body=q,
                from_=0,
                size=100)

            matched_scores = {hit["_id"]: score_by_diary_id[hit["_id"]] for hit in res["hits"]["hits"]}
            ranked_ids = sorted(matched_scores, key=lambda diary_id: float(matched_scores[diary_id]), reverse=True)
            # The previous loop always kept at least one id; preserve that.
            cpc_diary_list = ranked_ids[:max(cpc_diary_num, 1)]

        if query is not None and cpc_diary_list:
            diary_cpc_static_redis_key = "doris:diary_cpc_query:word:" + str(query)
            redis_client.hincrby(diary_cpc_static_redis_key, city_id, len(cpc_diary_list))

            # Expire the counter at the next midnight, measured from a single
            # clock read; never pass 0, which would drop the key immediately.
            now = datetime.datetime.now()
            next_midnight = datetime.datetime.combine(now.date() + datetime.timedelta(days=1), datetime.time.min)
            expire_time = max(int((next_midnight - now).total_seconds()), 1)
            redis_client.expire(diary_cpc_static_redis_key, expire_time)

        return cpc_diary_list
    except Exception:
        # Best effort: log and return nothing. ``Exception`` rather than a bare
        # ``except`` so interpreter-exit signals still propagate.
        logging_exception()
        logging.error("catch exception,err_msg:%s", traceback.format_exc())
        return list()
