from django.conf import settings
from gm_types.doris import ANSWER_SORT_TYPE
from libs.es import es_query, get_highlight_query_analyzer, get_highlight
import logging
import traceback


def generate_sorting(sort_type=ANSWER_SORT_TYPE.DEFAULT, sort_params={}, recommend_detail=False):
    """Build the Elasticsearch ``sort`` clause list for an answer query.

    :param sort_type: one of the ``ANSWER_SORT_TYPE`` values; any value not
        handled below yields an empty sort list.
    :param sort_params: accepted for interface compatibility; not read here.
    :param recommend_detail: when true, ``_score`` is added to the sort for
        the DEFAULT and RECOMMEND types (after ``smart_rank`` for DEFAULT,
        before it for RECOMMEND).
    :return: list of single-key sort dicts, all in descending order.
    """
    by_score = {'_score': {'order': 'desc'}}
    by_smart_rank = {'smart_rank': {'order': 'desc'}}

    if sort_type == ANSWER_SORT_TYPE.DEFAULT:
        # smart_rank leads; relevance only breaks ties in detail mode.
        return [by_smart_rank, by_score] if recommend_detail else [by_smart_rank]

    if sort_type == ANSWER_SORT_TYPE.RECOMMEND:
        # In detail mode relevance leads, then smart_rank / recommend_score.
        leading = [by_score] if recommend_detail else []
        return leading + [
            by_smart_rank,
            {"recommend_score": {"order": "desc"}},
        ]

    if sort_type == ANSWER_SORT_TYPE.LASTEST_CREATE:
        return [{"create_time": {"order": "desc"}}]

    return []


def generate_filter(filters={}):
    """Translate a ``filters`` mapping into a list of ES filter clauses.

    The result always starts with ``is_online == True``. Recognised keys are
    turned into ``terms`` clauses (``term`` for ``is_recommend``), in the
    iteration order of ``filters``; unrecognised keys are ignored.
    ``content_type`` is only applied when its value is truthy.

    :param filters: mapping of filter name to filter value.
    :return: list of clause dicts suitable for a ``bool.must`` filter.
    """
    # filter key -> (ES clause type, indexed field name)
    clause_specs = {
        "tag_ids": ("terms", "tag_ids"),
        "content_level": ("terms", "content_level"),
        "user_ids": ("terms", "user_id"),
        "question_types": ("terms", "question_type"),
        "is_recommend": ("term", "is_recommend"),
        "content_type": ("terms", "content_type"),
    }

    clauses = [{'term': {'is_online': True}}]
    for name, value in filters.items():
        spec = clause_specs.get(name)
        if spec is None:
            continue
        # An empty content_type selection means "no restriction".
        if name == "content_type" and not value:
            continue
        clause_type, field_name = spec
        clauses.append({clause_type: {field_name: value}})

    return clauses


def recall_answers(query, offset, size, filters={}, sort_type=ANSWER_SORT_TYPE.DEFAULT, sort_params={}, fields=[],
                   get_query=False, have_read_question=[], noarea_tags=[], two_type_tags=[], san_type_tags=[],
                   all_tags=[], content_star_keyword=[], content_keyword=[]):
    """Recall answers from the ``answer`` index, or only build the query body.

    The base query is a ``filtered`` query: an optional ``multi_match`` over
    title/desc/answer (when ``query`` is non-empty) plus the clauses from
    ``generate_filter(filters)``.

    :param query: search text; falsy means "filter only".
    :param offset: result offset.
    :param size: page size, capped at ``settings.COUNT_LIMIT``.
    :param filters: mapping passed to ``generate_filter``.
    :param sort_type: ``ANSWER_SORT_TYPE`` value passed to ``generate_sorting``.
    :param sort_params: passed through to ``generate_sorting``.
    :param fields: ``_source`` fields to return; ``"id"`` is always included.
        The caller's list is not modified.
    :param get_query: when ``True`` the query body is returned instead of
        being executed.
    :param have_read_question: ids excluded via ``must_not`` (query mode only).
    :param noarea_tags: values matched against ``tag_name_analyze`` as a
        ``should`` clause (query mode only).
    :param two_type_tags: tag ids boosted with weight 500.
    :param san_type_tags: tag ids boosted with weight 1000.
    :param all_tags: tag ids matched against ``tag_ids`` as a ``should`` clause.
    :param content_star_keyword: keywords matched/boosted on
        ``content_star_keyword``; the first one is additionally boosted on
        ``content_star_first_keyword``.
    :param content_keyword: keywords matched/boosted on ``content_keyword``.
    :return: in query mode, the query dict (wrapped in ``function_score``
        when any boost input is given); otherwise the list of ``_source``
        dicts of the hits returned by ``es_query``.
    """
    size = min(size, settings.COUNT_LIMIT)

    filtered = {}

    if query:
        multi_fields = {
            'title': 2,
            'desc': 2,
            "answer": 2,
        }
        query_fields = ['^'.join((k, str(v))) for (k, v) in multi_fields.items()]

        multi_match = {
            'query': query,
            'type': 'cross_fields',
            'operator': 'and',
            'fields': query_fields,
        }
        filtered['query'] = {'multi_match': multi_match}

    sorting = generate_sorting(sort_type, sort_params)
    bool_filter = {"must": generate_filter(filters)}
    filtered['filter'] = {"bool": bool_filter}

    # Work on a copy: appending to ``fields`` directly would mutate the
    # caller's list and the shared default argument across calls.
    source_fields = list(fields)
    if "id" not in source_fields:
        source_fields.append("id")

    q = {
        "_source": {
            "include": source_fields
        },
        "query": {
            "filtered": filtered
        },
        "sort": sorting,
    }

    # Equality (not truthiness) is kept so the set of values selecting
    # query mode is exactly what it was before.
    if get_query == True:
        q["from"] = offset
        q["size"] = size
        bool_filter["should"] = []
        bool_filter["minimum_should_match"] = 1

        if len(have_read_question) > 0:
            bool_filter["must_not"] = {
                "terms": {
                    "id": list(set(have_read_question))
                }
            }

        if len(noarea_tags) > 0:
            bool_filter["should"].append({
                "terms": {
                    "tag_name_analyze": noarea_tags
                }
            })

        if len(all_tags) > 0:
            bool_filter["should"].append({
                "terms": {
                    "tag_ids": all_tags
                }
            })

        if len(san_type_tags) or len(two_type_tags) or len(content_star_keyword) or len(content_keyword):
            function_list = []
            if len(san_type_tags):
                function_list.append({
                    "filter": {
                        "terms": {
                            "tag_ids": san_type_tags
                        }
                    },
                    "weight": 1000
                })
            if len(two_type_tags):
                function_list.append({
                    "filter": {
                        "terms": {
                            "tag_ids": two_type_tags
                        }
                    },
                    "weight": 500
                })

            if len(content_keyword) > 0:
                bool_filter["should"].append({
                    "terms": {
                        "content_keyword": content_keyword
                    }
                })

                # Earlier keywords weigh more; with three keywords the
                # weights are 3100, 2100, 1100.
                content_length = len(content_keyword)
                for position, keyword in enumerate(content_keyword):
                    function_list.append({
                        "filter": {
                            "term": {"content_keyword": keyword}
                        },
                        "weight": (content_length - position) * 1000 + 100
                    })

            if len(content_star_keyword) > 0:
                content_star_first_keyword = content_star_keyword[0]

                bool_filter["should"].append({
                    "terms": {
                        "content_star_keyword": content_star_keyword
                    }
                })

                function_list.append({
                    "filter": {
                        "term": {"content_star_first_keyword": content_star_first_keyword}
                    },
                    "weight": 9500
                })

                # Earlier keywords weigh more; with three keywords the
                # weights are 9200, 6200, 3200.
                star_length = len(content_star_keyword)
                for position, keyword in enumerate(content_star_keyword):
                    function_list.append({
                        "filter": {
                            "term": {"content_star_keyword": keyword}
                        },
                        "weight": (star_length - position) * 3000 + 200
                    })

            # NOTE(review): this wrapper carries over "query", "sort" and
            # "_source" but not "from"/"size" -- presumably the caller applies
            # paging itself; confirm.
            new_q = {
                "query": {
                    "function_score": {
                        "functions": function_list,
                        "query": q["query"],
                        "boost_mode": "replace",
                        "score_mode": "sum",
                    }
                }
            }
            new_q["sort"] = generate_sorting(sort_type, sort_params, recommend_detail=True)
            new_q["_source"] = q["_source"]
            logging.info("answer detail recommend, query:%s" % str(new_q))
            return new_q
        return q

    res = es_query('answer', q, offset, size)
    hits = res["hits"]["hits"]
    return [hit["_source"] for hit in hits]


def recall_comprehensive_ans(query, filters={}, sort_type=ANSWER_SORT_TYPE.DEFAULT,
                             sort_params={}, fields=[], recall_again=False, total_query_term_list=[],
                             correct_term_list=[], other_key_word_term_list=[], offset=0, size=100,
                             have_read_id_list=[], question_id_have_read_list=[], other_key_word_term_str="",
                             closure_tags=[]):
    """Build the ``function_score`` query body for comprehensive answer search.

    The base query requires an "or"/"best_fields" ``multi_match`` of ``query``
    over title/answer/desc and ``is_online == True``. Score functions then
    add fixed weights (``boost_mode`` "replace", ``score_mode`` "sum") for
    full matches, title matches, core-word matches and closure-tag matches.

    :param query: the search text.
    :param correct_term_list: terms used in ``terms`` clauses for the
        full-match boosts.
    :param other_key_word_term_list: core-word terms; when non-empty, two
        extra low-weight functions are added.
    :param other_key_word_term_str: core words as one string, matched with
        the ``whitespace`` analyzer.
    :param size: value placed in the query's ``size``.
    :param have_read_id_list: answer ids excluded via ``must_not`` on ``id``.
    :param question_id_have_read_list: question ids excluded via
        ``must_not`` on ``question_id``.
    :param closure_tags: tags boosted on tag_name/title/answer/desc and also
        added as ``should`` clauses of the base query.
    :param filters, sort_type, sort_params, fields, recall_again,
        total_query_term_list, offset: accepted for interface compatibility;
        not read by this function.
    :return: the query dict, or an empty dict if building it raised.
    """
    try:
        # The title is weighted separately from the other text fields.
        title_term_list = ["title"]
        fields_term_list = ["answer", "desc"]
        total_term_list = ["title", "answer", "desc"]

        multi_fields = {
            'title': 40,
            'answer': 3,
            'desc': 2,
        }

        fields_weight_list = ['^'.join((k, str(v))) for (k, v) in multi_fields.items()]

        # The loose match is always used for the base query, regardless of
        # ``recall_again``.
        query_operator = "or"
        query_type = "best_fields"

        # Full match on answer/desc terms, or on the whole query with "and".
        total_match_query_list = [
            {"terms": {field_name: correct_term_list}}
            for field_name in fields_term_list
        ]
        total_match_query_list.append({
            "multi_match": {
                "query": query,
                "fields": fields_weight_list,
                "analyzer": "gm_default_index",
                "operator": "and",
                "type": "best_fields"
            }
        })

        # Boost for a full match in the title.
        title_total_match_list = [
            {
                "terms": {
                    "title": correct_term_list
                }
            },
            {
                "multi_match": {
                    "query": query,
                    "fields": title_term_list,
                    "analyzer": "gm_default_index",
                    "operator": "and",
                    "type": "best_fields"
                }
            }
        ]

        # Core-word handling.
        other_key_title_query_list = list()
        other_key_query_list = list()
        if len(other_key_word_term_list) > 0:
            for field_name in fields_term_list:
                other_key_query_list.append({
                    "terms": {
                        field_name: other_key_word_term_list
                    }
                })

            other_key_query_list.append({
                "multi_match": {
                    "query": other_key_word_term_str,
                    "fields": fields_weight_list,
                    "analyzer": "whitespace",
                }
            })

            # Title boost for core words.
            other_key_title_query_list.append({
                "terms": {
                    "title": other_key_word_term_list
                }
            })

        # NOTE(review): "from" is fixed at 0 and ``offset`` is not used --
        # presumably paging is applied by the caller; confirm.
        q = {
            "from": 0,
            "size": size,
            "_source": {
                "include": ["id", "user_id", "question_id", "title", "smart_rank", "highlight"]
            },
            "query": {
                "function_score": {
                    "functions": [{
                        "filter": {
                            "bool": {
                                "should": total_match_query_list,
                                "minimum_should_match": 1
                            }
                        },
                        "weight": 10000
                    }, {
                        "filter": {
                            "bool": {
                                "should": title_total_match_list,
                                "minimum_should_match": 1
                            }
                        },
                        "weight": 15000
                    }, {
                        "filter": {
                            "multi_match": {
                                "fields": total_term_list,
                                "operator": "or",
                                "type": "best_fields",
                                "analyzer": "gm_default_index",
                                "query": query
                            }
                        },
                        "weight": 1000
                    }],
                    "boost_mode": "replace",
                    "score_mode": "sum",
                    "query": {
                        "bool": {
                            "must": [{
                                "multi_match": {
                                    "fields": fields_weight_list,
                                    "operator": query_operator,
                                    "type": query_type,
                                    "query": query,
                                    "analyzer": "gm_default_index"
                                }
                            }, {
                                "term": {
                                    "is_online": True
                                }
                            }],
                            "minimum_should_match": 1
                        }
                    }
                }
            },
            "sort": [
                {'_score': {'order': 'desc'}},
                {'smart_rank': {'order': 'desc'}}
            ],
            "highlight": get_highlight_query_analyzer(["title"], query)
        }

        score_functions = q["query"]["function_score"]["functions"]
        base_bool = q["query"]["function_score"]["query"]["bool"]

        if len(other_key_query_list) > 0:
            score_functions += [
                {
                    "filter": {
                        "bool": {
                            "should": other_key_query_list,
                            "minimum_should_match": 1
                        }
                    },
                    "weight": 10
                },
                {
                    "filter": {
                        "bool": {
                            "should": other_key_title_query_list,
                            "minimum_should_match": 1
                        }
                    },
                    "weight": 15
                }
            ]

        closure_fields = ["tag_name", "title", "answer", "desc"]
        if len(closure_tags) > 0:
            for field_name in closure_fields:
                score_functions.append({
                    "filter": {
                        "terms": {
                            field_name: closure_tags
                        }
                    },
                    "weight": 10000
                })

        base_bool["must_not"] = list()
        base_bool["should"] = list()
        if len(have_read_id_list) > 0:
            base_bool["must_not"].append({
                "terms": {
                    "id": have_read_id_list
                }
            })
        if len(question_id_have_read_list) > 0:
            # Exclude by the question-id list itself; this previously reused
            # ``have_read_id_list`` (answer ids) by mistake.
            base_bool["must_not"].append({
                "terms": {
                    "question_id": question_id_have_read_list
                }
            })
        if len(closure_tags) > 0:
            for field_name in closure_fields:
                base_bool["should"].append({
                    "terms": {
                        field_name: closure_tags
                    }
                })

        return q
    except Exception:
        # Best effort: log and hand back an empty query rather than raising.
        # ``Exception`` (not a bare except) lets KeyboardInterrupt/SystemExit
        # propagate.
        logging.error("catch exception,err_msg:%s" % traceback.format_exc())
        return dict()


def recall_ans(query, offset, size, filters={}, sort_type=ANSWER_SORT_TYPE.DEFAULT, sort_params={}, fields=[],
               recall_again=False, have_read_answer_list=[]):
    """Search the ``answer`` index for ``query`` and return the raw hits.

    Matching is "and"/"cross_fields" on the first pass and "or"/"best_fields"
    when ``recall_again`` is set. Matches in title/answer/desc add weights
    3/2/1 to the score.

    :param query: the search text.
    :param offset: result offset passed to ``es_query``.
    :param size: page size, capped at ``settings.COUNT_LIMIT``.
    :param recall_again: selects the looser matching and the plain
        ``_score`` / ``smart_rank`` sort instead of the script sort.
    :param have_read_answer_list: answer ids excluded via ``must_not``.
    :param filters, sort_type, sort_params, fields: accepted for interface
        compatibility; not read by this function.
    :return: ``res["hits"]["hits"]`` from ``es_query``.
    """
    size = min(size, settings.COUNT_LIMIT)

    if recall_again:
        query_operator, query_type = "or", "best_fields"
    else:
        query_operator, query_type = "and", "cross_fields"

    # One weighted match function per text field.
    field_weights = (("title", 3), ("answer", 2), ("desc", 1))
    score_functions = [
        {"filter": {"match": {field_name: query}}, "weight": weight}
        for field_name, weight in field_weights
    ]

    text_match = {
        "multi_match": {
            "fields": ["title^1", "desc^1", "answer^1"],
            "operator": query_operator,
            "type": query_type,
            "query": query
        }
    }
    bool_filter = {"must": [text_match, {"term": {"is_online": True}}]}

    q = {
        "query": {
            "function_score": {
                "functions": score_functions,
                "boost_mode": "sum",
                "score_mode": "sum",
                "query": {"filtered": {"filter": {"bool": bool_filter}}}
            }
        },
        "_source": {
            "include": ["id", "user_id", "question_id", "title", "smart_rank", "highlight"]
        },
        "highlight": get_highlight_query_analyzer(["title", "desc", "answer"], query)
    }

    if len(have_read_answer_list) > 0:
        bool_filter['must_not'] = {"terms": {"id": have_read_answer_list}}

    if recall_again:
        q["sort"] = [
            {'_score': {'order': 'desc'}},
            {'smart_rank': {'order': 'desc'}}
        ]
    else:
        # First pass: blend relevance with smart_rank in a script sort,
        # falling back to plain relevance.
        blended_sort = {
            "_script": {
                "order": "desc",
                "script": {
                    "inline": "_score*factor+doc['smart_rank'].value*factors*0.01",
                    "params": {
                        "factor": 0.3,
                        "factors": 0.7,
                    }
                },
                "type": "number"
            }
        }
        q["sort"] = [blended_sort, {'_score': {'order': 'desc'}}]

    res = es_query('answer', q, offset, size)
    logging.info("get auery:%s" % q)

    hits = res["hits"]["hits"]

    logging.info("get hits:%s" % hits)
    return hits
