from django.conf import settings
from gm_types.doris import ARTICLE_SORT_TYPE
from libs.es import es_query, get_highlight_query_analyzer, get_highlight
import traceback
import logging


def gen_sorting(sort_type=ARTICLE_SORT_TYPE.DEFAULT, sort_params=None, recommend_detail=False):
    """Build the Elasticsearch ``sort`` clause list for an article query.

    :param sort_type: an ``ARTICLE_SORT_TYPE`` value. ``DEFAULT`` ranks by
        ``smart_rank`` and ``RECOMMEND`` ranks by ``recommend_score``, both
        descending. Any other value yields an empty list.
    :param sort_params: accepted for interface compatibility; it is not read
        by this function.
    :param recommend_detail: when truthy, a descending ``_score`` clause is
        placed in front of the ranking field.
    :return: a new list of sort clauses (possibly empty).
    """
    if sort_type == ARTICLE_SORT_TYPE.DEFAULT:
        rank_field = 'smart_rank'
    elif sort_type == ARTICLE_SORT_TYPE.RECOMMEND:
        rank_field = 'recommend_score'
    else:
        # Unknown sort types produce no explicit ordering.
        return []

    sorting = []
    if recommend_detail:
        sorting.append({"_score": {"order": "desc"}})
    sorting.append({rank_field: {"order": "desc"}})
    return sorting


def gen_filter(filters=None):
    """Build the list of Elasticsearch filter clauses for an article query.

    The result always starts with an ``is_online == True`` term clause.
    Recognised keys of ``filters`` each add one ``terms`` clause:

    * ``tag_ids``  -> matched against the ``tag_ids`` field
    * ``user_ids`` -> matched against the ``user.id`` field

    Unrecognised keys are ignored.

    :param filters: mapping of filter name to a list of values, or ``None``
        for no extra filters.
    :return: a new list of filter clauses.
    """
    # Supported filter key -> document field the values are matched against.
    # NOTE: a ``content_level`` filter existed here as commented-out code and
    # remains disabled.
    field_by_key = {
        "tag_ids": "tag_ids",
        "user_ids": "user.id",
    }

    clauses = [
        {'term': {'is_online': True}},
    ]
    for key, values in (filters or {}).items():
        field = field_by_key.get(key)
        if field is not None:
            clauses.append({"terms": {field: values}})
    return clauses


def gen_query(query):
    """Return a ``cross_fields`` multi_match clause for ``query``.

    The clause searches ``title``, ``content`` and ``tags`` (each boosted by
    2) and requires every term to match (``operator: and``).

    :param query: the search text.
    :return: a dict of the form ``{'multi_match': {...}}``.
    """
    boosts = (('title', 2), ('content', 2), ('tags', 2))
    # Elasticsearch expresses a per-field boost as "<field>^<boost>".
    boosted_fields = ['%s^%s' % (name, boost) for name, boost in boosts]
    return {
        'multi_match': {
            'query': query,
            'type': 'cross_fields',
            'operator': 'and',
            'fields': boosted_fields,
        },
    }


def recall_articles(query, offset, size, filters=None, sort_type=ARTICLE_SORT_TYPE.DEFAULT, sort_params=None,
                    fields=None, get_query=False, have_read_article=None, noarea_tags=None, two_type_tags=None,
                    san_type_tags=None, all_tags=None, content_star_keyword=None):
    """Recall articles from the ``article`` index, or build a recommend query.

    With ``get_query`` falsy the query is executed through ``es_query`` and
    the ``_source`` of every hit is returned. With ``get_query`` truthy no
    search is run; the query body is returned for the caller to execute.

    :param query: search text; when falsy no text query is added.
    :param offset: paging offset.
    :param size: page size, capped at ``settings.COUNT_LIMIT``.
    :param filters: mapping handed to ``gen_filter``.
    :param sort_type: ``ARTICLE_SORT_TYPE`` value handed to ``gen_sorting``.
    :param sort_params: handed to ``gen_sorting`` unchanged.
    :param fields: ``_source`` fields to return; ``id`` is always included.
        The caller's list is never modified.
    :param get_query: return the query body instead of executing it.
    :param have_read_article: article ids to exclude (``must_not``).
    :param noarea_tags: values matched against ``tags`` (``should``).
    :param two_type_tags: ``tag_ids`` values that score a weight of 500.
    :param san_type_tags: ``tag_ids`` values that score a weight of 1000.
    :param all_tags: values matched against ``tag_ids`` (``should``).
    :param content_star_keyword: ordered keywords; the first one scores 3000
        on ``content_star_first_keyword`` and each keyword scores
        ``(count - position) * 900`` on ``content_star_keyword``.
    :return: a list of ``_source`` dicts, or a query dict when ``get_query``
        is truthy.
    """
    size = min(size, settings.COUNT_LIMIT)

    filtered = {}
    if query:
        filtered['query'] = gen_query(query)
    bool_filter = {"must": gen_filter(filters or {})}
    filtered['filter'] = {"bool": bool_filter}

    # Work on a copy so that neither the caller's list nor a shared default
    # is mutated when "id" has to be added.
    source_fields = list(fields or [])
    if "id" not in source_fields:
        source_fields.append("id")

    q = {
        "_source": {
            "include": source_fields
        },
        "query": {
            "filtered": filtered
        },
        "sort": gen_sorting(sort_type, sort_params),
    }

    if not get_query:
        res = es_query('article', q, offset, size)
        return [hit["_source"] for hit in res["hits"]["hits"]]

    q["from"] = offset
    q["size"] = size

    # NOTE(review): if none of noarea_tags / all_tags / content_star_keyword
    # is supplied, `should` stays empty while minimum_should_match is 1 --
    # confirm how the target Elasticsearch version treats that combination.
    should = []
    bool_filter["should"] = should
    bool_filter["minimum_should_match"] = 1

    if have_read_article:
        bool_filter["must_not"] = {
            "terms": {
                "id": list(set(have_read_article))
            }
        }
    if noarea_tags:
        should.append({"terms": {"tags": noarea_tags}})
    if all_tags:
        should.append({"terms": {"tag_ids": all_tags}})

    if not (san_type_tags or two_type_tags or content_star_keyword):
        return q

    function_list = []
    if san_type_tags:
        function_list.append({
            "filter": {"terms": {"tag_ids": san_type_tags}},
            "weight": 1000
        })
    if two_type_tags:
        function_list.append({
            "filter": {"terms": {"tag_ids": two_type_tags}},
            "weight": 500
        })

    if content_star_keyword:
        should.append({
            "terms": {"content_star_keyword": content_star_keyword}
        })
        function_list.append({
            "filter": {
                "term": {"content_star_first_keyword": content_star_keyword[0]}
            },
            "weight": 3000
        })

        # Earlier keywords receive a larger weight than later ones.
        star_length = len(content_star_keyword)
        for position, keyword in enumerate(content_star_keyword):
            function_list.append({
                "filter": {
                    "term": {"content_star_keyword": keyword}
                },
                "weight": (star_length - position) * 900
            })

    # The function_score wrapper carries only query, sort and _source; the
    # "from"/"size" set on `q` above are not copied into it.
    new_q = {
        "query": {
            "function_score": {
                "functions": function_list,
                "query": q["query"],
                "boost_mode": "replace",
                "score_mode": "sum",
            }
        },
        "sort": gen_sorting(sort_type, sort_params, recommend_detail=True),
        "_source": q["_source"],
    }
    logging.info("article detail recommend, query:%s", new_q)
    return new_q


def _best_fields_match(text, fields):
    """Return an AND ``best_fields`` multi_match clause using ``gm_default_index``."""
    return {
        "multi_match": {
            "query": text,
            "fields": fields,
            "analyzer": "gm_default_index",
            "operator": "and",
            "type": "best_fields"
        }
    }


def _weighted_should_filter(clauses, weight):
    """Return a function_score function that adds ``weight`` when any clause matches."""
    return {
        "filter": {
            "bool": {
                "should": clauses,
                "minimum_should_match": 1
            }
        },
        "weight": weight
    }


def recall_comprehensive_art(query, use_fresh_tag=False, correct_term_list=None, other_key_word_term_list=None,
                             offset=0, size=10, have_read_id_list=None, closure_tags=None):
    """Build the function_score query body for comprehensive article search.

    This function only builds the query; it does not execute it.

    Base query: an OR ``multi_match`` of ``query`` over title / content /
    tag field, restricted to ``is_online`` documents. Scoring functions are
    summed and added to the base score:

    * 1000 - any searched field matches ``correct_term_list`` or all of ``query``
    * 1500 - the title matches ``correct_term_list`` or all of ``query``
    * 500 / 550 - the same two checks for ``other_key_word_term_list``
      (only its first element is used as the text query)
    * 10000 - the tag field matches any of ``closure_tags``

    :param query: search text.
    :param use_fresh_tag: use the ``fresh_tags`` field instead of ``tags``.
    :param correct_term_list: terms matched exactly against each field.
    :param other_key_word_term_list: secondary keywords, see above.
    :param offset: not read here; the body always carries ``"from": 0``.
        NOTE(review): presumably paging is applied by the caller -- confirm.
    :param size: value of the ``size`` key in the body.
    :param have_read_id_list: article ids to exclude (``must_not``).
    :param closure_tags: expanded search tags, added as a ``should`` clause
        and as the 10000-weight function.
    :return: the query dict, or an empty dict if building it raised.
    """
    try:
        correct_term_list = [] if correct_term_list is None else correct_term_list
        tag_field = "fresh_tags" if use_fresh_tag else "tags"

        # The title is weighted on its own in addition to the combined fields.
        title_term_list = ["title"]
        fields_term_list = ["content", tag_field]
        field_boosts = (("title", 4), ("content", 3), (tag_field, 3))
        fields_weight_list = ['^'.join((name, str(boost))) for name, boost in field_boosts]

        # Clauses for "the whole query / corrected terms hit some field".
        total_match_query_list = [
            {"terms": {field_name: correct_term_list}} for field_name in fields_term_list
        ]
        total_match_query_list.append(_best_fields_match(query, fields_weight_list))

        # Extra weight for hits on the title.
        title_total_match_list = [
            {"terms": {"title": correct_term_list}},
            _best_fields_match(query, title_term_list),
        ]

        functions = [
            _weighted_should_filter(total_match_query_list, 1000),
            _weighted_should_filter(title_total_match_list, 1500),
        ]

        # Core-keyword logic: same two checks with lower weights.
        if other_key_word_term_list:
            first_key_word = other_key_word_term_list[0]

            other_key_query_list = [
                {"terms": {field_name: other_key_word_term_list}} for field_name in fields_term_list
            ]
            other_key_query_list.append(_best_fields_match(first_key_word, fields_weight_list))

            # Title weighting for the core keywords.
            other_key_title_query_list = [
                {"terms": {"title": other_key_word_term_list}},
                _best_fields_match(first_key_word, title_term_list),
            ]

            functions += [
                _weighted_should_filter(other_key_query_list, 500),
                _weighted_should_filter(other_key_title_query_list, 550),
            ]

        bool_query = {
            "must": [{
                "multi_match": {
                    "fields": fields_weight_list,
                    "operator": "or",
                    "type": "best_fields",
                    "query": query
                }
            }, {
                "term": {
                    "is_online": True
                }
            }]
        }

        if have_read_id_list:
            bool_query["must_not"] = {
                "terms": {
                    "id": have_read_id_list
                }
            }

        # Search-term expansion.
        if closure_tags:
            bool_query["should"] = [{"terms": {tag_field: closure_tags}}]
            functions.append({
                "filter": {
                    "terms": {tag_field: closure_tags}
                },
                "weight": 10000
            })

        return {
            "from": 0,
            "size": size,
            "_source": {
                "include": ["id"]
            },
            "query": {
                "function_score": {
                    "functions": functions,
                    "boost_mode": "sum",
                    "score_mode": "sum",
                    "query": {
                        "bool": bool_query
                    }
                }
            },
            "sort": [
                {"_score": {"order": "desc"}},
                {"smart_rank": {"order": "desc"}}
            ],
            "highlight": get_highlight_query_analyzer(["title", "ask"], query)
        }
    except Exception:
        # Best effort: log the failure and hand back an empty query. Only
        # Exception is caught so KeyboardInterrupt / SystemExit propagate.
        logging.error("catch exception,err_msg:%s", traceback.format_exc())
        return dict()


def recall_art(query, offset, size, filters=None, sort_type=ARTICLE_SORT_TYPE.DEFAULT, sort_params=None, fields=None,
               use_fresh_tag=False):
    """Search the ``article`` index for ``query`` and return the raw hits.

    Documents must be online and match every term of ``query`` across title,
    content and the tag field (``cross_fields``). Matches on title, content
    and ``tags`` add weights 3, 2 and 1. Results are ordered by a script that
    combines ``_score`` (factor 0.3) with ``smart_rank`` (factor 0.7, scaled
    by 0.01), then by ``_score``.

    NOTE(review): ``filters``, ``sort_type`` and ``sort_params`` are accepted
    but not applied to the query that is sent. The previous code built a
    filter from ``filters`` and then discarded it -- confirm whether the
    filters should be applied.

    :param query: search text.
    :param offset: paging offset handed to ``es_query``.
    :param size: page size, capped at ``settings.COUNT_LIMIT``.
    :param filters: currently unused, see note above.
    :param sort_type: currently unused, see note above.
    :param sort_params: currently unused, see note above.
    :param fields: ``_source`` fields to return; ``id`` is always included.
        The caller's list is never modified.
    :param use_fresh_tag: search ``fresh_tags`` instead of ``tags`` in the
        required multi_match clause.
    :return: the list found at ``res["hits"]["hits"]`` of the response.
    """
    size = min(size, settings.COUNT_LIMIT)

    # Work on a copy so that neither the caller's list nor a shared default
    # is mutated when "id" has to be added.
    source_fields = list(fields or [])
    if "id" not in source_fields:
        source_fields.append("id")

    tag_field = "fresh_tags" if use_fresh_tag else "tags"
    fields_list = ["title", "content", tag_field]

    q = {
        "_source": {
            "include": source_fields
        },
        "query": {
            "function_score": {
                "functions": [{
                    "filter": {
                        "match": {
                            "title": query
                        }
                    },
                    "weight": 3
                }, {
                    "filter": {
                        "match": {
                            "content": query
                        }
                    },
                    "weight": 2
                }, {
                    # NOTE(review): this boost always targets `tags`, even
                    # when use_fresh_tag is set -- confirm this is intended.
                    "filter": {
                        "match": {
                            "tags": query
                        }
                    },
                    "weight": 1
                }],
                "boost_mode": "sum",
                "score_mode": "sum",
                "query": {
                    "filtered": {
                        "filter": {
                            "bool": {
                                "must": [{
                                    "multi_match": {
                                        "fields": fields_list,
                                        "operator": "and",
                                        "type": "cross_fields",
                                        "query": query
                                    }
                                }, {
                                    "term": {
                                        "is_online": True
                                    }
                                }]
                            }
                        }
                    }
                }
            }
        },
        "sort": [
            {
                "_script": {
                    "order": "desc",
                    "script": {
                        "inline": "_score*factor+doc['smart_rank'].value*factors*0.01",
                        "params": {
                            "factor": 0.3,
                            "factors": 0.7,
                        }
                    },
                    "type": "number"
                }
            },
            "_score"
        ],
        "highlight": get_highlight_query_analyzer(["title", "ask"], query)
    }

    res = es_query('article', q, offset, size)
    return res["hits"]["hits"]
