from django.conf import settings
from gm_rpcd.all import bind
from gm_types.gaia import DIARY_ORDER_TYPE
from libs.es import tzlc, get_es, es_query, es_index_adapt, es_indices_analyze, get_highlight_query_analyzer
from libs.filters import area_tag_id_filter
from libs.error import logging_exception
from search.utils.diary import recall_diary
from collections import OrderedDict
from libs.algorithms import variousness
import logging
import traceback
import json
from libs.cache import redis_client
from search.utils.diary import get_wordrel_set, wordrel_some_list, get_ori_query, get_cpc_diary

logger = logging.getLogger(__name__)


def _get_penalty_score(doctor_times, hospital_times, original_score):
    """Return the score penalty for repeated doctor/hospital occurrences.

    Each occurrence beyond the first (for the doctor and for the hospital
    separately) contributes ``settings.PENALTY_FACTOR``.  The sum is capped
    at 30.0 and, additionally, never exceeds ``original_score``.

    :param doctor_times: occurrence count for the doctor
        (presumably 1-based; verify against caller).
    :param hospital_times: occurrence count for the hospital
        (presumably 1-based; verify against caller).
    :param original_score: score the penalty will be applied to; acts as
        the upper bound of the returned penalty.
    :return: the clamped penalty value.
    """
    doctor_part = (doctor_times - 1) * settings.PENALTY_FACTOR
    hospital_part = (hospital_times - 1) * settings.PENALTY_FACTOR
    # First clamp to the absolute ceiling, then to the score being penalised.
    capped = min(doctor_part + hospital_part, 30.0)
    return min(capped, original_score)


# Merged over from the gaia codebase; should later be unified with the
# process_filters function in this file.
def new_process_filters(filters, nfilters, use_fresh_tag=False, have_read_diary_id_list=None):
    """Translate diary filter dicts into Elasticsearch filter clauses.

    :param filters: dict of positive filters (key -> value).  Unknown keys,
        and known keys whose value has an unsupported type, are ignored.
    :param nfilters: dict of negative filters.  Supported keys are ``id``
        and ``user_ids`` (non-empty list).
    :param use_fresh_tag: when true, ``tag_ids`` is matched against
        ``fresh_closure_tag_ids`` instead of ``closure_tag_ids``.
    :param have_read_diary_id_list: optional list of diary ids to exclude.
    :return: ``(f, nf)`` -- lists of clauses intended for the ``must`` and
        ``must_not`` parts of a bool filter.  ``f`` always starts with the
        two baseline clauses (online diaries with at least one normal topic).
    """
    # Local import keeps this block self-contained.  numbers.Integral covers
    # ``int`` on Python 3 as well as ``int``/``long`` on Python 2, replacing
    # the bare ``long`` reference that raises NameError on Python 3.
    import numbers

    # filter key -> document field, emitted as {'term': {field: value}}
    term_fields = {
        'province_tag_id': 'user.city_province_tag_id',
        'bodypart_subitem_tag_id': 'closure_tag_ids',
        'doctor_id': 'doctor.id',
        'hospital_id': 'hospital.id',
        'has_cover': 'has_cover',
        'is_headline': 'is_headline',
        'service.doctor_id': 'service.doctor_id',
        'id': 'id',
        'has_before_cover': 'has_before_cover',
        'has_after_cover': 'has_after_cover',
        'is_rating_5': 'is_rating_5',
        'content_level_is_good': 'content_level_is_good',
        'is_sink': 'is_sink',
    }
    # filter key -> document field, emitted as {'terms': {field: value}}
    terms_fields = {
        'doctor_ids': 'doctor.id',
        'hospital_ids': 'doctor.hospital.id',
        'ids': 'id',
    }

    # Baseline filters.
    f = [
        {'term': {'is_online': True}},  # only return diaries that are online
        {'range': {'normal_topic_count': {'gt': 0}}},  # at least one normal linked topic
    ]
    nf = []

    for k, v in filters.items():
        if k in term_fields:
            f.append({'term': {term_fields[k]: v}})
        elif k in terms_fields:
            f.append({'terms': {terms_fields[k]: v}})
        elif k == 'area_tag_id':
            f.append(area_tag_id_filter(['user.', 'hospital.', 'doctor.hospital.'], v))
        elif k == 'area_tag_id_v1':
            f.append({
                "bool": {
                    "should": [
                        {"term": {"doctor.hospital.city_province_country_tag_id": v}},
                        {"term": {"doctor.hospital.city_tag_id": v}}
                    ]
                }
            })
        elif k == 'bodypart_tag_id':
            # Compatibility: accept either a non-empty list or a single integer.
            if isinstance(v, list) and v:
                f.append({'terms': {'closure_tag_ids': v}})
            elif isinstance(v, numbers.Integral):
                f.append({'term': {'closure_tag_ids': v}})
        elif k == 'service_id':
            # Converted to str because of the index mapping for service.id.
            f.append({'term': {'service.id': str(v)}})
        elif k == 'tag_ids':
            if isinstance(v, list) and v:
                tag_field = 'fresh_closure_tag_ids' if use_fresh_tag else 'closure_tag_ids'
                f.append({'terms': {tag_field: v}})
        elif k == 'user_id':
            clause = 'terms' if isinstance(v, list) else 'term'
            f.append({clause: {'user.id': v}})
        elif k == 'content_level':
            vals = v if isinstance(v, list) else [v]
            if vals:
                f.append({'terms': {'content_level': vals}})
        elif k == 'vote_num_gt':
            f.append({'range': {'vote_num': {'gt': v}}})

    for k, v in nfilters.items():
        if k == 'id':
            nf.append({'term': {'id': v}})
        elif k == 'user_ids' and isinstance(v, list) and v:
            nf.extend({'term': {'user.id': user_id}} for user_id in v)

    if have_read_diary_id_list:
        nf.append({"terms": {"id": have_read_diary_id_list}})

    return f, nf


# Merged over from the gaia codebase; should later be unified with the
# filter_diary function in this file.
def new_filter_diary(
        offset=0,
        size=5,
        sort_type=DIARY_ORDER_TYPE.LAST_UPDATE_TIME,
        filters=None,
        nfilters=None,
        sort_params=None,
        expose_total=False,
        interal_call=False,
        filter_invalid_picture=False,
        fields=None, use_fresh_tag=False,
        device_id='', source_type=0
):
    """Filter and sort diaries in Elasticsearch, returning the matching ids.

    :param offset: ``from`` offset of the search.
    :param size: number of diaries wanted; capped at ``settings.COUNT_LIMIT``
        unless ``interal_call`` is true.
    :param sort_type: a ``DIARY_ORDER_TYPE`` value selecting the sort rules.
        An unhandled value is logged (if it is not a known enum value) and
        no extra sort rule is added.
    :param filters: positive filters, see ``new_process_filters``.
    :param nfilters: negative filters, see ``new_process_filters``.
    :param sort_params: extra sort inputs.  ``service_id`` is required for
        SERVICE_RELATED and ``same_tag_ids_count`` for SERVICE_DETAIL
        (KeyError otherwise); ``user_city_tag_id`` is optional (default -1).
    :param expose_total: include ES ``hits.total`` as ``result['total']``.
    :param interal_call: skip the size cap and include the raw hits as
        ``result['hits']``.
    :param filter_invalid_picture: require both before and after covers.
    :param fields: optional ``_source`` fields.  When given, 200 hits are
        fetched and passed through ``scatter`` (defined outside this block)
        together with ``size``.  The caller's list is never modified.
    :param use_fresh_tag: forwarded to ``new_process_filters``.
    :param device_id: device identifier used to key the "already read" list
        in redis; non-``str`` values are treated as empty.
    :param source_type: when equal to 1, already-read diaries are excluded
        and the returned ids are appended to the stored read list.
    :return: dict with ``diary_ids`` and optionally ``total`` / ``hits``.
    """
    if not interal_call:
        # Parameter validation: external callers are capped.
        size = min(size, settings.COUNT_LIMIT)
    filters = filters or {}
    nfilters = nfilters or {}
    sort_params = sort_params or {}
    # Work on a private copy so appending "id" below never leaks into the
    # caller's list (or into a shared default).
    fields = list(fields) if fields else []

    # --- already-read filtering -------------------------------------------
    track_read = source_type == 1
    have_read_diary_id_list = []
    if not isinstance(device_id, str):
        device_id = ""
    redis_key = ""
    if track_read and device_id:
        redis_key = "doris_feed:home_recommend_diary" + ":device_id:" + str(device_id)
        cached_read_vals = redis_client.hmget(redis_key, source_type)
        # Load the previously stored read ids, if any.
        if cached_read_vals[0]:
            have_read_diary_id_list = list(json.loads(cached_read_vals[0]))

    # --- filters ----------------------------------------------------------
    f, nf = new_process_filters(filters=filters, nfilters=nfilters, use_fresh_tag=use_fresh_tag,
                                have_read_diary_id_list=have_read_diary_id_list)
    if filter_invalid_picture:
        f += [
            {"term": {"has_before_cover": True}},
            {"term": {"has_after_cover": True}}
        ]

    # --- sort rules -------------------------------------------------------
    user_city_tag_id = sort_params.get('user_city_tag_id', -1)

    def desc(field):
        # Plain descending sort on a document field.
        return {field: {'order': 'desc'}}

    def script_sort(script_file, params=None, cache=False):
        # Descending numeric sort driven by a stored ES script file.
        spec = {
            'lang': settings.ES_SCRIPT_LANG,
            'script_file': script_file,
            'type': 'number',
        }
        if params is not None:
            spec['params'] = params
        spec['order'] = 'desc'
        if cache:
            spec['_cache'] = True
        return {'_script': spec}

    def recommend_sort():
        # 'sort_diary-recommend' script, parameterised by the user's city tag.
        return script_sort('sort_diary-recommend',
                           {'user_city_tag_id': user_city_tag_id},
                           cache=True)

    def index_score_sort():
        # 'sort_diary-index-2' script combining the weighted scores.
        weights = settings.DIARY_SCORE_WEIGHT
        return script_sort('sort_diary-index-2', {
            'heat_score_weight': weights['HEAT_SCORE_WEIGHT'],  # heat score weight
            'audit_score_weight': weights['AUDIT_SCORE_WEIGHT'],  # content quality score weight
            'choucheng_score_weight': weights['CHOUCHENG_SCORE_WEIGHT'],  # commission score weight
            'user_city_tag_id': user_city_tag_id,
            'region_score': 60.0,  # region score
        })

    sort_list = [
        {'is_sink': {'order': 'asc'}},  # sunk diaries go last
        script_sort('sort_diary-sink-by-org'),  # sink by organisation penalty
    ]

    if sort_type == DIARY_ORDER_TYPE.GREATEST_HITS:  # editor's picks
        sort_list += [
            script_sort('sort_diary-greatest-hits', {
                'TAG_ID_DIRECTOR_RECOMMEND': settings.ES_CONSTS['TAG_ID_DIRECTOR_RECOMMEND'],
            }),
            desc('author_last_update_time'),
        ]
    elif sort_type == DIARY_ORDER_TYPE.HOT:  # hot diaries
        sort_list += [
            desc('is_headline'),
            desc('author_last_update_time'),
        ]
    elif sort_type == DIARY_ORDER_TYPE.DOCTOR_CASE:  # expert cases
        sort_list += [
            desc('has_service'),
            desc('doctor.is_authenticated'),
            desc('author_last_update_time'),
        ]
    elif sort_type == DIARY_ORDER_TYPE.LAST_UPDATE_TIME:  # latest reply
        sort_list += [
            desc('last_update_time'),
        ]
    elif sort_type == DIARY_ORDER_TYPE.YOU_MAY_LIKE:  # you may like
        sort_list += [
            script_sort('sort_diary-you-may-like', {
                'TAG_ID_DIRECTOR_RECOMMEND': settings.ES_CONSTS['TAG_ID_DIRECTOR_RECOMMEND'],
                'TAG_ID_POPULAR': settings.ES_CONSTS['TAG_ID_POPULAR'],
                'TAG_ID_DIARY_WITH_SERVICE': settings.ES_CONSTS['TAG_ID_DIARY_WITH_SERVICE'],
            }),
            desc('has_cover'),
            desc('last_update_time'),
        ]
    elif sort_type == DIARY_ORDER_TYPE.CREATE_TIME:
        sort_list += [
            desc('created_time'),
        ]
    elif sort_type == DIARY_ORDER_TYPE.SERVICE_RELATED:
        sort_list += [
            script_sort('sort_diary-service-is', {
                'service_id': sort_params['service_id'],
            }),
            desc('popularity'),
        ]
    elif sort_type == DIARY_ORDER_TYPE.SUGGEST:
        sort_list += [
            {'stick_priority': {'order': 'asc', 'missing': '_last'}},
            desc('sticky_post'),
            desc('is_headline'),
            desc('is_rating_5'),
            desc('has_before_cover'),
            desc('has_after_cover'),
            desc('vote_num'),
            desc('last_any_reply_time'),
        ]
    elif sort_type == DIARY_ORDER_TYPE.LAST_TOPIC_ADDED_DESC:
        sort_list += [
            desc('last_topic_add_time'),
        ]
    elif sort_type == DIARY_ORDER_TYPE.STICKY_POST:
        sort_list += [
            desc('sticky_post'),
            desc('created_time'),
        ]
    elif sort_type == DIARY_ORDER_TYPE.SERVICE_DETAIL:
        sort_list += [
            script_sort('sort_diary-same-tag', {
                'tag_ids': sort_params['same_tag_ids_count'],
            }),
            desc('popularity'),
        ]
    elif sort_type == DIARY_ORDER_TYPE.INDEX:
        # Filter out diaries without a service.
        f.append({'term': {'has_service': True}})
        sort_list += [
            index_score_sort(),
        ]
    elif sort_type == DIARY_ORDER_TYPE.BEST:
        # http://wiki.wanmeizhensuo.com/pages/viewpage.action?pageId=5478006&focusedCommentId=5479145#comment-5479145
        sort_list += [
            desc('content_level'),
            recommend_sort(),
            index_score_sort(),
        ]
    elif sort_type == DIARY_ORDER_TYPE.NATIVE:
        sort_list += [
            recommend_sort(),
            index_score_sort(),
        ]
    elif sort_type == DIARY_ORDER_TYPE.BEST_EVALUATION:
        f.append({'term': {'has_service': True}})
        sort_list += [
            desc('service_rating_level_double'),
            recommend_sort(),
            index_score_sort(),
        ]
    elif sort_type == DIARY_ORDER_TYPE.DOCTOR_CUSTOMIZE_SORT:  # doctor-defined ordering
        sort_list += [
            {'doctor_customize_sort': {'order': 'asc'}},
            desc('last_update_time'),
        ]
    elif sort_type == DIARY_ORDER_TYPE.HOSPITAL_CUSTOMIZE_SORT:  # hospital-defined ordering
        sort_list += [
            {'hospital_customize_sort': {'order': 'asc'}},
            desc('last_update_time'),
        ]
    else:
        # Unhandled sort type: raise-and-catch so logging_exception() is
        # called with an active exception, then fall through with only the
        # base sort rules.
        try:
            if not DIARY_ORDER_TYPE.__contains__(sort_type):
                raise Exception('invalid sort_type: {}'.format(sort_type))
        except Exception:
            logging_exception()

    # --- query ------------------------------------------------------------
    source_fields = ["id", "has_service", "doctor.is_authenticated", "doctor.id", "doctor.hospital.id"]
    if fields:
        if "id" not in fields:
            fields.append("id")  # safe: ``fields`` is our private copy
        source_fields = fields

    q = {
        'query': {'filtered': {
            'query': {'match_all': {}},
            'filter': {'bool': {'must': f, 'must_not': nf}}
        }},
        'sort': sort_list,
        '_source': {
            "include": source_fields,
        }
    }

    es = get_es()
    index = es_index_adapt(
        index_prefix=settings.ES_INDEX_PREFIX,
        doc_type='diary',
        rw='read'
    )

    # With explicit fields, over-fetch so scatter() has candidates to pick from.
    large_size = 200 if fields else size
    res = es.search(
        index=index,
        doc_type='diary',
        timeout=settings.ES_SEARCH_TIMEOUT,
        body=q,
        from_=offset,
        size=large_size)

    if fields:
        hits = res["hits"]
        hits["hits"] = scatter(hits['hits'], size, fields)

    result = {
        'diary_ids': [int(d['_id']) for d in res['hits']['hits']],
    }
    if expose_total:
        result['total'] = res['hits']['total']
    if interal_call:
        result['hits'] = res['hits']['hits']

    if track_read:
        # Persist the diaries just returned as "already read".
        # NOTE(review): the previous code also had a branch for
        # ``result.get('diary_ids') == None`` that reset the stored list and
        # returned previously read ids.  ``diary_ids`` is always a list here,
        # so that branch could never run and has been dropped; if recycling
        # read diaries on an empty result is wanted, it needs to be
        # implemented explicitly (e.g. on ``not result['diary_ids']``).
        have_read_diary_id_list.extend(result["diary_ids"])
        if redis_key:
            redis_client.hset(redis_key, source_type, json.dumps(have_read_diary_id_list))
            redis_client.expire(redis_key, 60 * 60 * 24 * 15)  # keep for 15 days

    return result


def process_filters(filters, nfilters):
    """Build the Elasticsearch filter clauses for a diary query.

    :param filters: dict of positive filters; unsupported keys (or supported
        keys with an unsupported value type) are skipped.
    :param nfilters: dict of negative filters; ``id`` and a non-empty
        ``user_ids`` list are recognised.
    :return: ``(must_clauses, must_not_clauses)``.  The first list always
        begins with the online / normal-topic-count baseline clauses.
    """
    # Keys that map one-to-one onto a single ``term`` clause.
    plain_term = {
        'province_tag_id': 'user.city_province_tag_id',
        'bodypart_subitem_tag_id': 'closure_tag_ids',
        'doctor_id': 'doctor.id',
        'hospital_id': 'hospital.id',
        'has_cover': 'has_cover',
        'is_headline': 'is_headline',
        'service.doctor_id': 'service.doctor_id',
        'content_level': 'content_level',
        'id': 'id',
        'has_before_cover': 'has_before_cover',
        'has_after_cover': 'has_after_cover',
    }

    must_clauses = [
        {'term': {'is_online': True}},  # only diaries that are online
        {'range': {'normal_topic_count': {'gt': 0}}},  # at least one normal linked topic
    ]
    must_not_clauses = []

    for key, value in filters.items():
        if key in plain_term:
            must_clauses.append({'term': {plain_term[key]: value}})
            continue

        if key == 'area_tag_id':
            must_clauses.append(
                area_tag_id_filter(['user.', 'hospital.', 'doctor.hospital.'], value))
            continue

        if key == 'bodypart_tag_id':
            # Compatibility: a non-empty list or a single int are both accepted.
            if isinstance(value, list) and value:
                must_clauses.append({'terms': {'closure_tag_ids': value}})
            elif isinstance(value, int):
                must_clauses.append({'term': {'closure_tag_ids': value}})
            continue

        if key == 'service_id':
            # Stringified because of the mapping used for service.id.
            must_clauses.append({'term': {'service.id': str(value)}})
            continue

        if key == 'special_id':
            must_clauses.append({'terms': {'service.special_ids': [value]}})
            continue

        if key == 'tag_ids':
            if isinstance(value, list) and value:
                must_clauses.append({'terms': {'closure_tag_ids': value}})
            continue

        if key == 'user_id':
            clause_kind = 'terms' if isinstance(value, list) else 'term'
            must_clauses.append({clause_kind: {'user.id': value}})
            continue

        if key == 'ids':
            must_clauses.append({'terms': {'id': value}})

    for key, value in nfilters.items():
        if key == 'id':
            must_not_clauses.append({'term': {'id': value}})
        elif key == 'user_ids' and isinstance(value, list) and value:
            must_not_clauses += [{'term': {'user.id': uid}} for uid in value]

    return must_clauses, must_not_clauses


def filter_diary(
        offset=0,
        size=5,
        sort_type=DIARY_ORDER_TYPE.LAST_UPDATE_TIME,
        filters=None,
        nfilters=None,
        sort_params=None,
        expose_total=False,
        interal_call=False,
):
    """Query the diary index with filters and a sort rule; return diary ids.

    :param offset: offset passed to ``es_query``.
    :param size: page size; limited to ``settings.COUNT_LIMIT`` unless
        ``interal_call`` is true.
    :param sort_type: ``DIARY_ORDER_TYPE`` value choosing the sort rules; a
        value with no dedicated rule only gets the base "sink" ordering.
    :param filters: positive filters handed to ``process_filters``.
    :param nfilters: negative filters handed to ``process_filters``.
    :param sort_params: extra sort inputs (``service_id``,
        ``same_tag_ids_count``, ``user_city_tag_id``).
    :param expose_total: add ``total`` (ES hit count) to the result.
    :param interal_call: skip the size cap and add the raw ``hits``.
    :return: dict with ``diary_ids`` plus optional ``total`` / ``hits``.
    """
    if not interal_call:
        # Parameter validation: cap the page size for external callers.
        size = min(size, settings.COUNT_LIMIT)
    filters = filters or {}
    nfilters = nfilters or {}
    sort_params = sort_params or {}

    # Filter clauses.
    must_clauses, must_not_clauses = process_filters(filters=filters, nfilters=nfilters)

    def by_desc(field_name):
        # Descending sort on a plain field.
        return {field_name: {'order': 'desc'}}

    def by_script(script_file, params=None):
        # Descending numeric sort computed by a stored script file.
        body = {
            'lang': settings.ES_SCRIPT_LANG,
            'script_file': script_file,
            'type': 'number',
        }
        if params is not None:
            body['params'] = params
        body['order'] = 'desc'
        return {'_script': body}

    # Sort rules: sunk diaries last, then sink by organisation penalty.
    sort_list = [
        {'is_sink': {'order': 'asc'}},
        by_script('sort_diary-sink-by-org'),
    ]

    if sort_type == DIARY_ORDER_TYPE.GREATEST_HITS:  # editor's picks
        sort_list.extend([
            by_script('sort_diary-greatest-hits', {
                'TAG_ID_DIRECTOR_RECOMMEND': settings.ES_CONSTS['TAG_ID_DIRECTOR_RECOMMEND'],
            }),
            by_desc('author_last_update_time'),
        ])
    elif sort_type == DIARY_ORDER_TYPE.HOT:  # hot diaries
        sort_list.extend([
            by_desc('is_headline'),
            by_desc('author_last_update_time'),
        ])
    elif sort_type == DIARY_ORDER_TYPE.DOCTOR_CASE:  # expert cases
        sort_list.extend([
            by_desc('has_service'),
            by_desc('doctor.is_authenticated'),
            by_desc('author_last_update_time'),
        ])
    elif sort_type == DIARY_ORDER_TYPE.LAST_UPDATE_TIME:  # latest reply
        sort_list.append(by_desc('last_update_time'))
    elif sort_type == DIARY_ORDER_TYPE.YOU_MAY_LIKE:  # you may like
        sort_list.extend([
            by_script('sort_diary-you-may-like', {
                'TAG_ID_DIRECTOR_RECOMMEND': settings.ES_CONSTS['TAG_ID_DIRECTOR_RECOMMEND'],
                'TAG_ID_POPULAR': settings.ES_CONSTS['TAG_ID_POPULAR'],
                'TAG_ID_DIARY_WITH_SERVICE': settings.ES_CONSTS['TAG_ID_DIARY_WITH_SERVICE'],
            }),
            by_desc('has_cover'),
            by_desc('last_update_time'),
        ])
    elif sort_type == DIARY_ORDER_TYPE.CREATE_TIME:
        sort_list.append(by_desc('created_time'))
    elif sort_type == DIARY_ORDER_TYPE.SERVICE_RELATED:
        sort_list.extend([
            by_script('sort_diary-service-is', {
                'service_id': sort_params['service_id'],
            }),
            by_desc('popularity'),
        ])
    elif sort_type == DIARY_ORDER_TYPE.SUGGEST:
        sort_list.extend([
            by_desc('sticky_post'),
            by_desc('is_headline'),
            by_desc('is_rating_5'),
            by_desc('vote_num'),
            by_desc('last_any_reply_time'),
        ])
    elif sort_type == DIARY_ORDER_TYPE.LAST_TOPIC_ADDED_DESC:
        sort_list.append(by_desc('last_topic_add_time'))
    elif sort_type == DIARY_ORDER_TYPE.STICKY_POST:
        sort_list.extend([
            by_desc('sticky_post'),
            by_desc('created_time'),
        ])
    elif sort_type == DIARY_ORDER_TYPE.SERVICE_DETAIL:
        sort_list.extend([
            by_script('sort_diary-same-tag', {
                'tag_ids': sort_params['same_tag_ids_count'],
            }),
            by_desc('popularity'),
        ])
    elif sort_type == DIARY_ORDER_TYPE.INDEX:
        # Filter out diaries that have no service attached.
        must_clauses.append({'term': {'has_service': True}})
        score_weights = settings.DIARY_SCORE_WEIGHT
        sort_list.append(by_script('sort_diary-index-2', {
            'heat_score_weight': score_weights['HEAT_SCORE_WEIGHT'],  # heat score weight
            'audit_score_weight': score_weights['AUDIT_SCORE_WEIGHT'],  # content quality score weight
            'choucheng_score_weight': score_weights['CHOUCHENG_SCORE_WEIGHT'],  # commission score weight
            'user_city_tag_id': sort_params.get('user_city_tag_id', -1),
            'region_score': 60.0,  # region score
        }))
    else:
        # No dedicated rule.  Raise-and-catch so that logging_exception()
        # is invoked with an active exception for unknown enum values; the
        # query then proceeds with only the base ordering.
        try:
            if not DIARY_ORDER_TYPE.__contains__(sort_type):
                raise Exception('invalid sort_type: {}'.format(sort_type))
        except Exception:
            logging_exception()

    q = {
        'query': {'filtered': {
            'query': {'match_all': {}},
            'filter': {'bool': {'must': must_clauses, 'must_not': must_not_clauses}}
        }},
        'sort': sort_list,
    }
    # NOTE(review): looks like a leftover debugging aid -- confirm whether
    # dumping every query is still wanted.
    from libs.debug import pretty_json
    pretty_json(q)
    res = es_query('diary', q, offset, size)

    found = res['hits']['hits']
    result = {
        'diary_ids': [int(hit['_id']) for hit in found],
    }
    if expose_total:
        result['total'] = res['hits']['total']
    if interal_call:
        result['hits'] = res['hits']['hits']
    return result


def get_diary_posts_results(query="", offset=0, size=10, total_query_term_list=None,
                            complete_matching_term_list=None,
                            synonym_term_list=None,
                            other_key_word_term_list=None,
                            correct_term_list=None,
                            have_read_id_list=None
                            ):
    """Fetch diary-post hits whose content phrase-matches the given terms.

    Searches the ``problem`` doc type for online posts that have an image,
    a ``content_level`` of at least 3 and a ``diary_id``, excluding diaries
    in ``have_read_id_list``.  At most 100 hits are requested, ordered by
    ``ranking_popularity`` then ``_score``.

    :param query: unused by this function.
    :param offset: unused by this function (the search always starts at 0).
    :param size: stop collecting once this many distinct diary ids are seen.
    :param total_query_term_list: unused by this function.
    :param complete_matching_term_list: unused by this function.
    :param synonym_term_list: unused by this function.
    :param other_key_word_term_list: unused by this function.
    :param correct_term_list: terms turned into ``match_phrase`` clauses on
        ``content``; at least one must match.
    :param have_read_id_list: diary ids to exclude from the results.
    :return: list of raw ES hits (several hits may share a diary id); an
        empty list if anything goes wrong.
    """
    # None defaults instead of shared mutable list() defaults.
    correct_term_list = correct_term_list or []
    have_read_id_list = have_read_id_list or []

    try:
        total_query_should_list = [
            {
                "match_phrase": {
                    "content": {
                        "query": query_term,
                        "analyzer": "gm_default_index"
                    }
                }
            }
            for query_term in correct_term_list
        ]

        q = {
            "from": 0,
            "size": 100,
            "query": {
                "bool": {
                    "must": [
                        {"term": {"is_online": True}},
                        {"term": {"has_image": True}},
                        {"range": {"content_level": {"gte": "3"}}}
                    ],
                    "must_not": {
                        "terms": {
                            "diary_id": have_read_id_list
                        }
                    },
                    "filter": {
                        "exists": {
                            "field": "diary_id"
                        }
                    },
                    "should": total_query_should_list,
                    "minimum_should_match": 1
                }
            },
            "sort": [
                {"ranking_popularity": {"order": "desc"}},
                {"_score": {"order": "desc"}}
            ],
            "_source": {
                "include": ["diary_id"]
            }
        }

        # Log lazily via the module logger; the previous
        # ``str(q).encode("utf-8")`` rendered as a bytes repr on Python 3.
        logger.info("duan add,posts query:%s", q)
        es = get_es()
        index = es_index_adapt(
            index_prefix=settings.ES_INDEX_PREFIX,
            doc_type='problem',
            rw='read'
        )
        res = es.search(
            index=index,
            doc_type='problem',
            timeout=settings.ES_SEARCH_TIMEOUT,
            body=q,
            from_=0,
            size=100)

        diary_posts_diary_id_set = set()
        diary_posts_term_list = []
        for item in res.get("hits")["hits"]:
            diary_id = item["_source"]["diary_id"]
            if diary_id:
                diary_posts_diary_id_set.add(diary_id)
                diary_posts_term_list.append(item)
            # Stop once enough distinct diaries have been collected.
            if len(diary_posts_diary_id_set) >= size:
                break

        return diary_posts_term_list
    except Exception:
        # Best-effort: any failure degrades to "no results".  Unlike a bare
        # ``except:``, this lets KeyboardInterrupt/SystemExit propagate.
        logger.error("catch exception,err_msg:%s" % traceback.format_exc())
        return []


def get_diary_alpha_query(
        query='',
        offset=0,
        size=5,
        sort_type=DIARY_ORDER_TYPE.DEFAULT,
        filters=None,
        sort_params=None,
        sort_diary_tag_first=False,
        search_again=False,
        device_id="null",
        is_answer=None,
        use_fresh_tag=False,
        total_query_term_list=None,
        complete_matching_term_list=None,
        synonym_term_list=None,
        other_key_word_term_list=None,
        correct_term_list=None,
        have_read_id_list=None
):
    """Build the Elasticsearch request body for the "alpha" diary search.

    Nothing is sent to Elasticsearch here; only the body dict is assembled.

    The body is a ``function_score`` query whose functions add fixed weights:

    * 2000 - diary in the user's city, ``content_level >= 3`` and matching
      either ``correct_term_list`` or the full ``query``;
    * 750  - matches ``complete_matching_term_list`` or the full ``query``;
    * 700  - matches ``synonym_term_list``;
    * 200  - matches ``other_key_word_term_list``.

    The last three functions, the main ``multi_match`` clause and the
    ``must_not`` on already-read ids are only added when
    ``total_query_term_list`` is non-empty.

    @param query: raw search text
    @param offset: accepted for signature compatibility; the body always uses ``from = 0``
    @param size: number of hits requested
    @param sort_type: a ``DIARY_ORDER_TYPE`` value selecting the sort clauses
    @param filters: filter dict forwarded to ``new_process_filters``
    @param sort_params: optional dict; only ``user_city_tag_id`` is read
    @param sort_diary_tag_first: not used by this function
    @param search_again: not used by this function
    @param device_id: not used by this function
    @param is_answer: when equal to ``True`` the ``answer`` field (boost 2) is searched too
    @param use_fresh_tag: search ``fresh_tags`` instead of ``tags``
    @param total_query_term_list: terms for the main whitespace-analyzed ``multi_match``
    @param complete_matching_term_list: terms scored with weight 750
    @param synonym_term_list: terms scored with weight 700
    @param other_key_word_term_list: terms scored with weight 200
    @param correct_term_list: terms used by the same-city / high-quality boost
    @param have_read_id_list: diary ids to exclude from the results
    @return: the request body dict, or an empty dict if building it raised
    """
    try:
        # ``None`` means "no terms". Fresh lists are created per call so that the
        # returned query never holds a reference to a shared default object.
        total_query_term_list = [] if total_query_term_list is None else total_query_term_list
        complete_matching_term_list = [] if complete_matching_term_list is None else complete_matching_term_list
        synonym_term_list = [] if synonym_term_list is None else synonym_term_list
        other_key_word_term_list = [] if other_key_word_term_list is None else other_key_word_term_list
        correct_term_list = [] if correct_term_list is None else correct_term_list
        have_read_id_list = [] if have_read_id_list is None else have_read_id_list

        # A missing, None or 0 city id collapses to the -10000 placeholder.
        user_city_tag_id = -10000
        if sort_params:
            user_city_tag_id = sort_params.get('user_city_tag_id') or -10000

        tag_field = "fresh_tags" if use_fresh_tag else "tags"

        multi_fields = {
            'doctor.name': 4,
            'doctor.hospital.name': 3,
            'doctor.hospital.officer_name': 3,
            'user.last_name': 1,
            'service.name': 3,
        }
        if is_answer == True:  # noqa: E712 - equality test kept on purpose (1 also counts)
            multi_fields["answer"] = 2
        # The tag field has to be registered *before* the boosted field list is
        # derived, otherwise it is silently left out of every multi_match below
        # (search_diary_alpha derives its list in this order as well).
        multi_fields[tag_field] = 6

        fields = ['^'.join((name, str(boost))) for name, boost in multi_fields.items()]

        query_fields = ["doctor.name", "doctor.hospital.name", "doctor.hospital.officer_name", "user.last_name",
                        "service.name", tag_field]

        total_match_query = {
            "multi_match": {
                "query": query,
                "fields": fields,
                "analyzer": "gm_default_index",
                "operator": "and",
                "type": "best_fields"
            }
        }

        def terms_per_field(term_list):
            # One ``terms`` clause per searchable field.
            return [{"terms": {field_name: term_list}} for field_name in query_fields]

        def weighted_should(weight, should_list):
            # function_score entry that fires when at least one clause matches.
            return {
                "weight": weight,
                "filter": {
                    "bool": {
                        "minimum_should_match": 1,
                        "should": should_list
                    }
                }
            }

        same_city_high_quality_diary_filter = [
            {"term": {"diary_city": user_city_tag_id}},
            {"range": {"content_level": {"gte": "3"}}},
            {
                "bool": {
                    "minimum_should_match": 1,
                    "should": terms_per_field(correct_term_list) + [total_match_query]
                }
            }
        ]

        functions = [
            {
                "filter": {
                    "bool": {
                        "filter": same_city_high_quality_diary_filter
                    }
                },
                "weight": 2000
            }
        ]
        function_score = {
            "functions": functions,
            "score_mode": "max",
            "boost_mode": "max"
        }

        # NOTE(review): ``offset`` is never applied; "from" is always 0 - confirm
        # that paging is handled by the caller.
        q = {
            "from": 0,
            "size": size,
            "query": {"function_score": function_score}
        }

        if total_query_term_list:
            main_bool = {
                "must": {
                    "multi_match": {
                        "query": " ".join(total_query_term_list),
                        "fields": fields,
                        "analyzer": "whitespace"
                    }
                }
            }
            if have_read_id_list:
                main_bool["must_not"] = {
                    "terms": {
                        "id": have_read_id_list
                    }
                }
            function_score["query"] = {"bool": main_bool}

            if complete_matching_term_list:
                functions.append(weighted_should(
                    750, terms_per_field(complete_matching_term_list) + [total_match_query]))

            if synonym_term_list:
                functions.append(weighted_should(700, terms_per_field(synonym_term_list)))

            if other_key_word_term_list:
                other_key_match_query = {
                    "multi_match": {
                        # Only the first extra keyword takes part in the analyzed match.
                        "query": other_key_word_term_list[0],
                        "fields": fields,
                        "analyzer": "gm_default_index",
                        "operator": "and",
                        "type": "best_fields"
                    }
                }
                functions.append(weighted_should(
                    200, terms_per_field(other_key_word_term_list) + [other_key_match_query]))

        # Sort rules. Sunk diaries go last by default.
        sort_list = [
            {'is_sink': {'order': 'asc'}},
        ]

        if sort_type == DIARY_ORDER_TYPE.POPULARITY:  # ('5', order by popularity)
            sort_list += [
                {'popularity': {'order': 'desc'}},
                '_score',
            ]
        elif sort_type == DIARY_ORDER_TYPE.LAST_UPDATE_TIME:  # ('6', last update time)
            sort_list += [
                {'last_update_time': {'order': 'desc'}},
                '_score',
            ]
        elif sort_type == DIARY_ORDER_TYPE.CREATE_TIME:  # ('11', creation time)
            sort_list += [
                {'created_time': {'order': 'desc'}},
                '_score',
            ]
        elif sort_type == DIARY_ORDER_TYPE.FEED_FILTER:  # (feed real-time filtering)
            # The is_sink sort is replaced by a script file sort here.
            sort_list[0] = {
                '_script': {
                    'lang': settings.ES_SCRIPT_LANG,
                    'script_file': "sort_diary-revert-is-online",
                    'type': 'number',
                    'order': 'desc',
                }
            }
            sort_list.append({
                '_script': {
                    'lang': settings.ES_SCRIPT_LANG,
                    'script_file': "sort_diary-has-service",
                    'type': 'number',
                    'order': 'desc',
                }
            })
        else:
            # SEARCH_V1 and every other sort type share the same ordering:
            # relevance plus the offline score, then recency.
            sort_list += [
                {
                    "_script": {
                        "order": "desc",
                        "script": {
                            "inline": "_score+doc['offline_score'].value"
                        },
                        "type": "number"
                    }
                },
                {'last_update_time': {'order': 'desc'}},
                '_score'
            ]

        # Filters. The negative filters returned by the helper are not used.
        f, _nf = new_process_filters(filters=filters, nfilters={})
        q["filter"] = {
            "bool": {
                "must": f
            }
        }
        q['sort'] = sort_list
        q['highlight'] = get_highlight_query_analyzer(["title", "answer", "tags"], query)

        q['_source'] = {
            "include": ["id", "nearby_city_tags_v1", "service", "hospital", "doctor", "tags", "offline_score"],
            "exclude": ["nearby_city_tags_v1.name", "service.name", "hospital.name", "doctor.name",
                        "doctor.hospital.officer_name", "doctor.hospital.name", "doctor.officer_name",
                        "service.short_description"]
        }

        return q
    except Exception:
        # Best effort: log the failure and hand back an empty body.
        logger.error("catch exception,err_msg:%s" % traceback.format_exc())
        return dict()


def search_problem(query="", offset=0, size=100, sort_params=None, have_read_diary_list=None, user_city_tag_id=-1):
    """Search the ``problem`` index by phrase and return the diary id of each hit.

    Only online documents whose ``content_level`` is one of 3, 3.5, 4 or 5 and
    whose ``content`` phrase-matches ``query`` are considered. The score is
    replaced by the function score, which adds 1000 to documents whose
    ``user.city_tag_id`` equals ``user_city_tag_id``.

    @param query: text matched as a phrase against ``content``
    @param offset: index of the first hit to return
    @param size: maximum number of hits to return
    @param sort_params: not used by this function
    @param have_read_diary_list: diary ids to exclude; ``None`` or empty excludes nothing
    @param user_city_tag_id: city tag id that receives the score boost
    @return: list with the ``diary_id`` of every hit (duplicates are kept),
        or an empty list if anything raised
    """
    try:
        match_bool = {
            "must": [
                {"term": {"is_online": True}},
                {"terms": {"content_level": [3, 3.5, 4, 5]}},
                {
                    "match_phrase": {
                        "content": {
                            "query": query,
                            "analyzer": "gm_default_index"
                        }
                    }
                }
            ]
        }
        if have_read_diary_list:
            match_bool["must_not"] = {
                "terms": {
                    "diary_id": have_read_diary_list
                }
            }

        q = {
            "query": {
                "function_score": {
                    "query": {"bool": match_bool},
                    "score_mode": "sum",
                    "boost_mode": "replace",
                    "functions": [
                        {
                            "filter": {
                                "term": {
                                    "user.city_tag_id": user_city_tag_id
                                }
                            },
                            "weight": 1000
                        }
                    ]
                }
            },
            "_source": {
                "include": ["diary_id"]
            }
        }

        es = get_es()
        index = es_index_adapt(
            index_prefix=settings.ES_INDEX_PREFIX,
            doc_type='problem',
            rw='read'
        )
        res = es.search(
            index=index,
            doc_type='problem',
            timeout=settings.ES_SEARCH_TIMEOUT,
            body=q,
            from_=offset,
            size=size)

        return [hit["_source"]["diary_id"] for hit in res.get("hits")["hits"]]
    except Exception:
        # Best effort: log the failure and report "no results".
        logger.error("catch exception,err_msg:%s" % traceback.format_exc())
        return list()


def search_diary_alpha(
        query='',
        offset=0,
        size=5,
        sort_type=DIARY_ORDER_TYPE.DEFAULT,
        filters=None,
        sort_params=None,
        sort_diary_tag_first=False,
        search_again=False,
        device_id="null",
        is_answer=None,
        use_fresh_tag=False,
        have_read_diary_list=None,
        only_get_query=False,
        closure_tags=None
):
    """Run (or only build) the "alpha" diary search against Elasticsearch.

    The query text is first analyzed with ``gm_default_index``; the analyzer
    output and the word-relation helpers (``get_ori_query``,
    ``get_wordrel_set``, ``wordrel_some_list``) supply the term lists that
    drive scoring. The final score is the sum of the weights of every matching
    function (``score_mode = sum``, ``boost_mode = replace``):

    * 200          - diary is in the user's city;
    * 10 / 50      - one entry per (term, field) pair; 50 when the term is
      also one of the "other key words";
    * 5000 / 6000  - full match of an original query term; 6000 additionally
      needs same city and ``content_level >= 3``;
    * 20000 / 50000 - match on the corrected terms or the full query; 50000
      additionally needs same city and ``content_level >= 3``;
    * 5000 / 6000  - ``tags`` contains a graph-related word; 6000 additionally
      needs same city and ``content_level >= 3``;
    * 200          - any searchable field contains an "other key word".

    @param query: raw search text
    @param offset: index of the first hit to return
    @param size: number of hits, capped at ``settings.COUNT_LIMIT``
    @param sort_type: a ``DIARY_ORDER_TYPE`` value selecting the sort clauses
    @param filters: filter dict forwarded to ``new_process_filters``
    @param sort_params: optional dict; only ``user_city_tag_id`` is read
    @param sort_diary_tag_first: not used by this function
    @param search_again: not used by this function
    @param device_id: forwarded to ``get_ori_query``
    @param is_answer: when equal to ``True`` the ``answer`` field (boost 2) is searched too
    @param use_fresh_tag: search ``fresh_tags`` instead of ``tags``
    @param have_read_diary_list: diary ids to exclude; ``None`` or empty excludes nothing
    @param only_get_query: return the request body instead of executing it
    @param closure_tags: not used by this function
    @return: the request body when ``only_get_query`` is true; otherwise the
        Elasticsearch response, where every hit's ``_source`` gains
        ``nearby_city_tags_list`` (the tag ids) and has ``nearby_city_tags_v1``
        emptied. An empty dict is returned if anything raised.
    """
    try:
        # Parameter validation.
        size = min(size, settings.COUNT_LIMIT)
        filters = filters or {}

        # One client serves both the analyze call and the search itself.
        es = get_es()
        analyze_res = es_indices_analyze(
            doc_type="diary",
            body={
                'text': query,
                'analyzer': "gm_default_index"
            },
            es=es
        )
        logging.info("analyzer res:%s" % str(analyze_res).encode("utf-8"))

        ori_query_list, graph_related_word_list = get_ori_query(query=query, device_id=device_id)
        wordrel_set, correct_term_list = get_wordrel_set(query)
        # Only the first and third list are needed here.
        (total_query_term_list, _complete_matching_term_list, other_key_word_term_list,
         _other_key_word_term_str) = wordrel_some_list(analyze_res, wordrel_set, query)

        # Only a missing / None city id falls back to -1.
        user_city_tag_id = -1
        if sort_params:
            user_city_tag_id = sort_params.get('user_city_tag_id', -1)
        if user_city_tag_id is None:
            user_city_tag_id = -1

        tag_field = "fresh_tags" if use_fresh_tag else "tags"

        multi_fields = {
            'doctor.name': 4,
            'doctor.hospital.name': 3,
            'doctor.hospital.officer_name': 3,
            'user.last_name': 1,
            'service.name': 3,
        }
        if is_answer == True:  # noqa: E712 - equality test kept on purpose (1 also counts)
            multi_fields["answer"] = 2
        multi_fields[tag_field] = 6

        judge_correct_match_query_fields = ["service.name", tag_field]
        query_fields = ["doctor.name", "doctor.hospital.name", "doctor.hospital.officer_name", "user.last_name",
                        "service.name", tag_field]

        fields_weight_list = ['^'.join((name, str(boost))) for name, boost in multi_fields.items()]

        def all_terms_match(text):
            # Every analyzed token of ``text`` must be found across the boosted fields.
            return {
                "multi_match": {
                    "query": text,
                    "fields": fields_weight_list,
                    "analyzer": "gm_default_index",
                    "operator": "and",
                    "type": "cross_fields"
                }
            }

        def same_city_high_quality():
            # Clauses shared by the "local and good content" boosts.
            return [
                {"term": {"diary_city": user_city_tag_id}},
                {"range": {"content_level": {"gte": "3"}}}
            ]

        def weighted(weight, bool_body):
            # function_score entry: add ``weight`` when the bool filter matches.
            return {"weight": weight, "filter": {"bool": bool_body}}

        def tag_graph_clause():
            # NOTE(review): always targets "tags", even when use_fresh_tag is set - confirm.
            return {"terms": {"tags": graph_related_word_list}}

        functions = [
            {
                "filter": {
                    "term": {
                        "diary_city": user_city_tag_id
                    }
                },
                "weight": 200
            }
        ]

        for query_item in total_query_term_list:
            term_weight = 50 if query_item in other_key_word_term_list else 10
            for field_item in query_fields:
                functions.append({
                    "filter": {
                        "term": {
                            field_item: query_item
                        }
                    },
                    "weight": term_weight
                })

        should_clauses = [
            {
                "multi_match": {
                    "query": query,
                    "fields": fields_weight_list,
                    "operator": "or",
                    "type": "best_fields",
                    "analyzer": "gm_default_index"
                }
            }
        ]
        main_bool = {
            "must": [
                {
                    "term": {
                        "is_online": True
                    }
                }
            ],
            "should": should_clauses,
            "minimum_should_match": 1,
        }

        for ori_query_term in ori_query_list:
            functions.append(weighted(5000, {"must": [all_terms_match(ori_query_term)]}))
            functions.append(weighted(
                6000, {"must": [all_terms_match(ori_query_term)] + same_city_high_quality()}))

        if correct_term_list:
            complete_matching_should_list = [
                {"terms": {field_name: correct_term_list}}
                for field_name in judge_correct_match_query_fields
            ]
            complete_matching_should_list.append(all_terms_match(query))
            functions.append(weighted(50000, {
                "minimum_should_match": 1,
                "should": complete_matching_should_list,
                "must": same_city_high_quality()
            }))
            functions.append(weighted(20000, {
                "minimum_should_match": 1,
                "should": complete_matching_should_list
            }))

        if graph_related_word_list:
            # Graph-related words widen recall as well as scoring.
            should_clauses.append(tag_graph_clause())
            functions.append(weighted(6000, {
                "minimum_should_match": 1,
                "should": [tag_graph_clause()],
                "must": same_city_high_quality()
            }))
            functions.append(weighted(5000, {
                "minimum_should_match": 1,
                "should": [tag_graph_clause()]
            }))

        if other_key_word_term_list:
            functions.append(weighted(200, {
                "minimum_should_match": 1,
                "should": [
                    {"terms": {field_name: other_key_word_term_list}}
                    for field_name in query_fields
                ]
            }))

        if have_read_diary_list:
            main_bool["must_not"] = [
                {
                    "terms": {
                        "id": have_read_diary_list
                    }
                }
            ]

        q = dict()
        q["query"] = {
            "function_score": {
                "functions": functions,
                "score_mode": "sum",
                "boost_mode": "replace",
                "query": {"bool": main_bool}
            }
        }

        # Sort rules. Sunk diaries go last by default.
        sort_list = [
            {'is_sink': {'order': 'asc'}},
        ]
        score_plus_offline = {
            "_script": {
                "order": "desc",
                "script": {
                    "inline": "_score+doc['offline_score'].value"
                },
                "type": "number"
            }
        }

        if sort_type == DIARY_ORDER_TYPE.SEARCH_V1:
            sort_list += [
                score_plus_offline,
                '_score',
                {'last_update_time': {'order': 'desc'}}
            ]
        elif sort_type == DIARY_ORDER_TYPE.POPULARITY:  # ('5', order by popularity)
            sort_list += [
                {'popularity': {'order': 'desc'}},
                '_score',
            ]
        elif sort_type == DIARY_ORDER_TYPE.LAST_UPDATE_TIME:  # ('6', last update time)
            sort_list += [
                {'last_update_time': {'order': 'desc'}},
                '_score',
            ]
        elif sort_type == DIARY_ORDER_TYPE.CREATE_TIME:  # ('11', creation time)
            sort_list += [
                {'created_time': {'order': 'desc'}},
                '_score',
            ]
        elif sort_type == DIARY_ORDER_TYPE.FEED_FILTER:  # (feed real-time filtering)
            # The is_sink sort is replaced by a script file sort here.
            sort_list[0] = {
                '_script': {
                    'lang': settings.ES_SCRIPT_LANG,
                    'script_file': "sort_diary-revert-is-online",
                    'type': 'number',
                    'order': 'desc',
                }
            }
            sort_list.append({
                '_script': {
                    'lang': settings.ES_SCRIPT_LANG,
                    'script_file': "sort_diary-has-service",
                    'type': 'number',
                    'order': 'desc',
                }
            })
        else:
            # Same keys as SEARCH_V1, but recency is compared before the raw score.
            sort_list += [
                score_plus_offline,
                {'last_update_time': {'order': 'desc'}},
                '_score'
            ]

        # Filters. The negative filters returned by the helper are not used.
        f, _nf = new_process_filters(filters=filters, nfilters={})
        q["filter"] = {
            "bool": {
                "must": f
            }
        }
        q['sort'] = sort_list
        q['highlight'] = get_highlight_query_analyzer(["title", "answer", "tags"], query)

        q['_source'] = {
            "include": ["id", "nearby_city_tags_v1", "service", "hospital", "doctor", "tags", "offline_score"],
            "exclude": ["nearby_city_tags_v1.name", "service.name", "hospital.name", "doctor.name",
                        "doctor.hospital.officer_name", "doctor.hospital.name", "doctor.officer_name",
                        "service.short_description"]
        }

        logging.info("duan add,search_diary_alpha query is:%s" % str(q).encode("utf-8"))

        if only_get_query:
            return q

        index = es_index_adapt(
            index_prefix=settings.ES_INDEX_PREFIX,
            doc_type='diary',
            rw='read'
        )
        res = es.search(
            index=index,
            doc_type='diary',
            timeout=settings.ES_SEARCH_TIMEOUT,
            body=q,
            from_=offset,
            size=size)

        # Expose the nearby-city tag ids as a flat list and empty the verbose field.
        for hit in res.get("hits")["hits"]:
            source = hit["_source"]
            source["nearby_city_tags_list"] = [tag["tag_id"] for tag in source.get("nearby_city_tags_v1", [])]
            source["nearby_city_tags_v1"] = []
        return res
    except Exception:
        # Best effort: log the failure and hand back an empty result.
        logger.error("catch exception,err_msg:%s" % traceback.format_exc())
        return dict()


# Merged over from the original gaia code
def search_diary(
        query='',
        offset=0,
        size=5,
        sort_type=DIARY_ORDER_TYPE.DEFAULT,
        filters=None,
        sort_params=None,
        sort_diary_tag_first=False,
        search_again=False,
        device_id="null",
        is_answer=None,
        have_read_diary_list=None,
        use_fresh_tag=False,
        only_get_query=False
):
    """
    Diary search against Elasticsearch.

    Builds a ``filtered`` query (optional ``multi_match`` text clause plus the
    filters produced by ``new_process_filters``), attaches a sort list chosen
    by ``sort_type``, and runs it on the ``diary`` index.

    @param query: search keyword; when empty no text-match clause is added
    @param offset: offset of the first hit to return (passed as ``from_``)
    @param size: number of hits to return, capped at ``settings.COUNT_LIMIT``
    @param sort_type: one of DIARY_ORDER_TYPE; SEARCH_V1, POPULARITY,
        LAST_UPDATE_TIME, CREATE_TIME and FEED_FILTER have dedicated sort
        lists, anything else uses the default list
    @param filters: filter dict forwarded to ``new_process_filters``
    @param sort_params: only read for SEARCH_V1; keys ``user_city_tag_id``
        and ``in_whitelist``. A falsy ``user_city_tag_id`` is rewritten to -1
        in the dict that was passed in
    @param sort_diary_tag_first: add the ``sort_diary-tag-first`` script sort,
        parameterised with ``query``
    @param search_again: use operator ``or`` instead of ``and`` for the text match
    @param device_id: devices in the hard-coded experiment list additionally
        sort by ``ltr_score`` (SEARCH_V1 and default sort only)
    @param is_answer: when equal to True the ``answer`` field is searched too
    @param have_read_diary_list: diary ids to exclude via ``must_not``
    @param use_fresh_tag: match on ``fresh_tags`` instead of ``tags``
    @param only_get_query: return the query body without calling Elasticsearch

    @return: the query body when ``only_get_query`` is set, otherwise the
        Elasticsearch response; every hit's ``_source`` gains
        ``nearby_city_tags_list`` (tag ids) and has ``nearby_city_tags_v1``
        reset to an empty list.

    Search fields and boosts:
        doctor.name^4, doctor.hospital.name^3, doctor.hospital.officer_name^3,
        user.last_name^2, service.name^1, answer^2 (optional),
        tags^8 or fresh_tags^8
    """

    experiment_device_list = [
        "AB20292B-5D15-4C44-9429-1C2FF5ED26F6",
        "802C5FDC-5DC6-42D0-8F6F-2DBE200BB21B",
        "358035085192742",
        "29548727-8242-4D58-8151-F603F975BB98",
        "B2F0665E-4375-4169-8FE3-8A26A1CFE248",
        "863455037703008",
        "65EC6C14-1AD6-44C2-AED2-C41452284E91"
    ]

    # Parameter normalisation. None defaults are replaced here so that the
    # SEARCH_V1 branch can use `in` / `.get` on sort_params and so that no
    # mutable default object is shared between calls.
    size = min(size, settings.COUNT_LIMIT)
    filters = filters or {}
    if sort_params is None:
        sort_params = {}
    if have_read_diary_list is None:
        have_read_diary_list = []

    filtered = {}
    multi_fields = {
        # 'tags': 8,
        'doctor.name': 4,
        'doctor.hospital.name': 3,
        'doctor.hospital.officer_name': 3,
        'user.last_name': 2,
        'service.name': 1
        # "title": 2
        # "answer": 2

    }

    # Kept as `==` (not `is`) so that a caller sending 1 is still accepted.
    if is_answer == True:
        multi_fields["answer"] = 2

    if use_fresh_tag:
        multi_fields["fresh_tags"] = 8
    else:
        multi_fields["tags"] = 8

    if query:  # only add a text-match clause when a keyword was given @kula
        # Search fields rendered as "field^boost"
        fields = ['^'.join((k, str(v))) for (k, v) in multi_fields.items()]

        multi_match = {
            'query': query,
            'type': 'cross_fields',
            'operator': 'or' if search_again else 'and',
            'fields': fields,
        }

        filtered['query'] = {
            'multi_match': multi_match,
        }

    # Filter part; the second return value (negative filters) is not used here.
    must_filters, _ = new_process_filters(filters=filters, nfilters={})
    filtered['filter'] = {'bool': {'must': must_filters}}
    if have_read_diary_list:
        filtered['filter']['bool']['must_not'] = {"terms": {"id": have_read_diary_list}}

    if sort_type == DIARY_ORDER_TYPE.FEED_FILTER:
        # Feed filtering never uses the text clause. pop() instead of del so
        # that an empty `query` (no clause was added) does not raise KeyError.
        filtered.pop('query', None)

    q = {
        'query': {'filtered': filtered}
    }

    # Sort rules
    sort_list = [
        {'is_sink': {'order': 'asc'}},  # sunk diaries go last
    ]
    # Sink diaries by organisation penalty
    sort_list += [
        {
            '_script': {
                'lang': settings.ES_SCRIPT_LANG,
                'script_file': 'sort_diary-sink-by-org',
                'type': 'number',
                'order': 'desc',
            }
        }
    ]

    # if Tag.objects.filter(name=query).filter(tag_type__in=[TAG_TYPE.BODY_PART, TAG_TYPE.BODY_PART_SUB_ITEM, TAG_TYPE.ITEM_WIKI]):
    if sort_diary_tag_first:
        sort_list += [
            {
                '_script': {
                    'lang': settings.ES_SCRIPT_LANG,
                    'script_file': 'sort_diary-tag-first',
                    'type': 'number',
                    'params': {
                        'query': query,
                    },
                    'order': 'desc',
                }
            }
        ]

    if sort_type == DIARY_ORDER_TYPE.SEARCH_V1:
        # A present-but-falsy city id is normalised to -1 in the caller's dict.
        if "user_city_tag_id" in sort_params and not sort_params["user_city_tag_id"]:
            sort_params["user_city_tag_id"] = -1
        sort_list += [
            {'_script': {
                'lang': settings.ES_SCRIPT_LANG,
                'script_file': 'sort_diary-search-v3',
                'type': 'number',
                'params': {
                    'user_city_tag_id': sort_params.get('user_city_tag_id', -1),
                    'in_whitelist': sort_params.get('in_whitelist', 0)
                },
                'order': 'desc',
            }}]

        if device_id in experiment_device_list:
            sort_list += [
                {'ltr_score': {"order": "desc"}},
            ]

        sort_list += [
            {'_score': {"order": "desc"}},
            {'has_service': {'order': 'desc'}},
            {'content_level_is_good': {'order': 'desc'}},
            {'popularity': {'order': 'desc'}},
            {'last_update_time': {'order': 'desc'}},
        ]

    elif sort_type == DIARY_ORDER_TYPE.POPULARITY:  # ('5', sort by popularity)
        sort_list += [
            {'popularity': {'order': 'desc'}},
            '_score',
        ]
    elif sort_type == DIARY_ORDER_TYPE.LAST_UPDATE_TIME:  # ('6', last update time)
        sort_list += [
            {'last_update_time': {'order': 'desc'}},
            '_score',
        ]
    elif sort_type == DIARY_ORDER_TYPE.CREATE_TIME:  # ('11', creation time)
        sort_list += [
            {'created_time': {'order': 'desc'}},
            '_score',
        ]
    elif sort_type == DIARY_ORDER_TYPE.FEED_FILTER:  # (real-time feed filtering)
        # Replace the plain is_sink sort with a script sort
        # (original note: binarise the sink result, sunk diaries return 0).
        sort_list[0] = {
            '_script': {
                'lang': settings.ES_SCRIPT_LANG,
                'script_file': "sort_diary-revert-is-online",
                'type': 'number',
                'order': 'desc',
            }
        }

        sort_list += [{
            '_script': {
                'lang': settings.ES_SCRIPT_LANG,
                'script_file': "sort_diary-has-service",
                'type': 'number',
                'order': 'desc',
            }
        }]

    else:
        if device_id in experiment_device_list:
            sort_list += [
                {'ltr_score': {"order": "desc"}},
            ]
        sort_list += [
            {'_score': {"order": "desc"}},
            {'is_headline': {'order': 'desc'}},
            {'is_identification': {'order': 'desc'}},
            {'has_order': {'order': 'desc'}},
            {'has_service': {'order': 'desc'}},
            {'is_elite': {'order': 'desc'}},
            {'popularity': {'order': 'desc'}},
            {'last_update_time': {'order': 'desc'}},
        ]

    q['sort'] = sort_list
    # Highlight part
    # q['highlight'] = get_highlight(multi_fields.keys())
    q['highlight'] = get_highlight_query_analyzer(["title", "answer", "tags"], query)

    q['_source'] = {
        "include": ["id", "nearby_city_tags_v1", "service", "hospital", "doctor", "tags"],
        "exclude": ["nearby_city_tags_v1.name", "service.name", "hospital.name", "doctor.name",
                    "doctor.hospital.officer_name", "doctor.hospital.name", "doctor.officer_name",
                    "service.short_description"]

    }

    if only_get_query:
        return q

    es = get_es()
    index = es_index_adapt(
        index_prefix=settings.ES_INDEX_PREFIX,
        doc_type='diary',
        rw='read'
    )
    res = es.search(
        index=index,
        doc_type='diary',
        timeout=settings.ES_SEARCH_TIMEOUT,
        body=q,
        from_=offset,
        size=size)

    # Shape of the response:
    # {
    #   "hits": {
    #     "hits": [                       # result list
    #       {
    #         "sort": [...],              # values the hit was sorted by
    #         "_source": {...},           # diary document (see mapping/diary.json),
    #                                     # limited to the fields selected in q['_source'];
    #                                     # date values come back as strings
    #         "highlight": {"tags": [...]},   # highlighted fields
    #         "_id": "903"                # document id as a string
    #       }
    #     ],
    #     "total": 125                    # total number of matches
    #   }
    # }

    # Added field nearby_city_tags_list: flatten the nested tag objects to
    # their ids and blank out the original nested list.
    for item in res.get("hits")["hits"]:
        item["_source"]["nearby_city_tags_list"] = []
        if "nearby_city_tags_v1" in item["_source"]:
            item["_source"]["nearby_city_tags_list"] = [i["tag_id"] for i in item["_source"]["nearby_city_tags_v1"]]
        item["_source"]["nearby_city_tags_v1"] = []

    return res


def scatter(data, size, fields):
    """
    Spread search hits out by doctor.

    When "doctor" is not among ``fields`` the hits are returned untouched.
    Otherwise every hit is tagged in place with ``id`` (its ``_id``) and
    ``group`` (the doctor id from ``_source``, or None), handed to
    ``variousness`` together with ``size``, and the first ``size`` entries of
    its result are returned.
    """
    if "doctor" in fields:
        for hit in data:
            hit['id'] = hit['_id']
            doctor = hit.get('_source', {}).get('doctor', {})
            hit['group'] = doctor.get("id", None)

        # presumably variousness() reorders by 'group' -- confirm in libs.algorithms
        return variousness(data, size)[:size]

    return data


# 首页精选排序
def index_filter_diary(filters, offset=0, size=10):
    """
    Rank diaries for the home-page selection and return one page of ids.

    Always fetches the first 1000 hits from ``filter_diary`` with the INDEX
    sort type. For diaries whose doctor is authenticated, the second sort
    value of the hit is reduced by a penalty that grows with how many times
    the same doctor / hospital has already appeared (see
    ``_get_penalty_score``). Other diaries keep their score. The diaries are
    then re-sorted by the resulting score, highest first.

    @param filters: filter dict forwarded to ``filter_diary``
    @param offset: start position inside the re-ranked list
    @param size: number of ids to return
    @return: list of diary ids
    """
    hits = filter_diary(
        offset=0,
        size=1000,
        # filters={'id':9438548},
        sort_params={},
        sort_type=DIARY_ORDER_TYPE.INDEX,
        filters=filters,
        interal_call=True
    )['hits']

    doctor_seen = {}  # doctor id -> number of appearances so far
    hospital_seen = {}  # hospital id -> number of appearances so far
    scored = []

    for hit in hits:
        source = hit['_source']
        doctor = source['doctor']

        if not doctor['is_authenticated']:
            # Unauthenticated doctors are not penalised.
            scored.append({
                'id': source['id'],
                'original_score': hit['sort'][1],
                'penalty_score': 0,
                'final_score': hit['sort'][1],
            })
            continue

        doctor_id = doctor['id']
        doctor_seen[doctor_id] = doctor_seen.get(doctor_id, 0) + 1

        # '' stands in for "no hospital"; all such diaries share one counter.
        hospital_id = doctor['hospital'].get('id') if 'hospital' in doctor else ''
        hospital_seen[hospital_id] = hospital_seen.get(hospital_id, 0) + 1

        base_score = hit['sort'][1]
        penalty = _get_penalty_score(doctor_seen[doctor_id], hospital_seen[hospital_id], base_score)
        scored.append({
            'id': source['id'],
            'original_score': base_score,
            'penalty_score': penalty,
            'final_score': base_score - penalty,
        })

    ranked = sorted(scored, key=lambda entry: entry['final_score'], reverse=True)
    return [entry['id'] for entry in ranked[offset:offset + size]]


# todo
# 取美购列表中美购关联的日记本
# 排序按照首页精选排序
@bind('doris/search/get_diaries_by_special_id')
def get_diaries_by_special_id(special_id, offset=0, size=10):
    """
    Return one page of diary ids filtered by ``special_id``, ranked by
    ``index_filter_diary``.

    @return: {'diaries': [diary id, ...]}
    """
    diary_ids = index_filter_diary({"special_id": special_id}, offset, size)
    return {'diaries': diary_ids}


# todo
# 取美购列表中美购关联的日记本
# 排序按照首页精选排序
@bind('doris/search/get_index_diaries')
def get_index_diaries(offset=0, size=10):
    """
    Return one page of diary ids ranked by ``index_filter_diary`` with no
    filters applied.

    @return: {'diaries': [diary id, ...]}
    """
    no_filters = {}
    return {'diaries': index_filter_diary(no_filters, offset, size)}


# 将gaia 请求es日记数据代码合并过来
@bind("doris/search/query_diary")
def query_diary(
        query='',
        offset=0,
        size=5,
        sort_type=DIARY_ORDER_TYPE.DEFAULT,
        filters=None,
        sort_params=None,
        sort_diary_tag_first=False,
        device_id="null",
        is_answer=None,
        have_read_diary_list=[],
        use_fresh_tag=False,
        need_diary_cpc_num=0
):
    """
    RPC entry point for diary search, optionally combined with CPC diaries.

    When ``need_diary_cpc_num`` is positive, ``get_cpc_diary`` is asked for
    that many CPC diaries for the query / device / user city, and the result
    is (a) passed to ``search_diary_alpha`` as its ``have_read_diary_list``
    and (b) attached to the response under ``cpc_diary_list``.

    @param query: search keyword
    @param offset: offset of the first hit
    @param size: number of hits
    @param sort_type: one of DIARY_ORDER_TYPE
    @param filters: filter dict, forwarded unchanged
    @param sort_params: sort parameters, forwarded unchanged; its
        ``user_city_tag_id`` (if present and not None) is used as the CPC city
        id, otherwise -1
    @param sort_diary_tag_first: forwarded unchanged
    @param device_id: forwarded unchanged and used for the CPC lookup
    @param is_answer: forwarded unchanged
    @param have_read_diary_list: accepted for interface compatibility but not
        used by the current implementation
    @param use_fresh_tag: forwarded unchanged
    @param need_diary_cpc_num: number of CPC diaries requested; 0 disables CPC
    @return: whatever ``search_diary_alpha`` returns, plus ``cpc_diary_list``
        when CPC diaries were requested
    """
    cpc_diary_list = list()
    if need_diary_cpc_num > 0:
        # sort_params defaults to None, so guard before looking up the city id.
        user_city_id = (sort_params or {}).get("user_city_tag_id")
        if user_city_id is None:
            user_city_id = -1
        cpc_diary_list = get_cpc_diary(query=query, device_id=device_id, city_id=user_city_id,
                                       cpc_diary_num=need_diary_cpc_num)

    # search_again=True is always requested here; the CPC diaries are handed
    # over as have_read_diary_list (presumably so they are not returned twice
    # -- confirm in search_diary_alpha).
    res = search_diary_alpha(query=query, offset=offset, size=size, filters=filters, sort_params=sort_params,
                             sort_diary_tag_first=sort_diary_tag_first,
                             device_id=device_id, search_again=True, sort_type=sort_type, is_answer=is_answer,
                             have_read_diary_list=cpc_diary_list, use_fresh_tag=use_fresh_tag)

    if need_diary_cpc_num > 0:
        res["cpc_diary_list"] = cpc_diary_list

    return res


@bind("doris/search/query_diary_alpha")
def query_diary_alpha(
        query='',
        offset=0,
        size=5,
        sort_type=DIARY_ORDER_TYPE.DEFAULT,
        filters=None,
        sort_params=None,
        sort_diary_tag_first=False,
        device_id="null",
        is_answer=None,
        use_fresh_tag=False,
        need_diary_cpc_num=0
):
    """
    Second RPC name for diary search: forwards every argument to
    ``query_diary`` by keyword and returns its result unchanged.
    """
    forwarded = dict(
        query=query,
        offset=offset,
        size=size,
        sort_type=sort_type,
        filters=filters,
        sort_params=sort_params,
        sort_diary_tag_first=sort_diary_tag_first,
        device_id=device_id,
        is_answer=is_answer,
        use_fresh_tag=use_fresh_tag,
        need_diary_cpc_num=need_diary_cpc_num,
    )
    return query_diary(**forwarded)


# 将gaia 请求es日记数据代码合并过来
@bind("doris/search/query_filter_diary")
def query_filter_diary(
        offset=0,
        size=5,
        sort_type=DIARY_ORDER_TYPE.LAST_UPDATE_TIME,
        filters=None,
        nfilters=None,
        sort_params=None,
        expose_total=False,
        interal_call=False,
        filter_invalid_picture=False,
        fields=[], use_fresh_tag=False,
        device_id='', source_type=0
):
    """
    RPC wrapper around ``new_filter_diary``.

    All arguments are handed over positionally, in declaration order, and the
    callee's result is returned as is.
    """
    # Order matters: new_filter_diary receives these positionally.
    positional = (
        offset,
        size,
        sort_type,
        filters,
        nfilters,
        sort_params,
        expose_total,
        interal_call,
        filter_invalid_picture,
        fields,
        use_fresh_tag,
        device_id,
        source_type,
    )
    return new_filter_diary(*positional)


@bind("doris/search/diaries")
def get_diaries(query='', offset=0, size=10, user_city_tag_id=None, sort_type=DIARY_ORDER_TYPE.DEFAULT, filters=None):
    """
    Recall diaries and spread out diaries of the same hospital.

    Recalls ``offset + size + size * 10`` diaries via ``recall_diary`` and
    assigns each one a slot index: the first diary of a hospital (or a diary
    without a hospital id) takes the next free slot, while a repeat of an
    already-seen hospital is pushed ``size * <times seen>`` slots further
    back. The diaries are then read back in slot order and the page
    ``[offset, offset + size)`` is returned.

    @param query: search keyword forwarded to ``recall_diary``
    @param offset: start position in the re-ordered list
    @param size: page size; also the push-back distance for repeated hospitals
    @param user_city_tag_id: when truthy, forwarded as
        ``sort_params["user_city_tag_id"]``
    @param sort_type: one of DIARY_ORDER_TYPE, forwarded to ``recall_diary``
    @param filters: filter dict forwarded to ``recall_diary``; a fresh empty
        dict is used when omitted
    @return: {"diaries_ids": [diary id, ...]}
    """
    # A fresh dict per call: the previous `filters={}` default was one shared
    # object handed to recall_diary on every call.
    if filters is None:
        filters = {}

    sort_params = {}
    if user_city_tag_id:
        sort_params["user_city_tag_id"] = user_city_tag_id

    # Over-recall so that pushed-back diaries still leave a full page.
    total_recall_num = (offset + size) + size * 10
    diaries = recall_diary(query, 0, total_recall_num, filters, sort_type, sort_params)

    cur_page_hospital_dict = dict()  # hospital id -> times seen in the current window
    break_up_id_dict = dict()  # slot index -> diary id

    cur_index = 0
    for diary in diaries:
        has_hospital = 'hospital' in diary and 'id' in diary['hospital']
        hospital_id = diary['hospital']['id'] if has_hospital else None

        if has_hospital and hospital_id in cur_page_hospital_dict:
            # Repeated hospital: push the diary back, one more `size` for
            # every previous occurrence, then take the next free slot.
            new_index = cur_index + size * cur_page_hospital_dict[hospital_id]
            while new_index in break_up_id_dict:
                new_index += 1
            break_up_id_dict[new_index] = diary['id']
            cur_page_hospital_dict[hospital_id] += 1
        else:
            # No hospital id, or first diary of this hospital: next free slot.
            while cur_index in break_up_id_dict:
                cur_index += 1
            break_up_id_dict[cur_index] = diary['id']
            if has_hospital:
                cur_page_hospital_dict[hospital_id] = 1
            cur_index += 1

        # NOTE(review): the window is a fixed 10 slots, independent of `size`
        # -- confirm whether this is meant to follow the page size.
        if cur_index % 10 == 0:
            cur_page_hospital_dict.clear()

    # A plain slice already yields a short or empty page when the list is
    # shorter than offset + size.
    page_keys = sorted(break_up_id_dict)[offset:offset + size]
    return {"diaries_ids": [break_up_id_dict[id_key] for id_key in page_keys]}
