import logging
import time
import datetime
import re
import traceback

import redis
import jieba.posseg as pseg
from django.conf import settings

from gm_types.mimas import TRACTATE_STATUS, PGC_TYPE
from gm_types.gaia import DOCTOR_TYPE, TAG_V3_TYPE, TAG_TYPE
from gm_types.doris import SELECTED_CONTENT_TYPE
from talos.models.tractate.tractate import Tractate
from talos.models.tractate.vote import TractateVote
from talos.models.tractate.reply import TractateReply
from talos.services.user import UserService
from talos.services.tractate.tractate import TractateService
from talos.services.convert_service.user_convert_service import UserConvertService
from data_sync.utils import to_epoch, tzlc
from qa.models.toutiao import (by_content_type_id_get_keywords, get_content_star_keywords,
                               get_content_title_keywords, get_content_star_first_keyword, has_service)
from tags.services.tag import (get_tagv3_analysis_info, get_tag_v3_operators_tags, get_tag_v3_anecdote_tags,
                               get_tag_v3_anecdote_tag_ids, get_tag_v3_channel_tags_tags,
                               get_tag_v3_names_by_tag_v3_ids, get_gossip_tags, get_tag_v3_gossip_tag_ids)
from algorithm.text_classifical.base import model as alo_model

# Strips HTML tags.  Compiled once at import time instead of per call / per
# loop iteration.
_HTML_TAG_RE = re.compile(r"<[^>]+>", re.S)

# Tag names that force a tractate out of the index feed (show_by_index == 2).
# Duplicates from the original inline list are harmless — it is only ever used
# as a set.
_FILTER_TAG_NAMES = frozenset([
    "斩男心机妆", "今日look打卡", "少女心未泯", "颜值高光时刻", "美妆", "Get漫画迷人眼",
    "最显白口红推荐", "口红试色", "穿搭技巧", "眼妆教程", "腮红", "氧气笑容练成记",
    "口红试色", "假体隆胸", "自体脂肪隆胸", "胶原蛋白填充隆胸", "玻尿酸填充隆胸",
    "胸部假体取出", "胸部失败修复", "胸部下垂矫正", "胸部修复", "胸形美化",
    "丰胸(隆胸)", "胸部塑身", "玻尿酸隆胸", "假体隆胸", "胶原蛋白隆胸", "埋线隆胸",
    "自体脂肪隆胸", "胸部注射物取出", "胸部假体取出", "胸部假体取出", "胸部修复",
    "脂肪胶隆胸", "胸部整形", "隆胸", "胸部美化", "缩胸", "胸部提升", "生胸毛",
    "胸部护理", "综合隆胸", "植胸毛", "胸部修复", "隆胸修复", "胸部护理", "硅胶隆胸",
    "胸部护理", "胸部修复", "胸部手术", "胸部提升", "生胸毛", "美胸", "胸毛", "胸部",
    "胸形", "胸部假体", "胸部kyc", "自体脂肪丰胸", "泰国隆胸", "泰国假体隆胸",
    "脂肪丰胸", "复合式隆胸", "隆胸假体", "丰胸(隆胸)", "美胸养成",
])

# Item keys written by the tag-v3 analysis step, in the same order as the
# value lists unpacked from get_tagv3_analysis_info() below.
_TAGV3_ITEM_KEYS = (
    "tags_v3", "first_demands", "second_demands", "first_solutions",
    "second_solutions", "positions", "second_positions", "tagv3_ids",
    "first_demands_ids", "second_demands_ids", "first_solutions_ids",
    "second_solutions_ids", "first_positions_ids", "second_positions_ids",
    "first_classify_ids", "first_classify_names", "second_classify_ids",
    "second_classify_names",
)

# content_level -> exposure count below which the content still needs
# guaranteed traffic ("保量").  Levels missing from the map never qualify.
_EXPOSURE_THRESHOLDS = {"6": 1000, "5": 800, "4": 500, "3.5": 300, "3": 200}


def get_tractate(pks):
    """Build search/Doris sync documents for the tractates with ids *pks*.

    Besides assembling one dict per tractate, this also maintains the
    "doris:tractate:community:update_time" zset and reads the exposure hash in
    Redis, so it is not side-effect free.

    :param pks: iterable of tractate primary keys.
    :return: list of per-tractate dicts; [] if anything raises (logged).
    """
    try:
        tractates = Tractate.objects.filter(id__in=pks)
        data = []
        user_ids = list(set(tractates.values_list("user_id", flat=True)))
        user_infos = UserService.get_users_by_user_ids(user_ids)
        # NOTE(review): this list accumulates tag names across ALL tractates in
        # the batch, so one filtered tag on an early tractate also flags every
        # later one as show_by_index == 2.  That looks unintentional (a per-
        # tractate reset would be expected) — confirm before changing.
        tractate_all_tag_names = []
        # One shared client instead of reconnecting twice per loop iteration.
        redis_client = redis.StrictRedis.from_url(settings.DORIS_URL)
        for tractate in tractates:
            item = {}
            user = user_infos.get(tractate.user_id, None)
            user_name = user.nickname if user and user.nickname else ""
            # Nicknames like "xxx 医生" / "xxx 机构" keep only the first token.
            if any(marker in user_name for marker in (" 医生", " 机构")):
                user_name = user_name.split()[0]
            item['id'] = tractate.id
            item["user_id"] = tractate.user_id
            # utf-16 round-trip normalizes lone surrogates, then drop HTML tags.
            sss = tractate.content.encode('utf-16', 'surrogatepass').decode('utf-16')
            item["content"] = _HTML_TAG_RE.sub("", sss)
            item['keynote_sentence'] = get_keynote_sentence(tractate.content) if int(
                float(tractate.content_level)) >= 3 else ""
            item["content_pre"] = ""
            item["is_online"] = tractate.is_online
            item["status"] = tractate.status
            item["platform"] = tractate.platform
            item["content_level"] = tractate.content_level
            item["is_excellent"] = tractate.is_excellent
            item["pgc_type"] = tractate.pgc_type
            item["create_time"] = tzlc(tractate.create_time)
            item['update_time_stratific'] = tractate.get_update_time_stratific(tractate.create_time)
            item["create_time_epoch"] = to_epoch(tzlc(tractate.create_time))
            item["last_modified"] = tzlc(tractate.last_modified)
            # Keep the community update-time zset in sync: online community
            # content at level >= 3 is (re)scored, everything else is removed.
            redis_name = "doris:tractate:community:update_time"
            if tractate.pgc_type == PGC_TYPE.COMMUNITY \
                    and item["is_online"] \
                    and int(item["content_level"]) >= 3:
                # NOTE(review): legacy redis-py (<3.0) zadd signature
                # (name, score, member); redis-py >= 3.0 requires a mapping —
                # confirm the installed client version before upgrading.
                redis_client.zadd(redis_name, float(time.mktime(item["last_modified"].timetuple())), item["id"])
            else:
                redis_client.zrem(redis_name, item["id"])
            item["audit_time"] = tzlc(tractate.audit_time)
            item["tractate_score"] = tractate.get_tractate_score
            item['good_click'] = tractate.get_goodclick
            item['goodclick_rate_30'] = tractate.con_goodclick
            item["good_click_tractate_score"] = tractate.get_good_click_tractate_score
            tractate_tag_list = tractate.get_tag_list
            item["tractate_tag_list"] = tractate_tag_list
            item["post_time"] = tzlc(tractate.audit_time) if tractate.status == TRACTATE_STATUS.AUDIT_SUCCESS else None
            item["author"] = user_name
            tag_list = tractate.get_tag_list
            item["tractate_tag"] = tractate.get_tag(tag_list)
            item["tractate_tag_name"] = tractate.get_tag_names(tag_list)
            # Portrait tags: body parts, their sub-items and wiki items only.
            item['portrait_tag_name'] = [
                t.get("tag_name", None) for t in item["tractate_tag"]
                if t.get("tag_type", 0) in (TAG_TYPE.BODY_PART, TAG_TYPE.BODY_PART_SUB_ITEM, TAG_TYPE.ITEM_WIKI)
            ]
            item["is_video"] = tractate.get_is_video
            item["tractate_tag_name_content"] = tractate.get_tag_names_content(tag_list)
            item["hot_score"] = tractate.get_hot_score()
            item["vote_num"] = TractateVote.objects.filter(tractate_id=tractate.id, is_online=True).count()
            item["reply_num"] = TractateReply.objects.filter(tractate_id=tractate.id, is_online=True).count()
            item['reply_vote_radd'] = item["vote_num"] * 13 + item["reply_num"] * 17
            item["content_simi_bol_show"] = tractate.get_show()
            # Fresh ("new") tag system fields.
            fresh_tag_list = tractate.get_fresh_tag_list
            item["fresh_tractate_tag_list"] = fresh_tag_list
            item["fresh_tractate_tag_name"] = tractate.get_fresh_tag_names(fresh_tag_list)
            item["fresh_tractate_tag_name_content"] = tractate.get_tag_names_content(fresh_tag_list)
            item["last_any_reply_time"] = tzlc(tractate.get_tractate_last_any_reply_time())
            item["is_office"] = tractate.get_user_info_office()
            content_keyword = by_content_type_id_get_keywords(id=tractate.id, content_type="usertopic")
            item["content_keyword"] = content_keyword
            item["content_star_keyword"] = get_content_star_keywords(id=tractate.id, content_type="usertopic")
            item["content_star_first_keyword"] = get_content_star_first_keyword(id=tractate.id,
                                                                                content_type="usertopic")
            item["has_service"] = has_service(tractate_tag_list, content_keyword)
            item["user_type"] = get_user_type(tractate.user_id)
            item['has_picture'] = tractate.get_tractate_image
            # Tag-v3 analysis; the unpack order below is fixed by the callee.
            (need_refresh_data, second_demands_list, second_solutions_list, second_positions_list,
             second_demands_ids_list, second_solutions_ids_list, second_positions_ids_list,
             first_demands_ids_list, first_solutions_ids_list, first_positions_ids_list,
             first_demands_list, first_solutions_list, first_positions_list,
             project_tags_list, project_tags_ids_list,
             first_classify_ids_list, first_classify_names_list,
             second_classify_ids_list, second_classify_names_list) = get_tagv3_analysis_info(
                content_id=item["id"], content_type="tractate")
            if need_refresh_data:
                # Order matches _TAGV3_ITEM_KEYS exactly.
                tagv3_values = (project_tags_list, first_demands_list, second_demands_list,
                                first_solutions_list, second_solutions_list, first_positions_list,
                                second_positions_list, project_tags_ids_list, first_demands_ids_list,
                                second_demands_ids_list, first_solutions_ids_list, second_solutions_ids_list,
                                first_positions_ids_list, second_positions_ids_list, first_classify_ids_list,
                                first_classify_names_list, second_classify_ids_list, second_classify_names_list)
                for key, value in zip(_TAGV3_ITEM_KEYS, tagv3_values):
                    item[key] = list(value)
            else:
                for key in _TAGV3_ITEM_KEYS:
                    item[key] = []
            # Extra fields syncing the new-tag-system operator tags.
            item['operators_add_tags'] = get_tag_v3_operators_tags(content_id=tractate.id, content_type="tractate")
            item['channel_tags'] = get_tag_v3_channel_tags_tags(content_id=tractate.id, content_type="tractate")
            item['channel_tags_names'] = get_tag_v3_channel_tags_tags(content_id=tractate.id,
                                                                      content_type="tractate", get_names=True)
            item['anecdote_tags'] = get_tag_v3_anecdote_tags(content_id=tractate.id, content_type="tractate")
            item['anecdote_tag_ids'] = get_tag_v3_anecdote_tag_ids(content_id=tractate.id, content_type="tractate")
            item['new_smr'] = tractate.get_tractate_new_smart_rank_score(tractate_id=tractate.id)
            score = tractate.get_search_tractate_new_smart_rank_score(tractate_id=tractate.id)
            item['search_new_smr'] = score.get("smart_rank_score", 0)
            item['new_goodclicks'] = score.get('new_goodclick', 0)
            item["latest_interaction_time"] = tractate.get_tractate_latest_interaction_time(
                item["is_online"], item["content_level"], item["last_modified"])
            item['latest_create_or_reply_time'] = int(time.mktime(item['create_time'].timetuple()))
            if item['last_any_reply_time'] and item['last_any_reply_time'] > item["create_time"]:
                item['latest_create_or_reply_time'] = int(time.mktime(item['last_any_reply_time'].timetuple()))
            # Guaranteed-exposure ("内容保量") flag.
            days_past_value = (tzlc(datetime.datetime.today()) - item['create_time']).days
            redis_exposure_val = redis_client.hget("doris:content_exposure:tractate", item["id"])
            if redis_exposure_val:
                redis_exposure_val = int(redis_exposure_val)
                threshold = _EXPOSURE_THRESHOLDS.get(str(item["content_level"]))
                item["is_need_guarantee"] = threshold is not None and redis_exposure_val < threshold
            elif days_past_value <= 1:
                # Treated as same-day new content that has produced no exposure yet.
                item["is_need_guarantee"] = True
            # NOTE(review): with no exposure record and age > 1 day the key is
            # deliberately left unset (the original ">= 7 days forces False"
            # branch was already commented out).
            operators_add_tags_names = get_tag_v3_names_by_tag_v3_ids(item['operators_add_tags'])
            tractate_all_tag_names.extend(
                item["tractate_tag_name"] + item["tractate_tag_name_content"]
                + item["fresh_tractate_tag_name"] + item["fresh_tractate_tag_name_content"]
                + item["tags_v3"] + item["first_demands"] + item["second_demands"]
                + item["first_solutions"] + item["second_solutions"]
                + item["positions"] + item["second_positions"]
                + item['channel_tags_names'] + item['anecdote_tags']
                + item["first_classify_names"] + item["second_classify_names"]
                + operators_add_tags_names)
            # Set intersection instead of the original per-element membership
            # scan that rebuilt the filter set for every tag name.
            if _FILTER_TAG_NAMES.intersection(tractate_all_tag_names):
                item['show_by_index'] = 2  # 2: carries a tag from the filter list
            else:
                item['show_by_index'] = tractate.get_show_by_index(tractate.id)
            item["has_video"] = TractateService.has_video(tractate.id)
            item["is_gif"] = tractate.cover_is_dynamic  # animated (GIF) cover?
            # Gossip flag: any operator tag that is also a gossip tag.
            gossip_tag_names = get_gossip_tags()
            item["is_gossip"] = bool(set(operators_add_tags_names).intersection(gossip_tag_names))
            item["gossip_tag_ids"] = get_tag_v3_gossip_tag_ids(content_id=tractate.id, content_type="tractate")
            item["gossip_tags"] = get_tag_v3_gossip_tag_ids(content_id=tractate.id, content_type="tractate",
                                                            get_names=True)
            # Home-page selection: classify content into star / celebrity lists.
            tagv4 = tractate.get_tractate_tagv4_names(tractate_id=tractate.id)
            if tagv4:
                tags_info = tractate.get_tag(list(tagv4))
                item['selected_stars'] = []
                item['selected_internet_celebrity'] = []
                all_tags = []
                for tag in tags_info:
                    all_tags.append(tag.get("id", None))
                    tag_type = tag.get("tag_type", 0)
                    if tag_type == TAG_TYPE.STAR:
                        item['selected_stars'].append(tag.get("name", None))
                    elif tag_type == TAG_TYPE.INFLUENCER:
                        item['selected_internet_celebrity'].append(tag.get("name", None))
                    if tag.get("id") == 14288:
                        item['operators_add_tags'].append(14288)
                # Hard-coded tag ids drive the selected-content classification.
                if 15928 in all_tags:
                    item['selected_content_type'] = SELECTED_CONTENT_TYPE.BEAUTY_STAR
                elif 10682 in all_tags:
                    item['selected_content_type'] = SELECTED_CONTENT_TYPE.STAR_GOSSIP
                elif 15930 in all_tags:
                    item['selected_content_type'] = SELECTED_CONTENT_TYPE.BEAUTY_CELEBRITY
                elif 10683 in all_tags:
                    item['selected_content_type'] = SELECTED_CONTENT_TYPE.CELEBRITY_GOSSIP
                else:
                    item['selected_content_type'] = -1
            else:
                # No tag-v4 names: fall back to the text-classification model.
                # Best effort — any model failure leaves the fields unset.
                try:
                    result_data = alo_model.run(tractate.content)
                    item['selected_content_type'] = int(result_data.get("content_type", 0))
                    item['selected_internet_celebrity'] = [list(entry.keys())[0]
                                                           for entry in result_data.get("celebrity", [])]
                    item['selected_stars'] = [list(entry.keys())[0] for entry in result_data.get("star", [])]
                    projects = [list(entry.keys())[0] for entry in result_data.get("projects", [])]
                    item['portrait_tag_name'].extend(projects)
                except Exception:
                    pass
            # Operator tag 3315 switches to the new-user smart-rank score.
            if "operators_add_tags" in item and 3315 in item['operators_add_tags']:
                item['new_smr'] = tractate.get_tractate_newuser_smr(tractate_id=tractate.id)
            data.append(item)
        logging.info("get data:%s" % data)
        return data
    except Exception:
        logging.error("catch exception,logins:%s" % traceback.format_exc())
        return []


def get_user_type(user_id):
    """Classify a user by account kind.

    0: doctor, 1: office/hospital, 2: daren (paying member), 3: ordinary user,
    4: lookup failed.

    :param user_id: user id understood by UserConvertService.
    :return: int in {0, 1, 2, 3, 4}.
    """
    try:
        user_info = UserConvertService.get_user_info_by_user_id(user_id=user_id)
        if user_info.get("doctor_id", None) and user_info["doctor_type"] == DOCTOR_TYPE.DOCTOR:
            return 0
        if user_info.get("hospital_id", None) and user_info["doctor_type"] == DOCTOR_TYPE.OFFICER:
            return 1
        if "membership_level" in user_info and user_info["membership_level"] != "0":
            return 2
        return 3
    except Exception:
        return 4


def get_keynote_sentence(content):
    """Split *content* (may contain HTML) into sentences.

    HTML tags are stripped first; then newlines are inserted after sentence
    terminators (keeping a trailing closing quote attached to its sentence),
    and leading punctuation is stripped from each resulting sentence.

    :param content: raw tractate content string.
    :return: list of sentence strings; [] on any error (logged).
    """
    try:
        # utf-16 round-trip normalizes lone surrogates, then drop HTML tags.
        ss = content.encode('utf-16', 'surrogatepass').decode('utf-16')
        str_re = _HTML_TAG_RE.sub("", ss)
        para = re.sub('([;。!?\?])([^”’])', r"\1\n\2", str_re)  # single-char sentence terminators
        para = re.sub('(\.{6})([^”’])', r"\1\n\2", para)  # English ellipsis
        para = re.sub('(\…{2})([^”’])', r"\1\n\2", para)  # Chinese ellipsis
        para = re.sub('([;。!?\?][”’])([^,。!?\?])', r'\1\n\2', para)  # terminator followed by closing quote
        para = para.rstrip()  # drop trailing newline(s) at the end of the text
        # str.lstrip treats this as a plain set of characters to strip (the
        # literal "[", "]" and "+" are members too) — it is NOT a regex.
        leading_punct = '[’!"#$%&\'()*+,-./:;=?@[\\]^_`{|}~。?,]+'
        return [sentence.lstrip(leading_punct) for sentence in para.split("\n")]
    except Exception:
        logging.error("catch exception,logins:%s" % traceback.format_exc())
        return []