# -*- coding:utf-8 -*-
# author:gm
# mail: zhangguodong@igengmei.com
# datetime:2020/4/24 3:32 下午
# software: PyCharm
from preprocesser.processors import token_processor
from preprocesser.filter import stopwords_filter
from collections import Counter
from config import config
import os
import codecs


class SELECTED_CONTENT_TYPE:
    """Content categories, each stored as a ``(type id, Chinese label)`` tuple."""

    # Content about medical-aesthetics topics.
    BEAUTY_PROJECT = 1, "医美项目"  # medical-aesthetics project
    BEAUTY_STAR = 2, "明星医美"  # stars + medical aesthetics
    BEAUTY_CELEBRITY = 3, "网红医美"  # internet influencers + medical aesthetics

    # Gossip content.
    STAR_GOSSIP = 4, "明星八卦"  # star gossip
    CELEBRITY_GOSSIP = 5, "网红八卦"  # internet-influencer gossip


class TextClassifical(object):
    """Dictionary-based content classifier.

    The text is tokenized, stop words are filtered out, and the remaining
    tokens are matched against three word lists (network influencers, beauty
    projects, stars). The weighted match counts decide which
    ``SELECTED_CONTENT_TYPE`` id is returned.
    """

    def __init__(self, network_influencer_path, project_path, star_path):
        """Load the three dictionaries and bind the tokenizer and stop-word filter.

        :param network_influencer_path: path of the network-influencer word list.
        :param project_path: path of the beauty-project word list.
        :param star_path: path of the star word list.
        """
        self.network_influencer_words = self.build_network_influencer_words(network_influencer_path)
        self.project_words = self.build_project_words(project_path)
        self.star_words = self.build_star_words(star_path)
        self.tokenprocessor = token_processor
        self.stopwords_filter = stopwords_filter

    @staticmethod
    def _load_words(word_path):
        """Read a one-word-per-line file into a ``{word: 1}`` dict.

        Surrounding whitespace is stripped and blank lines are skipped so the
        empty string never becomes a dictionary entry.
        """
        ret = {}
        # NOTE(review): no explicit encoding is passed, so the platform default
        # applies -- confirm the dictionary files are stored in that encoding.
        # The ``with`` block guarantees the handle is closed after loading.
        with codecs.open(word_path, "r", errors="ignore") as word_file:
            for line in word_file:
                word = line.strip()
                if word:
                    ret[word] = 1
        return ret

    @staticmethod
    def _weighted_proba(counter, matched_words, weight):
        """Return ``(weighted total, {word: share of that total})`` for ``matched_words``."""
        weighted = {word: counter[word] * weight for word in matched_words}
        total = sum(weighted.values())
        if total == 0:
            # Nothing to normalise by; avoid dividing by zero.
            return total, {word: 0.0 for word in weighted}
        return total, {word: float(value) / total for word, value in weighted.items()}

    def build_network_influencer_words(self, word_path):
        """Load the network-influencer dictionary as ``{word: 1}``."""
        return self._load_words(word_path)

    def build_project_words(self, project_path):
        """Load the beauty-project dictionary as ``{word: 1}``."""
        return self._load_words(project_path)

    def build_star_words(self, star_path):
        """Load the star dictionary as ``{word: 1}``."""
        return self._load_words(star_path)

    def run(self, content):
        """Classify ``content`` and report the dictionary words found in it.

        :param content: text to classify.
        :return: dict with ``"content_type"`` (a ``SELECTED_CONTENT_TYPE`` id,
            or -1 when no dictionary word occurs) and the lists ``"star"``,
            ``"celebrity"`` and ``"projects"``; each list item is a
            ``{word: probability}`` dict for one matched word.
        """
        words = self.tokenprocessor.lcut(content, cut_all=True)
        # Use the filter bound in __init__ rather than the module-level global.
        words = self.stopwords_filter.filter(words)
        counter = Counter(words)
        # Iterate the distinct tokens and probe each dictionary; this avoids
        # turning every (potentially large) dictionary into a set per call.
        netword_influencer_concurrence = {word for word in counter if word in self.network_influencer_words}
        project_word_concurrence = {word for word in counter if word in self.project_words}
        star_words_concurrence = {word for word in counter if word in self.star_words}
        content_type, words_proba = self.predict(counter, netword_influencer_concurrence,
                                                 project_word_concurrence, star_words_concurrence)
        # predict() orders its probability dicts as [influencer, project, star].
        influencer_proba, project_proba, star_proba = words_proba
        return {
            "content_type": content_type,
            "star": [{word: star_proba.get(word, 0.0)} for word in star_words_concurrence],
            "celebrity": [{word: influencer_proba.get(word, 0.0)} for word in netword_influencer_concurrence],
            "projects": [{word: project_proba.get(word, 0.0)} for word in project_word_concurrence],
        }

    def score(self, counter, concurrence_words):
        """Placeholder with no implementation; currently returns None."""
        pass

    def predict(self, counter, netword_influencer_concurrence, project_word_concurrence, star_words_concurrence):
        """Derive the content type from the matched dictionary words.

        Influencer and star occurrences are weighted 2, project occurrences 1.

        :param counter: mapping of token -> occurrence count.
        :param netword_influencer_concurrence: tokens found in the influencer dictionary.
        :param project_word_concurrence: tokens found in the project dictionary.
        :param star_words_concurrence: tokens found in the star dictionary.
        :return: ``(content_type, words_proba)`` where ``content_type`` is a
            ``SELECTED_CONTENT_TYPE`` id (or -1 when nothing matched) and
            ``words_proba`` is ``[influencer, project, star]``, each a dict
            mapping a word to its share within its own group.
        """
        net_influencer_total, net_influencer_proba = self._weighted_proba(
            counter, netword_influencer_concurrence, 2)
        project_words_total, project_words_proba = self._weighted_proba(
            counter, project_word_concurrence, 1)
        star_words_total, star_words_proba = self._weighted_proba(
            counter, star_words_concurrence, 2)
        words_proba = [net_influencer_proba, project_words_proba, star_words_proba]

        total_word = net_influencer_total + project_words_total + star_words_total
        if total_word <= 0:
            return -1, words_proba

        influencer_share = float(net_influencer_total) / total_word
        project_share = float(project_words_total) / total_word
        star_share = float(star_words_total) / total_word

        if project_share <= 0:
            # No project words at all: this is gossip; ties go to the star category.
            if star_share >= influencer_share:
                return SELECTED_CONTENT_TYPE.STAR_GOSSIP[0], words_proba
            # Previously ``SELECTED_CONTENT_TYPE[0]``, which raised TypeError.
            return SELECTED_CONTENT_TYPE.CELEBRITY_GOSSIP[0], words_proba
        if project_share > 0.75:
            # Project words dominate the matches.
            return SELECTED_CONTENT_TYPE.BEAUTY_PROJECT[0], words_proba
        if influencer_share > star_share:
            return SELECTED_CONTENT_TYPE.BEAUTY_CELEBRITY[0], words_proba
        return SELECTED_CONTENT_TYPE.BEAUTY_STAR[0], words_proba


# Project root: this file's path with its last three "/"-separated components
# (the file name plus two directory levels) dropped.
# NOTE(review): splitting on "/" assumes POSIX-style separators in __file__.
root_path = "/".join(str(__file__).split("/")[:-3])

# Module-level classifier instance, built once at import time from the
# dictionary paths named in config (each joined onto root_path).
model = TextClassifical(
    network_influencer_path=os.path.join(root_path, config.network_influcer_dic),
    project_path=os.path.join(root_path, config.projects_dic),
    star_path=os.path.join(root_path, config.star_dic),
)
