#coding:utf8
import json
import math
from multiprocessing import Pool, Manager

from bs4 import BeautifulSoup
from django.db.models import Max
from django import db
from django.conf import settings
from django.core.management import BaseCommand

from gm_types.mimas import SPAM_LABEL, SPAM_EVIL_FLAG, SPAM_SUGGEST, GRABBING_PLATFORM, CONTENT_CLASS

from qa.models import Answer, AnswerReply, Question, QualityAuthorAnswer, QualityReply
from utils.rpc import get_rpc_invoker
from utils.common import gm_decode_html


# Module-level RPC invoker; antispam_check() uses it to call the
# 'antispam/text/check' endpoint.
rpc_client = get_rpc_invoker()
# Batch size: maximum number of rows fetched per worker call, and the divisor
# used by Command.start() to work out how many worker calls to schedule.
limit = 500


def antispam_check(text):
    """Classify ``text`` through the ``antispam/text/check`` RPC endpoint.

    Args:
        text: Text to check. A falsy value is reported as normal without
            calling the service.

    Returns:
        A ``(label, keywords)`` tuple. ``label`` is ``SPAM_LABEL.SPAM`` when
        the response's ``evil`` flag is not ``SPAM_EVIL_FLAG.NORMAL`` and its
        ``suggest`` value is ``SPAM_SUGGEST.REVIEW`` or ``SPAM_SUGGEST.BLOCK``;
        in every other case it is ``SPAM_LABEL.NORMAL``. ``keywords`` is the
        response's ``data.Keywords`` list for spam and ``[]`` otherwise.

    The check is best-effort: an error raised by the RPC call is reported as
    "normal" rather than propagated.
    """
    if not text:
        return SPAM_LABEL.NORMAL, []

    try:
        data = rpc_client['antispam/text/check'](text=text).unwrap()
    except Exception:
        # Still best-effort, but unlike a bare ``except:`` this lets
        # KeyboardInterrupt / SystemExit stop the process.
        return SPAM_LABEL.NORMAL, []

    if data.get('evil', 0) == SPAM_EVIL_FLAG.NORMAL:
        return SPAM_LABEL.NORMAL, []

    # REVIEW and BLOCK are both reported as spam.
    if data.get('suggest', "Normal") in (SPAM_SUGGEST.REVIEW, SPAM_SUGGEST.BLOCK):
        # ``or {}`` guards against a response where 'data' is present but None.
        keywords = (data.get('data') or {}).get('Keywords', [])
        return SPAM_LABEL.SPAM, keywords

    return SPAM_LABEL.NORMAL, []


def update_question(queue):
    """Run the antispam check over one batch of online GM-platform questions.

    The id cursor is passed between worker calls through ``queue``: this call
    takes the current cursor, fetches up to ``limit`` questions whose id is
    greater than it, and puts the advanced cursor back before running the
    antispam checks so another worker can fetch the following batch.

    Args:
        queue: Shared queue carrying the id after which this batch starts.

    Flagged questions are only printed; the statement that would set them
    offline is commented out at the end of the function.
    """
    start_id = queue.get()
    print(start_id)

    try:
        # Order by id so the slice really is the next ``limit`` rows after
        # the cursor; an unordered LIMIT may return any matching rows and
        # let the cursor jump over questions.
        questions = list(
            Question.objects.using(settings.SLAVE_DB_NAME)
            .filter(platform=GRABBING_PLATFORM.GM, pk__gt=start_id, is_online=True)
            .order_by('id')[:limit]
        )
    except Exception:
        # Hand the cursor back before failing; otherwise every other worker
        # call would block forever in ``queue.get()``.
        queue.put(start_id)
        raise

    # Take the cursor from the rows actually fetched. On an empty batch keep
    # the old cursor instead of publishing ``None``, which the next worker
    # could not use in its ``pk__gt`` filter.
    queue.put(questions[-1].id if questions else start_id)
    if not questions:
        return

    questions_ids = []
    for question in questions:
        # Reduce the HTML body to its text and drop newlines/spaces, then
        # check title and body together.
        soup = BeautifulSoup(question.content)
        content_text = soup.get_text().replace("\n", '').replace(" ", '')
        content = question.title + content_text
        label, keywords = antispam_check(content)
        if label == SPAM_LABEL.SPAM:
            questions_ids.append(question.id)
            # The title gets its own key; previously both values were stored
            # under "content" and the title was silently overwritten.
            print({"id": question.id, "title": question.title, "content": content, "keywords": keywords})

    if questions_ids:
        pass
        # Question.objects.filter(pk__in=questions_ids).update(is_online=False)


def update_answer(queue):
    """Run the antispam check over one batch of online, unaudited GM answers.

    The id cursor is passed between worker calls through ``queue``: this call
    takes the current cursor, fetches up to ``limit`` answers whose id is
    greater than it, and puts the advanced cursor back before running the
    antispam checks so another worker can fetch the following batch.

    Args:
        queue: Shared queue carrying the id after which this batch starts.

    Flagged answers are only printed; the statement that would set them
    offline is commented out at the end of the function.
    """
    start_id = queue.get()
    print(start_id)

    try:
        # Order by id so the slice really is the next ``limit`` rows after
        # the cursor; an unordered LIMIT may return any matching rows and
        # let the cursor jump over answers.
        answers = list(
            Answer.objects.using(settings.SLAVE_DB_NAME)
            .filter(
                level=CONTENT_CLASS.UNAUDITED,
                pk__gt=start_id,
                platform=GRABBING_PLATFORM.GM,
                is_online=True,
            )
            .order_by('id')[:limit]
        )
    except Exception:
        # Hand the cursor back before failing; otherwise every other worker
        # call would block forever in ``queue.get()``.
        queue.put(start_id)
        raise

    # Take the cursor from the rows actually fetched. On an empty batch keep
    # the old cursor instead of publishing ``None``, which the next worker
    # could not use in its ``pk__gt`` filter.
    queue.put(answers[-1].id if answers else start_id)
    if not answers:
        return

    answer_ids = []
    for answer in answers:
        # Reduce the HTML body to its text and drop newlines/spaces.
        soup = BeautifulSoup(answer.content)
        content_text = soup.get_text().replace("\n", '').replace(" ", '')
        label, keywords = antispam_check(content_text)
        if label == SPAM_LABEL.SPAM:
            answer_ids.append(answer.id)
            print({"look": True, "id": answer.id, "content": answer.content, "keywords": keywords})
            print(json.dumps({"id": answer.id, "content": answer.content, "keywords": keywords}))

    if answer_ids:
        pass
        # Answer.objects.filter(pk__in=answer_ids).update(is_online=False)


def update_answer_reply(queue):
    """Run the antispam check over one batch of online, non-fake answer replies.

    The id cursor is passed between worker calls through ``queue``: this call
    takes the current cursor, fetches up to ``limit`` replies whose id is
    greater than it, and puts the advanced cursor back before running the
    antispam checks so another worker can fetch the following batch.

    Args:
        queue: Shared queue carrying the id after which this batch starts.

    Flagged replies are only printed (as JSON); the statement that would set
    them offline is commented out at the end of the function.
    """
    start_id = queue.get()
    print(start_id)

    try:
        # Order by id so the slice really is the next ``limit`` rows after
        # the cursor; an unordered LIMIT may return any matching rows and
        # let the cursor jump over replies.
        replies = list(
            AnswerReply.objects.using(settings.SLAVE_DB_NAME)
            .filter(is_fake=False, pk__gt=start_id, is_online=True)
            .order_by('id')[:limit]
        )
    except Exception:
        # Hand the cursor back before failing; otherwise every other worker
        # call would block forever in ``queue.get()``.
        queue.put(start_id)
        raise

    # Take the cursor from the rows actually fetched. On an empty batch keep
    # the old cursor instead of publishing ``None``, which the next worker
    # could not use in its ``pk__gt`` filter.
    queue.put(replies[-1].id if replies else start_id)
    if not replies:
        return

    reply_ids = []
    for reply in replies:
        # Reply content is passed to the check as stored.
        label, keywords = antispam_check(reply.content)
        if label == SPAM_LABEL.SPAM:
            reply_ids.append(reply.id)
            print(json.dumps({"id": reply.id, "content": reply.content, "keywords": keywords}))

    if reply_ids:
        pass
        # AnswerReply.objects.filter(pk__in=reply_ids).update(is_online=False)


class Command(BaseCommand):
    """Run the antispam check over questions, answers and answer replies.

    ``handle`` runs the three scans one after another; each scan fans its
    batches out to a pool of worker processes via ``start``.
    """

    help = "Run the antispam text check over online questions, answers and answer replies."

    # Size of the worker pool; also used as the shared queue's maxsize.
    processes = 20

    def start(self, count, processor):
        """Schedule ``processor`` once per batch and wait for all calls.

        Args:
            count: Number of rows to scan; ``ceil(count / limit)`` worker
                calls are scheduled.
            processor: Module-level callable that accepts the shared queue as
                its only argument.

        Raises:
            Whatever a worker call raised, re-raised by ``Pool.starmap``.
        """
        cnt = int(math.ceil(count / limit))
        if cnt <= 0:
            # Nothing to scan: do not spawn a manager and a pool for zero tasks.
            return

        queue = Manager().Queue(maxsize=self.processes)
        queue.put(0)  # Seed the queue with the initial id cursor to start the run.

        args_list = [(queue,) for _ in range(cnt)]

        # Close the parent's DB connections before the pool is created.
        db.connections.close_all()
        pool = Pool(processes=self.processes)
        try:
            pool.starmap(processor, args_list)
        except BaseException:
            # Kill the workers on failure or interrupt: some of them may be
            # blocked waiting on the queue, so a graceful close() followed by
            # join() could wait forever.
            pool.terminate()
            raise
        else:
            pool.close()
        finally:
            pool.join()

    def start_question(self):
        """Scan online GM-platform questions with ``update_question``."""
        count = (
            Question.objects.using(settings.SLAVE_DB_NAME)
            .filter(platform=GRABBING_PLATFORM.GM, is_online=True)
            .count()
        )
        self.start(count, update_question)

    def start_answer(self):
        """Scan online, unaudited GM-platform answers with ``update_answer``."""
        count = (
            Answer.objects.using(settings.SLAVE_DB_NAME)
            .filter(level=CONTENT_CLASS.UNAUDITED, platform=GRABBING_PLATFORM.GM, is_online=True)
            .count()
        )
        self.start(count, update_answer)

    def start_answer_reply(self):
        """Scan online, non-fake answer replies with ``update_answer_reply``."""
        count = (
            AnswerReply.objects.using(settings.SLAVE_DB_NAME)
            .filter(is_fake=False, is_online=True)
            .count()
        )
        self.start(count, update_answer_reply)

    def handle(self, *args, **options):
        """Entry point: scan questions, then answers, then answer replies."""
        self.start_question()

        self.start_answer()

        self.start_answer_reply()
