# -*- coding: utf-8 -*-
from multiprocessing import Manager, Pool
import math
import time
from django.conf import settings
from django.core.management import BaseCommand

from api.tool.tag3_tool import CONTENT_TABLE_MAP, TagMapTool, CACHE_KEY, HISTORY_RECORD
from rpc.cache import tag_map_tag3_record


def get_relation_write_db(queue, content_type):
    """Process one batch of content and write its content/tag relations.

    The batch's starting ID is taken from ``queue``; the largest content ID
    of the batch is put back so the next call continues from there. After
    the relation rows are bulk-inserted, that same ID is stored in the
    ``tag_map_tag3_record`` cache under ``CACHE_KEY`` so a later run can
    resume from it.

    Args:
        queue: Queue-like object (``get``/``put``) carrying the ID cursor.
            It is expected to hold exactly one value when this is called.
        content_type: Content type name, passed through to ``TagMapTool``
            and used to format the cache key.

    Returns:
        None.
    """
    tag_model = TagMapTool.current_tag_model(content_type)
    start_id = queue.get()
    content_ids = TagMapTool.get_content_ids(content_type, start_id)
    if not content_ids:
        # Hand the cursor back unchanged. Without this the queue stays empty
        # and the next scheduled call blocks forever on ``queue.get()``.
        queue.put(start_id)
        return

    current_max_id = max(content_ids)
    # Publish the next starting point before the (slow) tag lookup and write.
    queue.put(current_max_id)

    tag_map_info = TagMapTool.get_tag_map_result(content_ids, content_type)
    objects = []
    for content in tag_map_info:
        content_id = content.get('content_id')
        new_tag_ids = content.get('tags_info', {}).get('project_tags', [])
        # Skip entries with no content ID or no mapped tags.
        if not content_id or not new_tag_ids:
            continue

        create_info = [[content_id, tag_id] for tag_id in new_tag_ids]
        objects.extend(TagMapTool.create_info(create_info, content_type))

    if objects:
        tag_model.objects.bulk_create(objects)
    print('内容类型:{}, 当前id入队列:{}'.format(content_type, current_max_id))

    # Cache the largest ID processed so far; it becomes the next run's start.
    cache_key = CACHE_KEY.format(content_type=content_type)
    tag_map_tag3_record.set(cache_key, current_max_id)


class Command(BaseCommand):
    """
    Data cleanup: map content to the new tags.

    Usage:
        python django_manage.py service_new_tag --content_type=service
    """
    def add_arguments(self, parser):
        """Register the ``--content_type`` command-line option."""
        parser.add_argument(
            '--content_type',
            help=u'内容类型(单选), choice is diary/topic/question/answer/tractate ...'
        )

    def handle(self, *args, **options):
        """Run the tag-mapping cleanup for one content type.

        Resumes from the ID cached under ``CACHE_KEY`` (0 when nothing is
        cached), estimates the number of batches from the count of online
        rows on the slave database, and runs ``get_relation_write_db`` once
        per batch in a worker pool. Prints a message and returns early when
        the content type is unknown or there is nothing left to process.
        """
        started_at = time.time()
        content_type = options['content_type']
        model = CONTENT_TABLE_MAP.get(content_type)
        if not model:
            print(u'请输入正确参数')
            return
        max_id = TagMapTool.get_max_id(content_type)
        if not max_id:
            print(u'没有数据需要处理')
            return
        cache_key = CACHE_KEY.format(content_type=content_type)
        history_record = HISTORY_RECORD.format(content_type=content_type)
        tag_map_tag3_record.set(history_record, max_id)
        # Largest ID already processed for this content type (0 if none).
        cached_max_id = int(tag_map_tag3_record.get(cache_key) or 0)
        if cached_max_id >= max_id:
            print(u'没有数据需要处理')
            return

        queue = Manager().Queue(maxsize=8)
        queue.put(cached_max_id)    # initial ID cursor
        print(u'当前{}执行ID起始值为:{}'.format(content_type, cached_max_id))

        count = model.objects.using(settings.SLAVE_DB_NAME).filter(
            id__gte=cached_max_id, is_online=True).count()
        # Force true division: under Python 2, int / int floors before
        # ``ceil`` sees it and the final partial batch would be dropped.
        batch_count = int(math.ceil(float(count) / TagMapTool.BATCH_SIZE))

        pool = Pool(processes=4)
        try:
            # ``apply`` blocks until the task finishes, so batches run one
            # after another; each batch reads the cursor left by the previous.
            for _ in range(batch_count):
                pool.apply(get_relation_write_db, (queue, content_type))
        finally:
            # Always release the workers, even if a batch raised.
            pool.close()
            pool.join()
        print('Done! cost time:{} s'.format(time.time() - started_at))
