#!/usr/bin/env python
# -*- coding: utf-8 -*-

from django.conf import settings
from django.core.management.base import BaseCommand, CommandError


import os
import six
import sys

import multiprocessing
import multiprocessing.queues

from search.utils.es import get_es
from injection.utils.table_scan import TableSlicer, TableSlicerChunk, TableStreamingSlicer
from trans2es.type_info import TypeInfo, get_type_info_map
import logging

# Module-level logger named 'async_debug'; Job.__call__ reports the result of
# every chunk insert on it at DEBUG level.
logger = logging.getLogger('async_debug')

class Job(object):
    """Callable unit of work that inserts one table chunk into Elasticsearch.

    A job only stores the index prefix, the type name and the chunk; the
    ``TypeInfo`` and the Elasticsearch client are looked up when the job is
    actually called.
    """

    # Elasticsearch client cached on the class; created on first use by
    # ``get_es`` and reused by every later job run through this class.
    __es = None

    def __init__(self, index_prefix, type_name, chunk):
        """Remember what to index.

        :param index_prefix: index prefix handed to ``insert_table_chunk``.
        :param type_name: key into ``get_type_info_map()``.
        :param chunk: the ``TableSlicerChunk`` to insert.
        """
        for text_argument in (index_prefix, type_name):
            assert isinstance(text_argument, six.string_types)
        assert isinstance(chunk, TableSlicerChunk)

        self._chunk = chunk
        self._type_name = type_name
        self._index_prefix = index_prefix

    @classmethod
    def get_es(cls):
        """Return the cached Elasticsearch client, creating it on first call."""
        client = cls.__es
        if client is None:
            # Inside a method body the bare name resolves to the module-level
            # ``get_es`` helper, not to this classmethod.
            client = cls.__es = get_es()
        return client

    def __call__(self):
        """Insert the chunk through its ``TypeInfo`` and log the result."""
        info = get_type_info_map()[self._type_name]
        assert isinstance(info, TypeInfo)

        outcome = info.insert_table_chunk(
            index_prefix=self._index_prefix,
            table_chunk=self._chunk,
            es=self.get_es(),
        )

        message = '{worker:12s} {pid:5d} {result}'.format(
            worker=multiprocessing.current_process().name,
            pid=os.getpid(),
            result=outcome,
        )
        logger.debug(message)
        sys.stdout.flush()


class Command(BaseCommand):
    """Dump table data to Elasticsearch, optionally with worker processes.

    For every selected type the queryset is cut into chunks by a table
    slicer, and each chunk becomes a ``Job``. Jobs either run inline (when
    ``--parallel`` is absent or < 1) or are fed through a
    ``multiprocessing.JoinableQueue`` to ``--parallel`` worker processes.
    """

    args = ''
    help = 'dump data to elasticsearch, parallel'

    from optparse import make_option

    option_list = BaseCommand.option_list + (
        make_option('-t', '--type', dest='type_list', action='append', help='type name to dump data to elasticsearch', metavar='TYPE'),
        make_option('-i', '--index-prefix', dest='index_prefix', help='index name to dump data to elasticsearch', metavar='INDEX_PREFIX'),
        make_option('-p', '--parallel', dest='parallel', help='parallel process count', metavar='PARALLEL'),
        make_option('-s', '--pks', dest='pks', help='specify sync pks, comma separated', metavar='PKS', default=''),
        make_option('--streaming-slicing', dest='streaming_slicing', action='store_true', default=True),
        make_option('--no-streaming-slicing', dest='streaming_slicing', action='store_false', default=True),
    )

    def handle(self, *args, **options):
        """Resolve the options, build the jobs and run them.

        Options read: ``type_list`` (repeatable; ``_all`` or no value selects
        every known type), ``index_prefix`` (falls back to
        ``settings.ES_INDEX_PREFIX``), ``parallel`` (worker process count),
        ``pks`` (comma separated primary keys restricting every queryset) and
        ``streaming_slicing`` (chooses the slicer class).

        :raises CommandError: if an unknown type name is given.
        """
        all_type_name_list = tuple(get_type_info_map().keys())
        permitted_type_set = {'_all'}.union(all_type_name_list)

        type_name_list = []
        for option_type in options['type_list'] or ():
            if option_type not in permitted_type_set:
                # Sorted so that the message is stable between runs.
                raise CommandError('unknown type name, type must be one of [{}]'.format(
                    ', '.join(sorted(permitted_type_set))))
            if option_type == '_all':
                type_name_list = all_type_name_list
                break
            type_name_list.append(option_type)

        if not type_name_list:
            type_name_list = all_type_name_list

        index_prefix = options['index_prefix'] or settings.ES_INDEX_PREFIX
        streaming_slicing = options['streaming_slicing']

        # Build a real list: on Python 3 ``filter()`` returns an iterator that
        # is always truthy, which would replace every queryset with an empty
        # ``pk__in`` filter even when no pks were requested.
        pk_list = [pk for pk in (options['pks'] or '').split(',') if pk]

        slicer_class = TableStreamingSlicer if streaming_slicing else TableSlicer

        def iter_jobs():
            """Yield one Job per chunk of every selected type."""
            for type_name in type_name_list:
                type_info = get_type_info_map()[type_name]
                assert isinstance(type_info, TypeInfo)

                if pk_list:
                    queryset = type_info.model.objects.filter(pk__in=pk_list)
                else:
                    queryset = type_info.queryset

                slicer = slicer_class(
                    queryset=queryset,
                    chunk_size=type_info.bulk_insert_chunk_size,
                )

                for chunk in slicer.chunks():
                    yield Job(
                        index_prefix=index_prefix,
                        type_name=type_name,
                        chunk=chunk,
                    )

        parallel_option = options['parallel']
        parallel_count = 0 if parallel_option is None else int(parallel_option)

        if parallel_count < 1:
            print('* PARALLEL DISABLED')
            for job in iter_jobs():
                job()
            return

        print('* PARALLEL COUNT={:d}'.format(parallel_count))

        queue = multiprocessing.JoinableQueue(maxsize=512)
        workers = [
            multiprocessing.Process(target=self.worker, kwargs=dict(queue=queue))
            for _ in range(parallel_count)
        ]

        try:
            for w in workers:
                w.start()

            for job in iter_jobs():
                queue.put(job)

            # One stop sentinel per worker: calling ``sys.exit`` as a "job"
            # raises SystemExit inside the worker and ends that process.
            for _ in range(parallel_count):
                queue.put(sys.exit)

            queue.close()
            queue.join_thread()
            # Wait until every queued item has been marked done.
            queue.join()

        finally:
            for w in workers:
                w.join(timeout=1)
            for w in workers:
                if w.is_alive():
                    print('Worker is alive: {}'.format(w.name))
                    w.terminate()

    @staticmethod
    def worker(queue):
        """Run callables taken from ``queue`` until one of them exits the process.

        A job that raises an ordinary exception is logged and skipped, so the
        worker stays alive to consume the remaining jobs and its stop
        sentinel. Otherwise the sentinel would stay in the queue unacknowledged
        and ``queue.join()`` in the parent would never return. ``SystemExit``
        (raised by the ``sys.exit`` sentinel) is not an ``Exception`` and still
        terminates the worker after the item has been marked done.

        :param queue: a ``multiprocessing.JoinableQueue`` of callables.
        """
        while True:
            # Blocking get without timeout: it waits for an item and never
            # raises ``Empty``.
            job = queue.get()

            try:
                job()
            except Exception:
                # Same logger name as the module-level logger of this file.
                logging.getLogger('async_debug').exception(
                    'job failed in worker %s (pid %d)',
                    multiprocessing.current_process().name,
                    os.getpid(),
                )
            finally:
                queue.task_done()


