# 读取json文件 调用api 将数据导入库
import os
from datetime import datetime
import json
import requests
from datetime import datetime
import random
from random import randint
from django.core.management import BaseCommand
from talos.cache.base import crawl_user_cache
from gm_types.mimas import TRACTATE_PLATFORM
from talos.logger import info_logger, exception_logger

import requests
from gm_upload import upload, upload_file
from gm_upload import IMG_TYPE

from talos.models.tractate import Tractate, TractateImages, TractateReply, TractateExtra
from talos.services.user import UserService


# Base path for the crawled JSON data directories (note: no trailing slash).
FILE_PATH = '/srv/apps/mimas'
# Year forced onto the create_time of every imported tractate.
TRACTATE_YEAR = 2019
# Candidate days of the month; one is picked at random per imported tractate.
DAY_LIST = [5, 6]


def upload_image(url, img_type=IMG_TYPE.TOPIC):
    """Download an off-site image and re-upload it through ``gm_upload``.

    Args:
        url: Address of the remote image to fetch.
        img_type: Image category forwarded to ``upload``.

    Returns:
        Whatever ``upload`` returns for the downloaded bytes, or ``None``
        when the download or the upload raises.
    """
    try:
        response = requests.get(url)
        return upload(response.content, img_type=img_type)
    except Exception as e:
        # Best-effort: callers treat ``None`` as "failed". Catch ``Exception``
        # rather than everything so Ctrl-C / SystemExit still stop the
        # command, and log the cause instead of dropping it silently.
        exception_logger.error(e)
        return None


class Command(BaseCommand):
    """Import crawled posts (tractates) from JSON files into the database."""

    # Source ids of posts for which ``topic_create`` returned no tractate id.
    create_faild_topic_list = []

    # Ids of tractates that were created by this run or already existed.
    insert_topic_ids = []

    # Pool of user ids handed out round-robin as authors of imported posts.
    prod_user_list = [
        32269952, 32269956, 32269962, 32269966, 32269973, 32269978, 32269980, 32269982,
        32269987, 32269989, 32270003, 32270004, 32270007, 32270012, 32270015, 32270017,
        32270020, 32270024, 32270027, 32270031, 32270041, 32270044, 32270047, 32270050,
        32270054, 32270055, 32270057, 32270059, 32270063, 32270066, 32269913, 32269918,
        32269920, 32269927, 32269933, 32269939, 32269943, 32269948, 32269957, 32269965,
        32269972, 32269979, 32269983, 32269988, 32269995, 32270002, 32270005, 32270011,
        32270016, 32270022, 32270029, 32270036, 32270040, 32270051, 32270061, 32270065,
        32270071, 32270075, 32270081, 32270085, 32270094, 32270096, 32270110, 32270116,
        32270121, 32270141, 32270147, 32270152, 32270156, 32270161, 32270114, 32270119,
        32270122, 32270125, 32270129, 32270131, 32270133, 32270134, 32270137, 32270167,
        32270068, 32270070, 32270076, 32270078, 32270083, 32270087, 32270093, 32270095,
        32270099, 32270105, 32269992, 32270018, 32270023, 32270030, 32270034, 32270043,
        32270048, 32270052, 32270056, 32270060]

    # Maximum attempts for each remote image operation (re-upload, size
    # lookup). Bounds the retry loops so one dead URL cannot hang the import.
    max_image_attempts = 5

    def get_user_id(self, count):
        """Return the author id for the ``count``-th (1-based) new post.

        Ids are taken from ``prod_user_list`` in order and wrap around once
        the list is exhausted.
        """
        return self.prod_user_list[(count - 1) % len(self.prod_user_list)]

    def get_json_data_from_dir(self, is_topic=None, is_pictorial=None):
        """Load every JSON file found under the selected data directory.

        Args:
            is_topic: Truthy to read from the ``waibushuju_topic/`` directory.
            is_pictorial: Truthy to read from the ``pictorial/`` directory;
                takes precedence when both flags are set.

        Returns:
            A list with the parsed content of each file, or an empty list
            when neither flag is set.
        """
        file_path = None
        # os.path.join inserts the separator that FILE_PATH itself lacks;
        # plain concatenation produced '/srv/apps/mimaswaibushuju_topic/'.
        if is_topic:
            file_path = os.path.join(FILE_PATH, 'waibushuju_topic/')
        if is_pictorial:
            file_path = os.path.join(FILE_PATH, 'pictorial/')
        if file_path is None:
            # Nothing selected: previously this raised UnboundLocalError.
            return []

        filenames = []
        for root, dirs, names in os.walk(file_path):
            if dirs:
                # Only directories without sub-directories contribute files.
                continue
            for item in names:
                if item.startswith('.'):
                    # Skip hidden files.
                    continue
                filenames.append(os.path.join(root, item))

        return [self.get_file_json_data(filename) for filename in filenames]

    def get_file_json_data(self, file):
        """Read ``file`` as UTF-8 (with or without a BOM) and parse it as JSON.

        Args:
            file: Path of the JSON file to read.

        Returns:
            The decoded JSON value.
        """
        with open(file, 'rb') as f:
            # 'utf-8-sig' drops a leading BOM if there is one.
            content = f.read().decode('utf-8-sig')
        return json.loads(content)

    def get_image_size(self, image_url):
        """Query the ``-imageinfo`` endpoint of an uploaded image.

        Args:
            image_url: URL of the uploaded image.

        Returns:
            A ``(width, height)`` tuple taken from the JSON response, or
            ``(None, None)`` when the request or the decoding fails.
        """
        try:
            url = image_url + '-imageinfo'
            response = requests.request("GET", url)
            info = response.json()
            return info.get('width'), info.get('height')
        except Exception as e:
            exception_logger.error(e)
            return None, None

    def _upload_with_retry(self, url):
        """Re-upload ``url``, retrying up to ``max_image_attempts`` times."""
        for _ in range(self.max_image_attempts):
            image_url = upload_image(url)
            if image_url:
                return image_url
        return None

    def _image_size_with_retry(self, image_url):
        """Fetch the image size, retrying up to ``max_image_attempts`` times."""
        width, height = None, None
        for _ in range(self.max_image_attempts):
            width, height = self.get_image_size(image_url)
            if width or height:
                break
        return width, height

    def image_info(self, urls):
        """Re-upload remote images and collect their url and dimensions.

        Args:
            urls: Iterable of remote image URLs; ``None`` is treated as empty.

        Returns:
            A list of dicts with ``url``, ``height`` and ``width`` keys. An
            image that still cannot be uploaded or measured after
            ``max_image_attempts`` tries is logged and left out.
        """
        ret = []
        for url in urls or []:
            image_url = self._upload_with_retry(url)
            if not image_url:
                exception_logger.error(
                    'image upload failed after {} attempts: {}'.format(self.max_image_attempts, url))
                continue

            width, height = self._image_size_with_retry(image_url)
            if not width and not height:
                exception_logger.error(
                    'image size lookup failed after {} attempts: {}'.format(self.max_image_attempts, image_url))
                continue

            ret.append(
                {
                    # Store the url without the host prefix.
                    'url': image_url.replace('http://alpha.gmeiapp.com/', ''),
                    'height': height,
                    'width': width,
                }
            )
        return ret

    def topic_create(self, data, platform):
        """Create a tractate and its images from one crawled post.

        Args:
            data: Dict read for ``id``, ``user_id``, ``content``,
                ``create_time`` (a timestamp) and ``images`` (list of dicts
                with ``url``/``width``/``height``).
            platform: Source platform stored on the tractate.

        Returns:
            The id of the existing or newly created tractate.
        """
        platform_id = data.get('id')
        obj = Tractate.objects.filter(platform=platform, platform_id=platform_id).first()
        if obj:
            return obj.id

        # Parse the timestamp before writing anything, so a bad value fails
        # without leaving a half-created tractate behind.
        create_time = datetime.fromtimestamp(data.get('create_time'))
        # Keep the time of day but force year, month (December) and one of the
        # configured days.
        create_time = create_time.replace(year=TRACTATE_YEAR, month=12, day=random.choice(DAY_LIST))

        obj = Tractate()
        obj.user_id = data.get('user_id')
        obj.content = data.get('content')
        obj.platform = platform
        obj.platform_id = platform_id
        obj.is_online = False
        obj.save()

        # Set via a queryset update after the row exists.
        # NOTE(review): presumably because the model fills create_time
        # automatically on save - confirm against the Tractate model.
        Tractate.objects.filter(platform=platform, platform_id=platform_id).update(create_time=create_time)

        image_list = [
            TractateImages(
                tractate_id=obj.id,
                image_url=image.get('url'),
                width=image.get('width'),
                height=image.get('height'),
            )
            # A missing or None 'images' entry means "no images".
            for image in data.get('images') or []
        ]
        TractateImages.objects.bulk_create(image_list)

        return obj.id

    def create_topic(self, topics, platform):
        """Import every post in ``topics`` that is not in the database yet.

        Ids of created or already existing tractates are appended to
        ``insert_topic_ids``; source ids of posts that produced no tractate
        id are appended to ``create_faild_topic_list``.

        Args:
            topics: Iterable of post dicts as loaded from the JSON files.
            platform: Source platform the posts belong to.

        Returns:
            Always ``(None, None)``.
        """
        count = 0
        for topic in topics:
            # Comments are dropped from the post dict and not imported here.
            topic.pop('comments', None)
            topic_exist = Tractate.objects.filter(platform=platform, platform_id=topic.get('id')).first()
            if topic_exist:
                topic_id = topic_exist.id
            else:
                count += 1
                # Tolerate posts without 'image' / 'user' keys instead of
                # aborting the whole import with a KeyError.
                images = topic.pop('image', None) or []
                topic['images'] = self.image_info(images)
                topic['user_id'] = self.get_user_id(count)
                topic.pop('user', None)
                print('-------- topic current count: ', count)
                topic_id = self.topic_create(data=topic, platform=platform)
                if not topic_id:
                    self.create_faild_topic_list.append(topic.get('id'))
                    continue
            self.insert_topic_ids.append(topic_id)
            print('-------- return topic info: ', topic_id)

        return None, None

    def handle(self, *args, **options):
        """Entry point: import the crawled posts for the XIAOHONGSHU platform."""
        platform = TRACTATE_PLATFORM.XIAOHONGSHU
        # Posts
        print('----- start deal topic at {} -----'.format(datetime.now().strftime('%Y-%m-%d %H:%M:%S %f')))
        topic_data = self.get_json_data_from_dir(is_topic=1)
        self.create_topic(topics=topic_data, platform=platform)
        print('-------- create_faild_topic_list:', len(self.create_faild_topic_list))
        print('----- end deal topic at {} -----'.format(datetime.now().strftime('%Y-%m-%d %H:%M:%S %f')))
