# export_user_diary.py

# coding:utf-8
"""导出对应用户的优质日记本（评分4分及以上），以及相应的图片下载。"""

import os
import os.path
import time
from collections import OrderedDict

from django.core.management.base import BaseCommand, CommandError

from utils.execel import ExcelReader, ExcelWriter
from talos.libs.img_download import IMGDownloader
from gm_types.gaia import TOPIC_IMAGE_TYPE
from talos.libs.image_utils import get_temp_image_path
from talos.libs.image_utils import get_full_path
from talos.models.diary import Diary
from talos.models.topic import Problem, TopicImage
from talos.models.diary.preoperationimage import PreOperationImage
from django.db.models import Q


# Directory created under the chosen save path; all exported output goes here.
BASESAVE_DIR = "export_result"

# Per-diary sub-directory names for pre-operation / post-operation images.
BEFORE = 'before'
AFTER = 'after'


class Command(BaseCommand):
    """Export users' high-quality diaries and download the diaries' images.

    Reads user ids from an Excel file, looks up each user's diaries via
    ``get_user_diaries``, writes an Excel sheet of diary links, and downloads
    the pre-/post-operation images of every diary into per-diary directories.
    """

    help = '导出对应用户的优质日记本，以及相应的图片下载。'

    def add_arguments(self, parser):
        """Register the command-line options of this command."""
        # Path of the Excel file that lists the user ids.
        parser.add_argument(
            '-f', '--excel', type=str,
            dest='users_excel',
            help='用户ID文件'
        )

        # Base path for the output; results are stored in an
        # ``export_result`` directory created underneath it.
        parser.add_argument(
            '-p', '--save-path', type=str,
            dest='export_path',
            help='结果保存路径',
            default='.'
        )

    def handle(self, *args, **options):
        """Run the export.

        Raises:
            CommandError: if no Excel file was given, the id column holds
                nothing besides its first (title) entry, or any id is not
                an ``int``.
        """
        print('------ starting -----')
        start_time = time.time()
        print("start at: ", start_time)

        user_id_file = options.get("users_excel", None)
        if not user_id_file:
            raise CommandError("请指定用户ID文件")

        reader = ExcelReader(user_id_file)
        # By convention of the input file, column 2 holds the user ids.
        users_id = reader.read_column(2)

        if len(users_id) <= 1:
            raise CommandError("请确保文件格式为规定的格式")

        # The first entry of the column is the title; drop it.
        users_id = users_id[1:]

        if not all(isinstance(user_id, int) for user_id in users_id):
            raise CommandError("用户列表存在不合法id(全部为数字)")

        # Create the directory that receives all results.
        export_path = options.get("export_path")
        result_path = os.path.join(export_path, BASESAVE_DIR)
        mkdirp(result_path)

        # Fetch the high-quality diaries of every user: {user_id: [diary_id]}.
        diaries = get_user_diaries(users_id)
        all_diary_ids = []
        for user_diary_ids in diaries.values():
            all_diary_ids.extend(user_diary_ids)

        # Image URLs grouped by diary id, fetched once for all diaries.
        before_operation_images = get_diary_img(
            all_diary_ids, TOPIC_IMAGE_TYPE.PRE_OPERATION_IMAGE)
        after_operation_images = get_diary_img(
            all_diary_ids, TOPIC_IMAGE_TYPE.POST_OPERATION_IMAGE)

        result_excel = os.path.join(result_path, "用户日记本链接地址.xlsx")
        excel = ExcelWriter(result_excel)
        excel.write_header(["用户ID", "用户日记本地址"])

        rows = []
        for user_id, user_diary_ids in diaries.items():
            # One row per user; all of the user's diary links share one cell.
            rows.append([user_id, "\n".join(get_diaries_link(user_diary_ids))])

            for diary_id in user_diary_ids:
                # Create the diary directory with its before/after
                # sub-directories (created even when there are no images).
                diary_path = os.path.join(result_path,
                                          "用户{user_id}".format(user_id=user_id),
                                          str(diary_id))
                after_path = os.path.join(diary_path, AFTER)
                before_path = os.path.join(diary_path, BEFORE)
                mkdirp(after_path)
                mkdirp(before_path)

                # Direct dict lookup instead of scanning every entry.
                if diary_id in after_operation_images:
                    IMGDownloader.download(
                        after_operation_images[diary_id], after_path)

                if diary_id in before_operation_images:
                    IMGDownloader.download(
                        before_operation_images[diary_id], before_path)

        # Data rows are written starting at index 2, after the header.
        excel.write_rows(2, rows)
        excel.save()

        end_time = time.time()
        print("end at: ", end_time)
        print('total use {} s.'.format(end_time - start_time))
        print('Done!')


def mkdirp(des):
    """Create directory ``des`` and any missing parents.

    Does nothing when something already exists at ``des``.
    """
    if os.path.exists(des):
        return
    os.makedirs(des, exist_ok=True)


def get_user_diaries(users_id):
    """Collect the diaries with content level '4' or '5' for the given users.

    Every id in ``users_id`` gets an entry, in input order; users without a
    matching diary map to an empty list.

    :param users_id: list of user ids
    :return: OrderedDict of the form
    {
        user_id: [ diary_id, ]
    }
    """
    quality_filter = Q(user_id__in=users_id) & Q(content_level__in=('4', '5'))
    rows = Diary.objects.filter(quality_filter).values("user_id", "id").iterator()

    diaries_by_user = OrderedDict((user_id, []) for user_id in users_id)
    for row in rows:
        diaries_by_user[row["user_id"]].append(row["id"])

    return diaries_by_user


def _pre_operation_image_urls(diary_ids):
    """Group the stored pre-operation image URLs by diary id."""
    rows = PreOperationImage.objects.filter(
        diary_id__in=diary_ids
    ).values('diary_id', 'image_url').iterator()

    urls_by_diary = {}
    for row in rows:
        urls_by_diary.setdefault(row["diary_id"], []).append(row["image_url"])
    return urls_by_diary


def _post_operation_image_urls(diary_ids):
    """Group the image URLs of each diary's online topics by diary id.

    A diary that has online topics but no topic images still gets an entry
    (an empty list).
    """
    topics = Problem.objects.filter(
        diary_id__in=diary_ids, is_online=True
    ).values('id', "diary_id").iterator()

    topic_ids = []
    topic_ids_by_diary = {}
    for topic in topics:
        topic_ids.append(topic["id"])
        topic_ids_by_diary.setdefault(topic["diary_id"], []).append(topic["id"])

    if not topic_ids:
        return {}

    images = TopicImage.objects.filter(topic_id__in=topic_ids).order_by(
        '-taken_time'
    ).values('image_url', 'topic_id')

    # Group once by topic so each image row is visited a single time; the
    # '-taken_time' ordering is preserved inside every topic's list.
    urls_by_topic = {}
    for image in images:
        urls_by_topic.setdefault(image["topic_id"], []).append(image["image_url"])

    urls_by_diary = {}
    for diary_id, diary_topic_ids in topic_ids_by_diary.items():
        urls = urls_by_diary.setdefault(diary_id, [])
        for topic_id in diary_topic_ids:
            urls.extend(urls_by_topic.get(topic_id, ()))
    return urls_by_diary


def get_diary_img(diary_ids, img_type, watermark=True):
    """Fetch the image URLs of the given diaries, grouped by diary.

    :param diary_ids: list of diary ids
    :param img_type: ``TOPIC_IMAGE_TYPE.PRE_OPERATION_IMAGE`` selects the
        pre-operation images; any other value selects the images attached to
        the diaries' online topics (post-operation)
    :param watermark: when true each URL is built with
        ``get_full_path(url, '-w')`` (presumably the watermarked variant),
        otherwise with ``get_temp_image_path(url)``
    :return:
    {
        diary_id: [img_url],
    }
    """
    if img_type == TOPIC_IMAGE_TYPE.PRE_OPERATION_IMAGE:
        raw_urls_by_diary = _pre_operation_image_urls(diary_ids)
    else:
        raw_urls_by_diary = _post_operation_image_urls(diary_ids)

    res = {}
    for diary_id, image_urls in raw_urls_by_diary.items():
        res[diary_id] = [
            get_full_path(url, '-w') if watermark else get_temp_image_path(url)
            for url in image_urls
        ]

    return res


def get_diaries_link(diaries_id):
    """Build the mobile-site diary URL for each id, keeping the input order."""
    template = "http://m.igengmei.com/diary_book/{diary_id}/"

    links = []
    for diary_id in diaries_id:
        links.append(template.format(diary_id=diary_id))
    return links