#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Management command that rewrites inline image URLs in Q&A content to HTTPS.

Every online ``Answer`` and ``Question`` is scanned for ``<img>`` tags; any
image whose host is listed in ``qiniu_host`` and whose scheme is not already
``https`` has its ``src`` rewritten, and the row's ``content`` is saved.
"""

import multiprocessing
import sys
import time
from itertools import chain
from urllib.parse import urlparse, urlunparse

from django.core.management import BaseCommand
from lxml import html

from gm_upload.utils.image_utils import Picture
from qa.models import Answer, Question
from utils.common import get_data_from_rich_text

# Image hosts whose URLs are upgraded to HTTPS by this command.
qiniu_host = [
    "pic.igengmei.com",
    "heras.igengmei.com",
]


def _report_error(exc):
    """Print an exception raised inside a pool worker to stderr.

    Used as the ``error_callback`` of ``apply_async`` so that a failing row
    is visible instead of being silently discarded.
    """
    print("replace image host failed: {!r}".format(exc), file=sys.stderr)


def _sync_func(query_obj):
    """Upgrade matching ``<img src>`` URLs in ``query_obj.content`` to HTTPS.

    Args:
        query_obj: an object exposing a ``content`` attribute holding rich
            text and a ``save(update_fields=...)`` method (the command passes
            ``Answer`` / ``Question`` instances).

    The row is saved only when at least one ``src`` was actually rewritten,
    so untouched content is never re-serialized or written back.
    """
    # NOTE(review): get_data_from_rich_text is assumed to return the parsed
    # root element and the list of elements matching the XPath - confirm
    # against utils.common.
    element_obj, image_list = get_data_from_rich_text(query_obj.content, "//img")

    changed = False
    for element in image_list:
        attributes = element.attrib
        # NOTE(review): Picture.get_full_path presumably expands a stored
        # path into an absolute URL - confirm against gm_upload.
        image_url = Picture.get_full_path(attributes.get("src", ""))
        if not image_url:
            continue

        image_info = urlparse(image_url)
        if image_info.netloc in qiniu_host and image_info.scheme != "https":
            # Rewrite only the scheme component. A plain
            # str.replace("http", "https") would also alter any "http"
            # appearing later in the path or query string, and would leave
            # scheme-relative ("//host/...") URLs unchanged.
            attributes["src"] = urlunparse(image_info._replace(scheme="https"))
            changed = True

    if not changed:
        return

    query_obj.content = html.tostring(element_obj, encoding="unicode")
    query_obj.save(update_fields=["content"])


class Command(BaseCommand):
    """Rewrite image URLs on the configured hosts to HTTPS for online Q&A rows."""

    def handle(self, *args, **options):
        """Fan every online Answer and Question out to a 4-process pool.

        Prints start/end markers and the total elapsed wall-clock time.
        Failures inside workers are reported to stderr via ``_report_error``.
        """
        print("replace image host start")
        start_time = time.time()

        pool = multiprocessing.Pool(processes=4)
        try:
            rows = chain(
                Answer.objects.filter(is_online=True).only("id", "content").iterator(),
                Question.objects.filter(is_online=True).only("id", "content").iterator(),
            )
            for query_obj in rows:
                pool.apply_async(
                    _sync_func,
                    args=(query_obj,),
                    # Without a callback, exceptions raised in a worker are
                    # lost because the AsyncResult is never inspected.
                    error_callback=_report_error,
                )
        finally:
            # Always stop accepting work and wait for queued tasks, even if
            # iterating the querysets raised.
            pool.close()
            pool.join()

        end_time = time.time()
        print("total time: {}".format(end_time - start_time))
        print("replace image host end")