1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import multiprocessing
import time
from itertools import chain
from lxml import html
from urllib.parse import urlparse
from django.core.management import BaseCommand
from gm_upload.utils.image_utils import Picture
from qa.models import Answer, Question
from utils.common import get_data_from_rich_text
# Hosts whose <img> URLs `_sync_func` upgrades to https (going by the name,
# presumably the Qiniu-backed image domains -- confirm with the storage config).
qiniu_host = ["pic.igengmei.com", "heras.igengmei.com"]
def _sync_func(query_obj):
    """Rewrite ``<img>`` URLs on the ``qiniu_host`` domains to https.

    Each element returned for the ``//img`` query is inspected.  When its
    resolved ``src`` is served from a host listed in ``qiniu_host`` over a
    scheme other than https, the ``src`` attribute is replaced by the same
    URL with only the scheme switched to https.  The content is serialized
    back and saved only if at least one attribute was actually rewritten, so
    rows without affected images are left untouched.

    Args:
        query_obj: object exposing a ``content`` string and a
            ``save(update_fields=...)`` method (the management command in
            this module passes ``Answer`` / ``Question`` rows).
    """
    # NOTE(review): presumably returns the parsed lxml tree plus the nodes
    # matching the XPath -- confirm against utils.common.
    element_obj, image_list = get_data_from_rich_text(query_obj.content, "//img")

    changed = False
    for element in image_list:
        attrs = element.attrib
        image_url = Picture.get_full_path(attrs.get("src", ""))
        if not image_url:
            continue

        image_info = urlparse(image_url)
        if image_info.netloc not in qiniu_host or image_info.scheme == "https":
            continue

        # Replace the scheme component only.  A plain str.replace("http",
        # "https") would also rewrite "http" inside the path or query string
        # and would leave protocol-relative ("//host/...") URLs unchanged.
        attrs["src"] = image_info._replace(scheme="https").geturl()
        changed = True

    if not changed:
        # Nothing was rewritten: skip the re-serialization and the DB write.
        return

    query_obj.content = html.tostring(element_obj, encoding="unicode")
    query_obj.save(update_fields=["content"])
class Command(BaseCommand):
    """Management command that runs ``_sync_func`` over all online Q&A rows."""

    def handle(self, *args, **options):
        """Fan ``_sync_func`` out over online answers and questions.

        Rows are streamed from both querysets (only ``id`` and ``content``
        are loaded) and submitted to a pool of four worker processes.  Worker
        failures are printed and counted instead of being silently dropped;
        the run itself continues for the remaining rows.
        """
        print("replace image host start")
        start_time = time.time()
        failures = 0

        def _report_failure(exc):
            # Invoked in the parent process when a task raises.  Without an
            # error callback the exception is stored on an AsyncResult that
            # nobody reads, so the failure would go unnoticed.
            nonlocal failures
            failures += 1
            print("replace image host failed: {!r}".format(exc))

        # Using the pool as a context manager guarantees the workers are torn
        # down even if iterating the querysets or enqueueing raises.
        with multiprocessing.Pool(processes=4) as pool:
            rows = chain(
                Answer.objects.filter(is_online=True).only("id", "content").iterator(),
                Question.objects.filter(is_online=True).only("id", "content").iterator(),
            )
            for query_obj in rows:
                pool.apply_async(
                    _sync_func,
                    args=(query_obj,),
                    error_callback=_report_failure,
                )
            # close() + join() wait for every queued task to finish before
            # the context manager terminates the pool.
            pool.close()
            pool.join()

        end_time = time.time()
        if failures:
            print("failed rows: {}".format(failures))
        print("total time: {}".format(end_time - start_time))
        print("replace image host end")