Commit 6910aaee authored by 段英荣

增加七牛图片转换

parent 385821df
git+ssh://git@git.wanmeizhensuo.com/backend/gm-upload.git@master
Pillow >= 5.0.0 Pillow >= 5.0.0
matplotlib >= 2.1.2 matplotlib >= 2.1.2
requests >= 2.18.4 requests >= 2.18.4
......
SECRET_KEY = 'h88p#*4t5l4&7^j4gfui%qs4x3wo(+u$1x2d1=p9cb)ogjr080'
QINIU_ACCESS_KEY = "UPCOYIJkZOMcdd9FDzpBqYjzWUh55fBpVi3AhWpL"
QINIU_SECRET_KEY = "z5YvpDDSam_JE345Z8J_f3TufzelOW2VOGNoBl9e"
QINIU_HOST = "http://wanmeizhensuo.qiniudn.com/"
QINIU_SCOPE = 'wanmeizhensuo'
...@@ -25,6 +25,14 @@ from scrapy.selector import Selector ...@@ -25,6 +25,14 @@ from scrapy.selector import Selector
import brotli import brotli
import random import random
import time import time
import re
from gm_upload import upload, upload_file
from urllib.request import urlretrieve
import os
import cv2
import copy
top_query_list = [ top_query_list = [
...@@ -232,7 +240,7 @@ class ZhihuAccount(object): ...@@ -232,7 +240,7 @@ class ZhihuAccount(object):
print(50*"*") print(50*"*")
# 知乎搜索词搜索 # 知乎搜索词搜索
def zhihu_query_by_word(self,query_word,zhihu_spider_fd): def zhihu_query_by_word(self,query_word,zhihu_spider_fd,cur_image_index):
for begin_index in range(0,200,10): for begin_index in range(0,200,10):
query_by_word_url = "https://www.zhihu.com/api/v4/search_v3?t=general&correction=1&lc_idx=62&" \ query_by_word_url = "https://www.zhihu.com/api/v4/search_v3?t=general&correction=1&lc_idx=62&" \
...@@ -252,10 +260,27 @@ class ZhihuAccount(object): ...@@ -252,10 +260,27 @@ class ZhihuAccount(object):
try: try:
data_type = data_item["object"]["type"] data_type = data_item["object"]["type"]
content = data_item["object"]["content"] content = data_item["object"]["content"]
# content = copy.deepcopy(tmp_content)
platform_id = data_item["object"]["id"] platform_id = data_item["object"]["id"]
user_id = random.choice(majia_user_list) user_id = random.choice(majia_user_list)
question_id = "" question_id = ""
img_url_list = re.findall('img src="(.*?)"', content)
for ori_img_url in img_url_list:
cur_image_index += 1
local_img_url_path = "./image/img_" + str(cur_image_index) + ".png"
urlretrieve(ori_img_url, local_img_url_path)
local_cv2_img = cv2.imread(local_img_url_path)
height, weidth, channel = local_cv2_img.shape
local_cropped_img = local_cv2_img[0:(height - 100), 0:weidth]
local_cropped_img_url_path = "./image/cropped_image_" + str(cur_image_index) + ".png"
cv2.imwrite(local_cropped_img_url_path,local_cropped_img)
qiniu_url = upload_file(local_cropped_img_url_path)
content = content.replace(ori_img_url,qiniu_url)
if data_type == "article": if data_type == "article":
title = data_item["object"]["title"] title = data_item["object"]["title"]
elif data_type == "answer": elif data_type == "answer":
...@@ -289,12 +314,16 @@ class ZhihuAccount(object): ...@@ -289,12 +314,16 @@ class ZhihuAccount(object):
return return
if __name__ == '__main__': if __name__ == '__main__':
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "settings.settings")
account = ZhihuAccount('', '') account = ZhihuAccount('', '')
account.login(captcha_lang='en', load_cookies=True) account.login(captcha_lang='en', load_cookies=True)
#account.test_member_article() #account.test_member_article()
zhihu_spider_data = "./zhihu_spider_data.txt" zhihu_spider_data = "./zhihu_spider_data.txt"
zhihu_spider_fd = open(zhihu_spider_data,"w") zhihu_spider_fd = open(zhihu_spider_data,"w")
account.zhihu_query_by_word(top_query_list[0],zhihu_spider_fd)
cur_image_index = 0
account.zhihu_query_by_word(top_query_list[0],zhihu_spider_fd,cur_image_index)
zhihu_spider_fd.close() zhihu_spider_fd.close()
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment