Commit ea1e46b8 authored by 段英荣's avatar 段英荣

处理图片替换

parent 747ccc99
......@@ -239,6 +239,30 @@ class ZhihuAccount(object):
print(item["content"])
print(50*"*")
def _dispose_content_url(self, content, img_url_list, cur_image_index):
    """Swap remote image URLs in ``content`` for uploaded, cropped copies.

    Each URL in ``img_url_list`` that contains ".jpg" or ".png" is
    downloaded to ``./image/img_<n>.png``, the bottom 100 pixel rows are
    cut off, the result is written to ``./image/cropped_image_<n>.png``
    and handed to ``upload_file``. Every occurrence of the original URL in
    ``content`` is then replaced by the value ``upload_file`` returned.

    Processing is best-effort per image: a failure on one URL is printed
    and that URL is left untouched, while the remaining URLs are still
    processed.

    Args:
        content: Text in which the image URLs are replaced.
        img_url_list: Candidate URLs found in ``content``; ``None`` or an
            empty list leaves ``content`` unchanged.
        cur_image_index: Number after which local file numbering starts;
            the first image handled is written as index + 1.

    Returns:
        ``content`` with every successfully re-hosted URL replaced.

    NOTE(review): ``cur_image_index`` is only advanced locally and is not
    returned, so two calls that start from the same index write to the
    same local file names. Confirm whether callers rely on this.
    """
    # Rows removed from the bottom of each image before upload
    # (presumably to drop a watermark strip -- TODO confirm).
    crop_bottom_px = 100

    for ori_img_url in img_url_list or ():
        # Only URLs that mention a .jpg/.png file are treated as images.
        if ".jpg" not in ori_img_url and ".png" not in ori_img_url:
            continue

        cur_image_index += 1
        local_img_url_path = "./image/img_" + str(cur_image_index) + ".png"
        local_cropped_img_url_path = "./image/cropped_image_" + str(cur_image_index) + ".png"
        print(ori_img_url, local_img_url_path)

        try:
            urlretrieve(ori_img_url, local_img_url_path)

            # cv2.imread signals an unreadable file by returning None
            # instead of raising, so check before touching .shape.
            local_cv2_img = cv2.imread(local_img_url_path)
            if local_cv2_img is None:
                print("skip image that could not be decoded: " + ori_img_url)
                continue

            height, width = local_cv2_img.shape[:2]
            # Cropping would leave no rows; an empty image cannot be saved.
            if height <= crop_bottom_px:
                print("skip image too small to crop: " + ori_img_url)
                continue

            local_cropped_img = local_cv2_img[0:height - crop_bottom_px, 0:width]
            cv2.imwrite(local_cropped_img_url_path, local_cropped_img)
            qiniu_url = upload_file(local_cropped_img_url_path)
            content = content.replace(ori_img_url, qiniu_url)
        except Exception:
            # Keep going with the next URL; Exception (not a bare except)
            # lets KeyboardInterrupt/SystemExit propagate.
            print(traceback.format_exc())

    return content
# 知乎搜索词搜索
def zhihu_query_by_word(self,query_word,zhihu_spider_fd,cur_image_index):
......@@ -265,21 +289,11 @@ class ZhihuAccount(object):
user_id = random.choice(majia_user_list)
question_id = ""
img_url_list = re.findall('img src="(.*?)"', content)
for ori_img_url in img_url_list:
cur_image_index += 1
local_img_url_path = "./image/img_" + str(cur_image_index) + ".png"
print(ori_img_url,local_img_url_path)
img_url_list = re.findall('src="(.*?)"', content)
content = self._dispose_content_url(content=content,img_url_list=img_url_list,cur_image_index=cur_image_index)
urlretrieve(ori_img_url, local_img_url_path)
local_cv2_img = cv2.imread(local_img_url_path)
height, weidth, channel = local_cv2_img.shape
local_cropped_img = local_cv2_img[0:(height - 100), 0:weidth]
local_cropped_img_url_path = "./image/cropped_image_" + str(cur_image_index) + ".png"
cv2.imwrite(local_cropped_img_url_path,local_cropped_img)
qiniu_url = upload_file(local_cropped_img_url_path)
content = content.replace(ori_img_url,qiniu_url)
img_url_list = re.findall('data-original="(.*?)"', content)
content = self._dispose_content_url(content=content,img_url_list=img_url_list,cur_image_index=cur_image_index)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment