Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
S
strategy_spider
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
rank
strategy_spider
Commits
ea1e46b8
Commit
ea1e46b8
authored
Jan 14, 2020
by
段英荣
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
处理图片替换
parent
747ccc99
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
28 additions
and
14 deletions
+28
-14
zhihu_login.py
zhihu_login.py
+28
-14
No files found.
zhihu_login.py
View file @
ea1e46b8
...
...
@@ -239,6 +239,30 @@ class ZhihuAccount(object):
print
(
item
[
"content"
])
print
(
50
*
"*"
)
def _dispose_content_url(self, content, img_url_list, cur_image_index):
    """Re-host the .jpg/.png images referenced in ``content``.

    Every URL in ``img_url_list`` that contains ".jpg" or ".png" is
    downloaded to ``./image/img_<n>.png``, cropped to drop its bottom 100
    pixel rows, written to ``./image/cropped_image_<n>.png``, and passed to
    ``upload_file``. Each occurrence of the original URL in ``content`` is
    then replaced by the value ``upload_file`` returns.

    Args:
        content: Text that contains the image URLs.
        img_url_list: Candidate image URLs taken from ``content``.
        cur_image_index: Number the local file names continue from. The
            increment happens on a local copy only, so the caller's counter
            is not advanced by this call.

    Returns:
        ``content`` with the successfully processed URLs replaced. If an
        unexpected error occurs, the traceback is printed and the content
        as updated so far is returned (best effort, never raises
        ``Exception`` subclasses).
    """
    # NOTE(review): the block nesting was reconstructed from a view without
    # indentation; the success-path return is assumed to follow the loop.
    try:
        for ori_img_url in img_url_list:
            if ".jpg" not in ori_img_url and ".png" not in ori_img_url:
                continue

            cur_image_index += 1
            local_img_url_path = "./image/img_" + str(cur_image_index) + ".png"
            print(ori_img_url, local_img_url_path)
            urlretrieve(ori_img_url, local_img_url_path)

            local_cv2_img = cv2.imread(local_img_url_path)
            if local_cv2_img is None:
                # cv2.imread reports an undecodable file by returning None;
                # skip this image instead of aborting the remaining ones.
                print("unreadable image, skipped:", ori_img_url)
                continue

            height, width = local_cv2_img.shape[:2]
            # Keep everything except the bottom 100 pixel rows.
            local_cropped_img = local_cv2_img[0:(height - 100), 0:width]

            local_cropped_img_url_path = (
                "./image/cropped_image_" + str(cur_image_index) + ".png"
            )
            cv2.imwrite(local_cropped_img_url_path, local_cropped_img)

            qiniu_url = upload_file(local_cropped_img_url_path)
            content = content.replace(ori_img_url, qiniu_url)
        return content
    except Exception:
        # Best effort: report the failure and hand back what was replaced so
        # far. KeyboardInterrupt/SystemExit are deliberately not caught.
        print(traceback.format_exc())
        return content
# 知乎搜索词搜索
def
zhihu_query_by_word
(
self
,
query_word
,
zhihu_spider_fd
,
cur_image_index
):
...
...
@@ -265,21 +289,11 @@ class ZhihuAccount(object):
user_id
=
random
.
choice
(
majia_user_list
)
question_id
=
""
img_url_list
=
re
.
findall
(
'img src="(.*?)"'
,
content
)
for
ori_img_url
in
img_url_list
:
cur_image_index
+=
1
local_img_url_path
=
"./image/img_"
+
str
(
cur_image_index
)
+
".png"
print
(
ori_img_url
,
local_img_url_path
)
img_url_list
=
re
.
findall
(
'src="(.*?)"'
,
content
)
content
=
self
.
_dispose_content_url
(
content
=
content
,
img_url_list
=
img_url_list
,
cur_image_index
=
cur_image_index
)
urlretrieve
(
ori_img_url
,
local_img_url_path
)
local_cv2_img
=
cv2
.
imread
(
local_img_url_path
)
height
,
weidth
,
channel
=
local_cv2_img
.
shape
local_cropped_img
=
local_cv2_img
[
0
:(
height
-
100
),
0
:
weidth
]
local_cropped_img_url_path
=
"./image/cropped_image_"
+
str
(
cur_image_index
)
+
".png"
cv2
.
imwrite
(
local_cropped_img_url_path
,
local_cropped_img
)
qiniu_url
=
upload_file
(
local_cropped_img_url_path
)
content
=
content
.
replace
(
ori_img_url
,
qiniu_url
)
img_url_list
=
re
.
findall
(
'data-original="(.*?)"'
,
content
)
content
=
self
.
_dispose_content_url
(
content
=
content
,
img_url_list
=
img_url_list
,
cur_image_index
=
cur_image_index
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment