Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
C
crawler
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Chengyang Zhong
crawler
Commits
c6034ea7
Commit
c6034ea7
authored
Dec 01, 2020
by
haowang
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
modify upload_picture script
parent
b8560e3a
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
21 additions
and
16 deletions
+21
-16
upload_picture.py
tasks/zhihu/upload_picture.py
+21
-16
No files found.
tasks/zhihu/upload_picture.py
View file @
c6034ea7
...
...
@@ -46,8 +46,8 @@ class UploadImage(object):
except
:
with
open
(
self
.
JS_FILE_PATH
,
'r'
,
encoding
=
'utf-8'
)
as
f
:
js
=
f
.
read
()
#
print(js
)
self
.
exec_js
=
execjs
.
compile
(
js
,
)
#
self.exec_js = execjs.compile(js, cwd='/home/gmuser/node_modules'
)
self
.
exec_js
=
execjs
.
compile
(
js
)
def
get_serach_page_cookies
(
self
):
'''
...
...
@@ -142,6 +142,22 @@ class UploadImage(object):
文章图片剪切和下载
'''
def
_deal_image_by_path
(
file_path
):
img
=
cv2
.
imread
(
file_path
)
if
img
:
high
,
width
=
img
.
shape
[:
2
]
cropped
=
img
[
0
:
int
(
high
/
10
*
9
),
0
:
width
]
pathes
=
new_path
+
"num"
+
str
(
i
)
+
".jpg"
cv2
.
imwrite
(
pathes
,
cropped
)
new_url
=
self
.
upload_image_with_path
(
pathes
)
sql
=
"""UPDATE {} SET new_url = "{}" WHERE url = "{}" """
.
format
(
table
,
str
(
new_url
),
str
(
tuple
[
i
][
1
]))
self
.
cur
.
execute
(
sql
)
self
.
conn
.
commit
()
else
:
print
(
'image open error : '
,
file_path
)
urls
=
self
.
find_all_url
(
content
)
self
.
insert_picture_urls
(
table
,
urls
,
content_id
,
key_id
)
...
...
@@ -171,18 +187,7 @@ class UploadImage(object):
with
open
(
pathes
,
'wb'
)
as
f
:
# 打开写入到path路径里-二进制文件,返回的句柄名为f
f
.
write
(
r
.
content
)
# 往f里写入r对象的二进制文件
f
.
close
()
img
=
cv2
.
imread
(
pathes
)
high
,
width
=
img
.
shape
[:
2
]
cropped
=
img
[
0
:
int
(
high
/
10
*
9
),
0
:
width
]
pathes
=
new_path
+
"num"
+
str
(
i
)
+
".jpg"
cv2
.
imwrite
(
pathes
,
cropped
)
new_url
=
self
.
upload_image_with_path
(
pathes
)
sql
=
"""UPDATE {} SET new_url = "{}" WHERE url = "{}" """
.
format
(
table
,
str
(
new_url
),
str
(
tuple
[
i
][
1
]))
self
.
cur
.
execute
(
sql
)
self
.
conn
.
commit
()
_deal_image_by_path
(
pathes
)
def
picture_process
(
self
,
path
,
new_path
,
table
,
pic_table
,
key_id
,
offset
=
0
,
count
=
10
):
content_dict
=
self
.
gets_content_dict
(
table
,
key_id
,
offset
,
count
)
...
...
@@ -190,7 +195,7 @@ class UploadImage(object):
for
content_id
,
content
in
content_dict
.
items
():
self
.
picture_download_and_cut
(
path
,
new_path
,
pic_table
,
key_id
,
content_id
,
content
)
def
insert_picture_urls
(
self
,
table
,
urls
,
content_id
,
key_id
,
has_old
=
True
):
def
insert_picture_urls
(
self
,
table
,
urls
,
content_id
,
key_id
):
def
_delete_repeat_url
(
instance
,
columns
):
print
(
columns
)
...
...
@@ -213,7 +218,7 @@ class UploadImage(object):
values
=
[]
for
url
in
urls
:
if
has_old
and
not
_url_exist
(
self
,
url
):
if
not
_url_exist
(
self
,
url
):
continue
values
.
append
(
"({}, '{}')"
.
format
(
content_id
,
url
))
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment