Commit b19b0518 authored by haowang

code

parent 929f9869
......@@ -11,8 +11,8 @@ from datetime import datetime
from image_qiniu import upload_file, IMG_TYPE
# Path constants for the uploader (presumably the data root and the project
# root -- confirm against their uses elsewhere in the file).
# NOTE(review): each name is assigned twice here because this text is a
# rendered diff without +/- markers; read top to bottom, the later values
# ('/data' and '/srv/apps/crawler') are the ones that remain bound.
DATA_OS_PATH = '/image'
PROJECT_PATH = '/'
DATA_OS_PATH = '/data'
PROJECT_PATH = '/srv/apps/crawler'
class UploadImage(object):
......@@ -134,11 +134,11 @@ class UploadImage(object):
print('upload ..... error')
return None
def picture_download_and_cut(self, path, new_path, table, key_id, offset=0, count=10):
def picture_download_and_cut(self, path, new_path, table, key_id, start_id, offset=0, count=10):
'''
文章图片剪切和下载
'''
sql = """select {}, url from {} where new_url == '' limit {}, {}""".format(key_id, table, offset, count)
sql = """select {}, url from {} where id > {} and new_url is null limit {}, {}""".format(key_id, table, start_id, offset, count)
self.cur.execute(sql)
tuple = self.cur.fetchall()
self.conn.commit()
......@@ -178,24 +178,26 @@ class UploadImage(object):
self.conn.commit()
# Run picture_download_and_cut once each for the answer, article and thought
# picture tables, using the matching *_PICTURE_PATH / *_PICTURE_CUT_PATH
# attributes of the instance.
# NOTE(review): this span is a rendered diff without +/- markers. The three
# live calls below are followed by `pass` and the same calls commented out;
# presumably the live calls are the removed lines and `pass` plus the
# comments is what remains after the commit -- confirm against the repository.
# NOTE(review): the live calls pass four positional arguments, while the
# picture_download_and_cut signature shown earlier in this view that takes
# start_id (with no default) requires a fifth.
def picture_download_and_cut_process(self):
self.picture_download_and_cut(self.ANSWER_PICTURE_PATH, self.ANSWER_PICTURE_CUT_PATH,
'zhihu_answer_picture_url', 'answer_id')
self.picture_download_and_cut(self.ARTICLE_PICTURE_PATH, self.ARTICLE_PICTURE_CUT_PATH,
'zhihu_article_picture_url', 'article_id')
self.picture_download_and_cut(self.THOUGHT_PICTURE_PATH, self.THOUGHT_PICTURE_CUT_PATH,
'zhihu_thought_picture_url', 'thought_id')
pass
# self.picture_download_and_cut(self.ANSWER_PICTURE_PATH, self.ANSWER_PICTURE_CUT_PATH,
# 'zhihu_answer_picture_url', 'answer_id')
# self.picture_download_and_cut(self.ARTICLE_PICTURE_PATH, self.ARTICLE_PICTURE_CUT_PATH,
# 'zhihu_article_picture_url', 'article_id')
# self.picture_download_and_cut(self.THOUGHT_PICTURE_PATH, self.THOUGHT_PICTURE_CUT_PATH,
# 'zhihu_thought_picture_url', 'thought_id')
if __name__ == '__main__':
    # Usage: <script> [mark] [start_id] [offset] [count]
    #   mark     0 = answer pictures, 1 = article pictures, 2 = thought pictures
    #   start_id only rows with id greater than this value are selected
    #   offset   row offset for the LIMIT clause
    #   count    row count for the LIMIT clause
    #
    # Missing trailing arguments fall back to their defaults. The previous
    # `int(sys.argv[i]) or default` form raised IndexError on a missing
    # argument before the `or` fallback could ever apply.
    defaults = [0, 0, 0, 10]
    supplied = [int(value) for value in sys.argv[1:5]]
    mark, start_id, offset, count = supplied + defaults[len(supplied):]
    # Keep the original `or 10` behaviour: an explicit count of 0 means 10.
    count = count or 10

    print(datetime.now())
    a = UploadImage()
    # Exactly one call per mark, always passing start_id. The interleaved
    # older calls omitted it, which shifted offset/count into the wrong
    # parameters and processed each table twice.
    if mark == 0:
        a.picture_download_and_cut(a.ANSWER_PICTURE_PATH, a.ANSWER_PICTURE_CUT_PATH,
                                   'zhihu_answer_picture_url', 'answer_id',
                                   start_id, offset, count)
    elif mark == 1:
        a.picture_download_and_cut(a.ARTICLE_PICTURE_PATH, a.ARTICLE_PICTURE_CUT_PATH,
                                   'zhihu_article_picture_url', 'article_id',
                                   start_id, offset, count)
    elif mark == 2:
        a.picture_download_and_cut(a.THOUGHT_PICTURE_PATH, a.THOUGHT_PICTURE_CUT_PATH,
                                   'zhihu_thought_picture_url', 'thought_id',
                                   start_id, offset, count)
    print(datetime.now())
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment