Commit b19b0518 authored by haowang

code

parent 929f9869
...@@ -11,8 +11,8 @@ from datetime import datetime ...@@ -11,8 +11,8 @@ from datetime import datetime
from image_qiniu import upload_file, IMG_TYPE from image_qiniu import upload_file, IMG_TYPE
DATA_OS_PATH = '/image' DATA_OS_PATH = '/data'
PROJECT_PATH = '/' PROJECT_PATH = '/srv/apps/crawler'
class UploadImage(object): class UploadImage(object):
...@@ -134,11 +134,11 @@ class UploadImage(object): ...@@ -134,11 +134,11 @@ class UploadImage(object):
print('upload ..... error') print('upload ..... error')
return None return None
def picture_download_and_cut(self, path, new_path, table, key_id, offset=0, count=10): def picture_download_and_cut(self, path, new_path, table, key_id, start_id, offset=0, count=10):
''' '''
文章图片剪切和下载 文章图片剪切和下载
''' '''
sql = """select {}, url from {} where new_url == '' limit {}, {}""".format(key_id, table, offset, count) sql = """select {}, url from {} where id > {} and new_url is null limit {}, {}""".format(key_id, table, start_id, offset, count)
self.cur.execute(sql) self.cur.execute(sql)
tuple = self.cur.fetchall() tuple = self.cur.fetchall()
self.conn.commit() self.conn.commit()
...@@ -178,24 +178,26 @@ class UploadImage(object): ...@@ -178,24 +178,26 @@ class UploadImage(object):
self.conn.commit() self.conn.commit()
def picture_download_and_cut_process(self): def picture_download_and_cut_process(self):
self.picture_download_and_cut(self.ANSWER_PICTURE_PATH, self.ANSWER_PICTURE_CUT_PATH, pass
'zhihu_answer_picture_url', 'answer_id') # self.picture_download_and_cut(self.ANSWER_PICTURE_PATH, self.ANSWER_PICTURE_CUT_PATH,
self.picture_download_and_cut(self.ARTICLE_PICTURE_PATH, self.ARTICLE_PICTURE_CUT_PATH, # 'zhihu_answer_picture_url', 'answer_id')
'zhihu_article_picture_url', 'article_id') # self.picture_download_and_cut(self.ARTICLE_PICTURE_PATH, self.ARTICLE_PICTURE_CUT_PATH,
self.picture_download_and_cut(self.THOUGHT_PICTURE_PATH, self.THOUGHT_PICTURE_CUT_PATH, # 'zhihu_article_picture_url', 'article_id')
'zhihu_thought_picture_url', 'thought_id') # self.picture_download_and_cut(self.THOUGHT_PICTURE_PATH, self.THOUGHT_PICTURE_CUT_PATH,
# 'zhihu_thought_picture_url', 'thought_id')
if __name__ == '__main__': if __name__ == '__main__':
mark = int(sys.argv[1]) or 0 mark = int(sys.argv[1]) or 0
offset = int(sys.argv[2]) or 0 start_id = int(sys.argv[2]) or 0
count = int(sys.argv[3]) or 10 offset = int(sys.argv[3]) or 0
count = int(sys.argv[4]) or 10
print(datetime.now()) print(datetime.now())
a = UploadImage() a = UploadImage()
if mark == 0: if mark == 0:
a.picture_download_and_cut(a.ANSWER_PICTURE_PATH, a.ANSWER_PICTURE_CUT_PATH, 'zhihu_answer_picture_url', 'answer_id', offset, count) a.picture_download_and_cut(a.ANSWER_PICTURE_PATH, a.ANSWER_PICTURE_CUT_PATH, 'zhihu_answer_picture_url', 'answer_id', start_id, offset, count)
if mark == 1: if mark == 1:
a.picture_download_and_cut(a.ARTICLE_PICTURE_PATH, a.ARTICLE_PICTURE_CUT_PATH, 'zhihu_article_picture_url', 'article_id', offset, count) a.picture_download_and_cut(a.ARTICLE_PICTURE_PATH, a.ARTICLE_PICTURE_CUT_PATH, 'zhihu_article_picture_url', 'article_id', start_id, offset, count)
if mark == 2: if mark == 2:
a.picture_download_and_cut(a.THOUGHT_PICTURE_PATH, a.THOUGHT_PICTURE_CUT_PATH, 'zhihu_thought_picture_url', 'thought_id', offset, count) a.picture_download_and_cut(a.THOUGHT_PICTURE_PATH, a.THOUGHT_PICTURE_CUT_PATH, 'zhihu_thought_picture_url', 'thought_id', start_id, offset, count)
print(datetime.now()) print(datetime.now())
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment