Commit 7c2c462d authored by haowang

fix code

parent ea702309
...@@ -23,6 +23,8 @@ class RefreshContent(object): ...@@ -23,6 +23,8 @@ class RefreshContent(object):
''' '''
初始化数据库,调整js规则 初始化数据库,调整js规则
''' '''
self.update_error_content_id = []
self.conn = pymysql.connect(host=HOST, port=PORT, user=USER, self.conn = pymysql.connect(host=HOST, port=PORT, user=USER,
passwd=PASSWD, passwd=PASSWD,
db=DB, charset='utf8') db=DB, charset='utf8')
...@@ -48,26 +50,20 @@ class RefreshContent(object): ...@@ -48,26 +50,20 @@ class RefreshContent(object):
item.replace_with(new_rich_obj) item.replace_with(new_rich_obj)
return rich_obj.decode() return rich_obj.decode()
def create_new_content(self, content, pic_dict): def create_new_content(self, content_id, content, pic_dict):
content = self.replace_html_image_to_url(content) content = self.replace_html_image_to_url(content)
rich_obj = BeautifulSoup(content, features="html.parser") rich_obj = BeautifulSoup(content, features="html.parser")
for item in rich_obj.find_all("img"): for item in rich_obj.find_all("img"):
url = item.get("src") url = item.get("src")
new_url = pic_dict.get(url) new_url = pic_dict.get(url)
if not new_url:
self.update_error_content_id.append({content_id: url})
print({content_id: url})
continue
item['src'] = new_url + '-w' item['src'] = new_url + '-w'
return rich_obj.decode() return rich_obj.decode()
@staticmethod
def replace_url_to_new_url(content, url_dict):
rich_obj = BeautifulSoup(content, features="html.parser")
for item in rich_obj.find_all("img"):
url = item.get("src")
new_url = url_dict.get(url)
item['src'] = new_url
return rich_obj.decode()
def get_all_content_ids(self, table, key_id): def get_all_content_ids(self, table, key_id):
sql = """select distinct {} from {}""".format(key_id, table) sql = """select distinct {} from {}""".format(key_id, table)
self.cur.execute(sql) self.cur.execute(sql)
...@@ -97,9 +93,7 @@ class RefreshContent(object): ...@@ -97,9 +93,7 @@ class RefreshContent(object):
self.conn.commit() self.conn.commit()
content = res[0][0] content = res[0][0]
new_content = self.create_new_content(content, pic_dict) new_content = self.create_new_content(content_id, content, pic_dict)
print(new_content)
import pdb; pdb.set_trace()
sql = """update {} set new_content = '{}' WHERE {} = '{}' """.format(table, new_content, key_id, content_id) sql = """update {} set new_content = '{}' WHERE {} = '{}' """.format(table, new_content, key_id, content_id)
self.cur.execute(sql) self.cur.execute(sql)
...@@ -120,4 +114,6 @@ if __name__ == '__main__': ...@@ -120,4 +114,6 @@ if __name__ == '__main__':
refresh.refresh_content('zhihu_article', 'zhihu_article_picture_url', 'article_id') refresh.refresh_content('zhihu_article', 'zhihu_article_picture_url', 'article_id')
elif mark == 2: elif mark == 2:
refresh.refresh_content('zhihu_thought', 'zhihu_thought_picture_url', 'thought_id') refresh.refresh_content('zhihu_thought', 'zhihu_thought_picture_url', 'thought_id')
print(refresh.update_error_content_id)
print(datetime.now()) print(datetime.now())
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment