Commit 209fc8b6 authored by litaolemo

update

parent 89892252
...@@ -325,6 +325,10 @@ class Crawler_toutiao(): ...@@ -325,6 +325,10 @@ class Crawler_toutiao():
url = "https://m.toutiao.com/i{0}/info/?i={1}".format(article_id, article_id) url = "https://m.toutiao.com/i{0}/info/?i={1}".format(article_id, article_id)
requests_res = retry_get_url(url, headers=headers, proxies=proxies_num) requests_res = retry_get_url(url, headers=headers, proxies=proxies_num)
res_json = requests_res.json() res_json = requests_res.json()
try:
content = res_json["data"].get("content").replace("\r", "").replace("\n", "")
except:
content = ""
res_dic = { res_dic = {
"title": res_json["data"].get("title").replace("\r", "").replace("\n", ""), "title": res_json["data"].get("title").replace("\r", "").replace("\n", ""),
'high_quality_flag': int(res_json["data"].get('high_quality_flag')), 'high_quality_flag': int(res_json["data"].get('high_quality_flag')),
...@@ -334,7 +338,7 @@ class Crawler_toutiao(): ...@@ -334,7 +338,7 @@ class Crawler_toutiao():
"favorite_count": res_json["data"].get("digg_count"), "favorite_count": res_json["data"].get("digg_count"),
'releaser_followers_count': res_json["data"].get("follower_count"), 'releaser_followers_count': res_json["data"].get("follower_count"),
'release_time': int(res_json["data"].get('publish_time') * 1e3), 'release_time': int(res_json["data"].get('publish_time') * 1e3),
"content": res_json["data"].get("content").replace("\r", "").replace("\n", ""), "content": content,
"img_list": re.findall('img src=".*?"', res_json["data"].get("content")) "img_list": re.findall('img src=".*?"', res_json["data"].get("content"))
} }
return res_dic return res_dic
...@@ -438,8 +442,6 @@ class Crawler_toutiao(): ...@@ -438,8 +442,6 @@ class Crawler_toutiao():
# The search api just return something seems related to search # The search api just return something seems related to search
print(e) print(e)
continue continue
else:
break
if len(toutiao_Lst) >= 100: if len(toutiao_Lst) >= 100:
output_result(result_Lst=toutiao_Lst, output_result(result_Lst=toutiao_Lst,
platform=self.platform, platform=self.platform,
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment