Commit 6a26b3f6 authored by litaolemo

update

parent e2e6ec48
......@@ -767,7 +767,7 @@ class Crawler_toutiao():
return video_image_url
def get_web_article_info(self,article_id):
def get_web_article_info(self,article_id,proxies_num=0):
# headers = {
# "Accept": "*/*",
# "Accept-Encoding": "gzip, deflate",
......@@ -799,7 +799,7 @@ class Crawler_toutiao():
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.89 Safari/537.36",
}
url = "https://m.toutiao.com/i{0}/info/?i={1}".format(article_id,article_id)
requests_res = retry_get_url(url,headers=headers,proxies=0)
requests_res = retry_get_url(url,headers=headers,proxies=proxies_num)
res_json = requests_res.json()
res_dic = {
"title":res_json["data"].get("title").replace("\r","").replace("\n",""),
......@@ -849,10 +849,6 @@ class Crawler_toutiao():
try:
proxies = get_proxy(proxies_num)
if proxies:
# proxies = {
# "http": "http://127.0.0.1:80",
# "https": "http://127.0.0.1:443"
# }
get_page = requests.get(url, headers=self.headers, proxies=proxies, timeout=10)
else:
get_page = requests.get(url, headers=self.headers, timeout=10)
......@@ -927,7 +923,7 @@ class Crawler_toutiao():
video_dic['id'] = cal_doc_id(video_dic["platform"], url=video_dic["url"], doc_id_type='all-time-url', data_dict=video_dic)
try:
article_info = self.get_web_article_info(info_dic.get('tag_id'))
article_info = self.get_web_article_info(info_dic.get('tag_id'),proxies_num=proxies_num)
video_dic.update(article_info)
except Exception as e:
print("method get_web_article_info error %s" %e)
......@@ -1882,7 +1878,7 @@ if __name__ == '__main__':
for url in data_lis:
test.releaser_page_by_time(1595088000000, 1595319362610, url, output_to_es_raw=True,
es_index='crawler-data-raw', releaser_page_num_max=2,
proxies_num=2
proxies_num=1
)
# test.get_releaser_follower_num(url)
# test.get_releaser_image(releaserUrl=data_lis[0])
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment