Commit 43d2037c authored by 李小芳

add

parent 6d6ad627
......@@ -92,11 +92,8 @@ class CrawlerMain(object):
try:
requests_res = requests.get(url, headers=self.headers, allow_redirects=False, timeout=10)
# 打印状态码
if requests_res.status_code == 200:
page_obj = etree.HTML(requests_res.text)
return page_obj, url
else:
return None,url
page_obj = etree.HTML(requests_res.text)
return page_obj, url
except ReadTimeout:
print('timeout')
......@@ -124,11 +121,8 @@ class CrawlerMain(object):
.format(query, page)
try:
requests_res = requests.get(url, headers=self.headers, allow_redirects=False, timeout=10)
if requests_res.status_code == 200:
res_json = requests_res.json()
return res_json
else:
return None
res_json = requests_res.json()
return res_json
except ReadTimeout:
print('超时,执行下一个请求')
......
......@@ -93,11 +93,8 @@ class CrawlerMain(object):
try:
requests_res = requests.get(url, headers=self.headers, allow_redirects=False, timeout=10)
# 打印状态码
if requests_res.status_code == 200:
page_obj = etree.HTML(requests_res.text)
return page_obj, url
else:
return None,url
page_obj = etree.HTML(requests_res.text)
return page_obj, url
except ReadTimeout:
print('timeout')
return None, url
......@@ -124,11 +121,8 @@ class CrawlerMain(object):
.format(query, page)
try:
requests_res = requests.get(url, headers=self.headers, allow_redirects=False, timeout=10)
if requests_res.status_code == 200:
res_json = requests_res.json()
return res_json
else:
return None
res_json = requests_res.json()
return res_json
except ReadTimeout:
print('超时,执行下一个请求')
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment