Commit 43d2037c authored by 李小芳

add

parent 6d6ad627
...@@ -92,11 +92,8 @@ class CrawlerMain(object): ...@@ -92,11 +92,8 @@ class CrawlerMain(object):
try: try:
requests_res = requests.get(url, headers=self.headers, allow_redirects=False, timeout=10) requests_res = requests.get(url, headers=self.headers, allow_redirects=False, timeout=10)
# 打印状态码 # 打印状态码
if requests_res.status_code == 200:
page_obj = etree.HTML(requests_res.text) page_obj = etree.HTML(requests_res.text)
return page_obj, url return page_obj, url
else:
return None,url
except ReadTimeout: except ReadTimeout:
print('timeout') print('timeout')
...@@ -124,11 +121,8 @@ class CrawlerMain(object): ...@@ -124,11 +121,8 @@ class CrawlerMain(object):
.format(query, page) .format(query, page)
try: try:
requests_res = requests.get(url, headers=self.headers, allow_redirects=False, timeout=10) requests_res = requests.get(url, headers=self.headers, allow_redirects=False, timeout=10)
if requests_res.status_code == 200:
res_json = requests_res.json() res_json = requests_res.json()
return res_json return res_json
else:
return None
except ReadTimeout: except ReadTimeout:
print('超时,执行下一个请求') print('超时,执行下一个请求')
......
...@@ -93,11 +93,8 @@ class CrawlerMain(object): ...@@ -93,11 +93,8 @@ class CrawlerMain(object):
try: try:
requests_res = requests.get(url, headers=self.headers, allow_redirects=False, timeout=10) requests_res = requests.get(url, headers=self.headers, allow_redirects=False, timeout=10)
# 打印状态码 # 打印状态码
if requests_res.status_code == 200:
page_obj = etree.HTML(requests_res.text) page_obj = etree.HTML(requests_res.text)
return page_obj, url return page_obj, url
else:
return None,url
except ReadTimeout: except ReadTimeout:
print('timeout') print('timeout')
return None, url return None, url
...@@ -124,11 +121,8 @@ class CrawlerMain(object): ...@@ -124,11 +121,8 @@ class CrawlerMain(object):
.format(query, page) .format(query, page)
try: try:
requests_res = requests.get(url, headers=self.headers, allow_redirects=False, timeout=10) requests_res = requests.get(url, headers=self.headers, allow_redirects=False, timeout=10)
if requests_res.status_code == 200:
res_json = requests_res.json() res_json = requests_res.json()
return res_json return res_json
else:
return None
except ReadTimeout: except ReadTimeout:
print('超时,执行下一个请求') print('超时,执行下一个请求')
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment