Commit 6d6ad627 authored by 李小芳

add

parent 057919d9
......@@ -95,31 +95,27 @@ class CrawlerMain(object):
if requests_res.status_code == 200:
page_obj = etree.HTML(requests_res.text)
return page_obj, url
else:
return None,url
except ReadTimeout:
print('timeout')
pass
return None, None
except HTTPError:
print('httperror')
pass
return None, None
# 请求异常
except RequestException:
print('reqerror')
pass
return None, None
except socket.timeout:
print(socket.timeout)
pass
return None, None
except ReadTimeoutError:
print("ReadTimeoutError")
pass
return None, None
except ConnectionError:
print("ConnectionError")
pass
return None, None
def get_search_service_info_list(self, page=1, city_id=-1, query=""):
......@@ -131,30 +127,27 @@ class CrawlerMain(object):
if requests_res.status_code == 200:
res_json = requests_res.json()
return res_json
else:
return None
except ReadTimeout:
print('超时,执行下一个请求')
pass
return None
except HTTPError:
print('httperror')
pass
return None
# 请求异常
except RequestException:
print('reqerror')
pass
return None
except socket.timeout:
print(socket.timeout)
pass
return None
except ReadTimeoutError:
print("ReadTimeoutError")
pass
return None
except ConnectionError:
print("ConnectionError")
pass
return None
def get_services_list(self, res_json, query="", city_name=""):
......
......@@ -96,6 +96,8 @@ class CrawlerMain(object):
if requests_res.status_code == 200:
page_obj = etree.HTML(requests_res.text)
return page_obj, url
else:
return None,url
except ReadTimeout:
print('timeout')
return None, url
......@@ -125,6 +127,9 @@ class CrawlerMain(object):
if requests_res.status_code == 200:
res_json = requests_res.json()
return res_json
else:
return None
except ReadTimeout:
print('超时,执行下一个请求')
return None
......@@ -336,3 +341,6 @@ def main(city_tags):
if __name__ == "__main__":
args = sys.argv[1]
main(city_tags=args)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment