Commit 6d6ad627 authored by 李小芳

add

parent 057919d9
...@@ -95,31 +95,27 @@ class CrawlerMain(object): ...@@ -95,31 +95,27 @@ class CrawlerMain(object):
if requests_res.status_code == 200: if requests_res.status_code == 200:
page_obj = etree.HTML(requests_res.text) page_obj = etree.HTML(requests_res.text)
return page_obj, url return page_obj, url
else:
return None,url
except ReadTimeout: except ReadTimeout:
print('timeout') print('timeout')
pass
return None, None return None, None
except HTTPError: except HTTPError:
print('httperror') print('httperror')
pass
return None, None return None, None
# 请求异常 # 请求异常
except RequestException: except RequestException:
print('reqerror') print('reqerror')
pass
return None, None return None, None
except socket.timeout: except socket.timeout:
print(socket.timeout) print(socket.timeout)
pass
return None, None return None, None
except ReadTimeoutError: except ReadTimeoutError:
print("ReadTimeoutError") print("ReadTimeoutError")
pass
return None, None return None, None
except ConnectionError: except ConnectionError:
print("ConnectionError") print("ConnectionError")
pass
return None, None return None, None
def get_search_service_info_list(self, page=1, city_id=-1, query=""): def get_search_service_info_list(self, page=1, city_id=-1, query=""):
...@@ -131,30 +127,27 @@ class CrawlerMain(object): ...@@ -131,30 +127,27 @@ class CrawlerMain(object):
if requests_res.status_code == 200: if requests_res.status_code == 200:
res_json = requests_res.json() res_json = requests_res.json()
return res_json return res_json
else:
return None
except ReadTimeout: except ReadTimeout:
print('超时,执行下一个请求') print('超时,执行下一个请求')
pass
return None return None
except HTTPError: except HTTPError:
print('httperror') print('httperror')
pass
return None return None
# 请求异常 # 请求异常
except RequestException: except RequestException:
print('reqerror') print('reqerror')
pass
return None return None
except socket.timeout: except socket.timeout:
print(socket.timeout) print(socket.timeout)
pass
return None return None
except ReadTimeoutError: except ReadTimeoutError:
print("ReadTimeoutError") print("ReadTimeoutError")
pass
return None return None
except ConnectionError: except ConnectionError:
print("ConnectionError") print("ConnectionError")
pass
return None return None
def get_services_list(self, res_json, query="", city_name=""): def get_services_list(self, res_json, query="", city_name=""):
......
...@@ -96,6 +96,8 @@ class CrawlerMain(object): ...@@ -96,6 +96,8 @@ class CrawlerMain(object):
if requests_res.status_code == 200: if requests_res.status_code == 200:
page_obj = etree.HTML(requests_res.text) page_obj = etree.HTML(requests_res.text)
return page_obj, url return page_obj, url
else:
return None,url
except ReadTimeout: except ReadTimeout:
print('timeout') print('timeout')
return None, url return None, url
...@@ -125,6 +127,9 @@ class CrawlerMain(object): ...@@ -125,6 +127,9 @@ class CrawlerMain(object):
if requests_res.status_code == 200: if requests_res.status_code == 200:
res_json = requests_res.json() res_json = requests_res.json()
return res_json return res_json
else:
return None
except ReadTimeout: except ReadTimeout:
print('超时,执行下一个请求') print('超时,执行下一个请求')
return None return None
...@@ -336,3 +341,6 @@ def main(city_tags): ...@@ -336,3 +341,6 @@ def main(city_tags):
if __name__ == "__main__": if __name__ == "__main__":
args = sys.argv[1] args = sys.argv[1]
main(city_tags=args) main(city_tags=args)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment