Commit 057919d9 authored by 李小芳

add

parent 4be5032f
...@@ -89,39 +89,32 @@ class CrawlerMain(object): ...@@ -89,39 +89,32 @@ class CrawlerMain(object):
@retry(stop_max_attempt_number=5, wait_fixed=1000) @retry(stop_max_attempt_number=5, wait_fixed=1000)
def get_service_base_info_list(self, pid): def get_service_base_info_list(self, pid):
url = "https://y.soyoung.com/cp{}".format(pid) url = "https://y.soyoung.com/cp{}".format(pid)
page_obj =None
try: try:
requests_res = requests.get(url, headers=self.headers, allow_redirects=False, timeout=10) requests_res = requests.get(url, headers=self.headers, allow_redirects=False, timeout=10)
# 打印状态码 # 打印状态码
if requests_res.status_code == 200: if requests_res.status_code == 200:
page_obj = etree.HTML(requests_res.text) page_obj = etree.HTML(requests_res.text)
return page_obj, url return page_obj, url
except ReadTimeout: except ReadTimeout:
print('timeout') print('timeout')
pass return None, url
return None, None
except HTTPError: except HTTPError:
print('httperror') print('httperror')
pass return None, url
return None, None
# 请求异常 # 请求异常
except RequestException: except RequestException:
print('reqerror') print('reqerror')
pass return None, url
return None, None
except socket.timeout: except socket.timeout:
print(socket.timeout) print(socket.timeout)
pass return None, url
return None, None
except ReadTimeoutError: except ReadTimeoutError:
print("ReadTimeoutError") print("ReadTimeoutError")
pass return None, url
return None, None
except ConnectionError: except ConnectionError:
print("ConnectionError") print("ConnectionError")
pass return None, url
return None, None
def get_search_service_info_list(self, page=1, city_id=-1, query=""): def get_search_service_info_list(self, page=1, city_id=-1, query=""):
url = "https://www.soyoung.com/searchNew/product?" \ url = "https://www.soyoung.com/searchNew/product?" \
...@@ -134,28 +127,22 @@ class CrawlerMain(object): ...@@ -134,28 +127,22 @@ class CrawlerMain(object):
return res_json return res_json
except ReadTimeout: except ReadTimeout:
print('超时,执行下一个请求') print('超时,执行下一个请求')
pass
return None return None
except HTTPError: except HTTPError:
print('httperror') print('httperror')
pass
return None return None
# 请求异常 # 请求异常
except RequestException: except RequestException:
print('reqerror') print('reqerror')
pass
return None return None
except socket.timeout: except socket.timeout:
print(socket.timeout) print(socket.timeout)
pass
return None return None
except ReadTimeoutError: except ReadTimeoutError:
print("ReadTimeoutError") print("ReadTimeoutError")
pass
return None return None
except ConnectionError: except ConnectionError:
print("ConnectionError") print("ConnectionError")
pass
return None return None
def get_services_list(self, res_json, query="", city_name=""): def get_services_list(self, res_json, query="", city_name=""):
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment