Commit 057919d9 authored by 李小芳

add

parent 4be5032f
......@@ -89,39 +89,32 @@ class CrawlerMain(object):
# Fetch the page "https://y.soyoung.com/cp<pid>" and return it parsed.
#
# The request is a GET with self.headers, redirects disabled and a 10 second
# timeout. When the response status is 200 the body text is parsed with
# etree.HTML and the pair (page_obj, url) is returned. Each exception handler
# below prints a short tag and returns a 2-tuple whose first element is None.
#
# NOTE(review): this block appears to be a rendered diff hunk with its
# indentation stripped and removed/added lines interleaved. Presumably the
# first `return` in each handler is the pre-change line and the second one the
# post-change line -- confirm against the repository. Read literally, only the
# first `return` of each pair can execute and the second is unreachable.
# NOTE(review): what is returned for a non-200 status depends on the (lost)
# nesting of `return page_obj, url` -- TODO confirm.
# NOTE(review): every handler returns instead of re-raising; if `retry` only
# retries on raised exceptions (presumably the `retrying` package -- confirm),
# these failures would never trigger a retry.
@retry(stop_max_attempt_number=5, wait_fixed=1000)
def get_service_base_info_list(self, pid):
url = "https://y.soyoung.com/cp{}".format(pid)
page_obj =None
try:
requests_res = requests.get(url, headers=self.headers, allow_redirects=False, timeout=10)
# Print the status code (the line below only compares it with 200)
if requests_res.status_code == 200:
page_obj = etree.HTML(requests_res.text)
return page_obj, url
except ReadTimeout:
print('timeout')
pass
return None, None
return None, url
except HTTPError:
print('httperror')
pass
return None, None
return None, url
# Request exception
# NOTE(review): if ReadTimeout / HTTPError / ConnectionError are the
# requests.exceptions classes, they are subclasses of RequestException, so the
# ConnectionError handler further down could never run -- confirm the imports.
except RequestException:
print('reqerror')
pass
return None, None
return None, url
except socket.timeout:
# Prints the exception class object itself, not a message string.
print(socket.timeout)
pass
return None, None
return None, url
except ReadTimeoutError:
print("ReadTimeoutError")
pass
return None, None
return None, url
except ConnectionError:
print("ConnectionError")
pass
return None, None
return None, url
def get_search_service_info_list(self, page=1, city_id=-1, query=""):
url = "https://www.soyoung.com/searchNew/product?" \
......@@ -134,28 +127,22 @@ class CrawlerMain(object):
return res_json
except ReadTimeout:
print('超时,执行下一个请求')
pass
return None
except HTTPError:
print('httperror')
pass
return None
# 请求异常
except RequestException:
print('reqerror')
pass
return None
except socket.timeout:
print(socket.timeout)
pass
return None
except ReadTimeoutError:
print("ReadTimeoutError")
pass
return None
except ConnectionError:
print("ConnectionError")
pass
return None
def get_services_list(self, res_json, query="", city_name=""):
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment