Commit 3310832b authored by 李小芳

add

parent 568a9046
......@@ -13,7 +13,10 @@ from email.utils import formataddr
from urllib import error
import requests
# 导入requests.exceptions模块中的三种异常类
from requests.exceptions import ReadTimeout, HTTPError, RequestException
from requests.exceptions import ReadTimeout, HTTPError, RequestException, ConnectionError
from requests.packages.urllib3.exceptions import ReadTimeoutError
from socket import timeout
from retrying import retry
import pandas as pd
......@@ -103,6 +106,15 @@ class CrawlerMain(object):
except RequestException:
print('reqerror')
pass
except socket.timeout:
print(socket.timeout)
pass
except ReadTimeoutError:
print("ReadTimeoutError")
pass
except ConnectionError:
print("ConnectionError")
pass
def get_search_service_info_list(self, page=1, city_id=-1, query=""):
url = "https://www.soyoung.com/searchNew/product?" \
......@@ -123,6 +135,15 @@ class CrawlerMain(object):
except RequestException:
print('reqerror')
pass
except socket.timeout:
print(socket.timeout)
pass
except ReadTimeoutError:
print("ReadTimeoutError")
pass
except ConnectionError:
print("ConnectionError")
pass
def get_services_list(self, res_json, query="", city_name=""):
......@@ -148,64 +169,65 @@ class CrawlerMain(object):
def get_service_base_info(self, pid):
    """Scrape the base information of one service detail page on soyoung.com.

    Fetches the detail page via ``get_service_base_info_list`` and extracts
    the service name, prices, sales volume, hospital grade and coupon
    information from the ``detail-wrap`` section of the page.

    :param pid: product id of the service; used to build the detail-page URL.
    :return: dict of extracted fields (keys are the Chinese column names used
             by the rest of the crawler). Returns an empty dict when the page
             could not be fetched (``res_json`` falsy).
    """
    service_info = dict()
    # res_json appears to be a parsed lxml HTML tree, or a falsy value when
    # the request failed — TODO confirm against get_service_base_info_list.
    res_json, url = self.get_service_base_info_list(pid)
    if res_json:
        detail_divs = res_json.xpath(
            "/html[1]/body[1]/div[@class='page-content']"
            "/div[@class='w1000']/div[@class='detail-wrap']/div[@class='width-control']/div"
        )
        service_info['链接'] = url
        for base_info in detail_divs:
            # Only the "basic-info" panel carries the fields we need.
            if "basic-info" in base_info.xpath("div/@class"):
                service_info["美购名称"] = str(base_info.xpath("div/h1/text()")[0].strip())
                # Sample values from the live page: 980 / 1980 / 110.
                service_info["sku活动价"] = base_info.xpath("div/div[@class='base-price']/em/text()")[0].strip()
                service_info["sku原价"] = base_info.xpath("div/div[@class='base-price']/del/text()")[0].strip()
                service_info["销量"] = base_info.xpath("div/div[@class='base-relation']/div[3]/em/text()")[0].strip()
                service_info["机构等级"] = base_info.xpath("div/div[@class='base-relation']/div[1]/text()")[0].strip()
                service_info['可领取预约金优惠券'] = []
                service_info['可用尾款券'] = []
                # One formatted entry per membership/coupon row.
                for vip_info in base_info.xpath("div/dl[@class='base-param']/dd[@class='app-vip']/div"):
                    service_info['可领取预约金优惠券'].append(self._format_vip_row(vip_info))
                # Deposit + remaining-payment pairs, e.g. "定金:100,尾款:880".
                for pay_info in base_info.xpath("div/div[@class='base-buy']/div[@class='price-box']"):
                    deposit_title = pay_info.xpath("span/i/text()")[0].strip()
                    deposit_price = pay_info.xpath("span/em/text()")[0].strip()
                    to_pay_title = pay_info.xpath("p/text()")[0].strip()
                    to_pay_price = pay_info.xpath("p/span/text()")[0].strip()
                    service_info['可用尾款券'].append(
                        deposit_title + ":" + deposit_price + "," + to_pay_title + ":" + to_pay_price
                    )
    return service_info

def _format_vip_row(self, vip_info):
    """Render one "app-vip" row as display text.

    Handles the three known row titles — "支持分期" (installments),
    "尾款红包" (remaining-payment red packets) and "氧分抵扣" (points
    deduction) — and returns an empty string for any other title.

    :param vip_info: lxml element for one ``dd[@class='app-vip']/div`` row.
    :return: formatted ``"<title>:<details>"`` string, or ``""``.
    """
    vip_str_info = ""
    vip_title = vip_info.xpath("div[@class='label']/text()")[0].strip() if vip_info.xpath(
        "div[@class='label']/text()") else ""
    if vip_title in ["支持分期"]:
        vip_data = vip_info.xpath("div[@class='text']/text()")[0].strip() if vip_info.xpath(
            "div[@class='text']/text()") else ""
        vip_str_info += vip_title + ":" + vip_data
    elif vip_title in ["尾款红包"]:
        vip_youhui = []
        for youhui in vip_info.xpath("div[@class='text']/span"):
            # Each span holds two <em> values: amount and usage threshold.
            vip_data = youhui.xpath("em/text()")
            vip_youhui.append(vip_data[0] + "元红包满" + vip_data[1] + "可用")
        vip_str_info += vip_title + ":" + " ".join(vip_youhui)
    elif vip_title in ["氧分抵扣"]:
        vip_data = vip_info.xpath("div[@class='text']/text()")[0].strip() if vip_info.xpath(
            "div[@class='text']/text()") else ""
        vip_money = vip_info.xpath("div[@class='text']/em/text()")[0].strip() if vip_info.xpath(
            "div[@class='text']/em/text()") else ""
        vip_str_info += vip_title + ":" + vip_data + str(vip_money) + "元"
    return vip_str_info
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment