Commit 47a96c79 authored by 李小芳

add

parent 19399a66
......@@ -58,98 +58,101 @@ def send_email_tome():
# Search endpoint and paging limits used by get_service_info.
_SOYOUNG_SEARCH_URL = 'https://api.soyoung.com/v8/superList/index'
_SOYOUNG_LAST_PAGE = 499  # pages 1..499 are requested at most
_SOYOUNG_MAX_OTHER_CITY_ITEMS = 100  # stop once this many other-city items were seen
_SOYOUNG_REQUEST_TIMEOUT = 30  # seconds; keeps a stalled connection from hanging the run


def _build_search_payload(city_id, keyword, page):
    """Return the form payload for one page of the search request."""
    # NOTE(review): the device/user tokens below are hard-coded in source;
    # consider moving them to configuration.
    return {'_time': '1626769752',
            'ab_id': 'C521C79519A5D544390E60FEA08B32DB',
            "app_id": 42,
            "area_belong": 4,
            "channel": 1,
            "cityId": str(city_id),
            "device_id": 196374256,
            "device_model": "iPhone12,1",
            'device_version': '13.6.1',
            "event": "--Boundary+D46DCD61FE6FA268",
            "filter": {},
            "from_site": 1,
            "gps_city_id": 1,
            "idfa": "057F28DF-20B8-488F-A285-931367FCC110",
            "is_tf": 0,
            "item_id": "--Boundary+D46DCD61FE6FA268",
            "keyword": str(keyword),
            "list_name": "sy_app_superlist_search_page",
            "lver": "8.28.2",
            # "menu1_id": "--Boundary+D46DCD61FE6FA268",
            # "menu2_id": "--Boundary+D46DCD61FE6FA268",
            "page": page,
            "page_size": 20,
            "push_app_id": 42,
            "request_id": "14d1e2b53ca644242ec7ccd7316a0aa2",
            "s_mei_device_id": "20200317131719d8bcbc37c54be511421dc3ebf7f1d0a801036b566bd47092",
            "s_meng_device_id": "D2VCzq4o472Ur7QtdVY6RlcjO6h3455JlJ+OC39JcQC7sX6a",
            "schemecard": "--Boundary+D46DCD61FE6FA268",
            # "sub_tab": "--Boundary+D46DCD61FE6FA268",
            "sys": 1,
            "tab": "mix",
            "uid": "48804194",
            "vistor_uid": "",
            "xy_device_token": "33fa06111dea535c88cc07521f2e466c91",
            'xy_sign': "Z1VfaYFXrpWBPeizj2VGeQ%3D%3D",
            "xy_token": "ad970db3d79f0833d1d25d3942068585"
            }


def _parse_search_entries(response_res):
    """Return the ``responseData.data`` list of a response, or None if unusable."""
    if response_res.status_code != 200 or not response_res.text:
        return None
    try:
        body = json.loads(response_res.text)
    except ValueError:
        # A body that is not JSON is treated like any other failed page.
        return None
    response_data = body.get("responseData") if isinstance(body, dict) else None
    if not isinstance(response_data, dict):
        return None
    entries = response_data.get("data")
    return entries if isinstance(entries, list) else None


def _build_service_info(service_data, keyword, city_name):
    """Map one raw service record to the dict that is written to the output file."""
    service_info = dict()
    service_info['skuid'] = service_data.get("pid")
    service_info['美购id'] = service_data.get("spu_id")
    # service_info['医生名'] = service_data.get("doctor_name")
    service_info['医院名称'] = service_data.get("hospital_name")
    service_info['sku原价'] = service_data.get("price_origin")
    service_info['sku活动价'] = service_data.get("price_online")
    service_info['机构等级'] = service_data.get("avg_score")
    service_info['美购名称'] = service_data.get("title")
    service_info['销量'] = service_data.get("order_cnt")
    # Copy the list so appending icon texts does not mutate the response data.
    service_info['可用尾款券'] = list(service_data.get("wei_kuan_list") or [])
    new_user_text = service_data.get("new_user_text", "")
    service_info['可领取预约金优惠券'] = [new_user_text] if new_user_text else []
    # Icon texts are sorted into the two coupon lists by their wording.
    for icon_text in service_data.get("icons") or []:
        if "预约金满" in icon_text:
            service_info['可领取预约金优惠券'].append(icon_text)
        elif "尾款满" in icon_text:
            service_info['可用尾款券'].append(icon_text)
    service_info['query词'] = keyword
    service_info['城市'] = city_name
    service_info['平台'] = "新氧"
    service_info['链接'] = "https://y.soyoung.com/cp{}".format(service_info['skuid'])
    return service_info


def get_service_info(city_id=-1, keyword="", city_name="", all_skuids=None, get_data_file=None):
    """Page through the search list for ``keyword`` and dump services of one city.

    Each page is requested once. Every service whose ``district_2`` equals
    ``city_id`` is printed and, when its ``pid`` is not in ``all_skuids``,
    written to ``get_data_file`` as one JSON line. Paging stops when a
    ``feed_area`` entry carries no ``feed_list``, when at least 100 services
    from other cities have been seen, or after the last page.

    Args:
        city_id: City id sent as ``cityId`` and compared with ``district_2``.
        keyword: Search keyword; also stored in every record.
        city_name: City name stored in every record.
        all_skuids: Collection of ``pid`` values that must not be written
            again. ``None`` means "no known ids".
        get_data_file: Writable text file object for the JSON lines; when it
            is ``None`` records are only printed.

    Returns:
        None.

    NOTE(review): the nesting of the ``else`` branches was reconstructed from
    a listing that had lost its indentation — confirm that "break" belongs to
    the empty ``feed_list`` case and the counter to the other-city case.
    """
    print("get_service_info")
    if all_skuids is None:
        all_skuids = []
    other_city_count = 0
    finished = False
    page = 0
    for page in range(1, _SOYOUNG_LAST_PAGE + 1):
        # Random sub-second pause between requests.
        time.sleep(random.random())
        # NOTE(review): verify=False disables TLS certificate verification.
        response_res = requests.post(_SOYOUNG_SEARCH_URL,
                                     _build_search_payload(city_id, keyword, page),
                                     verify=False,
                                     timeout=_SOYOUNG_REQUEST_TIMEOUT)
        entries = _parse_search_entries(response_res)
        if entries is None:
            # A failed page is reported and skipped; later pages are still tried.
            print(city_id, keyword, "爬取失败")
            continue
        for entry in entries:
            if not isinstance(entry, dict) or entry.get("type") != "feed_area":
                continue
            feed_list = (entry.get("items") or {}).get("feed_list") or []
            if not feed_list:
                print("break")
                finished = True
                break
            for feed in feed_list:
                if feed.get("type") != "feed_shop_diallel":
                    continue
                for service in feed.get("items") or []:
                    service_data = service.get("data")
                    if not service_data:
                        continue
                    if str(service_data.get("district_2")) != str(city_id):
                        other_city_count += 1
                        continue
                    service_info = _build_service_info(service_data, keyword, city_name)
                    print(service_info)
                    if get_data_file is not None and service_data.get("pid") not in all_skuids:
                        get_data_file.write(json.dumps(service_info))
                        get_data_file.write("\n")
        if finished or other_city_count >= _SOYOUNG_MAX_OTHER_CITY_ITEMS:
            break
    print(page, city_id, keyword, "本地已爬完")
def main(city_tag=""):
......@@ -162,7 +165,7 @@ def main(city_tag=""):
cityIdMapping = {'北京': '1', '上海': '9', '重庆': '22', '广州市': '289', '深圳市': '291', '郑州市': '240', '武汉市': '258',
'长沙市': '275', '南京市': '162', '成都市': '385', '西安市': '438', '杭州市': '175'}
\
# keywords = ['瘦脸针', '双眼皮', '光子嫩肤', '吸脂', '水光针', '玻尿酸', '热玛吉', '脱毛', '瘦腿针', '鼻综合', '瘦肩针', '下颌角', '线雕', '超声刀',
# '美白针',
# '眼综合', '隆鼻', '菲洛嘉', '下巴', '热拉提', '点阵激光', '面部吸脂', '开内眼角', '嗨体', '牙齿矫正', '皮秒', '超皮秒', '植发',
......@@ -175,7 +178,7 @@ def main(city_tag=""):
# '艾莉薇', '抽脂', '瘦腿', '玻尿酸丰下巴', '纹眉', '伊肤泉微针', '美白', '厚唇改薄', '面部线雕', '祛疤', '伊婉V', '超皮秒祛斑', '除皱针',
# '开眼角',
# '海菲秀', '假体下巴', '刷酸', '泪沟', '拉皮', '全身吸脂', '缩鼻翼']
keywords=['欧洲之星fotona4d']
keywords = ['欧洲之星fotona4d']
# city_list = ["北京", "上海", "广州市", "深圳市", "杭州市", "成都市", "重庆", "南京市", "武汉市", "长沙市", "郑州市", "西安市"]
......
......@@ -190,14 +190,15 @@ if __name__ == '__main__':
# '埋线双眼皮', '菲洛嘉水光针', '双眼皮修复', '欧洲之星', '脂肪填充',
# '溶脂针', '法令纹', '鼻基底','全切双眼皮', '颧骨内推',
# '鼻子', '抽脂', '光子嫩肤m22', '下颌缘提升', 'm22',
# '鼻翼缩小', '欧洲之星fotona4d', '自体脂肪全面部填充', '玻尿酸丰唇', '除皱针',
# '水光', '嗨体祛颈纹',
keywords = ['假体隆胸', '英诺小棕瓶', '黄金微雕',
'眼袋', '乔雅登', '除皱', '颧骨', '艾莉薇',
'瘦腿', '玻尿酸丰下巴', '纹眉', '伊肤泉微针', '美白',
'厚唇改薄', '面部线雕', '祛疤', '伊婉V', '超皮秒祛斑',
'开眼角', '海菲秀', '假体下巴', '刷酸', '泪沟', '拉皮', '全身吸脂', '缩鼻翼']
# '鼻翼缩小', 'fotona4d欧洲之星', '自体脂肪全面部填充', '玻尿酸丰唇', '除皱针',
# '水光', '嗨体祛颈纹','假体隆胸', '英诺小棕瓶', '黄金微雕',
# '眼袋', '乔雅登',
keywords = [
'除皱', '颧骨', '艾莉薇',
'瘦腿', '玻尿酸丰下巴', '纹眉', '伊肤泉微针', '美白',
'厚唇改薄', '面部线雕', '祛疤', '伊婉V', '超皮秒祛斑',
'开眼角', '海菲秀', '假体下巴', '刷酸', '泪沟', '拉皮', '全身吸脂', '缩鼻翼']
city_list = ["北京", "上海", "广州市", "深圳市", "杭州市", "成都市", "重庆", "南京市", "武汉市", "长沙市", "郑州市", "西安市"]
......
This source diff could not be displayed because it is too large. You can view the blob instead.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment