Commit 47a96c79 authored by 李小芳's avatar 李小芳

add

parent 19399a66
...@@ -58,98 +58,101 @@ def send_email_tome(): ...@@ -58,98 +58,101 @@ def send_email_tome():
def get_service_info(city_id=-1, keyword="", city_name="", all_skuids=[], get_data_file=None): def get_service_info(city_id=-1, keyword="", city_name="", all_skuids=[], get_data_file=None):
print("get_service_info") print("get_service_info")
url = 'https://api.soyoung.com/v8/superList/index' url = 'https://api.soyoung.com/v8/superList/index'
break_flat = False
other_city_count = 0
for page in range(1, 500): for page in range(1, 500):
data = {'_time': '1626769752', if break_flat == False and other_city_count < 100:
'ab_id': 'C521C79519A5D544390E60FEA08B32DB', data = {'_time': '1626769752',
"app_id": 42, 'ab_id': 'C521C79519A5D544390E60FEA08B32DB',
"area_belong": 4, "app_id": 42,
"channel": 1, "area_belong": 4,
"cityId": str(city_id), "channel": 1,
"device_id": 196374256, "cityId": str(city_id),
"device_model": "iPhone12,1", "device_id": 196374256,
'device_version': '13.6.1', "device_model": "iPhone12,1",
"event": "--Boundary+D46DCD61FE6FA268", 'device_version': '13.6.1',
"filter": {}, "event": "--Boundary+D46DCD61FE6FA268",
"from_site": 1, "filter": {},
"gps_city_id": 1, "from_site": 1,
"idfa": "057F28DF-20B8-488F-A285-931367FCC110", "gps_city_id": 1,
"is_tf": 0, "idfa": "057F28DF-20B8-488F-A285-931367FCC110",
"item_id": "--Boundary+D46DCD61FE6FA268", "is_tf": 0,
"keyword": str(keyword), "item_id": "--Boundary+D46DCD61FE6FA268",
"list_name": "sy_app_superlist_search_page", "keyword": str(keyword),
"lver": "8.28.2", "list_name": "sy_app_superlist_search_page",
# "menu1_id": "--Boundary+D46DCD61FE6FA268", "lver": "8.28.2",
# "menu2_id": "--Boundary+D46DCD61FE6FA268", # "menu1_id": "--Boundary+D46DCD61FE6FA268",
"page": page, # "menu2_id": "--Boundary+D46DCD61FE6FA268",
"page_size": 20, "page": page,
"push_app_id": 42, "page_size": 20,
"request_id": "14d1e2b53ca644242ec7ccd7316a0aa2", "push_app_id": 42,
"s_mei_device_id": "20200317131719d8bcbc37c54be511421dc3ebf7f1d0a801036b566bd47092", "request_id": "14d1e2b53ca644242ec7ccd7316a0aa2",
"s_meng_device_id": "D2VCzq4o472Ur7QtdVY6RlcjO6h3455JlJ+OC39JcQC7sX6a", "s_mei_device_id": "20200317131719d8bcbc37c54be511421dc3ebf7f1d0a801036b566bd47092",
"schemecard": "--Boundary+D46DCD61FE6FA268", "s_meng_device_id": "D2VCzq4o472Ur7QtdVY6RlcjO6h3455JlJ+OC39JcQC7sX6a",
# "sub_tab": "--Boundary+D46DCD61FE6FA268", "schemecard": "--Boundary+D46DCD61FE6FA268",
"sys": 1, # "sub_tab": "--Boundary+D46DCD61FE6FA268",
"tab": "mix", "sys": 1,
"uid": "48804194", "tab": "mix",
"vistor_uid": "", "uid": "48804194",
"xy_device_token": "33fa06111dea535c88cc07521f2e466c91", "vistor_uid": "",
'xy_sign': "Z1VfaYFXrpWBPeizj2VGeQ%3D%3D", "xy_device_token": "33fa06111dea535c88cc07521f2e466c91",
"xy_token": "ad970db3d79f0833d1d25d3942068585" 'xy_sign': "Z1VfaYFXrpWBPeizj2VGeQ%3D%3D",
} "xy_token": "ad970db3d79f0833d1d25d3942068585"
s = random.random() }
time.sleep(s) s = random.random()
count_qita = 0 time.sleep(s)
response_res = requests.post(url, data, verify=False) response_res = requests.post(url, data, verify=False)
if response_res.status_code == 200 and response_res.text and count_qita <= 10: if response_res.status_code == 200 and response_res.text:
response = json.loads(response_res.text) response = json.loads(response_res.text)
responseData = response.get("responseData", {}).get("data") responseData = response.get("responseData", {}).get("data")
for item in responseData: for item in responseData:
if item.get("type") == "feed_area": if item.get("type") == "feed_area":
if item.get("items", {}).get("feed_list", []): if item.get("items", {}).get("feed_list", []):
for data in item.get("items", {}).get("feed_list", []): for data in item.get("items", {}).get("feed_list", []):
if data.get("type") == "feed_shop_diallel": if data.get("type") == "feed_shop_diallel":
for service in data.get("items", []): for service in data.get("items", []):
service_data = service.get("data") service_data = service.get("data")
if str(service_data.get("district_2")) == str(city_id): if str(service_data.get("district_2")) == str(city_id):
service_info = dict() service_info = dict()
service_info['skuid'] = service_data.get("pid") service_info['skuid'] = service_data.get("pid")
service_info['美购id'] = service_data.get("spu_id") service_info['美购id'] = service_data.get("spu_id")
# service_info['医生名'] = service_data.get("doctor_name") # service_info['医生名'] = service_data.get("doctor_name")
service_info['医院名称'] = service_data.get("hospital_name") service_info['医院名称'] = service_data.get("hospital_name")
service_info['sku原价'] = service_data.get("price_origin") service_info['sku原价'] = service_data.get("price_origin")
service_info['sku活动价'] = service_data.get("price_online") service_info['sku活动价'] = service_data.get("price_online")
service_info['机构等级'] = service_data.get("avg_score") service_info['机构等级'] = service_data.get("avg_score")
service_info['美购名称'] = service_data.get("title") service_info['美购名称'] = service_data.get("title")
service_info['销量'] = service_data.get("order_cnt") service_info['销量'] = service_data.get("order_cnt")
icon_data = service_data.get("icons", []) icon_data = service_data.get("icons", [])
service_info['可用尾款券'] = service_data.get("wei_kuan_list", []) service_info['可用尾款券'] = service_data.get("wei_kuan_list", [])
service_info['可领取预约金优惠券'] = [ service_info['可领取预约金优惠券'] = [
service_data.get("new_user_text", "")] if service_data.get( service_data.get("new_user_text", "")] if service_data.get(
"new_user_text", "") else [] "new_user_text", "") else []
for item in icon_data: for item in icon_data:
if "预约金满" in item: if "预约金满" in item:
service_info['可领取预约金优惠券'].append(item) service_info['可领取预约金优惠券'].append(item)
elif "尾款满" in item: elif "尾款满" in item:
service_info['可用尾款券'].append(item) service_info['可用尾款券'].append(item)
service_info['query词'] = keyword service_info['query词'] = keyword
service_info['城市'] = city_name service_info['城市'] = city_name
service_info['平台'] = "新氧" service_info['平台'] = "新氧"
service_info['链接'] = "https://y.soyoung.com/cp{}".format(service_info['skuid']) service_info['链接'] = "https://y.soyoung.com/cp{}".format(
print(service_info) service_info['skuid'])
if service_data.get("pid") not in all_skuids: print(service_info)
get_data_file.write(json.dumps(service_info)) if service_data.get("pid") not in all_skuids:
get_data_file.write("\n") get_data_file.write(json.dumps(service_info))
else: get_data_file.write("\n")
count_qita += 1 else:
else: other_city_count += 1
print("break") else:
break print("break")
elif count_qita > 10: break_flat = True
print(city_id, keyword, "本地已爬完") break
break else:
print(city_id, keyword, "爬取失败")
else: else:
print(city_id, keyword, "爬取失败") print(page,city_id, keyword, "本地已爬完")
def main(city_tag=""): def main(city_tag=""):
...@@ -162,7 +165,7 @@ def main(city_tag=""): ...@@ -162,7 +165,7 @@ def main(city_tag=""):
cityIdMapping = {'北京': '1', '上海': '9', '重庆': '22', '广州市': '289', '深圳市': '291', '郑州市': '240', '武汉市': '258', cityIdMapping = {'北京': '1', '上海': '9', '重庆': '22', '广州市': '289', '深圳市': '291', '郑州市': '240', '武汉市': '258',
'长沙市': '275', '南京市': '162', '成都市': '385', '西安市': '438', '杭州市': '175'} '长沙市': '275', '南京市': '162', '成都市': '385', '西安市': '438', '杭州市': '175'}
\
# keywords = ['瘦脸针', '双眼皮', '光子嫩肤', '吸脂', '水光针', '玻尿酸', '热玛吉', '脱毛', '瘦腿针', '鼻综合', '瘦肩针', '下颌角', '线雕', '超声刀', # keywords = ['瘦脸针', '双眼皮', '光子嫩肤', '吸脂', '水光针', '玻尿酸', '热玛吉', '脱毛', '瘦腿针', '鼻综合', '瘦肩针', '下颌角', '线雕', '超声刀',
# '美白针', # '美白针',
# '眼综合', '隆鼻', '菲洛嘉', '下巴', '热拉提', '点阵激光', '面部吸脂', '开内眼角', '嗨体', '牙齿矫正', '皮秒', '超皮秒', '植发', # '眼综合', '隆鼻', '菲洛嘉', '下巴', '热拉提', '点阵激光', '面部吸脂', '开内眼角', '嗨体', '牙齿矫正', '皮秒', '超皮秒', '植发',
...@@ -175,7 +178,7 @@ def main(city_tag=""): ...@@ -175,7 +178,7 @@ def main(city_tag=""):
# '艾莉薇', '抽脂', '瘦腿', '玻尿酸丰下巴', '纹眉', '伊肤泉微针', '美白', '厚唇改薄', '面部线雕', '祛疤', '伊婉V', '超皮秒祛斑', '除皱针', # '艾莉薇', '抽脂', '瘦腿', '玻尿酸丰下巴', '纹眉', '伊肤泉微针', '美白', '厚唇改薄', '面部线雕', '祛疤', '伊婉V', '超皮秒祛斑', '除皱针',
# '开眼角', # '开眼角',
# '海菲秀', '假体下巴', '刷酸', '泪沟', '拉皮', '全身吸脂', '缩鼻翼'] # '海菲秀', '假体下巴', '刷酸', '泪沟', '拉皮', '全身吸脂', '缩鼻翼']
keywords=['欧洲之星fotona4d'] keywords = ['欧洲之星fotona4d']
# city_list = ["北京", "上海", "广州市", "深圳市", "杭州市", "成都市", "重庆", "南京市", "武汉市", "长沙市", "郑州市", "西安市"] # city_list = ["北京", "上海", "广州市", "深圳市", "杭州市", "成都市", "重庆", "南京市", "武汉市", "长沙市", "郑州市", "西安市"]
......
...@@ -190,14 +190,15 @@ if __name__ == '__main__': ...@@ -190,14 +190,15 @@ if __name__ == '__main__':
# '埋线双眼皮', '菲洛嘉水光针', '双眼皮修复', '欧洲之星', '脂肪填充', # '埋线双眼皮', '菲洛嘉水光针', '双眼皮修复', '欧洲之星', '脂肪填充',
# '溶脂针', '法令纹', '鼻基底','全切双眼皮', '颧骨内推', # '溶脂针', '法令纹', '鼻基底','全切双眼皮', '颧骨内推',
# '鼻子', '抽脂', '光子嫩肤m22', '下颌缘提升', 'm22', # '鼻子', '抽脂', '光子嫩肤m22', '下颌缘提升', 'm22',
# '鼻翼缩小', '欧洲之星fotona4d', '自体脂肪全面部填充', '玻尿酸丰唇', '除皱针', # '鼻翼缩小', 'fotona4d欧洲之星', '自体脂肪全面部填充', '玻尿酸丰唇', '除皱针',
# '水光', '嗨体祛颈纹', # '水光', '嗨体祛颈纹','假体隆胸', '英诺小棕瓶', '黄金微雕',
# '眼袋', '乔雅登',
keywords = ['假体隆胸', '英诺小棕瓶', '黄金微雕',
'眼袋', '乔雅登', '除皱', '颧骨', '艾莉薇', keywords = [
'瘦腿', '玻尿酸丰下巴', '纹眉', '伊肤泉微针', '美白', '除皱', '颧骨', '艾莉薇',
'厚唇改薄', '面部线雕', '祛疤', '伊婉V', '超皮秒祛斑', '瘦腿', '玻尿酸丰下巴', '纹眉', '伊肤泉微针', '美白',
'开眼角', '海菲秀', '假体下巴', '刷酸', '泪沟', '拉皮', '全身吸脂', '缩鼻翼'] '厚唇改薄', '面部线雕', '祛疤', '伊婉V', '超皮秒祛斑',
'开眼角', '海菲秀', '假体下巴', '刷酸', '泪沟', '拉皮', '全身吸脂', '缩鼻翼']
city_list = ["北京", "上海", "广州市", "深圳市", "杭州市", "成都市", "重庆", "南京市", "武汉市", "长沙市", "郑州市", "西安市"] city_list = ["北京", "上海", "广州市", "深圳市", "杭州市", "成都市", "重庆", "南京市", "武汉市", "长沙市", "郑州市", "西安市"]
......
This source diff could not be displayed because it is too large. You can view the blob instead.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment