Commit 4309e098 authored by 李小芳's avatar 李小芳

update

parent 4d1d6e83
......@@ -17,21 +17,21 @@
</Attribute>
</value>
</entry>
<entry key="/dev/xinyang_ask_tag/soyoung_service_1.csv">
<entry key="/dev/xinyang_ask_tag/soyoung_service_cika.csv">
<value>
<Attribute>
<option name="separator" value="," />
</Attribute>
</value>
</entry>
<entry key="/dev/xinyang_ask_tag/soyoung_service_cika.csv">
<entry key="/dev/xinyang_ask_tag/医院尾款劵数据.csv">
<value>
<Attribute>
<option name="separator" value="," />
</Attribute>
</value>
</entry>
<entry key="/dev/xinyang_ask_tag/soyoung_service_write_cika.csv">
<entry key="/dev/xinyang_ask_tag/更美机构商务信息.csv">
<value>
<Attribute>
<option name="separator" value="," />
......
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="Encoding">
<file url="file://$PROJECT_DIR$/dev/xinyang_ask_tag/更美机构商务信息.csv" charset="UTF-8" />
</component>
</project>
\ No newline at end of file
......@@ -5,12 +5,13 @@ import re
from pypinyin import lazy_pinyin
import time
import random
import csv
monkey.patch_all()
def get_service_info(spu_id, url, ids_bug):
time.sleep(random.uniform(1, 2))
time.sleep(random.uniform(2, 3))
url = url + "?tab=0&rj=0"
headers = {
......@@ -56,7 +57,7 @@ def gevent_test():
gevent.joinall(tasks)
def get_cika_info_to_csv():
def get_cika_info_to_csv(get_data=False):
read_file = open("soyoung_service.csv", "r", encoding="utf-8")
write_file = open("soyoung_service_cika.csv", "a+", encoding="utf-8")
all_cika_title = list()
......@@ -66,7 +67,7 @@ def get_cika_info_to_csv():
try:
cika_price_dict = dict()
print("index:", item.strip().split(",")[0])
if int(item.strip().split(",")[0]) > 34645:
if int(item.strip().split(",")[0]) > 64526:
service_id = item.strip().split(",")[2]
url = item.strip().split(",")[-1]
if service_id in have_read_service:
......@@ -74,12 +75,16 @@ def get_cika_info_to_csv():
else:
have_read_service.append(service_id)
sub_title_dict = get_service_info(str(service_id), url, ids_bug)
if sub_title_dict:
all_cika_title.extend(sub_title_dict.keys())
cika_price_dict[service_id] = sub_title_dict
print(cika_price_dict)
write_file.write(str(cika_price_dict))
write_file.write("\n")
if get_data == False:
if sub_title_dict:
all_cika_title.extend(sub_title_dict.keys())
cika_price_dict[service_id] = sub_title_dict
print(cika_price_dict)
write_file.write(str(cika_price_dict))
write_file.write("\n")
else:
return sub_title_dict
except Exception as e:
print(e)
pass
......@@ -89,5 +94,42 @@ def get_cika_info_to_csv():
write_file.close()
def read_cika_info():
    """Load the saved cika records from ``soyoung_service_cika.csv``.

    Every non-blank line of the file is parsed as a Python dict literal and
    its key/value pairs are merged into a single dict. When the same key
    appears on several lines, the last occurrence wins.

    Returns:
        dict: all key/value pairs found in the file.

    Raises:
        FileNotFoundError: if ``soyoung_service_cika.csv`` does not exist.
        ValueError, SyntaxError: if a line is not a valid Python literal.
    """
    # Local import so this function does not depend on the file's import block.
    import ast

    all_cika_title = {}
    # ``with`` guarantees the handle is closed even if a line fails to parse.
    with open("soyoung_service_cika.csv", "r", encoding="utf-8") as cika_file:
        for line in cika_file:
            line = line.strip()
            if not line:
                # Blank lines carry no record; skip them instead of crashing.
                continue
            # literal_eval only accepts literals, so file content can never
            # execute code (the previous eval() could).
            # NOTE(review): assumes the stored dicts hold only literal values.
            all_cika_title.update(ast.literal_eval(line))
    print(all_cika_title)
    return all_cika_title
def np_write_csv_data():
    """Append each service row, extended with its cika info, to the output file.

    Reads ``soyoung_service.csv`` (skipping its header row), looks up the cika
    info for the service id found in column index 2, appends that info to the
    row and writes ``str(row)`` as one line of
    ``soyoung_service_write_cika.csv`` (opened in append mode).

    Rows whose id is missing from the cached data fall back to
    ``get_cika_info_to_csv(get_data=True)``.
    """
    all_cika_title = read_cika_info()
    # /Users/edz/Desktop/xinyang.csv
    with open("soyoung_service_write_cika.csv", "a+", encoding="utf-8") as write_file:
        with open("soyoung_service.csv", encoding='utf-8') as f:
            reader = csv.reader(f)
            # Default of None avoids StopIteration on an empty input file.
            header_row = next(reader, None)
            print(header_row)
            for row in reader:
                service_id = row[2]
                cika_info = all_cika_title.get(service_id)
                if cika_info:
                    print('cika_info:', cika_info)
                else:
                    # NOTE(review): the helper is not told which service_id is
                    # wanted -- confirm it returns data for this row.
                    cika_info = get_cika_info_to_csv(get_data=True)
                    print('sub_title_dict:', cika_info)
                # list.append() returns None, so the row must be extended
                # first and then serialized; the old code wrote "None".
                row.append(cika_info)
                write_file.write(str(row))
                write_file.write("\n")
# Script entry point: when run directly, call get_cika_info_to_csv() with its defaults.
if __name__ == '__main__':
get_cika_info_to_csv()
import csv
def read_cika_info():
    """Load the saved cika records from ``soyoung_service_cika.csv``.

    Every non-blank line of the file is parsed as a Python dict literal and
    its key/value pairs are merged into a single dict. When the same key
    appears on several lines, the last occurrence wins.

    Returns:
        dict: all key/value pairs found in the file.

    Raises:
        FileNotFoundError: if ``soyoung_service_cika.csv`` does not exist.
        ValueError, SyntaxError: if a line is not a valid Python literal.
    """
    # Local import so this function does not depend on the file's import block.
    import ast

    all_cika_title = {}
    # ``with`` guarantees the handle is closed even if a line fails to parse.
    with open("soyoung_service_cika.csv", "r", encoding="utf-8") as cika_file:
        for line in cika_file:
            line = line.strip()
            if not line:
                # Blank lines carry no record; skip them instead of crashing.
                continue
            # literal_eval only accepts literals, so file content can never
            # execute code (the previous eval() could).
            # NOTE(review): assumes the stored dicts hold only literal values.
            all_cika_title.update(ast.literal_eval(line))
    print(all_cika_title)
    return all_cika_title
def np_write_csv_data():
    """Append each service row that has cika info to the output file.

    Reads ``soyoung_service.csv`` (skipping its header row), looks up the cika
    info for the service id found in column index 2 and, when present, appends
    it to the row and writes ``str(row)`` as one line of
    ``soyoung_service_write_cika.csv`` (opened in append mode). Rows without
    cika info are not written.
    """
    all_cika_title = read_cika_info()
    # /Users/edz/Desktop/xinyang.csv
    with open("soyoung_service_write_cika.csv", "a+", encoding="utf-8") as write_file:
        with open("soyoung_service.csv", encoding='utf-8') as f:
            reader = csv.reader(f)
            # Default of None avoids StopIteration on an empty input file.
            header_row = next(reader, None)
            print(header_row)
            for row in reader:
                service_id = row[2]
                cika_info = all_cika_title.get(service_id)
                if cika_info:
                    print('cika_info:', cika_info)
                    # list.append() returns None, so the row must be extended
                    # first and then serialized; the old code wrote "None".
                    row.append(cika_info)
                    write_file.write(str(row))
                    write_file.write("\n")
# Script entry point: when run directly, call np_write_csv_data().
if __name__ == '__main__':
np_write_csv_data()
city_info = [
{
"name": "热门城市",
"selected": 0,
"son": [
{
"id": "1",
"name": "北京",
"value_cnt": "12676",
"pinyin": "beijing",
"selected": 0
},
{
"id": "385",
"name": "成都",
"value_cnt": "6715",
"pinyin": "chengdu",
"selected": 0
},
{
"id": "9",
"name": "上海",
"value_cnt": "6172",
"pinyin": "shanghai",
"selected": 0
},
{
"id": "291",
"name": "深圳",
"value_cnt": "5686",
"pinyin": "shenzhen",
"selected": 0
},
{
"id": "289",
"name": "广州",
"value_cnt": "5428",
"pinyin": "guangzhou",
"selected": 0
},
{
"id": "22",
"name": "重庆",
"value_cnt": "5213",
"pinyin": "chongqing",
"selected": 0
},
{
"id": "175",
"name": "杭州",
"value_cnt": "3238",
"pinyin": "hangzhou",
"selected": 0
},
{
"id": "162",
"name": "南京",
"value_cnt": "2479",
"pinyin": "nanjing",
"selected": 0
},
{
"id": "166",
"name": "苏州",
"value_cnt": "1630",
"pinyin": "suzhou166",
"selected": 0
},
{
"id": "186",
"name": "合肥",
"value_cnt": "1584",
"pinyin": "hefei",
"selected": 0
},
{
"id": 258,
"name": "武汉",
"pinyin": "wuhan"
},
{
"id": 438,
"name": "西安",
"pinyin": "xian"
}
]
},
{
"id": 0,
"name": "全部城市",
"selected": 1,
"letter": "Q",
"pinyin": "quanguo",
"first_letters": "Qg",
"allname": "全部城市",
"son": [
]
},
{
"id": "1",
"name": "北京",
......@@ -3942,91 +3844,19 @@ city_info = [
cityId_mapping = dict()
# city = ["北京", "上海", "广州市", "深圳市", "杭州市", "成都市", "重庆", "南京市", "武汉市", "长沙市", "郑州市", "西安市"]
city = ['东莞', '福州', '贵阳', '海口', '合肥', '昆明', '南昌', '南宁', '宁波', '三亚', '沈阳', '苏州', '天津', '乌鲁木齐', '阿拉善', '安康', '安宁', '安庆',
'鞍山', '安顺', '安阳', '百色', '白沙', '白山', '保定', '宝鸡', '保山', '包头', '巴彦淖尔', '巴中', '北海', '蚌埠', '本溪', '毕节', '滨州', '亳州',
'沧州', '长春', '常德', '常熟', '长治', '常州', '巢湖', '朝阳', '潮州', '承德', '郴州', '赤峰', '池州', '楚雄', '滁州', '大理', '大连', '丹东',
'大庆', '大同', '达州', '德宏', '德阳', '德州', '东方', '东京', '东阳', '东营', '鄂尔多斯', '恩施', '佛山', '抚顺', '阜新', '阜阳', '抚州', '甘南',
'赣州', '广安', '广元', '贵港', '桂林', '果洛', '固原', '哈尔滨', '海南', '邯郸', '汉中', '鹤岗', '黑河', '衡水', '衡阳', '河源', '菏泽', '红河',
'淮安', '淮北', '怀化', '淮南', '黄冈', '黄山', '黄石', '呼和浩特', '惠州', '葫芦岛', '呼伦贝尔', '湖州', '佳木斯', '吉安', '江门', '江油', '焦作',
'嘉兴', '揭阳', '吉林', '济南', '晋城', '景德镇', '荆门', '荆州', '金华', '济宁', '晋中', '锦州', '九江', '鸡西', '济州岛', '开封', '喀什', '克拉玛依',
'库尔勒', '昆山', '莱芜', '廊坊', '兰州', '拉萨', '乐山', '凉山', '连云港', '聊城', '辽阳', '辽源', '丽江', '临汾', '临夏', '临沂', '丽水', '六安',
'六盘水', '柳州', '龙岩', '娄底', '漯河', '洛阳', '泸州', '马鞍山', '曼谷', '茂名', '眉山', '梅州', '绵阳', '牡丹江', '南充', '南平', '南通', '南阳',
'内江', '宁德', '攀枝花', '平顶山', '平凉', '萍乡', '普洱', '莆田', '濮阳', '潜江', '青岛', '庆阳', '清远', '秦皇岛', '钦州', '齐齐哈尔', '七台河',
'泉州', '曲靖', '衢州', '日照', '三门峡', '三明', '商丘', '上饶', '汕头', '韶关', '绍兴', '邵阳', '嵊州', '石河子', '石家庄', '十堰', '市中心', '石嘴山',
'首尔', '双鸭山', '四平', '松原', '绥化', '遂宁', '随州', '宿迁', '宿州', '泰安', '台北', '太仓', '太原', '台中', '泰州', '台州', '唐山', '天门',
'天水', '铁岭', '铜川', '通化', '通辽', '铜陵', '铜仁', '潍坊', '威海', '渭南', '温州', '芜湖', '武威', '无锡', '吴忠', '梧州', '厦门', '中国香港',
'湘潭', '湘西', '襄阳', '咸宁', '仙桃', '咸阳', '孝感', '邢台', '悉尼', '西宁', '新加坡', '新乡', '信阳', '新余', '西双版纳', '宣城', '许昌', '徐州',
'雅安', '延安', '延边', '盐城', '阳江', '阳泉', '扬州', '延吉', '烟台', '宜宾', '宜昌', '伊春', '宜春', '银川', '营口', '鹰潭', '义乌', '益阳',
'永州', '岳阳', '玉林', '运城', '玉溪', '枣庄', '张家港', '张家口', '张掖', '漳州', '湛江', '肇庆', '昭通', '镇江', '中山', '周口', '舟山', '珠海',
'驻马店', '株洲', '淄博', '自贡', '资阳', '遵义']
for item in city_info:
if "level" in item.keys():
if 'son' in item.keys():
for level2Item in item.get("son", []):
cityId = level2Item.get("id")
cityName = level2Item.get("name")
level = level2Item.get("level")
if int(level) == 2:
if cityName in city:
cityId_mapping[cityName] = cityId
elif cityName[:-1] in city:
cityId_mapping[cityName] = cityId
elif cityName[:-2] in city:
cityId_mapping[cityName] = cityId
else:
print(cityName)
name = item.get("name", "")
city_id = item.get("id", "")
cityId_mapping[name] = city_id
for level2Item in item.get("son", []):
cityId = level2Item.get("id")
cityName = level2Item.get("name")
cityId_mapping[cityName] = cityId
def invert_dict(d):
    """Return a new dict that maps each value of *d* back to its key.

    Values of *d* must be hashable. When several keys share the same value,
    the key that comes last in iteration order is the one kept.
    """
    return {value: key for key, value in d.items()}
# print(len(city))
# print(cityId_mapping)
# print(len(cityId_mapping))
# print(cityId_mapping.keys())
dict_keys = ['合肥市', '芜湖市', '蚌埠市', '淮南市', '马鞍山市', '淮北市', '铜陵市', '安庆市', '滁州市', '阜阳市', '宿州市', '六安地区', '亳州市', '池州市', '宣城市',
'福州市', '厦门市', '莆田市', '三明市', '泉州市', '漳州市', '南平市', '龙岩市', '宁德市', '韶关市', '东莞市', '中山市', '珠海市', '佛山市', '汕头市',
'江门市', '湛江市', '茂名市', '肇庆市', '惠州市', '梅州市', '河源市', '阳江市', '清远市', '潮州市', '揭阳市', '南宁市', '柳州市', '桂林市', '梧州市',
'北海市', '钦州市', '贵港市', '玉林市', '百色市', '贵阳市', '遵义市', '六盘水市', '铜仁地区', '毕节地区', '兰州市', '张掖市', '平凉市', '庆阳市', '开封市',
'洛阳市', '平顶山市', '安阳市', '新乡市', '焦作市', '濮阳市', '许昌市', '漯河市', '三门峡市', '南阳市', '商丘市', '信阳市', '周口市', '驻马店市', '黄石市',
'荆州市', '宜昌市', '襄阳市', '天门市', '十堰市', '荆门市', '孝感市', '黄冈市', '咸宁市', '随州市', '仙桃市', '潜江市', '株洲市', '衡阳市', '益阳市',
'邵阳市', '湘潭市', '岳阳市', '常德市', '郴州市', '永州市', '怀化市', '娄底市', '石家庄市', '唐山市', '秦皇岛市', '邯郸市', '邢台市', '保定市', '张家口市',
'承德市', '衡水市', '廊坊市', '沧州市', '哈尔滨市', '齐齐哈尔市', '鸡西市', '鹤岗市', '大庆市', '佳木斯市', '七台河市', '牡丹江市', '绥化市', '海口市',
'三亚市', '东方市', '苏州市', '无锡市', '常州市', '南通市', '盐城市', '徐州市', '连云港市', '扬州市', '淮安市', '镇江市', '泰州市', '宿迁市', '南昌市',
'景德镇市', '萍乡市', '九江市', '新余市', '鹰潭市', '赣州市', '吉安市', '宜春市', '抚州市', '上饶市', '长春市', '吉林市', '四平市', '通化市', '白山市',
'松原市', '沈阳市', '大连市', '鞍山市', '抚顺市', '丹东市', '锦州市', '营口市', '阜新市', '辽阳市', '铁岭市', '朝阳市', '葫芦岛市', '呼和浩特市', '包头市',
'赤峰市', '通辽市', '鄂尔多斯市', '呼伦贝尔市', '巴彦淖尔市', '银川市', '西宁市', '自贡市', '绵阳市', '德阳市', '乐山市', '南充市', '攀枝花市', '泸州市',
'广元市', '遂宁市', '内江市', '眉山市', '宜宾市', '广安市', '达州市', '雅安市', '巴中市', '资阳市', '济南市', '青岛市', '枣庄市', '烟台市', '淄博市',
'临沂市', '东营市', '潍坊市', '济宁市', '泰安市', '威海市', '日照市', '德州市', '聊城市', '滨州市', '菏泽市', '宝鸡市', '咸阳市', '汉中市', '渭南市',
'延安市', '安康市', '太原市', '晋中市', '运城市', '大同市', '阳泉市', '长治市', '晋城市', '临汾市', '乌鲁木齐市', '石河子市', '昆明市', '曲靖市', '玉溪市',
'保山市', '昭通市', '丽江市', '宁波市', '温州市', '嘉兴市', '金华市', '台州市', '湖州市', '绍兴市', '舟山市', '衢州市', '丽水市', '台北市', '台中市',
'台北县']
mp = {'合肥市': '186', '芜湖市': '187', '蚌埠市': '188', '淮南市': '189', '马鞍山市': '190', '淮北市': '191', '铜陵市': '192', '安庆市': '193',
'滁州市': '195', '阜阳市': '196', '宿州市': '197', '六安地区': '199', '亳州市': '200', '池州市': '201', '宣城市': '202', '福州市': '203',
'厦门市': '204', '莆田市': '205', '三明市': '206', '泉州市': '207', '漳州市': '208', '南平市': '209', '龙岩市': '210', '宁德市': '211',
'韶关市': '290', '东莞市': '305', '中山市': '306', '珠海市': '292', '佛山市': '294', '汕头市': '293', '江门市': '295', '湛江市': '296',
'茂名市': '297', '肇庆市': '298', '惠州市': '299', '梅州市': '300', '河源市': '302', '阳江市': '303', '清远市': '304', '潮州市': '307',
'揭阳市': '308', '南宁市': '310', '柳州市': '311', '桂林市': '312', '梧州市': '313', '北海市': '314', '钦州市': '316', '贵港市': '317',
'玉林市': '318', '百色市': '319', '贵阳市': '406', '遵义市': '408', '六盘水市': '407', '铜仁地区': '410', '毕节地区': '412', '兰州市': '448',
'张掖市': '454', '平凉市': '455', '庆阳市': '457', '开封市': '241', '洛阳市': '242', '平顶山市': '243', '安阳市': '244', '新乡市': '246',
'焦作市': '247', '濮阳市': '248', '许昌市': '249', '漯河市': '250', '三门峡市': '251', '南阳市': '252', '商丘市': '253', '信阳市': '254',
'周口市': '255', '驻马店市': '256', '黄石市': '259', '荆州市': '266', '宜昌市': '261', '襄阳市': '45068', '天门市': '273', '十堰市': '260',
'荆门市': '264', '孝感市': '265', '黄冈市': '267', '咸宁市': '268', '随州市': '269', '仙桃市': '271', '潜江市': '272', '株洲市': '276',
'衡阳市': '278', '益阳市': '283', '邵阳市': '279', '湘潭市': '277', '岳阳市': '280', '常德市': '281', '郴州市': '284', '永州市': '285',
'怀化市': '286', '娄底市': '287', '石家庄市': '73', '唐山市': '74', '秦皇岛市': '75', '邯郸市': '76', '邢台市': '77', '保定市': '78',
'张家口市': '79', '承德市': '80', '衡水市': '81', '廊坊市': '82', '沧州市': '83', '哈尔滨市': '130', '齐齐哈尔市': '131', '鸡西市': '132',
'鹤岗市': '133', '大庆市': '135', '佳木斯市': '137', '七台河市': '138', '牡丹江市': '139', '绥化市': '141', '海口市': '324', '三亚市': '325',
'东方市': '331', '苏州市': '166', '无锡市': '163', '常州市': '165', '南通市': '167', '盐城市': '170', '徐州市': '164', '连云港市': '168',
'扬州市': '171', '淮安市': '169', '镇江市': '172', '泰州市': '173', '宿迁市': '174', '南昌市': '212', '景德镇市': '213', '萍乡市': '214',
'九江市': '215', '新余市': '216', '鹰潭市': '217', '赣州市': '218', '吉安市': '219', '宜春市': '220', '抚州市': '221', '上饶市': '222',
'长春市': '121', '吉林市': '122', '四平市': '123', '通化市': '125', '白山市': '126', '松原市': '127', '沈阳市': '107', '大连市': '108',
'鞍山市': '109', '抚顺市': '110', '丹东市': '112', '锦州市': '113', '营口市': '114', '阜新市': '115', '辽阳市': '116', '铁岭市': '118',
'朝阳市': '119', '葫芦岛市': '120', '呼和浩特市': '95', '包头市': '96', '赤峰市': '98', '通辽市': '99', '鄂尔多斯市': '100', '呼伦贝尔市': '101',
'巴彦淖尔市': '102', '银川市': '470', '西宁市': '462', '自贡市': '386', '绵阳市': '390', '德阳市': '389', '乐山市': '394', '南充市': '395',
'攀枝花市': '387', '泸州市': '388', '广元市': '391', '遂宁市': '392', '内江市': '393', '眉山市': '396', '宜宾市': '397', '广安市': '398',
'达州市': '399', '雅安市': '400', '巴中市': '401', '资阳市': '402', '济南市': '223', '青岛市': '224', '枣庄市': '226', '烟台市': '228',
'淄博市': '225', '临沂市': '235', '东营市': '227', '潍坊市': '229', '济宁市': '230', '泰安市': '231', '威海市': '232', '日照市': '233',
'德州市': '236', '聊城市': '237', '滨州市': '238', '菏泽市': '239', '宝鸡市': '440', '咸阳市': '441', '汉中市': '444', '渭南市': '442',
'延安市': '443', '安康市': '446', '太原市': '84', '晋中市': '90', '运城市': '91', '大同市': '85', '阳泉市': '86', '长治市': '87',
'晋城市': '88', '临汾市': '93', '乌鲁木齐市': '475', '石河子市': '489', '昆明市': '415', '曲靖市': '416', '玉溪市': '417', '保山市': '418',
'昭通市': '419', '丽江市': '420', '宁波市': '176', '温州市': '177', '嘉兴市': '178', '金华市': '183', '台州市': '184', '湖州市': '179',
'绍兴市': '180', '舟山市': '181', '衢州市': '182', '丽水市': '185', '台北市': '493', '台中市': '496', '台北县': '500'}
print(invert_dict(dict(cityId_mapping)))
import random
import time
import requests
import json
from bs4 import BeautifulSoup
import re
import csv
class SoYoung(object):
    """Scraper for hospital listings and their coupon ("red bag") data on SoYoung.

    ``search`` fetches one page of the hospital list API, ``get_weikuan_info``
    scrapes the coupons from a hospital's mobile page, and
    ``response_analysis`` ties the two together for one list page.
    """

    # Patterns used to pull coupon fields out of one ``li.new-red-bag`` element;
    # compiled once instead of once per element.
    _TYPE_NAME_RE = re.compile('<span class="channel">(.*?)</span>')
    _TYPE_DESC_RE = re.compile('<div class="name">(.*?)</div>')
    _TIME_END_RE = re.compile('<div class="stop_date">(.*?)</div>')
    _XZ_PRICE_RE = re.compile('div class="code_notice">(.*?)</div>')
    _JD_PRICE_RE = re.compile('<div class="price"><i>¥</i>(.*?)</div>')

    def __init__(self):
        self.url = "https://api.soyoung.com/v8/superList/index"
        # NOTE(review): despite the name, `search` passes this dict to
        # requests.post positionally, i.e. as the form body, not as headers.
        self.headers = {"_time": "1629431742",
                        "ab_id": "F3243DBE867209BA715A755FADDD1DE",
                        "app_id": 42,
                        "channel": 1,
                        "cityId": 1,
                        "device_id": 196374256,
                        "device_model": "iPhone12,1",
                        "device_version": "13.6.1",
                        "idfa": "057F28DF-20B8-488F-A285-931367FCC110",
                        "is_tf": 0,
                        "list_name": "sy_app_superlist_homepage",
                        "lver": "8.30.1",
                        "page": 1,
                        "page_size": 20,
                        "push_app_id": 42,
                        "request_grey": 1,
                        "request_id": "6eb7d63dc28f7200ce9c303266496cd4",
                        "s_mei_device_id": "20200317131719d8bcbc37c54be511421dc3ebf7f1d0a801036b566bd47092",
                        "s_meng_device_id": "D2VCzq4o472Ur7QtdVY6RlcjO6h3455JlJ+OC39JcQC7sX6a",
                        "sys": 1,
                        "tab": "hospital",
                        "uid": "48804194",
                        "vistor_uid": "",
                        "xy_device_token": "93fa06111dea535c88cc07521f2e466c93",
                        "xy_sign": "xJRQVYzxt9eR2NYI1MBArQ%3D%3D",
                        "xy_token": "ad970db3d79f0833d1d25d3942068585"
                        }
        self.info_url = "https://api.soyoung.com/v8/hospitals/homeV2"
        # City/province id (as a string) -> display name.
        self.cityIdMapping = {'1': '北京', '9': '上海', '2': '天津', '22': '重庆', '12': '安徽省', '186': '合肥市', '187': '芜湖市',
                              '188': '蚌埠市', '189': '淮南市', '190': '马鞍山市', '191': '淮北市', '192': '铜陵市', '193': '安庆市',
                              '195': '滁州市', '196': '阜阳市', '197': '宿州市', '199': '六安地区', '200': '亳州市', '201': '池州市',
                              '202': '宣城市', '13': '福建省', '203': '福州市', '204': '厦门市', '205': '莆田市', '206': '三明市',
                              '207': '泉州市', '208': '漳州市', '209': '南平市', '210': '龙岩市', '211': '宁德市', '19': '广东省',
                              '289': '广州市', '290': '韶关市', '291': '深圳市', '305': '东莞市', '306': '中山市', '292': '珠海市',
                              '294': '佛山市', '293': '汕头市', '295': '江门市', '296': '湛江市', '297': '茂名市', '298': '肇庆市',
                              '299': '惠州市', '300': '梅州市', '301': '汕尾市', '302': '河源市', '303': '阳江市', '304': '清远市',
                              '307': '潮州市', '308': '揭阳市', '309': '云浮市', '20': '广西壮族自治区', '310': '南宁市', '311': '柳州市',
                              '312': '桂林市', '313': '梧州市', '314': '北海市', '315': '防城港市', '316': '钦州市', '317': '贵港市',
                              '318': '玉林市', '319': '百色市', '320': '贺州市', '322': '来宾市', '24': '贵州省', '406': '贵阳市',
                              '408': '遵义市', '407': '六盘水市', '410': '铜仁地区', '411': '黔西南布依族苗族自治州', '412': '毕节地区',
                              '413': '黔东南苗族侗族自治州', '414': '黔南布依族苗族自治州', '28': '甘肃省', '448': '兰州市', '451': '白银市',
                              '454': '张掖市', '455': '平凉市', '456': '酒泉市', '457': '庆阳市', '459': '陇南市', '16': '河南省',
                              '240': '郑州市', '241': '开封市', '242': '洛阳市', '243': '平顶山市', '244': '安阳市', '245': '鹤壁市',
                              '246': '新乡市', '247': '焦作市', '248': '濮阳市', '249': '许昌市', '250': '漯河市', '251': '三门峡市',
                              '252': '南阳市', '253': '商丘市', '254': '信阳市', '255': '周口市', '256': '驻马店市', '257': '济源市',
                              '17': '湖北省', '258': '武汉市', '259': '黄石市', '266': '荆州市', '261': '宜昌市', '45068': '襄阳市',
                              '273': '天门市', '260': '十堰市', '262': '襄樊市', '263': '鄂州市', '264': '荆门市', '265': '孝感市',
                              '267': '黄冈市', '268': '咸宁市', '269': '随州市', '270': '恩施土家族苗族自治州', '271': '仙桃市', '272': '潜江市',
                              '18': '湖南省', '275': '长沙市', '276': '株洲市', '278': '衡阳市', '283': '益阳市', '279': '邵阳市',
                              '277': '湘潭市', '280': '岳阳市', '281': '常德市', '282': '张家界市', '284': '郴州市', '285': '永州市',
                              '286': '怀化市', '287': '娄底市', '288': '湘西土家族苗族自治州', '3': '河北省', '73': '石家庄市', '74': '唐山市',
                              '75': '秦皇岛市', '76': '邯郸市', '77': '邢台市', '78': '保定市', '79': '张家口市', '80': '承德市',
                              '81': '衡水市', '82': '廊坊市', '83': '沧州市', '8': '黑龙江省', '130': '哈尔滨市', '131': '齐齐哈尔市',
                              '132': '鸡西市', '133': '鹤岗市', '135': '大庆市', '137': '佳木斯市', '138': '七台河市', '139': '牡丹江市',
                              '141': '绥化市', '21': '海南省', '324': '海口市', '325': '三亚市', '327': '琼海市', '328': '儋州市',
                              '329': '文昌市', '330': '万宁市', '331': '东方市', '339': '陵水黎族自治县', '10': '江苏省', '162': '南京市',
                              '166': '苏州市', '163': '无锡市', '165': '常州市', '167': '南通市', '170': '盐城市', '164': '徐州市',
                              '168': '连云港市', '171': '扬州市', '169': '淮安市', '172': '镇江市', '173': '泰州市', '174': '宿迁市',
                              '14': '江西省', '212': '南昌市', '213': '景德镇市', '214': '萍乡市', '215': '九江市', '216': '新余市',
                              '217': '鹰潭市', '218': '赣州市', '219': '吉安市', '220': '宜春市', '221': '抚州市', '222': '上饶市',
                              '7': '吉林省', '121': '长春市', '122': '吉林市', '123': '四平市', '125': '通化市', '126': '白山市',
                              '127': '松原市', '129': '延边朝鲜族自治州', '6': '辽宁省', '107': '沈阳市', '108': '大连市', '109': '鞍山市',
                              '110': '抚顺市', '112': '丹东市', '113': '锦州市', '114': '营口市', '115': '阜新市', '116': '辽阳市',
                              '117': '盘锦市', '118': '铁岭市', '119': '朝阳市', '120': '葫芦岛市', '5': '内蒙古自治区', '95': '呼和浩特市',
                              '96': '包头市', '98': '赤峰市', '99': '通辽市', '100': '鄂尔多斯市', '101': '呼伦贝尔市', '102': '巴彦淖尔市',
                              '103': '乌兰察布市', '104': '兴安盟', '105': '锡林郭勒盟', '30': '宁夏回族自治区', '470': '银川市', '29': '青海省',
                              '462': '西宁市', '469': '海西蒙古族藏族自治州', '566': '其他', '23': '四川省', '385': '成都市', '386': '自贡市',
                              '390': '绵阳市', '389': '德阳市', '394': '乐山市', '395': '南充市', '387': '攀枝花市', '388': '泸州市',
                              '391': '广元市', '392': '遂宁市', '393': '内江市', '396': '眉山市', '397': '宜宾市', '398': '广安市',
                              '399': '达州市', '400': '雅安市', '401': '巴中市', '402': '资阳市', '403': '阿坝藏族羌族自治州',
                              '405': '凉山彝族自治州', '15': '山东省', '223': '济南市', '224': '青岛市', '226': '枣庄市', '228': '烟台市',
                              '225': '淄博市', '235': '临沂市', '227': '东营市', '229': '潍坊市', '230': '济宁市', '231': '泰安市',
                              '232': '威海市', '233': '日照市', '236': '德州市', '237': '聊城市', '238': '滨州市', '239': '菏泽市',
                              '27': '陕西省', '438': '西安市', '440': '宝鸡市', '441': '咸阳市', '444': '汉中市', '442': '渭南市',
                              '443': '延安市', '445': '榆林市', '446': '安康市', '4': '山西省', '84': '太原市', '90': '晋中市',
                              '91': '运城市', '85': '大同市', '86': '阳泉市', '87': '长治市', '88': '晋城市', '89': '朔州市', '93': '临汾市',
                              '26': '西藏自治区', '433': '山南市', '437': '林芝市', '31': '新疆维吾尔自治区', '475': '乌鲁木齐市',
                              '478': '哈密地区', '479': '昌吉回族自治州', '481': '巴音郭楞蒙古自治州', '486': '伊犁哈萨克自治州', '489': '石河子市',
                              '25': '云南省', '415': '昆明市', '416': '曲靖市', '417': '玉溪市', '418': '保山市', '419': '昭通市',
                              '420': '丽江市', '423': '楚雄彝族自治州', '424': '红河哈尼族彝族自治州', '425': '文山壮族苗族自治州',
                              '426': '西双版纳傣族自治州', '427': '大理白族自治州', '11': '浙江省', '175': '杭州市', '176': '宁波市',
                              '177': '温州市', '178': '嘉兴市', '183': '金华市', '184': '台州市', '179': '湖州市', '180': '绍兴市',
                              '181': '舟山市', '182': '衢州市', '185': '丽水市', '32': '台湾', '493': '台北市', '494': '高雄市',
                              '496': '台中市', '498': '新竹市', '500': '台北县', '502': '桃园县', '510': '台南县', '45070': '新北市',
                              '33': '香港特别行政区', '516': '中西区', '518': '九龙城区', '521': '深水埗区', '523': '湾仔区', '524': '油尖旺区',
                              '534': '澳门特别行政区'}

    def get_gengmei_hospital_shangwu_info(self):
        """Read ``更美机构商务信息.csv`` into a list of one-entry dicts.

        Each non-blank line is split on whitespace. The first field becomes
        the key; the value holds the second field under ``"大区"`` and the
        third under ``"商务"`` (the latter only when the line has exactly
        three fields). Missing fields become ``""``.

        Returns:
            list[dict]: one ``{first_field: {"大区": ..., "商务": ...}}`` per line.
        """
        all_hospital_shangwu_info = []
        # ``with`` closes the handle; the original never closed it.
        with open("更美机构商务信息.csv", "r", encoding="utf-8") as f:
            for line in f:
                fields = line.strip().split()
                print(fields)
                if not fields:
                    # A blank line has no key; the original raised IndexError.
                    continue
                all_hospital_shangwu_info.append({fields[0]: {
                    "大区": fields[1] if len(fields) >= 2 else "",
                    "商务": fields[2] if len(fields) == 3 else ""}})
        return all_hospital_shangwu_info

    def search(self, page=0, retries=3):
        """Fetch one page of the hospital list API.

        Args:
            page: page number stored into ``self.headers["page"]`` before the request.
            retries: how many more times to retry the same page after HTTP 403.

        Returns:
            The decoded JSON response, or ``None`` when the request fails.
        """
        try:
            # Sleep a random sub-second interval before each request to
            # reduce the chance of being banned.
            time.sleep(random.random())
            self.headers["page"] = page
            response_res = requests.post(self.url, self.headers, verify=False)
            if response_res.status_code == 200 and response_res.text:
                return json.loads(response_res.text)
            if response_res.status_code == 403 and retries > 0:
                # Retry the SAME page, a bounded number of times; the old code
                # recursed with the default page 0 and no limit.
                return self.search(page, retries - 1)
            print("列表请求发生错误,停止请求")
        except Exception as exc:  # best-effort scraper: report and give up on this page
            print("列表请求发生错误,停止请求", exc)
        return None

    def info_search(self, info_url, retries=3):
        """POST to a hospital detail page and return its body text.

        Args:
            info_url: URL of the hospital page.
            retries: how many more times to retry after HTTP 403 (30 s pause each).

        Returns:
            str | None: the response text on HTTP 200, otherwise ``None``.
        """
        info_headers = {
            "cookie": "_ga=GA1.2.2084074278.1626341224; smidV2=20210715174222a8c0fc7fc96128d6b9c09abf5787b250008f7cb10a6f61380; PHPSESSID=ace2ec3e62b7d5a8f7021c3c85e0bb00; cityId=275; Hm_lvt_043bb3729b5d18b3373f4f287baed3ec=1628817456; __usersign__=1629085055462892805; Hm_lvt_b366fbb5465f5a86e1cc2871552e1fdb=1628817456,1628837727; _gid=GA1.2.2070957881.1629442869; Hm_lpvt_b366fbb5465f5a86e1cc2871552e1fdb=1629442968; _gat=1; Hm_lpvt_043bb3729b5d18b3373f4f287baed3ec=1629442968",
            "accept": "application/json",
            "accept-encoding": "gzip, deflate, br",
            "accept-language": "zh-CN,zh;q=0.9",
            "referer": "https://m.soyoung.com",
            "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36",
            "x-requested-with": "XMLHttpRequest"
        }
        # NOTE(review): the second positional argument of requests.post is
        # `data`, so these values travel as the form body, not as HTTP
        # headers. Left unchanged because switching to headers= could change
        # what the server returns -- confirm the intent.
        response_res = requests.post(info_url, info_headers, verify=False)
        print("info_url:{},response_status:{}".format(info_url, response_res.status_code))
        if response_res.status_code == 200 and response_res.text:
            return response_res.text
        print("机构详情发生错误,重新请求")
        if response_res.status_code == 403 and retries > 0:
            time.sleep(30)
            # Retry THIS url; the old code called self.search(), which hits
            # the list endpoint and returns a dict instead of page text.
            return self.info_search(info_url, retries - 1)
        return None

    def get_weikuan_info(self, hospital_id):
        """Scrape the coupon ("red bag") entries from a hospital's mobile page.

        Args:
            hospital_id: id interpolated into the hospital page URL.

        Returns:
            list[dict]: one dict per ``li.new-red-bag`` element with the keys
            ``红包类型``, ``红包名称``, ``到期时间``, ``红包满减`` and ``红包金额``.
            Empty when the page could not be fetched.
        """
        info_url = "https://m.soyoung.com/y/hospital/" + str(hospital_id) + "/?sys=1"
        response = self.info_search(info_url)
        if not response:
            # info_search returns None on failure; BeautifulSoup(None) would raise.
            return []
        soup = BeautifulSoup(response, 'html.parser')
        red_bag_info = []
        for item in soup.find_all(name="li", attrs={"class": "new-red-bag"}):
            html = str(item)
            type_name = self._TYPE_NAME_RE.findall(html)
            type_desc = self._TYPE_DESC_RE.findall(html)
            time_end = self._TIME_END_RE.findall(html)
            xz_price = self._XZ_PRICE_RE.findall(html)
            jd_price = self._JD_PRICE_RE.findall(html)
            red_bag_info.append(
                {"红包类型": type_name,
                 "红包名称": type_desc if type_desc else "",
                 "到期时间": time_end[0] if time_end else "",
                 "红包满减": xz_price[0].replace("满", "").replace("可用", "") if xz_price else "",
                 "红包金额": jd_price[0] if jd_price else ""})
        return red_bag_info

    def response_analysis(self, page, have_get_hospital_id=None):
        """Parse one list page and collect coupons for hospitals not seen before.

        Args:
            page: list page number passed to ``search``.
            have_get_hospital_id: list of hospital names already processed.
                It is extended in place so a caller can share it across pages;
                when omitted a fresh list is used for this call.

        Returns:
            tuple[dict, dict]: ``(hospital_red_bag, city_info)`` where the first
            maps hospital name -> coupon list and the second maps hospital
            name -> city name (``""`` for an unknown city id). Both are empty
            when the page could not be fetched or parsed.
        """
        if have_get_hospital_id is None:
            # A mutable default would be shared between unrelated calls.
            have_get_hospital_id = []
        try:
            hospital_red_bag = {}
            city_info = {}
            response = self.search(page)
            if not response:
                return {}, {}
            responseData = response.get("responseData", {}).get("data") or []
            for res in responseData:
                if res.get("type") != "feed_area":
                    continue
                feed_list = res.get("items", {}).get("feed_list", [])
                if not feed_list:
                    print("end")
                    continue
                for data in feed_list:
                    for item in data.get("items") or []:
                        item_data = item.get("data", {})
                        hospital_id = item_data.get("hospital_id", "")
                        hospital_name = item_data.get("name_cn", "")
                        city_id = item_data.get("city_id", "")
                        # .get keeps one unmapped city id from discarding the
                        # whole page through the exception handler below.
                        city_info[hospital_name] = self.cityIdMapping.get(str(city_id), "")
                        if hospital_name not in have_get_hospital_id:
                            weikuan_info = self.get_weikuan_info(hospital_id)
                            hospital_red_bag[hospital_name] = weikuan_info
                            have_get_hospital_id.append(hospital_name)
                            # Was format(i, ...): `i` is not defined here, so the
                            # NameError silently emptied every page's result.
                            print("page:{},hospital_id:{}".format(page, hospital_name))
            return hospital_red_bag, city_info
        except Exception as exc:  # best-effort: report and return empty results
            print("response_analysis failed on page {}: {}".format(page, exc))
            return {}, {}
# Script entry point: load and print the Gengmei hospital / sales-rep table.
# The coupon export loop below is currently disabled (kept as commented code).
if __name__ == '__main__':
    soyoung = SoYoung()
    # Kept because the disabled export loop below relies on these two names.
    hospital_red_bag = {}
    have_get_hospital_id = []

    # Load the Gengmei hospital and sales-rep information.
    all_hospital_shangwu_info = soyoung.get_gengmei_hospital_shangwu_info()
    print(all_hospital_shangwu_info)

    # f = open("医院尾款劵数据.csv", "a+", encoding="utf-8")
    # csv_writer = csv.writer(f)
    # # csv_writer.writerow(
    # #     ["大区", "城市", "商务", "机构名称", "满", "减", "劣势/缺失", "到期时间", "查到劣势日期", "最晚补券时间"])
    #
    # for i in range(1, 2):
    #     hospital_red_bag, city_info = soyoung.response_analysis(page=i, have_get_hospital_id=have_get_hospital_id)
    #     for name, value in hospital_red_bag.items():
    #         for item in value:
    #             csv_writer.writerow(["", city_info[name], "", name, item.get("红包满减"), item.get("红包金额"), "",
    #                                  item.get("到期时间"),
    #                                  time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), ""])
    # print("have_get_hospital_id:", have_get_hospital_id)
    # print(hospital_red_bag)
This source diff could not be displayed because it is too large. You can view the blob instead.
"大区", "城市", "商务", "机构名称", "满", "减", "劣势/缺失", "到期时间", "查到劣势日期", "最晚补券时间"
,北京,,北京好年华眼整形中心,10000,1000,,2027-02-28 到期,2021-08-20 17:36:56,
,北京,,北京好年华眼整形中心,100,5,,2021-08-31 到期,2021-08-20 17:36:56,
,北京,,北京好年华眼整形中心,500,30,,2021-08-31 到期,2021-08-20 17:36:56,
,北京,,北京清木医疗美容诊所,2000,100,,2023-03-31 到期,2021-08-20 17:36:56,
,北京,,北京清木医疗美容诊所,10000,300,,2022-06-30 到期,2021-08-20 17:36:56,
,北京,,北京清木医疗美容诊所,999,50,,2023-04-30 到期,2021-08-20 17:36:56,
,北京,,北京清木医疗美容诊所,2000,100,,2024-04-30 到期,2021-08-20 17:36:56,
,北京,,北京清木医疗美容诊所,300,30,,2024-04-30 到期,2021-08-20 17:36:56,
,北京,,美诗沁世茂医疗美容,20,20,,2022-08-31 到期,2021-08-20 17:36:56,
,北京,,美诗沁世茂医疗美容,500,50,,2022-08-31 到期,2021-08-20 17:36:56,
,北京,,美诗沁世茂医疗美容,5000,600,,2022-08-31 到期,2021-08-20 17:36:56,
,北京,,北京新星靓医疗美容医院,3000,500,,2026-03-31 到期,2021-08-20 17:36:56,
,北京,,北京新星靓医疗美容医院,223,222,,2024-04-26 到期,2021-08-20 17:36:56,
,北京,,北京新星靓医疗美容医院,9999,2000,,2021-08-31 到期,2021-08-20 17:36:56,
,北京,,北京新星靓医疗美容医院,9999,2000,,2025-05-31 到期,2021-08-20 17:36:56,
,北京,,北京新星靓医疗美容医院,9999,2000,,2025-05-31 到期,2021-08-20 17:36:56,
,北京,,北京新星靓医疗美容医院,2000,1000,,2025-05-31 到期,2021-08-20 17:36:56,
,北京,,北京新星靓医疗美容医院,9999,2000,,2028-05-31 到期,2021-08-20 17:36:56,
,北京,,北京新星靓医疗美容医院,1000,100,,2028-06-30 到期,2021-08-20 17:36:56,
,北京,,北京新星靓医疗美容医院,2000,200,,2024-06-28 到期,2021-08-20 17:36:56,
,北京,,北京润美玉之光医疗美容,1000,50,,2021-12-31 到期,2021-08-20 17:36:56,
,北京,,北京润美玉之光医疗美容,3000,150,,2021-12-31 到期,2021-08-20 17:36:56,
,北京,,北京润美玉之光医疗美容,5000,300,,2021-12-31 到期,2021-08-20 17:36:56,
,北京,,北京润美玉之光医疗美容,10000,500,,2021-12-31 到期,2021-08-20 17:36:56,
,北京,,北京叶子整形美容医院,1000,100,,2021-09-26 到期,2021-08-20 17:36:56,
,北京,,北京亚楠容悦医疗美容,100,8,,2021-08-31 到期,2021-08-20 17:36:56,
,北京,,北京亚楠容悦医疗美容,3000,100,,2021-08-31 到期,2021-08-20 17:36:56,
,北京,,北京画美医疗美容医院,10000,1000,,2022-07-31 到期,2021-08-20 17:36:56,
,北京,,北京画美医疗美容医院,5000,500,,2022-07-31 到期,2021-08-20 17:36:56,
,北京,,北京画美医疗美容医院,1500,200,,2022-07-31 到期,2021-08-20 17:36:56,
,北京,,北京画美医疗美容医院,500,50,,2022-07-31 到期,2021-08-20 17:36:56,
,北京,,北京爱悦丽格医疗美容,500,50,,2021-08-31 到期,2021-08-20 17:36:56,
,北京,,北京爱悦丽格医疗美容,1000,100,,2021-08-31 到期,2021-08-20 17:36:56,
,北京,,上上相整形·脂肪骨性移植,15000,350,,2021-10-31 到期,2021-08-20 17:36:56,
,北京,,上上相整形·脂肪骨性移植,30000,700,,2021-10-31 到期,2021-08-20 17:36:56,
,北京,,北京嘉禾医疗美容,500,50,,2022-06-30 到期,2021-08-20 17:36:56,
,北京,,北京嘉禾医疗美容,999,100,,2022-06-30 到期,2021-08-20 17:36:56,
,北京,,北京嘉禾医疗美容,1999,200,,2022-08-31 到期,2021-08-20 17:36:56,
,北京,,北京嘉禾医疗美容,4999,500,,2022-08-31 到期,2021-08-20 17:36:56,
,北京,,北京嘉禾医疗美容,19999,2000,,2022-08-31 到期,2021-08-20 17:36:56,
,北京,,北京雅靓医疗美容,1000,100,,2021-12-31 到期,2021-08-20 17:36:56,
,北京,,北京俊泰美好医疗美容门诊部(鼻修复及不明注射物取出中心),800,100,,2021-08-31 到期,2021-08-20 17:36:56,
,北京,,北京俊泰美好医疗美容门诊部(鼻修复及不明注射物取出中心),500,50,,2021-08-31 到期,2021-08-20 17:36:56,
,北京,,北京华悦府医疗美容,999,50,,2021-08-31 到期,2021-08-20 17:36:56,
,北京,,北京华悦府医疗美容,3980,2000,,2021-10-31 到期,2021-08-20 17:36:56,
,北京,,北京艺星医疗美容医院,10999,500,,2021-08-31 到期,2021-08-20 17:36:56,
,北京,,北京艺星医疗美容医院,1999,200,,2021-10-31 到期,2021-08-20 17:36:56,
,北京,,北京艺星医疗美容医院,4000,300,,2021-10-31 到期,2021-08-20 17:36:56,
,北京,,北京艺星医疗美容医院,8000,800,,2021-10-31 到期,2021-08-20 17:36:56,
This source diff could not be displayed because it is too large. You can view the blob instead.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment