Commit 92e50bf6 authored by 张彦钊

Merge branch 'zhao22' into 'master'

修改医院名称脚本

See merge request !88
parents 9b1d1c97 534799ff
......@@ -32,40 +32,50 @@ def name_short():
print(name_list[:10])
names = ["门诊部","医疗", "门诊", "研究所", "有限", "公司", "医学", "诊所", "中心","美容","医院","整形","外科",
"医美", "集团", "卫生", "机构", "专业","皮肤","管理",
"光学", "国际", "连锁", "综合", "专科",]
"光学", "国际", "连锁", "综合", "专科","市","*"]
location = ["街道", "社区",]
city_list = get_city_names()
city_list += ["香港","澳门","韩国"]
for word in ["市中心","丰台","科","东方"]:
if word in city_list:
city_list.remove(word)
stop_words = names + location
new_names = []
third_names = []
for name in name_list:
city_tag = "flag"
for city in city_list:
if city in name:
city_tag = city
name = re.sub(city, '', name)
for word in stop_words:
name = re.sub(word, '', name)
# 去除\t
name = re.sub(r'\t', "", name)
# 去除 中文括号( )
name = re.sub(r'\(.*?\)', '', name)
# 去除 英文括号( )
name = re.sub(r'\(.*?\)', '', name)
# 去除 左英文括号,右中文括号
name = re.sub(r'\(.*?\)', '', name)
if city_tag != "flag":
third_names.append(city_tag+name)
else:
third_names.append("无")
new_names.append(name)
for test_word in ["测试","测试1","ceshi","ceshi1"]:
if test_word in name:
pass
else:
city_tag = "flag"
for city in city_list:
if city in name:
city_tag = city
name = re.sub(city, '', name)
for word in stop_words:
name = re.sub(word, '', name)
# 去除\t
name = re.sub(r'\t', "", name)
# 去除 中文括号( )
name = re.sub(r'\(.*?\)', '', name)
# 去除 英文括号( )
name = re.sub(r'\(.*?\)', '', name)
# 去除 左英文括号,右中文括号
name = re.sub(r'\(.*?\)', '', name)
if city_tag != "flag":
third_names.append(city_tag+name)
else:
third_names.append("无")
new_names.append(name)
df = pd.DataFrame()
df['old_name'] = name_list
df['new_name'] = new_names
df['core_name'] = third_names
print(df.head(6))
df.to_csv("/tmp/"+"name_22.csv",index=None)
df.to_csv("/tmp/"+"name_28.csv",index=None)
......@@ -89,7 +99,7 @@ def name_process(name):
project_tags = ["口腔","植发","牙","皮肤","眼","外科","美容","整形","烧伤","胸","丰胸","美胸","祛痘","祛斑","脱毛",
"创伤","除疤","半永久","纹绣","纹眉"]
names = ["医疗","门诊","研究所","有限","公司","医学","诊所","中心","医美","集团","卫生","机构","专业",
"光学","国际","连锁","综合","专科",""]
"光学","国际","连锁","综合","专科",""]
location = ["街道","社区",]
stop_words = project_tags + names + location
for word in stop_words:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment