Commit d57b404d authored by 张彦钊's avatar 张彦钊

add

parent b21cc87a
......@@ -30,7 +30,7 @@ def name_short():
name_list = pd.DataFrame(list(result))[0].values.tolist()
db.close()
print(name_list[:10])
names = ["美容院","门诊部","医疗", "门诊", "研究所", "有限", "公司", "医学", "诊所", "中心","美容","医院","整形","外科",
names = ["中医","美容院","门诊部","医疗", "门诊", "研究所", "有限", "公司", "医学", "诊所", "中心","美容","医院","整形","外科",
"集团", "卫生", "机构", "专业","皮肤","管理",
"光学", "国际", "连锁", "综合", "专科","市","\*","\•"]
location = ["街道", "社区",]
......@@ -52,29 +52,33 @@ def name_short():
pass
else:
first_names.append(name)
city_tag = "flag"
for city in city_list:
if city in name:
city_tag = city
name = re.sub(city, '', name)
for word in stop_words:
name = re.sub(word, '', name)
# 去除\t
name = re.sub(r'\t', "", name)
# 去除 中文括号( )
name = re.sub(r'\(.*?\)', '', name)
# 去除 英文括号( )
name = re.sub(r'\(.*?\)', '', name)
# 去除 左英文括号,右中文括号
name = re.sub(r'\(.*?\)', '', name)
if city_tag != "flag" and name != "":
third_names.append(city_tag+name)
else:
third_names.append("无")
if name != "":
if "第" in name:
second_names.append(name)
third_names.append(name)
else:
second_names.append(first_names[-1])
city_tag = "flag"
for city in city_list:
if city in name:
city_tag = city
name = re.sub(city, '', name)
for word in stop_words:
name = re.sub(word, '', name)
# 去除\t
name = re.sub(r'\t', "", name)
# 去除 中文括号( )
name = re.sub(r'\(.*?\)', '', name)
# 去除 英文括号( )
name = re.sub(r'\(.*?\)', '', name)
# 去除 左英文括号,右中文括号
name = re.sub(r'\(.*?\)', '', name)
if city_tag != "flag" and name != "":
third_names.append(city_tag+name)
else:
third_names.append("无")
if name != "":
second_names.append(name)
else:
second_names.append(first_names[-1])
df = pd.DataFrame()
df['old_name'] = first_names
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment