Commit b5fc8879 authored by 张彦钊

add

parent a1908523
...@@ -31,7 +31,7 @@ def name_short(): ...@@ -31,7 +31,7 @@ def name_short():
db.close() db.close()
print(name_list[:10]) print(name_list[:10])
names = ["医疗", "门诊", "研究所", "有限", "公司", "医学", "诊所", "中心", "医美", "集团", "卫生", "机构", "专业", names = ["医疗", "门诊", "研究所", "有限", "公司", "医学", "诊所", "中心", "医美", "集团", "卫生", "机构", "专业",
"光学", "国际", "连锁", "综合", "专科", ""] "光学", "国际", "连锁", "综合", "专科", "门诊部"]
location = ["街道", "社区",] location = ["街道", "社区",]
city_list = get_city_names() city_list = get_city_names()
stop_words = names + location + city_list stop_words = names + location + city_list
...@@ -39,6 +39,8 @@ def name_short(): ...@@ -39,6 +39,8 @@ def name_short():
for name in name_list: for name in name_list:
for word in stop_words: for word in stop_words:
name = re.sub(word, '', name) name = re.sub(word, '', name)
# 去除\t
name = re.sub(r'\t', "", name)
new_names.append(name) new_names.append(name)
df = pd.DataFrame() df = pd.DataFrame()
df['old_name'] = name_list df['old_name'] = name_list
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment