Commit 00ed6533 authored by 张彦钊

Merge branch 'zhao22' into 'master'

add

See merge request !93
parents 9fcd4762 d57b404d
...@@ -30,7 +30,7 @@ def name_short(): ...@@ -30,7 +30,7 @@ def name_short():
name_list = pd.DataFrame(list(result))[0].values.tolist() name_list = pd.DataFrame(list(result))[0].values.tolist()
db.close() db.close()
print(name_list[:10]) print(name_list[:10])
names = ["美容院","门诊部","医疗", "门诊", "研究所", "有限", "公司", "医学", "诊所", "中心","美容","医院","整形","外科", names = ["中医","美容院","门诊部","医疗", "门诊", "研究所", "有限", "公司", "医学", "诊所", "中心","美容","医院","整形","外科",
"集团", "卫生", "机构", "专业","皮肤","管理", "集团", "卫生", "机构", "专业","皮肤","管理",
"光学", "国际", "连锁", "综合", "专科","市","\*","\•"] "光学", "国际", "连锁", "综合", "专科","市","\*","\•"]
location = ["街道", "社区",] location = ["街道", "社区",]
...@@ -52,29 +52,33 @@ def name_short(): ...@@ -52,29 +52,33 @@ def name_short():
pass pass
else: else:
first_names.append(name) first_names.append(name)
city_tag = "flag" if "第" in name:
for city in city_list:
if city in name:
city_tag = city
name = re.sub(city, '', name)
for word in stop_words:
name = re.sub(word, '', name)
# 去除\t
name = re.sub(r'\t', "", name)
# 去除 中文括号( )
name = re.sub(r'\(.*?\)', '', name)
# 去除 英文括号( )
name = re.sub(r'\(.*?\)', '', name)
# 去除 左英文括号,右中文括号
name = re.sub(r'\(.*?\)', '', name)
if city_tag != "flag" and name != "":
third_names.append(city_tag+name)
else:
third_names.append("无")
if name != "":
second_names.append(name) second_names.append(name)
third_names.append(name)
else: else:
second_names.append(first_names[-1]) city_tag = "flag"
for city in city_list:
if city in name:
city_tag = city
name = re.sub(city, '', name)
for word in stop_words:
name = re.sub(word, '', name)
# 去除\t
name = re.sub(r'\t', "", name)
# 去除 中文括号( )
name = re.sub(r'\(.*?\)', '', name)
# 去除 英文括号( )
name = re.sub(r'\(.*?\)', '', name)
# 去除 左英文括号,右中文括号
name = re.sub(r'\(.*?\)', '', name)
if city_tag != "flag" and name != "":
third_names.append(city_tag+name)
else:
third_names.append("无")
if name != "":
second_names.append(name)
else:
second_names.append(first_names[-1])
df = pd.DataFrame() df = pd.DataFrame()
df['old_name'] = first_names df['old_name'] = first_names
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment