Commit b21cc87a authored by 张彦钊

add

parent b24843b4
...@@ -30,27 +30,28 @@ def name_short(): ...@@ -30,27 +30,28 @@ def name_short():
name_list = pd.DataFrame(list(result))[0].values.tolist() name_list = pd.DataFrame(list(result))[0].values.tolist()
db.close() db.close()
print(name_list[:10]) print(name_list[:10])
names = ["门诊部","医疗", "门诊", "研究所", "有限", "公司", "医学", "诊所", "中心","美容","医院","整形","外科", names = ["美容院","门诊部","医疗", "门诊", "研究所", "有限", "公司", "医学", "诊所", "中心","美容","医院","整形","外科",
"医美", "集团", "卫生", "机构", "专业","皮肤","管理", "集团", "卫生", "机构", "专业","皮肤","管理",
"光学", "国际", "连锁", "综合", "专科","市","\*"] "光学", "国际", "连锁", "综合", "专科","市","\*","\•"]
location = ["街道", "社区",] location = ["街道", "社区",]
city_list = get_city_names() city_list = get_city_names()
city_list += ["香港","澳门","韩国"] city_list += ["香港","澳门","韩国"]
for word in ["市中心","丰台","科","东方"]: for word in ["市中心","丰台","科","东方","鞍山"]:
if word in city_list: if word in city_list:
city_list.remove(word) city_list.remove(word)
city_list.append("鞍山")
stop_words = names + location stop_words = names + location
new_names = [] second_names = []
third_names = [] third_names = []
names_new = [] first_names = []
for name in name_list: for name in name_list:
for test_word in ["测试","测试1","ceshi","ceshi1"]: for test_word in ["测试","测试1","ceshi","ceshi1"]:
if test_word in name: if test_word in name:
pass pass
else: else:
names_new.append(name) first_names.append(name)
city_tag = "flag" city_tag = "flag"
for city in city_list: for city in city_list:
if city in name: if city in name:
...@@ -66,19 +67,22 @@ def name_short(): ...@@ -66,19 +67,22 @@ def name_short():
name = re.sub(r'\(.*?\)', '', name) name = re.sub(r'\(.*?\)', '', name)
# 去除 左英文括号,右中文括号 # 去除 左英文括号,右中文括号
name = re.sub(r'\(.*?\)', '', name) name = re.sub(r'\(.*?\)', '', name)
if city_tag != "flag": if city_tag != "flag" and name != "":
third_names.append(city_tag+name) third_names.append(city_tag+name)
else: else:
third_names.append("无") third_names.append("无")
new_names.append(name) if name != "":
second_names.append(name)
else:
second_names.append(first_names[-1])
df = pd.DataFrame() df = pd.DataFrame()
df['old_name'] = names_new df['old_name'] = first_names
df['new_name'] = new_names df['new_name'] = second_names
df['core_name'] = third_names df['core_name'] = third_names
print(df.head(6)) print(df.head(6))
df.to_csv("/tmp/"+"name_29.csv",index=None,encoding="utf_8_sig") df.to_csv("/tmp/"+"name_66.csv",index=None,encoding="utf_8_sig")
df.to_csv("/tmp/" + "name_30.csv", index=None, encoding="utf_8")
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment