From a1908523d047ae48b5b9c4124231bc2b77bc3025 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=BC=A0=E5=BD=A6=E9=92=8A?= <zhangyanzhao@igengmei.com>
Date: Wed, 13 Jan 2021 16:26:55 +0800
Subject: [PATCH] add

Add get_city_names() and make name_short() strip generic words, location
words and city names from hospital names, writing the old/new name pairs
to /home/gmuser/12.csv.

---
 zyz/hosipatl_name.py | 73 +++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 66 insertions(+), 7 deletions(-)

diff --git a/zyz/hosipatl_name.py b/zyz/hosipatl_name.py
index d915de5a..92178895 100644
--- a/zyz/hosipatl_name.py
+++ b/zyz/hosipatl_name.py
@@ -9,14 +9,73 @@ import pandas as pd
 
 
-# 抽取医院名字简称
+def _fetch_names(sql):
+    """Run ``sql`` on the ``zhengxing`` database and return the first column.
+
+    Returns a plain list holding the first field of every fetched row; the
+    list is empty when the query yields no rows.  The connection is closed
+    even if the query raises.
+    """
+    # NOTE(review): host and credentials are hard-coded, exactly as before;
+    # they should move to configuration or a secret store.
+    db = pymysql.connect(host='172.16.30.143', port=3306, user='work', passwd='BJQaT9VzDcuPBqkd', db='zhengxing')
+    try:
+        cursor = db.cursor()
+        cursor.execute(sql)
+        return [row[0] for row in cursor.fetchall()]
+    finally:
+        db.close()
+
+
+def get_city_names():
+    """Return the ``name`` column of every row in ``api_city``.
+
+    The first ten names are printed as a quick sanity check.
+    """
+    name_list = _fetch_names("select name from api_city;")
+    print(name_list[:10])
+    return name_list
+
+
+def _strip_stop_words(name, stop_words):
+    """Return ``name`` with every literal occurrence of each stop word removed.
+
+    Stop words are removed one after another, in the given order.  They are
+    treated as plain text rather than regular expressions, so punctuation in
+    a city name cannot change the match.  Empty or ``None`` entries (and an
+    empty or ``None`` ``name``) are left alone.
+    """
+    if not name:
+        return name
+    for word in stop_words:
+        if word:
+            name = name.replace(word, '')
+    return name
+
+
 def name_short():
-    db = pymysql.connect(host='172.16.30.143', port=3306, user='work', passwd='BJQaT9VzDcuPBqkd', db='zhengxing')
-    cursor = db.cursor()
-    sql_active = "select name from api_hospital;"
-    cursor.execute(sql_active)
-    result = cursor.fetchall()
-    name_list = pd.DataFrame(list(result))[0].values.tolist()
+    """Build short hospital names and save them next to the originals.
+
+    Reads hospital names from ``api_hospital``, removes generic business
+    words, location words and every city name from each of them, prints a
+    preview and writes the ``old_name``/``new_name`` pairs to
+    ``/home/gmuser/12.csv``.
+    """
+    # NOTE(review): only the first 20 hospitals are processed, as in the
+    # original change -- this looks like a debugging limit; confirm.
+    name_list = _fetch_names("select name from api_hospital;")[:20]
     print(name_list[:10])
+    # Generic words that carry no identifying information in a hospital name.
+    names = ["医疗", "门诊", "研究所", "有限", "公司", "医学", "诊所", "中心", "医美", "集团", "卫生", "机构", "专业",
+             "光学", "国际", "连锁", "综合", "专科"]
+    location = ["街道", "社区"]
+    city_list = get_city_names()
+    stop_words = names + location + city_list
+    new_names = [_strip_stop_words(name, stop_words) for name in name_list]
+    df = pd.DataFrame({'old_name': name_list, 'new_name': new_names})
+    print(df.head(6))
+    df.to_csv("/home/gmuser/12.csv", index=False)
+
+

     # number = list()
     # for i in cover:
-- 
2.18.0