From a1908523d047ae48b5b9c4124231bc2b77bc3025 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=BC=A0=E5=BD=A6=E9=92=8A?= <zhangyanzhao@igengmei.com>
Date: Wed, 13 Jan 2021 16:26:55 +0800
Subject: [PATCH] Add city-name lookup and stop-word stripping for hospital short names

---
 zyz/hosipatl_name.py | 32 +++++++++++++++++++++++++++++++-
 1 file changed, 31 insertions(+), 1 deletion(-)

diff --git a/zyz/hosipatl_name.py b/zyz/hosipatl_name.py
index d915de5a..92178895 100644
--- a/zyz/hosipatl_name.py
+++ b/zyz/hosipatl_name.py
@@ -9,14 +9,44 @@ import pandas as pd
 # 抽取医院名字简称
 
 
+def get_city_names():
+    """Return the `name` column of every `api_city` row as a list ([] when the table has no rows)."""
+    db = pymysql.connect(host='172.16.30.143', port=3306, user='work', passwd='BJQaT9VzDcuPBqkd', db='zhengxing')  # NOTE(review): hard-coded credentials; move to config
+    try:
+        cursor = db.cursor()
+        cursor.execute("select name from api_city;")
+        name_list = [row[0] for row in cursor.fetchall()]  # first (only) selected column; no KeyError on zero rows
+    finally:
+        db.close()  # release the connection even when the query raises
+    return name_list
+
+
 def name_short():
     db = pymysql.connect(host='172.16.30.143', port=3306, user='work', passwd='BJQaT9VzDcuPBqkd', db='zhengxing')
     cursor = db.cursor()
     sql_active = "select name from api_hospital;"
     cursor.execute(sql_active)
     result = cursor.fetchall()
-    name_list = pd.DataFrame(list(result))[0].values.tolist()
+    name_list = pd.DataFrame(list(result))[0].values.tolist()[:20]
+    db.close()
     print(name_list[:10])
+    names = ["医疗", "门诊", "研究所", "有限", "公司", "医学", "诊所", "中心", "医美", "集团", "卫生", "机构", "专业",
+             "光学", "国际", "连锁", "综合", "专科", ""]
+    location = ["街道", "社区",]
+    city_list = get_city_names()
+    stop_words = names + location + city_list
+    new_names = []
+    for name in name_list:
+        for word in stop_words:
+            name = re.sub(word, '', name)
+        new_names.append(name)
+    df = pd.DataFrame()
+    df['old_name'] = name_list
+    df['new_name'] = new_names
+    print(df.head(6))
+    df.to_csv("/home/gmuser/"+"12.csv",index=None)
+
+
 
     # number = list()
     # for i in cover:
-- 
2.18.0