Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
F
ffm-baseline
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ML
ffm-baseline
Commits
6e61079b
Commit
6e61079b
authored
Jan 14, 2021
by
张彦钊
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'zhao21' into 'master'
增加医院名字简化 See merge request
!83
parents
8b0c960b
07b29270
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
114 additions
and
0 deletions
+114
-0
hospital_name_short.py
local/hospital_name_short.py
+114
-0
No files found.
local/hospital_name_short.py
0 → 100644
View file @
6e61079b
# -*- coding: utf-8 -*-
import
redis
import
re
import
json
import
pymysql
import
pandas
as
pd
# 抽取医院名字简称
def get_city_names():
    """Fetch every city name from the ``api_city`` table.

    Connects to the ``zhengxing`` MySQL database, runs
    ``select name from api_city;`` and prints the first ten names as a
    quick sanity check.

    Returns:
        list: The first column of every returned row, in query order.
        An empty list is returned when the query yields no rows.
    """
    # NOTE(review): host and credentials are hard-coded; consider loading
    # them from configuration so they stay out of version control.
    db = pymysql.connect(host='172.16.30.143', port=3306, user='work',
                         passwd='BJQaT9VzDcuPBqkd', db='zhengxing')
    try:
        cursor = db.cursor()
        cursor.execute("select name from api_city;")
        result = cursor.fetchall()
    finally:
        # Release the connection even when the query raises.
        db.close()

    # Index the rows directly: building a DataFrame and selecting column 0
    # raises KeyError when the result set is empty.
    name_list = [row[0] for row in result]
    print(name_list[:10])
    return name_list
def _shorten_hospital_name(name, city_list, stop_words):
    """Strip city names, stop words, tabs and bracketed text from one name.

    Args:
        name: The full hospital name.
        city_list: City names to remove from ``name``.
        stop_words: Literal substrings to remove from ``name``.

    Returns:
        tuple: ``(short_name, city_tag)`` where ``city_tag`` is the last
        entry of ``city_list`` that was found in the name, or ``None`` when
        no city matched.
    """
    city_tag = None
    for city in city_list:
        # The check runs against the progressively stripped name, so a city
        # only counts if it is still present after earlier removals.
        if city in name:
            city_tag = city
            # Literal replacement: city names come from the database and
            # must not be interpreted as regular expressions.
            name = name.replace(city, '')

    for word in stop_words:
        name = name.replace(word, '')

    name = name.replace('\t', '')

    # Bracketed remarks, applied in order: full-width pair, ASCII pair, then
    # ASCII-left / full-width-right. The full-width parentheses are written
    # as \uff08 / \uff09 escapes so they cannot be silently normalized to
    # ASCII by tooling.
    for pattern in (r'\uff08.*?\uff09', r'\(.*?\)', r'\(.*?\uff09'):
        name = re.sub(pattern, '', name)

    return name, city_tag


def name_short(output_path="/home/gmuser/hospital_name_21.csv"):
    """Build a CSV of shortened names for all online hospitals.

    Reads ``name`` from ``api_hospital`` (rows with ``is_online = 1``),
    removes city names (from ``get_city_names``), generic stop words, tabs
    and bracketed text from each name, and writes three columns:

    * ``old_name``  - the name as stored in the database;
    * ``new_name``  - the stripped name;
    * ``core_name`` - the matched city followed by the stripped name, or
      ``"无"`` when no city name occurred in the original.

    Args:
        output_path: Destination CSV path. Defaults to the location the
            script has always written to.
    """
    # NOTE(review): host and credentials are hard-coded; consider loading
    # them from configuration so they stay out of version control.
    db = pymysql.connect(host='172.16.30.143', port=3306, user='work',
                         passwd='BJQaT9VzDcuPBqkd', db='zhengxing')
    try:
        cursor = db.cursor()
        cursor.execute("select name from api_hospital where is_online = 1;")
        result = cursor.fetchall()
    finally:
        # Release the connection even when the query raises.
        db.close()

    # Index the rows directly: building a DataFrame and selecting column 0
    # raises KeyError when the result set is empty.
    name_list = [row[0] for row in result]
    print(name_list[:10])

    names = ["门诊部", "医疗", "门诊", "研究所", "有限", "公司", "医学", "诊所",
             "中心", "美容", "医院", "整形", "外科", "医美", "集团", "卫生",
             "机构", "专业", "皮肤", "管理", "光学", "国际", "连锁", "综合",
             "专科"]
    location = ["街道", "社区"]
    stop_words = names + location

    city_list = get_city_names()

    new_names = []
    third_names = []
    for name in name_list:
        short_name, city_tag = _shorten_hospital_name(
            name, city_list, stop_words)
        new_names.append(short_name)
        if city_tag is not None:
            third_names.append(city_tag + short_name)
        else:
            third_names.append("无")

    df = pd.DataFrame({
        'old_name': name_list,
        'new_name': new_names,
        'core_name': third_names,
    })
    print(df.head(6))
    df.to_csv(output_path, index=False)
def name_process(name):
    """Reduce a hospital name to its distinctive core.

    Removes, in this order: treatment/project tags, generic organisation
    words, location words, bracketed remarks, digits and tab characters.

    Args:
        name: The hospital name to clean.

    Returns:
        str: ``name`` with all of the above removed.
    """
    # "丰胸" and "美胸" must precede "胸"; otherwise the single character is
    # stripped first and a stray "丰" / "美" is left behind.
    project_tags = ["口腔", "植发", "牙", "皮肤", "眼", "外科", "美容", "整形",
                    "烧伤", "丰胸", "美胸", "胸", "祛痘", "祛斑", "脱毛", "创伤",
                    "除疤", "半永久", "纹绣", "纹眉"]
    names = ["医疗", "门诊", "研究所", "有限", "公司", "医学", "诊所", "中心",
             "医美", "集团", "卫生", "机构", "专业", "光学", "国际", "连锁",
             "综合", "专科"]
    location = ["街道", "社区"]

    # All stop words are literal text, so plain replacement is sufficient.
    for word in project_tags + names + location:
        name = name.replace(word, '')

    # Bracketed remarks, applied in order: full-width pair, ASCII pair, then
    # ASCII-left / full-width-right. The full-width parentheses are written
    # as \uff08 / \uff09 escapes so they cannot be silently normalized to
    # ASCII by tooling.
    for pattern in (r'\uff08.*?\uff09', r'\(.*?\)', r'\(.*?\uff09'):
        name = re.sub(pattern, '', name)

    # Remove digits.
    name = re.sub(r'\d', '', name)
    # Remove tab characters.
    name = name.replace('\t', '')
    return name
# Script entry point: generate the shortened-name CSV when run directly.
if __name__ == "__main__":
    name_short()
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment