Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
F
ffm-baseline
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ML
ffm-baseline
Commits
07b29270
Commit
07b29270
authored
Jan 14, 2021
by
张彦钊
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
add
parent
8b0c960b
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
114 additions
and
0 deletions
+114
-0
hospital_name_short.py
local/hospital_name_short.py
+114
-0
No files found.
local/hospital_name_short.py
0 → 100644
View file @
07b29270
# -*- coding: utf-8 -*-
import
redis
import
re
import
json
import
pymysql
import
pandas
as
pd
# 抽取医院名字简称
def get_city_names():
    """Return every city name stored in the ``api_city`` table.

    Opens a connection to the ``zhengxing`` MySQL database, runs
    ``select name from api_city;`` and collects the first column of each
    returned row. The first ten names are printed as a quick sanity check.

    Returns:
        list: One entry per row, in the order the server returned them.
        An empty list is returned when the query yields no rows.
    """
    # NOTE(review): host and credentials are hard-coded in source; they
    # should be moved to configuration or environment variables.
    db = pymysql.connect(host='172.16.30.143', port=3306, user='work',
                         passwd='BJQaT9VzDcuPBqkd', db='zhengxing')
    try:
        with db.cursor() as cursor:
            cursor.execute("select name from api_city;")
            rows = cursor.fetchall()
    finally:
        # Release the connection even when the query raises.
        db.close()

    # Plain column extraction: unlike ``pd.DataFrame(rows)[0]`` this does
    # not raise KeyError when the result set is empty.
    name_list = [row[0] for row in rows]
    print(name_list[:10])
    return name_list
# Matches a bracketed span opened by "(" or "（" and closed by ")" or "）",
# so ASCII, full-width and mixed bracket pairs are all removed.
_HOSPITAL_BRACKETED_TEXT = re.compile(r'[(（].*?[)）]')


def _shorten_hospital_name(name, city_list, stop_words):
    """Strip city names, stop words, tabs and bracketed text from one name.

    Args:
        name: Original hospital name.
        city_list: City names to look for; every one found is removed.
        stop_words: Literal substrings removed one after another, in order.

    Returns:
        tuple: ``(short_name, matched_city)`` where ``matched_city`` is the
        last entry of ``city_list`` found in the name, or ``None`` when no
        city matched.
    """
    matched_city = None
    for city in city_list:
        # Skip NULL/empty city names: an empty string is "in" every name
        # and would wrongly mark each hospital as matched.
        if not city:
            continue
        if city in name:
            matched_city = city
            # Literal replacement; a city name is data, not a regex pattern.
            name = name.replace(city, '')
    for word in stop_words:
        name = name.replace(word, '')
    # Remove tab characters.
    name = name.replace('\t', '')
    # Remove bracketed remarks (full-width, ASCII, or mixed brackets).
    name = _HOSPITAL_BRACKETED_TEXT.sub('', name)
    return name, matched_city


def name_short():
    """Extract short names for online hospitals and write them to a CSV.

    Reads ``name`` from ``api_hospital`` rows with ``is_online = 1``, fetches
    the city names via ``get_city_names()``, and for each hospital name
    removes matched city names, generic stop words, tabs and bracketed text.

    The resulting table has three columns:
        * ``old_name``  - the name as stored in the database,
        * ``new_name``  - the stripped name,
        * ``core_name`` - matched city + stripped name, or ``"无"`` when no
          city name occurs in the original name.

    The first six rows are printed and the table is written, without an index
    column, to ``/home/gmuser/hospital_name_21.csv``.
    """
    # NOTE(review): host and credentials are hard-coded in source; they
    # should be moved to configuration or environment variables.
    db = pymysql.connect(host='172.16.30.143', port=3306, user='work',
                         passwd='BJQaT9VzDcuPBqkd', db='zhengxing')
    try:
        with db.cursor() as cursor:
            cursor.execute(
                "select name from api_hospital where is_online = 1;")
            rows = cursor.fetchall()
    finally:
        # Release the connection even when the query raises.
        db.close()

    # Plain column extraction; does not fail on an empty result set.
    name_list = [row[0] for row in rows]
    print(name_list[:10])

    # Order matters: "门诊部" must be removed before its substring "门诊".
    names = ["门诊部", "医疗", "门诊", "研究所", "有限", "公司", "医学", "诊所",
             "中心", "美容", "医院", "整形", "外科", "医美", "集团", "卫生",
             "机构", "专业", "皮肤", "管理", "光学", "国际", "连锁", "综合",
             "专科"]
    location = ["街道", "社区"]
    stop_words = names + location
    city_list = get_city_names()

    new_names = []
    third_names = []
    for name in name_list:
        short_name, city = _shorten_hospital_name(name, city_list, stop_words)
        new_names.append(short_name)
        third_names.append("无" if city is None else city + short_name)

    df = pd.DataFrame({
        'old_name': name_list,
        'new_name': new_names,
        'core_name': third_names,
    })
    print(df.head(6))
    df.to_csv("/home/gmuser/hospital_name_21.csv", index=False)
# number = list()
# for i in cover:
# sql = "select count(cid) from data_feed_exposure " \
# "where cid_type = 'diary'" \
# "and stat_date = '{}' and device_id = '{}';".format(date,i)
# cursor.execute(sql)
# result = cursor.fetchone()[0]
# cid.append(i)
# number.append(result)
# df = pd.DataFrame()
# df['id'] = cid
# df['number'] = number
# df = df.sort_values(by = "number",ascending=False)
# df.to_csv(DIRECTORY_PATH+"exp.csv",index=None)
# Matches a bracketed span opened by "(" or "（" and closed by ")" or "）",
# so ASCII, full-width and mixed bracket pairs are all removed.
_NAME_PROCESS_BRACKETED_TEXT = re.compile(r'[(（].*?[)）]')


def name_process(name):
    """Reduce a hospital name to its distinctive part.

    Removes, in this order: treatment/project tags, generic organisation
    words, location words, bracketed remarks, digits, and tab characters.

    Args:
        name (str): Hospital name to clean.

    Returns:
        str: The name with all of the above removed; may be empty.
    """
    # Longer tags come before their substrings ("丰胸"/"美胸" before "胸");
    # otherwise the short tag is stripped first and the longer one can never
    # match, leaving a stray "丰" or "美" behind.
    project_tags = ["口腔", "植发", "牙", "皮肤", "眼", "外科", "美容", "整形",
                    "烧伤", "丰胸", "美胸", "胸", "祛痘", "祛斑", "脱毛", "创伤",
                    "除疤", "半永久", "纹绣", "纹眉"]
    names = ["医疗", "门诊", "研究所", "有限", "公司", "医学", "诊所", "中心",
             "医美", "集团", "卫生", "机构", "专业", "光学", "国际", "连锁",
             "综合", "专科"]
    location = ["街道", "社区"]

    # Stop words are plain text, so literal replacement is sufficient.
    for word in project_tags + names + location:
        name = name.replace(word, '')

    # Remove bracketed remarks (full-width, ASCII, or mixed brackets).
    name = _NAME_PROCESS_BRACKETED_TEXT.sub('', name)
    # Remove digits.
    name = re.sub(r'\d', '', name)
    # Remove tab characters.
    name = name.replace('\t', '')
    return name
# Script entry point: build the short-name CSV only when run directly.
if __name__ == "__main__":
    name_short()
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment