Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
F
ffm-baseline
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ML
ffm-baseline
Commits
534799ff
Commit
534799ff
authored
Feb 07, 2021
by
张彦钊
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
add
parent
0522e469
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
31 additions
and
22 deletions
+31
-22
hospital_name_short.py
local/hospital_name_short.py
+31
-22
No files found.
local/hospital_name_short.py
View file @
534799ff
...
...
@@ -32,41 +32,50 @@ def name_short():
print
(
name_list
[:
10
])
names
=
[
"门诊部"
,
"医疗"
,
"门诊"
,
"研究所"
,
"有限"
,
"公司"
,
"医学"
,
"诊所"
,
"中心"
,
"美容"
,
"医院"
,
"整形"
,
"外科"
,
"医美"
,
"集团"
,
"卫生"
,
"机构"
,
"专业"
,
"皮肤"
,
"管理"
,
"光学"
,
"国际"
,
"连锁"
,
"综合"
,
"专科"
,
"市"
,
"
测试"
,
"测试1"
,
"ceshi"
,
"ceshi1"
,
"
*"
]
"光学"
,
"国际"
,
"连锁"
,
"综合"
,
"专科"
,
"市"
,
"*"
]
location
=
[
"街道"
,
"社区"
,]
city_list
=
get_city_names
()
city_list
+=
[
"香港"
,
"澳门"
,
"韩国"
]
for
word
in
[
"市中心"
,
"丰台"
,
"科"
,
"东方"
]:
if
word
in
city_list
:
city_list
.
remove
(
word
)
stop_words
=
names
+
location
new_names
=
[]
third_names
=
[]
for
name
in
name_list
:
city_tag
=
"flag"
for
city
in
city_list
:
if
city
in
name
:
city_tag
=
city
name
=
re
.
sub
(
city
,
''
,
name
)
for
word
in
stop_words
:
name
=
re
.
sub
(
word
,
''
,
name
)
# 去除\t
name
=
re
.
sub
(
r'\t'
,
""
,
name
)
# 去除 中文括号( )
name
=
re
.
sub
(
r'\(.*?\)'
,
''
,
name
)
# 去除 英文括号( )
name
=
re
.
sub
(
r'\(.*?\)'
,
''
,
name
)
# 去除 左英文括号,右中文括号
name
=
re
.
sub
(
r'\(.*?\)'
,
''
,
name
)
if
city_tag
!=
"flag"
:
third_names
.
append
(
city_tag
+
name
)
else
:
third_names
.
append
(
"无"
)
new_names
.
append
(
name
)
for
test_word
in
[
"测试"
,
"测试1"
,
"ceshi"
,
"ceshi1"
]:
if
test_word
in
name
:
pass
else
:
city_tag
=
"flag"
for
city
in
city_list
:
if
city
in
name
:
city_tag
=
city
name
=
re
.
sub
(
city
,
''
,
name
)
for
word
in
stop_words
:
name
=
re
.
sub
(
word
,
''
,
name
)
# 去除\t
name
=
re
.
sub
(
r'\t'
,
""
,
name
)
# 去除 中文括号( )
name
=
re
.
sub
(
r'\(.*?\)'
,
''
,
name
)
# 去除 英文括号( )
name
=
re
.
sub
(
r'\(.*?\)'
,
''
,
name
)
# 去除 左英文括号,右中文括号
name
=
re
.
sub
(
r'\(.*?\)'
,
''
,
name
)
if
city_tag
!=
"flag"
:
third_names
.
append
(
city_tag
+
name
)
else
:
third_names
.
append
(
"无"
)
new_names
.
append
(
name
)
df
=
pd
.
DataFrame
()
df
[
'old_name'
]
=
name_list
df
[
'new_name'
]
=
new_names
df
[
'core_name'
]
=
third_names
print
(
df
.
head
(
6
))
df
.
to_csv
(
"/tmp/"
+
"name_2
2
.csv"
,
index
=
None
)
df
.
to_csv
(
"/tmp/"
+
"name_2
8
.csv"
,
index
=
None
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment