Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
F
ffm-baseline
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ML
ffm-baseline
Commits
b21cc87a
Commit
b21cc87a
authored
Feb 07, 2021
by
张彦钊
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
add
parent
b24843b4
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
17 additions
and
13 deletions
+17
-13
hospital_name_short.py
local/hospital_name_short.py
+17
-13
No files found.
local/hospital_name_short.py
View file @
b21cc87a
...
@@ -30,27 +30,28 @@ def name_short():
...
@@ -30,27 +30,28 @@ def name_short():
name_list
=
pd
.
DataFrame
(
list
(
result
))[
0
]
.
values
.
tolist
()
name_list
=
pd
.
DataFrame
(
list
(
result
))[
0
]
.
values
.
tolist
()
db
.
close
()
db
.
close
()
print
(
name_list
[:
10
])
print
(
name_list
[:
10
])
names
=
[
"门诊部"
,
"医疗"
,
"门诊"
,
"研究所"
,
"有限"
,
"公司"
,
"医学"
,
"诊所"
,
"中心"
,
"美容"
,
"医院"
,
"整形"
,
"外科"
,
names
=
[
"
美容院"
,
"
门诊部"
,
"医疗"
,
"门诊"
,
"研究所"
,
"有限"
,
"公司"
,
"医学"
,
"诊所"
,
"中心"
,
"美容"
,
"医院"
,
"整形"
,
"外科"
,
"
医美"
,
"
集团"
,
"卫生"
,
"机构"
,
"专业"
,
"皮肤"
,
"管理"
,
"集团"
,
"卫生"
,
"机构"
,
"专业"
,
"皮肤"
,
"管理"
,
"光学"
,
"国际"
,
"连锁"
,
"综合"
,
"专科"
,
"市"
,
"
\
*"
]
"光学"
,
"国际"
,
"连锁"
,
"综合"
,
"专科"
,
"市"
,
"
\
*"
,
"
\
•"
]
location
=
[
"街道"
,
"社区"
,]
location
=
[
"街道"
,
"社区"
,]
city_list
=
get_city_names
()
city_list
=
get_city_names
()
city_list
+=
[
"香港"
,
"澳门"
,
"韩国"
]
city_list
+=
[
"香港"
,
"澳门"
,
"韩国"
]
for
word
in
[
"市中心"
,
"丰台"
,
"科"
,
"东方"
]:
for
word
in
[
"市中心"
,
"丰台"
,
"科"
,
"东方"
,
"鞍山"
]:
if
word
in
city_list
:
if
word
in
city_list
:
city_list
.
remove
(
word
)
city_list
.
remove
(
word
)
city_list
.
append
(
"鞍山"
)
stop_words
=
names
+
location
stop_words
=
names
+
location
new
_names
=
[]
second
_names
=
[]
third_names
=
[]
third_names
=
[]
names_new
=
[]
first_names
=
[]
for
name
in
name_list
:
for
name
in
name_list
:
for
test_word
in
[
"测试"
,
"测试1"
,
"ceshi"
,
"ceshi1"
]:
for
test_word
in
[
"测试"
,
"测试1"
,
"ceshi"
,
"ceshi1"
]:
if
test_word
in
name
:
if
test_word
in
name
:
pass
pass
else
:
else
:
names_new
.
append
(
name
)
first_names
.
append
(
name
)
city_tag
=
"flag"
city_tag
=
"flag"
for
city
in
city_list
:
for
city
in
city_list
:
if
city
in
name
:
if
city
in
name
:
...
@@ -66,19 +67,22 @@ def name_short():
...
@@ -66,19 +67,22 @@ def name_short():
name
=
re
.
sub
(
r'\(.*?\)'
,
''
,
name
)
name
=
re
.
sub
(
r'\(.*?\)'
,
''
,
name
)
# 去除 左英文括号,右中文括号
# 去除 左英文括号,右中文括号
name
=
re
.
sub
(
r'\(.*?\)'
,
''
,
name
)
name
=
re
.
sub
(
r'\(.*?\)'
,
''
,
name
)
if
city_tag
!=
"flag"
:
if
city_tag
!=
"flag"
and
name
!=
""
:
third_names
.
append
(
city_tag
+
name
)
third_names
.
append
(
city_tag
+
name
)
else
:
else
:
third_names
.
append
(
"无"
)
third_names
.
append
(
"无"
)
new_names
.
append
(
name
)
if
name
!=
""
:
second_names
.
append
(
name
)
else
:
second_names
.
append
(
first_names
[
-
1
])
df
=
pd
.
DataFrame
()
df
=
pd
.
DataFrame
()
df
[
'old_name'
]
=
names_new
df
[
'old_name'
]
=
first_names
df
[
'new_name'
]
=
new
_names
df
[
'new_name'
]
=
second
_names
df
[
'core_name'
]
=
third_names
df
[
'core_name'
]
=
third_names
print
(
df
.
head
(
6
))
print
(
df
.
head
(
6
))
df
.
to_csv
(
"/tmp/"
+
"name_29.csv"
,
index
=
None
,
encoding
=
"utf_8_sig"
)
df
.
to_csv
(
"/tmp/"
+
"name_66.csv"
,
index
=
None
,
encoding
=
"utf_8_sig"
)
df
.
to_csv
(
"/tmp/"
+
"name_30.csv"
,
index
=
None
,
encoding
=
"utf_8"
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment