Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
F
ffm-baseline
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ML
ffm-baseline
Commits
22e7eace
Commit
22e7eace
authored
Nov 07, 2019
by
高雅喆
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
add gm_tag_cf
parent
84e6abb5
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
15 additions
and
5 deletions
+15
-5
gm_tag_cf.py
eda/smart_rank/gm_tag_cf.py
+15
-5
No files found.
eda/smart_rank/gm_tag_cf.py
View file @
22e7eace
import
redis
import
json
import
numpy
as
np
import
pandas
as
pd
from
sklearn.metrics
import
pairwise_distances
...
...
@@ -49,20 +51,28 @@ if __name__ == '__main__':
# 所有标签的协同标签
all_tags_cf_tags
=
dict
()
for
tag
in
all_tags_name
:
if
tag
in
cf_tag
s
:
if
tag
in
sorted_tag_id
s
:
tag_cf_tags
=
get_similary_tags
(
tag
,
cf_tags
,
ratings_matrix
)
else
:
tag_cf_tags
=
[]
continue
# 过滤父级标签
parent_tags
=
all_tag_parent_tags
[
tag
]
parent_tags
=
all_tag_parent_tags
.
get
(
tag
,
[])
# 1级标签没有父级标签
for
parent_tag
in
parent_tags
:
if
parent_tag
in
tag_cf_tags
:
tag_cf_tags
.
remove
(
parent_tag
)
# 过滤标签的同义词
tag_name
=
all_tags_name
[
tag
]
tag_cf_tags_names
=
[
all_tags_name
[
i
]
for
i
in
tag_cf_tags
]
tag_synonym_names
=
all_word_synonym_words
[
tag_name
]
tag_synonym_names
=
all_word_synonym_words
.
get
(
tag_name
,
[])
# 部分标签名称没有同义词
for
tag_synonym_name
in
tag_synonym_names
:
if
tag_synonym_name
in
tag_cf_tags_names
:
tag_cf_tags_names
.
remove
(
tag_synonym_name
)
all_tags_cf_tags
[
tag_name
]
=
tag_cf_tags_names
all_tags_cf_tags
[
tag_name
]
=
json
.
dumps
(
tag_cf_tags_names
[:
10
])
redis_client
=
redis
.
StrictRedis
.
from_url
(
'redis://:ReDis!GmTx*0aN9@172.16.40.173:6379'
)
gm_tag_cf_tags_key
=
"gm:tag:cf:tags"
redis_client
.
hmset
(
gm_tag_cf_tags_key
,
all_tags_cf_tags
)
# doris上执行
# data = redis_client.hgetall(gm_tag_cf_tags_key)
# data_dict = {str(i, 'utf-8'): json.loads(data[i]) for i in data}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment