Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
F
ffm-baseline
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ML
ffm-baseline
Commits
e4407c5f
Commit
e4407c5f
authored
Nov 07, 2019
by
高雅喆
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
read tidb
parent
dec6e3c3
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
13 additions
and
3 deletions
+13
-3
gm_tag_cf.py
eda/smart_rank/gm_tag_cf.py
+13
-3
No files found.
eda/smart_rank/gm_tag_cf.py
View file @
e4407c5f
...
...
@@ -4,7 +4,7 @@ import numpy as np
import
pandas
as
pd
from
sklearn.metrics
import
pairwise_distances
from
scipy.spatial.distance
import
cosine
,
correlation
from
tool
import
get_all_tag_parent_tag
,
get_all_tags_name
,
get_all_word_synonym_words
from
tool
import
get_all_tag_parent_tag
,
get_all_tags_name
,
get_all_word_synonym_words
,
get_data_by_mysql
# Recommender Engine
...
...
@@ -14,12 +14,22 @@ def get_similary_tags(tag_id, tags, ratings_matrix):
tags
[
'similarity'
]
=
ratings_matrix
.
iloc
[
tag_id_index
]
return
tags
.
sort_values
([
"similarity"
],
ascending
=
False
)[:
-
1
]
.
tag_id
.
tolist
()
# rm self
def get_user_log():
    """Load the user/tag rating log from the ``item_cf_log`` table.

    Runs ``select userId, tagId, rating from item_cf_log`` through
    ``get_data_by_mysql`` and wraps whatever that helper returns in a
    ``pandas.DataFrame``.

    Connection settings may be overridden through the environment
    variables ``ITEM_CF_DB_HOST``, ``ITEM_CF_DB_PORT``, ``ITEM_CF_DB_USER``,
    ``ITEM_CF_DB_PASSWORD`` and ``ITEM_CF_DB_NAME``. When a variable is not
    set, the previously hard-coded value is used, so existing invocations
    keep working unchanged.

    Returns:
        pandas.DataFrame: built directly from the query result.
        NOTE(review): callers access ``ratings.tagId``, so the helper
        presumably returns dict-like rows keyed by column name -- confirm
        against ``tool.get_data_by_mysql``.
    """
    # Function-scope import so this block does not depend on the file's
    # top-level import list.
    import os

    sql = "select userId, tagId, rating from item_cf_log"

    host = os.environ.get("ITEM_CF_DB_HOST", '172.16.40.158')
    port = int(os.environ.get("ITEM_CF_DB_PORT", 4000))
    user = os.environ.get("ITEM_CF_DB_USER", 'root')
    # NOTE(review): this fallback credential is committed to version
    # control. It is kept only for backward compatibility; rotate it and
    # drop the fallback once ITEM_CF_DB_PASSWORD is set in every
    # environment that runs this script.
    password = os.environ.get("ITEM_CF_DB_PASSWORD", '3SYz54LS9#^9sBvC')
    database = os.environ.get("ITEM_CF_DB_NAME", 'jerry_test')

    mysql_results = get_data_by_mysql(host, port, user, password, database, sql)
    return pd.DataFrame(mysql_results)
if
__name__
==
'__main__'
:
# User Log
# index: sample id
# columns: userId,tagId,rating
ratings
=
pd
.
read_csv
(
"/home/gmuser/gyz/log/cf/user_item_log.csv"
,
sep
=
"
\t
"
)
ratings
.
drop
(
"timestamp"
,
inplace
=
True
,
axis
=
1
)
# ratings = pd.read_csv("/home/gmuser/gyz/log/cf/user_item_log.csv", sep="\t")
# ratings.drop("timestamp", inplace=True, axis=1)
# sorted_tag_ids = sorted(ratings.tagId.unique())
# cf_tags = pd.DataFrame({'tag_id': sorted_tag_ids})
ratings
=
get_user_log
()
sorted_tag_ids
=
sorted
(
ratings
.
tagId
.
unique
())
cf_tags
=
pd
.
DataFrame
({
'tag_id'
:
sorted_tag_ids
})
# ratings.head()
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment