Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
S
strategy_embedding
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
rank
strategy_embedding
Commits
7280af56
Commit
7280af56
authored
Oct 28, 2020
by
赵威
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
get item2vec
parent
43f30aa7
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
12 additions
and
3 deletions
+12
-3
settings.py
strategy_embedding/settings.py
+1
-0
word_to_vec.py
word_vector/word_to_vec.py
+11
-3
No files found.
strategy_embedding/settings.py
View file @
7280af56
...
...
@@ -38,6 +38,7 @@ INSTALLED_APPS = [
"django_extensions"
,
"face_similarity"
,
"word_vector"
,
"utils"
,
]
MIDDLEWARE
=
[
...
...
word_vector/word_to_vec.py
View file @
7280af56
...
...
@@ -4,7 +4,6 @@ import time
from
gensim.models
import
Word2Vec
,
word2vec
from
gm_rpcd.all
import
bind
from
utils.db
import
get_device_click_tractate_ids
from
utils.es
import
es_scan
base_dir
=
os
.
getcwd
()
...
...
@@ -76,8 +75,15 @@ def projects_item2vec(score_limit=5):
def
clicked_tractate_ids_item2vec
():
user_dict
=
get_device_click_tractate_ids
()
click_ids
=
list
(
user_dict
.
values
())
click_ids
=
[]
with
open
(
os
.
path
.
join
(
data_dir
,
"click_tractate_ids.csv"
),
"r"
)
as
f
:
data
=
f
.
readlines
()
data
=
data
[:
100
]
for
i
in
data
:
tmp
=
i
.
split
(
"|"
)
# device_id = tmp[0]
ids
=
tmp
[
1
]
.
rstrip
(
"
\n
"
)
.
split
(
","
)
click_ids
.
append
(
ids
)
model
=
Word2Vec
(
click_ids
,
hs
=
0
,
min_count
=
3
,
workers
=
multiprocessing
.
cpu_count
(),
iter
=
10
)
print
(
model
)
print
(
len
(
click_ids
))
...
...
@@ -94,4 +100,6 @@ if __name__ == "__main__":
for
i
in
[
"双眼皮"
,
"隆鼻"
]:
print
(
word_similarity
(
i
))
clicked_tractate_ids_item2vec
()
print
(
"total cost: {:.2f}mins"
.
format
((
time
.
time
()
-
begin_time
)
/
60
))
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment