Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
S
strategy_embedding
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
rank
strategy_embedding
Commits
3376c972
Commit
3376c972
authored
Nov 16, 2020
by
赵威
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
get vector
parent
16ec9503
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
32 additions
and
25 deletions
+32
-25
to_vector.py
personas_vector/to_vector.py
+27
-24
files.py
utils/files.py
+5
-1
No files found.
personas_vector/to_vector.py
View file @
3376c972
...
...
@@ -3,38 +3,41 @@ import sys
sys
.
path
.
append
(
os
.
path
.
realpath
(
"."
))
import
multiprocessing
import
pandas
as
pd
from
gensim.models
import
Word2Vec
,
word2vec
from
utils.defs
import
nth_element
from
utils.files
import
get_df
# Column-name lists. Neither constant is referenced in the visible part of
# this file -- presumably they are consumed elsewhere; TODO confirm.
DEVICE_COLUMNS = [
    "device_id",
]
TRACTATE_COLUMNS = [
    "card_id",
]
def _split_business_tags(tags):
    """Split a comma-separated string column into lists of tags.

    Entries that do not split into a list (e.g. missing values) become ``[]``.
    """
    return tags.str.split(",").apply(lambda d: d if isinstance(d, list) else [])


def device_tractate_fe():
    """Load the device and tractate tag tables with ``business_tags`` as lists.

    Both files are read through ``get_df``; the first three rows of each
    resulting frame are printed as a quick sanity check.

    Returns:
        tuple: ``(device_tags_df, tractate_tags_df)``.
    """
    device_tags_df = get_df("personas_device_feature.csv")
    # Copy the two-column selection so the assignment below writes to an
    # independent frame instead of triggering pandas' SettingWithCopyWarning.
    device_tags_df = device_tags_df[["cl_id", "business_tags"]].copy()
    device_tags_df["business_tags"] = _split_business_tags(device_tags_df["business_tags"])
    print(device_tags_df.head(3))

    tractate_tags_df = get_df("personas_tractate_tags.csv", columns=["tractate_id", "business_tags"])
    tractate_tags_df["business_tags"] = _split_business_tags(tractate_tags_df["business_tags"])
    print(tractate_tags_df.head(3))

    return device_tags_df, tractate_tags_df


def device_tractae_fe():
    """Misspelled no-op stub; kept only so the name stays defined."""
    pass
if __name__ == "__main__":
    # Read the device feature file under these column names and preview
    # the first three rows.
    device_fe_columns = [
        "cl_id",
        "first_demands",
        "first_solutions",
        "first_positions",
        "second_demands",
        "second_solutions",
        "second_positions",
        "projects",
        "business_tags",
    ]
    device_fe_df = get_df("personas_device_feature.csv", columns=device_fe_columns)
    print(device_fe_df.head(3))
def tractate_business_tags_word2vec(tractate_df):
    """Train a Word2Vec model on the ``business_tags`` column of *tractate_df*.

    Args:
        tractate_df: DataFrame whose ``business_tags`` column holds one list
            of tag strings per row; each list is used as one training sentence.

    Returns:
        The trained ``Word2Vec`` model.
    """
    # Read from the argument rather than a module-level ``tractate_tags_df``,
    # so the function works on whichever frame the caller passes in.
    data = tractate_df["business_tags"].to_list()
    print(len(data))
    # NOTE(review): ``iter`` is the gensim 3.x keyword; gensim 4 renamed it to
    # ``epochs`` -- confirm against the pinned gensim version before upgrading.
    model = Word2Vec(
        data,
        hs=0,
        min_count=3,
        workers=multiprocessing.cpu_count(),
        iter=10,
    )
    print(model)
    return model
# Read the tractate tag file under two column names and preview three rows.
tractate_tags_df = get_df(
    "personas_tractate_tags.csv",
    columns=["tractate_id", "business_tags"],
)
print(tractate_tags_df.head(3))

if __name__ == "__main__":
    # Build both tag frames, then train the embedding on the tractate tags.
    device_tags_df, tractate_tags_df = device_tractate_fe()
    model = tractate_business_tags_word2vec(tractate_tags_df)

    # Print the nearest neighbours and the raw vector for a few sample tags.
    probe_tags = (
        "自体脂肪面部年轻化",
        "自体脂肪填充面部",
        "自体脂肪全面部填充",
        "自体脂肪面部填充",
        "鼻综合",
        "鼻部综合",
    )
    keyed_vectors = model.wv
    for tag in probe_tags:
        print(keyed_vectors.most_similar(tag))
        print(keyed_vectors.get_vector(tag))
# tractate_tags_df["business_tags"].to_list()
utils/files.py
View file @
3376c972
...
...
@@ -34,7 +34,11 @@ def save_dict_to_csv(d, file):
def get_df(file, sep="|", columns=None):
    """Read a delimited file from ``DATA_PATH`` into a DataFrame.

    The resolved path and the shape of the loaded frame are printed.

    Args:
        file: File name, joined onto ``DATA_PATH``.
        sep: Field delimiter passed to ``pandas.read_csv``.
        columns: Optional list of column names. When non-empty it is passed
            as ``names``; otherwise ``read_csv`` is called without ``names``.

    Returns:
        The loaded ``pandas.DataFrame``.
    """
    full_path = os.path.join(DATA_PATH, file)
    # full_path = os.path.join("/Users/offic/work/GM/strategy_embedding/_data", file)  # TODO
    print(full_path)
    # Read the file exactly once, honouring ``sep``; an unconditional read
    # with a hard-coded "|" separator used to precede this branch.
    if columns:
        df = pd.read_csv(full_path, sep=sep, names=columns)
    else:
        df = pd.read_csv(full_path, sep=sep)
    print(df.shape)
    return df
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment