Commit 086d0f85 authored by 赵威

get vector dict

parent 3376c972
import json
import os import os
import random
import sys import sys
sys.path.append(os.path.realpath(".")) sys.path.append(os.path.realpath("."))
...@@ -26,8 +28,7 @@ def device_tractate_fe(): ...@@ -26,8 +28,7 @@ def device_tractate_fe():
return device_tags_df, tractate_tags_df return device_tags_df, tractate_tags_df
def tractate_business_tags_word2vec(tractate_df): def tractate_business_tags_word2vec(data):
data = tractate_tags_df["business_tags"].to_list()
print(len(data)) print(len(data))
model = Word2Vec(data, hs=0, min_count=3, workers=multiprocessing.cpu_count(), iter=10) model = Word2Vec(data, hs=0, min_count=3, workers=multiprocessing.cpu_count(), iter=10)
print(model) print(model)
...@@ -36,8 +37,20 @@ def tractate_business_tags_word2vec(tractate_df): ...@@ -36,8 +37,20 @@ def tractate_business_tags_word2vec(tractate_df):
if __name__ == "__main__": if __name__ == "__main__":
device_tags_df, tractate_tags_df = device_tractate_fe() device_tags_df, tractate_tags_df = device_tractate_fe()
model = tractate_business_tags_word2vec(tractate_tags_df) tags_data = tractate_tags_df["business_tags"].to_list()
for i in ["自体脂肪面部年轻化", "自体脂肪填充面部", "自体脂肪全面部填充", "自体脂肪面部填充", "鼻综合", "鼻部综合"]: model = tractate_business_tags_word2vec(tags_data)
print(model.wv.most_similar(i))
print(model.wv.get_vector(i)) tags_set = set()
# tractate_tags_df["business_tags"].to_list() for i in tags_data:
for j in i:
tags_set.add(j)
tags_vector_dict = {}
for i in tags_set:
tags_vector_dict[i] = json.dumps(model.wv.get_vector(i))
print(random.choice(tags_vector_dict.items()))
# for i in ["自体脂肪面部年轻化", "自体脂肪填充面部", "自体脂肪全面部填充", "自体脂肪面部填充", "鼻综合", "鼻部综合"]:
# print(model.wv.most_similar(i))
# print(model.wv.get_vector(i))
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment