Commit 086d0f85 authored by 赵威

get vector dict

parent 3376c972
import json
import os
import random
import sys
sys.path.append(os.path.realpath("."))
......@@ -26,8 +28,7 @@ def device_tractate_fe():
return device_tags_df, tractate_tags_df
def tractate_business_tags_word2vec(tractate_df):
data = tractate_tags_df["business_tags"].to_list()
def tractate_business_tags_word2vec(data):
print(len(data))
model = Word2Vec(data, hs=0, min_count=3, workers=multiprocessing.cpu_count(), iter=10)
print(model)
......@@ -36,8 +37,20 @@ def tractate_business_tags_word2vec(tractate_df):
if __name__ == "__main__":
    # Script entry point: train a Word2Vec model on the tractate business
    # tags, then build a {tag: JSON-encoded vector} dictionary and print one
    # random entry as a sanity check.

    # Only the tractate frame is used below; the device frame is unpacked
    # because device_tractate_fe() returns both.
    device_tags_df, tractate_tags_df = device_tractate_fe()

    # The "business_tags" column is the training corpus
    # (presumably one list of tags per row -- confirm against the loader).
    tags_data = tractate_tags_df["business_tags"].to_list()
    model = tractate_business_tags_word2vec(tags_data)

    # Every distinct tag that occurs anywhere in the corpus.
    tags_set = {tag for tags in tags_data for tag in tags}

    # Map each tag to its embedding serialised as a JSON array.
    # * The model is trained with min_count=3, so rarer tags are not in the
    #   vocabulary and get_vector() would raise KeyError for them: skip them.
    # * get_vector() returns a numpy array, which json.dumps cannot encode
    #   directly: convert it to a plain list of floats first.
    tags_vector_dict = {
        tag: json.dumps(model.wv.get_vector(tag).tolist())
        for tag in tags_set
        if tag in model.wv
    }

    # random.choice() needs an indexable sequence; a dict items view is not
    # one on Python 3, so materialise it. Guard against an empty vocabulary,
    # where random.choice() would raise IndexError.
    if tags_vector_dict:
        print(random.choice(list(tags_vector_dict.items())))

    # To inspect an individual tag interactively, use
    # model.wv.most_similar(tag) and model.wv.get_vector(tag).
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment