Commit 2df0646d authored by 赵威

load model

parent 94aef5ab
......@@ -15,6 +15,10 @@ model_output_name = "w2v_model"
# Location of the saved word2vec model: <model_dir>/<model_output_name>.
model_path = os.path.join(model_dir, model_output_name)
# Load the word2vec model once so it can be reused through WORD2VEC_MODEL.
WORD2VEC_MODEL = word2vec.Word2Vec.load(model_path)
# File name of the item2vec model built from clicked tractate ids.
# NOTE(review): `tracate_` is missing a "t" (compare `tractate_click_ids_model_path`
# below); left unchanged in case the name is referenced elsewhere.
tracate_click_ids_model_name = "tractate_click_ids_item2vec_model"
# Full path of that model; save_clicked_tractate_ids_item2vec() writes to this path.
tractate_click_ids_model_path = os.path.join(model_dir, tracate_click_ids_model_name)
# Load the clicked-tractate item2vec model for similarity lookups.
# NOTE(review): presumably the file must already exist (i.e. the save function
# has been run at least once) or this load will fail -- confirm.
TRACTATE_CLICK_IDS_MODEL = word2vec.Word2Vec.load(tractate_click_ids_model_path)
class W2vSentences:
def __init__(self, f_name):
......@@ -74,7 +78,7 @@ def projects_item2vec(score_limit=5):
return model
def clicked_tractate_ids_item2vec():
def save_clicked_tractate_ids_item2vec():
click_ids = []
with open(os.path.join(data_dir, "click_tractate_ids.csv"), "r") as f:
data = f.readlines()
......@@ -86,11 +90,16 @@ def clicked_tractate_ids_item2vec():
model = Word2Vec(click_ids, hs=0, min_count=3, workers=multiprocessing.cpu_count(), iter=10)
print(model)
print(len(click_ids))
for id in ["373744", "268517", "512"]:
print(model.wv.most_similar(id, topn=5))
model.save(tractate_click_ids_model_path)
return model
@bind("strategy_embedding/word_vector/tractate_item2vec")
def clicked_tractate_ids_item2vec_model(id, n=5):
    """Look up the entries most similar to ``id`` in the tractate click model.

    The query is forwarded to ``TRACTATE_CLICK_IDS_MODEL.wv.most_similar``
    and its result is returned unchanged.

    Args:
        id: Key to look up in the model's vocabulary. The ids used elsewhere
            in this file are strings such as ``"84375"``.
        n: Passed as ``topn`` to ``most_similar``; defaults to 5.

    Returns:
        Whatever ``most_similar`` returns for ``id`` with ``topn=n``.

    NOTE(review): ``most_similar`` presumably raises ``KeyError`` for an id
    that is not in the model vocabulary -- confirm and decide whether callers
    of this endpoint should receive that error.
    """
    keyed_vectors = TRACTATE_CLICK_IDS_MODEL.wv
    neighbours = keyed_vectors.most_similar(id, topn=n)
    return neighbours
if __name__ == "__main__":
begin_time = time.time()
......@@ -99,6 +108,9 @@ if __name__ == "__main__":
for i in ["双眼皮", "隆鼻"]:
print(word_similarity(i))
clicked_tractate_ids_item2vec()
# save_clicked_tractate_ids_item2vec()
for id in ["84375", "148764", "368399"]:
print(clicked_tractate_ids_item2vec_model(id, n=5))
print("total cost: {:.2f}mins".format((time.time() - begin_time) / 60))
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment