Commit 89d9b293 authored by 赵威's avatar 赵威

save model

parent 22c130e7
import os import os
from gensim.models import Word2Vec from gensim.models import word2vec
base_dir = os.getcwd() base_dir = os.getcwd()
print("base_dir: " + base_dir) print("base_dir: " + base_dir)
...@@ -8,5 +8,24 @@ model_dir = os.path.join(base_dir, "_models") ...@@ -8,5 +8,24 @@ model_dir = os.path.join(base_dir, "_models")
data_dir = os.path.join(base_dir, "_data") data_dir = os.path.join(base_dir, "_data")
class W2vSentences:
    """Restartable iterable that streams a text file as token lists.

    Every call to ``__iter__`` re-opens the file, so the same instance can be
    iterated several times without holding the file contents in memory.
    """

    def __init__(self, f_name):
        """Store the path of the file to read; the file is not opened here.

        Args:
            f_name: Path of the text file; each line is treated as one
                sentence.
        """
        self.f_name = f_name

    def __iter__(self):
        """Yield each line of the file as a list of whitespace-split tokens.

        Blank lines produce an empty list.
        """
        # The context manager closes the handle once the generator finishes
        # (or is discarded), instead of leaking one descriptor per pass.
        with open(self.f_name, "r") as corpus:
            for line in corpus:
                yield line.split()
def w2v_train(f_name, model_output_name):
    """Train a Word2Vec model on a corpus file and save it to disk.

    Args:
        f_name: Name of the corpus file, resolved relative to ``data_dir``.
        model_output_name: File name for the saved model, resolved relative
            to ``model_dir``.
    """
    input_file = os.path.join(data_dir, f_name)
    print("input: " + input_file)

    sentences = W2vSentences(input_file)
    # NOTE(review): ``size`` is the keyword used by gensim < 4.0; gensim 4.x
    # renamed it to ``vector_size`` -- confirm against the installed version.
    w2v_model = word2vec.Word2Vec(sentences, min_count=2, workers=2, size=100, window=10)

    model_path = os.path.join(model_dir, model_output_name)
    print("output: " + model_path)
    # Create the target directory if needed so that a finished training run
    # is not lost to a missing-directory error at save time.
    os.makedirs(os.path.dirname(model_path), exist_ok=True)
    w2v_model.save(model_path)
if __name__ == "__main__":
    # Script entry point: train on the bundled corpus file and store the
    # resulting model under the name "w2v_model".
    w2v_train(
        f_name="dispose_problem.txt",
        model_output_name="w2v_model",
    )
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment