Commit 671df37a authored by 赵威

update path

parent bcc01f2c
...@@ -12,7 +12,7 @@ from utils.es import es_scan ...@@ -12,7 +12,7 @@ from utils.es import es_scan
from utils.files import DATA_PATH, MODEL_PATH from utils.files import DATA_PATH, MODEL_PATH
from utils.message import send_msg_to_dingtalk from utils.message import send_msg_to_dingtalk
from word_vector.tractate import tractate_click_ids_model_path from word_vector.tractate import TRACTATE_CLICK_IDS_MODEL
model_output_name = "w2v_model" model_output_name = "w2v_model"
model_path = os.path.join(MODEL_PATH, model_output_name) model_path = os.path.join(MODEL_PATH, model_output_name)
...@@ -21,11 +21,6 @@ try: ...@@ -21,11 +21,6 @@ try:
except Exception as e: except Exception as e:
print(e) print(e)
try:
TRACTATE_CLICK_IDS_MODEL = word2vec.Word2Vec.load(tractate_click_ids_model_path)
except Exception as e:
print(e)
class W2vSentences: class W2vSentences:
def __init__(self, f_name): def __init__(self, f_name):
......
import multiprocessing import multiprocessing
import os import os
import sys import sys
import time
from collections import defaultdict from collections import defaultdict
sys.path.append(os.path.realpath(".")) sys.path.append(os.path.realpath("."))
import time
from gensim.models import Word2Vec, word2vec from gensim.models import Word2Vec, word2vec
from utils.date import get_ndays_before_no_minus, get_ndays_before_with_format from utils.date import get_ndays_before_no_minus, get_ndays_before_with_format
from utils.files import DATA_PATH, MODEL_PATH from utils.files import DATA_PATH, MODEL_PATH
from utils.spark import get_spark from utils.spark import get_spark
from word_vector.api import clicked_tractate_ids_item2vec_model
tractate_click_ids_model_path = os.path.join(MODEL_PATH, "tractate_click_ids_item2vec_model") tractate_click_ids_model_path = os.path.join(MODEL_PATH, "tractate_click_ids_item2vec_model")
try:
TRACTATE_CLICK_IDS_MODEL = word2vec.Word2Vec.load(tractate_click_ids_model_path)
except Exception as e:
print(e)
def get_tracate_click_data(spark, start, end): def get_tracate_click_data(spark, start, end):
reg = r"""^\\d+$""" reg = r"""^\\d+$"""
...@@ -165,7 +167,7 @@ if __name__ == "__main__": ...@@ -165,7 +167,7 @@ if __name__ == "__main__":
save_clicked_tractate_ids_item2vec() save_clicked_tractate_ids_item2vec()
for id in ["84375", "148764", "368399"]: for id in ["84375", "148764", "368399"]:
print(clicked_tractate_ids_item2vec_model(id, n=5)) print(TRACTATE_CLICK_IDS_MODEL.wv.most_similar(id, topn=5))
print("total cost: {:.2f}mins".format((time.time() - begin_time) / 60)) print("total cost: {:.2f}mins".format((time.time() - begin_time) / 60))
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment