Commit 7280af56 authored by 赵威

get item2vec

parent 43f30aa7
......@@ -38,6 +38,7 @@ INSTALLED_APPS = [
"django_extensions",
"face_similarity",
"word_vector",
"utils",
]
MIDDLEWARE = [
......
......@@ -4,7 +4,6 @@ import time
from gensim.models import Word2Vec, word2vec
from gm_rpcd.all import bind
from utils.db import get_device_click_tractate_ids
from utils.es import es_scan
base_dir = os.getcwd()
......@@ -76,8 +75,15 @@ def projects_item2vec(score_limit=5):
def clicked_tractate_ids_item2vec():
user_dict = get_device_click_tractate_ids()
click_ids = list(user_dict.values())
click_ids = []
with open(os.path.join(data_dir, "click_tractate_ids.csv"), "r") as f:
data = f.readlines()
data = data[:100]
for i in data:
tmp = i.split("|")
# device_id = tmp[0]
ids = tmp[1].rstrip("\n").split(",")
click_ids.append(ids)
model = Word2Vec(click_ids, hs=0, min_count=3, workers=multiprocessing.cpu_count(), iter=10)
print(model)
print(len(click_ids))
......@@ -94,4 +100,6 @@ if __name__ == "__main__":
for i in ["双眼皮", "隆鼻"]:
print(word_similarity(i))
clicked_tractate_ids_item2vec()
print("total cost: {:.2f}mins".format((time.time() - begin_time) / 60))
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment