Commit 5ab7d3ea authored by 赵威

get vector info

parent 3be11306
......@@ -9,6 +9,7 @@ import jieba
import numpy as np
from gensim import models as word2vec
from utils.cache import redis_client5
from utils.es import es_scan
reload(sys)
......@@ -41,6 +42,31 @@ def get_new_user_tractate_info():
return es_scan("tractate", q)
def get_tractate_vector(sentence_lst, all_keywords_set, model):
    """Average the word vectors of the known keywords found in each sentence.

    Every sentence is segmented with ``jieba.lcut``; each token is UTF-8
    encoded and kept only when it is a member of ``all_keywords_set``. The
    kept tokens are looked up through ``model.wv.get_vector`` and the vectors
    found for one sentence are combined with ``np.average(..., axis=0)``.

    Args:
        sentence_lst: Iterable of sentences to segment. ``None`` is treated
            as an empty input.
        all_keywords_set: Collection of UTF-8 encoded keywords; tokens that
            are not in it are ignored.
        model: Object exposing ``wv.get_vector(word)`` (presumably a trained
            gensim word2vec model -- confirm against the caller).

    Returns:
        dict mapping the position of a sentence in ``sentence_lst`` to its
        averaged vector. Sentences without any usable keyword vector are
        left out.
    """
    # NOTE(review): the reviewed listing had lost its indentation, so it is
    # ambiguous whether the original counter advanced for every sentence or
    # only for sentences that produced a vector. Keys here are sentence
    # positions -- confirm against the consumers of this dict.
    res = {}
    for index, sentence in enumerate(sentence_lst or ()):
        vecs = []
        for token in jieba.lcut(sentence):
            word = token.encode("utf-8")
            if word not in all_keywords_set:
                continue
            try:
                vecs.append(model.wv.get_vector(word))
            except KeyError:
                # The keyword is not in the model vocabulary: skip it, but
                # let every other kind of error surface instead of hiding it.
                continue
        if vecs:
            res[index] = np.average(vecs, axis=0)
    return res
if __name__ == "__main__":
model_path = "/data/log/word2vec/app/ipynb_garbage_files/test_w2v_model_4"
keyword_txt = "/data/log/word2vec/all_key_word.txt"
......@@ -58,13 +84,16 @@ if __name__ == "__main__":
for word in all_keywords_set:
jieba.add_word(word, freq=1000, tag="user_defined")
tractate_vector_dict = {}
es_result = get_new_user_tractate_info()
count = 0
# count = 0
for i in es_result:
count += 1
# print(count)
# count += 1
source = i["_source"]
print(count, source["id"])
sentences = source["keynote_sentence"]
id = source["id"]
res = get_tractate_vector(sentences, all_keywords_set, model)
print(res)
# sentences = [
# "是不是肋软骨钙化就只能做异体骨修复鼻子?",
......
......@@ -4,5 +4,6 @@ redis_client = redis.StrictRedis.from_url("redis://:ReDis!GmTx*0aN6@172.16.40.13
# Module-level Redis clients, one per connection URL.
# NOTE(review): the passwords are embedded directly in these URLs and are
# therefore committed to version control; consider loading them from the
# environment or a secret store and rotating the exposed values.
redis_client2 = redis.StrictRedis.from_url("redis://:ReDis!GmTx*0aN9@172.16.40.173:6379")
redis_client3 = redis.StrictRedis.from_url("redis://:ReDis!GmTx*0aN12@172.16.40.164:6379")
redis_client4 = redis.StrictRedis.from_url("redis://:XfkMCCdWDIU%ls$h@172.16.50.145:6379")
redis_client5 = redis.StrictRedis.from_url("redis://:XfkMCCdWDIU%ls$h3@172.16.50.159:6379")
redis_client_db = redis.StrictRedis.from_url("redis://:ReDis!GmTx*0aN14@172.16.40.146:6379")
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment