Commit 63093189 authored by 赵威

change type to redis

parent 6d492c13
...@@ -53,12 +53,12 @@ if __name__ == "__main__": ...@@ -53,12 +53,12 @@ if __name__ == "__main__":
tags_vector_dict = {} tags_vector_dict = {}
for i in tags_set: for i in tags_set:
try: try:
# vec = json.dumps(model.wv.get_vector(i).tolist()) vec = json.dumps(model.wv.get_vector(i).tolist())
vec = model.wv.get_vector(i) # vec = model.wv.get_vector(i)
tags_vector_dict[i] = vec tags_vector_dict[i] = vec
except Exception as e: except Exception as e:
pass pass
redis_client_db.hmset("personas_tags_embedding", tags_vector_dict) # redis_client_db.hmset("personas_tags_embedding", tags_vector_dict)
print(len(tags_vector_dict.items())) print(len(tags_vector_dict.items()))
# print(random.choice(list(tags_vector_dict.items()))) # print(random.choice(list(tags_vector_dict.items())))
...@@ -68,9 +68,9 @@ if __name__ == "__main__": ...@@ -68,9 +68,9 @@ if __name__ == "__main__":
for _, row in tractate_tags_df.iterrows(): for _, row in tractate_tags_df.iterrows():
vecs = [] vecs = []
for i in row["business_tags"]: for i in row["business_tags"]:
vec = tags_vector_dict.get(i, np.array([])) vec = tags_vector_dict.get(i)
if vec.any(): if vec:
vecs.append(vec) vecs.append(np.array(json.loads(vec)).astype("float32"))
if vecs: if vecs:
tractate_vector_dict[row["tractate_id"]] = np.average(vecs, axis=0) tractate_vector_dict[row["tractate_id"]] = np.average(vecs, axis=0)
print(len(tractate_vector_dict.items())) print(len(tractate_vector_dict.items()))
...@@ -91,20 +91,21 @@ if __name__ == "__main__": ...@@ -91,20 +91,21 @@ if __name__ == "__main__":
base_dir = os.getcwd() base_dir = os.getcwd()
model_dir = os.path.join(base_dir, "_models") model_dir = os.path.join(base_dir, "_models")
index_path = os.path.join(model_dir, "faiss_personas_vector.index") index_path = os.path.join(model_dir, "faiss_personas_vector.index")
faiss.write_index(index2, index_path) # faiss.write_index(index2, index_path)
print(index_path) print(index_path)
# device vector device vector
# for _, row in device_tags_df.iterrows(): for _, row in device_tags_df.iterrows():
# vecs = [] vecs = []
# for i in row["business_tags"]: for i in row["business_tags"]:
# vec = tags_vector_dict.get(i, np.array([])) # vec = tags_vector_dict.get(i, np.array([]))
# if vec.any(): vec = tags_vector_dict.get(i)
# vecs.append(vec) if vec:
# if vecs: vecs.append(np.array(json.loads(vec)).astype("float32"))
# t = np.array([np.average(vecs, axis=0)]).astype("float32") if vecs:
# D, I = index2.search(t, 10) t = np.array([np.average(vecs, axis=0)]).astype("float32")
# print(row["cl_id"], row["business_tags"]) D, I = index2.search(t, 10)
# print(I) print(row["cl_id"], row["business_tags"])
print(I)
# curl "http://172.16.31.17:9000/gm-dbmw-tractate-read/_search?pretty" -d '{"query": {"term": {"id": "10269"}}, "_source": {"include": ["content", "portrait_tag_name"]}}' # curl "http://172.16.31.17:9000/gm-dbmw-tractate-read/_search?pretty" -d '{"query": {"term": {"id": "10269"}}, "_source": {"include": ["content", "portrait_tag_name"]}}'
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment