Commit 06526cd7 authored by 赵威

write data

parent 63093189
...@@ -58,7 +58,7 @@ if __name__ == "__main__": ...@@ -58,7 +58,7 @@ if __name__ == "__main__":
tags_vector_dict[i] = vec tags_vector_dict[i] = vec
except Exception as e: except Exception as e:
pass pass
# redis_client_db.hmset("personas_tags_embedding", tags_vector_dict) redis_client_db.hmset("personas_tags_embedding", tags_vector_dict)
print(len(tags_vector_dict.items())) print(len(tags_vector_dict.items()))
# print(random.choice(list(tags_vector_dict.items()))) # print(random.choice(list(tags_vector_dict.items())))
...@@ -91,21 +91,21 @@ if __name__ == "__main__": ...@@ -91,21 +91,21 @@ if __name__ == "__main__":
base_dir = os.getcwd() base_dir = os.getcwd()
model_dir = os.path.join(base_dir, "_models") model_dir = os.path.join(base_dir, "_models")
index_path = os.path.join(model_dir, "faiss_personas_vector.index") index_path = os.path.join(model_dir, "faiss_personas_vector.index")
# faiss.write_index(index2, index_path) faiss.write_index(index2, index_path)
print(index_path) print(index_path)
device vector # device vector
for _, row in device_tags_df.iterrows(): # for _, row in device_tags_df.iterrows():
vecs = [] # vecs = []
for i in row["business_tags"]: # for i in row["business_tags"]:
# vec = tags_vector_dict.get(i, np.array([])) # # vec = tags_vector_dict.get(i, np.array([]))
vec = tags_vector_dict.get(i) # vec = tags_vector_dict.get(i)
if vec: # if vec:
vecs.append(np.array(json.loads(vec)).astype("float32")) # vecs.append(np.array(json.loads(vec)).astype("float32"))
if vecs: # if vecs:
t = np.array([np.average(vecs, axis=0)]).astype("float32") # t = np.array([np.average(vecs, axis=0)]).astype("float32")
D, I = index2.search(t, 10) # D, I = index2.search(t, 10)
print(row["cl_id"], row["business_tags"]) # print(row["cl_id"], row["business_tags"])
print(I) # print(I)
# curl "http://172.16.31.17:9000/gm-dbmw-tractate-read/_search?pretty" -d '{"query": {"term": {"id": "10269"}}, "_source": {"include": ["content", "portrait_tag_name"]}}' # curl "http://172.16.31.17:9000/gm-dbmw-tractate-read/_search?pretty" -d '{"query": {"term": {"id": "10269"}}, "_source": {"include": ["content", "portrait_tag_name"]}}'
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment