Commit 7b7ac522 authored by 赵威

get answer id from index

parent 00e153af
......@@ -4,8 +4,8 @@ import sys
sys.path.append(os.path.realpath("."))
import numpy as np
from bert_serving.client import BertClient
from utils.es import es_scan, get_answer_info_from_es
import faiss
def cos_sim(vector_a, vector_b):
......@@ -38,9 +38,28 @@ if __name__ == "__main__":
# print(cos_sim(sen1_em, sen2_em))
for item in get_answer_info_from_es(["id", "answer", "content_level", "desc"]):
id = item["_id"]
level_dict = {"6": [], "5": [], "4": [], "3.5": [], "3": []}
embedding_dict = {}
for item in get_answer_info_from_es(["id", "answer", "content_level"]):
id = int(item["_id"])
content = item["_source"]["answer"]
content_level = item["_source"]["content_level"]
desc = item["_source"]["desc"]
print(id, content_level, content, desc)
content_level = str(item["_source"]["content_level"])
# print(id, content_level, content)
level_dict[content_level].append(id)
embedding_dict[id] = bc.encode([content])
answer_ids = np.array(list(embedding_dict.keys())).astype("int")
answer_embeddings = np.array(list(embedding_dict.values())).astype("float32")
index = faiss.IndexFlatL2(answer_embeddings.shape[1])
print("trained: " + str(index.is_trained))
index2 = faiss.IndexIDMap(index)
index2.add_with_ids(answer_embeddings, answer_ids)
print("trained: " + str(index2.is_trained))
print("total index: " + str(index2.ntotal))
for i in [59753, 54792, 42643]:
D, I = index2.search(np.array(answer_embeddings[i]).astype("float32"))
res = I.tolist()
print(res, "\n")
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment