Commit 7b7ac522 authored by 赵威

get answer id from index

parent 00e153af
...@@ -4,8 +4,8 @@ import sys ...@@ -4,8 +4,8 @@ import sys
sys.path.append(os.path.realpath(".")) sys.path.append(os.path.realpath("."))
import numpy as np import numpy as np
from bert_serving.client import BertClient from bert_serving.client import BertClient
from utils.es import es_scan, get_answer_info_from_es from utils.es import es_scan, get_answer_info_from_es
import faiss
def cos_sim(vector_a, vector_b): def cos_sim(vector_a, vector_b):
...@@ -38,9 +38,28 @@ if __name__ == "__main__": ...@@ -38,9 +38,28 @@ if __name__ == "__main__":
# print(cos_sim(sen1_em, sen2_em)) # print(cos_sim(sen1_em, sen2_em))
for item in get_answer_info_from_es(["id", "answer", "content_level", "desc"]): level_dict = {"6": [], "5": [], "4": [], "3.5": [], "3": []}
id = item["_id"] embedding_dict = {}
for item in get_answer_info_from_es(["id", "answer", "content_level"]):
id = int(item["_id"])
content = item["_source"]["answer"] content = item["_source"]["answer"]
content_level = item["_source"]["content_level"] content_level = str(item["_source"]["content_level"])
desc = item["_source"]["desc"] # print(id, content_level, content)
print(id, content_level, content, desc) level_dict[content_level].append(id)
embedding_dict[id] = bc.encode([content])
answer_ids = np.array(list(embedding_dict.keys())).astype("int")
answer_embeddings = np.array(list(embedding_dict.values())).astype("float32")
index = faiss.IndexFlatL2(answer_embeddings.shape[1])
print("trained: " + str(index.is_trained))
index2 = faiss.IndexIDMap(index)
index2.add_with_ids(answer_embeddings, answer_ids)
print("trained: " + str(index2.is_trained))
print("total index: " + str(index2.ntotal))
for i in [59753, 54792, 42643]:
D, I = index2.search(np.array(answer_embeddings[i]).astype("float32"))
res = I.tolist()
print(res, "\n")
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment