Commit d3d2013e authored by 赵威

write data

parent 8fd10a8d
...@@ -10,7 +10,7 @@ import numpy as np ...@@ -10,7 +10,7 @@ import numpy as np
from bert_serving.client import BertClient from bert_serving.client import BertClient
from utils.cache import redis_client_db from utils.cache import redis_client_db
from utils.es import get_diary_info_from_es from utils.es import get_diary_info_from_es
from utils.files import MODEL_PATH from utils.files import MODEL_PATH, DATA_PATH
def save_result(): def save_result():
...@@ -20,27 +20,27 @@ def save_result(): ...@@ -20,27 +20,27 @@ def save_result():
faiss_index = faiss.read_index(index_path) faiss_index = faiss.read_index(index_path)
print(faiss_index) print(faiss_index)
# level_dict = {"6": set([]), "5": set([]), "4": set([]), "3.5": set([]), "3": set([])} with open(os.path.join(DATA_PATH, "diary_similarity.log"), "w") as f:
count = 0 count = 0
for item in get_diary_info_from_es(["id", "answer", "content_level"]): for item in get_diary_info_from_es(["id", "answer", "content_level"]):
count += 1 count += 1
id = int(item["_id"]) id = int(item["_id"])
content = item["_source"]["answer"] content = item["_source"]["answer"]
content_level = str(item["_source"]["content_level"]) content_level = str(item["_source"]["content_level"])
# level_dict[content_level].add(id) try:
try: emb = np.array([bc.encode([content]).tolist()[0]]).astype("float32")
emb = np.array([bc.encode([content]).tolist()[0]]).astype("float32") D, I = faiss_index.search(emb, 10)
D, I = faiss_index.search(emb, 10) distances = D.tolist()[0]
distances = D.tolist()[0] ids = I.tolist()[0]
ids = I.tolist()[0] res = []
res = [] for (index, i) in enumerate(distances):
for (index, i) in enumerate(distances): tmp_id = ids[index]
tmp_id = ids[index] if i <= 1.0 and tmp_id != id:
if i <= 1.0 and tmp_id != id: res.append(tmp_id)
res.append(tmp_id) if res:
print(count, id, content_level, res) f.write("{}:{}:{}".format(content_level, id, ",".join(res)))
except Exception as e: except Exception as e:
print(e) print(e)
if __name__ == "__main__": if __name__ == "__main__":
# bc = BertClient("172.16.44.82", check_length=False) # bc = BertClient("172.16.44.82", check_length=False)
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment