Commit 1006952d authored by 赵威

add content level

parent d7443fce
...@@ -34,6 +34,7 @@ def write_result(): ...@@ -34,6 +34,7 @@ def write_result():
bc = BertClient("172.16.44.82", check_length=False) bc = BertClient("172.16.44.82", check_length=False)
count = 0 count = 0
content_level_dict = {}
embedding_dict = {} embedding_dict = {}
for item in get_answer_info_from_es(["id", "answer", "content_level"]): for item in get_answer_info_from_es(["id", "answer", "content_level"]):
count += 1 count += 1
...@@ -41,7 +42,8 @@ def write_result(): ...@@ -41,7 +42,8 @@ def write_result():
id = int(item["_id"]) id = int(item["_id"])
soup = BeautifulSoup(item["_source"]["answer"], "html.parser") soup = BeautifulSoup(item["_source"]["answer"], "html.parser")
content = soup.get_text() content = soup.get_text()
# content_level = str(item["_source"]["content_level"]) content_level = str(item["_source"]["content_level"])
content_level_dict[id] = content_level
# print(count, id, content) # print(count, id, content)
embedding_dict[id] = bc.encode([content]).tolist()[0] embedding_dict[id] = bc.encode([content]).tolist()[0]
except Exception as e: except Exception as e:
...@@ -72,7 +74,7 @@ def write_result(): ...@@ -72,7 +74,7 @@ def write_result():
if i <= 1.0 and tmp_id != id: if i <= 1.0 and tmp_id != id:
res.append(str(tmp_id)) res.append(str(tmp_id))
if res: if res:
data = "{}:{}".format(str(id), ",".join(res)) data = "{}:{}:{}".format(content_level_dict.get(id, -1), str(id), ",".join(res))
print(data) print(data)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment