Commit 14aaeed2 authored by 赵威's avatar 赵威

get data from es

parent 160c408e
import os
import sys
sys.path.append(os.path.realpath("."))
import numpy as np import numpy as np
from bert_serving.client import BertClient from bert_serving.client import BertClient
from utils.es import es_scan, get_answer_info_from_es
def cos_sim(vector_a, vector_b): def cos_sim(vector_a, vector_b):
""" """
...@@ -18,15 +24,22 @@ def cos_sim(vector_a, vector_b): ...@@ -18,15 +24,22 @@ def cos_sim(vector_a, vector_b):
return sim return sim
bc = BertClient("172.16.44.82") if __name__ == "__main__":
sentence = """ bc = BertClient("172.16.44.82")
<p>做完私处整形手术,最好在一个月以后进行同房。因为过早同房,可能会对女性的私处造成损伤,甚至可能出现感染的情况。在恢复期间,女性可以适当的多吃水果蔬菜,多喝水,保持体内水分的充足。尽量不要吃刺激性过强的食物。在平时要注意私处的卫生,如果私处有瘙痒的情况,尽量不要用手直接的抓挠,坚持每天更换内裤,不要擅自用妇科清洗液,可以用温水轻轻擦拭私处。如果私处有不适感,需要及时去医院进行检查并治疗。</p> # sentence = """
""" # <p>做完私处整形手术,最好在一个月以后进行同房。因为过早同房,可能会对女性的私处造成损伤,甚至可能出现感染的情况。在恢复期间,女性可以适当的多吃水果蔬菜,多喝水,保持体内水分的充足。尽量不要吃刺激性过强的食物。在平时要注意私处的卫生,如果私处有瘙痒的情况,尽量不要用手直接的抓挠,坚持每天更换内裤,不要擅自用妇科清洗液,可以用温水轻轻擦拭私处。如果私处有不适感,需要及时去医院进行检查并治疗。</p>
# """
# sen1_em = bc.encode([sentence])
# sen2_em = bc.encode([sentence])
sen1_em = bc.encode([sentence]) # print(type(sen1_em), sen1_em)
sen2_em = bc.encode([sentence]) # print(sen2_em)
print(type(sen1_em), sen1_em) # print(cos_sim(sen1_em, sen2_em))
print(sen2_em)
print(cos_sim(sen1_em, sen2_em)) for item in get_answer_info_from_es(["id", "content", "content_level"]):
id = item["_id"]
content = item["_source"]["content"]
content_level = item["_source"]["content_level"]
print(id, content_level, content)
...@@ -32,8 +32,8 @@ tensorflow==2.3.1 ...@@ -32,8 +32,8 @@ tensorflow==2.3.1
keras==2.4.3 keras==2.4.3
protobuf==3.13.0 protobuf==3.13.0
bert-serving-server bert-serving-server==1.10.0
bert-serving-client bert-serving-client==1.10.0
ipython ipython
prompt-toolkit==2.0.10 prompt-toolkit==2.0.10
...@@ -80,6 +80,35 @@ def get_tractate_info_from_es(fields=["id"]): ...@@ -80,6 +80,35 @@ def get_tractate_info_from_es(fields=["id"]):
return results return results
def get_answer_info_from_es(fields=None):
    """Scan the "answer" index for online, sufficiently long, graded answers.

    The query requires all of the following to hold for a document:

    * ``is_online`` is ``True``
    * ``content_level`` is one of 6, 5, 4, 3.5 or 3
    * ``content_length`` is at least 30

    Args:
        fields: Source fields to include in each hit. Defaults to ``["id"]``
            when omitted or ``None``.

    Returns:
        Whatever ``es_scan("answer", query)`` returns, unchanged.
    """
    # A fresh list per call: a list literal used as the default value would be
    # a single object shared by every call and aliased into the query dict.
    if fields is None:
        fields = ["id"]

    q = {
        "query": {
            "bool": {
                "must": [
                    {"term": {"is_online": True}},
                    {"terms": {"content_level": [6, 5, 4, 3.5, 3]}},
                    {"range": {"content_length": {"gte": 30}}},
                ]
            }
        },
        # NOTE(review): newer Elasticsearch releases spell this key
        # "includes" -- confirm against the cluster version before changing.
        "_source": {
            "include": fields
        },
    }
    return es_scan("answer", q)
# def save_diary_info_from_es(): # def save_diary_info_from_es():
# q = { # q = {
# "query": { # "query": {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment