Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
S
strategy_embedding
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
rank
strategy_embedding
Commits
e337d89f
Commit
e337d89f
authored
Nov 24, 2020
by
赵威
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
save index
parent
d6ea66b1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
9 additions
and
5 deletions
+9
-5
answer_similarity.py
doc_similarity/answer_similarity.py
+9
-5
No files found.
doc_similarity/answer_similarity.py
View file @
e337d89f
import
os
import
sys
import
random
import
sys
sys
.
path
.
append
(
os
.
path
.
realpath
(
"."
))
import
faiss
import
numpy
as
np
from
bert_serving.client
import
BertClient
from
utils.es
import
es_scan
,
get_answer_info_from_es
import
faiss
def
cos_sim
(
vector_a
,
vector_b
):
...
...
@@ -43,13 +44,11 @@ if __name__ == "__main__":
count
=
0
embedding_dict
=
{}
for
item
in
get_answer_info_from_es
([
"id"
,
"answer"
,
"content_level"
]):
if
count
<
1000
:
count
+=
1
id
=
int
(
item
[
"_id"
])
print
(
count
,
id
)
content
=
item
[
"_source"
][
"answer"
]
content_level
=
str
(
item
[
"_source"
][
"content_level"
])
# print(id, content_level, content)
level_dict
[
content_level
]
.
append
(
id
)
embedding_dict
[
id
]
=
bc
.
encode
([
content
])
.
tolist
()[
0
]
...
...
@@ -67,6 +66,12 @@ if __name__ == "__main__":
print
(
"trained: "
+
str
(
index2
.
is_trained
))
print
(
"total index: "
+
str
(
index2
.
ntotal
))
base_dir
=
os
.
getcwd
()
model_dir
=
os
.
path
.
join
(
base_dir
,
"_models"
)
index_path
=
os
.
path
.
join
(
model_dir
,
"faiss_answer_similarity.index"
)
faiss
.
write_index
(
index2
,
index_path
)
print
(
index_path
)
id
=
tmp_tuple
[
0
]
emb
=
np
.
array
([
embedding_dict
[
id
]])
.
astype
(
"float32"
)
print
(
emb
)
...
...
@@ -78,4 +83,3 @@ if __name__ == "__main__":
if
i
<=
0.1
:
res
.
append
(
ids
[
index
])
print
(
res
,
"
\n
"
)
print
(
D
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment