Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
S
strategy_embedding
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
rank
strategy_embedding
Commits
d3d2013e
Commit
d3d2013e
authored
Nov 25, 2020
by
赵威
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
write data
parent
8fd10a8d
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
22 additions
and
22 deletions
+22
-22
diary_similarity.py
doc_similarity/diary_similarity.py
+22
-22
No files found.
doc_similarity/diary_similarity.py
View file @
d3d2013e
...
@@ -10,7 +10,7 @@ import numpy as np
...
@@ -10,7 +10,7 @@ import numpy as np
from
bert_serving.client
import
BertClient
from
bert_serving.client
import
BertClient
from
utils.cache
import
redis_client_db
from
utils.cache
import
redis_client_db
from
utils.es
import
get_diary_info_from_es
from
utils.es
import
get_diary_info_from_es
from
utils.files
import
MODEL_PATH
from
utils.files
import
MODEL_PATH
,
DATA_PATH
def
save_result
():
def
save_result
():
...
@@ -20,27 +20,27 @@ def save_result():
...
@@ -20,27 +20,27 @@ def save_result():
faiss_index
=
faiss
.
read_index
(
index_path
)
faiss_index
=
faiss
.
read_index
(
index_path
)
print
(
faiss_index
)
print
(
faiss_index
)
# level_dict = {"6": set([]), "5": set([]), "4": set([]), "3.5": set([]), "3": set([])}
with
open
(
os
.
path
.
join
(
DATA_PATH
,
"diary_similarity.log"
),
"w"
)
as
f
:
count
=
0
count
=
0
for
item
in
get_diary_info_from_es
([
"id"
,
"answer"
,
"content_level"
]):
for
item
in
get_diary_info_from_es
([
"id"
,
"answer"
,
"content_level"
]):
count
+=
1
count
+=
1
id
=
int
(
item
[
"_id"
])
id
=
int
(
item
[
"_id"
])
content
=
item
[
"_source"
][
"answer"
]
content
=
item
[
"_source"
][
"answer"
]
content_level
=
str
(
item
[
"_source"
][
"content_level"
])
content_level
=
str
(
item
[
"_source"
][
"content_level"
])
# level_dict[content_level].add(id)
try
:
try
:
emb
=
np
.
array
([
bc
.
encode
([
content
])
.
tolist
()[
0
]])
.
astype
(
"float32"
)
emb
=
np
.
array
([
bc
.
encode
([
content
])
.
tolist
()[
0
]])
.
astype
(
"float32"
)
D
,
I
=
faiss_index
.
search
(
emb
,
10
)
D
,
I
=
faiss_index
.
search
(
emb
,
10
)
distances
=
D
.
tolist
()[
0
]
distances
=
D
.
tolist
()[
0
]
ids
=
I
.
tolist
()[
0
]
ids
=
I
.
tolist
()[
0
]
res
=
[
]
res
=
[]
for
(
index
,
i
)
in
enumerate
(
distances
):
for
(
index
,
i
)
in
enumerate
(
distances
):
tmp_id
=
ids
[
index
]
tmp_id
=
ids
[
index
]
if
i
<=
1.0
and
tmp_id
!=
id
:
if
i
<=
1.0
and
tmp_id
!=
id
:
res
.
append
(
tmp_id
)
res
.
append
(
tmp_id
)
if
res
:
print
(
count
,
id
,
content_level
,
res
)
f
.
write
(
"{}:{}:{}"
.
format
(
content_level
,
id
,
","
.
join
(
res
))
)
except
Exception
as
e
:
except
Exception
as
e
:
print
(
e
)
print
(
e
)
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
# bc = BertClient("172.16.44.82", check_length=False)
# bc = BertClient("172.16.44.82", check_length=False)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment