faiss / Commits / daf589d9

Commit daf589d9 authored Dec 20, 2018 by matthijs

add bench_all_ivf

parent 4bcb5b3f

Showing 18 changed files with 1105 additions and 32 deletions (+1105 -32)
```
AutoTune.cpp                                      +3    -1
Index.h                                           +1    -1
IndexBinaryIVF.cpp                                +4    -0
IndexFlat.cpp                                     +0    -1
IndexIVF.cpp                                      +4    -1
IndexIVF.h                                        +2    -0
IndexIVFPQ.cpp                                    +12   -0
benchs/bench_all_ivf/bench_all_ivf.py             +309  -0
benchs/bench_all_ivf/datasets.py                  +234  -0
benchs/bench_all_ivf/parse_bench_all_ivf.py       +269  -0
benchs/bench_all_ivf/run_on_cluster_generic.bash  +250  -0
benchs/link_and_code/README.md                    +0    -2
python/faiss.py                                   +2    -2
tests/test_index.py                               +1    -1
tests/test_index_accuracy.py                      +6    -8
tests/test_lowlevel_ivf.cpp                       +1    -1
tests/test_sliding_ivf.cpp                        +1    -5
tests/test_transfer_invlists.cpp                  +6    -9
```
AutoTune.cpp  (+3 -1)

```diff
@@ -392,7 +392,9 @@ void ParameterSpace::initialize (const Index * index)
             for (int i = 8; i < 20; i++) {
                 pr_max_codes.values.push_back (1 << i);
             }
-            pr_max_codes.values.push_back (std::numeric_limits<double>::infinity());
+            pr_max_codes.values.push_back (
+                  std::numeric_limits<double>::infinity()
+            );
         }
     }
     if (DC (IndexIVFPQR)) {
```
Index.h  (+1 -1)

```diff
@@ -18,7 +18,7 @@
 #include <sstream>

 #define FAISS_VERSION_MAJOR 1
-#define FAISS_VERSION_MINOR 5
+#define FAISS_VERSION_MINOR 4
 #define FAISS_VERSION_PATCH 0

 /**
```
IndexBinaryIVF.cpp  (+4 -0)

```diff
@@ -130,12 +130,16 @@ void IndexBinaryIVF::search(idx_t n, const uint8_t *x, idx_t k,
     std::unique_ptr<idx_t[]> idx(new idx_t[n * nprobe]);
     std::unique_ptr<int32_t[]> coarse_dis(new int32_t[n * nprobe]);

+    double t0 = getmillisecs();
     quantizer->search(n, x, nprobe, coarse_dis.get(), idx.get());
+    indexIVF_stats.quantization_time += getmillisecs() - t0;

+    t0 = getmillisecs();
     invlists->prefetch_lists(idx.get(), n * nprobe);

     search_preassigned(n, x, k, idx.get(), coarse_dis.get(),
                        distances, labels, false);
+    indexIVF_stats.search_time += getmillisecs() - t0;
 }

 void IndexBinaryIVF::reconstruct(idx_t key, uint8_t *recons) const {
```
IndexFlat.cpp  (+0 -1)

```diff
@@ -11,7 +11,6 @@
 #include "IndexFlat.h"

 #include <cstring>
-#include <limits>
 #include "utils.h"
 #include "Heap.h"
```
IndexIVF.cpp  (+4 -1)

```diff
@@ -175,13 +175,16 @@ void IndexIVF::search (idx_t n, const float *x, idx_t k,
     float * coarse_dis = new float [n * nprobe];
     ScopeDeleter<float> del2 (coarse_dis);

+    double t0 = getmillisecs();
     quantizer->search (n, x, nprobe, coarse_dis, idx);
+    indexIVF_stats.quantization_time += getmillisecs() - t0;

+    t0 = getmillisecs();
     invlists->prefetch_lists (idx, n * nprobe);

     search_preassigned (n, x, k, idx, coarse_dis,
                         distances, labels, false);
+    indexIVF_stats.search_time += getmillisecs() - t0;
 }
```
IndexIVF.h  (+2 -0)

```diff
@@ -297,6 +297,8 @@ struct IndexIVFStats {
     size_t nlist;          // nb of inverted lists scanned
     size_t ndis;           // nb of distances computed
     size_t nheap_updates;  // nb of times the heap was updated
+    double quantization_time;  // time spent quantizing vectors (in ms)
+    double search_time;        // time spent searching lists (in ms)

     IndexIVFStats () { reset (); }
     void reset ();
```
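These counters are reachable from Python through the SWIG layer as `faiss.cvar.indexIVF_stats`, which is how the new benchmark script already reads `ndis`. A minimal sketch of reading the two new timers after a search, assuming a rebuilt wrapper (the index key, sizes and random data below are made up for illustration):

```python
import numpy as np
import faiss

d = 64
xb = np.random.rand(10000, d).astype('float32')

index = faiss.index_factory(d, "IVF256,Flat")
index.train(xb)
index.add(xb)
index.nprobe = 16

stats = faiss.cvar.indexIVF_stats
stats.reset()
D, I = index.search(xb[:100], 10)

# both fields are in milliseconds and accumulate until the next reset()
print("quantization_time: %.3f ms" % stats.quantization_time)
print("search_time:       %.3f ms" % stats.search_time)
```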
IndexIVFPQ.cpp  (+12 -0)

```diff
@@ -636,6 +636,11 @@ struct QueryTables {
         if (use_precomputed_table == 0 || use_precomputed_table == -1) {
             ivfpq.quantizer->compute_residual (qi, residual_vec, key);
             pq.compute_distance_table (residual_vec, sim_table);

+            if (polysemous_ht != 0) {
+                pq.compute_code (residual_vec, q_code.data());
+            }

         } else if (use_precomputed_table == 1) {
             dis0 = coarse_dis;
@@ -643,6 +648,13 @@ struct QueryTables {
                  &ivfpq.precomputed_table [key * pq.ksub * pq.M],
                  -2.0, sim_table_2,
                  sim_table);

+            if (polysemous_ht != 0) {
+                ivfpq.quantizer->compute_residual (qi, residual_vec, key);
+                pq.compute_code (residual_vec, q_code.data());
+            }

         } else if (use_precomputed_table == 2) {
             dis0 = coarse_dis;
```
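Both added branches make sure the query's own PQ code (`q_code`) is computed whenever polysemous filtering is active, since the filter compares codes by Hamming distance during the list scan. From Python the filter is toggled through the `polysemous_ht` field; a hedged sketch (index key, sizes and threshold are arbitrary):

```python
import numpy as np
import faiss

d = 32
xb = np.random.rand(20000, d).astype('float32')

index = faiss.index_factory(d, "IVF64,PQ8")
index.train(xb)
index.add(xb)
index.nprobe = 8

# a non-zero Hamming threshold enables the filter, which is the case the
# new code paths above prepare q_code for; 0 (the default) disables it
index.polysemous_ht = 20
D, I = index.search(xb[:5], 10)
```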
benchs/bench_all_ivf/bench_all_ivf.py  (new file, +309)

```python
# Copyright (c) 2015-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the BSD+Patents license found in the
# LICENSE file in the root directory of this source tree.

#!/usr/bin/env python2

import os
import sys
import time
import numpy as np
import faiss
import argparse
import datasets
from datasets import sanitize

######################################################
# Command-line parsing
######################################################

parser = argparse.ArgumentParser()


def aa(*args, **kwargs):
    group.add_argument(*args, **kwargs)

group = parser.add_argument_group('dataset options')

aa('--db', default='deep1M', help='dataset')
aa('--compute_gt', default=False, action='store_true',
   help='compute and store the groundtruth')

group = parser.add_argument_group('index construction')

aa('--indexkey', default='HNSW32', help='index_factory type')
aa('--efConstruction', default=200, type=int,
   help='HNSW construction factor')
aa('--M0', default=-1, type=int, help='size of base level')
aa('--maxtrain', default=256 * 256, type=int,
   help='maximum number of training points (0 to set automatically)')
aa('--indexfile', default='', help='file to read or write index from')
aa('--add_bs', default=-1, type=int,
   help='add elements index by batches of this size')
aa('--no_precomputed_tables', action='store_true', default=False,
   help='disable precomputed tables (uses less memory)')
aa('--clustering_niter', default=-1, type=int,
   help='number of clustering iterations (-1 = leave default)')
aa('--train_on_gpu', default=False, action='store_true',
   help='do training on GPU')
aa('--get_centroids_from', default='',
   help='get the centroids from this index (to speed up training)')

group = parser.add_argument_group('searching')

aa('--k', default=100, type=int, help='nb of nearest neighbors')
aa('--searchthreads', default=-1, type=int,
   help='nb of threads to use at search time')
aa('--searchparams', nargs='+', default=['autotune'],
   help="search parameters to use (can be autotune or a list of params)")
aa('--n_autotune', default=500, type=int,
   help="max nb of autotune experiments")
aa('--autotune_max', default=[], nargs='*',
   help='set max value for autotune variables format "var:val" (exclusive)')
aa('--autotune_range', default=[], nargs='*',
   help='set complete autotune range, format "var:val1,val2,..."')
aa('--min_test_duration', default=0, type=float,
   help='run test at least for so long to avoid jitter')

args = parser.parse_args()

print "args:", args

os.system('echo -n "nb processors "; '
          'cat /proc/cpuinfo | grep ^processor | wc -l; '
          'cat /proc/cpuinfo | grep ^"model name" | tail -1')

######################################################
# Load dataset
######################################################

xt, xb, xq, gt = datasets.load_data(
    dataset=args.db, compute_gt=args.compute_gt)

print "dataset sizes: train %s base %s query %s GT %s" % (
    xt.shape, xb.shape, xq.shape, gt.shape)

nq, d = xq.shape
nb, d = xb.shape

######################################################
# Make index
######################################################

if args.indexfile and os.path.exists(args.indexfile):

    print "reading", args.indexfile
    index = faiss.read_index(args.indexfile)

    if isinstance(index, faiss.IndexPreTransform):
        index_ivf = faiss.downcast_index(index.index)
    else:
        index_ivf = index
        assert isinstance(index_ivf, faiss.IndexIVF)
        vec_transform = lambda x: x
    assert isinstance(index_ivf, faiss.IndexIVF)

else:

    print "build index, key=", args.indexkey

    index = faiss.index_factory(d, args.indexkey)

    if isinstance(index, faiss.IndexPreTransform):
        index_ivf = faiss.downcast_index(index.index)
        vec_transform = index.chain.at(0).apply_py
    else:
        index_ivf = index
        vec_transform = lambda x: x
    assert isinstance(index_ivf, faiss.IndexIVF)

    index_ivf.verbose = True
    index_ivf.quantizer.verbose = True
    index_ivf.cp.verbose = True

    maxtrain = args.maxtrain
    if maxtrain == 0:
        if 'IMI' in args.indexkey:
            maxtrain = int(256 * 2 ** (np.log2(index_ivf.nlist) / 2))
        else:
            maxtrain = 50 * index_ivf.nlist
        print "setting maxtrain to %d" % maxtrain
        args.maxtrain = maxtrain

    xt2 = sanitize(xt[:args.maxtrain])
    assert np.all(np.isfinite(xt2))

    print "train, size", xt2.shape

    if args.get_centroids_from == '':

        if args.clustering_niter >= 0:
            print ("setting nb of clustering iterations to %d" %
                   args.clustering_niter)
            index_ivf.cp.niter = args.clustering_niter

        if args.train_on_gpu:
            print "add a training index on GPU"
            train_index = faiss.index_cpu_to_all_gpus(faiss.IndexFlatL2(d))
            index_ivf.clustering_index = train_index

    else:
        print "Getting centroids from", args.get_centroids_from
        src_index = faiss.read_index(args.get_centroids_from)
        src_quant = faiss.downcast_index(src_index.quantizer)
        centroids = faiss.vector_to_array(src_quant.xb)
        centroids = centroids.reshape(-1, d)
        print "  centroid table shape", centroids.shape

        if isinstance(index, faiss.IndexPreTransform):
            print "  training vector transform"
            assert index.chain.size() == 1
            vt = index.chain.at(0)
            vt.train(xt2)
            print "  transform centroids"
            centroids = vt.apply_py(centroids)

        print "  add centroids to quantizer"
        index_ivf.quantizer.add(centroids)
        del src_index

    t0 = time.time()
    index.train(xt2)
    print "  train in %.3f s" % (time.time() - t0)

    print "adding"
    t0 = time.time()
    if args.add_bs == -1:
        index.add(sanitize(xb))
    else:
        for i0 in range(0, nb, args.add_bs):
            i1 = min(nb, i0 + args.add_bs)
            print "  adding %d:%d / %d" % (i0, i1, nb)
            index.add(sanitize(xb[i0:i1]))

    print "  add in %.3f s" % (time.time() - t0)

    if args.indexfile:
        print "storing", args.indexfile
        faiss.write_index(index, args.indexfile)

if args.no_precomputed_tables:
    if isinstance(index_ivf, faiss.IndexIVFPQ):
        print "disabling precomputed table"
        index_ivf.use_precomputed_table = -1
        index_ivf.precomputed_table.clear()

if args.indexfile:
    print "index size on disk: ", os.stat(args.indexfile).st_size

print "current RSS:", faiss.get_mem_usage_kb() * 1024

precomputed_table_size = 0
if hasattr(index_ivf, 'precomputed_table'):
    precomputed_table_size = index_ivf.precomputed_table.size() * 4

print "precomputed tables size:", precomputed_table_size

#############################################################
# Index is ready
#############################################################

xq = sanitize(xq)

if args.searchthreads != -1:
    print "Setting nb of threads to", args.searchthreads
    faiss.omp_set_num_threads(args.searchthreads)

ps = faiss.ParameterSpace()
ps.initialize(index)

parametersets = args.searchparams

header = ('%-40s     R@1   R@10  R@100   time(ms/q)  nb distances #runs' %
          "parameters")


def eval_setting(index, xq, gt, min_time):
    nq = xq.shape[0]
    ivf_stats = faiss.cvar.indexIVF_stats
    ivf_stats.reset()
    nrun = 0
    t0 = time.time()
    while True:
        D, I = index.search(xq, 100)
        nrun += 1
        t1 = time.time()
        if t1 - t0 > min_time:
            break
    ms_per_query = ((t1 - t0) * 1000.0 / nq / nrun)
    for rank in 1, 10, 100:
        n_ok = (I[:, :rank] == gt[:, :1]).sum()
        print "%.4f" % (n_ok / float(nq)),
    print "   %8.3f  " % ms_per_query,
    print "%12d   " % (ivf_stats.ndis / nrun),
    print nrun


if parametersets == ['autotune']:

    ps.n_experiments = args.n_autotune
    ps.min_test_duration = args.min_test_duration

    for kv in args.autotune_max:
        k, vmax = kv.split(':')
        vmax = float(vmax)
        print "limiting %s to %g" % (k, vmax)
        pr = ps.add_range(k)
        values = faiss.vector_to_array(pr.values)
        values = np.array([v for v in values if v < vmax])
        faiss.copy_array_to_vector(values, pr.values)

    for kv in args.autotune_range:
        k, vals = kv.split(':')
        vals = np.fromstring(vals, sep=',')
        print "setting %s to %s" % (k, vals)
        pr = ps.add_range(k)
        faiss.copy_array_to_vector(vals, pr.values)

    # setup the Criterion object: optimize for 1-R@1
    crit = faiss.OneRecallAtRCriterion(nq, 1)
    # by default, the criterion will request only 1 NN
    crit.nnn = 100
    crit.set_groundtruth(None, gt.astype('int64'))

    # then we let Faiss find the optimal parameters by itself
    print "exploring operating points"
    ps.display()

    t0 = time.time()
    op = ps.explore(index, xq, crit)
    print "Done in %.3f s, available OPs:" % (time.time() - t0)

    op.display()

    print header
    opv = op.optimal_pts
    for i in range(opv.size()):
        opt = opv.at(i)

        ps.set_index_parameters(index, opt.key)

        print "%-40s " % opt.key,
        sys.stdout.flush()

        eval_setting(index, xq, gt, args.min_test_duration)

else:
    print header
    for param in parametersets:
        print "%-40s " % param,
        sys.stdout.flush()
        ps.set_index_parameters(index, param)

        eval_setting(index, xq, gt, args.min_test_duration)
```
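The `eval_setting` routine reports 1-recall@rank: the fraction of queries whose single ground-truth neighbor (`gt[:, :1]`) appears among the top `rank` results. A toy NumPy illustration of that idiom (the arrays are invented, not benchmark output):

```python
import numpy as np

# two queries: I holds the returned top-4 ids, gt the true nearest neighbor
I = np.array([[7, 3, 9, 1],
              [4, 8, 2, 6]])
gt = np.array([[3, 0],
               [5, 1]])  # only column 0 is compared against

for rank in 1, 2, 4:
    n_ok = (I[:, :rank] == gt[:, :1]).sum()
    print("1-recall@%d = %.2f" % (rank, n_ok / float(len(I))))
# query 0 finds its true NN (id 3) at position 2, query 1 never does:
# prints 0.00, 0.50, 0.50
```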
benchs/bench_all_ivf/datasets.py  (new file, +234)

```python
# Copyright (c) 2015-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the BSD+Patents license found in the
# LICENSE file in the root directory of this source tree.

#! /usr/bin/env python2

"""
Common functions to load datasets and compute their ground-truth
"""

import time
import numpy as np
import faiss
import sys

# set this to the directory that contains the datafiles.
# deep1b data should be at simdir + 'deep1b'
# bigann data should be at simdir + 'bigann'
simdir = '/mnt/vol/gfsai-east/ai-group/datasets/simsearch/'

#################################################################
# Small I/O functions
#################################################################


def ivecs_read(fname):
    a = np.fromfile(fname, dtype='int32')
    d = a[0]
    return a.reshape(-1, d + 1)[:, 1:].copy()


def fvecs_read(fname):
    return ivecs_read(fname).view('float32')


def ivecs_mmap(fname):
    a = np.memmap(fname, dtype='int32', mode='r')
    d = a[0]
    return a.reshape(-1, d + 1)[:, 1:]


def fvecs_mmap(fname):
    return ivecs_mmap(fname).view('float32')


def bvecs_mmap(fname):
    x = np.memmap(fname, dtype='uint8', mode='r')
    d = x[:4].view('int32')[0]
    return x.reshape(-1, d + 4)[:, 4:]


def ivecs_write(fname, m):
    n, d = m.shape
    m1 = np.empty((n, d + 1), dtype='int32')
    m1[:, 0] = d
    m1[:, 1:] = m
    m1.tofile(fname)


def fvecs_write(fname, m):
    m = m.astype('float32')
    ivecs_write(fname, m.view('int32'))


#################################################################
# Dataset
#################################################################

def sanitize(x):
    return np.ascontiguousarray(x, dtype='float32')


class ResultHeap:
    """ Combine query results from a sliced dataset """

    def __init__(self, nq, k):
        " nq: number of query vectors, k: number of results per query "
        self.I = np.zeros((nq, k), dtype='int64')
        self.D = np.zeros((nq, k), dtype='float32')
        self.nq, self.k = nq, k
        heaps = faiss.float_maxheap_array_t()
        heaps.k = k
        heaps.nh = nq
        heaps.val = faiss.swig_ptr(self.D)
        heaps.ids = faiss.swig_ptr(self.I)
        heaps.heapify()
        self.heaps = heaps

    def add_batch_result(self, D, I, i0):
        assert D.shape == (self.nq, self.k)
        assert I.shape == (self.nq, self.k)
        I += i0
        self.heaps.addn_with_ids(
            self.k, faiss.swig_ptr(D),
            faiss.swig_ptr(I), self.k)

    def finalize(self):
        self.heaps.reorder()


def compute_GT_sliced(xb, xq, k):
    print "compute GT"
    t0 = time.time()
    nb, d = xb.shape
    nq, d = xq.shape
    rh = ResultHeap(nq, k)
    bs = 10 ** 5
    xqs = sanitize(xq)

    db_gt = faiss.index_cpu_to_all_gpus(faiss.IndexFlatL2(d))

    # compute ground-truth by blocks of bs, and add to heaps
    for i0 in range(0, nb, bs):
        i1 = min(nb, i0 + bs)
        xsl = sanitize(xb[i0:i1])
        db_gt.add(xsl)
        D, I = db_gt.search(xqs, k)
        rh.add_batch_result(D, I, i0)
        db_gt.reset()
        print "\r   %d/%d, %.3f s" % (i0, nb, time.time() - t0),
        sys.stdout.flush()
    print
    rh.finalize()
    gt_I = rh.I

    print "GT time: %.3f s" % (time.time() - t0)
    return gt_I


def do_compute_gt(xb, xq, k):
    print "computing GT"
    nb, d = xb.shape
    index = faiss.index_cpu_to_all_gpus(faiss.IndexFlatL2(d))
    if nb < 100 * 1000:
        print "  add"
        index.add(np.ascontiguousarray(xb, dtype='float32'))
        print "  search"
        D, I = index.search(np.ascontiguousarray(xq, dtype='float32'), k)
    else:
        I = compute_GT_sliced(xb, xq, k)

    return I.astype('int32')


def load_data(dataset='deep1M', compute_gt=False):

    print "load data", dataset

    if dataset == 'sift1M':
        basedir = simdir + 'sift1M/'

        xt = fvecs_read(basedir + "sift_learn.fvecs")
        xb = fvecs_read(basedir + "sift_base.fvecs")
        xq = fvecs_read(basedir + "sift_query.fvecs")
        gt = ivecs_read(basedir + "sift_groundtruth.ivecs")

    elif dataset.startswith('bigann'):
        basedir = simdir + 'bigann/'

        dbsize = 1000 if dataset == "bigann1B" else int(dataset[6:-1])
        xb = bvecs_mmap(basedir + 'bigann_base.bvecs')
        xq = bvecs_mmap(basedir + 'bigann_query.bvecs')
        xt = bvecs_mmap(basedir + 'bigann_learn.bvecs')
        # trim xb to correct size
        xb = xb[:dbsize * 1000 * 1000]
        gt = ivecs_read(basedir + 'gnd/idx_%dM.ivecs' % dbsize)

    elif dataset.startswith("deep"):
        basedir = simdir + 'deep1b/'
        szsuf = dataset[4:]
        if szsuf[-1] == 'M':
            dbsize = 10 ** 6 * int(szsuf[:-1])
        elif szsuf == '1B':
            dbsize = 10 ** 9
        elif szsuf[-1] == 'k':
            dbsize = 1000 * int(szsuf[:-1])
        else:
            assert False, "did not recognize suffix " + szsuf

        xt = fvecs_mmap(basedir + "learn.fvecs")
        xb = fvecs_mmap(basedir + "base.fvecs")
        xq = fvecs_read(basedir + "deep1B_queries.fvecs")

        xb = xb[:dbsize]

        gt_fname = basedir + "%s_groundtruth.ivecs" % dataset
        if compute_gt:
            gt = do_compute_gt(xb, xq, 100)
            print "store", gt_fname
            ivecs_write(gt_fname, gt)

        gt = ivecs_read(gt_fname)

    else:
        assert False

    print "dataset %s sizes: B %s Q %s T %s" % (
        dataset, xb.shape, xq.shape, xt.shape)

    return xt, xb, xq, gt

#################################################################
# Evaluation
#################################################################


def evaluate_DI(D, I, gt):
    nq = gt.shape[0]
    k = I.shape[1]
    rank = 1
    while rank <= k:
        recall = (I[:, :rank] == gt[:, :1]).sum() / float(nq)
        print "R@%d: %.4f" % (rank, recall),
        rank *= 10


def evaluate(xq, gt, index, k=100, endl=True):
    t0 = time.time()
    D, I = index.search(xq, k)
    t1 = time.time()
    nq = xq.shape[0]
    print "\t %8.4f ms per query, " % ((t1 - t0) * 1000.0 / nq),
    rank = 1
    while rank <= k:
        recall = (I[:, :rank] == gt[:, :1]).sum() / float(nq)
        print "R@%d: %.4f" % (rank, recall),
        rank *= 10
    if endl:
        print
    return D, I
```
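The `*vecs` helpers implement the classical TexMex layout: each vector is stored as an int32 dimension followed by the d components. A quick round-trip check using the module's own functions (the file path is arbitrary):

```python
import numpy as np
from datasets import fvecs_write, fvecs_read

x = np.random.rand(5, 16).astype('float32')
fvecs_write('/tmp/toy.fvecs', x)  # 5 records: int32 d=16, then 16 floats
y = fvecs_read('/tmp/toy.fvecs')
assert y.shape == (5, 16) and np.all(x == y)  # bit-exact round trip
```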
benchs/bench_all_ivf/parse_bench_all_ivf.py  (new file, +269)

```python
# Copyright (c) 2015-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the BSD+Patents license found in the
# LICENSE file in the root directory of this source tree.

#! /usr/bin/python2

import os
import numpy as np
from matplotlib import pyplot
import re
from argparse import Namespace

# the directory used in run_on_cluster.bash
basedir = '/mnt/vol/gfsai-east/ai-group/users/matthijs/bench_all_ivf/'
logdir = basedir + 'logs/'

# which plot to output
db = 'bigann1B'
code_size = 8


def unitsize(indexkey):
    """ size of one vector in the index """
    mo = re.match('.*,PQ(\\d+)', indexkey)
    if mo:
        return int(mo.group(1))
    if indexkey.endswith('SQ8'):
        bits_per_d = 8
    elif indexkey.endswith('SQ4'):
        bits_per_d = 4
    elif indexkey.endswith('SQfp16'):
        bits_per_d = 16
    else:
        assert False
    mo = re.match('PCAR(\\d+),.*', indexkey)
    if mo:
        return bits_per_d * int(mo.group(1)) / 8
    mo = re.match('OPQ\\d+_(\\d+),.*', indexkey)
    if mo:
        return bits_per_d * int(mo.group(1)) / 8
    mo = re.match('RR(\\d+),.*', indexkey)
    if mo:
        return bits_per_d * int(mo.group(1)) / 8
    assert False


def dbsize_from_name(dbname):
    sufs = {
        '1B': 10 ** 9,
        '100M': 10 ** 8,
        '10M': 10 ** 7,
        '1M': 10 ** 6,
    }
    for s in sufs:
        if dbname.endswith(s):
            return sufs[s]
    else:
        assert False


def keep_latest_stdout(fnames):
    fnames = [fname for fname in fnames if fname.endswith('.stdout')]
    fnames.sort()
    n = len(fnames)
    fnames2 = []
    for i, fname in enumerate(fnames):
        if i + 1 < n and fnames[i + 1][:-8] == fname[:-8]:
            continue
        fnames2.append(fname)
    return fnames2


def parse_result_file(fname):
    # print fname
    st = 0
    res = []
    keys = []
    stats = {}
    stats['run_version'] = fname[-8]
    for l in open(fname):
        if st == 0:
            if l.startswith('CHRONOS_JOB_INSTANCE_ID'):
                stats['CHRONOS_JOB_INSTANCE_ID'] = l.split()[-1]
            if l.startswith('index size on disk:'):
                stats['index_size'] = int(l.split()[-1])
            if l.startswith('current RSS:'):
                stats['RSS'] = int(l.split()[-1])
            if l.startswith('precomputed tables size:'):
                stats['tables_size'] = int(l.split()[-1])
            if l.startswith('Setting nb of threads to'):
                stats['n_threads'] = int(l.split()[-1])
            if l.startswith('  add in'):
                stats['add_time'] = float(l.split()[-2])
            if l.startswith('args:'):
                args = eval(l[l.find(' '):])
                indexkey = args.indexkey
            elif 'R@1   R@10  R@100' in l:
                st = 1
            elif 'index size on disk:' in l:
                index_size = int(l.split()[-1])
        elif st == 1:
            st = 2
        elif st == 2:
            fi = l.split()
            keys.append(fi[0])
            res.append([float(x) for x in fi[1:]])
    return indexkey, np.array(res), keys, stats

# run parsing
allres = {}
allstats = {}
nts = []
missing = []
versions = {}

fnames = keep_latest_stdout(os.listdir(logdir))
# print fnames
# filenames are in the form <key>.x.stdout
# where x is a version number (from a to z)
# keep only latest version of each name

for fname in fnames:
    if not ('db' + db in fname and fname.endswith('.stdout')):
        continue
    indexkey, res, _, stats = parse_result_file(logdir + fname)
    if res.size == 0:
        missing.append(fname)
        errorline = open(
            logdir + fname.replace('.stdout', '.stderr')).readlines()
        if len(errorline) > 0:
            errorline = errorline[-1]
        else:
            errorline = 'NO STDERR'
        print fname, stats['CHRONOS_JOB_INSTANCE_ID'], errorline
    else:
        if indexkey in allres:
            if allstats[indexkey]['run_version'] > stats['run_version']:
                # don't use this run
                continue
        n_threads = stats.get('n_threads', 1)
        nts.append(n_threads)
        allres[indexkey] = res
        allstats[indexkey] = stats

assert len(set(nts)) == 1
n_threads = nts[0]


def plot_tradeoffs(allres, code_size, recall_rank):
    dbsize = dbsize_from_name(db)
    recall_idx = int(np.log10(recall_rank))

    bigtab = []
    names = []

    for k, v in sorted(allres.items()):
        if v.ndim != 2:
            continue
        us = unitsize(k)
        if us != code_size:
            continue
        perf = v[:, recall_idx]
        times = v[:, 3]
        bigtab.append(
            np.vstack((
                np.ones(times.size, dtype=int) * len(names),
                perf, times
            ))
        )
        names.append(k)

    bigtab = np.hstack(bigtab)

    perm = np.argsort(bigtab[1, :])
    bigtab = bigtab[:, perm]

    times = np.minimum.accumulate(bigtab[2, ::-1])[::-1]
    selection = np.where(bigtab[2, :] == times)

    selected_methods = [names[i] for i in
                        np.unique(bigtab[0, selection].astype(int))]
    not_selected = list(set(names) - set(selected_methods))

    print "methods without an optimal OP: ", not_selected

    nq = 10000
    pyplot.title('database ' + db + ' code_size=%d' % code_size)

    # grayed out lines
    for k in not_selected:
        v = allres[k]
        if v.ndim != 2:
            continue
        us = unitsize(k)
        if us != code_size:
            continue

        linestyle = (':' if 'PQ' in k else
                     '-.' if 'SQ4' in k else
                     '--' if 'SQ8' in k else '-')

        pyplot.semilogy(v[:, recall_idx], v[:, 3], label=None,
                        linestyle=linestyle,
                        marker='o' if 'HNSW' in k else '+',
                        color='#cccccc', linewidth=0.2)

    # important methods
    for k in selected_methods:
        v = allres[k]
        if v.ndim != 2:
            continue
        us = unitsize(k)
        if us != code_size:
            continue

        stats = allstats[k]
        tot_size = stats['index_size'] + stats['tables_size']
        id_size = 8  # 64 bit

        addt = ''
        if 'add_time' in stats:
            add_time = stats['add_time']
            if add_time > 7200:
                add_min = add_time / 60
                addt = ', %dh%02d' % (add_min / 60, add_min % 60)
            else:
                add_sec = int(add_time)
                addt = ', %dm%02d' % (add_sec / 60, add_sec % 60)

        label = k + ' (size+%.1f%%%s)' % (
            tot_size / float((code_size + id_size) * dbsize) * 100 - 100,
            addt)

        linestyle = (':' if 'PQ' in k else
                     '-.' if 'SQ4' in k else
                     '--' if 'SQ8' in k else '-')

        pyplot.semilogy(v[:, recall_idx], v[:, 3], label=label,
                        linestyle=linestyle,
                        marker='o' if 'HNSW' in k else '+')

    if len(not_selected) == 0:
        om = ''
    else:
        om = '\nomitted:'
        nc = len(om)
        for m in not_selected:
            if nc > 80:
                om += '\n'
                nc = 0
            om += ' ' + m
            nc += len(m) + 1

    pyplot.xlabel('1-recall at %d %s' % (recall_rank, om))
    pyplot.ylabel('search time per query (ms, %d threads)' % n_threads)
    pyplot.legend()
    pyplot.grid()
    pyplot.savefig('figs/tradeoffs_%s_cs%d_r%d.png' % (
        db, code_size, recall_rank))
    return selected_methods, not_selected


pyplot.gcf().set_size_inches(15, 10)

plot_tradeoffs(allres, code_size=code_size, recall_rank=1)
```
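`unitsize()` maps an index_factory key to the per-vector code size in bytes, which is what `plot_tradeoffs` groups curves by. Since the script runs its log-parsing stage at import time, the checks below assume the function has been pasted into a standalone session (the keys are taken from the experiment scripts that follow):

```python
# assumes unitsize() from above is in scope
assert unitsize('OPQ8_64,IMI2x9,PQ8') == 8          # ",PQ8" -> 8-byte codes
assert unitsize('PCAR16,IVF1024_HNSW32,SQ4') == 8   # 16 dims * 4 bits / 8
assert unitsize('PCAR32,IMI2x10,SQ8') == 32         # 32 dims * 8 bits / 8
```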
benchs/bench_all_ivf/run_on_cluster_generic.bash  (new file, +250)

```bash
# Copyright (c) 2015-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the BSD+Patents license found in the
# LICENSE file in the root directory of this source tree.

# @nolint

# This script launches the experiments on a cluster.
# It assumes two shell functions are defined:
#
#    run_on_1machine: runs a command on one (full) machine on a cluster
#
#    run_on_8gpu: runs a command on one machine with 8 GPUs
#
# the two functions are called as:
#
#    run_on_1machine <name> <command>
#
# the stdout of the command should be stored in $logdir/<name>.stdout

function run_on_1machine () {
    # To be implemented
    true
}

function run_on_8gpu () {
    # To be implemented
    true
}

# prepare output directories

basedir=/mnt/vol/gfsai-east/ai-group/users/matthijs/bench_all_ivf
logdir=$basedir/logs
indexdir=$basedir/indexes

mkdir -p $lars $logdir $indexdir

############################### 1M experiments

for db in sift1M deep1M bigann1M; do

    for coarse in IMI2x9 IMI2x10 IVF1024_HNSW32 IVF4096_HNSW32 IVF16384_HNSW32
    do
        for indexkey in \
            OPQ8_64,$coarse,PQ8 \
            PCAR16,$coarse,SQ4 \
            OPQ16_64,$coarse,PQ16 \
            PCAR32,$coarse,SQ4 \
            PCAR16,$coarse,SQ8 \
            OPQ32_128,$coarse,PQ32 \
            PCAR64,$coarse,SQ4 \
            PCAR32,$coarse,SQ8 \
            PCAR16,$coarse,SQfp16 \
            PCAR64,$coarse,SQ8 \
            PCAR32,$coarse,SQfp16 \
            PCAR128,$coarse,SQ4
        do
            key=autotune.db$db.${indexkey//,/_}
            run_on_1machine $key \
                python -u bench_all_ivf.py \
                    --db $db \
                    --indexkey $indexkey \
                    --maxtrain 0 \
                    --indexfile $indexdir/$key.faissindex
        done
    done
done

############################### 10M experiments

for db in deep10M bigann10M; do

    for coarse in \
        IMI2x10 IMI2x11 IMI2x12 IMI2x13 IVF4096_HNSW32 \
        IVF16384_HNSW32 IVF65536_HNSW32 IVF262144_HNSW32
    do
        for indexkey in \
            OPQ8_64,$coarse,PQ8 \
            PCAR16,$coarse,SQ4 \
            OPQ16_64,$coarse,PQ16 \
            PCAR32,$coarse,SQ4 \
            PCAR16,$coarse,SQ8 \
            OPQ32_128,$coarse,PQ32 \
            PCAR64,$coarse,SQ4 \
            PCAR32,$coarse,SQ8 \
            PCAR16,$coarse,SQfp16 \
            PCAR64,$coarse,SQ8 \
            PCAR32,$coarse,SQfp16 \
            PCAR128,$coarse,SQ4 \
            OPQ64_128,$coarse,PQ64
        do
            key=autotune.db$db.${indexkey//,/_}
            run_on_1machine $key \
                python -u bench_all_ivf.py \
                    --db $db \
                    --indexkey $indexkey \
                    --maxtrain 0 \
                    --indexfile $indexdir/$key.faissindex \
                    --searchthreads 16 \
                    --min_test_duration 3
        done
    done
done

############################### 100M experiments

for db in deep100M bigann100M; do

    for coarse in IMI2x11 IMI2x12 IVF65536_HNSW32 IVF262144_HNSW32
    do
        for indexkey in \
            OPQ8_64,$coarse,PQ8 \
            OPQ16_64,$coarse,PQ16 \
            PCAR32,$coarse,SQ4 \
            OPQ32_128,$coarse,PQ32 \
            PCAR64,$coarse,SQ4 \
            PCAR32,$coarse,SQ8 \
            PCAR64,$coarse,SQ8 \
            PCAR32,$coarse,SQfp16 \
            PCAR128,$coarse,SQ4 \
            OPQ64_128,$coarse,PQ64
        do
            key=autotune.db$db.${indexkey//,/_}
            run_on_1machine $key \
                python -u bench_all_ivf.py \
                    --db $db \
                    --indexkey $indexkey \
                    --maxtrain 0 \
                    --indexfile $indexdir/$key.faissindex \
                    --searchthreads 16 \
                    --min_test_duration 3 \
                    --add_bs 1000000
        done
    done
done

############################### 1B experiments

for db in deep1B bigann1B; do

    for coarse in IMI2x12 IMI2x13 IVF262144_HNSW32
    do
        for indexkey in \
            OPQ8_64,$coarse,PQ8 \
            OPQ16_64,$coarse,PQ16 \
            PCAR32,$coarse,SQ4 \
            OPQ32_128,$coarse,PQ32 \
            PCAR64,$coarse,SQ4 \
            PCAR32,$coarse,SQ8 \
            PCAR64,$coarse,SQ8 \
            PCAR32,$coarse,SQfp16 \
            PCAR128,$coarse,SQ4 \
            PQ64_128,$coarse,PQ64 \
            RR128,$coarse,SQ4
        do
            key=autotune.db$db.${indexkey//,/_}
            run_on_1machine $key \
                python -u bench_all_ivf.py \
                    --db $db \
                    --indexkey $indexkey \
                    --maxtrain 0 \
                    --indexfile $indexdir/$key.faissindex \
                    --searchthreads 16 \
                    --min_test_duration 3 \
                    --add_bs 1000000
        done
    done
done

############################################
# precompute centroids on GPU for large vocabularies

for db in deep1M bigann1M; do

    for ncent in 1048576 4194304; do

        key=clustering.db$db.IVF$ncent
        run_on_8gpu $key \
            python -u bench_all_ivf.py \
                --db $db \
                --indexkey IVF$ncent,SQ8 \
                --maxtrain 100000000 \
                --indexfile $indexdir/$key.faissindex \
                --searchthreads 16 \
                --min_test_duration 3 \
                --add_bs 1000000 \
                --train_on_gpu
    done
done

#################################
# Run actual experiment

for db in deep1B bigann1B; do

    for ncent in 1048576 4194304; do

        coarse=IVF${ncent}_HNSW32
        centroidsname=clustering.db${db/1B/1M}.IVF${ncent}.faissindex

        for indexkey in \
            OPQ8_64,$coarse,PQ8 \
            OPQ16_64,$coarse,PQ16 \
            PCAR32,$coarse,SQ4 \
            OPQ32_128,$coarse,PQ32 \
            PCAR64,$coarse,SQ4 \
            PCAR32,$coarse,SQ8 \
            PCAR64,$coarse,SQ8 \
            PCAR32,$coarse,SQfp16 \
            OPQ64_128,$coarse,PQ64 \
            RR128,$coarse,SQ4 \
            OPQ64_128,$coarse,PQ64 \
            RR128,$coarse,SQ4
        do
            key=autotune.db$db.${indexkey//,/_}
            run_on_1machine $key.c $key \
                python -u bench_all_ivf.py \
                    --db $db \
                    --indexkey $indexkey \
                    --maxtrain 256000 \
                    --indexfile $indexdir/$key.faissindex \
                    --get_centroids_from $indexdir/$centroidsname \
                    --searchthreads 16 \
                    --min_test_duration 3 \
                    --add_bs 1000000
        done
    done
done
```
benchs/link_and_code/README.md  (+0 -2)

````
@@ -11,14 +11,12 @@ Link & code is an indexing method that combines HNSW indexing with
 compression and exploits the neighborhood structure of the similarity
 graph to improve the reconstruction. It is described in

 ```
 @inproceedings{link_and_code,
    author = {Matthijs Douze and Alexandre Sablayrolles and Herv\'e J\'egou},
    title = {Link and code: Fast indexing with graphs and compact regression codes},
    booktitle = {CVPR},
    year = {2018}
 }
 ```

 ArXiV [here](https://arxiv.org/abs/1804.09996)
````
python/faiss.py  (+2 -2)

```diff
@@ -18,14 +18,14 @@ import pdb

 # we import * so that the symbol X can be accessed as faiss.X

 try:
-    from .swigfaiss_gpu import *
+    from swigfaiss_gpu import *
 except ImportError as e:
     if 'No module named' not in e.args[0]:
         # swigfaiss_gpu is there but failed to load: Warn user about it.
         sys.stderr.write("Failed to load GPU Faiss: %s\n" % e.args[0])
         sys.stderr.write("Faiss falling back to CPU-only.\n")
-    from .swigfaiss import *
+    from swigfaiss import *

 __version__ = "%d.%d.%d" % (FAISS_VERSION_MAJOR, FAISS_VERSION_MINOR,
```
tests/test_index.py  (+1 -1)

```diff
@@ -16,7 +16,7 @@ import os
 import re

-from common import get_dataset, get_dataset_2
+from .common import get_dataset, get_dataset_2


 class TestModuleInterface(unittest.TestCase):
```
tests/test_index_accuracy.py  (+6 -8)

```diff
@@ -26,7 +26,7 @@ kprobe = int(np.sqrt(ncentroids))
 nbits = d

 # Parameters for indexes involving PQ
-M = d / 8             # for PQ: #subquantizers
+M = int(d / 8)        # for PQ: #subquantizers
 nbits_per_index = 8   # for PQ
@@ -126,7 +126,6 @@ class IndexAccuracy(unittest.TestCase):
         stats = faiss.cvar.indexPQ_stats
         stats.reset()

         res = ev.launch('Polysemous ht=%d' % index.polysemous_ht, index)
         e_polysemous = ev.evalres(res)
@@ -249,7 +248,7 @@ class TestSQFlavors(unittest.TestCase):
                 D, I = index.search(xq, 10)
                 ninter = faiss.eval_intersection(I, gt_I)
                 print('(%d, %s): %d, ' % (mt, repr(qname), ninter))
-                assert ninter >= self.ref_results[(mt, qname)] - 4
+                assert abs(ninter - self.ref_results[(mt, qname)]) <= 4
                 D2, I2 = self.subtest_add2col(xb, xq, index, qname)
@@ -265,10 +264,10 @@ class TestSQFlavors(unittest.TestCase):
 class TestPQFlavors(unittest.TestCase):

-    # run on Sept 6, 2018
+    # run on Dec 14, 2018
     ref_results = {
         (1, True): 800,
-        (1, True, 20): 742,
+        (1, True, 20): 794,
         (1, False): 769,
         (0, True): 831,
         (0, True, 20): 828,
@@ -312,7 +311,7 @@ class TestPQFlavors(unittest.TestCase):
                 ninter = faiss.eval_intersection(I, gt_I)
                 print('(%d, %s): %d, ' % (mt, by_residual, ninter))
-                assert ninter >= self.ref_results[mt, by_residual] - 2
+                assert abs(ninter - self.ref_results[mt, by_residual]) <= 2
                 index.use_precomputed_table = 0
                 D2, I2 = index.search(xq, 10)
@@ -412,8 +411,7 @@ class OPQRelativeAccuracy(unittest.TestCase):
         e_oivfpq = ev.evalres(res)

         # verify same on OIVFPQ
-        # Currently disabled because flaky.
-        # self.assertGreater(e_oivfpq[1], e_ivfpq[1])
+        assert(e_oivfpq[1] > e_ivfpq[1])


 if __name__ == '__main__':
```
tests/test_lowlevel_ivf.cpp  (+1 -1)

```diff
@@ -379,7 +379,7 @@ void test_lowlevel_access_binary (const char *index_key) {
     printf ("]\n");

     // re-order heap
-    heap_reorder<CMax<int32_t, idx_t>> (k, D.data(), I.data());
+    heap_reorder<CMax<int32_t, int64_t>> (k, D.data(), I.data());

     printf ("ref: [");
     for (int j = 0; j < k; j++)
```
tests/test_sliding_ivf.cpp  (+1 -5)

```diff
@@ -21,10 +21,6 @@
 using namespace faiss;

 namespace {

-typedef Index::idx_t idx_t;
@@ -220,7 +216,7 @@ int test_sliding_invlists (const char *index_key) {
 }

 }  // namespace
```
tests/test_transfer_invlists.cpp  (+6 -9)

```diff
@@ -8,7 +8,6 @@
 #include <cstdio>
 #include <cstdlib>

 #include <memory>

 #include <gtest/gtest.h>
@@ -21,11 +20,6 @@
 #include <faiss/IVFlib.h>

-using namespace faiss;
-
 namespace {

 // parameters to use for the test
 int d = 64;
 size_t nb = 1000;
@@ -34,6 +28,8 @@ size_t nt = 500;
 int k = 10;
 int nlist = 40;

+using namespace faiss;
+typedef faiss::Index::idx_t idx_t;
@@ -43,6 +39,10 @@ std::vector<float> get_data (size_t nb, int seed) {
     return x;
 }

+void test_index_type (const char *factory_string) {
+
+    // transfer inverted lists in nslice slices
@@ -147,9 +147,6 @@ void test_index_type(const char *factory_string) {
 }

 }  // namespace

 TEST(TRANS, IVFFlat) {
     test_index_type ("IVF40,Flat");
 }
```