Commit 00c288c0 authored Feb 24, 2017 by matthijs

added benchmarking scripts

parent a67190c6

Showing 18 changed files with 223 additions and 200 deletions
Clustering.cpp                                +2   -2
Makefile                                      +4   -0
MetaIndexes.cpp                               +54  -18
example_makefiles/makefile.inc.Mac.brew       +0   -6
example_makefiles/makefile.inc.Mac.port       +0   -6
faiss.py                                      +36  -1
gpu/impl/IVFBase.cu                           +1   -0
gpu/impl/IVFPQ.cu                             +1   -0
gpu/impl/PQCodeDistances.cu                   +1   -1
gpu/impl/PQCodeLoad.cuh                       +72  -88
gpu/impl/PQScanMultiPassNoPrecomputed.cu      +3   -0
gpu/impl/PQScanMultiPassPrecomputed.cu        +3   -0
gpu/test/TestGpuIndexIVFPQ.cpp                +7   -3
gpu/utils/Float16.cu                          +4   -60
python/swigfaiss_gpu_wrap.cxx                 +0   -0
python/swigfaiss_wrap.cxx                     +0   -0
python/test_build_blocks.py                   +19  -0
utils.cpp                                     +16  -15
Clustering.cpp

@@ -166,8 +166,8 @@ void Clustering::train (idx_t nx, const float *x_in, Index & index) {
                 assign, d, k, nx);

         if (verbose) {
-            printf ("  Iteration %d (%5g s, search %5g s): "
-                    "objective=%g imbalance=%g nsplit=%d       \r",
+            printf ("  Iteration %d (%.2f s, search %.2f s): "
+                    "objective=%g imbalance=%.3f nsplit=%d       \r",
                     i, (getmillisecs() - t0) / 1000.0,
                     t_search_tot / 1000,
                     err, imbalance_factor (nx, k, assign),
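The new specifiers trade %5g, which varies the number of digits and can switch to scientific notation, for fixed-width output: two-decimal timings and a three-decimal imbalance factor. A quick comparison of the two formats (illustrative Python, not part of the commit):

    elapsed = (171234.0 - 170000.0) / 1000.0  # elapsed seconds, as computed above

    print("%5g s" % elapsed)   # '1.234 s'  -- digit count varies per value
    print("%5g s" % 1e-05)     # '1e-05 s'  -- may flip to scientific form
    print("%.2f s" % elapsed)  # '1.23 s'   -- stable two-decimal output
    print("%.2f s" % 1e-05)    # '0.00 s'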
Makefile

@@ -7,15 +7,19 @@ (add/remove markers lost in capture)
 .SUFFIXES: .cpp .o

 MAKEFILE_INC=makefile.inc

 -include $(MAKEFILE_INC)

 LIBNAME=libfaiss

 all: .env_ok $(LIBNAME).a tests/demo_ivfpq_indexing

 py: _swigfaiss.so

 #############################
 # Various
MetaIndexes.cpp

@@ -211,6 +211,7 @@ void translate_labels (long n, idx_t *labels, long translation)
  * @param all_labels idem
  * @param translations label translations to apply, size nshard
  */
 template <class C>
 void merge_tables (long n, long k, long nshard,
                    float *distances, idx_t *labels,

@@ -218,22 +219,55 @@ void merge_tables (long n, long k, long nshard,
                    idx_t *all_labels,
                    const long *translations)
 {
-    long shard_stride = n * k;
-#pragma omp parallel for
-    for (long i = 0; i < n; i++) {
-        float *D = distances + i * k;
-        idx_t *I = labels + i * k;
-        const float *Ds = all_distances + i * k;
-        idx_t *Is = all_labels + i * k;
-        translate_labels (k, Is, translations[0]);
-        heap_heapify<C> (k, D, I, Ds, Is, k);
-        for (int s = 1; s < nshard; s++) {
-            Ds += shard_stride;
-            Is += shard_stride;
-            translate_labels (k, Is, translations[s]);
-            heap_addn<C> (k, D, I, Ds, Is, k);
+    if (k == 0) {
+        return;
+    }
+
+    long stride = n * k;
+#pragma omp parallel
+    {
+        std::vector<int> buf (2 * nshard);
+        int * pointer = buf.data();
+        int * shard_ids = pointer + nshard;
+        std::vector<float> buf2 (nshard);
+        float * heap_vals = buf2.data();
+#pragma omp for
+        for (long i = 0; i < n; i++) {
+            // the heap maps values to the shard where they are
+            // produced.
+            const float *D_in = all_distances + i * k;
+            const idx_t *I_in = all_labels + i * k;
+            int heap_size = 0;
+
+            for (long s = 0; s < nshard; s++) {
+                pointer[s] = 0;
+                if (I_in[stride * s] >= 0)
+                    heap_push<C> (++heap_size, heap_vals, shard_ids,
+                                  D_in[stride * s], s);
+            }
+
+            float *D = distances + i * k;
+            idx_t *I = labels + i * k;
+
+            for (int j = 0; j < k; j++) {
+                if (heap_size == 0) {
+                    I[j] = -1;
+                    D[j] = C::neutral();
+                } else {
+                    // pop best element
+                    int s = shard_ids[0];
+                    int & p = pointer[s];
+                    D[j] = heap_vals[0];
+                    I[j] = I_in[stride * s + p] + translations[s];
+
+                    heap_pop<C> (heap_size--, heap_vals, shard_ids);
+                    p++;
+                    if (p < k && I_in[stride * s + p] >= 0)
+                        heap_push<C> (++heap_size, heap_vals, shard_ids,
+                                      D_in[stride * s + p], s);
+                }
+            }
+            heap_reorder<C> (k, D, I);
+        }
+    }
 }

@@ -360,6 +394,9 @@ void IndexShards::add_with_ids (idx_t n, const float * x, const long *xids)
 }

 void IndexShards::reset ()
 {
     for (int i = 0; i < shard_indexes.size(); i++) {

@@ -433,15 +470,14 @@ void IndexShards::search (
     }

     if (metric_type == METRIC_L2) {
-        merge_tables< CMax<float, idx_t> > (
+        merge_tables< CMin<float, int> > (
             n, k, nshard, distances, labels,
             all_distances, all_labels, translations.data ());
     } else {
-        merge_tables< CMin<float, idx_t> > (
+        merge_tables< CMax<float, int> > (
             n, k, nshard, distances, labels,
             all_distances, all_labels, translations.data ());
     }

     delete [] all_distances;
     delete [] all_labels;
 }
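The rewritten merge_tables replaces the heapify-then-add-all pass with a k-way merge: a small heap keyed by distance holds one candidate per shard, and each pop advances that shard's read pointer. A standalone Python sketch of the same strategy (mine, not part of the commit; shown for the L2 case, where smaller is better):

    import heapq

    def merge_shard_results(all_distances, all_labels, translations, k):
        """k-way merge of per-shard sorted top-k lists for one query.
        Label -1 marks an exhausted slot, as in the C++ code."""
        nshard = len(all_labels)
        pointer = [0] * nshard
        heap = []  # (distance, shard id): smallest distance at the root
        for s in range(nshard):
            if all_labels[s][0] >= 0:
                heapq.heappush(heap, (all_distances[s][0], s))

        D_out, I_out = [], []
        for _ in range(k):
            if not heap:
                D_out.append(float("inf"))  # plays the role of C::neutral()
                I_out.append(-1)
                continue
            d, s = heapq.heappop(heap)      # pop best element
            p = pointer[s]
            D_out.append(d)
            I_out.append(all_labels[s][p] + translations[s])
            p += 1
            pointer[s] = p
            if p < k and all_labels[s][p] >= 0:
                heapq.heappush(heap, (all_distances[s][p], s))
        return D_out, I_out

    # Two shards, k=3; shard 1's local ids are offset by 100:
    D, I = merge_shard_results([[0.1, 0.5, 0.9], [0.2, 0.3, 0.8]],
                               [[0, 1, 2], [0, 1, 2]], [0, 100], 3)
    # D == [0.1, 0.2, 0.3], I == [0, 100, 101]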
example_makefiles/makefile.inc.Mac.brew

 # Copyright (c) 2015-present, Facebook, Inc.
 # All rights reserved.
 #
 # This source code is licensed under the CC-by-NC license found in the
 # LICENSE file in the root directory of this source tree.

 # -*- makefile -*-
 # Tested on macOS Sierra (10.12.2) with llvm installed using Homebrew (https://brew.sh)

example_makefiles/makefile.inc.Mac.port

 # Copyright (c) 2015-present, Facebook, Inc.
 # All rights reserved.
 #
 # This source code is licensed under the CC-by-NC license found in the
 # LICENSE file in the root directory of this source tree.

 # -*- makefile -*-
 # tested on Mac OS X 10.12.2 Sierra with additional software installed via macports
faiss.py

@@ -15,6 +15,9 @@ import types
 import sys
 import pdb

 # we import * so that the symbol X can be accessed as faiss.X

 try:
     from swigfaiss_gpu import *
 except ImportError as e:

@@ -29,6 +32,9 @@ except ImportError as e:
 ##################################################################
 # The functions below add or replace some methods for classes
 # this is to be able to pass in numpy arrays directly
 # The C++ version of the classnames will be suffixed with _c
 ##################################################################

 def replace_method(the_class, name, replacement):

@@ -49,6 +55,32 @@ def handle_Clustering():
 handle_Clustering()

+def handle_ProductQuantizer():
+
+    def replacement_train(self, x):
+        n, d = x.shape
+        assert d == self.d
+        self.train_c(n, swig_ptr(x))
+
+    def replacement_compute_codes(self, x):
+        n, d = x.shape
+        assert d == self.d
+        codes = np.empty((n, self.code_size), dtype='uint8')
+        self.compute_codes_c(swig_ptr(x), swig_ptr(codes), n)
+        return codes
+
+    def replacement_decode(self, codes):
+        n, cs = codes.shape
+        assert cs == self.code_size
+        x = np.empty((n, self.d), dtype='float32')
+        self.decode_c(swig_ptr(codes), swig_ptr(x), n)
+        return x
+
+    replace_method(ProductQuantizer, 'train', replacement_train)
+    replace_method(ProductQuantizer, 'compute_codes',
+                   replacement_compute_codes)
+    replace_method(ProductQuantizer, 'decode', replacement_decode)
+
+handle_ProductQuantizer()

 def handle_Index(the_class):

@@ -153,6 +185,8 @@ for symbol in dir(this_module):
         handle_ParameterSpace(the_class)

 def vector_float_to_array(v):
     a = np.empty(v.size(), dtype='float32')
     memcpy(swig_ptr(a), v.data(), 4 * v.size())

@@ -241,7 +275,8 @@ def eval_intersection(I1, I2):
     k1, k2 = I1.shape[1], I2.shape[1]
     ninter = 0
     for i in range(n):
         ninter += ranklist_intersection_size(
             k1, swig_ptr(I1[i]), k2, swig_ptr(I2[i]))
     return ninter
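With these wrappers in place, a ProductQuantizer can be driven directly from numpy arrays. A minimal usage sketch, mirroring the test added in python/test_build_blocks.py below:

    import numpy as np
    import faiss

    d, n, cs = 64, 1000, 4
    np.random.seed(123)
    x = np.random.random(size=(n, d)).astype('float32')

    pq = faiss.ProductQuantizer(d, cs, 8)  # 4 sub-quantizers of 8 bits each
    pq.train(x)                            # numpy in, via replacement_train
    codes = pq.compute_codes(x)            # -> (n, pq.code_size) uint8 array
    x2 = pq.decode(codes)                  # approximate reconstruction
    err = ((x - x2) ** 2).sum()            # quantization error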
gpu/impl/IVFBase.cu

@@ -77,6 +77,7 @@ IVFBase::reset() {
   deviceListDataPointers_.clear();
   deviceListIndexPointers_.clear();
   deviceListLengths_.clear();
+  listOffsetToUserIndex_.clear();

   for (size_t i = 0; i < numLists_; ++i) {
     deviceListData_.emplace_back(
gpu/impl/IVFPQ.cu

@@ -85,6 +85,7 @@ IVFPQ::isSupportedPQCodeLength(int size) {
     case 48:
     case 56: // only supported with float16
     case 64: // only supported with float16
+    case 96: // only supported with float16
       return true;
     default:
       return false;
gpu/impl/PQCodeDistances.cu

@@ -485,7 +485,7 @@ runPQCodeDistances(Tensor<float, 3, true>& pqCentroids,
       CODE_DISTANCE(10);
       break;
     case 12:
-      CODE_DISTANCE(10);
+      CODE_DISTANCE(12);
       break;
     case 16:
       CODE_DISTANCE(16);
gpu/impl/PQCodeLoad.cuh

@@ -174,20 +174,14 @@ struct LoadCode32<24> {
                unsigned char* p,
                int offset) {
     p += offset * 24;
-    // FIXME: this is a non-coalesced, unaligned, non-vectorized load
+    // FIXME: this is a non-coalesced, unaligned, 2-vectorized load
     // unfortunately need to reorganize memory layout by warp
-    asm("ld.global.cs.nc.u32 {%0}, [%1 + 0];" :
-        "=r"(code32[0]) : "l"(p));
-    asm("ld.global.cs.nc.u32 {%0}, [%1 + 4];" :
-        "=r"(code32[1]) : "l"(p));
-    asm("ld.global.cs.nc.u32 {%0}, [%1 + 8];" :
-        "=r"(code32[2]) : "l"(p));
-    asm("ld.global.cs.nc.u32 {%0}, [%1 + 12];" :
-        "=r"(code32[3]) : "l"(p));
-    asm("ld.global.cs.nc.u32 {%0}, [%1 + 16];" :
-        "=r"(code32[4]) : "l"(p));
-    asm("ld.global.cs.nc.u32 {%0}, [%1 + 20];" :
-        "=r"(code32[5]) : "l"(p));
+    asm("ld.global.cs.nc.v2.u32 {%0, %1}, [%2 + 0];" :
+        "=r"(code32[0]), "=r"(code32[1]) : "l"(p));
+    asm("ld.global.cs.nc.v2.u32 {%0, %1}, [%2 + 8];" :
+        "=r"(code32[2]), "=r"(code32[3]) : "l"(p));
+    asm("ld.global.cs.nc.v2.u32 {%0, %1}, [%2 + 16];" :
+        "=r"(code32[4]), "=r"(code32[5]) : "l"(p));
   }
 };

@@ -239,28 +233,18 @@ struct LoadCode32<40> {
                unsigned char* p,
                int offset) {
     p += offset * 40;
-    // FIXME: this is a non-coalesced, unaligned, non-vectorized load
+    // FIXME: this is a non-coalesced, unaligned, 2-vectorized load
     // unfortunately need to reorganize memory layout by warp
-    asm("ld.global.cs.nc.u32 {%0}, [%1 + 0];" :
-        "=r"(code32[0]) : "l"(p));
-    asm("ld.global.cs.nc.u32 {%0}, [%1 + 4];" :
-        "=r"(code32[1]) : "l"(p));
-    asm("ld.global.cs.nc.u32 {%0}, [%1 + 8];" :
-        "=r"(code32[2]) : "l"(p));
-    asm("ld.global.cs.nc.u32 {%0}, [%1 + 12];" :
-        "=r"(code32[3]) : "l"(p));
-    asm("ld.global.cs.nc.u32 {%0}, [%1 + 16];" :
-        "=r"(code32[4]) : "l"(p));
-    asm("ld.global.cs.nc.u32 {%0}, [%1 + 20];" :
-        "=r"(code32[5]) : "l"(p));
-    asm("ld.global.cs.nc.u32 {%0}, [%1 + 24];" :
-        "=r"(code32[6]) : "l"(p));
-    asm("ld.global.cs.nc.u32 {%0}, [%1 + 28];" :
-        "=r"(code32[7]) : "l"(p));
-    asm("ld.global.cs.nc.u32 {%0}, [%1 + 32];" :
-        "=r"(code32[8]) : "l"(p));
-    asm("ld.global.cs.nc.u32 {%0}, [%1 + 36];" :
-        "=r"(code32[9]) : "l"(p));
+    asm("ld.global.cs.nc.v2.u32 {%0, %1}, [%2 + 0];" :
+        "=r"(code32[0]), "=r"(code32[1]) : "l"(p));
+    asm("ld.global.cs.nc.v2.u32 {%0, %1}, [%2 + 8];" :
+        "=r"(code32[2]), "=r"(code32[3]) : "l"(p));
+    asm("ld.global.cs.nc.v2.u32 {%0, %1}, [%2 + 16];" :
+        "=r"(code32[4]), "=r"(code32[5]) : "l"(p));
+    asm("ld.global.cs.nc.v2.u32 {%0, %1}, [%2 + 24];" :
+        "=r"(code32[6]), "=r"(code32[7]) : "l"(p));
+    asm("ld.global.cs.nc.v2.u32 {%0, %1}, [%2 + 32];" :
+        "=r"(code32[8]), "=r"(code32[9]) : "l"(p));
   }
 };

@@ -270,32 +254,17 @@ struct LoadCode32<48> {
                unsigned char* p,
                int offset) {
     p += offset * 48;
-    // FIXME: this is a non-coalesced, unaligned, non-vectorized load
+    // FIXME: this is a non-coalesced load
    // unfortunately need to reorganize memory layout by warp
-    asm("ld.global.cs.nc.u32 {%0}, [%1 + 0];" :
-        "=r"(code32[0]) : "l"(p));
-    asm("ld.global.cs.nc.u32 {%0}, [%1 + 4];" :
-        "=r"(code32[1]) : "l"(p));
-    asm("ld.global.cs.nc.u32 {%0}, [%1 + 8];" :
-        "=r"(code32[2]) : "l"(p));
-    asm("ld.global.cs.nc.u32 {%0}, [%1 + 12];" :
-        "=r"(code32[3]) : "l"(p));
-    asm("ld.global.cs.nc.u32 {%0}, [%1 + 16];" :
-        "=r"(code32[4]) : "l"(p));
-    asm("ld.global.cs.nc.u32 {%0}, [%1 + 20];" :
-        "=r"(code32[5]) : "l"(p));
-    asm("ld.global.cs.nc.u32 {%0}, [%1 + 24];" :
-        "=r"(code32[6]) : "l"(p));
-    asm("ld.global.cs.nc.u32 {%0}, [%1 + 28];" :
-        "=r"(code32[7]) : "l"(p));
-    asm("ld.global.cs.nc.u32 {%0}, [%1 + 32];" :
-        "=r"(code32[8]) : "l"(p));
-    asm("ld.global.cs.nc.u32 {%0}, [%1 + 36];" :
-        "=r"(code32[9]) : "l"(p));
-    asm("ld.global.cs.nc.u32 {%0}, [%1 + 40];" :
-        "=r"(code32[10]) : "l"(p));
-    asm("ld.global.cs.nc.u32 {%0}, [%1 + 44];" :
-        "=r"(code32[11]) : "l"(p));
+    asm("ld.global.cs.nc.v4.u32 {%0, %1, %2, %3}, [%4];" :
+        "=r"(code32[0]), "=r"(code32[1]),
+        "=r"(code32[2]), "=r"(code32[3]) : "l"(p));
+    asm("ld.global.cs.nc.v4.u32 {%0, %1, %2, %3}, [%4 + 16];" :
+        "=r"(code32[4]), "=r"(code32[5]),
+        "=r"(code32[6]), "=r"(code32[7]) : "l"(p));
+    asm("ld.global.cs.nc.v4.u32 {%0, %1, %2, %3}, [%4 + 32];" :
+        "=r"(code32[8]), "=r"(code32[9]),
+        "=r"(code32[10]), "=r"(code32[11]) : "l"(p));
   }
 };

@@ -305,36 +274,22 @@ struct LoadCode32<56> {
                unsigned char* p,
                int offset) {
     p += offset * 56;
-    // FIXME: this is a non-coalesced, unaligned, non-vectorized load
+    // FIXME: this is a non-coalesced, unaligned, 2-vectorized load
     // unfortunately need to reorganize memory layout by warp
-    asm("ld.global.cs.nc.u32 {%0}, [%1 + 0];" :
-        "=r"(code32[0]) : "l"(p));
-    asm("ld.global.cs.nc.u32 {%0}, [%1 + 4];" :
-        "=r"(code32[1]) : "l"(p));
-    asm("ld.global.cs.nc.u32 {%0}, [%1 + 8];" :
-        "=r"(code32[2]) : "l"(p));
-    asm("ld.global.cs.nc.u32 {%0}, [%1 + 12];" :
-        "=r"(code32[3]) : "l"(p));
-    asm("ld.global.cs.nc.u32 {%0}, [%1 + 16];" :
-        "=r"(code32[4]) : "l"(p));
-    asm("ld.global.cs.nc.u32 {%0}, [%1 + 20];" :
-        "=r"(code32[5]) : "l"(p));
-    asm("ld.global.cs.nc.u32 {%0}, [%1 + 24];" :
-        "=r"(code32[6]) : "l"(p));
-    asm("ld.global.cs.nc.u32 {%0}, [%1 + 28];" :
-        "=r"(code32[7]) : "l"(p));
-    asm("ld.global.cs.nc.u32 {%0}, [%1 + 32];" :
-        "=r"(code32[8]) : "l"(p));
-    asm("ld.global.cs.nc.u32 {%0}, [%1 + 36];" :
-        "=r"(code32[9]) : "l"(p));
-    asm("ld.global.cs.nc.u32 {%0}, [%1 + 40];" :
-        "=r"(code32[10]) : "l"(p));
-    asm("ld.global.cs.nc.u32 {%0}, [%1 + 44];" :
-        "=r"(code32[11]) : "l"(p));
-    asm("ld.global.cs.nc.u32 {%0}, [%1 + 48];" :
-        "=r"(code32[12]) : "l"(p));
-    asm("ld.global.cs.nc.u32 {%0}, [%1 + 52];" :
-        "=r"(code32[13]) : "l"(p));
+    asm("ld.global.cs.nc.v2.u32 {%0, %1}, [%2 + 0];" :
+        "=r"(code32[0]), "=r"(code32[1]) : "l"(p));
+    asm("ld.global.cs.nc.v2.u32 {%0, %1}, [%2 + 8];" :
+        "=r"(code32[2]), "=r"(code32[3]) : "l"(p));
+    asm("ld.global.cs.nc.v2.u32 {%0, %1}, [%2 + 16];" :
+        "=r"(code32[4]), "=r"(code32[5]) : "l"(p));
+    asm("ld.global.cs.nc.v2.u32 {%0, %1}, [%2 + 24];" :
+        "=r"(code32[6]), "=r"(code32[7]) : "l"(p));
+    asm("ld.global.cs.nc.v2.u32 {%0, %1}, [%2 + 32];" :
+        "=r"(code32[8]), "=r"(code32[9]) : "l"(p));
+    asm("ld.global.cs.nc.v2.u32 {%0, %1}, [%2 + 40];" :
+        "=r"(code32[10]), "=r"(code32[11]) : "l"(p));
+    asm("ld.global.cs.nc.v2.u32 {%0, %1}, [%2 + 48];" :
+        "=r"(code32[12]), "=r"(code32[13]) : "l"(p));
   }
 };

@@ -361,5 +316,34 @@ struct LoadCode32<64> {
   }
 };

+template<>
+struct LoadCode32<96> {
+  static inline __device__ void load(unsigned int code32[24],
+                                     unsigned char* p,
+                                     int offset) {
+    p += offset * 96;
+    // FIXME: this is a non-coalesced load
+    // unfortunately need to reorganize memory layout by warp
+    asm("ld.global.cs.nc.v4.u32 {%0, %1, %2, %3}, [%4];" :
+        "=r"(code32[0]), "=r"(code32[1]),
+        "=r"(code32[2]), "=r"(code32[3]) : "l"(p));
+    asm("ld.global.cs.nc.v4.u32 {%0, %1, %2, %3}, [%4 + 16];" :
+        "=r"(code32[4]), "=r"(code32[5]),
+        "=r"(code32[6]), "=r"(code32[7]) : "l"(p));
+    asm("ld.global.cs.nc.v4.u32 {%0, %1, %2, %3}, [%4 + 32];" :
+        "=r"(code32[8]), "=r"(code32[9]),
+        "=r"(code32[10]), "=r"(code32[11]) : "l"(p));
+    asm("ld.global.cs.nc.v4.u32 {%0, %1, %2, %3}, [%4 + 48];" :
+        "=r"(code32[12]), "=r"(code32[13]),
+        "=r"(code32[14]), "=r"(code32[15]) : "l"(p));
+    asm("ld.global.cs.nc.v4.u32 {%0, %1, %2, %3}, [%4 + 64];" :
+        "=r"(code32[16]), "=r"(code32[17]),
+        "=r"(code32[18]), "=r"(code32[19]) : "l"(p));
+    asm("ld.global.cs.nc.v4.u32 {%0, %1, %2, %3}, [%4 + 80];" :
+        "=r"(code32[20]), "=r"(code32[21]),
+        "=r"(code32[22]), "=r"(code32[23]) : "l"(p));
+  }
+};
+
 } } // namespace
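The vector width each specialization can use follows from alignment: an N-byte PTX vector load needs an N-byte-aligned address, and row s starts at offset * code_bytes, so the usable width is bounded by gcd(code_bytes, base alignment). A small sanity check of that arithmetic (mine, not from the commit):

    import math

    def widest_u32_vector_load(code_bytes, base_align=16):
        """Widest aligned load (in bytes) valid for every row of a packed
        code array whose base pointer is base_align-byte aligned."""
        return min(math.gcd(code_bytes, base_align), 16)  # v4 = 16 B max

    for sz in (24, 40, 48, 56, 64, 96):
        words = widest_u32_vector_load(sz) // 4
        print('%2d-byte codes -> v%d.u32 loads' % (sz, words))
    # 24, 40, 56 -> v2 (rows only 8-byte aligned); 48, 64, 96 -> v4,
    # matching the ld.global.cs.nc.v2 / .v4 forms used above.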
gpu/impl/PQScanMultiPassNoPrecomputed.cu

@@ -358,6 +358,9 @@ runMultiPassTile(Tensor<float, 2, true>& queries,
     case 64:
       RUN_PQ(64);
       break;
+    case 96:
+      RUN_PQ(96);
+      break;
     default:
       FAISS_ASSERT(false);
       break;
gpu/impl/PQScanMultiPassPrecomputed.cu

@@ -343,6 +343,9 @@ runMultiPassTile(Tensor<float, 2, true>& queries,
     case 64:
       RUN_PQ(64);
       break;
+    case 96:
+      RUN_PQ(96);
+      break;
     default:
       FAISS_ASSERT(false);
       break;
gpu/test/TestGpuIndexIVFPQ.cpp

@@ -22,7 +22,11 @@
 void pickEncoding(int& codes, int& dim) {
-  std::vector<int> codeSizes{3, 4, 8, 12, 16, 20, 24, 28, 32, 40, 48, 56, 64};
+  std::vector<int> codeSizes{3, 4, 8, 12, 16, 20, 24, 28, 32, 40, 48, 56, 64, 96};
   std::vector<int> dimSizes{4, 8, 16, 32};

   codes = codeSizes[faiss::gpu::randVal(0, codeSizes.size() - 1)];

@@ -55,8 +59,8 @@ struct Options {
         faiss::gpu::INDICES_CPU,
         faiss::gpu::INDICES_32_BIT,
         faiss::gpu::INDICES_64_BIT});

-    if (codes == 64) {
-      // 64x8 lookup can only fit using float16
+    if (codes > 48) {
+      // large codes can only fit using float16
       useFloat16 = true;
     } else {
       useFloat16 = faiss::gpu::randBool();
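The codes > 48 threshold matches a shared-memory budget argument: the per-block distance lookup table holds codes × 256 entries, and at 4 bytes per float32 entry it outgrows a 48 KB shared-memory limit once codes exceeds 48. A back-of-the-envelope check (my arithmetic; the 48 KB figure is an assumption about 2017-era GPUs, not stated in the commit):

    SHARED_MEM_KB = 48  # assumed per-block shared-memory limit

    for codes in (48, 56, 64, 96):
        for entry_bytes, name in ((4, 'float32'), (2, 'float16')):
            table_kb = codes * 256 * entry_bytes / 1024.0
            verdict = 'fits' if table_kb <= SHARED_MEM_KB else 'too big'
            print('%2d subquantizers, %s: %3.0f KB (%s)'
                  % (codes, name, table_kb, verdict))
    # 48/float32 is exactly 48 KB; 56, 64 and 96 need float16
    # (96/float16 again lands exactly on 48 KB), matching codes > 48.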
gpu/utils/Float16.cu

@@ -10,6 +10,7 @@
 // Copyright 2004-present Facebook. All Rights Reserved.

 #include "Float16.cuh"
+#include "nvidia/fp16_emu.cuh"
 #include <thrust/execution_policy.h>
 #include <thrust/transform.h>

@@ -48,67 +49,10 @@ void runConvertToFloat32(float* out,
                in, in + num, out, HalfToFloat());
 }

-// FIXME: replace
-/*
-Copyright (c) 2015, Norbert Juffa
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions
-are met:
-
-1. Redistributions of source code must retain the above copyright
-notice, this list of conditions and the following disclaimer.
-
-2. Redistributions in binary form must reproduce the above copyright
-notice, this list of conditions and the following disclaimer in the
-documentation and/or other materials provided with the distribution.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-
 half hostFloat2Half(float a) {
-  uint32_t ia;
-  uint16_t ir;
-  memcpy(&ia, &a, sizeof(float));
-
-  ir = (ia >> 16) & 0x8000;
-  if ((ia & 0x7f800000) == 0x7f800000) {
-    if ((ia & 0x7fffffff) == 0x7f800000) {
-      ir |= 0x7c00; /* infinity */
-    } else {
-      ir = 0x7fff; /* canonical NaN */
-    }
-  } else if ((ia & 0x7f800000) >= 0x33000000) {
-    int shift = (int)((ia >> 23) & 0xff) - 127;
-    if (shift > 15) {
-      ir |= 0x7c00; /* infinity */
-    } else {
-      ia = (ia & 0x007fffff) | 0x00800000; /* extract mantissa */
-      if (shift < -14) { /* denormal */
-        ir |= ia >> (-1 - shift);
-        ia = ia << (32 - (-1 - shift));
-      } else { /* normal */
-        ir |= ia >> (24 - 11);
-        ia = ia << (32 - (24 - 11));
-        ir = ir + ((14 + shift) << 10);
-      }
-      /* IEEE-754 round to nearest even */
-      if ((ia > 0x80000000) || ((ia == 0x80000000) && (ir & 1))) {
-        ir++;
-      }
-    }
-  }
-  half ret;
-  memcpy(&ret, &ir, sizeof(half));
-  return ret;
+  half h;
+  h.x = cpu_float2half_rn(a).x;
+  return h;
 }

 } } // namespace
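cpu_float2half_rn performs the same IEEE-754 round-to-nearest-even float32-to-float16 conversion as the removed bit-twiddling code. For sanity checking such a routine, numpy's float16 cast can serve as a reference oracle (a sketch under that assumption, not part of the commit):

    import numpy as np

    def float_to_half_bits(a):
        """Reference float32 -> float16 conversion (round-to-nearest-even);
        returns the raw 16-bit pattern as an int."""
        return int(np.float32(a).astype(np.float16).view(np.uint16))

    # Spot checks hitting the branches of the removed code:
    for v in (1.0, 65504.0, 1e5, 6e-5, 1e-8, float('inf'), float('nan')):
        print('%-8g -> 0x%04x' % (v, float_to_half_bits(v)))
    # 1e5 overflows to +inf (0x7c00); 6e-5 becomes an fp16 denormal;
    # 1e-8 underflows to zero; NaN maps to a canonical quiet NaN.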
python/swigfaiss_gpu_wrap.cxx

This diff is collapsed.

python/swigfaiss_wrap.cxx

This diff is collapsed.
python/test_build_blocks.py

@@ -58,3 +58,22 @@ class TestPCA(testutil.BaseFacebookTestCase):
         for o in column_norm2:
             self.assertGreater(prev, o)
             prev = o

+class TestProductQuantizer(testutil.BaseFacebookTestCase):
+
+    def test_pq(self):
+        d = 64
+        n = 1000
+        cs = 4
+        np.random.seed(123)
+        x = np.random.random(size=(n, d)).astype('float32')
+        pq = faiss.ProductQuantizer(d, cs, 8)
+        pq.train(x)
+        codes = pq.compute_codes(x)
+        x2 = pq.decode(codes)
+        diff = ((x - x2) ** 2).sum()
+        # print "diff=", diff
+        # diff= 1807.98
+        self.assertGreater(2500, diff)
utils.cpp

@@ -16,7 +16,6 @@
 #include <cassert>
 #include <cstring>
 #include <cmath>
-#include <cmath>

 #include <smmintrin.h>
 #include <mmintrin.h>

@@ -99,8 +98,8 @@ size_t get_mem_usage_kb ()
 size_t get_mem_usage_kb ()
 {
     fprintf (stderr, "WARN: get_mem_usage_kb not implemented on the mac\n");
     return 0;
 }
 #endif
@@ -173,13 +172,13 @@ int rand_r(unsigned *seed);
 RandomGenerator::RandomGenerator (long seed)
 {
     rand_state = seed;
 }

 RandomGenerator::RandomGenerator (const RandomGenerator & other)
 {
     rand_state = other.rand_state;
 }

@@ -208,7 +207,7 @@ int RandomGenerator::rand_int (int max)
 float RandomGenerator::rand_float ()
 {
-    return rand_int() / float(1 << 31);
+    return rand_int() / float(1L << 31);
 }

 double RandomGenerator::rand_double ()
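The 1L fix matters because rand_int() returns a non-negative 31-bit value while 1 << 31 overflows a 32-bit int (undefined behavior in C++; in practice INT_MIN), so float(1 << 31) could evaluate to -2^31 and make rand_float negative. With 1L << 31 the divisor is 2^31 and the result lands in [0, 1). A quick demonstration of the wrap-around (illustrative Python via ctypes):

    import ctypes

    print(1 << 31)                        # 2147483648: the intended divisor
    print(ctypes.c_int32(1 << 31).value)  # -2147483648: what a 32-bit int holds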
@@ -233,7 +232,7 @@ void float_rand (float * x, size_t n, long seed)
     RandomGenerator rng0 (seed);
     int a0 = rng0.rand_int (), b0 = rng0.rand_int ();

 #pragma omp parallel for
     for (size_t j = 0; j < nblock; j++) {
         RandomGenerator rng (a0 + j * b0);

@@ -255,7 +254,7 @@ void float_randn (float * x, size_t n, long seed)
     RandomGenerator rng0 (seed);
     int a0 = rng0.rand_int (), b0 = rng0.rand_int ();

 #pragma omp parallel for
     for (size_t j = 0; j < nblock; j++) {
         RandomGenerator rng (a0 + j * b0);

@@ -292,7 +291,7 @@ void long_rand (long * x, size_t n, long seed)
     RandomGenerator rng0 (seed);
     int a0 = rng0.rand_int (), b0 = rng0.rand_int ();

 #pragma omp parallel for
     for (size_t j = 0; j < nblock; j++) {
         RandomGenerator rng (a0 + j * b0);

@@ -329,7 +328,7 @@ void byte_rand (uint8_t * x, size_t n, long seed)
     RandomGenerator rng0 (seed);
     int a0 = rng0.rand_int (), b0 = rng0.rand_int ();

 #pragma omp parallel for
     for (size_t j = 0; j < nblock; j++) {
         RandomGenerator rng (a0 + j * b0);
@@ -1278,7 +1277,9 @@ void pairwise_L2sqr (long d,
  * Kmeans subroutine
  ***************************************************************************/

-#define EPS 1e-7
+// a bit above machine epsilon for float16
+
+#define EPS (1 / 1024.)

 /* For k-means, compute centroids given assignment of vectors to centroids */
 /* NOTE: This could be multi-threaded (use histogram of indexes) */
@@ -1347,12 +1348,12 @@ int km_update_centroids (const float * x,
         /* small symmetric perturbation. Much better than  */
         for (size_t j = 0; j < d; j++)
             if (j % 2 == 0) {
-                centroids[ci * d + j] += EPS;
-                centroids[cj * d + j] -= EPS;
+                centroids[ci * d + j] *= 1 + EPS;
+                centroids[cj * d + j] *= 1 - EPS;
             } else {
-                centroids[ci * d + j] -= EPS;
-                centroids[cj * d + j] += EPS;
+                centroids[ci * d + j] *= 1 + EPS;
+                centroids[cj * d + j] *= 1 - EPS;
             }

         /* assume even split of the cluster */
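EPS = 1/1024 is 2^-10, a bit above float16's relative machine epsilon, and the perturbation is now multiplicative, so the split scales with each coordinate's magnitude instead of vanishing: the old additive 1e-7 is absorbed by float32 rounding for coordinates much larger than roughly 1. A small sketch of the splitting step (mine, not from the commit):

    import numpy as np

    EPS = np.float32(1 / 1024.)  # a bit above fp16 machine epsilon (2**-10)

    def split_centroid(c):
        """Split an empty cluster's donor centroid into two copies that
        k-means can pull apart, via a relative (multiplicative) nudge."""
        ci, cj = c.copy(), c.copy()
        ci *= 1 + EPS
        cj *= 1 - EPS
        return ci, cj

    big = np.full(4, 1e6, dtype=np.float32)
    ci, cj = split_centroid(big)
    print(ci - cj)             # ~1953: the copies are clearly separated
    print((big + 1e-7) - big)  # 0.0: the old additive EPS is absorbed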