Commit 00c288c0 authored by matthijs

added benchmarking scripts

parent a67190c6
...@@ -166,8 +166,8 @@ void Clustering::train (idx_t nx, const float *x_in, Index & index) {
                 assign, d, k, nx);
         if (verbose) {
-            printf ("  Iteration %d (%5g s, search %5g s): "
-                    "objective=%g imbalance=%g nsplit=%d \r",
+            printf ("  Iteration %d (%.2f s, search %.2f s): "
+                    "objective=%g imbalance=%.3f nsplit=%d \r",
                     i, (getmillisecs() - t0) / 1000.0,
                     t_search_tot / 1000,
                     err, imbalance_factor (nx, k, assign),
......
...@@ -7,15 +7,19 @@
 .SUFFIXES: .cpp .o
 MAKEFILE_INC=makefile.inc
 -include $(MAKEFILE_INC)
 LIBNAME=libfaiss
 all: .env_ok $(LIBNAME).a tests/demo_ivfpq_indexing
 py: _swigfaiss.so
 #############################
 # Various
......
...@@ -211,6 +211,7 @@ void translate_labels (long n, idx_t *labels, long translation)
  * @param all_labels   idem
  * @param translations label translations to apply, size nshard
  */
 template <class C>
 void merge_tables (long n, long k, long nshard,
                    float *distances, idx_t *labels,
...@@ -218,22 +219,55 @@ void merge_tables (long n, long k, long nshard,
                    idx_t *all_labels,
                    const long *translations)
 {
-    long shard_stride = n * k;
-#pragma omp parallel for
-    for (long i = 0; i < n; i++) {
-        float *D = distances + i * k;
-        idx_t *I = labels + i * k;
-        const float *Ds = all_distances + i * k;
-        idx_t *Is = all_labels + i * k;
-        translate_labels (k, Is, translations[0]);
-        heap_heapify<C>(k, D, I, Ds, Is, k);
-        for (int s = 1; s < nshard; s++) {
-            Ds += shard_stride;
-            Is += shard_stride;
-            translate_labels (k, Is, translations[s]);
-            heap_addn<C> (k, D, I, Ds, Is, k);
-        }
-        heap_reorder<C>(k, D, I);
+    if (k == 0) {
+        return;
+    }
+    long stride = n * k;
+#pragma omp parallel
+    {
+        std::vector<int> buf (2 * nshard);
+        int * pointer = buf.data();
+        int * shard_ids = pointer + nshard;
+        std::vector<float> buf2 (nshard);
+        float * heap_vals = buf2.data();
+#pragma omp for
+        for (long i = 0; i < n; i++) {
+            // the heap maps values to the shard where they are
+            // produced.
+            const float *D_in = all_distances + i * k;
+            const idx_t *I_in = all_labels + i * k;
+            int heap_size = 0;
+            for (long s = 0; s < nshard; s++) {
+                pointer[s] = 0;
+                if (I_in[stride * s] >= 0)
+                    heap_push<C> (++heap_size, heap_vals, shard_ids,
+                                  D_in[stride * s], s);
+            }
+            float *D = distances + i * k;
+            idx_t *I = labels + i * k;
+            for (int j = 0; j < k; j++) {
+                if (heap_size == 0) {
+                    I[j] = -1;
+                    D[j] = C::neutral();
+                } else {
+                    // pop best element
+                    int s = shard_ids[0];
+                    int & p = pointer[s];
+                    D[j] = heap_vals[0];
+                    I[j] = I_in[stride * s + p] + translations[s];
+                    heap_pop<C> (heap_size--, heap_vals, shard_ids);
+                    p++;
+                    if (p < k && I_in[stride * s + p] >= 0)
+                        heap_push<C> (++heap_size, heap_vals, shard_ids,
+                                      D_in[stride * s + p], s);
+                }
+            }
+        }
     }
 }
...@@ -360,6 +394,9 @@ void IndexShards::add_with_ids (idx_t n, const float * x, const long *xids)
 }
 void IndexShards::reset ()
 {
     for (int i = 0; i < shard_indexes.size(); i++) {
...@@ -433,15 +470,14 @@ void IndexShards::search (
     }
     if (metric_type == METRIC_L2) {
-        merge_tables< CMax<float, idx_t> > (
+        merge_tables< CMin<float, int> > (
             n, k, nshard, distances, labels,
             all_distances, all_labels, translations.data ());
     } else {
-        merge_tables< CMin<float, idx_t> > (
+        merge_tables< CMax<float, int> > (
             n, k, nshard, distances, labels,
             all_distances, all_labels, translations.data ());
     }
     delete [] all_distances;
     delete [] all_labels;
 }
......
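The rewritten merge_tables above replaces the old scheme (heapify the first shard's k results, then heap_addn the k candidates of every other shard) with a k-way merge: the heap holds at most one candidate per shard, keyed by that shard's current best distance, so each of the k outputs costs O(log nshard) pops and pushes instead of O(k) heap insertions per shard. It also tolerates shards that return fewer than k valid results (label -1) and pads the output with C::neutral(). This is why the call sites now instantiate CMin for L2 and CMax for inner product: the heap orders the candidate values themselves rather than maintaining a worst-of-top-k bound. A minimal Python sketch of the same merge, using heapq on hypothetical per-shard result lists (illustration only, not faiss code):

    import heapq

    def merge_shard_results(D_shards, I_shards, k, translations):
        # One heap entry per shard: (current best distance, shard id).
        # Assumes each shard's distances are sorted ascending and unused
        # slots are marked with label -1, as in faiss.
        heap = [(D[0], s) for s, (D, I) in
                enumerate(zip(D_shards, I_shards)) if I[0] >= 0]
        heapq.heapify(heap)
        pos = [0] * len(D_shards)        # read pointer within each shard
        D_out, I_out = [], []
        for _ in range(k):
            if not heap:                 # fewer than k valid results overall
                D_out.append(float('inf'))
                I_out.append(-1)
                continue
            d, s = heapq.heappop(heap)   # globally best remaining candidate
            D_out.append(d)
            I_out.append(I_shards[s][pos[s]] + translations[s])
            pos[s] += 1
            p = pos[s]
            if p < k and I_shards[s][p] >= 0:
                heapq.heappush(heap, (D_shards[s][p], s))
        return D_out, I_out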
+# Copyright (c) 2015-present, Facebook, Inc.
+# All rights reserved.
+#
+# This source code is licensed under the CC-by-NC license found in the
+# LICENSE file in the root directory of this source tree.
 # -*- makefile -*-
 # Tested on macOS Sierra (10.12.2) with llvm installed using Homebrew (https://brew.sh)
......
+# Copyright (c) 2015-present, Facebook, Inc.
+# All rights reserved.
+#
+# This source code is licensed under the CC-by-NC license found in the
+# LICENSE file in the root directory of this source tree.
 # -*- makefile -*-
 # tested on Mac OS X 10.12.2 Sierra with additional software installed via macports
......
...@@ -15,6 +15,9 @@ import types
 import sys
 import pdb
+
+# we import * so that the symbol X can be accessed as faiss.X
+
 try:
     from swigfaiss_gpu import *
 except ImportError as e:
...@@ -29,6 +32,9 @@ except ImportError as e:
 ##################################################################
 # The functions below add or replace some methods for classes
 # this is to be able to pass in numpy arrays directly
+# The C++ version of the classnames will be suffixed with _c
+##################################################################
+
 def replace_method(the_class, name, replacement):
...@@ -49,6 +55,32 @@ def handle_Clustering():
 handle_Clustering()
+
+def handle_ProductQuantizer():
+
+    def replacement_train(self, x):
+        n, d = x.shape
+        assert d == self.d
+        self.train_c(n, swig_ptr(x))
+
+    def replacement_compute_codes(self, x):
+        n, d = x.shape
+        assert d == self.d
+        codes = np.empty((n, self.code_size), dtype='uint8')
+        self.compute_codes_c(swig_ptr(x), swig_ptr(codes), n)
+        return codes
+
+    def replacement_decode(self, codes):
+        n, cs = codes.shape
+        assert cs == self.code_size
+        x = np.empty((n, self.d), dtype='float32')
+        self.decode_c(swig_ptr(codes), swig_ptr(x), n)
+        return x
+
+    replace_method(ProductQuantizer, 'train', replacement_train)
+    replace_method(ProductQuantizer, 'compute_codes',
+                   replacement_compute_codes)
+    replace_method(ProductQuantizer, 'decode', replacement_decode)
+
+handle_ProductQuantizer()
 def handle_Index(the_class):
...@@ -153,6 +185,8 @@ for symbol in dir(this_module):
     handle_ParameterSpace(the_class)
 def vector_float_to_array(v):
     a = np.empty(v.size(), dtype='float32')
     memcpy(swig_ptr(a), v.data(), 4 * v.size())
...@@ -241,7 +275,8 @@ def eval_intersection(I1, I2):
     k1, k2 = I1.shape[1], I2.shape[1]
     ninter = 0
     for i in range(n):
-        ninter += ranklist_intersection_size(k1, swig_ptr(I1[i]), k2, swig_ptr(I2[i]))
+        ninter += ranklist_intersection_size(
+            k1, swig_ptr(I1[i]), k2, swig_ptr(I2[i]))
     return ninter
......
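With these wrappers a ProductQuantizer can be driven directly from numpy; the _c-suffixed methods they call are the raw SWIG-exported C++ entry points. A minimal round trip under the new API (a sketch that mirrors the unit test added further down):

    import numpy as np
    import faiss

    d, M = 64, 4                           # dimension, sub-quantizers
    x = np.random.rand(1000, d).astype('float32')

    pq = faiss.ProductQuantizer(d, M, 8)   # 8 bits per sub-quantizer code
    pq.train(x)                            # calls train_c(n, swig_ptr(x))
    codes = pq.compute_codes(x)            # (n, code_size) uint8 array
    x2 = pq.decode(codes)                  # lossy reconstruction
    print('squared error:', ((x - x2) ** 2).sum())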
...@@ -77,6 +77,7 @@ IVFBase::reset() {
   deviceListDataPointers_.clear();
   deviceListIndexPointers_.clear();
   deviceListLengths_.clear();
+  listOffsetToUserIndex_.clear();
   for (size_t i = 0; i < numLists_; ++i) {
     deviceListData_.emplace_back(
......
...@@ -85,6 +85,7 @@ IVFPQ::isSupportedPQCodeLength(int size) {
     case 48:
     case 56: // only supported with float16
     case 64: // only supported with float16
+    case 96: // only supported with float16
       return true;
     default:
       return false;
......
...@@ -485,7 +485,7 @@ runPQCodeDistances(Tensor<float, 3, true>& pqCentroids,
       CODE_DISTANCE(10);
       break;
     case 12:
-      CODE_DISTANCE(10);
+      CODE_DISTANCE(12);
       break;
     case 16:
       CODE_DISTANCE(16);
......
...@@ -174,20 +174,14 @@ struct LoadCode32<24> {
                                      unsigned char* p,
                                      int offset) {
     p += offset * 24;
-    // FIXME: this is a non-coalesced, unaligned, non-vectorized load
+    // FIXME: this is a non-coalesced, unaligned, 2-vectorized load
     // unfortunately need to reorganize memory layout by warp
-    asm("ld.global.cs.nc.u32 {%0}, [%1 + 0];" :
-        "=r"(code32[0]) : "l"(p));
-    asm("ld.global.cs.nc.u32 {%0}, [%1 + 4];" :
-        "=r"(code32[1]) : "l"(p));
-    asm("ld.global.cs.nc.u32 {%0}, [%1 + 8];" :
-        "=r"(code32[2]) : "l"(p));
-    asm("ld.global.cs.nc.u32 {%0}, [%1 + 12];" :
-        "=r"(code32[3]) : "l"(p));
-    asm("ld.global.cs.nc.u32 {%0}, [%1 + 16];" :
-        "=r"(code32[4]) : "l"(p));
-    asm("ld.global.cs.nc.u32 {%0}, [%1 + 20];" :
-        "=r"(code32[5]) : "l"(p));
+    asm("ld.global.cs.nc.v2.u32 {%0, %1}, [%2 + 0];" :
+        "=r"(code32[0]), "=r"(code32[1]) : "l"(p));
+    asm("ld.global.cs.nc.v2.u32 {%0, %1}, [%2 + 8];" :
+        "=r"(code32[2]), "=r"(code32[3]) : "l"(p));
+    asm("ld.global.cs.nc.v2.u32 {%0, %1}, [%2 + 16];" :
+        "=r"(code32[4]), "=r"(code32[5]) : "l"(p));
   }
 };
...@@ -239,28 +233,18 @@ struct LoadCode32<40> {
                                      unsigned char* p,
                                      int offset) {
     p += offset * 40;
-    // FIXME: this is a non-coalesced, unaligned, non-vectorized load
+    // FIXME: this is a non-coalesced, unaligned, 2-vectorized load
     // unfortunately need to reorganize memory layout by warp
-    asm("ld.global.cs.nc.u32 {%0}, [%1 + 0];" :
-        "=r"(code32[0]) : "l"(p));
-    asm("ld.global.cs.nc.u32 {%0}, [%1 + 4];" :
-        "=r"(code32[1]) : "l"(p));
-    asm("ld.global.cs.nc.u32 {%0}, [%1 + 8];" :
-        "=r"(code32[2]) : "l"(p));
-    asm("ld.global.cs.nc.u32 {%0}, [%1 + 12];" :
-        "=r"(code32[3]) : "l"(p));
-    asm("ld.global.cs.nc.u32 {%0}, [%1 + 16];" :
-        "=r"(code32[4]) : "l"(p));
-    asm("ld.global.cs.nc.u32 {%0}, [%1 + 20];" :
-        "=r"(code32[5]) : "l"(p));
-    asm("ld.global.cs.nc.u32 {%0}, [%1 + 24];" :
-        "=r"(code32[6]) : "l"(p));
-    asm("ld.global.cs.nc.u32 {%0}, [%1 + 28];" :
-        "=r"(code32[7]) : "l"(p));
-    asm("ld.global.cs.nc.u32 {%0}, [%1 + 32];" :
-        "=r"(code32[8]) : "l"(p));
-    asm("ld.global.cs.nc.u32 {%0}, [%1 + 36];" :
-        "=r"(code32[9]) : "l"(p));
+    asm("ld.global.cs.nc.v2.u32 {%0, %1}, [%2 + 0];" :
+        "=r"(code32[0]), "=r"(code32[1]) : "l"(p));
+    asm("ld.global.cs.nc.v2.u32 {%0, %1}, [%2 + 8];" :
+        "=r"(code32[2]), "=r"(code32[3]) : "l"(p));
+    asm("ld.global.cs.nc.v2.u32 {%0, %1}, [%2 + 16];" :
+        "=r"(code32[4]), "=r"(code32[5]) : "l"(p));
+    asm("ld.global.cs.nc.v2.u32 {%0, %1}, [%2 + 24];" :
+        "=r"(code32[6]), "=r"(code32[7]) : "l"(p));
+    asm("ld.global.cs.nc.v2.u32 {%0, %1}, [%2 + 32];" :
+        "=r"(code32[8]), "=r"(code32[9]) : "l"(p));
   }
 };
...@@ -270,32 +254,17 @@ struct LoadCode32<48> {
                                      unsigned char* p,
                                      int offset) {
     p += offset * 48;
-    // FIXME: this is a non-coalesced, unaligned, non-vectorized load
+    // FIXME: this is a non-coalesced load
     // unfortunately need to reorganize memory layout by warp
-    asm("ld.global.cs.nc.u32 {%0}, [%1 + 0];" :
-        "=r"(code32[0]) : "l"(p));
-    asm("ld.global.cs.nc.u32 {%0}, [%1 + 4];" :
-        "=r"(code32[1]) : "l"(p));
-    asm("ld.global.cs.nc.u32 {%0}, [%1 + 8];" :
-        "=r"(code32[2]) : "l"(p));
-    asm("ld.global.cs.nc.u32 {%0}, [%1 + 12];" :
-        "=r"(code32[3]) : "l"(p));
-    asm("ld.global.cs.nc.u32 {%0}, [%1 + 16];" :
-        "=r"(code32[4]) : "l"(p));
-    asm("ld.global.cs.nc.u32 {%0}, [%1 + 20];" :
-        "=r"(code32[5]) : "l"(p));
-    asm("ld.global.cs.nc.u32 {%0}, [%1 + 24];" :
-        "=r"(code32[6]) : "l"(p));
-    asm("ld.global.cs.nc.u32 {%0}, [%1 + 28];" :
-        "=r"(code32[7]) : "l"(p));
-    asm("ld.global.cs.nc.u32 {%0}, [%1 + 32];" :
-        "=r"(code32[8]) : "l"(p));
-    asm("ld.global.cs.nc.u32 {%0}, [%1 + 36];" :
-        "=r"(code32[9]) : "l"(p));
-    asm("ld.global.cs.nc.u32 {%0}, [%1 + 40];" :
-        "=r"(code32[10]) : "l"(p));
-    asm("ld.global.cs.nc.u32 {%0}, [%1 + 44];" :
-        "=r"(code32[11]) : "l"(p));
+    asm("ld.global.cs.nc.v4.u32 {%0, %1, %2, %3}, [%4];" :
+        "=r"(code32[0]), "=r"(code32[1]),
+        "=r"(code32[2]), "=r"(code32[3]) : "l"(p));
+    asm("ld.global.cs.nc.v4.u32 {%0, %1, %2, %3}, [%4 + 16];" :
+        "=r"(code32[4]), "=r"(code32[5]),
+        "=r"(code32[6]), "=r"(code32[7]) : "l"(p));
+    asm("ld.global.cs.nc.v4.u32 {%0, %1, %2, %3}, [%4 + 32];" :
+        "=r"(code32[8]), "=r"(code32[9]),
+        "=r"(code32[10]), "=r"(code32[11]) : "l"(p));
   }
 };
...@@ -305,36 +274,22 @@ struct LoadCode32<56> {
                                      unsigned char* p,
                                      int offset) {
     p += offset * 56;
-    // FIXME: this is a non-coalesced, unaligned, non-vectorized load
+    // FIXME: this is a non-coalesced, unaligned, 2-vectorized load
     // unfortunately need to reorganize memory layout by warp
-    asm("ld.global.cs.nc.u32 {%0}, [%1 + 0];" :
-        "=r"(code32[0]) : "l"(p));
-    asm("ld.global.cs.nc.u32 {%0}, [%1 + 4];" :
-        "=r"(code32[1]) : "l"(p));
-    asm("ld.global.cs.nc.u32 {%0}, [%1 + 8];" :
-        "=r"(code32[2]) : "l"(p));
-    asm("ld.global.cs.nc.u32 {%0}, [%1 + 12];" :
-        "=r"(code32[3]) : "l"(p));
-    asm("ld.global.cs.nc.u32 {%0}, [%1 + 16];" :
-        "=r"(code32[4]) : "l"(p));
-    asm("ld.global.cs.nc.u32 {%0}, [%1 + 20];" :
-        "=r"(code32[5]) : "l"(p));
-    asm("ld.global.cs.nc.u32 {%0}, [%1 + 24];" :
-        "=r"(code32[6]) : "l"(p));
-    asm("ld.global.cs.nc.u32 {%0}, [%1 + 28];" :
-        "=r"(code32[7]) : "l"(p));
-    asm("ld.global.cs.nc.u32 {%0}, [%1 + 32];" :
-        "=r"(code32[8]) : "l"(p));
-    asm("ld.global.cs.nc.u32 {%0}, [%1 + 36];" :
-        "=r"(code32[9]) : "l"(p));
-    asm("ld.global.cs.nc.u32 {%0}, [%1 + 40];" :
-        "=r"(code32[10]) : "l"(p));
-    asm("ld.global.cs.nc.u32 {%0}, [%1 + 44];" :
-        "=r"(code32[11]) : "l"(p));
-    asm("ld.global.cs.nc.u32 {%0}, [%1 + 48];" :
-        "=r"(code32[12]) : "l"(p));
-    asm("ld.global.cs.nc.u32 {%0}, [%1 + 52];" :
-        "=r"(code32[13]) : "l"(p));
+    asm("ld.global.cs.nc.v2.u32 {%0, %1}, [%2 + 0];" :
+        "=r"(code32[0]), "=r"(code32[1]) : "l"(p));
+    asm("ld.global.cs.nc.v2.u32 {%0, %1}, [%2 + 8];" :
+        "=r"(code32[2]), "=r"(code32[3]) : "l"(p));
+    asm("ld.global.cs.nc.v2.u32 {%0, %1}, [%2 + 16];" :
+        "=r"(code32[4]), "=r"(code32[5]) : "l"(p));
+    asm("ld.global.cs.nc.v2.u32 {%0, %1}, [%2 + 24];" :
+        "=r"(code32[6]), "=r"(code32[7]) : "l"(p));
+    asm("ld.global.cs.nc.v2.u32 {%0, %1}, [%2 + 32];" :
+        "=r"(code32[8]), "=r"(code32[9]) : "l"(p));
+    asm("ld.global.cs.nc.v2.u32 {%0, %1}, [%2 + 40];" :
+        "=r"(code32[10]), "=r"(code32[11]) : "l"(p));
+    asm("ld.global.cs.nc.v2.u32 {%0, %1}, [%2 + 48];" :
+        "=r"(code32[12]), "=r"(code32[13]) : "l"(p));
   }
 };
...@@ -361,5 +316,34 @@ struct LoadCode32<64> {
   }
 };
+
+template<>
+struct LoadCode32<96> {
+  static inline __device__ void load(unsigned int code32[24],
+                                     unsigned char* p,
+                                     int offset) {
+    p += offset * 96;
+    // FIXME: this is a non-coalesced load
+    // unfortunately need to reorganize memory layout by warp
+    asm("ld.global.cs.nc.v4.u32 {%0, %1, %2, %3}, [%4];" :
+        "=r"(code32[0]), "=r"(code32[1]),
+        "=r"(code32[2]), "=r"(code32[3]) : "l"(p));
+    asm("ld.global.cs.nc.v4.u32 {%0, %1, %2, %3}, [%4 + 16];" :
+        "=r"(code32[4]), "=r"(code32[5]),
+        "=r"(code32[6]), "=r"(code32[7]) : "l"(p));
+    asm("ld.global.cs.nc.v4.u32 {%0, %1, %2, %3}, [%4 + 32];" :
+        "=r"(code32[8]), "=r"(code32[9]),
+        "=r"(code32[10]), "=r"(code32[11]) : "l"(p));
+    asm("ld.global.cs.nc.v4.u32 {%0, %1, %2, %3}, [%4 + 48];" :
+        "=r"(code32[12]), "=r"(code32[13]),
+        "=r"(code32[14]), "=r"(code32[15]) : "l"(p));
+    asm("ld.global.cs.nc.v4.u32 {%0, %1, %2, %3}, [%4 + 64];" :
+        "=r"(code32[16]), "=r"(code32[17]),
+        "=r"(code32[18]), "=r"(code32[19]) : "l"(p));
+    asm("ld.global.cs.nc.v4.u32 {%0, %1, %2, %3}, [%4 + 80];" :
+        "=r"(code32[20]), "=r"(code32[21]),
+        "=r"(code32[22]), "=r"(code32[23]) : "l"(p));
+  }
+};
+
 } } // namespace
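The pattern behind the rewrites above: each scalar ld.global.cs.nc.u32 moved 4 bytes per instruction, while the v2.u32 and v4.u32 forms move 8 and 16 bytes. v4 needs 16-byte alignment, so it is only usable when the code size, and hence every per-vector offset, is a multiple of 16 bytes (48, 64, 96); the remaining sizes are multiples of 8 and get v2. A quick Python sketch of the decomposition (plain arithmetic, not library code):

    # bytes per PQ code -> how many vector loads replace the scalar ones
    for nbytes in [24, 40, 48, 56, 64, 96]:
        width = 16 if nbytes % 16 == 0 else 8
        kind = 'v4.u32' if width == 16 else 'v2.u32'
        print('%2d bytes -> %d x ld.global.cs.nc.%s'
              % (nbytes, nbytes // width, kind))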
...@@ -358,6 +358,9 @@ runMultiPassTile(Tensor<float, 2, true>& queries,
     case 64:
       RUN_PQ(64);
       break;
+    case 96:
+      RUN_PQ(96);
+      break;
     default:
       FAISS_ASSERT(false);
       break;
......
...@@ -343,6 +343,9 @@ runMultiPassTile(Tensor<float, 2, true>& queries,
     case 64:
       RUN_PQ(64);
       break;
+    case 96:
+      RUN_PQ(96);
+      break;
     default:
       FAISS_ASSERT(false);
       break;
......
...@@ -22,7 +22,11 @@
 void pickEncoding(int& codes, int& dim) {
-  std::vector<int> codeSizes{3, 4, 8, 12, 16, 20, 24, 28, 32, 40, 48, 56, 64};
+  std::vector<int> codeSizes{
+    3, 4, 8, 12, 16, 20, 24,
+    28, 32, 40, 48, 56, 64, 96
+  };
   std::vector<int> dimSizes{4, 8, 16, 32};
   codes = codeSizes[faiss::gpu::randVal(0, codeSizes.size() - 1)];
...@@ -55,8 +59,8 @@ struct Options {
         faiss::gpu::INDICES_CPU,
         faiss::gpu::INDICES_32_BIT,
         faiss::gpu::INDICES_64_BIT});
-    if (codes == 64) {
-      // 64x8 lookup can only fit using float16
+    if (codes > 48) {
+      // large codes can only fit using float16
       useFloat16 = true;
     } else {
       useFloat16 = faiss::gpu::randBool();
......
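The codes > 48 test generalizes the old 64-only special case. A plausible reading of the comments here and in IVFPQ.cu: the multi-pass kernels keep a lookup table of 256 distances per sub-quantizer in shared memory, and common GPUs cap shared memory at 48 KB per block, so wide codes only fit at half precision. A back-of-the-envelope check under that assumption (a sketch, not library code):

    SMEM = 48 * 1024                       # typical per-block shared memory
    for codes in [48, 56, 64, 96]:
        for name, width in [('float32', 4), ('float16', 2)]:
            size = codes * 256 * width     # 256 centroids per sub-quantizer
            print('%2d sub-quantizers, %s: %6d B -> %s'
                  % (codes, name, size,
                     'fits' if size <= SMEM else 'too big'))

This matches the supported-code-length table: 48 sub-quantizers of 256 float32 values occupy exactly 48 KB, while 56, 64 and 96 only fit once halved to float16 (96 x 256 x 2 is again exactly 48 KB).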
...@@ -10,6 +10,7 @@
 // Copyright 2004-present Facebook. All Rights Reserved.
 #include "Float16.cuh"
+#include "nvidia/fp16_emu.cuh"
 #include <thrust/execution_policy.h>
 #include <thrust/transform.h>
...@@ -48,67 +49,10 @@ void runConvertToFloat32(float* out,
                          in, in + num, out, HalfToFloat());
 }
-// FIXME: replace
-/*
-  Copyright (c) 2015, Norbert Juffa
-  All rights reserved.
-
-  Redistribution and use in source and binary forms, with or without
-  modification, are permitted provided that the following conditions
-  are met:
-
-  1. Redistributions of source code must retain the above copyright
-  notice, this list of conditions and the following disclaimer.
-
-  2. Redistributions in binary form must reproduce the above copyright
-  notice, this list of conditions and the following disclaimer in the
-  documentation and/or other materials provided with the distribution.
-
-  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-  HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
 half hostFloat2Half(float a) {
-  uint32_t ia;
-  uint16_t ir;
-  memcpy(&ia, &a, sizeof(float));
-
-  ir = (ia >> 16) & 0x8000;
-  if ((ia & 0x7f800000) == 0x7f800000) {
-    if ((ia & 0x7fffffff) == 0x7f800000) {
-      ir |= 0x7c00; /* infinity */
-    } else {
-      ir = 0x7fff; /* canonical NaN */
-    }
-  } else if ((ia & 0x7f800000) >= 0x33000000) {
-    int shift = (int)((ia >> 23) & 0xff) - 127;
-    if (shift > 15) {
-      ir |= 0x7c00; /* infinity */
-    } else {
-      ia = (ia & 0x007fffff) | 0x00800000; /* extract mantissa */
-      if (shift < -14) { /* denormal */
-        ir |= ia >> (-1 - shift);
-        ia = ia << (32 - (-1 - shift));
-      } else { /* normal */
-        ir |= ia >> (24 - 11);
-        ia = ia << (32 - (24 - 11));
-        ir = ir + ((14 + shift) << 10);
-      }
-      /* IEEE-754 round to nearest of even */
-      if ((ia > 0x80000000) || ((ia == 0x80000000) && (ir & 1))) {
-        ir++;
-      }
-    }
-  }
-  half ret;
-  memcpy(&ret, &ir, sizeof(half));
-  return ret;
+  half h;
+  h.x = cpu_float2half_rn(a).x;
+  return h;
 }
 } } // namespace
......
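hostFloat2Half now delegates to cpu_float2half_rn from NVIDIA's bundled fp16_emu, dropping the inlined Norbert Juffa converter together with its license block; both implement float32-to-float16 conversion with IEEE round-to-nearest-even. numpy's float16 cast rounds the same way, so it makes a convenient cross-check for the edge cases the old code handled explicitly (a sketch, not part of the library's tests):

    import numpy as np

    # max half, overflow to inf, flush-to-zero range, inf and NaN
    for v in [1.0, 65504.0, 1e5, 1e-8, float('inf'), float('nan')]:
        h = np.float16(np.float32(v))
        print('%12g -> %12g (bits 0x%04x)' % (v, float(h), h.view(np.uint16)))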
[Two collapsed diffs not shown; likely the new benchmarking scripts named in the commit message.]
...@@ -58,3 +58,22 @@ class TestPCA(testutil.BaseFacebookTestCase):
         for o in column_norm2:
             self.assertGreater(prev, o)
             prev = o
+
+
+class TestProductQuantizer(testutil.BaseFacebookTestCase):
+
+    def test_pq(self):
+        d = 64
+        n = 1000
+        cs = 4
+        np.random.seed(123)
+        x = np.random.random(size=(n, d)).astype('float32')
+        pq = faiss.ProductQuantizer(d, cs, 8)
+        pq.train(x)
+        codes = pq.compute_codes(x)
+        x2 = pq.decode(codes)
+        diff = ((x - x2)**2).sum()
+        # print "diff=", diff
+        # diff= 1807.98
+        self.assertGreater(2500, diff)
...@@ -16,7 +16,6 @@
 #include <cassert>
 #include <cstring>
 #include <cmath>
-#include <cmath>
 #include <smmintrin.h>
 #include <mmintrin.h>
...@@ -208,7 +207,7 @@ int RandomGenerator::rand_int (int max)
 float RandomGenerator::rand_float ()
 {
-    return rand_int() / float(1 << 31);
+    return rand_int() / float(1L << 31);
 }
 double RandomGenerator::rand_double ()
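The rand_float change fixes an integer overflow: with 32-bit int, 1 << 31 shifts into the sign bit (formally undefined; in practice INT_MIN), so the old divisor was negative, while 1L << 31 evaluates to 2^31 as intended and maps a non-negative 31-bit rand_int() into [0, 1). The wrap is easy to reproduce with numpy's fixed-width integers (a sketch):

    import numpy as np

    wrapped = np.int32(1) << np.int32(31)    # 32-bit shift, like C++ int
    print(int(wrapped))                      # -2147483648: the old divisor
    print(1 << 31)                           # 2147483648: what 1L << 31 gives
    print(0x7fffffff / float(1 << 31))       # largest quotient, just under 1.0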
...@@ -1278,7 +1277,9 @@ void pairwise_L2sqr (long d,
  * Kmeans subroutine
  ***************************************************************************/
-#define EPS 1e-7
+// a bit above machine epsilon for float16
+#define EPS (1 / 1024.)
 /* For k-means, compute centroids given assignment of vectors to centroids */
 /* NOTE: This could be multi-threaded (use histogram of indexes) */
...@@ -1347,12 +1348,12 @@ int km_update_centroids (const float * x,
         /* small symmetric perturbation. Much better than */
         for (size_t j = 0; j < d; j++)
             if (j % 2 == 0) {
-                centroids[ci * d + j] += EPS;
-                centroids[cj * d + j] -= EPS;
+                centroids[ci * d + j] *= 1 + EPS;
+                centroids[cj * d + j] *= 1 - EPS;
             }
             else {
-                centroids[ci * d + j] -= EPS;
-                centroids[cj * d + j] += EPS;
+                centroids[ci * d + j] *= 1 - EPS;
+                centroids[cj * d + j] *= 1 + EPS;
             }
         /* assume even split of the cluster */
......
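Switching the split perturbation from an additive EPS to a multiplicative 1 ± EPS makes it scale-aware: a fixed 1e-7 is below float32 resolution for coordinates much above 1, so the two centroid copies could come out bit-identical and never separate, whereas a relative shift of 1/1024 (about float16's machine epsilon, per the new comment) always survives. A quick numpy illustration (a sketch):

    import numpy as np

    c = np.float32(1000.0)                     # a large centroid coordinate
    print(c + np.float32(1e-7) == c)           # True: additive EPS absorbed
    print(c * np.float32(1 + 1 / 1024.) == c)  # False: relative EPS survives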