Commit c5077070 authored by matthijs

Sync with FB version. Changes:

- better selection of training sets for PQ and preprocessing
- GPU parameter object
- IndexIDMap fixed
- fixed redo bug in clustering
parent acb93857
...@@ -104,14 +104,13 @@ void Clustering::train (idx_t nx, const float *x_in, Index & index) { ...@@ -104,14 +104,13 @@ void Clustering::train (idx_t nx, const float *x_in, Index & index) {
int(nx), d, k, nredo, niter); int(nx), d, k, nredo, niter);
idx_t * assign = new idx_t[nx]; idx_t * assign = new idx_t[nx];
float * dis = new float[nx]; float * dis = new float[nx];
float best_err = 1e50; float best_err = 1e50;
double t_search_tot = 0; double t_search_tot = 0;
if (verbose) { if (verbose) {
printf(" Preprocessing in %5g s\n", printf(" Preprocessing in %.2f s\n",
(getmillisecs() - t0)/1000.); (getmillisecs() - t0)/1000.);
} }
t0 = getmillisecs(); t0 = getmillisecs();
...@@ -149,7 +148,7 @@ void Clustering::train (idx_t nx, const float *x_in, Index & index) { ...@@ -149,7 +148,7 @@ void Clustering::train (idx_t nx, const float *x_in, Index & index) {
if (!index.is_trained) if (!index.is_trained)
index.train (k, cur_centroids.data()); index.train (k, cur_centroids.data());
FAISS_ASSERT (index.ntotal == 0 ); FAISS_ASSERT (index.ntotal == 0);
index.add (k, cur_centroids.data()); index.add (k, cur_centroids.data());
float err = 0; float err = 0;
for (int i = 0; i < niter; i++) { for (int i = 0; i < niter; i++) {
...@@ -183,16 +182,17 @@ void Clustering::train (idx_t nx, const float *x_in, Index & index) { ...@@ -183,16 +182,17 @@ void Clustering::train (idx_t nx, const float *x_in, Index & index) {
index.train (k, cur_centroids.data()); index.train (k, cur_centroids.data());
assert (index.ntotal == 0); assert (index.ntotal == 0);
index.add (k, centroids.data()); index.add (k, cur_centroids.data());
} }
if (verbose) printf("\n"); if (verbose) printf("\n");
if (nredo > 1) { if (nredo > 1) {
if (err < best_err) { if (err < best_err) {
if (verbose) if (verbose)
printf ("Keep new clusters\n"); printf ("Objective improved: keep new clusters\n");
centroids = cur_centroids; centroids = buf_centroids;
best_err = err; best_err = err;
} }
index.reset ();
} }
} }
......
...@@ -26,7 +26,6 @@ struct ClusteringParameters { ...@@ -26,7 +26,6 @@ struct ClusteringParameters {
int niter; ///< clustering iterations int niter; ///< clustering iterations
int nredo; ///< redo clustering this many times and keep best int nredo; ///< redo clustering this many times and keep best
bool verbose; bool verbose;
bool spherical; ///< do we want normalized centroids? bool spherical; ///< do we want normalized centroids?
bool update_index; ///< update index after each iteration? bool update_index; ///< update index after each iteration?
......
...@@ -82,8 +82,11 @@ void IndexIVFPQ::train_residual (idx_t n, const float *x) ...@@ -82,8 +82,11 @@ void IndexIVFPQ::train_residual (idx_t n, const float *x)
void IndexIVFPQ::train_residual_o (idx_t n, const float *x, float *residuals_2) void IndexIVFPQ::train_residual_o (idx_t n, const float *x, float *residuals_2)
{ {
idx_t ntrain = pq.ksub * 64; const float * x_in = x;
if(n > ntrain) n = ntrain;
x = fvecs_maybe_subsample (
d, (size_t*)&n, pq.cp.max_points_per_centroid * pq.ksub,
x, verbose, pq.cp.seed);
const float *trainset; const float *trainset;
if (by_residual) { if (by_residual) {
...@@ -132,6 +135,7 @@ void IndexIVFPQ::train_residual_o (idx_t n, const float *x, float *residuals_2) ...@@ -132,6 +135,7 @@ void IndexIVFPQ::train_residual_o (idx_t n, const float *x, float *residuals_2)
precompute_table (); precompute_table ();
} }
if (x_in != x) delete [] x;
} }
......
...@@ -94,8 +94,6 @@ AutoTune.o: AutoTune.cpp AutoTune.h Index.h FaissAssert.h utils.h Heap.h \ ...@@ -94,8 +94,6 @@ AutoTune.o: AutoTune.cpp AutoTune.h Index.h FaissAssert.h utils.h Heap.h \
IndexFlat.h VectorTransform.h IndexLSH.h IndexPQ.h ProductQuantizer.h \ IndexFlat.h VectorTransform.h IndexLSH.h IndexPQ.h ProductQuantizer.h \
Clustering.h PolysemousTraining.h IndexIVF.h IndexIVFPQ.h MetaIndexes.h Clustering.h PolysemousTraining.h IndexIVF.h IndexIVFPQ.h MetaIndexes.h
AuxIndexStructures.o: AuxIndexStructures.cpp AuxIndexStructures.h Index.h AuxIndexStructures.o: AuxIndexStructures.cpp AuxIndexStructures.h Index.h
BinaryCode.o: BinaryCode.cpp BinaryCode.h VectorTransform.h Index.h \
FaissAssert.h hamming.h Heap.h
Clustering.o: Clustering.cpp Clustering.h Index.h utils.h Heap.h \ Clustering.o: Clustering.cpp Clustering.h Index.h utils.h Heap.h \
FaissAssert.h IndexFlat.h FaissAssert.h IndexFlat.h
hamming.o: hamming.cpp hamming.h Heap.h FaissAssert.h hamming.o: hamming.cpp hamming.h Heap.h FaissAssert.h
...@@ -105,7 +103,7 @@ IndexFlat.o: IndexFlat.cpp IndexFlat.h Index.h utils.h Heap.h \ ...@@ -105,7 +103,7 @@ IndexFlat.o: IndexFlat.cpp IndexFlat.h Index.h utils.h Heap.h \
FaissAssert.h FaissAssert.h
index_io.o: index_io.cpp index_io.h FaissAssert.h IndexFlat.h Index.h \ index_io.o: index_io.cpp index_io.h FaissAssert.h IndexFlat.h Index.h \
VectorTransform.h IndexLSH.h IndexPQ.h ProductQuantizer.h Clustering.h \ VectorTransform.h IndexLSH.h IndexPQ.h ProductQuantizer.h Clustering.h \
Heap.h PolysemousTraining.h IndexIVF.h IndexIVFPQ.h Heap.h PolysemousTraining.h IndexIVF.h IndexIVFPQ.h MetaIndexes.h
IndexIVF.o: IndexIVF.cpp IndexIVF.h Index.h Clustering.h Heap.h utils.h \ IndexIVF.o: IndexIVF.cpp IndexIVF.h Index.h Clustering.h Heap.h utils.h \
hamming.h FaissAssert.h IndexFlat.h AuxIndexStructures.h hamming.h FaissAssert.h IndexFlat.h AuxIndexStructures.h
IndexIVFPQ.o: IndexIVFPQ.cpp IndexIVFPQ.h IndexIVF.h Index.h Clustering.h \ IndexIVFPQ.o: IndexIVFPQ.cpp IndexIVFPQ.h IndexIVF.h Index.h Clustering.h \
...@@ -113,12 +111,8 @@ IndexIVFPQ.o: IndexIVFPQ.cpp IndexIVFPQ.h IndexIVF.h Index.h Clustering.h \ ...@@ -113,12 +111,8 @@ IndexIVFPQ.o: IndexIVFPQ.cpp IndexIVFPQ.h IndexIVF.h Index.h Clustering.h \
IndexFlat.h hamming.h FaissAssert.h AuxIndexStructures.h IndexFlat.h hamming.h FaissAssert.h AuxIndexStructures.h
IndexLSH.o: IndexLSH.cpp IndexLSH.h Index.h VectorTransform.h utils.h \ IndexLSH.o: IndexLSH.cpp IndexLSH.h Index.h VectorTransform.h utils.h \
Heap.h hamming.h FaissAssert.h Heap.h hamming.h FaissAssert.h
IndexNested.o: IndexNested.cpp IndexNested.h IndexIVF.h Index.h \
Clustering.h Heap.h IndexIVFPQ.h IndexPQ.h ProductQuantizer.h \
PolysemousTraining.h IndexFlat.h FaissAssert.h
IndexPQ.o: IndexPQ.cpp IndexPQ.h Index.h ProductQuantizer.h Clustering.h \ IndexPQ.o: IndexPQ.cpp IndexPQ.h Index.h ProductQuantizer.h Clustering.h \
Heap.h PolysemousTraining.h FaissAssert.h hamming.h Heap.h PolysemousTraining.h FaissAssert.h hamming.h
MetaIndexes.o: MetaIndexes.cpp MetaIndexes.h Index.h FaissAssert.h Heap.h MetaIndexes.o: MetaIndexes.cpp MetaIndexes.h Index.h FaissAssert.h Heap.h
PolysemousTraining.o: PolysemousTraining.cpp PolysemousTraining.h \ PolysemousTraining.o: PolysemousTraining.cpp PolysemousTraining.h \
ProductQuantizer.h Clustering.h Index.h Heap.h utils.h hamming.h \ ProductQuantizer.h Clustering.h Index.h Heap.h utils.h hamming.h \
...@@ -131,6 +125,7 @@ VectorTransform.o: VectorTransform.cpp VectorTransform.h Index.h utils.h \ ...@@ -131,6 +125,7 @@ VectorTransform.o: VectorTransform.cpp VectorTransform.h Index.h utils.h \
Heap.h FaissAssert.h IndexPQ.h ProductQuantizer.h Clustering.h \ Heap.h FaissAssert.h IndexPQ.h ProductQuantizer.h Clustering.h \
PolysemousTraining.h PolysemousTraining.h
clean: clean:
rm -f $(LIBNAME).a $(LIBNAME).$(SHAREDEXT)* *.o \ rm -f $(LIBNAME).a $(LIBNAME).$(SHAREDEXT)* *.o \
lua/swigfaiss.so lua/swigfaiss_wrap.cxx \ lua/swigfaiss.so lua/swigfaiss_wrap.cxx \
......
...@@ -49,7 +49,7 @@ struct IndexIDMap : Index { ...@@ -49,7 +49,7 @@ struct IndexIDMap : Index {
virtual void set_typename () override; virtual void set_typename () override;
virtual ~IndexIDMap (); virtual ~IndexIDMap ();
/// Default constructor: starts with index set to nullptr and
/// own_fields set to false.
IndexIDMap ()
{
    index = nullptr;
    own_fields = false;
}
}; };
/** Index that concatenates the results from several sub-indexes /** Index that concatenates the results from several sub-indexes
......
...@@ -95,7 +95,7 @@ void VectorTransform::reverse_transform ( ...@@ -95,7 +95,7 @@ void VectorTransform::reverse_transform (
LinearTransform::LinearTransform (int d_in, int d_out, LinearTransform::LinearTransform (int d_in, int d_out,
bool have_bias): bool have_bias):
VectorTransform (d_in, d_out), have_bias (have_bias), VectorTransform (d_in, d_out), have_bias (have_bias),
max_points_per_d (1 << 20), verbose (false) verbose (false)
{} {}
void LinearTransform::apply_noalloc (Index::idx_t n, const float * x, void LinearTransform::apply_noalloc (Index::idx_t n, const float * x,
...@@ -152,27 +152,6 @@ void LinearTransform::transform_transpose (idx_t n, const float * y, ...@@ -152,27 +152,6 @@ void LinearTransform::transform_transpose (idx_t n, const float * y,
if (have_bias) delete [] y; if (have_bias) delete [] y;
} }
/// Subsample the training set when it holds more than
/// max_points_per_d * d_in vectors.
///
/// @param n  in/out: number of vectors in x; overwritten with the size of
///           the subsample when subsampling takes place
/// @param x  input vectors, read as rows of d_in floats
/// @return   x itself when no subsampling is needed; otherwise a buffer
///           allocated with new[] that holds the selected vectors. This
///           function never frees that buffer, so the caller must
///           delete [] it (compare the returned pointer with x).
const float * LinearTransform::maybe_subsample_train_set (
    Index::idx_t *n, const float *x)
{
    // largest number of training vectors that is kept
    const size_t n2 = max_points_per_d * d_in;

    if (*n <= n2) return x;

    if (verbose) {
        // n2 is a size_t, hence %zu; *n keeps its original specifier
        printf (" Input training set too big, sampling "
                "%zu / %ld vectors\n", n2, *n);
    }

    // NOTE(review): rand_perm is defined elsewhere; presumably it fills
    // subset with a permutation of 0 .. *n - 1 seeded with 1234 -- confirm.
    std::vector<int> subset (*n);
    rand_perm (subset.data (), *n, 1234);

    // copy the first n2 selected vectors into a freshly allocated buffer
    float *x_subset = new float[n2 * d_in];
    for (size_t i = 0; i < n2; i++)
        memcpy (&x_subset[i * d_in],
                &x[subset[i] * size_t(d_in)],
                sizeof (x[0]) * d_in);

    *n = n2;
    return x_subset;
}
/********************************************* /*********************************************
* RandomRotationMatrix * RandomRotationMatrix
...@@ -228,7 +207,8 @@ void PCAMatrix::train (Index::idx_t n, const float *x) ...@@ -228,7 +207,8 @@ void PCAMatrix::train (Index::idx_t n, const float *x)
{ {
const float * x_in = x; const float * x_in = x;
x = maybe_subsample_train_set(&n, x); x = fvecs_maybe_subsample (d_in, (size_t*)&n,
max_points_per_d * d_in, x, verbose);
// compute mean // compute mean
mean.clear(); mean.resize(d_in, 0.0); mean.clear(); mean.resize(d_in, 0.0);
...@@ -461,7 +441,8 @@ OPQMatrix::OPQMatrix (int d, int M, int d2): ...@@ -461,7 +441,8 @@ OPQMatrix::OPQMatrix (int d, int M, int d2):
verbose(false) verbose(false)
{ {
is_trained = false; is_trained = false;
max_points_per_d = 1000; // OPQ is quite expensive to train, so set this right.
max_train_points = 256 * 256;
} }
...@@ -471,7 +452,8 @@ void OPQMatrix::train (Index::idx_t n, const float *x) ...@@ -471,7 +452,8 @@ void OPQMatrix::train (Index::idx_t n, const float *x)
const float * x_in = x; const float * x_in = x;
x = maybe_subsample_train_set (&n, x); x = fvecs_maybe_subsample (d_in, (size_t*)&n,
max_train_points, x, verbose);
// To support d_out > d_in, we pad input vectors with 0s to d_out // To support d_out > d_in, we pad input vectors with 0s to d_out
size_t d = d_out <= d_in ? d_in : d_out; size_t d = d_out <= d_in ? d_in : d_out;
......
...@@ -100,13 +100,8 @@ struct LinearTransform: VectorTransform { ...@@ -100,13 +100,8 @@ struct LinearTransform: VectorTransform {
void transform_transpose (idx_t n, const float * y, void transform_transpose (idx_t n, const float * y,
float *x) const; float *x) const;
// ratio between # training vectors and dimension
size_t max_points_per_d;
bool verbose; bool verbose;
// subsamples training set if there are too many vectors
const float *maybe_subsample_train_set (Index::idx_t *n, const float *x);
virtual ~LinearTransform () {} virtual ~LinearTransform () {}
...@@ -146,6 +141,9 @@ struct PCAMatrix: LinearTransform { ...@@ -146,6 +141,9 @@ struct PCAMatrix: LinearTransform {
/// random rotation after PCA /// random rotation after PCA
bool random_rotation; bool random_rotation;
/// ratio between # training vectors and dimension
size_t max_points_per_d;
/// try to distribute output eigenvectors in this many bins /// try to distribute output eigenvectors in this many bins
int balanced_bins; int balanced_bins;
...@@ -191,8 +189,9 @@ struct OPQMatrix: LinearTransform { ...@@ -191,8 +189,9 @@ struct OPQMatrix: LinearTransform {
int niter; ///< Number of outer training iterations int niter; ///< Number of outer training iterations
int niter_pq; ///< Number of training iterations for the PQ int niter_pq; ///< Number of training iterations for the PQ
int niter_pq_0; ///< same, for the first outer iteration int niter_pq_0; ///< same, for the first outer iteration
/// if there are too many training points, resample /// if there are too many training points, resample
int max_points_per_d; size_t max_train_points;
bool verbose; bool verbose;
/// if d2 != -1, output vectors of this dimension /// if d2 != -1, output vectors of this dimension
......
...@@ -52,7 +52,10 @@ res = faiss.StandardGpuResources() ...@@ -52,7 +52,10 @@ res = faiss.StandardGpuResources()
print "============ Exact search" print "============ Exact search"
index = faiss.GpuIndexFlatL2(res, 0, d, False) flat_config = faiss.GpuIndexFlatConfig()
flat_config.device = 0
index = faiss.GpuIndexFlatL2(res, d, flat_config)
print "add vectors to index" print "add vectors to index"
......
...@@ -55,12 +55,17 @@ def train_kmeans(x, k, ngpu): ...@@ -55,12 +55,17 @@ def train_kmeans(x, k, ngpu):
res = [faiss.StandardGpuResources() for i in range(ngpu)] res = [faiss.StandardGpuResources() for i in range(ngpu)]
useFloat16 = False flat_config = []
for i in range(ngpu):
cfg = faiss.GpuIndexFlatConfig()
cfg.useFloat16 = False
cfg.device = i
flat_config.append(cfg)
if ngpu == 1: if ngpu == 1:
index = faiss.GpuIndexFlatL2(res[0], 0, d, useFloat16) index = faiss.GpuIndexFlatL2(res[0], d, flat_config[0])
else: else:
indexes = [faiss.GpuIndexFlatL2(res[i], i, d, useFloat16) indexes = [faiss.GpuIndexFlatL2(res[i], d, flat_config[i])
for i in range(ngpu)] for i in range(ngpu)]
index = faiss.IndexProxy() index = faiss.IndexProxy()
for sub_index in indexes: for sub_index in indexes:
......
...@@ -65,6 +65,7 @@ GpuClonerOptions::GpuClonerOptions(): ...@@ -65,6 +65,7 @@ GpuClonerOptions::GpuClonerOptions():
useFloat16(false), useFloat16(false),
usePrecomputed(true), usePrecomputed(true),
reserveVecs(0), reserveVecs(0),
storeTransposed(false),
verbose(0) verbose(0)
{} {}
...@@ -79,7 +80,12 @@ struct ToGpuCloner: faiss::Cloner, GpuClonerOptions { ...@@ -79,7 +80,12 @@ struct ToGpuCloner: faiss::Cloner, GpuClonerOptions {
Index *clone_Index(const Index *index) override { Index *clone_Index(const Index *index) override {
if(auto ifl = dynamic_cast<const IndexFlat *>(index)) { if(auto ifl = dynamic_cast<const IndexFlat *>(index)) {
return new GpuIndexFlat(resources, device, useFloat16, ifl); GpuIndexFlatConfig config;
config.device = device;
config.useFloat16 = useFloat16;
config.storeTransposed = storeTransposed;
return new GpuIndexFlat(resources, ifl, config);
} else if(auto ifl = dynamic_cast<const faiss::IndexIVFFlat *>(index)) { } else if(auto ifl = dynamic_cast<const faiss::IndexIVFFlat *>(index)) {
GpuIndexIVFFlat *res = GpuIndexIVFFlat *res =
new GpuIndexIVFFlat(resources, new GpuIndexIVFFlat(resources,
......
...@@ -40,6 +40,8 @@ struct GpuClonerOptions { ...@@ -40,6 +40,8 @@ struct GpuClonerOptions {
bool usePrecomputed; bool usePrecomputed;
/// reserve vectors in the invfiles? /// reserve vectors in the invfiles?
long reserveVecs; long reserveVecs;
/// For GpuIndexFlat, store data in transposed layout?
bool storeTransposed;
int verbose; int verbose;
GpuClonerOptions (); GpuClonerOptions ();
}; };
......
...@@ -31,31 +31,30 @@ constexpr size_t kMinPagedQuerySize = (size_t) 256 * 1024 * 1024; ...@@ -31,31 +31,30 @@ constexpr size_t kMinPagedQuerySize = (size_t) 256 * 1024 * 1024;
constexpr size_t kNonPinnedPageSize = (size_t) 256 * 1024 * 1024; constexpr size_t kNonPinnedPageSize = (size_t) 256 * 1024 * 1024;
GpuIndexFlat::GpuIndexFlat(GpuResources* resources, GpuIndexFlat::GpuIndexFlat(GpuResources* resources,
int device, const faiss::IndexFlat* index,
bool useFloat16, GpuIndexFlatConfig config) :
const faiss::IndexFlat* index) : GpuIndex(resources, config.device, index->d, index->metric_type),
GpuIndex(resources, device, index->d, index->metric_type),
minPagedSize_(kMinPagedQuerySize), minPagedSize_(kMinPagedQuerySize),
useFloat16_(useFloat16), config_(config),
data_(nullptr) { data_(nullptr) {
copyFrom(index); copyFrom(index);
} }
GpuIndexFlat::GpuIndexFlat(GpuResources* resources, GpuIndexFlat::GpuIndexFlat(GpuResources* resources,
int device,
int dims, int dims,
bool useFloat16, faiss::MetricType metric,
faiss::MetricType metric) : GpuIndexFlatConfig config) :
GpuIndex(resources, device, dims, metric), GpuIndex(resources, config.device, dims, metric),
minPagedSize_(kMinPagedQuerySize), minPagedSize_(kMinPagedQuerySize),
useFloat16_(useFloat16), config_(config),
data_(nullptr) { data_(nullptr) {
DeviceScope scope(device_); DeviceScope scope(device_);
data_ = new FlatIndex(resources, data_ = new FlatIndex(resources,
dims, dims,
metric == faiss::METRIC_L2, metric == faiss::METRIC_L2,
useFloat16); config_.useFloat16,
config_.storeTransposed);
} }
GpuIndexFlat::~GpuIndexFlat() { GpuIndexFlat::~GpuIndexFlat() {
...@@ -74,7 +73,7 @@ GpuIndexFlat::getMinPagingSize() const { ...@@ -74,7 +73,7 @@ GpuIndexFlat::getMinPagingSize() const {
bool bool
GpuIndexFlat::getUseFloat16() const { GpuIndexFlat::getUseFloat16() const {
return useFloat16_; return config_.useFloat16;
} }
void void
...@@ -93,7 +92,8 @@ GpuIndexFlat::copyFrom(const faiss::IndexFlat* index) { ...@@ -93,7 +92,8 @@ GpuIndexFlat::copyFrom(const faiss::IndexFlat* index) {
data_ = new FlatIndex(resources_, data_ = new FlatIndex(resources_,
this->d, this->d,
index->metric_type == faiss::METRIC_L2, index->metric_type == faiss::METRIC_L2,
useFloat16_); config_.useFloat16,
config_.storeTransposed);
// The index could be empty // The index could be empty
if (index->ntotal > 0) { if (index->ntotal > 0) {
...@@ -117,7 +117,7 @@ GpuIndexFlat::copyTo(faiss::IndexFlat* index) const { ...@@ -117,7 +117,7 @@ GpuIndexFlat::copyTo(faiss::IndexFlat* index) const {
auto stream = resources_->getDefaultStream(device_); auto stream = resources_->getDefaultStream(device_);
if (this->ntotal > 0) { if (this->ntotal > 0) {
if (useFloat16_) { if (config_.useFloat16) {
auto vecFloat32 = data_->getVectorsFloat32Copy(stream); auto vecFloat32 = data_->getVectorsFloat32Copy(stream);
fromDevice(vecFloat32, index->xb.data(), stream); fromDevice(vecFloat32, index->xb.data(), stream);
} else { } else {
...@@ -444,7 +444,7 @@ GpuIndexFlat::reconstruct(faiss::Index::idx_t key, ...@@ -444,7 +444,7 @@ GpuIndexFlat::reconstruct(faiss::Index::idx_t key,
FAISS_ASSERT(key < this->ntotal); FAISS_ASSERT(key < this->ntotal);
auto stream = resources_->getDefaultStream(device_); auto stream = resources_->getDefaultStream(device_);
if (useFloat16_) { if (config_.useFloat16) {
auto vec = data_->getVectorsFloat32Copy(key, 1, stream); auto vec = data_->getVectorsFloat32Copy(key, 1, stream);
fromDevice(vec.data(), out, this->d, stream); fromDevice(vec.data(), out, this->d, stream);
} else { } else {
...@@ -463,7 +463,7 @@ GpuIndexFlat::reconstruct_n(faiss::Index::idx_t i0, ...@@ -463,7 +463,7 @@ GpuIndexFlat::reconstruct_n(faiss::Index::idx_t i0,
FAISS_ASSERT(i0 + num - 1 < this->ntotal); FAISS_ASSERT(i0 + num - 1 < this->ntotal);
auto stream = resources_->getDefaultStream(device_); auto stream = resources_->getDefaultStream(device_);
if (useFloat16_) { if (config_.useFloat16) {
auto vec = data_->getVectorsFloat32Copy(i0, num, stream); auto vec = data_->getVectorsFloat32Copy(i0, num, stream);
fromDevice(vec.data(), out, num * this->d, stream); fromDevice(vec.data(), out, num * this->d, stream);
} else { } else {
...@@ -486,17 +486,15 @@ GpuIndexFlat::set_typename() { ...@@ -486,17 +486,15 @@ GpuIndexFlat::set_typename() {
// //
GpuIndexFlatL2::GpuIndexFlatL2(GpuResources* resources, GpuIndexFlatL2::GpuIndexFlatL2(GpuResources* resources,
int device, faiss::IndexFlatL2* index,
bool useFloat16, GpuIndexFlatConfig config) :
faiss::IndexFlatL2* index) : GpuIndexFlat(resources, index, config) {
GpuIndexFlat(resources, device, useFloat16, index) {
} }
GpuIndexFlatL2::GpuIndexFlatL2(GpuResources* resources, GpuIndexFlatL2::GpuIndexFlatL2(GpuResources* resources,
int device,
int dims, int dims,
bool useFloat16) : GpuIndexFlatConfig config) :
GpuIndexFlat(resources, device, dims, useFloat16, faiss::METRIC_L2) { GpuIndexFlat(resources, dims, faiss::METRIC_L2, config) {
} }
void void
...@@ -514,18 +512,15 @@ GpuIndexFlatL2::copyTo(faiss::IndexFlatL2* index) { ...@@ -514,18 +512,15 @@ GpuIndexFlatL2::copyTo(faiss::IndexFlatL2* index) {
// //
GpuIndexFlatIP::GpuIndexFlatIP(GpuResources* resources, GpuIndexFlatIP::GpuIndexFlatIP(GpuResources* resources,
int device, faiss::IndexFlatIP* index,
bool useFloat16, GpuIndexFlatConfig config) :
faiss::IndexFlatIP* index) : GpuIndexFlat(resources, index, config) {
GpuIndexFlat(resources, device, useFloat16, index) {
} }
GpuIndexFlatIP::GpuIndexFlatIP(GpuResources* resources, GpuIndexFlatIP::GpuIndexFlatIP(GpuResources* resources,
int device,
int dims, int dims,
bool useFloat16) : GpuIndexFlatConfig config) :
GpuIndexFlat(resources, device, dims, useFloat16, GpuIndexFlat(resources, dims, faiss::METRIC_INNER_PRODUCT, config) {
faiss::METRIC_INNER_PRODUCT) {
} }
void void
......
...@@ -25,6 +25,18 @@ namespace faiss { namespace gpu { ...@@ -25,6 +25,18 @@ namespace faiss { namespace gpu {
struct FlatIndex; struct FlatIndex;
/// Construction-time options for GpuIndexFlat and its subclasses.
/// A default-constructed object selects device 0, no float16 storage and
/// no transposed storage.
struct GpuIndexFlatConfig {
    GpuIndexFlatConfig() = default;

    /// GPU device passed on to the underlying GpuIndex
    int device = 0;

    /// Store the vectors in float16 instead of float32?
    bool useFloat16 = false;

    /// Store the data in transposed layout?
    bool storeTransposed = false;
};
/// Wrapper around the GPU implementation that looks like /// Wrapper around the GPU implementation that looks like
/// faiss::IndexFlat; copies over centroid data from a given /// faiss::IndexFlat; copies over centroid data from a given
/// faiss::IndexFlat /// faiss::IndexFlat
...@@ -33,16 +45,14 @@ class GpuIndexFlat : public GpuIndex { ...@@ -33,16 +45,14 @@ class GpuIndexFlat : public GpuIndex {
/// Construct from a pre-existing faiss::IndexFlat instance, copying /// Construct from a pre-existing faiss::IndexFlat instance, copying
/// data over to the given GPU /// data over to the given GPU
GpuIndexFlat(GpuResources* resources, GpuIndexFlat(GpuResources* resources,
int device, const faiss::IndexFlat* index,
bool useFloat16, GpuIndexFlatConfig config = GpuIndexFlatConfig());
const faiss::IndexFlat* index);
/// Construct an empty instance that can be added to /// Construct an empty instance that can be added to
GpuIndexFlat(GpuResources* resources, GpuIndexFlat(GpuResources* resources,
int device,
int dims, int dims,
bool useFloat16, faiss::MetricType metric,
faiss::MetricType metric); GpuIndexFlatConfig config = GpuIndexFlatConfig());
~GpuIndexFlat() override; ~GpuIndexFlat() override;
...@@ -118,8 +128,7 @@ class GpuIndexFlat : public GpuIndex { ...@@ -118,8 +128,7 @@ class GpuIndexFlat : public GpuIndex {
/// Size above which we page copies from the CPU to GPU /// Size above which we page copies from the CPU to GPU
size_t minPagedSize_; size_t minPagedSize_;
/// Whether or not we store our vectors in float32 or float16 const GpuIndexFlatConfig config_;
const bool useFloat16_;
/// Holds our GPU data containing the list of vectors /// Holds our GPU data containing the list of vectors
FlatIndex* data_; FlatIndex* data_;
...@@ -133,15 +142,13 @@ class GpuIndexFlatL2 : public GpuIndexFlat { ...@@ -133,15 +142,13 @@ class GpuIndexFlatL2 : public GpuIndexFlat {
/// Construct from a pre-existing faiss::IndexFlatL2 instance, copying /// Construct from a pre-existing faiss::IndexFlatL2 instance, copying
/// data over to the given GPU /// data over to the given GPU
GpuIndexFlatL2(GpuResources* resources, GpuIndexFlatL2(GpuResources* resources,
int device, faiss::IndexFlatL2* index,
bool useFloat16, GpuIndexFlatConfig config = GpuIndexFlatConfig());
faiss::IndexFlatL2* index);
/// Construct an empty instance that can be added to /// Construct an empty instance that can be added to
GpuIndexFlatL2(GpuResources* resources, GpuIndexFlatL2(GpuResources* resources,
int device,
int dims, int dims,
bool useFloat16); GpuIndexFlatConfig config = GpuIndexFlatConfig());
/// Initialize ourselves from the given CPU index; will overwrite /// Initialize ourselves from the given CPU index; will overwrite
/// all data in ourselves /// all data in ourselves
...@@ -160,15 +167,13 @@ class GpuIndexFlatIP : public GpuIndexFlat { ...@@ -160,15 +167,13 @@ class GpuIndexFlatIP : public GpuIndexFlat {
/// Construct from a pre-existing faiss::IndexFlatIP instance, copying /// Construct from a pre-existing faiss::IndexFlatIP instance, copying
/// data over to the given GPU /// data over to the given GPU
GpuIndexFlatIP(GpuResources* resources, GpuIndexFlatIP(GpuResources* resources,
int device, faiss::IndexFlatIP* index,
bool useFloat16, GpuIndexFlatConfig config = GpuIndexFlatConfig());
faiss::IndexFlatIP* index);
/// Construct an empty instance that can be added to /// Construct an empty instance that can be added to
GpuIndexFlatIP(GpuResources* resources, GpuIndexFlatIP(GpuResources* resources,
int device,
int dims, int dims,
bool useFloat16); GpuIndexFlatConfig config = GpuIndexFlatConfig());
/// Initialize ourselves from the given CPU index; will overwrite /// Initialize ourselves from the given CPU index; will overwrite
/// all data in ourselves /// all data in ourselves
......
...@@ -82,14 +82,17 @@ GpuIndexIVF::init_() { ...@@ -82,14 +82,17 @@ GpuIndexIVF::init_() {
if (!quantizer_) { if (!quantizer_) {
// Construct an empty quantizer // Construct an empty quantizer
GpuIndexFlatConfig config;
config.device = device_;
config.useFloat16 = useFloat16CoarseQuantizer_;
config.storeTransposed = false;
if (this->metric_type == faiss::METRIC_L2) { if (this->metric_type == faiss::METRIC_L2) {
// FIXME: 2 different float16 options? // FIXME: 2 different float16 options?
quantizer_ = new GpuIndexFlatL2(resources_, device_, this->d, quantizer_ = new GpuIndexFlatL2(resources_, this->d, config);
useFloat16CoarseQuantizer_);
} else if (this->metric_type == faiss::METRIC_INNER_PRODUCT) { } else if (this->metric_type == faiss::METRIC_INNER_PRODUCT) {
// FIXME: 2 different float16 options? // FIXME: 2 different float16 options?
quantizer_ = new GpuIndexFlatIP(resources_, device_, this->d, quantizer_ = new GpuIndexFlatIP(resources_, this->d, config);
useFloat16CoarseQuantizer_);
} else { } else {
// unknown metric type // unknown metric type
FAISS_ASSERT(false); FAISS_ASSERT(false);
...@@ -131,14 +134,17 @@ GpuIndexIVF::copyFrom(const faiss::IndexIVF* index) { ...@@ -131,14 +134,17 @@ GpuIndexIVF::copyFrom(const faiss::IndexIVF* index) {
delete quantizer_; delete quantizer_;
quantizer_ = nullptr; quantizer_ = nullptr;
GpuIndexFlatConfig config;
config.device = device_;
config.useFloat16 = useFloat16CoarseQuantizer_;
config.storeTransposed = false;
if (index->metric_type == faiss::METRIC_L2) { if (index->metric_type == faiss::METRIC_L2) {
// FIXME: 2 different float16 options? // FIXME: 2 different float16 options?
quantizer_ = new GpuIndexFlatL2(resources_, device_, this->d, quantizer_ = new GpuIndexFlatL2(resources_, this->d, config);
useFloat16CoarseQuantizer_);
} else if (index->metric_type == faiss::METRIC_INNER_PRODUCT) { } else if (index->metric_type == faiss::METRIC_INNER_PRODUCT) {
// FIXME: 2 different float16 options? // FIXME: 2 different float16 options?
quantizer_ = new GpuIndexFlatIP(resources_, device_, this->d, quantizer_ = new GpuIndexFlatIP(resources_, this->d, config);
useFloat16CoarseQuantizer_);
} else { } else {
// unknown metric type // unknown metric type
FAISS_ASSERT(false); FAISS_ASSERT(false);
......
...@@ -92,8 +92,8 @@ IndexProxy::runOnIndex(std::function<void(faiss::Index*)> f) { ...@@ -92,8 +92,8 @@ IndexProxy::runOnIndex(std::function<void(faiss::Index*)> f) {
} }
// Blocking wait for completion // Blocking wait for completion
for (auto& f : v) { for (auto& func : v) {
f.get(); func.get();
} }
} }
...@@ -183,7 +183,8 @@ IndexProxy::set_typename() { ...@@ -183,7 +183,8 @@ IndexProxy::set_typename() {
float kmeans_clustering_gpu (int ngpu, size_t d, size_t n, size_t k, float kmeans_clustering_gpu (int ngpu, size_t d, size_t n, size_t k,
const float *x, const float *x,
float *centroids, float *centroids,
bool useFloat16) bool useFloat16,
bool storeTransposed)
{ {
Clustering clus (d, k); Clustering clus (d, k);
// display logs if > 16Gflop per iteration // display logs if > 16Gflop per iteration
...@@ -194,8 +195,15 @@ float kmeans_clustering_gpu (int ngpu, size_t d, size_t n, size_t k, ...@@ -194,8 +195,15 @@ float kmeans_clustering_gpu (int ngpu, size_t d, size_t n, size_t k,
std::vector<std::unique_ptr<GpuIndexFlatL2> > sub_indices; std::vector<std::unique_ptr<GpuIndexFlatL2> > sub_indices;
for(int dev_no = 0; dev_no < ngpu; dev_no++) { for(int dev_no = 0; dev_no < ngpu; dev_no++) {
res.emplace_back(new StandardGpuResources()); res.emplace_back(new StandardGpuResources());
GpuIndexFlatConfig config;
config.device = dev_no;
config.useFloat16 = useFloat16;
config.storeTransposed = storeTransposed;
sub_indices.emplace_back( sub_indices.emplace_back(
new GpuIndexFlatL2(res.back().get(), dev_no, d, useFloat16)); new GpuIndexFlatL2(res.back().get(), d, config));
} }
IndexProxy proxy; IndexProxy proxy;
......
...@@ -98,7 +98,8 @@ class IndexProxy : public faiss::Index { ...@@ -98,7 +98,8 @@ class IndexProxy : public faiss::Index {
float kmeans_clustering_gpu (int ngpu, size_t d, size_t n, size_t k, float kmeans_clustering_gpu (int ngpu, size_t d, size_t n, size_t k,
const float *x, const float *x,
float *centroids, float *centroids,
bool useFloat16); bool useFloat16,
bool storeTransposed);
......
This diff is collapsed.
...@@ -119,7 +119,8 @@ StandardGpuResources::initializeForDevice(int device) { ...@@ -119,7 +119,8 @@ StandardGpuResources::initializeForDevice(int device) {
auto& prop = getDeviceProperties(device); auto& prop = getDeviceProperties(device);
// Also check to make sure we meet our minimum compute capability (3.5) // Also check to make sure we meet our minimum compute capability (3.5)
FAISS_ASSERT(prop.major > 3 || (prop.major == 3 && prop.minor >= 5)); FAISS_ASSERT(prop.major > 3 || (prop.major == 3 && prop.minor >= 5) ||
!"Device not supported, need 3.5+ compute capability");
// Create streams // Create streams
cudaStream_t defaultStream = 0; cudaStream_t defaultStream = 0;
......
...@@ -33,6 +33,7 @@ constexpr int kDefaultTileSize = 256; ...@@ -33,6 +33,7 @@ constexpr int kDefaultTileSize = 256;
template <typename T> template <typename T>
void runL2Distance(GpuResources* resources, void runL2Distance(GpuResources* resources,
Tensor<T, 2, true>& centroids, Tensor<T, 2, true>& centroids,
Tensor<T, 2, true>* centroidsTransposed,
Tensor<T, 1, true>* centroidNorms, Tensor<T, 1, true>* centroidNorms,
Tensor<T, 2, true>& queries, Tensor<T, 2, true>& queries,
int k, int k,
...@@ -132,7 +133,8 @@ void runL2Distance(GpuResources* resources, ...@@ -132,7 +133,8 @@ void runL2Distance(GpuResources* resources,
// (query id x dim) x (centroid id, dim)' = (query id, centroid id) // (query id x dim) x (centroid id, dim)' = (query id, centroid id)
runMatrixMult(distanceBufView, false, runMatrixMult(distanceBufView, false,
queryView, false, queryView, false,
centroids, true, centroidsTransposed ? *centroidsTransposed : centroids,
centroidsTransposed ? false : true,
-2.0f, 0.0f, -2.0f, 0.0f,
resources->getBlasHandleCurrentDevice(), resources->getBlasHandleCurrentDevice(),
streams[curStream]); streams[curStream]);
...@@ -164,6 +166,7 @@ void runL2Distance(GpuResources* resources, ...@@ -164,6 +166,7 @@ void runL2Distance(GpuResources* resources,
template <typename T> template <typename T>
void runIPDistance(GpuResources* resources, void runIPDistance(GpuResources* resources,
Tensor<T, 2, true>& centroids, Tensor<T, 2, true>& centroids,
Tensor<T, 2, true>* centroidsTransposed,
Tensor<T, 2, true>& queries, Tensor<T, 2, true>& queries,
int k, int k,
Tensor<T, 2, true>& outDistances, Tensor<T, 2, true>& outDistances,
...@@ -236,7 +239,9 @@ void runIPDistance(GpuResources* resources, ...@@ -236,7 +239,9 @@ void runIPDistance(GpuResources* resources,
// (query id x dim) x (centroid id, dim)' = (query id, centroid id) // (query id x dim) x (centroid id, dim)' = (query id, centroid id)
runMatrixMult(distanceBufView, false, runMatrixMult(distanceBufView, false,
queryView, false, centroids, true, queryView, false,
centroidsTransposed ? *centroidsTransposed : centroids,
centroidsTransposed ? false : true,
1.0f, 0.0f, 1.0f, 0.0f,
resources->getBlasHandleCurrentDevice(), resources->getBlasHandleCurrentDevice(),
streams[curStream]); streams[curStream]);
...@@ -261,6 +266,7 @@ void runIPDistance(GpuResources* resources, ...@@ -261,6 +266,7 @@ void runIPDistance(GpuResources* resources,
void void
runIPDistance(GpuResources* resources, runIPDistance(GpuResources* resources,
Tensor<float, 2, true>& vectors, Tensor<float, 2, true>& vectors,
Tensor<float, 2, true>* vectorsTransposed,
Tensor<float, 2, true>& queries, Tensor<float, 2, true>& queries,
int k, int k,
Tensor<float, 2, true>& outDistances, Tensor<float, 2, true>& outDistances,
...@@ -268,6 +274,7 @@ runIPDistance(GpuResources* resources, ...@@ -268,6 +274,7 @@ runIPDistance(GpuResources* resources,
int tileSize) { int tileSize) {
runIPDistance<float>(resources, runIPDistance<float>(resources,
vectors, vectors,
vectorsTransposed,
queries, queries,
k, k,
outDistances, outDistances,
...@@ -279,6 +286,7 @@ runIPDistance(GpuResources* resources, ...@@ -279,6 +286,7 @@ runIPDistance(GpuResources* resources,
void void
runIPDistance(GpuResources* resources, runIPDistance(GpuResources* resources,
Tensor<half, 2, true>& vectors, Tensor<half, 2, true>& vectors,
Tensor<half, 2, true>* vectorsTransposed,
Tensor<half, 2, true>& queries, Tensor<half, 2, true>& queries,
int k, int k,
Tensor<half, 2, true>& outDistances, Tensor<half, 2, true>& outDistances,
...@@ -286,6 +294,7 @@ runIPDistance(GpuResources* resources, ...@@ -286,6 +294,7 @@ runIPDistance(GpuResources* resources,
int tileSize) { int tileSize) {
runIPDistance<half>(resources, runIPDistance<half>(resources,
vectors, vectors,
vectorsTransposed,
queries, queries,
k, k,
outDistances, outDistances,
...@@ -297,6 +306,7 @@ runIPDistance(GpuResources* resources, ...@@ -297,6 +306,7 @@ runIPDistance(GpuResources* resources,
void void
runL2Distance(GpuResources* resources, runL2Distance(GpuResources* resources,
Tensor<float, 2, true>& vectors, Tensor<float, 2, true>& vectors,
Tensor<float, 2, true>* vectorsTransposed,
Tensor<float, 1, true>* vectorNorms, Tensor<float, 1, true>* vectorNorms,
Tensor<float, 2, true>& queries, Tensor<float, 2, true>& queries,
int k, int k,
...@@ -306,6 +316,7 @@ runL2Distance(GpuResources* resources, ...@@ -306,6 +316,7 @@ runL2Distance(GpuResources* resources,
int tileSize) { int tileSize) {
runL2Distance<float>(resources, runL2Distance<float>(resources,
vectors, vectors,
vectorsTransposed,
vectorNorms, vectorNorms,
queries, queries,
k, k,
...@@ -319,6 +330,7 @@ runL2Distance(GpuResources* resources, ...@@ -319,6 +330,7 @@ runL2Distance(GpuResources* resources,
void void
runL2Distance(GpuResources* resources, runL2Distance(GpuResources* resources,
Tensor<half, 2, true>& vectors, Tensor<half, 2, true>& vectors,
Tensor<half, 2, true>* vectorsTransposed,
Tensor<half, 1, true>* vectorNorms, Tensor<half, 1, true>* vectorNorms,
Tensor<half, 2, true>& queries, Tensor<half, 2, true>& queries,
int k, int k,
...@@ -328,6 +340,7 @@ runL2Distance(GpuResources* resources, ...@@ -328,6 +340,7 @@ runL2Distance(GpuResources* resources,
int tileSize) { int tileSize) {
runL2Distance<half>(resources, runL2Distance<half>(resources,
vectors, vectors,
vectorsTransposed,
vectorNorms, vectorNorms,
queries, queries,
k, k,
......
...@@ -22,6 +22,7 @@ class GpuResources; ...@@ -22,6 +22,7 @@ class GpuResources;
/// `queries`, returning the k closest results seen /// `queries`, returning the k closest results seen
void runL2Distance(GpuResources* resources, void runL2Distance(GpuResources* resources,
Tensor<float, 2, true>& vectors, Tensor<float, 2, true>& vectors,
Tensor<float, 2, true>* vectorsTransposed,
// can be optionally pre-computed; nullptr if we // can be optionally pre-computed; nullptr if we
// have to compute it upon the call // have to compute it upon the call
Tensor<float, 1, true>* vectorNorms, Tensor<float, 1, true>* vectorNorms,
...@@ -41,6 +42,7 @@ void runL2Distance(GpuResources* resources, ...@@ -41,6 +42,7 @@ void runL2Distance(GpuResources* resources,
/// and `queries`, returning the k closest results seen /// and `queries`, returning the k closest results seen
void runIPDistance(GpuResources* resources, void runIPDistance(GpuResources* resources,
Tensor<float, 2, true>& vectors, Tensor<float, 2, true>& vectors,
Tensor<float, 2, true>* vectorsTransposed,
Tensor<float, 2, true>& queries, Tensor<float, 2, true>& queries,
int k, int k,
Tensor<float, 2, true>& outDistances, Tensor<float, 2, true>& outDistances,
...@@ -53,6 +55,7 @@ void runIPDistance(GpuResources* resources, ...@@ -53,6 +55,7 @@ void runIPDistance(GpuResources* resources,
#ifdef FAISS_USE_FLOAT16 #ifdef FAISS_USE_FLOAT16
void runIPDistance(GpuResources* resources, void runIPDistance(GpuResources* resources,
Tensor<half, 2, true>& vectors, Tensor<half, 2, true>& vectors,
Tensor<half, 2, true>* vectorsTransposed,
Tensor<half, 2, true>& queries, Tensor<half, 2, true>& queries,
int k, int k,
Tensor<half, 2, true>& outDistances, Tensor<half, 2, true>& outDistances,
...@@ -61,6 +64,7 @@ void runIPDistance(GpuResources* resources, ...@@ -61,6 +64,7 @@ void runIPDistance(GpuResources* resources,
void runL2Distance(GpuResources* resources, void runL2Distance(GpuResources* resources,
Tensor<half, 2, true>& vectors, Tensor<half, 2, true>& vectors,
Tensor<half, 2, true>* vectorsTransposed,
Tensor<half, 1, true>* vectorNorms, Tensor<half, 1, true>* vectorNorms,
Tensor<half, 2, true>& queries, Tensor<half, 2, true>& queries,
int k, int k,
......
...@@ -14,16 +14,19 @@ ...@@ -14,16 +14,19 @@
#include "L2Norm.cuh" #include "L2Norm.cuh"
#include "../utils/CopyUtils.cuh" #include "../utils/CopyUtils.cuh"
#include "../utils/DeviceUtils.h" #include "../utils/DeviceUtils.h"
#include "../utils/Transpose.cuh"
namespace faiss { namespace gpu { namespace faiss { namespace gpu {
FlatIndex::FlatIndex(GpuResources* res, FlatIndex::FlatIndex(GpuResources* res,
int dim, int dim,
bool l2Distance, bool l2Distance,
bool useFloat16) : bool useFloat16,
bool storeTransposed) :
resources_(res), resources_(res),
dim_(dim), dim_(dim),
useFloat16_(useFloat16), useFloat16_(useFloat16),
storeTransposed_(storeTransposed),
l2Distance_(l2Distance), l2Distance_(l2Distance),
num_(0) { num_(0) {
#ifndef FAISS_USE_FLOAT16 #ifndef FAISS_USE_FLOAT16
...@@ -92,7 +95,7 @@ FlatIndex::getVectorsFloat32Copy(int from, int num, cudaStream_t stream) { ...@@ -92,7 +95,7 @@ FlatIndex::getVectorsFloat32Copy(int from, int num, cudaStream_t stream) {
} }
void void
FlatIndex::query(Tensor<float, 2, true>& vecs, FlatIndex::query(Tensor<float, 2, true>& input,
int k, int k,
Tensor<float, 2, true>& outDistances, Tensor<float, 2, true>& outDistances,
Tensor<int, 2, true>& outIndices, Tensor<int, 2, true>& outIndices,
...@@ -104,12 +107,12 @@ FlatIndex::query(Tensor<float, 2, true>& vecs, ...@@ -104,12 +107,12 @@ FlatIndex::query(Tensor<float, 2, true>& vecs,
if (useFloat16_) { if (useFloat16_) {
// We need to convert to float16 // We need to convert to float16
#ifdef FAISS_USE_FLOAT16 #ifdef FAISS_USE_FLOAT16
auto vecsHalf = toHalf<2>(resources_, stream, vecs); auto inputHalf = toHalf<2>(resources_, stream, input);
DeviceTensor<half, 2, true> outDistancesHalf( DeviceTensor<half, 2, true> outDistancesHalf(
mem, {outDistances.getSize(0), outDistances.getSize(1)}, stream); mem, {outDistances.getSize(0), outDistances.getSize(1)}, stream);
query(vecsHalf, k, outDistancesHalf, outIndices, exactDistance, tileSize); query(inputHalf, k, outDistancesHalf, outIndices, exactDistance, tileSize);
if (exactDistance) { if (exactDistance) {
// Convert outDistances back // Convert outDistances back
...@@ -120,8 +123,9 @@ FlatIndex::query(Tensor<float, 2, true>& vecs, ...@@ -120,8 +123,9 @@ FlatIndex::query(Tensor<float, 2, true>& vecs,
if (l2Distance_) { if (l2Distance_) {
runL2Distance(resources_, runL2Distance(resources_,
vectors_, vectors_,
storeTransposed_ ? &vectorsTransposed_ : nullptr,
&norms_, &norms_,
vecs, input,
k, k,
outDistances, outDistances,
outIndices, outIndices,
...@@ -131,7 +135,8 @@ FlatIndex::query(Tensor<float, 2, true>& vecs, ...@@ -131,7 +135,8 @@ FlatIndex::query(Tensor<float, 2, true>& vecs,
} else { } else {
runIPDistance(resources_, runIPDistance(resources_,
vectors_, vectors_,
vecs, storeTransposed_ ? &vectorsTransposed_ : nullptr,
input,
k, k,
outDistances, outDistances,
outIndices, outIndices,
...@@ -142,7 +147,7 @@ FlatIndex::query(Tensor<float, 2, true>& vecs, ...@@ -142,7 +147,7 @@ FlatIndex::query(Tensor<float, 2, true>& vecs,
#ifdef FAISS_USE_FLOAT16 #ifdef FAISS_USE_FLOAT16
void void
FlatIndex::query(Tensor<half, 2, true>& vecs, FlatIndex::query(Tensor<half, 2, true>& input,
int k, int k,
Tensor<half, 2, true>& outDistances, Tensor<half, 2, true>& outDistances,
Tensor<int, 2, true>& outIndices, Tensor<int, 2, true>& outIndices,
...@@ -153,8 +158,9 @@ FlatIndex::query(Tensor<half, 2, true>& vecs, ...@@ -153,8 +158,9 @@ FlatIndex::query(Tensor<half, 2, true>& vecs,
if (l2Distance_) { if (l2Distance_) {
runL2Distance(resources_, runL2Distance(resources_,
vectorsHalf_, vectorsHalf_,
storeTransposed_ ? &vectorsHalfTransposed_ : nullptr,
&normsHalf_, &normsHalf_,
vecs, input,
k, k,
outDistances, outDistances,
outIndices, outIndices,
...@@ -164,7 +170,8 @@ FlatIndex::query(Tensor<half, 2, true>& vecs, ...@@ -164,7 +170,8 @@ FlatIndex::query(Tensor<half, 2, true>& vecs,
} else { } else {
runIPDistance(resources_, runIPDistance(resources_,
vectorsHalf_, vectorsHalf_,
vecs, storeTransposed_ ? &vectorsHalfTransposed_ : nullptr,
input,
k, k,
outDistances, outDistances,
outIndices, outIndices,
...@@ -215,6 +222,20 @@ FlatIndex::add(const float* data, int numVecs, cudaStream_t stream) { ...@@ -215,6 +222,20 @@ FlatIndex::add(const float* data, int numVecs, cudaStream_t stream) {
vectors_ = std::move(vectors); vectors_ = std::move(vectors);
} }
if (storeTransposed_) {
if (useFloat16_) {
#ifdef FAISS_USE_FLOAT16
vectorsHalfTransposed_ =
std::move(DeviceTensor<half, 2, true>({dim_, (int) num_}));
runTransposeAny(vectorsHalf_, 0, 1, vectorsHalfTransposed_, stream);
#endif
} else {
vectorsTransposed_ =
std::move(DeviceTensor<float, 2, true>({dim_, (int) num_}));
runTransposeAny(vectors_, 0, 1, vectorsTransposed_, stream);
}
}
if (l2Distance_) { if (l2Distance_) {
// Precompute L2 norms of our database // Precompute L2 norms of our database
if (useFloat16_) { if (useFloat16_) {
......
...@@ -25,7 +25,8 @@ class FlatIndex { ...@@ -25,7 +25,8 @@ class FlatIndex {
FlatIndex(GpuResources* res, FlatIndex(GpuResources* res,
int dim, int dim,
bool l2Distance, bool l2Distance,
bool useFloat16); bool useFloat16,
bool storeTransposed);
bool getUseFloat16() const; bool getUseFloat16() const;
...@@ -84,6 +85,10 @@ class FlatIndex { ...@@ -84,6 +85,10 @@ class FlatIndex {
/// Float16 data format /// Float16 data format
const bool useFloat16_; const bool useFloat16_;
/// Store vectors in transposed layout for speed; makes addition to
/// the index slower
const bool storeTransposed_;
/// L2 or inner product distance? /// L2 or inner product distance?
bool l2Distance_; bool l2Distance_;
...@@ -95,10 +100,12 @@ class FlatIndex { ...@@ -95,10 +100,12 @@ class FlatIndex {
/// Vectors currently in rawData_ /// Vectors currently in rawData_
DeviceTensor<float, 2, true> vectors_; DeviceTensor<float, 2, true> vectors_;
DeviceTensor<float, 2, true> vectorsTransposed_;
#ifdef FAISS_USE_FLOAT16 #ifdef FAISS_USE_FLOAT16
/// Vectors currently in rawData_, float16 form /// Vectors currently in rawData_, float16 form
DeviceTensor<half, 2, true> vectorsHalf_; DeviceTensor<half, 2, true> vectorsHalf_;
DeviceTensor<half, 2, true> vectorsHalfTransposed_;
#endif #endif
/// Precomputed L2 norms /// Precomputed L2 norms
......
...@@ -187,6 +187,7 @@ IVFPQ::classifyAndAddVectors(Tensor<float, 2, true>& vecs, ...@@ -187,6 +187,7 @@ IVFPQ::classifyAndAddVectors(Tensor<float, 2, true>& vecs,
runL2Distance(resources_, runL2Distance(resources_,
pqCentroidsMiddleCodeView, pqCentroidsMiddleCodeView,
nullptr, // no transposed storage
nullptr, // no precomputed norms nullptr, // no precomputed norms
residualsTransposeView, residualsTransposeView,
1, 1,
......
...@@ -31,6 +31,7 @@ DEFINE_int32(dim, 128, "# of dimensions"); ...@@ -31,6 +31,7 @@ DEFINE_int32(dim, 128, "# of dimensions");
DEFINE_int32(num_queries, 3, "number of query vectors"); DEFINE_int32(num_queries, 3, "number of query vectors");
DEFINE_bool(diff, true, "show exact distance + index output discrepancies"); DEFINE_bool(diff, true, "show exact distance + index output discrepancies");
DEFINE_bool(use_float16, false, "use encodings in float16 instead of float32"); DEFINE_bool(use_float16, false, "use encodings in float16 instead of float32");
DEFINE_bool(transposed, false, "store vectors transposed");
DEFINE_int64(seed, -1, "specify random seed"); DEFINE_int64(seed, -1, "specify random seed");
DEFINE_int32(num_gpus, 1, "number of gpus to use"); DEFINE_int32(num_gpus, 1, "number of gpus to use");
DEFINE_int64(pinned_mem, 0, "pinned memory allocation to use"); DEFINE_int64(pinned_mem, 0, "pinned memory allocation to use");
...@@ -38,7 +39,7 @@ DEFINE_int64(pinned_mem, 0, "pinned memory allocation to use"); ...@@ -38,7 +39,7 @@ DEFINE_int64(pinned_mem, 0, "pinned memory allocation to use");
using namespace faiss::gpu; using namespace faiss::gpu;
int main(int argc, char** argv) { int main(int argc, char** argv) {
google::ParseCommandLineFlags(&argc, &argv, true); gflags::ParseCommandLineFlags(&argc, &argv, true);
cudaProfilerStop(); cudaProfilerStop();
...@@ -59,6 +60,7 @@ int main(int argc, char** argv) { ...@@ -59,6 +60,7 @@ int main(int argc, char** argv) {
printf("L2 lookup: %d queries, total k %d\n", printf("L2 lookup: %d queries, total k %d\n",
numQueries, FLAGS_k); numQueries, FLAGS_k);
printf("float16 encoding %s\n", FLAGS_use_float16 ? "enabled" : "disabled"); printf("float16 encoding %s\n", FLAGS_use_float16 ? "enabled" : "disabled");
printf("transposed storage %s\n", FLAGS_transposed ? "enabled" : "disabled");
// Convert to GPU index // Convert to GPU index
printf("Copying index to %d GPU(s)...\n", FLAGS_num_gpus); printf("Copying index to %d GPU(s)...\n", FLAGS_num_gpus);
...@@ -68,8 +70,13 @@ int main(int argc, char** argv) { ...@@ -68,8 +70,13 @@ int main(int argc, char** argv) {
((faiss::gpu::StandardGpuResources*) res)->setPinnedMemory( ((faiss::gpu::StandardGpuResources*) res)->setPinnedMemory(
FLAGS_pinned_mem); FLAGS_pinned_mem);
GpuIndexFlatConfig config;
config.device = dev;
config.useFloat16 = FLAGS_use_float16;
config.storeTransposed = FLAGS_transposed;
auto p = std::unique_ptr<faiss::gpu::GpuIndexFlatL2>( auto p = std::unique_ptr<faiss::gpu::GpuIndexFlatL2>(
new faiss::gpu::GpuIndexFlatL2(res, dev, FLAGS_use_float16, index.get())); new faiss::gpu::GpuIndexFlatL2(res, index.get(), config));
return p; return p;
}; };
......
...@@ -41,7 +41,7 @@ DEFINE_int32(index, 2, "0 = no indices on GPU; 1 = 32 bit, 2 = 64 bit on GPU"); ...@@ -41,7 +41,7 @@ DEFINE_int32(index, 2, "0 = no indices on GPU; 1 = 32 bit, 2 = 64 bit on GPU");
using namespace faiss::gpu; using namespace faiss::gpu;
int main(int argc, char** argv) { int main(int argc, char** argv) {
google::ParseCommandLineFlags(&argc, &argv, true); gflags::ParseCommandLineFlags(&argc, &argv, true);
cudaProfilerStop(); cudaProfilerStop();
......
...@@ -41,7 +41,7 @@ DEFINE_int32(index, 2, "0 = no indices on GPU; 1 = 32 bit, 2 = 64 bit on GPU"); ...@@ -41,7 +41,7 @@ DEFINE_int32(index, 2, "0 = no indices on GPU; 1 = 32 bit, 2 = 64 bit on GPU");
using namespace faiss::gpu; using namespace faiss::gpu;
int main(int argc, char** argv) { int main(int argc, char** argv) {
google::ParseCommandLineFlags(&argc, &argv, true); gflags::ParseCommandLineFlags(&argc, &argv, true);
CUDA_VERIFY(cudaProfilerStop()); CUDA_VERIFY(cudaProfilerStop());
......
...@@ -34,7 +34,7 @@ DEFINE_int32(index, 2, "0 = no indices on GPU; 1 = 32 bit, 2 = 64 bit on GPU"); ...@@ -34,7 +34,7 @@ DEFINE_int32(index, 2, "0 = no indices on GPU; 1 = 32 bit, 2 = 64 bit on GPU");
using namespace faiss::gpu; using namespace faiss::gpu;
int main(int argc, char** argv) { int main(int argc, char** argv) {
google::ParseCommandLineFlags(&argc, &argv, true); gflags::ParseCommandLineFlags(&argc, &argv, true);
auto seed = time(nullptr); auto seed = time(nullptr);
auto k = FLAGS_k; auto k = FLAGS_k;
......
...@@ -28,6 +28,7 @@ DEFINE_int32(dim, 128, "# of dimensions"); ...@@ -28,6 +28,7 @@ DEFINE_int32(dim, 128, "# of dimensions");
DEFINE_int32(niter, 10, "# of iterations"); DEFINE_int32(niter, 10, "# of iterations");
DEFINE_bool(L2_metric, true, "If true, use L2 metric. If false, use IP metric"); DEFINE_bool(L2_metric, true, "If true, use L2 metric. If false, use IP metric");
DEFINE_bool(use_float16, false, "use float16 vectors and math"); DEFINE_bool(use_float16, false, "use float16 vectors and math");
DEFINE_bool(transposed, false, "transposed vector storage");
DEFINE_bool(verbose, false, "turn on clustering logging"); DEFINE_bool(verbose, false, "turn on clustering logging");
DEFINE_int64(seed, -1, "specify random seed"); DEFINE_int64(seed, -1, "specify random seed");
DEFINE_int32(num_gpus, 1, "number of gpus to use"); DEFINE_int32(num_gpus, 1, "number of gpus to use");
...@@ -38,7 +39,7 @@ DEFINE_int32(max_points, -1, "max points per centroid"); ...@@ -38,7 +39,7 @@ DEFINE_int32(max_points, -1, "max points per centroid");
using namespace faiss::gpu; using namespace faiss::gpu;
int main(int argc, char** argv) { int main(int argc, char** argv) {
google::ParseCommandLineFlags(&argc, &argv, true); gflags::ParseCommandLineFlags(&argc, &argv, true);
cudaProfilerStop(); cudaProfilerStop();
...@@ -52,6 +53,7 @@ int main(int argc, char** argv) { ...@@ -52,6 +53,7 @@ int main(int argc, char** argv) {
FLAGS_L2_metric ? "L2" : "IP", FLAGS_L2_metric ? "L2" : "IP",
FLAGS_dim, FLAGS_k, FLAGS_num, FLAGS_niter); FLAGS_dim, FLAGS_k, FLAGS_num, FLAGS_niter);
printf("float16 math %s\n", FLAGS_use_float16 ? "enabled" : "disabled"); printf("float16 math %s\n", FLAGS_use_float16 ? "enabled" : "disabled");
printf("transposed storage %s\n", FLAGS_transposed ? "enabled" : "disabled");
printf("verbose %s\n", FLAGS_verbose ? "enabled" : "disabled"); printf("verbose %s\n", FLAGS_verbose ? "enabled" : "disabled");
auto initFn = [](faiss::gpu::GpuResources* res, int dev) -> auto initFn = [](faiss::gpu::GpuResources* res, int dev) ->
...@@ -61,12 +63,17 @@ int main(int argc, char** argv) { ...@@ -61,12 +63,17 @@ int main(int argc, char** argv) {
FLAGS_pinned_mem); FLAGS_pinned_mem);
} }
GpuIndexFlatConfig config;
config.device = dev;
config.useFloat16 = FLAGS_use_float16;
config.storeTransposed = FLAGS_transposed;
auto p = std::unique_ptr<faiss::gpu::GpuIndexFlat>( auto p = std::unique_ptr<faiss::gpu::GpuIndexFlat>(
FLAGS_L2_metric ? FLAGS_L2_metric ?
(faiss::gpu::GpuIndexFlat*) (faiss::gpu::GpuIndexFlat*)
new faiss::gpu::GpuIndexFlatL2(res, dev, FLAGS_dim, FLAGS_use_float16) : new faiss::gpu::GpuIndexFlatL2(res, FLAGS_dim, config) :
(faiss::gpu::GpuIndexFlat*) (faiss::gpu::GpuIndexFlat*)
new faiss::gpu::GpuIndexFlatIP(res, dev, FLAGS_dim, FLAGS_use_float16)); new faiss::gpu::GpuIndexFlatIP(res, FLAGS_dim, config));
if (FLAGS_min_paging_size >= 0) { if (FLAGS_min_paging_size >= 0) {
p->setMinPagingSize(FLAGS_min_paging_size); p->setMinPagingSize(FLAGS_min_paging_size);
......
...@@ -35,7 +35,7 @@ DEFINE_bool(per_batch_time, false, "print per-batch times"); ...@@ -35,7 +35,7 @@ DEFINE_bool(per_batch_time, false, "print per-batch times");
DEFINE_bool(reserve_memory, false, "whether or not to pre-reserve memory"); DEFINE_bool(reserve_memory, false, "whether or not to pre-reserve memory");
int main(int argc, char** argv) { int main(int argc, char** argv) {
google::ParseCommandLineFlags(&argc, &argv, true); gflags::ParseCommandLineFlags(&argc, &argv, true);
cudaProfilerStop(); cudaProfilerStop();
......
...@@ -52,7 +52,7 @@ void fillAndSave(T& index, int numTrain, int num, int dim) { ...@@ -52,7 +52,7 @@ void fillAndSave(T& index, int numTrain, int num, int dim) {
} }
int main(int argc, char** argv) { int main(int argc, char** argv) {
google::ParseCommandLineFlags(&argc, &argv, true); gflags::ParseCommandLineFlags(&argc, &argv, true);
// Either ivfpq or ivfflat must be set // Either ivfpq or ivfflat must be set
if ((FLAGS_ivfpq && FLAGS_ivfflat) || if ((FLAGS_ivfpq && FLAGS_ivfflat) ||
......
...@@ -22,7 +22,10 @@ ...@@ -22,7 +22,10 @@
constexpr float kF16MaxRelErr = 0.07f; constexpr float kF16MaxRelErr = 0.07f;
constexpr float kF32MaxRelErr = 6e-3f; constexpr float kF32MaxRelErr = 6e-3f;
void testFlat(bool useL2, bool useFloat16, int kOverride = -1) { void testFlat(bool useL2,
bool useFloat16,
bool useTransposed,
int kOverride = -1) {
int numVecs = faiss::gpu::randVal(1000, 20000); int numVecs = faiss::gpu::randVal(1000, 20000);
int dim = faiss::gpu::randVal(50, 800); int dim = faiss::gpu::randVal(50, 800);
int numQuery = faiss::gpu::randVal(1, 512); int numQuery = faiss::gpu::randVal(1, 512);
...@@ -49,8 +52,15 @@ void testFlat(bool useL2, bool useFloat16, int kOverride = -1) { ...@@ -49,8 +52,15 @@ void testFlat(bool useL2, bool useFloat16, int kOverride = -1) {
faiss::gpu::StandardGpuResources res; faiss::gpu::StandardGpuResources res;
res.noTempMemory(); res.noTempMemory();
faiss::gpu::GpuIndexFlatIP gpuIndexIP(&res, device, dim, useFloat16);
faiss::gpu::GpuIndexFlatL2 gpuIndexL2(&res, device, dim, useFloat16);
faiss::gpu::GpuIndexFlatConfig config;
config.device = device;
config.useFloat16 = useFloat16;
config.storeTransposed = useTransposed;
faiss::gpu::GpuIndexFlatIP gpuIndexIP(&res, dim, config);
faiss::gpu::GpuIndexFlatL2 gpuIndexL2(&res, dim, config);
faiss::gpu::GpuIndexFlat* gpuIndex = faiss::gpu::GpuIndexFlat* gpuIndex =
useL2 ? (faiss::gpu::GpuIndexFlat*) &gpuIndexL2 : useL2 ? (faiss::gpu::GpuIndexFlat*) &gpuIndexL2 :
...@@ -64,6 +74,7 @@ void testFlat(bool useL2, bool useFloat16, int kOverride = -1) { ...@@ -64,6 +74,7 @@ void testFlat(bool useL2, bool useFloat16, int kOverride = -1) {
str << (useL2 ? "L2" : "IP") << " numVecs " << numVecs str << (useL2 ? "L2" : "IP") << " numVecs " << numVecs
<< " dim " << dim << " dim " << dim
<< " useFloat16 " << useFloat16 << " useFloat16 " << useFloat16
<< " transposed " << useTransposed
<< " numQuery " << numQuery << " numQuery " << numQuery
<< " k " << k; << " k " << k;
...@@ -79,16 +90,18 @@ void testFlat(bool useL2, bool useFloat16, int kOverride = -1) { ...@@ -79,16 +90,18 @@ void testFlat(bool useL2, bool useFloat16, int kOverride = -1) {
} }
TEST(TestGpuIndexFlat, IP_Float32) { TEST(TestGpuIndexFlat, IP_Float32) {
for (int tries = 0; tries < 10; ++tries) { for (int tries = 0; tries < 5; ++tries) {
faiss::gpu::newTestSeed(); faiss::gpu::newTestSeed();
testFlat(false, false); testFlat(false, false, false);
testFlat(false, false, true);
} }
} }
TEST(TestGpuIndexFlat, L2_Float32) { TEST(TestGpuIndexFlat, L2_Float32) {
for (int tries = 0; tries < 10; ++tries) { for (int tries = 0; tries < 5; ++tries) {
faiss::gpu::newTestSeed(); faiss::gpu::newTestSeed();
testFlat(true, false); testFlat(true, false, false);
testFlat(true, false, true);
} }
} }
...@@ -96,21 +109,24 @@ TEST(TestGpuIndexFlat, L2_Float32) { ...@@ -96,21 +109,24 @@ TEST(TestGpuIndexFlat, L2_Float32) {
TEST(TestGpuIndexFlat, L2_Float32_K1) { TEST(TestGpuIndexFlat, L2_Float32_K1) {
for (int tries = 0; tries < 5; ++tries) { for (int tries = 0; tries < 5; ++tries) {
faiss::gpu::newTestSeed(); faiss::gpu::newTestSeed();
testFlat(true, false, 1); testFlat(true, false, false, 1);
testFlat(true, false, true, 1);
} }
} }
TEST(TestGpuIndexFlat, IP_Float16) { TEST(TestGpuIndexFlat, IP_Float16) {
for (int tries = 0; tries < 10; ++tries) { for (int tries = 0; tries < 5; ++tries) {
faiss::gpu::newTestSeed(); faiss::gpu::newTestSeed();
testFlat(false, true); testFlat(false, true, false);
// Second run must cover transposed storage, as in the sibling tests;
// repeating the non-transposed case left float16 IP + transposed untested
testFlat(false, true, true);
} }
} }
TEST(TestGpuIndexFlat, L2_Float16) { TEST(TestGpuIndexFlat, L2_Float16) {
for (int tries = 0; tries < 10; ++tries) { for (int tries = 0; tries < 5; ++tries) {
faiss::gpu::newTestSeed(); faiss::gpu::newTestSeed();
testFlat(true, true); testFlat(true, true, false);
testFlat(true, true, true);
} }
} }
...@@ -118,7 +134,8 @@ TEST(TestGpuIndexFlat, L2_Float16) { ...@@ -118,7 +134,8 @@ TEST(TestGpuIndexFlat, L2_Float16) {
TEST(TestGpuIndexFlat, L2_Float16_K1) { TEST(TestGpuIndexFlat, L2_Float16_K1) {
for (int tries = 0; tries < 5; ++tries) { for (int tries = 0; tries < 5; ++tries) {
faiss::gpu::newTestSeed(); faiss::gpu::newTestSeed();
testFlat(true, true, 1); testFlat(true, true, false, 1);
testFlat(true, true, true, 1);
} }
} }
...@@ -126,8 +143,13 @@ TEST(TestGpuIndexFlat, QueryEmpty) { ...@@ -126,8 +143,13 @@ TEST(TestGpuIndexFlat, QueryEmpty) {
faiss::gpu::StandardGpuResources res; faiss::gpu::StandardGpuResources res;
res.noTempMemory(); res.noTempMemory();
faiss::gpu::GpuIndexFlatConfig config;
config.device = 0;
config.useFloat16 = false;
config.storeTransposed = false;
int dim = 128; int dim = 128;
faiss::gpu::GpuIndexFlatL2 gpuIndex(&res, 0, dim, false); faiss::gpu::GpuIndexFlatL2 gpuIndex(&res, dim, config);
// Querying an empty index should not blow up, and just return // Querying an empty index should not blow up, and just return
// (FLT_MAX, -1) // (FLT_MAX, -1)
...@@ -165,7 +187,13 @@ TEST(TestGpuIndexFlat, CopyFrom) { ...@@ -165,7 +187,13 @@ TEST(TestGpuIndexFlat, CopyFrom) {
// Fill with garbage values // Fill with garbage values
int device = faiss::gpu::randVal(0, faiss::gpu::getNumDevices() - 1); int device = faiss::gpu::randVal(0, faiss::gpu::getNumDevices() - 1);
faiss::gpu::GpuIndexFlatL2 gpuIndex(&res, device, 2000, false);
faiss::gpu::GpuIndexFlatConfig config;
// Use the randomly chosen device computed above (as the CopyTo test
// does) instead of always pinning the index to device 0
config.device = device;
config.useFloat16 = false;
config.storeTransposed = false;
faiss::gpu::GpuIndexFlatL2 gpuIndex(&res, 2000, config);
gpuIndex.copyFrom(&cpuIndex); gpuIndex.copyFrom(&cpuIndex);
EXPECT_EQ(cpuIndex.ntotal, gpuIndex.ntotal); EXPECT_EQ(cpuIndex.ntotal, gpuIndex.ntotal);
...@@ -195,7 +223,13 @@ TEST(TestGpuIndexFlat, CopyTo) { ...@@ -195,7 +223,13 @@ TEST(TestGpuIndexFlat, CopyTo) {
int dim = faiss::gpu::randVal(1, 1000); int dim = faiss::gpu::randVal(1, 1000);
int device = faiss::gpu::randVal(0, faiss::gpu::getNumDevices() - 1); int device = faiss::gpu::randVal(0, faiss::gpu::getNumDevices() - 1);
faiss::gpu::GpuIndexFlatL2 gpuIndex(&res, device, dim, false);
faiss::gpu::GpuIndexFlatConfig config;
config.device = device;
config.useFloat16 = false;
config.storeTransposed = false;
faiss::gpu::GpuIndexFlatL2 gpuIndex(&res, dim, config);
std::vector<float> vecs = faiss::gpu::randVecs(numVecs, dim); std::vector<float> vecs = faiss::gpu::randVecs(numVecs, dim);
gpuIndex.add(numVecs, vecs.data()); gpuIndex.add(numVecs, vecs.data());
......
...@@ -45,7 +45,10 @@ class EvalIVFPQAccuracy(testutil.BaseFacebookTestCase): ...@@ -45,7 +45,10 @@ class EvalIVFPQAccuracy(testutil.BaseFacebookTestCase):
res = faiss.StandardGpuResources() res = faiss.StandardGpuResources()
gt_index = faiss.GpuIndexFlatL2(res, dev_no, d, False) flat_config = faiss.GpuIndexFlatConfig()
flat_config.device = dev_no
gt_index = faiss.GpuIndexFlatL2(res, d, flat_config)
gt_index.add(xb) gt_index.add(xb)
D, gt_nns = gt_index.search(xq, 1) D, gt_nns = gt_index.search(xq, 1)
......
...@@ -11,6 +11,7 @@ ...@@ -11,6 +11,7 @@
#include "../../FaissAssert.h" #include "../../FaissAssert.h"
#include "DeviceUtils.h" #include "DeviceUtils.h"
#include <limits>
namespace faiss { namespace gpu { namespace faiss { namespace gpu {
...@@ -279,6 +280,58 @@ Tensor<T, Dim, Contig, IndexT, PtrTraits>::canCastResize() const { ...@@ -279,6 +280,58 @@ Tensor<T, Dim, Contig, IndexT, PtrTraits>::canCastResize() const {
return true; return true;
} }
/// Builds a tensor over the same data pointer whose sizes and strides
/// are stored as NewIndexT rather than IndexT. When NewIndexT is the
/// narrower type, an assert first checks canCastIndexType<NewIndexT>().
template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
template <typename NewIndexT>
__host__ Tensor<T, Dim, Contig, NewIndexT, PtrTraits>
Tensor<T, Dim, Contig, IndexT, PtrTraits>::castIndexType() const {
  // Only a narrowing conversion can lose information, so only that
  // case is checked
  if (sizeof(NewIndexT) < sizeof(IndexT)) {
    assert(this->canCastIndexType<NewIndexT>());
  }

  NewIndexT castSizes[Dim];
  NewIndexT castStrides[Dim];

  // Convert each dimension's extent and stride element by element
  for (int dim = 0; dim < Dim; ++dim) {
    castSizes[dim] = (NewIndexT) size_[dim];
    castStrides[dim] = (NewIndexT) stride_[dim];
  }

  return Tensor<T, Dim, Contig, NewIndexT, PtrTraits>(
    data_, castSizes, castStrides);
}
/// Returns true when the largest size * stride product of this tensor
/// does not exceed the maximum value of NewIndexT. For a contiguous
/// tensor only dimension 0 is examined; otherwise every dimension is.
template <typename T, int Dim, bool Contig,
          typename IndexT, template <typename U> class PtrTraits>
template <typename NewIndexT>
__host__ bool
Tensor<T, Dim, Contig, IndexT, PtrTraits>::canCastIndexType() const {
  static_assert(sizeof(size_t) >= sizeof(IndexT),
                "index size too large");
  static_assert(sizeof(size_t) >= sizeof(NewIndexT),
                "new index size too large");

  // Find maximum offset that can be calculated
  // FIXME: maybe also consider offset in bytes? multiply by sizeof(T)?
  size_t largestExtent = 0;

  // Contiguous tensors are judged on dimension 0 alone
  const int dimsToCheck = Contig ? 1 : Dim;

  for (int dim = 0; dim < dimsToCheck; ++dim) {
    const size_t extent = (size_t) size_[dim] * (size_t) stride_[dim];
    largestExtent = extent > largestExtent ? extent : largestExtent;
  }

  return largestExtent <= (size_t) std::numeric_limits<NewIndexT>::max();
}
template <typename T, int Dim, bool Contig, template <typename T, int Dim, bool Contig,
typename IndexT, template <typename U> class PtrTraits> typename IndexT, template <typename U> class PtrTraits>
__host__ __device__ IndexT __host__ __device__ IndexT
......
...@@ -158,6 +158,18 @@ class Tensor { ...@@ -158,6 +158,18 @@ class Tensor {
template <typename U> template <typename U>
__host__ __device__ bool canCastResize() const; __host__ __device__ bool canCastResize() const;
/// Attempts to cast this tensor to a tensor of a different IndexT.
/// Fails if size or stride entries are not representable in the new
/// IndexT.
template <typename NewIndexT>
__host__ Tensor<T, Dim, Contig, NewIndexT, PtrTraits>
castIndexType() const;
/// Returns true if we can castIndexType() this tensor to the new
/// index type
template <typename NewIndexT>
__host__ bool canCastIndexType() const;
/// Returns a raw pointer to the start of our data. /// Returns a raw pointer to the start of our data.
__host__ __device__ inline DataPtrType data() { __host__ __device__ inline DataPtrType data() {
return data_; return data_;
...@@ -337,6 +349,27 @@ class Tensor { ...@@ -337,6 +349,27 @@ class Tensor {
IndexT size_[Dim]; IndexT size_[Dim];
}; };
// Utilities for checking a collection of tensors
namespace detail {

/// Base case of the recursion: an empty collection can always be cast.
template <typename IndexType>
bool canCastIndexType() {
  return true;
}

/// Recursive case: true only if `arg` and every remaining tensor in
/// `args` report canCastIndexType<IndexType>().
template <typename IndexType, typename T, typename... U>
bool canCastIndexType(const T& arg, const U&... args) {
  // `arg` has a dependent type, so the member template call needs the
  // `template` disambiguator. IndexType appears in no function
  // parameter and cannot be deduced, so it is forwarded explicitly; the
  // qualified name keeps argument-dependent lookup from also
  // considering the public overload.
  return arg.template canCastIndexType<IndexType>() &&
    detail::canCastIndexType<IndexType>(args...);
}

} // namespace detail

/// Returns true if every tensor in `args` can be castIndexType()'d to
/// IndexType; trivially true when no tensors are given.
template <typename IndexType, typename... T>
bool canCastIndexType(const T&... args) {
  // IndexType must be passed on explicitly; it is not deducible
  return detail::canCastIndexType<IndexType>(args...);
}
namespace detail { namespace detail {
/// Specialization for a view of a single value (0-dimensional) /// Specialization for a view of a single value (0-dimensional)
......
...@@ -24,16 +24,25 @@ ...@@ -24,16 +24,25 @@
#include "IndexPQ.h" #include "IndexPQ.h"
#include "IndexIVF.h" #include "IndexIVF.h"
#include "IndexIVFPQ.h" #include "IndexIVFPQ.h"
#include "MetaIndexes.h"
/************************************************************* /*************************************************************
* The I/O format is the content of the class. For objects that are * The I/O format is the content of the class. For objects that are
* inherited, like Index, a 4-character-code indicates which child * inherited, like Index, a 4-character-code (fourcc) indicates which
* class this is an instance of. * child class this is an instance of.
* *
* In this case, the fields of the parent class are written first, * In this case, the fields of the parent class are written first,
* then the ones for the child classes. Note that this requires * then the ones for the child classes. Note that this requires
* classes to be serialized to have a constructor without parameters, * classes to be serialized to have a constructor without parameters,
* so that the fields can be filled in later. * so that the fields can be filled in later. The default constructor
* should set reasonable defaults for all fields.
*
* The fourccs are assigned arbitrarily. When a class changes (fields
* added or deprecated), its fourcc can be replaced. New code should
* still be able to read the old fourcc and fill in new classes.
*
* TODO: serialization to strings for use in Python pickle or Torch
* serialization.
**************************************************************/ **************************************************************/
...@@ -294,6 +303,13 @@ void write_index (const Index *idx, FILE *f) { ...@@ -294,6 +303,13 @@ void write_index (const Index *idx, FILE *f) {
write_index (idxrf->base_index, f); write_index (idxrf->base_index, f);
write_index (&idxrf->refine_index, f); write_index (&idxrf->refine_index, f);
WRITE1 (idxrf->k_factor); WRITE1 (idxrf->k_factor);
} else if(const IndexIDMap * idxmap =
dynamic_cast<const IndexIDMap *> (idx)) {
uint32_t h = fourcc ("IxMp");
WRITE1 (h);
write_index_header (idxmap, f);
write_index (idxmap->index, f);
WRITEVECTOR (idxmap->id_map);
} else { } else {
FAISS_ASSERT (!"don't know how to serialize this type of index"); FAISS_ASSERT (!"don't know how to serialize this type of index");
} }
...@@ -572,6 +588,13 @@ Index *read_index (FILE * f, bool try_mmap) { ...@@ -572,6 +588,13 @@ Index *read_index (FILE * f, bool try_mmap) {
delete rf; delete rf;
READ1 (idxrf->k_factor); READ1 (idxrf->k_factor);
idx = idxrf; idx = idxrf;
} else if(h == fourcc ("IxMp")) {
IndexIDMap * idxmap = new IndexIDMap ();
read_index_header (idxmap, f);
idxmap->index = read_index (f);
idxmap->own_fields = true;
READVECTOR (idxmap->id_map);
idx = idxmap;
} else { } else {
fprintf (stderr, "Index type 0x%08x not supported\n", h); fprintf (stderr, "Index type 0x%08x not supported\n", h);
abort (); abort ();
......
...@@ -724,6 +724,10 @@ bincode_hist = _swigfaiss.bincode_hist ...@@ -724,6 +724,10 @@ bincode_hist = _swigfaiss.bincode_hist
def ivec_checksum(*args): def ivec_checksum(*args):
return _swigfaiss.ivec_checksum(*args) return _swigfaiss.ivec_checksum(*args)
ivec_checksum = _swigfaiss.ivec_checksum ivec_checksum = _swigfaiss.ivec_checksum
# Thin forwarding wrapper: passes all positional arguments straight through
# to the native _swigfaiss.fvecs_maybe_subsample and returns its result.
def fvecs_maybe_subsample(*args):
return _swigfaiss.fvecs_maybe_subsample(*args)
# Rebinds the module-level name directly to the native function, replacing
# the Python wrapper defined just above.
fvecs_maybe_subsample = _swigfaiss.fvecs_maybe_subsample
METRIC_INNER_PRODUCT = _swigfaiss.METRIC_INNER_PRODUCT METRIC_INNER_PRODUCT = _swigfaiss.METRIC_INNER_PRODUCT
METRIC_L2 = _swigfaiss.METRIC_L2 METRIC_L2 = _swigfaiss.METRIC_L2
class Index(_object): class Index(_object):
...@@ -963,13 +967,9 @@ class LinearTransform(VectorTransform): ...@@ -963,13 +967,9 @@ class LinearTransform(VectorTransform):
except: self.this = this except: self.this = this
def apply_noalloc(self, *args): return _swigfaiss.LinearTransform_apply_noalloc(self, *args) def apply_noalloc(self, *args): return _swigfaiss.LinearTransform_apply_noalloc(self, *args)
def transform_transpose(self, *args): return _swigfaiss.LinearTransform_transform_transpose(self, *args) def transform_transpose(self, *args): return _swigfaiss.LinearTransform_transform_transpose(self, *args)
__swig_setmethods__["max_points_per_d"] = _swigfaiss.LinearTransform_max_points_per_d_set
__swig_getmethods__["max_points_per_d"] = _swigfaiss.LinearTransform_max_points_per_d_get
if _newclass:max_points_per_d = _swig_property(_swigfaiss.LinearTransform_max_points_per_d_get, _swigfaiss.LinearTransform_max_points_per_d_set)
__swig_setmethods__["verbose"] = _swigfaiss.LinearTransform_verbose_set __swig_setmethods__["verbose"] = _swigfaiss.LinearTransform_verbose_set
__swig_getmethods__["verbose"] = _swigfaiss.LinearTransform_verbose_get __swig_getmethods__["verbose"] = _swigfaiss.LinearTransform_verbose_get
if _newclass:verbose = _swig_property(_swigfaiss.LinearTransform_verbose_get, _swigfaiss.LinearTransform_verbose_set) if _newclass:verbose = _swig_property(_swigfaiss.LinearTransform_verbose_get, _swigfaiss.LinearTransform_verbose_set)
def maybe_subsample_train_set(self, *args): return _swigfaiss.LinearTransform_maybe_subsample_train_set(self, *args)
__swig_destroy__ = _swigfaiss.delete_LinearTransform __swig_destroy__ = _swigfaiss.delete_LinearTransform
__del__ = lambda self : None; __del__ = lambda self : None;
LinearTransform_swigregister = _swigfaiss.LinearTransform_swigregister LinearTransform_swigregister = _swigfaiss.LinearTransform_swigregister
...@@ -1008,6 +1008,9 @@ class PCAMatrix(LinearTransform): ...@@ -1008,6 +1008,9 @@ class PCAMatrix(LinearTransform):
__swig_setmethods__["random_rotation"] = _swigfaiss.PCAMatrix_random_rotation_set __swig_setmethods__["random_rotation"] = _swigfaiss.PCAMatrix_random_rotation_set
__swig_getmethods__["random_rotation"] = _swigfaiss.PCAMatrix_random_rotation_get __swig_getmethods__["random_rotation"] = _swigfaiss.PCAMatrix_random_rotation_get
if _newclass:random_rotation = _swig_property(_swigfaiss.PCAMatrix_random_rotation_get, _swigfaiss.PCAMatrix_random_rotation_set) if _newclass:random_rotation = _swig_property(_swigfaiss.PCAMatrix_random_rotation_get, _swigfaiss.PCAMatrix_random_rotation_set)
__swig_setmethods__["max_points_per_d"] = _swigfaiss.PCAMatrix_max_points_per_d_set
__swig_getmethods__["max_points_per_d"] = _swigfaiss.PCAMatrix_max_points_per_d_get
if _newclass:max_points_per_d = _swig_property(_swigfaiss.PCAMatrix_max_points_per_d_get, _swigfaiss.PCAMatrix_max_points_per_d_set)
__swig_setmethods__["balanced_bins"] = _swigfaiss.PCAMatrix_balanced_bins_set __swig_setmethods__["balanced_bins"] = _swigfaiss.PCAMatrix_balanced_bins_set
__swig_getmethods__["balanced_bins"] = _swigfaiss.PCAMatrix_balanced_bins_get __swig_getmethods__["balanced_bins"] = _swigfaiss.PCAMatrix_balanced_bins_get
if _newclass:balanced_bins = _swig_property(_swigfaiss.PCAMatrix_balanced_bins_get, _swigfaiss.PCAMatrix_balanced_bins_set) if _newclass:balanced_bins = _swig_property(_swigfaiss.PCAMatrix_balanced_bins_get, _swigfaiss.PCAMatrix_balanced_bins_set)
...@@ -1053,9 +1056,9 @@ class OPQMatrix(LinearTransform): ...@@ -1053,9 +1056,9 @@ class OPQMatrix(LinearTransform):
__swig_setmethods__["niter_pq_0"] = _swigfaiss.OPQMatrix_niter_pq_0_set __swig_setmethods__["niter_pq_0"] = _swigfaiss.OPQMatrix_niter_pq_0_set
__swig_getmethods__["niter_pq_0"] = _swigfaiss.OPQMatrix_niter_pq_0_get __swig_getmethods__["niter_pq_0"] = _swigfaiss.OPQMatrix_niter_pq_0_get
if _newclass:niter_pq_0 = _swig_property(_swigfaiss.OPQMatrix_niter_pq_0_get, _swigfaiss.OPQMatrix_niter_pq_0_set) if _newclass:niter_pq_0 = _swig_property(_swigfaiss.OPQMatrix_niter_pq_0_get, _swigfaiss.OPQMatrix_niter_pq_0_set)
__swig_setmethods__["max_points_per_d"] = _swigfaiss.OPQMatrix_max_points_per_d_set __swig_setmethods__["max_train_points"] = _swigfaiss.OPQMatrix_max_train_points_set
__swig_getmethods__["max_points_per_d"] = _swigfaiss.OPQMatrix_max_points_per_d_get __swig_getmethods__["max_train_points"] = _swigfaiss.OPQMatrix_max_train_points_get
if _newclass:max_points_per_d = _swig_property(_swigfaiss.OPQMatrix_max_points_per_d_get, _swigfaiss.OPQMatrix_max_points_per_d_set) if _newclass:max_train_points = _swig_property(_swigfaiss.OPQMatrix_max_train_points_get, _swigfaiss.OPQMatrix_max_train_points_set)
__swig_setmethods__["verbose"] = _swigfaiss.OPQMatrix_verbose_set __swig_setmethods__["verbose"] = _swigfaiss.OPQMatrix_verbose_set
__swig_getmethods__["verbose"] = _swigfaiss.OPQMatrix_verbose_get __swig_getmethods__["verbose"] = _swigfaiss.OPQMatrix_verbose_get
if _newclass:verbose = _swig_property(_swigfaiss.OPQMatrix_verbose_get, _swigfaiss.OPQMatrix_verbose_set) if _newclass:verbose = _swig_property(_swigfaiss.OPQMatrix_verbose_get, _swigfaiss.OPQMatrix_verbose_set)
...@@ -1885,10 +1888,6 @@ class IndexIDMap(Index): ...@@ -1885,10 +1888,6 @@ class IndexIDMap(Index):
__swig_setmethods__["id_map"] = _swigfaiss.IndexIDMap_id_map_set __swig_setmethods__["id_map"] = _swigfaiss.IndexIDMap_id_map_set
__swig_getmethods__["id_map"] = _swigfaiss.IndexIDMap_id_map_get __swig_getmethods__["id_map"] = _swigfaiss.IndexIDMap_id_map_get
if _newclass:id_map = _swig_property(_swigfaiss.IndexIDMap_id_map_get, _swigfaiss.IndexIDMap_id_map_set) if _newclass:id_map = _swig_property(_swigfaiss.IndexIDMap_id_map_get, _swigfaiss.IndexIDMap_id_map_set)
def __init__(self, *args):
this = _swigfaiss.new_IndexIDMap(*args)
try: self.this.append(this)
except: self.this = this
def add_with_ids(self, *args): return _swigfaiss.IndexIDMap_add_with_ids(self, *args) def add_with_ids(self, *args): return _swigfaiss.IndexIDMap_add_with_ids(self, *args)
def add(self, *args): return _swigfaiss.IndexIDMap_add(self, *args) def add(self, *args): return _swigfaiss.IndexIDMap_add(self, *args)
def search(self, *args): return _swigfaiss.IndexIDMap_search(self, *args) def search(self, *args): return _swigfaiss.IndexIDMap_search(self, *args)
...@@ -1897,6 +1896,10 @@ class IndexIDMap(Index): ...@@ -1897,6 +1896,10 @@ class IndexIDMap(Index):
def set_typename(self): return _swigfaiss.IndexIDMap_set_typename(self) def set_typename(self): return _swigfaiss.IndexIDMap_set_typename(self)
__swig_destroy__ = _swigfaiss.delete_IndexIDMap __swig_destroy__ = _swigfaiss.delete_IndexIDMap
__del__ = lambda self : None; __del__ = lambda self : None;
# Creates the underlying native object by forwarding all arguments to
# _swigfaiss.new_IndexIDMap and records the resulting handle on self.this.
def __init__(self, *args):
this = _swigfaiss.new_IndexIDMap(*args)
# Append the handle to an existing self.this when that works; on any
# exception (bare except) fall back to plain assignment.
try: self.this.append(this)
except: self.this = this
IndexIDMap_swigregister = _swigfaiss.IndexIDMap_swigregister IndexIDMap_swigregister = _swigfaiss.IndexIDMap_swigregister
IndexIDMap_swigregister(IndexIDMap) IndexIDMap_swigregister(IndexIDMap)
......
...@@ -793,6 +793,10 @@ bincode_hist = _swigfaiss_gpu.bincode_hist ...@@ -793,6 +793,10 @@ bincode_hist = _swigfaiss_gpu.bincode_hist
def ivec_checksum(*args): def ivec_checksum(*args):
return _swigfaiss_gpu.ivec_checksum(*args) return _swigfaiss_gpu.ivec_checksum(*args)
ivec_checksum = _swigfaiss_gpu.ivec_checksum ivec_checksum = _swigfaiss_gpu.ivec_checksum
# Thin forwarding wrapper: passes all positional arguments straight through
# to the native _swigfaiss_gpu.fvecs_maybe_subsample and returns its result.
def fvecs_maybe_subsample(*args):
return _swigfaiss_gpu.fvecs_maybe_subsample(*args)
# Rebinds the module-level name directly to the native function, replacing
# the Python wrapper defined just above.
fvecs_maybe_subsample = _swigfaiss_gpu.fvecs_maybe_subsample
METRIC_INNER_PRODUCT = _swigfaiss_gpu.METRIC_INNER_PRODUCT METRIC_INNER_PRODUCT = _swigfaiss_gpu.METRIC_INNER_PRODUCT
METRIC_L2 = _swigfaiss_gpu.METRIC_L2 METRIC_L2 = _swigfaiss_gpu.METRIC_L2
class Index(_object): class Index(_object):
...@@ -1032,13 +1036,9 @@ class LinearTransform(VectorTransform): ...@@ -1032,13 +1036,9 @@ class LinearTransform(VectorTransform):
except: self.this = this except: self.this = this
def apply_noalloc(self, *args): return _swigfaiss_gpu.LinearTransform_apply_noalloc(self, *args) def apply_noalloc(self, *args): return _swigfaiss_gpu.LinearTransform_apply_noalloc(self, *args)
def transform_transpose(self, *args): return _swigfaiss_gpu.LinearTransform_transform_transpose(self, *args) def transform_transpose(self, *args): return _swigfaiss_gpu.LinearTransform_transform_transpose(self, *args)
__swig_setmethods__["max_points_per_d"] = _swigfaiss_gpu.LinearTransform_max_points_per_d_set
__swig_getmethods__["max_points_per_d"] = _swigfaiss_gpu.LinearTransform_max_points_per_d_get
if _newclass:max_points_per_d = _swig_property(_swigfaiss_gpu.LinearTransform_max_points_per_d_get, _swigfaiss_gpu.LinearTransform_max_points_per_d_set)
__swig_setmethods__["verbose"] = _swigfaiss_gpu.LinearTransform_verbose_set __swig_setmethods__["verbose"] = _swigfaiss_gpu.LinearTransform_verbose_set
__swig_getmethods__["verbose"] = _swigfaiss_gpu.LinearTransform_verbose_get __swig_getmethods__["verbose"] = _swigfaiss_gpu.LinearTransform_verbose_get
if _newclass:verbose = _swig_property(_swigfaiss_gpu.LinearTransform_verbose_get, _swigfaiss_gpu.LinearTransform_verbose_set) if _newclass:verbose = _swig_property(_swigfaiss_gpu.LinearTransform_verbose_get, _swigfaiss_gpu.LinearTransform_verbose_set)
def maybe_subsample_train_set(self, *args): return _swigfaiss_gpu.LinearTransform_maybe_subsample_train_set(self, *args)
__swig_destroy__ = _swigfaiss_gpu.delete_LinearTransform __swig_destroy__ = _swigfaiss_gpu.delete_LinearTransform
__del__ = lambda self : None; __del__ = lambda self : None;
LinearTransform_swigregister = _swigfaiss_gpu.LinearTransform_swigregister LinearTransform_swigregister = _swigfaiss_gpu.LinearTransform_swigregister
...@@ -1077,6 +1077,9 @@ class PCAMatrix(LinearTransform): ...@@ -1077,6 +1077,9 @@ class PCAMatrix(LinearTransform):
__swig_setmethods__["random_rotation"] = _swigfaiss_gpu.PCAMatrix_random_rotation_set __swig_setmethods__["random_rotation"] = _swigfaiss_gpu.PCAMatrix_random_rotation_set
__swig_getmethods__["random_rotation"] = _swigfaiss_gpu.PCAMatrix_random_rotation_get __swig_getmethods__["random_rotation"] = _swigfaiss_gpu.PCAMatrix_random_rotation_get
if _newclass:random_rotation = _swig_property(_swigfaiss_gpu.PCAMatrix_random_rotation_get, _swigfaiss_gpu.PCAMatrix_random_rotation_set) if _newclass:random_rotation = _swig_property(_swigfaiss_gpu.PCAMatrix_random_rotation_get, _swigfaiss_gpu.PCAMatrix_random_rotation_set)
__swig_setmethods__["max_points_per_d"] = _swigfaiss_gpu.PCAMatrix_max_points_per_d_set
__swig_getmethods__["max_points_per_d"] = _swigfaiss_gpu.PCAMatrix_max_points_per_d_get
if _newclass:max_points_per_d = _swig_property(_swigfaiss_gpu.PCAMatrix_max_points_per_d_get, _swigfaiss_gpu.PCAMatrix_max_points_per_d_set)
__swig_setmethods__["balanced_bins"] = _swigfaiss_gpu.PCAMatrix_balanced_bins_set __swig_setmethods__["balanced_bins"] = _swigfaiss_gpu.PCAMatrix_balanced_bins_set
__swig_getmethods__["balanced_bins"] = _swigfaiss_gpu.PCAMatrix_balanced_bins_get __swig_getmethods__["balanced_bins"] = _swigfaiss_gpu.PCAMatrix_balanced_bins_get
if _newclass:balanced_bins = _swig_property(_swigfaiss_gpu.PCAMatrix_balanced_bins_get, _swigfaiss_gpu.PCAMatrix_balanced_bins_set) if _newclass:balanced_bins = _swig_property(_swigfaiss_gpu.PCAMatrix_balanced_bins_get, _swigfaiss_gpu.PCAMatrix_balanced_bins_set)
...@@ -1122,9 +1125,9 @@ class OPQMatrix(LinearTransform): ...@@ -1122,9 +1125,9 @@ class OPQMatrix(LinearTransform):
__swig_setmethods__["niter_pq_0"] = _swigfaiss_gpu.OPQMatrix_niter_pq_0_set __swig_setmethods__["niter_pq_0"] = _swigfaiss_gpu.OPQMatrix_niter_pq_0_set
__swig_getmethods__["niter_pq_0"] = _swigfaiss_gpu.OPQMatrix_niter_pq_0_get __swig_getmethods__["niter_pq_0"] = _swigfaiss_gpu.OPQMatrix_niter_pq_0_get
if _newclass:niter_pq_0 = _swig_property(_swigfaiss_gpu.OPQMatrix_niter_pq_0_get, _swigfaiss_gpu.OPQMatrix_niter_pq_0_set) if _newclass:niter_pq_0 = _swig_property(_swigfaiss_gpu.OPQMatrix_niter_pq_0_get, _swigfaiss_gpu.OPQMatrix_niter_pq_0_set)
__swig_setmethods__["max_points_per_d"] = _swigfaiss_gpu.OPQMatrix_max_points_per_d_set __swig_setmethods__["max_train_points"] = _swigfaiss_gpu.OPQMatrix_max_train_points_set
__swig_getmethods__["max_points_per_d"] = _swigfaiss_gpu.OPQMatrix_max_points_per_d_get __swig_getmethods__["max_train_points"] = _swigfaiss_gpu.OPQMatrix_max_train_points_get
if _newclass:max_points_per_d = _swig_property(_swigfaiss_gpu.OPQMatrix_max_points_per_d_get, _swigfaiss_gpu.OPQMatrix_max_points_per_d_set) if _newclass:max_train_points = _swig_property(_swigfaiss_gpu.OPQMatrix_max_train_points_get, _swigfaiss_gpu.OPQMatrix_max_train_points_set)
__swig_setmethods__["verbose"] = _swigfaiss_gpu.OPQMatrix_verbose_set __swig_setmethods__["verbose"] = _swigfaiss_gpu.OPQMatrix_verbose_set
__swig_getmethods__["verbose"] = _swigfaiss_gpu.OPQMatrix_verbose_get __swig_getmethods__["verbose"] = _swigfaiss_gpu.OPQMatrix_verbose_get
if _newclass:verbose = _swig_property(_swigfaiss_gpu.OPQMatrix_verbose_get, _swigfaiss_gpu.OPQMatrix_verbose_set) if _newclass:verbose = _swig_property(_swigfaiss_gpu.OPQMatrix_verbose_get, _swigfaiss_gpu.OPQMatrix_verbose_set)
...@@ -1954,10 +1957,6 @@ class IndexIDMap(Index): ...@@ -1954,10 +1957,6 @@ class IndexIDMap(Index):
__swig_setmethods__["id_map"] = _swigfaiss_gpu.IndexIDMap_id_map_set __swig_setmethods__["id_map"] = _swigfaiss_gpu.IndexIDMap_id_map_set
__swig_getmethods__["id_map"] = _swigfaiss_gpu.IndexIDMap_id_map_get __swig_getmethods__["id_map"] = _swigfaiss_gpu.IndexIDMap_id_map_get
if _newclass:id_map = _swig_property(_swigfaiss_gpu.IndexIDMap_id_map_get, _swigfaiss_gpu.IndexIDMap_id_map_set) if _newclass:id_map = _swig_property(_swigfaiss_gpu.IndexIDMap_id_map_get, _swigfaiss_gpu.IndexIDMap_id_map_set)
def __init__(self, *args):
this = _swigfaiss_gpu.new_IndexIDMap(*args)
try: self.this.append(this)
except: self.this = this
def add_with_ids(self, *args): return _swigfaiss_gpu.IndexIDMap_add_with_ids(self, *args) def add_with_ids(self, *args): return _swigfaiss_gpu.IndexIDMap_add_with_ids(self, *args)
def add(self, *args): return _swigfaiss_gpu.IndexIDMap_add(self, *args) def add(self, *args): return _swigfaiss_gpu.IndexIDMap_add(self, *args)
def search(self, *args): return _swigfaiss_gpu.IndexIDMap_search(self, *args) def search(self, *args): return _swigfaiss_gpu.IndexIDMap_search(self, *args)
...@@ -1966,6 +1965,10 @@ class IndexIDMap(Index): ...@@ -1966,6 +1965,10 @@ class IndexIDMap(Index):
def set_typename(self): return _swigfaiss_gpu.IndexIDMap_set_typename(self) def set_typename(self): return _swigfaiss_gpu.IndexIDMap_set_typename(self)
__swig_destroy__ = _swigfaiss_gpu.delete_IndexIDMap __swig_destroy__ = _swigfaiss_gpu.delete_IndexIDMap
__del__ = lambda self : None; __del__ = lambda self : None;
# Creates the underlying native object by forwarding all arguments to
# _swigfaiss_gpu.new_IndexIDMap and records the resulting handle on self.this.
def __init__(self, *args):
this = _swigfaiss_gpu.new_IndexIDMap(*args)
# Append the handle to an existing self.this when that works; on any
# exception (bare except) fall back to plain assignment.
try: self.this.append(this)
except: self.this = this
IndexIDMap_swigregister = _swigfaiss_gpu.IndexIDMap_swigregister IndexIDMap_swigregister = _swigfaiss_gpu.IndexIDMap_swigregister
IndexIDMap_swigregister(IndexIDMap) IndexIDMap_swigregister(IndexIDMap)
...@@ -2064,6 +2067,30 @@ class GpuIndex(Index): ...@@ -2064,6 +2067,30 @@ class GpuIndex(Index):
GpuIndex_swigregister = _swigfaiss_gpu.GpuIndex_swigregister GpuIndex_swigregister = _swigfaiss_gpu.GpuIndex_swigregister
GpuIndex_swigregister(GpuIndex) GpuIndex_swigregister(GpuIndex)
# Python proxy class for the native GpuIndexFlatConfig object exposed by the
# _swigfaiss_gpu extension module. It exposes three attributes -- device,
# useFloat16 and storeTransposed -- each backed by a getter/setter pair from
# the extension module.
class GpuIndexFlatConfig(_object):
# Attribute writes are routed through _swig_setattr, which is handed this
# class and the attribute name/value; __swig_setmethods__ holds the
# registered setters.
__swig_setmethods__ = {}
__setattr__ = lambda self, name, value: _swig_setattr(self, GpuIndexFlatConfig, name, value)
# Attribute reads are routed through _swig_getattr in the same way;
# __swig_getmethods__ holds the registered getters.
__swig_getmethods__ = {}
__getattr__ = lambda self, name: _swig_getattr(self, GpuIndexFlatConfig, name)
__repr__ = _swig_repr
# Takes no arguments: creates the native object with
# _swigfaiss_gpu.new_GpuIndexFlatConfig() and records the handle on self.this
# (append if possible, otherwise assign).
def __init__(self):
this = _swigfaiss_gpu.new_GpuIndexFlatConfig()
try: self.this.append(this)
except: self.this = this
# "device" attribute: registered setter/getter, plus a property when
# _newclass is truthy.
__swig_setmethods__["device"] = _swigfaiss_gpu.GpuIndexFlatConfig_device_set
__swig_getmethods__["device"] = _swigfaiss_gpu.GpuIndexFlatConfig_device_get
if _newclass:device = _swig_property(_swigfaiss_gpu.GpuIndexFlatConfig_device_get, _swigfaiss_gpu.GpuIndexFlatConfig_device_set)
# "useFloat16" attribute: same registration pattern as "device".
__swig_setmethods__["useFloat16"] = _swigfaiss_gpu.GpuIndexFlatConfig_useFloat16_set
__swig_getmethods__["useFloat16"] = _swigfaiss_gpu.GpuIndexFlatConfig_useFloat16_get
if _newclass:useFloat16 = _swig_property(_swigfaiss_gpu.GpuIndexFlatConfig_useFloat16_get, _swigfaiss_gpu.GpuIndexFlatConfig_useFloat16_set)
# "storeTransposed" attribute: same registration pattern as "device".
__swig_setmethods__["storeTransposed"] = _swigfaiss_gpu.GpuIndexFlatConfig_storeTransposed_set
__swig_getmethods__["storeTransposed"] = _swigfaiss_gpu.GpuIndexFlatConfig_storeTransposed_get
if _newclass:storeTransposed = _swig_property(_swigfaiss_gpu.GpuIndexFlatConfig_storeTransposed_get, _swigfaiss_gpu.GpuIndexFlatConfig_storeTransposed_set)
# Destruction is delegated to the extension module's delete function;
# __del__ itself is a no-op lambda.
__swig_destroy__ = _swigfaiss_gpu.delete_GpuIndexFlatConfig
__del__ = lambda self : None;
# Register the proxy class with the extension module.
GpuIndexFlatConfig_swigregister = _swigfaiss_gpu.GpuIndexFlatConfig_swigregister
GpuIndexFlatConfig_swigregister(GpuIndexFlatConfig)
class GpuIndexFlat(GpuIndex): class GpuIndexFlat(GpuIndex):
__swig_setmethods__ = {} __swig_setmethods__ = {}
for _s in [GpuIndex]: __swig_setmethods__.update(getattr(_s,'__swig_setmethods__',{})) for _s in [GpuIndex]: __swig_setmethods__.update(getattr(_s,'__swig_setmethods__',{}))
...@@ -2521,6 +2548,9 @@ class GpuClonerOptions(_object): ...@@ -2521,6 +2548,9 @@ class GpuClonerOptions(_object):
__swig_setmethods__["reserveVecs"] = _swigfaiss_gpu.GpuClonerOptions_reserveVecs_set __swig_setmethods__["reserveVecs"] = _swigfaiss_gpu.GpuClonerOptions_reserveVecs_set
__swig_getmethods__["reserveVecs"] = _swigfaiss_gpu.GpuClonerOptions_reserveVecs_get __swig_getmethods__["reserveVecs"] = _swigfaiss_gpu.GpuClonerOptions_reserveVecs_get
if _newclass:reserveVecs = _swig_property(_swigfaiss_gpu.GpuClonerOptions_reserveVecs_get, _swigfaiss_gpu.GpuClonerOptions_reserveVecs_set) if _newclass:reserveVecs = _swig_property(_swigfaiss_gpu.GpuClonerOptions_reserveVecs_get, _swigfaiss_gpu.GpuClonerOptions_reserveVecs_set)
__swig_setmethods__["storeTransposed"] = _swigfaiss_gpu.GpuClonerOptions_storeTransposed_set
__swig_getmethods__["storeTransposed"] = _swigfaiss_gpu.GpuClonerOptions_storeTransposed_get
if _newclass:storeTransposed = _swig_property(_swigfaiss_gpu.GpuClonerOptions_storeTransposed_get, _swigfaiss_gpu.GpuClonerOptions_storeTransposed_set)
__swig_setmethods__["verbose"] = _swigfaiss_gpu.GpuClonerOptions_verbose_set __swig_setmethods__["verbose"] = _swigfaiss_gpu.GpuClonerOptions_verbose_set
__swig_getmethods__["verbose"] = _swigfaiss_gpu.GpuClonerOptions_verbose_get __swig_getmethods__["verbose"] = _swigfaiss_gpu.GpuClonerOptions_verbose_get
if _newclass:verbose = _swig_property(_swigfaiss_gpu.GpuClonerOptions_verbose_get, _swigfaiss_gpu.GpuClonerOptions_verbose_set) if _newclass:verbose = _swig_property(_swigfaiss_gpu.GpuClonerOptions_verbose_get, _swigfaiss_gpu.GpuClonerOptions_verbose_set)
......
This diff is collapsed.
This diff is collapsed.
...@@ -1790,6 +1790,28 @@ int fvec_madd_and_argmin (size_t n, const float *a, ...@@ -1790,6 +1790,28 @@ int fvec_madd_and_argmin (size_t n, const float *a,
/** Subsample a training set when it holds more than nmax vectors.
 *
 * @param d        dimension of the vectors
 * @param n        in: number of vectors in x; out: number of vectors in the
 *                 returned array (set to nmax when subsampling happens,
 *                 left untouched otherwise)
 * @param nmax     maximum number of vectors to keep
 * @param x        input vectors, size *n * d
 * @param verbose  print a message when subsampling happens
 * @param seed     seed passed to rand_perm to select the subset
 * @return         x itself when *n <= nmax (nothing is allocated);
 *                 otherwise a newly allocated array of size nmax * d that
 *                 the caller must release with delete[].
 */
const float *fvecs_maybe_subsample (
          size_t d, size_t *n, size_t nmax, const float *x,
          bool verbose, long seed)
{
    if (*n <= nmax) return x; // nothing to do

    const size_t n2 = nmax;
    if (verbose) {
        // %zu is the correct conversion for size_t; %ld mismatches the
        // argument type on platforms where long and size_t differ.
        printf ("  Input training set too big (max size is %zu), sampling "
                "%zu / %zu vectors\n", nmax, n2, *n);
    }

    // NOTE(review): the index buffer is int-typed because rand_perm is
    // called with an int pointer; this presumably requires *n to fit in an
    // int -- confirm against rand_perm's declaration.
    std::vector<int> subset (*n);
    rand_perm (subset.data (), *n, seed);

    // Copy the vectors selected by the first n2 entries of subset into a
    // contiguous buffer.
    float *x_subset = new float[n2 * d];
    for (size_t i = 0; i < n2; i++) {
        memcpy (&x_subset[i * d],
                &x[size_t(subset[i]) * d],
                sizeof (x[0]) * d);
    }

    *n = n2;
    return x_subset;
}
} // namespace faiss } // namespace faiss
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment