Unverified commit 76bec0b5 authored by Lucas Hosseini, committed by GitHub

Facebook sync (#573)

Features:

- automatic tracking of C++ references in Python
- non-Intel platforms supported -- some functions optimized for ARM
- override nprobe for concurrent searches
- support for floating-point quantizers in binary indexes

Bug fixes:

- no more segfaults in Python (I know it's the same as the first feature, but it's important!)
- fix GpuIndexIVFFlat issues for float32 with 64 / 128 dims
- fix sharding of flat indexes on GPU with index_cpu_to_gpu_multiple
parent 19cea3d2
@@ -518,6 +518,21 @@ void ParameterSpace::set_index_parameter (
return;
}
}
if (name == "efSearch") {
if (DC (IndexHNSW)) {
ix->hnsw.efSearch = int(val);
return;
}
if (DC (IndexIVF)) {
if (IndexHNSW *cq =
dynamic_cast<IndexHNSW *>(ix->quantizer)) {
cq->hnsw.efSearch = int(val);
return;
}
}
}
FAISS_THROW_FMT ("ParameterSpace::set_index_parameter:" FAISS_THROW_FMT ("ParameterSpace::set_index_parameter:"
"could not set parameter %s", "could not set parameter %s",
name.c_str()); name.c_str());
@@ -682,6 +697,7 @@ struct VTChain {
char get_trains_alone(const Index *coarse_quantizer) {
return
dynamic_cast<const MultiIndexQuantizer*>(coarse_quantizer) ? 1 :
dynamic_cast<const IndexHNSWFlat*>(coarse_quantizer) ? 2 :
0;
}
@@ -738,6 +754,11 @@ Index *index_factory (int d, const char *description_in, MetricType metric)
} else if (stok == "L2norm") {
vt_1 = new NormalizationTransform (d, 2.0);
// coarse quantizers
} else if (!coarse_quantizer &&
sscanf (tok, "IVF%d_HNSW%d", &ncentroids, &M) == 2) {
FAISS_THROW_IF_NOT (metric == METRIC_L2);
coarse_quantizer_1 = new IndexHNSWFlat (d, M);
} else if (!coarse_quantizer &&
sscanf (tok, "IVF%d", &ncentroids) == 1) {
@@ -935,4 +956,5 @@ IndexBinary *index_binary_factory(int d, const char *description)
}
} // namespace faiss
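The new parser branch accepts factory strings of the form IVF<nlist>_HNSW<M>. A minimal sketch of building such an index (editor illustration; the ",Flat" storage suffix follows the usual factory grammar and is an assumption, as is the L2 default metric):

    // 4096-centroid IVF whose coarse quantizer is an HNSW graph with M = 32,
    // flat (uncompressed) inverted lists; only METRIC_L2 is accepted here.
    faiss::Index *index = faiss::index_factory (128, "IVF4096_HNSW32,Flat");
    index->train (nt, xt);   // nt training vectors of dimension 128 (hypothetical data)
    index->add (nb, xb);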
@@ -205,8 +205,6 @@ Index *index_factory (int d, const char *description,
IndexBinary *index_binary_factory (int d, const char *description);
} // namespace faiss
......
@@ -198,7 +198,7 @@ struct IOWriter {
struct VectorIOReader:IOReader {
const std::vector<uint8_t> data;
std::vector<uint8_t> data;
size_t rp = 0;
size_t operator()(void *ptr, size_t size, size_t nitems) override;
};
......
/**
* Copyright (c) 2015-present, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under the BSD+Patents license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
/*
* implementation of Hyper-parameter auto-tuning
*/
#include "IVFlib.h"
#include <memory>
#include "VectorTransform.h"
#include "FaissAssert.h"
namespace faiss { namespace ivflib {
void check_compatible_for_merge (const Index * index0,
const Index * index1)
{
const faiss::IndexPreTransform *pt0 =
dynamic_cast<const faiss::IndexPreTransform *>(index0);
if (pt0) {
const faiss::IndexPreTransform *pt1 =
dynamic_cast<const faiss::IndexPreTransform *>(index1);
FAISS_THROW_IF_NOT_MSG (pt1, "both indexes should be pretransforms");
FAISS_THROW_IF_NOT (pt0->chain.size() == pt1->chain.size());
for (int i = 0; i < pt0->chain.size(); i++) {
FAISS_THROW_IF_NOT (typeid(pt0->chain[i]) == typeid(pt1->chain[i]));
}
index0 = pt0->index;
index1 = pt1->index;
}
FAISS_THROW_IF_NOT (typeid(index0) == typeid(index1));
FAISS_THROW_IF_NOT (index0->d == index1->d &&
index0->metric_type == index1->metric_type);
const faiss::IndexIVF *ivf0 = dynamic_cast<const faiss::IndexIVF *>(index0);
if (ivf0) {
const faiss::IndexIVF *ivf1 =
dynamic_cast<const faiss::IndexIVF *>(index1);
FAISS_THROW_IF_NOT (ivf1);
ivf0->check_compatible_for_merge (*ivf1);
}
// TODO: check as thoroughly for other index types
}
const IndexIVF * extract_index_ivf (const Index * index)
{
if (auto *pt =
dynamic_cast<const IndexPreTransform *>(index)) {
index = pt->index;
}
auto *ivf = dynamic_cast<const IndexIVF *>(index);
FAISS_THROW_IF_NOT (ivf);
return ivf;
}
IndexIVF * extract_index_ivf (Index * index) {
return const_cast<IndexIVF*> (extract_index_ivf ((const Index*)(index)));
}
void merge_into(faiss::Index *index0, faiss::Index *index1, bool shift_ids) {
check_compatible_for_merge (index0, index1);
IndexIVF * ivf0 = extract_index_ivf (index0);
IndexIVF * ivf1 = extract_index_ivf (index1);
ivf0->merge_from (*ivf1, shift_ids ? ivf0->ntotal : 0);
// useful for IndexPreTransform
index0->ntotal = ivf0->ntotal;
index1->ntotal = ivf1->ntotal;
}
void search_centroid(faiss::Index *index,
const float* x, int n,
idx_t* centroid_ids)
{
std::unique_ptr<float[]> del;
if (auto index_pre = dynamic_cast<faiss::IndexPreTransform*>(index)) {
x = index_pre->apply_chain(n, x);
del.reset((float*)x);
index = index_pre->index;
}
faiss::IndexIVF* index_ivf = dynamic_cast<faiss::IndexIVF*>(index);
assert(index_ivf);
index_ivf->quantizer->assign(n, x, centroid_ids);
}
void search_and_return_centroids(faiss::Index *index,
size_t n,
const float* xin,
long k,
float *distances,
idx_t* labels,
idx_t* query_centroid_ids,
idx_t* result_centroid_ids)
{
const float *x = xin;
std::unique_ptr<float []> del;
if (auto index_pre = dynamic_cast<faiss::IndexPreTransform*>(index)) {
x = index_pre->apply_chain(n, x);
del.reset((float*)x);
index = index_pre->index;
}
faiss::IndexIVF* index_ivf = dynamic_cast<faiss::IndexIVF*>(index);
assert(index_ivf);
size_t nprobe = index_ivf->nprobe;
std::vector<idx_t> cent_nos (n * nprobe);
std::vector<float> cent_dis (n * nprobe);
index_ivf->quantizer->search(
n, x, nprobe, cent_dis.data(), cent_nos.data());
if (query_centroid_ids) {
for (size_t i = 0; i < n; i++)
query_centroid_ids[i] = cent_nos[i * nprobe];
}
index_ivf->search_preassigned (n, x, k,
cent_nos.data(), cent_dis.data(),
distances, labels, true);
for (size_t i = 0; i < n * k; i++) {
idx_t label = labels[i];
if (label < 0) {
if (result_centroid_ids)
result_centroid_ids[i] = -1;
} else {
long list_no = label >> 32;
long list_index = label & 0xffffffff;
if (result_centroid_ids)
result_centroid_ids[i] = list_no;
labels[i] = index_ivf->invlists->get_single_id(list_no, list_index);
}
}
}
SlidingIndexWindow::SlidingIndexWindow (Index *index): index (index) {
n_slice = 0;
IndexIVF* index_ivf = const_cast<IndexIVF*>(extract_index_ivf (index));
ils = dynamic_cast<ArrayInvertedLists *> (index_ivf->invlists);
FAISS_THROW_IF_NOT_MSG (ils,
"only supports indexes with ArrayInvertedLists");
nlist = ils->nlist;
sizes.resize(nlist);
}
template<class T>
static void shift_and_add (std::vector<T> & dst,
size_t remove,
const std::vector<T> & src)
{
if (remove > 0)
memmove (dst.data(), dst.data() + remove,
(dst.size() - remove) * sizeof (T));
size_t insert_point = dst.size() - remove;
dst.resize (insert_point + src.size());
memcpy (dst.data() + insert_point, src.data (), src.size() * sizeof(T));
}
template<class T>
static void remove_from_begin (std::vector<T> & v,
size_t remove)
{
if (remove > 0)
v.erase (v.begin(), v.begin() + remove);
}
void SlidingIndexWindow::step(const Index *sub_index, bool remove_oldest) {
FAISS_THROW_IF_NOT_MSG (!remove_oldest || n_slice > 0,
"cannot remove slice: there is none");
const ArrayInvertedLists *ils2 = nullptr;
if(sub_index) {
check_compatible_for_merge (index, sub_index);
ils2 = dynamic_cast<const ArrayInvertedLists*>(
extract_index_ivf (sub_index)->invlists);
FAISS_THROW_IF_NOT_MSG (ils2, "supports only ArrayInvertedLists");
}
IndexIVF *index_ivf = extract_index_ivf (index);
if (remove_oldest && ils2) {
for (int i = 0; i < nlist; i++) {
std::vector<size_t> & sizesi = sizes[i];
size_t amount_to_remove = sizesi[0];
index_ivf->ntotal += ils2->ids[i].size() - amount_to_remove;
shift_and_add (ils->ids[i], amount_to_remove, ils2->ids[i]);
shift_and_add (ils->codes[i], amount_to_remove * ils->code_size,
ils2->codes[i]);
for (int j = 0; j + 1 < n_slice; j++) {
sizesi[j] = sizesi[j + 1] - amount_to_remove;
}
sizesi[n_slice - 1] = ils->ids[i].size();
}
} else if (ils2) {
for (int i = 0; i < nlist; i++) {
index_ivf->ntotal += ils2->ids[i].size();
shift_and_add (ils->ids[i], 0, ils2->ids[i]);
shift_and_add (ils->codes[i], 0, ils2->codes[i]);
sizes[i].push_back(ils->ids[i].size());
}
n_slice++;
} else if (remove_oldest) {
for (int i = 0; i < nlist; i++) {
size_t amount_to_remove = sizes[i][0];
index_ivf->ntotal -= amount_to_remove;
remove_from_begin (ils->ids[i], amount_to_remove);
remove_from_begin (ils->codes[i],
amount_to_remove * ils->code_size);
for (int j = 0; j + 1 < n_slice; j++) {
sizes[i][j] = sizes[i][j + 1] - amount_to_remove;
}
sizes[i].resize(sizes[i].size() - 1);
}
n_slice--;
} else {
FAISS_THROW_MSG ("nothing to do???");
}
index->ntotal = index_ivf->ntotal;
}
// Get a subset of inverted lists [i0, i1). Works on IndexIVF's and
// IndexIVF's embedded in an IndexPreTransform
ArrayInvertedLists *
get_invlist_range (const Index *index, long i0, long i1)
{
const IndexIVF *ivf = extract_index_ivf (index);
FAISS_THROW_IF_NOT (0 <= i0 && i0 <= i1 && i1 <= ivf->nlist);
const InvertedLists *src = ivf->invlists;
ArrayInvertedLists * il = new ArrayInvertedLists(i1 - i0, src->code_size);
for (long i = i0; i < i1; i++) {
il->add_entries(i - i0, src->list_size(i),
InvertedLists::ScopedIds (src, i).get(),
InvertedLists::ScopedCodes (src, i).get());
}
return il;
}
void set_invlist_range (Index *index, long i0, long i1,
ArrayInvertedLists * src)
{
IndexIVF *ivf = extract_index_ivf (index);
FAISS_THROW_IF_NOT (0 <= i0 && i0 <= i1 && i1 <= ivf->nlist);
ArrayInvertedLists *dst = dynamic_cast<ArrayInvertedLists *>(ivf->invlists);
FAISS_THROW_IF_NOT_MSG (dst, "only ArrayInvertedLists supported");
FAISS_THROW_IF_NOT (src->nlist == i1 - i0 &&
dst->code_size == src->code_size);
size_t ntotal = index->ntotal;
for (long i = i0 ; i < i1; i++) {
ntotal -= dst->list_size (i);
ntotal += src->list_size (i - i0);
std::swap (src->codes[i - i0], dst->codes[i]);
std::swap (src->ids[i - i0], dst->ids[i]);
}
ivf->ntotal = index->ntotal = ntotal;
}
void search_with_parameters (const Index *index,
idx_t n, const float *x, idx_t k,
float *distances, idx_t *labels,
IVFSearchParameters *params)
{
FAISS_THROW_IF_NOT (params);
const float *prev_x = x;
ScopeDeleter<float> del;
if (auto ip = dynamic_cast<const IndexPreTransform *> (index)) {
x = ip->apply_chain (n, x);
if (x != prev_x) {
del.set(x);
}
index = ip->index;
}
std::vector<idx_t> Iq(params->nprobe * n);
std::vector<float> Dq(params->nprobe * n);
const IndexIVF *index_ivf = dynamic_cast<const IndexIVF *>(index);
FAISS_THROW_IF_NOT (index_ivf);
index_ivf->quantizer->search(n, x, params->nprobe,
Dq.data(), Iq.data());
index_ivf->search_preassigned(n, x, k, Iq.data(), Dq.data(),
distances, labels,
false, params);
}
} } // namespace faiss::ivflib
/**
* Copyright (c) 2015-present, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under the BSD+Patents license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#ifndef FAISS_IVFLIB_H
#define FAISS_IVFLIB_H
/** Since IVF (inverted file) indexes are so useful for
* large-scale use cases, we group a few functions related to them in
* this small library. Most functions work both on IndexIVFs and
* IndexIVFs embedded within an IndexPreTransform.
*/
#include <vector>
#include "IndexIVF.h"
namespace faiss { namespace ivflib {
/** check if two indexes have the same parameters and are trained in
* the same way, otherwise throw. */
void check_compatible_for_merge (const Index * index1,
const Index * index2);
/** get an IndexIVF from an index. The index may be an IndexIVF or
* some wrapper class that encloses an IndexIVF
*
* throws an exception if this is not the case.
*/
const IndexIVF * extract_index_ivf (const Index * index);
IndexIVF * extract_index_ivf (Index * index);
/** Merge index1 into index0. Works on IndexIVF's and IndexIVF's
* embedded in an IndexPreTransform. On output, index1 is empty.
*
* @param shift_ids: translate the ids from index1 by adding
* index0->ntotal (its value before the merge), so that they do not
* collide with the ids already in index0
*/
void merge_into(Index *index0, Index *index1, bool shift_ids);
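A short usage sketch (editor note, using hypothetical index0/index1 pointers): merging a freshly built shard into a main index.

    // index0 and index1 are compatible IVF indexes (possibly wrapped in
    // an IndexPreTransform); after the call index1 is empty.
    faiss::ivflib::merge_into (index0, index1, /*shift_ids=*/true);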
typedef Index::idx_t idx_t;
/* Returns the cluster (centroid id) each input vector belongs to.
*
* @param index Index, which should be an IVF index
* (otherwise there are no clusters)
* @param x object descriptors for which the centroids should be found,
* size n * d
* @param centroid_ids
* cluster id each object belongs to, size n
*/
void search_centroid(Index *index,
const float* x, int n,
idx_t* centroid_ids);
/* Performs a search and additionally reports the cluster each query
* and each result belongs to.
*
* @param index Index, which should be an IVF index
* (otherwise there are no clusters)
* @param query_centroid_ids
* centroid ids corresponding to the query vectors (size n)
* @param result_centroid_ids
* centroid ids corresponding to the results (size n * k)
* other arguments are the same as for the standard search function
*/
void search_and_return_centroids(Index *index,
size_t n,
const float* xin,
long k,
float *distances,
idx_t* labels,
idx_t* query_centroid_ids,
idx_t* result_centroid_ids);
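For illustration (editor sketch, with hypothetical index/nq/k/queries variables), a call that collects the centroid of each query and of each result:

    std::vector<float> D (nq * k);
    std::vector<faiss::ivflib::idx_t> I (nq * k), q_cent (nq), r_cent (nq * k);
    faiss::ivflib::search_and_return_centroids (index, nq, queries, k,
                                                D.data(), I.data(),
                                                q_cent.data(), r_cent.data());
    // r_cent[i] is set to -1 when labels[i] < 0 (missing result)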
/** A set of IndexIVFs concatenated together in a FIFO fashion.
* At each "step", the oldest index slice is removed and a new index is added.
*/
struct SlidingIndexWindow {
/// common index that contains the sliding window
Index * index;
/// InvertedLists of index
ArrayInvertedLists *ils;
/// number of slices currently in index
int n_slice;
/// same as index->nlist
size_t nlist;
/// cumulative list sizes at each slice
std::vector<std::vector<size_t> > sizes;
/// index should be initially empty and trained
SlidingIndexWindow (Index *index);
/** Add one index to the current index and remove the oldest one.
*
* @param sub_index slice to swap in (can be NULL)
* @param remove_oldest if true, remove the oldest slices */
void step(const Index *sub_index, bool remove_oldest);
};
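A sketch of the intended usage (editor illustration; make_empty_trained_slice is a hypothetical helper that returns an empty IndexIVF trained the same way as the window's index, and main_index/nb/new_batch are placeholders):

    faiss::ivflib::SlidingIndexWindow window (main_index);  // main_index: empty, trained

    // every period: index the new data in a slice and rotate it in
    faiss::Index *slice = make_empty_trained_slice ();
    slice->add (nb, new_batch);
    bool remove_oldest = window.n_slice >= 24;   // keep at most 24 slices
    window.step (slice, remove_oldest);
    delete slice;   // its entries were copied into main_index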
/// Get a subset of inverted lists [i0, i1)
ArrayInvertedLists * get_invlist_range (const Index *index,
long i0, long i1);
/// Set a subset of inverted lists
void set_invlist_range (Index *index, long i0, long i1,
ArrayInvertedLists * src);
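These two calls make it possible to move a range of inverted lists between compatible indexes, e.g. when splitting an index across machines. Editor sketch with hypothetical source_index/dest_index/i0/i1:

    // copy lists [i0, i1) out of a source index ...
    faiss::ArrayInvertedLists *slice =
        faiss::ivflib::get_invlist_range (source_index, i0, i1);
    // ... and swap them into the same range of a destination index
    faiss::ivflib::set_invlist_range (dest_index, i0, i1, slice);
    delete slice;   // now holds the lists previously stored in dest_index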
// search an IndexIVF, possibly embedded in an IndexPreTransform
// with given parameters
void search_with_parameters (const Index *index,
idx_t n, const float *x, idx_t k,
float *distances, idx_t *labels,
IVFSearchParameters *params);
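This is the hook behind the "override nprobe for concurrent searches" feature listed above: the per-call parameters leave the index's own nprobe untouched, so concurrent queries can use different settings. A minimal sketch (editor illustration; max_codes is assumed to be the other field of IVFSearchParameters, which is not shown in this excerpt, and index/nq/queries/k/D/I are placeholders):

    faiss::IVFSearchParameters params;
    params.nprobe = 32;     // per-call value, index->nprobe is not modified
    params.max_codes = 0;   // assumed field; 0 = no cap on codes visited
    faiss::ivflib::search_with_parameters (index, nq, queries, k,
                                           D.data(), I.data(), &params);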
} } // namespace faiss::ivflib
#endif
/**
* Copyright (c) 2015-present, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under the BSD+Patents license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#include "IndexBinaryFromFloat.h"
#include <algorithm>
#include <cmath>
#include <memory>
#include "utils.h"
namespace faiss {
IndexBinaryFromFloat::IndexBinaryFromFloat(Index *index)
: IndexBinary(index->d),
index(index),
own_fields(false) {
is_trained = index->is_trained;
ntotal = index->ntotal;
}
IndexBinaryFromFloat::~IndexBinaryFromFloat() {
if (own_fields) {
delete index;
}
}
void IndexBinaryFromFloat::add(idx_t n, const uint8_t *x) {
constexpr idx_t bs = 32768;
std::unique_ptr<float[]> xf(new float[bs * d]);
for (idx_t b = 0; b < n; b += bs) {
idx_t bn = std::min(bs, n - b);
binary_to_real(bn * d, x + b * code_size, xf.get());
index->add(bn, xf.get());
}
ntotal = index->ntotal;
}
void IndexBinaryFromFloat::reset() {
index->reset();
ntotal = index->ntotal;
}
void IndexBinaryFromFloat::search(idx_t n, const uint8_t *x, idx_t k,
int32_t *distances, idx_t *labels) const {
constexpr idx_t bs = 32768;
std::unique_ptr<float[]> xf(new float[bs * d]);
std::unique_ptr<float[]> df(new float[bs * k]);
for (idx_t b = 0; b < n; b += bs) {
idx_t bn = std::min(bs, n - b);
binary_to_real(bn * d, x + b * code_size, xf.get());
index->search(bn, xf.get(), k, df.get(), labels + b * k);
for (int i = 0; i < bn * k; ++i) {
distances[b * k + i] = int32_t(std::round(df[i] / 4.0));
}
}
}
void IndexBinaryFromFloat::train(idx_t n, const uint8_t *x) {
std::unique_ptr<float[]> xf(new float[n * d]);
binary_to_real(n * d, x, xf.get());
index->train(n, xf.get());
is_trained = true;
ntotal = index->ntotal;
}
} // namespace faiss
/**
* Copyright (c) 2015-present, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under the BSD+Patents license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#ifndef FAISS_INDEX_BINARY_FROM_FLOAT_H
#define FAISS_INDEX_BINARY_FROM_FLOAT_H
#include "IndexBinary.h"
namespace faiss {
class Index;
/** IndexBinary backed by a float Index.
*
* Supports adding vectors and searching them.
*
* All queries are symmetric because there is no distinction between codes and
* vectors.
*/
struct IndexBinaryFromFloat : IndexBinary {
Index *index;
bool own_fields; ///< Whether object owns the index pointer.
explicit IndexBinaryFromFloat(Index *index);
~IndexBinaryFromFloat();
void add(idx_t n, const uint8_t *x) override;
void reset() override;
void search(idx_t n, const uint8_t *x, idx_t k,
int32_t *distances, idx_t *labels) const override;
void train(idx_t n, const uint8_t *x) override;
};
} // namespace faiss
#endif // FAISS_INDEX_BINARY_FROM_FLOAT_H
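Taken together with IndexBinaryIVF, this class is what enables the "support for floating-point quantizers in binary indexes" feature listed above. A minimal sketch (editor illustration; nt/xt/nb/xb/nq/xq/k/distances/labels are hypothetical data and buffers):

    int d = 256;                                  // dimension in bits
    faiss::IndexFlatL2 float_quantizer (d);       // float index over the +-1 embedding
    faiss::IndexBinaryFromFloat quantizer (&float_quantizer);
    faiss::IndexBinaryIVF index (&quantizer, d, 64);   // 64 inverted lists

    index.train (nt, xt);    // xt: nt binary vectors of d/8 bytes each
    index.add (nb, xb);
    index.search (nq, xq, k, distances, labels);  // int32 distances, idx_t labels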
@@ -24,7 +24,6 @@
namespace faiss {
IndexBinaryIVF::IndexBinaryIVF(IndexBinary *quantizer, size_t d, size_t nlist)
: IndexBinary(d),
invlists(new ArrayInvertedLists(nlist, code_size)),
@@ -291,26 +290,7 @@ void IndexBinaryIVF::merge_from(IndexBinaryIVF &other, idx_t add_id) {
FAISS_THROW_IF_NOT_MSG(typeid (*this) == typeid (other),
"can only merge indexes of the same type");
InvertedLists *oivf = other.invlists;
invlists->merge_from (other.invlists, add_id);
#pragma omp parallel for
for (long i = 0; i < nlist; i++) {
size_t list_size = oivf->list_size(i);
const idx_t * ids = oivf->get_ids(i);
if (add_id == 0) {
invlists->add_entries(i, list_size, ids,
oivf->get_codes(i));
} else {
std::vector <idx_t> new_ids(list_size);
for (size_t j = 0; j < list_size; j++) {
new_ids [j] = ids[j] + add_id;
}
invlists->add_entries(i, list_size, new_ids.data(),
oivf->get_codes(i));
}
oivf->resize(i, 0);
}
ntotal += other.ntotal;
other.ntotal = 0;
@@ -327,33 +307,6 @@ void IndexBinaryIVF::replace_invlists(InvertedLists *il, bool own) {
}
namespace {
void binary_to_real(int d, const uint8_t *x_in, float *x_out) {
for (int j = 0; j < d; ++j) {
if ((x_in[j / 8] & (1 << (j % 8))) == 0) {
x_out[j] = -1.0;
} else {
x_out[j] = 1.0;
}
}
}
void real_to_binary(int d, const float *x_in, uint8_t *x_out) {
for (int j = 0; j < d; ++j) {
if (x_in[j] > 0) {
x_out[j / 8] |= (1 << (j % 8));
} else {
x_out[j / 8] &= ~(1 << (j % 8));
}
}
}
} // namespace
void IndexBinaryIVF::train_q1(size_t n, const uint8_t *x, bool verbose) {
if (quantizer->is_trained && (quantizer->ntotal == nlist)) {
if (verbose)
@@ -366,22 +319,15 @@ void IndexBinaryIVF::train_q1(size_t n, const uint8_t *x, bool verbose) {
quantizer->reset();
std::unique_ptr<float[]> x_f(new float[n * d]);
for (int i = 0; i < n; ++i) {
binary_to_real(n * d, x, x_f.get());
binary_to_real(d,
x + i * code_size,
x_f.get() + i * d);
}
IndexFlatL2 index_tmp(d);
clus.train(n, x_f.get(), index_tmp);
std::unique_ptr<uint8_t[]> x_b(new uint8_t[clus.k * code_size]);
for (int i = 0; i < clus.k; ++i) {
real_to_binary(d * clus.k, clus.centroids.data(), x_b.get());
real_to_binary(d,
clus.centroids.data() + i * d,
x_b.get() + i * code_size);
}
quantizer->add(clus.k, x_b.get());
quantizer->is_trained = true;
}
@@ -426,7 +372,9 @@ void search_knn_hamming_heap(const IndexBinaryIVF& ivf,
nlistv++;
size_t list_size = ivf.invlists->list_size(key);
const uint8_t *list_vecs = (const uint8_t*)ivf.invlists->get_codes(key);
InvertedLists::ScopedCodes scodes (ivf.invlists, key);
const uint8_t *list_vecs = scodes.get();
const Index::idx_t *ids = store_pairs
? nullptr
: ivf.invlists->get_ids(key);
@@ -443,6 +391,9 @@ void search_knn_hamming_heap(const IndexBinaryIVF& ivf,
maxheap_push(k, disi, idxi, disij, id);
}
}
if (ids)
ivf.invlists->release_ids (ids);
nscan += list_size;
if (max_codes && nscan >= max_codes)
break;
@@ -504,7 +455,8 @@ void search_knn_hamming_count(const IndexBinaryIVF& ivf,
nlistv++;
size_t list_size = ivf.invlists->list_size(key);
const uint8_t *list_vecs = (const uint8_t*)ivf.invlists->get_codes(key);
InvertedLists::ScopedCodes scodes (ivf.invlists, key);
const uint8_t *list_vecs = scodes.get();
const Index::idx_t *ids = store_pairs
? nullptr
: ivf.invlists->get_ids(key);
@@ -515,6 +467,9 @@ void search_knn_hamming_count(const IndexBinaryIVF& ivf,
long id = store_pairs ? (key << 32 | j) : ids[j];
csi.update_counter(yj, id);
}
if (ids)
ivf.invlists->release_ids (ids);
nscan += list_size;
if (max_codes && nscan >= max_codes)
break;
......
@@ -26,7 +26,9 @@
#include <unistd.h>
#include <stdint.h>
#ifdef __SSE__
#include <immintrin.h>
#endif
#include "utils.h" #include "utils.h"
#include "Heap.h" #include "Heap.h"
...@@ -1869,6 +1871,7 @@ struct DistanceXPQ4: Distance2Level { ...@@ -1869,6 +1871,7 @@ struct DistanceXPQ4: Distance2Level {
float operator () (storage_idx_t i) override float operator () (storage_idx_t i) override
{ {
#ifdef __SSE__
const uint8_t *code = storage.codes.data() + i * storage.code_size;
long key = 0;
memcpy (&key, code, storage.code_size_1);
@@ -1892,6 +1895,9 @@ struct DistanceXPQ4: Distance2Level {
accu = _mm_hadd_ps (accu, accu);
accu = _mm_hadd_ps (accu, accu);
return _mm_cvtss_f32 (accu);
#else
FAISS_THROW_MSG("not implemented for non-x64 platforms");
#endif
}
};
@@ -1920,6 +1926,7 @@ struct Distance2xXPQ4: Distance2Level {
long key01 = 0;
memcpy (&key01, code, storage.code_size_1);
code += storage.code_size_1;
#ifdef __SSE__
// walking pointers
const float *qa = q;
@@ -1945,6 +1952,9 @@ struct Distance2xXPQ4: Distance2Level {
accu = _mm_hadd_ps (accu, accu);
accu = _mm_hadd_ps (accu, accu);
return _mm_cvtss_f32 (accu);
#else
FAISS_THROW_MSG("not implemented for non-x64 platforms");
#endif
}
};
@@ -1957,6 +1967,7 @@ HNSW::DistanceComputer * IndexHNSW2Level::get_distance_computer () const
dynamic_cast<Index2Layer*>(storage);
if (storage2l) {
#ifdef __SSE__
const MultiIndexQuantizer *mi =
dynamic_cast<MultiIndexQuantizer*> (storage2l->q1.quantizer);
@@ -1971,6 +1982,7 @@ HNSW::DistanceComputer * IndexHNSW2Level::get_distance_computer () const
if (fl && storage2l->pq.dsub == 4) {
return new DistanceXPQ4(*storage2l);
}
#endif
}
// IVFPQ and cases not handled above
......
@@ -21,6 +21,9 @@
namespace faiss {
using ScopedIds = InvertedLists::ScopedIds;
using ScopedCodes = InvertedLists::ScopedCodes;
/*****************************************
* Level1Quantizer implementation
******************************************/
@@ -98,120 +101,6 @@ void Level1Quantizer::train_q1 (size_t n, const float *x, bool verbose, MetricTy
}
}
/*****************************************
* InvertedLists implementation
******************************************/
InvertedLists::InvertedLists (size_t nlist, size_t code_size):
nlist (nlist), code_size (code_size)
{
}
InvertedLists::~InvertedLists ()
{}
InvertedLists::idx_t InvertedLists::get_single_id (
size_t list_no, size_t offset) const
{
assert (offset < list_size (list_no));
return get_ids(list_no)[offset];
}
void InvertedLists::prefetch_lists (const long *, int) const
{}
const uint8_t * InvertedLists::get_single_code (
size_t list_no, size_t offset) const
{
assert (offset < list_size (list_no));
return get_codes(list_no) + offset * code_size;
}
size_t InvertedLists::add_entry (size_t list_no, idx_t theid,
const uint8_t *code)
{
return add_entries (list_no, 1, &theid, code);
}
void InvertedLists::update_entry (size_t list_no, size_t offset,
idx_t id, const uint8_t *code)
{
update_entries (list_no, offset, 1, &id, code);
}
void InvertedLists::reset () {
for (size_t i = 0; i < nlist; i++) {
resize (i, 0);
}
}
/*****************************************
* ArrayInvertedLists implementation
******************************************/
ArrayInvertedLists::ArrayInvertedLists (size_t nlist, size_t code_size):
InvertedLists (nlist, code_size)
{
ids.resize (nlist);
codes.resize (nlist);
}
size_t ArrayInvertedLists::add_entries (
size_t list_no, size_t n_entry,
const idx_t* ids_in, const uint8_t *code)
{
if (n_entry == 0) return 0;
assert (list_no < nlist);
size_t o = ids [list_no].size();
ids [list_no].resize (o + n_entry);
memcpy (&ids[list_no][o], ids_in, sizeof (ids_in[0]) * n_entry);
codes [list_no].resize ((o + n_entry) * code_size);
memcpy (&codes[list_no][o * code_size], code, code_size * n_entry);
return o;
}
size_t ArrayInvertedLists::list_size(size_t list_no) const
{
assert (list_no < nlist);
return ids[list_no].size();
}
const uint8_t * ArrayInvertedLists::get_codes (size_t list_no) const
{
assert (list_no < nlist);
return codes[list_no].data();
}
const InvertedLists::idx_t * ArrayInvertedLists::get_ids (size_t list_no) const
{
assert (list_no < nlist);
return ids[list_no].data();
}
void ArrayInvertedLists::resize (size_t list_no, size_t new_size)
{
ids[list_no].resize (new_size);
codes[list_no].resize (new_size * code_size);
}
void ArrayInvertedLists::update_entries (
size_t list_no, size_t offset, size_t n_entry,
const idx_t *ids_in, const uint8_t *codes_in)
{
assert (list_no < nlist);
assert (n_entry + offset <= ids[list_no].size());
memcpy (&ids[list_no][offset], ids_in, sizeof(ids_in[0]) * n_entry);
memcpy (&codes[list_no][offset * code_size], codes_in, code_size * n_entry);
}
ArrayInvertedLists::~ArrayInvertedLists ()
{}
/*****************************************
@@ -262,7 +151,7 @@ void IndexIVF::make_direct_map (bool new_maintain_direct_map)
direct_map.resize (ntotal, -1);
for (size_t key = 0; key < nlist; key++) {
size_t list_size = invlists->list_size (key);
const idx_t *idlist = invlists->get_ids (key);
ScopedIds idlist (invlists, key);
for (long ofs = 0; ofs < list_size; ofs++) {
FAISS_THROW_IF_NOT_MSG (
@@ -312,12 +201,12 @@ void IndexIVF::reconstruct_n (idx_t i0, idx_t ni, float* recons) const
for (long list_no = 0; list_no < nlist; list_no++) {
size_t list_size = invlists->list_size (list_no);
const Index::idx_t * idlist = invlists->get_ids (list_no);
ScopedIds idlist (invlists, list_no);
for (long offset = 0; offset < list_size; offset++) {
long id = idlist[offset];
if (!(id >= i0 && id < i0 + ni)) {
continue;
}
float* reconstructed = recons + (id - i0) * d;
@@ -390,14 +279,14 @@ long IndexIVF::remove_ids (const IDSelector & sel)
#pragma omp parallel for
for (long i = 0; i < nlist; i++) {
long l0 = invlists->list_size (i), l = l0, j = 0;
const idx_t *idsi = invlists->get_ids (i);
ScopedIds idsi (invlists, i);
while (j < l) {
if (sel.is_member (idsi[j])) {
l--;
invlists->update_entry (
i, j,
invlists->get_single_id (i, l),
invlists->get_single_code (i, l));
ScopedCodes (invlists, i, l).get());
} else {
j++;
}
@@ -472,38 +361,26 @@ void IndexIVF::print_stats () const
}
void IndexIVF::merge_from (IndexIVF &other, idx_t add_id)
void IndexIVF::check_compatible_for_merge (const IndexIVF &other) const
{
// minimal sanity checks
FAISS_THROW_IF_NOT (other.d == d);
FAISS_THROW_IF_NOT (other.nlist == nlist);
FAISS_THROW_IF_NOT (other.code_size == code_size);
FAISS_THROW_IF_NOT_MSG ((!maintain_direct_map &&
!other.maintain_direct_map),
"direct map copy not implemented");
FAISS_THROW_IF_NOT_MSG (typeid (*this) == typeid (other),
"can only merge indexes of the same type");
}
InvertedLists *oivf = other.invlists;
#pragma omp parallel for
for (long i = 0; i < nlist; i++) {
size_t list_size = oivf->list_size (i);
const idx_t * ids = oivf->get_ids (i);
if (add_id == 0) {
invlists->add_entries (i, list_size, ids,
oivf->get_codes (i));
} else {
std::vector <idx_t> new_ids (list_size);
for (size_t j = 0; j < list_size; j++) {
new_ids [j] = ids[j] + add_id;
}
void IndexIVF::merge_from (IndexIVF &other, idx_t add_id)
{
check_compatible_for_merge (other);
FAISS_THROW_IF_NOT_MSG ((!maintain_direct_map &&
!other.maintain_direct_map),
"direct map copy not implemented");
invlists->merge_from (other.invlists, add_id);
invlists->add_entries (i, list_size, new_ids.data(),
oivf->get_codes (i));
}
oivf->resize (i, 0);
}
ntotal += other.ntotal;
other.ntotal = 0;
@@ -542,7 +419,7 @@ void IndexIVF::copy_subset_to (IndexIVF & other, int subset_type,
for (long list_no = 0; list_no < nlist; list_no++) {
size_t n = invlists->list_size (list_no);
const idx_t *ids_in = invlists->get_ids (list_no);
ScopedIds ids_in (invlists, list_no);
if (subset_type == 0) {
for (long i = 0; i < n; i++) {
@@ -550,7 +427,7 @@ void IndexIVF::copy_subset_to (IndexIVF & other, int subset_type,
if (a1 <= id && id < a2) {
oivf->add_entry (list_no,
invlists->get_single_id (list_no, i),
invlists->get_single_code (list_no, i));
ScopedCodes (invlists, list_no, i).get());
other.ntotal++;
}
}
@@ -560,7 +437,7 @@ void IndexIVF::copy_subset_to (IndexIVF & other, int subset_type,
if (id % a1 == a2) {
oivf->add_entry (list_no,
invlists->get_single_id (list_no, i),
invlists->get_single_code (list_no, i));
ScopedCodes (invlists, list_no, i).get());
other.ntotal++;
}
}
@@ -575,7 +452,7 @@ void IndexIVF::copy_subset_to (IndexIVF & other, int subset_type,
for (long i = i1; i < i2; i++) {
oivf->add_entry (list_no,
invlists->get_single_id (list_no, i),
invlists->get_single_code (list_no, i));
ScopedCodes (invlists, list_no, i).get());
}
other.ntotal += i2 - i1;
......
@@ -16,6 +16,7 @@
#include "Index.h"
#include "InvertedLists.h"
#include "Clustering.h" #include "Clustering.h"
#include "Heap.h" #include "Heap.h"
...@@ -56,91 +57,6 @@ struct Level1Quantizer { ...@@ -56,91 +57,6 @@ struct Level1Quantizer {
}; };
/** Table of inverted lists
* multithreading rules:
* - concurrent read accesses are allowed
* - concurrent update accesses are allowed
* - for resize and add_entries, only concurrent access to different lists
* are allowed
*/
struct InvertedLists {
typedef Index::idx_t idx_t;
size_t nlist; ///< number of possible key values
size_t code_size; ///< code size per vector in bytes
InvertedLists (size_t nlist, size_t code_size);
/*************************
* Read only functions */
/// get the size of a list
virtual size_t list_size(size_t list_no) const = 0;
/// @return codes size list_size * code_size
virtual const uint8_t * get_codes (size_t list_no) const = 0;
/// @return ids size list_size
virtual const idx_t * get_ids (size_t list_no) const = 0;
/// @return a single id in an inverted list
virtual idx_t get_single_id (size_t list_no, size_t offset) const;
/// @return a single code in an inverted list
virtual const uint8_t * get_single_code (
size_t list_no, size_t offset) const;
/// prepare the following lists (default does nothing)
/// a list can be -1 hence the signed long
virtual void prefetch_lists (const long *list_nos, int nlist) const;
/*************************
* writing functions */
/// add one entry to an inverted list
virtual size_t add_entry (size_t list_no, idx_t theid,
const uint8_t *code);
virtual size_t add_entries (
size_t list_no, size_t n_entry,
const idx_t* ids, const uint8_t *code) = 0;
virtual void update_entry (size_t list_no, size_t offset,
idx_t id, const uint8_t *code);
virtual void update_entries (size_t list_no, size_t offset, size_t n_entry,
const idx_t *ids, const uint8_t *code) = 0;
virtual void resize (size_t list_no, size_t new_size) = 0;
virtual void reset ();
virtual ~InvertedLists ();
};
struct ArrayInvertedLists: InvertedLists {
std::vector < std::vector<uint8_t> > codes; // binary codes, size nlist
std::vector < std::vector<idx_t> > ids; ///< Inverted lists for indexes
ArrayInvertedLists (size_t nlist, size_t code_size);
size_t list_size(size_t list_no) const override;
const uint8_t * get_codes (size_t list_no) const override;
const idx_t * get_ids (size_t list_no) const override;
size_t add_entries (
size_t list_no, size_t n_entry,
const idx_t* ids, const uint8_t *code) override;
void update_entries (size_t list_no, size_t offset, size_t n_entry,
const idx_t *ids, const uint8_t *code) override;
void resize (size_t list_no, size_t new_size) override;
virtual ~ArrayInvertedLists ();
};
struct IVFSearchParameters {
size_t nprobe; ///< number of probes at query time
@@ -273,6 +189,11 @@ struct IndexIVF: Index, Level1Quantizer {
long remove_ids(const IDSelector& sel) override;
/** check that the two indexes are compatible (ie, they are
* trained in the same way and have the same
* parameters). Otherwise throw. */
void check_compatible_for_merge (const IndexIVF &other) const;
/** moves the entries from another dataset to self. On output,
* other is empty. add_id is added to all moved ids (for
* sequential ids, this would be this->ntotal) */
......
@@ -18,6 +18,7 @@
#include "IndexFlat.h"
#include "AuxIndexStructures.h"
namespace faiss {
@@ -33,8 +34,6 @@ IndexIVFFlat::IndexIVFFlat (Index * quantizer,
}
void IndexIVFFlat::add_with_ids (idx_t n, const float * x, const long *xids)
{
add_core (n, x, xids, nullptr);
@@ -122,8 +121,8 @@ void search_knn_for_ivf (const IndexIVFFlat & ivf,
nlistv++;
size_t list_size = ivf.invlists->list_size(key);
const float * list_vecs =
(const float*)ivf.invlists->get_codes (key);
InvertedLists::ScopedCodes scodes (ivf.invlists, key);
const float * list_vecs = (const float*)scodes.get();
const Index::idx_t * ids = store_pairs ? nullptr :
ivf.invlists->get_ids (key);
@@ -137,6 +136,10 @@ void search_knn_for_ivf (const IndexIVFFlat & ivf,
heap_push<C> (k, simi, idxi, dis, id);
}
}
if (ids) {
ivf.invlists->release_ids (ids);
}
nscan += list_size;
if (max_codes && nscan >= max_codes)
break;
@@ -213,9 +216,9 @@ void IndexIVFFlat::range_search (idx_t nx, const float *x, float radius,
}
const size_t list_size = invlists->list_size(key);
const float * list_vecs =
(const float*)invlists->get_codes (key);
const Index::idx_t * ids = invlists->get_ids (key);
InvertedLists::ScopedCodes scodes (invlists, key);
const float * list_vecs = (const float*)scodes.get();
InvertedLists::ScopedIds ids (invlists, key);
for (size_t j = 0; j < list_size; j++) {
const float * yj = list_vecs + d * j;
@@ -355,11 +358,12 @@ void IndexIVFFlatDedup::add_with_ids(
const float *xi = x + i * d;
// search if there is already an entry with that id
const uint8_t * codes = invlists->get_codes (list_no);
InvertedLists::ScopedCodes codes (invlists, list_no);
long n = invlists->list_size (list_no);
long offset = -1;
for (long o = 0; o < n; o++) {
if (!memcmp (codes + o * code_size,
if (!memcmp (codes.get() + o * code_size,
xi, code_size)) {
offset = o;
break;
@@ -479,7 +483,7 @@ long IndexIVFFlatDedup::remove_ids(const IDSelector& sel)
#pragma omp parallel for
for (long i = 0; i < nlist; i++) {
long l0 = invlists->list_size (i), l = l0, j = 0;
const idx_t *idsi = invlists->get_ids (i);
InvertedLists::ScopedIds idsi (invlists, i);
while (j < l) {
if (sel.is_member (idsi[j])) {
if (replace.count(idsi[j]) == 0) {
@@ -487,12 +491,12 @@ long IndexIVFFlatDedup::remove_ids(const IDSelector& sel)
invlists->update_entry (
i, j,
invlists->get_single_id (i, l),
invlists->get_single_code (i, l));
InvertedLists::ScopedCodes (invlists, i, l).get());
} else {
invlists->update_entry (
i, j,
replace[idsi[j]],
invlists->get_single_code (i, j));
InvertedLists::ScopedCodes (invlists, i, j).get());
j++;
}
} else {
......
@@ -421,11 +421,15 @@ void IndexIVFPQ::precompute_table ()
namespace {
static uint64_t get_cycles () {
#ifdef __x86_64__
uint32_t high, low;
asm volatile("rdtsc \n\t"
: "=a" (low),
"=d" (high));
return ((uint64_t)high << 32) | (low);
#else
return 0;
#endif
}
#define TIC t0 = get_cycles()
@@ -987,8 +991,9 @@ void IndexIVFPQ::search_preassigned (idx_t nx, const float *qx, idx_t k,
if (list_size == 0) continue;
qt.init_list (key, coarse_dis_i[ik],
list_size, invlists->get_ids (key),
invlists->get_codes (key));
list_size,
InvertedLists::ScopedIds (invlists, key).get(),
InvertedLists::ScopedCodes (invlists, key).get());
TIC;
if (polysemous_ht > 0) {
@@ -1063,10 +1068,11 @@ size_t IndexIVFPQ::find_duplicates (idx_t *dup_ids, size_t *lims) const
size_t n = invlists->list_size (list_no);
std::vector<int> ord (n);
for (int i = 0; i < n; i++) ord[i] = i;
CodeCmp cs = { invlists->get_codes (list_no), code_size };
InvertedLists::ScopedCodes codes (invlists, list_no);
CodeCmp cs = { codes.get(), code_size };
std::sort (ord.begin(), ord.end(), cs);
const idx_t *list_ids = invlists->get_ids (list_no);
InvertedLists::ScopedIds list_ids (invlists, list_no);
int prev = -1; // all elements from prev to i-1 are equal
for (int i = 0; i < n; i++) {
if (prev >= 0 && cs.cmp (ord [prev], ord [i]) == 0) {
......
@@ -15,7 +15,9 @@
#include <omp.h>
#ifdef __SSE__
#include <immintrin.h>
#endif
#include "utils.h" #include "utils.h"
...@@ -143,11 +145,13 @@ float decode_fp16 (uint16_t x) { ...@@ -143,11 +145,13 @@ float decode_fp16 (uint16_t x) {
// https://github.com/ispc/ispc/blob/master/stdlib.ispc // https://github.com/ispc/ispc/blob/master/stdlib.ispc
float floatbits (uint32_t x) { float floatbits (uint32_t x) {
return *(float*)&x; void *xptr = &x;
return *(float*)xptr;
}
uint32_t intbits (float f) {
return *(uint32_t*)&f;
void *fptr = &f;
return *(uint32_t*)fptr;
}
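The change above replaces the direct pointer cast with a cast through void *, presumably to silence the strict-aliasing warning on non-x86 builds. For reference (editor note, not part of the patch), the fully standard-defined way to reinterpret the bits is a memcpy:

    static inline float floatbits_memcpy (uint32_t x) {
        float f;
        memcpy (&f, &x, sizeof f);   // well-defined type punning
        return f;
    }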
@@ -1179,7 +1183,8 @@ void search_with_probes_ip (const IndexIVFScalarQuantizer & index,
float accu0 = cent_dis[i];
const size_t list_size = index.invlists->list_size (list_no);
const uint8_t * codes = index.invlists->get_codes (list_no);
InvertedLists::ScopedCodes scodes (index.invlists, list_no);
const uint8_t *codes = scodes.get();
const idx_t * ids =
store_pairs ? nullptr : index.invlists->get_ids (list_no);
@@ -1196,6 +1201,9 @@ void search_with_probes_ip (const IndexIVFScalarQuantizer & index,
}
codes += code_size;
}
if (ids) {
index.invlists->release_ids (ids);
}
nscan += list_size;
if (max_codes && nscan > max_codes)
break;
@@ -1225,7 +1233,8 @@ void search_with_probes_L2 (const IndexIVFScalarQuantizer & index,
if (list_no < 0) break;
const size_t list_size = index.invlists->list_size (list_no);
const uint8_t * codes = index.invlists->get_codes (list_no);
InvertedLists::ScopedCodes scodes (index.invlists, list_no);
const uint8_t *codes = scodes.get();
const idx_t * ids =
store_pairs ? nullptr : index.invlists->get_ids (list_no);
@@ -1243,6 +1252,9 @@ void search_with_probes_L2 (const IndexIVFScalarQuantizer & index,
}
codes += code_size;
}
if (ids) {
index.invlists->release_ids (ids);
}
nscan += list_size;
if (max_codes && nscan > max_codes)
break;
......
/**
* Copyright (c) 2015-present, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under the BSD+Patents license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#include "InvertedLists.h"
#include <cstdio>
#include "utils.h"
#include "FaissAssert.h"
namespace faiss {
using ScopedIds = InvertedLists::ScopedIds;
using ScopedCodes = InvertedLists::ScopedCodes;
/*****************************************
* InvertedLists implementation
******************************************/
InvertedLists::InvertedLists (size_t nlist, size_t code_size):
nlist (nlist), code_size (code_size)
{
}
InvertedLists::~InvertedLists ()
{}
InvertedLists::idx_t InvertedLists::get_single_id (
size_t list_no, size_t offset) const
{
assert (offset < list_size (list_no));
return get_ids(list_no)[offset];
}
void InvertedLists::release_codes (const uint8_t *) const
{}
void InvertedLists::release_ids (const idx_t *) const
{}
void InvertedLists::prefetch_lists (const long *, int) const
{}
const uint8_t * InvertedLists::get_single_code (
size_t list_no, size_t offset) const
{
assert (offset < list_size (list_no));
return get_codes(list_no) + offset * code_size;
}
size_t InvertedLists::add_entry (size_t list_no, idx_t theid,
const uint8_t *code)
{
return add_entries (list_no, 1, &theid, code);
}
void InvertedLists::update_entry (size_t list_no, size_t offset,
idx_t id, const uint8_t *code)
{
update_entries (list_no, offset, 1, &id, code);
}
void InvertedLists::reset () {
for (size_t i = 0; i < nlist; i++) {
resize (i, 0);
}
}
void InvertedLists::merge_from (InvertedLists *oivf, size_t add_id) {
#pragma omp parallel for
for (long i = 0; i < nlist; i++) {
size_t list_size = oivf->list_size (i);
ScopedIds ids (oivf, i);
if (add_id == 0) {
add_entries (i, list_size, ids.get (),
ScopedCodes (oivf, i).get());
} else {
std::vector <idx_t> new_ids (list_size);
for (size_t j = 0; j < list_size; j++) {
new_ids [j] = ids[j] + add_id;
}
add_entries (i, list_size, new_ids.data(),
ScopedCodes (oivf, i).get());
}
oivf->resize (i, 0);
}
}
/*****************************************
* ArrayInvertedLists implementation
******************************************/
ArrayInvertedLists::ArrayInvertedLists (size_t nlist, size_t code_size):
InvertedLists (nlist, code_size)
{
ids.resize (nlist);
codes.resize (nlist);
}
size_t ArrayInvertedLists::add_entries (
size_t list_no, size_t n_entry,
const idx_t* ids_in, const uint8_t *code)
{
if (n_entry == 0) return 0;
assert (list_no < nlist);
size_t o = ids [list_no].size();
ids [list_no].resize (o + n_entry);
memcpy (&ids[list_no][o], ids_in, sizeof (ids_in[0]) * n_entry);
codes [list_no].resize ((o + n_entry) * code_size);
memcpy (&codes[list_no][o * code_size], code, code_size * n_entry);
return o;
}
size_t ArrayInvertedLists::list_size(size_t list_no) const
{
assert (list_no < nlist);
return ids[list_no].size();
}
const uint8_t * ArrayInvertedLists::get_codes (size_t list_no) const
{
assert (list_no < nlist);
return codes[list_no].data();
}
const InvertedLists::idx_t * ArrayInvertedLists::get_ids (size_t list_no) const
{
assert (list_no < nlist);
return ids[list_no].data();
}
void ArrayInvertedLists::resize (size_t list_no, size_t new_size)
{
ids[list_no].resize (new_size);
codes[list_no].resize (new_size * code_size);
}
void ArrayInvertedLists::update_entries (
size_t list_no, size_t offset, size_t n_entry,
const idx_t *ids_in, const uint8_t *codes_in)
{
assert (list_no < nlist);
assert (n_entry + offset <= ids[list_no].size());
memcpy (&ids[list_no][offset], ids_in, sizeof(ids_in[0]) * n_entry);
memcpy (&codes[list_no][offset * code_size], codes_in, code_size * n_entry);
}
ArrayInvertedLists::~ArrayInvertedLists ()
{}
/*****************************************
* ConcatenatedInvertedLists implementation
******************************************/
ConcatenatedInvertedLists::ConcatenatedInvertedLists (
int nil, const InvertedLists **ils_in):
InvertedLists (nil > 0 ? ils_in[0]->nlist : 0,
nil > 0 ? ils_in[0]->code_size : 0)
{
FAISS_THROW_IF_NOT (nil > 0);
for (int i = 0; i < nil; i++) {
ils.push_back (ils_in[i]);
FAISS_THROW_IF_NOT (ils_in[i]->code_size == code_size &&
ils_in[i]->nlist == nlist);
}
}
size_t ConcatenatedInvertedLists::list_size(size_t list_no) const
{
size_t sz = 0;
for (int i = 0; i < ils.size(); i++) {
const InvertedLists *il = ils[i];
sz += il->list_size (list_no);
}
return sz;
}
const uint8_t * ConcatenatedInvertedLists::get_codes (size_t list_no) const
{
uint8_t *codes = new uint8_t [code_size * list_size(list_no)], *c = codes;
for (int i = 0; i < ils.size(); i++) {
const InvertedLists *il = ils[i];
size_t sz = il->list_size(list_no) * code_size;
if (sz > 0) {
memcpy (c, ScopedCodes (il, list_no).get(), sz);
c += sz;
}
}
return codes;
}
const uint8_t * ConcatenatedInvertedLists::get_single_code (
size_t list_no, size_t offset) const
{
for (int i = 0; i < ils.size(); i++) {
const InvertedLists *il = ils[i];
size_t sz = il->list_size (list_no);
if (offset < sz) {
// here we have to copy the code, otherwise it will crash at dealloc
uint8_t * code = new uint8_t [code_size];
memcpy (code, ScopedCodes (il, list_no, offset).get(), code_size);
return code;
}
offset -= sz;
}
FAISS_THROW_FMT ("offset %ld unknown", offset);
}
void ConcatenatedInvertedLists::release_codes (const uint8_t *codes) const {
delete [] codes;
}
const Index::idx_t * ConcatenatedInvertedLists::get_ids (size_t list_no) const
{
idx_t *ids = new idx_t [list_size(list_no)], *c = ids;
for (int i = 0; i < ils.size(); i++) {
const InvertedLists *il = ils[i];
size_t sz = il->list_size(list_no);
if (sz > 0) {
memcpy (c, ScopedIds (il, list_no).get(), sz * sizeof(idx_t));
c += sz;
}
}
return ids;
}
Index::idx_t ConcatenatedInvertedLists::get_single_id (
size_t list_no, size_t offset) const
{
for (int i = 0; i < ils.size(); i++) {
const InvertedLists *il = ils[i];
size_t sz = il->list_size (list_no);
if (offset < sz) {
return il->get_single_id (list_no, offset);
}
offset -= sz;
}
FAISS_THROW_FMT ("offset %ld unknown", offset);
}
void ConcatenatedInvertedLists::release_ids (const idx_t *ids) const {
delete [] ids;
}
size_t ConcatenatedInvertedLists::add_entries (
size_t , size_t ,
const idx_t* , const uint8_t *)
{
FAISS_THROW_MSG ("not implemented");
}
void ConcatenatedInvertedLists::update_entries (size_t, size_t , size_t ,
const idx_t *, const uint8_t *)
{
FAISS_THROW_MSG ("not implemented");
}
void ConcatenatedInvertedLists::resize (size_t , size_t )
{
FAISS_THROW_MSG ("not implemented");
}
} // namespace faiss
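A usage sketch for the new class (editor illustration, assuming il0, il1, il2 are InvertedLists pointers taken from indexes trained identically): the concatenation is read-only and its accessors allocate, which is why reads should go through the new Scoped wrappers so that release_ids / release_codes are called automatically.

    const faiss::InvertedLists *parts[] = { il0, il1, il2 };
    faiss::ConcatenatedInvertedLists cat (3, parts);

    size_t ls = cat.list_size (5);
    faiss::InvertedLists::ScopedIds ids (&cat, 5);
    faiss::InvertedLists::ScopedCodes codes (&cat, 5);
    for (size_t j = 0; j < ls; j++) {
        // ids[j] and codes.get() + j * cat.code_size form the j-th entry of list 5
    }
    // release_ids / release_codes run when ids and codes go out of scope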
/**
* Copyright (c) 2015-present, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under the BSD+Patents license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#ifndef FAISS_INVERTEDLISTS_IVF_H
#define FAISS_INVERTEDLISTS_IVF_H
/**
* Definition of inverted lists + a few common classes that implement
* the interface.
*/
#include <vector>
#include "Index.h"
namespace faiss {
/** Table of inverted lists
* multithreading rules:
* - concurrent read accesses are allowed
* - concurrent update accesses are allowed
* - for resize and add_entries, only concurrent access to different lists
* is allowed
*/
struct InvertedLists {
typedef Index::idx_t idx_t;
size_t nlist; ///< number of possible key values
size_t code_size; ///< code size per vector in bytes
InvertedLists (size_t nlist, size_t code_size);
/*************************
* Read only functions */
/// get the size of a list
virtual size_t list_size(size_t list_no) const = 0;
/** get the codes for an inverted list
* must be released by release_codes
*
* @return codes size list_size * code_size
*/
virtual const uint8_t * get_codes (size_t list_no) const = 0;
/** get the ids for an inverted list
* must be released by release_ids
*
* @return ids size list_size
*/
virtual const idx_t * get_ids (size_t list_no) const = 0;
/// release codes returned by get_codes (default implementation is a no-op)
virtual void release_codes (const uint8_t *codes) const;
/// release ids returned by get_ids
virtual void release_ids (const idx_t *ids) const;
/// @return a single id in an inverted list
virtual idx_t get_single_id (size_t list_no, size_t offset) const;
/// @return a single code in an inverted list
/// (should be deallocated with release_codes)
virtual const uint8_t * get_single_code (
size_t list_no, size_t offset) const;
/// prepare the following lists (default does nothing)
/// a list can be -1 hence the signed long
virtual void prefetch_lists (const long *list_nos, int nlist) const;
/*************************
* writing functions */
/// add one entry to an inverted list
virtual size_t add_entry (size_t list_no, idx_t theid,
const uint8_t *code);
virtual size_t add_entries (
size_t list_no, size_t n_entry,
const idx_t* ids, const uint8_t *code) = 0;
virtual void update_entry (size_t list_no, size_t offset,
idx_t id, const uint8_t *code);
virtual void update_entries (size_t list_no, size_t offset, size_t n_entry,
const idx_t *ids, const uint8_t *code) = 0;
virtual void resize (size_t list_no, size_t new_size) = 0;
virtual void reset ();
/// move all entries from oivf (oivf is empty on output)
void merge_from (InvertedLists *oivf, size_t add_id);
virtual ~InvertedLists ();
/**************************************
* Scoped inverted lists (for automatic deallocation)
*
* instead of writing:
*
* uint8_t * codes = invlists->get_codes (10);
* ... use codes
* invlists->release_codes(codes)
*
* write:
*
* ScopedCodes codes (invlists, 10);
* ... use codes.get()
* // release called automatically when codes goes out of scope
*
* the following function call also works:
*
* foo (123, ScopedCodes (invlists, 10).get(), 456);
*
*/
struct ScopedIds {
const InvertedLists *il;
const idx_t *ids;
ScopedIds (const InvertedLists *il, size_t list_no):
il (il), ids (il->get_ids (list_no))
{}
const idx_t *get() {return ids; }
idx_t operator [] (size_t i) const {
return ids[i];
}
~ScopedIds () {
il->release_ids (ids);
}
};
struct ScopedCodes {
const InvertedLists *il;
const uint8_t *codes;
ScopedCodes (const InvertedLists *il, size_t list_no):
il (il), codes (il->get_codes (list_no))
{}
ScopedCodes (const InvertedLists *il, size_t list_no, size_t offset):
il (il), codes (il->get_single_code (list_no, offset))
{}
const uint8_t *get() {return codes; }
~ScopedCodes () {
il->release_codes (codes);
}
};
};
/// simple (default) implementation as an array of inverted lists
struct ArrayInvertedLists: InvertedLists {
std::vector < std::vector<uint8_t> > codes; ///< binary codes, one vector per list, size nlist
std::vector < std::vector<idx_t> > ids;  ///< vector ids for each list, size nlist
ArrayInvertedLists (size_t nlist, size_t code_size);
size_t list_size(size_t list_no) const override;
const uint8_t * get_codes (size_t list_no) const override;
const idx_t * get_ids (size_t list_no) const override;
size_t add_entries (
size_t list_no, size_t n_entry,
const idx_t* ids, const uint8_t *code) override;
void update_entries (size_t list_no, size_t offset, size_t n_entry,
const idx_t *ids, const uint8_t *code) override;
void resize (size_t list_no, size_t new_size) override;
virtual ~ArrayInvertedLists ();
};
/// inverted lists built as the concatenation of a set of invlists
/// (read-only)
struct ConcatenatedInvertedLists: InvertedLists {
std::vector<const InvertedLists *>ils;
/// build InvertedLists by concatenating nil of them
ConcatenatedInvertedLists (int nil, const InvertedLists **ils);
size_t list_size(size_t list_no) const override;
const uint8_t * get_codes (size_t list_no) const override;
const idx_t * get_ids (size_t list_no) const override;
void release_codes (const uint8_t *codes) const override;
void release_ids (const idx_t *ids) const override;
idx_t get_single_id (size_t list_no, size_t offset) const override;
const uint8_t * get_single_code (
size_t list_no, size_t offset) const override;
size_t add_entries (
size_t list_no, size_t n_entry,
const idx_t* ids, const uint8_t *code) override;
void update_entries (size_t list_no, size_t offset, size_t n_entry,
const idx_t *ids, const uint8_t *code) override;
void resize (size_t list_no, size_t new_size) override;
};
} // namespace faiss
#endif
...@@ -134,7 +134,7 @@ struct PCAMatrix: LinearTransform { ...@@ -134,7 +134,7 @@ struct PCAMatrix: LinearTransform {
* eigenvalues^eigen_power * eigenvalues^eigen_power
* *
* =0: no whitening * =0: no whitening
* =-2: full whitening * =-0.5: full whitening
*/ */
float eigen_power; float eigen_power;
......
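For reference, the corrected constant can be checked directly: if u_i is the i-th PCA eigenvector and lambda_i its eigenvalue (the variance of the centered data along u_i), then scaling that component by lambda_i^p with p = eigen_power gives

\[
\operatorname{Var}\bigl(\lambda_i^{p}\, u_i^{\top} x\bigr) = \lambda_i^{2p}\,\lambda_i = \lambda_i^{2p+1},
\]

which equals 1 for every component exactly when p = -1/2, matching the new comment.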
...@@ -551,9 +551,15 @@ def compute_populated_index(preproc): ...@@ -551,9 +551,15 @@ def compute_populated_index(preproc):
print "Aggregate indexes to CPU" print "Aggregate indexes to CPU"
t0 = time.time() t0 = time.time()
for i in range(ngpu): if hasattr(gpu_index, 'at'):
index_src = faiss.index_gpu_to_cpu(gpu_index.at(i)) # it is a sharded index
print " index %d size %d" % (i, index_src.ntotal) for i in range(ngpu):
index_src = faiss.index_gpu_to_cpu(gpu_index.at(i))
print " index %d size %d" % (i, index_src.ntotal)
index_src.copy_subset_to(indexall, 0, 0, nb)
else:
# simple index
index_src = faiss.index_gpu_to_cpu(gpu_index)
index_src.copy_subset_to(indexall, 0, 0, nb) index_src.copy_subset_to(indexall, 0, 0, nb)
print " done in %.3f s" % (time.time() - t0) print " done in %.3f s" % (time.time() - t0)
......
...@@ -10,6 +10,16 @@ import numpy as np ...@@ -10,6 +10,16 @@ import numpy as np
import faiss import faiss
import time import time
swig_ptr = faiss.swig_ptr
if False:
a = np.arange(10, 14).astype('float32')
b = np.arange(20, 24).astype('float32')
faiss.fvec_inner_product (swig_ptr(a), swig_ptr(b), 4)
1/0
xd = 100 xd = 100
yd = 1000000 yd = 1000000
...@@ -29,9 +39,9 @@ for d in 3, 4, 12, 36, 64: ...@@ -29,9 +39,9 @@ for d in 3, 4, 12, 36, 64:
t0 = time.time() t0 = time.time()
for i in xrange(xd): for i in xrange(xd):
faiss.fvec_inner_products_ny(faiss.swig_ptr(distances[i]), faiss.fvec_inner_products_ny(swig_ptr(distances[i]),
faiss.swig_ptr(x[i]), swig_ptr(x[i]),
faiss.swig_ptr(y), swig_ptr(y),
d, yd) d, yd)
t1 = time.time() t1 = time.time()
...@@ -57,9 +67,9 @@ for d in 3, 4, 12, 36, 64: ...@@ -57,9 +67,9 @@ for d in 3, 4, 12, 36, 64:
t0 = time.time() t0 = time.time()
for i in xrange(xd): for i in xrange(xd):
faiss.fvec_L2sqr_ny(faiss.swig_ptr(distances[i]), faiss.fvec_L2sqr_ny(swig_ptr(distances[i]),
faiss.swig_ptr(x[i]), swig_ptr(x[i]),
faiss.swig_ptr(y), swig_ptr(y),
d, yd) d, yd)
t1 = time.time() t1 = time.time()
......
...@@ -220,6 +220,74 @@ struct ToGpuClonerMultiple: faiss::Cloner, GpuMultipleClonerOptions { ...@@ -220,6 +220,74 @@ struct ToGpuClonerMultiple: faiss::Cloner, GpuMultipleClonerOptions {
} }
Index * clone_Index_to_shards (const Index *index) {
long n = sub_cloners.size();
auto index_ivfpq =
dynamic_cast<const faiss::IndexIVFPQ *>(index);
auto index_ivfflat =
dynamic_cast<const faiss::IndexIVFFlat *>(index);
auto index_flat =
dynamic_cast<const faiss::IndexFlat *>(index);
FAISS_THROW_IF_NOT_MSG (
index_ivfpq || index_ivfflat || index_flat,
"IndexShards implemented only for "
"IndexIVFFlat, IndexFlat and IndexIVFPQ");
std::vector<faiss::Index*> shards(n);
for(long i = 0; i < n; i++) {
// make a shallow copy
if(reserveVecs)
sub_cloners[i].reserveVecs =
(reserveVecs + n - 1) / n;
if (index_ivfpq) {
faiss::IndexIVFPQ idx2(
index_ivfpq->quantizer, index_ivfpq->d,
index_ivfpq->nlist, index_ivfpq->code_size,
index_ivfpq->pq.nbits);
idx2.metric_type = index_ivfpq->metric_type;
idx2.pq = index_ivfpq->pq;
idx2.nprobe = index_ivfpq->nprobe;
idx2.use_precomputed_table = 0;
idx2.is_trained = index->is_trained;
copy_ivf_shard (index_ivfpq, &idx2, n, i);
shards[i] = sub_cloners[i].clone_Index(&idx2);
} else if (index_ivfflat) {
faiss::IndexIVFFlat idx2(
index_ivfflat->quantizer, index->d,
index_ivfflat->nlist, index_ivfflat->metric_type);
idx2.nprobe = index_ivfflat->nprobe;
copy_ivf_shard (index_ivfflat, &idx2, n, i);
shards[i] = sub_cloners[i].clone_Index(&idx2);
} else if (index_flat) {
faiss::IndexFlat idx2 (
index->d, index->metric_type);
shards[i] = sub_cloners[i].clone_Index(&idx2);
if (index->ntotal > 0) {
long i0 = index->ntotal * i / n;
long i1 = index->ntotal * (i + 1) / n;
shards[i]->add (
i1 - i0,
index_flat->xb.data() + i0 * index->d);
}
}
}
bool successive_ids = index_flat != nullptr;
faiss::IndexShards *res =
new faiss::IndexShards(index->d, true,
successive_ids);
for (int i = 0; i < n; i++) {
res->add_shard(shards[i]);
}
res->own_fields = true;
FAISS_ASSERT(index->ntotal == res->ntotal);
return res;
}
Index *clone_Index(const Index *index) override { Index *clone_Index(const Index *index) override {
long n = sub_cloners.size(); long n = sub_cloners.size();
if (n == 1) if (n == 1)
...@@ -236,54 +304,7 @@ struct ToGpuClonerMultiple: faiss::Cloner, GpuMultipleClonerOptions { ...@@ -236,54 +304,7 @@ struct ToGpuClonerMultiple: faiss::Cloner, GpuMultipleClonerOptions {
res->own_fields = true; res->own_fields = true;
return res; return res;
} else { } else {
auto index_ivfpq = return clone_Index_to_shards (index);
dynamic_cast<const faiss::IndexIVFPQ *>(index);
auto index_ivfflat =
dynamic_cast<const faiss::IndexIVFFlat *>(index);
FAISS_THROW_IF_NOT_MSG (index_ivfpq || index_ivfflat,
"IndexShards implemented only for "
"IndexIVFFlat or IndexIVFPQ");
std::vector<faiss::Index*> shards(n);
for(long i = 0; i < n; i++) {
// make a shallow copy
if(reserveVecs)
sub_cloners[i].reserveVecs =
(reserveVecs + n - 1) / n;
if (index_ivfpq) {
faiss::IndexIVFPQ idx2(
index_ivfpq->quantizer, index_ivfpq->d,
index_ivfpq->nlist, index_ivfpq->code_size,
index_ivfpq->pq.nbits);
idx2.metric_type = index_ivfpq->metric_type;
idx2.pq = index_ivfpq->pq;
idx2.nprobe = index_ivfpq->nprobe;
idx2.use_precomputed_table = 0;
idx2.is_trained = index->is_trained;
copy_ivf_shard (index_ivfpq, &idx2, n, i);
shards[i] = sub_cloners[i].clone_Index(&idx2);
} else if (index_ivfflat) {
faiss::IndexIVFFlat idx2(
index_ivfflat->quantizer, index->d,
index_ivfflat->nlist, index_ivfflat->metric_type);
idx2.nprobe = index_ivfflat->nprobe;
idx2.nprobe = index_ivfflat->nprobe;
copy_ivf_shard (index_ivfflat, &idx2, n, i);
shards[i] = sub_cloners[i].clone_Index(&idx2);
}
}
faiss::IndexShards *res =
new faiss::IndexShards(index->d, true, false);
for (int i = 0; i < n; i++) {
res->add_shard(shards[i]);
}
res->own_fields = true;
FAISS_ASSERT(index->ntotal == res->ntotal);
return res;
} }
} else if(auto miq = dynamic_cast<const MultiIndexQuantizer *>(index)) { } else if(auto miq = dynamic_cast<const MultiIndexQuantizer *>(index)) {
if (verbose) { if (verbose) {
......
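A minimal Python sketch of the new flat-index sharding path added in clone_Index_to_shards above; it mirrors the TestShardedFlat case further down and assumes at least two visible GPUs:

import numpy as np
import faiss

d = 32
xb = np.random.rand(1000, d).astype('float32')

index_cpu = faiss.IndexFlatL2(d)
index_cpu.add(xb)

co = faiss.GpuMultipleClonerOptions()
co.shard = True   # split the database across GPUs instead of replicating it
index_gpu = faiss.index_cpu_to_all_gpus(index_cpu, co, ngpu=2)

# each shard is searched and the per-shard results are merged
D, I = index_gpu.search(xb[:5], 10)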
...@@ -30,7 +30,8 @@ StandardGpuResources::StandardGpuResources() : ...@@ -30,7 +30,8 @@ StandardGpuResources::StandardGpuResources() :
tempMemFraction_(kDefaultTempMemFraction), tempMemFraction_(kDefaultTempMemFraction),
tempMemSize_(0), tempMemSize_(0),
useFraction_(true), useFraction_(true),
pinnedMemSize_(kDefaultPinnedMemoryAllocation) { pinnedMemSize_(kDefaultPinnedMemoryAllocation),
cudaMallocWarning_(true) {
} }
StandardGpuResources::~StandardGpuResources() { StandardGpuResources::~StandardGpuResources() {
...@@ -74,6 +75,7 @@ StandardGpuResources::~StandardGpuResources() { ...@@ -74,6 +75,7 @@ StandardGpuResources::~StandardGpuResources() {
void void
StandardGpuResources::noTempMemory() { StandardGpuResources::noTempMemory() {
setTempMemory(0); setTempMemory(0);
setCudaMallocWarning(false);
} }
void void
...@@ -117,6 +119,15 @@ StandardGpuResources::setDefaultNullStreamAllDevices() { ...@@ -117,6 +119,15 @@ StandardGpuResources::setDefaultNullStreamAllDevices() {
} }
} }
void
StandardGpuResources::setCudaMallocWarning(bool b) {
cudaMallocWarning_ = b;
for (auto& v : memory_) {
v.second->setCudaMallocWarning(b);
}
}
void void
StandardGpuResources::initializeForDevice(int device) { StandardGpuResources::initializeForDevice(int device) {
// Use default streams as a marker for whether or not a certain // Use default streams as a marker for whether or not a certain
...@@ -195,9 +206,12 @@ StandardGpuResources::initializeForDevice(int device) { ...@@ -195,9 +206,12 @@ StandardGpuResources::initializeForDevice(int device) {
} }
FAISS_ASSERT(memory_.count(device) == 0); FAISS_ASSERT(memory_.count(device) == 0);
memory_.emplace(device,
std::unique_ptr<StackDeviceMemory>( auto mem = std::unique_ptr<StackDeviceMemory>(
new StackDeviceMemory(device, toAlloc))); new StackDeviceMemory(device, toAlloc));
mem->setCudaMallocWarning(cudaMallocWarning_);
memory_.emplace(device, std::move(mem));
} }
cublasHandle_t cublasHandle_t
......
...@@ -48,6 +48,10 @@ class StandardGpuResources : public GpuResources { ...@@ -48,6 +48,10 @@ class StandardGpuResources : public GpuResources {
/// for all devices /// for all devices
void setDefaultNullStreamAllDevices(); void setDefaultNullStreamAllDevices();
/// Enable or disable the warning about not having enough temporary memory
/// when cudaMalloc gets called
void setCudaMallocWarning(bool b);
public: public:
/// Internal system calls /// Internal system calls
void initializeForDevice(int device) override; void initializeForDevice(int device) override;
...@@ -100,6 +104,9 @@ class StandardGpuResources : public GpuResources { ...@@ -100,6 +104,9 @@ class StandardGpuResources : public GpuResources {
/// Amount of pinned memory we should allocate /// Amount of pinned memory we should allocate
size_t pinnedMemSize_; size_t pinnedMemSize_;
/// Whether or not a warning upon cudaMalloc is generated
bool cudaMallocWarning_;
}; };
} } // namespace } } // namespace
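On the Python side, the simplest way to avoid the new warning is to skip the temporary memory pool entirely, since noTempMemory() now also turns the warning off. A small sketch, assuming a CUDA-enabled build:

import faiss

res = faiss.StandardGpuResources()
res.noTempMemory()   # no temp memory pool; also disables the cudaMalloc warning
index = faiss.GpuIndexFlat(res, 64, faiss.METRIC_L2)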
...@@ -146,435 +146,6 @@ struct IVFFlatScan<0, L2, T> { ...@@ -146,435 +146,6 @@ struct IVFFlatScan<0, L2, T> {
} }
}; };
// 64-d float32 implementation
template <bool L2>
struct IVFFlatScan<64, L2, float> {
static constexpr int kDims = 64;
static __device__ void scan(float* query,
void* vecData,
int numVecs,
int dim,
float* distanceOut) {
// Each warp reduces a single 64-d vector; each lane loads a float2
float* vecs = (float*) vecData;
int laneId = getLaneId();
int warpId = threadIdx.x / kWarpSize;
int numWarps = blockDim.x / kWarpSize;
float2 queryVal = *(float2*) &query[laneId * 2];
constexpr int kUnroll = 4;
float2 vecVal[kUnroll];
int limit = utils::roundDown(numVecs, kUnroll * numWarps);
for (int i = warpId; i < limit; i += kUnroll * numWarps) {
#pragma unroll
for (int j = 0; j < kUnroll; ++j) {
// Vector we are loading from is i
// Dim we are loading from is laneId * 2
vecVal[j] = *(float2*) &vecs[(i + j * numWarps) * kDims + laneId * 2];
}
float dist[kUnroll];
#pragma unroll
for (int j = 0; j < kUnroll; ++j) {
if (L2) {
dist[j] = l2Distance(queryVal, vecVal[j]);
} else {
dist[j] = ipDistance(queryVal, vecVal[j]);
}
}
// Reduce within the warp
#pragma unroll
for (int j = 0; j < kUnroll; ++j) {
dist[j] = warpReduceAllSum(dist[j]);
}
if (laneId == 0) {
#pragma unroll
for (int j = 0; j < kUnroll; ++j) {
distanceOut[i + j * numWarps] = dist[j];
}
}
}
// Handle remainder
for (int i = limit + warpId; i < numVecs; i += numWarps) {
vecVal[0] = *(float2*) &vecs[i * kDims + laneId * 2];
float dist;
if (L2) {
dist = l2Distance(queryVal, vecVal[0]);
} else {
dist = ipDistance(queryVal, vecVal[0]);
}
dist = warpReduceAllSum(dist);
if (laneId == 0) {
distanceOut[i] = dist;
}
}
}
};
#ifdef FAISS_USE_FLOAT16
// float16 implementation
template <bool L2>
struct IVFFlatScan<64, L2, half> {
static constexpr int kDims = 64;
static __device__ void scan(float* query,
void* vecData,
int numVecs,
int dim,
float* distanceOut) {
// Each warp reduces a single 64-d vector; each lane loads a half2
half* vecs = (half*) vecData;
int laneId = getLaneId();
int warpId = threadIdx.x / kWarpSize;
int numWarps = blockDim.x / kWarpSize;
float2 queryVal = *(float2*) &query[laneId * 2];
constexpr int kUnroll = 4;
half2 vecVal[kUnroll];
int limit = utils::roundDown(numVecs, kUnroll * numWarps);
for (int i = warpId; i < limit; i += kUnroll * numWarps) {
#pragma unroll
for (int j = 0; j < kUnroll; ++j) {
// Vector we are loading from is i
// Dim we are loading from is laneId * 2
vecVal[j] = *(half2*) &vecs[(i + j * numWarps) * kDims + laneId * 2];
}
float dist[kUnroll];
#pragma unroll
for (int j = 0; j < kUnroll; ++j) {
if (L2) {
dist[j] = l2Distance(queryVal, __half22float2(vecVal[j]));
} else {
dist[j] = ipDistance(queryVal, __half22float2(vecVal[j]));
}
}
// Reduce within the warp
#pragma unroll
for (int j = 0; j < kUnroll; ++j) {
dist[j] = warpReduceAllSum(dist[j]);
}
if (laneId == 0) {
#pragma unroll
for (int j = 0; j < kUnroll; ++j) {
distanceOut[i + j * numWarps] = dist[j];
}
}
}
// Handle remainder
for (int i = limit + warpId; i < numVecs; i += numWarps) {
vecVal[0] = *(half2*) &vecs[i * kDims + laneId * 2];
float dist;
if (L2) {
dist = l2Distance(queryVal, __half22float2(vecVal[0]));
} else {
dist = ipDistance(queryVal, __half22float2(vecVal[0]));
}
dist = warpReduceAllSum(dist);
if (laneId == 0) {
distanceOut[i] = dist;
}
}
}
};
#endif
// 128-d float32 implementation
template <bool L2>
struct IVFFlatScan<128, L2, float> {
static constexpr int kDims = 128;
static __device__ void scan(float* query,
void* vecData,
int numVecs,
int dim,
float* distanceOut) {
// Each warp reduces a single 128-d vector; each lane loads a float4
float* vecs = (float*) vecData;
int laneId = getLaneId();
int warpId = threadIdx.x / kWarpSize;
int numWarps = blockDim.x / kWarpSize;
float4 queryVal = *(float4*) &query[laneId * 4];
constexpr int kUnroll = 4;
float4 vecVal[kUnroll];
int limit = utils::roundDown(numVecs, kUnroll * numWarps);
for (int i = warpId; i < limit; i += kUnroll * numWarps) {
#pragma unroll
for (int j = 0; j < kUnroll; ++j) {
// Vector we are loading from is i
// Dim we are loading from is laneId * 4
vecVal[j] = *(float4*) &vecs[(i + j * numWarps) * kDims + laneId * 4];
}
float dist[kUnroll];
#pragma unroll
for (int j = 0; j < kUnroll; ++j) {
if (L2) {
dist[j] = l2Distance(queryVal, vecVal[j]);
} else {
dist[j] = ipDistance(queryVal, vecVal[j]);
}
}
// Reduce within the warp
#pragma unroll
for (int j = 0; j < kUnroll; ++j) {
dist[j] = warpReduceAllSum(dist[j]);
}
if (laneId == 0) {
#pragma unroll
for (int j = 0; j < kUnroll; ++j) {
distanceOut[i + j * numWarps] = dist[j];
}
}
}
// Handle remainder
for (int i = limit + warpId; i < numVecs; i += numWarps) {
vecVal[0] = *(float4*) &vecs[i * kDims + laneId * 4];
float dist;
if (L2) {
dist = l2Distance(queryVal, vecVal[0]);
} else {
dist = ipDistance(queryVal, vecVal[0]);
}
dist = warpReduceAllSum(dist);
if (laneId == 0) {
distanceOut[i] = dist;
}
}
}
};
#ifdef FAISS_USE_FLOAT16
// float16 implementation
template <bool L2>
struct IVFFlatScan<128, L2, half> {
static constexpr int kDims = 128;
static __device__ void scan(float* query,
void* vecData,
int numVecs,
int dim,
float* distanceOut) {
// Each warp reduces a single 128-d vector; each lane loads a Half4
half* vecs = (half*) vecData;
int laneId = getLaneId();
int warpId = threadIdx.x / kWarpSize;
int numWarps = blockDim.x / kWarpSize;
float4 queryVal = *(float4*) &query[laneId * 4];
constexpr int kUnroll = 4;
Half4 vecVal[kUnroll];
int limit = utils::roundDown(numVecs, kUnroll * numWarps);
for (int i = warpId; i < limit; i += kUnroll * numWarps) {
#pragma unroll
for (int j = 0; j < kUnroll; ++j) {
// Vector we are loading from is i
// Dim we are loading from is laneId * 4
vecVal[j] =
LoadStore<Half4>::load(
&vecs[(i + j * numWarps) * kDims + laneId * 4]);
}
float dist[kUnroll];
#pragma unroll
for (int j = 0; j < kUnroll; ++j) {
if (L2) {
dist[j] = l2Distance(queryVal, half4ToFloat4(vecVal[j]));
} else {
dist[j] = ipDistance(queryVal, half4ToFloat4(vecVal[j]));
}
}
// Reduce within the warp
#pragma unroll
for (int j = 0; j < kUnroll; ++j) {
dist[j] = warpReduceAllSum(dist[j]);
}
if (laneId == 0) {
#pragma unroll
for (int j = 0; j < kUnroll; ++j) {
distanceOut[i + j * numWarps] = dist[j];
}
}
}
// Handle remainder
for (int i = limit + warpId; i < numVecs; i += numWarps) {
vecVal[0] = LoadStore<Half4>::load(&vecs[i * kDims + laneId * 4]);
float dist;
if (L2) {
dist = l2Distance(queryVal, half4ToFloat4(vecVal[0]));
} else {
dist = ipDistance(queryVal, half4ToFloat4(vecVal[0]));
}
dist = warpReduceAllSum(dist);
if (laneId == 0) {
distanceOut[i] = dist;
}
}
}
};
#endif
// 256-d float32 implementation
template <bool L2>
struct IVFFlatScan<256, L2, float> {
static constexpr int kDims = 256;
static __device__ void scan(float* query,
void* vecData,
int numVecs,
int dim,
float* distanceOut) {
// A specialization here to load per-warp seems to be worse, since
// we're already running at near memory b/w peak
IVFFlatScan<0, L2, float>::scan(query,
vecData,
numVecs,
dim,
distanceOut);
}
};
#ifdef FAISS_USE_FLOAT16
// float16 implementation
template <bool L2>
struct IVFFlatScan<256, L2, half> {
static constexpr int kDims = 256;
static __device__ void scan(float* query,
void* vecData,
int numVecs,
int dim,
float* distanceOut) {
// Each warp reduces a single 256-d vector; each lane loads a Half8
half* vecs = (half*) vecData;
int laneId = getLaneId();
int warpId = threadIdx.x / kWarpSize;
int numWarps = blockDim.x / kWarpSize;
// This is not a contiguous load, but we only have to load these two
// values, so that we can load by Half8 below
float4 queryValA = *(float4*) &query[laneId * 8];
float4 queryValB = *(float4*) &query[laneId * 8 + 4];
constexpr int kUnroll = 4;
Half8 vecVal[kUnroll];
int limit = utils::roundDown(numVecs, kUnroll * numWarps);
for (int i = warpId; i < limit; i += kUnroll * numWarps) {
#pragma unroll
for (int j = 0; j < kUnroll; ++j) {
// Vector we are loading from is i
// Dim we are loading from is laneId * 8
vecVal[j] =
LoadStore<Half8>::load(
&vecs[(i + j * numWarps) * kDims + laneId * 8]);
}
float dist[kUnroll];
#pragma unroll
for (int j = 0; j < kUnroll; ++j) {
if (L2) {
dist[j] = l2Distance(queryValA, half4ToFloat4(vecVal[j].a));
dist[j] += l2Distance(queryValB, half4ToFloat4(vecVal[j].b));
} else {
dist[j] = ipDistance(queryValA, half4ToFloat4(vecVal[j].a));
dist[j] += ipDistance(queryValB, half4ToFloat4(vecVal[j].b));
}
}
// Reduce within the warp
#pragma unroll
for (int j = 0; j < kUnroll; ++j) {
dist[j] = warpReduceAllSum(dist[j]);
}
if (laneId == 0) {
#pragma unroll
for (int j = 0; j < kUnroll; ++j) {
distanceOut[i + j * numWarps] = dist[j];
}
}
}
// Handle remainder
for (int i = limit + warpId; i < numVecs; i += numWarps) {
vecVal[0] = LoadStore<Half8>::load(&vecs[i * kDims + laneId * 8]);
float dist;
if (L2) {
dist = l2Distance(queryValA, half4ToFloat4(vecVal[0].a));
dist += l2Distance(queryValB, half4ToFloat4(vecVal[0].b));
} else {
dist = ipDistance(queryValA, half4ToFloat4(vecVal[0].a));
dist += ipDistance(queryValB, half4ToFloat4(vecVal[0].b));
}
dist = warpReduceAllSum(dist);
if (laneId == 0) {
distanceOut[i] = dist;
}
}
}
};
#endif
template <int Dims, bool L2, typename T> template <int Dims, bool L2, typename T>
__global__ void __global__ void
ivfFlatScan(Tensor<float, 2, true> queries, ivfFlatScan(Tensor<float, 2, true> queries,
...@@ -693,13 +264,7 @@ runIVFFlatScanTile(Tensor<float, 2, true>& queries, ...@@ -693,13 +264,7 @@ runIVFFlatScanTile(Tensor<float, 2, true>& queries,
#endif // FAISS_USE_FLOAT16 #endif // FAISS_USE_FLOAT16
if (dim == 64) { if (dim <= kMaxThreadsIVF) {
HANDLE_DIM_CASE(64);
} else if (dim == 128) {
HANDLE_DIM_CASE(128);
} else if (dim == 256) {
HANDLE_DIM_CASE(256);
} else if (dim <= kMaxThreadsIVF) {
HANDLE_DIM_CASE(0); HANDLE_DIM_CASE(0);
} else { } else {
HANDLE_DIM_CASE(-1); HANDLE_DIM_CASE(-1);
......
...@@ -107,8 +107,6 @@ void testFlat(const TestFlatOptions& opt) { ...@@ -107,8 +107,6 @@ void testFlat(const TestFlatOptions& opt) {
TEST(TestGpuIndexFlat, IP_Float32) { TEST(TestGpuIndexFlat, IP_Float32) {
for (int tries = 0; tries < 5; ++tries) { for (int tries = 0; tries < 5; ++tries) {
faiss::gpu::newTestSeed();
TestFlatOptions opt; TestFlatOptions opt;
opt.useL2 = false; opt.useL2 = false;
opt.useFloat16 = false; opt.useFloat16 = false;
...@@ -123,8 +121,6 @@ TEST(TestGpuIndexFlat, IP_Float32) { ...@@ -123,8 +121,6 @@ TEST(TestGpuIndexFlat, IP_Float32) {
TEST(TestGpuIndexFlat, L2_Float32) { TEST(TestGpuIndexFlat, L2_Float32) {
for (int tries = 0; tries < 5; ++tries) { for (int tries = 0; tries < 5; ++tries) {
faiss::gpu::newTestSeed();
TestFlatOptions opt; TestFlatOptions opt;
opt.useL2 = true; opt.useL2 = true;
opt.useFloat16 = false; opt.useFloat16 = false;
...@@ -140,8 +136,6 @@ TEST(TestGpuIndexFlat, L2_Float32) { ...@@ -140,8 +136,6 @@ TEST(TestGpuIndexFlat, L2_Float32) {
// test specialized k == 1 codepath // test specialized k == 1 codepath
TEST(TestGpuIndexFlat, L2_Float32_K1) { TEST(TestGpuIndexFlat, L2_Float32_K1) {
for (int tries = 0; tries < 5; ++tries) { for (int tries = 0; tries < 5; ++tries) {
faiss::gpu::newTestSeed();
TestFlatOptions opt; TestFlatOptions opt;
opt.useL2 = true; opt.useL2 = true;
opt.useFloat16 = false; opt.useFloat16 = false;
...@@ -154,8 +148,6 @@ TEST(TestGpuIndexFlat, L2_Float32_K1) { ...@@ -154,8 +148,6 @@ TEST(TestGpuIndexFlat, L2_Float32_K1) {
TEST(TestGpuIndexFlat, IP_Float16) { TEST(TestGpuIndexFlat, IP_Float16) {
for (int tries = 0; tries < 5; ++tries) { for (int tries = 0; tries < 5; ++tries) {
faiss::gpu::newTestSeed();
TestFlatOptions opt; TestFlatOptions opt;
opt.useL2 = false; opt.useL2 = false;
opt.useFloat16 = true; opt.useFloat16 = true;
...@@ -170,8 +162,6 @@ TEST(TestGpuIndexFlat, IP_Float16) { ...@@ -170,8 +162,6 @@ TEST(TestGpuIndexFlat, IP_Float16) {
TEST(TestGpuIndexFlat, L2_Float16) { TEST(TestGpuIndexFlat, L2_Float16) {
for (int tries = 0; tries < 5; ++tries) { for (int tries = 0; tries < 5; ++tries) {
faiss::gpu::newTestSeed();
TestFlatOptions opt; TestFlatOptions opt;
opt.useL2 = true; opt.useL2 = true;
opt.useFloat16 = true; opt.useFloat16 = true;
...@@ -187,8 +177,6 @@ TEST(TestGpuIndexFlat, L2_Float16) { ...@@ -187,8 +177,6 @@ TEST(TestGpuIndexFlat, L2_Float16) {
// test specialized k == 1 codepath // test specialized k == 1 codepath
TEST(TestGpuIndexFlat, L2_Float16_K1) { TEST(TestGpuIndexFlat, L2_Float16_K1) {
for (int tries = 0; tries < 5; ++tries) { for (int tries = 0; tries < 5; ++tries) {
faiss::gpu::newTestSeed();
TestFlatOptions opt; TestFlatOptions opt;
opt.useL2 = true; opt.useL2 = true;
opt.useFloat16 = true; opt.useFloat16 = true;
...@@ -201,15 +189,13 @@ TEST(TestGpuIndexFlat, L2_Float16_K1) { ...@@ -201,15 +189,13 @@ TEST(TestGpuIndexFlat, L2_Float16_K1) {
// test tiling along a huge vector set // test tiling along a huge vector set
TEST(TestGpuIndexFlat, L2_Tiling) { TEST(TestGpuIndexFlat, L2_Tiling) {
for (int tries = 0; tries < 3; ++tries) { for (int tries = 0; tries < 2; ++tries) {
faiss::gpu::newTestSeed();
TestFlatOptions opt; TestFlatOptions opt;
opt.useL2 = true; opt.useL2 = true;
opt.useFloat16 = false; opt.useFloat16 = false;
opt.useTransposed = false; opt.useTransposed = false;
opt.numVecsOverride = 1000000; opt.numVecsOverride = 1000000;
opt.numQueriesOverride = 8; opt.numQueriesOverride = 4;
testFlat(opt); testFlat(opt);
...@@ -251,8 +237,6 @@ TEST(TestGpuIndexFlat, QueryEmpty) { ...@@ -251,8 +237,6 @@ TEST(TestGpuIndexFlat, QueryEmpty) {
} }
TEST(TestGpuIndexFlat, CopyFrom) { TEST(TestGpuIndexFlat, CopyFrom) {
faiss::gpu::newTestSeed();
int numVecs = faiss::gpu::randVal(100, 200); int numVecs = faiss::gpu::randVal(100, 200);
int dim = faiss::gpu::randVal(1, 1000); int dim = faiss::gpu::randVal(1, 1000);
...@@ -293,8 +277,6 @@ TEST(TestGpuIndexFlat, CopyFrom) { ...@@ -293,8 +277,6 @@ TEST(TestGpuIndexFlat, CopyFrom) {
} }
TEST(TestGpuIndexFlat, CopyTo) { TEST(TestGpuIndexFlat, CopyTo) {
faiss::gpu::newTestSeed();
faiss::gpu::StandardGpuResources res; faiss::gpu::StandardGpuResources res;
res.noTempMemory(); res.noTempMemory();
...@@ -375,3 +357,12 @@ TEST(TestGpuIndexFlat, UnifiedMemory) { ...@@ -375,3 +357,12 @@ TEST(TestGpuIndexFlat, UnifiedMemory) {
0.1f, 0.1f,
0.015f); 0.015f);
} }
int main(int argc, char** argv) {
testing::InitGoogleTest(&argc, argv);
// just run with a fixed test seed
faiss::gpu::setTestSeed(100);
return RUN_ALL_TESTS();
}
...@@ -72,8 +72,6 @@ void queryTest(faiss::MetricType metricType, ...@@ -72,8 +72,6 @@ void queryTest(faiss::MetricType metricType,
bool useFloat16, bool useFloat16,
int dimOverride = -1) { int dimOverride = -1) {
for (int tries = 0; tries < 3; ++tries) { for (int tries = 0; tries < 3; ++tries) {
faiss::gpu::newTestSeed();
Options opt; Options opt;
opt.dim = dimOverride != -1 ? dimOverride : opt.dim; opt.dim = dimOverride != -1 ? dimOverride : opt.dim;
...@@ -116,7 +114,7 @@ void queryTest(faiss::MetricType metricType, ...@@ -116,7 +114,7 @@ void queryTest(faiss::MetricType metricType,
// FIXME: the fp16 bounds are // FIXME: the fp16 bounds are
// useless when math (the accumulator) is // useless when math (the accumulator) is
// in fp16. Figure out another way to test // in fp16. Figure out another way to test
compFloat16 ? 0.99f : 0.1f, compFloat16 ? 0.70f : 0.1f,
compFloat16 ? 0.65f : 0.015f); compFloat16 ? 0.65f : 0.015f);
} }
} }
...@@ -125,8 +123,6 @@ void addTest(faiss::MetricType metricType, ...@@ -125,8 +123,6 @@ void addTest(faiss::MetricType metricType,
bool useFloat16CoarseQuantizer, bool useFloat16CoarseQuantizer,
bool useFloat16) { bool useFloat16) {
for (int tries = 0; tries < 5; ++tries) { for (int tries = 0; tries < 5; ++tries) {
faiss::gpu::newTestSeed();
Options opt; Options opt;
std::vector<float> trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim); std::vector<float> trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim);
...@@ -176,8 +172,6 @@ void addTest(faiss::MetricType metricType, ...@@ -176,8 +172,6 @@ void addTest(faiss::MetricType metricType,
void copyToTest(bool useFloat16CoarseQuantizer, void copyToTest(bool useFloat16CoarseQuantizer,
bool useFloat16) { bool useFloat16) {
faiss::gpu::newTestSeed();
Options opt; Options opt;
std::vector<float> trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim); std::vector<float> trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim);
std::vector<float> addVecs = faiss::gpu::randVecs(opt.numAdd, opt.dim); std::vector<float> addVecs = faiss::gpu::randVecs(opt.numAdd, opt.dim);
...@@ -226,8 +220,6 @@ void copyToTest(bool useFloat16CoarseQuantizer, ...@@ -226,8 +220,6 @@ void copyToTest(bool useFloat16CoarseQuantizer,
void copyFromTest(bool useFloat16CoarseQuantizer, void copyFromTest(bool useFloat16CoarseQuantizer,
bool useFloat16) { bool useFloat16) {
faiss::gpu::newTestSeed();
Options opt; Options opt;
std::vector<float> trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim); std::vector<float> trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim);
std::vector<float> addVecs = faiss::gpu::randVecs(opt.numAdd, opt.dim); std::vector<float> addVecs = faiss::gpu::randVecs(opt.numAdd, opt.dim);
...@@ -391,8 +383,6 @@ TEST(TestGpuIndexIVFFlat, Float32_32_CopyTo) { ...@@ -391,8 +383,6 @@ TEST(TestGpuIndexIVFFlat, Float32_32_CopyTo) {
} }
TEST(TestGpuIndexIVFFlat, Float32_negative) { TEST(TestGpuIndexIVFFlat, Float32_negative) {
faiss::gpu::newTestSeed();
Options opt; Options opt;
auto trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim); auto trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim);
...@@ -457,8 +447,6 @@ TEST(TestGpuIndexIVFFlat, Float32_negative) { ...@@ -457,8 +447,6 @@ TEST(TestGpuIndexIVFFlat, Float32_negative) {
// //
TEST(TestGpuIndexIVFFlat, QueryNaN) { TEST(TestGpuIndexIVFFlat, QueryNaN) {
faiss::gpu::newTestSeed();
Options opt; Options opt;
std::vector<float> trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim); std::vector<float> trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim);
...@@ -505,8 +493,6 @@ TEST(TestGpuIndexIVFFlat, QueryNaN) { ...@@ -505,8 +493,6 @@ TEST(TestGpuIndexIVFFlat, QueryNaN) {
} }
TEST(TestGpuIndexIVFFlat, AddNaN) { TEST(TestGpuIndexIVFFlat, AddNaN) {
faiss::gpu::newTestSeed();
Options opt; Options opt;
faiss::gpu::StandardGpuResources res; faiss::gpu::StandardGpuResources res;
...@@ -612,3 +598,12 @@ TEST(TestGpuIndexIVFFlat, UnifiedMemory) { ...@@ -612,3 +598,12 @@ TEST(TestGpuIndexIVFFlat, UnifiedMemory) {
0.1f, 0.1f,
0.015f); 0.015f);
} }
int main(int argc, char** argv) {
testing::InitGoogleTest(&argc, argv);
// just run with a fixed test seed
faiss::gpu::setTestSeed(100);
return RUN_ALL_TESTS();
}
...@@ -94,7 +94,7 @@ struct Options { ...@@ -94,7 +94,7 @@ struct Options {
} }
float getPctMaxDiffN() const { float getPctMaxDiffN() const {
return useFloat16 ? 0.05f : 0.015f; return useFloat16 ? 0.05f : 0.02f;
} }
int numAdd; int numAdd;
...@@ -114,8 +114,6 @@ struct Options { ...@@ -114,8 +114,6 @@ struct Options {
TEST(TestGpuIndexIVFPQ, Query) { TEST(TestGpuIndexIVFPQ, Query) {
for (int tries = 0; tries < 5; ++tries) { for (int tries = 0; tries < 5; ++tries) {
faiss::gpu::newTestSeed();
Options opt; Options opt;
std::vector<float> trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim); std::vector<float> trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim);
...@@ -150,8 +148,6 @@ TEST(TestGpuIndexIVFPQ, Query) { ...@@ -150,8 +148,6 @@ TEST(TestGpuIndexIVFPQ, Query) {
TEST(TestGpuIndexIVFPQ, Add) { TEST(TestGpuIndexIVFPQ, Add) {
for (int tries = 0; tries < 5; ++tries) { for (int tries = 0; tries < 5; ++tries) {
faiss::gpu::newTestSeed();
Options opt; Options opt;
std::vector<float> trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim); std::vector<float> trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim);
...@@ -187,8 +183,6 @@ TEST(TestGpuIndexIVFPQ, Add) { ...@@ -187,8 +183,6 @@ TEST(TestGpuIndexIVFPQ, Add) {
} }
TEST(TestGpuIndexIVFPQ, CopyTo) { TEST(TestGpuIndexIVFPQ, CopyTo) {
faiss::gpu::newTestSeed();
Options opt; Options opt;
std::vector<float> trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim); std::vector<float> trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim);
std::vector<float> addVecs = faiss::gpu::randVecs(opt.numAdd, opt.dim); std::vector<float> addVecs = faiss::gpu::randVecs(opt.numAdd, opt.dim);
...@@ -240,8 +234,6 @@ TEST(TestGpuIndexIVFPQ, CopyTo) { ...@@ -240,8 +234,6 @@ TEST(TestGpuIndexIVFPQ, CopyTo) {
} }
TEST(TestGpuIndexIVFPQ, CopyFrom) { TEST(TestGpuIndexIVFPQ, CopyFrom) {
faiss::gpu::newTestSeed();
Options opt; Options opt;
std::vector<float> trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim); std::vector<float> trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim);
std::vector<float> addVecs = faiss::gpu::randVecs(opt.numAdd, opt.dim); std::vector<float> addVecs = faiss::gpu::randVecs(opt.numAdd, opt.dim);
...@@ -291,8 +283,6 @@ TEST(TestGpuIndexIVFPQ, CopyFrom) { ...@@ -291,8 +283,6 @@ TEST(TestGpuIndexIVFPQ, CopyFrom) {
} }
TEST(TestGpuIndexIVFPQ, QueryNaN) { TEST(TestGpuIndexIVFPQ, QueryNaN) {
faiss::gpu::newTestSeed();
Options opt; Options opt;
std::vector<float> trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim); std::vector<float> trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim);
...@@ -342,8 +332,6 @@ TEST(TestGpuIndexIVFPQ, QueryNaN) { ...@@ -342,8 +332,6 @@ TEST(TestGpuIndexIVFPQ, QueryNaN) {
} }
TEST(TestGpuIndexIVFPQ, AddNaN) { TEST(TestGpuIndexIVFPQ, AddNaN) {
faiss::gpu::newTestSeed();
Options opt; Options opt;
faiss::gpu::StandardGpuResources res; faiss::gpu::StandardGpuResources res;
...@@ -450,3 +438,12 @@ TEST(TestGpuIndexIVFPQ, UnifiedMemory) { ...@@ -450,3 +438,12 @@ TEST(TestGpuIndexIVFPQ, UnifiedMemory) {
0.1f, 0.1f,
0.015f); 0.015f);
} }
int main(int argc, char** argv) {
testing::InitGoogleTest(&argc, argv);
// just run with a fixed test seed
faiss::gpu::setTestSeed(100);
return RUN_ALL_TESTS();
}
...@@ -19,11 +19,6 @@ ...@@ -19,11 +19,6 @@
#include <unordered_map> #include <unordered_map>
#include <vector> #include <vector>
int main(int argc, char** argv) {
testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();
}
void testForSize(int rows, int cols, int k, bool dir, bool warp) { void testForSize(int rows, int cols, int k, bool dir, bool warp) {
std::vector<float> v = faiss::gpu::randVecs(rows, cols); std::vector<float> v = faiss::gpu::randVecs(rows, cols);
faiss::gpu::HostTensor<float, 2, true> hostVal({rows, cols}); faiss::gpu::HostTensor<float, 2, true> hostVal({rows, cols});
...@@ -184,3 +179,12 @@ TEST(TestGpuSelect, testExactWarp) { ...@@ -184,3 +179,12 @@ TEST(TestGpuSelect, testExactWarp) {
testForSize(rows, cols, cols, dir, true); testForSize(rows, cols, cols, dir, true);
} }
} }
int main(int argc, char** argv) {
testing::InitGoogleTest(&argc, argv);
// just run with a fixed test seed
faiss::gpu::setTestSeed(100);
return RUN_ALL_TESTS();
}
...@@ -112,3 +112,94 @@ class EvalIVFPQAccuracy(unittest.TestCase): ...@@ -112,3 +112,94 @@ class EvalIVFPQAccuracy(unittest.TestCase):
res = faiss.StandardGpuResources() res = faiss.StandardGpuResources()
gpu_index = faiss.index_cpu_to_gpu(res, 0, index) gpu_index = faiss.index_cpu_to_gpu(res, 0, index)
faiss.GpuParameterSpace().set_index_parameter(gpu_index, "nprobe", 3) faiss.GpuParameterSpace().set_index_parameter(gpu_index, "nprobe", 3)
class ReferencedObject(unittest.TestCase):
d = 16
xb = np.random.rand(256, d).astype('float32')
nlist = 128
def test_proxy(self):
index = faiss.IndexProxy()
for i in range(3):
sub_index = faiss.IndexFlatL2(self.d)
sub_index.add(self.xb)
index.addIndex(sub_index)
assert index.d == self.d
index.search(self.xb, 10)
def test_resources(self):
# this used to crash!
index = faiss.index_cpu_to_gpu(faiss.StandardGpuResources(), 0,
faiss.IndexFlatL2(self.d))
index.add(self.xb)
def test_flat(self):
index = faiss.GpuIndexFlat(faiss.StandardGpuResources(),
self.d, faiss.METRIC_L2)
index.add(self.xb)
def test_ivfflat(self):
index = faiss.GpuIndexIVFFlat(
faiss.StandardGpuResources(),
self.d, self.nlist, faiss.METRIC_L2)
index.train(self.xb)
def test_ivfpq(self):
index_cpu = faiss.IndexIVFPQ(
faiss.IndexFlatL2(self.d),
self.d, self.nlist, 2, 8)
# speed up test
index_cpu.pq.cp.niter = 2
index_cpu.do_polysemous_training = False
index_cpu.train(self.xb)
index = faiss.GpuIndexIVFPQ(
faiss.StandardGpuResources(), index_cpu)
index.add(self.xb)
class TestShardedFlat(unittest.TestCase):
def test_sharded(self):
d = 32
nb = 1000
nq = 200
k = 10
rs = np.random.RandomState(123)
xb = rs.rand(nb, d).astype('float32')
xq = rs.rand(nq, d).astype('float32')
index_cpu = faiss.IndexFlatL2(d)
assert faiss.get_num_gpus() > 1
co = faiss.GpuMultipleClonerOptions()
co.shard = True
index = faiss.index_cpu_to_all_gpus(index_cpu, co, ngpu=2)
index.add(xb)
D, I = index.search(xq, k)
index_cpu.add(xb)
D_ref, I_ref = index_cpu.search(xq, k)
assert np.all(I == I_ref)
del index
index2 = faiss.index_cpu_to_all_gpus(index_cpu, co, ngpu=2)
D2, I2 = index2.search(xq, k)
assert np.all(I2 == I_ref)
try:
index2.add(xb)
except RuntimeError:
pass
else:
assert False, "this call should fail!"
if __name__ == '__main__':
unittest.main()
...@@ -25,7 +25,8 @@ StackDeviceMemory::Stack::Stack(int d, size_t sz) ...@@ -25,7 +25,8 @@ StackDeviceMemory::Stack::Stack(int d, size_t sz)
head_(nullptr), head_(nullptr),
mallocCurrent_(0), mallocCurrent_(0),
highWaterMemoryUsed_(0), highWaterMemoryUsed_(0),
highWaterMalloc_(0) { highWaterMalloc_(0),
cudaMallocWarning_(true) {
DeviceScope s(device_); DeviceScope s(device_);
cudaError_t err = cudaMalloc(&start_, size_); cudaError_t err = cudaMalloc(&start_, size_);
...@@ -41,7 +42,11 @@ StackDeviceMemory::Stack::Stack(int d, void* p, size_t sz, bool isOwner) ...@@ -41,7 +42,11 @@ StackDeviceMemory::Stack::Stack(int d, void* p, size_t sz, bool isOwner)
start_((char*) p), start_((char*) p),
end_(((char*) p) + sz), end_(((char*) p) + sz),
size_(sz), size_(sz),
head_((char*) p) { head_((char*) p),
mallocCurrent_(0),
highWaterMemoryUsed_(0),
highWaterMalloc_(0),
cudaMallocWarning_(true) {
} }
StackDeviceMemory::Stack::~Stack() { StackDeviceMemory::Stack::~Stack() {
...@@ -59,15 +64,18 @@ StackDeviceMemory::Stack::getSizeAvailable() const { ...@@ -59,15 +64,18 @@ StackDeviceMemory::Stack::getSizeAvailable() const {
} }
char* char*
StackDeviceMemory::Stack::getAlloc(size_t size, cudaStream_t stream) { StackDeviceMemory::Stack::getAlloc(size_t size,
cudaStream_t stream) {
if (size > (end_ - head_)) { if (size > (end_ - head_)) {
// Too large for our stack // Too large for our stack
DeviceScope s(device_); DeviceScope s(device_);
// Print our requested size before we attempt the allocation if (cudaMallocWarning_) {
fprintf(stderr, "WARN: increase temp memory to avoid cudaMalloc, " // Print our requested size before we attempt the allocation
"or decrease query/add size (alloc %zu B, highwater %zu B)\n", fprintf(stderr, "WARN: increase temp memory to avoid cudaMalloc, "
size, highWaterMalloc_); "or decrease query/add size (alloc %zu B, highwater %zu B)\n",
size, highWaterMalloc_);
}
char* p = nullptr; char* p = nullptr;
auto err = cudaMalloc(&p, size); auto err = cudaMalloc(&p, size);
...@@ -190,6 +198,11 @@ StackDeviceMemory::StackDeviceMemory(int device, ...@@ -190,6 +198,11 @@ StackDeviceMemory::StackDeviceMemory(int device,
StackDeviceMemory::~StackDeviceMemory() { StackDeviceMemory::~StackDeviceMemory() {
} }
void
StackDeviceMemory::setCudaMallocWarning(bool b) {
stack_.cudaMallocWarning_ = b;
}
int int
StackDeviceMemory::getDevice() const { StackDeviceMemory::getDevice() const {
return device_; return device_;
......
...@@ -29,6 +29,10 @@ class StackDeviceMemory : public DeviceMemory { ...@@ -29,6 +29,10 @@ class StackDeviceMemory : public DeviceMemory {
~StackDeviceMemory() override; ~StackDeviceMemory() override;
/// Enable or disable the warning about not having enough temporary memory
/// when cudaMalloc gets called
void setCudaMallocWarning(bool b);
int getDevice() const override; int getDevice() const override;
DeviceMemoryReservation getMemory(cudaStream_t stream, DeviceMemoryReservation getMemory(cudaStream_t stream,
...@@ -111,6 +115,9 @@ class StackDeviceMemory : public DeviceMemory { ...@@ -111,6 +115,9 @@ class StackDeviceMemory : public DeviceMemory {
/// What's the high water mark in terms of memory allocated via /// What's the high water mark in terms of memory allocated via
/// cudaMalloc? /// cudaMalloc?
size_t highWaterMalloc_; size_t highWaterMalloc_;
/// Whether or not a warning upon cudaMalloc is generated
bool cudaMallocWarning_;
}; };
/// Our device /// Our device
......
...@@ -226,7 +226,7 @@ static void write_ScalarQuantizer ( ...@@ -226,7 +226,7 @@ static void write_ScalarQuantizer (
WRITEVECTOR (ivsc->trained); WRITEVECTOR (ivsc->trained);
} }
static void write_InvertedLists (const InvertedLists *ils, IOWriter *f) { void write_InvertedLists (const InvertedLists *ils, IOWriter *f) {
if (ils == nullptr) { if (ils == nullptr) {
uint32_t h = fourcc ("il00"); uint32_t h = fourcc ("il00");
WRITE1 (h); WRITE1 (h);
......
...@@ -26,6 +26,7 @@ struct IndexIVF; ...@@ -26,6 +26,7 @@ struct IndexIVF;
struct ProductQuantizer; struct ProductQuantizer;
struct IOReader; struct IOReader;
struct IOWriter; struct IOWriter;
struct InvertedLists;
void write_index (const Index *idx, const char *fname); void write_index (const Index *idx, const char *fname);
void write_index (const Index *idx, FILE *f); void write_index (const Index *idx, FILE *f);
...@@ -35,8 +36,7 @@ void write_index_binary (const IndexBinary *idx, const char *fname); ...@@ -35,8 +36,7 @@ void write_index_binary (const IndexBinary *idx, const char *fname);
void write_index_binary (const IndexBinary *idx, FILE *f); void write_index_binary (const IndexBinary *idx, FILE *f);
void write_index_binary (const IndexBinary *idx, IOWriter *writer); void write_index_binary (const IndexBinary *idx, IOWriter *writer);
const int IO_FLAG_MMAP = 1; // try to memmap if possible
const int IO_FLAG_MMAP = 1;
const int IO_FLAG_READ_ONLY = 2; const int IO_FLAG_READ_ONLY = 2;
Index *read_index (const char *fname, int io_flags = 0); Index *read_index (const char *fname, int io_flags = 0);
...@@ -47,14 +47,14 @@ IndexBinary *read_index_binary (const char *fname, int io_flags = 0); ...@@ -47,14 +47,14 @@ IndexBinary *read_index_binary (const char *fname, int io_flags = 0);
IndexBinary *read_index_binary (FILE * f, int io_flags = 0); IndexBinary *read_index_binary (FILE * f, int io_flags = 0);
IndexBinary *read_index_binary (IOReader *reader, int io_flags = 0); IndexBinary *read_index_binary (IOReader *reader, int io_flags = 0);
void write_VectorTransform (const VectorTransform *vt, const char *fname); void write_VectorTransform (const VectorTransform *vt, const char *fname);
VectorTransform *read_VectorTransform (const char *fname); VectorTransform *read_VectorTransform (const char *fname);
ProductQuantizer * read_ProductQuantizer (const char*fname); ProductQuantizer * read_ProductQuantizer (const char*fname);
void write_ProductQuantizer (const ProductQuantizer*pq, const char *fname); void write_ProductQuantizer (const ProductQuantizer*pq, const char *fname);
void write_InvertedLists (const InvertedLists *ils, IOWriter *f);
InvertedLists *read_InvertedLists (IOReader *reader, int io_flags = 0);
/* cloning functions */ /* cloning functions */
Index *clone_index (const Index *); Index *clone_index (const Index *);
......
...@@ -306,6 +306,88 @@ for symbol in dir(this_module): ...@@ -306,6 +306,88 @@ for symbol in dir(this_module):
handle_ParameterSpace(the_class) handle_ParameterSpace(the_class)
###########################################
# Add Python references to objects
# we do this at the Python class wrapper level.
###########################################
def add_ref_in_constructor(the_class, parameter_no):
# adds a reference to argument parameter_no in self
# so that this argument does not get deallocated before self
original_init = the_class.__init__
def replacement_init(self, *args):
original_init(self, *args)
self.referenced_objects = [args[parameter_no]]
def replacement_init_multiple(self, *args):
original_init(self, *args)
pset = parameter_no[len(args)]
self.referenced_objects = [args[no] for no in pset]
if type(parameter_no) == dict:
# a list of parameters to keep, depending on the number of arguments
the_class.__init__ = replacement_init_multiple
else:
the_class.__init__ = replacement_init
def add_ref_in_method(the_class, method_name, parameter_no):
original_method = getattr(the_class, method_name)
def replacement_method(self, *args):
ref = args[parameter_no]
if not hasattr(self, 'referenced_objects'):
self.referenced_objects = [ref]
else:
self.referenced_objects.append(ref)
return original_method(self, *args)
setattr(the_class, method_name, replacement_method)
def add_ref_in_function(function_name, parameter_no):
# assumes the function returns an object
original_function = getattr(this_module, function_name)
def replacement_function(*args):
result = original_function(*args)
ref = args[parameter_no]
result.referenced_objects = [ref]
return result
setattr(this_module, function_name, replacement_function)
add_ref_in_constructor(IndexIVFFlat, 0)
add_ref_in_constructor(IndexIVFFlatDedup, 0)
add_ref_in_constructor(IndexPreTransform, {2: [0, 1], 1: [0]})
add_ref_in_method(IndexPreTransform, 'prepend_transform', 0)
add_ref_in_constructor(IndexIVFPQ, 0)
add_ref_in_constructor(IndexIVFPQR, 0)
add_ref_in_constructor(Index2Layer, 0)
add_ref_in_constructor(Level1Quantizer, 0)
add_ref_in_constructor(IndexIVFScalarQuantizer, 0)
add_ref_in_constructor(IndexIDMap, 0)
add_ref_in_constructor(IndexIDMap2, 0)
add_ref_in_method(IndexShards, 'add_shard', 0)
add_ref_in_constructor(IndexRefineFlat, 0)
add_ref_in_constructor(IndexBinaryIVF, 0)
add_ref_in_constructor(IndexBinaryFromFloat, 0)
if hasattr(this_module, 'IndexProxy'):
add_ref_in_method(IndexProxy, 'addIndex', 0)
# seems really marginal...
# remove_ref_from_method(IndexProxy, 'removeIndex', 0)
# handle all the GPUResources refs
add_ref_in_function('index_cpu_to_gpu', 0)
add_ref_in_constructor(GpuIndexFlat, 0)
add_ref_in_constructor(GpuIndexIVFFlat, 0)
add_ref_in_constructor(GpuIndexIVFPQ, 0)
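A short illustration of what these hooks buy in practice: constructing an index with a temporary quantizer no longer leaves a dangling C++ pointer, because the wrapper stores the argument in referenced_objects. A sketch:

import numpy as np
import faiss

d, nlist = 16, 32
# the temporary IndexFlatL2 used to be garbage-collected while the
# IndexIVFFlat still pointed to it; it is now kept alive by the wrapper
index = faiss.IndexIVFFlat(faiss.IndexFlatL2(d), d, nlist)
print(index.referenced_objects)

xb = np.random.rand(256, d).astype('float32')
index.train(xb)
index.add(xb)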
###########################################
# GPU functions
###########################################
def index_cpu_to_gpu_multiple_py(resources, index, co=None): def index_cpu_to_gpu_multiple_py(resources, index, co=None):
"""builds the C++ vectors for the GPU indices and the """builds the C++ vectors for the GPU indices and the
resources. Handles the common case where the resources are assigned to resources. Handles the common case where the resources are assigned to
...@@ -315,18 +397,22 @@ def index_cpu_to_gpu_multiple_py(resources, index, co=None): ...@@ -315,18 +397,22 @@ def index_cpu_to_gpu_multiple_py(resources, index, co=None):
for i, res in enumerate(resources): for i, res in enumerate(resources):
vdev.push_back(i) vdev.push_back(i)
vres.push_back(res) vres.push_back(res)
return index_cpu_to_gpu_multiple(vres, vdev, index, co) index = index_cpu_to_gpu_multiple(vres, vdev, index, co)
index.referenced_objects = resources
return index
def index_cpu_to_all_gpus(index, co=None, ngpu=-1): def index_cpu_to_all_gpus(index, co=None, ngpu=-1):
if ngpu == -1: if ngpu == -1:
ngpu = get_num_gpus() ngpu = get_num_gpus()
res = [StandardGpuResources() for i in range(ngpu)] res = [StandardGpuResources() for i in range(ngpu)]
index2 = index_cpu_to_gpu_multiple_py(res, index, co) index2 = index_cpu_to_gpu_multiple_py(res, index, co)
index2.dont_dealloc = res
return index2 return index2
###########################################
# numpy array / std::vector conversions
###########################################
# mapping from vector names in swigfaiss.swig and the numpy dtype names # mapping from vector names in swigfaiss.swig and the numpy dtype names
vector_name_map = { vector_name_map = {
'Float': 'float32', 'Float': 'float32',
...@@ -365,39 +451,9 @@ def copy_array_to_vector(a, v): ...@@ -365,39 +451,9 @@ def copy_array_to_vector(a, v):
memcpy(v.data(), swig_ptr(a), a.nbytes) memcpy(v.data(), swig_ptr(a), a.nbytes)
class Kmeans: ###########################################
# Wrapper for a few functions
def __init__(self, d, k, niter=25, verbose=False, spherical = False): ###########################################
self.d = d
self.k = k
self.cp = ClusteringParameters()
self.cp.niter = niter
self.cp.verbose = verbose
self.cp.spherical = spherical
self.centroids = None
def train(self, x):
assert x.flags.contiguous
n, d = x.shape
assert d == self.d
clus = Clustering(d, self.k, self.cp)
if self.cp.spherical:
self.index = IndexFlatIP(d)
else:
self.index = IndexFlatL2(d)
clus.train(x, self.index)
centroids = vector_float_to_array(clus.centroids)
self.centroids = centroids.reshape(self.k, d)
self.obj = vector_float_to_array(clus.obj)
return self.obj[-1]
def assign(self, x):
assert self.centroids is not None, "should train before assigning"
index = IndexFlatL2(self.d)
index.add(self.centroids)
D, I = index.search(x, 1)
return D.ravel(), I.ravel()
def kmin(array, k): def kmin(array, k):
"""return k smallest values (and their indices) of the lines of a """return k smallest values (and their indices) of the lines of a
...@@ -480,3 +536,42 @@ def replacement_map_search_multiple(self, keys): ...@@ -480,3 +536,42 @@ def replacement_map_search_multiple(self, keys):
replace_method(MapLong2Long, 'add', replacement_map_add) replace_method(MapLong2Long, 'add', replacement_map_add)
replace_method(MapLong2Long, 'search_multiple', replacement_map_search_multiple) replace_method(MapLong2Long, 'search_multiple', replacement_map_search_multiple)
###########################################
# Kmeans object
###########################################
class Kmeans:
def __init__(self, d, k, niter=25, verbose=False, spherical = False):
self.d = d
self.k = k
self.cp = ClusteringParameters()
self.cp.niter = niter
self.cp.verbose = verbose
self.cp.spherical = spherical
self.centroids = None
def train(self, x):
assert x.flags.contiguous
n, d = x.shape
assert d == self.d
clus = Clustering(d, self.k, self.cp)
if self.cp.spherical:
self.index = IndexFlatIP(d)
else:
self.index = IndexFlatL2(d)
clus.train(x, self.index)
centroids = vector_float_to_array(clus.centroids)
self.centroids = centroids.reshape(self.k, d)
self.obj = vector_float_to_array(clus.obj)
return self.obj[-1]
def assign(self, x):
assert self.centroids is not None, "should train before assigning"
index = IndexFlatL2(self.d)
index.add(self.centroids)
D, I = index.search(x, 1)
return D.ravel(), I.ravel()
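Typical usage of the wrapper, as a small sketch with made-up sizes:

import numpy as np
import faiss

d, k = 64, 16
x = np.random.rand(1000, d).astype('float32')

km = faiss.Kmeans(d, k, niter=20, verbose=True)
obj = km.train(x)    # final clustering objective (sum of squared distances)
D, I = km.assign(x)  # nearest centroid id and squared distance for each point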
...@@ -1082,6 +1082,10 @@ class RandomGenerator(_object): ...@@ -1082,6 +1082,10 @@ class RandomGenerator(_object):
__swig_getmethods__ = {} __swig_getmethods__ = {}
__getattr__ = lambda self, name: _swig_getattr(self, RandomGenerator, name) __getattr__ = lambda self, name: _swig_getattr(self, RandomGenerator, name)
__repr__ = _swig_repr __repr__ = _swig_repr
__swig_setmethods__["mt"] = _swigfaiss.RandomGenerator_mt_set
__swig_getmethods__["mt"] = _swigfaiss.RandomGenerator_mt_get
if _newclass:
mt = _swig_property(_swigfaiss.RandomGenerator_mt_get, _swigfaiss.RandomGenerator_mt_set)
def rand_long(self): def rand_long(self):
return _swigfaiss.RandomGenerator_rand_long(self) return _swigfaiss.RandomGenerator_rand_long(self)
...@@ -1095,8 +1099,8 @@ class RandomGenerator(_object): ...@@ -1095,8 +1099,8 @@ class RandomGenerator(_object):
def rand_double(self): def rand_double(self):
return _swigfaiss.RandomGenerator_rand_double(self) return _swigfaiss.RandomGenerator_rand_double(self)
def __init__(self, *args): def __init__(self, seed=1234):
this = _swigfaiss.new_RandomGenerator(*args) this = _swigfaiss.new_RandomGenerator(seed)
try: try:
self.this.append(this) self.this.append(this)
except __builtin__.Exception: except __builtin__.Exception:
...@@ -1262,6 +1266,14 @@ ivec_checksum = _swigfaiss.ivec_checksum ...@@ -1262,6 +1266,14 @@ ivec_checksum = _swigfaiss.ivec_checksum
def fvecs_maybe_subsample(d, n, nmax, x, verbose=False, seed=1234): def fvecs_maybe_subsample(d, n, nmax, x, verbose=False, seed=1234):
return _swigfaiss.fvecs_maybe_subsample(d, n, nmax, x, verbose, seed) return _swigfaiss.fvecs_maybe_subsample(d, n, nmax, x, verbose, seed)
fvecs_maybe_subsample = _swigfaiss.fvecs_maybe_subsample fvecs_maybe_subsample = _swigfaiss.fvecs_maybe_subsample
def binary_to_real(d, x_in, x_out):
return _swigfaiss.binary_to_real(d, x_in, x_out)
binary_to_real = _swigfaiss.binary_to_real
def real_to_binary(d, x_in, x_out):
return _swigfaiss.real_to_binary(d, x_in, x_out)
real_to_binary = _swigfaiss.real_to_binary
METRIC_INNER_PRODUCT = _swigfaiss.METRIC_INNER_PRODUCT METRIC_INNER_PRODUCT = _swigfaiss.METRIC_INNER_PRODUCT
METRIC_L2 = _swigfaiss.METRIC_L2 METRIC_L2 = _swigfaiss.METRIC_L2
class Index(_object): class Index(_object):
...@@ -2639,51 +2651,6 @@ class MultiIndexQuantizer2(MultiIndexQuantizer): ...@@ -2639,51 +2651,6 @@ class MultiIndexQuantizer2(MultiIndexQuantizer):
MultiIndexQuantizer2_swigregister = _swigfaiss.MultiIndexQuantizer2_swigregister MultiIndexQuantizer2_swigregister = _swigfaiss.MultiIndexQuantizer2_swigregister
MultiIndexQuantizer2_swigregister(MultiIndexQuantizer2) MultiIndexQuantizer2_swigregister(MultiIndexQuantizer2)
class Level1Quantizer(_object):
__swig_setmethods__ = {}
__setattr__ = lambda self, name, value: _swig_setattr(self, Level1Quantizer, name, value)
__swig_getmethods__ = {}
__getattr__ = lambda self, name: _swig_getattr(self, Level1Quantizer, name)
__repr__ = _swig_repr
__swig_setmethods__["quantizer"] = _swigfaiss.Level1Quantizer_quantizer_set
__swig_getmethods__["quantizer"] = _swigfaiss.Level1Quantizer_quantizer_get
if _newclass:
quantizer = _swig_property(_swigfaiss.Level1Quantizer_quantizer_get, _swigfaiss.Level1Quantizer_quantizer_set)
__swig_setmethods__["nlist"] = _swigfaiss.Level1Quantizer_nlist_set
__swig_getmethods__["nlist"] = _swigfaiss.Level1Quantizer_nlist_get
if _newclass:
nlist = _swig_property(_swigfaiss.Level1Quantizer_nlist_get, _swigfaiss.Level1Quantizer_nlist_set)
__swig_setmethods__["quantizer_trains_alone"] = _swigfaiss.Level1Quantizer_quantizer_trains_alone_set
__swig_getmethods__["quantizer_trains_alone"] = _swigfaiss.Level1Quantizer_quantizer_trains_alone_get
if _newclass:
quantizer_trains_alone = _swig_property(_swigfaiss.Level1Quantizer_quantizer_trains_alone_get, _swigfaiss.Level1Quantizer_quantizer_trains_alone_set)
__swig_setmethods__["own_fields"] = _swigfaiss.Level1Quantizer_own_fields_set
__swig_getmethods__["own_fields"] = _swigfaiss.Level1Quantizer_own_fields_get
if _newclass:
own_fields = _swig_property(_swigfaiss.Level1Quantizer_own_fields_get, _swigfaiss.Level1Quantizer_own_fields_set)
__swig_setmethods__["cp"] = _swigfaiss.Level1Quantizer_cp_set
__swig_getmethods__["cp"] = _swigfaiss.Level1Quantizer_cp_get
if _newclass:
cp = _swig_property(_swigfaiss.Level1Quantizer_cp_get, _swigfaiss.Level1Quantizer_cp_set)
__swig_setmethods__["clustering_index"] = _swigfaiss.Level1Quantizer_clustering_index_set
__swig_getmethods__["clustering_index"] = _swigfaiss.Level1Quantizer_clustering_index_get
if _newclass:
clustering_index = _swig_property(_swigfaiss.Level1Quantizer_clustering_index_get, _swigfaiss.Level1Quantizer_clustering_index_set)
def train_q1(self, n, x, verbose, metric_type):
return _swigfaiss.Level1Quantizer_train_q1(self, n, x, verbose, metric_type)
def __init__(self, *args):
this = _swigfaiss.new_Level1Quantizer(*args)
try:
self.this.append(this)
except __builtin__.Exception:
self.this = this
__swig_destroy__ = _swigfaiss.delete_Level1Quantizer
__del__ = lambda self: None
Level1Quantizer_swigregister = _swigfaiss.Level1Quantizer_swigregister
Level1Quantizer_swigregister(Level1Quantizer)
class InvertedLists(_object): class InvertedLists(_object):
__swig_setmethods__ = {} __swig_setmethods__ = {}
__setattr__ = lambda self, name, value: _swig_setattr(self, InvertedLists, name, value) __setattr__ = lambda self, name, value: _swig_setattr(self, InvertedLists, name, value)
...@@ -2711,6 +2678,12 @@ class InvertedLists(_object): ...@@ -2711,6 +2678,12 @@ class InvertedLists(_object):
def get_ids(self, list_no): def get_ids(self, list_no):
return _swigfaiss.InvertedLists_get_ids(self, list_no) return _swigfaiss.InvertedLists_get_ids(self, list_no)
def release_codes(self, codes):
return _swigfaiss.InvertedLists_release_codes(self, codes)
def release_ids(self, ids):
return _swigfaiss.InvertedLists_release_ids(self, ids)
def get_single_id(self, list_no, offset): def get_single_id(self, list_no, offset):
return _swigfaiss.InvertedLists_get_single_id(self, list_no, offset) return _swigfaiss.InvertedLists_get_single_id(self, list_no, offset)
...@@ -2737,6 +2710,9 @@ class InvertedLists(_object): ...@@ -2737,6 +2710,9 @@ class InvertedLists(_object):
def reset(self): def reset(self):
return _swigfaiss.InvertedLists_reset(self) return _swigfaiss.InvertedLists_reset(self)
def merge_from(self, oivf, add_id):
return _swigfaiss.InvertedLists_merge_from(self, oivf, add_id)
__swig_destroy__ = _swigfaiss.delete_InvertedLists __swig_destroy__ = _swigfaiss.delete_InvertedLists
__del__ = lambda self: None __del__ = lambda self: None
InvertedLists_swigregister = _swigfaiss.InvertedLists_swigregister InvertedLists_swigregister = _swigfaiss.InvertedLists_swigregister
...@@ -2790,6 +2766,107 @@ class ArrayInvertedLists(InvertedLists): ...@@ -2790,6 +2766,107 @@ class ArrayInvertedLists(InvertedLists):
ArrayInvertedLists_swigregister = _swigfaiss.ArrayInvertedLists_swigregister ArrayInvertedLists_swigregister = _swigfaiss.ArrayInvertedLists_swigregister
ArrayInvertedLists_swigregister(ArrayInvertedLists) ArrayInvertedLists_swigregister(ArrayInvertedLists)
class ConcatenatedInvertedLists(InvertedLists):
__swig_setmethods__ = {}
for _s in [InvertedLists]:
__swig_setmethods__.update(getattr(_s, '__swig_setmethods__', {}))
__setattr__ = lambda self, name, value: _swig_setattr(self, ConcatenatedInvertedLists, name, value)
__swig_getmethods__ = {}
for _s in [InvertedLists]:
__swig_getmethods__.update(getattr(_s, '__swig_getmethods__', {}))
__getattr__ = lambda self, name: _swig_getattr(self, ConcatenatedInvertedLists, name)
__repr__ = _swig_repr
__swig_setmethods__["ils"] = _swigfaiss.ConcatenatedInvertedLists_ils_set
__swig_getmethods__["ils"] = _swigfaiss.ConcatenatedInvertedLists_ils_get
if _newclass:
ils = _swig_property(_swigfaiss.ConcatenatedInvertedLists_ils_get, _swigfaiss.ConcatenatedInvertedLists_ils_set)
def __init__(self, nil, ils):
this = _swigfaiss.new_ConcatenatedInvertedLists(nil, ils)
try:
self.this.append(this)
except __builtin__.Exception:
self.this = this
def list_size(self, list_no):
return _swigfaiss.ConcatenatedInvertedLists_list_size(self, list_no)
def get_codes(self, list_no):
return _swigfaiss.ConcatenatedInvertedLists_get_codes(self, list_no)
def get_ids(self, list_no):
return _swigfaiss.ConcatenatedInvertedLists_get_ids(self, list_no)
def release_codes(self, codes):
return _swigfaiss.ConcatenatedInvertedLists_release_codes(self, codes)
def release_ids(self, ids):
return _swigfaiss.ConcatenatedInvertedLists_release_ids(self, ids)
def get_single_id(self, list_no, offset):
return _swigfaiss.ConcatenatedInvertedLists_get_single_id(self, list_no, offset)
def get_single_code(self, list_no, offset):
return _swigfaiss.ConcatenatedInvertedLists_get_single_code(self, list_no, offset)
def add_entries(self, list_no, n_entry, ids, code):
return _swigfaiss.ConcatenatedInvertedLists_add_entries(self, list_no, n_entry, ids, code)
def update_entries(self, list_no, offset, n_entry, ids, code):
return _swigfaiss.ConcatenatedInvertedLists_update_entries(self, list_no, offset, n_entry, ids, code)
def resize(self, list_no, new_size):
return _swigfaiss.ConcatenatedInvertedLists_resize(self, list_no, new_size)
__swig_destroy__ = _swigfaiss.delete_ConcatenatedInvertedLists
__del__ = lambda self: None
ConcatenatedInvertedLists_swigregister = _swigfaiss.ConcatenatedInvertedLists_swigregister
ConcatenatedInvertedLists_swigregister(ConcatenatedInvertedLists)
class Level1Quantizer(_object):
__swig_setmethods__ = {}
__setattr__ = lambda self, name, value: _swig_setattr(self, Level1Quantizer, name, value)
__swig_getmethods__ = {}
__getattr__ = lambda self, name: _swig_getattr(self, Level1Quantizer, name)
__repr__ = _swig_repr
__swig_setmethods__["quantizer"] = _swigfaiss.Level1Quantizer_quantizer_set
__swig_getmethods__["quantizer"] = _swigfaiss.Level1Quantizer_quantizer_get
if _newclass:
quantizer = _swig_property(_swigfaiss.Level1Quantizer_quantizer_get, _swigfaiss.Level1Quantizer_quantizer_set)
__swig_setmethods__["nlist"] = _swigfaiss.Level1Quantizer_nlist_set
__swig_getmethods__["nlist"] = _swigfaiss.Level1Quantizer_nlist_get
if _newclass:
nlist = _swig_property(_swigfaiss.Level1Quantizer_nlist_get, _swigfaiss.Level1Quantizer_nlist_set)
__swig_setmethods__["quantizer_trains_alone"] = _swigfaiss.Level1Quantizer_quantizer_trains_alone_set
__swig_getmethods__["quantizer_trains_alone"] = _swigfaiss.Level1Quantizer_quantizer_trains_alone_get
if _newclass:
quantizer_trains_alone = _swig_property(_swigfaiss.Level1Quantizer_quantizer_trains_alone_get, _swigfaiss.Level1Quantizer_quantizer_trains_alone_set)
__swig_setmethods__["own_fields"] = _swigfaiss.Level1Quantizer_own_fields_set
__swig_getmethods__["own_fields"] = _swigfaiss.Level1Quantizer_own_fields_get
if _newclass:
own_fields = _swig_property(_swigfaiss.Level1Quantizer_own_fields_get, _swigfaiss.Level1Quantizer_own_fields_set)
__swig_setmethods__["cp"] = _swigfaiss.Level1Quantizer_cp_set
__swig_getmethods__["cp"] = _swigfaiss.Level1Quantizer_cp_get
if _newclass:
cp = _swig_property(_swigfaiss.Level1Quantizer_cp_get, _swigfaiss.Level1Quantizer_cp_set)
__swig_setmethods__["clustering_index"] = _swigfaiss.Level1Quantizer_clustering_index_set
__swig_getmethods__["clustering_index"] = _swigfaiss.Level1Quantizer_clustering_index_get
if _newclass:
clustering_index = _swig_property(_swigfaiss.Level1Quantizer_clustering_index_get, _swigfaiss.Level1Quantizer_clustering_index_set)
def train_q1(self, n, x, verbose, metric_type):
return _swigfaiss.Level1Quantizer_train_q1(self, n, x, verbose, metric_type)
def __init__(self, *args):
this = _swigfaiss.new_Level1Quantizer(*args)
try:
self.this.append(this)
except __builtin__.Exception:
self.this = this
__swig_destroy__ = _swigfaiss.delete_Level1Quantizer
__del__ = lambda self: None
Level1Quantizer_swigregister = _swigfaiss.Level1Quantizer_swigregister
Level1Quantizer_swigregister(Level1Quantizer)
class IVFSearchParameters(_object): class IVFSearchParameters(_object):
__swig_setmethods__ = {} __swig_setmethods__ = {}
__setattr__ = lambda self, name, value: _swig_setattr(self, IVFSearchParameters, name, value) __setattr__ = lambda self, name, value: _swig_setattr(self, IVFSearchParameters, name, value)
...@@ -2891,6 +2968,9 @@ class IndexIVF(Index, Level1Quantizer): ...@@ -2891,6 +2968,9 @@ class IndexIVF(Index, Level1Quantizer):
def remove_ids(self, sel): def remove_ids(self, sel):
return _swigfaiss.IndexIVF_remove_ids(self, sel) return _swigfaiss.IndexIVF_remove_ids(self, sel)
def check_compatible_for_merge(self, other):
return _swigfaiss.IndexIVF_check_compatible_for_merge(self, other)
def merge_from(self, other, add_id): def merge_from(self, other, add_id):
return _swigfaiss.IndexIVF_merge_from(self, other, add_id) return _swigfaiss.IndexIVF_merge_from(self, other, add_id)
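The new check_compatible_for_merge / merge_from pair can be driven directly from Python. A minimal sketch (not part of the diff), assuming two IVF indexes that share the same trained quantizer; sizes are illustrative:

import numpy as np
import faiss

d = 32
xt = np.random.rand(1000, d).astype('float32')

index0 = faiss.IndexIVFFlat(faiss.IndexFlatL2(d), d, 16)
index0.train(xt)
index1 = faiss.clone_index(index0)            # same trained quantizer, still empty

index0.add(np.random.rand(500, d).astype('float32'))
index1.add(np.random.rand(500, d).astype('float32'))

index0.check_compatible_for_merge(index1)     # raises if the two indexes cannot be merged
index0.merge_from(index1, index0.ntotal)      # add_id is added to the ids coming from index1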
...@@ -4244,6 +4324,48 @@ class IndexBinaryIVF(IndexBinary): ...@@ -4244,6 +4324,48 @@ class IndexBinaryIVF(IndexBinary):
IndexBinaryIVF_swigregister = _swigfaiss.IndexBinaryIVF_swigregister IndexBinaryIVF_swigregister = _swigfaiss.IndexBinaryIVF_swigregister
IndexBinaryIVF_swigregister(IndexBinaryIVF) IndexBinaryIVF_swigregister(IndexBinaryIVF)
class IndexBinaryFromFloat(IndexBinary):
__swig_setmethods__ = {}
for _s in [IndexBinary]:
__swig_setmethods__.update(getattr(_s, '__swig_setmethods__', {}))
__setattr__ = lambda self, name, value: _swig_setattr(self, IndexBinaryFromFloat, name, value)
__swig_getmethods__ = {}
for _s in [IndexBinary]:
__swig_getmethods__.update(getattr(_s, '__swig_getmethods__', {}))
__getattr__ = lambda self, name: _swig_getattr(self, IndexBinaryFromFloat, name)
__repr__ = _swig_repr
__swig_setmethods__["index"] = _swigfaiss.IndexBinaryFromFloat_index_set
__swig_getmethods__["index"] = _swigfaiss.IndexBinaryFromFloat_index_get
if _newclass:
index = _swig_property(_swigfaiss.IndexBinaryFromFloat_index_get, _swigfaiss.IndexBinaryFromFloat_index_set)
__swig_setmethods__["own_fields"] = _swigfaiss.IndexBinaryFromFloat_own_fields_set
__swig_getmethods__["own_fields"] = _swigfaiss.IndexBinaryFromFloat_own_fields_get
if _newclass:
own_fields = _swig_property(_swigfaiss.IndexBinaryFromFloat_own_fields_get, _swigfaiss.IndexBinaryFromFloat_own_fields_set)
def __init__(self, index):
this = _swigfaiss.new_IndexBinaryFromFloat(index)
try:
self.this.append(this)
except __builtin__.Exception:
self.this = this
__swig_destroy__ = _swigfaiss.delete_IndexBinaryFromFloat
__del__ = lambda self: None
def add(self, n, x):
return _swigfaiss.IndexBinaryFromFloat_add(self, n, x)
def reset(self):
return _swigfaiss.IndexBinaryFromFloat_reset(self)
def search(self, n, x, k, distances, labels):
return _swigfaiss.IndexBinaryFromFloat_search(self, n, x, k, distances, labels)
def train(self, n, x):
return _swigfaiss.IndexBinaryFromFloat_train(self, n, x)
IndexBinaryFromFloat_swigregister = _swigfaiss.IndexBinaryFromFloat_swigregister
IndexBinaryFromFloat_swigregister(IndexBinaryFromFloat)
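A short usage sketch (not part of the diff) for the new IndexBinaryFromFloat wrapper, modeled on the TestIndexBinaryFromFloat tests added further down in this diff; sizes are illustrative:

import numpy as np
import faiss

d = 256                                       # dimension in bits, must be a multiple of 8
xb = np.random.randint(256, size=(1500, d // 8)).astype('uint8')
xq = np.random.randint(256, size=(10, d // 8)).astype('uint8')

sub_index = faiss.IndexFlatL2(d)              # float index that does the actual work
index = faiss.IndexBinaryFromFloat(sub_index)
index.add(xb)                                 # codes are expanded to +/-1 floats internally
D, I = index.search(xq, 10)                   # distances are Hamming distances (cf. the test below)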
class IndexIDMap(Index): class IndexIDMap(Index):
__swig_setmethods__ = {} __swig_setmethods__ = {}
for _s in [Index]: for _s in [Index]:
...@@ -4496,6 +4618,14 @@ def write_ProductQuantizer(pq, fname): ...@@ -4496,6 +4618,14 @@ def write_ProductQuantizer(pq, fname):
return _swigfaiss.write_ProductQuantizer(pq, fname) return _swigfaiss.write_ProductQuantizer(pq, fname)
write_ProductQuantizer = _swigfaiss.write_ProductQuantizer write_ProductQuantizer = _swigfaiss.write_ProductQuantizer
def write_InvertedLists(ils, f):
return _swigfaiss.write_InvertedLists(ils, f)
write_InvertedLists = _swigfaiss.write_InvertedLists
def read_InvertedLists(reader, io_flags=0):
return _swigfaiss.read_InvertedLists(reader, io_flags)
read_InvertedLists = _swigfaiss.read_InvertedLists
def clone_index(arg1): def clone_index(arg1):
return _swigfaiss.clone_index(arg1) return _swigfaiss.clone_index(arg1)
clone_index = _swigfaiss.clone_index clone_index = _swigfaiss.clone_index
...@@ -5316,9 +5446,10 @@ class VectorIOReader(IOReader): ...@@ -5316,9 +5446,10 @@ class VectorIOReader(IOReader):
__swig_getmethods__.update(getattr(_s, '__swig_getmethods__', {})) __swig_getmethods__.update(getattr(_s, '__swig_getmethods__', {}))
__getattr__ = lambda self, name: _swig_getattr(self, VectorIOReader, name) __getattr__ = lambda self, name: _swig_getattr(self, VectorIOReader, name)
__repr__ = _swig_repr __repr__ = _swig_repr
__swig_setmethods__["data"] = _swigfaiss.VectorIOReader_data_set
__swig_getmethods__["data"] = _swigfaiss.VectorIOReader_data_get __swig_getmethods__["data"] = _swigfaiss.VectorIOReader_data_get
if _newclass: if _newclass:
data = _swig_property(_swigfaiss.VectorIOReader_data_get) data = _swig_property(_swigfaiss.VectorIOReader_data_get, _swigfaiss.VectorIOReader_data_set)
__swig_setmethods__["rp"] = _swigfaiss.VectorIOReader_rp_set __swig_setmethods__["rp"] = _swigfaiss.VectorIOReader_rp_set
__swig_getmethods__["rp"] = _swigfaiss.VectorIOReader_rp_get __swig_getmethods__["rp"] = _swigfaiss.VectorIOReader_rp_get
if _newclass: if _newclass:
......
...@@ -21,9 +21,11 @@ ...@@ -21,9 +21,11 @@
%module swigfaiss; %module swigfaiss;
#endif #endif
// fbcode SWIG fails on warnings, so make them non-fatal
#pragma SWIG nowarn=321 #pragma SWIG nowarn=321
#pragma SWIG nowarn=403 #pragma SWIG nowarn=403
#pragma SWIG nowarn=325 #pragma SWIG nowarn=325
#pragma SWIG nowarn=389
typedef unsigned long uint64_t; typedef unsigned long uint64_t;
typedef uint64_t size_t; typedef uint64_t size_t;
...@@ -85,6 +87,7 @@ extern "C" { ...@@ -85,6 +87,7 @@ extern "C" {
#include "IndexBinaryFlat.h" #include "IndexBinaryFlat.h"
#include "IndexBinaryIVF.h" #include "IndexBinaryIVF.h"
#include "IndexBinaryFromFloat.h"
#include "index_io.h" #include "index_io.h"
...@@ -155,7 +158,6 @@ namespace std { ...@@ -155,7 +158,6 @@ namespace std {
%ignore faiss::hamming; %ignore faiss::hamming;
/******************************************************************* /*******************************************************************
* Parse headers * Parse headers
*******************************************************************/ *******************************************************************/
...@@ -258,6 +260,7 @@ int get_num_gpus() ...@@ -258,6 +260,7 @@ int get_num_gpus()
%include "IndexLSH.h" %include "IndexLSH.h"
%include "PolysemousTraining.h" %include "PolysemousTraining.h"
%include "IndexPQ.h" %include "IndexPQ.h"
%include "InvertedLists.h"
%include "IndexIVF.h" %include "IndexIVF.h"
%include "IndexScalarQuantizer.h" %include "IndexScalarQuantizer.h"
%include "IndexHNSW.h" %include "IndexHNSW.h"
...@@ -270,6 +273,7 @@ int get_num_gpus() ...@@ -270,6 +273,7 @@ int get_num_gpus()
%include "IndexBinary.h" %include "IndexBinary.h"
%include "IndexBinaryFlat.h" %include "IndexBinaryFlat.h"
%include "IndexBinaryIVF.h" %include "IndexBinaryIVF.h"
%include "IndexBinaryFromFloat.h"
...@@ -481,6 +485,7 @@ struct AsyncIndexSearchC { ...@@ -481,6 +485,7 @@ struct AsyncIndexSearchC {
%typemap(out) faiss::IndexBinary * { %typemap(out) faiss::IndexBinary * {
DOWNCAST ( IndexBinaryIVF ) DOWNCAST ( IndexBinaryIVF )
DOWNCAST ( IndexBinaryFlat ) DOWNCAST ( IndexBinaryFlat )
DOWNCAST ( IndexBinaryFromFloat )
// default for non-recognized classes // default for non-recognized classes
DOWNCAST ( IndexBinary ) DOWNCAST ( IndexBinary )
if ($1 == NULL) if ($1 == NULL)
......
...@@ -1195,6 +1195,9 @@ class StandardGpuResources(GpuResources): ...@@ -1195,6 +1195,9 @@ class StandardGpuResources(GpuResources):
def setDefaultNullStreamAllDevices(self): def setDefaultNullStreamAllDevices(self):
return _swigfaiss_gpu.StandardGpuResources_setDefaultNullStreamAllDevices(self) return _swigfaiss_gpu.StandardGpuResources_setDefaultNullStreamAllDevices(self)
def setCudaMallocWarning(self, b):
return _swigfaiss_gpu.StandardGpuResources_setCudaMallocWarning(self, b)
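A small sketch (not part of the diff) of where the new hook fits, assuming a GPU build of faiss; the surrounding calls are standard GpuIndex usage:

import faiss

res = faiss.StandardGpuResources()
res.setCudaMallocWarning(False)               # presumably silences warnings about cudaMalloc fallbacks
index = faiss.GpuIndexFlatL2(res, 64)         # any GPU index built on top of these resources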
def initializeForDevice(self, device): def initializeForDevice(self, device):
return _swigfaiss_gpu.StandardGpuResources_initializeForDevice(self, device) return _swigfaiss_gpu.StandardGpuResources_initializeForDevice(self, device)
...@@ -1229,6 +1232,10 @@ class RandomGenerator(_object): ...@@ -1229,6 +1232,10 @@ class RandomGenerator(_object):
__swig_getmethods__ = {} __swig_getmethods__ = {}
__getattr__ = lambda self, name: _swig_getattr(self, RandomGenerator, name) __getattr__ = lambda self, name: _swig_getattr(self, RandomGenerator, name)
__repr__ = _swig_repr __repr__ = _swig_repr
__swig_setmethods__["mt"] = _swigfaiss_gpu.RandomGenerator_mt_set
__swig_getmethods__["mt"] = _swigfaiss_gpu.RandomGenerator_mt_get
if _newclass:
mt = _swig_property(_swigfaiss_gpu.RandomGenerator_mt_get, _swigfaiss_gpu.RandomGenerator_mt_set)
def rand_long(self): def rand_long(self):
return _swigfaiss_gpu.RandomGenerator_rand_long(self) return _swigfaiss_gpu.RandomGenerator_rand_long(self)
...@@ -1242,8 +1249,8 @@ class RandomGenerator(_object): ...@@ -1242,8 +1249,8 @@ class RandomGenerator(_object):
def rand_double(self): def rand_double(self):
return _swigfaiss_gpu.RandomGenerator_rand_double(self) return _swigfaiss_gpu.RandomGenerator_rand_double(self)
def __init__(self, *args): def __init__(self, seed=1234):
this = _swigfaiss_gpu.new_RandomGenerator(*args) this = _swigfaiss_gpu.new_RandomGenerator(seed)
try: try:
self.this.append(this) self.this.append(this)
except __builtin__.Exception: except __builtin__.Exception:
...@@ -1409,6 +1416,14 @@ ivec_checksum = _swigfaiss_gpu.ivec_checksum ...@@ -1409,6 +1416,14 @@ ivec_checksum = _swigfaiss_gpu.ivec_checksum
def fvecs_maybe_subsample(d, n, nmax, x, verbose=False, seed=1234): def fvecs_maybe_subsample(d, n, nmax, x, verbose=False, seed=1234):
return _swigfaiss_gpu.fvecs_maybe_subsample(d, n, nmax, x, verbose, seed) return _swigfaiss_gpu.fvecs_maybe_subsample(d, n, nmax, x, verbose, seed)
fvecs_maybe_subsample = _swigfaiss_gpu.fvecs_maybe_subsample fvecs_maybe_subsample = _swigfaiss_gpu.fvecs_maybe_subsample
def binary_to_real(d, x_in, x_out):
return _swigfaiss_gpu.binary_to_real(d, x_in, x_out)
binary_to_real = _swigfaiss_gpu.binary_to_real
def real_to_binary(d, x_in, x_out):
return _swigfaiss_gpu.real_to_binary(d, x_in, x_out)
real_to_binary = _swigfaiss_gpu.real_to_binary
METRIC_INNER_PRODUCT = _swigfaiss_gpu.METRIC_INNER_PRODUCT METRIC_INNER_PRODUCT = _swigfaiss_gpu.METRIC_INNER_PRODUCT
METRIC_L2 = _swigfaiss_gpu.METRIC_L2 METRIC_L2 = _swigfaiss_gpu.METRIC_L2
class Index(_object): class Index(_object):
...@@ -2786,51 +2801,6 @@ class MultiIndexQuantizer2(MultiIndexQuantizer): ...@@ -2786,51 +2801,6 @@ class MultiIndexQuantizer2(MultiIndexQuantizer):
MultiIndexQuantizer2_swigregister = _swigfaiss_gpu.MultiIndexQuantizer2_swigregister MultiIndexQuantizer2_swigregister = _swigfaiss_gpu.MultiIndexQuantizer2_swigregister
MultiIndexQuantizer2_swigregister(MultiIndexQuantizer2) MultiIndexQuantizer2_swigregister(MultiIndexQuantizer2)
class Level1Quantizer(_object):
__swig_setmethods__ = {}
__setattr__ = lambda self, name, value: _swig_setattr(self, Level1Quantizer, name, value)
__swig_getmethods__ = {}
__getattr__ = lambda self, name: _swig_getattr(self, Level1Quantizer, name)
__repr__ = _swig_repr
__swig_setmethods__["quantizer"] = _swigfaiss_gpu.Level1Quantizer_quantizer_set
__swig_getmethods__["quantizer"] = _swigfaiss_gpu.Level1Quantizer_quantizer_get
if _newclass:
quantizer = _swig_property(_swigfaiss_gpu.Level1Quantizer_quantizer_get, _swigfaiss_gpu.Level1Quantizer_quantizer_set)
__swig_setmethods__["nlist"] = _swigfaiss_gpu.Level1Quantizer_nlist_set
__swig_getmethods__["nlist"] = _swigfaiss_gpu.Level1Quantizer_nlist_get
if _newclass:
nlist = _swig_property(_swigfaiss_gpu.Level1Quantizer_nlist_get, _swigfaiss_gpu.Level1Quantizer_nlist_set)
__swig_setmethods__["quantizer_trains_alone"] = _swigfaiss_gpu.Level1Quantizer_quantizer_trains_alone_set
__swig_getmethods__["quantizer_trains_alone"] = _swigfaiss_gpu.Level1Quantizer_quantizer_trains_alone_get
if _newclass:
quantizer_trains_alone = _swig_property(_swigfaiss_gpu.Level1Quantizer_quantizer_trains_alone_get, _swigfaiss_gpu.Level1Quantizer_quantizer_trains_alone_set)
__swig_setmethods__["own_fields"] = _swigfaiss_gpu.Level1Quantizer_own_fields_set
__swig_getmethods__["own_fields"] = _swigfaiss_gpu.Level1Quantizer_own_fields_get
if _newclass:
own_fields = _swig_property(_swigfaiss_gpu.Level1Quantizer_own_fields_get, _swigfaiss_gpu.Level1Quantizer_own_fields_set)
__swig_setmethods__["cp"] = _swigfaiss_gpu.Level1Quantizer_cp_set
__swig_getmethods__["cp"] = _swigfaiss_gpu.Level1Quantizer_cp_get
if _newclass:
cp = _swig_property(_swigfaiss_gpu.Level1Quantizer_cp_get, _swigfaiss_gpu.Level1Quantizer_cp_set)
__swig_setmethods__["clustering_index"] = _swigfaiss_gpu.Level1Quantizer_clustering_index_set
__swig_getmethods__["clustering_index"] = _swigfaiss_gpu.Level1Quantizer_clustering_index_get
if _newclass:
clustering_index = _swig_property(_swigfaiss_gpu.Level1Quantizer_clustering_index_get, _swigfaiss_gpu.Level1Quantizer_clustering_index_set)
def train_q1(self, n, x, verbose, metric_type):
return _swigfaiss_gpu.Level1Quantizer_train_q1(self, n, x, verbose, metric_type)
def __init__(self, *args):
this = _swigfaiss_gpu.new_Level1Quantizer(*args)
try:
self.this.append(this)
except __builtin__.Exception:
self.this = this
__swig_destroy__ = _swigfaiss_gpu.delete_Level1Quantizer
__del__ = lambda self: None
Level1Quantizer_swigregister = _swigfaiss_gpu.Level1Quantizer_swigregister
Level1Quantizer_swigregister(Level1Quantizer)
class InvertedLists(_object): class InvertedLists(_object):
__swig_setmethods__ = {} __swig_setmethods__ = {}
__setattr__ = lambda self, name, value: _swig_setattr(self, InvertedLists, name, value) __setattr__ = lambda self, name, value: _swig_setattr(self, InvertedLists, name, value)
...@@ -2858,6 +2828,12 @@ class InvertedLists(_object): ...@@ -2858,6 +2828,12 @@ class InvertedLists(_object):
def get_ids(self, list_no): def get_ids(self, list_no):
return _swigfaiss_gpu.InvertedLists_get_ids(self, list_no) return _swigfaiss_gpu.InvertedLists_get_ids(self, list_no)
def release_codes(self, codes):
return _swigfaiss_gpu.InvertedLists_release_codes(self, codes)
def release_ids(self, ids):
return _swigfaiss_gpu.InvertedLists_release_ids(self, ids)
def get_single_id(self, list_no, offset): def get_single_id(self, list_no, offset):
return _swigfaiss_gpu.InvertedLists_get_single_id(self, list_no, offset) return _swigfaiss_gpu.InvertedLists_get_single_id(self, list_no, offset)
...@@ -2884,6 +2860,9 @@ class InvertedLists(_object): ...@@ -2884,6 +2860,9 @@ class InvertedLists(_object):
def reset(self): def reset(self):
return _swigfaiss_gpu.InvertedLists_reset(self) return _swigfaiss_gpu.InvertedLists_reset(self)
def merge_from(self, oivf, add_id):
return _swigfaiss_gpu.InvertedLists_merge_from(self, oivf, add_id)
__swig_destroy__ = _swigfaiss_gpu.delete_InvertedLists __swig_destroy__ = _swigfaiss_gpu.delete_InvertedLists
__del__ = lambda self: None __del__ = lambda self: None
InvertedLists_swigregister = _swigfaiss_gpu.InvertedLists_swigregister InvertedLists_swigregister = _swigfaiss_gpu.InvertedLists_swigregister
...@@ -2937,6 +2916,107 @@ class ArrayInvertedLists(InvertedLists): ...@@ -2937,6 +2916,107 @@ class ArrayInvertedLists(InvertedLists):
ArrayInvertedLists_swigregister = _swigfaiss_gpu.ArrayInvertedLists_swigregister ArrayInvertedLists_swigregister = _swigfaiss_gpu.ArrayInvertedLists_swigregister
ArrayInvertedLists_swigregister(ArrayInvertedLists) ArrayInvertedLists_swigregister(ArrayInvertedLists)
class ConcatenatedInvertedLists(InvertedLists):
__swig_setmethods__ = {}
for _s in [InvertedLists]:
__swig_setmethods__.update(getattr(_s, '__swig_setmethods__', {}))
__setattr__ = lambda self, name, value: _swig_setattr(self, ConcatenatedInvertedLists, name, value)
__swig_getmethods__ = {}
for _s in [InvertedLists]:
__swig_getmethods__.update(getattr(_s, '__swig_getmethods__', {}))
__getattr__ = lambda self, name: _swig_getattr(self, ConcatenatedInvertedLists, name)
__repr__ = _swig_repr
__swig_setmethods__["ils"] = _swigfaiss_gpu.ConcatenatedInvertedLists_ils_set
__swig_getmethods__["ils"] = _swigfaiss_gpu.ConcatenatedInvertedLists_ils_get
if _newclass:
ils = _swig_property(_swigfaiss_gpu.ConcatenatedInvertedLists_ils_get, _swigfaiss_gpu.ConcatenatedInvertedLists_ils_set)
def __init__(self, nil, ils):
this = _swigfaiss_gpu.new_ConcatenatedInvertedLists(nil, ils)
try:
self.this.append(this)
except __builtin__.Exception:
self.this = this
def list_size(self, list_no):
return _swigfaiss_gpu.ConcatenatedInvertedLists_list_size(self, list_no)
def get_codes(self, list_no):
return _swigfaiss_gpu.ConcatenatedInvertedLists_get_codes(self, list_no)
def get_ids(self, list_no):
return _swigfaiss_gpu.ConcatenatedInvertedLists_get_ids(self, list_no)
def release_codes(self, codes):
return _swigfaiss_gpu.ConcatenatedInvertedLists_release_codes(self, codes)
def release_ids(self, ids):
return _swigfaiss_gpu.ConcatenatedInvertedLists_release_ids(self, ids)
def get_single_id(self, list_no, offset):
return _swigfaiss_gpu.ConcatenatedInvertedLists_get_single_id(self, list_no, offset)
def get_single_code(self, list_no, offset):
return _swigfaiss_gpu.ConcatenatedInvertedLists_get_single_code(self, list_no, offset)
def add_entries(self, list_no, n_entry, ids, code):
return _swigfaiss_gpu.ConcatenatedInvertedLists_add_entries(self, list_no, n_entry, ids, code)
def update_entries(self, list_no, offset, n_entry, ids, code):
return _swigfaiss_gpu.ConcatenatedInvertedLists_update_entries(self, list_no, offset, n_entry, ids, code)
def resize(self, list_no, new_size):
return _swigfaiss_gpu.ConcatenatedInvertedLists_resize(self, list_no, new_size)
__swig_destroy__ = _swigfaiss_gpu.delete_ConcatenatedInvertedLists
__del__ = lambda self: None
ConcatenatedInvertedLists_swigregister = _swigfaiss_gpu.ConcatenatedInvertedLists_swigregister
ConcatenatedInvertedLists_swigregister(ConcatenatedInvertedLists)
class Level1Quantizer(_object):
__swig_setmethods__ = {}
__setattr__ = lambda self, name, value: _swig_setattr(self, Level1Quantizer, name, value)
__swig_getmethods__ = {}
__getattr__ = lambda self, name: _swig_getattr(self, Level1Quantizer, name)
__repr__ = _swig_repr
__swig_setmethods__["quantizer"] = _swigfaiss_gpu.Level1Quantizer_quantizer_set
__swig_getmethods__["quantizer"] = _swigfaiss_gpu.Level1Quantizer_quantizer_get
if _newclass:
quantizer = _swig_property(_swigfaiss_gpu.Level1Quantizer_quantizer_get, _swigfaiss_gpu.Level1Quantizer_quantizer_set)
__swig_setmethods__["nlist"] = _swigfaiss_gpu.Level1Quantizer_nlist_set
__swig_getmethods__["nlist"] = _swigfaiss_gpu.Level1Quantizer_nlist_get
if _newclass:
nlist = _swig_property(_swigfaiss_gpu.Level1Quantizer_nlist_get, _swigfaiss_gpu.Level1Quantizer_nlist_set)
__swig_setmethods__["quantizer_trains_alone"] = _swigfaiss_gpu.Level1Quantizer_quantizer_trains_alone_set
__swig_getmethods__["quantizer_trains_alone"] = _swigfaiss_gpu.Level1Quantizer_quantizer_trains_alone_get
if _newclass:
quantizer_trains_alone = _swig_property(_swigfaiss_gpu.Level1Quantizer_quantizer_trains_alone_get, _swigfaiss_gpu.Level1Quantizer_quantizer_trains_alone_set)
__swig_setmethods__["own_fields"] = _swigfaiss_gpu.Level1Quantizer_own_fields_set
__swig_getmethods__["own_fields"] = _swigfaiss_gpu.Level1Quantizer_own_fields_get
if _newclass:
own_fields = _swig_property(_swigfaiss_gpu.Level1Quantizer_own_fields_get, _swigfaiss_gpu.Level1Quantizer_own_fields_set)
__swig_setmethods__["cp"] = _swigfaiss_gpu.Level1Quantizer_cp_set
__swig_getmethods__["cp"] = _swigfaiss_gpu.Level1Quantizer_cp_get
if _newclass:
cp = _swig_property(_swigfaiss_gpu.Level1Quantizer_cp_get, _swigfaiss_gpu.Level1Quantizer_cp_set)
__swig_setmethods__["clustering_index"] = _swigfaiss_gpu.Level1Quantizer_clustering_index_set
__swig_getmethods__["clustering_index"] = _swigfaiss_gpu.Level1Quantizer_clustering_index_get
if _newclass:
clustering_index = _swig_property(_swigfaiss_gpu.Level1Quantizer_clustering_index_get, _swigfaiss_gpu.Level1Quantizer_clustering_index_set)
def train_q1(self, n, x, verbose, metric_type):
return _swigfaiss_gpu.Level1Quantizer_train_q1(self, n, x, verbose, metric_type)
def __init__(self, *args):
this = _swigfaiss_gpu.new_Level1Quantizer(*args)
try:
self.this.append(this)
except __builtin__.Exception:
self.this = this
__swig_destroy__ = _swigfaiss_gpu.delete_Level1Quantizer
__del__ = lambda self: None
Level1Quantizer_swigregister = _swigfaiss_gpu.Level1Quantizer_swigregister
Level1Quantizer_swigregister(Level1Quantizer)
class IVFSearchParameters(_object): class IVFSearchParameters(_object):
__swig_setmethods__ = {} __swig_setmethods__ = {}
__setattr__ = lambda self, name, value: _swig_setattr(self, IVFSearchParameters, name, value) __setattr__ = lambda self, name, value: _swig_setattr(self, IVFSearchParameters, name, value)
...@@ -3038,6 +3118,9 @@ class IndexIVF(Index, Level1Quantizer): ...@@ -3038,6 +3118,9 @@ class IndexIVF(Index, Level1Quantizer):
def remove_ids(self, sel): def remove_ids(self, sel):
return _swigfaiss_gpu.IndexIVF_remove_ids(self, sel) return _swigfaiss_gpu.IndexIVF_remove_ids(self, sel)
def check_compatible_for_merge(self, other):
return _swigfaiss_gpu.IndexIVF_check_compatible_for_merge(self, other)
def merge_from(self, other, add_id): def merge_from(self, other, add_id):
return _swigfaiss_gpu.IndexIVF_merge_from(self, other, add_id) return _swigfaiss_gpu.IndexIVF_merge_from(self, other, add_id)
...@@ -4391,6 +4474,48 @@ class IndexBinaryIVF(IndexBinary): ...@@ -4391,6 +4474,48 @@ class IndexBinaryIVF(IndexBinary):
IndexBinaryIVF_swigregister = _swigfaiss_gpu.IndexBinaryIVF_swigregister IndexBinaryIVF_swigregister = _swigfaiss_gpu.IndexBinaryIVF_swigregister
IndexBinaryIVF_swigregister(IndexBinaryIVF) IndexBinaryIVF_swigregister(IndexBinaryIVF)
class IndexBinaryFromFloat(IndexBinary):
__swig_setmethods__ = {}
for _s in [IndexBinary]:
__swig_setmethods__.update(getattr(_s, '__swig_setmethods__', {}))
__setattr__ = lambda self, name, value: _swig_setattr(self, IndexBinaryFromFloat, name, value)
__swig_getmethods__ = {}
for _s in [IndexBinary]:
__swig_getmethods__.update(getattr(_s, '__swig_getmethods__', {}))
__getattr__ = lambda self, name: _swig_getattr(self, IndexBinaryFromFloat, name)
__repr__ = _swig_repr
__swig_setmethods__["index"] = _swigfaiss_gpu.IndexBinaryFromFloat_index_set
__swig_getmethods__["index"] = _swigfaiss_gpu.IndexBinaryFromFloat_index_get
if _newclass:
index = _swig_property(_swigfaiss_gpu.IndexBinaryFromFloat_index_get, _swigfaiss_gpu.IndexBinaryFromFloat_index_set)
__swig_setmethods__["own_fields"] = _swigfaiss_gpu.IndexBinaryFromFloat_own_fields_set
__swig_getmethods__["own_fields"] = _swigfaiss_gpu.IndexBinaryFromFloat_own_fields_get
if _newclass:
own_fields = _swig_property(_swigfaiss_gpu.IndexBinaryFromFloat_own_fields_get, _swigfaiss_gpu.IndexBinaryFromFloat_own_fields_set)
def __init__(self, index):
this = _swigfaiss_gpu.new_IndexBinaryFromFloat(index)
try:
self.this.append(this)
except __builtin__.Exception:
self.this = this
__swig_destroy__ = _swigfaiss_gpu.delete_IndexBinaryFromFloat
__del__ = lambda self: None
def add(self, n, x):
return _swigfaiss_gpu.IndexBinaryFromFloat_add(self, n, x)
def reset(self):
return _swigfaiss_gpu.IndexBinaryFromFloat_reset(self)
def search(self, n, x, k, distances, labels):
return _swigfaiss_gpu.IndexBinaryFromFloat_search(self, n, x, k, distances, labels)
def train(self, n, x):
return _swigfaiss_gpu.IndexBinaryFromFloat_train(self, n, x)
IndexBinaryFromFloat_swigregister = _swigfaiss_gpu.IndexBinaryFromFloat_swigregister
IndexBinaryFromFloat_swigregister(IndexBinaryFromFloat)
class IndexIDMap(Index): class IndexIDMap(Index):
__swig_setmethods__ = {} __swig_setmethods__ = {}
for _s in [Index]: for _s in [Index]:
...@@ -5229,6 +5354,14 @@ def write_ProductQuantizer(pq, fname): ...@@ -5229,6 +5354,14 @@ def write_ProductQuantizer(pq, fname):
return _swigfaiss_gpu.write_ProductQuantizer(pq, fname) return _swigfaiss_gpu.write_ProductQuantizer(pq, fname)
write_ProductQuantizer = _swigfaiss_gpu.write_ProductQuantizer write_ProductQuantizer = _swigfaiss_gpu.write_ProductQuantizer
def write_InvertedLists(ils, f):
return _swigfaiss_gpu.write_InvertedLists(ils, f)
write_InvertedLists = _swigfaiss_gpu.write_InvertedLists
def read_InvertedLists(reader, io_flags=0):
return _swigfaiss_gpu.read_InvertedLists(reader, io_flags)
read_InvertedLists = _swigfaiss_gpu.read_InvertedLists
def clone_index(arg1): def clone_index(arg1):
return _swigfaiss_gpu.clone_index(arg1) return _swigfaiss_gpu.clone_index(arg1)
clone_index = _swigfaiss_gpu.clone_index clone_index = _swigfaiss_gpu.clone_index
...@@ -6089,9 +6222,10 @@ class VectorIOReader(IOReader): ...@@ -6089,9 +6222,10 @@ class VectorIOReader(IOReader):
__swig_getmethods__.update(getattr(_s, '__swig_getmethods__', {})) __swig_getmethods__.update(getattr(_s, '__swig_getmethods__', {}))
__getattr__ = lambda self, name: _swig_getattr(self, VectorIOReader, name) __getattr__ = lambda self, name: _swig_getattr(self, VectorIOReader, name)
__repr__ = _swig_repr __repr__ = _swig_repr
__swig_setmethods__["data"] = _swigfaiss_gpu.VectorIOReader_data_set
__swig_getmethods__["data"] = _swigfaiss_gpu.VectorIOReader_data_get __swig_getmethods__["data"] = _swigfaiss_gpu.VectorIOReader_data_get
if _newclass: if _newclass:
data = _swig_property(_swigfaiss_gpu.VectorIOReader_data_get) data = _swig_property(_swigfaiss_gpu.VectorIOReader_data_get, _swigfaiss_gpu.VectorIOReader_data_set)
__swig_setmethods__["rp"] = _swigfaiss_gpu.VectorIOReader_rp_set __swig_setmethods__["rp"] = _swigfaiss_gpu.VectorIOReader_rp_set
__swig_getmethods__["rp"] = _swigfaiss_gpu.VectorIOReader_rp_get __swig_getmethods__["rp"] = _swigfaiss_gpu.VectorIOReader_rp_get
if _newclass: if _newclass:
......
...@@ -38,7 +38,7 @@ class TestBinaryFlat(unittest.TestCase): ...@@ -38,7 +38,7 @@ class TestBinaryFlat(unittest.TestCase):
index.add(self.xb) index.add(self.xb)
D, I = index.search(self.xq, 3) D, I = index.search(self.xq, 3)
tmpnam = tempfile.NamedTemporaryFile().name _, tmpnam = tempfile.mkstemp()
try: try:
faiss.write_index_binary(index, tmpnam) faiss.write_index_binary(index, tmpnam)
...@@ -75,7 +75,7 @@ class TestBinaryIVF(unittest.TestCase): ...@@ -75,7 +75,7 @@ class TestBinaryIVF(unittest.TestCase):
index.add(self.xb) index.add(self.xb)
D, I = index.search(self.xq, 3) D, I = index.search(self.xq, 3)
tmpnam = tempfile.NamedTemporaryFile().name _, tmpnam = tempfile.mkstemp()
try: try:
faiss.write_index_binary(index, tmpnam) faiss.write_index_binary(index, tmpnam)
......
...@@ -207,11 +207,31 @@ class TestOrthognalReconstruct(unittest.TestCase): ...@@ -207,11 +207,31 @@ class TestOrthognalReconstruct(unittest.TestCase):
x = rs.rand(30, 20).astype('float32') x = rs.rand(30, 20).astype('float32')
xt = lt.apply_py(x) xt = lt.apply_py(x)
try: try:
xtt = lt.reverse_transform(xt) lt.reverse_transform(xt)
except Exception: except Exception:
pass pass
else: else:
                self.assertFalse('should have raised an exception')
class TestMAdd(unittest.TestCase):
def test_1(self):
# try with dimensions that are multiples of 16 or not
rs = np.random.RandomState(123)
swig_ptr = faiss.swig_ptr
for dim in 16, 32, 20, 25:
for repeat in 1, 2, 3, 4, 5:
a = rs.rand(dim).astype('float32')
b = rs.rand(dim).astype('float32')
c = np.zeros(dim, dtype='float32')
bf = rs.uniform(5.0) - 2.5
idx = faiss.fvec_madd_and_argmin(
dim, swig_ptr(a), bf, swig_ptr(b),
swig_ptr(c))
ref_c = a + b * bf
assert np.abs(c - ref_c).max() < 1e-5
assert idx == ref_c.argmin()
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()
...@@ -30,7 +30,7 @@ def get_dataset_2(d, nb, nt, nq): ...@@ -30,7 +30,7 @@ def get_dataset_2(d, nb, nt, nq):
""" """
d1 = 10 # intrinsic dimension (more or less) d1 = 10 # intrinsic dimension (more or less)
n = nb + nt + nq n = nb + nt + nq
rs = np.random.RandomState(1234) rs = np.random.RandomState(1338)
x = rs.normal(size=(n, d1)) x = rs.normal(size=(n, d1))
x = np.dot(x, rs.rand(d1, d)) x = np.dot(x, rs.rand(d1, d))
# now we have a d1-dim ellipsoid in d-dimensional space # now we have a d1-dim ellipsoid in d-dimensional space
......
# Copyright (c) 2015-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the BSD+Patents license found in the
# LICENSE file in the root directory of this source tree.
#! /usr/bin/env python2
import numpy as np
import unittest
import faiss
# translation of test_knn.lua
def random_unitary(n, d, seed):
x = faiss.randn(n * d, seed).reshape(n, d)
faiss.normalize_L2(x)
return x
class Randu10k:
def __init__(self):
self.nb = 10000
self.nq = 1000
self.nt = 10000
self.d = 128
self.xb = random_unitary(self.nb, self.d, 1)
self.xt = random_unitary(self.nt, self.d, 2)
self.xq = random_unitary(self.nq, self.d, 3)
dotprods = np.dot(self.xq, self.xb.T)
self.gt = dotprods.argmax(1)
self.k = 100
def launch(self, name, index):
if not index.is_trained:
index.train(self.xt)
index.add(self.xb)
return index.search(self.xq, self.k)
def evalres(self, res):
D, I = res
e = {}
for rank in 1, 10, 100:
e[rank] = (I[:, :rank] == self.gt.reshape(-1, 1)).sum() / float(self.nq)
return e
ev = Randu10k()
d = ev.d
# Parameters for inverted indexes
ncentroids = int(4 * np.sqrt(ev.nb))
kprobe = int(np.sqrt(ncentroids))
# Parameters for LSH
nbits = d
# Parameters for indexes involving PQ
M = d / 8 # for PQ: #subquantizers
nbits_per_index = 8 # for PQ
class IndexAccuracy(unittest.TestCase):
def test_IndexFlatIP(self):
q = faiss.IndexFlatIP(d) # Ask inner product
res = ev.launch('FLAT / IP', q)
e = ev.evalres(res)
assert e[1] == 1.0
def test_IndexFlatL2(self):
q = faiss.IndexFlatL2(d)
res = ev.launch('FLAT / L2', q)
e = ev.evalres(res)
assert e[1] == 1.0
def test_ivf_kmeans(self):
ivfk = faiss.IndexIVFFlat(faiss.IndexFlatL2(d), d, ncentroids)
ivfk.nprobe = kprobe
res = ev.launch('IVF K-means', ivfk)
e = ev.evalres(res)
# should give 0.260 0.260 0.260
assert e[1] > 0.2
def test_indexLSH(self):
q = faiss.IndexLSH(d, nbits)
res = ev.launch('FLAT / LSH Cosine', q)
e = ev.evalres(res)
# should give 0.070 0.250 0.580
assert e[10] > 0.2
def test_IndexLSH_32_48(self):
# CHECK: the difference between 32 and 48 does not make much sense
for nbits2 in 32, 48:
q = faiss.IndexLSH(d, nbits2)
res = ev.launch('LSH half size', q)
e = ev.evalres(res)
# should give 0.003 0.019 0.108
assert e[10] > 0.018, e
def test_IndexPQ(self):
q = faiss.IndexPQ(d, M, nbits_per_index)
res = ev.launch('FLAT / PQ L2', q)
e = ev.evalres(res)
# should give 0.070 0.230 0.260
assert e[10] > 0.2
# Approximate search module: PQ with inner product distance
def test_IndexPQ_ip(self):
q = faiss.IndexPQ(d, M, nbits_per_index, faiss.METRIC_INNER_PRODUCT)
res = ev.launch('FLAT / PQ IP', q)
e = ev.evalres(res)
# should give 0.070 0.230 0.260
#(same result as regular PQ on normalized distances)
assert e[10] > 0.2
def test_IndexIVFPQ(self):
ivfpq = faiss.IndexIVFPQ(faiss.IndexFlatL2(d), d, ncentroids, M, 8)
ivfpq.nprobe = kprobe
res = ev.launch('IVF PQ', ivfpq)
e = ev.evalres(res)
# should give 0.070 0.230 0.260
assert e[10] > 0.2
# TODO: translate evaluation of nested
# Approximate search: PQ with full vector refinement
def test_IndexPQ_refined(self):
q = faiss.IndexPQ(d, M, nbits_per_index)
res = ev.launch('PQ non-refined', q)
e = ev.evalres(res)
q.reset()
rq = faiss.IndexRefineFlat(q)
res = ev.launch('PQ refined', rq)
e2 = ev.evalres(res)
assert e2[10] >= e[10]
rq.k_factor = 4
res = ev.launch('PQ refined*4', rq)
e3 = ev.evalres(res)
assert e3[10] >= e2[10]
def test_polysemous(self):
index = faiss.IndexPQ(d, M, nbits_per_index)
index.do_polysemous_training = True
# reduce nb iterations to speed up training for the test
index.polysemous_training.n_iter = 50000
index.polysemous_training.n_redo = 1
res = ev.launch('normal PQ', index)
e_baseline = ev.evalres(res)
index.search_type = faiss.IndexPQ.ST_polysemous
index.polysemous_ht = int(M / 16. * 58)
stats = faiss.cvar.indexPQ_stats
stats.reset()
res = ev.launch('Polysemous ht=%d' % index.polysemous_ht,
index)
e_polysemous = ev.evalres(res)
print(e_baseline, e_polysemous, index.polysemous_ht)
print(stats.n_hamming_pass, stats.ncode)
# The randu dataset is difficult, so we are not too picky on
# the results. Here we assert that we have < 10 % loss when
# computing full PQ on fewer than 20% of the data.
assert stats.n_hamming_pass < stats.ncode / 5
# Test disabled because difference is 0.17 on aarch64
# TODO check why???
# assert e_polysemous[10] > e_baseline[10] - 0.1
def test_ScalarQuantizer(self):
quantizer = faiss.IndexFlatL2(d)
ivfpq = faiss.IndexIVFScalarQuantizer(
quantizer, d, ncentroids,
faiss.ScalarQuantizer.QT_8bit)
ivfpq.nprobe = kprobe
res = ev.launch('IVF SQ', ivfpq)
e = ev.evalres(res)
# should give 0.234 0.236 0.236
assert e[10] > 0.235
...@@ -124,10 +124,23 @@ class TestBinaryIVF(unittest.TestCase): ...@@ -124,10 +124,23 @@ class TestBinaryIVF(unittest.TestCase):
(self.xt, self.xb, self.xq) = make_binary_dataset(d, nb, nt, nq) (self.xt, self.xb, self.xq) = make_binary_dataset(d, nb, nt, nq)
index = faiss.IndexBinaryFlat(d) index = faiss.IndexBinaryFlat(d)
index.add(self.xb) index.add(self.xb)
Dref, Iref = index.search(self.xq, 1) Dref, Iref = index.search(self.xq, 10)
self.Dref = Dref self.Dref = Dref
def test_ivf_flat(self): def test_ivf_flat_exhaustive(self):
d = self.xq.shape[1] * 8
quantizer = faiss.IndexBinaryFlat(d)
index = faiss.IndexBinaryIVF(quantizer, d, 8)
index.cp.min_points_per_centroid = 5 # quiet warning
index.nprobe = 8
index.train(self.xt)
index.add(self.xb)
Divfflat, _ = index.search(self.xq, 10)
np.testing.assert_array_equal(self.Dref, Divfflat)
def test_ivf_flat2(self):
d = self.xq.shape[1] * 8 d = self.xq.shape[1] * 8
quantizer = faiss.IndexBinaryFlat(d) quantizer = faiss.IndexBinaryFlat(d)
...@@ -136,9 +149,9 @@ class TestBinaryIVF(unittest.TestCase): ...@@ -136,9 +149,9 @@ class TestBinaryIVF(unittest.TestCase):
index.nprobe = 4 index.nprobe = 4
index.train(self.xt) index.train(self.xt)
index.add(self.xb) index.add(self.xb)
Divfflat, _ = index.search(self.xq, 1) Divfflat, _ = index.search(self.xq, 10)
self.assertGreaterEqual((self.Dref == Divfflat).sum(), 448) self.assertEqual((self.Dref == Divfflat).sum(), 4122)
......
# Copyright (c) 2015-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the BSD+Patents license found in the
# LICENSE file in the root directory of this source tree.
from __future__ import absolute_import, division, print_function, unicode_literals
import numpy as np
import unittest
import faiss
def make_binary_dataset(d, nb, nt, nq):
assert d % 8 == 0
rs = np.random.RandomState(123)
x = rs.randint(256, size=(nb + nq + nt, int(d / 8))).astype('uint8')
return x[:nt], x[nt:-nq], x[-nq:]
def binary_to_float(x):
n, d = x.shape
x8 = x.reshape(n * d, -1)
c8 = 2 * ((x8 >> np.arange(8)) & 1).astype('int8') - 1
return c8.astype('float32').reshape(n, d * 8)
class TestIndexBinaryFromFloat(unittest.TestCase):
"""Use a binary index backed by a float index"""
def test_index_from_float(self):
d = 256
nt = 0
nb = 1500
nq = 500
(xt, xb, xq) = make_binary_dataset(d, nb, nt, nq)
index_ref = faiss.IndexFlatL2(d)
index_ref.add(binary_to_float(xb))
index = faiss.IndexFlatL2(d)
index_bin = faiss.IndexBinaryFromFloat(index)
index_bin.add(xb)
D_ref, I_ref = index_ref.search(binary_to_float(xq), 10)
D, I = index_bin.search(xq, 10)
np.testing.assert_allclose((D_ref / 4.0).astype('int32'), D)
def test_wrapped_quantizer(self):
d = 256
nt = 150
nb = 1500
nq = 500
(xt, xb, xq) = make_binary_dataset(d, nb, nt, nq)
nlist = 16
quantizer_ref = faiss.IndexBinaryFlat(d)
index_ref = faiss.IndexBinaryIVF(quantizer_ref, d, nlist)
index_ref.train(xt)
index_ref.add(xb)
unwrapped_quantizer = faiss.IndexFlatL2(d)
quantizer = faiss.IndexBinaryFromFloat(unwrapped_quantizer)
index = faiss.IndexBinaryIVF(quantizer, d, nlist)
index.train(xt)
index.add(xb)
D_ref, I_ref = index_ref.search(xq, 10)
D, I = index.search(xq, 10)
np.testing.assert_array_equal(D_ref, D)
def test_wrapped_quantizer_IMI(self):
d = 256
nt = 3500
nb = 10000
nq = 500
(xt, xb, xq) = make_binary_dataset(d, nb, nt, nq)
index_ref = faiss.IndexBinaryFlat(d)
index_ref.add(xb)
nlist_exp = 6
nlist = 2 ** (2 * nlist_exp)
float_quantizer = faiss.MultiIndexQuantizer(d, 2, nlist_exp)
wrapped_quantizer = faiss.IndexBinaryFromFloat(float_quantizer)
wrapped_quantizer.train(xt)
assert nlist == float_quantizer.ntotal
index = faiss.IndexBinaryIVF(wrapped_quantizer, d,
float_quantizer.ntotal)
index.nprobe = 2048
assert index.is_trained
index.add(xb)
D_ref, I_ref = index_ref.search(xq, 10)
D, I = index.search(xq, 10)
recall = sum(gti[0] in Di[:10] for gti, Di in zip(D_ref, D)) \
/ float(D_ref.shape[0])
assert recall > 0.82, "recall = %g" % recall
def test_wrapped_quantizer_HNSW(self):
faiss.omp_set_num_threads(1)
def bin2float(v):
def byte2float(byte):
return np.array([-1.0 + 2.0 * (byte & (1 << b) != 0)
for b in range(0, 8)])
return np.hstack([byte2float(byte) for byte in v]).astype('float32')
def floatvec2nparray(v):
return np.array([np.float32(v.at(i)) for i in range(0, v.size())]) \
.reshape(-1, d)
d = 256
nt = 12800
nb = 10000
nq = 500
(xt, xb, xq) = make_binary_dataset(d, nb, nt, nq)
index_ref = faiss.IndexBinaryFlat(d)
index_ref.add(xb)
nlist = 256
clus = faiss.Clustering(d, nlist)
clus_index = faiss.IndexFlatL2(d)
xt_f = np.array([bin2float(v) for v in xt])
clus.train(xt_f, clus_index)
centroids = floatvec2nparray(clus.centroids)
hnsw_quantizer = faiss.IndexHNSWFlat(d, 32)
hnsw_quantizer.add(centroids)
hnsw_quantizer.is_trained = True
wrapped_quantizer = faiss.IndexBinaryFromFloat(hnsw_quantizer)
assert nlist == hnsw_quantizer.ntotal
assert nlist == wrapped_quantizer.ntotal
assert wrapped_quantizer.is_trained
index = faiss.IndexBinaryIVF(wrapped_quantizer, d,
hnsw_quantizer.ntotal)
index.nprobe = 128
assert index.is_trained
index.add(xb)
D_ref, I_ref = index_ref.search(xq, 10)
D, I = index.search(xq, 10)
recall = sum(gti[0] in Di[:10] for gti, Di in zip(D_ref, D)) \
/ float(D_ref.shape[0])
assert recall > 0.77, "recall = %g" % recall
...@@ -316,9 +316,7 @@ class TestRareIO(unittest.TestCase): ...@@ -316,9 +316,7 @@ class TestRareIO(unittest.TestCase):
if in_pretransform: if in_pretransform:
# make sure it still works when wrapped in an IndexPreTransform # make sure it still works when wrapped in an IndexPreTransform
tmp = index1
index1 = faiss.IndexPreTransform(index1) index1 = faiss.IndexPreTransform(index1)
index1.dont_dealloc_me = tmp
index1.train(xt) index1.train(xt)
index1.add(xb) index1.add(xb)
......
...@@ -18,46 +18,13 @@ ...@@ -18,46 +18,13 @@
#include <faiss/FaissAssert.h> #include <faiss/FaissAssert.h>
#include <faiss/VectorTransform.h> #include <faiss/VectorTransform.h>
#include <faiss/OnDiskInvertedLists.h> #include <faiss/OnDiskInvertedLists.h>
#include <faiss/IVFlib.h>
namespace { namespace {
// Main function to test // Main function to test
// Merge index1 into index0. Works on IndexIVF's and IndexIVF's
// embedded in a IndexPreTransform
void merge_into(faiss::Index *index0, faiss::Index *index1, bool shift_ids) {
FAISS_THROW_IF_NOT (index0->d == index1->d);
faiss::IndexIVF *ivf0 = dynamic_cast<faiss::IndexIVF *>(index0);
faiss::IndexIVF *ivf1 = dynamic_cast<faiss::IndexIVF *>(index1);
if (!ivf0) {
faiss::IndexPreTransform *pt0 = dynamic_cast<faiss::IndexPreTransform *>(index0);
faiss::IndexPreTransform *pt1 = dynamic_cast<faiss::IndexPreTransform *>(index1);
// minimal sanity check
FAISS_THROW_IF_NOT (pt0 && pt1);
FAISS_THROW_IF_NOT (pt0->chain.size() == pt1->chain.size());
for (int i = 0; i < pt0->chain.size(); i++) {
FAISS_THROW_IF_NOT (typeid(pt0->chain[i]) == typeid(pt1->chain[i]));
}
ivf0 = dynamic_cast<faiss::IndexIVF *>(pt0->index);
ivf1 = dynamic_cast<faiss::IndexIVF *>(pt1->index);
}
FAISS_THROW_IF_NOT (ivf0);
FAISS_THROW_IF_NOT (ivf1);
ivf0->merge_from (*ivf1, shift_ids ? ivf0->ntotal : 0);
// useful for IndexPreTransform
index0->ntotal = ivf0->ntotal;
index1->ntotal = ivf1->ntotal;
}
struct Tempfilename { struct Tempfilename {
static pthread_mutex_t mutex; static pthread_mutex_t mutex;
...@@ -122,8 +89,6 @@ struct CommonData { ...@@ -122,8 +89,6 @@ struct CommonData {
CommonData cd; CommonData cd;
/// perform a search on shards, then merge and search again and /// perform a search on shards, then merge and search again and
/// compare results. /// compare results.
int compare_merged (faiss::IndexShards *index_shards, bool shift_ids, int compare_merged (faiss::IndexShards *index_shards, bool shift_ids,
...@@ -142,7 +107,9 @@ int compare_merged (faiss::IndexShards *index_shards, bool shift_ids, ...@@ -142,7 +107,9 @@ int compare_merged (faiss::IndexShards *index_shards, bool shift_ids,
if (standard_merge) { if (standard_merge) {
for (int i = 1; i < nindex; i++) { for (int i = 1; i < nindex; i++) {
merge_into(index_shards->at(0), index_shards->at(i), shift_ids); faiss::ivflib::merge_into(
index_shards->at(0), index_shards->at(i),
shift_ids);
} }
index_shards->sync_with_shard_indexes(); index_shards->sync_with_shard_indexes();
...@@ -275,7 +242,7 @@ TEST(MERGE, merge_flat_ondisk) { ...@@ -275,7 +242,7 @@ TEST(MERGE, merge_flat_ondisk) {
EXPECT_EQ(ndiff, 0); EXPECT_EQ(ndiff, 0);
} }
// non use ondisk specific merge // now use ondisk specific merge
TEST(MERGE, merge_flat_ondisk_2) { TEST(MERGE, merge_flat_ondisk_2) {
faiss::IndexShards index_shards(d, false, false); faiss::IndexShards index_shards(d, false, false);
index_shards.own_fields = true; index_shards.own_fields = true;
......
...@@ -17,118 +17,17 @@ ...@@ -17,118 +17,17 @@
#include <faiss/IndexIVF.h> #include <faiss/IndexIVF.h>
#include <faiss/AutoTune.h> #include <faiss/AutoTune.h>
#include <faiss/VectorTransform.h> #include <faiss/VectorTransform.h>
#include <faiss/IVFlib.h>
namespace { namespace {
typedef faiss::Index::idx_t idx_t; typedef faiss::Index::idx_t idx_t;
/*************************************************************
* The functions to test, that can be useful in FANN
*************************************************************/
/* Returns the cluster the embeddings belong to.
*
* @param index Index, which should be an IVF index
* (otherwise there are no clusters)
* @param embeddings object descriptors for which the centroids should be found,
* size num_objects * d
* @param cebtroid_ids
* cluster id each object belongs to, size num_objects
*/
void Search_centroid(faiss::Index *index,
const float* embeddings, int num_objects,
idx_t* centroid_ids)
{
const float *x = embeddings;
std::unique_ptr<float[]> del;
if (auto index_pre = dynamic_cast<faiss::IndexPreTransform*>(index)) {
x = index_pre->apply_chain(num_objects, x);
del.reset((float*)x);
index = index_pre->index;
}
faiss::IndexIVF* index_ivf = dynamic_cast<faiss::IndexIVF*>(index);
assert(index_ivf);
index_ivf->quantizer->assign(num_objects, x, centroid_ids);
}
/* Returns the cluster the embeddings belong to.
*
* @param index Index, which should be an IVF index
* (otherwise there are no clusters)
* @param query_centroid_ids
* centroid ids corresponding to the query vectors (size n)
* @param result_centroid_ids
* centroid ids corresponding to the results (size n * k)
* other arguments are the same as the standard search function
*/
void search_and_retrun_centroids(faiss::Index *index,
size_t n,
const float* xin,
long k,
float *distances,
idx_t* labels,
idx_t* query_centroid_ids,
idx_t* result_centroid_ids)
{
const float *x = xin;
std::unique_ptr<float []> del;
if (auto index_pre = dynamic_cast<faiss::IndexPreTransform*>(index)) {
x = index_pre->apply_chain(n, x);
del.reset((float*)x);
index = index_pre->index;
}
faiss::IndexIVF* index_ivf = dynamic_cast<faiss::IndexIVF*>(index);
assert(index_ivf);
size_t nprobe = index_ivf->nprobe;
std::vector<idx_t> cent_nos (n * nprobe);
std::vector<float> cent_dis (n * nprobe);
index_ivf->quantizer->search(
n, x, nprobe, cent_dis.data(), cent_nos.data());
if (query_centroid_ids) {
for (size_t i = 0; i < n; i++)
query_centroid_ids[i] = cent_nos[i * nprobe];
}
index_ivf->search_preassigned (n, x, k,
cent_nos.data(), cent_dis.data(),
distances, labels, true);
for (size_t i = 0; i < n * k; i++) {
idx_t label = labels[i];
if (label < 0) {
if (result_centroid_ids)
result_centroid_ids[i] = -1;
} else {
long list_no = label >> 32;
long list_index = label & 0xffffffff;
if (result_centroid_ids)
result_centroid_ids[i] = list_no;
labels[i] = index_ivf->invlists->get_single_id(list_no, list_index);
}
}
}
/************************************************************* /*************************************************************
* Test utils * Test utils
*************************************************************/ *************************************************************/
// return an IndexIVF that may be embedded in an IndexPreTransform
faiss::IndexIVF * get_IndexIVF(faiss::Index *index) {
if (auto index_pre = dynamic_cast<faiss::IndexPreTransform*>(index)) {
index = index_pre->index;
}
faiss::IndexIVF* index_ivf = dynamic_cast<faiss::IndexIVF*>(index);
bool t = index_ivf != nullptr;
assert(index_ivf);
return index_ivf;
}
// dimension of the vectors to index // dimension of the vectors to index
int d = 64; int d = 64;
...@@ -162,7 +61,7 @@ std::unique_ptr<faiss::Index> make_index(const char *index_type, ...@@ -162,7 +61,7 @@ std::unique_ptr<faiss::Index> make_index(const char *index_type,
* Test functions for a given index type * Test functions for a given index type
*************************************************************/ *************************************************************/
bool test_Search_centroid(const char *index_key) { bool test_search_centroid(const char *index_key) {
std::vector<float> xb = make_data(nb); // database vectors std::vector<float> xb = make_data(nb); // database vectors
auto index = make_index(index_key, xb); auto index = make_index(index_key, xb);
...@@ -171,9 +70,11 @@ bool test_Search_centroid(const char *index_key) { ...@@ -171,9 +70,11 @@ bool test_Search_centroid(const char *index_key) {
the inverted list corresponding to its centroid */ the inverted list corresponding to its centroid */
std::vector<idx_t> centroid_ids (nb); std::vector<idx_t> centroid_ids (nb);
Search_centroid(index.get(), xb.data(), nb, centroid_ids.data()); faiss::ivflib::search_centroid(
index.get(), xb.data(), nb, centroid_ids.data());
const faiss::IndexIVF * ivf = get_IndexIVF(index.get()); const faiss::IndexIVF * ivf = faiss::ivflib::extract_index_ivf
(index.get());
for(int i = 0; i < nb; i++) { for(int i = 0; i < nb; i++) {
bool found = false; bool found = false;
...@@ -197,9 +98,11 @@ int test_search_and_return_centroids(const char *index_key) { ...@@ -197,9 +98,11 @@ int test_search_and_return_centroids(const char *index_key) {
auto index = make_index(index_key, xb); auto index = make_index(index_key, xb);
std::vector<idx_t> centroid_ids (nb); std::vector<idx_t> centroid_ids (nb);
Search_centroid(index.get(), xb.data(), nb, centroid_ids.data()); faiss::ivflib::search_centroid(index.get(), xb.data(),
nb, centroid_ids.data());
faiss::IndexIVF * ivf = get_IndexIVF(index.get()); faiss::IndexIVF * ivf =
faiss::ivflib::extract_index_ivf (index.get());
ivf->nprobe = 4; ivf->nprobe = 4;
std::vector<float> xq = make_data(nq); // database vectors std::vector<float> xq = make_data(nq); // database vectors
...@@ -220,7 +123,7 @@ int test_search_and_return_centroids(const char *index_key) { ...@@ -220,7 +123,7 @@ int test_search_and_return_centroids(const char *index_key) {
std::vector<idx_t> query_centroid_ids (nq); std::vector<idx_t> query_centroid_ids (nq);
std::vector<idx_t> result_centroid_ids (nq * k); std::vector<idx_t> result_centroid_ids (nq * k);
search_and_retrun_centroids(index.get(), faiss::ivflib::search_and_return_centroids(index.get(),
nq, xq.data(), k, nq, xq.data(), k,
newD.data(), newI.data(), newD.data(), newI.data(),
query_centroid_ids.data(), query_centroid_ids.data(),
...@@ -264,13 +167,13 @@ int test_search_and_return_centroids(const char *index_key) { ...@@ -264,13 +167,13 @@ int test_search_and_return_centroids(const char *index_key) {
* Test entry points * Test entry points
*************************************************************/ *************************************************************/
TEST(test_Search_centroid, IVFFlat) { TEST(test_search_centroid, IVFFlat) {
bool ok = test_Search_centroid("IVF32,Flat"); bool ok = test_search_centroid("IVF32,Flat");
EXPECT_TRUE(ok); EXPECT_TRUE(ok);
} }
TEST(test_Search_centroid, PCAIVFFlat) { TEST(test_search_centroid, PCAIVFFlat) {
bool ok = test_Search_centroid("PCA16,IVF32,Flat"); bool ok = test_search_centroid("PCA16,IVF32,Flat");
EXPECT_TRUE(ok); EXPECT_TRUE(ok);
} }
......
...@@ -17,6 +17,7 @@ ...@@ -17,6 +17,7 @@
#include <faiss/IndexIVF.h> #include <faiss/IndexIVF.h>
#include <faiss/IndexBinaryIVF.h> #include <faiss/IndexBinaryIVF.h>
#include <faiss/AutoTune.h> #include <faiss/AutoTune.h>
#include <faiss/IVFlib.h>
using namespace faiss; using namespace faiss;
...@@ -45,19 +46,17 @@ std::vector<float> make_data(size_t n) ...@@ -45,19 +46,17 @@ std::vector<float> make_data(size_t n)
return database; return database;
} }
std::unique_ptr<IndexIVF> make_index(const char *index_type, std::unique_ptr<Index> make_index(const char *index_type,
MetricType metric, MetricType metric,
const std::vector<float> & x) const std::vector<float> & x)
{ {
std::unique_ptr<Index> index(index_factory(d, index_type, metric));
auto index = std::unique_ptr<IndexIVF>
(dynamic_cast<IndexIVF*>(index_factory(d, index_type, metric)));
index->train(nb, x.data()); index->train(nb, x.data());
index->add(nb, x.data()); index->add(nb, x.data());
return index; return index;
} }
std::vector<idx_t> search_index(IndexIVF *index, const float *xq) { std::vector<idx_t> search_index(Index *index, const float *xq) {
int k = 10; int k = 10;
std::vector<idx_t> I(k * nq); std::vector<idx_t> I(k * nq);
std::vector<float> D(k * nq); std::vector<float> D(k * nq);
...@@ -66,19 +65,12 @@ std::vector<idx_t> search_index(IndexIVF *index, const float *xq) { ...@@ -66,19 +65,12 @@ std::vector<idx_t> search_index(IndexIVF *index, const float *xq) {
} }
std::vector<idx_t> search_index_with_params( std::vector<idx_t> search_index_with_params(
IndexIVF *index, const float *xq, IVFSearchParameters *params) { Index *index, const float *xq, IVFSearchParameters *params) {
int k = 10; int k = 10;
std::vector<idx_t> I(k * nq); std::vector<idx_t> I(k * nq);
std::vector<float> D(k * nq); std::vector<float> D(k * nq);
ivflib::search_with_parameters (index, nq, xq, k,
std::vector<idx_t> Iq(params->nprobe * nq); D.data(), I.data(), params);
std::vector<float> Dq(params->nprobe * nq);
index->quantizer->search(nq, xq, params->nprobe,
Dq.data(), Iq.data());
index->search_preassigned(nq, xq, k, Iq.data(), Dq.data(),
D.data(), I.data(),
false, params);
return I; return I;
} }
...@@ -92,14 +84,15 @@ std::vector<idx_t> search_index_with_params( ...@@ -92,14 +84,15 @@ std::vector<idx_t> search_index_with_params(
int test_params_override (const char *index_key, MetricType metric) { int test_params_override (const char *index_key, MetricType metric) {
std::vector<float> xb = make_data(nb); // database vectors std::vector<float> xb = make_data(nb); // database vectors
auto index = make_index(index_key, metric, xb); auto index = make_index(index_key, metric, xb);
index->train(nb, xb.data()); //index->train(nb, xb.data());
index->add(nb, xb.data()); // index->add(nb, xb.data());
std::vector<float> xq = make_data(nq); std::vector<float> xq = make_data(nq);
index->nprobe = 2; ParameterSpace ps;
ps.set_index_parameter(index.get(), "nprobe", 2);
auto res2ref = search_index(index.get(), xq.data()); auto res2ref = search_index(index.get(), xq.data());
index->nprobe = 9; ps.set_index_parameter(index.get(), "nprobe", 9);
auto res9ref = search_index(index.get(), xq.data()); auto res9ref = search_index(index.get(), xq.data());
index->nprobe = 1; ps.set_index_parameter(index.get(), "nprobe", 1);
IVFSearchParameters params; IVFSearchParameters params;
params.max_codes = 0; params.max_codes = 0;
...@@ -146,6 +139,13 @@ TEST(TPO, IVFSQ) { ...@@ -146,6 +139,13 @@ TEST(TPO, IVFSQ) {
EXPECT_EQ(err2, 0); EXPECT_EQ(err2, 0);
} }
TEST(TPO, IVFFlatPP) {
int err1 = test_params_override ("PCA16,IVF32,SQ8", METRIC_L2);
EXPECT_EQ(err1, 0);
int err2 = test_params_override ("PCA16,IVF32,SQ8", METRIC_INNER_PRODUCT);
EXPECT_EQ(err2, 0);
}
/************************************************************* /*************************************************************
......
# Copyright (c) 2015-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the BSD+Patents license found in the
# LICENSE file in the root directory of this source tree.
#! /usr/bin/env python2
"""make sure that the referenced objects are kept"""
import numpy as np
import unittest
import faiss
import sys
import gc
d = 10
xt = np.random.rand(100, d).astype('float32')
xb = np.random.rand(20, d).astype('float32')
class TestReferenced(unittest.TestCase):
def test_IndexIVF(self):
quantizer = faiss.IndexFlatL2(d)
index = faiss.IndexIVFFlat(quantizer, d, 10)
index.train(xt)
index.add(xb)
del quantizer
gc.collect()
index.add(xb)
def test_count_refs(self):
quantizer = faiss.IndexFlatL2(d)
index = faiss.IndexIVFFlat(quantizer, d, 10)
refc1 = sys.getrefcount(quantizer)
del index
gc.collect()
refc2 = sys.getrefcount(quantizer)
assert refc2 == refc1 - 1
def test_IndexIVF_2(self):
index = faiss.IndexIVFFlat(faiss.IndexFlatL2(d), d, 10)
index.train(xt)
index.add(xb)
def test_IndexPreTransform(self):
ltrans = faiss.NormalizationTransform(d)
sub_index = faiss.IndexFlatL2(d)
index = faiss.IndexPreTransform(ltrans, sub_index)
index.add(xb)
del ltrans
gc.collect()
index.add(xb)
del sub_index
gc.collect()
index.add(xb)
def test_IndexPreTransform_2(self):
sub_index = faiss.IndexFlatL2(d)
index = faiss.IndexPreTransform(sub_index)
ltrans = faiss.NormalizationTransform(d)
index.prepend_transform(ltrans)
index.add(xb)
del ltrans
gc.collect()
index.add(xb)
del sub_index
gc.collect()
index.add(xb)
def test_IDMap(self):
sub_index = faiss.IndexFlatL2(d)
index = faiss.IndexIDMap(sub_index)
index.add_with_ids(xb, np.arange(len(xb)))
del sub_index
gc.collect()
index.add_with_ids(xb, np.arange(len(xb)))
def test_shards(self):
index = faiss.IndexShards(d)
for i in range(3):
sub_index = faiss.IndexFlatL2(d)
sub_index.add(xb)
index.add_shard(sub_index)
gc.collect()
index.search(xb, 10)
if __name__ == '__main__':
unittest.main()
/**
* Copyright (c) 2015-present, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under the BSD+Patents license found in the
* LICENSE file in the root directory of this source tree.
*/
#include <cstdio>
#include <cstdlib>
#include <memory>
#include <vector>
#include <gtest/gtest.h>
#include <faiss/IndexIVF.h>
#include <faiss/AutoTune.h>
#include <faiss/index_io.h>
#include <faiss/IVFlib.h>
using namespace faiss;
namespace {
typedef Index::idx_t idx_t;
// dimension of the vectors to index
int d = 32;
// nb of training vectors
size_t nt = 5000;
// size of the database points per window step
size_t nb = 1000;
// nb of queries
size_t nq = 200;
int total_size = 40;
int window_size = 10;
std::vector<float> make_data(size_t n)
{
std::vector <float> database (n * d);
for (size_t i = 0; i < n * d; i++) {
database[i] = drand48();
}
return database;
}
std::unique_ptr<Index> make_trained_index(const char *index_type)
{
auto index = std::unique_ptr<Index>(index_factory(d, index_type));
auto xt = make_data(nt * d);
index->train(nt, xt.data());
ParameterSpace().set_index_parameter (index.get(), "nprobe", 4);
return index;
}
std::vector<idx_t> search_index(Index *index, const float *xq) {
int k = 10;
std::vector<idx_t> I(k * nq);
std::vector<float> D(k * nq);
index->search (nq, xq, k, D.data(), I.data());
return I;
}
/*************************************************************
* Test functions for a given index type
*************************************************************/
// make a few slices of indexes that can be merged
void make_index_slices (const Index* trained_index,
std::vector<std::unique_ptr<Index> > & sub_indexes) {
for (int i = 0; i < total_size; i++) {
sub_indexes.emplace_back (clone_index (trained_index));
printf ("preparing sub-index # %d\n", i);
Index * index = sub_indexes.back().get();
auto xb = make_data(nb * d);
std::vector<long> ids (nb);
for (int j = 0; j < nb; j++) {
ids[j] = lrand48();
}
index->add_with_ids (nb, xb.data(), ids.data());
}
}
// build merged index explicitly at sliding window position i
Index *make_merged_index(
const Index* trained_index,
const std::vector<std::unique_ptr<Index> > & sub_indexes,
int i) {
Index * merged_index = clone_index (trained_index);
for (int j = i - window_size + 1; j <= i; j++) {
if (j < 0 || j >= total_size) continue;
std::unique_ptr<Index> sub_index (
clone_index (sub_indexes[j].get()));
IndexIVF *ivf0 = ivflib::extract_index_ivf (merged_index);
IndexIVF *ivf1 = ivflib::extract_index_ivf (sub_index.get());
ivf0->merge_from (*ivf1, 0);
merged_index->ntotal = ivf0->ntotal;
}
return merged_index;
}
int test_sliding_window (const char *index_key) {
std::unique_ptr<Index> trained_index = make_trained_index(index_key);
// make the index slices
std::vector<std::unique_ptr<Index> > sub_indexes;
make_index_slices (trained_index.get(), sub_indexes);
// now slide over the windows
std::unique_ptr<Index> index (clone_index (trained_index.get()));
ivflib::SlidingIndexWindow window (index.get());
auto xq = make_data (nq * d);
for (int i = 0; i < total_size + window_size; i++) {
printf ("doing step %d / %d\n", i, total_size + window_size);
// update the index
window.step (i < total_size ? sub_indexes[i].get() : nullptr,
i >= window_size);
printf (" current n_slice = %d\n", window.n_slice);
auto new_res = search_index (index.get(), xq.data());
std::unique_ptr<Index> merged_index (
make_merged_index (trained_index.get(), sub_indexes, i));
auto ref_res = search_index (merged_index.get(), xq.data ());
EXPECT_EQ (ref_res.size(), new_res.size());
EXPECT_EQ (ref_res, new_res);
}
return 0;
}
int test_sliding_invlists (const char *index_key) {
std::unique_ptr<Index> trained_index = make_trained_index(index_key);
// make the index slices
std::vector<std::unique_ptr<Index> > sub_indexes;
make_index_slices (trained_index.get(), sub_indexes);
// now slide over the windows
std::unique_ptr<Index> index (clone_index (trained_index.get()));
IndexIVF * index_ivf = ivflib::extract_index_ivf (index.get());
auto xq = make_data (nq * d);
for (int i = 0; i < total_size + window_size; i++) {
printf ("doing step %d / %d\n", i, total_size + window_size);
// update the index
std::vector<const InvertedLists*> ils;
for (int j = i - window_size + 1; j <= i; j++) {
if (j < 0 || j >= total_size) continue;
ils.push_back (ivflib::extract_index_ivf (
sub_indexes[j].get())->invlists);
}
if (ils.size() == 0) continue;
ConcatenatedInvertedLists *ci =
new ConcatenatedInvertedLists (ils.size(), ils.data());
// will be deleted by the index
index_ivf->replace_invlists (ci, true);
printf (" nb invlists = %ld\n", ils.size());
auto new_res = search_index (index.get(), xq.data());
std::unique_ptr<Index> merged_index (
make_merged_index (trained_index.get(), sub_indexes, i));
auto ref_res = search_index (merged_index.get(), xq.data ());
EXPECT_EQ (ref_res.size(), new_res.size());
size_t ndiff = 0;
for (size_t j = 0; j < ref_res.size(); j++) {
if (ref_res[j] != new_res[j])
ndiff++;
}
printf(" nb differences: %ld / %ld\n",
ndiff, ref_res.size());
EXPECT_EQ (ref_res, new_res);
}
return 0;
}
} // namespace
/*************************************************************
* Test entry points
*************************************************************/
TEST(SlidingWindow, IVFFlat) {
test_sliding_window ("IVF32,Flat");
}
TEST(SlidingWindow, PCAIVFFlat) {
test_sliding_window ("PCA24,IVF32,Flat");
}
TEST(SlidingInvlists, IVFFlat) {
test_sliding_invlists ("IVF32,Flat");
}
TEST(SlidingInvlists, PCAIVFFlat) {
test_sliding_invlists ("PCA24,IVF32,Flat");
}
/**
* Copyright (c) 2015-present, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under the BSD+Patents license found in the
* LICENSE file in the root directory of this source tree.
*/
#include <memory>
#include <cstdio>
#include <cstdlib>
#include <gtest/gtest.h>
#include <faiss/IndexIVFFlat.h>
#include <faiss/index_io.h>
#include <faiss/AuxIndexStructures.h>
#include <faiss/AutoTune.h>
#include <faiss/VectorTransform.h>
#include <faiss/utils.h>
#include <faiss/IVFlib.h>
using namespace faiss;
namespace {
// parameters to use for the test
int d = 64;
size_t nb = 1000;
size_t nq = 100;
size_t nt = 500;
int k = 10;
int nlist = 40;
typedef faiss::Index::idx_t idx_t;
std::vector<float> get_data (size_t nb, int seed) {
std::vector<float> x (nb * d);
float_randn (x.data(), nb * d, seed);
return x;
}
void test_index_type(const char *factory_string) {
// transfer inverted lists in nslice slices
int nslice = 3;
/****************************************************************
* trained reference index
****************************************************************/
std::unique_ptr<Index> trained (index_factory (d, factory_string));
{
auto xt = get_data (nt, 123);
trained->train (nt, xt.data());
}
// sample nq query vectors to check if results are the same
auto xq = get_data (nq, 818);
/****************************************************************
* source index
***************************************************************/
std::unique_ptr<Index> src_index (clone_index (trained.get()));
{ // add some data to source index
auto xb = get_data (nb, 245);
src_index->add (nb, xb.data());
}
ParameterSpace().set_index_parameter (src_index.get(), "nprobe", 4);
// remember reference search result on source index
std::vector<idx_t> Iref (nq * k);
std::vector<float> Dref (nq * k);
src_index->search (nq, xq.data(), k, Dref.data(), Iref.data());
/****************************************************************
* destination index -- should be replaced by source index
***************************************************************/
std::unique_ptr<Index> dst_index (clone_index (trained.get()));
{ // initial state: filled in with some garbage
int nb2 = nb + 10;
auto xb = get_data (nb2, 366);
dst_index->add (nb2, xb.data());
}
std::vector<idx_t> Inew (nq * k);
std::vector<float> Dnew (nq * k);
ParameterSpace().set_index_parameter (dst_index.get(), "nprobe", 4);
// transfer from source to destination in nslice slices
for (int sl = 0; sl < nslice; sl++) {
// so far, the indexes are different
dst_index->search (nq, xq.data(), k, Dnew.data(), Inew.data());
EXPECT_TRUE (Iref != Inew);
EXPECT_TRUE (Dref != Dnew);
// range of inverted list indices to transfer
long i0 = sl * nlist / nslice;
long i1 = (sl + 1) * nlist / nslice;
std::vector<uint8_t> data_to_transfer;
{
std::unique_ptr<ArrayInvertedLists> il
(ivflib::get_invlist_range (src_index.get(), i0, i1));
// serialize inverted lists
VectorIOWriter wr;
write_InvertedLists (il.get(), &wr);
data_to_transfer.swap (wr.data);
}
// transfer data here from source machine to dest machine
{
VectorIOReader reader;
reader.data.swap (data_to_transfer);
// deserialize inverted lists
std::unique_ptr<ArrayInvertedLists> il
(dynamic_cast<ArrayInvertedLists *>
(read_InvertedLists (&reader)));
// swap inverted lists. Block searches here!
{
ivflib::set_invlist_range (dst_index.get(), i0, i1, il.get());
}
}
}
EXPECT_EQ (dst_index->ntotal, src_index->ntotal);
// now, the indexes are the same
dst_index->search (nq, xq.data(), k, Dnew.data(), Inew.data());
EXPECT_TRUE (Iref == Inew);
EXPECT_TRUE (Dref == Dnew);
}
} // namespace
TEST(TRANS, IVFFlat) {
test_index_type ("IVF40,Flat");
}
TEST(TRANS, IVFFlatPreproc) {
test_index_type ("PCAR32,IVF40,Flat");
}
...@@ -15,9 +15,6 @@ ...@@ -15,9 +15,6 @@
#include <cstring> #include <cstring>
#include <cmath> #include <cmath>
#include <immintrin.h>
#include <sys/time.h> #include <sys/time.h>
#include <sys/types.h> #include <sys/types.h>
#include <unistd.h> #include <unistd.h>
...@@ -66,10 +63,6 @@ int sorgqr_(FINTEGER *m, FINTEGER *n, FINTEGER *k, float *a, ...@@ -66,10 +63,6 @@ int sorgqr_(FINTEGER *m, FINTEGER *n, FINTEGER *k, float *a,
namespace faiss { namespace faiss {
#ifdef __AVX__
#define USE_AVX
#endif
double getmillisecs () { double getmillisecs () {
struct timeval tv; struct timeval tv;
gettimeofday (&tv, nullptr); gettimeofday (&tv, nullptr);
...@@ -112,109 +105,32 @@ size_t get_mem_usage_kb () ...@@ -112,109 +105,32 @@ size_t get_mem_usage_kb ()
* Random data generation functions * Random data generation functions
**************************************************/ **************************************************/
/**
* The definition of random functions depends on the architecture:
*
* - for Linux, we rely on re-entrant functions (random_r). This
* provides good quality reproducible random sequences.
*
* - for Apple, we use rand_r. Apple is trying so hard to deprecate
 * this function that it removed its definition from stdlib.h, so we
* re-declare it below. Fortunately, since it is deprecated, its
 * prototype should not change much in the foreseeable future.
*
* Unfortunately, system designers are more concerned with making the
* most unpredictable random sequences for cryptographic use, when in
 * scientific contexts what actually matters is having reproducible
 * sequences in multi-threaded contexts.
*/
#ifdef __linux__
int RandomGenerator::rand_int ()
{
int32_t a;
random_r (&rand_data, &a);
return a;
}
long RandomGenerator::rand_long ()
{
int32_t a, b;
random_r (&rand_data, &a);
random_r (&rand_data, &b);
return long(a) | long(b) << 31;
}
RandomGenerator::RandomGenerator (long seed)
{
memset (&rand_data, 0, sizeof (rand_data));
initstate_r (seed, rand_state, sizeof (rand_state), &rand_data);
}
RandomGenerator::RandomGenerator (const RandomGenerator & other)
{
memcpy (rand_state, other.rand_state, sizeof(rand_state));
rand_data = other.rand_data;
setstate_r (rand_state, &rand_data);
}
#elif __APPLE__
extern "C" {
int rand_r(unsigned *seed);
}
RandomGenerator::RandomGenerator (long seed) RandomGenerator::RandomGenerator (long seed)
{ : mt((unsigned int)seed) {}
rand_state = seed;
}
RandomGenerator::RandomGenerator (const RandomGenerator & other)
{
rand_state = other.rand_state;
}
int RandomGenerator::rand_int () int RandomGenerator::rand_int ()
{ {
// RAND_MAX is 31 bits return mt() & 0x7fffffff;
// try to add more randomness in the lower bits
int lowbits = rand_r(&rand_state) >> 15;
return rand_r(&rand_state) ^ lowbits;
} }
long RandomGenerator::rand_long () long RandomGenerator::rand_long ()
{ {
return long(random()) | long(random()) << 31; return long(rand_int()) | long(rand_int()) << 31;
} }
#endif
int RandomGenerator::rand_int (int max) int RandomGenerator::rand_int (int max)
{ // this suffers from non-uniform probabilities when max is not a {
// power of 2, but if RAND_MAX >> max the bias is limited. return mt() % max;
return rand_int () % max;
} }
float RandomGenerator::rand_float () float RandomGenerator::rand_float ()
{ {
return rand_int() / float(1L << 31); return mt() / float(mt.max());
} }
double RandomGenerator::rand_double () double RandomGenerator::rand_double ()
{ {
return rand_long() / double(1L << 62); return mt() / double(mt.max());
} }
...@@ -393,260 +309,6 @@ void reflection_ref (const float * u, float * x, size_t n, size_t d, size_t nu) ...@@ -393,260 +309,6 @@ void reflection_ref (const float * u, float * x, size_t n, size_t d, size_t nu)
} }
} }
/*********************************************************
* Optimized distance computations
*********************************************************/
/* Functions to compute:
- L2 distance between 2 vectors
- inner product between 2 vectors
- L2 norm of a vector
The functions should probably not be invoked when a large number of
vectors are processed in batch (in which case matrix multiplication
is faster), but may be useful for comparing vectors isolated in
memory.
Works with any vectors of any dimension, even unaligned (in which
case they are slower).
*/
/*********************************************************
* Reference implementations
*/
/* same without SSE */
float fvec_L2sqr_ref (const float * x,
const float * y,
size_t d)
{
size_t i;
float res = 0;
for (i = 0; i < d; i++) {
const float tmp = x[i] - y[i];
res += tmp * tmp;
}
return res;
}
float fvec_inner_product_ref (const float * x,
const float * y,
size_t d)
{
size_t i;
float res = 0;
for (i = 0; i < d; i++)
res += x[i] * y[i];
return res;
}
float fvec_norm_L2sqr_ref (const float * __restrict x,
size_t d)
{
size_t i;
double res = 0;
for (i = 0; i < d; i++)
res += x[i] * x[i];
return res;
}
/*********************************************************
* SSE and AVX implementations
*/
// reads 0 <= d < 4 floats as __m128
static inline __m128 masked_read (int d, const float *x)
{
assert (0 <= d && d < 4);
__attribute__((__aligned__(16))) float buf[4] = {0, 0, 0, 0};
switch (d) {
case 3:
buf[2] = x[2];
case 2:
buf[1] = x[1];
case 1:
buf[0] = x[0];
}
return _mm_load_ps (buf);
// cannot use AVX2 _mm_mask_set1_epi32
}
#ifdef USE_AVX
// reads 0 <= d < 8 floats as __m256
static inline __m256 masked_read_8 (int d, const float *x)
{
assert (0 <= d && d < 8);
if (d < 4) {
__m256 res = _mm256_setzero_ps ();
res = _mm256_insertf128_ps (res, masked_read (d, x), 0);
return res;
} else {
__m256 res = _mm256_setzero_ps ();
res = _mm256_insertf128_ps (res, _mm_loadu_ps (x), 0);
res = _mm256_insertf128_ps (res, masked_read (d - 4, x + 4), 1);
return res;
}
}
float fvec_inner_product (const float * x,
const float * y,
size_t d)
{
__m256 msum1 = _mm256_setzero_ps();
while (d >= 8) {
__m256 mx = _mm256_loadu_ps (x); x += 8;
__m256 my = _mm256_loadu_ps (y); y += 8;
msum1 = _mm256_add_ps (msum1, _mm256_mul_ps (mx, my));
d -= 8;
}
__m128 msum2 = _mm256_extractf128_ps(msum1, 1);
msum2 += _mm256_extractf128_ps(msum1, 0);
if (d >= 4) {
__m128 mx = _mm_loadu_ps (x); x += 4;
__m128 my = _mm_loadu_ps (y); y += 4;
msum2 = _mm_add_ps (msum2, _mm_mul_ps (mx, my));
d -= 4;
}
if (d > 0) {
__m128 mx = masked_read (d, x);
__m128 my = masked_read (d, y);
msum2 = _mm_add_ps (msum2, _mm_mul_ps (mx, my));
}
msum2 = _mm_hadd_ps (msum2, msum2);
msum2 = _mm_hadd_ps (msum2, msum2);
return _mm_cvtss_f32 (msum2);
}
float fvec_L2sqr (const float * x,
const float * y,
size_t d)
{
__m256 msum1 = _mm256_setzero_ps();
while (d >= 8) {
__m256 mx = _mm256_loadu_ps (x); x += 8;
__m256 my = _mm256_loadu_ps (y); y += 8;
const __m256 a_m_b1 = mx - my;
msum1 += a_m_b1 * a_m_b1;
d -= 8;
}
__m128 msum2 = _mm256_extractf128_ps(msum1, 1);
msum2 += _mm256_extractf128_ps(msum1, 0);
if (d >= 4) {
__m128 mx = _mm_loadu_ps (x); x += 4;
__m128 my = _mm_loadu_ps (y); y += 4;
const __m128 a_m_b1 = mx - my;
msum2 += a_m_b1 * a_m_b1;
d -= 4;
}
if (d > 0) {
__m128 mx = masked_read (d, x);
__m128 my = masked_read (d, y);
__m128 a_m_b1 = mx - my;
msum2 += a_m_b1 * a_m_b1;
}
msum2 = _mm_hadd_ps (msum2, msum2);
msum2 = _mm_hadd_ps (msum2, msum2);
return _mm_cvtss_f32 (msum2);
}
#else
/* SSE-implementation of L2 distance */
float fvec_L2sqr (const float * x,
const float * y,
size_t d)
{
__m128 msum1 = _mm_setzero_ps();
while (d >= 4) {
__m128 mx = _mm_loadu_ps (x); x += 4;
__m128 my = _mm_loadu_ps (y); y += 4;
const __m128 a_m_b1 = mx - my;
msum1 += a_m_b1 * a_m_b1;
d -= 4;
}
if (d > 0) {
// add the last 1, 2 or 3 values
__m128 mx = masked_read (d, x);
__m128 my = masked_read (d, y);
__m128 a_m_b1 = mx - my;
msum1 += a_m_b1 * a_m_b1;
}
msum1 = _mm_hadd_ps (msum1, msum1);
msum1 = _mm_hadd_ps (msum1, msum1);
return _mm_cvtss_f32 (msum1);
}
float fvec_inner_product (const float * x,
const float * y,
size_t d)
{
__m128 mx, my;
__m128 msum1 = _mm_setzero_ps();
while (d >= 4) {
mx = _mm_loadu_ps (x); x += 4;
my = _mm_loadu_ps (y); y += 4;
msum1 = _mm_add_ps (msum1, _mm_mul_ps (mx, my));
d -= 4;
}
// add the last 1, 2, or 3 values
mx = masked_read (d, x);
my = masked_read (d, y);
__m128 prod = _mm_mul_ps (mx, my);
msum1 = _mm_add_ps (msum1, prod);
msum1 = _mm_hadd_ps (msum1, msum1);
msum1 = _mm_hadd_ps (msum1, msum1);
return _mm_cvtss_f32 (msum1);
}
#endif
float fvec_norm_L2sqr (const float * x,
size_t d)
{
__m128 mx;
__m128 msum1 = _mm_setzero_ps();
while (d >= 4) {
mx = _mm_loadu_ps (x); x += 4;
msum1 = _mm_add_ps (msum1, _mm_mul_ps (mx, mx));
d -= 4;
}
mx = masked_read (d, x);
msum1 = _mm_add_ps (msum1, _mm_mul_ps (mx, mx));
msum1 = _mm_hadd_ps (msum1, msum1);
msum1 = _mm_hadd_ps (msum1, msum1);
return _mm_cvtss_f32 (msum1);
}
...@@ -1857,118 +1519,6 @@ void fvec_argsort_parallel (size_t n, const float *vals, ...@@ -1857,118 +1519,6 @@ void fvec_argsort_parallel (size_t n, const float *vals,
/***************************************************************************
* heavily optimized table computations
***************************************************************************/
static inline void fvec_madd_ref (size_t n, const float *a,
float bf, const float *b, float *c) {
for (size_t i = 0; i < n; i++)
c[i] = a[i] + bf * b[i];
}
static inline void fvec_madd_sse (size_t n, const float *a,
float bf, const float *b, float *c) {
n >>= 2;
__m128 bf4 = _mm_set_ps1 (bf);
__m128 * a4 = (__m128*)a;
__m128 * b4 = (__m128*)b;
__m128 * c4 = (__m128*)c;
while (n--) {
*c4 = _mm_add_ps (*a4, _mm_mul_ps (bf4, *b4));
b4++;
a4++;
c4++;
}
}
void fvec_madd (size_t n, const float *a,
float bf, const float *b, float *c)
{
if ((n & 3) == 0 &&
((((long)a) | ((long)b) | ((long)c)) & 15) == 0)
fvec_madd_sse (n, a, bf, b, c);
else
fvec_madd_ref (n, a, bf, b, c);
}
static inline int fvec_madd_and_argmin_ref (size_t n, const float *a,
float bf, const float *b, float *c) {
float vmin = 1e20;
int imin = -1;
for (size_t i = 0; i < n; i++) {
c[i] = a[i] + bf * b[i];
if (c[i] < vmin) {
vmin = c[i];
imin = i;
}
}
return imin;
}
static inline int fvec_madd_and_argmin_sse (size_t n, const float *a,
float bf, const float *b, float *c) {
n >>= 2;
__m128 bf4 = _mm_set_ps1 (bf);
__m128 vmin4 = _mm_set_ps1 (1e20);
__m128i imin4 = _mm_set1_epi32 (-1);
__m128i idx4 = _mm_set_epi32 (3, 2, 1, 0);
__m128i inc4 = _mm_set1_epi32 (4);
__m128 * a4 = (__m128*)a;
__m128 * b4 = (__m128*)b;
__m128 * c4 = (__m128*)c;
while (n--) {
__m128 vc4 = _mm_add_ps (*a4, _mm_mul_ps (bf4, *b4));
*c4 = vc4;
__m128i mask = (__m128i)_mm_cmpgt_ps (vmin4, vc4);
// imin4 = _mm_blendv_epi8 (imin4, idx4, mask); // slower!
imin4 = _mm_or_si128 (_mm_and_si128 (mask, idx4),
_mm_andnot_si128 (mask, imin4));
vmin4 = _mm_min_ps (vmin4, vc4);
b4++;
a4++;
c4++;
idx4 = _mm_add_epi32 (idx4, inc4);
}
// 4 values -> 2
{
idx4 = _mm_shuffle_epi32 (imin4, 3 << 2 | 2);
__m128 vc4 = _mm_shuffle_ps (vmin4, vmin4, 3 << 2 | 2);
__m128i mask = (__m128i)_mm_cmpgt_ps (vmin4, vc4);
imin4 = _mm_or_si128 (_mm_and_si128 (mask, idx4),
_mm_andnot_si128 (mask, imin4));
vmin4 = _mm_min_ps (vmin4, vc4);
}
// 2 values -> 1
{
idx4 = _mm_shuffle_epi32 (imin4, 1);
__m128 vc4 = _mm_shuffle_ps (vmin4, vmin4, 1);
__m128i mask = (__m128i)_mm_cmpgt_ps (vmin4, vc4);
imin4 = _mm_or_si128 (_mm_and_si128 (mask, idx4),
_mm_andnot_si128 (mask, imin4));
// vmin4 = _mm_min_ps (vmin4, vc4);
}
return _mm_extract_epi32 (imin4, 0);
}
int fvec_madd_and_argmin (size_t n, const float *a,
float bf, const float *b, float *c)
{
if ((n & 3) == 0 &&
((((long)a) | ((long)b) | ((long)c)) & 15) == 0)
return fvec_madd_and_argmin_sse (n, a, bf, b, c);
else
return fvec_madd_and_argmin_ref (n, a, bf, b, c);
}
const float *fvecs_maybe_subsample ( const float *fvecs_maybe_subsample (
...@@ -1995,4 +1545,23 @@ const float *fvecs_maybe_subsample ( ...@@ -1995,4 +1545,23 @@ const float *fvecs_maybe_subsample (
} }
void binary_to_real(int d, const uint8_t *x_in, float *x_out) {
for (int i = 0; i < d; ++i) {
x_out[i] = 2 * ((x_in[i / 8] & (1 << (i % 8))) != 0) - 1;
}
}
void real_to_binary(int d, const float *x_in, uint8_t *x_out) {
for (int i = 0; i < d / 8; ++i) {
uint8_t b = 0;
for (int j = 0; j < 8; ++j) {
if (x_in[8 * i + j] > 0) {
b |= (1 << j);
}
}
x_out[i] = b;
}
}
} // namespace faiss } // namespace faiss
...@@ -18,9 +18,9 @@ ...@@ -18,9 +18,9 @@
#ifndef FAISS_utils_h #ifndef FAISS_utils_h
#define FAISS_utils_h #define FAISS_utils_h
#include <random>
#include <stdint.h> #include <stdint.h>
// for the random data struct
#include <cstdlib>
#include "Heap.h" #include "Heap.h"
...@@ -47,34 +47,23 @@ size_t get_mem_usage_kb (); ...@@ -47,34 +47,23 @@ size_t get_mem_usage_kb ();
/// random generator that can be used in multithreaded contexts /// random generator that can be used in multithreaded contexts
struct RandomGenerator { struct RandomGenerator {
#ifdef __linux__ std::mt19937 mt;
char rand_state [8];
struct random_data rand_data;
#elif __APPLE__
unsigned rand_state;
#endif
/// random 31-bit positive integer /// random positive integer
int rand_int (); int rand_int ();
/// random long < 2 ^ 62 /// random long
long rand_long (); long rand_long ();
/// generate random number between 0 and max-1 /// generate random integer between 0 and max-1
int rand_int (int max); int rand_int (int max);
/// between 0 and 1 /// between 0 and 1
float rand_float (); float rand_float ();
double rand_double (); double rand_double ();
/// initialize
explicit RandomGenerator (long seed = 1234); explicit RandomGenerator (long seed = 1234);
/// default copy constructor messes up pointer in rand_data
RandomGenerator (const RandomGenerator & other);
}; };
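For reference, a minimal usage sketch (illustrative only, not part of this patch): the mt19937-backed generator keeps the pre-existing interface, so callers only supply a seed.
// Sketch only: exercising RandomGenerator through the interface declared above.
#include <faiss/utils.h>
#include <cstdio>
int main () {
    faiss::RandomGenerator rng (1234);   // reproducible seed
    int i = rng.rand_int ();             // random positive integer
    int j = rng.rand_int (100);          // in [0, 100)
    float f = rng.rand_float ();         // in [0, 1]
    double g = rng.rand_double ();       // in [0, 1]
    printf ("%d %d %f %f\n", i, j, f, g);
    return 0;
}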
/* Generate an array of uniform random floats / multi-threaded implementation */ /* Generate an array of uniform random floats / multi-threaded implementation */
...@@ -389,6 +378,24 @@ const float *fvecs_maybe_subsample ( ...@@ -389,6 +378,24 @@ const float *fvecs_maybe_subsample (
size_t d, size_t *n, size_t nmax, const float *x, size_t d, size_t *n, size_t nmax, const float *x,
bool verbose = false, long seed = 1234); bool verbose = false, long seed = 1234);
/** Convert binary vector to +1/-1 valued float vector.
*
* @param d dimension of the vector
* @param x_in input binary vector (uint8_t table of size d / 8)
* @param x_out output float vector (float table of size d)
*/
void binary_to_real(int d, const uint8_t *x_in, float *x_out);
/** Convert float vector to binary vector. Components > 0 are converted to 1,
* others to 0.
*
* @param d dimension of the vector
* @param x_in input float vector (float table of size d)
* @param x_out output binary vector (uint8_t table of size d / 8)
*/
void real_to_binary(int d, const float *x_in, uint8_t *x_out);
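As an illustration (a standalone sketch, not part of the change set), a round trip through these two helpers; d is assumed to be a multiple of 8 so that real_to_binary fills whole bytes.
// Sketch only: bits -> +1/-1 floats -> bits recovers the original codes,
// since binary_to_real emits +1 for set bits and real_to_binary keeps
// components > 0.
#include <faiss/utils.h>
#include <cassert>
#include <cstdint>
void binary_roundtrip_example () {
    const int d = 16;                       // multiple of 8
    uint8_t code[d / 8] = {0xA5, 0x3C};     // arbitrary binary vector
    float real[d];
    faiss::binary_to_real (d, code, real);
    uint8_t back[d / 8];
    faiss::real_to_binary (d, real, back);
    assert (back[0] == code[0] && back[1] == code[1]);
}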
} // namespace faiss } // namespace faiss
......
/**
* Copyright (c) 2015-present, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under the BSD+Patents license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#include "utils.h"
#include <cstdio>
#include <cassert>
#include <cstring>
#include <cmath>
#ifdef __SSE__
#include <immintrin.h>
#endif
#ifdef __aarch64__
#include <arm_neon.h>
#endif
#include <omp.h>
/**************************************************
* Get some stats about the system
**************************************************/
namespace faiss {
#ifdef __AVX__
#define USE_AVX
#endif
/*********************************************************
* Optimized distance computations
*********************************************************/
/* Functions to compute:
- L2 distance between 2 vectors
- inner product between 2 vectors
- L2 norm of a vector
The functions should probably not be invoked when a large number of
vectors are processed in batch (in which case matrix multiplication
is faster), but may be useful for comparing vectors isolated in
memory.
Works with any vectors of any dimension, even unaligned (in which
case they are slower).
*/
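As a quick standalone sketch (not part of this file), the public entry points can be cross-checked against the identity ||x - y||^2 = ||x||^2 + ||y||^2 - 2<x, y>, including at a dimension that is not a multiple of 4 or 8:
// Sketch only: relate fvec_L2sqr, fvec_norm_L2sqr and fvec_inner_product.
#include <faiss/utils.h>
#include <cassert>
#include <cmath>
void distance_identity_example () {
    const size_t d = 5;                    // deliberately "unaligned"
    float x[d] = {1.f, 2.f, 3.f, 4.f, 5.f};
    float y[d] = {0.5f, -1.f, 2.f, 0.f, 3.f};
    float lhs = faiss::fvec_L2sqr (x, y, d);
    float rhs = faiss::fvec_norm_L2sqr (x, d)
              + faiss::fvec_norm_L2sqr (y, d)
              - 2.f * faiss::fvec_inner_product (x, y, d);
    assert (std::fabs (lhs - rhs) < 1e-4f);
}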
/*********************************************************
* Reference implementations
*/
/* same without SSE */
float fvec_L2sqr_ref (const float * x,
const float * y,
size_t d)
{
size_t i;
float res = 0;
for (i = 0; i < d; i++) {
const float tmp = x[i] - y[i];
res += tmp * tmp;
}
return res;
}
float fvec_inner_product_ref (const float * x,
const float * y,
size_t d)
{
size_t i;
float res = 0;
for (i = 0; i < d; i++)
res += x[i] * y[i];
return res;
}
float fvec_norm_L2sqr_ref (const float *x, size_t d)
{
size_t i;
double res = 0;
for (i = 0; i < d; i++)
res += x[i] * x[i];
return res;
}
/*********************************************************
* SSE and AVX implementations
*/
#ifdef __SSE__
// reads 0 <= d < 4 floats as __m128
static inline __m128 masked_read (int d, const float *x)
{
assert (0 <= d && d < 4);
__attribute__((__aligned__(16))) float buf[4] = {0, 0, 0, 0};
switch (d) {
case 3:
buf[2] = x[2];
case 2:
buf[1] = x[1];
case 1:
buf[0] = x[0];
}
return _mm_load_ps (buf);
// cannot use AVX2 _mm_mask_set1_epi32
}
float fvec_norm_L2sqr (const float * x,
size_t d)
{
__m128 mx;
__m128 msum1 = _mm_setzero_ps();
while (d >= 4) {
mx = _mm_loadu_ps (x); x += 4;
msum1 = _mm_add_ps (msum1, _mm_mul_ps (mx, mx));
d -= 4;
}
mx = masked_read (d, x);
msum1 = _mm_add_ps (msum1, _mm_mul_ps (mx, mx));
msum1 = _mm_hadd_ps (msum1, msum1);
msum1 = _mm_hadd_ps (msum1, msum1);
return _mm_cvtss_f32 (msum1);
}
#endif
#ifdef USE_AVX
// reads 0 <= d < 8 floats as __m256
static inline __m256 masked_read_8 (int d, const float *x)
{
assert (0 <= d && d < 8);
if (d < 4) {
__m256 res = _mm256_setzero_ps ();
res = _mm256_insertf128_ps (res, masked_read (d, x), 0);
return res;
} else {
__m256 res = _mm256_setzero_ps ();
res = _mm256_insertf128_ps (res, _mm_loadu_ps (x), 0);
res = _mm256_insertf128_ps (res, masked_read (d - 4, x + 4), 1);
return res;
}
}
float fvec_inner_product (const float * x,
const float * y,
size_t d)
{
__m256 msum1 = _mm256_setzero_ps();
while (d >= 8) {
__m256 mx = _mm256_loadu_ps (x); x += 8;
__m256 my = _mm256_loadu_ps (y); y += 8;
msum1 = _mm256_add_ps (msum1, _mm256_mul_ps (mx, my));
d -= 8;
}
__m128 msum2 = _mm256_extractf128_ps(msum1, 1);
msum2 += _mm256_extractf128_ps(msum1, 0);
if (d >= 4) {
__m128 mx = _mm_loadu_ps (x); x += 4;
__m128 my = _mm_loadu_ps (y); y += 4;
msum2 = _mm_add_ps (msum2, _mm_mul_ps (mx, my));
d -= 4;
}
if (d > 0) {
__m128 mx = masked_read (d, x);
__m128 my = masked_read (d, y);
msum2 = _mm_add_ps (msum2, _mm_mul_ps (mx, my));
}
msum2 = _mm_hadd_ps (msum2, msum2);
msum2 = _mm_hadd_ps (msum2, msum2);
return _mm_cvtss_f32 (msum2);
}
float fvec_L2sqr (const float * x,
const float * y,
size_t d)
{
__m256 msum1 = _mm256_setzero_ps();
while (d >= 8) {
__m256 mx = _mm256_loadu_ps (x); x += 8;
__m256 my = _mm256_loadu_ps (y); y += 8;
const __m256 a_m_b1 = mx - my;
msum1 += a_m_b1 * a_m_b1;
d -= 8;
}
__m128 msum2 = _mm256_extractf128_ps(msum1, 1);
msum2 += _mm256_extractf128_ps(msum1, 0);
if (d >= 4) {
__m128 mx = _mm_loadu_ps (x); x += 4;
__m128 my = _mm_loadu_ps (y); y += 4;
const __m128 a_m_b1 = mx - my;
msum2 += a_m_b1 * a_m_b1;
d -= 4;
}
if (d > 0) {
__m128 mx = masked_read (d, x);
__m128 my = masked_read (d, y);
__m128 a_m_b1 = mx - my;
msum2 += a_m_b1 * a_m_b1;
}
msum2 = _mm_hadd_ps (msum2, msum2);
msum2 = _mm_hadd_ps (msum2, msum2);
return _mm_cvtss_f32 (msum2);
}
#elif defined(__SSE__)
/* SSE-implementation of L2 distance */
float fvec_L2sqr (const float * x,
const float * y,
size_t d)
{
__m128 msum1 = _mm_setzero_ps();
while (d >= 4) {
__m128 mx = _mm_loadu_ps (x); x += 4;
__m128 my = _mm_loadu_ps (y); y += 4;
const __m128 a_m_b1 = mx - my;
msum1 += a_m_b1 * a_m_b1;
d -= 4;
}
if (d > 0) {
// add the last 1, 2 or 3 values
__m128 mx = masked_read (d, x);
__m128 my = masked_read (d, y);
__m128 a_m_b1 = mx - my;
msum1 += a_m_b1 * a_m_b1;
}
msum1 = _mm_hadd_ps (msum1, msum1);
msum1 = _mm_hadd_ps (msum1, msum1);
return _mm_cvtss_f32 (msum1);
}
float fvec_inner_product (const float * x,
const float * y,
size_t d)
{
__m128 mx, my;
__m128 msum1 = _mm_setzero_ps();
while (d >= 4) {
mx = _mm_loadu_ps (x); x += 4;
my = _mm_loadu_ps (y); y += 4;
msum1 = _mm_add_ps (msum1, _mm_mul_ps (mx, my));
d -= 4;
}
// add the last 1, 2, or 3 values
mx = masked_read (d, x);
my = masked_read (d, y);
__m128 prod = _mm_mul_ps (mx, my);
msum1 = _mm_add_ps (msum1, prod);
msum1 = _mm_hadd_ps (msum1, msum1);
msum1 = _mm_hadd_ps (msum1, msum1);
return _mm_cvtss_f32 (msum1);
}
#elif defined(__aarch64__)
float fvec_L2sqr (const float * x,
const float * y,
size_t d)
{
if (d & 3) return fvec_L2sqr_ref (x, y, d);
float32x4_t accu = vdupq_n_f32 (0);
for (size_t i = 0; i < d; i += 4) {
float32x4_t xi = vld1q_f32 (x + i);
float32x4_t yi = vld1q_f32 (y + i);
float32x4_t sq = vsubq_f32 (xi, yi);
accu = vfmaq_f32 (accu, sq, sq);
}
float32x4_t a2 = vpaddq_f32 (accu, accu);
return vdups_laneq_f32 (a2, 0) + vdups_laneq_f32 (a2, 1);
}
float fvec_inner_product (const float * x,
const float * y,
size_t d)
{
if (d & 3) return fvec_inner_product_ref (x, y, d);
float32x4_t accu = vdupq_n_f32 (0);
for (size_t i = 0; i < d; i += 4) {
float32x4_t xi = vld1q_f32 (x + i);
float32x4_t yi = vld1q_f32 (y + i);
accu = vfmaq_f32 (accu, xi, yi);
}
float32x4_t a2 = vpaddq_f32 (accu, accu);
return vdups_laneq_f32 (a2, 0) + vdups_laneq_f32 (a2, 1);
}
float fvec_norm_L2sqr (const float *x, size_t d)
{
if (d & 3) return fvec_norm_L2sqr_ref (x, d);
float32x4_t accu = vdupq_n_f32 (0);
for (size_t i = 0; i < d; i += 4) {
float32x4_t xi = vld1q_f32 (x + i);
accu = vfmaq_f32 (accu, xi, xi);
}
float32x4_t a2 = vpaddq_f32 (accu, accu);
return vdups_laneq_f32 (a2, 0) + vdups_laneq_f32 (a2, 1);
}
#else
// scalar implementation
float fvec_L2sqr (const float * x,
const float * y,
size_t d)
{
return fvec_L2sqr_ref (x, y, d);
}
float fvec_inner_product (const float * x,
const float * y,
size_t d)
{
return fvec_inner_product_ref (x, y, d);
}
float fvec_norm_L2sqr (const float *x, size_t d)
{
return fvec_norm_L2sqr_ref (x, d);
}
#endif
/***************************************************************************
* heavily optimized table computations
***************************************************************************/
static inline void fvec_madd_ref (size_t n, const float *a,
float bf, const float *b, float *c) {
for (size_t i = 0; i < n; i++)
c[i] = a[i] + bf * b[i];
}
#ifdef __SSE__
static inline void fvec_madd_sse (size_t n, const float *a,
float bf, const float *b, float *c) {
n >>= 2;
__m128 bf4 = _mm_set_ps1 (bf);
__m128 * a4 = (__m128*)a;
__m128 * b4 = (__m128*)b;
__m128 * c4 = (__m128*)c;
while (n--) {
*c4 = _mm_add_ps (*a4, _mm_mul_ps (bf4, *b4));
b4++;
a4++;
c4++;
}
}
void fvec_madd (size_t n, const float *a,
float bf, const float *b, float *c)
{
if ((n & 3) == 0 &&
((((long)a) | ((long)b) | ((long)c)) & 15) == 0)
fvec_madd_sse (n, a, bf, b, c);
else
fvec_madd_ref (n, a, bf, b, c);
}
#else
void fvec_madd (size_t n, const float *a,
float bf, const float *b, float *c)
{
fvec_madd_ref (n, a, bf, b, c);
}
#endif
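A small standalone sketch (not part of this file) of the fvec_madd contract c[i] = a[i] + bf * b[i]; with n = 6 the dispatcher above takes the reference path, which returns the same values as the SSE path:
// Sketch only: fvec_madd computes c = a + bf * b element-wise.
#include <faiss/utils.h>
#include <cassert>
void madd_example () {
    const size_t n = 6;                    // not a multiple of 4
    float a[n] = {1, 2, 3, 4, 5, 6};
    float b[n] = {1, 1, 1, 1, 1, 1};
    float c[n];
    faiss::fvec_madd (n, a, 0.5f, b, c);   // c = a + 0.5 * b
    assert (c[0] == 1.5f && c[5] == 6.5f);
}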
static inline int fvec_madd_and_argmin_ref (size_t n, const float *a,
float bf, const float *b, float *c) {
float vmin = 1e20;
int imin = -1;
for (size_t i = 0; i < n; i++) {
c[i] = a[i] + bf * b[i];
if (c[i] < vmin) {
vmin = c[i];
imin = i;
}
}
return imin;
}
#ifdef __SSE__
static inline int fvec_madd_and_argmin_sse (
size_t n, const float *a,
float bf, const float *b, float *c) {
n >>= 2;
__m128 bf4 = _mm_set_ps1 (bf);
__m128 vmin4 = _mm_set_ps1 (1e20);
__m128i imin4 = _mm_set1_epi32 (-1);
__m128i idx4 = _mm_set_epi32 (3, 2, 1, 0);
__m128i inc4 = _mm_set1_epi32 (4);
__m128 * a4 = (__m128*)a;
__m128 * b4 = (__m128*)b;
__m128 * c4 = (__m128*)c;
while (n--) {
__m128 vc4 = _mm_add_ps (*a4, _mm_mul_ps (bf4, *b4));
*c4 = vc4;
__m128i mask = (__m128i)_mm_cmpgt_ps (vmin4, vc4);
// imin4 = _mm_blendv_epi8 (imin4, idx4, mask); // slower!
imin4 = _mm_or_si128 (_mm_and_si128 (mask, idx4),
_mm_andnot_si128 (mask, imin4));
vmin4 = _mm_min_ps (vmin4, vc4);
b4++;
a4++;
c4++;
idx4 = _mm_add_epi32 (idx4, inc4);
}
// 4 values -> 2
{
idx4 = _mm_shuffle_epi32 (imin4, 3 << 2 | 2);
__m128 vc4 = _mm_shuffle_ps (vmin4, vmin4, 3 << 2 | 2);
__m128i mask = (__m128i)_mm_cmpgt_ps (vmin4, vc4);
imin4 = _mm_or_si128 (_mm_and_si128 (mask, idx4),
_mm_andnot_si128 (mask, imin4));
vmin4 = _mm_min_ps (vmin4, vc4);
}
// 2 values -> 1
{
idx4 = _mm_shuffle_epi32 (imin4, 1);
__m128 vc4 = _mm_shuffle_ps (vmin4, vmin4, 1);
__m128i mask = (__m128i)_mm_cmpgt_ps (vmin4, vc4);
imin4 = _mm_or_si128 (_mm_and_si128 (mask, idx4),
_mm_andnot_si128 (mask, imin4));
// vmin4 = _mm_min_ps (vmin4, vc4);
}
return _mm_cvtsi128_si32 (imin4);
}
int fvec_madd_and_argmin (size_t n, const float *a,
float bf, const float *b, float *c)
{
if ((n & 3) == 0 &&
((((long)a) | ((long)b) | ((long)c)) & 15) == 0)
return fvec_madd_and_argmin_sse (n, a, bf, b, c);
else
return fvec_madd_and_argmin_ref (n, a, bf, b, c);
}
#else
int fvec_madd_and_argmin (size_t n, const float *a,
float bf, const float *b, float *c)
{
return fvec_madd_and_argmin_ref (n, a, bf, b, c);
}
#endif
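Similarly, a standalone sketch (not part of this file) of fvec_madd_and_argmin, which writes c = a + bf * b and returns the index of the smallest component of c:
// Sketch only: argmin of the computed c.
#include <faiss/utils.h>
#include <cassert>
void madd_argmin_example () {
    const size_t n = 4;
    float a[n] = {4, 3, 2, 1};
    float b[n] = {0, 0, 0, 10};
    float c[n];
    int imin = faiss::fvec_madd_and_argmin (n, a, 1.0f, b, c);
    // c = {4, 3, 2, 11}, so the minimum is at index 2
    assert (imin == 2);
}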
} // namespace faiss