Unverified Commit 6880286e authored by Lucas Hosseini's avatar Lucas Hosseini Committed by GitHub

Facebook sync (#504)

* Facebook sync

* Update swig wrappers.

* Fix comment.
parent 98b23c87
......@@ -6,9 +6,11 @@
* LICENSE file in the root directory of this source tree.
*/
/* Copyright 2004-present Facebook. All Rights Reserved.
implementation of Hyper-parameter auto-tuning
*/
// -*- c++ -*-
/*
* implementation of Hyper-parameter auto-tuning
*/
#include "AutoTune.h"
......@@ -25,7 +27,8 @@
#include "MetaIndexes.h"
#include "IndexScalarQuantizer.h"
#include "IndexHNSW.h"
#include "IndexBinaryFlat.h"
#include "IndexBinaryIVF.h"
namespace faiss {
......@@ -734,11 +737,8 @@ Index *index_factory (int d, const char *description_in, MetricType metric)
vt_1 = new OPQMatrix (d, opq_M);
} else if (stok == "L2norm") {
vt_1 = new NormalizationTransform (d, 2.0);
// coarse quantizers
} else if (!coarse_quantizer &&
sscanf (tok, "IVF%d_HNSW%d", &ncentroids, &M) == 2) {
FAISS_THROW_IF_NOT (metric == METRIC_L2);
coarse_quantizer_1 = new IndexHNSWFlat (d, M);
} else if (!coarse_quantizer &&
sscanf (tok, "IVF%d", &ncentroids) == 1) {
if (metric == METRIC_L2) {
......@@ -755,11 +755,14 @@ Index *index_factory (int d, const char *description_in, MetricType metric)
add_idmap = true;
// IVFs
} else if (!index && stok == "Flat") {
} else if (!index && (stok == "Flat" || stok == "FlatDedup")) {
if (coarse_quantizer) {
// if there was an IVF in front, then it is an IVFFlat
IndexIVF *index_ivf = new IndexIVFFlat (
coarse_quantizer, d, ncentroids, metric);
IndexIVF *index_ivf = stok == "Flat" ?
new IndexIVFFlat (
coarse_quantizer, d, ncentroids, metric) :
new IndexIVFFlatDedup (
coarse_quantizer, d, ncentroids, metric);
index_ivf->quantizer_trains_alone =
get_trains_alone (coarse_quantizer);
index_ivf->cp.spherical = metric == METRIC_INNER_PRODUCT;
......@@ -767,12 +770,16 @@ Index *index_factory (int d, const char *description_in, MetricType metric)
index_ivf->own_fields = true;
index_1 = index_ivf;
} else {
FAISS_THROW_IF_NOT_MSG (stok != "FlatDedup",
"dedup supported only for IVFFlat");
index_1 = new IndexFlat (d, metric);
}
} else if (!index && (stok == "SQ8" || stok == "SQ4")) {
} else if (!index && (stok == "SQ8" || stok == "SQ4" ||
stok == "SQfp16")) {
ScalarQuantizer::QuantizerType qt =
stok == "SQ8" ? ScalarQuantizer::QT_8bit :
stok == "SQ4" ? ScalarQuantizer::QT_4bit :
stok == "SQfp16" ? ScalarQuantizer::QT_fp16 :
ScalarQuantizer::QT_4bit;
if (coarse_quantizer) {
IndexIVFScalarQuantizer *index_ivf =
......@@ -905,7 +912,27 @@ Index *index_factory (int d, const char *description_in, MetricType metric)
return index;
}
/** Build a binary index from a textual description.
 *
 * Descriptions recognized by this function:
 *  - "BIVF<n>": an IndexBinaryIVF with n inverted lists whose coarse
 *    quantizer is a freshly allocated IndexBinaryFlat, owned by the
 *    returned index (own_fields is set);
 *  - "BFlat": an IndexBinaryFlat.
 *
 * Any other description is reported as an error.
 *
 * @param d            dimension forwarded to the index constructors
 * @param description  factory string, must not be null
 * @return newly allocated index
 */
IndexBinary *index_binary_factory(int d, const char *description)
{
    // sscanf() and std::string(const char *) are both undefined on null
    FAISS_THROW_IF_NOT_MSG(description, "description must not be null");

    int ncentroids = -1;
    if (sscanf(description, "BIVF%d", &ncentroids) == 1) {
        // the list count is converted to size_t by the IndexBinaryIVF
        // constructor, so a zero or negative value must be rejected here
        FAISS_THROW_IF_NOT_FMT(ncentroids > 0,
                               "invalid number of centroids in description %s",
                               description);
        IndexBinaryIVF *index_ivf = new IndexBinaryIVF(
            new IndexBinaryFlat(d), d, ncentroids
        );
        index_ivf->own_fields = true;
        return index_ivf;
    }

    if (std::string(description) == "BFlat") {
        return new IndexBinaryFlat(d);
    }

    // no pattern matched
    IndexBinary *index = nullptr;
    FAISS_THROW_IF_NOT_FMT(index, "description %s did not generate an index",
                           description);
    return index;
}
}; // namespace faiss
} // namespace faiss
......@@ -6,7 +6,6 @@
* LICENSE file in the root directory of this source tree.
*/
// Copyright 2004-present Facebook. All Rights Reserved.
// -*- c++ -*-
#ifndef FAISS_AUTO_TUNE_H
......@@ -15,6 +14,7 @@
#include <vector>
#include "Index.h"
#include "IndexBinary.h"
namespace faiss {
......@@ -203,6 +203,8 @@ struct ParameterSpace {
Index *index_factory (int d, const char *description,
MetricType metric = METRIC_L2);
IndexBinary *index_binary_factory (int d, const char *description);
} // namespace faiss
......
......@@ -6,11 +6,12 @@
* LICENSE file in the root directory of this source tree.
*/
// Copyright 2004-present Facebook. All Rights Reserved
// -*- c++ -*-
#include "AuxIndexStructures.h"
#include "FaissAssert.h"
#include <cstring>
namespace faiss {
......@@ -208,9 +209,44 @@ bool IDSelectorBatch::is_member (idx_t i) const
}
/***********************************************************************
* IO functions
***********************************************************************/
// Default implementation for readers that are not backed by a file:
// reports an error through FAISS_THROW_MSG instead of returning a
// file number.
int IOReader::fileno ()
{
FAISS_THROW_MSG ("IOReader does not support memory mapping");
}
// Default implementation for writers that are not backed by a file:
// reports an error through FAISS_THROW_MSG instead of returning a
// file number.
int IOWriter::fileno ()
{
FAISS_THROW_MSG ("IOWriter does not support memory mapping");
}
/** Append size * nitems bytes read from ptr to the end of `data`
 * (fwrite-like signature).
 *
 * @param ptr     source buffer
 * @param size    size of one item in bytes
 * @param nitems  number of items to append
 * @return nitems
 */
size_t VectorIOWriter::operator()(
        const void *ptr, size_t size, size_t nitems)
{
    const size_t nbytes = size * nitems;
    // Nothing to copy: avoid indexing one past the end of the buffer and
    // calling memcpy with a source pointer that may be null.
    if (nbytes == 0) return nitems;

    const size_t o = data.size();
    data.resize(o + nbytes);
    memcpy (data.data() + o, ptr, nbytes);
    return nitems;
}
/** Copy up to nitems items of `size` bytes from `data`, starting at the
 * read position rp, into ptr (fread-like signature).
 *
 * Only whole items are copied; rp advances by the number of bytes copied.
 *
 * @param ptr     destination buffer
 * @param size    size of one item in bytes
 * @param nitems  number of items requested
 * @return number of items actually copied (0 when nothing is left or the
 *         request is empty)
 */
size_t VectorIOReader::operator()(
        void *ptr, size_t size, size_t nitems)
{
    // An empty request reads nothing; size == 0 would also divide by zero
    // in the remaining-items computation below.
    if (size == 0 || nitems == 0) return 0;
    if (rp >= data.size()) return 0;

    const size_t nremain = (data.size() - rp) / size;
    if (nremain < nitems) nitems = nremain;
    if (nitems == 0) return 0;  // less than one whole item left

    memcpy (ptr, data.data() + rp, size * nitems);
    rp += size * nitems;
    return nitems;
}
} // namespace faiss
......@@ -6,14 +6,16 @@
* LICENSE file in the root directory of this source tree.
*/
// Copyright 2004-present Facebook. All Rights Reserved
// -*- c++ -*-
// Auxiliary index structures, that are used in indexes but that can
// be forward-declared
#ifndef FAISS_AUX_INDEX_STRUCTURES_H
#define FAISS_AUX_INDEX_STRUCTURES_H
#include <stdint.h>
#include <vector>
#include <unordered_set>
......@@ -157,10 +159,8 @@ struct RangeSearchPartialResult: BufferList {
/// begin a new result
QueryResult & new_result (idx_t qno);
void finalize ();
/// called by range_search before do_allocation
void set_lims ();
......@@ -169,6 +169,46 @@ struct RangeSearchPartialResult: BufferList {
};
/***********************************************************
* Abstract I/O objects
***********************************************************/
// Abstract input object: subclasses implement the call operator, which has
// the same (ptr, size, nitems) parameters as fread.
struct IOReader {
// fread
virtual size_t operator()(
void *ptr, size_t size, size_t nitems) = 0;
// return a file number that can be memory-mapped
virtual int fileno ();
// virtual destructor so that readers can be destroyed through IOReader*
virtual ~IOReader() {}
};
// Abstract output object: subclasses implement the call operator, which has
// the same (ptr, size, nitems) parameters as fwrite.
struct IOWriter {
// fwrite
virtual size_t operator()(
const void *ptr, size_t size, size_t nitems) = 0;
// return a file number that can be memory-mapped
virtual int fileno ();
// virtual destructor so that writers can be destroyed through IOWriter*
virtual ~IOWriter() {}
};
/// IOReader that reads from an in-memory byte buffer.
struct VectorIOReader:IOReader {
    /// bytes to read from. Deliberately not const: the struct has no
    /// constructor taking a buffer, so a const member could never be filled.
    std::vector<uint8_t> data;
    /// read position: offset in `data` of the next byte to return
    size_t rp = 0;
    size_t operator()(void *ptr, size_t size, size_t nitems) override;
};
// IOWriter that accumulates the written bytes in an in-memory buffer.
struct VectorIOWriter:IOWriter {
// bytes written so far
std::vector<uint8_t> data;
size_t operator()(const void *ptr, size_t size, size_t nitems) override;
};
}; // namespace faiss
......
......@@ -6,14 +6,11 @@
* LICENSE file in the root directory of this source tree.
*/
/* Copyright 2004-present Facebook. All Rights Reserved.
kmeans clustering routines
*/
// -*- c++ -*-
#include "Clustering.h"
#include <cmath>
#include <cstdio>
#include <cstring>
......
......@@ -6,7 +6,6 @@
* LICENSE file in the root directory of this source tree.
*/
// Copyright 2004-present Facebook. All Rights Reserved
// -*- c++ -*-
#ifndef FAISS_CLUSTERING_H
......
......@@ -6,7 +6,7 @@
* LICENSE file in the root directory of this source tree.
*/
// Copyright 2004-present Facebook. All Rights Reserved.
// -*- c++ -*-
#ifndef FAISS_ASSERT_INCLUDED
#define FAISS_ASSERT_INCLUDED
......
......@@ -6,7 +6,7 @@
* LICENSE file in the root directory of this source tree.
*/
// Copyright 2004-present Facebook. All Rights Reserved.
// -*- c++ -*-
#include "FaissException.h"
......
......@@ -6,7 +6,7 @@
* LICENSE file in the root directory of this source tree.
*/
// Copyright 2004-present Facebook. All Rights Reserved.
// -*- c++ -*-
#ifndef FAISS_EXCEPTION_INCLUDED
#define FAISS_EXCEPTION_INCLUDED
......
......@@ -6,7 +6,8 @@
* LICENSE file in the root directory of this source tree.
*/
/* Copyright 2004-present Facebook. All Rights Reserved. */
// -*- c++ -*-
/* Function for soft heap */
#include "Heap.h"
......@@ -15,13 +16,6 @@
namespace faiss {
template <typename C>
void HeapArray<C>::heapify ()
{
......@@ -126,7 +120,4 @@ template class HeapArray<CMin <int, long> >;
template class HeapArray<CMax <int, long> >;
} // END namespace fasis
......@@ -6,8 +6,9 @@
* LICENSE file in the root directory of this source tree.
*/
/* Copyright 2004-present Facebook. All Rights Reserved.
*
// -*- c++ -*-
/*
* C++ support for heaps. The set of functions is tailored for
* efficient similarity search.
*
......@@ -31,7 +32,6 @@
#include <limits>
namespace faiss {
/*******************************************************************
......@@ -490,8 +490,6 @@ void indirect_heap_push (size_t k,
}
} // namespace faiss
#endif /* FAISS_Heap_h */
......@@ -6,13 +6,14 @@
* LICENSE file in the root directory of this source tree.
*/
// Copyright 2004-present Facebook. All Rights Reserved
// -*- c++ -*-
#include "IndexFlat.h"
#include "FaissAssert.h"
#include <cstring>
namespace faiss {
Index::~Index ()
......
......@@ -6,7 +6,6 @@
* LICENSE file in the root directory of this source tree.
*/
// Copyright 2004-present Facebook. All Rights Reserved
// -*- c++ -*-
#ifndef FAISS_INDEX_H
......
/**
* Copyright (c) 2015-present, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under the BSD+Patents license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#include "IndexBinary.h"
#include "FaissAssert.h"
#include <cstring>
namespace faiss {
// Out-of-line definition of the virtual destructor; nothing to release here.
IndexBinary::~IndexBinary() {}
// Default training hook: both arguments are ignored.
void IndexBinary::train(idx_t, const uint8_t *) {
// Does nothing by default.
}
// Default range search: not supported, reports an error through
// FAISS_THROW_MSG. All arguments are ignored.
void IndexBinary::range_search(idx_t, const uint8_t *, int,
RangeSearchResult *) const {
FAISS_THROW_MSG("range search not implemented");
}
// Label-only variant of search(): runs search() with a temporary distance
// buffer of n * k entries and keeps only the labels it writes.
void IndexBinary::assign(idx_t n, const uint8_t *x, idx_t *labels, idx_t k) {
    int *scratch_distances = new int[n * k];
    // releases the scratch buffer on every exit path, including exceptions
    ScopeDeleter<int> scratch_guard(scratch_distances);

    search(n, x, k, scratch_distances, labels);
}
// Default add_with_ids: not supported, reports an error through
// FAISS_THROW_MSG. All arguments are ignored.
void IndexBinary::add_with_ids(idx_t, const uint8_t *, const long *) {
FAISS_THROW_MSG("add_with_ids not implemented for this type of index");
}
// Default remove_ids: not supported, reports an error through
// FAISS_THROW_MSG. The selector is ignored.
long IndexBinary::remove_ids(const IDSelector&) {
FAISS_THROW_MSG("remove_ids not implemented for this type of index");
// presumably only here to give the non-void function a return statement
return -1;
}
// Default reconstruct: not supported, reports an error through
// FAISS_THROW_MSG. Both arguments are ignored.
void IndexBinary::reconstruct(idx_t, uint8_t *) const {
FAISS_THROW_MSG("reconstruct not implemented for this type of index");
}
/** Reconstruct vectors i0 .. i0 + ni - 1 by calling reconstruct() on each.
 *
 * @param i0      id of the first vector to reconstruct
 * @param ni      number of vectors to reconstruct
 * @param recons  output buffer, size ni * code_size bytes (= ni * d / 8)
 */
void IndexBinary::reconstruct_n(idx_t i0, idx_t ni, uint8_t *recons) const {
    // d counts bits, while each stored vector takes code_size = d / 8 bytes.
    // Stepping the output by d would write far beyond a buffer of the
    // documented size ni * d / 8, so the stride must be code_size.
    for (idx_t i = 0; i < ni; i++) {
        reconstruct(i0 + i, recons + i * code_size);
    }
}
// Runs search(), then fills recons with one entry per (query, rank) pair.
// Entries are spaced d bytes apart in recons. A result slot whose label is
// negative (no neighbor found) gets d bytes of 0xFF; otherwise the stored
// vector is written by reconstruct().
void IndexBinary::search_and_reconstruct(idx_t n, const uint8_t *x, idx_t k,
                                         int32_t *distances, idx_t *labels,
                                         uint8_t *recons) const {
    search(n, x, k, distances, labels);

    for (idx_t query = 0; query < n; ++query) {
        for (idx_t rank = 0; rank < k; ++rank) {
            const idx_t slot = query * k + rank;
            uint8_t *out = recons + slot * d;
            const idx_t key = labels[slot];

            if (key >= 0) {
                reconstruct(key, out);
                continue;
            }
            // missing result: memset with -1 sets every byte to 0xFF
            memset(out, -1, sizeof(*out) * d);
        }
    }
}
// Prints the dynamic type name of this object (as returned by typeid) and
// ntotal to stdout.
void IndexBinary::display() const {
printf("Index: %s -> %ld elements\n", typeid (*this).name(), ntotal);
}
} // namespace faiss
/**
* Copyright (c) 2015-present, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under the BSD+Patents license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#ifndef FAISS_INDEX_BINARY_H
#define FAISS_INDEX_BINARY_H
#include <cstdio>
#include <typeinfo>
#include <string>
#include <sstream>
#include "FaissAssert.h"
#include "Index.h"
namespace faiss {
/// Forward declarations see AuxIndexStructures.h
struct IDSelector;
struct RangeSearchResult;
/** Abstract structure for a binary index.
 *
 * Supports adding vectors and searching them.
 *
 * All queries are symmetric because there is no distinction between codes and
 * vectors.
 */
struct IndexBinary {
typedef long idx_t; ///< all indices are this type
int d; ///< vector dimension, in bits (must be a multiple of 8)
int code_size; ///< number of bytes per vector ( = d / 8 )
idx_t ntotal; ///< total nb of indexed vectors
bool verbose; ///< verbosity level
/// set if the Index does not require training, or if training is done already
bool is_trained;
/// type of metric this index uses for search
MetricType metric_type;
/** Initializes an empty index (ntotal = 0) that is marked as trained.
 *
 * Reports an error through FAISS_THROW_IF_NOT when d is not a multiple
 * of 8.
 */
explicit IndexBinary(idx_t d = 0, MetricType metric = METRIC_L2)
: d(d),
code_size(d / 8),
ntotal(0),
verbose(false),
is_trained(true),
metric_type(metric) {
FAISS_THROW_IF_NOT(d % 8 == 0);
}
virtual ~IndexBinary();
/** Perform training on a representative set of vectors.
 *
 * @param n nb of training vectors
 * @param x training vectors, size n * d / 8
 */
virtual void train(idx_t n, const uint8_t *x);
/** Add n vectors of dimension d to the index.
 *
 * Vectors are implicitly assigned labels ntotal .. ntotal + n - 1
 * @param x input matrix, size n * d / 8
 */
virtual void add(idx_t n, const uint8_t *x) = 0;
/** Same as add, but stores xids instead of sequential ids.
 *
 * The default implementation reports an error, as it is
 * not supported by all indexes.
 *
 * @param xids if non-null, ids to store for the vectors (size n)
 */
virtual void add_with_ids(idx_t n, const uint8_t *x, const long *xids);
/** Query n vectors of dimension d to the index.
 *
 * return at most k vectors. If there are not enough results for a
 * query, the result array is padded with -1s.
 *
 * @param x input vectors to search, size n * d / 8
 * @param labels output labels of the NNs, size n*k
 * @param distances output pairwise distances, size n*k
 */
virtual void search(idx_t n, const uint8_t *x, idx_t k,
int32_t *distances, idx_t *labels) const = 0;
/** Query n vectors of dimension d to the index.
 *
 * return all vectors with distance < radius. Note that many
 * indexes do not implement the range_search (only the k-NN search
 * is mandatory).
 *
 * @param x input vectors to search, size n * d / 8
 * @param radius search radius
 * @param result result table
 */
virtual void range_search(idx_t n, const uint8_t *x, int radius,
RangeSearchResult *result) const;
/** Return the indexes of the k vectors closest to the query x.
 *
 * This function is identical to search but only returns labels of neighbors.
 * @param x input vectors to search, size n * d / 8
 * @param labels output labels of the NNs, size n*k
 */
void assign(idx_t n, const uint8_t *x, idx_t *labels, idx_t k = 1);
/// Removes all elements from the database.
virtual void reset() = 0;
/** Removes IDs from the index. Not supported by all indexes.
 */
virtual long remove_ids(const IDSelector& sel);
/** Reconstruct a stored vector.
 *
 * This function may not be defined for some indexes.
 * @param key id of the vector to reconstruct
 * @param recons reconstructed vector (size d / 8)
 */
virtual void reconstruct(idx_t key, uint8_t *recons) const;
/** Reconstruct vectors i0 to i0 + ni - 1.
 *
 * This function may not be defined for some indexes.
 * @param recons reconstructed vectors (size ni * d / 8)
 */
virtual void reconstruct_n(idx_t i0, idx_t ni, uint8_t *recons) const;
/** Similar to search, but also reconstructs the stored vectors (or an
 * approximation in the case of lossy coding) for the search results.
 *
 * If there are not enough results for a query, the resulting array
 * is padded with -1s.
 *
 * @param recons reconstructed vectors size (n, k, d)
 **/
virtual void search_and_reconstruct(idx_t n, const uint8_t *x, idx_t k,
int32_t *distances, idx_t *labels,
uint8_t *recons) const;
/** Display the actual class name and some more info. */
void display() const;
};
} // namespace faiss
#endif // FAISS_INDEX_BINARY_H
/**
* Copyright (c) 2015-present, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under the BSD+Patents license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#include "IndexBinaryFlat.h"
#include <cstring>
#include "hamming.h"
#include "utils.h"
#include "Heap.h"
#include "FaissAssert.h"
#include "AuxIndexStructures.h"
namespace faiss {
// Forwards d to the IndexBinary constructor; the vector storage starts empty.
IndexBinaryFlat::IndexBinaryFlat(idx_t d)
: IndexBinary(d) {}
// Appends the n * code_size bytes starting at x to the stored vectors and
// increases ntotal by n.
void IndexBinaryFlat::add(idx_t n, const uint8_t *x) {
    const uint8_t *x_end = x + n * code_size;
    xb.insert(xb.end(), x, x_end);
    ntotal += n;
}
// Empties the index: the vector count goes back to zero and the stored
// bytes are dropped.
void IndexBinaryFlat::reset() {
    ntotal = 0;
    xb.clear();
}
/** Exhaustive k-nearest-neighbor search over all stored vectors.
 *
 * Queries are processed in batches of query_batch_size. Each batch is
 * handled by hammings_knn_hc (heap-based) when use_heap is set and by
 * hammings_knn_mc otherwise.
 *
 * @param n          number of query vectors
 * @param x          query vectors, size n * code_size bytes
 * @param k          number of results per query
 * @param distances  output distances, size n * k
 * @param labels     output labels, size n * k
 */
void IndexBinaryFlat::search(idx_t n, const uint8_t *x, idx_t k,
                             int32_t *distances, idx_t *labels) const {
    // a zero batch size would never advance the loop below
    FAISS_THROW_IF_NOT(query_batch_size > 0);
    const idx_t block_size = static_cast<idx_t>(query_batch_size);

    for (idx_t s = 0; s < n; s += block_size) {
        // the last batch may be shorter
        const idx_t nn = (s + block_size > n) ? n - s : block_size;

        const uint8_t *batch_queries = x + s * code_size;
        int32_t *batch_distances = distances + s * k;
        idx_t *batch_labels = labels + s * k;

        if (use_heap) {
            // We see the distances and labels as heaps.
            int_maxheap_array_t res = {
                size_t(nn), size_t(k), batch_labels, batch_distances
            };

            hammings_knn_hc(&res, batch_queries, xb.data(), ntotal, code_size,
                            /* ordered = */ true);
        } else {
            // Results go to this batch's rows: passing the unshifted output
            // pointers would make every batch overwrite the first rows.
            hammings_knn_mc(batch_queries, xb.data(), nn, ntotal, k, code_size,
                            batch_distances, batch_labels);
        }
    }
}
long IndexBinaryFlat::remove_ids(const IDSelector& sel) {
idx_t j = 0;
for (idx_t i = 0; i < ntotal; i++) {
if (sel.is_member(i)) {
// should be removed
} else {
if (i > j) {
memmove(&xb[code_size * j], &xb[code_size * i], sizeof(xb[0]) * code_size);
}
j++;
}
}
long nremove = ntotal - j;
if (nremove > 0) {
ntotal = j;
xb.resize(ntotal * code_size);
}
return nremove;
}
/** Copy the stored vector at position key into recons.
 *
 * @param key     position of the vector, must be in [0, ntotal)
 * @param recons  output buffer, code_size bytes
 */
void IndexBinaryFlat::reconstruct(idx_t key, uint8_t *recons) const {
    // an out-of-range key would read outside the stored vectors
    FAISS_THROW_IF_NOT(key >= 0 && key < ntotal);
    memcpy(recons, &(xb[code_size * key]), sizeof(*recons) * code_size);
}
} // namespace faiss
/**
* Copyright (c) 2015-present, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under the BSD+Patents license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#ifndef INDEX_BINARY_FLAT_H
#define INDEX_BINARY_FLAT_H
#include <vector>
#include "IndexBinary.h"
namespace faiss {
/** Index that stores the full vectors and performs exhaustive search. */
struct IndexBinaryFlat : IndexBinary {
/// database vectors, size ntotal * d / 8
std::vector<uint8_t> xb;
/** Select between using a heap or counting to select the k smallest values
 * during search.
 */
bool use_heap = true;
/// number of query vectors that search() processes per batch
size_t query_batch_size = 32;
explicit IndexBinaryFlat(idx_t d);
void add(idx_t n, const uint8_t *x) override;
void reset() override;
void search(idx_t n, const uint8_t *x, idx_t k,
int32_t *distances, idx_t *labels) const override;
void reconstruct(idx_t key, uint8_t *recons) const override;
/** Remove some ids. Note that because of the indexing structure,
 * the semantics of this operation are different from the usual ones:
 * the new ids are shifted. */
long remove_ids(const IDSelector& sel) override;
/// default constructor: relies on the IndexBinary defaults (d = 0)
IndexBinaryFlat() {}
};
} // namespace faiss
#endif // INDEX_BINARY_FLAT_H
This diff is collapsed.
/**
* Copyright (c) 2015-present, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under the BSD+Patents license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#ifndef FAISS_INDEX_BINARY_IVF_H
#define FAISS_INDEX_BINARY_IVF_H
#include <vector>
#include "IndexBinary.h"
#include "IndexIVF.h"
#include "Clustering.h"
#include "Heap.h"
namespace faiss {
/** Index based on an inverted file (IVF)
 *
 * In the inverted file, the quantizer (an IndexBinary instance) provides a
 * quantization index for each vector to be added. The quantization
 * index maps to a list (aka inverted list or posting list), where the
 * id of the vector is stored.
 *
 * The inverted list object is required only after training. If none is
 * set externally, an ArrayInvertedLists is used automatically.
 *
 * At search time, the vector to be searched is also quantized, and
 * only the list corresponding to the quantization index is
 * searched. This speeds up the search by making it
 * non-exhaustive. This can be relaxed using multi-probe search: a few
 * (nprobe) quantization indices are selected and several inverted
 * lists are visited.
 */
struct IndexBinaryIVF : IndexBinary {
/// Access to the actual data
InvertedLists *invlists;
/// presumably: whether this object owns invlists (see replace_invlists) — TODO confirm
bool own_invlists;
size_t nprobe; ///< number of probes at query time
size_t max_codes; ///< max nb of codes to visit to do a query
/** Select between using a heap or counting to select the k smallest values
 * when scanning inverted lists.
 */
bool use_heap = true;
/// map for direct access to the elements. Enables reconstruct().
bool maintain_direct_map;
std::vector<long> direct_map;
IndexBinary *quantizer; ///< quantizer that maps vectors to inverted lists
size_t nlist; ///< number of possible key values
/**
 * NOTE(review): the three values below describe a training-mode selector
 * that is not declared in this struct; the comment looks left over from
 * another class — confirm and remove or restore the field.
 * = 0: use the quantizer as index in a kmeans training
 * = 1: just pass on the training set to the train() of the quantizer
 * = 2: kmeans training on a flat index + add the centroids to the quantizer
 */
bool own_fields; ///< whether object owns the quantizer
ClusteringParameters cp; ///< to override default clustering params
/// Trains the quantizer and calls train_residual to train sub-quantizers
void train_q1(size_t n, const uint8_t *x, bool verbose);
/** The Inverted file takes a quantizer (an IndexBinary) on input,
 * which implements the function mapping a vector to a list
 * identifier. The pointer is borrowed: the quantizer should not
 * be deleted while the IndexBinaryIVF is in use.
 */
IndexBinaryIVF(IndexBinary *quantizer, size_t d, size_t nlist);
IndexBinaryIVF();
~IndexBinaryIVF() override;
void reset() override;
/// Trains the quantizer and calls train_residual to train sub-quantizers
void train(idx_t n, const uint8_t *x) override;
/// Quantizes x and calls add_with_key
void add(idx_t n, const uint8_t *x) override;
void add_with_ids(idx_t n, const uint8_t *x, const long *xids) override;
/// same as add_with_ids, with precomputed coarse quantizer
void add_core (idx_t n, const uint8_t * x, const long *xids,
const long *precomputed_idx);
/** Search a set of vectors, that are pre-quantized by the IVF
 * quantizer. Fill in the corresponding heaps with the query
 * results. search() calls this.
 *
 * @param n nb of vectors to query
 * @param x query vectors, size nx * d
 * @param assign coarse quantization indices, size nx * nprobe
 * @param centroid_dis
 * distances to coarse centroids, size nx * nprobe
 * @param distance
 * output distances, size n * k
 * @param labels output labels, size n * k
 * @param store_pairs store inv list index + inv list offset
 * instead in upper/lower 32 bit of result,
 * instead of ids (used for reranking).
 * @param params used to override the object's search parameters
 */
void search_preassigned(idx_t n, const uint8_t *x, idx_t k,
const idx_t *assign,
const int32_t *centroid_dis,
int32_t *distances, idx_t *labels,
bool store_pairs,
const IVFSearchParameters *params=nullptr
) const;
/** assign the vectors, then call search_preassign */
virtual void search(idx_t n, const uint8_t *x, idx_t k,
int32_t *distances, idx_t *labels) const override;
void reconstruct(idx_t key, uint8_t *recons) const override;
/** Reconstruct a subset of the indexed vectors.
 *
 * Overrides default implementation to bypass reconstruct() which requires
 * direct_map to be maintained.
 *
 * @param i0 first vector to reconstruct
 * @param ni nb of vectors to reconstruct
 * @param recons output array of reconstructed vectors, size ni * d / 8
 */
void reconstruct_n(idx_t i0, idx_t ni, uint8_t *recons) const override;
/** Similar to search, but also reconstructs the stored vectors (or an
 * approximation in the case of lossy coding) for the search results.
 *
 * Overrides default implementation to avoid having to maintain direct_map
 * and instead fetch the code offsets through the `store_pairs` flag in
 * search_preassigned().
 *
 * @param recons reconstructed vectors size (n, k, d / 8)
 */
void search_and_reconstruct(idx_t n, const uint8_t *x, idx_t k,
int32_t *distances, idx_t *labels,
uint8_t *recons) const override;
/** Reconstruct a vector given the location in terms of (inv list index +
 * inv list offset) instead of the id.
 *
 * Useful for reconstructing when the direct_map is not maintained and
 * the inv list offset is computed by search_preassigned() with
 * `store_pairs` set.
 */
virtual void reconstruct_from_offset(long list_no, long offset,
uint8_t* recons) const;
/// Dataset manipulation functions
long remove_ids(const IDSelector& sel) override;
/** moves the entries from another dataset to self. On output,
 * other is empty. add_id is added to all moved ids (for
 * sequential ids, this would be this->ntotal) */
virtual void merge_from(IndexBinaryIVF& other, idx_t add_id);
/// number of entries in inverted list list_no, as reported by invlists
size_t get_list_size(size_t list_no) const
{ return invlists->list_size(list_no); }
/** initialize a direct map
 *
 * @param new_maintain_direct_map if true, create a direct map,
 * else clear it
 */
void make_direct_map(bool new_maintain_direct_map=true);
/// 1= perfectly balanced, >1: imbalanced
double imbalance_factor() const;
/// display some stats about the inverted lists
void print_stats() const;
/// presumably swaps in il as the inverted lists, with `own` giving ownership — TODO confirm
void replace_invlists(InvertedLists *il, bool own=false);
};
} // namespace faiss
#endif // FAISS_INDEX_BINARY_IVF_H
......@@ -6,7 +6,7 @@
* LICENSE file in the root directory of this source tree.
*/
// Copyright 2004-present Facebook. All Rights Reserved
// -*- c++ -*-
#include "IndexFlat.h"
......@@ -18,6 +18,7 @@
#include "AuxIndexStructures.h"
namespace faiss {
IndexFlat::IndexFlat (idx_t d, MetricType metric):
......
......@@ -6,7 +6,6 @@
* LICENSE file in the root directory of this source tree.
*/
// Copyright 2004-present Facebook. All Rights Reserved
// -*- c++ -*-
#ifndef INDEX_FLAT_H
......@@ -24,7 +23,7 @@ struct IndexFlat: Index {
/// database vectors, size ntotal * d
std::vector<float> xb;
explicit IndexFlat (idx_t d, MetricType metric = METRIC_INNER_PRODUCT);
explicit IndexFlat (idx_t d, MetricType metric = METRIC_L2);
void add(idx_t n, const float* x) override;
......
......@@ -6,6 +6,8 @@
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#include "IndexHNSW.h"
......@@ -1694,19 +1696,17 @@ struct PQDis: HNSW::DistanceComputer {
return accu;
}
PQDis(const IndexPQ & storage, const float *q = nullptr):
pq(storage.pq)
{
precomputed_table.resize(pq.M * pq.ksub);
nb = storage.ntotal;
d = storage.d;
codes = storage.codes.data();
code_size = pq.code_size;
FAISS_ASSERT(pq.ksub == 256);
FAISS_ASSERT(pq.sdc_table.size() == pq.ksub * pq.ksub * pq.M);
sdc = pq.sdc_table.data();
ndis = 0;
PQDis(const IndexPQ& storage, const float* /*q*/ = nullptr)
: pq(storage.pq) {
precomputed_table.resize(pq.M * pq.ksub);
nb = storage.ntotal;
d = storage.d;
codes = storage.codes.data();
code_size = pq.code_size;
FAISS_ASSERT(pq.ksub == 256);
FAISS_ASSERT(pq.sdc_table.size() == pq.ksub * pq.ksub * pq.M);
sdc = pq.sdc_table.data();
ndis = 0;
}
void set_query(const float *x) override {
......@@ -1771,15 +1771,13 @@ struct SQDis: HNSW::DistanceComputer {
return dc->compute_code_distance (codei, codej);
}
SQDis(const IndexScalarQuantizer & storage, const float *q = nullptr):
sq(storage.sq)
{
nb = storage.ntotal;
d = storage.d;
codes = storage.codes.data();
code_size = sq.code_size;
dc = sq.get_distance_computer();
SQDis(const IndexScalarQuantizer& storage, const float* /*q*/ = nullptr)
: sq(storage.sq) {
nb = storage.ntotal;
d = storage.d;
codes = storage.codes.data();
code_size = sq.code_size;
dc = sq.get_distance_computer();
}
void set_query(const float *x) override {
......
......@@ -6,6 +6,8 @@
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#pragma once
#include <vector>
......@@ -320,5 +322,4 @@ struct IndexHNSW2Level: IndexHNSW {
};
};
}
......@@ -6,9 +6,7 @@
* LICENSE file in the root directory of this source tree.
*/
/* Copyright 2004-present Facebook. All Rights Reserved.
Inverted list structure.
*/
// -*- c++ -*-
#include "IndexIVF.h"
......@@ -367,9 +365,10 @@ void IndexIVF::search_and_reconstruct (idx_t n, const float *x, idx_t k,
}
}
void IndexIVF::reconstruct_from_offset (long list_no, long offset,
float* recons) const
{
void IndexIVF::reconstruct_from_offset(
long /*list_no*/,
long /*offset*/,
float* /*recons*/) const {
FAISS_THROW_MSG ("reconstruct_from_offset not implemented");
}
......@@ -607,5 +606,4 @@ void IndexIVFStats::reset()
IndexIVFStats indexIVF_stats;
} // namespace faiss
......@@ -6,7 +6,6 @@
* LICENSE file in the root directory of this source tree.
*/
// Copyright 2004-present Facebook. All Rights Reserved.
// -*- c++ -*-
#ifndef FAISS_INDEX_IVF_H
......@@ -143,6 +142,12 @@ struct ArrayInvertedLists: InvertedLists {
};
/** Per-call search parameters for IVF indexes; passed to
 * search_preassigned() to override the index object's own settings.
 *
 * Both fields have defaults so that a default-constructed object never
 * carries indeterminate values.
 */
struct IVFSearchParameters {
    size_t nprobe = 1;    ///< number of probes at query time
    /// max nb of codes to visit to do a query
    /// (default 0 — presumably "no limit", TODO confirm against IndexIVF)
    size_t max_codes = 0;
    virtual ~IVFSearchParameters () {}
};
/** Index based on a inverted file (IVF)
*
* In the inverted file, the quantizer (an Index instance) provides a
......@@ -213,12 +218,15 @@ struct IndexIVF: Index, Level1Quantizer {
* @param store_pairs store inv list index + inv list offset
* instead in upper/lower 32 bit of result,
* instead of ids (used for reranking).
* @param params used to override the object's search parameters
*/
virtual void search_preassigned (idx_t n, const float *x, idx_t k,
const idx_t *assign,
const float *centroid_dis,
float *distances, idx_t *labels,
bool store_pairs) const = 0;
bool store_pairs,
const IVFSearchParameters *params=nullptr
) const = 0;
/** assign the vectors, then call search_preassign */
virtual void search (idx_t n, const float *x, idx_t k,
......@@ -317,11 +325,7 @@ struct IndexIVFStats {
extern IndexIVFStats indexIVF_stats;
} // namespace faiss
#endif
This diff is collapsed.
......@@ -11,9 +11,9 @@
#ifndef FAISS_INDEX_IVF_FLAT_H
#define FAISS_INDEX_IVF_FLAT_H
#include "IndexIVF.h"
#include <unordered_map>
#include "IndexIVF.h"
namespace faiss {
......@@ -39,7 +39,9 @@ struct IndexIVFFlat: IndexIVF {
const idx_t *assign,
const float *centroid_dis,
float *distances, idx_t *labels,
bool store_pairs) const override;
bool store_pairs,
const IVFSearchParameters *params=nullptr
) const override;
void range_search(
idx_t n,
......@@ -55,7 +57,7 @@ struct IndexIVFFlat: IndexIVF {
* @param idx vector indices to update, size nv
* @param v vectors of new values, size nv*d
*/
void update_vectors (int nv, idx_t *idx, const float *v);
virtual void update_vectors (int nv, idx_t *idx, const float *v);
void reconstruct_from_offset (long list_no, long offset,
float* recons) const override;
......@@ -64,6 +66,55 @@ struct IndexIVFFlat: IndexIVF {
};
/// Variant of IndexIVFFlat that keeps track of identical vectors through
/// the `instances` map.
struct IndexIVFFlatDedup: IndexIVFFlat {
/** Maps ids stored in the index to the ids of vectors that are
 * the same. When a vector is unique, it does not appear in the
 * instances map */
std::unordered_multimap <idx_t, idx_t> instances;
/// same parameters as the other IVF constructors: coarse quantizer,
/// dimension, number of inverted lists and metric (L2 by default)
IndexIVFFlatDedup (
Index * quantizer, size_t d, size_t nlist_,
MetricType = METRIC_L2);
/// also dedups the training set
void train(idx_t n, const float* x) override;
/// implemented for all IndexIVF* classes
void add_with_ids(idx_t n, const float* x, const long* xids) override;
void search_preassigned (idx_t n, const float *x, idx_t k,
const idx_t *assign,
const float *centroid_dis,
float *distances, idx_t *labels,
bool store_pairs,
const IVFSearchParameters *params=nullptr
) const override;
long remove_ids(const IDSelector& sel) override;
/// not implemented
void range_search(
idx_t n,
const float* x,
float radius,
RangeSearchResult* result) const override;
/// not implemented
void update_vectors (int nv, idx_t *idx, const float *v) override;
/// not implemented
void reconstruct_from_offset (long list_no, long offset,
float* recons) const override;
/// default constructor with an empty body
IndexIVFFlatDedup () {}
};
} // namespace faiss
#endif
......@@ -6,9 +6,7 @@
* LICENSE file in the root directory of this source tree.
*/
/* Copyright 2004-present Facebook. All Rights Reserved.
Inverted list structure.
*/
// -*- c++ -*-
#include "IndexIVFPQ.h"
......@@ -33,9 +31,6 @@
namespace faiss {
/*****************************************
* IndexIVFPQ implementation
******************************************/
......@@ -452,6 +447,7 @@ struct QueryTables {
*****************************************************/
const IndexIVFPQ & ivfpq;
const IVFSearchParameters *params;
// copied from IndexIVFPQ for easier access
int d;
......@@ -459,6 +455,7 @@ struct QueryTables {
MetricType metric_type;
bool by_residual;
int use_precomputed_table;
int polysemous_ht;
// pre-allocated data buffers
float * sim_table, * sim_table_2;
......@@ -470,7 +467,8 @@ struct QueryTables {
// for table pointers
std::vector<const float *> sim_table_ptrs;
explicit QueryTables (const IndexIVFPQ & ivfpq):
explicit QueryTables (const IndexIVFPQ & ivfpq,
const IVFSearchParameters *params):
ivfpq(ivfpq),
d(ivfpq.d),
pq (ivfpq.pq),
......@@ -485,7 +483,12 @@ struct QueryTables {
decoded_vec = residual_vec + d;
// for polysemous
if (ivfpq.polysemous_ht != 0) {
polysemous_ht = ivfpq.polysemous_ht;
if (auto ivfpq_params =
dynamic_cast<const IVFPQSearchParameters *>(params)) {
polysemous_ht = ivfpq_params->polysemous_ht;
}
if (polysemous_ht != 0) {
q_code.resize (pq.code_size);
}
init_list_cycles = 0;
......@@ -506,7 +509,7 @@ struct QueryTables {
init_query_IP ();
else
init_query_L2 ();
if (!by_residual && ivfpq.polysemous_ht != 0)
if (!by_residual && polysemous_ht != 0)
pq.compute_code (qi, q_code.data());
}
......@@ -579,7 +582,7 @@ struct QueryTables {
// decoded_vec = centroid
float dis0 = -fvec_inner_product (qi, decoded_vec, d);
if (ivfpq.polysemous_ht) {
if (polysemous_ht) {
for (int i = 0; i < d; i++) {
residual_vec [i] = qi[i] - decoded_vec[i];
}
......@@ -629,7 +632,7 @@ struct QueryTables {
const float *pc = &ivfpq.precomputed_table
[(ki * pq.M + cm * Mf) * pq.ksub];
if (ivfpq.polysemous_ht == 0) {
if (polysemous_ht == 0) {
// sum up with query-specific table
fvec_madd (Mf * pq.ksub,
......@@ -694,7 +697,7 @@ struct QueryTables {
FAISS_THROW_MSG ("need precomputed tables");
}
if (ivfpq.polysemous_ht) {
if (polysemous_ht) {
FAISS_THROW_MSG ("not implemented");
// Not clear that it makes sense to implement this,
// because it costs M * ksub, which is what we wanted to
......@@ -720,8 +723,9 @@ struct InvertedListScanner: QueryTables {
const IDType * list_ids;
size_t list_size;
explicit InvertedListScanner (const IndexIVFPQ & ivfpq):
QueryTables (ivfpq)
explicit InvertedListScanner (const IndexIVFPQ & ivfpq,
const IVFSearchParameters *params):
QueryTables (ivfpq, params)
{
FAISS_THROW_IF_NOT (pq.byte_per_idx == 1);
n_hamming_pass = 0;
......@@ -931,27 +935,30 @@ void IndexIVFPQ::search_preassigned (idx_t nx, const float *qx, idx_t k,
const idx_t *keys,
const float *coarse_dis,
float *distances, idx_t *labels,
bool store_pairs) const
bool store_pairs,
const IVFSearchParameters *params
) const
{
float_maxheap_array_t res = {
size_t(nx), size_t(k),
labels, distances
};
long local_nprobe = params ? params->nprobe : nprobe;
long local_max_codes = params ? params->max_codes : max_codes;
#pragma omp parallel
{
InvertedListScanner<long> qt (*this);
size_t stats_nlist = 0;
size_t stats_ncode = 0;
InvertedListScanner<long> qt (*this, params);
size_t stats_nlist = 0, stats_ncode = 0;
uint64_t init_query_cycles = 0;
uint64_t scan_cycles = 0;
uint64_t heap_cycles = 0;
uint64_t scan_cycles = 0, heap_cycles = 0;
#pragma omp for
for (size_t i = 0; i < nx; i++) {
const float *qi = qx + i * d;
const long * keysi = keys + i * nprobe;
const float *coarse_dis_i = coarse_dis + i * nprobe;
const long * keysi = keys + i * local_nprobe;
const float *coarse_dis_i = coarse_dis + i * local_nprobe;
float * heap_sim = res.get_val (i);
long * heap_ids = res.get_ids (i);
......@@ -966,7 +973,7 @@ void IndexIVFPQ::search_preassigned (idx_t nx, const float *qx, idx_t k,
size_t nscan = 0;
for (size_t ik = 0; ik < nprobe; ik++) {
for (size_t ik = 0; ik < local_nprobe; ik++) {
long key = keysi[ik]; /* select the list */
if (key < 0) {
// not enough centroids for multiprobe
......@@ -994,7 +1001,7 @@ void IndexIVFPQ::search_preassigned (idx_t nx, const float *qx, idx_t k,
}
scan_cycles += TOC;
if (max_codes && nscan >= max_codes) break;
if (local_max_codes && nscan >= local_max_codes) break;
}
stats_ncode += nscan;
TIC;
......@@ -1160,7 +1167,9 @@ void IndexIVFPQR::search_preassigned (idx_t n, const float *x, idx_t k,
const idx_t *idx,
const float *L1_dis,
float *distances, idx_t *labels,
bool store_pairs) const
bool store_pairs,
const IVFSearchParameters *params
) const
{
uint64_t t0;
TIC;
......@@ -1171,9 +1180,10 @@ void IndexIVFPQR::search_preassigned (idx_t n, const float *x, idx_t k,
float *coarse_distances = new float [k_coarse * n];
ScopeDeleter<float> del(coarse_distances);
IndexIVFPQ::search_preassigned (n, x, k_coarse,
idx, L1_dis, coarse_distances, coarse_labels,
true);
IndexIVFPQ::search_preassigned (
n, x, k_coarse,
idx, L1_dis, coarse_distances, coarse_labels,
true, params);
}
......@@ -1392,13 +1402,12 @@ void Index2Layer::add(idx_t n, const float* x)
}
void Index2Layer::search(
idx_t n,
const float* x,
idx_t k,
float* distances,
idx_t* labels) const
{
FAISS_THROW_MSG ("not implemented");
idx_t /*n*/,
const float* /*x*/,
idx_t /*k*/,
float* /*distances*/,
idx_t* /*labels*/) const {
FAISS_THROW_MSG("not implemented");
}
......@@ -1456,10 +1465,4 @@ void Index2Layer::reset()
}
} // namespace faiss
......@@ -6,7 +6,6 @@
* LICENSE file in the root directory of this source tree.
*/
// Copyright 2004-present Facebook. All Rights Reserved.
// -*- c++ -*-
#ifndef FAISS_INDEX_IVFPQ_H
......@@ -21,6 +20,11 @@
namespace faiss {
/** Search-time parameters specific to IVFPQ indexes.
 *
 * Extends IVFSearchParameters with two PQ-related fields. Both are
 * zero-initialized by the default constructor, so an object whose fields
 * were not set explicitly never hands an indeterminate value to the
 * search code that reads them through a dynamic_cast of the params
 * pointer.
 */
struct IVFPQSearchParameters: IVFSearchParameters {
    size_t scan_table_threshold;   ///< use table computation or on-the-fly?
    int polysemous_ht;             ///< Hamming thresh for polysemous filtering

    /// Zero both fields; 0 is the polysemous_ht value the query tables
    /// test against before taking the polysemous code path.
    IVFPQSearchParameters (): scan_table_threshold (0), polysemous_ht (0) {}
    ~IVFPQSearchParameters () {}
};
/** Inverted file with Product Quantizer encoding. Each residual
......@@ -103,7 +107,9 @@ struct IndexIVFPQ: IndexIVF {
const idx_t *assign,
const float *centroid_dis,
float *distances, idx_t *labels,
bool store_pairs) const override;
bool store_pairs,
const IVFSearchParameters *params=nullptr
) const override;
/// build precomputed table
void precompute_table ();
......@@ -181,7 +187,9 @@ struct IndexIVFPQR: IndexIVFPQ {
const idx_t *assign,
const float *centroid_dis,
float *distances, idx_t *labels,
bool store_pairs) const override;
bool store_pairs,
const IVFSearchParameters *params=nullptr
) const override;
IndexIVFPQR();
};
......@@ -242,11 +250,7 @@ struct Index2Layer: Index {
};
} // namespace faiss
#endif
......@@ -6,7 +6,7 @@
* LICENSE file in the root directory of this source tree.
*/
// Copyright 2004-present Facebook. All Rights Reserved.
// -*- c++ -*-
#include "IndexLSH.h"
......@@ -19,6 +19,7 @@
#include "hamming.h"
#include "FaissAssert.h"
namespace faiss {
/***************************************************************
......@@ -146,8 +147,8 @@ void IndexLSH::search (
int_maxheap_array_t res = { size_t(n), size_t(k), labels, idistances};
hammings_knn (&res, qcodes, codes.data(),
ntotal, bytes_per_vec, true);
hammings_knn_hc (&res, qcodes, codes.data(),
ntotal, bytes_per_vec, true);
// convert distances to floats
......@@ -176,5 +177,4 @@ void IndexLSH::reset() {
}
} // namespace faiss
......@@ -6,7 +6,6 @@
* LICENSE file in the root directory of this source tree.
*/
// Copyright 2004-present Facebook. All Rights Reserved.
// -*- c++ -*-
#ifndef INDEX_LSH_H
......@@ -73,12 +72,7 @@ struct IndexLSH:Index {
};
}
#endif
......@@ -6,9 +6,7 @@
* LICENSE file in the root directory of this source tree.
*/
/* Copyright 2004-present Facebook. All Rights Reserved.
Index based on product quantization.
*/
// -*- c++ -*-
#include "IndexPQ.h"
......@@ -203,13 +201,13 @@ void IndexPQ::search (idx_t n, const float *x, idx_t k,
if (search_type == ST_HE) {
hammings_knn (&res, q_codes, codes.data(),
ntotal, pq.code_size, true);
hammings_knn_hc (&res, q_codes, codes.data(),
ntotal, pq.code_size, true);
} else if (search_type == ST_generalized_HE) {
generalized_hammings_knn (&res, q_codes, codes.data(),
ntotal, pq.code_size, true);
generalized_hammings_knn_hc (&res, q_codes, codes.data(),
ntotal, pq.code_size, true);
}
// convert distances to floats
......@@ -1092,5 +1090,4 @@ void MultiIndexQuantizer2::search(
}
} // END namespace faiss
} // namespace faiss
......@@ -6,7 +6,6 @@
* LICENSE file in the root directory of this source tree.
*/
// Copyright 2004-present Facebook. All Rights Reserved.
// -*- c++ -*-
#ifndef FAISS_INDEX_PQ_H
......@@ -63,6 +62,8 @@ struct IndexPQ: Index {
void reconstruct(idx_t key, float* recons) const override;
long remove_ids(const IDSelector& sel) override;
/******************************************************
* Polysemous codes implementation
******************************************************/
......@@ -83,11 +84,6 @@ struct IndexPQ: Index {
Search_type_t search_type;
/** remove some ids. NB that because of the structure of the
* indexing structure, the semantics of this operation are
* different from the usual ones: the new ids are shifted */
long remove_ids(const IDSelector& sel) override;
// just encode the sign of the components, instead of using the PQ encoder
// used only for the queries
bool encode_signs;
......@@ -189,5 +185,4 @@ struct MultiIndexQuantizer2: MultiIndexQuantizer {
} // namespace faiss
#endif
This diff is collapsed.
......@@ -6,6 +6,8 @@
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#ifndef FAISS_INDEX_SCALAR_QUANTIZER_H
#define FAISS_INDEX_SCALAR_QUANTIZER_H
......@@ -34,6 +36,7 @@ struct ScalarQuantizer {
QT_4bit, ///< 4 bits per component
QT_8bit_uniform, ///< same, shared range for all dimensions
QT_4bit_uniform,
QT_fp16,
};
QuantizerType qtype;
......@@ -160,7 +163,9 @@ struct IndexIVFScalarQuantizer: IndexIVF {
const idx_t *assign,
const float *centroid_dis,
float *distances, idx_t *labels,
bool store_pairs) const override;
bool store_pairs,
const IVFSearchParameters *params=nullptr
) const override;
void reconstruct_from_offset (long list_no, long offset,
float* recons) const override;
......
......@@ -6,7 +6,6 @@
* LICENSE file in the root directory of this source tree.
*/
// Copyright 2004-present Facebook. All Rights Reserved
// -*- c++ -*-
#include "MetaIndexes.h"
......@@ -729,8 +728,4 @@ IndexSplitVectors::~IndexSplitVectors ()
}
}; // namespace faiss
} // namespace faiss
......@@ -6,7 +6,6 @@
* LICENSE file in the root directory of this source tree.
*/
// Copyright 2004-present Facebook. All Rights Reserved.
// -*- c++ -*-
#ifndef META_INDEXES_H
......@@ -170,8 +169,7 @@ struct IndexSplitVectors: Index {
};
}
} // namespace faiss
#endif
......@@ -6,6 +6,7 @@
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#include "OnDiskInvertedLists.h"
......@@ -223,17 +224,7 @@ int OnDiskInvertedLists::OngoingPrefetch::global_cs = 0;
void OnDiskInvertedLists::prefetch_lists (const long *list_nos, int n) const
{
if (use_madvise) {
for (int i = 0; i < n; ++i) {
const auto list_no = list_nos[i];
const auto size = list_size(list_no);
madvise((void *) get_ids(list_no), size * sizeof(Index::idx_t), MADV_WILLNEED);
madvise((void *) get_codes(list_no), size * code_size, MADV_WILLNEED);
}
} else {
pf->prefetch_lists (list_nos, n);
}
pf->prefetch_lists (list_nos, n);
}
......@@ -337,7 +328,6 @@ OnDiskInvertedLists::OnDiskInvertedLists (
totsize (0),
ptr (nullptr),
read_only (false),
use_madvise (false),
locks (new LockLevels ()),
pf (new OngoingPrefetch (this))
{
......@@ -351,7 +341,6 @@ OnDiskInvertedLists::OnDiskInvertedLists ():
totsize (0),
ptr (nullptr),
read_only (false),
use_madvise (false),
locks (new LockLevels ()),
pf (new OngoingPrefetch (this))
{
......@@ -382,11 +371,19 @@ size_t OnDiskInvertedLists::list_size(size_t list_no) const
/// Address of the codes of list `list_no`, computed from the base pointer
/// `ptr` plus the list's offset. Yields nullptr when the list's offset is
/// INVALID_OFFSET.
const uint8_t * OnDiskInvertedLists::get_codes (size_t list_no) const
{
    const auto & entry = lists[list_no];
    return entry.offset == INVALID_OFFSET ? nullptr : ptr + entry.offset;
}
/// Address of the ids of list `list_no`. The ids are located
/// code_size * capacity bytes past the list's offset from `ptr`.
/// Yields nullptr when the list's offset is INVALID_OFFSET.
const Index::idx_t * OnDiskInvertedLists::get_ids (size_t list_no) const
{
    const auto & entry = lists[list_no];
    if (entry.offset == INVALID_OFFSET) {
        return nullptr;
    }
    const uint8_t * ids_begin =
        ptr + entry.offset + code_size * entry.capacity;
    return reinterpret_cast<const idx_t *> (ids_begin);
}
......@@ -606,8 +603,4 @@ size_t OnDiskInvertedLists::merge_from (const InvertedLists **ils, int n_il)
}
} // namespace faiss
......@@ -6,7 +6,7 @@
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#ifndef FAISS_ON_DISK_INVERTED_LISTS_H
#define FAISS_ON_DISK_INVERTED_LISTS_H
......@@ -73,8 +73,7 @@ struct OnDiskInvertedLists: InvertedLists {
size_t totsize;
uint8_t *ptr; // mmap base pointer
bool read_only; /// are inverted lists mapped read-only
bool use_madvise; // use madvice for prefetching
OnDiskInvertedLists (size_t nlist, size_t code_size,
const char *filename);
......@@ -118,8 +117,6 @@ struct OnDiskInvertedLists: InvertedLists {
};
} // namespace faiss
#endif
......@@ -6,6 +6,8 @@
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#include "PolysemousTraining.h"
#include <cstdlib>
......@@ -946,5 +948,4 @@ void PolysemousTraining::optimize_pq_for_hamming (ProductQuantizer &pq,
}
} // namespace faiss
......@@ -6,7 +6,6 @@
* LICENSE file in the root directory of this source tree.
*/
// Copyright 2004-present Facebook. All Rights Reserved.
// -*- c++ -*-
#ifndef FAISS_POLYSEMOUS_TRAINING_INCLUDED
......@@ -19,8 +18,6 @@
namespace faiss {
/// parameters used for the simulated annealing method
struct SimulatedAnnealingParameters {
......@@ -156,7 +153,6 @@ struct PolysemousTraining: SimulatedAnnealingParameters {
};
} // namespace faiss
......
......@@ -6,9 +6,7 @@
* LICENSE file in the root directory of this source tree.
*/
/* Copyright 2004-present Facebook. All Rights Reserved.
Index based on product quantization.
*/
// -*- c++ -*-
#include "ProductQuantizer.h"
......@@ -40,8 +38,6 @@ int sgemm_ (const char *transa, const char *transb, FINTEGER *m, FINTEGER *
namespace faiss {
/* compute an estimator using look-up tables for typical values of M */
template <typename CT, class C>
void pq_estimators_from_tables_Mmul4 (int M, const CT * codes,
......@@ -661,8 +657,4 @@ void ProductQuantizer::search_sdc (const uint8_t * qcodes,
}
} // namespace faiss
......@@ -6,7 +6,6 @@
* LICENSE file in the root directory of this source tree.
*/
// Copyright 2004-present Facebook. All Rights Reserved.
// -*- c++ -*-
#ifndef FAISS_PRODUCT_QUANTIZER_H
......@@ -171,7 +170,6 @@ struct ProductQuantizer {
};
} // namespace faiss
......
......@@ -6,7 +6,6 @@
* LICENSE file in the root directory of this source tree.
*/
// Copyright 2004-present Facebook. All Rights Reserved
// -*- c++ -*-
#include "VectorTransform.h"
......
......@@ -6,7 +6,6 @@
* LICENSE file in the root directory of this source tree.
*/
// Copyright 2004-present Facebook. All Rights Reserved.
// -*- c++ -*-
#ifndef FAISS_VECTOR_TRANSFORM_H
......@@ -304,9 +303,7 @@ struct IndexPreTransform: Index {
};
} // namespace faiss
#endif
......@@ -552,10 +552,7 @@ def compute_populated_index(preproc):
t0 = time.time()
for i in range(ngpu):
if ngpu == 1:
index_src = faiss.index_gpu_to_cpu(gpu_index)
else:
index_src = faiss.index_gpu_to_cpu(gpu_index.at(i))
index_src = faiss.index_gpu_to_cpu(gpu_index.at(i))
print " index %d size %d" % (i, index_src.ntotal)
index_src.copy_subset_to(indexall, 0, 0, nb)
......
# Copyright (c) 2015-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the BSD+Patents license found in the
# LICENSE file in the root directory of this source tree.
#! /usr/bin/env python2
import numpy as np
import faiss
import time
xd = 100
yd = 1000000

# seed numpy's generator so the sampled verification pairs are repeatable
np.random.seed(1234)

# restrict faiss to a single OpenMP thread
faiss.omp_set_num_threads(1)

print('xd=%d yd=%d' % (xd, yd))


def _bench_ny(ny_func, reference):
    """Time a faiss one-to-many routine and spot-check its output.

    ny_func:   faiss routine called as ny_func(out_ptr, x_ptr, y_ptr, d, yd),
               once per row of x; it fills one row of the result matrix.
    reference: callable (x_row, y_row) -> expected value, computed with numpy.

    For each tested dimension d, prints the elapsed time of the xd calls and
    the relative difference accumulated over ntry randomly chosen (x, y)
    pairs.
    """
    for d in 3, 4, 12, 36, 64:
        x = faiss.rand(xd * d).reshape(xd, d)
        y = faiss.rand(yd * d).reshape(yd, d)

        distances = np.empty((xd, yd), dtype='float32')

        t0 = time.time()
        for i in range(xd):
            ny_func(faiss.swig_ptr(distances[i]),
                    faiss.swig_ptr(x[i]),
                    faiss.swig_ptr(y),
                    d, yd)
        t1 = time.time()

        # sparse verification: only ntry random entries are compared
        ntry = 100
        num, denom = 0, 0
        for t in range(ntry):
            xi = np.random.randint(xd)
            yi = np.random.randint(yd)
            num += abs(distances[xi, yi] - reference(x[xi], y[yi]))
            denom += abs(distances[xi, yi])

        print('d=%d t=%.3f s diff=%g' % (d, t1 - t0, num / denom))


print('Running inner products test..')
_bench_ny(faiss.fvec_inner_products_ny, np.dot)

print('Running L2sqr test..')
_bench_ny(faiss.fvec_L2sqr_ny, lambda a, b: np.sum((a - b) ** 2))
README for the link & code implementation
=========================================
......@@ -10,14 +11,12 @@ Link & code is an indexing method that combines HNSW indexing with
compression and exploits the neighborhood structure of the similarity
graph to improve the reconstruction. It is described in
```
@inproceedings{link_and_code,
author = {Matthijs Douze and Alexandre Sablayrolles and Herv\'e J\'egou},
title = {Link and code: Fast indexing with graphs and compact regression codes},
booktitle = {CVPR},
year = {2018}
}
```
ArXiV [here](https://arxiv.org/abs/1804.09996)
......@@ -30,7 +29,7 @@ The test runs with 3 files:
- `datasets.py`: code to load the datasets. The example code runs on the
deep1b and bigann datasets. See the [toplevel README](../README.md)
on how to download them. They should be put in a directory, edit
on how to download them. They should be put in a directory, edit
datasets.py to set the path.
- `neighbor_codec.py`: this is where the representation is trained.
......@@ -62,7 +61,7 @@ Set `bdir` to a scratch directory.
Explanation of the flags:
- `--db deep100M`: dataset to process
- `--db deep1M`: dataset to process
- `--M0 6`: number of links on the base level (L6)
......@@ -135,7 +134,7 @@ python bench_link_and_code.py \
--beta_nsq 1 --beta_k 1 \
--beta_centroids $bdir/deep1M_PQ40_M6_nsq0.npy \
--neigh_recons_codes $bdir/deep1M_PQ36_M6_nsq0_codes.npy \
--k_reorder 0,5 --efSearch 16,64,256,1024
--k_reorder 0 --efSearch 16,64,256,1024
```
The arguments are similar to the previous table. Note that nsq = 0 is
......
# Copyright (c) 2015-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the BSD+Patents license found in the
# LICENSE file in the root directory of this source tree.
#!/usr/bin/env python2
import os
......
# Copyright (c) 2015-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the BSD+Patents license found in the
# LICENSE file in the root directory of this source tree.
#! /usr/bin/env python2
"""
......
# Copyright (c) 2015-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the BSD+Patents license found in the
# LICENSE file in the root directory of this source tree.
#! /usr/bin/env python2
"""
......
......@@ -6,7 +6,6 @@
* LICENSE file in the root directory of this source tree.
*/
// Copyright 2004-present Facebook. All Rights Reserved
#include <cmath>
......
......@@ -6,7 +6,6 @@
* LICENSE file in the root directory of this source tree.
*/
// Copyright 2004-present Facebook. All Rights Reserved
#include <cmath>
......
......@@ -6,7 +6,6 @@
* LICENSE file in the root directory of this source tree.
*/
// Copyright 2004-present Facebook. All Rights Reserved
#include <cmath>
......
......@@ -6,7 +6,6 @@
* LICENSE file in the root directory of this source tree.
*/
// Copyright 2004-present Facebook. All Rights Reserved
#include <cmath>
......
......@@ -6,7 +6,6 @@
* LICENSE file in the root directory of this source tree.
*/
// Copyright 2004-present Facebook. All Rights Reserved.
#include "GpuAutoTune.h"
#include <typeinfo>
......
......@@ -6,7 +6,6 @@
* LICENSE file in the root directory of this source tree.
*/
// Copyright 2004-present Facebook. All Rights Reserved.
#pragma once
#include "../Index.h"
......
......@@ -6,7 +6,6 @@
* LICENSE file in the root directory of this source tree.
*/
// Copyright 2004-present Facebook. All Rights Reserved.
#include "GpuClonerOptions.h"
namespace faiss { namespace gpu {
......
......@@ -6,7 +6,6 @@
* LICENSE file in the root directory of this source tree.
*/
// Copyright 2004-present Facebook. All Rights Reserved.
#pragma once
#include "GpuIndicesOptions.h"
......
......@@ -6,7 +6,6 @@
* LICENSE file in the root directory of this source tree.
*/
// Copyright 2004-present Facebook. All Rights Reserved.
#ifndef GPU_FAISS_ASSERT_INCLUDED
#define GPU_FAISS_ASSERT_INCLUDED
......
......@@ -6,7 +6,6 @@
* LICENSE file in the root directory of this source tree.
*/
// Copyright 2004-present Facebook. All Rights Reserved.
#include "GpuIndex.h"
#include "../FaissAssert.h"
......
......@@ -6,7 +6,6 @@
* LICENSE file in the root directory of this source tree.
*/
// Copyright 2004-present Facebook. All Rights Reserved.
#pragma once
......
......@@ -6,7 +6,6 @@
* LICENSE file in the root directory of this source tree.
*/
// Copyright 2004-present Facebook. All Rights Reserved.
#include "GpuIndexFlat.h"
#include "../IndexFlat.h"
......
......@@ -6,7 +6,6 @@
* LICENSE file in the root directory of this source tree.
*/
// Copyright 2004-present Facebook. All Rights Reserved.
#pragma once
......
......@@ -6,7 +6,6 @@
* LICENSE file in the root directory of this source tree.
*/
// Copyright 2004-present Facebook. All Rights Reserved.
#include "GpuIndexIVF.h"
#include "../FaissAssert.h"
......
......@@ -6,7 +6,6 @@
* LICENSE file in the root directory of this source tree.
*/
// Copyright 2004-present Facebook. All Rights Reserved.
#pragma once
......
......@@ -6,7 +6,6 @@
* LICENSE file in the root directory of this source tree.
*/
// Copyright 2004-present Facebook. All Rights Reserved.
#include "GpuIndexIVFFlat.h"
#include "../IndexFlat.h"
......
......@@ -6,7 +6,6 @@
* LICENSE file in the root directory of this source tree.
*/
// Copyright 2004-present Facebook. All Rights Reserved.
#pragma once
......
......@@ -6,7 +6,6 @@
* LICENSE file in the root directory of this source tree.
*/
// Copyright 2004-present Facebook. All Rights Reserved.
#include "GpuIndexIVFPQ.h"
#include "../IndexFlat.h"
......
......@@ -6,7 +6,6 @@
* LICENSE file in the root directory of this source tree.
*/
// Copyright 2004-present Facebook. All Rights Reserved.
#pragma once
......
......@@ -6,7 +6,6 @@
* LICENSE file in the root directory of this source tree.
*/
// Copyright 2004-present Facebook. All Rights Reserved.
#pragma once
......
......@@ -6,7 +6,6 @@
* LICENSE file in the root directory of this source tree.
*/
// Copyright 2004-present Facebook. All Rights Reserved.
#include "GpuResources.h"
#include "utils/DeviceUtils.h"
......
......@@ -6,7 +6,6 @@
* LICENSE file in the root directory of this source tree.
*/
// Copyright 2004-present Facebook. All Rights Reserved.
#pragma once
......
......@@ -6,7 +6,6 @@
* LICENSE file in the root directory of this source tree.
*/
// Copyright 2004-present Facebook. All Rights Reserved.
#include "IndexProxy.h"
#include "../FaissAssert.h"
......
......@@ -6,7 +6,6 @@
* LICENSE file in the root directory of this source tree.
*/
// Copyright 2004-present Facebook. All Rights Reserved.
#pragma once
......
......@@ -6,7 +6,6 @@
* LICENSE file in the root directory of this source tree.
*/
// Copyright 2004-present Facebook. All Rights Reserved.
#include "StandardGpuResources.h"
#include "../FaissAssert.h"
......
......@@ -6,7 +6,6 @@
* LICENSE file in the root directory of this source tree.
*/
// Copyright 2004-present Facebook. All Rights Reserved.
#pragma once
......
......@@ -6,7 +6,6 @@
* LICENSE file in the root directory of this source tree.
*/
// Copyright 2004-present Facebook. All Rights Reserved.
#include <algorithm>
#include "../../FaissAssert.h"
......
......@@ -6,7 +6,6 @@
* LICENSE file in the root directory of this source tree.
*/
// Copyright 2004-present Facebook. All Rights Reserved.
#pragma once
......
......@@ -6,7 +6,6 @@
* LICENSE file in the root directory of this source tree.
*/
// Copyright 2004-present Facebook. All Rights Reserved.
#include "Distance.cuh"
#include "BroadcastSum.cuh"
......
......@@ -6,7 +6,6 @@
* LICENSE file in the root directory of this source tree.
*/
// Copyright 2004-present Facebook. All Rights Reserved.
#pragma once
......
......@@ -6,7 +6,6 @@
* LICENSE file in the root directory of this source tree.
*/
// Copyright 2004-present Facebook. All Rights Reserved.
#include "FlatIndex.cuh"
#include "Distance.cuh"
......
......@@ -6,7 +6,6 @@
* LICENSE file in the root directory of this source tree.
*/
// Copyright 2004-present Facebook. All Rights Reserved.
#pragma once
......
......@@ -6,7 +6,6 @@
* LICENSE file in the root directory of this source tree.
*/
// Copyright 2004-present Facebook. All Rights Reserved.
#include "IVFBase.cuh"
#include "../GpuResources.h"
......
......@@ -6,7 +6,6 @@
* LICENSE file in the root directory of this source tree.
*/
// Copyright 2004-present Facebook. All Rights Reserved.
#pragma once
......
......@@ -6,7 +6,6 @@
* LICENSE file in the root directory of this source tree.
*/
// Copyright 2004-present Facebook. All Rights Reserved.
#include "IVFFlat.cuh"
#include "../GpuResources.h"
......
......@@ -6,7 +6,6 @@
* LICENSE file in the root directory of this source tree.
*/
// Copyright 2004-present Facebook. All Rights Reserved.
#pragma once
......
......@@ -6,7 +6,6 @@
* LICENSE file in the root directory of this source tree.
*/
// Copyright 2004-present Facebook. All Rights Reserved.
#include "IVFFlatScan.cuh"
#include "../GpuResources.h"
......
......@@ -6,7 +6,6 @@
* LICENSE file in the root directory of this source tree.
*/
// Copyright 2004-present Facebook. All Rights Reserved.
#pragma once
......
......@@ -6,7 +6,6 @@
* LICENSE file in the root directory of this source tree.
*/
// Copyright 2004-present Facebook. All Rights Reserved.
#include "IVFPQ.cuh"
#include "../GpuResources.h"
......
......@@ -6,7 +6,6 @@
* LICENSE file in the root directory of this source tree.
*/
// Copyright 2004-present Facebook. All Rights Reserved.
#pragma once
......
......@@ -6,7 +6,6 @@
* LICENSE file in the root directory of this source tree.
*/
// Copyright 2004-present Facebook. All Rights Reserved.
#include "IVFUtils.cuh"
#include "../utils/DeviceUtils.h"
......
......@@ -6,7 +6,6 @@
* LICENSE file in the root directory of this source tree.
*/
// Copyright 2004-present Facebook. All Rights Reserved.
#pragma once
......
......@@ -6,7 +6,6 @@
* LICENSE file in the root directory of this source tree.
*/
// Copyright 2004-present Facebook. All Rights Reserved.
#include "IVFUtils.cuh"
#include "../utils/DeviceUtils.h"
......
......@@ -6,7 +6,6 @@
* LICENSE file in the root directory of this source tree.
*/
// Copyright 2004-present Facebook. All Rights Reserved.
#include "IVFUtils.cuh"
#include "../utils/DeviceUtils.h"
......
......@@ -6,7 +6,6 @@
* LICENSE file in the root directory of this source tree.
*/
// Copyright 2004-present Facebook. All Rights Reserved.
#include "InvertedListAppend.cuh"
#include "../../FaissAssert.h"
......
......@@ -6,7 +6,6 @@
* LICENSE file in the root directory of this source tree.
*/
// Copyright 2004-present Facebook. All Rights Reserved.
#pragma once
......
......@@ -6,7 +6,6 @@
* LICENSE file in the root directory of this source tree.
*/
// Copyright 2004-present Facebook. All Rights Reserved.
#include "L2Norm.cuh"
#include "../../FaissAssert.h"
......
......@@ -6,7 +6,6 @@
* LICENSE file in the root directory of this source tree.
*/
// Copyright 2004-present Facebook. All Rights Reserved.
#pragma once
......
......@@ -6,7 +6,6 @@
* LICENSE file in the root directory of this source tree.
*/
// Copyright 2004-present Facebook. All Rights Reserved.
#include "L2Select.cuh"
#include "../../FaissAssert.h"
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment