Commit 9933892e authored by matthijs's avatar matthijs

sync with FB version 2017-01-09

- adding HNSW indexing method

- simultaneous search and reconstruction for IndexIVFPQ
parent 5b45b055
......@@ -23,6 +23,7 @@
#include "IndexIVFPQ.h"
#include "MetaIndexes.h"
#include "IndexScalarQuantizer.h"
#include "IndexHNSW.h"
namespace faiss {
......@@ -321,6 +322,11 @@ static void init_pq_ParameterRange (const ProductQuantizer & pq,
ParameterRange &ParameterSpace::add_range(const char * name)
{
for (auto & pr : parameter_ranges) {
if (pr.name == name) {
return pr;
}
}
parameter_ranges.push_back (ParameterRange ());
parameter_ranges.back ().name = name;
return parameter_ranges.back ();
......@@ -353,6 +359,12 @@ void ParameterSpace::initialize (const Index * index)
pr.values.push_back (nprobe);
}
}
if (dynamic_cast<const IndexHNSW*>(ix->quantizer)) {
ParameterRange & pr = add_range("efSearch");
for (int i = 2; i <= 9; i++) {
pr.values.push_back (1 << i);
}
}
}
if (DC (IndexPQ)) {
ParameterRange & pr = add_range("ht");
......@@ -361,7 +373,9 @@ void ParameterSpace::initialize (const Index * index)
if (DC (IndexIVFPQ)) {
ParameterRange & pr = add_range("ht");
init_pq_ParameterRange (ix->pq, pr);
}
if (DC (IndexIVF)) {
const MultiIndexQuantizer *miq =
dynamic_cast<const MultiIndexQuantizer *> (ix->quantizer);
if (miq) {
......@@ -378,6 +392,12 @@ void ParameterSpace::initialize (const Index * index)
pr.values.push_back (1 << i);
}
}
if (dynamic_cast<const IndexHNSW*>(index)) {
ParameterRange & pr = add_range("efSearch");
for (int i = 2; i <= 9; i++) {
pr.values.push_back (1 << i);
}
}
}
#undef DC
......@@ -489,7 +509,7 @@ void ParameterSpace::set_index_parameter (
}
}
if (name == "max_codes") {
if (DC (IndexIVFPQ)) {
if (DC (IndexIVF)) {
ix->max_codes = finite(val) ? size_t(val) : 0;
return;
}
......@@ -683,7 +703,7 @@ Index *index_factory (int d, const char *description_in, MetricType metric)
for (char *tok = strtok_r (description, " ,", &ptr);
tok;
tok = strtok_r (nullptr, " ,", &ptr)) {
int d_out, opq_M, nbit, M, M2;
int d_out, opq_M, nbit, M, M2, pq_m, ncent;
std::string stok(tok);
// to avoid mem leaks with exceptions:
......@@ -793,7 +813,6 @@ Index *index_factory (int d, const char *description_in, MetricType metric)
index_pq->do_polysemous_training = do_polysemous_training;
index_1 = index_pq;
}
} else if (stok == "RFlat") {
make_IndexRefineFlat = true;
} else {
......@@ -841,7 +860,7 @@ Index *index_factory (int d, const char *description_in, MetricType metric)
index_pt->own_fields = true;
// add from back
while (vts.chain.size() > 0) {
index_pt->prepend_transform (vts.chain.back());
index_pt->prepend_transform (vts.chain.back ());
vts.chain.pop_back ();
}
index = index_pt;
......
......@@ -162,7 +162,7 @@ struct ParameterSpace {
/// print a description on stdout
void display () const;
/// add a new parameter
/// add a new parameter (or return it if it exists)
ParameterRange &add_range(const char * name);
/// initialize with reasonable parameters for the index
......
......@@ -65,8 +65,9 @@ static double imbalance_factor (int n, int k, long *assign) {
void Clustering::train (idx_t nx, const float *x_in, Index & index) {
FAISS_THROW_IF_NOT_MSG (nx >= k,
"need at least as many training points as clusters");
FAISS_THROW_IF_NOT_FMT (nx >= k,
"Number of training points (%ld) should be at least "
"as large as number of clusters (%ld)", nx, k);
double t0 = getmillisecs();
......@@ -100,12 +101,26 @@ void Clustering::train (idx_t nx, const float *x_in, Index & index) {
}
if (nx == k) {
if (verbose) {
printf("Number of training points (%ld) same as number of "
"clusters, just copying\n", nx);
}
// this is a corner case, just copy training set to clusters
centroids.resize (d * k);
memcpy (centroids.data(), x_in, sizeof (*x_in) * d * k);
return;
}
if (verbose)
printf("Clustering %d points in %ldD to %ld clusters, "
"redo %d times, %d iterations\n",
int(nx), d, k, nredo, niter);
idx_t * assign = new idx_t[nx];
ScopeDeleter<idx_t> del (assign);
float * dis = new float[nx];
......
......@@ -179,7 +179,7 @@ void maxheap_push (size_t k, T * bh_val, long * bh_ids, T val, long ids)
* Heap initialization
*******************************************************************/
/* Initialization phase for the heap (with inconditionnal pushes).
/* Initialization phase for the heap (with unconditional pushes).
* Store k0 elements in a heap containing up to k values. Note that
* (bh_val, bh_ids) can be the same as (x, ids) */
template <class C> inline
......
......@@ -11,8 +11,19 @@
#include "IndexFlat.h"
#include "FaissAssert.h"
#include <cstring>
namespace faiss {
Index::~Index ()
{
}
void Index::train(idx_t /*n*/, const float* /*x*/) {
// does nothing by default
}
void Index::range_search (idx_t , const float *, float,
RangeSearchResult *) const
......@@ -52,6 +63,25 @@ void Index::reconstruct_n (idx_t i0, idx_t ni, float *recons) const {
}
/* Default implementation: run search(), then rebuild one vector per result
 * slot with reconstruct(). recons holds n * k slots of d floats each. */
void Index::search_and_reconstruct (idx_t n, const float *x, idx_t k,
                                    float *distances, idx_t *labels,
                                    float *recons) const {
    // The regular search fills distances[] and labels[]; labels[] then
    // decides what goes into each reconstruction slot.
    search (n, x, k, distances, labels);

    for (idx_t q = 0; q < n; ++q) {
        for (idx_t r = 0; r < k; ++r) {
            const idx_t slot = q * k + r;
            float *dst = recons + slot * d;
            const idx_t id = labels[slot];

            if (id >= 0) {
                reconstruct (id, dst);
                continue;
            }

            // Negative label: no stored vector for this slot. Setting every
            // byte to 0xFF turns each float of the slot into a NaN.
            memset (dst, -1, sizeof (*dst) * d);
        }
    }
}
void Index::compute_residual (const float * x,
float * residual, idx_t key) const {
......
......@@ -71,14 +71,14 @@ struct Index {
/// type of metric this index uses for search
MetricType metric_type;
explicit Index (idx_t d = 0, MetricType metric = METRIC_INNER_PRODUCT):
explicit Index (idx_t d = 0, MetricType metric = METRIC_L2):
d(d),
ntotal(0),
verbose(false),
is_trained(true),
metric_type (metric) {}
virtual ~Index () { }
virtual ~Index ();
/** Perform training on a representative set of vectors
......@@ -86,9 +86,7 @@ struct Index {
* @param n nb of training vectors
* @param x training vectors, size n * d
*/
virtual void train(idx_t /*n*/, const float* /*x*/) {
// does nothing by default
}
virtual void train(idx_t n, const float* x);
/** Add n vectors of dimension d to the index.
*
......@@ -164,6 +162,17 @@ struct Index {
*/
virtual void reconstruct_n (idx_t i0, idx_t ni, float *recons) const;
/** Similar to search, but also reconstructs the stored vectors (or an
* approximation in the case of lossy coding) for the search results.
*
* If there are not enough results for a query, the resulting arrays
* are padded with -1s.
*
* @param recons reconstructed vectors size (n, k, d)
**/
virtual void search_and_reconstruct (idx_t n, const float *x, idx_t k,
float *distances, idx_t *labels,
float *recons) const;
/** Computes a residual vector after indexing encoding.
*
......
This diff is collapsed.
/**
* Copyright (c) 2015-present, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under the BSD+Patents license found in the
* LICENSE file in the root directory of this source tree.
*/
#pragma once
#include <vector>
#include <omp.h>
#include "IndexFlat.h"
#include "IndexPQ.h"
#include "IndexScalarQuantizer.h"
#include "utils.h"
namespace faiss {
/** Implementation of the Hierarchical Navigable Small World
* datastructure.
*
* Efficient and robust approximate nearest neighbor search using
* Hierarchical Navigable Small World graphs
*
* Yu. A. Malkov, D. A. Yashunin, arXiv 2017
*
* This implementation is heavily influenced by the NMSlib
* implementation by Yury Malkov and Leonid Boytsov
* (https://github.com/searchivarius/nmslib)
*
* The HNSW object stores only the neighbor link structure, see
* IndexHNSW below for the full index object.
*/
struct VisitedTable;
/// Neighbor link structure of the HNSW graph. Vectors are not stored
/// here: distances are obtained through a DistanceComputer.
struct HNSW {
/// internal storage of vectors (32 bits: this is expensive)
typedef int storage_idx_t;
/// Faiss results are 64-bit
typedef faiss::Index::idx_t idx_t;
/** The HNSW structure does not store vectors, it only accesses
* them through this class.
*
* Functions are guaranteed to be accessed only from 1 thread. */
struct DistanceComputer {
idx_t d;
/// called before computing distances
virtual void set_query (const float *x) = 0;
/// compute distance of vector i to current query
virtual float operator () (storage_idx_t i) = 0;
/// compute distance between two stored vectors
virtual float symmetric_dis(storage_idx_t i, storage_idx_t j) = 0;
virtual ~DistanceComputer () {}
};
/// assignment probability to each layer (sum=1)
std::vector<double> assign_probas;
/// number of neighbors stored per layer (cumulative), should not
/// be changed after first add
std::vector<int> cum_nneighbor_per_level;
/// level of each vector (base level = 1), size = ntotal
std::vector<int> levels;
/// offsets[i] is the offset in the neighbors array where vector i is stored
/// size ntotal + 1
std::vector<size_t> offsets;
/// neighbors[offsets[i]:offsets[i+1]] is the list of neighbors of vector i
/// for all levels. this is where all storage goes.
std::vector<storage_idx_t> neighbors;
/// entry point in the search structure (one of the points with maximum level)
storage_idx_t entry_point;
/// random generator (presumably the source of randomness for
/// random_level() -- confirm in IndexHNSW.cpp)
faiss::RandomGenerator rng;
/// maximum level
int max_level;
/// expansion factor at construction time
int efConstruction;
/// expansion factor at search time
int efSearch;
/// during search: do we check whether the next best distance is good enough?
bool check_relative_distance;
/// number of entry points in levels > 0.
int upper_beam;
// methods that initialize the tree sizes
/// initialize the assign_probas and cum_nneighbor_per_level to
/// have 2*M links on level 0 and M links on levels > 0
void set_default_probas(int M, float levelMult);
/// set nb of neighbors for this level (before adding anything)
void set_nb_neighbors(int level_no, int n);
// methods that access the tree sizes
/// nb of neighbors for this level
int nb_neighbors(int layer_no) const;
/// cumulative nb up to (and excluding) this level
int cum_nb_neighbors(int layer_no) const;
/// range of entries in the neighbors table of vertex no at layer_no
/// (the range is written to *begin and *end)
void neighbor_range(idx_t no, int layer_no,
size_t * begin, size_t * end) const;
/// only mandatory parameter: nb of neighbors
explicit HNSW(int M = 32);
/// pick a random level for a new point
int random_level();
/// add n random levels to table (for debugging...)
void fill_with_random_links(size_t n);
/** add point pt_id on all levels <= pt_level and build the link
* structure for them. */
void add_with_locks(DistanceComputer & ptdis, int pt_level, int pt_id,
std::vector<omp_lock_t> & locks,
VisitedTable &vt);
/// search interface
/// NOTE(review): I and D look like the output labels and distances for
/// the k results of the query held by qdis -- confirm against the .cpp
void search(DistanceComputer & qdis, int k,
idx_t *I, float * D,
VisitedTable &vt) const;
void reset();
void clear_neighbor_tables(int level);
void print_neighbor_stats(int level) const;
};
/// Statistics record for HNSW; a global instance (hnsw_stats) is declared
/// right after this struct.
/// NOTE(review): the meaning of n1/n2/n3 and view is not visible in this
/// header -- confirm in IndexHNSW.cpp.
struct HNSWStats {
size_t n1, n2, n3;
size_t ndis; // presumably nb of distances computed -- TODO confirm
size_t nreorder;
bool view;
// a freshly constructed object goes through reset()
HNSWStats () {reset (); }
void reset ();
};
// global var that collects them all
extern HNSWStats hnsw_stats;
// Forward declaration. It uses the same class-key as the definition
// (IndexHNSW is defined with `struct` later in this header), which avoids
// mismatched-tag warnings.
struct IndexHNSW;

/** Codes attached to the vectors of an IndexHNSW, from which vectors can be
 * reconstructed and distances computed (see the per-member comments).
 *
 * NOTE(review): the encoding scheme itself is implemented outside this
 * header; only the interface is documented here.
 */
struct ReconstructFromNeighbors {
    typedef Index::idx_t idx_t;
    typedef HNSW::storage_idx_t storage_idx_t;

    /// index whose link structure is used (held by reference, not owned)
    const IndexHNSW & index;

    size_t M;   // number of neighbors
    size_t k;   // number of codebook entries
    size_t nsq; // number of subvectors
    size_t code_size;
    int k_reorder; // nb to reorder. -1 = all

    std::vector<float> codebook; // size nsq * k * (M + 1)
    std::vector<uint8_t> codes;  // size ntotal * code_size
    size_t ntotal;
    size_t d, dsub; // derived values

    ReconstructFromNeighbors(const IndexHNSW & index,
                             size_t k=256, size_t nsq=1);

    /// codes must be added in the correct order and the IndexHNSW
    /// must be populated and sorted
    void add_codes(size_t n, const float *x);

    size_t compute_distances(size_t n, const idx_t *shortlist,
                             const float *query, float *distances) const;

    /// called by add_codes
    void estimate_code(const float *x, storage_idx_t i, uint8_t *code) const;

    /// called by compute_distances
    void reconstruct(storage_idx_t i, float *x, float *tmp) const;

    void reconstruct_n(storage_idx_t n0, storage_idx_t ni, float *x) const;

    /// get the M+1 -by-d table for neighbor coordinates for vector i
    void get_neighbor_table(storage_idx_t i, float *out) const;
};
/** The HNSW index is a normal random-access index with a HNSW
* link structure built on top */
// Abstract base: get_distance_computer() is pure virtual and is provided
// by the storage-specific subclasses declared after this struct.
struct IndexHNSW: Index {
typedef HNSW::storage_idx_t storage_idx_t;
// the link structure
HNSW hnsw;
// the sequential storage
// (own_fields: presumably whether `storage` is deleted together with this
// object -- TODO confirm in the destructor)
bool own_fields;
Index * storage;
// optional ReconstructFromNeighbors object
// NOTE(review): how it is used at search time is not visible here
ReconstructFromNeighbors *reconstruct_from_neighbors;
explicit IndexHNSW (int d = 0, int M = 32);
explicit IndexHNSW (Index * storage, int M = 32);
~IndexHNSW() override;
// get a DistanceComputer object for this kind of storage
virtual HNSW::DistanceComputer * get_distance_computer() const = 0;
void add(idx_t n, const float *x) override;
/// Trains the storage if needed
void train(idx_t n, const float* x) override;
/// entry point for search
void search (idx_t n, const float *x, idx_t k,
float *distances, idx_t *labels) const override;
void reconstruct(idx_t key, float* recons) const override;
void reset () override;
void shrink_level_0_neighbors(int size);
/** Perform search only on level 0, given the starting points for
* each vertex.
*
* @param search_type 1:perform one search per nprobe, 2: enqueue
* all entry points
*/
void search_level_0(idx_t n, const float *x, idx_t k,
const storage_idx_t *nearest, const float *nearest_d,
float *distances, idx_t *labels, int nprobe = 1,
int search_type = 1) const;
/// alternative graph building
void init_level_0_from_knngraph(
int k, const float *D, const idx_t *I);
/// alternative graph building
void init_level_0_from_entry_points(
int npt, const storage_idx_t *points,
const storage_idx_t *nearests);
// reorder links from nearest to farthest
void reorder_links();
void link_singletons();
};
/** Flat index topped with a HNSW structure to access elements
 * more efficiently.
 */
struct IndexHNSWFlat: IndexHNSW {
IndexHNSWFlat();
IndexHNSWFlat(int d, int M);
// implements the pure virtual declared in IndexHNSW
HNSW::DistanceComputer * get_distance_computer() const override;
};
/** PQ index topped with a HNSW structure to access elements
 * more efficiently.
 */
struct IndexHNSWPQ: IndexHNSW {
IndexHNSWPQ();
IndexHNSWPQ(int d, int pq_m, int M);
// overrides IndexHNSW::train
void train(idx_t n, const float* x) override;
// implements the pure virtual declared in IndexHNSW
HNSW::DistanceComputer * get_distance_computer() const override;
};
/** SQ index topped with a HNSW structure to access elements
 * more efficiently.
 */
struct IndexHNSWSQ: IndexHNSW {
IndexHNSWSQ();
IndexHNSWSQ(int d, ScalarQuantizer::QuantizerType qtype, int M);
// implements the pure virtual declared in IndexHNSW
HNSW::DistanceComputer * get_distance_computer() const override;
};
/** 2-level code structure with fast random access
 */
struct IndexHNSW2Level: IndexHNSW {
IndexHNSW2Level();
IndexHNSW2Level(Index *quantizer, size_t nlist, int m_pq, int M);
// implements the pure virtual declared in IndexHNSW
HNSW::DistanceComputer * get_distance_computer() const override;
// NOTE(review): judging by the name this switches the storage to an
// IVF layout -- behavior is not visible in this header, confirm in the .cpp
void flip_to_ivf();
/// entry point for search
void search (idx_t n, const float *x, idx_t k,
float *distances, idx_t *labels) const override;
};
};
This diff is collapsed.
......@@ -24,6 +24,38 @@
namespace faiss {
/** Encapsulates a quantizer object for the IndexIVF
 *
 * The class isolates the fields that are independent of the storage
 * of the lists (especially training)
 */
struct Level1Quantizer {
Index * quantizer; ///< quantizer that maps vectors to inverted lists
size_t nlist; ///< number of possible key values
/**
* = 0: use the quantizer as index in a kmeans training
* = 1: just pass on the training set to the train() of the quantizer
* = 2: kmeans training on a flat index + add the centroids to the quantizer
*/
char quantizer_trains_alone;
bool own_fields; ///< whether object owns the quantizer
ClusteringParameters cp; ///< to override default clustering params
Index *clustering_index; ///< to override index used during clustering
/// Trains the quantizer and calls train_residual to train sub-quantizers
/// NOTE(review): this struct declares no train_residual; the sentence
/// above may only apply to the IndexIVF caller -- confirm.
/// n and x are presumably the number of training vectors and the
/// training vectors themselves, as for Index::train -- TODO confirm.
void train_q1 (size_t n, const float *x, bool verbose,
MetricType metric_type);
Level1Quantizer (Index * quantizer, size_t nlist);
Level1Quantizer ();
~Level1Quantizer ();
};
/** Index based on an inverted file (IVF)
*
......@@ -42,22 +74,9 @@ namespace faiss {
* Sub-classes implement a post-filtering of the index that refines
* the distance estimation from the query to database vectors.
*/
struct IndexIVF: Index {
size_t nlist; ///< number of possible key values
struct IndexIVF: Index, Level1Quantizer {
size_t nprobe; ///< number of probes at query time
Index * quantizer; ///< quantizer that maps vectors to inverted lists
/**
* = 0: use the quantizer as index in a kmeans training
* = 1: just pass on the training set to the train() of the quantizer
* = 2: kmeans training on a flat index + add the centroids to the quantizer
*/
char quantizer_trains_alone;
bool own_fields; ///< whether object owns the quantizer
ClusteringParameters cp; ///< to override default clustering params
Index *clustering_index; ///< to override index used during clustering
size_t max_codes; ///< max nb of codes to visit to do a query
std::vector < std::vector<long> > ids; ///< Inverted lists for indexes
......@@ -74,7 +93,7 @@ struct IndexIVF: Index {
* be deleted while the IndexIVF is in use.
*/
IndexIVF (Index * quantizer, size_t d, size_t nlist,
MetricType metric = METRIC_INNER_PRODUCT);
MetricType metric = METRIC_L2);
void reset() override;
......@@ -115,6 +134,42 @@ struct IndexIVF: Index {
virtual void search (idx_t n, const float *x, idx_t k,
float *distances, idx_t *labels) const override;
void reconstruct (idx_t key, float* recons) const override;
/** Reconstruct a subset of the indexed vectors.
*
* Overrides default implementation to bypass reconstruct() which requires
* direct_map to be maintained.
*
* @param i0 first vector to reconstruct
* @param ni nb of vectors to reconstruct
* @param recons output array of reconstructed vectors, size ni * d
*/
void reconstruct_n(idx_t i0, idx_t ni, float* recons) const override;
/** Similar to search, but also reconstructs the stored vectors (or an
* approximation in the case of lossy coding) for the search results.
*
* Overrides default implementation to avoid having to maintain direct_map
* and instead fetch the code offsets through the `store_pairs` flag in
* search_preassigned().
*
* @param recons reconstructed vectors size (n, k, d)
*/
void search_and_reconstruct (idx_t n, const float *x, idx_t k,
float *distances, idx_t *labels,
float *recons) const override;
/** Reconstruct a vector given the location in terms of (inv list index +
* inv list offset) instead of the id.
*
* Useful for reconstructing when the direct_map is not maintained and
* the inv list offset is computed by search_preassigned() with
* `store_pairs` set.
*/
virtual void reconstruct_from_offset (long list_no, long offset,
float* recons) const;
/// Dataset manipulation functions
......@@ -157,18 +212,17 @@ struct IndexIVF: Index {
};
struct IndexIVFFlatStats {
struct IndexIVFStats {
size_t nq; // nb of queries run
size_t nlist; // nb of inverted lists scanned
size_t ndis; // nb of distances computed
size_t npartial; // nb of bound computations (IndexIVFFlatIPBounds)
IndexIVFFlatStats () {reset (); }
IndexIVFStats () {reset (); }
void reset ();
};
// global var that collects them all
extern IndexIVFFlatStats indexIVFFlat_stats;
extern IndexIVFStats indexIVF_stats;
......@@ -182,7 +236,7 @@ struct IndexIVFFlat: IndexIVF {
IndexIVFFlat (
Index * quantizer, size_t d, size_t nlist_,
MetricType = METRIC_INNER_PRODUCT);
MetricType = METRIC_L2);
/// same as add_with_ids, with precomputed coarse quantizer
virtual void add_core (idx_t n, const float * x, const long *xids,
......@@ -213,7 +267,8 @@ struct IndexIVFFlat: IndexIVF {
*/
void update_vectors (int nv, idx_t *idx, const float *v);
void reconstruct(idx_t key, float* recons) const override;
void reconstruct_from_offset (long list_no, long offset,
float* recons) const override;
IndexIVFFlat () {}
};
......
This diff is collapsed.
......@@ -36,7 +36,6 @@ struct IndexIVFPQ: IndexIVF {
// search-time parameters
size_t scan_table_threshold; ///< use table computation or on-the-fly?
size_t max_codes; ///< max nb of codes to visit to do a query
int polysemous_ht; ///< Hamming thresh for polysemous filtering
......@@ -64,16 +63,8 @@ struct IndexIVFPQ: IndexIVF {
/// same as train_residual, also output 2nd level residuals
void train_residual_o (idx_t n, const float *x, float *residuals_2);
/** Reconstruct a subset of the indexed vectors
*
* @param i0 first vector to reconstruct
* @param ni nb of vectors to reconstruct
* @param recons output array of reconstructed vectors, size ni * d
*/
void reconstruct_n(idx_t i0, idx_t ni, float* recons) const override;
void reconstruct(idx_t key, float* recons) const override;
void reconstruct_from_offset (long list_no, long offset,
float* recons) const override;
/** Find exact duplicates in the dataset.
*
......@@ -114,16 +105,6 @@ struct IndexIVFPQ: IndexIVF {
float *distances, idx_t *labels,
bool store_pairs) const override;
/** Same as the search function, but also reconstruct approximate
* vectors for the search results
*
* @param reconstructed size (n, k, d)
**/
void search_and_reconstruct (idx_t n, const float *x, idx_t k,
float *distances, idx_t *labels,
float *reconstructed);
/// build precomputed table
void precompute_table ();
......@@ -190,17 +171,17 @@ struct IndexIVFPQR: IndexIVFPQ {
void add_core (idx_t n, const float *x, const long *xids,
const long *precomputed_idx = nullptr);
void reconstruct_n(idx_t i0, idx_t ni, float* recons) const override;
void reconstruct_from_offset (long list_no, long offset,
float* recons) const override;
void merge_from (IndexIVF &other, idx_t add_id) override;
void search(
idx_t n,
const float* x,
idx_t k,
float* distances,
idx_t* labels) const override;
void search_preassigned (idx_t n, const float *x, idx_t k,
const idx_t *assign,
const float *centroid_dis,
float *distances, idx_t *labels,
bool store_pairs) const override;
IndexIVFPQR();
};
......@@ -250,6 +231,60 @@ struct IndexIVFPQCompact: IndexIVFPQ {
};
/** Same as an IndexIVFPQ without the inverted lists: codes are stored sequentially
 *
 * The class is mainly intended to store encoded vectors that can be
 * accessed randomly, the search function is not implemented.
 */
struct Index2Layer: Index {
/// first level quantizer
Level1Quantizer q1;
/// second level quantizer is always a PQ
ProductQuantizer pq;
/// Codes. Size ntotal * code_size.
std::vector<uint8_t> codes;
/// size of the code for the first level (ceil(log8(q1.nlist)))
/// NOTE(review): "log8" presumably means the number of bytes needed to
/// store a list id -- confirm against the constructor
size_t code_size_1;
/// size of the code for the second level
size_t code_size_2;
/// code_size_1 + code_size_2
size_t code_size;
Index2Layer (Index * quantizer, size_t nlist,
int M, MetricType metric = METRIC_L2);
Index2Layer ();
~Index2Layer ();
void train(idx_t n, const float* x) override;
void add(idx_t n, const float* x) override;
/// not implemented
void search(
idx_t n,
const float* x,
idx_t k,
float* distances,
idx_t* labels) const override;
void reconstruct_n(idx_t i0, idx_t ni, float* recons) const override;
void reconstruct(idx_t key, float* recons) const override;
void reset() override;
/// transfer the flat codes to an IVFPQ index
void transfer_to_IVFPQ(IndexIVFPQ & other) const;
};
} // namespace faiss
......
......@@ -803,6 +803,7 @@ MultiIndexQuantizer::MultiIndexQuantizer (int d,
void MultiIndexQuantizer::train(idx_t n, const float *x)
{
pq.verbose = verbose;
pq.train (n, x);
is_trained = true;
// count virtual elements in index
......
This diff is collapsed.
......@@ -74,7 +74,24 @@ struct ScalarQuantizer {
size_t n) const ;
/// decode a vector from a given code (or n vectors if third argument)
void decode (const uint8_t *code, float *x, size_t n) const;
void decode (const uint8_t *code, float *x, size_t n) const;
// fast, non thread-safe way of computing vector-to-code and
// code-to-code distances.
// Abstract interface: both distance functions are pure virtual.
struct DistanceComputer {
/// vector-to-code distance computation
/// (x: float vector, code: an encoded vector)
virtual float compute_distance (const float *x,
const uint8_t *code) = 0;
/// code-to-code distance computation
/// (code1, code2: two encoded vectors)
virtual float compute_code_distance (const uint8_t *code1,
const uint8_t *code2) = 0;
virtual ~DistanceComputer () {}
};
DistanceComputer *get_distance_computer (MetricType metric = METRIC_L2)
const;
};
......@@ -126,7 +143,7 @@ struct IndexScalarQuantizer: Index {
* distances are computed.
*/
struct IndexIVFScalarQuantizer:IndexIVF {
struct IndexIVFScalarQuantizer: IndexIVF {
ScalarQuantizer sq;
IndexIVFScalarQuantizer(Index *quantizer, size_t d, size_t nlist,
......@@ -145,6 +162,9 @@ struct IndexIVFScalarQuantizer:IndexIVF {
float *distances, idx_t *labels,
bool store_pairs) const override;
void reconstruct_from_offset (long list_no, long offset,
float* recons) const override;
};
......
......@@ -29,7 +29,7 @@ LIBOBJ=hamming.o utils.o \
Clustering.o Heap.o VectorTransform.o index_io.o \
PolysemousTraining.o MetaIndexes.o Index.o \
ProductQuantizer.o AutoTune.o AuxIndexStructures.o \
IndexScalarQuantizer.o FaissException.o
IndexScalarQuantizer.o FaissException.o IndexHNSW.o
$(LIBNAME).a: $(LIBOBJ)
......@@ -44,6 +44,7 @@ $(LIBNAME).$(SHAREDEXT): $(LIBOBJ)
utils.o: EXTRAFLAGS=$(BLASCFLAGS)
VectorTransform.o: EXTRAFLAGS=$(BLASCFLAGS)
ProductQuantizer.o: EXTRAFLAGS=$(BLASCFLAGS)
IndexHNSW.o: EXTRAFLAGS=$(BLASCFLAGS)
# for MKL, the flags when generating a dynamic lib are different from
# the ones when making an executable, but by default they are the same
......@@ -121,7 +122,7 @@ VectorTransform.o: VectorTransform.cpp VectorTransform.h Index.h utils.h \
index_io.o: index_io.cpp index_io.h FaissAssert.h FaissException.h \
IndexFlat.h Index.h VectorTransform.h IndexLSH.h IndexPQ.h \
ProductQuantizer.h Clustering.h Heap.h PolysemousTraining.h IndexIVF.h \
IndexIVFPQ.h MetaIndexes.h IndexScalarQuantizer.h
IndexIVFPQ.h MetaIndexes.h IndexScalarQuantizer.h IndexHNSW.h utils.h
PolysemousTraining.o: PolysemousTraining.cpp PolysemousTraining.h \
ProductQuantizer.h Clustering.h Index.h Heap.h utils.h hamming.h \
FaissAssert.h FaissException.h
......@@ -134,12 +135,16 @@ ProductQuantizer.o: ProductQuantizer.cpp ProductQuantizer.h Clustering.h \
AutoTune.o: AutoTune.cpp AutoTune.h Index.h FaissAssert.h \
FaissException.h utils.h Heap.h IndexFlat.h VectorTransform.h IndexLSH.h \
IndexPQ.h ProductQuantizer.h Clustering.h PolysemousTraining.h \
IndexIVF.h IndexIVFPQ.h MetaIndexes.h IndexScalarQuantizer.h
IndexIVF.h IndexIVFPQ.h MetaIndexes.h IndexScalarQuantizer.h IndexHNSW.h
AuxIndexStructures.o: AuxIndexStructures.cpp AuxIndexStructures.h Index.h
IndexScalarQuantizer.o: IndexScalarQuantizer.cpp IndexScalarQuantizer.h \
IndexIVF.h Index.h Clustering.h Heap.h utils.h FaissAssert.h \
FaissException.h
FaissException.o: FaissException.cpp FaissException.h
IndexHNSW.o: IndexHNSW.cpp IndexHNSW.h IndexFlat.h Index.h IndexPQ.h \
ProductQuantizer.h Clustering.h Heap.h PolysemousTraining.h \
IndexScalarQuantizer.h IndexIVF.h utils.h FaissAssert.h FaissException.h \
IndexIVFPQ.h
clean:
......
......@@ -25,7 +25,7 @@ namespace faiss {
struct SimulatedAnnealingParameters {
// optimization parameters
double init_temperature; // init probability of accepting a bad swap
double init_temperature; // init probability of accepting a bad swap
double temperature_decay; // at each iteration the temp is multiplied by this
int n_iter; // nb of iterations
int n_redo; // nb of runs of the simulation
......
......@@ -355,7 +355,7 @@ void ProductQuantizer::decode (const uint8_t *code, float *x) const
/** Decode n consecutive codes by calling the single-code decode() on each.
 *
 * @param code  input codes, read with a stride of code_size bytes
 * @param x     output vectors, written with a stride of d floats
 * @param n     number of codes to decode
 */
void ProductQuantizer::decode (const uint8_t *code, float *x, size_t n) const
{
    for (size_t i = 0; i < n; i++) {
        // Step through the input by code_size and the output by d. A single
        // decode per entry: nothing is read at any other offset.
        this->decode (code + code_size * i, x + d * i);
    }
}
......
This diff is collapsed.
......@@ -37,7 +37,7 @@ struct VectorTransform {
{}
/// set if the LinearTransform does not require training, or if
/// set if the VectorTransform does not require training, or if
/// training is done already
bool is_trained;
......@@ -78,6 +78,9 @@ struct LinearTransform: VectorTransform {
bool have_bias; ///! whether to use the bias term
/// check if matrix A is orthonormal (enables reverse_transform)
bool is_orthonormal;
/// Transformation matrix, size d_out * d_in
std::vector<float> A;
......@@ -96,6 +99,13 @@ struct LinearTransform: VectorTransform {
void transform_transpose (idx_t n, const float * y,
float *x) const;
/// works only if is_orthonormal
void reverse_transform (idx_t n, const float * xt,
float *x) const override;
/// compute A^T * A to set the is_orthonormal flag
void set_is_orthonormal ();
bool verbose;
~LinearTransform() override {}
......@@ -113,8 +123,6 @@ struct RandomRotationMatrix: LinearTransform {
/// must be called before the transform is used
void init(int seed);
void reverse_transform(idx_t n, const float* xt, float* x) const override;
RandomRotationMatrix () {}
};
......@@ -157,8 +165,6 @@ struct PCAMatrix: LinearTransform {
/// will be completed with 0s
void train(Index::idx_t n, const float* x) override;
void reverse_transform(idx_t n, const float* xt, float* x) const override;
/// copy pre-trained PCA matrix
void copy_from (const PCAMatrix & other);
......@@ -192,8 +198,6 @@ struct OPQMatrix: LinearTransform {
explicit OPQMatrix (int d = 0, int M = 1, int d2 = -1);
void train(Index::idx_t n, const float* x) override;
void reverse_transform(idx_t n, const float* xt, float* x) const override;
};
......@@ -230,6 +234,9 @@ struct NormalizationTransform: VectorTransform {
NormalizationTransform ();
void apply_noalloc(idx_t n, const float* x, float* xt) const override;
/// Identity transform since norm is not revertible
void reverse_transform(idx_t n, const float* xt, float* x) const override;
};
......@@ -271,13 +278,23 @@ struct IndexPreTransform: Index {
float* distances,
idx_t* labels) const override;
void reconstruct (idx_t key, float * recons) const override;
void reconstruct_n (idx_t i0, idx_t ni, float *recons)
const override;
void search_and_reconstruct (idx_t n, const float *x, idx_t k,
float *distances, idx_t *labels,
float *recons) const override;
/// apply the transforms in the chain. The returned float * may be
/// equal to x, otherwise it should be deallocated.
const float * apply_chain (idx_t n, const float *x) const;
/// Reverse the transforms in the chain. May not be implemented for
/// all transforms in the chain or may return approximate results.
void reverse_chain (idx_t n, const float* xt, float* x) const;
~IndexPreTransform() override;
};
......
This diff is collapsed.
......@@ -427,7 +427,7 @@ def replacement_map_add(self, keys, vals):
def replacement_map_search_multiple(self, keys):
    """Look up several keys at once through search_multiple_c.

    keys must be a 1-D array (its shape is unpacked into a single length).
    Returns a freshly allocated int64 array of the same length, which
    search_multiple_c fills in.
    """
    n, = keys.shape
    # Single int64 output buffer, handed to the C side via swig_ptr.
    vals = np.empty(n, dtype='int64')
    self.search_multiple_c(n, swig_ptr(keys), swig_ptr(vals))
    return vals
......
......@@ -150,6 +150,8 @@ dep:
../VectorTransform.h ../MetaIndexes.h GpuIndexFlat.h GpuIndexIVFFlat.h \
GpuIndexIVF.h ../Clustering.h GpuIndexIVFPQ.h IndexProxy.h \
utils/WorkerThread.h
./GpuClonerOptions.o: GpuClonerOptions.cpp GpuClonerOptions.h \
GpuIndicesOptions.h
impl/RemapIndices.o: impl/RemapIndices.cpp impl/RemapIndices.h \
impl/../../FaissAssert.h impl/../../FaissException.h
utils/DeviceMemory.o: utils/DeviceMemory.cpp utils/DeviceMemory.h \
......
......@@ -111,4 +111,4 @@ class EvalIVFPQAccuracy(unittest.TestCase):
index = faiss.index_factory(12, "PCAR8,IVF10,PQ4")
res = faiss.StandardGpuResources()
gpu_index = faiss.index_cpu_to_gpu(res, 0, index)
faiss.GpuParameterSpace().set_index_parameter(index, "nprobe", 3)
faiss.GpuParameterSpace().set_index_parameter(gpu_index, "nprobe", 3)
This diff is collapsed.
This diff is collapsed.
......@@ -9,7 +9,6 @@
import os
import time
import numpy as np
import pdb
try:
import matplotlib
......
This diff is collapsed.
......@@ -6,7 +6,6 @@
#! /usr/bin/env python2
import numpy as np
import unittest
import faiss
......
This diff is collapsed.
This diff is collapsed.
......@@ -195,6 +195,8 @@ void fvec_L2sqr_by_idx (
* KNN functions
***************************************************************************/
// threshold on nx above which we switch to BLAS to compute distances
extern int distance_compute_blas_threshold;
/** Return the k nearest neighbors of each of the nx vectors x among the ny
 * vectors y, w.r.t. max inner product
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment