sync with FB version 2017-01-09

- adding HNSW indexing method - simultaneous search and reconstruction for IndexIVFPQ

sync with FB version 2017-01-09
- adding HNSW indexing method - simultaneous search and reconstruction for IndexIVFPQ
9933892e · matthijs · 5b45b055 · 9933892e · 9933892e · 9933892e
Commit 9933892e authored Jan 09, 2018 by matthijs
32 changed files
--- a/AutoTune.cpp
+++ b/AutoTune.cpp
@@ -23,6 +23,7 @@
 #include "IndexIVFPQ.h"
 #include "MetaIndexes.h"
 #include "IndexScalarQuantizer.h"
+#include "IndexHNSW.h"


 namespace faiss {
@@ -321,6 +322,11 @@ static void init_pq_ParameterRange (const ProductQuantizer & pq,

 ParameterRange &ParameterSpace::add_range(const char * name)
 {
+    for (auto & pr : parameter_ranges) {
+        if (pr.name == name) {
+            return pr;
+        }
+    }
    parameter_ranges.push_back (ParameterRange ());
    parameter_ranges.back ().name = name;
    return parameter_ranges.back ();
@@ -353,6 +359,12 @@ void ParameterSpace::initialize (const Index * index)
                pr.values.push_back (nprobe);
            }
        }
+        if (dynamic_cast<const IndexHNSW*>(ix->quantizer)) {
+            ParameterRange & pr = add_range("efSearch");
+            for (int i = 2; i <= 9; i++) {
+                pr.values.push_back (1 << i);
+            }
+        }
    }
    if (DC (IndexPQ)) {
        ParameterRange & pr = add_range("ht");
@@ -361,7 +373,9 @@ void ParameterSpace::initialize (const Index * index)
    if (DC (IndexIVFPQ)) {
        ParameterRange & pr = add_range("ht");
        init_pq_ParameterRange (ix->pq, pr);
+    }

+    if (DC (IndexIVF)) {
        const MultiIndexQuantizer *miq =
            dynamic_cast<const MultiIndexQuantizer *> (ix->quantizer);
        if (miq) {
@@ -378,6 +392,12 @@ void ParameterSpace::initialize (const Index * index)
            pr.values.push_back (1 << i);
        }
    }
+    if (dynamic_cast<const IndexHNSW*>(index)) {
+        ParameterRange & pr = add_range("efSearch");
+        for (int i = 2; i <= 9; i++) {
+            pr.values.push_back (1 << i);
+        }
+    }
 }

 #undef DC
@@ -489,7 +509,7 @@ void ParameterSpace::set_index_parameter (
        }
    }
    if (name == "max_codes") {
-        if (DC (IndexIVFPQ)) {
+        if (DC (IndexIVF)) {
            ix->max_codes = finite(val) ? size_t(val) : 0;
            return;
        }
@@ -683,7 +703,7 @@ Index *index_factory (int d, const char *description_in, MetricType metric)
    for (char *tok = strtok_r (description, " ,", &ptr);
         tok;
         tok = strtok_r (nullptr, " ,", &ptr)) {
-        int d_out, opq_M, nbit, M, M2;
+        int d_out, opq_M, nbit, M, M2, pq_m, ncent;
        std::string stok(tok);

        // to avoid mem leaks with exceptions:
@@ -793,7 +813,6 @@ Index *index_factory (int d, const char *description_in, MetricType metric)
                index_pq->do_polysemous_training = do_polysemous_training;
                index_1 = index_pq;
            }
-
        } else if (stok == "RFlat") {
            make_IndexRefineFlat = true;
        } else {
@@ -841,7 +860,7 @@ Index *index_factory (int d, const char *description_in, MetricType metric)
        index_pt->own_fields = true;
        // add from back
        while (vts.chain.size() > 0) {
-            index_pt->prepend_transform (vts.chain.back());
+            index_pt->prepend_transform (vts.chain.back ());
            vts.chain.pop_back ();
        }
        index = index_pt;

--- a/AutoTune.h
+++ b/AutoTune.h
@@ -162,7 +162,7 @@ struct ParameterSpace {
    /// print a description on stdout
    void display () const;

-    /// add a new parameter
+    /// add a new parameter (or return it if it exists)
    ParameterRange &add_range(const char * name);

    /// initialize with reasonable parameters for the index

--- a/Clustering.cpp
+++ b/Clustering.cpp
@@ -65,8 +65,9 @@ static double imbalance_factor (int n, int k, long *assign) {


 void Clustering::train (idx_t nx, const float *x_in, Index & index) {
-    FAISS_THROW_IF_NOT_MSG (nx >= k,
-                    "need at least as many training points as clusters");
+    FAISS_THROW_IF_NOT_FMT (nx >= k,
+             "Number of training points (%ld) should be at least "
+             "as large as number of clusters (%ld)", nx, k);

    double t0 = getmillisecs();

@@ -100,12 +101,26 @@ void Clustering::train (idx_t nx, const float *x_in, Index & index) {
    }


+    if (nx == k) {
+        if (verbose) {
+            printf("Number of training points (%ld) same as number of "
+                   "clusters, just copying\n", nx);
+        }
+        // this is a corner case, just copy training set to clusters
+        centroids.resize (d * k);
+        memcpy (centroids.data(), x_in, sizeof (*x_in) * d * k);
+        return;
+    }
+
+
    if (verbose)
        printf("Clustering %d points in %ldD to %ld clusters, "
               "redo %d times, %d iterations\n",
               int(nx), d, k, nredo, niter);


+
+
    idx_t * assign = new idx_t[nx];
    ScopeDeleter<idx_t> del (assign);
    float * dis = new float[nx];

--- a/Heap.h
+++ b/Heap.h
@@ -179,7 +179,7 @@ void maxheap_push (size_t k, T * bh_val, long * bh_ids, T val, long ids)
 * Heap initialization
 *******************************************************************/

-/* Initialization phase for the heap (with inconditionnal pushes).
+/* Initialization phase for the heap (with unconditional pushes).
 * Store k0 elements in a heap containing up to k values. Note that
 * (bh_val, bh_ids) can be the same as (x, ids) */
 template <class C> inline

--- a/Index.cpp
+++ b/Index.cpp
@@ -11,8 +11,19 @@
 #include "IndexFlat.h"
 #include "FaissAssert.h"

+#include <cstring>
+
 namespace faiss {

+Index::~Index ()
+{
+}
+
+
+void Index::train(idx_t /*n*/, const float* /*x*/) {
+    // does nothing by default
+}
+

 void Index::range_search (idx_t , const float *, float,
                          RangeSearchResult *) const
@@ -52,6 +63,25 @@ void Index::reconstruct_n (idx_t i0, idx_t ni, float *recons) const {
 }


+void Index::search_and_reconstruct (idx_t n, const float *x, idx_t k,
+                                    float *distances, idx_t *labels,
+                                    float *recons) const {
+  search (n, x, k, distances, labels);
+  for (idx_t i = 0; i < n; ++i) {
+    for (idx_t j = 0; j < k; ++j) {
+      idx_t ij = i * k + j;
+      idx_t key = labels[ij];
+      float* reconstructed = recons + ij * d;
+      if (key < 0) {
+        // Fill with NaNs
+        memset(reconstructed, -1, sizeof(*reconstructed) * d);
+      } else {
+        reconstruct (key, reconstructed);
+      }
+    }
+  }
+}
+

 void Index::compute_residual (const float * x,
                              float * residual, idx_t key) const {

--- a/Index.h
+++ b/Index.h
@@ -71,14 +71,14 @@ struct Index {
    /// type of metric this index uses for search
    MetricType metric_type;

-    explicit Index (idx_t d = 0, MetricType metric = METRIC_INNER_PRODUCT):
+    explicit Index (idx_t d = 0, MetricType metric = METRIC_L2):
                    d(d),
                    ntotal(0),
                    verbose(false),
                    is_trained(true),
                    metric_type (metric) {}

-    virtual ~Index () {  }
+    virtual ~Index ();


    /** Perform training on a representative set of vectors
@@ -86,9 +86,7 @@ struct Index {
     * @param n      nb of training vectors
     * @param x      training vecors, size n * d
     */
-    virtual void train(idx_t /*n*/, const float* /*x*/) {
-      // does nothing by default
-    }
+    virtual void train(idx_t n, const float* x);

    /** Add n vectors of dimension d to the index.
     *
@@ -164,6 +162,17 @@ struct Index {
     */
    virtual void reconstruct_n (idx_t i0, idx_t ni, float *recons) const;

+    /** Similar to search, but also reconstructs the stored vectors (or an
+     * approximation in the case of lossy coding) for the search results.
+     *
+     * If there are not enough results for a query, the resulting arrays
+     * are padded with -1s.
+     *
+     * @param recons      reconstructed vectors size (n, k, d)
+     **/
+    virtual void search_and_reconstruct (idx_t n, const float *x, idx_t k,
+                                         float *distances, idx_t *labels,
+                                         float *recons) const;

    /** Computes a residual vector after indexing encoding.
     *

--- a/IndexHNSW.cpp
+++ b/IndexHNSW.cpp
--- a/IndexHNSW.h
+++ b/IndexHNSW.h
+/**
+ * Copyright (c) 2015-present, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD+Patents license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#pragma once
+
+#include <vector>
+#include <omp.h>
+
+#include "IndexFlat.h"
+#include "IndexPQ.h"
+#include "IndexScalarQuantizer.h"
+#include "utils.h"
+
+
+namespace faiss {
+
+/** Implementation of the Hierarchical Navigable Small World
+ * datastructure.
+ *
+ * Efficient and robust approximate nearest neighbor search using
+ * Hierarchical Navigable Small World graphs
+ *
+ *  Yu. A. Malkov, D. A. Yashunin, arXiv 2017
+ *
+ * This implementation is heavily influenced by the NMSlib
+ * implementation by Yury Malkov and Leonid Boytsov
+ * (https://github.com/searchivarius/nmslib)
+ *
+ * The HNSW object stores only the neighbor link structure, see
+ * IndexHNSW below for the full index object.
+ */
+
+struct VisitedTable;
+
+struct HNSW {
+
+    /// internal storage of vectors (32 bits: this is expensive)
+    typedef int storage_idx_t;
+
+    /// Faiss results are 64-bit
+    typedef faiss::Index::idx_t idx_t;
+
+    /** The HNSW structure does not store vectors, it only accesses
+     * them through this class.
+     *
+     * Functions are guaranteed to be accessed only from 1 thread. */
+    struct DistanceComputer {
+
+        idx_t d;
+
+        /// called before computing distances
+        virtual void set_query (const float *x) = 0;
+
+        /// compute distance of vector i to current query
+        virtual float operator () (storage_idx_t i) = 0;
+
+        /// compute distance between two stored vectors
+        virtual float symmetric_dis(storage_idx_t i, storage_idx_t j) = 0;
+
+        virtual ~DistanceComputer () {}
+
+    };
+
+    /// assignment probability to each layer (sum=1)
+    std::vector<double> assign_probas;
+
+    /// number of neighbors stored per layer (cumulative), should not
+    /// be changed after first add
+    std::vector<int> cum_nneighbor_per_level;
+
+    /// level of each vector (base level = 1), size = ntotal
+    std::vector<int> levels;
+
+    /// offsets[i] is the offset in the neighbors array where vector i is stored
+    /// size ntotal + 1
+    std::vector<size_t> offsets;
+
+    /// neighbors[offsets[i]:offsets[i+1]] is the list of neighbors of vector i
+    /// for all levels. this is where all storage goes.
+    std::vector<storage_idx_t> neighbors;
+
+    /// entry point in the search structure (one of the points with maximum level)
+    storage_idx_t entry_point;
+
+    faiss::RandomGenerator rng;
+
+    /// maximum level
+    int max_level;
+
+    /// expansion factor at construction time
+    int efConstruction;
+
+    /// expansion factor at search time
+    int efSearch;
+
+    /// during search: do we check whether the next best distance is good enough?
+    bool check_relative_distance;
+
+    /// number of entry points in levels > 0.
+    int upper_beam;
+
+    // methods that initialize the tree sizes
+
+    /// initialize the assign_probas and cum_nneighbor_per_level to
+    /// have 2*M links on level 0 and M links on levels > 0
+    void set_default_probas(int M, float levelMult);
+
+    /// set nb of neighbors for this level (before adding anything)
+    void set_nb_neighbors(int level_no, int n);
+
+    // methods that access the tree sizes
+
+    /// nb of neighbors for this level
+    int nb_neighbors(int layer_no) const;
+
+    /// cumulative nb up to (and excluding) this level
+    int cum_nb_neighbors(int layer_no) const;
+
+    /// range of entries in the neighbors table of vertex no at layer_no
+    void neighbor_range(idx_t no, int layer_no,
+                        size_t * begin, size_t * end) const;
+
+    /// only mandatory parameter: nb of neighbors
+    explicit HNSW(int M = 32);
+
+    /// pick a random level for a new point
+    int random_level();
+
+    /// add n random levels to table (for debugging...)
+    void fill_with_random_links(size_t n);
+
+    /** add point pt_id on all levels <= pt_level and build the link
+     * structure for them. */
+    void add_with_locks(DistanceComputer & ptdis, int pt_level, int pt_id,
+                        std::vector<omp_lock_t> & locks,
+                        VisitedTable &vt);
+
+
+    /// search interface
+    void search(DistanceComputer & qdis, int k,
+                idx_t *I, float * D,
+                VisitedTable &vt) const;
+
+    void reset();
+
+    void clear_neighbor_tables(int level);
+    void print_neighbor_stats(int level) const;
+};
+
+
+struct HNSWStats {
+    size_t n1, n2, n3;
+    size_t ndis;
+    size_t nreorder;
+    bool view;
+
+    HNSWStats () {reset (); }
+    void reset ();
+};
+
+// global var that collects them all
+extern HNSWStats hnsw_stats;
+
+class IndexHNSW;
+
+struct ReconstructFromNeighbors {
+    typedef Index::idx_t idx_t;
+    typedef HNSW::storage_idx_t storage_idx_t;
+
+    const IndexHNSW & index;
+    size_t M; // number of neighbors
+    size_t k; // number of codebook entries
+    size_t nsq; // number of subvectors
+    size_t code_size;
+    int k_reorder; // nb to reorder. -1 = all
+
+    std::vector<float> codebook; // size nsq * k * (M + 1)
+
+    std::vector<uint8_t> codes; // size ntotal * code_size
+    size_t ntotal;
+    size_t d, dsub; // derived values
+
+    ReconstructFromNeighbors(const IndexHNSW & index,
+                             size_t k=256, size_t nsq=1);
+
+    /// codes must be added in the correct order and the IndexHNSW
+    /// must be populated and sorted
+    void add_codes(size_t n, const float *x);
+
+    size_t compute_distances(size_t n, const idx_t *shortlist,
+                           const float *query, float *distances) const;
+
+    /// called by add_codes
+    void estimate_code(const float *x, storage_idx_t i, uint8_t *code) const;
+
+    /// called by compute_distances
+    void reconstruct(storage_idx_t i, float *x, float *tmp) const;
+
+    void reconstruct_n(storage_idx_t n0, storage_idx_t ni, float *x) const;
+
+    /// get the M+1 -by-d table for neighbor coordinates for vector i
+    void get_neighbor_table(storage_idx_t i, float *out) const;
+
+};
+
+
+/** The HNSW index is a normal random-access index with a HNSW
+ * link structure built on top */
+
+struct IndexHNSW: Index {
+
+    typedef HNSW::storage_idx_t storage_idx_t;
+
+    // the link structure
+    HNSW hnsw;
+
+    // the sequential storage
+    bool own_fields;
+    Index * storage;
+
+    ReconstructFromNeighbors *reconstruct_from_neighbors;
+
+    explicit IndexHNSW (int d = 0, int M = 32);
+    explicit IndexHNSW (Index * storage, int M = 32);
+
+    ~IndexHNSW() override;
+
+    // get a DistanceComputer object for this kind of storage
+    virtual HNSW::DistanceComputer * get_distance_computer() const = 0;
+
+    void add(idx_t n, const float *x) override;
+
+    /// Trains the storage if needed
+    void train(idx_t n, const float* x) override;
+
+    /// entry point for search
+    void search (idx_t n, const float *x, idx_t k,
+                 float *distances, idx_t *labels) const override;
+
+    void reconstruct(idx_t key, float* recons) const override;
+
+    void reset () override;
+
+    void shrink_level_0_neighbors(int size);
+
+    /** Perform search only on level 0, given the starting points for
+     * each vertex.
+     *
+     * @param search_type 1:perform one search per nprobe, 2: enqueue
+     *                    all entry points
+     */
+    void search_level_0(idx_t n, const float *x, idx_t k,
+                        const storage_idx_t *nearest, const float *nearest_d,
+                        float *distances, idx_t *labels, int nprobe = 1,
+                        int search_type = 1) const;
+
+    /// alternative graph building
+    void init_level_0_from_knngraph(
+                        int k, const float *D, const idx_t *I);
+
+    /// alternative graph building
+    void init_level_0_from_entry_points(
+                        int npt, const storage_idx_t *points,
+                        const storage_idx_t *nearests);
+
+    // reorder links from nearest to farthest
+    void reorder_links();
+
+    void link_singletons();
+};
+
+
+
+/** Flat index topped with a HNSW structure to access elements
+ *  more efficiently.
+ */
+
+struct IndexHNSWFlat: IndexHNSW {
+    IndexHNSWFlat();
+    IndexHNSWFlat(int d, int M);
+    HNSW::DistanceComputer * get_distance_computer() const override;
+};
+
+/** PQ index topped with a HNSW structure to access elements
+ *  more efficiently.
+ */
+struct IndexHNSWPQ: IndexHNSW {
+    IndexHNSWPQ();
+    IndexHNSWPQ(int d, int pq_m, int M);
+    void train(idx_t n, const float* x) override;
+    HNSW::DistanceComputer * get_distance_computer() const override;
+};
+
+/** SQ index topped with a HNSW structure to access elements
+ *  more efficiently.
+ */
+struct IndexHNSWSQ: IndexHNSW {
+    IndexHNSWSQ();
+    IndexHNSWSQ(int d, ScalarQuantizer::QuantizerType qtype, int M);
+    HNSW::DistanceComputer * get_distance_computer() const override;
+};
+
+/** 2-level code structure with fast random access
+ */
+struct IndexHNSW2Level: IndexHNSW {
+    IndexHNSW2Level();
+    IndexHNSW2Level(Index *quantizer, size_t nlist, int m_pq, int M);
+    HNSW::DistanceComputer * get_distance_computer() const override;
+    void flip_to_ivf();
+
+    /// entry point for search
+    void search (idx_t n, const float *x, idx_t k,
+                 float *distances, idx_t *labels) const override;
+
+};
+
+
+
+};
--- a/IndexIVF.cpp
+++ b/IndexIVF.cpp
--- a/IndexIVF.h
+++ b/IndexIVF.h
@@ -24,6 +24,38 @@
 namespace faiss {


+/** Encapsulates a quantizer object for the IndexIVF
+ *
+ * The class isolates the fields that are independent of the storage
+ * of the lists (especially training)
+ */
+struct Level1Quantizer {
+    Index * quantizer;        ///< quantizer that maps vectors to inverted lists
+    size_t nlist;             ///< number of possible key values
+
+    /**
+     * = 0: use the quantizer as index in a kmeans training
+     * = 1: just pass on the training set to the train() of the quantizer
+     * = 2: kmeans training on a flat index + add the centroids to the quantizer
+     */
+    char quantizer_trains_alone;
+    bool own_fields;          ///< whether object owns the quantizer
+
+    ClusteringParameters cp; ///< to override default clustering params
+    Index *clustering_index; ///< to override index used during clustering
+
+    /// Trains the quantizer and calls train_residual to train sub-quantizers
+    void train_q1 (size_t n, const float *x, bool verbose,
+                   MetricType metric_type);
+
+    Level1Quantizer (Index * quantizer, size_t nlist);
+
+    Level1Quantizer ();
+
+    ~Level1Quantizer ();
+
+};
+

 /** Index based on a inverted file (IVF)
 *
@@ -42,22 +74,9 @@ namespace faiss {
 * Sub-classes implement a post-filtering of the index that refines
 * the distance estimation from the query to databse vectors.
 */
-struct IndexIVF: Index {
-    size_t nlist;             ///< number of possible key values
+struct IndexIVF: Index, Level1Quantizer {
    size_t nprobe;            ///< number of probes at query time
-
-    Index * quantizer;        ///< quantizer that maps vectors to inverted lists
-
-    /**
-     * = 0: use the quantizer as index in a kmeans training
-     * = 1: just pass on the training set to the train() of the quantizer
-     * = 2: kmeans training on a flat index + add the centroids to the quantizer
-     */
-    char quantizer_trains_alone;
-    bool own_fields;          ///< whether object owns the quantizer
-
-    ClusteringParameters cp; ///< to override default clustering params
-    Index *clustering_index; ///< to override index used during clustering
+    size_t max_codes;         ///< max nb of codes to visit to do a query

    std::vector < std::vector<long> > ids;  ///< Inverted lists for indexes

@@ -74,7 +93,7 @@ struct IndexIVF: Index {
     * be deleted while the IndexIVF is in use.
     */
    IndexIVF (Index * quantizer, size_t d, size_t nlist,
-              MetricType metric = METRIC_INNER_PRODUCT);
+              MetricType metric = METRIC_L2);

    void reset() override;

@@ -115,6 +134,42 @@ struct IndexIVF: Index {
    virtual void search (idx_t n, const float *x, idx_t k,
                         float *distances, idx_t *labels) const override;

+    void reconstruct (idx_t key, float* recons) const override;
+
+    /** Reconstruct a subset of the indexed vectors.
+     *
+     * Overrides default implementation to bypass reconstruct() which requires
+     * direct_map to be maintained.
+     *
+     * @param i0     first vector to reconstruct
+     * @param ni     nb of vectors to reconstruct
+     * @param recons output array of reconstructed vectors, size ni * d
+     */
+    void reconstruct_n(idx_t i0, idx_t ni, float* recons) const override;
+
+    /** Similar to search, but also reconstructs the stored vectors (or an
+     * approximation in the case of lossy coding) for the search results.
+     *
+     * Overrides default implementation to avoid having to maintain direct_map
+     * and instead fetch the code offsets through the `store_pairs` flag in
+     * search_preassigned().
+     *
+     * @param recons      reconstructed vectors size (n, k, d)
+     */
+    void search_and_reconstruct (idx_t n, const float *x, idx_t k,
+                                 float *distances, idx_t *labels,
+                                 float *recons) const override;
+
+    /** Reconstruct a vector given the location in terms of (inv list index +
+     * inv list offset) instead of the id.
+     *
+     * Useful for reconstructing when the direct_map is not maintained and
+     * the inv list offset is computed by search_preassigned() with
+     * `store_pairs` set.
+     */
+    virtual void reconstruct_from_offset (long list_no, long offset,
+                                          float* recons) const;
+

    /// Dataset manipulation functions

@@ -157,18 +212,17 @@ struct IndexIVF: Index {
 };


-struct IndexIVFFlatStats {
+struct IndexIVFStats {
    size_t nq;       // nb of queries run
    size_t nlist;    // nb of inverted lists scanned
    size_t ndis;     // nb of distancs computed
-    size_t npartial; // nb of bound computations (IndexIVFFlatIPBounds)

-    IndexIVFFlatStats () {reset (); }
+    IndexIVFStats () {reset (); }
    void reset ();
 };

 // global var that collects them all
-extern IndexIVFFlatStats indexIVFFlat_stats;
+extern IndexIVFStats indexIVF_stats;



@@ -182,7 +236,7 @@ struct IndexIVFFlat: IndexIVF {

    IndexIVFFlat (
            Index * quantizer, size_t d, size_t nlist_,
-            MetricType = METRIC_INNER_PRODUCT);
+            MetricType = METRIC_L2);

    /// same as add_with_ids, with precomputed coarse quantizer
    virtual void add_core (idx_t n, const float * x, const long *xids,
@@ -213,7 +267,8 @@ struct IndexIVFFlat: IndexIVF {
     */
    void update_vectors (int nv, idx_t *idx, const float *v);

-    void reconstruct(idx_t key, float* recons) const override;
+    void reconstruct_from_offset (long list_no, long offset,
+                                  float* recons) const override;

    IndexIVFFlat () {}
 };

--- a/IndexIVFPQ.cpp
+++ b/IndexIVFPQ.cpp
--- a/IndexIVFPQ.h
+++ b/IndexIVFPQ.h
@@ -36,7 +36,6 @@ struct IndexIVFPQ: IndexIVF {

    // search-time parameters
    size_t scan_table_threshold;   ///< use table computation or on-the-fly?
-    size_t max_codes;              ///< max nb of codes to visit to do a query
    int polysemous_ht;             ///< Hamming thresh for polysemous filtering


@@ -64,16 +63,8 @@ struct IndexIVFPQ: IndexIVF {
    /// same as train_residual, also output 2nd level residuals
    void train_residual_o (idx_t n, const float *x, float *residuals_2);

-
-    /** Reconstruct a subset of the indexed vectors
-     *
-     * @param i0     first vector to reconstruct
-     * @param ni     nb of vectors to reconstruct
-     * @param recons output array of reconstructed vectors, size ni * d
-     */
-    void reconstruct_n(idx_t i0, idx_t ni, float* recons) const override;
-
-    void reconstruct(idx_t key, float* recons) const override;
+    void reconstruct_from_offset (long list_no, long offset,
+                                  float* recons) const override;

    /** Find exact duplicates in the dataset.
     *
@@ -114,16 +105,6 @@ struct IndexIVFPQ: IndexIVF {
                             float *distances, idx_t *labels,
                             bool store_pairs) const override;

-    /** Same as the search function, but also reconstruct approximate
-     * vectors for the search results
-     *
-     * @param reconstructed    size (n, k, d)
-     **/
-    void search_and_reconstruct (idx_t n, const float *x, idx_t k,
-                                 float *distances, idx_t *labels,
-                                 float *reconstructed);
-
-
    /// build precomputed table
    void precompute_table ();

@@ -190,17 +171,17 @@ struct IndexIVFPQR: IndexIVFPQ {
    void add_core (idx_t n, const float *x, const long *xids,
                     const long *precomputed_idx = nullptr);

-    void reconstruct_n(idx_t i0, idx_t ni, float* recons) const override;
+    void reconstruct_from_offset (long list_no, long offset,
+                                  float* recons) const override;

    void merge_from (IndexIVF &other, idx_t add_id) override;


-    void search(
-        idx_t n,
-        const float* x,
-        idx_t k,
-        float* distances,
-        idx_t* labels) const override;
+    void search_preassigned (idx_t n, const float *x, idx_t k,
+                             const idx_t *assign,
+                             const float *centroid_dis,
+                             float *distances, idx_t *labels,
+                             bool store_pairs) const override;

    IndexIVFPQR();
 };
@@ -250,6 +231,60 @@ struct IndexIVFPQCompact: IndexIVFPQ {
 };


+/** Same as an IndexIVFPQ without the inverted lists: codes are stored sequentially
+ *
+ * The class is mainly intended to store encoded vectors that can be
+ * accessed randomly, the search function is not implemented.
+ */
+struct Index2Layer: Index {
+    /// first level quantizer
+    Level1Quantizer q1;
+
+    /// second level quantizer is always a PQ
+    ProductQuantizer pq;
+
+    /// Codes. Size ntotal * code_size.
+    std::vector<uint8_t> codes;
+
+    /// size of the code for the first level (ceil(log8(q1.nlist)))
+    size_t code_size_1;
+
+    /// size of the code for the second level
+    size_t code_size_2;
+
+    /// code_size_1 + code_size_2
+    size_t code_size;
+
+    Index2Layer (Index * quantizer, size_t nlist,
+                 int M, MetricType metric = METRIC_L2);
+
+    Index2Layer ();
+    ~Index2Layer ();
+
+    void train(idx_t n, const float* x) override;
+
+    void add(idx_t n, const float* x) override;
+
+    /// not implemented
+    void search(
+        idx_t n,
+        const float* x,
+        idx_t k,
+        float* distances,
+        idx_t* labels) const override;
+
+    void reconstruct_n(idx_t i0, idx_t ni, float* recons) const override;
+
+    void reconstruct(idx_t key, float* recons) const override;
+
+    void reset() override;
+
+    /// transfer the flat codes to an IVFPQ index
+    void transfer_to_IVFPQ(IndexIVFPQ & other) const;
+
+};
+
+

 } // namespace faiss


--- a/IndexPQ.cpp
+++ b/IndexPQ.cpp
@@ -803,6 +803,7 @@ MultiIndexQuantizer::MultiIndexQuantizer (int d,

 void MultiIndexQuantizer::train(idx_t n, const float *x)
 {
+    pq.verbose = verbose;
    pq.train (n, x);
    is_trained = true;
    // count virtual elements in index

--- a/IndexScalarQuantizer.cpp
+++ b/IndexScalarQuantizer.cpp
--- a/IndexScalarQuantizer.h
+++ b/IndexScalarQuantizer.h
@@ -74,7 +74,24 @@ struct ScalarQuantizer {
                        size_t n) const ;

    /// decode a vector from a given code (or n vectors if third argument)
-     void decode (const uint8_t *code, float *x, size_t n) const;
+    void decode (const uint8_t *code, float *x, size_t n) const;
+
+    // fast, non thread-safe way of computing vector-to-code and
+    // code-to-code distances.
+    struct DistanceComputer {
+
+        /// vector-to-code distance computation
+        virtual float compute_distance (const float *x,
+                                        const uint8_t *code) = 0;
+
+        /// code-to-code distance computation
+        virtual float compute_code_distance (const uint8_t *code1,
+                                             const uint8_t *code2) = 0;
+        virtual ~DistanceComputer () {}
+    };
+
+    DistanceComputer *get_distance_computer (MetricType metric = METRIC_L2)
+        const;

 };

@@ -126,7 +143,7 @@ struct IndexScalarQuantizer: Index {
 * distances are computed.
 */

-struct IndexIVFScalarQuantizer:IndexIVF {
+struct IndexIVFScalarQuantizer: IndexIVF {
    ScalarQuantizer sq;

    IndexIVFScalarQuantizer(Index *quantizer, size_t d, size_t nlist,
@@ -145,6 +162,9 @@ struct IndexIVFScalarQuantizer:IndexIVF {
                             float *distances, idx_t *labels,
                             bool store_pairs) const override;

+    void reconstruct_from_offset (long list_no, long offset,
+                                  float* recons) const override;
+
 };



--- a/Makefile
+++ b/Makefile
@@ -29,7 +29,7 @@ LIBOBJ=hamming.o  utils.o \
       Clustering.o Heap.o VectorTransform.o index_io.o \
       PolysemousTraining.o MetaIndexes.o Index.o \
       ProductQuantizer.o AutoTune.o AuxIndexStructures.o \
-       IndexScalarQuantizer.o FaissException.o
+       IndexScalarQuantizer.o FaissException.o IndexHNSW.o


 $(LIBNAME).a: $(LIBOBJ)
@@ -44,6 +44,7 @@ $(LIBNAME).$(SHAREDEXT): $(LIBOBJ)
 utils.o:             EXTRAFLAGS=$(BLASCFLAGS)
 VectorTransform.o:   EXTRAFLAGS=$(BLASCFLAGS)
 ProductQuantizer.o:  EXTRAFLAGS=$(BLASCFLAGS)
+IndexHNSW.o:         EXTRAFLAGS=$(BLASCFLAGS)

 # for MKL, the flags when generating a dynamic lib are different from
 # the ones when making an executable, but by default they are the same
@@ -121,7 +122,7 @@ VectorTransform.o: VectorTransform.cpp VectorTransform.h Index.h utils.h \
 index_io.o: index_io.cpp index_io.h FaissAssert.h FaissException.h \
 IndexFlat.h Index.h VectorTransform.h IndexLSH.h IndexPQ.h \
 ProductQuantizer.h Clustering.h Heap.h PolysemousTraining.h IndexIVF.h \
- IndexIVFPQ.h MetaIndexes.h IndexScalarQuantizer.h
+ IndexIVFPQ.h MetaIndexes.h IndexScalarQuantizer.h IndexHNSW.h utils.h
 PolysemousTraining.o: PolysemousTraining.cpp PolysemousTraining.h \
 ProductQuantizer.h Clustering.h Index.h Heap.h utils.h hamming.h \
 FaissAssert.h FaissException.h
@@ -134,12 +135,16 @@ ProductQuantizer.o: ProductQuantizer.cpp ProductQuantizer.h Clustering.h \
 AutoTune.o: AutoTune.cpp AutoTune.h Index.h FaissAssert.h \
 FaissException.h utils.h Heap.h IndexFlat.h VectorTransform.h IndexLSH.h \
 IndexPQ.h ProductQuantizer.h Clustering.h PolysemousTraining.h \
- IndexIVF.h IndexIVFPQ.h MetaIndexes.h IndexScalarQuantizer.h
+ IndexIVF.h IndexIVFPQ.h MetaIndexes.h IndexScalarQuantizer.h IndexHNSW.h
 AuxIndexStructures.o: AuxIndexStructures.cpp AuxIndexStructures.h Index.h
 IndexScalarQuantizer.o: IndexScalarQuantizer.cpp IndexScalarQuantizer.h \
 IndexIVF.h Index.h Clustering.h Heap.h utils.h FaissAssert.h \
 FaissException.h
 FaissException.o: FaissException.cpp FaissException.h
+IndexHNSW.o: IndexHNSW.cpp IndexHNSW.h IndexFlat.h Index.h IndexPQ.h \
+ ProductQuantizer.h Clustering.h Heap.h PolysemousTraining.h \
+ IndexScalarQuantizer.h IndexIVF.h utils.h FaissAssert.h FaissException.h \
+ IndexIVFPQ.h


 clean:

--- a/PolysemousTraining.h
+++ b/PolysemousTraining.h
@@ -25,7 +25,7 @@ namespace faiss {
 struct SimulatedAnnealingParameters {

    // optimization parameters
-    double init_temperature;   // init probability of accepting a bad swap
+    double init_temperature;   // init probability of accepting a bad swap
    double temperature_decay;  // at each iteration the temp is multiplied by this
    int n_iter; // nb of iterations
    int n_redo; // nb of runs of the simulation

--- a/ProductQuantizer.cpp
+++ b/ProductQuantizer.cpp
@@ -355,7 +355,7 @@ void ProductQuantizer::decode (const uint8_t *code, float *x) const
 void ProductQuantizer::decode (const uint8_t *code, float *x, size_t n) const
 {
    for (size_t i = 0; i < n; i++) {
-        this->decode (code + M * i, x + d * i);
+        this->decode (code + code_size * i, x + d * i);
    }
 }


--- a/VectorTransform.cpp
+++ b/VectorTransform.cpp
--- a/VectorTransform.h
+++ b/VectorTransform.h
@@ -37,7 +37,7 @@ struct VectorTransform {
    {}


-    /// set if the LinearTransform does not require training, or if
+    /// set if the VectorTransform does not require training, or if
    /// training is done already
    bool is_trained;

@@ -78,6 +78,9 @@ struct LinearTransform: VectorTransform {

    bool have_bias; ///! whether to use the bias term

+    /// true if matrix A is orthonormal (enables reverse_transform)
+    bool is_orthonormal;
+
    /// Transformation matrix, size d_out * d_in
    std::vector<float> A;

@@ -96,6 +99,13 @@ struct LinearTransform: VectorTransform {
    void transform_transpose (idx_t n, const float * y,
                              float *x) const;

+    /// works only if is_orthonormal
+    void reverse_transform (idx_t n, const float * xt,
+                            float *x) const override;
+
+    /// compute A^T * A to set the is_orthonormal flag
+    void set_is_orthonormal ();
+
    bool verbose;

    ~LinearTransform() override {}
@@ -113,8 +123,6 @@ struct RandomRotationMatrix: LinearTransform {
     /// must be called before the transform is used
     void init(int seed);

-     void reverse_transform(idx_t n, const float* xt, float* x) const override;
-
     RandomRotationMatrix () {}
 };

@@ -157,8 +165,6 @@ struct PCAMatrix: LinearTransform {
    /// will be completed with 0s
    void train(Index::idx_t n, const float* x) override;

-    void reverse_transform(idx_t n, const float* xt, float* x) const override;
-
    /// copy pre-trained PCA matrix
    void copy_from (const PCAMatrix & other);

@@ -192,8 +198,6 @@ struct OPQMatrix: LinearTransform {
    explicit OPQMatrix (int d = 0, int M = 1, int d2 = -1);

    void train(Index::idx_t n, const float* x) override;
-
-    void reverse_transform(idx_t n, const float* xt, float* x) const override;
 };


@@ -230,6 +234,9 @@ struct NormalizationTransform: VectorTransform {
    NormalizationTransform ();

    void apply_noalloc(idx_t n, const float* x, float* xt) const override;
+
+    /// Identity transform since normalization is not invertible
+    void reverse_transform(idx_t n, const float* xt, float* x) const override;
 };


@@ -271,13 +278,23 @@ struct IndexPreTransform: Index {
        float* distances,
        idx_t* labels) const override;

+    void reconstruct (idx_t key, float * recons) const override;
+
    void reconstruct_n (idx_t i0, idx_t ni, float *recons)
        const override;

+    void search_and_reconstruct (idx_t n, const float *x, idx_t k,
+                                 float *distances, idx_t *labels,
+                                 float *recons) const override;
+
    /// apply the transforms in the chain. The returned float * may be
    /// equal to x, otherwise it should be deallocated.
    const float * apply_chain (idx_t n, const float *x) const;

+    /// Reverse the transforms in the chain. May not be implemented for
+    /// all transforms in the chain or may return approximate results.
+    void reverse_chain (idx_t n, const float* xt, float* x) const;
+
    ~IndexPreTransform() override;
 };


--- a/benchs/bench_hnsw.py
+++ b/benchs/bench_hnsw.py
--- a/faiss.py
+++ b/faiss.py
@@ -427,7 +427,7 @@ def replacement_map_add(self, keys, vals):

 def replacement_map_search_multiple(self, keys):
    n, = keys.shape
-    vals = np.empty(n, dtype='uint64')
+    vals = np.empty(n, dtype='int64')
    self.search_multiple_c(n, swig_ptr(keys), swig_ptr(vals))
    return vals


--- a/gpu/Makefile
+++ b/gpu/Makefile
@@ -150,6 +150,8 @@ dep:
 ../VectorTransform.h ../MetaIndexes.h GpuIndexFlat.h GpuIndexIVFFlat.h \
 GpuIndexIVF.h ../Clustering.h GpuIndexIVFPQ.h IndexProxy.h \
 utils/WorkerThread.h
+./GpuClonerOptions.o: GpuClonerOptions.cpp GpuClonerOptions.h \
+ GpuIndicesOptions.h
 impl/RemapIndices.o: impl/RemapIndices.cpp impl/RemapIndices.h \
 impl/../../FaissAssert.h impl/../../FaissException.h
 utils/DeviceMemory.o: utils/DeviceMemory.cpp utils/DeviceMemory.h \

--- a/gpu/test/test_gpu_index.py
+++ b/gpu/test/test_gpu_index.py
@@ -111,4 +111,4 @@ class EvalIVFPQAccuracy(unittest.TestCase):
        index = faiss.index_factory(12, "PCAR8,IVF10,PQ4")
        res = faiss.StandardGpuResources()
        gpu_index = faiss.index_cpu_to_gpu(res, 0, index)
-        faiss.GpuParameterSpace().set_index_parameter(index, "nprobe", 3)
+        faiss.GpuParameterSpace().set_index_parameter(gpu_index, "nprobe", 3)
--- a/index_io.cpp
+++ b/index_io.cpp
--- a/swigfaiss.swig
+++ b/swigfaiss.swig
--- a/tests/demo_auto_tune.py
+++ b/tests/demo_auto_tune.py
@@ -9,7 +9,6 @@
 import os
 import time
 import numpy as np
-import pdb

 try:
    import matplotlib

--- a/tests/test_build_blocks.py
+++ b/tests/test_build_blocks.py
--- a/tests/test_factory.py
+++ b/tests/test_factory.py
@@ -6,7 +6,6 @@

 #! /usr/bin/env python2

-import numpy as np
 import unittest
 import faiss


--- a/tests/test_index.py
+++ b/tests/test_index.py
--- a/utils.cpp
+++ b/utils.cpp
--- a/utils.h
+++ b/utils.h
@@ -195,6 +195,8 @@ void fvec_L2sqr_by_idx (
 * KNN functions
 ***************************************************************************/

+// threshold on nx above which we switch to BLAS to compute distances
+extern int distance_compute_blas_threshold;

 /** Return the k nearest neighors of each of the nx vectors x among the ny
 *  vector y, w.r.t to max inner product