Commit 9933892e authored by matthijs

sync with FB version 2017-01-09

- adding HNSW indexing method

- simultaneous search and reconstruction for IndexIVFPQ
parent 5b45b055
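
For reference, a minimal usage sketch of the new index type (editor's illustration, not part of the commit; nb, nq, xb, xq are placeholder arguments):

    #include <vector>
    #include "IndexHNSW.h"

    void hnsw_demo (size_t nb, const float *xb, size_t nq, const float *xq) {
        int d = 64, M = 32;                    // dimension, links per node
        faiss::IndexHNSWFlat index (d, M);     // flat storage: no training needed
        index.hnsw.efConstruction = 40;        // graph quality at add time
        index.add (nb, xb);
        index.hnsw.efSearch = 64;              // expansion factor at query time
        std::vector<float> D (nq * 10);
        std::vector<faiss::Index::idx_t> I (nq * 10);
        index.search (nq, xq, 10, D.data(), I.data());
    }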
@@ -23,6 +23,7 @@
#include "IndexIVFPQ.h"
#include "MetaIndexes.h"
#include "IndexScalarQuantizer.h"
+#include "IndexHNSW.h"

namespace faiss {
@@ -321,6 +322,11 @@ static void init_pq_ParameterRange (const ProductQuantizer & pq,
ParameterRange &ParameterSpace::add_range(const char * name)
{
+    for (auto & pr : parameter_ranges) {
+        if (pr.name == name) {
+            return pr;
+        }
+    }
    parameter_ranges.push_back (ParameterRange ());
    parameter_ranges.back ().name = name;
    return parameter_ranges.back ();
@@ -353,6 +359,12 @@ void ParameterSpace::initialize (const Index * index)
            pr.values.push_back (nprobe);
        }
    }
+        if (dynamic_cast<const IndexHNSW*>(ix->quantizer)) {
+            ParameterRange & pr = add_range("efSearch");
+            for (int i = 2; i <= 9; i++) {
+                pr.values.push_back (1 << i);
+            }
+        }
    }
    if (DC (IndexPQ)) {
        ParameterRange & pr = add_range("ht");
@@ -361,7 +373,9 @@ void ParameterSpace::initialize (const Index * index)
    if (DC (IndexIVFPQ)) {
        ParameterRange & pr = add_range("ht");
        init_pq_ParameterRange (ix->pq, pr);
+    }
+    if (DC (IndexIVF)) {
        const MultiIndexQuantizer *miq =
            dynamic_cast<const MultiIndexQuantizer *> (ix->quantizer);
        if (miq) {
@@ -378,6 +392,12 @@ void ParameterSpace::initialize (const Index * index)
            pr.values.push_back (1 << i);
        }
    }
+    if (dynamic_cast<const IndexHNSW*>(index)) {
+        ParameterRange & pr = add_range("efSearch");
+        for (int i = 2; i <= 9; i++) {
+            pr.values.push_back (1 << i);
+        }
+    }
}
#undef DC
@@ -489,7 +509,7 @@ void ParameterSpace::set_index_parameter (
        }
    }
    if (name == "max_codes") {
-        if (DC (IndexIVFPQ)) {
+        if (DC (IndexIVF)) {
            ix->max_codes = finite(val) ? size_t(val) : 0;
            return;
        }
@@ -683,7 +703,7 @@ Index *index_factory (int d, const char *description_in, MetricType metric)
    for (char *tok = strtok_r (description, " ,", &ptr);
         tok;
         tok = strtok_r (nullptr, " ,", &ptr)) {
-        int d_out, opq_M, nbit, M, M2;
+        int d_out, opq_M, nbit, M, M2, pq_m, ncent;
        std::string stok(tok);
        // to avoid mem leaks with exceptions:
@@ -793,7 +813,6 @@ Index *index_factory (int d, const char *description_in, MetricType metric)
            index_pq->do_polysemous_training = do_polysemous_training;
            index_1 = index_pq;
        }
    } else if (stok == "RFlat") {
        make_IndexRefineFlat = true;
    } else {
@@ -841,7 +860,7 @@ Index *index_factory (int d, const char *description_in, MetricType metric)
        index_pt->own_fields = true;
        // add from back
        while (vts.chain.size() > 0) {
-            index_pt->prepend_transform (vts.chain.back());
+            index_pt->prepend_transform (vts.chain.back ());
            vts.chain.pop_back ();
        }
        index = index_pt;
...
@@ -162,7 +162,7 @@ struct ParameterSpace {
    /// print a description on stdout
    void display () const;

-    /// add a new parameter
+    /// add a new parameter (or return it if it exists)
    ParameterRange &add_range(const char * name);

    /// initialize with reasonable parameters for the index
...
@@ -65,8 +65,9 @@ static double imbalance_factor (int n, int k, long *assign) {
void Clustering::train (idx_t nx, const float *x_in, Index & index) {
-    FAISS_THROW_IF_NOT_MSG (nx >= k,
-        "need at least as many training points as clusters");
+    FAISS_THROW_IF_NOT_FMT (nx >= k,
+        "Number of training points (%ld) should be at least "
+        "as large as number of clusters (%ld)", nx, k);
    double t0 = getmillisecs();
@@ -100,12 +101,26 @@ void Clustering::train (idx_t nx, const float *x_in, Index & index) {
    }

+    if (nx == k) {
+        if (verbose) {
+            printf("Number of training points (%ld) same as number of "
+                   "clusters, just copying\n", nx);
+        }
+        // this is a corner case, just copy training set to clusters
+        centroids.resize (d * k);
+        memcpy (centroids.data(), x_in, sizeof (*x_in) * d * k);
+        return;
+    }

    if (verbose)
        printf("Clustering %d points in %ldD to %ld clusters, "
               "redo %d times, %d iterations\n",
               int(nx), d, k, nredo, niter);

    idx_t * assign = new idx_t[nx];
    ScopeDeleter<idx_t> del (assign);
    float * dis = new float[nx];
...
@@ -179,7 +179,7 @@ void maxheap_push (size_t k, T * bh_val, long * bh_ids, T val, long ids)
 * Heap initialization
 *******************************************************************/

-/* Initialization phase for the heap (with inconditionnal pushes).
+/* Initialization phase for the heap (with unconditional pushes).
 * Store k0 elements in a heap containing up to k values. Note that
 * (bh_val, bh_ids) can be the same as (x, ids) */
template <class C> inline
...
@@ -11,8 +11,19 @@
#include "IndexFlat.h"
#include "FaissAssert.h"
+#include <cstring>

namespace faiss {

+Index::~Index ()
+{
+}
+
+void Index::train(idx_t /*n*/, const float* /*x*/) {
+    // does nothing by default
+}
+
void Index::range_search (idx_t , const float *, float,
                          RangeSearchResult *) const
@@ -52,6 +63,25 @@ void Index::reconstruct_n (idx_t i0, idx_t ni, float *recons) const {
}

+void Index::search_and_reconstruct (idx_t n, const float *x, idx_t k,
+                                    float *distances, idx_t *labels,
+                                    float *recons) const {
+    search (n, x, k, distances, labels);
+    for (idx_t i = 0; i < n; ++i) {
+        for (idx_t j = 0; j < k; ++j) {
+            idx_t ij = i * k + j;
+            idx_t key = labels[ij];
+            float* reconstructed = recons + ij * d;
+            if (key < 0) {
+                // fill with NaNs (memset with 0xFF bytes yields float NaNs)
+                memset(reconstructed, -1, sizeof(*reconstructed) * d);
+            } else {
+                reconstruct (key, reconstructed);
+            }
+        }
+    }
+}
+
void Index::compute_residual (const float * x,
                              float * residual, idx_t key) const {
...
@@ -71,14 +71,14 @@ struct Index {
    /// type of metric this index uses for search
    MetricType metric_type;

-    explicit Index (idx_t d = 0, MetricType metric = METRIC_INNER_PRODUCT):
+    explicit Index (idx_t d = 0, MetricType metric = METRIC_L2):
        d(d),
        ntotal(0),
        verbose(false),
        is_trained(true),
        metric_type (metric) {}

-    virtual ~Index () { }
+    virtual ~Index ();

    /** Perform training on a representative set of vectors
@@ -86,9 +86,7 @@ struct Index {
     * @param n      nb of training vectors
     * @param x      training vectors, size n * d
     */
-    virtual void train(idx_t /*n*/, const float* /*x*/) {
-        // does nothing by default
-    }
+    virtual void train(idx_t n, const float* x);

    /** Add n vectors of dimension d to the index.
     *
@@ -164,6 +162,17 @@ struct Index {
     */
    virtual void reconstruct_n (idx_t i0, idx_t ni, float *recons) const;

+    /** Similar to search, but also reconstructs the stored vectors (or an
+     * approximation in the case of lossy coding) for the search results.
+     *
+     * If there are not enough results for a query, the resulting array
+     * is padded with -1s.
+     *
+     * @param recons      reconstructed vectors, size (n, k, d)
+     **/
+    virtual void search_and_reconstruct (idx_t n, const float *x, idx_t k,
+                                         float *distances, idx_t *labels,
+                                         float *recons) const;
+
    /** Computes a residual vector after indexing encoding.
     *
...
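
A short sketch of the new search_and_reconstruct call declared above (editor's illustration, not part of the commit; assumes a populated index):

    #include <vector>
    #include "Index.h"

    void demo_search_and_reconstruct (const faiss::Index & index,
                                      size_t nq, const float *xq) {
        faiss::Index::idx_t k = 5;
        std::vector<float> D (nq * k);
        std::vector<faiss::Index::idx_t> I (nq * k);
        std::vector<float> R (nq * k * index.d);   // layout (n, k, d)
        index.search_and_reconstruct (nq, xq, k, D.data(), I.data(), R.data());
        // reconstruction of result j of query i: R.data() + (i * k + j) * index.d
    }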
/**
* Copyright (c) 2015-present, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under the BSD+Patents license found in the
* LICENSE file in the root directory of this source tree.
*/
#include "IndexHNSW.h"
#include <cstdlib>
#include <cassert>
#include <cstring>
#include <cstdio>
#include <cmath>
#include <omp.h>
#include <unordered_set>
#include <queue>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <stdint.h>
#include <immintrin.h>
#include "utils.h"
#include "Heap.h"
#include "FaissAssert.h"
#include "IndexFlat.h"
#include "IndexIVFPQ.h"
extern "C" {
/* declare BLAS functions, see http://www.netlib.org/clapack/cblas/ */
int sgemm_ (const char *transa, const char *transb, FINTEGER *m, FINTEGER *
n, FINTEGER *k, const float *alpha, const float *a,
FINTEGER *lda, const float *b, FINTEGER *
ldb, float *beta, float *c, FINTEGER *ldc);
}
namespace faiss {
/**************************************************************
* Auxiliary structures
**************************************************************/
/// set implementation optimized for fast access.
struct VisitedTable {
std::vector<uint8_t> visited;
int visno;
VisitedTable(int size):
visited(size), visno(1)
{}
/// set flag #no to true
void set(int no) {
visited[no] = visno;
}
/// get flag #no
bool get(int no) const {
return visited[no] == visno;
}
/// reset all flags to false
void advance() {
visno++;
if (visno == 250) {
// 250 rather than 255 because sometimes we use visno and visno+1
memset (visited.data(), 0, sizeof(visited[0]) * visited.size());
visno = 1;
}
}
};
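// Editor's note: the generation counter makes clearing the table O(1):
// advance() bumps visno so that all previously written entries compare
// unequal, and a full memset is needed only when the uint8_t counter
// approaches wraparound. E.g. after set(3); advance(); get(3) is false
// without ever touching visited[3].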
namespace {
typedef HNSW::idx_t idx_t;
typedef HNSW::storage_idx_t storage_idx_t;
typedef HNSW::DistanceComputer DistanceComputer;
// typedef ::faiss::VisitedTable VisitedTable;
/// to sort pairs of (id, distance) from nearest to farthest or the reverse
struct NodeDistCloser {
float d;
int id;
NodeDistCloser(float d, int id): d(d), id(id) {}
bool operator<(const NodeDistCloser &obj1) const { return d < obj1.d; }
};
struct NodeDistFarther {
float d;
int id;
NodeDistFarther(float d, int id): d(d), id(id) {}
bool operator<(const NodeDistFarther &obj1) const { return d > obj1.d; }
};
/** Bounded heap of candidates: O(1) access to the largest distance,
 *  linear-time extraction of the smallest */
struct MinimaxHeap {
int n;
int k;
int nvalid;
std::vector<storage_idx_t> ids;
std::vector<float> dis;
typedef faiss::CMax<float, storage_idx_t> HC;
explicit MinimaxHeap(int n): n(n), k(0), nvalid(0), ids(n), dis(n) {}
void push(storage_idx_t i, float v)
{
if (k == n) {
if (v >= dis[0]) return;
faiss::heap_pop<HC> (k--, dis.data(), ids.data());
nvalid--;
}
faiss::heap_push<HC> (++k, dis.data(), ids.data(), v, i);
nvalid++;
}
float max() const
{
return dis[0];
}
int size() const {return nvalid;}
void clear() {nvalid = k = 0; }
int pop_min(float *vmin_out = nullptr)
{
assert(k > 0);
// returns min. This is an O(n) operation
int i = k - 1;
while (i >= 0) {
if (ids[i] != -1) break;
i--;
}
if (i == -1) return -1;
int imin = i;
float vmin = dis[i];
i--;
while(i >= 0) {
if (ids[i] != -1 && dis[i] < vmin) {
vmin = dis[i];
imin = i;
}
i--;
}
if (vmin_out) *vmin_out = vmin;
int ret = ids[imin];
ids[imin] = -1;
nvalid --;
return ret;
}
int count_below(float thresh) {
int n_below = 0;
for(int i = 0; i < k; i++) {
if (dis[i] < thresh)
n_below++;
}
return n_below;
}
};
/**************************************************************
* Addition subroutines
**************************************************************/
/** Enumerate vertices from farthest to nearest from query, keep a
* neighbor only if there is no previous neighbor that is closer to
* that vertex than the query.
*/
void shrink_neighbor_list(DistanceComputer & qdis,
std::priority_queue<NodeDistFarther> &input,
std::vector<NodeDistFarther> &output,
int max_size)
{
while (input.size() > 0) {
NodeDistFarther v1 = input.top();
input.pop();
float dist_v1_q = v1.d;
bool good = true;
for (NodeDistFarther v2 : output) {
float dist_v1_v2 = qdis.symmetric_dis(v2.id, v1.id);
if (dist_v1_v2 < dist_v1_q) {
good = false;
break;
}
}
if (good) {
output.push_back(v1);
if (output.size() >= max_size)
return;
}
}
}
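// Editor's note, a worked micro-example of the rule above: with neighbors at
// distances 1.0 (a) and 1.2 (b) from the query, if dist(a, b) = 0.5 < 1.2
// then b is dropped: a already covers b's direction, and the edge is better
// spent on a more diverse neighbor (the HNSW paper's selection heuristic).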
/// remove neighbors from the list to make it smaller than max_size
void shrink_neighbor_list(DistanceComputer & qdis,
std::priority_queue<NodeDistCloser> &resultSet1,
int max_size)
{
if (resultSet1.size() < max_size) {
return;
}
std::priority_queue<NodeDistFarther> resultSet;
std::vector<NodeDistFarther> returnlist;
while (resultSet1.size() > 0) {
resultSet.emplace(resultSet1.top().d, resultSet1.top().id);
resultSet1.pop();
}
shrink_neighbor_list (qdis, resultSet, returnlist, max_size);
for (NodeDistFarther curen2 : returnlist) {
resultSet1.emplace(curen2.d, curen2.id);
}
}
/// add a link between two elements, possibly shrinking the list
/// of links to make room for it.
void add_link(HNSW & hnsw,
DistanceComputer & qdis,
storage_idx_t src, storage_idx_t dest,
int level)
{
size_t begin, end;
hnsw.neighbor_range(src, level, &begin, &end);
if (hnsw.neighbors[end - 1] == -1) {
// there is enough room, find a slot to add it
size_t i = end;
while(i > begin) {
if (hnsw.neighbors[i - 1] != -1) break;
i--;
}
hnsw.neighbors[i] = dest;
return;
}
// otherwise we let them fight out which to keep
// copy to resultSet...
std::priority_queue<NodeDistCloser> resultSet;
resultSet.emplace(qdis.symmetric_dis(src, dest), dest);
for (size_t i = begin; i < end; i++) { // copy all existing neighbors
storage_idx_t neigh = hnsw.neighbors[i];
resultSet.emplace(qdis.symmetric_dis(src, neigh), neigh);
}
shrink_neighbor_list(qdis, resultSet, end - begin);
// ...and back
size_t i = begin;
while (resultSet.size()) {
hnsw.neighbors[i++] = resultSet.top().id;
resultSet.pop();
}
// they may have shrunk more than just by 1 element
while(i < end) {
hnsw.neighbors[i++] = -1;
}
}
/// search neighbors on a single level, starting from an entry point
void search_neighbors_to_add(HNSW & hnsw,
DistanceComputer &qdis,
std::priority_queue<NodeDistCloser> &results,
int entry_point,
float d_entry_point,
int level,
VisitedTable &vt)
{
// top is nearest candidate
std::priority_queue<NodeDistFarther> candidates;
NodeDistFarther ev(d_entry_point, entry_point);
candidates.push(ev);
results.emplace(d_entry_point, entry_point);
vt.set(entry_point);
while (!candidates.empty()) {
// get nearest
const NodeDistFarther &currEv = candidates.top();
if (currEv.d > results.top().d) {
break;
}
int currNode = currEv.id;
candidates.pop();
// loop over neighbors
size_t begin, end;
hnsw.neighbor_range(currNode, level, &begin, &end);
for(size_t i = begin; i < end; i++) {
storage_idx_t nodeId = hnsw.neighbors[i];
if (nodeId < 0) break;
if (vt.get(nodeId)) continue;
vt.set(nodeId);
float dis = qdis(nodeId);
NodeDistFarther evE1(dis, nodeId);
if (results.size() < hnsw.efConstruction ||
results.top().d > dis) {
results.emplace(dis, nodeId);
candidates.emplace(dis, nodeId);
if (results.size() > hnsw.efConstruction) {
results.pop();
}
}
}
}
vt.advance();
}
/// Finds neighbors and builds links with them, starting from an entry
/// point. The own neighbor list is assumed to be locked.
void add_links_starting_from(HNSW & hnsw,
DistanceComputer &ptdis,
storage_idx_t pt_id,
storage_idx_t nearest,
float d_nearest,
int level,
omp_lock_t * locks,
VisitedTable &vt)
{
std::priority_queue<NodeDistCloser> link_targets;
search_neighbors_to_add(
hnsw, ptdis, link_targets, nearest, d_nearest,
level, vt);
// but we can afford only this many neighbors
int M = hnsw.nb_neighbors(level);
shrink_neighbor_list(ptdis, link_targets, M);
while (!link_targets.empty()) {
int other_id = link_targets.top().id;
omp_set_lock(&locks[other_id]);
add_link(hnsw, ptdis, other_id, pt_id, level);
omp_unset_lock(&locks[other_id]);
add_link(hnsw, ptdis, pt_id, other_id, level);
link_targets.pop();
}
}
/**************************************************************
* Searching subroutines
**************************************************************/
/// greedily update a nearest vector at a given level
void greedy_update_nearest(const HNSW & hnsw,
DistanceComputer & qdis,
int level,
storage_idx_t & nearest,
float & d_nearest)
{
for(;;) {
storage_idx_t prev_nearest = nearest;
size_t begin, end;
hnsw.neighbor_range(nearest, level, &begin, &end);
for(size_t i = begin; i < end; i++) {
storage_idx_t v = hnsw.neighbors[i];
if (v < 0) break;
float dis = qdis(v);
if (dis < d_nearest) {
nearest = v;
d_nearest = dis;
}
}
if (nearest == prev_nearest) {
return;
}
}
}
/** Do a BFS on the candidates list */
int search_from_candidates(const HNSW & hnsw,
DistanceComputer & qdis, int k,
idx_t *I, float * D,
MinimaxHeap &candidates,
VisitedTable &vt,
int level, int nres_in = 0)
{
int nres = nres_in;
int ndis = 0;
for (int i = 0; i < candidates.size(); i++) {
idx_t v1 = candidates.ids[i];
float d = candidates.dis[i];
FAISS_ASSERT(v1 >= 0);
if (nres < k) {
faiss::maxheap_push (++nres, D, I, d, v1);
} else if (d < D[0]) {
faiss::maxheap_pop (nres--, D, I);
faiss::maxheap_push (++nres, D, I, d, v1);
}
vt.set(v1);
}
bool do_dis_check = hnsw.check_relative_distance;
int nstep = 0;
while (candidates.size() > 0) {
float d0 = 0;
int v0 = candidates.pop_min(&d0);
if (do_dis_check) {
// tricky stopping condition: more than ef distances
// smaller than d0 have already been processed
int n_dis_below = candidates.count_below(d0);
if(n_dis_below >= hnsw.efSearch) {
break;
}
}
size_t begin, end;
hnsw.neighbor_range(v0, level, &begin, &end);
for (size_t j = begin; j < end; j++) {
int v1 = hnsw.neighbors[j];
if (v1 < 0) break;
if (vt.get(v1)) {
continue;
}
vt.set(v1);
ndis++;
float d = qdis(v1);
if (nres < k) {
faiss::maxheap_push (++nres, D, I, d, v1);
} else if (d < D[0]) {
faiss::maxheap_pop (nres--, D, I);
faiss::maxheap_push (++nres, D, I, d, v1);
}
candidates.push(v1, d);
}
nstep++;
if (!do_dis_check && nstep > hnsw.efSearch) {
break;
}
}
if (level == 0) {
#pragma omp critical
{
hnsw_stats.n1 ++;
if (candidates.size() == 0)
hnsw_stats.n2 ++;
hnsw_stats.n3 += ndis;
}
}
return nres;
}
} // anonymous namespace
/**************************************************************
* HNSW structure implementation
**************************************************************/
int HNSW::nb_neighbors(int layer_no) const
{
return cum_nneighbor_per_level[layer_no + 1] -
cum_nneighbor_per_level[layer_no];
}
void HNSW::set_nb_neighbors(int level_no, int n)
{
FAISS_THROW_IF_NOT(levels.size() == 0);
int cur_n = nb_neighbors(level_no);
for (int i = level_no + 1; i < cum_nneighbor_per_level.size(); i++) {
cum_nneighbor_per_level[i] += n - cur_n;
}
}
int HNSW::cum_nb_neighbors(int layer_no) const
{
return cum_nneighbor_per_level[layer_no];
}
void HNSW::neighbor_range(idx_t no, int layer_no,
size_t * begin, size_t * end) const
{
size_t o = offsets[no];
*begin = o + cum_nb_neighbors(layer_no);
*end = o + cum_nb_neighbors(layer_no + 1);
}
HNSW::HNSW(int M): rng(12345) {
set_default_probas(M, 1.0 / log(M));
max_level = -1;
entry_point = -1;
efSearch = 16;
check_relative_distance = true;
efConstruction = 40;
upper_beam = 1;
offsets.push_back(0);
}
int HNSW::random_level()
{
double f = rng.rand_float();
// could be a bit faster with bisection
for (int level = 0; level < assign_probas.size(); level++) {
if (f < assign_probas[level]) {
return level;
}
f -= assign_probas[level];
}
// happens with exponentially low probability
return assign_probas.size() - 1;
}
void HNSW::set_default_probas(int M, float levelMult)
{
int nn = 0;
cum_nneighbor_per_level.push_back (0);
for (int level = 0; ;level++) {
float proba = exp(-level / levelMult) * (1 - exp(-1 / levelMult));
if (proba < 1e-9) break;
assign_probas.push_back(proba);
nn += level == 0 ? M * 2 : M;
cum_nneighbor_per_level.push_back (nn);
}
}
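// Editor's note: this yields a geometric level distribution,
//   P(level) = exp(-level / levelMult) * (1 - exp(-1 / levelMult)),
// which sums to 1 over level = 0, 1, 2, ...  With levelMult = 1 / log(M)
// (see the constructor), P(level >= l) = M^-l: each layer is M times sparser.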
void HNSW::clear_neighbor_tables(int level)
{
for (int i = 0; i < levels.size(); i++) {
size_t begin, end;
neighbor_range(i, level, &begin, &end);
for (size_t j = begin; j < end; j++)
neighbors[j] = -1;
}
}
void HNSW::reset() {
max_level = -1;
entry_point = -1;
offsets.clear();
offsets.push_back(0);
levels.clear();
neighbors.clear();
}
void HNSW::print_neighbor_stats(int level) const
{
FAISS_THROW_IF_NOT (level < cum_nneighbor_per_level.size());
printf("stats on level %d, max %d neighbors per vertex:\n",
level, nb_neighbors(level));
size_t tot_neigh = 0, tot_common = 0, tot_reciprocal = 0, n_node = 0;
#pragma omp parallel for reduction(+: tot_neigh) reduction(+: tot_common) \
reduction(+: tot_reciprocal) reduction(+: n_node)
for (int i = 0; i < levels.size(); i++) {
if (levels[i] > level) {
n_node++;
size_t begin, end;
neighbor_range(i, level, &begin, &end);
std::unordered_set<int> neighset;
for (size_t j = begin; j < end; j++) {
if (neighbors [j] < 0) break;
neighset.insert(neighbors[j]);
}
int n_neigh = neighset.size();
int n_common = 0;
int n_reciprocal = 0;
for (size_t j = begin; j < end; j++) {
storage_idx_t i2 = neighbors[j];
if (i2 < 0) break;
FAISS_ASSERT(i2 != i);
size_t begin2, end2;
neighbor_range(i2, level, &begin2, &end2);
for (size_t j2 = begin2; j2 < end2; j2++) {
storage_idx_t i3 = neighbors[j2];
if (i3 < 0) break;
if (i3 == i) {
n_reciprocal++;
continue;
}
if (neighset.count(i3)) {
neighset.erase(i3);
n_common++;
}
}
}
tot_neigh += n_neigh;
tot_common += n_common;
tot_reciprocal += n_reciprocal;
}
}
float normalizer = n_node;
printf(" nb of nodes at that level %ld\n", n_node);
printf(" neighbors per node: %.2f (%ld)\n", tot_neigh / normalizer, tot_neigh);
printf(" nb of reciprocal neighbors: %.2f\n", tot_reciprocal / normalizer);
printf(" nb of neighbors that are also neighbor-of-neighbors: %.2f (%ld)\n",
tot_common / normalizer, tot_common);
}
HNSWStats hnsw_stats;
void HNSWStats::reset ()
{
memset(this, 0, sizeof(*this));
}
/**************************************************************
* Building, parallel
**************************************************************/
void HNSW::add_with_locks(
DistanceComputer & ptdis, int pt_level, int pt_id,
std::vector<omp_lock_t> & locks,
VisitedTable &vt)
{
// greedy search on upper levels
storage_idx_t nearest;
#pragma omp critical
{
nearest = entry_point;
if (nearest == -1) {
max_level = pt_level;
entry_point = pt_id;
}
}
if (nearest < 0) {
return;
}
omp_set_lock(&locks[pt_id]);
int level = max_level; // level at which we start adding neighbors
float d_nearest = ptdis(nearest);
for(; level > pt_level; level--) {
greedy_update_nearest(*this, ptdis, level, nearest, d_nearest);
}
for(; level >= 0; level--) {
add_links_starting_from(*this, ptdis, pt_id, nearest, d_nearest,
level, locks.data(), vt);
}
omp_unset_lock(&locks[pt_id]);
if (pt_level > max_level) {
max_level = pt_level;
entry_point = pt_id;
}
}
/**************************************************************
* Searching
**************************************************************/
void HNSW::search(DistanceComputer & qdis,
int k, idx_t *I, float * D,
VisitedTable &vt) const
{
if (upper_beam == 1) {
// greedy search on upper levels
storage_idx_t nearest = entry_point;
float d_nearest = qdis(nearest);
for(int level = max_level; level >= 1; level--) {
greedy_update_nearest(*this, qdis, level, nearest, d_nearest);
}
int candidates_size = std::max(efSearch, k);
MinimaxHeap candidates(candidates_size);
candidates.push(nearest, d_nearest);
search_from_candidates (
*this, qdis, k, I, D, candidates, vt, 0);
vt.advance();
} else {
int candidates_size = upper_beam;
MinimaxHeap candidates(candidates_size);
std::vector<idx_t> I_to_next(candidates_size);
std::vector<float> D_to_next(candidates_size);
int nres = 1;
I_to_next[0] = entry_point;
D_to_next[0] = qdis(entry_point);
for(int level = max_level; level >= 0; level--) {
// copy I, D -> candidates
candidates.clear();
for (int i = 0; i < nres; i++) {
candidates.push(I_to_next[i], D_to_next[i]);
}
if (level == 0) {
nres = search_from_candidates (
*this, qdis, k, I, D, candidates, vt, 0);
} else {
nres = search_from_candidates (
*this, qdis, candidates_size,
I_to_next.data(), D_to_next.data(),
candidates, vt, level);
}
vt.advance();
}
}
}
/**************************************************************
* add / search blocks of descriptors
**************************************************************/
namespace {
int prepare_level_tab (HNSW & hnsw, size_t n, bool preset_levels = false)
{
size_t n0 = hnsw.offsets.size() - 1;
if (preset_levels) {
FAISS_ASSERT (n0 + n == hnsw.levels.size());
} else {
FAISS_ASSERT (n0 == hnsw.levels.size());
for (int i = 0; i < n; i++) {
int pt_level = hnsw.random_level();
hnsw.levels.push_back(pt_level + 1);
}
}
int max_level = 0;
for (int i = 0; i < n; i++) {
int pt_level = hnsw.levels[i + n0] - 1;
if (pt_level > max_level) max_level = pt_level;
hnsw.offsets.push_back(hnsw.offsets.back() +
hnsw.cum_nb_neighbors(pt_level + 1));
hnsw.neighbors.resize(hnsw.offsets.back(), -1);
}
return max_level;
}
void hnsw_add_vertices(IndexHNSW &index_hnsw,
size_t n0,
size_t n, const float *x,
bool verbose,
bool preset_levels = false) {
HNSW & hnsw = index_hnsw.hnsw;
size_t ntotal = n0 + n;
double t0 = getmillisecs();
if (verbose) {
printf("hnsw_add_vertices: adding %ld elements on top of %ld "
"(preset_levels=%d)\n",
n, n0, int(preset_levels));
}
int max_level = prepare_level_tab (index_hnsw.hnsw, n, preset_levels);
if (verbose) {
printf(" max_level = %d\n", max_level);
}
std::vector<omp_lock_t> locks(ntotal);
for(int i = 0; i < ntotal; i++)
omp_init_lock(&locks[i]);
// add vectors from highest to lowest level
std::vector<int> hist;
std::vector<int> order(n);
{ // make buckets with vectors of the same level
// build histogram
for (int i = 0; i < n; i++) {
storage_idx_t pt_id = i + n0;
int pt_level = hnsw.levels[pt_id] - 1;
while (pt_level >= hist.size())
hist.push_back(0);
hist[pt_level] ++;
}
// accumulate
std::vector<int> offsets(hist.size() + 1, 0);
for (int i = 0; i < hist.size() - 1; i++) {
offsets[i + 1] = offsets[i] + hist[i];
}
// bucket sort
for (int i = 0; i < n; i++) {
storage_idx_t pt_id = i + n0;
int pt_level = hnsw.levels[pt_id] - 1;
order[offsets[pt_level]++] = pt_id;
}
}
{ // perform add
RandomGenerator rng2(789);
int i1 = n;
for (int pt_level = hist.size() - 1; pt_level >= 0; pt_level--) {
int i0 = i1 - hist[pt_level];
if (verbose) {
printf("Adding %d elements at level %d\n",
i1 - i0, pt_level);
}
// random permutation to get rid of dataset order bias
for (int j = i0; j < i1; j++)
std::swap(order[j], order[j + rng2.rand_int(i1 - j)]);
#pragma omp parallel
{
VisitedTable vt (ntotal);
DistanceComputer *dis = index_hnsw.get_distance_computer();
ScopeDeleter1<DistanceComputer> del(dis);
int prev_display = verbose && omp_get_thread_num() == 0 ? 0 : -1;
#pragma omp for schedule(dynamic)
for (int i = i0; i < i1; i++) {
storage_idx_t pt_id = order[i];
dis->set_query (x + (pt_id - n0) * dis->d);
hnsw.add_with_locks (
*dis, pt_level, pt_id, locks,
vt);
if (prev_display >= 0 && i - i0 > prev_display + 10000) {
prev_display = i - i0;
printf(" %d / %d\r", i - i0, i1 - i0);
fflush(stdout);
}
}
}
i1 = i0;
}
FAISS_ASSERT(i1 == 0);
}
if (verbose)
printf("Done in %.3f ms\n", getmillisecs() - t0);
for(int i = 0; i < ntotal; i++)
omp_destroy_lock(&locks[i]);
}
} // anonymous namespace
void HNSW::fill_with_random_links(size_t n)
{
int max_level = prepare_level_tab (*this, n);
RandomGenerator rng2(456);
for (int level = max_level - 1; level >= 0; level--) {
std::vector<int> elts;
for (int i = 0; i < n; i++) {
if (levels[i] > level) {
elts.push_back(i);
}
}
printf ("linking %ld elements in level %d\n",
elts.size(), level);
if (elts.size() == 1) continue;
for (int ii = 0; ii < elts.size(); ii++) {
int i = elts[ii];
size_t begin, end;
neighbor_range(i, 0, &begin, &end);
for (size_t j = begin; j < end; j++) {
int other = 0;
do {
other = elts[rng2.rand_int(elts.size())];
} while(other == i);
neighbors[j] = other;
}
}
}
}
/**************************************************************
* IndexHNSW implementation
**************************************************************/
IndexHNSW::IndexHNSW(int d, int M):
Index(d, METRIC_L2),
hnsw(M),
own_fields(false),
storage(nullptr),
reconstruct_from_neighbors(nullptr)
{}
IndexHNSW::IndexHNSW(Index *storage, int M):
Index(storage->d, METRIC_L2),
hnsw(M),
own_fields(false),
storage(storage),
reconstruct_from_neighbors(nullptr)
{}
IndexHNSW::~IndexHNSW() {
if (own_fields) {
delete storage;
}
}
void IndexHNSW::train(idx_t n, const float* x)
{
// hnsw structure does not require training
storage->train (n, x);
is_trained = true;
}
void IndexHNSW::search (idx_t n, const float *x, idx_t k,
float *distances, idx_t *labels) const
{
#pragma omp parallel
{
VisitedTable vt (ntotal);
DistanceComputer *dis = get_distance_computer();
ScopeDeleter1<DistanceComputer> del(dis);
size_t nreorder = 0;
#pragma omp for
for(int i = 0; i < n; i++) {
idx_t * idxi = labels + i * k;
float * simi = distances + i * k;
dis->set_query(x + i * d);
maxheap_heapify (k, simi, idxi);
hnsw.search (*dis, k, idxi, simi, vt);
maxheap_reorder (k, simi, idxi);
if (reconstruct_from_neighbors &&
reconstruct_from_neighbors->k_reorder != 0) {
int k_reorder = reconstruct_from_neighbors->k_reorder;
if (k_reorder == -1 || k_reorder > k) k_reorder = k;
nreorder += reconstruct_from_neighbors->compute_distances(
k_reorder, idxi, x + i * d, simi);
// sort top k_reorder
maxheap_heapify (k_reorder, simi, idxi, simi, idxi, k_reorder);
maxheap_reorder (k_reorder, simi, idxi);
}
}
#pragma omp critical
{
hnsw_stats.nreorder += nreorder;
}
}
}
void IndexHNSW::add(idx_t n, const float *x)
{
FAISS_THROW_IF_NOT(is_trained);
int n0 = ntotal;
storage->add(n, x);
ntotal = storage->ntotal;
hnsw_add_vertices (*this, n0, n, x, verbose,
hnsw.levels.size() == ntotal);
}
void IndexHNSW::reset()
{
hnsw.reset();
storage->reset();
ntotal = 0;
}
void IndexHNSW::reconstruct (idx_t key, float* recons) const
{
storage->reconstruct(key, recons);
}
void IndexHNSW::shrink_level_0_neighbors(int new_size)
{
#pragma omp parallel
{
DistanceComputer *dis = get_distance_computer();
ScopeDeleter1<DistanceComputer> del(dis);
#pragma omp for
for (idx_t i = 0; i < ntotal; i++) {
size_t begin, end;
hnsw.neighbor_range(i, 0, &begin, &end);
std::priority_queue<NodeDistFarther> initial_list;
for (size_t j = begin; j < end; j++) {
int v1 = hnsw.neighbors[j];
if (v1 < 0) break;
initial_list.emplace(dis->symmetric_dis(i, v1), v1);
// initial_list.emplace(qdis(v1), v1);
}
std::vector<NodeDistFarther> shrunk_list;
shrink_neighbor_list (*dis, initial_list, shrunk_list, new_size);
for (size_t j = begin; j < end; j++) {
if (j - begin < shrunk_list.size())
hnsw.neighbors[j] = shrunk_list[j - begin].id;
else
hnsw.neighbors[j] = -1;
}
}
}
}
void IndexHNSW::search_level_0(
idx_t n, const float *x, idx_t k,
const storage_idx_t *nearest, const float *nearest_d,
float *distances, idx_t *labels, int nprobe,
int search_type) const
{
storage_idx_t ntotal = hnsw.levels.size();
#pragma omp parallel
{
DistanceComputer *qdis = get_distance_computer();
ScopeDeleter1<DistanceComputer> del(qdis);
VisitedTable vt (ntotal);
#pragma omp for
for(idx_t i = 0; i < n; i++) {
idx_t * idxi = labels + i * k;
float * simi = distances + i * k;
qdis->set_query(x + i * d);
maxheap_heapify (k, simi, idxi);
if (search_type == 1) {
int nres = 0;
for(int j = 0; j < nprobe; j++) {
storage_idx_t cj = nearest[i * nprobe + j];
if (cj < 0) break;
if (vt.get(cj)) continue;
int candidates_size = std::max(hnsw.efSearch, int(k));
MinimaxHeap candidates(candidates_size);
candidates.push(cj, nearest_d[i * nprobe + j]);
nres = search_from_candidates (
hnsw, *qdis, k, idxi, simi,
candidates, vt, 0, nres);
}
} else if (search_type == 2) {
int candidates_size = std::max(hnsw.efSearch, int(k));
candidates_size = std::max(candidates_size, nprobe);
MinimaxHeap candidates(candidates_size);
for(int j = 0; j < nprobe; j++) {
storage_idx_t cj = nearest[i * nprobe + j];
if (cj < 0) break;
candidates.push(cj, nearest_d[i * nprobe + j]);
}
search_from_candidates (
hnsw, *qdis, k, idxi, simi,
candidates, vt, 0);
}
vt.advance();
maxheap_reorder (k, simi, idxi);
}
}
}
void IndexHNSW::init_level_0_from_knngraph(
int k, const float *D, const idx_t *I)
{
int dest_size = hnsw.nb_neighbors (0);
#pragma omp parallel for
for (idx_t i = 0; i < ntotal; i++) {
DistanceComputer *qdis = get_distance_computer();
float vec[d];
storage->reconstruct(i, vec);
qdis->set_query(vec);
std::priority_queue<NodeDistFarther> initial_list;
for (size_t j = 0; j < k; j++) {
int v1 = I[i * k + j];
if (v1 == i) continue;
if (v1 < 0) break;
initial_list.emplace(D[i * k + j], v1);
}
std::vector<NodeDistFarther> shrunk_list;
shrink_neighbor_list (*qdis, initial_list, shrunk_list, dest_size);
size_t begin, end;
hnsw.neighbor_range(i, 0, &begin, &end);
for (size_t j = begin; j < end; j++) {
if (j - begin < shrunk_list.size())
hnsw.neighbors[j] = shrunk_list[j - begin].id;
else
hnsw.neighbors[j] = -1;
}
}
}
void IndexHNSW::init_level_0_from_entry_points(
int n, const storage_idx_t *points,
const storage_idx_t *nearests)
{
std::vector<omp_lock_t> locks(ntotal);
for(int i = 0; i < ntotal; i++)
omp_init_lock(&locks[i]);
#pragma omp parallel
{
VisitedTable vt (ntotal);
DistanceComputer *dis = get_distance_computer();
ScopeDeleter1<DistanceComputer> del(dis);
float vec[storage->d];
#pragma omp for schedule(dynamic)
for (int i = 0; i < n; i++) {
storage_idx_t pt_id = points[i];
storage_idx_t nearest = nearests[i];
storage->reconstruct (pt_id, vec);
dis->set_query (vec);
add_links_starting_from(hnsw, *dis, pt_id, nearest, (*dis)(nearest),
0, locks.data(), vt);
if (verbose && i % 10000 == 0) {
printf(" %d / %d\r", i, n);
fflush(stdout);
}
}
}
if (verbose) {
printf("\n");
}
for(int i = 0; i < ntotal; i++)
omp_destroy_lock(&locks[i]);
}
void IndexHNSW::reorder_links()
{
int M = hnsw.nb_neighbors(0);
#pragma omp parallel
{
std::vector<float> distances (M);
std::vector<size_t> order (M);
std::vector<storage_idx_t> tmp (M);
DistanceComputer *dis = get_distance_computer();
ScopeDeleter1<DistanceComputer> del(dis);
#pragma omp for
for(storage_idx_t i = 0; i < ntotal; i++) {
size_t begin, end;
hnsw.neighbor_range(i, 0, &begin, &end);
for (size_t j = begin; j < end; j++) {
storage_idx_t nj = hnsw.neighbors[j];
if (nj < 0) {
end = j;
break;
}
distances[j - begin] = dis->symmetric_dis(i, nj);
tmp [j - begin] = nj;
}
fvec_argsort (end - begin, distances.data(), order.data());
for (size_t j = begin; j < end; j++) {
hnsw.neighbors[j] = tmp[order[j - begin]];
}
}
}
}
void IndexHNSW::link_singletons()
{
printf("search for singletons\n");
std::vector<bool> seen(ntotal);
for (size_t i = 0; i < ntotal; i++) {
size_t begin, end;
hnsw.neighbor_range(i, 0, &begin, &end);
for (size_t j = begin; j < end; j++) {
storage_idx_t ni = hnsw.neighbors[j];
if (ni >= 0) seen[ni] = true;
}
}
int n_sing = 0, n_sing_l1 = 0;
std::vector<storage_idx_t> singletons;
for (storage_idx_t i = 0; i < ntotal; i++) {
if (!seen[i]) {
singletons.push_back(i);
n_sing++;
if (hnsw.levels[i] > 1)
n_sing_l1++;
}
}
printf(" Found %d / %ld singletons (%d appear in a level above)\n",
n_sing, ntotal, n_sing_l1);
std::vector<float>recons(singletons.size() * d);
for (int i = 0; i < singletons.size(); i++) {
FAISS_ASSERT(!"not implemented");
}
}
// storage that explicitly reconstructs vectors before computing distances
struct GenericDistanceComputer: HNSW::DistanceComputer {
const Index & storage;
std::vector<float> buf;
const float *q;
GenericDistanceComputer(const Index & storage): storage(storage)
{
d = storage.d;
buf.resize(d * 2);
}
float operator () (storage_idx_t i) override
{
storage.reconstruct(i, buf.data());
return fvec_L2sqr(q, buf.data(), d);
}
float symmetric_dis(storage_idx_t i, storage_idx_t j) override
{
storage.reconstruct(i, buf.data());
storage.reconstruct(j, buf.data() + d);
return fvec_L2sqr(buf.data() + d, buf.data(), d);
}
void set_query(const float *x) override {
q = x;
}
};
HNSW::DistanceComputer * IndexHNSW::get_distance_computer () const
{
return new GenericDistanceComputer (*storage);
}
/**************************************************************
* ReconstructFromNeighbors implementation
**************************************************************/
ReconstructFromNeighbors::ReconstructFromNeighbors(
const IndexHNSW & index, size_t k, size_t nsq):
index(index), k(k), nsq(nsq) {
M = index.hnsw.nb_neighbors(0);
FAISS_ASSERT(k <= 256);
code_size = k == 1 ? 0 : nsq;
ntotal = 0;
d = index.d;
FAISS_ASSERT(d % nsq == 0);
dsub = d / nsq;
k_reorder = -1;
}
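// Editor's note: the decoder below approximates a stored vector as a weighted
// combination of its own coded version and its M level-0 neighbors,
//   x_hat = beta[0] * decode(x) + sum_{j=1..M} beta[j] * decode(neighbor_j),
// with an independent beta row (picked from a k-entry codebook, one byte
// per subvector in codes) for each of the nsq subvectors.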
void ReconstructFromNeighbors::reconstruct(storage_idx_t i, float *x, float *tmp) const
{
const HNSW & hnsw = index.hnsw;
size_t begin, end;
hnsw.neighbor_range(i, 0, &begin, &end);
if (k == 1 || nsq == 1) {
const float * beta;
if (k == 1) {
beta = codebook.data();
} else {
int idx = codes[i];
beta = codebook.data() + idx * (M + 1);
}
float w0 = beta[0]; // weight of image itself
index.storage->reconstruct(i, tmp);
for (int l = 0; l < d; l++)
x[l] = w0 * tmp[l];
for (size_t j = begin; j < end; j++) {
storage_idx_t ji = hnsw.neighbors[j];
if (ji < 0) ji = i;
float w = beta[j - begin + 1];
index.storage->reconstruct(ji, tmp);
for (int l = 0; l < d; l++)
x[l] += w * tmp[l];
}
} else if (nsq == 2) {
int idx0 = codes[2 * i];
int idx1 = codes[2 * i + 1];
const float *beta0 = codebook.data() + idx0 * (M + 1);
const float *beta1 = codebook.data() + (idx1 + k) * (M + 1);
index.storage->reconstruct(i, tmp);
float w0;
w0 = beta0[0];
for (int l = 0; l < dsub; l++)
x[l] = w0 * tmp[l];
w0 = beta1[0];
for (int l = dsub; l < d; l++)
x[l] = w0 * tmp[l];
for (size_t j = begin; j < end; j++) {
storage_idx_t ji = hnsw.neighbors[j];
if (ji < 0) ji = i;
index.storage->reconstruct(ji, tmp);
float w;
w = beta0[j - begin + 1];
for (int l = 0; l < dsub; l++)
x[l] += w * tmp[l];
w = beta1[j - begin + 1];
for (int l = dsub; l < d; l++)
x[l] += w * tmp[l];
}
} else {
const float *betas[nsq];
{
const float *b = codebook.data();
const uint8_t *c = &codes[i * code_size];
for (int sq = 0; sq < nsq; sq++) {
betas[sq] = b + (*c++) * (M + 1);
b += (M + 1) * k;
}
}
index.storage->reconstruct(i, tmp);
{
int d0 = 0;
for (int sq = 0; sq < nsq; sq++) {
float w = *(betas[sq]++);
int d1 = d0 + dsub;
for (int l = d0; l < d1; l++) {
x[l] = w * tmp[l];
}
d0 = d1;
}
}
for (size_t j = begin; j < end; j++) {
storage_idx_t ji = hnsw.neighbors[j];
if (ji < 0) ji = i;
index.storage->reconstruct(ji, tmp);
int d0 = 0;
for (int sq = 0; sq < nsq; sq++) {
float w = *(betas[sq]++);
int d1 = d0 + dsub;
for (int l = d0; l < d1; l++) {
x[l] += w * tmp[l];
}
d0 = d1;
}
}
}
}
void ReconstructFromNeighbors::reconstruct_n(storage_idx_t n0,
storage_idx_t ni,
float *x) const
{
#pragma omp parallel
{
std::vector<float> tmp(index.d);
#pragma omp for
for (storage_idx_t i = 0; i < ni; i++) {
reconstruct(n0 + i, x + i * index.d, tmp.data());
}
}
}
size_t ReconstructFromNeighbors::compute_distances(size_t n, const idx_t *shortlist,
const float *query, float *distances) const
{
std::vector<float> tmp(2 * index.d);
size_t ncomp = 0;
for (int i = 0; i < n; i++) {
if (shortlist[i] < 0) break;
reconstruct(shortlist[i], tmp.data(), tmp.data() + index.d);
distances[i] = fvec_L2sqr(query, tmp.data(), index.d);
ncomp++;
}
return ncomp;
}
void ReconstructFromNeighbors::get_neighbor_table(storage_idx_t i, float *tmp1) const
{
const HNSW & hnsw = index.hnsw;
size_t begin, end;
hnsw.neighbor_range(i, 0, &begin, &end);
size_t d = index.d;
index.storage->reconstruct(i, tmp1);
for (size_t j = begin; j < end; j++) {
storage_idx_t ji = hnsw.neighbors[j];
if (ji < 0) ji = i;
index.storage->reconstruct(ji, tmp1 + (j - begin + 1) * d);
}
}
/// called by add_codes
void ReconstructFromNeighbors::estimate_code(
const float *x, storage_idx_t i, uint8_t *code) const
{
// fill in tmp table with the neighbor values
float *tmp1 = new float[d * (M + 1) + (d * k)];
float *tmp2 = tmp1 + d * (M + 1);
ScopeDeleter<float> del(tmp1);
// collect coordinates of base
get_neighbor_table (i, tmp1);
for (int sq = 0; sq < nsq; sq++) {
int d0 = sq * dsub;
int d1 = d0 + dsub;
{
FINTEGER ki = k, di = d, m1 = M + 1;
FINTEGER dsubi = dsub;
float zero = 0, one = 1;
sgemm_ ("N", "N", &dsubi, &ki, &m1, &one,
tmp1 + d0, &di,
codebook.data() + sq * (m1 * k), &m1,
&zero, tmp2, &dsubi);
}
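        // Editor's note: tmp1 holds the point and its M neighbors as a
        // column-major d x (M + 1) matrix; the sgemm above multiplies its
        // rows [d0, d1) by the (M + 1) x k beta codebook of subquantizer sq,
        // leaving the k candidate subvector reconstructions in tmp2.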
float min = HUGE_VAL;
int argmin = -1;
for (int j = 0; j < k; j++) {
float dis = fvec_L2sqr(x + d0, tmp2 + j * dsub, dsub);
if (dis < min) {
min = dis;
argmin = j;
}
}
code[sq] = argmin;
}
}
void ReconstructFromNeighbors::add_codes(size_t n, const float *x)
{
if (k == 1) { // nothing to encode
ntotal += n;
return;
}
codes.resize(codes.size() + code_size * n);
#pragma omp parallel for
for (int i = 0; i < n; i++) {
estimate_code(x + i * index.d, ntotal + i,
codes.data() + (ntotal + i) * code_size);
}
ntotal += n;
FAISS_ASSERT (codes.size() == ntotal * code_size);
}
/**************************************************************
* IndexHNSWFlat implementation
**************************************************************/
struct FlatL2Dis: HNSW::DistanceComputer {
Index::idx_t nb;
const float *q;
const float *b;
size_t ndis;
float operator () (storage_idx_t i) override
{
ndis++;
return (fvec_L2sqr(q, b + i * d, d));
}
float symmetric_dis(storage_idx_t i, storage_idx_t j) override
{
return (fvec_L2sqr(b + j * d, b + i * d, d));
}
FlatL2Dis(const IndexFlatL2 & storage, const float *q = nullptr):
q(q)
{
nb = storage.ntotal;
d = storage.d;
b = storage.xb.data();
ndis = 0;
}
void set_query(const float *x) override {
q = x;
}
virtual ~FlatL2Dis () {
#pragma omp critical
{
hnsw_stats.ndis += ndis;
}
}
};
IndexHNSWFlat::IndexHNSWFlat()
{
is_trained = true;
}
IndexHNSWFlat::IndexHNSWFlat(int d, int M):
IndexHNSW(new IndexFlatL2(d), M)
{
own_fields = true;
is_trained = true;
}
HNSW::DistanceComputer * IndexHNSWFlat::get_distance_computer () const
{
return new FlatL2Dis (*dynamic_cast<IndexFlatL2*> (storage));
}
/**************************************************************
* IndexHNSWPQ implementation
**************************************************************/
struct PQDis: HNSW::DistanceComputer {
Index::idx_t nb;
const uint8_t *codes;
size_t code_size;
const ProductQuantizer & pq;
const float *sdc;
std::vector<float> precomputed_table;
size_t ndis;
float operator () (storage_idx_t i) override
{
const uint8_t *code = codes + i * code_size;
const float *dt = precomputed_table.data();
float accu = 0;
for (int j = 0; j < pq.M; j++) {
accu += dt[*code++];
dt += 256;
}
ndis++;
return accu;
}
float symmetric_dis(storage_idx_t i, storage_idx_t j) override
{
const float * sdci = sdc;
float accu = 0;
const uint8_t *codei = codes + i * code_size;
const uint8_t *codej = codes + j * code_size;
for (int l = 0; l < pq.M; l++) {
accu += sdci[(*codei++) + (*codej++) * 256];
sdci += 256 * 256;
}
return accu;
}
PQDis(const IndexPQ & storage, const float *q = nullptr):
pq(storage.pq)
{
precomputed_table.resize(pq.M * pq.ksub);
nb = storage.ntotal;
d = storage.d;
codes = storage.codes.data();
code_size = pq.code_size;
FAISS_ASSERT(pq.ksub == 256);
FAISS_ASSERT(pq.sdc_table.size() == pq.ksub * pq.ksub * pq.M);
sdc = pq.sdc_table.data();
ndis = 0;
}
void set_query(const float *x) override {
pq.compute_distance_table(x, precomputed_table.data());
}
virtual ~PQDis () {
#pragma omp critical
{
hnsw_stats.ndis += ndis;
}
}
};
IndexHNSWPQ::IndexHNSWPQ() {}
IndexHNSWPQ::IndexHNSWPQ(int d, int pq_m, int M):
IndexHNSW(new IndexPQ(d, pq_m, 8), M)
{
own_fields = true;
is_trained = false;
}
void IndexHNSWPQ::train(idx_t n, const float* x)
{
IndexHNSW::train (n, x);
(dynamic_cast<IndexPQ*> (storage))->pq.compute_sdc_table();
}
HNSW::DistanceComputer * IndexHNSWPQ::get_distance_computer () const
{
return new PQDis (*dynamic_cast<IndexPQ*> (storage));
}
/**************************************************************
* IndexHNSWSQ implementation
**************************************************************/
struct SQDis: HNSW::DistanceComputer {
Index::idx_t nb;
const uint8_t *codes;
size_t code_size;
const ScalarQuantizer & sq;
const float *q;
ScalarQuantizer::DistanceComputer * dc;
float operator () (storage_idx_t i) override
{
const uint8_t *code = codes + i * code_size;
return dc->compute_distance (q, code);
}
float symmetric_dis(storage_idx_t i, storage_idx_t j) override
{
const uint8_t *codei = codes + i * code_size;
const uint8_t *codej = codes + j * code_size;
return dc->compute_code_distance (codei, codej);
}
SQDis(const IndexScalarQuantizer & storage, const float *q = nullptr):
sq(storage.sq)
{
nb = storage.ntotal;
d = storage.d;
codes = storage.codes.data();
code_size = sq.code_size;
dc = sq.get_distance_computer();
}
void set_query(const float *x) override {
q = x;
}
virtual ~SQDis () {
delete dc;
}
};
IndexHNSWSQ::IndexHNSWSQ(int d, ScalarQuantizer::QuantizerType qtype, int M):
IndexHNSW (new IndexScalarQuantizer (d, qtype), M)
{
own_fields = true;
}
IndexHNSWSQ::IndexHNSWSQ() {}
HNSW::DistanceComputer * IndexHNSWSQ::get_distance_computer () const
{
return new SQDis (*dynamic_cast<IndexScalarQuantizer*> (storage));
}
/**************************************************************
* IndexHNSW2Level implementation
**************************************************************/
IndexHNSW2Level::IndexHNSW2Level(Index *quantizer, size_t nlist, int m_pq, int M):
IndexHNSW (new Index2Layer (quantizer, nlist, m_pq), M)
{
own_fields = true;
is_trained = false;
}
IndexHNSW2Level::IndexHNSW2Level() {}
struct Distance2Level: HNSW::DistanceComputer {
const Index2Layer & storage;
std::vector<float> buf;
const float *q;
const float *pq_l1_tab, *pq_l2_tab;
Distance2Level(const Index2Layer & storage): storage(storage)
{
d = storage.d;
FAISS_ASSERT(storage.pq.dsub == 4);
pq_l2_tab = storage.pq.centroids.data();
buf.resize(2 * d);
}
float symmetric_dis(storage_idx_t i, storage_idx_t j) override
{
storage.reconstruct(i, buf.data());
storage.reconstruct(j, buf.data() + d);
return fvec_L2sqr(buf.data() + d, buf.data(), d);
}
void set_query(const float *x) override {
q = x;
}
};
// well optimized for xNN+PQNN
struct DistanceXPQ4: Distance2Level {
int M, k;
DistanceXPQ4(const Index2Layer & storage):
Distance2Level (storage)
{
const IndexFlat *quantizer =
dynamic_cast<IndexFlat*> (storage.q1.quantizer);
FAISS_ASSERT(quantizer);
M = storage.pq.M;
pq_l1_tab = quantizer->xb.data();
}
float operator () (storage_idx_t i) override
{
const uint8_t *code = storage.codes.data() + i * storage.code_size;
long key = 0;
memcpy (&key, code, storage.code_size_1);
code += storage.code_size_1;
// walking pointers
const float *qa = q;
const __m128 *l1_t = (const __m128 *)(pq_l1_tab + d * key);
const __m128 *pq_l2_t = (const __m128 *)pq_l2_tab;
__m128 accu = _mm_setzero_ps();
for (int m = 0; m < M; m++) {
__m128 qi = _mm_loadu_ps(qa);
__m128 recons = l1_t[m] + pq_l2_t[*code++];
__m128 diff = qi - recons;
accu += diff * diff;
pq_l2_t += 256;
qa += 4;
}
accu = _mm_hadd_ps (accu, accu);
accu = _mm_hadd_ps (accu, accu);
return _mm_cvtss_f32 (accu);
}
};
// well optimized for 2xNN+PQNN
struct Distance2xXPQ4: Distance2Level {
int M_2, mi_nbits;
Distance2xXPQ4(const Index2Layer & storage):
Distance2Level (storage)
{
const MultiIndexQuantizer *mi =
dynamic_cast<MultiIndexQuantizer*> (storage.q1.quantizer);
FAISS_ASSERT(mi);
FAISS_ASSERT(storage.pq.M % 2 == 0);
M_2 = storage.pq.M / 2;
mi_nbits = mi->pq.nbits;
pq_l1_tab = mi->pq.centroids.data();
}
float operator () (storage_idx_t i) override
{
const uint8_t *code = storage.codes.data() + i * storage.code_size;
long key01 = 0;
memcpy (&key01, code, storage.code_size_1);
code += storage.code_size_1;
// walking pointers
const float *qa = q;
const __m128 *pq_l1_t = (const __m128 *)pq_l1_tab;
const __m128 *pq_l2_t = (const __m128 *)pq_l2_tab;
__m128 accu = _mm_setzero_ps();
for (int mi_m = 0; mi_m < 2; mi_m++) {
long l1_idx = key01 & ((1L << mi_nbits) - 1);
const __m128 * pq_l1 = pq_l1_t + M_2 * l1_idx;
for (int m = 0; m < M_2; m++) {
__m128 qi = _mm_loadu_ps(qa);
__m128 recons = pq_l1[m] + pq_l2_t[*code++];
__m128 diff = qi - recons;
accu += diff * diff;
pq_l2_t += 256;
qa += 4;
}
pq_l1_t += M_2 << mi_nbits;
key01 >>= mi_nbits;
}
accu = _mm_hadd_ps (accu, accu);
accu = _mm_hadd_ps (accu, accu);
return _mm_cvtss_f32 (accu);
}
};
HNSW::DistanceComputer * IndexHNSW2Level::get_distance_computer () const
{
const Index2Layer *storage2l =
dynamic_cast<Index2Layer*>(storage);
if (storage2l) {
const MultiIndexQuantizer *mi =
dynamic_cast<MultiIndexQuantizer*> (storage2l->q1.quantizer);
if (mi && storage2l->pq.M % 2 == 0 && storage2l->pq.dsub == 4) {
return new Distance2xXPQ4(*storage2l);
}
const IndexFlat *fl =
dynamic_cast<IndexFlat*> (storage2l->q1.quantizer);
if (fl && storage2l->pq.dsub == 4) {
return new DistanceXPQ4(*storage2l);
}
}
// IVFPQ and cases not handled above
return new GenericDistanceComputer (*storage);
}
namespace {
// same as search_from_candidates but uses the visited table to store
// two states per node:
// visno     -> is in result list
// visno + 1 -> in result list + in candidates
int search_from_candidates_2(const HNSW & hnsw,
DistanceComputer & qdis, int k,
idx_t *I, float * D,
MinimaxHeap &candidates,
VisitedTable &vt,
int level, int nres_in = 0)
{
int nres = nres_in;
int ndis = 0;
for (int i = 0; i < candidates.size(); i++) {
idx_t v1 = candidates.ids[i];
float d = candidates.dis[i];
FAISS_ASSERT(v1 >= 0);
vt.visited[v1] = vt.visno + 1;
}
bool do_dis_check = hnsw.check_relative_distance;
int nstep = 0;
while (candidates.size() > 0) {
float d0 = 0;
int v0 = candidates.pop_min(&d0);
if (do_dis_check) {
// tricky stopping condition: more than ef distances
// smaller than d0 have already been processed
int n_dis_below = candidates.count_below(d0);
if(n_dis_below >= hnsw.efSearch) {
break;
}
}
size_t begin, end;
hnsw.neighbor_range(v0, level, &begin, &end);
for (size_t j = begin; j < end; j++) {
int v1 = hnsw.neighbors[j];
if (v1 < 0) break;
if (vt.visited[v1] == vt.visno + 1) {
// nothing to do
} else {
ndis++;
float d = qdis(v1);
candidates.push(v1, d);
// never seen before --> add to heap
if (vt.visited[v1] < vt.visno) {
if (nres < k) {
faiss::maxheap_push (++nres, D, I, d, v1);
} else if (d < D[0]) {
faiss::maxheap_pop (nres--, D, I);
faiss::maxheap_push (++nres, D, I, d, v1);
}
}
vt.visited[v1] = vt.visno + 1;
}
}
nstep++;
if (!do_dis_check && nstep > hnsw.efSearch) {
break;
}
}
if (level == 0) {
#pragma omp critical
{
hnsw_stats.n1 ++;
if (candidates.size() == 0)
hnsw_stats.n2 ++;
}
}
return nres;
}
} // anonymous namespace
void IndexHNSW2Level::search (idx_t n, const float *x, idx_t k,
float *distances, idx_t *labels) const
{
if (dynamic_cast<const Index2Layer*>(storage)) {
IndexHNSW::search (n, x, k, distances, labels);
} else { // "mixed" search
const IndexIVFPQ *index_ivfpq =
dynamic_cast<const IndexIVFPQ*>(storage);
int nprobe = index_ivfpq->nprobe;
long * coarse_assign = new long [n * nprobe];
ScopeDeleter<long> del (coarse_assign);
float * coarse_dis = new float [n * nprobe];
ScopeDeleter<float> del2 (coarse_dis);
index_ivfpq->quantizer->search (n, x, nprobe, coarse_dis, coarse_assign);
index_ivfpq->search_preassigned (
n, x, k, coarse_assign, coarse_dis, distances, labels, false);
#pragma omp parallel
{
VisitedTable vt (ntotal);
DistanceComputer *dis = get_distance_computer();
ScopeDeleter1<DistanceComputer> del(dis);
int candidates_size = hnsw.upper_beam;
MinimaxHeap candidates(candidates_size);
#pragma omp for
for(int i = 0; i < n; i++) {
idx_t * idxi = labels + i * k;
float * simi = distances + i * k;
dis->set_query(x + i * d);
// mark all inverted list elements as visited
for (int j = 0; j < nprobe; j++) {
idx_t key = coarse_assign[j + i * nprobe];
if (key < 0) break;
const std::vector<idx_t> & ids = index_ivfpq->ids[key];
for (int jj = 0; jj < ids.size(); jj++) {
vt.set (ids[jj]);
}
}
candidates.clear();
// copy the upper_beam elements to candidates list
int search_policy = 2;
if (search_policy == 1) {
for (int j = 0 ; j < hnsw.upper_beam && j < k; j++) {
if (idxi[j] < 0) break;
candidates.push (idxi[j], simi[j]);
// search_from_candidates adds them back
idxi[j] = -1;
simi[j] = HUGE_VAL;
}
// reorder from sorted to heap
maxheap_heapify (k, simi, idxi, simi, idxi, k);
search_from_candidates (
hnsw, *dis, k, idxi, simi,
candidates, vt, 0, k);
vt.advance();
} else if (search_policy == 2) {
for (int j = 0 ; j < hnsw.upper_beam && j < k; j++) {
if (idxi[j] < 0) break;
candidates.push (idxi[j], simi[j]);
}
// reorder from sorted to heap
maxheap_heapify (k, simi, idxi, simi, idxi, k);
search_from_candidates_2 (
hnsw, *dis, k, idxi, simi,
candidates, vt, 0, k);
vt.advance ();
vt.advance ();
}
maxheap_reorder (k, simi, idxi);
}
}
}
}
void IndexHNSW2Level::flip_to_ivf ()
{
Index2Layer *storage2l =
dynamic_cast<Index2Layer*>(storage);
FAISS_THROW_IF_NOT (storage2l);
IndexIVFPQ * index_ivfpq =
new IndexIVFPQ (storage2l->q1.quantizer,
d, storage2l->q1.nlist,
storage2l->pq.M, 8);
index_ivfpq->pq = storage2l->pq;
index_ivfpq->is_trained = storage2l->is_trained;
index_ivfpq->precompute_table();
index_ivfpq->own_fields = storage2l->q1.own_fields;
storage2l->transfer_to_IVFPQ(*index_ivfpq);
index_ivfpq->make_direct_map (true);
storage = index_ivfpq;
delete storage2l;
}
} // namespace faiss
/**
* Copyright (c) 2015-present, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under the BSD+Patents license found in the
* LICENSE file in the root directory of this source tree.
*/
#pragma once
#include <vector>
#include <omp.h>
#include "IndexFlat.h"
#include "IndexPQ.h"
#include "IndexScalarQuantizer.h"
#include "utils.h"
namespace faiss {
/** Implementation of the Hierarchical Navigable Small World
* datastructure.
*
* Efficient and robust approximate nearest neighbor search using
* Hierarchical Navigable Small World graphs
*
* Yu. A. Malkov, D. A. Yashunin, arXiv 2017
*
 * This implementation is heavily influenced by the NMSlib
 * implementation by Yury Malkov and Leonid Boytsov
* (https://github.com/searchivarius/nmslib)
*
* The HNSW object stores only the neighbor link structure, see
* IndexHNSW below for the full index object.
*/
struct VisitedTable;
struct HNSW {
/// internal storage of vectors (32 bits: this is expensive)
typedef int storage_idx_t;
/// Faiss results are 64-bit
typedef faiss::Index::idx_t idx_t;
/** The HNSW structure does not store vectors, it only accesses
* them through this class.
*
 * Functions are guaranteed to be accessed from only one thread. */
struct DistanceComputer {
idx_t d;
/// called before computing distances
virtual void set_query (const float *x) = 0;
/// compute distance of vector i to current query
virtual float operator () (storage_idx_t i) = 0;
/// compute distance between two stored vectors
virtual float symmetric_dis(storage_idx_t i, storage_idx_t j) = 0;
virtual ~DistanceComputer () {}
};
/// assignment probability to each layer (sum=1)
std::vector<double> assign_probas;
/// number of neighbors stored per layer (cumulative), should not
/// be changed after first add
std::vector<int> cum_nneighbor_per_level;
/// level of each vector (base level = 1), size = ntotal
std::vector<int> levels;
/// offsets[i] is the offset in the neighbors array where vector i is stored
/// size ntotal + 1
std::vector<size_t> offsets;
/// neighbors[offsets[i]:offsets[i+1]] is the list of neighbors of vector i
/// for all levels. this is where all storage goes.
std::vector<storage_idx_t> neighbors;
/// entry point in the search structure (one of the points with maximum level)
storage_idx_t entry_point;
faiss::RandomGenerator rng;
/// maximum level
int max_level;
/// expansion factor at construction time
int efConstruction;
/// expansion factor at search time
int efSearch;
/// during search: do we check whether the next best distance is good enough?
bool check_relative_distance;
/// number of entry points in levels > 0.
int upper_beam;
// methods that initialize the tree sizes
/// initialize the assign_probas and cum_nneighbor_per_level to
/// have 2*M links on level 0 and M links on levels > 0
void set_default_probas(int M, float levelMult);
/// set nb of neighbors for this level (before adding anything)
void set_nb_neighbors(int level_no, int n);
// methods that access the tree sizes
/// nb of neighbors for this level
int nb_neighbors(int layer_no) const;
/// cumulative nb up to (and excluding) this level
int cum_nb_neighbors(int layer_no) const;
/// range of entries in the neighbors table of vertex no at layer_no
void neighbor_range(idx_t no, int layer_no,
size_t * begin, size_t * end) const;
/// only mandatory parameter: nb of neighbors
explicit HNSW(int M = 32);
/// pick a random level for a new point
int random_level();
/// add n random levels to table (for debugging...)
void fill_with_random_links(size_t n);
/** add point pt_id on all levels <= pt_level and build the link
* structure for them. */
void add_with_locks(DistanceComputer & ptdis, int pt_level, int pt_id,
std::vector<omp_lock_t> & locks,
VisitedTable &vt);
/// search interface
void search(DistanceComputer & qdis, int k,
idx_t *I, float * D,
VisitedTable &vt) const;
void reset();
void clear_neighbor_tables(int level);
void print_neighbor_stats(int level) const;
};
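/* Illustrative sketch (not library code): with the default constructor
 * HNSW hnsw(32), set_default_probas allots 2*M = 64 links on level 0 and
 * M = 32 links on each upper level, so
 *
 *   hnsw.nb_neighbors(0)     == 64
 *   hnsw.nb_neighbors(1)     == 32
 *   hnsw.cum_nb_neighbors(2) == 96  // table entries below level 2
 *
 * and neighbor_range(no, l, &begin, &end) yields the slice of `neighbors`
 * that holds node no's level-l links.
 */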
struct HNSWStats {
size_t n1, n2, n3;
size_t ndis;
size_t nreorder;
bool view;
HNSWStats () {reset (); }
void reset ();
};
// global var that collects them all
extern HNSWStats hnsw_stats;
class IndexHNSW;
struct ReconstructFromNeighbors {
typedef Index::idx_t idx_t;
typedef HNSW::storage_idx_t storage_idx_t;
const IndexHNSW & index;
size_t M; // number of neighbors
size_t k; // number of codebook entries
size_t nsq; // number of subvectors
size_t code_size;
int k_reorder; // nb to reorder. -1 = all
std::vector<float> codebook; // size nsq * k * (M + 1)
std::vector<uint8_t> codes; // size ntotal * code_size
size_t ntotal;
size_t d, dsub; // derived values
ReconstructFromNeighbors(const IndexHNSW & index,
size_t k=256, size_t nsq=1);
/// codes must be added in the correct order and the IndexHNSW
/// must be populated and sorted
void add_codes(size_t n, const float *x);
size_t compute_distances(size_t n, const idx_t *shortlist,
const float *query, float *distances) const;
/// called by add_codes
void estimate_code(const float *x, storage_idx_t i, uint8_t *code) const;
/// called by compute_distances
void reconstruct(storage_idx_t i, float *x, float *tmp) const;
void reconstruct_n(storage_idx_t n0, storage_idx_t ni, float *x) const;
/// get the (M+1)-by-d table of neighbor coordinates for vector i
void get_neighbor_table(storage_idx_t i, float *out) const;
};
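/* Size sketch (illustrative, under the field comments above): each stored
 * vector is approximated from its M level-0 neighbors; with nsq subvectors
 * (dsub = d / nsq being the derived subvector size) and k codebook entries
 * per subvector, the codebook holds nsq * k * (M + 1) weights and `codes`
 * grows by code_size bytes per added vector.
 */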
/** The HNSW index is a normal random-access index with a HNSW
* link structure built on top */
struct IndexHNSW: Index {
typedef HNSW::storage_idx_t storage_idx_t;
    // the link structure
HNSW hnsw;
// the sequential storage
bool own_fields;
Index * storage;
ReconstructFromNeighbors *reconstruct_from_neighbors;
explicit IndexHNSW (int d = 0, int M = 32);
explicit IndexHNSW (Index * storage, int M = 32);
~IndexHNSW() override;
// get a DistanceComputer object for this kind of storage
virtual HNSW::DistanceComputer * get_distance_computer() const = 0;
void add(idx_t n, const float *x) override;
/// Trains the storage if needed
void train(idx_t n, const float* x) override;
/// entry point for search
void search (idx_t n, const float *x, idx_t k,
float *distances, idx_t *labels) const override;
void reconstruct(idx_t key, float* recons) const override;
void reset () override;
void shrink_level_0_neighbors(int size);
/** Perform search only on level 0, given the starting points for
* each vertex.
*
* @param search_type 1:perform one search per nprobe, 2: enqueue
* all entry points
*/
void search_level_0(idx_t n, const float *x, idx_t k,
const storage_idx_t *nearest, const float *nearest_d,
float *distances, idx_t *labels, int nprobe = 1,
int search_type = 1) const;
/// alternative graph building
void init_level_0_from_knngraph(
int k, const float *D, const idx_t *I);
/// alternative graph building
void init_level_0_from_entry_points(
int npt, const storage_idx_t *points,
const storage_idx_t *nearests);
// reorder links from nearest to farthest
void reorder_links();
void link_singletons();
};
/** Flat index topped with a HNSW structure to access elements
* more efficiently.
*/
struct IndexHNSWFlat: IndexHNSW {
IndexHNSWFlat();
IndexHNSWFlat(int d, int M);
HNSW::DistanceComputer * get_distance_computer() const override;
};
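/* Minimal usage sketch (assumes only the API declared above; xb, xq,
 * distances and labels are caller-allocated arrays, not library state):
 *
 *   faiss::IndexHNSWFlat index (d, 32);   // M = 32 links per node
 *   index.hnsw.efConstruction = 40;       // build-time expansion factor
 *   index.add (nb, xb);                   // builds the graph
 *   index.hnsw.efSearch = 64;             // search-time expansion factor
 *   index.search (nq, xq, k, distances, labels);
 */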
/** PQ index topped with a HNSW structure to access elements
* more efficiently.
*/
struct IndexHNSWPQ: IndexHNSW {
IndexHNSWPQ();
IndexHNSWPQ(int d, int pq_m, int M);
void train(idx_t n, const float* x) override;
HNSW::DistanceComputer * get_distance_computer() const override;
};
/** SQ index topped with a HNSW structure to access elements
* more efficiently.
*/
struct IndexHNSWSQ: IndexHNSW {
IndexHNSWSQ();
IndexHNSWSQ(int d, ScalarQuantizer::QuantizerType qtype, int M);
HNSW::DistanceComputer * get_distance_computer() const override;
};
/** 2-level code structure with fast random access
*/
struct IndexHNSW2Level: IndexHNSW {
IndexHNSW2Level();
IndexHNSW2Level(Index *quantizer, size_t nlist, int m_pq, int M);
HNSW::DistanceComputer * get_distance_computer() const override;
void flip_to_ivf();
/// entry point for search
void search (idx_t n, const float *x, idx_t k,
float *distances, idx_t *labels) const override;
};
} // namespace faiss
...@@ -23,6 +23,81 @@ ...@@ -23,6 +23,81 @@
namespace faiss { namespace faiss {
/*****************************************
* Level1Quantizer implementation
******************************************/
Level1Quantizer::Level1Quantizer (Index * quantizer, size_t nlist):
quantizer (quantizer),
nlist (nlist),
quantizer_trains_alone (0),
own_fields (false),
clustering_index (nullptr)
{
cp.niter = 10;
}
Level1Quantizer::Level1Quantizer ():
quantizer (nullptr),
nlist (0),
quantizer_trains_alone (0), own_fields (false),
clustering_index (nullptr)
{}
Level1Quantizer::~Level1Quantizer ()
{
if (own_fields) delete quantizer;
}
void Level1Quantizer::train_q1 (size_t n, const float *x, bool verbose, MetricType metric_type)
{
size_t d = quantizer->d;
if (quantizer->is_trained && (quantizer->ntotal == nlist)) {
if (verbose)
printf ("IVF quantizer does not need training.\n");
} else if (quantizer_trains_alone == 1) {
if (verbose)
printf ("IVF quantizer trains alone...\n");
quantizer->train (n, x);
quantizer->verbose = verbose;
FAISS_THROW_IF_NOT_MSG (quantizer->ntotal == nlist,
"nlist not consistent with quantizer size");
} else if (quantizer_trains_alone == 0) {
if (verbose)
printf ("Training level-1 quantizer on %ld vectors in %ldD\n",
n, d);
Clustering clus (d, nlist, cp);
quantizer->reset();
if (clustering_index) {
clus.train (n, x, *clustering_index);
quantizer->add (nlist, clus.centroids.data());
} else {
clus.train (n, x, *quantizer);
}
quantizer->is_trained = true;
} else if (quantizer_trains_alone == 2) {
if (verbose)
printf (
"Training L2 quantizer on %ld vectors in %ldD%s\n",
n, d,
                clustering_index ? " (user-provided index)" : "");
FAISS_THROW_IF_NOT (metric_type == METRIC_L2);
Clustering clus (d, nlist, cp);
if (!clustering_index) {
IndexFlatL2 assigner (d);
clus.train(n, x, assigner);
} else {
clus.train(n, x, *clustering_index);
}
if (verbose)
printf ("Adding centroids to quantizer\n");
quantizer->add (nlist, clus.centroids.data());
}
}
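// Configuration sketch (hypothetical values; fields declared in IndexIVF.h).
// The three quantizer_trains_alone modes handled above are:
//
//   ivf.quantizer_trains_alone = 0; // k-means, assigning with `quantizer`
//                                   // (or clustering_index when set)
//   ivf.quantizer_trains_alone = 1; // hand the training set to quantizer->train()
//   ivf.quantizer_trains_alone = 2; // k-means on a flat L2 index, then add
//                                   // the centroids to the quantizer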
/***************************************** /*****************************************
* IndexIVF implementation * IndexIVF implementation
******************************************/ ******************************************/
...@@ -31,13 +106,9 @@ namespace faiss { ...@@ -31,13 +106,9 @@ namespace faiss {
IndexIVF::IndexIVF (Index * quantizer, size_t d, size_t nlist, IndexIVF::IndexIVF (Index * quantizer, size_t d, size_t nlist,
MetricType metric): MetricType metric):
Index (d, metric), Index (d, metric),
nlist (nlist), Level1Quantizer (quantizer, nlist),
nprobe (1), nprobe (1),
quantizer (quantizer), max_codes (0),
quantizer_trains_alone (0),
own_fields (false),
clustering_index (nullptr),
ids (nlist),
maintain_direct_map (false) maintain_direct_map (false)
{ {
FAISS_THROW_IF_NOT (d == quantizer->d); FAISS_THROW_IF_NOT (d == quantizer->d);
...@@ -49,16 +120,13 @@ IndexIVF::IndexIVF (Index * quantizer, size_t d, size_t nlist, ...@@ -49,16 +120,13 @@ IndexIVF::IndexIVF (Index * quantizer, size_t d, size_t nlist,
// here we set a low # iterations because this is typically used // here we set a low # iterations because this is typically used
// for large clusterings (nb this is not used for the MultiIndex, // for large clusterings (nb this is not used for the MultiIndex,
// for which quantizer_trains_alone = true) // for which quantizer_trains_alone = true)
cp.niter = 10;
cp.verbose = verbose;
code_size = 0; // let sub-classes set this code_size = 0; // let sub-classes set this
codes.resize(nlist); ids.resize (nlist);
codes.resize (nlist);
} }
IndexIVF::IndexIVF (): IndexIVF::IndexIVF ():
nlist (0), nprobe (1), quantizer (nullptr), nprobe (1), max_codes (0),
quantizer_trains_alone (0), own_fields (false),
clustering_index (nullptr),
maintain_direct_map (false) maintain_direct_map (false)
{} {}
...@@ -109,6 +177,78 @@ void IndexIVF::search (idx_t n, const float *x, idx_t k, ...@@ -109,6 +177,78 @@ void IndexIVF::search (idx_t n, const float *x, idx_t k,
} }
void IndexIVF::reconstruct (idx_t key, float* recons) const
{
FAISS_THROW_IF_NOT_MSG (direct_map.size() == ntotal,
"direct map is not initialized");
long list_no = direct_map[key] >> 32;
long offset = direct_map[key] & 0xffffffff;
reconstruct_from_offset (list_no, offset, recons);
}
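// The direct map packs an entry's location into a single 64-bit value; a
// sketch of the convention decoded above (illustrative, not a library helper):
//
//   long packed  = ((long) list_no << 32) | offset;  // requires offset < 2^32
//   long list_no = packed >> 32;
//   long offset  = packed & 0xffffffff;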
void IndexIVF::reconstruct_n (idx_t i0, idx_t ni, float* recons) const
{
FAISS_THROW_IF_NOT (ni == 0 || (i0 >= 0 && i0 + ni <= ntotal));
for (long list_no = 0; list_no < nlist; list_no++) {
const std::vector<long>& idlist = ids[list_no];
for (long offset = 0; offset < idlist.size(); offset++) {
long id = idlist[offset];
if (!(id >= i0 && id < i0 + ni)) {
continue;
}
float* reconstructed = recons + (id - i0) * d;
reconstruct_from_offset (list_no, offset, reconstructed);
}
}
}
void IndexIVF::search_and_reconstruct (idx_t n, const float *x, idx_t k,
float *distances, idx_t *labels,
float *recons) const
{
long * idx = new long [n * nprobe];
ScopeDeleter<long> del (idx);
float * coarse_dis = new float [n * nprobe];
ScopeDeleter<float> del2 (coarse_dis);
quantizer->search (n, x, nprobe, coarse_dis, idx);
// search_preassigned() with `store_pairs` enabled to obtain the list_no
// and offset into `codes` for reconstruction
search_preassigned (n, x, k, idx, coarse_dis,
distances, labels, true /* store_pairs */);
for (idx_t i = 0; i < n; ++i) {
for (idx_t j = 0; j < k; ++j) {
idx_t ij = i * k + j;
idx_t key = labels[ij];
float* reconstructed = recons + ij * d;
if (key < 0) {
// Fill with NaNs
memset(reconstructed, -1, sizeof(*reconstructed) * d);
} else {
int list_no = key >> 32;
int offset = key & 0xffffffff;
// Update label to the actual id
labels[ij] = ids[list_no][offset];
reconstruct_from_offset (list_no, offset, reconstructed);
}
}
}
}
void IndexIVF::reconstruct_from_offset (long list_no, long offset,
float* recons) const
{
FAISS_THROW_MSG ("reconstruct_from_offset not implemented");
}
void IndexIVF::reset () void IndexIVF::reset ()
{ {
ntotal = 0; ntotal = 0;
...@@ -156,48 +296,11 @@ long IndexIVF::remove_ids (const IDSelector & sel) ...@@ -156,48 +296,11 @@ long IndexIVF::remove_ids (const IDSelector & sel)
void IndexIVF::train (idx_t n, const float *x) void IndexIVF::train (idx_t n, const float *x)
{ {
if (quantizer->is_trained && (quantizer->ntotal == nlist)) { if (verbose)
if (verbose) printf ("Training level-1 quantizer\n");
printf ("IVF quantizer does not need training.\n");
} else if (quantizer_trains_alone == 1) { train_q1 (n, x, verbose, metric_type);
if (verbose)
printf ("IVF quantizer trains alone...\n");
quantizer->train (n, x);
quantizer->verbose = verbose;
FAISS_THROW_IF_NOT_MSG (quantizer->ntotal == nlist,
"nlist not consistent with quantizer size");
} else if (quantizer_trains_alone == 0) {
if (verbose)
printf ("Training IVF quantizer on %ld vectors in %dD\n",
n, d);
Clustering clus (d, nlist, cp);
quantizer->reset();
if (clustering_index) {
clus.train (n, x, *clustering_index);
quantizer->add (nlist, clus.centroids.data());
} else {
clus.train (n, x, *quantizer);
}
quantizer->is_trained = true;
} else if (quantizer_trains_alone == 2) {
if (verbose)
printf (
"Training L2 quantizer on %ld vectors in %dD%s\n",
n, d,
clustering_index ? "(user provided index)" : "");
FAISS_THROW_IF_NOT (metric_type == METRIC_L2);
Clustering clus (d, nlist, cp);
if (!clustering_index) {
IndexFlatL2 assigner (d);
clus.train(n, x, assigner);
} else {
clus.train(n, x, *clustering_index);
}
if (verbose)
printf ("Adding centroids to quantizer\n");
quantizer->add (nlist, clus.centroids.data());
}
if (verbose) if (verbose)
printf ("Training IVF residual\n"); printf ("Training IVF residual\n");
...@@ -337,7 +440,6 @@ void IndexIVF::copy_subset_to (IndexIVF & other, int subset_type, ...@@ -337,7 +440,6 @@ void IndexIVF::copy_subset_to (IndexIVF & other, int subset_type,
IndexIVF::~IndexIVF() IndexIVF::~IndexIVF()
{ {
if (own_fields) delete quantizer;
} }
...@@ -408,13 +510,13 @@ void IndexIVFFlat::add_core (idx_t n, const float * x, const long *xids, ...@@ -408,13 +510,13 @@ void IndexIVFFlat::add_core (idx_t n, const float * x, const long *xids,
ntotal += n_add; ntotal += n_add;
} }
void IndexIVFFlatStats::reset() void IndexIVFStats::reset()
{ {
memset ((void*)this, 0, sizeof (*this)); memset ((void*)this, 0, sizeof (*this));
} }
IndexIVFFlatStats indexIVFFlat_stats; IndexIVFStats indexIVF_stats;
namespace { namespace {
...@@ -437,6 +539,7 @@ void search_knn_inner_product (const IndexIVFFlat & ivf, ...@@ -437,6 +539,7 @@ void search_knn_inner_product (const IndexIVFFlat & ivf,
float * __restrict simi = res->get_val (i); float * __restrict simi = res->get_val (i);
long * __restrict idxi = res->get_ids (i); long * __restrict idxi = res->get_ids (i);
minheap_heapify (k, simi, idxi); minheap_heapify (k, simi, idxi);
size_t nscan = 0;
for (size_t ik = 0; ik < ivf.nprobe; ik++) { for (size_t ik = 0; ik < ivf.nprobe; ik++) {
long key = keysi[ik]; /* select the list */ long key = keysi[ik]; /* select the list */
...@@ -462,13 +565,16 @@ void search_knn_inner_product (const IndexIVFFlat & ivf, ...@@ -462,13 +565,16 @@ void search_knn_inner_product (const IndexIVFFlat & ivf,
minheap_push (k, simi, idxi, ip, id); minheap_push (k, simi, idxi, ip, id);
} }
} }
ndis += list_size; nscan += list_size;
if (ivf.max_codes && nscan >= ivf.max_codes)
break;
} }
ndis += nscan;
minheap_reorder (k, simi, idxi); minheap_reorder (k, simi, idxi);
} }
indexIVFFlat_stats.nq += nx; indexIVF_stats.nq += nx;
indexIVFFlat_stats.nlist += nlistv; indexIVF_stats.nlist += nlistv;
indexIVFFlat_stats.ndis += ndis; indexIVF_stats.ndis += ndis;
} }
...@@ -490,6 +596,8 @@ void search_knn_L2sqr (const IndexIVFFlat &ivf, ...@@ -490,6 +596,8 @@ void search_knn_L2sqr (const IndexIVFFlat &ivf,
long * __restrict idxi = res->get_ids (i); long * __restrict idxi = res->get_ids (i);
maxheap_heapify (k, disi, idxi); maxheap_heapify (k, disi, idxi);
size_t nscan = 0;
for (size_t ik = 0; ik < ivf.nprobe; ik++) { for (size_t ik = 0; ik < ivf.nprobe; ik++) {
long key = keysi[ik]; /* select the list */ long key = keysi[ik]; /* select the list */
if (key < 0) { if (key < 0) {
...@@ -514,13 +622,16 @@ void search_knn_L2sqr (const IndexIVFFlat &ivf, ...@@ -514,13 +622,16 @@ void search_knn_L2sqr (const IndexIVFFlat &ivf,
maxheap_push (k, disi, idxi, disij, id); maxheap_push (k, disi, idxi, disij, id);
} }
} }
ndis += list_size; nscan += list_size;
if (ivf.max_codes && nscan >= ivf.max_codes)
break;
} }
ndis += nscan;
maxheap_reorder (k, disi, idxi); maxheap_reorder (k, disi, idxi);
} }
indexIVFFlat_stats.nq += nx; indexIVF_stats.nq += nx;
indexIVFFlat_stats.nlist += nlistv; indexIVF_stats.nlist += nlistv;
indexIVFFlat_stats.ndis += ndis; indexIVF_stats.ndis += ndis;
} }
...@@ -639,20 +750,11 @@ void IndexIVFFlat::update_vectors (int n, idx_t *new_ids, const float *x) ...@@ -639,20 +750,11 @@ void IndexIVFFlat::update_vectors (int n, idx_t *new_ids, const float *x)
} }
void IndexIVFFlat::reconstruct_from_offset (long list_no, long offset,
float* recons) const
void IndexIVFFlat::reconstruct (idx_t key, float * recons) const
{ {
FAISS_THROW_IF_NOT_MSG (direct_map.size() == ntotal, memcpy (recons, &codes[list_no][offset * code_size], d * sizeof(recons[0]));
"direct map is not initialized");
int list_no = direct_map[key] >> 32;
int ofs = direct_map[key] & 0xffffffff;
memcpy (recons, &codes[list_no][ofs * code_size], d * sizeof(recons[0]));
} }
} // namespace faiss } // namespace faiss
...@@ -24,6 +24,38 @@ ...@@ -24,6 +24,38 @@
namespace faiss { namespace faiss {
/** Encapsulates a quantizer object for the IndexIVF
*
* The class isolates the fields that are independent of the storage
* of the lists (especially training)
*/
struct Level1Quantizer {
Index * quantizer; ///< quantizer that maps vectors to inverted lists
size_t nlist; ///< number of possible key values
/**
* = 0: use the quantizer as index in a kmeans training
* = 1: just pass on the training set to the train() of the quantizer
* = 2: kmeans training on a flat index + add the centroids to the quantizer
*/
char quantizer_trains_alone;
bool own_fields; ///< whether object owns the quantizer
ClusteringParameters cp; ///< to override default clustering params
Index *clustering_index; ///< to override index used during clustering
/// Trains the quantizer and calls train_residual to train sub-quantizers
void train_q1 (size_t n, const float *x, bool verbose,
MetricType metric_type);
Level1Quantizer (Index * quantizer, size_t nlist);
Level1Quantizer ();
~Level1Quantizer ();
};
/** Index based on an inverted file (IVF) /** Index based on an inverted file (IVF)
* *
...@@ -42,22 +74,9 @@ namespace faiss { ...@@ -42,22 +74,9 @@ namespace faiss {
* Sub-classes implement a post-filtering of the index that refines * Sub-classes implement a post-filtering of the index that refines
 * the distance estimation from the query to database vectors. * the distance estimation from the query to database vectors.
*/ */
struct IndexIVF: Index { struct IndexIVF: Index, Level1Quantizer {
size_t nlist; ///< number of possible key values
size_t nprobe; ///< number of probes at query time size_t nprobe; ///< number of probes at query time
size_t max_codes; ///< max nb of codes to visit to do a query
Index * quantizer; ///< quantizer that maps vectors to inverted lists
/**
* = 0: use the quantizer as index in a kmeans training
* = 1: just pass on the training set to the train() of the quantizer
* = 2: kmeans training on a flat index + add the centroids to the quantizer
*/
char quantizer_trains_alone;
bool own_fields; ///< whether object owns the quantizer
ClusteringParameters cp; ///< to override default clustering params
Index *clustering_index; ///< to override index used during clustering
std::vector < std::vector<long> > ids; ///< Inverted lists for indexes std::vector < std::vector<long> > ids; ///< Inverted lists for indexes
...@@ -74,7 +93,7 @@ struct IndexIVF: Index { ...@@ -74,7 +93,7 @@ struct IndexIVF: Index {
* be deleted while the IndexIVF is in use. * be deleted while the IndexIVF is in use.
*/ */
IndexIVF (Index * quantizer, size_t d, size_t nlist, IndexIVF (Index * quantizer, size_t d, size_t nlist,
MetricType metric = METRIC_INNER_PRODUCT); MetricType metric = METRIC_L2);
void reset() override; void reset() override;
...@@ -115,6 +134,42 @@ struct IndexIVF: Index { ...@@ -115,6 +134,42 @@ struct IndexIVF: Index {
virtual void search (idx_t n, const float *x, idx_t k, virtual void search (idx_t n, const float *x, idx_t k,
float *distances, idx_t *labels) const override; float *distances, idx_t *labels) const override;
void reconstruct (idx_t key, float* recons) const override;
/** Reconstruct a subset of the indexed vectors.
*
* Overrides default implementation to bypass reconstruct() which requires
* direct_map to be maintained.
*
* @param i0 first vector to reconstruct
* @param ni nb of vectors to reconstruct
* @param recons output array of reconstructed vectors, size ni * d
*/
void reconstruct_n(idx_t i0, idx_t ni, float* recons) const override;
/** Similar to search, but also reconstructs the stored vectors (or an
* approximation in the case of lossy coding) for the search results.
*
* Overrides default implementation to avoid having to maintain direct_map
* and instead fetch the code offsets through the `store_pairs` flag in
* search_preassigned().
*
* @param recons reconstructed vectors size (n, k, d)
*/
void search_and_reconstruct (idx_t n, const float *x, idx_t k,
float *distances, idx_t *labels,
float *recons) const override;
/** Reconstruct a vector given the location in terms of (inv list index +
* inv list offset) instead of the id.
*
* Useful for reconstructing when the direct_map is not maintained and
* the inv list offset is computed by search_preassigned() with
* `store_pairs` set.
*/
virtual void reconstruct_from_offset (long list_no, long offset,
float* recons) const;
/// Dataset manipulation functions /// Dataset manipulation functions
...@@ -157,18 +212,17 @@ struct IndexIVF: Index { ...@@ -157,18 +212,17 @@ struct IndexIVF: Index {
}; };
struct IndexIVFFlatStats { struct IndexIVFStats {
size_t nq; // nb of queries run size_t nq; // nb of queries run
size_t nlist; // nb of inverted lists scanned size_t nlist; // nb of inverted lists scanned
    size_t ndis;     // nb of distances computed size_t ndis;     // nb of distances computed
size_t npartial; // nb of bound computations (IndexIVFFlatIPBounds)
IndexIVFFlatStats () {reset (); } IndexIVFStats () {reset (); }
void reset (); void reset ();
}; };
// global var that collects them all // global var that collects them all
extern IndexIVFFlatStats indexIVFFlat_stats; extern IndexIVFStats indexIVF_stats;
...@@ -182,7 +236,7 @@ struct IndexIVFFlat: IndexIVF { ...@@ -182,7 +236,7 @@ struct IndexIVFFlat: IndexIVF {
IndexIVFFlat ( IndexIVFFlat (
Index * quantizer, size_t d, size_t nlist_, Index * quantizer, size_t d, size_t nlist_,
MetricType = METRIC_INNER_PRODUCT); MetricType = METRIC_L2);
/// same as add_with_ids, with precomputed coarse quantizer /// same as add_with_ids, with precomputed coarse quantizer
virtual void add_core (idx_t n, const float * x, const long *xids, virtual void add_core (idx_t n, const float * x, const long *xids,
...@@ -213,7 +267,8 @@ struct IndexIVFFlat: IndexIVF { ...@@ -213,7 +267,8 @@ struct IndexIVFFlat: IndexIVF {
*/ */
void update_vectors (int nv, idx_t *idx, const float *v); void update_vectors (int nv, idx_t *idx, const float *v);
void reconstruct(idx_t key, float* recons) const override; void reconstruct_from_offset (long list_no, long offset,
float* recons) const override;
IndexIVFFlat () {} IndexIVFFlat () {}
}; };
......
...@@ -53,7 +53,6 @@ IndexIVFPQ::IndexIVFPQ (Index * quantizer, size_t d, size_t nlist, ...@@ -53,7 +53,6 @@ IndexIVFPQ::IndexIVFPQ (Index * quantizer, size_t d, size_t nlist,
by_residual = true; by_residual = true;
use_precomputed_table = 0; use_precomputed_table = 0;
scan_table_threshold = 0; scan_table_threshold = 0;
max_codes = 0; // means unlimited
polysemous_training = nullptr; polysemous_training = nullptr;
do_polysemous_training = false; do_polysemous_training = false;
...@@ -192,6 +191,23 @@ void IndexIVFPQ::add_with_ids (idx_t n, const float * x, const long *xids) ...@@ -192,6 +191,23 @@ void IndexIVFPQ::add_with_ids (idx_t n, const float * x, const long *xids)
void IndexIVFPQ::add_core_o (idx_t n, const float * x, const long *xids, void IndexIVFPQ::add_core_o (idx_t n, const float * x, const long *xids,
float *residuals_2, const long *precomputed_idx) float *residuals_2, const long *precomputed_idx)
{ {
idx_t bs = 32768;
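    // add in fixed-size batches so the temporary assignment and residual
    // buffers stay bounded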
if (n > bs) {
for (idx_t i0 = 0; i0 < n; i0 += bs) {
idx_t i1 = std::min(i0 + bs, n);
if (verbose) {
printf("IndexIVFPQ::add_core_o: adding %ld:%ld / %ld\n",
i0, i1, n);
}
add_core_o (i1 - i0, x + i0 * d,
xids ? xids + i0 : nullptr,
residuals_2 ? residuals_2 + i0 * d : nullptr,
precomputed_idx ? precomputed_idx + i0 : nullptr);
}
return;
}
FAISS_THROW_IF_NOT (is_trained); FAISS_THROW_IF_NOT (is_trained);
double t0 = getmillisecs (); double t0 = getmillisecs ();
const long * idx; const long * idx;
...@@ -271,50 +287,22 @@ void IndexIVFPQ::add_core_o (idx_t n, const float * x, const long *xids, ...@@ -271,50 +287,22 @@ void IndexIVFPQ::add_core_o (idx_t n, const float * x, const long *xids,
ntotal += n; ntotal += n;
} }
void IndexIVFPQ::reconstruct_n (idx_t i0, idx_t ni, float *recons) const
{
FAISS_THROW_IF_NOT (ni == 0 || (i0 >= 0 && i0 + ni <= ntotal));
std::vector<float> centroid (d);
for (int key = 0; key < nlist; key++) {
const std::vector<long> & idlist = ids[key];
const uint8_t * code_line = codes[key].data();
for (long ofs = 0; ofs < idlist.size(); ofs++) {
long id = idlist[ofs];
if (!(id >= i0 && id < i0 + ni)) continue;
float *r = recons + d * (id - i0);
if (by_residual) {
quantizer->reconstruct (key, centroid.data());
pq.decode (code_line + ofs * pq.code_size, r);
for (int j = 0; j < d; j++) {
r[j] += centroid[j];
}
} else {
pq.decode (code_line + ofs * pq.code_size, r);
}
}
}
}
void IndexIVFPQ::reconstruct (idx_t key, float * recons) const void IndexIVFPQ::reconstruct_from_offset (long list_no, long offset,
float* recons) const
{ {
FAISS_THROW_IF_NOT (direct_map.size() == ntotal); const uint8_t* code = &(codes[list_no][offset * code_size]);
int list_no = direct_map[key] >> 32;
int ofs = direct_map[key] & 0xffffffff;
quantizer->reconstruct (list_no, recons); if (by_residual) {
const uint8_t * code = &(codes[list_no][ofs * pq.code_size]); std::vector<float> centroid(d);
quantizer->reconstruct (list_no, centroid.data());
for (size_t m = 0; m < pq.M; m++) { pq.decode (code, recons);
float * out = recons + m * pq.dsub; for (int i = 0; i < d; ++i) {
const float * cent = pq.get_centroids (m, code[m]); recons[i] += centroid[i];
for (size_t i = 0; i < pq.dsub; i++) { }
out[i] += cent[i]; } else {
} pq.decode (code, recons);
} }
} }
...@@ -1029,53 +1017,6 @@ void IndexIVFPQ::search_preassigned (idx_t nx, const float *qx, idx_t k, ...@@ -1029,53 +1017,6 @@ void IndexIVFPQ::search_preassigned (idx_t nx, const float *qx, idx_t k,
} }
void IndexIVFPQ::search_and_reconstruct (idx_t n, const float *x, idx_t k,
float *distances, idx_t *labels,
float *reconstructed)
{
long * idx = new long [n * nprobe];
ScopeDeleter<long> del (idx);
float * coarse_dis = new float [n * nprobe];
ScopeDeleter<float> del2 (coarse_dis);
quantizer->search (n, x, nprobe, coarse_dis, idx);
search_preassigned (n, x, k, idx, coarse_dis,
distances, labels, true);
for (long i = 0; i < n; i++) {
for (long j = 0; j < k; j++) {
long ij = i * k + j;
idx_t res = labels[ij];
float *recons = reconstructed + d * (ij);
if (res < 0) {
// fill with NaNs
memset(recons, -1, sizeof(*recons) * d);
} else {
int list_no = res >> 32;
int ofs = res & 0xffffffff;
labels[ij] = ids[list_no][ofs];
quantizer->reconstruct (list_no, recons);
const uint8_t * code = &(codes[list_no][ofs * pq.code_size]);
for (size_t m = 0; m < pq.M; m++) {
float * out = recons + m * pq.dsub;
const float * cent = pq.get_centroids (m, code[m]);
for (size_t l = 0; l < pq.dsub; l++) {
out[l] += cent[l];
}
}
}
}
}
}
IndexIVFPQ::IndexIVFPQ () IndexIVFPQ::IndexIVFPQ ()
{ {
// initialize some runtime values // initialize some runtime values
...@@ -1083,7 +1024,6 @@ IndexIVFPQ::IndexIVFPQ () ...@@ -1083,7 +1024,6 @@ IndexIVFPQ::IndexIVFPQ ()
scan_table_threshold = 0; scan_table_threshold = 0;
do_polysemous_training = false; do_polysemous_training = false;
polysemous_ht = 0; polysemous_ht = 0;
max_codes = 0;
polysemous_training = nullptr; polysemous_training = nullptr;
} }
...@@ -1209,29 +1149,22 @@ void IndexIVFPQR::add_core (idx_t n, const float *x, const long *xids, ...@@ -1209,29 +1149,22 @@ void IndexIVFPQR::add_core (idx_t n, const float *x, const long *xids,
} }
void IndexIVFPQR::search ( void IndexIVFPQR::search_preassigned (idx_t n, const float *x, idx_t k,
idx_t n, const float *x, idx_t k, const idx_t *idx,
float *distances, idx_t *labels) const const float *L1_dis,
float *distances, idx_t *labels,
bool store_pairs) const
{ {
FAISS_THROW_IF_NOT (is_trained);
long * idx = new long [n * nprobe];
ScopeDeleter<long> del (idx);
float * L1_dis = new float [n * nprobe];
ScopeDeleter<float> del2 (L1_dis);
uint64_t t0; uint64_t t0;
TIC;
quantizer->search (n, x, nprobe, L1_dis, idx);
indexIVFPQ_stats.assign_cycles += TOC;
TIC; TIC;
size_t k_coarse = long(k * k_factor); size_t k_coarse = long(k * k_factor);
idx_t *coarse_labels = new idx_t [k_coarse * n]; idx_t *coarse_labels = new idx_t [k_coarse * n];
ScopeDeleter<idx_t> del3 (coarse_labels); ScopeDeleter<idx_t> del1 (coarse_labels);
{ // query with quantizer levels 1 and 2. { // query with quantizer levels 1 and 2.
float *coarse_distances = new float [k_coarse * n]; float *coarse_distances = new float [k_coarse * n];
ScopeDeleter<float> del(coarse_distances); ScopeDeleter<float> del(coarse_distances);
search_preassigned (n, x, k_coarse, IndexIVFPQ::search_preassigned (n, x, k_coarse,
idx, L1_dis, coarse_distances, coarse_labels, idx, L1_dis, coarse_distances, coarse_labels,
true); true);
} }
...@@ -1287,7 +1220,8 @@ void IndexIVFPQR::search ( ...@@ -1287,7 +1220,8 @@ void IndexIVFPQR::search (
if (dis < heap_sim[0]) { if (dis < heap_sim[0]) {
maxheap_pop (k, heap_sim, heap_ids); maxheap_pop (k, heap_sim, heap_ids);
maxheap_push (k, heap_sim, heap_ids, dis, id); long id_or_pair = store_pairs ? sl : id;
maxheap_push (k, heap_sim, heap_ids, dis, id_or_pair);
} }
n_refine ++; n_refine ++;
} }
...@@ -1298,25 +1232,21 @@ void IndexIVFPQR::search ( ...@@ -1298,25 +1232,21 @@ void IndexIVFPQR::search (
indexIVFPQ_stats.refine_cycles += TOC; indexIVFPQ_stats.refine_cycles += TOC;
} }
void IndexIVFPQR::reconstruct_n (idx_t i0, idx_t ni, float *recons) const void IndexIVFPQR::reconstruct_from_offset (long list_no, long offset,
float* recons) const
{ {
std::vector<float> r3 (d); IndexIVFPQ::reconstruct_from_offset (list_no, offset, recons);
IndexIVFPQ::reconstruct_n (i0, ni, recons);
for (idx_t i = i0; i < i0 + ni; i++) {
float *r = recons + i * d;
refine_pq.decode (&refine_codes [i * refine_pq.code_size], r3.data());
for (int j = 0; j < d; j++) idx_t id = ids[list_no][offset];
r[j] += r3[j]; assert (0 <= id && id < ntotal);
std::vector<float> r3(d);
refine_pq.decode (&refine_codes [id * refine_pq.code_size], r3.data());
for (int i = 0; i < d; ++i) {
recons[i] += r3[i];
} }
} }
void IndexIVFPQR::merge_from (IndexIVF &other_in, idx_t add_id) void IndexIVFPQR::merge_from (IndexIVF &other_in, idx_t add_id)
{ {
IndexIVFPQR *other = dynamic_cast<IndexIVFPQR *> (&other_in); IndexIVFPQR *other = dynamic_cast<IndexIVFPQR *> (&other_in);
...@@ -1335,6 +1265,206 @@ long IndexIVFPQR::remove_ids(const IDSelector& /*sel*/) { ...@@ -1335,6 +1265,206 @@ long IndexIVFPQR::remove_ids(const IDSelector& /*sel*/) {
return 0; return 0;
} }
/*************************************
* Index2Layer implementation
*************************************/
Index2Layer::Index2Layer (Index * quantizer, size_t nlist,
int M,
MetricType metric):
Index (quantizer->d, metric),
q1 (quantizer, nlist),
pq (quantizer->d, M, 8)
{
is_trained = false;
for (int nbyte = 0; nbyte < 7; nbyte++) {
if ((1L << (8 * nbyte)) >= nlist) {
code_size_1 = nbyte;
break;
}
}
code_size_2 = pq.code_size;
code_size = code_size_1 + code_size_2;
}
Index2Layer::Index2Layer ()
{
code_size = code_size_1 = code_size_2 = 0;
}
Index2Layer::~Index2Layer ()
{}
void Index2Layer::train(idx_t n, const float* x)
{
if (verbose) {
printf ("training level-1 quantizer %ld vectors in %dD\n",
n, d);
}
q1.train_q1 (n, x, verbose, metric_type);
if (verbose) {
printf("computing residuals\n");
}
const float * x_in = x;
x = fvecs_maybe_subsample (
d, (size_t*)&n, pq.cp.max_points_per_centroid * pq.ksub,
x, verbose, pq.cp.seed);
ScopeDeleter<float> del_x (x_in == x ? nullptr : x);
std::vector<idx_t> assign(n); // assignement to coarse centroids
q1.quantizer->assign (n, x, assign.data());
std::vector<float> residuals(n * d);
for (idx_t i = 0; i < n; i++) {
q1.quantizer->compute_residual (
x + i * d, residuals.data() + i * d, assign[i]);
}
if (verbose)
printf ("training %zdx%zd product quantizer on %ld vectors in %dD\n",
pq.M, pq.ksub, n, d);
pq.verbose = verbose;
pq.train (n, residuals.data());
is_trained = true;
}
void Index2Layer::add(idx_t n, const float* x)
{
idx_t bs = 32768;
if (n > bs) {
for (idx_t i0 = 0; i0 < n; i0 += bs) {
idx_t i1 = std::min(i0 + bs, n);
if (verbose) {
printf("Index2Layer::add: adding %ld:%ld / %ld\n",
i0, i1, n);
}
add (i1 - i0, x + i0 * d);
}
return;
}
std::vector<idx_t> codes1 (n);
q1.quantizer->assign (n, x, codes1.data());
std::vector<float> residuals(n * d);
for (idx_t i = 0; i < n; i++) {
q1.quantizer->compute_residual (
x + i * d, residuals.data() + i * d, codes1[i]);
}
std::vector<uint8_t> codes2 (n * code_size_2);
pq.compute_codes (residuals.data(), codes2.data(), n);
codes.resize ((ntotal + n) * code_size);
uint8_t *wp = &codes[ntotal * code_size];
{
int i = 0x11223344;
const char *ip = (char*)&i;
FAISS_THROW_IF_NOT_MSG (ip[0] == 0x44,
"works only on a little-endian CPU");
}
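    // (the per-vector memcpy below keeps only the low code_size_1 bytes of
    // each 64-bit list id, which equals its value only on little-endian CPUs)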
// copy to output table
for (idx_t i = 0; i < n; i++) {
memcpy (wp, &codes1[i], code_size_1);
wp += code_size_1;
memcpy (wp, &codes2[i * code_size_2], code_size_2);
wp += code_size_2;
}
ntotal += n;
}
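// Worked layout example (illustrative): with nlist = 1024 and an 8-subvector,
// 8-bit PQ, the constructor picks code_size_1 = 2 (smallest nbyte with
// 256^nbyte >= nlist) and code_size_2 = 8, so each vector occupies 10 bytes
// in `codes`:
//
//   [ 2-byte little-endian list id | 8 PQ code bytes ]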
void Index2Layer::search(
idx_t n,
const float* x,
idx_t k,
float* distances,
idx_t* labels) const
{
FAISS_THROW_MSG ("not implemented");
}
void Index2Layer::reconstruct_n(idx_t i0, idx_t ni, float* recons) const
{
float recons1[d];
FAISS_THROW_IF_NOT (i0 >= 0 && i0 + ni <= ntotal);
const uint8_t *rp = &codes[i0 * code_size];
for (idx_t i = 0; i < ni; i++) {
idx_t key = 0;
memcpy (&key, rp, code_size_1);
q1.quantizer->reconstruct (key, recons1);
rp += code_size_1;
pq.decode (rp, recons);
for (idx_t j = 0; j < d; j++) {
recons[j] += recons1[j];
}
rp += code_size_2;
recons += d;
}
}
void Index2Layer::transfer_to_IVFPQ (IndexIVFPQ & other) const
{
FAISS_THROW_IF_NOT (other.nlist == q1.nlist);
FAISS_THROW_IF_NOT (other.code_size == code_size_2);
FAISS_THROW_IF_NOT (other.ntotal == 0);
const uint8_t *rp = codes.data();
for (idx_t i = 0; i < ntotal; i++) {
idx_t key = 0;
memcpy (&key, rp, code_size_1);
other.ids[key].push_back (i);
rp += code_size_1;
std::vector<uint8_t> & list = other.codes[key];
size_t len = list.size();
list.resize(len + code_size_2);
memcpy (&list[len], rp, code_size_2);
rp += code_size_2;
}
other.ntotal = ntotal;
}
void Index2Layer::reconstruct(idx_t key, float* recons) const
{
reconstruct_n (key, 1, recons);
}
void Index2Layer::reset()
{
ntotal = 0;
codes.clear ();
}
/***************************************** /*****************************************
* IndexIVFPQCompact implementation * IndexIVFPQCompact implementation
******************************************/ ******************************************/
......
...@@ -36,7 +36,6 @@ struct IndexIVFPQ: IndexIVF { ...@@ -36,7 +36,6 @@ struct IndexIVFPQ: IndexIVF {
// search-time parameters // search-time parameters
size_t scan_table_threshold; ///< use table computation or on-the-fly? size_t scan_table_threshold; ///< use table computation or on-the-fly?
size_t max_codes; ///< max nb of codes to visit to do a query
int polysemous_ht; ///< Hamming thresh for polysemous filtering int polysemous_ht; ///< Hamming thresh for polysemous filtering
...@@ -64,16 +63,8 @@ struct IndexIVFPQ: IndexIVF { ...@@ -64,16 +63,8 @@ struct IndexIVFPQ: IndexIVF {
/// same as train_residual, also output 2nd level residuals /// same as train_residual, also output 2nd level residuals
void train_residual_o (idx_t n, const float *x, float *residuals_2); void train_residual_o (idx_t n, const float *x, float *residuals_2);
void reconstruct_from_offset (long list_no, long offset,
/** Reconstruct a subset of the indexed vectors float* recons) const override;
*
* @param i0 first vector to reconstruct
* @param ni nb of vectors to reconstruct
* @param recons output array of reconstructed vectors, size ni * d
*/
void reconstruct_n(idx_t i0, idx_t ni, float* recons) const override;
void reconstruct(idx_t key, float* recons) const override;
/** Find exact duplicates in the dataset. /** Find exact duplicates in the dataset.
* *
...@@ -114,16 +105,6 @@ struct IndexIVFPQ: IndexIVF { ...@@ -114,16 +105,6 @@ struct IndexIVFPQ: IndexIVF {
float *distances, idx_t *labels, float *distances, idx_t *labels,
bool store_pairs) const override; bool store_pairs) const override;
/** Same as the search function, but also reconstruct approximate
* vectors for the search results
*
* @param reconstructed size (n, k, d)
**/
void search_and_reconstruct (idx_t n, const float *x, idx_t k,
float *distances, idx_t *labels,
float *reconstructed);
/// build precomputed table /// build precomputed table
void precompute_table (); void precompute_table ();
...@@ -190,17 +171,17 @@ struct IndexIVFPQR: IndexIVFPQ { ...@@ -190,17 +171,17 @@ struct IndexIVFPQR: IndexIVFPQ {
void add_core (idx_t n, const float *x, const long *xids, void add_core (idx_t n, const float *x, const long *xids,
const long *precomputed_idx = nullptr); const long *precomputed_idx = nullptr);
void reconstruct_n(idx_t i0, idx_t ni, float* recons) const override; void reconstruct_from_offset (long list_no, long offset,
float* recons) const override;
void merge_from (IndexIVF &other, idx_t add_id) override; void merge_from (IndexIVF &other, idx_t add_id) override;
void search( void search_preassigned (idx_t n, const float *x, idx_t k,
idx_t n, const idx_t *assign,
const float* x, const float *centroid_dis,
idx_t k, float *distances, idx_t *labels,
float* distances, bool store_pairs) const override;
idx_t* labels) const override;
IndexIVFPQR(); IndexIVFPQR();
}; };
...@@ -250,6 +231,60 @@ struct IndexIVFPQCompact: IndexIVFPQ { ...@@ -250,6 +231,60 @@ struct IndexIVFPQCompact: IndexIVFPQ {
}; };
/** Same as an IndexIVFPQ without the inverted lists: codes are stored sequentially
*
 * The class is mainly intended to store encoded vectors that can be
 * accessed randomly; the search function is not implemented.
*/
struct Index2Layer: Index {
/// first level quantizer
Level1Quantizer q1;
/// second level quantizer is always a PQ
ProductQuantizer pq;
/// Codes. Size ntotal * code_size.
std::vector<uint8_t> codes;
    /// size of the code for the first level (ceil(log256(q1.nlist)) bytes)
size_t code_size_1;
/// size of the code for the second level
size_t code_size_2;
/// code_size_1 + code_size_2
size_t code_size;
Index2Layer (Index * quantizer, size_t nlist,
int M, MetricType metric = METRIC_L2);
Index2Layer ();
~Index2Layer ();
void train(idx_t n, const float* x) override;
void add(idx_t n, const float* x) override;
/// not implemented
void search(
idx_t n,
const float* x,
idx_t k,
float* distances,
idx_t* labels) const override;
void reconstruct_n(idx_t i0, idx_t ni, float* recons) const override;
void reconstruct(idx_t key, float* recons) const override;
void reset() override;
/// transfer the flat codes to an IVFPQ index
void transfer_to_IVFPQ(IndexIVFPQ & other) const;
};
} // namespace faiss } // namespace faiss
......
...@@ -803,6 +803,7 @@ MultiIndexQuantizer::MultiIndexQuantizer (int d, ...@@ -803,6 +803,7 @@ MultiIndexQuantizer::MultiIndexQuantizer (int d,
void MultiIndexQuantizer::train(idx_t n, const float *x) void MultiIndexQuantizer::train(idx_t n, const float *x)
{ {
pq.verbose = verbose;
pq.train (n, x); pq.train (n, x);
is_trained = true; is_trained = true;
// count virtual elements in index // count virtual elements in index
......
...@@ -11,6 +11,8 @@ ...@@ -11,6 +11,8 @@
#include <cstdio> #include <cstdio>
#include <algorithm> #include <algorithm>
#include <malloc.h>
#include <omp.h> #include <omp.h>
#include <immintrin.h> #include <immintrin.h>
...@@ -46,6 +48,7 @@ namespace { ...@@ -46,6 +48,7 @@ namespace {
typedef Index::idx_t idx_t; typedef Index::idx_t idx_t;
typedef ScalarQuantizer::QuantizerType QuantizerType; typedef ScalarQuantizer::QuantizerType QuantizerType;
typedef ScalarQuantizer::RangeStat RangeStat; typedef ScalarQuantizer::RangeStat RangeStat;
using DistanceComputer = ScalarQuantizer::DistanceComputer;
/******************************************************************* /*******************************************************************
...@@ -117,145 +120,179 @@ struct Codec4bit { ...@@ -117,145 +120,179 @@ struct Codec4bit {
}; };
/******************************************************************* /*******************************************************************
* Similarity: gets vector components and computes a similarity wrt. a * Quantizer: normalizes scalar vector components, then passes them
* query vector stored in the object * through a codec
*/ */
struct SimilarityL2 {
const float *y, *yi;
explicit SimilarityL2 (const float * y): y(y) {}
/******* scalar accumulator *******/ struct Quantizer {
virtual void encode_vector(const float *x, uint8_t *code) const = 0;
virtual void decode_vector(const uint8_t *code, float *x) const = 0;
float accu;
void begin () { virtual ~Quantizer() {}
accu = 0; };
yi = y;
template<class Codec>
struct QuantizerUniform: Quantizer {
const size_t d;
const float vmin, vdiff;
QuantizerUniform(size_t d, const std::vector<float> &trained):
d(d), vmin(trained[0]), vdiff(trained[1])
{
} }
void add_component (float x) { void encode_vector(const float* x, uint8_t* code) const override {
float tmp = *yi++ - x; for (size_t i = 0; i < d; i++) {
accu += tmp * tmp; float xi = (x[i] - vmin) / vdiff;
if (xi < 0)
xi = 0;
if (xi > 1.0)
xi = 1.0;
Codec::encode_component(xi, code, i);
}
} }
float result () { void decode_vector(const uint8_t* code, float* x) const override {
return accu; for (size_t i = 0; i < d; i++) {
float xi = Codec::decode_component(code, i);
x[i] = vmin + xi * vdiff;
}
} }
float reconstruct_component (const uint8_t * code, int i) const
{
float xi = Codec::decode_component (code, i);
return vmin + xi * vdiff;
}
};
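/* Worked example (illustrative): with trained values vmin = -1 and
 * vdiff = 2, a component x = 0.5 is normalized to
 * xi = (0.5 - (-1)) / 2 = 0.75, clipped to [0, 1] and handed to the codec;
 * decoding returns vmin + xi' * vdiff for the codec's dequantized xi', i.e.
 * a value close to 0.5 up to the codec's resolution (1/256 for Codec8bit,
 * 1/16 for Codec4bit).
 */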
#ifdef USE_AVX #ifdef USE_AVX
/******* AVX accumulator *******/
__m256 accu8; template<class Codec>
struct QuantizerUniform8: QuantizerUniform<Codec> {
void begin_8 () { QuantizerUniform8 (size_t d, const std::vector<float> &trained):
accu8 = _mm256_setzero_ps(); QuantizerUniform<Codec> (d, trained) {}
yi = y;
}
void add_8_components (__m256 x) { __m256 reconstruct_8_components (const uint8_t * code, int i) const
__m256 yiv = _mm256_loadu_ps (yi); {
yi += 8; __m256 xi = Codec::decode_8_components (code, i);
__m256 tmp = yiv - x; return _mm256_set1_ps(this->vmin) + xi * _mm256_set1_ps (this->vdiff);
accu8 += tmp * tmp;
} }
float result_8 () {
__m256 sum = _mm256_hadd_ps(accu8, accu8);
__m256 sum2 = _mm256_hadd_ps(sum, sum);
// now add the 0th and 4th component
return
_mm_cvtss_f32 (_mm256_castps256_ps128(sum2)) +
_mm_cvtss_f32 (_mm256_extractf128_ps(sum2, 1));
}
#endif
}; };
struct SimilarityIP { #endif
const float *y, *yi;
const float accu0;
/******* scalar accumulator *******/
float accu;
SimilarityIP (const float * y, float accu0): template<class Codec>
y (y), accu0 (accu0) {} struct QuantizerNonUniform: Quantizer {
const size_t d;
const float *vmin, *vdiff;
void begin () { QuantizerNonUniform(size_t d, const std::vector<float> &trained):
accu = accu0; d(d), vmin(trained.data()), vdiff(trained.data() + d) {}
yi = y;
void encode_vector(const float* x, uint8_t* code) const override {
for (size_t i = 0; i < d; i++) {
float xi = (x[i] - vmin[i]) / vdiff[i];
if (xi < 0)
xi = 0;
if (xi > 1.0)
xi = 1.0;
Codec::encode_component(xi, code, i);
}
} }
void add_component (float x) { void decode_vector(const uint8_t* code, float* x) const override {
accu += *yi++ * x; for (size_t i = 0; i < d; i++) {
float xi = Codec::decode_component(code, i);
x[i] = vmin[i] + xi * vdiff[i];
}
} }
float result () { float reconstruct_component (const uint8_t * code, int i) const
return accu; {
float xi = Codec::decode_component (code, i);
return vmin[i] + xi * vdiff[i];
} }
#ifdef USE_AVX };
/******* AVX accumulator *******/
__m256 accu8;
void begin_8 () { #ifdef USE_AVX
accu8 = _mm256_setzero_ps();
yi = y;
}
void add_8_components (__m256 x) { template<class Codec>
__m256 yiv = _mm256_loadu_ps (yi); struct QuantizerNonUniform8: QuantizerNonUniform<Codec> {
yi += 8;
accu8 += yiv * x;
}
float result_8 () { QuantizerNonUniform8 (size_t d, const std::vector<float> &trained):
__m256 sum = _mm256_hadd_ps(accu8, accu8); QuantizerNonUniform<Codec> (d, trained) {}
__m256 sum2 = _mm256_hadd_ps(sum, sum);
// now add the 0th and 4th component __m256 reconstruct_8_components (const uint8_t * code, int i) const
return {
accu0 + __m256 xi = Codec::decode_8_components (code, i);
_mm_cvtss_f32 (_mm256_castps256_ps128(sum2)) + return _mm256_loadu_ps (this->vmin + i) + xi * _mm256_loadu_ps (this->vdiff + i);
_mm_cvtss_f32 (_mm256_extractf128_ps(sum2, 1));
} }
#endif
};
/******************************************************************* };
* templatized distance functions
*/
#endif
template<class Quantizer, class Similarity> Quantizer *select_quantizer (
float compute_distance(const Quantizer & quant, Similarity & sim, QuantizerType qtype,
const uint8_t *code) size_t d, const std::vector<float> & trained)
{ {
sim.begin(); #ifdef USE_AVX
for (size_t i = 0; i < quant.d; i++) { if (d % 8 == 0) {
float xi = quant.reconstruct_component (code, i); switch(qtype) {
sim.add_component (xi); case ScalarQuantizer::QT_8bit:
return new QuantizerNonUniform8<Codec8bit>(d, trained);
case ScalarQuantizer::QT_4bit:
return new QuantizerNonUniform8<Codec4bit>(d, trained);
case ScalarQuantizer::QT_8bit_uniform:
return new QuantizerUniform8<Codec8bit>(d, trained);
case ScalarQuantizer::QT_4bit_uniform:
return new QuantizerUniform8<Codec4bit>(d, trained);
}
} else
#endif
{
switch(qtype) {
case ScalarQuantizer::QT_8bit:
return new QuantizerNonUniform<Codec8bit>(d, trained);
case ScalarQuantizer::QT_4bit:
return new QuantizerNonUniform<Codec4bit>(d, trained);
case ScalarQuantizer::QT_8bit_uniform:
return new QuantizerUniform<Codec8bit>(d, trained);
case ScalarQuantizer::QT_4bit_uniform:
return new QuantizerUniform<Codec4bit>(d, trained);
}
} }
return sim.result(); FAISS_THROW_MSG ("unknown qtype");
return nullptr;
} }
#ifdef USE_AVX
template<class Quantizer, class Similarity> Quantizer *select_quantizer (const ScalarQuantizer &sq)
float compute_distance_8(const Quantizer & quant, Similarity & sim,
const uint8_t *code)
{ {
sim.begin_8(); return select_quantizer (sq.qtype, sq.d, sq.trained);
for (size_t i = 0; i < quant.d; i += 8) {
__m256 xi = quant.reconstruct_8_components (code, i);
sim.add_8_components (xi);
}
return sim.result_8();
} }
#endif
/******************************************************************* /*******************************************************************
...@@ -412,215 +449,261 @@ void train_NonUniform(RangeStat rs, float rs_arg, ...@@ -412,215 +449,261 @@ void train_NonUniform(RangeStat rs, float rs_arg,
} }
/******************************************************************* /*******************************************************************
* Quantizer: normalizes scalar vector components, then passes them * Similarity: gets vector components and computes a similarity wrt. a
* through a codec * query vector stored in the object. The data fields just encapsulate
* an accumulator.
*/ */
struct SimilarityL2 {
const float *y, *yi;
explicit SimilarityL2 (const float * y): y(y) {}
struct Quantizer { /******* scalar accumulator *******/
virtual void encode_vector(const float *x, uint8_t *code) const = 0;
virtual void decode_vector(const uint8_t *code, float *x) const = 0;
virtual float compute_distance_L2 (SimilarityL2 &sim, float accu;
const uint8_t * codes) const = 0;
virtual float compute_distance_IP (SimilarityIP &sim,
const uint8_t * codes) const = 0;
virtual ~Quantizer() {} void begin () {
}; accu = 0;
yi = y;
}
void add_component (float x) {
float tmp = *yi++ - x;
accu += tmp * tmp;
}
void add_component_2 (float x1, float x2) {
float tmp = x1 - x2;
accu += tmp * tmp;
}
float result () {
return accu;
}
template<class Codec> #ifdef USE_AVX
struct QuantizerUniform: Quantizer { __m256 accu8;
const size_t d;
const float vmin, vdiff;
QuantizerUniform(size_t d, const std::vector<float> &trained): void begin_8 () {
d(d), vmin(trained[0]), vdiff(trained[1]) { accu8 = _mm256_setzero_ps();
yi = y;
} }
void encode_vector(const float* x, uint8_t* code) const override { void add_8_components (__m256 x) {
for (size_t i = 0; i < d; i++) { __m256 yiv = _mm256_loadu_ps (yi);
float xi = (x[i] - vmin) / vdiff; yi += 8;
if (xi < 0) __m256 tmp = yiv - x;
xi = 0; accu8 += tmp * tmp;
if (xi > 1.0)
xi = 1.0;
Codec::encode_component(xi, code, i);
}
} }
void decode_vector(const uint8_t* code, float* x) const override { void add_8_components_2 (__m256 x, __m256 y) {
for (size_t i = 0; i < d; i++) { __m256 tmp = y - x;
float xi = Codec::decode_component(code, i); accu8 += tmp * tmp;
x[i] = vmin + xi * vdiff;
}
} }
float reconstruct_component (const uint8_t * code, int i) const float result_8 () {
{ __m256 sum = _mm256_hadd_ps(accu8, accu8);
float xi = Codec::decode_component (code, i); __m256 sum2 = _mm256_hadd_ps(sum, sum);
return vmin + xi * vdiff; // now add the 0th and 4th component
return
_mm_cvtss_f32 (_mm256_castps256_ps128(sum2)) +
_mm_cvtss_f32 (_mm256_extractf128_ps(sum2, 1));
} }
#endif
#ifdef USE_AVX };
__m256 reconstruct_8_components (const uint8_t * code, int i) const
{
__m256 xi = Codec::decode_8_components (code, i); struct SimilarityIP {
return _mm256_set1_ps(vmin) + xi * _mm256_set1_ps (vdiff); const float *y, *yi;
/******* scalar accumulator *******/
float accu;
explicit SimilarityIP (const float * y):
y (y) {}
void begin () {
accu = 0;
yi = y;
} }
#endif
float compute_distance_L2(SimilarityL2& sim, const uint8_t* codes) void add_component (float x) {
const override { accu += *yi++ * x;
return compute_distance(*this, sim, codes);
} }
float compute_distance_IP(SimilarityIP& sim, const uint8_t* codes) void add_component_2 (float x1, float x2) {
const override { accu += x1 * x2;
return compute_distance(*this, sim, codes); }
float result () {
return accu;
} }
};
#ifdef USE_AVX #ifdef USE_AVX
template<class Codec>
struct QuantizerUniform8: QuantizerUniform<Codec> {
QuantizerUniform8 (size_t d, const std::vector<float> &trained): __m256 accu8;
QuantizerUniform<Codec> (d, trained) {}
float compute_distance_L2(SimilarityL2& sim, const uint8_t* codes) void begin_8 () {
const override { accu8 = _mm256_setzero_ps();
return compute_distance_8(*this, sim, codes); yi = y;
} }
float compute_distance_IP(SimilarityIP& sim, const uint8_t* codes) void add_8_components (__m256 x) {
const override { __m256 yiv = _mm256_loadu_ps (yi);
return compute_distance_8(*this, sim, codes); yi += 8;
accu8 += yiv * x;
} }
};
#endif
void add_8_components_2 (__m256 x1, __m256 x2) {
accu8 += x1 * x2;
}
float result_8 () {
__m256 sum = _mm256_hadd_ps(accu8, accu8);
__m256 sum2 = _mm256_hadd_ps(sum, sum);
// now add the 0th and 4th component
return
_mm_cvtss_f32 (_mm256_castps256_ps128(sum2)) +
_mm_cvtss_f32 (_mm256_extractf128_ps(sum2, 1));
}
#endif
};
/*******************************************************************
* DistanceComputer: combines a similarity and a quantizer to do
* code-to-vector or code-to-code comparisons
*/
template<class Codec>
struct QuantizerNonUniform: Quantizer {
const size_t d;
const float *vmin, *vdiff;
QuantizerNonUniform(size_t d, const std::vector<float> &trained): template<class Quantizer, class Similarity>
d(d), vmin(trained.data()), vdiff(trained.data() + d) {} struct DCTemplate : ScalarQuantizer::DistanceComputer {
void encode_vector(const float* x, uint8_t* code) const override { Quantizer quant;
for (size_t i = 0; i < d; i++) {
float xi = (x[i] - vmin[i]) / vdiff[i];
if (xi < 0)
xi = 0;
if (xi > 1.0)
xi = 1.0;
Codec::encode_component(xi, code, i);
}
}
    DCTemplate(size_t d, const std::vector<float> &trained):
        quant(d, trained)
    {}

    float compute_distance (const float *x,
                            const uint8_t *code) override
    {
        Similarity sim(x);
        sim.begin();
        for (size_t i = 0; i < quant.d; i++) {
            float xi = quant.reconstruct_component (code, i);
            sim.add_component (xi);
        }
        return sim.result();
    }

    float compute_code_distance (const uint8_t *code1,
                                 const uint8_t *code2) override
    {
        Similarity sim(nullptr);
        sim.begin ();
        for (size_t i = 0; i < quant.d; i++) {
            float x1 = quant.reconstruct_component (code1, i);
            float x2 = quant.reconstruct_component (code2, i);
            sim.add_component_2 (x1, x2);
        }
        return sim.result ();
    }

};

#ifdef USE_AVX

template<class Quantizer, class Similarity>
struct DCTemplate_8 : ScalarQuantizer::DistanceComputer {

    Quantizer quant;

    DCTemplate_8(size_t d, const std::vector<float> &trained):
        quant(d, trained)
    {}

    float compute_distance (const float *x,
                            const uint8_t *code) override
    {
        Similarity sim(x);
        sim.begin_8();
        for (size_t i = 0; i < quant.d; i += 8) {
            __m256 xi = quant.reconstruct_8_components (code, i);
            sim.add_8_components (xi);
        }
        return sim.result_8();
    }

    float compute_code_distance (const uint8_t *code1,
                                 const uint8_t *code2) override
    {
        Similarity sim(nullptr);
        sim.begin_8 ();
        for (size_t i = 0; i < quant.d; i += 8) {
            __m256 x1 = quant.reconstruct_8_components (code1, i);
            __m256 x2 = quant.reconstruct_8_components (code2, i);
            sim.add_8_components_2 (x1, x2);
        }
        return sim.result_8 ();
    }

};

#endif

template<class Sim>
DistanceComputer *select_distance_computer (
        QuantizerType qtype,
        size_t d, const std::vector<float> & trained)
{
#ifdef USE_AVX
    if (d % 8 == 0) {
        switch(qtype) {
        case ScalarQuantizer::QT_8bit:
            return new DCTemplate_8<QuantizerNonUniform8
                                    <Codec8bit>, Sim>(d, trained);
        case ScalarQuantizer::QT_4bit:
            return new DCTemplate_8<QuantizerNonUniform8
                                    <Codec4bit>, Sim>(d, trained);
        case ScalarQuantizer::QT_8bit_uniform:
            return new DCTemplate_8<QuantizerUniform8
                                    <Codec8bit>, Sim>(d, trained);
        case ScalarQuantizer::QT_4bit_uniform:
            return new DCTemplate_8<QuantizerUniform8
                                    <Codec4bit>, Sim>(d, trained);
        }
    } else
#endif
    {
        switch(qtype) {
        case ScalarQuantizer::QT_8bit:
            return new DCTemplate<QuantizerNonUniform
                                  <Codec8bit>, Sim>(d, trained);
        case ScalarQuantizer::QT_4bit:
            return new DCTemplate<QuantizerNonUniform
                                  <Codec4bit>, Sim>(d, trained);
        case ScalarQuantizer::QT_8bit_uniform:
            return new DCTemplate<QuantizerUniform
                                  <Codec8bit>, Sim>(d, trained);
        case ScalarQuantizer::QT_4bit_uniform:
            return new DCTemplate<QuantizerUniform
                                  <Codec4bit>, Sim>(d, trained);
        }
    }
    FAISS_THROW_MSG ("unknown qtype");
    return nullptr;
}

Quantizer *select_quantizer (const ScalarQuantizer &sq)
{
    return select_quantizer (sq.qtype, sq.d, sq.trained);
}

} // anonymous namespace
...@@ -691,6 +774,19 @@ void ScalarQuantizer::decode (const uint8_t *codes, float *x, size_t n) const
        squant->decode_vector (codes + i * code_size, x + i * d);
}
ScalarQuantizer::DistanceComputer *ScalarQuantizer::get_distance_computer (
MetricType metric)
const
{
if (metric == METRIC_L2) {
return select_distance_computer<SimilarityL2>(qtype, d, trained);
} else {
return select_distance_computer<SimilarityIP>(qtype, d, trained);
}
}
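A minimal usage sketch of the new API (not part of the commit; it assumes a trained ScalarQuantizer sq and n encoded vectors stored contiguously, sq.code_size bytes each):

#include "IndexScalarQuantizer.h"
#include <cmath>

float closest_code (const faiss::ScalarQuantizer & sq,
                    const float *query, const uint8_t *codes, size_t n)
{
    // the caller owns the returned object; it is not thread-safe, so
    // typical usage is one DistanceComputer per thread
    faiss::ScalarQuantizer::DistanceComputer *dc =
        sq.get_distance_computer (faiss::METRIC_L2);
    float best = HUGE_VALF;
    for (size_t i = 0; i < n; i++) {
        float dis = dc->compute_distance (query, codes + i * sq.code_size);
        if (dis < best) best = dis;   // track the smallest L2 distance
    }
    delete dc;
    return best;
}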
/*******************************************************************
 * IndexScalarQuantizer implementation
 ********************************************************************/
...@@ -724,61 +820,66 @@ void IndexScalarQuantizer::add(idx_t n, const float* x)
    ntotal += n;
}
namespace {

template<class C>
void search_flat_scalar_quantizer(
        const IndexScalarQuantizer & index,
        idx_t n,
        const float* x,
        idx_t k,
        float* distances,
        idx_t* labels)
{
    size_t code_size = index.code_size;
    size_t d = index.d;

#pragma omp parallel
    {
        DistanceComputer *dc =
            index.sq.get_distance_computer(index.metric_type);
        ScopeDeleter1<DistanceComputer> del(dc);

#pragma omp for
        for (size_t i = 0; i < n; i++) {
            idx_t *idxi = labels + i * k;
            float *simi = distances + i * k;
            heap_heapify<C> (k, simi, idxi);

            const float *xi = x + i * d;
            const uint8_t *ci = index.codes.data ();

            for (size_t j = 0; j < index.ntotal; j++) {
                float accu = dc->compute_distance(xi, ci);
                if (C::cmp (simi [0], accu)) {
                    heap_pop<C> (k, simi, idxi);
                    heap_push<C> (k, simi, idxi, accu, j);
                }
                ci += code_size;
            }
            heap_reorder<C> (k, simi, idxi);
        }
    }
}

} // anonymous namespace

void IndexScalarQuantizer::search(
        idx_t n,
        const float* x,
        idx_t k,
        float* distances,
        idx_t* labels) const
{
    FAISS_THROW_IF_NOT (is_trained);
    if (metric_type == METRIC_L2) {
        search_flat_scalar_quantizer<CMax<float, idx_t> > (
            *this, n, x, k, distances, labels);
    } else {
        search_flat_scalar_quantizer<CMin<float, idx_t> > (
            *this, n, x, k, distances, labels);
    }
}
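The C template parameter selects the heap ordering per metric. A sketch of the comparator convention assumed above (CMaxSketch is an illustrative stand-in, not the faiss definition):

// For METRIC_L2, C = CMax keeps the k smallest distances: the heap
// root simi[0] is the worst (largest) of the current top-k, and
// C::cmp(simi[0], accu) accepts a candidate that improves on it.
template <typename T>
struct CMaxSketch {
    static bool cmp (T heap_root, T candidate) {
        return heap_root > candidate;   // smaller distance wins
    }
};
// CMin is the mirror image used for inner product, where larger
// scores are better.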
void IndexScalarQuantizer::reset()
...@@ -883,18 +984,20 @@ void IndexIVFScalarQuantizer::add_with_ids
namespace {
void search_with_probes_ip (const IndexIVFScalarQuantizer & index,
                            const float *x,
                            const idx_t *cent_ids, const float *cent_dis,
                            DistanceComputer & dc,
                            int k, float *simi, idx_t *idxi,
                            bool store_pairs)
{
    int nprobe = index.nprobe;
    size_t code_size = index.code_size;
    size_t d = index.d;
    std::vector<float> decoded(d);
    minheap_heapify (k, simi, idxi);
    size_t nscan = 0;
    for (int i = 0; i < nprobe; i++) {
        idx_t list_no = cent_ids[i];
        if (list_no < 0) break;
...@@ -903,11 +1006,11 @@ void search_with_probes_ip (const IndexIVFScalarQuantizer & index,
        const std::vector<idx_t> & ids = index.ids[list_no];
        const uint8_t* codes = index.codes[list_no].data();

        for (size_t j = 0; j < ids.size(); j++) {
            float accu = accu0 + dc.compute_distance(x, codes);
            if (accu > simi [0]) {
                minheap_pop (k, simi, idxi);
...@@ -916,7 +1019,9 @@ void search_with_probes_ip (const IndexIVFScalarQuantizer & index,
            }
            codes += code_size;
        }
        nscan += ids.size();
        if (index.max_codes && nscan > index.max_codes)
            break;
    }
    minheap_reorder (k, simi, idxi);
}
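Both probe loops now honour IndexIVF::max_codes: inverted lists are visited in the order returned by the coarse quantizer and scanning stops once nscan exceeds the budget; max_codes == 0 keeps the previous unbounded behaviour.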
...@@ -925,15 +1030,16 @@ void search_with_probes_L2 (const IndexIVFScalarQuantizer & index,
                            const float *x_in,
                            const idx_t *cent_ids,
                            const Index *quantizer,
                            DistanceComputer & dc,
                            int k, float *simi, idx_t *idxi,
                            bool store_pairs)
{
    int nprobe = index.nprobe;
    size_t code_size = index.code_size;
    size_t d = index.d;
    std::vector<float> x(d);
    maxheap_heapify (k, simi, idxi);
    size_t nscan = 0;
    for (int i = 0; i < nprobe; i++) {
        idx_t list_no = cent_ids[i];
        if (list_no < 0) break;
...@@ -944,11 +1050,9 @@ void search_with_probes_L2 (const IndexIVFScalarQuantizer & index,
        // shift of x_in wrt centroid
        quantizer->compute_residual (x_in, x.data(), list_no);

        for (size_t j = 0; j < ids.size(); j++) {
            float dis = dc.compute_distance (x.data(), codes);
            if (dis < simi [0]) {
                maxheap_pop (k, simi, idxi);
...@@ -957,6 +1061,9 @@ void search_with_probes_L2 (const IndexIVFScalarQuantizer & index,
            }
            codes += code_size;
        }
        nscan += ids.size();
        if (index.max_codes && nscan > index.max_codes)
            break;
    }
    maxheap_reorder (k, simi, idxi);
}
...@@ -972,28 +1079,49 @@ void IndexIVFScalarQuantizer::search_preassigned (
{
    FAISS_THROW_IF_NOT (is_trained);

    if (metric_type == METRIC_INNER_PRODUCT) {
#pragma omp parallel
        {
            DistanceComputer *dc = sq.get_distance_computer (metric_type);
            ScopeDeleter1<DistanceComputer> del(dc);
#pragma omp for
            for (size_t i = 0; i < n; i++) {
                search_with_probes_ip (*this, x + i * d,
                                       idx + i * nprobe, dis + i * nprobe, *dc,
                                       k, distances + i * k, labels + i * k,
                                       store_pairs);
            }
        }
    } else {
#pragma omp parallel
        {
            DistanceComputer *dc = sq.get_distance_computer (metric_type);
            ScopeDeleter1<DistanceComputer> del(dc);
#pragma omp for
            for (size_t i = 0; i < n; i++) {
                search_with_probes_L2 (*this, x + i * d,
                                       idx + i * nprobe, quantizer, *dc,
                                       k, distances + i * k, labels + i * k,
                                       store_pairs);
            }
        }
    }
}
void IndexIVFScalarQuantizer::reconstruct_from_offset (long list_no,
long offset,
float* recons) const
{
std::vector<float> centroid(d);
quantizer->reconstruct (list_no, centroid.data());
const uint8_t* code = &(codes[list_no][offset * code_size]);
sq.decode (code, recons, 1);
for (int i = 0; i < d; ++i) {
recons[i] += centroid[i];
}
}
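In effect the reconstruction is recons = centroid(list_no) + decode(code): the coarse quantizer supplies the centroid of the list the vector was assigned to, and the scalar quantizer decodes the residual stored in that list.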
}
...@@ -74,7 +74,24 @@ struct ScalarQuantizer {
                size_t n) const;

    /// decode a vector from a given code (or n vectors if third argument)
    void decode (const uint8_t *code, float *x, size_t n) const;
// fast, non thread-safe way of computing vector-to-code and
// code-to-code distances.
struct DistanceComputer {
/// vector-to-code distance computation
virtual float compute_distance (const float *x,
const uint8_t *code) = 0;
/// code-to-code distance computation
virtual float compute_code_distance (const uint8_t *code1,
const uint8_t *code2) = 0;
virtual ~DistanceComputer () {}
};
DistanceComputer *get_distance_computer (MetricType metric = METRIC_L2)
const;
};
...@@ -126,7 +143,7 @@ struct IndexScalarQuantizer: Index {
 * distances are computed.
 */
struct IndexIVFScalarQuantizer: IndexIVF {
    ScalarQuantizer sq;

    IndexIVFScalarQuantizer(Index *quantizer, size_t d, size_t nlist,
...@@ -145,6 +162,9 @@ struct IndexIVFScalarQuantizer: IndexIVF {
                    float *distances, idx_t *labels,
                    bool store_pairs) const override;
void reconstruct_from_offset (long list_no, long offset,
float* recons) const override;
};
...
...@@ -29,7 +29,7 @@ LIBOBJ=hamming.o utils.o \
        Clustering.o Heap.o VectorTransform.o index_io.o \
        PolysemousTraining.o MetaIndexes.o Index.o \
        ProductQuantizer.o AutoTune.o AuxIndexStructures.o \
        IndexScalarQuantizer.o FaissException.o IndexHNSW.o

$(LIBNAME).a: $(LIBOBJ)
...@@ -44,6 +44,7 @@ $(LIBNAME).$(SHAREDEXT): $(LIBOBJ)
utils.o: EXTRAFLAGS=$(BLASCFLAGS)
VectorTransform.o: EXTRAFLAGS=$(BLASCFLAGS)
ProductQuantizer.o: EXTRAFLAGS=$(BLASCFLAGS)
IndexHNSW.o: EXTRAFLAGS=$(BLASCFLAGS)

# for MKL, the flags when generating a dynamic lib are different from
# the ones when making an executable, but by default they are the same
...@@ -121,7 +122,7 @@ VectorTransform.o: VectorTransform.cpp VectorTransform.h Index.h utils.h \
index_io.o: index_io.cpp index_io.h FaissAssert.h FaissException.h \
        IndexFlat.h Index.h VectorTransform.h IndexLSH.h IndexPQ.h \
        ProductQuantizer.h Clustering.h Heap.h PolysemousTraining.h IndexIVF.h \
        IndexIVFPQ.h MetaIndexes.h IndexScalarQuantizer.h IndexHNSW.h utils.h
PolysemousTraining.o: PolysemousTraining.cpp PolysemousTraining.h \
        ProductQuantizer.h Clustering.h Index.h Heap.h utils.h hamming.h \
        FaissAssert.h FaissException.h
...@@ -134,12 +135,16 @@ ProductQuantizer.o: ProductQuantizer.cpp ProductQuantizer.h Clustering.h \
AutoTune.o: AutoTune.cpp AutoTune.h Index.h FaissAssert.h \
        FaissException.h utils.h Heap.h IndexFlat.h VectorTransform.h IndexLSH.h \
        IndexPQ.h ProductQuantizer.h Clustering.h PolysemousTraining.h \
        IndexIVF.h IndexIVFPQ.h MetaIndexes.h IndexScalarQuantizer.h IndexHNSW.h
AuxIndexStructures.o: AuxIndexStructures.cpp AuxIndexStructures.h Index.h
IndexScalarQuantizer.o: IndexScalarQuantizer.cpp IndexScalarQuantizer.h \
        IndexIVF.h Index.h Clustering.h Heap.h utils.h FaissAssert.h \
        FaissException.h
FaissException.o: FaissException.cpp FaissException.h
IndexHNSW.o: IndexHNSW.cpp IndexHNSW.h IndexFlat.h Index.h IndexPQ.h \
        ProductQuantizer.h Clustering.h Heap.h PolysemousTraining.h \
        IndexScalarQuantizer.h IndexIVF.h utils.h FaissAssert.h FaissException.h \
        IndexIVFPQ.h

clean:
...
...@@ -25,7 +25,7 @@ namespace faiss {
struct SimulatedAnnealingParameters {

    // optimization parameters
    double init_temperature;  // init probability of accepting a bad swap
    double temperature_decay; // at each iteration the temp is multiplied by this
    int n_iter;               // nb of iterations
    int n_redo;               // nb of runs of the simulation
...
...@@ -355,7 +355,7 @@ void ProductQuantizer::decode (const uint8_t *code, float *x) const
void ProductQuantizer::decode (const uint8_t *code, float *x, size_t n) const
{
    for (size_t i = 0; i < n; i++) {
        this->decode (code + code_size * i, x + d * i);
    }
}
...
...@@ -95,11 +95,12 @@ void VectorTransform::reverse_transform (
/*********************************************
 * LinearTransform
 *********************************************/

/// both d_in > d_out and d_out > d_in are supported
LinearTransform::LinearTransform (int d_in, int d_out,
                                  bool have_bias):
    VectorTransform (d_in, d_out), have_bias (have_bias),
    is_orthonormal (false), verbose (false)
{}

void LinearTransform::apply_noalloc (Index::idx_t n, const float * x,
...@@ -156,6 +157,56 @@ void LinearTransform::transform_transpose (idx_t n, const float * y,
    if (have_bias) delete [] y;
}
void LinearTransform::set_is_orthonormal ()
{
if (d_out > d_in) {
// not clear what we should do in this case
is_orthonormal = false;
return;
}
if (d_out == 0) { // borderline case, unnormalized matrix
is_orthonormal = true;
return;
}
double eps = 4e-5;
FAISS_ASSERT(A.size() >= d_out * d_in);
{
std::vector<float> ATA(d_out * d_out);
FINTEGER dii = d_in, doi = d_out;
float one = 1.0, zero = 0.0;
sgemm_ ("Transposed", "Not", &doi, &doi, &dii,
&one, A.data (), &dii,
A.data(), &dii,
&zero, ATA.data(), &doi);
is_orthonormal = true;
for (long i = 0; i < d_out; i++) {
for (long j = 0; j < d_out; j++) {
float v = ATA[i + j * d_out];
if (i == j) v-= 1;
if (fabs(v) > eps) {
is_orthonormal = false;
}
}
}
}
}
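The sgemm_ call forms the d_out x d_out Gram matrix of the rows of A, so the flag is set exactly when A A^T ~= I up to eps = 4e-5 (orthonormal rows). For such a matrix the pseudo-inverse is simply A^T, which is why reverse_transform below can delegate to transform_transpose, i.e. x ~= A^T (xt - b).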
void LinearTransform::reverse_transform (idx_t n, const float * xt,
float *x) const
{
if (is_orthonormal) {
transform_transpose (n, xt, x);
} else {
FAISS_THROW_MSG ("reverse transform not implemented for non-orthonormal matrices");
}
}
/*********************************************
 * RandomRotationMatrix
...@@ -183,13 +234,7 @@ void RandomRotationMatrix::init (int seed)
        }
        A.resize(d_in * d_out);
    }
    is_orthonormal = true;
}
/*********************************************
...@@ -422,12 +467,12 @@ void PCAMatrix::copy_from (const PCAMatrix & other)
void PCAMatrix::prepare_Ab ()
{

    if (!random_rotation) {
        FAISS_THROW_IF_NOT_MSG (
            d_out * d_in <= PCAMat.size(),
            "PCA matrix was trained on too few examples "
            "to output this number of dimensions");
        A = PCAMat;
        A.resize(d_out * d_in); // strip off useless dimensions
...@@ -480,8 +525,8 @@ void PCAMatrix::prepare_Ab ()
    } else {
        FAISS_THROW_IF_NOT_MSG (balanced_bins == 0,
                                "both balancing bins and applying a random rotation "
                                "does not make sense");

        RandomRotationMatrix rr(d_out, d_out);
        rr.init(5);
...@@ -517,14 +562,8 @@ void PCAMatrix::prepare_Ab ()
            b[i] = accu;
        }
    }
    is_orthonormal = eigen_power == 0;
}
/*********************************************
...@@ -701,15 +740,7 @@ void OPQMatrix::train (Index::idx_t n, const float *x)
    }

    is_trained = true;
    is_orthonormal = true;
}
...@@ -738,6 +769,12 @@ void NormalizationTransform::apply_noalloc
    }
}
void NormalizationTransform::reverse_transform (idx_t n, const float* xt,
float* x) const
{
memcpy (x, xt, sizeof (xt[0]) * n * d_in);
}
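This is a best-effort inverse: xt = x / ||x|| discards the norm of x, so only the direction is recoverable and the transform is reverted by copying xt unchanged.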
/*********************************************
 * IndexPreTransform
 *********************************************/
...@@ -810,6 +847,7 @@ void IndexPreTransform::train (idx_t n, const float *x)
    }
    for (int i = 0; i <= last_untrained; i++) {
        if (i < chain.size()) {
            VectorTransform *ltrans = chain [i];
            if (!ltrans->is_trained) {
...@@ -835,6 +873,7 @@ void IndexPreTransform::train (idx_t n, const float *x)
        }
        float * xt = chain[i]->apply (n, prev_x);
        if (prev_x != x) delete [] prev_x;
        prev_x = xt;
        del.set(xt);
...@@ -859,6 +898,20 @@ const float *IndexPreTransform::apply_chain (idx_t n, const float *x) const
    return prev_x;
}
void IndexPreTransform::reverse_chain (idx_t n, const float* xt, float* x) const
{
const float* next_x = xt;
ScopeDeleter<float> del;
for (int i = chain.size() - 1; i >= 0; i--) {
float* prev_x = (i == 0) ? x : new float [n * chain[i]->d_in];
ScopeDeleter<float> del2 ((prev_x == x) ? nullptr : prev_x);
chain [i]->reverse_transform (n, next_x, prev_x);
del2.swap (del);
next_x = prev_x;
}
}
void IndexPreTransform::add (idx_t n, const float *x)
{
    FAISS_THROW_IF_NOT (is_trained);
...@@ -903,24 +956,47 @@ long IndexPreTransform::remove_ids (const IDSelector & sel) {
}
void IndexPreTransform::reconstruct (idx_t key, float * recons) const
{
float *x = chain.empty() ? recons : new float [index->d];
ScopeDeleter<float> del (recons == x ? nullptr : x);
// Initial reconstruction
index->reconstruct (key, x);
// Revert transformations from last to first
reverse_chain (1, x, recons);
}
void IndexPreTransform::reconstruct_n (idx_t i0, idx_t ni, float *recons) const
{
    float *x = chain.empty() ? recons : new float [ni * index->d];
    ScopeDeleter<float> del (recons == x ? nullptr : x);
    // Initial reconstruction
    index->reconstruct_n (i0, ni, x);

    // Revert transformations from last to first
    reverse_chain (ni, x, recons);
}
void IndexPreTransform::search_and_reconstruct (
idx_t n, const float *x, idx_t k,
float *distances, idx_t *labels, float* recons) const
{
FAISS_THROW_IF_NOT (is_trained);
const float* xt = apply_chain (n, x);
ScopeDeleter<float> del ((xt == x) ? nullptr : xt);
float* recons_temp = chain.empty() ? recons : new float [n * k * index->d];
ScopeDeleter<float> del2 ((recons_temp == recons) ? nullptr : recons_temp);
index->search_and_reconstruct (n, xt, k, distances, labels, recons_temp);
// Revert transformations from last to first
reverse_chain (n * k, recons_temp, recons);
}
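An illustrative call pattern (a sketch, not from the commit; it assumes a trained IndexPreTransform *index and nq queries in xq):

#include "VectorTransform.h"
#include <vector>

void demo_search_and_reconstruct (faiss::IndexPreTransform *index,
                                  const float *xq, long nq)
{
    long k = 10;
    std::vector<float> D (nq * k);
    std::vector<faiss::Index::idx_t> I (nq * k);
    std::vector<float> R (nq * k * index->d);
    index->search_and_reconstruct (nq, xq, k, D.data(), I.data(), R.data());
    // R holds one reconstructed vector of dimension index->d per
    // (query, rank) pair, already mapped back through reverse_chain
    // to the original input space.
}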
/*********************************************
 * RemapDimensionsTransform
...
...@@ -37,7 +37,7 @@ struct VectorTransform {
    {}

    /// set if the VectorTransform does not require training, or if
    /// training is done already
    bool is_trained;
...@@ -78,6 +78,9 @@ struct LinearTransform: VectorTransform {
    bool have_bias; ///! whether to use the bias term
/// check if matrix A is orthonormal (enables reverse_transform)
bool is_orthonormal;
    /// Transformation matrix, size d_out * d_in
    std::vector<float> A;
...@@ -96,6 +99,13 @@ struct LinearTransform: VectorTransform {
    void transform_transpose (idx_t n, const float * y,
                              float *x) const;
/// works only if is_orthonormal
void reverse_transform (idx_t n, const float * xt,
float *x) const override;
/// compute A^T * A to set the is_orthonormal flag
void set_is_orthonormal ();
    bool verbose;

    ~LinearTransform() override {}
...@@ -113,8 +123,6 @@ struct RandomRotationMatrix: LinearTransform {
    /// must be called before the transform is used
    void init(int seed);

    RandomRotationMatrix () {}
};
...@@ -157,8 +165,6 @@ struct PCAMatrix: LinearTransform {
    /// will be completed with 0s
    void train(Index::idx_t n, const float* x) override;

    /// copy pre-trained PCA matrix
    void copy_from (const PCAMatrix & other);
...@@ -192,8 +198,6 @@ struct OPQMatrix: LinearTransform {
    explicit OPQMatrix (int d = 0, int M = 1, int d2 = -1);

    void train(Index::idx_t n, const float* x) override;
};
...@@ -230,6 +234,9 @@ struct NormalizationTransform: VectorTransform {
    NormalizationTransform ();

    void apply_noalloc(idx_t n, const float* x, float* xt) const override;
/// Identity transform since norm is not revertible
void reverse_transform(idx_t n, const float* xt, float* x) const override;
};
...@@ -271,13 +278,23 @@ struct IndexPreTransform: Index {
                float* distances,
                idx_t* labels) const override;
void reconstruct (idx_t key, float * recons) const override;
    void reconstruct_n (idx_t i0, idx_t ni, float *recons)
        const override;
void search_and_reconstruct (idx_t n, const float *x, idx_t k,
float *distances, idx_t *labels,
float *recons) const override;
    /// apply the transforms in the chain. The returned float * may be
    /// equal to x, otherwise it should be deallocated.
    const float * apply_chain (idx_t n, const float *x) const;
/// Reverse the transforms in the chain. May not be implemented for
/// all transforms in the chain or may return approximate results.
void reverse_chain (idx_t n, const float* xt, float* x) const;
    ~IndexPreTransform() override;
};
...
#!/usr/bin/env python2

# Copyright (c) 2015-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the BSD+Patents license found in the
# LICENSE file in the root directory of this source tree.
import time
import sys
import numpy as np
import faiss
#################################################################
# Small I/O functions
#################################################################
def ivecs_read(fname):
    a = np.fromfile(fname, dtype='int32')
    d = a[0]
    return a.reshape(-1, d + 1)[:, 1:].copy()

def fvecs_read(fname):
    return ivecs_read(fname).view('float32')
#################################################################
# Main program
#################################################################
print "load data"
xt = fvecs_read("sift1M/sift_learn.fvecs")
xb = fvecs_read("sift1M/sift_base.fvecs")
xq = fvecs_read("sift1M/sift_query.fvecs")
nq, d = xq.shape
print "load GT"
gt = ivecs_read("sift1M/sift_groundtruth.ivecs")
todo = sys.argv[1:]
if todo == []:
    todo = 'hnsw hnsw_sq ivf ivf_hnsw_quantizer kmeans kmeans_hnsw'.split()
def evaluate(index):
    # for timing with a single core
    # faiss.omp_set_num_threads(1)
    t0 = time.time()
    D, I = index.search(xq, 1)
    t1 = time.time()

    recall_at_1 = (I == gt[:, :1]).sum() / float(nq)
    print "\t %7.3f ms per query, R@1 %.4f" % (
        (t1 - t0) * 1000.0 / nq, recall_at_1)
if 'hnsw' in todo:

    print "Testing HNSW Flat"

    index = faiss.IndexHNSWFlat(d, 32)

    # training is not needed

    # this is the default, higher is more accurate and slower to
    # construct
    index.hnsw.efConstruction = 40

    print "add"
    # to see progress
    index.verbose = True
    index.add(xb)

    print "search"
    for efSearch in 16, 32, 64, 128, 256:
        print "efSearch", efSearch,
        index.hnsw.efSearch = efSearch
        evaluate(index)
if 'hnsw_sq' in todo:

    print "Testing HNSW with a scalar quantizer"
    # also set M so that the vectors and links both use 128 bytes per
    # entry (total 256 bytes)
    index = faiss.IndexHNSWSQ(d, faiss.ScalarQuantizer.QT_8bit, 16)

    print "training"
    # training for the scalar quantizer
    index.train(xt)

    # this is the default, higher is more accurate and slower to
    # construct
    index.hnsw.efConstruction = 40

    print "add"
    # to see progress
    index.verbose = True
    index.add(xb)

    print "search"
    for efSearch in 16, 32, 64, 128, 256:
        print "efSearch", efSearch,
        index.hnsw.efSearch = efSearch
        evaluate(index)
if 'ivf' in todo:

    print "Testing IVF Flat (baseline)"
    quantizer = faiss.IndexFlatL2(d)
    index = faiss.IndexIVFFlat(quantizer, d, 16384)
    index.cp.min_points_per_centroid = 5   # quiet warning

    # to see progress
    index.verbose = True

    print "training"
    index.train(xt)

    print "add"
    index.add(xb)

    print "search"
    for nprobe in 1, 4, 16, 64, 256:
        print "nprobe", nprobe,
        index.nprobe = nprobe
        evaluate(index)
if 'ivf_hnsw_quantizer' in todo:

    print "Testing IVF Flat with HNSW quantizer"
    quantizer = faiss.IndexHNSWFlat(d, 32)
    index = faiss.IndexIVFFlat(quantizer, d, 16384)
    index.cp.min_points_per_centroid = 5   # quiet warning
    index.quantizer_trains_alone = 2

    # to see progress
    index.verbose = True

    print "training"
    index.train(xt)

    print "add"
    index.add(xb)

    print "search"
    quantizer.hnsw.efSearch = 64
    for nprobe in 1, 4, 16, 64, 256:
        print "nprobe", nprobe,
        index.nprobe = nprobe
        evaluate(index)
# Bonus: 2 kmeans tests

if 'kmeans' in todo:
    print "Performing kmeans on sift1M database vectors (baseline)"
    clus = faiss.Clustering(d, 16384)
    clus.verbose = True
    clus.niter = 10
    index = faiss.IndexFlatL2(d)
    clus.train(xb, index)

if 'kmeans_hnsw' in todo:
    print "Performing kmeans on sift1M using HNSW assignment"
    clus = faiss.Clustering(d, 16384)
    clus.verbose = True
    clus.niter = 10
    index = faiss.IndexHNSWFlat(d, 32)
    # increase the default efSearch, otherwise the number of empty
    # clusters is too high.
    index.hnsw.efSearch = 128
    clus.train(xb, index)
...@@ -427,7 +427,7 @@ def replacement_map_add(self, keys, vals):
def replacement_map_search_multiple(self, keys):
    n, = keys.shape
    vals = np.empty(n, dtype='int64')
    self.search_multiple_c(n, swig_ptr(keys), swig_ptr(vals))
    return vals
...
...@@ -150,6 +150,8 @@ dep:
        ../VectorTransform.h ../MetaIndexes.h GpuIndexFlat.h GpuIndexIVFFlat.h \
        GpuIndexIVF.h ../Clustering.h GpuIndexIVFPQ.h IndexProxy.h \
        utils/WorkerThread.h
./GpuClonerOptions.o: GpuClonerOptions.cpp GpuClonerOptions.h \
        GpuIndicesOptions.h
impl/RemapIndices.o: impl/RemapIndices.cpp impl/RemapIndices.h \
        impl/../../FaissAssert.h impl/../../FaissException.h
utils/DeviceMemory.o: utils/DeviceMemory.cpp utils/DeviceMemory.h \
...
...@@ -111,4 +111,4 @@ class EvalIVFPQAccuracy(unittest.TestCase):
        index = faiss.index_factory(12, "PCAR8,IVF10,PQ4")
        res = faiss.StandardGpuResources()
        gpu_index = faiss.index_cpu_to_gpu(res, 0, index)
        faiss.GpuParameterSpace().set_index_parameter(gpu_index, "nprobe", 3)
...@@ -25,6 +25,7 @@
#include "IndexIVFPQ.h"
#include "MetaIndexes.h"
#include "IndexScalarQuantizer.h"
#include "IndexHNSW.h"

/*************************************************************
 * The I/O format is the content of the class. For objects that are
...@@ -153,8 +154,6 @@ static void write_index_header (const Index *idx, FILE *f) {
    WRITE1 (idx->metric_type);
}

void write_VectorTransform (const VectorTransform *vt, FILE *f) {
    if (const LinearTransform * lt =
           dynamic_cast < const LinearTransform *> (vt)) {
...@@ -221,6 +220,21 @@ void write_ProductQuantizer (const ProductQuantizer*pq, const char *fname) {
    write_ProductQuantizer (pq, f);
}
static void write_HNSW (const HNSW *hnsw, FILE *f) {
WRITEVECTOR (hnsw->assign_probas);
WRITEVECTOR (hnsw->cum_nneighbor_per_level);
WRITEVECTOR (hnsw->levels);
WRITEVECTOR (hnsw->offsets);
WRITEVECTOR (hnsw->neighbors);
WRITE1 (hnsw->entry_point);
WRITE1 (hnsw->max_level);
WRITE1 (hnsw->efConstruction);
WRITE1 (hnsw->efSearch);
WRITE1 (hnsw->upper_beam);
}
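read_HNSW further down mirrors this field order exactly; the format is order-sensitive, so the two functions have to be kept in sync whenever fields are added to HNSW.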
static void write_ivf_header (const IndexIVF * ivf, FILE *f,
                              bool include_ids = true) {
...@@ -265,6 +279,19 @@ void write_index (const Index *idx, FILE *f) {
        WRITE1 (idxp->search_type);
        WRITE1 (idxp->encode_signs);
        WRITE1 (idxp->polysemous_ht);
} else if(const Index2Layer * idxp =
dynamic_cast<const Index2Layer *> (idx)) {
uint32_t h = fourcc ("Ix2L");
WRITE1 (h);
write_index_header (idx, f);
write_index (idxp->q1.quantizer, f);
WRITE1 (idxp->q1.nlist);
WRITE1 (idxp->q1.quantizer_trains_alone);
write_ProductQuantizer (&idxp->pq, f);
WRITE1 (idxp->code_size_1);
WRITE1 (idxp->code_size_2);
WRITE1 (idxp->code_size);
WRITEVECTOR (idxp->codes);
    } else if(const IndexScalarQuantizer * idxs =
              dynamic_cast<const IndexScalarQuantizer *> (idx)) {
        uint32_t h = fourcc ("IxSQ");
...@@ -348,6 +375,19 @@ void write_index (const Index *idx, FILE *f) {
        write_index_header (idxmap, f);
        write_index (idxmap->index, f);
        WRITEVECTOR (idxmap->id_map);
} else if(const IndexHNSW * idxhnsw =
dynamic_cast<const IndexHNSW *> (idx)) {
uint32_t h =
dynamic_cast<const IndexHNSWFlat*>(idx) ? fourcc("IHNf") :
dynamic_cast<const IndexHNSWPQ*>(idx) ? fourcc("IHNp") :
dynamic_cast<const IndexHNSWSQ*>(idx) ? fourcc("IHNs") :
dynamic_cast<const IndexHNSW2Level*>(idx) ? fourcc("IHN2") :
0;
FAISS_THROW_IF_NOT (h != 0);
WRITE1 (h);
write_index_header (idxhnsw, f);
write_HNSW (&idxhnsw->hnsw, f);
write_index (idxhnsw->storage, f);
    } else {
        FAISS_THROW_MSG ("don't know how to serialize this type of index");
    }
...@@ -409,6 +449,9 @@ VectorTransform* read_VectorTransform (FILE *f) {
        READ1 (lt->have_bias);
        READVECTOR (lt->A);
        READVECTOR (lt->b);
        FAISS_THROW_IF_NOT (lt->A.size() >= lt->d_in * lt->d_out);
        FAISS_THROW_IF_NOT (!lt->have_bias || lt->b.size() >= lt->d_out);
        lt->set_is_orthonormal();
        vt = lt;
    } else if (h == fourcc ("RmDT")) {
        RemapDimensionsTransform *rdt = new RemapDimensionsTransform ();
...@@ -444,6 +487,19 @@ static void read_ScalarQuantizer (ScalarQuantizer *ivsc, FILE *f) {
    READVECTOR (ivsc->trained);
}
static void read_HNSW (HNSW *hnsw, FILE *f) {
READVECTOR (hnsw->assign_probas);
READVECTOR (hnsw->cum_nneighbor_per_level);
READVECTOR (hnsw->levels);
READVECTOR (hnsw->offsets);
READVECTOR (hnsw->neighbors);
READ1 (hnsw->entry_point);
READ1 (hnsw->max_level);
READ1 (hnsw->efConstruction);
READ1 (hnsw->efSearch);
READ1 (hnsw->upper_beam);
}
ProductQuantizer * read_ProductQuantizer (const char*fname) {
    FILE *f = fopen (fname, "r");
...@@ -675,6 +731,33 @@ Index *read_index (FILE * f, bool try_mmap) {
            static_cast<IndexIDMap2*>(idxmap)->construct_rev_map ();
        }
        idx = idxmap;
} else if (h == fourcc ("Ix2L")) {
Index2Layer * idxp = new Index2Layer ();
read_index_header (idxp, f);
idxp->q1.quantizer = read_index (f);
READ1 (idxp->q1.nlist);
READ1 (idxp->q1.quantizer_trains_alone);
read_ProductQuantizer (&idxp->pq, f);
READ1 (idxp->code_size_1);
READ1 (idxp->code_size_2);
READ1 (idxp->code_size);
READVECTOR (idxp->codes);
idx = idxp;
} else if(h == fourcc("IHNf") || h == fourcc("IHNp") ||
h == fourcc("IHNs") || h == fourcc("IHN2")) {
IndexHNSW *idxhnsw = nullptr;
if (h == fourcc("IHNf")) idxhnsw = new IndexHNSWFlat ();
if (h == fourcc("IHNp")) idxhnsw = new IndexHNSWPQ ();
if (h == fourcc("IHNs")) idxhnsw = new IndexHNSWSQ ();
if (h == fourcc("IHN2")) idxhnsw = new IndexHNSW2Level ();
read_index_header (idxhnsw, f);
read_HNSW (&idxhnsw->hnsw, f);
idxhnsw->storage = read_index (f);
idxhnsw->own_fields = true;
if (h == fourcc("IHNp")) {
dynamic_cast<IndexPQ*>(idxhnsw->storage)->pq.compute_sdc_table ();
}
idx = idxhnsw;
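The SDC table is recomputed on load for the PQ variant, presumably because IndexHNSWPQ compares stored codes against each other (symmetric distance) during graph traversal and the table, being derived data, is not serialized.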
    } else {
        FAISS_THROW_FMT("Index type 0x%08x not supported\n", h);
        idx = nullptr;
...@@ -771,6 +854,12 @@ Index *Cloner::clone_Index (const Index *index)
        res->chain.push_back (clone_VectorTransform (ipt->chain[i]));
        res->own_fields = true;
        return res;
} else if (const IndexIDMap *idmap =
dynamic_cast<const IndexIDMap*> (index)) {
IndexIDMap *res = new IndexIDMap (*idmap);
res->own_fields = true;
res->index = clone_Index (idmap->index);
return res;
    } else {
        FAISS_THROW_MSG( "clone not supported for this type of Index");
    }
...
...@@ -27,7 +27,8 @@
typedef unsigned long uint64_t;
typedef uint64_t size_t;
typedef int int32_t;
typedef unsigned char uint8_t;

#define __restrict
...@@ -77,6 +78,7 @@ extern "C" {
#include "IndexIVF.h" #include "IndexIVF.h"
#include "IndexIVFPQ.h" #include "IndexIVFPQ.h"
#include "IndexScalarQuantizer.h" #include "IndexScalarQuantizer.h"
#include "IndexHNSW.h"
#include "MetaIndexes.h" #include "MetaIndexes.h"
#include "FaissAssert.h" #include "FaissAssert.h"
...@@ -247,6 +249,7 @@ int get_num_gpus() ...@@ -247,6 +249,7 @@ int get_num_gpus()
%include "IndexPQ.h" %include "IndexPQ.h"
%include "IndexIVF.h" %include "IndexIVF.h"
%include "IndexScalarQuantizer.h" %include "IndexScalarQuantizer.h"
%include "IndexHNSW.h"
%ignore faiss::IndexIVFPQ::alloc_type;
%include "IndexIVFPQ.h"
...@@ -431,6 +434,11 @@ struct AsyncIndexSearchC {
DOWNCAST ( IndexLSH )
DOWNCAST ( IndexPreTransform )
DOWNCAST ( MultiIndexQuantizer )
DOWNCAST ( IndexHNSWFlat )
DOWNCAST ( IndexHNSWPQ )
DOWNCAST ( IndexHNSWSQ )
DOWNCAST ( IndexHNSW2Level )
DOWNCAST ( Index2Layer )
#ifdef GPU_WRAPPER
DOWNCAST_GPU ( IndexProxy )
DOWNCAST_GPU ( GpuIndexIVFPQ )
...@@ -537,11 +545,14 @@ PyObject *swig_ptr (PyObject *a)
    if(PyArray_TYPE(ao) == NPY_FLOAT32) {
        return SWIG_NewPointerObj(data, SWIGTYPE_p_float, 0);
    }
if(PyArray_TYPE(ao) == NPY_FLOAT64) {
return SWIG_NewPointerObj(data, SWIGTYPE_p_double, 0);
}
    if(PyArray_TYPE(ao) == NPY_INT32) {
        return SWIG_NewPointerObj(data, SWIGTYPE_p_int, 0);
    }
    if(PyArray_TYPE(ao) == NPY_UINT8) {
        return SWIG_NewPointerObj(data, SWIGTYPE_p_unsigned_char, 0);
    }
    if(PyArray_TYPE(ao) == NPY_UINT64) {
        return SWIG_NewPointerObj(data, SWIGTYPE_p_unsigned_long, 0);
...
...@@ -9,7 +9,6 @@
import os
import time
import numpy as np

try:
    import matplotlib
...
...@@ -92,7 +92,7 @@ class TestProductQuantizer(unittest.TestCase):
    def test_pq(self):
        d = 64
        n = 2000
        cs = 4
        np.random.seed(123)
        x = np.random.random(size=(n, d)).astype('float32')
...@@ -103,8 +103,20 @@ class TestProductQuantizer(unittest.TestCase):
        diff = ((x - x2)**2).sum()

        # print "diff=", diff
        # diff= 4418.0562
        self.assertGreater(5000, diff)
        pq10 = faiss.ProductQuantizer(d, cs, 10)
        assert pq10.code_size == cs * 2
        pq10.verbose = True
        pq10.cp.verbose = True
        pq10.train(x)
        codes = pq10.compute_codes(x)

        x10 = pq10.decode(codes)
        diff10 = ((x - x10)**2).sum()
        self.assertGreater(diff, diff10)
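With 10 bits per sub-quantizer a code no longer fits in one byte, so each of the cs sub-codes is stored on 2 bytes, hence code_size == cs * 2; the final assertion checks that the 10-bit quantizer reconstructs x more accurately than the 8-bit one.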
class TestRevSwigPtr(unittest.TestCase):
...@@ -132,7 +144,7 @@ class TestException(unittest.TestCase):
        try:
            # an unsupported operation for IndexFlat
            index.add_with_ids(a, b)
        except RuntimeError as e:
            assert 'add_with_ids not implemented' in str(e)
        else:
            assert False, 'exception did not fire???'
...@@ -141,14 +153,14 @@ class TestException(unittest.TestCase):
        try:
            faiss.index_factory(12, 'IVF256,Flat,PQ8')
        except RuntimeError as e:
            assert 'could not parse' in str(e)
        else:
            assert False, 'exception did not fire???'


class TestMapLong2Long(unittest.TestCase):

    def test_maplong2long(self):
        keys = np.array([13, 45, 67])
        vals = np.array([3, 8, 2])
...@@ -160,5 +172,46 @@ class TestMapLong2Long(unittest.TestCase):
        assert m.search(12343) == -1
class TestOrthogonalReconstruct(unittest.TestCase):

    def test_recons_orthonormal(self):
        lt = faiss.LinearTransform(20, 10, True)
        rs = np.random.RandomState(10)
        A, _ = np.linalg.qr(rs.randn(20, 20))
        A = A[:10].astype('float32')
        faiss.copy_array_to_vector(A.ravel(), lt.A)
        faiss.copy_array_to_vector(rs.randn(10).astype('float32'), lt.b)

        lt.set_is_orthonormal()
        assert lt.is_orthonormal

        x = rs.rand(30, 20).astype('float32')
        xt = lt.apply_py(x)
        xtt = lt.reverse_transform(xt)
        xttt = lt.apply_py(xtt)

        err = ((xt - xttt)**2).sum()

        self.assertGreater(1e-5, err)

    def test_recons_orthogonal_impossible(self):
        lt = faiss.LinearTransform(20, 10, True)
        rs = np.random.RandomState(10)
        A = rs.randn(10 * 20).astype('float32')
        faiss.copy_array_to_vector(A.ravel(), lt.A)
        faiss.copy_array_to_vector(rs.randn(10).astype('float32'), lt.b)

        lt.set_is_orthonormal()
        assert not lt.is_orthonormal

        x = rs.rand(30, 20).astype('float32')
        xt = lt.apply_py(x)
        try:
            lt.reverse_transform(xt)
        except Exception:
            pass
        else:
            self.fail('reverse_transform should have raised an exception')
if __name__ == '__main__':
    unittest.main()
...@@ -6,7 +6,6 @@
#! /usr/bin/env python2

import unittest
import faiss
...
...@@ -11,6 +11,8 @@
import numpy as np
import unittest
import faiss
import tempfile
import os
def get_dataset(d, nb, nt, nq):
...@@ -56,6 +58,7 @@ class EvalIVFPQAccuracy(unittest.TestCase):
        coarse_quantizer = faiss.IndexFlatL2(d)
        index = faiss.IndexIVFPQ(coarse_quantizer, d, 32, 8, 8)
        index.cp.min_points_per_centroid = 5    # quiet warning
        index.train(xt)
        index.add(xb)
        index.nprobe = 4
...@@ -65,6 +68,23 @@ class EvalIVFPQAccuracy(unittest.TestCase):
        self.assertGreater(n_ok, nq * 0.66)
        # check that an Index2Layer gives the same reconstruction
        # this is a bit fragile: it assumes 2 runs of training give
        # the exact same result.
        index2 = faiss.Index2Layer(coarse_quantizer, 32, 8)

        if True:
            index2.train(xt)
        else:
            index2.pq = index.pq
            index2.is_trained = True
        index2.add(xb)
        ref_recons = index.reconstruct_n(0, nb)
        new_recons = index2.reconstruct_n(0, nb)
        self.assertTrue(np.all(ref_recons == new_recons))
class TestMultiIndexQuantizer(unittest.TestCase):
...@@ -114,6 +134,7 @@ class TestScalarQuantizer(unittest.TestCase):
        index = faiss.IndexIVFFlat(quantizer, d, ncent,
                                   faiss.METRIC_L2)
        index.cp.min_points_per_centroid = 5    # quiet warning
        index.nprobe = 4
        index.train(xt)
        index.add(xb)
...@@ -201,5 +222,175 @@ class TestRangeSearch(unittest.TestCase):
        self.assertGreaterEqual(1e-4, abs(Dline[idx] - dis))
class TestSearchAndReconstruct(unittest.TestCase):

    def run_search_and_reconstruct(self, index, xb, xq, k=10, eps=None):
        n, d = xb.shape
        assert xq.shape[1] == d
        assert index.d == d

        D_ref, I_ref = index.search(xq, k)
        R_ref = index.reconstruct_n(0, n)
        D, I, R = index.search_and_reconstruct(xq, k)

        self.assertTrue((D == D_ref).all())
        self.assertTrue((I == I_ref).all())
        self.assertEqual(R.shape[:2], I.shape)
        self.assertEqual(R.shape[2], d)

        # (n, k, ..) -> (n * k, ..)
        I_flat = I.reshape(-1)
        R_flat = R.reshape(-1, d)
        # Filter out -1s when not enough results
        R_flat = R_flat[I_flat >= 0]
        I_flat = I_flat[I_flat >= 0]

        recons_ref_err = np.mean(np.linalg.norm(R_flat - R_ref[I_flat]))
        self.assertLessEqual(recons_ref_err, 1e-6)

        def norm1(x):
            return np.sqrt((x ** 2).sum(axis=1))

        recons_err = np.mean(norm1(R_flat - xb[I_flat]))

        print('Reconstruction error = %.3f' % recons_err)
        if eps is not None:
            self.assertLessEqual(recons_err, eps)

        return D, I, R
    def test_IndexFlat(self):
        d = 32
        nb = 1000
        nt = 1500
        nq = 200

        (xt, xb, xq) = get_dataset(d, nb, nt, nq)
        index = faiss.IndexFlatL2(d)
        index.add(xb)

        self.run_search_and_reconstruct(index, xb, xq, eps=0.0)

    def test_IndexIVFFlat(self):
        d = 32
        nb = 1000
        nt = 1500
        nq = 200

        (xt, xb, xq) = get_dataset(d, nb, nt, nq)
        quantizer = faiss.IndexFlatL2(d)
        index = faiss.IndexIVFFlat(quantizer, d, 32, faiss.METRIC_L2)
        index.cp.min_points_per_centroid = 5    # quiet warning
        index.nprobe = 4
        index.train(xt)
        index.add(xb)

        self.run_search_and_reconstruct(index, xb, xq, eps=0.0)

    def test_IndexIVFPQ(self):
        d = 32
        nb = 1000
        nt = 1500
        nq = 200

        (xt, xb, xq) = get_dataset(d, nb, nt, nq)
        quantizer = faiss.IndexFlatL2(d)
        index = faiss.IndexIVFPQ(quantizer, d, 32, 8, 8)
        index.cp.min_points_per_centroid = 5    # quiet warning
        index.nprobe = 4
        index.train(xt)
        index.add(xb)

        self.run_search_and_reconstruct(index, xb, xq, eps=1.0)

    def test_MultiIndex(self):
        d = 32
        nb = 1000
        nt = 1500
        nq = 200

        (xt, xb, xq) = get_dataset(d, nb, nt, nq)
        index = faiss.index_factory(d, "IMI2x5,PQ8np")
        faiss.ParameterSpace().set_index_parameter(index, "nprobe", 4)
        index.train(xt)
        index.add(xb)

        self.run_search_and_reconstruct(index, xb, xq, eps=1.0)

    def test_IndexTransform(self):
        d = 32
        nb = 1000
        nt = 1500
        nq = 200

        (xt, xb, xq) = get_dataset(d, nb, nt, nq)
        index = faiss.index_factory(d, "L2norm,PCA8,IVF32,PQ8np")
        faiss.ParameterSpace().set_index_parameter(index, "nprobe", 4)
        index.train(xt)
        index.add(xb)

        self.run_search_and_reconstruct(index, xb, xq)
class TestHNSW(unittest.TestCase):

    def __init__(self, *args, **kwargs):
        unittest.TestCase.__init__(self, *args, **kwargs)
        d = 32
        nt = 0
        nb = 1500
        nq = 500

        (_, self.xb, self.xq) = get_dataset_2(d, nb, nt, nq)

        index = faiss.IndexFlatL2(d)
        index.add(self.xb)
        Dref, Iref = index.search(self.xq, 1)
        self.Iref = Iref

    def test_hnsw(self):
        d = self.xq.shape[1]

        index = faiss.IndexHNSWFlat(d, 16)
        index.add(self.xb)
        Dhnsw, Ihnsw = index.search(self.xq, 1)

        self.assertGreaterEqual((self.Iref == Ihnsw).sum(), 460)

        self.io_and_retest(index, Dhnsw, Ihnsw)

    def io_and_retest(self, index, Dhnsw, Ihnsw):
        _, tmpfile = tempfile.mkstemp()
        try:
            faiss.write_index(index, tmpfile)
            index2 = faiss.read_index(tmpfile)
        finally:
            if os.path.exists(tmpfile):
                os.unlink(tmpfile)

        Dhnsw2, Ihnsw2 = index2.search(self.xq, 1)

        self.assertTrue(np.all(Dhnsw2 == Dhnsw))
        self.assertTrue(np.all(Ihnsw2 == Ihnsw))

    def test_hnsw_2level(self):
        d = self.xq.shape[1]

        quant = faiss.IndexFlatL2(d)

        index = faiss.IndexHNSW2Level(quant, 256, 8, 8)
        index.train(self.xb)
        index.add(self.xb)
        Dhnsw, Ihnsw = index.search(self.xq, 1)

        self.assertGreaterEqual((self.Iref == Ihnsw).sum(), 310)

        self.io_and_retest(index, Dhnsw, Ihnsw)
if __name__ == '__main__':
    unittest.main()
...@@ -943,12 +943,14 @@ static void knn_L2sqr_blas (const float * x,
 * KNN driver functions
 *******************************************************/
int distance_compute_blas_threshold = 20;
void knn_inner_product (const float * x,
                        const float * y,
                        size_t d, size_t nx, size_t ny,
                        float_minheap_array_t * res)
{
    if (d % 4 == 0 && nx < distance_compute_blas_threshold) {
        knn_inner_product_sse (x, y, d, nx, ny, res);
    } else {
        knn_inner_product_blas (x, y, d, nx, ny, res);
...@@ -968,7 +970,7 @@ void knn_L2sqr (const float * x,
                size_t d, size_t nx, size_t ny,
                float_maxheap_array_t * res)
{
    if (d % 4 == 0 && nx < distance_compute_blas_threshold) {
        knn_L2sqr_sse (x, y, d, nx, ny, res);
    } else {
        NopDistanceCorrection nop;
...@@ -1270,7 +1272,7 @@ void range_search_L2sqr (
        RangeSearchResult *res)
{
    if (d % 4 == 0 && nx < distance_compute_blas_threshold) {
        range_search_sse<true> (x, y, d, nx, ny, radius, res);
    } else {
        range_search_blas<true> (x, y, d, nx, ny, radius, res);
...@@ -1285,7 +1287,7 @@ void range_search_inner_product (
        RangeSearchResult *res)
{
    if (d % 4 == 0 && nx < distance_compute_blas_threshold) {
        range_search_sse<false> (x, y, d, nx, ny, radius, res);
    } else {
        range_search_blas<false> (x, y, d, nx, ny, radius, res);
...
...@@ -195,6 +195,8 @@ void fvec_L2sqr_by_idx (
 * KNN functions
 ***************************************************************************/
// threshold on nx above which we switch to BLAS to compute distances
extern int distance_compute_blas_threshold;
/** Return the k nearest neighbors of each of the nx vectors x among the ny
 * vectors y, w.r.t. max inner product
...
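Since the crossover is now a plain global instead of a hard-coded 20, it can be tuned at runtime; a sketch (illustrative, not part of the commit):

#include "utils.h"

// Lower the SSE/BLAS crossover so that even small query batches take
// the BLAS path; raising it instead favours the SSE loop for larger
// batches.
void prefer_blas_for_small_batches ()
{
    faiss::distance_compute_blas_threshold = 4;
}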