Unverified Commit afe0fdc1 authored by Lucas Hosseini's avatar Lucas Hosseini Committed by GitHub

Facebook sync (Mar 2019) (#756)

Facebook sync (Mar 2019)

- MatrixStats object
- option to round coordinates during k-means optimization
- alternative option for search in HNSW
- moved stats and imbalance_factor of IndexIVF to InvertedLists object
- range search for IVFScalarQuantizer
- direct uint8 codec in ScalarQuantizer
- renamed IndexProxy to IndexReplicas and moved to main Faiss
- better support for PQ code assignment with external index
- support for IMI2x16 (4B virtual centroids!)
- support for k = 2048 search on GPU (instead of 1024)
- most CUDA mem alloc failures throw exceptions instead of terminating on an assertion
- support for renaming an ondisk invertedlists
- interrupt computations with ctrl-C in python
parent a9959bf6
...@@ -15,6 +15,8 @@ ...@@ -15,6 +15,8 @@
#include "AutoTune.h" #include "AutoTune.h"
#include <cmath> #include <cmath>
#include <stdarg.h> /* va_list, va_start, va_arg, va_end */
#include "FaissAssert.h" #include "FaissAssert.h"
#include "utils.h" #include "utils.h"
...@@ -992,5 +994,235 @@ IndexBinary *index_binary_factory(int d, const char *description) ...@@ -992,5 +994,235 @@ IndexBinary *index_binary_factory(int d, const char *description)
return index; return index;
} }
/*********************************************************************
* MatrixStats
*********************************************************************/
/// Start with empty accumulators: all counters and sums are zero, the
/// running min / max are set so that the first finite sample replaces them,
/// and mean / stddev are NaN until compute_mean_std() is called.
MatrixStats::PerDimStats::PerDimStats():
    n(0), n_nan(0), n_inf(0), n0(0),
    min(HUGE_VALF), max(-HUGE_VALF),
    sum(0), sum2(0),
    // n_valid is only assigned by compute_mean_std(); give it a defined
    // value so that reading it earlier does not read an indeterminate one
    n_valid(0),
    mean(NAN), stddev(NAN)
{}
/// Account for one sample of this dimension.
///
/// Every sample increments n. NaN samples only increment n_nan and
/// non-finite samples only increment n_inf; all other samples update the
/// zero counter, the running min / max and the two running sums.
void MatrixStats::PerDimStats::add (float x)
{
    ++n;
    if (std::isnan(x)) {
        ++n_nan;
    } else if (!std::isfinite(x)) {
        ++n_inf;
    } else {
        if (x == 0) ++n0;
        if (x < min) min = x;
        if (x > max) max = x;
        // accumulate in double precision
        const double xd = x;
        sum += xd;
        sum2 += xd * xd;
    }
}
/// Derive n_valid, mean and stddev from the accumulated counters and sums.
///
/// The variance is computed as E[x^2] - E[x]^2 and clamped at 0 before the
/// square root, since that difference can come out slightly negative.
/// There is no guard for n_valid == 0: both divisions then have a zero
/// denominator.
void MatrixStats::PerDimStats::compute_mean_std ()
{
    n_valid = n - n_nan - n_inf;
    mean = sum / n_valid;
    const double raw_var = sum2 / n_valid - mean * mean;
    stddev = sqrt (raw_var < 0 ? 0 : raw_var);
}
/// Append a printf-style formatted message to the comment buffer.
///
/// The text is written at buf, using at most nbuf bytes including the
/// terminating NUL; buf and nbuf are then advanced past the characters that
/// were actually stored. If the message does not fit it is truncated, and
/// once the buffer is full further messages are dropped.
///
/// @param fmt  printf-style format string, followed by its arguments
void MatrixStats::do_comment (const char *fmt, ...)
{
    if (buf == nullptr || nbuf == 0) {
        return; // no buffer to write into
    }

    va_list ap;
    va_start(ap, fmt);
    const int ret = vsnprintf(buf, nbuf, fmt, ap);
    va_end(ap);

    if (ret < 0) {
        return; // formatting error: leave the write position untouched
    }

    // vsnprintf returns the length the full message would have had. When
    // that is >= nbuf the output was truncated and only nbuf - 1 characters
    // (plus the NUL) were stored, so never advance further than that:
    // otherwise nbuf would wrap around and buf would leave the buffer.
    size_t written = (size_t)ret;
    if (written >= nbuf) {
        written = nbuf - 1;
    }
    nbuf -= written;
    buf += written;
}
/** Compute statistics on a matrix and write comments about them.
 *
 * The raw statistics are stored in the members of this object and the
 * human-readable remarks are collected in `comments`.
 *
 * @param n  number of vectors
 * @param d  dimension of the vectors
 * @param x  input data, vector i is read from x[i * d] .. x[i * d + d - 1]
 */
MatrixStats::MatrixStats (size_t n, size_t d, const float *x):
    n(n), d(d),
    n_collision(0), n_valid(0), n0(0),
    min_norm2(HUGE_VAL), max_norm2(0)
{
    // do_comment() appends to this scratch buffer through buf / nbuf
    std::vector<char> comment_buf (10000);
    buf = comment_buf.data ();
    nbuf = comment_buf.size();

    // NB: all size_t values below are printed with %zu. %ld is only correct
    // on platforms where long and size_t have the same width.
    do_comment ("analyzing %zu vectors of size %zu\n", n, d);

    if (d > 1024) {
        do_comment (
            "indexing this many dimensions is hard, "
            "please consider dimensionality reducution (with PCAMatrix)\n");
    }

    size_t nbytes = sizeof (x[0]) * d;
    per_dim_stats.resize (d);

    for (size_t i = 0; i < n; i++) {
        const float *xi = x + d * i;

        // per-dimension accumulators + squared L2 norm of this vector
        double sum2 = 0;
        for (size_t j = 0; j < d; j++) {
            per_dim_stats[j].add (xi[j]);
            sum2 += xi[j] * (double)xi[j];
        }

        // a NaN or Inf component makes the squared norm non-finite
        if (std::isfinite (sum2)) {
            n_valid++;
            if (sum2 == 0) {
                n0 ++;
            } else {
                if (sum2 < min_norm2) min_norm2 = sum2;
                if (sum2 > max_norm2) max_norm2 = sum2;
            }
        }

        { // check hash: detect vectors that are byte-wise identical
            uint64_t hash = hash_bytes((const uint8_t*)xi, nbytes);
            auto elt = occurrences.find (hash);
            if (elt == occurrences.end()) {
                // first vector with this hash
                Occurrence occ = {i, 1};
                occurrences[hash] = occ;
            } else {
                if (!memcmp (xi, x + elt->second.first * d, nbytes)) {
                    // real duplicate of the first vector with this hash
                    elt->second.count ++;
                } else {
                    // same hash, different content
                    n_collision ++;
                    // we should use a list of collisions but overkill
                }
            }
        }
    }

    // invalid vector stats
    if (n_valid == n) {
        do_comment ("no NaN or Infs in data\n");
    } else {
        do_comment ("%zu vectors contain NaN or Inf "
                    "(or have too large components), "
                    "expect bad results with indexing!\n", n - n_valid);
    }

    // copies in dataset
    if (occurrences.size() == n) {
        do_comment ("all vectors are distinct\n");
    } else {
        do_comment ("%zu vectors are distinct (%.2f%%)\n",
                    occurrences.size(),
                    occurrences.size() * 100.0 / n);

        if (n_collision > 0) {
            do_comment ("%zu collisions in hash table, "
                        "counts may be invalid\n", n_collision);
        }

        // report the most duplicated vector
        Occurrence max = {0, 0};
        for (auto it = occurrences.begin();
             it != occurrences.end(); ++it) {
            if (it->second.count > max.count) {
                max = it->second;
            }
        }
        do_comment ("vector %zu has %zu copies\n", max.first, max.count);
    }

    { // norm stats
        // from here on min_norm2 / max_norm2 hold norms, not squared norms
        min_norm2 = sqrt (min_norm2);
        max_norm2 = sqrt (max_norm2);
        do_comment ("range of L2 norms=[%g, %g] (%zu null vectors)\n",
                    min_norm2, max_norm2, n0);

        if (max_norm2 < min_norm2 * 1.0001) {
            do_comment ("vectors are normalized, inner product and "
                        "L2 search are equivalent\n");
        }

        if (max_norm2 > min_norm2 * 100) {
            do_comment ("vectors have very large differences in norms, "
                        "is this normal?\n");
        }
    }

    { // per dimension stats
        double max_std = 0, min_std = HUGE_VAL;

        // n_zero_entries counts zero components over the whole matrix; it is
        // distinct from the member n0, which counts null vectors
        size_t n_dangerous_range = 0, n_0_range = 0, n_zero_entries = 0;

        for (size_t j = 0; j < d; j++) {
            PerDimStats &st = per_dim_stats[j];
            st.compute_mean_std ();
            n_zero_entries += st.n0;

            if (st.max == st.min) {
                n_0_range ++;
            } else if (st.max < 1.001 * st.min) {
                n_dangerous_range ++;
            }

            if (st.stddev > max_std) max_std = st.stddev;
            if (st.stddev < min_std) min_std = st.stddev;
        }

        if (n_zero_entries == 0) {
            do_comment ("matrix contains no 0s\n");
        } else {
            do_comment ("matrix contains %.2f %% 0 entries\n",
                        n_zero_entries * 100.0 / (n * d));
        }

        if (n_0_range == 0) {
            do_comment ("no constant dimensions\n");
        } else {
            do_comment ("%zu dimensions are constant: they can be removed\n",
                        n_0_range);
        }

        if (n_dangerous_range == 0) {
            do_comment ("no dimension has a too large mean\n");
        } else {
            do_comment ("%zu dimensions are too large "
                        "wrt. their variance, may loose precision "
                        "in IndexFlatL2 (use CenteringTransform)\n",
                        n_dangerous_range);
        }

        do_comment ("stddevs per dimension are in [%g %g]\n", min_std, max_std);

        // dimensions whose spread is tiny compared to the widest one
        size_t n_small_var = 0;

        for (size_t j = 0; j < d; j++) {
            const PerDimStats &st = per_dim_stats[j];
            if (st.stddev < max_std * 1e-4) {
                n_small_var++;
            }
        }

        if (n_small_var > 0) {
            do_comment ("%zu dimensions have negligible stddev wrt. "
                        "the largest dimension, they could be ignored",
                        n_small_var);
        }
    }

    // keep a copy of the text, then detach from the local scratch buffer
    // that is about to be destroyed
    comments = comment_buf.data ();
    buf = nullptr;
    nbuf = 0;
}
} // namespace faiss } // namespace faiss
...@@ -12,6 +12,7 @@ ...@@ -12,6 +12,7 @@
#define FAISS_AUTO_TUNE_H #define FAISS_AUTO_TUNE_H
#include <vector> #include <vector>
#include <unordered_map>
#include "Index.h" #include "Index.h"
#include "IndexBinary.h" #include "IndexBinary.h"
...@@ -209,6 +210,50 @@ Index *index_factory (int d, const char *description, ...@@ -209,6 +210,50 @@ Index *index_factory (int d, const char *description,
IndexBinary *index_binary_factory (int d, const char *description); IndexBinary *index_binary_factory (int d, const char *description);
/** Reports some statistics on a dataset and comments on them.
 *
 * It is a class rather than a function so that all stats can also be
 * accessed from code.
 */
struct MatrixStats {
    /// analyze n vectors of dimension d stored contiguously in x
    MatrixStats (size_t n, size_t d, const float *x);

    /// human-readable remarks produced by the constructor
    std::string comments;

    // raw statistics
    size_t n;            ///< number of vectors
    size_t d;            ///< dimension of the vectors
    size_t n_collision;  ///< hash matches between vectors that differ
    size_t n_valid;      ///< vectors whose squared norm is finite
    size_t n0;           ///< vectors whose squared norm is 0
    double min_norm2;    ///< smallest norm over valid non-null vectors
    double max_norm2;    ///< largest norm over valid non-null vectors

    /// accumulators and derived statistics for one dimension
    struct PerDimStats {
        size_t n;       ///< samples seen
        size_t n_nan;   ///< NaN samples
        size_t n_inf;   ///< non-finite, non-NaN samples
        size_t n0;      ///< samples equal to 0
        float min;      ///< smallest valid sample
        float max;      ///< largest valid sample
        double sum;     ///< sum of the valid samples
        double sum2;    ///< sum of the squared valid samples
        size_t n_valid; ///< n - n_nan - n_inf, set by compute_mean_std()
        double mean;    ///< set by compute_mean_std()
        double stddev;  ///< set by compute_mean_std()

        PerDimStats();
        void add (float x);
        void compute_mean_std ();
    };

    /// one entry per dimension
    std::vector<PerDimStats> per_dim_stats;

    /// bookkeeping for one group of identical vectors
    struct Occurrence {
        size_t first;  ///< index of the first vector of the group
        size_t count;  ///< number of vectors in the group
    };

    /// hash of the vector bytes -> occurrence record
    std::unordered_map<uint64_t, Occurrence> occurrences;

    // write cursor used by do_comment() while the constructor runs
    char *buf;    ///< next write position
    size_t nbuf;  ///< bytes left at buf

    /// append a printf-style formatted remark at buf
    void do_comment (const char *fmt, ...);
};
} // namespace faiss } // namespace faiss
......
...@@ -8,11 +8,12 @@ ...@@ -8,11 +8,12 @@
// -*- c++ -*- // -*- c++ -*-
#include <cstring>
#include "AuxIndexStructures.h" #include "AuxIndexStructures.h"
#include "FaissAssert.h" #include "FaissAssert.h"
#include <cstring>
namespace faiss { namespace faiss {
...@@ -72,6 +73,15 @@ BufferList::~BufferList () ...@@ -72,6 +73,15 @@ BufferList::~BufferList ()
} }
} }
/// Append one (id, distance) pair at the write position of the last buffer.
/// When the write position has reached buffer_size, a new buffer is
/// appended first.
void BufferList::add (idx_t id, float dis) {
    if (wp == buffer_size) {
        // current buffer is full
        append_buffer();
    }

    Buffer & tail = buffers.back();
    tail.ids [wp] = id;
    tail.dis [wp] = dis;
    ++wp;
}
void BufferList::append_buffer () void BufferList::append_buffer ()
...@@ -106,6 +116,12 @@ void BufferList::copy_range (size_t ofs, size_t n, ...@@ -106,6 +116,12 @@ void BufferList::copy_range (size_t ofs, size_t n,
* RangeSearchPartialResult * RangeSearchPartialResult
***********************************************************************/ ***********************************************************************/
/// Record one result for this query: bump the result counter and store the
/// (id, dis) pair in the partial result this query belongs to.
/// Note that the argument order is swapped in the forwarded call.
void RangeQueryResult::add (float dis, idx_t id) {
    ++nres;
    pres->add (id, dis);
}
RangeSearchPartialResult::RangeSearchPartialResult (RangeSearchResult * res_in): RangeSearchPartialResult::RangeSearchPartialResult (RangeSearchResult * res_in):
BufferList(res_in->buffer_size), BufferList(res_in->buffer_size),
...@@ -114,10 +130,10 @@ RangeSearchPartialResult::RangeSearchPartialResult (RangeSearchResult * res_in): ...@@ -114,10 +130,10 @@ RangeSearchPartialResult::RangeSearchPartialResult (RangeSearchResult * res_in):
/// begin a new result /// begin a new result
RangeSearchPartialResult::QueryResult & RangeQueryResult &
RangeSearchPartialResult::new_result (idx_t qno) RangeSearchPartialResult::new_result (idx_t qno)
{ {
QueryResult qres = {qno, 0, this}; RangeQueryResult qres = {qno, 0, this};
queries.push_back (qres); queries.push_back (qres);
return queries.back(); return queries.back();
} }
...@@ -140,7 +156,7 @@ void RangeSearchPartialResult::finalize () ...@@ -140,7 +156,7 @@ void RangeSearchPartialResult::finalize ()
void RangeSearchPartialResult::set_lims () void RangeSearchPartialResult::set_lims ()
{ {
for (int i = 0; i < queries.size(); i++) { for (int i = 0; i < queries.size(); i++) {
QueryResult & qres = queries[i]; RangeQueryResult & qres = queries[i];
res->lims[qres.qno] = qres.nres; res->lims[qres.qno] = qres.nres;
} }
} }
...@@ -150,7 +166,7 @@ void RangeSearchPartialResult::set_result (bool incremental) ...@@ -150,7 +166,7 @@ void RangeSearchPartialResult::set_result (bool incremental)
{ {
size_t ofs = 0; size_t ofs = 0;
for (int i = 0; i < queries.size(); i++) { for (int i = 0; i < queries.size(); i++) {
QueryResult & qres = queries[i]; RangeQueryResult & qres = queries[i];
copy_range (ofs, qres.nres, copy_range (ofs, qres.nres,
res->labels + res->lims[qres.qno], res->labels + res->lims[qres.qno],
...@@ -246,6 +262,38 @@ size_t VectorIOReader::operator()( ...@@ -246,6 +262,38 @@ size_t VectorIOReader::operator()(
} }
/***********************************************************
* Interrupt callback
***********************************************************/
/// the installed callback; empty when no callback is set
std::unique_ptr<InterruptCallback> InterruptCallback::instance;

/// Throw (via FAISS_THROW_MSG) if a callback is installed and it asks for an
/// interrupt; otherwise do nothing.
void InterruptCallback::check () {
    if (instance && instance->want_interrupt ()) {
        FAISS_THROW_MSG ("computation interrupted");
    }
}
/// Non-throwing variant of check(): true only when a callback is installed
/// and it asks for an interrupt.
bool InterruptCallback::is_interrupted () {
    return instance && instance->want_interrupt();
}
/// Suggest how many iterations to run between two interrupt checks.
///
/// @param flops  estimated cost of a single iteration
/// @return 2^30 ("never check") when no callback is installed; otherwise
///         10^8 / (flops + 1), but at least 1
size_t InterruptCallback::get_period_hint (size_t flops) {
    if (!instance.get()) {
        return (size_t)1 << 30; // never check
    }
    // for 10M flops, it is reasonable to check once every 10 iterations
    const size_t budget = (size_t)10 * 10 * 1000 * 1000;

    // For flops >= budget the quotient below would be 0, i.e. the hint is 1.
    // Handling it up front also keeps flops + 1 from wrapping to 0 (and the
    // division from trapping) when flops is the largest size_t value.
    if (flops >= budget) {
        return 1;
    }
    return budget / (flops + 1);
}
......
...@@ -18,6 +18,7 @@ ...@@ -18,6 +18,7 @@
#include <vector> #include <vector>
#include <unordered_set> #include <unordered_set>
#include <memory>
#include "Index.h" #include "Index.h"
...@@ -117,16 +118,7 @@ struct BufferList { ...@@ -117,16 +118,7 @@ struct BufferList {
// create a new buffer // create a new buffer
void append_buffer (); void append_buffer ();
inline void add (idx_t id, float dis) void add (idx_t id, float dis);
{
if (wp == buffer_size) { // need new buffer
append_buffer();
}
Buffer & buf = buffers.back();
buf.ids [wp] = id;
buf.dis [wp] = dis;
wp++;
}
/// copy elemnts ofs:ofs+n-1 seen as linear data in the buffers to /// copy elemnts ofs:ofs+n-1 seen as linear data in the buffers to
/// tables dest_ids, dest_dis /// tables dest_ids, dest_dis
...@@ -135,7 +127,17 @@ struct BufferList { ...@@ -135,7 +127,17 @@ struct BufferList {
}; };
struct RangeSearchPartialResult;

/// Result bookkeeping for a single query of a range search.
/// The field order matters: objects are brace-initialized as {qno, nres, pres}.
struct RangeQueryResult {
    using idx_t = Index::idx_t;

    /// query number this record belongs to
    idx_t qno;

    /// number of results added so far through add()
    size_t nres;

    /// partial result that receives the (id, distance) pairs
    RangeSearchPartialResult * pres;

    /// register one more result for this query
    void add (float dis, idx_t id);
};
/// the entries in the buffers are split per query /// the entries in the buffers are split per query
struct RangeSearchPartialResult: BufferList { struct RangeSearchPartialResult: BufferList {
...@@ -143,21 +145,10 @@ struct RangeSearchPartialResult: BufferList { ...@@ -143,21 +145,10 @@ struct RangeSearchPartialResult: BufferList {
explicit RangeSearchPartialResult (RangeSearchResult * res_in); explicit RangeSearchPartialResult (RangeSearchResult * res_in);
/// result structure for a single query std::vector<RangeQueryResult> queries;
struct QueryResult {
idx_t qno;
size_t nres;
RangeSearchPartialResult * pres;
inline void add (float dis, idx_t id) {
nres++;
pres->add (id, dis);
}
};
std::vector<QueryResult> queries;
/// begin a new result /// begin a new result
QueryResult & new_result (idx_t qno); RangeQueryResult & new_result (idx_t qno);
void finalize (); void finalize ();
...@@ -173,7 +164,6 @@ struct RangeSearchPartialResult: BufferList { ...@@ -173,7 +164,6 @@ struct RangeSearchPartialResult: BufferList {
* Abstract I/O objects * Abstract I/O objects
***********************************************************/ ***********************************************************/
struct IOReader { struct IOReader {
// name that can be used in error messages // name that can be used in error messages
std::string name; std::string name;
...@@ -214,6 +204,57 @@ struct VectorIOWriter:IOWriter { ...@@ -214,6 +204,57 @@ struct VectorIOWriter:IOWriter {
size_t operator()(const void *ptr, size_t size, size_t nitems) override; size_t operator()(const void *ptr, size_t size, size_t nitems) override;
}; };
/***********************************************************
 * The distance computer maintains a current query and computes
 * distances to elements in an index that supports random access.
 *
 * The DistanceComputer is not intended to be thread-safe (e.g. because
 * it maintains counters), so the distance functions are not const;
 * instantiate one per thread if needed.
 ***********************************************************/
struct DistanceComputer {
    using idx_t = Index::idx_t;

    /// set the current query; called before computing distances
    virtual void set_query(const float *x) = 0;

    /// distance between stored vector i and the current query
    virtual float operator () (idx_t i) = 0;

    /// distance between the two stored vectors i and j
    virtual float symmetric_dis (idx_t i, idx_t j) = 0;

    virtual ~DistanceComputer() = default;
};
/***********************************************************
* Interrupt callback
***********************************************************/
struct InterruptCallback {
    /// implemented by subclasses: return true to request an interrupt
    virtual bool want_interrupt () = 0;

    virtual ~InterruptCallback() = default;

    /// the installed callback, if any
    static std::unique_ptr<InterruptCallback> instance;

    /** Check if:
     * - an interrupt callback is set
     * - the callback returns true
     * If this is the case, then throw an exception.
     */
    static void check ();

    /// Same as check(), but returns true when interrupted instead of
    /// throwing.
    static bool is_interrupted ();

    /** Assuming each iteration takes a certain number of flops, what
     * is a reasonable interval to check for interrupts?
     */
    static size_t get_period_hint (size_t flops);
};
}; // namespace faiss }; // namespace faiss
......
...@@ -9,6 +9,7 @@ ...@@ -9,6 +9,7 @@
// -*- c++ -*- // -*- c++ -*-
#include "Clustering.h" #include "Clustering.h"
#include "AuxIndexStructures.h"
#include <cmath> #include <cmath>
...@@ -24,7 +25,9 @@ namespace faiss { ...@@ -24,7 +25,9 @@ namespace faiss {
ClusteringParameters::ClusteringParameters (): ClusteringParameters::ClusteringParameters ():
niter(25), niter(25),
nredo(1), nredo(1),
verbose(false), spherical(false), verbose(false),
spherical(false),
int_centroids(false),
update_index(false), update_index(false),
frozen_centroids(false), frozen_centroids(false),
min_points_per_centroid(39), min_points_per_centroid(39),
...@@ -58,7 +61,18 @@ static double imbalance_factor (int n, int k, long *assign) { ...@@ -58,7 +61,18 @@ static double imbalance_factor (int n, int k, long *assign) {
return uf; return uf;
} }
/// Apply the optional post-processing steps to the centroid table:
/// L2 renormalization when `spherical` is set, then rounding of every
/// coordinate with roundf when `int_centroids` is set.
void Clustering::post_process_centroids ()
{
    if (spherical) {
        fvec_renorm_L2 (d, k, centroids.data());
    }

    if (!int_centroids) {
        return;
    }

    for (auto & coord : centroids) {
        coord = roundf (coord);
    }
}
void Clustering::train (idx_t nx, const float *x_in, Index & index) { void Clustering::train (idx_t nx, const float *x_in, Index & index) {
...@@ -117,9 +131,6 @@ void Clustering::train (idx_t nx, const float *x_in, Index & index) { ...@@ -117,9 +131,6 @@ void Clustering::train (idx_t nx, const float *x_in, Index & index) {
"redo %d times, %d iterations\n", "redo %d times, %d iterations\n",
int(nx), d, k, nredo, niter); int(nx), d, k, nredo, niter);
idx_t * assign = new idx_t[nx]; idx_t * assign = new idx_t[nx];
ScopeDeleter<idx_t> del (assign); ScopeDeleter<idx_t> del (assign);
float * dis = new float[nx]; float * dis = new float[nx];
...@@ -146,7 +157,7 @@ void Clustering::train (idx_t nx, const float *x_in, Index & index) { ...@@ -146,7 +157,7 @@ void Clustering::train (idx_t nx, const float *x_in, Index & index) {
double t_search_tot = 0; double t_search_tot = 0;
if (verbose) { if (verbose) {
printf(" Preprocessing in %.2f s\n", printf(" Preprocessing in %.2f s\n",
(getmillisecs() - t0)/1000.); (getmillisecs() - t0) / 1000.);
} }
t0 = getmillisecs(); t0 = getmillisecs();
...@@ -156,7 +167,6 @@ void Clustering::train (idx_t nx, const float *x_in, Index & index) { ...@@ -156,7 +167,6 @@ void Clustering::train (idx_t nx, const float *x_in, Index & index) {
printf("Outer iteration %d / %d\n", redo, nredo); printf("Outer iteration %d / %d\n", redo, nredo);
} }
// initialize remaining centroids with random points from the dataset // initialize remaining centroids with random points from the dataset
centroids.resize (d * k); centroids.resize (d * k);
std::vector<int> perm (nx); std::vector<int> perm (nx);
...@@ -166,9 +176,7 @@ void Clustering::train (idx_t nx, const float *x_in, Index & index) { ...@@ -166,9 +176,7 @@ void Clustering::train (idx_t nx, const float *x_in, Index & index) {
memcpy (&centroids[i * d], x + perm[i] * d, memcpy (&centroids[i * d], x + perm[i] * d,
d * sizeof (float)); d * sizeof (float));
if (spherical) { post_process_centroids ();
fvec_renorm_L2 (d, k, centroids.data());
}
if (index.ntotal != 0) { if (index.ntotal != 0) {
index.reset(); index.reset();
...@@ -183,6 +191,7 @@ void Clustering::train (idx_t nx, const float *x_in, Index & index) { ...@@ -183,6 +191,7 @@ void Clustering::train (idx_t nx, const float *x_in, Index & index) {
for (int i = 0; i < niter; i++) { for (int i = 0; i < niter; i++) {
double t0s = getmillisecs(); double t0s = getmillisecs();
index.search (nx, x, 1, dis, assign); index.search (nx, x, 1, dis, assign);
InterruptCallback::check();
t_search_tot += getmillisecs() - t0s; t_search_tot += getmillisecs() - t0s;
err = 0; err = 0;
...@@ -204,8 +213,7 @@ void Clustering::train (idx_t nx, const float *x_in, Index & index) { ...@@ -204,8 +213,7 @@ void Clustering::train (idx_t nx, const float *x_in, Index & index) {
fflush (stdout); fflush (stdout);
} }
if (spherical) post_process_centroids ();
fvec_renorm_L2 (d, k, centroids.data());
index.reset (); index.reset ();
if (update_index) if (update_index)
...@@ -213,6 +221,7 @@ void Clustering::train (idx_t nx, const float *x_in, Index & index) { ...@@ -213,6 +221,7 @@ void Clustering::train (idx_t nx, const float *x_in, Index & index) {
assert (index.ntotal == 0); assert (index.ntotal == 0);
index.add (k, centroids.data()); index.add (k, centroids.data());
InterruptCallback::check ();
} }
if (verbose) printf("\n"); if (verbose) printf("\n");
if (nredo > 1) { if (nredo > 1) {
......
...@@ -26,6 +26,7 @@ struct ClusteringParameters { ...@@ -26,6 +26,7 @@ struct ClusteringParameters {
bool verbose; bool verbose;
bool spherical; ///< do we want normalized centroids? bool spherical; ///< do we want normalized centroids?
bool int_centroids; ///< round centroids coordinates to integer
bool update_index; ///< update index after each iteration? bool update_index; ///< update index after each iteration?
bool frozen_centroids; ///< use the centroids provided as input and do not change them during iterations bool frozen_centroids; ///< use the centroids provided as input and do not change them during iterations
...@@ -72,6 +73,10 @@ struct Clustering: ClusteringParameters { ...@@ -72,6 +73,10 @@ struct Clustering: ClusteringParameters {
/// Index is used during the assignment stage /// Index is used during the assignment stage
virtual void train (idx_t n, const float * x, faiss::Index & index); virtual void train (idx_t n, const float * x, faiss::Index & index);
/// Post-process the centroids after each centroid update.
/// includes optional L2 normalization and nearest integer rounding
void post_process_centroids ();
virtual ~Clustering() {} virtual ~Clustering() {}
}; };
......
...@@ -9,12 +9,11 @@ ...@@ -9,12 +9,11 @@
// -*- c++ -*- // -*- c++ -*-
#include "HNSW.h" #include "HNSW.h"
#include "AuxIndexStructures.h"
namespace faiss { namespace faiss {
using idx_t = Index::idx_t; using idx_t = Index::idx_t;
using DistanceComputer = HNSW::DistanceComputer;
/************************************************************** /**************************************************************
* HNSW structure implementation * HNSW structure implementation
...@@ -544,12 +543,24 @@ int HNSW::search_from_candidates( ...@@ -544,12 +543,24 @@ int HNSW::search_from_candidates(
vt.set(v1); vt.set(v1);
} }
bool do_dis_check = check_relative_distance;
int nstep = 0; int nstep = 0;
while (candidates.size() > 0) { while (candidates.size() > 0) {
float d0 = 0; float d0 = 0;
int v0 = candidates.pop_min(&d0); int v0 = candidates.pop_min(&d0);
if (do_dis_check) {
// tricky stopping condition: there are more that ef
// distances that are processed already that are smaller
// than d0
int n_dis_below = candidates.count_below(d0);
if(n_dis_below >= efSearch) {
break;
}
}
size_t begin, end; size_t begin, end;
neighbor_range(v0, level, &begin, &end); neighbor_range(v0, level, &begin, &end);
...@@ -572,7 +583,7 @@ int HNSW::search_from_candidates( ...@@ -572,7 +583,7 @@ int HNSW::search_from_candidates(
} }
nstep++; nstep++;
if (nstep > efSearch) { if (!do_dis_check && nstep > efSearch) {
break; break;
} }
} }
...@@ -596,38 +607,31 @@ int HNSW::search_from_candidates( ...@@ -596,38 +607,31 @@ int HNSW::search_from_candidates(
* Searching * Searching
**************************************************************/ **************************************************************/
template<typename T> std::priority_queue<HNSW::Node> HNSW::search_from_candidate_unbounded(
using MaxHeap = std::priority_queue<T, std::vector<T>, std::less<T>>;
template<typename T>
using MinHeap = std::priority_queue<T, std::vector<T>, std::greater<T>>;
MaxHeap<HNSW::Node> HNSW::search_from(
const Node& node, const Node& node,
DistanceComputer& qdis, DistanceComputer& qdis,
int ef, int ef,
VisitedTable *vt) const VisitedTable *vt) const
{ {
MaxHeap<Node> top_candidates; int ndis = 0;
MinHeap<Node> candidate_set; std::priority_queue<Node> top_candidates;
std::priority_queue<Node, std::vector<Node>, std::greater<Node>> candidates;
top_candidates.push(node); top_candidates.push(node);
candidate_set.push(node); candidates.push(node);
vt->set(node.second); vt->set(node.second);
float lower_bound = node.first; while (!candidates.empty()) {
while (!candidate_set.empty()) {
float d0; float d0;
storage_idx_t v0; storage_idx_t v0;
std::tie(d0, v0) = candidate_set.top(); std::tie(d0, v0) = candidates.top();
if (d0 > lower_bound) { if (d0 > top_candidates.top().first) {
break; break;
} }
candidate_set.pop(); candidates.pop();
size_t begin, end; size_t begin, end;
neighbor_range(v0, 0, &begin, &end); neighbor_range(v0, 0, &begin, &end);
...@@ -645,20 +649,28 @@ MaxHeap<HNSW::Node> HNSW::search_from( ...@@ -645,20 +649,28 @@ MaxHeap<HNSW::Node> HNSW::search_from(
vt->set(v1); vt->set(v1);
float d1 = qdis(v1); float d1 = qdis(v1);
++ndis;
if (top_candidates.top().first > d1 || top_candidates.size() < ef) { if (top_candidates.top().first > d1 || top_candidates.size() < ef) {
candidate_set.emplace(d1, v1); candidates.emplace(d1, v1);
top_candidates.emplace(d1, v1); top_candidates.emplace(d1, v1);
if (top_candidates.size() > ef) { if (top_candidates.size() > ef) {
top_candidates.pop(); top_candidates.pop();
} }
lower_bound = top_candidates.top().first;
} }
} }
} }
#pragma omp critical
{
++hnsw_stats.n1;
if (candidates.size() == 0) {
++hnsw_stats.n2;
}
hnsw_stats.n3 += ndis;
}
return top_candidates; return top_candidates;
} }
...@@ -677,7 +689,17 @@ void HNSW::search(DistanceComputer& qdis, int k, ...@@ -677,7 +689,17 @@ void HNSW::search(DistanceComputer& qdis, int k,
} }
int ef = std::max(efSearch, k); int ef = std::max(efSearch, k);
MaxHeap<Node> top_candidates = search_from(Node(d_nearest, nearest), qdis, ef, &vt); if (search_bounded_queue) {
MinimaxHeap candidates(ef);
candidates.push(nearest, d_nearest);
search_from_candidates(qdis, k, I, D, candidates, vt, 0);
} else {
std::priority_queue<Node> top_candidates =
search_from_candidate_unbounded(Node(d_nearest, nearest),
qdis, ef, &vt);
while (top_candidates.size() > k) { while (top_candidates.size() > k) {
top_candidates.pop(); top_candidates.pop();
} }
...@@ -690,19 +712,11 @@ void HNSW::search(DistanceComputer& qdis, int k, ...@@ -690,19 +712,11 @@ void HNSW::search(DistanceComputer& qdis, int k,
faiss::maxheap_push(++nres, D, I, d, label); faiss::maxheap_push(++nres, D, I, d, label);
top_candidates.pop(); top_candidates.pop();
} }
}
// MinimaxHeap candidates(candidates_size);
// top_candidates.emplace(d_nearest, nearest);
// search_from_candidates(qdis, k, I, D, candidates, vt, 0);
// NOTE(hoss): Init at the beginning?
vt.advance(); vt.advance();
} else { } else {
assert(false);
int candidates_size = upper_beam; int candidates_size = upper_beam;
MinimaxHeap candidates(candidates_size); MinimaxHeap candidates(candidates_size);
...@@ -742,44 +756,47 @@ void HNSW::MinimaxHeap::push(storage_idx_t i, float v) { ...@@ -742,44 +756,47 @@ void HNSW::MinimaxHeap::push(storage_idx_t i, float v) {
if (k == n) { if (k == n) {
if (v >= dis[0]) return; if (v >= dis[0]) return;
faiss::heap_pop<HC> (k--, dis.data(), ids.data()); faiss::heap_pop<HC> (k--, dis.data(), ids.data());
--nvalid;
} }
faiss::heap_push<HC> (++k, dis.data(), ids.data(), v, i); faiss::heap_push<HC> (++k, dis.data(), ids.data(), v, i);
++nvalid;
} }
float HNSW::MinimaxHeap::max() const { float HNSW::MinimaxHeap::max() const {
assert(k > 0);
return dis[0]; return dis[0];
} }
int HNSW::MinimaxHeap::size() const { int HNSW::MinimaxHeap::size() const {
return k; return nvalid;
} }
void HNSW::MinimaxHeap::clear() { void HNSW::MinimaxHeap::clear() {
k = 0; nvalid = k = 0;
} }
int HNSW::MinimaxHeap::pop_min(float *vmin_out) { int HNSW::MinimaxHeap::pop_min(float *vmin_out) {
assert(k > 0); assert(k > 0);
// returns min. This is an O(n) operation // returns min. This is an O(n) operation
int i = k - 1; int i = k - 1;
while (i >= 0) {
if (ids[i] != -1) break;
i--;
}
if (i == -1) return -1;
int imin = i; int imin = i;
float vmin = dis[i]; float vmin = dis[i];
i--; i--;
while(i >= 0) { while(i >= 0) {
if (dis[i] < vmin) { if (ids[i] != -1 && dis[i] < vmin) {
vmin = dis[i]; vmin = dis[i];
imin = i; imin = i;
} }
i--; i--;
} }
assert(2 * i > k);
if (vmin_out) *vmin_out = vmin; if (vmin_out) *vmin_out = vmin;
int ret = ids[imin]; int ret = ids[imin];
ids[imin] = -1;
--k; --nvalid;
faiss::heap_push<HC>(++imin, dis.data(), ids.data(), ids[k], dis[k]);
return ret; return ret;
} }
......
...@@ -37,12 +37,12 @@ namespace faiss { ...@@ -37,12 +37,12 @@ namespace faiss {
* (https://github.com/searchivarius/nmslib) * (https://github.com/searchivarius/nmslib)
* *
* The HNSW object stores only the neighbor link structure, see * The HNSW object stores only the neighbor link structure, see
* IndexHNSW below for the full index object. * IndexHNSW.h for the full index object.
*/ */
struct VisitedTable; struct VisitedTable;
struct DistanceComputer; // from AuxIndexStructures
struct HNSW { struct HNSW {
/// internal storage of vectors (32 bits: this is expensive) /// internal storage of vectors (32 bits: this is expensive)
...@@ -53,37 +53,18 @@ struct HNSW { ...@@ -53,37 +53,18 @@ struct HNSW {
typedef std::pair<float, storage_idx_t> Node; typedef std::pair<float, storage_idx_t> Node;
/** The HNSW structure does not store vectors, it only accesses
* them through this class.
*
* Functions are guaranteed to be be accessed only from 1 thread. */
struct DistanceComputer {
idx_t d;
/// called before computing distances
virtual void set_query(const float *x) = 0;
/// compute distance of vector i to current query
virtual float operator () (storage_idx_t i) = 0;
/// compute distance between two stored vectors
virtual float symmetric_dis(storage_idx_t i, storage_idx_t j) = 0;
virtual ~DistanceComputer() {}
};
/** Heap structure that allows fast /** Heap structure that allows fast
*/ */
struct MinimaxHeap { struct MinimaxHeap {
int n; int n;
int k; int k;
int nvalid;
std::vector<storage_idx_t> ids; std::vector<storage_idx_t> ids;
std::vector<float> dis; std::vector<float> dis;
typedef faiss::CMax<float, storage_idx_t> HC; typedef faiss::CMax<float, storage_idx_t> HC;
explicit MinimaxHeap(int n): n(n), k(0), ids(n), dis(n) {} explicit MinimaxHeap(int n): n(n), k(0), nvalid(0), ids(n), dis(n) {}
void push(storage_idx_t i, float v); void push(storage_idx_t i, float v);
...@@ -147,9 +128,15 @@ struct HNSW { ...@@ -147,9 +128,15 @@ struct HNSW {
/// expansion factor at search time /// expansion factor at search time
int efSearch; int efSearch;
/// during search: do we check whether the next best distance is good enough?
bool check_relative_distance = true;
/// number of entry points in levels > 0. /// number of entry points in levels > 0.
int upper_beam; int upper_beam;
/// use bounded queue during exploration
bool search_bounded_queue = true;
// methods that initialize the tree sizes // methods that initialize the tree sizes
/// initialize the assign_probas and cum_nneighbor_per_level to /// initialize the assign_probas and cum_nneighbor_per_level to
...@@ -201,10 +188,12 @@ struct HNSW { ...@@ -201,10 +188,12 @@ struct HNSW {
VisitedTable &vt, VisitedTable &vt,
int level, int nres_in = 0) const; int level, int nres_in = 0) const;
std::priority_queue<Node> search_from(const Node& node, std::priority_queue<Node> search_from_candidate_unbounded(
const Node& node,
DistanceComputer& qdis, DistanceComputer& qdis,
int ef, int ef,
VisitedTable *vt) const; VisitedTable *vt
) const;
/// search interface /// search interface
void search(DistanceComputer& qdis, int k, void search(DistanceComputer& qdis, int k,
......
...@@ -234,7 +234,7 @@ void SlidingIndexWindow::step(const Index *sub_index, bool remove_oldest) { ...@@ -234,7 +234,7 @@ void SlidingIndexWindow::step(const Index *sub_index, bool remove_oldest) {
for (int j = 0; j + 1 < n_slice; j++) { for (int j = 0; j + 1 < n_slice; j++) {
sizes[i][j] = sizes[i][j + 1] - amount_to_remove; sizes[i][j] = sizes[i][j + 1] - amount_to_remove;
} }
sizes[i].resize(sizes[i].size() - 1); sizes[i].pop_back ();
} }
n_slice--; n_slice--;
} else { } else {
......
...@@ -60,8 +60,9 @@ struct RangeSearchResult; ...@@ -60,8 +60,9 @@ struct RangeSearchResult;
* database-to-database queries are not implemented. * database-to-database queries are not implemented.
*/ */
struct Index { struct Index {
using idx_t = long; ///< all indices are this type
typedef long idx_t; ///< all indices are this type using component_t = float;
using distance_t = float;
int d; ///< vector dimension int d; ///< vector dimension
idx_t ntotal; ///< total nb of indexed vectors idx_t ntotal; ///< total nb of indexed vectors
......
...@@ -35,7 +35,9 @@ struct RangeSearchResult; ...@@ -35,7 +35,9 @@ struct RangeSearchResult;
* vectors. * vectors.
*/ */
struct IndexBinary { struct IndexBinary {
typedef long idx_t; ///< all indices are this type using idx_t = Index::idx_t; ///< all indices are this type
using component_t = uint8_t;
using distance_t = int32_t;
int d; ///< vector dimension int d; ///< vector dimension
int code_size; ///< number of bytes per vector ( = d / 8 ) int code_size; ///< number of bytes per vector ( = d / 8 )
......
...@@ -32,7 +32,7 @@ ...@@ -32,7 +32,7 @@
#include "FaissAssert.h" #include "FaissAssert.h"
#include "IndexBinaryFlat.h" #include "IndexBinaryFlat.h"
#include "hamming.h" #include "hamming.h"
#include "AuxIndexStructures.h"
namespace faiss { namespace faiss {
...@@ -121,7 +121,7 @@ void hnsw_add_vertices(IndexBinaryHNSW& index_hnsw, ...@@ -121,7 +121,7 @@ void hnsw_add_vertices(IndexBinaryHNSW& index_hnsw,
{ {
VisitedTable vt (ntotal); VisitedTable vt (ntotal);
std::unique_ptr<HNSW::DistanceComputer> dis( std::unique_ptr<DistanceComputer> dis(
index_hnsw.get_distance_computer() index_hnsw.get_distance_computer()
); );
int prev_display = verbose && omp_get_thread_num() == 0 ? 0 : -1; int prev_display = verbose && omp_get_thread_num() == 0 ? 0 : -1;
...@@ -202,7 +202,7 @@ void IndexBinaryHNSW::search(idx_t n, const uint8_t *x, idx_t k, ...@@ -202,7 +202,7 @@ void IndexBinaryHNSW::search(idx_t n, const uint8_t *x, idx_t k,
#pragma omp parallel #pragma omp parallel
{ {
VisitedTable vt(ntotal); VisitedTable vt(ntotal);
std::unique_ptr<HNSW::DistanceComputer> dis(get_distance_computer()); std::unique_ptr<DistanceComputer> dis(get_distance_computer());
#pragma omp for #pragma omp for
for(idx_t i = 0; i < n; i++) { for(idx_t i = 0; i < n; i++) {
...@@ -252,18 +252,18 @@ namespace { ...@@ -252,18 +252,18 @@ namespace {
template<class HammingComputer> template<class HammingComputer>
struct FlatHammingDis : HNSW::DistanceComputer { struct FlatHammingDis : DistanceComputer {
const int code_size; const int code_size;
const uint8_t *b; const uint8_t *b;
size_t ndis; size_t ndis;
HammingComputer hc; HammingComputer hc;
float operator () (HNSW::storage_idx_t i) override { float operator () (idx_t i) override {
ndis++; ndis++;
return hc.hamming(b + i * code_size); return hc.hamming(b + i * code_size);
} }
float symmetric_dis(HNSW::storage_idx_t i, HNSW::storage_idx_t j) override { float symmetric_dis(idx_t i, idx_t j) override {
return HammingComputerDefault(b + j * code_size, code_size) return HammingComputerDefault(b + j * code_size, code_size)
.hamming(b + i * code_size); .hamming(b + i * code_size);
} }
...@@ -281,7 +281,7 @@ struct FlatHammingDis : HNSW::DistanceComputer { ...@@ -281,7 +281,7 @@ struct FlatHammingDis : HNSW::DistanceComputer {
hc.set((uint8_t *)x, code_size); hc.set((uint8_t *)x, code_size);
} }
virtual ~FlatHammingDis() { ~FlatHammingDis() override {
#pragma omp critical #pragma omp critical
{ {
hnsw_stats.ndis += ndis; hnsw_stats.ndis += ndis;
...@@ -293,7 +293,7 @@ struct FlatHammingDis : HNSW::DistanceComputer { ...@@ -293,7 +293,7 @@ struct FlatHammingDis : HNSW::DistanceComputer {
} // namespace } // namespace
HNSW::DistanceComputer *IndexBinaryHNSW::get_distance_computer() const { DistanceComputer *IndexBinaryHNSW::get_distance_computer() const {
IndexBinaryFlat *flat_storage = dynamic_cast<IndexBinaryFlat *>(storage); IndexBinaryFlat *flat_storage = dynamic_cast<IndexBinaryFlat *>(storage);
FAISS_ASSERT(flat_storage != nullptr); FAISS_ASSERT(flat_storage != nullptr);
......
...@@ -37,7 +37,7 @@ struct IndexBinaryHNSW : IndexBinary { ...@@ -37,7 +37,7 @@ struct IndexBinaryHNSW : IndexBinary {
~IndexBinaryHNSW() override; ~IndexBinaryHNSW() override;
HNSW::DistanceComputer *get_distance_computer() const; DistanceComputer *get_distance_computer() const;
void add(idx_t n, const uint8_t *x) override; void add(idx_t n, const uint8_t *x) override;
......
...@@ -252,39 +252,42 @@ long IndexBinaryIVF::remove_ids(const IDSelector& sel) { ...@@ -252,39 +252,42 @@ long IndexBinaryIVF::remove_ids(const IDSelector& sel) {
} }
void IndexBinaryIVF::train(idx_t n, const uint8_t *x) { void IndexBinaryIVF::train(idx_t n, const uint8_t *x) {
if (verbose) if (verbose) {
printf("Training level-1 quantizer\n"); printf("Training quantizer\n");
}
train_q1(n, x, verbose); if (quantizer->is_trained && (quantizer->ntotal == nlist)) {
if (verbose) {
printf("IVF quantizer does not need training.\n");
}
} else {
if (verbose) {
printf("Training quantizer on %ld vectors in %dD\n", n, d);
}
is_trained = true; Clustering clus(d, nlist, cp);
} quantizer->reset();
double IndexBinaryIVF::imbalance_factor () const { std::unique_ptr<float[]> x_f(new float[n * d]);
std::vector<int> hist(nlist); binary_to_real(n * d, x, x_f.get());
for (int i = 0; i < nlist; i++) { IndexFlatL2 index_tmp(d);
hist[i] = invlists->list_size(i);
if (clustering_index && verbose) {
printf("using clustering_index of dimension %d to do the clustering\n",
clustering_index->d);
} }
return faiss::imbalance_factor(nlist, hist.data()); clus.train(n, x_f.get(), clustering_index ? *clustering_index : index_tmp);
}
void IndexBinaryIVF::print_stats() const { std::unique_ptr<uint8_t[]> x_b(new uint8_t[clus.k * code_size]);
std::vector<int> sizes(40); real_to_binary(d * clus.k, clus.centroids.data(), x_b.get());
for (int i = 0; i < nlist; i++) {
for (int j = 0; j < sizes.size(); j++) { quantizer->add(clus.k, x_b.get());
if ((invlists->list_size(i) >> j) == 0) { quantizer->is_trained = true;
sizes[j]++;
break;
}
}
}
for (int i = 0; i < sizes.size(); i++) {
if (sizes[i]) {
printf("list size in < %d: %d instances\n", 1 << i, sizes[i]);
}
} }
is_trained = true;
} }
void IndexBinaryIVF::merge_from(IndexBinaryIVF &other, idx_t add_id) { void IndexBinaryIVF::merge_from(IndexBinaryIVF &other, idx_t add_id) {
...@@ -315,38 +318,6 @@ void IndexBinaryIVF::replace_invlists(InvertedLists *il, bool own) { ...@@ -315,38 +318,6 @@ void IndexBinaryIVF::replace_invlists(InvertedLists *il, bool own) {
} }
/** Train the level-1 (coarse) quantizer from binary training vectors.
 *
 * Nothing is trained when the quantizer is already trained and holds exactly
 * nlist entries. Otherwise the input is expanded to floats with
 * binary_to_real, clustered into nlist centroids with Clustering (using
 * clustering_index when it is set, a temporary IndexFlatL2 otherwise), and
 * the centroids are converted back with real_to_binary and added to the
 * freshly reset quantizer.
 *
 * @param n        number of training vectors
 * @param x        training vectors (n * d values are handed to binary_to_real)
 * @param verbose  print progress messages to stdout
 */
void IndexBinaryIVF::train_q1(size_t n, const uint8_t *x, bool verbose) {
    if (quantizer->is_trained && (quantizer->ntotal == nlist)) {
        if (verbose) {
            printf("IVF quantizer does not need training.\n");
        }
        return;
    }

    if (verbose) {
        // n is a size_t: %zu is the matching conversion (%ld expects a long)
        printf("Training level-1 quantizer on %zu vectors in %dD\n", n, d);
    }

    Clustering clus(d, nlist, cp);
    quantizer->reset();

    // the clustering code works on floats: expand the binary input first
    std::unique_ptr<float[]> x_f(new float[n * d]);
    binary_to_real(n * d, x, x_f.get());

    IndexFlatL2 index_tmp(d);

    if (clustering_index && verbose) {
        printf("using clustering_index of dimension %d to do the clustering\n",
               clustering_index->d);
    }

    clus.train(n, x_f.get(), clustering_index ? *clustering_index : index_tmp);

    // convert the float centroids back to binary codes for the quantizer
    std::unique_ptr<uint8_t[]> x_b(new uint8_t[clus.k * code_size]);
    real_to_binary(d * clus.k, clus.centroids.data(), x_b.get());

    quantizer->add(clus.k, x_b.get());
    quantizer->is_trained = true;
}
namespace { namespace {
using idx_t = Index::idx_t; using idx_t = Index::idx_t;
......
...@@ -58,9 +58,6 @@ struct IndexBinaryIVF : IndexBinary { ...@@ -58,9 +58,6 @@ struct IndexBinaryIVF : IndexBinary {
ClusteringParameters cp; ///< to override default clustering params ClusteringParameters cp; ///< to override default clustering params
Index *clustering_index; ///< to override index used during clustering Index *clustering_index; ///< to override index used during clustering
/// Trains the level-1 quantizer (skipped when it is already trained and holds nlist entries)
void train_q1(size_t n, const uint8_t *x, bool verbose);
/** The Inverted file takes a quantizer (an IndexBinary) on input, /** The Inverted file takes a quantizer (an IndexBinary) on input,
* which implements the function mapping a vector to a list * which implements the function mapping a vector to a list
* identifier. The pointer is borrowed: the quantizer should not * identifier. The pointer is borrowed: the quantizer should not
...@@ -74,10 +71,9 @@ struct IndexBinaryIVF : IndexBinary { ...@@ -74,10 +71,9 @@ struct IndexBinaryIVF : IndexBinary {
void reset() override; void reset() override;
/// Trains the quantizer and calls train_residual to train sub-quantizers /// Trains the quantizer
void train(idx_t n, const uint8_t *x) override; void train(idx_t n, const uint8_t *x) override;
/// Quantizes x and calls add_with_key
void add(idx_t n, const uint8_t *x) override; void add(idx_t n, const uint8_t *x) override;
void add_with_ids(idx_t n, const uint8_t *x, const long *xids) override; void add_with_ids(idx_t n, const uint8_t *x, const long *xids) override;
...@@ -174,12 +170,6 @@ struct IndexBinaryIVF : IndexBinary { ...@@ -174,12 +170,6 @@ struct IndexBinaryIVF : IndexBinary {
*/ */
void make_direct_map(bool new_maintain_direct_map=true); void make_direct_map(bool new_maintain_direct_map=true);
/// 1= perfectly balanced, >1: imbalanced
double imbalance_factor() const;
/// display some stats about the inverted lists
void print_stats() const;
void replace_invlists(InvertedLists *il, bool own=false); void replace_invlists(InvertedLists *il, bool own=false);
}; };
......
...@@ -35,6 +35,7 @@ ...@@ -35,6 +35,7 @@
#include "FaissAssert.h" #include "FaissAssert.h"
#include "IndexFlat.h" #include "IndexFlat.h"
#include "IndexIVFPQ.h" #include "IndexIVFPQ.h"
#include "AuxIndexStructures.h"
extern "C" { extern "C" {
...@@ -55,7 +56,6 @@ using MinimaxHeap = HNSW::MinimaxHeap; ...@@ -55,7 +56,6 @@ using MinimaxHeap = HNSW::MinimaxHeap;
using storage_idx_t = HNSW::storage_idx_t; using storage_idx_t = HNSW::storage_idx_t;
using NodeDistCloser = HNSW::NodeDistCloser; using NodeDistCloser = HNSW::NodeDistCloser;
using NodeDistFarther = HNSW::NodeDistFarther; using NodeDistFarther = HNSW::NodeDistFarther;
using DistanceComputer = HNSW::DistanceComputer;
HNSWStats hnsw_stats; HNSWStats hnsw_stats;
...@@ -71,6 +71,7 @@ void hnsw_add_vertices(IndexHNSW &index_hnsw, ...@@ -71,6 +71,7 @@ void hnsw_add_vertices(IndexHNSW &index_hnsw,
size_t n, const float *x, size_t n, const float *x,
bool verbose, bool verbose,
bool preset_levels = false) { bool preset_levels = false) {
size_t d = index_hnsw.d;
HNSW & hnsw = index_hnsw.hnsw; HNSW & hnsw = index_hnsw.hnsw;
size_t ntotal = n0 + n; size_t ntotal = n0 + n;
double t0 = getmillisecs(); double t0 = getmillisecs();
...@@ -80,6 +81,10 @@ void hnsw_add_vertices(IndexHNSW &index_hnsw, ...@@ -80,6 +81,10 @@ void hnsw_add_vertices(IndexHNSW &index_hnsw,
n, n0, int(preset_levels)); n, n0, int(preset_levels));
} }
if (n == 0) {
return;
}
int max_level = hnsw.prepare_level_tab(n, preset_levels); int max_level = hnsw.prepare_level_tab(n, preset_levels);
if (verbose) { if (verbose) {
...@@ -119,6 +124,10 @@ void hnsw_add_vertices(IndexHNSW &index_hnsw, ...@@ -119,6 +124,10 @@ void hnsw_add_vertices(IndexHNSW &index_hnsw,
} }
} }
idx_t check_period = InterruptCallback::get_period_hint
(max_level * index_hnsw.d * hnsw.efConstruction);
{ // perform add { // perform add
RandomGenerator rng2(789); RandomGenerator rng2(789);
...@@ -136,18 +145,26 @@ void hnsw_add_vertices(IndexHNSW &index_hnsw, ...@@ -136,18 +145,26 @@ void hnsw_add_vertices(IndexHNSW &index_hnsw,
for (int j = i0; j < i1; j++) for (int j = i0; j < i1; j++)
std::swap(order[j], order[j + rng2.rand_int(i1 - j)]); std::swap(order[j], order[j + rng2.rand_int(i1 - j)]);
#pragma omp parallel bool interrupt = false;
#pragma omp parallel if(i1 > i0 + 100)
{ {
VisitedTable vt (ntotal); VisitedTable vt (ntotal);
DistanceComputer *dis = index_hnsw.get_distance_computer(); DistanceComputer *dis = index_hnsw.get_distance_computer();
ScopeDeleter1<DistanceComputer> del(dis); ScopeDeleter1<DistanceComputer> del(dis);
int prev_display = verbose && omp_get_thread_num() == 0 ? 0 : -1; int prev_display = verbose && omp_get_thread_num() == 0 ? 0 : -1;
size_t counter = 0;
#pragma omp for schedule(dynamic) #pragma omp for schedule(dynamic)
for (int i = i0; i < i1; i++) { for (int i = i0; i < i1; i++) {
storage_idx_t pt_id = order[i]; storage_idx_t pt_id = order[i];
dis->set_query (x + (pt_id - n0) * dis->d); dis->set_query (x + (pt_id - n0) * d);
// cannot break
if (interrupt) {
continue;
}
hnsw.add_with_locks(*dis, pt_level, pt_id, locks, vt); hnsw.add_with_locks(*dis, pt_level, pt_id, locks, vt);
...@@ -156,7 +173,21 @@ void hnsw_add_vertices(IndexHNSW &index_hnsw, ...@@ -156,7 +173,21 @@ void hnsw_add_vertices(IndexHNSW &index_hnsw,
printf(" %d / %d\r", i - i0, i1 - i0); printf(" %d / %d\r", i - i0, i1 - i0);
fflush(stdout); fflush(stdout);
} }
if (counter % check_period == 0) {
#pragma omp critical
{
if (InterruptCallback::is_interrupted ()) {
interrupt = true;
}
}
}
counter++;
}
} }
if (interrupt) {
FAISS_THROW_MSG ("computation interrupted");
} }
i1 = i0; i1 = i0;
} }
...@@ -214,16 +245,22 @@ void IndexHNSW::search (idx_t n, const float *x, idx_t k, ...@@ -214,16 +245,22 @@ void IndexHNSW::search (idx_t n, const float *x, idx_t k,
float *distances, idx_t *labels) const float *distances, idx_t *labels) const
{ {
size_t nreorder = 0;
#pragma omp parallel idx_t check_period = InterruptCallback::get_period_hint (
hnsw.max_level * d * hnsw.efSearch);
for (idx_t i0 = 0; i0 < n; i0 += check_period) {
idx_t i1 = std::min(i0 + check_period, n);
#pragma omp parallel reduction(+ : nreorder)
{ {
VisitedTable vt (ntotal); VisitedTable vt (ntotal);
DistanceComputer *dis = get_distance_computer(); DistanceComputer *dis = get_distance_computer();
ScopeDeleter1<DistanceComputer> del(dis); ScopeDeleter1<DistanceComputer> del(dis);
size_t nreorder = 0;
#pragma omp for #pragma omp for
for(idx_t i = 0; i < n; i++) { for(idx_t i = i0; i < i1; i++) {
idx_t * idxi = labels + i * k; idx_t * idxi = labels + i * k;
float * simi = distances + i * k; float * simi = distances + i * k;
dis->set_query(x + i * d); dis->set_query(x + i * d);
...@@ -245,14 +282,13 @@ void IndexHNSW::search (idx_t n, const float *x, idx_t k, ...@@ -245,14 +282,13 @@ void IndexHNSW::search (idx_t n, const float *x, idx_t k,
maxheap_heapify (k_reorder, simi, idxi, simi, idxi, k_reorder); maxheap_heapify (k_reorder, simi, idxi, simi, idxi, k_reorder);
maxheap_reorder (k_reorder, simi, idxi); maxheap_reorder (k_reorder, simi, idxi);
} }
} }
#pragma omp critical
{
hnsw_stats.nreorder += nreorder;
} }
InterruptCallback::check ();
} }
hnsw_stats.nreorder += nreorder;
} }
...@@ -552,7 +588,7 @@ namespace { ...@@ -552,7 +588,7 @@ namespace {
// storage that explicitly reconstructs vectors before computing distances // storage that explicitly reconstructs vectors before computing distances
struct GenericDistanceComputer: DistanceComputer { struct GenericDistanceComputer: DistanceComputer {
size_t d;
const Index & storage; const Index & storage;
std::vector<float> buf; std::vector<float> buf;
const float *q; const float *q;
...@@ -563,13 +599,13 @@ struct GenericDistanceComputer: DistanceComputer { ...@@ -563,13 +599,13 @@ struct GenericDistanceComputer: DistanceComputer {
buf.resize(d * 2); buf.resize(d * 2);
} }
float operator () (storage_idx_t i) override float operator () (idx_t i) override
{ {
storage.reconstruct(i, buf.data()); storage.reconstruct(i, buf.data());
return fvec_L2sqr(q, buf.data(), d); return fvec_L2sqr(q, buf.data(), d);
} }
float symmetric_dis(storage_idx_t i, storage_idx_t j) override float symmetric_dis(idx_t i, idx_t j) override
{ {
storage.reconstruct(i, buf.data()); storage.reconstruct(i, buf.data());
storage.reconstruct(j, buf.data() + d); storage.reconstruct(j, buf.data() + d);
...@@ -830,18 +866,19 @@ namespace { ...@@ -830,18 +866,19 @@ namespace {
struct FlatL2Dis: DistanceComputer { struct FlatL2Dis: DistanceComputer {
size_t d;
Index::idx_t nb; Index::idx_t nb;
const float *q; const float *q;
const float *b; const float *b;
size_t ndis; size_t ndis;
float operator () (storage_idx_t i) override float operator () (idx_t i) override
{ {
ndis++; ndis++;
return (fvec_L2sqr(q, b + i * d, d)); return (fvec_L2sqr(q, b + i * d, d));
} }
float symmetric_dis(storage_idx_t i, storage_idx_t j) override float symmetric_dis(idx_t i, idx_t j) override
{ {
return (fvec_L2sqr(b + j * d, b + i * d, d)); return (fvec_L2sqr(b + j * d, b + i * d, d));
} }
...@@ -860,7 +897,7 @@ struct FlatL2Dis: DistanceComputer { ...@@ -860,7 +897,7 @@ struct FlatL2Dis: DistanceComputer {
q = x; q = x;
} }
virtual ~FlatL2Dis () { ~FlatL2Dis() override {
#pragma omp critical #pragma omp critical
{ {
hnsw_stats.ndis += ndis; hnsw_stats.ndis += ndis;
...@@ -903,6 +940,7 @@ namespace { ...@@ -903,6 +940,7 @@ namespace {
struct PQDis: DistanceComputer { struct PQDis: DistanceComputer {
size_t d;
Index::idx_t nb; Index::idx_t nb;
const uint8_t *codes; const uint8_t *codes;
size_t code_size; size_t code_size;
...@@ -911,7 +949,7 @@ struct PQDis: DistanceComputer { ...@@ -911,7 +949,7 @@ struct PQDis: DistanceComputer {
std::vector<float> precomputed_table; std::vector<float> precomputed_table;
size_t ndis; size_t ndis;
float operator () (storage_idx_t i) override float operator () (idx_t i) override
{ {
const uint8_t *code = codes + i * code_size; const uint8_t *code = codes + i * code_size;
const float *dt = precomputed_table.data(); const float *dt = precomputed_table.data();
...@@ -924,7 +962,7 @@ struct PQDis: DistanceComputer { ...@@ -924,7 +962,7 @@ struct PQDis: DistanceComputer {
return accu; return accu;
} }
float symmetric_dis(storage_idx_t i, storage_idx_t j) override float symmetric_dis(idx_t i, idx_t j) override
{ {
const float * sdci = sdc; const float * sdci = sdc;
float accu = 0; float accu = 0;
...@@ -955,7 +993,7 @@ struct PQDis: DistanceComputer { ...@@ -955,7 +993,7 @@ struct PQDis: DistanceComputer {
pq.compute_distance_table(x, precomputed_table.data()); pq.compute_distance_table(x, precomputed_table.data());
} }
virtual ~PQDis () { ~PQDis() override {
#pragma omp critical #pragma omp critical
{ {
hnsw_stats.ndis += ndis; hnsw_stats.ndis += ndis;
...@@ -995,56 +1033,10 @@ DistanceComputer * IndexHNSWPQ::get_distance_computer () const ...@@ -995,56 +1033,10 @@ DistanceComputer * IndexHNSWPQ::get_distance_computer () const
**************************************************************/ **************************************************************/
namespace {

/** Distance computer over the codes of an IndexScalarQuantizer.
 *
 * All distance evaluations are delegated to a
 * ScalarQuantizer::DistanceComputer obtained from the storage's quantizer;
 * that object is owned by this struct and deleted in the destructor.
 */
struct SQDis: DistanceComputer {
    Index::idx_t nb;            ///< storage.ntotal at construction time
    const uint8_t *codes;       ///< storage.codes.data()
    size_t code_size;           ///< bytes per code (sq.code_size)
    const ScalarQuantizer & sq;
    const float *q;             ///< current query, nullptr until set_query
    ScalarQuantizer::DistanceComputer * dc;  ///< owned, see destructor

    /// distance between the current query and stored code i
    float operator () (storage_idx_t i) override
    {
        const uint8_t *code = codes + i * code_size;

        return dc->compute_distance (q, code);
    }

    /// distance between stored codes i and j
    float symmetric_dis(storage_idx_t i, storage_idx_t j) override
    {
        const uint8_t *codei = codes + i * code_size;
        const uint8_t *codej = codes + j * code_size;
        return dc->compute_code_distance (codei, codej);
    }

    /// The second argument is ignored; the query is provided via set_query.
    SQDis(const IndexScalarQuantizer& storage, const float* /*q*/ = nullptr)
        : sq(storage.sq), q(nullptr) {
        nb = storage.ntotal;
        d = storage.d;
        codes = storage.codes.data();
        code_size = sq.code_size;
        dc = sq.get_distance_computer();
    }

    // dc is a raw owning pointer: a copy would delete it twice
    SQDis(const SQDis &) = delete;

    void set_query(const float *x) override {
        q = x;
    }

    virtual ~SQDis () {
        delete dc;
    }
};

} // namespace
IndexHNSWSQ::IndexHNSWSQ(int d, ScalarQuantizer::QuantizerType qtype, int M): IndexHNSWSQ::IndexHNSWSQ(int d, ScalarQuantizer::QuantizerType qtype, int M):
IndexHNSW (new IndexScalarQuantizer (d, qtype), M) IndexHNSW (new IndexScalarQuantizer (d, qtype), M)
{ {
is_trained = false;
own_fields = true; own_fields = true;
} }
...@@ -1052,7 +1044,8 @@ IndexHNSWSQ::IndexHNSWSQ() {} ...@@ -1052,7 +1044,8 @@ IndexHNSWSQ::IndexHNSWSQ() {}
DistanceComputer * IndexHNSWSQ::get_distance_computer () const DistanceComputer * IndexHNSWSQ::get_distance_computer () const
{ {
return new SQDis (*dynamic_cast<IndexScalarQuantizer*> (storage)); return (dynamic_cast<const IndexScalarQuantizer*> (storage))->
get_distance_computer ();
} }
...@@ -1078,7 +1071,7 @@ namespace { ...@@ -1078,7 +1071,7 @@ namespace {
struct Distance2Level: DistanceComputer { struct Distance2Level: DistanceComputer {
size_t d;
const Index2Layer & storage; const Index2Layer & storage;
std::vector<float> buf; std::vector<float> buf;
const float *q; const float *q;
...@@ -1093,7 +1086,7 @@ struct Distance2Level: DistanceComputer { ...@@ -1093,7 +1086,7 @@ struct Distance2Level: DistanceComputer {
buf.resize(2 * d); buf.resize(2 * d);
} }
float symmetric_dis(storage_idx_t i, storage_idx_t j) override float symmetric_dis(idx_t i, idx_t j) override
{ {
storage.reconstruct(i, buf.data()); storage.reconstruct(i, buf.data());
storage.reconstruct(j, buf.data() + d); storage.reconstruct(j, buf.data() + d);
...@@ -1122,7 +1115,7 @@ struct DistanceXPQ4: Distance2Level { ...@@ -1122,7 +1115,7 @@ struct DistanceXPQ4: Distance2Level {
pq_l1_tab = quantizer->xb.data(); pq_l1_tab = quantizer->xb.data();
} }
float operator () (storage_idx_t i) override float operator () (idx_t i) override
{ {
#ifdef __SSE__ #ifdef __SSE__
const uint8_t *code = storage.codes.data() + i * storage.code_size; const uint8_t *code = storage.codes.data() + i * storage.code_size;
...@@ -1173,7 +1166,7 @@ struct Distance2xXPQ4: Distance2Level { ...@@ -1173,7 +1166,7 @@ struct Distance2xXPQ4: Distance2Level {
pq_l1_tab = mi->pq.centroids.data(); pq_l1_tab = mi->pq.centroids.data();
} }
float operator () (storage_idx_t i) override float operator () (idx_t i) override
{ {
const uint8_t *code = storage.codes.data() + i * storage.code_size; const uint8_t *code = storage.codes.data() + i * storage.code_size;
long key01 = 0; long key01 = 0;
......
...@@ -86,7 +86,7 @@ struct IndexHNSW : Index { ...@@ -86,7 +86,7 @@ struct IndexHNSW : Index {
~IndexHNSW() override; ~IndexHNSW() override;
// get a DistanceComputer object for this kind of storage // get a DistanceComputer object for this kind of storage
virtual HNSW::DistanceComputer *get_distance_computer() const = 0; virtual DistanceComputer *get_distance_computer() const = 0;
void add(idx_t n, const float *x) override; void add(idx_t n, const float *x) override;
...@@ -138,7 +138,7 @@ struct IndexHNSW : Index { ...@@ -138,7 +138,7 @@ struct IndexHNSW : Index {
struct IndexHNSWFlat : IndexHNSW { struct IndexHNSWFlat : IndexHNSW {
IndexHNSWFlat(); IndexHNSWFlat();
IndexHNSWFlat(int d, int M); IndexHNSWFlat(int d, int M);
HNSW::DistanceComputer * DistanceComputer *
get_distance_computer() const override; get_distance_computer() const override;
}; };
...@@ -149,7 +149,7 @@ struct IndexHNSWPQ : IndexHNSW { ...@@ -149,7 +149,7 @@ struct IndexHNSWPQ : IndexHNSW {
IndexHNSWPQ(); IndexHNSWPQ();
IndexHNSWPQ(int d, int pq_m, int M); IndexHNSWPQ(int d, int pq_m, int M);
void train(idx_t n, const float* x) override; void train(idx_t n, const float* x) override;
HNSW::DistanceComputer * DistanceComputer *
get_distance_computer() const override; get_distance_computer() const override;
}; };
...@@ -159,7 +159,7 @@ struct IndexHNSWPQ : IndexHNSW { ...@@ -159,7 +159,7 @@ struct IndexHNSWPQ : IndexHNSW {
struct IndexHNSWSQ : IndexHNSW { struct IndexHNSWSQ : IndexHNSW {
IndexHNSWSQ(); IndexHNSWSQ();
IndexHNSWSQ(int d, ScalarQuantizer::QuantizerType qtype, int M); IndexHNSWSQ(int d, ScalarQuantizer::QuantizerType qtype, int M);
HNSW::DistanceComputer * DistanceComputer *
get_distance_computer() const override; get_distance_computer() const override;
}; };
...@@ -168,7 +168,7 @@ struct IndexHNSWSQ : IndexHNSW { ...@@ -168,7 +168,7 @@ struct IndexHNSWSQ : IndexHNSW {
struct IndexHNSW2Level : IndexHNSW { struct IndexHNSW2Level : IndexHNSW {
IndexHNSW2Level(); IndexHNSW2Level();
IndexHNSW2Level(Index *quantizer, size_t nlist, int m_pq, int M); IndexHNSW2Level(Index *quantizer, size_t nlist, int m_pq, int M);
HNSW::DistanceComputer * DistanceComputer *
get_distance_computer() const override; get_distance_computer() const override;
void flip_to_ivf(); void flip_to_ivf();
......
...@@ -204,12 +204,18 @@ void IndexIVF::search_preassigned (idx_t n, const float *x, idx_t k, ...@@ -204,12 +204,18 @@ void IndexIVF::search_preassigned (idx_t n, const float *x, idx_t k,
using HeapForIP = CMin<float, idx_t>; using HeapForIP = CMin<float, idx_t>;
using HeapForL2 = CMax<float, idx_t>; using HeapForL2 = CMax<float, idx_t>;
idx_t check_period = InterruptCallback::get_period_hint
(nprobe * ntotal * d / nlist);
for (idx_t i0 = 0; i0 < n; i0 += check_period) {
idx_t i1 = std::min(i0 + check_period, n);
#pragma omp parallel reduction(+: nlistv, ndis, nheap) #pragma omp parallel reduction(+: nlistv, ndis, nheap)
{ {
InvertedListScanner *scanner = get_InvertedListScanner(store_pairs); InvertedListScanner *scanner = get_InvertedListScanner(store_pairs);
ScopeDeleter1<InvertedListScanner> del(scanner); ScopeDeleter1<InvertedListScanner> del(scanner);
#pragma omp for #pragma omp for
for (size_t i = 0; i < n; i++) { for (size_t i = i0; i < i1; i++) {
// loop over queries // loop over queries
const float * xi = x + i * d; const float * xi = x + i * d;
scanner->set_query (xi); scanner->set_query (xi);
...@@ -248,7 +254,6 @@ void IndexIVF::search_preassigned (idx_t n, const float *x, idx_t k, ...@@ -248,7 +254,6 @@ void IndexIVF::search_preassigned (idx_t n, const float *x, idx_t k,
nlistv++; nlistv++;
InvertedLists::ScopedCodes scodes (invlists, key); InvertedLists::ScopedCodes scodes (invlists, key);
const Index::idx_t * ids = store_pairs ? nullptr : const Index::idx_t * ids = store_pairs ? nullptr :
invlists->get_ids (key); invlists->get_ids (key);
...@@ -271,9 +276,10 @@ void IndexIVF::search_preassigned (idx_t n, const float *x, idx_t k, ...@@ -271,9 +276,10 @@ void IndexIVF::search_preassigned (idx_t n, const float *x, idx_t k,
} else { } else {
heap_reorder<HeapForL2> (k, simi, idxi); heap_reorder<HeapForL2> (k, simi, idxi);
} }
} // parallel for } // parallel for
} // parallel } // parallel
InterruptCallback::check ();
} // loop over blocks
indexIVF_stats.nq += n; indexIVF_stats.nq += n;
indexIVF_stats.nlist += nlistv; indexIVF_stats.nlist += nlistv;
...@@ -284,10 +290,83 @@ void IndexIVF::search_preassigned (idx_t n, const float *x, idx_t k, ...@@ -284,10 +290,83 @@ void IndexIVF::search_preassigned (idx_t n, const float *x, idx_t k,
/** Range search: collect results within radius of each query.
 *
 * The quantizer selects nprobe inverted lists per query; every selected,
 * non-empty list is then scanned with the scanner's scan_codes_range and
 * the matches are accumulated in a per-thread RangeSearchPartialResult that
 * is finalized into result. Timing and counters are added to
 * indexIVF_stats.
 *
 * @param nx      number of query vectors
 * @param x       query vectors, read as nx consecutive blocks of d floats
 * @param radius  search radius handed to scan_codes_range
 * @param result  output structure, filled through RangeSearchPartialResult
 */
void IndexIVF::range_search (idx_t nx, const float *x, float radius,
                             RangeSearchResult *result) const
{
    // coarse quantization output: nprobe (list id, distance) pairs per query
    long * keys = new long [nx * nprobe];
    ScopeDeleter<long> del (keys);
    float * coarse_dis = new float [nx * nprobe];
    ScopeDeleter<float> del2 (coarse_dis);

    double t0 = getmillisecs();
    quantizer->search (nx, x, nprobe, coarse_dis, keys);
    indexIVF_stats.quantization_time += getmillisecs() - t0;

    t0 = getmillisecs();
    invlists->prefetch_lists (keys, nx * nprobe);

    size_t nlistv = 0, ndis = 0;
    bool store_pairs = false;

#pragma omp parallel reduction(+: nlistv, ndis)
    {
        RangeSearchPartialResult pres(result);
        // NOTE(review): the base-class get_InvertedListScanner returns
        // nullptr; this code relies on the subclass overriding it.
        InvertedListScanner *scanner = get_InvertedListScanner(store_pairs);
        ScopeDeleter1<InvertedListScanner> del3(scanner);

#pragma omp for
        for (size_t i = 0; i < nx; i++) {
            const float * xi = x + i * d;
            scanner->set_query (xi);

            const long * keysi = keys + i * nprobe;

            RangeQueryResult & qres = pres.new_result (i);

            for (size_t ik = 0; ik < nprobe; ik++) {
                long key = keysi[ik];  /* select the list  */
                if (key < 0) continue;  // negative keys are skipped
                // NOTE(review): this check sits inside an OpenMP region;
                // presumably the macro throws, and an exception must not
                // escape a parallel region -- confirm how failures are
                // meant to propagate.
                // The casts make the arguments match the %ld conversions
                // (ik is a size_t).
                FAISS_THROW_IF_NOT_FMT (key < (long) nlist,
                                        "Invalid key=%ld at ik=%ld nlist=%ld\n",
                                        key, (long) ik, (long) nlist);

                const size_t list_size = invlists->list_size(key);

                if (list_size == 0) continue;

                InvertedLists::ScopedCodes scodes (invlists, key);
                InvertedLists::ScopedIds ids (invlists, key);

                scanner->set_list (key, coarse_dis[i * nprobe + ik]);
                nlistv++;
                ndis += list_size;
                scanner->scan_codes_range (list_size, scodes.get(),
                                           ids.get(), radius, qres);
            }
        }

        pres.finalize ();
    }
    indexIVF_stats.search_time += getmillisecs() - t0;
    indexIVF_stats.nq += nx;
    indexIVF_stats.nlist += nlistv;
    indexIVF_stats.ndis += ndis;
}
/// Base-class implementation: provides no scanner and returns nullptr.
/// The store_pairs argument is ignored here.
InvertedListScanner *IndexIVF::get_InvertedListScanner (
bool /*store_pairs*/) const
{
return nullptr;
}
void IndexIVF::reconstruct (idx_t key, float* recons) const void IndexIVF::reconstruct (idx_t key, float* recons) const
{ {
FAISS_THROW_IF_NOT_MSG (direct_map.size() == ntotal, FAISS_THROW_IF_NOT_MSG (direct_map.size() == ntotal,
"direct map is not initialized"); "direct map is not initialized");
FAISS_THROW_IF_NOT_MSG (key >= 0 && key < direct_map.size(),
"invalid key");
long list_no = direct_map[key] >> 32; long list_no = direct_map[key] >> 32;
long offset = direct_map[key] & 0xffffffff; long offset = direct_map[key] & 0xffffffff;
reconstruct_from_offset (list_no, offset, recons); reconstruct_from_offset (list_no, offset, recons);
...@@ -430,37 +509,6 @@ void IndexIVF::train_residual(idx_t /*n*/, const float* /*x*/) { ...@@ -430,37 +509,6 @@ void IndexIVF::train_residual(idx_t /*n*/, const float* /*x*/) {
} }
/// Imbalance of the inverted lists, obtained by passing the size of each of
/// the nlist lists to faiss::imbalance_factor.
double IndexIVF::imbalance_factor () const
{
    // one entry per inverted list, holding that list's size
    std::vector<int> list_sizes (nlist);
    for (size_t list_no = 0; list_no < list_sizes.size(); list_no++) {
        list_sizes[list_no] = invlists->list_size(list_no);
    }
    return faiss::imbalance_factor (nlist, list_sizes.data());
}
/** Print a histogram of the inverted list sizes to stdout.
 *
 * Bucket j counts the lists whose size first becomes 0 when shifted right
 * by j bits, i.e. lists with size < 2^j that did not fit a smaller bucket.
 * Only non-empty buckets are printed. Lists too large for the last of the
 * 40 buckets are not counted.
 */
void IndexIVF::print_stats () const
{
    std::vector<int> sizes(40);
    for (size_t i = 0; i < nlist; i++) {
        const size_t list_size = invlists->list_size(i);
        for (size_t j = 0; j < sizes.size(); j++) {
            if ((list_size >> j) == 0) {
                sizes[j]++;
                break;
            }
        }
    }
    for (size_t i = 0; i < sizes.size(); i++) {
        if (sizes[i]) {
            // shift a size_t: an int shift overflows for buckets >= 31
            printf ("list size in < %zu: %d instances\n",
                    size_t(1) << i, sizes[i]);
        }
    }
}
void IndexIVF::check_compatible_for_merge (const IndexIVF &other) const void IndexIVF::check_compatible_for_merge (const IndexIVF &other) const
{ {
// minimal sanity checks // minimal sanity checks
...@@ -581,5 +629,15 @@ void IndexIVFStats::reset() ...@@ -581,5 +629,15 @@ void IndexIVFStats::reset()
IndexIVFStats indexIVF_stats; IndexIVFStats indexIVF_stats;
/// Fallback for scanners that do not provide a range scan: all arguments
/// are ignored and the "not implemented" error is raised through
/// FAISS_THROW_MSG.
void InvertedListScanner::scan_codes_range (
        size_t /*n*/,
        const uint8_t * /*codes*/,
        const idx_t * /*ids*/,
        float /*radius*/,
        RangeQueryResult & /*result*/) const
{
    FAISS_THROW_MSG ("scan_codes_range not implemented");
}
} // namespace faiss } // namespace faiss
...@@ -160,14 +160,15 @@ struct IndexIVF: Index, Level1Quantizer { ...@@ -160,14 +160,15 @@ struct IndexIVF: Index, Level1Quantizer {
) const; ) const;
/** assign the vectors, then call search_preassign */ /** assign the vectors, then call search_preassign */
virtual void search (idx_t n, const float *x, idx_t k, void search (idx_t n, const float *x, idx_t k,
float *distances, idx_t *labels) const override; float *distances, idx_t *labels) const override;
void range_search (idx_t n, const float* x, float radius,
RangeSearchResult* result) const override;
/// get a scanner for this index (store_pairs means ignore labels) /// get a scanner for this index (store_pairs means ignore labels)
virtual InvertedListScanner *get_InvertedListScanner ( virtual InvertedListScanner *get_InvertedListScanner (
bool store_pairs=false) const { bool store_pairs=false) const;
return nullptr;
}
void reconstruct (idx_t key, float* recons) const override; void reconstruct (idx_t key, float* recons) const override;
...@@ -242,18 +243,14 @@ struct IndexIVF: Index, Level1Quantizer { ...@@ -242,18 +243,14 @@ struct IndexIVF: Index, Level1Quantizer {
*/ */
void make_direct_map (bool new_maintain_direct_map=true); void make_direct_map (bool new_maintain_direct_map=true);
/// 1= perfectly balanced, >1: imbalanced
double imbalance_factor () const;
/// display some stats about the inverted lists
void print_stats () const;
/// replace the inverted lists, old one is deallocated if own_invlists /// replace the inverted lists, old one is deallocated if own_invlists
void replace_invlists (InvertedLists *il, bool own=false); void replace_invlists (InvertedLists *il, bool own=false);
IndexIVF (); IndexIVF ();
}; };
class RangeQueryResult;
/** Object that handles a query. The inverted lists to scan are /** Object that handles a query. The inverted lists to scan are
* provided externally. The object has a lot of state, but * provided externally. The object has a lot of state, but
* distance_to_code and scan_codes can be called in multiple * distance_to_code and scan_codes can be called in multiple
...@@ -271,8 +268,8 @@ struct InvertedListScanner { ...@@ -271,8 +268,8 @@ struct InvertedListScanner {
/// compute a single query-to-code distance /// compute a single query-to-code distance
virtual float distance_to_code (const uint8_t *code) const = 0; virtual float distance_to_code (const uint8_t *code) const = 0;
/** compute the distances to codes. (distances, labels) should be /** scan a set of codes, compute distances to current query and
* organized ad a min- or max-heap * update heap of results if necessary.
* *
* @param n number of codes to scan * @param n number of codes to scan
* @param codes codes to scan (n * code_size) * @param codes codes to scan (n * code_size)
...@@ -280,6 +277,7 @@ struct InvertedListScanner { ...@@ -280,6 +277,7 @@ struct InvertedListScanner {
* @param distances heap distances (size k) * @param distances heap distances (size k)
* @param labels heap labels (size k) * @param labels heap labels (size k)
* @param k heap size * @param k heap size
* @return number of heap updates performed
*/ */
virtual size_t scan_codes (size_t n, virtual size_t scan_codes (size_t n,
const uint8_t *codes, const uint8_t *codes,
...@@ -287,6 +285,16 @@ struct InvertedListScanner { ...@@ -287,6 +285,16 @@ struct InvertedListScanner {
float *distances, idx_t *labels, float *distances, idx_t *labels,
size_t k) const = 0; size_t k) const = 0;
/** scan a set of codes, compute distances to current query and
* update results if distances are below radius
*
* (default implementation fails) */
virtual void scan_codes_range (size_t n,
const uint8_t *codes,
const idx_t *ids,
float radius,
RangeQueryResult &result) const;
virtual ~InvertedListScanner () {} virtual ~InvertedListScanner () {}
}; };
......
...@@ -137,6 +137,25 @@ struct IVFFlatScanner: InvertedListScanner { ...@@ -137,6 +137,25 @@ struct IVFFlatScanner: InvertedListScanner {
return nup; return nup;
} }
/** Range version of the list scan: computes the distance between the
 * current query (xi) and each of the list_size stored vectors, and
 * adds to res every entry for which C::cmp (radius, dis) holds.
 *
 * @param list_size  number of vectors in the list
 * @param codes      list payload, read here as list_size * d floats
 * @param ids        ids of the stored vectors (used unless store_pairs)
 * @param radius     threshold handed to C::cmp
 * @param res        receives the (distance, id) pairs that pass
 */
void scan_codes_range (size_t list_size,
                       const uint8_t *codes,
                       const idx_t *ids,
                       float radius,
                       RangeQueryResult & res) const override
{
    const float *list_vecs = (const float*)codes;
    for (size_t j = 0; j < list_size; j++) {
        const float * yj = list_vecs + d * j;

        float dis;
        if (metric == METRIC_INNER_PRODUCT) {
            dis = fvec_inner_product (xi, yj, d);
        } else {
            dis = fvec_L2sqr (xi, yj, d);
        }

        if (!C::cmp (radius, dis)) {
            continue;
        }

        // with store_pairs the reported id packs the list number in
        // the high 32 bits and the offset in the list in the low bits
        long id;
        if (store_pairs) {
            id = list_no << 32 | j;
        } else {
            id = ids[j];
        }
        res.add (dis, id);
    }
}
}; };
...@@ -168,57 +187,6 @@ InvertedListScanner* IndexIVFFlat::get_InvertedListScanner ...@@ -168,57 +187,6 @@ InvertedListScanner* IndexIVFFlat::get_InvertedListScanner
} }
/** Range search: for each of the nx queries, scan the nprobe inverted
 * lists selected by the quantizer and report every stored vector that
 * passes the radius test (squared L2 distance < radius for METRIC_L2,
 * inner product > radius for METRIC_INNER_PRODUCT).
 *
 * @param nx      number of query vectors
 * @param x       query vectors, size nx * d
 * @param radius  search threshold
 * @param result  receives the matching (distance, id) pairs
 */
void IndexIVFFlat::range_search (idx_t nx, const float *x, float radius,
                                 RangeSearchResult *result) const
{
    idx_t * keys = new idx_t [nx * nprobe];
    ScopeDeleter<idx_t> del (keys);
    quantizer->assign (nx, x, keys, nprobe);

#pragma omp parallel
    {
        RangeSearchPartialResult pres(result);

        // Share the queries among the threads of the team. Without this
        // worksharing directive every thread executes the whole loop
        // and repeats the same scans.
#pragma omp for
        for (idx_t i = 0; i < nx; i++) {
            const float * xi = x + i * d;
            const long * keysi = keys + i * nprobe;

            RangeSearchPartialResult::QueryResult & qres =
                pres.new_result (i);

            for (size_t ik = 0; ik < nprobe; ik++) {
                long key = keysi[ik];  /* select the list  */
                if (key < 0 || key >= (long) nlist) {
                    fprintf (stderr, "Invalid key=%ld at ik=%zu nlist=%zu\n",
                             key, (size_t) ik, (size_t) nlist);
                    // no exception is being handled at this point, so
                    // this bare throw ends the program
                    throw;
                }

                const size_t list_size = invlists->list_size(key);
                InvertedLists::ScopedCodes scodes (invlists, key);
                const float * list_vecs = (const float*)scodes.get();
                InvertedLists::ScopedIds ids (invlists, key);

                for (size_t j = 0; j < list_size; j++) {
                    const float * yj = list_vecs + d * j;
                    if (metric_type == METRIC_L2) {
                        float disij = fvec_L2sqr (xi, yj, d);
                        if (disij < radius) {
                            qres.add (disij, ids[j]);
                        }
                    } else if (metric_type == METRIC_INNER_PRODUCT) {
                        float disij = fvec_inner_product(xi, yj, d);
                        if (disij > radius) {
                            qres.add (disij, ids[j]);
                        }
                    }
                }
            }
        }

        // every thread of the team hands over its partial result
        pres.finalize ();
    }
}
void IndexIVFFlat::update_vectors (int n, idx_t *new_ids, const float *x) void IndexIVFFlat::update_vectors (int n, idx_t *new_ids, const float *x)
{ {
...@@ -272,18 +240,6 @@ IndexIVFFlatDedup::IndexIVFFlatDedup ( ...@@ -272,18 +240,6 @@ IndexIVFFlatDedup::IndexIVFFlatDedup (
IndexIVFFlat (quantizer, d, nlist_, metric_type) IndexIVFFlat (quantizer, d, nlist_, metric_type)
{} {}
// from Python's stringobject.c
/** Hash a byte buffer.
 *
 * @param bytes  buffer to hash (may be null when n <= 0)
 * @param n      number of bytes in the buffer
 * @return       64-bit hash value; 0 for an empty buffer
 */
static uint64_t hash_bytes (const uint8_t *bytes, long n) {
    // nothing to read: the seed below would otherwise dereference
    // bytes[0], which is out of bounds for an empty buffer
    if (n <= 0) {
        return 0;
    }
    const uint8_t *p = bytes;
    uint64_t x = (uint64_t)(*p) << 7;
    long len = n;
    while (--len >= 0) {
        x = (1000003*x) ^ *p++;
    }
    x ^= n;
    return x;
}
void IndexIVFFlatDedup::train(idx_t n, const float* x) void IndexIVFFlatDedup::train(idx_t n, const float* x)
{ {
......
...@@ -39,24 +39,10 @@ struct IndexIVFFlat: IndexIVF { ...@@ -39,24 +39,10 @@ struct IndexIVFFlat: IndexIVF {
const idx_t *list_nos, const idx_t *list_nos,
uint8_t * codes) const override; uint8_t * codes) const override;
/*
void search_preassigned (idx_t n, const float *x, idx_t k,
const idx_t *assign,
const float *centroid_dis,
float *distances, idx_t *labels,
bool store_pairs,
const IVFSearchParameters *params=nullptr
) const override;
*/
InvertedListScanner *get_InvertedListScanner (bool store_pairs) InvertedListScanner *get_InvertedListScanner (bool store_pairs)
const override; const override;
void range_search(
idx_t n,
const float* x,
float radius,
RangeSearchResult* result) const override;
/** Update a subset of vectors. /** Update a subset of vectors.
* *
* The index must have a direct_map * The index must have a direct_map
......
...@@ -796,7 +796,7 @@ struct MinSumK { ...@@ -796,7 +796,7 @@ struct MinSumK {
// enqueue followers // enqueue followers
long ii = ti; long ii = ti;
for (int m = 0; m < M; m++) { for (int m = 0; m < M; m++) {
long n = ii & ((1 << nbit) - 1); long n = ii & ((1L << nbit) - 1);
ii >>= nbit; ii >>= nbit;
if (n + 1 >= N) continue; if (n + 1 >= N) continue;
...@@ -819,8 +819,8 @@ struct MinSumK { ...@@ -819,8 +819,8 @@ struct MinSumK {
} }
long ti = 0; long ti = 0;
for (int m = 0; m < M; m++) { for (int m = 0; m < M; m++) {
long n = ii & ((1 << nbit) - 1); long n = ii & ((1L << nbit) - 1);
ti += ssx[m].get_ord(n) << (nbit * m); ti += long(ssx[m].get_ord(n)) << (nbit * m);
ii >>= nbit; ii >>= nbit;
} }
terms[k] = ti; terms[k] = ti;
......
/**
* Copyright (c) 2015-present, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under the BSD+Patents license found in the
* LICENSE file in the root directory of this source tree.
*/
#include "IndexReplicas.h"
#include "FaissAssert.h"
namespace faiss {
/// Creates an empty replica set that does not own its sub-indexes.
template<class IndexClass>
IndexReplicasTemplate<IndexClass>::IndexReplicasTemplate():
    own_fields (false)
{
}
/// Deletes the registered sub-indexes, but only when own_fields is set.
template<class IndexClass>
IndexReplicasTemplate<IndexClass>::~IndexReplicasTemplate() {
    if (!own_fields) {
        return;
    }
    for (size_t i = 0; i < this->indices_.size(); i++) {
        delete this->indices_[i].first;
    }
}
/// Registers a replica together with a fresh worker thread.
/// The first replica defines d, ntotal, verbose, is_trained and
/// metric_type of this object; every later one must agree with the
/// first on d, ntotal and metric_type, otherwise an exception is thrown.
template<class IndexClass>
void IndexReplicasTemplate<IndexClass>::addIndex(IndexClass* index) {
    if (indices_.empty()) {
        // Set our parameters
        // FIXME: this is a little bit weird
        this->metric_type = index->metric_type;
        this->is_trained = index->is_trained;
        this->verbose = index->verbose;
        this->ntotal = index->ntotal;
        this->d = index->d;
    } else {
        // Make sure that the parameters are the same for all prior indices
        auto& existing = indices_.front().first;

        FAISS_THROW_IF_NOT_FMT(index->d == existing->d,
                               "IndexReplicas::addIndex: dimension mismatch for "
                               "newly added index; prior index has dim %d, "
                               "new index has %d",
                               existing->d, index->d);
        FAISS_THROW_IF_NOT_FMT(index->ntotal == existing->ntotal,
                               "IndexReplicas::addIndex: newly added index does "
                               "not have same number of vectors as prior index; "
                               "prior index has %ld vectors, new index has %ld",
                               existing->ntotal, index->ntotal);
        FAISS_THROW_IF_NOT_MSG(index->metric_type == existing->metric_type,
                               "IndexReplicas::addIndex: newly added index is "
                               "of different metric type than old index");
    }

    std::unique_ptr<WorkerThread> worker (new WorkerThread);
    this->indices_.emplace_back(std::make_pair(index, std::move(worker)));
}
/// Unregisters a replica. Its worker thread is stopped and waited for
/// before the entry is erased; throws when the index is not registered.
template<class IndexClass>
void IndexReplicasTemplate<IndexClass>::removeIndex(IndexClass* index) {
    for (size_t pos = 0; pos < indices_.size(); pos++) {
        if (indices_[pos].first != index) {
            continue;
        }
        // This is our index; stop the worker thread before removing it,
        // to ensure that it has finished before function exit
        WorkerThread *worker = indices_[pos].second.get();
        worker->stop();
        worker->waitForThreadExit();

        this->indices_.erase(this->indices_.begin() + pos);
        return;
    }

    // could not find our index
    FAISS_THROW_MSG("IndexReplicas::removeIndex: index not found");
}
/// Queues f on the worker thread of every replica, then blocks until
/// all of the queued calls have completed. Throws if there is no replica.
template<class IndexClass>
void IndexReplicasTemplate<IndexClass>::runOnIndex(std::function<void(IndexClass*)> f) {
    FAISS_THROW_IF_NOT_MSG(!indices_.empty(), "no replicas in index");

    std::vector<std::future<bool>> pending;
    for (size_t i = 0; i < this->indices_.size(); i++) {
        IndexClass* replica = this->indices_[i].first;
        pending.emplace_back(
            this->indices_[i].second->add([replica, f](){ f(replica); }));
    }

    // Blocking wait for completion
    for (size_t i = 0; i < pending.size(); i++) {
        pending[i].get();
    }
}
/// Resets every replica, then sets our own ntotal back to 0.
template<class IndexClass>
void IndexReplicasTemplate<IndexClass>::reset() {
    auto reset_replica = [](IndexClass* index) {
        index->reset();
    };
    runOnIndex(reset_replica);
    this->ntotal = 0;
}
/// Trains every replica on the same n training vectors x.
template<class IndexClass>
void IndexReplicasTemplate<IndexClass>::train(idx_t n, const component_t* x) {
    auto train_replica = [n, x](IndexClass* index) {
        index->train(n, x);
    };
    runOnIndex(train_replica);
}
/// Adds the same n vectors x to every replica and bumps ntotal by n.
template<class IndexClass>
void IndexReplicasTemplate<IndexClass>::add(idx_t n, const component_t* x) {
    auto add_to_replica = [n, x](IndexClass* index) {
        index->add(n, x);
    };
    runOnIndex(add_to_replica);
    this->ntotal += n;
}
/// Reconstructs entry n into x by delegating to the first replica.
/// Throws if no replica has been registered.
template<class IndexClass>
void IndexReplicasTemplate<IndexClass>::reconstruct(idx_t n, component_t* x) const {
    FAISS_THROW_IF_NOT_MSG(!indices_.empty(), "no replicas in index");

    IndexClass* first_replica = indices_.front().first;
    first_replica->reconstruct (n, x);
}
/// Splits the n queries into consecutive slices of
/// ceil(n / #replicas) queries, runs each slice on the worker thread
/// of one replica and waits for all of them. Every slice writes
/// directly into its own part of distances / labels (k entries per
/// query). Throws if no replica has been registered.
template<class IndexClass>
void IndexReplicasTemplate<IndexClass>::search(
    idx_t n,
    const component_t* x,
    idx_t k,
    distance_t* distances,
    idx_t* labels) const {
    FAISS_THROW_IF_NOT_MSG(!indices_.empty(), "no replicas in index");

    if (n == 0) {
        return;
    }

    auto dim = indices_.front().first->d;

    // number of components stored per query vector: (dim + 7) / 8
    // when a component is one byte wide, dim otherwise
    size_t components_per_vec = sizeof(component_t) == 1 ? (dim + 7) / 8 : dim;

    // Partition the query by the number of indices we have
    auto queriesPerIndex =
        (faiss::Index::idx_t) (n + indices_.size() - 1) / indices_.size();
    FAISS_ASSERT(n / queriesPerIndex <= indices_.size());

    std::vector<std::future<bool>> pending;

    for (faiss::Index::idx_t i = 0; i < indices_.size(); ++i) {
        auto base = i * queriesPerIndex;
        if (base >= n) {
            // fewer slices than replicas: the remaining ones stay idle
            break;
        }

        auto numForIndex = std::min(queriesPerIndex, n - base);
        auto queryStart = x + base * components_per_vec;
        auto distancesStart = distances + base * k;
        auto labelsStart = labels + base * k;
        auto replica = indices_[i].first;

        pending.emplace_back(indices_[i].second->add(
            [replica, numForIndex, queryStart, k, distancesStart, labelsStart]() {
                replica->search(numForIndex, queryStart,
                                k, distancesStart, labelsStart);
            }));
    }

    // Blocking wait for completion
    for (size_t j = 0; j < pending.size(); j++) {
        pending[j].get();
    }
}
// explicit instantiations
template struct IndexReplicasTemplate<Index>;
template struct IndexReplicasTemplate<IndexBinary>;
} // namespace
/**
* Copyright (c) 2015-present, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under the BSD+Patents license found in the
* LICENSE file in the root directory of this source tree.
*/
#pragma once
#include "Index.h"
#include "IndexBinary.h"
#include "WorkerThread.h"
#include <memory>
#include <vector>
namespace faiss {
/// Takes individual faiss::Index instances, and splits queries for
/// sending to each Index instance, and joins the results together
/// when done.
/// Each index is managed by a separate CPU thread.
/// IndexClass is the index base class this wrapper derives from; it
/// provides the idx_t, component_t and distance_t types used below.
template<class IndexClass>
class IndexReplicasTemplate : public IndexClass {
public:
// types forwarded from the wrapped index class
using idx_t = typename IndexClass::idx_t;
using component_t = typename IndexClass::component_t;
using distance_t = typename IndexClass::distance_t;
/// Creates an empty replica set (own_fields is false).
IndexReplicasTemplate();
/// Deletes the sub-indexes when own_fields is set.
~IndexReplicasTemplate() override;
/// Adds an index that is managed by ourselves.
/// WARNING: once an index is added to this proxy, it becomes unsafe
/// to touch it from any thread other than the one that is managing
/// it, until we are shut down. Use runOnIndex to perform work on it
/// instead.
void addIndex(IndexClass* index);
/// Remove an index that is managed by ourselves.
/// This will flush all pending work on that index, and then shut
/// down its managing thread, and will remove the index.
void removeIndex(IndexClass* index);
/// Run a function on all indices, in the thread that the index is
/// managed in.
void runOnIndex(std::function<void(IndexClass*)> f);
/// faiss::Index API
/// All indices receive the same call
void reset() override;
/// faiss::Index API
/// All indices receive the same call
virtual void train(idx_t n, const component_t* x) override;
/// faiss::Index API
/// All indices receive the same call
virtual void add(idx_t n, const component_t* x) override;
/// faiss::Index API
/// Query is partitioned into a slice for each sub-index
/// split by ceil(n / #indices) for our sub-indices
virtual void search(idx_t n,
const component_t* x,
idx_t k,
distance_t* distances,
idx_t* labels) const override;
/// reconstructs from the first index
virtual void reconstruct(idx_t, component_t *v) const override;
/// should the sub-indexes be deleted along with this?
bool own_fields;
/// number of registered sub-indexes
int count() const {return indices_.size(); }
/// i-th registered sub-index (i is not range-checked)
IndexClass* at(int i) {return indices_[i].first; }
const IndexClass* at(int i) const {return indices_[i].first; }
private:
/// Collection of Index instances, with their managing worker thread
mutable std::vector<std::pair<IndexClass*,
std::unique_ptr<WorkerThread> > > indices_;
};
using IndexReplicas = IndexReplicasTemplate<Index>;
using IndexBinaryReplicas = IndexReplicasTemplate<IndexBinary>;
} // namespace
This diff is collapsed.
...@@ -28,6 +28,7 @@ namespace faiss { ...@@ -28,6 +28,7 @@ namespace faiss {
* (default). * (default).
*/ */
struct SQDistanceComputer;
struct ScalarQuantizer { struct ScalarQuantizer {
...@@ -37,6 +38,7 @@ struct ScalarQuantizer { ...@@ -37,6 +38,7 @@ struct ScalarQuantizer {
QT_8bit_uniform, ///< same, shared range for all dimensions QT_8bit_uniform, ///< same, shared range for all dimensions
QT_4bit_uniform, QT_4bit_uniform,
QT_fp16, QT_fp16,
QT_8bit_direct, /// fast indexing of uint8s
}; };
QuantizerType qtype; QuantizerType qtype;
...@@ -79,25 +81,13 @@ struct ScalarQuantizer { ...@@ -79,25 +81,13 @@ struct ScalarQuantizer {
/// decode a vector from a given code (or n vectors if third argument) /// decode a vector from a given code (or n vectors if third argument)
void decode (const uint8_t *code, float *x, size_t n) const; void decode (const uint8_t *code, float *x, size_t n) const;
// fast, non thread-safe way of computing vector-to-code and
// code-to-code distances.
struct DistanceComputer {
/// vector-to-code distance computation SQDistanceComputer *get_distance_computer (MetricType metric = METRIC_L2)
virtual float compute_distance (const float *x,
const uint8_t *code) const = 0;
/// code-to-code distance computation
virtual float compute_code_distance (const uint8_t *code1,
const uint8_t *code2) const = 0;
virtual ~DistanceComputer () {}
};
DistanceComputer *get_distance_computer (MetricType metric = METRIC_L2)
const; const;
}; };
struct DistanceComputer;
struct IndexScalarQuantizer: Index { struct IndexScalarQuantizer: Index {
/// Used to encode the vectors /// Used to encode the vectors
...@@ -137,6 +127,8 @@ struct IndexScalarQuantizer: Index { ...@@ -137,6 +127,8 @@ struct IndexScalarQuantizer: Index {
void reconstruct(idx_t key, float* recons) const override; void reconstruct(idx_t key, float* recons) const override;
DistanceComputer *get_distance_computer () const;
}; };
...@@ -148,6 +140,7 @@ struct IndexScalarQuantizer: Index { ...@@ -148,6 +140,7 @@ struct IndexScalarQuantizer: Index {
struct IndexIVFScalarQuantizer: IndexIVF { struct IndexIVFScalarQuantizer: IndexIVF {
ScalarQuantizer sq; ScalarQuantizer sq;
bool by_residual;
IndexIVFScalarQuantizer(Index *quantizer, size_t d, size_t nlist, IndexIVFScalarQuantizer(Index *quantizer, size_t d, size_t nlist,
ScalarQuantizer::QuantizerType qtype, ScalarQuantizer::QuantizerType qtype,
......
/**
* Copyright (c) 2015-present, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under the BSD+Patents license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#include "IndexShards.h"
#include <cstdio>
#include <functional>
#include "FaissAssert.h"
#include "Heap.h"
#include "WorkerThread.h"
namespace faiss {
// subroutines
namespace {
typedef Index::idx_t idx_t;
// add translation to all valid labels
void translate_labels (long n, idx_t *labels, long translation)
{
if (translation == 0) return;
for (long i = 0; i < n; i++) {
if(labels[i] < 0) continue;
labels[i] += translation;
}
}
/** merge result tables from several shards.
*
* For every query the nshard per-shard result lists are merged into a
* single list of k entries: a heap ordered by C holds the current head
* of each shard's list, and the best head is popped k times.
*
* @param n number of queries
* @param k number of results per query (nothing is done when k == 0)
* @param nshard number of shards
* @param distances output distances, n * k entries are written
* @param labels output labels, n * k entries are written
* @param all_distances size nshard * n * k
* @param all_labels idem
* @param translations label translations to apply, size nshard
*/
template <class IndexClass, class C>
void merge_tables (long n, long k, long nshard,
typename IndexClass::distance_t *distances,
idx_t *labels,
const typename IndexClass::distance_t *all_distances,
idx_t *all_labels,
const long *translations)
{
if(k == 0) {
return;
}
using distance_t = typename IndexClass::distance_t;
// distance between the tables of two consecutive shards
long stride = n * k;
#pragma omp parallel
{
// per-thread scratch space, reused for all queries of the thread:
// pointer[s] is the position of the next unread entry of shard s,
// shard_ids / heap_vals are the two arrays of the heap
std::vector<int> buf (2 * nshard);
int * pointer = buf.data();
int * shard_ids = pointer + nshard;
std::vector<distance_t> buf2 (nshard);
distance_t * heap_vals = buf2.data();
#pragma omp for
for (long i = 0; i < n; i++) {
// the heap maps values to the shard where they are
// produced.
const distance_t *D_in = all_distances + i * k;
const idx_t *I_in = all_labels + i * k;
int heap_size = 0;
// seed the heap with the first entry of every shard; a negative
// label means that the shard has no result for this query
for (long s = 0; s < nshard; s++) {
pointer[s] = 0;
if (I_in[stride * s] >= 0)
heap_push<C> (++heap_size, heap_vals, shard_ids,
D_in[stride * s], s);
}
distance_t *D = distances + i * k;
idx_t *I = labels + i * k;
for (int j = 0; j < k; j++) {
if (heap_size == 0) {
// all shards are exhausted: pad the output
I[j] = -1;
D[j] = C::neutral();
} else {
// pop best element
int s = shard_ids[0];
int & p = pointer[s];
D[j] = heap_vals[0];
I[j] = I_in[stride * s + p] + translations[s];
heap_pop<C> (heap_size--, heap_vals, shard_ids);
// refill the heap with the next entry of the same shard, if any
p++;
if (p < k && I_in[stride * s + p] >= 0)
heap_push<C> (++heap_size, heap_vals, shard_ids,
D_in[stride * s + p], s);
}
}
}
}
}
/** Call f (shard number, shard) on every shard.
 *
 * @param threaded  if false the shards are processed one after the
 *                  other in the calling thread; if true each shard gets
 *                  its own WorkerThread and the call returns once all
 *                  of them are done
 * @param f         function to apply
 * @param indexes   the shards (must not be empty, otherwise throws);
 *                  taken by reference so that the vector is not copied
 *                  on every call
 */
template<class IndexClass>
void runOnIndexes(bool threaded,
                  std::function<void(int no, IndexClass*)> f,
                  const std::vector<IndexClass *> &indexes)
{
    FAISS_THROW_IF_NOT_MSG(!indexes.empty(), "no shards in index");

    // f takes the shard number as an int
    const int nshard = static_cast<int>(indexes.size());

    if (!threaded) {
        for (int no = 0; no < nshard; no++) {
            IndexClass *index = indexes[no];
            f(no, index);
        }
    } else {
        std::vector<std::unique_ptr<WorkerThread> > threads;
        std::vector<std::future<bool>> v;

        for (int no = 0; no < nshard; no++) {
            IndexClass *index = indexes[no];
            threads.emplace_back(new WorkerThread());
            WorkerThread *wt = threads.back().get();
            v.emplace_back(wt->add([no, index, f](){ f(no, index); }));
        }

        // Blocking wait for completion
        for (auto& func : v) {
            func.get();
        }
    }
}
} // anonymous namespace
/// Creates an empty sharded index of dimension d that does not own
/// its shards; threaded and successive_ids are stored as given.
template<class IndexClass>
IndexShardsTemplate<IndexClass>::IndexShardsTemplate (idx_t d, bool threaded, bool successive_ids):
    IndexClass (d),
    own_fields (false),
    threaded (threaded),
    successive_ids (successive_ids)
{}
/// Appends a shard and refreshes the fields derived from the shards.
template<class IndexClass>
void IndexShardsTemplate<IndexClass>::add_shard (IndexClass *idx)
{
    this->shard_indexes.push_back (idx);
    this->sync_with_shard_indexes ();
}
/// Recomputes the fields that mirror the shards: d, metric_type and
/// is_trained are copied from the first shard and ntotal becomes the
/// sum over all shards. Throws when a later shard has a different
/// metric_type or d. Nothing is changed when there is no shard.
template<class IndexClass>
void IndexShardsTemplate<IndexClass>::sync_with_shard_indexes ()
{
    if (shard_indexes.empty()) {
        return;
    }

    IndexClass * index0 = shard_indexes.front();
    this->ntotal = index0->ntotal;
    this->is_trained = index0->is_trained;
    this->metric_type = index0->metric_type;
    this->d = index0->d;

    for (auto it = shard_indexes.begin() + 1; it != shard_indexes.end(); ++it) {
        IndexClass * index = *it;
        FAISS_THROW_IF_NOT (this->metric_type == index->metric_type);
        FAISS_THROW_IF_NOT (this->d == index->d);
        this->ntotal += index->ntotal;
    }
}
/// Trains every shard on the same n vectors x (each shard logs the
/// start and the end of its training when it is verbose), then
/// refreshes the fields derived from the shards.
template<class IndexClass>
void IndexShardsTemplate<IndexClass>::train (idx_t n, const component_t *x)
{
    auto train_one_shard = [n, x](int no, IndexClass *index) {
        if (index->verbose) {
            printf ("begin train shard %d on %ld points\n", no, n);
        }
        index->train(n, x);
        if (index->verbose) {
            printf ("end train shard %d\n", no);
        }
    };

    runOnIndexes<IndexClass> (threaded, train_one_shard, shard_indexes);
    sync_with_shard_indexes ();
}
/// Adds n vectors without explicit ids: forwards to add_with_ids
/// with a null id array.
template<class IndexClass>
void IndexShardsTemplate<IndexClass>::add (idx_t n, const component_t *x)
{
    const idx_t *no_ids = nullptr;
    add_with_ids (n, x, no_ids);
}
/** Distribute n vectors evenly over the shards: shard number `no`
 * receives the vectors [no * n / nshard, (no + 1) * n / nshard).
 *
 * Cases (successive_ids, xids):
 * - true, non-NULL   error, an exception is thrown
 * - true, NULL       add() is called on the shards; only allowed
 *                    while ntotal == 0
 * - false, non-NULL  add_with_ids() is called on the shards with the
 *                    matching slice of xids
 * - false, NULL      add_with_ids() is called on the shards with ids
 *                    ntotal, ntotal + 1, ...
 *
 * @param n     number of vectors to add
 * @param x     vectors to add
 * @param xids  ids of the vectors, or NULL
 */
template<class IndexClass>
void IndexShardsTemplate<IndexClass>::add_with_ids (idx_t n, const component_t * x, const idx_t *xids)
{
    FAISS_THROW_IF_NOT_MSG(!(successive_ids && xids),
                           "It makes no sense to pass in ids and "
                           "request them to be shifted");

    if (successive_ids) {
        // xids is known to be NULL here, see the check above
        FAISS_THROW_IF_NOT_MSG(this->ntotal == 0,
                               "when adding to IndexShards with sucessive_ids, "
                               "only add() in a single pass is supported");
    }

    long nshard = shard_indexes.size();
    const idx_t *ids = xids;
    ScopeDeleter<idx_t> del;
    if (!ids && !successive_ids) {
        // no ids given: number the new vectors from ntotal on
        idx_t *aids = new idx_t[n];
        for (idx_t i = 0; i < n; i++)
            aids[i] = this->ntotal + i;
        ids = aids;
        del.set (ids);
    }

    // number of components stored per vector: (d + 7) / 8 when a
    // component is one byte wide, d otherwise
    size_t components_per_vec =
        sizeof(component_t) == 1 ? (this->d + 7) / 8 : this->d;

    auto add_func = [n, ids, x, nshard, components_per_vec]
        (int no, IndexClass *index) {
        idx_t i0 = no * n / nshard;
        idx_t i1 = (no + 1) * n / nshard;
        auto x0 = x + i0 * components_per_vec;

        if (index->verbose) {
            // report the size of this shard's slice, as the "end"
            // message below does
            printf ("begin add shard %d on %ld points\n", no, i1 - i0);
        }

        if (ids) {
            index->add_with_ids (i1 - i0, x0, ids + i0);
        } else {
            index->add (i1 - i0, x0);
        }

        if (index->verbose) {
            printf ("end add shard %d on %ld points\n", no, i1 - i0);
        }
    };

    runOnIndexes<IndexClass> (threaded, add_func, shard_indexes);

    this->ntotal += n;
}
/// Resets the shards one after the other in the calling thread, then
/// refreshes the fields derived from the shards.
template<class IndexClass>
void IndexShardsTemplate<IndexClass>::reset ()
{
    for (IndexClass *shard : shard_indexes) {
        shard->reset ();
    }
    sync_with_shard_indexes ();
}
/** Run the n queries on every shard, then merge the nshard result
 * tables into the k results per query written to distances / labels.
 *
 * When successive_ids is set, the labels of shard s are shifted by
 * the sum of the ntotal of the shards before it.
 *
 * @param n          number of queries
 * @param x          query vectors
 * @param k          number of results per query
 * @param distances  output distances, size n * k
 * @param labels     output labels, size n * k
 */
template<class IndexClass>
void IndexShardsTemplate<IndexClass>::search (
           idx_t n, const component_t *x, idx_t k,
           distance_t *distances, idx_t *labels) const
{
    long nshard = shard_indexes.size();

    // each buffer gets its guard right after its allocation, so that
    // the first one is released if the second allocation throws
    distance_t *all_distances = new distance_t [nshard * k * n];
    ScopeDeleter<distance_t> del (all_distances);
    idx_t *all_labels = new idx_t [nshard * k * n];
    ScopeDeleter<idx_t> del2 (all_labels);

    // shard number `no` writes its table at offset no * k * n
    auto query_func = [n, k, x, all_distances, all_labels]
        (int no, IndexClass *index) {
        if (index->verbose) {
            printf ("begin query shard %d on %ld points\n", no, n);
        }
        index->search (n, x, k,
                       all_distances + no * k * n,
                       all_labels + no * k * n);
        if (index->verbose) {
            printf ("end query shard %d\n", no);
        }
    };

    runOnIndexes<IndexClass> (threaded, query_func, shard_indexes);

    std::vector<long> translations (nshard, 0);
    if (successive_ids) {
        translations[0] = 0;
        for (int s = 0; s + 1 < nshard; s++)
            translations [s + 1] = translations [s] +
                shard_indexes [s]->ntotal;
    }

    if (this->metric_type == METRIC_L2) {
        merge_tables<IndexClass, CMin<distance_t, int> > (
             n, k, nshard, distances, labels,
             all_distances, all_labels, translations.data ());
    } else {
        merge_tables<IndexClass, CMax<distance_t, int> > (
             n, k, nshard, distances, labels,
             all_distances, all_labels, translations.data ());
    }
}
/// Deletes the shards, but only when own_fields is set.
template<class IndexClass>
IndexShardsTemplate<IndexClass>::~IndexShardsTemplate ()
{
    if (!own_fields) {
        return;
    }
    for (IndexClass *shard : shard_indexes) {
        delete shard;
    }
}
// explicit instantiations
template struct IndexShardsTemplate<Index>;
template struct IndexShardsTemplate<IndexBinary>;
} // namespace faiss
/**
* Copyright (c) 2015-present, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under the BSD+Patents license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#pragma once
#include <vector>
#include "Index.h"
#include "IndexBinary.h"
namespace faiss {
/** Index that concatenates the results from several sub-indexes
*
* IndexClass is the index base class this wrapper derives from; it
* provides the idx_t, component_t and distance_t types used below.
*/
template<class IndexClass>
struct IndexShardsTemplate : IndexClass {
// types forwarded from the wrapped index class
using idx_t = typename IndexClass::idx_t;
using component_t = typename IndexClass::component_t;
using distance_t = typename IndexClass::distance_t;
/// the sub-indexes (shards)
std::vector<IndexClass*> shard_indexes;
bool own_fields; /// should the sub-indexes be deleted along with this?
/// see the constructor parameters of the same name
bool threaded;
bool successive_ids;
/**
* @param d dimension, passed to the IndexClass constructor
* @param threaded do we use one thread per sub_index or do
* queries sequentially?
* @param successive_ids should we shift the returned ids by
* the size of each sub-index or return them
* as they are?
*/
explicit IndexShardsTemplate (idx_t d, bool threaded = false,
bool successive_ids = true);
/// append a shard and call sync_with_shard_indexes
void add_shard (IndexClass *);
// update d, metric_type, is_trained and ntotal from the shards. Call
// if you change something in the shard indexes.
void sync_with_shard_indexes ();
/// i-th shard (i is not range-checked)
IndexClass *at(int i) {return shard_indexes[i]; }
/// supported only for sub-indices that implement add_with_ids
void add(idx_t n, const component_t* x) override;
/**
* Cases (successive_ids, xids):
* - true, non-NULL ERROR: it makes no sense to pass in ids and
* request them to be shifted
* - true, NULL OK, but should be called only once (calls add()
* on sub-indexes).
* - false, non-NULL OK: will call add_with_ids with passed in xids
* distributed evenly over shards
* - false, NULL OK: will call add_with_ids on each sub-index,
* starting at ntotal
*/
void add_with_ids(idx_t n, const component_t* x, const idx_t* xids) override;
/// queries all shards and merges their results
void search(
idx_t n, const component_t* x, idx_t k,
distance_t* distances, idx_t* labels) const override;
/// trains every shard on the same vectors
void train(idx_t n, const component_t* x) override;
/// resets every shard
void reset() override;
/// deletes the shards when own_fields is set
~IndexShardsTemplate() override;
};
using IndexShards = IndexShardsTemplate<Index>;
using IndexBinaryShards = IndexShardsTemplate<IndexBinary>;
} // namespace faiss
...@@ -97,6 +97,34 @@ void InvertedLists::merge_from (InvertedLists *oivf, size_t add_id) { ...@@ -97,6 +97,34 @@ void InvertedLists::merge_from (InvertedLists *oivf, size_t add_id) {
} }
} }
double InvertedLists::imbalance_factor () const {
std::vector<int> hist(nlist);
for (size_t i = 0; i < nlist; i++) {
hist[i] = list_size(i);
}
return faiss::imbalance_factor(nlist, hist.data());
}
/// Prints a histogram of the inverted list sizes with power-of-two
/// buckets: a list is counted in the first bucket j for which
/// size < 2^j (lists of 2^40 entries or more are not counted).
/// Only the non-empty buckets are printed.
void InvertedLists::print_stats () const {
    std::vector<int> sizes(40);
    for (size_t i = 0; i < nlist; i++) {
        // query the size once per list instead of once per bucket
        const size_t sz = list_size(i);
        for (size_t j = 0; j < sizes.size(); j++) {
            if ((sz >> j) == 0) {
                sizes[j]++;
                break;
            }
        }
    }
    for (size_t i = 0; i < sizes.size(); i++) {
        if (sizes[i]) {
            // the bound is computed on size_t: shifting an int by 31
            // or more positions (i goes up to 39) is undefined
            printf("list size in < %zu: %d instances\n",
                   (size_t)1 << i, sizes[i]);
        }
    }
}
/***************************************** /*****************************************
* ArrayInvertedLists implementation * ArrayInvertedLists implementation
******************************************/ ******************************************/
......
...@@ -101,6 +101,16 @@ struct InvertedLists { ...@@ -101,6 +101,16 @@ struct InvertedLists {
virtual ~InvertedLists (); virtual ~InvertedLists ();
/*************************
* statistics */
/// 1= perfectly balanced, >1: imbalanced
double imbalance_factor () const;
/// display some stats about the inverted lists
void print_stats () const;
/************************************** /**************************************
* Scoped inverted lists (for automatic deallocation) * Scoped inverted lists (for automatic deallocation)
* *
......
This diff is collapsed.
...@@ -11,13 +11,10 @@ ...@@ -11,13 +11,10 @@
#ifndef META_INDEXES_H #ifndef META_INDEXES_H
#define META_INDEXES_H #define META_INDEXES_H
#include <vector> #include <vector>
#include <unordered_map> #include <unordered_map>
#include "Index.h" #include "Index.h"
#include "IndexShards.h"
namespace faiss { namespace faiss {
...@@ -78,65 +75,6 @@ struct IndexIDMap2 : IndexIDMap { ...@@ -78,65 +75,6 @@ struct IndexIDMap2 : IndexIDMap {
IndexIDMap2 () {} IndexIDMap2 () {}
}; };
/** Index that concatenates the results from several sub-indexes
*
*/
struct IndexShards : Index {
/// the sub-indexes (shards)
std::vector<Index*> shard_indexes;
bool own_fields; /// should the sub-indexes be deleted along with this?
/// see the constructor parameters of the same name
bool threaded;
bool successive_ids;
/**
* @param d dimension of the index
* @param threaded do we use one thread per sub_index or do
* queries sequentially?
* @param successive_ids should we shift the returned ids by
* the size of each sub-index or return them
* as they are?
*/
explicit IndexShards (idx_t d, bool threaded = false,
bool successive_ids = true);
/// register an additional shard
void add_shard (Index *);
// update metric_type and ntotal. Call if you change something in
// the shard indexes.
void sync_with_shard_indexes ();
/// i-th shard (i is not range-checked)
Index *at(int i) {return shard_indexes[i]; }
/// supported only for sub-indices that implement add_with_ids
void add(idx_t n, const float* x) override;
/**
* Cases (successive_ids, xids):
* - true, non-NULL ERROR: it makes no sense to pass in ids and
* request them to be shifted
* - true, NULL OK, but should be called only once (calls add()
* on sub-indexes).
* - false, non-NULL OK: will call add_with_ids with passed in xids
* distributed evenly over shards
* - false, NULL OK: will call add_with_ids on each sub-index,
* starting at ntotal
*/
void add_with_ids(idx_t n, const float* x, const long* xids) override;
/// faiss::Index search API: n queries x, k results per query
void search(
idx_t n,
const float* x,
idx_t k,
float* distances,
idx_t* labels) const override;
void train(idx_t n, const float* x) override;
void reset() override;
~IndexShards() override;
};
/** splits input vectors in segments and assigns each segment to a sub-index /** splits input vectors in segments and assigns each segment to a sub-index
* used to distribute a MultiIndexQuantizer * used to distribute a MultiIndexQuantizer
*/ */
......
...@@ -379,10 +379,74 @@ void ProductQuantizer::compute_code_from_distance_table (const float *tab, ...@@ -379,10 +379,74 @@ void ProductQuantizer::compute_code_from_distance_table (const float *tab,
} }
} }
/** Encode n vectors using assign_index to find the nearest centroid
 * of each sub-vector.
 *
 * For every sub-quantizer m, assign_index is reset and filled with
 * the ksub centroids of m; the m-th sub-vectors are then assigned in
 * batches and the resulting indices are stored in the m-th slot of
 * the codes (1 or 2 bytes per slot, depending on byte_per_idx).
 *
 * @param x      vectors to encode, size n * d
 * @param codes  output codes, size n * code_size
 * @param n      number of vectors
 */
void ProductQuantizer::compute_codes_with_assign_index (
                const float * x,
                uint8_t * codes,
                size_t n)
{
    FAISS_THROW_IF_NOT (assign_index && assign_index->d == dsub);

    // scratch buffers for one batch; they do not depend on the
    // sub-quantizer, so they are allocated once for all of them
    const size_t bs = 65536;
    float * xslice = new float[bs * dsub];
    ScopeDeleter<float> del (xslice);
    idx_t *assign = new idx_t[bs];
    ScopeDeleter<idx_t> del2 (assign);

    for (size_t m = 0; m < M; m++) {
        assign_index->reset ();
        assign_index->add (ksub, get_centroids (m, 0));

        for (size_t i0 = 0; i0 < n; i0 += bs) {
            size_t i1 = std::min(i0 + bs, n);

            // gather the m-th sub-vector of each vector of the batch
            for (size_t i = i0; i < i1; i++) {
                memcpy (xslice + (i - i0) * dsub,
                        x + i * d + m * dsub,
                        dsub * sizeof(float));
            }

            assign_index->assign (i1 - i0, xslice, assign);

            switch (byte_per_idx) {
            case 1:
                {
                    uint8_t *c = codes + code_size * i0 + m;
                    for (size_t i = i0; i < i1; i++) {
                        *c = assign[i - i0];
                        c += M;
                    }
                }
                break;
            case 2:
                {
                    // c advances by M 16-bit slots per vector
                    uint16_t *c = (uint16_t*)(codes + code_size * i0 + m * 2);
                    for (size_t i = i0; i < i1; i++) {
                        *c = assign[i - i0];
                        c += M;
                    }
                }
                break;
            }
        }
    }
}
void ProductQuantizer::compute_codes (const float * x, void ProductQuantizer::compute_codes (const float * x,
uint8_t * codes, uint8_t * codes,
size_t n) const size_t n) const
{ {
// process by blocks to avoid using too much RAM
size_t bs = 256 * 1024;
if (n > bs) {
for (size_t i0 = 0; i0 < n; i0 += bs) {
size_t i1 = std::min(i0 + bs, n);
compute_codes (x + d * i0, codes + code_size * i0, i1 - i0);
}
return;
}
if (dsub < 16) { // simple direct computation if (dsub < 16) { // simple direct computation
#pragma omp parallel for #pragma omp parallel for
...@@ -525,15 +589,6 @@ static void pq_knn_search_with_tables ( ...@@ -525,15 +589,6 @@ static void pq_knn_search_with_tables (
} }
} }
/*
static inline void pq_estimators_from_tables (const ProductQuantizer * pq,
const CT * codes,
size_t ncodes,
const float * dis_table,
size_t k,
float * heap_dis,
long * heap_ids)
*/
void ProductQuantizer::search (const float * __restrict x, void ProductQuantizer::search (const float * __restrict x,
size_t nx, size_t nx,
const uint8_t * codes, const uint8_t * codes,
......
...@@ -23,6 +23,8 @@ namespace faiss { ...@@ -23,6 +23,8 @@ namespace faiss {
/** Product Quantizer. Implemented only for METRIC_L2 */ /** Product Quantizer. Implemented only for METRIC_L2 */
struct ProductQuantizer { struct ProductQuantizer {
using idx_t = Index::idx_t;
size_t d; ///< size of the input vectors size_t d; ///< size of the input vectors
size_t M; ///< number of subquantizers size_t M; ///< number of subquantizers
size_t nbits; ///< number of bits per quantization index size_t nbits; ///< number of bits per quantization index
...@@ -86,6 +88,13 @@ struct ProductQuantizer { ...@@ -86,6 +88,13 @@ struct ProductQuantizer {
uint8_t * codes, uint8_t * codes,
size_t n) const ; size_t n) const ;
/// speed up code assignment using assign_index
/// (non-const because the index is changed)
void compute_codes_with_assign_index (
const float * x,
uint8_t * codes,
size_t n);
/// decode a vector from a given code (or n vectors if third argument) /// decode a vector from a given code (or n vectors if third argument)
void decode (const uint8_t *code, float *x) const; void decode (const uint8_t *code, float *x) const;
void decode (const uint8_t *code, float *x, size_t n) const; void decode (const uint8_t *code, float *x, size_t n) const;
......
...@@ -239,7 +239,7 @@ void RandomRotationMatrix::init (int seed) ...@@ -239,7 +239,7 @@ void RandomRotationMatrix::init (int seed)
is_trained = true; is_trained = true;
} }
void RandomRotationMatrix::train (Index::idx_t n, const float *x) void RandomRotationMatrix::train (Index::idx_t /*n*/, const float */*x*/)
{ {
// initialize with some arbitrary seed // initialize with some arbitrary seed
init (12345); init (12345);
...@@ -671,11 +671,11 @@ void OPQMatrix::train (Index::idx_t n, const float *x) ...@@ -671,11 +671,11 @@ void OPQMatrix::train (Index::idx_t n, const float *x)
xproj (d2 * n), pq_recons (d2 * n), xxr (d * n), xproj (d2 * n), pq_recons (d2 * n), xxr (d * n),
tmp(d * d * 4); tmp(d * d * 4);
std::vector<uint8_t> codes (M * n);
ProductQuantizer pq_default (d2, M, 8); ProductQuantizer pq_default (d2, M, 8);
ProductQuantizer &pq_regular = ProductQuantizer &pq_regular = pq ? *pq : pq_default;
pq ? *pq : pq_default; std::vector<uint8_t> codes (pq_regular.code_size * n);
double t0 = getmillisecs(); double t0 = getmillisecs();
for (int iter = 0; iter < niter; iter++) { for (int iter = 0; iter < niter; iter++) {
...@@ -691,10 +691,18 @@ void OPQMatrix::train (Index::idx_t n, const float *x) ...@@ -691,10 +691,18 @@ void OPQMatrix::train (Index::idx_t n, const float *x)
pq_regular.cp.max_points_per_centroid = 1000; pq_regular.cp.max_points_per_centroid = 1000;
pq_regular.cp.niter = iter == 0 ? niter_pq_0 : niter_pq; pq_regular.cp.niter = iter == 0 ? niter_pq_0 : niter_pq;
pq_regular.cp.verbose = verbose; pq_regular.verbose = verbose;
pq_regular.train (n, xproj.data()); pq_regular.train (n, xproj.data());
if (verbose) {
printf(" encode / decode\n");
}
if (pq_regular.assign_index) {
pq_regular.compute_codes_with_assign_index
(xproj.data(), codes.data(), n);
} else {
pq_regular.compute_codes (xproj.data(), codes.data(), n); pq_regular.compute_codes (xproj.data(), codes.data(), n);
}
pq_regular.decode (codes.data(), pq_recons.data(), n); pq_regular.decode (codes.data(), pq_recons.data(), n);
float pq_err = fvec_L2sqr (pq_recons.data(), xproj.data(), n * d2) / n; float pq_err = fvec_L2sqr (pq_recons.data(), xproj.data(), n * d2) / n;
...@@ -710,6 +718,9 @@ void OPQMatrix::train (Index::idx_t n, const float *x) ...@@ -710,6 +718,9 @@ void OPQMatrix::train (Index::idx_t n, const float *x)
FINTEGER di = d, d2i = d2, ni = n; FINTEGER di = d, d2i = d2, ni = n;
float one = 1, zero = 0; float one = 1, zero = 0;
if (verbose) {
printf(" X * recons\n");
}
// torch.mm(xtrain:t(), pq_recons) // torch.mm(xtrain:t(), pq_recons)
sgemm_ ("Not", "Transposed", sgemm_ ("Not", "Transposed",
&d2i, &di, &ni, &d2i, &di, &ni,
...@@ -788,6 +799,58 @@ void NormalizationTransform::reverse_transform (idx_t n, const float* xt, ...@@ -788,6 +799,58 @@ void NormalizationTransform::reverse_transform (idx_t n, const float* xt,
memcpy (x, xt, sizeof (xt[0]) * n * d_in); memcpy (x, xt, sizeof (xt[0]) * n * d_in);
} }
/*********************************************
* CenteringTransform
*********************************************/
/// Build a centering transform over d-dimensional vectors. The same d is
/// forwarded twice to the VectorTransform base.
CenteringTransform::CenteringTransform (int d)
    : VectorTransform (d, d)
{
    // apply_noalloc / reverse_transform refuse to run until train()
    // has filled in the mean
    this->is_trained = false;
}
/// Estimate the per-component mean of the n training vectors stored
/// contiguously in x (d_in floats per vector) and mark the transform as
/// trained. Throws if n is not strictly positive.
void CenteringTransform::train(Index::idx_t n, const float *x) {
    FAISS_THROW_IF_NOT_MSG(n > 0, "need at least one training vector");

    // assign() rather than resize(): resize() leaves already-present
    // elements untouched, so calling train() a second time would add the
    // new sums on top of the previously computed mean.
    mean.assign (d_in, 0);

    // accumulate the component-wise sums over all vectors
    for (idx_t i = 0; i < n; i++) {
        for (size_t j = 0; j < d_in; j++) {
            mean[j] += *x++;
        }
    }

    // turn the sums into averages
    for (size_t j = 0; j < d_in; j++) {
        mean[j] /= n;
    }

    is_trained = true;
}
/// Write x - mean into xt for each of the n input vectors
/// (d_in floats per vector). Throws if the transform is not trained.
void CenteringTransform::apply_noalloc
       (idx_t n, const float* x, float* xt) const
{
    FAISS_THROW_IF_NOT (is_trained);

    for (idx_t row = 0; row < n; row++) {
        // start of the current vector in the input / output buffers
        const float *src = x + row * d_in;
        float *dst = xt + row * d_in;
        for (size_t col = 0; col < d_in; col++) {
            dst[col] = src[col] - mean[col];
        }
    }
}
/// Undo the centering: write xt + mean into x for each of the n vectors
/// (d_in floats per vector). Throws if the transform is not trained.
void CenteringTransform::reverse_transform (idx_t n, const float* xt,
                                            float* x) const
{
    FAISS_THROW_IF_NOT (is_trained);

    for (idx_t row = 0; row < n; row++) {
        // start of the current vector in the centered / restored buffers
        const float *src = xt + row * d_in;
        float *dst = x + row * d_in;
        for (size_t col = 0; col < d_in; col++) {
            dst[col] = src[col] + mean[col];
        }
    }
}
/********************************************* /*********************************************
* IndexPreTransform * IndexPreTransform
*********************************************/ *********************************************/
...@@ -956,6 +1019,16 @@ void IndexPreTransform::search (idx_t n, const float *x, idx_t k, ...@@ -956,6 +1019,16 @@ void IndexPreTransform::search (idx_t n, const float *x, idx_t k,
index->search (n, xt, k, distances, labels); index->search (n, xt, k, distances, labels);
} }
/// Range search: the queries go through the transform chain and the
/// wrapped index is then searched with the radius passed in unchanged.
void IndexPreTransform::range_search (idx_t n, const float* x, float radius,
                                      RangeSearchResult* result) const
{
    FAISS_THROW_IF_NOT (is_trained);

    const float *transformed = apply_chain (n, x);

    // if apply_chain returned x itself there is nothing for us to free;
    // otherwise the returned buffer is released when this scope exits
    const float *to_free = (transformed != x) ? transformed : nullptr;
    ScopeDeleter<float> guard (to_free);

    index->range_search (n, transformed, radius, result);
}
void IndexPreTransform::reset () { void IndexPreTransform::reset () {
index->reset(); index->reset();
......
...@@ -246,6 +246,25 @@ struct NormalizationTransform: VectorTransform { ...@@ -246,6 +246,25 @@ struct NormalizationTransform: VectorTransform {
void reverse_transform(idx_t n, const float* xt, float* x) const override; void reverse_transform(idx_t n, const float* xt, float* x) const override;
}; };
/** Subtract the mean of each component from the vectors.
 *
 * train() estimates the mean, apply_noalloc() removes it and
 * reverse_transform() adds it back.
 */
struct CenteringTransform: VectorTransform {

    /// Mean, size d_in = d_out
    std::vector<float> mean;

    /// d is passed as both dimensions of the underlying VectorTransform;
    /// the transform starts out untrained
    explicit CenteringTransform (int d = 0);

    /// train on n vectors.
    void train(Index::idx_t n, const float* x) override;

    /// subtract the mean
    void apply_noalloc(idx_t n, const float* x, float* xt) const override;

    /// add the mean
    void reverse_transform (idx_t n, const float * xt,
                            float *x) const override;

};
/** Index that applies a LinearTransform transform on vectors before /** Index that applies a LinearTransform transform on vectors before
...@@ -285,6 +304,12 @@ struct IndexPreTransform: Index { ...@@ -285,6 +304,12 @@ struct IndexPreTransform: Index {
float* distances, float* distances,
idx_t* labels) const override; idx_t* labels) const override;
    /* range search, no attempt is made to change the radius */
void range_search (idx_t n, const float* x, float radius,
RangeSearchResult* result) const override;
void reconstruct (idx_t key, float * recons) const override; void reconstruct (idx_t key, float * recons) const override;
void reconstruct_n (idx_t i0, idx_t ni, float *recons) void reconstruct_n (idx_t i0, idx_t ni, float *recons)
......
/**
* Copyright (c) 2015-present, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under the BSD+Patents license found in the
* LICENSE file in the root directory of this source tree.
*/
#include "WorkerThread.h"

#include <exception>

#include "FaissAssert.h"
namespace faiss {
/// Launch the worker thread and return only once it is processing tasks.
WorkerThread::WorkerThread()
    : wantStop_(false) {
  startThread();

  // Queue an empty task and block on its future: when it completes we
  // know the thread is up and pulling work from the queue
  std::future<bool> started = add([](){});
  started.get();
}
/// Ask the worker to stop, then block until its thread has been joined.
WorkerThread::~WorkerThread() {
  // raise the stop flag and wake the thread up ...
  this->stop();
  // ... then wait for it to finish
  this->waitForThreadExit();
}
void
WorkerThread::startThread() {
thread_ = std::thread([this](){ threadMain(); });
}
/// Raise the stop flag and wake the worker so that it notices it.
void
WorkerThread::stop() {
  std::unique_lock<std::mutex> lock(mutex_);

  // the flag is only ever touched with mutex_ held
  wantStop_ = true;
  monitor_.notify_one();
}
/// Queue f for execution on the worker thread.
/// The returned future is already resolved to false when a stop has been
/// requested (f is not queued); otherwise it is fulfilled by the worker
/// once f has been run.
std::future<bool>
WorkerThread::add(std::function<void()> f) {
  std::promise<bool> promise;
  std::future<bool> result = promise.get_future();

  std::lock_guard<std::mutex> guard(mutex_);

  if (!wantStop_) {
    queue_.emplace_back(std::make_pair(std::move(f), std::move(promise)));

    // Wake up our thread
    monitor_.notify_one();
  } else {
    // A stop was requested: nothing else can be scheduled, so report
    // right away that f did not execute
    promise.set_value(false);
  }

  return result;
}
/// Thread entry point: run the processing loop until a stop is requested,
/// then flush whatever is still sitting in the queue.
void
WorkerThread::threadMain() {
  threadLoop();

  // threadLoop() only returns after having seen the stop flag
  FAISS_ASSERT(wantStop_);

  // Call all pending tasks. add() refuses to enqueue once wantStop_ is
  // set, so the queue can no longer change underneath us here.
  for (auto& f : queue_) {
    try {
      f.first();
    } catch (...) {
      // An exception leaving this function would escape the std::thread
      // and call std::terminate(); hand it to whoever waits on the future
      f.second.set_exception(std::current_exception());
      continue;
    }

    f.second.set_value(true);
  }
}
/// Main processing loop: pop tasks one at a time and run them outside the
/// lock, until a stop is requested. Tasks still queued when the stop flag
/// is seen are left in the queue.
void
WorkerThread::threadLoop() {
  while (true) {
    std::pair<std::function<void()>, std::promise<bool>> data;

    {
      std::unique_lock<std::mutex> lock(mutex_);

      // sleep until there is work or we are asked to stop
      while (!wantStop_ && queue_.empty()) {
        monitor_.wait(lock);
      }

      if (wantStop_) {
        return;
      }

      data = std::move(queue_.front());
      queue_.pop_front();
    }

    // Run the task without holding the lock so add() is never blocked
    // behind a long-running task
    try {
      data.first();
    } catch (...) {
      // An exception leaving this function would escape the std::thread
      // and call std::terminate(); hand it to whoever waits on the future
      // and keep serving the remaining tasks
      data.second.set_exception(std::current_exception());
      continue;
    }

    data.second.set_value(true);
  }
}
/// Join the worker thread; any exception thrown by join() is swallowed.
void
WorkerThread::waitForThreadExit() {
  try {
    thread_.join();
  } catch (...) {
    // deliberately ignored: this is called from the destructor, which
    // must not let an exception out
  }
}
} // namespace
This diff is collapsed.
This diff is collapsed.
...@@ -4,7 +4,7 @@ ...@@ -4,7 +4,7 @@
# This source code is licensed under the BSD+Patents license found in the # This source code is licensed under the BSD+Patents license found in the
# LICENSE file in the root directory of this source tree. # LICENSE file in the root directory of this source tree.
#! /usr/bin/python2 #! /usr/bin/env python2
import os import os
import numpy as np import numpy as np
......
...@@ -23,14 +23,14 @@ function run_on_1machine () { ...@@ -23,14 +23,14 @@ function run_on_1machine () {
# To be implemented # To be implemented
} }
function run_on_1machine () { function run_on_8gpu () {
# To be implemented # To be implemented
} }
# prepare output directories # prepare output directories
# set to some directory where all indexes can be written.
basedir=/mnt/vol/gfsai-east/ai-group/users/matthijs/bench_all_ivf basedir=XXXXX
logdir=$basedir/logs logdir=$basedir/logs
indexdir=$basedir/indexes indexdir=$basedir/indexes
......
...@@ -654,7 +654,7 @@ def get_populated_index(preproc): ...@@ -654,7 +654,7 @@ def get_populated_index(preproc):
print "Copy CPU index to %d sharded GPU indexes" % replicas print "Copy CPU index to %d sharded GPU indexes" % replicas
index = faiss.IndexProxy() index = faiss.IndexReplicas()
for i in range(replicas): for i in range(replicas):
gpu0 = ngpu * i / replicas gpu0 = ngpu * i / replicas
......
This diff is collapsed.
...@@ -66,7 +66,7 @@ def train_kmeans(x, k, ngpu): ...@@ -66,7 +66,7 @@ def train_kmeans(x, k, ngpu):
else: else:
indexes = [faiss.GpuIndexFlatL2(res[i], d, flat_config[i]) indexes = [faiss.GpuIndexFlatL2(res[i], d, flat_config[i])
for i in range(ngpu)] for i in range(ngpu)]
index = faiss.IndexProxy() index = faiss.IndexReplicas()
for sub_index in indexes: for sub_index in indexes:
index.addIndex(sub_index) index.addIndex(sub_index)
......
README for the link & code implementation README for the link & code implementation
========================================= =========================================
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment