21 #include "FaissAssert.h" 
   22 #include "IndexFlat.h" 
   23 #include "AuxIndexStructures.h" 
   32 IndexIVF::IndexIVF (
Index * quantizer, 
size_t d, 
size_t nlist,
 
   37     quantizer (quantizer),
 
   38     quantizer_trains_alone (false),
 
   41     maintain_direct_map (false)
 
   43     FAISS_ASSERT (d == quantizer->
d);
 
   57 IndexIVF::IndexIVF ():
 
   58     nlist (0), nprobe (1), quantizer (nullptr),
 
   59     quantizer_trains_alone (false), own_fields (false),
 
   60     maintain_direct_map (false)
 
   73     direct_map.resize (
ntotal, -1);
 
   74     for (
size_t key = 0; key < 
nlist; key++) {
 
   75         const std::vector<long> & idlist = 
ids[key];
 
   77         for (
long ofs = 0; ofs < idlist.size(); ofs++) {
 
   78             direct_map [idlist [ofs]] =
 
   91     for (
size_t i = 0; i < 
ids.size(); i++)
 
  100             printf (
"IVF quantizer does not need training.\n");
 
  103             printf (
"IVF quantizer trains alone...\n");
 
  106                       !
"nlist not consistent with quantizer size");
 
  109             printf (
"Training IVF quantizer on %ld vectors in %dD\n",
 
  119         printf (
"Training IVF residual\n");
 
  128         printf (
"IndexIVF: no residual training\n");
 
  136     std::vector<int> hist (
nlist);
 
  137     for (
int i = 0; i < 
nlist; i++) {
 
  138         hist[i] = 
ids[i].size();
 
  145     std::vector<int> sizes(40);
 
  146     for (
int i = 0; i < 
nlist; i++) {
 
  147         for (
int j = 0; j < sizes.size(); j++) {
 
  148             if ((
ids[i].size() >> j) == 0) {
 
  154     for (
int i = 0; i < sizes.size(); i++) {
 
  156             printf (
"list size in < %d: %d instances\n",
 
  166     FAISS_ASSERT (other.
d == 
d);
 
  169                   !
"direct map copy not implemented");
 
  170     FAISS_ASSERT (
typeid (*
this) == 
typeid (other) ||
 
  171                   !
"can only merge indexes of the same type");
 
  172     for (
long i = 0; i < 
nlist; i++) {
 
  173         std::vector<idx_t> & src = other.
ids[i];
 
  174         std::vector<idx_t> & dest = 
ids[i];
 
  175         for (
long j = 0; j < src.size(); j++)
 
  176             dest.push_back (src[j] + add_id);
 
  187 IndexIVF::~IndexIVF()
 
  198 IndexIVFFlat::IndexIVFFlat (Index * quantizer,
 
  200     IndexIVF (quantizer, d, nlist, metric)
 
  215     s << 
"[" << nlist << 
":" << quantizer->index_typename << 
"]";
 
  216     index_typename = s.str();
 
  230                              const long *precomputed_idx)
 
  236     if (precomputed_idx) {
 
  237         idx = precomputed_idx;
 
  239         long * idx0 = 
new long [n];
 
  240         quantizer->assign (n, x, idx0);
 
  244     for (
size_t i = 0; i < n; i++) {
 
  245         long id = xids ? xids[i] : 
ntotal + i;
 
  246         long list_no = idx [i];
 
  249         FAISS_ASSERT (list_no < nlist);
 
  251         ids[list_no].push_back (
id);
 
  252         const float *xi = x + i * 
d;
 
  254         for (
size_t j = 0 ; j < 
d ; j++)
 
  255             vecs[list_no].push_back (xi [j]);
 
  258             direct_map.push_back (list_no << 32 | (
ids[list_no].size() - 1));
 
  262         printf(
"IndexIVFFlat::add_core: added %ld / %ld vectors\n",
 
  265     if (!precomputed_idx)
 
  276     const long * __restrict keys,
 
  280     const size_t k = res->
k;
 
  282 #pragma omp parallel for 
  283     for (
size_t i = 0; i < nx; i++) {
 
  284         const float * xi = x + i * 
d;
 
  285         const long * keysi = keys + i * 
nprobe;
 
  286         float * __restrict simi = res->
get_val (i);
 
  287         long * __restrict idxi = res->
get_ids (i);
 
  288         minheap_heapify (k, simi, idxi);
 
  290         for (
size_t ik = 0; ik < 
nprobe; ik++) {
 
  291             long key = keysi[ik];  
 
  296             if (key >= (
long) nlist) {
 
  297                 fprintf (stderr, 
"Invalid key=%ld  at ik=%ld nlist=%ld\n",
 
  302             const size_t list_size = 
ids[key].size();
 
  303             const float * list_vecs = 
vecs[key].data();
 
  305             for (
size_t j = 0; j < list_size; j++) {
 
  306                 const float * yj = list_vecs + d * j;
 
  307                 float ip = fvec_inner_product (xi, yj, d);
 
  309                     minheap_pop (k, simi, idxi);
 
  310                     minheap_push (k, simi, idxi, ip, 
ids[key][j]);
 
  314         minheap_reorder (k, simi, idxi);
 
  322     const long * __restrict keys,
 
  325     const size_t k = res->
k;
 
  327 #pragma omp parallel for 
  328     for (
size_t i = 0; i < nx; i++) {
 
  329         const float * xi = x + i * 
d;
 
  330         const long * keysi = keys + i * 
nprobe;
 
  331         float * __restrict disi = res->
get_val (i);
 
  332         long * __restrict idxi = res->
get_ids (i);
 
  333         maxheap_heapify (k, disi, idxi);
 
  335         for (
size_t ik = 0; ik < 
nprobe; ik++) {
 
  336             long key = keysi[ik];  
 
  341             if (key >= (
long) nlist) {
 
  342                 fprintf (stderr, 
"Invalid key=%ld  at ik=%ld nlist=%ld\n",
 
  347             const size_t list_size = 
ids[key].size();
 
  348             const float * list_vecs = 
vecs[key].data();
 
  350             for (
size_t j = 0; j < list_size; j++) {
 
  351                 const float * yj = list_vecs + d * j;
 
  353                 if (disij < disi[0]) {
 
  354                     maxheap_pop (k, disi, idxi);
 
  355                     maxheap_push (k, disi, idxi, disij, 
ids[key][j]);
 
  359         maxheap_reorder (k, disi, idxi);
 
  365                                 float *distances, 
idx_t *labels)
 const 
  368     quantizer->assign (n, x, idx, 
nprobe);
 
  372             size_t(n), size_t(k), labels, distances};
 
  377             size_t(n), size_t(k), labels, distances};
 
  389     quantizer->assign (nx, x, keys, 
nprobe);
 
  391     assert (
metric_type == METRIC_L2 || !
"Only L2 implemented");
 
  396         for (
size_t i = 0; i < nx; i++) {
 
  397             const float * xi = x + i * 
d;
 
  398             const long * keysi = keys + i * 
nprobe;
 
  403             for (
size_t ik = 0; ik < 
nprobe; ik++) {
 
  404                 long key = keysi[ik];  
 
  405                 if (key < 0 || key >= (
long) nlist) {
 
  406                     fprintf (stderr, 
"Invalid key=%ld  at ik=%ld nlist=%ld\n",
 
  411                 const size_t list_size = 
ids[key].size();
 
  412                 const float * list_vecs = 
vecs[key].data();
 
  414                 for (
size_t j = 0; j < list_size; j++) {
 
  415                     const float * yj = list_vecs + d * j;
 
  417                     if (disij < radius) {
 
  418                         qres.add (disij, 
ids[key][j]);
 
  432     for (
int i = 0; i < 
nlist; i++) {
 
  433         std::vector<float> & src = other.
vecs[i];
 
  434         std::vector<float> & dest = 
vecs[i];
 
  435         for (
int j = 0; j < src.size(); j++)
 
  436             dest.push_back (src[j]);
 
  442                      long a1, 
long a2)
 const 
  444     FAISS_ASSERT (nlist == other.
nlist);
 
  447     for (
long list_no = 0; list_no < 
nlist; list_no++) {
 
  448         const std::vector<idx_t> & ids_in = 
ids[list_no];
 
  449         std::vector<idx_t> & ids_out = other.
ids[list_no];
 
  450         const std::vector<float> & vecs_in = 
vecs[list_no];
 
  451         std::vector<float> & vecs_out = other.
vecs[list_no];
 
  453         for (
long i = 0; i < ids_in.size(); i++) {
 
  454             idx_t id = ids_in[i];
 
  455             if (subset_type == 0 && a1 <= 
id && 
id < a2) {
 
  456                 ids_out.push_back (
id);
 
  457                 vecs_out.insert (vecs_out.end(),
 
  458                                   vecs_in.begin() + i * 
d,
 
  459                                   vecs_in.begin() + (i + 1) * d);
 
  471     for (
size_t key = 0; key < 
nlist; key++) {
 
  479                   !
"direct map remove not implemented");
 
  481 #pragma omp parallel for reduction(+: nremove) 
  482     for (
long i = 0; i < 
nlist; i++) {
 
  483         std::vector<idx_t> & idsi = 
ids[i];
 
  484         float *vecsi = 
vecs[i].data();
 
  486         long l = idsi.size(), j = 0;
 
  488             if (sel.is_member (idsi[j])) {
 
  491                 memmove (vecsi + j * d,
 
  492                          vecsi + l * d, d * 
sizeof (
float));
 
  497         if (l < idsi.size()) {
 
  498             nremove += idsi.size() - l;
 
  500             vecs[i].resize (l * d);
 
  510     assert (direct_map.size() == 
ntotal);
 
  511     int list_no = direct_map[key] >> 32;
 
  512     int ofs = direct_map[key] & 0xffffffff;
 
  513     memcpy (recons, &
vecs[list_no][ofs * d], d * 
sizeof(recons[0]));
 
int niter
clustering iterations 
result structure for a single query 
float fvec_L2sqr(const float *x, const float *y, size_t d)
Squared L2 distance between two vectors. 
void search_knn_L2sqr(size_t nx, const float *x, const long *keys, float_maxheap_array_t *res) const 
Implementation of the search for the L2 metric. 
T * get_val(size_t key)
Return the list of values for a heap. 
double imbalance_factor() const 
1= perfectly balanced, >1: imbalanced 
virtual void reset()=0
removes all elements from the database. 
size_t nprobe
number of probes at query time 
virtual void reconstruct(idx_t key, float *recons) const override
bool quantizer_trains_alone
just pass over the trainset to quantizer 
virtual void set_typename() override
virtual void range_search(idx_t n, const float *x, float radius, RangeSearchResult *result) const override
void copy_subset_to(IndexIVFFlat &other, int subset_type, long a1, long a2) const 
virtual void merge_from_residuals(IndexIVF &other) override
virtual void add_with_ids(idx_t n, const float *x, const long *xids)
virtual void train_residual(idx_t n, const float *x)
size_t k
allocated size per heap 
double imbalance_factor(int n, int k, const long *assign)
a balanced assignment has a IF of 1 
virtual long remove_ids(const IDSelector &sel) override
std::vector< std::vector< long > > ids
Inverted lists for indexes. 
Index * quantizer
quantizer that maps vectors to inverted lists 
virtual void train(idx_t n, const float *x) override
Trains the quantizer and calls train_residual to train sub-quantizers. 
ClusteringParameters cp
to override default clustering params 
virtual void add_with_ids(idx_t n, const float *x, const long *xids) override
implemented for all IndexIVF* classes 
bool own_fields
whether object owns the quantizer 
long idx_t
all indices are this type 
virtual void reset() override
removes all elements from the database. 
void make_direct_map()
intialize a direct map 
idx_t ntotal
total nb of indexed vectors 
bool verbose
verbosity level 
virtual void reset() override
removes all elements from the database. 
QueryResult & new_result(idx_t qno)
begin a new result 
the entries in the buffers are split per query 
virtual void merge_from_residuals(IndexIVF &other)=0
TI * get_ids(size_t key)
Correspponding identifiers. 
MetricType metric_type
type of metric this index uses for search 
void print_stats() const 
display some stats about the inverted lists 
size_t nlist
number of possible key values 
virtual void add(idx_t n, const float *x) override
Quantizes x and calls add_with_key. 
virtual void train(idx_t n, const float *x, faiss::Index &index)
Index is used during the assignment stage. 
bool is_trained
set if the Index does not require training, or if training is done already 
void search_knn_inner_product(size_t nx, const float *x, const long *keys, float_minheap_array_t *res) const 
Implementation of the search for the inner product metric. 
virtual void train(idx_t n, const float *x)
bool maintain_direct_map
map for direct access to the elements. Enables reconstruct(). 
bool spherical
do we want normalized centroids? 
virtual void merge_from(IndexIVF &other, idx_t add_id)
MetricType
Some algorithms support both an inner product vetsion and a L2 search version. 
std::vector< std::vector< float > > vecs
void add_core(idx_t n, const float *x, const long *xids, const long *precomputed_idx)
same as add_with_ids, with precomputed coarse quantizer 
virtual void search(idx_t n, const float *x, idx_t k, float *distances, idx_t *labels) const override