Commit daf589d9 authored by matthijs

add bench_all_ivf

parent 4bcb5b3f
@@ -392,7 +392,9 @@ void ParameterSpace::initialize (const Index * index)
         for (int i = 8; i < 20; i++) {
             pr_max_codes.values.push_back (1 << i);
         }
-        pr_max_codes.values.push_back (std::numeric_limits<double>::infinity());
+        pr_max_codes.values.push_back (
+              std::numeric_limits<double>::infinity()
+        );
     }
 }
 if (DC (IndexIVFPQR)) {
......
@@ -18,7 +18,7 @@
 #include <sstream>
 #define FAISS_VERSION_MAJOR 1
-#define FAISS_VERSION_MINOR 5
+#define FAISS_VERSION_MINOR 4
 #define FAISS_VERSION_PATCH 0
 /**
......
@@ -130,12 +130,16 @@ void IndexBinaryIVF::search(idx_t n, const uint8_t *x, idx_t k,
     std::unique_ptr<idx_t[]> idx(new idx_t[n * nprobe]);
     std::unique_ptr<int32_t[]> coarse_dis(new int32_t[n * nprobe]);
+    double t0 = getmillisecs();
     quantizer->search(n, x, nprobe, coarse_dis.get(), idx.get());
+    indexIVF_stats.quantization_time += getmillisecs() - t0;
+    t0 = getmillisecs();
     invlists->prefetch_lists(idx.get(), n * nprobe);
     search_preassigned(n, x, k, idx.get(), coarse_dis.get(),
                        distances, labels, false);
+    indexIVF_stats.search_time += getmillisecs() - t0;
 }
 void IndexBinaryIVF::reconstruct(idx_t key, uint8_t *recons) const {
......
@@ -11,7 +11,6 @@
 #include "IndexFlat.h"
 #include <cstring>
-#include <limits>
 #include "utils.h"
 #include "Heap.h"
......
@@ -175,13 +175,16 @@ void IndexIVF::search (idx_t n, const float *x, idx_t k,
     float * coarse_dis = new float [n * nprobe];
     ScopeDeleter<float> del2 (coarse_dis);
+    double t0 = getmillisecs();
     quantizer->search (n, x, nprobe, coarse_dis, idx);
+    indexIVF_stats.quantization_time += getmillisecs() - t0;
+    t0 = getmillisecs();
     invlists->prefetch_lists (idx, n * nprobe);
     search_preassigned (n, x, k, idx, coarse_dis,
                         distances, labels, false);
+    indexIVF_stats.search_time += getmillisecs() - t0;
 }
......
@@ -297,6 +297,8 @@ struct IndexIVFStats {
     size_t nlist;          // nb of inverted lists scanned
     size_t ndis;           // nb of distances computed
     size_t nheap_updates;  // nb of times the heap was updated
+    double quantization_time;  // time spent quantizing vectors (in ms)
+    double search_time;        // time spent searching lists (in ms)
     IndexIVFStats () {reset (); }
     void reset ();
......
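The two counters added here are visible from Python through faiss.cvar.indexIVF_stats, the same object the benchmark script below reads. A minimal sketch, assuming a trained IVF index named index and a query matrix xq:

    import faiss

    stats = faiss.cvar.indexIVF_stats
    stats.reset()
    D, I = index.search(xq, 10)   # any IVF search accumulates the counters
    print("coarse quantization: %.3f ms" % stats.quantization_time)
    print("inverted list scan:  %.3f ms" % stats.search_time)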
@@ -636,6 +636,11 @@ struct QueryTables {
     if (use_precomputed_table == 0 || use_precomputed_table == -1) {
         ivfpq.quantizer->compute_residual (qi, residual_vec, key);
         pq.compute_distance_table (residual_vec, sim_table);
+        if (polysemous_ht != 0) {
+            pq.compute_code (residual_vec, q_code.data());
+        }
     } else if (use_precomputed_table == 1) {
         dis0 = coarse_dis;
@@ -643,6 +648,13 @@
             &ivfpq.precomputed_table [key * pq.ksub * pq.M],
             -2.0, sim_table_2,
             sim_table);
+        if (polysemous_ht != 0) {
+            ivfpq.quantizer->compute_residual (qi, residual_vec, key);
+            pq.compute_code (residual_vec, q_code.data());
+        }
     } else if (use_precomputed_table == 2) {
         dis0 = coarse_dis;
......
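These branches only matter when polysemous filtering is active on the IndexIVFPQ: the query code q_code is then needed to Hamming-filter inverted-list entries. A rough sketch of how the feature is switched on from Python (the factory string and threshold 54 are arbitrary illustrations, not taken from this commit):

    import faiss

    index = faiss.index_factory(64, "IVF256,PQ8")
    # ... train and add vectors as usual ...
    ivfpq = faiss.downcast_index(index)
    ivfpq.polysemous_ht = 54   # Hamming threshold; 0 (the default) disables filtering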
# Copyright (c) 2015-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the BSD+Patents license found in the
# LICENSE file in the root directory of this source tree.
#!/usr/bin/env python2
import os
import sys
import time
import numpy as np
import faiss
import argparse
import datasets
from datasets import sanitize
######################################################
# Command-line parsing
######################################################
parser = argparse.ArgumentParser()
def aa(*args, **kwargs):
group.add_argument(*args, **kwargs)
group = parser.add_argument_group('dataset options')
aa('--db', default='deep1M', help='dataset')
aa('--compute_gt', default=False, action='store_true',
help='compute and store the groundtruth')
group = parser.add_argument_group('index construction')
aa('--indexkey', default='HNSW32', help='index_factory type')
aa('--efConstruction', default=200, type=int,
help='HNSW construction factor')
aa('--M0', default=-1, type=int, help='size of base level')
aa('--maxtrain', default=256 * 256, type=int,
help='maximum number of training points (0 to set automatically)')
aa('--indexfile', default='', help='file to read or write index from')
aa('--add_bs', default=-1, type=int,
help='add elements index by batches of this size')
aa('--no_precomputed_tables', action='store_true', default=False,
help='disable precomputed tables (uses less memory)')
aa('--clustering_niter', default=-1, type=int,
help='number of clustering iterations (-1 = leave default)')
aa('--train_on_gpu', default=False, action='store_true',
help='do training on GPU')
aa('--get_centroids_from', default='',
help='get the centroids from this index (to speed up training)')
group = parser.add_argument_group('searching')
aa('--k', default=100, type=int, help='nb of nearest neighbors')
aa('--searchthreads', default=-1, type=int,
help='nb of threads to use at search time')
aa('--searchparams', nargs='+', default=['autotune'],
help="search parameters to use (can be autotune or a list of params)")
aa('--n_autotune', default=500, type=int,
help="max nb of autotune experiments")
aa('--autotune_max', default=[], nargs='*',
help='set max value for autotune variables format "var:val" (exclusive)')
aa('--autotune_range', default=[], nargs='*',
help='set complete autotune range, format "var:val1,val2,..."')
aa('--min_test_duration', default=0, type=float,
help='run each test for at least this many seconds to average out timing jitter')
args = parser.parse_args()
print "args:", args
os.system('echo -n "nb processors "; '
'cat /proc/cpuinfo | grep ^processor | wc -l; '
'cat /proc/cpuinfo | grep ^"model name" | tail -1')
######################################################
# Load dataset
######################################################
xt, xb, xq, gt = datasets.load_data(
dataset=args.db, compute_gt=args.compute_gt)
print "dataset sizes: train %s base %s query %s GT %s" % (
xt.shape, xb.shape, xq.shape, gt.shape)
nq, d = xq.shape
nb, d = xb.shape
######################################################
# Make index
######################################################
if args.indexfile and os.path.exists(args.indexfile):
print "reading", args.indexfile
index = faiss.read_index(args.indexfile)
if isinstance(index, faiss.IndexPreTransform):
index_ivf = faiss.downcast_index(index.index)
else:
index_ivf = index
assert isinstance(index_ivf, faiss.IndexIVF)
vec_transform = lambda x: x
assert isinstance(index_ivf, faiss.IndexIVF)
else:
print "build index, key=", args.indexkey
index = faiss.index_factory(d, args.indexkey)
if isinstance(index, faiss.IndexPreTransform):
index_ivf = faiss.downcast_index(index.index)
vec_transform = index.chain.at(0).apply_py
else:
index_ivf = index
vec_transform = lambda x:x
assert isinstance(index_ivf, faiss.IndexIVF)
index_ivf.verbose = True
index_ivf.quantizer.verbose = True
index_ivf.cp.verbose = True
maxtrain = args.maxtrain
if maxtrain == 0:
if 'IMI' in args.indexkey:
maxtrain = int(256 * 2 ** (np.log2(index_ivf.nlist) / 2))
else:
maxtrain = 50 * index_ivf.nlist
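# (for an IMI coarse quantizer this is 256 * sqrt(nlist) training points, since
# its two sub-codebooks each have sqrt(nlist) centroids; otherwise roughly 50
# training points per IVF centroid)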
print "setting maxtrain to %d" % maxtrain
args.maxtrain = maxtrain
xt2 = sanitize(xt[:args.maxtrain])
assert np.all(np.isfinite(xt2))
print "train, size", xt2.shape
if args.get_centroids_from == '':
if args.clustering_niter >= 0:
print ("setting nb of clustering iterations to %d" %
args.clustering_niter)
index_ivf.cp.niter = args.clustering_niter
if args.train_on_gpu:
print "add a training index on GPU"
train_index = faiss.index_cpu_to_all_gpus(faiss.IndexFlatL2(d))
index_ivf.clustering_index = train_index
else:
print "Getting centroids from", args.get_centroids_from
src_index = faiss.read_index(args.get_centroids_from)
src_quant = faiss.downcast_index(src_index.quantizer)
centroids = faiss.vector_to_array(src_quant.xb)
centroids = centroids.reshape(-1, d)
print " centroid table shape", centroids.shape
if isinstance(index, faiss.IndexPreTransform):
print " training vector transform"
assert index.chain.size() == 1
vt = index.chain.at(0)
vt.train(xt2)
print " transform centroids"
centroids = vt.apply_py(centroids)
print " add centroids to quantizer"
index_ivf.quantizer.add(centroids)
del src_index
t0 = time.time()
index.train(xt2)
print " train in %.3f s" % (time.time() - t0)
print "adding"
t0 = time.time()
if args.add_bs == -1:
index.add(sanitize(xb))
else:
for i0 in range(0, nb, args.add_bs):
i1 = min(nb, i0 + args.add_bs)
print " adding %d:%d / %d" % (i0, i1, nb)
index.add(sanitize(xb[i0:i1]))
print " add in %.3f s" % (time.time() - t0)
if args.indexfile:
print "storing", args.indexfile
faiss.write_index(index, args.indexfile)
if args.no_precomputed_tables:
if isinstance(index_ivf, faiss.IndexIVFPQ):
print "disabling precomputed table"
index_ivf.use_precomputed_table = -1
index_ivf.precomputed_table.clear()
if args.indexfile:
print "index size on disk: ", os.stat(args.indexfile).st_size
print "current RSS:", faiss.get_mem_usage_kb() * 1024
precomputed_table_size = 0
if hasattr(index_ivf, 'precomputed_table'):
precomputed_table_size = index_ivf.precomputed_table.size() * 4
print "precomputed tables size:", precomputed_table_size
#############################################################
# Index is ready
#############################################################
xq = sanitize(xq)
if args.searchthreads != -1:
print "Setting nb of threads to", args.searchthreads
faiss.omp_set_num_threads(args.searchthreads)
ps = faiss.ParameterSpace()
ps.initialize(index)
parametersets = args.searchparams
header = '%-40s R@1 R@10 R@100 time(ms/q) nb distances #runs' % "parameters"
def eval_setting(index, xq, gt, min_time):
nq = xq.shape[0]
ivf_stats = faiss.cvar.indexIVF_stats
ivf_stats.reset()
nrun = 0
t0 = time.time()
while True:
D, I = index.search(xq, 100)
nrun += 1
t1 = time.time()
if t1 - t0 > min_time:
break
ms_per_query = ((t1 - t0) * 1000.0 / nq / nrun)
for rank in 1, 10, 100:
n_ok = (I[:, :rank] == gt[:, :1]).sum()
print "%.4f" % (n_ok / float(nq)),
print " %8.3f " % ms_per_query,
print "%12d " % (ivf_stats.ndis / nrun),
print nrun
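# Note: each row printed by eval_setting follows the header defined above:
# R@1, R@10, R@100, search time in ms per query, number of distance
# computations per pass over the query set (ivf_stats.ndis / nrun), and the
# number of runs performed to reach min_test_duration.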
if parametersets == ['autotune']:
ps.n_experiments = args.n_autotune
ps.min_test_duration = args.min_test_duration
for kv in args.autotune_max:
k, vmax = kv.split(':')
vmax = float(vmax)
print "limiting %s to %g" % (k, vmax)
pr = ps.add_range(k)
values = faiss.vector_to_array(pr.values)
values = np.array([v for v in values if v < vmax])
faiss.copy_array_to_vector(values, pr.values)
for kv in args.autotune_range:
k, vals = kv.split(':')
vals = np.fromstring(vals, sep=',')
print "setting %s to %s" % (k, vals)
pr = ps.add_range(k)
faiss.copy_array_to_vector(vals, pr.values)
# setup the Criterion object: optimize for 1-R@1
crit = faiss.OneRecallAtRCriterion(nq, 1)
# by default, the criterion will request only 1 NN
crit.nnn = 100
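# (presumably set to 100 so that timings measured during exploration stay
# comparable with the k=100 searches in eval_setting; the criterion itself
# only evaluates recall at rank 1)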
crit.set_groundtruth(None, gt.astype('int64'))
# then we let Faiss find the optimal parameters by itself
print "exploring operating points"
ps.display()
t0 = time.time()
op = ps.explore(index, xq, crit)
print "Done in %.3f s, available OPs:" % (time.time() - t0)
op.display()
print header
opv = op.optimal_pts
for i in range(opv.size()):
opt = opv.at(i)
ps.set_index_parameters(index, opt.key)
print "%-40s " % opt.key,
sys.stdout.flush()
eval_setting(index, xq, gt, args.min_test_duration)
else:
print header
for param in parametersets:
print "%-40s " % param,
sys.stdout.flush()
ps.set_index_parameters(index, param)
eval_setting(index, xq, gt, args.min_test_duration)
# Copyright (c) 2015-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the BSD+Patents license found in the
# LICENSE file in the root directory of this source tree.
#! /usr/bin/env python2
"""
Common functions to load datasets and compute their ground-truth
"""
import time
import numpy as np
import faiss
import sys
# set this to the directory that contains the datafiles.
# deep1b data should be at simdir + 'deep1b'
# bigann data should be at simdir + 'bigann'
simdir = '/mnt/vol/gfsai-east/ai-group/datasets/simsearch/'
#################################################################
# Small I/O functions
#################################################################
def ivecs_read(fname):
a = np.fromfile(fname, dtype='int32')
d = a[0]
return a.reshape(-1, d + 1)[:, 1:].copy()
def fvecs_read(fname):
return ivecs_read(fname).view('float32')
def ivecs_mmap(fname):
a = np.memmap(fname, dtype='int32', mode='r')
d = a[0]
return a.reshape(-1, d + 1)[:, 1:]
def fvecs_mmap(fname):
return ivecs_mmap(fname).view('float32')
def bvecs_mmap(fname):
x = np.memmap(fname, dtype='uint8', mode='r')
d = x[:4].view('int32')[0]
return x.reshape(-1, d + 4)[:, 4:]
def ivecs_write(fname, m):
n, d = m.shape
m1 = np.empty((n, d + 1), dtype='int32')
m1[:, 0] = d
m1[:, 1:] = m
m1.tofile(fname)
def fvecs_write(fname, m):
m = m.astype('float32')
ivecs_write(fname, m.view('int32'))
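# The *vecs layout stores, for each vector, its dimension d as an int32
# followed by its d components (int32 for .ivecs, float32 for .fvecs).
# Illustrative round trip (not executed by the benchmark):
#   m = np.random.rand(5, 16).astype('float32')
#   fvecs_write('/tmp/toy.fvecs', m)
#   assert np.all(fvecs_read('/tmp/toy.fvecs') == m)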
#################################################################
# Dataset
#################################################################
def sanitize(x):
return np.ascontiguousarray(x, dtype='float32')
class ResultHeap:
""" Combine query results from a sliced dataset """
def __init__(self, nq, k):
" nq: number of query vectors, k: number of results per query "
self.I = np.zeros((nq, k), dtype='int64')
self.D = np.zeros((nq, k), dtype='float32')
self.nq, self.k = nq, k
heaps = faiss.float_maxheap_array_t()
heaps.k = k
heaps.nh = nq
heaps.val = faiss.swig_ptr(self.D)
heaps.ids = faiss.swig_ptr(self.I)
heaps.heapify()
self.heaps = heaps
def add_batch_result(self, D, I, i0):
assert D.shape == (self.nq, self.k)
assert I.shape == (self.nq, self.k)
I += i0
self.heaps.addn_with_ids(
self.k, faiss.swig_ptr(D),
faiss.swig_ptr(I), self.k)
def finalize(self):
self.heaps.reorder()
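# Illustrative use of ResultHeap (the index names are hypothetical): merging
# the top-k of two database slices so the result equals one full search.
#   rh = ResultHeap(nq=xq.shape[0], k=10)
#   D0, I0 = index_slice0.search(xq, 10)              # ids local to slice 0
#   rh.add_batch_result(D0, I0, 0)
#   D1, I1 = index_slice1.search(xq, 10)              # ids local to slice 1
#   rh.add_batch_result(D1, I1, index_slice0.ntotal)  # shift ids by the offset
#   rh.finalize()                                     # results in rh.D, rh.I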
def compute_GT_sliced(xb, xq, k):
print "compute GT"
t0 = time.time()
nb, d = xb.shape
nq, d = xq.shape
rh = ResultHeap(nq, k)
bs = 10 ** 5
xqs = sanitize(xq)
db_gt = faiss.index_cpu_to_all_gpus(faiss.IndexFlatL2(d))
# compute ground-truth by blocks of bs, and add to heaps
for i0 in range(0, nb, bs):
i1 = min(nb, i0 + bs)
xsl = sanitize(xb[i0:i1])
db_gt.add(xsl)
D, I = db_gt.search(xqs, k)
rh.add_batch_result(D, I, i0)
db_gt.reset()
print "\r %d/%d, %.3f s" % (i0, nb, time.time() - t0),
sys.stdout.flush()
print
rh.finalize()
gt_I = rh.I
print "GT time: %.3f s" % (time.time() - t0)
return gt_I
def do_compute_gt(xb, xq, k):
print "computing GT"
nb, d = xb.shape
index = faiss.index_cpu_to_all_gpus(faiss.IndexFlatL2(d))
if nb < 100 * 1000:
print " add"
index.add(np.ascontiguousarray(xb, dtype='float32'))
print " search"
D, I = index.search(np.ascontiguousarray(xq, dtype='float32'), k)
else:
I = compute_GT_sliced(xb, xq, k)
return I.astype('int32')
def load_data(dataset='deep1M', compute_gt=False):
print "load data", dataset
if dataset == 'sift1M':
basedir = simdir + 'sift1M/'
xt = fvecs_read(basedir + "sift_learn.fvecs")
xb = fvecs_read(basedir + "sift_base.fvecs")
xq = fvecs_read(basedir + "sift_query.fvecs")
gt = ivecs_read(basedir + "sift_groundtruth.ivecs")
elif dataset.startswith('bigann'):
basedir = simdir + 'bigann/'
dbsize = 1000 if dataset == "bigann1B" else int(dataset[6:-1])
xb = bvecs_mmap(basedir + 'bigann_base.bvecs')
xq = bvecs_mmap(basedir + 'bigann_query.bvecs')
xt = bvecs_mmap(basedir + 'bigann_learn.bvecs')
# trim xb to correct size
xb = xb[:dbsize * 1000 * 1000]
gt = ivecs_read(basedir + 'gnd/idx_%dM.ivecs' % dbsize)
elif dataset.startswith("deep"):
basedir = simdir + 'deep1b/'
szsuf = dataset[4:]
if szsuf[-1] == 'M':
dbsize = 10 ** 6 * int(szsuf[:-1])
elif szsuf == '1B':
dbsize = 10 ** 9
elif szsuf[-1] == 'k':
dbsize = 1000 * int(szsuf[:-1])
else:
assert False, "did not recognize suffix " + szsuf
xt = fvecs_mmap(basedir + "learn.fvecs")
xb = fvecs_mmap(basedir + "base.fvecs")
xq = fvecs_read(basedir + "deep1B_queries.fvecs")
xb = xb[:dbsize]
gt_fname = basedir + "%s_groundtruth.ivecs" % dataset
if compute_gt:
gt = do_compute_gt(xb, xq, 100)
print "store", gt_fname
ivecs_write(gt_fname, gt)
gt = ivecs_read(gt_fname)
else:
assert False
print "dataset %s sizes: B %s Q %s T %s" % (
dataset, xb.shape, xq.shape, xt.shape)
return xt, xb, xq, gt
#################################################################
# Evaluation
#################################################################
def evaluate_DI(D, I, gt):
nq = gt.shape[0]
k = I.shape[1]
rank = 1
while rank <= k:
recall = (I[:, :rank] == gt[:, :1]).sum() / float(nq)
print "R@%d: %.4f" % (rank, recall),
rank *= 10
def evaluate(xq, gt, index, k=100, endl=True):
t0 = time.time()
D, I = index.search(xq, k)
t1 = time.time()
nq = xq.shape[0]
print "\t %8.4f ms per query, " % (
(t1 - t0) * 1000.0 / nq),
rank = 1
while rank <= k:
recall = (I[:, :rank] == gt[:, :1]).sum() / float(nq)
print "R@%d: %.4f" % (rank, recall),
rank *= 10
if endl:
print
return D, I
# Copyright (c) 2015-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the BSD+Patents license found in the
# LICENSE file in the root directory of this source tree.
#! /usr/bin/python2
import os
import numpy as np
from matplotlib import pyplot
import re
from argparse import Namespace
# the directory used in run_on_cluster.bash
basedir = '/mnt/vol/gfsai-east/ai-group/users/matthijs/bench_all_ivf/'
logdir = basedir + 'logs/'
# which plot to output
db = 'bigann1B'
code_size = 8
def unitsize(indexkey):
""" size of one vector in the index """
mo = re.match('.*,PQ(\\d+)', indexkey)
if mo:
return int(mo.group(1))
if indexkey.endswith('SQ8'):
bits_per_d = 8
elif indexkey.endswith('SQ4'):
bits_per_d = 4
elif indexkey.endswith('SQfp16'):
bits_per_d = 16
else:
assert False
mo = re.match('PCAR(\\d+),.*', indexkey)
if mo:
return bits_per_d * int(mo.group(1)) / 8
mo = re.match('OPQ\\d+_(\\d+),.*', indexkey)
if mo:
return bits_per_d * int(mo.group(1)) / 8
mo = re.match('RR(\\d+),.*', indexkey)
if mo:
return bits_per_d * int(mo.group(1)) / 8
assert False
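# For instance (bytes per stored code, excluding the 8-byte id):
#   unitsize('OPQ8_64,IMI2x9,PQ8')        -> 8
#   unitsize('PCAR32,IVF4096_HNSW32,SQ4') -> 16   (32 dims * 4 bits / 8)
#   unitsize('PCAR64,IMI2x10,SQfp16')     -> 128  (64 dims * 16 bits / 8)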
def dbsize_from_name(dbname):
sufs = {
'1B': 10**9,
'100M': 10**8,
'10M': 10**7,
'1M': 10**6,
}
for s in sufs:
if dbname.endswith(s):
return sufs[s]
else:
assert False
def keep_latest_stdout(fnames):
fnames = [fname for fname in fnames if fname.endswith('.stdout')]
fnames.sort()
n = len(fnames)
fnames2 = []
for i, fname in enumerate(fnames):
if i + 1 < n and fnames[i + 1][:-8] == fname[:-8]:
continue
fnames2.append(fname)
return fnames2
def parse_result_file(fname):
# print fname
st = 0
res = []
keys = []
stats = {}
stats['run_version'] = fname[-8]
for l in open(fname):
if st == 0:
if l.startswith('CHRONOS_JOB_INSTANCE_ID'):
stats['CHRONOS_JOB_INSTANCE_ID'] = l.split()[-1]
if l.startswith('index size on disk:'):
stats['index_size'] = int(l.split()[-1])
if l.startswith('current RSS:'):
stats['RSS'] = int(l.split()[-1])
if l.startswith('precomputed tables size:'):
stats['tables_size'] = int(l.split()[-1])
if l.startswith('Setting nb of threads to'):
stats['n_threads'] = int(l.split()[-1])
if l.startswith(' add in'):
stats['add_time'] = float(l.split()[-2])
if l.startswith('args:'):
args = eval(l[l.find(' '):])
indexkey = args.indexkey
elif 'R@1 R@10 R@100' in l:
st = 1
elif 'index size on disk:' in l:
index_size = int(l.split()[-1])
elif st == 1:
st = 2
elif st == 2:
fi = l.split()
keys.append(fi[0])
res.append([float(x) for x in fi[1:]])
return indexkey, np.array(res), keys, stats
# run parsing
allres = {}
allstats = {}
nts = []
missing = []
versions = {}
fnames = keep_latest_stdout(os.listdir(logdir))
# print fnames
# filenames are in the form <key>.x.stdout
# where x is a version number (from a to z)
# keep only latest version of each name
for fname in fnames:
if not ('db' + db in fname and fname.endswith('.stdout')):
continue
indexkey, res, _, stats = parse_result_file(logdir + fname)
if res.size == 0:
missing.append(fname)
errorline = open(
logdir + fname.replace('.stdout', '.stderr')).readlines()
if len(errorline) > 0:
errorline = errorline[-1]
else:
errorline = 'NO STDERR'
print fname, stats['CHRONOS_JOB_INSTANCE_ID'], errorline
else:
if indexkey in allres:
if allstats[indexkey]['run_version'] > stats['run_version']:
# don't use this run
continue
n_threads = stats.get('n_threads', 1)
nts.append(n_threads)
allres[indexkey] = res
allstats[indexkey] = stats
assert len(set(nts)) == 1
n_threads = nts[0]
def plot_tradeoffs(allres, code_size, recall_rank):
dbsize = dbsize_from_name(db)
recall_idx = int(np.log10(recall_rank))
bigtab = []
names = []
for k,v in sorted(allres.items()):
if v.ndim != 2: continue
us = unitsize(k)
if us != code_size: continue
perf = v[:, recall_idx]
times = v[:, 3]
bigtab.append(
np.vstack((
np.ones(times.size, dtype=int) * len(names),
perf, times
))
)
names.append(k)
bigtab = np.hstack(bigtab)
perm = np.argsort(bigtab[1, :])
bigtab = bigtab[:, perm]
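# The next two lines keep only Pareto-optimal operating points: with points
# sorted by increasing recall, times[i] is the lowest search time achieved by
# any point of equal or higher recall (running minimum taken from the
# high-recall end), and a point is selected exactly when its own time equals
# that minimum.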
times = np.minimum.accumulate(bigtab[2, ::-1])[::-1]
selection = np.where(bigtab[2, :] == times)
selected_methods = [names[i] for i in
np.unique(bigtab[0, selection].astype(int))]
not_selected = list(set(names) - set(selected_methods))
print "methods without an optimal OP: ", not_selected
nq = 10000
pyplot.title('database ' + db + ' code_size=%d' % code_size)
# grayed out lines
for k in not_selected:
v = allres[k]
if v.ndim != 2: continue
us = unitsize(k)
if us != code_size: continue
linestyle = (':' if 'PQ' in k else
'-.' if 'SQ4' in k else
'--' if 'SQ8' in k else '-')
pyplot.semilogy(v[:, recall_idx], v[:, 3], label=None,
linestyle=linestyle,
marker='o' if 'HNSW' in k else '+',
color='#cccccc', linewidth=0.2)
# important methods
for k in selected_methods:
v = allres[k]
if v.ndim != 2: continue
us = unitsize(k)
if us != code_size: continue
stats = allstats[k]
tot_size = stats['index_size'] + stats['tables_size']
id_size = 8 # 64 bit
addt = ''
if 'add_time' in stats:
add_time = stats['add_time']
if add_time > 7200:
add_min = add_time / 60
addt = ', %dh%02d' % (add_min / 60, add_min % 60)
else:
add_sec = int(add_time)
addt = ', %dm%02d' % (add_sec / 60, add_sec % 60)
label = k + ' (size+%.1f%%%s)' % (
tot_size / float((code_size + id_size) * dbsize) * 100 - 100,
addt)
linestyle = (':' if 'PQ' in k else
'-.' if 'SQ4' in k else
'--' if 'SQ8' in k else '-')
pyplot.semilogy(v[:, recall_idx], v[:, 3], label=label,
linestyle=linestyle,
marker='o' if 'HNSW' in k else '+')
if len(not_selected) == 0:
om = ''
else:
om = '\nomitted:'
nc = len(om)
for m in not_selected:
if nc > 80:
om += '\n'
nc = 0
om += ' ' + m
nc += len(m) + 1
pyplot.xlabel('1-recall at %d %s' % (recall_rank, om) )
pyplot.ylabel('search time per query (ms, %d threads)' % n_threads)
pyplot.legend()
pyplot.grid()
pyplot.savefig('figs/tradeoffs_%s_cs%d_r%d.png' % (
db, code_size, recall_rank))
return selected_methods, not_selected
pyplot.gcf().set_size_inches(15, 10)
plot_tradeoffs(allres, code_size=code_size, recall_rank=1)
# Copyright (c) 2015-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the BSD+Patents license found in the
# LICENSE file in the root directory of this source tree.
# @nolint
# This script launches the experiments on a cluster
# It assumes two shell functions are defined:
#
# run_on_1machine: runs a command on one (full) machine on a cluster
#
# run_on_8gpu: runs a command on one machine with 8 GPUs
#
# the two functions are called as:
#
# run_on_1machine <name> <command>
#
# the stdout of the command should be stored in $logdir/<name>.stdout
function run_on_1machine () {
    # To be implemented
    true  # placeholder so the stub parses; replace with the actual submission command
}
function run_on_8gpu () {
    # To be implemented
    true  # placeholder so the stub parses; replace with the actual submission command
}
# prepare output directories
basedir=/mnt/vol/gfsai-east/ai-group/users/matthijs/bench_all_ivf
logdir=$basedir/logs
indexdir=$basedir/indexes
mkdir -p $logdir $indexdir
############################### 1M experiments
for db in sift1M deep1M bigann1M; do
for coarse in IMI2x9 IMI2x10 IVF1024_HNSW32 IVF4096_HNSW32 IVF16384_HNSW32
do
for indexkey in \
OPQ8_64,$coarse,PQ8 \
PCAR16,$coarse,SQ4 \
OPQ16_64,$coarse,PQ16 \
PCAR32,$coarse,SQ4 \
PCAR16,$coarse,SQ8 \
OPQ32_128,$coarse,PQ32 \
PCAR64,$coarse,SQ4 \
PCAR32,$coarse,SQ8 \
PCAR16,$coarse,SQfp16 \
PCAR64,$coarse,SQ8 \
PCAR32,$coarse,SQfp16 \
PCAR128,$coarse,SQ4
do
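# ${indexkey//,/_} replaces every comma with "_" so the factory key can be
# used as part of a file name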
key=autotune.db$db.${indexkey//,/_}
run_on_1machine $key \
python -u bench_all_ivf.py \
--db $db \
--indexkey $indexkey \
--maxtrain 0 \
--indexfile $indexdir/$key.faissindex
done
done
done
############################### 10M experiments
for db in deep10M bigann10M; do
for coarse in \
IMI2x10 IMI2x11 IMI2x12 IMI2x13 IVF4096_HNSW32 \
IVF16384_HNSW32 IVF65536_HNSW32 IVF262144_HNSW32
do
for indexkey in \
OPQ8_64,$coarse,PQ8 \
PCAR16,$coarse,SQ4 \
OPQ16_64,$coarse,PQ16 \
PCAR32,$coarse,SQ4 \
PCAR16,$coarse,SQ8 \
OPQ32_128,$coarse,PQ32 \
PCAR64,$coarse,SQ4 \
PCAR32,$coarse,SQ8 \
PCAR16,$coarse,SQfp16 \
PCAR64,$coarse,SQ8 \
PCAR32,$coarse,SQfp16 \
PCAR128,$coarse,SQ4 \
OPQ64_128,$coarse,PQ64
do
key=autotune.db$db.${indexkey//,/_}
run_on_1machine $key \
python -u bench_all_ivf.py \
--db $db \
--indexkey $indexkey \
--maxtrain 0 \
--indexfile $indexdir/$key.faissindex \
--searchthreads 16 \
--min_test_duration 3
done
done
done
############################### 100M experiments
for db in deep100M bigann100M; do
for coarse in IMI2x11 IMI2x12 IVF65536_HNSW32 IVF262144_HNSW32
do
for indexkey in \
OPQ8_64,$coarse,PQ8 \
OPQ16_64,$coarse,PQ16 \
PCAR32,$coarse,SQ4 \
OPQ32_128,$coarse,PQ32 \
PCAR64,$coarse,SQ4 \
PCAR32,$coarse,SQ8 \
PCAR64,$coarse,SQ8 \
PCAR32,$coarse,SQfp16 \
PCAR128,$coarse,SQ4 \
OPQ64_128,$coarse,PQ64
do
key=autotune.db$db.${indexkey//,/_}
run_on_1machine $key \
python -u bench_all_ivf.py \
--db $db \
--indexkey $indexkey \
--maxtrain 0 \
--indexfile $indexdir/$key.faissindex \
--searchthreads 16 \
--min_test_duration 3 \
--add_bs 1000000
done
done
done
############################### 1B experiments
for db in deep1B bigann1B; do
for coarse in IMI2x12 IMI2x13 IVF262144_HNSW32
do
for indexkey in \
OPQ8_64,$coarse,PQ8 \
OPQ16_64,$coarse,PQ16 \
PCAR32,$coarse,SQ4 \
OPQ32_128,$coarse,PQ32 \
PCAR64,$coarse,SQ4 \
PCAR32,$coarse,SQ8 \
PCAR64,$coarse,SQ8 \
PCAR32,$coarse,SQfp16 \
PCAR128,$coarse,SQ4 \
OPQ64_128,$coarse,PQ64 \
RR128,$coarse,SQ4
do
key=autotune.db$db.${indexkey//,/_}
run_on_1machine $key \
python -u bench_all_ivf.py \
--db $db \
--indexkey $indexkey \
--maxtrain 0 \
--indexfile $indexdir/$key.faissindex \
--searchthreads 16 \
--min_test_duration 3 \
--add_bs 1000000
done
done
done
############################################
# precompute centroids on GPU for large vocabularies
for db in deep1M bigann1M; do
for ncent in 1048576 4194304; do
key=clustering.db$db.IVF$ncent
run_on_8gpu $key \
python -u bench_all_ivf.py \
--db $db \
--indexkey IVF$ncent,SQ8 \
--maxtrain 100000000 \
--indexfile $indexdir/$key.faissindex \
--searchthreads 16 \
--min_test_duration 3 \
--add_bs 1000000 \
--train_on_gpu
done
done
#################################
# Run actual experiment
for db in deep1B bigann1B; do
for ncent in 1048576 4194304; do
coarse=IVF${ncent}_HNSW32
centroidsname=clustering.db${db/1B/1M}.IVF${ncent}.faissindex
for indexkey in \
OPQ8_64,$coarse,PQ8 \
OPQ16_64,$coarse,PQ16 \
PCAR32,$coarse,SQ4 \
OPQ32_128,$coarse,PQ32 \
PCAR64,$coarse,SQ4 \
PCAR32,$coarse,SQ8 \
PCAR64,$coarse,SQ8 \
PCAR32,$coarse,SQfp16 \
OPQ64_128,$coarse,PQ64 \
RR128,$coarse,SQ4 \
OPQ64_128,$coarse,PQ64 \
RR128,$coarse,SQ4
do
key=autotune.db$db.${indexkey//,/_}
run_on_1machine $key.c $key \
python -u bench_all_ivf.py \
--db $db \
--indexkey $indexkey \
--maxtrain 256000 \
--indexfile $indexdir/$key.faissindex \
--get_centroids_from $indexdir/$centroidsname \
--searchthreads 16 \
--min_test_duration 3 \
--add_bs 1000000
done
done
done
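Each indexkey above is a Faiss index_factory string (preprocessing, coarse quantizer, fine code). A hedged sketch of what one of them expands to; the dimension 96 is only an assumption (deep1B descriptors):

    import faiss

    d = 96                                         # assumed vector dimension
    index = faiss.index_factory(d, "OPQ8_64,IMI2x9,PQ8")
    # OPQ8_64 : OPQ rotation to 64 dims, organized in 8 sub-blocks
    # IMI2x9  : inverted multi-index coarse quantizer, 2 x 9 bits -> 2**18 lists
    # PQ8     : 8-byte product-quantizer codes for the residuals
    index_ivf = faiss.downcast_index(index.index)  # index is an IndexPreTransform
    print(index_ivf.nlist)                         # 262144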
@@ -11,14 +11,12 @@ Link & code is an indexing method that combines HNSW indexing with
 compression and exploits the neighborhood structure of the similarity
 graph to improve the reconstruction. It is described in
-```
 @inproceedings{link_and_code,
   author = {Matthijs Douze and Alexandre Sablayrolles and Herv\'e J\'egou},
   title = {Link and code: Fast indexing with graphs and compact regression codes},
   booktitle = {CVPR},
   year = {2018}
 }
-```
 ArXiV [here](https://arxiv.org/abs/1804.09996)
......
@@ -18,14 +18,14 @@ import pdb
 # we import * so that the symbol X can be accessed as faiss.X
 try:
-    from .swigfaiss_gpu import *
+    from swigfaiss_gpu import *
 except ImportError as e:
     if 'No module named' not in e.args[0]:
         # swigfaiss_gpu is there but failed to load: Warn user about it.
         sys.stderr.write("Failed to load GPU Faiss: %s\n" % e.args[0])
         sys.stderr.write("Faiss falling back to CPU-only.\n")
-    from .swigfaiss import *
+    from swigfaiss import *
 __version__ = "%d.%d.%d" % (FAISS_VERSION_MAJOR,
                             FAISS_VERSION_MINOR,
......
@@ -16,7 +16,7 @@ import os
 import re
-from common import get_dataset, get_dataset_2
+from .common import get_dataset, get_dataset_2
 class TestModuleInterface(unittest.TestCase):
......
@@ -26,7 +26,7 @@ kprobe = int(np.sqrt(ncentroids))
 nbits = d
 # Parameters for indexes involving PQ
-M = d / 8           # for PQ: #subquantizers
+M = int(d / 8)      # for PQ: #subquantizers
 nbits_per_index = 8  # for PQ
@@ -126,7 +126,6 @@ class IndexAccuracy(unittest.TestCase):
         stats = faiss.cvar.indexPQ_stats
         stats.reset()
         res = ev.launch('Polysemous ht=%d' % index.polysemous_ht,
                         index)
         e_polysemous = ev.evalres(res)
@@ -249,7 +248,7 @@ class TestSQFlavors(unittest.TestCase):
             D, I = index.search(xq, 10)
             ninter = faiss.eval_intersection(I, gt_I)
             print('(%d, %s): %d, ' % (mt, repr(qname), ninter))
-            assert ninter >= self.ref_results[(mt, qname)] - 4
+            assert abs(ninter - self.ref_results[(mt, qname)]) <= 4
             D2, I2 = self.subtest_add2col(xb, xq, index, qname)
@@ -265,10 +264,10 @@ class TestSQFlavors(unittest.TestCase):
 class TestPQFlavors(unittest.TestCase):
-    # run on Sept 6, 2018
+    # run on Dec 14, 2018
     ref_results = {
         (1, True): 800,
-        (1, True, 20): 742,
+        (1, True, 20): 794,
         (1, False): 769,
         (0, True): 831,
         (0, True, 20): 828,
@@ -312,7 +311,7 @@ class TestPQFlavors(unittest.TestCase):
             ninter = faiss.eval_intersection(I, gt_I)
             print('(%d, %s): %d, ' % (mt, by_residual, ninter))
-            assert ninter >= self.ref_results[mt, by_residual] - 2
+            assert abs(ninter - self.ref_results[mt, by_residual]) <= 2
             index.use_precomputed_table = 0
             D2, I2 = index.search(xq, 10)
@@ -412,8 +411,7 @@ class OPQRelativeAccuracy(unittest.TestCase):
         e_oivfpq = ev.evalres(res)
         # verify same on OIVFPQ
-        # Currently disabled because flaky.
-        # self.assertGreater(e_oivfpq[1], e_ivfpq[1])
+        assert(e_oivfpq[1] > e_ivfpq[1])
 if __name__ == '__main__':
......
@@ -379,7 +379,7 @@ void test_lowlevel_access_binary (const char *index_key) {
     printf("]\n");
     // re-order heap
-    heap_reorder<CMax<int32_t, idx_t> > (k, D.data(), I.data());
+    heap_reorder<CMax<int32_t, int64_t> > (k, D.data(), I.data());
     printf("ref: [");
     for (int j = 0; j < k; j++)
......
@@ -21,10 +21,6 @@
 using namespace faiss;
-namespace {
 typedef Index::idx_t idx_t;
@@ -220,7 +216,7 @@ int test_sliding_invlists (const char *index_key) {
     }
-} // namespace
 /*************************************************************
......
@@ -8,7 +8,6 @@
 #include <cstdio>
 #include <cstdlib>
-#include <memory>
 #include <gtest/gtest.h>
@@ -21,11 +20,6 @@
 #include <faiss/IVFlib.h>
-using namespace faiss;
-namespace {
 // parameters to use for the test
 int d = 64;
 size_t nb = 1000;
@@ -34,6 +28,8 @@ size_t nt = 500;
 int k = 10;
 int nlist = 40;
+using namespace faiss;
 typedef faiss::Index::idx_t idx_t;
@@ -43,6 +39,10 @@ std::vector<float> get_data (size_t nb, int seed) {
     return x;
 }
 void test_index_type(const char *factory_string) {
     // transfer inverted lists in nslice slices
@@ -147,9 +147,6 @@ void test_index_type(const char *factory_string) {
     }
-} // namespace
 TEST(TRANS, IVFFlat) {
     test_index_type ("IVF40,Flat");
 }
......