• Lucas Hosseini's avatar
    Facebook sync (May 2019) + relicense (#838) · a8118acb
    Lucas Hosseini authored
    Changelog:
    
    - changed license: BSD+Patents -> MIT
    - propagates exceptions raised in sub-indexes of IndexShards and IndexReplicas
    - support for searching several inverted lists in parallel (parallel_mode != 0)
    - better support for PQ codes where nbit != 8 or 16
    - IVFSpectralHash implementation: spectral hash codes inside an IVF
    - 6-bit per component scalar quantizer (4 and 8 bit were already supported)
    - combinations of inverted lists: HStackInvertedLists and VStackInvertedLists
    - configurable number of threads for OnDiskInvertedLists prefetching (including 0=no prefetch)
    - more test and demo code compatible with Python 3 (print with parentheses)
    - refactored benchmark code: data loading is now in a single file
    Unverified
    a8118acb
datasets.py 1.07 KB
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

from __future__ import print_function
import sys
import time
import numpy as np


def ivecs_read(fname):
    a = np.fromfile(fname, dtype='int32')
    d = a[0]
    return a.reshape(-1, d + 1)[:, 1:].copy()


def fvecs_read(fname):
    return ivecs_read(fname).view('float32')


def load_sift1M():
    print("Loading sift1M...", end='', file=sys.stderr)
    xt = fvecs_read("sift1M/sift_learn.fvecs")
    xb = fvecs_read("sift1M/sift_base.fvecs")
    xq = fvecs_read("sift1M/sift_query.fvecs")
    gt = ivecs_read("sift1M/sift_groundtruth.ivecs")
    print("done", file=sys.stderr)

    return xb, xq, xt, gt


def evaluate(index, xq, gt, k):
    nq = xq.shape[0]
    t0 = time.time()
    D, I = index.search(xq, k)  # noqa: E741
    t1 = time.time()

    recalls = {}
    i = 1
    while i <= k:
        recalls[i] = (I[:, :i] == gt[:, :1]).sum() / float(nq)
        i *= 10

    return (t1 - t0) * 1000.0 / nq, recalls