Commit c5077070 authored by matthijs

Sync with FB version. Added:

- better selection of training sets for PQ and preprocessing
- GPU parameter object (GpuIndexFlatConfig)
- IndexIDMap serialization fixed
- fixed redo bug in clustering
parent acb93857
......@@ -104,14 +104,13 @@ void Clustering::train (idx_t nx, const float *x_in, Index & index) {
int(nx), d, k, nredo, niter);
idx_t * assign = new idx_t[nx];
float * dis = new float[nx];
float best_err = 1e50;
double t_search_tot = 0;
if (verbose) {
printf(" Preprocessing in %5g s\n",
printf(" Preprocessing in %.2f s\n",
(getmillisecs() - t0)/1000.);
}
t0 = getmillisecs();
......@@ -149,7 +148,7 @@ void Clustering::train (idx_t nx, const float *x_in, Index & index) {
if (!index.is_trained)
index.train (k, cur_centroids.data());
FAISS_ASSERT (index.ntotal == 0 );
FAISS_ASSERT (index.ntotal == 0);
index.add (k, cur_centroids.data());
float err = 0;
for (int i = 0; i < niter; i++) {
......@@ -183,16 +182,17 @@ void Clustering::train (idx_t nx, const float *x_in, Index & index) {
index.train (k, cur_centroids.data());
assert (index.ntotal == 0);
index.add (k, centroids.data());
index.add (k, cur_centroids.data());
}
if (verbose) printf("\n");
if (nredo > 1) {
if (err < best_err) {
if (verbose)
printf ("Keep new clusters\n");
centroids = cur_centroids;
printf ("Objective improved: keep new clusters\n");
centroids = buf_centroids;
best_err = err;
}
index.reset ();
}
}
......
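The redo fix above, in isolation: with nredo > 1 each restart trains into a scratch buffer (cur_centroids aliases buf_centroids), and only the restart with the lowest objective is copied back into centroids. A minimal sketch of the calling pattern, with made-up sizes and data:

    #include <vector>
    #include "Clustering.h"
    #include "IndexFlat.h"

    // Sketch: k-means with several restarts; the best restart's centroids
    // are kept thanks to the redo fix in this hunk.
    void cluster_with_redo (size_t n, size_t d, size_t k, const float *x) {
        faiss::Clustering clus (d, k);
        clus.niter = 20;
        clus.nredo = 3;          // run 3 restarts, keep the lowest-error one
        clus.verbose = true;
        faiss::IndexFlatL2 assign_index (d);  // used for the assignment step
        clus.train (n, x, assign_index);
        // clus.centroids now holds k*d floats from the best restart
    }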
......@@ -26,7 +26,6 @@ struct ClusteringParameters {
int niter; ///< clustering iterations
int nredo; ///< redo clustering this many times and keep best
bool verbose;
bool spherical; ///< do we want normalized centroids?
bool update_index; ///< update index after each iteration?
......
......@@ -82,8 +82,11 @@ void IndexIVFPQ::train_residual (idx_t n, const float *x)
void IndexIVFPQ::train_residual_o (idx_t n, const float *x, float *residuals_2)
{
idx_t ntrain = pq.ksub * 64;
if(n > ntrain) n = ntrain;
const float * x_in = x;
x = fvecs_maybe_subsample (
d, (size_t*)&n, pq.cp.max_points_per_centroid * pq.ksub,
x, verbose, pq.cp.seed);
const float *trainset;
if (by_residual) {
......@@ -132,6 +135,7 @@ void IndexIVFPQ::train_residual_o (idx_t n, const float *x, float *residuals_2)
precompute_table ();
}
if (x_in != x) delete [] x;
}
......
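The subsample helper used above follows a keep-the-original-pointer contract: it returns x itself when the set is small enough, otherwise a new[]-allocated subsample, updating *n in place. A hedged standalone sketch of that contract (train_capped and its arguments are illustrative):

    #include "Index.h"
    #include "utils.h"   // declares fvecs_maybe_subsample at this revision

    // Sketch: cap a training set at nmax vectors before calling train().
    void train_capped (faiss::Index &index, size_t d, size_t n,
                       const float *x, size_t nmax) {
        const float *x_in = x;
        x = faiss::fvecs_maybe_subsample (d, &n, nmax, x,
                                          /*verbose=*/false, /*seed=*/1234);
        index.train (n, x);
        if (x_in != x) delete [] x;  // only a subsample was new[]-allocated
    }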
......@@ -94,8 +94,6 @@ AutoTune.o: AutoTune.cpp AutoTune.h Index.h FaissAssert.h utils.h Heap.h \
IndexFlat.h VectorTransform.h IndexLSH.h IndexPQ.h ProductQuantizer.h \
Clustering.h PolysemousTraining.h IndexIVF.h IndexIVFPQ.h MetaIndexes.h
AuxIndexStructures.o: AuxIndexStructures.cpp AuxIndexStructures.h Index.h
BinaryCode.o: BinaryCode.cpp BinaryCode.h VectorTransform.h Index.h \
FaissAssert.h hamming.h Heap.h
Clustering.o: Clustering.cpp Clustering.h Index.h utils.h Heap.h \
FaissAssert.h IndexFlat.h
hamming.o: hamming.cpp hamming.h Heap.h FaissAssert.h
......@@ -105,7 +103,7 @@ IndexFlat.o: IndexFlat.cpp IndexFlat.h Index.h utils.h Heap.h \
FaissAssert.h
index_io.o: index_io.cpp index_io.h FaissAssert.h IndexFlat.h Index.h \
VectorTransform.h IndexLSH.h IndexPQ.h ProductQuantizer.h Clustering.h \
Heap.h PolysemousTraining.h IndexIVF.h IndexIVFPQ.h
Heap.h PolysemousTraining.h IndexIVF.h IndexIVFPQ.h MetaIndexes.h
IndexIVF.o: IndexIVF.cpp IndexIVF.h Index.h Clustering.h Heap.h utils.h \
hamming.h FaissAssert.h IndexFlat.h AuxIndexStructures.h
IndexIVFPQ.o: IndexIVFPQ.cpp IndexIVFPQ.h IndexIVF.h Index.h Clustering.h \
......@@ -113,12 +111,8 @@ IndexIVFPQ.o: IndexIVFPQ.cpp IndexIVFPQ.h IndexIVF.h Index.h Clustering.h \
IndexFlat.h hamming.h FaissAssert.h AuxIndexStructures.h
IndexLSH.o: IndexLSH.cpp IndexLSH.h Index.h VectorTransform.h utils.h \
Heap.h hamming.h FaissAssert.h
IndexNested.o: IndexNested.cpp IndexNested.h IndexIVF.h Index.h \
Clustering.h Heap.h IndexIVFPQ.h IndexPQ.h ProductQuantizer.h \
PolysemousTraining.h IndexFlat.h FaissAssert.h
IndexPQ.o: IndexPQ.cpp IndexPQ.h Index.h ProductQuantizer.h Clustering.h \
Heap.h PolysemousTraining.h FaissAssert.h hamming.h
MetaIndexes.o: MetaIndexes.cpp MetaIndexes.h Index.h FaissAssert.h Heap.h
PolysemousTraining.o: PolysemousTraining.cpp PolysemousTraining.h \
ProductQuantizer.h Clustering.h Index.h Heap.h utils.h hamming.h \
......@@ -131,6 +125,7 @@ VectorTransform.o: VectorTransform.cpp VectorTransform.h Index.h utils.h \
Heap.h FaissAssert.h IndexPQ.h ProductQuantizer.h Clustering.h \
PolysemousTraining.h
clean:
rm -f $(LIBNAME).a $(LIBNAME).$(SHAREDEXT)* *.o \
lua/swigfaiss.so lua/swigfaiss_wrap.cxx \
......
......@@ -49,7 +49,7 @@ struct IndexIDMap : Index {
virtual void set_typename () override;
virtual ~IndexIDMap ();
IndexIDMap () {own_fields=false; index=nullptr; }
};
/** Index that concatenates the results from several sub-indexes
......
......@@ -95,7 +95,7 @@ void VectorTransform::reverse_transform (
LinearTransform::LinearTransform (int d_in, int d_out,
bool have_bias):
VectorTransform (d_in, d_out), have_bias (have_bias),
max_points_per_d (1 << 20), verbose (false)
verbose (false)
{}
void LinearTransform::apply_noalloc (Index::idx_t n, const float * x,
......@@ -152,27 +152,6 @@ void LinearTransform::transform_transpose (idx_t n, const float * y,
if (have_bias) delete [] y;
}
const float * LinearTransform::maybe_subsample_train_set (
Index::idx_t *n, const float *x)
{
if (*n <= max_points_per_d * d_in) return x;
size_t n2 = max_points_per_d * d_in;
if (verbose) {
printf (" Input training set too big, sampling "
"%ld / %ld vectors\n", n2, *n);
}
std::vector<int> subset (*n);
rand_perm (subset.data (), *n, 1234);
float *x_subset = new float[n2 * d_in];
for (long i = 0; i < n2; i++)
memcpy (&x_subset[i * d_in],
&x[subset[i] * size_t(d_in)],
sizeof (x[0]) * d_in);
*n = n2;
return x_subset;
}
/*********************************************
* RandomRotationMatrix
......@@ -228,7 +207,8 @@ void PCAMatrix::train (Index::idx_t n, const float *x)
{
const float * x_in = x;
x = maybe_subsample_train_set(&n, x);
x = fvecs_maybe_subsample (d_in, (size_t*)&n,
max_points_per_d * d_in, x, verbose);
// compute mean
mean.clear(); mean.resize(d_in, 0.0);
......@@ -461,7 +441,8 @@ OPQMatrix::OPQMatrix (int d, int M, int d2):
verbose(false)
{
is_trained = false;
max_points_per_d = 1000;
// OPQ is quite expensive to train, so set this right.
max_train_points = 256 * 256;
}
......@@ -471,7 +452,8 @@ void OPQMatrix::train (Index::idx_t n, const float *x)
const float * x_in = x;
x = maybe_subsample_train_set (&n, x);
x = fvecs_maybe_subsample (d_in, (size_t*)&n,
max_train_points, x, verbose);
// To support d_out > d_in, we pad input vectors with 0s to d_out
size_t d = d_out <= d_in ? d_in : d_out;
......
......@@ -100,13 +100,8 @@ struct LinearTransform: VectorTransform {
void transform_transpose (idx_t n, const float * y,
float *x) const;
// ratio between # training vectors and dimension
size_t max_points_per_d;
bool verbose;
// subsamples training set if there are too many vectors
const float *maybe_subsample_train_set (Index::idx_t *n, const float *x);
virtual ~LinearTransform () {}
......@@ -146,6 +141,9 @@ struct PCAMatrix: LinearTransform {
/// random rotation after PCA
bool random_rotation;
/// ratio between # training vectors and dimension
size_t max_points_per_d;
/// try to distribute output eigenvectors in this many bins
int balanced_bins;
......@@ -191,8 +189,9 @@ struct OPQMatrix: LinearTransform {
int niter; ///< Number of outer training iterations
int niter_pq; ///< Number of training iterations for the PQ
int niter_pq_0; ///< same, for the first outer iteration
/// if there are too many training points, resample
int max_points_per_d;
size_t max_train_points;
bool verbose;
/// if d2 != -1, output vectors of this dimension
......
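Net effect of this header change: the subsampling knob moves off LinearTransform and lands where it is meaningful, max_points_per_d on PCAMatrix (the budget scales with input dimension) and an absolute max_train_points on OPQMatrix, since OPQ training cost does not scale the same way. A hedged sketch of setting both (sizes are arbitrary):

    #include "VectorTransform.h"

    // Sketch: adjust the per-transform training budgets added in this commit.
    void set_training_budgets () {
        faiss::PCAMatrix pca (256, 64);      // d_in = 256, d_out = 64
        pca.max_points_per_d = 1 << 18;      // at most 2^18 * d_in vectors

        faiss::OPQMatrix opq (64, 8);        // d = 64, M = 8 sub-quantizers
        opq.max_train_points = 256 * 256;    // absolute cap (the new default)
    }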
......@@ -52,7 +52,10 @@ res = faiss.StandardGpuResources()
print "============ Exact search"
index = faiss.GpuIndexFlatL2(res, 0, d, False)
flat_config = faiss.GpuIndexFlatConfig()
flat_config.device = 0
index = faiss.GpuIndexFlatL2(res, d, flat_config)
print "add vectors to index"
......
......@@ -55,12 +55,17 @@ def train_kmeans(x, k, ngpu):
res = [faiss.StandardGpuResources() for i in range(ngpu)]
useFloat16 = False
flat_config = []
for i in range(ngpu):
cfg = faiss.GpuIndexFlatConfig()
cfg.useFloat16 = False
cfg.device = i
flat_config.append(cfg)
if ngpu == 1:
index = faiss.GpuIndexFlatL2(res[0], 0, d, useFloat16)
index = faiss.GpuIndexFlatL2(res[0], d, flat_config[0])
else:
indexes = [faiss.GpuIndexFlatL2(res[i], i, d, useFloat16)
indexes = [faiss.GpuIndexFlatL2(res[i], d, flat_config[i])
for i in range(ngpu)]
index = faiss.IndexProxy()
for sub_index in indexes:
......
......@@ -65,6 +65,7 @@ GpuClonerOptions::GpuClonerOptions():
useFloat16(false),
usePrecomputed(true),
reserveVecs(0),
storeTransposed(false),
verbose(0)
{}
......@@ -79,7 +80,12 @@ struct ToGpuCloner: faiss::Cloner, GpuClonerOptions {
Index *clone_Index(const Index *index) override {
if(auto ifl = dynamic_cast<const IndexFlat *>(index)) {
return new GpuIndexFlat(resources, device, useFloat16, ifl);
GpuIndexFlatConfig config;
config.device = device;
config.useFloat16 = useFloat16;
config.storeTransposed = storeTransposed;
return new GpuIndexFlat(resources, ifl, config);
} else if(auto ifl = dynamic_cast<const faiss::IndexIVFFlat *>(index)) {
GpuIndexIVFFlat *res =
new GpuIndexIVFFlat(resources,
......
......@@ -40,6 +40,8 @@ struct GpuClonerOptions {
bool usePrecomputed;
/// reserve vectors in the invfiles?
long reserveVecs;
/// For GpuIndexFlat, store data in transposed layout?
bool storeTransposed;
int verbose;
GpuClonerOptions ();
};
......
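storeTransposed now rides along when cloning a CPU index to the GPU. A minimal sketch, assuming the index_cpu_to_gpu entry point in GpuAutoTune that consumes GpuClonerOptions at this revision:

    #include "IndexFlat.h"
    #include "gpu/GpuAutoTune.h"
    #include "gpu/StandardGpuResources.h"

    // Sketch: clone a CPU flat index to GPU 0 with transposed vector storage.
    faiss::Index *clone_transposed (faiss::gpu::GpuResources *res,
                                    const faiss::IndexFlatL2 *cpu_index) {
        faiss::gpu::GpuClonerOptions options;
        options.useFloat16 = false;
        options.storeTransposed = true;   // the field added in this hunk
        return faiss::gpu::index_cpu_to_gpu (res, /*device=*/0,
                                             cpu_index, &options);
    }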
......@@ -31,31 +31,30 @@ constexpr size_t kMinPagedQuerySize = (size_t) 256 * 1024 * 1024;
constexpr size_t kNonPinnedPageSize = (size_t) 256 * 1024 * 1024;
GpuIndexFlat::GpuIndexFlat(GpuResources* resources,
int device,
bool useFloat16,
const faiss::IndexFlat* index) :
GpuIndex(resources, device, index->d, index->metric_type),
const faiss::IndexFlat* index,
GpuIndexFlatConfig config) :
GpuIndex(resources, config.device, index->d, index->metric_type),
minPagedSize_(kMinPagedQuerySize),
useFloat16_(useFloat16),
config_(config),
data_(nullptr) {
copyFrom(index);
}
GpuIndexFlat::GpuIndexFlat(GpuResources* resources,
int device,
int dims,
bool useFloat16,
faiss::MetricType metric) :
GpuIndex(resources, device, dims, metric),
faiss::MetricType metric,
GpuIndexFlatConfig config) :
GpuIndex(resources, config.device, dims, metric),
minPagedSize_(kMinPagedQuerySize),
useFloat16_(useFloat16),
config_(config),
data_(nullptr) {
DeviceScope scope(device_);
data_ = new FlatIndex(resources,
dims,
metric == faiss::METRIC_L2,
useFloat16);
config_.useFloat16,
config_.storeTransposed);
}
GpuIndexFlat::~GpuIndexFlat() {
......@@ -74,7 +73,7 @@ GpuIndexFlat::getMinPagingSize() const {
bool
GpuIndexFlat::getUseFloat16() const {
return useFloat16_;
return config_.useFloat16;
}
void
......@@ -93,7 +92,8 @@ GpuIndexFlat::copyFrom(const faiss::IndexFlat* index) {
data_ = new FlatIndex(resources_,
this->d,
index->metric_type == faiss::METRIC_L2,
useFloat16_);
config_.useFloat16,
config_.storeTransposed);
// The index could be empty
if (index->ntotal > 0) {
......@@ -117,7 +117,7 @@ GpuIndexFlat::copyTo(faiss::IndexFlat* index) const {
auto stream = resources_->getDefaultStream(device_);
if (this->ntotal > 0) {
if (useFloat16_) {
if (config_.useFloat16) {
auto vecFloat32 = data_->getVectorsFloat32Copy(stream);
fromDevice(vecFloat32, index->xb.data(), stream);
} else {
......@@ -444,7 +444,7 @@ GpuIndexFlat::reconstruct(faiss::Index::idx_t key,
FAISS_ASSERT(key < this->ntotal);
auto stream = resources_->getDefaultStream(device_);
if (useFloat16_) {
if (config_.useFloat16) {
auto vec = data_->getVectorsFloat32Copy(key, 1, stream);
fromDevice(vec.data(), out, this->d, stream);
} else {
......@@ -463,7 +463,7 @@ GpuIndexFlat::reconstruct_n(faiss::Index::idx_t i0,
FAISS_ASSERT(i0 + num - 1 < this->ntotal);
auto stream = resources_->getDefaultStream(device_);
if (useFloat16_) {
if (config_.useFloat16) {
auto vec = data_->getVectorsFloat32Copy(i0, num, stream);
fromDevice(vec.data(), out, num * this->d, stream);
} else {
......@@ -486,17 +486,15 @@ GpuIndexFlat::set_typename() {
//
GpuIndexFlatL2::GpuIndexFlatL2(GpuResources* resources,
int device,
bool useFloat16,
faiss::IndexFlatL2* index) :
GpuIndexFlat(resources, device, useFloat16, index) {
faiss::IndexFlatL2* index,
GpuIndexFlatConfig config) :
GpuIndexFlat(resources, index, config) {
}
GpuIndexFlatL2::GpuIndexFlatL2(GpuResources* resources,
int device,
int dims,
bool useFloat16) :
GpuIndexFlat(resources, device, dims, useFloat16, faiss::METRIC_L2) {
GpuIndexFlatConfig config) :
GpuIndexFlat(resources, dims, faiss::METRIC_L2, config) {
}
void
......@@ -514,18 +512,15 @@ GpuIndexFlatL2::copyTo(faiss::IndexFlatL2* index) {
//
GpuIndexFlatIP::GpuIndexFlatIP(GpuResources* resources,
int device,
bool useFloat16,
faiss::IndexFlatIP* index) :
GpuIndexFlat(resources, device, useFloat16, index) {
faiss::IndexFlatIP* index,
GpuIndexFlatConfig config) :
GpuIndexFlat(resources, index, config) {
}
GpuIndexFlatIP::GpuIndexFlatIP(GpuResources* resources,
int device,
int dims,
bool useFloat16) :
GpuIndexFlat(resources, device, dims, useFloat16,
faiss::METRIC_INNER_PRODUCT) {
GpuIndexFlatConfig config) :
GpuIndexFlat(resources, dims, faiss::METRIC_INNER_PRODUCT, config) {
}
void
......
......@@ -25,6 +25,18 @@ namespace faiss { namespace gpu {
struct FlatIndex;
struct GpuIndexFlatConfig {
inline GpuIndexFlatConfig()
: device(0),
useFloat16(false),
storeTransposed(false) {
}
int device;
bool useFloat16;
bool storeTransposed;
};
/// Wrapper around the GPU implementation that looks like
/// faiss::IndexFlat; copies over centroid data from a given
/// faiss::IndexFlat
......@@ -33,16 +45,14 @@ class GpuIndexFlat : public GpuIndex {
/// Construct from a pre-existing faiss::IndexFlat instance, copying
/// data over to the given GPU
GpuIndexFlat(GpuResources* resources,
int device,
bool useFloat16,
const faiss::IndexFlat* index);
const faiss::IndexFlat* index,
GpuIndexFlatConfig config = GpuIndexFlatConfig());
/// Construct an empty instance that can be added to
GpuIndexFlat(GpuResources* resources,
int device,
int dims,
bool useFloat16,
faiss::MetricType metric);
faiss::MetricType metric,
GpuIndexFlatConfig config = GpuIndexFlatConfig());
~GpuIndexFlat() override;
......@@ -118,8 +128,7 @@ class GpuIndexFlat : public GpuIndex {
/// Size above which we page copies from the CPU to GPU
size_t minPagedSize_;
/// Whether or not we store our vectors in float32 or float16
const bool useFloat16_;
const GpuIndexFlatConfig config_;
/// Holds our GPU data containing the list of vectors
FlatIndex* data_;
......@@ -133,15 +142,13 @@ class GpuIndexFlatL2 : public GpuIndexFlat {
/// Construct from a pre-existing faiss::IndexFlatL2 instance, copying
/// data over to the given GPU
GpuIndexFlatL2(GpuResources* resources,
int device,
bool useFloat16,
faiss::IndexFlatL2* index);
faiss::IndexFlatL2* index,
GpuIndexFlatConfig config = GpuIndexFlatConfig());
/// Construct an empty instance that can be added to
GpuIndexFlatL2(GpuResources* resources,
int device,
int dims,
bool useFloat16);
GpuIndexFlatConfig config = GpuIndexFlatConfig());
/// Initialize ourselves from the given CPU index; will overwrite
/// all data in ourselves
......@@ -160,15 +167,13 @@ class GpuIndexFlatIP : public GpuIndexFlat {
/// Construct from a pre-existing faiss::IndexFlatIP instance, copying
/// data over to the given GPU
GpuIndexFlatIP(GpuResources* resources,
int device,
bool useFloat16,
faiss::IndexFlatIP* index);
faiss::IndexFlatIP* index,
GpuIndexFlatConfig config = GpuIndexFlatConfig());
/// Construct an empty instance that can be added to
GpuIndexFlatIP(GpuResources* resources,
int device,
int dims,
bool useFloat16);
GpuIndexFlatConfig config = GpuIndexFlatConfig());
/// Initialize ourselves from the given CPU index; will overwrite
/// all data in ourselves
......
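The device/useFloat16 constructor arguments are gone: construction now goes through GpuIndexFlatConfig, which defaults to device 0, float32, row-major storage. A minimal sketch of the new path:

    #include "gpu/GpuIndexFlat.h"
    #include "gpu/StandardGpuResources.h"

    // Sketch: build an empty GPU flat L2 index via the new config object.
    faiss::gpu::GpuIndexFlatL2 *make_index (faiss::gpu::GpuResources *res) {
        faiss::gpu::GpuIndexFlatConfig config;  // device=0, fp32, row-major
        config.device = 1;                      // override: place it on GPU 1
        config.useFloat16 = true;               // store vectors as float16
        config.storeTransposed = true;          // faster queries, slower add()
        return new faiss::gpu::GpuIndexFlatL2 (res, /*dims=*/128, config);
    }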
......@@ -82,14 +82,17 @@ GpuIndexIVF::init_() {
if (!quantizer_) {
// Construct an empty quantizer
GpuIndexFlatConfig config;
config.device = device_;
config.useFloat16 = useFloat16CoarseQuantizer_;
config.storeTransposed = false;
if (this->metric_type == faiss::METRIC_L2) {
// FIXME: 2 different float16 options?
quantizer_ = new GpuIndexFlatL2(resources_, device_, this->d,
useFloat16CoarseQuantizer_);
quantizer_ = new GpuIndexFlatL2(resources_, this->d, config);
} else if (this->metric_type == faiss::METRIC_INNER_PRODUCT) {
// FIXME: 2 different float16 options?
quantizer_ = new GpuIndexFlatIP(resources_, device_, this->d,
useFloat16CoarseQuantizer_);
quantizer_ = new GpuIndexFlatIP(resources_, this->d, config);
} else {
// unknown metric type
FAISS_ASSERT(false);
......@@ -131,14 +134,17 @@ GpuIndexIVF::copyFrom(const faiss::IndexIVF* index) {
delete quantizer_;
quantizer_ = nullptr;
GpuIndexFlatConfig config;
config.device = device_;
config.useFloat16 = useFloat16CoarseQuantizer_;
config.storeTransposed = false;
if (index->metric_type == faiss::METRIC_L2) {
// FIXME: 2 different float16 options?
quantizer_ = new GpuIndexFlatL2(resources_, device_, this->d,
useFloat16CoarseQuantizer_);
quantizer_ = new GpuIndexFlatL2(resources_, this->d, config);
} else if (index->metric_type == faiss::METRIC_INNER_PRODUCT) {
// FIXME: 2 different float16 options?
quantizer_ = new GpuIndexFlatIP(resources_, device_, this->d,
useFloat16CoarseQuantizer_);
quantizer_ = new GpuIndexFlatIP(resources_, this->d, config);
} else {
// unknown metric type
FAISS_ASSERT(false);
......
......@@ -92,8 +92,8 @@ IndexProxy::runOnIndex(std::function<void(faiss::Index*)> f) {
}
// Blocking wait for completion
for (auto& f : v) {
f.get();
for (auto& func : v) {
func.get();
}
}
......@@ -183,7 +183,8 @@ IndexProxy::set_typename() {
float kmeans_clustering_gpu (int ngpu, size_t d, size_t n, size_t k,
const float *x,
float *centroids,
bool useFloat16)
bool useFloat16,
bool storeTransposed)
{
Clustering clus (d, k);
// display logs if > 16Gflop per iteration
......@@ -194,8 +195,15 @@ float kmeans_clustering_gpu (int ngpu, size_t d, size_t n, size_t k,
std::vector<std::unique_ptr<GpuIndexFlatL2> > sub_indices;
for(int dev_no = 0; dev_no < ngpu; dev_no++) {
res.emplace_back(new StandardGpuResources());
GpuIndexFlatConfig config;
config.device = dev_no;
config.useFloat16 = useFloat16;
config.storeTransposed = storeTransposed;
sub_indices.emplace_back(
new GpuIndexFlatL2(res.back().get(), dev_no, d, useFloat16));
new GpuIndexFlatL2(res.back().get(), d, config));
}
IndexProxy proxy;
......
......@@ -98,7 +98,8 @@ class IndexProxy : public faiss::Index {
float kmeans_clustering_gpu (int ngpu, size_t d, size_t n, size_t k,
const float *x,
float *centroids,
bool useFloat16);
bool useFloat16,
bool storeTransposed);
......
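Callers of kmeans_clustering_gpu pick up one extra flag. A hedged sketch of the updated call (run_kmeans is illustrative):

    #include <vector>
    #include "gpu/IndexProxy.h"

    // Sketch: single-GPU k-means through the extended entry point above;
    // the return value is the final clustering objective.
    float run_kmeans (size_t d, size_t n, size_t k, const float *x) {
        std::vector<float> centroids (k * d);
        return faiss::gpu::kmeans_clustering_gpu (
            /*ngpu=*/1, d, n, k, x, centroids.data(),
            /*useFloat16=*/false,
            /*storeTransposed=*/true);   // the newly added argument
    }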
......@@ -119,7 +119,8 @@ StandardGpuResources::initializeForDevice(int device) {
auto& prop = getDeviceProperties(device);
// Also check to make sure we meet our minimum compute capability (3.5)
FAISS_ASSERT(prop.major > 3 || (prop.major == 3 && prop.minor >= 5));
FAISS_ASSERT(prop.major > 3 || (prop.major == 3 && prop.minor >= 5) ||
!"Device not supported, need 3.5+ compute capability");
// Create streams
cudaStream_t defaultStream = 0;
......
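The || !"..." tail here is a classic trick for giving an assert a message: a string literal converts to a non-null pointer, so !"msg" is always false and never weakens the condition, but the literal shows up in the printed failed expression. The same idiom with plain assert:

    #include <cassert>

    // assert prints the whole failed expression, so the string literal acts
    // as an inline error message; !"..." evaluates to false unconditionally.
    void check_capability (int major, int minor) {
        assert (major > 3 || (major == 3 && minor >= 5) ||
                !"Device not supported, need 3.5+ compute capability");
    }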
......@@ -33,6 +33,7 @@ constexpr int kDefaultTileSize = 256;
template <typename T>
void runL2Distance(GpuResources* resources,
Tensor<T, 2, true>& centroids,
Tensor<T, 2, true>* centroidsTransposed,
Tensor<T, 1, true>* centroidNorms,
Tensor<T, 2, true>& queries,
int k,
......@@ -132,7 +133,8 @@ void runL2Distance(GpuResources* resources,
// (query id x dim) x (centroid id, dim)' = (query id, centroid id)
runMatrixMult(distanceBufView, false,
queryView, false,
centroids, true,
centroidsTransposed ? *centroidsTransposed : centroids,
centroidsTransposed ? false : true,
-2.0f, 0.0f,
resources->getBlasHandleCurrentDevice(),
streams[curStream]);
......@@ -164,6 +166,7 @@ void runL2Distance(GpuResources* resources,
template <typename T>
void runIPDistance(GpuResources* resources,
Tensor<T, 2, true>& centroids,
Tensor<T, 2, true>* centroidsTransposed,
Tensor<T, 2, true>& queries,
int k,
Tensor<T, 2, true>& outDistances,
......@@ -236,7 +239,9 @@ void runIPDistance(GpuResources* resources,
// (query id x dim) x (centroid id, dim)' = (query id, centroid id)
runMatrixMult(distanceBufView, false,
queryView, false, centroids, true,
queryView, false,
centroidsTransposed ? *centroidsTransposed : centroids,
centroidsTransposed ? false : true,
1.0f, 0.0f,
resources->getBlasHandleCurrentDevice(),
streams[curStream]);
......@@ -261,6 +266,7 @@ void runIPDistance(GpuResources* resources,
void
runIPDistance(GpuResources* resources,
Tensor<float, 2, true>& vectors,
Tensor<float, 2, true>* vectorsTransposed,
Tensor<float, 2, true>& queries,
int k,
Tensor<float, 2, true>& outDistances,
......@@ -268,6 +274,7 @@ runIPDistance(GpuResources* resources,
int tileSize) {
runIPDistance<float>(resources,
vectors,
vectorsTransposed,
queries,
k,
outDistances,
......@@ -279,6 +286,7 @@ runIPDistance(GpuResources* resources,
void
runIPDistance(GpuResources* resources,
Tensor<half, 2, true>& vectors,
Tensor<half, 2, true>* vectorsTransposed,
Tensor<half, 2, true>& queries,
int k,
Tensor<half, 2, true>& outDistances,
......@@ -286,6 +294,7 @@ runIPDistance(GpuResources* resources,
int tileSize) {
runIPDistance<half>(resources,
vectors,
vectorsTransposed,
queries,
k,
outDistances,
......@@ -297,6 +306,7 @@ runIPDistance(GpuResources* resources,
void
runL2Distance(GpuResources* resources,
Tensor<float, 2, true>& vectors,
Tensor<float, 2, true>* vectorsTransposed,
Tensor<float, 1, true>* vectorNorms,
Tensor<float, 2, true>& queries,
int k,
......@@ -306,6 +316,7 @@ runL2Distance(GpuResources* resources,
int tileSize) {
runL2Distance<float>(resources,
vectors,
vectorsTransposed,
vectorNorms,
queries,
k,
......@@ -319,6 +330,7 @@ runL2Distance(GpuResources* resources,
void
runL2Distance(GpuResources* resources,
Tensor<half, 2, true>& vectors,
Tensor<half, 2, true>* vectorsTransposed,
Tensor<half, 1, true>* vectorNorms,
Tensor<half, 2, true>& queries,
int k,
......@@ -328,6 +340,7 @@ runL2Distance(GpuResources* resources,
int tileSize) {
runL2Distance<half>(resources,
vectors,
vectorsTransposed,
vectorNorms,
queries,
k,
......
......@@ -22,6 +22,7 @@ class GpuResources;
/// `queries`, returning the k closest results seen
void runL2Distance(GpuResources* resources,
Tensor<float, 2, true>& vectors,
Tensor<float, 2, true>* vectorsTransposed,
// can be optionally pre-computed; nullptr if we
// have to compute it upon the call
Tensor<float, 1, true>* vectorNorms,
......@@ -41,6 +42,7 @@ void runL2Distance(GpuResources* resources,
/// and `queries`, returning the k closest results seen
void runIPDistance(GpuResources* resources,
Tensor<float, 2, true>& vectors,
Tensor<float, 2, true>* vectorsTransposed,
Tensor<float, 2, true>& queries,
int k,
Tensor<float, 2, true>& outDistances,
......@@ -53,6 +55,7 @@ void runIPDistance(GpuResources* resources,
#ifdef FAISS_USE_FLOAT16
void runIPDistance(GpuResources* resources,
Tensor<half, 2, true>& vectors,
Tensor<half, 2, true>* vectorsTransposed,
Tensor<half, 2, true>& queries,
int k,
Tensor<half, 2, true>& outDistances,
......@@ -61,6 +64,7 @@ void runIPDistance(GpuResources* resources,
void runL2Distance(GpuResources* resources,
Tensor<half, 2, true>& vectors,
Tensor<half, 2, true>* vectorsTransposed,
Tensor<half, 1, true>* vectorNorms,
Tensor<half, 2, true>& queries,
int k,
......
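Why vectorsTransposed is threaded through these entry points: the distance kernels reduce to a GEMM against the database matrix, and a (dim x num) copy stored up front lets that GEMM read both operands without an implicit transpose, trading add-time work for query-time speed. A CPU analogy of the access pattern, not the GPU kernel itself:

    #include <cstddef>
    #include <vector>

    // Sketch: inner products of queries (nq x d) against a pre-transposed
    // database (d x nc); both inner-loop reads are contiguous, which is the
    // layout advantage storeTransposed buys the GEMM.
    void qc_products (const std::vector<float> &queries,     // nq x d
                      const std::vector<float> &databaseT,   // d x nc
                      size_t nq, size_t nc, size_t d,
                      std::vector<float> &out) {             // nq x nc
        out.assign (nq * nc, 0.f);
        for (size_t q = 0; q < nq; q++)
            for (size_t i = 0; i < d; i++) {
                float qv = queries[q * d + i];
                const float *row = &databaseT[i * nc];       // contiguous
                for (size_t c = 0; c < nc; c++)
                    out[q * nc + c] += qv * row[c];
            }
    }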
......@@ -14,16 +14,19 @@
#include "L2Norm.cuh"
#include "../utils/CopyUtils.cuh"
#include "../utils/DeviceUtils.h"
#include "../utils/Transpose.cuh"
namespace faiss { namespace gpu {
FlatIndex::FlatIndex(GpuResources* res,
int dim,
bool l2Distance,
bool useFloat16) :
bool useFloat16,
bool storeTransposed) :
resources_(res),
dim_(dim),
useFloat16_(useFloat16),
storeTransposed_(storeTransposed),
l2Distance_(l2Distance),
num_(0) {
#ifndef FAISS_USE_FLOAT16
......@@ -92,7 +95,7 @@ FlatIndex::getVectorsFloat32Copy(int from, int num, cudaStream_t stream) {
}
void
FlatIndex::query(Tensor<float, 2, true>& vecs,
FlatIndex::query(Tensor<float, 2, true>& input,
int k,
Tensor<float, 2, true>& outDistances,
Tensor<int, 2, true>& outIndices,
......@@ -104,12 +107,12 @@ FlatIndex::query(Tensor<float, 2, true>& vecs,
if (useFloat16_) {
// We need to convert to float16
#ifdef FAISS_USE_FLOAT16
auto vecsHalf = toHalf<2>(resources_, stream, vecs);
auto inputHalf = toHalf<2>(resources_, stream, input);
DeviceTensor<half, 2, true> outDistancesHalf(
mem, {outDistances.getSize(0), outDistances.getSize(1)}, stream);
query(vecsHalf, k, outDistancesHalf, outIndices, exactDistance, tileSize);
query(inputHalf, k, outDistancesHalf, outIndices, exactDistance, tileSize);
if (exactDistance) {
// Convert outDistances back
......@@ -120,8 +123,9 @@ FlatIndex::query(Tensor<float, 2, true>& vecs,
if (l2Distance_) {
runL2Distance(resources_,
vectors_,
storeTransposed_ ? &vectorsTransposed_ : nullptr,
&norms_,
vecs,
input,
k,
outDistances,
outIndices,
......@@ -131,7 +135,8 @@ FlatIndex::query(Tensor<float, 2, true>& vecs,
} else {
runIPDistance(resources_,
vectors_,
vecs,
storeTransposed_ ? &vectorsTransposed_ : nullptr,
input,
k,
outDistances,
outIndices,
......@@ -142,7 +147,7 @@ FlatIndex::query(Tensor<float, 2, true>& vecs,
#ifdef FAISS_USE_FLOAT16
void
FlatIndex::query(Tensor<half, 2, true>& vecs,
FlatIndex::query(Tensor<half, 2, true>& input,
int k,
Tensor<half, 2, true>& outDistances,
Tensor<int, 2, true>& outIndices,
......@@ -153,8 +158,9 @@ FlatIndex::query(Tensor<half, 2, true>& vecs,
if (l2Distance_) {
runL2Distance(resources_,
vectorsHalf_,
storeTransposed_ ? &vectorsHalfTransposed_ : nullptr,
&normsHalf_,
vecs,
input,
k,
outDistances,
outIndices,
......@@ -164,7 +170,8 @@ FlatIndex::query(Tensor<half, 2, true>& vecs,
} else {
runIPDistance(resources_,
vectorsHalf_,
vecs,
storeTransposed_ ? &vectorsHalfTransposed_ : nullptr,
input,
k,
outDistances,
outIndices,
......@@ -215,6 +222,20 @@ FlatIndex::add(const float* data, int numVecs, cudaStream_t stream) {
vectors_ = std::move(vectors);
}
if (storeTransposed_) {
if (useFloat16_) {
#ifdef FAISS_USE_FLOAT16
vectorsHalfTransposed_ =
std::move(DeviceTensor<half, 2, true>({dim_, (int) num_}));
runTransposeAny(vectorsHalf_, 0, 1, vectorsHalfTransposed_, stream);
#endif
} else {
vectorsTransposed_ =
std::move(DeviceTensor<float, 2, true>({dim_, (int) num_}));
runTransposeAny(vectors_, 0, 1, vectorsTransposed_, stream);
}
}
if (l2Distance_) {
// Precompute L2 norms of our database
if (useFloat16_) {
......
......@@ -25,7 +25,8 @@ class FlatIndex {
FlatIndex(GpuResources* res,
int dim,
bool l2Distance,
bool useFloat16);
bool useFloat16,
bool storeTransposed);
bool getUseFloat16() const;
......@@ -84,6 +85,10 @@ class FlatIndex {
/// Float16 data format
const bool useFloat16_;
/// Store vectors in transposed layout for speed; makes addition to
/// the index slower
const bool storeTransposed_;
/// L2 or inner product distance?
bool l2Distance_;
......@@ -95,10 +100,12 @@ class FlatIndex {
/// Vectors currently in rawData_
DeviceTensor<float, 2, true> vectors_;
DeviceTensor<float, 2, true> vectorsTransposed_;
#ifdef FAISS_USE_FLOAT16
/// Vectors currently in rawData_, float16 form
DeviceTensor<half, 2, true> vectorsHalf_;
DeviceTensor<half, 2, true> vectorsHalfTransposed_;
#endif
/// Precomputed L2 norms
......
......@@ -187,6 +187,7 @@ IVFPQ::classifyAndAddVectors(Tensor<float, 2, true>& vecs,
runL2Distance(resources_,
pqCentroidsMiddleCodeView,
nullptr, // no transposed storage
nullptr, // no precomputed norms
residualsTransposeView,
1,
......
......@@ -31,6 +31,7 @@ DEFINE_int32(dim, 128, "# of dimensions");
DEFINE_int32(num_queries, 3, "number of query vectors");
DEFINE_bool(diff, true, "show exact distance + index output discrepancies");
DEFINE_bool(use_float16, false, "use encodings in float16 instead of float32");
DEFINE_bool(transposed, false, "store vectors transposed");
DEFINE_int64(seed, -1, "specify random seed");
DEFINE_int32(num_gpus, 1, "number of gpus to use");
DEFINE_int64(pinned_mem, 0, "pinned memory allocation to use");
......@@ -38,7 +39,7 @@ DEFINE_int64(pinned_mem, 0, "pinned memory allocation to use");
using namespace faiss::gpu;
int main(int argc, char** argv) {
google::ParseCommandLineFlags(&argc, &argv, true);
gflags::ParseCommandLineFlags(&argc, &argv, true);
cudaProfilerStop();
......@@ -59,6 +60,7 @@ int main(int argc, char** argv) {
printf("L2 lookup: %d queries, total k %d\n",
numQueries, FLAGS_k);
printf("float16 encoding %s\n", FLAGS_use_float16 ? "enabled" : "disabled");
printf("transposed storage %s\n", FLAGS_transposed ? "enabled" : "disabled");
// Convert to GPU index
printf("Copying index to %d GPU(s)...\n", FLAGS_num_gpus);
......@@ -68,8 +70,13 @@ int main(int argc, char** argv) {
((faiss::gpu::StandardGpuResources*) res)->setPinnedMemory(
FLAGS_pinned_mem);
GpuIndexFlatConfig config;
config.device = dev;
config.useFloat16 = FLAGS_use_float16;
config.storeTransposed = FLAGS_transposed;
auto p = std::unique_ptr<faiss::gpu::GpuIndexFlatL2>(
new faiss::gpu::GpuIndexFlatL2(res, dev, FLAGS_use_float16, index.get()));
new faiss::gpu::GpuIndexFlatL2(res, index.get(), config));
return p;
};
......
......@@ -41,7 +41,7 @@ DEFINE_int32(index, 2, "0 = no indices on GPU; 1 = 32 bit, 2 = 64 bit on GPU");
using namespace faiss::gpu;
int main(int argc, char** argv) {
google::ParseCommandLineFlags(&argc, &argv, true);
gflags::ParseCommandLineFlags(&argc, &argv, true);
cudaProfilerStop();
......
......@@ -41,7 +41,7 @@ DEFINE_int32(index, 2, "0 = no indices on GPU; 1 = 32 bit, 2 = 64 bit on GPU");
using namespace faiss::gpu;
int main(int argc, char** argv) {
google::ParseCommandLineFlags(&argc, &argv, true);
gflags::ParseCommandLineFlags(&argc, &argv, true);
CUDA_VERIFY(cudaProfilerStop());
......
......@@ -34,7 +34,7 @@ DEFINE_int32(index, 2, "0 = no indices on GPU; 1 = 32 bit, 2 = 64 bit on GPU");
using namespace faiss::gpu;
int main(int argc, char** argv) {
google::ParseCommandLineFlags(&argc, &argv, true);
gflags::ParseCommandLineFlags(&argc, &argv, true);
auto seed = time(nullptr);
auto k = FLAGS_k;
......
......@@ -28,6 +28,7 @@ DEFINE_int32(dim, 128, "# of dimensions");
DEFINE_int32(niter, 10, "# of iterations");
DEFINE_bool(L2_metric, true, "If true, use L2 metric. If false, use IP metric");
DEFINE_bool(use_float16, false, "use float16 vectors and math");
DEFINE_bool(transposed, false, "transposed vector storage");
DEFINE_bool(verbose, false, "turn on clustering logging");
DEFINE_int64(seed, -1, "specify random seed");
DEFINE_int32(num_gpus, 1, "number of gpus to use");
......@@ -38,7 +39,7 @@ DEFINE_int32(max_points, -1, "max points per centroid");
using namespace faiss::gpu;
int main(int argc, char** argv) {
google::ParseCommandLineFlags(&argc, &argv, true);
gflags::ParseCommandLineFlags(&argc, &argv, true);
cudaProfilerStop();
......@@ -52,6 +53,7 @@ int main(int argc, char** argv) {
FLAGS_L2_metric ? "L2" : "IP",
FLAGS_dim, FLAGS_k, FLAGS_num, FLAGS_niter);
printf("float16 math %s\n", FLAGS_use_float16 ? "enabled" : "disabled");
printf("transposed storage %s\n", FLAGS_transposed ? "enabled" : "disabled");
printf("verbose %s\n", FLAGS_verbose ? "enabled" : "disabled");
auto initFn = [](faiss::gpu::GpuResources* res, int dev) ->
......@@ -61,12 +63,17 @@ int main(int argc, char** argv) {
FLAGS_pinned_mem);
}
GpuIndexFlatConfig config;
config.device = dev;
config.useFloat16 = FLAGS_use_float16;
config.storeTransposed = FLAGS_transposed;
auto p = std::unique_ptr<faiss::gpu::GpuIndexFlat>(
FLAGS_L2_metric ?
(faiss::gpu::GpuIndexFlat*)
new faiss::gpu::GpuIndexFlatL2(res, dev, FLAGS_dim, FLAGS_use_float16) :
new faiss::gpu::GpuIndexFlatL2(res, FLAGS_dim, config) :
(faiss::gpu::GpuIndexFlat*)
new faiss::gpu::GpuIndexFlatIP(res, dev, FLAGS_dim, FLAGS_use_float16));
new faiss::gpu::GpuIndexFlatIP(res, FLAGS_dim, config));
if (FLAGS_min_paging_size >= 0) {
p->setMinPagingSize(FLAGS_min_paging_size);
......
......@@ -35,7 +35,7 @@ DEFINE_bool(per_batch_time, false, "print per-batch times");
DEFINE_bool(reserve_memory, false, "whether or not to pre-reserve memory");
int main(int argc, char** argv) {
google::ParseCommandLineFlags(&argc, &argv, true);
gflags::ParseCommandLineFlags(&argc, &argv, true);
cudaProfilerStop();
......
......@@ -52,7 +52,7 @@ void fillAndSave(T& index, int numTrain, int num, int dim) {
}
int main(int argc, char** argv) {
google::ParseCommandLineFlags(&argc, &argv, true);
gflags::ParseCommandLineFlags(&argc, &argv, true);
// Either ivfpq or ivfflat must be set
if ((FLAGS_ivfpq && FLAGS_ivfflat) ||
......
......@@ -22,7 +22,10 @@
constexpr float kF16MaxRelErr = 0.07f;
constexpr float kF32MaxRelErr = 6e-3f;
void testFlat(bool useL2, bool useFloat16, int kOverride = -1) {
void testFlat(bool useL2,
bool useFloat16,
bool useTransposed,
int kOverride = -1) {
int numVecs = faiss::gpu::randVal(1000, 20000);
int dim = faiss::gpu::randVal(50, 800);
int numQuery = faiss::gpu::randVal(1, 512);
......@@ -49,8 +52,15 @@ void testFlat(bool useL2, bool useFloat16, int kOverride = -1) {
faiss::gpu::StandardGpuResources res;
res.noTempMemory();
faiss::gpu::GpuIndexFlatIP gpuIndexIP(&res, device, dim, useFloat16);
faiss::gpu::GpuIndexFlatL2 gpuIndexL2(&res, device, dim, useFloat16);
faiss::gpu::GpuIndexFlatConfig config;
config.device = device;
config.useFloat16 = useFloat16;
config.storeTransposed = useTransposed;
faiss::gpu::GpuIndexFlatIP gpuIndexIP(&res, dim, config);
faiss::gpu::GpuIndexFlatL2 gpuIndexL2(&res, dim, config);
faiss::gpu::GpuIndexFlat* gpuIndex =
useL2 ? (faiss::gpu::GpuIndexFlat*) &gpuIndexL2 :
......@@ -64,6 +74,7 @@ void testFlat(bool useL2, bool useFloat16, int kOverride = -1) {
str << (useL2 ? "L2" : "IP") << " numVecs " << numVecs
<< " dim " << dim
<< " useFloat16 " << useFloat16
<< " transposed " << useTransposed
<< " numQuery " << numQuery
<< " k " << k;
......@@ -79,16 +90,18 @@ void testFlat(bool useL2, bool useFloat16, int kOverride = -1) {
}
TEST(TestGpuIndexFlat, IP_Float32) {
for (int tries = 0; tries < 10; ++tries) {
for (int tries = 0; tries < 5; ++tries) {
faiss::gpu::newTestSeed();
testFlat(false, false);
testFlat(false, false, false);
testFlat(false, false, true);
}
}
TEST(TestGpuIndexFlat, L2_Float32) {
for (int tries = 0; tries < 10; ++tries) {
for (int tries = 0; tries < 5; ++tries) {
faiss::gpu::newTestSeed();
testFlat(true, false);
testFlat(true, false, false);
testFlat(true, false, true);
}
}
......@@ -96,21 +109,24 @@ TEST(TestGpuIndexFlat, L2_Float32) {
TEST(TestGpuIndexFlat, L2_Float32_K1) {
for (int tries = 0; tries < 5; ++tries) {
faiss::gpu::newTestSeed();
testFlat(true, false, 1);
testFlat(true, false, false, 1);
testFlat(true, false, true, 1);
}
}
TEST(TestGpuIndexFlat, IP_Float16) {
for (int tries = 0; tries < 10; ++tries) {
for (int tries = 0; tries < 5; ++tries) {
faiss::gpu::newTestSeed();
testFlat(false, true);
testFlat(false, true, false);
testFlat(false, true, false);
}
}
TEST(TestGpuIndexFlat, L2_Float16) {
for (int tries = 0; tries < 10; ++tries) {
for (int tries = 0; tries < 5; ++tries) {
faiss::gpu::newTestSeed();
testFlat(true, true);
testFlat(true, true, false);
testFlat(true, true, true);
}
}
......@@ -118,7 +134,8 @@ TEST(TestGpuIndexFlat, L2_Float16) {
TEST(TestGpuIndexFlat, L2_Float16_K1) {
for (int tries = 0; tries < 5; ++tries) {
faiss::gpu::newTestSeed();
testFlat(true, true, 1);
testFlat(true, true, false, 1);
testFlat(true, true, true, 1);
}
}
......@@ -126,8 +143,13 @@ TEST(TestGpuIndexFlat, QueryEmpty) {
faiss::gpu::StandardGpuResources res;
res.noTempMemory();
faiss::gpu::GpuIndexFlatConfig config;
config.device = 0;
config.useFloat16 = false;
config.storeTransposed = false;
int dim = 128;
faiss::gpu::GpuIndexFlatL2 gpuIndex(&res, 0, dim, false);
faiss::gpu::GpuIndexFlatL2 gpuIndex(&res, dim, config);
// Querying an empty index should not blow up, and just return
// (FLT_MAX, -1)
......@@ -165,7 +187,13 @@ TEST(TestGpuIndexFlat, CopyFrom) {
// Fill with garbage values
int device = faiss::gpu::randVal(0, faiss::gpu::getNumDevices() - 1);
faiss::gpu::GpuIndexFlatL2 gpuIndex(&res, device, 2000, false);
faiss::gpu::GpuIndexFlatConfig config;
config.device = 0;
config.useFloat16 = false;
config.storeTransposed = false;
faiss::gpu::GpuIndexFlatL2 gpuIndex(&res, 2000, config);
gpuIndex.copyFrom(&cpuIndex);
EXPECT_EQ(cpuIndex.ntotal, gpuIndex.ntotal);
......@@ -195,7 +223,13 @@ TEST(TestGpuIndexFlat, CopyTo) {
int dim = faiss::gpu::randVal(1, 1000);
int device = faiss::gpu::randVal(0, faiss::gpu::getNumDevices() - 1);
faiss::gpu::GpuIndexFlatL2 gpuIndex(&res, device, dim, false);
faiss::gpu::GpuIndexFlatConfig config;
config.device = device;
config.useFloat16 = false;
config.storeTransposed = false;
faiss::gpu::GpuIndexFlatL2 gpuIndex(&res, dim, config);
std::vector<float> vecs = faiss::gpu::randVecs(numVecs, dim);
gpuIndex.add(numVecs, vecs.data());
......
......@@ -45,7 +45,10 @@ class EvalIVFPQAccuracy(testutil.BaseFacebookTestCase):
res = faiss.StandardGpuResources()
gt_index = faiss.GpuIndexFlatL2(res, dev_no, d, False)
flat_config = faiss.GpuIndexFlatConfig()
flat_config.device = dev_no
gt_index = faiss.GpuIndexFlatL2(res, d, flat_config)
gt_index.add(xb)
D, gt_nns = gt_index.search(xq, 1)
......
......@@ -11,6 +11,7 @@
#include "../../FaissAssert.h"
#include "DeviceUtils.h"
#include <limits>
namespace faiss { namespace gpu {
......@@ -279,6 +280,58 @@ Tensor<T, Dim, Contig, IndexT, PtrTraits>::canCastResize() const {
return true;
}
template <typename T, int Dim, bool Contig,
typename IndexT, template <typename U> class PtrTraits>
template <typename NewIndexT>
__host__ Tensor<T, Dim, Contig, NewIndexT, PtrTraits>
Tensor<T, Dim, Contig, IndexT, PtrTraits>::castIndexType() const {
if (sizeof(NewIndexT) < sizeof(IndexT)) {
assert(this->canCastIndexType<NewIndexT>());
}
NewIndexT newSize[Dim];
NewIndexT newStride[Dim];
for (int i = 0; i < Dim; ++i) {
newSize[i] = (NewIndexT) size_[i];
newStride[i] = (NewIndexT) stride_[i];
}
return Tensor<T, Dim, Contig, NewIndexT, PtrTraits>(
data_, newSize, newStride);
}
template <typename T, int Dim, bool Contig,
typename IndexT, template <typename U> class PtrTraits>
template <typename NewIndexT>
__host__ bool
Tensor<T, Dim, Contig, IndexT, PtrTraits>::canCastIndexType() const {
static_assert(sizeof(size_t) >= sizeof(IndexT),
"index size too large");
static_assert(sizeof(size_t) >= sizeof(NewIndexT),
"new index size too large");
// Find maximum offset that can be calculated
// FIXME: maybe also consider offset in bytes? multiply by sizeof(T)?
size_t maxOffset = 0;
if (Contig) {
maxOffset = (size_t) size_[0] * (size_t) stride_[0];
} else {
for (int i = 0; i < Dim; ++i) {
size_t curMaxOffset = (size_t) size_[i] * (size_t) stride_[i];
if (curMaxOffset > maxOffset) {
maxOffset = curMaxOffset;
}
}
}
if (maxOffset > (size_t) std::numeric_limits<NewIndexT>::max()) {
return false;
}
return true;
}
template <typename T, int Dim, bool Contig,
typename IndexT, template <typename U> class PtrTraits>
__host__ __device__ IndexT
......
......@@ -158,6 +158,18 @@ class Tensor {
template <typename U>
__host__ __device__ bool canCastResize() const;
/// Attempts to cast this tensor to a tensor of a different IndexT.
/// Fails if size or stride entries are not representable in the new
/// IndexT.
template <typename NewIndexT>
__host__ Tensor<T, Dim, Contig, NewIndexT, PtrTraits>
castIndexType() const;
/// Returns true if we can castIndexType() this tensor to the new
/// index type
template <typename NewIndexT>
__host__ bool canCastIndexType() const;
/// Returns a raw pointer to the start of our data.
__host__ __device__ inline DataPtrType data() {
return data_;
......@@ -337,6 +349,27 @@ class Tensor {
IndexT size_[Dim];
};
// Utilities for checking a collection of tensors
namespace detail {
template <typename IndexType>
bool canCastIndexType() {
return true;
}
template <typename IndexType, typename T, typename... U>
bool canCastIndexType(const T& arg, const U&... args) {
return arg.canCastIndexType<IndexType>() &&
canCastIndexType(args...);
}
} // namespace detail
template <typename IndexType, typename... T>
bool canCastIndexType(const T&... args) {
return detail::canCastIndexType(args...);
}
namespace detail {
/// Specialization for a view of a single value (0-dimensional)
......
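The variadic helper turns "can all of these tensors be addressed with a narrower index type?" into a one-liner. A usage sketch against the API declared above (dispatch is illustrative, and the defaulted PtrTraits template argument is assumed):

    #include "Tensor.cuh"

    // Sketch: take a 32-bit indexing path only when every tensor's maximum
    // offset is representable (see canCastIndexType above).
    template <typename T>
    void dispatch (faiss::gpu::Tensor<T, 2, true, long> &a,
                   faiss::gpu::Tensor<T, 2, true, long> &b) {
        if (faiss::gpu::canCastIndexType<int> (a, b)) {
            auto a32 = a.template castIndexType<int> ();
            auto b32 = b.template castIndexType<int> ();
            // ... launch the kernel with cheaper 32-bit index arithmetic ...
        } else {
            // ... fall back to 64-bit indexing ...
        }
    }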
......@@ -24,16 +24,25 @@
#include "IndexPQ.h"
#include "IndexIVF.h"
#include "IndexIVFPQ.h"
#include "MetaIndexes.h"
/*************************************************************
* The I/O format is the content of the class. For objects that are
* inherited, like Index, a 4-character-code indicates which child
* class this is an instance of.
* inherited, like Index, a 4-character-code (fourcc) indicates which
* child class this is an instance of.
*
* In this case, the fields of the parent class are written first,
* then the ones for the child classes. Note that this requires
* classes to be serialized to have a constructor without parameters,
* so that the fields can be filled in later.
* so that the fields can be filled in later. The default constructor
* should set reasonable defaults for all fields.
*
* The fourccs are assigned arbitrarily. When the class changed (added
* or deprecated fields), the fourcc can be replaced. New code should
* be able to read the old fourcc and fill in new classes.
*
* TODO: serialization to strings for use in Python pickle or Torch
* serialization.
**************************************************************/
......@@ -294,6 +303,13 @@ void write_index (const Index *idx, FILE *f) {
write_index (idxrf->base_index, f);
write_index (&idxrf->refine_index, f);
WRITE1 (idxrf->k_factor);
} else if(const IndexIDMap * idxmap =
dynamic_cast<const IndexIDMap *> (idx)) {
uint32_t h = fourcc ("IxMp");
WRITE1 (h);
write_index_header (idxmap, f);
write_index (idxmap->index, f);
WRITEVECTOR (idxmap->id_map);
} else {
FAISS_ASSERT (!"don't know how to serialize this type of index");
}
......@@ -572,6 +588,13 @@ Index *read_index (FILE * f, bool try_mmap) {
delete rf;
READ1 (idxrf->k_factor);
idx = idxrf;
} else if(h == fourcc ("IxMp")) {
IndexIDMap * idxmap = new IndexIDMap ();
read_index_header (idxmap, f);
idxmap->index = read_index (f);
idxmap->own_fields = true;
READVECTOR (idxmap->id_map);
idx = idxmap;
} else {
fprintf (stderr, "Index type 0x%08x not supported\n", h);
abort ();
......
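With the "IxMp" fourcc registered in both write_index and read_index, an IndexIDMap now survives a save/load round trip (the "IndexIDMap fixed" item in the commit message). A minimal sketch, with placeholder data:

    #include <vector>
    #include "IndexFlat.h"
    #include "MetaIndexes.h"
    #include "index_io.h"

    // Sketch: wrap a flat index to use custom ids, write it, read it back.
    void roundtrip () {
        size_t d = 32, n = 100;
        std::vector<float> xb (n * d, 0.f);   // placeholder vectors
        std::vector<long> ids (n);
        for (size_t i = 0; i < n; i++) ids[i] = 1000 + i;  // arbitrary ids

        faiss::IndexFlatL2 flat (d);
        faiss::IndexIDMap idmap (&flat);      // keeps an id_map alongside
        idmap.add_with_ids (n, xb.data(), ids.data());

        faiss::write_index (&idmap, "idmap.faiss");   // writes fourcc "IxMp"
        faiss::Index *loaded = faiss::read_index ("idmap.faiss");
        // loaded is an IndexIDMap with own_fields = true (see read path above)
        delete loaded;
    }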
......@@ -724,6 +724,10 @@ bincode_hist = _swigfaiss.bincode_hist
def ivec_checksum(*args):
return _swigfaiss.ivec_checksum(*args)
ivec_checksum = _swigfaiss.ivec_checksum
def fvecs_maybe_subsample(*args):
return _swigfaiss.fvecs_maybe_subsample(*args)
fvecs_maybe_subsample = _swigfaiss.fvecs_maybe_subsample
METRIC_INNER_PRODUCT = _swigfaiss.METRIC_INNER_PRODUCT
METRIC_L2 = _swigfaiss.METRIC_L2
class Index(_object):
......@@ -963,13 +967,9 @@ class LinearTransform(VectorTransform):
except: self.this = this
def apply_noalloc(self, *args): return _swigfaiss.LinearTransform_apply_noalloc(self, *args)
def transform_transpose(self, *args): return _swigfaiss.LinearTransform_transform_transpose(self, *args)
__swig_setmethods__["max_points_per_d"] = _swigfaiss.LinearTransform_max_points_per_d_set
__swig_getmethods__["max_points_per_d"] = _swigfaiss.LinearTransform_max_points_per_d_get
if _newclass:max_points_per_d = _swig_property(_swigfaiss.LinearTransform_max_points_per_d_get, _swigfaiss.LinearTransform_max_points_per_d_set)
__swig_setmethods__["verbose"] = _swigfaiss.LinearTransform_verbose_set
__swig_getmethods__["verbose"] = _swigfaiss.LinearTransform_verbose_get
if _newclass:verbose = _swig_property(_swigfaiss.LinearTransform_verbose_get, _swigfaiss.LinearTransform_verbose_set)
def maybe_subsample_train_set(self, *args): return _swigfaiss.LinearTransform_maybe_subsample_train_set(self, *args)
__swig_destroy__ = _swigfaiss.delete_LinearTransform
__del__ = lambda self : None;
LinearTransform_swigregister = _swigfaiss.LinearTransform_swigregister
......@@ -1008,6 +1008,9 @@ class PCAMatrix(LinearTransform):
__swig_setmethods__["random_rotation"] = _swigfaiss.PCAMatrix_random_rotation_set
__swig_getmethods__["random_rotation"] = _swigfaiss.PCAMatrix_random_rotation_get
if _newclass:random_rotation = _swig_property(_swigfaiss.PCAMatrix_random_rotation_get, _swigfaiss.PCAMatrix_random_rotation_set)
__swig_setmethods__["max_points_per_d"] = _swigfaiss.PCAMatrix_max_points_per_d_set
__swig_getmethods__["max_points_per_d"] = _swigfaiss.PCAMatrix_max_points_per_d_get
if _newclass:max_points_per_d = _swig_property(_swigfaiss.PCAMatrix_max_points_per_d_get, _swigfaiss.PCAMatrix_max_points_per_d_set)
__swig_setmethods__["balanced_bins"] = _swigfaiss.PCAMatrix_balanced_bins_set
__swig_getmethods__["balanced_bins"] = _swigfaiss.PCAMatrix_balanced_bins_get
if _newclass:balanced_bins = _swig_property(_swigfaiss.PCAMatrix_balanced_bins_get, _swigfaiss.PCAMatrix_balanced_bins_set)
......@@ -1053,9 +1056,9 @@ class OPQMatrix(LinearTransform):
__swig_setmethods__["niter_pq_0"] = _swigfaiss.OPQMatrix_niter_pq_0_set
__swig_getmethods__["niter_pq_0"] = _swigfaiss.OPQMatrix_niter_pq_0_get
if _newclass:niter_pq_0 = _swig_property(_swigfaiss.OPQMatrix_niter_pq_0_get, _swigfaiss.OPQMatrix_niter_pq_0_set)
__swig_setmethods__["max_points_per_d"] = _swigfaiss.OPQMatrix_max_points_per_d_set
__swig_getmethods__["max_points_per_d"] = _swigfaiss.OPQMatrix_max_points_per_d_get
if _newclass:max_points_per_d = _swig_property(_swigfaiss.OPQMatrix_max_points_per_d_get, _swigfaiss.OPQMatrix_max_points_per_d_set)
__swig_setmethods__["max_train_points"] = _swigfaiss.OPQMatrix_max_train_points_set
__swig_getmethods__["max_train_points"] = _swigfaiss.OPQMatrix_max_train_points_get
if _newclass:max_train_points = _swig_property(_swigfaiss.OPQMatrix_max_train_points_get, _swigfaiss.OPQMatrix_max_train_points_set)
__swig_setmethods__["verbose"] = _swigfaiss.OPQMatrix_verbose_set
__swig_getmethods__["verbose"] = _swigfaiss.OPQMatrix_verbose_get
if _newclass:verbose = _swig_property(_swigfaiss.OPQMatrix_verbose_get, _swigfaiss.OPQMatrix_verbose_set)
......@@ -1885,10 +1888,6 @@ class IndexIDMap(Index):
__swig_setmethods__["id_map"] = _swigfaiss.IndexIDMap_id_map_set
__swig_getmethods__["id_map"] = _swigfaiss.IndexIDMap_id_map_get
if _newclass:id_map = _swig_property(_swigfaiss.IndexIDMap_id_map_get, _swigfaiss.IndexIDMap_id_map_set)
def __init__(self, *args):
this = _swigfaiss.new_IndexIDMap(*args)
try: self.this.append(this)
except: self.this = this
def add_with_ids(self, *args): return _swigfaiss.IndexIDMap_add_with_ids(self, *args)
def add(self, *args): return _swigfaiss.IndexIDMap_add(self, *args)
def search(self, *args): return _swigfaiss.IndexIDMap_search(self, *args)
......@@ -1897,6 +1896,10 @@ class IndexIDMap(Index):
def set_typename(self): return _swigfaiss.IndexIDMap_set_typename(self)
__swig_destroy__ = _swigfaiss.delete_IndexIDMap
__del__ = lambda self : None;
def __init__(self, *args):
this = _swigfaiss.new_IndexIDMap(*args)
try: self.this.append(this)
except: self.this = this
IndexIDMap_swigregister = _swigfaiss.IndexIDMap_swigregister
IndexIDMap_swigregister(IndexIDMap)
......
......@@ -793,6 +793,10 @@ bincode_hist = _swigfaiss_gpu.bincode_hist
def ivec_checksum(*args):
return _swigfaiss_gpu.ivec_checksum(*args)
ivec_checksum = _swigfaiss_gpu.ivec_checksum
def fvecs_maybe_subsample(*args):
return _swigfaiss_gpu.fvecs_maybe_subsample(*args)
fvecs_maybe_subsample = _swigfaiss_gpu.fvecs_maybe_subsample
METRIC_INNER_PRODUCT = _swigfaiss_gpu.METRIC_INNER_PRODUCT
METRIC_L2 = _swigfaiss_gpu.METRIC_L2
class Index(_object):
......@@ -1032,13 +1036,9 @@ class LinearTransform(VectorTransform):
except: self.this = this
def apply_noalloc(self, *args): return _swigfaiss_gpu.LinearTransform_apply_noalloc(self, *args)
def transform_transpose(self, *args): return _swigfaiss_gpu.LinearTransform_transform_transpose(self, *args)
__swig_setmethods__["max_points_per_d"] = _swigfaiss_gpu.LinearTransform_max_points_per_d_set
__swig_getmethods__["max_points_per_d"] = _swigfaiss_gpu.LinearTransform_max_points_per_d_get
if _newclass:max_points_per_d = _swig_property(_swigfaiss_gpu.LinearTransform_max_points_per_d_get, _swigfaiss_gpu.LinearTransform_max_points_per_d_set)
__swig_setmethods__["verbose"] = _swigfaiss_gpu.LinearTransform_verbose_set
__swig_getmethods__["verbose"] = _swigfaiss_gpu.LinearTransform_verbose_get
if _newclass:verbose = _swig_property(_swigfaiss_gpu.LinearTransform_verbose_get, _swigfaiss_gpu.LinearTransform_verbose_set)
def maybe_subsample_train_set(self, *args): return _swigfaiss_gpu.LinearTransform_maybe_subsample_train_set(self, *args)
__swig_destroy__ = _swigfaiss_gpu.delete_LinearTransform
__del__ = lambda self : None;
LinearTransform_swigregister = _swigfaiss_gpu.LinearTransform_swigregister
......@@ -1077,6 +1077,9 @@ class PCAMatrix(LinearTransform):
__swig_setmethods__["random_rotation"] = _swigfaiss_gpu.PCAMatrix_random_rotation_set
__swig_getmethods__["random_rotation"] = _swigfaiss_gpu.PCAMatrix_random_rotation_get
if _newclass:random_rotation = _swig_property(_swigfaiss_gpu.PCAMatrix_random_rotation_get, _swigfaiss_gpu.PCAMatrix_random_rotation_set)
__swig_setmethods__["max_points_per_d"] = _swigfaiss_gpu.PCAMatrix_max_points_per_d_set
__swig_getmethods__["max_points_per_d"] = _swigfaiss_gpu.PCAMatrix_max_points_per_d_get
if _newclass:max_points_per_d = _swig_property(_swigfaiss_gpu.PCAMatrix_max_points_per_d_get, _swigfaiss_gpu.PCAMatrix_max_points_per_d_set)
__swig_setmethods__["balanced_bins"] = _swigfaiss_gpu.PCAMatrix_balanced_bins_set
__swig_getmethods__["balanced_bins"] = _swigfaiss_gpu.PCAMatrix_balanced_bins_get
if _newclass:balanced_bins = _swig_property(_swigfaiss_gpu.PCAMatrix_balanced_bins_get, _swigfaiss_gpu.PCAMatrix_balanced_bins_set)
......@@ -1122,9 +1125,9 @@ class OPQMatrix(LinearTransform):
__swig_setmethods__["niter_pq_0"] = _swigfaiss_gpu.OPQMatrix_niter_pq_0_set
__swig_getmethods__["niter_pq_0"] = _swigfaiss_gpu.OPQMatrix_niter_pq_0_get
if _newclass:niter_pq_0 = _swig_property(_swigfaiss_gpu.OPQMatrix_niter_pq_0_get, _swigfaiss_gpu.OPQMatrix_niter_pq_0_set)
__swig_setmethods__["max_points_per_d"] = _swigfaiss_gpu.OPQMatrix_max_points_per_d_set
__swig_getmethods__["max_points_per_d"] = _swigfaiss_gpu.OPQMatrix_max_points_per_d_get
if _newclass:max_points_per_d = _swig_property(_swigfaiss_gpu.OPQMatrix_max_points_per_d_get, _swigfaiss_gpu.OPQMatrix_max_points_per_d_set)
__swig_setmethods__["max_train_points"] = _swigfaiss_gpu.OPQMatrix_max_train_points_set
__swig_getmethods__["max_train_points"] = _swigfaiss_gpu.OPQMatrix_max_train_points_get
if _newclass:max_train_points = _swig_property(_swigfaiss_gpu.OPQMatrix_max_train_points_get, _swigfaiss_gpu.OPQMatrix_max_train_points_set)
__swig_setmethods__["verbose"] = _swigfaiss_gpu.OPQMatrix_verbose_set
__swig_getmethods__["verbose"] = _swigfaiss_gpu.OPQMatrix_verbose_get
if _newclass:verbose = _swig_property(_swigfaiss_gpu.OPQMatrix_verbose_get, _swigfaiss_gpu.OPQMatrix_verbose_set)
......@@ -1954,10 +1957,6 @@ class IndexIDMap(Index):
__swig_setmethods__["id_map"] = _swigfaiss_gpu.IndexIDMap_id_map_set
__swig_getmethods__["id_map"] = _swigfaiss_gpu.IndexIDMap_id_map_get
if _newclass:id_map = _swig_property(_swigfaiss_gpu.IndexIDMap_id_map_get, _swigfaiss_gpu.IndexIDMap_id_map_set)
def __init__(self, *args):
this = _swigfaiss_gpu.new_IndexIDMap(*args)
try: self.this.append(this)
except: self.this = this
def add_with_ids(self, *args): return _swigfaiss_gpu.IndexIDMap_add_with_ids(self, *args)
def add(self, *args): return _swigfaiss_gpu.IndexIDMap_add(self, *args)
def search(self, *args): return _swigfaiss_gpu.IndexIDMap_search(self, *args)
......@@ -1966,6 +1965,10 @@ class IndexIDMap(Index):
def set_typename(self): return _swigfaiss_gpu.IndexIDMap_set_typename(self)
__swig_destroy__ = _swigfaiss_gpu.delete_IndexIDMap
__del__ = lambda self : None;
def __init__(self, *args):
this = _swigfaiss_gpu.new_IndexIDMap(*args)
try: self.this.append(this)
except: self.this = this
IndexIDMap_swigregister = _swigfaiss_gpu.IndexIDMap_swigregister
IndexIDMap_swigregister(IndexIDMap)
......@@ -2064,6 +2067,30 @@ class GpuIndex(Index):
GpuIndex_swigregister = _swigfaiss_gpu.GpuIndex_swigregister
GpuIndex_swigregister(GpuIndex)
class GpuIndexFlatConfig(_object):
__swig_setmethods__ = {}
__setattr__ = lambda self, name, value: _swig_setattr(self, GpuIndexFlatConfig, name, value)
__swig_getmethods__ = {}
__getattr__ = lambda self, name: _swig_getattr(self, GpuIndexFlatConfig, name)
__repr__ = _swig_repr
def __init__(self):
this = _swigfaiss_gpu.new_GpuIndexFlatConfig()
try: self.this.append(this)
except: self.this = this
__swig_setmethods__["device"] = _swigfaiss_gpu.GpuIndexFlatConfig_device_set
__swig_getmethods__["device"] = _swigfaiss_gpu.GpuIndexFlatConfig_device_get
if _newclass:device = _swig_property(_swigfaiss_gpu.GpuIndexFlatConfig_device_get, _swigfaiss_gpu.GpuIndexFlatConfig_device_set)
__swig_setmethods__["useFloat16"] = _swigfaiss_gpu.GpuIndexFlatConfig_useFloat16_set
__swig_getmethods__["useFloat16"] = _swigfaiss_gpu.GpuIndexFlatConfig_useFloat16_get
if _newclass:useFloat16 = _swig_property(_swigfaiss_gpu.GpuIndexFlatConfig_useFloat16_get, _swigfaiss_gpu.GpuIndexFlatConfig_useFloat16_set)
__swig_setmethods__["storeTransposed"] = _swigfaiss_gpu.GpuIndexFlatConfig_storeTransposed_set
__swig_getmethods__["storeTransposed"] = _swigfaiss_gpu.GpuIndexFlatConfig_storeTransposed_get
if _newclass:storeTransposed = _swig_property(_swigfaiss_gpu.GpuIndexFlatConfig_storeTransposed_get, _swigfaiss_gpu.GpuIndexFlatConfig_storeTransposed_set)
__swig_destroy__ = _swigfaiss_gpu.delete_GpuIndexFlatConfig
__del__ = lambda self : None;
GpuIndexFlatConfig_swigregister = _swigfaiss_gpu.GpuIndexFlatConfig_swigregister
GpuIndexFlatConfig_swigregister(GpuIndexFlatConfig)
class GpuIndexFlat(GpuIndex):
__swig_setmethods__ = {}
for _s in [GpuIndex]: __swig_setmethods__.update(getattr(_s,'__swig_setmethods__',{}))
......@@ -2521,6 +2548,9 @@ class GpuClonerOptions(_object):
__swig_setmethods__["reserveVecs"] = _swigfaiss_gpu.GpuClonerOptions_reserveVecs_set
__swig_getmethods__["reserveVecs"] = _swigfaiss_gpu.GpuClonerOptions_reserveVecs_get
if _newclass:reserveVecs = _swig_property(_swigfaiss_gpu.GpuClonerOptions_reserveVecs_get, _swigfaiss_gpu.GpuClonerOptions_reserveVecs_set)
__swig_setmethods__["storeTransposed"] = _swigfaiss_gpu.GpuClonerOptions_storeTransposed_set
__swig_getmethods__["storeTransposed"] = _swigfaiss_gpu.GpuClonerOptions_storeTransposed_get
if _newclass:storeTransposed = _swig_property(_swigfaiss_gpu.GpuClonerOptions_storeTransposed_get, _swigfaiss_gpu.GpuClonerOptions_storeTransposed_set)
__swig_setmethods__["verbose"] = _swigfaiss_gpu.GpuClonerOptions_verbose_set
__swig_getmethods__["verbose"] = _swigfaiss_gpu.GpuClonerOptions_verbose_get
if _newclass:verbose = _swig_property(_swigfaiss_gpu.GpuClonerOptions_verbose_get, _swigfaiss_gpu.GpuClonerOptions_verbose_set)
......
......@@ -1790,6 +1790,28 @@ int fvec_madd_and_argmin (size_t n, const float *a,
const float *fvecs_maybe_subsample (
size_t d, size_t *n, size_t nmax, const float *x,
bool verbose, long seed)
{
if (*n <= nmax) return x; // nothing to do
size_t n2 = nmax;
if (verbose) {
printf (" Input training set too big (max size is %ld), sampling "
"%ld / %ld vectors\n", nmax, n2, *n);
}
std::vector<int> subset (*n);
rand_perm (subset.data (), *n, seed);
float *x_subset = new float[n2 * d];
for (long i = 0; i < n2; i++)
memcpy (&x_subset[i * d],
&x[subset[i] * size_t(d)],
sizeof (x[0]) * d);
*n = n2;
return x_subset;
}
} // namespace faiss