10 #include "../../IndexFlat.h"
11 #include "../../utils.h"
12 #include "../GpuIndexFlat.h"
13 #include "IndexWrapper.h"
14 #include "../test/TestUtils.h"
15 #include "../utils/DeviceTensor.cuh"
16 #include "../utils/DeviceUtils.h"
17 #include "../utils/HostTensor.cuh"
18 #include "../utils/Timer.h"
19 #include <gflags/gflags.h>
24 #include <cuda_profiler_api.h>
26 DEFINE_bool(l2,
true,
"L2 or inner product");
27 DEFINE_int32(k, 3,
"final number of closest results returned");
28 DEFINE_int32(num, 128,
"# of vecs");
29 DEFINE_int32(dim, 128,
"# of dimensions");
30 DEFINE_int32(num_queries, 3,
"number of query vectors");
31 DEFINE_bool(diff,
true,
"show exact distance + index output discrepancies");
32 DEFINE_bool(use_float16,
false,
"use encodings in float16");
33 DEFINE_bool(use_float16_math,
false,
"perform math in float16");
34 DEFINE_bool(transposed,
false,
"store vectors transposed");
35 DEFINE_int64(seed, -1,
"specify random seed");
36 DEFINE_int32(num_gpus, 1,
"number of gpus to use");
37 DEFINE_int64(pinned_mem, 0,
"pinned memory allocation to use");
38 DEFINE_bool(cpu,
true,
"run the CPU code for timing and comparison");
39 DEFINE_bool(use_unified_mem,
false,
"use Pascal unified memory for the index");
41 using namespace faiss::gpu;
43 int main(
int argc,
char** argv) {
44 gflags::ParseCommandLineFlags(&argc, &argv,
true);
48 auto seed = FLAGS_seed != -1L ? FLAGS_seed : time(
nullptr);
49 printf(
"using seed %ld\n", seed);
51 auto numQueries = FLAGS_num_queries;
53 auto index = std::unique_ptr<faiss::IndexFlat>(
55 faiss::METRIC_L2 : faiss::METRIC_INNER_PRODUCT));
58 faiss::float_rand(vecs.data(), vecs.numElements(), seed);
60 index->add(FLAGS_num, vecs.data());
62 printf(
"Database: dim %d num vecs %d\n", FLAGS_dim, FLAGS_num);
63 printf(
"%s lookup: %d queries, total k %d\n",
64 FLAGS_l2 ?
"L2" :
"IP",
66 printf(
"float16 encoding %s\n", FLAGS_use_float16 ?
"enabled" :
"disabled");
67 printf(
"transposed storage %s\n", FLAGS_transposed ?
"enabled" :
"disabled");
70 printf(
"Copying index to %d GPU(s)...\n", FLAGS_num_gpus);
73 std::unique_ptr<faiss::gpu::GpuIndexFlat> {
83 MemorySpace::Unified : MemorySpace::Device;
85 auto p = std::unique_ptr<faiss::gpu::GpuIndexFlat>(
91 printf(
"copy done\n");
95 faiss::float_rand(cpuQuery.data(), cpuQuery.numElements(), seed);
102 float cpuTime = 0.0f;
105 index->search(numQueries,
112 printf(
"CPU time %.3f ms\n", cpuTime);
118 CUDA_VERIFY(cudaProfilerStart());
119 faiss::gpu::synchronizeAllDevices();
121 float gpuTime = 0.0f;
127 gpuIndex.getIndex()->search(cpuQuery.getSize(0),
138 CUDA_VERIFY(cudaProfilerStop());
139 printf(
"GPU time %.3f ms\n", gpuTime);
142 compareLists(cpuDistances.data(), cpuIndices.data(),
143 gpuDistances.data(), gpuIndices.data(),
145 "",
true, FLAGS_diff,
false);
148 CUDA_VERIFY(cudaDeviceSynchronize());
float elapsedMilliseconds()
Returns elapsed time in milliseconds.
bool useFloat16Accumulator
CPU wallclock elapsed timer.
bool useFloat16
Whether or not data is stored as float16.
int device
GPU device on which the index is resident.