#include "InvertedListAppend.cuh"
#include "../../FaissAssert.h"
#include "../utils/Float16.cuh"
#include "../utils/DeviceUtils.h"
#include "../utils/Tensor.cuh"
#include "../utils/StaticUtils.h"

#include <thrust/device_vector.h>
#include <algorithm>

namespace faiss { namespace gpu {

// Update a list's length and its code/index storage pointers; one thread per
// updated list (needed when appends reallocate a list's storage).
__global__ void
runUpdateListPointers(Tensor<int, 1, true> listIds,
                      Tensor<int, 1, true> newListLength,
                      Tensor<void*, 1, true> newCodePointers,
                      Tensor<void*, 1, true> newIndexPointers,
                      int* listLengths,
                      void** listCodes,
                      void** listIndices) {
  int index = blockIdx.x * blockDim.x + threadIdx.x;

  if (index >= listIds.getSize(0)) {
    return;
  }

  int listId = listIds[index];
  listLengths[listId] = newListLength[index];
  listCodes[listId] = newCodePointers[index];
  listIndices[listId] = newIndexPointers[index];
}

// Host-side launcher: push the updated list lengths and storage pointers to
// the device-resident tables.
void
runUpdateListPointers(Tensor<int, 1, true>& listIds,
                      Tensor<int, 1, true>& newListLength,
                      Tensor<void*, 1, true>& newCodePointers,
                      Tensor<void*, 1, true>& newIndexPointers,
                      thrust::device_vector<int>& listLengths,
                      thrust::device_vector<void*>& listCodes,
                      thrust::device_vector<void*>& listIndices,
                      cudaStream_t stream) {
  int numThreads = std::min(listIds.getSize(0), getMaxThreadsCurrentDevice());
  int numBlocks = utils::divUp(listIds.getSize(0), numThreads);

  dim3 grid(numBlocks);
  dim3 block(numThreads);

  runUpdateListPointers<<<grid, block, 0, stream>>>(
    listIds, newListLength, newCodePointers, newIndexPointers,
    listLengths.data().get(),
    listCodes.data().get(),
    listIndices.data().get());
}
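
// Sizing sketch (illustrative, not from the original source): updating
// 10,000 lists with a 1,024-thread-per-block cap gives
// numThreads = min(10000, 1024) = 1024 and
// numBlocks = utils::divUp(10000, 1024) = 10, i.e. 10,240 launched threads;
// the 240 excess threads in the last block exit at the kernel's bounds check.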

// Device kernel for IVFPQ append: each thread writes one encoded vector (and
// optionally its user index) into its assigned inverted list.
template <IndicesOptions Opt>
__global__ void
ivfpqInvertedListAppend(Tensor<int, 1, true> listIds,
                        Tensor<int, 1, true> listOffset,
                        Tensor<int, 2, true> encodings,
                        Tensor<long, 1, true> indices,
                        void** listCodes,
                        void** listIndices) {
  int encodingToAdd = blockIdx.x * blockDim.x + threadIdx.x;

  if (encodingToAdd >= listIds.getSize(0)) {
    return;
  }

  int listId = listIds[encodingToAdd];
  int offset = listOffset[encodingToAdd];

  // The vector being added could be invalid (e.g., it contains NaNs)
  if (listId == -1 || offset == -1) {
    return;
  }

  auto encoding = encodings[encodingToAdd];
  long index = indices[encodingToAdd];

  if (Opt == INDICES_32_BIT) {
    // the 64-bit user index is narrowed to 32 bits here
    ((int*) listIndices[listId])[offset] = (int) index;
  } else if (Opt == INDICES_64_BIT) {
    ((long*) listIndices[listId])[offset] = (long) index;
  } else {
    // INDICES_CPU or INDICES_IVF: no indices are maintained on the GPU
  }

  // Copy the PQ codes (one byte per sub-quantizer) into this vector's slot
  // in the list's code storage
  unsigned char* codeStart =
    ((unsigned char*) listCodes[listId]) + offset * encodings.getSize(1);

  for (int i = 0; i < encodings.getSize(1); ++i) {
    codeStart[i] = (unsigned char) encoding[i];
  }
}
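
// Layout sketch (illustrative): codes for a list are stored contiguously at
// one byte per sub-quantizer. With encodings.getSize(1) == 8, the encoding
// appended at offset 3 occupies bytes [24, 32) of listCodes[listId], which is
// where codeStart in the kernel above points.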

// Host-side launcher for IVFPQ append: one thread per encoding to add.
void
runIVFPQInvertedListAppend(Tensor<int, 1, true>& listIds,
                           Tensor<int, 1, true>& listOffset,
                           Tensor<int, 2, true>& encodings,
                           Tensor<long, 1, true>& indices,
                           thrust::device_vector<void*>& listCodes,
                           thrust::device_vector<void*>& listIndices,
                           IndicesOptions indicesOptions,
                           cudaStream_t stream) {
  int numThreads = std::min(listIds.getSize(0), getMaxThreadsCurrentDevice());
  int numBlocks = utils::divUp(listIds.getSize(0), numThreads);

  dim3 grid(numBlocks);
  dim3 block(numThreads);

#define RUN_APPEND(IND)                                         \
  do {                                                          \
    ivfpqInvertedListAppend<IND><<<grid, block, 0, stream>>>(   \
      listIds, listOffset, encodings, indices,                  \
      listCodes.data().get(),                                   \
      listIndices.data().get());                                \
  } while (0)

  if ((indicesOptions == INDICES_CPU) || (indicesOptions == INDICES_IVF)) {
    // no need to maintain user indices on the GPU
    RUN_APPEND(INDICES_IVF);
  } else if (indicesOptions == INDICES_32_BIT) {
    RUN_APPEND(INDICES_32_BIT);
  } else if (indicesOptions == INDICES_64_BIT) {
    RUN_APPEND(INDICES_64_BIT);
  } else {
    // unknown index storage option
    FAISS_ASSERT(false);
  }

#undef RUN_APPEND
}
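
// Example call pattern (a sketch; the caller shown here is assumed and not
// part of this file): once the IVFPQ add path has assigned each new vector to
// a list and computed its write offset, it could launch the append as
//
//   runIVFPQInvertedListAppend(listIds, listOffset, encodings, indices,
//                              deviceListCodes, deviceListIndices,
//                              indicesOptions, stream);
//
// where deviceListCodes / deviceListIndices are hypothetical names for the
// thrust::device_vector<void*> tables holding each list's storage pointers.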

// Device kernel for IVFFlat append: one block per vector. The block
// cooperatively copies the vector into its list's storage; thread 0 also
// writes the user index if indices are kept on the GPU.
template <IndicesOptions Opt, bool Exact, bool Float16>
__global__ void
ivfFlatInvertedListAppend(Tensor<int, 1, true> listIds,
                          Tensor<int, 1, true> listOffset,
                          Tensor<float, 2, true> vecs,
                          Tensor<long, 1, true> indices,
                          void** listData,
                          void** listIndices) {
  int vec = blockIdx.x;

  int listId = listIds[vec];
  int offset = listOffset[vec];

  // The vector being added could be invalid (e.g., it contains NaNs)
  if (listId == -1 || offset == -1) {
    return;
  }

  if (threadIdx.x == 0) {
    long index = indices[vec];

    if (Opt == INDICES_32_BIT) {
      // the 64-bit user index is narrowed to 32 bits here
      ((int*) listIndices[listId])[offset] = (int) index;
    } else if (Opt == INDICES_64_BIT) {
      ((long*) listIndices[listId])[offset] = (long) index;
    } else {
      // INDICES_CPU or INDICES_IVF: no indices are maintained on the GPU
    }
  }

#ifdef FAISS_USE_FLOAT16
  if (Float16) {
    half* vecStart = ((half*) listData[listId]) + offset * vecs.getSize(1);

    if (Exact) {
      // the block covers the full vector: one dimension per thread
      vecStart[threadIdx.x] = __float2half(vecs[vec][threadIdx.x]);
    } else {
      // the vector is longer than the block: threads stride over dimensions
      for (int i = threadIdx.x; i < vecs.getSize(1); i += blockDim.x) {
        vecStart[i] = __float2half(vecs[vec][i]);
      }
    }
  }
#else
  static_assert(!Float16, "float16 unsupported");
#endif

  if (!Float16) {
    float* vecStart = ((float*) listData[listId]) + offset * vecs.getSize(1);

    if (Exact) {
      vecStart[threadIdx.x] = vecs[vec][threadIdx.x];
    } else {
      for (int i = threadIdx.x; i < vecs.getSize(1); i += blockDim.x) {
        vecStart[i] = vecs[vec][i];
      }
    }
  }
}

// Host-side launcher for IVFFlat append: one block per vector to add.
void
runIVFFlatInvertedListAppend(Tensor<int, 1, true>& listIds,
                             Tensor<int, 1, true>& listOffset,
                             Tensor<float, 2, true>& vecs,
                             Tensor<long, 1, true>& indices,
                             bool useFloat16,
                             thrust::device_vector<void*>& listData,
                             thrust::device_vector<void*>& listIndices,
                             IndicesOptions indicesOptions,
                             cudaStream_t stream) {
  int maxThreads = getMaxThreadsCurrentDevice();
  bool exact = vecs.getSize(1) <= maxThreads;

  // each block appends a single vector
  dim3 grid(vecs.getSize(0));
  dim3 block(std::min(vecs.getSize(1), maxThreads));
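
  // Geometry sketch (illustrative): with 128-dimensional vectors and a
  // 1,024-thread cap, exact == true and each block of 128 threads writes one
  // dimension per thread; with 2,048-dimensional vectors, exact == false and
  // each of the 1,024 threads strides over two dimensions.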

#define RUN_APPEND_OPT(OPT, EXACT, FLOAT16)                     \
  do {                                                          \
    ivfFlatInvertedListAppend<OPT, EXACT, FLOAT16>              \
      <<<grid, block, 0, stream>>>(                             \
        listIds, listOffset, vecs, indices,                     \
        listData.data().get(),                                  \
        listIndices.data().get());                              \
  } while (0)

#define RUN_APPEND(EXACT, FLOAT16)                                            \
  do {                                                                        \
    if ((indicesOptions == INDICES_CPU) || (indicesOptions == INDICES_IVF)) { \
      /* no need to maintain user indices on the GPU */                       \
      RUN_APPEND_OPT(INDICES_IVF, EXACT, FLOAT16);                            \
    } else if (indicesOptions == INDICES_32_BIT) {                            \
      RUN_APPEND_OPT(INDICES_32_BIT, EXACT, FLOAT16);                         \
    } else if (indicesOptions == INDICES_64_BIT) {                            \
      RUN_APPEND_OPT(INDICES_64_BIT, EXACT, FLOAT16);                         \
    } else {                                                                  \
      FAISS_ASSERT(false);                                                    \
    }                                                                         \
  } while (0)

  if (useFloat16) {
#ifdef FAISS_USE_FLOAT16
    if (exact) {
      RUN_APPEND(true, true);
    } else {
      RUN_APPEND(false, true);
    }
#else
    // float16 support was not compiled in
    FAISS_ASSERT(false);
#endif
  } else {
    if (exact) {
      RUN_APPEND(true, false);
    } else {
      RUN_APPEND(false, false);
    }
  }

#undef RUN_APPEND
#undef RUN_APPEND_OPT
}

} } // namespace faiss::gpu