#include "InvertedListAppend.cuh"
#include "../../FaissAssert.h"
#include "../utils/Float16.cuh"
#include "../utils/DeviceUtils.h"
#include "../utils/Tensor.cuh"
#include "../utils/StaticUtils.h"

#include <thrust/device_vector.h>
#include <algorithm>

namespace faiss { namespace gpu {

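// Kernel for updating inverted list metadata after a reallocation: each
// thread picks up one entry in listIds and overwrites that list's length
// and its code/index storage pointers with the new values.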
__global__ void
runUpdateListPointers(Tensor<int, 1, true> listIds,
                      Tensor<int, 1, true> newListLength,
                      Tensor<void*, 1, true> newCodePointers,
                      Tensor<void*, 1, true> newIndexPointers,
                      int* listLengths,
                      void** listCodes,
                      void** listIndices) {
  int index = blockIdx.x * blockDim.x + threadIdx.x;

  if (index >= listIds.getSize(0)) {
    return;
  }

  int listId = listIds[index];
  listLengths[listId] = newListLength[index];
  listCodes[listId] = newCodePointers[index];
  listIndices[listId] = newIndexPointers[index];
}

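// Host-side wrapper: launches one thread per list being updated, pulling the
// raw device pointers out of the thrust vectors that own the list metadata.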
void
runUpdateListPointers(Tensor<int, 1, true>& listIds,
                      Tensor<int, 1, true>& newListLength,
                      Tensor<void*, 1, true>& newCodePointers,
                      Tensor<void*, 1, true>& newIndexPointers,
                      thrust::device_vector<int>& listLengths,
                      thrust::device_vector<void*>& listCodes,
                      thrust::device_vector<void*>& listIndices,
                      cudaStream_t stream) {
  int numThreads = std::min(listIds.getSize(0), getMaxThreadsCurrentDevice());
  int numBlocks = utils::divUp(listIds.getSize(0), numThreads);

  dim3 grid(numBlocks);
  dim3 block(numThreads);

  runUpdateListPointers<<<grid, block, 0, stream>>>(
    listIds, newListLength, newCodePointers, newIndexPointers,
    listLengths.data().get(),
    listCodes.data().get(),
    listIndices.data().get());

  CUDA_TEST_ERROR();
}

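// Kernel for appending PQ-encoded vectors: one thread handles one new
// encoding, writing its user index (if indices are kept on the GPU) and its
// PQ codes into the destination inverted list at the precomputed offset.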
template <IndicesOptions Opt>
__global__ void
ivfpqInvertedListAppend(Tensor<int, 1, true> listIds,
                        Tensor<int, 1, true> listOffset,
                        Tensor<int, 2, true> encodings,
                        Tensor<long, 1, true> indices,
                        void** listCodes,
                        void** listIndices) {
  int encodingToAdd = blockIdx.x * blockDim.x + threadIdx.x;

  if (encodingToAdd >= listIds.getSize(0)) {
    return;
  }

  int listId = listIds[encodingToAdd];
  int offset = listOffset[encodingToAdd];

  // An invalid vector (e.g. one containing NaNs) is flagged with -1; skip it
  if (listId == -1 || offset == -1) {
    return;
  }

  auto encoding = encodings[encodingToAdd];
  long index = indices[encodingToAdd];

  if (Opt == INDICES_32_BIT) {
    ((int*) listIndices[listId])[offset] = (int) index;
  } else if (Opt == INDICES_64_BIT) {
    ((long*) listIndices[listId])[offset] = (long) index;
  }
  // Otherwise (INDICES_CPU / INDICES_IVF), indices are not stored on the GPU

  unsigned char* codeStart =
    ((unsigned char*) listCodes[listId]) + offset * encodings.getSize(1);

  for (int i = 0; i < encodings.getSize(1); ++i) {
    codeStart[i] = (unsigned char) encoding[i];
  }
}

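// Host-side wrapper: launches one thread per new encoding and selects the
// kernel instantiation that matches how user indices are stored.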
void
runIVFPQInvertedListAppend(Tensor<int, 1, true>& listIds,
                           Tensor<int, 1, true>& listOffset,
                           Tensor<int, 2, true>& encodings,
                           Tensor<long, 1, true>& indices,
                           thrust::device_vector<void*>& listCodes,
                           thrust::device_vector<void*>& listIndices,
                           IndicesOptions indicesOptions,
                           cudaStream_t stream) {
  int numThreads = std::min(listIds.getSize(0), getMaxThreadsCurrentDevice());
  int numBlocks = utils::divUp(listIds.getSize(0), numThreads);

  dim3 grid(numBlocks);
  dim3 block(numThreads);

#define RUN_APPEND(IND)                                         \
  do {                                                          \
    ivfpqInvertedListAppend<IND><<<grid, block, 0, stream>>>(   \
      listIds, listOffset, encodings, indices,                  \
      listCodes.data().get(),                                   \
      listIndices.data().get());                                \
  } while (0)

  if ((indicesOptions == INDICES_CPU) || (indicesOptions == INDICES_IVF)) {
    // no need to maintain user indices on the GPU
    RUN_APPEND(INDICES_IVF);
  } else if (indicesOptions == INDICES_32_BIT) {
    RUN_APPEND(INDICES_32_BIT);
  } else if (indicesOptions == INDICES_64_BIT) {
    RUN_APPEND(INDICES_64_BIT);
  } else {
    // unhandled indices option
    FAISS_ASSERT(false);
  }

  CUDA_TEST_ERROR();

#undef RUN_APPEND
}

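// Kernel for appending full-precision (or float16) vectors for IVFFlat:
// each block appends a single vector. When Exact is true the vector
// dimension fits within one block, so each thread copies exactly one
// element; otherwise the threads stride over the dimension.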
template <IndicesOptions Opt, bool Exact, bool Float16>
__global__ void
ivfFlatInvertedListAppend(Tensor<int, 1, true> listIds,
                          Tensor<int, 1, true> listOffset,
                          Tensor<float, 2, true> vecs,
                          Tensor<long, 1, true> indices,
                          void** listData,
                          void** listIndices) {
  int vec = blockIdx.x;

  int listId = listIds[vec];
  int offset = listOffset[vec];

  // An invalid vector (e.g. one containing NaNs) is flagged with -1; skip it
  if (listId == -1 || offset == -1) {
    return;
  }

  // Only one thread per block writes the user index
  if (threadIdx.x == 0) {
    long index = indices[vec];

    if (Opt == INDICES_32_BIT) {
      ((int*) listIndices[listId])[offset] = (int) index;
    } else if (Opt == INDICES_64_BIT) {
      ((long*) listIndices[listId])[offset] = (long) index;
    }
    // Otherwise (INDICES_CPU / INDICES_IVF), indices are not stored on the GPU
  }

#ifdef FAISS_USE_FLOAT16
  if (Float16) {
    // Convert to float16 while copying into the list storage
    half* vecStart = ((half*) listData[listId]) + offset * vecs.getSize(1);

    if (Exact) {
      vecStart[threadIdx.x] = __float2half(vecs[vec][threadIdx.x]);
    } else {
      for (int i = threadIdx.x; i < vecs.getSize(1); i += blockDim.x) {
        vecStart[i] = __float2half(vecs[vec][i]);
      }
    }

    return;
  }
#else
  static_assert(!Float16, "float16 unsupported");
#endif

  float* vecStart = ((float*) listData[listId]) + offset * vecs.getSize(1);

  if (Exact) {
    vecStart[threadIdx.x] = vecs[vec][threadIdx.x];
  } else {
    for (int i = threadIdx.x; i < vecs.getSize(1); i += blockDim.x) {
      vecStart[i] = vecs[vec][i];
    }
  }
}

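// Host-side wrapper: launches one block per vector to append and picks the
// kernel instantiation based on the indices storage option, on whether the
// vector dimension fits in a single block (Exact), and on whether the list
// data is stored as float16.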
void
runIVFFlatInvertedListAppend(Tensor<int, 1, true>& listIds,
                             Tensor<int, 1, true>& listOffset,
                             Tensor<float, 2, true>& vecs,
                             Tensor<long, 1, true>& indices,
                             bool useFloat16,
                             thrust::device_vector<void*>& listData,
                             thrust::device_vector<void*>& listIndices,
                             IndicesOptions indicesOptions,
                             cudaStream_t stream) {
  int maxThreads = getMaxThreadsCurrentDevice();
  bool exact = vecs.getSize(1) <= maxThreads;

  // Each block handles appending a single vector
  dim3 grid(vecs.getSize(0));
  dim3 block(std::min(vecs.getSize(1), maxThreads));

#define RUN_APPEND_OPT(OPT, EXACT, FLOAT16)             \
  do {                                                  \
    ivfFlatInvertedListAppend<OPT, EXACT, FLOAT16>      \
      <<<grid, block, 0, stream>>>(                     \
        listIds, listOffset, vecs, indices,             \
        listData.data().get(),                          \
        listIndices.data().get());                      \
  } while (0)

#define RUN_APPEND(EXACT, FLOAT16)                                      \
  do {                                                                  \
    if ((indicesOptions == INDICES_CPU) ||                              \
        (indicesOptions == INDICES_IVF)) {                              \
      RUN_APPEND_OPT(INDICES_IVF, EXACT, FLOAT16);                      \
    } else if (indicesOptions == INDICES_32_BIT) {                      \
      RUN_APPEND_OPT(INDICES_32_BIT, EXACT, FLOAT16);                   \
    } else if (indicesOptions == INDICES_64_BIT) {                      \
      RUN_APPEND_OPT(INDICES_64_BIT, EXACT, FLOAT16);                   \
    } else {                                                            \
      FAISS_ASSERT(false);                                              \
    }                                                                   \
  } while (0)

  if (useFloat16) {
#ifdef FAISS_USE_FLOAT16
    if (exact) {
      RUN_APPEND(true, true);
    } else {
      RUN_APPEND(false, true);
    }
#else
    // float16 support was not compiled in
    FAISS_ASSERT(false);
#endif
  } else {
    if (exact) {
      RUN_APPEND(true, false);
    } else {
      RUN_APPEND(false, false);
    }
  }

  CUDA_TEST_ERROR();

#undef RUN_APPEND
#undef RUN_APPEND_OPT
}

} } // namespace