12 #include "InvertedListAppend.cuh"
13 #include "../../FaissAssert.h"
14 #include "../utils/Float16.cuh"
15 #include "../utils/DeviceUtils.h"
16 #include "../utils/Tensor.cuh"
17 #include "../utils/StaticUtils.h"
19 namespace faiss {
namespace gpu {
// Updates the device-side list length and storage-pointer tables.
__global__ void
runUpdateListPointers(Tensor<int, 1, true> listIds,
                      Tensor<int, 1, true> newListLength,
                      Tensor<void*, 1, true> newCodePointers,
                      Tensor<void*, 1, true> newIndexPointers,
                      int* listLengths,
                      void** listCodes,
                      void** listIndices) {
  int index = blockIdx.x * blockDim.x + threadIdx.x;

  if (index >= listIds.getSize(0)) {
    return;
  }

  int listId = listIds[index];
  listLengths[listId] = newListLength[index];
  listCodes[listId] = newCodePointers[index];
  listIndices[listId] = newIndexPointers[index];
}
void
runUpdateListPointers(Tensor<int, 1, true>& listIds,
                      Tensor<int, 1, true>& newListLength,
                      Tensor<void*, 1, true>& newCodePointers,
                      Tensor<void*, 1, true>& newIndexPointers,
                      thrust::device_vector<int>& listLengths,
                      thrust::device_vector<void*>& listCodes,
                      thrust::device_vector<void*>& listIndices,
                      cudaStream_t stream) {
  int numThreads = std::min(listIds.getSize(0), getMaxThreadsCurrentDevice());
  int numBlocks = utils::divUp(listIds.getSize(0), numThreads);

  dim3 grid(numBlocks);
  dim3 block(numThreads);

  runUpdateListPointers<<<grid, block, 0, stream>>>(
    listIds, newListLength, newCodePointers, newIndexPointers,
    listLengths.data().get(),
    listCodes.data().get(),
    listIndices.data().get());
}
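// Illustrative example (hypothetical sizes, not from the original source): if
// 2500 lists were touched by an add and the device caps blocks at 1024
// threads, then numThreads = min(2500, 1024) = 1024 and
// numBlocks = divUp(2500, 1024) = 3; the out-of-range threads in the last
// block simply return at the bounds check in the kernel above.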
template <IndicesOptions Opt>
__global__ void
ivfpqInvertedListAppend(Tensor<int, 1, true> listIds,
                        Tensor<int, 1, true> listOffset,
                        Tensor<int, 2, true> encodings,
                        Tensor<long, 1, true> indices,
                        void** listCodes,
                        void** listIndices) {
  int encodingToAdd = blockIdx.x * blockDim.x + threadIdx.x;

  if (encodingToAdd >= listIds.getSize(0)) {
    return;
  }

  int listId = listIds[encodingToAdd];
  int offset = listOffset[encodingToAdd];

  // A vector that could not be appended is marked with -1; skip it
  if (listId == -1 || offset == -1) {
    return;
  }

  auto encoding = encodings[encodingToAdd];
  long index = indices[encodingToAdd];

  if (Opt == INDICES_32_BIT) {
    // store the user index truncated to 32 bits
    ((int*) listIndices[listId])[offset] = (int) index;
  } else if (Opt == INDICES_64_BIT) {
    ((long*) listIndices[listId])[offset] = (long) index;
  }

  // Each appended vector occupies encodings.getSize(1) bytes of code storage
  unsigned char* codeStart =
    ((unsigned char*) listCodes[listId]) + offset * encodings.getSize(1);

  for (int i = 0; i < encodings.getSize(1); ++i) {
    codeStart[i] = (unsigned char) encoding[i];
  }
}
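// Illustrative layout example (hypothetical sizes): with 16 sub-quantizer
// codes per vector (encodings.getSize(1) == 16), an encoding appended at
// offset 7 of its list is written to bytes [112, 128) of listCodes[listId],
// one byte per code, and its user index goes to slot 7 of
// listIndices[listId] when 32- or 64-bit indices are kept on the GPU.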
void
runIVFPQInvertedListAppend(Tensor<int, 1, true>& listIds,
                           Tensor<int, 1, true>& listOffset,
                           Tensor<int, 2, true>& encodings,
                           Tensor<long, 1, true>& indices,
                           thrust::device_vector<void*>& listCodes,
                           thrust::device_vector<void*>& listIndices,
                           IndicesOptions indicesOptions,
                           cudaStream_t stream) {
  int numThreads = std::min(listIds.getSize(0), getMaxThreadsCurrentDevice());
  int numBlocks = utils::divUp(listIds.getSize(0), numThreads);

  dim3 grid(numBlocks);
  dim3 block(numThreads);

#define RUN_APPEND(IND)                                        \
  do {                                                         \
    ivfpqInvertedListAppend<IND><<<grid, block, 0, stream>>>(  \
      listIds, listOffset, encodings, indices,                 \
      listCodes.data().get(),                                  \
      listIndices.data().get());                               \
  } while (0)

  if ((indicesOptions == INDICES_CPU) || (indicesOptions == INDICES_IVF)) {
    // no indices are maintained on the GPU for these options
    RUN_APPEND(INDICES_IVF);
  } else if (indicesOptions == INDICES_32_BIT) {
    RUN_APPEND(INDICES_32_BIT);
  } else if (indicesOptions == INDICES_64_BIT) {
    RUN_APPEND(INDICES_64_BIT);
  } else {
    // unknown index storage option
    FAISS_ASSERT(false);
  }

#undef RUN_APPEND
}
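// Usage sketch (hedged illustration; identifiers such as changedListIds,
// newListLengths or deviceListCodePointers are placeholders, not names from
// this file): a caller that has grown the per-list allocations on the host
// would typically publish the new lengths/pointers first and then scatter
// the encodings, e.g.
//
//   runUpdateListPointers(changedListIds, newListLengths, newCodePointers,
//                         newIndexPointers, deviceListLengths,
//                         deviceListCodePointers, deviceListIndexPointers,
//                         stream);
//   runIVFPQInvertedListAppend(listIds, listOffset, encodings, indices,
//                              deviceListCodePointers,
//                              deviceListIndexPointers, indicesOptions,
//                              stream);
//
// so that the append kernel dereferences the already-updated storage
// pointers.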
template <IndicesOptions Opt, bool Exact, bool Float16>
__global__ void
ivfFlatInvertedListAppend(Tensor<int, 1, true> listIds,
                          Tensor<int, 1, true> listOffset,
                          Tensor<float, 2, true> vecs,
                          Tensor<long, 1, true> indices,
                          void** listData,
                          void** listIndices) {
  // One block appends exactly one vector
  int vec = blockIdx.x;

  int listId = listIds[vec];
  int offset = listOffset[vec];

  // A vector that could not be appended is marked with -1; skip it
  if (listId == -1 || offset == -1) {
    return;
  }

  if (threadIdx.x == 0) {
    long index = indices[vec];

    if (Opt == INDICES_32_BIT) {
      // store the user index truncated to 32 bits
      ((int*) listIndices[listId])[offset] = (int) index;
    } else if (Opt == INDICES_64_BIT) {
      ((long*) listIndices[listId])[offset] = (long) index;
    }
  }

#ifdef FAISS_USE_FLOAT16
  if (Float16) {
    half* vecStart = ((half*) listData[listId]) + offset * vecs.getSize(1);

    if (Exact) {
      vecStart[threadIdx.x] = __float2half(vecs[vec][threadIdx.x]);
    } else {
      for (int i = threadIdx.x; i < vecs.getSize(1); i += blockDim.x) {
        vecStart[i] = __float2half(vecs[vec][i]);
      }
    }
  }
#else
  static_assert(!Float16, "float16 unsupported");
#endif

  if (!Float16) {
    float* vecStart = ((float*) listData[listId]) + offset * vecs.getSize(1);

    if (Exact) {
      vecStart[threadIdx.x] = vecs[vec][threadIdx.x];
    } else {
      for (int i = threadIdx.x; i < vecs.getSize(1); i += blockDim.x) {
        vecStart[i] = vecs[vec][i];
      }
    }
  }
}
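// Illustrative dispatch example (hypothetical sizes): with dimension 128 and
// a 1024-thread block limit, Exact is instantiated as true and each of the
// 128 threads in a block stores one element of its vector; with dimension
// 2048, Exact is false and each thread strides over the dimension in steps
// of blockDim.x, as in the loop branch above.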
void
runIVFFlatInvertedListAppend(Tensor<int, 1, true>& listIds,
                             Tensor<int, 1, true>& listOffset,
                             Tensor<float, 2, true>& vecs,
                             Tensor<long, 1, true>& indices,
                             bool useFloat16,
                             thrust::device_vector<void*>& listData,
                             thrust::device_vector<void*>& listIndices,
                             IndicesOptions indicesOptions,
                             cudaStream_t stream) {
  int maxThreads = getMaxThreadsCurrentDevice();
  bool exact = vecs.getSize(1) <= maxThreads;

  // Each block handles appending a single vector
  dim3 grid(vecs.getSize(0));
  dim3 block(std::min(vecs.getSize(1), maxThreads));

#define RUN_APPEND_OPT(OPT, EXACT, FLOAT16)            \
  do {                                                 \
    ivfFlatInvertedListAppend<OPT, EXACT, FLOAT16>     \
      <<<grid, block, 0, stream>>>(                    \
        listIds, listOffset, vecs, indices,            \
        listData.data().get(),                         \
        listIndices.data().get());                     \
  } while (0)

#define RUN_APPEND(EXACT, FLOAT16)                                            \
  do {                                                                        \
    if ((indicesOptions == INDICES_CPU) || (indicesOptions == INDICES_IVF)) { \
      /* no indices are maintained on the GPU for these options */            \
      RUN_APPEND_OPT(INDICES_IVF, EXACT, FLOAT16);                            \
    } else if (indicesOptions == INDICES_32_BIT) {                            \
      RUN_APPEND_OPT(INDICES_32_BIT, EXACT, FLOAT16);                         \
    } else if (indicesOptions == INDICES_64_BIT) {                            \
      RUN_APPEND_OPT(INDICES_64_BIT, EXACT, FLOAT16);                         \
    } else {                                                                  \
      FAISS_ASSERT(false);                                                    \
    }                                                                         \
  } while (0)

  if (useFloat16) {
#ifdef FAISS_USE_FLOAT16
    if (exact) {
      RUN_APPEND(true, true);
    } else {
      RUN_APPEND(false, true);
    }
#else
    // float16 support was not compiled in
    FAISS_ASSERT(false);
#endif
  } else {
    if (exact) {
      RUN_APPEND(true, false);
    } else {
      RUN_APPEND(false, false);
    }
  }

#undef RUN_APPEND
#undef RUN_APPEND_OPT
}

} } // namespace