#include "InvertedListAppend.cuh"
#include "../../FaissAssert.h"
#include "../utils/Float16.cuh"
#include "../utils/DeviceUtils.h"
#include "../utils/Tensor.cuh"
#include "../utils/StaticUtils.h"

namespace faiss { namespace gpu {
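// Kernels and host-side launchers for appending encoded vectors and their
// user indices to IVF inverted lists resident on the GPU:
//  - runUpdateListPointers: refresh per-list length/code/index pointers after
//    lists have been reallocated
//  - runIVFPQInvertedListAppend: append PQ codes, one thread per encoding
//  - runIVFFlatInvertedListAppend: append raw float/float16 vectors, one
//    thread block per vector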

__global__ void
runUpdateListPointers(Tensor<int, 1, true> listIds,
                      Tensor<int, 1, true> newListLength,
                      Tensor<void*, 1, true> newCodePointers,
                      Tensor<void*, 1, true> newIndexPointers,
                      int* listLengths,
                      void** listCodes,
                      void** listIndices) {
  int index = blockIdx.x * blockDim.x + threadIdx.x;

  if (index >= listIds.getSize(0)) {
    return;
  }

  int listId = listIds[index];
  listLengths[listId] = newListLength[index];
  listCodes[listId] = newCodePointers[index];
  listIndices[listId] = newIndexPointers[index];
}

void
runUpdateListPointers(Tensor<int, 1, true>& listIds,
                      Tensor<int, 1, true>& newListLength,
                      Tensor<void*, 1, true>& newCodePointers,
                      Tensor<void*, 1, true>& newIndexPointers,
                      thrust::device_vector<int>& listLengths,
                      thrust::device_vector<void*>& listCodes,
                      thrust::device_vector<void*>& listIndices,
                      cudaStream_t stream) {
  // One thread per list whose pointers are being updated
  int numThreads = std::min(listIds.getSize(0), getMaxThreadsCurrentDevice());
  int numBlocks = utils::divUp(listIds.getSize(0), numThreads);

  dim3 grid(numBlocks);
  dim3 block(numThreads);

  runUpdateListPointers<<<grid, block, 0, stream>>>(
    listIds, newListLength, newCodePointers, newIndexPointers,
    listLengths.data().get(),
    listCodes.data().get(),
    listIndices.data().get());
}
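
// Append kernel for the IVFPQ index: each thread handles one (listId, offset)
// slot, writing the user index (32- or 64-bit, depending on IndicesOptions)
// and then copying the PQ code bytes for that vector into the list.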
template <IndicesOptions Opt>
__global__ void
ivfpqInvertedListAppend(Tensor<int, 1, true> listIds,
                        Tensor<int, 1, true> listOffset,
                        Tensor<int, 2, true> encodings,
                        Tensor<long, 1, true> indices,
                        void** listCodes,
                        void** listIndices) {
  int encodingToAdd = blockIdx.x * blockDim.x + threadIdx.x;

  if (encodingToAdd >= listIds.getSize(0)) {
    return;
  }

  int listId = listIds[encodingToAdd];
  int offset = listOffset[encodingToAdd];

  // Add vector could be invalid (contains NaNs etc)
  if (listId == -1 || offset == -1) {
    return;
  }

  auto encoding = encodings[encodingToAdd];
  long index = indices[encodingToAdd];

  if (Opt == INDICES_32_BIT) {
    ((int*) listIndices[listId])[offset] = (int) index;
  } else if (Opt == INDICES_64_BIT) {
    ((long*) listIndices[listId])[offset] = (long) index;
  }
  // For INDICES_CPU or INDICES_IVF, no indices are stored on the GPU

  unsigned char* codeStart =
    ((unsigned char*) listCodes[listId]) + offset * encodings.getSize(1);

  for (int i = 0; i < encodings.getSize(1); ++i) {
    codeStart[i] = (unsigned char) encoding[i];
  }
}
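
// Host-side launcher: one thread per encoding to append. The RUN_APPEND macro
// below instantiates the kernel for the index storage width selected at
// runtime via IndicesOptions; INDICES_CPU and INDICES_IVF keep no per-vector
// index on the GPU, so both share the INDICES_IVF instantiation.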
void
runIVFPQInvertedListAppend(Tensor<int, 1, true>& listIds,
                           Tensor<int, 1, true>& listOffset,
                           Tensor<int, 2, true>& encodings,
                           Tensor<long, 1, true>& indices,
                           thrust::device_vector<void*>& listCodes,
                           thrust::device_vector<void*>& listIndices,
                           IndicesOptions indicesOptions,
                           cudaStream_t stream) {
  int numThreads = std::min(listIds.getSize(0), getMaxThreadsCurrentDevice());
  int numBlocks = utils::divUp(listIds.getSize(0), numThreads);

  dim3 grid(numBlocks);
  dim3 block(numThreads);

#define RUN_APPEND(IND)                                         \
  do {                                                          \
    ivfpqInvertedListAppend<IND><<<grid, block, 0, stream>>>(   \
      listIds, listOffset, encodings, indices,                  \
      listCodes.data().get(),                                   \
      listIndices.data().get());                                \
  } while (0)

  if ((indicesOptions == INDICES_CPU) || (indicesOptions == INDICES_IVF)) {
    // no need to maintain user indices on the GPU
    RUN_APPEND(INDICES_IVF);
  } else if (indicesOptions == INDICES_32_BIT) {
    RUN_APPEND(INDICES_32_BIT);
  } else if (indicesOptions == INDICES_64_BIT) {
    RUN_APPEND(INDICES_64_BIT);
  } else {
    // unknown index storage option
    FAISS_ASSERT(false);
  }

#undef RUN_APPEND
}

template <IndicesOptions Opt, bool Exact, bool Float16>
__global__ void
ivfFlatInvertedListAppend(Tensor<int, 1, true> listIds,
                          Tensor<int, 1, true> listOffset,
                          Tensor<float, 2, true> vecs,
                          Tensor<long, 1, true> indices,
                          void** listData,
                          void** listIndices) {
  // Each block appends a single vector
  int vec = blockIdx.x;

  int listId = listIds[vec];
  int offset = listOffset[vec];

  // Add vector could be invalid (contains NaNs etc)
  if (listId == -1 || offset == -1) {
    return;
  }

  if (threadIdx.x == 0) {
    long index = indices[vec];

    if (Opt == INDICES_32_BIT) {
      ((int*) listIndices[listId])[offset] = (int) index;
    } else if (Opt == INDICES_64_BIT) {
      ((long*) listIndices[listId])[offset] = (long) index;
    }
    // For INDICES_CPU or INDICES_IVF, no indices are stored on the GPU
  }

#ifdef FAISS_USE_FLOAT16
  if (Float16) {
    half* vecStart = ((half*) listData[listId]) + offset * vecs.getSize(1);

    if (Exact) {
      // The dimension fits within a single block; one element per thread
      vecStart[threadIdx.x] = __float2half(vecs[vec][threadIdx.x]);
    } else {
      for (int i = threadIdx.x; i < vecs.getSize(1); i += blockDim.x) {
        vecStart[i] = __float2half(vecs[vec][i]);
      }
    }
  }
#else
  static_assert(!Float16, "float16 unsupported");
#endif

  if (!Float16) {
    float* vecStart = ((float*) listData[listId]) + offset * vecs.getSize(1);

    if (Exact) {
      vecStart[threadIdx.x] = vecs[vec][threadIdx.x];
    } else {
      for (int i = threadIdx.x; i < vecs.getSize(1); i += blockDim.x) {
        vecStart[i] = vecs[vec][i];
      }
    }
  }
}
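
// Host-side launcher for the IVFFlat append. Each thread block appends one
// vector; if the vector dimension fits within a single block ("exact"), every
// thread writes exactly one element, otherwise threads stride over the
// dimension. The Exact/Float16 choices made at runtime are dispatched to the
// matching template instantiation via the RUN_APPEND macros below.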
void
runIVFFlatInvertedListAppend(Tensor<int, 1, true>& listIds,
                             Tensor<int, 1, true>& listOffset,
                             Tensor<float, 2, true>& vecs,
                             Tensor<long, 1, true>& indices,
                             bool useFloat16,
                             thrust::device_vector<void*>& listData,
                             thrust::device_vector<void*>& listIndices,
                             IndicesOptions indicesOptions,
                             cudaStream_t stream) {
  int maxThreads = getMaxThreadsCurrentDevice();
  bool exact = vecs.getSize(1) <= maxThreads;

  // Each block will handle appending a single vector
  dim3 grid(vecs.getSize(0));
  dim3 block(std::min(vecs.getSize(1), maxThreads));

#define RUN_APPEND_OPT(OPT, EXACT, FLOAT16)                     \
  do {                                                          \
    ivfFlatInvertedListAppend<OPT, EXACT, FLOAT16>              \
      <<<grid, block, 0, stream>>>(                             \
        listIds, listOffset, vecs, indices,                     \
        listData.data().get(),                                  \
        listIndices.data().get());                              \
  } while (0)

#define RUN_APPEND(EXACT, FLOAT16)                                           \
  do {                                                                       \
    if ((indicesOptions == INDICES_CPU) || (indicesOptions == INDICES_IVF)) {\
      /* no need to maintain user indices on the GPU */                      \
      RUN_APPEND_OPT(INDICES_IVF, EXACT, FLOAT16);                           \
    } else if (indicesOptions == INDICES_32_BIT) {                           \
      RUN_APPEND_OPT(INDICES_32_BIT, EXACT, FLOAT16);                        \
    } else if (indicesOptions == INDICES_64_BIT) {                           \
      RUN_APPEND_OPT(INDICES_64_BIT, EXACT, FLOAT16);                        \
    } else {                                                                 \
      FAISS_ASSERT(false);                                                   \
    }                                                                        \
  } while (0)

  if (useFloat16) {
#ifdef FAISS_USE_FLOAT16
    if (exact) {
      RUN_APPEND(true, true);
    } else {
      RUN_APPEND(false, true);
    }
#else
    // not compiled with float16 support
    FAISS_ASSERT(false);
#endif
  } else {
    if (exact) {
      RUN_APPEND(true, false);
    } else {
      RUN_APPEND(false, false);
    }
  }

#undef RUN_APPEND
#undef RUN_APPEND_OPT
}

} } // namespace
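
// A minimal usage sketch (not part of this translation unit): the owning
// index's add path is assumed to build device-resident tensors of assigned
// list ids, write offsets, the vectors and their user ids, then call e.g.
//
//   runIVFFlatInvertedListAppend(listIds, listOffset, vecs, indices,
//                                useFloat16, deviceListDataPointers,
//                                deviceListIndexPointers, indicesOptions,
//                                stream);
//
// The names deviceListDataPointers, deviceListIndexPointers and
// indicesOptions stand in for the caller's own state and are illustrative,
// not definitions made here.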