#include "../GpuFaissAssert.h"
#include "DeviceUtils.h"
#include <initializer_list>
#include <limits>
#include <utility>
namespace faiss {
namespace gpu {
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::Tensor()
    : data_(nullptr) {
  static_assert(Dim > 0, "must have > 0 dimensions");

  for (int i = 0; i < Dim; ++i) {
    size_[i] = 0;
    stride_[i] = (IndexT) 1;
  }
}

template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::Tensor(
    Tensor<T, Dim, InnerContig, IndexT, PtrTraits>& t) {
  this->operator=(t);
}

template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::Tensor(
    Tensor<T, Dim, InnerContig, IndexT, PtrTraits>&& t) {
  this->operator=(std::move(t));
}

template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>&
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::operator=(
    Tensor<T, Dim, InnerContig, IndexT, PtrTraits>& t) {
  data_ = t.data_;
  for (int i = 0; i < Dim; ++i) {
    size_[i] = t.size_[i];
    stride_[i] = t.stride_[i];
  }

  return *this;
}

template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>&
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::operator=(
    Tensor<T, Dim, InnerContig, IndexT, PtrTraits>&& t) {
  data_ = t.data_; t.data_ = nullptr;
  for (int i = 0; i < Dim; ++i) {
    stride_[i] = t.stride_[i]; t.stride_[i] = 0;
    size_[i] = t.size_[i]; t.size_[i] = 0;
  }

  return *this;
}

template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::Tensor(
    DataPtrType data, const IndexT sizes[Dim])
    : data_(data) {
  static_assert(Dim > 0, "must have > 0 dimensions");

  for (int i = 0; i < Dim; ++i) {
    size_[i] = sizes[i];
  }

  // Compute strides for a fully contiguous, row-major layout
  stride_[Dim - 1] = (IndexT) 1;
  for (int i = Dim - 2; i >= 0; --i) {
    stride_[i] = stride_[i + 1] * sizes[i + 1];
  }
}

template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::Tensor(
    DataPtrType data, std::initializer_list<IndexT> sizes)
    : data_(data) {
  GPU_FAISS_ASSERT(sizes.size() == Dim);
  static_assert(Dim > 0, "must have > 0 dimensions");

  int i = 0;
  for (auto s : sizes) {
    size_[i++] = s;
  }

  // Compute strides for a fully contiguous, row-major layout
  stride_[Dim - 1] = (IndexT) 1;
  for (int j = Dim - 2; j >= 0; --j) {
    stride_[j] = stride_[j + 1] * size_[j + 1];
  }
}

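// Example (usage sketch, not part of the original file): wrapping an existing
// device allocation in a 2-d tensor. devPtr and the sizes below are made up
// for illustration; the tensor does not own the memory.
//
//   float* devPtr = nullptr;
//   cudaMalloc(&devPtr, 1000 * 64 * sizeof(float));
//
//   // 1000 vectors of dimension 64, row-major and fully contiguous
//   Tensor<float, 2, true> vecs(devPtr, {1000, 64});
//   // vecs.getSize(0) == 1000, vecs.getSize(1) == 64, vecs.getStride(1) == 1
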
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::Tensor(
    DataPtrType data, const IndexT sizes[Dim], const IndexT strides[Dim])
    : data_(data) {
  static_assert(Dim > 0, "must have > 0 dimensions");

  for (int i = 0; i < Dim; ++i) {
    size_[i] = sizes[i];
    stride_[i] = strides[i];
  }
}

template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ void
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::copyFrom(
    Tensor<T, Dim, InnerContig, IndexT, PtrTraits>& t,
    cudaStream_t stream) {
  // The copy is a single flat memcpy, so we must be fully contiguous
  GPU_FAISS_ASSERT(this->isContiguous());

  // Since contiguity is assumed, only the total element count must match
  GPU_FAISS_ASSERT(this->numElements() == t.numElements());

  if (t.numElements() > 0) {
    GPU_FAISS_ASSERT(this->data_);
    GPU_FAISS_ASSERT(t.data());

    int ourDev = getDeviceForAddress(this->data_);
    int tDev = getDeviceForAddress(t.data());

    if (tDev == -1) {
      // The source is on the host
      CUDA_VERIFY(cudaMemcpyAsync(this->data_,
                                  t.data(),
                                  this->getSizeInBytes(),
                                  ourDev == -1 ? cudaMemcpyHostToHost :
                                  cudaMemcpyHostToDevice,
                                  stream));
    } else {
      // The source is on a device
      CUDA_VERIFY(cudaMemcpyAsync(this->data_,
                                  t.data(),
                                  this->getSizeInBytes(),
                                  ourDev == -1 ? cudaMemcpyDeviceToHost :
                                  cudaMemcpyDeviceToDevice,
                                  stream));
    }
  }
}

template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ void
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::copyTo(
    Tensor<T, Dim, InnerContig, IndexT, PtrTraits>& t,
    cudaStream_t stream) {
  // The copy is a single flat memcpy, so we must be fully contiguous
  GPU_FAISS_ASSERT(this->isContiguous());

  // Since contiguity is assumed, only the total element count must match
  GPU_FAISS_ASSERT(this->numElements() == t.numElements());

  if (t.numElements() > 0) {
    GPU_FAISS_ASSERT(this->data_);
    GPU_FAISS_ASSERT(t.data());

    int ourDev = getDeviceForAddress(this->data_);
    int tDev = getDeviceForAddress(t.data());

    if (tDev == -1) {
      // The destination is on the host
      CUDA_VERIFY(cudaMemcpyAsync(t.data(),
                                  this->data_,
                                  this->getSizeInBytes(),
                                  ourDev == -1 ? cudaMemcpyHostToHost :
                                  cudaMemcpyDeviceToHost,
                                  stream));
    } else {
      // The destination is on a device
      CUDA_VERIFY(cudaMemcpyAsync(t.data(),
                                  this->data_,
                                  this->getSizeInBytes(),
                                  ourDev == -1 ? cudaMemcpyHostToDevice :
                                  cudaMemcpyDeviceToDevice,
                                  stream));
    }
  }
}

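// Example (usage sketch, not part of the original file): copying between a
// host-resident and a device-resident tensor on a stream. The buffers and
// `stream` (an existing cudaStream_t) are assumed for illustration; both
// tensors must be contiguous and hold the same number of elements.
//
//   std::vector<float> hostBuf(1000 * 64);
//   float* devPtr = nullptr;
//   cudaMalloc(&devPtr, hostBuf.size() * sizeof(float));
//
//   Tensor<float, 2, true> hostVecs(hostBuf.data(), {1000, 64});
//   Tensor<float, 2, true> devVecs(devPtr, {1000, 64});
//
//   devVecs.copyFrom(hostVecs, stream);   // host -> device
//   devVecs.copyTo(hostVecs, stream);     // device -> host
//   cudaStreamSynchronize(stream);        // host buffer valid after sync
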
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
template <typename OtherT, int OtherDim>
__host__ __device__ bool
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::isSame(
    const Tensor<OtherT, OtherDim, InnerContig, IndexT, PtrTraits>& rhs) const {
  if (Dim != OtherDim) {
    return false;
  }

  for (int i = 0; i < Dim; ++i) {
    if (this->getSize(i) != rhs.getSize(i)) {
      return false;
    }

    if (this->getStride(i) != rhs.getStride(i)) {
      return false;
    }
  }

  return true;
}

template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
template <typename OtherT, int OtherDim>
__host__ __device__ bool
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::isSameSize(
    const Tensor<OtherT, OtherDim, InnerContig, IndexT, PtrTraits>& rhs) const {
  if (Dim != OtherDim) {
    return false;
  }

  for (int i = 0; i < Dim; ++i) {
    if (this->getSize(i) != rhs.getSize(i)) {
      return false;
    }
  }

  return true;
}

template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
template <typename U>
__host__ __device__ Tensor<U, Dim, InnerContig, IndexT, PtrTraits>
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::cast() {
  static_assert(sizeof(U) == sizeof(T), "cast must be to same size object");

  return Tensor<U, Dim, InnerContig, IndexT, PtrTraits>(
      reinterpret_cast<U*>(data_), size_, stride_);
}

template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
template <typename U>
__host__ __device__ const Tensor<U, Dim, InnerContig, IndexT, PtrTraits>
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::cast() const {
  static_assert(sizeof(U) == sizeof(T), "cast must be to same size object");

  return Tensor<U, Dim, InnerContig, IndexT, PtrTraits>(
      reinterpret_cast<U*>(data_), size_, stride_);
}

template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
template <typename U>
__host__ __device__ Tensor<U, Dim, InnerContig, IndexT, PtrTraits>
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::castResize() {
  static_assert(sizeof(U) >= sizeof(T), "only handles greater sizes");
  constexpr int kMultiple = sizeof(U) / sizeof(T);

  GPU_FAISS_ASSERT(canCastResize<U>());

  IndexT newSize[Dim];
  IndexT newStride[Dim];

  for (int i = 0; i < Dim - 1; ++i) {
    newSize[i] = size_[i];
    newStride[i] = stride_[i] / kMultiple;
  }

  // The innermost dimension shrinks by kMultiple; its stride remains 1
  newStride[Dim - 1] = 1;
  newSize[Dim - 1] = size_[Dim - 1] / kMultiple;

  return Tensor<U, Dim, InnerContig, IndexT, PtrTraits>(
      reinterpret_cast<U*>(data_), newSize, newStride);
}

template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
template <typename U>
__host__ __device__ const Tensor<U, Dim, InnerContig, IndexT, PtrTraits>
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::castResize() const {
  return const_cast<Tensor<T, Dim, InnerContig, IndexT, PtrTraits>*>(this)->
      castResize<U>();
}

template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
template <typename U>
__host__ __device__ bool
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::canCastResize() const {
  static_assert(sizeof(U) >= sizeof(T), "only handles greater sizes");
  constexpr int kMultiple = sizeof(U) / sizeof(T);

  // All outer strides must be a multiple of the size ratio
  for (int i = 0; i < Dim - 1; ++i) {
    if (stride_[i] % kMultiple != 0) {
      return false;
    }
  }

  // The innermost size must be a multiple of the size ratio
  if (size_[Dim - 1] % kMultiple != 0) {
    return false;
  }

  // The innermost dimension must be contiguous
  if (stride_[Dim - 1] != 1) {
    return false;
  }

  return true;
}

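// Example (usage sketch, not part of the original file): reinterpreting a
// float tensor as float4 to enable vectorized loads. The sizes are assumed;
// the innermost dimension must be contiguous and divisible by 4, which is
// exactly what canCastResize<float4>() checks.
//
//   Tensor<float, 2, true> vecs(devPtr, {1000, 64});
//   if (vecs.canCastResize<float4>()) {
//     // 1000 x 16 tensor of float4 over the same memory
//     auto vecs4 = vecs.castResize<float4>();
//   }
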
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
template <typename NewIndexT>
__host__ Tensor<T, Dim, InnerContig, NewIndexT, PtrTraits>
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::castIndexType() const {
  if (sizeof(NewIndexT) < sizeof(IndexT)) {
    // A narrower index type must still be able to address every element
    GPU_FAISS_ASSERT(this->canUseIndexType<NewIndexT>());
  }

  NewIndexT newSize[Dim];
  NewIndexT newStride[Dim];
  for (int i = 0; i < Dim; ++i) {
    newSize[i] = (NewIndexT) size_[i];
    newStride[i] = (NewIndexT) stride_[i];
  }

  return Tensor<T, Dim, InnerContig, NewIndexT, PtrTraits>(
      data_, newSize, newStride);
}

template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
template <typename NewIndexT>
__host__ bool
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::canUseIndexType() const {
  static_assert(sizeof(size_t) >= sizeof(IndexT),
                "index size too large");
  static_assert(sizeof(size_t) >= sizeof(NewIndexT),
                "new index size too large");

  // Find the maximum element offset that indexing can produce
  size_t maxOffset = 0;

  for (int i = 0; i < Dim; ++i) {
    size_t curMaxOffset = (size_t) size_[i] * (size_t) stride_[i];
    if (curMaxOffset > maxOffset) {
      maxOffset = curMaxOffset;
    }
  }

  if (maxOffset > (size_t) std::numeric_limits<NewIndexT>::max()) {
    return false;
  }

  return true;
}

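// Example (usage sketch, not part of the original file): narrowing the index
// type for kernels that use 32-bit index arithmetic. The tensor below is
// assumed for illustration; canUseIndexType() guards against offsets that
// would overflow the narrower type.
//
//   Tensor<float, 2, true, long> bigIndexVecs(devPtr, {1000, 64});
//   if (bigIndexVecs.canUseIndexType<int>()) {
//     auto intIndexVecs = bigIndexVecs.castIndexType<int>();
//   }
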
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__ size_t
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::numElements() const {
  size_t size = (size_t) getSize(0);

  for (int i = 1; i < Dim; ++i) {
    size *= (size_t) getSize(i);
  }

  return size;
}

template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__ bool
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::isContiguous() const {
  long prevSize = 1;

  for (int i = Dim - 1; i >= 0; --i) {
    if (getSize(i) != (IndexT) 1) {
      if (getStride(i) == prevSize) {
        prevSize *= getSize(i);
      } else {
        return false;
      }
    }
  }

  return true;
}

template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__ bool
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::isConsistentlySized(int i) const {
  if (i == 0 && getStride(i) > 0 && getSize(i) > 0) {
    return true;
  } else if ((i > 0) && (i < Dim) && (getStride(i) > 0) &&
             ((getStride(i - 1) / getStride(i)) >= getSize(i))) {
    return true;
  }

  return false;
}

template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__ bool
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::isConsistentlySized() const {
  for (int i = 0; i < Dim; ++i) {
    if (!isConsistentlySized(i)) {
      return false;
    }
  }

  return true;
}

template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__ bool
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::isContiguousDim(int i) const {
  return (i == Dim - 1) ||
         ((i < Dim - 1) &&
          ((getStride(i) / getStride(i + 1)) == getSize(i + 1)));
}

template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__ Tensor<T, Dim, InnerContig, IndexT, PtrTraits>
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::transpose(int dim1,
                                                          int dim2) const {
  GPU_FAISS_ASSERT(dim1 >= 0 && dim1 < Dim);
  GPU_FAISS_ASSERT(dim2 >= 0 && dim2 < Dim);

  // If this tensor is marked innermost contiguous, the innermost dimension
  // cannot take part in the transposition
  if (InnerContig) {
    GPU_FAISS_ASSERT(dim1 != Dim - 1 && dim2 != Dim - 1);
  }

  IndexT newSize[Dim];
  IndexT newStride[Dim];

  for (int i = 0; i < Dim; ++i) {
    newSize[i] = size_[i];
    newStride[i] = stride_[i];
  }

  IndexT tmp = newSize[dim1];
  newSize[dim1] = newSize[dim2];
  newSize[dim2] = tmp;

  tmp = newStride[dim1];
  newStride[dim1] = newStride[dim2];
  newStride[dim2] = tmp;

  return Tensor<T, Dim, InnerContig, IndexT, PtrTraits>(data_, newSize,
                                                        newStride);
}

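// Example (usage sketch, not part of the original file): swapping the two
// outer dimensions of a 3-d tensor. No data moves; only sizes and strides are
// exchanged. With InnerContig == true the innermost dimension cannot be
// transposed, hence the 3-d example.
//
//   Tensor<float, 3, true> t(devPtr, {10, 20, 64});
//   auto tT = t.transpose(0, 1);
//   // tT.getSize(0) == 20, tT.getSize(1) == 10; tT is no longer fully
//   // contiguous, but its innermost dimension still has stride 1
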
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
template <int NewDim>
__host__ __device__ Tensor<T, NewDim, InnerContig, IndexT, PtrTraits>
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::upcastOuter() {
  // Can only create tensors of greater dimension
  static_assert(NewDim > Dim, "Can only upcast to greater dim");

  IndexT newSize[NewDim];
  IndexT newStride[NewDim];

  int shift = NewDim - Dim;

  for (int i = 0; i < NewDim; ++i) {
    if (i < shift) {
      // These are the new (outer) dimensions of size 1
      newSize[i] = (IndexT) 1;
      newStride[i] = size_[0] * stride_[0];
    } else {
      // Shift the existing dimensions inward
      newSize[i] = size_[i - shift];
      newStride[i] = stride_[i - shift];
    }
  }

  return Tensor<T, NewDim, InnerContig, IndexT, PtrTraits>(
      data_, newSize, newStride);
}

template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
template <int NewDim>
__host__ __device__ Tensor<T, NewDim, InnerContig, IndexT, PtrTraits>
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::upcastInner() {
  // Can only create tensors of greater dimension
  static_assert(NewDim > Dim, "Can only upcast to greater dim");

  IndexT newSize[NewDim];
  IndexT newStride[NewDim];

  for (int i = 0; i < NewDim; ++i) {
    if (i < Dim) {
      // Existing dimensions are copied over
      newSize[i] = size_[i];
      newStride[i] = stride_[i];
    } else {
      // The new (inner) dimensions have size 1 and stride 1
      newSize[i] = (IndexT) 1;
      newStride[i] = (IndexT) 1;
    }
  }

  return Tensor<T, NewDim, InnerContig, IndexT, PtrTraits>(
      data_, newSize, newStride);
}

template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
template <int NewDim>
__host__ __device__ Tensor<T, NewDim, InnerContig, IndexT, PtrTraits>
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::downcastOuter() {
  // Can only create tensors of lesser dimension
  static_assert(NewDim < Dim, "Can only downcast to lesser dim");

  // The dimensions being collapsed must not contain padding, otherwise the
  // collapsed dimension would include the padding elements
  for (int i = 0; i < Dim - NewDim; ++i) {
    bool cont = isContiguousDim(i);
    GPU_FAISS_ASSERT(cont);
  }

  IndexT newSize[NewDim];
  IndexT newStride[NewDim];

  int ignoredDims = Dim - NewDim;
  IndexT collapsedSize = 1;

  for (int i = 0; i < Dim; ++i) {
    if (i < ignoredDims) {
      // Accumulate the sizes of the collapsed outer dimensions
      collapsedSize *= getSize(i);
    } else {
      if (i == ignoredDims) {
        // The first retained dimension absorbs the collapsed sizes
        newSize[i - ignoredDims] = collapsedSize * getSize(i);
      } else {
        // Remaining dimensions are unchanged
        newSize[i - ignoredDims] = getSize(i);
      }

      newStride[i - ignoredDims] = getStride(i);
    }
  }

  return Tensor<T, NewDim, InnerContig, IndexT, PtrTraits>(
      data_, newSize, newStride);
}

template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
template <int NewDim>
__host__ __device__ Tensor<T, NewDim, InnerContig, IndexT, PtrTraits>
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::downcastInner() {
  // Can only create tensors of lesser dimension
  static_assert(NewDim < Dim, "Can only downcast to lesser dim");

  // The dimensions being collapsed must not contain padding, otherwise the
  // collapsed dimension would include the padding elements
  for (int i = NewDim; i < Dim; ++i) {
    GPU_FAISS_ASSERT(isContiguousDim(i));
  }

  IndexT newSize[NewDim];
  IndexT newStride[NewDim];

  IndexT collapsedSize = 1;

  for (int i = Dim - 1; i >= 0; --i) {
    if (i >= NewDim) {
      // Accumulate the sizes of the collapsed inner dimensions
      collapsedSize *= getSize(i);
    } else if (i == NewDim - 1) {
      // The innermost retained dimension absorbs the collapsed sizes
      newSize[i] = collapsedSize * getSize(i);
      newStride[i] = getStride(Dim - 1);
    } else {
      // Remaining dimensions are unchanged
      newSize[i] = getSize(i);
      newStride[i] = getStride(i);
    }
  }

  return Tensor<T, NewDim, InnerContig, IndexT, PtrTraits>(
      data_, newSize, newStride);
}

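// Example (usage sketch, not part of the original file): collapsing dimensions
// of a contiguous tensor. downcastOuter() folds the outer dimensions into the
// first retained one, downcastInner() folds the inner dimensions into the last
// retained one.
//
//   Tensor<float, 3, true> t(devPtr, {10, 20, 64});
//   auto flatOuter = t.downcastOuter<2>();   // 200 x 64
//   auto flatInner = t.downcastInner<2>();   // 10 x 1280
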
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
template <int SubDim>
__host__ __device__ Tensor<T, SubDim, InnerContig, IndexT, PtrTraits>
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::view(DataPtrType at) {
  static_assert(SubDim >= 1 && SubDim < Dim,
                "can only create view of lesser dim");

  IndexT viewSizes[SubDim];
  IndexT viewStrides[SubDim];

  // The view keeps the innermost SubDim dimensions
  for (int i = 0; i < SubDim; ++i) {
    viewSizes[i] = size_[Dim - SubDim + i];
    viewStrides[i] = stride_[Dim - SubDim + i];
  }

  return Tensor<T, SubDim, InnerContig, IndexT, PtrTraits>(
      at, viewSizes, viewStrides);
}

template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
template <int SubDim>
__host__ __device__ Tensor<T, SubDim, InnerContig, IndexT, PtrTraits>
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::view() {
  return view<SubDim>(data_);
}

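// Example (usage sketch, not part of the original file): taking a lower-dim
// view over the innermost dimensions, e.g. treating one row of a 2-d tensor
// as a 1-d tensor. The pointer arithmetic below is for illustration only.
//
//   Tensor<float, 2, true> vecs(devPtr, {1000, 64});
//   auto row0 = vecs.view<1>();   // 64-element view starting at data()
//   auto row5 = vecs.view<1>(vecs.data() + 5 * vecs.getStride(0));
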
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__ Tensor<T, Dim, InnerContig, IndexT, PtrTraits>
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::narrowOutermost(IndexT start,
                                                                IndexT size) {
  return this->narrow(0, start, size);
}

template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__ Tensor<T, Dim, InnerContig, IndexT, PtrTraits>
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::narrow(int dim,
                                                       IndexT start,
                                                       IndexT size) {
  DataPtrType newData = data_;

  GPU_FAISS_ASSERT(start >= 0 &&
                   start < size_[dim] &&
                   (start + size) <= size_[dim]);

  if (start > 0) {
    newData += (size_t) start * stride_[dim];
  }

  IndexT newSize[Dim];
  for (int i = 0; i < Dim; ++i) {
    if (i == dim) {
      GPU_FAISS_ASSERT(start + size <= size_[dim]);
      newSize[i] = size;
    } else {
      newSize[i] = size_[i];
    }
  }

  // Strides are unchanged; if we were innermost contiguous before, we still are
  return Tensor<T, Dim, InnerContig, IndexT, PtrTraits>(newData, newSize,
                                                        stride_);
}

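// Example (usage sketch, not part of the original file): selecting a
// contiguous range along one dimension. The result aliases the same memory
// with an adjusted data pointer and size; strides are unchanged.
//
//   Tensor<float, 2, true> vecs(devPtr, {1000, 64});
//   auto firstHalf = vecs.narrowOutermost(0, 500);   // rows [0, 500)
//   auto batch = vecs.narrow(0, 500, 250);           // rows [500, 750)
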
template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
template <int NewDim>
__host__ __device__ Tensor<T, NewDim, InnerContig, IndexT, PtrTraits>
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::view(
    std::initializer_list<IndexT> sizes) {
  GPU_FAISS_ASSERT(this->isContiguous());
  GPU_FAISS_ASSERT(sizes.size() == NewDim);

  // The total number of elements must be preserved by the reshape
  size_t curSize = numElements();
  size_t newSize = 1;

  for (auto s : sizes) {
    newSize *= s;
  }

  GPU_FAISS_ASSERT(curSize == newSize);
  return Tensor<T, NewDim, InnerContig, IndexT, PtrTraits>(data(), sizes);
}

} } // namespace