Faiss
Tensor-inl.cuh
/**
 * Copyright (c) 2015-present, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under the BSD+Patents license found in the
 * LICENSE file in the root directory of this source tree.
 */

// Copyright 2004-present Facebook. All Rights Reserved.

#include "../GpuFaissAssert.h"
#include "DeviceUtils.h"
#include <initializer_list> // std::initializer_list, used by the list-based constructors
#include <limits>
#include <utility>          // std::move, used by the move constructor/assignment

namespace faiss { namespace gpu {

template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::Tensor()
    : data_(nullptr) {
  static_assert(Dim > 0, "must have > 0 dimensions");

  for (int i = 0; i < Dim; ++i) {
    size_[i] = 0;
    stride_[i] = (IndexT) 1;
  }
}

template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::Tensor(
  Tensor<T, Dim, InnerContig, IndexT, PtrTraits>& t) {
  this->operator=(t);
}

template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::Tensor(
  Tensor<T, Dim, InnerContig, IndexT, PtrTraits>&& t) {
  this->operator=(std::move(t));
}

template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>&
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::operator=(
  Tensor<T, Dim, InnerContig, IndexT, PtrTraits>& t) {
  data_ = t.data_;
  for (int i = 0; i < Dim; ++i) {
    size_[i] = t.size_[i];
    stride_[i] = t.stride_[i];
  }

  return *this;
}

template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>&
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::operator=(
  Tensor<T, Dim, InnerContig, IndexT, PtrTraits>&& t) {
  data_ = t.data_; t.data_ = nullptr;
  for (int i = 0; i < Dim; ++i) {
    stride_[i] = t.stride_[i]; t.stride_[i] = 0;
    size_[i] = t.size_[i]; t.size_[i] = 0;
  }

  return *this;
}

template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::
Tensor(DataPtrType data, const IndexT sizes[Dim])
    : data_(data) {
  static_assert(Dim > 0, "must have > 0 dimensions");

  for (int i = 0; i < Dim; ++i) {
    size_[i] = sizes[i];
  }

  stride_[Dim - 1] = (IndexT) 1;
  for (int i = Dim - 2; i >= 0; --i) {
    stride_[i] = stride_[i + 1] * sizes[i + 1];
  }
}
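
// Example (illustrative sketch, not part of the original source): for a fully
// contiguous row-major tensor the loop above yields stride_[Dim - 1] = 1 and
// stride_[i] = stride_[i + 1] * size_[i + 1]. Assuming the default template
// parameters declared in Tensor.cuh and a hypothetical device pointer `p`:
//
//   int sizes[3] = {2, 3, 4};
//   faiss::gpu::Tensor<float, 3> t(p, sizes);
//   // t.getSize(i)   -> 2, 3, 4
//   // t.getStride(i) -> 12, 4, 1; element (i, j, k) lives at p + i*12 + j*4 + k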

template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::
Tensor(DataPtrType data, std::initializer_list<IndexT> sizes)
    : data_(data) {
  GPU_FAISS_ASSERT(sizes.size() == Dim);
  static_assert(Dim > 0, "must have > 0 dimensions");

  int i = 0;
  for (auto s : sizes) {
    size_[i++] = s;
  }

  stride_[Dim - 1] = (IndexT) 1;
  for (int j = Dim - 2; j >= 0; --j) {
    stride_[j] = stride_[j + 1] * size_[j + 1];
  }
}

template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::Tensor(
  DataPtrType data, const IndexT sizes[Dim], const IndexT strides[Dim])
    : data_(data) {
  static_assert(Dim > 0, "must have > 0 dimensions");

  for (int i = 0; i < Dim; ++i) {
    size_[i] = sizes[i];
    stride_[i] = strides[i];
  }
}

template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ void
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::copyFrom(
  Tensor<T, Dim, InnerContig, IndexT, PtrTraits>& t,
  cudaStream_t stream) {
  // The tensor must be fully contiguous
  GPU_FAISS_ASSERT(this->isContiguous());

  // Size must be the same (since dimensions are checked and
  // contiguity is assumed, we need only check the total number of
  // elements)
  GPU_FAISS_ASSERT(this->numElements() == t.numElements());

  if (t.numElements() > 0) {
    GPU_FAISS_ASSERT(this->data_);
    GPU_FAISS_ASSERT(t.data());

    int ourDev = getDeviceForAddress(this->data_);
    int tDev = getDeviceForAddress(t.data());

    if (tDev == -1) {
      CUDA_VERIFY(cudaMemcpyAsync(this->data_,
                                  t.data(),
                                  this->getSizeInBytes(),
                                  ourDev == -1 ? cudaMemcpyHostToHost :
                                  cudaMemcpyHostToDevice,
                                  stream));
    } else {
      CUDA_VERIFY(cudaMemcpyAsync(this->data_,
                                  t.data(),
                                  this->getSizeInBytes(),
                                  ourDev == -1 ? cudaMemcpyDeviceToHost :
                                  cudaMemcpyDeviceToDevice,
                                  stream));
    }
  }
}
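
// Example (illustrative sketch, not part of the original source): copying host
// data into a device tensor on a stream. `devPtr`, `hostPtr` and `stream` are
// hypothetical; copyFrom() infers the copy direction via getDeviceForAddress()
// as shown above, and the copy is asynchronous on `stream`.
//
//   faiss::gpu::Tensor<float, 2> dst(devPtr, {128, 64});
//   faiss::gpu::Tensor<float, 2> src(hostPtr, {128, 64});
//   dst.copyFrom(src, stream);         // async H2D copy; element counts must match
//   cudaStreamSynchronize(stream);     // synchronize before using dst outside this stream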

template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ void
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::copyTo(
  Tensor<T, Dim, InnerContig, IndexT, PtrTraits>& t,
  cudaStream_t stream) {
  // The tensor must be fully contiguous
  GPU_FAISS_ASSERT(this->isContiguous());

  // Size must be the same (since dimensions are checked and
  // contiguity is assumed, we need only check the total number of
  // elements)
  GPU_FAISS_ASSERT(this->numElements() == t.numElements());

  if (t.numElements() > 0) {
    GPU_FAISS_ASSERT(this->data_);
    GPU_FAISS_ASSERT(t.data());

    int ourDev = getDeviceForAddress(this->data_);
    int tDev = getDeviceForAddress(t.data());

    if (tDev == -1) {
      CUDA_VERIFY(cudaMemcpyAsync(t.data(),
                                  this->data_,
                                  this->getSizeInBytes(),
                                  ourDev == -1 ? cudaMemcpyHostToHost :
                                  cudaMemcpyDeviceToHost,
                                  stream));
    } else {
      CUDA_VERIFY(cudaMemcpyAsync(t.data(),
                                  this->data_,
                                  this->getSizeInBytes(),
                                  ourDev == -1 ? cudaMemcpyHostToDevice :
                                  cudaMemcpyDeviceToDevice,
                                  stream));
    }
  }
}

template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
template <typename OtherT, int OtherDim>
__host__ __device__ bool
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::isSame(
  const Tensor<OtherT, OtherDim, InnerContig, IndexT, PtrTraits>& rhs) const {
  if (Dim != OtherDim) {
    return false;
  }

  for (int i = 0; i < Dim; ++i) {
    if (this->getSize(i) != rhs.getSize(i)) {
      return false;
    }

    if (this->getStride(i) != rhs.getStride(i)) {
      return false;
    }
  }

  return true;
}

template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
template <typename OtherT, int OtherDim>
__host__ __device__ bool
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::isSameSize(
  const Tensor<OtherT, OtherDim, InnerContig, IndexT, PtrTraits>& rhs) const {
  if (Dim != OtherDim) {
    return false;
  }

  for (int i = 0; i < Dim; ++i) {
    if (this->getSize(i) != rhs.getSize(i)) {
      return false;
    }
  }

  return true;
}

template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
template <typename U>
__host__ __device__ Tensor<U, Dim, InnerContig, IndexT, PtrTraits>
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::cast() {
  static_assert(sizeof(U) == sizeof(T), "cast must be to same size object");

  return Tensor<U, Dim, InnerContig, IndexT, PtrTraits>(
    reinterpret_cast<U*>(data_), size_, stride_);
}

template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
template <typename U>
__host__ __device__ const Tensor<U, Dim, InnerContig, IndexT, PtrTraits>
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::cast() const {
  static_assert(sizeof(U) == sizeof(T), "cast must be to same size object");

  return Tensor<U, Dim, InnerContig, IndexT, PtrTraits>(
    reinterpret_cast<U*>(data_), size_, stride_);
}

template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
template <typename U>
__host__ __device__ Tensor<U, Dim, InnerContig, IndexT, PtrTraits>
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::castResize() {
  static_assert(sizeof(U) >= sizeof(T), "only handles greater sizes");
  constexpr int kMultiple = sizeof(U) / sizeof(T);

  GPU_FAISS_ASSERT(canCastResize<U>());

  IndexT newSize[Dim];
  IndexT newStride[Dim];

  for (int i = 0; i < Dim - 1; ++i) {
    newSize[i] = size_[i];
    newStride[i] = stride_[i] / kMultiple;
  }

  newStride[Dim - 1] = 1; // this is the same as the old stride
  newSize[Dim - 1] = size_[Dim - 1] / kMultiple;

  return Tensor<U, Dim, InnerContig, IndexT, PtrTraits>(
    reinterpret_cast<U*>(data_), newSize, newStride);
}
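
// Example (illustrative sketch, not part of the original source): reinterpreting
// a float tensor as float4 for vectorized access. This assumes the innermost
// size and all outer strides are divisible by 4 (see canCastResize() below) and
// that the hypothetical device pointer `devPtr` is 16-byte aligned.
//
//   faiss::gpu::Tensor<float, 2> t(devPtr, {128, 64});
//   if (t.canCastResize<float4>()) {
//     auto t4 = t.castResize<float4>();   // sizes become {128, 16}, strides {16, 1}
//   }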

template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
template <typename U>
__host__ __device__ const Tensor<U, Dim, InnerContig, IndexT, PtrTraits>
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::castResize() const {
  return const_cast<Tensor<T, Dim, InnerContig, IndexT, PtrTraits>*>(this)->
    castResize<U>();
}

template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
template <typename U>
__host__ __device__ bool
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::canCastResize() const {
  static_assert(sizeof(U) >= sizeof(T), "only handles greater sizes");
  constexpr int kMultiple = sizeof(U) / sizeof(T);

  // Check all outer strides
  for (int i = 0; i < Dim - 1; ++i) {
    if (stride_[i] % kMultiple != 0) {
      return false;
    }
  }

  // Check inner size
  if (size_[Dim - 1] % kMultiple != 0) {
    return false;
  }

  if (stride_[Dim - 1] != 1) {
    return false;
  }

  return true;
}

template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
template <typename NewIndexT>
__host__ Tensor<T, Dim, InnerContig, NewIndexT, PtrTraits>
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::castIndexType() const {
  if (sizeof(NewIndexT) < sizeof(IndexT)) {
    GPU_FAISS_ASSERT(this->canUseIndexType<NewIndexT>());
  }

  NewIndexT newSize[Dim];
  NewIndexT newStride[Dim];
  for (int i = 0; i < Dim; ++i) {
    newSize[i] = (NewIndexT) size_[i];
    newStride[i] = (NewIndexT) stride_[i];
  }

  return Tensor<T, Dim, InnerContig, NewIndexT, PtrTraits>(
    data_, newSize, newStride);
}
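
// Example (illustrative sketch, not part of the original source): narrowing the
// index type when every offset fits, e.g. before handing a tensor to a kernel
// that indexes with int. `t64` is a hypothetical Tensor<float, 2, true, long>.
//
//   if (t64.canUseIndexType<int>()) {
//     auto t32 = t64.castIndexType<int>();   // same data, int sizes/strides
//   }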

template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
template <typename NewIndexT>
__host__ bool
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::canUseIndexType() const {
  static_assert(sizeof(size_t) >= sizeof(IndexT),
                "index size too large");
  static_assert(sizeof(size_t) >= sizeof(NewIndexT),
                "new index size too large");

  // Find the maximum offset that can be calculated
  // FIXME: maybe also consider offset in bytes? multiply by sizeof(T)?
  size_t maxOffset = 0;

  for (int i = 0; i < Dim; ++i) {
    size_t curMaxOffset = (size_t) size_[i] * (size_t) stride_[i];
    if (curMaxOffset > maxOffset) {
      maxOffset = curMaxOffset;
    }
  }

  if (maxOffset > (size_t) std::numeric_limits<NewIndexT>::max()) {
    return false;
  }

  return true;
}

template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__ size_t
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::numElements() const {
  size_t size = (size_t) getSize(0);

  for (int i = 1; i < Dim; ++i) {
    size *= (size_t) getSize(i);
  }

  return size;
}

template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__ bool
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::isContiguous() const {
  long prevSize = 1;

  for (int i = Dim - 1; i >= 0; --i) {
    if (getSize(i) != (IndexT) 1) {
      if (getStride(i) == prevSize) {
        prevSize *= getSize(i);
      } else {
        return false;
      }
    }
  }

  return true;
}

template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__ bool
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::isConsistentlySized(int i) const {
  if (i == 0 && getStride(i) > 0 && getSize(i) > 0) {
    return true;
  } else if ((i > 0) && (i < Dim) && (getStride(i) > 0) &&
             ((getStride(i - 1) / getStride(i)) >= getSize(i))) {
    return true;
  }

  return false;
}

template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__ bool
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::isConsistentlySized() const {
  for (int i = 0; i < Dim; ++i) {
    if (!isConsistentlySized(i)) {
      return false;
    }
  }

  return true;
}

template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__ bool
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::isContiguousDim(int i) const {
  return (i == Dim - 1) || // just in case
    ((i < Dim - 1) &&
     ((getStride(i) / getStride(i + 1)) == getSize(i + 1)));
}

template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__ Tensor<T, Dim, InnerContig, IndexT, PtrTraits>
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::transpose(int dim1,
                                                          int dim2) const {
  GPU_FAISS_ASSERT(dim1 >= 0 && dim1 < Dim);
  GPU_FAISS_ASSERT(dim2 >= 0 && dim2 < Dim);

  // If a tensor is innermost contiguous, one cannot transpose the innermost
  // dimension
  if (InnerContig) {
    GPU_FAISS_ASSERT(dim1 != Dim - 1 && dim2 != Dim - 1);
  }

  IndexT newSize[Dim];
  IndexT newStride[Dim];

  for (int i = 0; i < Dim; ++i) {
    newSize[i] = size_[i];
    newStride[i] = stride_[i];
  }

  IndexT tmp = newSize[dim1];
  newSize[dim1] = newSize[dim2];
  newSize[dim2] = tmp;

  tmp = newStride[dim1];
  newStride[dim1] = newStride[dim2];
  newStride[dim2] = tmp;

  return Tensor<T, Dim, InnerContig, IndexT, PtrTraits>(data_, newSize, newStride);
}
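
// Example (illustrative sketch, not part of the original source): swapping the
// two outermost dimensions of a 3-d tensor exchanges sizes and strides only; no
// data moves, so the result is generally no longer contiguous. `devPtr` is a
// hypothetical device pointer.
//
//   faiss::gpu::Tensor<float, 3> t(devPtr, {2, 3, 4});   // strides {12, 4, 1}
//   auto tt = t.transpose(0, 1);                         // sizes {3, 2, 4}, strides {4, 12, 1}
//   // tt.isContiguous() -> false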

template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
template <int NewDim>
__host__ __device__ Tensor<T, NewDim, InnerContig, IndexT, PtrTraits>
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::upcastOuter() {
  // Can only create tensors of greater dimension
  static_assert(NewDim > Dim, "Can only upcast to greater dim");

  IndexT newSize[NewDim];
  IndexT newStride[NewDim];

  int shift = NewDim - Dim;

  for (int i = 0; i < NewDim; ++i) {
    if (i < shift) {
      // These are the extended dimensions
      newSize[i] = (IndexT) 1;
      newStride[i] = size_[0] * stride_[0];
    } else {
      // Shift the remaining dimensions
      newSize[i] = size_[i - shift];
      newStride[i] = stride_[i - shift];
    }
  }

  return Tensor<T, NewDim, InnerContig, IndexT, PtrTraits>(
    data_, newSize, newStride);
}
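
// Example (illustrative sketch, not part of the original source): adding leading
// size-1 dimensions, e.g. viewing a [3][4] tensor as [1][3][4] for a routine
// that expects a 3-d tensor. `devPtr` is a hypothetical device pointer.
//
//   faiss::gpu::Tensor<float, 2> t(devPtr, {3, 4});   // strides {4, 1}
//   auto t3 = t.upcastOuter<3>();                     // sizes {1, 3, 4}, strides {12, 4, 1}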

template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
template <int NewDim>
__host__ __device__ Tensor<T, NewDim, InnerContig, IndexT, PtrTraits>
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::upcastInner() {
  // Can only create tensors of greater dimension
  static_assert(NewDim > Dim, "Can only upcast to greater dim");

  IndexT newSize[NewDim];
  IndexT newStride[NewDim];

  for (int i = 0; i < NewDim; ++i) {
    if (i < Dim) {
      // Existing dimensions get copied over
      newSize[i] = size_[i];
      newStride[i] = stride_[i];
    } else {
      // Extended dimensions
      newSize[i] = (IndexT) 1;
      newStride[i] = (IndexT) 1;
    }
  }

  return Tensor<T, NewDim, InnerContig, IndexT, PtrTraits>(
    data_, newSize, newStride);
}

template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
template <int NewDim>
__host__ __device__ Tensor<T, NewDim, InnerContig, IndexT, PtrTraits>
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::downcastOuter() {
  // Can only create tensors of lesser dimension
  static_assert(NewDim < Dim, "Can only downcast to lesser dim");

  // We can't downcast non-contiguous tensors, since it leaves
  // garbage data in the tensor. The tensor needs to be contiguous
  // in all of the dimensions we are collapsing (no padding in
  // them).
  for (int i = 0; i < Dim - NewDim; ++i) {
    bool cont = isContiguousDim(i);
    GPU_FAISS_ASSERT(cont);
  }

  IndexT newSize[NewDim];
  IndexT newStride[NewDim];

  int ignoredDims = Dim - NewDim;
  IndexT collapsedSize = 1;

  for (int i = 0; i < Dim; ++i) {
    if (i < ignoredDims) {
      // Collapse these dimensions
      collapsedSize *= getSize(i);
    } else {
      // Non-collapsed dimensions
      if (i == ignoredDims) {
        // This is the first non-collapsed dimension
        newSize[i - ignoredDims] = collapsedSize * getSize(i);
      } else {
        // Subsequent non-collapsed dimensions
        newSize[i - ignoredDims] = getSize(i);
      }

      newStride[i - ignoredDims] = getStride(i);
    }
  }

  return Tensor<T, NewDim, InnerContig, IndexT, PtrTraits>(
    data_, newSize, newStride);
}
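
// Example (illustrative sketch, not part of the original source): collapsing the
// outermost dimensions of a contiguous tensor, e.g. flattening a hypothetical
// [batch][query][dim] tensor into [batch * query][dim]. `devPtr` is a
// hypothetical device pointer.
//
//   faiss::gpu::Tensor<float, 3> t(devPtr, {8, 16, 64});
//   auto t2 = t.downcastOuter<2>();   // sizes {128, 64}, strides {64, 1}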

template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
template <int NewDim>
__host__ __device__ Tensor<T, NewDim, InnerContig, IndexT, PtrTraits>
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::downcastInner() {
  // Can only create tensors of lesser dimension
  static_assert(NewDim < Dim, "Can only downcast to lesser dim");

  // We can't downcast non-contiguous tensors, since it leaves
  // garbage data in the tensor. The tensor needs to be contiguous
  // in all of the dimensions we are collapsing (no padding in
  // them).
  for (int i = NewDim; i < Dim; ++i) {
    GPU_FAISS_ASSERT(isContiguousDim(i));
  }

  IndexT newSize[NewDim];
  IndexT newStride[NewDim];

  IndexT collapsedSize = 1;

  for (int i = Dim - 1; i >= 0; --i) {
    if (i >= NewDim) {
      // Collapse these dimensions
      collapsedSize *= getSize(i);
    } else {
      // Non-collapsed dimensions
      if (i == NewDim - 1) {
        // This is the first non-collapsed dimension
        newSize[i] = collapsedSize * getSize(i);
        newStride[i] = getStride(Dim - 1);
      } else {
        // Subsequent non-collapsed dimensions
        newSize[i] = getSize(i);
        newStride[i] = getStride(i);
      }
    }
  }

  return Tensor<T, NewDim, InnerContig, IndexT, PtrTraits>(
    data_, newSize, newStride);
}

template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
template <int SubDim>
__host__ __device__ Tensor<T, SubDim, InnerContig, IndexT, PtrTraits>
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::view(DataPtrType at) {
  static_assert(SubDim >= 1 && SubDim < Dim,
                "can only create view of lesser dim");

  IndexT viewSizes[SubDim];
  IndexT viewStrides[SubDim];

  for (int i = 0; i < SubDim; ++i) {
    viewSizes[i] = size_[Dim - SubDim + i];
    viewStrides[i] = stride_[Dim - SubDim + i];
  }

  return Tensor<T, SubDim, InnerContig, IndexT, PtrTraits>(
    at, viewSizes, viewStrides);
}

template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
template <int SubDim>
__host__ __device__ Tensor<T, SubDim, InnerContig, IndexT, PtrTraits>
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::view() {
  return view<SubDim>(data_);
}

template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__ Tensor<T, Dim, InnerContig, IndexT, PtrTraits>
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::narrowOutermost(IndexT start,
                                                                IndexT size) {
  return this->narrow(0, start, size);
}

template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
__host__ __device__ Tensor<T, Dim, InnerContig, IndexT, PtrTraits>
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::narrow(int dim,
                                                       IndexT start,
                                                       IndexT size) {
  DataPtrType newData = data_;

  GPU_FAISS_ASSERT(start >= 0 &&
                   start < size_[dim] &&
                   (start + size) <= size_[dim]);

  if (start > 0) {
    newData += (size_t) start * stride_[dim];
  }

  IndexT newSize[Dim];
  for (int i = 0; i < Dim; ++i) {
    if (i == dim) {
      GPU_FAISS_ASSERT(start + size <= size_[dim]);
      newSize[i] = size;
    } else {
      newSize[i] = size_[i];
    }
  }

  // If we were innermost contiguous before, we are still innermost contiguous
  return Tensor<T, Dim, InnerContig, IndexT, PtrTraits>(newData, newSize, stride_);
}
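
// Example (illustrative sketch, not part of the original source): taking a
// sub-range along one dimension without copying; only the data pointer and that
// dimension's size change, strides are preserved. `devPtr` is a hypothetical
// device pointer.
//
//   faiss::gpu::Tensor<float, 2> t(devPtr, {100, 64});
//   auto chunk = t.narrow(0, 25, 50);        // rows [25, 75), sizes {50, 64}
//   auto head  = t.narrowOutermost(0, 10);   // rows [0, 10)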

template <typename T, int Dim, bool InnerContig,
          typename IndexT, template <typename U> class PtrTraits>
template <int NewDim>
__host__ __device__ Tensor<T, NewDim, true, IndexT, PtrTraits>
Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::view(
  std::initializer_list<IndexT> sizes) {
  GPU_FAISS_ASSERT(this->isContiguous());

  GPU_FAISS_ASSERT(sizes.size() == NewDim);

  // The total size of the new view must be the same as the total size
  // of the old view
  size_t curSize = numElements();
  size_t newSize = 1;

  for (auto s : sizes) {
    newSize *= s;
  }

  GPU_FAISS_ASSERT(curSize == newSize);
  return Tensor<T, NewDim, true, IndexT, PtrTraits>(data(), sizes);
}
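
// Example (illustrative sketch, not part of the original source): reshaping a
// fully contiguous tensor to a different dimensionality with the same total
// number of elements. `devPtr` is a hypothetical device pointer.
//
//   faiss::gpu::Tensor<float, 2> t(devPtr, {128, 64});
//   auto t3 = t.view<3>({128, 8, 8});   // same data viewed as [128][8][8]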

} } // namespace