Commit 88f5d9a3 authored by Davis King's avatar Davis King

Implemented more cuDNN bindings and cleaned up code a bit.

parent e9efffff
......@@ -36,6 +36,17 @@ namespace dlib
}
}
// ------------------------------------------------------------------------------------
// Convenience helper: pull the raw cuDNN tensor descriptor out of a dlib
// tensor.  NOTE(review): assumes get_handle() returns the descriptor as an
// opaque pointer (hence the cast) -- confirm against tensor_descriptor.
static const cudnnTensorDescriptor_t descriptor(const tensor& t)
{
return (const cudnnTensorDescriptor_t)t.get_cudnn_tensor_descriptor().get_handle();
}
// Overload for a bare tensor_descriptor: cast its opaque handle back to the
// cuDNN descriptor type.  NOTE(review): presumably get_handle() returns
// void*; verify before changing the cast style.
static const cudnnTensorDescriptor_t descriptor(const tensor_descriptor& t)
{
return (const cudnnTensorDescriptor_t)t.get_handle();
}
// ------------------------------------------------------------------------------------
class cudnn_context
......@@ -155,6 +166,13 @@ namespace dlib
const tensor& src
)
{
check(cudnnAddTensor_v3(context(),
&alpha,
descriptor(src),
src.device(),
&beta,
descriptor(dest),
dest.device()));
}
void set_tensor (
......@@ -162,6 +180,12 @@ namespace dlib
float value
)
{
if (t.size() == 0)
return;
check(cudnnSetTensor(context(),
descriptor(t),
t.device(),
&value));
}
void scale_tensor (
......@@ -169,6 +193,12 @@ namespace dlib
float value
)
{
if (t.size() == 0)
return;
check(cudnnScaleTensor(context(),
descriptor(t),
t.device(),
&value));
}
// ------------------------------------------------------------------------------------
......@@ -246,7 +276,7 @@ namespace dlib
check(cudnnGetConvolution2dForwardOutputDim(
(const cudnnConvolutionDescriptor_t)conv_handle,
(const cudnnTensorDescriptor_t)data.get_cudnn_tensor_descriptor().get_handle(),
descriptor(data),
(const cudnnFilterDescriptor_t)filter_handle,
&out_num_samples,
&out_k,
......@@ -259,10 +289,10 @@ namespace dlib
cudnnConvolutionFwdAlgo_t forward_best_algo;
check(cudnnGetConvolutionForwardAlgorithm(
context(),
(const cudnnTensorDescriptor_t)data.get_cudnn_tensor_descriptor().get_handle(),
descriptor(data),
(const cudnnFilterDescriptor_t)filter_handle,
(const cudnnConvolutionDescriptor_t)conv_handle,
(const cudnnTensorDescriptor_t)dest_desc.get_handle(),
descriptor(dest_desc),
CUDNN_CONVOLUTION_FWD_PREFER_FASTEST, // or CUDNN_CONVOLUTION_FWD_NO_WORKSPACE,
std::numeric_limits<size_t>::max(),
&forward_best_algo));
......@@ -271,10 +301,10 @@ namespace dlib
check(cudnnGetConvolutionForwardWorkspaceSize(
context(),
(const cudnnTensorDescriptor_t)data.get_cudnn_tensor_descriptor().get_handle(),
descriptor(data),
(const cudnnFilterDescriptor_t)filter_handle,
(const cudnnConvolutionDescriptor_t)conv_handle,
(const cudnnTensorDescriptor_t)dest_desc.get_handle(),
descriptor(dest_desc),
forward_best_algo,
&forward_workspace_size_in_bytes));
......@@ -313,7 +343,7 @@ namespace dlib
check(cudnnConvolutionForward(
context(),
&alpha,
(const cudnnTensorDescriptor_t)data.get_cudnn_tensor_descriptor().get_handle(),
descriptor(data),
data.device(),
(const cudnnFilterDescriptor_t)filter_handle,
filters.device(),
......@@ -322,7 +352,7 @@ namespace dlib
forward_workspace,
forward_workspace_size_in_bytes,
&beta,
(const cudnnTensorDescriptor_t)output.get_cudnn_tensor_descriptor().get_handle(),
descriptor(output),
output.device()));
}
......
......@@ -83,15 +83,12 @@ namespace dlib
- dest.k()==src.k() || src.k()==1
ensures
- performs: dest = beta*dest + alpha*src
TODO, clarify:
calls cudnnAddTensor_v3()
This function adds the scaled values of one src tensor to another
tensor. Each dimension of the src tensor must match the corresponding
dimension of the dest tensor or must be equal to 1. In the latter case,
the same value from the src tensor, for those dimensions, will be used
to blend into the dest tensor.
However, how the addition happens depends on the dimensions of src. In
particular, this function adds the scaled values of one src tensor to
dest. Each dimension of the src tensor must match the corresponding
dimension of the dest tensor or must be equal to 1. In the latter case,
the same value from the src tensor, for those dimensions, will be used to
add into the dest tensor.
!*/
void set_tensor (
......@@ -101,7 +98,6 @@ namespace dlib
/*!
ensures
- sets all elements in t equal to value.
Uses cudnnSetTensor().
!*/
void scale_tensor (
......@@ -113,8 +109,6 @@ namespace dlib
- scales all elements of t by the given value. I.e. for all elements E in
t, this function performs:
- E = E*value
uses cudnnScaleTensor()
!*/
// ------------------------------------------------------------------------------------
......
......@@ -3,6 +3,7 @@
#ifndef DLIB_DNn_TENSOR_H_
#define DLIB_DNn_TENSOR_H_
#include "tensor_abstract.h"
#include <cstring>
#include "../matrix.h"
#include "cudnn_dlibapi.h"
......@@ -46,13 +47,42 @@ namespace dlib
tensor& operator= (float val)
{
    // Assign val to every element of this tensor and return *this.
    // TODO, do on the device if that's where the memory is living right now.
#ifdef DLIB_USE_CUDA
    // In a CUDA build the tensor's data is most likely being used on the
    // GPU.  If the device copy is the live one, fill it in place there --
    // filling on the host would just trigger a host-to-device transfer
    // moments later.
    if (data.device_ready())
    {
        cuda::set_tensor(*this, val);
        return *this;
    }
#endif
    // Host-side fallback: write val into every element of the host buffer.
    auto p = data.host();
    size_t i = 0;
    while (i < data.size())
    {
        p[i] = val;
        ++i;
    }
    return *this;
}
tensor& operator*= (float val)
{
    // Scale every element of this tensor by val and return *this.
#ifdef DLIB_USE_CUDA
    // Consistency fix: mirror operator=(float) and only take the device
    // path when the data is already living on the GPU.  The previous code
    // called cuda::scale_tensor() unconditionally, which forces a
    // host-to-device transfer even when the host copy is the live one.
    if (data.device_ready())
    {
        cuda::scale_tensor(*this, val);
        return *this;
    }
#endif
    // Host-side path: scale each element in the host buffer.
    auto d = data.host();
    for (size_t i = 0; i < data.size(); ++i)
        d[i] *= val;
    return *this;
}
tensor& operator/= (float val)
{
    // Divide every element of this tensor by val (implemented as a scale
    // by the reciprocal, reusing operator*=) and return *this.
    // Use a float literal: 1.0/val would compute the reciprocal in double
    // and then silently narrow it back to float at the operator*=(float)
    // call.
    *this *= 1.0f/val;
    return *this;
}
template <typename EXP>
tensor& operator= (const matrix_exp<EXP>& item)
{
......
......@@ -161,6 +161,24 @@ namespace dlib
- returns *this
!*/
tensor& operator*= (
float val
);
/*!
ensures
- pointwise multiplies all elements of *this tensor by val.
- returns *this
!*/
tensor& operator/= (
float val
);
/*!
ensures
- pointwise divides all elements of *this tensor by val.
- returns *this
!*/
template <typename EXP>
tensor& operator= (
const matrix_exp<EXP>& item
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment