Cleanup of cuda code.

0ce6ed5b · Davis King · 8073f4b1 · 0ce6ed5b · 0ce6ed5b · 0ce6ed5b
Commit 0ce6ed5b authored Apr 04, 2018 by Davis King
Showing 4 changed files with 26 additions and 4 deletions

cuda_data_ptr.h dlib/dnn/cuda_data_ptr.h +5 -1

cudnn_dlibapi.cpp dlib/dnn/cudnn_dlibapi.cpp +2 -2

cudnn_dlibapi.h dlib/dnn/cudnn_dlibapi.h +18 -0

layers_abstract.h dlib/dnn/layers_abstract.h +1 -1

No files found.
--- a/dlib/dnn/cuda_data_ptr.h
+++ b/dlib/dnn/cuda_data_ptr.h
@@ -160,8 +160,12 @@ namespace dlib
            cuda_data_void_ptr get(size_t size)
            /*!
                ensures
-                    - This object will return the buffer of requested size of larger
+                    - This object will return the buffer of requested size or larger.
                    - buffer.size() >= size
+                    - Client code should not hold the returned cuda_data_void_ptr for long
+                      durations, but instead should call get() whenever the buffer is
+                      needed.  Doing so ensures that multiple buffers are not kept around
+                      in the event of a resize.
            !*/
            {
                if (buffer.size() < size)

--- a/dlib/dnn/cudnn_dlibapi.cpp
+++ b/dlib/dnn/cudnn_dlibapi.cpp
@@ -160,12 +160,12 @@ namespace dlib
            std::vector<std::weak_ptr<resizable_cuda_buffer>> buffers;
        };

-
-        static std::shared_ptr<resizable_cuda_buffer> device_global_buffer()
+        std::shared_ptr<resizable_cuda_buffer> device_global_buffer()
        {
            thread_local cudnn_device_buffer buffer;
            return buffer.get_buffer();
        }
+
    // ------------------------------------------------------------------------------------

        class cudnn_activation_descriptor

--- a/dlib/dnn/cudnn_dlibapi.h
+++ b/dlib/dnn/cudnn_dlibapi.h
@@ -17,6 +17,24 @@ namespace dlib
    namespace cuda 
    {

+    // ----------------------------------------------------------------------------------------
+
+        std::shared_ptr<resizable_cuda_buffer> device_global_buffer(
+        );
+        /*!
+            ensures
+                - Returns a pointer to a globally shared CUDA memory buffer on the
+                  currently selected CUDA device.  The buffer is also thread local.  So
+                  each host thread will get its own buffer.  You can use this global buffer
+                  as scratch space for CUDA computations that all take place on the default
+                  stream.  Using it in this way ensures that there aren't any race conditions
+                  involving the use of the buffer.
+                - The global buffer is deallocated once all references to it are
+                  destructed.  It will be reallocated as required.  So if you want to avoid
+                  these reallocations then hold a copy of the shared_ptr returned by this
+                  function.
+        !*/
+
    // -----------------------------------------------------------------------------------

        class tensor_descriptor

--- a/dlib/dnn/layers_abstract.h
+++ b/dlib/dnn/layers_abstract.h
@@ -366,7 +366,7 @@ namespace dlib
            follows:

            ensures
-                - calling clean() Causes this object to forget about everything except its
+                - calling clean() causes this object to forget about everything except its
                  parameters.  This is useful if your layer caches information between
                  forward and backward passes and you want to clean out that cache
                  information before saving the network to disk.