Commit bb8e0bc8 authored by Davis King's avatar Davis King

More cleanup

parent 0ce6ed5b
...@@ -59,6 +59,55 @@ namespace dlib ...@@ -59,6 +59,55 @@ namespace dlib
} }
} }
// ------------------------------------------------------------------------------------
class cudnn_device_buffer
{
    /*!
        WHAT THIS OBJECT REPRESENTS
            A registry of scratch buffers, one per CUDA device.  get_buffer()
            returns the buffer belonging to whichever device is currently
            selected on the calling thread, creating it on first use.  Buffers
            are held via weak_ptr so each one is deallocated as soon as all
            outside shared_ptr references to it are destructed, and is
            recreated on the next request.
    !*/
public:
    // not copyable
    cudnn_device_buffer(const cudnn_device_buffer&) = delete;
    cudnn_device_buffer& operator=(const cudnn_device_buffer&) = delete;

    cudnn_device_buffer()
    {
        // Preallocate slots for 16 devices.  get_buffer() grows the vector
        // on demand if a larger device id is ever encountered.
        buffers.resize(16);
    }

    // Rule of Zero: no user-declared destructor needed, the members clean
    // themselves up.  (The previous empty ~cudnn_device_buffer() has been
    // removed; the compiler-generated one is identical.)

    std::shared_ptr<resizable_cuda_buffer> get_buffer (
    )
    /*!
        ensures
            - returns the resizable_cuda_buffer associated with the CUDA
              device currently selected on this thread, allocating a fresh
              one if no live buffer exists for that device.
    !*/
    {
        int new_device_id;
        CHECK_CUDA(cudaGetDevice(&new_device_id));
        // make room for more devices if needed
        if (new_device_id >= static_cast<long>(buffers.size()))
            buffers.resize(new_device_id+16);

        // If we don't have a buffer already for this device then make one
        std::shared_ptr<resizable_cuda_buffer> buff = buffers[new_device_id].lock();
        if (!buff)
        {
            buff = std::make_shared<resizable_cuda_buffer>();
            buffers[new_device_id] = buff;
        }

        // Finally, return the buffer for the current device
        return buff;
    }

private:
    // Indexed by device id.  weak_ptr (not shared_ptr) so this registry
    // never keeps a buffer alive by itself.
    std::vector<std::weak_ptr<resizable_cuda_buffer>> buffers;
};
std::shared_ptr<resizable_cuda_buffer> device_global_buffer()
{
thread_local cudnn_device_buffer buffer;
return buffer.get_buffer();
}
// ------------------------------------------------------------------------------------ // ------------------------------------------------------------------------------------
} }
......
...@@ -179,6 +179,26 @@ namespace dlib ...@@ -179,6 +179,26 @@ namespace dlib
cuda_data_void_ptr buffer; cuda_data_void_ptr buffer;
}; };
// ----------------------------------------------------------------------------------------
std::shared_ptr<resizable_cuda_buffer> device_global_buffer(
);
/*!
ensures
- Returns a pointer to a globally shared CUDA memory buffer on the
currently selected CUDA device. The buffer is also thread local. So
each host thread will get its own buffer. You can use this global buffer
as scratch space for CUDA computations that all take place on the default
stream. Using it in this way ensures that there aren't any race conditions
involving the use of the buffer.
- The global buffer is deallocated once all references to it are
destructed. It will be reallocated as required. So if you want to avoid
these reallocations then hold a copy of the shared_ptr returned by this
function.
!*/
// ----------------------------------------------------------------------------------------
} }
} }
......
...@@ -117,55 +117,6 @@ namespace dlib ...@@ -117,55 +117,6 @@ namespace dlib
thread_local cudnn_context c; thread_local cudnn_context c;
return c.get_handle(); return c.get_handle();
} }
// ------------------------------------------------------------------------------------
// NOTE(review): this is the pre-move copy of cudnn_device_buffer (these are
// the removal lines of the diff); the class now lives in the cuda utils file.
class cudnn_device_buffer
{
public:
// not copyable
cudnn_device_buffer(const cudnn_device_buffer&) = delete;
cudnn_device_buffer& operator=(const cudnn_device_buffer&) = delete;
cudnn_device_buffer()
{
// Start with room for 16 devices; get_buffer() grows this on demand.
buffers.resize(16);
}
~cudnn_device_buffer()
{
}
// Returns the scratch buffer for the CUDA device currently selected on
// this thread, creating one if no live buffer exists for that device.
std::shared_ptr<resizable_cuda_buffer> get_buffer (
)
{
int new_device_id;
CHECK_CUDA(cudaGetDevice(&new_device_id));
// make room for more devices if needed
if (new_device_id >= (long)buffers.size())
buffers.resize(new_device_id+16);
// If we don't have a buffer already for this device then make one
std::shared_ptr<resizable_cuda_buffer> buff = buffers[new_device_id].lock();
if (!buff)
{
buff = std::make_shared<resizable_cuda_buffer>();
buffers[new_device_id] = buff;
}
// Finally, return the buffer for the current device
return buff;
}
private:
// Indexed by device id.  weak_ptr so the registry never keeps a buffer
// alive by itself: once all outside shared_ptrs die the buffer is freed.
std::vector<std::weak_ptr<resizable_cuda_buffer>> buffers;
};
// NOTE(review): pre-move copy (removal lines of the diff); the definition
// now lives in the cuda utils file.
std::shared_ptr<resizable_cuda_buffer> device_global_buffer()
{
// thread_local: each host thread gets its own per-device buffer registry,
// so concurrent threads never share scratch space.
thread_local cudnn_device_buffer buffer;
return buffer.get_buffer();
}
// ------------------------------------------------------------------------------------ // ------------------------------------------------------------------------------------
class cudnn_activation_descriptor class cudnn_activation_descriptor
......
...@@ -17,24 +17,6 @@ namespace dlib ...@@ -17,24 +17,6 @@ namespace dlib
namespace cuda namespace cuda
{ {
// ----------------------------------------------------------------------------------------
std::shared_ptr<resizable_cuda_buffer> device_global_buffer(
);
/*!
ensures
- Returns a pointer to a globally shared CUDA memory buffer on the
currently selected CUDA device. The buffer is also thread local. So
each host thread will get its own buffer. You can use this global buffer
as scratch space for CUDA computations that all take place on the default
stream. Using it in this way ensures that there aren't any race conditions
involving the use of the buffer.
- The global buffer is deallocated once all references to it are
destructed. It will be reallocated as required. So if you want to avoid
these reallocations then hold a copy of the shared_ptr returned by this
function.
!*/
// ----------------------------------------------------------------------------------- // -----------------------------------------------------------------------------------
class tensor_descriptor class tensor_descriptor
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment