Commit 30c4ee54 authored by Davis King

Added gpu_data spec and also optimized it a little bit.

parent fe8b99f1
gpu_data.cpp
@@ -34,16 +34,10 @@ namespace dlib
     void gpu_data::
     copy_to_device() const
     {
+        // We want transfers to the device to always be concurrent with any device
+        // computation. So we use our non-default stream to do the transfer.
+        async_copy_to_device();
+        wait_for_transfer_to_finish();
-        if (!device_current)
-        {
-            std::cout << "cudaMemcpy to device" << std::endl;
-            CHECK_CUDA(cudaMemcpy(data_device.get(), data_host.get(), data_size*sizeof(float), cudaMemcpyHostToDevice));
-            device_current = true;
-            // Check for errors. These calls to cudaGetLastError() are what help us find
-            // out if our kernel launches have been failing.
-            CHECK_CUDA(cudaGetLastError());
-        }
     }

     void gpu_data::
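The new copy_to_device() delegates to async_copy_to_device() and wait_for_transfer_to_finish(), whose bodies are not shown in this diff. As an aside (not part of the commit), here is a minimal sketch of what such an asynchronous host-to-device copy on a dedicated stream, followed by an explicit wait, might look like with the CUDA runtime API. The function and parameter names are placeholders, the host buffer is assumed to be pinned (cudaMallocHost) so the copy can actually overlap with device work, and the CHECK_CUDA error checking used elsewhere in the file is omitted:

    #include <cuda_runtime.h>
    #include <cstddef>

    // Sketch: copy n floats from pinned host memory to the device on a
    // dedicated copy stream, then block until that transfer has completed.
    void sketch_copy_to_device(float* device_ptr, const float* host_ptr,
                               std::size_t n, cudaStream_t copy_stream)
    {
        // Issuing the copy on a non-default stream lets it overlap with any
        // kernels that are running in the default stream.
        cudaMemcpyAsync(device_ptr, host_ptr, n*sizeof(float),
                        cudaMemcpyHostToDevice, copy_stream);

        // The equivalent of wait_for_transfer_to_finish(): block the host
        // until everything queued on the copy stream has finished.
        cudaStreamSynchronize(copy_stream);
    }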
gpu_data.h
@@ -3,6 +3,7 @@
 #ifndef DLIB_GPU_DaTA_H_
 #define DLIB_GPU_DaTA_H_
 #include "gpu_data_abstract.h"
+#include <memory>
 #include "cuda_errors.h"
 #include "../serialize.h"
@@ -29,10 +30,6 @@ namespace dlib
                   modified the data and it hasn't been copied to the device yet then
                   host_current==true and device_current==false.
-            THREAD SAFETY
-                This object is not thread-safe. Don't touch it from multiple threads at
-                the same time.
         !*/
     public:
gpu_data_abstract.h (new file)
// Copyright (C) 2015 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#undef DLIB_GPU_DaTA_ABSTRACT_H_
#ifdef DLIB_GPU_DaTA_ABSTRACT_H_
#include "cuda_errors.h"
#include "../serialize.h"
namespace dlib
{
// ----------------------------------------------------------------------------------------
class gpu_data
{
/*!
WHAT THIS OBJECT REPRESENTS
This object is a block of size() floats, all stored contiguously in memory.
In particular, it keeps two copies of the floats, one on the host CPU side
and another on the GPU device side. It automatically performs the necessary
host/device transfers to keep these two copies of the data in sync.
All transfers to the device happen asynchronously so that CUDA kernel
computations can overlap with data transfers. However, any transfers from
the device to the host happen synchronously in the default CUDA stream.
Therefore, you should perform all your CUDA kernel launches on the default
stream so that transfers back to the host do not happen before the
computations have completed.
If DLIB_USE_CUDA is not #defined then this object will not use CUDA at all.
Instead, it will simply store one host side memory block of floats.
THREAD SAFETY
This object is not thread-safe. Don't touch it from multiple threads at
the same time.
!*/
public:
gpu_data(
);
/*!
ensures
- #size() == 0
- #host() == nullptr
- #device() == nullptr
!*/
// This object is not copyable; however, it is movable.
gpu_data(const gpu_data&) = delete;
gpu_data& operator=(const gpu_data&) = delete;
gpu_data(gpu_data&& item);
gpu_data& operator=(gpu_data&& item);
void async_copy_to_device(
);
/*!
ensures
- This function does not block.
- if (the host version of the data is newer than the device's copy) then
- Begins asynchronously copying host data to the device.
- A call to device() that happens before the transfer completes will
block until the transfer is complete. That is, it is safe to call
async_copy_to_device() and then immediately call device().
!*/
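As an aside (not part of this header), the non-blocking guarantee above is what makes it possible to overlap the transfer with other CPU work. A hedged usage sketch follows; the include path is assumed from where gpu_data.h sits in this commit, device() requires DLIB_USE_CUDA, and overlap_example() is just an illustrative name.

    #include <algorithm>
    #include "dlib/dnn/gpu_data.h"   // assumed include path

    void overlap_example(dlib::gpu_data& g)
    {
        float* h = g.host();               // host pointer; marks the device copy as out of date
        std::fill(h, h + g.size(), 1.0f);  // produce the data on the CPU

        g.async_copy_to_device();          // start the host-to-device transfer and return immediately

        // ... unrelated CPU work can run here and overlap with the transfer ...

        const dlib::gpu_data& cg = g;      // use the const overload so the host copy stays current
        const float* d = cg.device();      // blocks only until the transfer has finished
        (void)d;                           // d would now be handed to a kernel, cuBLAS call, etc.
    }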
void set_size(
size_t new_size
);
/*!
ensures
- #size() == new_size
!*/
const float* host(
) const;
/*!
ensures
- returns a pointer to the host memory block of size() contiguous float
values or nullptr if size()==0.
- if (the host's copy of the data is out of date) then
- copies the data from the device to the host; while this is happening,
the call to host() blocks.
!*/
float* host(
);
/*!
ensures
- returns a pointer to the host memory block of size() contiguous float
values or nullptr if size()==0.
- if (the host's copy of the data is out of date) then
- copies the data from the device to the host; while this is happening,
the call to host() blocks.
- Marks the device side data as out of date so that the next call to
device() will perform a host to device transfer. If you want to begin
the transfer immediately then you can call async_copy_to_device() after
calling host().
!*/
const float* device(
) const;
/*!
requires
- DLIB_USE_CUDA is #defined
ensures
- returns a pointer to the device memory block of size() contiguous float
values or nullptr if size()==0.
- if (the device's copy of the data is out of date) then
- copies the data from the host to the device; while this is happening,
the call to device() blocks.
!*/
float* device(
);
/*!
requires
- DLIB_USE_CUDA is #defined
ensures
- returns a pointer to the device memory block of size() contiguous float
values or nullptr if size()==0.
- if (the device's copy of the data is out of date) then
- copies the data from the host to the device; while this is happening,
the call to device() blocks.
- Marks the host side data as out of date so that the next call to
host() will perform a device to host transfer.
!*/
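As an aside (not part of this header), the two dirty-flag rules above combine into the usual round trip: write on the host, compute on the device, read the result back on the host. A hedged sketch, assuming a .cu file compiled with nvcc, DLIB_USE_CUDA defined, and the include path used in this commit:

    #include <iostream>
    #include "dlib/dnn/gpu_data.h"   // assumed include path

    // Trivial kernel, launched on the default stream as the spec above recommends.
    __global__ void scale_in_place(float* data, size_t n, float alpha)
    {
        size_t i = blockIdx.x*blockDim.x + threadIdx.x;
        if (i < n)
            data[i] *= alpha;
    }

    int main()
    {
        dlib::gpu_data g;
        g.set_size(1024);

        float* h = g.host();               // write on the host; device copy is now out of date
        for (size_t i = 0; i < g.size(); ++i)
            h[i] = static_cast<float>(i);

        float* d = g.device();             // copies host->device, then marks the host copy out of date
        scale_in_place<<<(1024+255)/256, 256>>>(d, g.size(), 2.0f);  // default stream

        // host() sees that the host copy is stale and does the device->host copy;
        // since the kernel ran on the default stream, it finishes before that copy.
        std::cout << g.host()[10] << std::endl;   // prints 20
    }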
size_t size(
) const;
/*!
ensures
- returns the number of floats contained in this object.
!*/
void swap (
gpu_data& item
);
/*!
ensures
- swaps the state of *this and item
!*/
};
void serialize(const gpu_data& item, std::ostream& out);
void deserialize(gpu_data& item, std::istream& in);
/*!
provides serialization support
!*/
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_GPU_DaTA_ABSTRACT_H_
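serialize() and deserialize() follow dlib's usual stream-based convention. A hedged round-trip sketch, again assuming the include path used in this commit; the stored values are arbitrary:

    #include <sstream>
    #include <cassert>
    #include "dlib/dnn/gpu_data.h"   // assumed include path

    int main()
    {
        dlib::gpu_data a;
        a.set_size(3);
        a.host()[0] = 1; a.host()[1] = 2; a.host()[2] = 3;

        std::ostringstream sout;
        dlib::serialize(a, sout);          // write the floats to the stream

        dlib::gpu_data b;
        std::istringstream sin(sout.str());
        dlib::deserialize(b, sin);         // read them back into a second object

        assert(b.size() == 3);
        assert(b.host()[2] == 3);
        return 0;
    }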