Commit 24830e8e authored by Davis King

Added memcpy() functions for tensor and gpu_data objects.

parent 2fd98234
...@@ -10,11 +10,34 @@
#include "gpu_data.h"
#include <iostream>
#include "cuda_utils.h"
#include <cstring>
namespace dlib
{
// ----------------------------------------------------------------------------------------
void memcpy (
gpu_data& dest,
const gpu_data& src
)
{
DLIB_CASSERT(dest.size() == src.size(), "");
if (src.size() == 0)
return;
// copy the memory efficiently based on which copy is current in each object.
if (dest.device_ready() && src.device_ready())
CHECK_CUDA(cudaMemcpy(dest.device(), src.device(), src.size()*sizeof(float), cudaMemcpyDeviceToDevice));
else if (!dest.device_ready() && src.device_ready())
CHECK_CUDA(cudaMemcpy(dest.host_write_only(), src.device(), src.size()*sizeof(float), cudaMemcpyDeviceToHost));
else if (dest.device_ready() && !src.device_ready())
CHECK_CUDA(cudaMemcpy(dest.device(), src.host(), src.size()*sizeof(float), cudaMemcpyHostToDevice));
else
CHECK_CUDA(cudaMemcpy(dest.host_write_only(), src.host(), src.size()*sizeof(float), cudaMemcpyHostToHost));
}
// ----------------------------------------------------------------------------------------
void gpu_data::
...
...@@ -5,6 +5,7 @@
#include "gpu_data_abstract.h"
#include <memory>
#include <cstring>
#include "cuda_errors.h" #include "cuda_errors.h"
#include "../serialize.h" #include "../serialize.h"
...@@ -202,6 +203,18 @@ namespace dlib ...@@ -202,6 +203,18 @@ namespace dlib
deserialize(data[i], in); deserialize(data[i], in);
} }
#ifdef DLIB_USE_CUDA
void memcpy (gpu_data& dest, const gpu_data& src);
#else
inline void memcpy (gpu_data& dest, const gpu_data& src)
{
DLIB_CASSERT(dest.size() == src.size(), "");
if (src.size() == 0)
return;
std::memcpy(dest.host_write_only(), src.host(), sizeof(float)*src.size());
}
#endif
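Note on the fallback above: when DLIB_USE_CUDA is not defined, the same memcpy(dest, src) signature is provided inline as a plain std::memcpy over the host buffers, so calling code stays identical across CPU-only and CUDA builds.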
// ----------------------------------------------------------------------------------------
}
...
...@@ -208,6 +208,21 @@ namespace dlib
provides serialization support
!*/
void memcpy (
gpu_data& dest,
const gpu_data& src
);
/*!
requires
- dest.size() == src.size()
ensures
- Copies the data in src to dest. If the device data is current (i.e.
device_ready()==true) on both src and dest then the copy will happen entirely
on the device side.
- It doesn't matter what GPU device is selected by cudaSetDevice(). You can
always copy gpu_data objects to and from each other regardless.
!*/
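A minimal usage sketch of the function documented above (illustrative only, not part of this commit); it assumes a CUDA-enabled build, and the sizes and fill value are arbitrary:

// Illustrative sketch, not part of this commit. Assumes DLIB_USE_CUDA is
// defined so the device-side branch is available.
dlib::gpu_data src, dest;
src.set_size(128);
dest.set_size(128);            // required: dest.size() == src.size()
float* p = src.host();         // fill src through its host pointer
for (size_t i = 0; i < src.size(); ++i)
    p[i] = 3.0f;
src.device();                  // make the device copy of src current
dest.device();                 // likewise for dest
memcpy(dest, src);             // found via ADL; both sides device_ready() -> device-to-device copy

If only one of the two objects has current device data, the implementation picks the matching cudaMemcpyDeviceToHost or cudaMemcpyHostToDevice direction, and cudaMemcpyHostToHost when neither side has current device data.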
// ----------------------------------------------------------------------------------------
}
...
...@@ -158,6 +158,15 @@ namespace dlib
) const = 0;
#endif
friend void memcpy (
tensor& dest,
const tensor& src
)
{
memcpy(dest.data(), src.data());
}
protected:
friend class alias_tensor;
...
...@@ -298,6 +298,22 @@ namespace dlib
tensor& operator=(tensor&& item);
};
// ----------------------------------------------------------------------------------------
void memcpy (
tensor& dest,
const tensor& src
);
/*!
requires
- dest.size() == src.size()
ensures
- Copies the data in src to dest. If the device data is current on both src
and dest then the copy will happen entirely on the device side.
- It doesn't matter what GPU device is selected by cudaSetDevice(). You can
always copy tensor objects to and from each other regardless.
!*/
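A corresponding sketch for the tensor overload (again illustrative, not part of this commit); it mirrors the unit test added at the bottom of this diff:

// Illustrative sketch. The friend memcpy(tensor&, const tensor&) forwards to
// memcpy(gpu_data&, const gpu_data&) on the tensors' underlying data blocks.
dlib::resizable_tensor a, b;
a.set_size(2,3,4,5);
b.copy_size(a);                // required: b.size() == a.size()
a = 1;                         // assign 1 to every element of a
memcpy(b, a);                  // found via ADL; copies a's contents into b
// afterwards, max(abs(mat(b) - mat(a))) == 0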
// ----------------------------------------------------------------------------------------
const matrix_exp mat (
...
...@@ -439,6 +439,50 @@ namespace
DLIB_TEST(max(abs(mat(dest)-pointwise_multiply(AA,mat(B)))) < 1e-6);
}
{
resizable_tensor A, B, truth;
A.set_size(2,3,4,5);
truth.copy_size(A);
B.copy_size(A);
A = 4;
B = 1;
truth = 1;
DLIB_TEST(max(abs(mat(B)- mat(truth))) < 1e-5);
memcpy(A, truth);
DLIB_TEST(max(abs(mat(A)- mat(truth))) < 1e-5);
A = 4;
A.host();
B.host();
memcpy(A, truth);
DLIB_TEST(max(abs(mat(A)- mat(truth))) < 1e-5);
A = 4;
A.device();
B.host();
memcpy(A, truth);
DLIB_TEST(max(abs(mat(A)- mat(truth))) < 1e-5);
A = 4;
A.device();
B.device();
memcpy(A, truth);
DLIB_TEST(max(abs(mat(A)- mat(truth))) < 1e-5);
A = 4;
A.host();
B.device();
memcpy(A, truth);
DLIB_TEST(max(abs(mat(A)- mat(truth))) < 1e-5);
A = 4;
A.host_write_only();
B.device();
memcpy(A, truth);
DLIB_TEST(max(abs(mat(A)- mat(truth))) < 1e-5);
}
{
resizable_tensor A, B;
A.set_size(2,3,4,5);
...