merged

7c29fbb1 · Davis King · 82295c10 · 16583730 · 7c29fbb1 · 7c29fbb1
Commit 7c29fbb1 authored Apr 25, 2016 by Davis King
10 changed files
--- a/dlib/dnn/cublas_dlibapi.cpp
+++ b/dlib/dnn/cublas_dlibapi.cpp
@@ -25,7 +25,7 @@ static const char* cublas_get_error_string(cublasStatus_t s)
 // Check the return value of a call to the cuBLAS runtime for an error condition.
 #define CHECK_CUBLAS(call)                                                      \
-{                                                                              \
+do{                                                                              \
    const cublasStatus_t error = call;                                         \
    if (error != CUBLAS_STATUS_SUCCESS)                                        \
    {                                                                          \
@@ -34,7 +34,7 @@ static const char* cublas_get_error_string(cublasStatus_t s)
        sout << "code: " << error << ", reason: " << cublas_get_error_string(error);\
        throw dlib::cublas_error(sout.str());                            \
    }                                                                          \
-}
+}while(false)
 namespace dlib
 {

--- a/dlib/dnn/cuda_utils.h
+++ b/dlib/dnn/cuda_utils.h
@@ -15,7 +15,7 @@
 // Check the return value of a call to the CUDA runtime for an error condition.
 #define CHECK_CUDA(call)                                                       \
-{                                                                              \
+do{                                                                              \
    const cudaError_t error = call;                                            \
    if (error != cudaSuccess)                                                  \
    {                                                                          \
@@ -24,7 +24,7 @@
        sout << "code: " << error << ", reason: " << cudaGetErrorString(error);\
        throw dlib::cuda_error(sout.str());                                          \
    }                                                                          \
-}
+}while(false)
 // ----------------------------------------------------------------------------------------

--- a/dlib/dnn/cudnn_dlibapi.cpp
+++ b/dlib/dnn/cudnn_dlibapi.cpp
@@ -36,7 +36,7 @@ static const char* cudnn_get_error_string(cudnnStatus_t s)
 // Check the return value of a call to the cuDNN runtime for an error condition.
 #define CHECK_CUDNN(call)                                                      \
-{                                                                              \
+do{                                                                              \
    const cudnnStatus_t error = call;                                         \
    if (error != CUDNN_STATUS_SUCCESS)                                        \
    {                                                                          \
@@ -45,7 +45,7 @@ static const char* cudnn_get_error_string(cudnnStatus_t s)
        sout << "code: " << error << ", reason: " << cudnn_get_error_string(error);\
        throw dlib::cudnn_error(sout.str());                            \
    }                                                                          \
-}
+}while(false)
 namespace dlib

--- a/dlib/dnn/curand_dlibapi.cpp
+++ b/dlib/dnn/curand_dlibapi.cpp
@@ -24,7 +24,7 @@ static const char* curand_get_error_string(curandStatus_t s)
 // Check the return value of a call to the cuRAND runtime for an error condition.
 #define CHECK_CURAND(call)                                                      \
-{                                                                              \
+do{                                                                              \
    const curandStatus_t error = call;                                         \
    if (error != CURAND_STATUS_SUCCESS)                                        \
    {                                                                          \
@@ -33,7 +33,7 @@ static const char* curand_get_error_string(curandStatus_t s)
        sout << "code: " << error << ", reason: " << curand_get_error_string(error);\
        throw dlib::curand_error(sout.str());                            \
    }                                                                          \
-}
+}while(false)
 namespace dlib
 {

--- a/dlib/dnn/gpu_data.cpp
+++ b/dlib/dnn/gpu_data.cpp
@@ -10,11 +10,34 @@
 #include "gpu_data.h"
 #include <iostream>
 #include "cuda_utils.h"
+#include <cstring>
 namespace dlib
 {
+// ----------------------------------------------------------------------------------------
+    // Copies the float contents of src into dest.  The cudaMemcpy direction is picked
+    // from what device_ready() reports for each object, so no extra host<->device
+    // transfer is forced just to perform the copy.
+    //
+    // requires: dest.size() == src.size()  (enforced by DLIB_CASSERT)
+    // throws:   dlib::cuda_error (raised by CHECK_CUDA) if cudaMemcpy reports a failure.
+    void memcpy (
+        gpu_data& dest, 
+        const gpu_data& src
+    )
+    {
+        DLIB_CASSERT(dest.size() == src.size(), "");
+        // Empty objects have nothing to copy.  Copying an object onto itself is also a
+        // no-op, and skipping it avoids handing cudaMemcpy identical (fully overlapping)
+        // source and destination ranges.
+        if (src.size() == 0 || &dest == &src)
+            return;
+
+        const auto num_bytes = src.size()*sizeof(float);
+
+        // copy the memory efficiently based on which copy is current in each object.
+        if (dest.device_ready() && src.device_ready())
+        {
+            CHECK_CUDA(cudaMemcpy(dest.device(), src.device(), num_bytes, cudaMemcpyDeviceToDevice));
+        }
+        else if (!dest.device_ready() && src.device_ready())
+        {
+            // dest is about to be overwritten entirely, so its old host contents are not needed.
+            CHECK_CUDA(cudaMemcpy(dest.host_write_only(), src.device(), num_bytes, cudaMemcpyDeviceToHost));
+        }
+        else if (dest.device_ready() && !src.device_ready())
+        {
+            CHECK_CUDA(cudaMemcpy(dest.device(), src.host(), num_bytes, cudaMemcpyHostToDevice));
+        }
+        else 
+        {
+            CHECK_CUDA(cudaMemcpy(dest.host_write_only(), src.host(), num_bytes, cudaMemcpyHostToHost));
+        }
+    }
 // ----------------------------------------------------------------------------------------
    void gpu_data::

--- a/dlib/dnn/gpu_data.h
+++ b/dlib/dnn/gpu_data.h
@@ -5,6 +5,7 @@
 #include "gpu_data_abstract.h"
 #include <memory>
+#include <cstring>
 #include "cuda_errors.h"
 #include "../serialize.h"
@@ -202,6 +203,18 @@ namespace dlib
            deserialize(data[i], in);
    }
+#ifdef DLIB_USE_CUDA
+    void memcpy (gpu_data& dest, const gpu_data& src);
+#else
+    // Build without DLIB_USE_CUDA: copies the float contents of src into dest through
+    // the host-side buffers using std::memcpy.
+    //
+    // requires: dest.size() == src.size()  (enforced by DLIB_CASSERT)
+    inline void memcpy (gpu_data& dest, const gpu_data& src)
+    {
+        DLIB_CASSERT(dest.size() == src.size(), "");
+        // Empty objects have nothing to copy.  A self-copy is skipped because std::memcpy
+        // has undefined behavior when the source and destination ranges overlap.
+        if (src.size() == 0 || &dest == &src)
+            return;
+        // dest is overwritten in full, so request its buffer as write-only.
+        std::memcpy(dest.host_write_only(), src.host(), sizeof(float)*src.size());
+    }
+#endif
 // ----------------------------------------------------------------------------------------
 }

--- a/dlib/dnn/gpu_data_abstract.h
+++ b/dlib/dnn/gpu_data_abstract.h
@@ -208,6 +208,21 @@ namespace dlib
        provides serialization support
    !*/
+    void memcpy (
+        gpu_data& dest, 
+        const gpu_data& src
+    );
+    /*!
+        requires
+            - dest.size() == src.size()
+        ensures
+            - Copies the data in src to dest.  If the device data is current (i.e.
+              device_ready()==true) on both src and dest then the copy will happen entirely
+              on the device side.
+            - It doesn't matter what GPU device is selected by cudaSetDevice().  You can
+              always copy gpu_data objects to and from each other regardless.
+    !*/
 // ----------------------------------------------------------------------------------------
 }

--- a/dlib/dnn/tensor.h
+++ b/dlib/dnn/tensor.h
@@ -55,7 +55,7 @@ namespace dlib
        tensor& operator= (float val)
        {
 #ifdef DLIB_USE_CUDA
-            // If you are using CUDA then presumably you will be mostly using tensor's on
+            // If you are using CUDA then presumably you will be mostly using tensors on
            // the GPU.  So unless you seem to be actively working with the host side's
            // data then we do this initialization on the device side since this avoids a
            // host to device transfer that would likely immediately follow.
@@ -158,6 +158,15 @@ namespace dlib
        ) const = 0; 
 #endif
+        // Copies the contents of src into dest by forwarding to the memcpy() overload
+        // that operates on the objects returned by data().
+        // NOTE(review): the abstract spec requires dest.size() == src.size(); this
+        // function does not check it itself, presumably the forwarded overload does.
+        friend void memcpy (
+            tensor& dest, 
+            const tensor& src
+        )
+        {
+            memcpy(dest.data(), src.data());
+        }
    protected:
        friend class alias_tensor;

--- a/dlib/dnn/tensor_abstract.h
+++ b/dlib/dnn/tensor_abstract.h
@@ -298,6 +298,22 @@ namespace dlib
        tensor& operator=(tensor&& item); 
    };
+// ----------------------------------------------------------------------------------------
+    void memcpy (
+        tensor& dest, 
+        const tensor& src
+    );
+    /*!
+        requires
+            - dest.size() == src.size()
+        ensures
+            - Copies the data in src to dest.  If the device data is current on both src
+              and dest then the copy will happen entirely on the device side.
+            - It doesn't matter what GPU device is selected by cudaSetDevice().  You can
+              always copy tensor objects to and from each other regardless.
+    !*/
 // ----------------------------------------------------------------------------------------
    const matrix_exp mat (

--- a/dlib/test/dnn.cpp
+++ b/dlib/test/dnn.cpp
@@ -439,6 +439,52 @@ namespace
            DLIB_TEST(max(abs(mat(dest)-pointwise_multiply(AA,mat(B)))) < 1e-6); 
        }
+        {
+            // Exercise memcpy(tensor&, const tensor&) with the destination (A) and the
+            // source (B) touched on different sides before each copy.  B holds the same
+            // values as truth, so every copy must leave A equal to truth.
+            resizable_tensor A, B, truth;
+            A.set_size(2,3,4,5);
+            truth.copy_size(A);
+            B.copy_size(A);
+            A = 4;
+            B = 1;
+            truth = 1;
+            DLIB_TEST(max(abs(mat(B)- mat(truth))) < 1e-5);
+            memcpy(A, truth);
+            DLIB_TEST(max(abs(mat(A)- mat(truth))) < 1e-5);
+            // destination touched on the host, source touched on the host
+            A = 4;
+            A.host();
+            B.host();
+            memcpy(A, B);
+            DLIB_TEST(max(abs(mat(A)- mat(truth))) < 1e-5);
+#ifdef DLIB_USE_CUDA
+            // destination touched on the device, source touched on the host
+            A = 4;
+            A.device();
+            B.host();
+            memcpy(A, B);
+            DLIB_TEST(max(abs(mat(A)- mat(truth))) < 1e-5);
+            // destination touched on the device, source touched on the device
+            A = 4;
+            A.device();
+            B.device();
+            memcpy(A, B);
+            DLIB_TEST(max(abs(mat(A)- mat(truth))) < 1e-5);
+            // destination touched on the host, source touched on the device
+            A = 4;
+            A.host();
+            B.device();
+            memcpy(A, B);
+            DLIB_TEST(max(abs(mat(A)- mat(truth))) < 1e-5);
+            // destination requested as write-only on the host, source touched on the device
+            A = 4;
+            A.host_write_only();
+            B.device();
+            memcpy(A, B);
+            DLIB_TEST(max(abs(mat(A)- mat(truth))) < 1e-5);
+#endif
+        }
        {
            resizable_tensor A, B;
            A.set_size(2,3,4,5);