Commit 24830e8e authored by Davis King

Added memcpy() functions for tensor and gpu_data objects.

parent 2fd98234
...@@ -10,11 +10,34 @@
#include "gpu_data.h"
#include <iostream>
#include "cuda_utils.h"
#include <cstring>
namespace dlib
{
// ----------------------------------------------------------------------------------------
void memcpy (
gpu_data& dest,
const gpu_data& src
)
{
DLIB_CASSERT(dest.size() == src.size(), "");
if (src.size() == 0)
return;
// copy the memory efficiently based on which copy is current in each object.
if (dest.device_ready() && src.device_ready())
CHECK_CUDA(cudaMemcpy(dest.device(), src.device(), src.size()*sizeof(float), cudaMemcpyDeviceToDevice));
else if (!dest.device_ready() && src.device_ready())
CHECK_CUDA(cudaMemcpy(dest.host_write_only(), src.device(), src.size()*sizeof(float), cudaMemcpyDeviceToHost));
else if (dest.device_ready() && !src.device_ready())
CHECK_CUDA(cudaMemcpy(dest.device(), src.host(), src.size()*sizeof(float), cudaMemcpyHostToDevice));
else
CHECK_CUDA(cudaMemcpy(dest.host_write_only(), src.host(), src.size()*sizeof(float), cudaMemcpyHostToHost));
}
// ----------------------------------------------------------------------------------------
void gpu_data::
...
...@@ -5,6 +5,7 @@
#include "gpu_data_abstract.h"
#include <memory>
#include <cstring>
#include "cuda_errors.h" #include "cuda_errors.h"
#include "../serialize.h" #include "../serialize.h"
...@@ -202,6 +203,18 @@ namespace dlib ...@@ -202,6 +203,18 @@ namespace dlib
deserialize(data[i], in); deserialize(data[i], in);
} }
#ifdef DLIB_USE_CUDA
void memcpy (gpu_data& dest, const gpu_data& src);
#else
inline void memcpy (gpu_data& dest, const gpu_data& src)
{
DLIB_CASSERT(dest.size() == src.size(), "");
if (src.size() == 0)
return;
std::memcpy(dest.host_write_only(), src.host(), sizeof(float)*src.size());
}
#endif
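Note on the fallback above: when DLIB_USE_CUDA is not defined, the same memcpy(dest, src) signature is provided inline as a plain std::memcpy over the host buffers, so calling code stays identical across CPU-only and CUDA builds.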
// ----------------------------------------------------------------------------------------
}
...
...@@ -208,6 +208,21 @@ namespace dlib
provides serialization support
!*/
void memcpy (
gpu_data& dest,
const gpu_data& src
);
/*!
requires
- dest.size() == src.size()
ensures
- Copies the data in src to dest. If the device data is current (i.e.
device_ready()==true) on both src and dest then the copy will happen entirely
on the device side.
- It doesn't matter what GPU device is selected by cudaSetDevice(). You can
always copy gpu_data objects to and from each other regardless.
!*/
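A minimal usage sketch of the function documented above (illustrative only, not part of this commit); it assumes a CUDA-enabled build, and the sizes and fill value are arbitrary:

// Illustrative sketch, not part of this commit. Assumes DLIB_USE_CUDA is
// defined so the device-side branch is available.
dlib::gpu_data src, dest;
src.set_size(128);
dest.set_size(128);            // required: dest.size() == src.size()
float* p = src.host();         // fill src through its host pointer
for (size_t i = 0; i < src.size(); ++i)
    p[i] = 3.0f;
src.device();                  // make the device copy of src current
dest.device();                 // likewise for dest
memcpy(dest, src);             // found via ADL; both sides device_ready() -> device-to-device copy

If only one of the two objects has current device data, the implementation picks the matching cudaMemcpyDeviceToHost or cudaMemcpyHostToDevice direction, and cudaMemcpyHostToHost when neither side has current device data.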
// ----------------------------------------------------------------------------------------
}
...
...@@ -158,6 +158,15 @@ namespace dlib
) const = 0;
#endif
friend void memcpy (
tensor& dest,
const tensor& src
)
{
memcpy(dest.data(), src.data());
}
protected:
friend class alias_tensor;
...
...@@ -298,6 +298,22 @@ namespace dlib
tensor& operator=(tensor&& item);
};
// ----------------------------------------------------------------------------------------
void memcpy (
tensor& dest,
const tensor& src
);
/*!
requires
- dest.size() == src.size()
ensures
- Copies the data in src to dest. If the device data is current on both src
and dest then the copy will happen entirely on the device side.
- It doesn't matter what GPU device is selected by cudaSetDevice(). You can
always copy tensor objects to and from each other regardless.
!*/
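A corresponding sketch for the tensor overload (again illustrative, not part of this commit); it mirrors the unit test added at the bottom of this diff:

// Illustrative sketch. The friend memcpy(tensor&, const tensor&) forwards to
// memcpy(gpu_data&, const gpu_data&) on the tensors' underlying data blocks.
dlib::resizable_tensor a, b;
a.set_size(2,3,4,5);
b.copy_size(a);                // required: b.size() == a.size()
a = 1;                         // assign 1 to every element of a
memcpy(b, a);                  // found via ADL; copies a's contents into b
// afterwards, max(abs(mat(b) - mat(a))) == 0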
// ----------------------------------------------------------------------------------------
const matrix_exp mat (
...
...@@ -439,6 +439,50 @@ namespace
DLIB_TEST(max(abs(mat(dest)-pointwise_multiply(AA,mat(B)))) < 1e-6);
}
{
resizable_tensor A, B, truth;
A.set_size(2,3,4,5);
truth.copy_size(A);
B.copy_size(A);
A = 4;
B = 1;
truth = 1;
DLIB_TEST(max(abs(mat(B)- mat(truth))) < 1e-5);
memcpy(A, truth);
DLIB_TEST(max(abs(mat(A)- mat(truth))) < 1e-5);
A = 4;
A.host();
B.host();
memcpy(A, truth);
DLIB_TEST(max(abs(mat(A)- mat(truth))) < 1e-5);
A = 4;
A.device();
B.host();
memcpy(A, truth);
DLIB_TEST(max(abs(mat(A)- mat(truth))) < 1e-5);
A = 4;
A.device();
B.device();
memcpy(A, truth);
DLIB_TEST(max(abs(mat(A)- mat(truth))) < 1e-5);
A = 4;
A.host();
B.device();
memcpy(A, truth);
DLIB_TEST(max(abs(mat(A)- mat(truth))) < 1e-5);
A = 4;
A.host_write_only();
B.device();
memcpy(A, truth);
DLIB_TEST(max(abs(mat(A)- mat(truth))) < 1e-5);
}
{
resizable_tensor A, B;
A.set_size(2,3,4,5);
...