Commit 290b1cb1 authored by Evgeniy Fominov

Fixed dnn_tester in GPU mode for the copy_tensor test

parent a06e5332
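Note on the fix: the removed tensor_read_gpu helper dereferenced the pointer returned by tensor::device() directly on the host, which is not valid once the data actually lives in CUDA device memory; the test now reads elements through the CPU-side path (tensor_read_cpu) and calls the copy routine through dlib's cuda namespace. A minimal sketch of that kind of host-side element read, assuming the usual num_samples x k x nr x nc layout; the function name here is illustrative, not the helper defined in the tester:

#include <dlib/dnn.h>

// Illustrative helper only (the tester uses its existing tensor_read_cpu):
// read element (i, k, r, c) of a dlib tensor from host memory. tensor::host()
// returns a host pointer and performs any pending device-to-host transfer,
// whereas the pointer from tensor::device() must not be dereferenced on the CPU.
inline float read_element_on_host(const dlib::tensor& t, long i, long k, long r, long c)
{
    const float* p = t.host() + t.k() * t.nr() * t.nc() * i +
                                t.nr() * t.nc() * k +
                                t.nc() * r + c;
    return *p;
}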
@@ -1532,12 +1532,6 @@ namespace
         }
     }
 #ifdef DLIB_USE_CUDA
-    float tensor_read_gpu(const tensor& t, long i, long k, long r, long c)
-    {
-        const float* p = t.device() + t.k() * t.nr() * t.nc() * i +
-            t.nr() * t.nc() * k + t.nc() * r + c;
-        return *p;
-    }
     void test_copy_tensor_gpu()
     {
         using namespace dlib::tt;
@@ -1550,10 +1544,9 @@ namespace
         src1 = matrix_cast<float>(gaussian_randm(src1.num_samples(), src1.k() * src1.nr() * src1.nc(), 0));
         src2 = matrix_cast<float>(gaussian_randm(src1.num_samples(), src2.k() * src2.nr() * src2.nc(), 0));
         src3 = matrix_cast<float>(gaussian_randm(src1.num_samples(), src3.k() * src3.nr() * src3.nc(), 0));
-        gpu::copy_tensor(dest, 0, src1, 0, src1.k()); //full copy src1->dest
-        gpu::copy_tensor(dest, src1.k(), src2, 0, src2.k()); //full copy src2->dest with offset of src1
-        gpu::copy_tensor(dest, src1.k() + src2.k(), src3, 3, 3); //partial copy src3 into the rest place of dest
+        cuda::copy_tensor(dest, 0, src1, 0, src1.k()); //full copy src1->dest
+        cuda::copy_tensor(dest, src1.k(), src2, 0, src2.k()); //full copy src2->dest with offset of src1
+        cuda::copy_tensor(dest, src1.k() + src2.k(), src3, 3, 3); //partial copy src3 into the rest place of dest
 
         for (long i = 0; i < dest.num_samples(); ++i)
@@ -1564,23 +1557,23 @@ namespace
                 {
                     for (long c = 0; c < dest.nc(); ++c)
                     {
-                        float dest_value = tensor_read_gpu(dest, i, k, r, c);
+                        float dest_value = tensor_read_cpu(dest, i, k, r, c);
                         // first part is from src1
                         if (k < src1.k())
                         {
-                            float src_value = tensor_read_gpu(src1, i, k, r, c);
+                            float src_value = tensor_read_cpu(src1, i, k, r, c);
                             DLIB_TEST(src_value == dest_value);
                         }
                         // second part is from src2
                         else if (k < src1.k() + src2.k())
                         {
-                            float src_value = tensor_read_gpu(src2, i, k - src1.k(), r, c);
+                            float src_value = tensor_read_cpu(src2, i, k - src1.k(), r, c);
                             DLIB_TEST(src_value == dest_value);
                         }
                         // third part is from src3
                         else
                         {
-                            float src_value = tensor_read_gpu(src3, i, k - src1.k() - src2.k() + 3, r, c);
+                            float src_value = tensor_read_cpu(src3, i, k - src1.k() - src2.k() + 3, r, c);
                             DLIB_TEST(src_value == dest_value);
                         }
                     }
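For readers skimming the diff, the channel bookkeeping in the verification loop can be stated compactly: channels [0, src1.k()) of dest come from src1, channels [src1.k(), src1.k() + src2.k()) come from src2, and the remaining channels come from src3 starting at its channel 3, matching the three copy_tensor calls. A small standalone sketch of that mapping (name and signature are illustrative, not part of the test):

#include <utility>
#include <dlib/dnn.h>

// Illustrative only: for a destination channel index k, return the source
// tensor and source channel the test expects dest to match, mirroring the
// if / else if / else chain in the verification loop above. src3 contributes
// only the 3 channels starting at its channel 3 (the partial copy).
inline std::pair<const dlib::tensor*, long long> expected_source(
    long long k,
    const dlib::tensor& src1,
    const dlib::tensor& src2,
    const dlib::tensor& src3)
{
    if (k < src1.k())
        return { &src1, k };                              // came from src1, same channel
    else if (k < src1.k() + src2.k())
        return { &src2, k - src1.k() };                   // came from src2, after src1's channels
    else
        return { &src3, k - src1.k() - src2.k() + 3 };    // came from src3, offset by its start channel 3
}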