Commit d7e4b88d authored by Davis King

Made tt::add() more general

parent a6c331ce
......@@ -160,7 +160,8 @@ namespace dlib
(have_same_dimensions(src, dest) ||
(src.num_samples()==1 && src.k()==dest.k() && src.nr()==1 && src.nc()==1) ||
(src.num_samples()==1 && src.k()==dest.k() && src.nr()==dest.nr() && src.nc()==dest.nc()) ||
(src.num_samples()==1 && src.k()==1 && src.nr()==dest.nr() && src.nc()==dest.nc())) &&
(src.num_samples()==1 && src.k()==1 && src.nr()==dest.nr() && src.nc()==dest.nc()) ||
(src.num_samples()==dest.num_samples() && src.k()==1 && src.nr()==1 && src.nc()==1)) &&
is_same_object(src,dest) == false ,
"\n\t dest.num_samples(): " << dest.num_samples()
<<"\n\t dest.k(): " << dest.k()
......
......@@ -643,6 +643,38 @@ namespace dlib
launch_kernel(_cuda_add_scaled,max_jobs(dest.size()),dest.device(), src.device(), dest.size(), scale);
}
// ----------------------------------------------------------------------------------------
// Kernel: for every index idx produced by grid_stride_range(0, size), performs
//     dest[idx] = beta*dest[idx] + alpha*src[idx/stride]
// so each run of `stride` consecutive dest elements is combined with the same
// src element.
__global__ void _cuda_add_cv_to_all_columns(
    float beta,
    float* dest,
    float alpha,
    const float* src,
    size_t size,
    size_t stride
)
{
    for (auto idx : grid_stride_range(0, size))
    {
        // Index of the src element shared by this run of dest elements.
        const auto src_idx = idx/stride;
        dest[idx] = beta*dest[idx] + alpha*src[src_idx];
    }
}
// Kernel: variant of _cuda_add_cv_to_all_columns used when the old contents of
// dest are not part of the result.  For every index idx produced by
// grid_stride_range(0, size) it stores
//     dest[idx] = alpha*src[idx/stride]
// and never reads dest.
__global__ void _cuda_add_cv_to_all_columns_no_beta(
    float* dest,
    float alpha,
    const float* src,
    size_t size,
    size_t stride
)
{
    for (auto idx : grid_stride_range(0, size))
    {
        // Index of the src element shared by this run of dest elements.
        const auto src_idx = idx/stride;
        dest[idx] = alpha*src[src_idx];
    }
}
// Launches the kernel that combines each sample of dest with the matching
// single value of src:
//     dest[i] = beta*dest[i] + alpha*src[i/stride]
// where stride = dest.size()/dest.num_samples().
//
// requires
//     - dest.num_samples() == src.num_samples()
//     - src.num_samples() == src.size()   (src holds exactly one value per sample)
// When beta == 0 the kernel that overwrites dest without reading it is used.
void add_cv_to_all_columns(
    float beta,
    tensor& dest,
    float alpha,
    const tensor& src
)
{
    DLIB_CASSERT(dest.num_samples() == src.num_samples() && src.num_samples() == src.size());

    // An empty dest has nothing to update.  Returning here also avoids evaluating
    // dest.size()/dest.num_samples() with a zero divisor when dest has no samples
    // (the assertion above permits that case whenever src is empty as well).
    if (dest.size() == 0)
        return;

    // Number of consecutive dest elements that share one src value.
    const auto stride = dest.size()/dest.num_samples();

    if (beta == 0)
        launch_kernel(_cuda_add_cv_to_all_columns_no_beta, max_jobs(dest.size()), dest.device(), alpha, src.device(), dest.size(), stride);
    else
        launch_kernel(_cuda_add_cv_to_all_columns, max_jobs(dest.size()), beta, dest.device(), alpha, src.device(), dest.size(), stride);
}
// ----------------------------------------------------------------------------------------
__global__ void _cuda_affine_transform5(
......
......@@ -238,6 +238,13 @@ namespace dlib
const tensor& src
);
void add_cv_to_all_columns(
float beta,
tensor& dest,
float alpha,
const tensor& src
);
/*!
    requires
        - dest.num_samples() == src.num_samples()
        - src.num_samples() == src.size()
          (i.e. src contains exactly one value per sample)
    ensures
        - Let stride = dest.size()/dest.num_samples().  Then for every element
          index i of dest this performs:
            dest[i] = beta*dest[i] + alpha*src[i/stride]
          Viewing dest as a matrix with num_samples() rows, this adds alpha*src
          (as a column vector) to every column of beta*dest.
        - if (beta == 0) then
            - the previous contents of dest are not read; dest[i] is simply set
              to alpha*src[i/stride].
!*/
// -----------------------------------------------------------------------------------
void affine_transform(
......
......@@ -260,7 +260,8 @@ namespace dlib
(have_same_dimensions(src, dest) ||
(src.num_samples()==1 && src.k()==dest.k() && src.nr()==1 && src.nc()==1) ||
(src.num_samples()==1 && src.k()==dest.k() && src.nr()==dest.nr() && src.nc()==dest.nc()) ||
(src.num_samples()==1 && src.k()==1 && src.nr()==dest.nr() && src.nc()==dest.nc())) &&
(src.num_samples()==1 && src.k()==1 && src.nr()==dest.nr() && src.nc()==dest.nc()) ||
(src.num_samples()==dest.num_samples() && src.k()==1 && src.nr()==1 && src.nc()==1)) &&
is_same_object(src,dest) == false ,
"\n\t dest.num_samples(): " << dest.num_samples()
<<"\n\t dest.k(): " << dest.k()
......@@ -279,6 +280,11 @@ namespace dlib
add_scaled(dest, alpha, src);
return;
}
else if (src.num_samples()==dest.num_samples() && src.k()==1 && src.nr()==1 && src.nc()==1)
{
add_cv_to_all_columns(beta, dest, alpha, src);
return;
}
CHECK_CUDNN(cudnnAddTensor(context(),
&alpha,
......
......@@ -693,6 +693,7 @@ namespace dlib { namespace tt
- src.num_samples()==1 && src.k()==dest.k() && src.nr()==1 && src.nc()==1
- src.num_samples()==1 && src.k()==dest.k() && src.nr()==dest.nr() && src.nc()==dest.nc()
- src.num_samples()==1 && src.k()==1 && src.nr()==dest.nr() && src.nc()==dest.nc()
- src.num_samples()==dest.num_samples() && src.k()==1 && src.nr()==1 && src.nc()==1
- is_same_object(src,dest) == false
ensures
- performs: dest = beta*dest + alpha*src
......
......@@ -602,6 +602,35 @@ namespace
#endif
}
{
    // Exercise tt::add() when src holds a single value per sample of dest: the
    // result must match adding alpha*b to every column of beta*a.  Checked with
    // a non-zero beta first and then with beta == 0.
    resizable_tensor A(4,5), B(4);
    tensor_rand rnd;
    rnd.fill_uniform(A);
    rnd.fill_uniform(B);
    float alpha = 1.4;
    float beta = 0.5;
    matrix<float> a(mat(A)), b(mat(B));

    // Applies the reference update to each column of a using the current beta.
    auto update_reference = [&]()
    {
        const long num_cols = a.nc();
        for (long col = 0; col != num_cols; ++col)
            set_colm(a,col) = beta*colm(a,col) + alpha*b;
    };

    update_reference();
    tt::add(beta, A, alpha, B);
    DLIB_TEST_MSG(max(abs(mat(A)-a)) < 1e-6, max(abs(mat(A)-a)));

    beta = 0;
    update_reference();
    tt::add(beta, A, alpha, B);
    DLIB_TEST(max(abs(mat(A)-a)) < 1e-6);
}
{
resizable_tensor A, B;
A.set_size(2,3,4,5);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment