Commit d5e65cd7 authored by Davis King's avatar Davis King

Added tt::scale_channels()

parent a7363d41
......@@ -77,6 +77,8 @@ namespace dlib
}
}
// ------------------------------------------------------------------------------------
void multiply_conv (
bool add_to,
tensor& dest,
......@@ -151,6 +153,72 @@ namespace dlib
}
}
// ------------------------------------------------------------------------------------
void scale_channels (
    bool add_to,
    tensor& dest,
    const tensor& src,
    const tensor& scales
)
{
    // Multiply every (sample,channel) plane of src by the scalar
    // scales(n,k,0,0) and either assign or accumulate the result into dest.
    DLIB_CASSERT(have_same_dimensions(dest,src) &&
                 scales.num_samples() == src.num_samples() &&
                 scales.k() == src.k() &&
                 scales.nr() == 1 &&
                 scales.nc() == 1 );

    if (dest.size() == 0)
        return;

    const auto s = src.host();
    const auto scal = scales.host();
    // Number of elements in one channel plane; src is stored contiguously in
    // n,k,r,c order, so each plane occupies plane_size consecutive floats.
    const long plane_size = src.nr()*src.nc();

    if (add_to)
    {
        auto d = dest.host();
        long idx = 0;
        for (long n = 0; n < src.num_samples(); ++n)
        {
            for (long k = 0; k < src.k(); ++k)
            {
                const auto scale = scal[n*scales.k() + k];
                for (long i = 0; i < plane_size; ++i, ++idx)
                    d[idx] += s[idx]*scale;
            }
        }
    }
    else
    {
        // dest is fully overwritten, so host_write_only() avoids copying its
        // current contents back from the device.
        auto d = dest.host_write_only();
        long idx = 0;
        for (long n = 0; n < src.num_samples(); ++n)
        {
            for (long k = 0; k < src.k(); ++k)
            {
                const auto scale = scal[n*scales.k() + k];
                for (long i = 0; i < plane_size; ++i, ++idx)
                    d[idx] = s[idx]*scale;
            }
        }
    }
}
// ------------------------------------------------------------------------------------
void add(
float beta,
tensor& dest,
......
......@@ -37,6 +37,13 @@ namespace dlib
const tensor& src2
);
// CPU implementation of scale_channels().  Scales each (sample,channel)
// plane of src by scales(n,k,1,1), assigning into dest, or accumulating
// into dest when add_to is true.  See the tt::scale_channels() spec for
// the full contract.
void scale_channels (
    bool add_to,
    tensor& dest,
    const tensor& src,
    const tensor& scales
);
void add(
float beta,
tensor& dest,
......
......@@ -513,6 +513,50 @@ namespace dlib
}
// ------------------------------------------------------------------------------------
// Accumulating variant: d[i] += src[i]*scale.  bs is the number of elements
// in one (sample,channel) plane (nr*nc), so element i belongs to flattened
// plane i/bs and uses scales[i/bs].  Grid-stride loop: any launch config is
// valid for any n.
__global__ void _cuda_scale_channels_add_to(float* d, const float* src, size_t n, const float* scales, size_t bs)
{
    for (auto i : grid_stride_range(0, n))
        d[i] += src[i]*scales[i/bs];
}
// Assigning variant: d[i] = src[i]*scale.  bs is the number of elements in
// one (sample,channel) plane (nr*nc); element i uses scales[i/bs].
__global__ void _cuda_scale_channels(float* d, const float* src, size_t n, const float* scales, size_t bs)
{
    for (auto i : grid_stride_range(0, n))
        d[i] = src[i]*scales[i/bs];
}
void scale_channels (
    bool add_to,
    tensor& dest,
    const tensor& src,
    const tensor& scales
)
{
    // CUDA implementation: one flat kernel over all of src, where each
    // element looks up its per-(sample,channel) scale via integer division.
    DLIB_CASSERT(have_same_dimensions(dest,src) &&
                 scales.num_samples() == src.num_samples() &&
                 scales.k() == src.k() &&
                 scales.nr() == 1 &&
                 scales.nc() == 1 );

    if (dest.size() == 0)
        return;

    // Elements per (sample,channel) plane; passed to the kernels so they can
    // map a flat index onto the right entry of scales.
    const size_t block_size = src.nr()*src.nc();

    if (add_to)
    {
        launch_kernel(_cuda_scale_channels_add_to,max_jobs(dest.size()),
            dest.device(), src.device(), src.size(), scales.device(), block_size);
    }
    else
    {
        // dest is fully overwritten, so device_write_only() skips syncing its
        // current contents to the device.
        launch_kernel(_cuda_scale_channels,max_jobs(dest.size()),
            dest.device_write_only(), src.device(), src.size(), scales.device(), block_size);
    }
}
// ------------------------------------------------------------------------------------
__global__ void _cuda_mult1(float* d, const float* s1, const float* s2, size_t n)
......
......@@ -192,6 +192,13 @@ namespace dlib
const tensor& src2
);
// CUDA implementation of scale_channels().  Scales each (sample,channel)
// plane of src by scales(n,k,1,1), assigning into dest, or accumulating
// into dest when add_to is true.  See the tt::scale_channels() spec for
// the full contract.
void scale_channels (
    bool add_to,
    tensor& dest,
    const tensor& src,
    const tensor& scales
);
void add (
tensor& dest,
const tensor& src1,
......
......@@ -290,6 +290,20 @@ namespace dlib { namespace tt
}
void scale_channels (
    bool add_to,
    tensor& dest,
    const tensor& src,
    const tensor& scales
)
{
    // Dispatch to whichever backend this build was compiled with.  Both
    // implementations satisfy the same spec (see the declaration's contract).
#ifndef DLIB_USE_CUDA
    cpu::scale_channels(add_to, dest, src, scales);
#else
    cuda::scale_channels(add_to, dest, src, scales);
#endif
}
void multiply_conv (
bool add_to,
tensor& dest,
......
......@@ -275,6 +275,27 @@ namespace dlib { namespace tt
- Instead of assigning the result to dest, this function adds the result to dest.
!*/
void scale_channels (
    bool add_to,
    tensor& dest,
    const tensor& src,
    const tensor& scales
);
/*!
    requires
        - have_same_dimensions(dest, src) == true
        - scales.num_samples() == src.num_samples()
        - scales.k() == src.k()
        - scales.nr() == 1
        - scales.nc() == 1
    ensures
        - Scales each channel of src by the corresponding value in scales.  To be
          precise, we will have:
            - if (add_to == false) then
                - #dest(n,k,r,c) == src(n,k,r,c)*scales(n,k,1,1)
            - else
                - #dest(n,k,r,c) == dest(n,k,r,c) + src(n,k,r,c)*scales(n,k,1,1)
                  (i.e. the result is added to dest rather than assigned)
!*/
void multiply_conv (
bool add_to,
tensor& dest,
......
......@@ -793,6 +793,33 @@ namespace
#ifdef DLIB_USE_CUDA
void test_scale_channels()
{
tt::tensor_rand rnd;
resizable_tensor dest1(2,3,4,5), dest2;
rnd.fill_gaussian(dest1);
dest2 = dest1;
resizable_tensor src(2,3,4,5);
resizable_tensor scales(2,3);
rnd.fill_gaussian(src);
rnd.fill_gaussian(scales);
cpu::scale_channels(true, dest1, src, scales);
cuda::scale_channels(true, dest2, src, scales);
DLIB_TEST(max(abs(mat(dest1)-mat(dest2))) < 1e-6);
cpu::scale_channels(false, dest1, src, scales);
cuda::scale_channels(false, dest2, src, scales);
DLIB_TEST(max(abs(mat(dest1)-mat(dest2))) < 1e-6);
}
// ----------------------------------------------------------------------------------------
void test_affine_rect()
{
dlib::rand rnd;
......@@ -3084,6 +3111,7 @@ namespace
compare_adam();
test_copy_tensor_gpu();
test_copy_tensor_add_to_gpu();
test_scale_channels();
#endif
test_tensor_resize_bilinear(2, 3, 6,6, 11, 11);
test_tensor_resize_bilinear(2, 3, 6,6, 3, 4);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment