Commit 67170338 authored by Davis King

Added a version of tt::affine_transform() that operates on a sub-rectangle rather than the entire tensor.
parent e0a14376
......@@ -486,6 +486,46 @@ namespace dlib
}
}
// ----------------------------------------------------------------------------------------
void affine_transform(
const rectangle& rect,
tensor& dest,
const tensor& src1,
const tensor& src2,
const tensor& src3,
float A,
float B,
float C
)
{
DLIB_CASSERT(dest.size() == src1.size());
DLIB_CASSERT(dest.size() == src2.size());
DLIB_CASSERT(dest.size() == src3.size());
DLIB_CASSERT(dest.num_samples() == src1.num_samples());
DLIB_CASSERT(dest.num_samples() == src2.num_samples());
DLIB_CASSERT(dest.num_samples() == src3.num_samples());
DLIB_CASSERT(rectangle(0,0, dest.size()/dest.num_samples()-1, dest.num_samples()-1).contains(rect));
auto d = dest.host();
auto s1 = src1.host();
auto s2 = src2.host();
auto s3 = src3.host();
const auto nc = dest.size()/dest.num_samples();
for (long r = rect.top(); r <= rect.bottom(); ++r)
{
for (long c = rect.left(); c <= rect.right(); ++c)
{
auto idx = r*nc + c;
d[idx] = s1[idx]*A + s2[idx]*B + s3[idx]*C;
}
}
}
// -----------------------------------------------------------------------------------
void compute_adam_update (
......
......@@ -7,6 +7,7 @@
// and cudnn_dlibapi.h
#include "tensor.h"
#include "../geometry/rectangle.h"
namespace dlib
{
......@@ -111,6 +112,19 @@ namespace dlib
const tensor& B
);
// -----------------------------------------------------------------------------------
// Rectangle-restricted affine transform.  For every element inside rect this
// computes dest = A*src1 + B*src2 + C*src3, where the tensors are viewed as 2D
// arrays with num_samples() rows and size()/num_samples() columns.  All tensors
// must have matching size() and num_samples(), and rect must be contained in
// that 2D view of dest.
// NOTE(review): presumably the cpu:: declaration of this overload -- confirm
// against the enclosing namespace, which is outside this hunk.
void affine_transform(
const rectangle& rect,
tensor& dest,
const tensor& src1,
const tensor& src2,
const tensor& src3,
float A,
float B,
float C
);
// -----------------------------------------------------------------------------------
void compute_adam_update (
......
......@@ -575,6 +575,57 @@ namespace dlib
launch_kernel(_cuda_affine_transform1_0,max_jobs(dest.size()),dest.device(), src.device(), src.size(), A);
}
// ----------------------------------------------------------------------------------------
// Kernel that evaluates d = A*s1 + B*s2 + C*s3 over a rectangular window of
// row-major 2D buffers.
//
//   start_idx - flat offset of the window's top-left element
//   n         - number of elements in the window
//   rect_nc   - number of columns in the window
//   total_nc  - number of columns in a full row of the buffers
//
// Each job index i in [0,n) is split into a row (i/rect_nc) and a column
// (i%rect_nc) inside the window, and then mapped to a flat offset into the
// full buffers.
__global__ void _cuda_affine_transform_rect(
    float* d,
    const float* s1,
    const float* s2,
    const float* s3,
    float A,
    float B,
    float C,
    size_t start_idx,
    size_t n,
    size_t rect_nc,
    size_t total_nc
)
{
    for (auto i : grid_stride_range(0, n))
    {
        // window row * full row stride + window column, shifted to the window origin
        const size_t offset = start_idx + (i/rect_nc)*total_nc + (i%rect_nc);
        d[offset] = A*s1[offset] + B*s2[offset] + C*s3[offset];
    }
}
// Computes dest = A*src1 + B*src2 + C*src3 on the device, but only for the
// elements that fall inside rect.  The tensors are treated as 2D arrays with
// dest.num_samples() rows and dest.size()/dest.num_samples() columns.
//
// All four tensors must have the same size() and num_samples(), and a non-empty
// rect must lie entirely inside the 2D view of dest.
void affine_transform(
    const rectangle& rect,
    tensor& dest,
    const tensor& src1,
    const tensor& src2,
    const tensor& src3,
    float A,
    float B,
    float C
)
{
    DLIB_CASSERT(dest.size() == src1.size());
    DLIB_CASSERT(dest.size() == src2.size());
    DLIB_CASSERT(dest.size() == src3.size());
    DLIB_CASSERT(dest.num_samples() == src1.num_samples());
    DLIB_CASSERT(dest.num_samples() == src2.num_samples());
    DLIB_CASSERT(dest.num_samples() == src3.num_samples());

    // An empty rect has area 0, so the kernel would have no elements to process.
    // Return before launching anything; this also avoids building a start index
    // from an empty rect's coordinates and avoids the division below when dest
    // itself is empty (num_samples()==0).
    if (rect.is_empty())
        return;

    // A non-empty rect can't fit inside a tensor with no samples.  Check this
    // explicitly rather than dividing by zero.
    DLIB_CASSERT(dest.num_samples() != 0);

    // Number of columns in the 2D view of the tensors (one row per sample).
    const auto total_nc = dest.size()/dest.num_samples();
    DLIB_CASSERT(rectangle(0,0, total_nc-1, dest.num_samples()-1).contains(rect));

    launch_kernel(_cuda_affine_transform_rect,max_jobs(rect.area()),
        dest.device(), src1.device(), src2.device(), src3.device(), A, B, C,
        rect.left() + rect.top()*total_nc,   // flat offset of rect's top-left element
        rect.area(),                         // number of elements to process
        rect.width(),                        // columns in rect
        total_nc);                           // columns in a full tensor row
}
// ----------------------------------------------------------------------------------------
__global__ void _cuda_affine_transform4(float* d, const float* s1, const float* s2, size_t n, float A, float B, float C)
......
......@@ -5,6 +5,7 @@
#include "tensor.h"
#include "../geometry/rectangle.h"
namespace dlib
{
......@@ -230,6 +231,17 @@ namespace dlib
const float C
);
// Rectangle-restricted affine transform.  For every element inside rect this
// computes dest = A*src1 + B*src2 + C*src3, where the tensors are viewed as 2D
// arrays with num_samples() rows and size()/num_samples() columns.  All tensors
// must have matching size() and num_samples(), and rect must be contained in
// that 2D view of dest.
// NOTE(review): presumably the cuda:: declaration of this overload -- confirm
// against the enclosing namespace, which is outside this hunk.
void affine_transform(
const rectangle& rect,
tensor& dest,
const tensor& src1,
const tensor& src2,
const tensor& src3,
float A,
float B,
float C
);
// Note that this function isn't in the tt:: namespace because add_scaled() is
// called by cuda::add() so we don't need a tt:: version of add_scaled().
void add_scaled(
......
......@@ -353,6 +353,24 @@ namespace dlib { namespace tt
#endif
}
// Rectangle-restricted affine transform: forwards all arguments unchanged to
// the backend selected at compile time.
void affine_transform(
const rectangle& rect,
tensor& dest,
const tensor& src1,
const tensor& src2,
const tensor& src3,
float A,
float B,
float C
)
{
// When DLIB_USE_CUDA is defined the cuda:: implementation is used, otherwise
// the cpu:: implementation is used.
#ifdef DLIB_USE_CUDA
cuda::affine_transform(rect, dest,src1,src2,src3,A,B,C);
#else
cpu::affine_transform(rect, dest,src1,src2,src3,A,B,C);
#endif
}
void affine_transform(
tensor& dest,
const tensor& src1,
......
......@@ -11,6 +11,7 @@
#include "cuda_dlib.h"
#include "../rand.h"
#include <memory>
#include "../geometry/rectangle.h"
namespace dlib
{
......@@ -356,6 +357,34 @@ namespace dlib { namespace tt
- #dest.host()[i] == A*src1.host()[i] + B*src2.host()[i] + C*src3.host()[i]
!*/
void affine_transform(
const rectangle& rect,
tensor& dest,
const tensor& src1,
const tensor& src2,
const tensor& src3,
float A,
float B,
float C
);
/*!
requires
- dest.size()==src1.size()
- dest.size()==src2.size()
- dest.size()==src3.size()
- dest.num_samples()==src1.num_samples()
- dest.num_samples()==src2.num_samples()
- dest.num_samples()==src3.num_samples()
- get_rect(mat(dest)).contains(rect) == true
(i.e. rect must be entirely contained within dest, where dest is viewed as a
matrix with dest.num_samples() rows and dest.size()/dest.num_samples() columns)
ensures
- This function operates much like
affine_transform(dest,src1,src2,src3,A,B,C,0), except that it runs over only
the sub-rectangle indicated by rect. In particular, this function is equivalent
to:
set_subm(dest,rect) = A*subm(mat(src1),rect) + B*subm(mat(src2),rect) + C*subm(mat(src3),rect)
- Elements of dest that are outside rect are not modified.
!*/
// ----------------------------------------------------------------------------------------
void affine_transform(
......
......@@ -698,6 +698,45 @@ namespace
#ifdef DLIB_USE_CUDA
void test_affine_rect()
{
dlib::rand rnd;
for (int iter = 0; iter < 20; ++iter)
{
long nr = 1 + rnd.get_random_32bit_number()%10;
long nc = 1 + rnd.get_random_32bit_number()%10;
resizable_tensor dest1(nr,nc), dest2(nr,nc), src1(nr,nc), src2(nr,nc), src3(nr,nc);
matrix<float> dest3;
dest1 = 1;
dest2 = 1;
dest3 = mat(dest1);
src1 = 2;
src2 = 3;
src3 = 4;
point p1(rnd.get_random_32bit_number()%nc, rnd.get_random_32bit_number()%nr);
point p2(rnd.get_random_32bit_number()%nc, rnd.get_random_32bit_number()%nr);
rectangle rect(p1,p2);
cuda::affine_transform(rect, dest1, src1, src2, src3, 2,3,4);
cpu::affine_transform(rect, dest2, src1, src2, src3, 2,3,4);
DLIB_TEST(mat(dest1) == mat(dest2));
set_subm(dest3,rect) = 2*subm(mat(src1),rect) + 3*subm(mat(src2),rect) + 4*subm(mat(src3),rect);
DLIB_TEST(dest3 == mat(dest1));
dest1 = 1;
tt::affine_transform(rect, dest1, src1, src2, src3, 2,3,4);
DLIB_TEST(dest3 == mat(dest1));
}
}
void test_conv()
{
cuda::tensor_conv conv1;
......@@ -1865,6 +1904,7 @@ namespace
test_tagging();
#ifdef DLIB_USE_CUDA
test_affine_rect();
test_conv();
test_more_ops2();
test_more_ops(1,1);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment