Commit 88376980 authored by Davis King

Added an avg_pool_ layer. Also fixed some errors in the layer specs.

parent 5875fa75
@@ -920,18 +920,18 @@ namespace dlib
// ------------------------------------------------------------------------------------
// ------------------------------------------------------------------------------------
max_pool::max_pool (
pooling::pooling (
) : handle(nullptr),window_height(0),window_width(0),stride_y(0),stride_x(0),do_max_pooling(true)
{
}
max_pool::~max_pool(
pooling::~pooling(
)
{
clear();
}
void max_pool::
void pooling::
clear(
)
{
@@ -944,13 +944,38 @@ namespace dlib
stride_x = 0;
}
void max_pool::
setup(
void pooling::
setup_max_pooling(
int window_height_,
int window_width_,
int stride_y_,
int stride_x_
)
{
setup(window_height_, window_width_, stride_y_, stride_x_, CUDNN_POOLING_MAX);
do_max_pooling = true;
}
void pooling::
setup_avg_pooling(
int window_height_,
int window_width_,
int stride_y_,
int stride_x_
)
{
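// Note: CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING leaves out-of-bounds
// padding elements out of the average, matching the mean(subm_clipped(...))
// behavior promised in the layer specs below.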
setup(window_height_, window_width_, stride_y_, stride_x_, CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING);
do_max_pooling = false;
}
void pooling::
setup(
int window_height_,
int window_width_,
int stride_y_,
int stride_x_,
int pooling_mode
)
{
if (window_height == window_height_ &&
window_width == window_width_ &&
@@ -972,7 +997,7 @@ namespace dlib
handle = poolingDesc;
CHECK_CUDNN(cudnnSetPooling2dDescriptor(poolingDesc,
CUDNN_POOLING_MAX,
(cudnnPoolingMode_t)pooling_mode,
window_height,
window_width,
window_height/2,
@@ -987,7 +1012,7 @@ namespace dlib
}
}
void max_pool::
void pooling::
operator() (
resizable_tensor& dest,
const tensor& src
@@ -1034,7 +1059,7 @@ namespace dlib
dest.device()));
}
void max_pool::get_gradient(
void pooling::get_gradient(
const tensor& gradient_input,
const tensor& dest,
const tensor& src,
......
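For readers unfamiliar with the cuDNN call configured above, here is a minimal
standalone sketch of setting up a 2d pooling descriptor for either mode. It
assumes the cuDNN v4-era cudnnSetPooling2dDescriptor() signature used in this
commit (later cuDNN versions insert a NaN-propagation argument), and check()
is a hypothetical stand-in for dlib's CHECK_CUDNN macro.

#include <cudnn.h>
#include <cstdio>
#include <cstdlib>

// Hypothetical stand-in for dlib's CHECK_CUDNN error-checking macro.
static void check(cudnnStatus_t s)
{
    if (s != CUDNN_STATUS_SUCCESS)
    {
        std::fprintf(stderr, "cuDNN error: %s\n", cudnnGetErrorString(s));
        std::exit(1);
    }
}

int main()
{
    cudnnPoolingDescriptor_t desc;
    check(cudnnCreatePoolingDescriptor(&desc));

    // The two setup_*_pooling() functions above differ only in the mode value
    // they forward to the shared setup() helper.
    const bool do_max_pooling = false;
    const cudnnPoolingMode_t mode = do_max_pooling ?
        CUDNN_POOLING_MAX : CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING;

    // Same convention as the code above: pad by half the window size.
    const int window_height = 3, window_width = 3;
    check(cudnnSetPooling2dDescriptor(desc,
                                      mode,
                                      window_height, window_width,
                                      window_height/2, window_width/2,
                                      2, 2));  // stride_y, stride_x

    check(cudnnDestroyPoolingDescriptor(desc));
    return 0;
}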
@@ -326,52 +326,43 @@ namespace dlib
// ------------------------------------------------------------------------------------
class max_pool
class pooling
{
/*!
!*/
public:
max_pool(const max_pool&) = delete;
max_pool& operator=(const max_pool&) = delete;
pooling(const pooling&) = delete;
pooling& operator=(const pooling&) = delete;
max_pool (
pooling (
);
~max_pool(
~pooling(
);
void clear(
);
void setup(
void setup_max_pooling(
int window_height,
int window_width,
int stride_y,
int stride_x
);
void setup_avg_pooling(
int window_height,
int window_width,
int stride_y,
int stride_x
);
bool does_max_pooling(
) const { return do_max_pooling; }
void operator() (
resizable_tensor& dest,
const tensor& src
);
/*!
requires
- is_same_object(dest,src) == false
- src.nr() >= stride_y
- src.nc() >= stride_x
ensures
- #dest.num_samples() == src.num_samples()
- #dest.k() == src.k()
- #dest.nr() == 1+(src.nr()-window_height%2)/stride_y
- #dest.nc() == 1+(src.nc()-window_width%2)/stride_x
- for all valid s, k, r, and c:
- image_plane(#dest,s,k)(r,c) == max(subm_clipped(image_plane(src,s,k),
centered_rect(c*stride_x,
r*stride_y,
window_width,
window_height)))
!*/
void get_gradient(
const tensor& gradient_input,
@@ -379,27 +370,23 @@ namespace dlib
const tensor& src,
tensor& grad
);
/*!
requires
- have_same_dimensions(gradient_input,dest) == true
- have_same_dimensions(src,grad) == true
- dest contains the result of calling (*this)(dest,src)
- is_same_object(grad,gradient_input) == false
- is_same_object(grad,dest) == false
- is_same_object(grad,src) == false
ensures
- Recalling that dest is the output of (*this)(dest,src),
let f(src) == dot(gradient_input,dest)
- Then this function computes the gradient of f() with respect to src
and adds it to grad.
!*/
private:
void setup(
int window_height,
int window_width,
int stride_y,
int stride_x,
int pooling_mode
);
void* handle;
int window_height;
int window_width;
int stride_y;
int stride_x;
bool do_max_pooling;
};
// ------------------------------------------------------------------------------------
......
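Putting the declarations above together, the intended call pattern looks
roughly like the following. This is a sketch inferred from this header, not
code from the commit; it assumes dlib's internal headers declaring
cuda::pooling, tensor, and resizable_tensor are available, and that the
tensors passed in already satisfy the dimension requirements in the contracts.

// Sketch: configure once, then run forward and backward passes.
void avg_pool_once(
    const dlib::tensor& input,           // data to pool
    const dlib::tensor& gradient_input,  // must match the forward output's dimensions
    dlib::tensor& grad                   // must match input's dimensions
)
{
    dlib::cuda::pooling p;
    p.setup_avg_pooling(3, 3, 2, 2);  // 3x3 window, stride 2 in each direction

    dlib::resizable_tensor out;
    p(out, input);  // forward pass: resizes out and fills it with pooled values

    // Backward pass: with f(input) == dot(gradient_input, out), this adds the
    // gradient of f with respect to input into grad.
    p.get_gradient(gradient_input, out, input, grad);
}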
@@ -217,7 +217,7 @@ namespace dlib
{
// this->mp is non-copyable so we have to write our own copy to avoid trying to
// copy it and getting an error.
mp.setup(_nr, _nc, _stride_y, _stride_x);
mp.setup_max_pooling(_nr, _nc, _stride_y, _stride_x);
}
max_pool_& operator= (
@@ -234,14 +234,14 @@ namespace dlib
_stride_y = item._stride_y;
_stride_x = item._stride_x;
mp.setup(_nr, _nc, _stride_y, _stride_x);
mp.setup_max_pooling(_nr, _nc, _stride_y, _stride_x);
return *this;
}
template <typename SUBNET>
void setup (const SUBNET& /*sub*/)
{
mp.setup(_nr, _nc, _stride_y, _stride_x);
mp.setup_max_pooling(_nr, _nc, _stride_y, _stride_x);
}
template <typename SUBNET>
@@ -279,7 +279,7 @@ namespace dlib
deserialize(item._stride_y, in);
deserialize(item._stride_x, in);
item.mp.setup(item._nr, item._nc, item._stride_y, item._stride_x);
item.mp.setup_max_pooling(item._nr, item._nc, item._stride_y, item._stride_x);
}
private:
@@ -289,13 +289,133 @@ namespace dlib
int _stride_y;
int _stride_x;
tt::max_pool mp;
tt::pooling mp;
resizable_tensor params;
};
template <typename SUBNET>
using max_pool = add_layer<max_pool_, SUBNET>;
// ----------------------------------------------------------------------------------------
class avg_pool_
{
public:
avg_pool_ (
) :
_nr(3),
_nc(3),
_stride_y(1),
_stride_x(1)
{}
avg_pool_(
long nr_,
long nc_,
int stride_y_ = 1,
int stride_x_ = 1
) :
_nr(nr_),
_nc(nc_),
_stride_y(stride_y_),
_stride_x(stride_x_)
{}
long nr() const { return _nr; }
long nc() const { return _nc; }
long stride_y() const { return _stride_y; }
long stride_x() const { return _stride_x; }
avg_pool_ (
const avg_pool_& item
) :
_nr(item._nr),
_nc(item._nc),
_stride_y(item._stride_y),
_stride_x(item._stride_x)
{
// this->ap is non-copyable so we have to write our own copy to avoid trying to
// copy it and getting an error.
ap.setup_avg_pooling(_nr, _nc, _stride_y, _stride_x);
}
avg_pool_& operator= (
const avg_pool_& item
)
{
if (this == &item)
return *this;
// this->ap is non-copyable so we have to write our own copy to avoid trying to
// copy it and getting an error.
_nr = item._nr;
_nc = item._nc;
_stride_y = item._stride_y;
_stride_x = item._stride_x;
ap.setup_avg_pooling(_nr, _nc, _stride_y, _stride_x);
return *this;
}
template <typename SUBNET>
void setup (const SUBNET& /*sub*/)
{
ap.setup_avg_pooling(_nr, _nc, _stride_y, _stride_x);
}
template <typename SUBNET>
void forward(const SUBNET& sub, resizable_tensor& output)
{
ap(output, sub.get_output());
}
template <typename SUBNET>
void backward(const tensor& computed_output, const tensor& gradient_input, SUBNET& sub, tensor& /*params_grad*/)
{
ap.get_gradient(gradient_input, computed_output, sub.get_output(), sub.get_gradient_input());
}
const tensor& get_layer_params() const { return params; }
tensor& get_layer_params() { return params; }
friend void serialize(const avg_pool_& item, std::ostream& out)
{
serialize("avg_pool_", out);
serialize(item._nr, out);
serialize(item._nc, out);
serialize(item._stride_y, out);
serialize(item._stride_x, out);
}
friend void deserialize(avg_pool_& item, std::istream& in)
{
std::string version;
deserialize(version, in);
if (version != "avg_pool_")
throw serialization_error("Unexpected version found while deserializing dlib::avg_pool_.");
deserialize(item._nr, in);
deserialize(item._nc, in);
deserialize(item._stride_y, in);
deserialize(item._stride_x, in);
item.ap.setup_avg_pooling(item._nr, item._nc, item._stride_y, item._stride_x);
}
private:
long _nr;
long _nc;
int _stride_y;
int _stride_x;
tt::pooling ap;
resizable_tensor params;
};
template <typename SUBNET>
using avg_pool = add_layer<avg_pool_, SUBNET>;
// ----------------------------------------------------------------------------------------
enum batch_normalization_mode
......
@@ -664,8 +664,8 @@ namespace dlib
then OUT is defined as follows:
- OUT.num_samples() == IN.num_samples()
- OUT.k() == IN.k()
- OUT.nr() == IN.nr()/stride_y()
- OUT.nc() == IN.nc()/stride_x()
- OUT.nr() == 1+(IN.nr()-nr()%2)/stride_y()
- OUT.nc() == 1+(IN.nc()-nc()%2)/stride_x()
- for all valid s, k, r, and c:
- image_plane(OUT,s,k)(r,c) == max(subm_clipped(image_plane(IN,s,k),
r*stride_y(),
@@ -753,6 +753,111 @@ namespace dlib
template <typename SUBNET>
using max_pool = add_layer<max_pool_, SUBNET>;
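The corrected output-size expressions above do not always agree with the old
IN.nr()/stride_y() form. A quick illustrative check of one case where they
differ (the numbers are arbitrary):

#include <iostream>

int main()
{
    // A 28-row input pooled with a 2x2 window and stride 2.
    const int in_nr = 28, window_nr = 2, stride_y = 2;

    const int old_rows = in_nr/stride_y;                      // old spec: 14
    const int new_rows = 1 + (in_nr - window_nr%2)/stride_y;  // fixed spec: 15

    std::cout << old_rows << " vs " << new_rows << "\n";      // prints "14 vs 15"
    return 0;
}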
// ----------------------------------------------------------------------------------------
class avg_pool_
{
/*!
WHAT THIS OBJECT REPRESENTS
This is an implementation of the EXAMPLE_LAYER_ interface defined above.
In particular, it defines an average pooling layer that takes an input tensor
and downsamples it. It does this by sliding a window over the images in an
input tensor and outputting, for each channel, the average element within
the window.
To be precise, if we call the input tensor IN and the output tensor OUT,
then OUT is defined as follows:
- OUT.num_samples() == IN.num_samples()
- OUT.k() == IN.k()
- OUT.nr() == 1+(IN.nr()-nr()%2)/stride_y()
- OUT.nc() == 1+(IN.nc()-nc()%2)/stride_x()
- for all valid s, k, r, and c:
- image_plane(OUT,s,k)(r,c) == mean(subm_clipped(image_plane(IN,s,k),
r*stride_y(),
c*stride_x(),
nr(),
nc()))
!*/
public:
avg_pool_ (
);
/*!
ensures
- #nr() == 3
- #nc() == 3
- #stride_y() == 1
- #stride_x() == 1
!*/
avg_pool_(
long nr_,
long nc_,
int stride_y_ = 1,
int stride_x_ = 1
);
/*!
ensures
- #nr() == nr_
- #nc() == nc_
- #stride_y() == stride_y_
- #stride_x() == stride_x_
!*/
long nr(
) const;
/*!
ensures
- returns the number of rows in the pooling window.
!*/
long nc(
) const;
/*!
ensures
- returns the number of columns in the pooling window.
!*/
long stride_y(
) const;
/*!
ensures
- returns the vertical stride used when scanning the pooling window
over an image. That is, each window will be moved stride_y() pixels down
at a time when it moves over the image.
!*/
long stride_x(
) const;
/*!
ensures
- returns the horizontal stride used when scanning the pooling window
over an image. That is, each window will be moved stride_x() pixels to
the right at a time when it moves over the image.
!*/
template <typename SUBNET> void setup (const SUBNET& sub);
template <typename SUBNET> void forward(const SUBNET& sub, resizable_tensor& output);
template <typename SUBNET> void backward(const tensor& computed_output, const tensor& gradient_input, SUBNET& sub, tensor& params_grad);
const tensor& get_layer_params() const;
tensor& get_layer_params();
/*!
These functions are implemented as described in the EXAMPLE_LAYER_ interface.
Note that this layer doesn't have any parameters, so the tensor returned by
get_layer_params() is always empty.
!*/
};
void serialize(const avg_pool_& item, std::ostream& out);
void deserialize(avg_pool_& item, std::istream& in);
/*!
provides serialization support
!*/
template <typename SUBNET>
using avg_pool = add_layer<avg_pool_, SUBNET>;
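As with max_pool, the layer is meant to be used through the add_layer alias
above. A hypothetical network fragment, mirroring the rcon template from the
tests later in this commit but with average pooling swapped in:

// Hypothetical building block: conv -> batch norm -> relu -> average pooling.
template <typename SUBNET>
using acon = avg_pool<relu<bn<con<SUBNET>>>>;

// Runtime layer parameters are supplied the same way as in the tests, e.g.:
//   std::make_tuple(avg_pool_(2,2,2,2), relu_(), bn_(BATCH_NORM_CONV), con_(n,5,5))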
// ----------------------------------------------------------------------------------------
class relu_
......
@@ -422,13 +422,13 @@ namespace dlib { namespace tt
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
max_pool::
max_pool (
pooling::
pooling (
)
{
}
void max_pool::
void pooling::
clear(
)
{
@@ -440,8 +440,8 @@ namespace dlib { namespace tt
#endif
}
void max_pool::
setup(
void pooling::
setup_max_pooling(
int window_height,
int window_width,
int stride_y,
@@ -449,14 +449,42 @@ namespace dlib { namespace tt
)
{
#ifdef DLIB_USE_CUDA
impl.setup(window_height, window_width, stride_y, stride_x);
impl.setup_max_pooling(window_height, window_width, stride_y, stride_x);
#else
// TODO
DLIB_CASSERT(false,"");
#endif
}
void max_pool::
void pooling::
setup_avg_pooling(
int window_height,
int window_width,
int stride_y,
int stride_x
)
{
#ifdef DLIB_USE_CUDA
impl.setup_avg_pooling(window_height, window_width, stride_y, stride_x);
#else
// TODO
DLIB_CASSERT(false,"");
#endif
}
bool pooling::
does_max_pooling (
) const
{
#ifdef DLIB_USE_CUDA
return impl.does_max_pooling();
#else
// TODO
DLIB_CASSERT(false,"");
#endif
}
void pooling::
operator() (
resizable_tensor& dest,
const tensor& src
@@ -470,7 +498,7 @@ namespace dlib { namespace tt
#endif
}
void max_pool::
void pooling::
get_gradient(
const tensor& gradient_input,
const tensor& dest,
......
@@ -557,28 +557,38 @@ namespace dlib { namespace tt
// ----------------------------------------------------------------------------------------
class max_pool
class pooling
{
/*!
!*/
public:
max_pool(const max_pool&) = delete;
max_pool& operator=(const max_pool&) = delete;
pooling(const pooling&) = delete;
pooling& operator=(const pooling&) = delete;
max_pool (
pooling (
);
void clear(
);
void setup(
void setup_max_pooling(
int window_height,
int window_width,
int stride_y,
int stride_x
);
void setup_avg_pooling(
int window_height,
int window_width,
int stride_y,
int stride_x
);
bool does_max_pooling(
) const;
void operator() (
resizable_tensor& dest,
const tensor& src
@@ -592,7 +602,14 @@ namespace dlib { namespace tt
- #dest.nr() == 1+(src.nr()-window_height%2)/stride_y
- #dest.nc() == 1+(src.nc()-window_width%2)/stride_x
- for all valid s, k, r, and c:
- image_plane(#dest,s,k)(r,c) == max(subm_clipped(image_plane(src,s,k),
- if (does_max_pooling()) then
- image_plane(#dest,s,k)(r,c) == max(subm_clipped(image_plane(src,s,k),
centered_rect(c*stride_x,
r*stride_y,
window_width,
window_height)))
- else
- image_plane(#dest,s,k)(r,c) == mean(subm_clipped(image_plane(src,s,k),
centered_rect(c*stride_x,
r*stride_y,
window_width,
@@ -622,7 +639,7 @@ namespace dlib { namespace tt
private:
#ifdef DLIB_USE_CUDA
cuda::max_pool impl;
cuda::pooling impl;
#else
// TODO
#endif
......
@@ -17,7 +17,6 @@ namespace
using namespace test;
using namespace dlib;
using namespace dlib::tt;
using namespace std;
logger dlog("test.dnn");
@@ -43,6 +42,7 @@ namespace
void test_tanh()
{
using namespace dlib::tt;
print_spinner();
resizable_tensor src(5,5), dest(5,5), gradient_input(5,5);
src = matrix_cast<float>(gaussian_randm(5,5, 0));
@@ -78,6 +78,7 @@ namespace
void test_sigmoid()
{
using namespace dlib::tt;
print_spinner();
resizable_tensor src(5,5), dest(5,5), gradient_input(5,5);
src = matrix_cast<float>(gaussian_randm(5,5, 0));
@@ -113,6 +114,7 @@ namespace
void test_softmax()
{
using namespace dlib::tt;
print_spinner();
const long nr = 3;
const long nc = 3;
@@ -150,6 +152,7 @@ namespace
void test_batch_normalize()
{
using namespace dlib::tt;
print_spinner();
resizable_tensor src(5,5), gamma(1,5), beta(1,5), dest, dest2, means, vars, gradient_input(5,5);
src = matrix_cast<float>(gaussian_randm(5,5, 0));
@@ -229,6 +232,7 @@ namespace
void test_batch_normalize_conv()
{
using namespace dlib::tt;
print_spinner();
resizable_tensor src(5,5,4,4), gamma(1,5), beta(1,5), dest, dest2, means, vars, gradient_input(5,5,4,4);
src = matrix_cast<float>(gaussian_randm(5,5*4*4, 0));
@@ -313,6 +317,7 @@ namespace
void test_basic_tensor_ops()
{
using namespace dlib::tt;
print_spinner();
resizable_tensor dest, src(3,4), A(1,4), B(1,4);
src = 2;
@@ -490,6 +495,7 @@ namespace
#ifdef DLIB_USE_CUDA
void test_more_ops(const long nr, const long nc)
{
using namespace dlib::tt;
print_spinner();
// We are going to make sure that the CPU implementation of these things matches
// the CUDA implementation.
@@ -726,12 +732,12 @@ namespace
rnd.fill_gaussian(gradient_input,0,1);
tt::max_pool mp;
tt::pooling mp;
mp.setup(window_height,window_width,stride_y,stride_x);
mp.setup_max_pooling(window_height,window_width,stride_y,stride_x);
mp(A, B);
// make sure max_pool does what it's spec says it should.
// make sure max pooling does what its spec says it should.
DLIB_TEST( A.num_samples() == B.num_samples());
DLIB_TEST( A.k() == B.k());
DLIB_TEST( A.nr() == 1+(B.nr()-window_height%2)/stride_y);
@@ -755,6 +761,58 @@ namespace
}
}
// ----------------------------------------------------------------------------------------
void test_avg_pool(
const int window_height,
const int window_width,
const int stride_y,
const int stride_x
)
{
print_spinner();
resizable_tensor A, B, gradient_input;
A.set_size(2,2,16,7);
B.copy_size(A);
gradient_input.copy_size(A);
tt::tensor_rand rnd;
rnd.fill_gaussian(A,0,1);
rnd.fill_gaussian(B,0,1);
rnd.fill_gaussian(gradient_input,0,1);
tt::pooling mp;
mp.setup_avg_pooling(window_height,window_width,stride_y,stride_x);
mp(A, B);
// make sure avg pooling does what its spec says it should.
DLIB_TEST( A.num_samples() == B.num_samples());
DLIB_TEST( A.k() == B.k());
DLIB_TEST( A.nr() == 1+(B.nr()-window_height%2)/stride_y);
DLIB_TEST( A.nc() == 1+(B.nc()-window_width%2)/stride_x);
for (long s = 0; s < A.num_samples(); ++s)
{
for (long k = 0; k < A.k(); ++k)
{
for (long r = 0; r < A.nr(); ++r)
{
for (long c = 0; c < A.nc(); ++c)
{
float expected = mean(subm_clipped(image_plane(B,s,k),
centered_rect(c*stride_x,
r*stride_y,
window_width,
window_height)));
float err = abs(image_plane(A,s,k)(r,c) - expected);
DLIB_TEST_MSG(err < 1e-5, err << " " << expected << " " << image_plane(A,s,k)(r,c));
}
}
}
}
}
// ----------------------------------------------------------------------------------------
void test_layers()
@@ -764,6 +822,11 @@ namespace
max_pool_ l;
DLIB_TEST_MSG(test_layer(l), test_layer(l));
}
{
print_spinner();
avg_pool_ l;
DLIB_TEST_MSG(test_layer(l), test_layer(l));
}
{
print_spinner();
affine_ l;
@@ -826,6 +889,40 @@ namespace
}
}
// ----------------------------------------------------------------------------------------
template <typename T> using rcon = max_pool<relu<bn<con<T>>>>;
std::tuple<max_pool_,relu_,bn_,con_> rcon_ (unsigned long n)
{
return std::make_tuple(max_pool_(2,2,2,2),relu_(),bn_(BATCH_NORM_CONV),con_(n,5,5));
}
template <typename T> using rfc = relu<bn<fc<T>>>;
std::tuple<relu_,bn_,fc_> rfc_ (unsigned long n)
{
return std::make_tuple(relu_(),bn_(),fc_(n));
}
void test_tagging(
)
{
typedef loss_multiclass_log<rfc<skip1<rfc<rfc<tag1<rcon<rcon<input<matrix<unsigned char>>>>>>>>>> net_type;
net_type net(rfc_(10),
rfc_(84),
rfc_(120),
rcon_(16),
rcon_(6)
);
DLIB_TEST(layer<tag1>(net).num_layers == 9);
DLIB_TEST(layer<skip1>(net).num_layers == 9+3+3+1);
DLIB_TEST(&layer<skip1>(net).get_output() == &layer<tag1>(net).get_output());
DLIB_TEST(&layer<skip1>(net).get_output() != &layer<tag1>(net).subnet().subnet().get_output());
}
// ----------------------------------------------------------------------------------------
class dnn_tester : public tester
{
public:
@@ -838,6 +935,7 @@ namespace
void perform_test (
)
{
test_tagging();
#ifdef DLIB_USE_CUDA
test_more_ops(1,1);
test_more_ops(3,4);
@@ -853,6 +951,11 @@ namespace
test_max_pool(3,3,2,2);
test_max_pool(2,2,2,2);
test_max_pool(4,5,3,1);
test_avg_pool(1,1,2,3);
test_avg_pool(3,3,1,1);
test_avg_pool(3,3,2,2);
test_avg_pool(2,2,2,2);
test_avg_pool(4,5,3,1);
test_tanh();
test_softmax();
test_sigmoid();
......