Commit 73d78355 authored by Davis King

Added a padding parameter to the pooling and convolution classes. Still need to expose it in the final layer interface.
parent 08c87784
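
For orientation: this patch replaces the implicit window/2 padding previously baked into the pooling and convolution code with explicit padding_y/padding_x parameters and updates the output-size arithmetic accordingly. A minimal sketch of that arithmetic (illustrative helper, not part of the patch):

    // 1 + (input + 2*padding - window) / stride, using integer division,
    // as used below for both pooling windows and convolution filters.
    inline long padded_out_size(long input, long window, long stride, long padding)
    {
        return 1 + (input + 2*padding - window)/stride;
    }
    // Example: a 16-row plane, 3-row window, stride 2, padding 1 gives
    // padded_out_size(16, 3, 2, 1) == 8 output rows.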
......@@ -1245,7 +1245,7 @@ namespace dlib
// ------------------------------------------------------------------------------------
pooling::pooling (
) : window_height(0),window_width(0),stride_y(0),stride_x(0),do_max_pooling(true)
) : window_height(0),window_width(0),stride_y(0),stride_x(0),padding_y(0),padding_x(0),do_max_pooling(true)
{
}
......@@ -1257,6 +1257,8 @@ namespace dlib
window_width = 0;
stride_y = 0;
stride_x = 0;
padding_y = 0;
padding_x = 0;
}
void pooling::
......@@ -1264,18 +1266,24 @@ namespace dlib
int window_height_,
int window_width_,
int stride_y_,
int stride_x_
int stride_x_,
int padding_y_,
int padding_x_
)
{
DLIB_CASSERT(window_width_ > 0,"");
DLIB_CASSERT(window_height_ > 0,"");
DLIB_CASSERT(stride_y_ > 0,"");
DLIB_CASSERT(stride_x_ > 0,"");
DLIB_CASSERT(0 <= padding_y_ && padding_y_ < window_height_,"");
DLIB_CASSERT(0 <= padding_x_ && padding_x_ < window_width_, "");
window_height = window_height_;
window_width = window_width_;
stride_y = stride_y_;
stride_x = stride_x_;
padding_y = padding_y_;
padding_x = padding_x_;
do_max_pooling = true;
}
......@@ -1284,18 +1292,24 @@ namespace dlib
int window_height_,
int window_width_,
int stride_y_,
int stride_x_
int stride_x_,
int padding_y_,
int padding_x_
)
{
DLIB_CASSERT(window_width_ > 0,"");
DLIB_CASSERT(window_height_ > 0,"");
DLIB_CASSERT(stride_y_ > 0,"");
DLIB_CASSERT(stride_x_ > 0,"");
DLIB_CASSERT(0 <= padding_y_ && padding_y_ < window_height_,"");
DLIB_CASSERT(0 <= padding_x_ && padding_x_ < window_width_, "");
window_height = window_height_;
window_width = window_width_;
stride_y = stride_y_;
stride_x = stride_x_;
padding_y = padding_y_;
padding_x = padding_x_;
do_max_pooling = false;
}
......@@ -1309,12 +1323,18 @@ namespace dlib
DLIB_CASSERT(window_height > 0,"");
DLIB_CASSERT(stride_y > 0,"");
DLIB_CASSERT(stride_x > 0,"");
DLIB_CASSERT(0 <= padding_y && padding_y < window_height,"");
DLIB_CASSERT(0 <= padding_x && padding_x < window_width, "");
DLIB_CASSERT(window_width <= src.nc() + 2*padding_x,
"Pooling windows must be small enough to fit into the padded image.");
DLIB_CASSERT(window_height <= src.nr() + 2*padding_y,
"Pooling windows must be small enough to fit into the padded image.");
dest.set_size(
src.num_samples(),
src.k(),
1+(src.nr()-window_height%2)/stride_y,
1+(src.nc()-window_width%2)/stride_x
1+(src.nr()+2*padding_y-window_height)/stride_y,
1+(src.nc()+2*padding_x-window_width)/stride_x
);
if (src.size() == 0)
......@@ -1326,6 +1346,8 @@ namespace dlib
auto d = dest.host();
auto s = src.host();
const long x_offset = window_width/2 - padding_x;
const long y_offset = window_height/2 - padding_y;
if (does_max_pooling())
{
for (long n = 0; n < dest.num_samples(); ++n)
......@@ -1339,8 +1361,8 @@ namespace dlib
{
for (long c = 0; c < dest.nc(); ++c)
{
auto win = centered_rect(c*stride_x,
r*stride_y,
auto win = centered_rect(c*stride_x+x_offset,
r*stride_y+y_offset,
window_width,
window_height);
dimg[r*dest.nc() + c] = max(subm_clipped(simg,win));
......@@ -1362,8 +1384,8 @@ namespace dlib
{
for (long c = 0; c < dest.nc(); ++c)
{
auto win = centered_rect(c*stride_x,
r*stride_y,
auto win = centered_rect(c*stride_x+x_offset,
r*stride_y+y_offset,
window_width,
window_height);
dimg[r*dest.nc() + c] = mean(subm_clipped(simg,win));
......@@ -1395,6 +1417,8 @@ namespace dlib
auto gi = gradient_input.host();
auto g = grad.host();
auto s = src.host();
const long x_offset = window_width/2 - padding_x;
const long y_offset = window_height/2 - padding_y;
if (does_max_pooling())
{
for (long n = 0; n < dest.num_samples(); ++n)
......@@ -1410,8 +1434,8 @@ namespace dlib
{
for (long c = 0; c < dest.nc(); ++c)
{
auto win = centered_rect(c*stride_x,
r*stride_y,
auto win = centered_rect(c*stride_x+x_offset,
r*stride_y+y_offset,
window_width,
window_height).intersect(imgbox);
auto p = max_point(subm(simg,win))+win.tl_corner();
......@@ -1436,8 +1460,8 @@ namespace dlib
{
for (long c = 0; c < dest.nc(); ++c)
{
auto win = centered_rect(c*stride_x,
r*stride_y,
auto win = centered_rect(c*stride_x+x_offset,
r*stride_y+y_offset,
window_width,
window_height).intersect(imgbox);
const float delta = giimg[r*dest.nc()+c]/win.area();
......@@ -1467,14 +1491,16 @@ namespace dlib
long filter_nr,
long filter_nc,
long stride_y,
long stride_x
long stride_x,
long padding_y,
long padding_x
)
{
const auto d = data.host() + data.k()*data.nr()*data.nc()*n;
const rectangle boundary = get_rect(data);
const long out_nr = 1+(data.nr()-filter_nr%2)/stride_y;
const long out_nc = 1+(data.nc()-filter_nc%2)/stride_x;
const long out_nr = 1+(data.nr()+2*padding_y-filter_nr)/stride_y;
const long out_nc = 1+(data.nc()+2*padding_x-filter_nc)/stride_x;
output.set_size(out_nr*out_nc,
data.k()*filter_nr*filter_nc);
......@@ -1483,9 +1509,9 @@ namespace dlib
// now fill in the Toeplitz output matrix for the n-th sample in data.
size_t cnt = 0;
for (long r = -(1-filter_nr%2); r < data.nr(); r+=stride_y)
for (long r = filter_nr-1-padding_y; r-padding_y < data.nr(); r+=stride_y)
{
for (long c = -(1-filter_nc%2); c < data.nc(); c+=stride_x)
for (long c = filter_nc-1-padding_x; c-padding_x < data.nc(); c+=stride_x)
{
for (long k = 0; k < data.k(); ++k)
{
......@@ -1493,9 +1519,9 @@ namespace dlib
{
for (long x = 0; x < filter_nc; ++x)
{
DLIB_CASSERT(cnt < output.size(),"");
long xx = c-x+filter_nc/2;
long yy = r-y+filter_nr/2;
DLIB_ASSERT(cnt < output.size(),"");
long xx = c-x;
long yy = r-y;
if (boundary.contains(xx,yy))
*t = d[(k*data.nr() + yy)*data.nc() + xx];
else
......@@ -1516,7 +1542,9 @@ namespace dlib
long filter_nr,
long filter_nc,
long stride_y,
long stride_x
long stride_x,
long padding_y,
long padding_x
)
{
const auto d = data.host() + data.k()*data.nr()*data.nc()*n;
......@@ -1526,9 +1554,9 @@ namespace dlib
const float* t = &output(0,0);
// now fill in the Toeplitz output matrix for the n-th sample in data.
for (long r = -(1-filter_nr%2); r < data.nr(); r+=stride_y)
for (long r = filter_nr-1-padding_y; r-padding_y < data.nr(); r+=stride_y)
{
for (long c = -(1-filter_nc%2); c < data.nc(); c+=stride_x)
for (long c = filter_nc-1-padding_x; c-padding_x < data.nc(); c+=stride_x)
{
for (long k = 0; k < data.k(); ++k)
{
......@@ -1536,8 +1564,8 @@ namespace dlib
{
for (long x = 0; x < filter_nc; ++x)
{
long xx = c-x+filter_nc/2;
long yy = r-y+filter_nr/2;
long xx = c-x;
long yy = r-y;
if (boundary.contains(xx,yy))
d[(k*data.nr() + yy)*data.nc() + xx] += *t;
++t;
......@@ -1553,28 +1581,38 @@ namespace dlib
const tensor& data,
const tensor& filters,
int stride_y,
int stride_x
int stride_x,
int padding_y,
int padding_x
)
{
DLIB_CASSERT(is_same_object(output,data) == false,"");
DLIB_CASSERT(is_same_object(output,filters) == false,"");
DLIB_CASSERT(filters.k() == data.k(),"");
DLIB_CASSERT(stride_y > 0 && stride_x > 0,"");
DLIB_CASSERT(0 <= padding_y && padding_y < filters.nr(),"");
DLIB_CASSERT(0 <= padding_x && padding_x < filters.nc(),"");
DLIB_CASSERT(filters.nr() <= data.nr() + 2*padding_y,
"Filter windows must be small enough to fit into the padded image.");
DLIB_CASSERT(filters.nc() <= data.nc() + 2*padding_x,
"Filter windows must be small enough to fit into the padded image.");
output.set_size(data.num_samples(),
filters.num_samples(),
1+(data.nr()-filters.nr()%2)/stride_y,
1+(data.nc()-filters.nc()%2)/stride_x);
1+(data.nr()+2*padding_y-filters.nr())/stride_y,
1+(data.nc()+2*padding_x-filters.nc())/stride_x);
matrix<float> temp;
for (long n = 0; n < data.num_samples(); ++n)
{
img2col(temp, data, n, filters.nr(), filters.nc(), stride_y, stride_x);
img2col(temp, data, n, filters.nr(), filters.nc(), stride_y, stride_x, padding_y, padding_x);
output.set_sample(n, mat(filters)*trans(temp));
}
last_stride_y = stride_y;
last_stride_x = stride_x;
last_padding_y = padding_y;
last_padding_x = padding_x;
}
// ------------------------------------------------------------------------------------
......@@ -1595,7 +1633,7 @@ namespace dlib
temp = trans(gi)*mat(filters);
col2img(temp, data_gradient, n, filters.nr(), filters.nc(), last_stride_y, last_stride_x);
col2img(temp, data_gradient, n, filters.nr(), filters.nc(), last_stride_y, last_stride_x, last_padding_y, last_padding_x);
}
}
......@@ -1616,7 +1654,7 @@ namespace dlib
gradient_input.nr()*gradient_input.nc());
img2col(temp, data, n, filters_gradient.nr(), filters_gradient.nc(), last_stride_y, last_stride_x);
img2col(temp, data, n, filters_gradient.nr(), filters_gradient.nc(), last_stride_y, last_stride_x, last_padding_y, last_padding_x);
if (n == 0)
filters_gradient = gi*temp;
else
......
......@@ -279,14 +279,18 @@ namespace dlib
int window_height,
int window_width,
int stride_y,
int stride_x
int stride_x,
int padding_y,
int padding_x
);
void setup_avg_pooling(
int window_height,
int window_width,
int stride_y,
int stride_x
int stride_x,
int padding_y,
int padding_x
);
bool does_max_pooling(
......@@ -309,6 +313,8 @@ namespace dlib
int window_width;
int stride_y;
int stride_x;
int padding_y;
int padding_x;
bool do_max_pooling;
};
......@@ -331,7 +337,9 @@ namespace dlib
const tensor& data,
const tensor& filters,
int stride_y,
int stride_x
int stride_x,
int padding_y,
int padding_x
);
void get_gradient_for_data (
......@@ -350,6 +358,8 @@ namespace dlib
long last_stride_y;
long last_stride_x;
long last_padding_y;
long last_padding_x;
};
// -----------------------------------------------------------------------------------
......
......@@ -737,6 +737,8 @@ namespace dlib
stride_y = 0;
stride_x = 0;
padding_y = 0;
padding_x = 0;
data_num_samples = 0;
data_k = 0;
data_nr = 0;
......@@ -752,7 +754,9 @@ namespace dlib
const tensor& data,
const tensor& filters,
int stride_y_,
int stride_x_
int stride_x_,
int padding_y_,
int padding_x_
)
{
DLIB_CASSERT(data.k() == filters.k(),"");
......@@ -761,6 +765,8 @@ namespace dlib
// anything.
if (stride_y_ == stride_y &&
stride_x_ == stride_x &&
padding_y_ == padding_y &&
padding_x_ == padding_x &&
data_num_samples == data.num_samples() &&
data_k == data.k() &&
data_nr == data.nr() &&
......@@ -778,6 +784,8 @@ namespace dlib
{
stride_y = stride_y_;
stride_x = stride_x_;
padding_y = padding_y_;
padding_x = padding_x_;
data_num_samples = data.num_samples();
data_k = data.k();
data_nr = data.nr();
......@@ -798,8 +806,8 @@ namespace dlib
CHECK_CUDNN(cudnnCreateConvolutionDescriptor((cudnnConvolutionDescriptor_t*)&conv_handle));
CHECK_CUDNN(cudnnSetConvolution2dDescriptor((cudnnConvolutionDescriptor_t)conv_handle,
filters.nr()/2, // vertical padding
filters.nc()/2, // horizontal padding
padding_y, // vertical padding
padding_x, // horizontal padding
stride_y,
stride_x,
1, 1, // must be 1,1
......@@ -907,22 +915,31 @@ namespace dlib
const tensor& data,
const tensor& filters,
int stride_y,
int stride_x
int stride_x,
int padding_y,
int padding_x
)
{
DLIB_CASSERT(is_same_object(output,data) == false,"");
DLIB_CASSERT(is_same_object(output,filters) == false,"");
DLIB_CASSERT(filters.k() == data.k(),"");
DLIB_CASSERT(stride_y > 0 && stride_x > 0,"");
DLIB_CASSERT(filters.nr() <= data.nr() + 2*padding_y,
"Filter windows must be small enough to fit into the padded image.");
DLIB_CASSERT(filters.nc() <= data.nc() + 2*padding_x,
"Filter windows must be small enough to fit into the padded image.");
setup(data,filters,stride_y,stride_x);
setup(data,filters,stride_y,stride_x,padding_y,padding_x);
output.set_size(out_num_samples, out_k, out_nr, out_nc);
DLIB_ASSERT(output.num_samples() == data.num_samples(),out_num_samples << " " << data.num_samples());
DLIB_ASSERT(output.k() == filters.num_samples(),"");
DLIB_ASSERT(output.nr() == 1+(data.nr()-filters.nr()%2)/stride_y,"");
DLIB_ASSERT(output.nc() == 1+(data.nc()-filters.nc()%2)/stride_x,output.nc() << " " <<1+(data.nc()-1)/stride_x << " : " << data.nc() << " " << stride_x);
DLIB_ASSERT(output.nr() == 1+(data.nr()+2*padding_y-filters.nr())/stride_y,"");
DLIB_ASSERT(output.nc() == 1+(data.nc()+2*padding_x-filters.nc())/stride_x,"");
const float alpha = 1;
const float beta = 0;
......@@ -995,7 +1012,7 @@ namespace dlib
// ------------------------------------------------------------------------------------
pooling::pooling (
) : handle(nullptr),window_height(0),window_width(0),stride_y(0),stride_x(0)
) : handle(nullptr),window_height(0),window_width(0),stride_y(0),stride_x(0),padding_y(0), padding_x(0)
{
}
......@@ -1016,6 +1033,8 @@ namespace dlib
window_width = 0;
stride_y = 0;
stride_x = 0;
padding_y = 0;
padding_x = 0;
}
void pooling::
......@@ -1023,10 +1042,12 @@ namespace dlib
int window_height_,
int window_width_,
int stride_y_,
int stride_x_
int stride_x_,
int padding_y_,
int padding_x_
)
{
setup(window_height_, window_width_, stride_y_, stride_x_, CUDNN_POOLING_MAX);
setup(window_height_, window_width_, stride_y_, stride_x_, padding_y_, padding_x_, CUDNN_POOLING_MAX);
do_max_pooling = true;
}
......@@ -1035,10 +1056,12 @@ namespace dlib
int window_height_,
int window_width_,
int stride_y_,
int stride_x_
int stride_x_,
int padding_y_,
int padding_x_
)
{
setup(window_height_, window_width_, stride_y_, stride_x_, CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING);
setup(window_height_, window_width_, stride_y_, stride_x_, padding_y_, padding_x_, CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING);
do_max_pooling = false;
}
......@@ -1048,13 +1071,31 @@ namespace dlib
int window_width_,
int stride_y_,
int stride_x_,
int padding_y_,
int padding_x_,
int pooling_mode
)
{
DLIB_CASSERT (window_height_ > 0 && window_width_ > 0 &&
stride_y_ > 0 && stride_x_ > 0 ,
"window_height_: " << window_height_
<< "\t\n window_width_: " << window_width_
<< "\t\n stride_y_: " << stride_y_
<< "\t\n stride_x_: " << stride_x_ );
DLIB_CASSERT( 0 <= padding_y_ && padding_y_ < window_height_ &&
0 <= padding_x_ && padding_x_ < window_width_,
"window_height_: " << window_height_
<< "\t\n window_width_: " << window_width_
<< "\t\n padding_y_: " << padding_y_
<< "\t\n padding_x_: " << padding_x_ );
if (window_height == window_height_ &&
window_width == window_width_ &&
stride_y == stride_y_ &&
stride_x == stride_x_ )
stride_x == stride_x_ &&
padding_y == padding_y_ &&
padding_x == padding_x_
)
{
return;
}
......@@ -1066,6 +1107,8 @@ namespace dlib
window_width = window_width_;
stride_x = stride_x_;
stride_y = stride_y_;
padding_y = padding_y_;
padding_x = padding_x_;
cudnnPoolingDescriptor_t poolingDesc;
CHECK_CUDNN(cudnnCreatePoolingDescriptor(&poolingDesc));
handle = poolingDesc;
......@@ -1075,8 +1118,8 @@ namespace dlib
CUDNN_PROPAGATE_NAN,
window_height,
window_width,
window_height/2,
window_width/2,
padding_y,
padding_x,
stride_y,
stride_x));
}
......@@ -1093,6 +1136,10 @@ namespace dlib
const tensor& src
)
{
DLIB_CASSERT(window_width <= src.nc() + 2*padding_x,
"Pooling windows must be small enough to fit into the padded image.");
DLIB_CASSERT(window_height <= src.nr() + 2*padding_y,
"Pooling windows must be small enough to fit into the padded image.");
const float alpha = 1;
const float beta = 0;
int outN;
......@@ -1111,14 +1158,16 @@ namespace dlib
DLIB_CASSERT(dest.num_samples() == src.num_samples(),"");
DLIB_CASSERT(dest.k() == src.k(),"");
DLIB_CASSERT(dest.nr() == 1+(src.nr()-window_height%2)/stride_y,
"\n stride_y: " << stride_y <<
DLIB_CASSERT(dest.nr() == 1 + (src.nr() + 2*padding_y - window_height)/stride_y,
"\n stride_y: " << stride_y <<
"\n padding_y: " << padding_y <<
"\n window_height: " << window_height <<
"\n src.nr(): " << src.nr() <<
"\n dest.nr(): " << dest.nr() <<
"\n src.nr()/stride_y: " << src.nr()/stride_y);
DLIB_CASSERT(dest.nc() == 1+(src.nc()-window_width%2)/stride_x,
"\n stride_x: " << stride_x <<
DLIB_CASSERT(dest.nc() == 1 + (src.nc() + 2*padding_x - window_width)/stride_x,
"\n stride_x: " << stride_x <<
"\n padding_x: " << padding_x <<
"\n window_width: " << window_width <<
"\n src.nc(): " << src.nc() <<
"\n dest.nc(): " << dest.nc() <<
......
......@@ -221,12 +221,16 @@ namespace dlib
const tensor& data,
const tensor& filters,
int stride_y,
int stride_x
int stride_x,
int padding_y,
int padding_x
);
/*!
requires
- stride_y > 0
- stride_x > 0
- 0 <= padding_y < filters.nr()
- 0 <= padding_x < filters.nc()
- is_same_object(output,data) == false
- is_same_object(output,filters) == false
ensures
......@@ -286,18 +290,24 @@ namespace dlib
const tensor& data,
const tensor& filters,
int stride_y,
int stride_x
int stride_x,
int padding_y,
int padding_x
);
/*!
requires
- filters.k() == data.k()
- stride_y > 0
- stride_x > 0
- 0 <= padding_y < filters.nr()
- 0 <= padding_x < filters.nc()
!*/
// These variables record the type of data given to the last call to setup().
int stride_y;
int stride_x;
int padding_y;
int padding_x;
long data_num_samples, data_k, data_nr, data_nc;
long filters_num_samples, filters_k, filters_nr, filters_nc;
......@@ -346,14 +356,18 @@ namespace dlib
int window_height,
int window_width,
int stride_y,
int stride_x
int stride_x,
int padding_y,
int padding_x
);
void setup_avg_pooling(
int window_height,
int window_width,
int stride_y,
int stride_x
int stride_x,
int padding_y,
int padding_x
);
bool does_max_pooling(
......@@ -378,6 +392,8 @@ namespace dlib
int window_width,
int stride_y,
int stride_x,
int padding_y,
int padding_x,
int pooling_mode
);
......@@ -386,6 +402,8 @@ namespace dlib
int window_width;
int stride_y;
int stride_x;
int padding_y;
int padding_x;
bool do_max_pooling;
};
......
......@@ -97,7 +97,10 @@ namespace dlib
sub.get_output(),
filters(params,0),
_stride_y,
_stride_x);
_stride_x,
_nr/2,
_nc/2
);
tt::add(1,output,1,biases(params,filters.size()));
}
......@@ -221,7 +224,7 @@ namespace dlib
{
// this->mp is non-copyable so we have to write our own copy to avoid trying to
// copy it and getting an error.
mp.setup_max_pooling(_nr, _nc, _stride_y, _stride_x);
mp.setup_max_pooling(_nr, _nc, _stride_y, _stride_x, _nr/2, _nc/2);
}
max_pool_& operator= (
......@@ -233,14 +236,14 @@ namespace dlib
// this->mp is non-copyable so we have to write our own copy to avoid trying to
// copy it and getting an error.
mp.setup_max_pooling(_nr, _nc, _stride_y, _stride_x);
mp.setup_max_pooling(_nr, _nc, _stride_y, _stride_x, _nr/2, _nc/2);
return *this;
}
template <typename SUBNET>
void setup (const SUBNET& /*sub*/)
{
mp.setup_max_pooling(_nr, _nc, _stride_y, _stride_x);
mp.setup_max_pooling(_nr, _nc, _stride_y, _stride_x, _nr/2, _nc/2);
}
template <typename SUBNET>
......@@ -274,7 +277,7 @@ namespace dlib
if (version != "max_pool_")
throw serialization_error("Unexpected version '"+version+"' found while deserializing dlib::max_pool_.");
item.mp.setup_max_pooling(_nr, _nc, _stride_y, _stride_x);
item.mp.setup_max_pooling(_nr, _nc, _stride_y, _stride_x, _nr/2, _nc/2);
long nr;
long nc;
......@@ -349,7 +352,7 @@ namespace dlib
{
// this->ap is non-copyable so we have to write our own copy to avoid trying to
// copy it and getting an error.
ap.setup_avg_pooling(_nr, _nc, _stride_y, _stride_x);
ap.setup_avg_pooling(_nr, _nc, _stride_y, _stride_x, _nr/2, _nc/2);
}
avg_pool_& operator= (
......@@ -361,14 +364,14 @@ namespace dlib
// this->ap is non-copyable so we have to write our own copy to avoid trying to
// copy it and getting an error.
ap.setup_avg_pooling(_nr, _nc, _stride_y, _stride_x);
ap.setup_avg_pooling(_nr, _nc, _stride_y, _stride_x, _nr/2, _nc/2);
return *this;
}
template <typename SUBNET>
void setup (const SUBNET& /*sub*/)
{
ap.setup_avg_pooling(_nr, _nc, _stride_y, _stride_x);
ap.setup_avg_pooling(_nr, _nc, _stride_y, _stride_x, _nr/2, _nc/2);
}
template <typename SUBNET>
......@@ -402,7 +405,7 @@ namespace dlib
if (version != "avg_pool_")
throw serialization_error("Unexpected version '"+version+"' found while deserializing dlib::avg_pool_.");
item.ap.setup_avg_pooling(_nr, _nc, _stride_y, _stride_x);
item.ap.setup_avg_pooling(_nr, _nc, _stride_y, _stride_x, _nr/2, _nc/2);
long nr;
long nc;
......
......@@ -639,22 +639,28 @@ namespace dlib { namespace tt
const tensor& data,
const tensor& filters,
int stride_y,
int stride_x
) { impl(output,data,filters,stride_y,stride_x); }
int stride_x,
int padding_y,
int padding_x
) { impl(output,data,filters,stride_y,stride_x,padding_y,padding_x); }
/*!
requires
- stride_y > 0
- stride_x > 0
- 0 <= padding_y < filters.nr()
- 0 <= padding_x < filters.nc()
- is_same_object(output,data) == false
- is_same_object(output,filters) == false
- filters.k() == data.k()
- filters.nr() <= src.nr() + 2*padding_y
- filters.nc() <= src.nc() + 2*padding_x
ensures
- convolves filters over data.
- filters contains filters.num_samples() filters.
- #output.num_samples() == data.num_samples()
- #output.k() == filters.num_samples()
- #output.nr() == 1+(data.nr()-filters.nr()%2)/stride_y
- #output.nc() == 1+(data.nc()-filters.nc()%2)/stride_x
- #output.nr() == 1+(data.nr() + 2*padding_y - filters.nr())/stride_y
- #output.nc() == 1+(data.nc() + 2*padding_x - filters.nc())/stride_x
!*/
void get_gradient_for_data (
......@@ -732,14 +738,18 @@ namespace dlib { namespace tt
int window_height,
int window_width,
int stride_y,
int stride_x
) { impl.setup_max_pooling(window_height, window_width, stride_y, stride_x); }
int stride_x,
int padding_y,
int padding_x
) { impl.setup_max_pooling(window_height, window_width, stride_y, stride_x, padding_y, padding_x); }
/*!
requires
- window_height > 0
- window_width > 0
- stride_y > 0
- stride_x > 0
- 0 <= padding_y < window_height
- 0 <= padding_x < window_width
ensures
- When you call operator() it will do max pooling with the given
parameters.
......@@ -749,14 +759,18 @@ namespace dlib { namespace tt
int window_height,
int window_width,
int stride_y,
int stride_x
) { impl.setup_avg_pooling(window_height, window_width, stride_y, stride_x); }
int stride_x,
int padding_y,
int padding_x
) { impl.setup_avg_pooling(window_height, window_width, stride_y, stride_x, padding_y, padding_x); }
/*!
requires
- window_height > 0
- window_width > 0
- stride_y > 0
- stride_x > 0
- 0 <= padding_y < window_height
- 0 <= padding_x < window_width
ensures
- When you call operator() it will do average pooling with the given
parameters.
......@@ -773,24 +787,22 @@ namespace dlib { namespace tt
requires
- is_same_object(dest,src) == false
- either setup_max_pooling() or setup_avg_pooling() has been called.
- window_width <= src.nc() + 2*padding_x
- window_height <= src.nr() + 2*padding_y
ensures
- #dest.num_samples() == src.num_samples()
- #dest.k() == src.k()
- #dest.nr() == 1+(src.nr()-window_height%2)/stride_y
- #dest.nc() == 1+(src.nc()-window_width%2)/stride_x
- #dest.nr() == 1 + (src.nr() + 2*padding_y - window_height)/stride_y
- #dest.nc() == 1 + (src.nc() + 2*padding_x - window_width)/stride_x
- WINDOW(x,y) == centered_rect(x*stride_x + window_width/2 - padding_x,
y*stride_y + window_height/2 - padding_y,
window_width,
window_height)
- for all valid s, k, r, and c:
- if (does_max_pooling()) then
- image_plane(#dest,s,k)(r,c) == max(subm_clipped(image_plane(src,s,k),
centered_rect(c*stride_x,
r*stride_y,
window_width,
window_height)))
- image_plane(#dest,s,k)(r,c) == max(subm_clipped(image_plane(src,s,k),WINDOW(c,r)))
- else
- image_plane(#dest,s,k)(r,c) == mean(subm_clipped(image_plane(src,s,k),
centered_rect(c*stride_x,
r*stride_y,
window_width,
window_height)))
- image_plane(#dest,s,k)(r,c) == mean(subm_clipped(image_plane(src,s,k),WINDOW(c,r)))
!*/
void get_gradient(
......
......@@ -583,12 +583,19 @@ namespace
const int stride_y = prnd.get_random_32bit_number()%5+1;
const int stride_x = prnd.get_random_32bit_number()%5+1;
conv1(output1, data, filters, stride_y,stride_x);
conv2(output2, data, filters, stride_y,stride_x);
int padding_y = prnd.get_random_32bit_number()%(filters.nr()/2+1);
int padding_x = prnd.get_random_32bit_number()%(filters.nc()/2+1);
if (!(filters.nr() <= data.nr() + 2*padding_y))
padding_y = (filters.nr()-data.nr()+1)/2;
if (!(filters.nc() <= data.nc() + 2*padding_x))
padding_x = (filters.nc()-data.nc()+1)/2;
conv1(output1, data, filters, stride_y,stride_x, padding_y, padding_x);
conv2(output2, data, filters, stride_y,stride_x, padding_y, padding_x);
dlog << LINFO << "forward error: "<< max(abs(mat(output1)-mat(output2)));
DLIB_TEST(max(abs(mat(output1)-mat(output2))) < 1e-3);
DLIB_TEST_MSG(max(abs(mat(output1)-mat(output2))) < 1e-3, max(abs(mat(output1)-mat(output2)))
<<"\n\t padding_y: "<< padding_y
<<"\n\t padding_x: "<< padding_x
);
......@@ -621,7 +628,7 @@ namespace
conv2.get_gradient_for_filters(gi, data, filter_gradient2);
dlog << LINFO << "filter gradient error: "<< max(abs(mat(filter_gradient1)-mat(filter_gradient2)));
DLIB_TEST(max(abs(mat(filter_gradient1)-mat(filter_gradient2))) < 1e-3);
DLIB_TEST_MSG(max(abs(mat(filter_gradient1)-mat(filter_gradient2))) < 1e-3, max(abs(mat(filter_gradient1)-mat(filter_gradient2))));
}
}
......@@ -1026,12 +1033,14 @@ namespace
const int window_height,
const int window_width,
const int stride_y,
const int stride_x
const int stride_x,
const int padding_y,
const int padding_x
)
{
print_spinner();
resizable_tensor A, B, gradient_input;
A.set_size(2,2,16,7);
A.set_size(4,5,16,7);
B.copy_size(A);
gradient_input.copy_size(A);
......@@ -1043,14 +1052,18 @@ namespace
tt::pooling mp;
mp.setup_max_pooling(window_height,window_width,stride_y,stride_x);
mp.setup_max_pooling(window_height,window_width,stride_y,stride_x,padding_y,padding_x);
mp(A, B);
// make sure max pooling does what its spec says it should.
DLIB_TEST( A.num_samples() == B.num_samples());
DLIB_TEST( A.k() == B.k());
DLIB_TEST( A.nr() == 1+(B.nr()-window_height%2)/stride_y);
DLIB_TEST( A.nc() == 1+(B.nc()-window_width%2)/stride_x);
DLIB_TEST( A.nr() == 1+(B.nr()+2*padding_y-window_height)/stride_y);
DLIB_TEST( A.nc() == 1+(B.nc()+2*padding_x-window_width)/stride_x);
const long x_offset = window_width/2 - padding_x;
const long y_offset = window_height/2 - padding_y;
for (long s = 0; s < A.num_samples(); ++s)
{
for (long k = 0; k < A.k(); ++k)
......@@ -1059,11 +1072,15 @@ namespace
{
for (long c = 0; c < A.nc(); ++c)
{
DLIB_TEST(image_plane(A,s,k)(r,c) == max(subm_clipped(image_plane(B,s,k),
centered_rect(c*stride_x,
r*stride_y,
DLIB_TEST_MSG(image_plane(A,s,k)(r,c) == max(subm_clipped(image_plane(B,s,k),
centered_rect(c*stride_x+x_offset,
r*stride_y+y_offset,
window_width,
window_height))));
window_height))),
"padding: "<< padding_x << " " << padding_y
<< " window size: " << window_width << " " << window_height
<< " stride: " << stride_x << " " << stride_y
);
}
}
}
......@@ -1076,12 +1093,14 @@ namespace
const int window_height,
const int window_width,
const int stride_y,
const int stride_x
const int stride_x,
const int padding_y,
const int padding_x
)
{
print_spinner();
resizable_tensor A, B, gradient_input;
A.set_size(2,2,16,7);
A.set_size(4,5,16,7);
B.copy_size(A);
gradient_input.copy_size(A);
......@@ -1093,14 +1112,17 @@ namespace
tt::pooling mp;
mp.setup_avg_pooling(window_height,window_width,stride_y,stride_x);
mp.setup_avg_pooling(window_height,window_width,stride_y,stride_x,padding_y,padding_x);
mp(A, B);
// make sure avg pooling does what its spec says it should.
DLIB_TEST( A.num_samples() == B.num_samples());
DLIB_TEST( A.k() == B.k());
DLIB_TEST( A.nr() == 1+(B.nr()-window_height%2)/stride_y);
DLIB_TEST( A.nc() == 1+(B.nc()-window_width%2)/stride_x);
DLIB_TEST( A.nr() == 1+(B.nr()+2*padding_y-window_height)/stride_y);
DLIB_TEST( A.nc() == 1+(B.nc()+2*padding_x-window_width)/stride_x);
const long x_offset = window_width/2 - padding_x;
const long y_offset = window_height/2 - padding_y;
for (long s = 0; s < A.num_samples(); ++s)
{
for (long k = 0; k < A.k(); ++k)
......@@ -1110,8 +1132,8 @@ namespace
for (long c = 0; c < A.nc(); ++c)
{
float expected = mean(subm_clipped(image_plane(B,s,k),
centered_rect(c*stride_x,
r*stride_y,
centered_rect(c*stride_x+x_offset,
r*stride_y+y_offset,
window_width,
window_height)));
float err = abs(image_plane(A,s,k)(r,c) - expected);
......@@ -1275,17 +1297,30 @@ namespace
test_add();
compare_adam();
#endif
test_max_pool(1,1,2,3);
test_max_pool(3,3,1,1);
test_max_pool(3,3,2,2);
test_max_pool(2,2,2,2);
test_max_pool(4,5,3,1);
test_avg_pool(1,1,2,3);
test_avg_pool(3,3,1,1);
test_avg_pool(3,3,2,2);
test_avg_pool(2,2,2,2);
test_avg_pool(4,5,3,1);
test_avg_pool(100,100,100,100);
test_max_pool(1,1,2,3,0,0);
test_max_pool(3,3,1,1,0,0);
test_max_pool(3,3,2,2,0,0);
test_max_pool(2,2,2,2,0,0);
test_max_pool(4,5,3,1,0,0);
test_avg_pool(1,1,2,3,0,0);
test_avg_pool(3,3,1,1,0,0);
test_avg_pool(3,3,2,2,0,0);
test_avg_pool(2,2,2,2,0,0);
test_avg_pool(4,5,3,1,0,0);
test_avg_pool(4,4,2,2,0,0);
test_avg_pool(4,5,40,50,0,0);
test_max_pool(2,2,2,3,1,1);
test_max_pool(3,3,1,1,1,1);
test_max_pool(3,3,2,2,2,1);
test_max_pool(2,2,2,2,1,0);
test_max_pool(4,5,3,1,2,3);
test_avg_pool(1,1,2,3,0,0);
test_avg_pool(3,3,1,1,1,2);
test_avg_pool(3,3,2,2,2,1);
test_avg_pool(2,2,2,2,1,0);
test_avg_pool(4,5,3,1,2,4);
test_avg_pool(4,4,2,2,1,3);
test_avg_pool(4,5,40,50,0,1);
test_tanh();
test_softmax();
test_sigmoid();
......
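
For reference, a minimal usage sketch of the extended pooling interface exercised by the tests above (the tensor sizes and the umbrella include are assumptions for illustration, not taken from this patch):

    #include <dlib/dnn.h>  // assumed umbrella header for tt::pooling and resizable_tensor

    void padded_max_pool_example()
    {
        dlib::resizable_tensor out, src;
        src.set_size(4, 5, 16, 7);      // samples, k, rows, cols, as in the test above
        // ... fill src with data ...
        dlib::tt::pooling mp;
        // 3x3 window, stride 2, padding 1: output planes are
        // 1+(16+2-3)/2 = 8 rows by 1+(7+2-3)/2 = 4 cols.
        mp.setup_max_pooling(3, 3, 2, 2, 1, 1);
        mp(out, src);                   // out is resized to hold the pooled result
    }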