Commit 31bcddd5 authored by Davis King

Cleaned up documentation for conv_. Also removed unnecessary tensor
reallocation and copying inside conv_'s backward pass. Doing this
required adding an add_to_output boolean option to the methods of
tensor_conv.
parent b3d5dbd3
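
The gist of the new interface: every tensor_conv method now takes a leading add_to_output flag that selects between assigning to the output tensor and accumulating into it. As a rough usage sketch (the function, tensor names, and stride/padding values here are illustrative, not part of the commit):

    #include <dlib/dnn.h>
    using namespace dlib;

    void run_conv(tt::tensor_conv& conv,
                  const tensor& data,
                  const tensor& filters,
                  const tensor& gradient_input,
                  resizable_tensor& output,
                  tensor& data_grad,
                  tensor& filt_grad)
    {
        conv.setup(data, filters, 1,1, 0,0);  // stride 1, no padding

        // Forward pass: overwrite output (add_to_output == false).
        conv(false, output, data, filters);

        // Backward pass: accumulate straight into an existing gradient
        // buffer, instead of filling a temporary tensor and adding it later.
        conv.get_gradient_for_data(true, gradient_input, filters, data_grad);

        // The filter gradient buffer is exclusively ours, so just assign.
        conv.get_gradient_for_filters(false, gradient_input, data, filt_grad);
    }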
@@ -1739,31 +1739,52 @@ namespace dlib
         }
     }

     void tensor_conv::operator() (
+        const bool add_to_output,
         resizable_tensor& output,
         const tensor& data,
         const tensor& filters
     )
+    {
+        DLIB_CASSERT(last_stride_y > 0 && last_stride_x > 0, "You must call setup() before calling this function.");
+        output.set_size(data.num_samples(),
+                        filters.num_samples(),
+                        1+(data.nr()+2*last_padding_y-filters.nr())/last_stride_y,
+                        1+(data.nc()+2*last_padding_x-filters.nc())/last_stride_x);
+        (*this)(add_to_output, static_cast<tensor&>(output),data,filters);
+    }
+
+    void tensor_conv::operator() (
+        const bool add_to_output,
+        tensor& output,
+        const tensor& data,
+        const tensor& filters
+    )
     {
         DLIB_CASSERT(is_same_object(output,data) == false);
         DLIB_CASSERT(is_same_object(output,filters) == false);
         DLIB_CASSERT(filters.k() == data.k());
+        DLIB_CASSERT(last_stride_y > 0 && last_stride_x > 0, "You must call setup() before calling this function.");
         DLIB_CASSERT(filters.nr() <= data.nr() + 2*last_padding_y,
             "Filter windows must be small enough to fit into the padded image.");
         DLIB_CASSERT(filters.nc() <= data.nc() + 2*last_padding_x,
             "Filter windows must be small enough to fit into the padded image.");
-        output.set_size(data.num_samples(),
-                        filters.num_samples(),
-                        1+(data.nr()+2*last_padding_y-filters.nr())/last_stride_y,
-                        1+(data.nc()+2*last_padding_x-filters.nc())/last_stride_x);
+        DLIB_CASSERT(output.num_samples() == data.num_samples());
+        DLIB_CASSERT(output.k() == filters.num_samples());
+        DLIB_CASSERT(output.nr() == 1+(data.nr()+2*last_padding_y-filters.nr())/last_stride_y);
+        DLIB_CASSERT(output.nc() == 1+(data.nc()+2*last_padding_x-filters.nc())/last_stride_x);

         matrix<float> temp;
         for (long n = 0; n < data.num_samples(); ++n)
         {
             img2col(temp, data, n, filters.nr(), filters.nc(), last_stride_y, last_stride_x, last_padding_y, last_padding_x);
-            output.set_sample(n, mat(filters)*trans(temp));
+
+            if (add_to_output)
+                output.add_to_sample(n, mat(filters)*trans(temp));
+            else
+                output.set_sample(n, mat(filters)*trans(temp));
         }
     }
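
The loop above is the standard im2col formulation: img2col() lays every filter-sized window of sample n out as a row of temp, so the whole convolution for that sample collapses into the single matrix product mat(filters)*trans(temp). A toy single-channel, stride-1, zero-padding sketch of the idea (this reimplements a miniature img2col; the real one is an internal dlib helper with more parameters):

    #include <dlib/matrix.h>
    #include <iostream>
    using namespace dlib;

    // Each row of the returned matrix is one filter-sized window of img, flattened.
    matrix<float> im2col_toy(const matrix<float>& img, long fnr, long fnc)
    {
        const long out_nr = img.nr()-fnr+1;
        const long out_nc = img.nc()-fnc+1;
        matrix<float> windows(out_nr*out_nc, fnr*fnc);
        for (long r = 0; r < out_nr; ++r)
            for (long c = 0; c < out_nc; ++c)
                set_rowm(windows, r*out_nc+c) = trans(reshape_to_column_vector(subm(img, r, c, fnr, fnc)));
        return windows;
    }

    int main()
    {
        matrix<float> img(4,4);
        img =  1, 2, 3, 4,
               5, 6, 7, 8,
               9,10,11,12,
              13,14,15,16;
        matrix<float> filt(2,2);
        filt = 1,0,
               0,1;   // each output value = img(r,c) + img(r+1,c+1)

        matrix<float> cols = im2col_toy(img, filt.nr(), filt.nc());
        // One GEMM produces the entire 3x3 output, stored as a 1x9 row.
        matrix<float> out = trans(reshape_to_column_vector(filt))*trans(cols);
        std::cout << reshape(out, 3, 3);   // top-left entry is 1+6 == 7
    }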
@@ -1771,12 +1792,15 @@ namespace dlib
     void tensor_conv::
     get_gradient_for_data (
+        const bool add_to_output,
         const tensor& gradient_input,
         const tensor& filters,
         tensor& data_gradient
     )
     {
         matrix<float> temp;
+        if (!add_to_output)
+            data_gradient = 0;
         for (long n = 0; n < gradient_input.num_samples(); ++n)
         {
             auto gi = mat(gradient_input.host()+gradient_input.k()*gradient_input.nr()*gradient_input.nc()*n,
@@ -1793,6 +1817,7 @@ namespace dlib
     void tensor_conv::
     get_gradient_for_filters (
+        const bool add_to_output,
         const tensor& gradient_input,
         const tensor& data,
         tensor& filters_gradient
@@ -1808,9 +1833,16 @@ namespace dlib
             img2col(temp, data, n, filters_gradient.nr(), filters_gradient.nc(), last_stride_y, last_stride_x, last_padding_y, last_padding_x);
             if (n == 0)
-                filters_gradient = gi*temp;
+            {
+                if (add_to_output)
+                    filters_gradient += gi*temp;
+                else
+                    filters_gradient = gi*temp;
+            }
             else
+            {
                 filters_gradient += gi*temp;
+            }
         }
     }

// ------------------------------------------------------------------------------------
...
@@ -388,18 +388,28 @@ namespace dlib
         }

         void operator() (
+            const bool add_to_output,
             resizable_tensor& output,
             const tensor& data,
             const tensor& filters
         );

+        void operator() (
+            const bool add_to_output,
+            tensor& output,
+            const tensor& data,
+            const tensor& filters
+        );
+
         void get_gradient_for_data (
+            const bool add_to_output,
             const tensor& gradient_input,
             const tensor& filters,
             tensor& data_gradient
         );

         void get_gradient_for_filters (
+            const bool add_to_output,
             const tensor& gradient_input,
             const tensor& data,
             tensor& filters_gradient
@@ -407,10 +417,10 @@ namespace dlib
     private:

-        long last_stride_y;
-        long last_stride_x;
-        long last_padding_y;
-        long last_padding_x;
+        long last_stride_y = 0;
+        long last_stride_x = 0;
+        long last_padding_y = 0;
+        long last_padding_x = 0;
     };

// -----------------------------------------------------------------------------------
...
@@ -951,15 +951,29 @@ namespace dlib
        }

        void tensor_conv::operator() (
+            const bool add_to_output,
            resizable_tensor& output,
            const tensor& data,
            const tensor& filters
        )
+        {
+            DLIB_CASSERT(stride_y > 0 && stride_x > 0, "You must call setup() before calling this function");
+            output.set_size(out_num_samples, out_k, out_nr, out_nc);
+            (*this)(add_to_output, static_cast<tensor&>(output), data, filters);
+        }
+
+        void tensor_conv::operator() (
+            const bool add_to_output,
+            tensor& output,
+            const tensor& data,
+            const tensor& filters
+        )
        {
            DLIB_CASSERT(is_same_object(output,data) == false);
            DLIB_CASSERT(is_same_object(output,filters) == false);
            DLIB_CASSERT(filters.k() == data.k());
-            DLIB_CASSERT(stride_y > 0 && stride_x > 0);
+            DLIB_CASSERT(stride_y > 0 && stride_x > 0, "You must call setup() before calling this function");
            DLIB_CASSERT(filters.nc() <= data.nc() + 2*padding_x,
                "Filter windows must be small enough to fit into the padded image."
                << "\n\t filters.nc(): " << filters.nc()
@@ -974,17 +988,15 @@ namespace dlib
            );

-            output.set_size(out_num_samples, out_k, out_nr, out_nc);
-
-            DLIB_ASSERT(output.num_samples() == data.num_samples(),out_num_samples << " " << data.num_samples());
-            DLIB_ASSERT(output.k() == filters.num_samples());
-            DLIB_ASSERT(output.nr() == 1+(data.nr()+2*padding_y-filters.nr())/stride_y);
-            DLIB_ASSERT(output.nc() == 1+(data.nc()+2*padding_x-filters.nc())/stride_x);
+            DLIB_CASSERT(output.num_samples() == data.num_samples(),out_num_samples << " " << data.num_samples());
+            DLIB_CASSERT(output.k() == filters.num_samples());
+            DLIB_CASSERT(output.nr() == 1+(data.nr()+2*padding_y-filters.nr())/stride_y);
+            DLIB_CASSERT(output.nc() == 1+(data.nc()+2*padding_x-filters.nc())/stride_x);

            const float alpha = 1;
-            const float beta = 0;
+            const float beta = add_to_output ? 1 : 0;

            CHECK_CUDNN(cudnnConvolutionForward(
                context(),
                &alpha,
@@ -1002,13 +1014,14 @@ namespace dlib
        }

        void tensor_conv::get_gradient_for_data (
+            const bool add_to_output,
            const tensor& gradient_input,
            const tensor& filters,
            tensor& data_gradient
        )
        {
            const float alpha = 1;
-            const float beta = 1;
+            const float beta = add_to_output ? 1 : 0;

            CHECK_CUDNN(cudnnConvolutionBackwardData(context(),
@@ -1028,13 +1041,14 @@ namespace dlib
        void tensor_conv::
        get_gradient_for_filters (
+            const bool add_to_output,
            const tensor& gradient_input,
            const tensor& data,
            tensor& filters_gradient
        )
        {
            const float alpha = 1;
-            const float beta = 0;
+            const float beta = add_to_output ? 1 : 0;

            CHECK_CUDNN(cudnnConvolutionBackwardFilter(context(),
                &alpha,
                descriptor(data),
...
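
On the cuDNN path the flag costs nothing: cuDNN's alpha/beta convention computes dst = alpha*result + beta*dst, so setting beta = add_to_output ? 1 : 0 switches between accumulating and overwriting without an extra pass over the tensor. The scalar semantics, as a sketch (plain C++, not a cuDNN call):

    #include <cassert>

    // dst = alpha*conv_result + beta*dst, the cuDNN blending rule used above.
    float blend(float conv_result, float dst, bool add_to_output)
    {
        const float alpha = 1;
        const float beta = add_to_output ? 1 : 0;
        return alpha*conv_result + beta*dst;
    }

    int main()
    {
        assert(blend(3, 5, false) == 3);  // beta == 0: previous dst discarded
        assert(blend(3, 5, true)  == 8);  // beta == 1: result added onto dst
        return 0;
    }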
@@ -203,68 +203,32 @@ namespace dlib
        );

        void operator() (
+            const bool add_to_output,
+            tensor& output,
+            const tensor& data,
+            const tensor& filters
+        );
+
+        void operator() (
+            const bool add_to_output,
            resizable_tensor& output,
            const tensor& data,
            const tensor& filters
        );
-        /*!
-            requires
-                - stride_y > 0
-                - stride_x > 0
-                - 0 <= padding_y < filters.nr()
-                - 0 <= padding_x < filters.nc()
-                - is_same_object(output,data) == false
-                - is_same_object(output,filters) == false
-            ensures
-                - convolves filters over data.
-                - filters contains filters.num_samples() filters.
-                - #output.num_samples() == data.num_samples()
-                - #output.k() == filters.num_samples()
-                - #output.nr() == 1+(data.nr()-filters.nr()%2)/stride_y
-                - #output.nc() == 1+(data.nc()-filters.nc()%2)/stride_x
-        !*/

        void get_gradient_for_data (
+            const bool add_to_output,
            const tensor& gradient_input,
            const tensor& filters,
            tensor& data_gradient
        );
-        /*!
-            requires
-                - filters has the same dimensions as the filters object give to the
-                  last call to operator().
-                - data_gradient has the same dimensions as the data object give to the
-                  last call to operator().
-                - gradient_input has the same dimensions as the output of operator().
-                - is_same_object(data_gradient,filters) == false
-                - is_same_object(data_gradient,gradient_input) == false
-            ensures
-                - let OUT be the output of (*this)(OUT,data,filters).
-                - let f(data,filters) == dot(OUT, gradient_input)
-                - This function finds the gradient of f() with respect to data
-                  and adds this gradient to data_gradient.
-        !*/

        void get_gradient_for_filters (
+            const bool add_to_output,
            const tensor& gradient_input,
            const tensor& data,
            tensor& filters_gradient
        );
-        /*!
-            requires
-                - filters_gradient has the same dimensions as the filters object give
-                  to the last call to operator().
-                - data has the same dimensions as the data object give to the last call
-                  to operator().
-                - gradient_input has the same dimensions as the output of operator().
-                - is_same_object(filters_gradient,data) == false
-                - is_same_object(filters_gradient,gradient_input) == false
-            ensures
-                - let OUT be the output of (*this)(OUT,data,filters).
-                - let f(data,filters) == dot(OUT, gradient_input)
-                - This function finds the gradient of f() with respect to filters
-                  and assigns this gradient to filters_gradient.
-        !*/

        void setup(
            const tensor& data,
@@ -277,15 +241,6 @@ namespace dlib
    private:

-        /*!
-            requires
-                - filters.k() == data.k()
-                - stride_y > 0
-                - stride_x > 0
-                - 0 <= padding_y < filters.nr()
-                - 0 <= padding_x < filters.nc()
-        !*/

        // These variables record the type of data given to the last call to setup().
        int stride_y;
        int stride_x;
...
@@ -142,6 +142,7 @@ namespace dlib
            // set the initial bias values to zero
            biases(params,filters.size()) = 0;
        }
+
        template <typename SUBNET>
@@ -153,8 +154,7 @@ namespace dlib
                _stride_x,
                padding_y_,
                padding_x_);
-
-            conv(output,
+            conv(false, output,
                sub.get_output(),
                filters(params,0));
@@ -164,12 +164,12 @@ namespace dlib
        template <typename SUBNET>
        void backward(const tensor& gradient_input, SUBNET& sub, tensor& params_grad)
        {
-            conv.get_gradient_for_data (gradient_input, filters(params,0), sub.get_gradient_input());
+            conv.get_gradient_for_data (true, gradient_input, filters(params,0), sub.get_gradient_input());
            // no point computing the parameter gradients if they won't be used.
            if (learning_rate_multiplier != 0)
            {
                auto filt = filters(params_grad,0);
-                conv.get_gradient_for_filters (gradient_input, sub.get_output(), filt);
+                conv.get_gradient_for_filters (false, gradient_input, sub.get_output(), filt);
                auto b = biases(params_grad, filters.size());
                tt::assign_conv_bias_gradient(b, gradient_input);
            }
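
The flag values in backward() above are not arbitrary: sub.get_gradient_input() is a shared accumulator that other consumers of the same subnetwork output may also write to, so the data gradient must be added (true), while params_grad belongs to this layer alone and can be assigned outright (false). A minimal sketch of why shared gradient buffers must accumulate (everything here is illustrative):

    #include <vector>

    // dL/dx for a tensor x consumed by several layers is the sum of each
    // consumer's contribution, so every backward pass adds into the buffer.
    void backward_consumer(const std::vector<float>& upstream, std::vector<float>& dx)
    {
        for (std::size_t i = 0; i < dx.size(); ++i)
            dx[i] += upstream[i];   // the add_to_output == true behavior
    }

    int main()
    {
        std::vector<float> dx(3, 0.0f);     // shared gradient buffer, zeroed once
        backward_consumer({1,1,1}, dx);     // first consumer of x
        backward_consumer({2,2,2}, dx);     // second consumer of x
        // dx == {3,3,3}; assigning instead of adding would have dropped
        // the first consumer's contribution.
        return 0;
    }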
@@ -443,26 +443,21 @@ namespace dlib
            unsigned int gnsamps = sub.get_output().num_samples();
            unsigned int gk = filt.k();
            output.set_size(gnsamps,gk,gnr,gnc);
-            output = 0;
            conv.setup(output,filt,_stride_y,_stride_x,padding_y_,padding_x_);
-            conv.get_gradient_for_data(sub.get_output(),filt,output);
+            conv.get_gradient_for_data(false, sub.get_output(),filt,output);
            tt::add(1,output,1,biases(params,filters.size()));
        }

        template <typename SUBNET>
        void backward(const tensor& gradient_input, SUBNET& sub, tensor& params_grad)
        {
-            resizable_tensor temp;
-            temp.copy_size(sub.get_gradient_input());
            auto filt = filters(params,0);
-            conv(temp,gradient_input, filt);
-            // need to add the new gradients on top of the previous ones
-            tt::add(1,sub.get_gradient_input(),1,temp);
+            conv(true, sub.get_gradient_input(),gradient_input, filt);
            // no point computing the parameter gradients if they won't be used.
            if (learning_rate_multiplier != 0)
            {
                auto filt = filters(params_grad,0);
-                conv.get_gradient_for_filters (sub.get_output(),gradient_input, filt);
+                conv.get_gradient_for_filters (false, sub.get_output(),gradient_input, filt);
                auto b = biases(params_grad, filters.size());
                tt::assign_conv_bias_gradient(b, gradient_input);
            }
@@ -566,7 +561,7 @@ namespace dlib
            << " padding_y='"<<item.padding_y_<<"'"
            << " padding_x='"<<item.padding_x_<<"'"
            << " learning_rate_mult='"<<item.learning_rate_multiplier<<"'"
-            << " weight_decay_46mult='"<<item.weight_decay_multiplier<<"'"
+            << " weight_decay_mult='"<<item.weight_decay_multiplier<<"'"
            << " bias_learning_rate_mult='"<<item.bias_learning_rate_multiplier<<"'"
            << " bias_weight_decay_mult='"<<item.bias_weight_decay_multiplier<<"'>\n";
        out << mat(item.params);
...
@@ -864,17 +864,21 @@ namespace dlib
            WHAT THIS OBJECT REPRESENTS
                This is an implementation of the EXAMPLE_COMPUTATIONAL_LAYER_ interface
                defined above.  In particular, it defines a transposed convolution layer
-                that takes an input tensor (nominally representing an image) and
-                transpose convolves (deconvolves) it with a set of filters and then outputs the results.
-                This is basically a convolutional layer with reversed forward/backward passes
+                that takes an input tensor and transpose convolves (sometimes called
+                "deconvolution") it with a set of filters and then outputs the results.
+
+                This is essentially a convolutional layer that allows fractional strides.
+                Therefore, you can make output tensors that are larger than the input
+                tensors using this layer type.

                The dimensions of the tensors output by this layer are as follows (letting
                IN be the input tensor and OUT the output tensor):
                    - OUT.num_samples() == IN.num_samples()
                    - OUT.k() == num_filters()
-                    - OUT.nr() == stride_y * (IN.nr() -1) + nr) - 2*padding_y
-                    - OUT.nc() == stride_x * (IN.nc() -1) + nc) - 2*padding_x
+                    - OUT.nr() == stride_y()*(IN.nr()-1) + nr() - 2*padding_y()
+                    - OUT.nc() == stride_x()*(IN.nc()-1) + nc() - 2*padding_x()
        !*/

        public:
@@ -923,8 +927,8 @@ namespace dlib
        /*!
            ensures
                - returns the vertical stride used when convolving the filters over an
-                  image.  That is, each filter will be moved stride_y() pixels down at a
-                  time when it moves over the image.
+                  image.  That is, each filter will be moved 1.0/stride_y() pixels down at
+                  a time when it moves over the image.
        !*/

        long stride_x(
@@ -932,8 +936,8 @@ namespace dlib
        /*!
            ensures
                - returns the horizontal stride used when convolving the filters over an
-                  image.  That is, each filter will be moved stride_x() pixels right at a
-                  time when it moves over the image.
+                  image.  That is, each filter will be moved 1.0/stride_x() pixels right at
+                  a time when it moves over the image.
        !*/

        long padding_y(
...
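
The corrected OUT.nr()/OUT.nc() formulas are easy to sanity check. A small sketch with made-up numbers:

    #include <iostream>

    // OUT.nr() == stride_y()*(IN.nr()-1) + nr() - 2*padding_y()
    long cont_out_nr(long in_nr, long stride_y, long filt_nr, long padding_y)
    {
        return stride_y*(in_nr-1) + filt_nr - 2*padding_y;
    }

    int main()
    {
        // A 5-row input, 3x3 filters, stride 2, no padding gives 11 output
        // rows -- larger than the input, as the documentation promises.
        std::cout << cont_out_nr(5, 2, 3, 0) << "\n";   // prints 11
        return 0;
    }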
@@ -877,23 +877,50 @@ namespace dlib { namespace tt
        ) { impl.clear(); }

        void operator() (
+            const bool add_to_output,
+            tensor& output,
+            const tensor& data,
+            const tensor& filters
+        ) { impl(add_to_output,output,data,filters); }
+        /*!
+            requires
+                - setup() has been called.  Specifically, setup() has been called like this:
+                  this->setup(data, filters, stride_y, stride_x, padding_y, padding_x);
+                - is_same_object(output,data) == false
+                - is_same_object(output,filters) == false
+                - filters.k() == data.k()
+                - filters.nr() <= src.nr() + 2*padding_y
+                - filters.nc() <= src.nc() + 2*padding_x
+                - #output.num_samples() == data.num_samples()
+                - #output.k() == filters.num_samples()
+                - #output.nr() == 1+(data.nr() + 2*padding_y - filters.nr())/stride_y
+                - #output.nc() == 1+(data.nc() + 2*padding_x - filters.nc())/stride_x
+            ensures
+                - Convolves filters over data.  If add_to_output==true then we add the
+                  results to output, otherwise we assign to output, overwriting the
+                  previous values in output.
+                - filters contains filters.num_samples() filters.
+        !*/
+
+        void operator() (
+            const bool add_to_output,
            resizable_tensor& output,
            const tensor& data,
            const tensor& filters
-        ) { impl(output,data,filters); }
+        ) { impl(add_to_output,output,data,filters); }
        /*!
            requires
-                - stride_y > 0
-                - stride_x > 0
-                - 0 <= padding_y < filters.nr()
-                - 0 <= padding_x < filters.nc()
+                - setup() has been called.  Specifically, setup() has been called like this:
+                  this->setup(data, filters, stride_y, stride_x, padding_y, padding_x);
                - is_same_object(output,data) == false
                - is_same_object(output,filters) == false
                - filters.k() == data.k()
                - filters.nr() <= src.nr() + 2*padding_y
                - filters.nc() <= src.nc() + 2*padding_x
            ensures
-                - convolves filters over data.
+                - Convolves filters over data.  If add_to_output==true then we add the
+                  results to output, otherwise we assign to output, overwriting the
+                  previous values in output.
                - filters contains filters.num_samples() filters.
                - #output.num_samples() == data.num_samples()
                - #output.k() == filters.num_samples()
@@ -902,49 +929,77 @@ namespace dlib { namespace tt
        !*/

        void get_gradient_for_data (
+            const bool add_to_output,
            const tensor& gradient_input,
            const tensor& filters,
            tensor& data_gradient
-        ) { impl.get_gradient_for_data(gradient_input,filters,data_gradient); }
+        ) { impl.get_gradient_for_data(add_to_output,gradient_input,filters,data_gradient); }
        /*!
            requires
-                - filters has the same dimensions as the filters object given to the last
-                  call to operator().
-                - data_gradient has the same dimensions as the data object given to the last
-                  call to operator().
-                - gradient_input has the same dimensions as the last output of operator().
+                - One of the following must be true:
+                    - filters has the same dimensions as the filters object given to the
+                      last call to operator().  Also, data_gradient has the same dimensions
+                      as the data object given to the last call to operator().
+                    - setup() has been called.  Specifically, setup() has been called like this:
+                      this->setup(data_gradient, filters, stride_y, stride_x, padding_y, padding_x);
+                - gradient_input has the following dimensions:
+                    - gradient_input.num_samples() == data_gradient.num_samples()
+                    - gradient_input.k() == filters.num_samples()
+                    - gradient_input.nr() == 1+(data_gradient.nr() + 2*padding_y - filters.nr())/stride_y
+                    - gradient_input.nc() == 1+(data_gradient.nc() + 2*padding_x - filters.nc())/stride_x
+                    - NOTE, these dimensions are what you would obtain if gradient_input
+                      has the same dimensions as the last output of operator().
                - is_same_object(data_gradient,filters) == false
                - is_same_object(data_gradient,gradient_input) == false
            ensures
                - let OUT be the output of (*this)(OUT,data,filters,sx,sy).
                - let f(data,filters) == dot(OUT, gradient_input)
-                - This function finds the gradient of f() with respect to data and adds
-                  this gradient to data_gradient.
+                - if (add_to_output) then
+                    - This function finds the gradient of f() with respect to data and adds
+                      this gradient to data_gradient.
+                - else
+                    - This function finds the gradient of f() with respect to data and
+                      assigns this gradient to data_gradient, overwriting the previous
+                      values in data_gradient.
        !*/
        void get_gradient_for_filters (
+            const bool add_to_output,
            const tensor& gradient_input,
            const tensor& data,
            tensor& filters_gradient
-        ) { impl.get_gradient_for_filters(gradient_input,data,filters_gradient); }
+        ) { impl.get_gradient_for_filters(add_to_output,gradient_input,data,filters_gradient); }
        /*!
            requires
-                - filters_gradient has the same dimensions as the filters object given to
-                  the last call to operator().
-                - data has the same dimensions as the data object given to the last call to
-                  operator().
-                - gradient_input has the same dimensions as the last output of operator().
+                - One of the following must be true:
+                    - filters_gradient has the same dimensions as the filters object given
+                      to the last call to operator().  Also, data has the same dimensions
+                      as the data object given to the last call to operator().
+                    - setup() has been called.  Specifically, setup() has been called like this:
+                      this->setup(data, filters_gradient, stride_y, stride_x, padding_y, padding_x);
+                - gradient_input has the following dimensions:
+                    - gradient_input.num_samples() == data.num_samples()
+                    - gradient_input.k() == filters.num_samples()
+                    - gradient_input.nr() == 1+(data.nr() + 2*padding_y - filters.nr())/stride_y
+                    - gradient_input.nc() == 1+(data.nc() + 2*padding_x - filters.nc())/stride_x
+                    - NOTE, these dimensions are what you would obtain if gradient_input
+                      has the same dimensions as the last output of operator().
                - is_same_object(filters_gradient,data) == false
                - is_same_object(filters_gradient,gradient_input) == false
            ensures
                - let OUT be the output of (*this)(OUT,data,filters,sx,sy).
                - let f(data,filters) == dot(OUT, gradient_input)
-                - This function finds the gradient of f() with respect to filters and assigns
-                  this gradient to filters_gradient.
+                - if (add_to_output) then
+                    - This function finds the gradient of f() with respect to filters and
+                      adds this gradient to filters_gradient.
+                - else
+                    - This function finds the gradient of f() with respect to filters and
+                      assigns this gradient to filters_gradient, overwriting the previous
+                      values in filters_gradient.
        !*/
        void setup(
            const tensor& data,
            const tensor& filters,
            int stride_y,
@@ -952,6 +1007,26 @@ namespace dlib { namespace tt
            int stride_x,
            int padding_y,
            int padding_x
        ) {impl.setup(data,filters,stride_y,stride_x,padding_y,padding_x); }
+        /*!
+            requires
+                - filters.k() == data.k()
+                - stride_y > 0
+                - stride_x > 0
+                - 0 <= padding_y < filters.nr()
+                - 0 <= padding_x < filters.nc()
+            ensures
+                - When operator() is called, the output tensor will have these dimensions:
+                    - output.nr() == 1+(data.nr() + 2*padding_y - filters.nr())/stride_y
+                    - output.nc() == 1+(data.nc() + 2*padding_x - filters.nc())/stride_x
+                    - output.num_samples() == data.num_samples()
+                    - output.k() == filters.num_samples()
+                - The point of setup() is to allow this object to gather information about
+                  all the tensor sizes and filter layouts involved in the computation.  In
+                  particular, the reason the tensors are input into setup() is just to
+                  observe their sizes.  setup() doesn't do anything with the contents of
+                  the tensors, or store any kind of references to the data or filter
+                  tensors.
+        !*/

    private:
#ifdef DLIB_USE_CUDA
...
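
Putting the documented contract together: setup() only records sizes, the resizable_tensor overload sizes the output itself, and the plain tensor overload demands an output that already has exactly the documented dimensions. A sketch (the function name and stride/padding values are illustrative):

    #include <dlib/dnn.h>
    using namespace dlib;

    void forward_twice(tt::tensor_conv& conv, const tensor& data, const tensor& filters)
    {
        // Only observes tensor sizes; keeps no reference to data or filters.
        conv.setup(data, filters, 2,2, 1,1);

        // Overload taking resizable_tensor: output is sized automatically to
        // 1+(data.nr()+2*1-filters.nr())/2 rows, and so on.
        resizable_tensor out;
        conv(false, out, data, filters);

        // Overload taking plain tensor: must already be correctly sized.
        // Reusing the buffer with add_to_output == true doubles the result.
        conv(true, static_cast<tensor&>(out), data, filters);
    }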
@@ -806,9 +806,17 @@ namespace
        if (!(filters.nc() <= data.nc() + 2*padding_x))
            padding_x = (filters.nc()-data.nc()+1)/2;

        conv1.setup(data,filters,stride_y,stride_x,padding_y,padding_x);
-        conv1(output1, data, filters);
+        conv1(false, output1, data, filters);
        conv2.setup(data,filters,stride_y,stride_x,padding_y,padding_x);
-        conv2(output2, data, filters);
+        conv2(false, output2, data, filters);
+        dlog << LINFO << "forward error: "<< max(abs(mat(output1)-mat(output2)));
+        DLIB_TEST_MSG(max(abs(mat(output1)-mat(output2))) < 1e-3, max(abs(mat(output1)-mat(output2)))
+            <<"\n\t padding_y: "<< padding_y
+            <<"\n\t padding_x: "<< padding_x
+        );
+
+        conv1(true, output1, data, filters);
+        conv2(true, output2, data, filters);

        dlog << LINFO << "forward error: "<< max(abs(mat(output1)-mat(output2)));
        DLIB_TEST_MSG(max(abs(mat(output1)-mat(output2))) < 1e-3, max(abs(mat(output1)-mat(output2)))
            <<"\n\t padding_y: "<< padding_y
@@ -826,8 +834,14 @@ namespace
        data_gradient1 = 1;
        data_gradient2 = 1;

-        conv1.get_gradient_for_data(gi, filters, data_gradient1);
-        conv2.get_gradient_for_data(gi, filters, data_gradient2);
+        conv1.get_gradient_for_data(true, gi, filters, data_gradient1);
+        conv2.get_gradient_for_data(true, gi, filters, data_gradient2);
+
+        dlog << LINFO << "data gradient error: "<< max(abs(mat(data_gradient1)-mat(data_gradient2)));
+        DLIB_TEST(max(abs(mat(data_gradient1)-mat(data_gradient2))) < 1e-3);
+
+        conv1.get_gradient_for_data(false, gi, filters, data_gradient1);
+        conv2.get_gradient_for_data(false, gi, filters, data_gradient2);

        dlog << LINFO << "data gradient error: "<< max(abs(mat(data_gradient1)-mat(data_gradient2)));
        DLIB_TEST(max(abs(mat(data_gradient1)-mat(data_gradient2))) < 1e-3);
@@ -842,8 +856,15 @@ namespace
        filter_gradient1 = 1;
        filter_gradient2 = 1;

-        conv1.get_gradient_for_filters(gi, data, filter_gradient1);
-        conv2.get_gradient_for_filters(gi, data, filter_gradient2);
+        conv1.get_gradient_for_filters(false, gi, data, filter_gradient1);
+        conv2.get_gradient_for_filters(false, gi, data, filter_gradient2);
+
+        dlog << LINFO << "filter gradient error: "<< max(abs(mat(filter_gradient1)-mat(filter_gradient2)));
+        DLIB_TEST_MSG(max(abs(mat(filter_gradient1)-mat(filter_gradient2))) < 1e-3, max(abs(mat(filter_gradient1)-mat(filter_gradient2))));
+
+        conv1.get_gradient_for_filters(true, gi, data, filter_gradient1);
+        conv2.get_gradient_for_filters(true, gi, data, filter_gradient2);

        dlog << LINFO << "filter gradient error: "<< max(abs(mat(filter_gradient1)-mat(filter_gradient2)));
        DLIB_TEST_MSG(max(abs(mat(filter_gradient1)-mat(filter_gradient2))) < 1e-3, max(abs(mat(filter_gradient1)-mat(filter_gradient2))));
@@ -1475,6 +1496,12 @@ namespace
            auto res = test_layer(l);
            DLIB_TEST_MSG(res, res);
        }
+        {
+            print_spinner();
+            cont_<3,3,3,2,2,0,0> l;
+            auto res = test_layer(l);
+            DLIB_TEST_MSG(res, res);
+        }
        {
            print_spinner();
            cont_<3,3,3,2,2> l;
@@ -1487,6 +1514,12 @@ namespace
            auto res = test_layer(l);
            DLIB_TEST_MSG(res, res);
        }
+        {
+            print_spinner();
+            cont_<3,3,3,1,1,0,0> l;
+            auto res = test_layer(l);
+            DLIB_TEST_MSG(res, res);
+        }
        {
            print_spinner();
            cont_<3,2,2,2,2> l;
...