Commit 023e1398 authored by Davis King

merged

parents e44da577 c68af9dc
......@@ -3199,7 +3199,7 @@ namespace dlib
}
}
} // end for (int iter = 0; iter < 5; ++iter)
} // end for (int iter = 0; iter < 10; ++iter)
if (rs_params.mean() > 0.003)
{
......
......@@ -1740,54 +1740,67 @@ namespace dlib
}
void tensor_conv::operator() (
const bool add_to_output,
resizable_tensor& output,
const tensor& data,
const tensor& filters,
int stride_y,
int stride_x,
int padding_y,
int padding_x
const tensor& filters
)
{
DLIB_CASSERT(last_stride_y > 0 && last_stride_x > 0, "You must call setup() before calling this function.");
output.set_size(data.num_samples(),
filters.num_samples(),
1+(data.nr()+2*last_padding_y-filters.nr())/last_stride_y,
1+(data.nc()+2*last_padding_x-filters.nc())/last_stride_x);
(*this)(add_to_output, static_cast<tensor&>(output),data,filters);
}
void tensor_conv::operator() (
const bool add_to_output,
tensor& output,
const tensor& data,
const tensor& filters
)
{
DLIB_CASSERT(is_same_object(output,data) == false);
DLIB_CASSERT(is_same_object(output,filters) == false);
DLIB_CASSERT(filters.k() == data.k());
DLIB_CASSERT(stride_y > 0 && stride_x > 0);
DLIB_CASSERT(0 <= padding_y && padding_y < filters.nr());
DLIB_CASSERT(0 <= padding_x && padding_x < filters.nc());
DLIB_CASSERT(filters.nr() <= data.nr() + 2*padding_y,
DLIB_CASSERT(last_stride_y > 0 && last_stride_x > 0, "You must call setup() before calling this function.");
DLIB_CASSERT(filters.nr() <= data.nr() + 2*last_padding_y,
"Filter windows must be small enough to fit into the padded image.");
DLIB_CASSERT(filters.nc() <= data.nc() + 2*padding_x,
DLIB_CASSERT(filters.nc() <= data.nc() + 2*last_padding_x,
"Filter windows must be small enough to fit into the padded image.");
output.set_size(data.num_samples(),
filters.num_samples(),
1+(data.nr()+2*padding_y-filters.nr())/stride_y,
1+(data.nc()+2*padding_x-filters.nc())/stride_x);
DLIB_CASSERT(output.num_samples() == data.num_samples());
DLIB_CASSERT(output.k() == filters.num_samples());
DLIB_CASSERT(output.nr() == 1+(data.nr()+2*last_padding_y-filters.nr())/last_stride_y);
DLIB_CASSERT(output.nc() == 1+(data.nc()+2*last_padding_x-filters.nc())/last_stride_x);
matrix<float> temp;
for (long n = 0; n < data.num_samples(); ++n)
{
img2col(temp, data, n, filters.nr(), filters.nc(), stride_y, stride_x, padding_y, padding_x);
output.set_sample(n, mat(filters)*trans(temp));
}
img2col(temp, data, n, filters.nr(), filters.nc(), last_stride_y, last_stride_x, last_padding_y, last_padding_x);
last_stride_y = stride_y;
last_stride_x = stride_x;
last_padding_y = padding_y;
last_padding_x = padding_x;
if (add_to_output)
output.add_to_sample(n, mat(filters)*trans(temp));
else
output.set_sample(n, mat(filters)*trans(temp));
}
}
// ------------------------------------------------------------------------------------
void tensor_conv::
get_gradient_for_data (
const bool add_to_output,
const tensor& gradient_input,
const tensor& filters,
tensor& data_gradient
)
{
matrix<float> temp;
if (!add_to_output)
data_gradient = 0;
for (long n = 0; n < gradient_input.num_samples(); ++n)
{
auto gi = mat(gradient_input.host()+gradient_input.k()*gradient_input.nr()*gradient_input.nc()*n,
......@@ -1804,6 +1817,7 @@ namespace dlib
void tensor_conv::
get_gradient_for_filters (
const bool add_to_output,
const tensor& gradient_input,
const tensor& data,
tensor& filters_gradient
......@@ -1819,12 +1833,19 @@ namespace dlib
img2col(temp, data, n, filters_gradient.nr(), filters_gradient.nc(), last_stride_y, last_stride_x, last_padding_y, last_padding_x);
if (n == 0)
filters_gradient = gi*temp;
{
if (add_to_output)
filters_gradient += gi*temp;
else
filters_gradient = gi*temp;
}
else
{
filters_gradient += gi*temp;
}
}
}
// ------------------------------------------------------------------------------------
// ------------------------------------------------------------------------------------
void copy_tensor(
tensor& dest,
size_t dest_k_offset,
......
......@@ -368,23 +368,48 @@ namespace dlib
void clear(
) {}
void operator() (
resizable_tensor& output,
const tensor& data,
const tensor& filters,
void setup(
const tensor& data, /* not used but required for interface */
const tensor& filters, /* not used but required for interface */
int stride_y,
int stride_x,
int padding_y,
int padding_x
)
{
(void)data; /* silence compiler */
DLIB_CASSERT(stride_y > 0 && stride_x > 0);
DLIB_CASSERT(0 <= padding_y && padding_y < filters.nr());
DLIB_CASSERT(0 <= padding_x && padding_x < filters.nc());
last_stride_y = stride_y;
last_stride_x = stride_x;
last_padding_y = padding_y;
last_padding_x = padding_x;
}
void operator() (
const bool add_to_output,
resizable_tensor& output,
const tensor& data,
const tensor& filters
);
void operator() (
const bool add_to_output,
tensor& output,
const tensor& data,
const tensor& filters
);
void get_gradient_for_data (
const bool add_to_output,
const tensor& gradient_input,
const tensor& filters,
tensor& data_gradient
);
void get_gradient_for_filters (
const bool add_to_output,
const tensor& gradient_input,
const tensor& data,
tensor& filters_gradient
......@@ -392,10 +417,10 @@ namespace dlib
private:
long last_stride_y;
long last_stride_x;
long last_padding_y;
long last_padding_x;
long last_stride_y = 0;
long last_stride_x = 0;
long last_padding_y = 0;
long last_padding_x = 0;
};
// -----------------------------------------------------------------------------------
......
......@@ -951,19 +951,29 @@ namespace dlib
}
void tensor_conv::operator() (
const bool add_to_output,
resizable_tensor& output,
const tensor& data,
const tensor& filters,
int stride_y,
int stride_x,
int padding_y,
int padding_x
const tensor& filters
)
{
DLIB_CASSERT(stride_y > 0 && stride_x > 0, "You must call setup() before calling this function");
output.set_size(out_num_samples, out_k, out_nr, out_nc);
(*this)(add_to_output, static_cast<tensor&>(output), data, filters);
}
void tensor_conv::operator() (
const bool add_to_output,
tensor& output,
const tensor& data,
const tensor& filters
)
{
DLIB_CASSERT(is_same_object(output,data) == false);
DLIB_CASSERT(is_same_object(output,filters) == false);
DLIB_CASSERT(filters.k() == data.k());
DLIB_CASSERT(stride_y > 0 && stride_x > 0);
DLIB_CASSERT(stride_y > 0 && stride_x > 0, "You must call setup() before calling this function");
DLIB_CASSERT(filters.nc() <= data.nc() + 2*padding_x,
"Filter windows must be small enough to fit into the padded image."
<< "\n\t filters.nc(): " << filters.nc()
......@@ -978,19 +988,15 @@ namespace dlib
);
setup(data,filters,stride_y,stride_x,padding_y,padding_x);
output.set_size(out_num_samples, out_k, out_nr, out_nc);
DLIB_ASSERT(output.num_samples() == data.num_samples(),out_num_samples << " " << data.num_samples());
DLIB_ASSERT(output.k() == filters.num_samples());
DLIB_ASSERT(output.nr() == 1+(data.nr()+2*padding_y-filters.nr())/stride_y);
DLIB_ASSERT(output.nc() == 1+(data.nc()+2*padding_x-filters.nc())/stride_x);
DLIB_CASSERT(output.num_samples() == data.num_samples(),out_num_samples << " " << data.num_samples());
DLIB_CASSERT(output.k() == filters.num_samples());
DLIB_CASSERT(output.nr() == 1+(data.nr()+2*padding_y-filters.nr())/stride_y);
DLIB_CASSERT(output.nc() == 1+(data.nc()+2*padding_x-filters.nc())/stride_x);
const float alpha = 1;
const float beta = 0;
const float beta = add_to_output ? 1 : 0;
CHECK_CUDNN(cudnnConvolutionForward(
context(),
&alpha,
......@@ -1008,13 +1014,14 @@ namespace dlib
}
void tensor_conv::get_gradient_for_data (
const bool add_to_output,
const tensor& gradient_input,
const tensor& filters,
tensor& data_gradient
)
{
const float alpha = 1;
const float beta = 1;
const float beta = add_to_output ? 1 : 0;
CHECK_CUDNN(cudnnConvolutionBackwardData(context(),
......@@ -1034,13 +1041,14 @@ namespace dlib
void tensor_conv::
get_gradient_for_filters (
const bool add_to_output,
const tensor& gradient_input,
const tensor& data,
tensor& filters_gradient
)
{
const float alpha = 1;
const float beta = 0;
const float beta = add_to_output ? 1 : 0;
CHECK_CUDNN(cudnnConvolutionBackwardFilter(context(),
&alpha,
descriptor(data),
......
......@@ -203,76 +203,34 @@ namespace dlib
);
void operator() (
const bool add_to_output,
tensor& output,
const tensor& data,
const tensor& filters
);
void operator() (
const bool add_to_output,
resizable_tensor& output,
const tensor& data,
const tensor& filters,
int stride_y,
int stride_x,
int padding_y,
int padding_x
const tensor& filters
);
/*!
requires
- stride_y > 0
- stride_x > 0
- 0 <= padding_y < filters.nr()
- 0 <= padding_x < filters.nc()
- is_same_object(output,data) == false
- is_same_object(output,filters) == false
ensures
- convolves filters over data.
- filters contains filters.num_samples() filters.
- #output.num_samples() == data.num_samples()
- #output.k() == filters.num_samples()
- #output.nr() == 1+(data.nr()-filters.nr()%2)/stride_y
- #output.nc() == 1+(data.nc()-filters.nc()%2)/stride_x
!*/
void get_gradient_for_data (
const bool add_to_output,
const tensor& gradient_input,
const tensor& filters,
tensor& data_gradient
);
/*!
requires
- filters has the same dimensions as the filters object given to the
last call to operator().
- data_gradient has the same dimensions as the data object given to the
last call to operator().
- gradient_input has the same dimensions as the output of operator().
- is_same_object(data_gradient,filters) == false
- is_same_object(data_gradient,gradient_input) == false
ensures
- let OUT be the output of (*this)(OUT,data,filters).
- let f(data,filters) == dot(OUT, gradient_input)
- This function finds the gradient of f() with respect to data
and adds this gradient to data_gradient.
!*/
void get_gradient_for_filters (
const bool add_to_output,
const tensor& gradient_input,
const tensor& data,
tensor& filters_gradient
);
/*!
requires
- filters_gradient has the same dimensions as the filters object given
to the last call to operator().
- data has the same dimensions as the data object given to the last call
to operator().
- gradient_input has the same dimensions as the output of operator().
- is_same_object(filters_gradient,data) == false
- is_same_object(filters_gradient,gradient_input) == false
ensures
- let OUT be the output of (*this)(OUT,data,filters).
- let f(data,filters) == dot(OUT, gradient_input)
- This function finds the gradient of f() with respect to filters
and assigns this gradient to filters_gradient.
!*/
private:
void setup(
void setup(
const tensor& data,
const tensor& filters,
int stride_y,
......@@ -280,14 +238,8 @@ namespace dlib
int padding_y,
int padding_x
);
/*!
requires
- filters.k() == data.k()
- stride_y > 0
- stride_x > 0
- 0 <= padding_y < filters.nr()
- 0 <= padding_x < filters.nc()
!*/
private:
// These variables record the type of data given to the last call to setup().
int stride_y;
......
......@@ -840,6 +840,221 @@ namespace dlib
>
using con = add_layer<con_<num_filters,nr,nc,stride_y,stride_x>, SUBNET>;
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
template <
long _num_filters,
long _nr,
long _nc,
int _stride_y,
int _stride_x,
int _padding_y = _stride_y!=1? 0 : _nr/2,
int _padding_x = _stride_x!=1? 0 : _nc/2
>
class cont_
{
/*!
REQUIREMENTS ON TEMPLATE ARGUMENTS
All of them must be > 0.
Also, we require that:
- 0 <= _padding_y && _padding_y < _nr
- 0 <= _padding_x && _padding_x < _nc
WHAT THIS OBJECT REPRESENTS
This is an implementation of the EXAMPLE_COMPUTATIONAL_LAYER_ interface
defined above. In particular, it defines a transposed convolution layer
that takes an input tensor and transpose convolves it (an operation sometimes
called "deconvolution") with a set of filters, then outputs the results.
This is essentially a convolutional layer that allows fractional strides.
Therefore, you can make output tensors that are larger than the input
tensors using this layer type.
The dimensions of the tensors output by this layer are as follows (letting
IN be the input tensor and OUT the output tensor):
- OUT.num_samples() == IN.num_samples()
- OUT.k() == num_filters()
- OUT.nr() == stride_y()*(IN.nr()-1) + nr() - 2*padding_y()
- OUT.nc() == stride_x()*(IN.nc()-1) + nc() - 2*padding_x()
(a worked example of these formulas appears just after the cont alias below)
!*/
public:
cont_(
);
/*!
ensures
- #num_filters() == _num_filters
- #nr() == _nr
- #nc() == _nc
- #stride_y() == _stride_y
- #stride_x() == _stride_x
- #padding_y() == _padding_y
- #padding_x() == _padding_x
- #get_learning_rate_multiplier() == 1
- #get_weight_decay_multiplier() == 1
- #get_bias_learning_rate_multiplier() == 1
- #get_bias_weight_decay_multiplier() == 0
!*/
long num_filters(
) const;
/*!
ensures
- returns the number of filters contained in this layer. The k dimension
of the output tensors produced by this layer will be equal to the number
of filters.
!*/
long nr(
) const;
/*!
ensures
- returns the number of rows in the filters in this layer.
!*/
long nc(
) const;
/*!
ensures
- returns the number of columns in the filters in this layer.
!*/
long stride_y(
) const;
/*!
ensures
- returns the vertical stride used when convolving the filters over an
image. That is, each filter will be moved 1.0/stride_y() pixels down at
a time when it moves over the image.
!*/
long stride_x(
) const;
/*!
ensures
- returns the horizontal stride used when convolving the filters over an
image. That is, each filter will be moved 1.0/stride_x() pixels right at
a time when it moves over the image.
!*/
long padding_y(
) const;
/*!
ensures
- returns the number of pixels of zero padding added to the top and bottom
sides of the image.
!*/
long padding_x(
) const;
/*!
ensures
- returns the number of pixels of zero padding added to the left and right
sides of the image.
!*/
double get_learning_rate_multiplier(
) const;
/*!
ensures
- returns a multiplier number. The interpretation is that this object is
requesting that the learning rate used to optimize its parameters be
multiplied by get_learning_rate_multiplier().
!*/
double get_weight_decay_multiplier(
) const;
/*!
ensures
- returns a multiplier number. The interpretation is that this object is
requesting that the weight decay used to optimize its parameters be
multiplied by get_weight_decay_multiplier().
!*/
void set_learning_rate_multiplier(
double val
);
/*!
requires
- val >= 0
ensures
- #get_learning_rate_multiplier() == val
!*/
void set_weight_decay_multiplier(
double val
);
/*!
requires
- val >= 0
ensures
- #get_weight_decay_multiplier() == val
!*/
double get_bias_learning_rate_multiplier(
) const;
/*!
ensures
- returns a multiplier number. The interpretation is that this object is
requesting that the learning rate used to optimize its bias parameters be
multiplied by get_learning_rate_multiplier()*get_bias_learning_rate_multiplier().
!*/
double get_bias_weight_decay_multiplier(
) const;
/*!
ensures
- returns a multiplier number. The interpretation is that this object is
requesting that the weight decay used to optimize its bias parameters be
multiplied by get_weight_decay_multiplier()*get_bias_weight_decay_multiplier().
!*/
void set_bias_learning_rate_multiplier(
double val
);
/*!
requires
- val >= 0
ensures
- #get_bias_learning_rate_multiplier() == val
!*/
void set_bias_weight_decay_multiplier(
double val
);
/*!
requires
- val >= 0
ensures
- #get_bias_weight_decay_multiplier() == val
!*/
template <typename SUBNET> void setup (const SUBNET& sub);
template <typename SUBNET> void forward(const SUBNET& sub, resizable_tensor& output);
template <typename SUBNET> void backward(const tensor& gradient_input, SUBNET& sub, tensor& params_grad);
point map_input_to_output(point p) const;
point map_output_to_input(point p) const;
const tensor& get_layer_params() const;
tensor& get_layer_params();
/*!
These functions are implemented as described in the EXAMPLE_COMPUTATIONAL_LAYER_ interface.
!*/
};
template <
long num_filters,
long nr,
long nc,
int stride_y,
int stride_x,
typename SUBNET
>
using cont = add_layer<cont_<num_filters,nr,nc,stride_y,stride_x>, SUBNET>;
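A small sketch of the transposed-convolution output sizes given in the spec above, and of how cont pairs with con as a rough inverse. The helper name cont_out_size and the commented toy network are illustrative only, not part of dlib:

constexpr long cont_out_size(long in, long pad, long filt, long stride)
{
    // OUT.nr() == stride_y()*(IN.nr()-1) + nr() - 2*padding_y(), per the spec above.
    return stride*(in - 1) + filt - 2*pad;
}

// cont_<f,3,3,2,2,0,0> maps a 20x20 input to a 41x41 output...
static_assert(cont_out_size(20, 0, 3, 2) == 41, "stride 2, 3x3 filter, no padding");
// ...which a stride-2, 3x3, unpadded con_ maps back to 20x20: 1+(41-3)/2 == 20.

// Hypothetical use: downsample with con, then upsample back with cont, e.g.
//   cont<3,3,3,2,2, relu<con<16,3,3,2,2, input<matrix<rgb_pixel>>>>>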
// ----------------------------------------------------------------------------------------
class dropout_
......
......@@ -877,27 +877,50 @@ namespace dlib { namespace tt
) { impl.clear(); }
void operator() (
const bool add_to_output,
tensor& output,
const tensor& data,
const tensor& filters
) { impl(add_to_output,output,data,filters); }
/*!
requires
- setup() has been called. Specifically, setup() has been called like this:
this->setup(data, filters, stride_y, stride_x, padding_y, padding_x);
- is_same_object(output,data) == false
- is_same_object(output,filters) == false
- filters.k() == data.k()
- filters.nr() <= data.nr() + 2*padding_y
- filters.nc() <= data.nc() + 2*padding_x
- output.num_samples() == data.num_samples()
- output.k() == filters.num_samples()
- output.nr() == 1+(data.nr() + 2*padding_y - filters.nr())/stride_y
- output.nc() == 1+(data.nc() + 2*padding_x - filters.nc())/stride_x
ensures
- Convolves filters over data. If add_to_output==true then we add the
results to output, otherwise we assign to output, overwriting the
previous values in output.
- filters contains filters.num_samples() filters.
!*/
void operator() (
const bool add_to_output,
resizable_tensor& output,
const tensor& data,
const tensor& filters,
int stride_y,
int stride_x,
int padding_y,
int padding_x
) { impl(output,data,filters,stride_y,stride_x,padding_y,padding_x); }
const tensor& filters
) { impl(add_to_output,output,data,filters); }
/*!
requires
- stride_y > 0
- stride_x > 0
- 0 <= padding_y < filters.nr()
- 0 <= padding_x < filters.nc()
- setup() has been called. Specifically, setup() has been called like this:
this->setup(data, filters, stride_y, stride_x, padding_y, padding_x);
- is_same_object(output,data) == false
- is_same_object(output,filters) == false
- filters.k() == data.k()
- filters.nr() <= data.nr() + 2*padding_y
- filters.nc() <= data.nc() + 2*padding_x
ensures
- convolves filters over data.
- Convolves filters over data. If add_to_output==true then we add the
results to output, otherwise we assign to output, overwriting the
previous values in output.
- filters contains filters.num_samples() filters.
- #output.num_samples() == data.num_samples()
- #output.k() == filters.num_samples()
......@@ -906,47 +929,105 @@ namespace dlib { namespace tt
!*/
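Putting the two operator() overloads above together, a minimal usage sketch. The tensor sizes and the variable names conv, data, filters, and output are illustrative, not taken from the patch:

#include <dlib/dnn.h>
using namespace dlib;

void forward_sketch()
{
    tt::tensor_conv conv;
    resizable_tensor data(2, 3, 40, 40);    // 2 samples, 3 channels, 40x40 images
    resizable_tensor filters(8, 3, 5, 5);   // 8 filters, 3 channels, 5x5 windows
    resizable_tensor output;
    data = 1;
    filters = 0.1f;

    // Record strides and padding; setup() only inspects the tensor sizes.
    conv.setup(data, filters, 1, 1, 2, 2);

    // resizable_tensor overload: output is resized to
    // 2 x 8 x (1+(40+4-5)/1) x (1+(40+4-5)/1) == 2 x 8 x 40 x 40, then assigned.
    conv(false, output, data, filters);

    // tensor overload: output must already have those dimensions.  With
    // add_to_output == true the new results are added to what is already there.
    conv(true, static_cast<tensor&>(output), data, filters);
}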
void get_gradient_for_data (
const bool add_to_output,
const tensor& gradient_input,
const tensor& filters,
tensor& data_gradient
) { impl.get_gradient_for_data(gradient_input,filters,data_gradient); }
) { impl.get_gradient_for_data(add_to_output,gradient_input,filters,data_gradient); }
/*!
requires
- filters has the same dimensions as the filters object given to the last
call to operator().
- data_gradient has the same dimensions as the data object given to the last
call to operator().
- gradient_input has the same dimensions as the last output of operator().
- One of the following must be true:
- filters has the same dimensions as the filters object given to the
last call to operator(). Also, data_gradient has the same dimensions
as the data object given to the last call to operator().
- setup() has been called. Specifically, setup() has been called like this:
this->setup(data_gradient, filters, stride_y, stride_x, padding_y, padding_x);
- gradient_input has the following dimensions:
- gradient_input.num_samples() == data_gradient.num_samples()
- gradient_input.k() == filters.num_samples()
- gradient_input.nr() == 1+(data_gradient.nr() + 2*padding_y - filters.nr())/stride_y
- gradient_input.nc() == 1+(data_gradient.nc() + 2*padding_x - filters.nc())/stride_x
- NOTE, these dimensions are what you would obtain if gradient_input
has the same dimensions as the last output of operator().
- is_same_object(data_gradient,filters) == false
- is_same_object(data_gradient,gradient_input) == false
ensures
- let OUT be the output of (*this)(false,OUT,data,filters).
- let f(data,filters) == dot(OUT, gradient_input)
- This function finds the gradient of f() with respect to data and adds
this gradient to data_gradient.
- if (add_to_output) then
- This function finds the gradient of f() with respect to data and adds
this gradient to data_gradient.
- else
- This function finds the gradient of f() with respect to data and
assigns this gradient to data_gradient, overwriting the previous
values in data_gradient.
!*/
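A matching sketch for the add_to_output semantics spelled out above, reusing the hypothetical conv, data, filters, and output objects from the forward sketch:

// gradient_input must have the same dimensions as the forward output.
resizable_tensor gradient_input, data_gradient;
gradient_input.copy_size(output);   gradient_input = 1;
data_gradient.copy_size(data);      data_gradient = 5;

// add_to_output == false: data_gradient is overwritten with the gradient of
// f(data,filters) == dot(OUT, gradient_input) with respect to data.
conv.get_gradient_for_data(false, gradient_input, filters, data_gradient);

// add_to_output == true: the same gradient is accumulated onto the values
// already stored in data_gradient.
conv.get_gradient_for_data(true, gradient_input, filters, data_gradient);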
void get_gradient_for_filters (
const bool add_to_output,
const tensor& gradient_input,
const tensor& data,
tensor& filters_gradient
) { impl.get_gradient_for_filters(gradient_input,data,filters_gradient); }
) { impl.get_gradient_for_filters(add_to_output,gradient_input,data,filters_gradient); }
/*!
requires
- filters_gradient has the same dimensions as the filters object given to
the last call to operator().
- data has the same dimensions as the data object given to the last call to
operator().
- gradient_input has the same dimensions as the last output of operator().
- One of the following must be true:
- filters_gradient has the same dimensions as the filters object given
to the last call to operator(). Also, data has the same dimensions
as the data object given to the last call to operator().
- setup() has been called. Specifically, setup() has been called like this:
this->setup(data, filters_gradient, stride_y, stride_x, padding_y, padding_x);
- gradient_input has the following dimensions:
- gradient_input.num_samples() == data.num_samples()
- gradient_input.k() == filters.num_samples()
- gradient_input.nr() == 1+(data.nr() + 2*padding_y - filters.nr())/stride_y
- gradient_input.nc() == 1+(data.nc() + 2*padding_x - filters.nc())/stride_x
- NOTE, these dimensions are what you would obtain if gradient_input
has the same dimensions as the last output of operator().
- is_same_object(filters_gradient,data) == false
- is_same_object(filters_gradient,gradient_input) == false
ensures
- let OUT be the output of (*this)(false,OUT,data,filters).
- let f(data,filters) == dot(OUT, gradient_input)
- This function finds the gradient of f() with respect to filters and assigns
this gradient to filters_gradient.
- if (add_to_output) then
- This function finds the gradient of f() with respect to filters and
adds this gradient to filters_gradient.
- else
- This function finds the gradient of f() with respect to filters and
assigns this gradient to filters_gradient, overwriting the previous
values in filters_gradient.
!*/
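And the corresponding sketch for the filter gradient. Passing add_to_output == true on later calls is how gradients can be summed across several calls (again reusing the hypothetical objects from the sketches above):

resizable_tensor filters_gradient;
filters_gradient.copy_size(filters);

// The first call overwrites, the second accumulates, so filters_gradient ends
// up holding the sum of the two per-call gradients.
conv.get_gradient_for_filters(false, gradient_input, data, filters_gradient);
conv.get_gradient_for_filters(true,  gradient_input, data, filters_gradient);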
void setup(
const tensor& data,
const tensor& filters,
int stride_y,
int stride_x,
int padding_y,
int padding_x
) {impl.setup(data,filters,stride_y,stride_x,padding_y,padding_x); }
/*!
requires
- filters.k() == data.k()
- stride_y > 0
- stride_x > 0
- 0 <= padding_y < filters.nr()
- 0 <= padding_x < filters.nc()
ensures
- When operator() is called, the output tensor will have these dimensions:
- output.nr() == 1+(data.nr() + 2*padding_y - filters.nr())/stride_y
- output.nc() == 1+(data.nc() + 2*padding_x - filters.nc())/stride_x
- output.num_samples() == data.num_samples()
- output.k() == filters.num_samples()
- The point of setup() is to allow this object to gather information about
all the tensor sizes and filter layouts involved in the computation. In
particular, the reason the tensors are input into setup() is just to
observe their sizes. setup() doesn't do anything with the contents of
the tensors, or store any kind of references to the data or filter
tensors.
!*/
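The output dimensions promised above are easy to sanity-check. A small standalone sketch; the helper name conv_out_size is ours, not part of dlib:

constexpr long conv_out_size(long in, long pad, long filt, long stride)
{
    // Matches the formula in the ensures clause: 1+(in + 2*pad - filt)/stride.
    return 1 + (in + 2*pad - filt)/stride;
}

static_assert(conv_out_size(40, 1, 3, 2) == 20, "40 rows, 3x3 filter, stride 2, padding 1");
static_assert(conv_out_size(40, 2, 5, 1) == 40, "5x5 filter with padding 2 keeps the size at stride 1");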
private:
#ifdef DLIB_USE_CUDA
cuda::tensor_conv impl;
......
......@@ -805,8 +805,18 @@ namespace
padding_y = (filters.nr()-data.nr()+1)/2;
if (!(filters.nc() <= data.nc() + 2*padding_x))
padding_x = (filters.nc()-data.nc()+1)/2;
conv1(output1, data, filters, stride_y,stride_x, padding_y, padding_x);
conv2(output2, data, filters, stride_y,stride_x, padding_y, padding_x);
conv1.setup(data,filters,stride_y,stride_x,padding_y,padding_x);
conv1(false, output1, data, filters);
conv2.setup(data,filters,stride_y,stride_x,padding_y,padding_x);
conv2(false, output2, data, filters);
dlog << LINFO << "forward error: "<< max(abs(mat(output1)-mat(output2)));
DLIB_TEST_MSG(max(abs(mat(output1)-mat(output2))) < 1e-3, max(abs(mat(output1)-mat(output2)))
<<"\n\t padding_y: "<< padding_y
<<"\n\t padding_x: "<< padding_x
);
conv1(true, output1, data, filters);
conv2(true, output2, data, filters);
dlog << LINFO << "forward error: "<< max(abs(mat(output1)-mat(output2)));
DLIB_TEST_MSG(max(abs(mat(output1)-mat(output2))) < 1e-3, max(abs(mat(output1)-mat(output2)))
<<"\n\t padding_y: "<< padding_y
......@@ -824,8 +834,14 @@ namespace
data_gradient1 = 1;
data_gradient2 = 1;
conv1.get_gradient_for_data(gi, filters, data_gradient1);
conv2.get_gradient_for_data(gi, filters, data_gradient2);
conv1.get_gradient_for_data(true, gi, filters, data_gradient1);
conv2.get_gradient_for_data(true, gi, filters, data_gradient2);
dlog << LINFO << "data gradient error: "<< max(abs(mat(data_gradient1)-mat(data_gradient2)));
DLIB_TEST(max(abs(mat(data_gradient1)-mat(data_gradient2))) < 1e-3);
conv1.get_gradient_for_data(false, gi, filters, data_gradient1);
conv2.get_gradient_for_data(false, gi, filters, data_gradient2);
dlog << LINFO << "data gradient error: "<< max(abs(mat(data_gradient1)-mat(data_gradient2)));
DLIB_TEST(max(abs(mat(data_gradient1)-mat(data_gradient2))) < 1e-3);
......@@ -840,8 +856,15 @@ namespace
filter_gradient1 = 1;
filter_gradient2 = 1;
conv1.get_gradient_for_filters(gi, data, filter_gradient1);
conv2.get_gradient_for_filters(gi, data, filter_gradient2);
conv1.get_gradient_for_filters(false, gi, data, filter_gradient1);
conv2.get_gradient_for_filters(false, gi, data, filter_gradient2);
dlog << LINFO << "filter gradient error: "<< max(abs(mat(filter_gradient1)-mat(filter_gradient2)));
DLIB_TEST_MSG(max(abs(mat(filter_gradient1)-mat(filter_gradient2))) < 1e-3, max(abs(mat(filter_gradient1)-mat(filter_gradient2))));
conv1.get_gradient_for_filters(true, gi, data, filter_gradient1);
conv2.get_gradient_for_filters(true, gi, data, filter_gradient2);
dlog << LINFO << "filter gradient error: "<< max(abs(mat(filter_gradient1)-mat(filter_gradient2)));
DLIB_TEST_MSG(max(abs(mat(filter_gradient1)-mat(filter_gradient2))) < 1e-3, max(abs(mat(filter_gradient1)-mat(filter_gradient2))));
......@@ -1473,6 +1496,36 @@ namespace
auto res = test_layer(l);
DLIB_TEST_MSG(res, res);
}
{
print_spinner();
cont_<3,3,3,2,2,0,0> l;
auto res = test_layer(l);
DLIB_TEST_MSG(res, res);
}
{
print_spinner();
cont_<3,3,3,2,2> l;
auto res = test_layer(l);
DLIB_TEST_MSG(res, res);
}
{
print_spinner();
cont_<3,3,3,1,1> l;
auto res = test_layer(l);
DLIB_TEST_MSG(res, res);
}
{
print_spinner();
cont_<3,3,3,1,1,0,0> l;
auto res = test_layer(l);
DLIB_TEST_MSG(res, res);
}
{
print_spinner();
cont_<3,2,2,2,2> l;
auto res = test_layer(l);
DLIB_TEST_MSG(res, res);
}
{
print_spinner();
con_<3,2,2,2,2> l;
......