Commit fe6e2457 authored by Davis King

Added CPU convolution implementation.

parent fdfe77d1
@@ -1362,6 +1362,174 @@ namespace dlib
}
// ------------------------------------------------------------------------------------
// ------------------------------------------------------------------------------------
// ------------------------------------------------------------------------------------
void img2col(
matrix<float>& output,
const tensor& data,
long n,
long filter_nr,
long filter_nc,
long stride_y,
long stride_x
)
{
const auto d = data.host() + data.k()*data.nr()*data.nc()*n;
const rectangle boundary = get_rect(data);
const long out_nr = 1+(data.nr()-filter_nr%2)/stride_y;
const long out_nc = 1+(data.nc()-filter_nc%2)/stride_x;
output.set_size(out_nr*out_nc,
data.k()*filter_nr*filter_nc);
DLIB_CASSERT(output.size() != 0,"");
float* t = &output(0,0);
// now fill in the Toeplitz output matrix for the n-th sample in data.
size_t cnt = 0;
for (long r = -(1-filter_nr%2); r < data.nr(); r+=stride_y)
{
for (long c = -(1-filter_nc%2); c < data.nc(); c+=stride_x)
{
for (long k = 0; k < data.k(); ++k)
{
for (long y = 0; y < filter_nr; ++y)
{
for (long x = 0; x < filter_nc; ++x)
{
DLIB_CASSERT(cnt < output.size(),"");
long xx = c-x+filter_nc/2;
long yy = r-y+filter_nr/2;
if (boundary.contains(xx,yy))
*t = d[(k*data.nr() + yy)*data.nc() + xx];
else
*t = 0;
++t;
++cnt;
}
}
}
}
}
}
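        // Note: img2col lays the n-th sample of data out as a Toeplitz-style
        // matrix so convolution can be done as a single matrix multiply.  Each
        // row corresponds to one output location (r,c); its columns enumerate
        // the data.k()*filter_nr*filter_nc input values under the filter
        // window at that location, with zeros written wherever the window
        // hangs off the image boundary.  Since xx and yy step through the
        // window in reverse, this is a true convolution rather than a
        // cross-correlation.  E.g. a 3x5x5 input with 3x3 filters at stride 1
        // gives a 25x27 matrix: 25 output positions by 3*3*3 window values.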
void col2img(
const matrix<float>& output,
tensor& data,
long n,
long filter_nr,
long filter_nc,
long stride_y,
long stride_x
)
{
const auto d = data.host() + data.k()*data.nr()*data.nc()*n;
const rectangle boundary = get_rect(data);
DLIB_CASSERT(output.size() != 0,"");
const float* t = &output(0,0);
// now scatter the values in the Toeplitz matrix back into the n-th sample in data.
for (long r = -(1-filter_nr%2); r < data.nr(); r+=stride_y)
{
for (long c = -(1-filter_nc%2); c < data.nc(); c+=stride_x)
{
for (long k = 0; k < data.k(); ++k)
{
for (long y = 0; y < filter_nr; ++y)
{
for (long x = 0; x < filter_nc; ++x)
{
long xx = c-x+filter_nc/2;
long yy = r-y+filter_nr/2;
if (boundary.contains(xx,yy))
d[(k*data.nr() + yy)*data.nc() + xx] += *t;
++t;
}
}
}
}
}
}
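        // Note: col2img is the adjoint of img2col.  It visits the same output
        // locations and window offsets, but instead of gathering input values
        // into the matrix it scatters the matrix values back into the n-th
        // sample of data, summing wherever neighboring filter windows
        // overlap.  This is exactly what is needed to backpropagate a
        // gradient through img2col.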
void tensor_conv::operator() (
resizable_tensor& output,
const tensor& data,
const tensor& filters,
int stride_y,
int stride_x
)
{
DLIB_CASSERT(is_same_object(output,data) == false,"");
DLIB_CASSERT(is_same_object(output,filters) == false,"");
DLIB_CASSERT(filters.k() == data.k(),"");
DLIB_CASSERT(stride_y > 0 && stride_x > 0,"");
output.set_size(data.num_samples(),
filters.num_samples(),
1+(data.nr()-filters.nr()%2)/stride_y,
1+(data.nc()-filters.nc()%2)/stride_x);
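            // Note: with odd filter sizes and stride 1 the output has the
            // same spatial size as the input (implicit zero padding of
            // filters.nr()/2 and filters.nc()/2); even filter sizes yield one
            // extra row/column.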
matrix<float> temp;
for (long n = 0; n < data.num_samples(); ++n)
{
img2col(temp, data, n, filters.nr(), filters.nc(), stride_y, stride_x);
output.set_sample(n, mat(filters)*trans(temp));
}
last_stride_y = stride_y;
last_stride_x = stride_x;
}
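        // Note: the loop above implements convolution as a GEMM.  mat(filters)
        // views the filter tensor as a filters.num_samples() by (k*nr*nc)
        // matrix and temp is (out_nr*out_nc) by (k*nr*nc), so
        // mat(filters)*trans(temp) is filters.num_samples() by
        // (out_nr*out_nc), i.e. one k-plane per filter for the n-th output
        // sample.  The strides are cached so the gradient routines below can
        // rebuild the same img2col geometry.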
// ------------------------------------------------------------------------------------
void tensor_conv::
get_gradient_for_data (
const tensor& gradient_input,
const tensor& filters,
tensor& data_gradient
)
{
matrix<float> temp;
for (long n = 0; n < gradient_input.num_samples(); ++n)
{
auto gi = mat(gradient_input.host()+gradient_input.k()*gradient_input.nr()*gradient_input.nc()*n,
gradient_input.k(),
gradient_input.nr()*gradient_input.nc());
temp = trans(gi)*mat(filters);
col2img(temp, data_gradient, n, filters.nr(), filters.nc(), last_stride_y, last_stride_x);
}
}
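        // Note: writing OUT = F*trans(C) with C = img2col(data) and
        // F = mat(filters), the gradient of f = dot(OUT, gradient_input) with
        // respect to C is trans(GI)*F, which is the trans(gi)*mat(filters)
        // term above.  col2img then scatters that matrix back onto
        // data_gradient, adding to whatever it already holds.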
// ------------------------------------------------------------------------------------
void tensor_conv::
get_gradient_for_filters (
const tensor& gradient_input,
const tensor& data,
tensor& filters_gradient
)
{
matrix<float> temp;
for (long n = 0; n < gradient_input.num_samples(); ++n)
{
auto gi = mat(gradient_input.host()+gradient_input.k()*gradient_input.nr()*gradient_input.nc()*n,
gradient_input.k(),
gradient_input.nr()*gradient_input.nc());
img2col(temp, data, n, filters_gradient.nr(), filters_gradient.nc(), last_stride_y, last_stride_x);
if (n == 0)
filters_gradient = gi*temp;
else
filters_gradient += gi*temp;
}
}
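        // Note: by the same reasoning the gradient with respect to the
        // filters is GI*C, i.e. gi*temp with temp = img2col(data).  The
        // per-sample contributions are summed over n, and unlike
        // get_gradient_for_data() the result is assigned to filters_gradient
        // rather than accumulated into it.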
// ------------------------------------------------------------------------------------
// ------------------------------------------------------------------------------------
// ------------------------------------------------------------------------------------
@@ -286,6 +286,45 @@ namespace dlib
};
// -----------------------------------------------------------------------------------
class tensor_conv
{
public:
tensor_conv(const tensor_conv&) = delete;
tensor_conv& operator=(const tensor_conv&) = delete;
tensor_conv() {}
void clear(
) {}
void operator() (
resizable_tensor& output,
const tensor& data,
const tensor& filters,
int stride_y,
int stride_x
);
void get_gradient_for_data (
const tensor& gradient_input,
const tensor& filters,
tensor& data_gradient
);
void get_gradient_for_filters (
const tensor& gradient_input,
const tensor& data,
tensor& filters_gradient
);
private:
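            // strides recorded by the last call to operator(); the gradient
            // functions assume a forward pass has already set these.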
long last_stride_y;
long last_stride_x;
};
// -----------------------------------------------------------------------------------
}
@@ -409,73 +409,6 @@ namespace dlib { namespace tt
#endif
}
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
tensor_conv::
tensor_conv()
{
}
void tensor_conv::
clear(
)
{
#ifdef DLIB_USE_CUDA
impl.clear();
#else
// TODO
DLIB_CASSERT(false,"");
#endif
}
void tensor_conv::
operator() (
resizable_tensor& output,
const tensor& data,
const tensor& filters,
int stride_y,
int stride_x
)
{
#ifdef DLIB_USE_CUDA
impl(output, data, filters, stride_y, stride_x);
#else
// TODO
DLIB_CASSERT(false,"");
#endif
}
void tensor_conv::
get_gradient_for_data (
const tensor& gradient_input,
const tensor& filters,
tensor& data_gradient
)
{
#ifdef DLIB_USE_CUDA
impl.get_gradient_for_data(gradient_input, filters, data_gradient);
#else
// TODO
DLIB_CASSERT(false,"");
#endif
}
void tensor_conv::
get_gradient_for_filters (
const tensor& gradient_input,
const tensor& data,
tensor& filters_gradient
)
{
#ifdef DLIB_USE_CUDA
impl.get_gradient_for_filters(gradient_input, data, filters_gradient);
#else
// TODO
DLIB_CASSERT(false,"");
#endif
}
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
@@ -563,10 +563,10 @@ namespace dlib { namespace tt
tensor_conv(const tensor_conv&) = delete;
tensor_conv& operator=(const tensor_conv&) = delete;
tensor_conv() {}
void clear(
) { impl.clear(); }
void operator() (
resizable_tensor& output,
@@ -574,38 +574,39 @@ namespace dlib { namespace tt
const tensor& filters,
int stride_y,
int stride_x
) { impl(output,data,filters,stride_y,stride_x); }
/*!
requires
- stride_y > 0
- stride_x > 0
- is_same_object(output,data) == false
- is_same_object(output,filters) == false
- filters.k() == data.k()
ensures
- convolves filters over data.
- filters contains filters.num_samples() filters.
- #output.num_samples() == data.num_samples()
- #output.k() == filters.num_samples()
- #output.nr() == 1+(data.nr()-filters.nr()%2)/stride_y
- #output.nc() == 1+(data.nc()-filters.nc()%2)/stride_x
!*/
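            // Illustrative example (not part of the original header): 3-channel
            // 28x28 data convolved with 16 5x5 filters at stride 1 gives
            // #output.nr() == 1+(28-5%2)/1 == 28, so output is
            // num_samples() x 16 x 28 x 28; an even filter size such as 4
            // would give 1+(28-0)/1 == 29 rows instead.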
void get_gradient_for_data (
const tensor& gradient_input,
const tensor& filters,
tensor& data_gradient
) { impl.get_gradient_for_data(gradient_input,filters,data_gradient); }
/*!
requires
- filters has the same dimensions as the filters object given to the last
call to operator().
- data_gradient has the same dimensions as the data object given to the last
call to operator().
- gradient_input has the same dimensions as the last output of operator().
- is_same_object(data_gradient,filters) == false
- is_same_object(data_gradient,gradient_input) == false
ensures
- let OUT be the output of (*this)(OUT,data,filters,sx,sy).
- let f(data,filters) == dot(OUT, gradient_input)
- This function finds the gradient of f() with respect to data and adds
this gradient to data_gradient.
@@ -615,18 +616,18 @@ namespace dlib { namespace tt
const tensor& gradient_input,
const tensor& data,
tensor& filters_gradient
) { impl.get_gradient_for_filters(gradient_input,data,filters_gradient); }
/*!
requires
- filters_gradient has the same dimensions as the filters object given to
the last call to operator().
- data has the same dimensions as the data object given to the last call to
operator().
- gradient_input has the same dimensions as the last output of operator().
- is_same_object(filters_gradient,data) == false
- is_same_object(filters_gradient,gradient_input) == false
ensures
- let OUT be the output of (*this)(OUT,data,filters,sx,sy).
- let f(data,filters) == dot(OUT, gradient_input)
- This function finds the gradient of f() with respect to filters and assigns
this gradient to filters_gradient.
@@ -636,7 +637,7 @@ namespace dlib { namespace tt
#ifdef DLIB_USE_CUDA
cuda::tensor_conv impl;
#else
cpu::tensor_conv impl;
#endif
};
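For context, here is a minimal sketch (illustrative, not part of this commit) of how the tt::tensor_conv interface above might be exercised in a non-CUDA build, where it forwards to the new cpu::tensor_conv. The dlib/dnn.h include and the tt::tensor_rand utility are assumptions about the surrounding library at this revision:

    #include <dlib/dnn.h>

    int main()
    {
        using namespace dlib;

        resizable_tensor data(2, 3, 28, 28);    // 2 samples, 3 channels, 28x28
        resizable_tensor filters(16, 3, 5, 5);  // 16 filters, matching k() == 3
        tt::tensor_rand rnd;                    // assumed RNG helper
        rnd.fill_gaussian(data);
        rnd.fill_gaussian(filters);

        tt::tensor_conv conv;
        resizable_tensor output;
        conv(output, data, filters, 1, 1);      // stride 1 in y and x
        // output is now 2 x 16 x 28 x 28 (odd 5x5 filters, stride 1)

        // backward pass: gradients of dot(output, gi) w.r.t. data and filters
        resizable_tensor gi;        gi.copy_size(output);       rnd.fill_gaussian(gi);
        resizable_tensor data_grad; data_grad.copy_size(data);  data_grad = 0;
        resizable_tensor filt_grad; filt_grad.copy_size(filters);
        conv.get_gradient_for_data(gi, filters, data_grad);
        conv.get_gradient_for_filters(gi, data, filt_grad);
    }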