Commit 122f2fa6 authored by Davis King

Upgraded to cuDNN v4.0. This required changing the max_pool binding a bit, since pooling
behaves slightly differently in cuDNN: the binding now pads by window/2, so the output has
1+(src.nr()-window_height%2)/stride_y rows and 1+(src.nc()-window_width%2)/stride_x columns
rather than src.nr()/stride_y by src.nc()/stride_x (see the spec and test updates below).
I also removed my CUDA code for batch normalization and replaced it with cuDNN's new batch
normalization methods.

Finally, I had forgotten to add a convolutional option to the bn_ object.  It now has
one, so you can set the mode to whichever you need, either BATCH_NORM_FC or
BATCH_NORM_CONV.
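
For reference, a minimal sketch of the new mode selection (this example is not part of the
diff; the <dlib/dnn.h> header path is an assumption):

    #include <dlib/dnn.h>

    int main()
    {
        // Default construction keeps the previous behavior: each of the
        // k()*nr()*nc() activations is normalized across the samples in a batch.
        dlib::bn_ fc_bn;                          // fc_bn.get_mode() == dlib::BATCH_NORM_FC

        // After a convolutional layer, normalize per channel instead, i.e. across
        // samples, rows, and columns, giving only k() normalization equations.
        dlib::bn_ conv_bn(dlib::BATCH_NORM_CONV); // conv_bn.get_mode() == dlib::BATCH_NORM_CONV

        return 0;
    }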
parent ea2947d0
......@@ -476,7 +476,7 @@ if (NOT TARGET dlib)
set(DLIB_USE_CUDA OFF CACHE STRING ${DLIB_USE_BLAS_STR} FORCE )
toggle_preprocessor_switch(DLIB_USE_CUDA)
if (NOT cudnn OR NOT cudnn_include)
message(STATUS "***cuDNN NOT FOUND. DLIB WILL NOT USE CUDA.***")
message(STATUS "***cuDNN V4.0 OR GREATER NOT FOUND. DLIB WILL NOT USE CUDA.***")
endif()
if (NOT COMPILER_CAN_DO_CPP_11)
message(STATUS "***Dlib CUDA support requires C++11 but your compiler doesn't support it.***")
......
......@@ -234,15 +234,79 @@ namespace dlib
// -----------------------------------------------------------------------------------
void batch_normalize_inference (
resizable_tensor& dest,
const tensor& src,
const tensor& gamma,
const tensor& beta,
const tensor& running_means,
const tensor& running_invstds
)
{
DLIB_CASSERT(
gamma.num_samples() == 1 &&
gamma.nr() == src.nr() &&
gamma.nc() == src.nc() &&
gamma.k() == src.k() &&
have_same_dimensions(gamma, beta) &&
have_same_dimensions(gamma, running_means) &&
have_same_dimensions(gamma, running_invstds),
"\ngamma.num_samples(): " << gamma.num_samples() <<
"\ngamma.k(): " << gamma.k() <<
"\ngamma.nr(): " << gamma.nr() <<
"\ngamma.nc(): " << gamma.nc() <<
"\nbeta.num_samples(): " << beta.num_samples() <<
"\nbeta.k(): " << beta.k() <<
"\nbeta.nr(): " << beta.nr() <<
"\nbeta.nc(): " << beta.nc() <<
"\nrunning_means.num_samples(): " << running_means.num_samples() <<
"\nrunning_means.k(): " << running_means.k() <<
"\nrunning_means.nr(): " << running_means.nr() <<
"\nrunning_means.nc(): " << running_means.nc() <<
"\nrunning_invstds.num_samples(): " << running_invstds.num_samples() <<
"\nrunning_invstds.k(): " << running_invstds.k() <<
"\nrunning_invstds.nr(): " << running_invstds.nr() <<
"\nrunning_invstds.nc(): " << running_invstds.nc() <<
"\nsrc.k(): " << src.k() <<
"\nsrc.nr(): " << src.nr() <<
"\nsrc.nc(): " << src.nc()
);
dest.copy_size(src);
auto d = dest.host();
auto s = src.host();
auto g = gamma.host();
auto b = beta.host();
auto m = running_means.host();
auto i = running_invstds.host();
const long num = src.k()*src.nr()*src.nc();
for (long n = 0; n < src.num_samples(); ++n)
{
for (long k = 0; k < num; ++k)
{
*d = g[k]*(*s - m[k])*i[k] + b[k];
++d;
++s;
}
}
}
void batch_normalize (
resizable_tensor& dest,
resizable_tensor& means,
resizable_tensor& invstds,
const double averaging_factor,
resizable_tensor& running_means,
resizable_tensor& running_invstds,
const tensor& src,
const tensor& gamma,
const tensor& beta
)
{
DLIB_CASSERT(0 <= averaging_factor && averaging_factor <= 1, "averaging_factor: " << averaging_factor);
DLIB_CASSERT(averaging_factor==1 || have_same_dimensions(running_means,means),"");
DLIB_CASSERT(averaging_factor==1 || have_same_dimensions(running_invstds,invstds),"");
DLIB_CASSERT(
src.num_samples() > 1 &&
gamma.num_samples() == 1 &&
......@@ -289,12 +353,11 @@ namespace dlib
// copy data back to host
invstds.host(); means.host();
const float eps = 0.00001;
// compute variances
for (long i = 0; i < num; ++i)
{
auto actual_var = p_invstds[i] - p_means[i]*p_means[i];
p_invstds[i] = 1.0f/std::sqrt(actual_var+eps);
p_invstds[i] = 1.0f/std::sqrt(actual_var+BATCH_NORM_EPS);
}
p_src = src.host();
......@@ -311,9 +374,23 @@ namespace dlib
++p_dest;
}
}
// now keep track of the running means and invstds
running_means.copy_size(means);
running_invstds.copy_size(invstds);
if (averaging_factor != 1)
{
running_means = (1-averaging_factor)*mat(running_means) + averaging_factor*mat(means);
running_invstds = (1-averaging_factor)*mat(running_invstds) + averaging_factor*mat(invstds);
}
else
{
running_means = means;
running_invstds = invstds;
}
}
void batch_normalize_gradient::operator() (
void batch_normalize_gradient (
const tensor& gradient_input,
const tensor& means,
const tensor& invstds,
......@@ -326,6 +403,7 @@ namespace dlib
{
const long num = src.k()*src.nr()*src.nc();
DLIB_CASSERT(src.num_samples() > 1, "");
DLIB_CASSERT(num == means.size(),"");
DLIB_CASSERT(num == invstds.size(),"");
DLIB_CASSERT(num == gamma.size(),"");
......@@ -344,6 +422,7 @@ namespace dlib
const auto p_invstds = invstds.host();
const auto p_means = means.host();
resizable_tensor dvars, dmeans;
dvars.copy_size(invstds);
dmeans.copy_size(means);
dvars = 0;
......@@ -406,15 +485,82 @@ namespace dlib
// ----------------------------------------------------------------------------------------
void batch_normalize_conv_inference (
resizable_tensor& dest,
const tensor& src,
const tensor& gamma,
const tensor& beta,
const tensor& running_means,
const tensor& running_invstds
)
{
DLIB_CASSERT(
gamma.num_samples() == 1 &&
gamma.nr() == 1 &&
gamma.nc() == 1 &&
gamma.k() == src.k() &&
have_same_dimensions(gamma, beta) &&
have_same_dimensions(gamma, running_means) &&
have_same_dimensions(gamma, running_invstds),
"\ngamma.num_samples(): " << gamma.num_samples() <<
"\ngamma.k(): " << gamma.k() <<
"\ngamma.nr(): " << gamma.nr() <<
"\ngamma.nc(): " << gamma.nc() <<
"\nbeta.num_samples(): " << beta.num_samples() <<
"\nbeta.k(): " << beta.k() <<
"\nbeta.nr(): " << beta.nr() <<
"\nbeta.nc(): " << beta.nc() <<
"\nrunning_means.num_samples(): " << running_means.num_samples() <<
"\nrunning_means.k(): " << running_means.k() <<
"\nrunning_means.nr(): " << running_means.nr() <<
"\nrunning_means.nc(): " << running_means.nc() <<
"\nrunning_invstds.num_samples(): " << running_invstds.num_samples() <<
"\nrunning_invstds.k(): " << running_invstds.k() <<
"\nrunning_invstds.nr(): " << running_invstds.nr() <<
"\nrunning_invstds.nc(): " << running_invstds.nc() <<
"\nsrc.k(): " << src.k() <<
"\nsrc.nr(): " << src.nr() <<
"\nsrc.nc(): " << src.nc()
);
dest.copy_size(src);
auto d = dest.host();
auto s = src.host();
auto g = gamma.host();
auto b = beta.host();
auto m = running_means.host();
auto i = running_invstds.host();
const long num = src.nr()*src.nc();
for (long n = 0; n < src.num_samples(); ++n)
{
for (long k = 0; k < src.k(); ++k)
{
for (long j = 0; j < num; ++j)
{
*d = g[k]*(*s - m[k])*i[k] + b[k];
++d;
++s;
}
}
}
}
void batch_normalize_conv (
resizable_tensor& dest,
resizable_tensor& means,
resizable_tensor& invstds,
const double averaging_factor,
resizable_tensor& running_means,
resizable_tensor& running_invstds,
const tensor& src,
const tensor& gamma,
const tensor& beta
)
{
DLIB_CASSERT(0 <= averaging_factor && averaging_factor <= 1, "averaging_factor: " << averaging_factor);
DLIB_CASSERT(averaging_factor==1 || have_same_dimensions(running_means,means),"");
DLIB_CASSERT(averaging_factor==1 || have_same_dimensions(running_invstds,invstds),"");
DLIB_CASSERT(
src.num_samples() > 1 &&
gamma.num_samples() == 1 &&
......@@ -468,13 +614,12 @@ namespace dlib
// copy data back to host
invstds.host(); means.host();
const float eps = 0.00001;
p_src = src.host();
// compute variances
for (long k = 0; k < src.k(); ++k)
{
float actual_var = p_invstds[k] - p_means[k]*p_means[k];
p_invstds[k] = 1.0f/std::sqrt(actual_var + eps);
p_invstds[k] = 1.0f/std::sqrt(actual_var + BATCH_NORM_EPS);
}
p_src = src.host();
......@@ -492,9 +637,23 @@ namespace dlib
}
}
}
// now keep track of the running means and invstds
running_means.copy_size(means);
running_invstds.copy_size(invstds);
if (averaging_factor != 1)
{
running_means = (1-averaging_factor)*mat(running_means) + averaging_factor*mat(means);
running_invstds = (1-averaging_factor)*mat(running_invstds) + averaging_factor*mat(invstds);
}
else
{
running_means = means;
running_invstds = invstds;
}
}
void batch_normalize_conv_gradient::operator() (
void batch_normalize_conv_gradient(
const tensor& gradient_input,
const tensor& means,
const tensor& invstds,
......@@ -507,6 +666,7 @@ namespace dlib
{
const long num = src.nr()*src.nc();
DLIB_CASSERT(src.num_samples() > 1, "");
DLIB_CASSERT(src.k() == means.size(),"");
DLIB_CASSERT(src.k() == invstds.size(),"");
DLIB_CASSERT(src.k() == gamma.size(),"");
......@@ -526,6 +686,7 @@ namespace dlib
const auto p_invstds = invstds.host();
const auto p_means = means.host();
resizable_tensor dvars, dmeans;
dvars.copy_size(invstds);
dmeans.copy_size(means);
dvars = 0;
......
......@@ -13,6 +13,10 @@ namespace dlib
namespace cpu
{
// ----------------------------------------------------------------------------------------
const double BATCH_NORM_EPS = 0.00001;
// -----------------------------------------------------------------------------------
void multiply (
......@@ -73,19 +77,28 @@ namespace dlib
// -----------------------------------------------------------------------------------
void batch_normalize_inference (
resizable_tensor& dest,
const tensor& src,
const tensor& gamma,
const tensor& beta,
const tensor& running_means,
const tensor& running_invstds
);
void batch_normalize (
resizable_tensor& dest,
resizable_tensor& means,
resizable_tensor& invstds,
const double averaging_factor,
resizable_tensor& running_means,
resizable_tensor& running_invstds,
const tensor& src,
const tensor& gamma,
const tensor& beta
);
class batch_normalize_gradient
{
public:
void operator() (
void batch_normalize_gradient (
const tensor& gradient_input,
const tensor& means,
const tensor& invstds,
......@@ -95,23 +108,29 @@ namespace dlib
tensor& gamma_grad,
tensor& beta_grad
);
private:
resizable_tensor dvars, dmeans;
};
void batch_normalize_conv_inference (
resizable_tensor& dest,
const tensor& src,
const tensor& gamma,
const tensor& beta,
const tensor& running_means,
const tensor& running_invstds
);
void batch_normalize_conv (
resizable_tensor& dest,
resizable_tensor& means,
resizable_tensor& invstds,
const double averaging_factor,
resizable_tensor& running_means,
resizable_tensor& running_invstds,
const tensor& src,
const tensor& gamma,
const tensor& beta
);
class batch_normalize_conv_gradient
{
public:
void operator() (
void batch_normalize_conv_gradient (
const tensor& gradient_input,
const tensor& means,
const tensor& invstds,
......@@ -121,9 +140,6 @@ namespace dlib
tensor& gamma_grad,
tensor& beta_grad
);
private:
resizable_tensor dvars, dmeans;
};
// -----------------------------------------------------------------------------------
......
......@@ -239,509 +239,6 @@ namespace dlib
}
// -----------------------------------------------------------------------------------
// -----------------------------------------------------------------------------------
__global__ void _cuda_batch_normalize(
float* dest,
float* means,
float* invstds,
const float* src,
const float* gamma,
const float* beta,
long num,
long num_samples
)
{
const float eps = 0.00001;
const float invnum = 1.0f/num_samples;
for (auto i : grid_stride_range(0, num))
{
means[i] = 0;
invstds[i] = 0;
for (long n = 0; n < num_samples; ++n)
{
float val = src[n*num+i];
means[i] += val;
invstds[i] += val*val;
}
means[i] *= invnum;
invstds[i] *= invnum;
float actual_var = invstds[i] - means[i]*means[i];
invstds[i] = 1.0f/::sqrt(actual_var+eps);
for (long n = 0; n < num_samples; ++n)
{
long idx = n*num+i;
float temp = (src[idx] - means[i])*invstds[i];
dest[idx] = temp*gamma[i] + beta[i];
}
}
}
void batch_normalize (
resizable_tensor& dest,
resizable_tensor& means,
resizable_tensor& invstds,
const tensor& src,
const tensor& gamma,
const tensor& beta
)
{
DLIB_CASSERT(
src.num_samples() > 1 &&
gamma.num_samples() == 1 &&
beta.num_samples() == 1 &&
gamma.nr() == beta.nr() && beta.nr() == src.nr() &&
gamma.nc() == beta.nc() && beta.nc() == src.nc() &&
gamma.k() == beta.k() && beta.k() == src.k(),
"\ngamma.num_samples(): " << gamma.num_samples() <<
"\ngamma.k(): " << gamma.k() <<
"\ngamma.nr(): " << gamma.nr() <<
"\ngamma.nc(): " << gamma.nc() <<
"\nbeta.num_samples(): " << beta.num_samples() <<
"\nbeta.k(): " << beta.k() <<
"\nbeta.nr(): " << beta.nr() <<
"\nbeta.nc(): " << beta.nc() <<
"\nsrc.k(): " << src.k() <<
"\nsrc.nr(): " << src.nr() <<
"\nsrc.nc(): " << src.nc()
);
dest.copy_size(src);
means.set_size(1, src.k(), src.nr(), src.nc());
invstds.set_size(1, src.k(), src.nr(), src.nc());
_cuda_batch_normalize<<<512,512>>>(dest.device(),
means.device(),
invstds.device(),
src.device(),
gamma.device(),
beta.device(),
means.size(),
src.num_samples());
}
__global__ void _cuda_batch_normalize_gradient(
const float* grad,
const float* means,
const float* invstds,
const float* src,
const float* gamma,
float* src_grad,
float* gamma_grad,
float* beta_grad,
float* dmeans,
float* dvars,
long num,
long num_samples
)
{
const float invnum = 1.0f/num_samples;
for (auto i : grid_stride_range(0, num))
{
dvars[i] = 0;
dmeans[i] = 0;
gamma_grad[i] = 0;
beta_grad[i] = 0;
for (long n = 0; n < num_samples; ++n)
{
const long idx = n*num+i;
const float x_hat = (src[idx] - means[i])*invstds[i];
beta_grad[i] += grad[idx];
gamma_grad[i] += grad[idx]*x_hat;
const float dx = grad[idx] * gamma[i];
dvars[i] += dx*(src[idx] - means[i])*-0.5*::pow(invstds[i], 3.0f);
}
for (long n = 0; n < num_samples; ++n)
{
const long idx = n*num+i;
const float dx = grad[idx]*gamma[i];
dmeans[i] += dx*-invstds[i] + dvars[i] * -2*(src[idx] - means[i])*invnum;
}
for (long n = 0; n < num_samples; ++n)
{
const long idx = n*num+i;
const float dx = grad[idx]*gamma[i];
src_grad[idx] += dx*invstds[i] +
dvars[i] *2*(src[idx] - means[i])*invnum +
dmeans[i]*invnum;
}
}
}
void batch_normalize_gradient::operator() (
const tensor& gradient_input,
const tensor& means,
const tensor& invstds,
const tensor& src,
const tensor& gamma,
tensor& src_grad,
tensor& gamma_grad,
tensor& beta_grad
)
{
const long num = src.k()*src.nr()*src.nc();
DLIB_CASSERT(num == means.size(),"");
DLIB_CASSERT(num == invstds.size(),"");
DLIB_CASSERT(num == gamma.size(),"");
DLIB_CASSERT(num == gamma_grad.size(),"");
DLIB_CASSERT(num == beta_grad.size(),"");
DLIB_CASSERT(have_same_dimensions(gradient_input, src),"");
DLIB_CASSERT(have_same_dimensions(gradient_input, src_grad),"");
dvars.copy_size(invstds);
dmeans.copy_size(means);
_cuda_batch_normalize_gradient<<<512,512>>>(
gradient_input.device(),
means.device(),
invstds.device(),
src.device(),
gamma.device(),
src_grad.device(),
gamma_grad.device(),
beta_grad.device(),
dmeans.device(),
dvars.device(),
num,
src.num_samples());
}
// ----------------------------------------------------------------------------------------
__global__ void _cuda_batch_normalize_conv1(
float* dest,
float* means,
float* invstds,
const float* src,
const float* gamma,
const float* beta,
long num_k,
long num_samples,
long num_pixels
)
{
for (long k = 0; k < num_k; ++k)
{
float mval = 0;
float ival = 0;
// Now do two parallel reductions to compute the first two moments of the
// data.
for(auto j : grid_stride_range(0, num_samples*num_pixels))
{
long i = j%num_pixels;
long n = j/num_pixels;
float val = src[n*num_k*num_pixels + k*num_pixels +i];
mval += val;
ival += val*val;
}
warp_reduce_atomic_add(means[k], mval);
warp_reduce_atomic_add(invstds[k], ival);
}
}
__global__ void _cuda_batch_normalize_conv2(
float* means,
float* invstds,
long num_k,
long num_samples,
long num_pixels
)
{
const float scale = 1.0f/(num_samples*num_pixels);
const float eps = 0.00001;
for (auto k : grid_stride_range(0, num_k))
{
means[k] *= scale;
auto actual_var = scale*invstds[k] - means[k]*means[k];
invstds[k] = 1.0f/::sqrt(actual_var + eps);
}
}
__global__ void _cuda_batch_normalize_conv3(
float* dest,
float* means,
float* invstds,
const float* src,
const float* gamma,
const float* beta,
long num_k,
long num_samples,
long num_pixels
)
{
for (long k = 0; k < num_k; ++k)
{
for(auto j : grid_stride_range(0, num_samples*num_pixels))
{
long i = j%num_pixels;
long n = j/num_pixels;
i = n*num_k*num_pixels + k*num_pixels +i;
dest[i] = (src[i] - means[k])*invstds[k];
dest[i] = dest[i]*gamma[k] + beta[k];
}
}
}
void batch_normalize_conv (
resizable_tensor& dest,
resizable_tensor& means,
resizable_tensor& invstds,
const tensor& src,
const tensor& gamma,
const tensor& beta
)
{
DLIB_CASSERT(
src.num_samples() > 1 &&
gamma.num_samples() == 1 &&
beta.num_samples() == 1 &&
gamma.nr() == 1 &&
beta.nr() == 1 &&
gamma.nc() == 1 &&
beta.nc() == 1 &&
gamma.k() == beta.k() && beta.k() == src.k(),
"\ngamma.num_samples(): " << gamma.num_samples() <<
"\ngamma.k(): " << gamma.k() <<
"\ngamma.nr(): " << gamma.nr() <<
"\ngamma.nc(): " << gamma.nc() <<
"\nbeta.num_samples(): " << beta.num_samples() <<
"\nbeta.k(): " << beta.k() <<
"\nbeta.nr(): " << beta.nr() <<
"\nbeta.nc(): " << beta.nc() <<
"\nsrc.k(): " << src.k() <<
"\nsrc.nr(): " << src.nr() <<
"\nsrc.nc(): " << src.nc()
);
dest.copy_size(src);
means.set_size(1, src.k());
invstds.set_size(1, src.k());
means = 0;
invstds = 0;
_cuda_batch_normalize_conv1<<<512,512>>>(dest.device(),
means.device(),
invstds.device(),
src.device(),
gamma.device(),
beta.device(),
src.k(),
src.num_samples(),
src.nr()*src.nc());
_cuda_batch_normalize_conv2<<<512,512>>>(
means.device(),
invstds.device(),
src.k(),
src.num_samples(),
src.nr()*src.nc());
_cuda_batch_normalize_conv3<<<512,512>>>(dest.device(),
means.device(),
invstds.device(),
src.device(),
gamma.device(),
beta.device(),
src.k(),
src.num_samples(),
src.nr()*src.nc());
}
__global__ void _cuda_batch_normalize_conv_gradient1(
const float* grad,
const float* means,
const float* invstds,
const float* src,
const float* gamma,
float* src_grad,
float* gamma_grad,
float* beta_grad,
float* dmeans,
float* dvars,
long num_k,
long num_samples,
long num_pixels
)
{
for (long k = 0; k < num_k; ++k)
{
float bval = 0;
float gval = 0;
float dval = 0;
const float invstd_pow = -0.5f*::pow(invstds[k], 3.0f);
// Now do three parallel reductions
for(auto j : grid_stride_range(0, num_samples*num_pixels))
{
long i = j%num_pixels;
long n = j/num_pixels;
long idx = n*num_k*num_pixels + k*num_pixels +i;
const float x_hat = (src[idx] - means[k])*invstds[k];
bval += grad[idx];
gval += grad[idx]*x_hat;
const float dx = grad[idx] * gamma[k];
dval += dx*(src[idx] - means[k])*invstd_pow;
}
warp_reduce_atomic_add(beta_grad[k], bval);
warp_reduce_atomic_add(gamma_grad[k], gval);
warp_reduce_atomic_add(dvars[k], dval);
}
}
__global__ void _cuda_batch_normalize_conv_gradient2(
const float* grad,
const float* means,
const float* invstds,
const float* src,
const float* gamma,
float* src_grad,
float* gamma_grad,
float* beta_grad,
float* dmeans,
float* dvars,
long num_k,
long num_samples,
long num_pixels
)
{
const float invnum = 1.0f/(num_samples*num_pixels);
for (long k = 0; k < num_k; ++k)
{
float mval = 0;
// Now do a parallel reduction
for(auto j : grid_stride_range(0, num_samples*num_pixels))
{
long i = j%num_pixels;
long n = j/num_pixels;
long idx = n*num_k*num_pixels + k*num_pixels +i;
const float dx = grad[idx] * gamma[k];
mval += -dx*invstds[k] + dvars[k] * -2*(src[idx] - means[k])*invnum;
}
warp_reduce_atomic_add(dmeans[k], mval);
}
}
__global__ void _cuda_batch_normalize_conv_gradient3(
const float* grad,
const float* means,
const float* invstds,
const float* src,
const float* gamma,
float* src_grad,
float* gamma_grad,
float* beta_grad,
float* dmeans,
float* dvars,
long num_k,
long num_samples,
long num_pixels
)
{
const float invnum = 1.0f/(num_samples*num_pixels);
for (long k = 0; k < num_k; ++k)
{
for(auto j : grid_stride_range(0, num_samples*num_pixels))
{
long i = j%num_pixels;
long n = j/num_pixels;
long idx = n*num_k*num_pixels + k*num_pixels +i;
const float dx = grad[idx] * gamma[k];
src_grad[idx] += dx*invstds[k] +
dvars[k]*2*(src[idx] - means[k])*invnum +
dmeans[k]*invnum;
}
}
}
void batch_normalize_conv_gradient::operator() (
const tensor& gradient_input,
const tensor& means,
const tensor& invstds,
const tensor& src,
const tensor& gamma,
tensor& src_grad,
tensor& gamma_grad,
tensor& beta_grad
)
{
DLIB_CASSERT(src.k() == means.size(),"");
DLIB_CASSERT(src.k() == invstds.size(),"");
DLIB_CASSERT(src.k() == gamma.size(),"");
DLIB_CASSERT(src.k() == gamma_grad.size(),"");
DLIB_CASSERT(src.k() == beta_grad.size(),"");
DLIB_CASSERT(have_same_dimensions(gradient_input, src),"");
DLIB_CASSERT(have_same_dimensions(gradient_input, src_grad),"");
dvars.copy_size(invstds);
dmeans.copy_size(means);
dvars = 0;
dmeans = 0;
gamma_grad = 0;
beta_grad = 0;
_cuda_batch_normalize_conv_gradient1<<<512,512>>>(
gradient_input.device(),
means.device(),
invstds.device(),
src.device(),
gamma.device(),
src_grad.device(),
gamma_grad.device(),
beta_grad.device(),
dmeans.device(),
dvars.device(),
src.k(),
src.num_samples(),
src.nr()*src.nc());
_cuda_batch_normalize_conv_gradient2<<<512,512>>>(
gradient_input.device(),
means.device(),
invstds.device(),
src.device(),
gamma.device(),
src_grad.device(),
gamma_grad.device(),
beta_grad.device(),
dmeans.device(),
dvars.device(),
src.k(),
src.num_samples(),
src.nr()*src.nc());
_cuda_batch_normalize_conv_gradient3<<<512,512>>>(
gradient_input.device(),
means.device(),
invstds.device(),
src.device(),
gamma.device(),
src_grad.device(),
gamma_grad.device(),
beta_grad.device(),
dmeans.device(),
dvars.device(),
src.k(),
src.num_samples(),
src.nr()*src.nc());
}
// -----------------------------------------------------------------------------------
__global__ void _cuda_threshold(float* d, size_t n, float thresh)
......
......@@ -76,60 +76,6 @@ namespace dlib
const tensor& gradient_input
);
// -----------------------------------------------------------------------------------
void batch_normalize (
resizable_tensor& dest,
resizable_tensor& means,
resizable_tensor& invstds,
const tensor& src,
const tensor& gamma,
const tensor& beta
);
class batch_normalize_gradient
{
public:
void operator() (
const tensor& gradient_input,
const tensor& means,
const tensor& invstds,
const tensor& src,
const tensor& gamma,
tensor& src_grad,
tensor& gamma_grad,
tensor& beta_grad
);
private:
resizable_tensor dvars, dmeans;
};
void batch_normalize_conv (
resizable_tensor& dest,
resizable_tensor& means,
resizable_tensor& invstds,
const tensor& src,
const tensor& gamma,
const tensor& beta
);
class batch_normalize_conv_gradient
{
public:
void operator() (
const tensor& gradient_input,
const tensor& means,
const tensor& invstds,
const tensor& src,
const tensor& gamma,
tensor& src_grad,
tensor& gamma_grad,
tensor& beta_grad
);
private:
resizable_tensor dvars, dmeans;
};
// -----------------------------------------------------------------------------------
void threshold (
......
......@@ -11,6 +11,7 @@
#include <iostream>
#include <string>
#include "cuda_utils.h"
#include "cpu_dlib.h"
static const char* cudnn_get_error_string(cudnnStatus_t s)
{
......@@ -271,6 +272,344 @@ namespace dlib
grad.device()));
}
// ------------------------------------------------------------------------------------
void batch_normalize_inference (
resizable_tensor& dest,
const tensor& src,
const tensor& gamma,
const tensor& beta,
const tensor& running_means,
const tensor& running_invstds
)
{
DLIB_CASSERT(
gamma.num_samples() == 1 &&
gamma.nr() == src.nr() &&
gamma.nc() == src.nc() &&
gamma.k() == src.k() &&
have_same_dimensions(gamma, beta) &&
have_same_dimensions(gamma, running_means) &&
have_same_dimensions(gamma, running_invstds),
"\ngamma.num_samples(): " << gamma.num_samples() <<
"\ngamma.k(): " << gamma.k() <<
"\ngamma.nr(): " << gamma.nr() <<
"\ngamma.nc(): " << gamma.nc() <<
"\nbeta.num_samples(): " << beta.num_samples() <<
"\nbeta.k(): " << beta.k() <<
"\nbeta.nr(): " << beta.nr() <<
"\nbeta.nc(): " << beta.nc() <<
"\nrunning_means.num_samples(): " << running_means.num_samples() <<
"\nrunning_means.k(): " << running_means.k() <<
"\nrunning_means.nr(): " << running_means.nr() <<
"\nrunning_means.nc(): " << running_means.nc() <<
"\nrunning_invstds.num_samples(): " << running_invstds.num_samples() <<
"\nrunning_invstds.k(): " << running_invstds.k() <<
"\nrunning_invstds.nr(): " << running_invstds.nr() <<
"\nrunning_invstds.nc(): " << running_invstds.nc() <<
"\nsrc.k(): " << src.k() <<
"\nsrc.nr(): " << src.nr() <<
"\nsrc.nc(): " << src.nc()
);
const float in_scale = 1;
const float out_scale = 0;
dest.copy_size(src);
CHECK_CUDNN(cudnnBatchNormalizationForwardInference(
context(),
CUDNN_BATCHNORM_PER_ACTIVATION,
&in_scale,
&out_scale,
descriptor(src),
src.device(),
descriptor(dest),
dest.device(),
descriptor(gamma),
gamma.device(),
beta.device(),
running_means.device(),
running_invstds.device(),
dlib::cpu::BATCH_NORM_EPS));
}
void batch_normalize (
resizable_tensor& dest,
resizable_tensor& means,
resizable_tensor& invstds,
const double averaging_factor,
resizable_tensor& running_means,
resizable_tensor& running_invstds,
const tensor& src,
const tensor& gamma,
const tensor& beta
)
{
DLIB_CASSERT(0 <= averaging_factor && averaging_factor <= 1, "averaging_factor: " << averaging_factor);
DLIB_CASSERT(averaging_factor==1 || have_same_dimensions(running_means,means),"");
DLIB_CASSERT(averaging_factor==1 || have_same_dimensions(running_invstds,invstds),"");
DLIB_CASSERT(
src.num_samples() > 1 &&
gamma.num_samples() == 1 &&
beta.num_samples() == 1 &&
gamma.nr() == beta.nr() && beta.nr() == src.nr() &&
gamma.nc() == beta.nc() && beta.nc() == src.nc() &&
gamma.k() == beta.k() && beta.k() == src.k(),
"\ngamma.num_samples(): " << gamma.num_samples() <<
"\ngamma.k(): " << gamma.k() <<
"\ngamma.nr(): " << gamma.nr() <<
"\ngamma.nc(): " << gamma.nc() <<
"\nbeta.num_samples(): " << beta.num_samples() <<
"\nbeta.k(): " << beta.k() <<
"\nbeta.nr(): " << beta.nr() <<
"\nbeta.nc(): " << beta.nc() <<
"\nsrc.k(): " << src.k() <<
"\nsrc.nr(): " << src.nr() <<
"\nsrc.nc(): " << src.nc()
);
const float in_scale = 1;
const float out_scale = 0;
dest.copy_size(src);
means.set_size(1, src.k(), src.nr(), src.nc());
invstds.copy_size(means);
running_means.copy_size(means);
running_invstds.copy_size(means);
CHECK_CUDNN(cudnnBatchNormalizationForwardTraining(
context(),
CUDNN_BATCHNORM_PER_ACTIVATION,
&in_scale,
&out_scale,
descriptor(src),
src.device(),
descriptor(dest),
dest.device(),
descriptor(gamma),
gamma.device(),
beta.device(),
averaging_factor,
running_means.device(),
running_invstds.device(),
dlib::cpu::BATCH_NORM_EPS,
means.device(),
invstds.device()));
}
void batch_normalize_gradient(
const tensor& gradient_input,
const tensor& means,
const tensor& invstds,
const tensor& src,
const tensor& gamma,
tensor& src_grad,
tensor& gamma_grad,
tensor& beta_grad
)
{
const long num = src.k()*src.nr()*src.nc();
DLIB_CASSERT(src.num_samples() > 1, "");
DLIB_CASSERT(num == means.size(),"");
DLIB_CASSERT(num == invstds.size(),"");
DLIB_CASSERT(num == gamma.size(),"");
DLIB_CASSERT(num == gamma_grad.size(),"");
DLIB_CASSERT(num == beta_grad.size(),"");
DLIB_CASSERT(have_same_dimensions(gradient_input, src),"");
DLIB_CASSERT(have_same_dimensions(gradient_input, src_grad),"");
const float in_scale = 1;
const float out_scale = 1;
CHECK_CUDNN(cudnnBatchNormalizationBackward(
context(),
CUDNN_BATCHNORM_PER_ACTIVATION,
&in_scale,
&out_scale,
descriptor(src),
src.device(),
descriptor(gradient_input),
gradient_input.device(),
descriptor(src_grad),
src_grad.device(),
descriptor(gamma),
gamma.device(),
gamma_grad.device(),
beta_grad.device(),
dlib::cpu::BATCH_NORM_EPS,
means.device(),
invstds.device()));
}
// ------------------------------------------------------------------------------------
void batch_normalize_conv_inference (
resizable_tensor& dest,
const tensor& src,
const tensor& gamma,
const tensor& beta,
const tensor& running_means,
const tensor& running_invstds
)
{
DLIB_CASSERT(
gamma.num_samples() == 1 &&
gamma.nr() == 1 &&
gamma.nc() == 1 &&
gamma.k() == src.k() &&
have_same_dimensions(gamma, beta) &&
have_same_dimensions(gamma, running_means) &&
have_same_dimensions(gamma, running_invstds),
"\ngamma.num_samples(): " << gamma.num_samples() <<
"\ngamma.k(): " << gamma.k() <<
"\ngamma.nr(): " << gamma.nr() <<
"\ngamma.nc(): " << gamma.nc() <<
"\nbeta.num_samples(): " << beta.num_samples() <<
"\nbeta.k(): " << beta.k() <<
"\nbeta.nr(): " << beta.nr() <<
"\nbeta.nc(): " << beta.nc() <<
"\nrunning_means.num_samples(): " << running_means.num_samples() <<
"\nrunning_means.k(): " << running_means.k() <<
"\nrunning_means.nr(): " << running_means.nr() <<
"\nrunning_means.nc(): " << running_means.nc() <<
"\nrunning_invstds.num_samples(): " << running_invstds.num_samples() <<
"\nrunning_invstds.k(): " << running_invstds.k() <<
"\nrunning_invstds.nr(): " << running_invstds.nr() <<
"\nrunning_invstds.nc(): " << running_invstds.nc() <<
"\nsrc.k(): " << src.k() <<
"\nsrc.nr(): " << src.nr() <<
"\nsrc.nc(): " << src.nc()
);
const float in_scale = 1;
const float out_scale = 0;
dest.copy_size(src);
CHECK_CUDNN(cudnnBatchNormalizationForwardInference(
context(),
CUDNN_BATCHNORM_SPATIAL,
&in_scale,
&out_scale,
descriptor(src),
src.device(),
descriptor(dest),
dest.device(),
descriptor(gamma),
gamma.device(),
beta.device(),
running_means.device(),
running_invstds.device(),
dlib::cpu::BATCH_NORM_EPS));
}
void batch_normalize_conv (
resizable_tensor& dest,
resizable_tensor& means,
resizable_tensor& invstds,
const double averaging_factor,
resizable_tensor& running_means,
resizable_tensor& running_invstds,
const tensor& src,
const tensor& gamma,
const tensor& beta
)
{
DLIB_CASSERT(0 <= averaging_factor && averaging_factor <= 1, "averaging_factor: " << averaging_factor);
DLIB_CASSERT(averaging_factor==1 || have_same_dimensions(running_means,means),"");
DLIB_CASSERT(averaging_factor==1 || have_same_dimensions(running_invstds,invstds),"");
DLIB_CASSERT(
src.num_samples() > 1 &&
gamma.num_samples() == 1 &&
beta.num_samples() == 1 &&
gamma.nr() == 1 &&
beta.nr() == 1 &&
gamma.nc() == 1 &&
beta.nc() == 1 &&
gamma.k() == beta.k() && beta.k() == src.k(),
"\ngamma.num_samples(): " << gamma.num_samples() <<
"\ngamma.k(): " << gamma.k() <<
"\ngamma.nr(): " << gamma.nr() <<
"\ngamma.nc(): " << gamma.nc() <<
"\nbeta.num_samples(): " << beta.num_samples() <<
"\nbeta.k(): " << beta.k() <<
"\nbeta.nr(): " << beta.nr() <<
"\nbeta.nc(): " << beta.nc() <<
"\nsrc.k(): " << src.k() <<
"\nsrc.nr(): " << src.nr() <<
"\nsrc.nc(): " << src.nc()
);
const float in_scale = 1;
const float out_scale = 0;
dest.copy_size(src);
means.set_size(1, src.k());
invstds.copy_size(means);
running_means.copy_size(means);
running_invstds.copy_size(means);
CHECK_CUDNN(cudnnBatchNormalizationForwardTraining(
context(),
CUDNN_BATCHNORM_SPATIAL,
&in_scale,
&out_scale,
descriptor(src),
src.device(),
descriptor(dest),
dest.device(),
descriptor(gamma),
gamma.device(),
beta.device(),
averaging_factor,
running_means.device(),
running_invstds.device(),
dlib::cpu::BATCH_NORM_EPS,
means.device(),
invstds.device()));
}
void batch_normalize_conv_gradient(
const tensor& gradient_input,
const tensor& means,
const tensor& invstds,
const tensor& src,
const tensor& gamma,
tensor& src_grad,
tensor& gamma_grad,
tensor& beta_grad
)
{
const long num = src.nr()*src.nc();
DLIB_CASSERT(src.k() == means.size(),"");
DLIB_CASSERT(src.k() == invstds.size(),"");
DLIB_CASSERT(src.k() == gamma.size(),"");
DLIB_CASSERT(src.k() == gamma_grad.size(),"");
DLIB_CASSERT(src.k() == beta_grad.size(),"");
DLIB_CASSERT(have_same_dimensions(gradient_input, src),"");
DLIB_CASSERT(have_same_dimensions(gradient_input, src_grad),"");
const float in_scale = 1;
const float out_scale = 1;
CHECK_CUDNN(cudnnBatchNormalizationBackward(
context(),
CUDNN_BATCHNORM_SPATIAL,
&in_scale,
&out_scale,
descriptor(src),
src.device(),
descriptor(gradient_input),
gradient_input.device(),
descriptor(src_grad),
src_grad.device(),
descriptor(gamma),
gamma.device(),
gamma_grad.device(),
beta_grad.device(),
dlib::cpu::BATCH_NORM_EPS,
means.device(),
invstds.device()));
}
// ------------------------------------------------------------------------------------
// ------------------------------------------------------------------------------------
......@@ -636,7 +975,8 @@ namespace dlib
CUDNN_POOLING_MAX,
window_height,
window_width,
0,0, // no padding
window_height/2,
window_width/2,
stride_y,
stride_x));
}
......@@ -671,8 +1011,18 @@ namespace dlib
DLIB_CASSERT(dest.num_samples() == src.num_samples(),"");
DLIB_CASSERT(dest.k() == src.k(),"");
DLIB_CASSERT(dest.nr() == src.nr()/stride_y, stride_y << ", " << dest.nr() << " " << src.nr()/stride_y);
DLIB_CASSERT(dest.nc() == src.nc()/stride_x, stride_x << ", " << dest.nc() << " " << src.nc()/stride_x);
DLIB_CASSERT(dest.nr() == 1+(src.nr()-window_height%2)/stride_y,
"\n stride_y: " << stride_y <<
"\n window_height: " << window_height <<
"\n src.nr(): " << src.nr() <<
"\n dest.nr(): " << dest.nr() <<
"\n src.nr()/stride_y: " << src.nr()/stride_y);
DLIB_CASSERT(dest.nc() == 1+(src.nc()-window_width%2)/stride_x,
"\n stride_x: " << stride_x <<
"\n window_width: " << window_width <<
"\n src.nc(): " << src.nc() <<
"\n dest.nc(): " << dest.nc() <<
"\n src.nc()/stride_x: " << src.nc()/stride_x);
CHECK_CUDNN(cudnnPoolingForward(context(),
(const cudnnPoolingDescriptor_t)handle,
......
......@@ -132,6 +132,74 @@ namespace dlib
assigns it to grad.
!*/
// ------------------------------------------------------------------------------------
void batch_normalize_inference (
resizable_tensor& dest,
const tensor& src,
const tensor& gamma,
const tensor& beta,
const tensor& running_means,
const tensor& running_invstds
);
void batch_normalize (
resizable_tensor& dest,
resizable_tensor& means,
resizable_tensor& invstds,
const double averaging_factor,
resizable_tensor& running_means,
resizable_tensor& running_invstds,
const tensor& src,
const tensor& gamma,
const tensor& beta
);
void batch_normalize_gradient(
const tensor& gradient_input,
const tensor& means,
const tensor& invstds,
const tensor& src,
const tensor& gamma,
tensor& src_grad,
tensor& gamma_grad,
tensor& beta_grad
);
// ------------------------------------------------------------------------------------
void batch_normalize_conv_inference (
resizable_tensor& dest,
const tensor& src,
const tensor& gamma,
const tensor& beta,
const tensor& running_means,
const tensor& running_invstds
);
void batch_normalize_conv (
resizable_tensor& dest,
resizable_tensor& means,
resizable_tensor& invstds,
const double averaging_factor,
resizable_tensor& running_means,
resizable_tensor& running_invstds,
const tensor& src,
const tensor& gamma,
const tensor& beta
);
void batch_normalize_conv_gradient(
const tensor& gradient_input,
const tensor& means,
const tensor& invstds,
const tensor& src,
const tensor& gamma,
tensor& src_grad,
tensor& gamma_grad,
tensor& beta_grad
);
// ------------------------------------------------------------------------------------
class tensor_conv
......@@ -295,14 +363,14 @@ namespace dlib
ensures
- #dest.num_samples() == src.num_samples()
- #dest.k() == src.k()
- #dest.nr() == src.nr()/stride_y
- #dest.nc() == src.nc()/stride_x
- #dest.nr() == 1+(src.nr()-window_height%2)/stride_y
- #dest.nc() == 1+(src.nc()-window_width%2)/stride_x
- for all valid s, k, r, and c:
- image_plane(#dest,s,k)(r,c) == max(subm_clipped(image_plane(src,s,k),
centered_rect(c*stride_x,
r*stride_y,
c*stride_x,
window_height,
window_width))
window_width,
window_height)))
!*/
void get_gradient(
......
......@@ -298,19 +298,37 @@ namespace dlib
// ----------------------------------------------------------------------------------------
enum batch_normalization_mode
{
BATCH_NORM_CONV = 0,
BATCH_NORM_FC = 1
};
class bn_
{
public:
bn_() : num_updates(0), running_stats_window_size(1000), running_nim_out_of_date(true)
bn_() : num_updates(0), running_stats_window_size(1000), mode(BATCH_NORM_FC)
{}
explicit bn_(batch_normalization_mode mode_) : num_updates(0), running_stats_window_size(1000), mode(mode_)
{}
batch_normalization_mode get_mode() const { return mode; }
template <typename SUBNET>
void setup (const SUBNET& sub)
{
if (mode == BATCH_NORM_FC)
{
gamma = alias_tensor(1,
sub.get_output().k(),
sub.get_output().nr(),
sub.get_output().nc());
}
else
{
gamma = alias_tensor(1, sub.get_output().k());
}
beta = gamma;
params.set_size(gamma.size()+beta.size());
......@@ -318,15 +336,11 @@ namespace dlib
gamma(params,0) = 1;
beta(params,gamma.size()) = 0;
running_means.set_size(1,
sub.get_output().k(),
sub.get_output().nr(),
sub.get_output().nc());
running_invstds.copy_size(running_means);
running_means.copy_size(gamma(params,0));
running_invstds.copy_size(gamma(params,0));
running_means = 0;
running_invstds = 1;
num_updates = 0;
running_nim_out_of_date = true;
}
template <typename SUBNET>
......@@ -336,27 +350,20 @@ namespace dlib
auto b = beta(params,gamma.size());
if (sub.get_output().num_samples() > 1)
{
tt::batch_normalize(output, means, invstds, sub.get_output(), g, b);
const double decay = num_updates/(num_updates+1.0);
const double decay = 1.0 - num_updates/(num_updates+1.0);
if (num_updates <running_stats_window_size)
++num_updates;
tt::affine_transform(running_means, running_means, means, decay, 1-decay, 0);
tt::affine_transform(running_invstds, running_invstds, invstds, decay, 1-decay, 0);
running_nim_out_of_date = true;
if (mode == BATCH_NORM_FC)
tt::batch_normalize(output, means, invstds, decay, running_means, running_invstds, sub.get_output(), g, b);
else
tt::batch_normalize_conv(output, means, invstds, decay, running_means, running_invstds, sub.get_output(), g, b);
}
else // we are running in testing mode so we just linearly scale the input tensor.
{
if (running_nim_out_of_date)
{
running_nim_out_of_date = false;
running_nim.copy_size(running_means);
tt::multiply(running_nim, running_means, running_invstds);
running_nim *= -1;
}
output.copy_size(sub.get_output());
tt::affine_transform(output, sub.get_output(), running_invstds, running_nim);
tt::affine_transform(output, output, g, b);
if (mode == BATCH_NORM_FC)
tt::batch_normalize_inference(output, sub.get_output(), g, b, running_means, running_invstds);
else
tt::batch_normalize_conv_inference(output, sub.get_output(), g, b, running_means, running_invstds);
}
}
......@@ -366,7 +373,10 @@ namespace dlib
auto g = gamma(params,0);
auto g_grad = gamma(params_grad, 0);
auto b_grad = beta(params_grad, gamma.size());
bng(gradient_input, means, invstds, sub.get_output(), g, sub.get_gradient_input(), g_grad, b_grad);
if (mode == BATCH_NORM_FC)
tt::batch_normalize_gradient(gradient_input, means, invstds, sub.get_output(), g, sub.get_gradient_input(), g_grad, b_grad );
else
tt::batch_normalize_conv_gradient(gradient_input, means, invstds, sub.get_output(), g, sub.get_gradient_input(), g_grad, b_grad );
}
const tensor& get_layer_params() const { return params; }
......@@ -384,6 +394,7 @@ namespace dlib
serialize(item.running_invstds, out);
serialize(item.num_updates, out);
serialize(item.running_stats_window_size, out);
serialize((int)item.mode, out);
}
friend void deserialize(bn_& item, std::istream& in)
......@@ -401,21 +412,20 @@ namespace dlib
deserialize(item.running_invstds, in);
deserialize(item.num_updates, in);
deserialize(item.running_stats_window_size, in);
item.running_nim_out_of_date = true;
int mode;
deserialize(mode, in);
item.mode = (batch_normalization_mode)mode;
}
private:
tt::batch_normalize_gradient bng;
resizable_tensor params;
alias_tensor gamma, beta;
resizable_tensor means, running_means;
resizable_tensor invstds, running_invstds;
unsigned long num_updates;
unsigned long running_stats_window_size;
bool running_nim_out_of_date;
resizable_tensor running_nim;
batch_normalization_mode mode;
};
template <typename SUBNET>
......
......@@ -563,6 +563,12 @@ namespace dlib
// ----------------------------------------------------------------------------------------
enum batch_normalization_mode
{
BATCH_NORM_CONV = 0,
BATCH_NORM_FC = 1
};
class bn_
{
/*!
......@@ -581,6 +587,34 @@ namespace dlib
public:
bn_(
);
/*!
ensures
- #get_mode() == BATCH_NORM_FC
!*/
explicit bn_(
batch_normalization_mode mode
);
/*!
ensures
- #get_mode() == mode
!*/
batch_normalization_mode get_mode(
) const;
/*!
ensures
- returns the mode of this layer, either BATCH_NORM_CONV or BATCH_NORM_FC.
If the mode is BATCH_NORM_FC then the normalization is applied across the
samples in a tensor (i.e. k()*nr()*nc() different things will be
normalized). Otherwise, normalization is applied across everything
except for the k() dimension, resulting in there being only k()
normalization equations that are applied spatially over the tensor.
Therefore, if you are putting batch normalization after a fully connected
layer you should use BATCH_NORM_FC. Otherwise, if you are putting batch
normalization after a convolutional layer you should use BATCH_NORM_CONV.
!*/
template <typename SUBNET> void setup (const SUBNET& sub);
template <typename SUBNET> void forward(const SUBNET& sub, resizable_tensor& output);
......
......@@ -183,37 +183,113 @@ namespace dlib { namespace tt
// ----------------------------------------------------------------------------------------
void batch_normalize_inference (
resizable_tensor& dest,
const tensor& src,
const tensor& gamma,
const tensor& beta,
const tensor& running_means,
const tensor& running_invstds
)
{
#ifdef DLIB_USE_CUDA
cuda::batch_normalize_inference(dest,src,gamma,beta,running_means,running_invstds);
#else
cpu::batch_normalize_inference(dest,src,gamma,beta,running_means,running_invstds);
#endif
}
void batch_normalize (
resizable_tensor& dest,
resizable_tensor& means,
resizable_tensor& vars,
const double averaging_factor,
resizable_tensor& running_means,
resizable_tensor& running_invstds,
const tensor& src,
const tensor& gamma,
const tensor& beta
)
{
#ifdef DLIB_USE_CUDA
cuda::batch_normalize(dest,means,vars,src,gamma,beta);
cuda::batch_normalize(dest,means,vars,averaging_factor,running_means,running_invstds,src,gamma,beta);
#else
cpu::batch_normalize(dest,means,vars,averaging_factor,running_means,running_invstds,src,gamma,beta);
#endif
}
void batch_normalize_gradient (
const tensor& gradient_input,
const tensor& means,
const tensor& invstds,
const tensor& src,
const tensor& gamma,
tensor& src_grad,
tensor& gamma_grad,
tensor& beta_grad
)
{
#ifdef DLIB_USE_CUDA
cuda::batch_normalize_gradient(gradient_input, means, invstds, src, gamma, src_grad, gamma_grad, beta_grad);
#else
cpu::batch_normalize(dest,means,vars,src,gamma,beta);
cpu::batch_normalize_gradient(gradient_input, means, invstds, src, gamma, src_grad, gamma_grad, beta_grad);
#endif
}
// ----------------------------------------------------------------------------------------
void batch_normalize_conv_inference (
resizable_tensor& dest,
const tensor& src,
const tensor& gamma,
const tensor& beta,
const tensor& running_means,
const tensor& running_invstds
)
{
#ifdef DLIB_USE_CUDA
cuda::batch_normalize_conv_inference(dest,src,gamma,beta,running_means,running_invstds);
#else
cpu::batch_normalize_conv_inference(dest,src,gamma,beta,running_means,running_invstds);
#endif
}
void batch_normalize_conv (
resizable_tensor& dest,
resizable_tensor& means,
resizable_tensor& vars,
const double averaging_factor,
resizable_tensor& running_means,
resizable_tensor& running_invstds,
const tensor& src,
const tensor& gamma,
const tensor& beta
)
{
#ifdef DLIB_USE_CUDA
cuda::batch_normalize_conv(dest,means,vars,src,gamma,beta);
cuda::batch_normalize_conv(dest,means,vars,averaging_factor,running_means,running_invstds,src,gamma,beta);
#else
cpu::batch_normalize_conv(dest,means,vars,averaging_factor,running_means,running_invstds,src,gamma,beta);
#endif
}
void batch_normalize_conv_gradient (
const tensor& gradient_input,
const tensor& means,
const tensor& invstds,
const tensor& src,
const tensor& gamma,
tensor& src_grad,
tensor& gamma_grad,
tensor& beta_grad
)
{
#ifdef DLIB_USE_CUDA
cuda::batch_normalize_conv_gradient(gradient_input, means, invstds, src, gamma, src_grad, gamma_grad, beta_grad);
#else
cpu::batch_normalize_conv(dest,means,vars,src,gamma,beta);
cpu::batch_normalize_conv_gradient(gradient_input, means, invstds, src, gamma, src_grad, gamma_grad, beta_grad);
#endif
}
......
......@@ -198,10 +198,38 @@ namespace dlib { namespace tt
// ----------------------------------------------------------------------------------------
void batch_normalize_inference (
resizable_tensor& dest,
const tensor& src,
const tensor& gamma,
const tensor& beta,
const tensor& running_means,
const tensor& running_invstds
);
/*!
requires
- gamma.num_samples() == 1
- gamma.nr() == src.nr()
- gamma.nc() == src.nc()
- gamma.k() == src.k()
- have_same_dimensions(gamma, beta)
- have_same_dimensions(gamma, running_means)
- have_same_dimensions(gamma, running_invstds)
ensures
- Just linearly transforms src as a call to batch_normalize() would if the resulting
means and invstds were running_means and running_invstds. That is, this function
performs:
dest = gamma*(src-running_means)*running_invstds + beta
Note that it does it in a pointwise fashion over the samples in src.
!*/
void batch_normalize (
resizable_tensor& dest,
resizable_tensor& means,
resizable_tensor& invstds,
const double averaging_factor,
resizable_tensor& running_means,
resizable_tensor& running_invstds,
const tensor& src,
const tensor& gamma,
const tensor& beta
......@@ -214,6 +242,10 @@ namespace dlib { namespace tt
- gamma.nr() == beta.nr() == src.nr()
- gamma.nc() == beta.nc() == src.nc()
- gamma.k() == beta.k() == src.k()
- 0 <= averaging_factor <= 1
- if (averaging_factor != 1)
- have_same_dimensions(running_means, means) == true
- have_same_dimensions(running_invstds, invstds) == true
ensures
- have_same_dimensions(#dest, src) == true
- #means.num_samples() == 1
......@@ -224,12 +256,11 @@ namespace dlib { namespace tt
- #src == the batch normalized version of src.
- #means == the mean values of the contents of src.
- #invstds == 1/(the standard deviation values of the contents of src).
- #running_means = (1-averaging_factor)*mat(#running_means) + averaging_factor*mat(#means);
- #running_invstds = (1-averaging_factor)*mat(#running_invstds) + averaging_factor*mat(#invstds);
!*/
class batch_normalize_gradient
{
public:
void operator() (
void batch_normalize_gradient (
const tensor& gradient_input,
const tensor& means,
const tensor& invstds,
......@@ -238,7 +269,7 @@ namespace dlib { namespace tt
tensor& src_grad,
tensor& gamma_grad,
tensor& beta_grad
){impl(gradient_input,means,invstds,src,gamma,src_grad,gamma_grad,beta_grad);}
);
/*!
requires
- invstds and means should be the output of a call to
......@@ -261,20 +292,42 @@ namespace dlib { namespace tt
- Assigns the gradient of f() with respect to gamma to #gamma_grad.
- Assigns the gradient of f() with respect to beta to #beta_grad.
!*/
private:
#ifdef DLIB_USE_CUDA
cuda::batch_normalize_gradient impl;
#else
cpu::batch_normalize_gradient impl;
#endif
};
// ----------------------------------------------------------------------------------------
void batch_normalize_conv_inference (
resizable_tensor& dest,
const tensor& src,
const tensor& gamma,
const tensor& beta,
const tensor& running_means,
const tensor& running_invstds
);
/*!
requires
- gamma.num_samples() == 1
- gamma.nr() == 1
- gamma.nc() == 1
- gamma.k() == src.k()
- have_same_dimensions(gamma, beta)
- have_same_dimensions(gamma, running_means)
- have_same_dimensions(gamma, running_invstds)
ensures
- Just linearly transforms src as a call to batch_normalize() would if the resulting
means and invstds were running_means and running_invstds. That is, this function
performs:
dest = gamma*(src-running_means)*running_invstds + beta
Note that it does it in a pointwise fashion over the samples, rows, and
columns in src.
!*/
void batch_normalize_conv (
resizable_tensor& dest,
resizable_tensor& means,
resizable_tensor& invstds,
const double averaging_factor,
resizable_tensor& running_means,
resizable_tensor& running_invstds,
const tensor& src,
const tensor& gamma,
const tensor& beta
......@@ -285,6 +338,10 @@ namespace dlib { namespace tt
- gamma.num_samples()==gamma.nr()==gamma.nc() == 1
- beta.num_samples() ==beta.nr() ==gamma.nc() == 1
- gamma.k() == beta.k() == src.k()
- 0 <= averaging_factor <= 1
- if (averaging_factor != 1)
- have_same_dimensions(running_means, means) == true
- have_same_dimensions(running_invstds, invstds) == true
ensures
- have_same_dimensions(#dest, src) == true
- #means.num_samples()==means.nr()==means.nc() == 1
......@@ -293,12 +350,11 @@ namespace dlib { namespace tt
- #src == the batch normalized version of src.
- #means == the mean values of the contents of src.
- #invstds == 1/(the standard deviation values of the contents of src).
- #running_means = (1-averaging_factor)*mat(#running_means) + averaging_factor*mat(#means);
- #running_invstds = (1-averaging_factor)*mat(#running_invstds) + averaging_factor*mat(#invstds);
!*/
class batch_normalize_conv_gradient
{
public:
void operator() (
void batch_normalize_conv_gradient (
const tensor& gradient_input,
const tensor& means,
const tensor& invstds,
......@@ -307,7 +363,7 @@ namespace dlib { namespace tt
tensor& src_grad,
tensor& gamma_grad,
tensor& beta_grad
){impl(gradient_input,means,invstds,src,gamma,src_grad,gamma_grad,beta_grad);}
);
/*!
requires
- invstds and means should be the output of a call to
......@@ -328,13 +384,6 @@ namespace dlib { namespace tt
- Assigns the gradient of f() with respect to gamma to #gamma_grad.
- Assigns the gradient of f() with respect to beta to #beta_grad.
!*/
private:
#ifdef DLIB_USE_CUDA
cuda::batch_normalize_conv_gradient impl;
#else
cpu::batch_normalize_conv_gradient impl;
#endif
};
// -----------------------------------------------------------------------------------
......@@ -540,14 +589,14 @@ namespace dlib { namespace tt
ensures
- #dest.num_samples() == src.num_samples()
- #dest.k() == src.k()
- #dest.nr() == src.nr()/stride_y
- #dest.nc() == src.nc()/stride_x
- #dest.nr() == 1+(src.nr()-window_height%2)/stride_y
- #dest.nc() == 1+(src.nc()-window_width%2)/stride_x
- for all valid s, k, r, and c:
- image_plane(#dest,s,k)(r,c) == max(subm_clipped(image_plane(src,s,k),
centered_rect(c*stride_x,
r*stride_y,
c*stride_x,
window_height,
window_width))
window_width,
window_height)))
!*/
void get_gradient(
......
......@@ -14,139 +14,6 @@ include(../cmake)
# This variable contains a list of all the tests we are building
# into the regression test suite.
set (tests
example.cpp
active_learning.cpp
any.cpp
any_function.cpp
array2d.cpp
array.cpp
assignment_learning.cpp
base64.cpp
bayes_nets.cpp
bigint.cpp
binary_search_tree_kernel_1a.cpp
binary_search_tree_kernel_2a.cpp
binary_search_tree_mm1.cpp
binary_search_tree_mm2.cpp
bridge.cpp
bsp.cpp
byte_orderer.cpp
cca.cpp
clustering.cpp
cmd_line_parser.cpp
cmd_line_parser_wchar_t.cpp
compress_stream.cpp
conditioning_class_c.cpp
conditioning_class.cpp
config_reader.cpp
crc32.cpp
create_iris_datafile.cpp
data_io.cpp
directed_graph.cpp
discriminant_pca.cpp
disjoint_subsets.cpp
ekm_and_lisf.cpp
empirical_kernel_map.cpp
entropy_coder.cpp
entropy_encoder_model.cpp
example_args.cpp
face.cpp
fft.cpp
fhog.cpp
filtering.cpp
find_max_factor_graph_nmplp.cpp
find_max_factor_graph_viterbi.cpp
geometry.cpp
graph.cpp
graph_cuts.cpp
graph_labeler.cpp
hash.cpp
hash_map.cpp
hash_set.cpp
hash_table.cpp
hog_image.cpp
image.cpp
iosockstream.cpp
is_same_object.cpp
kcentroid.cpp
kernel_matrix.cpp
kmeans.cpp
learning_to_track.cpp
least_squares.cpp
linear_manifold_regularizer.cpp
lspi.cpp
lz77_buffer.cpp
map.cpp
matrix2.cpp
matrix3.cpp
matrix4.cpp
matrix_chol.cpp
matrix.cpp
matrix_eig.cpp
matrix_lu.cpp
matrix_qr.cpp
max_cost_assignment.cpp
max_sum_submatrix.cpp
md5.cpp
member_function_pointer.cpp
metaprogramming.cpp
mpc.cpp
multithreaded_object.cpp
numerical_integration.cpp
object_detector.cpp
oca.cpp
one_vs_all_trainer.cpp
one_vs_one_trainer.cpp
optimization.cpp
optimization_test_functions.cpp
opt_qp_solver.cpp
parallel_for.cpp
parse.cpp
pipe.cpp
pixel.cpp
probabilistic.cpp
pyramid_down.cpp
queue.cpp
rand.cpp
ranking.cpp
read_write_mutex.cpp
reference_counter.cpp
rls.cpp
sammon.cpp
scan_image.cpp
sequence.cpp
sequence_labeler.cpp
sequence_segmenter.cpp
serialize.cpp
set.cpp
sldf.cpp
sliding_buffer.cpp
smart_pointers.cpp
sockets2.cpp
sockets.cpp
sockstreambuf.cpp
sparse_vector.cpp
stack.cpp
static_map.cpp
static_set.cpp
statistics.cpp
std_vector_c.cpp
string.cpp
svm_c_linear.cpp
svm_c_linear_dcd.cpp
svm.cpp
svm_multiclass_linear.cpp
svm_struct.cpp
svr_linear_trainer.cpp
symmetric_matrix_cache.cpp
thread_pool.cpp
threads.cpp
timer.cpp
tokenizer.cpp
trust_region.cpp
tuple.cpp
type_safe_union.cpp
vectorstream.cpp
)
# Tests that require C++11 support
......@@ -171,9 +38,3 @@ endif()
TARGET_LINK_LIBRARIES(${target_name} dlib )
if (NOT DLIB_NO_GUI_SUPPORT)
add_subdirectory(gui)
add_subdirectory(examples)
endif()
......@@ -151,7 +151,7 @@ namespace
void test_batch_normalize()
{
print_spinner();
resizable_tensor src(5,5), gamma(1,5), beta(1,5), dest, means, vars, gradient_input(5,5);
resizable_tensor src(5,5), gamma(1,5), beta(1,5), dest, dest2, means, vars, gradient_input(5,5);
src = matrix_cast<float>(gaussian_randm(5,5, 0));
gamma = matrix_cast<float>(gaussian_randm(1,5, 1));
beta = matrix_cast<float>(gaussian_randm(1,5, 2));
......@@ -160,14 +160,18 @@ namespace
gamma = 1;
beta = 0;
batch_normalize(dest, means, vars, src, gamma, beta);
resizable_tensor running_means;
resizable_tensor running_invstds;
batch_normalize(dest, means, vars, 1, running_means, running_invstds, src, gamma, beta);
batch_normalize_inference(dest2, src, gamma, beta, running_means, running_invstds);
DLIB_TEST(max(abs(mat(dest2)-mat(dest))) < 1e-5);
auto grad_src = [&](long idx) {
auto f = [&](float eps) {
const float old = src.host()[idx];
src.host()[idx] += eps;
batch_normalize(dest, means, vars, src, gamma, beta);
batch_normalize(dest, means, vars, 1, running_means, running_invstds, src, gamma, beta);
float result = dot(gradient_input, dest);
src.host()[idx] = old;
return result;
......@@ -179,7 +183,7 @@ namespace
auto f = [&](float eps) {
const float old = gamma.host()[idx];
gamma.host()[idx] += eps;
batch_normalize(dest, means, vars, src, gamma, beta);
batch_normalize(dest, means, vars, 1, running_means, running_invstds, src, gamma, beta);
float result = dot(gradient_input, dest);
gamma.host()[idx] = old;
return result;
......@@ -191,7 +195,7 @@ namespace
auto f = [&](float eps) {
const float old = beta.host()[idx];
beta.host()[idx] += eps;
batch_normalize(dest, means, vars, src, gamma, beta);
batch_normalize(dest, means, vars, 1, running_means, running_invstds, src, gamma, beta);
float result = dot(gradient_input, dest);
beta.host()[idx] = old;
return result;
......@@ -208,8 +212,7 @@ namespace
gamma_grad = 8;
beta_grad = 8;
batch_normalize_gradient bng;
bng(gradient_input, means, vars, src, gamma, src_grad, gamma_grad, beta_grad);
batch_normalize_gradient(gradient_input, means, vars, src, gamma, src_grad, gamma_grad, beta_grad);
auto grad_error = compare_gradients(src_grad, grad_src);
dlog << LINFO << "src error: " << grad_error;
......@@ -227,7 +230,7 @@ namespace
void test_batch_normalize_conv()
{
print_spinner();
resizable_tensor src(5,5,4,4), gamma(1,5), beta(1,5), dest, means, vars, gradient_input(5,5,4,4);
resizable_tensor src(5,5,4,4), gamma(1,5), beta(1,5), dest, dest2, means, vars, gradient_input(5,5,4,4);
src = matrix_cast<float>(gaussian_randm(5,5*4*4, 0));
gamma = matrix_cast<float>(gaussian_randm(1,5, 1));
beta = matrix_cast<float>(gaussian_randm(1,5, 2));
......@@ -236,14 +239,18 @@ namespace
gamma = 1;
beta = 0;
batch_normalize_conv(dest, means, vars, src, gamma, beta);
resizable_tensor running_means;
resizable_tensor running_invstds;
batch_normalize_conv(dest, means, vars, 1, running_means, running_invstds, src, gamma, beta);
batch_normalize_conv_inference(dest2, src, gamma, beta, running_means, running_invstds);
DLIB_TEST(max(abs(mat(dest2)-mat(dest))) < 1e-5);
auto grad_src = [&](long idx) {
auto f = [&](float eps) {
const float old = src.host()[idx];
src.host()[idx] += eps;
batch_normalize_conv(dest, means, vars, src, gamma, beta);
batch_normalize_conv(dest, means, vars, 1, running_means, running_invstds, src, gamma, beta);
float result = dot(gradient_input, dest);
src.host()[idx] = old;
return result;
......@@ -255,7 +262,7 @@ namespace
auto f = [&](float eps) {
const float old = gamma.host()[idx];
gamma.host()[idx] += eps;
batch_normalize_conv(dest, means, vars, src, gamma, beta);
batch_normalize_conv(dest, means, vars, 1, running_means, running_invstds, src, gamma, beta);
float result = dot(gradient_input, dest);
gamma.host()[idx] = old;
return result;
......@@ -267,7 +274,7 @@ namespace
auto f = [&](float eps) {
const float old = beta.host()[idx];
beta.host()[idx] += eps;
batch_normalize_conv(dest, means, vars, src, gamma, beta);
batch_normalize_conv(dest, means, vars, 1, running_means, running_invstds, src, gamma, beta);
float result = dot(gradient_input, dest);
beta.host()[idx] = old;
return result;
......@@ -285,8 +292,7 @@ namespace
gamma_grad = 9;
beta_grad = 9;
batch_normalize_conv_gradient bng;
bng(gradient_input, means, vars, src, gamma, src_grad, gamma_grad, beta_grad);
batch_normalize_conv_gradient(gradient_input, means, vars, src, gamma, src_grad, gamma_grad, beta_grad);
auto grad_error = compare_gradients(src_grad, grad_src);
......@@ -597,6 +603,8 @@ namespace
resizable_tensor dest, dest2;
resizable_tensor means, means2;
resizable_tensor invstds, invstds2;
resizable_tensor running_means, running_means2;
resizable_tensor running_invstds, running_invstds2;
resizable_tensor src(64,20,100,100);
resizable_tensor gamma(1,20,100,100);
resizable_tensor beta(1,20,100,100);
......@@ -605,17 +613,21 @@ namespace
tt::tensor_rand rnd;
rnd.fill_uniform(src);
cpu::batch_normalize(dest,means,invstds, src, gamma, beta);
cuda::batch_normalize(dest2,means2,invstds2, src, gamma, beta);
cpu::batch_normalize(dest, means, invstds, 1, running_means, running_invstds, src, gamma, beta);
cuda::batch_normalize(dest2,means2,invstds2, 1, running_means2, running_invstds2, src, gamma, beta);
dlog << LINFO << "dest error: "<< max(abs(mat(dest) -mat(dest2)));
dlog << LINFO << "means error: "<< max(abs(mat(means) -mat(means2)));
dlog << LINFO << "invstds error: "<< max(abs(mat(invstds) -mat(invstds2)));
dlog << LINFO << "running_means error: "<< max(abs(mat(running_means) -mat(running_means2)));
dlog << LINFO << "running_invstds error: "<< max(abs(mat(running_invstds) -mat(running_invstds2)));
DLIB_TEST(max(abs(mat(dest) -mat(dest2))) < 1e-5);
DLIB_TEST(max(abs(mat(means) -mat(means2))) < 1e-5);
DLIB_TEST(max(abs(mat(invstds) -mat(invstds2))) < 1e-5);
DLIB_TEST(max(abs(mat(dest) -mat(dest2))) < 1e-4);
DLIB_TEST(max(abs(mat(means) -mat(means2))) < 1e-4);
DLIB_TEST(max(abs(mat(invstds) -mat(invstds2))) < 1e-4);
DLIB_TEST(max(abs(mat(running_means) -mat(running_means2))) < 1e-4);
DLIB_TEST(max(abs(mat(running_invstds) -mat(running_invstds2))) < 1e-4);
// now check that the gradients match as well
......@@ -629,17 +641,15 @@ namespace
rnd.fill_uniform(gradient_input);
cpu::batch_normalize_gradient cpu_bng;
cpu_bng(gradient_input, means, invstds, src, gamma, src_grad, gamma_grad, beta_grad);
cuda::batch_normalize_gradient cuda_bng;
cuda_bng(gradient_input, means, invstds, src, gamma, src_grad2, gamma_grad2, beta_grad2);
cpu::batch_normalize_gradient(gradient_input, means, invstds, src, gamma, src_grad, gamma_grad, beta_grad);
cuda::batch_normalize_gradient(gradient_input, means, invstds, src, gamma, src_grad2, gamma_grad2, beta_grad2);
dlog << LINFO << "src_grad error: " << max(abs(mat(src_grad)-mat(src_grad2)));
dlog << LINFO << "gamma_grad error: " << max(abs(mat(gamma_grad)-mat(gamma_grad2)));
dlog << LINFO << "beta_grad error: " << max(abs(mat(beta_grad)-mat(beta_grad2)));
DLIB_TEST(max(abs(mat(src_grad)-mat(src_grad2))) < 1e-5);
DLIB_TEST(max(abs(mat(gamma_grad)-mat(gamma_grad2))) < 1e-5);
DLIB_TEST(max(abs(mat(beta_grad)-mat(beta_grad2))) < 1e-5);
DLIB_TEST(max(abs(mat(src_grad)-mat(src_grad2))) < 1e-4);
DLIB_TEST(max(abs(mat(gamma_grad)-mat(gamma_grad2))) < 1e-4);
DLIB_TEST(max(abs(mat(beta_grad)-mat(beta_grad2))) < 1e-4);
}
void compare_bn_conv_gpu_and_cpu()
......@@ -648,6 +658,8 @@ namespace
resizable_tensor dest, dest2;
resizable_tensor means, means2;
resizable_tensor invstds, invstds2;
resizable_tensor running_means, running_means2;
resizable_tensor running_invstds, running_invstds2;
resizable_tensor src(2,8,10,9);
resizable_tensor gamma(1,8);
resizable_tensor beta(1,8);
......@@ -656,17 +668,20 @@ namespace
tt::tensor_rand rnd;
rnd.fill_uniform(src);
cpu::batch_normalize_conv(dest,means,invstds, src, gamma, beta);
cuda::batch_normalize_conv(dest2,means2,invstds2, src, gamma, beta);
cpu::batch_normalize_conv(dest,means,invstds,1,running_means,running_invstds, src, gamma, beta);
cuda::batch_normalize_conv(dest2,means2,invstds2,1,running_means2,running_invstds2, src, gamma, beta);
dlog << LINFO << "dest error: "<< max(abs(mat(dest) -mat(dest2)));
dlog << LINFO << "means error: "<< max(abs(mat(means) -mat(means2)));
dlog << LINFO << "invstds error: "<< max(abs(mat(invstds) -mat(invstds2)));
dlog << LINFO << "running_means error: "<< max(abs(mat(running_means) -mat(running_means2)));
dlog << LINFO << "running_invstds error: "<< max(abs(mat(running_invstds) -mat(running_invstds2)));
DLIB_TEST(max(abs(mat(dest) -mat(dest2))) < 1e-4);
DLIB_TEST(max(abs(mat(means) -mat(means2))) < 1e-4);
DLIB_TEST(max(abs(mat(invstds) -mat(invstds2))) < 1e-4);
DLIB_TEST(max(abs(mat(running_means) -mat(running_means2))) < 1e-4);
DLIB_TEST(max(abs(mat(running_invstds) -mat(running_invstds2))) < 1e-4);
resizable_tensor gradient_input;
resizable_tensor src_grad, gamma_grad, beta_grad;
......@@ -678,10 +693,8 @@ namespace
rnd.fill_uniform(gradient_input);
cpu::batch_normalize_conv_gradient cpu_bng;
cpu_bng(gradient_input, means, invstds, src, gamma, src_grad, gamma_grad, beta_grad);
cuda::batch_normalize_conv_gradient cuda_bng;
cuda_bng(gradient_input, means, invstds, src, gamma, src_grad2, gamma_grad2, beta_grad2);
cpu::batch_normalize_conv_gradient(gradient_input, means, invstds, src, gamma, src_grad, gamma_grad, beta_grad);
cuda::batch_normalize_conv_gradient(gradient_input, means, invstds, src, gamma, src_grad2, gamma_grad2, beta_grad2);
dlog << LINFO << "src_grad error: " << max(abs(mat(src_grad)-mat(src_grad2)));
dlog << LINFO << "gamma_grad error: " << max(abs(mat(gamma_grad)-mat(gamma_grad2)));
......@@ -721,8 +734,8 @@ namespace
// make sure max_pool does what it's spec says it should.
DLIB_TEST( A.num_samples() == B.num_samples());
DLIB_TEST( A.k() == B.k());
DLIB_TEST( A.nr() == B.nr()/stride_y);
DLIB_TEST( A.nc() == B.nc()/stride_x);
DLIB_TEST( A.nr() == 1+(B.nr()-window_height%2)/stride_y);
DLIB_TEST( A.nc() == 1+(B.nc()-window_width%2)/stride_x);
for (long s = 0; s < A.num_samples(); ++s)
{
for (long k = 0; k < A.k(); ++k)
......@@ -732,10 +745,10 @@ namespace
for (long c = 0; c < A.nc(); ++c)
{
DLIB_TEST(image_plane(A,s,k)(r,c) == max(subm_clipped(image_plane(B,s,k),
centered_rect(c*stride_x,
r*stride_y,
c*stride_x,
window_height,
window_width)));
window_width,
window_height))));
}
}
}
......@@ -838,6 +851,7 @@ namespace
test_max_pool(1,1,2,3);
test_max_pool(3,3,1,1);
test_max_pool(3,3,2,2);
test_max_pool(2,2,2,2);
test_max_pool(4,5,3,1);
test_tanh();
test_softmax();
......