Commit 32125dea authored by Davis King

Optimized batch normalization code

parent 273a21cf
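What changed: batch_normalize() and batch_normalize_conv() no longer return per-element variances (vars). They now return precomputed inverse standard deviations (invstds = 1/sqrt(var + eps)), so the hot normalization and gradient loops replace a std::sqrt plus a division per element with a single multiplication. A minimal sketch of the idea in plain C++ (not dlib's tensor API; the names here are illustrative):

#include <cmath>
#include <vector>

// Before: one sqrt and one division per element.
// After:  one sqrt up front, then one multiplication per element.
void normalize(std::vector<float>& x, float mean, float var, float eps)
{
    const float invstd = 1.0f/std::sqrt(var + eps);  // hoisted out of the loop
    for (auto& v : x)
        v = (v - mean)*invstd;   // was: (v - mean)/std::sqrt(var + eps)
}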
@@ -87,7 +87,7 @@ namespace dlib
     void batch_normalize (
         resizable_tensor& dest,
         resizable_tensor& means,
-        resizable_tensor& vars,
+        resizable_tensor& invstds,
         const tensor& src,
         const tensor& gamma,
         const tensor& beta
@@ -115,12 +115,12 @@ namespace dlib
         dest.copy_size(src);
         means.set_size(1, src.k(), src.nr(), src.nc());
-        vars.set_size(1, src.k(), src.nr(), src.nc());
+        invstds.set_size(1, src.k(), src.nr(), src.nc());
 
-        // first compute means and vars
+        // first compute means and invstds
         means = 0;
-        vars = 0;
+        invstds = 0;
 
-        const auto p_vars = vars.host();
+        const auto p_invstds = invstds.host();
         const auto p_means = means.host();
         auto p_src = src.host();
         const long num = src.k()*src.nr()*src.nc();
@@ -131,23 +131,23 @@ namespace dlib
             {
                 float val = p_src[n*num+i];
                 p_means[i] += val;
-                p_vars[i] += val*val;
+                p_invstds[i] += val*val;
             }
         }
 
         means /= src.num_samples();
-        vars /= src.num_samples();
+        invstds /= src.num_samples();
 
         // copy data back to host
-        vars.host(); means.host();
+        invstds.host(); means.host();
+        const float eps = 0.00001;
         p_src = src.host();
         // compute variances
         for (long i = 0; i < num; ++i)
         {
-            p_vars[i] = p_vars[i] - p_means[i]*p_means[i];
+            auto actual_var = p_invstds[i] - p_means[i]*p_means[i];
+            p_invstds[i] = 1.0/std::sqrt(actual_var+eps);
         }
-        // TODO, must match eps in batch_normalize_gradient() so make this a shared variable.
-        const float eps = 0.00001;
         p_src = src.host();
         auto p_dest = dest.host();
         const auto p_gamma = gamma.host();
@@ -156,7 +156,7 @@ namespace dlib
         {
             for (long i = 0; i < num; ++i)
             {
-                *p_dest = (*p_src - p_means[i])/std::sqrt(p_vars[i] + eps);
+                *p_dest = (*p_src - p_means[i])*p_invstds[i];
                 *p_dest = (*p_dest)*p_gamma[i] + p_beta[i];
                 ++p_src;
                 ++p_dest;
@@ -167,7 +167,7 @@ namespace dlib
     void batch_normalize_gradient (
         const tensor& gradient_input,
         const tensor& means,
-        const tensor& vars,
+        const tensor& invstds,
         const tensor& src,
         const tensor& gamma,
         tensor& src_grad,
@@ -175,11 +175,10 @@ namespace dlib
         tensor& beta_grad
     )
     {
-        const float eps = 0.00001;
         const long num = src.k()*src.nr()*src.nc();
         DLIB_CASSERT(num == means.size(),"");
-        DLIB_CASSERT(num == vars.size(),"");
+        DLIB_CASSERT(num == invstds.size(),"");
         DLIB_CASSERT(num == gamma.size(),"");
         DLIB_CASSERT(num == gamma_grad.size(),"");
         DLIB_CASSERT(num == beta_grad.size(),"");
 
@@ -190,11 +189,11 @@ namespace dlib
         const auto p_gamma = gamma.host();
         const auto p_gamma_grad = gamma_grad.host();
         const auto p_beta_grad = beta_grad.host();
-        const auto p_vars = vars.host();
+        const auto p_invstds = invstds.host();
         const auto p_means = means.host();
 
         resizable_tensor dvars, dmeans;
-        dvars.copy_size(vars);
+        dvars.copy_size(invstds);
         dmeans.copy_size(means);
         dvars = 0;
         dmeans = 0;
@@ -205,13 +204,13 @@ namespace dlib
         {
             for (long i = 0; i < num; ++i)
             {
-                const float x_hat = (*p_src - p_means[i])/std::sqrt(p_vars[i] + eps);
+                const float x_hat = (*p_src - p_means[i])*p_invstds[i];
                 p_beta_grad[i] += *p_grad;
                 p_gamma_grad[i] += (*p_grad)*x_hat;
 
                 const float dx = *p_grad * p_gamma[i];
 
-                p_dvars[i] += dx*(*p_src - p_means[i])* -0.5*std::pow(p_vars[i]+eps, -3.0f/2);
+                p_dvars[i] += dx*(*p_src - p_means[i])* -0.5*std::pow(p_invstds[i], 3.0f);
 
                 ++p_grad;
                 ++p_src;
@@ -226,7 +225,7 @@ namespace dlib
             {
                 const float dx = *p_grad * p_gamma[i];
 
-                p_dmeans[i] += dx*-1/std::sqrt(p_vars[i] + eps) + p_dvars[i] * -2*(*p_src - p_means[i])/src.num_samples();
+                p_dmeans[i] += dx*-p_invstds[i] + p_dvars[i] * -2*(*p_src - p_means[i])/src.num_samples();
 
                 ++p_grad;
                 ++p_src;
@@ -241,7 +240,7 @@ namespace dlib
             {
                 const float dx = *p_grad * p_gamma[i];
 
-                *p_src_grad += dx/std::sqrt(p_vars[i] + eps) +
+                *p_src_grad += dx*p_invstds[i] +
                     p_dvars[i] *2*(*p_src - p_means[i])/src.num_samples() +
                     p_dmeans[i]/src.num_samples();
 
@@ -258,7 +257,7 @@ namespace dlib
     void batch_normalize_conv (
         resizable_tensor& dest,
         resizable_tensor& means,
-        resizable_tensor& vars,
+        resizable_tensor& invstds,
         const tensor& src,
         const tensor& gamma,
         const tensor& beta
@@ -288,12 +287,12 @@ namespace dlib
         dest.copy_size(src);
         means.set_size(1, src.k());
-        vars.set_size(1, src.k());
+        invstds.set_size(1, src.k());
 
-        // first compute means and vars
+        // first compute means and invstds
         means = 0;
-        vars = 0;
+        invstds = 0;
 
-        const auto p_vars = vars.host();
+        const auto p_invstds = invstds.host();
         const auto p_means = means.host();
         const auto p_gamma = gamma.host();
         const auto p_beta = beta.host();
@@ -307,25 +306,25 @@ namespace dlib
                 for (long i = 0; i < num; ++i)
                 {
                     p_means[k] += *p_src;
-                    p_vars[k] += (*p_src)*(*p_src);
+                    p_invstds[k] += (*p_src)*(*p_src);
                     ++p_src;
                 }
             }
         }
 
         means /= src.num_samples()*num;
-        vars /= src.num_samples()*num;
+        invstds /= src.num_samples()*num;
 
         // copy data back to host
-        vars.host(); means.host();
+        invstds.host(); means.host();
+        const float eps = 0.00001;
         p_src = src.host();
         // compute variances
         for (long k = 0; k < src.k(); ++k)
         {
-            p_vars[k] = p_vars[k] - p_means[k]*p_means[k];
+            auto actual_var = p_invstds[k] - p_means[k]*p_means[k];
+            p_invstds[k] = 1.0/std::sqrt(actual_var + eps);
         }
-        // TODO, must match eps in batch_normalize_gradient() so make this a shared variable.
-        const float eps = 0.00001;
         p_src = src.host();
         auto p_dest = dest.host();
         for (long n = 0; n < src.num_samples(); ++n)
@@ -334,7 +333,7 @@ namespace dlib
             {
                 for (long i = 0; i < num; ++i)
                 {
-                    *p_dest = (*p_src - p_means[k])/std::sqrt(p_vars[k] + eps);
+                    *p_dest = (*p_src - p_means[k])*p_invstds[k];
                     *p_dest = (*p_dest)*p_gamma[k] + p_beta[k];
                     ++p_src;
                     ++p_dest;
@@ -346,7 +345,7 @@ namespace dlib
     void batch_normalize_conv_gradient (
         const tensor& gradient_input,
         const tensor& means,
-        const tensor& vars,
+        const tensor& invstds,
         const tensor& src,
         const tensor& gamma,
         tensor& src_grad,
@@ -354,11 +353,10 @@ namespace dlib
         tensor& beta_grad
     )
     {
-        const float eps = 0.00001;
         const long num = src.nr()*src.nc();
         DLIB_CASSERT(src.k() == means.size(),"");
-        DLIB_CASSERT(src.k() == vars.size(),"");
+        DLIB_CASSERT(src.k() == invstds.size(),"");
         DLIB_CASSERT(src.k() == gamma.size(),"");
         DLIB_CASSERT(src.k() == gamma_grad.size(),"");
         DLIB_CASSERT(src.k() == beta_grad.size(),"");
 
@@ -369,11 +367,11 @@ namespace dlib
         const auto p_gamma = gamma.host();
         const auto p_gamma_grad = gamma_grad.host();
         const auto p_beta_grad = beta_grad.host();
-        const auto p_vars = vars.host();
+        const auto p_invstds = invstds.host();
         const auto p_means = means.host();
 
         resizable_tensor dvars, dmeans;
-        dvars.copy_size(vars);
+        dvars.copy_size(invstds);
         dmeans.copy_size(means);
         dvars = 0;
         dmeans = 0;
@@ -386,13 +384,13 @@ namespace dlib
             {
                 for (long i = 0; i < num; ++i)
                 {
-                    const float x_hat = (*p_src - p_means[k])/std::sqrt(p_vars[k] + eps);
+                    const float x_hat = (*p_src - p_means[k])*p_invstds[k];
                     p_beta_grad[k] += *p_grad;
                     p_gamma_grad[k] += (*p_grad)*x_hat;
 
                     const float dx = *p_grad * p_gamma[k];
 
-                    p_dvars[k] += dx*(*p_src - p_means[k])* -0.5*std::pow(p_vars[k]+eps, -3.0f/2);
+                    p_dvars[k] += dx*(*p_src - p_means[k])* -0.5*std::pow(p_invstds[k], 3.0f);
 
                     ++p_grad;
                     ++p_src;
@@ -410,7 +408,7 @@ namespace dlib
                 {
                     const float dx = *p_grad * p_gamma[k];
 
-                    p_dmeans[k] += dx*-1/std::sqrt(p_vars[k] + eps) + p_dvars[k] * -2*(*p_src - p_means[k])/src.num_samples()/num;
+                    p_dmeans[k] += -dx*p_invstds[k] + p_dvars[k] * -2*(*p_src - p_means[k])/src.num_samples()/num;
 
                     ++p_grad;
                     ++p_src;
@@ -428,7 +426,7 @@ namespace dlib
                 {
                     const float dx = *p_grad * p_gamma[k];
 
-                    *p_src_grad += dx/std::sqrt(p_vars[k] + eps) +
+                    *p_src_grad += dx*p_invstds[k] +
                         p_dvars[k] *2*(*p_src - p_means[k])/src.num_samples()/num +
                         p_dmeans[k]/src.num_samples()/num;
 
...
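The gradient hunks above follow mechanically from that substitution. Writing g_i for the upstream gradient of the normalized value (dx = *p_grad * p_gamma[i] in the code) and s for the stored inverse standard deviation, the standard batch normalization gradients (Ioffe & Szegedy, 2015) become:

\[
\hat{x}_i = \frac{x_i - \mu}{\sqrt{\sigma^2 + \varepsilon}} = (x_i - \mu)\,s,
\qquad
s = \frac{1}{\sqrt{\sigma^2 + \varepsilon}}
\]
\[
\frac{\partial f}{\partial \sigma^2}
= \sum_i g_i\,(x_i - \mu)\left(-\tfrac{1}{2}\right)(\sigma^2 + \varepsilon)^{-3/2}
= \sum_i g_i\,(x_i - \mu)\left(-\tfrac{1}{2}\right) s^3
\]
\[
\frac{\partial f}{\partial \mu}
= -s \sum_i g_i \;+\; \frac{\partial f}{\partial \sigma^2} \cdot \frac{-2\sum_i (x_i - \mu)}{m}
\]

Since (sigma^2 + eps)^(-3/2) = s^3, the term std::pow(p_vars[i]+eps, -3.0f/2) becomes std::pow(p_invstds[i], 3.0f), and every remaining 1/std::sqrt(p_vars[i] + eps) collapses to p_invstds[i]. This also resolves the old TODO about keeping two eps constants in sync: eps is now folded into invstds during the forward pass, and the gradient functions never need it.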
@@ -43,7 +43,7 @@ namespace dlib
     void batch_normalize (
         resizable_tensor& dest,
         resizable_tensor& means,
-        resizable_tensor& vars,
+        resizable_tensor& invstds,
         const tensor& src,
         const tensor& gamma,
         const tensor& beta
@@ -52,7 +52,7 @@ namespace dlib
     void batch_normalize_gradient (
         const tensor& gradient_input,
         const tensor& means,
-        const tensor& vars,
+        const tensor& invstds,
         const tensor& src,
         const tensor& gamma,
         tensor& src_grad,
@@ -63,7 +63,7 @@ namespace dlib
     void batch_normalize_conv (
         resizable_tensor& dest,
         resizable_tensor& means,
-        resizable_tensor& vars,
+        resizable_tensor& invstds,
         const tensor& src,
         const tensor& gamma,
         const tensor& beta
@@ -72,7 +72,7 @@ namespace dlib
     void batch_normalize_conv_gradient (
         const tensor& gradient_input,
         const tensor& means,
-        const tensor& vars,
+        const tensor& invstds,
         const tensor& src,
         const tensor& gamma,
         tensor& src_grad,
...
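For reference, a minimal forward/backward call sequence against the renamed interface might look like the following. This is a sketch: the include path, the dlib::tt entry points from the spec further down, and the data setup are assumptions for illustration, not part of this commit.

#include <cmath>
#include <dlib/dnn.h>  // assumed umbrella header; adjust to the actual tree

using namespace dlib;

void example()
{
    resizable_tensor src(5, 4, 3, 2);        // 5 samples, 4 channels, 3x2 maps
    for (size_t i = 0; i < src.size(); ++i)
        src.host()[i] = std::sin((float)i);  // arbitrary test data

    resizable_tensor gamma(1, 4, 3, 2), beta(1, 4, 3, 2);
    gamma = 1;
    beta = 0;

    // forward pass: invstds now comes back where vars used to
    resizable_tensor dest, means, invstds;
    tt::batch_normalize(dest, means, invstds, src, gamma, beta);

    // backward pass reuses the same means/invstds unchanged
    resizable_tensor src_grad, gamma_grad, beta_grad;
    src_grad.copy_size(src);     src_grad = 0;
    gamma_grad.copy_size(gamma); gamma_grad = 0;
    beta_grad.copy_size(beta);   beta_grad = 0;
    tt::batch_normalize_gradient(dest /*any tensor shaped like src*/, means, invstds,
                                 src, gamma, src_grad, gamma_grad, beta_grad);
}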
@@ -74,7 +74,7 @@ namespace dlib
     void batch_normalize (
         resizable_tensor& dest,
         resizable_tensor& means,
-        resizable_tensor& vars,
+        resizable_tensor& invstds,
         const tensor& src,
         const tensor& gamma,
         const tensor& beta
@@ -90,19 +90,19 @@ namespace dlib
         ensures
             - have_same_dimensions(#dest, src) == true
             - #means.num_samples() == 1
-            - #vars.num_samples() == 1
-            - means.nr() == vars.nr() == src.nr()
-            - means.nc() == vars.nc() == src.nc()
-            - means.k() == vars.k() == src.k()
+            - #invstds.num_samples() == 1
+            - means.nr() == invstds.nr() == src.nr()
+            - means.nc() == invstds.nc() == src.nc()
+            - means.k() == invstds.k() == src.k()
             - #src == the batch normalized version of src.
             - #means == the mean values of the contents of src.
-            - #vars == the variance values of the contents of src.
+            - #invstds == 1/(the standard deviation values of the contents of src).
     !*/
 
     void batch_normalize_gradient (
         const tensor& gradient_input,
         const tensor& means,
-        const tensor& vars,
+        const tensor& invstds,
         const tensor& src,
         const tensor& gamma,
         tensor& src_grad,
@@ -111,8 +111,8 @@ namespace dlib
     );
     /*!
         requires
-            - vars and means should be the output of a call to
-              batch_normalize(dest,means,vars,src,gamma,beta)
+            - invstds and means should be the output of a call to
+              batch_normalize(dest,means,invstds,src,gamma,beta)
             - have_same_dimensions(gradient_input, src) == true
             - have_same_dimensions(src, src_grad) == true
             - src.num_samples() > 1
@@ -123,10 +123,10 @@ namespace dlib
             - gamma.nc() == src.nc()
            - gamma.k() == src.k()
            - have_same_dimensions(means, gamma) == true
-            - have_same_dimensions(vars, gamma) == true
+            - have_same_dimensions(invstds, gamma) == true
        ensures
            - Let f(src,gamma,beta) == dot(gradient_input, dest output of
-              batch_normalize(dest,means,vars,src,gamma,beta))
+              batch_normalize(dest,means,invstds,src,gamma,beta))
            - Adds the gradient of f() with respect to src to #src_grad.
            - Adds the gradient of f() with respect to gamma to #gamma_grad.
            - Adds the gradient of f() with respect to beta to #beta_grad.
@@ -135,7 +135,7 @@ namespace dlib
     void batch_normalize_conv (
         resizable_tensor& dest,
         resizable_tensor& means,
-        resizable_tensor& vars,
+        resizable_tensor& invstds,
         const tensor& src,
         const tensor& gamma,
         const tensor& beta
@@ -149,17 +149,17 @@ namespace dlib
         ensures
             - have_same_dimensions(#dest, src) == true
             - #means.num_samples()==means.nr()==means.nc() == 1
-            - #vars.num_samples() ==vars.nr() ==vars.nc() == 1
-            - means.k() == vars.k() == src.k()
+            - #invstds.num_samples() ==invstds.nr() ==invstds.nc() == 1
+            - means.k() == invstds.k() == src.k()
             - #src == the batch normalized version of src.
             - #means == the mean values of the contents of src.
-            - #vars == the variance values of the contents of src.
+            - #invstds == 1/(the standard deviation values of the contents of src).
     !*/
 
     void batch_normalize_conv_gradient (
         const tensor& gradient_input,
         const tensor& means,
-        const tensor& vars,
+        const tensor& invstds,
         const tensor& src,
         const tensor& gamma,
         tensor& src_grad,
@@ -168,8 +168,8 @@ namespace dlib
     );
     /*!
         requires
-            - vars and means should be the output of a call to
-              batch_normalize_conv(dest,means,vars,src,gamma,beta)
+            - invstds and means should be the output of a call to
+              batch_normalize_conv(dest,means,invstds,src,gamma,beta)
             - have_same_dimensions(gradient_input, src) == true
            - have_same_dimensions(src, src_grad) == true
            - src.num_samples() > 1
@@ -178,10 +178,10 @@ namespace dlib
            - have_same_dimensions(gamma, beta_grad) == true
            - gamma.k() == src.k()
            - have_same_dimensions(means, gamma) == true
-            - have_same_dimensions(vars, gamma) == true
+            - have_same_dimensions(invstds, gamma) == true
        ensures
            - Let f(src,gamma,beta) == dot(gradient_input, dest output of
-              batch_normalize_conv(dest,means,vars,src,gamma,beta))
+              batch_normalize_conv(dest,means,invstds,src,gamma,beta))
            - Adds the gradient of f() with respect to src to #src_grad.
            - Adds the gradient of f() with respect to gamma to #gamma_grad.
            - Adds the gradient of f() with respect to beta to #beta_grad.
...
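The _conv variants pool their statistics over every sample and every spatial position of a channel, which is why the implementation divides the accumulated sums by src.num_samples()*num with num = src.nr()*src.nc(). For channel k over N samples of R x C feature maps, the quantities the code produces are:

\[
\mu_k = \frac{1}{NRC} \sum_{n,r,c} x_{n,k,r,c},
\qquad
s_k = \frac{1}{\sqrt{\dfrac{1}{NRC} \sum_{n,r,c} x_{n,k,r,c}^2 \;-\; \mu_k^2 \;+\; \varepsilon}}
\]

A single pass accumulates sum(x) and sum(x^2) per channel, then forms the (biased) variance as E[x^2] - E[x]^2 and inverts the square root once per channel.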
@@ -150,7 +150,7 @@ namespace dlib { namespace tt
     void batch_normalize (
         resizable_tensor& dest,
         resizable_tensor& means,
-        resizable_tensor& vars,
+        resizable_tensor& invstds,
         const tensor& src,
         const tensor& gamma,
         const tensor& beta
@@ -166,13 +166,13 @@ namespace dlib { namespace tt
         ensures
             - have_same_dimensions(#dest, src) == true
             - #means.num_samples() == 1
-            - #vars.num_samples() == 1
-            - means.nr() == vars.nr() == src.nr()
-            - means.nc() == vars.nc() == src.nc()
-            - means.k() == vars.k() == src.k()
+            - #invstds.num_samples() == 1
+            - means.nr() == invstds.nr() == src.nr()
+            - means.nc() == invstds.nc() == src.nc()
+            - means.k() == invstds.k() == src.k()
             - #src == the batch normalized version of src.
             - #means == the mean values of the contents of src.
-            - #vars == the variance values of the contents of src.
+            - #invstds == 1/(the standard deviation values of the contents of src).
     !*/
 
// ----------------------------------------------------------------------------------------
@@ -180,7 +180,7 @@ namespace dlib { namespace tt
     void batch_normalize_gradient (
         const tensor& gradient_input,
         const tensor& means,
-        const tensor& vars,
+        const tensor& invstds,
         const tensor& src,
         const tensor& gamma,
         tensor& src_grad,
@@ -189,8 +189,8 @@ namespace dlib { namespace tt
     );
     /*!
         requires
-            - vars and means should be the output of a call to
-              batch_normalize(dest,means,vars,src,gamma,beta)
+            - invstds and means should be the output of a call to
+              batch_normalize(dest,means,invstds,src,gamma,beta)
             - have_same_dimensions(gradient_input, src) == true
            - have_same_dimensions(src, src_grad) == true
            - src.num_samples() > 1
@@ -201,10 +201,10 @@ namespace dlib { namespace tt
            - gamma.nc() == src.nc()
            - gamma.k() == src.k()
            - have_same_dimensions(means, gamma) == true
-            - have_same_dimensions(vars, gamma) == true
+            - have_same_dimensions(invstds, gamma) == true
        ensures
            - Let f(src,gamma,beta) == dot(gradient_input, dest output of
-              batch_normalize(dest,means,vars,src,gamma,beta))
+              batch_normalize(dest,means,invstds,src,gamma,beta))
            - Adds the gradient of f() with respect to src to #src_grad.
            - Adds the gradient of f() with respect to gamma to #gamma_grad.
            - Adds the gradient of f() with respect to beta to #beta_grad.
@@ -213,7 +213,7 @@ namespace dlib { namespace tt
     void batch_normalize_conv (
         resizable_tensor& dest,
         resizable_tensor& means,
-        resizable_tensor& vars,
+        resizable_tensor& invstds,
         const tensor& src,
         const tensor& gamma,
         const tensor& beta
@@ -227,17 +227,17 @@ namespace dlib { namespace tt
         ensures
             - have_same_dimensions(#dest, src) == true
             - #means.num_samples()==means.nr()==means.nc() == 1
-            - #vars.num_samples() ==vars.nr() ==vars.nc() == 1
-            - means.k() == vars.k() == src.k()
+            - #invstds.num_samples() ==invstds.nr() ==invstds.nc() == 1
+            - means.k() == invstds.k() == src.k()
             - #src == the batch normalized version of src.
             - #means == the mean values of the contents of src.
-            - #vars == the variance values of the contents of src.
+            - #invstds == 1/(the standard deviation values of the contents of src).
     !*/
 
     void batch_normalize_conv_gradient (
         const tensor& gradient_input,
         const tensor& means,
-        const tensor& vars,
+        const tensor& invstds,
         const tensor& src,
         const tensor& gamma,
         tensor& src_grad,
@@ -246,8 +246,8 @@ namespace dlib { namespace tt
     );
     /*!
         requires
-            - vars and means should be the output of a call to
-              batch_normalize_conv(dest,means,vars,src,gamma,beta)
+            - invstds and means should be the output of a call to
+              batch_normalize_conv(dest,means,invstds,src,gamma,beta)
            - have_same_dimensions(gradient_input, src) == true
            - have_same_dimensions(src, src_grad) == true
            - src.num_samples() > 1
@@ -256,10 +256,10 @@ namespace dlib { namespace tt
            - have_same_dimensions(gamma, beta_grad) == true
            - gamma.k() == src.k()
            - have_same_dimensions(means, gamma) == true
-            - have_same_dimensions(vars, gamma) == true
+            - have_same_dimensions(invstds, gamma) == true
        ensures
            - Let f(src,gamma,beta) == dot(gradient_input, dest output of
-              batch_normalize_conv(dest,means,vars,src,gamma,beta))
+              batch_normalize_conv(dest,means,invstds,src,gamma,beta))
            - Adds the gradient of f() with respect to src to #src_grad.
            - Adds the gradient of f() with respect to gamma to #gamma_grad.
            - Adds the gradient of f() with respect to beta to #beta_grad.
...
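The "Let f(src,gamma,beta) == dot(gradient_input, ...)" clauses above invite a direct numerical test of the renamed interface. A hypothetical finite-difference check, assuming the tt entry points above and the tensor dot() that the spec itself refers to (helper names and tolerances are ours, not part of this commit):

#include <cmath>
#include <dlib/dnn.h>  // assumed umbrella header

using namespace dlib;

// f as defined in the ensures clauses above
float f(const tensor& gradient_input, const tensor& src,
        const tensor& gamma, const tensor& beta)
{
    resizable_tensor dest, means, invstds;
    tt::batch_normalize(dest, means, invstds, src, gamma, beta);
    return (float)dot(gradient_input, dest);
}

// Compare the analytic gradient for src's first element against a
// finite difference.  gradient_input must have the same dimensions as src.
bool check_first_element(resizable_tensor& src, const tensor& gradient_input,
                         const tensor& gamma, const tensor& beta)
{
    resizable_tensor dest, means, invstds;
    tt::batch_normalize(dest, means, invstds, src, gamma, beta);

    resizable_tensor src_grad, gamma_grad, beta_grad;
    src_grad.copy_size(src);     src_grad = 0;
    gamma_grad.copy_size(gamma); gamma_grad = 0;
    beta_grad.copy_size(beta);   beta_grad = 0;
    tt::batch_normalize_gradient(gradient_input, means, invstds, src, gamma,
                                 src_grad, gamma_grad, beta_grad);

    const float delta = 1e-2f;
    const float f0 = f(gradient_input, src, gamma, beta);
    src.host()[0] += delta;
    const float f1 = f(gradient_input, src, gamma, beta);
    src.host()[0] -= delta;

    const float numeric = (f1 - f0)/delta;
    return std::abs(numeric - src_grad.host()[0]) < 1e-2f;
}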