Commit 6acddf99 authored by Davis King

Just renamed variables to reflect the new meaning of the batch normalization running variance output.
parent 538de238
...@@ -466,7 +466,7 @@ namespace dlib ...@@ -466,7 +466,7 @@ namespace dlib
const tensor& gamma, const tensor& gamma,
const tensor& beta, const tensor& beta,
const tensor& running_means, const tensor& running_means,
const tensor& running_invstds const tensor& running_variances
) )
{ {
DLIB_CASSERT( DLIB_CASSERT(
...@@ -476,7 +476,7 @@ namespace dlib ...@@ -476,7 +476,7 @@ namespace dlib
gamma.k() == src.k() && gamma.k() == src.k() &&
have_same_dimensions(gamma, beta) && have_same_dimensions(gamma, beta) &&
have_same_dimensions(gamma, running_means) && have_same_dimensions(gamma, running_means) &&
have_same_dimensions(gamma, running_invstds), have_same_dimensions(gamma, running_variances),
"\ngamma.num_samples(): " << gamma.num_samples() << "\ngamma.num_samples(): " << gamma.num_samples() <<
"\ngamma.k(): " << gamma.k() << "\ngamma.k(): " << gamma.k() <<
"\ngamma.nr(): " << gamma.nr() << "\ngamma.nr(): " << gamma.nr() <<
...@@ -489,10 +489,10 @@ namespace dlib ...@@ -489,10 +489,10 @@ namespace dlib
"\nrunning_means.k(): " << running_means.k() << "\nrunning_means.k(): " << running_means.k() <<
"\nrunning_means.nr(): " << running_means.nr() << "\nrunning_means.nr(): " << running_means.nr() <<
"\nrunning_means.nc(): " << running_means.nc() << "\nrunning_means.nc(): " << running_means.nc() <<
"\nrunning_invstds.num_samples(): " << running_invstds.num_samples() << "\nrunning_variances.num_samples(): " << running_variances.num_samples() <<
"\nrunning_invstds.k(): " << running_invstds.k() << "\nrunning_variances.k(): " << running_variances.k() <<
"\nrunning_invstds.nr(): " << running_invstds.nr() << "\nrunning_variances.nr(): " << running_variances.nr() <<
"\nrunning_invstds.nc(): " << running_invstds.nc() << "\nrunning_variances.nc(): " << running_variances.nc() <<
"\nsrc.k(): " << src.k() << "\nsrc.k(): " << src.k() <<
"\nsrc.nr(): " << src.nr() << "\nsrc.nr(): " << src.nr() <<
"\nsrc.nc(): " << src.nc() "\nsrc.nc(): " << src.nc()
...@@ -504,14 +504,14 @@ namespace dlib ...@@ -504,14 +504,14 @@ namespace dlib
auto g = gamma.host(); auto g = gamma.host();
auto b = beta.host(); auto b = beta.host();
auto m = running_means.host(); auto m = running_means.host();
auto i = running_invstds.host(); auto v = running_variances.host();
const long num = src.k()*src.nr()*src.nc(); const long num = src.k()*src.nr()*src.nc();
for (long n = 0; n < src.num_samples(); ++n) for (long n = 0; n < src.num_samples(); ++n)
{ {
for (long k = 0; k < num; ++k) for (long k = 0; k < num; ++k)
{ {
*d = g[k]*(*s - m[k])/std::sqrt(i[k]+dlib::tt::BATCH_NORM_EPS) + b[k]; *d = g[k]*(*s - m[k])/std::sqrt(v[k]+dlib::tt::BATCH_NORM_EPS) + b[k];
++d; ++d;
++s; ++s;
} }
...@@ -524,7 +524,7 @@ namespace dlib ...@@ -524,7 +524,7 @@ namespace dlib
resizable_tensor& invstds, resizable_tensor& invstds,
const double averaging_factor, const double averaging_factor,
resizable_tensor& running_means, resizable_tensor& running_means,
resizable_tensor& running_invstds, resizable_tensor& running_variances,
const tensor& src, const tensor& src,
const tensor& gamma, const tensor& gamma,
const tensor& beta const tensor& beta
...@@ -532,7 +532,7 @@ namespace dlib ...@@ -532,7 +532,7 @@ namespace dlib
{ {
DLIB_CASSERT(0 <= averaging_factor && averaging_factor <= 1, "averaging_factor: " << averaging_factor); DLIB_CASSERT(0 <= averaging_factor && averaging_factor <= 1, "averaging_factor: " << averaging_factor);
DLIB_CASSERT(averaging_factor==1 || have_same_dimensions(running_means,means),""); DLIB_CASSERT(averaging_factor==1 || have_same_dimensions(running_means,means),"");
DLIB_CASSERT(averaging_factor==1 || have_same_dimensions(running_invstds,invstds),""); DLIB_CASSERT(averaging_factor==1 || have_same_dimensions(running_variances,invstds),"");
DLIB_CASSERT( DLIB_CASSERT(
src.num_samples() > 1 && src.num_samples() > 1 &&
gamma.num_samples() == 1 && gamma.num_samples() == 1 &&
...@@ -580,8 +580,9 @@ namespace dlib ...@@ -580,8 +580,9 @@ namespace dlib
invstds.host(); means.host(); invstds.host(); means.host();
// compute variances // compute variances
running_invstds.copy_size(invstds); running_variances.copy_size(invstds);
auto rvar = running_invstds.host(); auto rvar = running_variances.host();
// This scale makes the running variances unbiased.
const double scale = (src.num_samples())/(src.num_samples()-1.0); const double scale = (src.num_samples())/(src.num_samples()-1.0);
for (long i = 0; i < num; ++i) for (long i = 0; i < num; ++i)
{ {
...@@ -718,7 +719,7 @@ namespace dlib ...@@ -718,7 +719,7 @@ namespace dlib
const tensor& gamma, const tensor& gamma,
const tensor& beta, const tensor& beta,
const tensor& running_means, const tensor& running_means,
const tensor& running_invstds const tensor& running_variances
) )
{ {
DLIB_CASSERT( DLIB_CASSERT(
...@@ -728,7 +729,7 @@ namespace dlib ...@@ -728,7 +729,7 @@ namespace dlib
gamma.k() == src.k() && gamma.k() == src.k() &&
have_same_dimensions(gamma, beta) && have_same_dimensions(gamma, beta) &&
have_same_dimensions(gamma, running_means) && have_same_dimensions(gamma, running_means) &&
have_same_dimensions(gamma, running_invstds), have_same_dimensions(gamma, running_variances),
"\ngamma.num_samples(): " << gamma.num_samples() << "\ngamma.num_samples(): " << gamma.num_samples() <<
"\ngamma.k(): " << gamma.k() << "\ngamma.k(): " << gamma.k() <<
"\ngamma.nr(): " << gamma.nr() << "\ngamma.nr(): " << gamma.nr() <<
...@@ -741,10 +742,10 @@ namespace dlib ...@@ -741,10 +742,10 @@ namespace dlib
"\nrunning_means.k(): " << running_means.k() << "\nrunning_means.k(): " << running_means.k() <<
"\nrunning_means.nr(): " << running_means.nr() << "\nrunning_means.nr(): " << running_means.nr() <<
"\nrunning_means.nc(): " << running_means.nc() << "\nrunning_means.nc(): " << running_means.nc() <<
"\nrunning_invstds.num_samples(): " << running_invstds.num_samples() << "\nrunning_variances.num_samples(): " << running_variances.num_samples() <<
"\nrunning_invstds.k(): " << running_invstds.k() << "\nrunning_variances.k(): " << running_variances.k() <<
"\nrunning_invstds.nr(): " << running_invstds.nr() << "\nrunning_variances.nr(): " << running_variances.nr() <<
"\nrunning_invstds.nc(): " << running_invstds.nc() << "\nrunning_variances.nc(): " << running_variances.nc() <<
"\nsrc.k(): " << src.k() << "\nsrc.k(): " << src.k() <<
"\nsrc.nr(): " << src.nr() << "\nsrc.nr(): " << src.nr() <<
"\nsrc.nc(): " << src.nc() "\nsrc.nc(): " << src.nc()
...@@ -756,14 +757,14 @@ namespace dlib ...@@ -756,14 +757,14 @@ namespace dlib
auto g = gamma.host(); auto g = gamma.host();
auto b = beta.host(); auto b = beta.host();
auto m = running_means.host(); auto m = running_means.host();
auto i = running_invstds.host(); auto v = running_variances.host();
const long num = src.nr()*src.nc(); const long num = src.nr()*src.nc();
for (long n = 0; n < src.num_samples(); ++n) for (long n = 0; n < src.num_samples(); ++n)
{ {
for (long k = 0; k < src.k(); ++k) for (long k = 0; k < src.k(); ++k)
{ {
const float invstd = 1.0f/std::sqrt(i[k] + dlib::tt::BATCH_NORM_EPS); const float invstd = 1.0f/std::sqrt(v[k] + dlib::tt::BATCH_NORM_EPS);
for (long j = 0; j < num; ++j) for (long j = 0; j < num; ++j)
{ {
*d = g[k]*(*s - m[k])*invstd + b[k]; *d = g[k]*(*s - m[k])*invstd + b[k];
...@@ -780,7 +781,7 @@ namespace dlib ...@@ -780,7 +781,7 @@ namespace dlib
resizable_tensor& invstds, resizable_tensor& invstds,
const double averaging_factor, const double averaging_factor,
resizable_tensor& running_means, resizable_tensor& running_means,
resizable_tensor& running_invstds, resizable_tensor& running_variances,
const tensor& src, const tensor& src,
const tensor& gamma, const tensor& gamma,
const tensor& beta const tensor& beta
...@@ -788,7 +789,7 @@ namespace dlib ...@@ -788,7 +789,7 @@ namespace dlib
{ {
DLIB_CASSERT(0 <= averaging_factor && averaging_factor <= 1, "averaging_factor: " << averaging_factor); DLIB_CASSERT(0 <= averaging_factor && averaging_factor <= 1, "averaging_factor: " << averaging_factor);
DLIB_CASSERT(averaging_factor==1 || have_same_dimensions(running_means,means),""); DLIB_CASSERT(averaging_factor==1 || have_same_dimensions(running_means,means),"");
DLIB_CASSERT(averaging_factor==1 || have_same_dimensions(running_invstds,invstds),""); DLIB_CASSERT(averaging_factor==1 || have_same_dimensions(running_variances,invstds),"");
DLIB_CASSERT( DLIB_CASSERT(
src.num_samples() > 1 && src.num_samples() > 1 &&
gamma.num_samples() == 1 && gamma.num_samples() == 1 &&
...@@ -844,8 +845,9 @@ namespace dlib ...@@ -844,8 +845,9 @@ namespace dlib
p_src = src.host(); p_src = src.host();
// compute variances // compute variances
running_invstds.copy_size(invstds); running_variances.copy_size(invstds);
auto rvar = running_invstds.host(); auto rvar = running_variances.host();
// This scale makes the running variances unbiased.
const double scale = (src.num_samples()*num)/(src.num_samples()*num-1.0); const double scale = (src.num_samples()*num)/(src.num_samples()*num-1.0);
for (long k = 0; k < src.k(); ++k) for (long k = 0; k < src.k(); ++k)
{ {
......
...@@ -120,7 +120,7 @@ namespace dlib ...@@ -120,7 +120,7 @@ namespace dlib
const tensor& gamma, const tensor& gamma,
const tensor& beta, const tensor& beta,
const tensor& running_means, const tensor& running_means,
const tensor& running_invstds const tensor& running_variances
); );
void batch_normalize ( void batch_normalize (
...@@ -129,7 +129,7 @@ namespace dlib ...@@ -129,7 +129,7 @@ namespace dlib
resizable_tensor& invstds, resizable_tensor& invstds,
const double averaging_factor, const double averaging_factor,
resizable_tensor& running_means, resizable_tensor& running_means,
resizable_tensor& running_invstds, resizable_tensor& running_variances,
const tensor& src, const tensor& src,
const tensor& gamma, const tensor& gamma,
const tensor& beta const tensor& beta
...@@ -152,7 +152,7 @@ namespace dlib ...@@ -152,7 +152,7 @@ namespace dlib
const tensor& gamma, const tensor& gamma,
const tensor& beta, const tensor& beta,
const tensor& running_means, const tensor& running_means,
const tensor& running_invstds const tensor& running_variances
); );
void batch_normalize_conv ( void batch_normalize_conv (
...@@ -161,7 +161,7 @@ namespace dlib ...@@ -161,7 +161,7 @@ namespace dlib
resizable_tensor& invstds, resizable_tensor& invstds,
const double averaging_factor, const double averaging_factor,
resizable_tensor& running_means, resizable_tensor& running_means,
resizable_tensor& running_invstds, resizable_tensor& running_variances,
const tensor& src, const tensor& src,
const tensor& gamma, const tensor& gamma,
const tensor& beta const tensor& beta
......
...@@ -343,7 +343,7 @@ namespace dlib ...@@ -343,7 +343,7 @@ namespace dlib
const tensor& gamma, const tensor& gamma,
const tensor& beta, const tensor& beta,
const tensor& running_means, const tensor& running_means,
const tensor& running_invstds const tensor& running_variances
) )
{ {
DLIB_CASSERT( DLIB_CASSERT(
...@@ -353,7 +353,7 @@ namespace dlib ...@@ -353,7 +353,7 @@ namespace dlib
gamma.k() == src.k() && gamma.k() == src.k() &&
have_same_dimensions(gamma, beta) && have_same_dimensions(gamma, beta) &&
have_same_dimensions(gamma, running_means) && have_same_dimensions(gamma, running_means) &&
have_same_dimensions(gamma, running_invstds), have_same_dimensions(gamma, running_variances),
"\ngamma.num_samples(): " << gamma.num_samples() << "\ngamma.num_samples(): " << gamma.num_samples() <<
"\ngamma.k(): " << gamma.k() << "\ngamma.k(): " << gamma.k() <<
"\ngamma.nr(): " << gamma.nr() << "\ngamma.nr(): " << gamma.nr() <<
...@@ -366,10 +366,10 @@ namespace dlib ...@@ -366,10 +366,10 @@ namespace dlib
"\nrunning_means.k(): " << running_means.k() << "\nrunning_means.k(): " << running_means.k() <<
"\nrunning_means.nr(): " << running_means.nr() << "\nrunning_means.nr(): " << running_means.nr() <<
"\nrunning_means.nc(): " << running_means.nc() << "\nrunning_means.nc(): " << running_means.nc() <<
"\nrunning_invstds.num_samples(): " << running_invstds.num_samples() << "\nrunning_variances.num_samples(): " << running_variances.num_samples() <<
"\nrunning_invstds.k(): " << running_invstds.k() << "\nrunning_variances.k(): " << running_variances.k() <<
"\nrunning_invstds.nr(): " << running_invstds.nr() << "\nrunning_variances.nr(): " << running_variances.nr() <<
"\nrunning_invstds.nc(): " << running_invstds.nc() << "\nrunning_variances.nc(): " << running_variances.nc() <<
"\nsrc.k(): " << src.k() << "\nsrc.k(): " << src.k() <<
"\nsrc.nr(): " << src.nr() << "\nsrc.nr(): " << src.nr() <<
"\nsrc.nc(): " << src.nc() "\nsrc.nc(): " << src.nc()
...@@ -392,7 +392,7 @@ namespace dlib ...@@ -392,7 +392,7 @@ namespace dlib
gamma.device(), gamma.device(),
beta.device(), beta.device(),
running_means.device(), running_means.device(),
running_invstds.device(), running_variances.device(),
dlib::tt::BATCH_NORM_EPS)); dlib::tt::BATCH_NORM_EPS));
} }
...@@ -402,7 +402,7 @@ namespace dlib ...@@ -402,7 +402,7 @@ namespace dlib
resizable_tensor& invstds, resizable_tensor& invstds,
const double averaging_factor, const double averaging_factor,
resizable_tensor& running_means, resizable_tensor& running_means,
resizable_tensor& running_invstds, resizable_tensor& running_variances,
const tensor& src, const tensor& src,
const tensor& gamma, const tensor& gamma,
const tensor& beta const tensor& beta
...@@ -410,7 +410,7 @@ namespace dlib ...@@ -410,7 +410,7 @@ namespace dlib
{ {
DLIB_CASSERT(0 <= averaging_factor && averaging_factor <= 1, "averaging_factor: " << averaging_factor); DLIB_CASSERT(0 <= averaging_factor && averaging_factor <= 1, "averaging_factor: " << averaging_factor);
DLIB_CASSERT(averaging_factor==1 || have_same_dimensions(running_means,means),""); DLIB_CASSERT(averaging_factor==1 || have_same_dimensions(running_means,means),"");
DLIB_CASSERT(averaging_factor==1 || have_same_dimensions(running_invstds,invstds),""); DLIB_CASSERT(averaging_factor==1 || have_same_dimensions(running_variances,invstds),"");
DLIB_CASSERT( DLIB_CASSERT(
src.num_samples() > 1 && src.num_samples() > 1 &&
gamma.num_samples() == 1 && gamma.num_samples() == 1 &&
...@@ -438,7 +438,7 @@ namespace dlib ...@@ -438,7 +438,7 @@ namespace dlib
means.set_size(1, src.k(), src.nr(), src.nc()); means.set_size(1, src.k(), src.nr(), src.nc());
invstds.copy_size(means); invstds.copy_size(means);
running_means.copy_size(means); running_means.copy_size(means);
running_invstds.copy_size(means); running_variances.copy_size(means);
CHECK_CUDNN(cudnnBatchNormalizationForwardTraining( CHECK_CUDNN(cudnnBatchNormalizationForwardTraining(
context(), context(),
...@@ -454,7 +454,7 @@ namespace dlib ...@@ -454,7 +454,7 @@ namespace dlib
beta.device(), beta.device(),
averaging_factor, averaging_factor,
running_means.device(), running_means.device(),
running_invstds.device(), running_variances.device(),
dlib::tt::BATCH_NORM_EPS, dlib::tt::BATCH_NORM_EPS,
means.device(), means.device(),
invstds.device())); invstds.device()));
...@@ -516,7 +516,7 @@ namespace dlib ...@@ -516,7 +516,7 @@ namespace dlib
const tensor& gamma, const tensor& gamma,
const tensor& beta, const tensor& beta,
const tensor& running_means, const tensor& running_means,
const tensor& running_invstds const tensor& running_variances
) )
{ {
DLIB_CASSERT( DLIB_CASSERT(
...@@ -526,7 +526,7 @@ namespace dlib ...@@ -526,7 +526,7 @@ namespace dlib
gamma.k() == src.k() && gamma.k() == src.k() &&
have_same_dimensions(gamma, beta) && have_same_dimensions(gamma, beta) &&
have_same_dimensions(gamma, running_means) && have_same_dimensions(gamma, running_means) &&
have_same_dimensions(gamma, running_invstds), have_same_dimensions(gamma, running_variances),
"\ngamma.num_samples(): " << gamma.num_samples() << "\ngamma.num_samples(): " << gamma.num_samples() <<
"\ngamma.k(): " << gamma.k() << "\ngamma.k(): " << gamma.k() <<
"\ngamma.nr(): " << gamma.nr() << "\ngamma.nr(): " << gamma.nr() <<
...@@ -539,10 +539,10 @@ namespace dlib ...@@ -539,10 +539,10 @@ namespace dlib
"\nrunning_means.k(): " << running_means.k() << "\nrunning_means.k(): " << running_means.k() <<
"\nrunning_means.nr(): " << running_means.nr() << "\nrunning_means.nr(): " << running_means.nr() <<
"\nrunning_means.nc(): " << running_means.nc() << "\nrunning_means.nc(): " << running_means.nc() <<
"\nrunning_invstds.num_samples(): " << running_invstds.num_samples() << "\nrunning_variances.num_samples(): " << running_variances.num_samples() <<
"\nrunning_invstds.k(): " << running_invstds.k() << "\nrunning_variances.k(): " << running_variances.k() <<
"\nrunning_invstds.nr(): " << running_invstds.nr() << "\nrunning_variances.nr(): " << running_variances.nr() <<
"\nrunning_invstds.nc(): " << running_invstds.nc() << "\nrunning_variances.nc(): " << running_variances.nc() <<
"\nsrc.k(): " << src.k() << "\nsrc.k(): " << src.k() <<
"\nsrc.nr(): " << src.nr() << "\nsrc.nr(): " << src.nr() <<
"\nsrc.nc(): " << src.nc() "\nsrc.nc(): " << src.nc()
...@@ -565,7 +565,7 @@ namespace dlib ...@@ -565,7 +565,7 @@ namespace dlib
gamma.device(), gamma.device(),
beta.device(), beta.device(),
running_means.device(), running_means.device(),
running_invstds.device(), running_variances.device(),
dlib::tt::BATCH_NORM_EPS)); dlib::tt::BATCH_NORM_EPS));
} }
...@@ -575,7 +575,7 @@ namespace dlib ...@@ -575,7 +575,7 @@ namespace dlib
resizable_tensor& invstds, resizable_tensor& invstds,
const double averaging_factor, const double averaging_factor,
resizable_tensor& running_means, resizable_tensor& running_means,
resizable_tensor& running_invstds, resizable_tensor& running_variances,
const tensor& src, const tensor& src,
const tensor& gamma, const tensor& gamma,
const tensor& beta const tensor& beta
...@@ -583,7 +583,7 @@ namespace dlib ...@@ -583,7 +583,7 @@ namespace dlib
{ {
DLIB_CASSERT(0 <= averaging_factor && averaging_factor <= 1, "averaging_factor: " << averaging_factor); DLIB_CASSERT(0 <= averaging_factor && averaging_factor <= 1, "averaging_factor: " << averaging_factor);
DLIB_CASSERT(averaging_factor==1 || have_same_dimensions(running_means,means),""); DLIB_CASSERT(averaging_factor==1 || have_same_dimensions(running_means,means),"");
DLIB_CASSERT(averaging_factor==1 || have_same_dimensions(running_invstds,invstds),""); DLIB_CASSERT(averaging_factor==1 || have_same_dimensions(running_variances,invstds),"");
DLIB_CASSERT( DLIB_CASSERT(
src.num_samples() > 1 && src.num_samples() > 1 &&
gamma.num_samples() == 1 && gamma.num_samples() == 1 &&
...@@ -612,7 +612,7 @@ namespace dlib ...@@ -612,7 +612,7 @@ namespace dlib
means.set_size(1, src.k()); means.set_size(1, src.k());
invstds.copy_size(means); invstds.copy_size(means);
running_means.copy_size(means); running_means.copy_size(means);
running_invstds.copy_size(means); running_variances.copy_size(means);
CHECK_CUDNN(cudnnBatchNormalizationForwardTraining( CHECK_CUDNN(cudnnBatchNormalizationForwardTraining(
context(), context(),
...@@ -628,7 +628,7 @@ namespace dlib ...@@ -628,7 +628,7 @@ namespace dlib
beta.device(), beta.device(),
averaging_factor, averaging_factor,
running_means.device(), running_means.device(),
running_invstds.device(), running_variances.device(),
dlib::tt::BATCH_NORM_EPS, dlib::tt::BATCH_NORM_EPS,
means.device(), means.device(),
invstds.device())); invstds.device()));
......
...@@ -140,7 +140,7 @@ namespace dlib ...@@ -140,7 +140,7 @@ namespace dlib
const tensor& gamma, const tensor& gamma,
const tensor& beta, const tensor& beta,
const tensor& running_means, const tensor& running_means,
const tensor& running_invstds const tensor& running_variances
); );
void batch_normalize ( void batch_normalize (
...@@ -149,7 +149,7 @@ namespace dlib ...@@ -149,7 +149,7 @@ namespace dlib
resizable_tensor& invstds, resizable_tensor& invstds,
const double averaging_factor, const double averaging_factor,
resizable_tensor& running_means, resizable_tensor& running_means,
resizable_tensor& running_invstds, resizable_tensor& running_variances,
const tensor& src, const tensor& src,
const tensor& gamma, const tensor& gamma,
const tensor& beta const tensor& beta
...@@ -174,7 +174,7 @@ namespace dlib ...@@ -174,7 +174,7 @@ namespace dlib
const tensor& gamma, const tensor& gamma,
const tensor& beta, const tensor& beta,
const tensor& running_means, const tensor& running_means,
const tensor& running_invstds const tensor& running_variances
); );
void batch_normalize_conv ( void batch_normalize_conv (
...@@ -183,7 +183,7 @@ namespace dlib ...@@ -183,7 +183,7 @@ namespace dlib
resizable_tensor& invstds, resizable_tensor& invstds,
const double averaging_factor, const double averaging_factor,
resizable_tensor& running_means, resizable_tensor& running_means,
resizable_tensor& running_invstds, resizable_tensor& running_variances,
const tensor& src, const tensor& src,
const tensor& gamma, const tensor& gamma,
const tensor& beta const tensor& beta
......
...@@ -453,9 +453,9 @@ namespace dlib ...@@ -453,9 +453,9 @@ namespace dlib
beta(params,gamma.size()) = 0; beta(params,gamma.size()) = 0;
running_means.copy_size(gamma(params,0)); running_means.copy_size(gamma(params,0));
running_invstds.copy_size(gamma(params,0)); running_variances.copy_size(gamma(params,0));
running_means = 0; running_means = 0;
running_invstds = 1; running_variances = 1;
num_updates = 0; num_updates = 0;
} }
...@@ -470,16 +470,16 @@ namespace dlib ...@@ -470,16 +470,16 @@ namespace dlib
if (num_updates <running_stats_window_size) if (num_updates <running_stats_window_size)
++num_updates; ++num_updates;
if (mode == FC_MODE) if (mode == FC_MODE)
tt::batch_normalize(output, means, invstds, decay, running_means, running_invstds, sub.get_output(), g, b); tt::batch_normalize(output, means, invstds, decay, running_means, running_variances, sub.get_output(), g, b);
else else
tt::batch_normalize_conv(output, means, invstds, decay, running_means, running_invstds, sub.get_output(), g, b); tt::batch_normalize_conv(output, means, invstds, decay, running_means, running_variances, sub.get_output(), g, b);
} }
else // we are running in testing mode so we just linearly scale the input tensor. else // we are running in testing mode so we just linearly scale the input tensor.
{ {
if (mode == FC_MODE) if (mode == FC_MODE)
tt::batch_normalize_inference(output, sub.get_output(), g, b, running_means, running_invstds); tt::batch_normalize_inference(output, sub.get_output(), g, b, running_means, running_variances);
else else
tt::batch_normalize_conv_inference(output, sub.get_output(), g, b, running_means, running_invstds); tt::batch_normalize_conv_inference(output, sub.get_output(), g, b, running_means, running_variances);
} }
} }
...@@ -510,7 +510,7 @@ namespace dlib ...@@ -510,7 +510,7 @@ namespace dlib
serialize(item.means, out); serialize(item.means, out);
serialize(item.invstds, out); serialize(item.invstds, out);
serialize(item.running_means, out); serialize(item.running_means, out);
serialize(item.running_invstds, out); serialize(item.running_variances, out);
serialize(item.num_updates, out); serialize(item.num_updates, out);
serialize(item.running_stats_window_size, out); serialize(item.running_stats_window_size, out);
} }
...@@ -539,7 +539,7 @@ namespace dlib ...@@ -539,7 +539,7 @@ namespace dlib
deserialize(item.means, in); deserialize(item.means, in);
deserialize(item.invstds, in); deserialize(item.invstds, in);
deserialize(item.running_means, in); deserialize(item.running_means, in);
deserialize(item.running_invstds, in); deserialize(item.running_variances, in);
deserialize(item.num_updates, in); deserialize(item.num_updates, in);
deserialize(item.running_stats_window_size, in); deserialize(item.running_stats_window_size, in);
...@@ -551,9 +551,9 @@ namespace dlib ...@@ -551,9 +551,9 @@ namespace dlib
deserialize(_mode, in); deserialize(_mode, in);
if (mode != (layer_mode)_mode) throw serialization_error("Wrong mode found while deserializing dlib::bn_"); if (mode != (layer_mode)_mode) throw serialization_error("Wrong mode found while deserializing dlib::bn_");
// We also need to flip the running_invstds around since the previous // We also need to flip the running_variances around since the previous
// format saved the inverse standard deviations instead of variances. // format saved the inverse standard deviations instead of variances.
item.running_invstds = 1.0f/squared(mat(item.running_invstds)) - tt::BATCH_NORM_EPS; item.running_variances = 1.0f/squared(mat(item.running_variances)) - tt::BATCH_NORM_EPS;
} }
} }
...@@ -564,7 +564,7 @@ namespace dlib ...@@ -564,7 +564,7 @@ namespace dlib
resizable_tensor params; resizable_tensor params;
alias_tensor gamma, beta; alias_tensor gamma, beta;
resizable_tensor means, running_means; resizable_tensor means, running_means;
resizable_tensor invstds, running_invstds; resizable_tensor invstds, running_variances;
unsigned long num_updates; unsigned long num_updates;
unsigned long running_stats_window_size; unsigned long running_stats_window_size;
}; };
...@@ -911,7 +911,7 @@ namespace dlib ...@@ -911,7 +911,7 @@ namespace dlib
auto sg = gamma(temp,0); auto sg = gamma(temp,0);
auto sb = beta(temp,gamma.size()); auto sb = beta(temp,gamma.size());
g = pointwise_multiply(mat(sg), 1.0f/sqrt(mat(item.running_invstds)+tt::BATCH_NORM_EPS)); g = pointwise_multiply(mat(sg), 1.0f/sqrt(mat(item.running_variances)+tt::BATCH_NORM_EPS));
b = mat(sb) - pointwise_multiply(mat(g), mat(item.running_means)); b = mat(sb) - pointwise_multiply(mat(g), mat(item.running_means));
} }
......
...@@ -274,13 +274,13 @@ namespace dlib { namespace tt ...@@ -274,13 +274,13 @@ namespace dlib { namespace tt
const tensor& gamma, const tensor& gamma,
const tensor& beta, const tensor& beta,
const tensor& running_means, const tensor& running_means,
const tensor& running_invstds const tensor& running_variances
) )
{ {
#ifdef DLIB_USE_CUDA #ifdef DLIB_USE_CUDA
cuda::batch_normalize_inference(dest,src,gamma,beta,running_means,running_invstds); cuda::batch_normalize_inference(dest,src,gamma,beta,running_means,running_variances);
#else #else
cpu::batch_normalize_inference(dest,src,gamma,beta,running_means,running_invstds); cpu::batch_normalize_inference(dest,src,gamma,beta,running_means,running_variances);
#endif #endif
} }
...@@ -290,16 +290,16 @@ namespace dlib { namespace tt ...@@ -290,16 +290,16 @@ namespace dlib { namespace tt
resizable_tensor& vars, resizable_tensor& vars,
const double averaging_factor, const double averaging_factor,
resizable_tensor& running_means, resizable_tensor& running_means,
resizable_tensor& running_invstds, resizable_tensor& running_variances,
const tensor& src, const tensor& src,
const tensor& gamma, const tensor& gamma,
const tensor& beta const tensor& beta
) )
{ {
#ifdef DLIB_USE_CUDA #ifdef DLIB_USE_CUDA
cuda::batch_normalize(dest,means,vars,averaging_factor,running_means,running_invstds,src,gamma,beta); cuda::batch_normalize(dest,means,vars,averaging_factor,running_means,running_variances,src,gamma,beta);
#else #else
cpu::batch_normalize(dest,means,vars,averaging_factor,running_means,running_invstds,src,gamma,beta); cpu::batch_normalize(dest,means,vars,averaging_factor,running_means,running_variances,src,gamma,beta);
#endif #endif
} }
...@@ -330,13 +330,13 @@ namespace dlib { namespace tt ...@@ -330,13 +330,13 @@ namespace dlib { namespace tt
const tensor& gamma, const tensor& gamma,
const tensor& beta, const tensor& beta,
const tensor& running_means, const tensor& running_means,
const tensor& running_invstds const tensor& running_variances
) )
{ {
#ifdef DLIB_USE_CUDA #ifdef DLIB_USE_CUDA
cuda::batch_normalize_conv_inference(dest,src,gamma,beta,running_means,running_invstds); cuda::batch_normalize_conv_inference(dest,src,gamma,beta,running_means,running_variances);
#else #else
cpu::batch_normalize_conv_inference(dest,src,gamma,beta,running_means,running_invstds); cpu::batch_normalize_conv_inference(dest,src,gamma,beta,running_means,running_variances);
#endif #endif
} }
...@@ -346,16 +346,16 @@ namespace dlib { namespace tt ...@@ -346,16 +346,16 @@ namespace dlib { namespace tt
resizable_tensor& vars, resizable_tensor& vars,
const double averaging_factor, const double averaging_factor,
resizable_tensor& running_means, resizable_tensor& running_means,
resizable_tensor& running_invstds, resizable_tensor& running_variances,
const tensor& src, const tensor& src,
const tensor& gamma, const tensor& gamma,
const tensor& beta const tensor& beta
) )
{ {
#ifdef DLIB_USE_CUDA #ifdef DLIB_USE_CUDA
cuda::batch_normalize_conv(dest,means,vars,averaging_factor,running_means,running_invstds,src,gamma,beta); cuda::batch_normalize_conv(dest,means,vars,averaging_factor,running_means,running_variances,src,gamma,beta);
#else #else
cpu::batch_normalize_conv(dest,means,vars,averaging_factor,running_means,running_invstds,src,gamma,beta); cpu::batch_normalize_conv(dest,means,vars,averaging_factor,running_means,running_variances,src,gamma,beta);
#endif #endif
} }
......
...@@ -294,7 +294,7 @@ namespace dlib { namespace tt ...@@ -294,7 +294,7 @@ namespace dlib { namespace tt
const tensor& gamma, const tensor& gamma,
const tensor& beta, const tensor& beta,
const tensor& running_means, const tensor& running_means,
const tensor& running_invstds const tensor& running_variances
); );
/*! /*!
requires requires
...@@ -304,12 +304,12 @@ namespace dlib { namespace tt ...@@ -304,12 +304,12 @@ namespace dlib { namespace tt
- gamma.k() == src.k() - gamma.k() == src.k()
- have_same_dimensions(gamma, beta) - have_same_dimensions(gamma, beta)
- have_same_dimensions(gamma, running_means) - have_same_dimensions(gamma, running_means)
- have_same_dimensions(gamma, running_invstds) - have_same_dimensions(gamma, running_variances)
ensures ensures
- Just linearly transforms src as a call to batch_normalize() would if the resulting - Linearly transforms src as a call to batch_normalize() would if src had means
means and invstds were running_means and running_invstds. That is, this function and variances as given by running_means and running_variances. That is, this
performs: function performs:
dest = gamma*(src-running_means)*running_invstds + beta dest = gamma*(src-running_means)/sqrt(running_variances+BATCH_NORM_EPS) + beta
Note that it does it in a pointwise fashion over the samples in src. Note that it does it in a pointwise fashion over the samples in src.
!*/ !*/
...@@ -319,7 +319,7 @@ namespace dlib { namespace tt ...@@ -319,7 +319,7 @@ namespace dlib { namespace tt
resizable_tensor& invstds, resizable_tensor& invstds,
const double averaging_factor, const double averaging_factor,
resizable_tensor& running_means, resizable_tensor& running_means,
resizable_tensor& running_invstds, resizable_tensor& running_variances,
const tensor& src, const tensor& src,
const tensor& gamma, const tensor& gamma,
const tensor& beta const tensor& beta
...@@ -335,7 +335,7 @@ namespace dlib { namespace tt ...@@ -335,7 +335,7 @@ namespace dlib { namespace tt
- 0 <= averaging_factor <= 1 - 0 <= averaging_factor <= 1
- if (averaging_factor != 1) - if (averaging_factor != 1)
- have_same_dimensions(running_means, means) == true - have_same_dimensions(running_means, means) == true
- have_same_dimensions(running_invstds, invstds) == true - have_same_dimensions(running_variances, invstds) == true
ensures ensures
- have_same_dimensions(#dest, src) == true - have_same_dimensions(#dest, src) == true
- #means.num_samples() == 1 - #means.num_samples() == 1
...@@ -347,7 +347,7 @@ namespace dlib { namespace tt ...@@ -347,7 +347,7 @@ namespace dlib { namespace tt
- #means == the mean values of the contents of src. - #means == the mean values of the contents of src.
- #invstds == 1/(the standard deviation values of the contents of src). - #invstds == 1/(the standard deviation values of the contents of src).
- #running_means = (1-averaging_factor)*mat(#running_means) + averaging_factor*mat(#means); - #running_means = (1-averaging_factor)*mat(#running_means) + averaging_factor*mat(#means);
- #running_invstds = (1-averaging_factor)*mat(#running_invstds) + averaging_factor*mat(#invstds); - #running_variances = (1-averaging_factor)*mat(#running_variances) + averaging_factor*(variance of contents of src);
!*/ !*/
void batch_normalize_gradient ( void batch_normalize_gradient (
...@@ -391,7 +391,7 @@ namespace dlib { namespace tt ...@@ -391,7 +391,7 @@ namespace dlib { namespace tt
const tensor& gamma, const tensor& gamma,
const tensor& beta, const tensor& beta,
const tensor& running_means, const tensor& running_means,
const tensor& running_invstds const tensor& running_variances
); );
/*! /*!
requires requires
...@@ -401,13 +401,13 @@ namespace dlib { namespace tt ...@@ -401,13 +401,13 @@ namespace dlib { namespace tt
- gamma.k() == src.k() - gamma.k() == src.k()
- have_same_dimensions(gamma, beta) - have_same_dimensions(gamma, beta)
- have_same_dimensions(gamma, running_means) - have_same_dimensions(gamma, running_means)
- have_same_dimensions(gamma, running_invstds) - have_same_dimensions(gamma, running_variances)
ensures ensures
- Just linearly transforms src as a call to batch_normalize_conv() would if the resulting - Linearly transforms src as a call to batch_normalize_conv() would if src had
means and invstds were running_means and running_invstds. That is, this function means and variances as given by running_means and running_variances. That
performs: is, this function performs:
dest = gamma*(src-running_means)*running_invstds + beta dest = gamma*(src-running_means)/sqrt(running_variances+BATCH_NORM_EPS) + beta
Note that it does it in a pointwise fashion over the samples, rows, and Note that it does this in a pointwise fashion over the samples, rows, and
columns in src. columns in src.
!*/ !*/
...@@ -417,7 +417,7 @@ namespace dlib { namespace tt ...@@ -417,7 +417,7 @@ namespace dlib { namespace tt
resizable_tensor& invstds, resizable_tensor& invstds,
const double averaging_factor, const double averaging_factor,
resizable_tensor& running_means, resizable_tensor& running_means,
resizable_tensor& running_invstds, resizable_tensor& running_variances,
const tensor& src, const tensor& src,
const tensor& gamma, const tensor& gamma,
const tensor& beta const tensor& beta
...@@ -431,7 +431,7 @@ namespace dlib { namespace tt ...@@ -431,7 +431,7 @@ namespace dlib { namespace tt
- 0 <= averaging_factor <= 1 - 0 <= averaging_factor <= 1
- if (averaging_factor != 1) - if (averaging_factor != 1)
- have_same_dimensions(running_means, means) == true - have_same_dimensions(running_means, means) == true
- have_same_dimensions(running_invstds, invstds) == true - have_same_dimensions(running_variances, invstds) == true
ensures ensures
- have_same_dimensions(#dest, src) == true - have_same_dimensions(#dest, src) == true
- #means.num_samples()==means.nr()==means.nc() == 1 - #means.num_samples()==means.nr()==means.nc() == 1
...@@ -441,7 +441,7 @@ namespace dlib { namespace tt ...@@ -441,7 +441,7 @@ namespace dlib { namespace tt
- #means == the mean values of the contents of src. - #means == the mean values of the contents of src.
- #invstds == 1/(the standard deviation values of the contents of src). - #invstds == 1/(the standard deviation values of the contents of src).
- #running_means = (1-averaging_factor)*mat(#running_means) + averaging_factor*mat(#means); - #running_means = (1-averaging_factor)*mat(#running_means) + averaging_factor*mat(#means);
- #running_invstds = (1-averaging_factor)*mat(#running_invstds) + averaging_factor*mat(#invstds); - #running_variances = (1-averaging_factor)*mat(#running_variances) + averaging_factor*(variance of contents of src);
!*/ !*/
void batch_normalize_conv_gradient ( void batch_normalize_conv_gradient (
......
...@@ -164,12 +164,12 @@ namespace ...@@ -164,12 +164,12 @@ namespace
beta = 0; beta = 0;
resizable_tensor running_means; resizable_tensor running_means;
resizable_tensor running_invstds; resizable_tensor running_variances;
batch_normalize(dest, means, vars, 1, running_means, running_invstds, src, gamma, beta); batch_normalize(dest, means, vars, 1, running_means, running_variances, src, gamma, beta);
const double scale = (src.num_samples())/(src.num_samples()-1.0); const double scale = (src.num_samples())/(src.num_samples()-1.0);
// Turn back into biased variance estimate because that's how batch_normalize() works, so if we want to match it this is necessary. // Turn back into biased variance estimate because that's how batch_normalize() works, so if we want to match it this is necessary.
running_invstds = mat(running_invstds)/scale; running_variances = mat(running_variances)/scale;
batch_normalize_inference(dest2, src, gamma, beta, running_means, running_invstds); batch_normalize_inference(dest2, src, gamma, beta, running_means, running_variances);
DLIB_TEST_MSG(max(abs(mat(dest2)-mat(dest))) < 1e-5, max(abs(mat(dest2)-mat(dest)))); DLIB_TEST_MSG(max(abs(mat(dest2)-mat(dest))) < 1e-5, max(abs(mat(dest2)-mat(dest))));
...@@ -177,7 +177,7 @@ namespace ...@@ -177,7 +177,7 @@ namespace
auto f = [&](float eps) { auto f = [&](float eps) {
const float old = src.host()[idx]; const float old = src.host()[idx];
src.host()[idx] += eps; src.host()[idx] += eps;
batch_normalize(dest, means, vars, 1, running_means, running_invstds, src, gamma, beta); batch_normalize(dest, means, vars, 1, running_means, running_variances, src, gamma, beta);
float result = dot(gradient_input, dest); float result = dot(gradient_input, dest);
src.host()[idx] = old; src.host()[idx] = old;
return result; return result;
...@@ -189,7 +189,7 @@ namespace ...@@ -189,7 +189,7 @@ namespace
auto f = [&](float eps) { auto f = [&](float eps) {
const float old = gamma.host()[idx]; const float old = gamma.host()[idx];
gamma.host()[idx] += eps; gamma.host()[idx] += eps;
batch_normalize(dest, means, vars, 1, running_means, running_invstds, src, gamma, beta); batch_normalize(dest, means, vars, 1, running_means, running_variances, src, gamma, beta);
float result = dot(gradient_input, dest); float result = dot(gradient_input, dest);
gamma.host()[idx] = old; gamma.host()[idx] = old;
return result; return result;
...@@ -201,7 +201,7 @@ namespace ...@@ -201,7 +201,7 @@ namespace
auto f = [&](float eps) { auto f = [&](float eps) {
const float old = beta.host()[idx]; const float old = beta.host()[idx];
beta.host()[idx] += eps; beta.host()[idx] += eps;
batch_normalize(dest, means, vars, 1, running_means, running_invstds, src, gamma, beta); batch_normalize(dest, means, vars, 1, running_means, running_variances, src, gamma, beta);
float result = dot(gradient_input, dest); float result = dot(gradient_input, dest);
beta.host()[idx] = old; beta.host()[idx] = old;
return result; return result;
...@@ -247,13 +247,13 @@ namespace ...@@ -247,13 +247,13 @@ namespace
beta = 0; beta = 0;
resizable_tensor running_means; resizable_tensor running_means;
resizable_tensor running_invstds; resizable_tensor running_variances;
batch_normalize_conv(dest, means, vars, 1, running_means, running_invstds, src, gamma, beta); batch_normalize_conv(dest, means, vars, 1, running_means, running_variances, src, gamma, beta);
const double scale = (src.num_samples()*src.nr()*src.nc())/(src.num_samples()*src.nr()*src.nc()-1.0); const double scale = (src.num_samples()*src.nr()*src.nc())/(src.num_samples()*src.nr()*src.nc()-1.0);
// Turn back into biased variance estimate because that's how // Turn back into biased variance estimate because that's how
// batch_normalize_conv() works, so if we want to match it this is necessary. // batch_normalize_conv() works, so if we want to match it this is necessary.
running_invstds = mat(running_invstds)/scale; running_variances = mat(running_variances)/scale;
batch_normalize_conv_inference(dest2, src, gamma, beta, running_means, running_invstds); batch_normalize_conv_inference(dest2, src, gamma, beta, running_means, running_variances);
DLIB_TEST(max(abs(mat(dest2)-mat(dest))) < 1e-5); DLIB_TEST(max(abs(mat(dest2)-mat(dest))) < 1e-5);
...@@ -261,7 +261,7 @@ namespace ...@@ -261,7 +261,7 @@ namespace
auto f = [&](float eps) { auto f = [&](float eps) {
const float old = src.host()[idx]; const float old = src.host()[idx];
src.host()[idx] += eps; src.host()[idx] += eps;
batch_normalize_conv(dest, means, vars, 1, running_means, running_invstds, src, gamma, beta); batch_normalize_conv(dest, means, vars, 1, running_means, running_variances, src, gamma, beta);
float result = dot(gradient_input, dest); float result = dot(gradient_input, dest);
src.host()[idx] = old; src.host()[idx] = old;
return result; return result;
...@@ -273,7 +273,7 @@ namespace ...@@ -273,7 +273,7 @@ namespace
auto f = [&](float eps) { auto f = [&](float eps) {
const float old = gamma.host()[idx]; const float old = gamma.host()[idx];
gamma.host()[idx] += eps; gamma.host()[idx] += eps;
batch_normalize_conv(dest, means, vars, 1, running_means, running_invstds, src, gamma, beta); batch_normalize_conv(dest, means, vars, 1, running_means, running_variances, src, gamma, beta);
float result = dot(gradient_input, dest); float result = dot(gradient_input, dest);
gamma.host()[idx] = old; gamma.host()[idx] = old;
return result; return result;
...@@ -285,7 +285,7 @@ namespace ...@@ -285,7 +285,7 @@ namespace
auto f = [&](float eps) { auto f = [&](float eps) {
const float old = beta.host()[idx]; const float old = beta.host()[idx];
beta.host()[idx] += eps; beta.host()[idx] += eps;
batch_normalize_conv(dest, means, vars, 1, running_means, running_invstds, src, gamma, beta); batch_normalize_conv(dest, means, vars, 1, running_means, running_variances, src, gamma, beta);
float result = dot(gradient_input, dest); float result = dot(gradient_input, dest);
beta.host()[idx] = old; beta.host()[idx] = old;
return result; return result;
...@@ -775,7 +775,7 @@ namespace ...@@ -775,7 +775,7 @@ namespace
resizable_tensor means, means2; resizable_tensor means, means2;
resizable_tensor invstds, invstds2; resizable_tensor invstds, invstds2;
resizable_tensor running_means, running_means2; resizable_tensor running_means, running_means2;
resizable_tensor running_invstds, running_invstds2; resizable_tensor running_variances, running_variances2;
resizable_tensor src(64,20,100,100); resizable_tensor src(64,20,100,100);
resizable_tensor gamma(1,20,100,100); resizable_tensor gamma(1,20,100,100);
resizable_tensor beta(1,20,100,100); resizable_tensor beta(1,20,100,100);
...@@ -785,20 +785,20 @@ namespace ...@@ -785,20 +785,20 @@ namespace
rnd.fill_uniform(src); rnd.fill_uniform(src);
cpu::batch_normalize(dest, means, invstds, 1, running_means, running_invstds, src, gamma, beta); cpu::batch_normalize(dest, means, invstds, 1, running_means, running_variances, src, gamma, beta);
cuda::batch_normalize(dest2,means2,invstds2, 1, running_means2, running_invstds2, src, gamma, beta); cuda::batch_normalize(dest2,means2,invstds2, 1, running_means2, running_variances2, src, gamma, beta);
dlog << LINFO << "dest error: "<< max(abs(mat(dest) -mat(dest2))); dlog << LINFO << "dest error: "<< max(abs(mat(dest) -mat(dest2)));
dlog << LINFO << "means error: "<< max(abs(mat(means) -mat(means2))); dlog << LINFO << "means error: "<< max(abs(mat(means) -mat(means2)));
dlog << LINFO << "invstds error: "<< max(abs(mat(invstds) -mat(invstds2))); dlog << LINFO << "invstds error: "<< max(abs(mat(invstds) -mat(invstds2)));
dlog << LINFO << "running_means error: "<< max(abs(mat(running_means) -mat(running_means2))); dlog << LINFO << "running_means error: "<< max(abs(mat(running_means) -mat(running_means2)));
dlog << LINFO << "running_invstds error: "<< max(abs(mat(running_invstds) -mat(running_invstds2))); dlog << LINFO << "running_variances error: "<< max(abs(mat(running_variances) -mat(running_variances2)));
DLIB_TEST(max(abs(mat(dest) -mat(dest2))) < 1e-4); DLIB_TEST(max(abs(mat(dest) -mat(dest2))) < 1e-4);
DLIB_TEST(max(abs(mat(means) -mat(means2))) < 1e-4); DLIB_TEST(max(abs(mat(means) -mat(means2))) < 1e-4);
DLIB_TEST(max(abs(mat(invstds) -mat(invstds2))) < 1e-4); DLIB_TEST(max(abs(mat(invstds) -mat(invstds2))) < 1e-4);
DLIB_TEST(max(abs(mat(running_means) -mat(running_means2))) < 1e-4); DLIB_TEST(max(abs(mat(running_means) -mat(running_means2))) < 1e-4);
DLIB_TEST(max(abs(mat(running_invstds) -mat(running_invstds2))) < 1e-4); DLIB_TEST(max(abs(mat(running_variances) -mat(running_variances2))) < 1e-4);
// now check that the gradients match as well // now check that the gradients match as well
...@@ -830,7 +830,7 @@ namespace ...@@ -830,7 +830,7 @@ namespace
resizable_tensor means, means2; resizable_tensor means, means2;
resizable_tensor invstds, invstds2; resizable_tensor invstds, invstds2;
resizable_tensor running_means, running_means2; resizable_tensor running_means, running_means2;
resizable_tensor running_invstds, running_invstds2; resizable_tensor running_variances, running_variances2;
resizable_tensor src(2,8,10,9); resizable_tensor src(2,8,10,9);
resizable_tensor gamma(1,8); resizable_tensor gamma(1,8);
resizable_tensor beta(1,8); resizable_tensor beta(1,8);
...@@ -839,20 +839,20 @@ namespace ...@@ -839,20 +839,20 @@ namespace
tt::tensor_rand rnd; tt::tensor_rand rnd;
rnd.fill_uniform(src); rnd.fill_uniform(src);
cpu::batch_normalize_conv(dest,means,invstds,1,running_means,running_invstds, src, gamma, beta); cpu::batch_normalize_conv(dest,means,invstds,1,running_means,running_variances, src, gamma, beta);
cuda::batch_normalize_conv(dest2,means2,invstds2,1,running_means2,running_invstds2, src, gamma, beta); cuda::batch_normalize_conv(dest2,means2,invstds2,1,running_means2,running_variances2, src, gamma, beta);
dlog << LINFO << "dest error: "<< max(abs(mat(dest) -mat(dest2))); dlog << LINFO << "dest error: "<< max(abs(mat(dest) -mat(dest2)));
dlog << LINFO << "means error: "<< max(abs(mat(means) -mat(means2))); dlog << LINFO << "means error: "<< max(abs(mat(means) -mat(means2)));
dlog << LINFO << "invstds error: "<< max(abs(mat(invstds) -mat(invstds2))); dlog << LINFO << "invstds error: "<< max(abs(mat(invstds) -mat(invstds2)));
dlog << LINFO << "running_means error: "<< max(abs(mat(running_means) -mat(running_means2))); dlog << LINFO << "running_means error: "<< max(abs(mat(running_means) -mat(running_means2)));
dlog << LINFO << "running_invstds error: "<< max(abs(mat(running_invstds) -mat(running_invstds2))); dlog << LINFO << "running_variances error: "<< max(abs(mat(running_variances) -mat(running_variances2)));
DLIB_TEST(max(abs(mat(dest) -mat(dest2))) < 1e-4); DLIB_TEST(max(abs(mat(dest) -mat(dest2))) < 1e-4);
DLIB_TEST(max(abs(mat(means) -mat(means2))) < 1e-4); DLIB_TEST(max(abs(mat(means) -mat(means2))) < 1e-4);
DLIB_TEST(max(abs(mat(invstds) -mat(invstds2))) < 1e-4); DLIB_TEST(max(abs(mat(invstds) -mat(invstds2))) < 1e-4);
DLIB_TEST(max(abs(mat(running_means) -mat(running_means2))) < 1e-4); DLIB_TEST(max(abs(mat(running_means) -mat(running_means2))) < 1e-4);
DLIB_TEST(max(abs(mat(running_invstds) -mat(running_invstds2))) < 1e-4); DLIB_TEST(max(abs(mat(running_variances) -mat(running_variances2))) < 1e-4);
resizable_tensor gradient_input; resizable_tensor gradient_input;
resizable_tensor src_grad, gamma_grad, beta_grad; resizable_tensor src_grad, gamma_grad, beta_grad;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment