Commit b818b553 authored by Davis King

Made the batch normalization code assign the parameter gradient outputs instead
of adding to them, so that it's consistent with how the layer interface expects
this to be done.
parent 2c1e67f1
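
For context: dlib's layer interface has the data gradient (src_grad) accumulated into, typically because it may already hold contributions from other paths through the network, while parameter gradients are simply written. Below is a minimal sketch of the two conventions in plain C++ with hypothetical names, not dlib's actual tensor types:

#include <vector>

// Data gradient: accumulate, because src_grad may already hold a
// contribution from elsewhere in the network.
void add_to_src_grad(std::vector<float>& src_grad, const std::vector<float>& g)
{
    for (std::size_t i = 0; i < src_grad.size(); ++i)
        src_grad[i] += g[i];
}

// Parameter gradient: assign.  The patch below implements this by zeroing
// the tensor first, so the existing "+=" accumulation loops net out to "=".
void assign_to_beta_grad(std::vector<float>& beta_grad, const std::vector<float>& g)
{
    for (std::size_t i = 0; i < beta_grad.size(); ++i)
        beta_grad[i] = 0;
    for (std::size_t i = 0; i < beta_grad.size(); ++i)
        beta_grad[i] += g[i];
}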
@@ -224,6 +224,9 @@ namespace dlib
             DLIB_CASSERT(num == beta_grad.size(),"");
             DLIB_CASSERT(have_same_dimensions(gradient_input, src),"");
             DLIB_CASSERT(have_same_dimensions(gradient_input, src_grad),"");
+            beta_grad = 0;
+            gamma_grad = 0;
             auto p_grad = gradient_input.host();
             auto p_src = src.host();
             const auto p_gamma = gamma.host();
...
@@ -268,6 +268,8 @@ namespace dlib
             {
                 dvars[i] = 0;
                 dmeans[i] = 0;
+                gamma_grad[i] = 0;
+                beta_grad[i] = 0;
                 for (long n = 0; n < num_samples; ++n)
                 {
...
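
In this per-channel CPU path the zeroing happens element-wise: each channel's gamma_grad[i] and beta_grad[i] are cleared just before the sample loop accumulates into them. A compilable sketch of that pattern follows; shapes, names, and layout (num_samples x num, row-major) are assumptions for illustration, not dlib's actual code, though the accumulated quantities are the standard batch-norm parameter gradients:

#include <vector>

void param_gradients(const std::vector<float>& dx, const std::vector<float>& src,
                     const std::vector<float>& means, const std::vector<float>& invstds,
                     long num_samples, long num,
                     std::vector<float>& gamma_grad, std::vector<float>& beta_grad)
{
    for (long i = 0; i < num; ++i)
    {
        gamma_grad[i] = 0;   // clear stale values first, as the patch does,
        beta_grad[i]  = 0;   // so the "+=" below nets out to assignment
        for (long n = 0; n < num_samples; ++n)
        {
            const float g = dx[n*num + i];
            // standard batch-norm parameter gradients:
            gamma_grad[i] += g * (src[n*num + i] - means[i]) * invstds[i];
            beta_grad[i]  += g;
        }
    }
}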
@@ -646,6 +648,8 @@ namespace dlib
            dmeans.copy_size(means);
            dvars = 0;
            dmeans = 0;
+           gamma_grad = 0;
+           beta_grad = 0;
            _cuda_batch_normalize_conv_gradient1<<<512,512>>>(
                gradient_input.device(),
...
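
On the CUDA path the tensors are zeroed before the kernel launch. Presumably the kernel accumulates partial per-thread results into gamma_grad and beta_grad (for example via atomicAdd), so the outputs must start at zero for the net effect to be assignment. A hypothetical sketch of that accumulation style, not the actual _cuda_batch_normalize_conv_gradient1 kernel:

// Many threads contribute partial sums to the same per-channel slot, so
// correctness depends on beta_grad having been zeroed before launch --
// which is exactly what the added lines above do.
__global__ void accumulate_beta_grad(const float* gradient_input, float* beta_grad,
                                     long n, long channels, long spatial)
{
    // grid-stride loop over every element of the gradient tensor
    for (long i = blockIdx.x*blockDim.x + threadIdx.x; i < n; i += gridDim.x*blockDim.x)
    {
        const long k = (i/spatial) % channels;       // channel of element i (NCHW layout)
        atomicAdd(&beta_grad[k], gradient_input[i]); // accumulates: needs pre-zeroed output
    }
}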
...@@ -245,8 +245,8 @@ namespace dlib { namespace tt ...@@ -245,8 +245,8 @@ namespace dlib { namespace tt
- Let f(src,gamma,beta) == dot(gradient_input, dest output of - Let f(src,gamma,beta) == dot(gradient_input, dest output of
batch_normalize(dest,means,invstds,src,gamma,beta)) batch_normalize(dest,means,invstds,src,gamma,beta))
- Adds the gradient of f() with respect to src to #src_grad. - Adds the gradient of f() with respect to src to #src_grad.
- Adds the gradient of f() with respect to gamma to #gamma_grad. - Assigns the gradient of f() with respect to gamma to #gamma_grad.
- Adds the gradient of f() with respect to beta to #beta_grad. - Assigns the gradient of f() with respect to beta to #beta_grad.
!*/ !*/
private: private:
#ifdef DLIB_USE_CUDA #ifdef DLIB_USE_CUDA
@@ -312,8 +312,8 @@ namespace dlib { namespace tt
            - Let f(src,gamma,beta) == dot(gradient_input, dest output of
              batch_normalize_conv(dest,means,invstds,src,gamma,beta))
            - Adds the gradient of f() with respect to src to #src_grad.
-           - Adds the gradient of f() with respect to gamma to #gamma_grad.
-           - Adds the gradient of f() with respect to beta to #beta_grad.
+           - Assigns the gradient of f() with respect to gamma to #gamma_grad.
+           - Assigns the gradient of f() with respect to beta to #beta_grad.
        !*/
    private:
#ifdef DLIB_USE_CUDA
...
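
The spec change matters for callers that reuse the same gradient tensors across iterations: under the old add-to behavior, a second call without manually clearing the parameter gradients would fold stale values into the result. A toy stand-in (hypothetical functions, not dlib's API) demonstrating the difference:

#include <cassert>
#include <vector>

void grad_add(std::vector<float>& beta_grad, float g)    { beta_grad[0] += g; }
void grad_assign(std::vector<float>& beta_grad, float g) { beta_grad[0] = 0; beta_grad[0] += g; }

int main()
{
    std::vector<float> bg(1, 0.0f);
    grad_add(bg, 2.0f);
    grad_add(bg, 2.0f);      // stale value folds in: bg[0] == 4, not 2
    assert(bg[0] == 4.0f);

    grad_assign(bg, 2.0f);
    grad_assign(bg, 2.0f);   // safe to reuse the tensor across iterations
    assert(bg[0] == 2.0f);
    return 0;
}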