Commit b818b553 authored by Davis King

Made the batch normalization code assign the parameter gradient outputs instead
of adding to them, so that it's consistent with how the layer interface expects
this to be done.
parent 2c1e67f1
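
For context: dlib's layer interface has the data gradient (src_grad) accumulated into, typically because it may already hold contributions from other paths through the network, while parameter gradients are simply written. Below is a minimal sketch of the two conventions in plain C++ with hypothetical names, not dlib's actual tensor types:

#include <vector>

// Data gradient: accumulate, because src_grad may already hold a
// contribution from elsewhere in the network.
void add_to_src_grad(std::vector<float>& src_grad, const std::vector<float>& g)
{
    for (std::size_t i = 0; i < src_grad.size(); ++i)
        src_grad[i] += g[i];
}

// Parameter gradient: assign.  The patch below implements this by zeroing
// the tensor first, so the existing "+=" accumulation loops net out to "=".
void assign_to_beta_grad(std::vector<float>& beta_grad, const std::vector<float>& g)
{
    for (std::size_t i = 0; i < beta_grad.size(); ++i)
        beta_grad[i] = 0;
    for (std::size_t i = 0; i < beta_grad.size(); ++i)
        beta_grad[i] += g[i];
}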
@@ -224,6 +224,9 @@ namespace dlib
             DLIB_CASSERT(num == beta_grad.size(),"");
             DLIB_CASSERT(have_same_dimensions(gradient_input, src),"");
             DLIB_CASSERT(have_same_dimensions(gradient_input, src_grad),"");
+            beta_grad = 0;
+            gamma_grad = 0;
             auto p_grad = gradient_input.host();
             auto p_src = src.host();
             const auto p_gamma = gamma.host();
...
@@ -268,6 +268,8 @@ namespace dlib
             {
                 dvars[i] = 0;
                 dmeans[i] = 0;
+                gamma_grad[i] = 0;
+                beta_grad[i] = 0;
                 for (long n = 0; n < num_samples; ++n)
                 {
...
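
In this per-channel CPU path the zeroing happens element-wise: each channel's gamma_grad[i] and beta_grad[i] are cleared just before the sample loop accumulates into them. A compilable sketch of that pattern follows; shapes, names, and layout (num_samples x num, row-major) are assumptions for illustration, not dlib's actual code, though the accumulated quantities are the standard batch-norm parameter gradients:

#include <vector>

void param_gradients(const std::vector<float>& dx, const std::vector<float>& src,
                     const std::vector<float>& means, const std::vector<float>& invstds,
                     long num_samples, long num,
                     std::vector<float>& gamma_grad, std::vector<float>& beta_grad)
{
    for (long i = 0; i < num; ++i)
    {
        gamma_grad[i] = 0;   // clear stale values first, as the patch does,
        beta_grad[i]  = 0;   // so the "+=" below nets out to assignment
        for (long n = 0; n < num_samples; ++n)
        {
            const float g = dx[n*num + i];
            // standard batch-norm parameter gradients:
            gamma_grad[i] += g * (src[n*num + i] - means[i]) * invstds[i];
            beta_grad[i]  += g;
        }
    }
}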
@@ -646,6 +648,8 @@ namespace dlib
            dmeans.copy_size(means);
            dvars = 0;
            dmeans = 0;
+           gamma_grad = 0;
+           beta_grad = 0;
            _cuda_batch_normalize_conv_gradient1<<<512,512>>>(
                gradient_input.device(),
...
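
On the CUDA path the tensors are zeroed before the kernel launch. Presumably the kernel accumulates partial per-thread results into gamma_grad and beta_grad (for example via atomicAdd), so the outputs must start at zero for the net effect to be assignment. A hypothetical sketch of that accumulation style, not the actual _cuda_batch_normalize_conv_gradient1 kernel:

// Many threads contribute partial sums to the same per-channel slot, so
// correctness depends on beta_grad having been zeroed before launch --
// which is exactly what the added lines above do.
__global__ void accumulate_beta_grad(const float* gradient_input, float* beta_grad,
                                     long n, long channels, long spatial)
{
    // grid-stride loop over every element of the gradient tensor
    for (long i = blockIdx.x*blockDim.x + threadIdx.x; i < n; i += gridDim.x*blockDim.x)
    {
        const long k = (i/spatial) % channels;       // channel of element i (NCHW layout)
        atomicAdd(&beta_grad[k], gradient_input[i]); // accumulates: needs pre-zeroed output
    }
}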
...@@ -245,8 +245,8 @@ namespace dlib { namespace tt ...@@ -245,8 +245,8 @@ namespace dlib { namespace tt
- Let f(src,gamma,beta) == dot(gradient_input, dest output of - Let f(src,gamma,beta) == dot(gradient_input, dest output of
batch_normalize(dest,means,invstds,src,gamma,beta)) batch_normalize(dest,means,invstds,src,gamma,beta))
- Adds the gradient of f() with respect to src to #src_grad. - Adds the gradient of f() with respect to src to #src_grad.
- Adds the gradient of f() with respect to gamma to #gamma_grad. - Assigns the gradient of f() with respect to gamma to #gamma_grad.
- Adds the gradient of f() with respect to beta to #beta_grad. - Assigns the gradient of f() with respect to beta to #beta_grad.
!*/ !*/
private: private:
#ifdef DLIB_USE_CUDA #ifdef DLIB_USE_CUDA
@@ -312,8 +312,8 @@ namespace dlib { namespace tt
            - Let f(src,gamma,beta) == dot(gradient_input, dest output of
              batch_normalize_conv(dest,means,invstds,src,gamma,beta))
            - Adds the gradient of f() with respect to src to #src_grad.
-           - Adds the gradient of f() with respect to gamma to #gamma_grad.
-           - Adds the gradient of f() with respect to beta to #beta_grad.
+           - Assigns the gradient of f() with respect to gamma to #gamma_grad.
+           - Assigns the gradient of f() with respect to beta to #beta_grad.
        !*/
    private:
#ifdef DLIB_USE_CUDA
...
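
The spec change matters for callers that reuse the same gradient tensors across iterations: under the old add-to behavior, a second call without manually clearing the parameter gradients would fold stale values into the result. A toy stand-in (hypothetical functions, not dlib's API) demonstrating the difference:

#include <cassert>
#include <vector>

void grad_add(std::vector<float>& beta_grad, float g)    { beta_grad[0] += g; }
void grad_assign(std::vector<float>& beta_grad, float g) { beta_grad[0] = 0; beta_grad[0] += g; }

int main()
{
    std::vector<float> bg(1, 0.0f);
    grad_add(bg, 2.0f);
    grad_add(bg, 2.0f);      // stale value folds in: bg[0] == 4, not 2
    assert(bg[0] == 4.0f);

    grad_assign(bg, 2.0f);
    grad_assign(bg, 2.0f);   // safe to reuse the tensor across iterations
    assert(bg[0] == 2.0f);
    return 0;
}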