Fixed a bug in affine_. The layer was implemented as an in-place layer;

however, that doesn't really work if we need to compute the gradients with respect to the parameters, which the layer attempted to do. So I made the layer's parameters non-learnable so that it can continue to be in-place, since the main use case for this layer is to be a fast replacement for bn_ during testing.

Fixed a bug in affine_. The layer was implemented as an in-place layer;
however, that doesn't really work if we need to compute the gradients with respect to the parameters, which the layer attempted to do. So I made the layer's parameters non-learnable so that it can continue to be in-place, since the main use case for this layer is to be a fast replacement for bn_ during testing.
ebf7a89a · Davis King · bf7fdb63 · ebf7a89a · ebf7a89a
Commit ebf7a89a authored Apr 01, 2016 by Davis King
Show whitespace changes
Inline Side-by-side

Showing with 12 additions and 9 deletions

layers.h dlib/dnn/layers.h +3 -9

layers_abstract.h dlib/dnn/layers_abstract.h +9 -0

No files found.
--- a/dlib/dnn/layers.h
+++ b/dlib/dnn/layers.h
@@ -927,26 +927,20 @@ namespace dlib
        {
            auto g = gamma(params,0);
            auto b = beta(params,gamma.size());
-            auto g_grad = gamma(params_grad,0);
-            auto b_grad = beta(params_grad,gamma.size());

            // We are computing the gradient of dot(gradient_input, computed_output*g + b)
            if (mode == FC_MODE)
            {
                tt::multiply(data_grad, gradient_input, g);
-                tt::multiply(g_grad, gradient_input, computed_output);
-                tt::assign_bias_gradient(b_grad, gradient_input);
            }
            else
            {
                tt::multiply_conv(data_grad, gradient_input, g);
-                tt::multiply_conv(g_grad, gradient_input, computed_output);
-                tt::assign_conv_bias_gradient(b_grad, gradient_input);
            }
        }

-        const tensor& get_layer_params() const { return params; }
-        tensor& get_layer_params() { return params; }
+        const tensor& get_layer_params() const { return empty_params; }
+        tensor& get_layer_params() { return empty_params; }

        friend void serialize(const affine_& item, std::ostream& out)
        {
@@ -983,7 +977,7 @@ namespace dlib
        }

    private:
-        resizable_tensor params; 
+        resizable_tensor params, empty_params; 
        alias_tensor gamma, beta;
        layer_mode mode;
    };

--- a/dlib/dnn/layers_abstract.h
+++ b/dlib/dnn/layers_abstract.h
@@ -751,6 +751,13 @@ namespace dlib
                or
                    OUTPUT(n,k,r,c) == A(1,k,1,1)*INPUT(n,k,r,c)+B(1,k,1,1)
                as appropriate.
+
+
+                Finally, note that the parameters of this layer are not learnable and
+                therefore not modified during network updates.  Instead, the layer will
+                perform the identity transformation unless it is initialized with a bn_
+                layer, in which case it will perform whatever transformation the bn_ layer
+                has learned.
        !*/

    public:
@@ -796,6 +803,8 @@ namespace dlib
        tensor& get_layer_params(); 
        /*!
            These functions are implemented as described in the EXAMPLE_LAYER_ interface.
+            Also note that get_layer_params() always returns an empty tensor since there
+            are no learnable parameters in this object.
        !*/
    };