Commit ebf7a89a authored by Davis King's avatar Davis King

Fixed a bug in affine_. The layer was implemented as an inplace layer,

however, that doesn't really work if we need to compute the gradients with
respect to the parameters which the layer attempted to do.  So I made the
layer's parameters non-learnable so it can continue to be inplace since the
main use case for this layer is to be a fast replacement for bn_ during testing.
parent bf7fdb63
...@@ -927,26 +927,20 @@ namespace dlib ...@@ -927,26 +927,20 @@ namespace dlib
{ {
auto g = gamma(params,0); auto g = gamma(params,0);
auto b = beta(params,gamma.size()); auto b = beta(params,gamma.size());
auto g_grad = gamma(params_grad,0);
auto b_grad = beta(params_grad,gamma.size());
// We are computing the gradient of dot(gradient_input, computed_output*g + b) // We are computing the gradient of dot(gradient_input, computed_output*g + b)
if (mode == FC_MODE) if (mode == FC_MODE)
{ {
tt::multiply(data_grad, gradient_input, g); tt::multiply(data_grad, gradient_input, g);
tt::multiply(g_grad, gradient_input, computed_output);
tt::assign_bias_gradient(b_grad, gradient_input);
} }
else else
{ {
tt::multiply_conv(data_grad, gradient_input, g); tt::multiply_conv(data_grad, gradient_input, g);
tt::multiply_conv(g_grad, gradient_input, computed_output);
tt::assign_conv_bias_gradient(b_grad, gradient_input);
} }
} }
const tensor& get_layer_params() const { return params; } const tensor& get_layer_params() const { return empty_params; }
tensor& get_layer_params() { return params; } tensor& get_layer_params() { return empty_params; }
friend void serialize(const affine_& item, std::ostream& out) friend void serialize(const affine_& item, std::ostream& out)
{ {
...@@ -983,7 +977,7 @@ namespace dlib ...@@ -983,7 +977,7 @@ namespace dlib
} }
private: private:
resizable_tensor params; resizable_tensor params, empty_params;
alias_tensor gamma, beta; alias_tensor gamma, beta;
layer_mode mode; layer_mode mode;
}; };
......
...@@ -751,6 +751,13 @@ namespace dlib ...@@ -751,6 +751,13 @@ namespace dlib
or or
OUTPUT(n,k,r,c) == A(1,k,1,1)*INPUT(n,k,r,c)+B(1,k,1,1) OUTPUT(n,k,r,c) == A(1,k,1,1)*INPUT(n,k,r,c)+B(1,k,1,1)
as appropriate. as appropriate.
Finally, note that the parameters of this layer are not learnable and
therefore not modified during network updates. Instead, the layer will
perform the identity transformation unless it is initialized with a bn_
layer, in which case it will perform whatever transformation the bn_ layer
has learned.
!*/ !*/
public: public:
...@@ -796,6 +803,8 @@ namespace dlib ...@@ -796,6 +803,8 @@ namespace dlib
tensor& get_layer_params(); tensor& get_layer_params();
/*! /*!
These functions are implemented as described in the EXAMPLE_LAYER_ interface. These functions are implemented as described in the EXAMPLE_LAYER_ interface.
Also note that get_layer_params() always returns an empty tensor since there
are no learnable parameters in this object.
!*/ !*/
}; };
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment