Commit 1f5aa6c1 authored by Davis King

Added an option to fc_ to enable or disable a bias term.

parent 88376980
@@ -553,30 +553,51 @@ namespace dlib
// ---------------------------------------------------------------------------------------- // ----------------------------------------------------------------------------------------
// Selects whether an fc_ (fully connected) layer includes a learned bias
// term, i.e. a constant scalar added to each of the layer's outputs.
enum fc_bias_mode{
FC_HAS_BIAS = 0,
FC_NO_BIAS = 1
};
class fc_ class fc_
{ {
public: public:
// Default constructor: one output, bias term enabled.  num_inputs stays 0
// until setup() learns the input dimensionality from the subnet.
fc_() : num_outputs(1), num_inputs(0), bias_mode(FC_HAS_BIAS)
{
}
explicit fc_( explicit fc_(
unsigned long num_outputs_ unsigned long num_outputs_,
) : num_outputs(num_outputs_), num_inputs(0) fc_bias_mode mode = FC_HAS_BIAS
) : num_outputs(num_outputs_), num_inputs(0), bias_mode(mode)
{ {
} }
unsigned long get_num_outputs ( unsigned long get_num_outputs (
) const { return num_outputs; } ) const { return num_outputs; }
// Returns the bias mode this layer was constructed with (FC_HAS_BIAS or
// FC_NO_BIAS).
fc_bias_mode get_bias_mode (
) const { return bias_mode; }
template <typename SUBNET> template <typename SUBNET>
void setup (const SUBNET& sub) void setup (const SUBNET& sub)
{ {
num_inputs = sub.get_output().nr()*sub.get_output().nc()*sub.get_output().k(); num_inputs = sub.get_output().nr()*sub.get_output().nc()*sub.get_output().k();
params.set_size(num_inputs, num_outputs); if (bias_mode == FC_HAS_BIAS)
params.set_size(num_inputs+1, num_outputs);
else
params.set_size(num_inputs, num_outputs);
dlib::rand rnd("fc_"+cast_to_string(num_outputs)); dlib::rand rnd("fc_"+cast_to_string(num_outputs));
randomize_parameters(params, num_inputs+num_outputs, rnd); randomize_parameters(params, num_inputs+num_outputs, rnd);
weights = alias_tensor(num_inputs, num_outputs);
if (bias_mode == FC_HAS_BIAS)
{
biases = alias_tensor(1,num_outputs);
// set the initial bias values to zero
biases(params,weights.size()) = 0;
}
} }
template <typename SUBNET> template <typename SUBNET>
...@@ -584,17 +605,32 @@ namespace dlib ...@@ -584,17 +605,32 @@ namespace dlib
{ {
output.set_size(sub.get_output().num_samples(), num_outputs); output.set_size(sub.get_output().num_samples(), num_outputs);
tt::gemm(0,output, 1,sub.get_output(),false, params,false); auto w = weights(params, 0);
tt::gemm(0,output, 1,sub.get_output(),false, w,false);
if (bias_mode == FC_HAS_BIAS)
{
auto b = biases(params, weights.size());
tt::add(1,output,1,b);
}
} }
template <typename SUBNET> template <typename SUBNET>
void backward(const tensor& gradient_input, SUBNET& sub, tensor& params_grad) void backward(const tensor& gradient_input, SUBNET& sub, tensor& params_grad)
{ {
// compute the gradient of the parameters. // compute the gradient of the weight parameters.
tt::gemm(0,params_grad, 1,sub.get_output(),true, gradient_input,false); auto pw = weights(params_grad, 0);
tt::gemm(0,pw, 1,sub.get_output(),true, gradient_input,false);
if (bias_mode == FC_HAS_BIAS)
{
// compute the gradient of the bias parameters.
auto pb = biases(params_grad, weights.size());
tt::add_bias_gradient(pb, gradient_input);
}
// compute the gradient for the data // compute the gradient for the data
tt::gemm(1,sub.get_gradient_input(), 1,gradient_input,false, params,true); auto w = weights(params, 0);
tt::gemm(1,sub.get_gradient_input(), 1,gradient_input,false, w,true);
} }
// Read-only access to the learned parameters (weights + optional biases).
const tensor& get_layer_params() const { return params; }
...@@ -606,6 +642,9 @@ namespace dlib ...@@ -606,6 +642,9 @@ namespace dlib
serialize(item.num_outputs, out); serialize(item.num_outputs, out);
serialize(item.num_inputs, out); serialize(item.num_inputs, out);
serialize(item.params, out); serialize(item.params, out);
serialize(item.weights, out);
serialize(item.biases, out);
serialize((int)item.bias_mode, out);
} }
friend void deserialize(fc_& item, std::istream& in) friend void deserialize(fc_& item, std::istream& in)
...@@ -617,6 +656,11 @@ namespace dlib ...@@ -617,6 +656,11 @@ namespace dlib
deserialize(item.num_outputs, in); deserialize(item.num_outputs, in);
deserialize(item.num_inputs, in); deserialize(item.num_inputs, in);
deserialize(item.params, in); deserialize(item.params, in);
deserialize(item.weights, in);
deserialize(item.biases, in);
int bmode = 0;
deserialize(bmode, in);
item.bias_mode = (fc_bias_mode)bmode;
} }
private: private:
...@@ -624,9 +668,10 @@ namespace dlib ...@@ -624,9 +668,10 @@ namespace dlib
unsigned long num_outputs;
unsigned long num_inputs;        // set by setup() from the subnet's output size
resizable_tensor params;         // weight rows followed by an optional bias row
alias_tensor weights, biases;    // non-owning views into params
fc_bias_mode bias_mode;
}; };
// Convenience alias: an fc_ layer stacked on top of SUBNET.
template <typename SUBNET>
using fc = add_layer<fc_, SUBNET>;
......
...@@ -313,6 +313,11 @@ namespace dlib ...@@ -313,6 +313,11 @@ namespace dlib
// ---------------------------------------------------------------------------------------- // ----------------------------------------------------------------------------------------
// ---------------------------------------------------------------------------------------- // ----------------------------------------------------------------------------------------
// Selects whether an fc_ (fully connected) layer includes a learned bias
// term, i.e. a constant scalar added to each of the layer's outputs.
enum fc_bias_mode{
FC_HAS_BIAS = 0,
FC_NO_BIAS = 1
};
class fc_ class fc_
{ {
/*! /*!
...@@ -328,16 +333,19 @@ namespace dlib ...@@ -328,16 +333,19 @@ namespace dlib
/*! /*!
ensures ensures
- #get_num_outputs() == 1 - #get_num_outputs() == 1
- #get_bias_mode() == FC_HAS_BIAS
!*/ !*/
explicit fc_( explicit fc_(
unsigned long num_outputs unsigned long num_outputs,
fc_bias_mode mode = FC_HAS_BIAS
); );
/*! /*!
requires requires
- num_outputs > 0 - num_outputs > 0
ensures ensures
- #get_num_outputs() == num_outputs - #get_num_outputs() == num_outputs
- #get_bias_mode() == mode
!*/ !*/
unsigned long get_num_outputs ( unsigned long get_num_outputs (
...@@ -351,6 +359,15 @@ namespace dlib ...@@ -351,6 +359,15 @@ namespace dlib
- The rest of the dimensions of T will be 1. - The rest of the dimensions of T will be 1.
!*/ !*/
fc_bias_mode get_bias_mode (
) const;
/*!
ensures
- returns the bias mode which determines if this layer includes bias terms.
That is, if the bias mode is FC_HAS_BIAS then a different constant scalar
is added to each of the outputs of this layer.
!*/
template <typename SUBNET> void setup (const SUBNET& sub); template <typename SUBNET> void setup (const SUBNET& sub);
template <typename SUBNET> void forward(const SUBNET& sub, resizable_tensor& output); template <typename SUBNET> void forward(const SUBNET& sub, resizable_tensor& output);
template <typename SUBNET> void backward(const tensor& gradient_input, SUBNET& sub, tensor& params_grad); template <typename SUBNET> void backward(const tensor& gradient_input, SUBNET& sub, tensor& params_grad);
......
...@@ -864,7 +864,12 @@ namespace ...@@ -864,7 +864,12 @@ namespace
} }
{
    print_spinner();
    // exercise fc_ with the bias term enabled
    fc_ l(5,FC_HAS_BIAS);
    DLIB_TEST_MSG(test_layer(l), test_layer(l));
}
{
    print_spinner();
    // exercise fc_ with the bias term disabled
    fc_ l(5,FC_NO_BIAS);
    DLIB_TEST_MSG(test_layer(l), test_layer(l));
}
{ {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment