Commit 919cbd11 authored by Davis King's avatar Davis King

Added a multiply_ layer and set it up so you can use it instead of dropout_ after training has finished.
parent 565bed38
...@@ -692,6 +692,7 @@ namespace dlib ...@@ -692,6 +692,7 @@ namespace dlib
) : ) :
drop_rate(drop_rate_) drop_rate(drop_rate_)
{ {
DLIB_CASSERT(0 <= drop_rate && drop_rate <= 1,"");
} }
// We have to add a copy constructor and assignment operator because the rnd object // We have to add a copy constructor and assignment operator because the rnd object
...@@ -771,6 +772,81 @@ namespace dlib ...@@ -771,6 +772,81 @@ namespace dlib
template <typename SUBNET> template <typename SUBNET>
using dropout = add_layer<dropout_, SUBNET>; using dropout = add_layer<dropout_, SUBNET>;
// ----------------------------------------------------------------------------------------
class multiply_
{
    /*!
        Scales every element of its input tensor by a fixed constant.  It has
        no learnable parameters.  It can also be built from a dropout_ layer,
        giving a deterministic, faster stand-in for dropout once training has
        finished.
    !*/
public:
    explicit multiply_(
        float val_ = 0.5
    ) :
        val(val_)
    {
    }

    // Construct a deterministic replacement for a dropout_ layer: scale by
    // the keep probability (1 - drop rate).
    multiply_ (
        const dropout_& item
    ) : val(1-item.get_drop_rate()) {}

    float get_multiply_value (
    ) const { return val; }

    template <typename SUBNET>
    void setup (const SUBNET& /*sub*/)
    {
        // Nothing to do, this layer has no parameters to initialize.
    }

    void forward_inplace(const tensor& input, tensor& output)
    {
        // output = val*input + 0
        tt::affine_transform(output, input, val, 0);
    }

    void backward_inplace(
        const tensor& gradient_input,
        tensor& data_grad,
        tensor& /*params_grad*/
    )
    {
        // d/dx of val*x is just val, so simply scale the incoming gradient.
        tt::affine_transform(data_grad, gradient_input, val, 0);
    }

    const tensor& get_layer_params() const { return params; }
    tensor& get_layer_params() { return params; }

    friend void serialize(const multiply_& item, std::ostream& out)
    {
        serialize("multiply_", out);
        serialize(item.val, out);
    }

    friend void deserialize(multiply_& item, std::istream& in)
    {
        std::string version;
        deserialize(version, in);
        if (version == "multiply_")
        {
            deserialize(item.val, in);
        }
        else if (version == "dropout_")
        {
            // Since we can build a multiply_ from a dropout_ we check if that's what
            // is in the stream and if so then just convert it right here: push the
            // version string back onto the stream and read the whole dropout_ layer.
            unserialize sin(version, in);
            dropout_ temp;
            deserialize(temp, sin);
            item = temp;
        }
        else
        {
            throw serialization_error("Unexpected version found while deserializing dlib::multiply_.");
        }
    }

private:
    float val;
    resizable_tensor params; // unused, but get_layer_params() needs something to return
};

template <typename SUBNET>
using multiply = add_layer<multiply_, SUBNET>;
// ---------------------------------------------------------------------------------------- // ----------------------------------------------------------------------------------------
class affine_ class affine_
......
...@@ -516,6 +516,10 @@ namespace dlib ...@@ -516,6 +516,10 @@ namespace dlib
through the stochastic function f(x) which outputs either 0 or x. The through the stochastic function f(x) which outputs either 0 or x. The
probability of 0 being output is given by the drop_rate argument to this probability of 0 being output is given by the drop_rate argument to this
object's constructor. object's constructor.
Note that, after you finish training a network with dropout, it is a good
idea to replace each dropout_ layer with a multiply_ layer because the
multiply_ layer is faster and deterministic.
!*/ !*/
public: public:
...@@ -524,6 +528,8 @@ namespace dlib ...@@ -524,6 +528,8 @@ namespace dlib
float drop_rate = 0.5 float drop_rate = 0.5
); );
/*! /*!
requires
- 0 <= drop_rate <= 1
ensures ensures
- #get_drop_rate() == drop_rate - #get_drop_rate() == drop_rate
!*/ !*/
...@@ -555,6 +561,64 @@ namespace dlib ...@@ -555,6 +561,64 @@ namespace dlib
template <typename SUBNET> template <typename SUBNET>
using dropout = add_layer<dropout_, SUBNET>; using dropout = add_layer<dropout_, SUBNET>;
// ----------------------------------------------------------------------------------------
class multiply_
{
    /*!
        WHAT THIS OBJECT REPRESENTS
            This is an implementation of the EXAMPLE_LAYER_ interface defined above.
            In particular, it defines a basic layer that just multiplies its input
            tensor with a constant value and returns the result. It therefore has no
            learnable parameters.

            It can also be constructed from a dropout_ layer, which makes it a
            deterministic drop-in replacement for dropout_ after training finishes.
    !*/

public:

    explicit multiply_(
        float val = 0.5
    );
    /*!
        ensures
            - #get_multiply_value() == val
    !*/

    multiply_ (
        const dropout_& item
    );
    /*!
        ensures
            - #get_multiply_value() == 1-item.get_drop_rate()
              (i.e. We construct the multiply_ layer so that it is essentially a
              deterministic version of the given dropout_ layer)
    !*/

    float get_multiply_value (
    ) const;
    /*!
        ensures
            - this layer simply multiplies its input tensor by get_multiply_value() and
              produces the result as output.
    !*/

    template <typename SUBNET> void setup (const SUBNET& sub);
    void forward_inplace(const tensor& input, tensor& output);
    void backward_inplace(const tensor& gradient_input, tensor& data_grad, tensor& params_grad);
    const tensor& get_layer_params() const;
    tensor& get_layer_params();
    /*!
        These functions are implemented as described in the EXAMPLE_LAYER_ interface.
    !*/
};

void serialize(const multiply_& item, std::ostream& out);
void deserialize(multiply_& item, std::istream& in);
/*!
    provides serialization support
!*/

template <typename SUBNET>
using multiply = add_layer<multiply_, SUBNET>;
// ---------------------------------------------------------------------------------------- // ----------------------------------------------------------------------------------------
enum layer_mode enum layer_mode
......
...@@ -351,7 +351,7 @@ namespace ...@@ -351,7 +351,7 @@ namespace
DLIB_TEST(max(abs(truth3-mat(dest))) < 1e-5); DLIB_TEST(max(abs(truth3-mat(dest))) < 1e-5);
matrix<float> truth4 = pointwise_multiply(mat(A), mat(B)); matrix<float> truth4 = pointwise_multiply(mat(A), mat(B));
multiply(A, A, B); tt::multiply(A, A, B);
DLIB_TEST(max(abs(truth4-mat(A))) < 1e-5); DLIB_TEST(max(abs(truth4-mat(A))) < 1e-5);
matrix<float> truth5 = mat(B) > 0.1; matrix<float> truth5 = mat(B) > 0.1;
...@@ -965,6 +965,11 @@ namespace ...@@ -965,6 +965,11 @@ namespace
void test_layers() void test_layers()
{ {
{
    // Verify that multiply_ satisfies the layer interface contract
    // (forward/backward consistency) via the generic test_layer() checker.
    print_spinner();
    multiply_ l;
    DLIB_TEST_MSG(test_layer(l), test_layer(l));
}
{ {
print_spinner(); print_spinner();
max_pool_ l; max_pool_ l;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment