Commit 9bfd059f authored by Davis King's avatar Davis King

Filled out a bunch more of the core spec.

parent 2cd91288
...@@ -49,7 +49,7 @@ namespace dlib ...@@ -49,7 +49,7 @@ namespace dlib
WHAT THIS OBJECT REPRESENTS WHAT THIS OBJECT REPRESENTS
This is a basic stack of T objects. It holds N of the objects and is This is a basic stack of T objects. It holds N of the objects and is
entirely allocated on the stack. entirely allocated on the stack rather than on the heap.
!*/ !*/
public: public:
...@@ -142,7 +142,10 @@ namespace dlib ...@@ -142,7 +142,10 @@ namespace dlib
- SUBNET is an add_skip_layer object. - SUBNET is an add_skip_layer object.
WHAT THIS OBJECT REPRESENTS WHAT THIS OBJECT REPRESENTS
Stacks a new layer, defined by LAYER_DETAILS, on top of SUBNET type. This object represents a deep neural network. In particular, it is a tool
for adding another layer on top of the neural network of type SUBNET, which
is specified as a template argument. The specific layer added is defined
by the LAYER_DETAILS template argument.
!*/ !*/
public: public:
...@@ -156,45 +159,53 @@ namespace dlib ...@@ -156,45 +159,53 @@ namespace dlib
add_layer( add_layer(
); );
/*!
ensures
- default constructs all the layers in this network.
!*/
add_layer(const add_layer&) = default; add_layer(const add_layer&) = default;
add_layer(add_layer&&) = default; add_layer(add_layer&&) = default;
add_layer& operator=(add_layer&&) = default; add_layer& operator=(add_layer&&) = default;
add_layer& operator=(const add_layer&) = default; add_layer& operator=(const add_layer&) = default;
/*!
ensures
- this object is copyable and movable.
!*/
// Allow copying networks from one to another as long as their corresponding
// layers can be constructed from each other.
template <typename T, typename U> template <typename T, typename U>
add_layer( add_layer(
const add_layer<T,U>& item const add_layer<T,U>& item
); );
/*! /*!
ensures ensures
- This constructor allows you to copy neural network objects from one to
another as long as their corresponding layers can be constructed from
each other.
- #layer_details() == layer_details_type(item.layer_details()) - #layer_details() == layer_details_type(item.layer_details())
- #subnet() == subnet_type(item.subnet()) - #subnet() == subnet_type(item.subnet())
!*/ !*/
template <typename ...T> template <typename ...T>
add_layer( add_layer(
const LAYER_DETAILS& layer_det, const layer_details_type& layer_det,
T&& ...args T&& ...args
); );
/*! /*!
ensures ensures
- #layer_details() == layer_details_type(layer_det) - #layer_details() == layer_details_type(layer_det)
- #subnet() == subnet_type(args) - #subnet() == subnet_type(args)
!*/ !*/
template <typename ...T> template <typename ...T>
add_layer( add_layer(
LAYER_DETAILS&& layer_det, layer_details_type&& layer_det,
T&& ...args T&& ...args
); );
/*! /*!
ensures ensures
- #layer_details() == layer_details_type(layer_det) - #layer_details() == layer_details_type(layer_det)
- #subnet() == subnet_type(args) - #subnet() == subnet_type(args)
!*/ !*/
template <typename input_iterator> template <typename input_iterator>
...@@ -206,12 +217,50 @@ namespace dlib ...@@ -206,12 +217,50 @@ namespace dlib
/*! /*!
requires requires
- [ibegin, iend) is an iterator range over input_type objects. - [ibegin, iend) is an iterator range over input_type objects.
- std::distance(ibegin,iend) > 0
ensures ensures
- Converts the iterator range into a tensor and stores it into #data. - Converts the iterator range into a tensor and stores it into #data.
- #data.num_samples() == distance(ibegin,iend)*sample_expansion_factor. - #data.num_samples() == distance(ibegin,iend)*sample_expansion_factor.
- The data in the ith sample of #data corresponds to the input_type object
*(ibegin+i/sample_expansion_factor).
- Invokes data.async_copy_to_device() so that the data begins transferring - Invokes data.async_copy_to_device() so that the data begins transferring
to the device. to the GPU device, if present.
- Ultimately this function just calls subnet().subnet()...subnet().to_tensor(ibegin,iend,data). - This function is implemented by calling the to_tensor() routine defined
at the input layer of this network.
!*/
const subnet_type& subnet(
) const;
/*!
ensures
- returns the immediate subnetwork of *this network.
!*/
subnet_type& subnet(
);
/*!
ensures
- returns the immediate subnetwork of *this network.
!*/
const layer_details_type& layer_details(
) const;
/*!
ensures
- returns the layer_details_type instance that defines the behavior of the
layer at the top of this network. I.e. returns the layer details that
defines the behavior of the layer nearest to the network output rather
than the input layer.
!*/
layer_details_type& layer_details(
);
/*!
ensures
- returns the layer_details_type instance that defines the behavior of the
layer at the top of this network. I.e. returns the layer details that
defines the behavior of the layer nearest to the network output rather
than the input layer.
!*/ !*/
template <typename input_iterator> template <typename input_iterator>
...@@ -220,14 +269,19 @@ namespace dlib ...@@ -220,14 +269,19 @@ namespace dlib
input_iterator iend input_iterator iend
); );
/*! /*!
requires
- [ibegin, iend) is an iterator range over input_type objects.
- std::distance(ibegin,iend) > 0
ensures ensures
- runs [ibegin,iend) through the network and returns the results. - runs [ibegin,iend) through the network and returns the results.
In particular, this function performs: In particular, this function performs:
to_tensor(ibegin,iend,temp_tensor); to_tensor(ibegin,iend,temp_tensor);
return forward(temp_tensor); return forward(temp_tensor);
- The return value from this function is also available in #get_output(). - The return value from this function is also available in #get_output().
- have_same_dimensions(#get_gradient_input(), #get_output()) == true - have_same_dimensions(#get_gradient_input(), #get_output()) == true.
- All elements of #get_gradient_input() are set to 0. - All elements of #get_gradient_input() are set to 0.
i.e. calling this function clears out #get_gradient_input() and ensures
it has the same dimensions as the most recent output.
!*/ !*/
...@@ -254,19 +308,9 @@ namespace dlib ...@@ -254,19 +308,9 @@ namespace dlib
- The return value from this function is also available in #get_output(). - The return value from this function is also available in #get_output().
- have_same_dimensions(#get_gradient_input(), #get_output()) == true - have_same_dimensions(#get_gradient_input(), #get_output()) == true
- All elements of #get_gradient_input() are set to 0. - All elements of #get_gradient_input() are set to 0.
i.e. calling this function clears out #get_gradient_input() and ensures
it has the same dimensions as the most recent output.
!*/ !*/
{
subnetwork.forward(x);
const dimpl::subnet_wrapper<subnet_type> wsub(subnetwork);
if (!this_layer_setup_called)
{
details.setup(wsub);
this_layer_setup_called = true;
}
details.forward(wsub, cached_output);
gradient_input_is_stale = true;
return get_output();
}
const tensor& get_output( const tensor& get_output(
) const; ) const;
...@@ -281,46 +325,51 @@ namespace dlib ...@@ -281,46 +325,51 @@ namespace dlib
); );
/*! /*!
ensures ensures
- - returns the error gradient for this network. That is, this is the error
gradient that this network will use to update itself when update() is
called. Therefore, when performing back propagation, layers that sit on
top of this network layer write their back propagated error gradients
into get_gradient_input(). Or to put it another way, during back
propagation, layers take the contents of their get_gradient_input() and
back propagate it through themselves and store the results into their
subnetwork's get_gradient_input().
This means you should consider get_gradient_input() as an input to the
update() method.
!*/ !*/
{
if (gradient_input_is_stale)
{
gradient_input_is_stale = false;
x_grad.copy_size(get_output());
x_grad = 0;
}
return x_grad;
}
template <typename solver_type> template <typename solver_type>
void update(const tensor& x, sstack<solver_type,num_layers>& solvers) void update(
const tensor& x,
sstack<solver_type,num_layers>& solvers
);
/*! /*!
requires requires
- forward(x) was called to forward propagate x through the network. - forward(x) was called to forward propagate x through the network.
- x.num_samples() == get_gradient_input().num_samples() - x.num_samples() == get_output().num_samples()
- get_gradient_input() == the gradient of the network with respect - get_gradient_input() has been set equal to the gradient of this network's
to some loss. output with respect to some loss function.
!*/ - This instance of solvers has only ever been used with this network. That
{ is, if you want to call update() on some other neural network object then
dimpl::subnet_wrapper<subnet_type> wsub(subnetwork); you must not reuse the same solvers object.
params_grad.copy_size(details.get_layer_params()); ensures
params_grad = 0; - Back propagates the error gradient, get_gradient_input(), through this
details.backward(get_gradient_input(), wsub, static_cast<tensor&>(params_grad)); network and uses the provided solvers to update the network parameters.
// Don't try to adjust the parameters if this layer doesn't have any. !*/
if (params_grad.size() != 0)
solvers.top()(details, static_cast<const tensor&>(params_grad));
subnetwork.update(x, solvers.pop());
}
const subnet_type& subnet() const { return subnetwork; }
subnet_type& subnet() { return subnetwork; }
const layer_details_type& layer_details() const { return details; }
layer_details_type& layer_details() { return details; }
void clean( void clean(
); );
/*!
ensures
- Causes the network to forget about everything but its parameters.
That is, for each layer we will have:
- get_output().num_samples() == 0
- get_gradient_input().num_samples() == 0
However, running new input data through this network will still have the
same output it would have had regardless of any calls to clean().
The purpose of clean() is to compact the network object prior to saving
it to disk so that it takes up less space and the IO is quicker.
!*/
}; };
...@@ -350,115 +399,175 @@ namespace dlib ...@@ -350,115 +399,175 @@ namespace dlib
- SUBNET is an add_skip_layer object. - SUBNET is an add_skip_layer object.
WHAT THIS OBJECT REPRESENTS WHAT THIS OBJECT REPRESENTS
- Adds a loss layer, defined by LOSS_DETAILS, on top of SUBNET. This object represents a deep neural network. In particular, it is a tool
for adding a loss layer on top of the neural network of type SUBNET, which
is specified as a template argument. The specific layer added is defined
by the LOSS_DETAILS template argument. Importantly, a loss layer
is the last layer in a deep neural network. So once it is added you can't
add any other layers of any type.
!*/ !*/
public: public:
typedef LOSS_DETAILS loss_details_type; typedef LOSS_DETAILS loss_details_type;
typedef SUBNET subnet_type; typedef SUBNET subnet_type;
typedef typename subnet_type::input_type input_type; typedef typename subnet_type::input_type input_type;
// Note that the loss layer doesn't count as an additional layer. // Note that the loss layer doesn't count as an additional layer since it doesn't
// have any learnable parameters.
const static size_t num_layers = subnet_type::num_layers; const static size_t num_layers = subnet_type::num_layers;
const static unsigned int sample_expansion_factor = subnet_type::sample_expansion_factor; const static unsigned int sample_expansion_factor = subnet_type::sample_expansion_factor;
// If LOSS_DETAILS is an unsupervised loss then label_type==no_label_type. // If LOSS_DETAILS is an unsupervised loss then label_type==no_label_type.
// Otherwise it is defined as follows: // Otherwise it is defined as follows:
typedef typename LOSS_DETAILS::label_type label_type; typedef typename LOSS_DETAILS::label_type label_type;
static_assert(sample_expansion_factor == LOSS_DETAILS::sample_expansion_factor,
"The loss layer and input layer must agree on the sample_expansion_factor.");
add_loss_layer() = default; add_loss_layer() = default;
/*!
ensures
- default constructs all the layers in this network.
!*/
add_loss_layer(const add_loss_layer&) = default; add_loss_layer(const add_loss_layer&) = default;
add_loss_layer(add_loss_layer&&) = default; add_loss_layer(add_loss_layer&&) = default;
add_loss_layer& operator=(add_loss_layer&&) = default; add_loss_layer& operator=(add_loss_layer&&) = default;
add_loss_layer& operator=(const add_loss_layer&) = default; add_loss_layer& operator=(const add_loss_layer&) = default;
/*!
ensures
- this object is copyable and movable.
!*/
template <typename T, typename U> template <typename T, typename U>
add_loss_layer( add_loss_layer(
const add_loss_layer<T,U>& item const add_loss_layer<T,U>& item
) : );
loss(item.loss_details()), /*!
sub(item.subnet()) ensures
{} - This constructor allows you to copy neural network objects from one to
another as long as their corresponding layers can be constructed from
each other.
- #loss_details() == loss_details_type(item.loss_details())
- #subnet() == subnet_type(item.subnet())
!*/
template <typename ...T> template <typename ...T>
add_loss_layer( add_loss_layer(
const LOSS_DETAILS& layer_det, const LOSS_DETAILS& layer_det,
T&& ...args T&& ...args
) : );
loss(layer_det), /*!
sub(std::forward<T>(args)...) ensures
{ - #loss_details() == loss_details_type(layer_det)
} - #subnet() == subnet_type(args)
!*/
template <typename ...T> template <typename ...T>
add_loss_layer( add_loss_layer(
LOSS_DETAILS&& layer_det, LOSS_DETAILS&& layer_det,
T&& ...args T&& ...args
) : );
loss(std::move(layer_det)), /*!
sub(std::forward<T>(args)...) ensures
{ - #loss_details() == loss_details_type(layer_det)
} - #subnet() == subnet_type(args)
!*/
template <typename ...T> template <typename ...T>
add_loss_layer( add_loss_layer(
T ...args T ...args
) : );
sub(std::move(args)...) /*!
{ ensures
} - #loss_details() == loss_details_type()
- #subnet() == subnet_type(args)
!*/
const subnet_type& subnet(
) const;
/*!
ensures
- returns the immediate subnetwork of *this network.
!*/
subnet_type& subnet(
);
/*!
ensures
- returns the immediate subnetwork of *this network.
!*/
const loss_details_type& loss_details(
) const;
/*!
ensures
- returns the loss_details_type instance that defines the behavior of the
loss layer used by this network.
!*/
template <typename input_iterator, typename output_iterator> loss_details_type& loss_details(
);
/*!
ensures
- returns the loss_details_type instance that defines the behavior of the
loss layer used by this network.
!*/
template <typename input_iterator, typename label_iterator>
void operator() ( void operator() (
input_iterator ibegin, input_iterator ibegin,
input_iterator iend, input_iterator iend,
output_iterator obegin label_iterator obegin
) );
/*! /*!
requires requires
- obegin == iterator pointing to the start of a range of distance(ibegin,iend) - [ibegin, iend) is an iterator range over input_type objects.
elements. - std::distance(ibegin,iend) > 0
- obegin == iterator pointing to the start of a range of
std::distance(ibegin,iend) label_type elements.
ensures ensures
- runs [ibegin,iend) through the network and writes the output to the range at obegin. - runs [ibegin,iend) through the network and writes the output to the range
at obegin.
!*/ !*/
{
sub.to_tensor(ibegin,iend,temp_tensor);
sub.forward(temp_tensor);
loss.to_label(sub, obegin);
}
const label_type& operator() (
const label_type& operator() (const input_type& x) const input_type& x
);
/*! /*!
ensures ensures
- runs a single x through the network and returns the output. - runs a single object, x, through the network and returns the output.
!*/ !*/
{
(*this)(&x, &x+1, &temp_label);
return temp_label;
}
template <typename input_iterator, typename label_iterator> template <typename input_iterator, typename label_iterator>
double compute_loss ( double compute_loss (
input_iterator ibegin, input_iterator ibegin,
input_iterator iend, input_iterator iend,
label_iterator lbegin label_iterator lbegin
) );
{ /*!
sub.to_tensor(ibegin,iend,temp_tensor); requires
sub.forward(temp_tensor); - [ibegin, iend) is an iterator range over input_type objects.
dimpl::subnet_wrapper<subnet_type> wsub(sub); - std::distance(ibegin,iend) > 0
return loss.compute_loss(temp_tensor, lbegin, wsub); - lbegin == iterator pointing to the start of a range of
} std::distance(ibegin,iend) label_type elements.
ensures
- runs [ibegin,iend) through the network, compares the output to the
expected output pointed to by lbegin, and returns the resulting loss.
- This function does not update the network parameters.
!*/
template <typename input_iterator> template <typename input_iterator>
double compute_loss ( double compute_loss (
input_iterator ibegin, input_iterator ibegin,
input_iterator iend, input_iterator iend,
); );
/*!
requires
- LOSS_DETAILS is an unsupervised loss. i.e. label_type==no_label_type.
- [ibegin, iend) is an iterator range over input_type objects.
- std::distance(ibegin,iend) > 0
ensures
- runs [ibegin,iend) through the network and returns the resulting loss.
- This function does not update the network parameters.
!*/
template <typename input_iterator, typename label_iterator, typename solver_type> template <typename input_iterator, typename label_iterator, typename solver_type>
double update ( double update (
...@@ -466,15 +575,24 @@ namespace dlib ...@@ -466,15 +575,24 @@ namespace dlib
input_iterator iend, input_iterator iend,
label_iterator lbegin, label_iterator lbegin,
sstack<solver_type,num_layers>& solvers sstack<solver_type,num_layers>& solvers
) );
{ /*!
sub.to_tensor(ibegin,iend,temp_tensor); requires
sub.forward(temp_tensor); - [ibegin, iend) is an iterator range over input_type objects.
dimpl::subnet_wrapper<subnet_type> wsub(sub); - std::distance(ibegin,iend) > 0
double l = loss.compute_loss(temp_tensor, lbegin, wsub); - lbegin == iterator pointing to the start of a range of
sub.update(temp_tensor, solvers); std::distance(ibegin,iend) label_type elements.
return l; - This instance of solvers has only ever been used with this network. That
} is, if you want to call update() on some other neural network object then
you must not reuse the same solvers object.
ensures
- runs [ibegin,iend) through the network, compares the output to the
expected output pointed to by lbegin, and updates the network parameters
via backpropagation.
- The provided solvers are used to update the parameters in each layer of
the network.
- returns compute_loss(ibegin,iend,lbegin)
!*/
template <typename input_iterator, typename solver_type> template <typename input_iterator, typename solver_type>
double update ( double update (
...@@ -482,45 +600,31 @@ namespace dlib ...@@ -482,45 +600,31 @@ namespace dlib
input_iterator iend, input_iterator iend,
sstack<solver_type,num_layers>& solvers sstack<solver_type,num_layers>& solvers
); );
/*!
const subnet_type& subnet() const { return sub; } requires
subnet_type& subnet() { return sub; } - LOSS_DETAILS is an unsupervised loss. i.e. label_type==no_label_type.
const loss_details_type& loss_details() const { return loss; } - [ibegin, iend) is an iterator range over input_type objects.
loss_details_type& loss_details() { return loss; } - std::distance(ibegin,iend) > 0
- This instance of solvers has only ever been used with this network. That
is, if you want to call update() on some other neural network object then
you must not reuse the same solvers object.
ensures
- runs [ibegin,iend) through the network and updates the network parameters
by back-propagating the loss gradient through the network.
- The provided solvers are used to update the parameters in each layer of
the network.
- returns compute_loss(ibegin,iend)
!*/
void clean ( void clean (
) );
/*! /*!
ensures ensures
- Causes the network to forget about everything but its parameters. - Causes the network to forget about everything but its parameters.
That is, for each layer we will have: - invokes subnet().clean()
- get_output().num_samples() == 0
- get_gradient_input().num_samples() == 0
However, running new input data through this network will still have the
same output it would have had regardless of any calls to clean().
Finally, the purpose of clean() is to compact the network object prior to
saving it to disk so that it takes up less space and the IO is quicker.
!*/ !*/
{
temp_tensor.clear();
sub.clear();
}
private:
loss_details_type loss;
subnet_type sub;
// These two objects don't logically contribute to the state of this object. They
// are here to prevent them from being reallocated over and over.
label_type temp_label;
resizable_tensor temp_tensor;
}; };
template <typename T, typename U>
struct is_layer_type<add_loss_layer<T,U>> : std::true_type {};
// ---------------------------------------------------------------------------------------- // ----------------------------------------------------------------------------------------
// ---------------------------------------------------------------------------------------- // ----------------------------------------------------------------------------------------
// ---------------------------------------------------------------------------------------- // ----------------------------------------------------------------------------------------
...@@ -541,11 +645,13 @@ namespace dlib ...@@ -541,11 +645,13 @@ namespace dlib
- SUBNET is an add_skip_layer object. - SUBNET is an add_skip_layer object.
WHAT THIS OBJECT REPRESENTS WHAT THIS OBJECT REPRESENTS
This object draws its inputs from subnet() and performs the identity This object adds a new layer to a deep neural network. However, this layer
transform. This means it is a no-op and its presence does not change simply performs the identity transform. This means it is a no-op and its
the behavior of the network. It exists solely to be used by add_skip_layer presence does not change the behavior of the network. It exists solely to
to reference a particular part of a network. be used by add_skip_layer to reference a particular part of a network.
Also, this object provides an interface identical to the one defined by the
add_layer object.
!*/ !*/
}; };
...@@ -576,8 +682,11 @@ namespace dlib ...@@ -576,8 +682,11 @@ namespace dlib
- SUBNET is an add_skip_layer object. - SUBNET is an add_skip_layer object.
WHAT THIS OBJECT REPRESENTS WHAT THIS OBJECT REPRESENTS
This object draws its inputs from layer<TAG_TYPE>(subnet()) This object adds a new layer to a deep neural network which draws its
and performs the identity transform. inputs from layer<TAG_TYPE>(subnet()) and performs the identity transform.
Also, this object provides an interface identical to the one defined by the
add_layer object.
!*/ !*/
}; };
...@@ -659,10 +768,10 @@ namespace dlib ...@@ -659,10 +768,10 @@ namespace dlib
layer_details_type l layer_details_type l
); );
/*! /*!
requires
- l implements the EXAMPLE_LAYER_ interface defined in layers_abstract.h
ensures ensures
- tests l for compliance against the EXAMPLE_LAYER_ interface spec. - Checks if l correctly implements the EXAMPLE_LAYER_ interface defined in
layers_abstract.h. Importantly, it computes numerical approximations to the
gradients and compares them to the outputs of the layer.
!*/ !*/
// ---------------------------------------------------------------------------------------- // ----------------------------------------------------------------------------------------
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment