Commit 463e1ce0 authored by Davis King's avatar Davis King

Added tensor valued member functions to the add_loss_layer.

parent 9bfd059f
...@@ -100,9 +100,9 @@ namespace dlib ...@@ -100,9 +100,9 @@ namespace dlib
WHAT THIS OBJECT REPRESENTS WHAT THIS OBJECT REPRESENTS
This is a tool that makes an add_layer or add_loss_layer object This is a tool that makes an add_layer or add_loss_layer object
expose only the part of its interface defined by the SUBNET expose only the part of its interface defined by the SUBNET
type in layers_abstract.h. This way, when we pass sub network type in layers_abstract.h. This way, when we pass subnetwork
objects to the layer callbacks those callbacks won't be able to objects to the layer callbacks those callbacks won't be able to
interact with the sub networks in a way other than specified interact with the subnetworks in a way other than specified
by the SUBNET interface spec. by the SUBNET interface spec.
!*/ !*/
...@@ -312,7 +312,7 @@ namespace dlib ...@@ -312,7 +312,7 @@ namespace dlib
// ---------------------------------------------------------------------------------------- // ----------------------------------------------------------------------------------------
// This version of add_layer handles the special case where the sub network being given is // This version of add_layer handles the special case where the subnetwork being given is
// just an input layer object. // just an input layer object.
template <typename LAYER_DETAILS, typename INPUT_LAYER, typename enabled> template <typename LAYER_DETAILS, typename INPUT_LAYER, typename enabled>
class add_layer class add_layer
...@@ -606,7 +606,7 @@ namespace dlib ...@@ -606,7 +606,7 @@ namespace dlib
// ---------------------------------------------------------------------------------------- // ----------------------------------------------------------------------------------------
// This version of add_tag_layer handles the special case where the sub network being given // This version of add_tag_layer handles the special case where the subnetwork being given
// is just an input layer object. // is just an input layer object.
template <unsigned long ID, typename INPUT_LAYER, typename enabled> template <unsigned long ID, typename INPUT_LAYER, typename enabled>
class add_tag_layer class add_tag_layer
...@@ -802,6 +802,28 @@ namespace dlib ...@@ -802,6 +802,28 @@ namespace dlib
) : ) :
sub(std::move(args)...) sub(std::move(args)...)
{ {
// TODO, rename sub to subnetwork
}
        // Converts the iterator range [ibegin, iend) of input_type objects into a
        // tensor stored in #data.  This simply delegates to the to_tensor() routine
        // defined by the input layer at the bottom of the wrapped subnetwork, so the
        // input layer's contract (sample_expansion_factor, sample layout) applies.
        template <typename input_iterator>
        void to_tensor (
            input_iterator ibegin,
            input_iterator iend,
            resizable_tensor& data
        ) const
        {
            sub.to_tensor(ibegin,iend,data);
        }
template <typename output_iterator>
void operator() (
const tensor& x,
output_iterator obegin
)
{
sub.forward(x);
const dimpl::subnet_wrapper<subnet_type> wsub(sub);
loss.to_label(wsub, obegin);
} }
template <typename input_iterator, typename output_iterator> template <typename input_iterator, typename output_iterator>
...@@ -811,18 +833,26 @@ namespace dlib ...@@ -811,18 +833,26 @@ namespace dlib
output_iterator obegin output_iterator obegin
) )
{ {
sub.to_tensor(ibegin,iend,temp_tensor); to_tensor(ibegin,iend,temp_tensor);
sub.forward(temp_tensor); (*this)(temp_tensor, obegin);
loss.to_label(sub, obegin);
} }
const label_type& operator() (const input_type& x) const label_type& operator() (const input_type& x)
{ {
(*this)(&x, &x+1, &temp_label); (*this)(&x, &x+1, &temp_label);
return temp_label; return temp_label;
} }
template <typename label_iterator>
double compute_loss (
const tensor& x,
label_iterator lbegin
)
{
sub.forward(x);
dimpl::subnet_wrapper<subnet_type> wsub(sub);
return loss.compute_loss(x, lbegin, wsub);
}
template <typename input_iterator, typename label_iterator> template <typename input_iterator, typename label_iterator>
double compute_loss ( double compute_loss (
...@@ -831,10 +861,17 @@ namespace dlib ...@@ -831,10 +861,17 @@ namespace dlib
label_iterator lbegin label_iterator lbegin
) )
{ {
sub.to_tensor(ibegin,iend,temp_tensor); to_tensor(ibegin,iend,temp_tensor);
sub.forward(temp_tensor); return compute_loss(temp_tensor, lbegin);
}
double compute_loss (
const tensor& x
)
{
sub.forward(x);
dimpl::subnet_wrapper<subnet_type> wsub(sub); dimpl::subnet_wrapper<subnet_type> wsub(sub);
return loss.compute_loss(temp_tensor, lbegin, wsub); return loss.compute_loss(x, wsub);
} }
template <typename input_iterator> template <typename input_iterator>
...@@ -843,10 +880,22 @@ namespace dlib ...@@ -843,10 +880,22 @@ namespace dlib
input_iterator iend input_iterator iend
) )
{ {
sub.to_tensor(ibegin,iend,temp_tensor); to_tensor(ibegin,iend,temp_tensor);
sub.forward(temp_tensor); return compute_loss(temp_tensor);
}
template <typename label_iterator, typename solver_type>
double update (
const tensor& x,
label_iterator lbegin,
sstack<solver_type,num_layers>& solvers
)
{
sub.forward(x);
dimpl::subnet_wrapper<subnet_type> wsub(sub); dimpl::subnet_wrapper<subnet_type> wsub(sub);
return loss.compute_loss(temp_tensor, wsub); double l = loss.compute_loss(x, lbegin, wsub);
sub.update(x, solvers);
return l;
} }
template <typename input_iterator, typename label_iterator, typename solver_type> template <typename input_iterator, typename label_iterator, typename solver_type>
...@@ -857,11 +906,20 @@ namespace dlib ...@@ -857,11 +906,20 @@ namespace dlib
sstack<solver_type,num_layers>& solvers sstack<solver_type,num_layers>& solvers
) )
{ {
sub.to_tensor(ibegin,iend,temp_tensor); to_tensor(ibegin,iend,temp_tensor);
sub.forward(temp_tensor); return update(temp_tensor, lbegin, solvers);
}
template <typename solver_type>
double update (
const tensor& x,
sstack<solver_type,num_layers>& solvers
)
{
sub.forward(x);
dimpl::subnet_wrapper<subnet_type> wsub(sub); dimpl::subnet_wrapper<subnet_type> wsub(sub);
double l = loss.compute_loss(temp_tensor, lbegin, wsub); double l = loss.compute_loss(x, wsub);
sub.update(temp_tensor, solvers); sub.update(x, solvers);
return l; return l;
} }
...@@ -872,12 +930,8 @@ namespace dlib ...@@ -872,12 +930,8 @@ namespace dlib
sstack<solver_type,num_layers>& solvers sstack<solver_type,num_layers>& solvers
) )
{ {
sub.to_tensor(ibegin,iend,temp_tensor); to_tensor(ibegin,iend,temp_tensor);
sub.forward(temp_tensor); return update(temp_tensor, solvers);
dimpl::subnet_wrapper<subnet_type> wsub(sub);
double l = loss.compute_loss(temp_tensor, wsub);
sub.update(temp_tensor, solvers);
return l;
} }
const subnet_type& subnet() const { return sub; } const subnet_type& subnet() const { return sub; }
......
...@@ -278,13 +278,14 @@ namespace dlib ...@@ -278,13 +278,14 @@ namespace dlib
to_tensor(ibegin,iend,temp_tensor); to_tensor(ibegin,iend,temp_tensor);
return forward(temp_tensor); return forward(temp_tensor);
- The return value from this function is also available in #get_output(). - The return value from this function is also available in #get_output().
i.e. this function returns #get_output().
- #get_output().num_samples() == std::distance(ibegin,iend)*sample_expansion_factor.
- have_same_dimensions(#get_gradient_input(), #get_output()) == true. - have_same_dimensions(#get_gradient_input(), #get_output()) == true.
- All elements of #get_gradient_input() are set to 0. - All elements of #get_gradient_input() are set to 0.
i.e. calling this function clears out #get_gradient_input() and ensures i.e. calling this function clears out #get_gradient_input() and ensures
it has the same dimensions as the most recent output. it has the same dimensions as the most recent output.
!*/ !*/
const tensor& operator() ( const tensor& operator() (
const input_type& x const input_type& x
); );
...@@ -298,6 +299,9 @@ namespace dlib ...@@ -298,6 +299,9 @@ namespace dlib
const tensor& x const tensor& x
); );
/*! /*!
requires
- x.num_samples()%sample_expansion_factor == 0
- x.num_samples() > 0
ensures ensures
- Runs x through the network and returns the results. In particular, this - Runs x through the network and returns the results. In particular, this
function performs the equivalent of: function performs the equivalent of:
...@@ -306,6 +310,8 @@ namespace dlib ...@@ -306,6 +310,8 @@ namespace dlib
layer_details().setup(subnet()); layer_details().setup(subnet());
layer_details().forward(subnet(), get_output()); layer_details().forward(subnet(), get_output());
- The return value from this function is also available in #get_output(). - The return value from this function is also available in #get_output().
i.e. this function returns #get_output().
- #get_output().num_samples() == x.num_samples().
- have_same_dimensions(#get_gradient_input(), #get_output()) == true - have_same_dimensions(#get_gradient_input(), #get_output()) == true
- All elements of #get_gradient_input() are set to 0. - All elements of #get_gradient_input() are set to 0.
i.e. calling this function clears out #get_gradient_input() and ensures i.e. calling this function clears out #get_gradient_input() and ensures
...@@ -511,6 +517,46 @@ namespace dlib ...@@ -511,6 +517,46 @@ namespace dlib
loss layer used by this network. loss layer used by this network.
!*/ !*/
template <typename input_iterator>
void to_tensor (
input_iterator ibegin,
input_iterator iend,
resizable_tensor& data
) const;
/*!
requires
- [ibegin, iend) is an iterator range over input_type objects.
- std::distance(ibegin,iend) > 0
ensures
- Converts the iterator range into a tensor and stores it into #data.
- #data.num_samples() == distance(ibegin,iend)*sample_expansion_factor.
- The data in the ith sample of #data corresponds to the input_type object
*(ibegin+i/sample_expansion_factor).
- Invokes data.async_copy_to_device() so that the data begins transferring
to the GPU device, if present.
- This function is implemented by calling the to_tensor() routine defined
at the input layer of this network.
!*/
// -------------
template <typename output_iterator>
void operator() (
const tensor& x,
output_iterator obegin
);
/*!
requires
- x.num_samples()%sample_expansion_factor == 0
- x.num_samples() > 0
- obegin == iterator pointing to the start of a range of
x.num_samples()/sample_expansion_factor label_type elements.
ensures
- runs x through the network and writes the output to the range at obegin.
- loss_details().to_label() is used to write the network output into
obegin.
!*/
template <typename input_iterator, typename label_iterator> template <typename input_iterator, typename label_iterator>
void operator() ( void operator() (
input_iterator ibegin, input_iterator ibegin,
...@@ -526,14 +572,41 @@ namespace dlib ...@@ -526,14 +572,41 @@ namespace dlib
ensures ensures
- runs [ibegin,iend) through the network and writes the output to the range - runs [ibegin,iend) through the network and writes the output to the range
at obegin. at obegin.
- loss_details().to_label() is used to write the network output into
obegin.
!*/ !*/
// -------------
const label_type& operator() ( const label_type& operator() (
const input_type& x const input_type& x
); );
/*! /*!
ensures ensures
- runs a single object, x, through the network and returns the output. - runs a single object, x, through the network and returns the output.
- loss_details().to_label() is used to convert the network output into a
label_type.
!*/
// -------------
template <typename label_iterator>
double compute_loss (
const tensor& x,
label_iterator lbegin
);
/*!
requires
- x.num_samples()%sample_expansion_factor == 0
- x.num_samples() > 0
- lbegin == iterator pointing to the start of a range of
x.num_samples()/sample_expansion_factor label_type elements.
ensures
- runs x through the network, compares the output to the expected output
pointed to by lbegin, and returns the resulting loss.
- for all valid k:
- the expected label of the kth sample in x is *(lbegin+k/sample_expansion_factor).
- This function does not update the network parameters.
!*/ !*/
template <typename input_iterator, typename label_iterator> template <typename input_iterator, typename label_iterator>
...@@ -551,6 +624,23 @@ namespace dlib ...@@ -551,6 +624,23 @@ namespace dlib
ensures ensures
- runs [ibegin,iend) through the network, compares the output to the - runs [ibegin,iend) through the network, compares the output to the
expected output pointed to by lbegin, and returns the resulting loss. expected output pointed to by lbegin, and returns the resulting loss.
- for all valid k:
- the expected label of *(ibegin+k) is *(lbegin+k).
- This function does not update the network parameters.
!*/
// -------------
double compute_loss (
const tensor& x
);
/*!
requires
- LOSS_DETAILS is an unsupervised loss. i.e. label_type==no_label_type.
- x.num_samples()%sample_expansion_factor == 0
- x.num_samples() > 0
ensures
- runs x through the network and returns the resulting loss.
- This function does not update the network parameters. - This function does not update the network parameters.
!*/ !*/
...@@ -569,6 +659,34 @@ namespace dlib ...@@ -569,6 +659,34 @@ namespace dlib
- This function does not update the network parameters. - This function does not update the network parameters.
!*/ !*/
// -------------
template <typename label_iterator, typename solver_type>
double update (
const tensor& x,
label_iterator lbegin,
sstack<solver_type,num_layers>& solvers
);
/*!
requires
- x.num_samples()%sample_expansion_factor == 0
- x.num_samples() > 0
- lbegin == iterator pointing to the start of a range of
x.num_samples()/sample_expansion_factor label_type elements.
- This instance of solvers has only ever been used with this network. That
is, if you want to call update() on some other neural network object then
you must not reuse the same solvers object.
ensures
- runs x through the network, compares the output to the expected output
pointed to by lbegin, and updates the network parameters via
backpropagation.
- for all valid k:
- the expected label of the kth sample in x is *(lbegin+k/sample_expansion_factor).
- The provided solvers are used to update the parameters in each layer of
the network.
- returns compute_loss(x,lbegin)
!*/
template <typename input_iterator, typename label_iterator, typename solver_type> template <typename input_iterator, typename label_iterator, typename solver_type>
double update ( double update (
input_iterator ibegin, input_iterator ibegin,
...@@ -589,11 +707,36 @@ namespace dlib ...@@ -589,11 +707,36 @@ namespace dlib
- runs [ibegin,iend) through the network, compares the output to the - runs [ibegin,iend) through the network, compares the output to the
expected output pointed to by lbegin, and updates the network parameters expected output pointed to by lbegin, and updates the network parameters
via backpropagation. via backpropagation.
- for all valid k:
- the expected label of *(ibegin+k) is *(lbegin+k).
- The provided solvers are used to update the parameters in each layer of - The provided solvers are used to update the parameters in each layer of
the network. the network.
- returns compute_loss(ibegin,iend,lbegin) - returns compute_loss(ibegin,iend,lbegin)
!*/ !*/
// -------------
template <typename solver_type>
double update (
const tensor& x,
sstack<solver_type,num_layers>& solvers
);
/*!
requires
- LOSS_DETAILS is an unsupervised loss. i.e. label_type==no_label_type.
- x.num_samples()%sample_expansion_factor == 0
- x.num_samples() > 0
- This instance of solvers has only ever been used with this network. That
is, if you want to call update() on some other neural network object then
you must not reuse the same solvers object.
ensures
- runs x through the network and updates the network parameters by
back-propagating the loss gradient through the network.
- The provided solvers are used to update the parameters in each layer of
the network.
- returns compute_loss(x)
!*/
template <typename input_iterator, typename solver_type> template <typename input_iterator, typename solver_type>
double update ( double update (
input_iterator ibegin, input_iterator ibegin,
...@@ -616,6 +759,8 @@ namespace dlib ...@@ -616,6 +759,8 @@ namespace dlib
- returns compute_loss(ibegin,iend) - returns compute_loss(ibegin,iend)
!*/ !*/
// -------------
void clean ( void clean (
); );
/*! /*!
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment