Commit 1c19a80a authored by Davis King

Changed the loss layer interface to use two typedefs, output_label_type and
training_label_type, instead of a single label_type.  This way, the label
type used for training can be distinct from the type output by the network.
This change breaks backwards compatibility with the previous API.
parent 25ccbc42
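
For orientation, here is a minimal sketch (the class name is hypothetical, not dlib's actual code) of what the new interface asks of a loss layer:

// ---- example sketch (illustrative, not dlib source) ----
#include <type_traits>

// A hypothetical loss layer showing the two-typedef interface introduced by
// this commit.  A real loss layer would also define to_label() and
// compute_loss_value_and_gradient().
class example_loss_
{
public:
    // Type of the truth labels consumed during training.
    typedef float training_label_type;
    // Type produced by to_label() when the network is run forward.
    typedef float output_label_type;
};

int main()
{
    // add_loss_layer and dnn_trainer pick these up independently, so the two
    // types are free to differ (e.g. train on annotations, output detections).
    static_assert(std::is_same<example_loss_::training_label_type, float>::value, "");
    static_assert(std::is_same<example_loss_::output_label_type, float>::value, "");
    return 0;
}
// ---- end example sketch ----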
@@ -2120,12 +2120,12 @@ namespace dlib
{
private:
// We don't want anyone making these no_label_type objects. They are here only to
// allow add_loss_layer::label_type and dnn_trainer::label_type to exist which avoids
// needing to overload add_loss_layer and dnn_trainer for supervised and unsupervised
// losses. It also can be a type to use in template metaprogramming to indicate
// "no label". So here we make the constructor private with the exception that
// add_loss_layer objects can make it (again, just to simplify add_loss_layer's
// implementation).
// allow add_loss_layer::training_label_type and dnn_trainer::training_label_type
// to exist which avoids needing to overload add_loss_layer and dnn_trainer for
// supervised and unsupervised losses. It also can be a type to use in template
// metaprogramming to indicate "no label". So here we make the constructor private
// with the exception that add_loss_layer objects can make it (again, just to
// simplify add_loss_layer's implementation).
no_label_type(){};
template <typename LOSS_DETAILS, typename SUBNET> friend class add_loss_layer;
template < typename net_type, typename solver_type > friend class dnn_trainer;
@@ -2137,14 +2137,25 @@ namespace dlib
class add_loss_layer
{
template <typename T, typename enabled=void>
struct get_loss_layer_label_type
struct get_loss_layer_training_label_type
{
typedef no_label_type type;
};
template <typename T>
struct get_loss_layer_label_type<T,typename std::enable_if<sizeof(typename T::label_type)!=0>::type>
struct get_loss_layer_training_label_type<T,typename std::enable_if<sizeof(typename T::training_label_type)!=0>::type>
{
typedef typename T::label_type type;
typedef typename T::training_label_type type;
};
template <typename T, typename enabled=void>
struct get_loss_layer_output_label_type
{
typedef no_label_type type;
};
template <typename T>
struct get_loss_layer_output_label_type<T,typename std::enable_if<sizeof(typename T::output_label_type)!=0>::type>
{
typedef typename T::output_label_type type;
};
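
The two helper templates above use the classic member-type detection idiom. A self-contained sketch of how it behaves (the surrounding names here are illustrative, not dlib's):

// ---- example sketch (illustrative, not dlib source) ----
#include <type_traits>

struct no_label_type {};

// Primary template: selected when T has no training_label_type member.
template <typename T, typename enabled = void>
struct get_training_label_type { typedef no_label_type type; };

// Partial specialization: viable only when T::training_label_type names a
// type; otherwise the sizeof expression triggers a substitution failure and
// this specialization is silently discarded (SFINAE).
template <typename T>
struct get_training_label_type<T,
    typename std::enable_if<sizeof(typename T::training_label_type)!=0>::type>
{ typedef typename T::training_label_type type; };

struct supervised_loss   { typedef float training_label_type; };
struct unsupervised_loss {};  // no training_label_type member

static_assert(std::is_same<get_training_label_type<supervised_loss>::type,
                           float>::value, "");
static_assert(std::is_same<get_training_label_type<unsupervised_loss>::type,
                           no_label_type>::value, "");

int main() { return 0; }
// ---- end example sketch ----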
public:
@@ -2154,7 +2165,8 @@ namespace dlib
const static size_t num_layers = subnet_type::num_layers + 1;
// Note that the loss layer doesn't count as an additional computational layer.
const static size_t num_computational_layers = subnet_type::num_computational_layers;
typedef typename get_loss_layer_label_type<LOSS_DETAILS>::type label_type;
typedef typename get_loss_layer_training_label_type<LOSS_DETAILS>::type training_label_type;
typedef typename get_loss_layer_output_label_type<LOSS_DETAILS>::type output_label_type;
static_assert(is_nonloss_layer_type<SUBNET>::value,
"SUBNET must be of type add_layer, add_skip_layer, or add_tag_layer.");
@@ -2250,19 +2262,19 @@ namespace dlib
(*this)(temp_tensor, obegin);
}
const label_type& operator() (const input_type& x)
const output_label_type& operator() (const input_type& x)
{
(*this)(&x, &x+1, &temp_label);
return temp_label;
}
template <typename iterable_type>
std::vector<label_type> operator() (
std::vector<output_label_type> operator() (
const iterable_type& data,
size_t batch_size = 128
)
{
std::vector<label_type> results(std::distance(data.begin(), data.end()));
std::vector<output_label_type> results(std::distance(data.begin(), data.end()));
auto o = results.begin();
auto i = data.begin();
auto num_remaining = results.size();
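
A hedged usage sketch of the batched operator() above (it assumes a dlib build from around this commit; the layer stack and sample shapes are illustrative only):

// ---- example sketch (illustrative, not dlib source) ----
#include <dlib/dnn.h>
#include <type_traits>
#include <vector>

using namespace dlib;

// A tiny illustrative classifier; loss_multiclass_log defines both typedefs
// as unsigned long.
using net_type = loss_multiclass_log<fc<10, input<matrix<float>>>>;

static_assert(std::is_same<net_type::output_label_type, unsigned long>::value, "");

int main()
{
    net_type net;  // untrained, but enough to exercise the interface
    std::vector<matrix<float>> samples(4, matrix<float>(zeros_matrix<float>(8,1)));
    // Runs the samples through the network in mini-batches and converts each
    // network output to an output_label_type via loss_details().to_label().
    std::vector<net_type::output_label_type> labels = net(samples, 128);
    return 0;
}
// ---- end example sketch ----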
@@ -2426,7 +2438,7 @@ namespace dlib
// These two objects don't logically contribute to the state of this object. They
// are here to prevent them from being reallocated over and over.
label_type temp_label;
output_label_type temp_label;
resizable_tensor temp_tensor;
};
@@ -619,9 +619,12 @@ namespace dlib
typedef typename subnet_type::input_type input_type;
const static size_t num_computational_layers = subnet_type::num_computational_layers;
const static size_t num_layers = subnet_type::num_layers + 1;
// If LOSS_DETAILS is an unsupervised loss then label_type==no_label_type.
// If LOSS_DETAILS is an unsupervised loss then training_label_type==no_label_type.
// Otherwise it is defined as follows:
typedef typename LOSS_DETAILS::label_type label_type;
typedef typename LOSS_DETAILS::training_label_type training_label_type;
// Similarly, if LOSS_DETAILS doesn't provide any output conversion then
// output_label_type==no_label_type.
typedef typename LOSS_DETAILS::output_label_type output_label_type;
@@ -768,7 +771,7 @@ namespace dlib
- x.num_samples()%sample_expansion_factor() == 0
- x.num_samples() > 0
- obegin == iterator pointing to the start of a range of
x.num_samples()/sample_expansion_factor() label_type elements.
x.num_samples()/sample_expansion_factor() output_label_type elements.
ensures
- runs x through the network and writes the output to the range at obegin.
- loss_details().to_label() is used to write the network output into
@@ -786,7 +789,7 @@ namespace dlib
- [ibegin, iend) is an iterator range over input_type objects.
- std::distance(ibegin,iend) > 0
- obegin == iterator pointing to the start of a range of
std::distance(ibegin,iend) label_type elements.
std::distance(ibegin,iend) output_label_type elements.
ensures
- runs [ibegin,iend) through the network and writes the output to the range
at obegin.
@@ -796,18 +799,18 @@ namespace dlib
// -------------
const label_type& operator() (
const output_label_type& operator() (
const input_type& x
);
/*!
ensures
- runs a single object, x, through the network and returns the output.
- loss_details().to_label() is used to convert the network output into a
label_type.
output_label_type.
!*/
template <typename iterable_type>
std::vector<label_type> operator() (
std::vector<output_label_type> operator() (
const iterable_type& data,
size_t batch_size = 128
);
@@ -826,7 +829,7 @@ namespace dlib
items. Using a batch_size > 1 can be faster because it better exploits
the available hardware parallelism.
- loss_details().to_label() is used to convert the network output into a
label_type.
output_label_type.
!*/
// -------------
@@ -844,7 +847,7 @@ namespace dlib
- x.num_samples()%sample_expansion_factor() == 0
- x.num_samples() > 0
- lbegin == iterator pointing to the start of a range of
x.num_samples()/sample_expansion_factor() label_type elements.
x.num_samples()/sample_expansion_factor() training_label_type elements.
ensures
- runs x through the network, compares the output to the expected output
pointed to by lbegin, and returns the resulting loss.
@@ -864,7 +867,7 @@ namespace dlib
- [ibegin, iend) is an iterator range over input_type objects.
- std::distance(ibegin,iend) > 0
- lbegin == iterator pointing to the start of a range of
std::distance(ibegin,iend) label_type elements.
std::distance(ibegin,iend) training_label_type elements.
ensures
- runs [ibegin,iend) through the network, compares the output to the
expected output pointed to by lbegin, and returns the resulting loss.
@@ -880,7 +883,7 @@ namespace dlib
);
/*!
requires
- LOSS_DETAILS is an unsupervised loss. i.e. label_type==no_label_type.
- LOSS_DETAILS is an unsupervised loss. i.e. training_label_type==no_label_type.
- sample_expansion_factor() != 0
(i.e. to_tensor() must have been called to set sample_expansion_factor()
to something non-zero.)
@@ -898,7 +901,7 @@ namespace dlib
);
/*!
requires
- LOSS_DETAILS is an unsupervised loss. i.e. label_type==no_label_type.
- LOSS_DETAILS is an unsupervised loss. i.e. training_label_type==no_label_type.
- [ibegin, iend) is an iterator range over input_type objects.
- std::distance(ibegin,iend) > 0
ensures
@@ -921,7 +924,7 @@ namespace dlib
- x.num_samples()%sample_expansion_factor() == 0
- x.num_samples() > 0
- lbegin == iterator pointing to the start of a range of
x.num_samples()/sample_expansion_factor() label_type elements.
x.num_samples()/sample_expansion_factor() training_label_type elements.
ensures
- runs x through the network, compares the output to the expected output
pointed to by lbegin, and computes parameter and data gradients with
@@ -944,7 +947,7 @@ namespace dlib
- [ibegin, iend) is an iterator range over input_type objects.
- std::distance(ibegin,iend) > 0
- lbegin == iterator pointing to the start of a range of
std::distance(ibegin,iend) label_type elements.
std::distance(ibegin,iend) training_label_type elements.
ensures
- runs [ibegin,iend) through the network, compares the output to the
expected output pointed to by lbegin, and computes parameter and data
@@ -961,7 +964,7 @@ namespace dlib
);
/*!
requires
- LOSS_DETAILS is an unsupervised loss. i.e. label_type==no_label_type.
- LOSS_DETAILS is an unsupervised loss. i.e. training_label_type==no_label_type.
- sample_expansion_factor() != 0
(i.e. to_tensor() must have been called to set sample_expansion_factor()
to something non-zero.)
@@ -982,7 +985,7 @@ namespace dlib
);
/*!
requires
- LOSS_DETAILS is an unsupervised loss. i.e. label_type==no_label_type.
- LOSS_DETAILS is an unsupervised loss. i.e. training_label_type==no_label_type.
- [ibegin, iend) is an iterator range over input_type objects.
- std::distance(ibegin,iend) > 0
ensures
@@ -21,7 +21,8 @@ namespace dlib
{
public:
typedef float label_type;
typedef float training_label_type;
typedef float output_label_type;
template <
typename SUB_TYPE,
@@ -128,7 +129,8 @@ namespace dlib
{
public:
typedef float label_type;
typedef float training_label_type;
typedef float output_label_type;
template <
typename SUB_TYPE,
@@ -244,7 +246,8 @@ namespace dlib
{
public:
typedef unsigned long label_type;
typedef unsigned long training_label_type;
typedef unsigned long output_label_type;
template <
typename SUB_TYPE,
@@ -468,7 +471,8 @@ namespace dlib
public:
typedef std::vector<mmod_rect> label_type;
typedef std::vector<mmod_rect> training_label_type;
typedef std::vector<mmod_rect> output_label_type;
loss_mmod_() {}
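
As the loss_mmod_ typedefs above show, label types need not be scalars: here each sample's label is a whole std::vector<mmod_rect>, one set of boxes per image. A hedged sketch of assembling such training labels (the rectangle values are illustrative; dlib/dnn.h is assumed to pull in mmod_rect):

// ---- example sketch (illustrative, not dlib source) ----
#include <dlib/dnn.h>
#include <vector>

using namespace dlib;

int main()
{
    // One training_label_type element per image: all the truth boxes in it.
    std::vector<mmod_rect> boxes_for_one_image;
    mmod_rect box;
    box.rect = rectangle(10,10,50,50);  // left, top, right, bottom
    boxes_for_one_image.push_back(box);

    // The trainer receives one such vector per input image.
    std::vector<std::vector<mmod_rect>> labels;
    labels.push_back(boxes_for_one_image);
    return 0;
}
// ---- end example sketch ----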
@@ -494,7 +498,7 @@ namespace dlib
DLIB_CASSERT(sub.sample_expansion_factor() == 1, sub.sample_expansion_factor());
std::vector<intermediate_detection> dets_accum;
label_type final_dets;
output_label_type final_dets;
for (long i = 0; i < output_tensor.num_samples(); ++i)
{
tensor_to_dets(input_tensor, output_tensor, i, dets_accum, adjust_threshold, sub);
@@ -865,7 +869,7 @@ namespace dlib
{
public:
typedef unsigned long label_type;
typedef unsigned long training_label_type;
template <
@@ -33,14 +33,16 @@ namespace dlib
Finally, note that there are two broad flavors of loss layer, supervised
and unsupervised. The EXAMPLE_LOSS_LAYER_ as shown here is a supervised
layer. To make an unsupervised loss you simply leave out the label_type
typedef, to_label(), and the truth iterator argument to
layer. To make an unsupervised loss you simply leave out the
training_label_type typedef and the truth iterator argument to
compute_loss_value_and_gradient().
!*/
public:
typedef whatever_type_you_use_for_labels label_type;
// In most cases training_label_type and output_label_type will be the same type.
typedef whatever_type_you_use_for_training_labels training_label_type;
typedef whatever_type_you_use_for_output_labels output_label_type;
EXAMPLE_LOSS_LAYER_ (
);
@@ -77,9 +79,9 @@ namespace dlib
- input_tensor.num_samples()%sub.sample_expansion_factor() == 0.
- iter == an iterator pointing to the beginning of a range of
input_tensor.num_samples()/sub.sample_expansion_factor() elements. Moreover,
they must be label_type elements.
they must be output_label_type elements.
ensures
- Converts the output of the provided network to label_type objects and
- Converts the output of the provided network to output_label_type objects and
stores the results into the range indicated by iter. In particular, for
all valid i, it will be the case that:
*(iter+i/sub.sample_expansion_factor()) is populated based on the output of
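
For a scalar loss, to_label() typically just copies one value per sample out of the network's output tensor into the output_label_type range. A self-contained sketch using a mock tensor (dlib's real tensor type is not replicated here):

// ---- example sketch (illustrative, not dlib source) ----
#include <vector>

// Minimal stand-in for dlib's tensor, just for this sketch.
struct mock_tensor
{
    std::vector<float> data;
    long num_samples() const { return (long)data.size(); }
    const float* host() const { return data.data(); }
};

// What a regression loss's to_label() typically does: write one
// output_label_type (float here) per sample into the range at iter.
template <typename label_iterator>
void to_label(const mock_tensor& output_tensor, label_iterator iter)
{
    const float* out = output_tensor.host();
    for (long i = 0; i < output_tensor.num_samples(); ++i)
        *iter++ = out[i];
}

int main()
{
    mock_tensor out{{0.25f, -1.5f, 3.0f}};
    std::vector<float> labels(3);
    to_label(out, labels.begin());
    return 0;
}
// ---- end example sketch ----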
@@ -108,7 +110,7 @@ namespace dlib
layer<i>(sub).get_output().
- truth == an iterator pointing to the beginning of a range of
input_tensor.num_samples()/sub.sample_expansion_factor() elements. Moreover,
they must be label_type elements.
they must be training_label_type elements.
- for all valid i:
- *(truth+i/sub.sample_expansion_factor()) is the label of the ith sample in
input_tensor.
@@ -167,7 +169,8 @@ namespace dlib
!*/
public:
typedef float label_type;
typedef float training_label_type;
typedef float output_label_type;
template <
typename SUB_TYPE,
@@ -234,7 +237,8 @@ namespace dlib
!*/
public:
typedef float label_type;
typedef float training_label_type;
typedef float output_label_type;
template <
typename SUB_TYPE,
@@ -306,7 +310,8 @@ namespace dlib
public:
typedef unsigned long label_type;
typedef unsigned long training_label_type;
typedef unsigned long output_label_type;
template <
typename SUB_TYPE,
@@ -443,7 +448,8 @@ namespace dlib
public:
typedef std::vector<mmod_rect> label_type;
typedef std::vector<mmod_rect> training_label_type;
typedef std::vector<mmod_rect> output_label_type;
loss_mmod_(
);
@@ -30,20 +30,20 @@ namespace dlib
namespace impl
{
template <typename label_type>
template <typename training_label_type>
struct dnn_job_t
{
dnn_job_t() = default;
dnn_job_t(const dnn_job_t&) = delete;
dnn_job_t& operator=(const dnn_job_t&) = delete;
std::vector<std::vector<label_type>> labels;
std::vector<std::vector<training_label_type>> labels;
std::vector<resizable_tensor> t;
std::vector<int> have_data; // have_data[i] is true if there is data in labels[i] and t[i].
};
template <typename label_type>
void swap(dnn_job_t<label_type>& a, dnn_job_t<label_type>& b)
template <typename training_label_type>
void swap(dnn_job_t<training_label_type>& a, dnn_job_t<training_label_type>& b)
{
a.labels.swap(b.labels);
a.t.swap(b.t);
@@ -63,12 +63,12 @@ namespace dlib
static_assert(is_loss_layer_type<net_type>::value,
"The last layer in a network must be a loss layer.");
typedef typename net_type::label_type label_type;
typedef typename net_type::training_label_type training_label_type;
typedef typename net_type::input_type input_type;
const static size_t num_computational_layers = net_type::num_computational_layers;
const static size_t num_layers = net_type::num_layers;
private:
typedef impl::dnn_job_t<label_type> job_t;
typedef impl::dnn_job_t<training_label_type> job_t;
public:
dnn_trainer() = delete;
@@ -184,7 +184,7 @@ namespace dlib
void train_one_step (
const std::vector<input_type>& data,
const std::vector<label_type>& labels
const std::vector<training_label_type>& labels
)
{
DLIB_CASSERT(data.size() == labels.size());
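
A hedged usage sketch of feeding training_label_type labels to the trainer (again assuming a dlib build from around this commit; the network and data are illustrative only):

// ---- example sketch (illustrative, not dlib source) ----
#include <dlib/dnn.h>
#include <vector>

using namespace dlib;

int main()
{
    // Two-class toy classifier; training_label_type is unsigned long.
    using net_type = loss_multiclass_log<fc<2, input<matrix<float>>>>;
    net_type net;
    dnn_trainer<net_type> trainer(net);  // default sgd solver

    std::vector<matrix<float>> data(2, matrix<float>(zeros_matrix<float>(4,1)));
    std::vector<net_type::training_label_type> labels = {0, 1};

    // One mini-batch step; labels must match data element for element.
    trainer.train_one_step(data, labels);
    return 0;
}
// ---- end example sketch ----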
@@ -261,7 +261,7 @@ namespace dlib
void train (
const std::vector<input_type>& data,
const std::vector<label_type>& labels
const std::vector<training_label_type>& labels
)
{
DLIB_CASSERT(data.size() == labels.size() && data.size() > 0);
@@ -322,7 +322,7 @@ namespace dlib
{
DLIB_CASSERT(data.size() > 0);
const bool has_unsupervised_loss = std::is_same<no_label_type, label_type>::value;
const bool has_unsupervised_loss = std::is_same<no_label_type, training_label_type>::value;
static_assert(has_unsupervised_loss,
"You can only call this version of train() when using an unsupervised loss.");
@@ -562,7 +562,7 @@ namespace dlib
void thread() try
{
label_type pick_which_run_update;
training_label_type pick_which_run_update;
job_t next_job;
std::vector<dlib::future<double>> losses(devices.size());
@@ -591,7 +591,7 @@ namespace dlib
++main_iteration_counter;
// Call compute_parameter_gradients() and update_parameters() but pick the
// right version for unsupervised or supervised training based on the type
// of label_type.
// of training_label_type.
for (size_t i = 0; i < devices.size(); ++i)
tp[i]->add_task_by_value([&,i](double& loss){ loss = compute_parameter_gradients(i, next_job, pick_which_run_update); }, losses[i]);
// aggregate loss values from all the network computations.
@@ -988,7 +988,7 @@ namespace dlib
data_iterator dend
)
{
typename std::vector<label_type>::iterator nothing;
typename std::vector<training_label_type>::iterator nothing;
send_job(dbegin, dend, nothing);
}
@@ -47,7 +47,7 @@ namespace dlib
public:
typedef typename net_type::label_type label_type;
typedef typename net_type::training_label_type training_label_type;
typedef typename net_type::input_type input_type;
const static size_t num_computational_layers = net_type::num_computational_layers;
@@ -341,14 +341,14 @@ namespace dlib
void train (
const std::vector<input_type>& data,
const std::vector<label_type>& labels
const std::vector<training_label_type>& labels
);
/*!
requires
- data.size() == labels.size()
- data.size() > 0
- net_type uses a supervised loss.
i.e. net_type::label_type != no_label_type.
i.e. net_type::training_label_type != no_label_type.
ensures
- Trains a supervised neural network based on the given training data.
The goal of training is to find the network parameters that minimize
@@ -374,7 +374,7 @@ namespace dlib
requires
- data.size() > 0
- net_type uses an unsupervised loss.
i.e. net_type::label_type == no_label_type.
i.e. net_type::training_label_type == no_label_type.
ensures
- Trains an unsupervised neural network based on the given training data.
The goal of training is to find the network parameters that minimize
@@ -395,14 +395,14 @@ namespace dlib
void train_one_step (
const std::vector<input_type>& data,
const std::vector<label_type>& labels
const std::vector<training_label_type>& labels
);
/*!
requires
- data.size() == labels.size()
- data.size() > 0
- net_type uses a supervised loss.
i.e. net_type::label_type != no_label_type.
i.e. net_type::training_label_type != no_label_type.
ensures
- Performs one stochastic gradient update step based on the mini-batch of
data and labels supplied to this function. In particular, calling
@@ -433,7 +433,7 @@ namespace dlib
- std::advance(lbegin, std::distance(dbegin, dend) - 1) is dereferencable
- std::distance(dbegin, dend) > 0
- net_type uses a supervised loss.
i.e. net_type::label_type != no_label_type.
i.e. net_type::training_label_type != no_label_type.
ensures
- Performs one stochastic gradient update step based on the mini-batch of
data and labels supplied to this function. In particular, calling
@@ -457,7 +457,7 @@ namespace dlib
requires
- data.size() > 0
- net_type uses an unsupervised loss.
i.e. net_type::label_type == no_label_type.
i.e. net_type::training_label_type == no_label_type.
ensures
- Performs one stochastic gradient update step based on the mini-batch of
data supplied to this function. In particular, calling train_one_step()
@@ -485,7 +485,7 @@ namespace dlib
requires
- std::distance(dbegin, dend) > 0
- net_type uses an unsupervised loss.
i.e. net_type::label_type == no_label_type.
i.e. net_type::training_label_type == no_label_type.
ensures
- Performs one stochastic gradient update step based on the mini-batch of
data supplied to this function. In particular, calling train_one_step()