Commit d85de930 authored by Davis King

Split the update() methods into two parts: one that computes gradients
with respect to the parameters and one that updates the parameters with
those gradients.
parent 8c64a656
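In practice the split turns one call into a two-phase training step. A minimal sketch of the new call sequence at the loss-network level, mirroring the trainer change at the bottom of this diff (net, samples, labels, solvers, and step_size are hypothetical placeholders, not part of this commit):

    // Phase 1: forward pass plus backpropagation.  Returns the loss and fills each
    // layer's get_parameter_gradient() as well as get_final_data_gradient().
    double loss = net.compute_parameter_gradients(samples.begin(), samples.end(), labels.begin());
    // Phase 2: hand those gradients to the solvers, which produce parameter deltas
    // that are scaled by step_size and added to the parameters.
    net.update_parameters(make_sstack(solvers), step_size);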
......@@ -804,31 +804,42 @@ namespace dlib
const tensor& get_final_data_gradient(
) const { return subnetwork->get_final_data_gradient(); }
template <typename solver_type>
void update(const tensor& x, sstack<solver_type> solvers, double step_size)
void back_propagate_error(const tensor& x)
{
update(x,private_get_gradient_input(),solvers,step_size);
back_propagate_error(x, private_get_gradient_input());
}
template <typename solver_type>
void update(const tensor& x, const tensor& gradient_input, sstack<solver_type> solvers, double step_size)
void back_propagate_error(const tensor& x, const tensor& gradient_input)
{
DLIB_CASSERT(solvers.size()>=num_computational_layers,"");
dimpl::subnet_wrapper<subnet_type> wsub(*subnetwork);
params_grad.copy_size(details.get_layer_params());
impl::call_layer_backward(details, private_get_output(),
gradient_input, wsub, static_cast<tensor&>(params_grad));
subnetwork->back_propagate_error(x);
// zero out get_gradient_input()
gradient_input_is_stale = true;
}
template <typename solver_type>
void update_parameters(sstack<solver_type> solvers, double step_size)
{
DLIB_CASSERT(solvers.size()>=num_computational_layers,"");
// Don't try to adjust the parameters if this layer doesn't have any.
if (params_grad.size() != 0)
{
const tensor& step = solvers.top()(details.get_layer_params(), static_cast<const tensor&>(params_grad));
tt::add(1,details.get_layer_params(), step_size, step);
}
subnetwork->update(x, solvers.pop(), step_size);
gradient_input_is_stale = true;
subnetwork->update_parameters(solvers.pop(), step_size);
}
const tensor& get_parameter_gradient(
) const { return params_grad; }
tensor& get_parameter_gradient (
) { return params_grad; }
const subnet_type& subnet() const { return *subnetwork; }
subnet_type& subnet() { return *subnetwork; }
......@@ -847,7 +858,7 @@ namespace dlib
friend void serialize(const add_layer& item, std::ostream& out)
{
int version = 1;
int version = 2;
serialize(version, out);
serialize(*item.subnetwork, out);
serialize(item.details, out);
......@@ -856,13 +867,14 @@ namespace dlib
serialize(item.get_output_and_gradient_input_disabled, out);
serialize(item.x_grad, out);
serialize(item.cached_output, out);
serialize(item.params_grad, out);
}
friend void deserialize(add_layer& item, std::istream& in)
{
int version = 0;
deserialize(version, in);
if (version != 1)
if (!(1 <= version && version <= 2))
throw serialization_error("Unexpected version found while deserializing dlib::add_layer.");
deserialize(*item.subnetwork, in);
deserialize(item.details, in);
......@@ -871,6 +883,8 @@ namespace dlib
deserialize(item.get_output_and_gradient_input_disabled, in);
deserialize(item.x_grad, in);
deserialize(item.cached_output, in);
if (version == 2)
deserialize(item.params_grad, in);
}
friend std::ostream& operator<< (std::ostream& out, const add_layer& item)
......@@ -910,6 +924,7 @@ namespace dlib
std::swap(get_output_and_gradient_input_disabled, item.get_output_and_gradient_input_disabled);
std::swap(x_grad, item.x_grad);
std::swap(cached_output, item.cached_output);
std::swap(params_grad, item.params_grad);
}
......@@ -924,10 +939,10 @@ namespace dlib
resizable_tensor x_grad;
resizable_tensor cached_output;
// The following 2 objects don't logically contribute to the state of this class.
// They are only here to prevent them from being reallocated over and over in
// member functions.
resizable_tensor params_grad;
// temp_tensor doesn't logically contribute to the state of this object.
// It is here only to prevent it from being reallocated over and over.
resizable_tensor temp_tensor;
};
......@@ -1118,16 +1133,12 @@ namespace dlib
const tensor& get_final_data_gradient(
) const { return grad_final; }
template <typename solver_type>
void update(const tensor& x, sstack<solver_type> solvers, double step_size)
void back_propagate_error(const tensor& x)
{
return update(x,private_get_gradient_input(),solvers, step_size);
back_propagate_error(x, private_get_gradient_input());
}
template <typename solver_type>
void update(const tensor& x, const tensor& gradient_input, sstack<solver_type> solvers, double step_size)
void back_propagate_error(const tensor& x, const tensor& gradient_input)
{
DLIB_CASSERT(solvers.size()>=num_computational_layers,"");
// make sure grad_final is initialized to 0
if (!have_same_dimensions(x, grad_final))
grad_final.copy_size(x);
......@@ -1138,15 +1149,27 @@ namespace dlib
impl::call_layer_backward(details, private_get_output(),
gradient_input, wsub, static_cast<tensor&>(params_grad));
// zero out get_gradient_input()
gradient_input_is_stale = true;
}
template <typename solver_type>
void update_parameters(sstack<solver_type> solvers, double step_size)
{
DLIB_CASSERT(solvers.size()>=num_computational_layers,"");
// Don't try to adjust the parameters if this layer doesn't have any.
if (params_grad.size() != 0)
{
if (params_grad.size() != 0)
{
const tensor& step = solvers.top()(details.get_layer_params(), static_cast<const tensor&>(params_grad));
tt::add(1,details.get_layer_params(), step_size, step);
}
gradient_input_is_stale = true;
}
const tensor& get_parameter_gradient(
) const { return params_grad; }
tensor& get_parameter_gradient (
) { return params_grad; }
const subnet_type& subnet() const { return input_layer; }
subnet_type& subnet() { return input_layer; }
......@@ -1347,18 +1370,27 @@ namespace dlib
const tensor& get_final_data_gradient(
) const { return subnetwork.get_final_data_gradient(); }
template <typename solver_type>
void update(const tensor& x, sstack<solver_type> solvers, double step_size)
void back_propagate_error(const tensor& x)
{
subnetwork.back_propagate_error(x);
}
void back_propagate_error(const tensor& x, const tensor& gradient_input)
{
subnetwork.update(x,solvers, step_size);
subnetwork.back_propagate_error(x,gradient_input);
}
template <typename solver_type>
void update(const tensor& x, const tensor& gradient_input, sstack<solver_type> solvers, double step_size)
void update_parameters(sstack<solver_type> solvers, double step_size)
{
subnetwork.update(x,gradient_input,solvers, step_size);
subnetwork.update_parameters(solvers, step_size);
}
const tensor& get_parameter_gradient(
) const { return params_grad; }
tensor& get_parameter_gradient (
) { return params_grad; }
const subnet_type& subnet() const { return subnetwork; }
subnet_type& subnet() { return subnetwork; }
......@@ -1430,6 +1462,11 @@ namespace dlib
{ return subnetwork.private_get_gradient_input(); }
subnet_type subnetwork;
// This member doesn't logically contribute to the state of the object since it is
// always empty. It's just here so we can have the get_parameter_gradient() methods
// which have to return something. So they return this empty tensor.
resizable_tensor params_grad;
};
// ----------------------------------------------------------------------------------------
......@@ -1598,32 +1635,42 @@ namespace dlib
return details[0].get_gradient_input();
}
template <typename solver_type>
void update(const tensor& x, sstack<solver_type> solvers, double step_size)
const tensor& get_parameter_gradient(
) const { return details[0].get_parameter_gradient(); }
tensor& get_parameter_gradient (
) { return details[0].get_parameter_gradient(); }
void back_propagate_error(const tensor& x)
{
update(x,private_get_gradient_input(),solvers,step_size);
back_propagate_error(x, private_get_gradient_input());
}
template <typename solver_type>
void update(const tensor& x, const tensor& gradient_input, sstack<solver_type> solvers, double step_size)
void back_propagate_error(const tensor& x, const tensor& gradient_input)
{
const auto cnt = (REPEATED_LAYER<SUBNET>::num_computational_layers-SUBNET::num_computational_layers);
if (details.size() > 1)
{
details[0].update(details[1].get_output(), gradient_input, solvers,step_size);
details[0].back_propagate_error(details[1].get_output(), gradient_input);
for (size_t i = 1; i < details.size(); ++i)
{
if (i+1 < details.size())
details[i].update(details[i+1].get_output(), details[i-1].get_final_data_gradient(), solvers.pop(cnt*i),step_size);
details[i].back_propagate_error(details[i+1].get_output(), details[i-1].get_final_data_gradient());
else
details[i].update(subnetwork.get_output(), details[i-1].get_final_data_gradient(), solvers.pop(cnt*i),step_size);
details[i].back_propagate_error(subnetwork.get_output(), details[i-1].get_final_data_gradient());
}
}
else
{
details[0].update(subnetwork.get_output(), gradient_input, solvers,step_size);
details[0].back_propagate_error(subnetwork.get_output(), gradient_input);
}
subnetwork.update(x, details.back().get_final_data_gradient(), solvers.pop(cnt*details.size()),step_size);
subnetwork.back_propagate_error(x, details.back().get_final_data_gradient());
}
template <typename solver_type>
void update_parameters(sstack<solver_type> solvers, double step_size)
{
for (size_t i = 0; i < details.size(); ++i)
details[i].update_parameters(solvers.pop(comp_layers_in_each_group*i),step_size);
subnetwork.update_parameters(solvers.pop(comp_layers_in_each_group*details.size()),step_size);
}
const subnet_type& subnet() const { return subnetwork; }
......@@ -1827,25 +1874,19 @@ namespace dlib
return grad_final;
}
template <typename solver_type>
void update(
const tensor& /*x*/,
sstack<solver_type> /*solvers*/,
double /*step_size*/
)
void back_propagate_error(const tensor& /*x*/)
{
// nothing to update
// nothing to do
}
void back_propagate_error(const tensor& /*x*/, const tensor& /*gradient_input*/)
{
// nothing to do
}
template <typename solver_type>
void update(
const tensor& /*x*/,
const tensor& /*gradient_input*/,
sstack<solver_type> /*solvers*/,
double /*step_size*/
)
void update_parameters(sstack<solver_type> /*solvers*/, double /*step_size*/)
{
// nothing to update
// nothing to do
}
const subnet_type& subnet() const { return input_layer; }
......@@ -2141,58 +2182,55 @@ namespace dlib
return compute_loss(temp_tensor);
}
template <typename label_iterator, typename solver_type>
double update (
template <typename label_iterator>
double compute_parameter_gradients (
const tensor& x,
label_iterator lbegin,
sstack<solver_type> solvers,
double step_size
label_iterator lbegin
)
{
subnetwork.forward(x);
dimpl::subnet_wrapper<subnet_type> wsub(subnetwork);
double l = loss.compute_loss(x, lbegin, wsub);
subnetwork.update(x, solvers, step_size);
subnetwork.back_propagate_error(x);
return l;
}
template <typename input_iterator, typename label_iterator, typename solver_type>
double update (
template <typename input_iterator, typename label_iterator>
double compute_parameter_gradients (
input_iterator ibegin,
input_iterator iend,
label_iterator lbegin,
sstack<solver_type> solvers,
double step_size
label_iterator lbegin
)
{
to_tensor(ibegin,iend,temp_tensor);
return update(temp_tensor, lbegin, solvers, step_size);
return compute_parameter_gradients(temp_tensor, lbegin);
}
template <typename solver_type>
double update (
const tensor& x,
sstack<solver_type> solvers,
double step_size
double compute_parameter_gradients (
const tensor& x
)
{
subnetwork.forward(x);
dimpl::subnet_wrapper<subnet_type> wsub(subnetwork);
double l = loss.compute_loss(x, wsub);
subnetwork.update(x, solvers, step_size);
subnetwork.back_propagate_error(x);
return l;
}
template <typename input_iterator, typename solver_type>
double update (
template <typename input_iterator>
double compute_parameter_gradients (
input_iterator ibegin,
input_iterator iend,
input_iterator iend
)
{
to_tensor(ibegin,iend,temp_tensor);
return compute_parameter_gradients(temp_tensor);
}
template <typename solver_type>
void update_parameters (
sstack<solver_type> solvers,
double step_size
)
{
to_tensor(ibegin,iend,temp_tensor);
return update(temp_tensor, solvers, step_size);
subnetwork.update_parameters(solvers, step_size);
}
const subnet_type& subnet() const { return subnetwork; }
......@@ -2477,18 +2515,24 @@ namespace dlib
return subnetwork.get_final_data_gradient();
}
template <typename solver_type>
void update(const tensor& x, sstack<solver_type> solvers)
void back_propagate_error(const tensor& x)
{
subnetwork.update(x,solvers);
subnetwork.back_propagate_error(x);
}
template <typename solver_type>
void update(const tensor& x, const tensor& gradient_input, sstack<solver_type> solvers)
void update_parameters(sstack<solver_type> solvers, double step_size)
{
subnetwork.update(x,gradient_input,solvers);
subnetwork.update_parameters(solvers, step_size);
}
const tensor& get_parameter_gradient(
) const { return params_grad; }
tensor& get_parameter_gradient (
) { return params_grad; }
const subnet_type& subnet() const
{
return subnetwork;
......@@ -2558,6 +2602,11 @@ namespace dlib
{ return layer<TAG_TYPE>(subnetwork).private_get_gradient_input(); }
subnet_type subnetwork;
// This member doesn't logically contribute to the state of the object since it is
// always empty. It's just here so we can have the get_parameter_gradient() methods
// which have to return something. So they return this empty tensor.
resizable_tensor params_grad;
};
template <template<typename> class T, typename U>
struct is_nonloss_layer_type<add_skip_layer<T,U>> : std::true_type {};
......
......@@ -410,33 +410,53 @@ namespace dlib
/*!
ensures
- returns the error gradient for this network. That is, this is the error
gradient that this network will use to update itself when update() is
called. Therefore, when performing back propagation, layers that sit on
top of this network layer write their back propagated error gradients
into get_gradient_input(). Or to put it another way, during back
propagation, layers take the contents of their get_gradient_input() and
back propagate it through themselves and store the results into their
subnetwork's get_gradient_input().
gradient that this network will use to compute parameter gradients when
back_propagate_error() is called. Therefore, when performing back
propagation, layers that sit on top of this network layer write their
back-propagated error gradients into get_gradient_input(). Or to put it
another way, during back-propagation, layers take the contents of their
get_gradient_input() and back-propagate it through themselves and store
the result into their subnetwork's get_gradient_input().
This means you should consider get_gradient_input() as an input to the
update() method.
back_propagate_error() method.
!*/
const tensor& get_final_data_gradient(
) const;
/*!
ensures
- if update() has been called to back-propagate a gradient through this
network then you can call get_final_data_gradient() to obtain the last
gradient computed. That is, this function returns the gradient of the
network with respect to its inputs.
- if back_propagate_error() has been called to back-propagate a gradient
through this network then you can call get_final_data_gradient() to
obtain the last data gradient computed. That is, this function returns
the gradient of the network with respect to its inputs.
- Note that there is only one "final data gradient" for an entire network,
not one per layer, since there is only one input to the entire network.
!*/
template <typename solver_type>
void update(
const tensor& x,
sstack<solver_type> solvers,
double step_size
const tensor& get_parameter_gradient(
) const;
/*!
ensures
- if back_propagate_error() has been called then you can call
get_parameter_gradient() to find the gradient of this layer's parameters.
When we update the parameters by calling update_parameters(), it will use
the gradient in get_parameter_gradient() to perform the update.
Therefore, you should consider get_parameter_gradient() as an input to
update_parameters().
!*/
tensor& get_parameter_gradient (
);
/*!
ensures
- returns a non-const reference to the tensor returned by the above
get_parameter_gradient() method. You could use this method to modify the
parameter gradient in some way before invoking update_parameters().
!*/
void back_propagate_error(
const tensor& x
);
/*!
requires
......@@ -445,28 +465,21 @@ namespace dlib
subsequently modified in any way.
- get_gradient_input() has been set equal to the gradient of this network's
output with respect to some loss function.
- The given solvers have only ever been used with this network. That
is, if you want to call update() on some other neural network object then
you must NOT reuse the same solvers object.
- solvers.size() >= num_computational_layers
- 0 < step_size <= 1
ensures
- Back propagates the error gradient, get_gradient_input(), through this
network and uses the provided solvers to update the network parameters.
- The parameter delta vector output by the solvers is multiplied by
step_size before being added to the parameters.
network and computes parameter and data gradients, via backpropagation.
Specifically, this function populates get_final_data_gradient() and also,
for each layer, the tensor returned by get_parameter_gradient().
- All elements of #get_gradient_input() are set to 0.
- have_same_dimensions(#get_final_data_gradient(), x) == true
- have_same_dimensions(#get_final_data_gradient(), x) == true.
- have_same_dimensions(#get_parameter_gradient(), layer_details().get_layer_params()) == true.
- #get_final_data_gradient() contains the gradient of the network with
respect to x.
!*/
template <typename solver_type>
void update(
void back_propagate_error(
const tensor& x,
const tensor& gradient_input,
sstack<solver_type> solvers,
double step_size
const tensor& gradient_input
);
/*!
requires
......@@ -474,27 +487,45 @@ namespace dlib
Moreover, this was the most recent call to forward() and x has not been
subsequently modified in any way.
- have_same_dimensions(gradient_input, get_output()) == true
- The given solvers have only ever been used with this network. That
is, if you want to call update() on some other neural network object then
you must NOT reuse the same solvers object.
- solvers.size() >= num_computational_layers
- 0 < step_size <= 1
ensures
- This function is identical to the version of update() defined immediately
above except that it back-propagates gradient_input through the network
instead of get_gradient_input(). Therefore, this version of update is
equivalent to performing:
- This function is identical to the version of back_propagate_error()
defined immediately above except that it back-propagates gradient_input
through the network instead of get_gradient_input(). Therefore, this
version of back_propagate_error() is equivalent to performing:
get_gradient_input() = gradient_input;
update(x,solvers);
Except that calling update(x,gradient_input,solvers) avoids the copy
and is therefore slightly more efficient.
- The parameter delta vector output by the solvers is multiplied by
step_size before being added to the parameters.
back_propagate_error(x);
Except that calling back_propagate_error(x,gradient_input) avoids the
copy and is therefore slightly more efficient.
- All elements of #get_gradient_input() are set to 0.
- have_same_dimensions(#get_final_data_gradient(), x) == true.
- have_same_dimensions(#get_parameter_gradient(), layer_details().get_layer_params()) == true.
- #get_final_data_gradient() contains the gradient of the network with
respect to x.
!*/
template <typename solver_type>
void update_parameters(
sstack<solver_type> solvers,
double step_size
);
/*!
requires
- solver_type is an implementation of the EXAMPLE_SOLVER interface defined
in solvers_abstract.h
- back_propagate_error() has been called.
- The given solvers have only ever been used with this network. That is,
if you want to call update_parameters() on some other neural network
object then you must NOT reuse the same solvers object.
- solvers.size() >= num_computational_layers
- 0 < step_size <= 1
ensures
- Updates all the parameters in the network. In particular, we pass each
layer's parameter gradient (i.e. the tensor returned by the layer's
get_parameter_gradient() member) through that layer's corresponding
solver object. This produces a parameter delta vector and we add
step_size times that vector to the layer's parameters.
!*/
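A minimal sketch of how these members fit together on a plain (non-loss) network object (net, x, solvers, and step_size are hypothetical placeholders):

    net.forward(x);                   // forward pass; makes get_output() available
    // ... write the gradient of the loss with respect to net.get_output()
    //     into net.get_gradient_input() ...
    net.back_propagate_error(x);      // fills get_final_data_gradient() and, for each
                                      // layer, get_parameter_gradient()
    net.update_parameters(make_sstack(solvers), step_size);   // apply the solvers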
void clean(
);
/*!
......@@ -831,12 +862,10 @@ namespace dlib
// -------------
template <typename label_iterator, typename solver_type>
double update (
template <typename label_iterator>
double compute_parameter_gradients (
const tensor& x,
label_iterator lbegin,
sstack<solver_type> solvers,
double step_size
label_iterator lbegin
);
/*!
requires
......@@ -844,31 +873,22 @@ namespace dlib
- x.num_samples() > 0
- lbegin == iterator pointing to the start of a range of
x.num_samples()/sample_expansion_factor label_type elements.
- The given solvers have only ever been used with this network. That
is, if you want to call update() on some other neural network object then
you must NOT reuse the same solvers object.
- solvers.size() >= num_computational_layers
- 0 < step_size <= 1
ensures
- runs x through the network, compares the output to the expected output
pointed to by lbegin, and updates the network parameters via
backpropagation.
pointed to by lbegin, and computes parameter and data gradients with
respect to the loss, via backpropagation. Specifically, this function
updates get_final_data_gradient() and also, for each layer, the tensor
returned by get_parameter_gradient().
- for all valid k:
- the expected label of the kth sample in x is *(lbegin+k/sample_expansion_factor).
- The provided solvers are used to update the parameters in each layer of
the network.
- The parameter delta vector output by the solvers is multiplied by
step_size before being added to the parameters.
- returns compute_loss(x,lbegin)
!*/
template <typename input_iterator, typename label_iterator, typename solver_type>
double update (
template <typename input_iterator, typename label_iterator>
double compute_parameter_gradients (
input_iterator ibegin,
input_iterator iend,
label_iterator lbegin,
sstack<solver_type> solvers,
double step_size
label_iterator lbegin
);
/*!
requires
......@@ -876,77 +896,72 @@ namespace dlib
- std::distance(ibegin,iend) > 0
- lbegin == iterator pointing to the start of a range of
std::distance(ibegin,iend) label_type elements.
- The given solvers have only ever been used with this network. That
is, if you want to call update() on some other neural network object then
you must NOT reuse the same solvers object.
- solvers.size() >= num_computational_layers
- 0 < step_size <= 1
ensures
- runs [ibegin,iend) through the network, compares the output to the
expected output pointed to by lbegin, and updates the network parameters
via backpropagation.
expected output pointed to by lbegin, and computes parameter and data
gradients with respect to the loss, via backpropagation. Specifically,
this function updates get_final_data_gradient() and also, for each layer,
the tensor returned by get_parameter_gradient().
- for all valid k:
- the expected label of *(ibegin+k) is *(lbegin+k).
- The provided solvers are used to update the parameters in each layer of
the network.
- The parameter delta vector output by the solvers is multiplied by
step_size before being added to the parameters.
- returns compute_loss(ibegin,iend,lbegin)
!*/
// -------------
template <typename solver_type>
double update (
const tensor& x,
sstack<solver_type> solvers,
double step_size
double compute_parameter_gradients (
const tensor& x
);
/*!
requires
- LOSS_DETAILS is an unsupervised loss. i.e. label_type==no_label_type.
- x.num_samples()%sample_expansion_factor == 0
- x.num_samples() > 0
- The given solvers have only ever been used with this network. That
is, if you want to call update() on some other neural network object then
you must NOT reuse the same solvers object.
- solvers.size() >= num_computational_layers
- 0 < step_size <= 1
ensures
- runs x through the network and updates the network parameters by
back-propagating the loss gradient through the network.
- The provided solvers are used to update the parameters in each layer of
the network.
- The parameter delta vector output by the solvers is multiplied by
step_size before being added to the parameters.
- runs x through the network and computes parameter and data gradients with
respect to the loss, via backpropagation. Specifically, this function
updates get_final_data_gradient() and also, for each layer, the tensor
returned by get_parameter_gradient().
- returns compute_loss(x)
!*/
template <typename input_iterator, typename solver_type>
double update (
template <typename input_iterator>
double compute_parameter_gradients (
input_iterator ibegin,
input_iterator iend,
sstack<solver_type> solvers,
double step_size
input_iterator iend
);
/*!
requires
- LOSS_DETAILS is an unsupervised loss. i.e. label_type==no_label_type.
- [ibegin, iend) is an iterator range over input_type objects.
- std::distance(ibegin,iend) > 0
ensures
- runs [ibegin,iend) through the network and computes parameter and data
gradients with respect to the loss, via backpropagation. Specifically,
this function updates get_final_data_gradient() and also, for each layer,
the tensor returned by get_parameter_gradient().
- returns compute_loss(ibegin,iend)
!*/
template <typename solver_type>
void update_parameters (
sstack<solver_type> solvers,
double step_size
);
/*!
requires
- solver_type is an implementation of the EXAMPLE_SOLVER interface defined
in solvers_abstract.h
- compute_parameter_gradients() has been called.
- The given solvers have only ever been used with this network. That
is, if you want to call update() on some other neural network object then
you must NOT reuse the same solvers object.
is, if you want to call update_parameters() on some other neural network
object then you must NOT reuse the same solvers object.
- solvers.size() >= num_computational_layers
- 0 < step_size <= 1
ensures
- runs [ibegin,iend) through the network and updates the network parameters
by back-propagating the loss gradient through the network.
- The provided solvers are used to update the parameters in each layer of
the network.
- The parameter delta vector output by the solvers is multiplied by
step_size before being added to the parameters.
- returns compute_loss(ibegin,iend)
- Updates all the parameters in the network. In particular, we pass each
layer's parameter gradient (i.e. the tensor returned by the layer's
get_parameter_gradient() member) through that layer's corresponding
solver object. This produces a parameter delta vector and we add
step_size times that vector to the layer's parameters.
!*/
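For an unsupervised loss (i.e. label_type == no_label_type) the same two-phase pattern applies, just without labels. A sketch with hypothetical placeholders net, x, solvers, and step_size, where x is a tensor of input samples:

    double loss = net.compute_parameter_gradients(x);         // backprop; fills the gradients
    net.update_parameters(make_sstack(solvers), step_size);   // apply the solvers to them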
// -------------
......
......@@ -418,14 +418,16 @@ namespace dlib
template <typename T>
void run_update(job_t& next_job, const T&)
{
double loss = net.update(next_job.t, next_job.labels.begin(), make_sstack(solvers),step_size);
double loss = net.compute_parameter_gradients(next_job.t, next_job.labels.begin());
net.update_parameters(make_sstack(solvers),step_size);
record_loss(loss);
}
void run_update(job_t& next_job, const no_label_type&)
{
no_label_type pick_which_run_update;
double loss = net.update(next_job.t, make_sstack(solvers), step_size);
double loss = net.compute_parameter_gradients(next_job.t);
net.update_parameters(make_sstack(solvers), step_size);
record_loss(loss);
}
......@@ -438,8 +440,9 @@ namespace dlib
job_t next_job;
while(job_pipe.dequeue(next_job))
{
// call net.update() but pick the right version for unsupervised or
// supervised training based on the type of label_type.
// call net.compute_parameter_gradients() and net.update_parameters() but
// pick the right version for unsupervised or supervised training based on
// the type of label_type.
run_update(next_job, pick_which_run_update);
// If we have been running for a while then check if the loss is still
......