Commit 4dbe3337 authored by Dennis Francis, committed by Davis E. King

feature_addition : Mean squared loss layer for multiple output (#404) (#427)

* feature_addition : Mean squared loss layer for multiple output (#404)

* Added loss_mean_squared_multioutput layer to support multiple outputs.
* Also added a corresponding test case that exercises a single-variable regression
  with multiple outputs.

* Added error checks on truth argument

Added assert statements to check that the truth argument to the
compute_loss_value_and_gradient() method contains matrices of the
correct dimensions relative to the output tensor's size. Also added
these requirements on the truth argument to the abstract
documentation.
parent 60092335
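
For context, here is a minimal usage sketch of the new layer (the two-output network, synthetic data, and training parameters are illustrative, loosely mirroring the test case added in this commit):

    #include <dlib/dnn.h>
    #include <iostream>
    #include <vector>
    using namespace dlib;

    int main()
    {
        // Each sample is a 1x1 matrix<double>; each label is a 2x1 matrix<float>,
        // i.e. one row per regression output.
        std::vector<matrix<double>> samples;
        std::vector<matrix<float>> labels;
        for (int i = 0; i < 100; ++i)
        {
            matrix<double> x(1,1);
            x = i;
            matrix<float> y(2,1);
            y = 2.0f*i + 1.0f,
               -0.5f*i + 3.0f;
            samples.push_back(x);
            labels.push_back(y);
        }

        // The final fc layer must have as many outputs as each label has rows.
        using net_type = loss_mean_squared_multioutput<fc<2, input<matrix<double>>>>;
        net_type net;

        dnn_trainer<net_type> trainer(net, sgd());
        trainer.set_learning_rate(1e-5);
        trainer.set_mini_batch_size(50);
        trainer.set_max_num_epochs(100);
        trainer.train(samples, labels);

        // Predictions come back as 2x1 matrix<float> column vectors.
        matrix<float> pred = net(samples[0]);
        std::cout << trans(pred) << std::endl;
    }
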
@@ -1184,6 +1184,125 @@ namespace dlib
    template <typename SUBNET>
    using loss_mean_squared = add_loss_layer<loss_mean_squared_, SUBNET>;

// ----------------------------------------------------------------------------------------

    class loss_mean_squared_multioutput_
    {
    public:

        typedef matrix<float> training_label_type;
        typedef matrix<float> output_label_type;

        template <
            typename SUB_TYPE,
            typename label_iterator
            >
        void to_label (
            const tensor& input_tensor,
            const SUB_TYPE& sub,
            label_iterator iter
        ) const
        {
            DLIB_CASSERT(sub.sample_expansion_factor() == 1);

            const tensor& output_tensor = sub.get_output();
            DLIB_CASSERT(output_tensor.nr() == 1 &&
                         output_tensor.nc() == 1);
            DLIB_CASSERT(input_tensor.num_samples() == output_tensor.num_samples());

            const float* out_data = output_tensor.host();
            for (long i = 0; i < output_tensor.num_samples(); ++i)
            {
                *iter++ = mat(out_data, output_tensor.k(), 1);
                out_data += output_tensor.k();
            }
        }

        template <
            typename const_label_iterator,
            typename SUBNET
            >
        double compute_loss_value_and_gradient (
            const tensor& input_tensor,
            const_label_iterator truth,
            SUBNET& sub
        ) const
        {
            const tensor& output_tensor = sub.get_output();
            tensor& grad = sub.get_gradient_input();

            DLIB_CASSERT(sub.sample_expansion_factor() == 1);
            DLIB_CASSERT(input_tensor.num_samples() != 0);
            DLIB_CASSERT(input_tensor.num_samples()%sub.sample_expansion_factor() == 0);
            DLIB_CASSERT(input_tensor.num_samples() == grad.num_samples());
            DLIB_CASSERT(input_tensor.num_samples() == output_tensor.num_samples());
            DLIB_CASSERT(output_tensor.nr() == 1 &&
                         output_tensor.nc() == 1);
            DLIB_CASSERT(grad.nr() == 1 &&
                         grad.nc() == 1);
            DLIB_CASSERT(grad.k() == output_tensor.k());
            const long k = output_tensor.k();
            for (long idx = 0; idx < output_tensor.num_samples(); ++idx)
            {
                const_label_iterator truth_matrix_ptr = (truth + idx);
                DLIB_CASSERT((*truth_matrix_ptr).nr() == k &&
                             (*truth_matrix_ptr).nc() == 1);
            }

            // The loss we output is the average loss over the mini-batch.
            const double scale = 1.0/output_tensor.num_samples();
            double loss = 0;
            float* g = grad.host_write_only();
            const float* out_data = output_tensor.host();
            matrix<float> ytrue;
            for (long i = 0; i < output_tensor.num_samples(); ++i)
            {
                ytrue = *truth++;
                for (long j = 0; j < output_tensor.k(); ++j)
                {
                    const float y = ytrue(j, 0);
                    const float temp1 = y - *out_data++;
                    const float temp2 = scale*temp1;
                    loss += 0.5*temp2*temp1;
                    *g = -temp2;
                    ++g;
                }
            }
            return loss;
        }

        friend void serialize(const loss_mean_squared_multioutput_& , std::ostream& out)
        {
            serialize("loss_mean_squared_multioutput_", out);
        }

        friend void deserialize(loss_mean_squared_multioutput_& , std::istream& in)
        {
            std::string version;
            deserialize(version, in);
            if (version != "loss_mean_squared_multioutput_")
                throw serialization_error("Unexpected version found while deserializing dlib::loss_mean_squared_multioutput_.");
        }

        friend std::ostream& operator<<(std::ostream& out, const loss_mean_squared_multioutput_& )
        {
            out << "loss_mean_squared_multioutput";
            return out;
        }

        friend void to_xml(const loss_mean_squared_multioutput_& /*item*/, std::ostream& out)
        {
            out << "<loss_mean_squared_multioutput/>";
        }

    };

    template <typename SUBNET>
    using loss_mean_squared_multioutput = add_loss_layer<loss_mean_squared_multioutput_, SUBNET>;
// ----------------------------------------------------------------------------------------
}
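
For reference, the quantity computed by compute_loss_value_and_gradient() above is the average squared error over the mini-batch. With N = output_tensor.num_samples(), k = output_tensor.k(), targets y_ij and network outputs ŷ_ij, the loop computes

    L = \frac{1}{2N} \sum_{i=1}^{N} \sum_{j=1}^{k} \left(y_{ij} - \hat{y}_{ij}\right)^2,
    \qquad
    \frac{\partial L}{\partial \hat{y}_{ij}} = -\frac{1}{N}\left(y_{ij} - \hat{y}_{ij}\right),

and the gradient term is exactly what the inner loop writes into grad.
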
@@ -677,6 +677,65 @@ namespace dlib
    template <typename SUBNET>
    using loss_mean_squared = add_loss_layer<loss_mean_squared_, SUBNET>;

// ----------------------------------------------------------------------------------------

    class loss_mean_squared_multioutput_
    {
        /*!
            WHAT THIS OBJECT REPRESENTS
                This object implements the loss layer interface defined above by
                EXAMPLE_LOSS_LAYER_.  In particular, it implements the mean squared loss,
                which is appropriate for regression problems with multiple continuous
                outputs per sample.
        !*/
    public:

        typedef matrix<float> training_label_type;
        typedef matrix<float> output_label_type;

        template <
            typename SUB_TYPE,
            typename label_iterator
            >
        void to_label (
            const tensor& input_tensor,
            const SUB_TYPE& sub,
            label_iterator iter
        ) const;
        /*!
            This function has the same interface as EXAMPLE_LOSS_LAYER_::to_label() except
            it has the additional calling requirements that:
                - sub.get_output().nr() == 1
                - sub.get_output().nc() == 1
                - sub.get_output().num_samples() == input_tensor.num_samples()
                - sub.sample_expansion_factor() == 1
            and the output label is a column vector of the predicted continuous variables.
        !*/

        template <
            typename const_label_iterator,
            typename SUBNET
            >
        double compute_loss_value_and_gradient (
            const tensor& input_tensor,
            const_label_iterator truth,
            SUBNET& sub
        ) const;
        /*!
            This function has the same interface as EXAMPLE_LOSS_LAYER_::compute_loss_value_and_gradient()
            except it has the additional calling requirements that:
                - sub.get_output().nr() == 1
                - sub.get_output().nc() == 1
                - sub.get_output().num_samples() == input_tensor.num_samples()
                - sub.sample_expansion_factor() == 1
                - (*(truth + idx)).nc() == 1 for all idx such that 0 <= idx < sub.get_output().num_samples()
                - (*(truth + idx)).nr() == sub.get_output().k() for all idx such that 0 <= idx < sub.get_output().num_samples()
        !*/

    };

    template <typename SUBNET>
    using loss_mean_squared_multioutput = add_loss_layer<loss_mean_squared_multioutput_, SUBNET>;
// ----------------------------------------------------------------------------------------
}
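
Concretely, the requirements above mean every training label must be a k x 1 column vector whose length matches the number of channels (k) of the layer's output. A tiny illustrative sketch (the value 3 for k is arbitrary):

    #include <dlib/matrix.h>
    using namespace dlib;

    int main()
    {
        // For a network whose final fc layer has k = 3 outputs, every truth
        // label handed to the trainer must satisfy nr() == 3 and nc() == 1.
        matrix<float> label(3, 1);
        label = 0.1f, 0.2f, 0.3f;
        return (label.nr() == 3 && label.nc() == 1) ? 0 : 1;
    }
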
@@ -1781,6 +1781,67 @@ namespace
}
// ----------------------------------------------------------------------------------------
    void test_multioutput_linear_regression()
    {
        const int num_outputs = 2;
        ::std::vector<matrix<double>> x(100);
        ::std::vector<matrix<float>> y(100);

        ::std::default_random_engine generator(16);
        ::std::normal_distribution<float> distribution(0,5);
        ::std::normal_distribution<float> slope_distribution(10,5);
        ::std::normal_distribution<float> intercept_distribution(50,10);
        ::std::vector<float> true_intercepts(num_outputs);
        ::std::vector<float> true_slopes(num_outputs);
        for ( int jj = 0; jj < num_outputs; ++jj )
        {
            true_slopes[jj] = slope_distribution(generator);
            true_intercepts[jj] = intercept_distribution(generator);
        }
        matrix<float> ytmp(num_outputs, 1);
        for ( int ii = 0; ii < 100; ++ii )
        {
            const double val = static_cast<double>(ii);
            matrix<double> tmp(1,1);
            tmp = val;
            x[ii] = tmp;

            for ( int jj = 0; jj < num_outputs; ++jj )
                ytmp(jj, 0) = (true_intercepts[jj] + true_slopes[jj]*static_cast<float>(val) + distribution(generator));
            y[ii] = ytmp;
        }

        using net_type = loss_mean_squared_multioutput<fc<num_outputs, input<matrix<double>>>>;
        net_type net;
        layer<1>(net).layer_details().set_bias_learning_rate_multiplier(900);
        sgd defsolver;
        dnn_trainer<net_type> trainer(net, defsolver);
        trainer.set_learning_rate(0.000015);
        trainer.set_mini_batch_size(50);
        trainer.set_max_num_epochs(170);
        trainer.train(x, y);

        float slope_error = 0.0;
        float intercept_error = 0.0;
        const float eps_slope = 0.5, eps_intercept = 2.0;

        for ( int jj = 0; jj < num_outputs; ++jj )
        {
            slope_error += abs(layer<1>(net).layer_details().get_weights().host()[jj] - true_slopes[jj]);
            intercept_error += abs(layer<1>(net).layer_details().get_biases().host()[jj] - true_intercepts[jj]);
        }
        slope_error /= float(num_outputs);
        intercept_error /= float(num_outputs);

        DLIB_TEST_MSG(slope_error <= eps_slope,
                      "Average absolute slope error = " << slope_error << " Error limit = " << eps_slope);
        DLIB_TEST_MSG(intercept_error <= eps_intercept,
                      "Average absolute intercept error = " << intercept_error << " Error limit = " << eps_intercept);
    }
// ----------------------------------------------------------------------------------------
    class dnn_tester : public tester
@@ -1849,6 +1910,7 @@ namespace
            test_copy_tensor_cpu();
            test_concat();
            test_simple_linear_regression();
            test_multioutput_linear_regression();
        }

        void perform_test()