feature_addition : Added a mean squared loss layer to DNN

Added a mean squared loss layer, "loss_mean_squared", to DNN as requested in https://github.com/davisking/dlib/issues/152. Also added a test case that fits a simple one-variable linear regression using this layer.

feature_addition : Added a mean squared loss layer to DNN
Added a mean squared loss layer, "loss_mean_squared", to DNN as requested in https://github.com/davisking/dlib/issues/152. Also added a test case that fits a simple one-variable linear regression using this layer.
cd4b62b4 · Dennis Francis · 2c8b4864 · cd4b62b4 · cd4b62b4 · cd4b62b4
Commit cd4b62b4 authored Nov 23, 2016 by Dennis Francis
Show whitespace changes
Inline Side-by-side

Showing with 214 additions and 0 deletions

loss.h dlib/dnn/loss.h +107 -0

loss_abstract.h dlib/dnn/loss_abstract.h +58 -0

dnn.cpp dlib/test/dnn.cpp +49 -0

No files found.
--- a/dlib/dnn/loss.h
+++ b/dlib/dnn/loss.h
@@ -1292,6 +1292,113 @@ namespace dlib
    template <typename SUBNET>
    using loss_metric_hardish = add_loss_layer<loss_metric_hardish_, SUBNET>; // alias for add_loss_layer instantiated with loss_metric_hardish_ and SUBNET
+// ----------------------------------------------------------------------------------------
+    class loss_mean_squared_
+    {
+        // Loss layer for scalar regression.  Both the training labels and the
+        // labels produced by to_label() are plain floats, and the sub-network is
+        // required to emit exactly one value per sample (nr == nc == k == 1).
+    public:
+
+        typedef float training_label_type;
+        typedef float output_label_type;
+
+        // Writes the sub-network's scalar output for every sample to iter, in
+        // sample order.
+        template <
+            typename SUB_TYPE,
+            typename label_iterator
+            >
+        void to_label (
+            const tensor& input_tensor,
+            const SUB_TYPE& sub,
+            label_iterator iter
+        ) const
+        {
+            DLIB_CASSERT(sub.sample_expansion_factor() == 1);
+
+            const tensor& output_tensor = sub.get_output();
+
+            DLIB_CASSERT(output_tensor.nr() == 1 &&
+                         output_tensor.nc() == 1 &&
+                         output_tensor.k() == 1);
+            DLIB_CASSERT(input_tensor.num_samples() == output_tensor.num_samples());
+
+            // Walk the host-side buffer: one float per sample.
+            const float* prediction = output_tensor.host();
+            const float* const last = prediction + output_tensor.num_samples();
+            for (; prediction != last; ++prediction)
+                *iter++ = *prediction;
+        }
+
+        // Returns 0.5*(1/N)*sum_i (truth_i - output_i)^2, where N is the number of
+        // samples in the mini-batch, and stores -(1/N)*(truth_i - output_i) as the
+        // i-th element of the sub-network's gradient input.
+        template <
+            typename const_label_iterator,
+            typename SUBNET
+            >
+        double compute_loss_value_and_gradient (
+            const tensor& input_tensor,
+            const_label_iterator truth,
+            SUBNET& sub
+        ) const
+        {
+            const tensor& output_tensor = sub.get_output();
+            tensor& grad = sub.get_gradient_input();
+
+            DLIB_CASSERT(sub.sample_expansion_factor() == 1);
+            DLIB_CASSERT(input_tensor.num_samples() != 0);
+            DLIB_CASSERT(input_tensor.num_samples()%sub.sample_expansion_factor() == 0);
+            DLIB_CASSERT(input_tensor.num_samples() == grad.num_samples());
+            DLIB_CASSERT(input_tensor.num_samples() == output_tensor.num_samples());
+            DLIB_CASSERT(output_tensor.nr() == 1 &&
+                         output_tensor.nc() == 1 &&
+                         output_tensor.k() == 1);
+            DLIB_CASSERT(grad.nr() == 1 &&
+                         grad.nc() == 1 &&
+                         grad.k() == 1);
+
+            // Dividing every term by the batch size makes the returned value an
+            // average over the mini-batch rather than a sum.
+            const auto batch_size = output_tensor.num_samples();
+            const double inv_batch_size = 1.0/batch_size;
+
+            double total = 0;
+            float* gradient = grad.host_write_only();
+            const float* prediction = output_tensor.host();
+
+            for (long idx = 0; idx < batch_size; ++idx)
+            {
+                const float target = *truth++;
+                const float residual = target - prediction[idx];
+                const float scaled_residual = inv_batch_size*residual;
+
+                total += 0.5*scaled_residual*residual;
+                // d/d(output) of the term above.
+                gradient[idx] = -scaled_residual;
+            }
+
+            return total;
+        }
+
+        // The layer has no state, so only a name tag is written.
+        friend void serialize(const loss_mean_squared_& , std::ostream& out)
+        {
+            serialize("loss_mean_squared_", out);
+        }
+
+        // Reads the name tag back and rejects anything that is not this layer.
+        friend void deserialize(loss_mean_squared_& , std::istream& in)
+        {
+            std::string tag;
+            deserialize(tag, in);
+            if (tag == "loss_mean_squared_")
+                return;
+            throw serialization_error("Unexpected version found while deserializing dlib::loss_mean_squared_.");
+        }
+
+        friend std::ostream& operator<<(std::ostream& out, const loss_mean_squared_& )
+        {
+            return out << "loss_mean_squared";
+        }
+
+        friend void to_xml(const loss_mean_squared_& /*item*/, std::ostream& out)
+        {
+            out << "<loss_mean_squared/>";
+        }
+    };
+    template <typename SUBNET>
+    using loss_mean_squared = add_loss_layer<loss_mean_squared_, SUBNET>; // alias for add_loss_layer instantiated with loss_mean_squared_ and SUBNET
 // ----------------------------------------------------------------------------------------
 }

--- a/dlib/dnn/loss_abstract.h
+++ b/dlib/dnn/loss_abstract.h
@@ -527,6 +527,64 @@ namespace dlib
 // ----------------------------------------------------------------------------------------
+    class loss_mean_squared_
+    {
+        /*!
+            WHAT THIS OBJECT REPRESENTS
+                This object implements the loss layer interface defined above by
+                EXAMPLE_LOSS_LAYER_.  In particular, it implements the mean squared loss, which is
+                appropriate for regression problems.
+
+                Both the training labels and the output labels are floats: there is one
+                scalar target and one scalar prediction per sample.
+        !*/
+    public:
+        typedef float training_label_type;
+        typedef float output_label_type;
+        template <
+            typename SUB_TYPE,
+            typename label_iterator
+            >
+        void to_label (
+            const tensor& input_tensor,
+            const SUB_TYPE& sub,
+            label_iterator iter
+        ) const;
+        /*!
+            This function has the same interface as EXAMPLE_LOSS_LAYER_::to_label() except
+            it has the additional calling requirements that:
+                - sub.get_output().nr() == 1
+                - sub.get_output().nc() == 1
+                - sub.get_output().k() == 1
+                - sub.get_output().num_samples() == input_tensor.num_samples()
+                - sub.sample_expansion_factor() == 1
+            and the output label is the predicted continuous variable.  One float is
+            written to iter for each sample, in sample order.
+        !*/
+        template <
+            typename const_label_iterator,
+            typename SUBNET
+            >
+        double compute_loss_value_and_gradient (
+            const tensor& input_tensor,
+            const_label_iterator truth,
+            SUBNET& sub
+        ) const;
+        /*!
+            This function has the same interface as EXAMPLE_LOSS_LAYER_::compute_loss_value_and_gradient()
+            except it has the additional calling requirements that:
+                - input_tensor.num_samples() != 0
+                - sub.get_output().nr() == 1
+                - sub.get_output().nc() == 1
+                - sub.get_output().k() == 1
+                - sub.get_output().num_samples() == input_tensor.num_samples()
+                - sub.get_gradient_input().nr() == 1
+                - sub.get_gradient_input().nc() == 1
+                - sub.get_gradient_input().k() == 1
+                - sub.get_gradient_input().num_samples() == input_tensor.num_samples()
+                - sub.sample_expansion_factor() == 1
+            Let N = sub.get_output().num_samples(), let truth_i be the i-th label read
+            from truth and let out_i be the i-th value of sub.get_output().  Then:
+                - the returned loss is 0.5*(1/N)*sum_i (truth_i - out_i)^2, i.e. half the
+                  squared error averaged over the mini-batch.
+                - the i-th element of sub.get_gradient_input() is set to
+                  -(1/N)*(truth_i - out_i).
+        !*/
+    };
+    template <typename SUBNET>
+    using loss_mean_squared = add_loss_layer<loss_mean_squared_, SUBNET>; // alias for add_loss_layer instantiated with loss_mean_squared_ and SUBNET
 }
 #endif // DLIB_DNn_LOSS_ABSTRACT_H_

--- a/dlib/test/dnn.cpp
+++ b/dlib/test/dnn.cpp
@@ -7,6 +7,7 @@
 #include <cstdlib>
 #include <ctime>
 #include <vector>
+#include <cmath>
+#include <random>
 #include "../dnn.h"
 #include "tester.h"
@@ -1737,6 +1738,53 @@ namespace
        error = memcmp(g3.host(), b3g.host(), b3g.size());
        DLIB_TEST(error == 0);
    }
+// ----------------------------------------------------------------------------------------
+    void test_simple_linear_regression()
+    {
+        // Trains a single fully connected unit under loss_mean_squared on noisy
+        // samples of y = true_intercept + true_slope*x, then checks that the
+        // learned weight and bias land within a tolerance of the generating values.
+        const int num_samples = 100;
+        ::std::vector<matrix<double>> x(num_samples);
+        ::std::vector<float> y(num_samples);
+
+        // Fixed seed, so the noise sequence does not vary between runs on a given
+        // standard library implementation.
+        ::std::default_random_engine generator(16);
+        ::std::normal_distribution<float> distribution(0,5);
+
+        const float true_intercept = 50.0;
+        const float true_slope = 10.0;
+
+        for ( int ii = 0; ii < num_samples; ++ii )
+        {
+            const double val = static_cast<double>(ii);
+            // Each input sample is a 1x1 matrix holding the x coordinate.
+            matrix<double> tmp(1,1);
+            tmp = val;
+            x[ii] = tmp;
+            y[ii] = (true_intercept + true_slope*static_cast<float>(val) + distribution(generator));
+        }
+
+        using net_type = loss_mean_squared<
+            fc<
+                1, input<matrix<double>>
+                >
+            >;
+        net_type net;
+        // NOTE(review): the large bias multiplier presumably compensates for the very
+        // small learning rate below so the intercept can converge -- confirm.
+        layer<1>(net).layer_details().set_bias_learning_rate_multiplier(300);
+        sgd defsolver;
+        dnn_trainer<net_type> trainer(net, defsolver);
+        trainer.set_learning_rate(0.00001);
+        trainer.set_mini_batch_size(50);
+        trainer.set_max_num_epochs(170);
+        trainer.train(x, y);
+
+        // Use ::std::abs explicitly: an unqualified abs() may resolve to the C
+        // int abs(int), which would truncate the floating point error to an integer
+        // and let errors below 1 slip through the tolerance checks.
+        const float slope = layer<1>(net).layer_details().get_weights().host()[0];
+        const float slope_error = ::std::abs(true_slope - slope);
+        const float intercept = layer<1>(net).layer_details().get_biases().host()[0];
+        const float intercept_error = ::std::abs(true_intercept - intercept);
+        const float eps_slope = 0.5, eps_intercept = 1.0;
+
+        DLIB_TEST_MSG(slope_error <= eps_slope,
+                      "Expected slope = " << true_slope << " Estimated slope = " << slope << " Error limit = " << eps_slope);
+        DLIB_TEST_MSG(intercept_error <= eps_intercept,
+                      "Expected intercept = " << true_intercept << " Estimated intercept = " << intercept << " Error limit = " << eps_intercept);
+    }
 // ----------------------------------------------------------------------------------------
    class dnn_tester : public tester
@@ -1804,6 +1852,7 @@ namespace
            test_visit_funcions();
            test_copy_tensor_cpu();
            test_concat();
+	    test_simple_linear_regression();
        }
        void perform_test()