Commit 2b0a4a6f authored Nov 10, 2017 by Davis King
Added loss_epsilon_insensitive_ layer
parent 6137540b
Showing 3 changed files with 279 additions and 0 deletions

    dlib/dnn/loss.h           +131  -0
    dlib/dnn/loss_abstract.h  +100  -0
    dlib/test/dnn.cpp         +48   -0
dlib/dnn/loss.h
@@ -1677,6 +1677,137 @@ namespace dlib
    template <typename SUBNET>
    using loss_mean_squared = add_loss_layer<loss_mean_squared_, SUBNET>;

// ----------------------------------------------------------------------------------------

    class loss_epsilon_insensitive_
    {
    public:

        typedef float training_label_type;
        typedef float output_label_type;

        loss_epsilon_insensitive_() = default;
        loss_epsilon_insensitive_(double eps) : eps(eps)
        {
            DLIB_CASSERT(eps >= 0, "You can't set a negative error epsilon.");
        }

        double get_epsilon () const { return eps; }
        void set_epsilon(double e)
        {
            DLIB_CASSERT(e >= 0, "You can't set a negative error epsilon.");
            eps = e;
        }

        template <
            typename SUB_TYPE,
            typename label_iterator
            >
        void to_label (
            const tensor& input_tensor,
            const SUB_TYPE& sub,
            label_iterator iter
        ) const
        {
            DLIB_CASSERT(sub.sample_expansion_factor() == 1);

            const tensor& output_tensor = sub.get_output();

            DLIB_CASSERT(output_tensor.nr() == 1 &&
                         output_tensor.nc() == 1 &&
                         output_tensor.k() == 1);
            DLIB_CASSERT(input_tensor.num_samples() == output_tensor.num_samples());

            const float* out_data = output_tensor.host();
            for (long i = 0; i < output_tensor.num_samples(); ++i)
            {
                *iter++ = out_data[i];
            }
        }

        template <
            typename const_label_iterator,
            typename SUBNET
            >
        double compute_loss_value_and_gradient (
            const tensor& input_tensor,
            const_label_iterator truth,
            SUBNET& sub
        ) const
        {
            const tensor& output_tensor = sub.get_output();
            tensor& grad = sub.get_gradient_input();

            DLIB_CASSERT(sub.sample_expansion_factor() == 1);
            DLIB_CASSERT(input_tensor.num_samples() != 0);
            DLIB_CASSERT(input_tensor.num_samples()%sub.sample_expansion_factor() == 0);
            DLIB_CASSERT(input_tensor.num_samples() == grad.num_samples());
            DLIB_CASSERT(input_tensor.num_samples() == output_tensor.num_samples());
            DLIB_CASSERT(output_tensor.nr() == 1 &&
                         output_tensor.nc() == 1 &&
                         output_tensor.k() == 1);
            DLIB_CASSERT(grad.nr() == 1 &&
                         grad.nc() == 1 &&
                         grad.k() == 1);

            // The loss we output is the average loss over the mini-batch.
            const double scale = 1.0/output_tensor.num_samples();
            double loss = 0;
            float* g = grad.host_write_only();
            const float* out_data = output_tensor.host();
            for (long i = 0; i < output_tensor.num_samples(); ++i)
            {
                const float y = *truth++;
                const float err = out_data[i]-y;
                if (err > eps)
                {
                    loss += scale*(err-eps);
                    g[i] = scale;
                }
                else if (err < -eps)
                {
                    loss += scale*(eps-err);
                    g[i] = -scale;
                }
            }
            return loss;
        }

        friend void serialize(const loss_epsilon_insensitive_& item, std::ostream& out)
        {
            serialize("loss_epsilon_insensitive_", out);
            serialize(item.eps, out);
        }

        friend void deserialize(loss_epsilon_insensitive_& item, std::istream& in)
        {
            std::string version;
            deserialize(version, in);
            if (version != "loss_epsilon_insensitive_")
                throw serialization_error("Unexpected version found while deserializing dlib::loss_epsilon_insensitive_.");
            deserialize(item.eps, in);
        }

        friend std::ostream& operator<<(std::ostream& out, const loss_epsilon_insensitive_& item)
        {
            out << "loss_epsilon_insensitive epsilon: " << item.eps;
            return out;
        }

        friend void to_xml(const loss_epsilon_insensitive_& item, std::ostream& out)
        {
            out << "<loss_epsilon_insensitive_ epsilon='" << item.eps << "'/>";
        }

    private:
        double eps = 1;
    };

    template <typename SUBNET>
    using loss_epsilon_insensitive = add_loss_layer<loss_epsilon_insensitive_, SUBNET>;

// ----------------------------------------------------------------------------------------

    class loss_mean_squared_multioutput_
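For orientation, the per-sample computation in compute_loss_value_and_gradient() above reduces mathematically to the following minimal standalone sketch. The function name epsilon_insensitive_loss_and_grad is illustrative only and not part of dlib, and the 1/num_samples scaling applied by the layer is omitted here:

    #include <utility>

    // Epsilon-insensitive loss and its subgradient for one sample: positive
    // error beyond the dead zone, negative error beyond the dead zone, and
    // the dead zone itself, where the loss contributes nothing and the
    // subgradient is taken as zero.
    std::pair<double,double> epsilon_insensitive_loss_and_grad(
        double prediction, double target, double eps)
    {
        const double err = prediction - target;
        if (err > eps)
            return {err - eps, 1.0};
        else if (err < -eps)
            return {eps - err, -1.0};
        else
            return {0.0, 0.0};
    }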
dlib/dnn/loss_abstract.h
@@ -779,6 +779,106 @@ namespace dlib
    template <typename SUBNET>
    using loss_ranking = add_loss_layer<loss_ranking_, SUBNET>;

// ----------------------------------------------------------------------------------------

    class loss_epsilon_insensitive_
    {
        /*!
            WHAT THIS OBJECT REPRESENTS
                This object implements the loss layer interface defined above by
                EXAMPLE_LOSS_LAYER_.  In particular, it implements the epsilon insensitive
                loss, which is appropriate for regression problems.  In particular, this
                loss function is:
                    loss(y1,y2) = abs(y1-y2)<epsilon ? 0 : abs(y1-y2)-epsilon
                Therefore, the loss is basically just the abs() loss except there is a dead
                zone around zero, causing the loss to not care about mistakes of magnitude
                smaller than epsilon.
        !*/
    public:

        typedef float training_label_type;
        typedef float output_label_type;

        loss_epsilon_insensitive_(
        ) = default;
        /*!
            ensures
                - #get_epsilon() == 1
        !*/

        loss_epsilon_insensitive_(
            double eps
        );
        /*!
            requires
                - eps >= 0
            ensures
                - #get_epsilon() == eps
        !*/

        double get_epsilon (
        ) const;
        /*!
            ensures
                - returns the epsilon value used in the loss function.  Mistakes in the
                  regressor smaller than get_epsilon() are ignored by the loss function.
        !*/

        void set_epsilon(
            double eps
        );
        /*!
            requires
                - eps >= 0
            ensures
                - #get_epsilon() == eps
        !*/

        template <
            typename SUB_TYPE,
            typename label_iterator
            >
        void to_label (
            const tensor& input_tensor,
            const SUB_TYPE& sub,
            label_iterator iter
        ) const;
        /*!
            This function has the same interface as EXAMPLE_LOSS_LAYER_::to_label() except
            it has the additional calling requirements that:
                - sub.get_output().nr() == 1
                - sub.get_output().nc() == 1
                - sub.get_output().k() == 1
                - sub.get_output().num_samples() == input_tensor.num_samples()
                - sub.sample_expansion_factor() == 1
            and the output label is the predicted continuous variable.
        !*/

        template <
            typename const_label_iterator,
            typename SUBNET
            >
        double compute_loss_value_and_gradient (
            const tensor& input_tensor,
            const_label_iterator truth,
            SUBNET& sub
        ) const;
        /*!
            This function has the same interface as EXAMPLE_LOSS_LAYER_::compute_loss_value_and_gradient()
            except it has the additional calling requirements that:
                - sub.get_output().nr() == 1
                - sub.get_output().nc() == 1
                - sub.get_output().k() == 1
                - sub.get_output().num_samples() == input_tensor.num_samples()
                - sub.sample_expansion_factor() == 1
        !*/

    };

    template <typename SUBNET>
    using loss_epsilon_insensitive = add_loss_layer<loss_epsilon_insensitive_, SUBNET>;

// ----------------------------------------------------------------------------------------

    class loss_mean_squared_
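To illustrate how the documented interface is meant to be used, here is a hedged usage sketch of a tiny regression network built on the new loss layer. The network shape, the epsilon of 0.1, and the trainer settings are illustrative choices only; the actual usage exercised by this commit is the test added to dlib/test/dnn.cpp below:

    #include <dlib/dnn.h>
    #include <vector>
    using namespace dlib;

    // One linear output on top of a 1x1 matrix input, trained with the
    // epsilon-insensitive loss.  Regression errors smaller than the chosen
    // epsilon (0.1 here) are ignored by the loss.
    using net_type = loss_epsilon_insensitive<fc<1, input<matrix<double>>>>;

    float fit_and_predict(
        const std::vector<matrix<double>>& x,
        const std::vector<float>& y)
    {
        net_type net(0.1);                    // epsilon forwarded to the loss layer
        sgd solver(0, 0.9);
        dnn_trainer<net_type> trainer(net, solver);
        trainer.set_learning_rate(1e-5);
        trainer.set_mini_batch_size(50);
        trainer.train(x, y);
        return net(x[0]);                     // predicted continuous value for one sample
    }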
dlib/test/dnn.cpp
@@ -2116,6 +2116,53 @@ namespace
    }

// ----------------------------------------------------------------------------------------

    void test_simple_linear_regression_eil()
    {
        print_spinner();
        const int num_samples = 1000;
        ::std::vector<matrix<double>> x(num_samples);
        ::std::vector<float> y(num_samples);
        ::std::default_random_engine generator(16);
        ::std::normal_distribution<float> distribution(0, 0.0001);
        const float true_intercept = 50.0;
        const float true_slope = 10.0;
        for ( int ii = 0; ii < num_samples; ++ii )
        {
            const double val = static_cast<double>(ii)/10;
            matrix<double> tmp(1,1);
            tmp = val;
            x[ii] = tmp;
            y[ii] = (true_intercept + true_slope*static_cast<float>(val) + distribution(generator));
        }

        using net_type = loss_epsilon_insensitive<fc<1, input<matrix<double>>>>;
        net_type net(0.01);
        layer<1>(net).layer_details().set_bias_learning_rate_multiplier(300);
        sgd defsolver(0,0.9);
        dnn_trainer<net_type> trainer(net, defsolver);
        trainer.set_learning_rate(1e-5);
        trainer.set_min_learning_rate(1e-8);
        trainer.set_mini_batch_size(50);
        trainer.set_max_num_epochs(570);
        trainer.train(x, y);

        const float slope = layer<1>(net).layer_details().get_weights().host()[0];
        const float slope_error = abs(true_slope - slope);
        const float intercept = layer<1>(net).layer_details().get_biases().host()[0];
        const float intercept_error = abs(true_intercept - intercept);
        const float eps_slope = 0.01, eps_intercept = 0.1;

        dlog << LINFO << "slope_error: " << slope_error;
        dlog << LINFO << "intercept_error: " << intercept_error;
        DLIB_TEST_MSG(slope_error <= eps_slope,
                      "Expected slope = " << true_slope << " Estimated slope = " << slope << " Error limit = " << eps_slope);
        DLIB_TEST_MSG(intercept_error <= eps_intercept,
                      "Expected intercept = " << true_intercept << " Estimated intercept = " << intercept << " Error limit = " << eps_intercept);
    }

// ----------------------------------------------------------------------------------------

    void test_simple_linear_regression_with_mult_prev()
@@ -2950,6 +2997,7 @@ namespace
            test_copy_tensor_add_to_cpu();
            test_concat();
            test_simple_linear_regression();
            test_simple_linear_regression_eil();
            test_simple_linear_regression_with_mult_prev();
            test_multioutput_linear_regression();
            test_simple_autoencoder();