Commit 3bb2a817 authored by Davis King

Added sigmoid, tanh, and softmax layers. Also added log loss layer.

parent e89d468b
@@ -177,15 +177,171 @@ namespace dlib
throw serialization_error("Unexpected version found while deserializing dlib::relu_.");
}
private:
resizable_tensor params;
};
template <typename SUBNET>
using relu = add_layer<relu_, SUBNET>;
// ----------------------------------------------------------------------------------------
class sig_
{
public:
sig_()
{
}
template <typename SUBNET>
void setup (const SUBNET& sub)
{
}
void forward_inplace(const tensor& input, tensor& output)
{
tt::sigmoid(output, input);
}
void backward_inplace(
const tensor& computed_output,
const tensor& gradient_input,
tensor& data_grad,
tensor&
)
{
tt::sigmoid_gradient(data_grad, computed_output, gradient_input);
}
const tensor& get_layer_params() const { return params; }
tensor& get_layer_params() { return params; }
friend void serialize(const sig_& , std::ostream& out)
{
serialize("sig_", out);
}
friend void deserialize(sig_& , std::istream& in)
{
std::string version;
deserialize(version, in);
if (version != "sig_")
throw serialization_error("Unexpected version found while deserializing dlib::sig_.");
}
private:
resizable_tensor params;
};
template <typename SUBNET>
using sig = add_layer<sig_, SUBNET>;
// ----------------------------------------------------------------------------------------
class htan_
{
public:
htan_()
{
}
template <typename SUBNET>
void setup (const SUBNET& sub)
{
}
void forward_inplace(const tensor& input, tensor& output)
{
tt::tanh(output, input);
}
void backward_inplace(
const tensor& computed_output,
const tensor& gradient_input,
tensor& data_grad,
tensor&
)
{
tt::tanh_gradient(data_grad, computed_output, gradient_input);
}
const tensor& get_layer_params() const { return params; }
tensor& get_layer_params() { return params; }
friend void serialize(const htan_& , std::ostream& out)
{
serialize("htan_", out);
}
friend void deserialize(htan_& , std::istream& in)
{
std::string version;
deserialize(version, in);
if (version != "htan_")
throw serialization_error("Unexpected version found while deserializing dlib::htan_.");
}
private:
resizable_tensor params;
};
template <typename SUBNET>
using htan = add_layer<htan_, SUBNET>;
// ----------------------------------------------------------------------------------------
class softmax_
{
public:
softmax_()
{
}
template <typename SUBNET>
void setup (const SUBNET& sub)
{
}
void forward_inplace(const tensor& input, tensor& output)
{
tt::softmax(output, input);
}
void backward_inplace(
const tensor& computed_output,
const tensor& gradient_input,
tensor& data_grad,
tensor&
)
{
tt::softmax_gradient(data_grad, computed_output, gradient_input);
}
const tensor& get_layer_params() const { return params; }
tensor& get_layer_params() { return params; }
friend void serialize(const softmax_& , std::ostream& out)
{
serialize("softmax_", out);
}
friend void deserialize(softmax_& , std::istream& in)
{
std::string version;
deserialize(version, in);
if (version != "softmax_")
throw serialization_error("Unexpected version found while deserializing dlib::softmax_.");
}
private:
resizable_tensor params;
};
template <typename SUBNET>
using softmax = add_layer<softmax_, SUBNET>;
// ----------------------------------------------------------------------------------------
@@ -19,7 +19,7 @@ namespace dlib
This object represents a deep neural network. In particular, it is
the simplified interface through which layer objects interact with their
subnetworks. A layer's two important tasks are to (1) take outputs from its
subnetwork and forward propagate them through itself and (2) to backwards
propagate an error gradient through itself and onto its subnetwork.
The idea of a subnetwork is illustrated in the following diagram:
@@ -232,7 +232,7 @@ namespace dlib
- have_same_dimensions(data_input,data_output) == true
- setup() has been called.
ensures
- Runs the data_input tensor through this layer and stores the output into
#data_output.
- This function supports in-place operation, i.e. having
is_same_object(data_input, data_output)==true
@@ -371,9 +371,9 @@ namespace dlib
WHAT THIS OBJECT REPRESENTS
This is an implementation of the EXAMPLE_LAYER_ interface defined above.
In particular, it defines a rectified linear layer. Therefore, it passes
its inputs through the function
f(x)=max(x,0)
where f() is applied pointwise across the input tensor.
!*/
public:
@@ -400,6 +400,125 @@ namespace dlib
template <typename SUBNET>
using relu = add_layer<relu_, SUBNET>;
// ----------------------------------------------------------------------------------------
class sig_
{
/*!
WHAT THIS OBJECT REPRESENTS
This is an implementation of the EXAMPLE_LAYER_ interface defined above.
In particular, it defines a sigmoid layer. Therefore, it passes its inputs
through the function
f(x)=1/(1+exp(-x))
where f() is applied pointwise across the input tensor.
!*/
public:
sig_(
);
template <typename SUBNET> void setup (const SUBNET& sub);
void forward_inplace(const tensor& input, tensor& output);
void backward_inplace(const tensor& computed_output, const tensor& gradient_input, tensor& data_grad, tensor& params_grad);
const tensor& get_layer_params() const;
tensor& get_layer_params();
/*!
These functions are implemented as described in the EXAMPLE_LAYER_ interface.
!*/
};
void serialize(const sig_& item, std::ostream& out);
void deserialize(sig_& item, std::istream& in);
/*!
provides serialization support
!*/
template <typename SUBNET>
using sig = add_layer<sig_, SUBNET>;
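One detail worth calling out: backward_inplace() above receives the layer's computed_output rather than its input. That works for the sigmoid because its derivative can be written purely in terms of its output, i.e. if s = f(x) = 1/(1+exp(-x)) then f'(x) = s*(1-s). A minimal scalar sketch of that relationship (illustrative only, not dlib code):

    #include <cmath>

    // forward pass for a single element
    float sigmoid(float x) { return 1.0f/(1.0f + std::exp(-x)); }

    // backward pass for a single element, written only in terms of the forward
    // output s and the incoming gradient, mirroring what sig_::backward_inplace()
    // does tensor-wide via tt::sigmoid_gradient().
    float sigmoid_gradient(float s, float gradient_input)
    {
        return gradient_input*s*(1.0f - s);   // d/dx sigmoid(x) == s*(1-s)
    }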
// ----------------------------------------------------------------------------------------
class htan_
{
/*!
WHAT THIS OBJECT REPRESENTS
This is an implementation of the EXAMPLE_LAYER_ interface defined above.
In particular, it defines a hyperbolic tangent layer. Therefore, it passes
its inputs through the function
f(x)=std::tanh(x)
where f() is applied pointwise across the input tensor.
!*/
public:
htan_(
);
template <typename SUBNET> void setup (const SUBNET& sub);
void forward_inplace(const tensor& input, tensor& output);
void backward_inplace(const tensor& computed_output, const tensor& gradient_input, tensor& data_grad, tensor& params_grad);
const tensor& get_layer_params() const;
tensor& get_layer_params();
/*!
These functions are implemented as described in the EXAMPLE_LAYER_ interface.
!*/
};
void serialize(const htan_& item, std::ostream& out);
void deserialize(htan_& item, std::istream& in);
/*!
provides serialization support
!*/
template <typename SUBNET>
using htan = add_layer<htan_, SUBNET>;
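The same property holds for the hyperbolic tangent: d/dx tanh(x) = 1 - tanh(x)^2, so the backward pass needs only the stored forward output. A one-element sketch (illustrative only):

    #include <cmath>

    // backward pass for a single element of htan_, given the forward output
    // t = std::tanh(x) and the incoming gradient.
    float tanh_gradient(float t, float gradient_input)
    {
        return gradient_input*(1.0f - t*t);   // d/dx tanh(x) == 1 - tanh(x)^2
    }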
// ----------------------------------------------------------------------------------------
class softmax_
{
/*!
WHAT THIS OBJECT REPRESENTS
This is an implementation of the EXAMPLE_LAYER_ interface defined above.
In particular, it defines a softmax layer. To be precise, we define the
softmax function s(x) as:
s(x) == exp(x)/sum(exp(x))
where x is a vector. Then this layer treats its input tensor as a
collection of multi-channel images and applies s() to each spatial location
in each image. In each application, the tensor::k() channel elements at
each position are input to s() and then replaced by the outputs of s().
This means that, for example, if you collapsed each output image to a 1
channel image by adding the channels then you would end up with images
where each pixel value was 1. This is because the sum of the outputs of
s() will always be equal to 1.
!*/
public:
softmax_(
);
template <typename SUBNET> void setup (const SUBNET& sub);
void forward_inplace(const tensor& input, tensor& output);
void backward_inplace(const tensor& computed_output, const tensor& gradient_input, tensor& data_grad, tensor& params_grad);
const tensor& get_layer_params() const;
tensor& get_layer_params();
/*!
These functions are implemented as described in the EXAMPLE_LAYER_ interface.
!*/
};
void serialize(const softmax_& item, std::ostream& out);
void deserialize(softmax_& item, std::istream& in);
/*!
provides serialization support
!*/
template <typename SUBNET>
using softmax = add_layer<softmax_, SUBNET>;
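The channel-wise normalization described above can be pictured with a small standalone sketch that applies s() to the tensor::k() values at one spatial location; the outputs always sum to 1. Subtracting the max before exponentiating is only for numerical stability and does not change the result (illustrative code, not the dlib implementation):

    #include <algorithm>
    #include <cmath>
    #include <vector>

    // Apply s(x) = exp(x)/sum(exp(x)) to the channel values at one position.
    std::vector<float> channel_softmax(const std::vector<float>& x)
    {
        const float m = *std::max_element(x.begin(), x.end());
        std::vector<float> out;
        float sum = 0;
        for (float v : x)
        {
            out.push_back(std::exp(v - m));  // shift by the max to avoid overflow
            sum += out.back();
        }
        for (float& v : out)
            v /= sum;                        // the outputs now sum to 1
        return out;
    }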
// ----------------------------------------------------------------------------------------
}
@@ -6,6 +6,7 @@
#include "loss_abstract.h"
#include "core.h"
#include "../matrix.h"
#include "tensor_tools.h"
namespace dlib
{
@@ -100,6 +101,109 @@ namespace dlib
template <typename SUBNET>
using loss_binary_hinge = add_loss_layer<loss_binary_hinge_, SUBNET>;
// ----------------------------------------------------------------------------------------
class loss_binary_log_
{
public:
const static unsigned int sample_expansion_factor = 1;
typedef float label_type;
template <
typename SUB_TYPE,
typename label_iterator
>
void to_label (
const tensor& input_tensor,
const SUB_TYPE& sub,
label_iterator iter
) const
{
const tensor& output_tensor = sub.get_output();
DLIB_CASSERT(output_tensor.nr() == 1 &&
output_tensor.nc() == 1 &&
output_tensor.k() == 1,"");
DLIB_CASSERT(input_tensor.num_samples() == output_tensor.num_samples(),"");
const float* out_data = output_tensor.host();
for (long i = 0; i < output_tensor.num_samples(); ++i)
{
*iter++ = out_data[i];
}
}
template <
typename const_label_iterator,
typename SUBNET
>
double compute_loss (
const tensor& input_tensor,
const_label_iterator truth,
SUBNET& sub
) const
{
const tensor& output_tensor = sub.get_output();
tensor& grad = sub.get_gradient_input();
DLIB_CASSERT(input_tensor.num_samples() != 0,"");
DLIB_CASSERT(input_tensor.num_samples()%sample_expansion_factor == 0,"");
DLIB_CASSERT(input_tensor.num_samples() == grad.num_samples(),"");
DLIB_CASSERT(input_tensor.num_samples() == output_tensor.num_samples(),"");
DLIB_CASSERT(output_tensor.nr() == 1 &&
output_tensor.nc() == 1 &&
output_tensor.k() == 1,"");
DLIB_CASSERT(grad.nr() == 1 &&
grad.nc() == 1 &&
grad.k() == 1,"");
tt::sigmoid(grad, output_tensor);
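// At this point grad holds sigmoid(output).  For a +1 label the per-sample
// loss used below is log(1+exp(-x)), whose derivative w.r.t. x is sigmoid(x)-1;
// for a -1 label it is log(1+exp(x)), whose derivative is sigmoid(x).  The two
// branches of the loop below scale grad accordingly.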
// The loss we output is the average loss over the mini-batch.
const double scale = 1.0/output_tensor.num_samples();
double loss = 0;
float* g = grad.host();
const float* out_data = output_tensor.host();
for (long i = 0; i < output_tensor.num_samples(); ++i)
{
const float y = *truth++;
DLIB_CASSERT(y == +1 || y == -1, "y: " << y);
float temp;
if (y > 0)
{
temp = log1pexp(-out_data[i]);
loss += scale*temp;
g[i] = scale*(g[i]-1);
}
else
{
temp = -(-out_data[i]-log1pexp(-out_data[i]));
loss += scale*temp;
g[i] = scale*g[i];
}
}
return loss;
}
friend void serialize(const loss_binary_log_& , std::ostream& out)
{
serialize("loss_binary_log_", out);
}
friend void deserialize(loss_binary_log_& , std::istream& in)
{
std::string version;
deserialize(version, in);
if (version != "loss_binary_log_")
throw serialization_error("Unexpected version found while deserializing dlib::loss_binary_log_.");
}
};
template <typename SUBNET>
using loss_binary_log = add_loss_layer<loss_binary_log_, SUBNET>;
// ----------------------------------------------------------------------------------------
}
@@ -140,9 +140,11 @@ namespace dlib
/*!
WHAT THIS OBJECT REPRESENTS
This object implements the loss layer interface defined above by
EXAMPLE_LOSS_LAYER_. In particular, it implements the hinge loss, which is
appropriate for binary classification problems. Therefore, the possible
labels when using this loss are +1 and -1. Moreover, it will cause the
network to produce outputs > 0 when predicting a member of the +1 class and
values < 0 otherwise.
!*/
public:
@@ -164,6 +166,10 @@ namespace dlib
- sub.get_output().nr() == 1
- sub.get_output().nc() == 1
- sub.get_output().k() == 1
- sub.get_output().num_samples() == input_tensor.num_samples()
and the output label is the raw score for each classified object. If the score
is > 0 then the classifier is predicting the +1 class, otherwise it is
predicting the -1 class.
!*/
template <
@@ -181,6 +187,7 @@ namespace dlib
- sub.get_output().nr() == 1
- sub.get_output().nc() == 1
- sub.get_output().k() == 1
- sub.get_output().num_samples() == input_tensor.num_samples()
- all values pointed to by truth are +1 or -1.
!*/
@@ -195,6 +202,78 @@ namespace dlib
template <typename SUBNET>
using loss_binary_hinge = add_loss_layer<loss_binary_hinge_, SUBNET>;
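For reference, the hinge loss penalizes a sample whenever y*x < 1, where x is the raw score described above, and is zero once the sample is classified correctly with a margin of at least 1. A scalar sketch of this standard form (an assumption for illustration; see the loss_binary_hinge_ implementation for the authoritative definition):

    #include <algorithm>

    // hinge loss for one sample with label y in {+1,-1} and raw network output x
    double hinge_loss(double y, double x)
    {
        return std::max(0.0, 1.0 - y*x);
    }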
// ----------------------------------------------------------------------------------------
class loss_binary_log_
{
/*!
WHAT THIS OBJECT REPRESENTS
This object implements the loss layer interface defined above by
EXAMPLE_LOSS_LAYER_. In particular, it implements the log loss, which is
appropriate for binary classification problems. Therefore, the possible
labels when using this loss are +1 and -1. Moreover, it will cause the
network to produce outputs > 0 when predicting a member of the +1 class and
values < 0 otherwise.
To be more specific, this object contains a sigmoid layer followed by a
cross-entropy layer.
!*/
public:
const static unsigned int sample_expansion_factor = 1;
typedef float label_type;
template <
typename SUB_TYPE,
typename label_iterator
>
void to_label (
const tensor& input_tensor,
const SUB_TYPE& sub,
label_iterator iter
) const;
/*!
This function has the same interface as EXAMPLE_LOSS_LAYER_::to_label() except
it has the additional calling requirements that:
- sub.get_output().nr() == 1
- sub.get_output().nc() == 1
- sub.get_output().k() == 1
- sub.get_output().num_samples() == input_tensor.num_samples()
and the output label is the raw score for each classified object. If the score
is > 0 then the classifier is predicting the +1 class, otherwise it is
predicting the -1 class.
!*/
template <
typename const_label_iterator,
typename SUBNET
>
double compute_loss (
const tensor& input_tensor,
const_label_iterator truth,
SUBNET& sub
) const;
/*!
This function has the same interface as EXAMPLE_LOSS_LAYER_::compute_loss() except
it has the additional calling requirements that:
- sub.get_output().nr() == 1
- sub.get_output().nc() == 1
- sub.get_output().k() == 1
- sub.get_output().num_samples() == input_tensor.num_samples()
- all values pointed to by truth are +1 or -1.
!*/
};
void serialize(const loss_binary_log_& item, std::ostream& out);
void deserialize(loss_binary_log_& item, std::istream& in);
/*!
provides serialization support
!*/
template <typename SUBNET>
using loss_binary_log = add_loss_layer<loss_binary_log_, SUBNET>;
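As noted above, this loss behaves like a sigmoid layer followed by a cross-entropy layer. For a raw score x and a label y in {+1,-1} that composition reduces to the usual log loss, which compute_loss() evaluates in a numerically stable way via log1pexp(). A scalar sketch (illustrative only):

    #include <cmath>

    // log loss for one sample: -log p(y|x) where p(+1|x) = 1/(1+exp(-x)).
    // Both label cases collapse to the single expression log(1+exp(-y*x)).
    double binary_log_loss(double y, double x)
    {
        return std::log(1.0 + std::exp(-y*x));
    }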
// ----------------------------------------------------------------------------------------
}