Commit f335ce4f authored by Davis King

Adding a rough initial version of a deep learning API.

parent 16ea6f11
// Copyright (C) 2015 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#ifndef DLIB_DNn_
#define DLIB_DNn_
#include "dnn/tensor.h"
#include "dnn/input.h"
#include "dnn/layers.h"
#include "dnn/loss.h"
#include "dnn/core.h"
#include "dnn/solvers.h"
#endif // DLIB_DNn_
// Copyright (C) 2015 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#ifndef DLIB_DNn_INPUT_H_
#define DLIB_DNn_INPUT_H_
#include <dlib/matrix.h>
#include <dlib/pixel.h>
namespace dlib
{
// ----------------------------------------------------------------------------------------
template <typename T>
class input
{
public:
// sample_expansion_factor must be > 0
const static unsigned int sample_expansion_factor = 1;
typedef T input_type;
template <typename input_iterator>
void to_tensor (
input_iterator begin,
input_iterator end,
resizable_tensor& data
) const
/*!
requires
- [begin, end) is an iterator range over input_type objects.
ensures
- Converts the iterator range into a tensor and stores it into #data.
- Normally you would have #data.num_samples() == distance(begin,end) but
you can also expand the output by some integer factor so long as the loss
you use can deal with it correctly.
- #data.num_samples() == distance(begin,end)*sample_expansion_factor.
!*/
{
// TODO: this generic version is still just a stub.  It needs to set data to the
// right size and then copy the contents of the iterator range into data.
for (input_iterator i = begin; i != end; ++i)
{
matrix<rgb_pixel> temp = *i;
// now copy *i into the right part of data.
}
}
};
// ----------------------------------------------------------------------------------------
template <typename T,long NR, typename MM, typename L>
class input<matrix<T,NR,1,MM,L>>
{
public:
// TODO, maybe we should only allow T to be float? Seems kinda pointless to allow
// double. Don't forget to remove the matrix_cast if we enforce just float.
typedef matrix<T,NR,1,MM,L> input_type;
const static unsigned int sample_expansion_factor = 1;
template <typename input_iterator>
void to_tensor (
input_iterator begin,
input_iterator end,
resizable_tensor& data
) const
/*!
requires
- [begin, end) is an iterator range over input_type objects.
ensures
- converts the iterator range into a tensor and stores it into #data.
- Normally you would have #data.num_samples() == distance(begin,end) but
you can also expand the output by some integer factor so long as the loss
you use can deal with it correctly.
- #data.num_samples() == distance(begin,end)*sample_expansion_factor.
!*/
{
// initialize data to the right size to contain the stuff in the iterator range.
data.set_size(std::distance(begin,end), 1, 1, begin->size());
unsigned long idx = 0;
for (input_iterator i = begin; i != end; ++i)
{
data.set_sample(idx++, matrix_cast<float>(*i));
}
}
};
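// A minimal usage sketch for the specialization above (the sizes are just
// example values):
//
//   std::vector<matrix<float,0,1>> samples(32, matrix<float,0,1>(zeros_matrix<float>(10,1)));
//   input<matrix<float,0,1>> inp;
//   resizable_tensor data;
//   inp.to_tensor(samples.begin(), samples.end(), data);
//   // now data.num_samples() == 32 and each sample holds one vector's 10 floats.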
// ----------------------------------------------------------------------------------------
template <typename T>
class input2
{
public:
input2(){}
input2(const input<T>&) {}
typedef T input_type;
const static unsigned int sample_expansion_factor = 1;
template <typename input_iterator>
void to_tensor (
input_iterator begin,
input_iterator end,
resizable_tensor& data
) const
/*!
requires
- [begin, end) is an iterator range over T objects.
ensures
- converts the iterator range into a tensor and stores it into #data.
- Normally you would have #data.num_samples() == distance(begin,end) but
you can also expand the output by some integer factor so long as the loss
you use can deal with it correctly.
- #data.num_samples() == distance(begin,end)*sample_expansion_factor.
!*/
{
// TODO: like the generic input above, this is still just a stub.  It needs to
// set data to the right size and then copy the iterator range into data.
for (input_iterator i = begin; i != end; ++i)
{
matrix<rgb_pixel> temp = *i;
// now copy *i into the right part of data.
}
}
};
// ----------------------------------------------------------------------------------------
}
#endif // #define DLIB_DNn_INPUT_H_
// Copyright (C) 2015 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#ifndef DLIB_DNn_LAYERS_H_
#define DLIB_DNn_LAYERS_H_
#include "layers_abstract.h"
#include "tensor.h"
#include "core.h"
#include <iostream>
#include <string>
#include <dlib/rand.h>
#include <dlib/string.h>
namespace dlib
{
// ----------------------------------------------------------------------------------------
class con_
{
public:
con_()
{}
template <typename SUB_NET>
void setup (const SUB_NET& sub)
{
// TODO
}
template <typename SUB_NET>
void forward(const SUB_NET& sub, resizable_tensor& output)
{
// TODO
}
template <typename SUB_NET>
void backward(const tensor& gradient_input, SUB_NET& sub, tensor& params_grad)
{
// TODO
}
const tensor& get_layer_params() const { return params; }
tensor& get_layer_params() { return params; }
private:
resizable_tensor params;
};
template <typename SUB_NET>
using con = add_layer<con_, SUB_NET>;
// ----------------------------------------------------------------------------------------
class fc_
{
public:
fc_() : num_outputs(1)
{
rnd.set_seed("fc_" + cast_to_string(num_outputs));
}
explicit fc_(unsigned long num_outputs_)
{
num_outputs = num_outputs_;
rnd.set_seed("fc_" + cast_to_string(num_outputs));
}
unsigned long get_num_outputs (
) const { return num_outputs; }
template <typename SUB_NET>
void setup (const SUB_NET& sub)
{
num_inputs = sub.get_output().nr()*sub.get_output().nc()*sub.get_output().k();
params.set_size(num_inputs, num_outputs);
std::cout << "fc_::setup() " << params.size() << std::endl;
randomize_parameters(params, num_inputs+num_outputs, rnd);
}
template <typename SUB_NET>
void forward(const SUB_NET& sub, resizable_tensor& output)
{
output.set_size(sub.get_output().num_samples(), num_outputs);
output = mat(sub.get_output())*mat(params);
}
template <typename SUB_NET>
void backward(const tensor& gradient_input, SUB_NET& sub, tensor& params_grad)
{
// forward() computed OUT = DATA*W, where DATA = mat(sub.get_output()) and
// W = mat(params).  So, with GRAD = mat(gradient_input):
//   - the gradient with respect to the parameters is trans(DATA)*GRAD
//   - the gradient with respect to the data is GRAD*trans(W)
// compute the gradient of the parameters.
params_grad += trans(mat(sub.get_output()))*mat(gradient_input);
// compute the gradient for the data
sub.get_gradient_input() += mat(gradient_input)*trans(mat(params));
}
const tensor& get_layer_params() const { return params; }
tensor& get_layer_params() { return params; }
private:
unsigned long num_outputs;
unsigned long num_inputs;
resizable_tensor params;
dlib::rand rnd;
};
template <typename SUB_NET>
using fc = add_layer<fc_, SUB_NET>;
// ----------------------------------------------------------------------------------------
class relu_
{
public:
relu_()
{
}
template <typename SUB_NET>
void setup (const SUB_NET& sub)
{
}
template <typename SUB_NET>
void forward(const SUB_NET& sub, resizable_tensor& output)
{
output.copy_size(sub.get_output());
output = lowerbound(mat(sub.get_output()), 0);
}
template <typename SUB_NET>
void backward(const tensor& gradient_input, SUB_NET& sub, tensor& params_grad)
{
const float* grad = gradient_input.host();
const float* in = sub.get_output().host();
float* out = sub.get_gradient_input().host();
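// The derivative of max(0,x) is 1 where x > 0 and 0 elsewhere, so only pass the
// gradient through at the positions where this layer's input was positive.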
for (unsigned long i = 0; i < sub.get_output().size(); ++i)
{
if (in[i] > 0)
out[i] += grad[i];
}
}
const tensor& get_layer_params() const { return params; }
tensor& get_layer_params() { return params; }
private:
resizable_tensor params;
};
template <typename SUB_NET>
using relu = add_layer<relu_, SUB_NET>;
// ----------------------------------------------------------------------------------------
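// The multiply_ layer below scales each element of its input by a separately
// learned weight, i.e. it multiplies each sample by a learned diagonal matrix.
// For example, with params = [2,3] a sample [x1,x2] would map to [2*x1, 3*x2].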
class multiply_
{
public:
multiply_()
{
}
template <typename SUB_NET>
void setup (const SUB_NET& sub)
{
num_inputs = sub.get_output().nr()*sub.get_output().nc()*sub.get_output().k();
params.set_size(1, num_inputs);
std::cout << "multiply_::setup() " << params.size() << std::endl;
const int num_outputs = num_inputs;
randomize_parameters(params, num_inputs+num_outputs, rnd);
}
template <typename SUB_NET>
void forward(const SUB_NET& sub, resizable_tensor& output)
{
DLIB_CASSERT( sub.get_output().nr()*sub.get_output().nc()*sub.get_output().k() == params.size(), "");
DLIB_CASSERT( sub.get_output().nr()*sub.get_output().nc()*sub.get_output().k() == num_inputs, "");
output.copy_size(sub.get_output());
auto indata = sub.get_output().host();
auto outdata = output.host();
auto paramdata = params.host();
for (int i = 0; i < sub.get_output().num_samples(); ++i)
{
for (int j = 0; j < num_inputs; ++j)
{
*outdata++ = *indata++ * paramdata[j];
}
}
}
template <typename SUB_NET>
void backward(const tensor& gradient_input, SUB_NET& sub, tensor& params_grad)
{
params_grad += sum_rows(pointwise_multiply(mat(sub.get_output()),mat(gradient_input)));
for (long i = 0; i < gradient_input.num_samples(); ++i)
{
sub.get_gradient_input().add_to_sample(i,
pointwise_multiply(rowm(mat(gradient_input),i), mat(params)));
}
}
const tensor& get_layer_params() const { return params; }
tensor& get_layer_params() { return params; }
private:
int num_inputs;
resizable_tensor params;
dlib::rand rnd;
};
template <typename SUB_NET>
using multiply = add_layer<multiply_, SUB_NET>;
// ----------------------------------------------------------------------------------------
}
#endif // #define DLIB_DNn_LAYERS_H_
// Copyright (C) 2015 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#undef DLIB_DNn_LAYERS_ABSTRACT_H_
#ifdef DLIB_DNn_LAYERS_ABSTRACT_H_
#include "tensor_abstract.h"
#include "core_abstract.h"
namespace dlib
{
// ----------------------------------------------------------------------------------------
class SUB_NET
{
/*!
WHAT THIS OBJECT REPRESENTS
By "Sub net" we mean the part of the network closer to the input. Whenever
you get a SUB_NET it will always have computed its outputs and they will be
available in get_output().
!*/
public:
const tensor& get_output(
) const;
tensor& get_gradient_input(
);
const NEXT_SUB_NET& sub_net(
) const;
NEXT_SUB_NET& sub_net(
);
};
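// For example, a layer's forward() might use its SUB_NET like this (a sketch,
// not a complete layer):
//
//   template <typename SUB_NET>
//   void forward(const SUB_NET& sub, resizable_tensor& output)
//   {
//       const tensor& below = sub.get_output();            // output of the layer below
//       const tensor& below2 = sub.sub_net().get_output(); // output of the layer below that
//       // ... fill output from below (and optionally below2) ...
//   }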
// ----------------------------------------------------------------------------------------
class EXAMPLE_LAYER_
{
/*!
WHAT THIS OBJECT REPRESENTS
Each layer in a deep neural network can be thought of as a function,
f(data,parameters), that takes in a data tensor, some parameters, and
produces an output tensor. You create an entire deep network by composing
these functions. Importantly, you are able to use a wide range of
different functions to accommodate whatever task you are trying to accomplish.
Dlib includes a number of common layer types but if you want to define your
own then you simply implement a class with the same interface as EXAMPLE_LAYER_.
!*/
public:
EXAMPLE_LAYER_(
);
/*!
ensures
- Default constructs this object. This function is not required to do
anything in particular but it is required that layer objects be default
constructable.
!*/
template <typename SUB_NET>
void setup (
const SUB_NET& sub
);
/*!
requires
- SUB_NET implements the SUB_NET interface defined at the top of this file.
ensures
- performs any necessary initial memory allocations and/or sets parameters
to their initial values prior to learning. Therefore, calling setup
destroys any previously learned parameters.
!*/
template <typename SUB_NET>
void forward(
const SUB_NET& sub,
resizable_tensor& output
);
/*!
requires
- SUB_NET implements the SUB_NET interface defined at the top of this file.
- setup() has been called.
ensures
- Runs the output of the sub-network through this layer and stores the
output into #output. In particular, forward() can use any of the outputs
in sub (e.g. sub.get_output(), sub.sub_net().get_output(), etc.) to
compute whatever it wants.
- #output.num_samples() == sub.get_output().num_samples()
!*/
template <typename SUB_NET>
void backward(
const tensor& gradient_input,
SUB_NET& sub,
tensor& params_grad
);
/*!
requires
- SUB_NET implements the SUB_NET interface defined at the top of this file.
- setup() has been called.
- gradient_input has the same dimensions as the output of forward(sub,output).
- have_same_dimensions(sub.get_gradient_input(), sub.get_output()) == true
- have_same_dimensions(params_grad, get_layer_params()) == true
ensures
- This function outputs the gradients of this layer with respect to the
input data from sub and also with respect to this layer's parameters.
These gradients are stored into #sub and #params_grad, respectively. To be
precise, the gradients are taken of a function f(sub,get_layer_params())
which is defined thusly:
- let OUT be the output of forward(sub,OUT).
- let f(sub,get_layer_params()) == dot(OUT, gradient_input)
Then we define the following gradient vectors:
- PARAMETER_GRADIENT == gradient of f(sub,get_layer_params()) with
respect to get_layer_params().
- for all valid I:
- DATA_GRADIENT_I == gradient of f(sub,get_layer_params()) with
respect to layer<I>(sub).get_output() (recall that forward() can
draw inputs from the immediate sub layer, sub.sub_net(), or
any earlier layer. So you must consider the gradients with
respect to all inputs drawn from sub)
Finally, backward() adds these gradients into #params_grad and #sub by performing:
- params_grad += PARAMETER_GRADIENT
- for all valid I:
- layer<I>(sub).get_gradient_input() += DATA_GRADIENT_I
!*/
const tensor& get_layer_params(
) const;
/*!
ensures
- returns the parameters that define the behavior of forward().
!*/
tensor& get_layer_params(
);
/*!
ensures
- returns the parameters that define the behavior of forward().
!*/
};
// For each layer you define, always define an add_layer template so that layers can be
// easily composed. Moreover, the convention is that the layer class ends with an _
// while the add_layer template has the same name but without the trailing _.
template <typename SUB_NET>
using EXAMPLE_LAYER = add_layer<EXAMPLE_LAYER_, SUB_NET>;
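// So, for example, the relu_ layer declared below gets composed onto a network by
// writing something like relu<fc<input<matrix<float,0,1>>>> rather than by using
// relu_ directly.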
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
class fc_
{
/*!
WHAT THIS OBJECT REPRESENTS
This is an implementation of the EXAMPLE_LAYER_ interface defined above.
In particular, it defines a fully connected layer that takes an input
tensor and multiplies it by a weight matrix and outputs the results.
!*/
public:
fc_(
);
/*!
ensures
- #get_num_outputs() == 1
!*/
explicit fc_(
unsigned long num_outputs
);
/*!
ensures
- #get_num_outputs() == num_outputs
!*/
unsigned long get_num_outputs (
) const;
/*!
ensures
- This layer outputs column vectors that contain get_num_outputs()
elements. That is, the output tensor T from forward() will be such that:
- T.num_samples() == however many samples were given to forward().
- T.nr() == get_num_outputs()
- The rest of the dimensions of T will be 1.
!*/
template <typename SUB_NET> void setup (const SUB_NET& sub);
template <typename SUB_NET> void forward(const SUB_NET& sub, resizable_tensor& output);
template <typename SUB_NET> void backward(const tensor& gradient_input, SUB_NET& sub, tensor& params_grad);
const tensor& get_layer_params() const;
tensor& get_layer_params();
/*!
These functions are implemented as described in the EXAMPLE_LAYER_ interface.
!*/
};
template <typename SUB_NET>
using fc = add_layer<fc_, SUB_NET>;
// ----------------------------------------------------------------------------------------
class relu_
{
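/*!
WHAT THIS OBJECT REPRESENTS
This is an implementation of the EXAMPLE_LAYER_ interface defined above.
In particular, it defines a rectified linear layer.  That is, it applies
the function max(0,x) to each element of its input tensor.
!*/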
public:
relu_(
);
template <typename SUB_NET> void setup (const SUB_NET& sub);
template <typename SUB_NET> void forward(const SUB_NET& sub, resizable_tensor& output);
template <typename SUB_NET> void backward(const tensor& gradient_input, SUB_NET& sub, tensor& params_grad);
const tensor& get_layer_params() const;
tensor& get_layer_params();
/*!
These functions are implemented as described in the EXAMPLE_LAYER_ interface.
!*/
};
template <typename SUB_NET>
using relu = add_layer<relu_, SUB_NET>;
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_DNn_LAYERS_ABSTRACT_H_
// Copyright (C) 2015 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#ifndef DLIB_DNn_LOSS_H_
#define DLIB_DNn_LOSS_H_
#include "core.h"
#include <dlib/matrix.h>
namespace dlib
{
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
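// The loss layer below implements the usual binary hinge loss.  For a network
// output f(x) and a label y in {-1,+1}, the per-sample loss is max(0, 1 - y*f(x)).
// compute_loss() returns the average of that over the mini-batch and writes the
// corresponding gradient into sub.get_gradient_input().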
class loss_binary_hinge_
{
public:
const static unsigned int sample_expansion_factor = 1;
typedef double label_type;
// Implementing to_label() is optional. If you don't do it then it just means the
// automatic operator() mapping from tensors to outputs is missing from the net object.
template <
typename SUB_TYPE,
typename label_iterator
>
void to_label (
const SUB_TYPE& sub,
label_iterator iter
) const
/*!
requires
- SUB_TYPE implements the SUB_NET interface defined at the top of layers_abstract.h.
- sub.get_output().num_samples() must be a multiple of sample_expansion_factor.
- iter == an iterator pointing to the beginning of a range of
sub.get_output().num_samples()/sample_expansion_factor elements. In
particular, they must be label_type elements.
!*/
{
const tensor& output_tensor = sub.get_output();
DLIB_CASSERT(output_tensor.nr() == 1 &&
output_tensor.nc() == 1 &&
output_tensor.k() == 1,"");
DLIB_CASSERT(output_tensor.num_samples()%sample_expansion_factor == 0,"");
const float* out_data = output_tensor.host();
for (unsigned long i = 0; i < output_tensor.num_samples(); ++i)
{
*iter++ = out_data[i];
}
}
template <
typename label_iterator,
typename SUB_NET
>
double compute_loss (
const tensor& input_tensor,
label_iterator truth, // TODO, this parameter is optional.
SUB_NET& sub
) const
/*!
requires
- SUB_NET implements the SUB_NET interface defined at the top of layers_abstract.h.
- input_tensor was given as input to the network sub and the outputs are now
visible in sub.get_output(), sub.sub_net().get_output(), etc.
- input_tensor.num_samples() must be a multiple of sample_expansion_factor.
- input_tensor.num_samples() == sub.get_output().num_samples() == sub.get_gradient_input().num_samples()
- truth == an iterator pointing to the beginning of a range of
input_tensor.num_samples()/sample_expansion_factor elements. In particular,
they must be label_type elements.
- sub.get_gradient_input() has the same dimensions as sub.get_output().
- for all valid i:
- *(truth+i/sample_expansion_factor) is the label of the ith sample in
sub.get_output().
ensures
- #sub.get_gradient_input() == the gradient of the loss with respect to
sub.get_output().
!*/
{
const tensor& output_tensor = sub.get_output();
tensor& grad = sub.get_gradient_input();
// TODO, throw an exception instead of asserting, probably...
DLIB_CASSERT(input_tensor.num_samples() == grad.num_samples(),"");
DLIB_CASSERT(input_tensor.num_samples() == output_tensor.num_samples(),"");
DLIB_CASSERT(output_tensor.nr() == 1 &&
output_tensor.nc() == 1 &&
output_tensor.k() == 1,"");
// The loss we output is the average loss over the mini-batch.
const double scale = 1.0/output_tensor.num_samples();
double loss = 0;
const float* out_data = output_tensor.host();
float* g = grad.host();
for (unsigned long i = 0; i < output_tensor.num_samples(); ++i)
{
const float y = *truth++;
const float temp = 1-y*out_data[i];
if (temp > 0)
{
loss += scale*temp;
g[i] = -scale*y;
}
else
{
g[i] = 0;
}
}
return loss;
}
};
// ----------------------------------------------------------------------------------------
template <typename SUB_NET>
using loss_binary_hinge = add_loss<loss_binary_hinge_, SUB_NET>;
// ----------------------------------------------------------------------------------------
class loss_no_label_
{
public:
//typedef int label_type;
const static unsigned int sample_expansion_factor = 1;
template <
typename SUB_NET
>
double compute_loss (
const tensor& input_tensor,
SUB_NET& sub
) const
/*!
requires
- SUB_NET implements the SUB_NET interface defined at the top of layers_abstract.h.
- input_tensor was given as input to the network sub and the outputs are now
visible in sub.get_output(), sub.sub_net().get_output(), etc.
- input_tensor.num_samples() must be a multiple of sample_expansion_factor.
- input_tensor.num_samples() == sub.get_output().num_samples()
- sub.get_gradient_input() has the same dimensions as sub.get_output().
ensures
- #sub.get_gradient_input() == the gradient of the loss with respect to
sub.get_output().
!*/
{
return 0;
}
};
// ----------------------------------------------------------------------------------------
template <typename SUB_NET>
using loss_no_label = add_loss<loss_no_label_, SUB_NET>;
// ----------------------------------------------------------------------------------------
}
#endif // #define DLIB_DNn_LOSS_H_
// Copyright (C) 2015 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#ifndef DLIB_DNn_SOLVERS_H_
#define DLIB_DNn_SOLVERS_H_
#include "tensor.h"
#include <iostream>
namespace dlib
{
/*
class EXAMPLE_SOLVER
{
};
*/
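// The sgd object below implements stochastic gradient descent with momentum and
// weight decay.  Each call to operator() updates one layer's parameters as:
//   v      = momentum*v - eps*(weight_decay*params + params_grad)
//   params += v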
struct sgd
{
matrix<float> v;
float weight_decay;
float eps;
float momentum;
sgd(double eps_ = 0.001)
{
weight_decay = 0.0005;
eps = eps_;
//eps = 0.001;
momentum = 0.9;
}
template <typename layer_type>
void operator() (layer_type& l, const tensor& params_grad)
/*!
requires
- l.get_layer_params().size() != 0
- l.get_layer_params() and params_grad have the same dimensions.
!*/
{
if (v.size() != 0)
v = momentum*v - weight_decay*eps*mat(l.get_layer_params()) - eps*mat(params_grad);
else
v = - weight_decay*eps*mat(l.get_layer_params()) - eps*mat(params_grad);
l.get_layer_params() += v;
}
};
}
#endif // #define DLIB_DNn_SOLVERS_H_