Commit 9b56dc35 authored by Davis King

Finished the input interface and implementations. Also added docs about
layer conversions.
parent bcef0f86
dlib/dnn/input.h
@@ -3,7 +3,9 @@
 #ifndef DLIB_DNn_INPUT_H_
 #define DLIB_DNn_INPUT_H_
+#include "input_abstract.h"
 #include "../matrix.h"
+#include "../array2d.h"
 #include "../pixel.h"
@@ -13,51 +15,20 @@ namespace dlib
 // ----------------------------------------------------------------------------------------
     template <typename T>
     class input
     {
-    public:
-        // sample_expansion_factor must be > 0
-        const static unsigned int sample_expansion_factor = 1;
-        typedef T input_type;
-        template <typename input_iterator>
-        void to_tensor (
-            input_iterator begin,
-            input_iterator end,
-            resizable_tensor& data
-        ) const
-        /*!
-            requires
-                - [begin, end) is an iterator range over input_type objects.
-            ensures
-                - Converts the iterator range into a tensor and stores it into #data.
-                - Normally you would have #data.num_samples() == distance(begin,end) but
-                  you can also expand the output by some integer factor so long as the loss
-                  you use can deal with it correctly.
-                - #data.num_samples() == distance(begin,end)*sample_expansion_factor.
-        !*/
-        {
-            // initialize data to the right size to contain the stuff in the iterator range.
-            for (input_iterator i = begin; i != end; ++i)
-            {
-                matrix<rgb_pixel> temp = *i;
-                // now copy *i into the right part of data.
-            }
-        }
+        const static bool always_false = sizeof(T)!=sizeof(T);
+        static_assert(always_false, "Unsupported type given to input<>. input<> only supports "
+            "dlib::matrix and dlib::array2d objects.");
     };
 // ----------------------------------------------------------------------------------------
-    template <typename T,long NR, typename MM, typename L>
-    class input<matrix<T,NR,1,MM,L>>
+    template <typename T, long NR, long NC, typename MM, typename L>
+    class input<matrix<T,NR,NC,MM,L>>
     {
     public:
-        // TODO, maybe we should only allow T to be float? Seems kinda pointless to allow
-        // double. Don't forget to remove the matrix_cast if we enforce just float.
-        typedef matrix<T,NR,1,MM,L> input_type;
+        typedef matrix<T,NR,NC,MM,L> input_type;
         const static unsigned int sample_expansion_factor = 1;
         template <typename input_iterator>
@@ -66,40 +37,51 @@
             input_iterator end,
             resizable_tensor& data
         ) const
-        /*!
-            requires
-                - [begin, end) is an iterator range over input_type objects.
-            ensures
-                - converts the iterator range into a tensor and stores it into #data.
-                - Normally you would have #data.num_samples() == distance(begin,end) but
-                  you can also expand the output by some integer factor so long as the loss
-                  you use can deal with it correctly.
-                - #data.num_samples() == distance(begin,end)*sample_expansion_factor.
-        !*/
         {
+            DLIB_CASSERT(std::distance(begin,end) > 0,"");
+            const auto nr = begin->nr();
+            const auto nc = begin->nc();
+            // make sure all the input matrices have the same dimensions
+            for (auto i = begin; i != end; ++i)
+            {
+                DLIB_CASSERT(i->nr()==nr && i->nc()==nc,
+                    "\t input::to_tensor()"
+                    << "\n\t All matrices given to to_tensor() must have the same dimensions."
+                    << "\n\t nr: " << nr
+                    << "\n\t nc: " << nc
+                    << "\n\t i->nr(): " << i->nr()
+                    << "\n\t i->nc(): " << i->nc()
+                );
+            }
             // initialize data to the right size to contain the stuff in the iterator range.
-            data.set_size(std::distance(begin,end), 1, 1, begin->size());
-            unsigned long idx = 0;
-            for (input_iterator i = begin; i != end; ++i)
+            data.set_size(std::distance(begin,end), nr, nc, pixel_traits<T>::num);
+            auto ptr = data.host();
+            for (auto i = begin; i != end; ++i)
             {
-                data.set_sample(idx++, matrix_cast<float>(*i));
+                for (long r = 0; r < nr; ++r)
+                {
+                    for (long c = 0; c < nc; ++c)
+                    {
+                        auto temp = pixel_to_vector<float>((*i)(r,c));
+                        for (long j = 0; j < temp.size(); ++j)
+                            *ptr++ = temp(j);
+                    }
+                }
             }
         }
     };
 // ----------------------------------------------------------------------------------------
-    template <typename T>
-    class input2
+    template <typename T, typename MM>
+    class input<array2d<T,MM>>
     {
     public:
-        input2(){}
-        input2(const input<T>&) {}
-        typedef T input_type;
+        typedef array2d<T,MM> input_type;
         const static unsigned int sample_expansion_factor = 1;
         template <typename input_iterator>
@@ -108,24 +90,41 @@
             input_iterator end,
             resizable_tensor& data
         ) const
-        /*!
-            requires
-                - [begin, end) is an iterator range over T objects.
-            ensures
-                - converts the iterator range into a tensor and stores it into #data.
-                - Normally you would have #data.num_samples() == distance(begin,end) but
-                  you can also expand the output by some integer factor so long as the loss
-                  you use can deal with it correctly.
-                - #data.num_samples() == distance(begin,end)*K where K is an integer >= 1.
-        !*/
         {
+            DLIB_CASSERT(std::distance(begin,end) > 0,"");
+            const auto nr = begin->nr();
+            const auto nc = begin->nc();
+            // make sure all the input matrices have the same dimensions
+            for (auto i = begin; i != end; ++i)
+            {
+                DLIB_CASSERT(i->nr()==nr && i->nc()==nc,
+                    "\t input::to_tensor()"
+                    << "\n\t All array2d objects given to to_tensor() must have the same dimensions."
+                    << "\n\t nr: " << nr
+                    << "\n\t nc: " << nc
+                    << "\n\t i->nr(): " << i->nr()
+                    << "\n\t i->nc(): " << i->nc()
+                );
+            }
             // initialize data to the right size to contain the stuff in the iterator range.
-            for (input_iterator i = begin; i != end; ++i)
+            data.set_size(std::distance(begin,end), nr, nc, pixel_traits<T>::num);
+            auto ptr = data.host();
+            for (auto i = begin; i != end; ++i)
             {
-                matrix<rgb_pixel> temp = *i;
-                // now copy *i into the right part of data.
+                for (long r = 0; r < nr; ++r)
+                {
+                    for (long c = 0; c < nc; ++c)
+                    {
+                        auto temp = pixel_to_vector<float>((*i)[r][c]);
+                        for (long j = 0; j < temp.size(); ++j)
+                            *ptr++ = temp(j);
+                    }
+                }
             }
         }
     };
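For orientation, here is a hedged usage sketch of the matrix overload above. The include path and the example() wrapper are assumptions made for illustration; the resulting tensor dimensions follow the documentation added in input_abstract.h below.

    #include <vector>
    #include "dlib/dnn/input.h"   // assumed path to the header shown above

    using namespace dlib;

    void example()
    {
        // Two 4x5 RGB images.  Per the new docs, the resulting tensor has
        // num_samples()==2, nr()==4, nc()==5, and k()==3 (rgb_pixel has 3 channels).
        std::vector<matrix<rgb_pixel>> images(2);
        images[0].set_size(4,5);
        images[1].set_size(4,5);

        input<matrix<rgb_pixel>> inp;
        resizable_tensor data;
        inp.to_tensor(images.begin(), images.end(), data);

        // data.host() now holds float channel values, written sample by sample,
        // row by row, column by column, with the channels of each pixel adjacent.
    }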
dlib/dnn/input_abstract.h (new file)
// Copyright (C) 2015 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#undef DLIB_DNn_INPUT_ABSTRACT_H_
#ifdef DLIB_DNn_INPUT_ABSTRACT_H_
#include "../matrix.h"
#include "../pixel.h"
namespace dlib
{
// ----------------------------------------------------------------------------------------
class EXAMPLE_INPUT
{
/*!
WHAT THIS OBJECT REPRESENTS
Each deep neural network model in dlib begins with an input layer. The job
of the input layer is to convert an input_type into a tensor. Nothing more
and nothing less.
Note that there is no dlib::EXAMPLE_INPUT type. It is shown here purely to
document the interface that an input layer object must implement. If you
are using some kind of image or matrix object as your input_type then you
can use the provided dlib::input layer type defined below. Otherwise, you
need to define your own custom input layer.
!*/
public:
EXAMPLE_INPUT(
);
/*!
ensures
- Default constructs this object. This function is not required to do
anything in particular but it is required that input layer objects be
default constructible.
!*/
EXAMPLE_INPUT(
const some_other_input_layer_type& item
);
/*!
ensures
- Constructs this object from item. This form of constructor is optional
but it allows you to provide a conversion from one input layer type to
another. For example, the following code is valid only if my_input2 can
be constructed from my_input1:
relu<fc<relu<fc<my_input1>>>> my_dnn1;
relu<fc<relu<fc<my_input2>>>> my_dnn2(my_dnn1);
This kind of pattern is useful if you want to use one type of input layer
during training but a different type of layer during testing since it
allows you to easily convert between related deep neural network types.
!*/
// sample_expansion_factor must be > 0
const static unsigned int sample_expansion_factor = 1;
typedef whatever_type_to_tensor_expects input_type;
template <typename input_iterator>
void to_tensor (
input_iterator begin,
input_iterator end,
resizable_tensor& data
) const
/*!
requires
- [begin, end) is an iterator range over input_type objects.
- std::distance(begin,end) > 0
ensures
- Converts the iterator range into a tensor and stores it into #data.
- #data.num_samples() == distance(begin,end)*sample_expansion_factor.
- Normally you would have #data.num_samples() == distance(begin,end) but
you can also expand the output by some integer factor so long as the loss
you use can deal with it correctly.
!*/
};
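To make the interface above concrete, here is a hedged sketch of a custom input layer. The name my_vec_input and its std::array input_type are illustrative only, the tensor header path is an assumption, and the set_size() argument order (num_samples, nr, nc, k) is assumed to match the input::to_tensor() implementation in this commit.

    #include <array>
    #include <iterator>
    #include "dlib/dnn/tensor.h"   // assumed location of resizable_tensor

    class my_vec_input
    {
    public:
        // Required interface: default constructor, sample_expansion_factor,
        // input_type, and to_tensor().
        my_vec_input() {}

        const static unsigned int sample_expansion_factor = 1;
        typedef std::array<float,3> input_type;

        template <typename input_iterator>
        void to_tensor (
            input_iterator begin,
            input_iterator end,
            dlib::resizable_tensor& data
        ) const
        {
            // One sample per input element: 1x1 spatial size and 3 channels.
            data.set_size(std::distance(begin,end), 1, 1, 3);
            auto ptr = data.host();
            for (auto i = begin; i != end; ++i)
                for (float v : *i)
                    *ptr++ = v;
        }
    };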
// ----------------------------------------------------------------------------------------
template <
typename T
>
class input
{
/*!
REQUIREMENTS ON T
T is a matrix or array2d object and it must contain some kind of pixel
type. I.e. pixel_traits<T::type> must be defined.
WHAT THIS OBJECT REPRESENTS
This is a basic input layer that simply copies images into a tensor.
!*/
public:
const static unsigned int sample_expansion_factor = 1;
typedef T input_type;
template <typename input_iterator>
void to_tensor (
input_iterator begin,
input_iterator end,
resizable_tensor& data
) const;
/*!
requires
- [begin, end) is an iterator range over input_type objects.
- std::distance(begin,end) > 0
- The input range should contain image objects that all have the same
dimensions.
ensures
- Converts the iterator range into a tensor and stores it into #data. In
particular, if the input images have R rows, C columns, and K channels
(where K is given by pixel_traits::num) then we will have:
- #data.num_samples() == std::distance(begin,end)
- #data.nr() == R
- #data.nc() == C
- #data.k() == K
For example, a matrix<float,3,3> would turn into a tensor with 3 rows, 3
columns, and k()==1. Or a matrix<rgb_pixel,4,5> would turn into a tensor
with 4 rows, 5 columns, and k()==3 (since rgb_pixels have 3 channels).
!*/
};
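As a worked instance of the dimensions described above, here is a hedged sketch using grayscale array2d images (the include path and the example_gray() wrapper are assumptions):

    #include "dlib/dnn/input.h"   // assumed path to the implementation header

    using namespace dlib;

    void example_gray()
    {
        // Two 4x5 grayscale images.  pixel_traits<unsigned char>::num == 1, so
        // per the spec above: num_samples()==2, nr()==4, nc()==5, k()==1.
        array2d<unsigned char> imgs[2];
        imgs[0].set_size(4,5);
        imgs[1].set_size(4,5);

        input<array2d<unsigned char>> inp;
        resizable_tensor data;
        inp.to_tensor(imgs, imgs+2, data);
    }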
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_DNn_INPUT_ABSTRACT_H_
dlib/dnn/layers_abstract.h
@@ -68,6 +68,22 @@ namespace dlib
             constructable.
         !*/
 
+        EXAMPLE_LAYER_(
+            const some_other_layer_type& item
+        );
+        /*!
+            ensures
+                - Constructs this object from item. This form of constructor is optional
+                  but it allows you to provide a conversion from one layer type to another.
+                  For example, the following code is valid only if my_layer2 can be
+                  constructed from my_layer1:
+                    relu<fc<my_layer1<fc<input<matrix<float>>>>>> my_dnn1;
+                    relu<fc<my_layer2<fc<input<matrix<float>>>>>> my_dnn2(my_dnn1);
+                  This kind of pattern is useful if you want to use one type of layer
+                  during training but a different type of layer during testing since it
+                  allows you to easily convert between related deep neural network types.
+        !*/
+
         template <typename SUB_NET>
         void setup (
             const SUB_NET& sub
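The constructor documented above can be sketched in isolation as follows. my_layer1 and my_layer2 are hypothetical layer types; the rest of the EXAMPLE_LAYER_ interface (setup(), etc.) is omitted and only the conversion hook is shown.

    class my_layer1
    {
    public:
        my_layer1() {}
        // ... rest of the EXAMPLE_LAYER_ interface ...
    };

    class my_layer2
    {
    public:
        my_layer2() {}

        // The optional conversion constructor: it lets a network built on
        // my_layer1 be converted into one built on my_layer2, e.g. when
        // swapping a training-time layer for a different test-time layer.
        my_layer2(const my_layer1& item)
        {
            // copy whatever parameters/state my_layer2 needs from item
            (void)item;
        }

        // ... rest of the EXAMPLE_LAYER_ interface ...
    };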