Upgraded the input layer so you can give input<std::array<matrix<T>,K>> types
as input layer specifications. This will create input tensors with K channels.

Upgraded the input layer so you can give input<std::array<matrix<T>,K>> types
as input layer specifications. This will create input tensors with K channels.
858824e8 · Davis King · 6d5ad339 · 858824e8 · 858824e8
Commit 858824e8 authored Oct 31, 2017 by Davis King
Hide whitespace changes
Inline Side-by-side

Showing with 103 additions and 3 deletions

input.h dlib/dnn/input.h +94 -0

input_abstract.h dlib/dnn/input_abstract.h +9 -3

No files found.
--- a/dlib/dnn/input.h
+++ b/dlib/dnn/input.h
@@ -9,6 +9,7 @@
 #include "../pixel.h"
 #include "../image_processing.h"
 #include <sstream>
+#include <array>
 #include "tensor_tools.h"


@@ -399,6 +400,99 @@ namespace dlib
        }
    };

+// ----------------------------------------------------------------------------------------
+
+    template <typename T, long NR, long NC, typename MM, typename L, size_t K>
+    class input<std::array<matrix<T,NR,NC,MM,L>,K>> 
+    {
+    public:
+        typedef std::array<matrix<T,NR,NC,MM,L>,K> input_type;
+
+        input() {}
+        input(const input&) {}
+
+        bool image_contained_point ( const tensor& data, const point& p) const { return get_rect(data).contains(p); }
+        drectangle tensor_space_to_image_space ( const tensor& /*data*/, drectangle r) const { return r; }
+        drectangle image_space_to_tensor_space ( const tensor& /*data*/, double /*scale*/, drectangle r ) const { return r; }
+
+        template <typename forward_iterator>
+        void to_tensor (
+            forward_iterator ibegin,
+            forward_iterator iend,
+            resizable_tensor& data
+        ) const
+        {
+            DLIB_CASSERT(std::distance(ibegin,iend) > 0);
+            DLIB_CASSERT(ibegin->size() != 0, "When using std::array<matrix> inputs you can't give 0 sized arrays.");
+            const auto nr = (*ibegin)[0].nr();
+            const auto nc = (*ibegin)[0].nc();
+            // make sure all the input matrices have the same dimensions
+            for (auto i = ibegin; i != iend; ++i)
+            {
+                for (size_t k = 0; k < K; ++k)
+                {
+                    const auto& arr = *i;
+                    DLIB_CASSERT(arr[k].nr()==nr && arr[k].nc()==nc,
+                        "\t input::to_tensor()"
+                        << "\n\t When using std::array<matrix> as input, all matrices in a batch must have the same dimensions."
+                        << "\n\t nr: " << nr
+                        << "\n\t nc: " << nc
+                        << "\n\t k:  " << k 
+                        << "\n\t arr[k].nr(): " << arr[k].nr()
+                        << "\n\t arr[k].nc(): " << arr[k].nc()
+                    );
+                }
+            }
+
+            
+            // initialize data to the right size to contain the stuff in the iterator range.
+            data.set_size(std::distance(ibegin,iend), K, nr, nc);
+
+            auto ptr = data.host();
+            for (auto i = ibegin; i != iend; ++i)
+            {
+                for (size_t k = 0; k < K; ++k)
+                {
+                    for (long r = 0; r < nr; ++r)
+                    {
+                        for (long c = 0; c < nc; ++c)
+                        {
+                            if (is_same_type<T,unsigned char>::value)
+                                *ptr++ = (*i)[k](r,c)/256.0;
+                            else
+                                *ptr++ = (*i)[k](r,c);
+                        }
+                    }
+                }
+            }
+
+        }
+
+        friend void serialize(const input& /*item*/, std::ostream& out)
+        {
+            serialize("input<array<matrix>>", out);
+        }
+
+        friend void deserialize(input& /*item*/, std::istream& in)
+        {
+            std::string version;
+            deserialize(version, in);
+            if (version != "input<array<matrix>>")
+                throw serialization_error("Unexpected version found while deserializing dlib::input<array<matrix>>.");
+        }
+
+        friend std::ostream& operator<<(std::ostream& out, const input& /*item*/)
+        {
+            out << "input<array<matrix>>";
+            return out;
+        }
+
+        friend void to_xml(const input& /*item*/, std::ostream& out)
+        {
+            out << "<input/>";
+        }
+    };
+
 // ----------------------------------------------------------------------------------------

    template <typename T, typename MM>

--- a/dlib/dnn/input_abstract.h
+++ b/dlib/dnn/input_abstract.h
@@ -116,8 +116,11 @@ namespace dlib
    {
        /*!
            REQUIREMENTS ON T
-                T is a matrix or array2d object and it must contain some kind of pixel
-                type.  I.e. pixel_traits<T::type> must be defined. 
+                One of the following must be true:
+                    - T is a matrix or array2d object and it must contain some kind of
+                      pixel type.  I.e. pixel_traits<T::type> must be defined.   
+                    - T is a std::array<matrix<U>,K> where U is any built-in scalar type
+                      like float, double, or unsigned char.

            WHAT THIS OBJECT REPRESENTS
                This is a basic input layer that simply copies images into a tensor.  
@@ -141,7 +144,8 @@ namespace dlib
            ensures
                - Converts the iterator range into a tensor and stores it into #data.  In
                  particular, if the input images have R rows, C columns, and K channels
-                  (where K is given by pixel_traits::num) then we will have:
+                  (where K is given by pixel_traits::num or std::array::size() if
+                  std::array inputs are used) then we will have:
                    - #data.num_samples() == std::distance(ibegin,iend)
                    - #data.nr() == R
                    - #data.nc() == C
@@ -149,6 +153,8 @@ namespace dlib
                  For example, a matrix<float,3,3> would turn into a tensor with 3 rows, 3
                  columns, and k()==1.  Or a matrix<rgb_pixel,4,5> would turn into a tensor
                  with 4 rows, 5 columns, and k()==3 (since rgb_pixels have 3 channels).
+                  Or a std::array<matrix<float,3,3>,5> would turn into a tensor with 3 rows,
+                  3 columns, and k()==5 channels.
                - If the input data contains pixels of type unsigned char, rgb_pixel, or
                  other pixel types with a basic_pixel_type of unsigned char then each
                  value written to the output tensor is first divided by 256.0 so that the