Added input_rgb_image_pyramid

8a707f17 · Davis King · 09200e8d · 8a707f17 · 8a707f17
Commit 8a707f17 authored Sep 03, 2016 by Davis King
Hide whitespace changes
Inline Side-by-side

Showing with 331 additions and 0 deletions

input.h dlib/dnn/input.h +167 -0

input_abstract.h dlib/dnn/input_abstract.h +164 -0

No files found.
--- a/dlib/dnn/input.h
+++ b/dlib/dnn/input.h
@@ -7,6 +7,7 @@
 #include "../matrix.h"
 #include "../array2d.h"
 #include "../pixel.h"
+#include "../image_processing.h"
 #include <sstream>


@@ -57,6 +58,10 @@ namespace dlib
        float get_avg_green() const { return avg_green; }
        float get_avg_blue()  const { return avg_blue; }

+        // Same interface as input_rgb_image_pyramid, but here the rectangle mappings are the identity.
+        bool image_contained_point (const tensor& data, const point& p) const { auto&& area = get_rect(data); return area.contains(p); }
+        drectangle tensor_space_to_image_space (const tensor& /*data*/, drectangle r) const { return r; }
+        drectangle image_space_to_tensor_space (const tensor& /*data*/, double /*scale*/, drectangle r) const { return r; }
        template <typename forward_iterator>
        void to_tensor (
            forward_iterator ibegin,
@@ -180,6 +185,10 @@ namespace dlib
        float get_avg_green() const { return avg_green; }
        float get_avg_blue()  const { return avg_blue; }

+        // Same interface as input_rgb_image_pyramid, but here the rectangle mappings are the identity.
+        bool image_contained_point (const tensor& data, const point& p) const { auto&& area = get_rect(data); return area.contains(p); }
+        drectangle tensor_space_to_image_space (const tensor& /*data*/, drectangle r) const { return r; }
+        drectangle image_space_to_tensor_space (const tensor& /*data*/, double /*scale*/, drectangle r) const { return r; }
        template <typename forward_iterator>
        void to_tensor (
            forward_iterator ibegin,
@@ -298,6 +307,10 @@ namespace dlib
        template <typename mm>
        input(const input<array2d<T,mm>>&) {}

+        // Same interface as input_rgb_image_pyramid, but here the rectangle mappings are the identity.
+        bool image_contained_point (const tensor& data, const point& p) const { auto&& area = get_rect(data); return area.contains(p); }
+        drectangle tensor_space_to_image_space (const tensor& /*data*/, drectangle r) const { return r; }
+        drectangle image_space_to_tensor_space (const tensor& /*data*/, double /*scale*/, drectangle r) const { return r; }
        template <typename forward_iterator>
        void to_tensor (
            forward_iterator ibegin,
@@ -391,6 +404,10 @@ namespace dlib
        template <long NR, long NC, typename mm, typename L>
        input(const input<matrix<T,NR,NC,mm,L>>&) {}

+        // Same interface as input_rgb_image_pyramid, but here the rectangle mappings are the identity.
+        bool image_contained_point (const tensor& data, const point& p) const { auto&& area = get_rect(data); return area.contains(p); }
+        drectangle tensor_space_to_image_space (const tensor& /*data*/, drectangle r) const { return r; }
+        drectangle image_space_to_tensor_space (const tensor& /*data*/, double /*scale*/, drectangle r) const { return r; }
        template <typename forward_iterator>
        void to_tensor (
            forward_iterator ibegin,
@@ -468,6 +485,156 @@ namespace dlib
        }
    };

+// ----------------------------------------------------------------------------------------
+
+    template <typename PYRAMID_TYPE>
+    class input_rgb_image_pyramid
+    {
+        /*!
+            Input layer for matrix<rgb_pixel> images.  to_tensor() fills the output
+            tensor with the tiled image pyramid create_tiled_pyramid<PYRAMID_TYPE>()
+            builds for each image, handing data.annotation() to that call as a
+            std::vector<rectangle>.  The coordinate mapping functions below read that
+            vector back, so they require a tensor produced by to_tensor().
+        !*/
+    public:
+        typedef matrix<rgb_pixel> input_type;
+        typedef PYRAMID_TYPE pyramid_type;
+
+        input_rgb_image_pyramid (
+        ) : avg_red(122.782), avg_green(117.001), avg_blue(104.298)
+        {}
+
+        input_rgb_image_pyramid (
+            float avg_red_,
+            float avg_green_,
+            float avg_blue_
+        ) : avg_red(avg_red_), avg_green(avg_green_), avg_blue(avg_blue_)
+        {}
+
+        // The values to_tensor() subtracts from the red, green, and blue channels.
+        float get_avg_red()   const { return avg_red; }
+        float get_avg_green() const { return avg_green; }
+        float get_avg_blue()  const { return avg_blue; }
+
+        // True if p is inside the first rectangle in data's annotation (rects[0]).
+        bool image_contained_point (const tensor& data, const point& p) const
+        {
+            auto&& rects = any_cast<std::vector<rectangle>>(data.annotation());
+            DLIB_CASSERT(rects.size() > 0);
+            return rects[0].contains(p);
+        }
+
+        // Maps r from tensor (tiled pyramid) coordinates to input image coordinates.
+        drectangle tensor_space_to_image_space (const tensor& data, drectangle r) const
+        {
+            auto&& rects = any_cast<std::vector<rectangle>>(data.annotation());
+            return tiled_pyramid_to_image<pyramid_type>(rects, r);
+        }
+
+        // Maps r from input image coordinates into the tiled pyramid at the given scale.
+        drectangle image_space_to_tensor_space (const tensor& data, double scale, drectangle r) const
+        {
+            auto&& rects = any_cast<std::vector<rectangle>>(data.annotation());
+            return image_to_tiled_pyramid<pyramid_type>(rects, scale, r);
+        }
+
+        // Writes the tiled pyramid of every image in [ibegin,iend) into data, giving a
+        // tensor with one sample per image and 3 normalized color planes per sample.
+        template <typename forward_iterator>
+        void to_tensor (
+            forward_iterator ibegin,
+            forward_iterator iend,
+            resizable_tensor& data
+        ) const
+        {
+            DLIB_CASSERT(std::distance(ibegin,iend) > 0);
+            auto nr = ibegin->nr();
+            auto nc = ibegin->nc();
+            // make sure all the input matrices have the same dimensions
+            for (auto i = ibegin; i != iend; ++i)
+            {
+                DLIB_CASSERT(i->nr()==nr && i->nc()==nc,
+                    "\t input_rgb_image_pyramid::to_tensor()"
+                    << "\n\t All matrices given to to_tensor() must have the same dimensions."
+                    << "\n\t nr: " << nr << "\n\t nc: " << nc
+                    << "\n\t i->nr(): " << i->nr() << "\n\t i->nc(): " << i->nc()
+                );
+            }
+
+            // Size the tensor from the first pyramid; nr and nc now describe the pyramid.
+            matrix<rgb_pixel> img;
+            create_tiled_pyramid<pyramid_type>(*ibegin, img, data.annotation().get<std::vector<rectangle>>());
+            nr = img.nr();
+            nc = img.nc();
+            data.set_size(std::distance(ibegin,iend), 3, nr, nc);
+
+            const size_t offset = nr*nc;  // number of elements in one color plane
+            auto ptr = data.host();
+            while(true)
+            {
+                for (long r = 0; r < nr; ++r)
+                {
+                    for (long c = 0; c < nc; ++c)
+                    {
+                        rgb_pixel temp = img(r,c);
+                        auto p = ptr++;
+                        *p = (temp.red-avg_red)/256.0;
+                        p += offset;
+                        *p = (temp.green-avg_green)/256.0;
+                        p += offset;
+                        *p = (temp.blue-avg_blue)/256.0;
+                        // p isn't advanced again: it would leave the buffer on the last sample
+                    }
+                }
+                // ptr has walked the red plane, so skip this sample's other planes
+                ptr += offset*(data.k()-1);
+
+                ++ibegin;
+                if (ibegin == iend)
+                    break;
+                create_tiled_pyramid<pyramid_type>(*ibegin, img, data.annotation().get<std::vector<rectangle>>());
+            }
+        }
+
+        // Only the three channel averages are saved, prefixed by a version string.
+        friend void serialize(const input_rgb_image_pyramid& item, std::ostream& out)
+        {
+            serialize("input_rgb_image_pyramid", out);
+            serialize(item.avg_red, out);
+            serialize(item.avg_green, out);
+            serialize(item.avg_blue, out);
+        }
+
+        // Throws serialization_error if the stored version string doesn't match.
+        friend void deserialize(input_rgb_image_pyramid& item, std::istream& in)
+        {
+            std::string version;
+            deserialize(version, in);
+            if (version != "input_rgb_image_pyramid")
+                throw serialization_error("Unexpected version found while deserializing dlib::input_rgb_image_pyramid.");
+            deserialize(item.avg_red, in);
+            deserialize(item.avg_green, in);
+            deserialize(item.avg_blue, in);
+        }
+
+        friend std::ostream& operator<<(std::ostream& out, const input_rgb_image_pyramid& item)
+        {
+            out << "input_rgb_image_pyramid("<<item.avg_red<<","<<item.avg_green<<","<<item.avg_blue<<")";
+            return out;
+        }
+
+        friend void to_xml(const input_rgb_image_pyramid& item, std::ostream& out)
+        {
+            out << "<input_rgb_image_pyramid r='"<<item.avg_red<<"' g='"<<item.avg_green<<"' b='"<<item.avg_blue<<"'/>";
+        }
+
+    private:
+        float avg_red;
+        float avg_green;
+        float avg_blue;
+    };
+
 // ----------------------------------------------------------------------------------------

 }

--- a/dlib/dnn/input_abstract.h
+++ b/dlib/dnn/input_abstract.h
@@ -153,6 +153,11 @@ namespace dlib
                  value written to the output tensor is first divided by 256.0 so that the
                  resulting outputs are all in the range [0,1].
        !*/
+
+        // Same interface as input_rgb_image_pyramid; for this input layer the rectangle mappings are the identity.
+        bool image_contained_point (const tensor& data, const point& p) const { auto&& area = get_rect(data); return area.contains(p); }
+        drectangle tensor_space_to_image_space (const tensor& /*data*/, drectangle r) const { return r; }
+        drectangle image_space_to_tensor_space (const tensor& /*data*/, double /*scale*/, drectangle r) const { return r; }
    };

 // ----------------------------------------------------------------------------------------
@@ -234,6 +239,12 @@ namespace dlib
                  subtracted (according to get_avg_red(), get_avg_green(), or
                  get_avg_blue()) and then is divided by 256.0.
        !*/
+
+
+        // Same interface as input_rgb_image_pyramid; for this input layer the rectangle mappings are the identity.
+        bool image_contained_point (const tensor& data, const point& p) const { auto&& area = get_rect(data); return area.contains(p); }
+        drectangle tensor_space_to_image_space (const tensor& /*data*/, drectangle r) const { return r; }
+        drectangle image_space_to_tensor_space (const tensor& /*data*/, double /*scale*/, drectangle r) const { return r; }
    };

 // ----------------------------------------------------------------------------------------
@@ -253,6 +264,159 @@ namespace dlib

    };

+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename PYRAMID_TYPE
+        >
+    class input_rgb_image_pyramid
+    {
+        /*!
+            REQUIREMENTS ON PYRAMID_TYPE
+                PYRAMID_TYPE must be an instance of the dlib::pyramid_down template.
+
+            WHAT THIS OBJECT REPRESENTS
+                This input layer works with RGB images of type matrix<rgb_pixel>.  It is
+                identical to input_rgb_image except that it outputs a tensor containing a
+                tiled image pyramid of each input image rather than a simple copy of each
+                image.  The tiled image pyramid is created using create_tiled_pyramid().
+        !*/
+
+    public:
+
+        typedef matrix<rgb_pixel> input_type;
+        typedef PYRAMID_TYPE pyramid_type;
+
+        input_rgb_image_pyramid (
+        );
+        /*!
+            ensures
+                - #get_avg_red()   == 122.782
+                - #get_avg_green() == 117.001
+                - #get_avg_blue()  == 104.298
+        !*/
+
+        input_rgb_image_pyramid (
+            float avg_red,
+            float avg_green,
+            float avg_blue
+        ); 
+        /*!
+            ensures
+                - #get_avg_red() == avg_red
+                - #get_avg_green() == avg_green
+                - #get_avg_blue() == avg_blue
+        !*/
+
+        float get_avg_red(
+        ) const;
+        /*!
+            ensures
+                - returns the value subtracted from the red color channel.
+        !*/
+
+        float get_avg_green(
+        ) const;
+        /*!
+            ensures
+                - returns the value subtracted from the green color channel.
+        !*/
+
+        float get_avg_blue(
+        ) const;
+        /*!
+            ensures
+                - returns the value subtracted from the blue color channel.
+        !*/
+
+        template <typename forward_iterator>
+        void to_tensor (
+            forward_iterator ibegin,
+            forward_iterator iend,
+            resizable_tensor& data
+        ) const;
+        /*!
+            requires
+                - [ibegin, iend) is an iterator range over input_type objects.
+                - std::distance(ibegin,iend) > 0
+                - All the images in the input range have the same dimensions.
+            ensures
+                - Converts the iterator range into a tensor and stores it into #data.  In
+                  particular, we will have:
+                    - #data.num_samples() == std::distance(ibegin,iend)
+                    - #data.k() == 3
+                    - Each sample in #data contains a tiled image pyramid of the
+                      corresponding input image.  The tiled pyramid is created by
+                      create_tiled_pyramid<PYRAMID_TYPE>().
+                    - #data.annotation() holds the std::vector<rectangle> given to create_tiled_pyramid().
+                  Moreover, each color channel is normalized by having its average value
+                  subtracted (according to get_avg_red(), get_avg_green(), or
+                  get_avg_blue()) and then is divided by 256.0.
+        !*/
+
+        bool image_contained_point (
+            const tensor& data,
+            const point& p
+        ) const;
+        /*!
+            requires
+                - data is a tensor that was produced by this->to_tensor()
+            ensures
+                - data was built from a set of identically sized images.  This function
+                  returns true if those images were big enough to contain the point p
+                  and false otherwise.
+        !*/
+
+        drectangle image_space_to_tensor_space (
+            const tensor& data,
+            double scale,
+            drectangle r 
+        ) const;
+        /*!
+            requires
+                - data is a tensor that was produced by this->to_tensor()
+                - 0 < scale <= 1
+            ensures
+                - This function maps from to_tensor()'s input image space to its output
+                  tensor space.  Therefore, given that data is a tensor produced by
+                  to_tensor(), image_space_to_tensor_space() allows you to ask for the
+                  rectangle in data that corresponds to a rectangle in the original image
+                  space.
+
+                  Note that since the output tensor contains an image pyramid, there are
+                  multiple points in the output tensor that correspond to any input
+                  location.  So you must also specify a scale so we know what level of the
+                  pyramid is needed.  So given a rectangle r in an input image, you can
+                  ask, what rectangle in data corresponds to r when things are scale times
+                  smaller?  That rectangle is returned by this function.
+                - A scale of 1 means we don't move anywhere in the pyramid scale space relative
+                  to the input image while smaller values of scale mean we move down the
+                  pyramid.
+        !*/
+
+        drectangle tensor_space_to_image_space (
+            const tensor& data,
+            drectangle r
+        ) const;
+        /*!
+            requires
+                - data is a tensor that was produced by this->to_tensor()
+            ensures
+                - This function maps from to_tensor()'s output tensor space to its input
+                  image space.  Therefore, given that data is a tensor produced by
+                  to_tensor(), tensor_space_to_image_space() allows you to ask for the
+                  rectangle in the input image that corresponds to a rectangle in data.
+                - It should be noted that this function isn't always an inverse of
+                  image_space_to_tensor_space().  This is because you can ask
+                  image_space_to_tensor_space() for the coordinates of points outside the input
+                  image and they will be mapped to somewhere that doesn't have an inverse.
+                  But for points actually inside the input image this function performs an
+                  approximate inverse mapping.  I.e. when image_contained_point(data,center(r))==true 
+                  there is an approximate inverse.
+        !*/
+
+    };
+
 // ----------------------------------------------------------------------------------------

 }