Commit 1dec97d6 authored by davemers0160's avatar davemers0160 Committed by Davis E. King

Downsample layer (#1212)

Added resize_to_ layer
parent cc0542b3
......@@ -739,6 +739,120 @@ namespace dlib
>
using upsample = add_layer<upsample_<scale,scale>, SUBNET>;
// ----------------------------------------------------------------------------------------
template <
    long NR_,
    long NC_
    >
class resize_to_
{
    /*!
        Resizes each channel of the input tensor to a fixed NR_ x NC_ size
        using bilinear interpolation.  This layer has no learnable parameters.
    !*/
public:
    static_assert(NR_ >= 1, "NR resize parameter can't be less than 1.");
    static_assert(NC_ >= 1, "NC resize parameter can't be less than 1.");

    resize_to_()
    {
        // scale_y/scale_x have in-class initializers (see below), so the
        // layer is fully defined even before the first forward() call.
    }

    template <typename SUBNET>
    void setup (const SUBNET& /*sub*/)
    {
        // Nothing to do: the output size is fixed by the template arguments.
    }

    template <typename SUBNET>
    void forward(const SUBNET& sub, resizable_tensor& output)
    {
        // Record per-axis scale factors so map_input_to_output()/
        // map_output_to_input() reflect the most recent forward pass.
        scale_y = (double)NR_/(double)sub.get_output().nr();
        scale_x = (double)NC_/(double)sub.get_output().nc();

        output.set_size(
            sub.get_output().num_samples(),
            sub.get_output().k(),
            NR_,
            NC_);
        tt::resize_bilinear(output, sub.get_output());
    }

    template <typename SUBNET>
    void backward(const tensor& gradient_input, SUBNET& sub, tensor& /*params_grad*/)
    {
        // No parameters, so only the gradient w.r.t. the input is propagated.
        tt::resize_bilinear_gradient(sub.get_gradient_input(), gradient_input);
    }

    inline dpoint map_input_to_output (dpoint p) const
    {
        p.x() = p.x()*scale_x;
        p.y() = p.y()*scale_y;
        return p;
    }
    inline dpoint map_output_to_input (dpoint p) const
    {
        p.x() = p.x()/scale_x;
        p.y() = p.y()/scale_y;
        return p;
    }

    const tensor& get_layer_params() const { return params; }
    tensor& get_layer_params() { return params; }

    friend void serialize(const resize_to_& item, std::ostream& out)
    {
        serialize("resize_to_", out);
        serialize(NR_, out);
        serialize(NC_, out);
        serialize(item.scale_y, out);
        serialize(item.scale_x, out);
    }

    friend void deserialize(resize_to_& item, std::istream& in)
    {
        std::string version;
        deserialize(version, in);
        if (version != "resize_to_")
            throw serialization_error("Unexpected version '"+version+"' found while deserializing dlib::resize_to_.");
        long _nr;
        long _nc;
        deserialize(_nr, in);
        deserialize(_nc, in);
        deserialize(item.scale_y, in);
        deserialize(item.scale_x, in);
        if (_nr != NR_ || _nc != NC_)
            throw serialization_error("Wrong size found while deserializing dlib::resize_to_");
    }

    friend std::ostream& operator<<(std::ostream& out, const resize_to_& item)
    {
        out << "resize_to ("
            << "nr=" << NR_
            << ", nc=" << NC_
            << ")";
        return out;
    }

    friend void to_xml(const resize_to_& item, std::ostream& out)
    {
        out << "<resize_to";
        out << " nr='" << NR_ << "'" ;
        out << " nc='" << NC_ << "'/>\n";
    }

private:
    resizable_tensor params; // always empty; this layer is parameter-free.

    // Initialized to 1 (identity mapping) so that serialize() and the
    // map_*() functions are well defined even before the first call to
    // forward().  Previously these members were left uninitialized, making
    // such reads undefined behavior.
    double scale_y = 1;
    double scale_x = 1;
}; // end of class resize_to_
// Convenience alias: wraps resize_to_<NR,NC> in add_layer so it can be
// composed directly into a network type expression, e.g.
// resize_to<64,64, input<matrix<float>>>.
template <
    long NR,
    long NC,
    typename SUBNET
    >
using resize_to = add_layer<resize_to_<NR,NC>, SUBNET>;
// ----------------------------------------------------------------------------------------
template <
......
......@@ -1191,7 +1191,7 @@ namespace dlib
- OUT.num_samples() == IN.num_samples()
- OUT.k() == IN.k()
- OUT.nr() == IN.nr()*scale_y
- OUT.nc() == IN.nr()*scale_x
- OUT.nc() == IN.nc()*scale_x
- for all valid i,k: image_plane(OUT,i,k) is a copy of
image_plane(IN,i,k) that has been bilinearly interpolated to fit into
the shape of image_plane(OUT,i,k).
......@@ -1224,6 +1224,62 @@ namespace dlib
>
using upsample = add_layer<upsample_<scale,scale>, SUBNET>;
// ----------------------------------------------------------------------------------------
template <
    long NR_,
    long NC_
    >
class resize_to_
{
    /*!
        REQUIREMENTS ON THE INPUT ARGUMENTS
            - NR_ >= 1
            - NC_ >= 1

        WHAT THIS OBJECT REPRESENTS
            This is an implementation of the EXAMPLE_COMPUTATIONAL_LAYER_ interface
            defined above.  In particular, it allows you to resize a layer using
            bilinear interpolation.  To be very specific, it independently resizes
            each of the channels in an input tensor to the fixed NR_ by NC_ output
            size (the input's aspect ratio is therefore not preserved unless it
            already matches NR_:NC_).  Therefore, if IN is the input tensor to this
            layer and OUT the output tensor, then we will have:
                - OUT.num_samples() == IN.num_samples()
                - OUT.k() == IN.k()
                - OUT.nr() == NR_
                - OUT.nc() == NC_
                - for all valid i,k:  image_plane(OUT,i,k) is a copy of
                  image_plane(IN,i,k) that has been bilinearly interpolated to fit into
                  the shape of image_plane(OUT,i,k).
    !*/
public:
    resize_to_(
    );
    /*!
        ensures
            - This object has no state, so the constructor does nothing, aside from
              providing default constructability.
    !*/

    template <typename SUBNET> void setup (const SUBNET& sub);
    template <typename SUBNET> void forward(const SUBNET& sub, resizable_tensor& output);
    template <typename SUBNET> void backward(const tensor& gradient_input, SUBNET& sub, tensor& params_grad);
    dpoint map_input_to_output(dpoint p) const;
    dpoint map_output_to_input(dpoint p) const;
    const tensor& get_layer_params() const;
    tensor& get_layer_params();
    /*!
        These functions are implemented as described in the EXAMPLE_COMPUTATIONAL_LAYER_ interface.
    !*/
};
// Convenience alias so resize_to_<NR,NC> can appear directly inside a network
// type expression (see add_layer).
template <
    long NR,
    long NC,
    typename SUBNET
    >
using resize_to = add_layer<resize_to_<NR,NC>, SUBNET>;
// ----------------------------------------------------------------------------------------
class dropout_
......
......@@ -1658,6 +1658,24 @@ namespace
auto res = test_layer(l);
DLIB_TEST_MSG(res, res);
}
{
    // resize_to_: degenerate smallest case, 1x1 output.
    print_spinner();
    resize_to_<1,1> l;
    auto res = test_layer(l);
    DLIB_TEST_MSG(res, res);
}
{
    // resize_to_: non-square output (rows != cols) exercises the
    // independent row/column scale factors.
    print_spinner();
    resize_to_<2,1> l;
    auto res = test_layer(l);
    DLIB_TEST_MSG(res, res);
}
{
    // resize_to_: square output case.
    print_spinner();
    resize_to_<2,2> l;
    auto res = test_layer(l);
    DLIB_TEST_MSG(res, res);
}
{
print_spinner();
l2normalize_ l;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment