Pushed the padding parameters into the con_, max_pool_, and avg_pool_

interfaces. Also changed the default behavior when the stride isn't 1. Now the filters will be applied only to the "valid" part of the image.

Pushed the padding parameters into the con_, max_pool_, and avg_pool_
interfaces. Also changed the default behavior when the stride isn't 1. Now the filters will be applied only to the "valid" part of the image.
4ef5908b · Davis King · 6bab1f50 · 4ef5908b · 4ef5908b · 4ef5908b
Commit 4ef5908b authored May 05, 2016 by Davis King
Expand all Show whitespace changes
Inline Side-by-side

Showing with 87 additions and 16 deletions

layers.h dlib/dnn/layers.h +0 -0

layers_abstract.h dlib/dnn/layers_abstract.h +86 -15

dnn.cpp dlib/test/dnn.cpp +1 -1

No files found.
--- a/dlib/dnn/layers.h
+++ b/dlib/dnn/layers.h
--- a/dlib/dnn/layers_abstract.h
+++ b/dlib/dnn/layers_abstract.h
@@ -416,13 +416,18 @@ namespace dlib
        long _nr,
        long _nc,
        int _stride_y,
-        int _stride_x
+        int _stride_x,
+        int _padding_y = _stride_y!=1? 0 : _nr/2,
+        int _padding_x = _stride_x!=1? 0 : _nc/2
        >
    class con_
    {
        /*!
            REQUIREMENTS ON TEMPLATE ARGUMENTS
                All of them must be > 0.
+                The only exception is the padding parameters, which may be 0.  For them we require that:
+                    - 0 <= _padding_y && _padding_y < _nr
+                    - 0 <= _padding_x && _padding_x < _nc
            WHAT THIS OBJECT REPRESENTS
                This is an implementation of the EXAMPLE_COMPUTATIONAL_LAYER_ interface
@@ -434,8 +439,8 @@ namespace dlib
                IN be the input tensor and OUT the output tensor):
                    - OUT.num_samples() == IN.num_samples()
                    - OUT.k()  == num_filters()
-                    - OUT.nr() == 1+(IN.nr()-nr()%2)/stride_y()
+                    - OUT.nr() == 1+(IN.nr() + 2*padding_y() - nr())/stride_y()
-                    - OUT.nc() == 1+(IN.nc()-nc()%2)/stride_x()
+                    - OUT.nc() == 1+(IN.nc() + 2*padding_x() - nc())/stride_x()
        !*/
    public:
@@ -448,6 +453,8 @@ namespace dlib
                - #nc() == _nc
                - #stride_y() == _stride_y
                - #stride_x() == _stride_x
+                - #padding_y() == _padding_y
+                - #padding_x() == _padding_x
        !*/
        long num_filters(
@@ -491,6 +498,22 @@ namespace dlib
                  time when it moves over the image.
        !*/
+        long padding_y(
+        ) const; 
+        /*!
+            ensures
+                - returns the number of pixels of zero padding added to the top and bottom
+                  sides of the image.
+        !*/
+        long padding_x(
+        ) const; 
+        /*!
+            ensures
+                - returns the number of pixels of zero padding added to the left and right 
+                  sides of the image.
+        !*/
        template <typename SUBNET> void setup (const SUBNET& sub);
        template <typename SUBNET> void forward(const SUBNET& sub, resizable_tensor& output);
        template <typename SUBNET> void backward(const tensor& gradient_input, SUBNET& sub, tensor& params_grad);
@@ -813,13 +836,18 @@ namespace dlib
        long _nr,
        long _nc,
        int _stride_y,
-        int _stride_x
+        int _stride_x,
+        int _padding_y = _stride_y!=1? 0 : _nr/2,
+        int _padding_x = _stride_x!=1? 0 : _nc/2
        >
    class max_pool_
    {
        /*!
            REQUIREMENTS ON TEMPLATE ARGUMENTS
                All of them must be > 0.
+                The only exception is the padding parameters, which may be 0.  For them we require that:
+                    - 0 <= _padding_y && _padding_y < _nr
+                    - 0 <= _padding_x && _padding_x < _nc
            WHAT THIS OBJECT REPRESENTS
                This is an implementation of the EXAMPLE_COMPUTATIONAL_LAYER_ interface
@@ -832,14 +860,14 @@ namespace dlib
                then OUT is defined as follows:
                    - OUT.num_samples() == IN.num_samples()
                    - OUT.k()  == IN.k()
-                    - OUT.nr() == 1+(IN.nr()-nr()%2)/stride_y()
+                    - OUT.nr() == 1+(IN.nr() + 2*padding_y() - nr())/stride_y()
-                    - OUT.nc() == 1+(IN.nc()-nc()%2)/stride_x()
+                    - OUT.nc() == 1+(IN.nc() + 2*padding_x() - nc())/stride_x()
                    - for all valid s, k, r, and c:
                        - image_plane(OUT,s,k)(r,c) == max(subm_clipped(image_plane(IN,s,k),
-                                                                  centered_rect(r*stride_y(),
+                                                                  centered_rect(c*stride_x() + nc()/2 - padding_x(),
-                                                                                c*stride_x(),
+                                                                                r*stride_y() + nr()/2 - padding_y(),
-                                                                                nr(),
+                                                                                nc(),
-                                                                                nc())))
+                                                                                nr())))
        !*/
    public:
@@ -852,6 +880,8 @@ namespace dlib
                - #nc() == _nc
                - #stride_y() == _stride_y
                - #stride_x() == _stride_x
+                - #padding_y() == _padding_y
+                - #padding_x() == _padding_x
        !*/
        long nr(
@@ -886,6 +916,22 @@ namespace dlib
                  at a time when it moves over the image.
        !*/
+        long padding_y(
+        ) const; 
+        /*!
+            ensures
+                - returns the number of pixels of zero padding added to the top and bottom
+                  sides of the image.
+        !*/
+        long padding_x(
+        ) const; 
+        /*!
+            ensures
+                - returns the number of pixels of zero padding added to the left and right 
+                  sides of the image.
+        !*/
        template <typename SUBNET> void setup (const SUBNET& sub);
        template <typename SUBNET> void forward(const SUBNET& sub, resizable_tensor& output);
        template <typename SUBNET> void backward(const tensor& computed_output, const tensor& gradient_input, SUBNET& sub, tensor& params_grad);
@@ -913,13 +959,18 @@ namespace dlib
        long _nr,
        long _nc,
        int _stride_y,
-        int _stride_x
+        int _stride_x,
+        int _padding_y = _stride_y!=1? 0 : _nr/2,
+        int _padding_x = _stride_x!=1? 0 : _nc/2
        >
    class avg_pool_
    {
        /*!
            REQUIREMENTS ON TEMPLATE ARGUMENTS
                All of them must be > 0.
+                The only exception is the padding parameters, which may be 0.  For them we require that:
+                    - 0 <= _padding_y && _padding_y < _nr
+                    - 0 <= _padding_x && _padding_x < _nc
            WHAT THIS OBJECT REPRESENTS
                This is an implementation of the EXAMPLE_COMPUTATIONAL_LAYER_ interface
@@ -934,12 +985,14 @@ namespace dlib
                    - OUT.k()  == IN.k()
                    - OUT.nr() == 1+(IN.nr()-nr()%2)/stride_y()
                    - OUT.nc() == 1+(IN.nc()-nc()%2)/stride_x()
+                    - OUT.nr() == 1+(IN.nr() + 2*padding_y() - nr())/stride_y()
+                    - OUT.nc() == 1+(IN.nc() + 2*padding_x() - nc())/stride_x()
                    - for all valid s, k, r, and c:
                        - image_plane(OUT,s,k)(r,c) == mean(subm_clipped(image_plane(IN,s,k),
-                                                                  centered_rect(r*stride_y(),
+                                                                  centered_rect(c*stride_x() + nc()/2 - padding_x(),
-                                                                                c*stride_x(),
+                                                                                r*stride_y() + nr()/2 - padding_y(),
-                                                                                nr(),
+                                                                                nc(),
-                                                                                nc()))
+                                                                                nr())))
        !*/
    public:
@@ -952,6 +1005,8 @@ namespace dlib
                - #nc() == _nc
                - #stride_y() == _stride_y
                - #stride_x() == _stride_x
+                - #padding_y() == _padding_y
+                - #padding_x() == _padding_x
        !*/
        long nr(
@@ -986,6 +1041,22 @@ namespace dlib
                  at a time when it moves over the image.
        !*/
+        long padding_y(
+        ) const; 
+        /*!
+            ensures
+                - returns the number of pixels of zero padding added to the top and bottom
+                  sides of the image.
+        !*/
+        long padding_x(
+        ) const; 
+        /*!
+            ensures
+                - returns the number of pixels of zero padding added to the left and right 
+                  sides of the image.
+        !*/
        template <typename SUBNET> void setup (const SUBNET& sub);
        template <typename SUBNET> void forward(const SUBNET& sub, resizable_tensor& output);
        template <typename SUBNET> void backward(const tensor& computed_output, const tensor& gradient_input, SUBNET& sub, tensor& params_grad);

--- a/dlib/test/dnn.cpp
+++ b/dlib/test/dnn.cpp
@@ -1185,7 +1185,7 @@ namespace
        }
        {
            print_spinner();
-            con_<3,3,3,2,2> l;
+            con_<3,2,2,2,2> l;
            DLIB_TEST_MSG(test_layer(l), test_layer(l));
        }
        {