Commit 73d78355 authored by Davis King

Added a padding parameter to the pooling and convolution classes. Still need to expose it in the final layer interface.
parent 08c87784
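
For orientation: this patch replaces the implicit window/2 padding previously baked into the pooling and convolution code with explicit padding_y/padding_x parameters and updates the output-size arithmetic accordingly. A minimal sketch of that arithmetic (illustrative helper, not part of the patch):

    // 1 + (input + 2*padding - window) / stride, using integer division,
    // as used below for both pooling windows and convolution filters.
    inline long padded_out_size(long input, long window, long stride, long padding)
    {
        return 1 + (input + 2*padding - window)/stride;
    }
    // Example: a 16-row plane, 3-row window, stride 2, padding 1 gives
    // padded_out_size(16, 3, 2, 1) == 8 output rows.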
......@@ -1245,7 +1245,7 @@ namespace dlib
// ------------------------------------------------------------------------------------
pooling::pooling (
) : window_height(0),window_width(0),stride_y(0),stride_x(0),do_max_pooling(true)
) : window_height(0),window_width(0),stride_y(0),stride_x(0),padding_y(0),padding_x(0),do_max_pooling(true)
{
}
......@@ -1257,6 +1257,8 @@ namespace dlib
window_width = 0;
stride_y = 0;
stride_x = 0;
padding_y = 0;
padding_x = 0;
}
void pooling::
......@@ -1264,18 +1266,24 @@ namespace dlib
int window_height_,
int window_width_,
int stride_y_,
int stride_x_
int stride_x_,
int padding_y_,
int padding_x_
)
{
DLIB_CASSERT(window_width_ > 0,"");
DLIB_CASSERT(window_height_ > 0,"");
DLIB_CASSERT(stride_y_ > 0,"");
DLIB_CASSERT(stride_x_ > 0,"");
DLIB_CASSERT(0 <= padding_y_ && padding_y_ < window_height_,"");
DLIB_CASSERT(0 <= padding_x_ && padding_x_ < window_width_, "");
window_height = window_height_;
window_width = window_width_;
stride_y = stride_y_;
stride_x = stride_x_;
padding_y = padding_y_;
padding_x = padding_x_;
do_max_pooling = true;
}
......@@ -1284,18 +1292,24 @@ namespace dlib
int window_height_,
int window_width_,
int stride_y_,
int stride_x_
int stride_x_,
int padding_y_,
int padding_x_
)
{
DLIB_CASSERT(window_width_ > 0,"");
DLIB_CASSERT(window_height_ > 0,"");
DLIB_CASSERT(stride_y_ > 0,"");
DLIB_CASSERT(stride_x_ > 0,"");
DLIB_CASSERT(0 <= padding_y_ && padding_y_ < window_height_,"");
DLIB_CASSERT(0 <= padding_x_ && padding_x_ < window_width_, "");
window_height = window_height_;
window_width = window_width_;
stride_y = stride_y_;
stride_x = stride_x_;
padding_y = padding_y_;
padding_x = padding_x_;
do_max_pooling = false;
}
......@@ -1309,12 +1323,18 @@ namespace dlib
DLIB_CASSERT(window_height > 0,"");
DLIB_CASSERT(stride_y > 0,"");
DLIB_CASSERT(stride_x > 0,"");
DLIB_CASSERT(0 <= padding_y && padding_y < window_height,"");
DLIB_CASSERT(0 <= padding_x && padding_x < window_width, "");
DLIB_CASSERT(window_width <= src.nc() + 2*padding_x,
"Pooling windows must be small enough to fit into the padded image.");
DLIB_CASSERT(window_height <= src.nr() + 2*padding_y,
"Pooling windows must be small enough to fit into the padded image.");
dest.set_size(
src.num_samples(),
src.k(),
1+(src.nr()-window_height%2)/stride_y,
1+(src.nc()-window_width%2)/stride_x
1+(src.nr()+2*padding_y-window_height)/stride_y,
1+(src.nc()+2*padding_x-window_width)/stride_x
);
if (src.size() == 0)
......@@ -1326,6 +1346,8 @@ namespace dlib
auto d = dest.host();
auto s = src.host();
const long x_offset = window_width/2 - padding_x;
const long y_offset = window_height/2 - padding_y;
if (does_max_pooling())
{
for (long n = 0; n < dest.num_samples(); ++n)
......@@ -1339,8 +1361,8 @@ namespace dlib
{
for (long c = 0; c < dest.nc(); ++c)
{
auto win = centered_rect(c*stride_x,
r*stride_y,
auto win = centered_rect(c*stride_x+x_offset,
r*stride_y+y_offset,
window_width,
window_height);
dimg[r*dest.nc() + c] = max(subm_clipped(simg,win));
......@@ -1362,8 +1384,8 @@ namespace dlib
{
for (long c = 0; c < dest.nc(); ++c)
{
auto win = centered_rect(c*stride_x,
r*stride_y,
auto win = centered_rect(c*stride_x+x_offset,
r*stride_y+y_offset,
window_width,
window_height);
dimg[r*dest.nc() + c] = mean(subm_clipped(simg,win));
......@@ -1395,6 +1417,8 @@ namespace dlib
auto gi = gradient_input.host();
auto g = grad.host();
auto s = src.host();
const long x_offset = window_width/2 - padding_x;
const long y_offset = window_height/2 - padding_y;
if (does_max_pooling())
{
for (long n = 0; n < dest.num_samples(); ++n)
......@@ -1410,8 +1434,8 @@ namespace dlib
{
for (long c = 0; c < dest.nc(); ++c)
{
auto win = centered_rect(c*stride_x,
r*stride_y,
auto win = centered_rect(c*stride_x+x_offset,
r*stride_y+y_offset,
window_width,
window_height).intersect(imgbox);
auto p = max_point(subm(simg,win))+win.tl_corner();
......@@ -1436,8 +1460,8 @@ namespace dlib
{
for (long c = 0; c < dest.nc(); ++c)
{
auto win = centered_rect(c*stride_x,
r*stride_y,
auto win = centered_rect(c*stride_x+x_offset,
r*stride_y+y_offset,
window_width,
window_height).intersect(imgbox);
const float delta = giimg[r*dest.nc()+c]/win.area();
......@@ -1467,14 +1491,16 @@ namespace dlib
long filter_nr,
long filter_nc,
long stride_y,
long stride_x
long stride_x,
long padding_y,
long padding_x
)
{
const auto d = data.host() + data.k()*data.nr()*data.nc()*n;
const rectangle boundary = get_rect(data);
const long out_nr = 1+(data.nr()-filter_nr%2)/stride_y;
const long out_nc = 1+(data.nc()-filter_nc%2)/stride_x;
const long out_nr = 1+(data.nr()+2*padding_y-filter_nr)/stride_y;
const long out_nc = 1+(data.nc()+2*padding_x-filter_nc)/stride_x;
output.set_size(out_nr*out_nc,
data.k()*filter_nr*filter_nc);
......@@ -1483,9 +1509,9 @@ namespace dlib
// now fill in the Toeplitz output matrix for the n-th sample in data.
size_t cnt = 0;
for (long r = -(1-filter_nr%2); r < data.nr(); r+=stride_y)
for (long r = filter_nr-1-padding_y; r-padding_y < data.nr(); r+=stride_y)
{
for (long c = -(1-filter_nc%2); c < data.nc(); c+=stride_x)
for (long c = filter_nc-1-padding_x; c-padding_x < data.nc(); c+=stride_x)
{
for (long k = 0; k < data.k(); ++k)
{
......@@ -1493,9 +1519,9 @@ namespace dlib
{
for (long x = 0; x < filter_nc; ++x)
{
DLIB_CASSERT(cnt < output.size(),"");
long xx = c-x+filter_nc/2;
long yy = r-y+filter_nr/2;
DLIB_ASSERT(cnt < output.size(),"");
long xx = c-x;
long yy = r-y;
if (boundary.contains(xx,yy))
*t = d[(k*data.nr() + yy)*data.nc() + xx];
else
......@@ -1516,7 +1542,9 @@ namespace dlib
long filter_nr,
long filter_nc,
long stride_y,
long stride_x
long stride_x,
long padding_y,
long padding_x
)
{
const auto d = data.host() + data.k()*data.nr()*data.nc()*n;
......@@ -1526,9 +1554,9 @@ namespace dlib
const float* t = &output(0,0);
// now fill in the Toeplitz output matrix for the n-th sample in data.
for (long r = -(1-filter_nr%2); r < data.nr(); r+=stride_y)
for (long r = filter_nr-1-padding_y; r-padding_y < data.nr(); r+=stride_y)
{
for (long c = -(1-filter_nc%2); c < data.nc(); c+=stride_x)
for (long c = filter_nc-1-padding_x; c-padding_x < data.nc(); c+=stride_x)
{
for (long k = 0; k < data.k(); ++k)
{
......@@ -1536,8 +1564,8 @@ namespace dlib
{
for (long x = 0; x < filter_nc; ++x)
{
long xx = c-x+filter_nc/2;
long yy = r-y+filter_nr/2;
long xx = c-x;
long yy = r-y;
if (boundary.contains(xx,yy))
d[(k*data.nr() + yy)*data.nc() + xx] += *t;
++t;
......@@ -1553,28 +1581,38 @@ namespace dlib
const tensor& data,
const tensor& filters,
int stride_y,
int stride_x
int stride_x,
int padding_y,
int padding_x
)
{
DLIB_CASSERT(is_same_object(output,data) == false,"");
DLIB_CASSERT(is_same_object(output,filters) == false,"");
DLIB_CASSERT(filters.k() == data.k(),"");
DLIB_CASSERT(stride_y > 0 && stride_x > 0,"");
DLIB_CASSERT(0 <= padding_y && padding_y < filters.nr(),"");
DLIB_CASSERT(0 <= padding_x && padding_x < filters.nc(),"");
DLIB_CASSERT(filters.nr() <= data.nr() + 2*padding_y,
"Filter windows must be small enough to fit into the padded image.");
DLIB_CASSERT(filters.nc() <= data.nc() + 2*padding_x,
"Filter windows must be small enough to fit into the padded image.");
output.set_size(data.num_samples(),
filters.num_samples(),
1+(data.nr()-filters.nr()%2)/stride_y,
1+(data.nc()-filters.nc()%2)/stride_x);
1+(data.nr()+2*padding_y-filters.nr())/stride_y,
1+(data.nc()+2*padding_x-filters.nc())/stride_x);
matrix<float> temp;
for (long n = 0; n < data.num_samples(); ++n)
{
img2col(temp, data, n, filters.nr(), filters.nc(), stride_y, stride_x);
img2col(temp, data, n, filters.nr(), filters.nc(), stride_y, stride_x, padding_y, padding_x);
output.set_sample(n, mat(filters)*trans(temp));
}
last_stride_y = stride_y;
last_stride_x = stride_x;
last_padding_y = padding_y;
last_padding_x = padding_x;
}
// ------------------------------------------------------------------------------------
......@@ -1595,7 +1633,7 @@ namespace dlib
temp = trans(gi)*mat(filters);
col2img(temp, data_gradient, n, filters.nr(), filters.nc(), last_stride_y, last_stride_x);
col2img(temp, data_gradient, n, filters.nr(), filters.nc(), last_stride_y, last_stride_x, last_padding_y, last_padding_x);
}
}
......@@ -1616,7 +1654,7 @@ namespace dlib
gradient_input.nr()*gradient_input.nc());
img2col(temp, data, n, filters_gradient.nr(), filters_gradient.nc(), last_stride_y, last_stride_x);
img2col(temp, data, n, filters_gradient.nr(), filters_gradient.nc(), last_stride_y, last_stride_x, last_padding_y, last_padding_x);
if (n == 0)
filters_gradient = gi*temp;
else
......
......@@ -279,14 +279,18 @@ namespace dlib
int window_height,
int window_width,
int stride_y,
int stride_x
int stride_x,
int padding_y,
int padding_x
);
void setup_avg_pooling(
int window_height,
int window_width,
int stride_y,
int stride_x
int stride_x,
int padding_y,
int padding_x
);
bool does_max_pooling(
......@@ -309,6 +313,8 @@ namespace dlib
int window_width;
int stride_y;
int stride_x;
int padding_y;
int padding_x;
bool do_max_pooling;
};
......@@ -331,7 +337,9 @@ namespace dlib
const tensor& data,
const tensor& filters,
int stride_y,
int stride_x
int stride_x,
int padding_y,
int padding_x
);
void get_gradient_for_data (
......@@ -350,6 +358,8 @@ namespace dlib
long last_stride_y;
long last_stride_x;
long last_padding_y;
long last_padding_x;
};
// -----------------------------------------------------------------------------------
......
......@@ -737,6 +737,8 @@ namespace dlib
stride_y = 0;
stride_x = 0;
padding_y = 0;
padding_x = 0;
data_num_samples = 0;
data_k = 0;
data_nr = 0;
......@@ -752,7 +754,9 @@ namespace dlib
const tensor& data,
const tensor& filters,
int stride_y_,
int stride_x_
int stride_x_,
int padding_y_,
int padding_x_
)
{
DLIB_CASSERT(data.k() == filters.k(),"");
......@@ -761,6 +765,8 @@ namespace dlib
// anything.
if (stride_y_ == stride_y &&
stride_x_ == stride_x &&
padding_y_ == padding_y &&
padding_x_ == padding_x &&
data_num_samples == data.num_samples() &&
data_k == data.k() &&
data_nr == data.nr() &&
......@@ -778,6 +784,8 @@ namespace dlib
{
stride_y = stride_y_;
stride_x = stride_x_;
padding_y = padding_y_;
padding_x = padding_x_;
data_num_samples = data.num_samples();
data_k = data.k();
data_nr = data.nr();
......@@ -798,8 +806,8 @@ namespace dlib
CHECK_CUDNN(cudnnCreateConvolutionDescriptor((cudnnConvolutionDescriptor_t*)&conv_handle));
CHECK_CUDNN(cudnnSetConvolution2dDescriptor((cudnnConvolutionDescriptor_t)conv_handle,
filters.nr()/2, // vertical padding
filters.nc()/2, // horizontal padding
padding_y, // vertical padding
padding_x, // horizontal padding
stride_y,
stride_x,
1, 1, // must be 1,1
......@@ -907,22 +915,31 @@ namespace dlib
const tensor& data,
const tensor& filters,
int stride_y,
int stride_x
int stride_x,
int padding_y,
int padding_x
)
{
DLIB_CASSERT(is_same_object(output,data) == false,"");
DLIB_CASSERT(is_same_object(output,filters) == false,"");
DLIB_CASSERT(filters.k() == data.k(),"");
DLIB_CASSERT(stride_y > 0 && stride_x > 0,"");
DLIB_CASSERT(filters.nr() <= data.nr() + 2*padding_y,
"Filter windows must be small enough to fit into the padded image.");
DLIB_CASSERT(filters.nc() <= data.nc() + 2*padding_x,
"Filter windows must be small enough to fit into the padded image.");
setup(data,filters,stride_y,stride_x);
setup(data,filters,stride_y,stride_x,padding_y,padding_x);
output.set_size(out_num_samples, out_k, out_nr, out_nc);
DLIB_ASSERT(output.num_samples() == data.num_samples(),out_num_samples << " " << data.num_samples());
DLIB_ASSERT(output.k() == filters.num_samples(),"");
DLIB_ASSERT(output.nr() == 1+(data.nr()-filters.nr()%2)/stride_y,"");
DLIB_ASSERT(output.nc() == 1+(data.nc()-filters.nc()%2)/stride_x,output.nc() << " " <<1+(data.nc()-1)/stride_x << " : " << data.nc() << " " << stride_x);
DLIB_ASSERT(output.nr() == 1+(data.nr()+2*padding_y-filters.nr())/stride_y,"");
DLIB_ASSERT(output.nc() == 1+(data.nc()+2*padding_x-filters.nc())/stride_x,"");
const float alpha = 1;
const float beta = 0;
......@@ -995,7 +1012,7 @@ namespace dlib
// ------------------------------------------------------------------------------------
pooling::pooling (
) : handle(nullptr),window_height(0),window_width(0),stride_y(0),stride_x(0)
) : handle(nullptr),window_height(0),window_width(0),stride_y(0),stride_x(0),padding_y(0), padding_x(0)
{
}
......@@ -1016,6 +1033,8 @@ namespace dlib
window_width = 0;
stride_y = 0;
stride_x = 0;
padding_y = 0;
padding_x = 0;
}
void pooling::
......@@ -1023,10 +1042,12 @@ namespace dlib
int window_height_,
int window_width_,
int stride_y_,
int stride_x_
int stride_x_,
int padding_y_,
int padding_x_
)
{
setup(window_height_, window_width_, stride_y_, stride_x_, CUDNN_POOLING_MAX);
setup(window_height_, window_width_, stride_y_, stride_x_, padding_y_, padding_x_, CUDNN_POOLING_MAX);
do_max_pooling = true;
}
......@@ -1035,10 +1056,12 @@ namespace dlib
int window_height_,
int window_width_,
int stride_y_,
int stride_x_
int stride_x_,
int padding_y_,
int padding_x_
)
{
setup(window_height_, window_width_, stride_y_, stride_x_, CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING);
setup(window_height_, window_width_, stride_y_, stride_x_, padding_y_, padding_x_, CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING);
do_max_pooling = false;
}
......@@ -1048,13 +1071,31 @@ namespace dlib
int window_width_,
int stride_y_,
int stride_x_,
int padding_y_,
int padding_x_,
int pooling_mode
)
{
DLIB_CASSERT (window_height_ > 0 && window_width_ > 0 &&
stride_y_ > 0 && stride_x_ > 0 ,
"window_height_: " << window_height_
<< "\t\n window_width_: " << window_width_
<< "\t\n stride_y_: " << stride_y_
<< "\t\n stride_x_: " << stride_x_ );
DLIB_CASSERT( 0 <= padding_y_ && padding_y_ < window_height_ &&
0 <= padding_x_ && padding_x_ < window_width_,
"window_height_: " << window_height_
<< "\t\n window_width_: " << window_width_
<< "\t\n padding_y_: " << padding_y_
<< "\t\n padding_x_: " << padding_x_ );
if (window_height == window_height_ &&
window_width == window_width_ &&
stride_y == stride_y_ &&
stride_x == stride_x_ )
stride_x == stride_x_ &&
padding_y == padding_y_ &&
padding_x == padding_x_
)
{
return;
}
......@@ -1066,6 +1107,8 @@ namespace dlib
window_width = window_width_;
stride_x = stride_x_;
stride_y = stride_y_;
padding_y = padding_y_;
padding_x = padding_x_;
cudnnPoolingDescriptor_t poolingDesc;
CHECK_CUDNN(cudnnCreatePoolingDescriptor(&poolingDesc));
handle = poolingDesc;
......@@ -1075,8 +1118,8 @@ namespace dlib
CUDNN_PROPAGATE_NAN,
window_height,
window_width,
window_height/2,
window_width/2,
padding_y,
padding_x,
stride_y,
stride_x));
}
......@@ -1093,6 +1136,10 @@ namespace dlib
const tensor& src
)
{
DLIB_CASSERT(window_width <= src.nc() + 2*padding_x,
"Pooling windows must be small enough to fit into the padded image.");
DLIB_CASSERT(window_height <= src.nr() + 2*padding_y,
"Pooling windows must be small enough to fit into the padded image.");
const float alpha = 1;
const float beta = 0;
int outN;
......@@ -1111,14 +1158,16 @@ namespace dlib
DLIB_CASSERT(dest.num_samples() == src.num_samples(),"");
DLIB_CASSERT(dest.k() == src.k(),"");
DLIB_CASSERT(dest.nr() == 1+(src.nr()-window_height%2)/stride_y,
"\n stride_y: " << stride_y <<
DLIB_CASSERT(dest.nr() == 1 + (src.nr() + 2*padding_y - window_height)/stride_y,
"\n stride_y: " << stride_y <<
"\n padding_y: " << padding_y <<
"\n window_height: " << window_height <<
"\n src.nr(): " << src.nr() <<
"\n dest.nr(): " << dest.nr() <<
"\n src.nr()/stride_y: " << src.nr()/stride_y);
DLIB_CASSERT(dest.nc() == 1+(src.nc()-window_width%2)/stride_x,
"\n stride_x: " << stride_x <<
DLIB_CASSERT(dest.nc() == 1 + (src.nc() + 2*padding_x - window_width)/stride_x,
"\n stride_x: " << stride_x <<
"\n padding_x: " << padding_x <<
"\n window_width: " << window_width <<
"\n src.nc(): " << src.nc() <<
"\n dest.nc(): " << dest.nc() <<
......
......@@ -221,12 +221,16 @@ namespace dlib
const tensor& data,
const tensor& filters,
int stride_y,
int stride_x
int stride_x,
int padding_y,
int padding_x
);
/*!
requires
- stride_y > 0
- stride_x > 0
- 0 <= padding_y < filters.nr()
- 0 <= padding_x < filters.nc()
- is_same_object(output,data) == false
- is_same_object(output,filters) == false
ensures
......@@ -286,18 +290,24 @@ namespace dlib
const tensor& data,
const tensor& filters,
int stride_y,
int stride_x
int stride_x,
int padding_y,
int padding_x
);
/*!
requires
- filters.k() == data.k()
- stride_y > 0
- stride_x > 0
- 0 <= padding_y < filters.nr()
- 0 <= padding_x < filters.nc()
!*/
// These variables record the type of data given to the last call to setup().
int stride_y;
int stride_x;
int padding_y;
int padding_x;
long data_num_samples, data_k, data_nr, data_nc;
long filters_num_samples, filters_k, filters_nr, filters_nc;
......@@ -346,14 +356,18 @@ namespace dlib
int window_height,
int window_width,
int stride_y,
int stride_x
int stride_x,
int padding_y,
int padding_x
);
void setup_avg_pooling(
int window_height,
int window_width,
int stride_y,
int stride_x
int stride_x,
int padding_y,
int padding_x
);
bool does_max_pooling(
......@@ -378,6 +392,8 @@ namespace dlib
int window_width,
int stride_y,
int stride_x,
int padding_y,
int padding_x,
int pooling_mode
);
......@@ -386,6 +402,8 @@ namespace dlib
int window_width;
int stride_y;
int stride_x;
int padding_y;
int padding_x;
bool do_max_pooling;
};
......
......@@ -97,7 +97,10 @@ namespace dlib
sub.get_output(),
filters(params,0),
_stride_y,
_stride_x);
_stride_x,
_nr/2,
_nc/2
);
tt::add(1,output,1,biases(params,filters.size()));
}
......@@ -221,7 +224,7 @@ namespace dlib
{
// this->mp is non-copyable so we have to write our own copy to avoid trying to
// copy it and getting an error.
mp.setup_max_pooling(_nr, _nc, _stride_y, _stride_x);
mp.setup_max_pooling(_nr, _nc, _stride_y, _stride_x, _nr/2, _nc/2);
}
max_pool_& operator= (
......@@ -233,14 +236,14 @@ namespace dlib
// this->mp is non-copyable so we have to write our own copy to avoid trying to
// copy it and getting an error.
mp.setup_max_pooling(_nr, _nc, _stride_y, _stride_x);
mp.setup_max_pooling(_nr, _nc, _stride_y, _stride_x, _nr/2, _nc/2);
return *this;
}
template <typename SUBNET>
void setup (const SUBNET& /*sub*/)
{
mp.setup_max_pooling(_nr, _nc, _stride_y, _stride_x);
mp.setup_max_pooling(_nr, _nc, _stride_y, _stride_x, _nr/2, _nc/2);
}
template <typename SUBNET>
......@@ -274,7 +277,7 @@ namespace dlib
if (version != "max_pool_")
throw serialization_error("Unexpected version '"+version+"' found while deserializing dlib::max_pool_.");
item.mp.setup_max_pooling(_nr, _nc, _stride_y, _stride_x);
item.mp.setup_max_pooling(_nr, _nc, _stride_y, _stride_x, _nr/2, _nc/2);
long nr;
long nc;
......@@ -349,7 +352,7 @@ namespace dlib
{
// this->ap is non-copyable so we have to write our own copy to avoid trying to
// copy it and getting an error.
ap.setup_avg_pooling(_nr, _nc, _stride_y, _stride_x);
ap.setup_avg_pooling(_nr, _nc, _stride_y, _stride_x, _nr/2, _nc/2);
}
avg_pool_& operator= (
......@@ -361,14 +364,14 @@ namespace dlib
// this->ap is non-copyable so we have to write our own copy to avoid trying to
// copy it and getting an error.
ap.setup_avg_pooling(_nr, _nc, _stride_y, _stride_x);
ap.setup_avg_pooling(_nr, _nc, _stride_y, _stride_x, _nr/2, _nc/2);
return *this;
}
template <typename SUBNET>
void setup (const SUBNET& /*sub*/)
{
ap.setup_avg_pooling(_nr, _nc, _stride_y, _stride_x);
ap.setup_avg_pooling(_nr, _nc, _stride_y, _stride_x, _nr/2, _nc/2);
}
template <typename SUBNET>
......@@ -402,7 +405,7 @@ namespace dlib
if (version != "avg_pool_")
throw serialization_error("Unexpected version '"+version+"' found while deserializing dlib::avg_pool_.");
item.ap.setup_avg_pooling(_nr, _nc, _stride_y, _stride_x);
item.ap.setup_avg_pooling(_nr, _nc, _stride_y, _stride_x, _nr/2, _nc/2);
long nr;
long nc;
......
......@@ -639,22 +639,28 @@ namespace dlib { namespace tt
const tensor& data,
const tensor& filters,
int stride_y,
int stride_x
) { impl(output,data,filters,stride_y,stride_x); }
int stride_x,
int padding_y,
int padding_x
) { impl(output,data,filters,stride_y,stride_x,padding_y,padding_x); }
/*!
requires
- stride_y > 0
- stride_x > 0
- 0 <= padding_y < filters.nr()
- 0 <= padding_x < filters.nc()
- is_same_object(output,data) == false
- is_same_object(output,filters) == false
- filters.k() == data.k()
- filters.nr() <= src.nr() + 2*padding_y
- filters.nc() <= src.nc() + 2*padding_x
ensures
- convolves filters over data.
- filters contains filters.num_samples() filters.
- #output.num_samples() == data.num_samples()
- #output.k() == filters.num_samples()
- #output.nr() == 1+(data.nr()-filters.nr()%2)/stride_y
- #output.nc() == 1+(data.nc()-filters.nc()%2)/stride_x
- #output.nr() == 1+(data.nr() + 2*padding_y - filters.nr())/stride_y
- #output.nc() == 1+(data.nc() + 2*padding_x - filters.nc())/stride_x
!*/
void get_gradient_for_data (
......@@ -732,14 +738,18 @@ namespace dlib { namespace tt
int window_height,
int window_width,
int stride_y,
int stride_x
) { impl.setup_max_pooling(window_height, window_width, stride_y, stride_x); }
int stride_x,
int padding_y,
int padding_x
) { impl.setup_max_pooling(window_height, window_width, stride_y, stride_x, padding_y, padding_x); }
/*!
requires
- window_height > 0
- window_width > 0
- stride_y > 0
- stride_x > 0
- 0 <= padding_y < window_height
- 0 <= padding_x < window_width
ensures
- When you call operator() it will do max pooling with the given
parameters.
......@@ -749,14 +759,18 @@ namespace dlib { namespace tt
int window_height,
int window_width,
int stride_y,
int stride_x
) { impl.setup_avg_pooling(window_height, window_width, stride_y, stride_x); }
int stride_x,
int padding_y,
int padding_x
) { impl.setup_avg_pooling(window_height, window_width, stride_y, stride_x, padding_y, padding_x); }
/*!
requires
- window_height > 0
- window_width > 0
- stride_y > 0
- stride_x > 0
- 0 <= padding_y < window_height
- 0 <= padding_x < window_width
ensures
- When you call operator() it will do average pooling with the given
parameters.
......@@ -773,24 +787,22 @@ namespace dlib { namespace tt
requires
- is_same_object(dest,src) == false
- either setup_max_pooling() or setup_avg_pooling() has been called.
- window_width <= src.nc() + 2*padding_x
- window_height <= src.nr() + 2*padding_y
ensures
- #dest.num_samples() == src.num_samples()
- #dest.k() == src.k()
- #dest.nr() == 1+(src.nr()-window_height%2)/stride_y
- #dest.nc() == 1+(src.nc()-window_width%2)/stride_x
- #dest.nr() == 1 + (src.nr() + 2*padding_y - window_height)/stride_y
- #dest.nc() == 1 + (src.nc() + 2*padding_x - window_width)/stride_x
- WINDOW(x,y) == centered_rect(x*stride_x + window_width/2 - padding_x,
y*stride_y + window_height/2 - padding_y,
window_width,
window_height)
- for all valid s, k, r, and c:
- if (does_max_pooling()) then
- image_plane(#dest,s,k)(r,c) == max(subm_clipped(image_plane(src,s,k),
centered_rect(c*stride_x,
r*stride_y,
window_width,
window_height)))
- image_plane(#dest,s,k)(r,c) == max(subm_clipped(image_plane(src,s,k),WINDOW(c,r)))
- else
- image_plane(#dest,s,k)(r,c) == mean(subm_clipped(image_plane(src,s,k),
centered_rect(c*stride_x,
r*stride_y,
window_width,
window_height)))
- image_plane(#dest,s,k)(r,c) == mean(subm_clipped(image_plane(src,s,k),WINDOW(c,r)))
!*/
void get_gradient(
......
......@@ -583,12 +583,19 @@ namespace
const int stride_y = prnd.get_random_32bit_number()%5+1;
const int stride_x = prnd.get_random_32bit_number()%5+1;
conv1(output1, data, filters, stride_y,stride_x);
conv2(output2, data, filters, stride_y,stride_x);
int padding_y = prnd.get_random_32bit_number()%(filters.nr()/2+1);
int padding_x = prnd.get_random_32bit_number()%(filters.nc()/2+1);
if (!(filters.nr() <= data.nr() + 2*padding_y))
padding_y = (filters.nr()-data.nr()+1)/2;
if (!(filters.nc() <= data.nc() + 2*padding_x))
padding_x = (filters.nc()-data.nc()+1)/2;
conv1(output1, data, filters, stride_y,stride_x, padding_y, padding_x);
conv2(output2, data, filters, stride_y,stride_x, padding_y, padding_x);
dlog << LINFO << "forward error: "<< max(abs(mat(output1)-mat(output2)));
DLIB_TEST(max(abs(mat(output1)-mat(output2))) < 1e-3);
DLIB_TEST_MSG(max(abs(mat(output1)-mat(output2))) < 1e-3, max(abs(mat(output1)-mat(output2)))
<<"\n\t padding_y: "<< padding_y
<<"\n\t padding_x: "<< padding_x
);
......@@ -621,7 +628,7 @@ namespace
conv2.get_gradient_for_filters(gi, data, filter_gradient2);
dlog << LINFO << "filter gradient error: "<< max(abs(mat(filter_gradient1)-mat(filter_gradient2)));
DLIB_TEST(max(abs(mat(filter_gradient1)-mat(filter_gradient2))) < 1e-3);
DLIB_TEST_MSG(max(abs(mat(filter_gradient1)-mat(filter_gradient2))) < 1e-3, max(abs(mat(filter_gradient1)-mat(filter_gradient2))));
}
}
......@@ -1026,12 +1033,14 @@ namespace
const int window_height,
const int window_width,
const int stride_y,
const int stride_x
const int stride_x,
const int padding_y,
const int padding_x
)
{
print_spinner();
resizable_tensor A, B, gradient_input;
A.set_size(2,2,16,7);
A.set_size(4,5,16,7);
B.copy_size(A);
gradient_input.copy_size(A);
......@@ -1043,14 +1052,18 @@ namespace
tt::pooling mp;
mp.setup_max_pooling(window_height,window_width,stride_y,stride_x);
mp.setup_max_pooling(window_height,window_width,stride_y,stride_x,padding_y,padding_x);
mp(A, B);
// make sure max pooling does what its spec says it should.
DLIB_TEST( A.num_samples() == B.num_samples());
DLIB_TEST( A.k() == B.k());
DLIB_TEST( A.nr() == 1+(B.nr()-window_height%2)/stride_y);
DLIB_TEST( A.nc() == 1+(B.nc()-window_width%2)/stride_x);
DLIB_TEST( A.nr() == 1+(B.nr()+2*padding_y-window_height)/stride_y);
DLIB_TEST( A.nc() == 1+(B.nc()+2*padding_x-window_width)/stride_x);
const long x_offset = window_width/2 - padding_x;
const long y_offset = window_height/2 - padding_y;
for (long s = 0; s < A.num_samples(); ++s)
{
for (long k = 0; k < A.k(); ++k)
......@@ -1059,11 +1072,15 @@ namespace
{
for (long c = 0; c < A.nc(); ++c)
{
DLIB_TEST(image_plane(A,s,k)(r,c) == max(subm_clipped(image_plane(B,s,k),
centered_rect(c*stride_x,
r*stride_y,
DLIB_TEST_MSG(image_plane(A,s,k)(r,c) == max(subm_clipped(image_plane(B,s,k),
centered_rect(c*stride_x+x_offset,
r*stride_y+y_offset,
window_width,
window_height))));
window_height))),
"padding: "<< padding_x << " " << padding_y
<< " window size: " << window_width << " " << window_height
<< " stride: " << stride_x << " " << stride_y
);
}
}
}
......@@ -1076,12 +1093,14 @@ namespace
const int window_height,
const int window_width,
const int stride_y,
const int stride_x
const int stride_x,
const int padding_y,
const int padding_x
)
{
print_spinner();
resizable_tensor A, B, gradient_input;
A.set_size(2,2,16,7);
A.set_size(4,5,16,7);
B.copy_size(A);
gradient_input.copy_size(A);
......@@ -1093,14 +1112,17 @@ namespace
tt::pooling mp;
mp.setup_avg_pooling(window_height,window_width,stride_y,stride_x);
mp.setup_avg_pooling(window_height,window_width,stride_y,stride_x,padding_y,padding_x);
mp(A, B);
// make sure avg pooling does what its spec says it should.
DLIB_TEST( A.num_samples() == B.num_samples());
DLIB_TEST( A.k() == B.k());
DLIB_TEST( A.nr() == 1+(B.nr()-window_height%2)/stride_y);
DLIB_TEST( A.nc() == 1+(B.nc()-window_width%2)/stride_x);
DLIB_TEST( A.nr() == 1+(B.nr()+2*padding_y-window_height)/stride_y);
DLIB_TEST( A.nc() == 1+(B.nc()+2*padding_x-window_width)/stride_x);
const long x_offset = window_width/2 - padding_x;
const long y_offset = window_height/2 - padding_y;
for (long s = 0; s < A.num_samples(); ++s)
{
for (long k = 0; k < A.k(); ++k)
......@@ -1110,8 +1132,8 @@ namespace
for (long c = 0; c < A.nc(); ++c)
{
float expected = mean(subm_clipped(image_plane(B,s,k),
centered_rect(c*stride_x,
r*stride_y,
centered_rect(c*stride_x+x_offset,
r*stride_y+y_offset,
window_width,
window_height)));
float err = abs(image_plane(A,s,k)(r,c) - expected);
......@@ -1275,17 +1297,30 @@ namespace
test_add();
compare_adam();
#endif
test_max_pool(1,1,2,3);
test_max_pool(3,3,1,1);
test_max_pool(3,3,2,2);
test_max_pool(2,2,2,2);
test_max_pool(4,5,3,1);
test_avg_pool(1,1,2,3);
test_avg_pool(3,3,1,1);
test_avg_pool(3,3,2,2);
test_avg_pool(2,2,2,2);
test_avg_pool(4,5,3,1);
test_avg_pool(100,100,100,100);
test_max_pool(1,1,2,3,0,0);
test_max_pool(3,3,1,1,0,0);
test_max_pool(3,3,2,2,0,0);
test_max_pool(2,2,2,2,0,0);
test_max_pool(4,5,3,1,0,0);
test_avg_pool(1,1,2,3,0,0);
test_avg_pool(3,3,1,1,0,0);
test_avg_pool(3,3,2,2,0,0);
test_avg_pool(2,2,2,2,0,0);
test_avg_pool(4,5,3,1,0,0);
test_avg_pool(4,4,2,2,0,0);
test_avg_pool(4,5,40,50,0,0);
test_max_pool(2,2,2,3,1,1);
test_max_pool(3,3,1,1,1,1);
test_max_pool(3,3,2,2,2,1);
test_max_pool(2,2,2,2,1,0);
test_max_pool(4,5,3,1,2,3);
test_avg_pool(1,1,2,3,0,0);
test_avg_pool(3,3,1,1,1,2);
test_avg_pool(3,3,2,2,2,1);
test_avg_pool(2,2,2,2,1,0);
test_avg_pool(4,5,3,1,2,4);
test_avg_pool(4,4,2,2,1,3);
test_avg_pool(4,5,40,50,0,1);
test_tanh();
test_softmax();
test_sigmoid();
......
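
For reference, a minimal usage sketch of the extended pooling interface exercised by the tests above (the tensor sizes and the umbrella include are assumptions for illustration, not taken from this patch):

    #include <dlib/dnn.h>  // assumed umbrella header for tt::pooling and resizable_tensor

    void padded_max_pool_example()
    {
        dlib::resizable_tensor out, src;
        src.set_size(4, 5, 16, 7);      // samples, k, rows, cols, as in the test above
        // ... fill src with data ...
        dlib::tt::pooling mp;
        // 3x3 window, stride 2, padding 1: output planes are
        // 1+(16+2-3)/2 = 8 rows by 1+(7+2-3)/2 = 4 cols.
        mp.setup_max_pooling(3, 3, 2, 2, 1, 1);
        mp(out, src);                   // out is resized to hold the pooled result
    }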