Made extract_fhog_features() faster by using simd instructions. Also added an

option to zero pad the borders of the output to it's easier to filter.

Made extract_fhog_features() faster by using simd instructions. Also added an
option to zero pad the borders of the output to it's easier to filter.
cbc79bcf · Davis King · 2895d425 · cbc79bcf · cbc79bcf
Commit cbc79bcf authored Nov 07, 2013 by Davis King
Show whitespace changes
Inline Side-by-side

Showing with 456 additions and 95 deletions

fhog.h dlib/image_transforms/fhog.h +391 -80

fhog_abstract.h dlib/image_transforms/fhog_abstract.h +65 -15

No files found.
--- a/dlib/image_transforms/fhog.h
+++ b/dlib/image_transforms/fhog.h
@@ -11,6 +11,8 @@
 #include "assign_image.h"
 #include "draw.h"
 #include "interpolation.h"
+#include "../simd/simd4i.h"
+#include "../simd/simd4f.h"
 namespace dlib
 {
@@ -57,6 +59,89 @@ namespace dlib
            }
        }
+        template <typename image_type>
+        inline typename dlib::enable_if_c<pixel_traits<typename image_type::type>::rgb>::type get_gradient (
+            const int r,
+            const int c,
+            const image_type& img,
+            simd4f& grad_x,
+            simd4f& grad_y,
+            simd4f& len
+        )
+        {
+            simd4i rleft((int)img[r][c-1].red, 
+                        (int)img[r][c].red,
+                        (int)img[r][c+1].red,
+                        (int)img[r][c+2].red);
+            simd4i rright((int)img[r][c+1].red, 
+                         (int)img[r][c+2].red,
+                         (int)img[r][c+3].red,
+                         (int)img[r][c+4].red);
+            simd4i rtop((int)img[r-1][c].red,
+                       (int)img[r-1][c+1].red,
+                       (int)img[r-1][c+2].red,
+                       (int)img[r-1][c+3].red);
+            simd4i rbottom((int)img[r+1][c].red,
+                          (int)img[r+1][c+1].red,
+                          (int)img[r+1][c+2].red,
+                          (int)img[r+1][c+3].red);
+            simd4i gleft((int)img[r][c-1].green, 
+                        (int)img[r][c].green,
+                        (int)img[r][c+1].green,
+                        (int)img[r][c+2].green);
+            simd4i gright((int)img[r][c+1].green, 
+                         (int)img[r][c+2].green,
+                         (int)img[r][c+3].green,
+                         (int)img[r][c+4].green);
+            simd4i gtop((int)img[r-1][c].green,
+                       (int)img[r-1][c+1].green,
+                       (int)img[r-1][c+2].green,
+                       (int)img[r-1][c+3].green);
+            simd4i gbottom((int)img[r+1][c].green,
+                          (int)img[r+1][c+1].green,
+                          (int)img[r+1][c+2].green,
+                          (int)img[r+1][c+3].green);
+            simd4i bleft((int)img[r][c-1].blue, 
+                        (int)img[r][c].blue,
+                        (int)img[r][c+1].blue,
+                        (int)img[r][c+2].blue);
+            simd4i bright((int)img[r][c+1].blue, 
+                         (int)img[r][c+2].blue,
+                         (int)img[r][c+3].blue,
+                         (int)img[r][c+4].blue);
+            simd4i btop((int)img[r-1][c].blue,
+                       (int)img[r-1][c+1].blue,
+                       (int)img[r-1][c+2].blue,
+                       (int)img[r-1][c+3].blue);
+            simd4i bbottom((int)img[r+1][c].blue,
+                          (int)img[r+1][c+1].blue,
+                          (int)img[r+1][c+2].blue,
+                          (int)img[r+1][c+3].blue);
+            simd4i grad_x_red   = rright-rleft;
+            simd4i grad_y_red   = rbottom-rtop;
+            simd4i grad_x_green = gright-gleft;
+            simd4i grad_y_green = gbottom-gtop;
+            simd4i grad_x_blue  = bright-bleft;
+            simd4i grad_y_blue  = bbottom-btop;
+            simd4i rlen = grad_x_red*grad_x_red + grad_y_red*grad_y_red;
+            simd4i glen = grad_x_green*grad_x_green + grad_y_green*grad_y_green;
+            simd4i blen = grad_x_blue*grad_x_blue + grad_y_blue*grad_y_blue;
+            simd4i cmp = rlen>glen;
+            simd4i tgrad_x = select(cmp,grad_x_red,grad_x_green);
+            simd4i tgrad_y = select(cmp,grad_y_red,grad_y_green);
+            simd4i tlen = select(cmp,rlen,glen);
+            cmp = tlen>blen;
+            grad_x = select(cmp,tgrad_x,grad_x_blue);
+            grad_y = select(cmp,tgrad_y,grad_y_blue);
+            len = select(cmp,tlen,blen);
+        }
    // ------------------------------------------------------------------------------------
        template <typename image_type>
@@ -73,10 +158,44 @@ namespace dlib
            len = length_squared(grad);
        }
+        template <typename image_type>
+        inline typename dlib::disable_if_c<pixel_traits<typename image_type::type>::rgb>::type get_gradient (
+            int r,
+            int c,
+            const image_type& img,
+            simd4f& grad_x,
+            simd4f& grad_y,
+            simd4f& len
+        )
+        {
+            simd4i left((int)get_pixel_intensity(img[r][c-1]), 
+                        (int)get_pixel_intensity(img[r][c]),
+                        (int)get_pixel_intensity(img[r][c+1]),
+                        (int)get_pixel_intensity(img[r][c+2]));
+            simd4i right((int)get_pixel_intensity(img[r][c+1]), 
+                         (int)get_pixel_intensity(img[r][c+2]),
+                         (int)get_pixel_intensity(img[r][c+3]),
+                         (int)get_pixel_intensity(img[r][c+4]));
+            simd4i top((int)get_pixel_intensity(img[r-1][c]),
+                       (int)get_pixel_intensity(img[r-1][c+1]),
+                       (int)get_pixel_intensity(img[r-1][c+2]),
+                       (int)get_pixel_intensity(img[r-1][c+3]));
+            simd4i bottom((int)get_pixel_intensity(img[r+1][c]),
+                          (int)get_pixel_intensity(img[r+1][c+1]),
+                          (int)get_pixel_intensity(img[r+1][c+2]),
+                          (int)get_pixel_intensity(img[r+1][c+3]));
+            grad_x = right-left;
+            grad_y = bottom-top;
+            len = (grad_x*grad_x + grad_y*grad_y);
+        }
    // ------------------------------------------------------------------------------------
        template <typename T, typename mm1, typename mm2>
-        void set_hog (
+        inline void set_hog (
            dlib::array<array2d<T,mm1>,mm2>& hog,
            int o,
            int x, 
@@ -91,21 +210,29 @@ namespace dlib
        void init_hog (
            dlib::array<array2d<T,mm1>,mm2>& hog,
            int hog_nr,
-            int hog_nc
+            int hog_nc,
+            int filter_rows_padding,
+            int filter_cols_padding
        )
        {
            const int num_hog_bands = 27+4;
            hog.resize(num_hog_bands);
            for (int i = 0; i < num_hog_bands; ++i)
            {
-                hog[i].set_size(hog_nr, hog_nc);
+                hog[i].set_size(hog_nr+filter_rows_padding-1, hog_nc+filter_cols_padding-1);
+                rectangle rect = get_rect(hog[i]);
+                rect.top() +=   (filter_rows_padding-1)/2;
+                rect.left() +=  (filter_cols_padding-1)/2;
+                rect.right() -= filter_cols_padding/2;
+                rect.bottom() -= filter_rows_padding/2;
+                zero_border_pixels(hog[i],rect);
            }
        }
    // ------------------------------------------------------------------------------------
        template <typename T, typename mm>
-        void set_hog (
+        inline void set_hog (
            array2d<matrix<T,31,1>,mm>& hog,
            int o,
            int x, 
@@ -120,10 +247,25 @@ namespace dlib
        void init_hog (
            array2d<matrix<T,31,1>,mm>& hog,
            int hog_nr,
-            int hog_nc
+            int hog_nc,
+            int filter_rows_padding,
+            int filter_cols_padding
        )
        {
-            hog.set_size(hog_nr, hog_nc);
+            hog.set_size(hog_nr+filter_rows_padding-1, hog_nc+filter_cols_padding-1);
+            // now zero out the border region
+            rectangle rect = get_rect(hog);
+            rect.top() +=   (filter_rows_padding-1)/2;
+            rect.left() +=  (filter_cols_padding-1)/2;
+            rect.right() -= filter_cols_padding/2;
+            rect.bottom() -= filter_rows_padding/2;
+            border_enumerator be(get_rect(hog),rect);
+            while (be.move_next())
+            {
+                const point p = be.element();
+                set_all_elements(hog[p.y()][p.x()], 0); 
+            }
        }
    // ------------------------------------------------------------------------------------
@@ -135,9 +277,22 @@ namespace dlib
        void impl_extract_fhog_features(
            const image_type& img, 
            out_type& hog, 
-            int cell_size
+            int cell_size,
+            int filter_rows_padding,
+            int filter_cols_padding
        ) 
        {
+            // make sure requires clause is not broken
+            DLIB_ASSERT( cell_size > 0 &&
+                         filter_rows_padding > 0 &&
+                         filter_cols_padding > 0 ,
+                "\t void extract_fhog_features()"
+                << "\n\t Invalid inputs were given to this function. "
+                << "\n\t cell_size: " << cell_size 
+                << "\n\t filter_rows_padding: " << filter_rows_padding 
+                << "\n\t filter_cols_padding: " << filter_cols_padding 
+                );
            /*
                This function implements the HOG feature extraction method described in 
                the paper:
@@ -199,7 +354,11 @@ namespace dlib
                return;
            }
-            array2d<matrix<float,18,1> > hist(cells_nr, cells_nc);
+            // We give hist extra padding around the edges (1 cell all the way around the
+            // edge) so we can avoid needing to do boundary checks when indexing into it
+            // later on.  So some statements assign to the boundary but those values are
+            // never used.
+            array2d<matrix<float,18,1> > hist(cells_nr+2, cells_nc+2);
            for (long r = 0; r < hist.nr(); ++r)
            {
                for (long c = 0; c < hist.nc(); ++c)
@@ -219,23 +378,98 @@ namespace dlib
                hog.clear();
                return;
            }
-            init_hog(hog, hog_nr, hog_nc);
+            const int padding_rows_offset = (filter_rows_padding-1)/2;
+            const int padding_cols_offset = (filter_cols_padding-1)/2;
+            init_hog(hog, hog_nr, hog_nc, filter_rows_padding, filter_cols_padding);
-            const int visible_nr = cells_nr*cell_size;
+            const int visible_nr = std::min((long)cells_nr*cell_size,img.nr())-1;
-            const int visible_nc = cells_nc*cell_size;
+            const int visible_nc = std::min((long)cells_nc*cell_size,img.nc())-1;
            // First populate the gradient histograms
-            for (int y = 1; y < visible_nr-1; y++) 
+            for (int y = 1; y < visible_nr; y++) 
+            {
+                const double yp = ((double)y+0.5)/(double)cell_size - 0.5;
+                const int iyp = (int)std::floor(yp);
+                const double vy0 = yp-iyp;
+                const double vy1 = 1.0-vy0;
+                int x;
+                for (x = 1; x < visible_nc-3; x+=4) 
+                {
+                    simd4f xx(x,x+1,x+2,x+3);
+                    // v will be the length of the gradient vectors.
+                    simd4f grad_x, grad_y, v;
+                    get_gradient(y,x,img,grad_x,grad_y,v);
+                    // We will use bilinear interpolation to add into the histogram bins.
+                    // So first we precompute the values needed to determine how much each
+                    // pixel votes into each bin.
+                    simd4f xp = (xx+0.5)/(float)cell_size - 0.5;
+                    simd4f ixp = floor(xp);
+                    simd4f vx0 = xp-ixp;
+                    simd4f vx1 = 1.0f-vx0;
+                    v = sqrt(v);
+                    // Now snap the gradient to one of 18 orientations
+                    simd4f best_dot = 0;
+                    simd4f best_o = 0;
+                    for (int o = 0; o < 9; o++) 
                    {
-                const int r = std::min<int>(y, img.nr()-2);
+                        simd4f dot = grad_x*directions[o](0) + grad_y*directions[o](1);
-                for (int x = 1; x < visible_nc-1; x++) 
+                        simd4f_bool cmp = dot>best_dot;
+                        best_dot = select(cmp,dot,best_dot); 
+                        dot *= -1;
+                        best_o = select(cmp,o,best_o);
+                        cmp = dot>best_dot;
+                        best_dot = select(cmp,dot,best_dot);
+                        best_o = select(cmp,o+9,best_o);
+                    }
+                    // Add the gradient magnitude, v, to 4 histograms around pixel using
+                    // bilinear interpolation.
+                    vx1 *= v;
+                    vx0 *= v;
+                    // The amounts for each bin
+                    simd4f v11 = vy1*vx1;
+                    simd4f v01 = vy0*vx1;
+                    simd4f v10 = vy1*vx0;
+                    simd4f v00 = vy0*vx0;
+                    float _best_o[4]; best_o.store(_best_o);
+                    float _ixp[4];    ixp.store(_ixp);
+                    float _v11[4];    v11.store(_v11);
+                    float _v01[4];    v01.store(_v01);
+                    float _v10[4];    v10.store(_v10);
+                    float _v00[4];    v00.store(_v00);
+                    hist[iyp+1]  [_ixp[0]+1](_best_o[0]) += _v11[0];
+                    hist[iyp+1+1][_ixp[0]+1](_best_o[0]) += _v01[0];
+                    hist[iyp+1]  [_ixp[0]+2](_best_o[0]) += _v10[0];
+                    hist[iyp+1+1][_ixp[0]+2](_best_o[0]) += _v00[0];
+                    hist[iyp+1]  [_ixp[1]+1](_best_o[1]) += _v11[1];
+                    hist[iyp+1+1][_ixp[1]+1](_best_o[1]) += _v01[1];
+                    hist[iyp+1]  [_ixp[1]+2](_best_o[1]) += _v10[1];
+                    hist[iyp+1+1][_ixp[1]+2](_best_o[1]) += _v00[1];
+                    hist[iyp+1]  [_ixp[2]+1](_best_o[2]) += _v11[2];
+                    hist[iyp+1+1][_ixp[2]+1](_best_o[2]) += _v01[2];
+                    hist[iyp+1]  [_ixp[2]+2](_best_o[2]) += _v10[2];
+                    hist[iyp+1+1][_ixp[2]+2](_best_o[2]) += _v00[2];
+                    hist[iyp+1]  [_ixp[3]+1](_best_o[3]) += _v11[3];
+                    hist[iyp+1+1][_ixp[3]+1](_best_o[3]) += _v01[3];
+                    hist[iyp+1]  [_ixp[3]+2](_best_o[3]) += _v10[3];
+                    hist[iyp+1+1][_ixp[3]+2](_best_o[3]) += _v00[3];
+                }
+                // Now process the right columns that don't fit into simd registers.
+                for (; x < visible_nc; x++) 
                {
-                    const int c = std::min<int>(x, img.nc()-2);
                    matrix<double,2,1> grad;
                    double v;
-                    get_gradient(r,c,img,grad,v);
+                    get_gradient(y,x,img,grad,v);
-                    v = std::sqrt(v);
                    // snap to one of 18 orientations
                    double best_dot = 0;
@@ -255,27 +489,17 @@ namespace dlib
                        }
                    }
+                    v = std::sqrt(v);
                    // add to 4 histograms around pixel using bilinear interpolation
-                    double xp = ((double)x+0.5)/(double)cell_size - 0.5;
+                    const double xp = ((double)x+0.5)/(double)cell_size - 0.5;
-                    double yp = ((double)y+0.5)/(double)cell_size - 0.5;
+                    const int ixp = (int)std::floor(xp);
-                    int ixp = (int)std::floor(xp);
+                    const double vx0 = xp-ixp;
-                    int iyp = (int)std::floor(yp);
+                    const double vx1 = 1.0-vx0;
-                    double vx0 = xp-ixp;
-                    double vy0 = yp-iyp;
-                    double vx1 = 1.0-vx0;
-                    double vy1 = 1.0-vy0;
-                    if (ixp >= 0 && iyp >= 0) 
-                        hist[iyp][ixp](best_o) += vy1*vx1*v;
-                    if (iyp+1 < cells_nr && ixp >= 0) 
+                    hist[iyp+1][ixp+1](best_o) += vy1*vx1*v;
-                        hist[iyp+1][ixp](best_o) += vy0*vx1*v;
+                    hist[iyp+1+1][ixp+1](best_o) += vy0*vx1*v;
+                    hist[iyp+1][ixp+1+1](best_o) += vy1*vx0*v;
-                    if (iyp >= 0 && ixp+1 < cells_nc) 
+                    hist[iyp+1+1][ixp+1+1](best_o) += vy0*vx0*v;
-                        hist[iyp][ixp+1](best_o) += vy1*vx0*v;
-                    if (ixp+1 < cells_nc && iyp+1 < cells_nr) 
-                        hist[iyp+1][ixp+1](best_o) += vy0*vx0*v;
                }
            }
@@ -286,7 +510,7 @@ namespace dlib
                {
                    for (int o = 0; o < 9; o++) 
                    {
-                        norm[r][c] += (hist[r][c](o) + hist[r][c](o+9)) * (hist[r][c](o) + hist[r][c](o+9));
+                        norm[r][c] += (hist[r+1][c+1](o) + hist[r+1][c+1](o+9)) * (hist[r+1][c+1](o) + hist[r+1][c+1](o+9));
                    }
                }
            }
@@ -295,6 +519,7 @@ namespace dlib
            // compute features
            for (int y = 0; y < hog_nr; y++) 
            {
+                const int yy = y+padding_rows_offset; 
                for (int x = 0; x < hog_nc; x++) 
                {
                    const double nn1 = 0.2*std::sqrt(norm[y+1][x+1] + norm[y+1][x+2] + norm[y+2][x+1] + norm[y+2][x+2] + eps);
@@ -311,14 +536,16 @@ namespace dlib
                    double t3 = 0;
                    double t4 = 0;
+                    const int xx = x+padding_cols_offset; 
                    // contrast-sensitive features
                    for (int o = 0; o < 18; o++) 
                    {
-                        double h1 = std::min<double>(hist[y+1][x+1](o) , nn1)*n1;
+                        double h1 = std::min<double>(hist[y+1+1][x+1+1](o) , nn1)*n1;
-                        double h2 = std::min<double>(hist[y+1][x+1](o) , nn2)*n2;
+                        double h2 = std::min<double>(hist[y+1+1][x+1+1](o) , nn2)*n2;
-                        double h3 = std::min<double>(hist[y+1][x+1](o) , nn3)*n3;
+                        double h3 = std::min<double>(hist[y+1+1][x+1+1](o) , nn3)*n3;
-                        double h4 = std::min<double>(hist[y+1][x+1](o) , nn4)*n4;
+                        double h4 = std::min<double>(hist[y+1+1][x+1+1](o) , nn4)*n4;
-                        set_hog(hog,o,x,y, (h1 + h2 + h3 + h4));
+                        set_hog(hog,o,xx,yy, (h1 + h2 + h3 + h4));
                        t1 += h1;
                        t2 += h2;
                        t3 += h3;
@@ -328,19 +555,19 @@ namespace dlib
                    // contrast-insensitive features
                    for (int o = 0; o < 9; o++) 
                    {
-                        double sum = hist[y+1][x+1](o) + hist[y+1][x+1](o+9);
+                        double sum = hist[y+1+1][x+1+1](o) + hist[y+1+1][x+1+1](o+9);
                        double h1 = std::min(sum , nn1)*n1;
                        double h2 = std::min(sum , nn2)*n2;
                        double h3 = std::min(sum , nn3)*n3;
                        double h4 = std::min(sum , nn4)*n4;
-                        set_hog(hog,o+18,x,y, (h1 + h2 + h3 + h4));
+                        set_hog(hog,o+18,xx,yy, (h1 + h2 + h3 + h4));
                    }
                    // texture features
-                    set_hog(hog,27,x,y, 2*0.2357 * t1);
+                    set_hog(hog,27,xx,yy, 2*0.2357 * t1);
-                    set_hog(hog,28,x,y, 2*0.2357 * t2);
+                    set_hog(hog,28,xx,yy, 2*0.2357 * t2);
-                    set_hog(hog,29,x,y, 2*0.2357 * t3);
+                    set_hog(hog,29,xx,yy, 2*0.2357 * t3);
-                    set_hog(hog,30,x,y, 2*0.2357 * t4);
+                    set_hog(hog,30,xx,yy, 2*0.2357 * t4);
                }
            }
        }
@@ -383,10 +610,12 @@ namespace dlib
    void extract_fhog_features(
        const image_type& img, 
        dlib::array<array2d<T,mm1>,mm2>& hog, 
-        int cell_size = 8
+        int cell_size = 8,
+        int filter_rows_padding = 1,
+        int filter_cols_padding = 1
    ) 
    {
-        return impl_fhog::impl_extract_fhog_features(img, hog, cell_size);
+        return impl_fhog::impl_extract_fhog_features(img, hog, cell_size, filter_rows_padding, filter_cols_padding);
    }
    template <
@@ -397,10 +626,12 @@ namespace dlib
    void extract_fhog_features(
        const image_type& img, 
        array2d<matrix<T,31,1>,mm>& hog, 
-        int cell_size = 8
+        int cell_size = 8,
+        int filter_rows_padding = 1,
+        int filter_cols_padding = 1
    ) 
    {
-        return impl_fhog::impl_extract_fhog_features(img, hog, cell_size);
+        return impl_fhog::impl_extract_fhog_features(img, hog, cell_size, filter_rows_padding, filter_cols_padding);
    }
 // ----------------------------------------------------------------------------------------
@@ -408,52 +639,105 @@ namespace dlib
    inline point image_to_fhog (
        point p,
-        int cell_size = 8
+        int cell_size = 8,
+        int filter_rows_padding = 1,
+        int filter_cols_padding = 1
    )
    {
-        // There is a one pixel border around the imag.
+        // make sure requires clause is not broken
+        DLIB_ASSERT( cell_size > 0 &&
+            filter_rows_padding > 0 &&
+            filter_cols_padding > 0 ,
+            "\t point image_to_fhog()"
+            << "\n\t Invalid inputs were given to this function. "
+            << "\n\t cell_size: " << cell_size 
+            << "\n\t filter_rows_padding: " << filter_rows_padding 
+            << "\n\t filter_cols_padding: " << filter_cols_padding 
+        );
+        // There is a one pixel border around the image.
        p -= point(1,1);
        // There is also a 1 "cell" border around the HOG image formation.
-        return p/cell_size - point(1,1);
+        return p/cell_size - point(1,1) + point((filter_cols_padding-1)/2,(filter_rows_padding-1)/2);
    }
 // ----------------------------------------------------------------------------------------
    inline rectangle image_to_fhog (
        const rectangle& rect,
-        int cell_size = 8
+        int cell_size = 8,
+        int filter_rows_padding = 1,
+        int filter_cols_padding = 1
    ) 
    {
-        return rectangle(image_to_fhog(rect.tl_corner(),cell_size),
+        // make sure requires clause is not broken
-                         image_to_fhog(rect.br_corner(),cell_size));
+        DLIB_ASSERT( cell_size > 0 &&
+            filter_rows_padding > 0 &&
+            filter_cols_padding > 0 ,
+            "\t rectangle image_to_fhog()"
+            << "\n\t Invalid inputs were given to this function. "
+            << "\n\t cell_size: " << cell_size 
+            << "\n\t filter_rows_padding: " << filter_rows_padding 
+            << "\n\t filter_cols_padding: " << filter_cols_padding 
+        );
+        return rectangle(image_to_fhog(rect.tl_corner(),cell_size,filter_rows_padding,filter_cols_padding),
+                         image_to_fhog(rect.br_corner(),cell_size,filter_rows_padding,filter_cols_padding));
    }
 // ----------------------------------------------------------------------------------------
    inline point fhog_to_image (
        point p,
-        int cell_size = 8
+        int cell_size = 8,
+        int filter_rows_padding = 1,
+        int filter_cols_padding = 1
    )
    {
+        // make sure requires clause is not broken
+        DLIB_ASSERT( cell_size > 0 &&
+            filter_rows_padding > 0 &&
+            filter_cols_padding > 0 ,
+            "\t point fhog_to_image()"
+            << "\n\t Invalid inputs were given to this function. "
+            << "\n\t cell_size: " << cell_size 
+            << "\n\t filter_rows_padding: " << filter_rows_padding 
+            << "\n\t filter_cols_padding: " << filter_cols_padding 
+        );
        // Convert to image space and then set to the center of the cell.
        point offset;
+        p = (p+point(1,1)-point((filter_cols_padding-1)/2,(filter_rows_padding-1)/2))*cell_size + point(1,1);
        if (p.x() >= 0 && p.y() >= 0) offset = point(cell_size/2,cell_size/2);
        if (p.x() <  0 && p.y() >= 0) offset = point(-cell_size/2,cell_size/2);
        if (p.x() >= 0 && p.y() <  0) offset = point(cell_size/2,-cell_size/2);
        if (p.x() <  0 && p.y() <  0) offset = point(-cell_size/2,-cell_size/2);
-        return (p+point(1,1))*cell_size + point(1,1) + offset;
+        return p + offset;
    }
 // ----------------------------------------------------------------------------------------
    inline rectangle fhog_to_image (
        const rectangle& rect,
-        int cell_size = 8
+        int cell_size = 8,
+        int filter_rows_padding = 1,
+        int filter_cols_padding = 1
    ) 
    {
-        return rectangle(fhog_to_image(rect.tl_corner(),cell_size),
+        // make sure requires clause is not broken
-                         fhog_to_image(rect.br_corner(),cell_size));
+        DLIB_ASSERT( cell_size > 0 &&
+            filter_rows_padding > 0 &&
+            filter_cols_padding > 0 ,
+            "\t rectangle fhog_to_image()"
+            << "\n\t Invalid inputs were given to this function. "
+            << "\n\t cell_size: " << cell_size 
+            << "\n\t filter_rows_padding: " << filter_rows_padding 
+            << "\n\t filter_cols_padding: " << filter_cols_padding 
+        );
+        return rectangle(fhog_to_image(rect.tl_corner(),cell_size,filter_rows_padding,filter_cols_padding),
+                         fhog_to_image(rect.br_corner(),cell_size,filter_rows_padding,filter_cols_padding));
    }
 // ----------------------------------------------------------------------------------------
@@ -466,25 +750,35 @@ namespace dlib
        >
    matrix<unsigned char> draw_fhog(
        const dlib::array<array2d<T,mm1>,mm2>& hog,
-        const long w = 15
+        const long cell_draw_size = 15
    )
    {
+        // make sure requires clause is not broken
+        DLIB_ASSERT( cell_draw_size > 0 && hog.size()==31,
+            "\t matrix<unsigned char> draw_fhog()"
+            << "\n\t Invalid inputs were given to this function. "
+            << "\n\t cell_draw_size: " << cell_draw_size 
+            << "\n\t hog.size(): " << hog.size() 
+        );
        dlib::array<matrix<float> > mbars;
-        impl_fhog::create_fhog_bar_images(mbars,w);
+        impl_fhog::create_fhog_bar_images(mbars,cell_draw_size);
        // now draw the bars onto the HOG cells
-        matrix<float> himg(hog[0].nr()*w, hog[0].nc()*w);
+        matrix<float> himg(hog[0].nr()*cell_draw_size, hog[0].nc()*cell_draw_size);
        himg = 0;
        for (unsigned long d = 0; d < mbars.size(); ++d)
        {
-            for (long r = 0; r < himg.nr(); r+=w)
+            for (long r = 0; r < himg.nr(); r+=cell_draw_size)
            {
-                for (long c = 0; c < himg.nc(); c+=w)
+                for (long c = 0; c < himg.nc(); c+=cell_draw_size)
                {
-                    const float val = hog[d][r/w][c/w] + hog[d+mbars.size()][r/w][c/w] + hog[d+mbars.size()*2][r/w][c/w];
+                    const float val = hog[d][r/cell_draw_size][c/cell_draw_size] +
+                        hog[d+mbars.size()][r/cell_draw_size][c/cell_draw_size] +
+                        hog[d+mbars.size()*2][r/cell_draw_size][c/cell_draw_size];
                    if (val > 0)
                    {
-                        set_subm(himg, r, c, w, w) += val*mbars[d%mbars.size()];
+                        set_subm(himg, r, c, cell_draw_size, cell_draw_size) += val*mbars[d%mbars.size()];
                    }
                }
            }
@@ -501,9 +795,17 @@ namespace dlib
        >
    matrix<unsigned char> draw_fhog (
        const std::vector<matrix<T> >& hog,
-        const long w = 15
+        const long cell_draw_size = 15
    )
    {
+        // make sure requires clause is not broken
+        DLIB_ASSERT( cell_draw_size > 0 && hog.size()==31,
+            "\t matrix<unsigned char> draw_fhog()"
+            << "\n\t Invalid inputs were given to this function. "
+            << "\n\t cell_draw_size: " << cell_draw_size 
+            << "\n\t hog.size(): " << hog.size() 
+        );
        // Just convert the input into the right object and then call the above draw_fhog()
        // function on it.
        dlib::array<array2d<T> > temp(hog.size());
@@ -518,7 +820,7 @@ namespace dlib
                }
            }
        }
-        return draw_fhog(temp,w);
+        return draw_fhog(temp,cell_draw_size);
    }
 // ----------------------------------------------------------------------------------------
@@ -529,25 +831,34 @@ namespace dlib
        >
    matrix<unsigned char> draw_fhog(
        const array2d<matrix<T,31,1>,mm>& hog,
-        const long w = 15
+        const long cell_draw_size = 15
    )
    {
+        // make sure requires clause is not broken
+        DLIB_ASSERT( cell_draw_size > 0,
+            "\t matrix<unsigned char> draw_fhog()"
+            << "\n\t Invalid inputs were given to this function. "
+            << "\n\t cell_draw_size: " << cell_draw_size 
+        );
        dlib::array<matrix<float> > mbars;
-        impl_fhog::create_fhog_bar_images(mbars,w);
+        impl_fhog::create_fhog_bar_images(mbars,cell_draw_size);
        // now draw the bars onto the HOG cells
-        matrix<float> himg(hog.nr()*w, hog.nc()*w);
+        matrix<float> himg(hog.nr()*cell_draw_size, hog.nc()*cell_draw_size);
        himg = 0;
        for (unsigned long d = 0; d < mbars.size(); ++d)
        {
-            for (long r = 0; r < himg.nr(); r+=w)
+            for (long r = 0; r < himg.nr(); r+=cell_draw_size)
            {
-                for (long c = 0; c < himg.nc(); c+=w)
+                for (long c = 0; c < himg.nc(); c+=cell_draw_size)
                {
-                    const float val = hog[r/w][c/w](d) + hog[r/w][c/w](d+mbars.size()) + hog[r/w][c/w](d+mbars.size()*2);
+                    const float val = hog[r/cell_draw_size][c/cell_draw_size](d) +
+                        hog[r/cell_draw_size][c/cell_draw_size](d+mbars.size()) +
+                        hog[r/cell_draw_size][c/cell_draw_size](d+mbars.size()*2);
                    if (val > 0)
                    {
-                        set_subm(himg, r, c, w, w) += val*mbars[d%mbars.size()];
+                        set_subm(himg, r, c, cell_draw_size, cell_draw_size) += val*mbars[d%mbars.size()];
                    }
                }
            }

--- a/dlib/image_transforms/fhog_abstract.h
+++ b/dlib/image_transforms/fhog_abstract.h
@@ -20,11 +20,15 @@ namespace dlib
    void extract_fhog_features(
        const image_type& img, 
        array2d<matrix<T,31,1>,mm>& hog, 
-        int cell_size = 8
+        int cell_size = 8,
+        int filter_rows_padding = 1,
+        int filter_cols_padding = 1
    );
    /*!
        requires
            - cell_size > 0
+            - filter_rows_padding > 0
+            - filter_cols_padding > 0
            - in_image_type  == is an implementation of array2d/array2d_kernel_abstract.h
            - img contains some kind of pixel type. 
              (i.e. pixel_traits<typename in_image_type::type> is defined)
@@ -40,11 +44,29 @@ namespace dlib
            - The input image is broken into cells that are cell_size by cell_size pixels
              and within each cell we compute a 31 dimensional FHOG vector.  This vector
              describes the gradient structure within the cell.  
-            - #hog.nr() is approximately equal to img.nr()/cell_size.
+            - A common task is to convolve each channel of the hog image with a linear
-            - #hog.nc() is approximately equal to img.nc()/cell_size.
+              filter.  This is made more convenient if the contents of #hog includes extra
+              rows and columns of zero padding along the borders.  This extra padding
+              allows for more efficient convolution code since the code does not need to
+              perform expensive boundary checking.  Therefore, you can set
+              filter_rows_padding and filter_cols_padding to indicate the size of the
+              filter you wish to use and this function will ensure #hog has the appropriate
+              extra zero padding along the borders.  In particular, it will include the
+              following extra padding:
+                - (filter_rows_padding-1)/2 extra rows of zeros on the top of #hog.
+                - (filter_cols_padding-1)/2 extra columns of zeros on the left of #hog.
+                - filter_rows_padding/2 extra rows of zeros on the bottom of #hog.
+                - filter_cols_padding/2 extra columns of zeros on the right of #hog.
+              Therefore, the extra padding is done such that functions like
+              spatially_filter_image() apply their filters to the entire content containing
+              area of a hog image (note that you should use the following planar version of
+              extract_fhog_features() instead of the interlaced version if you want to use
+              spatially_filter_image() on a hog image).
+            - #hog.nr() is approximately equal to img.nr()/cell_size + filter_rows_padding-1.
+            - #hog.nc() is approximately equal to img.nc()/cell_size + filter_cols_padding-1.
            - for all valid r and c:
-                - #hog[r][c] == the FHOG vector describing the cell centered at the pixel
+                - #hog[r][c] == the FHOG vector describing the cell centered at the pixel location 
-                  location fhog_to_image(point(c,r),cell_size) in img.
+                  fhog_to_image(point(c,r),cell_size,filter_rows_padding,filter_cols_padding) in img.
    !*/
 // ----------------------------------------------------------------------------------------
@@ -58,11 +80,15 @@ namespace dlib
    void extract_fhog_features(
        const image_type& img, 
        dlib::array<array2d<T,mm1>,mm2>& hog, 
-        int cell_size = 8
+        int cell_size = 8,
+        int filter_rows_padding = 1,
+        int filter_cols_padding = 1
    );
    /*!
        requires
            - cell_size > 0
+            - filter_rows_padding > 0
+            - filter_cols_padding > 0
            - in_image_type  == is an implementation of array2d/array2d_kernel_abstract.h
            - img contains some kind of pixel type. 
              (i.e. pixel_traits<typename in_image_type::type> is defined)
@@ -83,11 +109,15 @@ namespace dlib
    inline point image_to_fhog (
        point p,
-        int cell_size = 8
+        int cell_size = 8,
+        int filter_rows_padding = 1,
+        int filter_cols_padding = 1
    );
    /*!
        requires
            - cell_size > 0
+            - filter_rows_padding > 0
+            - filter_cols_padding > 0
        ensures
            - When using extract_fhog_features(), each FHOG cell is extracted from a
              certain region in the input image.  image_to_fhog() returns the identity of
@@ -98,56 +128,75 @@ namespace dlib
              might not have corresponding feature locations.  E.g. border points or points
              outside the image.  In these cases the returned point will be outside the
              input image.
+            - Note that you should use the same values of cell_size, filter_rows_padding,
+              and filter_cols_padding that you used with extract_fhog_features().
    !*/
 // ----------------------------------------------------------------------------------------
    inline rectangle image_to_fhog (
        const rectangle& rect,
-        int cell_size = 8
+        int cell_size = 8,
+        int filter_rows_padding = 1,
+        int filter_cols_padding = 1
    );
    /*!
        requires
            - cell_size > 0
+            - filter_rows_padding > 0
+            - filter_cols_padding > 0
        ensures
            - maps a rectangle from image space to fhog space.  In particular this function returns:
-              rectangle(image_to_fhog(rect.tl_corner(),cell_size), image_to_fhog(rect.br_corner(),cell_size))
+              rectangle(image_to_fhog(rect.tl_corner(),cell_size,filter_rows_padding,filter_cols_padding), 
+                        image_to_fhog(rect.br_corner(),cell_size,filter_rows_padding,filter_cols_padding))
    !*/
 // ----------------------------------------------------------------------------------------
    inline point fhog_to_image (
        point p,
-        int cell_size = 8
+        int cell_size = 8,
+        int filter_rows_padding = 1,
+        int filter_cols_padding = 1
    );
    /*!
        requires
            - cell_size > 0
+            - filter_rows_padding > 0
+            - filter_cols_padding > 0
        ensures
            - Maps a pixel in a FHOG image (produced by extract_fhog_features()) back to the
              corresponding original input pixel.  Note that since FHOG images are
              spatially downsampled by aggregation into cells the mapping is not totally
              invertible.  Therefore, the returned location will be the center of the cell
              in the original image that contained the FHOG vector at position p.  Moreover,
-              cell_size should be set to the value used by the call to extract_fhog_features().
+              cell_size, filter_rows_padding, and filter_cols_padding should be set to the
+              values used by the call to extract_fhog_features().
            - Mapping from fhog space to image space is an invertible transformation.  That
-              is, for any point P we have P == image_to_fhog(fhog_to_image(P,cell_size),cell_size).
+              is, for any point P we have P == image_to_fhog(fhog_to_image(P,cell_size,filter_rows_padding,filter_cols_padding),
+                                                             cell_size,filter_rows_padding,filter_cols_padding).
    !*/
 // ----------------------------------------------------------------------------------------
    inline rectangle fhog_to_image (
        const rectangle& rect,
-        int cell_size = 8
+        int cell_size = 8,
+        int filter_rows_padding = 1,
+        int filter_cols_padding = 1
    );
    /*!
        requires
            - cell_size > 0
+            - filter_rows_padding > 0
+            - filter_cols_padding > 0
        ensures
            - maps a rectangle from fhog space to image space.  In particular this function returns:
-              rectangle(fhog_to_image(rect.tl_corner(),cell_size), fhog_to_image(rect.br_corner(),cell_size))
+              rectangle(fhog_to_image(rect.tl_corner(),cell_size,filter_rows_padding,filter_cols_padding), 
+                        fhog_to_image(rect.br_corner(),cell_size,filter_rows_padding,filter_cols_padding))
            - Mapping from fhog space to image space is an invertible transformation.  That
-              is, for any rectangle R we have R == image_to_fhog(fhog_to_image(R,cell_size),cell_size).
+              is, for any rectangle R we have R == image_to_fhog(fhog_to_image(R,cell_size,filter_rows_padding,filter_cols_padding),
+                                                                 cell_size,filter_rows_padding,filter_cols_padding).
    !*/
 // ----------------------------------------------------------------------------------------
@@ -187,6 +236,7 @@ namespace dlib
    /*!
        requires
            - cell_draw_size > 0
+            - hog.size() == 31
        ensures
            - This function just converts the given hog object into an array<array2d<T>>
              and passes it to the above draw_fhog() routine and returns the results.