Filled out spec for the FHOG feature extraction routines.

4e373064 · Davis King · 52cefbe2 · 4e373064 · 4e373064
Commit 4e373064 authored Oct 01, 2013 by Davis King
Hide whitespace changes
Inline Side-by-side

Showing with 118 additions and 24 deletions

fhog.h dlib/image_transforms/fhog.h +11 -11

fhog_abstract.h dlib/image_transforms/fhog_abstract.h +107 -13

No files found.
--- a/dlib/image_transforms/fhog.h
+++ b/dlib/image_transforms/fhog.h
@@ -135,7 +135,7 @@ namespace dlib
        void impl_extract_fhog_features(
            const image_type& img, 
            out_type& hog, 
-            int bin_size
+            int cell_size
        ) 
        {
            /*
@@ -190,8 +190,8 @@ namespace dlib
            // First we allocate memory for caching orientation histograms & their norms.
-            const int cells_nr = (int)((double)img.nr()/(double)bin_size + 0.5);
+            const int cells_nr = (int)((double)img.nr()/(double)cell_size + 0.5);
-            const int cells_nc = (int)((double)img.nc()/(double)bin_size + 0.5);
+            const int cells_nc = (int)((double)img.nc()/(double)cell_size + 0.5);
            array2d<matrix<float,18,1> > hist(cells_nr, cells_nc);
            for (long r = 0; r < hist.nr(); ++r)
@@ -210,8 +210,8 @@ namespace dlib
            const int hog_nc = std::max(cells_nc-2, 0);
            init_hog(hog, hog_nr, hog_nc);
-            const int visible_nr = cells_nr*bin_size;
+            const int visible_nr = cells_nr*cell_size;
-            const int visible_nc = cells_nc*bin_size;
+            const int visible_nc = cells_nc*cell_size;
            // First populate the gradient histograms
            for (int y = 1; y < visible_nr-1; y++) 
@@ -245,8 +245,8 @@ namespace dlib
                    }
                    // add to 4 histograms around pixel using bilinear interpolation
-                    double xp = ((double)x+0.5)/(double)bin_size - 0.5;
+                    double xp = ((double)x+0.5)/(double)cell_size - 0.5;
-                    double yp = ((double)y+0.5)/(double)bin_size - 0.5;
+                    double yp = ((double)y+0.5)/(double)cell_size - 0.5;
                    int ixp = (int)std::floor(xp);
                    int iyp = (int)std::floor(yp);
                    double vx0 = xp-ixp;
@@ -370,10 +370,10 @@ namespace dlib
    void extract_fhog_features(
        const image_type& img, 
        dlib::array<array2d<T,mm1>,mm2>& hog, 
-        int bin_size = 8
+        int cell_size = 8
    ) 
    {
-        return impl_fhog::impl_extract_fhog_features(img, hog, bin_size);
+        return impl_fhog::impl_extract_fhog_features(img, hog, cell_size);
    }
    template <
@@ -384,10 +384,10 @@ namespace dlib
    void extract_fhog_features(
        const image_type& img, 
        array2d<matrix<T,31,1>,mm>& hog, 
-        int bin_size = 8
+        int cell_size = 8
    ) 
    {
-        return impl_fhog::impl_extract_fhog_features(img, hog, bin_size);
+        return impl_fhog::impl_extract_fhog_features(img, hog, cell_size);
    }
 // ----------------------------------------------------------------------------------------

--- a/dlib/image_transforms/fhog_abstract.h
+++ b/dlib/image_transforms/fhog_abstract.h
@@ -13,41 +13,110 @@ namespace dlib
 // ----------------------------------------------------------------------------------------
    template <
-        typename image_type,
+        typename image_type, 
        typename T, 
-        typename mm1, 
+        typename mm
-        typename mm2
        >
    void extract_fhog_features(
        const image_type& img, 
-        dlib::array<array2d<T,mm1>,mm2>& hog, 
+        array2d<matrix<T,31,1>,mm>& hog, 
-        int bin_size = 8
+        int cell_size = 8
    );
+    /*!
+        requires
+            - cell_size > 0
+            - image_type == is an implementation of array2d/array2d_kernel_abstract.h
+            - img contains some kind of pixel type. 
+              (i.e. pixel_traits<typename image_type::type> is defined)
+            - T should be float or double
+        ensures
+            - This function implements the HOG feature extraction method described in 
+              the paper:
+                Object Detection with Discriminatively Trained Part Based Models by
+                P. Felzenszwalb, R. Girshick, D. McAllester, D. Ramanan
+                IEEE Transactions on Pattern Analysis and Machine Intelligence, Vol. 32, No. 9, Sep. 2010
+              This means that it takes an input image img and outputs Felzenszwalb's
+              version of the HOG features, which are stored into #hog.
+            - The input image is broken into cells that are cell_size by cell_size pixels
+              and within each cell we compute a 31 dimensional FHOG vector.  This vector
+              describes the gradient structure within the cell.  
+            - #hog.nr() is approximately equal to img.nr()/cell_size.
+            - #hog.nc() is approximately equal to img.nc()/cell_size.
+            - for all valid r and c:
+                - #hog[r][c] == the FHOG vector describing the cell centered at the pixel
+                  location fhog_to_image(point(c,r),cell_size) in img.
+    !*/
+// ----------------------------------------------------------------------------------------
    template <
-        typename image_type, 
+        typename image_type,
        typename T, 
-        typename mm
+        typename mm1, 
+        typename mm2
        >
    void extract_fhog_features(
        const image_type& img, 
-        array2d<matrix<T,31,1>,mm>& hog, 
+        dlib::array<array2d<T,mm1>,mm2>& hog, 
-        int bin_size = 8
+        int cell_size = 8
    );
+    /*!
+        requires
+            - cell_size > 0
+            - image_type == is an implementation of array2d/array2d_kernel_abstract.h
+            - img contains some kind of pixel type. 
+              (i.e. pixel_traits<typename image_type::type> is defined)
+            - T should be float or double
+        ensures
+            - This function is identical to the above extract_fhog_features() routine
+              except that it outputs the results in a planar format rather than the
+              interlaced format used above.  That is, each element of the hog vector is
+              placed into one of 31 images inside #hog.  To be precise, if vhog is the
+              output of the above interlaced version of extract_fhog_features() then we
+              will have, for all valid r and c:
+                - #hog[i][r][c] == vhog[r][c](i)
+                  (where 0 <= i < 31)
+            - #hog.size() == 31
+    !*/
 // ----------------------------------------------------------------------------------------
    inline point image_to_fhog (
        point p,
-        int bin_size 
+        int cell_size 
    );
+    /*!
+        requires
+            - cell_size > 0
+        ensures
+            - When using extract_fhog_features(), each FHOG cell is extracted from a
+              certain region in the input image.  image_to_fhog() returns the identity of
+              the FHOG cell containing the image pixel at location p.  Or in other words,
+              let P == image_to_fhog(p,cell_size) and hog be a FHOG feature map output by
+              extract_fhog_features() using the same cell_size, then hog[P.y()][P.x()] ==
+              the FHOG vector/cell containing the point p in the input image.  Note that
+              some image points might not have corresponding feature locations.  E.g.
+              border points or points outside the image.  In these cases the returned
+              point will be outside the bounds of hog (i.e. P is not a valid index
+              into hog).
+    !*/
 // ----------------------------------------------------------------------------------------
    inline point fhog_to_image (
        point p,
-        int bin_size 
+        int cell_size 
    );
+    /*!
+        requires
+            - cell_size > 0
+        ensures
+            - Maps a pixel in a FHOG image (produced by extract_fhog_features()) back to the
+              corresponding original input pixel.  Note that since FHOG images are
+              spatially downsampled by aggregation into cells the mapping is not totally
+              invertible.  Therefore, the returned location will be the center of the cell
+              in the original image that contained the FHOG vector at position p.  Moreover,
+              cell_size should be set to the value used by the call to extract_fhog_features().
+    !*/
 // ----------------------------------------------------------------------------------------
@@ -58,8 +127,21 @@ namespace dlib
        >
    matrix<unsigned char> draw_fhog(
        const dlib::array<array2d<T,mm1>,mm2>& hog,
-        const long w = 15
+        const long cell_draw_size = 15
    );
+    /*!
+        requires
+            - cell_draw_size > 0
+            - hog.size() == 31
+        ensures
+            - Interprets hog as a FHOG feature map output by extract_fhog_features() and
+              converts it into an image suitable for display on the screen.  In particular,
+              we draw all the hog cells into a grayscale image in a way that shows the
+              magnitude and orientation of the gradient energy in each cell.  The result is
+              then returned.
+            - The size of the cells in the output image will be rendered as cell_draw_size 
+              pixels wide and tall.
+    !*/
 // ----------------------------------------------------------------------------------------
@@ -69,8 +151,20 @@ namespace dlib
        >
    matrix<unsigned char> draw_fhog(
        const array2d<matrix<T,31,1>,mm>& hog,
-        const long w = 15
+        const long cell_draw_size = 15
    );
+    /*!
+        requires
+            - cell_draw_size > 0
+        ensures
+            - Interprets hog as a FHOG feature map output by extract_fhog_features() and
+              converts it into an image suitable for display on the screen.  In particular,
+              we draw all the hog cells into a grayscale image in a way that shows the
+              magnitude and orientation of the gradient energy in each cell.  The result is 
+              then returned.
+            - The size of the cells in the output image will be rendered as cell_draw_size 
+              pixels wide and tall.
+    !*/
 // ----------------------------------------------------------------------------------------