Added a function for computing Felzenszwalb's 31 channel HOG image

representation.

Added a function for computing Felzenszwalb's 31 channel HOG image
representation.
3cc32d00 · Davis King · ab38059d · 3cc32d00 · 3cc32d00 · 3cc32d00
Commit 3cc32d00 authored Sep 30, 2013 by Davis King
Hide whitespace changes
Inline Side-by-side

Showing with 579 additions and 0 deletions

image_transforms.h dlib/image_transforms.h +1 -0

fhog.h dlib/image_transforms/fhog.h +497 -0

fhog_abstract.h dlib/image_transforms/fhog_abstract.h +81 -0

No files found.
--- a/dlib/image_transforms.h
+++ b/dlib/image_transforms.h
@@ -16,6 +16,7 @@
 #include "image_transforms/colormaps.h"
 #include "image_transforms/segment_image.h"
 #include "image_transforms/interpolation.h"
+#include "image_transforms/fhog.h"

 #endif // DLIB_IMAGE_TRANSFORMs_

--- a/dlib/image_transforms/fhog.h
+++ b/dlib/image_transforms/fhog.h
+// Copyright (C) 2013  Davis E. King (davis@dlib.net)
+// License: Boost Software License   See LICENSE.txt for the full license.
+#ifndef DLIB_fHOG_H__
+#define DLIB_fHOG_H__
+
+#include "fhog_abstract.h"
+#include "../matrix.h"
+#include "../array2d.h"
+#include "../array.h"
+#include "../geometry.h"
+#include "assign_image.h"
+#include "draw.h"
+#include "interpolation.h"
+
+namespace dlib
+{
+
+// ----------------------------------------------------------------------------------------
+
+    namespace impl_fhog
+    {
+        template <typename image_type>
+        inline typename dlib::enable_if_c<pixel_traits<typename image_type::type>::rgb>::type get_gradient (
+            const int r,
+            const int c,
+            const image_type& img,
+            matrix<double,2,1>& grad,
+            double& len
+        )
+        {
+            matrix<double,2,1> grad2, grad3;
+            // get the red gradient
+            grad = (int)img[r][c+1].red-(int)img[r][c-1].red, 
+                 (int)img[r+1][c].red-(int)img[r-1][c].red;
+            len = length_squared(grad);
+
+            // get the green gradient
+            grad2 = (int)img[r][c+1].green-(int)img[r][c-1].green, 
+                  (int)img[r+1][c].green-(int)img[r-1][c].green;
+            double v2 = length_squared(grad2);
+
+            // get the blue gradient
+            grad3 = (int)img[r][c+1].blue-(int)img[r][c-1].blue, 
+                  (int)img[r+1][c].blue-(int)img[r-1][c].blue;
+            double v3 = length_squared(grad3);
+
+            // pick color with strongest gradient
+            if (v2 > len) 
+            {
+                len = v2;
+                grad = grad2;
+            } 
+            if (v3 > len) 
+            {
+                len = v3;
+                grad = grad3;
+            }
+        }
+
+    // ------------------------------------------------------------------------------------
+
+        template <typename image_type>
+        inline typename dlib::disable_if_c<pixel_traits<typename image_type::type>::rgb>::type get_gradient (
+            const int r,
+            const int c,
+            const image_type& img,
+            matrix<double,2,1>& grad,
+            double& len
+        )
+        {
+            grad = (int)get_pixel_intensity(img[r][c+1])-(int)get_pixel_intensity(img[r][c-1]), 
+            (int)get_pixel_intensity(img[r+1][c])-(int)get_pixel_intensity(img[r-1][c]);
+            len = length_squared(grad);
+        }
+
+    // ------------------------------------------------------------------------------------
+
+        template <typename T, typename mm1, typename mm2>
+        void set_hog (
+            dlib::array<array2d<T,mm1>,mm2>& hog,
+            int o,
+            int x, 
+            int y,
+            const double& value
+        )
+        {
+            hog[o][y][x] = value;
+        }
+
+        template <typename T, typename mm1, typename mm2>
+        void init_hog (
+            dlib::array<array2d<T,mm1>,mm2>& hog,
+            int hog_nr,
+            int hog_nc
+        )
+        {
+            const int num_hog_bands = 27+4;
+            hog.resize(num_hog_bands);
+            for (int i = 0; i < num_hog_bands; ++i)
+            {
+                hog[i].set_size(hog_nr, hog_nc);
+            }
+        }
+
+    // ------------------------------------------------------------------------------------
+
+        template <typename T, typename mm>
+        void set_hog (
+            array2d<matrix<T,31,1>,mm>& hog,
+            int o,
+            int x, 
+            int y,
+            const double& value
+        )
+        {
+            hog[y][x](o) = value;
+        }
+
+        template <typename T, typename mm>
+        void init_hog (
+            array2d<matrix<T,31,1>,mm>& hog,
+            int hog_nr,
+            int hog_nc
+        )
+        {
+            hog.set_size(hog_nr, hog_nc);
+        }
+
+    // ------------------------------------------------------------------------------------
+
+        template <
+            typename image_type, 
+            typename out_type
+            >
+        void impl_extract_fhog_features(
+            const image_type& img, 
+            out_type& hog, 
+            int bin_size
+        ) 
+        {
+            /*
+                This function implements the HOG feature extraction method described in 
+                the paper:
+                    P. Felzenszwalb, R. Girshick, D. McAllester, D. Ramanan
+                    Object Detection with Discriminatively Trained Part Based Models
+                    IEEE Transactions on Pattern Analysis and Machine Intelligence, Vol. 32, No. 9, Sep. 2010
+
+                Moreover, this function is derived from the HOG feature extraction code
+                from the features.cc file in the voc-releaseX code (see
+                http://people.cs.uchicago.edu/~rbg/latent/) which is has the following
+                license (note that the code has been modified to work with grayscale and
+                color as well as planar and interlaced input and output formats):
+
+                Copyright (C) 2011, 2012 Ross Girshick, Pedro Felzenszwalb
+                Copyright (C) 2008, 2009, 2010 Pedro Felzenszwalb, Ross Girshick
+                Copyright (C) 2007 Pedro Felzenszwalb, Deva Ramanan
+
+                Permission is hereby granted, free of charge, to any person obtaining
+                a copy of this software and associated documentation files (the
+                "Software"), to deal in the Software without restriction, including
+                without limitation the rights to use, copy, modify, merge, publish,
+                distribute, sublicense, and/or sell copies of the Software, and to
+                permit persons to whom the Software is furnished to do so, subject to
+                the following conditions:
+
+                The above copyright notice and this permission notice shall be
+                included in all copies or substantial portions of the Software.
+
+                THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+                EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+                MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+                NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+                LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+                OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+                WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+            */
+
+            // unit vectors used to compute gradient orientation
+            matrix<double,2,1> directions[9];
+            directions[0] =  1.0000, 0.0000; 
+            directions[1] =  0.9397, 0.3420;
+            directions[2] =  0.7660, 0.6428;
+            directions[3] =  0.500,  0.8660;
+            directions[4] =  0.1736, 0.9848;
+            directions[5] = -0.1736, 0.9848;
+            directions[6] = -0.5000, 0.8660;
+            directions[7] = -0.7660, 0.6428;
+            directions[8] = -0.9397, 0.3420;
+
+
+
+            // First we allocate memory for caching orientation histograms & their norms.
+            const int cells_nr = (int)((double)img.nr()/(double)bin_size + 0.5);
+            const int cells_nc = (int)((double)img.nc()/(double)bin_size + 0.5);
+
+            array2d<matrix<float,18,1> > hist(cells_nr, cells_nc);
+            for (long r = 0; r < hist.nr(); ++r)
+            {
+                for (long c = 0; c < hist.nc(); ++c)
+                {
+                    hist[r][c] = 0;
+                }
+            }
+
+            array2d<float> norm(cells_nr, cells_nc);
+            assign_all_pixels(norm, 0);
+
+            // memory for HOG features
+            const int hog_nr = std::max(cells_nr-2, 0);
+            const int hog_nc = std::max(cells_nc-2, 0);
+            init_hog(hog, hog_nr, hog_nc);
+
+            const int visible_nr = cells_nr*bin_size;
+            const int visible_nc = cells_nc*bin_size;
+
+            // First populate the gradient histograms
+            for (int y = 1; y < visible_nr-1; y++) 
+            {
+                for (int x = 1; x < visible_nc-1; x++) 
+                {
+                    const int r = std::min<int>(y, img.nr()-2);
+                    const int c = std::min<int>(x, img.nc()-2);
+
+                    matrix<double,2,1> grad;
+                    double v;
+                    get_gradient(r,c,img,grad,v);
+                    v = std::sqrt(v);
+
+                    // snap to one of 18 orientations
+                    double best_dot = 0;
+                    int best_o = 0;
+                    for (int o = 0; o < 9; o++) 
+                    {
+                        const double dot = dlib::dot(directions[o], grad); 
+                        if (dot > best_dot) 
+                        {
+                            best_dot = dot;
+                            best_o = o;
+                        } 
+                        else if (-dot > best_dot) 
+                        {
+                            best_dot = -dot;
+                            best_o = o+9;
+                        }
+                    }
+
+                    // add to 4 histograms around pixel using bilinear interpolation
+                    double xp = ((double)x+0.5)/(double)bin_size - 0.5;
+                    double yp = ((double)y+0.5)/(double)bin_size - 0.5;
+                    int ixp = (int)std::floor(xp);
+                    int iyp = (int)std::floor(yp);
+                    double vx0 = xp-ixp;
+                    double vy0 = yp-iyp;
+                    double vx1 = 1.0-vx0;
+                    double vy1 = 1.0-vy0;
+
+                    if (ixp >= 0 && iyp >= 0) 
+                        hist[iyp][ixp](best_o) += vy1*vx1*v;
+
+                    if (iyp+1 < cells_nr && ixp >= 0) 
+                        hist[iyp+1][ixp](best_o) += vy0*vx1*v;
+
+                    if (iyp >= 0 && ixp+1 < cells_nc) 
+                        hist[iyp][ixp+1](best_o) += vy1*vx0*v;
+
+                    if (ixp+1 < cells_nc && iyp+1 < cells_nr) 
+                        hist[iyp+1][ixp+1](best_o) += vy0*vx0*v;
+                }
+            }
+
+            // compute energy in each block by summing over orientations
+            for (int r = 0; r < cells_nr; ++r)
+            {
+                for (int c = 0; c < cells_nc; ++c)
+                {
+                    for (int o = 0; o < 9; o++) 
+                    {
+                        norm[r][c] += (hist[r][c](o) + hist[r][c](o+9)) * (hist[r][c](o) + hist[r][c](o+9));
+                    }
+                }
+            }
+
+            const double eps = 0.0001;
+            // compute features
+            for (int y = 0; y < hog_nr; y++) 
+            {
+                for (int x = 0; x < hog_nc; x++) 
+                {
+                    double n1, n2, n3, n4;
+
+                    n1 = 1.0 / std::sqrt(norm[y+1][x+1] + norm[y+1][x+2] + norm[y+2][x+1] + norm[y+2][x+2] + eps);
+                    n2 = 1.0 / std::sqrt(norm[y][x+1]   + norm[y][x+2]   + norm[y+1][x+1] + norm[y+1][x+2] + eps);
+                    n3 = 1.0 / std::sqrt(norm[y+1][x]   + norm[y+1][x+1] + norm[y+2][x]   + norm[y+2][x+1] + eps);
+                    n4 = 1.0 / std::sqrt(norm[y][x]     + norm[y][x+1]   + norm[y+1][x]   + norm[y+1][x+1] + eps);
+
+                    double t1 = 0;
+                    double t2 = 0;
+                    double t3 = 0;
+                    double t4 = 0;
+
+                    // contrast-sensitive features
+                    for (int o = 0; o < 18; o++) 
+                    {
+                        double h1 = std::min(hist[y+1][x+1](o) * n1, 0.2);
+                        double h2 = std::min(hist[y+1][x+1](o) * n2, 0.2);
+                        double h3 = std::min(hist[y+1][x+1](o) * n3, 0.2);
+                        double h4 = std::min(hist[y+1][x+1](o) * n4, 0.2);
+                        set_hog(hog,o,x,y,0.5 * (h1 + h2 + h3 + h4));
+                        t1 += h1;
+                        t2 += h2;
+                        t3 += h3;
+                        t4 += h4;
+                    }
+
+                    // contrast-insensitive features
+                    for (int o = 0; o < 9; o++) 
+                    {
+                        double sum = hist[y+1][x+1](o) + hist[y+1][x+1](o+9);
+                        double h1 = std::min(sum * n1, 0.2);
+                        double h2 = std::min(sum * n2, 0.2);
+                        double h3 = std::min(sum * n3, 0.2);
+                        double h4 = std::min(sum * n4, 0.2);
+                        set_hog(hog,o+18,x,y, 0.5 * (h1 + h2 + h3 + h4));
+                    }
+
+                    // texture features
+                    set_hog(hog,27,x,y, 0.2357 * t1);
+                    set_hog(hog,28,x,y, 0.2357 * t2);
+                    set_hog(hog,29,x,y, 0.2357 * t3);
+                    set_hog(hog,30,x,y, 0.2357 * t4);
+                }
+            }
+        }
+
+    // ------------------------------------------------------------------------------------
+
+        inline void create_fhog_bar_images (
+            dlib::array<matrix<float> >& mbars,
+            const long w
+        )
+        {
+            const long bdims = 9;
+            // Make the oriented lines we use to draw on each HOG cell.
+            mbars.resize(bdims);
+            dlib::array<array2d<unsigned char> > bars(bdims);
+            array2d<unsigned char> temp(w,w);
+            for (unsigned long i = 0; i < bars.size(); ++i)
+            {
+                assign_all_pixels(temp, 0);
+                draw_line(temp, point(w/2,0), point(w/2,w-1), 255);
+                rotate_image(temp, bars[i], i*-pi/bars.size());
+
+                mbars[i] = subm(matrix_cast<float>(mat(bars[i])), centered_rect(get_rect(bars[i]),w,w) );
+            }
+        }
+
+    } // end namespace impl_fhog
+
+// ----------------------------------------------------------------------------------------
+// ----------------------------------------------------------------------------------------
+// ----------------------------------------------------------------------------------------
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename image_type, 
+        typename T, 
+        typename mm1, 
+        typename mm2
+        >
+    void extract_fhog_features(
+        const image_type& img, 
+        dlib::array<array2d<T,mm1>,mm2>& hog, 
+        int bin_size = 8
+    ) 
+    {
+        return impl_fhog::impl_extract_fhog_features(img, hog, bin_size);
+    }
+
+    template <
+        typename image_type, 
+        typename T, 
+        typename mm
+        >
+    void extract_fhog_features(
+        const image_type& img, 
+        array2d<matrix<T,31,1>,mm>& hog, 
+        int bin_size = 8
+    ) 
+    {
+        return impl_fhog::impl_extract_fhog_features(img, hog, bin_size);
+    }
+
+// ----------------------------------------------------------------------------------------
+// ----------------------------------------------------------------------------------------
+
+    inline point image_to_fhog (
+        point p,
+        int sbin
+    )
+    {
+        // There is a one pixel border around the imag.
+        p -= point(1,1);
+        // There is also a 1 "cell" border around the HOG image formation.
+        return p/sbin - point(1,1);
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    inline point fhog_to_image (
+        point p,
+        int sbin
+    )
+    {
+        // Convert to image space and then set to the center of the cell.
+        return (p+point(1,1))*sbin + point(1,1) + point(sbin/2,sbin/2);
+    }
+
+// ----------------------------------------------------------------------------------------
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename T, 
+        typename mm1, 
+        typename mm2
+        >
+    matrix<unsigned char> draw_fhog(
+        const dlib::array<array2d<T,mm1>,mm2>& hog,
+        const long w = 15
+    )
+    {
+        dlib::array<matrix<float> > mbars;
+        impl_fhog::create_fhog_bar_images(mbars,w);
+
+        // now draw the bars onto the HOG cells
+        matrix<float> himg(hog[0].nr()*w, hog[0].nc()*w);
+        himg = 0;
+        for (unsigned long d = 0; d < mbars.size(); ++d)
+        {
+            for (long r = 0; r < himg.nr(); r+=w)
+            {
+                for (long c = 0; c < himg.nc(); c+=w)
+                {
+                    const float val = hog[d][r/w][c/w] + hog[d+mbars.size()][r/w][c/w] + hog[d+mbars.size()*2][r/w][c/w];
+                    if (val > 0)
+                    {
+                        set_subm(himg, r, c, w, w) += val*mbars[d%mbars.size()];
+                    }
+                }
+            }
+        }
+
+        const double thresh = mean(himg) + 4*stddev(himg);
+        return matrix_cast<unsigned char>(upperbound(round(himg*255/thresh),255));
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename T, 
+        typename mm
+        >
+    matrix<unsigned char> draw_fhog(
+        const array2d<matrix<T,31,1>,mm>& hog,
+        const long w = 15
+    )
+    {
+        dlib::array<matrix<float> > mbars;
+        impl_fhog::create_fhog_bar_images(mbars,w);
+
+        // now draw the bars onto the HOG cells
+        matrix<float> himg(hog.nr()*w, hog.nc()*w);
+        himg = 0;
+        for (unsigned long d = 0; d < mbars.size(); ++d)
+        {
+            for (long r = 0; r < himg.nr(); r+=w)
+            {
+                for (long c = 0; c < himg.nc(); c+=w)
+                {
+                    const float val = hog[r/w][c/w](d) + hog[r/w][c/w](d+mbars.size()) + hog[r/w][c/w](d+mbars.size()*2);
+                    if (val > 0)
+                    {
+                        set_subm(himg, r, c, w, w) += val*mbars[d%mbars.size()];
+                    }
+                }
+            }
+        }
+
+        const double thresh = mean(himg) + 4*stddev(himg);
+        return matrix_cast<unsigned char>(upperbound(round(himg*255/thresh),255));
+    }
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_fHOG_H__
+
--- a/dlib/image_transforms/fhog_abstract.h
+++ b/dlib/image_transforms/fhog_abstract.h
+// Copyright (C) 2013  Davis E. King (davis@dlib.net)
+// License: Boost Software License   See LICENSE.txt for the full license.
+#undef DLIB_fHOG_ABSTRACT_H__
+#ifdef DLIB_fHOG_ABSTRACT_H__
+
+#include "../matrix/matrix_abstract.h"
+#include "../array2d/array2d_kernel_abstract.h"
+#include "../array/array_kernel_abstract.h"
+
+namespace dlib
+{
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename image_type,
+        typename T, 
+        typename mm1, 
+        typename mm2
+        >
+    void extract_fhog_features(
+        const image_type& img, 
+        dlib::array<array2d<T,mm1>,mm2>& hog, 
+        int bin_size = 8
+    );
+
+    template <
+        typename image_type, 
+        typename T, 
+        typename mm
+        >
+    void extract_fhog_features(
+        const image_type& img, 
+        array2d<matrix<T,31,1>,mm>& hog, 
+        int bin_size = 8
+    );
+
+// ----------------------------------------------------------------------------------------
+
+    inline point image_to_fhog (
+        point p,
+        int bin_size 
+    );
+
+// ----------------------------------------------------------------------------------------
+
+    inline point fhog_to_image (
+        point p,
+        int bin_size 
+    );
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename T, 
+        typename mm1, 
+        typename mm2
+        >
+    matrix<unsigned char> draw_fhog(
+        const dlib::array<array2d<T,mm1>,mm2>& hog,
+        const long w = 15
+    );
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename T, 
+        typename mm
+        >
+    matrix<unsigned char> draw_fhog(
+        const array2d<matrix<T,31,1>,mm>& hog,
+        const long w = 15
+    );
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_fHOG_ABSTRACT_H__
+
+