Moved the HOG and image pyramid code into dlib proper.

--HG-- extra : convert_revision : svn%3Afdd8eb12-d10e-0410-9acb-85c331704f74/trunk%403460

Moved the HOG and image pyramid code into dlib proper.
--HG-- extra : convert_revision : svn%3Afdd8eb12-d10e-0410-9acb-85c331704f74/trunk%403460
92640ce2 · Davis King · 9ebbecb9 · 92640ce2 · 92640ce2 · 92640ce2
Commit 92640ce2 authored Feb 14, 2010 by Davis King
4 changed files
--- a/dlib/image_keypoint/hog.h
+++ b/dlib/image_keypoint/hog.h
--- a/dlib/image_keypoint/hog_abstract.h
+++ b/dlib/image_keypoint/hog_abstract.h
+// Copyright (C) 2010  Davis E. King (davis@dlib.net)
+// License: Boost Software License   See LICENSE.txt for the full license.
+#undef DLIB_HoG_ABSTRACT_H__
+#ifdef DLIB_HoG_ABSTRACT_H__
+
+#include "../algs.h"
+#include "../matrix.h"
+#include "../array2d.h"
+#include <cmath>
+
+namespace dlib
+{
+    enum 
+    {
+        hog_no_interpolation,     // interpolation_type_ value: no interpolation at all
+        hog_angle_interpolation,  // interpolation_type_ value: an edge is interpolated into its two nearest orientation bins
+        hog_full_interpolation,   // interpolation_type_ value: full spatial and angle interpolation
+        hog_signed_gradient,      // gradient_type_ value: gradient direction information is preserved
+        hog_unsigned_gradient     // gradient_type_ value: only the orientation is kept, not the direction
+    };
+
+    template <
+        unsigned long cell_size_,
+        unsigned long block_size_,
+        unsigned long cell_stride_,
+        unsigned long num_orientation_bins_,
+        int           gradient_type_,
+        int           interpolation_type_
+        >
+    class hog_image : noncopyable
+    {
+        /*!
+            REQUIREMENTS ON TEMPLATE PARAMETERS 
+                - cell_size_ > 1
+                - block_size_ > 0
+                - cell_stride_ > 0
+                - num_orientation_bins_ > 0
+                - gradient_type_ == hog_signed_gradient or hog_unsigned_gradient
+                - interpolation_type_ == hog_no_interpolation, hog_angle_interpolation, or 
+                                         hog_full_interpolation
+
+            INITIAL VALUE
+                 - size() == 0
+
+            WHAT THIS OBJECT REPRESENTS
+                This object is a tool for performing the image feature extraction algorithm
+                described in the following paper:
+                    Histograms of Oriented Gradients for Human Detection
+                    by Navneet Dalal and Bill Triggs
+
+                
+                To summarize the technique, this object tiles non-overlapping cells over an 
+                image.  Each of these cells is a box that is cell_size by cell_size pixels 
+                in size.  Each cell contains an array of size num_orientation_bins.  The array 
+                in a cell is used to store a histogram of all the edge orientations contained
+                within the cell's image region.  
+
+                Once the grid of cells and their histograms has been computed (via load())
+                you can obtain descriptors for each "block" in the image.  A block is just a
+                group of cells and blocks are allowed to overlap.  Each block is square and
+                made up of block_size*block_size cells.  So when you call operator()(r,c)
+                what you obtain is a vector that is just a bunch of cell histograms that
+                have been concatenated (and length normalized).
+
+                The template arguments control the various parameters of this algorithm.
+                Each template argument is also exposed as a public static constant of
+                the same name without the trailing underscore (e.g. cell_size).
+
+                The interpolation_type parameter controls the amount of interpolation
+                that happens during the creation of the edge orientation histograms.  It
+                varies from no interpolation at all to full spatial and angle interpolation.
+                
+                Angle interpolation means that an edge doesn't just go into its nearest 
+                histogram bin but instead gets interpolated into its two nearest neighbors.
+                Similarly, spatial interpolation means that an edge doesn't just go into
+                the cell it is in but it also contributes to nearby cells depending on how
+                close they are.  
+
+                The gradient_type parameter controls how edge orientations are measured.  
+                Consider the following ASCII art:
+                    signed gradients:           unsigned gradients:
+                           /\                           |
+                           ||                           |
+                       <---  ---->                ------+------
+                           ||                           |
+                           \/                           |
+
+                An image is full of gradients caused by edges between objects.  The direction 
+                of a gradient is determined by which end of it has pixels of highest intensity.  
+                So for example, suppose you had a picture containing black and white stripes.  
+                Then the magnitude of the gradient at each point in the image tells you if you 
+                are on the edge of a stripe and the gradient's orientation tells you which 
+                direction you have to move in to go into the white stripe.   
+
+                Signed gradients preserve this direction information while unsigned gradients
+                do not.  An unsigned gradient will only tell you the orientation of the stripe
+                but not which direction leads to the white stripe.   
+
+                Finally, the cell_stride parameter controls how much overlap you get between
+                blocks.  The maximum amount of overlap is obtained when cell_stride == 1 
+                and you would have no overlap if cell_stride == block_size. 
+
+            PUBLIC CONSTANTS AND TYPES
+                - min_size == cell_size*block_size+2.  This is the smallest number of rows
+                  and columns an image may have for load() to compute anything on it.
+                - descriptor_type is a column vector of doubles with 
+                  block_size*block_size*num_orientation_bins elements.  It is the type
+                  returned by operator().
+        !*/
+
+    public:
+
+        const static unsigned long cell_size = cell_size_;
+        const static unsigned long block_size = block_size_;
+        const static unsigned long cell_stride = cell_stride_;
+        const static unsigned long num_orientation_bins = num_orientation_bins_;
+        const static int           gradient_type = gradient_type_;
+        const static int           interpolation_type = interpolation_type_;
+
+        const static long min_size = cell_size*block_size+2;
+
+        typedef matrix<double, block_size*block_size*num_orientation_bins, 1> descriptor_type;
+
+        hog_image (
+        );
+        /*!
+            ensures
+                - this object is properly initialized
+                  (i.e. it is in the state described by INITIAL VALUE above)
+        !*/
+
+        template <
+            typename image_type
+            >
+        inline void load (
+            const image_type& img
+        );
+        /*!
+            requires
+                - image_type is a dlib::matrix or something convertible to a matrix
+                  via array_to_matrix()
+                - pixel_traits<typename image_type::type>::has_alpha == false
+            ensures
+                - if (img.nr() < min_size || img.nc() < min_size) then
+                    - the image is too small so we don't compute anything on it
+                      (min_size is cell_size*block_size+2)
+                    - #size() == 0
+                - else
+                    - generates a HOG image from the given image.   
+                    - #size() > 0
+        !*/
+
+        inline unsigned long size (
+        ) const;
+        /*!
+            ensures
+                - returns nr()*nc()
+                  (i.e. the number of block descriptors available via operator())
+        !*/
+
+        inline long nr (
+        ) const;
+        /*!
+            ensures
+                - returns the number of rows in this HOG image
+        !*/
+
+        inline long nc (
+        ) const;
+        /*!
+            ensures
+                - returns the number of columns in this HOG image
+        !*/
+
+        inline const descriptor_type& operator() (
+            long row,
+            long col
+        ) const;
+        /*!
+            requires
+                - 0 <= row < nr()
+                - 0 <= col < nc()
+            ensures
+                - returns the descriptor for the HOG block at the given row and column.  This descriptor 
+                  will include information from a window that is cell_size*block_size pixels wide and tall.
+                - The returned descriptor vector will have block_size*block_size*num_orientation_bins elements.
+                - The descriptor is returned by const reference.
+        !*/
+    };
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_HoG_ABSTRACT_H__
+
+
--- a/dlib/image_transforms/image_pyramid.h
+++ b/dlib/image_transforms/image_pyramid.h
+// Copyright (C) 2010  Davis E. King (davis@dlib.net)
+// License: Boost Software License   See LICENSE.txt for the full license.
+#ifndef DLIB_IMAGE_PYRaMID_H__
+#define DLIB_IMAGE_PYRaMID_H__
+
+#include "image_pyramid_abstract.h"
+#include "../pixel.h"
+#include "../array2d.h"
+
+namespace dlib
+{
+
+    class pyramid_down : noncopyable
+    {
+        /*!
+            WHAT THIS OBJECT REPRESENTS
+                Functor that produces one level of an image pyramid.  It smooths the
+                input with a separable 5x5 gaussian filter (taps 1 4 6 4 1 in each
+                direction) and keeps every other row and column, so the output is
+                roughly half the size of the input.
+
+                temp_img is a scratch buffer kept as a member.  It holds the result of
+                the horizontal pass and is set_size()'d on every call.
+        !*/
+    public:
+
+        template <
+            typename in_image_type,
+            typename out_image_type
+            >
+        void operator() (
+            const in_image_type& original,
+            out_image_type& down
+        )
+        /*!
+            requires
+                - original.nr() > 10 && original.nc() > 10
+                - neither pixel type has an alpha channel
+            ensures
+                - #down.nr() == (original.nr()-3)/2
+                - #down.nc() == (original.nc()-3)/2
+                - #down contains the smoothed and downsampled version of original
+        !*/
+        {
+            // make sure requires clause is not broken
+            DLIB_ASSERT(original.nr() > 10 && original.nc() > 10, 
+                        "\t void pyramid_down::operator()"
+                        << "\n\t original.nr(): " << original.nr()
+                        << "\n\t original.nc(): " << original.nc()
+                        << "\n\t this: " << this
+                        );
+
+            COMPILE_TIME_ASSERT( pixel_traits<typename in_image_type::type>::has_alpha == false );
+            COMPILE_TIME_ASSERT( pixel_traits<typename out_image_type::type>::has_alpha == false );
+
+            // The horizontal pass already drops every other column, so temp_img
+            // has full height but half width.
+            temp_img.set_size(original.nr(), (original.nc()-3)/2);
+            down.set_size((original.nr()-3)/2, (original.nc()-3)/2);
+
+
+            // This function applies a 5x5 gaussian filter to the image.  It
+            // does this by separating the filter into its horizontal and vertical
+            // components and then downsamples the image by dropping every other
+            // row and column.  Note that we can do these things all together in
+            // one step.
+
+            // apply row filter
+            for (long r = 0; r < temp_img.nr(); ++r)
+            {
+                // oc is the leftmost source column of the 5 tap window.  Since 
+                // temp_img.nc() == (original.nc()-3)/2 the rightmost tap, oc+4, 
+                // never exceeds original.nc()-1.
+                long oc = 0;
+                for (long c = 0; c < temp_img.nc(); ++c)
+                {
+                    unsigned long pix1;
+                    unsigned long pix2;
+                    unsigned long pix3;
+                    unsigned long pix4;
+                    unsigned long pix5;
+
+                    assign_pixel(pix1, original[r][oc]);
+                    assign_pixel(pix2, original[r][oc+1]);
+                    assign_pixel(pix3, original[r][oc+2]);
+                    assign_pixel(pix4, original[r][oc+3]);
+                    assign_pixel(pix5, original[r][oc+4]);
+
+                    // weights 1 4 6 4 1 (sum == 16).  Normalization is deferred
+                    // to the column pass.
+                    pix2 *= 4;
+                    pix3 *= 6;
+                    pix4 *= 4;
+                    
+                    assign_pixel(temp_img[r][c], pix1 + pix2 + pix3 + pix4 + pix5);
+                    oc += 2;
+                }
+            }
+
+
+            // apply column filter
+            // r is the center row of the 5 tap window.  The loop bound keeps r+2
+            // inside temp_img and yields exactly down.nr() output rows.
+            long dr = 0;
+            for (long r = 2; r < temp_img.nr()-2; r += 2)
+            {
+                for (long c = 0; c < temp_img.nc(); ++c)
+                {
+                    // Symmetric 1 4 6 4 1 window centered on row r.  The rows
+                    // below the center (r+1 and r+2) must be sampled as well as
+                    // the rows above it, otherwise the filter is one sided.
+                    unsigned long temp = temp_img[r-2][c] + 
+                                         temp_img[r-1][c]*4 +  
+                                         temp_img[r  ][c]*6 +  
+                                         temp_img[r+1][c]*4 +  
+                                         temp_img[r+2][c];  
+
+                    // Each pass has weights summing to 16, so divide by 16*16.
+                    assign_pixel(down[dr][c],temp/256);
+                }
+                ++dr;
+            }
+
+        }
+
+    private:
+
+        array2d<unsigned long>::kernel_1a temp_img;
+
+    };
+
+}
+
+#endif // DLIB_IMAGE_PYRaMID_H__
+
--- a/dlib/image_transforms/image_pyramid_abstract.h
+++ b/dlib/image_transforms/image_pyramid_abstract.h
+// Copyright (C) 2010  Davis E. King (davis@dlib.net)
+// License: Boost Software License   See LICENSE.txt for the full license.
+#undef DLIB_IMAGE_PYRaMID_ABSTRACT_H__
+#ifdef DLIB_IMAGE_PYRaMID_ABSTRACT_H__
+
+#include "dlib/pixel.h"
+#include "dlib/array2d.h"
+
+namespace dlib
+{
+
+    class pyramid_down : noncopyable
+    {
+        /*!
+            WHAT THIS OBJECT REPRESENTS
+                This is a simple functor to help create image pyramids.  Each call 
+                to operator() produces an image that is roughly half the size of 
+                its input.
+        !*/
+    public:
+
+        template <
+            typename in_image_type,
+            typename out_image_type
+            >
+        void operator() (
+            const in_image_type& original,
+            out_image_type& down
+        );
+        /*!
+            requires
+                - original.nr() > 10
+                - original.nc() > 10
+                - in_image_type == is an implementation of array2d/array2d_kernel_abstract.h
+                - out_image_type == is an implementation of array2d/array2d_kernel_abstract.h
+                - pixel_traits<typename in_image_type::type>::has_alpha == false
+                - pixel_traits<typename out_image_type::type>::has_alpha == false
+            ensures
+                - #down will contain an image that is roughly half the size of the original
+                  image.  To be specific, this function performs the following steps:
+                    - 1. applies a 5x5 gaussian filter to the original image to smooth it a little.
+                    - 2. every other row and column is discarded to create an image half the size
+                         of the original.  This smaller image is stored in #down.
+                - #down.nr() == (original.nr()-3)/2
+                - #down.nc() == (original.nc()-3)/2
+        !*/
+    };
+
+}
+
+#endif // DLIB_IMAGE_PYRaMID_ABSTRACT_H__
+
+