Added fine_hog_image object

da9de3d8 · Davis King · 124e2062 · da9de3d8 · da9de3d8 · da9de3d8
Commit da9de3d8 authored Jan 21, 2012 by Davis King
Showing with 656 additions and 0 deletions

image_keypoint.h dlib/image_keypoint.h +1 -0

fine_hog_image.h dlib/image_keypoint/fine_hog_image.h +379 -0

fine_hog_image_abstract.h dlib/image_keypoint/fine_hog_image_abstract.h +276 -0

No files found.
--- a/dlib/image_keypoint.h
+++ b/dlib/image_keypoint.h
@@ -7,6 +7,7 @@
 #include "image_keypoint/hessian_pyramid.h"
 #include "image_keypoint/hog.h"
 #include "image_keypoint/poly_image.h"
+#include "image_keypoint/fine_hog_image.h"
 #include "image_keypoint/hashed_feature_image.h"
 #include "image_keypoint/nearest_neighbor_feature_image.h"

--- a/dlib/image_keypoint/fine_hog_image.h
+++ b/dlib/image_keypoint/fine_hog_image.h
+// Copyright (C) 2012  Davis E. King (davis@dlib.net)
+// License: Boost Software License   See LICENSE.txt for the full license.
+#ifndef DLIB_FINE_HOG_IMaGE_H__
+#define DLIB_FINE_HOG_IMaGE_H__
+#include "fine_hog_image_abstract.h"
+#include "../array2d.h"
+#include "../matrix.h"
+#include "hog.h"
+namespace dlib
+{
+    // Dense HOG feature extractor.  load() stores one interpolated gradient orientation
+    // record for every interior pixel of an image.  operator() then sums those records
+    // over a block_size x block_size grid of cells, each cell_size x cell_size pixels, to
+    // build a length normalized descriptor.  Descriptors are available at block origins
+    // spaced pixel_stride pixels apart.  The full contract for this object is given in
+    // fine_hog_image_abstract.h.
+    template <
+        unsigned long cell_size_,
+        unsigned long block_size_,
+        unsigned long pixel_stride_,
+        unsigned char num_orientation_bins_,
+        int           gradient_type_
+        >
+    class fine_hog_image : noncopyable
+    {
+        COMPILE_TIME_ASSERT(cell_size_ > 1);
+        COMPILE_TIME_ASSERT(block_size_ > 0);
+        COMPILE_TIME_ASSERT(pixel_stride_ > 0);
+        COMPILE_TIME_ASSERT(num_orientation_bins_ > 0);
+        COMPILE_TIME_ASSERT( gradient_type_ == hog_signed_gradient ||
+                             gradient_type_ == hog_unsigned_gradient);
+    public:
+        const static unsigned long cell_size = cell_size_;
+        const static unsigned long block_size = block_size_;
+        const static unsigned long pixel_stride = pixel_stride_;
+        const static unsigned long num_orientation_bins = num_orientation_bins_;
+        const static int           gradient_type = gradient_type_;
+        // Smallest image side length that yields a descriptor: one full block of cells
+        // plus the 1 pixel border kept on each side for the gradient computation.
+        const static long min_size = cell_size*block_size+2;
+        // One histogram of num_orientation_bins bins for each cell in a block.
+        typedef matrix<double, block_size*block_size*num_orientation_bins, 1> descriptor_type;
+        // Creates an empty object, i.e. size() == 0.
+        fine_hog_image (
+        ) : 
+            num_block_rows(0),
+            num_block_cols(0)
+        {}
+        // Discards everything computed by load() and returns to the initial empty state.
+        void clear (
+        )
+        {
+            num_block_rows = 0;
+            num_block_cols = 0;
+            hist_counts.clear();
+        }
+        // This object has no run time configuration (everything is a template argument),
+        // so there is nothing to copy.
+        void copy_configuration (
+            const fine_hog_image&
+        ){}
+        // Computes the per pixel gradient records for img.  Pixel types with an alpha
+        // channel are rejected at compile time.
+        template <
+            typename image_type
+            >
+        inline void load (
+            const image_type& img
+        )
+        {
+            COMPILE_TIME_ASSERT( pixel_traits<typename image_type::type>::has_alpha == false );
+            load_impl(array_to_matrix(img));
+        }
+        // Same as clear() since all of this object's state is populated by load().
+        inline void unload(
+        ) { clear(); }
+        // Total number of descriptors available, i.e. nr()*nc().
+        inline unsigned long size (
+        ) const { return static_cast<unsigned long>(nr()*nc()); }
+        // Number of rows of descriptors.
+        inline long nr (
+        ) const { return num_block_rows; }
+        // Number of columns of descriptors.
+        inline long nc (
+        ) const { return num_block_cols; }
+        // Length of each descriptor vector.
+        long get_num_dimensions (
+        ) const
+        {
+            return block_size*block_size*num_orientation_bins;
+        }
+        // Builds and returns the descriptor for the block at (row,col).  The result is
+        // written into the member des, so the returned reference is overwritten by the
+        // next call to this function.
+        inline const descriptor_type& operator() (
+            long row,
+            long col
+        ) const
+        {
+            // make sure requires clause is not broken
+            DLIB_ASSERT( 0 <= row && row < nr() &&
+                         0 <= col && col < nc(),
+                "\t descriptor_type fine_hog_image::operator()()"
+                << "\n\t invalid row or col argument"
+                << "\n\t row:  " << row
+                << "\n\t col:  " << col 
+                << "\n\t nr(): " << nr() 
+                << "\n\t nc(): " << nc() 
+                << "\n\t this: " << this
+                );
+            // convert from block coordinates to the coordinates of the block's top left
+            // entry in hist_counts
+            row *= pixel_stride;
+            col *= pixel_stride;
+            des = 0;
+            // off is the index of the first histogram bin belonging to the current cell
+            unsigned long off = 0;
+            for (unsigned long r = 0; r < block_size; ++r)
+            {
+                for (unsigned long c = 0; c < block_size; ++c)
+                {
+                    // accumulate every pixel of cell (r,c) into that cell's histogram
+                    for (unsigned long rr = 0; rr < cell_size; ++rr)
+                    {
+                        for (unsigned long cc = 0; cc < cell_size; ++cc)
+                        {
+                            const histogram_count& hist = hist_counts[row + r*cell_size + rr][col + c*cell_size + cc];
+                            des(off + hist.quantized_angle_lower) += hist.lower_strength;
+                            des(off + hist.quantized_angle_upper) += hist.upper_strength;
+                        }
+                    }
+                    off += num_orientation_bins;
+                }
+            }
+            // normalize the length.  The small constant avoids a division by zero when
+            // the block contains no gradient at all.
+            des /= length(des) + 1e-8;
+            return des;
+        }
+        // Returns the rectangle of the input image covered by the block at (row,col).
+        const rectangle get_block_rect (
+            long row,
+            long col
+        ) const
+        {
+            row *= pixel_stride;
+            col *= pixel_stride;
+            // do this to account for the 1 pixel padding we use all around the image
+            ++row;
+            ++col;
+            return rectangle(col, row, col+cell_size*block_size-1, row+cell_size*block_size-1);
+        }
+        // Maps an image location to the coordinates of the block centered near it.
+        const point image_to_feat_space (
+            const point& p
+        ) const
+        {
+            // distance from the image edge to the center of the first block: the 1 pixel
+            // padding plus half the block width
+            const long border_size = 1 + cell_size*block_size/2;
+            return (p-point(border_size,border_size))/(long)pixel_stride;
+        }
+        // Maps a rectangle from image space to feature space, corner by corner.
+        const rectangle image_to_feat_space (
+            const rectangle& rect
+        ) const
+        {
+            return rectangle(image_to_feat_space(rect.tl_corner()), image_to_feat_space(rect.br_corner()));
+        }
+        // Maps block coordinates to the image location at the center of that block.
+        const point feat_to_image_space (
+            const point& p
+        ) const
+        {
+            // must match the border_size used by image_to_feat_space()
+            const long border_size = 1 + cell_size*block_size/2;
+            return p*(long)pixel_stride + point(border_size,border_size);
+        }
+        // Maps a rectangle from feature space to image space, corner by corner.
+        const rectangle feat_to_image_space (
+            const rectangle& rect
+        ) const
+        {
+            return rectangle(feat_to_image_space(rect.tl_corner()), feat_to_image_space(rect.br_corner()));
+        }
+        // these _PRIVATE_ functions are only here as a workaround for a bug in visual studio 2005.  
+        // Stream layout: hist_counts.nc(), hist_counts.nr(), every histogram_count in
+        // enumeration order, num_block_rows, num_block_cols.
+        void _PRIVATE_serialize (std::ostream& out) const
+        {
+            // serialize hist_counts
+            serialize(hist_counts.nc(),out);
+            serialize(hist_counts.nr(),out);
+            hist_counts.reset();
+            while (hist_counts.move_next())
+                hist_counts.element().serialize(out);
+            hist_counts.reset();
+            serialize(num_block_rows, out);
+            serialize(num_block_cols, out);
+        }
+        // Reads back the layout written by _PRIVATE_serialize().
+        void _PRIVATE_deserialize (std::istream& in )
+        {
+            // deserialize item.hist_counts
+            long nc, nr;
+            deserialize(nc,in);
+            deserialize(nr,in);
+            hist_counts.set_size(nr,nc);
+            while (hist_counts.move_next())
+                hist_counts.element().deserialize(in); 
+            hist_counts.reset();
+            deserialize(num_block_rows, in);
+            deserialize(num_block_cols, in);
+        }
+    private:
+        // Does the work of load().  img must support nr(), nc() and img(r,c) element
+        // access, which is what load() provides via array_to_matrix().
+        template <
+            typename image_type
+            >
+        void load_impl (
+            const image_type& img
+        )
+        {
+            // Note that we keep a border of 1 pixel all around the image so that we don't have
+            // to worry about running outside the image when computing the horizontal and vertical 
+            // gradients.
+            // check if the window is just too small
+            if (img.nr() < min_size || img.nc() < min_size)
+            {
+                // If the image is smaller than our windows then there aren't any descriptors at all!
+                num_block_rows = 0;
+                num_block_cols = 0;
+                hist_counts.clear();
+                return;
+            }
+            // hist_counts[r][c] describes the gradient at image pixel (r+1,c+1)
+            hist_counts.set_size(img.nr()-2, img.nc()-2);
+            const double pi = 3.1415926535898;
+            for (long r = 0; r < hist_counts.nr(); ++r)
+            {
+                for (long c = 0; c < hist_counts.nc(); ++c)
+                {
+                    // intensities of the 4 neighbors of image pixel (r+1,c+1)
+                    unsigned long left; 
+                    unsigned long right;
+                    unsigned long top;   
+                    unsigned long bottom; 
+                    assign_pixel(left,   img(r+1,c));
+                    assign_pixel(right,  img(r+1,c+2));
+                    assign_pixel(top,    img(r  ,c+1));
+                    assign_pixel(bottom, img(r+2,c+1));
+                    // cast to a signed type before subtracting so the difference can be negative
+                    double grad_x = (long)right-(long)left;
+                    double grad_y = (long)top-(long)bottom;
+                    // obtain the angle of the gradient.  Make sure it is scaled between 0 and 1.
+                    double angle = std::max(0.0, std::atan2(grad_y, grad_x)/pi + 1)/2;
+                    if (gradient_type == hog_unsigned_gradient)
+                    {
+                        // fold opposite directions onto each other
+                        angle *= 2;
+                        if (angle >= 1)
+                            angle -= 1;
+                    }
+                    // now scale angle to between 0 and num_orientation_bins
+                    angle *= num_orientation_bins;
+                    const double strength = std::sqrt(grad_y*grad_y + grad_x*grad_x);
+                    // The two bins neighboring angle.  The modulo wraps the last bin
+                    // around to bin 0.
+                    unsigned char quantized_angle_lower = static_cast<unsigned char>(std::floor(angle));
+                    unsigned char quantized_angle_upper = static_cast<unsigned char>(std::ceil(angle));
+                    quantized_angle_lower %= num_orientation_bins;
+                    quantized_angle_upper %= num_orientation_bins;
+                    // fraction of the way from the lower bin to the upper bin
+                    const double angle_split = (angle-std::floor(angle));
+                    const double upper_strength = angle_split*strength;
+                    const double lower_strength = (1-angle_split)*strength;
+                    // Stick into gradient counts.  Note that we linearly interpolate between neighboring
+                    // histogram buckets.
+                    histogram_count& hist = hist_counts[r][c];
+                    hist.quantized_angle_lower = quantized_angle_lower;
+                    hist.quantized_angle_upper = quantized_angle_upper;
+                    hist.lower_strength = static_cast<float>(lower_strength);
+                    hist.upper_strength = static_cast<float>(upper_strength);
+                }
+            }
+            // Now figure out how many feature extraction blocks we should have.  Block origins
+            // sit at multiples of pixel_stride and the last usable origin is at
+            // hist_counts.nr() - block_size*cell_size, which the min_size check above guarantees
+            // is not negative.  So each dimension holds floor(last/pixel_stride) + 1 blocks.
+            // This is always at least 1, and the bottom right pixel read by operator() for the
+            // last block is still inside hist_counts.
+            const long block_width = static_cast<long>(block_size*cell_size);
+            const long stride = static_cast<long>(pixel_stride);
+            num_block_rows = (hist_counts.nr() - block_width)/stride + 1;
+            num_block_cols = (hist_counts.nc() - block_width)/stride + 1;
+        }
+        // The gradient at one pixel, split between the two orientation bins it falls between.
+        struct histogram_count
+        {
+            unsigned char quantized_angle_lower;
+            unsigned char quantized_angle_upper;
+            float lower_strength;
+            float upper_strength;
+            void serialize(std::ostream& out) const
+            {
+                dlib::serialize(quantized_angle_lower, out);
+                dlib::serialize(quantized_angle_upper, out);
+                dlib::serialize(lower_strength, out);
+                dlib::serialize(upper_strength, out);
+            }
+            // reads the fields in the same order serialize() writes them
+            void deserialize(std::istream& in) 
+            {
+                dlib::deserialize(quantized_angle_lower, in);
+                dlib::deserialize(quantized_angle_upper, in);
+                dlib::deserialize(lower_strength, in);
+                dlib::deserialize(upper_strength, in);
+            }
+        };
+        // per pixel gradient records, 2 rows and 2 columns smaller than the loaded image
+        array2d<histogram_count> hist_counts;
+        // scratch buffer that operator() fills in and returns a reference to
+        mutable descriptor_type des;
+        long num_block_rows;
+        long num_block_cols;
+    };
+// ----------------------------------------------------------------------------------------
+    // Writes the state of item to out by forwarding to the object's
+    // _PRIVATE_serialize() member function.
+    template <
+        unsigned long cell_sz,
+        unsigned long block_sz,
+        unsigned long stride,
+        unsigned char num_bins,
+        int           grad_type
+        >
+    void serialize (
+        const fine_hog_image<cell_sz,block_sz,stride,num_bins,grad_type>& item,
+        std::ostream& out
+    )
+    {
+        item._PRIVATE_serialize(out);
+    }
+    // Restores the state of item from in by forwarding to the object's
+    // _PRIVATE_deserialize() member function.
+    template <
+        unsigned long cell_sz,
+        unsigned long block_sz,
+        unsigned long stride,
+        unsigned char num_bins,
+        int           grad_type
+        >
+    void deserialize (
+        fine_hog_image<cell_sz,block_sz,stride,num_bins,grad_type>& item,
+        std::istream& in 
+    )
+    {
+        item._PRIVATE_deserialize(in);
+    }
+// ----------------------------------------------------------------------------------------
+}
+#endif // DLIB_FINE_HOG_IMaGE_H__
--- a/dlib/image_keypoint/fine_hog_image_abstract.h
+++ b/dlib/image_keypoint/fine_hog_image_abstract.h
+// Copyright (C) 2012  Davis E. King (davis@dlib.net)
+// License: Boost Software License   See LICENSE.txt for the full license.
+#undef DLIB_FINE_HOG_IMaGE_ABSTRACT_H__
+#ifdef DLIB_FINE_HOG_IMaGE_ABSTRACT_H__
+#include "../array2d.h"
+#include "../matrix.h"
+#include "hog_abstract.h"
+namespace dlib
+{
+    template <
+        unsigned long cell_size_,
+        unsigned long block_size_,
+        unsigned long pixel_stride_,
+        unsigned char num_orientation_bins_,
+        int           gradient_type_
+        >
+    class fine_hog_image : noncopyable
+    {
+        /*!
+            REQUIREMENTS ON TEMPLATE PARAMETERS 
+                - cell_size_ > 1
+                - block_size_ > 0
+                - pixel_stride_ > 0
+                - num_orientation_bins_ > 0
+                - gradient_type_ == hog_signed_gradient or hog_unsigned_gradient
+            INITIAL VALUE
+                 - size() == 0
+            WHAT THIS OBJECT REPRESENTS
+                This object is a version of the hog_image that allows you to extract HOG
+                features at a finer resolution.  The hog_image can only extract HOG features
+                cell_size_ pixels apart.  However, this object, the fine_hog_image, can 
+                extract HOG features from every pixel location.
+                The template arguments to this class have the same meaning as they do for
+                the hog_image, except for pixel_stride_.  This controls the stepping between
+                HOG extraction locations.  A value of 1 indicates HOG features should be
+                extracted from every pixel location.  A value of 2 indicates every other pixel
+                location, etc.
+                Finally, note that the interpolation used by this object is equivalent
+                to using hog_angle_interpolation with hog_image.  
+            THREAD SAFETY
+                Concurrent access to an instance of this object is not safe and should be protected
+                by a mutex lock except for the case where you are copying the configuration 
+                (via copy_configuration()) of a fine_hog_image object to many other threads.  
+                In this case, it is safe to copy the configuration of a shared object so long
+                as no other operations are performed on it.
+        !*/
+    public:
+        const static unsigned long cell_size = cell_size_;
+        const static unsigned long block_size = block_size_;
+        const static unsigned long pixel_stride = pixel_stride_;
+        const static unsigned long num_orientation_bins = num_orientation_bins_;
+        const static int           gradient_type = gradient_type_;
+        const static long min_size = cell_size*block_size+2;
+        typedef matrix<double, block_size*block_size*num_orientation_bins, 1> descriptor_type;
+        fine_hog_image (
+        );
+        /*!
+            ensures
+                - this object is properly initialized
+        !*/
+        void clear (
+        );
+        /*!
+            ensures
+                - this object will have its initial value
+        !*/
+        void copy_configuration (
+            const fine_hog_image&
+        );
+        /*!
+            ensures
+                - copies all the state information of the given fine_hog_image into *this, 
+                  except for state information populated by load().  More precisely, given 
+                  two fine_hog_image objects H1 and H2, the following sequence of 
+                  instructions should always result in both of them having the exact same 
+                  state.
+                    H2.copy_configuration(H1);
+                    H1.load(img);
+                    H2.load(img);
+        !*/
+        template <
+            typename image_type
+            >
+        inline void load (
+            const image_type& img
+        );
+        /*!
+            requires
+                - image_type is a dlib::matrix or something convertible to a matrix
+                  via array_to_matrix()
+                - pixel_traits<typename image_type::type>::has_alpha == false
+            ensures
+                - if (img.nr() < min_size || img.nc() < min_size) then
+                    - the image is too small so we don't compute anything on it
+                    - #size() == 0
+                - else
+                    - generates a HOG image from the given image.   
+                    - #size() > 0
+        !*/
+        inline void unload(
+        );
+        /*!
+            ensures
+                - #nr() == 0
+                - #nc() == 0
+                - clears only the state information which is populated by load().  For 
+                  example, let H be a fine_hog_image object.  Then consider the two 
+                  sequences of instructions:
+                    Sequence 1:
+                        H.load(img);      
+                        H.unload();
+                        H.load(img);
+                    Sequence 2:
+                        H.load(img);
+                  Both sequence 1 and sequence 2 should have the same effect on H.  
+        !*/
+        inline unsigned long size (
+        ) const;
+        /*!
+            ensures
+                - returns nr()*nc()
+        !*/
+        inline long nr (
+        ) const;
+        /*!
+            ensures
+                - returns the number of rows in this HOG image
+        !*/
+        inline long nc (
+        ) const;
+        /*!
+            ensures
+                - returns the number of columns in this HOG image
+        !*/
+        long get_num_dimensions (
+        ) const;
+        /*!
+            ensures
+                - returns the number of dimensions in the feature vectors generated by
+                  this object.  
+                - In particular, returns the value block_size*block_size*num_orientation_bins
+        !*/
+        inline const descriptor_type& operator() (
+            long row,
+            long col
+        ) const;
+        /*!
+            requires
+                - 0 <= row < nr()
+                - 0 <= col < nc()
+            ensures
+                - returns the descriptor for the HOG block at the given row and column.  This descriptor 
+                  will include information from a window that is located at get_block_rect(row,col) in
+                  the original image given to load().
+                - The returned descriptor vector will have get_num_dimensions() elements.
+                - The returned descriptor is scaled by 1/(its length + 1e-8), so it has
+                  approximately unit length unless the window contains no gradient at all.
+                - The returned reference refers to a buffer inside *this which is overwritten 
+                  by every call to operator().  So make a copy of the descriptor if you need 
+                  it to outlive the next call.
+        !*/
+        const rectangle get_block_rect (
+            long row,
+            long col
+        ) const;
+        /*!
+            ensures
+                - returns a rectangle that tells you what part of the original image is associated
+                  with a particular HOG block.  That is, what part of the input image is associated
+                  with (*this)(row,col).
+                - The returned rectangle will be cell_size*block_size pixels wide and tall.
+        !*/
+        const point image_to_feat_space (
+            const point& p
+        ) const;
+        /*!
+            ensures
+                - Each local feature is extracted from a certain point in the input image.
+                  This function returns the identity of the local feature corresponding
+                  to the image location p.  Or in other words, let P == image_to_feat_space(p), 
+                  then (*this)(P.y(),P.x()) == the local feature closest to, or centered at, 
+                  the point p in the input image.  Note that some image points might not have 
+                  corresponding feature locations.  E.g. border points or points outside the 
+                  image.  In these cases the returned point will be outside get_rect(*this).
+        !*/
+        const rectangle image_to_feat_space (
+            const rectangle& rect
+        ) const;
+        /*!
+            ensures
+                - returns rectangle(image_to_feat_space(rect.tl_corner()), image_to_feat_space(rect.br_corner()));
+                  (i.e. maps a rectangle from image space to feature space)
+        !*/
+        const point feat_to_image_space (
+            const point& p
+        ) const;
+        /*!
+            ensures
+                - returns the location in the input image space corresponding to the center
+                  of the local feature at point p.  In other words, this function computes
+                  the inverse of image_to_feat_space().  Note that it may only do so approximately, 
+                  since more than one image location might correspond to the same local feature.  
+                  That is, image_to_feat_space() might not be invertible so this function gives 
+                  the closest possible result.
+        !*/
+        const rectangle feat_to_image_space (
+            const rectangle& rect
+        ) const;
+        /*!
+            ensures
+                - returns rectangle(feat_to_image_space(rect.tl_corner()), feat_to_image_space(rect.br_corner()));
+                  (i.e. maps a rectangle from feature space to image space)
+        !*/
+    };
+// ----------------------------------------------------------------------------------------
+    template <
+        unsigned long T1,
+        unsigned long T2,
+        unsigned long T3,
+        unsigned char T4,
+        int           T5
+        >
+    void serialize (
+        const fine_hog_image<T1,T2,T3,T4,T5>& item,
+        std::ostream& out
+    );
+    /*!
+        provides serialization support.  The state written to out includes the data
+        computed by the most recent call to item.load().
+    !*/
+    template <
+        unsigned long T1,
+        unsigned long T2,
+        unsigned long T3,
+        unsigned char T4,
+        int           T5
+        >
+    void deserialize (
+        fine_hog_image<T1,T2,T3,T4,T5>& item,
+        std::istream& in 
+    );
+    /*!
+        provides deserialization support.  Reads back, into item, the state written 
+        by serialize() for a fine_hog_image with the same template arguments.
+    !*/
+// ----------------------------------------------------------------------------------------
+}
+#endif // DLIB_FINE_HOG_IMaGE_ABSTRACT_H__