Added the poly_image local feature extractor.

226f5af1 · Davis King · 2b4e363f · 226f5af1 · 226f5af1 · 226f5af1
Commit 226f5af1 authored Jan 01, 2012 by Davis King
4 changed files
--- a/dlib/image_keypoint.h
+++ b/dlib/image_keypoint.h
@@ -6,6 +6,7 @@
 #include "image_keypoint/surf.h"
 #include "image_keypoint/hessian_pyramid.h"
 #include "image_keypoint/hog.h"
+#include "image_keypoint/poly_image.h"
 #include "image_keypoint/hashed_feature_image.h"
 #include "image_keypoint/nearest_neighbor_feature_image.h"

--- a/dlib/image_keypoint/build_separable_poly_filters.h
+++ b/dlib/image_keypoint/build_separable_poly_filters.h
+// Copyright (C) 2011  Davis E. King (davis@dlib.net)
+// License: Boost Software License   See LICENSE.txt for the full license.
+#ifndef DLIB_BUILD_SEPARABLE_PoLY_FILTERS_H__
+#define DLIB_BUILD_SEPARABLE_PoLY_FILTERS_H__
+#include "../matrix.h"
+#include "surf.h"
+#include "../uintn.h"
+#include <vector>
+namespace dlib
+{
+// ----------------------------------------------------------------------------------------
+    // A separable 2D filter stored as a pair of column vectors.  As produced by the
+    // builder functions in this file, "first" is the row filter and "second" is the
+    // column filter.
+    typedef std::pair<matrix<double,0,1>, matrix<double,0,1> > separable_filter_type;
+    // Same layout as separable_filter_type, but with int32 coefficients.
+    typedef std::pair<matrix<int32,0,1>, matrix<int32,0,1> > separable_int32_filter_type;
+// ----------------------------------------------------------------------------------------
+    inline std::vector<std::vector<separable_filter_type> > build_separable_poly_filters (
+        const long window_size,
+        const long order = 2
+    ) 
+    /*!
+        requires
+            - 1 <= order <= 6
+            - window_size >= 3 && window_size is odd
+        ensures
+            - Builds the filters that compute a Gaussian weighted least squares fit of
+              a 2D polynomial of the given order to a window_size by window_size patch
+              (the weights come from a Gaussian with sigma == window_size/4.0).
+            - returns a vector V such that:
+                - V.size() == (order+1)*(order+2)/2, i.e. one entry per polynomial
+                  term.  The terms are ordered 1, x, y, x*y, x^2, y^2, followed by
+                  the degree 3 terms, then the degree 4 terms, and so on.
+                - V[i] holds the parts of the filter for the i-th term.  Summing
+                  V[i][j].second*trans(V[i][j].first) over all j reproduces the 2D
+                  filter (up to the negligible parts that are discarded).
+            - the "first" element is the row_filter, the second is the col_filter.
+            - Some filters are not totally separable and that's why they are grouped
+              into vectors of vectors.  The groups are all the parts of a partially
+              separable filter.
+    !*/
+    {
+        // NOTE: this function is defined in a header so it must be inline, otherwise
+        // including this file from more than one translation unit breaks the link.
+
+        // make sure requires clause is not broken
+        DLIB_ASSERT(1 <= order && order <= 6 &&
+                    window_size >= 3 && (window_size%2) == 1,
+            "\t std::vector<std::vector<separable_filter_type> > build_separable_poly_filters()"
+            << "\n\t Invalid arguments were given to this function."
+            << "\n\t order:       " << order 
+            << "\n\t window_size: " << window_size 
+            );
+
+        // Number of monomials in a 2D polynomial of the given order, that is,
+        // (order+1)*(order+2)/2.  Out of range orders fall back to the order 2 value.
+        long num_filters = 6;
+        switch (order)
+        {
+            case 1: num_filters = 3; break;
+            case 2: num_filters = 6; break;
+            case 3: num_filters = 10; break;
+            case 4: num_filters = 15; break;
+            case 5: num_filters = 21; break;
+            case 6: num_filters = 28; break;
+        }
+
+        // X is the design matrix: one row per pixel of the window and one column per
+        // monomial.  G holds the square roots of the per pixel Gaussian weights.
+        matrix<double> X(window_size*window_size,num_filters);
+        matrix<double,0,1> G(window_size*window_size,1);
+        const double sigma = window_size/4.0;
+        long cnt = 0;
+        // -window_size/2 is an integer division, so for an odd window_size the loops
+        // visit the integer offsets -(window_size-1)/2 ... (window_size-1)/2.
+        for (double x = -window_size/2; x <= window_size/2; ++x)
+        {
+            for (double y = -window_size/2; y <= window_size/2; ++y)
+            {
+                X(cnt, 0) = 1;
+                X(cnt, 1) = x;
+                X(cnt, 2) = y;
+                // degree 2 terms
+                if (X.nc() > 5)
+                {
+                    X(cnt, 3) = x*y;
+                    X(cnt, 4) = x*x;
+                    X(cnt, 5) = y*y;
+                }
+                // degree 3 terms
+                if (X.nc() > 9)
+                {
+                    X(cnt, 6) = x*x*x;
+                    X(cnt, 7) = y*x*x;
+                    X(cnt, 8) = y*y*x;
+                    X(cnt, 9) = y*y*y;
+                }
+                // degree 4 terms
+                if (X.nc() > 14)
+                {
+                    X(cnt, 10) = x*x*x*x;
+                    X(cnt, 11) = y*x*x*x;
+                    X(cnt, 12) = y*y*x*x;
+                    X(cnt, 13) = y*y*y*x;
+                    X(cnt, 14) = y*y*y*y;
+                }
+                // degree 5 terms
+                if (X.nc() > 20)
+                {
+                    X(cnt, 15) = x*x*x*x*x;
+                    X(cnt, 16) = y*x*x*x*x;
+                    X(cnt, 17) = y*y*x*x*x;
+                    X(cnt, 18) = y*y*y*x*x;
+                    X(cnt, 19) = y*y*y*y*x;
+                    X(cnt, 20) = y*y*y*y*y;
+                }
+                // degree 6 terms
+                if (X.nc() > 27)
+                {
+                    X(cnt, 21) = x*x*x*x*x*x;
+                    X(cnt, 22) = y*x*x*x*x*x;
+                    X(cnt, 23) = y*y*x*x*x*x;
+                    X(cnt, 24) = y*y*y*x*x*x;
+                    X(cnt, 25) = y*y*y*y*x*x;
+                    X(cnt, 26) = y*y*y*y*y*x;
+                    X(cnt, 27) = y*y*y*y*y*y;
+                }
+                G(cnt) = std::sqrt(gaussian(x,y,sigma));
+                ++cnt;
+            }
+        }
+
+        // Weighted least squares solution operator.  Row r of S, reshaped into a
+        // window_size by window_size matrix, is the 2D filter that yields the
+        // coefficient of the r-th monomial.
+        X = diagm(G)*X;
+        const matrix<double> S = inv(trans(X)*X)*trans(X)*diagm(G);
+
+        matrix<double> u,v, temp;
+        matrix<double,0,1> w;
+        std::vector<std::vector<separable_filter_type> > results(num_filters);
+        for (long r = 0; r < S.nr(); ++r)
+        {
+            temp = reshape(rowm(S,r), window_size, window_size);
+            // Split the 2D filter into separable parts via the SVD, keeping only
+            // the parts whose singular value is not negligible.
+            svd3(temp,u,w,v);
+            const double thresh = max(w)*1e-8;
+            for (long i = 0; i < w.size(); ++i)
+            {
+                if (w(i) > thresh)
+                {
+                    // The singular value is shared evenly between the two factors.
+                    const matrix<double,0,1> col_filter = std::sqrt(w(i))*colm(u,i);
+                    const matrix<double,0,1> row_filter = std::sqrt(w(i))*colm(v,i);
+                    results[r].push_back(std::make_pair(row_filter, col_filter));
+                }
+            }
+        }
+        return results;
+    }
+// ----------------------------------------------------------------------------------------
+    inline std::vector<std::vector<separable_int32_filter_type> > build_separable_int32_poly_filters (
+        const long window_size,
+        const long order = 2,
+        const double max_range = 300.0
+    ) 
+    /*!
+        requires
+            - 1 <= order <= 6
+            - window_size >= 3 && window_size is odd
+            - max_range > 1
+        ensures
+            - returns integer valued versions of the filters produced by
+              build_separable_poly_filters(window_size, order).  
+            - Each group of filters (i.e. all the parts belonging to one polynomial
+              term) is scaled by max_range divided by the largest magnitude
+              coefficient in that group and then rounded to the nearest integer.
+              Note that this means different groups are scaled by different amounts.
+            - the "first" element is the row_filter, the second is the col_filter.
+    !*/
+    {
+        // NOTE: this function is defined in a header so it must be inline, otherwise
+        // including this file from more than one translation unit breaks the link.
+
+        // make sure requires clause is not broken
+        DLIB_ASSERT(1 <= order && order <= 6 &&
+                    window_size >= 3 && (window_size%2) == 1 &&
+                    max_range > 1,
+            "\t std::vector<std::vector<separable_int32_filter_type> > build_separable_int32_poly_filters()"
+            << "\n\t Invalid arguments were given to this function."
+            << "\n\t order:       " << order 
+            << "\n\t window_size: " << window_size 
+            << "\n\t max_range:   " << max_range 
+            );
+
+        const std::vector<std::vector<separable_filter_type> > filters = build_separable_poly_filters(window_size, order);
+        std::vector<std::vector<separable_int32_filter_type> > int_filters(filters.size());
+        for (unsigned long i = 0; i < filters.size(); ++i)
+        {
+            // Find the largest magnitude coefficient over all parts of this group.
+            double max_val = 0;
+            for (unsigned long j = 0; j < filters[i].size(); ++j)
+            {
+                const separable_filter_type& filt = filters[i][j];
+                max_val = std::max(max_val, max(abs(filt.first)));
+                max_val = std::max(max_val, max(abs(filt.second)));
+            }
+            // Avoid dividing by zero if the whole group is zero.
+            if (max_val == 0)
+                max_val = 1;
+
+            // Rescale so the largest coefficient maps to +/-max_range and round.
+            int_filters[i].resize(filters[i].size());
+            for (unsigned long j = 0; j < filters[i].size(); ++j)
+            {
+                const separable_filter_type& filt = filters[i][j];
+                int_filters[i][j].first  = matrix_cast<int32>(round(filt.first*max_range/max_val));
+                int_filters[i][j].second = matrix_cast<int32>(round(filt.second*max_range/max_val));
+            }
+        }
+        return int_filters;
+    }
+}
+// ----------------------------------------------------------------------------------------
+#endif // DLIB_BUILD_SEPARABLE_PoLY_FILTERS_H__
--- a/dlib/image_keypoint/poly_image.h
+++ b/dlib/image_keypoint/poly_image.h
+// Copyright (C) 2011  Davis E. King (davis@dlib.net)
+// License: Boost Software License   See LICENSE.txt for the full license.
+#ifndef DLIB_POLY_ImAGE_H__
+#define DLIB_POLY_ImAGE_H__
+#include "poly_image_abstract.h"
+#include "build_separable_poly_filters.h"
+#include "../algs.h"
+#include "../matrix.h"
+#include "../array2d.h"
+#include "../geometry.h"
+#include <cmath>
+namespace dlib
+{
+// ----------------------------------------------------------------------------------------
+    template <
+        long downsample
+        >
+    class poly_image : noncopyable
+    {
+        /*!
+            IMPLEMENTATION NOTES
+                The interface contract for this object is in poly_image_abstract.h.
+
+                - filters holds one group of separable filters per polynomial term,
+                  as returned by build_separable_poly_filters().  filters[0] is the
+                  group for the constant term.
+                - After load(), poly_coef[i-1] holds the response of filters[i]
+                  divided by the response of filters[0] (or 0 wherever the
+                  filters[0] response is less than 1).
+                - border_size is the offset added to (row,col) in operator() to
+                  skip the border of the poly_coef images.
+                - des is a scratch buffer that operator() fills in and returns by
+                  reference.  It is mutable because operator() is const.
+        !*/
+        COMPILE_TIME_ASSERT(downsample >= 1);
+    public:
+        typedef matrix<double, 0, 1> descriptor_type;
+
+        poly_image (
+        ) 
+        {
+            clear();
+        }
+
+        // Puts this object back into its initial state: order 3, window size 13, 
+        // and no loaded image.
+        void clear (
+        )
+        {
+            configure(3, 13);
+        }
+
+        long get_order (
+        ) const
+        {
+            return order;
+        }
+
+        long get_window_size (
+        ) const
+        {
+            return window_size;
+        }
+
+        // Changes the polynomial order and window size, discarding any loaded image
+        // data and rebuilding the filters.
+        void setup (
+            long order_,
+            long window_size_
+        )
+        {
+            // make sure requires clause is not broken
+            DLIB_ASSERT(1 <= order_ && order_ <= 6 &&
+                        window_size_ >= 3 && (window_size_%2) == 1,
+                "\t void poly_image::setup()"
+                << "\n\t Invalid arguments were given to this function."
+                << "\n\t order_:       " << order_ 
+                << "\n\t window_size_: " << window_size_ 
+                << "\n\t this: " << this
+                );
+            configure(order_, window_size_);
+        }
+
+        // Copies the order, window size, border size, and filters from item.  Nothing
+        // is done if the order and window size already match.
+        void copy_configuration (
+            const poly_image& item
+        )
+        {
+            if (order != item.order || 
+                window_size != item.window_size)
+            {
+                order = item.order;
+                window_size = item.window_size;
+                border_size = item.border_size;
+                filters = item.filters;
+            }
+        }
+
+        // Runs every filter group over img and stores the intensity normalized
+        // responses in poly_coef.
+        template <
+            typename image_type
+            >
+        inline void load (
+            const image_type& img
+        )
+        {
+            COMPILE_TIME_ASSERT( pixel_traits<typename image_type::type>::has_alpha == false );
+            poly_coef.resize(get_num_dimensions());
+            des.set_size(get_num_dimensions());
+
+            // coef0 is the response of the constant term filter.  It is only used to
+            // normalize the other coefficients and is not kept.
+            array2d<float> coef0;
+            rectangle rect = filter_image(img, coef0, filters[0]);
+            num_rows = rect.height();
+            num_cols = rect.width();
+
+            for (unsigned long i = 1; i < filters.size(); ++i)
+            {
+                filter_image(img, poly_coef[i-1], filters[i]);
+                // intensity normalize everything
+                for (long r = 0; r < coef0.nr(); ++r)
+                {
+                    for (long c = 0; c < coef0.nc(); ++c)
+                    {
+                        // Constant terms below 1 are treated as too small to
+                        // divide by, so the coefficient is zeroed instead.
+                        if (coef0[r][c] >= 1)
+                            poly_coef[i-1][r][c] /= coef0[r][c];
+                        else
+                            poly_coef[i-1][r][c] = 0;
+                    }
+                }
+            }
+        }
+
+        // Discards the state populated by load() but keeps the configuration.
+        void unload()
+        {
+            poly_coef.clear();
+            num_rows = 0;
+            num_cols = 0;
+        }
+
+        inline unsigned long size (
+        ) const { return static_cast<unsigned long>(nr()*nc()); }
+
+        inline long nr (
+        ) const { return num_rows; }
+
+        inline long nc (
+        ) const { return num_cols; }
+
+        long get_num_dimensions (
+        ) const
+        {
+            // -1 because we discard the constant term of the polynomial.
+            return filters.size()-1;
+        }
+
+        // Returns the descriptor at the given feature location.  The returned
+        // reference points at the internal scratch buffer des, so it is overwritten
+        // by the next call.
+        inline const descriptor_type& operator() (
+            long row,
+            long col
+        ) const
+        {
+            // make sure requires clause is not broken
+            DLIB_ASSERT( 0 <= row && row < nr() &&
+                         0 <= col && col < nc(),
+                "\t descriptor_type poly_image::operator()()"
+                << "\n\t invalid row or col argument"
+                << "\n\t row:  " << row
+                << "\n\t col:  " << col 
+                << "\n\t nr(): " << nr() 
+                << "\n\t nc(): " << nc() 
+                << "\n\t this: " << this
+                );
+            // add because of the zero border around the poly_coef images
+            row += border_size;
+            col += border_size;
+            for (long i = 0; i < des.size(); ++i)
+                des(i) = poly_coef[i][row][col];
+            return des;
+        }
+
+        const rectangle get_block_rect (
+            long row,
+            long col
+        ) const
+        {
+            return centered_rect(downsample*point(col+border_size, row+border_size), 
+                                 window_size, window_size);
+        }
+
+        const point image_to_feat_space (
+            const point& p
+        ) const
+        {
+            return p/downsample - point(border_size, border_size);
+        }
+
+        const rectangle image_to_feat_space (
+            const rectangle& rect
+        ) const
+        {
+            return rectangle(image_to_feat_space(rect.tl_corner()), image_to_feat_space(rect.br_corner()));
+        }
+
+        const point feat_to_image_space (
+            const point& p
+        ) const
+        {
+            return (p + point(border_size, border_size))*downsample;
+        }
+
+        const rectangle feat_to_image_space (
+            const rectangle& rect
+        ) const
+        {
+            return rectangle(feat_to_image_space(rect.tl_corner()), feat_to_image_space(rect.br_corner()));
+        }
+
+        // Writes everything except the filters and the scratch descriptor, both of
+        // which are reconstructed by deserialize().
+        friend void serialize (const poly_image& item, std::ostream& out) 
+        {
+            serialize(item.poly_coef, out);
+            serialize(item.order, out);
+            serialize(item.window_size, out);
+            serialize(item.border_size, out);
+            serialize(item.num_rows, out);
+            serialize(item.num_cols, out);
+        }
+
+        friend void deserialize (poly_image& item, std::istream& in )
+        {
+            deserialize(item.poly_coef, in);
+            deserialize(item.order, in);
+            deserialize(item.window_size, in);
+            deserialize(item.border_size, in);
+            deserialize(item.num_rows, in);
+            deserialize(item.num_cols, in);
+            // just rebuild the filters instead of loading them
+            item.filters = build_separable_poly_filters(item.window_size, item.order);
+            // des is not serialized, so it has to be sized here.  Otherwise
+            // operator() would return a descriptor with the wrong number of
+            // elements on a freshly deserialized object.
+            item.des.set_size(item.get_num_dimensions());
+        }
+
+    private:
+
+        // Shared by clear() and setup(): drops any loaded data, stores the new
+        // configuration, and rebuilds the filters to match it.
+        void configure (
+            long order_,
+            long window_size_
+        )
+        {
+            poly_coef.clear();
+            order = order_;
+            window_size = window_size_;
+            border_size = static_cast<long>(std::ceil(std::floor(window_size/2.0)/downsample));
+            num_rows = 0;
+            num_cols = 0;
+            filters = build_separable_poly_filters(window_size, order);
+        }
+
+        // Applies one group of separable filters to img and writes the result to
+        // out.  Returns the rectangle reported by the first filtering call.
+        template <typename image_type>
+        rectangle filter_image (
+            const image_type& img,
+            array2d<float>& out,
+            const std::vector<separable_filter_type>& filter
+        ) const
+        {
+            rectangle rect = spatially_filter_image_separable_down(downsample, img, out, filter[0].first, filter[0].second);
+            for (unsigned long i = 1; i < filter.size(); ++i)
+            {
+                // NOTE(review): the trailing arguments are presumably scale == 1,
+                // use_abs == false, and add_to == true, so the remaining parts are
+                // accumulated into out -- confirm against the spatial filtering docs.
+                spatially_filter_image_separable_down(downsample, img, out, filter[i].first, filter[i].second, 1, false, true);
+            }
+            return rect;
+        }
+
+        std::vector<std::vector<separable_filter_type> > filters;
+        dlib::array<array2d<float> >::expand_1b poly_coef;
+        long order;
+        long window_size;
+        long border_size;
+        long num_rows;
+        long num_cols;
+        mutable descriptor_type des;
+    };
+// ----------------------------------------------------------------------------------------
+}
+#endif // DLIB_POLY_ImAGE_H__
--- a/dlib/image_keypoint/poly_image_abstract.h
+++ b/dlib/image_keypoint/poly_image_abstract.h
+// Copyright (C) 2011  Davis E. King (davis@dlib.net)
+// License: Boost Software License   See LICENSE.txt for the full license.
+#undef DLIB_POLY_ImAGE_ABSTRACT_H__
+#ifdef DLIB_POLY_ImAGE_ABSTRACT_H__
+#include "../algs.h"
+#include "../matrix.h"
+#include "../geometry/rectangle_abstract.h"
+#include <cmath>
+namespace dlib
+{
+    template <
+        long downsample
+        >
+    class poly_image : noncopyable
+    {
+        /*!
+            REQUIREMENTS ON TEMPLATE PARAMETERS 
+                - downsample >= 1
+            INITIAL VALUE
+                 - size() == 0
+                 - get_order() == 3
+                 - get_window_size() == 13
+            WHAT THIS OBJECT REPRESENTS
+                This object is a tool for extracting local feature descriptors from an image.
+                In particular, it fits a polynomial to every local pixel patch in an image and
+                allows you to query the coefficients of this polynomial.  The coefficients 
+                are intensity normalized by dividing them by the constant term of the fitted 
+                polynomial and then the constant term is discarded. 
+                Additionally, the user can specify a downsampling rate.  If the template argument
+                downsample is set to 1 then feature extraction is performed at every pixel of
+                an input image (except for a small area around the image border).  However,
+                if downsample is set to 2 then feature extraction is only performed at every
+                other pixel location.  More generally, if downsample is set to N then feature
+                extraction is performed only every N pixels.  
+            THREAD SAFETY
+                Concurrent access to an instance of this object is not safe and should be protected
+                by a mutex lock except for the case where you are copying the configuration 
+                (via copy_configuration()) of a poly_image object to many other threads.  
+                In this case, it is safe to copy the configuration of a shared object so long
+                as no other operations are performed on it.
+        !*/
+    public:
+        typedef matrix<double, 0, 1> descriptor_type;
+        poly_image (
+        ); 
+        /*!
+            ensures
+                - this object is properly initialized
+        !*/
+        void clear (
+        );
+        /*!
+            ensures
+                - this object will have its initial value
+        !*/
+        void setup (
+            long order,
+            long window_size
+        );
+        /*!
+            requires
+                - 1 <= order <= 6
+                - window_size >= 3 && window_size is odd
+            ensures
+                - #get_order() == order
+                - #get_window_size() == window_size
+                - #size() == 0
+                  (i.e. any state previously populated by load() is discarded)
+        !*/
+        long get_order (
+        ) const;
+        /*!
+            ensures
+                - returns the order of the polynomial that will be fitted to 
+                  each local pixel patch during feature extraction.
+        !*/
+        long get_window_size (
+        ) const;
+        /*!
+            ensures
+                - returns the size of the window used for local feature extraction.
+                  This is the width and height of the window in pixels.
+        !*/
+        void copy_configuration (
+            const poly_image& item
+        );
+        /*!
+            ensures
+                - copies all the state information of item into *this, except for state 
+                  information populated by load().  More precisely, given two poly_image 
+                  objects H1 and H2, the following sequence of instructions should always 
+                  result in both of them having the exact same state.
+                    H2.copy_configuration(H1);
+                    H1.load(img);
+                    H2.load(img);
+        !*/
+        template <
+            typename image_type
+            >
+        inline void load (
+            const image_type& img
+        );
+        /*!
+            requires
+                - image_type is an implementation of array2d/array2d_kernel_abstract.h
+                - pixel_traits<typename image_type::type>::has_alpha == false
+            ensures
+                - Performs the feature extraction described in the WHAT THIS OBJECT REPRESENTS
+                  section above.  This means after load() finishes you can call (*this)(row,col) 
+                  to obtain the polynomial coefficients for an order get_order() polynomial which 
+                  was fitted to the image patch get_block_rect(row,col).
+                - #size() > 0
+        !*/
+        void unload(
+        );
+        /*!
+            ensures
+                - #nr() == 0
+                - #nc() == 0
+                - clears only the state information which is populated by load().  For 
+                  example, let H be a poly_image object.  Then consider the two sequences 
+                  of instructions:
+                    Sequence 1:
+                        H.load(img);      
+                        H.unload();
+                        H.load(img);
+                    Sequence 2:
+                        H.load(img);
+                  Both sequence 1 and sequence 2 should have the same effect on H.  
+        !*/
+        inline unsigned long size (
+        ) const; 
+        /*!
+            ensures
+                - returns nr()*nc()
+        !*/
+        inline long nr (
+        ) const;
+        /*!
+            ensures
+                - returns the number of rows in this polynomial feature image
+        !*/
+        inline long nc (
+        ) const;
+        /*!
+            ensures
+                - returns the number of columns in this polynomial feature image
+        !*/
+        long get_num_dimensions (
+        ) const;
+        /*!
+            ensures
+                - returns the number of dimensions in the feature vectors generated by
+                  this object.  
+                - In this case, this will be the number of coefficients in an order 
+                  get_order() polynomial, except for the constant term of the polynomial.
+                  That is, (get_order()+1)*(get_order()+2)/2 - 1.
+        !*/
+        inline const descriptor_type& operator() (
+            long row,
+            long col
+        ) const;
+        /*!
+            requires
+                - 0 <= row < nr()
+                - 0 <= col < nc()
+            ensures
+                - returns the descriptor for the polynomial filtering block at the given row and column.  
+                  This vector will contain the polynomial coefficients for a polynomial fitted to the
+                  image patch located at get_block_rect(row,col) in the original image given to load().
+                - The returned descriptor vector will have get_num_dimensions() elements.
+        !*/
+        const rectangle get_block_rect (
+            long row,
+            long col
+        ) const;
+        /*!
+            ensures
+                - returns a rectangle that tells you what part of the original image is associated
+                  with a particular polynomial filter block.  That is, what part of the input image 
+                  is associated with (*this)(row,col).
+                - The returned rectangle will be get_window_size() pixels wide and tall.
+        !*/
+        const point image_to_feat_space (
+            const point& p
+        ) const;
+        /*!
+            ensures
+                - Each local feature is extracted from a certain point in the input image.
+                  This function returns the identity of the local feature corresponding
+                  to the image location p.  Or in other words, let P == image_to_feat_space(p), 
+                  then (*this)(P.y(),P.x()) == the local feature closest to, or centered at, 
+                  the point p in the input image.  Note that some image points might not have 
+                  corresponding feature locations.  E.g. border points or points outside the 
+                  image.  In these cases the returned point will be outside get_rect(*this).
+        !*/
+        const rectangle image_to_feat_space (
+            const rectangle& rect
+        ) const;
+        /*!
+            ensures
+                - returns rectangle(image_to_feat_space(rect.tl_corner()), image_to_feat_space(rect.br_corner()));
+                  (i.e. maps a rectangle from image space to feature space)
+        !*/
+        const point feat_to_image_space (
+            const point& p
+        ) const;
+        /*!
+            ensures
+                - returns the location in the input image space corresponding to the center
+                  of the local feature at point p.  In other words, this function computes
+                  the inverse of image_to_feat_space().  Note that it may only do so approximately, 
+                  since more than one image location might correspond to the same local feature.  
+                  That is, image_to_feat_space() might not be invertible so this function gives 
+                  the closest possible result.
+        !*/
+        const rectangle feat_to_image_space (
+            const rectangle& rect
+        ) const;
+        /*!
+            ensures
+                - returns rectangle(feat_to_image_space(rect.tl_corner()), feat_to_image_space(rect.br_corner()));
+                  (i.e. maps a rectangle from feature space to image space)
+        !*/
+    };
+// ----------------------------------------------------------------------------------------
+    template <
+        long downsample
+        >
+    void serialize (
+        const poly_image<downsample>& item,
+        std::ostream& out
+    );
+    /*!
+        provides serialization support.  The complete state of item is written to
+        out, including any feature data populated by load().
+    !*/
+    template <
+        long downsample
+        >
+    void deserialize (
+        poly_image<downsample>& item,
+        std::istream& in 
+    );
+    /*!
+        provides deserialization support.  The state of item is replaced by the
+        state read from in, which must have been written by serialize().
+    !*/
+// ----------------------------------------------------------------------------------------
+}
+#endif // DLIB_POLY_ImAGE_ABSTRACT_H__