Added scan_fhog_pyramid.

6401e693 · Davis King · dc4cc092 · 6401e693 · 6401e693 · 6401e693
Commit 6401e693 authored Nov 11, 2013 by Davis King
Showing with 1395 additions and 0 deletions

image_processing.h dlib/image_processing.h +1 -0

scan_fhog_pyramid.h dlib/image_processing/scan_fhog_pyramid.h +866 -0

scan_fhog_pyramid_abstract.h dlib/image_processing/scan_fhog_pyramid_abstract.h +528 -0

No files found.
--- a/dlib/image_processing.h
+++ b/dlib/image_processing.h
@@ -13,6 +13,7 @@
 #include "image_processing/scan_image_boxes.h"
 #include "image_processing/scan_image_custom.h"
 #include "image_processing/remove_unobtainable_rectangles.h"
+#include "image_processing/scan_fhog_pyramid.h"

 #endif // DLIB_IMAGE_PROCESSInG_H___


--- a/dlib/image_processing/scan_fhog_pyramid.h
+++ b/dlib/image_processing/scan_fhog_pyramid.h
+// Copyright (C) 2013  Davis E. King (davis@dlib.net)
+// License: Boost Software License   See LICENSE.txt for the full license.
+#ifndef DLIB_SCAN_fHOG_PYRAMID_H__
+#define DLIB_SCAN_fHOG_PYRAMID_H__
+
+#include "scan_fhog_pyramid_abstract.h"
+#include "../matrix.h"
+#include "../image_transforms.h"
+#include "../array.h"
+#include "../array2d.h"
+#include "object_detector.h"
+
+namespace dlib
+{
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename Pyramid_type
+        >
+    class scan_fhog_pyramid : noncopyable
+    {
+
+    public:
+
+        typedef matrix<double,0,1> feature_vector_type;
+
+        typedef Pyramid_type pyramid_type;
+
+        scan_fhog_pyramid (
+        );  
+
+        template <
+            typename image_type
+            >
+        void load (
+            const image_type& img
+        );
+
+        inline bool is_loaded_with_image (
+        ) const;
+
+        inline void copy_configuration (
+            const scan_fhog_pyramid& item
+        );
+
+        void set_detection_window_size (
+            unsigned long width,
+            unsigned long height
+        )
+        /*!
+            requires
+                - width > 0
+                - height > 0
+            ensures
+                - #get_detection_window_width() == width
+                - #get_detection_window_height() == height
+        !*/
+        {
+            // make sure requires clause is not broken
+            DLIB_ASSERT(width > 0 && height > 0,
+                "\t void scan_fhog_pyramid::set_detection_window_size()"
+                << "\n\t The detection window can't have a zero width or height. "
+                << "\n\t width:  " << width 
+                << "\n\t height: " << height 
+                << "\n\t this:   " << this
+                );
+
+            window_width = width;
+            window_height = height;
+        }
+
+        // Returns the detection window width most recently given to
+        // set_detection_window_size() (64 if it was never called).
+        inline unsigned long get_detection_window_width (
+        ) const { return window_width; }
+        // Returns the detection window height most recently given to
+        // set_detection_window_size() (64 if it was never called).
+        inline unsigned long get_detection_window_height (
+        ) const { return window_height; }
+
+        inline unsigned long get_num_detection_templates (
+        ) const;
+
+        inline unsigned long get_num_movable_components_per_detection_template (
+        ) const;
+
+        // Sets the padding.  compute_fhog_window_size() grows the fHOG space
+        // detection window by this amount (via grow_rect()), and detect() shrinks
+        // the window by the same amount before mapping a hit back to image space.
+        void set_padding (
+            unsigned long new_padding
+        )
+        {
+            padding = new_padding;
+        }
+
+        // Returns the padding most recently given to set_padding() (1 by default).
+        unsigned long get_padding (
+        ) const { return padding; }
+
+        // Sets the fHOG cell size handed to extract_fhog_features(), image_to_fhog()
+        // and fhog_to_image().  The value must be non-zero; this is only checked
+        // when DLIB_ASSERT is active.
+        void set_cell_size (
+            unsigned long new_cell_size
+        )
+        {
+            // make sure requires clause is not broken
+            DLIB_ASSERT(new_cell_size > 0 ,
+                "\t void scan_fhog_pyramid::set_cell_size()"
+                << "\n\t You can't have zero sized fHOG cells. "
+                << "\n\t this: " << this
+                );
+
+            // Note that the member is declared as an int, so the unsigned long
+            // argument is converted on assignment.
+            cell_size = new_cell_size;
+        }
+
+        // Returns the cell size most recently given to set_cell_size() (8 by default).
+        unsigned long get_cell_size (
+        ) const { return cell_size; }
+
+        inline long get_num_dimensions (
+        ) const;
+
+        unsigned long get_max_pyramid_levels (
+        ) const;
+
+        void set_max_pyramid_levels (
+            unsigned long max_levels
+        );
+
+        void set_min_pyramid_layer_size (
+            unsigned long width,
+            unsigned long height 
+        );
+
+        inline unsigned long get_min_pyramid_layer_width (
+        ) const;
+
+        inline unsigned long get_min_pyramid_layer_height (
+        ) const;
+
+        void detect (
+            const feature_vector_type& w,
+            std::vector<std::pair<double, rectangle> >& dets,
+            const double thresh
+        ) const
+        {
+            // Turn the flat weight vector into a filterbank and forward it straight
+            // to the filterbank based overload of detect().
+            detect(build_fhog_filterbank(w), dets, thresh);
+        }
+
+        class fhog_filterbank 
+        {
+            friend class scan_fhog_pyramid;
+        public:
+            // Sum of the element counts of all the dense filters.
+            inline unsigned long get_num_dimensions() const
+            {
+                unsigned long total = 0;
+                unsigned long idx = 0;
+                while (idx < filters.size())
+                {
+                    total += filters[idx].size();
+                    ++idx;
+                }
+                return total;
+            }
+
+            // Read-only access to the dense (non-separable) filters.
+            const std::vector<matrix<float> >& get_filters() const 
+            { 
+                return filters;
+            } 
+
+            // Number of row/column filter pairs stored across every entry of
+            // row_filters.
+            unsigned long num_separable_filters() const 
+            {
+                unsigned long total = 0;
+                unsigned long idx = 0;
+                while (idx < row_filters.size())
+                {
+                    total += row_filters[idx].size();
+                    ++idx;
+                }
+                return total;
+            }
+
+        private:
+            // filters[i] is the dense filter for entry i.  row_filters[i][j] and
+            // col_filters[i][j] are pushed in pairs by build_fhog_filterbank().
+            std::vector<matrix<float> > filters;
+            std::vector<std::vector<matrix<float,0,1> > > row_filters, col_filters;
+        };
+
+        fhog_filterbank build_fhog_filterbank (
+            const feature_vector_type& weights 
+        ) const
+        /*!
+            requires
+                - weights.size() >= get_num_dimensions()
+            ensures
+                - Splits the first get_num_dimensions() elements of weights into 31
+                  consecutive chunks, each reshaped into a dense filter with
+                  get_fhog_window_height() rows and get_fhog_window_width() columns.
+                - For each dense filter, also stores a set of row/column filter pairs
+                  derived from its SVD.  Components whose singular value is rounded to
+                  zero (see the threshold below) are dropped.
+        !*/
+        {
+            // make sure requires clause is not broken
+            DLIB_ASSERT(weights.size() >= get_num_dimensions(),
+                "\t fhog_filterbank scan_fhog_pyramid::build_fhog_filterbank()"
+                << "\n\t The number of weights isn't enough to fill out the filterbank. "
+                << "\n\t weights.size():       " << weights.size() 
+                << "\n\t get_num_dimensions(): " << get_num_dimensions() 
+                << "\n\t this: " << this
+                );
+
+            fhog_filterbank temp;
+            temp.filters.resize(31);
+            temp.row_filters.resize(31);
+            temp.col_filters.resize(31);
+
+            // load filters from w
+            unsigned long width, height;
+            compute_fhog_window_size(width, height);
+            const long size = width*height;
+            for (unsigned long i = 0; i < temp.filters.size(); ++i)
+            {
+                matrix<double> u,v,w,f;
+                // The i-th chunk of weights, viewed as a height x width filter.
+                f = reshape(rowm(weights, range(i*size, (i+1)*size-1)), height, width);
+                temp.filters[i] = matrix_cast<float>(f);
+
+                svd3(f, u,w,v);
+
+                // rsort_columns() reorders its second argument as well, so v is
+                // sorted against a separate copy of w to get the same column order
+                // as u.
+                matrix<double> w2 = w;
+                rsort_columns(u,w);
+                rsort_columns(v,w2);
+
+                // Discard components that are small in absolute terms (below 1e-3)
+                // or relative to the largest singular value (below 1% of it).
+                double thresh = std::max(1e-3, max(w)*0.01);
+                w = round_zeros(w, thresh);
+
+
+                for (long j = 0; j < w.size(); ++j)
+                {
+                    if (w(j) != 0)
+                    {
+                        // Split the singular value evenly between the two factors.
+                        temp.col_filters[i].push_back(matrix_cast<float>(colm(u,j)*std::sqrt(w(j))));
+                        temp.row_filters[i].push_back(matrix_cast<float>(colm(v,j)*std::sqrt(w(j))));
+                    }
+                }
+            }
+
+            return temp;
+        }
+
+        void detect (
+            const fhog_filterbank& w,
+            std::vector<std::pair<double, rectangle> >& dets,
+            const double thresh
+        ) const;
+
+
+        void get_feature_vector (
+            const full_object_detection& obj,
+            feature_vector_type& psi
+        ) const;
+
+        full_object_detection get_full_object_detection (
+            const rectangle& rect,
+            const feature_vector_type& w
+        ) const;
+
+        const rectangle get_best_matching_rect (
+            const rectangle& rect
+        ) const;
+
+        double get_nuclear_norm_regularization_strength (
+        ) const { return nuclear_norm_regularization_strength; }
+        /*!
+            ensures
+                - returns the strength most recently given to
+                  set_nuclear_norm_regularization_strength() (0 by default).
+        !*/
+
+        void set_nuclear_norm_regularization_strength (
+            double strength
+        ) 
+        /*!
+            requires
+                - strength >= 0
+            ensures
+                - #get_nuclear_norm_regularization_strength() == strength
+        !*/
+        {
+            // make sure requires clause is not broken
+            DLIB_ASSERT(strength >= 0 ,
+                "\t void scan_fhog_pyramid::set_nuclear_norm_regularization_strength()"
+                << "\n\t You can't have a negative regularization strength. "
+                << "\n\t strength: " << strength 
+                << "\n\t this: " << this
+                );
+
+            nuclear_norm_regularization_strength = strength;
+        }
+
+        // Width of the padded detection window measured in fHOG cells.
+        unsigned long get_fhog_window_width (
+        ) const 
+        {
+            unsigned long fhog_width, fhog_height;
+            compute_fhog_window_size(fhog_width, fhog_height);
+            return fhog_width;
+        }
+
+        // Height of the padded detection window measured in fHOG cells.
+        unsigned long get_fhog_window_height (
+        ) const 
+        {
+            unsigned long fhog_width, fhog_height;
+            compute_fhog_window_size(fhog_width, fhog_height);
+            return fhog_height;
+        }
+
+        template <typename T>
+        friend void serialize (
+            const scan_fhog_pyramid<T>& item,
+            std::ostream& out
+        );
+
+        template <typename T>
+        friend void deserialize (
+            scan_fhog_pyramid<T>& item,
+            std::istream& in 
+        );
+
+    private:
+        // Computes the size of the detection window in fHOG space: the pixel window
+        // is mapped through image_to_fhog() and then grown by the padding.
+        inline void compute_fhog_window_size(
+            unsigned long& width,
+            unsigned long& height
+        ) const
+        {
+            const rectangle pixel_window = centered_rect(point(0,0),window_width,window_height);
+            const rectangle padded_window = grow_rect(image_to_fhog(pixel_window, cell_size), padding);
+            width = padded_window.width();
+            height = padded_window.height();
+        }
+
+        // Orders detections by score only; the rectangles are not compared.
+        static bool compare_pair_rect (
+            const std::pair<double, rectangle>& a,
+            const std::pair<double, rectangle>& b
+        )
+        {
+            return b.first > a.first;
+        }
+
+        void get_mapped_rect_and_metadata (
+            const unsigned long number_pyramid_levels,
+            const rectangle& rect,
+            rectangle& mapped_rect,
+            rectangle& fhog_rect,
+            unsigned long& best_level
+        ) const;
+
+        // Area of the intersection of the two rectangles divided by the area of
+        // the rectangle produced by r1 + r2, computed after r1 has been moved so
+        // that its top left corner coincides with that of r2.
+        double get_match_score (
+            rectangle r1,
+            rectangle r2
+        ) const
+        {
+            // make the rectangles overlap as much as possible before computing the match score.
+            const rectangle aligned = move_rect(r1, r2.tl_corner());
+            const double shared_area = aligned.intersect(r2).area();
+            const double combined_area = (aligned + r2).area();
+            return shared_area/combined_area;
+        }
+
+        typedef array<array2d<float> > fhog_image;
+
+        array<fhog_image> feats;
+        int cell_size;
+        unsigned long padding; 
+        unsigned long window_width;
+        unsigned long window_height;
+        unsigned long max_pyramid_levels;
+        unsigned long min_pyramid_layer_width;
+        unsigned long min_pyramid_layer_height;
+        double nuclear_norm_regularization_strength;
+
+    };
+
+// ----------------------------------------------------------------------------------------
+
+    template <typename T>
+    void serialize (
+        const scan_fhog_pyramid<T>& item,
+        std::ostream& out
+    )
+    {
+        // Format version tag.  deserialize() rejects anything other than 1.
+        const int version = 1;
+        serialize(version, out);
+
+        // The loaded feature pyramid followed by the configuration.  deserialize()
+        // reads the fields back in exactly this order.
+        serialize(item.feats, out);
+        serialize(item.cell_size, out);
+        serialize(item.padding, out);
+        serialize(item.window_width, out);
+        serialize(item.window_height, out);
+        serialize(item.max_pyramid_levels, out);
+        serialize(item.min_pyramid_layer_width, out);
+        serialize(item.min_pyramid_layer_height, out);
+        serialize(item.nuclear_norm_regularization_strength, out);
+
+        // Trailing dimension count, used by deserialize() as a consistency check.
+        const long num_dims = item.get_num_dimensions();
+        serialize(num_dims, out);
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    template <typename T>
+    void deserialize (
+        scan_fhog_pyramid<T>& item,
+        std::istream& in 
+    )
+    {
+        int version = 0;
+        deserialize(version, in);
+        if (version != 1)
+        {
+            throw serialization_error("Unsupported version found when deserializing a scan_fhog_pyramid object.");
+        }
+
+        // Fields are read in the same order serialize() wrote them.
+        deserialize(item.feats, in);
+        deserialize(item.cell_size, in);
+        deserialize(item.padding, in);
+        deserialize(item.window_width, in);
+        deserialize(item.window_height, in);
+        deserialize(item.max_pyramid_levels, in);
+        deserialize(item.min_pyramid_layer_width, in);
+        deserialize(item.min_pyramid_layer_height, in);
+        deserialize(item.nuclear_norm_regularization_strength, in);
+
+        // The stream ends with the number of dimensions the writer had.  If the
+        // feature extractor changed between writing and reading, the dimension
+        // count implied by the settings just loaded will disagree with it, and we
+        // report that rather than silently accept incompatible data.
+        long stored_dims;
+        deserialize(stored_dims, in);
+        if (stored_dims != item.get_num_dimensions())
+        {
+            throw serialization_error("Number of dimensions in serialized scan_fhog_pyramid doesn't match the expected number.");
+        }
+    }
+
+// ----------------------------------------------------------------------------------------
+// ----------------------------------------------------------------------------------------
+//                         scan_fhog_pyramid member functions
+// ----------------------------------------------------------------------------------------
+// ----------------------------------------------------------------------------------------
+
+    // Default constructor.  Starts with no image loaded and with the default
+    // configuration: 8 pixel cells, a padding of 1, a 64x64 detection window, at
+    // most 1000 pyramid levels, a 64x64 minimum pyramid layer size, and nuclear
+    // norm regularization switched off (strength 0).
+    template <
+        typename Pyramid_type
+        >
+    scan_fhog_pyramid<Pyramid_type>::
+    scan_fhog_pyramid (
+    ) : 
+        cell_size(8),
+        padding(1),
+        window_width(64),
+        window_height(64),
+        max_pyramid_levels(1000),
+        min_pyramid_layer_width(64),
+        min_pyramid_layer_height(64),
+        nuclear_norm_regularization_strength(0)
+    {
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    // Builds the fHOG feature pyramid for img and stores it in feats, one entry
+    // per pyramid level with feats[0] holding the features of img itself.  Any
+    // previously loaded pyramid is replaced.
+    template <
+        typename Pyramid_type
+        >
+    template <
+        typename image_type
+        >
+    void scan_fhog_pyramid<Pyramid_type>::
+    load (
+        const image_type& img
+    )
+    {
+        unsigned long levels = 0;
+        rectangle rect = get_rect(img);
+
+        // figure out how many pyramid levels we should be using based on the image size
+        // (the do/while form means at least one level is always used, and
+        // downsampling stops once a layer would fall below the minimum layer size
+        // or max_pyramid_levels is reached).
+        pyramid_type pyr;
+        do
+        {
+            rect = pyr.rect_down(rect);
+            ++levels;
+        } while (rect.width() >= min_pyramid_layer_width && rect.height() >= min_pyramid_layer_height &&
+                 levels < max_pyramid_levels);
+
+        if (feats.max_size() < levels)
+            feats.set_max_size(levels);
+        feats.set_size(levels);
+
+        // The fHOG space window size is passed to extract_fhog_features() as its
+        // last two arguments (height first, then width).
+        unsigned long width, height;
+        compute_fhog_window_size(width,height);
+
+        // build our feature pyramid
+        extract_fhog_features(img, feats[0], cell_size,height,width);
+        if (feats.size() > 1)
+        {
+            // temp2 always holds the most recently produced pyramid layer and
+            // temp1 receives the next one; the swap() calls exchange their roles
+            // after each level.
+            image_type temp1, temp2;
+            pyr(img, temp1);
+            extract_fhog_features(temp1, feats[1], cell_size,height,width);
+            swap(temp1,temp2);
+
+            for (unsigned long i = 2; i < feats.size(); ++i)
+            {
+                pyr(temp2, temp1);
+                extract_fhog_features(temp1, feats[i], cell_size,height,width);
+                swap(temp1,temp2);
+            }
+        }
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    // True once the feature pyramid holds at least one level.
+    template <
+        typename Pyramid_type
+        >
+    bool scan_fhog_pyramid<Pyramid_type>::
+    is_loaded_with_image (
+    ) const
+    {
+        return feats.size() > 0;
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    // Copies every configuration setting from item.  The loaded feature pyramid
+    // (feats) is deliberately not copied.
+    template <
+        typename Pyramid_type
+        >
+    void scan_fhog_pyramid<Pyramid_type>::
+    copy_configuration (
+        const scan_fhog_pyramid& item
+    )
+    {
+        // detection window geometry
+        window_width  = item.window_width;
+        window_height = item.window_height;
+        padding       = item.padding;
+        cell_size     = item.cell_size;
+
+        // pyramid limits
+        max_pyramid_levels       = item.max_pyramid_levels;
+        min_pyramid_layer_width  = item.min_pyramid_layer_width;
+        min_pyramid_layer_height = item.min_pyramid_layer_height;
+
+        // training related setting
+        nuclear_norm_regularization_strength = item.nuclear_norm_regularization_strength;
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    // This scanner always reports exactly one detection template.
+    template <
+        typename Pyramid_type
+        >
+    unsigned long scan_fhog_pyramid<Pyramid_type>::
+    get_num_detection_templates (
+    ) const
+    {
+        return 1;
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    // This scanner always reports zero movable components (get_feature_vector()
+    // likewise requires obj.num_parts() == 0).
+    template <
+        typename Pyramid_type
+        >
+    unsigned long scan_fhog_pyramid<Pyramid_type>::
+    get_num_movable_components_per_detection_template (
+    ) const
+    {
+        return 0;
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    // Length of the weight vector used by this scanner: one weight per cell of
+    // the fHOG space detection window for each of the 31 feature planes.
+    template <
+        typename Pyramid_type
+        >
+    long scan_fhog_pyramid<Pyramid_type>::
+    get_num_dimensions (
+    ) const
+    {
+        unsigned long fhog_width, fhog_height;
+        compute_fhog_window_size(fhog_width, fhog_height);
+        return fhog_width*fhog_height*31;
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    // Returns the upper bound on the number of pyramid levels load() will build
+    // (1000 by default).
+    template <
+        typename Pyramid_type
+        >
+    unsigned long scan_fhog_pyramid<Pyramid_type>::
+    get_max_pyramid_levels (
+    ) const
+    {
+        return max_pyramid_levels;
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    // Sets the upper bound on the number of pyramid levels load() will build.
+    // max_levels must be non-zero; this is only checked when DLIB_ASSERT is active.
+    template <
+        typename Pyramid_type
+        >
+    void scan_fhog_pyramid<Pyramid_type>::
+    set_max_pyramid_levels (
+        unsigned long max_levels
+    )
+    {
+        // make sure requires clause is not broken
+        DLIB_ASSERT(max_levels > 0 ,
+            "\t void scan_fhog_pyramid::set_max_pyramid_levels()"
+            << "\n\t You can't have zero levels. "
+            << "\n\t max_levels: " << max_levels 
+            << "\n\t this: " << this
+            );
+
+        max_pyramid_levels = max_levels;
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    // Runs the filterbank w over every level of the loaded feature pyramid and
+    // writes to dets each location whose score is >= thresh, as a (score,
+    // rectangle) pair.  dets is cleared first and is sorted by descending score
+    // on return.  Requires that an image has been loaded and that w has
+    // get_num_dimensions() weights.
+    template <
+        typename Pyramid_type
+        >
+    void scan_fhog_pyramid<Pyramid_type>::
+    detect (
+        const fhog_filterbank& w,
+        std::vector<std::pair<double, rectangle> >& dets,
+        const double thresh
+    ) const
+    {
+        // make sure requires clause is not broken
+        DLIB_ASSERT(is_loaded_with_image() &&
+                    w.get_num_dimensions() == get_num_dimensions(), 
+            "\t void scan_fhog_pyramid::detect()"
+            << "\n\t Invalid inputs were given to this function "
+            << "\n\t is_loaded_with_image(): " << is_loaded_with_image()
+            << "\n\t w.get_num_dimensions(): " << w.get_num_dimensions()
+            << "\n\t get_num_dimensions():   " << get_num_dimensions()
+            << "\n\t this: " << this
+            );
+
+        dets.clear();
+
+        unsigned long width, height;
+        compute_fhog_window_size(width,height);
+        // Each anchor component is 1 when the corresponding window dimension is
+        // even and 0 when it is odd.
+        // NOTE(review): presumably this compensates for where the filtering
+        // routines center an even sized filter -- confirm.
+        const point anchor((width+1)%2, 
+                           (height+1)%2);
+
+        array2d<float> saliency_image;
+        array2d<float> temp;
+        pyramid_type pyr;
+
+        const unsigned long num_separable_filters = w.num_separable_filters();
+        // for all pyramid levels
+        for (unsigned long l = 0; l < feats.size(); ++l)
+        {
+            rectangle area;
+            // Choose between the dense filters and the separable filters.
+            // NOTE(review): 62 is 2*31, i.e. the dense path is taken once the bank
+            // averages more than two separable pairs per plane; presumably a speed
+            // heuristic -- confirm.
+            if (num_separable_filters > 62)
+            {
+                area = spatially_filter_image(feats[l][0], saliency_image, w.filters[0]);
+                for (unsigned long i = 1; i < w.filters.size(); ++i)
+                {
+                    // now we filter but the output adds to saliency_image rather than
+                    // overwriting it.
+                    spatially_filter_image(feats[l][i], saliency_image, w.filters[i], 1, false, true);
+                }
+            }
+            else
+            {
+                saliency_image.clear();
+
+                // find the first filter to apply
+                unsigned long i = 0;
+                while (i < w.row_filters.size() && w.row_filters[i].size() == 0) 
+                    ++i;
+
+                for (; i < w.row_filters.size(); ++i)
+                {
+                    for (unsigned long j = 0; j < w.row_filters[i].size(); ++j)
+                    {
+                        // The first filter applied initializes saliency_image (last
+                        // argument false); every later one accumulates into it
+                        // (last argument true).
+                        if (saliency_image.size() == 0)
+                            area = spatially_filter_image_separable(feats[l][i], saliency_image, w.row_filters[i][j], w.col_filters[i][j],1,false,false);
+                        else
+                            area = spatially_filter_image_separable(feats[l][i], saliency_image, w.row_filters[i][j], w.col_filters[i][j],1,false,true);
+                    }
+                }
+                // No separable filters at all: produce an all zero saliency image.
+                // area is left default constructed in this case.
+                if (saliency_image.size() == 0)
+                {
+                    saliency_image.set_size(feats[l][0].nr(), feats[l][0].nc());
+                    assign_all_pixels(saliency_image, 0);
+                }
+            }
+
+            // now search the saliency image for any detections
+            for (long r = area.top(); r <= area.bottom(); ++r)
+            {
+                for (long c = area.left(); c <= area.right(); ++c)
+                {
+                    // if we found a detection
+                    if (saliency_image[r][c] >= thresh)
+                    {
+                        // Strip the padding off the fHOG window, convert it to pixel
+                        // coordinates of pyramid level l, then map it up to the
+                        // coordinate system of the original image.
+                        rectangle rect = fhog_to_image(centered_rect(point(c,r)+anchor,width-2*padding,height-2*padding), cell_size, height,width);
+                        rect = pyr.rect_up(rect, l);
+                        dets.push_back(std::make_pair(saliency_image[r][c], rect));
+                    }
+                }
+            }
+        }
+
+        // compare_pair_rect orders by ascending score, so sorting through reverse
+        // iterators leaves dets in descending score order.
+        std::sort(dets.rbegin(), dets.rend(), compare_pair_rect);
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    // Returns the rectangle this scanner can actually produce that best matches
+    // rect, searching up to max_pyramid_levels pyramid levels.
+    template <
+        typename Pyramid_type
+        >
+    const rectangle scan_fhog_pyramid<Pyramid_type>::
+    get_best_matching_rect (
+        const rectangle& rect
+    ) const
+    {
+        // Only the mapped rectangle is of interest here; the other outputs are
+        // discarded.
+        rectangle best_rect;
+        rectangle unused_fhog_rect;
+        unsigned long unused_level;
+        get_mapped_rect_and_metadata(max_pyramid_levels, rect, best_rect, unused_fhog_rect, unused_level);
+        return best_rect;
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    // Finds the pyramid level (among the first number_pyramid_levels) at which
+    // the detection window best matches rect, as judged by get_match_score().
+    // Outputs:
+    //   - best_level:  index of that pyramid level.
+    //   - fhog_rect:   the padded detection window in the fHOG coordinates of
+    //                  best_level, centered on rect's location at that level.
+    //   - mapped_rect: fhog_rect with the padding removed, mapped back into the
+    //                  coordinate system of the original image.
+    template <
+        typename Pyramid_type
+        >
+    void scan_fhog_pyramid<Pyramid_type>::
+    get_mapped_rect_and_metadata (
+        const unsigned long number_pyramid_levels,
+        const rectangle& rect,
+        rectangle& mapped_rect,
+        rectangle& fhog_rect,
+        unsigned long& best_level
+    ) const
+    {
+        pyramid_type pyr;
+        best_level = 0;
+        // Starts below any score the loop compares against, so the first level
+        // examined normally becomes the initial best.
+        double best_match_score = -1;
+
+
+        unsigned long width, height;
+        compute_fhog_window_size(width,height);
+
+        // Figure out the pyramid level which best matches rect against our detection
+        // window. 
+        for (unsigned long l = 0; l < number_pyramid_levels; ++l)
+        {
+            // rect as it appears in the fHOG coordinates of level l.
+            const rectangle rect_fhog_space = image_to_fhog(pyr.rect_down(rect,l), cell_size, height,width);
+
+            // The unpadded detection window centered on that location, mapped back
+            // into the coordinates of the original image.
+            const rectangle win_image_space = pyr.rect_up(fhog_to_image(centered_rect(center(rect_fhog_space),width-2*padding,height-2*padding), cell_size, height,width), l);
+
+            const double match_score = get_match_score(win_image_space, rect); 
+            if (match_score > best_match_score)
+            {
+                best_match_score = match_score;
+                best_level = l;
+                fhog_rect = centered_rect(center(rect_fhog_space), width, height);
+            }
+
+            // Stop once rect has shrunk to at most one fHOG cell.
+            if (rect_fhog_space.area() <= 1) 
+                break;
+        }
+        mapped_rect = pyr.rect_up(fhog_to_image(shrink_rect(fhog_rect,padding), cell_size,height,width),best_level);
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    // Wraps rect in a full_object_detection.  The weight vector argument is
+    // unnamed and unused.
+    template <
+        typename Pyramid_type
+        >
+    full_object_detection scan_fhog_pyramid<Pyramid_type>::
+    get_full_object_detection (
+        const rectangle& rect,
+        const feature_vector_type& 
+    ) const
+    {
+        return full_object_detection(rect);
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    // Adds to psi the fHOG features found inside the detection window that best
+    // matches obj.get_rect().  Features are accumulated (+=) rather than
+    // assigned.  Window cells that fall outside the feature image contribute
+    // nothing but still occupy a slot in psi.
+    template <
+        typename Pyramid_type
+        >
+    void scan_fhog_pyramid<Pyramid_type>::
+    get_feature_vector (
+        const full_object_detection& obj,
+        feature_vector_type& psi
+    ) const
+    {
+        // make sure requires clause is not broken
+        DLIB_ASSERT(is_loaded_with_image() &&
+                    psi.size() >= get_num_dimensions() &&
+                    obj.num_parts() == 0,
+            "\t void scan_fhog_pyramid::get_feature_vector()"
+            << "\n\t Invalid inputs were given to this function "
+            << "\n\t is_loaded_with_image(): " << is_loaded_with_image()
+            << "\n\t psi.size():             " << psi.size()
+            << "\n\t get_num_dimensions():   " << get_num_dimensions()
+            << "\n\t obj.num_parts():                            " << obj.num_parts()
+            << "\n\t this: " << this
+            );
+
+        // Locate the pyramid level and fHOG window corresponding to the object.
+        rectangle mapped_rect, fhog_rect;
+        unsigned long best_level;
+        get_mapped_rect_and_metadata(feats.size(), obj.get_rect(), mapped_rect, fhog_rect, best_level);
+
+        const fhog_image& planes = feats[best_level];
+
+        // psi is laid out plane by plane, each plane in row major order.
+        long out_idx = 0;
+        for (unsigned long p = 0; p < planes.size(); ++p)
+        {
+            // The bounds of the first plane are used for every plane.
+            const rectangle valid_area = get_rect(planes[0]);
+            for (long r = fhog_rect.top(); r <= fhog_rect.bottom(); ++r)
+            {
+                for (long c = fhog_rect.left(); c <= fhog_rect.right(); ++c, ++out_idx)
+                {
+                    if (valid_area.contains(c,r))
+                        psi(out_idx) += planes[p][r][c];
+                }
+            }
+        }
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    // Sets the minimum pyramid layer size.  load() stops adding pyramid levels
+    // once a downsampled layer would be narrower than width or shorter than
+    // height.  Both values must be non-zero; this is only checked when
+    // DLIB_ASSERT is active.
+    template <
+        typename Pyramid_type
+        >
+    void scan_fhog_pyramid<Pyramid_type>::
+    set_min_pyramid_layer_size (
+        unsigned long width,
+        unsigned long height 
+    )
+    {
+        // make sure requires clause is not broken
+        DLIB_ASSERT(width > 0 && height > 0 ,
+            "\t void scan_fhog_pyramid::set_min_pyramid_layer_size()"
+            << "\n\t These sizes can't be zero. "
+            << "\n\t width:  " << width 
+            << "\n\t height: " << height 
+            << "\n\t this:   " << this
+            );
+
+        min_pyramid_layer_width = width;
+        min_pyramid_layer_height = height;
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    // Returns the minimum pyramid layer width (64 by default).
+    template <
+        typename Pyramid_type
+        >
+    unsigned long scan_fhog_pyramid<Pyramid_type>::
+    get_min_pyramid_layer_width (
+    ) const
+    {
+        return min_pyramid_layer_width;
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    // Returns the minimum pyramid layer height (64 by default).
+    template <
+        typename Pyramid_type
+        >
+    unsigned long scan_fhog_pyramid<Pyramid_type>::
+    get_min_pyramid_layer_height (
+    ) const
+    {
+        return min_pyramid_layer_height;
+    }
+
+// ----------------------------------------------------------------------------------------
+// ----------------------------------------------------------------------------------------
+
+    // Renders the dense filters learned by detector by rebuilding its filterbank
+    // and passing the filters to the filter based draw_fhog() overload.
+    template <
+        typename Pyramid_type
+        >
+    matrix<unsigned char> draw_fhog (
+        const object_detector<scan_fhog_pyramid<Pyramid_type> >& detector,
+        const long cell_draw_size = 15
+    )
+    {
+        typedef typename scan_fhog_pyramid<Pyramid_type>::fhog_filterbank filterbank_type;
+        const filterbank_type bank = detector.get_scanner().build_fhog_filterbank(detector.get_w());
+        return draw_fhog(bank.get_filters(), cell_draw_size);
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    // Number of separable filter pairs in the filterbank built from the weights
+    // of detector.
+    template <
+        typename Pyramid_type
+        >
+    unsigned long num_separable_filters (
+        const object_detector<scan_fhog_pyramid<Pyramid_type> >& detector
+    )
+    {
+        return detector.get_scanner().build_fhog_filterbank(detector.get_w()).num_separable_filters();
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    // When the scanner has a non-zero nuclear norm regularization strength, adds
+    // one nuclear norm regularizer to prob for each of the 31 filter planes and
+    // sets its cache based epsilon to 0.001.  Otherwise prob is left untouched.
+    template <
+        typename Pyramid_type,
+        typename svm_struct_prob_type
+        >
+    void configure_nuclear_norm_regularizer (
+        const scan_fhog_pyramid<Pyramid_type>& scanner,
+        svm_struct_prob_type& prob
+    )
+    { 
+        const double strength = scanner.get_nuclear_norm_regularization_strength();
+        if (strength == 0)
+            return;
+
+        const unsigned long width = scanner.get_fhog_window_width();
+        const unsigned long height = scanner.get_fhog_window_height();
+        // Number of weights in one filter plane; plane k starts at k*plane_size
+        // in the weight vector.
+        const unsigned long plane_size = width*height;
+        for (unsigned long plane = 0; plane < 31; ++plane)
+        {
+            prob.add_nuclear_norm_regularizer(plane*plane_size, height, width, strength);
+        }
+        prob.set_cache_based_epsilon(0.001);
+    }
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_SCAN_fHOG_PYRAMID_H__
+
--- a/dlib/image_processing/scan_fhog_pyramid_abstract.h
+++ b/dlib/image_processing/scan_fhog_pyramid_abstract.h
+// Copyright (C) 2013  Davis E. King (davis@dlib.net)
+// License: Boost Software License   See LICENSE.txt for the full license.
+#undef DLIB_SCAN_fHOG_PYRAMID_ABSTRACT_H__
+#ifdef DLIB_SCAN_fHOG_PYRAMID_ABSTRACT_H__
+
+#include <vector>
+#include "../image_transforms/fhog_abstract.h"
+#include "object_detector_abstract.h"
+
+namespace dlib
+{
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename Pyramid_type
+        >
+    class scan_fhog_pyramid : noncopyable
+    {
+        /*!
+            REQUIREMENTS ON Pyramid_type
+                - Must be one of the pyramid_down objects defined in
+                  dlib/image_transforms/image_pyramid_abstract.h or an object with a
+                  compatible interface
+
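+            INITIAL VALUE
+                - get_padding() == 1
+                - get_cell_size() == 8
+                - get_detection_window_width() == 64
+                - get_detection_window_height() == 64
+                - get_max_pyramid_levels() == 1000
+                - get_min_pyramid_layer_width() == 64
+                - get_min_pyramid_layer_height() == 64
+                - get_nuclear_norm_regularization_strength() == 0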
+
+            WHAT THIS OBJECT REPRESENTS
+                This object is a tool for running a fixed sized sliding window classifier
+                over an image pyramid.  In particular,  it slides a linear classifier over
+                a HOG pyramid as discussed in the paper:  
+                    Histograms of Oriented Gradients for Human Detection by Navneet Dalal
+                    and Bill Triggs, CVPR 2005
+                However, we augment the method slightly to use the version of HOG features 
+                from: 
+                    Object Detection with Discriminatively Trained Part Based Models by
+                    P. Felzenszwalb, R. Girshick, D. McAllester, D. Ramanan
+                    IEEE Transactions on Pattern Analysis and Machine Intelligence, Vol. 32, No. 9, Sep. 2010
+                Since these HOG features have been shown to give superior performance. 
+
+
+            THREAD SAFETY
+                Concurrent access to an instance of this object is not safe and should be
+                protected by a mutex lock except for the case where you are copying the
+                configuration (via copy_configuration()) of a scan_fhog_pyramid object to
+                many other threads.  In this case, it is safe to copy the configuration of
+                a shared object so long as no other operations are performed on it.
+        !*/
+
+    public:
+        typedef matrix<double,0,1> feature_vector_type;
+        typedef Pyramid_type pyramid_type;
+
+        scan_fhog_pyramid (
+        );  
+        /*!
+            ensures
+                - this object is properly initialized
+        !*/
+
+        template <
+            typename image_type
+            >
+        void load (
+            const image_type& img
+        );
+        /*!
+            requires
+                - image_type == is an implementation of array2d/array2d_kernel_abstract.h
+                - img contains some kind of pixel type. 
+                  (i.e. pixel_traits<typename image_type::type> is defined)
+            ensures
+                - #is_loaded_with_image() == true
+                - This object is ready to run a classifier over img to detect object
+                  locations.  Call detect() to do this.
+        !*/
+
+        bool is_loaded_with_image (
+        ) const;
+        /*!
+            ensures
+                - returns true if this object has been loaded with an image to process and
+                  false otherwise.
+        !*/
+
+        void copy_configuration (
+            const scan_fhog_pyramid& item
+        );
+        /*!
+            ensures
+                - Copies all the state information of item into *this, except for state 
+                  information populated by load().  More precisely, given two scan_fhog_pyramid
+                  objects S1 and S2, the following sequence of instructions should always 
+                  result in both of them having the exact same state:
+                    S2.copy_configuration(S1);
+                    S1.load(img);
+                    S2.load(img);
+        !*/
+
+        void set_detection_window_size (
+            unsigned long window_width,
+            unsigned long window_height
+        );
+        /*!
+            requires
+                - window_width > 0
+                - window_height > 0
+            ensures
+                - When detect() is called, this object scans a window that is of the given
+                  width and height (in pixels) over each layer in an image pyramid.  This
+                  means that the rectangle detections which come out of detect() will have
+                  a width to height ratio approximately equal to window_width/window_height
+                  and will be approximately window_width*window_height pixels in area or
+                  larger.  Therefore, the smallest object that can be detected is roughly
+                  window_width by window_height pixels in size.
+                - #get_detection_window_width() == window_width
+                - #get_detection_window_height() == window_height
+                - Since we use a HOG feature representation, the detection procedure works
+                  as follows:
+                    Step 1. Make an image pyramid.
+                    Step 2. Convert each layer of the image pyramid into a 31 band HOG "image".
+                    Step 3. Scan a linear classifier over each HOG image in the pyramid. 
+                  Moreover, the HOG features quantize the input image into a grid of cells,
+                  each cell being get_cell_size() by get_cell_size() pixels in size.  So
+                  when we scan the object detector over the pyramid we are scanning an
+                  appropriately sized window over these smaller quantized HOG features.  In
+                  particular, the size of the window we scan over the HOG feature pyramid
+                  is #get_fhog_window_width() by #get_fhog_window_height() HOG cells in
+                  size.    
+        !*/
+
+        unsigned long get_detection_window_width (
+        ) const;
+        /*!
+            ensures
+                - returns the width, in pixels, of the detection window that is scanned
+                  over the image when detect() is called.    
+        !*/
+
+        inline unsigned long get_detection_window_height (
+        ) const; 
+        /*!
+            ensures
+                - returns the height, in pixels, of the detection window that is scanned
+                  over the image when detect() is called.  
+        !*/
+
+        unsigned long get_fhog_window_width (
+        ) const; 
+        /*!
+            ensures
+                - Returns the width of the HOG scanning window in terms of HOG cell blocks.
+                  Note that this is a function of get_detection_window_width(), get_cell_size(), 
+                  and get_padding() and is therefore not something you set directly. 
+                - #get_fhog_window_width() is approximately equal to the number of HOG cells 
+                  that fit into get_detection_window_width() pixels plus 2*get_padding()
+                  since we include additional padding around each window to add context.
+        !*/
+
+        unsigned long get_fhog_window_height (
+        ) const;
+        /*!
+            ensures
+                - Returns the height of the HOG scanning window in terms of HOG cell blocks.  
+                  Note that this is a function of get_detection_window_height(), get_cell_size(), 
+                  and get_padding() and is therefore not something you set directly. 
+                - #get_fhog_window_height() is approximately equal to the number of HOG cells 
+                  that fit into get_detection_window_height() pixels plus 2*get_padding()
+                  since we include additional padding around each window to add context.
+        !*/
+
+        void set_padding (
+            unsigned long new_padding
+        );
+        /*!
+            ensures
+                - #get_padding() == new_padding
+        !*/
+
+        unsigned long get_padding (
+        ) const;
+        /*!
+            ensures
+                - The HOG windows scanned over the HOG pyramid can include additional HOG
+                  cells outside the detection window.  This can help add context and
+                  improve detection accuracy.  This function returns the number of extra
+                  HOG cells added onto the border of the HOG windows which are scanned by
+                  detect().
+        !*/
+
+        unsigned long get_cell_size (
+        ) const;
+        /*!
+            ensures
+                - Returns the size of the HOG cells.  Each HOG cell is square and contains
+                  get_cell_size()*get_cell_size() pixels.
+        !*/
+
+        void set_cell_size (
+            unsigned long new_cell_size
+        );
+        /*!
+            requires
+                - new_cell_size > 0
+            ensures
+                - #get_cell_size() == new_cell_size
+        !*/
+
+        inline long get_num_dimensions (
+        ) const;
+        /*!
+            ensures
+                - returns get_fhog_window_width()*get_fhog_window_height()*31
+                  (i.e. The number of features is equal to the size of the HOG window
+                  times 31 since there are 31 channels in the HOG feature representation.)
+        !*/
+
+        inline unsigned long get_num_detection_templates (
+        ) const { return 1; }
+        /*!
+            ensures
+                - returns 1.  Note that this function is here only for compatibility with 
+                  the scan_image_pyramid object.  Notionally, its return value indicates 
+                  that a scan_fhog_pyramid object is always ready to detect objects once
+                  an image has been loaded.
+        !*/
+
+        inline unsigned long get_num_movable_components_per_detection_template (
+        ) const { return 0; }
+        /*!
+            ensures
+                - returns 0.  Note that this function is here only for compatibility with
+                  the scan_image_pyramid object.  Its return value means that this object
+                  does not support using movable part models.
+        !*/
+
+        unsigned long get_max_pyramid_levels (
+        ) const;
+        /*!
+            ensures
+                - returns the maximum number of image pyramid levels this object will use.
+                  Note that #get_max_pyramid_levels() == 1 indicates that no image pyramid
+                  will be used at all.  That is, only the original image will be processed
+                  and no lower scale versions will be created.  
+        !*/
+
+        void set_max_pyramid_levels (
+            unsigned long max_levels
+        );
+        /*!
+            requires
+                - max_levels > 0
+            ensures
+                - #get_max_pyramid_levels() == max_levels
+        !*/
+
+        void set_min_pyramid_layer_size (
+            unsigned long width,
+            unsigned long height 
+        );
+        /*!
+            requires
+                - width > 0
+                - height > 0
+            ensures
+                - #get_min_pyramid_layer_width() == width
+                - #get_min_pyramid_layer_height() == height
+        !*/
+
+        inline unsigned long get_min_pyramid_layer_width (
+        ) const;
+        /*!
+            ensures
+                - returns the smallest allowable width of an image in the image pyramid.
+                  All pyramids will always include the original input image, however, no
+                  pyramid levels will be created which have a width smaller than the
+                  value returned by this function.
+        !*/
+
+        inline unsigned long get_min_pyramid_layer_height (
+        ) const;
+        /*!
+            ensures
+                - returns the smallest allowable height of an image in the image pyramid.
+                  All pyramids will always include the original input image, however, no
+                  pyramid levels will be created which have a height smaller than the
+                  value returned by this function.
+        !*/
+
+        fhog_filterbank build_fhog_filterbank (
+            const feature_vector_type& weights 
+        ) const;
+        /*!
+            requires
+                - weights.size() >= get_num_dimensions()
+            ensures
+                - Creates and then returns a fhog_filterbank object FB such that:
+                    - FB.get_num_dimensions() == get_num_dimensions()
+                    - FB.get_filters() == the values in weights unpacked into 31 filters.
+                    - FB.num_separable_filters() == the number of separable filters necessary to
+                      represent all the filters in FB.get_filters().
+        !*/
+
+        class fhog_filterbank 
+        {
+            /*!
+                WHAT THIS OBJECT REPRESENTS
+                    This object represents a HOG filter bank.  That is, the classifier that
+                    is slid over a HOG pyramid is a set of 31 linear filters, each
+                    get_fhog_window_height() rows by get_fhog_window_width() columns in
+                    size.  This object contains that set of 31 filters.  
+            !*/
+
+        public:
+            unsigned long get_num_dimensions(
+            ) const;
+            /*!
+                ensures
+                    - Returns the total number of values in the filters.  
+            !*/
+
+            const std::vector<matrix<float> >& get_filters(
+            ) const; 
+            /*!
+                ensures
+                    - returns the set of 31 HOG filters in this object.
+            !*/
+
+            unsigned long num_separable_filters(
+            ) const;
+            /*!
+                ensures
+                    - returns the number of separable filters necessary to represent all
+                      the filters in get_filters().
+            !*/
+        };
+
+        void detect (
+            const fhog_filterbank& w,
+            std::vector<std::pair<double, rectangle> >& dets,
+            const double thresh
+        ) const;
+        /*!
+            requires
+                - w.get_num_dimensions() == get_num_dimensions()
+                - is_loaded_with_image() == true
+            ensures
+                - Scans the HOG filter defined by w over the HOG pyramid that was populated
+                  by the last call to load() and stores all object detections into #dets.  
+                - for all valid i:
+                    - #dets[i].second == The object box which produced this detection.  This rectangle gives
+                      the location of the detection.  Note that the rectangle will have been converted back into
+                      the original image input space.  That is, if this detection was made at a low level in the
+                      image pyramid then the object box will have been automatically mapped up the pyramid layers
+                      to the original image space.  Or in other words, if you plot #dets[i].second on top of the 
+                      image given to load() it will show up in the right place.
+                    - #dets[i].first == The score for this detection.  This value is equal to dot(w, feature vector
+                      for this sliding window location).
+                    - #dets[i].first >= thresh
+                - #dets will be sorted in descending order. (i.e.  #dets[i].first >= #dets[j].first for all i, and j>i)
+                - Elements of w beyond index get_num_dimensions()-1 are ignored.  I.e. only the first
+                  get_num_dimensions() are used.
+                - Note that no form of non-max suppression is performed.  If a window has a score >= thresh
+                  then it is reported in #dets.
+        !*/
+
+        void detect (
+            const feature_vector_type& w,
+            std::vector<std::pair<double, rectangle> >& dets,
+            const double thresh
+        ) const;
+        /*!
+            requires
+                - w.size() >= get_num_dimensions()
+                - is_loaded_with_image() == true
+            ensures
+                - performs: detect(build_fhog_filterbank(w), dets, thresh)
+        !*/
+
+        void get_feature_vector (
+            const full_object_detection& obj,
+            feature_vector_type& psi
+        ) const;
+        /*!
+            requires
+                - obj.num_parts() == 0 
+                - is_loaded_with_image() == true
+                - psi.size() >= get_num_dimensions()
+                  (i.e. psi must have preallocated its memory before this function is called)
+            ensures
+                - This function allows you to determine the feature vector used for an
+                  object detection output from detect().  Note that this vector is
+                  added to psi.  Note also that you must use get_full_object_detection() to
+                  convert a rectangle from detect() into the needed full_object_detection.
+                - The dimensionality of the vector added to psi is get_num_dimensions().  This
+                  means that elements of psi after psi(get_num_dimensions()-1) are not modified.
+                - Since scan_fhog_pyramid only searches a limited set of object locations,
+                  not all possible rectangles can be output by detect().  So in the case
+                  where obj.get_rect() could not arise from a call to detect(), this
+                  function will map obj.get_rect() to the nearest possible rectangle and
+                  then add the feature vector for the mapped rectangle into #psi.
+                - get_best_matching_rect(obj.get_rect()) == the rectangle obj.get_rect()
+                  gets mapped to for feature extraction.
+        !*/
+
+        full_object_detection get_full_object_detection (
+            const rectangle& rect,
+            const feature_vector_type& w
+        ) const;
+        /*!
+            ensures
+                - returns full_object_detection(rect)
+                  (This function is here only for compatibility with the scan_image_pyramid
+                  object)
+        !*/
+
+        const rectangle get_best_matching_rect (
+            const rectangle& rect
+        ) const;
+        /*!
+            ensures
+                - Since scan_fhog_pyramid only searches a limited set of object locations,
+                  not all possible rectangles can be represented.  Therefore, this function
+                  allows you to supply a rectangle and obtain the nearest possible
+                  candidate object location rectangle.
+        !*/
+
+        double get_nuclear_norm_regularization_strength (
+        ) const;
+        /*!
+            ensures
+                - If the number of separable filters in a fhog_filterbank is small then the
+                  filter bank can be scanned over an image much faster than a normal set of
+                  31 filters.  Therefore, this object provides the option to encourage
+                  machine learning methods that learn a HOG filter bank (i.e.
+                  structural_object_detection_trainer) to select filter banks that have
+                  this beneficial property.  In particular, the value returned by
+                  get_nuclear_norm_regularization_strength() is a multiplier on a nuclear
+                  norm regularizer which will encourage the selection of filters that use a
+                  small number of separable components.  Larger values tend to
+                  give a smaller number of separable filters. 
+                - if (get_nuclear_norm_regularization_strength() == 0) then
+                    - This feature is disabled
+                - else
+                    - A nuclear norm regularizer will be added when
+                      structural_object_detection_trainer is used to learn a HOG filter
+                      bank.  Note that this can make the training process take
+                      significantly longer (but can result in faster object detectors).
+        !*/
+
+        void set_nuclear_norm_regularization_strength (
+            double strength
+        );
+        /*!
+            requires
+                - strength >= 0
+            ensures
+                - #get_nuclear_norm_regularization_strength() == strength
+        !*/
+
+    };
+
+// ----------------------------------------------------------------------------------------
+
+    template <typename T>
+    void serialize (
+        const scan_fhog_pyramid<T>& item,
+        std::ostream& out
+    );
+    /*!
+        provides serialization support 
+    !*/
+
+// ----------------------------------------------------------------------------------------
+
+    template <typename T>
+    void deserialize (
+        scan_fhog_pyramid<T>& item,
+        std::istream& in 
+    );
+    /*!
+        provides deserialization support 
+    !*/
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename Pyramid_type
+        >
+    matrix<unsigned char> draw_fhog (
+        const object_detector<scan_fhog_pyramid<Pyramid_type> >& detector,
+        const long cell_draw_size = 15
+    );
+    /*!
+        requires
+            - detector.get_w().size() >= detector.get_scanner().get_num_dimensions()
+              (i.e. the detector must have been populated with a HOG filter)
+        ensures
+            - Converts the HOG filters in the given detector into an image suitable for
+              display on the screen.  In particular, we draw all the HOG cells into a
+              grayscale image in a way that shows the magnitude and orientation of the
+              gradient energy in each cell.  The resulting image is then returned.
+    !*/
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename Pyramid_type
+        >
+    unsigned long num_separable_filters (
+        const object_detector<scan_fhog_pyramid<Pyramid_type> >& detector
+    );
+    /*!
+        requires
+            - detector.get_w().size() >= detector.get_scanner().get_num_dimensions()
+              (i.e. the detector must have been populated with a HOG filter)
+        ensures
+            - Returns the number of separable filters necessary to represent the HOG
+              filters in the given detector.
+    !*/
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_SCAN_fHOG_PYRAMID_ABSTRACT_H__
+
+