Commit 51c0c148 authored by Davis King's avatar Davis King

Added the scan_image_pyramid object.

parent 0aac2844
......@@ -4,6 +4,7 @@
#define DLIB_IMAGE_PROCESSInG_H___
#include "image_processing/scan_image.h"
#include "image_processing/scan_image_pyramid.h"
#endif // DLIB_IMAGE_PROCESSInG_H___
......
// Copyright (C) 2011 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#ifndef DLIB_SCAN_IMaGE_PYRAMID_H__
#define DLIB_SCAN_IMaGE_PYRAMID_H__
#include "scan_image_pyramid_abstract.h"
#include "../matrix.h"
#include "../geometry.h"
#include "../image_processing.h"
#include "../array2d.h"
#include <vector>
namespace dlib
{
// ----------------------------------------------------------------------------------------
template <
    typename Pyramid_type,
    typename Feature_extractor_type
    >
class scan_image_pyramid : noncopyable
{
    /*!
        Tool for running sliding-window classifiers over an image pyramid.
        See scan_image_pyramid_abstract.h for the full documented contract.
    !*/
public:

    typedef matrix<double,0,1> feature_vector_type;

    typedef Pyramid_type pyramid_type;
    typedef Feature_extractor_type feature_extractor_type;

    scan_image_pyramid (
    );

    template <
        typename image_type
        >
    void load (
        const image_type& img
    );

    inline bool is_loaded_with_image (
    ) const;

    inline void copy_configuration(
        const feature_extractor_type& fe
    );

    inline void copy_configuration (
        const scan_image_pyramid& item
    );

    void add_detection_template (
        const rectangle& object_box,
        const std::vector<rectangle>& feature_extraction_regions
    );

    inline unsigned long get_num_detection_templates (
    ) const;

    inline unsigned long get_num_components_per_detection_template (
    ) const;

    inline long get_num_dimensions (
    ) const;

    unsigned long get_max_pyramid_levels (
    ) const;

    void set_max_pyramid_levels (
        unsigned long max_levels
    );

    inline unsigned long get_max_detections_per_template (
    ) const;

    void set_max_detections_per_template (
        unsigned long max_dets
    );

    void detect (
        const feature_vector_type& w,
        std::vector<std::pair<double, rectangle> >& dets,
        const double thresh
    ) const;

    void get_feature_vector (
        const std::vector<rectangle>& rects,
        feature_vector_type& psi,
        std::vector<rectangle>& mapped_rects
    ) const;

    template <typename T, typename U>
    friend void serialize (
        const scan_image_pyramid<T,U>& item,
        std::ostream& out
    );

    template <typename T, typename U>
    friend void deserialize (
        scan_image_pyramid<T,U>& item,
        std::istream& in
    );

private:

    // Orders detections by ascending score; detect() applies it through
    // reverse iterators to obtain a descending sort.
    static bool compare_pair_rect (
        const std::pair<double, rectangle>& a,
        const std::pair<double, rectangle>& b
    )
    {
        return a.first < b.first;
    }

    // One sliding-window shape plus its feature extraction regions.
    struct detection_template
    {
        rectangle object_box; // always centered at (0,0)
        std::vector<rectangle> rects; // template with respect to (0,0)
    };

    friend void serialize(const detection_template& item, std::ostream& out)
    {
        serialize(item.object_box, out);
        serialize(item.rects, out);
    }
    friend void deserialize(detection_template& item, std::istream& in)
    {
        deserialize(item.object_box, in);
        deserialize(item.rects, in);
    }

    feature_extractor_type feats_config; // just here to hold configuration. use it to populate the feats elements.
    // One feature extractor per pyramid level; empty until load() is called.
    typename array<feature_extractor_type>::kernel_2a feats;
    std::vector<detection_template> det_templates;
    unsigned long max_dets_per_template;
    unsigned long max_pyramid_levels;
};
// ----------------------------------------------------------------------------------------
template <typename T, typename U>
void serialize (
    const scan_image_pyramid<T,U>& item,
    std::ostream& out
)
{
    // Write every member in a fixed order; deserialize() must read them
    // back in exactly the same order.
    serialize(item.feats_config, out);
    serialize(item.feats, out);
    serialize(item.det_templates, out);
    serialize(item.max_dets_per_template, out);
    serialize(item.max_pyramid_levels, out);
}
// ----------------------------------------------------------------------------------------
template <typename T, typename U>
void deserialize (
    scan_image_pyramid<T,U>& item,
    std::istream& in
)
{
    // Read members back in the exact order serialize() wrote them.
    deserialize(item.feats_config, in);
    deserialize(item.feats, in);
    deserialize(item.det_templates, in);
    deserialize(item.max_dets_per_template, in);
    deserialize(item.max_pyramid_levels, in);
}
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
// scan_image_pyramid member functions
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
template <
    typename Pyramid_type,
    typename Feature_extractor_type
    >
scan_image_pyramid<Pyramid_type,Feature_extractor_type>::
scan_image_pyramid (
)
{
    // Default limits; adjustable via set_max_detections_per_template()
    // and set_max_pyramid_levels().
    max_dets_per_template = 2000;
    max_pyramid_levels = 1000;
}
// ----------------------------------------------------------------------------------------
template <
    typename Pyramid_type,
    typename Feature_extractor_type
    >
template <
    typename image_type
    >
void scan_image_pyramid<Pyramid_type,Feature_extractor_type>::
load (
    const image_type& img
)
/*!
    Builds the feature pyramid for img so detect() can be called.
    ensures
        - #is_loaded_with_image() == true
!*/
{
    // Figure out how many pyramid levels we should be using based on the
    // image size.  Stop once a level would be 20 pixels or smaller in
    // either dimension, or once max_pyramid_levels is reached.
    // (levels is unsigned long so the comparisons against the unsigned
    // long members below don't mix signedness.)
    unsigned long levels = 0;
    rectangle rect = get_rect(img);
    pyramid_type pyr;
    while (rect.width() > 20 && rect.height() > 20)
    {
        rect = pyr.rect_down(rect);
        ++levels;
        if (levels >= max_pyramid_levels)
            break;
    }

    // Always keep at least the original image as a level.  Without this, a
    // tiny input image would leave feats empty and feats[0] below would be
    // an out-of-bounds access.
    if (levels == 0)
        levels = 1;

    if (feats.max_size() < levels)
        feats.set_max_size(levels);
    feats.set_size(levels);

    // Propagate the feature extractor configuration to every level.
    for (unsigned long i = 0; i < feats.size(); ++i)
        feats[i].copy_configuration(feats_config);

    // Build our feature pyramid.  Level 0 is the original image; each
    // subsequent level is the previous one downsampled by pyr.
    feats[0].load(img);

    if (feats.size() > 1)
    {
        image_type temp1, temp2;
        pyr(img, temp1);
        feats[1].load(temp1);
        swap(temp1,temp2);

        for (unsigned long i = 2; i < feats.size(); ++i)
        {
            pyr(temp2, temp1);
            feats[i].load(temp1);
            swap(temp1,temp2);
        }
    }
}
// ----------------------------------------------------------------------------------------
template <
    typename Pyramid_type,
    typename Feature_extractor_type
    >
unsigned long scan_image_pyramid<Pyramid_type,Feature_extractor_type>::
get_max_detections_per_template (
) const
{
    // Hard cap on detections produced by a single template scan of one
    // pyramid level (enforced inside detect() via scan_image()).
    return max_dets_per_template;
}
// ----------------------------------------------------------------------------------------
template <
    typename Pyramid_type,
    typename Feature_extractor_type
    >
void scan_image_pyramid<Pyramid_type,Feature_extractor_type>::
set_max_detections_per_template (
    unsigned long max_dets
)
{
    // make sure requires clause is not broken
    DLIB_ASSERT(max_dets > 0 ,
        "\t void scan_image_pyramid::set_max_detections_per_template()"
        << "\n\t The max number of possible detections can't be zero. "
        << "\n\t max_dets: " << max_dets
        << "\n\t this: " << this
        );

    max_dets_per_template = max_dets;
}
// ----------------------------------------------------------------------------------------
template <
    typename Pyramid_type,
    typename Feature_extractor_type
    >
bool scan_image_pyramid<Pyramid_type,Feature_extractor_type>::
is_loaded_with_image (
) const
{
    // A non-empty feature pyramid means load() has been called.
    return feats.size() > 0;
}
// ----------------------------------------------------------------------------------------
template <
    typename Pyramid_type,
    typename Feature_extractor_type
    >
void scan_image_pyramid<Pyramid_type,Feature_extractor_type>::
copy_configuration(
    const feature_extractor_type& fe
)
{
    // Configure the prototype feature extractor.  load() later copies this
    // configuration into the extractor for each pyramid level.
    // Note: plain statement, not "return feats_config.copy_configuration(fe);"
    // — returning a void expression from a void function is legal but
    // misleading.
    feats_config.copy_configuration(fe);
}
// ----------------------------------------------------------------------------------------
template <
    typename Pyramid_type,
    typename Feature_extractor_type
    >
void scan_image_pyramid<Pyramid_type,Feature_extractor_type>::
copy_configuration (
    const scan_image_pyramid& item
)
{
    // Copy every piece of configuration state.  The feats array itself is
    // deliberately left alone: it is the state produced by load().
    max_pyramid_levels = item.max_pyramid_levels;
    max_dets_per_template = item.max_dets_per_template;
    det_templates = item.det_templates;
    feats_config.copy_configuration(item.feats_config);
}
// ----------------------------------------------------------------------------------------
template <
    typename Pyramid_type,
    typename Feature_extractor_type
    >
void scan_image_pyramid<Pyramid_type,Feature_extractor_type>::
add_detection_template (
    const rectangle& object_box,
    const std::vector<rectangle>& feature_extraction_regions
)
{
    // make sure requires clause is not broken
    // NOTE(review): the condition also requires object_box to be centered
    // at (0,0), but the failure message below only describes the
    // rect-count mismatch — consider extending the message.
    DLIB_ASSERT((get_num_detection_templates() == 0 ||
                get_num_components_per_detection_template() == feature_extraction_regions.size())  &&
                center(object_box) == point(0,0),
        "\t void scan_image_pyramid::add_detection_template()"
        << "\n\t The number of rects in this new detection template doesn't match "
        << "\n\t the number in previous detection templates."
        << "\n\t get_num_components_per_detection_template(): " << get_num_components_per_detection_template()
        << "\n\t feature_extraction_regions.size(): " << feature_extraction_regions.size()
        << "\n\t this: " << this
        );

    // Store the new sliding-window shape along with its feature
    // extraction regions (all expressed relative to (0,0)).
    detection_template temp;
    temp.object_box = object_box;
    temp.rects = feature_extraction_regions;
    det_templates.push_back(temp);
}
// ----------------------------------------------------------------------------------------
template <
    typename Pyramid_type,
    typename Feature_extractor_type
    >
unsigned long scan_image_pyramid<Pyramid_type,Feature_extractor_type>::
get_num_detection_templates (
) const
{
    // Number of templates added via add_detection_template().
    return det_templates.size();
}
// ----------------------------------------------------------------------------------------
template <
    typename Pyramid_type,
    typename Feature_extractor_type
    >
unsigned long scan_image_pyramid<Pyramid_type,Feature_extractor_type>::
get_num_components_per_detection_template (
) const
{
    // make sure requires clause is not broken
    DLIB_ASSERT(get_num_detection_templates() > 0 ,
        "\t unsigned long scan_image_pyramid::get_num_components_per_detection_template()"
        << "\n\t You need to give some detection templates before calling this function. "
        << "\n\t get_num_detection_templates(): " << get_num_detection_templates()
        << "\n\t this: " << this
        );

    // All templates are required (by add_detection_template's assert) to
    // have the same number of regions, so the first one is representative.
    return det_templates[0].rects.size();
}
// ----------------------------------------------------------------------------------------
template <
    typename Pyramid_type,
    typename Feature_extractor_type
    >
long scan_image_pyramid<Pyramid_type,Feature_extractor_type>::
get_num_dimensions (
) const
{
    // make sure requires clause is not broken
    DLIB_ASSERT(get_num_detection_templates() > 0 ,
        "\t long scan_image_pyramid::get_num_dimensions()"
        << "\n\t You need to give some detection templates before calling this function. "
        << "\n\t get_num_detection_templates(): " << get_num_detection_templates()
        << "\n\t this: " << this
        );

    // Feature vector for a sliding window = one feature-extractor slice
    // per template component, concatenated.
    return feats_config.get_num_dimensions()*get_num_components_per_detection_template();
}
// ----------------------------------------------------------------------------------------
template <
    typename Pyramid_type,
    typename Feature_extractor_type
    >
unsigned long scan_image_pyramid<Pyramid_type,Feature_extractor_type>::
get_max_pyramid_levels (
) const
{
    // Upper bound on the number of pyramid levels load() will build.
    return max_pyramid_levels;
}
// ----------------------------------------------------------------------------------------
template <
    typename Pyramid_type,
    typename Feature_extractor_type
    >
void scan_image_pyramid<Pyramid_type,Feature_extractor_type>::
set_max_pyramid_levels (
    unsigned long max_levels
)
{
    // make sure requires clause is not broken
    DLIB_ASSERT(max_levels > 0 ,
        "\t void scan_image_pyramid::set_max_pyramid_levels()"
        << "\n\t You can't have zero levels. "
        << "\n\t max_levels: " << max_levels
        << "\n\t this: " << this
        );

    max_pyramid_levels = max_levels;
}
// ----------------------------------------------------------------------------------------
template <
    typename Pyramid_type,
    typename Feature_extractor_type
    >
void scan_image_pyramid<Pyramid_type,Feature_extractor_type>::
detect (
    const feature_vector_type& w,
    std::vector<std::pair<double, rectangle> >& dets,
    const double thresh
) const
/*!
    Scans every detection template over every pyramid level and stores all
    windows scoring >= thresh into #dets, sorted in descending score order.
    Rectangles are mapped back into the original image's coordinate system.
!*/
{
    // make sure requires clause is not broken
    DLIB_ASSERT(get_num_detection_templates() > 0 &&
                is_loaded_with_image() &&
                w.size() >= get_num_dimensions(),
        "\t void scan_image_pyramid::detect()"
        << "\n\t Invalid inputs were given to this function "
        << "\n\t get_num_detection_templates(): " << get_num_detection_templates()
        << "\n\t is_loaded_with_image(): " << is_loaded_with_image()
        << "\n\t w.size(): " << w.size()
        << "\n\t get_num_dimensions():     " << get_num_dimensions()
        << "\n\t this: " << this
        );

    dets.clear();

    // One saliency image per template component.  Pixel (r,c) of saliency
    // image i holds the dot product of component i's slice of w with the
    // feature descriptor at (r,c).
    array<array2d<double> >::kernel_2a saliency_images;
    saliency_images.set_max_size(get_num_components_per_detection_template());
    saliency_images.set_size(get_num_components_per_detection_template());

    std::vector<std::pair<unsigned int,rectangle> > region_rects(get_num_components_per_detection_template());
    pyramid_type pyr;
    std::vector<std::pair<double, point> > point_dets;

    // for all pyramid levels
    for (unsigned long l = 0; l < feats.size(); ++l)
    {
        for (unsigned long i = 0; i < saliency_images.size(); ++i)
            saliency_images[i].set_size(feats[l].nr(), feats[l].nc());

        // build saliency images for pyramid level l
        for (long r = 0; r < feats[l].nr(); ++r)
        {
            for (long c = 0; c < feats[l].nc(); ++c)
            {
                const typename feature_extractor_type::descriptor_type& descriptor = feats[l](r,c);
                for (unsigned long i = 0; i < saliency_images.size(); ++i)
                {
                    // Component i reads its own contiguous slice of w.
                    const unsigned long offset = feats_config.get_num_dimensions()*i;

                    double sum = 0;
                    for (unsigned long k = 0; k < descriptor.size(); ++k)
                    {
                        sum += w(descriptor[k].first + offset)*descriptor[k].second;
                    }
                    saliency_images[i][r][c] = sum;
                }
            }
        }

        // now search the saliency images
        for (unsigned long i = 0; i < det_templates.size(); ++i)
        {
            // Map this template's regions into the feature image
            // coordinate system for level l.
            const point offset = -feats[l].image_to_feat_space(point(0,0));
            for (unsigned long j = 0; j < region_rects.size(); ++j)
                region_rects[j] = make_pair(j, translate_rect(feats[l].image_to_feat_space(det_templates[i].rects[j]),offset));

            scan_image(point_dets, saliency_images, region_rects, thresh, max_dets_per_template);

            // convert all the point detections into rectangles at the original image scale and coordinate system
            // (debug-only consistency checks and std::cout tracing removed)
            for (unsigned long j = 0; j < point_dets.size(); ++j)
            {
                const double score = point_dets[j].first;
                point p = point_dets[j].second;
                p = feats[l].feat_to_image_space(p);
                rectangle rect = translate_rect(det_templates[i].object_box, p);
                rect = pyr.rect_up(rect, l);

                dets.push_back(std::make_pair(score, rect));
            }
        }
    }

    // Sort into descending score order (reverse iterators invert the
    // ascending comparator).
    std::sort(dets.rbegin(), dets.rend(), compare_pair_rect);
}
// ----------------------------------------------------------------------------------------
template <
    typename Pyramid_type,
    typename Feature_extractor_type
    >
void scan_image_pyramid<Pyramid_type,Feature_extractor_type>::
get_feature_vector (
    const std::vector<rectangle>& rects,
    feature_vector_type& psi,
    std::vector<rectangle>& mapped_rects
) const
/*!
    Accumulates into psi the sum of the sliding-window feature vectors
    closest to each rectangle in rects, and records in mapped_rects the
    object box each input rectangle was mapped to.
    NOTE(review): psi is zeroed (psi = 0) but never resized here — the
    caller must supply psi with size >= get_num_dimensions(); confirm
    against the abstract's requires clause.
!*/
{
    psi = 0;
    mapped_rects.clear();

    pyramid_type pyr;

    for (unsigned long i = 0; i < rects.size(); ++i)
    {
        // Figure out the pyramid level which best matches rects[i] against one of our
        // detection template object boxes.
        unsigned long best_level = 0;
        // match_score: smaller is better (width/height distance); start at
        // infinity so the first candidate always wins.
        double match_score = std::numeric_limits<double>::infinity();
        detection_template best_template;

        rectangle rect = rects[i];
        const dlib::vector<double,2> p(rect.width(), rect.height());

        // for all the levels
        for (unsigned long l = 0; l < feats.size(); ++l)
        {
            // Run the center point through the feature/image space transformation just to make
            // sure we exactly replicate the procedure for shifting an object_box used elsewhere
            // in this file.
            const point origin = feats[l].feat_to_image_space(feats[l].image_to_feat_space(center(pyr.rect_down(rect,l))));
            for (unsigned long t = 0; t < det_templates.size(); ++t)
            {
                // Map this detection template into the normal image space and see how
                // close it is to the rect we are looking for.  We do the translation here
                // because the rect_up() routine takes place using integer arithmetic and
                // could potentially give slightly different results with and without the
                // translation.
                rectangle mapped_rect = translate_rect(det_templates[t].object_box, origin);
                mapped_rect = pyr.rect_up(mapped_rect, l);

                const dlib::vector<double,2> p2(mapped_rect.width(),
                                                mapped_rect.height());

                // Keep the (level, template) pair whose mapped size is
                // closest to the requested rectangle's size.
                if ((p-p2).length() < match_score)
                {
                    match_score = (p-p2).length();
                    best_level = l;
                    best_template = det_templates[t];
                }
            }
        }

        // Now get the features out of feats[best_level].  But first translate best_template
        // into the right spot (it should be centered at the location determined by rects[i])
        // and convert it into the feature image coordinate system.
        rect = pyr.rect_down(rects[i],best_level);
        const point offset = -feats[best_level].image_to_feat_space(point(0,0));
        const point origin = feats[best_level].image_to_feat_space(center(rect)) + offset;
        for (unsigned long k = 0; k < best_template.rects.size(); ++k)
        {
            rectangle temp = best_template.rects[k];
            temp = feats[best_level].image_to_feat_space(temp);
            temp = translate_rect(temp, origin);
            // Clip to the feature image so the accumulation loop below
            // never indexes outside feats[best_level].
            temp = get_rect(feats[best_level]).intersect(temp);
            best_template.rects[k] = temp;
        }

        // The input rectangle was mapped to one of the detection templates.  Reverse the process
        // to figure out what the mapped rectangle is in the original input space.
        rectangle mapped_rect = translate_rect(best_template.object_box, feats[best_level].feat_to_image_space(origin-offset));
        mapped_rect = pyr.rect_up(mapped_rect, best_level);
        mapped_rects.push_back(mapped_rect);

        // Accumulate the descriptors inside each (clipped) template region
        // into the slice of psi belonging to that region.
        for (unsigned long j = 0; j < best_template.rects.size(); ++j)
        {
            rect = best_template.rects[j];
            const unsigned long template_region_id = j;
            const unsigned long offset = feats_config.get_num_dimensions()*template_region_id;
            for (long r = rect.top(); r <= rect.bottom(); ++r)
            {
                for (long c = rect.left(); c <= rect.right(); ++c)
                {
                    const typename feature_extractor_type::descriptor_type& descriptor = feats[best_level](r,c);
                    for (unsigned long k = 0; k < descriptor.size(); ++k)
                    {
                        psi(descriptor[k].first + offset) += descriptor[k].second;
                    }
                }
            }
        }
    }
}
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_SCAN_IMaGE_PYRAMID_H__
// Copyright (C) 2011 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#undef DLIB_SCAN_IMaGE_PYRAMID_ABSTRACT_H__
#ifdef DLIB_SCAN_IMaGE_PYRAMID_ABSTRACT_H__
#include "../matrix.h"
#include "../geometry.h"
#include "../image_processing.h"
#include "../array2d.h"
#include <vector>
namespace dlib
{
// ----------------------------------------------------------------------------------------
template <
typename Pyramid_type,
typename Feature_extractor_type
>
class scan_image_pyramid : noncopyable
{
/*!
REQUIREMENTS ON Pyramid_type
- must be one of the pyramid_down objects defined in
dlib/image_transforms/image_pyramid_abstract.h or an object with
a compatible interface
REQUIREMENTS ON Feature_extractor_type
- must be an object with an interface compatible with the hashed_feature_image
object defined in dlib/image_keypoint/hashed_feature_image_abstract.h.
INITIAL VALUE
- get_num_detection_templates() == 0
- is_loaded_with_image() == false
- get_max_detections_per_template() == 2000
- get_max_pyramid_levels() == 1000
WHAT THIS OBJECT REPRESENTS
This object is a tool for running a sliding window classifier over
an image pyramid. This object can also be understood as a general
tool for implementing the spatial pyramid models described in the paper:
Beyond Bags of Features: Spatial Pyramid Matching for Recognizing
Natural Scene Categories by Svetlana Lazebnik, Cordelia Schmid,
and Jean Ponce
The sliding window classifiers used by this object have three parts:
1. The underlying feature extraction provided by Feature_extractor_type
objects, which associate a vector with each location in an image.
2. A detection template. This is a rectangle which defines the shape of a
sliding window (the object_box), as well as a set of rectangles which
envelop it. This set of enveloping rectangles defines the spatial
structure of the overall feature extraction within a sliding window.
In particular, each location of a sliding window has a feature vector
associated with it. This feature vector is defined as follows:
- Let N denote the number of enveloping rectangles.
- Let M denote the dimensionality of the vectors output by Feature_extractor_type
objects.
- Let F(i) == the M dimensional vector which is the sum of all vectors
given by our Feature_extractor_type object inside the ith enveloping
rectangle.
- Then the feature vector for a sliding window is an M*N dimensional vector
[F(1) F(2) F(3) ... F(N)] (i.e. it is a concatenation of the N vectors).
This feature vector can be thought of as a collection of N "bags of features",
each bag coming from a spatial location determined by one of the enveloping
rectangles.
3. A weight vector and a threshold value. The dot product between the weight
vector and the feature vector for a sliding window location gives the score
of the window. If this score is greater than the threshold value then the
window location is output as a detection.
Finally, the sliding window classifiers described above are applied to every level
of an image pyramid.
!*/
public:
typedef matrix<double,0,1> feature_vector_type;
typedef Pyramid_type pyramid_type;
typedef Feature_extractor_type feature_extractor_type;
scan_image_pyramid (
);
/*!
ensures
- this object is properly initialized
!*/
template <
typename image_type
>
void load (
const image_type& img
);
/*!
requires
- image_type must be a type with the following properties:
- image_type is default constructable.
- image_type is swappable by the global swap() function.
- image_type logically represents some kind of image and therefore
has .nr() and .nc() member functions. .nr() should return the
number of rows while .nc() returns the number of columns.
- image_type objects can be loaded into Feature_extractor_type
objects via Feature_extractor_type::load().
- image_type objects can be used with Pyramid_type. That is,
if pyr is an object of type Pyramid_type while img1 and img2
are objects of image_type. Then pyr(img1,img2) should be
a valid expression which downsamples img1 into img2.
ensures
- #is_loaded_with_image() == true
- This object is ready to run sliding window classifiers over img. Call
detect() to do this.
!*/
bool is_loaded_with_image (
) const;
/*!
ensures
- returns true if this object has been loaded with an image to process
and false otherwise.
!*/
void copy_configuration(
const feature_extractor_type& fe
);
/*!
ensures
- Let BASE_FE denote the feature_extractor_type object used
internally for local feature extraction. Then this function
performs BASE_FE.copy_configuration(fe)
(i.e. this function allows you to configure the parameters of the
underlying feature extractor used by a scan_image_pyramid object)
!*/
void copy_configuration (
const scan_image_pyramid& item
);
/*!
ensures
- copies all the state information of item into *this, except for state
information populated by load(). More precisely, given two scan_image_pyramid
objects S1 and S2, the following sequence of instructions should always
result in both of them having the exact same state.
S2.copy_configuration(S1);
S1.load(img);
S2.load(img);
!*/
void add_detection_template (
const rectangle& object_box,
const std::vector<rectangle>& feature_extraction_regions
);
/*!
requires
- center(object_box) == point(0,0),
- if (get_num_detection_templates() > 0) then
- get_num_components_per_detection_template() == feature_extraction_regions.size()
(i.e. if you already have detection templates in this object, then
any new detection template must declare a consistent number of
feature extraction regions)
ensures
- Adds another detection template to this object. In particular, object_box
defines the size and shape of a sliding window while feature_extraction_regions
defines the locations for feature extraction as discussed in the WHAT THIS
OBJECT REPRESENTS section above. Note also that the locations of the feature
extraction regions are relative to the object_box.
- #get_num_detection_templates() == get_num_detection_templates() + 1
- The order of rectangles in feature_extraction_regions matters. Recall that
each rectangle gets its own set of features. So given two different templates,
their ith rectangles will both share the same part of the weight vector (w)
supplied to detect(). So there should be some reasonable correspondence
between the rectangle ordering in different detection templates. For,
example, different detection templates should place corresponding
feature extraction regions in roughly the same part of the object_box.
!*/
unsigned long get_num_detection_templates (
) const;
/*!
ensures
- returns the number of detection templates in this object
!*/
unsigned long get_num_components_per_detection_template (
) const;
/*!
requires
- get_num_detection_templates() > 0
ensures
- A detection template is a rectangle which defines the shape of a
sliding window (the object_box), as well as a set of rectangles which
envelop it. This function returns the number of enveloping rectangles
in the detection templates used by this object.
!*/
long get_num_dimensions (
) const;
/*!
requires
- get_num_detection_templates() > 0
ensures
- returns the number of dimensions in the feature vector for a sliding window
location. This value is the dimensionality of the underlying feature vectors
produced by Feature_extractor_type times get_num_components_per_detection_template().
!*/
unsigned long get_max_pyramid_levels (
) const;
/*!
ensures
- returns the maximum number of image pyramid levels this object will use.
Note that #get_max_pyramid_levels() == 1 indicates that no image pyramid
will be used at all. That is, only the original image will be processed
and no lower scale versions will be created.
!*/
void set_max_pyramid_levels (
unsigned long max_levels
);
/*!
requires
- max_levels > 0
ensures
- #get_max_pyramid_levels() == max_levels
!*/
unsigned long get_max_detections_per_template (
) const;
/*!
ensures
- For each image pyramid layer and detection template, this object scans a sliding
window classifier over an image and produces a number of detections. This
function returns a number which defines a hard upper limit on the number of
detections allowed by a single scan. This means that the total number of
possible detections produced by detect() is get_max_detections_per_template()*
get_num_detection_templates()*(number of image pyramid layers).
!*/
void set_max_detections_per_template (
unsigned long max_dets
);
/*!
requires
- max_dets > 0
ensures
- #get_max_detections_per_template() == max_dets
!*/
void detect (
const feature_vector_type& w,
std::vector<std::pair<double, rectangle> >& dets,
const double thresh
) const;
/*!
requires
- w.size() >= get_num_dimensions()
- is_loaded_with_image() == true
- get_num_detection_templates() > 0
ensures
- Scans all the detection templates over all pyramid layers as discussed in the
WHAT THIS OBJECT REPRESENTS section and stores all detections into #dets.
- for all valid i:
- #dets[i].second == The object box which produced this detection. This rectangle gives
the location of the detection. Note that the rectangle will have been converted back into
the original image input space. That is, if this detection was made at a low level in the
image pyramid then the object box will have been automatically mapped up the pyramid layers
to the original image space. Or in other words, if you plot #dets[i].second on top of the
image given to load() it will show up in the right place.
- #dets[i].first == The score for this detection. This value is equal to dot(w, feature vector
for this sliding window location).
- #dets[i].first >= thresh
- #dets will be sorted in descending order. (i.e. #dets[i].first >= #dets[j].first for all i, and j>i)
- Elements of w beyond index get_num_dimensions()-1 are ignored. I.e. only the first
get_num_dimensions() are used.
- Note that no form of non-max suppression is performed. If a window has a score >= thresh
then it is reported in #dets (assuming the limit imposed by get_max_detections_per_template() hasn't
been reached).
!*/
void get_feature_vector (
const std::vector<rectangle>& rects,
feature_vector_type& psi,
std::vector<rectangle>& mapped_rects
) const;
/*!
requires
- is_loaded_with_image() == true
- get_num_detection_templates() > 0
- psi.size() >= get_num_dimensions()
ensures
- This function allows you to determine the feature vector used for a sliding window location
or the sum of such vectors for a set of locations.
- if (rects was produced by a call to detect(), i.e. rects contains the contents of dets) then
- #psi == the sum of feature vectors corresponding to the sliding window locations contained
in rects.
- #mapped_rects == rects
- Let w denote the w vector given to detect(), then we have:
- dot(w,#psi) == sum of scores of the dets produced by detect()
- else
- Since scan_image_pyramid is a sliding window classifier system, not all possible rectangles can
be output by detect(). So in the case where rects contains rectangles which could not arise
from a call to detect(), this function will map the rectangles in rects to the nearest possible
object boxes and then store the sum of feature vectors for the mapped rectangles into #psi.
- for all valid i: #mapped_rects[i] == the rectangle rects[i] gets mapped to for feature extraction.
- #mapped_rects.size() == rects.size()
!*/
};
// ----------------------------------------------------------------------------------------
template <
typename Pyramid_type,
typename Feature_extractor_type
>
void serialize (
const scan_image_pyramid<Pyramid_type,Feature_extractor_type>& item,
std::ostream& out
);
/*!
provides serialization support
!*/
template <
typename Pyramid_type,
typename Feature_extractor_type
>
void deserialize (
scan_image_pyramid<Pyramid_type,Feature_extractor_type>& item,
std::istream& in
);
/*!
provides deserialization support
!*/
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_SCAN_IMaGE_PYRAMID_ABSTRACT_H__
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment