Refined the scan_image_pyramid interface a little. In particular, I split the
get_feature_vector() method into two separate functions so the interface is a little simpler and more flexible.

Refined the scan_image_pyramid interface a little. In particular, I split the
get_feature_vector() method into two separate functions so the interface is a little simpler and more flexible.
3ebf0f2e · Davis King · 578322dc · 3ebf0f2e · 3ebf0f2e · 3ebf0f2e
Commit 3ebf0f2e authored Dec 24, 2011 by Davis King
3 changed files
--- a/dlib/image_processing/scan_image_pyramid.h
+++ b/dlib/image_processing/scan_image_pyramid.h
@@ -86,8 +86,11 @@ namespace dlib

        void get_feature_vector (
            const std::vector<rectangle>& rects,
-            feature_vector_type& psi,
-            std::vector<rectangle>& mapped_rects
+            feature_vector_type& psi
+        ) const;
+
+        const rectangle get_best_matching_rect (
+            const rectangle& rect
        ) const;

        template <typename T, typename U>
@@ -128,6 +131,13 @@ namespace dlib
            deserialize(item.rects, in);
        }

+        void get_mapped_rect_and_metadata (
+            rectangle rect,
+            rectangle& mapped_rect,
+            detection_template& best_template,
+            unsigned long& best_level
+        ) const;
+

        feature_extractor_type feats_config; // just here to hold configuration.  use it to populate the feats elements.
        typename array<feature_extractor_type>::kernel_2a feats;
@@ -533,40 +543,48 @@ namespace dlib
        typename Pyramid_type,
        typename Feature_extractor_type
        >
-    void scan_image_pyramid<Pyramid_type,Feature_extractor_type>::
-    get_feature_vector (
-        const std::vector<rectangle>& rects,
-        feature_vector_type& psi,
-        std::vector<rectangle>& mapped_rects
+    const rectangle scan_image_pyramid<Pyramid_type,Feature_extractor_type>::
+    get_best_matching_rect (
+        const rectangle& rect
    ) const
    {
        // make sure requires clause is not broken
        DLIB_ASSERT(get_num_detection_templates() > 0 &&
-                    is_loaded_with_image() &&
-                    psi.size() >= get_num_dimensions(), 
-            "\t void scan_image_pyramid::get_feature_vector()"
+                    is_loaded_with_image(),
+            "\t const rectangle scan_image_pyramid::get_best_matching_rect()"
            << "\n\t Invalid inputs were given to this function "
            << "\n\t get_num_detection_templates(): " << get_num_detection_templates()
            << "\n\t is_loaded_with_image(): " << is_loaded_with_image()
-            << "\n\t psi.size():             " << psi.size()
-            << "\n\t get_num_dimensions():   " << get_num_dimensions()
            << "\n\t this: " << this
            );

-        psi = 0;
+        rectangle mapped_rect;
+        detection_template best_template;
+        unsigned long best_level;
+        get_mapped_rect_and_metadata(rect, mapped_rect, best_template, best_level);
+        return mapped_rect;
+    }

-        mapped_rects.clear();
+// ----------------------------------------------------------------------------------------

-        pyramid_type pyr;
-        for (unsigned long i = 0; i < rects.size(); ++i)
+    template <
+        typename Pyramid_type,
+        typename Feature_extractor_type
+        >
+    void scan_image_pyramid<Pyramid_type,Feature_extractor_type>::
+    get_mapped_rect_and_metadata (
+        rectangle rect,
+        rectangle& mapped_rect,
+        detection_template& best_template,
+        unsigned long& best_level
+    ) const
    {
-            // Figure out the pyramid level which best matches rects[i] against one of our 
+        pyramid_type pyr;
+        // Figure out the pyramid level which best matches rect against one of our 
        // detection template object boxes.
-            unsigned long best_level = 0;
+        best_level = 0;
        double match_score = std::numeric_limits<double>::infinity();
-            detection_template best_template;

-            rectangle rect = rects[i];
        const dlib::vector<double,2> p(rect.width(), rect.height());

        // for all the levels
@@ -600,9 +618,9 @@ namespace dlib


        // Now get the features out of feats[best_level].  But first translate best_template 
-            // into the right spot (it should be centered at the location determined by rects[i])
+        // into the right spot (it should be centered at the location determined by rect)
        // and convert it into the feature image coordinate system.
-            rect = pyr.rect_down(rects[i],best_level);
+        rect = pyr.rect_down(rect,best_level);
        const point offset = -feats[best_level].image_to_feat_space(point(0,0));
        const point origin = feats[best_level].image_to_feat_space(center(rect)) + offset;
        for (unsigned long k = 0; k < best_template.rects.size(); ++k)
@@ -616,13 +634,49 @@ namespace dlib

        // The input rectangle was mapped to one of the detection templates.  Reverse the process
        // to figure out what the mapped rectangle is in the original input space.
-            rectangle mapped_rect = translate_rect(best_template.object_box, feats[best_level].feat_to_image_space(origin-offset));
+        mapped_rect = translate_rect(best_template.object_box, feats[best_level].feat_to_image_space(origin-offset));
        mapped_rect = pyr.rect_up(mapped_rect, best_level);
-            mapped_rects.push_back(mapped_rect);
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename Pyramid_type,
+        typename Feature_extractor_type
+        >
+    void scan_image_pyramid<Pyramid_type,Feature_extractor_type>::
+    get_feature_vector (
+        const std::vector<rectangle>& rects,
+        feature_vector_type& psi
+    ) const
+    {
+        // make sure requires clause is not broken
+        DLIB_ASSERT(get_num_detection_templates() > 0 &&
+                    is_loaded_with_image() &&
+                    psi.size() >= get_num_dimensions(), 
+            "\t void scan_image_pyramid::get_feature_vector()"
+            << "\n\t Invalid inputs were given to this function "
+            << "\n\t get_num_detection_templates(): " << get_num_detection_templates()
+            << "\n\t is_loaded_with_image(): " << is_loaded_with_image()
+            << "\n\t psi.size():             " << psi.size()
+            << "\n\t get_num_dimensions():   " << get_num_dimensions()
+            << "\n\t this: " << this
+            );
+
+        psi = 0;
+
+
+        pyramid_type pyr;
+        for (unsigned long i = 0; i < rects.size(); ++i)
+        {
+            rectangle mapped_rect;
+            detection_template best_template;
+            unsigned long best_level;
+            get_mapped_rect_and_metadata (rects[i], mapped_rect, best_template, best_level);

            for (unsigned long j = 0; j < best_template.rects.size(); ++j)
            {
-                rect = best_template.rects[j];
+                const rectangle rect = best_template.rects[j];
                const unsigned long template_region_id = j;
                const unsigned long offset = feats_config.get_num_dimensions()*template_region_id;
                for (long r = rect.top(); r <= rect.bottom(); ++r)

--- a/dlib/image_processing/scan_image_pyramid_abstract.h
+++ b/dlib/image_processing/scan_image_pyramid_abstract.h
@@ -289,10 +289,22 @@ namespace dlib
                  been reached).
        !*/

+        const rectangle get_best_matching_rect (
+            const rectangle& rect
+        ) const;
+        /*!
+            requires
+                - is_loaded_with_image() == true
+                - get_num_detection_templates() > 0
+            ensures
+                - Since scan_image_pyramid is a sliding window classifier system, not all possible rectangles 
+                  can be represented.  Therefore, this function allows you to supply a rectangle and obtain the
+                  nearest possible sliding window rectangle.
+        !*/
+
        void get_feature_vector (
            const std::vector<rectangle>& rects,
-            feature_vector_type& psi,
-            std::vector<rectangle>& mapped_rects
+            feature_vector_type& psi
        ) const;
        /*!
            requires
@@ -305,7 +317,6 @@ namespace dlib
                - if (rects was produced by a call to detect(), i.e. rects contains the contents of dets) then
                    - #psi == the sum of feature vectors corresponding to the sliding window locations contained
                      in rects.
-                    - #mapped_rects == rects
                    - Let w denote the w vector given to detect(), then we have:
                        - dot(w,#psi) == sum of scores of the dets produced by detect()
                - else
@@ -313,8 +324,8 @@ namespace dlib
                      be output by detect().  So in the case where rects contains rectangles which could not arise
                      from a call to detect(), this function will map the rectangles in rects to the nearest possible 
                      object boxes and then store the sum of feature vectors for the mapped rectangles into #psi.
-                    - for all valid i: #mapped_rects[i] == the rectangle rects[i] gets mapped to for feature extraction.
-                - #mapped_rects.size() == rects.size()
+                    - for all valid i: get_best_matching_rect(rects[i]) == the rectangle rects[i] gets mapped to for 
+                      feature extraction.
        !*/

    };

--- a/dlib/svm/structural_svm_object_detection_problem.h
+++ b/dlib/svm/structural_svm_object_detection_problem.h
@@ -156,7 +156,11 @@ namespace dlib
            scanner.load(images[idx]);
            psi.set_size(get_num_dimensions());
            std::vector<rectangle> mapped_rects;
-            scanner.get_feature_vector(truth_rects[idx], psi, mapped_rects);
+            scanner.get_feature_vector(truth_rects[idx], psi);
+            for (unsigned long i = 0; i < truth_rects[idx].size(); ++i)
+            {
+                mapped_rects.push_back(scanner.get_best_matching_rect(truth_rects[idx][i]));
+            }
            psi(scanner.get_num_dimensions()) = -1.0*truth_rects[idx].size();

            // check if any of the boxes overlap.  If they do then it is impossible for
@@ -328,8 +332,7 @@ namespace dlib

            psi.set_size(get_num_dimensions());
            psi = 0;
-            std::vector<rectangle> mapped_rects;
-            scanner.get_feature_vector(final_dets, psi, mapped_rects);
+            scanner.get_feature_vector(final_dets, psi);

            psi(scanner.get_num_dimensions()) = -1.0*final_dets.size();
        }