Added evaluate_detectors() to make it easy to run a bunch of HOG detectors

efficiently, even when their window sizes differ.

Added evaluate_detectors() to make it easy to run a bunch of HOG detectors
efficiently, even when their window sizes differ.
4f275bd7 · Davis King · 09af3eb8 · 4f275bd7 · 4f275bd7
Commit 4f275bd7 authored May 26, 2014 by Davis King
Show whitespace changes
Inline Side-by-side

Showing with 310 additions and 53 deletions

scan_fhog_pyramid.h dlib/image_processing/scan_fhog_pyramid.h +246 -53

scan_fhog_pyramid_abstract.h dlib/image_processing/scan_fhog_pyramid_abstract.h +64 -0

No files found.
--- a/dlib/image_processing/scan_fhog_pyramid.h
+++ b/dlib/image_processing/scan_fhog_pyramid.h
@@ -226,7 +226,6 @@ namespace dlib
                return num;
            }
-        private:
            std::vector<matrix<float> > filters;
            std::vector<std::vector<matrix<float,0,1> > > row_filters, col_filters;
        };
@@ -361,14 +360,6 @@ namespace dlib
            height = temp.height();
        }
-        static bool compare_pair_rect (
-            const std::pair<double, rectangle>& a,
-            const std::pair<double, rectangle>& b
-        )
-        {
-            return a.first < b.first;
-        }
        void get_mapped_rect_and_metadata (
            const unsigned long number_pyramid_levels,
            const rectangle& rect,
@@ -389,12 +380,6 @@ namespace dlib
        typedef array<array2d<float> > fhog_image;
-        static rectangle apply_filters_to_fhog (
-            const fhog_filterbank& w,
-            const fhog_image& feats,
-            array2d<float>& saliency_image
-        );
        feature_extractor_type fe;
        array<fhog_image> feats;
        int cell_size;
@@ -422,11 +407,12 @@ namespace dlib
 // ----------------------------------------------------------------------------------------
-    template <typename T, typename U>
+    namespace impl
-    rectangle scan_fhog_pyramid<T,U>::
+    {
-    apply_filters_to_fhog (
+        template <typename fhog_filterbank>
+        rectangle apply_filters_to_fhog (
            const fhog_filterbank& w,
-        const fhog_image& feats,
+            const array<array2d<float> >& feats,
            array2d<float>& saliency_image
        )
        {
@@ -471,6 +457,7 @@ namespace dlib
            }
            return area;
        }
+    }
 // ----------------------------------------------------------------------------------------
@@ -563,16 +550,23 @@ namespace dlib
 // ----------------------------------------------------------------------------------------
+    namespace impl
+    {
        template <
-        typename Pyramid_type,
+            typename pyramid_type,
+            typename image_type,
            typename feature_extractor_type
            >
-    template <
+        void create_fhog_pyramid (
-        typename image_type
+            const image_type& img,
-        >
+            const feature_extractor_type& fe,
-    void scan_fhog_pyramid<Pyramid_type,feature_extractor_type>::
+            array<array<array2d<float> > >& feats,
-    load (
+            int cell_size,
-        const image_type& img
+            int filter_rows_padding,
+            int filter_cols_padding,
+            unsigned long min_pyramid_layer_width,
+            unsigned long min_pyramid_layer_height,
+            unsigned long max_pyramid_levels
        )
        {
            unsigned long levels = 0;
@@ -591,14 +585,12 @@ namespace dlib
                feats.set_max_size(levels);
            feats.set_size(levels);
-        unsigned long width, height;
-        compute_fhog_window_size(width,height);
            typedef typename image_type::type pixel_type;
            typedef typename image_type::mem_manager_type mem_manager_type;
            // build our feature pyramid
-        fe(img, feats[0], cell_size,height,width);
+            fe(img, feats[0], cell_size,filter_rows_padding,filter_cols_padding);
            DLIB_ASSERT(feats[0].size() == fe.get_num_planes(), 
                "Invalid feature extractor used with dlib::scan_fhog_pyramid.  The output does not have the \n"
                "indicated number of planes.");
@@ -607,17 +599,39 @@ namespace dlib
            {
                array2d<pixel_type,mem_manager_type> temp1, temp2;
                pyr(img, temp1);
-            fe(temp1, feats[1], cell_size,height,width);
+                fe(temp1, feats[1], cell_size,filter_rows_padding,filter_cols_padding);
                swap(temp1,temp2);
                for (unsigned long i = 2; i < feats.size(); ++i)
                {
                    pyr(temp2, temp1);
-                fe(temp1, feats[i], cell_size,height,width);
+                    fe(temp1, feats[i], cell_size,filter_rows_padding,filter_cols_padding);
                    swap(temp1,temp2);
                }
            }
        }
+    }
+// ----------------------------------------------------------------------------------------
+    template <
+        typename Pyramid_type,
+        typename feature_extractor_type
+        >
+    template <
+        typename image_type
+        >
+    void scan_fhog_pyramid<Pyramid_type,feature_extractor_type>::
+    load (
+        const image_type& img
+    )
+    {
+        // Builds this scanner's feature pyramid for img.  The actual work is delegated
+        // to impl::create_fhog_pyramid(), which fills in feats using this scanner's
+        // feature extractor, cell size, and pyramid settings.
+        unsigned long window_cols;
+        unsigned long window_rows;
+        compute_fhog_window_size(window_cols, window_rows);
+        // The window size is what gets passed as the padding arguments.  Note the
+        // order: rows (height) first, then columns (width).
+        impl::create_fhog_pyramid<Pyramid_type>(
+            img,
+            fe,
+            feats,
+            cell_size,
+            window_rows,    // filter_rows_padding
+            window_cols,    // filter_cols_padding
+            min_pyramid_layer_width,
+            min_pyramid_layer_height,
+            max_pyramid_levels);
+    }
 // ----------------------------------------------------------------------------------------
@@ -732,33 +746,36 @@ namespace dlib
 // ----------------------------------------------------------------------------------------
+    namespace impl
+    {
+        inline bool compare_pair_rect (
+            const std::pair<double, rectangle>& a,
+            const std::pair<double, rectangle>& b
+        )
+        {
+            // Orders the pairs by their double value only; the rectangles play no
+            // part in the comparison.
+            const double lhs_value = a.first;
+            const double rhs_value = b.first;
+            return lhs_value < rhs_value;
+        }
        template <
-        typename Pyramid_type,
+            typename pyramid_type,
-        typename feature_extractor_type
+            typename feature_extractor_type,
+            typename fhog_filterbank
            >
-    void scan_fhog_pyramid<Pyramid_type,feature_extractor_type>::
+        void detect_from_fhog_pyramid (
-    detect (
+            const array<array<array2d<float> > >& feats,
+            const feature_extractor_type& fe,
            const fhog_filterbank& w,
-        std::vector<std::pair<double, rectangle> >& dets,
+            const double thresh,
-        const double thresh
+            const unsigned long det_box_height,
-    ) const
+            const unsigned long det_box_width,
+            const int cell_size,
+            const int filter_rows_padding,
+            const int filter_cols_padding,
+            std::vector<std::pair<double, rectangle> >& dets
+        ) 
        {
-        // make sure requires clause is not broken
-        DLIB_ASSERT(is_loaded_with_image() &&
-                    w.get_num_dimensions() == get_num_dimensions(), 
-            "\t void scan_fhog_pyramid::detect()"
-            << "\n\t Invalid inputs were given to this function "
-            << "\n\t is_loaded_with_image(): " << is_loaded_with_image()
-            << "\n\t w.get_num_dimensions(): " << w.get_num_dimensions()
-            << "\n\t get_num_dimensions():   " << get_num_dimensions()
-            << "\n\t this: " << this
-            );
            dets.clear();
-        unsigned long width, height;
-        compute_fhog_window_size(width,height);
            array2d<float> saliency_image;
            pyramid_type pyr;
@@ -775,7 +792,8 @@ namespace dlib
                        // if we found a detection
                        if (saliency_image[r][c] >= thresh)
                        {
-                        rectangle rect = fe.feats_to_image(centered_rect(point(c,r),width-2*padding,height-2*padding), cell_size, height,width);
+                            rectangle rect = fe.feats_to_image(centered_rect(point(c,r),det_box_width,det_box_height), 
+                                cell_size, filter_rows_padding, filter_cols_padding);
                            rect = pyr.rect_up(rect, l);
                            dets.push_back(std::make_pair(saliency_image[r][c], rect));
                        }
@@ -786,6 +804,53 @@ namespace dlib
            std::sort(dets.rbegin(), dets.rend(), compare_pair_rect);
        }
+        inline bool overlaps_any_box (
+            const test_box_overlap& tester,
+            const std::vector<rect_detection>& rects,
+            const rect_detection& rect
+        ) 
+        {
+            // Returns true as soon as tester reports that rect's box overlaps the box of
+            // some element of rects, and false if no element does (including when rects
+            // is empty).
+            typedef std::vector<rect_detection>::const_iterator rect_iterator;
+            for (rect_iterator it = rects.begin(); it != rects.end(); ++it)
+            {
+                if (tester(it->rect, rect.rect))
+                    return true;
+            }
+            return false;
+        }
+    }
+// ----------------------------------------------------------------------------------------
+    template <
+        typename Pyramid_type,
+        typename feature_extractor_type
+        >
+    void scan_fhog_pyramid<Pyramid_type,feature_extractor_type>::
+    detect (
+        const fhog_filterbank& w,
+        std::vector<std::pair<double, rectangle> >& dets,
+        const double thresh
+    ) const
+    {
+        // make sure requires clause is not broken
+        DLIB_ASSERT(is_loaded_with_image() &&
+                    w.get_num_dimensions() == get_num_dimensions(), 
+            "\t void scan_fhog_pyramid::detect()"
+            << "\n\t Invalid inputs were given to this function "
+            << "\n\t is_loaded_with_image(): " << is_loaded_with_image()
+            << "\n\t w.get_num_dimensions(): " << w.get_num_dimensions()
+            << "\n\t get_num_dimensions():   " << get_num_dimensions()
+            << "\n\t this: " << this
+            );
+        unsigned long window_cols;
+        unsigned long window_rows;
+        compute_fhog_window_size(window_cols, window_rows);
+        // The detection box is the window with 2*padding removed from each dimension.
+        const unsigned long det_box_rows = window_rows - 2*padding;
+        const unsigned long det_box_cols = window_cols - 2*padding;
+        // The full window size is also what is passed as the row/column filter padding,
+        // matching what load() hands to impl::create_fhog_pyramid().
+        impl::detect_from_fhog_pyramid<pyramid_type>(
+            feats, fe, w, thresh,
+            det_box_rows, det_box_cols,
+            cell_size,
+            window_rows, window_cols,
+            dets);
+    }
 // ----------------------------------------------------------------------------------------
    template <
@@ -1145,6 +1210,134 @@ namespace dlib
    };
 // ----------------------------------------------------------------------------------------
+// ----------------------------------------------------------------------------------------
+    // Runs every detector in detectors over img and stores the non-max suppressed
+    // detections into dets, highest confidence first.  adjust_threshold is added to each
+    // weight vector's own threshold before detection.  See the specification in
+    // scan_fhog_pyramid_abstract.h for the full contract.
+    template <
+        typename pyramid_type,
+        typename image_type
+        >
+    void evaluate_detectors (
+        const std::vector<object_detector<scan_fhog_pyramid<pyramid_type> > >& detectors,
+        const image_type& img,
+        std::vector<rect_detection>& dets,
+        const double adjust_threshold = 0
+    )
+    {
+        typedef scan_fhog_pyramid<pyramid_type> scanner_type;
+        dets.clear();
+        if (detectors.size() == 0)
+            return;
+        const int cell_size = detectors[0].get_scanner().get_cell_size();
+        // Find the maximum sized filters and also most extreme pyramiding settings used.
+        unsigned long max_filter_width = 0;
+        unsigned long max_filter_height = 0;
+        unsigned long min_pyramid_layer_width = std::numeric_limits<unsigned long>::max();
+        unsigned long min_pyramid_layer_height = std::numeric_limits<unsigned long>::max();
+        unsigned long max_pyramid_levels = 0;
+        bool all_cell_sizes_the_same = true;
+        for (unsigned long i = 0; i < detectors.size(); ++i)
+        {
+            const scanner_type& scanner = detectors[i].get_scanner();
+            max_filter_width = std::max(max_filter_width, scanner.get_fhog_window_width());
+            max_filter_height = std::max(max_filter_height, scanner.get_fhog_window_height());
+            max_pyramid_levels = std::max(max_pyramid_levels, scanner.get_max_pyramid_levels());
+            min_pyramid_layer_width = std::min(min_pyramid_layer_width, scanner.get_min_pyramid_layer_width());
+            min_pyramid_layer_height = std::min(min_pyramid_layer_height, scanner.get_min_pyramid_layer_height());
+            if (cell_size != scanner.get_cell_size())
+                all_cell_sizes_the_same = false;
+        }
+        std::vector<rect_detection> dets_accum;
+        // Do the HOG feature extraction to make the fhog pyramid.  Again, note that we
+        // are making a pyramid that will work with any of the detectors.  But only if all
+        // the cell sizes are the same.  If they aren't then we have to calculate the
+        // pyramid for each detector individually.
+        array<array<array2d<float> > > feats;
+        if (all_cell_sizes_the_same)
+        {
+            impl::create_fhog_pyramid<pyramid_type>(img,
+                detectors[0].get_scanner().get_feature_extractor(), feats, cell_size,
+                max_filter_height, max_filter_width, min_pyramid_layer_width,
+                min_pyramid_layer_height, max_pyramid_levels);
+        }
+        std::vector<std::pair<double, rectangle> > temp_dets;
+        // Counts every weight vector we run so that we know, when doing non-max
+        // suppression below, whether dets_accum is a concatenation of several
+        // individually sorted lists and therefore needs to be sorted as a whole.
+        unsigned long num_weight_vectors_run = 0;
+        for (unsigned long i = 0; i < detectors.size(); ++i)
+        {
+            const scanner_type& scanner = detectors[i].get_scanner();
+            // The cell size feats is built with for this detector.  When all the cell
+            // sizes are the same this equals cell_size.  When they differ we must use
+            // this detector's own value, both to build the pyramid and to map the
+            // detections back into image coordinates.
+            const int scanner_cell_size = scanner.get_cell_size();
+            if (!all_cell_sizes_the_same)
+            {
+                impl::create_fhog_pyramid<pyramid_type>(img,
+                    scanner.get_feature_extractor(), feats, scanner_cell_size,
+                    max_filter_height, max_filter_width, min_pyramid_layer_width,
+                    min_pyramid_layer_height, max_pyramid_levels);
+            }
+            const unsigned long det_box_width  = scanner.get_fhog_window_width()  - 2*scanner.get_padding();
+            const unsigned long det_box_height = scanner.get_fhog_window_height() - 2*scanner.get_padding();
+            // A single detector object might itself have multiple weight vectors in it. So
+            // we need to evaluate all of them.
+            for (unsigned long d = 0; d < detectors[i].num_detectors(); ++d)
+            {
+                const double thresh = detectors[i].get_processed_w(d).w(scanner.get_num_dimensions());
+                // adjust_threshold shifts the detection threshold, but the reported
+                // confidence below is still measured relative to the unadjusted thresh.
+                // So every output satisfies detection_confidence >= adjust_threshold.
+                impl::detect_from_fhog_pyramid<pyramid_type>(feats, scanner.get_feature_extractor(),
+                    detectors[i].get_processed_w(d).get_detect_argument(), thresh+adjust_threshold,
+                    det_box_height, det_box_width, scanner_cell_size, max_filter_height,
+                    max_filter_width, temp_dets);
+                ++num_weight_vectors_run;
+                for (unsigned long j = 0; j < temp_dets.size(); ++j)
+                {
+                    rect_detection temp;
+                    temp.detection_confidence = temp_dets[j].first-thresh;
+                    temp.weight_index = i;
+                    temp.rect = temp_dets[j].second;
+                    dets_accum.push_back(temp);
+                }
+            }
+        }
+        // Do non-max suppression.  The output of a single weight vector is already sorted
+        // best first, so we only need to sort when more than one weight vector contributed
+        // (either from multiple detectors or from one detector with multiple weight vectors).
+        if (num_weight_vectors_run > 1)
+            std::sort(dets_accum.rbegin(), dets_accum.rend());
+        for (unsigned long i = 0; i < dets_accum.size(); ++i)
+        {
+            const test_box_overlap tester = detectors[dets_accum[i].weight_index].get_overlap_tester();
+            if (impl::overlaps_any_box(tester, dets, dets_accum[i]))
+                continue;
+            dets.push_back(dets_accum[i]);
+        }
+    }
+// ----------------------------------------------------------------------------------------
+    template <
+        typename Pyramid_type,
+        typename image_type
+        >
+    std::vector<rectangle> evaluate_detectors (
+        const std::vector<object_detector<scan_fhog_pyramid<Pyramid_type> > >& detectors,
+        const image_type& img,
+        const double adjust_threshold = 0
+    )
+    {
+        // Run the rect_detection version of evaluate_detectors() and then keep only the
+        // rectangle of each detection, preserving the order of the detections.
+        std::vector<rect_detection> full_dets;
+        evaluate_detectors(detectors, img, full_dets, adjust_threshold);
+        std::vector<rectangle> rects;
+        rects.reserve(full_dets.size());
+        typedef std::vector<rect_detection>::const_iterator det_iterator;
+        for (det_iterator it = full_dets.begin(); it != full_dets.end(); ++it)
+            rects.push_back(it->rect);
+        return rects;
+    }
+// ----------------------------------------------------------------------------------------
+// ----------------------------------------------------------------------------------------
 }

--- a/dlib/image_processing/scan_fhog_pyramid_abstract.h
+++ b/dlib/image_processing/scan_fhog_pyramid_abstract.h
@@ -693,6 +693,70 @@ namespace dlib
        provides deserialization support 
    !*/
+// ----------------------------------------------------------------------------------------
+// ----------------------------------------------------------------------------------------
+    template <
+        typename pyramid_type,
+        typename image_type
+        >
+    void evaluate_detectors (
+        const std::vector<object_detector<scan_fhog_pyramid<pyramid_type>>>& detectors,
+        const image_type& img,
+        std::vector<rect_detection>& dets,
+        const double adjust_threshold = 0
+    );
+    /*!
+        ensures
+            - This function runs each of the provided object_detector objects over img and
+              stores the resulting detections into #dets.  Importantly, this function is
+              faster than running each detector individually because it computes the HOG
+              features only once and then reuses them for each detector.  However, it is
+              important to note that this speedup is only possible if all the detectors use
+              the same cell_size parameter that determines how HOG features are computed.
+              If different cell_size values are used then this function will not be any
+              faster than running the detectors individually.
+            - if (detectors.size() == 0) then
+                - #dets.size() == 0
+            - This function applies non-max suppression to the outputs from all detectors
+              and therefore none of the outputs will overlap with each other.  Candidate
+              boxes are considered one at a time, and each candidate is compared against
+              the boxes already accepted into #dets using the overlap tester of the
+              detector that produced the candidate (i.e. its get_overlap_tester()).
+            - To be precise, this function performs object detection on the given image and
+              stores the detected objects into #dets.  In particular, we will have that:
+                - #dets is sorted such that the highest confidence detections come first.
+                  E.g. element 0 is the best detection, element 1 the next best, and so on.
+                - #dets.size() == the number of detected objects.
+                - #dets[i].detection_confidence == The strength of the i-th detection.
+                  Larger values indicate that the detector is more confident that #dets[i]
+                  is a correct detection rather than being a false alarm.  Moreover, the
+                  detection_confidence is equal to the detection value output by the
+                  scanner minus the threshold value stored at the end of the weight vector.
+                - #dets[i].rect == the bounding box for the i-th detection.
+                - The detection #dets[i].rect was produced by detectors[#dets[i].weight_index].
+                  Note that weight_index is therefore an index into the detectors vector.
+                  It does not identify which weight vector inside that detector produced
+                  the detection when the detector contains more than one.
+            - The detection threshold is adjusted by having adjust_threshold added to it.
+              Therefore, an adjust_threshold value > 0 makes detecting objects harder while
+              a negative value makes it easier.  Moreover, the following will be true for
+              all valid i:
+                - #dets[i].detection_confidence >= adjust_threshold
+              This means that, for example, you can obtain the maximum possible number of
+              detections by setting adjust_threshold equal to negative infinity.
+    !*/
+// ----------------------------------------------------------------------------------------
+    template <
+        typename pyramid_type,
+        typename image_type
+        >
+    std::vector<rectangle> evaluate_detectors (
+        const std::vector<object_detector<scan_fhog_pyramid<pyramid_type>>>& detectors,
+        const image_type& img,
+        const double adjust_threshold = 0
+    );
+    /*!
+        ensures
+            - This function just calls the above evaluate_detectors() routine and copies
+              the output dets into a vector<rectangle> object and returns it.  Therefore,
+              this function is provided for convenience.
+            - detectors, img, and adjust_threshold are passed to the above routine
+              unchanged.
+            - The returned rectangles are in the same order as the detections output by
+              the above routine.  That is, element i of the returned vector is the rect
+              of the i-th detection.  The detection_confidence and weight_index values
+              are not returned.
+    !*/
 // ----------------------------------------------------------------------------------------
 }