Upgraded the object_detector. Now it can store multiple weight vectors and hence multiple object detectors.

Upgraded the object_detector. Now it can store multiple weight vectors and hence
multiple object detectors.
967b5215 · Davis King · 8194f4ab · 967b5215 · 967b5215 · 967b5215
Commit 967b5215 authored Nov 21, 2013 by Davis King
4 changed files
--- a/dlib/image_processing/object_detector.h
+++ b/dlib/image_processing/object_detector.h
@@ -67,8 +67,18 @@ namespace dlib
            const feature_vector_type& w_ 
        );
+        object_detector (
+            const image_scanner_type& scanner_, 
+            const test_box_overlap& overlap_tester_,
+            const std::vector<feature_vector_type>& w_ 
+        );
+        unsigned long num_detectors (
+        ) const { return w.size(); }
        const feature_vector_type& get_w (
-        ) const { return w.w; }
+            unsigned long idx = 0
+        ) const { return w[idx].w; }
        const test_box_overlap& get_overlap_tester (
        ) const;
@@ -115,6 +125,42 @@ namespace dlib
            double adjust_threshold = 0
        );
+        struct rect_detection
+        {
+            double detection_confidence;
+            unsigned long weight_index;
+            rectangle rect;
+            bool operator<(const rect_detection& item) const { return detection_confidence < item.detection_confidence; }
+        };
+        struct full_detection
+        {
+            double detection_confidence;
+            unsigned long weight_index;
+            full_object_detection rect;
+            bool operator<(const full_detection& item) const { return detection_confidence < item.detection_confidence; }
+        };
+        template <
+            typename image_type
+            >
+        void operator() (
+            const image_type& img,
+            std::vector<rect_detection>& final_dets,
+            double adjust_threshold = 0
+        );
+        template <
+            typename image_type
+            >
+        void operator() (
+            const image_type& img,
+            std::vector<full_detection>& final_dets,
+            double adjust_threshold = 0
+        );
        template <typename T>
        friend void serialize (
            const object_detector<T>& item,
@@ -130,33 +176,20 @@ namespace dlib
    private:
        bool overlaps_any_box (
-            const std::vector<rectangle>& rects,
+            const std::vector<rect_detection>& rects,
-            const dlib::rectangle& rect
-        ) const
-        {
-            for (unsigned long i = 0; i < rects.size(); ++i)
-            {
-                if (boxes_overlap(rects[i], rect))
-                    return true;
-            }
-            return false;
-        }
-        bool overlaps_any_box (
-            const std::vector<std::pair<double,rectangle> >& rects,
            const dlib::rectangle& rect
        ) const
        {
            for (unsigned long i = 0; i < rects.size(); ++i)
            {
-                if (boxes_overlap(rects[i].second, rect))
+                if (boxes_overlap(rects[i].rect, rect))
                    return true;
            }
            return false;
        }
        test_box_overlap boxes_overlap;
-        processed_weight_vector<image_scanner_type> w;
+        std::vector<processed_weight_vector<image_scanner_type> > w;
        image_scanner_type scanner;
    };
@@ -168,14 +201,17 @@ namespace dlib
        std::ostream& out
    )
    {
-        int version = 1;
+        int version = 2;
        serialize(version, out);
        T scanner;
        scanner.copy_configuration(item.scanner);
        serialize(scanner, out);
-        serialize(item.w.w, out);
        serialize(item.boxes_overlap, out);
+        // serialize all the weight vectors
+        serialize(item.w.size(), out);
+        for (unsigned long i = 0; i < item.w.size(); ++i)
+            serialize(item.w[i].w, out);
    }
 // ----------------------------------------------------------------------------------------
@@ -188,13 +224,31 @@ namespace dlib
    {
        int version = 0;
        deserialize(version, in);
-        if (version != 1)
+        if (version == 1)
+        {
+            deserialize(item.scanner, in);
+            item.w.resize(1);
+            deserialize(item.w[0].w, in);
+            item.w[0].init(item.scanner);
+            deserialize(item.boxes_overlap, in);
+        }
+        else if (version == 2)
+        {
+            deserialize(item.scanner, in);
+            deserialize(item.boxes_overlap, in);
+            unsigned long num_detectors = 0;
+            deserialize(num_detectors, in);
+            item.w.resize(num_detectors);
+            for (unsigned long i = 0; i < item.w.size(); ++i)
+            {
+                deserialize(item.w[i].w, in);
+                item.w[i].init(item.scanner);
+            }
+        }
+        else 
+        {
            throw serialization_error("Unexpected version encountered while deserializing a dlib::object_detector object.");
+        }
-        deserialize(item.scanner, in);
-        deserialize(item.w.w, in);
-        item.w.init(item.scanner);
-        deserialize(item.boxes_overlap, in);
    }
 // ----------------------------------------------------------------------------------------
@@ -252,8 +306,54 @@ namespace dlib
            );
        scanner.copy_configuration(scanner_);
-        w.w = w_;
+        w.resize(1);
-        w.init(scanner);
+        w[0].w = w_;
+        w[0].init(scanner);
+    }
+// ----------------------------------------------------------------------------------------
+    template <
+        typename image_scanner_type
+        >
+    object_detector<image_scanner_type>::
+    object_detector (
+        const image_scanner_type& scanner_, 
+        const test_box_overlap& overlap_tester,
+        const std::vector<feature_vector_type>& w_ 
+    ) :
+        boxes_overlap(overlap_tester)
+    {
+        // make sure requires clause is not broken
+        DLIB_ASSERT(scanner_.get_num_detection_templates() > 0 && w_.size() > 0,
+            "\t object_detector::object_detector(scanner_,overlap_tester,w_)"
+            << "\n\t Invalid inputs were given to this function "
+            << "\n\t scanner_.get_num_detection_templates(): " << scanner_.get_num_detection_templates()
+            << "\n\t w_.size():                     " << w_.size()
+            << "\n\t this: " << this
+            );
+#ifdef ENABLE_ASSERTS
+        for (unsigned long i = 0; i < w_.size(); ++i)
+        {
+            DLIB_ASSERT(w_[i].size() == scanner_.get_num_dimensions() + 1, 
+                "\t object_detector::object_detector(scanner_,overlap_tester,w_)"
+                << "\n\t Invalid inputs were given to this function "
+                << "\n\t scanner_.get_num_detection_templates(): " << scanner_.get_num_detection_templates()
+                << "\n\t w_["<<i<<"].size():                     " << w_[i].size()
+                << "\n\t scanner_.get_num_dimensions(): " << scanner_.get_num_dimensions()
+                << "\n\t this: " << this
+                );
+        }
+#endif
+        scanner.copy_configuration(scanner_);
+        w.resize(w_.size());
+        for (unsigned long i = 0; i < w.size(); ++i)
+        {
+            w[i].w = w_[i];
+            w[i].init(scanner);
+        }
    }
 // ----------------------------------------------------------------------------------------
@@ -272,7 +372,6 @@ namespace dlib
        boxes_overlap = item.boxes_overlap;
        w = item.w;
        scanner.copy_configuration(item.scanner);
-        w.init(scanner);
        return *this;
    }
@@ -284,29 +383,91 @@ namespace dlib
    template <
        typename image_type
        >
-    std::vector<rectangle> object_detector<image_scanner_type>::
+    void object_detector<image_scanner_type>::
    operator() (
        const image_type& img,
+        std::vector<rect_detection>& final_dets,
        double adjust_threshold
    ) 
    {
+        // Runs every stored weight vector over img and writes the detections that
+        // survive non-max suppression into final_dets, ordered from highest to
+        // lowest confidence.  adjust_threshold is added to each detector's own
+        // threshold before detection.
-        std::vector<rectangle> final_dets;
+        scanner.load(img);
-        if (w.w.size() != 0)
+        std::vector<std::pair<double, rectangle> > dets;
+        std::vector<rect_detection> dets_accum;
+        for (unsigned long i = 0; i < w.size(); ++i)
+        {
+            // The element at index get_num_dimensions() (the last one) of each
+            // weight vector holds that detector's threshold.
+            const double thresh = w[i].w(scanner.get_num_dimensions());
+            // NOTE(review): assumes detect() overwrites dets rather than appending
+            // to it, since dets is reused across iterations - confirm.
+            scanner.detect(w[i].get_detect_argument(), dets, thresh + adjust_threshold);
+            for (unsigned long j = 0; j < dets.size(); ++j)
+            {
+                rect_detection temp;
+                // Report confidence relative to the detector's threshold.
+                temp.detection_confidence = dets[j].first-thresh;
+                temp.weight_index = i;
+                temp.rect = dets[j].second;
+                dets_accum.push_back(temp);
+            }
+        }
+        // Do non-max suppression.  Detections gathered from different weight
+        // vectors are not ordered relative to each other, so sort them by
+        // descending confidence first.  Otherwise a weak detection from an earlier
+        // weight vector could suppress a stronger overlapping detection from a
+        // later weight vector.
+        final_dets.clear();
+        std::sort(dets_accum.rbegin(), dets_accum.rend());
+        for (unsigned long i = 0; i < dets_accum.size(); ++i)
        {
-            std::vector<std::pair<double, rectangle> > dets;
+            if (overlaps_any_box(final_dets, dets_accum[i].rect))
-            const double thresh = w.w(scanner.get_num_dimensions());
+                continue;
-            scanner.load(img);
+            final_dets.push_back(dets_accum[i]);
-            scanner.detect(w.get_detect_argument(), dets, thresh + adjust_threshold);
+        }
+        // final_dets inherits the descending order of dets_accum; no further sort needed.
+    }
-            for (unsigned long i = 0; i < dets.size(); ++i)
+// ----------------------------------------------------------------------------------------
-            {
-                if (overlaps_any_box(final_dets, dets[i].second))
-                    continue;
-                final_dets.push_back(dets[i].second);
+    template <
-            }
+        typename image_scanner_type
+        >
+    template <
+        typename image_type
+        >
+    void object_detector<image_scanner_type>::
+    operator() (
+        const image_type& img,
+        std::vector<full_detection>& final_dets,
+        double adjust_threshold 
+    )
+    {
+        std::vector<rect_detection> dets;
+        (*this)(img,dets,adjust_threshold);
+        final_dets.resize(dets.size());
+        // convert all the rectangle detections into full_object_detections.
+        for (unsigned long i = 0; i < dets.size(); ++i)
+        {
+            final_dets[i].detection_confidence = dets[i].detection_confidence;
+            final_dets[i].weight_index = dets[i].weight_index;
+            final_dets[i].rect = scanner.get_full_object_detection(dets[i].rect, w[dets[i].weight_index].w);
        }
+    }
+// ----------------------------------------------------------------------------------------
+    template <
+        typename image_scanner_type
+        >
+    template <
+        typename image_type
+        >
+    std::vector<rectangle> object_detector<image_scanner_type>::
+    operator() (
+        const image_type& img,
+        double adjust_threshold
+    ) 
+    {
+        std::vector<rect_detection> dets;
+        (*this)(img,dets,adjust_threshold);
+        std::vector<rectangle> final_dets(dets.size());
+        for (unsigned long i = 0; i < dets.size(); ++i)
+            final_dets[i] = dets[i].rect;
        return final_dets;
    }
@@ -326,24 +487,12 @@ namespace dlib
        double adjust_threshold
    ) 
    {
-        final_dets.clear();
+        std::vector<rect_detection> dets;
-        if (w.w.size() != 0)
+        (*this)(img,dets,adjust_threshold);
-        {
-            std::vector<std::pair<double, rectangle> > dets;
-            const double thresh = w.w(scanner.get_num_dimensions());
-            scanner.load(img);
-            scanner.detect(w.get_detect_argument(), dets, thresh + adjust_threshold);
-            for (unsigned long i = 0; i < dets.size(); ++i)
+        final_dets.resize(dets.size());
-            {
+        for (unsigned long i = 0; i < dets.size(); ++i)
-                if (overlaps_any_box(final_dets, dets[i].second))
+            final_dets[i] = std::make_pair(dets[i].detection_confidence,dets[i].rect);
-                    continue;
-                dets[i].first -= thresh;
-                final_dets.push_back(dets[i]);
-            }
-        }
    }
 // ----------------------------------------------------------------------------------------
@@ -361,17 +510,17 @@ namespace dlib
        double adjust_threshold
    ) 
    {
-        std::vector<std::pair<double, rectangle> > temp_dets;
+        std::vector<rect_detection> dets;
-        (*this)(img,temp_dets,adjust_threshold);
+        (*this)(img,dets,adjust_threshold);
        final_dets.clear();
-        final_dets.reserve(temp_dets.size());
+        final_dets.reserve(dets.size());
        // convert all the rectangle detections into full_object_detections.
-        for (unsigned long i = 0; i < temp_dets.size(); ++i)
+        for (unsigned long i = 0; i < dets.size(); ++i)
        {
-            final_dets.push_back(std::make_pair(temp_dets[i].first, 
+            final_dets.push_back(std::make_pair(dets[i].detection_confidence, 
-                                                scanner.get_full_object_detection(temp_dets[i].second, w.w)));
+                                                scanner.get_full_object_detection(dets[i].rect, w[dets[i].weight_index].w)));
        }
    }
@@ -390,16 +539,16 @@ namespace dlib
        double adjust_threshold
    ) 
    {
-        std::vector<std::pair<double, rectangle> > temp_dets;
+        std::vector<rect_detection> dets;
-        (*this)(img,temp_dets,adjust_threshold);
+        (*this)(img,dets,adjust_threshold);
        final_dets.clear();
-        final_dets.reserve(temp_dets.size());
+        final_dets.reserve(dets.size());
        // convert all the rectangle detections into full_object_detections.
-        for (unsigned long i = 0; i < temp_dets.size(); ++i)
+        for (unsigned long i = 0; i < dets.size(); ++i)
        {
-            final_dets.push_back(scanner.get_full_object_detection(temp_dets[i].second, w.w));
+            final_dets.push_back(scanner.get_full_object_detection(dets[i].rect, w[dets[i].weight_index].w));
        }
    }

--- a/dlib/image_processing/object_detector_abstract.h
+++ b/dlib/image_processing/object_detector_abstract.h
@@ -22,15 +22,31 @@ namespace dlib
            REQUIREMENTS ON image_scanner_type
                image_scanner_type must be an implementation of 
                dlib/image_processing/scan_image_pyramid_abstract.h or 
+                dlib/image_processing/scan_fhog_pyramid_abstract.h or 
+                dlib/image_processing/scan_image_custom_abstract.h or 
                dlib/image_processing/scan_image_boxes_abstract.h 
            WHAT THIS OBJECT REPRESENTS
                This object is a tool for detecting the positions of objects in an image.
-                In particular, it is a simple container to aggregate an instance of the
+                In particular, it is a simple container to aggregate an instance of an image 
-                scan_image_pyramid or scan_image_boxes classes, the weight vector needed by
+                scanner (i.e. scan_image_pyramid, scan_fhog_pyramid, scan_image_custom, or
-                one of these image scanners, and finally an instance of test_box_overlap.
+                scan_image_boxes), the weight vector needed by one of these image scanners,
-                The test_box_overlap object is used to perform non-max suppression on the
+                and finally an instance of test_box_overlap.  The test_box_overlap object
-                output of the image scanner object.  
+                is used to perform non-max suppression on the output of the image scanner
+                object.  
+                Note further that this object can contain multiple weight vectors.  In this
+                case, it will run the image scanner multiple times, once with each of the
+                weight vectors.  Then it will aggregate the results from all runs, perform
+                non-max suppression and then return the results.  Therefore, the object_detector 
+                can also be used as a container for a set of object detectors that all use
+                the same image scanner but different weight vectors.  This is useful since
+                the object detection procedure has two parts.  A loading step where the
+                image is loaded into the scanner, then a detect step which uses the weight
+                vector to locate objects in the image.  Since the loading step is independent 
+                of the weight vector it is most efficient to run multiple detectors by
+                performing one load into a scanner followed by multiple detect steps.  This
+                avoids unnecessarily loading the same image into the scanner multiple times.  
        !*/
    public:
        typedef typename image_scanner_type::feature_vector_type feature_vector_type;
@@ -41,6 +57,7 @@ namespace dlib
            ensures
                - This detector won't generate any detections when
                  presented with an image.
+                - #num_detectors() == 0
        !*/
        object_detector (
@@ -77,13 +94,56 @@ namespace dlib
                - #get_scanner() == scanner
                  (note that only the "configuration" of scanner is copied.
                  I.e. the copy is done using copy_configuration())
+                - #num_detectors() == 1
+        !*/
+        object_detector (
+            const image_scanner_type& scanner, 
+            const test_box_overlap& overlap_tester,
+            const std::vector<feature_vector_type>& w 
+        );
+        /*!
+            requires
+                - for all valid i:
+                    - w[i].size() == scanner.get_num_dimensions() + 1
+                - scanner.get_num_detection_templates() > 0
+                - w.size() > 0
+            ensures
+                - When the operator() member function is called it will invoke
+                  scanner.detect(w[i],dets,w[i](w[i].size()-1)) for all valid i.  Then it
+                  will take all the detections output by the calls to detect() and suppress
+                  overlapping detections, and finally report the results.
+                - when #*this is used to detect objects, the set of output detections will
+                  never contain any overlaps with respect to overlap_tester.  That is, for
+                  all pairs of returned detections A and B, we will always have:
+                  overlap_tester(A,B) == false
+                - for all valid i:
+                    - #get_w(i) == w[i]
+                - #num_detectors() == w.size()
+                - #get_overlap_tester() == overlap_tester
+                - #get_scanner() == scanner
+                  (note that only the "configuration" of scanner is copied.
+                  I.e. the copy is done using copy_configuration())
+        !*/
+        unsigned long num_detectors (
+        ) const; 
+        /*!
+            ensures
+                - returns the number of weight vectors in this object.  Since each weight
+                  vector logically represents an object detector, this returns the number
+                  of object detectors contained in this object.
        !*/
        const feature_vector_type& get_w (
+            unsigned long idx = 0
        ) const;
        /*!
+            requires
+                - idx < num_detectors()
            ensures
-                - returns the weight vector used by this object
+                - returns the idx-th weight vector loaded into this object.  All the weight vectors
+                  have the same dimension and logically each represents a different detector.
        !*/
        const test_box_overlap& get_overlap_tester (
@@ -112,31 +172,95 @@ namespace dlib
                - returns #*this
        !*/
+        struct rect_detection
+        {
+            double detection_confidence;
+            unsigned long weight_index;
+            rectangle rect;
+        };
        template <
            typename image_type
            >
-        std::vector<rectangle> operator() (
+        void operator() (
            const image_type& img,
-            const adjust_threshold = 0
+            std::vector<rect_detection>& dets,
+            double adjust_threshold = 0
        );
        /*!
            requires
                - img == an object which can be accepted by image_scanner_type::load()
            ensures
-                - performs object detection on the given image and returns a
+                - Performs object detection on the given image and stores the detected
-                  vector which indicates the locations of all detected objects.
+                  objects into #dets.  In particular, we will have that:
-                - The returned vector will be sorted in the sense that the highest
+                    - #dets is sorted such that the highest confidence detections come
-                  confidence detections come first.  E.g. element 0 is the best detection,
+                      first.  E.g. element 0 is the best detection, element 1 the next
-                  element 1 the next best, and so on.
+                      best, and so on.
+                    - #dets.size() == the number of detected objects.
+                    - #dets[i].detection_confidence == The strength of the i-th detection.
+                      Larger values indicate that the detector is more confident that
+                      #dets[i] is a correct detection rather than being a false alarm.
+                      Moreover, the detection_confidence is equal to the detection value
+                      output by the scanner minus the threshold value stored at the end of
+                      the weight vector in get_w(#dets[i].weight_index). 
+                    - #dets[i].weight_index == the index for the weight vector that
+                      generated this detection. 
+                    - #dets[i].rect == the bounding box for the i-th detection.
                - #get_scanner() will have been loaded with img. Therefore, you can call
                  #get_scanner().get_feature_vector() to obtain the feature vectors or
                  #get_scanner().get_full_object_detection() to get the
                  full_object_detections for the resulting object detection boxes.
                - The detection threshold is adjusted by having adjust_threshold added to
                  it.  Therefore, an adjust_threshold value > 0 makes detecting objects
-                  harder while a negative value makes it easier.  This means that, for
+                  harder while a negative value makes it easier.  Moreover, the following
-                  example, you can obtain the maximum possible number of detections by
+                  will be true for all valid i:
-                  setting adjust_threshold equal to negative infinity.
+                    - #dets[i].detection_confidence >= adjust_threshold
+                  This means that, for example, you can obtain the maximum possible number
+                  of detections by setting adjust_threshold equal to negative infinity.
+        !*/
+        struct full_detection
+        {
+            double detection_confidence;
+            unsigned long weight_index;
+            full_object_detection rect;
+        };
+        template <
+            typename image_type
+            >
+        void operator() (
+            const image_type& img,
+            std::vector<full_detection>& dets,
+            double adjust_threshold = 0
+        );
+        /*!
+            requires
+                - img == an object which can be accepted by image_scanner_type::load()
+            ensures
+                - This function is identical to the above operator() routine, except that
+                  it outputs full_object_detections instead of rectangles.  This means that
+                  the output includes part locations.  In particular, calling this function
+                  is the same as calling the above operator() routine and then using
+                  get_scanner().get_full_object_detection() to resolve all the rectangles
+                  into full_object_detections.  Therefore, this version of operator() is
+                  simply a convenience function for performing this set of operations.
+        !*/
+        template <
+            typename image_type
+            >
+        std::vector<rectangle> operator() (
+            const image_type& img,
+            double adjust_threshold = 0
+        );
+        /*!
+            requires
+                - img == an object which can be accepted by image_scanner_type::load()
+            ensures
+                - This function is identical to the above operator() routine, except that
+                  it returns a std::vector<rectangle> which contains just the bounding
+                  boxes of all the detections. 
        !*/
        template <
@@ -179,7 +303,7 @@ namespace dlib
            >
        void operator() (
            const image_type& img,
-            std::vector<std::pair<double, full_object_detection> >& final_dets,
+            std::vector<std::pair<double, full_object_detection> >& dets,
            double adjust_threshold = 0
        );
        /*!
@@ -200,7 +324,7 @@ namespace dlib
            >
        void operator() (
            const image_type& img,
-            std::vector<full_object_detection>& final_dets,
+            std::vector<full_object_detection>& dets,
            double adjust_threshold = 0
        );
        /*!

--- a/dlib/image_processing/scan_fhog_pyramid.h
+++ b/dlib/image_processing/scan_fhog_pyramid.h
@@ -865,19 +865,27 @@ namespace dlib
        >
    matrix<unsigned char> draw_fhog (
        const object_detector<scan_fhog_pyramid<Pyramid_type> >& detector,
+        const unsigned long weight_index = 0,
        const long cell_draw_size = 15
    )
    {
        // make sure requires clause is not broken
-        DLIB_ASSERT(cell_draw_size > 0 && detector.get_w().size() >= detector.get_scanner().get_num_dimensions() ,
+        DLIB_ASSERT(weight_index < detector.num_detectors(),
            "\t matrix draw_fhog()"
            << "\n\t Invalid arguments were given to this function. "
            << "\n\t cell_draw_size:                              " << cell_draw_size
-            << "\n\t detector.get_w().size():                     " << detector.get_w().size()
+            << "\n\t detector.num_detectors():                    " << detector.num_detectors()
+            );
+        DLIB_ASSERT(cell_draw_size > 0 && detector.get_w(weight_index).size() >= detector.get_scanner().get_num_dimensions(),
+            "\t matrix draw_fhog()"
+            << "\n\t Invalid arguments were given to this function. "
+            << "\n\t cell_draw_size:                              " << cell_draw_size
+            << "\n\t weight_index:                                " << weight_index
+            << "\n\t detector.get_w(weight_index).size():         " << detector.get_w(weight_index).size()
            << "\n\t detector.get_scanner().get_num_dimensions(): " << detector.get_scanner().get_num_dimensions()
            );
-        typename scan_fhog_pyramid<Pyramid_type>::fhog_filterbank fb = detector.get_scanner().build_fhog_filterbank(detector.get_w());
+        typename scan_fhog_pyramid<Pyramid_type>::fhog_filterbank fb = detector.get_scanner().build_fhog_filterbank(detector.get_w(weight_index));
        return draw_fhog(fb.get_filters(),cell_draw_size);
    }

--- a/dlib/image_processing/scan_fhog_pyramid_abstract.h
+++ b/dlib/image_processing/scan_fhog_pyramid_abstract.h
@@ -17,18 +17,21 @@ namespace dlib
        >
    matrix<unsigned char> draw_fhog (
        const object_detector<scan_fhog_pyramid<Pyramid_type> >& detector,
+        const unsigned long weight_index = 0,
        const long cell_draw_size = 15
    );
    /*!
        requires
            - cell_draw_size > 0
+            - weight_index < detector.num_detectors()
-            - detector.get_w().size() >= detector.get_scanner().get_num_dimensions()
+            - detector.get_w(weight_index).size() >= detector.get_scanner().get_num_dimensions()
              (i.e. the detector must have been populated with a HOG filter)
        ensures
-            - Converts the HOG filters in the given detector into an image suitable for
+            - Converts the HOG filters in the given detector (specifically, the filters in
-              display on the screen.  In particular, we draw all the HOG cells into a
+              detector.get_w(weight_index)) into an image suitable for display on the
-              grayscale image in a way that shows the magnitude and orientation of the
+              screen.  In particular, we draw all the HOG cells into a grayscale image in a
-              gradient energy in each cell.  The resulting image is then returned.
+              way that shows the magnitude and orientation of the gradient energy in each
+              cell.  The resulting image is then returned.
    !*/
 // ----------------------------------------------------------------------------------------