Refactored the load_image_dataset() routines so they are easier to use and more

flexible. This introduces a slight backwards incompatibility in that the version that loads full_object_detection objects now returns an ignore rectangle set instead of a parts name list. But other than that the changes are backwards compatible with previous versions of dlib.

Refactored the load_image_dataset() routines so they are easier to use and more
flexible. This introduces a slight backwards incompatibility in that the version that loads full_object_detection objects now returns an ignore rectangle set instead of a parts name list. But other than that the changes are backwards compatible with previous versions of dlib.
033cf733 · Davis King · 5da76449 · 033cf733 · 033cf733
Commit 033cf733 authored Jan 26, 2014 by Davis King
Show whitespace changes
Inline Side-by-side

Showing with 302 additions and 64 deletions

load_image_dataset.h dlib/data_io/load_image_dataset.h +136 -23

load_image_dataset_abstract.h dlib/data_io/load_image_dataset_abstract.h +166 -41

No files found.
--- a/dlib/data_io/load_image_dataset.h
+++ b/dlib/data_io/load_image_dataset.h
@@ -12,12 +12,75 @@
 #include "../geometry.h"
 #include "image_dataset_metadata.h"
 #include <string>
+#include <set>
 #include "../image_processing/full_object_detection.h"


 namespace dlib
 {

+// ----------------------------------------------------------------------------------------
+
+    class image_dataset_file
+    {
+    public:
+        image_dataset_file(const std::string& filename)
+        {
+            _skip_empty_images = false;
+            _have_parts = false;
+            _filename = filename;
+        }
+
+        image_dataset_file boxes_match_label(
+            const std::string& label
+        ) const
+        {
+            image_dataset_file temp(*this);
+            temp._labels.insert(label);
+            return temp;
+        }
+
+        image_dataset_file skip_empty_images(
+        ) const
+        {
+            image_dataset_file temp(*this);
+            temp._skip_empty_images = true;
+            return temp;
+        }
+
+        image_dataset_file boxes_have_parts(
+        ) const
+        {
+            image_dataset_file temp(*this);
+            temp._have_parts = true;
+            return temp;
+        }
+
+        bool should_load_box (
+            const image_dataset_metadata::box& box
+        ) const
+        {
+            if (_have_parts && box.parts.size() == 0)
+                return false;
+            if (_labels.size() == 0)
+                return true;
+            if (_labels.count(box.label) != 0)
+                return true;
+            return false;
+        }
+
+        const std::string& get_filename() const { return _filename; }
+        bool should_skip_empty_images() const { return _skip_empty_images; }
+        bool should_boxes_have_parts() const { return _have_parts; }
+        const std::set<std::string>& get_selected_box_labels() const { return _labels; }
+
+    private:
+        std::string _filename;
+        std::set<std::string> _labels;
+        bool _skip_empty_images;
+        bool _have_parts;
+    };
+
 // ----------------------------------------------------------------------------------------

    template <
@@ -27,9 +90,7 @@ namespace dlib
    std::vector<std::vector<rectangle> > load_image_dataset (
        array<image_type,MM>& images,
        std::vector<std::vector<rectangle> >& object_locations,
-        const std::string& filename,
-        const std::string& label,
-        bool skip_empty_images = false
+        const image_dataset_file& source
    )
    {
        images.clear();
@@ -40,12 +101,12 @@ namespace dlib

        using namespace dlib::image_dataset_metadata;
        dataset data;
-        load_image_dataset_metadata(data, filename);
+        load_image_dataset_metadata(data, source.get_filename());

        // Set the current directory to be the one that contains the
        // metadata file. We do this because the file might contain
        // file paths which are relative to this folder.
-        set_current_dir(get_parent_directory(file(filename)));
+        set_current_dir(get_parent_directory(file(source.get_filename())));



@@ -58,7 +119,7 @@ namespace dlib
            ignored.clear();
            for (unsigned long j = 0; j < data.images[i].boxes.size(); ++j)
            {
-                if (label.size() == 0 || data.images[i].boxes[j].label == label)
+                if (source.should_load_box(data.images[i].boxes[j]))
                {
                    if (data.images[i].boxes[j].ignore)
                        ignored.push_back(data.images[i].boxes[j].rect);
@@ -67,7 +128,7 @@ namespace dlib
                }
            }

-            if (!skip_empty_images || rects.size() != 0)
+            if (!source.should_skip_empty_images() || rects.size() != 0)
            {
                object_locations.push_back(rects);
                ignored_rects.push_back(ignored);
@@ -80,6 +141,29 @@ namespace dlib
        return ignored_rects;
    }

+// ----------------------------------------------------------------------------------------
+
+// ******* THIS FUNCTION IS DEPRECATED, you should use another version of load_image_dataset() *******
+    template <
+        typename image_type, 
+        typename MM
+        >
+    std::vector<std::vector<rectangle> > load_image_dataset (
+        array<image_type,MM>& images,
+        std::vector<std::vector<rectangle> >& object_locations,
+        const std::string& filename,
+        const std::string& label,
+        bool skip_empty_images = false
+    )
+    {
+        image_dataset_file f(filename);
+        if (label.size() != 0)
+            f = f.boxes_match_label(label);
+        if (skip_empty_images)
+            f = f.skip_empty_images();
+        return load_image_dataset(images, object_locations, f);
+    }
+
 // ----------------------------------------------------------------------------------------

    template <
@@ -92,35 +176,37 @@ namespace dlib
        const std::string& filename
    )
    {
-        return load_image_dataset(images, object_locations, filename, "");
+        return load_image_dataset(images, object_locations, image_dataset_file(filename));
    }

+// ----------------------------------------------------------------------------------------
+// ----------------------------------------------------------------------------------------
 // ----------------------------------------------------------------------------------------

    template <
        typename image_type, 
        typename MM
        >
-    std::vector<std::string> load_image_dataset (
+    std::vector<std::vector<rectangle> > load_image_dataset (
        array<image_type,MM>& images,
        std::vector<std::vector<full_object_detection> >& object_locations,
-        const std::string& filename,
-        const std::string& label,
-        bool skip_empty_images = false
+        const image_dataset_file& source,
+        std::vector<std::string>& parts_list
    )
    {
+        parts_list.clear();
        images.clear();
        object_locations.clear();
        const std::string old_working_dir = get_current_dir();

        using namespace dlib::image_dataset_metadata;
        dataset data;
-        load_image_dataset_metadata(data, filename);
+        load_image_dataset_metadata(data, source.get_filename());

        // Set the current directory to be the one that contains the
        // metadata file. We do this because the file might contain
        // file paths which are relative to this folder.
-        set_current_dir(get_parent_directory(file(filename)));
+        set_current_dir(get_parent_directory(file(source.get_filename())));


        std::set<std::string> all_parts;
@@ -130,7 +216,7 @@ namespace dlib
        {
            for (unsigned long j = 0; j < data.images[i].boxes.size(); ++j)
            {
-                if (label.size() == 0 || data.images[i].boxes[j].label == label)
+                if (source.should_load_box(data.images[i].boxes[j]))
                {
                    const std::map<std::string,point>& parts = data.images[i].boxes[j].parts;
                    std::map<std::string,point>::const_iterator itr;
@@ -145,21 +231,29 @@ namespace dlib

        // make a mapping between part names and the integers [0, all_parts.size())
        std::map<std::string,int> parts_idx;
-        std::vector<std::string> ret_parts_list;
        for (std::set<std::string>::iterator i = all_parts.begin(); i != all_parts.end(); ++i)
        {
-            parts_idx[*i] = ret_parts_list.size();
-            ret_parts_list.push_back(*i);
+            parts_idx[*i] = parts_list.size();
+            parts_list.push_back(*i);
        }

+        std::vector<std::vector<rectangle> > ignored_rects;
+        std::vector<rectangle> ignored;
        image_type img;
        std::vector<full_object_detection> object_dets;
        for (unsigned long i = 0; i < data.images.size(); ++i)
        {
            object_dets.clear();
+            ignored.clear();
            for (unsigned long j = 0; j < data.images[i].boxes.size(); ++j)
            {
-                if (label.size() == 0 || data.images[i].boxes[j].label == label)
+                if (source.should_load_box(data.images[i].boxes[j]))
+                {
+                    if (data.images[i].boxes[j].ignore)
+                    {
+                        ignored.push_back(data.images[i].boxes[j].rect);
+                    }
+                    else
                    {
                        std::vector<point> partlist(parts_idx.size(), OBJECT_PART_NOT_PRESENT);

@@ -174,10 +268,12 @@ namespace dlib
                        object_dets.push_back(full_object_detection(data.images[i].boxes[j].rect, partlist));
                    }
                }
+            }

-            if (!skip_empty_images || object_dets.size() != 0)
+            if (!source.should_skip_empty_images() || object_dets.size() != 0)
            {
                object_locations.push_back(object_dets);
+                ignored_rects.push_back(ignored);
                load_image(img, data.images[i].filename);
                images.push_back(img);
            }
@@ -185,7 +281,7 @@ namespace dlib

        set_current_dir(old_working_dir);

-        return ret_parts_list;
+        return ignored_rects;
    }

 // ----------------------------------------------------------------------------------------
@@ -194,13 +290,30 @@ namespace dlib
        typename image_type, 
        typename MM
        >
-    std::vector<std::string> load_image_dataset (
+    std::vector<std::vector<rectangle> > load_image_dataset (
+        array<image_type,MM>& images,
+        std::vector<std::vector<full_object_detection> >& object_locations,
+        const image_dataset_file& source 
+    )
+    {
+        std::vector<std::string> parts_list;
+        return load_image_dataset(images, object_locations, source, parts_list);
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename image_type, 
+        typename MM
+        >
+    std::vector<std::vector<rectangle> > load_image_dataset (
        array<image_type,MM>& images,
        std::vector<std::vector<full_object_detection> >& object_locations,
        const std::string& filename
    )
    {
-        return load_image_dataset(images, object_locations, filename, "");
+        std::vector<std::string> parts_list;
+        return load_image_dataset(images, object_locations, image_dataset_file(filename), parts_list);
    }

 // ----------------------------------------------------------------------------------------

--- a/dlib/data_io/load_image_dataset_abstract.h
+++ b/dlib/data_io/load_image_dataset_abstract.h
@@ -13,6 +13,111 @@
 namespace dlib
 {

+// ----------------------------------------------------------------------------------------
+
+    class image_dataset_file
+    {
+        /*!
+            WHAT THIS OBJECT REPRESENTS
+                This object is a tool used to tell the load_image_dataset() functions which
+                boxes and images to load from an XML based image dataset file.  By default,
+                this object tells load_image_dataset() to load all images and object boxes.
+        !*/
+
+    public:
+        image_dataset_file(
+            const std::string& filename
+        );
+        /*!
+            ensures
+                - #get_filename() == filename
+                - #should_skip_empty_images() == false
+                - #get_selected_box_labels().size() == 0
+                  This means that, initially, all boxes will be loaded.  Therefore, for all
+                  possible boxes B we have:
+                    - #should_load_box(B) == true
+        !*/
+
+        const std::string& get_filename(
+        ) const;
+        /*!
+            ensures
+                - returns the name of the XML image dataset metadata file given to this
+                  object's constructor.
+        !*/
+        
+        bool should_skip_empty_images(
+        ) const;
+        /*!
+            ensures
+                - returns true if we are supposed to skip images that don't have any boxes
+                  to load when loading an image dataset using load_image_dataset().
+        !*/
+
+        image_dataset_file boxes_match_label(
+            const std::string& label
+        ) const;
+        /*!
+            ensures
+                - returns a copy of *this that is identical in all respects to *this except
+                  that label will be included in the labels set (i.e. the set returned by
+                  get_selected_box_labels()).
+        !*/
+
+        const std::set<std::string>& get_selected_box_labels(
+        ) const;
+        /*!
+            ensures
+                - returns the set of box labels currently selected by the should_load_box()
+                  method.  Note that if the set is empty then we select all boxes.
+        !*/
+
+        image_dataset_file skip_empty_images(
+        ) const;
+        /*!
+            ensures
+                - returns a copy of *this that is identical in all respects to *this except
+                  that #should_skip_empty_images() == true.
+        !*/
+
+        bool should_boxes_have_parts(
+        ) const; 
+        /*!
+            ensures
+                - returns true if boxes must have some parts defined for them to be loaded.
+        !*/
+
+        image_dataset_file boxes_have_parts(
+        ) const;
+        /*!
+            ensures
+                - returns a copy of *this that is identical in all respects to *this except
+                  that #should_boxes_have_parts() == true.
+        !*/
+
+        bool should_load_box (
+            const image_dataset_metadata::box& box
+        ) const;
+        /*!
+            ensures
+                - returns true if we are supposed to load the given box from an image
+                  dataset XML file.  In particular, if should_load_box() returns false then
+                  the load_image_dataset() routines will not return the box at all, neither
+                  in the ignore rectangles list nor in the primary object_locations vector.
+                  The behavior of this function is defined as follows:
+                    - if (should_boxes_have_parts() && box.parts.size() == 0) then
+                        - returns false
+                    - else if (get_selected_box_labels().size() == 0) then
+                        - returns true
+                    - else if (get_selected_box_labels().count(box.label) != 0) then
+                        - returns true
+                    - else
+                        - returns false
+        !*/
+
+    };
+
+// ----------------------------------------------------------------------------------------
 // ----------------------------------------------------------------------------------------

    template <
@@ -22,9 +127,7 @@ namespace dlib
    std::vector<std::vector<rectangle> > load_image_dataset (
        array<image_type,MM>& images,
        std::vector<std::vector<rectangle> >& object_locations,
-        const std::string& filename,
-        const std::string& label,
-        bool skip_empty_images = false
+        const image_dataset_file& source
    );
    /*!
        requires
@@ -32,32 +135,28 @@ namespace dlib
            - pixel_traits<typename image_type::type> is defined  
        ensures
            - This routine loads the images and their associated object boxes from the
-              image metadata file indicated by filename.  This metadata file should be in
-              the XML format used by the save_image_dataset_metadata() routine.
+              image metadata file indicated by source.get_filename().  This metadata file
+              should be in the XML format used by the save_image_dataset_metadata() routine.
            - #images.size() == The number of images loaded from the metadata file.  This
-              is all the images listed in the file unless skip_empty_images is set to true.
+              is all the images listed in the file unless source.should_skip_empty_images()
+              is set to true.
            - #images.size() == #object_locations.size()
            - This routine is capable of loading any image format which can be read by the
              load_image() routine.
            - let IGNORED_RECTS denote the vector returned from this function.
            - IGNORED_RECTS.size() == #object_locations.size()
            - IGNORED_RECTS == a list of the rectangles which have the "ignore" flag set to
-              true in the XML file.
+              true in the input XML file.
            - for all valid i:  
                - #images[i] == a copy of the i-th image from the dataset.
                - #object_locations[i] == a vector of all the rectangles associated with
-                  #images[i].  Note that only rectangles that are not marked as "ignore"
-                  are stored into #object_locations.
-                - IGNORED_RECTS[i] == A vector of all the rectangles associated with
-                  #images[i] that are marked as "ignore".
-                - if (skip_empty_images == true) then
+                  #images[i].  These are the rectangles for which source.should_load_box()
+                  returns true and are also not marked as "ignore" in the XML file.
+                - IGNORED_RECTS[i] == A vector of all the rectangles associated with #images[i] 
+                  that are marked as "ignore" but not discarded by source.should_load_box().
+                - if (source.should_skip_empty_images() == true) then
                    - #object_locations[i].size() != 0
-                      (i.e. only images with non-ignored boxes in them will be loaded.)
-                - if (labels != "") then
-                    - Only boxes with the given label will be loaded into object_locations
-                      and IGNORED_RECTS.
-                - else
-                    - all boxes in the dataset will be loaded regardless of their labels.
+                      (i.e. we won't load images that don't end up having any object locations)
    !*/

 // ----------------------------------------------------------------------------------------
@@ -76,22 +175,22 @@ namespace dlib
            - image_type == is an implementation of array2d/array2d_kernel_abstract.h
            - pixel_traits<typename image_type::type> is defined  
        ensures
-            - performs: return load_image_dataset(images, object_locations, filename, "");
+            - performs: return load_image_dataset(images, object_locations, image_dataset_file(filename));
              (i.e. it ignores box labels and therefore loads all the boxes in the dataset)
    !*/

+// ----------------------------------------------------------------------------------------
 // ----------------------------------------------------------------------------------------

    template <
        typename image_type, 
        typename MM
        >
-    std::vector<std::string> load_image_dataset (
+    std::vector<std::vector<rectangle> > load_image_dataset (
        array<image_type,MM>& images,
        std::vector<std::vector<full_object_detection> >& object_locations,
-        const std::string& filename,
-        const std::string& label,
-        bool skip_empty_images = false
+        const image_dataset_file& source,
+        std::vector<std::string>& parts_list
    );
    /*!
        requires
@@ -99,35 +198,39 @@ namespace dlib
            - pixel_traits<typename image_type::type> is defined  
        ensures
            - This routine loads the images and their associated object locations from the
-              image metadata file indicated by filename.  This metadata file should be in
-              the XML format used by the save_image_dataset_metadata() routine.
+              image metadata file indicated by source.get_filename().  This metadata file
+              should be in the XML format used by the save_image_dataset_metadata() routine.
            - The difference between this function and the version of load_image_dataset()
              defined above is that this version will also load object part information and
              thus fully populates the full_object_detection objects.
            - #images.size() == The number of images loaded from the metadata file.  This
-              is all the images listed in the file unless skip_empty_images is set to true.
+              is all the images listed in the file unless source.should_skip_empty_images()
+              is set to true.
            - #images.size() == #object_locations.size()
            - This routine is capable of loading any image format which can be read
              by the load_image() routine.
-            - returns a vector, call it RETURNED_PARTS, that contains the list of object
-              parts found in the input file and loaded into object_locations.  
+            - #parts_list == a vector that contains the list of object parts found in the
+              input file and loaded into object_locations.  
+            - let IGNORED_RECTS denote the vector returned from this function.
+            - IGNORED_RECTS.size() == #object_locations.size()
+            - IGNORED_RECTS == a list of the rectangles which have the "ignore" flag set to
+              true in the input XML file.
            - for all valid i:  
-                - #images[i] == a copy of the ith image from the dataset.
-                - #object_locations[i] == a vector of all the object detections associated
-                  with #images[i]. 
-                - if (skip_empty_images == true) then
+                - #images[i] == a copy of the i-th image from the dataset.
+                - #object_locations[i] == a vector of all the object detections associated
+                  with #images[i].  These are the boxes for which source.should_load_box()
+                  returns true and are also not marked as "ignore" in the XML file.
+                - IGNORED_RECTS[i] == A vector of all the rectangles associated with #images[i] 
+                  that are marked as "ignore" but not discarded by source.should_load_box().
+                - if (source.should_skip_empty_images() == true) then
                    - #object_locations[i].size() != 0
-                      (i.e. only images with detection boxes in them will be loaded.)
+                      (i.e. we won't load images that don't end up having any object locations)
                - for all valid j:
-                    - #object_locations[i][j].num_parts() == RETURNED_PARTS.size()
+                    - #object_locations[i][j].num_parts() == #parts_list.size()
                    - for all valid k:
                        - #object_locations[i][j].part(k) == the location of the part
-                          with name RETURNED_PARTS[k] or OBJECT_PART_NOT_PRESENT if the
+                          with name #parts_list[k] or OBJECT_PART_NOT_PRESENT if the
                          part was not indicated for object #object_locations[i][j].
-                - if (labels != "") then
-                    - only boxes with the given label will be loaded into object_locations.
-                - else
-                    - all boxes in the dataset will be loaded into object_locations.
    !*/

 // ----------------------------------------------------------------------------------------
@@ -136,7 +239,29 @@ namespace dlib
        typename image_type, 
        typename MM
        >
-    std::vector<std::string> load_image_dataset (
+    std::vector<std::vector<rectangle> > load_image_dataset (
+        array<image_type,MM>& images,
+        std::vector<std::vector<full_object_detection> >& object_locations,
+        const image_dataset_file& source 
+    );
+    /*!
+        requires
+            - image_type == is an implementation of array2d/array2d_kernel_abstract.h
+            - pixel_traits<typename image_type::type> is defined  
+        ensures
+            - performs: return load_image_dataset(images, object_locations, source, parts_list);
+              (i.e. this function simply calls the above function and discards the output
+              parts_list.  So it is just a convenience function you can call if you don't
+              care about getting the parts list.)
+    !*/
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename image_type, 
+        typename MM
+        >
+    std::vector<std::vector<rectangle> > load_image_dataset (
        array<image_type,MM>& images,
        std::vector<std::vector<full_object_detection> >& object_locations,
        const std::string& filename
@@ -146,7 +271,7 @@ namespace dlib
            - image_type == is an implementation of array2d/array2d_kernel_abstract.h
            - pixel_traits<typename image_type::type> is defined  
        ensures
-            - performs: return load_image_dataset(images, object_locations, filename, "");
+            - performs: return load_image_dataset(images, object_locations, image_dataset_file(filename));
              (i.e. it ignores box labels and therefore loads all the boxes in the dataset)
    !*/