Added image_dataset_file::shrink_big_images(). So now load_image_dataset() can

load a dataset of high resolution files into a user requested lower resolution.

Added image_dataset_file::shrink_big_images(). So now load_image_dataset() can
load a dataset of high resolution files into a user requested lower resolution.
0beabe84 · Davis King · 2ed907aa · 0beabe84 · 0beabe84
Commit 0beabe84 authored Oct 02, 2016 by Davis King
Hide whitespace changes
Inline Side-by-side

Showing with 141 additions and 11 deletions

load_image_dataset.h dlib/data_io/load_image_dataset.h +120 -11

load_image_dataset_abstract.h dlib/data_io/load_image_dataset_abstract.h +21 -0

No files found.
--- a/dlib/data_io/load_image_dataset.h
+++ b/dlib/data_io/load_image_dataset.h
@@ -15,6 +15,8 @@
 #include <set>
 #include "../image_processing/full_object_detection.h"
 #include <utility>
+#include <limits>
+#include "../image_transforms/image_pyramid.h"


 namespace dlib
@@ -30,6 +32,7 @@ namespace dlib
            _skip_empty_images = false;
            _have_parts = false;
            _filename = filename;
+            _box_area_thresh = std::numeric_limits<double>::infinity();
        }

        image_dataset_file boxes_match_label(
@@ -57,6 +60,15 @@ namespace dlib
            return temp;
        }

+        image_dataset_file shrink_big_images(
+            double new_box_area_thresh = 150*150
+        ) const
+        {
+            image_dataset_file temp(*this);
+            temp._box_area_thresh = new_box_area_thresh;
+            return temp;
+        }
+
        bool should_load_box (
            const image_dataset_metadata::box& box
        ) const
@@ -73,6 +85,7 @@ namespace dlib
        const std::string& get_filename() const { return _filename; }
        bool should_skip_empty_images() const { return _skip_empty_images; }
        bool should_boxes_have_parts() const { return _have_parts; }
+        double box_area_thresh() const { return _box_area_thresh; }
        const std::set<std::string>& get_selected_box_labels() const { return _labels; }

    private:
@@ -80,6 +93,8 @@ namespace dlib
        std::set<std::string> _labels;
        bool _skip_empty_images;
        bool _have_parts;
+        double _box_area_thresh;
+
    };

 // ----------------------------------------------------------------------------------------
@@ -95,7 +110,6 @@ namespace dlib
    {
        images.clear();
        object_locations.clear();
-        const std::string old_working_dir = get_current_dir();

        std::vector<std::vector<rectangle> > ignored_rects;

@@ -106,7 +120,7 @@ namespace dlib
        // Set the current directory to be the one that contains the
        // metadata file. We do this because the file might contain
        // file paths which are relative to this folder.
-        set_current_dir(get_parent_directory(file(source.get_filename())));
+        locally_change_current_dir chdir(get_parent_directory(file(source.get_filename())));


        typedef typename array_type::value_type image_type;
@@ -116,6 +130,7 @@ namespace dlib
        std::vector<rectangle> rects, ignored;
        for (unsigned long i = 0; i < data.images.size(); ++i)
        {
+            double min_rect_size = std::numeric_limits<double>::infinity();
            rects.clear();
            ignored.clear();
            for (unsigned long j = 0; j < data.images[i].boxes.size(); ++j)
@@ -123,22 +138,51 @@ namespace dlib
                if (source.should_load_box(data.images[i].boxes[j]))
                {
                    if (data.images[i].boxes[j].ignore)
+                    {
                        ignored.push_back(data.images[i].boxes[j].rect);
+                    }
                    else
+                    {
                        rects.push_back(data.images[i].boxes[j].rect);
+                        min_rect_size = std::min<double>(min_rect_size, rects.back().area());
+                    }
                }
            }

            if (!source.should_skip_empty_images() || rects.size() != 0)
            {
-                object_locations.push_back(rects);
-                ignored_rects.push_back(ignored);
                load_image(img, data.images[i].filename);
+                if (rects.size() != 0)  
+                {
+                    // if shrinking the image would still result in the smallest box being
+                    // bigger than the box area threshold then shrink the image.
+                    while(min_rect_size/2/2 > source.box_area_thresh())
+                    {
+                        pyramid_down<2> pyr;
+                        pyr(img);
+                        min_rect_size *= (1.0/2.0)*(1.0/2.0);
+                        for (auto&& r : rects)
+                            r = pyr.rect_down(r);
+                        for (auto&& r : ignored)
+                            r = pyr.rect_down(r);
+                    }
+                    while(min_rect_size*(2.0/3.0)*(2.0/3.0) > source.box_area_thresh())
+                    {
+                        pyramid_down<3> pyr;
+                        pyr(img);
+                        min_rect_size *= (2.0/3.0)*(2.0/3.0);
+                        for (auto&& r : rects)
+                            r = pyr.rect_down(r);
+                        for (auto&& r : ignored)
+                            r = pyr.rect_down(r);
+                    }
+                }
                images.push_back(img);
+                object_locations.push_back(rects);
+                ignored_rects.push_back(ignored);
            }
        }

-        set_current_dir(old_working_dir);
        return ignored_rects;
    }

@@ -171,23 +215,51 @@ namespace dlib
        std::vector<mmod_rect> rects;
        for (unsigned long i = 0; i < data.images.size(); ++i)
        {
+            double min_rect_size = std::numeric_limits<double>::infinity();
            rects.clear();
            for (unsigned long j = 0; j < data.images[i].boxes.size(); ++j)
            {
                if (source.should_load_box(data.images[i].boxes[j]))
                {
                    if (data.images[i].boxes[j].ignore)
+                    {
                        rects.push_back(ignored_mmod_rect(data.images[i].boxes[j].rect));
+                    }
                    else
+                    {
                        rects.push_back(mmod_rect(data.images[i].boxes[j].rect));
+                        min_rect_size = std::min<double>(min_rect_size, rects.back().rect.area());
+                    }
+
                }
            }

            if (!source.should_skip_empty_images() || rects.size() != 0)
            {
-                object_locations.push_back(std::move(rects));
                load_image(img, data.images[i].filename);
+                if (rects.size() != 0)  
+                {
+                    // if shrinking the image would still result in the smallest box being
+                    // bigger than the box area threshold then shrink the image.
+                    while(min_rect_size/2/2 > source.box_area_thresh())
+                    {
+                        pyramid_down<2> pyr;
+                        pyr(img);
+                        min_rect_size *= (1.0/2.0)*(1.0/2.0);
+                        for (auto&& r : rects)
+                            r.rect = pyr.rect_down(r.rect);
+                    }
+                    while(min_rect_size*(2.0/3.0)*(2.0/3.0) > source.box_area_thresh())
+                    {
+                        pyramid_down<3> pyr;
+                        pyr(img);
+                        min_rect_size *= (2.0/3.0)*(2.0/3.0);
+                        for (auto&& r : rects)
+                            r.rect = pyr.rect_down(r.rect);
+                    }
+                }
                images.push_back(std::move(img));
+                object_locations.push_back(std::move(rects));
            }
        }
    }
@@ -261,7 +333,6 @@ namespace dlib
        parts_list.clear();
        images.clear();
        object_locations.clear();
-        const std::string old_working_dir = get_current_dir();

        using namespace dlib::image_dataset_metadata;
        dataset data;
@@ -270,7 +341,7 @@ namespace dlib
        // Set the current directory to be the one that contains the
        // metadata file. We do this because the file might contain
        // file paths which are relative to this folder.
-        set_current_dir(get_parent_directory(file(source.get_filename())));
+        locally_change_current_dir chdir(get_parent_directory(file(source.get_filename())));


        std::set<std::string> all_parts;
@@ -307,6 +378,7 @@ namespace dlib
        std::vector<full_object_detection> object_dets;
        for (unsigned long i = 0; i < data.images.size(); ++i)
        {
+            double min_rect_size = std::numeric_limits<double>::infinity();
            object_dets.clear();
            ignored.clear();
            for (unsigned long j = 0; j < data.images[i].boxes.size(); ++j)
@@ -330,20 +402,57 @@ namespace dlib
                        }

                        object_dets.push_back(full_object_detection(data.images[i].boxes[j].rect, partlist));
+                        min_rect_size = std::min<double>(min_rect_size, object_dets.back().get_rect().area());
                    }
                }
            }

            if (!source.should_skip_empty_images() || object_dets.size() != 0)
            {
-                object_locations.push_back(object_dets);
-                ignored_rects.push_back(ignored);
                load_image(img, data.images[i].filename);
+                if (object_dets.size() != 0)  
+                {
+                    // if shrinking the image would still result in the smallest box being
+                    // bigger than the box area threshold then shrink the image.
+                    while(min_rect_size/2/2 > source.box_area_thresh())
+                    {
+                        pyramid_down<2> pyr;
+                        pyr(img);
+                        min_rect_size *= (1.0/2.0)*(1.0/2.0);
+                        for (auto&& r : object_dets)
+                        {
+                            r.get_rect() = pyr.rect_down(r.get_rect());
+                            for (unsigned long k = 0; k < r.num_parts(); ++k)
+                                r.part(k) = pyr.point_down(r.part(k));
+                        }
+                        for (auto&& r : ignored)
+                        {
+                            r = pyr.rect_down(r);
+                        }
+                    }
+                    while(min_rect_size*(2.0/3.0)*(2.0/3.0) > source.box_area_thresh())
+                    {
+                        pyramid_down<3> pyr;
+                        pyr(img);
+                        min_rect_size *= (2.0/3.0)*(2.0/3.0);
+                        for (auto&& r : object_dets)
+                        {
+                            r.get_rect() = pyr.rect_down(r.get_rect());
+                            for (unsigned long k = 0; k < r.num_parts(); ++k)
+                                r.part(k) = pyr.point_down(r.part(k));
+                        }
+                        for (auto&& r : ignored)
+                        {
+                            r = pyr.rect_down(r);
+                        }
+                    }
+                }
                images.push_back(img);
+                object_locations.push_back(object_dets);
+                ignored_rects.push_back(ignored);
            }
        }

-        set_current_dir(old_working_dir);

        return ignored_rects;
    }

--- a/dlib/data_io/load_image_dataset_abstract.h
+++ b/dlib/data_io/load_image_dataset_abstract.h
@@ -36,6 +36,7 @@ namespace dlib
                  This means that, initially, all boxes will be loaded.  Therefore, for all
                  possible boxes B we have:
                    - #should_load_box(B) == true
+                - #box_area_thresh() == infinity
        !*/

        const std::string& get_filename(
@@ -115,6 +116,26 @@ namespace dlib
                        - returns false
        !*/

+        image_dataset_file shrink_big_images(
+            double new_box_area_thresh = 150*150
+        ) const;
+        /*!
+            ensures
+                - returns a copy of *this that is identical in all respects to *this except
+                  that #box_area_thresh() == new_box_area_thresh
+        !*/
+
+        double box_area_thresh(
+        ) const;
+        /*!
+            ensures
+                - If the smallest non-ignored rectangle in an image has an area greater
+                  than box_area_thresh() then we will shrink the image until the area of
+                  the box is about equal to box_area_thresh().  This is useful if you have
+                  a dataset containing very high resolution images and you don't want to
+                  load it in its native high resolution.  Setting the box_area_thresh()
+                  allows you to control the resolution of the loaded images.
+        !*/
    };

 // ----------------------------------------------------------------------------------------