Generalized segment_image() so it works on any pixel type or array of vectors.

I also changed its interface slightly. In particular, I removed the min_diff parameter and replaced it with an explicit min_size parameter.

Generalized segment_image() so it works on any pixel type or array of vectors.
I also changed its interface slightly. In particular, I removed the min_diff parameter and replaced it with an explicit min_size parameter.
0a1908d7 · Davis King · 055d6f56 · 0a1908d7 · 0a1908d7
Commit 0a1908d7 authored Feb 28, 2013 by Davis King
Hide whitespace changes
Inline Side-by-side

Showing with 277 additions and 68 deletions

segment_image.h dlib/image_transforms/segment_image.h +262 -56

segment_image_abstract.h dlib/image_transforms/segment_image_abstract.h +15 -12

No files found.
--- a/dlib/image_transforms/segment_image.h
+++ b/dlib/image_transforms/segment_image.h
@@ -58,15 +58,18 @@ namespace dlib

 // ----------------------------------------------------------------------------------------

+// This is an overload of segment_image() that is optimized to segment images with 8bit
+// pixels very quickly.  We do this by using a radix sort instead of quicksort.
    template <
        typename in_image_type,
        typename out_image_type
        >
-    void segment_image (
+    typename enable_if<is_same_type<typename in_image_type::type,uint8> >::type 
+    segment_image (
        const in_image_type& in_img,
        out_image_type& out_img,
-        const unsigned long k = 200,
-        const unsigned long min_diff = 0
+        const double k = 200,
+        const unsigned long min_size = 10
    )
    {
        using namespace dlib::impl;
@@ -78,7 +81,6 @@ namespace dlib
            << "\n\t The input images can't be the same object."
            );

-        COMPILE_TIME_ASSERT(is_unsigned_type<ptype>::value && sizeof(ptype) <= 2);
        COMPILE_TIME_ASSERT(is_unsigned_type<typename out_image_type::type>::value);

        out_img.set_size(in_img.nr(), in_img.nc());
@@ -108,34 +110,22 @@ namespace dlib
            const ptype pix = in_img[r][c];
            if (area.contains(c-1,r))   counts[edge_diff(pix, in_img[r  ][c-1])] += 1;
            if (area.contains(c+1,r))   counts[edge_diff(pix, in_img[r  ][c+1])] += 1;
-
-            if (area.contains(c-1,r-1)) counts[edge_diff(pix, in_img[r-1][c-1])] += 1;
            if (area.contains(c  ,r-1)) counts[edge_diff(pix, in_img[r-1][c  ])] += 1;
-            if (area.contains(c+1,r-1)) counts[edge_diff(pix, in_img[r-1][c+1])] += 1;
-
-            if (area.contains(c-1,r+1)) counts[edge_diff(pix, in_img[r+1][c-1])] += 1;
            if (area.contains(c  ,r+1)) counts[edge_diff(pix, in_img[r+1][c  ])] += 1;
-            if (area.contains(c+1,r+1)) counts[edge_diff(pix, in_img[r+1][c+1])] += 1;
        }
        for (long r = 1; r+1 < in_img.nr(); ++r)
        {
            for (long c = 1; c+1 < in_img.nc(); ++c)
            {
                const ptype pix = in_img[r][c];
-                counts[edge_diff(pix, in_img[r  ][c-1])] += 1;
-                counts[edge_diff(pix, in_img[r  ][c+1])] += 1;
-
-                counts[edge_diff(pix, in_img[r-1][c-1])] += 1;
-                counts[edge_diff(pix, in_img[r-1][c  ])] += 1;
                counts[edge_diff(pix, in_img[r-1][c+1])] += 1;
-
-                counts[edge_diff(pix, in_img[r+1][c-1])] += 1;
+                counts[edge_diff(pix, in_img[r  ][c+1])] += 1;
                counts[edge_diff(pix, in_img[r+1][c  ])] += 1;
                counts[edge_diff(pix, in_img[r+1][c+1])] += 1;
            }
        }

-        const unsigned long num_edges = shrink_rect(area,1).area()*8 + in_img.nr()*2*5 - 8 + (in_img.nc()-2)*2*5;
+        const unsigned long num_edges = shrink_rect(area,1).area()*4 + in_img.nr()*2*3 - 4 + (in_img.nc()-2)*2*3;
        std::vector<segment_image_edge_data> sorted_edges(num_edges);

        // integrate counts.  The idea is to have sorted_edges[counts[i]] be the location that edges
@@ -155,9 +145,9 @@ namespace dlib
        be.reset();
        while(be.move_next())
        {
-            const long r = be.element().y();
-            const long c = be.element().x();
-            const point p(c,r);
+            const point p = be.element();
+            const long r = p.y();
+            const long c = p.x();
            const ptype pix = in_img[r][c];
            if (area.contains(c-1,r))
            {
@@ -171,37 +161,17 @@ namespace dlib
                sorted_edges[counts[diff]++] = segment_image_edge_data(area,p,point(c+1,r),diff);
            }

-            if (area.contains(c-1,r-1))
-            {
-                const ptype diff = edge_diff(pix, in_img[r-1][c-1]);
-                sorted_edges[counts[diff]++] = segment_image_edge_data(area,p,point(c-1,r-1),diff);
-            }
            if (area.contains(c  ,r-1))
            {
                const ptype diff = edge_diff(pix, in_img[r-1][c  ]);
                sorted_edges[counts[diff]++] = segment_image_edge_data(area,p,point(c  ,r-1),diff);
            }
-            if (area.contains(c+1,r-1))
-            {
-                const ptype diff = edge_diff(pix, in_img[r-1][c+1]);
-                sorted_edges[counts[diff]++] = segment_image_edge_data(area,p,point(c+1,r-1),diff);
-            }

-            if (area.contains(c-1,r+1))
-            {
-                const ptype diff = edge_diff(pix, in_img[r+1][c-1]);
-                sorted_edges[counts[diff]++] = segment_image_edge_data(area,p,point(c-1,r+1),diff);
-            }
            if (area.contains(c  ,r+1))
            {
                const ptype diff = edge_diff(pix, in_img[r+1][c  ]);
                sorted_edges[counts[diff]++] = segment_image_edge_data(area,p,point(c  ,r+1),diff);
            }
-            if (area.contains(c+1,r+1))
-            {
-                const ptype diff = edge_diff(pix, in_img[r+1][c+1]);
-                sorted_edges[counts[diff]++] = segment_image_edge_data(area,p,point(c+1,r+1),diff);
-            }
        }
        // same thing as the above loop but now we do it on the interior of the image and therefore
        // don't have to include the boundary checking if statements used above.
@@ -213,24 +183,14 @@ namespace dlib
                const ptype pix = in_img[r][c];
                ptype diff;

-                diff = edge_diff(pix, in_img[r  ][c-1]);
-                sorted_edges[counts[diff]++] = segment_image_edge_data(area,p,point(c-1,r),diff);
                diff = edge_diff(pix, in_img[r  ][c+1]);
                sorted_edges[counts[diff]++] = segment_image_edge_data(area,p,point(c+1,r),diff);
-
-                diff = edge_diff(pix, in_img[r-1][c-1]);
-                sorted_edges[counts[diff]++] = segment_image_edge_data(area,p,point(c-1,r-1),diff);
-                diff = edge_diff(pix, in_img[r-1][c  ]);
-                sorted_edges[counts[diff]++] = segment_image_edge_data(area,p,point(c  ,r-1),diff);
                diff = edge_diff(pix, in_img[r-1][c+1]);
                sorted_edges[counts[diff]++] = segment_image_edge_data(area,p,point(c+1,r-1),diff);
-
-                diff = edge_diff(pix, in_img[r+1][c-1]);
-                sorted_edges[counts[diff]++] = segment_image_edge_data(area,p,point(c-1,r+1),diff);
-                diff = edge_diff(pix, in_img[r+1][c  ]);
-                sorted_edges[counts[diff]++] = segment_image_edge_data(area,p,point(c  ,r+1),diff);
                diff = edge_diff(pix, in_img[r+1][c+1]);
                sorted_edges[counts[diff]++] = segment_image_edge_data(area,p,point(c+1,r+1),diff);
+                diff = edge_diff(pix, in_img[r+1][c  ]);
+                sorted_edges[counts[diff]++] = segment_image_edge_data(area,p,point(c  ,r+1),diff);
            }
        }

@@ -247,12 +207,240 @@ namespace dlib
            if (set1 != set2)
            {
                const ptype diff = sorted_edges[i].diff;
-                const ptype tau1 = k/data[set1].component_size;
-                const ptype tau2 = k/data[set2].component_size;
+                const ptype tau1 = static_cast<ptype>(std::floor(k/data[set1].component_size));
+                const ptype tau2 = static_cast<ptype>(std::floor(k/data[set2].component_size));

                const ptype mint = std::min(data[set1].internal_diff + tau1, 
                                            data[set2].internal_diff + tau2);
-                if (diff <= std::max<ptype>(mint,min_diff))
+                if (diff <= mint)
+                {
+                    const unsigned long new_set = sets.merge_sets(set1, set2);
+                    data[new_set].component_size = data[set1].component_size + data[set2].component_size;
+                    data[new_set].internal_diff = diff;
+                }
+            }
+        }
+
+        // now merge any really small blobs
+        if (min_size != 0)
+        {
+            for (unsigned long i = 0; i < sorted_edges.size(); ++i)
+            {
+                const unsigned long idx1 = sorted_edges[i].idx1;
+                const unsigned long idx2 = sorted_edges[i].idx2;
+
+                unsigned long set1 = sets.find_set(idx1);
+                unsigned long set2 = sets.find_set(idx2);
+                if (set1 != set2 && (data[set1].component_size < min_size || data[set2].component_size < min_size))
+                {
+                    const unsigned long new_set = sets.merge_sets(set1, set2);
+                    data[new_set].component_size = data[set1].component_size + data[set2].component_size;
+                    const ptype diff = sorted_edges[i].diff;
+                    data[new_set].internal_diff = diff;
+                }
+            }
+        }
+
+        unsigned long idx = 0;
+        for (long r = 0; r < out_img.nr(); ++r)
+        {
+            for (long c = 0; c < out_img.nc(); ++c)
+            {
+                out_img[r][c] = sets.find_set(idx++);
+            }
+        }
+    }
+
+// ----------------------------------------------------------------------------------------
+// ----------------------------------------------------------------------------------------
+// ----------------------------------------------------------------------------------------
+// ----------------------------------------------------------------------------------------
+
+    namespace impl
+    {
+        template <typename T, typename enabled = void>
+        struct edge_diff_funct 
+        {
+            template <typename pixel_type>
+            double operator()(
+                const pixel_type& a,
+                const pixel_type& b
+            ) const
+            {
+                return length(pixel_to_vector<double>(a) - pixel_to_vector<double>(b));
+            }
+        };
+
+        template <typename T>
+        struct edge_diff_funct<T, typename enable_if<is_matrix<T> >::type>
+        {
+            double operator()(
+                const T& a,
+                const T& b
+            ) const
+            {
+                return length(a-b);
+            }
+        };
+
+        template <typename T>
+        struct graph_image_segmentation_data2
+        {
+            graph_image_segmentation_data2() : component_size(1), internal_diff(0) {}
+            unsigned long component_size;
+            T internal_diff;
+        };
+
+        template <typename T>
+        struct segment_image_edge_data2
+        {
+            segment_image_edge_data2 (){}
+
+            segment_image_edge_data2 (
+                const rectangle& rect,
+                const point& p1,
+                const point& p2,
+                const T& diff_
+            ) :
+                idx1(p1.y()*rect.width() + p1.x()),
+                idx2(p2.y()*rect.width() + p2.x()),
+                diff(diff_)
+            {}
+
+            bool operator<(const segment_image_edge_data2& item) const
+            { return diff < item.diff; }
+
+            unsigned long idx1;
+            unsigned long idx2;
+            T diff;
+        };
+    }
+
+    // This is the general purpose version of segment_image().  It handles all pixel types.
+    template <
+        typename in_image_type,
+        typename out_image_type
+        >
+    typename disable_if<is_same_type<typename in_image_type::type,uint8> >::type 
+    segment_image (
+        const in_image_type& in_img,
+        out_image_type& out_img,
+        const double k = 200,
+        const unsigned long min_size = 10
+    )
+    {
+        using namespace dlib::impl;
+
+
+        typedef double diff_type;
+        typedef typename in_image_type::type ptype;
+        edge_diff_funct<ptype> edge_diff;
+
+        // make sure requires clause is not broken
+        DLIB_ASSERT(is_same_object(in_img, out_img) == false,
+            "\t void segment_image()"
+            << "\n\t The input images can't be the same object."
+            );
+
+        COMPILE_TIME_ASSERT(is_unsigned_type<typename out_image_type::type>::value);
+
+        out_img.set_size(in_img.nr(), in_img.nc());
+        // don't bother doing anything if the image is too small
+        if (in_img.nr() < 2 || in_img.nc() < 2)
+        {
+            assign_all_pixels(out_img,0);
+            return;
+        }
+
+        disjoint_subsets sets;
+        sets.set_size(in_img.size());
+
+
+        std::vector<graph_image_segmentation_data2<diff_type> > data(in_img.size());
+
+        const rectangle area = get_rect(in_img);
+
+        typedef segment_image_edge_data2<diff_type> segment_image_edge_data;
+
+        std::vector<segment_image_edge_data> sorted_edges;
+        sorted_edges.reserve(area.area()*4);
+
+
+        border_enumerator be(get_rect(in_img), 1);
+
+        // now build a sorted list of all the edges
+        be.reset();
+        while(be.move_next())
+        {
+            const point p = be.element();
+            const long r = p.y();
+            const long c = p.x();
+            const ptype& pix = in_img[r][c];
+            if (area.contains(c-1,r))
+            {
+                const diff_type diff = edge_diff(pix, in_img[r  ][c-1]);
+                sorted_edges.push_back(segment_image_edge_data(area,p,point(c-1,r),diff));
+            }
+
+            if (area.contains(c+1,r))
+            {
+                const diff_type diff = edge_diff(pix, in_img[r  ][c+1]);
+                sorted_edges.push_back(segment_image_edge_data(area,p,point(c+1,r),diff));
+            }
+
+            if (area.contains(c  ,r-1))
+            {
+                const diff_type diff = edge_diff(pix, in_img[r-1][c  ]);
+                sorted_edges.push_back( segment_image_edge_data(area,p,point(c  ,r-1),diff));
+            }
+            if (area.contains(c  ,r+1))
+            {
+                const diff_type diff = edge_diff(pix, in_img[r+1][c  ]);
+                sorted_edges.push_back( segment_image_edge_data(area,p,point(c  ,r+1),diff));
+            }
+        }
+        // same thing as the above loop but now we do it on the interior of the image and therefore
+        // don't have to include the boundary checking if statements used above.
+        for (long r = 1; r+1 < in_img.nr(); ++r)
+        {
+            for (long c = 1; c+1 < in_img.nc(); ++c)
+            {
+                const point p(c,r);
+                const ptype& pix = in_img[r][c];
+                diff_type diff;
+
+                diff = edge_diff(pix, in_img[r  ][c+1]);
+                sorted_edges.push_back( segment_image_edge_data(area,p,point(c+1,r),diff));
+                diff = edge_diff(pix, in_img[r+1][c+1]);
+                sorted_edges.push_back( segment_image_edge_data(area,p,point(c+1,r+1),diff));
+                diff = edge_diff(pix, in_img[r+1][c  ]);
+                sorted_edges.push_back( segment_image_edge_data(area,p,point(c  ,r+1),diff));
+                diff = edge_diff(pix, in_img[r-1][c+1]);
+                sorted_edges.push_back( segment_image_edge_data(area,p,point(c+1,r-1),diff));
+            }
+        }
+
+        std::sort(sorted_edges.begin(), sorted_edges.end());
+
+
+
+        // now start connecting blobs together to make a minimum spanning tree.
+        for (unsigned long i = 0; i < sorted_edges.size(); ++i)
+        {
+            const unsigned long idx1 = sorted_edges[i].idx1;
+            const unsigned long idx2 = sorted_edges[i].idx2;
+
+            unsigned long set1 = sets.find_set(idx1);
+            unsigned long set2 = sets.find_set(idx2);
+            if (set1 != set2)
+            {
+                const diff_type diff = sorted_edges[i].diff;
+                const diff_type tau1 = k/data[set1].component_size;
+                const diff_type tau2 = k/data[set2].component_size;
+
+                const diff_type mint = std::min(data[set1].internal_diff + tau1, 
+                                            data[set2].internal_diff + tau2);
+                if (diff <= mint)
                {
                    const unsigned long new_set = sets.merge_sets(set1, set2);
                    data[new_set].component_size = data[set1].component_size + data[set2].component_size;
@@ -261,6 +449,24 @@ namespace dlib
            }
        }

+        // now merge any really small blobs
+        if (min_size != 0)
+        {
+            for (unsigned long i = 0; i < sorted_edges.size(); ++i)
+            {
+                const unsigned long idx1 = sorted_edges[i].idx1;
+                const unsigned long idx2 = sorted_edges[i].idx2;
+
+                unsigned long set1 = sets.find_set(idx1);
+                unsigned long set2 = sets.find_set(idx2);
+                if (set1 != set2 && (data[set1].component_size < min_size || data[set2].component_size < min_size))
+                {
+                    const unsigned long new_set = sets.merge_sets(set1, set2);
+                    data[new_set].component_size = data[set1].component_size + data[set2].component_size;
+                }
+            }
+        }
+
        unsigned long idx = 0;
        for (long r = 0; r < out_img.nr(); ++r)
        {

--- a/dlib/image_transforms/segment_image_abstract.h
+++ b/dlib/image_transforms/segment_image_abstract.h
@@ -15,31 +15,34 @@ namespace dlib
    void segment_image (
        const in_image_type& in_img,
        out_image_type& out_img,
-        const unsigned long k = 200,
-        const unsigned long min_diff = 0
+        const double k = 200,
+        const unsigned long min_size = 10
    );
    /*!
        requires
            - in_image_type  == an implementation of array2d/array2d_kernel_abstract.h
            - out_image_type == an implementation of array2d/array2d_kernel_abstract.h
-            - in_image_type::type  == an unsigned 8-bit or 16bit integer type. 
+            - in_image_type::type  == Any pixel type with a pixel_traits specialization or a
+              dlib matrix object representing a row or column vector.
            - out_image_type::type == unsigned integer type 
            - is_same_object(in_img, out_img) == false
        ensures
-            - Attempts to segment in_img into regions which have some visual consistency to them.
-              In particular, this function implements the algorithm described in the paper:
-              Efficient Graph-Based Image Segmentation by Felzenszwalb and Huttenlocher.
+            - Attempts to segment in_img into regions which have some visual consistency to
+              them.  In particular, this function implements the algorithm described in the
+              paper: Efficient Graph-Based Image Segmentation by Felzenszwalb and Huttenlocher.
            - #out_img.nr() == in_img.nr()
            - #out_img.nc() == in_img.nc()
            - for all valid r and c:
                - #out_img[r][c] == an integer value indicating the identity of the segment
                  containing the pixel in_img[r][c].  
-            - The k parameter is a measure used to influence how large the segment regions will
-              be.  Larger k generally results in larger segments being produced.  For a deeper 
-              discussion of the k parameter you should consult the above referenced paper.
-            - Any neighboring segments with an edge between them with a pixel difference <= min_diff 
-              will always be merged.  So making min_diff bigger makes this algorithm more eager
-              to merge neighboring segments.
+            - The k parameter is a measure used to influence how large the segment regions
+              will be.  Larger k generally results in larger segments being produced.  For
+              a deeper discussion of the k parameter you should consult the above
+              referenced paper.
+            - min_size is a lower bound on the size of the output segments.  That is, it is
+              guaranteed that all output segments will have at least min_size pixels in
+              them (unless the whole image contains fewer than min_size pixels, in which
+              case the entire image will be put into a single segment).
    !*/

 // ----------------------------------------------------------------------------------------