Commit 0a1908d7 authored by Davis King

Generalized segment_image() so it works on any pixel type or array of vectors.

I also changed its interface slightly.  In particular, I removed the min_diff
parameter and replaced it with an explicit min_size parameter.
parent 055d6f56
...@@ -58,15 +58,18 @@ namespace dlib ...@@ -58,15 +58,18 @@ namespace dlib
// ---------------------------------------------------------------------------------------- // ----------------------------------------------------------------------------------------
// This is an overload of segment_image() that is optimized to segment images with 8bit
// pixels very quickly. We do this by using a radix sort instead of quicksort.
template < template <
typename in_image_type, typename in_image_type,
typename out_image_type typename out_image_type
> >
void segment_image ( typename enable_if<is_same_type<typename in_image_type::type,uint8> >::type
segment_image (
const in_image_type& in_img, const in_image_type& in_img,
out_image_type& out_img, out_image_type& out_img,
const unsigned long k = 200, const double k = 200,
const unsigned long min_diff = 0 const unsigned long min_size = 10
) )
{ {
using namespace dlib::impl; using namespace dlib::impl;
...@@ -78,7 +81,6 @@ namespace dlib ...@@ -78,7 +81,6 @@ namespace dlib
<< "\n\t The input images can't be the same object." << "\n\t The input images can't be the same object."
); );
COMPILE_TIME_ASSERT(is_unsigned_type<ptype>::value && sizeof(ptype) <= 2);
COMPILE_TIME_ASSERT(is_unsigned_type<typename out_image_type::type>::value); COMPILE_TIME_ASSERT(is_unsigned_type<typename out_image_type::type>::value);
out_img.set_size(in_img.nr(), in_img.nc()); out_img.set_size(in_img.nr(), in_img.nc());
...@@ -108,34 +110,22 @@ namespace dlib ...@@ -108,34 +110,22 @@ namespace dlib
const ptype pix = in_img[r][c]; const ptype pix = in_img[r][c];
if (area.contains(c-1,r)) counts[edge_diff(pix, in_img[r ][c-1])] += 1; if (area.contains(c-1,r)) counts[edge_diff(pix, in_img[r ][c-1])] += 1;
if (area.contains(c+1,r)) counts[edge_diff(pix, in_img[r ][c+1])] += 1; if (area.contains(c+1,r)) counts[edge_diff(pix, in_img[r ][c+1])] += 1;
if (area.contains(c-1,r-1)) counts[edge_diff(pix, in_img[r-1][c-1])] += 1;
if (area.contains(c ,r-1)) counts[edge_diff(pix, in_img[r-1][c ])] += 1; if (area.contains(c ,r-1)) counts[edge_diff(pix, in_img[r-1][c ])] += 1;
if (area.contains(c+1,r-1)) counts[edge_diff(pix, in_img[r-1][c+1])] += 1;
if (area.contains(c-1,r+1)) counts[edge_diff(pix, in_img[r+1][c-1])] += 1;
if (area.contains(c ,r+1)) counts[edge_diff(pix, in_img[r+1][c ])] += 1; if (area.contains(c ,r+1)) counts[edge_diff(pix, in_img[r+1][c ])] += 1;
if (area.contains(c+1,r+1)) counts[edge_diff(pix, in_img[r+1][c+1])] += 1;
} }
for (long r = 1; r+1 < in_img.nr(); ++r) for (long r = 1; r+1 < in_img.nr(); ++r)
{ {
for (long c = 1; c+1 < in_img.nc(); ++c) for (long c = 1; c+1 < in_img.nc(); ++c)
{ {
const ptype pix = in_img[r][c]; const ptype pix = in_img[r][c];
counts[edge_diff(pix, in_img[r ][c-1])] += 1;
counts[edge_diff(pix, in_img[r ][c+1])] += 1;
counts[edge_diff(pix, in_img[r-1][c-1])] += 1;
counts[edge_diff(pix, in_img[r-1][c ])] += 1;
counts[edge_diff(pix, in_img[r-1][c+1])] += 1; counts[edge_diff(pix, in_img[r-1][c+1])] += 1;
counts[edge_diff(pix, in_img[r ][c+1])] += 1;
counts[edge_diff(pix, in_img[r+1][c-1])] += 1;
counts[edge_diff(pix, in_img[r+1][c ])] += 1; counts[edge_diff(pix, in_img[r+1][c ])] += 1;
counts[edge_diff(pix, in_img[r+1][c+1])] += 1; counts[edge_diff(pix, in_img[r+1][c+1])] += 1;
} }
} }
const unsigned long num_edges = shrink_rect(area,1).area()*8 + in_img.nr()*2*5 - 8 + (in_img.nc()-2)*2*5; const unsigned long num_edges = shrink_rect(area,1).area()*4 + in_img.nr()*2*3 - 4 + (in_img.nc()-2)*2*3;
std::vector<segment_image_edge_data> sorted_edges(num_edges); std::vector<segment_image_edge_data> sorted_edges(num_edges);
// integrate counts. The idea is to have sorted_edges[counts[i]] be the location that edges // integrate counts. The idea is to have sorted_edges[counts[i]] be the location that edges
...@@ -155,9 +145,9 @@ namespace dlib ...@@ -155,9 +145,9 @@ namespace dlib
be.reset(); be.reset();
while(be.move_next()) while(be.move_next())
{ {
const long r = be.element().y(); const point p = be.element();
const long c = be.element().x(); const long r = p.y();
const point p(c,r); const long c = p.x();
const ptype pix = in_img[r][c]; const ptype pix = in_img[r][c];
if (area.contains(c-1,r)) if (area.contains(c-1,r))
{ {
...@@ -171,37 +161,17 @@ namespace dlib ...@@ -171,37 +161,17 @@ namespace dlib
sorted_edges[counts[diff]++] = segment_image_edge_data(area,p,point(c+1,r),diff); sorted_edges[counts[diff]++] = segment_image_edge_data(area,p,point(c+1,r),diff);
} }
if (area.contains(c-1,r-1))
{
const ptype diff = edge_diff(pix, in_img[r-1][c-1]);
sorted_edges[counts[diff]++] = segment_image_edge_data(area,p,point(c-1,r-1),diff);
}
if (area.contains(c ,r-1)) if (area.contains(c ,r-1))
{ {
const ptype diff = edge_diff(pix, in_img[r-1][c ]); const ptype diff = edge_diff(pix, in_img[r-1][c ]);
sorted_edges[counts[diff]++] = segment_image_edge_data(area,p,point(c ,r-1),diff); sorted_edges[counts[diff]++] = segment_image_edge_data(area,p,point(c ,r-1),diff);
} }
if (area.contains(c+1,r-1))
{
const ptype diff = edge_diff(pix, in_img[r-1][c+1]);
sorted_edges[counts[diff]++] = segment_image_edge_data(area,p,point(c+1,r-1),diff);
}
if (area.contains(c-1,r+1))
{
const ptype diff = edge_diff(pix, in_img[r+1][c-1]);
sorted_edges[counts[diff]++] = segment_image_edge_data(area,p,point(c-1,r+1),diff);
}
if (area.contains(c ,r+1)) if (area.contains(c ,r+1))
{ {
const ptype diff = edge_diff(pix, in_img[r+1][c ]); const ptype diff = edge_diff(pix, in_img[r+1][c ]);
sorted_edges[counts[diff]++] = segment_image_edge_data(area,p,point(c ,r+1),diff); sorted_edges[counts[diff]++] = segment_image_edge_data(area,p,point(c ,r+1),diff);
} }
if (area.contains(c+1,r+1))
{
const ptype diff = edge_diff(pix, in_img[r+1][c+1]);
sorted_edges[counts[diff]++] = segment_image_edge_data(area,p,point(c+1,r+1),diff);
}
} }
// same thing as the above loop but now we do it on the interior of the image and therefore // same thing as the above loop but now we do it on the interior of the image and therefore
// don't have to include the boundary checking if statements used above. // don't have to include the boundary checking if statements used above.
...@@ -213,24 +183,14 @@ namespace dlib ...@@ -213,24 +183,14 @@ namespace dlib
const ptype pix = in_img[r][c]; const ptype pix = in_img[r][c];
ptype diff; ptype diff;
diff = edge_diff(pix, in_img[r ][c-1]);
sorted_edges[counts[diff]++] = segment_image_edge_data(area,p,point(c-1,r),diff);
diff = edge_diff(pix, in_img[r ][c+1]); diff = edge_diff(pix, in_img[r ][c+1]);
sorted_edges[counts[diff]++] = segment_image_edge_data(area,p,point(c+1,r),diff); sorted_edges[counts[diff]++] = segment_image_edge_data(area,p,point(c+1,r),diff);
diff = edge_diff(pix, in_img[r-1][c-1]);
sorted_edges[counts[diff]++] = segment_image_edge_data(area,p,point(c-1,r-1),diff);
diff = edge_diff(pix, in_img[r-1][c ]);
sorted_edges[counts[diff]++] = segment_image_edge_data(area,p,point(c ,r-1),diff);
diff = edge_diff(pix, in_img[r-1][c+1]); diff = edge_diff(pix, in_img[r-1][c+1]);
sorted_edges[counts[diff]++] = segment_image_edge_data(area,p,point(c+1,r-1),diff); sorted_edges[counts[diff]++] = segment_image_edge_data(area,p,point(c+1,r-1),diff);
diff = edge_diff(pix, in_img[r+1][c-1]);
sorted_edges[counts[diff]++] = segment_image_edge_data(area,p,point(c-1,r+1),diff);
diff = edge_diff(pix, in_img[r+1][c ]);
sorted_edges[counts[diff]++] = segment_image_edge_data(area,p,point(c ,r+1),diff);
diff = edge_diff(pix, in_img[r+1][c+1]); diff = edge_diff(pix, in_img[r+1][c+1]);
sorted_edges[counts[diff]++] = segment_image_edge_data(area,p,point(c+1,r+1),diff); sorted_edges[counts[diff]++] = segment_image_edge_data(area,p,point(c+1,r+1),diff);
diff = edge_diff(pix, in_img[r+1][c ]);
sorted_edges[counts[diff]++] = segment_image_edge_data(area,p,point(c ,r+1),diff);
} }
} }
...@@ -247,12 +207,240 @@ namespace dlib ...@@ -247,12 +207,240 @@ namespace dlib
if (set1 != set2) if (set1 != set2)
{ {
const ptype diff = sorted_edges[i].diff; const ptype diff = sorted_edges[i].diff;
const ptype tau1 = k/data[set1].component_size; const ptype tau1 = static_cast<ptype>(std::floor(k/data[set1].component_size));
const ptype tau2 = k/data[set2].component_size; const ptype tau2 = static_cast<ptype>(std::floor(k/data[set2].component_size));
const ptype mint = std::min(data[set1].internal_diff + tau1, const ptype mint = std::min(data[set1].internal_diff + tau1,
data[set2].internal_diff + tau2); data[set2].internal_diff + tau2);
if (diff <= std::max<ptype>(mint,min_diff)) if (diff <= mint)
{
const unsigned long new_set = sets.merge_sets(set1, set2);
data[new_set].component_size = data[set1].component_size + data[set2].component_size;
data[new_set].internal_diff = diff;
}
}
}
// now merge any really small blobs
if (min_size != 0)
{
for (unsigned long i = 0; i < sorted_edges.size(); ++i)
{
const unsigned long idx1 = sorted_edges[i].idx1;
const unsigned long idx2 = sorted_edges[i].idx2;
unsigned long set1 = sets.find_set(idx1);
unsigned long set2 = sets.find_set(idx2);
if (set1 != set2 && (data[set1].component_size < min_size || data[set2].component_size < min_size))
{
const unsigned long new_set = sets.merge_sets(set1, set2);
data[new_set].component_size = data[set1].component_size + data[set2].component_size;
const ptype diff = sorted_edges[i].diff;
data[new_set].internal_diff = diff;
}
}
}
unsigned long idx = 0;
for (long r = 0; r < out_img.nr(); ++r)
{
for (long c = 0; c < out_img.nc(); ++c)
{
out_img[r][c] = sets.find_set(idx++);
}
}
}
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
namespace impl
{
// Generic edge-difference functor.  Both pixels are converted with
// pixel_to_vector<double>() and the result is length() of the difference of
// those two vectors.  Neither T nor the defaulted `enabled` parameter is used
// in the body; `enabled` gives specializations a slot for an enable_if
// condition.
template <typename T, typename enabled = void>
struct edge_diff_funct
{
    // Returns length(pixel_to_vector<double>(lhs) - pixel_to_vector<double>(rhs)).
    template <typename P>
    double operator()(
        const P& lhs,
        const P& rhs
    ) const
    {
        return length(pixel_to_vector<double>(lhs) - pixel_to_vector<double>(rhs));
    }
};
// Specialization of edge_diff_funct selected when is_matrix<T> is satisfied.
// The operands are subtracted directly (no pixel_to_vector conversion) and
// length() of the difference is returned as a double.
template <typename T>
struct edge_diff_funct<T, typename enable_if<is_matrix<T> >::type>
{
    double operator()(
        const T& lhs,
        const T& rhs
    ) const
    {
        const double distance = length(lhs - rhs);
        return distance;
    }
};
// Bookkeeping record kept for each disjoint set while segmenting.  A freshly
// constructed record has component_size == 1 and internal_diff == 0.
// T is the type used to store edge differences.
template <typename T>
struct graph_image_segmentation_data2
{
    graph_image_segmentation_data2()
        : component_size(1),
          internal_diff(0)
    {
    }

    // Size of the component; merged components store the sum of their sizes.
    unsigned long component_size;
    // Set to the diff of the edge that caused the most recent merge.
    T internal_diff;
};
template <typename T>
struct segment_image_edge_data2
{
segment_image_edge_data2 (){}
segment_image_edge_data2 (
const rectangle& rect,
const point& p1,
const point& p2,
const T& diff_
) :
idx1(p1.y()*rect.width() + p1.x()),
idx2(p2.y()*rect.width() + p2.x()),
diff(diff_)
{}
bool operator<(const segment_image_edge_data2& item) const
{ return diff < item.diff; }
unsigned long idx1;
unsigned long idx2;
T diff;
};
}
// This is the general purpose version of segment_image(). It handles all pixel types.
template <
typename in_image_type,
typename out_image_type
>
typename disable_if<is_same_type<typename in_image_type::type,uint8> >::type
segment_image (
const in_image_type& in_img,
out_image_type& out_img,
const double k = 200,
const unsigned long min_size = 10
)
{
using namespace dlib::impl;
typedef double diff_type;
typedef typename in_image_type::type ptype;
edge_diff_funct<ptype> edge_diff;
// make sure requires clause is not broken
DLIB_ASSERT(is_same_object(in_img, out_img) == false,
"\t void segment_image()"
<< "\n\t The input images can't be the same object."
);
COMPILE_TIME_ASSERT(is_unsigned_type<typename out_image_type::type>::value);
out_img.set_size(in_img.nr(), in_img.nc());
// don't bother doing anything if the image is too small
if (in_img.nr() < 2 || in_img.nc() < 2)
{
assign_all_pixels(out_img,0);
return;
}
disjoint_subsets sets;
sets.set_size(in_img.size());
std::vector<graph_image_segmentation_data2<diff_type> > data(in_img.size());
const rectangle area = get_rect(in_img);
typedef segment_image_edge_data2<diff_type> segment_image_edge_data;
std::vector<segment_image_edge_data> sorted_edges;
sorted_edges.reserve(area.area()*4);
border_enumerator be(get_rect(in_img), 1);
// now build a sorted list of all the edges
be.reset();
while(be.move_next())
{
const point p = be.element();
const long r = p.y();
const long c = p.x();
const ptype& pix = in_img[r][c];
if (area.contains(c-1,r))
{
const diff_type diff = edge_diff(pix, in_img[r ][c-1]);
sorted_edges.push_back(segment_image_edge_data(area,p,point(c-1,r),diff));
}
if (area.contains(c+1,r))
{
const diff_type diff = edge_diff(pix, in_img[r ][c+1]);
sorted_edges.push_back(segment_image_edge_data(area,p,point(c+1,r),diff));
}
if (area.contains(c ,r-1))
{
const diff_type diff = edge_diff(pix, in_img[r-1][c ]);
sorted_edges.push_back( segment_image_edge_data(area,p,point(c ,r-1),diff));
}
if (area.contains(c ,r+1))
{
const diff_type diff = edge_diff(pix, in_img[r+1][c ]);
sorted_edges.push_back( segment_image_edge_data(area,p,point(c ,r+1),diff));
}
}
// same thing as the above loop but now we do it on the interior of the image and therefore
// don't have to include the boundary checking if statements used above.
for (long r = 1; r+1 < in_img.nr(); ++r)
{
for (long c = 1; c+1 < in_img.nc(); ++c)
{
const point p(c,r);
const ptype& pix = in_img[r][c];
diff_type diff;
diff = edge_diff(pix, in_img[r ][c+1]);
sorted_edges.push_back( segment_image_edge_data(area,p,point(c+1,r),diff));
diff = edge_diff(pix, in_img[r+1][c+1]);
sorted_edges.push_back( segment_image_edge_data(area,p,point(c+1,r+1),diff));
diff = edge_diff(pix, in_img[r+1][c ]);
sorted_edges.push_back( segment_image_edge_data(area,p,point(c ,r+1),diff));
diff = edge_diff(pix, in_img[r-1][c+1]);
sorted_edges.push_back( segment_image_edge_data(area,p,point(c+1,r-1),diff));
}
}
std::sort(sorted_edges.begin(), sorted_edges.end());
// now start connecting blobs together to make a minimum spanning tree.
for (unsigned long i = 0; i < sorted_edges.size(); ++i)
{
const unsigned long idx1 = sorted_edges[i].idx1;
const unsigned long idx2 = sorted_edges[i].idx2;
unsigned long set1 = sets.find_set(idx1);
unsigned long set2 = sets.find_set(idx2);
if (set1 != set2)
{
const diff_type diff = sorted_edges[i].diff;
const diff_type tau1 = k/data[set1].component_size;
const diff_type tau2 = k/data[set2].component_size;
const diff_type mint = std::min(data[set1].internal_diff + tau1,
data[set2].internal_diff + tau2);
if (diff <= mint)
{ {
const unsigned long new_set = sets.merge_sets(set1, set2); const unsigned long new_set = sets.merge_sets(set1, set2);
data[new_set].component_size = data[set1].component_size + data[set2].component_size; data[new_set].component_size = data[set1].component_size + data[set2].component_size;
...@@ -261,6 +449,24 @@ namespace dlib ...@@ -261,6 +449,24 @@ namespace dlib
} }
} }
// now merge any really small blobs
if (min_size != 0)
{
for (unsigned long i = 0; i < sorted_edges.size(); ++i)
{
const unsigned long idx1 = sorted_edges[i].idx1;
const unsigned long idx2 = sorted_edges[i].idx2;
unsigned long set1 = sets.find_set(idx1);
unsigned long set2 = sets.find_set(idx2);
if (set1 != set2 && (data[set1].component_size < min_size || data[set2].component_size < min_size))
{
const unsigned long new_set = sets.merge_sets(set1, set2);
data[new_set].component_size = data[set1].component_size + data[set2].component_size;
}
}
}
unsigned long idx = 0; unsigned long idx = 0;
for (long r = 0; r < out_img.nr(); ++r) for (long r = 0; r < out_img.nr(); ++r)
{ {
......
...@@ -15,31 +15,34 @@ namespace dlib ...@@ -15,31 +15,34 @@ namespace dlib
void segment_image ( void segment_image (
const in_image_type& in_img, const in_image_type& in_img,
out_image_type& out_img, out_image_type& out_img,
const unsigned long k = 200, const double k = 200,
const unsigned long min_diff = 0 const unsigned long min_size = 10
); );
/*! /*!
requires requires
- in_image_type == an implementation of array2d/array2d_kernel_abstract.h - in_image_type == an implementation of array2d/array2d_kernel_abstract.h
- out_image_type == an implementation of array2d/array2d_kernel_abstract.h - out_image_type == an implementation of array2d/array2d_kernel_abstract.h
- in_image_type::type == an unsigned 8-bit or 16bit integer type. - in_image_type::type == Any pixel type with a pixel_traits specialization or a
dlib matrix object representing a row or column vector.
- out_image_type::type == unsigned integer type - out_image_type::type == unsigned integer type
- is_same_object(in_img, out_img) == false - is_same_object(in_img, out_img) == false
ensures ensures
- Attempts to segment in_img into regions which have some visual consistency to them. - Attempts to segment in_img into regions which have some visual consistency to
In particular, this function implements the algorithm described in the paper: them. In particular, this function implements the algorithm described in the
Efficient Graph-Based Image Segmentation by Felzenszwalb and Huttenlocher. paper: Efficient Graph-Based Image Segmentation by Felzenszwalb and Huttenlocher.
- #out_img.nr() == in_img.nr() - #out_img.nr() == in_img.nr()
- #out_img.nc() == in_img.nc() - #out_img.nc() == in_img.nc()
- for all valid r and c: - for all valid r and c:
- #out_img[r][c] == an integer value indicating the identity of the segment - #out_img[r][c] == an integer value indicating the identity of the segment
containing the pixel in_img[r][c]. containing the pixel in_img[r][c].
- The k parameter is a measure used to influence how large the segment regions will - The k parameter is a measure used to influence how large the segment regions
be. Larger k generally results in larger segments being produced. For a deeper will be. Larger k generally results in larger segments being produced. For
discussion of the k parameter you should consult the above referenced paper. a deeper discussion of the k parameter you should consult the above
- Any neighboring segments with an edge between them with a pixel difference <= min_diff referenced paper.
will always be merged. So making min_diff bigger makes this algorithm more eager - min_size is a lower bound on the size of the output segments. That is, it is
to merge neighboring segments. guaranteed that all output segments will have at least min_size pixels in
them (unless the whole image contains fewer than min_size pixels, in this
case the entire image will be put into a single segment).
!*/ !*/
// ---------------------------------------------------------------------------------------- // ----------------------------------------------------------------------------------------
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment