Commit 98579975 authored by Davis King's avatar Davis King

Added the scan_image() routine. It is a tool for sliding a set of rectangles

over an image space and finding the locations where the sum of pixels in
the rectangles exceeds a threshold.
parent f9b1aa35
// Copyright (C) 2011 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#ifndef DLIB_IMAGE_PROCESSInG_H___
#define DLIB_IMAGE_PROCESSInG_H___
#include "image_processing/scan_image.h"
#endif // DLIB_IMAGE_PROCESSInG_H___
// Copyright (C) 2011 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#ifndef DLIB_SCAN_iMAGE_H__
#define DLIB_SCAN_iMAGE_H__
#include <vector>
#include <utility>
#include "scan_image_abstract.h"
#include "../matrix.h"
#include "../algs.h"
namespace dlib
{
// ----------------------------------------------------------------------------------------
namespace impl
{
inline rectangle bounding_box_of_rects (
    const std::vector<std::pair<unsigned int, rectangle> >& rects,
    const point& origin
)
/*!
    ensures
        - Every rectangle in rects is shifted by origin and folded into a single
          enclosing rectangle.  That is, the result is the smallest rectangle
          containing translate_rect(rects[i].second,origin) for all valid i.
        - The image index stored in rects[i].first is not used here.
        - If rects is empty then a default constructed rectangle is returned.
!*/
{
    typedef std::vector<std::pair<unsigned int, rectangle> >::const_iterator rect_iterator;

    rectangle box;
    for (rect_iterator it = rects.begin(); it != rects.end(); ++it)
        box += translate_rect(it->second, origin);

    return box;
}
}
// ----------------------------------------------------------------------------------------
template <
    typename image_array_type
    >
bool all_images_same_size (
    const image_array_type& images
)
/*!
    ensures
        - returns true if every element of images has the same number of rows
          and columns as images[0], and false otherwise.
        - an empty array is considered to be "all the same size".
!*/
{
    // Nothing to compare against, so trivially consistent.
    if (images.size() == 0)
        return true;

    unsigned long idx = 0;
    while (idx < images.size())
    {
        // Rows are checked first; columns are only looked at when rows match.
        if (images[0].nr() != images[idx].nr())
            return false;
        if (images[0].nc() != images[idx].nc())
            return false;
        ++idx;
    }

    return true;
}
// ----------------------------------------------------------------------------------------
template <
typename image_array_type
>
double sum_of_rects_in_images (
const image_array_type& images,
const std::vector<std::pair<unsigned int, rectangle> >& rects,
const point& origin
)
{
DLIB_ASSERT(all_images_same_size(images),
"\t double sum_of_rects_in_images()"
<< "\n\t Invalid arguments given to this function."
<< "\n\t all_images_same_size(images): " << all_images_same_size(images)
);
#ifdef ENABLE_ASSERTS
for (unsigned long i = 0; i < rects.size(); ++i)
{
DLIB_ASSERT(rects[i].first < images.size(),
"\t double sum_of_rects_in_images()"
<< "\n\t rects["<<i<<"].first must refer to a valid image."
<< "\n\t rects["<<i<<"].first: " << rects[i].first
<< "\n\t images.size(): " << images.size()
);
}
#endif
typedef typename image_array_type::type::type pixel_type;
typedef typename promote<pixel_type>::type ptype;
ptype temp = 0;
for (unsigned long i = 0; i < rects.size(); ++i)
{
const typename image_array_type::type& img = images[rects[i].first];
const rectangle rect = get_rect(img).intersect(translate_rect(rects[i].second,origin));
temp += sum(matrix_cast<ptype>(subm(array_to_matrix(img), rect)));
}
return static_cast<double>(temp);
}
// ----------------------------------------------------------------------------------------
// Slides the set of rectangles in rects over every location (c,r) of images[0],
// visiting rows top to bottom and columns left to right, and appends
// (sum, point(c,r)) to dets whenever the sum of the pixels under the translated
// rectangles is >= thresh.  Scanning stops as soon as dets holds max_dets
// entries.  dets is always cleared first, and nothing is scanned when
// max_dets == 0.
//
// The sums are maintained incrementally: for each rectangle a vector of
// per-column vertical strip sums is kept and updated as the window moves down
// a row, and the running total cur_sum is updated as the window moves right
// a column.
template <
typename image_array_type
>
void scan_image (
std::vector<std::pair<double, point> >& dets,
const image_array_type& images,
const std::vector<std::pair<unsigned int, rectangle> >& rects,
const double thresh,
const unsigned long max_dets
)
{
// make sure requires clause is not broken
DLIB_ASSERT(images.size() > 0 && rects.size() > 0 && all_images_same_size(images),
"\t void scan_image()"
<< "\n\t Invalid arguments given to this function."
<< "\n\t images.size(): " << images.size()
<< "\n\t rects.size(): " << rects.size()
<< "\n\t all_images_same_size(images): " << all_images_same_size(images)
);
#ifdef ENABLE_ASSERTS
// every rectangle must reference an image that exists
for (unsigned long i = 0; i < rects.size(); ++i)
{
DLIB_ASSERT(rects[i].first < images.size(),
"\t void scan_image()"
<< "\n\t rects["<<i<<"].first must refer to a valid image."
<< "\n\t rects["<<i<<"].first: " << rects[i].first
<< "\n\t images.size(): " << images.size()
);
}
#endif
dets.clear();
if (max_dets == 0)
return;
typedef typename image_array_type::type::type pixel_type;
typedef typename promote<pixel_type>::type ptype;
// column_sums[i][j] is the sum of the pixels of rects[i]'s image in image
// column (rects[i].second.left()-1+j), over the rows currently covered by
// rects[i].  It has img.nc()+width entries so that it covers every column the
// rectangle can touch while its origin moves from column -1 to column nc-1.
std::vector<std::vector<ptype> > column_sums(rects.size());
for (unsigned long i = 0; i < column_sums.size(); ++i)
{
const typename image_array_type::type& img = images[rects[i].first];
column_sums[i].resize(img.nc() + rects[i].second.width(),0);
// rows covered by rects[i] when the window origin is on row -1
const long top = -1 + rects[i].second.top();
const long bottom = -1 + rects[i].second.bottom();
long left = rects[i].second.left()-1;
// initialize column_sums[i] at row -1
for (unsigned long j = 0; j < column_sums[i].size(); ++j)
{
// a one pixel wide vertical strip, clipped to the image so that pixels
// outside the image count as zero
rectangle strip(left,top,left,bottom);
strip = strip.intersect(get_rect(img));
if (!strip.is_empty())
{
column_sums[i][j] = sum(matrix_cast<ptype>(subm(array_to_matrix(img),strip)));
}
++left;
}
}
const rectangle area = get_rect(images[0]);
// Figure out the area of the image where we won't need to do boundary checking
// when sliding the boxes around.
rectangle bound = dlib::impl::bounding_box_of_rects(rects, point(0,0));
rectangle free_area = get_rect(images[0]);
free_area.left() -= bound.left();
// the extra -1 is because the loops below read the pixel one row above the
// top of each rectangle (row r + top - 1)
free_area.top() -= bound.top()-1;
free_area.right() -= bound.right();
free_area.bottom() -= bound.bottom();
// save widths to avoid computing them over and over
std::vector<long> widths(rects.size());
for (unsigned long i = 0; i < rects.size(); ++i)
widths[i] = rects[i].second.width();
// Now do the bulk of the scanning work.
for (long r = 0; r < images[0].nr(); ++r)
{
// set to sum at point(-1,r). i.e. should be equal to sum_of_rects_in_images(images, rects, point(-1,r))
// We compute its value in the next loop.
ptype cur_sum = 0;
// Update the first part of column_sums since we only work on the c+width part of column_sums
// in the main loop.
for (unsigned long i = 0; i < rects.size(); ++i)
{
const typename image_array_type::type& img = images[rects[i].first];
// top is the row that just left the rectangle, bottom is the row that just entered it
const long top = r + rects[i].second.top() - 1;
const long bottom = r + rects[i].second.bottom();
const long width = rects[i].second.width();
for (long k = 0; k < width; ++k)
{
// image column that column_sums[i][k] refers to (i.e. left()-1+k)
const long right = k-width + rects[i].second.right();
const ptype br_corner = area.contains(right,bottom) ? img[bottom][right] : 0;
const ptype tr_corner = area.contains(right,top) ? img[top][right] : 0;
// update the sum in this column now that we are on the next row
column_sums[i][k] = column_sums[i][k] + br_corner - tr_corner;
cur_sum += column_sums[i][k];
}
}
for (long c = 0; c < images[0].nc(); ++c)
{
// if we don't need to do the bounds checking on the image
if (free_area.contains(c,r))
{
for (unsigned long i = 0; i < rects.size(); ++i)
{
const typename image_array_type::type& img = images[rects[i].first];
const long top = r + rects[i].second.top() - 1;
const long bottom = r + rects[i].second.bottom();
const long right = c + rects[i].second.right();
const long width = widths[i];
// no area.contains() tests here, the pixels are read directly
const ptype br_corner = img[bottom][right];
const ptype tr_corner = img[top][right];
// update the sum in this column now that we are on the next row
column_sums[i][c+width] = column_sums[i][c+width] + br_corner - tr_corner;
// add in the new right side of the rect and subtract the old right side.
cur_sum = cur_sum + column_sums[i][c+width] - column_sums[i][c];
}
}
else
{
// same update as above, except pixels outside the image are read as zero
for (unsigned long i = 0; i < rects.size(); ++i)
{
const typename image_array_type::type& img = images[rects[i].first];
const long top = r + rects[i].second.top() - 1;
const long bottom = r + rects[i].second.bottom();
const long right = c + rects[i].second.right();
const long width = widths[i];
const ptype br_corner = area.contains(right,bottom) ? img[bottom][right] : 0;
const ptype tr_corner = area.contains(right,top) ? img[top][right] : 0;
// update the sum in this column now that we are on the next row
column_sums[i][c+width] = column_sums[i][c+width] + br_corner - tr_corner;
// add in the new right side of the rect and subtract the old right side.
cur_sum = cur_sum + column_sums[i][c+width] - column_sums[i][c];
}
}
// cur_sum is now the total for the window located at point(c,r)
if (cur_sum >= thresh)
{
dets.push_back(std::make_pair(cur_sum, point(c,r)));
// stop early once the requested number of detections has been collected
if (dets.size() >= max_dets)
return;
}
}
}
}
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_SCAN_iMAGE_H__
// Copyright (C) 2011 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#undef DLIB_SCAN_iMAGE_ABSTRACT_H__
#ifdef DLIB_SCAN_iMAGE_ABSTRACT_H__
#include <vector>
#include <utility>
#include "../algs.h"
namespace dlib
{
// ----------------------------------------------------------------------------------------
template <
typename image_array_type
>
bool all_images_same_size (
const image_array_type& images
);
/*!
requires
- image_array_type == an implementation of array/array_kernel_abstract.h
- image_array_type::type == an implementation of array2d/array2d_kernel_abstract.h
ensures
- if (all elements of images have the same dimensions (i.e.
for all i and j: get_rect(images[i]) == get_rect(images[j]))) then
- returns true
- else
- returns false
- if images.size() == 0 then this function returns true.
!*/
// ----------------------------------------------------------------------------------------
template <
typename image_array_type
>
double sum_of_rects_in_images (
const image_array_type& images,
const std::vector<std::pair<unsigned int, rectangle> >& rects,
const point& origin
);
/*!
requires
- image_array_type == an implementation of array/array_kernel_abstract.h
- image_array_type::type == an implementation of array2d/array2d_kernel_abstract.h
- image_array_type::type::type == a scalar pixel type (e.g. int rather than rgb_pixel)
- all_images_same_size(images) == true
- for all valid i: rects[i].first < images.size()
(i.e. all the rectangles must reference valid elements of images)
ensures
- returns the sum of the pixels inside the given rectangles. To be precise,
let RECT_SUM[i] = sum of pixels inside the rectangle translate_rect(rects[i].second, origin)
from the image images[rects[i].first]. Then this function returns the
sum of RECT_SUM[i] for all the valid values of i.
- Each translated rectangle is intersected with the image before summing, so
any part of a rectangle that lies outside the image contributes nothing
to RECT_SUM[i].
!*/
// ----------------------------------------------------------------------------------------
template <
typename image_array_type
>
void scan_image (
std::vector<std::pair<double, point> >& dets,
const image_array_type& images,
const std::vector<std::pair<unsigned int, rectangle> >& rects,
const double thresh,
const unsigned long max_dets
);
/*!
requires
- image_array_type == an implementation of array/array_kernel_abstract.h
- image_array_type::type == an implementation of array2d/array2d_kernel_abstract.h
- image_array_type::type::type == a scalar pixel type (e.g. int rather than rgb_pixel)
- images.size() > 0
- rects.size() > 0
- all_images_same_size(images) == true
- for all valid i: rects[i].first < images.size()
(i.e. all the rectangles must reference valid elements of images)
ensures
- slides the set of rectangles over the image space and reports the locations
which give a sum greater than or equal to thresh.
- Specifically, we have:
- #dets.size() <= max_dets
(note that dets is cleared before new detections are added by scan_image())
- for all valid i:
- #dets[i].first == sum_of_rects_in_images(images,rects,#dets[i].second) >= thresh
- Locations are visited one row at a time, from the top row to the bottom row
and from left to right within a row.  Scanning stops as soon as max_dets
detections have been found, so #dets holds the first detections in that order.
- if (max_dets == 0) then #dets.size() == 0
!*/
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_SCAN_iMAGE_ABSTRACT_H__
......@@ -82,6 +82,7 @@ set (tests
rand.cpp
read_write_mutex.cpp
reference_counter.cpp
scan_image.cpp
sequence.cpp
serialize.cpp
set.cpp
......
......@@ -97,6 +97,7 @@ SRC += queue.cpp
SRC += rand.cpp
SRC += read_write_mutex.cpp
SRC += reference_counter.cpp
SRC += scan_image.cpp
SRC += sequence.cpp
SRC += serialize.cpp
SRC += set.cpp
......
// Copyright (C) 2011 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#include <sstream>
#include <string>
#include <cstdlib>
#include <ctime>
#include "dlib/image_processing.h"
#include "dlib/test/tester.h"
#include "dlib/image_transforms.h"
#include "dlib/pixel.h"
#include "dlib/array2d.h"
#include "dlib/array.h"
// ----------------------------------------------------------------------------------------
namespace
{
using namespace test;
using namespace dlib;
using namespace std;
// Declare the logger we will use in this test. The name of the tester
// should start with "test."
logger dlog("test.scan_image");
// ----------------------------------------------------------------------------------------
template <
typename image_array_type
>
void scan_image_i (
std::vector<std::pair<double, point> >& dets,
const image_array_type& images,
const std::vector<std::pair<unsigned int, rectangle> >& rects,
const double thresh,
const unsigned long max_dets
)
{
typedef typename image_array_type::type::type pixel_type;
typedef typename promote<pixel_type>::type ptype;
typename array<integral_image_generic<ptype> >::kernel_2a iimg;
iimg.set_max_size(images.size());
iimg.set_size(images.size());
for (unsigned long i = 0; i < iimg.size(); ++i)
iimg[i].load(images[i]);
dets.clear();
for (long r = 0; r < images[0].nr(); ++r)
{
for (long c = 0; c < images[0].nc(); ++c)
{
ptype temp = 0;
for (unsigned long i = 0; i < rects.size(); ++i)
{
rectangle rtemp = translate_rect(rects[i].second,point(c,r)).intersect(get_rect(images[0]));
if (rtemp.is_empty() == false)
temp += iimg[rects[i].first].get_sum_of_area(rtemp);
}
if (temp > thresh)
{
dets.push_back(std::make_pair(temp, point(c,r)));
if (dets.size() >= max_dets)
return;
}
}
}
}
// ----------------------------------------------------------------------------------------
template <
typename image_array_type
>
void scan_image_old (
std::vector<std::pair<double, point> >& dets,
const image_array_type& images,
const std::vector<std::pair<unsigned int, rectangle> >& rects,
const double thresh,
const unsigned long max_dets
)
{
dets.clear();
if (max_dets == 0)
return;
typedef typename image_array_type::type::type pixel_type;
typedef typename promote<pixel_type>::type ptype;
std::vector<std::vector<ptype> > column_sums(rects.size());
for (unsigned long i = 0; i < column_sums.size(); ++i)
{
const typename image_array_type::type& img = images[rects[i].first];
column_sums[i].resize(img.nc() + rects[i].second.width(),0);
const long top = -1 + rects[i].second.top();
const long bottom = -1 + rects[i].second.bottom();
long left = rects[i].second.left()-1;
// initialize column_sums[i] at row -1
for (unsigned long j = 0; j < column_sums[i].size(); ++j)
{
rectangle strip(left,top,left,bottom);
strip = strip.intersect(get_rect(img));
if (!strip.is_empty())
{
column_sums[i][j] = sum(matrix_cast<ptype>(subm(array_to_matrix(img),strip)));
}
++left;
}
}
const rectangle area = get_rect(images[0]);
for (long r = 0; r < images[0].nr(); ++r)
{
// set to sum at point(-1,r). i.e. should be equal to sum_of_rects_in_images(images, rects, point(-1,r))
// We compute it's value in the next loop.
ptype cur_sum = 0;
// Update the first part of column_sums since we only work on the c+width part of column_sums
// in the main loop.
for (unsigned long i = 0; i < rects.size(); ++i)
{
const typename image_array_type::type& img = images[rects[i].first];
const long top = r + rects[i].second.top() - 1;
const long bottom = r + rects[i].second.bottom();
const long width = rects[i].second.width();
for (long k = 0; k < width; ++k)
{
const long right = k-width + rects[i].second.right();
const ptype br_corner = area.contains(right,bottom) ? img[bottom][right] : 0;
const ptype tr_corner = area.contains(right,top) ? img[top][right] : 0;
// update the sum in this column now that we are on the next row
column_sums[i][k] = column_sums[i][k] + br_corner - tr_corner;
cur_sum += column_sums[i][k];
}
}
for (long c = 0; c < images[0].nc(); ++c)
{
for (unsigned long i = 0; i < rects.size(); ++i)
{
const typename image_array_type::type& img = images[rects[i].first];
const long top = r + rects[i].second.top() - 1;
const long bottom = r + rects[i].second.bottom();
const long right = c + rects[i].second.right();
const long width = rects[i].second.width();
const ptype br_corner = area.contains(right,bottom) ? img[bottom][right] : 0;
const ptype tr_corner = area.contains(right,top) ? img[top][right] : 0;
// update the sum in this column now that we are on the next row
column_sums[i][c+width] = column_sums[i][c+width] + br_corner - tr_corner;
// add in the new right side of the rect and subtract the old right side.
cur_sum = cur_sum + column_sums[i][c+width] - column_sums[i][c];
}
if (cur_sum > thresh)
{
dets.push_back(std::make_pair(cur_sum, point(c,r)));
if (dets.size() >= max_dets)
return;
}
}
}
}
// ----------------------------------------------------------------------------------------
void run_test1()
{
dlog << LINFO << "run_test1()";
print_spinner();
array2d<unsigned char> img, temp_img;
img.set_size(600,600);
assign_all_pixels(img,0);
rectangle rect = centered_rect(10,10,5,5);
dlog << LTRACE << "expected: 10,10";
fill_rect(img, rect, 255);
array<array2d<unsigned char> >::expand_1b images;
std::vector<std::pair<unsigned int, rectangle> > rects;
for (int i = 0; i < 10; ++i)
{
assign_image(temp_img, img);
images.push_back(temp_img);
rects.push_back(make_pair(i,centered_rect(0,0,5,5)));
}
std::vector<std::pair<double, point> > dets, dets2, dets3;
dlog << LTRACE << "best score: "<< sum_of_rects_in_images(images,rects,point(10,10));
scan_image(dets,images,rects,30000, 100);
scan_image_i(dets2,images,rects,30000, 100);
scan_image_old(dets3,images,rects,30000, 100);
dlog << LTRACE << "dets.size(): "<< dets.size();
dlog << LTRACE << "dets2.size(): "<< dets2.size();
dlog << LTRACE << "dets3.size(): "<< dets3.size();
DLIB_TEST(dets.size() == dets2.size());
DLIB_TEST(dets.size() == dets3.size());
for (unsigned long i = 0; i < dets.size(); ++i)
{
//dlog << LTRACE << "dets["<<i<<"]: " << dets[i].second << " -> " << dets[i].first;
//dlog << LTRACE << "dets2["<<i<<"]: " << dets2[i].second << " -> " << dets2[i].first;
//dlog << LTRACE << "dets3["<<i<<"]: " << dets3[i].second << " -> " << dets3[i].first;
DLIB_TEST(sum_of_rects_in_images(images, rects, dets[i].second) == dets[i].first);
DLIB_TEST(sum_of_rects_in_images(images, rects, dets2[i].second) == dets2[i].first);
DLIB_TEST(sum_of_rects_in_images(images, rects, dets3[i].second) == dets3[i].first);
}
}
// ----------------------------------------------------------------------------------------
void run_test2()
{
print_spinner();
dlog << LINFO << "run_test2()";
array2d<unsigned char> img, temp_img;
img.set_size(600,600);
assign_all_pixels(img,0);
rectangle rect = centered_rect(10,11,5,6);
dlog << LTRACE << "expected: 10,11";
fill_rect(img, rect, 255);
array<array2d<unsigned char> >::expand_1b images;
std::vector<std::pair<unsigned int, rectangle> > rects;
for (int i = 0; i < 10; ++i)
{
assign_image(temp_img, img);
images.push_back(temp_img);
rects.push_back(make_pair(i,centered_rect(0,0,5,5)));
rects.push_back(make_pair(i,centered_rect(3,2,5,6)));
}
std::vector<std::pair<double, point> > dets, dets2, dets3;
scan_image(dets,images,rects,30000, 100);
scan_image_i(dets2,images,rects,30000, 100);
scan_image_old(dets3,images,rects,30000, 100);
dlog << LTRACE << "dets.size(): "<< dets.size();
dlog << LTRACE << "dets2.size(): "<< dets2.size();
dlog << LTRACE << "dets3.size(): "<< dets3.size();
DLIB_TEST(dets.size() == dets2.size());
DLIB_TEST(dets.size() == dets3.size());
for (unsigned long i = 0; i < dets.size(); ++i)
{
//dlog << LTRACE << "dets["<<i<<"]: " << dets[i].second << " -> " << dets[i].first;
//dlog << LTRACE << "dets2["<<i<<"]: " << dets2[i].second << " -> " << dets2[i].first;
//dlog << LTRACE << "dets3["<<i<<"]: " << dets3[i].second << " -> " << dets3[i].first;
DLIB_TEST(sum_of_rects_in_images(images, rects, dets[i].second) == dets[i].first);
DLIB_TEST(sum_of_rects_in_images(images, rects, dets2[i].second) == dets2[i].first);
DLIB_TEST(sum_of_rects_in_images(images, rects, dets3[i].second) == dets3[i].first);
}
}
// ----------------------------------------------------------------------------------------
template <typename pixel_type>
void run_test3(const double thresh)
{
dlog << LINFO << "running run_test3("<<thresh<<")";
dlib::rand rnd;
rnd.set_seed("235");
typename array<array2d<pixel_type> >::expand_1b images;
images.resize(1);
images[0].set_size(200,180);
for (int iter = 0; iter < 50; ++iter)
{
print_spinner();
assign_all_pixels(images[0], thresh - 0.0001);
for (int i = 0; i < 20; ++i)
{
point p1(rnd.get_random_32bit_number()%images[0].nc(),
rnd.get_random_32bit_number()%images[0].nr());
point p2(rnd.get_random_32bit_number()%images[0].nc(),
rnd.get_random_32bit_number()%images[0].nr());
rectangle rect(p1,p2);
fill_rect(images[0], rect, static_cast<pixel_type>(rnd.get_random_double()*10 - 5));
}
std::vector<std::pair<unsigned int, rectangle> > rects;
rects.push_back(make_pair(0,centered_rect(0,0,1+rnd.get_random_32bit_number()%40,1+rnd.get_random_32bit_number()%40)));
rects.push_back(make_pair(0,centered_rect(0,0,1+rnd.get_random_32bit_number()%40,1+rnd.get_random_32bit_number()%40)));
std::vector<std::pair<double, point> > dets, dets2, dets3;
scan_image(dets,images,rects,thresh, 100);
scan_image_i(dets2,images,rects,thresh, 100);
scan_image_old(dets3,images,rects,thresh, 100);
dlog << LTRACE << "dets.size(): "<< dets.size();
dlog << LTRACE << "dets2.size(): "<< dets2.size();
dlog << LTRACE << "dets3.size(): "<< dets3.size();
DLIB_TEST(dets.size() == dets2.size());
DLIB_TEST(dets.size() == dets3.size());
for (unsigned long i = 0; i < dets.size(); ++i)
{
//dlog << LTRACE << "dets["<<i<<"]: " << dets[i].second << " -> " << dets[i].first;
//dlog << LTRACE << "dets2["<<i<<"]: " << dets2[i].second << " -> " << dets2[i].first;
//dlog << LTRACE << "dets3["<<i<<"]: " << dets3[i].second << " -> " << dets3[i].first;
DLIB_TEST_MSG(std::abs(sum_of_rects_in_images(images, rects, dets[i].second) - dets[i].first) < 1e-6,
"error: "<< sum_of_rects_in_images(images, rects, dets[i].second) - dets[i].first
<< " dets["<<i<<"].second: " << dets[i].second
);
DLIB_TEST_MSG(std::abs(sum_of_rects_in_images(images, rects, dets2[i].second) - dets2[i].first) < 1e-6,
sum_of_rects_in_images(images, rects, dets2[i].second) - dets2[i].first
);
DLIB_TEST_MSG(std::abs(sum_of_rects_in_images(images, rects, dets3[i].second) - dets3[i].first) < 1e-6,
"error: "<< sum_of_rects_in_images(images, rects, dets3[i].second) - dets3[i].first
<< " dets3["<<i<<"].first: " << dets3[i].first
<< " dets3["<<i<<"].second: " << dets3[i].second
);
}
}
}
// ----------------------------------------------------------------------------------------
class scan_image_tester : public tester
{
public:
scan_image_tester (
) :
tester ("test_scan_image",
"Runs tests on the scan_image routine.")
{}
void perform_test (
)
{
run_test1();
run_test2();
run_test3<unsigned char>(1);
run_test3<unsigned char>(-1);
run_test3<double>(1);
run_test3<double>(-1);
}
} a;
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment