Commit e0a6e305 authored by Davis King

Refactored the image pyramid code. Now there is just one templated object,
pyramid_down, and you give it the downsampling amount as a template argument.
parent 39ed906c
This source diff could not be displayed because it is too large. You can view the blob instead.
...@@ -10,11 +10,18 @@ ...@@ -10,11 +10,18 @@
namespace dlib namespace dlib
{ {
template <
unsigned int N
>
class pyramid_down : noncopyable class pyramid_down : noncopyable
{ {
/*! /*!
REQUIREMENTS ON N
N > 1
WHAT THIS OBJECT REPRESENTS WHAT THIS OBJECT REPRESENTS
This is a simple functor to help create image pyramids. This is a simple functor to help create image pyramids. In particular, it
downsamples images at a ratio of N to N-1.
WARNING, when mapping rectangles from one layer of a pyramid WARNING, when mapping rectangles from one layer of a pyramid
...@@ -43,18 +50,14 @@ namespace dlib ...@@ -43,18 +50,14 @@ namespace dlib
- pixel_traits<typename in_image_type::type>::has_alpha == false - pixel_traits<typename in_image_type::type>::has_alpha == false
- pixel_traits<typename out_image_type::type>::has_alpha == false - pixel_traits<typename out_image_type::type>::has_alpha == false
ensures ensures
- #down will contain an image that is roughly half the size of the original - #down will contain an image that is roughly (N-1)/N times the size of the
image. To be specific, this function performs the following steps: original image.
- 1. Applies a 5x5 Gaussian filter to the original image to smooth it a little. - If both input and output images contain RGB pixels then the downsampled image will
- 2. Every other row and column is discarded to create an image half the size
of the original. This smaller image is stored in #down.
- if both input and output images contain RGB pixels then the downsampled image will
be in color. Otherwise, the downsampling will be performed in a grayscale mode. be in color. Otherwise, the downsampling will be performed in a grayscale mode.
- The location of a point P in original image will show up at point point_down(P) - The location of a point P in original image will show up at point point_down(P)
in the #down image. in the #down image.
- Note that some points on the border of the original image will correspond to - Note that some points on the border of the original image might correspond to
points outside the #down image. This is because the 5x5 filter is not applied points outside the #down image.
at the borders.
!*/ !*/
// ------------------------------- // -------------------------------
...@@ -151,61 +154,6 @@ namespace dlib ...@@ -151,61 +154,6 @@ namespace dlib
}; };
// ----------------------------------------------------------------------------------------
class pyramid_down_3_2 : noncopyable
{
/*!
WHAT THIS OBJECT REPRESENTS
This is a function object with an interface identical to pyramid_down (defined
at the top of this file) except that it downsamples images at a ratio of 3 to 2
instead of 2 to 1.
!*/
};
// ----------------------------------------------------------------------------------------
class pyramid_down_4_3 : noncopyable
{
/*!
WHAT THIS OBJECT REPRESENTS
This is a function object with an interface identical to pyramid_down (defined
at the top of this file) except that it downsamples images at a ratio of 4 to 3
instead of 2 to 1.
!*/
};
// ----------------------------------------------------------------------------------------
class pyramid_down_5_4 : noncopyable
{
/*!
WHAT THIS OBJECT REPRESENTS
This is a function object with an interface identical to pyramid_down (defined
at the top of this file) except that it downsamples images at a ratio of 5 to 4
instead of 2 to 1.
!*/
};
// ----------------------------------------------------------------------------------------
template <
unsigned int N
>
class pyramid_down_generic : noncopyable
{
/*!
REQUIREMENTS ON N
N > 1
WHAT THIS OBJECT REPRESENTS
This is a function object with an interface identical to pyramid_down
(defined at the top of this file) except that it downsamples images at a
ratio of N to N-1 instead of 2 to 1.
!*/
};
// ---------------------------------------------------------------------------------------- // ----------------------------------------------------------------------------------------
class pyramid_disable : noncopyable class pyramid_disable : noncopyable
......
...@@ -379,7 +379,7 @@ namespace ...@@ -379,7 +379,7 @@ namespace
make_simple_test_data(images, object_locations); make_simple_test_data(images, object_locations);
typedef hashed_feature_image<hog_image<3,3,1,4,hog_signed_gradient,hog_full_interpolation> > feature_extractor_type; typedef hashed_feature_image<hog_image<3,3,1,4,hog_signed_gradient,hog_full_interpolation> > feature_extractor_type;
typedef scan_image_pyramid<pyramid_down, feature_extractor_type> image_scanner_type; typedef scan_image_pyramid<pyramid_down<2>, feature_extractor_type> image_scanner_type;
image_scanner_type scanner; image_scanner_type scanner;
const rectangle object_box = compute_box_dimensions(1,35*35); const rectangle object_box = compute_box_dimensions(1,35*35);
scanner.add_detection_template(object_box, create_grid_detection_template(object_box,2,2)); scanner.add_detection_template(object_box, create_grid_detection_template(object_box,2,2));
...@@ -463,7 +463,7 @@ namespace ...@@ -463,7 +463,7 @@ namespace
make_simple_test_data(images, object_locations); make_simple_test_data(images, object_locations);
typedef hashed_feature_image<hog_image<3,3,1,4,hog_signed_gradient,hog_full_interpolation> > feature_extractor_type; typedef hashed_feature_image<hog_image<3,3,1,4,hog_signed_gradient,hog_full_interpolation> > feature_extractor_type;
typedef scan_image_pyramid<pyramid_down, feature_extractor_type> image_scanner_type; typedef scan_image_pyramid<pyramid_down<2>, feature_extractor_type> image_scanner_type;
image_scanner_type scanner; image_scanner_type scanner;
const rectangle object_box = compute_box_dimensions(1,35*35); const rectangle object_box = compute_box_dimensions(1,35*35);
std::vector<rectangle> mboxes; std::vector<rectangle> mboxes;
...@@ -512,7 +512,7 @@ namespace ...@@ -512,7 +512,7 @@ namespace
make_simple_test_data(images, object_locations); make_simple_test_data(images, object_locations);
typedef hashed_feature_image<fine_hog_image<3,3,2,4,hog_signed_gradient> > feature_extractor_type; typedef hashed_feature_image<fine_hog_image<3,3,2,4,hog_signed_gradient> > feature_extractor_type;
typedef scan_image_pyramid<pyramid_down, feature_extractor_type> image_scanner_type; typedef scan_image_pyramid<pyramid_down<2>, feature_extractor_type> image_scanner_type;
image_scanner_type scanner; image_scanner_type scanner;
const rectangle object_box = compute_box_dimensions(1,35*35); const rectangle object_box = compute_box_dimensions(1,35*35);
scanner.add_detection_template(object_box, create_grid_detection_template(object_box,2,2)); scanner.add_detection_template(object_box, create_grid_detection_template(object_box,2,2));
...@@ -555,7 +555,7 @@ namespace ...@@ -555,7 +555,7 @@ namespace
make_simple_test_data(images, object_locations); make_simple_test_data(images, object_locations);
typedef hashed_feature_image<poly_image<2> > feature_extractor_type; typedef hashed_feature_image<poly_image<2> > feature_extractor_type;
typedef scan_image_pyramid<pyramid_down, feature_extractor_type> image_scanner_type; typedef scan_image_pyramid<pyramid_down<2>, feature_extractor_type> image_scanner_type;
image_scanner_type scanner; image_scanner_type scanner;
const rectangle object_box = compute_box_dimensions(1,35*35); const rectangle object_box = compute_box_dimensions(1,35*35);
scanner.add_detection_template(object_box, create_grid_detection_template(object_box,2,2)); scanner.add_detection_template(object_box, create_grid_detection_template(object_box,2,2));
...@@ -598,7 +598,7 @@ namespace ...@@ -598,7 +598,7 @@ namespace
make_simple_test_data(images, object_locations); make_simple_test_data(images, object_locations);
typedef hashed_feature_image<poly_image<2> > feature_extractor_type; typedef hashed_feature_image<poly_image<2> > feature_extractor_type;
typedef scan_image_pyramid<pyramid_down_3_2, feature_extractor_type> image_scanner_type; typedef scan_image_pyramid<pyramid_down<3>, feature_extractor_type> image_scanner_type;
image_scanner_type scanner; image_scanner_type scanner;
const rectangle object_box = compute_box_dimensions(1,35*35); const rectangle object_box = compute_box_dimensions(1,35*35);
std::vector<rectangle> mboxes; std::vector<rectangle> mboxes;
...@@ -647,12 +647,12 @@ namespace ...@@ -647,12 +647,12 @@ namespace
make_simple_test_data(images, object_locations); make_simple_test_data(images, object_locations);
typedef nearest_neighbor_feature_image<poly_image<5> > feature_extractor_type; typedef nearest_neighbor_feature_image<poly_image<5> > feature_extractor_type;
typedef scan_image_pyramid<pyramid_down, feature_extractor_type> image_scanner_type; typedef scan_image_pyramid<pyramid_down<2>, feature_extractor_type> image_scanner_type;
image_scanner_type scanner; image_scanner_type scanner;
setup_grid_detection_templates(scanner, object_locations, 2, 2); setup_grid_detection_templates(scanner, object_locations, 2, 2);
feature_extractor_type nnfe; feature_extractor_type nnfe;
pyramid_down pyr_down; pyramid_down<2> pyr_down;
poly_image<5> polyi; poly_image<5> polyi;
nnfe.set_basis(randomly_sample_image_features(images, pyr_down, polyi, 80)); nnfe.set_basis(randomly_sample_image_features(images, pyr_down, polyi, 80));
scanner.copy_configuration(nnfe); scanner.copy_configuration(nnfe);
...@@ -697,7 +697,7 @@ namespace ...@@ -697,7 +697,7 @@ namespace
image_scanner_type scanner; image_scanner_type scanner;
feature_extractor_type nnfe; feature_extractor_type nnfe;
pyramid_down pyr_down; pyramid_down<2> pyr_down;
poly_image<5> polyi; poly_image<5> polyi;
nnfe.set_basis(randomly_sample_image_features(images, pyr_down, polyi, 80)); nnfe.set_basis(randomly_sample_image_features(images, pyr_down, polyi, 80));
scanner.copy_configuration(nnfe); scanner.copy_configuration(nnfe);
...@@ -737,7 +737,7 @@ namespace ...@@ -737,7 +737,7 @@ namespace
std::vector<std::vector<rectangle> > object_locations; std::vector<std::vector<rectangle> > object_locations;
make_simple_test_data(images, object_locations); make_simple_test_data(images, object_locations);
typedef scan_image_pyramid<pyramid_down_5_4, very_simple_feature_extractor> image_scanner_type; typedef scan_image_pyramid<pyramid_down<5>, very_simple_feature_extractor> image_scanner_type;
image_scanner_type scanner; image_scanner_type scanner;
const rectangle object_box = compute_box_dimensions(1,70*70); const rectangle object_box = compute_box_dimensions(1,70*70);
scanner.add_detection_template(object_box, create_grid_detection_template(object_box,2,2)); scanner.add_detection_template(object_box, create_grid_detection_template(object_box,2,2));
...@@ -771,7 +771,7 @@ namespace ...@@ -771,7 +771,7 @@ namespace
class pyramid_down_funny : noncopyable class pyramid_down_funny : noncopyable
{ {
pyramid_down pyr; pyramid_down<2> pyr;
public: public:
template <typename T> template <typename T>
......
...@@ -23,7 +23,7 @@ namespace ...@@ -23,7 +23,7 @@ namespace
void test_pyramid_down_grayscale() void test_pyramid_down_grayscale()
{ {
array2d<unsigned char> img, down; array2d<unsigned char> img, down;
pyramid_down pyr; pyramid_down<2> pyr;
img.set_size(300,264); img.set_size(300,264);
...@@ -52,7 +52,7 @@ void test_pyramid_down_rgb() ...@@ -52,7 +52,7 @@ void test_pyramid_down_rgb()
{ {
array2d<rgb_pixel> img; array2d<rgb_pixel> img;
array2d<bgr_pixel> down; array2d<bgr_pixel> down;
pyramid_down pyr; pyramid_down<2> pyr;
img.set_size(231, 351); img.set_size(231, 351);
...@@ -325,59 +325,59 @@ void test_pyramid_down_small_sizes() ...@@ -325,59 +325,59 @@ void test_pyramid_down_small_sizes()
test_pyramid_down_rgb(); test_pyramid_down_rgb();
print_spinner(); print_spinner();
dlog << LINFO << "call test_pyramid_down_small_sizes<pyramid_down>();"; dlog << LINFO << "call test_pyramid_down_small_sizes<pyramid_down<2> >();";
test_pyramid_down_small_sizes<pyramid_down>(); test_pyramid_down_small_sizes<pyramid_down<2> >();
dlog << LINFO << "call test_pyramid_down_small_sizes<pyramid_down_3_2>();"; dlog << LINFO << "call test_pyramid_down_small_sizes<pyramid_down<3> >();";
test_pyramid_down_small_sizes<pyramid_down_3_2>(); test_pyramid_down_small_sizes<pyramid_down<3> >();
dlog << LINFO << "call test_pyramid_down_small_sizes<pyramid_down_4_3>();"; dlog << LINFO << "call test_pyramid_down_small_sizes<pyramid_down<4> >();";
test_pyramid_down_small_sizes<pyramid_down_4_3>(); test_pyramid_down_small_sizes<pyramid_down<4> >();
dlog << LINFO << "call test_pyramid_down_small_sizes<pyramid_down_5_4>();"; dlog << LINFO << "call test_pyramid_down_small_sizes<pyramid_down<5> >();";
test_pyramid_down_small_sizes<pyramid_down_5_4>(); test_pyramid_down_small_sizes<pyramid_down<5> >();
dlog << LINFO << "call test_pyramid_down_small_sizes<pyramid_disable>();"; dlog << LINFO << "call test_pyramid_down_small_sizes<pyramid_disable>();";
test_pyramid_down_small_sizes<pyramid_disable>(); test_pyramid_down_small_sizes<pyramid_disable>();
dlog << LINFO << "call test_pyramid_down_small_sizes<pyramid_down_generic<3> >();"; dlog << LINFO << "call test_pyramid_down_small_sizes<pyramid_down<9> >();";
test_pyramid_down_small_sizes<pyramid_down_generic<3> >(); test_pyramid_down_small_sizes<pyramid_down<9> >();
print_spinner(); print_spinner();
dlog << LINFO << "call test_pyramid_down_rgb2<pyramid_down>();"; dlog << LINFO << "call test_pyramid_down_rgb2<pyramid_down<2> >();";
test_pyramid_down_rgb2<pyramid_down>(); test_pyramid_down_rgb2<pyramid_down<2> >();
print_spinner(); print_spinner();
dlog << LINFO << "call test_pyramid_down_rgb2<pyramid_down_3_2>();"; dlog << LINFO << "call test_pyramid_down_rgb2<pyramid_down<3> >();";
test_pyramid_down_rgb2<pyramid_down_3_2>(); test_pyramid_down_rgb2<pyramid_down<3> >();
print_spinner(); print_spinner();
dlog << LINFO << "call test_pyramid_down_rgb2<pyramid_down_4_3>();"; dlog << LINFO << "call test_pyramid_down_rgb2<pyramid_down<4> >();";
test_pyramid_down_rgb2<pyramid_down_4_3>(); test_pyramid_down_rgb2<pyramid_down<4> >();
print_spinner(); print_spinner();
dlog << LINFO << "call test_pyramid_down_rgb2<pyramid_down_5_4>();"; dlog << LINFO << "call test_pyramid_down_rgb2<pyramid_down<5> >();";
test_pyramid_down_rgb2<pyramid_down_5_4>(); test_pyramid_down_rgb2<pyramid_down<5> >();
print_spinner(); print_spinner();
dlog << LINFO << "call test_pyramid_down_rgb2<pyramid_down_generic<5> >();"; dlog << LINFO << "call test_pyramid_down_rgb2<pyramid_down<8> >();";
test_pyramid_down_rgb2<pyramid_down_generic<5> >(); test_pyramid_down_rgb2<pyramid_down<8> >();
print_spinner(); print_spinner();
dlog << LINFO << "call test_pyramid_down_grayscale2<pyramid_down>();"; dlog << LINFO << "call test_pyramid_down_grayscale2<pyramid_down<2> >();";
test_pyramid_down_grayscale2<pyramid_down>(); test_pyramid_down_grayscale2<pyramid_down<2> >();
print_spinner(); print_spinner();
dlog << LINFO << "call test_pyramid_down_grayscale2<pyramid_down_3_2>();"; dlog << LINFO << "call test_pyramid_down_grayscale2<pyramid_down<3> >();";
test_pyramid_down_grayscale2<pyramid_down_3_2>(); test_pyramid_down_grayscale2<pyramid_down<3> >();
print_spinner(); print_spinner();
dlog << LINFO << "call test_pyramid_down_grayscale2<pyramid_down_4_3>();"; dlog << LINFO << "call test_pyramid_down_grayscale2<pyramid_down<4> >();";
test_pyramid_down_grayscale2<pyramid_down_4_3>(); test_pyramid_down_grayscale2<pyramid_down<4> >();
print_spinner(); print_spinner();
dlog << LINFO << "call test_pyramid_down_grayscale2<pyramid_down_5_4>();"; dlog << LINFO << "call test_pyramid_down_grayscale2<pyramid_down<5> >();";
test_pyramid_down_grayscale2<pyramid_down_5_4>(); test_pyramid_down_grayscale2<pyramid_down<5> >();
print_spinner(); print_spinner();
dlog << LINFO << "call test_pyramid_down_grayscale2<pyramid_down_generic<6> >();"; dlog << LINFO << "call test_pyramid_down_grayscale2<pyramid_down<6> >();";
test_pyramid_down_grayscale2<pyramid_down_generic<6> >(); test_pyramid_down_grayscale2<pyramid_down<6> >();
} }
} a; } a;
......
...@@ -196,7 +196,7 @@ int main() ...@@ -196,7 +196,7 @@ int main()
make_simple_test_data(images, object_locations); make_simple_test_data(images, object_locations);
typedef scan_image_pyramid<pyramid_down_5_4, very_simple_feature_extractor> image_scanner_type; typedef scan_image_pyramid<pyramid_down<5>, very_simple_feature_extractor> image_scanner_type;
image_scanner_type scanner; image_scanner_type scanner;
// Instead of using setup_grid_detection_templates() like in object_detector_ex.cpp, lets manually // Instead of using setup_grid_detection_templates() like in object_detector_ex.cpp, lets manually
// setup the sliding window box. We use a window with the same shape as the white boxes we // setup the sliding window box. We use a window with the same shape as the white boxes we
......
...@@ -145,11 +145,12 @@ int main() ...@@ -145,11 +145,12 @@ int main()
parameters yourself. They are automatically populated by the parameters yourself. They are automatically populated by the
structural_object_detection_trainer. structural_object_detection_trainer.
The sliding window classifiers described above are applied to every level of an image The sliding window classifiers described above are applied to every level of an
pyramid. So you need to tell scan_image_pyramid what kind of pyramid you want to image pyramid. So you need to tell scan_image_pyramid what kind of pyramid you want
use. In this case we are using pyramid_down which downsamples each pyramid layer by to use. In this case we are using pyramid_down<2> which downsamples each pyramid
half (dlib also contains other version of pyramid_down which result in finer grained layer by half (if you want to use a finer image pyramid then just change the
pyramids). template argument to a larger value. For example, using pyramid_down<5> would
downsample each layer by a ratio of 5 to 4).
Finally, some of the feature extraction zones are allowed to move freely within the Finally, some of the feature extraction zones are allowed to move freely within the
object box. This means that when we are sliding the classifier over an image, some object box. This means that when we are sliding the classifier over an image, some
...@@ -168,7 +169,7 @@ int main() ...@@ -168,7 +169,7 @@ int main()
feature extraction regions. feature extraction regions.
*/ */
typedef hashed_feature_image<hog_image<3,3,1,4,hog_signed_gradient,hog_full_interpolation> > feature_extractor_type; typedef hashed_feature_image<hog_image<3,3,1,4,hog_signed_gradient,hog_full_interpolation> > feature_extractor_type;
typedef scan_image_pyramid<pyramid_down, feature_extractor_type> image_scanner_type; typedef scan_image_pyramid<pyramid_down<2>, feature_extractor_type> image_scanner_type;
image_scanner_type scanner; image_scanner_type scanner;
// The hashed_feature_image in the scanner needs to be supplied with a hash function capable // The hashed_feature_image in the scanner needs to be supplied with a hash function capable
......
...@@ -133,7 +133,7 @@ int main(int argc, char** argv) ...@@ -133,7 +133,7 @@ int main(int argc, char** argv)
typedef hashed_feature_image<hog_image<4,4,1,9,hog_signed_gradient,hog_full_interpolation> > feature_extractor_type; typedef hashed_feature_image<hog_image<4,4,1,9,hog_signed_gradient,hog_full_interpolation> > feature_extractor_type;
typedef scan_image_pyramid<pyramid_down_3_2, feature_extractor_type> image_scanner_type; typedef scan_image_pyramid<pyramid_down<3>, feature_extractor_type> image_scanner_type;
if (parser.option("t") || parser.option("cross-validate")) if (parser.option("t") || parser.option("cross-validate"))
{ {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment