Commit 954612b6 authored by Davis King

Cleaned up the code a bit. Still more cleaning to do.

parent b191400a
dlib/image_processing/shape_predictor.h
@@ -9,6 +9,7 @@
#include "../matrix.h"
#include "../geometry.h"
#include "../pixel.h"
#include "../console_progress_indicator.h"
namespace dlib
{
@@ -392,34 +393,131 @@ namespace dlib
!*/
public:
shape_predictor_trainer (
)
{
_cascade_depth = 10;
_tree_depth = 2;
_num_trees_per_cascade_level = 500;
_nu = 0.1;
_oversampling_amount = 20;
_feature_pool_size = 400;
_lambda = 0.1;
_num_test_splits = 20;
_feature_pool_region_padding = 0;
_verbose = false;
}
unsigned long get_cascade_depth (
) const { return _cascade_depth; }
void set_cascade_depth (
unsigned long depth
)
{
DLIB_CASSERT(depth > 0, "");
_cascade_depth = depth;
}
unsigned long get_tree_depth (
) const { return _tree_depth; }
void set_tree_depth (
unsigned long depth
)
{
DLIB_CASSERT(depth > 0, "");
_tree_depth = depth;
}
unsigned long get_num_trees_per_cascade_level (
) const { return _num_trees_per_cascade_level; }
void set_num_trees_per_cascade_level (
unsigned long num
)
{
DLIB_CASSERT( num > 0, "");
_num_trees_per_cascade_level = num;
}
double get_nu (
) const { return _nu; }
void set_nu (
double nu
)
{
DLIB_CASSERT(nu > 0,"");
_nu = nu;
}
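// Note: nu acts like the shrinkage (learning rate) term in gradient boosting.
// It is typically chosen in (0,1]; smaller values need more trees per cascade
// level but are less prone to overfitting.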
std::string get_random_seed (
) const { return rnd.get_seed(); }
void set_random_seed (
const std::string& seed
) { rnd.set_seed(seed); }
unsigned long get_oversampling_amount (
) const { return _oversampling_amount; }
void set_oversampling_amount (
unsigned long amount
)
{
DLIB_CASSERT(amount > 0, "");
_oversampling_amount = amount;
}
unsigned long get_feature_pool_size (
) const { return _feature_pool_size; }
void set_feature_pool_size (
unsigned long size
)
{
DLIB_CASSERT(size > 1, "");
_feature_pool_size = size;
}
double get_lambda (
) const { return _lambda; }
void set_lambda (
double lambda
)
{
DLIB_CASSERT(lambda > 0, "");
_lambda = lambda;
}
unsigned long get_num_test_splits (
) const { return _num_test_splits; }
void set_num_test_splits (
unsigned long num
)
{
DLIB_CASSERT(num > 0, "");
_num_test_splits = num;
}
double get_feature_pool_region_padding (
) const { return _feature_pool_region_padding; }
void set_feature_pool_region_padding (
double padding
)
{
_feature_pool_region_padding = padding;
}
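// Note: the padding expands the box from which the candidate feature pixels
// are sampled (see randomly_sample_pixel_coordinates() below).  A padding of
// 0 samples pixels from the tight bounding box of the mean shape.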
void be_verbose (
)
{
_verbose = true;
}
void be_quiet (
)
{
_verbose = false;
}
template <typename image_array>
shape_predictor train (
@@ -430,17 +528,21 @@ namespace dlib
using namespace impl;
DLIB_CASSERT(images.size() == objects.size() && images.size() > 0, "");
rnd.set_seed(get_random_seed());
std::vector<training_sample> samples;
const matrix<float,0,1> initial_shape = populate_training_sample_shapes(objects, samples);
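// Note: initial_shape is the mean of the training shapes.  It doubles as the
// starting shape from which the cascade iteratively moves toward the target.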
const std::vector<std::vector<dlib::vector<float,2> > > pixel_coordinates = randomly_sample_pixel_coordinates(initial_shape);
unsigned long trees_fit_so_far = 0;
console_progress_indicator pbar(get_cascade_depth()*get_num_trees_per_cascade_level());
if (_verbose)
std::cout << "Fitting trees..." << std::endl;
std::vector<std::vector<impl::regression_tree> > forests(get_cascade_depth());
// Now start doing the actual training by filling in the forests
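// Each tree is fit to the current residuals (target_shape - current_shape),
// so the cascade as a whole is trained by gradient boosting: every tree
// nudges the running shape estimate toward the annotated target.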
for (unsigned long cascade = 0; cascade < get_cascade_depth(); ++cascade)
{
// Each cascade uses a different set of pixels for its features. We compute
// their representations relative to the initial shape first.
std::vector<unsigned long> anchor_idx;
@@ -457,11 +559,20 @@
}
// Now start building the trees at this cascade level.
for (unsigned long i = 0; i < get_num_trees_per_cascade_level(); ++i)
{
forests[cascade].push_back(make_regression_tree(samples, pixel_coordinates[cascade]));
if (_verbose)
{
++trees_fit_so_far;
pbar.print_status(trees_fit_so_far);
}
}
}
if (_verbose)
std::cout << "Training complete " << std::endl;
return shape_predictor(initial_shape, forests, pixel_coordinates);
}
@@ -488,7 +599,7 @@ namespace dlib
/*!
CONVENTION
- feature_pixel_values.size() == get_feature_pool_size()
- feature_pixel_values[j] == the value of the j-th feature pool
pixel when you look it up relative to the shape in current_shape.
@@ -527,7 +638,7 @@ namespace dlib
impl::regression_tree tree;
// walk the tree in breadth first order
const unsigned long num_split_nodes = static_cast<unsigned long>(std::pow(2.0, (double)get_tree_depth())-1);
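// (A complete binary tree of depth D has 2^D-1 internal split nodes and 2^D
// leaves, so the sums array below needs num_split_nodes*2+1 entries in all.)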
std::vector<matrix<float,0,1> > sums(num_split_nodes*2+1);
for (unsigned long i = 0; i < samples.size(); ++i)
sums[0] += samples[i].target_shape - samples[i].current_shape;
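// sums[0] now holds the total shape residual over all samples; the leaf
// values of the tree are averages of these residuals for the samples that
// end up in each leaf.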
@@ -574,8 +685,8 @@ namespace dlib
double accept_prob;
do
{
feat.idx1 = rnd.get_random_32bit_number()%get_feature_pool_size();
feat.idx2 = rnd.get_random_32bit_number()%get_feature_pool_size();
const double dist = length(pixel_coordinates[feat.idx1]-pixel_coordinates[feat.idx2]);
accept_prob = std::exp(-dist/lambda);
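// Following Kazemi and Sullivan, a candidate pixel pair is accepted with
// probability exp(-dist/lambda), which biases the split features toward
// comparisons between nearby pixels.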
}
@@ -699,7 +810,7 @@ namespace dlib
sample.image_idx = i;
sample.rect = objects[i][j].get_rect();
sample.target_shape = object_to_shape(objects[i][j]);
for (unsigned long itr = 0; itr < get_oversampling_amount(); ++itr)
samples.push_back(sample);
mean_shape += sample.target_shape;
++count;
@@ -711,7 +822,7 @@ namespace dlib
// now go pick random initial shapes
for (unsigned long i = 0; i < samples.size(); ++i)
{
if ((i%get_oversampling_amount()) == 0)
{
// The mean shape is what we really use as an initial shape so always
// include it in the training set as an example starting shape.
@@ -742,13 +853,13 @@ namespace dlib
) const
/*!
ensures
- #pixel_coordinates.size() == get_feature_pool_size()
- for all valid i:
- pixel_coordinates[i] == a point in the box defined by the min/max x/y arguments.
!*/
{
pixel_coordinates.resize(get_feature_pool_size());
for (unsigned long i = 0; i < get_feature_pool_size(); ++i)
{
pixel_coordinates[i].x() = rnd.get_random_double()*(max_x-min_x) + min_x;
pixel_coordinates[i].y() = rnd.get_random_double()*(max_y-min_y) + min_y;
@@ -769,15 +880,26 @@ namespace dlib
const double max_y = max(colm(temp,1))+padding;
std::vector<std::vector<dlib::vector<float,2> > > pixel_coordinates;
pixel_coordinates.resize(get_cascade_depth());
for (unsigned long i = 0; i < get_cascade_depth(); ++i)
randomly_sample_pixel_coordinates(pixel_coordinates[i], min_x, min_y, max_x, max_y);
return pixel_coordinates;
}
mutable dlib::rand rnd;
unsigned long _cascade_depth;
unsigned long _tree_depth;
unsigned long _num_trees_per_cascade_level;
double _nu;
unsigned long _oversampling_amount;
unsigned long _feature_pool_size;
double _lambda;
unsigned long _num_test_splits;
double _feature_pool_region_padding;
bool _verbose;
};
// ----------------------------------------------------------------------------------------
dlib/image_processing/shape_predictor_abstract.h
@@ -16,6 +16,18 @@ namespace dlib
class shape_predictor
{
/*!
WHAT THIS OBJECT REPRESENTS
This object is a tool that takes in an image region containing some object
and outputs a "shape" or set of point locations that define the pose of the
object. The classic example of this is human face pose prediction, where
you take an image of a human face as input and are expected to identify the
locations of important facial landmarks such as the corners of the mouth
and eyes, tip of the nose, and so forth.
To create useful instantiations of this object you need to use the
shape_predictor_trainer object defined below to train a shape_predictor
using a set of training images, each annotated with shapes you want to
predict.
!*/
public:
@@ -23,13 +35,15 @@
shape_predictor (
);
/*!
ensures
- #num_parts() == 0
!*/
unsigned long num_parts (
) const;
/*!
ensures
- returns the number of parts in the shapes predicted by this object.
!*/
template <typename image_type>
@@ -42,10 +56,18 @@
- image_type == an image object that implements the interface defined in
dlib/image_processing/generic_image.h
ensures
- Runs the shape prediction algorithm on the part of the image contained in
the given bounding rectangle. That is, it will try to fit the shape model to
the contents of the given rectangle in the image. For example, if there
is a human face inside the rectangle and you use a face landmarking shape
model then this function will return the locations of the face landmarks
as the parts. So the return value is a full_object_detection DET such
that:
- DET.get_rect() == rect
- DET.num_parts() == num_parts()
- for all valid i:
- DET.part(i) == the location in img for the i-th part of the shape
predicted by this object.
!*/
};
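// A minimal usage sketch (not part of this commit; the call-operator syntax,
// file name, and rectangle below are illustrative assumptions):
//
//     #include <dlib/image_processing.h>
//     #include <dlib/image_io.h>
//     #include <iostream>
//
//     dlib::array2d<unsigned char> img;
//     dlib::load_image(img, "face.jpg");   // hypothetical image file
//     dlib::shape_predictor sp;            // e.g. produced by
//                                          // shape_predictor_trainer::train()
//     dlib::full_object_detection det = sp(img, dlib::rectangle(50,50,200,200));
//     for (unsigned long i = 0; i < det.num_parts(); ++i)
//         std::cout << "part " << i << ": " << det.part(i) << std::endl;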
@@ -61,46 +83,208 @@ namespace dlib
class shape_predictor_trainer
{
/*!
WHAT THIS OBJECT REPRESENTS
This object is a tool for training shape_predictors based on annotated training
images. Its implementation uses the algorithm described in:
One Millisecond Face Alignment with an Ensemble of Regression Trees
by Vahid Kazemi and Josephine Sullivan, CVPR 2014
!*/
public:
shape_predictor_trainer (
)
{
_cascade_depth = 10;
_tree_depth = 2;
_num_trees_per_cascade_level = 500;
_nu = 0.1;
_oversampling_amount = 20;
_feature_pool_size = 400;
_lambda = 0.1;
_num_test_splits = 20;
_feature_pool_region_padding = 0;
_verbose = false;
}
unsigned long get_cascade_depth (
) const;
/*!
!*/
void set_cascade_depth (
unsigned long depth
);
/*!
requires
- depth > 0
ensures
- #get_cascade_depth() == depth
!*/
unsigned long get_tree_depth (
) const;
/*!
!*/
void set_tree_depth (
unsigned long depth
);
/*!
requires
- depth > 0
ensures
- #get_tree_depth() == depth
!*/
unsigned long get_num_trees_per_cascade_level (
) const;
/*!
!*/
void set_num_trees_per_cascade_level (
unsigned long num
);
/*!
requires
- num > 0
ensures
- #get_num_trees_per_cascade_level() == num
!*/
double get_nu (
) const;
/*!
!*/
void set_nu (
double nu
);
/*!
requires
- nu > 0
ensures
- #get_nu() == nu
!*/
std::string get_random_seed (
) const;
/*!
!*/
void set_random_seed (
const std::string& seed
);
/*!
ensures
- #get_random_seed() == seed
!*/
unsigned long get_oversampling_amount (
) const;
/*!
!*/
void set_oversampling_amount (
unsigned long amount
);
/*!
requires
- amount > 0
ensures
- #get_oversampling_amount() == amount
!*/
unsigned long get_feature_pool_size (
) const;
/*!
!*/
void set_feature_pool_size (
unsigned long size
);
/*!
requires
- size > 1
ensures
- #get_feature_pool_size() == size
!*/
double get_lambda (
) const;
/*!
!*/
void set_lambda (
double lambda
);
/*!
requires
- lambda > 0
ensures
- #get_lambda() == lambda
!*/
unsigned long get_num_test_splits (
) const;
/*!
!*/
void set_num_test_splits (
unsigned long num
);
/*!
requires
- num > 0
ensures
- #get_num_test_splits() == num
!*/
double get_feature_pool_region_padding (
) const;
/*!
!*/
void set_feature_pool_region_padding (
double padding
);
/*!
ensures
- #get_feature_pool_region_padding() == padding
!*/
void be_verbose (
);
/*!
ensures
- This object will print status messages to standard out so that a
user can observe the progress of the algorithm.
!*/
void be_quiet (
);
/*!
ensures
- this object will not print anything to standard out
!*/
template <typename image_array>
shape_predictor train (
const image_array& images,
const std::vector<std::vector<full_object_detection> >& objects
) const;
/*!
requires
- images.size() == objects.size()
- images.size() > 0
ensures
- This object will try to learn to predict the locations of an object's parts
based on the object bounding box (i.e. full_object_detection::get_rect())
and the image pixels in that box. That is, we will try to learn a
shape_predictor, SP, such that:
SP(images[i], objects[i][j].get_rect()) == objects[i][j]
This learned SP object is then returned.
!*/
};
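// A minimal training sketch (not part of this commit; the container types and
// the data-loading step are illustrative assumptions):
//
//     dlib::array<dlib::array2d<unsigned char> > images;
//     std::vector<std::vector<dlib::full_object_detection> > objects;
//     // ... fill images and objects with annotated training data ...
//
//     dlib::shape_predictor_trainer trainer;
//     trainer.set_oversampling_amount(20);
//     trainer.set_nu(0.1);
//     trainer.set_tree_depth(2);
//     trainer.be_verbose();
//     dlib::shape_predictor sp = trainer.train(images, objects);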
// ----------------------------------------------------------------------------------------
@@ -134,16 +318,16 @@ namespace dlib
valid i and j we perform:
sp(images[i], objects[i][j].get_rect())
and compare the result with the truth part positions in objects[i][j]. We
then return the average distance (measured in pixels) between a predicted
part location and its true position.
- if (scales.size() != 0) then
- Each time we compute the distance between a predicted part location and
its true location in objects[i][j] we divide the distance by
scales[i][j]. Therefore, if you want the reported error to be the
average pixel distance then give an empty scales vector, but if you want
the returned value to be something else like the average distance
normalized by some feature of each object (e.g. the interocular distance)
then you can supply those normalizing values via scales.
!*/
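// For example, to normalize by the interocular distance one could build the
// scales argument like this (LEFT_EYE and RIGHT_EYE are hypothetical part
// indices):
//
//     std::vector<std::vector<double> > scales(objects.size());
//     for (unsigned long i = 0; i < objects.size(); ++i)
//         for (unsigned long j = 0; j < objects[i].size(); ++j)
//             scales[i].push_back(dlib::length(objects[i][j].part(LEFT_EYE) -
//                                              objects[i][j].part(RIGHT_EYE)));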
template <