Commit 954612b6 authored by Davis King's avatar Davis King

Cleaned up the code a bit. Still more cleaning to do.

parent b191400a
This diff is collapsed.
......@@ -16,6 +16,18 @@ namespace dlib
class shape_predictor
{
/*!
WHAT THIS OBJECT REPRESENTS
This object is a tool that takes in an image region containing some object
and outputs a "shape" or set of point locations that define the pose of the
object. The classic example of this is human face pose prediction, where
you take an image of a human face as input and are expected to identify the
locations of important facial landmarks such as the corners of the mouth
and eyes, tip of the nose, and so forth.
To create useful instantiations of this object you need to use the
shape_predictor_trainer object defined below to train a shape_predictor
using a set of training images, each annotated with shapes you want to
predict.
!*/
public:
......@@ -23,13 +35,15 @@ namespace dlib
shape_predictor (
);
/*!
ensures
- #num_parts() == 0
!*/
unsigned long num_parts (
) const;
/*!
ensures
- returns the number of points in the shape
- returns the number of parts in the shapes predicted by this object.
!*/
template <typename image_type>
......@@ -42,10 +56,18 @@ namespace dlib
- image_type == an image object that implements the interface defined in
dlib/image_processing/generic_image.h
ensures
- runs the tree regressor on the detection rect inside img and returns a
full_object_detection DET such that:
- Runs the shape prediction algorithm on the part of the image contained in
the given bounding rectangle. So it will try and fit the shape model to
the contents of the given rectangle in the image. For example, if there
is a human face inside the rectangle and you use a face landmarking shape
model then this function will return the locations of the face landmarks
as the parts. So the return value is a full_object_detection DET such
that:
- DET.get_rect() == rect
- DET.num_parts() == num_parts()
- for all valid i:
- DET.part(i) == the location in img for the i-th part of the shape
predicted by this object.
!*/
};
......@@ -61,46 +83,208 @@ namespace dlib
class shape_predictor_trainer
{
/*!
This thing really only works with unsigned char or rgb_pixel images (since we assume the threshold
should be in the range [-128,128]).
WHAT THIS OBJECT REPRESENTS
This object is a tool for training shape_predictors based on annotated training
images. Its implementation uses the algorithm described in:
One Millisecond Face Alignment with an Ensemble of Regression Trees
by Vahid Kazemi and Josephine Sullivan, CVPR 2014
!*/
public:
unsigned long cascade_depth (
) const { return 10; }
shape_predictor_trainer (
)
/*!
    ensures
        - constructs a trainer initialized with the default parameter values
          shown below.  Each value can be changed via the corresponding
          set_*() member, subject to the preconditions documented there.
!*/
{
// Number of cascade levels in the learned regressor (set_cascade_depth() requires depth > 0).
_cascade_depth = 10;
// Depth of each regression tree in the ensemble (set_tree_depth() requires depth > 0).
_tree_depth = 2;
// How many trees are trained at each cascade level (set_num_trees_per_cascade_level() requires num > 0).
_num_trees_per_cascade_level = 500;
// The regularizer (per the previous inline note on get_nu()); set_nu() requires nu > 0.
_nu = 0.1;
// How many times each training sample is reused with a perturbed initial shape
// (set_oversampling_amount() requires amount > 0).
_oversampling_amount = 20;
// Size of the random feature pool sampled per cascade level; must be > 1
// (per the old inline comment on feature_pool_size() and set_feature_pool_size()).
_feature_pool_size = 400;
// Feature sampling parameter; presumably the exponential prior on pixel-pair
// distance from the Kazemi/Sullivan paper cited above -- set_lambda() requires lambda > 0.
_lambda = 0.1;
// Number of candidate splits tested at each tree node (set_num_test_splits() requires num > 0).
_num_test_splits = 20;
// Padding added around the bounding box when sampling feature pixels; 0 means no padding.
_feature_pool_region_padding = 0;
// Quiet by default; be_verbose() enables progress messages on standard out.
_verbose = false;
}
unsigned long get_cascade_depth (
) const;
/*!
!*/
void set_cascade_depth (
unsigned long depth
);
/*!
requires
- depth > 0
ensures
- #get_cascade_depth() == depth
!*/
unsigned long get_tree_depth (
) const;
/*!
!*/
void set_tree_depth (
unsigned long depth
);
/*!
requires
- depth > 0
ensures
- #get_tree_depth() == depth
!*/
unsigned long tree_depth (
) const { return 2; }
unsigned long get_num_trees_per_cascade_level (
) const;
/*!
!*/
unsigned long num_trees_per_cascade_level (
) const { return 500; }
void set_num_trees_per_cascade_level (
unsigned long num
);
/*!
requires
- num > 0
ensures
- #get_num_trees_per_cascade_level() == num
!*/
double get_nu (
) const { return 0.1; } // the regularizer
) const;
/*!
!*/
void set_nu (
double nu
);
/*!
requires
- nu > 0
ensures
- #get_nu() == nu
!*/
std::string get_random_seed (
) const;
/*!
!*/
void set_random_seed (
const std::string& seed
);
/*!
ensures
- #get_random_seed() == seed
!*/
unsigned long get_oversampling_amount (
) const;
/*!
!*/
void set_oversampling_amount (
unsigned long amount
);
/*!
requires
- amount > 0
ensures
- #get_oversampling_amount() == amount
!*/
std::string random_seed (
) const { return "dlib rules"; }
unsigned long get_feature_pool_size (
) const;
/*!
!*/
unsigned long oversampling_amount (
) const { return 20; }
void set_feature_pool_size (
unsigned long size
);
/*!
requires
- size > 1
ensures
- #get_feature_pool_size() == size
!*/
// feature sampling parameters
unsigned long feature_pool_size (
) const { return 400; }// this must be > 1
double get_lambda (
) const { return 0.1; }
) const;
/*!
!*/
void set_lambda (
double lambda
);
/*!
requires
- lambda > 0
ensures
- #get_lambda() == lambda
!*/
unsigned long get_num_test_splits (
) const { return 20; }
) const;
/*!
!*/
void set_num_test_splits (
unsigned long num
);
/*!
requires
- num > 0
ensures
- #get_num_test_splits() == num
!*/
double get_feature_pool_region_padding (
) const { return 0; }
) const;
/*!
!*/
void set_feature_pool_region_padding (
double padding
);
/*!
ensures
- #get_feature_pool_region_padding() == padding
!*/
void be_verbose (
);
/*!
ensures
- This object will print status messages to standard out so that a
user can observe the progress of the algorithm.
!*/
void be_quiet (
);
/*!
ensures
- this object will not print anything to standard out
!*/
template <typename image_array>
shape_predictor train (
const image_array& images,
const std::vector<std::vector<full_object_detection> >& objects
) const;
/*!
requires
- images.size() == objects.size()
- images.size() > 0
ensures
- This object will try to learn to predict the locations of an object's parts
based on the object bounding box (i.e. full_object_detection::get_rect())
and the image pixels in that box. That is, we will try to learn a
shape_predictor, SP, such that:
SP(images[i], objects[i][j].get_rect()) == objects[i][j]
This learned SP object is then returned.
!*/
};
// ----------------------------------------------------------------------------------------
......@@ -134,16 +318,16 @@ namespace dlib
valid i and j we perform:
sp(images[i], objects[i][j].get_rect())
and compare the result with the truth part positions in objects[i][j]. We
then return the average distance between a predicted part location and its
true position. This value is then returned.
then return the average distance (measured in pixels) between a predicted
part location and its true position.
- if (scales.size() != 0) then
- Each time we compute the distance between a predicted part location and
its true location in objects[i][j] we divide the distance by
scales[i][j]. Therefore, if you want the reported error to be the
average pixel distance then give an empty scales vector, but if you want
the returned value to be something else like the average distance
normalized by some feature of the objects (e.g. the interocular distance)
then you can supply those normalizing values via scales.
normalized by some feature of each object (e.g. the interocular distance)
then you can supply those normalizing values via scales.
!*/
template <
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment