Commit 954612b6 authored by Davis King's avatar Davis King

Cleaned up the code a bit. Still more cleaning to do.

parent b191400a
This diff is collapsed.
......@@ -16,6 +16,18 @@ namespace dlib
class shape_predictor
{
/*!
WHAT THIS OBJECT REPRESENTS
This object is a tool that takes in an image region containing some object
and outputs a "shape" or set of point locations that define the pose of the
object. The classic example of this is human face pose prediction, where
you take an image of a human face as input and are expected to identify the
locations of important facial landmarks such as the corners of the mouth
and eyes, tip of the nose, and so forth.
To create useful instantiations of this object you need to use the
shape_predictor_trainer object defined below to train a shape_predictor
using a set of training images, each annotated with shapes you want to
predict.
!*/
public:
......@@ -23,13 +35,15 @@ namespace dlib
shape_predictor (
);
/*!
ensures
- #num_parts() == 0
!*/
unsigned long num_parts (
) const;
/*!
ensures
- returns the number of points in the shape
- returns the number of parts in the shapes predicted by this object.
!*/
template <typename image_type>
......@@ -42,10 +56,18 @@ namespace dlib
- image_type == an image object that implements the interface defined in
dlib/image_processing/generic_image.h
ensures
- runs the tree regressor on the detection rect inside img and returns a
full_object_detection DET such that:
- Runs the shape prediction algorithm on the part of the image contained in
the given bounding rectangle. So it will try and fit the shape model to
the contents of the given rectangle in the image. For example, if there
is a human face inside the rectangle and you use a face landmarking shape
model then this function will return the locations of the face landmarks
as the parts. So the return value is a full_object_detection DET such
that:
- DET.get_rect() == rect
- DET.num_parts() == num_parts()
- for all valid i:
- DET.part(i) == the location in img for the i-th part of the shape
predicted by this object.
!*/
};
......@@ -61,46 +83,208 @@ namespace dlib
class shape_predictor_trainer
{
/*!
This thing really only works with unsigned char or rgb_pixel images (since we assume the threshold
should be in the range [-128,128]).
WHAT THIS OBJECT REPRESENTS
This object is a tool for training shape_predictors based on annotated training
images. Its implementation uses the algorithm described in:
One Millisecond Face Alignment with an Ensemble of Regression Trees
by Vahid Kazemi and Josephine Sullivan, CVPR 2014
!*/
public:
unsigned long cascade_depth (
) const { return 10; }
shape_predictor_trainer (
)
/*!
    ensures
        - constructs a trainer initialized with the default parameter values
          shown below.  Each value can be changed via the corresponding
          set_*() member, subject to the preconditions documented there.
!*/
{
// Number of cascade levels in the learned regressor (set_cascade_depth() requires depth > 0).
_cascade_depth = 10;
// Depth of each regression tree in the ensemble (set_tree_depth() requires depth > 0).
_tree_depth = 2;
// How many trees are trained at each cascade level (set_num_trees_per_cascade_level() requires num > 0).
_num_trees_per_cascade_level = 500;
// The regularizer (per the previous inline note on get_nu()); set_nu() requires nu > 0.
_nu = 0.1;
// How many times each training sample is reused with a perturbed initial shape
// (set_oversampling_amount() requires amount > 0).
_oversampling_amount = 20;
// Size of the random feature pool sampled per cascade level; must be > 1
// (per the old inline comment on feature_pool_size() and set_feature_pool_size()).
_feature_pool_size = 400;
// Feature sampling parameter; presumably the exponential prior on pixel-pair
// distance from the Kazemi/Sullivan paper cited above -- set_lambda() requires lambda > 0.
_lambda = 0.1;
// Number of candidate splits tested at each tree node (set_num_test_splits() requires num > 0).
_num_test_splits = 20;
// Padding added around the bounding box when sampling feature pixels; 0 means no padding.
_feature_pool_region_padding = 0;
// Quiet by default; be_verbose() enables progress messages on standard out.
_verbose = false;
}
unsigned long get_cascade_depth (
) const;
/*!
!*/
void set_cascade_depth (
unsigned long depth
);
/*!
requires
- depth > 0
ensures
- #get_cascade_depth() == depth
!*/
unsigned long get_tree_depth (
) const;
/*!
!*/
void set_tree_depth (
unsigned long depth
);
/*!
requires
- depth > 0
ensures
- #get_tree_depth() == depth
!*/
unsigned long tree_depth (
) const { return 2; }
unsigned long get_num_trees_per_cascade_level (
) const;
/*!
!*/
unsigned long num_trees_per_cascade_level (
) const { return 500; }
void set_num_trees_per_cascade_level (
unsigned long num
);
/*!
requires
- num > 0
ensures
- #get_num_trees_per_cascade_level() == num
!*/
double get_nu (
) const { return 0.1; } // the regularizer
) const;
/*!
!*/
void set_nu (
double nu
);
/*!
requires
- nu > 0
ensures
- #get_nu() == nu
!*/
std::string get_random_seed (
) const;
/*!
!*/
void set_random_seed (
const std::string& seed
);
/*!
ensures
- #get_random_seed() == seed
!*/
unsigned long get_oversampling_amount (
) const;
/*!
!*/
void set_oversampling_amount (
unsigned long amount
);
/*!
requires
- amount > 0
ensures
- #get_oversampling_amount() == amount
!*/
std::string random_seed (
) const { return "dlib rules"; }
unsigned long get_feature_pool_size (
) const;
/*!
!*/
unsigned long oversampling_amount (
) const { return 20; }
void set_feature_pool_size (
unsigned long size
);
/*!
requires
- size > 1
ensures
- #get_feature_pool_size() == size
!*/
// feature sampling parameters
unsigned long feature_pool_size (
) const { return 400; }// this must be > 1
double get_lambda (
) const { return 0.1; }
) const;
/*!
!*/
void set_lambda (
double lambda
);
/*!
requires
- lambda > 0
ensures
- #get_lambda() == lambda
!*/
unsigned long get_num_test_splits (
) const { return 20; }
) const;
/*!
!*/
void set_num_test_splits (
unsigned long num
);
/*!
requires
- num > 0
ensures
- #get_num_test_splits() == num
!*/
double get_feature_pool_region_padding (
) const { return 0; }
) const;
/*!
!*/
void set_feature_pool_region_padding (
double padding
);
/*!
ensures
- #get_feature_pool_region_padding() == padding
!*/
void be_verbose (
);
/*!
ensures
- This object will print status messages to standard out so that a
user can observe the progress of the algorithm.
!*/
void be_quiet (
);
/*!
ensures
- this object will not print anything to standard out
!*/
template <typename image_array>
shape_predictor train (
const image_array& images,
const std::vector<std::vector<full_object_detection> >& objects
) const;
/*!
requires
- images.size() == objects.size()
- images.size() > 0
ensures
- This object will try to learn to predict the locations of an object's parts
based on the object bounding box (i.e. full_object_detection::get_rect())
and the image pixels in that box. That is, we will try to learn a
shape_predictor, SP, such that:
SP(images[i], objects[i][j].get_rect()) == objects[i][j]
This learned SP object is then returned.
!*/
};
// ----------------------------------------------------------------------------------------
......@@ -134,16 +318,16 @@ namespace dlib
valid i and j we perform:
sp(images[i], objects[i][j].get_rect())
and compare the result with the truth part positions in objects[i][j]. We
then return the average distance between a predicted part location and its
true position. This value is then returned.
then return the average distance (measured in pixels) between a predicted
part location and its true position.
- if (scales.size() != 0) then
- Each time we compute the distance between a predicted part location and
its true location in objects[i][j] we divide the distance by
scales[i][j]. Therefore, if you want the reported error to be the
average pixel distance then give an empty scales vector, but if you want
the returned value to be something else like the average distance
normalized by some feature of the objects (e.g. the interocular distance)
then you can supply those normalizing values via scales.
normalized by some feature of each object (e.g. the interocular distance)
then you can supply those normalizing values via scales.
!*/
template <
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment