Added comments

56f4e19a · Davis King · d53d49eb · 56f4e19a · 56f4e19a
Commit 56f4e19a authored Oct 02, 2016 by Davis King
Hide whitespace changes
Inline Side-by-side

Showing with 114 additions and 64 deletions

dnn_mmod_dog_hipsterizer.cpp examples/dnn_mmod_dog_hipsterizer.cpp +61 -32

dnn_mmod_face_detection_ex.cpp examples/dnn_mmod_face_detection_ex.cpp +53 -32

No files found.
--- a/examples/dnn_mmod_dog_hipsterizer.cpp
+++ b/examples/dnn_mmod_dog_hipsterizer.cpp
+// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt
+/*
+    This example shows how to run a CNN based dog face detector using dlib.  The
+    example loads a pretrained model and uses it to find dog faces in images.
+    We also use the dlib::shape_predictor to find the location of the eyes and
+    nose and then draw glasses and a mustache onto each dog found :)
+    Users who are just learning about dlib's deep learning API should read the
+    dnn_introduction_ex.cpp and dnn_introduction2_ex.cpp examples to learn how
+    the API works.  For an introduction to the object detection method you
+    should read dnn_mmod_ex.cpp.
+    TRAINING THE MODEL
+        Finally, users interested in how the dog face detector was trained should
+        read the dnn_mmod_ex.cpp example program.  It should be noted that the
+        dog face detector used in this example uses a bigger training dataset and
+        larger CNN architecture than what is shown in dnn_mmod_ex.cpp, but
+        otherwise training is the same.  If you compare the net_type statements
+        in this file and dnn_mmod_ex.cpp you will see that they are very similar
+        except that the number of parameters has been increased.
+        Additionally, some training parameters were different.  The following
+        lines in dnn_mmod_ex.cpp were changed from
+            mmod_options options(face_boxes_train, 40*40);
+            trainer.set_iterations_without_progress_threshold(300);
+        to the following when training the model used in this example:
+            mmod_options options(face_boxes_train, 80*80);
+            trainer.set_iterations_without_progress_threshold(8000);
+        Also, the random_cropper was left at its default settings, so we didn't
+        call these functions:
+            cropper.set_chip_dims(200, 200);
+            cropper.set_min_object_height(0.2);
+        The training data used to create the model is also available at 
+        http://dlib.net/files/data/CU_dogs_fully_labeled.tar.gz
+        Lastly, the shape_predictor was trained with default settings except
+        for the following non-default values: cascade depth=20, tree depth=5,
+        padding=0.2
+*/
 #include <iostream>
@@ -10,30 +54,6 @@
 using namespace std;
 using namespace dlib;
-/*
-    Training differences with dnn_mmod_ex.cpp
-    A slightly bigger network architecture.  Also, to train you must replace the affine layers with bn_con layers.
-    mmod_options options(training_labels, 80*80);
-    instead of 
-    mmod_options options(face_boxes_train, 40*40);
-    trainer.set_iterations_without_progress_threshold(8000);
-    instead of 
-    trainer.set_iterations_without_progress_threshold(300);
-    random cropper was left at its default settings,  So we didn't call these functions:
-    cropper.set_chip_dims(200, 200);
-    cropper.set_min_object_height(0.2);
-    // shape predictor was trained with these settings: tree cascade depth=20, tree depth=5, padding=0.2
-*/
 // ----------------------------------------------------------------------------------------
 template <long num_filters, typename SUBNET> using con5d = con<num_filters,5,5,2,2,SUBNET>;
@@ -46,20 +66,19 @@ using net_type = loss_mmod<con<1,9,9,1,1,rcon5<rcon5<rcon5<downsampler<input_rgb
 // ----------------------------------------------------------------------------------------
 int main(int argc, char** argv) try
 {
    if (argc < 3)
    {
-        cout << "Give the path to the examples/faces directory as the argument to this" << endl;
+        cout << "Call this program like this:" << endl;
-        cout << "program.  For example, if you are in the examples folder then execute " << endl;
+        cout << "./dnn_mmod_dog_hipsterizer mmod_dog_hipsterizer.dat faces/dogs.jpg" << endl;
-        cout << "this program by running: " << endl;
+        cout << "\nYou can get the mmod_dog_hipsterizer.dat file from:\n";
-        cout << "   ./fhog_object_detector_ex faces" << endl;
+        cout << "http://dlib.net/files/mmod_dog_hipsterizer.dat.bz2" << endl;
-        cout << endl;
        return 0;
    }
+    // load the models as well as glasses and mustache.
    net_type net;
    shape_predictor sp;
    matrix<rgb_alpha_pixel> glasses, mustache;
@@ -67,11 +86,12 @@ int main(int argc, char** argv) try
    pyramid_up(glasses);
    pyramid_up(mustache);
-    // right eye (59,35),  left eye (176,36)
    image_window win1(glasses);
    image_window win2(mustache);
    image_window win_wireframe, win_hipster;
+    // Now process each image, find dogs, and hipsterize them by drawing glasses and a
+    // mustache on each dog :)
    for (int i = 2; i < argc; ++i)
    {
        matrix<rgb_pixel> img;
@@ -84,9 +104,12 @@ int main(int argc, char** argv) try
        auto dets = net(img);
        win_wireframe.clear_overlay();
        win_wireframe.set_image(img);
+        // We will also draw a wireframe on each dog's face so you can see where the
+        // shape_predictor is identifying face landmarks.
        std::vector<image_window::overlay_line> lines;
        for (auto&& d : dets)
        {
+            // get the landmarks for this dog's face
            auto shape = sp(img, d.rect);
            const rgb_pixel color(0,255,0);
@@ -97,9 +120,11 @@ int main(int argc, char** argv) try
            auto rear = shape.part(4);
            auto reye = shape.part(5);
+            // The locations of the left and right ends of the mustache.
            auto lmustache = 1.3*(leye-reye)/2 + nose;
            auto rmustache = 1.3*(reye-leye)/2 + nose;
+            // Draw the glasses onto the image.
            std::vector<point> from = {2*point(176,36), 2*point(59,35)}, to = {leye, reye};
            auto tform = find_similarity_transform(from, to);
            for (long r = 0; r < glasses.nr(); ++r)
@@ -111,6 +136,8 @@ int main(int argc, char** argv) try
                        assign_pixel(img(p.y(),p.x()), glasses(r,c));
                }
            }
+            // Draw the mustache onto the image right under the dog's nose.
            auto mrect = get_rect(mustache);
            from = {mrect.tl_corner(), mrect.tr_corner()};
            to = {rmustache, lmustache};
@@ -126,6 +153,7 @@ int main(int argc, char** argv) try
            }
+            // Record the lines needed for the face wire frame.
            lines.push_back(image_window::overlay_line(leye, nose, color));
            lines.push_back(image_window::overlay_line(nose, reye, color));
            lines.push_back(image_window::overlay_line(reye, leye, color));
@@ -138,6 +166,7 @@ int main(int argc, char** argv) try
        win_wireframe.add_overlay(lines);
        win_hipster.set_image(img);
+        cout << "Hit enter to process the next image." << endl;
        cin.get();
    }
 }

--- a/examples/dnn_mmod_face_detection_ex.cpp
+++ b/examples/dnn_mmod_face_detection_ex.cpp
+// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt
+/*
+    This example shows how to run a CNN based face detector using dlib.  The
+    example loads a pretrained model and uses it to find faces in images.  The
+    CNN model is much more accurate than the HOG based model shown in the
+    face_detection_ex.cpp example, but takes much more computational power to
+    run, and is meant to be executed on a GPU to attain reasonable speed.  For
+    example, on an NVIDIA Titan X GPU, this example program processes images at
+    about the same speed as face_detection_ex.cpp.
+    Also, users who are just learning about dlib's deep learning API should read
+    the dnn_introduction_ex.cpp and dnn_introduction2_ex.cpp examples to learn
+    how the API works.  For an introduction to the object detection method you
+    should read dnn_mmod_ex.cpp.
+    TRAINING THE MODEL
+        Finally, users interested in how the face detector was trained should
+        read the dnn_mmod_ex.cpp example program.  It should be noted that the
+        face detector used in this example uses a bigger training dataset and
+        larger CNN architecture than what is shown in dnn_mmod_ex.cpp, but
+        otherwise training is the same.  If you compare the net_type statements
+        in this file and dnn_mmod_ex.cpp you will see that they are very similar
+        except that the number of parameters has been increased.
+        Additionally, some training parameters were different.  The following
+        lines in dnn_mmod_ex.cpp were changed from
+            mmod_options options(face_boxes_train, 40*40);
+            trainer.set_iterations_without_progress_threshold(300);
+        to the following when training the model used in this example:
+            mmod_options options(face_boxes_train, 80*80);
+            trainer.set_iterations_without_progress_threshold(8000);
+        Also, the random_cropper was left at its default settings, so we didn't
+        call these functions:
+            cropper.set_chip_dims(200, 200);
+            cropper.set_min_object_height(0.2);
+        The training data used to create the model is also available at 
+        http://dlib.net/files/data/dlib_face_detection_dataset-2016-09-30.tar.gz
+*/
 #include <iostream>
@@ -10,26 +52,6 @@
 using namespace std;
 using namespace dlib;
-/*
-    Training differences with dnn_mmod_ex.cpp
-    A slightly bigger network architecture.  Also, to train you must replace the affine layers with bn_con layers.
-    mmod_options options(training_labels, 80*80);
-    instead of 
-    mmod_options options(face_boxes_train, 40*40);
-    trainer.set_iterations_without_progress_threshold(8000);
-    instead of 
-    trainer.set_iterations_without_progress_threshold(300);
-    random cropper was left at its default settings,  So we didn't call these functions:
-    cropper.set_chip_dims(200, 200);
-    cropper.set_min_object_height(0.2);
-*/
 // ----------------------------------------------------------------------------------------
 template <long num_filters, typename SUBNET> using con5d = con<num_filters,5,5,2,2,SUBNET>;
@@ -45,13 +67,12 @@ using net_type = loss_mmod<con<1,9,9,1,1,rcon5<rcon5<rcon5<downsampler<input_rgb
 int main(int argc, char** argv) try
 {
-    if (argc < 3)
+    if (argc == 1)
    {
-        cout << "Give the path to the examples/faces directory as the argument to this" << endl;
+        cout << "Call this program like this:" << endl;
-        cout << "program.  For example, if you are in the examples folder then execute " << endl;
+        cout << "./dnn_mmod_face_detection_ex mmod_human_face_detector.dat faces/*.jpg" << endl;
-        cout << "this program by running: " << endl;
+        cout << "\nYou can get the mmod_human_face_detector.dat file from:\n";
-        cout << "   ./fhog_object_detector_ex faces" << endl;
+        cout << "http://dlib.net/files/mmod_human_face_detector.dat.bz2" << endl;
-        cout << endl;
        return 0;
    }
@@ -71,15 +92,17 @@ int main(int argc, char** argv) try
        pyramid_up(img);
        // Note that you can process a bunch of images in a std::vector at once and it runs
-        // faster, since this will form mini-batches of images and therefore get better
+        // much faster, since this will form mini-batches of images and therefore get
-        // parallelism out of your GPU hardware.  However, all the images must be the same
+        // better parallelism out of your GPU hardware.  However, all the images must be
-        // size.  To avoid this requirement on images being the same size we process them
+        // the same size.  To avoid this requirement on images being the same size we
-        // individually in this example.
+        // process them individually in this example.
        auto dets = net(img);
        win.clear_overlay();
        win.set_image(img);
        for (auto&& d : dets)
            win.add_overlay(d);
+        cout << "Hit enter to process the next image." << endl;
        cin.get();
    }
 }
@@ -89,5 +112,3 @@ catch(std::exception& e)
 }