Slightly improved example. Still needs a lot of work though.

f698db9c · Davis King · 6753460d · f698db9c
Commit f698db9c authored Jan 06, 2014 by Davis King
Hide whitespace changes
Inline Side-by-side

Showing with 112 additions and 47 deletions

fhog_object_detector_ex.cpp examples/fhog_object_detector_ex.cpp +112 -47

No files found.
--- a/examples/fhog_object_detector_ex.cpp
+++ b/examples/fhog_object_detector_ex.cpp
 // The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt
 /*

-    This is an example illustrating the use of the dlib tools for
-    detecting objects in images.  In this example we will create
-    three simple images, each containing some white squares.  We
-    will then use the sliding window classifier tools to learn to 
-    detect these squares.
+    This example program shows how to find frontal human faces in an image.  In
+    particular, this program shows how you can take a list of images from the
+    command line and display each on the screen with red boxes overlaid on each
+    human face.
+
+    The examples/faces folder contains some jpg images of people.  You can run
+    this program on them and see the detections by executing the following:
+        ./face_detection_ex faces/*.jpg
+
+    
+    This face detector is made using the now classic Histogram of Oriented
+    Gradients (HOG) feature combined with a linear classifier, an image pyramid,
+    and sliding window detection scheme.  This type of object detector is fairly
+    general and capable of detecting many types of semi-rigid objects in
+    addition to human faces.  Therefore, if you are interested in making your
+    own object detectors then read the fhog_object_detector_ex.cpp example
+    program.  It shows how to use the machine learning tools used to create this
+    face detector. 
+
+
+    Finally, note that the face detector is fastest when compiled with at least
+    SSE2 instructions enabled.  So if you are using a PC with an Intel or AMD
+    chip then you should enable at least SSE2.  If you are using cmake to
+    compile this program you can enable them by using one of the following
+    commands when you create the build project:
+        cmake path_to_dclib/examples -DUSE_SSE2_INSTRUCTIONS=ON
+        cmake path_to_dclib/examples -DUSE_SSE4_INSTRUCTIONS=ON
+        cmake path_to_dclib/examples -DUSE_AVX_INSTRUCTIONS=ON
+    This will set the appropriate compiler options for GCC, clang, Visual
+    Studio, or the Intel compiler.  If you are using another compiler then you
+    need to consult your compiler's manual to determine how to enable these
+    instructions.  Note that AVX is the fastest but requires a CPU from at least
+    2011.  SSE4 is the next fastest and is supported by most current machines.  

 */

-#include <dlib/time_this.h>
-
-#include <dlib/image_processing/frontal_face_detector.h>

 #include <dlib/svm_threaded.h>
 #include <dlib/gui_widgets.h>
-#include <dlib/array.h>
-#include <dlib/array2d.h>
-#include <dlib/image_keypoint.h>
 #include <dlib/image_processing.h>
 #include <dlib/data_io.h>

@@ -30,79 +52,122 @@ using namespace dlib;

 // ----------------------------------------------------------------------------------------

-int main()
+int main(int argc, char** argv)
 {  
-    /*
-        NOTES
-        - explain the concepts of ignore boxes
-    */

    try
    {
+        if (argc != 2)
+        {
+            cout << "Give the path to the dclib/examples/faces directory as the argument to this" << endl;
+            cout << "program.  For example, if you are in the dclib/examples folder then execute " << endl;
+            cout << "this program by running: " << endl;
+            cout << "   ./fhog_object_detector_ex faces" << endl;
+            cout << endl;
+            return 0;
+        }
+        const std::string faces_directory = argv[1];
+
        dlib::array<array2d<unsigned char> > images, images_test;
        std::vector<std::vector<rectangle> > object_locations, object_locations_test;

-        load_image_dataset(images, object_locations, "../faces/training.xml");
-        upsample_image_dataset<pyramid_down<2> >(images, object_locations);
-
-        load_image_dataset(images_test, object_locations_test, "../faces/testing.xml");
+        /*
+            These xml files are created by the imglab tool.
+            To create this annotated data you will need to use the imglab tool 
+            included with dlib.  It is located in the tools/imglab folder and can be compiled
+            using the following commands.  
+                cd tools/imglab
+                mkdir build
+                cd build
+                cmake ..
+                cmake --build . --config Release
+            Note that you may need to install CMake (www.cmake.org) for this to work.  
+
+            Next, lets assume you have a folder of images called /tmp/images.  These images 
+            should contain examples of the objects you want to learn to detect.  You will 
+            use the imglab tool to label these objects.  Do this by typing the following
+                ./imglab -c mydataset.xml /tmp/images
+            This will create a file called mydataset.xml which simply lists the images in 
+            /tmp/images.  To annotate them run
+                ./imglab mydataset.xml
+            A window will appear showing all the images.  You can use the up and down arrow 
+            keys to cycle though the images and the mouse to label objects.  In particular, 
+            holding the shift key, left clicking, and dragging the mouse will allow you to 
+            draw boxes around the objects you wish to detect.  So next, label all the objects 
+            with boxes.  Note that it is important to label all the objects since any object 
+            not labeled is implicitly assumed to be not an object we should detect.
+
+            Once you finish labeling objects go to the file menu, click save, and then close 
+            the program. This will save the object boxes back to mydataset.xml.  You can verify 
+            this by opening the tool again with
+                ./imglab mydataset.xml
+            and observing that the boxes are present.
+        */
+        load_image_dataset(images, object_locations, faces_directory+"/training.xml");
+        load_image_dataset(images_test, object_locations_test, faces_directory+"/testing.xml");
+        upsample_image_dataset<pyramid_down<2> >(images,      object_locations);
        upsample_image_dataset<pyramid_down<2> >(images_test, object_locations_test);
-
-
        add_image_left_right_flips(images, object_locations);
-
        cout << "num training images: " << images.size() << endl;
        cout << "num testing images:  " << images_test.size() << endl;

-
        typedef scan_fhog_pyramid<pyramid_down<6> > image_scanner_type; 
        image_scanner_type scanner;
-
-        scanner.set_detection_window_size(80, 80); // faces
-        //scanner.set_nuclear_norm_regularization_strength(1.0);
-
+        scanner.set_detection_window_size(80, 80); 
        structural_object_detection_trainer<image_scanner_type> trainer(scanner);
-        trainer.set_num_threads(6); // Set this to the number of processing cores on your machine. 
+        trainer.set_num_threads(4); // Set this to the number of processing cores on your machine. 
        trainer.set_c(1);
-        //trainer.set_c(10);
        trainer.be_verbose();
+        // stop when the risk gap is less than 0.01
        trainer.set_epsilon(0.01);

-        // TODO, talk about this option
+        // TODO, talk about this option. 
        //remove_unobtainable_rectangles(trainer, images, object_locations);

        object_detector<image_scanner_type> detector = trainer.train(images, object_locations);
-        cout << "num filters 0.0:  "<< num_separable_filters(detector) << endl;

-        cout << "training results 0.0: " << test_object_detection_function(detector, images, object_locations) << endl;
-        cout << "testing results 0.0:  " << test_object_detection_function(detector, images_test, object_locations_test) << endl;
-
-        detector = threshold_filter_singular_values(detector,0.01);
-        cout << "num filters 0.01: "<< num_separable_filters(detector) << endl;
-        cout << "testing results 0.01: " << test_object_detection_function(detector, images_test, object_locations_test) << endl;
-
-        detector = threshold_filter_singular_values(detector,0.1);
-        cout << "num filters 0.1: "<< num_separable_filters(detector) << endl;
-        cout << "testing results 0.1: " << test_object_detection_function(detector, images_test, object_locations_test) << endl;
+        // prints the precision, recall, and average precision 
+        cout << "training results: " << test_object_detection_function(detector, images, object_locations) << endl;
+        cout << "testing results:  " << test_object_detection_function(detector, images_test, object_locations_test) << endl;


+        image_window hogwin(draw_fhog(detector), "Learned fHOG filter");

-        image_window win, hogwin(draw_fhog(detector));
+        image_window win; 
        for (unsigned long i = 0; i < images_test.size(); ++i)
        {
-            std::vector<rectangle> dets;
-            TIME_THIS(
-            dets = detector(images_test[i]);
-            );
+            std::vector<rectangle> dets = detector(images_test[i]);
+            // Now we show the image on the screen and the face detections as
+            // red overlay boxes.
            win.clear_overlay();
            win.set_image(images_test[i]);
            win.add_overlay(dets, rgb_pixel(255,0,0));
+
+            cout << "Hit enter to process the next image..." << endl;
            cin.get();
        }


        ofstream fout("face_detector.svm", ios::binary);
        serialize(detector, fout);
+        fout.close();
+
+        ifstream fin("face_detector.svm", ios::binary);
+        object_detector<image_scanner_type> detector2;
+        deserialize(detector2, fin);
+
+
+
+        /*
+            Advanced features...
+            - explain the concepts of ignore boxes
+            - talk about putting multiple detectors inside a single object_detector object.  
+        */
+
+        // talk about low nuclear norm stuff
+        //scanner.set_nuclear_norm_regularization_strength(1.0);
+        detector = threshold_filter_singular_values(detector,0.1);
+        cout << "num filters 0.0:  "<< num_separable_filters(detector) << endl;

    }
    catch (exception& e)