Finished the more complex metric learning example and added some example data.

fd132304 · Davis King · f4b3c7ee · fd132304 · fd132304 · fd132304
Commit fd132304 authored Dec 18, 2016 by Davis King
56 changed files
--- a/examples/dnn_metric_learning_on_images_ex.cpp
+++ b/examples/dnn_metric_learning_on_images_ex.cpp
+// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt
+/*
+    This is an example illustrating the use of the deep learning tools from the
+    dlib C++ Library.  In it, we will show how to use the loss_metric layer to do
+    metric learning on images.  
+
+    The main reason you might want to use this kind of algorithm is that you
+    would like to use a k-nearest neighbor classifier or similar algorithm, but
+    you don't know a good way to calculate the distance between two things.  A
+    popular example would be face recognition.  There are a whole lot of papers
+    that train some kind of deep metric learning algorithm that embeds face
+    images in some vector space where images of the same person are close to each
+    other and images of different people are far apart.  Then in that vector
+    space it's very easy to do face recognition with some kind of k-nearest
+    neighbor classifier.  
+    
+    In this example we will use the ResNet-34 network from the dnn_imagenet_ex.cpp 
+    example to learn to map images into some vector space where pictures of
+    the same person are close and pictures of different people are far apart.  
+
+    You might want to read the simpler introduction to the deep metric learning
+    API, dnn_metric_learning_ex.cpp, before reading this example.  You should
+    also have read the examples that introduce the dlib DNN API before
+    continuing.  These are dnn_introduction_ex.cpp and dnn_introduction2_ex.cpp.
+
+*/

 #include <dlib/dnn.h>
 #include <dlib/image_io.h>
@@ -6,7 +32,30 @@
 using namespace dlib;
 using namespace std;

+// ----------------------------------------------------------------------------------------

+// We will need to create some functions for loading data.  This program will
+// expect to be given a directory structured as follows:
+//    top_level_directory/
+//        person1/
+//            image1.jpg
+//            image2.jpg
+//            image3.jpg
+//        person2/
+//            image4.jpg
+//            image5.jpg
+//            image6.jpg
+//        person3/
+//            image7.jpg
+//            image8.jpg
+//            image9.jpg
+//
+// The specific folder and image names don't matter, nor does the number of folders or
+// images.  What does matter is that there is a top level folder, which contains
+// subfolders, and each subfolder contains images of a single person.
+
+// This function walks the top level directory and returns a list of all the
+// image files, grouped by subfolder (one inner list of file names per person).
 std::vector<std::vector<string>> load_objects_list (
    const string& dir 
 )
@@ -23,9 +72,16 @@ std::vector<std::vector<string>> load_objects_list (
    return objects;
 }

+// This function takes the output of load_objects_list() as input and randomly
+// selects images for training.  It should also be pointed out that it's really
+// important that each mini-batch contain multiple images of each person.  This
+// is because the metric learning algorithm needs to consider pairs of images
+// that should be close (i.e. images of the same person) as well as pairs of
+// images that should be far apart (i.e. images of different people) during each
+// training step.
 void load_mini_batch (
-    const size_t num_ids,
-    const size_t samples_per_id,
+    const size_t num_people,     // how many different people to include
+    const size_t samples_per_id, // how many images per person to select.
    dlib::rand& rnd,
    const std::vector<std::vector<string>>& objs,
    std::vector<matrix<rgb_pixel>>& images,
@@ -34,11 +90,18 @@ void load_mini_batch (
 {
    images.clear();
    labels.clear();
+    DLIB_CASSERT(num_people <= objs.size(), "The dataset doesn't have that many people in it.");

+    std::vector<bool> already_selected(objs.size(), false);
    matrix<rgb_pixel> image; 
-    for (size_t i = 0; i < num_ids; ++i)
+    for (size_t i = 0; i < num_people; ++i)
    {
-        const size_t id = rnd.get_random_32bit_number()%objs.size();
+        size_t id = rnd.get_random_32bit_number()%objs.size();
+        // don't pick a person we already added to the mini-batch
+        while(already_selected[id])
+            id = rnd.get_random_32bit_number()%objs.size();
+        already_selected[id] = true;
+
        for (size_t j = 0; j < samples_per_id; ++j)
        {
            const auto& obj = objs[id][rnd.get_random_32bit_number()%objs[id].size()];
@@ -65,9 +128,12 @@ void load_mini_batch (
    }
 }

-
 // ----------------------------------------------------------------------------------------

+// The next page of code defines the ResNet-34 network.  It's basically copied
+// and pasted from the dnn_imagenet_ex.cpp example, except we replaced the loss
+// layer with loss_metric.
+
 template <template <int,template<typename>class,int,typename> class block, int N, template<typename>class BN, typename SUBNET>
 using residual = add_prev1<block<N,BN,1,tag1<SUBNET>>>;

@@ -83,7 +149,6 @@ template <int N, typename SUBNET> using ares      = relu<residual<block,N,affine
 template <int N, typename SUBNET> using res_down  = relu<residual_down<block,N,bn_con,SUBNET>>;
 template <int N, typename SUBNET> using ares_down = relu<residual_down<block,N,affine,SUBNET>>;

-
 // ----------------------------------------------------------------------------------------

 template <typename SUBNET> using level1 = res<512,res<512,res_down<512,SUBNET>>>;
@@ -125,8 +190,11 @@ int main(int argc, char** argv)
 {
    if (argc != 2)
    {
-        cout << "Give folder as input.  It should contain sub-folders of images and we will " << endl;
-        cout << "learn to distinguish these sub-folders with metric learning." << endl;
+        cout << "Give a folder as input.  It should contain sub-folders of images and we will " << endl;
+        cout << "learn to distinguish between these sub-folders with metric learning.  " << endl;
+        cout << "For example, you can run this program on the very small examples/johns dataset" << endl;
+        cout << "that comes with dlib by running this command:" << endl;
+        cout << "   ./dnn_metric_learning_on_images_ex johns" << endl;
        return 1;
    }

@@ -144,10 +212,16 @@ int main(int argc, char** argv)
    trainer.set_learning_rate(0.1);
    trainer.be_verbose();
    trainer.set_synchronization_file("face_metric_sync", std::chrono::minutes(5));
+    // I've set this to something really small to make the example terminate
+    // sooner.  But when you really want to train a good model you should set
+    // this to something like 8000 so training doesn't terminate too early.
    trainer.set_iterations_without_progress_threshold(300);

-    // It's important to feed the GPU fast enough to keep it occupied.  So here we create a
-    // bunch of threads that are responsible for creating mini-batches of training data.
+    // If you have a lot of data then it might not be reasonable to load it all
+    // into RAM.  So you will need to be sure you are decompressing your images
+    // and loading them fast enough to keep the GPU occupied.  I like to do this
+    // using the following coding pattern: create a bunch of threads that dump
+    // mini-batches into dlib::pipes.  
    dlib::pipe<std::vector<matrix<rgb_pixel>>> qimages(4);
    dlib::pipe<std::vector<unsigned long>> qlabels(4);
    auto data_loader = [&qimages, &qlabels, &objs](time_t seed)
@@ -159,7 +233,7 @@ int main(int argc, char** argv)
        {
            try
            {
-                load_mini_batch(15,15,rnd, objs, images, labels);
+                load_mini_batch(5, 5, rnd, objs, images, labels);
                qimages.enqueue(images);
                qlabels.enqueue(labels);
            }
@@ -170,6 +244,8 @@ int main(int argc, char** argv)
            }
        }
    };
+    // Run the data_loader from 5 threads.  You should set the number of threads
+    // relative to the number of CPU cores you have.
    std::thread data_loader1([data_loader](){ data_loader(1); });
    std::thread data_loader2([data_loader](){ data_loader(2); });
    std::thread data_loader3([data_loader](){ data_loader(3); });
@@ -186,7 +262,7 @@ int main(int argc, char** argv)
        trainer.train_one_step(images, labels);
    }

-    // wait for training threads to stop
+    // Wait for training threads to stop
    trainer.get_net();
    cout << "done training" << endl;

@@ -207,16 +283,16 @@ int main(int argc, char** argv)



-    // Now, just to show an example of how you would use the network, lets check how well
+    // Now, just to show an example of how you would use the network, let's check how well
    // it performs on the training data.
    dlib::rand rnd(time(0));
-    load_mini_batch(15,15,rnd, objs, images, labels);
+    load_mini_batch(5, 5, rnd, objs, images, labels);

    // Run all the images through the network to get their vector embeddings.
    std::vector<matrix<float,0,1>> embedded = net(images);

-    // Now, check if the embedding puts things with the same labels near each other and
-    // things with different labels far apart.
+    // Now, check if the embedding puts images with the same labels near each other and
+    // images with different labels far apart.
    int num_right = 0;
    int num_wrong = 0;
    for (size_t i = 0; i < embedded.size(); ++i)
@@ -225,7 +301,7 @@ int main(int argc, char** argv)
        {
            if (labels[i] == labels[j])
            {
-                // The loss_metric layer will cause things with the same label to be less
+                // The loss_metric layer will cause images with the same label to be less
                // than net.loss_details().get_distance_threshold() distance from each
                // other.  So we can use that distance value as our testing threshold.
                if (length(embedded[i]-embedded[j]) < net.loss_details().get_distance_threshold())

--- a/examples/johns/John_Salley/000179_02159509.jpg
+++ b/examples/johns/John_Salley/000179_02159509.jpg
--- a/examples/johns/John_Salley/000183_02159543.jpg
+++ b/examples/johns/John_Salley/000183_02159543.jpg
--- a/examples/johns/John_Salley/000186_02159346.jpg
+++ b/examples/johns/John_Salley/000186_02159346.jpg
--- a/examples/johns/John_Salley/000189_02159361.jpg
+++ b/examples/johns/John_Salley/000189_02159361.jpg
--- a/examples/johns/John_Salley/000190_02159501.jpg
+++ b/examples/johns/John_Salley/000190_02159501.jpg
--- a/examples/johns/John_Salley/000192_02159531.jpg
+++ b/examples/johns/John_Salley/000192_02159531.jpg
--- a/examples/johns/John_Salley/000194_02159572.jpg
+++ b/examples/johns/John_Salley/000194_02159572.jpg
--- a/examples/johns/John_Salley/000197_02159322.jpg
+++ b/examples/johns/John_Salley/000197_02159322.jpg
--- a/examples/johns/John_Salley/000197_02159525.jpg
+++ b/examples/johns/John_Salley/000197_02159525.jpg
--- a/examples/johns/John_Salley/000198_02159470.jpg
+++ b/examples/johns/John_Salley/000198_02159470.jpg
--- a/examples/johns/John_Salley/000200_02159354.jpg
+++ b/examples/johns/John_Salley/000200_02159354.jpg
--- a/examples/johns/John_Savage/000264_01099001.jpg
+++ b/examples/johns/John_Savage/000264_01099001.jpg
--- a/examples/johns/John_Savage/000274_01099061.jpg
+++ b/examples/johns/John_Savage/000274_01099061.jpg
--- a/examples/johns/John_Savage/000277_01099000.jpg
+++ b/examples/johns/John_Savage/000277_01099000.jpg
--- a/examples/johns/John_Savage/000289_01099139.jpg
+++ b/examples/johns/John_Savage/000289_01099139.jpg
--- a/examples/johns/John_Savage/000290_01099067.jpg
+++ b/examples/johns/John_Savage/000290_01099067.jpg
--- a/examples/johns/John_Savage/000290_01099090.jpg
+++ b/examples/johns/John_Savage/000290_01099090.jpg
--- a/examples/johns/John_Savage/000291_01099023.jpg
+++ b/examples/johns/John_Savage/000291_01099023.jpg
--- a/examples/johns/John_Savage/000291_01099214.jpg
+++ b/examples/johns/John_Savage/000291_01099214.jpg
--- a/examples/johns/John_Savage/000293_01099081.jpg
+++ b/examples/johns/John_Savage/000293_01099081.jpg
--- a/examples/johns/John_Savage/000296_01099007.jpg
+++ b/examples/johns/John_Savage/000296_01099007.jpg
--- a/examples/johns/John_Savage/000299_01099008.jpg
+++ b/examples/johns/John_Savage/000299_01099008.jpg
--- a/examples/johns/John_Schneider/000288_00925786.jpg
+++ b/examples/johns/John_Schneider/000288_00925786.jpg
--- a/examples/johns/John_Schneider/000302_00925785.jpg
+++ b/examples/johns/John_Schneider/000302_00925785.jpg
--- a/examples/johns/John_Schneider/000307_00925823.jpg
+++ b/examples/johns/John_Schneider/000307_00925823.jpg
--- a/examples/johns/John_Schneider/000325_00925954.jpg
+++ b/examples/johns/John_Schneider/000325_00925954.jpg
--- a/examples/johns/John_Schneider/000326_00925765.jpg
+++ b/examples/johns/John_Schneider/000326_00925765.jpg
--- a/examples/johns/John_Schneider/000326_00926089.jpg
+++ b/examples/johns/John_Schneider/000326_00926089.jpg
--- a/examples/johns/John_Schneider/000326_00926128.jpg
+++ b/examples/johns/John_Schneider/000326_00926128.jpg
--- a/examples/johns/John_Schneider/000326_00926139.jpg
+++ b/examples/johns/John_Schneider/000326_00926139.jpg
--- a/examples/johns/John_Schneider/000329_00925859.jpg
+++ b/examples/johns/John_Schneider/000329_00925859.jpg
--- a/examples/johns/John_Schneider/000329_00925963.jpg
+++ b/examples/johns/John_Schneider/000329_00925963.jpg
--- a/examples/johns/John_Schneider/000331_00926012.jpg
+++ b/examples/johns/John_Schneider/000331_00926012.jpg
--- a/examples/johns/John_Shimkus/000373_03228153.jpg
+++ b/examples/johns/John_Shimkus/000373_03228153.jpg
--- a/examples/johns/John_Shimkus/000375_03227651.jpg
+++ b/examples/johns/John_Shimkus/000375_03227651.jpg
--- a/examples/johns/John_Shimkus/000376_02340068.jpg
+++ b/examples/johns/John_Shimkus/000376_02340068.jpg
--- a/examples/johns/John_Shimkus/000378_02340151.jpg
+++ b/examples/johns/John_Shimkus/000378_02340151.jpg
--- a/examples/johns/John_Shimkus/000378_03227610.jpg
+++ b/examples/johns/John_Shimkus/000378_03227610.jpg
--- a/examples/johns/John_Shimkus/000383_03227939.jpg
+++ b/examples/johns/John_Shimkus/000383_03227939.jpg
--- a/examples/johns/John_Shimkus/000385_03227766.jpg
+++ b/examples/johns/John_Shimkus/000385_03227766.jpg
--- a/examples/johns/John_Shimkus/000388_03227773.jpg
+++ b/examples/johns/John_Shimkus/000388_03227773.jpg
--- a/examples/johns/John_Shimkus/000390_03227666.jpg
+++ b/examples/johns/John_Shimkus/000390_03227666.jpg
--- a/examples/johns/John_Shimkus/000394_02340150.jpg
+++ b/examples/johns/John_Shimkus/000394_02340150.jpg
--- a/examples/johns/John_Shimkus/000396_03227722.jpg
+++ b/examples/johns/John_Shimkus/000396_03227722.jpg
--- a/examples/johns/John_Simm/000288_00470387.jpg
+++ b/examples/johns/John_Simm/000288_00470387.jpg
--- a/examples/johns/John_Simm/000297_00470170.jpg
+++ b/examples/johns/John_Simm/000297_00470170.jpg
--- a/examples/johns/John_Simm/000300_00470148.jpg
+++ b/examples/johns/John_Simm/000300_00470148.jpg
--- a/examples/johns/John_Simm/000304_00470122.jpg
+++ b/examples/johns/John_Simm/000304_00470122.jpg
--- a/examples/johns/John_Simm/000305_00470162.jpg
+++ b/examples/johns/John_Simm/000305_00470162.jpg
--- a/examples/johns/John_Simm/000305_00470717.jpg
+++ b/examples/johns/John_Simm/000305_00470717.jpg
--- a/examples/johns/John_Simm/000306_00470222.jpg
+++ b/examples/johns/John_Simm/000306_00470222.jpg
--- a/examples/johns/John_Simm/000306_00470223.jpg
+++ b/examples/johns/John_Simm/000306_00470223.jpg
--- a/examples/johns/John_Simm/000309_00470287.jpg
+++ b/examples/johns/John_Simm/000309_00470287.jpg
--- a/examples/johns/John_Simm/000310_00470421.jpg
+++ b/examples/johns/John_Simm/000310_00470421.jpg
--- a/examples/johns/John_Simm/000310_00470511.jpg
+++ b/examples/johns/John_Simm/000310_00470511.jpg