Commit fd132304 authored by Davis King

Finished the more complex metric learning example and added some example data.

parent f4b3c7ee
// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt
/*
This is an example illustrating the use of the deep learning tools from the
dlib C++ Library. In it, we will show how to use the loss_metric layer to do
metric learning on images.
The main reason you might want to use this kind of algorithm is that you
would like to use a k-nearest neighbor classifier or similar algorithm, but
you don't know a good way to calculate the distance between two things. A
popular example would be face recognition. There are a whole lot of papers
that train some kind of deep metric learning algorithm that embeds face
images in some vector space where images of the same person are close to each
other and images of different people are far apart. Then in that vector
space it's very easy to do face recognition with some kind of k-nearest
neighbor classifier.
In this example we will use the ResNet-34 network from the dnn_imagenet_ex.cpp
example to learn to map images into some vector space where pictures of
the same person are close and pictures of different people are far apart.
You might want to read the simpler introduction to the deep metric learning
API, dnn_metric_learning_ex.cpp, before reading this example. You should
also have read the examples that introduce the dlib DNN API before
continuing. These are dnn_introduction_ex.cpp and dnn_introduction2_ex.cpp.
*/
#include <dlib/dnn.h>
#include <dlib/image_io.h>
......@@ -6,7 +32,30 @@
using namespace dlib;
using namespace std;
// ----------------------------------------------------------------------------------------
// We will need to create some functions for loading data. This program will
// expect to be given a directory structured as follows:
// top_level_directory/
// person1/
// image1.jpg
// image2.jpg
// image3.jpg
// person2/
// image4.jpg
// image5.jpg
// image6.jpg
// person3/
// image7.jpg
// image8.jpg
// image9.jpg
//
// The specific folder and image names don't matter, nor does the number of folders or
// images. What does matter is that there is a top level folder, which contains
// subfolders, and each subfolder contains images of a single person.
// This function spiders the top level directory and obtains a list of all the
// image files.
std::vector<std::vector<string>> load_objects_list (
const string& dir
)
......@@ -23,9 +72,16 @@ std::vector<std::vector<string>> load_objects_list (
return objects;
}
// This function takes the output of load_objects_list() as input and randomly
// selects images for training. It should also be pointed out that it's really
// important that each mini-batch contain multiple images of each person. This
// is because the metric learning algorithm needs to consider pairs of images
// that should be close (i.e. images of the same person) as well as pairs of
// images that should be far apart (i.e. images of different people) during each
// training step.
void load_mini_batch (
const size_t num_ids,
const size_t samples_per_id,
const size_t num_people, // how many different people to include
const size_t samples_per_id, // how many images per person to select.
dlib::rand& rnd,
const std::vector<std::vector<string>>& objs,
std::vector<matrix<rgb_pixel>>& images,
......@@ -34,11 +90,18 @@ void load_mini_batch (
{
images.clear();
labels.clear();
DLIB_CASSERT(num_people <= objs.size(), "The dataset doesn't have that many people in it.");
std::vector<bool> already_selected(objs.size(), false);
matrix<rgb_pixel> image;
for (size_t i = 0; i < num_ids; ++i)
for (size_t i = 0; i < num_people; ++i)
{
const size_t id = rnd.get_random_32bit_number()%objs.size();
size_t id = rnd.get_random_32bit_number()%objs.size();
// don't pick a person we already added to the mini-batch
while(already_selected[id])
id = rnd.get_random_32bit_number()%objs.size();
already_selected[id] = true;
for (size_t j = 0; j < samples_per_id; ++j)
{
const auto& obj = objs[id][rnd.get_random_32bit_number()%objs[id].size()];
......@@ -65,9 +128,12 @@ void load_mini_batch (
}
}
// ----------------------------------------------------------------------------------------
// The next page of code defines the ResNet-34 network. It's basically copied
// and pasted from the dnn_imagenet_ex.cpp example, except we replaced the loss
// layer with loss_metric.
// Generic residual unit. `block` is instantiated with N, BN, a hard-coded 1 as
// its third argument, and a tag1-wrapped SUBNET; the result is wrapped in
// add_prev1. NOTE(review): per the dlib layer docs, add_prev1 adds the output of
// the tag1 layer to its input, which gives the skip connection -- confirm there.
template <template <int,template<typename>class,int,typename> class block, int N, template<typename>class BN, typename SUBNET>
using residual = add_prev1<block<N,BN,1,tag1<SUBNET>>>;
......@@ -83,7 +149,6 @@ template <int N, typename SUBNET> using ares = relu<residual<block,N,affine
// relu-wrapped residual_down units built from `block` with N. The two aliases
// differ only in the BN argument: res_down passes bn_con, ares_down passes affine.
// NOTE(review): in dlib, affine is normally the inference-time stand-in for
// bn_con -- confirm against the dlib layer docs.
template <int N, typename SUBNET> using res_down = relu<residual_down<block,N,bn_con,SUBNET>>;
template <int N, typename SUBNET> using ares_down = relu<residual_down<block,N,affine,SUBNET>>;
// ----------------------------------------------------------------------------------------
// Three stacked units, all with N=512: res_down is innermost (applied directly
// to SUBNET), followed by two res units.
template <typename SUBNET> using level1 = res<512,res<512,res_down<512,SUBNET>>>;
......@@ -125,8 +190,11 @@ int main(int argc, char** argv)
{
if (argc != 2)
{
cout << "Give folder as input. It should contain sub-folders of images and we will " << endl;
cout << "learn to distinguish these sub-folders with metric learning." << endl;
cout << "Give a folder as input. It should contain sub-folders of images and we will " << endl;
cout << "learn to distinguish between these sub-folders with metric learning. " << endl;
cout << "For example, you can run this program on the very small examples/johns dataset" << endl;
cout << "that comes with dlib by running this command:" << endl;
cout << " ./dnn_metric_learning_on_images_ex johns" << endl;
return 1;
}
......@@ -144,10 +212,16 @@ int main(int argc, char** argv)
trainer.set_learning_rate(0.1);
trainer.be_verbose();
trainer.set_synchronization_file("face_metric_sync", std::chrono::minutes(5));
// I've set this to something really small to make the example terminate
// sooner. But when you really want to train a good model you should set
// this to something like 8000 so training doesn't terminate too early.
trainer.set_iterations_without_progress_threshold(300);
// It's important to feed the GPU fast enough to keep it occupied. So here we create a
// bunch of threads that are responsible for creating mini-batches of training data.
// If you have a lot of data then it might not be reasonable to load it all
// into RAM. So you will need to be sure you are decompressing your images
// and loading them fast enough to keep the GPU occupied. I like to do this
// using the following coding pattern: create a bunch of threads that dump
// mini-batches into dlib::pipes.
dlib::pipe<std::vector<matrix<rgb_pixel>>> qimages(4);
dlib::pipe<std::vector<unsigned long>> qlabels(4);
auto data_loader = [&qimages, &qlabels, &objs](time_t seed)
......@@ -159,7 +233,7 @@ int main(int argc, char** argv)
{
try
{
load_mini_batch(15,15,rnd, objs, images, labels);
load_mini_batch(5, 5, rnd, objs, images, labels);
qimages.enqueue(images);
qlabels.enqueue(labels);
}
......@@ -170,6 +244,8 @@ int main(int argc, char** argv)
}
}
};
// Run the data_loader from 5 threads. You should set the number of threads
// relative to the number of CPU cores you have.
std::thread data_loader1([data_loader](){ data_loader(1); });
std::thread data_loader2([data_loader](){ data_loader(2); });
std::thread data_loader3([data_loader](){ data_loader(3); });
......@@ -186,7 +262,7 @@ int main(int argc, char** argv)
trainer.train_one_step(images, labels);
}
// wait for training threads to stop
// Wait for training threads to stop
trainer.get_net();
cout << "done training" << endl;
......@@ -207,16 +283,16 @@ int main(int argc, char** argv)
// Now, just to show an example of how you would use the network, lets check how well
// Now, just to show an example of how you would use the network, let's check how well
// it performs on the training data.
dlib::rand rnd(time(0));
load_mini_batch(15,15,rnd, objs, images, labels);
load_mini_batch(5, 5, rnd, objs, images, labels);
// Run all the images through the network to get their vector embeddings.
std::vector<matrix<float,0,1>> embedded = net(images);
// Now, check if the embedding puts things with the same labels near each other and
// things with different labels far apart.
// Now, check if the embedding puts images with the same labels near each other and
// images with different labels far apart.
int num_right = 0;
int num_wrong = 0;
for (size_t i = 0; i < embedded.size(); ++i)
......@@ -225,7 +301,7 @@ int main(int argc, char** argv)
{
if (labels[i] == labels[j])
{
// The loss_metric layer will cause things with the same label to be less
// The loss_metric layer will cause images with the same label to be less
// than net.loss_details().get_distance_threshold() distance from each
// other. So we can use that distance value as our testing threshold.
if (length(embedded[i]-embedded[j]) < net.loss_details().get_distance_threshold())
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment