Commit 1ab34825 authored by Davis King

Clarified a few comments and simplified the serialization code a bit.

Also just cleaned up a few minor details.
parent 773fe59a
......@@ -59,16 +59,20 @@ for f in glob.glob(os.path.join(faces_folder_path, "*.jpg")):
# Ask the detector to find the bounding boxes of each face. The 1 in the
# second argument indicates that we should upsample the image 1 time. This
# will make everything bigger and allow us to detect more faces.
dets = detector(img, 1)
print("Number of faces detected: {}".format(len(dets)))
for k, d in enumerate(dets):
print("Detection {}: Left: {} Top: {} Right: {} Bottom: {}".format(
k, d.left(), d.top(), d.right(), d.bottom()))
shapes = predictor(img, d)
print("Part 0: {}, Part 1: {} ...".format(shapes.part(0),
# Add all facial landmarks one at a time
# Get the landmarks/parts for the face in box d.
shape = predictor(img, d)
print("Part 0: {}, Part 1: {} ...".format(shape.part(0),
# Draw the face landmarks on the screen.
raw_input("Hit enter to continue")
......@@ -2,9 +2,8 @@
# The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt
# This simple example shows how to call dlib's optimal linear assignment
# problem solver.
# It is an implementation of the famous Hungarian algorithm and is quite fast,
# operating in O(N^3) time.
# problem solver. It is an implementation of the famous Hungarian algorithm
# and is quite fast, operating in O(N^3) time.
# Dlib comes with a compiled python interface for python 2.7 on MS Windows. If
......@@ -83,45 +83,47 @@ def print_segment(sentence, names):
# Now let's make some training data. Each example is a sentence as well as a
# set of ranges which indicate the locations of any names.
names = dlib.ranges() # make an array of dlib.range objects.
segments = dlib.rangess() # make an array of arrays of dlib.range objects.
sentences = ["The other day I saw a man named Jim Smith",
"Davis King is the main author of the dlib Library",
"Bob Jones is a name and so is George Clinton",
"My dog is named Bob Barker",
"ABC is an acronym but John James Smith is a name",
"No names in this sentence at all"]
# set of ranges which indicate the locations of any names.
names = dlib.ranges() # make an array of dlib.range objects.
segments = dlib.rangess() # make an array of arrays of dlib.range objects.
sentences = []
sentences.append("The other day I saw a man named Jim Smith")
# We want to detect person names. So we note that the name is located within
# the range [8, 10). Note that we use half open ranges to identify segments.
# So in this case, the segment identifies the string "Jim Smith".
# So in this case, the segment identifies the string "Jim Smith".
names.append(dlib.range(8, 10))
# make names empty for use again below
names.clear() # make names empty for use again below
sentences.append("Davis King is the main author of the dlib Library")
names.append(dlib.range(0, 2))
sentences.append("Bob Jones is a name and so is George Clinton")
names.append(dlib.range(0, 2))
names.append(dlib.range(8, 10))
sentences.append("My dog is named Bob Barker")
names.append(dlib.range(4, 6))
sentences.append("ABC is an acronym but John James Smith is a name")
names.append(dlib.range(5, 8))
sentences.append("No names in this sentence at all")
# Now before we can pass these training sentences to the dlib tools we need to
# convert them into arrays of vectors as discussed above. We can use either a
# sparse or dense representation depending on our needs. In this example, we
......@@ -36,6 +36,7 @@ if len(sys.argv) != 2:
faces_folder = sys.argv[1]
# Now let's do the training. The train_simple_object_detector() function has a
# bunch of options, all of which come with reasonable default values. The next
# few lines go over some of these options.
......@@ -55,6 +56,9 @@ options.C = 5
options.num_threads = 4
options.be_verbose = True
training_xml_path = os.path.join(faces_folder, "training.xml")
testing_xml_path = os.path.join(faces_folder, "testing.xml")
# This function does the actual training. It will save the final detector to
# detector.svm. The input is an XML file that lists the images in the training
# dataset and also contains the positions of the face boxes. To create your
......@@ -63,11 +67,10 @@ options.be_verbose = True
# images with boxes. To see how to use it read the tools/imglab/README.txt
# file. But for this example, we just use the training.xml file included with
# dlib.
training_xml_path = os.path.join(faces_folder, "training.xml")
testing_xml_path = os.path.join(faces_folder, "testing.xml")
dlib.train_simple_object_detector(training_xml_path, "detector.svm", options)
# Now that we have a face detector we can test it. The first statement tests
# it on the training data. It will print the precision, recall, and then
# average precision.
......@@ -80,6 +83,10 @@ print("Training accuracy: {}".format(
print("Testing accuracy: {}".format(
dlib.test_simple_object_detector(testing_xml_path, "detector.svm")))
# Now let's use the detector as you would in a normal application. First we
# will load it from disk.
detector = dlib.simple_object_detector("detector.svm")
......@@ -106,6 +113,12 @@ for f in glob.glob(os.path.join(faces_folder, "*.jpg")):
raw_input("Hit enter to continue")
# Finally, note that you don't have to use the XML based input to
# train_simple_object_detector(). If you have already loaded your training
# images and bounding boxes for the objects then you can call it as shown
......@@ -126,10 +139,10 @@ boxes_img2 = ([dlib.rectangle(left=154, top=46, right=228, bottom=121),
boxes = [boxes_img1, boxes_img2]
detector2 = dlib.train_simple_object_detector(images, boxes, options)
# We could save this detector by uncommenting the following
# We could save this detector to disk by uncommenting the following.
#detector2.save('detector2.svm')
# Now let's load the trained detector and look at its HOG filter!
# Now let's look at its HOG filter!
raw_input("Hit enter to continue")
......@@ -8,7 +8,7 @@
# In particular, we will train a face landmarking model based on a small
# dataset and then evaluate it. If you want to visualize the output of the
# trained model on some images then you can run the face_landmark_detection.py
# example program with sp.dat as the input
# example program with predictor.dat as the input
# model.
# It should also be noted that this kind of model, while often used for face
......@@ -49,7 +49,7 @@ options = dlib.shape_predictor_training_options()
# Now make the object responsible for training the model.
# This algorithm has a bunch of parameters you can mess with. The
# documentation for the shape_predictor_trainer explains all of them.
# You should also read Kazemi paper which explains all the parameters
# You should also read Kazemi's paper which explains all the parameters
# in great detail. However, here I'm just setting three of them
# differently than their default values. I'm doing this because we
# have a very small dataset. In particular, setting the oversampling
......@@ -63,33 +63,35 @@ options.nu = 0.05
options.tree_depth = 2
options.be_verbose = True
# This function does the actual training. It will save the final predictor to
# predictor.dat. The input is an XML file that lists the images in the training
# dataset and also contains the positions of the face parts.
# dlib.train_shape_predictor() does the actual training. It will save the
# final predictor to predictor.dat. The input is an XML file that lists the
# images in the training dataset and also contains the positions of the face
# parts.
training_xml_path = os.path.join(faces_folder, "training_with_face_landmarks.xml")
testing_xml_path = os.path.join(faces_folder, "testing_with_face_landmarks.xml")
dlib.train_shape_predictor(training_xml_path, "predictor.dat", options)
# Now that we have a facial landmark predictor we can test it. The first
# statement tests it on the training data. It will print the mean average error
print("") # Print blank line to create gap from previous output
print("Training accuracy: {}".format(
# Now that we have a model we can test it. dlib.test_shape_predictor()
# measures the average distance between a face landmark output by the
# shape_predictor and where it should be according to the truth data.
print("\nTraining accuracy: {}".format(
dlib.test_shape_predictor(training_xml_path, "predictor.dat")))
# However, to get an idea if it really worked without overfitting we need to
# run it on images it wasn't trained on. The next line does this. Happily, we
# see that the object detector works perfectly on the testing images.
# The real test is to see how well it does on data it wasn't trained on. We
# trained it on a very small dataset so the accuracy is not extremely high, but
# it's still doing quite well. Moreover, if you train it on one of the large
# face landmarking datasets you will obtain state-of-the-art results, as shown
# in the Kazemi paper.
testing_xml_path = os.path.join(faces_folder, "testing_with_face_landmarks.xml")
print("Testing accuracy: {}".format(
dlib.test_shape_predictor(testing_xml_path, "predictor.dat")))
# Now let's use the detector as you would in a normal application. First we
# will load it from disk. We also need to load a face detector to provide the
# initial estimate of the facial location
detector = dlib.get_frontal_face_detector()
# Now let's use it as you would in a normal application. First we will load it
# from disk. We also need to load a face detector to provide the initial
# estimate of the facial location.
predictor = dlib.shape_predictor("predictor.dat")
detector = dlib.get_frontal_face_detector()
# Now let's run the detector and predictor over the images in the faces folder
# and display the results.
# Now let's run the detector and shape_predictor over the images in the faces
# folder and display the results.
print("Showing detections and predictions on the images in the faces folder...")
win = dlib.image_window()
for f in glob.glob(os.path.join(faces_folder, "*.jpg")):
......@@ -99,21 +101,21 @@ for f in glob.glob(os.path.join(faces_folder, "*.jpg")):
# Ask the detector to find the bounding boxes of each face. The 1 in the
# second argument indicates that we should upsample the image 1 time. This
# will make everything bigger and allow us to detect more faces.
dets = detector(img, 1)
print("Number of faces detected: {}".format(len(dets)))
for k, d in enumerate(dets):
print("Detection {}: Left: {} Top: {} Right: {} Bottom: {}".format(
k, d.left(), d.top(), d.right(), d.bottom()))
shapes = predictor(img, d)
print("Part 0: {}, Part 1: {} ...".format(shapes.part(0),
# Add all facial landmarks one at a time
# Get the landmarks/parts for the face in box d.
shape = predictor(img, d)
print("Part 0: {}, Part 1: {} ...".format(shape.part(0),
# Draw the face landmarks on the screen.
raw_input("Hit enter to continue")
# Finally, note that you don't have to use the XML based input to
# train_shape_predictor(). If you have already loaded your training
# images and full_object_detections for the objects then you can call it with
# the existing objects.
......@@ -51,9 +51,7 @@ void add_overlay_rect (
const rgb_pixel& color
std::vector<rectangle> rects;
win.add_overlay(rects, color);
win.add_overlay(rect, color);
void add_overlay_parts (
......@@ -62,9 +60,7 @@ void add_overlay_parts (
const rgb_pixel& color
std::vector<full_object_detection> detections;
win.add_overlay(render_face_detections(detections, color));
win.add_overlay(render_face_detections(detection, color));
boost::shared_ptr<image_window> make_image_window_from_image(object img)
......@@ -257,8 +257,9 @@ ensures \n\
"This object represents a sliding window histogram-of-oriented-gradients based object detector.")
.def("__init__", make_constructor(&load_object_from_file<type>),
"Loads a simple_object_detector from a file that contains the output of the \n\
train_simple_object_detector() routine.")
"Loads an object detector from a file that contains the output of the \n\
train_simple_object_detector() routine or a serialized C++ object of type\n\
.def("__call__", run_detector_with_upscale, (arg("image"), arg("upsample_num_times")=0),
"requires \n\
- image is a numpy ndarray containing either an 8bit grayscale or RGB \n\
......@@ -39,9 +39,10 @@ namespace dlib
inline void save_simple_object_detector(const simple_object_detector& detector, const std::string& detector_output_filename)
std::ofstream fout(detector_output_filename.c_str(), std::ios::binary);
int version = 1;
serialize(detector, fout);
serialize(version, fout);
// Don't need to save the version or upsampling amount because we want to write out the
// object detector just like the C++ code that serializes an object_detector would.
// We also don't know the upsampling amount in this case anyway.
......@@ -38,9 +38,7 @@ full_object_detection run_predictor (
void save_shape_predictor(const shape_predictor& predictor, const std::string& predictor_output_filename)
std::ofstream fout(predictor_output_filename.c_str(), std::ios::binary);
int version = 1;
serialize(predictor, fout);
serialize(version, fout);
// ----------------------------------------------------------------------------------------
......@@ -95,7 +93,7 @@ inline shape_predictor train_shape_predictor_on_images_py (
throw dlib::error("The length of the detections list must match the length of the images list.");
std::vector<std::vector<full_object_detection> > detections(num_images);
dlib::array<array2d<rgb_pixel> > images(num_images);
dlib::array<array2d<unsigned char> > images(num_images);
images_and_nested_params_to_dlib(pyimages, pydetections, images, detections);
return train_shape_predictor_on_images(images, detections, options);
......@@ -121,9 +119,9 @@ inline double test_shape_predictor_with_images_py (
std::vector<std::vector<double> > scales;
if (num_scales > 0)
dlib::array<array2d<rgb_pixel> > images(num_images);
dlib::array<array2d<unsigned char> > images(num_images);
// Now copy the data into dlib based objects so we can call the trainer.
// Now copy the data into dlib based objects so we can call the testing routine.
for (unsigned long i = 0; i < num_images; ++i)
const unsigned long num_boxes = len(pydetections[i]);
......@@ -193,7 +191,7 @@ void bind_shape_predictors()
"The regularization parameter. Larger values of this parameter \
will cause the algorithm to fit the training data better but may also \
cause overfitting.")
cause overfitting. The value must be in the range (0, 1].")
.add_property("oversampling_amount", &type::oversampling_amount,
"The number of randomly selected initial starting points sampled for each training example")
......@@ -232,7 +230,7 @@ train_shape_predictor() routine.")
- box is the bounding box to begin the shape prediction inside. \n\
ensures \n\
- This function runs the shape predictor on the input image and returns \n\
a single full object detection.")
a single full_object_detection.")
.def("save", save_shape_predictor, (arg("predictor_output_filename")), "Save a shape_predictor to the provided path.")
......@@ -241,36 +239,28 @@ ensures \n\
(arg("images"), arg("object_detections"), arg("options")),
"requires \n\
- options.lambda > 0 \n\
- options.nu > 0 \n\
- 0 < options.nu <= 1 \n\
- options.feature_pool_region_padding >= 0 \n\
- len(images) == len(object_detections) \n\
- images should be a list of numpy matrices that represent images, either RGB or grayscale. \n\
- object_detections should be a list of lists of dlib.full_object_detection objects. \
Each dlib.full_object_detection contains the bounding box and the lists of points that make up the object parts.\n\
ensures \n\
- Uses the shape_predictor_trainer to train a \n\
shape_predictor based on the provided labeled images and full object detections.\n\
- This function will apply a reasonable set of default parameters and \n\
preprocessing techniques to the training procedure for shape_predictors \n\
objects. So the point of this function is to provide you with a very easy \n\
way to train a basic shape predictor. \n\
- Uses dlib's shape_predictor_trainer object to train a \n\
shape_predictor based on the provided labeled images, full_object_detections, and options.\n\
- The trained shape_predictor is returned");
def("train_shape_predictor", train_shape_predictor,
(arg("dataset_filename"), arg("predictor_output_filename"), arg("options")),
"requires \n\
- options.lambda > 0 \n\
- options.nu > 0 \n\
- 0 < options.nu <= 1 \n\
- options.feature_pool_region_padding >= 0 \n\
ensures \n\
- Uses the shape_predictor_trainer to train a \n\
- Uses dlib's shape_predictor_trainer to train a \n\
shape_predictor based on the labeled images in the XML file \n\
dataset_filename. This function assumes the file dataset_filename is in the \n\
dataset_filename and the provided options. This function assumes the file dataset_filename is in the \n\
XML format produced by dlib's save_image_dataset_metadata() routine. \n\
- This function will apply a reasonable set of default parameters and \n\
preprocessing techniques to the training procedure for shape_predictors \n\
objects. So the point of this function is to provide you with a very easy \n\
way to train a basic shape predictor. \n\
- The trained shape predictor is serialized to the file predictor_output_filename.");
def("test_shape_predictor", test_shape_predictor_py,
......@@ -73,8 +73,8 @@ namespace dlib
if (options.lambda <= 0)
throw error("Invalid lambda value given to train_shape_predictor(), lambda must be > 0.");
if (options.nu <= 0)
throw error("Invalid nu value given to train_shape_predictor(), nu must be > 0.");
if (!(0 < options.nu && options.nu <= 1))
throw error("Invalid nu value given to train_shape_predictor(). It is required that 0 < nu <= 1.");
if (options.feature_pool_region_padding < 0)
throw error("Invalid feature_pool_region_padding value given to train_shape_predictor(), feature_pool_region_padding must be >= 0.");
......@@ -123,16 +123,13 @@ namespace dlib
const shape_predictor_training_options& options
dlib::array<array2d<rgb_pixel> > images;
dlib::array<array2d<unsigned char> > images;
std::vector<std::vector<full_object_detection> > objects;
load_image_dataset(images, objects, dataset_filename);
shape_predictor predictor = train_shape_predictor_on_images(images, objects, options);
std::ofstream fout(predictor_output_filename.c_str(), std::ios::binary);
int version = 1;
serialize(predictor, fout);
serialize(version, fout);
serialize(predictor_output_filename) << predictor;
if (options.be_verbose)
std::cout << "Training complete, saved predictor to file " << predictor_output_filename << std::endl;
......@@ -165,7 +162,7 @@ namespace dlib
// Load the images, no scales can be provided
dlib::array<array2d<rgb_pixel> > images;
dlib::array<array2d<unsigned char> > images;
// This interface cannot take the scales parameter.
std::vector<std::vector<double> > scales;
std::vector<std::vector<full_object_detection> > objects;
......@@ -173,14 +170,7 @@ namespace dlib
// Load the shape predictor
shape_predictor predictor;
int version = 0;
std::ifstream fin(predictor_filename.c_str(), std::ios::binary);
if (!fin)
throw error("Unable to open file " + predictor_filename);
deserialize(predictor, fin);
deserialize(version, fin);
if (version != 1)
throw error("Unknown shape_predictor format.");
deserialize(predictor_filename) >> predictor;
return test_shape_predictor_with_images(images, objects, scales, predictor);
......@@ -276,32 +276,33 @@ namespace dlib
// Load the detector off disk (We have to use the explicit serialization here
// so that we have an open file stream)
simple_object_detector detector;
int version = 0;
std::ifstream fin(detector_filename.c_str(), std::ios::binary);
if (!fin)
throw error("Unable to open file " + detector_filename);
deserialize(detector, fin);
deserialize(version, fin);
if (version != 1)
throw error("Unknown simple_object_detector format.");
/* Here we need a little hack to deal with whether we are going to be loading a
* simple_object_detector (possibly trained outside of Python) or a
* simple_object_detector_py (definitely trained from Python). In order to do
* this we peek into the filestream to see if there is more data after the
* version number. If there is, it will be the upsampling amount. Therefore,
* by default we set the upsampling amount to -1 so that we can catch when
* no upsampling amount has been passed (numbers less than 0). If -1 is
* passed, we assume no upsampling and use 0. If a number > 0 is passed,
* we use that, else we use the upsampling amount cached with the detector
* (if it exists).
* simple_object_detector_py (definitely trained from Python). In order to do this
* we peek into the filestream to see if there is more data after the object
* detector. If there is, it will be the version and upsampling amount. Therefore,
* by default we set the upsampling amount to -1 so that we can catch when no
* upsampling amount has been passed (numbers less than 0). If -1 is passed, we
* assume no upsampling and use 0. If a number > 0 is passed, we use that, else we
* use the upsampling amount saved in the detector file (if it exists).
unsigned int final_upsampling_amount = 0;
const unsigned int cached_upsample_amount = fin.peek();
if (fin.peek() != EOF)
int version = 0;
deserialize(version, fin);
if (version != 1)
throw error("Unknown simple_object_detector format.");
deserialize(final_upsampling_amount, fin);
if (upsample_amount >= 0)
final_upsampling_amount = upsample_amount;
else if (cached_upsample_amount != std::char_traits<wchar_t>::eof()) // peek() returns EOF if no more data
deserialize(final_upsampling_amount, fin);
return test_simple_object_detector_with_images(images, final_upsampling_amount, boxes, ignore, detector);
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment