• Patrick Snape's avatar
    Update the interface to be more Pythonic · dd19ce84
    Patrick Snape authored
    This is the biggest change so far. Now, there are two different
    classes of interface. One where you pass ONLY file paths,
    and one where you pass ONLY Python objects.
    
    The file paths are maintained to keep a matching interface with
    the C++ examples of dlib. Shape prediction and object detection
    can therefore be trained from dlib XML file paths, with the
    resulting predictors and detectors serialized to disk.
    
    Shape prediction and object detection can also be trained using
    numpy arrays and in-memory objects. In this case, the predictor
    and detector objects are returned from the training functions.
    To facilitate serializing these objects, they now have a 'save'
    method.
    
    Testing follows a similar pattern, in that it can take either XML
    files or in-memory objects. I also added back the concept of
    upsampling during testing to make amends for removing the
    simple_object_detector_py struct.
    dd19ce84
shape_predictor.h 7.51 KB
// Copyright (C) 2014  Davis E. King (davis@dlib.net)
// License: Boost Software License   See LICENSE.txt for the full license.
#ifndef DLIB_SHAPE_PREDICTOR_DETECTOR_H__
#define DLIB_SHAPE_PREDICTOR_DETECTOR_H__

#include "dlib/string.h"
#include "dlib/geometry.h"
#include "dlib/data_io/load_image_dataset.h"
#include "dlib/image_processing.h"

using namespace std;

namespace dlib
{

// ----------------------------------------------------------------------------------------

    // Plain bundle of settings consumed by train_shape_predictor_on_images(),
    // which forwards each numeric/string field to the matching setter on
    // shape_predictor_trainer.  A default-constructed instance holds the
    // values listed in the constructor's initializer list.
    struct shape_predictor_training_options
    {
        shape_predictor_training_options()
            : be_verbose(false),
              cascade_depth(10),
              tree_depth(4),
              num_trees_per_cascade_level(500),
              nu(0.1),
              oversampling_amount(20),
              feature_pool_size(400),
              lambda(0.1),
              num_test_splits(20),
              feature_pool_region_padding(0),
              random_seed("")
        {
        }

        // When true, the training routines echo the settings to std::cout and
        // switch the trainer into verbose mode.
        bool be_verbose;

        // Forwarded to the trainer's set_cascade_depth().
        unsigned long cascade_depth;

        // Forwarded to the trainer's set_tree_depth().
        unsigned long tree_depth;

        // Forwarded to the trainer's set_num_trees_per_cascade_level().
        unsigned long num_trees_per_cascade_level;

        // Forwarded to the trainer's set_nu(); training rejects values <= 0.
        double nu;

        // Forwarded to the trainer's set_oversampling_amount().
        unsigned long oversampling_amount;

        // Forwarded to the trainer's set_feature_pool_size().
        unsigned long feature_pool_size;

        // Forwarded to the trainer's set_lambda(); training rejects values <= 0.
        double lambda;

        // Forwarded to the trainer's set_num_test_splits().
        unsigned long num_test_splits;

        // Forwarded to the trainer's set_feature_pool_region_padding();
        // training rejects negative values.
        double feature_pool_region_padding;

        // Forwarded to the trainer's set_random_seed(); empty by default.
        std::string random_seed;
    };

// ----------------------------------------------------------------------------------------

    namespace impl
    {
        inline bool contains_any_detections (
            const std::vector<std::vector<full_object_detection> >& detections
        )
        {
            for (unsigned long i = 0; i < detections.size(); ++i)
            {
                if (detections[i].size() != 0)
                    return true;
            }
            return false;
        }
    }

// ----------------------------------------------------------------------------------------

    // Trains a shape_predictor from data that is already in memory.
    //
    // images     - container of training images; must have the same size()
    //              as detections.
    // detections - per-image lists of labeled detections; at least one list
    //              must be non-empty.
    // options    - trainer settings; lambda and nu must be > 0 and
    //              feature_pool_region_padding must be >= 0.
    //
    // Returns the predictor produced by shape_predictor_trainer::train().
    // Throws dlib::error when any of the preconditions above is violated.
    template <typename image_array>
    inline shape_predictor train_shape_predictor_on_images (
        image_array& images,
        std::vector<std::vector<full_object_detection> >& detections,
        const shape_predictor_training_options& options
    )
    {
        // Reject bad option values first, then inconsistent or empty data.
        if (options.lambda <= 0)
        {
            throw error("Invalid lambda value given to train_shape_predictor(), lambda must be > 0.");
        }
        if (options.nu <= 0)
        {
            throw error("Invalid nu value given to train_shape_predictor(), nu must be > 0.");
        }
        if (options.feature_pool_region_padding < 0)
        {
            throw error("Invalid feature_pool_region_padding value given to train_shape_predictor(), feature_pool_region_padding must be >= 0.");
        }
        if (images.size() != detections.size())
        {
            throw error("The list of images must have the same length as the list of detections.");
        }
        if (!impl::contains_any_detections(detections))
        {
            throw error("Error, the training dataset does not have any labeled object detections in it.");
        }

        // Copy every option onto the trainer.
        shape_predictor_trainer sp_trainer;
        sp_trainer.set_cascade_depth(options.cascade_depth);
        sp_trainer.set_tree_depth(options.tree_depth);
        sp_trainer.set_num_trees_per_cascade_level(options.num_trees_per_cascade_level);
        sp_trainer.set_nu(options.nu);
        sp_trainer.set_random_seed(options.random_seed);
        sp_trainer.set_oversampling_amount(options.oversampling_amount);
        sp_trainer.set_feature_pool_size(options.feature_pool_size);
        sp_trainer.set_feature_pool_region_padding(options.feature_pool_region_padding);
        sp_trainer.set_lambda(options.lambda);
        sp_trainer.set_num_test_splits(options.num_test_splits);

        if (options.be_verbose)
        {
            // Echo the effective configuration before training starts.
            std::cout
                << "Training with cascade depth: " << options.cascade_depth << std::endl
                << "Training with tree depth: " << options.tree_depth << std::endl
                << "Training with " << options.num_trees_per_cascade_level << " trees per cascade level." << std::endl
                << "Training with nu: " << options.nu << std::endl
                << "Training with random seed: " << options.random_seed << std::endl
                << "Training with oversampling amount: " << options.oversampling_amount << std::endl
                << "Training with feature pool size: " << options.feature_pool_size << std::endl
                << "Training with feature pool region padding: " << options.feature_pool_region_padding << std::endl
                << "Training with lambda: " << options.lambda << std::endl
                << "Training with " << options.num_test_splits << " split tests." << std::endl;
            sp_trainer.be_verbose();
        }

        return sp_trainer.train(images, detections);
    }

    inline void train_shape_predictor (
        const std::string& dataset_filename,
        const std::string& predictor_output_filename,
        const shape_predictor_training_options& options
    )
    {
        dlib::array<array2d<rgb_pixel> > images;
        std::vector<std::vector<full_object_detection> > objects;
        load_image_dataset(images, objects, dataset_filename);

        shape_predictor predictor = train_shape_predictor_on_images(images, objects, options);

        std::ofstream fout(predictor_output_filename.c_str(), std::ios::binary);
        int version = 1;
        serialize(predictor, fout);
        serialize(version, fout);

        if (options.be_verbose)
            std::cout << "Training complete, saved predictor to file " << predictor_output_filename << std::endl;
    }

// ----------------------------------------------------------------------------------------

    // Evaluates a predictor on in-memory images and labeled detections.
    //
    // images     - container of test images; must match detections in length.
    // detections - per-image lists of labeled detections.
    // scales     - optional per-image scale lists; may be empty, otherwise
    //              it must have one entry per image.
    // predictor  - the shape predictor under test.
    //
    // Returns the value produced by test_shape_predictor(), called with the
    // scales only when some were supplied.  Throws dlib::error when the list
    // lengths are inconsistent.
    template <typename image_array>
    inline double test_shape_predictor_with_images (
            image_array& images,
            std::vector<std::vector<full_object_detection> >& detections,
            std::vector<std::vector<double> >& scales,
            const shape_predictor& predictor
    )
    {
        if (images.size() != detections.size())
        {
            throw error("The list of images must have the same length as the list of detections.");
        }

        // An empty scales list means "no scales given" and skips the length check.
        const bool scales_given = !scales.empty();
        if (scales_given && scales.size() != images.size())
        {
            throw error("The list of scales must have the same length as the list of detections.");
        }

        if (!scales_given)
        {
            return test_shape_predictor(predictor, images, detections);
        }
        return test_shape_predictor(predictor, images, detections, scales);
    }

    inline double test_shape_predictor_py (
        const std::string& dataset_filename,
        const std::string& predictor_filename
    )
    {
        // Load the images, no scales can be provided
        dlib::array<array2d<rgb_pixel> > images;
        // This interface cannot take the scales parameter.
        std::vector<std::vector<double> > scales;
        std::vector<std::vector<full_object_detection> > objects;
        load_image_dataset(images, objects, dataset_filename);

        // Load the shape predictor
        shape_predictor predictor;
        int version = 0;
        std::ifstream fin(predictor_filename.c_str(), std::ios::binary);
        if (!fin)
            throw error("Unable to open file " + predictor_filename);
        deserialize(predictor, fin);
        deserialize(version, fin);
        if (version != 1)
            throw error("Unknown shape_predictor format.");

        return test_shape_predictor_with_images(images, objects, scales, predictor);
    }

// ----------------------------------------------------------------------------------------

}

#endif // DLIB_SHAPE_PREDICTOR_DETECTOR_H__