Commit 36cf4a9b authored by Davis King's avatar Davis King

Filled in the rest of the interface for the structural_sequence_labeling_trainer

and cleaned up a few other related things.
parent 72d3ab86
......@@ -42,7 +42,7 @@ namespace dlib
for (unsigned long j = 0; j < pred.size(); ++j)
{
const unsigned long truth = labels[i][j];
if (truth >= res.nr())
if (truth >= static_cast<unsigned long>(res.nr()))
{
// ignore labels the labeler doesn't know about.
continue;
......
......@@ -125,7 +125,7 @@ namespace dlib
weights(weights_)
{
// make sure requires clause is not broken
DLIB_ASSERT(fe_.num_features() == weights_.size(),
DLIB_ASSERT(fe_.num_features() == static_cast<unsigned long>(weights_.size()),
"\t sequence_labeler::sequence_labeler()"
<< "\n\t These sizes should match"
<< "\n\t fe_.num_features(): " << fe_.num_features()
......
......@@ -29,10 +29,15 @@ namespace dlib
explicit structural_sequence_labeling_trainer (
const feature_extractor& fe_
) : fe(fe_)
{}
{
set_defaults();
}
structural_sequence_labeling_trainer (
) {}
)
{
set_defaults();
}
const feature_extractor& get_feature_extractor (
) const { return fe; }
......@@ -40,6 +45,96 @@ namespace dlib
// Returns the number of distinct labels understood by the feature
// extractor, i.e. get_feature_extractor().num_labels().
unsigned long num_labels (
) const { return fe.num_labels(); }
// Sets the number of threads the structural SVM solver will use during
// training (forwarded to structural_svm_sequence_labeling_problem in train()).
void set_num_threads (
unsigned long num
)
{
num_threads = num;
}
// Returns the number of threads used during training.
unsigned long get_num_threads (
) const
{
return num_threads;
}
// Sets the stopping tolerance passed to the structural SVM solver.
// Requires eps_ > 0; violated preconditions are caught by the
// DLIB_ASSERT below in debug builds.
void set_epsilon (
double eps_
)
{
// make sure requires clause is not broken
DLIB_ASSERT(eps_ > 0,
"\t void structural_sequence_labeling_trainer::set_epsilon()"
<< "\n\t eps_ must be greater than 0"
<< "\n\t eps_: " << eps_
<< "\n\t this: " << this
);
eps = eps_;
}
const double get_epsilon (
) const { return eps; }
// Sets the number of cached labeler invocations kept per training sample
// by the structural SVM problem; 0 disables caching entirely.
void set_max_cache_size (
unsigned long max_size
)
{
max_cache_size = max_size;
}
// Returns the per-sample cache size used during training (see set_max_cache_size()).
unsigned long get_max_cache_size (
) const
{
return max_cache_size;
}
// Enables printing of optimization progress messages to standard out
// during train().
void be_verbose (
)
{
verbose = true;
}
// Disables all status output during training (the default).
void be_quiet (
)
{
verbose = false;
}
// Sets the oca optimizer used to solve the structural SVM problem in train().
void set_oca (
const oca& item
)
{
solver = item;
}
// Returns a copy of the oca optimizer used to solve the structural SVM
// problem.  (Returned as const by value, following the convention used
// elsewhere in this library for returned temporaries.)
const oca get_oca (
) const
{
return solver;
}
// Sets the SVM regularization parameter C.  Requires C_ > 0; violated
// preconditions are caught by the DLIB_ASSERT below in debug builds.
void set_c (
double C_
)
{
// make sure requires clause is not broken
DLIB_ASSERT(C_ > 0,
"\t void structural_sequence_labeling_trainer::set_c()"
<< "\n\t C_ must be greater than 0"
<< "\n\t C_: " << C_
<< "\n\t this: " << this
);
C = C_;
}
// Returns the SVM regularization parameter C: the trade-off between
// fitting the training data exactly (large C) and tolerating some
// training error in the hope of better generalization (small C).
double get_c (
) const
{
return C;
}
const sequence_labeler<feature_extractor> train(
const std::vector<sample_sequence_type>& x,
......@@ -77,12 +172,14 @@ namespace dlib
structural_svm_sequence_labeling_problem<feature_extractor> prob(x, y, fe);
oca solver;
structural_svm_sequence_labeling_problem<feature_extractor> prob(x, y, fe, num_threads);
matrix<double,0,1> weights;
prob.be_verbose();
prob.set_epsilon(0.5);
prob.set_c(100);
if (verbose)
prob.be_verbose();
prob.set_epsilon(eps);
prob.set_c(C);
prob.set_max_cache_size(max_cache_size);
solver(prob, weights);
return sequence_labeler<feature_extractor>(fe,weights);
......@@ -90,6 +187,22 @@ namespace dlib
private:
double C;
oca solver;
double eps;
bool verbose;
unsigned long num_threads;
unsigned long max_cache_size;
// Initializes every trainer parameter to its documented default.
// Called from both constructors; these values must stay in sync with
// the defaults promised by the abstract documentation
// (C == 100, eps == 0.1, num_threads == 2, max_cache_size == 40, quiet).
void set_defaults ()
{
C = 100;
verbose = false;
eps = 0.1;
num_threads = 2;
max_cache_size = 40;
}
feature_extractor fe;
};
......
......@@ -20,32 +20,61 @@ namespace dlib
class structural_sequence_labeling_trainer
{
/*!
REQUIREMENTS ON feature_extractor
It must be an object that implements an interface compatible with
the example_feature_extractor defined in dlib/svm/sequence_labeler_abstract.h.
WHAT THIS OBJECT REPRESENTS
This object is a tool for learning to do sequence labeling based
on a set of training data. The training procedure produces a
sequence_labeler object which can be used to predict the labels of
new data sequences.
Note that this is just a convenience wrapper around the
structural_svm_sequence_labeling_problem to make it look
similar to all the other trainers in dlib.
!*/
public:
typedef typename feature_extractor::sample_type sample_type;
typedef std::vector<sample_type> sample_sequence_type;
typedef std::vector<sample_type> sample_sequence_type;
typedef std::vector<unsigned long> labeled_sequence_type;
typedef sequence_labeler<feature_extractor> trained_function_type;
explicit structural_sequence_labeling_trainer (
const feature_extractor& fe_
) : fe(fe_)
{}
structural_sequence_labeling_trainer (
) {}
);
/*!
ensures
- #get_c() == 100
- this object isn't verbose
- #get_epsilon() == 0.1
- #get_num_threads() == 2
- #get_max_cache_size() == 40
- #get_feature_extractor() == a default initialized feature_extractor
!*/
explicit structural_sequence_labeling_trainer (
const feature_extractor& fe
);
/*!
ensures
- #get_c() == 100
- this object isn't verbose
- #get_epsilon() == 0.1
- #get_num_threads() == 2
- #get_max_cache_size() == 40
- #get_feature_extractor() == fe
!*/
const feature_extractor& get_feature_extractor (
) const { return fe; }
) const;
/*!
ensures
- returns the feature extractor used by this object
!*/
unsigned long num_labels (
) const { return fe.num_labels(); }
) const;
/*!
ensures
- returns get_feature_extractor().num_labels()
......@@ -53,6 +82,115 @@ namespace dlib
element of a sequence)
!*/
void set_num_threads (
unsigned long num
);
/*!
ensures
- #get_num_threads() == num
!*/
unsigned long get_num_threads (
) const;
/*!
ensures
- returns the number of threads used during training. You should
usually set this equal to the number of processing cores on your
machine.
!*/
void set_epsilon (
double eps
);
/*!
requires
- eps > 0
ensures
- #get_epsilon() == eps
!*/
const double get_epsilon (
) const;
/*!
ensures
- returns the error epsilon that determines when training should stop.
Smaller values may result in a more accurate solution but take longer
to train. You can think of this epsilon value as saying "solve the
optimization problem until the average number of labeling mistakes per
training sample is within epsilon of its optimal value".
!*/
void set_max_cache_size (
unsigned long max_size
);
/*!
ensures
- #get_max_cache_size() == max_size
!*/
unsigned long get_max_cache_size (
) const;
/*!
ensures
- During training, this object basically runs the sequence_labeler on
each training sample, over and over. To speed this up, it is possible to
cache the results of these labeler invocations. This function returns the
number of cache elements per training sample kept in the cache. Note
that a value of 0 means caching is not used at all.
!*/
void be_verbose (
);
/*!
ensures
- This object will print status messages to standard out so that a
user can observe the progress of the algorithm.
!*/
void be_quiet (
);
/*!
ensures
- this object will not print anything to standard out
!*/
void set_oca (
const oca& item
);
/*!
ensures
- #get_oca() == item
!*/
const oca get_oca (
) const;
/*!
ensures
- returns a copy of the optimizer used to solve the structural SVM problem.
!*/
void set_c (
double C
);
/*!
requires
- C > 0
ensures
- #get_c() == C
!*/
double get_c (
) const;
/*!
ensures
- returns the SVM regularization parameter. It is the parameter
that determines the trade-off between trying to fit the training
data (i.e. minimize the loss) or allowing more errors but hopefully
improving the generalization of the resulting sequence labeler. Larger
values encourage exact fitting while smaller values of C may encourage
better generalization.
!*/
const sequence_labeler<feature_extractor> train(
const std::vector<sample_sequence_type>& x,
const std::vector<labeled_sequence_type>& y
......
......@@ -76,9 +76,10 @@ namespace dlib
structural_svm_sequence_labeling_problem(
const std::vector<std::vector<sample_type> >& samples_,
const std::vector<std::vector<unsigned long> >& labels_,
const feature_extractor& fe_
const feature_extractor& fe_,
unsigned long num_threads = 2
) :
structural_svm_problem_threaded<matrix_type,feature_vector_type>(4),
structural_svm_problem_threaded<matrix_type,feature_vector_type>(num_threads),
samples(samples_),
labels(labels_),
fe(fe_)
......
......@@ -4,11 +4,10 @@
#ifdef DLIB_STRUCTURAL_SVM_SEQUENCE_LaBELING_PROBLEM_ABSTRACT_H__
#include "structural_svm_sequence_labeling_problem_abstract.h"
#include "../matrix.h"
#include "sequence_labeler.h"
#include <vector>
#include "structural_svm_problem_threaded.h"
#include "structural_svm_problem_threaded_abstract.h"
#include "sequence_labeler_abstract.h"
// ----------------------------------------------------------------------------------------
......@@ -19,26 +18,52 @@ namespace dlib
typename feature_extractor
>
class structural_svm_sequence_labeling_problem : noncopyable,
public structural_svm_problem_threaded<matrix<double,0,1>,
std::vector<std::pair<unsigned long,double> > >
public structural_svm_problem_threaded<matrix<double,0,1>,
std::vector<std::pair<unsigned long,double> > >
{
public:
typedef matrix<double,0,1> matrix_type;
typedef std::vector<std::pair<unsigned long, double> > feature_vector_type;
/*!
REQUIREMENTS ON feature_extractor
It must be an object that implements an interface compatible with
the example_feature_extractor defined in dlib/svm/sequence_labeler_abstract.h.
WHAT THIS OBJECT REPRESENTS
This object is a tool for learning the weight vector needed to use
a sequence_labeler object.
It learns the parameter vector by formulating the problem as a structural
SVM problem. The general approach is discussed in the paper:
Hidden Markov Support Vector Machines by
Y. Altun, I. Tsochantaridis, T. Hofmann
While the particular optimization strategy used is the method from:
T. Joachims, T. Finley, Chun-Nam Yu, Cutting-Plane Training of
Structural SVMs, Machine Learning, 77(1):27-59, 2009.
!*/
public:
typedef typename feature_extractor::sample_type sample_type;
structural_svm_sequence_labeling_problem(
const std::vector<std::vector<sample_type> >& samples_,
const std::vector<std::vector<unsigned long> >& labels_,
const feature_extractor& fe_
) :
structural_svm_problem_threaded<matrix_type,feature_vector_type>(4),
samples(samples_),
labels(labels_),
fe(fe_)
{
}
const std::vector<std::vector<sample_type> >& samples,
const std::vector<std::vector<unsigned long> >& labels,
const feature_extractor& fe,
unsigned long num_threads = 2
);
/*!
requires
- is_sequence_labeling_problem(samples, labels)
- for all valid i and j: labels[i][j] < fe.num_labels()
ensures
- This object attempts to learn a mapping from the given samples to the
given labels. In particular, it attempts to learn to predict labels[i]
based on samples[i]. Or in other words, this object can be used to learn
a parameter vector, w, such that a sequence_labeler declared as:
sequence_labeler<feature_extractor> labeler(fe,w)
results in a labeler object which attempts to compute the following mapping:
labels[i] == labeler(samples[i])
- This object will use num_threads threads during the optimization
procedure. You should set this parameter equal to the number of
available processing cores on your machine.
!*/
};
// ----------------------------------------------------------------------------------------
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment