Refactored the svm_nu_trainer. Specifically, I pulled the quadratic solver out and made it a separate class. The kernel_matrix_cache has also been removed in favor of the new symmetric_matrix_cache. Finally, the remaining bits of the svm_nu_trainer have been moved into svm_nu_trainer.h. Also note that invalid_svm_nu_error has been renamed to invalid_nu_error. --HG-- extra : convert_revision : svn%3Afdd8eb12-d10e-0410-9acb-85c331704f74/trunk%403994

Refactored the svm_nu_trainer. Specifically, I pulled the quadratic solver out and made it a separate class. The kernel_matrix_cache has also been removed in favor of the new symmetric_matrix_cache. Finally, the remaining bits of the svm_nu_trainer have been moved into svm_nu_trainer.h. Also note that invalid_svm_nu_error has been renamed to invalid_nu_error. --HG-- extra : convert_revision : svn%3Afdd8eb12-d10e-0410-9acb-85c331704f74/trunk%403994
b7a02418 · Davis King · dbba600f · b7a02418 · b7a02418 · b7a02418
Commit b7a02418 authored Dec 21, 2010 by Davis King
8 changed files
--- a/dlib/optimization.h
+++ b/dlib/optimization.h
@@ -6,6 +6,7 @@
 #include "optimization/optimization.h"
 #include "optimization/optimization_bobyqa.h"
 #include "optimization/optimization_solve_qp_using_smo.h"
+#include "optimization/optimization_solve_qp2_using_smo.h"
 #include "optimization/optimization_oca.h"
 #include "optimization/optimization_trust_region.h"
 #include "optimization/optimization_least_squares.h"

--- a/dlib/optimization/optimization_solve_qp2_using_smo.h
+++ b/dlib/optimization/optimization_solve_qp2_using_smo.h
--- a/dlib/optimization/optimization_solve_qp2_using_smo_abstract.h
+++ b/dlib/optimization/optimization_solve_qp2_using_smo_abstract.h
+// Copyright (C) 2007  Davis E. King (davis@dlib.net)
+// License: Boost Software License   See LICENSE.txt for the full license.
+#undef DLIB_OPTIMIZATION_SOLVE_QP2_USING_SMO_ABSTRACT_H_
+#ifdef DLIB_OPTIMIZATION_SOLVE_QP2_USING_SMO_ABSTRACT_H_
+
+#include "../matrix/matrix_abstract.h"
+#include "../algs.h"
+
+namespace dlib
+{
+
+// ----------------------------------------------------------------------------------------
+
+    class invalid_nu_error : public dlib::error 
+    { 
+        /*!
+            WHAT THIS OBJECT REPRESENTS
+                This object is an exception class used to indicate that a
+                value of nu given to the solve_qp2_using_smo object is incompatible 
+                with the constraints of the quadratic program.
+
+                this->nu will be set to the invalid value of nu used.  The msg
+                string given to the constructor is passed on to the dlib::error
+                base class.
+        !*/
+
+    public: 
+        invalid_nu_error(const std::string& msg, double nu_) : dlib::error(msg), nu(nu_) {};
+        const double nu;
+    };
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename T
+        >
+    typename T::type maximum_nu (
+        const T& y
+    );
+    /*!
+        requires
+            - T == a matrix object or an object convertible to a matrix via vector_to_matrix()
+            - is_col_vector(y) == true
+            - y.size() > 1
+            - sum((y == +1) + (y == -1)) == y.size()
+              (i.e. all elements of y must be equal to +1 or -1)
+        ensures
+            - returns the maximum valid nu that can be used with solve_qp2_using_smo and
+              the given y vector.
+              (i.e. 2.0*min(sum(y == +1), sum(y == -1))/y.size())
+            - Note that solve_qp2_using_smo is documented to throw invalid_nu_error
+              when it is given a nu that is >= the value returned by this function.
+    !*/
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename matrix_type
+        >
+    class solve_qp2_using_smo
+    {
+        /*!
+            REQUIREMENTS ON matrix_type
+                Must be some type of dlib::matrix.
+
+            WHAT THIS OBJECT REPRESENTS
+                This object is a tool for solving the following quadratic programming
+                problem using the sequential minimal optimization algorithm:  
+
+                  Minimize: f(alpha) == 0.5*trans(alpha)*Q*alpha 
+                  subject to the following constraints:
+                    - sum(alpha) == nu*y.size() 
+                    - 0 <= min(alpha) && max(alpha) <= 1 
+                    - trans(y)*alpha == 0
+                  Where f is convex.  This means that Q should be symmetric and positive-semidefinite.
+                  Here Q, y, and nu are the arguments given to operator() and alpha is the
+                  vector being solved for (it is returned via operator()'s alpha argument).
+                
+                
+                This object implements the strategy used by the LIBSVM tool and described 
+                by the following papers:
+                    - Chang and Lin, Training {nu}-Support Vector Classifiers: Theory and Algorithms
+                    - Chih-Chung Chang and Chih-Jen Lin, LIBSVM : a library for support vector 
+                      machines, 2001. Software available at http://www.csie.ntu.edu.tw/~cjlin/libsvm
+        !*/
+
+    public:
+
+        typedef typename matrix_type::mem_manager_type mem_manager_type;
+        typedef typename matrix_type::type scalar_type;
+        typedef typename matrix_type::layout_type layout_type;
+        typedef matrix<scalar_type,0,0,mem_manager_type,layout_type> general_matrix;
+        typedef matrix<scalar_type,0,1,mem_manager_type,layout_type> column_matrix;
+
+        template <
+            typename EXP1,
+            typename EXP2,
+            long NR
+            >
+        void operator() ( 
+            const matrix_exp<EXP1>& Q,
+            const matrix_exp<EXP2>& y,
+            const scalar_type nu,
+            matrix<scalar_type,NR,1,mem_manager_type, layout_type>& alpha,
+            scalar_type eps
+        );
+        /*!
+            requires
+                - Q.nr() == Q.nc()
+                - is_col_vector(y) == true
+                - y.size() == Q.nr()
+                - y.size() > 1
+                - sum((y == +1) + (y == -1)) == y.size()
+                  (i.e. all elements of y must be equal to +1 or -1)
+                - alpha must be capable of representing a vector of size y.size() elements
+                - 0 < nu <= 1
+                - eps > 0
+            ensures
+                - This function solves the quadratic program defined in this class's main comment.
+                - The solution to the quadratic program will be stored in #alpha.
+                - #alpha.size() == y.size()
+                - This function uses an implementation of the sequential minimal optimization 
+                  algorithm.  It runs until the KKT violation is less than eps.  So eps controls 
+                  how accurate the solution is and smaller values result in better solutions.
+                  (a reasonable eps is usually about 1e-3)
+                - #get_gradient() == Q*(#alpha)
+                  (i.e. stores the gradient of f() at #alpha in get_gradient())
+            throws
+                - invalid_nu_error
+                  This exception is thrown if nu >= maximum_nu(y).  
+                  (some values of nu cause the constraints to become impossible to satisfy. 
+                  If this is detected then an exception is thrown).
+        !*/
+
+        const column_matrix& get_gradient (
+        ) const;
+        /*!
+            ensures
+                - returns the gradient vector at the solution of the last problem solved
+                  by this object.  If no problem has been solved then returns an empty
+                  vector.
+        !*/
+
+    };
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_OPTIMIZATION_SOLVE_QP2_USING_SMO_ABSTRACT_H_
+
+
+
--- a/dlib/svm/svm.h
+++ b/dlib/svm/svm.h
--- a/dlib/svm/svm_abstract.h
+++ b/dlib/svm/svm_abstract.h
@@ -11,52 +11,13 @@
 #include "../serialize.h"
 #include "function_abstract.h"
 #include "kernel_abstract.h"
+#include "svm_nu_trainer_abstract.h"

 namespace dlib
 {

 // ----------------------------------------------------------------------------------------
 // ----------------------------------------------------------------------------------------
-// ----------------------------------------------------------------------------------------
-
-    class invalid_svm_nu_error : public dlib::error 
-    { 
-        /*!
-            WHAT THIS OBJECT REPRESENTS
-                This object is an exception class used to indicate that a
-                value of nu used for svm training is incompatible with a
-                particular data set.
-
-                this->nu will be set to the invalid value of nu used.
-        !*/
-
-    public: 
-        invalid_svm_nu_error(const std::string& msg, double nu_) : dlib::error(msg), nu(nu_) {};
-        const double nu;
-    };
-
-// ----------------------------------------------------------------------------------------
-
-    template <
-        typename T
-        >
-    typename T::type maximum_nu (
-        const T& y
-    );
-    /*!
-        requires
-            - T == a matrix object or an object convertible to a matrix via 
-              vector_to_matrix()
-            - y.nc() == 1
-            - y.nr() > 1
-            - for all valid i:
-                - y(i) == -1 or +1
-        ensures
-            - returns the maximum valid nu that can be used with the svm_nu_trainer and
-              the training set labels from the given y vector.
-              (i.e. 2.0*min(number of +1 examples in y, number of -1 examples in y)/y.nr())
-    !*/
-
 // ----------------------------------------------------------------------------------------

    template <
@@ -85,189 +46,6 @@ namespace dlib

 // ----------------------------------------------------------------------------------------
 // ----------------------------------------------------------------------------------------
-// ----------------------------------------------------------------------------------------
-
-    template <
-        typename K 
-        >
-    class svm_nu_trainer
-    {
-        /*!
-            REQUIREMENTS ON K 
-                is a kernel function object as defined in dlib/svm/kernel_abstract.h 
-
-            WHAT THIS OBJECT REPRESENTS
-                This object implements a trainer for a nu support vector machine for 
-                solving binary classification problems.
-
-                The implementation of the nu-svm training algorithm used by this object is based
-                on the following excellent papers:
-                    - Chang and Lin, Training {nu}-Support Vector Classifiers: Theory and Algorithms
-                    - Chih-Chung Chang and Chih-Jen Lin, LIBSVM : a library for support vector 
-                      machines, 2001. Software available at http://www.csie.ntu.edu.tw/~cjlin/libsvm
-
-        !*/
-
-    public:
-        typedef K kernel_type;
-        typedef typename kernel_type::scalar_type scalar_type;
-        typedef typename kernel_type::sample_type sample_type;
-        typedef typename kernel_type::mem_manager_type mem_manager_type;
-        typedef decision_function<kernel_type> trained_function_type;
-
-        svm_nu_trainer (
-        );
-        /*!
-            ensures
-                - This object is properly initialized and ready to be used
-                  to train a support vector machine.
-                - #get_nu() == 0.1 
-                - #get_cache_size() == 200
-                - #get_epsilon() == 0.001
-        !*/
-
-        svm_nu_trainer (
-            const kernel_type& kernel, 
-            const scalar_type& nu
-        );
-        /*!
-            requires
-                - 0 < nu <= 1
-            ensures
-                - This object is properly initialized and ready to be used
-                  to train a support vector machine.
-                - #get_kernel() == kernel
-                - #get_nu() == nu
-                - #get_cache_size() == 200
-                - #get_epsilon() == 0.001
-        !*/
-
-        void set_cache_size (
-            long cache_size
-        );
-        /*!
-            requires
-                - cache_size > 0
-            ensures
-                - #get_cache_size() == cache_size 
-        !*/
-
-        const long get_cache_size (
-        ) const;
-        /*!
-            ensures
-                - returns the number of megabytes of cache this object will use
-                  when it performs training via the this->train() function.
-                  (bigger values of this may make training go faster but won't affect 
-                  the result.  However, too big a value will cause you to run out of 
-                  memory, obviously.)
-        !*/
-
-        void set_epsilon (
-            scalar_type eps
-        );
-        /*!
-            requires
-                - eps > 0
-            ensures
-                - #get_epsilon() == eps 
-        !*/
-
-        const scalar_type get_epsilon (
-        ) const;
-        /*!
-            ensures
-                - returns the error epsilon that determines when training should stop.
-                  Generally a good value for this is 0.001.  Smaller values may result
-                  in a more accurate solution but take longer to execute.
-        !*/
-
-        void set_kernel (
-            const kernel_type& k
-        );
-        /*!
-            ensures
-                - #get_kernel() == k 
-        !*/
-
-        const kernel_type& get_kernel (
-        ) const;
-        /*!
-            ensures
-                - returns a copy of the kernel function in use by this object
-        !*/
-
-        void set_nu (
-            scalar_type nu
-        );
-        /*!
-            requires
-                - 0 < nu <= 1
-            ensures
-                - #get_nu() == nu
-        !*/
-
-        const scalar_type get_nu (
-        ) const;
-        /*!
-            ensures
-                - returns the nu svm parameter.  This is a value between 0 and
-                  1.  It is the parameter that determines the trade off between
-                  trying to fit the training data exactly or allowing more errors 
-                  but hopefully improving the generalization ability of the 
-                  resulting classifier.  Smaller values encourage exact fitting 
-                  while larger values of nu may encourage better generalization. 
-                  For more information you should consult the papers referenced 
-                  above.
-        !*/
-
-        template <
-            typename in_sample_vector_type,
-            typename in_scalar_vector_type
-            >
-        const decision_function<kernel_type> train (
-            const in_sample_vector_type& x,
-            const in_scalar_vector_type& y
-        ) const;
-        /*!
-            requires
-                - is_binary_classification_problem(x,y) == true
-                - x == a matrix or something convertible to a matrix via vector_to_matrix().
-                  Also, x should contain sample_type objects.
-                - y == a matrix or something convertible to a matrix via vector_to_matrix().
-                  Also, y should contain scalar_type objects.
-            ensures
-                - trains a nu support vector classifier given the training samples in x and 
-                  labels in y.  Training is done when the error is less than get_epsilon().
-                - returns a decision function F with the following properties:
-                    - if (new_x is a sample predicted have +1 label) then
-                        - F(new_x) >= 0
-                    - else
-                        - F(new_x) < 0
-            throws
-                - invalid_svm_nu_error
-                  This exception is thrown if get_nu() >= maximum_nu(y)
-                - std::bad_alloc
-        !*/
-
-        void swap (
-            svm_nu_trainer& item
-        );
-        /*!
-            ensures
-                - swaps *this and item
-        !*/
-    }; 
-
-    template <typename K>
-    void swap (
-        svm_nu_trainer<K>& a,
-        svm_nu_trainer<K>& b
-    ) { a.swap(b); }
-    /*!
-        provides a global swap
-    !*/
-
 // ----------------------------------------------------------------------------------------

    template <
@@ -288,9 +66,8 @@ namespace dlib
            - is_binary_classification_problem(x,y) == true
            - trainer_type == some kind of batch trainer object (e.g. svm_nu_trainer)
        ensures
-            - trains a nu support vector classifier given the training samples in x and 
-              labels in y.  
-            - returns a probabilistic_decision_function that represents the trained svm.
+            - trains a classifier given the training samples in x and labels in y.  
+            - returns a probabilistic_decision_function that represents the trained classifier.
            - The parameters of the probability model are estimated by performing k-fold 
              cross validation. 
            - The number of folds used is given by the folds argument.

--- a/dlib/svm/svm_nu_trainer.h
+++ b/dlib/svm/svm_nu_trainer.h
--- a/dlib/svm/svm_nu_trainer_abstract.h
+++ b/dlib/svm/svm_nu_trainer_abstract.h
+// Copyright (C) 2007  Davis E. King (davis@dlib.net)
+// License: Boost Software License   See LICENSE.txt for the full license.
+#undef DLIB_SVm_NU_TRAINER_ABSTRACT_
+#ifdef DLIB_SVm_NU_TRAINER_ABSTRACT_
+
+#include <cmath>
+#include <limits>
+#include <sstream>
+#include "../matrix/matrix_abstract.h"
+#include "../algs.h"
+#include "../serialize.h"
+#include "function_abstract.h"
+#include "kernel_abstract.h"
+#include "../optimization/optimization_solve_qp2_using_smo_abstract.h"
+
+namespace dlib
+{
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename K 
+        >
+    class svm_nu_trainer
+    {
+        /*!
+            REQUIREMENTS ON K 
+                is a kernel function object as defined in dlib/svm/kernel_abstract.h 
+
+            WHAT THIS OBJECT REPRESENTS
+                This object implements a trainer for a nu support vector machine for 
+                solving binary classification problems.
+
+                The implementation of the nu-svm training algorithm used by this object is based
+                on the following excellent papers:
+                    - Chang and Lin, Training {nu}-Support Vector Classifiers: Theory and Algorithms
+                    - Chih-Chung Chang and Chih-Jen Lin, LIBSVM : a library for support vector 
+                      machines, 2001. Software available at http://www.csie.ntu.edu.tw/~cjlin/libsvm
+
+        !*/
+
+    public:
+        typedef K kernel_type;
+        typedef typename kernel_type::scalar_type scalar_type;
+        typedef typename kernel_type::sample_type sample_type;
+        typedef typename kernel_type::mem_manager_type mem_manager_type;
+        typedef decision_function<kernel_type> trained_function_type;
+
+        svm_nu_trainer (
+        );
+        /*!
+            ensures
+                - This object is properly initialized and ready to be used
+                  to train a support vector machine.
+                - #get_nu() == 0.1 
+                - #get_cache_size() == 200
+                - #get_epsilon() == 0.001
+        !*/
+
+        svm_nu_trainer (
+            const kernel_type& kernel, 
+            const scalar_type& nu
+        );
+        /*!
+            requires
+                - 0 < nu <= 1
+            ensures
+                - This object is properly initialized and ready to be used
+                  to train a support vector machine.
+                - #get_kernel() == kernel
+                - #get_nu() == nu
+                - #get_cache_size() == 200
+                - #get_epsilon() == 0.001
+        !*/
+
+        void set_cache_size (
+            long cache_size
+        );
+        /*!
+            requires
+                - cache_size > 0
+            ensures
+                - #get_cache_size() == cache_size 
+        !*/
+
+        const long get_cache_size (
+        ) const;
+        /*!
+            ensures
+                - returns the number of megabytes of cache this object will use
+                  when it performs training via the this->train() function.
+                  (bigger values of this may make training go faster but won't affect 
+                  the result.  However, too big a value will cause you to run out of 
+                  memory, obviously.)
+        !*/
+
+        void set_epsilon (
+            scalar_type eps
+        );
+        /*!
+            requires
+                - eps > 0
+            ensures
+                - #get_epsilon() == eps 
+        !*/
+
+        const scalar_type get_epsilon (
+        ) const;
+        /*!
+            ensures
+                - returns the error epsilon that determines when training should stop.
+                  Generally a good value for this is 0.001.  Smaller values may result
+                  in a more accurate solution but take longer to execute.
+        !*/
+
+        void set_kernel (
+            const kernel_type& k
+        );
+        /*!
+            ensures
+                - #get_kernel() == k 
+        !*/
+
+        const kernel_type& get_kernel (
+        ) const;
+        /*!
+            ensures
+                - returns a const reference to the kernel function in use by this object
+        !*/
+
+        void set_nu (
+            scalar_type nu
+        );
+        /*!
+            requires
+                - 0 < nu <= 1
+            ensures
+                - #get_nu() == nu
+        !*/
+
+        const scalar_type get_nu (
+        ) const;
+        /*!
+            ensures
+                - returns the nu svm parameter.  This is a value between 0 and
+                  1.  It is the parameter that determines the trade off between
+                  trying to fit the training data exactly or allowing more errors 
+                  but hopefully improving the generalization ability of the 
+                  resulting classifier.  Smaller values encourage exact fitting 
+                  while larger values of nu may encourage better generalization. 
+                  For more information you should consult the papers referenced 
+                  above.
+        !*/
+
+        template <
+            typename in_sample_vector_type,
+            typename in_scalar_vector_type
+            >
+        const decision_function<kernel_type> train (
+            const in_sample_vector_type& x,
+            const in_scalar_vector_type& y
+        ) const;
+        /*!
+            requires
+                - is_binary_classification_problem(x,y) == true
+                - x == a matrix or something convertible to a matrix via vector_to_matrix().
+                  Also, x should contain sample_type objects.
+                - y == a matrix or something convertible to a matrix via vector_to_matrix().
+                  Also, y should contain scalar_type objects.
+            ensures
+                - trains a nu support vector classifier given the training samples in x and 
+                  labels in y.  Training is done when the error is less than get_epsilon().
+                - returns a decision function F with the following properties:
+                    - if (new_x is a sample predicted to have a +1 label) then
+                        - F(new_x) >= 0
+                    - else
+                        - F(new_x) < 0
+            throws
+                - invalid_nu_error
+                  This exception is thrown if get_nu() >= maximum_nu(y)
+                - std::bad_alloc
+        !*/
+
+        void swap (
+            svm_nu_trainer& item
+        );
+        /*!
+            ensures
+                - swaps *this and item
+        !*/
+    }; 
+
+    template <typename K>
+    void swap (
+        svm_nu_trainer<K>& a,
+        svm_nu_trainer<K>& b
+    ) { a.swap(b); }
+    /*!
+        provides a global swap for svm_nu_trainer objects.  It simply
+        forwards to the member function by calling a.swap(b).
+    !*/
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_SVm_NU_TRAINER_ABSTRACT_
+
+
+
--- a/dlib/svm/svm_threaded.h
+++ b/dlib/svm/svm_threaded.h
@@ -58,7 +58,7 @@ namespace dlib
                    // on very large datasets.  Every bit of freed memory helps out.
                    j = job<trainer_type>();
                }
-                catch (invalid_svm_nu_error&)
+                catch (invalid_nu_error&)
                {
                    // If this is a svm_nu_trainer then we might get this exception if the nu is
                    // invalid.  In this case just return a cross validation score of 0.