Added an epsilon-insensitive support vector regression class.

--HG-- extra : convert_revision : svn%3Afdd8eb12-d10e-0410-9acb-85c331704f74/trunk%404025

Added an epsilon-insensitive support vector regression class.
--HG-- extra : convert_revision : svn%3Afdd8eb12-d10e-0410-9acb-85c331704f74/trunk%404025
75f0d1ac · Davis King · f710b18b · 75f0d1ac · 75f0d1ac · 75f0d1ac
Commit 75f0d1ac authored Dec 23, 2010 by Davis King
Show whitespace changes
Inline Side-by-side

Showing with 603 additions and 0 deletions

svm.h dlib/svm.h +1 -0

svr_trainer.h dlib/svm/svr_trainer.h +393 -0

svr_trainer_abstract.h dlib/svm/svr_trainer_abstract.h +209 -0

No files found.
--- a/dlib/svm.h
+++ b/dlib/svm.h
@@ -26,6 +26,7 @@
 #include "svm/sort_basis_vectors.h"
 #include "svm/svm_c_trainer.h"
 #include "svm/svm_one_class_trainer.h"
+#include "svm/svr_trainer.h"

 #endif // DLIB_SVm_HEADER


--- a/dlib/svm/svr_trainer.h
+++ b/dlib/svm/svr_trainer.h
+// Copyright (C) 2010  Davis E. King (davis@dlib.net)
+// License: Boost Software License   See LICENSE.txt for the full license.
+#ifndef DLIB_SVm_EPSILON_REGRESSION_TRAINER_H__ 
+#define DLIB_SVm_EPSILON_REGRESSION_TRAINER_H__
+
+
+#include "svr_trainer_abstract.h"
+#include <cmath>
+#include <limits>
+#include "../matrix.h"
+#include "../algs.h"
+
+#include "function.h"
+#include "kernel.h"
+#include "../optimization/optimization_solve_qp3_using_smo.h"
+
+namespace dlib 
+{
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename K 
+        >
+    class svr_trainer
+    {
+        /*!
+            Trainer for epsilon-insensitive support vector regression using the
+            kernel type K.  The public interface is specified in
+            svr_trainer_abstract.h.
+
+            train() builds a dual problem with two variables per training sample,
+            hands it to solve_qp3_using_smo, and packs the non-zero coefficients
+            together with their samples into a decision_function.
+        !*/
+    public:
+        typedef K kernel_type;
+        typedef typename kernel_type::scalar_type scalar_type;
+        typedef typename kernel_type::sample_type sample_type;
+        typedef typename kernel_type::mem_manager_type mem_manager_type;
+        typedef decision_function<kernel_type> trained_function_type;
+
+        // Defaults: C == 1, eps_insensitivity == 0.1, cache_size == 200, eps == 0.001.
+        svr_trainer (
+        ) :
+            C(1),
+            eps_insensitivity(0.1),
+            cache_size(200),
+            eps(0.001)
+        {
+        }
+
+        // Sets the size handed to symmetric_matrix_cache during training.  Must be > 0.
+        void set_cache_size (
+            long cache_size_
+        )
+        {
+            // make sure requires clause is not broken
+            DLIB_ASSERT(cache_size_ > 0,
+                "\tvoid svr_trainer::set_cache_size(cache_size_)"
+                << "\n\t invalid inputs were given to this function"
+                << "\n\t cache_size: " << cache_size_ 
+                );
+            cache_size = cache_size_;
+        }
+
+        long get_cache_size (
+        ) const
+        {
+            return cache_size;
+        }
+
+        // Sets the stopping tolerance passed to the SMO solver.  Must be > 0.
+        void set_epsilon (
+            scalar_type eps_
+        )
+        {
+            // make sure requires clause is not broken
+            DLIB_ASSERT(eps_ > 0,
+                "\tvoid svr_trainer::set_epsilon(eps_)"
+                << "\n\t invalid inputs were given to this function"
+                << "\n\t eps_: " << eps_ 
+                );
+            eps = eps_;
+        }
+
+        const scalar_type get_epsilon (
+        ) const
+        { 
+            return eps;
+        }
+
+        // Sets the width of the insensitive zone used in the regression loss.  Must be > 0.
+        void set_epsilon_insensitivity (
+            scalar_type eps_
+        )
+        {
+            // make sure requires clause is not broken
+            DLIB_ASSERT(eps_ > 0,
+                "\tvoid svr_trainer::set_epsilon_insensitivity(eps_)"
+                << "\n\t invalid inputs were given to this function"
+                << "\n\t eps_: " << eps_ 
+                );
+            eps_insensitivity = eps_;
+        }
+
+        const scalar_type get_epsilon_insensitivity (
+        ) const
+        { 
+            return eps_insensitivity;
+        }
+
+        void set_kernel (
+            const kernel_type& k
+        )
+        {
+            kernel_function = k;
+        }
+
+        // Returns a reference to the stored kernel (not a copy).
+        const kernel_type& get_kernel (
+        ) const
+        {
+            return kernel_function;
+        }
+
+        // Sets the box constraint applied to every dual variable.  Must be > 0.
+        void set_c (
+            scalar_type C_ 
+        )
+        {
+            // make sure requires clause is not broken
+            DLIB_ASSERT(C_ > 0,
+                "\t void svr_trainer::set_c()"
+                << "\n\t C must be greater than 0"
+                << "\n\t C_:    " << C_ 
+                << "\n\t this: " << this
+                );
+
+            C = C_;
+        }
+
+        const scalar_type get_c (
+        ) const
+        {
+            return C;
+        }
+
+        // Converts both inputs with vector_to_matrix() and forwards to do_train().
+        template <
+            typename in_sample_vector_type,
+            typename in_scalar_vector_type
+            >
+        const decision_function<kernel_type> train (
+            const in_sample_vector_type& x,
+            const in_scalar_vector_type& y
+        ) const
+        {
+            return do_train(vector_to_matrix(x), vector_to_matrix(y));
+        }
+
+        // Exchanges every data member of *this with the matching member of item.
+        void swap (
+            svr_trainer& item
+        )
+        {
+            exchange(kernel_function, item.kernel_function);
+            exchange(C,            item.C);
+            exchange(eps_insensitivity, item.eps_insensitivity);
+            exchange(cache_size,      item.cache_size);
+            exchange(eps,             item.eps);
+        }
+
+    private:
+
+    // ------------------------------------------------------------------------------------
+
+        /*!
+            Matrix expression operator that presents m as the block matrix
+                 m -m
+                -m  m
+            without materializing it.  Only a reference to m is held, so m must
+            outlive any expression built from this object.
+        !*/
+        template <typename M>
+        struct op_quad 
+        {
+            explicit op_quad( 
+                const M& m_
+            ) : m(m_) {}
+
+            const M& m;
+
+            typedef typename M::type type;
+            typedef type const_ret_type;
+            const static long cost = M::cost + 2;
+
+            // Maps (r,c) of the doubled matrix back onto m; the two off-diagonal
+            // quadrants are negated.
+            inline const_ret_type apply ( long r, long c) const
+            { 
+                if (r < m.nr())
+                {
+                    if (c < m.nc())
+                    {
+                        return m(r,c);
+                    }
+                    else
+                    {
+                        return -m(r,c-m.nc());
+                    }
+                }
+                else
+                {
+                    if (c < m.nc())
+                    {
+                        return -m(r-m.nr(),c);
+                    }
+                    else
+                    {
+                        return m(r-m.nr(),c-m.nc());
+                    }
+                }
+            }
+
+            const static long NR = 2*M::NR;
+            const static long NC = 2*M::NC;
+            typedef typename M::mem_manager_type mem_manager_type;
+            typedef typename M::layout_type layout_type;
+
+            long nr () const { return 2*m.nr(); }
+            long nc () const { return 2*m.nc(); }
+
+            // Both aliasing queries defer to m.aliases(); any aliasing of m is
+            // therefore treated as destructive.
+            template <typename U> bool aliases               ( const matrix_exp<U>& item) const 
+            { return m.aliases(item); }
+            template <typename U> bool destructively_aliases ( const matrix_exp<U>& item) const 
+            { return m.aliases(item); }
+        };
+
+        template <
+            typename EXP
+            >
+        const matrix_op<op_quad<EXP> >  make_quad (
+            const matrix_exp<EXP>& m
+        ) const
+        /*!
+            ensures
+                - returns the following matrix:
+                     m -m
+                    -m  m
+                - I.e. returns a matrix that is twice the size of m and just
+                  contains copies of m and -m
+        !*/
+        {
+            typedef op_quad<EXP> op;
+            return matrix_op<op>(op(m.ref()));
+        }
+
+    // ------------------------------------------------------------------------------------
+
+        template <
+            typename in_sample_vector_type,
+            typename in_scalar_vector_type
+            >
+        const decision_function<kernel_type> do_train (
+            const in_sample_vector_type& x,
+            const in_scalar_vector_type& y
+        ) const
+        /*!
+            requires
+                - is_learning_problem(x,y) == true
+            ensures
+                - solves the 2*x.size() variable dual problem with solve_qp3_using_smo
+                  and returns a decision_function holding the samples whose combined
+                  coefficient is non-zero, with offset -b where b comes from
+                  calculate_b().
+        !*/
+        {
+            typedef typename K::scalar_type scalar_type;
+            typedef typename decision_function<K>::sample_vector_type sample_vector_type;
+            typedef typename decision_function<K>::scalar_vector_type scalar_vector_type;
+
+            // make sure requires clause is not broken
+            DLIB_ASSERT(is_learning_problem(x,y) == true,
+                "\tdecision_function svr_trainer::train(x,y)"
+                << "\n\t invalid inputs were given to this function"
+                << "\n\t x.nr(): " << x.nr() 
+                << "\n\t y.nr(): " << y.nr() 
+                << "\n\t x.nc(): " << x.nc() 
+                << "\n\t y.nc(): " << y.nc() 
+                );
+
+
+            scalar_vector_type alpha;
+
+            solve_qp3_using_smo<scalar_vector_type> solver;
+
+            // Arguments, in order: the cached quad kernel matrix, the linear term
+            // (eps_insensitivity + [y; -y]), the sign vector ([+1...; -1...]), 0, the
+            // two box limits (both C), the output alpha and the stopping tolerance.
+            // NOTE: the matrix arguments are expression temporaries that hold
+            // references to each other, so they must stay inside this one full
+            // expression rather than being split into named locals.
+            solver(symmetric_matrix_cache<float>(make_quad(kernel_matrix(kernel_function,x)), cache_size), 
+                   uniform_matrix<scalar_type>(2*x.size(),1, eps_insensitivity) + join_cols(y,-y),
+                   join_cols(uniform_matrix<scalar_type>(x.size(),1,1), uniform_matrix<scalar_type>(x.size(),1,-1)), 
+                   0,
+                   C,
+                   C,
+                   alpha,
+                   eps);
+
+            // b must be computed from the full 2N-element alpha, before it is folded below
+            scalar_type b;
+            calculate_b(alpha,solver.get_gradient(),C,b);
+
+            // fold the two halves into one coefficient per sample: second half minus first half
+            alpha = -rowm(alpha,range(0,x.size()-1)) + rowm(alpha,range(x.size(), alpha.size()-1));
+            
+            // count the number of support vectors
+            const long sv_count = static_cast<long>(sum(alpha != 0));
+
+            scalar_vector_type sv_alpha;
+            sample_vector_type support_vectors;
+
+            // size these column vectors so that they have an entry for each support vector
+            sv_alpha.set_size(sv_count);
+            support_vectors.set_size(sv_count);
+
+            // load the support vectors and their alpha values into these new column matrices
+            long idx = 0;
+            for (long i = 0; i < alpha.nr(); ++i)
+            {
+                if (alpha(i) != 0)
+                {
+                    sv_alpha(idx) = alpha(i);
+                    support_vectors(idx) = x(i);
+                    ++idx;
+                }
+            }
+
+            // now return the decision function
+            return decision_function<K> (sv_alpha, -b, kernel_function, support_vectors);
+        }
+
+    // ------------------------------------------------------------------------------------
+
+        template <
+            typename scalar_vector_type
+            >
+        void calculate_b(
+            const scalar_vector_type& alpha,
+            const scalar_vector_type& df,
+            const scalar_type& C,
+            scalar_type& b
+        ) const
+        /*!
+            requires
+                - alpha is the 2N-element solution produced in do_train() and df is
+                  the solver's gradient at that solution.  The first N entries have
+                  equality-constraint sign +1 and the last N have sign -1.
+            ensures
+                - Let s(i) == df(i) for the first half and -df(i) for the second half.
+                - if (some alpha(i) lies strictly between 0 and C) then
+                    - #b == the average of s(i) over those free variables
+                - else
+                    - #b == the midpoint of the interval allowed by the optimality
+                      conditions of the bound variables:
+                        - first half:  alpha(i) == C gives b >= s(i),
+                                       alpha(i) == 0 gives b <= s(i)
+                        - second half: alpha(i) == C gives b <= s(i),
+                                       alpha(i) == 0 gives b >= s(i)
+        !*/
+        {
+            using namespace std;
+            long num_free = 0;
+            scalar_type sum_free = 0;
+
+            // Start from an empty set of limits.  Seeding these from the range of df
+            // is not neutral because second-half entries contribute -df(i).
+            scalar_type b_floor = -numeric_limits<scalar_type>::infinity(); // largest lower limit on b
+            scalar_type b_ceil  =  numeric_limits<scalar_type>::infinity(); // smallest upper limit on b
+
+            const long half = alpha.nr()/2;
+            for (long i = 0; i < alpha.nr(); ++i)
+            {
+                const bool first_half = (i < half);
+                const scalar_type s = first_half ? df(i) : -df(i);
+
+                const bool at_upper = (alpha(i) == C);
+                const bool at_lower = !at_upper && (alpha(i) == 0);
+
+                if (!at_upper && !at_lower)
+                {
+                    // free variable: s equals b exactly at the optimum
+                    ++num_free;
+                    sum_free += s;
+                }
+                else if (at_upper == first_half)
+                {
+                    // (first half at C) or (second half at 0):  b >= s
+                    if (s > b_floor)
+                        b_floor = s;
+                }
+                else
+                {
+                    // (first half at 0) or (second half at C):  b <= s
+                    if (s < b_ceil)
+                        b_ceil = s;
+                }
+            }
+
+            if(num_free > 0)
+                b = sum_free/num_free;
+            else
+                b = (b_floor+b_ceil)/2;
+        }
+
+    // ------------------------------------------------------------------------------------
+
+
+        kernel_type kernel_function;
+        scalar_type C;
+        scalar_type eps_insensitivity;
+        long cache_size;
+        scalar_type eps;
+    }; // end of class svr_trainer
+
+// ----------------------------------------------------------------------------------------
+
+    // Global swap overload for svr_trainer; forwards to the member swap().
+    template <typename K>
+    void swap (
+        svr_trainer<K>& a,
+        svr_trainer<K>& b
+    )
+    {
+        a.swap(b);
+    }
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_SVm_EPSILON_REGRESSION_TRAINER_H__
+
--- a/dlib/svm/svr_trainer_abstract.h
+++ b/dlib/svm/svr_trainer_abstract.h
+// Copyright (C) 2010  Davis E. King (davis@dlib.net)
+// License: Boost Software License   See LICENSE.txt for the full license.
+#undef DLIB_SVm_EPSILON_REGRESSION_TRAINER_ABSTRACT_
+#ifdef DLIB_SVm_EPSILON_REGRESSION_TRAINER_ABSTRACT_
+
+#include <cmath>
+#include <limits>
+#include "../matrix/matrix_abstract.h"
+#include "../algs.h"
+#include "function_abstract.h"
+#include "kernel_abstract.h"
+#include "../optimization/optimization_solve_qp3_using_smo_abstract.h"
+
+namespace dlib
+{
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename K 
+        >
+    class svr_trainer
+    {
+        /*!
+            REQUIREMENTS ON K 
+                is a kernel function object as defined in dlib/svm/kernel_abstract.h 
+
+            WHAT THIS OBJECT REPRESENTS
+                This object implements a trainer for performing epsilon-insensitive support 
+                vector regression.  It is implemented using the SMO algorithm.
+
+                The implementation of the eps-SVR training algorithm used by this object is based
+                on the following paper:
+                    - Chih-Chung Chang and Chih-Jen Lin, LIBSVM : a library for support vector 
+                      machines, 2001. Software available at http://www.csie.ntu.edu.tw/~cjlin/libsvm
+        !*/
+
+    public:
+        typedef K kernel_type;
+        typedef typename kernel_type::scalar_type scalar_type;
+        typedef typename kernel_type::sample_type sample_type;
+        typedef typename kernel_type::mem_manager_type mem_manager_type;
+        typedef decision_function<kernel_type> trained_function_type;
+
+        svr_trainer (
+        );
+        /*!
+            ensures
+                - This object is properly initialized and ready to be used
+                  to train a support vector regression function.
+                - #get_c() == 1
+                - #get_epsilon_insensitivity() == 0.1
+                - #get_cache_size() == 200
+                - #get_epsilon() == 0.001
+        !*/
+
+        void set_cache_size (
+            long cache_size
+        );
+        /*!
+            requires
+                - cache_size > 0
+            ensures
+                - #get_cache_size() == cache_size 
+                  (the value is a number of megabytes, see get_cache_size())
+        !*/
+
+        long get_cache_size (
+        ) const;
+        /*!
+            ensures
+                - returns the number of megabytes of cache this object will use
+                  when it performs training via the this->train() function.
+                  (bigger values of this may make training go faster but won't affect 
+                  the result.  However, too big a value will cause you to run out of 
+                  memory, obviously.)
+        !*/
+
+        void set_epsilon (
+            scalar_type eps
+        );
+        /*!
+            requires
+                - eps > 0
+            ensures
+                - #get_epsilon() == eps 
+        !*/
+
+        const scalar_type get_epsilon (
+        ) const;
+        /*!
+            ensures
+                - returns the error epsilon that determines when training should stop.
+                  Generally a good value for this is 0.001.  Smaller values may result
+                  in a more accurate solution but take longer to execute.
+        !*/
+
+        void set_epsilon_insensitivity (
+            scalar_type eps
+        );
+        /*!
+            requires
+                - eps > 0
+            ensures
+                - #get_epsilon_insensitivity() == eps
+        !*/
+
+        const scalar_type get_epsilon_insensitivity (
+        ) const;
+        /*!
+            ensures
+                - This object tries to find a function which minimizes the
+                  regression error on a training set.  This error is measured
+                  in the following way:
+                    - if (abs(predicted_value - true_labeled_value) < eps) then
+                        - The error is 0.  That is, any function which gets within
+                          eps of the correct output is good enough.
+                    - else
+                        - The error grows linearly once it gets bigger than eps
+                 
+                  So epsilon-insensitive regression means we do regression but 
+                  stop trying to fit a data point once it is "close enough".  
+                  This function returns that eps value which controls what we 
+                  mean by "close enough".
+        !*/
+
+        void set_kernel (
+            const kernel_type& k
+        );
+        /*!
+            ensures
+                - #get_kernel() == k 
+        !*/
+
+        const kernel_type& get_kernel (
+        ) const;
+        /*!
+            ensures
+                - returns a const reference to the kernel function in use by 
+                  this object
+        !*/
+
+        void set_c (
+            scalar_type C 
+        );
+        /*!
+            requires
+                - C > 0
+            ensures
+                - #get_c() == C 
+        !*/
+
+        const scalar_type get_c (
+        ) const;
+        /*!
+            ensures
+                - returns the SVR regularization parameter.  It is the parameter that 
+                  determines the trade-off between trying to reduce the training error 
+                  or allowing more errors but hopefully improving the generalization 
+                  of the resulting decision_function.  Larger values encourage exact 
+                  fitting while smaller values of C may encourage better generalization. 
+        !*/
+
+        template <
+            typename in_sample_vector_type,
+            typename in_scalar_vector_type
+            >
+        const decision_function<kernel_type> train (
+            const in_sample_vector_type& x,
+            const in_scalar_vector_type& y
+        ) const;
+        /*!
+            requires
+                - is_learning_problem(x,y) == true
+                - x == a matrix or something convertible to a matrix via vector_to_matrix().
+                  Also, x should contain sample_type objects.
+                - y == a matrix or something convertible to a matrix via vector_to_matrix().
+                  Also, y should contain scalar_type objects.
+            ensures
+                - performs support vector regression given the training samples in x and 
+                  target values in y.  
+                - returns a decision_function F with the following properties:
+                    - F(new_x) == predicted y value
+        !*/
+
+        void swap (
+            svr_trainer& item
+        );
+        /*!
+            ensures
+                - swaps *this and item
+        !*/
+    }; 
+
+    template <typename K>
+    void swap (
+        svr_trainer<K>& a,
+        svr_trainer<K>& b
+    )
+    {
+        a.swap(b);
+    }
+    /*!
+        provides a global swap for svr_trainer objects; equivalent to a.swap(b)
+    !*/
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_SVm_EPSILON_REGRESSION_TRAINER_ABSTRACT_
+
+
+