Commit 0cdbbe85 authored by Davis King

Added the svr_linear_trainer.

parent 73551a87
@@ -48,6 +48,7 @@
#include "svm/sequence_labeler.h"
#include "svm/assignment_function.h"
#include "svm/active_learning.h"
#include "svm/svr_linear_trainer.h"
#endif // DLIB_SVm_HEADER
// Copyright (C) 2013 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#undef DLIB_SVR_LINEAR_TrAINER_ABSTRACT_H__
#ifdef DLIB_SVR_LINEAR_TrAINER_ABSTRACT_H__
#include "sparse_vector_abstract.h"
#include "function_abstract.h"
#include "kernel_abstract.h"
#include "../algs.h"
namespace dlib
{
// ----------------------------------------------------------------------------------------
template <
typename K
>
class svr_linear_trainer
{
/*!
REQUIREMENTS ON K
Is either linear_kernel or sparse_linear_kernel.
WHAT THIS OBJECT REPRESENTS
This object implements a trainer for performing epsilon-insensitive support
vector regression. It uses the oca optimizer so it is very efficient at
solving this problem when linear kernels are used, making it suitable for
use with large datasets.
For an introduction to support vector regression see the following paper:
A Tutorial on Support Vector Regression by Alex J. Smola and Bernhard Scholkopf.
Note that this object solves the version of support vector regression
defined by equation (3) in the paper, except that we incorporate the bias
term into the w vector by appending a 1 to the end of each sample.
!*/
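// For intuition, the problem being solved is roughly the following (a
// sketch written from the description above, not the literal
// implementation):
//
//   minimize over w:
//       0.5*dot(w,w) + C * sum over i of: max(0, abs(dot(w,x_i) - y_i) - eps_insensitivity)
//
// where each sample x_i has had a 1 appended so the last element of w
// plays the role of the bias term.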
public:
typedef K kernel_type;
typedef typename kernel_type::scalar_type scalar_type;
typedef typename kernel_type::sample_type sample_type;
typedef typename kernel_type::mem_manager_type mem_manager_type;
typedef decision_function<kernel_type> trained_function_type;
svr_linear_trainer (
);
/*!
ensures
- This object is properly initialized and ready to be used to train a
support vector regression function.
- #get_oca() == oca() (i.e. an instance of oca with default parameters)
- #get_c() == 1
- #get_epsilon() == 0.01
- #get_epsilon_insensitivity() == 0.1
- This object will not be verbose unless be_verbose() is called
- #get_max_iterations() == 10000
- #learns_nonnegative_weights() == false
- #forces_last_weight_to_1() == false
!*/
explicit svr_linear_trainer (
const scalar_type& C
);
/*!
requires
- C > 0
ensures
- This object is properly initialized and ready to be used to train a
support vector regression function.
- #get_oca() == oca() (i.e. an instance of oca with default parameters)
- #get_c() == C
- #get_epsilon() == 0.01
- #get_epsilon_insensitivity() == 0.1
- This object will not be verbose unless be_verbose() is called
- #get_max_iterations() == 10000
- #learns_nonnegative_weights() == false
- #forces_last_weight_to_1() == false
!*/
void set_epsilon (
scalar_type eps
);
/*!
requires
- eps > 0
ensures
- #get_epsilon() == eps
!*/
const scalar_type get_epsilon (
) const;
/*!
ensures
- returns the error epsilon that determines when training should stop.
Smaller values may result in a more accurate solution but take longer to
train. You can think of this epsilon value as saying "solve the
optimization problem until the average regression error is within epsilon
of its optimal value". See get_epsilon_insensitivity() below for a
definition of "regression error".
!*/
void set_epsilon_insensitivity (
scalar_type eps
);
/*!
requires
- eps > 0
ensures
- #get_epsilon_insensitivity() == eps
!*/
const scalar_type get_epsilon_insensitivity (
) const;
/*!
ensures
- This object tries to find a function which minimizes the regression error
on a training set. This error is measured in the following way:
- if (abs(predicted_value - true_labeled_value) < eps) then
- The error is 0. That is, any function which gets within eps of
the correct output is good enough.
- else
- The error grows linearly once it gets bigger than eps.
So epsilon-insensitive regression means we do regression but stop trying
to fit a data point once it is "close enough". This function returns
that eps value which controls what we mean by "close enough".
!*/
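// In code form, the per-sample error described above is the
// epsilon-insensitive loss (an illustrative sketch, not a dlib function):
//
//   scalar_type err = std::max<scalar_type>(0, std::abs(predicted_value - true_labeled_value) - eps);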
unsigned long get_max_iterations (
) const;
/*!
ensures
- returns the maximum number of iterations the SVM optimizer is allowed to
run before it is required to stop and return a result.
!*/
void set_max_iterations (
unsigned long max_iter
);
/*!
ensures
- #get_max_iterations() == max_iter
!*/
void be_verbose (
);
/*!
ensures
- This object will print status messages to standard out so that a user can
observe the progress of the algorithm.
!*/
void be_quiet (
);
/*!
ensures
- this object will not print anything to standard out
!*/
bool forces_last_weight_to_1 (
) const;
/*!
ensures
- returns true if this trainer has the constraint that the last weight in
the learned parameter vector must be 1. This is the weight corresponding
to the feature in the training vectors with the highest dimension.
- Forcing the last weight to 1 also disables the bias and therefore the b
field of the learned decision_function will be 0 when forces_last_weight_to_1() == true.
!*/
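// That is, with this constraint enabled the prediction is simply
// dot(w, sample), where w's last entry is fixed at 1 and no bias is
// subtracted.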
void force_last_weight_to_1 (
bool should_last_weight_be_1
);
/*!
ensures
- #forces_last_weight_to_1() == should_last_weight_be_1
!*/
void set_oca (
const oca& item
);
/*!
ensures
- #get_oca() == item
!*/
const oca get_oca (
) const;
/*!
ensures
- returns a copy of the optimizer used to solve the SVM problem.
!*/
const kernel_type get_kernel (
) const;
/*!
ensures
- returns a copy of the kernel function in use by this object. Since the
linear kernels don't have any parameters, this function just returns
kernel_type().
!*/
bool learns_nonnegative_weights (
) const;
/*!
ensures
- The output of training is a weight vector and a bias value. These two
things define the resulting decision function. That is, the decision
function simply takes the dot product between the learned weight vector
and a test sample, then subtracts the bias value. Therefore, if
learns_nonnegative_weights() == true then the resulting learned weight
vector will always have non-negative entries. The bias value may still
be negative though.
!*/
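// Concretely, the learned function is evaluated as (sketch):
//   prediction = dot(weight_vector, sample) - bias
// and setting learns_nonnegative_weights() == true constrains every entry
// of weight_vector (but not the bias) to be >= 0.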
void set_learns_nonnegative_weights (
bool value
);
/*!
ensures
- #learns_nonnegative_weights() == value
!*/
void set_c (
scalar_type C
);
/*!
requires
- C > 0
ensures
- #get_c() == C
!*/
const scalar_type get_c (
) const;
/*!
ensures
- returns the SVM regularization parameter. It is the parameter that
determines the trade-off between trying to fit the training data exactly
and allowing more errors, which hopefully improves the generalization of
the resulting regression function. Larger values encourage exact fitting
while smaller values of C may encourage better generalization.
!*/
const decision_function<kernel_type> train (
const std::vector<sample_type>& samples,
const std::vector<scalar_type>& targets
) const;
/*!
requires
- is_learning_problem(samples,targets) == true
ensures
- performs support vector regression given the training samples and targets.
- returns a decision_function F with the following properties:
- F(new_sample) == predicted target value for new_sample
- F.alpha.size() == 1
- F.basis_vectors.size() == 1
- F.alpha(0) == 1
!*/
};
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_SVR_LINEAR_TrAINER_ABSTRACT_H__
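// ----------------------------------------------------------------------------------------
// Illustrative usage sketch (not part of this commit). It exercises only
// the API documented above; the function name and all data values are
// invented for the example.
#include <dlib/svm.h>
#include <iostream>
#include <vector>

void svr_linear_example()
{
    using namespace dlib;
    typedef matrix<double,0,1> samp_type;
    typedef linear_kernel<samp_type> kern_type;

    // Make a noise-free linear dataset: y = 2*x + 1.
    std::vector<samp_type> samples;
    std::vector<double> targets;
    samp_type m(1);
    for (double x = 0; x <= 10; x += 0.5)
    {
        m(0) = x;
        samples.push_back(m);
        targets.push_back(2*x + 1);
    }

    svr_linear_trainer<kern_type> trainer;
    trainer.set_c(10);
    trainer.set_epsilon_insensitivity(0.01);
    decision_function<kern_type> df = trainer.train(samples, targets);

    m(0) = 3.0;
    std::cout << "prediction: " << df(m) << std::endl; // should be near 7
}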
@@ -124,15 +124,16 @@ set (tests
svm.cpp
svm_multiclass_linear.cpp
svm_struct.cpp
svr_linear_trainer.cpp
symmetric_matrix_cache.cpp
thread_pool.cpp
threads.cpp
timer.cpp
tokenizer.cpp
trust_region.cpp
tuple.cpp
type_safe_union.cpp
vectorstream.cpp
)
# create a variable called target_name and set it to the string "test"
@@ -139,6 +139,7 @@ SRC += svm_c_linear_dcd.cpp
SRC += svm.cpp
SRC += svm_multiclass_linear.cpp
SRC += svm_struct.cpp
SRC += svr_linear_trainer.cpp
SRC += symmetric_matrix_cache.cpp
SRC += thread_pool.cpp
SRC += threads.cpp
// Copyright (C) 2013 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#include <dlib/matrix.h>
#include <sstream>
#include <string>
#include <ctime>
#include <vector>
#include <dlib/statistics.h>
#include "tester.h"
#include <dlib/svm.h>
namespace
{
using namespace test;
using namespace dlib;
using namespace std;
logger dlog("test.svr_linear_trainer");
typedef matrix<double, 0, 1> sample_type;
typedef std::vector<std::pair<unsigned int, double> > sparse_sample_type;
// ----------------------------------------------------------------------------------------
double sinc(double x)
{
if (x == 0)
return 1;
return sin(x)/x;
}
template <typename scalar_type>
void test1()
{
typedef matrix<scalar_type,0,1> sample_type;
typedef radial_basis_kernel<sample_type> kernel_type;
print_spinner();
std::vector<sample_type> samples;
std::vector<scalar_type> targets;
// The first thing we do is pick a few training points from the sinc() function.
sample_type m(1);
for (scalar_type x = -10; x <= 4; x += 1)
{
m(0) = x;
samples.push_back(m);
targets.push_back(sinc(x)+1.1);
}
randomize_samples(samples, targets);
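// Project the training data through an empirical kernel map built from an
// RBF kernel. This turns the nonlinear sinc() problem into one a purely
// linear trainer can fit.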
empirical_kernel_map<kernel_type> ekm;
ekm.load(kernel_type(0.1), samples);
for (unsigned long i = 0; i < samples.size(); ++i)
samples[i] = ekm.project(samples[i]);
svr_linear_trainer<linear_kernel<sample_type> > linear_trainer;
linear_trainer.set_c(30);
linear_trainer.set_epsilon_insensitivity(0.001);
matrix<double> res = cross_validate_regression_trainer(linear_trainer, samples, targets, 5);
dlog << LINFO << "MSE and R-Squared: "<< res;
DLIB_TEST(res(0) < 1e-4);
DLIB_TEST(res(1) > 0.99);
dlib::rand rnd;
samples.clear();
targets.clear();
std::vector<scalar_type> noisefree_targets;
for (scalar_type x = 0; x <= 5; x += 0.1)
{
m(0) = x;
samples.push_back(matrix_cast<scalar_type>(linpiece(x, linspace(0,5,20))));
targets.push_back(x*x + rnd.get_random_gaussian());
noisefree_targets.push_back(x*x);
}
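// Note that the piecewise-linear features produced by linpiece() are
// nonnegative and the target x*x is increasing on this interval, so a fit
// with nonnegative weights should still be possible below.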
linear_trainer.set_learns_nonnegative_weights(true);
linear_trainer.set_epsilon_insensitivity(1.0);
decision_function<linear_kernel<sample_type> > df2 = linear_trainer.train(samples, targets);
print_spinner();
res = test_regression_function(df2, samples, noisefree_targets);
dlog << LINFO << "MSE and R-Squared: "<< res;
DLIB_TEST(res(0) < 0.15);
DLIB_TEST(res(1) > 0.98);
DLIB_TEST(df2.basis_vectors.size()==1);
DLIB_TEST(min(df2.basis_vectors(0)) >= 0);
linear_trainer.force_last_weight_to_1(true);
df2 = linear_trainer.train(samples, targets);
DLIB_TEST(std::abs(df2.basis_vectors(0)(samples[0].size()-1) - 1.0) < 1e-14);
res = test_regression_function(df2, samples, noisefree_targets);
dlog << LINFO << "MSE and R-Squared: "<< res;
DLIB_TEST(res(0) < 0.20);
DLIB_TEST(res(1) > 0.98);
// convert into sparse vectors and try it out
typedef std::vector<std::pair<unsigned long, scalar_type> > sparse_samp;
std::vector<sparse_samp> ssamples;
for (unsigned long i = 0; i < samples.size(); ++i)
{
sparse_samp s;
for (long j = 0; j < samples[i].size(); ++j)
s.push_back(make_pair(j,samples[i](j)));
ssamples.push_back(s);
}
svr_linear_trainer<sparse_linear_kernel<sparse_samp> > strainer;
strainer.set_learns_nonnegative_weights(true);
strainer.set_epsilon_insensitivity(1.0);
strainer.set_c(30);
decision_function<sparse_linear_kernel<sparse_samp> > df;
df = strainer.train(ssamples, targets);
res = test_regression_function(df, ssamples, noisefree_targets);
dlog << LINFO << "MSE and R-Squared: "<< res;
DLIB_TEST(res(0) < 0.15);
DLIB_TEST(res(1) > 0.98);
DLIB_TEST(df.basis_vectors.size()==1);
DLIB_TEST(min(sparse_to_dense(df.basis_vectors(0))) >= 0);
}
// ----------------------------------------------------------------------------------------
class tester_svr_linear_trainer : public tester
{
public:
tester_svr_linear_trainer (
) :
tester ("test_svr_linear_trainer",
"Runs tests on the svr_linear_trainer.")
{}
void perform_test (
)
{
dlog << LINFO << "TEST double";
test1<double>();
dlog << LINFO << "TEST float";
test1<float>();
}
} a;
}