Commit 0cdbbe85 authored by Davis King

Added the svr_linear_trainer.

parent 73551a87
@@ -48,6 +48,7 @@
#include "svm/sequence_labeler.h"
#include "svm/assignment_function.h"
#include "svm/active_learning.h"
#include "svm/svr_linear_trainer.h"
#endif // DLIB_SVm_HEADER
// Copyright (C) 2013 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#undef DLIB_SVR_LINEAR_TrAINER_ABSTRACT_H__
#ifdef DLIB_SVR_LINEAR_TrAINER_ABSTRACT_H__
#include "sparse_vector_abstract.h"
#include "function_abstract.h"
#include "kernel_abstract.h"
#include "../algs.h"
namespace dlib
{
// ----------------------------------------------------------------------------------------
template <
typename K
>
class svr_linear_trainer
{
/*!
REQUIREMENTS ON K
Is either linear_kernel or sparse_linear_kernel.
WHAT THIS OBJECT REPRESENTS
This object implements a trainer for performing epsilon-insensitive support
vector regression. It uses the oca optimizer so it is very efficient at
solving this problem when linear kernels are used, making it suitable for
use with large datasets.
For an introduction to support vector regression see the following paper:
A Tutorial on Support Vector Regression by Alex J. Smola and Bernhard Scholkopf.
Note that this object solves the version of support vector regression
defined by equation (3) in the paper, except that we incorporate the bias
term into the w vector by appending a 1 to the end of each sample.
!*/
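// For intuition, the problem being solved is roughly the following (a
// sketch written from the description above, not the literal
// implementation):
//
//   minimize over w:
//       0.5*dot(w,w) + C * sum over i of: max(0, abs(dot(w,x_i) - y_i) - eps_insensitivity)
//
// where each sample x_i has had a 1 appended so the last element of w
// plays the role of the bias term.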
public:
typedef K kernel_type;
typedef typename kernel_type::scalar_type scalar_type;
typedef typename kernel_type::sample_type sample_type;
typedef typename kernel_type::mem_manager_type mem_manager_type;
typedef decision_function<kernel_type> trained_function_type;
svr_linear_trainer (
);
/*!
ensures
- This object is properly initialized and ready to be used to train a
support vector regression function.
- #get_oca() == oca() (i.e. an instance of oca with default parameters)
- #get_c() == 1
- #get_epsilon() == 0.01
- #get_epsilon_insensitivity() == 0.1
- This object will not be verbose unless be_verbose() is called
- #get_max_iterations() == 10000
- #learns_nonnegative_weights() == false
- #forces_last_weight_to_1() == false
!*/
explicit svr_linear_trainer (
const scalar_type& C
);
/*!
requires
- C > 0
ensures
- This object is properly initialized and ready to be used to train a
support vector regression function.
- #get_oca() == oca() (i.e. an instance of oca with default parameters)
- #get_c() == C
- #get_epsilon() == 0.01
- #get_epsilon_insensitivity() == 0.1
- This object will not be verbose unless be_verbose() is called
- #get_max_iterations() == 10000
- #learns_nonnegative_weights() == false
- #forces_last_weight_to_1() == false
!*/
void set_epsilon (
scalar_type eps
);
/*!
requires
- eps > 0
ensures
- #get_epsilon() == eps
!*/
const scalar_type get_epsilon (
) const;
/*!
ensures
- returns the error epsilon that determines when training should stop.
Smaller values may result in a more accurate solution but take longer to
train. You can think of this epsilon value as saying "solve the
optimization problem until the average regression error is within epsilon
of its optimal value". See get_epsilon_insensitivity() below for a
definition of "regression error".
!*/
void set_epsilon_insensitivity (
scalar_type eps
);
/*!
requires
- eps > 0
ensures
- #get_epsilon_insensitivity() == eps
!*/
const scalar_type get_epsilon_insensitivity (
) const;
/*!
ensures
- This object tries to find a function which minimizes the regression error
on a training set. This error is measured in the following way:
- if (abs(predicted_value - true_labeled_value) < eps) then
- The error is 0. That is, any function which gets within eps of
the correct output is good enough.
- else
- The error grows linearly once it gets bigger than eps.
So epsilon-insensitive regression means we do regression but stop trying
to fit a data point once it is "close enough". This function returns
that eps value which controls what we mean by "close enough".
!*/
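// In code form, the per-sample error described above is the
// epsilon-insensitive loss (an illustrative sketch, not a dlib function):
//
//   scalar_type err = std::max<scalar_type>(0, std::abs(predicted_value - true_labeled_value) - eps);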
unsigned long get_max_iterations (
) const;
/*!
ensures
- returns the maximum number of iterations the SVM optimizer is allowed to
run before it is required to stop and return a result.
!*/
void set_max_iterations (
unsigned long max_iter
);
/*!
ensures
- #get_max_iterations() == max_iter
!*/
void be_verbose (
);
/*!
ensures
- This object will print status messages to standard out so that a user can
observe the progress of the algorithm.
!*/
void be_quiet (
);
/*!
ensures
- this object will not print anything to standard out
!*/
bool forces_last_weight_to_1 (
) const;
/*!
ensures
- returns true if this trainer has the constraint that the last weight in
the learned parameter vector must be 1. This is the weight corresponding
to the feature in the training vectors with the highest dimension.
- Forcing the last weight to 1 also disables the bias and therefore the b
field of the learned decision_function will be 0 when forces_last_weight_to_1() == true.
!*/
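// That is, with this constraint enabled the prediction is simply
// dot(w, sample), where w's last entry is fixed at 1 and no bias is
// subtracted.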
void force_last_weight_to_1 (
bool should_last_weight_be_1
);
/*!
ensures
- #forces_last_weight_to_1() == should_last_weight_be_1
!*/
void set_oca (
const oca& item
);
/*!
ensures
- #get_oca() == item
!*/
const oca get_oca (
) const;
/*!
ensures
- returns a copy of the optimizer used to solve the SVM problem.
!*/
const kernel_type get_kernel (
) const;
/*!
ensures
- returns a copy of the kernel function in use by this object. Since the
linear kernels don't have any parameters, this function just returns
kernel_type().
!*/
bool learns_nonnegative_weights (
) const;
/*!
ensures
- The output of training is a weight vector and a bias value. These two
things define the resulting decision function. That is, the decision
function simply takes the dot product between the learned weight vector
and a test sample, then subtracts the bias value. Therefore, if
learns_nonnegative_weights() == true then the resulting learned weight
vector will always have non-negative entries. The bias value may still
be negative though.
!*/
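// Concretely, the learned function is evaluated as (sketch):
//   prediction = dot(weight_vector, sample) - bias
// and setting learns_nonnegative_weights() == true constrains every entry
// of weight_vector (but not the bias) to be >= 0.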
void set_learns_nonnegative_weights (
bool value
);
/*!
ensures
- #learns_nonnegative_weights() == value
!*/
void set_c (
scalar_type C
);
/*!
requires
- C > 0
ensures
- #get_c() == C
!*/
const scalar_type get_c (
) const;
/*!
ensures
- returns the SVM regularization parameter. It is the parameter that
determines the trade-off between trying to fit the training data exactly
and allowing more errors, which hopefully improves the generalization of
the resulting regression function. Larger values encourage exact fitting
while smaller values of C may encourage better generalization.
!*/
const decision_function<kernel_type> train (
const std::vector<sample_type>& samples,
const std::vector<scalar_type>& targets
) const;
/*!
requires
- is_learning_problem(samples,targets) == true
ensures
- performs support vector regression given the training samples and targets.
- returns a decision_function F with the following properties:
- F(new_sample) == predicted target value for new_sample
- F.alpha.size() == 1
- F.basis_vectors.size() == 1
- F.alpha(0) == 1
!*/
};
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_SVR_LINEAR_TrAINER_ABSTRACT_H__
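// ----------------------------------------------------------------------------------------
// Illustrative usage sketch (not part of this commit). It exercises only
// the API documented above; the function name and all data values are
// invented for the example.
#include <dlib/svm.h>
#include <iostream>
#include <vector>

void svr_linear_example()
{
    using namespace dlib;
    typedef matrix<double,0,1> samp_type;
    typedef linear_kernel<samp_type> kern_type;

    // Make a noise-free linear dataset: y = 2*x + 1.
    std::vector<samp_type> samples;
    std::vector<double> targets;
    samp_type m(1);
    for (double x = 0; x <= 10; x += 0.5)
    {
        m(0) = x;
        samples.push_back(m);
        targets.push_back(2*x + 1);
    }

    svr_linear_trainer<kern_type> trainer;
    trainer.set_c(10);
    trainer.set_epsilon_insensitivity(0.01);
    decision_function<kern_type> df = trainer.train(samples, targets);

    m(0) = 3.0;
    std::cout << "prediction: " << df(m) << std::endl; // should be near 7
}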
@@ -124,15 +124,16 @@ set (tests
svm.cpp
svm_multiclass_linear.cpp
svm_struct.cpp
svr_linear_trainer.cpp
symmetric_matrix_cache.cpp
thread_pool.cpp
threads.cpp
timer.cpp
tokenizer.cpp
trust_region.cpp
tuple.cpp
type_safe_union.cpp
vectorstream.cpp
)
# create a variable called target_name and set it to the string "test"
@@ -139,6 +139,7 @@ SRC += svm_c_linear_dcd.cpp
SRC += svm.cpp
SRC += svm_multiclass_linear.cpp
SRC += svm_struct.cpp
SRC += svr_linear_trainer.cpp
SRC += symmetric_matrix_cache.cpp
SRC += thread_pool.cpp
SRC += threads.cpp
// Copyright (C) 2013 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#include <dlib/matrix.h>
#include <sstream>
#include <string>
#include <ctime>
#include <vector>
#include <dlib/statistics.h>
#include "tester.h"
#include <dlib/svm.h>
namespace
{
using namespace test;
using namespace dlib;
using namespace std;
logger dlog("test.svr_linear_trainer");
typedef matrix<double, 0, 1> sample_type;
typedef std::vector<std::pair<unsigned int, double> > sparse_sample_type;
// ----------------------------------------------------------------------------------------
double sinc(double x)
{
if (x == 0)
return 1;
return sin(x)/x;
}
template <typename scalar_type>
void test1()
{
typedef matrix<scalar_type,0,1> sample_type;
typedef radial_basis_kernel<sample_type> kernel_type;
print_spinner();
std::vector<sample_type> samples;
std::vector<scalar_type> targets;
// The first thing we do is pick a few training points from the sinc() function.
sample_type m(1);
for (scalar_type x = -10; x <= 4; x += 1)
{
m(0) = x;
samples.push_back(m);
targets.push_back(sinc(x)+1.1);
}
randomize_samples(samples, targets);
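// Project the training data through an empirical kernel map built from an
// RBF kernel. This turns the nonlinear sinc() problem into one a purely
// linear trainer can fit.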
empirical_kernel_map<kernel_type> ekm;
ekm.load(kernel_type(0.1), samples);
for (unsigned long i = 0; i < samples.size(); ++i)
samples[i] = ekm.project(samples[i]);
svr_linear_trainer<linear_kernel<sample_type> > linear_trainer;
linear_trainer.set_c(30);
linear_trainer.set_epsilon_insensitivity(0.001);
matrix<double> res = cross_validate_regression_trainer(linear_trainer, samples, targets, 5);
dlog << LINFO << "MSE and R-Squared: "<< res;
DLIB_TEST(res(0) < 1e-4);
DLIB_TEST(res(1) > 0.99);
dlib::rand rnd;
samples.clear();
targets.clear();
std::vector<scalar_type> noisefree_targets;
for (scalar_type x = 0; x <= 5; x += 0.1)
{
m(0) = x;
samples.push_back(matrix_cast<scalar_type>(linpiece(x, linspace(0,5,20))));
targets.push_back(x*x + rnd.get_random_gaussian());
noisefree_targets.push_back(x*x);
}
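// Note that the piecewise-linear features produced by linpiece() are
// nonnegative and the target x*x is increasing on this interval, so a fit
// with nonnegative weights should still be possible below.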
linear_trainer.set_learns_nonnegative_weights(true);
linear_trainer.set_epsilon_insensitivity(1.0);
decision_function<linear_kernel<sample_type> > df2 = linear_trainer.train(samples, targets);
print_spinner();
res = test_regression_function(df2, samples, noisefree_targets);
dlog << LINFO << "MSE and R-Squared: "<< res;
DLIB_TEST(res(0) < 0.15);
DLIB_TEST(res(1) > 0.98);
DLIB_TEST(df2.basis_vectors.size()==1);
DLIB_TEST(min(df2.basis_vectors(0)) >= 0);
linear_trainer.force_last_weight_to_1(true);
df2 = linear_trainer.train(samples, targets);
DLIB_TEST(std::abs(df2.basis_vectors(0)(samples[0].size()-1) - 1.0) < 1e-14);
res = test_regression_function(df2, samples, noisefree_targets);
dlog << LINFO << "MSE and R-Squared: "<< res;
DLIB_TEST(res(0) < 0.20);
DLIB_TEST(res(1) > 0.98);
// convert into sparse vectors and try it out
typedef std::vector<std::pair<unsigned long, scalar_type> > sparse_samp;
std::vector<sparse_samp> ssamples;
for (unsigned long i = 0; i < samples.size(); ++i)
{
sparse_samp s;
for (long j = 0; j < samples[i].size(); ++j)
s.push_back(make_pair(j,samples[i](j)));
ssamples.push_back(s);
}
svr_linear_trainer<sparse_linear_kernel<sparse_samp> > strainer;
strainer.set_learns_nonnegative_weights(true);
strainer.set_epsilon_insensitivity(1.0);
strainer.set_c(30);
decision_function<sparse_linear_kernel<sparse_samp> > df;
df = strainer.train(ssamples, targets);
res = test_regression_function(df, ssamples, noisefree_targets);
dlog << LINFO << "MSE and R-Squared: "<< res;
DLIB_TEST(res(0) < 0.15);
DLIB_TEST(res(1) > 0.98);
DLIB_TEST(df.basis_vectors.size()==1);
DLIB_TEST(min(sparse_to_dense(df.basis_vectors(0))) >= 0);
}
// ----------------------------------------------------------------------------------------
class tester_svr_linear_trainer : public tester
{
public:
tester_svr_linear_trainer (
) :
tester ("test_svr_linear_trainer",
"Runs tests on the svr_linear_trainer.")
{}
void perform_test (
)
{
dlog << LINFO << "TEST double";
test1<double>();
dlog << LINFO << "TEST float";
test1<float>();
}
} a;
}