Commit 9c02764a authored by Davis King's avatar Davis King

Added the svm_c_trainer. It uses the solve_qp3_using_smo optimizer I just added.

--HG--
extra : convert_revision : svn%3Afdd8eb12-d10e-0410-9acb-85c331704f74/trunk%404004
parent 2b9c85ee
......@@ -24,6 +24,7 @@
#include "svm/simplify_linear_decision_function.h"
#include "svm/krr_trainer.h"
#include "svm/sort_basis_vectors.h"
#include "svm/svm_c_trainer.h"
#endif // DLIB_SVm_HEADER
......
// Copyright (C) 2007 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#ifndef DLIB_SVm_C_TRAINER_H__
#define DLIB_SVm_C_TRAINER_H__
//#include "local/make_label_kernel_matrix.h"
#include "svm_c_trainer_abstract.h"
#include "calculate_rho_and_b.h"
#include <cmath>
#include <limits>
#include <sstream>
#include "../matrix.h"
#include "../algs.h"
#include "function.h"
#include "kernel.h"
#include "../optimization/optimization_solve_qp3_using_smo.h"
namespace dlib
{
// ----------------------------------------------------------------------------------------
    template <
        typename K
        >
    class svm_c_trainer
    {
        /*!
            Trainer for a binary C support vector machine.  The dual QP is solved
            with the SMO-style optimizer solve_qp3_using_smo (see the LIBSVM paper
            referenced in the abstract header).  Per-class regularization is
            supported via Cpos (+1 class) and Cneg (-1 class).
        !*/
    public:
        typedef K kernel_type;
        typedef typename kernel_type::scalar_type scalar_type;
        typedef typename kernel_type::sample_type sample_type;
        typedef typename kernel_type::mem_manager_type mem_manager_type;
        typedef decision_function<kernel_type> trained_function_type;

        // Defaults: C == 1 for both classes, 200 megabyte kernel cache,
        // stopping epsilon of 0.001 (see the abstract for the full contract).
        svm_c_trainer (
        ) :
            Cpos(1),
            Cneg(1),
            cache_size(200),
            eps(0.001)
        {
        }

        // Convenience constructor: sets the kernel and uses C for both classes.
        svm_c_trainer (
            const kernel_type& kernel_,
            const scalar_type& C_
        ) :
            kernel_function(kernel_),
            Cpos(C_),
            Cneg(C_),
            cache_size(200),
            eps(0.001)
        {
            // make sure requires clause is not broken
            DLIB_ASSERT(0 < C_,
                "\tsvm_c_trainer::svm_c_trainer(kernel,C)"
                << "\n\t invalid inputs were given to this function"
                << "\n\t C_: " << C_
                );
        }

        // Sets the size (in megabytes, per the abstract) of the kernel matrix cache.
        void set_cache_size (
            long cache_size_
        )
        {
            // make sure requires clause is not broken
            DLIB_ASSERT(cache_size_ > 0,
                "\tvoid svm_c_trainer::set_cache_size(cache_size_)"
                << "\n\t invalid inputs were given to this function"
                << "\n\t cache_size: " << cache_size_
                );
            cache_size = cache_size_;
        }

        long get_cache_size (
        ) const
        {
            return cache_size;
        }

        // Sets the stopping tolerance passed to the QP solver.  Smaller values
        // give a more accurate solution but take longer.
        void set_epsilon (
            scalar_type eps_
        )
        {
            // make sure requires clause is not broken
            DLIB_ASSERT(eps_ > 0,
                "\tvoid svm_c_trainer::set_epsilon(eps_)"
                << "\n\t invalid inputs were given to this function"
                << "\n\t eps_: " << eps_
                );
            eps = eps_;
        }

        const scalar_type get_epsilon (
        ) const
        {
            return eps;
        }

        void set_kernel (
            const kernel_type& k
        )
        {
            kernel_function = k;
        }

        const kernel_type& get_kernel (
        ) const
        {
            return kernel_function;
        }

        // Sets the same regularization parameter for both classes.
        void set_c (
            scalar_type C
        )
        {
            // make sure requires clause is not broken
            DLIB_ASSERT(C > 0,
                "\t void svm_c_trainer::set_c()"
                << "\n\t C must be greater than 0"
                << "\n\t C: " << C
                << "\n\t this: " << this
                );
            Cpos = C;
            Cneg = C;
        }

        // C for the +1 class.
        const scalar_type get_c_class1 (
        ) const
        {
            return Cpos;
        }

        // C for the -1 class.
        const scalar_type get_c_class2 (
        ) const
        {
            return Cneg;
        }

        void set_c_class1 (
            scalar_type C
        )
        {
            // make sure requires clause is not broken
            DLIB_ASSERT(C > 0,
                "\t void svm_c_trainer::set_c_class1()"
                << "\n\t C must be greater than 0"
                << "\n\t C: " << C
                << "\n\t this: " << this
                );
            Cpos = C;
        }

        void set_c_class2 (
            scalar_type C
        )
        {
            // make sure requires clause is not broken
            DLIB_ASSERT(C > 0,
                "\t void svm_c_trainer::set_c_class2()"
                << "\n\t C must be greater than 0"
                << "\n\t C: " << C
                << "\n\t this: " << this
                );
            Cneg = C;
        }

        // Trains a C-SVM on samples x with +1/-1 labels y and returns the
        // learned decision function.  Accepts anything convertible to a matrix
        // via vector_to_matrix().
        template <
            typename in_sample_vector_type,
            typename in_scalar_vector_type
            >
        const decision_function<kernel_type> train (
            const in_sample_vector_type& x,
            const in_scalar_vector_type& y
        ) const
        {
            return do_train(vector_to_matrix(x), vector_to_matrix(y));
        }

        void swap (
            svm_c_trainer& item
        )
        {
            exchange(kernel_function, item.kernel_function);
            exchange(Cpos, item.Cpos);
            exchange(Cneg, item.Cneg);
            exchange(cache_size, item.cache_size);
            exchange(eps, item.eps);
        }

    private:

    // ------------------------------------------------------------------------------------

        template <
            typename in_sample_vector_type,
            typename in_scalar_vector_type
            >
        const decision_function<kernel_type> do_train (
            const in_sample_vector_type& x,
            const in_scalar_vector_type& y
        ) const
        {
            typedef typename K::scalar_type scalar_type;
            typedef typename decision_function<K>::sample_vector_type sample_vector_type;
            typedef typename decision_function<K>::scalar_vector_type scalar_vector_type;

            // make sure requires clause is not broken
            DLIB_ASSERT(is_binary_classification_problem(x,y) == true,
                "\tdecision_function svm_c_trainer::train(x,y)"
                << "\n\t invalid inputs were given to this function"
                << "\n\t x.nr(): " << x.nr() 
                << "\n\t y.nr(): " << y.nr() 
                << "\n\t x.nc(): " << x.nc() 
                << "\n\t y.nc(): " << y.nc() 
                << "\n\t is_binary_classification_problem(x,y): " << is_binary_classification_problem(x,y)
                );

            scalar_vector_type alpha;

            solve_qp3_using_smo<scalar_vector_type> solver;

            // Solve the C-SVM dual problem.  The Q matrix is the label-signed
            // kernel matrix diagm(y)*K*diagm(y), cached in float precision to
            // halve memory use.  The linear term is the all -1 vector.
            // NOTE(review): the trailing arguments (0, Cpos, Cneg) presumably map
            // to the solver's equality-constraint constant and per-sign box
            // bounds -- confirm against optimization_solve_qp3_using_smo_abstract.h.
            solver(symmetric_matrix_cache<float>((diagm(y)*kernel_matrix(kernel_function,x)*diagm(y)), cache_size),
            //solver(symmetric_matrix_cache<float>(make_label_kernel_matrix(kernel_matrix(kernel_function,x),y), cache_size),
                   uniform_matrix<scalar_type>(y.size(),1,-1),
                   y,
                   0,
                   Cpos,
                   Cneg,
                   alpha,
                   eps);

            scalar_type rho, b;
            calculate_rho_and_b(y,alpha,solver.get_gradient(),rho,b);

            // Fold the labels into the alphas so the decision function is a plain
            // weighted kernel sum over the support vectors.
            alpha = pointwise_multiply(alpha,y);

            // count the number of support vectors
            const long sv_count = (long)sum(alpha != 0);

            scalar_vector_type sv_alpha;
            sample_vector_type support_vectors;

            // size these column vectors so that they have an entry for each support vector
            sv_alpha.set_size(sv_count);
            support_vectors.set_size(sv_count);

            // load the support vectors and their alpha values into these new column matrices
            long idx = 0;
            for (long i = 0; i < alpha.nr(); ++i)
            {
                if (alpha(i) != 0)
                {
                    sv_alpha(idx) = alpha(i);
                    support_vectors(idx) = x(i);
                    ++idx;
                }
            }

            // now return the decision function
            // NOTE(review): the bias term is b*rho -- this relies on the output
            // convention of calculate_rho_and_b; verify against calculate_rho_and_b.h.
            return decision_function<K> (sv_alpha, b*rho, kernel_function, support_vectors);
        }

    // ------------------------------------------------------------------------------------

        kernel_type kernel_function;   // kernel used to compare samples
        scalar_type Cpos;              // regularization parameter for the +1 class
        scalar_type Cneg;              // regularization parameter for the -1 class
        long cache_size;               // kernel cache size in megabytes (per the abstract)
        scalar_type eps;               // QP solver stopping tolerance

    }; // end of class svm_c_trainer
// ----------------------------------------------------------------------------------------
template <typename K>
void swap (
svm_c_trainer<K>& a,
svm_c_trainer<K>& b
) { a.swap(b); }
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_SVm_C_TRAINER_H__
// Copyright (C) 2007 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#undef DLIB_SVm_C_TRAINER_ABSTRACT_
#ifdef DLIB_SVm_C_TRAINER_ABSTRACT_
#include <cmath>
#include <limits>
#include <sstream>
#include "../matrix/matrix_abstract.h"
#include "../algs.h"
#include "function_abstract.h"
#include "kernel_abstract.h"
#include "../optimization/optimization_solve_qp3_using_smo_abstract.h"
namespace dlib
{
// ----------------------------------------------------------------------------------------
template <
typename K
>
class svm_c_trainer
{
/*!
REQUIREMENTS ON K
is a kernel function object as defined in dlib/svm/kernel_abstract.h
WHAT THIS OBJECT REPRESENTS
This object implements a trainer for a C support vector machine for
solving binary classification problems. It is implemented using the SMO
algorithm.
The implementation of the C-SVM training algorithm used by this object is based
on the following paper:
- Chih-Chung Chang and Chih-Jen Lin, LIBSVM : a library for support vector
machines, 2001. Software available at http://www.csie.ntu.edu.tw/~cjlin/libsvm
!*/
public:
typedef K kernel_type;
typedef typename kernel_type::scalar_type scalar_type;
typedef typename kernel_type::sample_type sample_type;
typedef typename kernel_type::mem_manager_type mem_manager_type;
typedef decision_function<kernel_type> trained_function_type;
svm_c_trainer (
);
/*!
ensures
- This object is properly initialized and ready to be used
to train a support vector machine.
- #get_c_class1() == 1
- #get_c_class2() == 1
- #get_cache_size() == 200
- #get_epsilon() == 0.001
!*/
svm_c_trainer (
const kernel_type& kernel,
const scalar_type& C
);
/*!
requires
- 0 < C
ensures
- This object is properly initialized and ready to be used
to train a support vector machine.
- #get_kernel() == kernel
- #get_c_class1() == C
- #get_c_class2() == C
- #get_cache_size() == 200
- #get_epsilon() == 0.001
!*/
void set_cache_size (
long cache_size
);
/*!
requires
- cache_size > 0
ensures
- #get_cache_size() == cache_size
!*/
const long get_cache_size (
) const;
/*!
ensures
- returns the number of megabytes of cache this object will use
when it performs training via the this->train() function.
(bigger values of this may make training go faster but won't affect
the result. However, too big a value will cause you to run out of
memory, obviously.)
!*/
void set_epsilon (
scalar_type eps
);
/*!
requires
- eps > 0
ensures
- #get_epsilon() == eps
!*/
const scalar_type get_epsilon (
) const;
/*!
ensures
- returns the error epsilon that determines when training should stop.
Generally a good value for this is 0.001. Smaller values may result
in a more accurate solution but take longer to execute.
!*/
void set_kernel (
const kernel_type& k
);
/*!
ensures
- #get_kernel() == k
!*/
const kernel_type& get_kernel (
) const;
/*!
ensures
- returns a copy of the kernel function in use by this object
!*/
void set_c (
scalar_type C
);
/*!
requires
- C > 0
ensures
- #get_c_class1() == C
- #get_c_class2() == C
!*/
const scalar_type get_c_class1 (
) const;
/*!
ensures
- returns the SVM regularization parameter for the +1 class.
It is the parameter that determines the trade off between
trying to fit the +1 training data exactly or allowing more errors
but hopefully improving the generalization ability of the
resulting classifier. Larger values encourage exact fitting
while smaller values of C may encourage better generalization.
!*/
const scalar_type get_c_class2 (
) const;
/*!
ensures
- returns the SVM regularization parameter for the -1 class.
It is the parameter that determines the trade off between
trying to fit the -1 training data exactly or allowing more errors
but hopefully improving the generalization ability of the
resulting classifier. Larger values encourage exact fitting
while smaller values of C may encourage better generalization.
!*/
void set_c_class1 (
scalar_type C
);
/*!
requires
- C > 0
ensures
- #get_c_class1() == C
!*/
void set_c_class2 (
scalar_type C
);
/*!
requires
- C > 0
ensures
- #get_c_class2() == C
!*/
template <
typename in_sample_vector_type,
typename in_scalar_vector_type
>
const decision_function<kernel_type> train (
const in_sample_vector_type& x,
const in_scalar_vector_type& y
) const;
/*!
requires
- is_binary_classification_problem(x,y) == true
- x == a matrix or something convertible to a matrix via vector_to_matrix().
Also, x should contain sample_type objects.
- y == a matrix or something convertible to a matrix via vector_to_matrix().
Also, y should contain scalar_type objects.
ensures
- trains a C support vector classifier given the training samples in x and
labels in y. Training is done when the error is less than get_epsilon().
- returns a decision function F with the following properties:
- if (new_x is a sample predicted have +1 label) then
- F(new_x) >= 0
- else
- F(new_x) < 0
!*/
void swap (
svm_c_trainer& item
);
/*!
ensures
- swaps *this and item
!*/
};
template <typename K>
void swap (
svm_c_trainer<K>& a,
svm_c_trainer<K>& b
) { a.swap(b); }
/*!
provides a global swap
!*/
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_SVm_C_TRAINER_ABSTRACT_
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment