Commit b7a02418 authored by Davis King

Refactored the svm_nu_trainer. Specifically, I pulled the quadratic
programming solver out and made it a separate class. The
kernel_matrix_cache has also been removed in favor of the new
symmetric_matrix_cache. Finally, the remaining bits of the
svm_nu_trainer have been moved into svm_nu_trainer.h.

Also note that invalid_svm_nu_error has been renamed to invalid_nu_error.

--HG--
extra : convert_revision : svn%3Afdd8eb12-d10e-0410-9acb-85c331704f74/trunk%403994
parent dbba600f
@@ -6,6 +6,7 @@
#include "optimization/optimization.h"
#include "optimization/optimization_bobyqa.h"
#include "optimization/optimization_solve_qp_using_smo.h"
#include "optimization/optimization_solve_qp2_using_smo.h"
#include "optimization/optimization_oca.h"
#include "optimization/optimization_trust_region.h"
#include "optimization/optimization_least_squares.h"
// Copyright (C) 2007 Davis E. King (davis@dlib.net)
// License: Boost Software License. See LICENSE.txt for the full license.
#ifndef DLIB_SOLVE_QP2_USING_SMo_H__
#define DLIB_SOLVE_QP2_USING_SMo_H__
#include "optimization_solve_qp2_using_smo_abstract.h"
#include <cmath>
#include <limits>
#include <sstream>
#include "../matrix.h"
#include "../algs.h"
namespace dlib
{
// ----------------------------------------------------------------------------------------
class invalid_nu_error : public dlib::error
{
public:
invalid_nu_error(const std::string& msg, double nu_) : dlib::error(msg), nu(nu_) {};
const double nu;
};
// ----------------------------------------------------------------------------------------
template <
typename T
>
typename T::type maximum_nu_impl (
const T& y
)
{
typedef typename T::type scalar_type;
// make sure requires clause is not broken
DLIB_ASSERT(y.size() > 1 && is_col_vector(y),
"\ttypedef T::type maximum_nu(y)"
<< "\n\ty should be a column vector with more than one entry"
<< "\n\ty.nr(): " << y.nr()
<< "\n\ty.nc(): " << y.nc()
);
long pos_count = 0;
long neg_count = 0;
for (long r = 0; r < y.nr(); ++r)
{
if (y(r) == 1.0)
{
++pos_count;
}
else if (y(r) == -1.0)
{
++neg_count;
}
else
{
// make sure requires clause is not broken
DLIB_ASSERT(y(r) == -1.0 || y(r) == 1.0,
"\ttypedef T::type maximum_nu(y)"
<< "\n\ty should contain only 1 and 0 entries"
<< "\n\tr: " << r
<< "\n\ty(r): " << y(r)
);
}
}
return static_cast<scalar_type>(2.0*(scalar_type)std::min(pos_count,neg_count)/(scalar_type)y.nr());
}
template <
typename T
>
typename T::type maximum_nu (
const T& y
)
{
return maximum_nu_impl(vector_to_matrix(y));
}
template <
typename T
>
typename T::value_type maximum_nu (
const T& y
)
{
return maximum_nu_impl(vector_to_matrix(y));
}
// ----------------------------------------------------------------------------------------
template <
typename matrix_type
>
class solve_qp2_using_smo
{
public:
typedef typename matrix_type::mem_manager_type mem_manager_type;
typedef typename matrix_type::type scalar_type;
typedef typename matrix_type::layout_type layout_type;
typedef matrix<scalar_type,0,0,mem_manager_type,layout_type> general_matrix;
typedef matrix<scalar_type,0,1,mem_manager_type,layout_type> column_matrix;
template <
typename EXP1,
typename EXP2,
long NR
>
void operator() (
const matrix_exp<EXP1>& Q,
const matrix_exp<EXP2>& y,
const scalar_type nu,
matrix<scalar_type,NR,1,mem_manager_type, layout_type>& alpha,
scalar_type eps
)
{
DLIB_ASSERT(Q.nr() == Q.nc() && y.size() == Q.nr() && y.size() > 1 && is_col_vector(y) &&
sum((y == +1) + (y == -1)) == y.size() &&
0 < nu && nu <= 1 &&
eps > 0,
"\t void solve_qp2_using_smo::operator()"
<< "\n\t invalid arguments were given to this function"
<< "\n\t Q.nr(): " << Q.nr()
<< "\n\t Q.nc(): " << Q.nc()
<< "\n\t is_col_vector(y): " << is_col_vector(y)
<< "\n\t y.size(): " << y.size()
<< "\n\t sum((y == +1) + (y == -1)): " << sum((y == +1) + (y == -1))
<< "\n\t nu: " << nu
<< "\n\t eps: " << eps
);
alpha.set_size(Q.nr(),1);
df.set_size(Q.nr());
// now initialize alpha
set_initial_alpha(y, nu, alpha);
const scalar_type tau = 1e-12;
typedef typename colm_exp<EXP1>::type col_type;
set_all_elements(df, 0);
// initialize df. Compute df = Q*alpha
for (long r = 0; r < df.nr(); ++r)
{
if (alpha(r) != 0)
{
df += alpha(r)*matrix_cast<scalar_type>(colm(Q,r));
}
}
// now perform the actual optimization of alpha
long i=0, j=0;
while (find_working_group(y,alpha,Q,df,tau,eps,i,j))
{
const scalar_type old_alpha_i = alpha(i);
const scalar_type old_alpha_j = alpha(j);
optimize_working_pair(alpha,Q,df,tau,i,j);
// update the df vector now that we have modified alpha(i) and alpha(j)
scalar_type delta_alpha_i = alpha(i) - old_alpha_i;
scalar_type delta_alpha_j = alpha(j) - old_alpha_j;
col_type Q_i = colm(Q,i);
col_type Q_j = colm(Q,j);
for(long k = 0; k < df.nr(); ++k)
df(k) += Q_i(k)*delta_alpha_i + Q_j(k)*delta_alpha_j;
}
}
const column_matrix& get_gradient (
) const { return df; }
private:
// -------------------------------------------------------------------------------------
template <
typename scalar_type,
typename scalar_vector_type,
typename scalar_vector_type2
>
inline void set_initial_alpha (
const scalar_vector_type& y,
const scalar_type nu,
scalar_vector_type2& alpha
) const
{
set_all_elements(alpha,0);
const scalar_type l = y.nr();
scalar_type temp = nu*l/2;
long num = (long)std::floor(temp);
long num_total = (long)std::ceil(temp);
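// e.g. with nu == 0.5 and l == 10 (illustrative values): temp == 2.5,
// num == 2, and num_total == 3, so two alphas in this class get set to 1
// and a third gets the fractional remainder 0.5, making the class's
// alphas sum to nu*l/2.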
bool has_slack = false;
int count = 0;
for (int i = 0; i < alpha.nr(); ++i)
{
if (y(i) == 1)
{
if (count < num)
{
++count;
alpha(i) = 1;
}
else
{
has_slack = true;
if (num_total > num)
{
++count;
alpha(i) = temp - std::floor(temp);
}
break;
}
}
}
if (count != num_total || has_slack == false)
{
std::ostringstream sout;
sout << "Invalid nu of " << nu << ". It is required that: 0 < nu < " << 2*(scalar_type)count/y.nr();
throw invalid_nu_error(sout.str(),nu);
}
has_slack = false;
count = 0;
for (int i = 0; i < alpha.nr(); ++i)
{
if (y(i) == -1)
{
if (count < num)
{
++count;
alpha(i) = 1;
}
else
{
has_slack = true;
if (num_total > num)
{
++count;
alpha(i) = temp - std::floor(temp);
}
break;
}
}
}
if (count != num_total || has_slack == false)
{
std::ostringstream sout;
sout << "Invalid nu of " << nu << ". It is required that: 0 < nu < " << 2*(scalar_type)count/y.nr();
throw invalid_nu_error(sout.str(),nu);
}
}
// ------------------------------------------------------------------------------------
template <
typename scalar_vector_type,
typename scalar_type,
typename EXP,
typename U, typename V
>
inline bool find_working_group (
const V& y,
const U& alpha,
const matrix_exp<EXP>& Q,
const scalar_vector_type& df,
const scalar_type tau,
const scalar_type eps,
long& i_out,
long& j_out
) const
{
using namespace std;
long ip = 0;
long jp = 0;
long in = 0;
long jn = 0;
typedef typename colm_exp<EXP>::type col_type;
typedef typename diag_exp<EXP>::type diag_type;
scalar_type ip_val = -numeric_limits<scalar_type>::infinity();
scalar_type jp_val = numeric_limits<scalar_type>::infinity();
scalar_type in_val = -numeric_limits<scalar_type>::infinity();
scalar_type jn_val = numeric_limits<scalar_type>::infinity();
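// The selection below appears to follow the second order working set
// strategy used by LIBSVM (see the Chang and Lin papers cited in the
// abstract): i is the most KKT-violating index in its class and j is the
// index in the same class giving the largest predicted objective
// decrease, -b*b/a.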
// loop over the alphas and find the maximum ip and in indices.
for (long i = 0; i < alpha.nr(); ++i)
{
if (y(i) == 1)
{
if (alpha(i) < 1.0)
{
if (-df(i) > ip_val)
{
ip_val = -df(i);
ip = i;
}
}
}
else
{
if (alpha(i) > 0.0)
{
if (df(i) > in_val)
{
in_val = df(i);
in = i;
}
}
}
}
scalar_type Mp = numeric_limits<scalar_type>::infinity();
scalar_type Mn = numeric_limits<scalar_type>::infinity();
// Pick out the columns and diagonal of Q we need below. Doing
// it this way is faster if Q is actually a symmetric_matrix_cache
// object.
col_type Q_ip = colm(Q,ip);
col_type Q_in = colm(Q,in);
diag_type Q_diag = diag(Q);
// now we need to find the minimum jp and jn indices
for (long j = 0; j < alpha.nr(); ++j)
{
if (y(j) == 1)
{
if (alpha(j) > 0.0)
{
scalar_type b = ip_val + df(j);
if (-df(j) < Mp)
Mp = -df(j);
if (b > 0)
{
scalar_type a = Q_ip(ip) + Q_diag(j) - 2*Q_ip(j);
if (a <= 0)
a = tau;
scalar_type temp = -b*b/a;
if (temp < jp_val)
{
jp_val = temp;
jp = j;
}
}
}
}
else
{
if (alpha(j) < 1.0)
{
scalar_type b = in_val - df(j);
if (df(j) < Mn)
Mn = df(j);
if (b > 0)
{
scalar_type a = Q_in(in) + Q_diag(j) - 2*Q_in(j);
if (a <= 0)
a = tau;
scalar_type temp = -b*b/a;
if (temp < jn_val)
{
jn_val = temp;
jn = j;
}
}
}
}
}
// if we are at the optimal point then return false so the caller knows
// to stop optimizing
if (std::max(ip_val - Mp, in_val - Mn) < eps)
return false;
if (jp_val < jn_val)
{
i_out = ip;
j_out = jp;
}
else
{
i_out = in;
j_out = jn;
}
if (j_out >= 0 && i_out >= 0)
return true;
else
return false;
}
// ------------------------------------------------------------------------------------
template <
typename EXP,
typename T, typename U
>
inline void optimize_working_pair (
T& alpha,
const matrix_exp<EXP>& Q,
const U& df,
const scalar_type tau,
const long i,
const long j
) const
{
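// The selected pair always comes from the same class, so the constraints
// fix the sum alpha(i) + alpha(j).  We minimize the objective along that
// line analytically (delta is the unconstrained step) and then clip the
// result back into the [0,1] box below.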
scalar_type quad_coef = Q(i,i)+Q(j,j)-2*Q(j,i);
if (quad_coef <= 0)
quad_coef = tau;
scalar_type delta = (df(i)-df(j))/quad_coef;
scalar_type sum = alpha(i) + alpha(j);
alpha(i) -= delta;
alpha(j) += delta;
if(sum > 1)
{
if(alpha(i) > 1)
{
alpha(i) = 1;
alpha(j) = sum - 1;
}
else if(alpha(j) > 1)
{
alpha(j) = 1;
alpha(i) = sum - 1;
}
}
else
{
if(alpha(j) < 0)
{
alpha(j) = 0;
alpha(i) = sum;
}
else if(alpha(i) < 0)
{
alpha(i) = 0;
alpha(j) = sum;
}
}
}
// ------------------------------------------------------------------------------------
column_matrix df; // gradient of f(alpha)
};
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_SOLVE_QP2_USING_SMo_H__
// Copyright (C) 2007 Davis E. King (davis@dlib.net)
// License: Boost Software License. See LICENSE.txt for the full license.
#undef DLIB_OPTIMIZATION_SOLVE_QP2_USING_SMO_ABSTRACT_H_
#ifdef DLIB_OPTIMIZATION_SOLVE_QP2_USING_SMO_ABSTRACT_H_
#include "../matrix/matrix_abstract.h"
#include "../algs.h"
namespace dlib
{
// ----------------------------------------------------------------------------------------
class invalid_nu_error : public dlib::error
{
/*!
WHAT THIS OBJECT REPRESENTS
This object is an exception class used to indicate that a
value of nu given to the solve_qp2_using_smo object is incompatible
with the constraints of the quadratic program.
this->nu will be set to the invalid value of nu used.
!*/
public:
invalid_nu_error(const std::string& msg, double nu_) : dlib::error(msg), nu(nu_) {};
const double nu;
};
// ----------------------------------------------------------------------------------------
template <
typename T
>
typename T::type maximum_nu (
const T& y
);
/*!
requires
- T == a matrix object or an object convertible to a matrix via vector_to_matrix()
- is_col_vector(y) == true
- y.size() > 1
- sum((y == +1) + (y == -1)) == y.size()
(i.e. all elements of y must be equal to +1 or -1)
ensures
- returns the maximum valid nu that can be used with solve_qp2_using_smo and
the given y vector.
(i.e. 2.0*min(sum(y == +1), sum(y == -1))/y.size())
!*/
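// For example (a minimal sketch; the label values are illustrative and
// assume <vector> has been included):
//
//     std::vector<double> y;
//     y.push_back(+1);  y.push_back(+1);  y.push_back(-1);
//     // min(#+1s, #-1s) == 1 and y.size() == 3, so this returns 2.0*1/3
//     double max_nu = maximum_nu(y);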
// ----------------------------------------------------------------------------------------
template <
typename matrix_type
>
class solve_qp2_using_smo
{
/*!
REQUIREMENTS ON matrix_type
Must be some type of dlib::matrix.
WHAT THIS OBJECT REPRESENTS
This object is a tool for solving the following quadratic programming
problem using the sequential minimal optimization algorithm:
Minimize: f(alpha) == 0.5*trans(alpha)*Q*alpha
subject to the following constraints:
- sum(alpha) == nu*y.size()
- 0 <= min(alpha) && max(alpha) <= 1
- trans(y)*alpha == 0
Where f is convex. This means that Q should be symmetric and positive-semidefinite.
This object implements the strategy used by the LIBSVM tool and described
by the following papers:
- Chang and Lin, Training {nu}-Support Vector Classifiers: Theory and Algorithms
- Chih-Chung Chang and Chih-Jen Lin, LIBSVM : a library for support vector
machines, 2001. Software available at http://www.csie.ntu.edu.tw/~cjlin/libsvm
!*/
public:
typedef typename matrix_type::mem_manager_type mem_manager_type;
typedef typename matrix_type::type scalar_type;
typedef typename matrix_type::layout_type layout_type;
typedef matrix<scalar_type,0,0,mem_manager_type,layout_type> general_matrix;
typedef matrix<scalar_type,0,1,mem_manager_type,layout_type> column_matrix;
template <
typename EXP1,
typename EXP2,
long NR
>
void operator() (
const matrix_exp<EXP1>& Q,
const matrix_exp<EXP2>& y,
const scalar_type nu,
matrix<scalar_type,NR,1,mem_manager_type, layout_type>& alpha,
scalar_type eps
);
/*!
requires
- Q.nr() == Q.nc()
- is_col_vector(y) == true
- y.size() == Q.nr()
- y.size() > 1
- sum((y == +1) + (y == -1)) == y.size()
(i.e. all elements of y must be equal to +1 or -1)
- alpha must be capable of representing a vector of size y.size() elements
- 0 < nu <= 1
- eps > 0
ensures
- This function solves the quadratic program defined in this class's main comment.
- The solution to the quadratic program will be stored in #alpha.
- #alpha.size() == y.size()
- This function uses an implementation of the sequential minimal optimization
algorithm. It runs until the KKT violation is less than eps. So eps controls
how accurate the solution is and smaller values result in better solutions.
(a reasonable eps is usually about 1e-3)
- #get_gradient() == Q*(#alpha)
(i.e. stores the gradient of f() at #alpha in get_gradient())
throws
- invalid_nu_error
This exception is thrown if nu >= maximum_nu(y).
(some values of nu cause the constraints to become impossible to satisfy.
If this is detected then an exception is thrown).
!*/
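// A minimal usage sketch (the Q, y, nu, and eps values below are
// illustrative assumptions only):
//
//     matrix<double> Q(2,2);
//     Q = 2, 0,
//         0, 2;
//     matrix<double,0,1> y(2), alpha;
//     y = +1, -1;
//     solve_qp2_using_smo<matrix<double,0,1> > solver;
//     solver(Q, y, 0.5, alpha, 1e-3); // throws invalid_nu_error if nu is too large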
const column_matrix& get_gradient (
) const;
/*!
ensures
- returns the gradient vector at the solution of the last problem solved
by this object. If no problem has been solved then returns an empty
vector.
!*/
};
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_OPTIMIZATION_SOLVE_QP2_USING_SMO_ABSTRACT_H_
@@ -16,83 +16,11 @@
#include "kernel.h"
#include "../enable_if.h"
#include "../optimization.h"
#include "svm_nu_trainer.h"
namespace dlib
{
// ----------------------------------------------------------------------------------------
class invalid_svm_nu_error : public dlib::error
{
public:
invalid_svm_nu_error(const std::string& msg, double nu_) : dlib::error(msg), nu(nu_) {};
const double nu;
};
// ----------------------------------------------------------------------------------------
template <
typename T
>
typename T::type maximum_nu_impl (
const T& y
)
{
typedef typename T::type scalar_type;
// make sure requires clause is not broken
DLIB_ASSERT(y.nr() > 1 && y.nc() == 1,
"\ttypedef T::type maximum_nu(y)"
<< "\n\ty should be a column vector with more than one entry"
<< "\n\ty.nr(): " << y.nr()
<< "\n\ty.nc(): " << y.nc()
);
long pos_count = 0;
long neg_count = 0;
for (long r = 0; r < y.nr(); ++r)
{
if (y(r) == 1.0)
{
++pos_count;
}
else if (y(r) == -1.0)
{
++neg_count;
}
else
{
// make sure requires clause is not broken
DLIB_ASSERT(y(r) == -1.0 || y(r) == 1.0,
"\ttypedef T::type maximum_nu(y)"
<< "\n\ty should contain only 1 and 0 entries"
<< "\n\tr: " << r
<< "\n\ty(r): " << y(r)
);
}
}
return static_cast<scalar_type>(2.0*(scalar_type)std::min(pos_count,neg_count)/(scalar_type)y.nr());
}
template <
typename T
>
typename T::type maximum_nu (
const T& y
)
{
return maximum_nu_impl(vector_to_matrix(y));
}
template <
typename T
>
typename T::value_type maximum_nu (
const T& y
)
{
return maximum_nu_impl(vector_to_matrix(y));
}
// ----------------------------------------------------------------------------------------
template <
@@ -135,149 +63,6 @@ namespace dlib
return is_binary_classification_problem_impl(vector_to_matrix(x), vector_to_matrix(x_labels));
}
// ----------------------------------------------------------------------------------------
template <
typename K,
typename sample_vector_type,
typename scalar_vector_type
>
class kernel_matrix_cache
{
public:
typedef float scalar_type;
//typedef typename K::scalar_type scalar_type;
typedef typename K::sample_type sample_type;
typedef typename K::mem_manager_type mem_manager_type;
const sample_vector_type& x;
const scalar_vector_type& y;
K kernel_function;
mutable matrix<scalar_type,0,0,mem_manager_type> cache;
mutable matrix<scalar_type,0,1,mem_manager_type> diag_cache;
mutable matrix<long,0,1,mem_manager_type> lookup;
mutable matrix<long,0,1,mem_manager_type> rlookup;
mutable long next;
/*!
INITIAL VALUE
- for all valid x:
- lookup(x) == -1
- rlookup(x) == -1
CONVENTION
- if (lookup(c) != -1) then
- cache(lookup(c),*) == the cached column c of the kernel matrix
- rlookup(lookup(c)) == c
- if (rlookup(x) != -1) then
- lookup(rlookup(x)) == x
- cache(x,*) == the cached column rlookup(x) of the kernel matrix
- next == the next row in the cache table to use to cache something
!*/
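// e.g. if column 5 of the kernel matrix is cached in row 2 of the cache
// table then lookup(5) == 2 and rlookup(2) == 5 (illustrative indices).
// When row next is recycled, the displaced column's lookup entry is
// reset back to -1.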
public:
kernel_matrix_cache (
const sample_vector_type& x_,
const scalar_vector_type& y_,
K kernel_function_,
long max_size_megabytes
) : x(x_), y(y_), kernel_function(kernel_function_)
{
// figure out how many rows of the kernel matrix we can have
// with the given amount of memory.
long max_size = (max_size_megabytes*1024*1024)/(x.nr()*sizeof(scalar_type));
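// i.e. each cached row holds x.nr() scalar_type values, so max_size is
// the number of whole rows that fit in the requested memory budget.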
// don't let it be 0
if (max_size == 0)
max_size = 1;
long size = std::min(max_size,x.nr());
diag_cache.set_size(x.nr(),1);
cache.set_size(size,x.nr());
lookup.set_size(x.nr(),1);
rlookup.set_size(size,1);
set_all_elements(lookup,-1);
set_all_elements(rlookup,-1);
next = 0;
for (long i = 0; i < diag_cache.nr(); ++i)
diag_cache(i) = kernel_function(x(i),x(i));
}
inline bool is_cached (
long r
) const
{
return (lookup(r) != -1);
}
const scalar_type* col(long i) const
{
if (is_cached(i) == false)
add_col_to_cache(i);
// find where this column is in the cache
long idx = lookup(i);
if (idx == next)
{
// if this column was the next to be replaced
// then make sure that doesn't happen
next = (next + 1)%cache.nr();
}
return &cache(idx,0);
}
const scalar_type* diag() const { return &diag_cache(0); }
inline scalar_type operator () (
long r,
long c
) const
{
if (lookup(c) != -1)
{
return cache(lookup(c),r);
}
else if (r == c)
{
return diag_cache(r);
}
else if (lookup(r) != -1)
{
// the kernel is symmetric so this is legit
return cache(lookup(r),c);
}
else
{
add_col_to_cache(c);
return cache(lookup(c),r);
}
}
private:
void add_col_to_cache(
long c
) const
{
// if the lookup table is pointing to cache(next,*) then clear lookup(next)
if (rlookup(next) != -1)
lookup(rlookup(next)) = -1;
// make the lookup table so that it says c is now cached at the spot indicated by next
lookup(c) = next;
rlookup(next) = c;
// compute this column in the kernel matrix and store it in the cache
for (long i = 0; i < cache.nc(); ++i)
cache(next,i) = y(c)*y(i)*kernel_function(x(c),x(i));
next = (next + 1)%cache.nr();
}
};
// ----------------------------------------------------------------------------------------
template <
@@ -488,7 +273,7 @@ namespace dlib
// do the training and testing
res += test_binary_decision_function(trainer.train(x_train,y_train),x_test,y_test);
}
- catch (invalid_svm_nu_error&)
+ catch (invalid_nu_error&)
{
// Just ignore the error in this case since we are going to
// interpret an invalid nu value the same as generating a decision
@@ -675,8 +460,8 @@ namespace dlib
/*
This function fits a sigmoid function to the output of the
- svm trained by svm_nu_trainer. The technique used is the one
- described in the papers:
+ svm trained by svm_nu_trainer or a similar trainer. The
+ technique used is the one described in the papers:
Probabilistic Outputs for Support Vector Machines and
Comparisons to Regularized Likelihood Methods by
@@ -1021,633 +806,7 @@ namespace dlib
}
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
template <
typename K
>
class svm_nu_trainer
{
public:
typedef K kernel_type;
typedef typename kernel_type::scalar_type scalar_type;
typedef typename kernel_type::sample_type sample_type;
typedef typename kernel_type::mem_manager_type mem_manager_type;
typedef decision_function<kernel_type> trained_function_type;
svm_nu_trainer (
) :
nu(0.1),
cache_size(200),
eps(0.001)
{
}
svm_nu_trainer (
const kernel_type& kernel_,
const scalar_type& nu_
) :
kernel_function(kernel_),
nu(nu_),
cache_size(200),
eps(0.001)
{
// make sure requires clause is not broken
DLIB_ASSERT(0 < nu && nu <= 1,
"\tsvm_nu_trainer::svm_nu_trainer(kernel,nu)"
<< "\n\t invalid inputs were given to this function"
<< "\n\t nu: " << nu
);
}
void set_cache_size (
long cache_size_
)
{
// make sure requires clause is not broken
DLIB_ASSERT(cache_size_ > 0,
"\tvoid svm_nu_trainer::set_cache_size(cache_size_)"
<< "\n\t invalid inputs were given to this function"
<< "\n\t cache_size: " << cache_size_
);
cache_size = cache_size_;
}
long get_cache_size (
) const
{
return cache_size;
}
void set_epsilon (
scalar_type eps_
)
{
// make sure requires clause is not broken
DLIB_ASSERT(eps_ > 0,
"\tvoid svm_nu_trainer::set_epsilon(eps_)"
<< "\n\t invalid inputs were given to this function"
<< "\n\t eps: " << eps_
);
eps = eps_;
}
const scalar_type get_epsilon (
) const
{
return eps;
}
void set_kernel (
const kernel_type& k
)
{
kernel_function = k;
}
const kernel_type& get_kernel (
) const
{
return kernel_function;
}
void set_nu (
scalar_type nu_
)
{
// make sure requires clause is not broken
DLIB_ASSERT(0 < nu_ && nu_ <= 1,
"\tvoid svm_nu_trainer::set_nu(nu_)"
<< "\n\t invalid inputs were given to this function"
<< "\n\t nu: " << nu_
);
nu = nu_;
}
const scalar_type get_nu (
) const
{
return nu;
}
template <
typename in_sample_vector_type,
typename in_scalar_vector_type
>
const decision_function<kernel_type> train (
const in_sample_vector_type& x,
const in_scalar_vector_type& y
) const
{
return do_train(vector_to_matrix(x), vector_to_matrix(y));
}
void swap (
svm_nu_trainer& item
)
{
exchange(kernel_function, item.kernel_function);
exchange(nu, item.nu);
exchange(cache_size, item.cache_size);
exchange(eps, item.eps);
}
private:
// ------------------------------------------------------------------------------------
template <
typename in_sample_vector_type,
typename in_scalar_vector_type
>
const decision_function<kernel_type> do_train (
const in_sample_vector_type& x,
const in_scalar_vector_type& y
) const
{
typedef typename K::scalar_type scalar_type;
typedef typename decision_function<K>::sample_vector_type sample_vector_type;
typedef typename decision_function<K>::scalar_vector_type scalar_vector_type;
// make sure requires clause is not broken
DLIB_ASSERT(is_binary_classification_problem(x,y) == true,
"\tdecision_function svm_nu_trainer::train(x,y)"
<< "\n\t invalid inputs were given to this function"
<< "\n\t x.nr(): " << x.nr()
<< "\n\t y.nr(): " << y.nr()
<< "\n\t x.nc(): " << x.nc()
<< "\n\t y.nc(): " << y.nc()
<< "\n\t is_binary_classification_problem(x,y): " << ((is_binary_classification_problem(x,y))? "true":"false")
);
const scalar_type tau = 1e-12;
scalar_vector_type df; // delta f(alpha)
scalar_vector_type alpha;
kernel_matrix_cache<K, in_sample_vector_type, in_scalar_vector_type> Q(x,y,kernel_function,cache_size);
typedef typename kernel_matrix_cache<K, in_sample_vector_type, in_scalar_vector_type>::scalar_type cache_type;
alpha.set_size(x.nr());
df.set_size(x.nr());
// now initialize alpha
set_initial_alpha(y, nu, alpha);
set_all_elements(df, 0);
// initialize df. Compute df = Q*alpha
for (long r = 0; r < df.nr(); ++r)
{
if (alpha(r) != 0)
{
const cache_type* Q_r = Q.col(r);
for (long c = 0; c < alpha.nr(); ++c)
{
df(c) += alpha(r)*Q_r[c];
}
}
}
// now perform the actual optimization of alpha
long i, j;
while (find_working_group(y,alpha,Q,df,tau,eps,i,j))
{
const scalar_type old_alpha_i = alpha(i);
const scalar_type old_alpha_j = alpha(j);
optimize_working_pair(y,alpha,Q,df,tau,i,j);
// update the df vector now that we have modified alpha(i) and alpha(j)
scalar_type delta_alpha_i = alpha(i) - old_alpha_i;
scalar_type delta_alpha_j = alpha(j) - old_alpha_j;
const cache_type* Q_i = Q.col(i);
const cache_type* Q_j = Q.col(j);
for(long k = 0; k < df.nr(); ++k)
df(k) += Q_i[k]*delta_alpha_i + Q_j[k]*delta_alpha_j;
}
scalar_type rho, b;
calculate_rho_and_b(y,alpha,df,rho,b);
alpha = pointwise_multiply(alpha,y)/rho;
// count the number of support vectors
long sv_count = 0;
for (long i = 0; i < alpha.nr(); ++i)
{
if (alpha(i) != 0)
++sv_count;
}
scalar_vector_type sv_alpha;
sample_vector_type support_vectors;
// size these column vectors so that they have an entry for each support vector
sv_alpha.set_size(sv_count);
support_vectors.set_size(sv_count);
// load the support vectors and their alpha values into these new column matrices
long idx = 0;
for (long i = 0; i < alpha.nr(); ++i)
{
if (alpha(i) != 0)
{
sv_alpha(idx) = alpha(i);
support_vectors(idx) = x(i);
++idx;
}
}
// now return the decision function
return decision_function<K> (sv_alpha, b, kernel_function, support_vectors);
}
// ------------------------------------------------------------------------------------
template <
typename scalar_type,
typename scalar_vector_type,
typename scalar_vector_type2
>
inline void set_initial_alpha (
const scalar_vector_type& y,
const scalar_type nu,
scalar_vector_type2& alpha
) const
{
set_all_elements(alpha,0);
const scalar_type l = y.nr();
scalar_type temp = nu*l/2;
long num = (long)std::floor(temp);
long num_total = (long)std::ceil(temp);
bool has_slack = false;
int count = 0;
for (int i = 0; i < alpha.nr(); ++i)
{
if (y(i) == 1)
{
if (count < num)
{
++count;
alpha(i) = 1;
}
else
{
has_slack = true;
if (num_total > num)
{
++count;
alpha(i) = temp - std::floor(temp);
}
break;
}
}
}
if (count != num_total || has_slack == false)
{
std::ostringstream sout;
sout << "Invalid nu of " << nu << ". It is required that: 0 < nu < " << 2*(scalar_type)count/y.nr();
throw invalid_svm_nu_error(sout.str(),nu);
}
has_slack = false;
count = 0;
for (int i = 0; i < alpha.nr(); ++i)
{
if (y(i) == -1)
{
if (count < num)
{
++count;
alpha(i) = 1;
}
else
{
has_slack = true;
if (num_total > num)
{
++count;
alpha(i) = temp - std::floor(temp);
}
break;
}
}
}
if (count != num_total || has_slack == false)
{
std::ostringstream sout;
sout << "Invalid nu of " << nu << ". It is required that: 0 < nu < " << 2*(scalar_type)count/y.nr();
throw invalid_svm_nu_error(sout.str(),nu);
}
}
// ------------------------------------------------------------------------------------
template <
typename sample_vector_type,
typename scalar_vector_type,
typename scalar_vector_type2,
typename scalar_type
>
inline bool find_working_group (
const scalar_vector_type2& y,
const scalar_vector_type& alpha,
const kernel_matrix_cache<K,sample_vector_type, scalar_vector_type2>& Q,
const scalar_vector_type& df,
const scalar_type tau,
const scalar_type eps,
long& i_out,
long& j_out
) const
{
using namespace std;
long ip = -1;
long jp = -1;
long in = -1;
long jn = -1;
typedef typename kernel_matrix_cache<K, sample_vector_type, scalar_vector_type2>::scalar_type cache_type;
scalar_type ip_val = -numeric_limits<scalar_type>::infinity();
scalar_type jp_val = numeric_limits<scalar_type>::infinity();
scalar_type in_val = -numeric_limits<scalar_type>::infinity();
scalar_type jn_val = numeric_limits<scalar_type>::infinity();
// loop over the alphas and find the maximum ip and in indices.
for (long i = 0; i < alpha.nr(); ++i)
{
if (y(i) == 1)
{
if (alpha(i) < 1.0)
{
if (-df(i) > ip_val)
{
ip_val = -df(i);
ip = i;
}
}
}
else
{
if (alpha(i) > 0.0)
{
if (df(i) > in_val)
{
in_val = df(i);
in = i;
}
}
}
}
scalar_type Mp = numeric_limits<scalar_type>::infinity();
scalar_type Mn = numeric_limits<scalar_type>::infinity();
// As a speed hack, pull out pointers to the columns of the
// kernel matrix we will be using below rather than accessing
// them through the Q(r,c) syntax.
const cache_type* Q_ip = 0;
const cache_type* Q_in = 0;
const cache_type* Q_diag = Q.diag();
if (ip != -1)
Q_ip = Q.col(ip);
if (in != -1)
Q_in = Q.col(in);
// now we need to find the minimum jp and jn indices
for (long j = 0; j < alpha.nr(); ++j)
{
if (y(j) == 1)
{
if (alpha(j) > 0.0)
{
scalar_type b = ip_val + df(j);
if (-df(j) < Mp)
Mp = -df(j);
if (b > 0)
{
scalar_type a = Q_ip[ip] + Q_diag[j] - 2*Q_ip[j];
if (a <= 0)
a = tau;
scalar_type temp = -b*b/a;
if (temp < jp_val)
{
jp_val = temp;
jp = j;
}
}
}
}
else
{
if (alpha(j) < 1.0)
{
scalar_type b = in_val - df(j);
if (df(j) < Mn)
Mn = df(j);
if (b > 0)
{
scalar_type a = Q_in[in] + Q_diag[j] - 2*Q_in[j];
if (a <= 0)
a = tau;
scalar_type temp = -b*b/a;
if (temp < jn_val)
{
jn_val = temp;
jn = j;
}
}
}
}
}
// if we are at the optimal point then return false so the caller knows
// to stop optimizing
if (std::max(ip_val - Mp, in_val - Mn) < eps)
return false;
if (jp_val < jn_val)
{
i_out = ip;
j_out = jp;
}
else
{
i_out = in;
j_out = jn;
}
if (j_out >= 0 && i_out >= 0)
return true;
else
return false;
}
// ------------------------------------------------------------------------------------
template <
typename scalar_vector_type,
typename scalar_vector_type2,
typename scalar_type
>
void calculate_rho_and_b(
const scalar_vector_type2& y,
const scalar_vector_type& alpha,
const scalar_vector_type& df,
scalar_type& rho,
scalar_type& b
) const
{
using namespace std;
long num_p_free = 0;
long num_n_free = 0;
scalar_type sum_p_free = 0;
scalar_type sum_n_free = 0;
scalar_type upper_bound_p = -numeric_limits<scalar_type>::infinity();
scalar_type upper_bound_n = -numeric_limits<scalar_type>::infinity();
scalar_type lower_bound_p = numeric_limits<scalar_type>::infinity();
scalar_type lower_bound_n = numeric_limits<scalar_type>::infinity();
for(long i = 0; i < alpha.nr(); ++i)
{
if(y(i) == 1)
{
if(alpha(i) == 1)
{
if (df(i) > upper_bound_p)
upper_bound_p = df(i);
}
else if(alpha(i) == 0)
{
if (df(i) < lower_bound_p)
lower_bound_p = df(i);
}
else
{
++num_p_free;
sum_p_free += df(i);
}
}
else
{
if(alpha(i) == 1)
{
if (df(i) > upper_bound_n)
upper_bound_n = df(i);
}
else if(alpha(i) == 0)
{
if (df(i) < lower_bound_n)
lower_bound_n = df(i);
}
else
{
++num_n_free;
sum_n_free += df(i);
}
}
}
scalar_type r1,r2;
if(num_p_free > 0)
r1 = sum_p_free/num_p_free;
else
r1 = (upper_bound_p+lower_bound_p)/2;
if(num_n_free > 0)
r2 = sum_n_free/num_n_free;
else
r2 = (upper_bound_n+lower_bound_n)/2;
rho = (r1+r2)/2;
b = (r1-r2)/2/rho;
}
// ------------------------------------------------------------------------------------
template <
typename sample_vector_type,
typename scalar_vector_type,
typename scalar_vector_type2,
typename scalar_type
>
inline void optimize_working_pair (
const scalar_vector_type2& ,
scalar_vector_type& alpha,
const kernel_matrix_cache<K, sample_vector_type, scalar_vector_type2>& Q,
const scalar_vector_type& df,
const scalar_type tau,
const long i,
const long j
) const
{
scalar_type quad_coef = Q(i,i)+Q(j,j)-2*Q(j,i);
if (quad_coef <= 0)
quad_coef = tau;
scalar_type delta = (df(i)-df(j))/quad_coef;
scalar_type sum = alpha(i) + alpha(j);
alpha(i) -= delta;
alpha(j) += delta;
if(sum > 1)
{
if(alpha(i) > 1)
{
alpha(i) = 1;
alpha(j) = sum - 1;
}
else if(alpha(j) > 1)
{
alpha(j) = 1;
alpha(i) = sum - 1;
}
}
else
{
if(alpha(j) < 0)
{
alpha(j) = 0;
alpha(i) = sum;
}
else if(alpha(i) < 0)
{
alpha(i) = 0;
alpha(j) = sum;
}
}
}
// ------------------------------------------------------------------------------------
kernel_type kernel_function;
scalar_type nu;
long cache_size;
scalar_type eps;
}; // end of class svm_nu_trainer
// ----------------------------------------------------------------------------------------
template <typename K>
void swap (
svm_nu_trainer<K>& a,
svm_nu_trainer<K>& b
) { a.swap(b); }
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_SVm_
@@ -11,52 +11,13 @@
#include "../serialize.h"
#include "function_abstract.h"
#include "kernel_abstract.h"
#include "svm_nu_trainer_abstract.h"
namespace dlib
{
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
class invalid_svm_nu_error : public dlib::error
{
/*!
WHAT THIS OBJECT REPRESENTS
This object is an exception class used to indicate that a
value of nu used for svm training is incompatible with a
particular data set.
this->nu will be set to the invalid value of nu used.
!*/
public:
invalid_svm_nu_error(const std::string& msg, double nu_) : dlib::error(msg), nu(nu_) {};
const double nu;
};
// ----------------------------------------------------------------------------------------
template <
typename T
>
typename T::type maximum_nu (
const T& y
);
/*!
requires
- T == a matrix object or an object convertible to a matrix via
vector_to_matrix()
- y.nc() == 1
- y.nr() > 1
- for all valid i:
- y(i) == -1 or +1
ensures
- returns the maximum valid nu that can be used with the svm_nu_trainer and
the training set labels from the given y vector.
(i.e. 2.0*min(number of +1 examples in y, number of -1 examples in y)/y.nr())
!*/
// ----------------------------------------------------------------------------------------
template <
@@ -85,189 +46,6 @@ namespace dlib
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
template <
typename K
>
class svm_nu_trainer
{
/*!
REQUIREMENTS ON K
is a kernel function object as defined in dlib/svm/kernel_abstract.h
WHAT THIS OBJECT REPRESENTS
This object implements a trainer for a nu support vector machine for
solving binary classification problems.
The implementation of the nu-svm training algorithm used by this object is based
on the following excellent papers:
- Chang and Lin, Training {nu}-Support Vector Classifiers: Theory and Algorithms
- Chih-Chung Chang and Chih-Jen Lin, LIBSVM : a library for support vector
machines, 2001. Software available at http://www.csie.ntu.edu.tw/~cjlin/libsvm
!*/
public:
typedef K kernel_type;
typedef typename kernel_type::scalar_type scalar_type;
typedef typename kernel_type::sample_type sample_type;
typedef typename kernel_type::mem_manager_type mem_manager_type;
typedef decision_function<kernel_type> trained_function_type;
svm_nu_trainer (
);
/*!
ensures
- This object is properly initialized and ready to be used
to train a support vector machine.
- #get_nu() == 0.1
- #get_cache_size() == 200
- #get_epsilon() == 0.001
!*/
svm_nu_trainer (
const kernel_type& kernel,
const scalar_type& nu
);
/*!
requires
- 0 < nu <= 1
ensures
- This object is properly initialized and ready to be used
to train a support vector machine.
- #get_kernel() == kernel
- #get_nu() == nu
- #get_cache_size() == 200
- #get_epsilon() == 0.001
!*/
void set_cache_size (
long cache_size
);
/*!
requires
- cache_size > 0
ensures
- #get_cache_size() == cache_size
!*/
const long get_cache_size (
) const;
/*!
ensures
- returns the number of megabytes of cache this object will use
when it performs training via the this->train() function.
(bigger values of this may make training go faster but won't affect
the result. However, too big a value will cause you to run out of
memory, obviously.)
!*/
void set_epsilon (
scalar_type eps
);
/*!
requires
- eps > 0
ensures
- #get_epsilon() == eps
!*/
const scalar_type get_epsilon (
) const;
/*!
ensures
- returns the error epsilon that determines when training should stop.
Generally a good value for this is 0.001. Smaller values may result
in a more accurate solution but take longer to execute.
!*/
void set_kernel (
const kernel_type& k
);
/*!
ensures
- #get_kernel() == k
!*/
const kernel_type& get_kernel (
) const;
/*!
ensures
- returns a copy of the kernel function in use by this object
!*/
void set_nu (
scalar_type nu
);
/*!
requires
- 0 < nu <= 1
ensures
- #get_nu() == nu
!*/
const scalar_type get_nu (
) const;
/*!
ensures
- returns the nu svm parameter. This is a value between 0 and
1. It is the parameter that determines the trade off between
trying to fit the training data exactly or allowing more errors
but hopefully improving the generalization ability of the
resulting classifier. Smaller values encourage exact fitting
while larger values of nu may encourage better generalization.
For more information you should consult the papers referenced
above.
!*/
template <
typename in_sample_vector_type,
typename in_scalar_vector_type
>
const decision_function<kernel_type> train (
const in_sample_vector_type& x,
const in_scalar_vector_type& y
) const;
/*!
requires
- is_binary_classification_problem(x,y) == true
- x == a matrix or something convertible to a matrix via vector_to_matrix().
Also, x should contain sample_type objects.
- y == a matrix or something convertible to a matrix via vector_to_matrix().
Also, y should contain scalar_type objects.
ensures
- trains a nu support vector classifier given the training samples in x and
labels in y. Training is done when the error is less than get_epsilon().
- returns a decision function F with the following properties:
- if (new_x is a sample predicted to have a +1 label) then
- F(new_x) >= 0
- else
- F(new_x) < 0
throws
- invalid_svm_nu_error
This exception is thrown if get_nu() >= maximum_nu(y)
- std::bad_alloc
!*/
void swap (
svm_nu_trainer& item
);
/*!
ensures
- swaps *this and item
!*/
};
template <typename K>
void swap (
svm_nu_trainer<K>& a,
svm_nu_trainer<K>& b
) { a.swap(b); }
/*!
provides a global swap
!*/
// ----------------------------------------------------------------------------------------
template <
@@ -288,9 +66,8 @@ namespace dlib
- is_binary_classification_problem(x,y) == true
- trainer_type == some kind of batch trainer object (e.g. svm_nu_trainer)
ensures
- - trains a nu support vector classifier given the training samples in x and
- labels in y.
- - returns a probabilistic_decision_function that represents the trained svm.
+ - trains a classifier given the training samples in x and labels in y.
+ - returns a probabilistic_decision_function that represents the trained classifier.
- The parameters of the probability model are estimated by performing k-fold
cross validation.
- The number of folds used is given by the folds argument.
// Copyright (C) 2007 Davis E. King (davis@dlib.net)
// License: Boost Software License. See LICENSE.txt for the full license.
#ifndef DLIB_SVm_NU_TRAINER_H__
#define DLIB_SVm_NU_TRAINER_H__
//#include "local/make_label_kernel_matrix.h"
#include "svm_nu_trainer_abstract.h"
#include <cmath>
#include <limits>
#include <sstream>
#include "../matrix.h"
#include "../algs.h"
#include "../serialize.h"
#include "function.h"
#include "kernel.h"
#include "../optimization/optimization_solve_qp2_using_smo.h"
namespace dlib
{
// ----------------------------------------------------------------------------------------
template <
typename K
>
class svm_nu_trainer
{
public:
typedef K kernel_type;
typedef typename kernel_type::scalar_type scalar_type;
typedef typename kernel_type::sample_type sample_type;
typedef typename kernel_type::mem_manager_type mem_manager_type;
typedef decision_function<kernel_type> trained_function_type;
svm_nu_trainer (
) :
nu(0.1),
cache_size(200),
eps(0.001)
{
}
svm_nu_trainer (
const kernel_type& kernel_,
const scalar_type& nu_
) :
kernel_function(kernel_),
nu(nu_),
cache_size(200),
eps(0.001)
{
// make sure requires clause is not broken
DLIB_ASSERT(0 < nu && nu <= 1,
"\tsvm_nu_trainer::svm_nu_trainer(kernel,nu)"
<< "\n\t invalid inputs were given to this function"
<< "\n\t nu: " << nu
);
}
void set_cache_size (
long cache_size_
)
{
// make sure requires clause is not broken
DLIB_ASSERT(cache_size_ > 0,
"\tvoid svm_nu_trainer::set_cache_size(cache_size_)"
<< "\n\t invalid inputs were given to this function"
<< "\n\t cache_size: " << cache_size_
);
cache_size = cache_size_;
}
long get_cache_size (
) const
{
return cache_size;
}
void set_epsilon (
scalar_type eps_
)
{
// make sure requires clause is not broken
DLIB_ASSERT(eps_ > 0,
"\tvoid svm_nu_trainer::set_epsilon(eps_)"
<< "\n\t invalid inputs were given to this function"
<< "\n\t eps: " << eps_
);
eps = eps_;
}
const scalar_type get_epsilon (
) const
{
return eps;
}
void set_kernel (
const kernel_type& k
)
{
kernel_function = k;
}
const kernel_type& get_kernel (
) const
{
return kernel_function;
}
void set_nu (
scalar_type nu_
)
{
// make sure requires clause is not broken
DLIB_ASSERT(0 < nu_ && nu_ <= 1,
"\tvoid svm_nu_trainer::set_nu(nu_)"
<< "\n\t invalid inputs were given to this function"
<< "\n\t nu: " << nu_
);
nu = nu_;
}
const scalar_type get_nu (
) const
{
return nu;
}
template <
typename in_sample_vector_type,
typename in_scalar_vector_type
>
const decision_function<kernel_type> train (
const in_sample_vector_type& x,
const in_scalar_vector_type& y
) const
{
return do_train(vector_to_matrix(x), vector_to_matrix(y));
}
void swap (
svm_nu_trainer& item
)
{
exchange(kernel_function, item.kernel_function);
exchange(nu, item.nu);
exchange(cache_size, item.cache_size);
exchange(eps, item.eps);
}
private:
// ------------------------------------------------------------------------------------
template <
typename in_sample_vector_type,
typename in_scalar_vector_type
>
const decision_function<kernel_type> do_train (
const in_sample_vector_type& x,
const in_scalar_vector_type& y
) const
{
typedef typename K::scalar_type scalar_type;
typedef typename decision_function<K>::sample_vector_type sample_vector_type;
typedef typename decision_function<K>::scalar_vector_type scalar_vector_type;
// make sure requires clause is not broken
DLIB_ASSERT(is_binary_classification_problem(x,y) == true,
"\tdecision_function svm_nu_trainer::train(x,y)"
<< "\n\t invalid inputs were given to this function"
<< "\n\t x.nr(): " << x.nr()
<< "\n\t y.nr(): " << y.nr()
<< "\n\t x.nc(): " << x.nc()
<< "\n\t y.nc(): " << y.nc()
<< "\n\t is_binary_classification_problem(x,y): " << is_binary_classification_problem(x,y)
);
scalar_vector_type alpha;
solve_qp2_using_smo<scalar_vector_type> solver;
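// The QP's Q matrix is Q(r,c) == y(r)*y(c)*kernel_function(x(r),x(c)).
// Wrapping it in a symmetric_matrix_cache should keep at most cache_size
// megabytes of it in memory at once, with columns computed as the solver
// requests them.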
solver(symmetric_matrix_cache<float>((diagm(y)*kernel_matrix(kernel_function,x)*diagm(y)), cache_size),
//solver(symmetric_matrix_cache<float>(make_label_kernel_matrix(kernel_matrix(kernel_function,x),y), cache_size),
y,
nu,
alpha,
eps);
scalar_type rho, b;
calculate_rho_and_b(y,alpha,solver.get_gradient(),rho,b);
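// Rescale the raw QP solution into decision function coefficients below:
// each alpha gets signed by its label and divided by rho (the scaling
// described in the Chang and Lin paper referenced in the abstract).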
alpha = pointwise_multiply(alpha,y)/rho;
// count the number of support vectors
const long sv_count = sum(alpha != 0);
scalar_vector_type sv_alpha;
sample_vector_type support_vectors;
// size these column vectors so that they have an entry for each support vector
sv_alpha.set_size(sv_count);
support_vectors.set_size(sv_count);
// load the support vectors and their alpha values into these new column matrices
long idx = 0;
for (long i = 0; i < alpha.nr(); ++i)
{
if (alpha(i) != 0)
{
sv_alpha(idx) = alpha(i);
support_vectors(idx) = x(i);
++idx;
}
}
// now return the decision function
return decision_function<K> (sv_alpha, b, kernel_function, support_vectors);
}
// ------------------------------------------------------------------------------------
template <
typename scalar_vector_type,
typename scalar_vector_type2,
typename scalar_type
>
void calculate_rho_and_b(
const scalar_vector_type2& y,
const scalar_vector_type& alpha,
const scalar_vector_type& df,
scalar_type& rho,
scalar_type& b
) const
{
using namespace std;
long num_p_free = 0;
long num_n_free = 0;
scalar_type sum_p_free = 0;
scalar_type sum_n_free = 0;
scalar_type upper_bound_p = -numeric_limits<scalar_type>::infinity();
scalar_type upper_bound_n = -numeric_limits<scalar_type>::infinity();
scalar_type lower_bound_p = numeric_limits<scalar_type>::infinity();
scalar_type lower_bound_n = numeric_limits<scalar_type>::infinity();
for(long i = 0; i < alpha.nr(); ++i)
{
if(y(i) == 1)
{
if(alpha(i) == 1)
{
if (df(i) > upper_bound_p)
upper_bound_p = df(i);
}
else if(alpha(i) == 0)
{
if (df(i) < lower_bound_p)
lower_bound_p = df(i);
}
else
{
++num_p_free;
sum_p_free += df(i);
}
}
else
{
if(alpha(i) == 1)
{
if (df(i) > upper_bound_n)
upper_bound_n = df(i);
}
else if(alpha(i) == 0)
{
if (df(i) < lower_bound_n)
lower_bound_n = df(i);
}
else
{
++num_n_free;
sum_n_free += df(i);
}
}
}
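// r1 and r2 estimate the gradient value on the decision boundary for the
// +1 and -1 classes: the average over free (0 < alpha < 1) support
// vectors when any exist, otherwise the midpoint of the bounds found
// above.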
scalar_type r1,r2;
if(num_p_free > 0)
r1 = sum_p_free/num_p_free;
else
r1 = (upper_bound_p+lower_bound_p)/2;
if(num_n_free > 0)
r2 = sum_n_free/num_n_free;
else
r2 = (upper_bound_n+lower_bound_n)/2;
rho = (r1+r2)/2;
b = (r1-r2)/2/rho;
}
// ------------------------------------------------------------------------------------
kernel_type kernel_function;
scalar_type nu;
long cache_size;
scalar_type eps;
}; // end of class svm_nu_trainer
// ----------------------------------------------------------------------------------------
template <typename K>
void swap (
svm_nu_trainer<K>& a,
svm_nu_trainer<K>& b
) { a.swap(b); }
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_SVm_NU_TRAINER_H__
// Copyright (C) 2007 Davis E. King (davis@dlib.net)
// License: Boost Software License. See LICENSE.txt for the full license.
#undef DLIB_SVm_NU_TRAINER_ABSTRACT_
#ifdef DLIB_SVm_NU_TRAINER_ABSTRACT_
#include <cmath>
#include <limits>
#include <sstream>
#include "../matrix/matrix_abstract.h"
#include "../algs.h"
#include "../serialize.h"
#include "function_abstract.h"
#include "kernel_abstract.h"
#include "../optimization/optimization_solve_qp2_using_smo_abstract.h"
namespace dlib
{
// ----------------------------------------------------------------------------------------
template <
typename K
>
class svm_nu_trainer
{
/*!
REQUIREMENTS ON K
is a kernel function object as defined in dlib/svm/kernel_abstract.h
WHAT THIS OBJECT REPRESENTS
This object implements a trainer for a nu support vector machine for
solving binary classification problems.
The implementation of the nu-svm training algorithm used by this object is based
on the following excellent papers:
- Chang and Lin, Training {nu}-Support Vector Classifiers: Theory and Algorithms
- Chih-Chung Chang and Chih-Jen Lin, LIBSVM : a library for support vector
machines, 2001. Software available at http://www.csie.ntu.edu.tw/~cjlin/libsvm
!*/
public:
typedef K kernel_type;
typedef typename kernel_type::scalar_type scalar_type;
typedef typename kernel_type::sample_type sample_type;
typedef typename kernel_type::mem_manager_type mem_manager_type;
typedef decision_function<kernel_type> trained_function_type;
svm_nu_trainer (
);
/*!
ensures
- This object is properly initialized and ready to be used
to train a support vector machine.
- #get_nu() == 0.1
- #get_cache_size() == 200
- #get_epsilon() == 0.001
!*/
svm_nu_trainer (
const kernel_type& kernel,
const scalar_type& nu
);
/*!
requires
- 0 < nu <= 1
ensures
- This object is properly initialized and ready to be used
to train a support vector machine.
- #get_kernel() == kernel
- #get_nu() == nu
- #get_cache_size() == 200
- #get_epsilon() == 0.001
!*/
void set_cache_size (
long cache_size
);
/*!
requires
- cache_size > 0
ensures
- #get_cache_size() == cache_size
!*/
const long get_cache_size (
) const;
/*!
ensures
- returns the number of megabytes of cache this object will use
when it performs training via the this->train() function.
(bigger values of this may make training go faster but won't affect
the result. However, too big a value will cause you to run out of
memory, obviously.)
!*/
void set_epsilon (
scalar_type eps
);
/*!
requires
- eps > 0
ensures
- #get_epsilon() == eps
!*/
const scalar_type get_epsilon (
) const;
/*!
ensures
- returns the error epsilon that determines when training should stop.
Generally a good value for this is 0.001. Smaller values may result
in a more accurate solution but take longer to execute.
!*/
void set_kernel (
const kernel_type& k
);
/*!
ensures
- #get_kernel() == k
!*/
const kernel_type& get_kernel (
) const;
/*!
ensures
- returns a copy of the kernel function in use by this object
!*/
void set_nu (
scalar_type nu
);
/*!
requires
- 0 < nu <= 1
ensures
- #get_nu() == nu
!*/
const scalar_type get_nu (
) const;
/*!
ensures
- returns the nu svm parameter. This is a value between 0 and
1. It is the parameter that determines the trade off between
trying to fit the training data exactly or allowing more errors
but hopefully improving the generalization ability of the
resulting classifier. Smaller values encourage exact fitting
while larger values of nu may encourage better generalization.
For more information you should consult the papers referenced
above.
!*/
template <
typename in_sample_vector_type,
typename in_scalar_vector_type
>
const decision_function<kernel_type> train (
const in_sample_vector_type& x,
const in_scalar_vector_type& y
) const;
/*!
requires
- is_binary_classification_problem(x,y) == true
- x == a matrix or something convertible to a matrix via vector_to_matrix().
Also, x should contain sample_type objects.
- y == a matrix or something convertible to a matrix via vector_to_matrix().
Also, y should contain scalar_type objects.
ensures
- trains a nu support vector classifier given the training samples in x and
labels in y. Training is done when the error is less than get_epsilon().
- returns a decision function F with the following properties:
- if (new_x is a sample predicted to have a +1 label) then
- F(new_x) >= 0
- else
- F(new_x) < 0
throws
- invalid_nu_error
This exception is thrown if get_nu() >= maximum_nu(y)
- std::bad_alloc
!*/
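// A minimal training sketch (the kernel choice, the gamma value of 0.1,
// and the sample/label setup are illustrative assumptions only):
//
//     typedef matrix<double,2,1> sample_type;
//     typedef radial_basis_kernel<sample_type> kernel_type;
//     std::vector<sample_type> samples;
//     std::vector<double> labels;
//     // ... fill samples with points and labels with +1/-1 values ...
//     svm_nu_trainer<kernel_type> trainer;
//     trainer.set_kernel(kernel_type(0.1));
//     trainer.set_nu(0.1);
//     decision_function<kernel_type> df = trainer.train(samples, labels);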
void swap (
svm_nu_trainer& item
);
/*!
ensures
- swaps *this and item
!*/
};
template <typename K>
void swap (
svm_nu_trainer<K>& a,
svm_nu_trainer<K>& b
) { a.swap(b); }
/*!
provides a global swap
!*/
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_SVm_NU_TRAINER_ABSTRACT_
@@ -58,7 +58,7 @@ namespace dlib
// on very large datasets. Every bit of freed memory helps out.
j = job<trainer_type>();
}
- catch (invalid_svm_nu_error&)
+ catch (invalid_nu_error&)
{
// If this is a svm_nu_trainer then we might get this exception if the nu is
// invalid. In this case just return a cross validation score of 0.