Commit b43813a2 authored by Davis King's avatar Davis King

Added another version of the quadratic solver. It's basically a copy

of solve_qp2_using_smo but this one solves the problem associated with
C-SVMs and a few other things.

--HG--
extra : convert_revision : svn%3Afdd8eb12-d10e-0410-9acb-85c331704f74/trunk%404002
parent d7e61080
// Copyright (C) 2007 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#ifndef DLIB_SOLVE_QP3_USING_SMo_H__
#define DLIB_SOLVE_QP3_USING_SMo_H__
#include "optimization_solve_qp3_using_smo_abstract.h"
#include <cmath>
#include <limits>
#include <sstream>
#include "../matrix.h"
#include "../algs.h"
namespace dlib
{
// ----------------------------------------------------------------------------------------
class invalid_qp3_error : public dlib::error
{
public:
invalid_qp3_error(
const std::string& msg,
double B_,
double Cp_,
double Cn_
) :
dlib::error(msg),
B(B_),
Cp(Cp_),
Cn(Cn_)
{};
const double B;
const double Cp;
const double Cn;
};
// ----------------------------------------------------------------------------------------
template <
typename matrix_type
>
class solve_qp3_using_smo
{
public:
typedef typename matrix_type::mem_manager_type mem_manager_type;
typedef typename matrix_type::type scalar_type;
typedef typename matrix_type::layout_type layout_type;
typedef matrix<scalar_type,0,0,mem_manager_type,layout_type> general_matrix;
typedef matrix<scalar_type,0,1,mem_manager_type,layout_type> column_matrix;
template <
typename EXP1,
typename EXP2,
typename EXP3,
long NR
>
unsigned long operator() (
const matrix_exp<EXP1>& Q,
const matrix_exp<EXP2>& p,
const matrix_exp<EXP3>& y,
const scalar_type B,
const scalar_type Cp,
const scalar_type Cn,
matrix<scalar_type,NR,1,mem_manager_type, layout_type>& alpha,
scalar_type eps
)
{
DLIB_ASSERT(Q.nr() == Q.nc() && y.size() == Q.nr() && p.size() == y.size() &&
y.size() > 0 && is_col_vector(y) && is_col_vector(p) &&
sum((y == +1) + (y == -1)) == y.size() &&
Cp > 0 && Cn > 0 &&
eps > 0,
"\t void solve_qp3_using_smo::operator()"
<< "\n\t invalid arguments were given to this function"
<< "\n\t Q.nr(): " << Q.nr()
<< "\n\t Q.nc(): " << Q.nc()
<< "\n\t is_col_vector(p): " << is_col_vector(p)
<< "\n\t p.size(): " << p.size()
<< "\n\t is_col_vector(y): " << is_col_vector(y)
<< "\n\t y.size(): " << y.size()
<< "\n\t sum((y == +1) + (y == -1)): " << sum((y == +1) + (y == -1))
<< "\n\t Cp: " << Cp
<< "\n\t Cn: " << Cn
<< "\n\t eps: " << eps
);
set_initial_alpha(y, B, Cp, Cn, alpha);
const scalar_type tau = 1e-12;
typedef typename colm_exp<EXP1>::type col_type;
// initialize df. Compute df = Q*alpha + p
df = p;
for (long r = 0; r < df.nr(); ++r)
{
if (alpha(r) != 0)
{
df += alpha(r)*matrix_cast<scalar_type>(colm(Q,r));
}
}
unsigned long count = 0;
// now perform the actual optimization of alpha
long i=0, j=0;
while (find_working_group(y,alpha,Q,df,Cp,Cn,tau,eps,i,j))
{
++count;
const scalar_type old_alpha_i = alpha(i);
const scalar_type old_alpha_j = alpha(j);
optimize_working_pair(alpha,Q,y,df,tau,i,j, Cp, Cn );
// update the df vector now that we have modified alpha(i) and alpha(j)
scalar_type delta_alpha_i = alpha(i) - old_alpha_i;
scalar_type delta_alpha_j = alpha(j) - old_alpha_j;
col_type Q_i = colm(Q,i);
col_type Q_j = colm(Q,j);
for(long k = 0; k < df.nr(); ++k)
df(k) += Q_i(k)*delta_alpha_i + Q_j(k)*delta_alpha_j;
}
return count;
}
const column_matrix& get_gradient (
) const { return df; }
private:
// -------------------------------------------------------------------------------------
template <
typename scalar_type,
typename scalar_vector_type,
typename scalar_vector_type2
>
inline void set_initial_alpha (
const scalar_vector_type& y,
const scalar_type B,
const scalar_type Cp,
const scalar_type Cn,
scalar_vector_type2& alpha
) const
{
alpha.set_size(y.size());
set_all_elements(alpha,0);
// It's easy in the B == 0 case
if (B == 0)
return;
const scalar_type C = (B > 0)? Cp : Cn;
scalar_type temp = std::abs(B)/C;
long num = (long)std::floor(temp);
long num_total = (long)std::ceil(temp);
bool has_slack = false;
long count = 0;
for (long i = 0; i < alpha.nr(); ++i)
{
if (y(i) == 1)
{
if (count < num)
{
++count;
alpha(i) = C;
}
else
{
has_slack = true;
if (num_total > num)
{
++count;
alpha(i) = C*(temp - std::floor(temp));
}
break;
}
}
}
if (count != num_total || has_slack == false)
{
std::ostringstream sout;
sout << "Invalid QP3 constraint parameters of B: " << B << ", Cp: " << Cp << ", Cn: "<< Cn;
throw invalid_qp3_error(sout.str(),B,Cp,Cn);
}
}
// ------------------------------------------------------------------------------------
template <
typename scalar_vector_type,
typename scalar_type,
typename EXP,
typename U, typename V
>
inline bool find_working_group (
const V& y,
const U& alpha,
const matrix_exp<EXP>& Q,
const scalar_vector_type& df,
const scalar_type Cp,
const scalar_type Cn,
const scalar_type tau,
const scalar_type eps,
long& i_out,
long& j_out
) const
{
using namespace std;
long ip = 0;
long jp = 0;
typedef typename colm_exp<EXP>::type col_type;
typedef typename diag_exp<EXP>::type diag_type;
scalar_type ip_val = -numeric_limits<scalar_type>::infinity();
scalar_type jp_val = numeric_limits<scalar_type>::infinity();
// loop over the alphas and find the maximum ip and in indices.
for (long i = 0; i < alpha.nr(); ++i)
{
if (y(i) == 1)
{
if (alpha(i) < Cp)
{
if (-df(i) > ip_val)
{
ip_val = -df(i);
ip = i;
}
}
}
else
{
if (alpha(i) > 0.0)
{
if (df(i) > ip_val)
{
ip_val = df(i);
ip = i;
}
}
}
}
scalar_type Mp = -numeric_limits<scalar_type>::infinity();
// Pick out the column and diagonal of Q we need below. Doing
// it this way is faster if Q is actually a symmetric_matrix_cache
// object.
col_type Q_ip = colm(Q,ip);
diag_type Q_diag = diag(Q);
// now we need to find the minimum jp indices
for (long j = 0; j < alpha.nr(); ++j)
{
if (y(j) == 1)
{
if (alpha(j) > 0.0)
{
scalar_type b = ip_val + df(j);
if (df(j) > Mp)
Mp = df(j);
if (b > 0)
{
scalar_type a = Q_ip(ip) + Q_diag(j) - 2*y(ip)*Q_ip(j);
if (a <= 0)
a = tau;
scalar_type temp = -b*b/a;
if (temp < jp_val)
{
jp_val = temp;
jp = j;
}
}
}
}
else
{
if (alpha(j) < Cn)
{
scalar_type b = ip_val - df(j);
if (-df(j) > Mp)
Mp = -df(j);
if (b > 0)
{
scalar_type a = Q_ip(ip) + Q_diag(j) + 2*y(ip)*Q_ip(j);
if (a <= 0)
a = tau;
scalar_type temp = -b*b/a;
if (temp < jp_val)
{
jp_val = temp;
jp = j;
}
}
}
}
}
// if we are at the optimal point then return false so the caller knows
// to stop optimizing
if (Mp + ip_val < eps)
return false;
i_out = ip;
j_out = jp;
return true;
}
// ------------------------------------------------------------------------------------
template <
typename EXP,
typename EXP2,
typename T, typename U
>
inline void optimize_working_pair (
T& alpha,
const matrix_exp<EXP>& Q,
const matrix_exp<EXP2>& y,
const U& df,
const scalar_type& tau,
const long i,
const long j,
const scalar_type& Cp,
const scalar_type& Cn
) const
{
const scalar_type Ci = (y(i) > 0 )? Cp : Cn;
const scalar_type Cj = (y(j) > 0 )? Cp : Cn;
if (y(i) != y(j))
{
scalar_type quad_coef = Q(i,i)+Q(j,j)+2*Q(j,i);
if (quad_coef <= 0)
quad_coef = tau;
scalar_type delta = (-df(i)-df(j))/quad_coef;
scalar_type diff = alpha(i) - alpha(j);
alpha(i) += delta;
alpha(j) += delta;
if (diff > 0)
{
if (alpha(j) < 0)
{
alpha(j) = 0;
alpha(i) = diff;
}
}
else
{
if (alpha(i) < 0)
{
alpha(i) = 0;
alpha(j) = -diff;
}
}
if (diff > Ci - Cj)
{
if (alpha(i) > Ci)
{
alpha(i) = Ci;
alpha(j) = Ci - diff;
}
}
else
{
if (alpha(j) > Cj)
{
alpha(j) = Cj;
alpha(i) = Cj + diff;
}
}
}
else
{
scalar_type quad_coef = Q(i,i)+Q(j,j)-2*Q(j,i);
if (quad_coef <= 0)
quad_coef = tau;
scalar_type delta = (df(i)-df(j))/quad_coef;
scalar_type sum = alpha(i) + alpha(j);
alpha(i) -= delta;
alpha(j) += delta;
if(sum > Ci)
{
if(alpha(i) > Ci)
{
alpha(i) = Ci;
alpha(j) = sum - Ci;
}
}
else
{
if(alpha(j) < 0)
{
alpha(j) = 0;
alpha(i) = sum;
}
}
if(sum > Cj)
{
if(alpha(j) > Cj)
{
alpha(j) = Cj;
alpha(i) = sum - Cj;
}
}
else
{
if(alpha(i) < 0)
{
alpha(i) = 0;
alpha(j) = sum;
}
}
}
}
// ------------------------------------------------------------------------------------
column_matrix df; // gradient of f(alpha)
};
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_SOLVE_QP3_USING_SMo_H__
// Copyright (C) 2010 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#undef DLIB_OPTIMIZATION_SOLVE_QP3_USING_SMO_ABSTRACT_H_
#ifdef DLIB_OPTIMIZATION_SOLVE_QP3_USING_SMO_ABSTRACT_H_
#include "../matrix/matrix_abstract.h"
#include "../algs.h"
namespace dlib
{
// ----------------------------------------------------------------------------------------
class invalid_qp3_error : public dlib::error
{
/*!
WHAT THIS OBJECT REPRESENTS
This object is an exception class used to indicate that the values
of B, Cp, and Cn given to the solve_qp3_using_smo object are incompatible
with the constraints of the quadratic program.
this->B, this->Cp, and this->Cn will be set to the invalid values used.
!*/
public:
invalid_qp3_error( const std::string& msg, double B_, double Cp_, double Cn_) :
dlib::error(msg), B(B_), Cp(Cp_), Cn(Cn_) {};
const double B;
const double Cp;
const double Cn;
};
// ----------------------------------------------------------------------------------------
template <
typename matrix_type
>
class solve_qp3_using_smo
{
/*!
REQUIREMENTS ON matrix_type
Must be some type of dlib::matrix.
WHAT THIS OBJECT REPRESENTS
This object is a tool for solving the following quadratic programming
problem using the sequential minimal optimization algorithm:
Minimize: f(alpha) == 0.5*trans(alpha)*Q*alpha + trans(p)*alpha
subject to the following constraints:
- for all i such that y(i) == +1: 0 <= alpha(i) <= Cp
- for all i such that y(i) == -1: 0 <= alpha(i) <= Cn
- trans(y)*alpha == B
Where all elements of y must be equal to +1 or -1 and f is convex.
This means that Q should be symmetric and positive-semidefinite.
This object implements the strategy used by the LIBSVM tool. The following paper
can be consulted for additional details:
Chih-Chung Chang and Chih-Jen Lin, LIBSVM : a library for support vector
machines, 2001. Software available at http://www.csie.ntu.edu.tw/~cjlin/libsvm
!*/
public:
typedef typename matrix_type::mem_manager_type mem_manager_type;
typedef typename matrix_type::type scalar_type;
typedef typename matrix_type::layout_type layout_type;
typedef matrix<scalar_type,0,0,mem_manager_type,layout_type> general_matrix;
typedef matrix<scalar_type,0,1,mem_manager_type,layout_type> column_matrix;
template <
typename EXP1,
typename EXP2,
typename EXP3,
long NR
>
unsigned long operator() (
const matrix_exp<EXP1>& Q,
const matrix_exp<EXP2>& p,
const matrix_exp<EXP3>& y,
const scalar_type B,
const scalar_type Cp,
const scalar_type Cn,
matrix<scalar_type,NR,1,mem_manager_type, layout_type>& alpha,
scalar_type eps
);
/*!
requires
- Q.nr() == Q.nc()
- is_col_vector(y) == true
- is_col_vector(p) == true
- p.size() == y.size() == Q.nr()
- y.size() > 0
- sum((y == +1) + (y == -1)) == y.size()
(i.e. all elements of y must be equal to +1 or -1)
- alpha must be capable of representing a vector of size y.size() elements
- Cp > 0
- Cn > 0
- eps > 0
ensures
- This function solves the quadratic program defined in this class's main comment.
- The solution to the quadratic program will be stored in #alpha.
- #alpha.size() == y.size()
- This function uses an implementation of the sequential minimal optimization
algorithm. It runs until the KKT violation is less than eps. So eps controls
how accurate the solution is and smaller values result in better solutions.
(a reasonable eps is usually about 1e-3)
- #get_gradient() == Q*(#alpha)
(i.e. stores the gradient of f() at #alpha in get_gradient())
- returns the number of iterations performed.
throws
- invalid_qp3_error
This exception is thrown if the given parameters cause the constraints
of the quadratic programming problem to be impossible to satisfy.
!*/
const column_matrix& get_gradient (
) const;
/*!
ensures
- returns the gradient vector at the solution of the last problem solved
by this object. If no problem has been solved then returns an empty
vector.
!*/
};
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_OPTIMIZATION_SOLVE_QP3_USING_SMO_ABSTRACT_H_
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment