Commit 64c4f6df authored by Davis King's avatar Davis King

Added a max iteration parameter to the solver. Also changed it slightly to be more robust to numerically difficult problems.

--HG--
extra : convert_revision : svn%3Afdd8eb12-d10e-0410-9acb-85c331704f74/trunk%403476
parent b30a275a
......@@ -74,11 +74,12 @@ namespace dlib
typename EXP2,
typename T, long NR, long NC, typename MM, typename L
>
void solve_qp_using_smo (
unsigned long solve_qp_using_smo (
const matrix_exp<EXP1>& Q,
const matrix_exp<EXP2>& b,
matrix<T,NR,NC,MM,L>& alpha,
T eps
T eps,
unsigned long max_iter
)
{
// make sure requires clause is not broken
......@@ -89,7 +90,8 @@ namespace dlib
b.size() == Q.nr() &&
alpha.size() > 0 &&
min(alpha) >= 0 &&
eps > 0,
eps > 0 &&
max_iter > 0,
"\t void solve_qp_using_smo()"
<< "\n\t Invalid arguments were given to this function"
<< "\n\t Q.nr(): " << Q.nr()
......@@ -101,6 +103,7 @@ namespace dlib
<< "\n\t Q.nr(): " << Q.nr()
<< "\n\t min(alpha): " << min(alpha)
<< "\n\t eps: " << eps
<< "\n\t max_iter: " << max_iter
);
// Compute f'(alpha) (i.e. the gradient of f(alpha)) for the current alpha.
......@@ -108,7 +111,8 @@ namespace dlib
const T tau = 1000*std::numeric_limits<T>::epsilon();
while (true)
unsigned long iter = 0;
for (; iter < max_iter; ++iter)
{
// Find the two elements of df that satisfy the following:
// - small_idx == index_of_min(df)
......@@ -163,13 +167,26 @@ namespace dlib
alpha(small_idx) = old_alpha_big + old_alpha_small;
}
// Now update the gradient. We will perform the equivalent of: df = Q*alpha - b;
const T delta_alpha_big = alpha(big_idx) - old_alpha_big;
const T delta_alpha_small = alpha(small_idx) - old_alpha_small;
for(long k = 0; k < df.nr(); ++k)
df(k) += Q(big_idx,k)*delta_alpha_big + Q(small_idx,k)*delta_alpha_small;
if ((iter%(max_iter/10)) == (max_iter/10 -1))
{
// Perform this form of the update every so often because doing so can help
// avoid the buildup of numerical errors you get with the alternate update
// below.
df = Q*alpha - b;
}
else
{
// Now update the gradient. We will perform the equivalent of: df = Q*alpha - b;
const T delta_alpha_big = alpha(big_idx) - old_alpha_big;
const T delta_alpha_small = alpha(small_idx) - old_alpha_small;
for(long k = 0; k < df.nr(); ++k)
df(k) += Q(big_idx,k)*delta_alpha_big + Q(small_idx,k)*delta_alpha_small;
}
}
return iter+1;
}
// ----------------------------------------------------------------------------------------
......
......@@ -15,11 +15,12 @@ namespace dlib
typename EXP2,
typename T, long NR, long NC, typename MM, typename L
>
void solve_qp_using_smo (
unsigned long solve_qp_using_smo (
const matrix_exp<EXP1>& Q,
const matrix_exp<EXP2>& b,
matrix<T,NR,NC,MM,L>& alpha,
T eps
T eps,
unsigned long max_iter
);
/*!
requires
......@@ -30,6 +31,7 @@ namespace dlib
- alpha.size() > 0
- min(alpha) >= 0
- eps > 0
- max_iter > 0
ensures
- Let C == sum(alpha) (i.e. C is the sum of the alpha values you
supply to this function)
......@@ -44,6 +46,9 @@ namespace dlib
and it works on the problem until the KKT violation is less than eps.
So eps controls how accurate the solution is and smaller values result
in better solutions.
- At most max_iter iterations of optimization will be performed.
- returns the number of iterations performed. If this method fails to
converge to eps accuracy then the number returned will be max_iter+1.
!*/
// ----------------------------------------------------------------------------------------
......
......@@ -157,7 +157,8 @@ namespace
alpha = C/alpha.size();
x = alpha;
solve_qp_using_smo(test.Q, test.b, alpha, 0.00001);
const unsigned long max_iter = 100000;
solve_qp_using_smo(test.Q, test.b, alpha, 0.00001, max_iter);
DLIB_TEST_MSG(abs(sum(alpha) - C) < 1e-13, abs(sum(alpha) - C) );
dlog << LTRACE << "alpha: " << alpha;
dlog << LINFO << "SMO: true objective: "<< 0.5*trans(alpha)*test.Q*alpha - trans(alpha)*test.b;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment