Commit cb677742 authored by Davis King's avatar Davis King

A test to see more info about what's going weird on appveyor.

parent 23332d68
...@@ -272,8 +272,241 @@ namespace ...@@ -272,8 +272,241 @@ namespace
DLIB_TEST_MSG(abs(computed_obj - true_obj) < 1e-8, abs(computed_obj - true_obj)); DLIB_TEST_MSG(abs(computed_obj - true_obj) < 1e-8, abs(computed_obj - true_obj));
} }
template <
typename EXP1,
typename EXP2,
typename EXP3,
typename EXP4,
typename T, long NR, long NC, typename MM, typename L,
long NR2, long NC2
>
unsigned long solve_qp4_using_smo_local (
const matrix_exp<EXP1>& A,
const matrix_exp<EXP2>& Q,
const matrix_exp<EXP3>& b,
const matrix_exp<EXP4>& d,
matrix<T,NR,NC,MM,L>& alpha,
matrix<T,NR2,NC2,MM,L>& lambda,
T eps,
unsigned long max_iter,
T max_lambda = std::numeric_limits<T>::infinity()
)
{
// make sure requires clause is not broken
DLIB_ASSERT(A.nc() == alpha.size() &&
Q.nr() == Q.nc() &&
is_col_vector(b) &&
is_col_vector(alpha) &&
b.size() == alpha.size() &&
b.size() == Q.nr() &&
alpha.size() > 0 &&
min(alpha) >= 0 &&
eps > 0 &&
max_iter > 0,
"\t void solve_qp4_using_smo()"
<< "\n\t Invalid arguments were given to this function"
<< "\n\t A.nc(): " << A.nc()
<< "\n\t Q.nr(): " << Q.nr()
<< "\n\t Q.nc(): " << Q.nc()
<< "\n\t is_col_vector(b): " << is_col_vector(b)
<< "\n\t is_col_vector(alpha): " << is_col_vector(alpha)
<< "\n\t b.size(): " << b.size()
<< "\n\t alpha.size(): " << alpha.size()
<< "\n\t Q.nr(): " << Q.nr()
<< "\n\t min(alpha): " << min(alpha)
<< "\n\t eps: " << eps
<< "\n\t max_iter: " << max_iter
);
DLIB_ASSERT(is_col_vector(d) == true &&
max_lambda >= 0 &&
d.size() == A.nr(),
"\t void solve_qp4_using_smo()"
<< "\n\t Invalid arguments were given to this function"
<< "\n\t A.nr(): " << A.nr()
<< "\n\t d.size(): " << d.size()
<< "\n\t max_lambda: " << max_lambda
);
const T C = sum(alpha);
/*
For this optimization problem, it is the case that the optimal
value of lambda is given by a simple closed form expression if we
know the optimal alpha. So what we will do is to just optimize
alpha and every now and then we will update lambda with its optimal
value. Therefore, we use essentially the same method as the
solve_qp_using_smo() routine.
*/
const bool d_is_zero = d==zeros_matrix(d);
// compute optimal lambda for current alpha
if (d_is_zero)
lambda = A*alpha;
else
lambda = A*alpha + d;
lambda = clamp(lambda, 0, max_lambda);
// Compute f'(alpha) (i.e. the gradient of f(alpha) with respect to alpha) for the current alpha.
matrix<T,NR,NC,MM,L> df = Q*alpha - b - trans(A)*lambda;
const T tau = 1000*std::numeric_limits<T>::epsilon();
T big, little;
unsigned long iter = 0;
for (; iter < max_iter; ++iter)
{
// Find the two elements of df that satisfy the following:
// - little_idx == index_of_min(df)
// - big_idx == the index of the largest element in df such that alpha(big_idx) > 0
// These two indices will tell us which two alpha values are most in violation of the KKT
// optimality conditions.
big = -std::numeric_limits<T>::max();
long big_idx = 0;
little = std::numeric_limits<T>::max();
long little_idx = 0;
for (long i = 0; i < df.nr(); ++i)
{
if (df(i) > big && alpha(i) > 0)
{
big = df(i);
big_idx = i;
}
if (df(i) < little)
{
little = df(i);
little_idx = i;
}
}
// Check how big the duality gap is and stop when it goes below eps.
// The duality gap is the gap between the objective value of the function
// we are optimizing and the value of its primal form. This value is always
// greater than or equal to the distance to the optimum solution so it is a
// good way to decide if we should stop.
if (trans(alpha)*df - C*little < eps)
{
// compute optimal lambda and recheck the duality gap to make
// sure we have really converged.
if (d_is_zero)
lambda = A*alpha;
else
lambda = A*alpha + d;
lambda = clamp(lambda, 0, max_lambda);
df = Q*alpha - b - trans(A)*lambda;
if (trans(alpha)*df - C*min(df) < eps)
break;
else
continue;
}
// Save these values, we will need them later.
const T old_alpha_big = alpha(big_idx);
const T old_alpha_little = alpha(little_idx);
// Now optimize the two variables we just picked.
T quad_coef = Q(big_idx,big_idx) + Q(little_idx,little_idx) - 2*Q(big_idx, little_idx);
if (quad_coef <= tau)
quad_coef = tau;
const T delta = (big - little)/quad_coef;
alpha(big_idx) -= delta;
alpha(little_idx) += delta;
// Make sure alpha stays feasible. That is, make sure the updated alpha doesn't
// violate the non-negativity constraint.
if (alpha(big_idx) < 0)
{
// Since an alpha can't be negative we will just set it to 0 and shift all the
// weight to the other alpha.
alpha(big_idx) = 0;
alpha(little_idx) = old_alpha_big + old_alpha_little;
}
// Every 300 iterations
if ((iter%300) == 299)
{
// compute the optimal lambda for the current alpha
if (d_is_zero)
lambda = A*alpha;
else
lambda = A*alpha + d;
lambda = clamp(lambda, 0, max_lambda);
// Perform this form of the update every so often because doing so can help
// avoid the buildup of numerical errors you get with the alternate update
// below.
df = Q*alpha - b - trans(A)*lambda;
}
else
{
// Now update the gradient. We will perform the equivalent of: df = Q*alpha - b;
const T delta_alpha_big = alpha(big_idx) - old_alpha_big;
const T delta_alpha_little = alpha(little_idx) - old_alpha_little;
for(long k = 0; k < df.nr(); ++k)
df(k) += Q(big_idx,k)*delta_alpha_big + Q(little_idx,k)*delta_alpha_little;;
}
}
using namespace std;
cout << "SMO: " << endl;
cout << " duality gap: "<< trans(alpha)*df - C*min(df) << endl;
cout << " KKT gap: "<< big-little << endl;
cout << " iter: "<< iter+1 << endl;
cout << " eps: "<< eps << endl;
return iter+1;
}
void test_qp4_test6_local()
{
matrix<double> A(3,3);
A = 1,2,4,
3,1,6,
6,7,-2;
matrix<double,0,1> b(3);
b = -1,
-2,
-3;
const double C = 2;
matrix<double,0,1> alpha(3), d(3), lambda;
alpha = C/2, C/2, 0;
unsigned long iters = solve_qp4_using_smo_local(A, tmp(trans(A)*A), b, d, alpha, lambda, 1e-9, 3000);
matrix<double,0,1> w = lowerbound(-A*alpha, 0);
dlog << LINFO << "***** LOCAL **************************************************";
dlog << LINFO << "alpha: " << trans(alpha);
dlog << LINFO << "lambda: " << trans(lambda);
dlog << LINFO << "w: " << trans(w);
const double computed_obj = compute_objective_value(w,A,b,C);
w = 0, 0, 0;
const double true_obj = compute_objective_value(w,A,b,C);
dlog << LINFO << "computed obj: "<< computed_obj;
dlog << LINFO << "with true w obj: "<< true_obj;
DLIB_TEST_MSG(abs(computed_obj - true_obj) < 1e-8,
"computed_obj: "<< computed_obj << " true_obj: " << true_obj << " delta: "<< abs(computed_obj - true_obj)
<< " iters: " << iters
<< "\n alpha: " << trans(alpha)
<< " lambda: " << trans(lambda)
);
}
void test_qp4_test6() void test_qp4_test6()
{ {
test_qp4_test6_local();
matrix<double> A(3,3); matrix<double> A(3,3);
A = 1,2,4, A = 1,2,4,
3,1,6, 3,1,6,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment