A test to see more info about what's going weird on appveyor.

cb677742 · Davis King · 23332d68 · cb677742
Commit cb677742 authored May 16, 2017 by Davis King
Hide whitespace changes
Inline Side-by-side

Showing with 233 additions and 0 deletions

opt_qp_solver.cpp dlib/test/opt_qp_solver.cpp +233 -0

No files found.
--- a/dlib/test/opt_qp_solver.cpp
+++ b/dlib/test/opt_qp_solver.cpp
@@ -272,8 +272,241 @@ namespace
        DLIB_TEST_MSG(abs(computed_obj - true_obj) < 1e-8, abs(computed_obj - true_obj));
    }
+    template <
+        typename EXP1,
+        typename EXP2,
+        typename EXP3,
+        typename EXP4,
+        typename T, long NR, long NC, typename MM, typename L,
+        long NR2, long NC2
+        >
+    unsigned long solve_qp4_using_smo_local ( 
+        const matrix_exp<EXP1>& A,
+        const matrix_exp<EXP2>& Q,
+        const matrix_exp<EXP3>& b,
+        const matrix_exp<EXP4>& d,
+        matrix<T,NR,NC,MM,L>& alpha,
+        matrix<T,NR2,NC2,MM,L>& lambda,
+        T eps,
+        unsigned long max_iter,
+        T max_lambda = std::numeric_limits<T>::infinity()
+    )
+    {
+        // make sure requires clause is not broken
+        DLIB_ASSERT(A.nc() == alpha.size() &&
+                     Q.nr() == Q.nc() &&
+                     is_col_vector(b) &&
+                     is_col_vector(alpha) &&
+                     b.size() == alpha.size() &&
+                     b.size() == Q.nr() &&
+                     alpha.size() > 0 &&
+                     min(alpha) >= 0 &&
+                     eps > 0 &&
+                     max_iter > 0,
+                     "\t void solve_qp4_using_smo()"
+                     << "\n\t Invalid arguments were given to this function"
+                     << "\n\t A.nc():               " << A.nc()
+                     << "\n\t Q.nr():               " << Q.nr()
+                     << "\n\t Q.nc():               " << Q.nc()
+                     << "\n\t is_col_vector(b):     " << is_col_vector(b)
+                     << "\n\t is_col_vector(alpha): " << is_col_vector(alpha)
+                     << "\n\t b.size():             " << b.size() 
+                     << "\n\t alpha.size():         " << alpha.size() 
+                     << "\n\t Q.nr():               " << Q.nr() 
+                     << "\n\t min(alpha):           " << min(alpha) 
+                     << "\n\t eps:                  " << eps 
+                     << "\n\t max_iter:             " << max_iter 
+        );
+        DLIB_ASSERT(is_col_vector(d) == true &&
+                     max_lambda >= 0 &&
+                     d.size() == A.nr(),
+                     "\t void solve_qp4_using_smo()"
+                     << "\n\t Invalid arguments were given to this function"
+                     << "\n\t A.nr():     " << A.nr()
+                     << "\n\t d.size():   " << d.size()
+                     << "\n\t max_lambda: " << max_lambda
+        );
+        const T C = sum(alpha);
+        /*
+            For this optimization problem, it is the case that the optimal
+            value of lambda is given by a simple closed form expression if we
+            know the optimal alpha.  So what we will do is to just optimize 
+            alpha and every now and then we will update lambda with its optimal
+            value.  Therefore, we use essentially the same method as the
+            solve_qp_using_smo() routine.  
+        */
+        const bool d_is_zero = d==zeros_matrix(d);
+        // compute optimal lambda for current alpha
+        if (d_is_zero)
+            lambda = A*alpha;
+        else
+            lambda = A*alpha + d;
+        lambda = clamp(lambda, 0, max_lambda);
+        // Compute f'(alpha) (i.e. the gradient of f(alpha) with respect to alpha) for the current alpha.  
+        matrix<T,NR,NC,MM,L> df = Q*alpha - b - trans(A)*lambda;
+        const T tau = 1000*std::numeric_limits<T>::epsilon();
+        T big, little;
+        unsigned long iter = 0;
+        for (; iter < max_iter; ++iter)
+        {
+            // Find the two elements of df that satisfy the following:
+            //    - little_idx == index_of_min(df)
+            //    - big_idx   == the index of the largest element in df such that alpha(big_idx) > 0
+            // These two indices will tell us which two alpha values are most in violation of the KKT 
+            // optimality conditions.  
+            big = -std::numeric_limits<T>::max();
+            long big_idx = 0;
+            little = std::numeric_limits<T>::max();
+            long little_idx = 0;
+            for (long i = 0; i < df.nr(); ++i)
+            {
+                if (df(i) > big && alpha(i) > 0)
+                {
+                    big = df(i);
+                    big_idx = i;
+                }
+                if (df(i) < little)
+                {
+                    little = df(i);
+                    little_idx = i;
+                }
+            }
+            // Check how big the duality gap is and stop when it goes below eps.  
+            // The duality gap is the gap between the objective value of the function
+            // we are optimizing and the value of its primal form.  This value is always 
+            // greater than or equal to the distance to the optimum solution so it is a 
+            // good way to decide if we should stop.   
+            if (trans(alpha)*df - C*little < eps)
+            {
+                // compute optimal lambda and recheck the duality gap to make
+                // sure we have really converged.
+                if (d_is_zero)
+                    lambda = A*alpha;
+                else
+                    lambda = A*alpha + d;
+                lambda = clamp(lambda, 0, max_lambda);
+                df = Q*alpha - b - trans(A)*lambda;
+                if (trans(alpha)*df - C*min(df) < eps)
+                    break;
+                else
+                    continue;
+            }
+            // Save these values, we will need them later.
+            const T old_alpha_big = alpha(big_idx);
+            const T old_alpha_little = alpha(little_idx);
+            // Now optimize the two variables we just picked. 
+            T quad_coef = Q(big_idx,big_idx) + Q(little_idx,little_idx) - 2*Q(big_idx, little_idx);
+            if (quad_coef <= tau)
+                quad_coef = tau;
+            const T delta = (big - little)/quad_coef;
+            alpha(big_idx) -= delta;
+            alpha(little_idx) += delta;
+            // Make sure alpha stays feasible.  That is, make sure the updated alpha doesn't
+            // violate the non-negativity constraint.  
+            if (alpha(big_idx) < 0)
+            {
+                // Since an alpha can't be negative we will just set it to 0 and shift all the
+                // weight to the other alpha.
+                alpha(big_idx) = 0;
+                alpha(little_idx) = old_alpha_big + old_alpha_little;
+            }
+            // Every 300 iterations
+            if ((iter%300) == 299)
+            {
+                // compute the optimal lambda for the current alpha
+                if (d_is_zero)
+                    lambda = A*alpha;
+                else
+                    lambda = A*alpha + d;
+                lambda = clamp(lambda, 0, max_lambda);
+                // Perform this form of the update every so often because doing so can help
+                // avoid the buildup of numerical errors you get with the alternate update
+                // below.
+                df = Q*alpha - b - trans(A)*lambda;
+            }
+            else
+            {
+                // Now update the gradient. We will perform the equivalent of: df = Q*alpha - b;
+                const T delta_alpha_big   = alpha(big_idx) - old_alpha_big;
+                const T delta_alpha_little = alpha(little_idx) - old_alpha_little;
+                for(long k = 0; k < df.nr(); ++k)
+                    df(k) += Q(big_idx,k)*delta_alpha_big + Q(little_idx,k)*delta_alpha_little;;
+            }
+        }
+        using namespace std;
+        cout << "SMO: " << endl;
+        cout << "   duality gap: "<< trans(alpha)*df - C*min(df) << endl;
+        cout << "   KKT gap:     "<< big-little << endl;
+        cout << "   iter:        "<< iter+1 << endl;
+        cout << "   eps:         "<< eps << endl;
+        return iter+1;
+    }
+    void test_qp4_test6_local()
+    {
+        matrix<double> A(3,3);
+        A = 1,2,4,
+        3,1,6,
+        6,7,-2;
+        matrix<double,0,1> b(3);
+        b = -1,
+        -2,
+        -3;
+        const double C = 2;
+        matrix<double,0,1> alpha(3), d(3), lambda;
+        alpha = C/2, C/2, 0;
+        unsigned long iters = solve_qp4_using_smo_local(A, tmp(trans(A)*A), b, d, alpha, lambda, 1e-9, 3000);
+        matrix<double,0,1> w = lowerbound(-A*alpha, 0);
+        dlog << LINFO << "***** LOCAL **************************************************";
+        dlog << LINFO << "alpha: " << trans(alpha);
+        dlog << LINFO << "lambda: " << trans(lambda);
+        dlog << LINFO << "w:     " << trans(w);
+        const double computed_obj = compute_objective_value(w,A,b,C);
+        w = 0, 0, 0;
+        const double true_obj = compute_objective_value(w,A,b,C);
+        dlog << LINFO << "computed obj:      "<< computed_obj;
+        dlog << LINFO << "with true w obj:   "<< true_obj;
+        DLIB_TEST_MSG(abs(computed_obj - true_obj) < 1e-8, 
+            "computed_obj: "<< computed_obj << "  true_obj: " << true_obj << "  delta: "<<  abs(computed_obj - true_obj)
+            << "  iters: " << iters
+            << "\n  alpha: " << trans(alpha) 
+            << "   lambda: " << trans(lambda) 
+            );
+    }
    void test_qp4_test6()
    {
+        test_qp4_test6_local();
        matrix<double> A(3,3);
        A = 1,2,4,
        3,1,6,