Commit 64c4f6df authored by Davis King's avatar Davis King

Added a max iteration parameter to the solver. Also changed it slightly to be more robust to numerically difficult problems.

--HG--
extra : convert_revision : svn%3Afdd8eb12-d10e-0410-9acb-85c331704f74/trunk%403476
parent b30a275a
......@@ -74,11 +74,12 @@ namespace dlib
typename EXP2,
typename T, long NR, long NC, typename MM, typename L
>
void solve_qp_using_smo (
unsigned long solve_qp_using_smo (
const matrix_exp<EXP1>& Q,
const matrix_exp<EXP2>& b,
matrix<T,NR,NC,MM,L>& alpha,
T eps
T eps,
unsigned long max_iter
)
{
// make sure requires clause is not broken
......@@ -89,7 +90,8 @@ namespace dlib
b.size() == Q.nr() &&
alpha.size() > 0 &&
min(alpha) >= 0 &&
eps > 0,
eps > 0 &&
max_iter > 0,
"\t void solve_qp_using_smo()"
<< "\n\t Invalid arguments were given to this function"
<< "\n\t Q.nr(): " << Q.nr()
......@@ -101,6 +103,7 @@ namespace dlib
<< "\n\t Q.nr(): " << Q.nr()
<< "\n\t min(alpha): " << min(alpha)
<< "\n\t eps: " << eps
<< "\n\t max_iter: " << max_iter
);
// Compute f'(alpha) (i.e. the gradient of f(alpha)) for the current alpha.
......@@ -108,7 +111,8 @@ namespace dlib
const T tau = 1000*std::numeric_limits<T>::epsilon();
while (true)
unsigned long iter = 0;
for (; iter < max_iter; ++iter)
{
// Find the two elements of df that satisfy the following:
// - small_idx == index_of_min(df)
......@@ -163,13 +167,26 @@ namespace dlib
alpha(small_idx) = old_alpha_big + old_alpha_small;
}
// Now update the gradient. We will perform the equivalent of: df = Q*alpha - b;
const T delta_alpha_big = alpha(big_idx) - old_alpha_big;
const T delta_alpha_small = alpha(small_idx) - old_alpha_small;
for(long k = 0; k < df.nr(); ++k)
df(k) += Q(big_idx,k)*delta_alpha_big + Q(small_idx,k)*delta_alpha_small;
if ((iter%(max_iter/10)) == (max_iter/10 -1))
{
// Perform this form of the update every so often because doing so can help
// avoid the buildup of numerical errors you get with the alternate update
// below.
df = Q*alpha - b;
}
else
{
// Now update the gradient. We will perform the equivalent of: df = Q*alpha - b;
const T delta_alpha_big = alpha(big_idx) - old_alpha_big;
const T delta_alpha_small = alpha(small_idx) - old_alpha_small;
for(long k = 0; k < df.nr(); ++k)
df(k) += Q(big_idx,k)*delta_alpha_big + Q(small_idx,k)*delta_alpha_small;
}
}
return iter+1;
}
// ----------------------------------------------------------------------------------------
......
......@@ -15,11 +15,12 @@ namespace dlib
typename EXP2,
typename T, long NR, long NC, typename MM, typename L
>
void solve_qp_using_smo (
unsigned long solve_qp_using_smo (
const matrix_exp<EXP1>& Q,
const matrix_exp<EXP2>& b,
matrix<T,NR,NC,MM,L>& alpha,
T eps
T eps,
unsigned long max_iter
);
/*!
requires
......@@ -30,6 +31,7 @@ namespace dlib
- alpha.size() > 0
- min(alpha) >= 0
- eps > 0
- max_iter > 0
ensures
- Let C == sum(alpha) (i.e. C is the sum of the alpha values you
supply to this function)
......@@ -44,6 +46,9 @@ namespace dlib
and it works on the problem until the KKT violation is less than eps.
So eps controls how accurate the solution is and smaller values result
in better solutions.
- At most max_iter iterations of optimization will be performed.
- returns the number of iterations performed. If this method fails to
converge to eps accuracy then the number returned will be max_iter+1.
!*/
// ----------------------------------------------------------------------------------------
......
......@@ -157,7 +157,8 @@ namespace
alpha = C/alpha.size();
x = alpha;
solve_qp_using_smo(test.Q, test.b, alpha, 0.00001);
const unsigned long max_iter = 100000;
solve_qp_using_smo(test.Q, test.b, alpha, 0.00001, max_iter);
DLIB_TEST_MSG(abs(sum(alpha) - C) < 1e-13, abs(sum(alpha) - C) );
dlog << LTRACE << "alpha: " << alpha;
dlog << LINFO << "SMO: true objective: "<< 0.5*trans(alpha)*test.Q*alpha - trans(alpha)*test.b;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment