Added solve_qp_box_constrained_blockdiag()

daa2adbd · Davis King · 096ab3c8 · daa2adbd · daa2adbd · daa2adbd
Commit daa2adbd authored Mar 20, 2017 by Davis King
3 changed files
--- a/dlib/optimization/optimization_solve_qp_using_smo.h
+++ b/dlib/optimization/optimization_solve_qp_using_smo.h
--- a/dlib/optimization/optimization_solve_qp_using_smo_abstract.h
+++ b/dlib/optimization/optimization_solve_qp_using_smo_abstract.h
@@ -4,6 +4,8 @@
 #ifdef DLIB_OPTIMIZATION_SOLVE_QP_UsING_SMO_ABSTRACT_Hh_

 #include "../matrix.h"
+#include <map>
+#include "../unordered_pair.h"

 namespace dlib
 {
@@ -162,6 +164,74 @@ namespace dlib
              converge to eps accuracy then the number returned will be max_iter+1.
    !*/

+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename T, long NR, long NC, typename MM, typename L
+        >
+    unsigned long solve_qp_box_constrained_blockdiag ( 
+        const std::vector<matrix<T,NR,NR,MM,L>>& Q_blocks,
+        const std::vector<matrix<T,NR,NC,MM,L>>& bs,
+        const std::map<unordered_pair<size_t>, matrix<T,NR,NC,MM,L>>& Q_offdiag,
+        std::vector<matrix<T,NR,NC,MM,L>>& alphas,
+        const std::vector<matrix<T,NR,NC,MM,L>>& lowers,
+        const std::vector<matrix<T,NR,NC,MM,L>>& uppers,
+        T eps,
+        unsigned long max_iter
+    );
+    /*!
+        requires
+            - Q_blocks.size() > 0
+            - Q_blocks.size() == bs.size() == alphas.size() == lowers.size() == uppers.size()
+            - All the matrices in Q_blocks have the same dimensions.  Moreover, they are square
+              matrices.
+            - All the matrices in bs, Q_offdiag, alphas, lowers, and uppers have the same
+              dimensions.  Moreover, they are all column vectors.
+            - Q_blocks[0].nr() == alphas[0].size()
+              (i.e. the dimensionality of the column vectors in alphas must match the
+              dimensionality of the square matrices in Q_blocks.)
+            - for all valid i:
+                - 0 <= min(alphas[i]-lowers[i])
+                - 0 <= min(uppers[i]-alphas[i])
+                  (i.e. every element of the initial alphas[i] lies inside its box:
+                  lowers[i] <= alphas[i] <= uppers[i], element-wise.)
+            - eps > 0
+            - max_iter > 0
+        ensures
+            - This function solves the same QP as solve_qp_box_constrained(), except it is
+              optimized for the case where the Q matrix has a certain sparsity structure.
+              To be precise:
+                - Let Q1 be a block diagonal matrix with the elements of Q_blocks placed
+                  along its diagonal, and in the order contained in Q_blocks.
+                - Let Q2 be a matrix with the same size as Q1, except instead of being block
+                  diagonal, it is block structured into Q_blocks.size() by Q_blocks.size()
+                  blocks.  If we let (r,c) be the coordinate of each block then each block
+                  contains the matrix diagm(Q_offdiag[make_unordered_pair(r,c)]) or the zero
+                  matrix if Q_offdiag has no entry for the coordinate (r,c).  Since the keys
+                  are unordered pairs, blocks (r,c) and (c,r) hold the same diagonal matrix,
+                  so Q2 is symmetric.  An entry with r == c simply adds a diagonal matrix
+                  onto the r-th diagonal block.
+                - Let Q == Q1+Q2
+                - Let b == the concatenation of all the vectors in bs into one big vector.
+                - Let alpha == the concatenation of all the vectors in alphas into one big vector.
+                - Let lower == the concatenation of all the vectors in lowers into one big vector.
+                - Let upper == the concatenation of all the vectors in uppers into one big vector.
+                - Then this function solves the following quadratic program:
+                    Minimize: f(alpha) == 0.5*trans(alpha)*Q*alpha + trans(b)*alpha
+                    subject to the following box constraints on alpha:
+                        - 0 <= min(alpha-lower)
+                        - 0 <= min(upper-alpha)
+                    Where f is convex.  This means that Q should be positive-semidefinite.
+                - More specifically, this function is identical to
+                  solve_qp_box_constrained(Q, b, alpha, lower, upper, eps, max_iter),
+                  except that it runs faster since it avoids unnecessary computation by
+                  taking advantage of the sparsity structure in the QP.
+            - The solution to the above QP will be stored in #alphas.
+            - This function uses a combination of an SMO algorithm along with Nesterov's
+              method as the main iteration of the solver.  It starts the algorithm with the
+              given alpha and it works on the problem until the derivative of f(alpha) is
+              smaller than eps for each element of alpha or the alpha value is at a box
+              constraint.  So eps controls how accurate the solution is and smaller values
+              result in better solutions.
+            - At most max_iter iterations of optimization will be performed.
+            - returns the number of iterations performed.  If this method fails to
+              converge to eps accuracy then the number returned will be max_iter+1.
+    !*/
 // ----------------------------------------------------------------------------------------

    template <

--- a/dlib/test/opt_qp_solver.cpp
+++ b/dlib/test/opt_qp_solver.cpp
@@ -507,6 +507,81 @@ namespace
        DLIB_TEST(length(A*c1 - B*c2) < 4);
    }

+// ----------------------------------------------------------------------------------------
+
+    // Cross-checks solve_qp_box_constrained_blockdiag() against the dense
+    // solve_qp_box_constrained() solver.  Each of the 50 rounds builds a random
+    // problem made of two 4x4 diagonal blocks plus a random subset of diagonal
+    // coupling blocks, solves it with both solvers from the same starting point,
+    // and requires matching solutions and matching iteration counts.
+    void test_solve_qp_box_constrained_blockdiag()
+    {
+        dlib::rand rnd;
+        for (int iter = 0; iter < 50; ++iter)
+        {
+            print_spinner();
+
+            // Random symmetric positive semidefinite diagonal blocks (A*trans(A)).
+            const matrix<double> A1 = randm(4,4,rnd);
+            const matrix<double> Q1 = A1*trans(A1);
+            const matrix<double> A2 = randm(4,4,rnd);
+            const matrix<double> Q2 = A2*trans(A2);
+
+            // Linear terms, seeded by the round number so every round differs.
+            const matrix<double,0,1> b1 = gaussian_randm(4,1, iter*2+0);
+            const matrix<double,0,1> b2 = gaussian_randm(4,1, iter*2+1);
+
+            // Each candidate coupling block is included with a coin flip.
+            std::map<unordered_pair<size_t>, matrix<double,0,1>> offdiag;
+            const auto maybe_add_block = [&](size_t r, size_t c)
+            {
+                if (rnd.get_random_gaussian() > 0)
+                    offdiag[make_unordered_pair(r,c)] = randm(4,1,rnd);
+            };
+            maybe_add_block(0,0);
+            maybe_add_block(1,0);
+            maybe_add_block(1,1);
+
+            const std::vector<matrix<double>> Q_blocks = {Q1, Q2};
+            const std::vector<matrix<double,0,1>> bs = {b1, b2};
+
+            // Assemble the equivalent dense problem: block diagonal part first,
+            // then every coupling block mirrored across the diagonal.
+            matrix<double> Q = join_cols(join_rows(Q1, zeros_matrix(Q1)),
+                                         join_rows(zeros_matrix(Q2), Q2));
+            const matrix<double,0,1> b = join_cols(b1,b2);
+            for (const auto& entry : offdiag)
+            {
+                const long r = entry.first.first;
+                const long c = entry.first.second;
+                set_subm(Q, 4*r,4*c, 4,4) += diagm(entry.second);
+                if (r != c)
+                    set_subm(Q, 4*c,4*r, 4,4) += diagm(entry.second);
+            }
+
+            // Reference solution from the dense solver.
+            matrix<double,0,1> alpha = zeros_matrix(b);
+            const matrix<double,0,1> dense_lower = -10000*ones_matrix(b);
+            const matrix<double,0,1> dense_upper = 10000*ones_matrix(b);
+
+            const auto iters = solve_qp_box_constrained(Q, b, alpha, dense_lower, dense_upper, 1e-9, 10000);
+            dlog << LINFO << "iters: "<< iters;
+            dlog << LINFO << "alpha: " << trans(alpha);
+
+            dlog << LINFO;
+
+            // Same problem, same start and same bounds, given block by block.
+            const matrix<double,0,1> zero_block = zeros_matrix<double>(4,1);
+            std::vector<matrix<double,0,1>> alphas = {zero_block, zero_block};
+
+            const matrix<double,0,1> block_lower = -10000*ones_matrix(alphas[0]);
+            const matrix<double,0,1> block_upper = 10000*ones_matrix(alphas[0]);
+            const std::vector<matrix<double,0,1>> lowers = {block_lower, block_lower};
+            const std::vector<matrix<double,0,1>> uppers = {block_upper, block_upper};
+
+            const auto iters2 = solve_qp_box_constrained_blockdiag(Q_blocks, bs, offdiag, alphas, lowers, uppers, 1e-9, 10000);
+
+            // Stack the per-block solutions so they can be compared to alpha.
+            const matrix<double,0,1> alpha_blockdiag = join_cols(alphas[0],alphas[1]);
+            dlog << LINFO << "iters2: "<< iters2;
+            dlog << LINFO << "alpha: " << trans(alpha_blockdiag);
+
+            dlog << LINFO << "obj1: "<< 0.5*trans(alpha)*Q*alpha + trans(b)*alpha;
+            dlog << LINFO << "obj2: "<< 0.5*trans(alpha_blockdiag)*Q*alpha_blockdiag + trans(b)*alpha_blockdiag;
+            dlog << LINFO << "obj1-obj2: "<<(0.5*trans(alpha)*Q*alpha + trans(b)*alpha)
+                                           - (0.5*trans(alpha_blockdiag)*Q*alpha_blockdiag + trans(b)*alpha_blockdiag);
+
+            // Both solvers must land on the same point ...
+            DLIB_TEST_MSG(max(abs(alpha - join_cols(alphas[0], alphas[1]))) < 1e-6, max(abs(alpha - join_cols(alphas[0], alphas[1]))));
+
+            // ... and take exactly the same number of iterations to get there.
+            DLIB_TEST(iters == iters2);
+
+        }
+    }
+
 // ----------------------------------------------------------------------------------------

    class opt_qp_solver_tester : public tester
@@ -566,6 +641,7 @@ namespace


            test_find_gap_between_convex_hulls();
+            test_solve_qp_box_constrained_blockdiag();
        }

        double do_the_test (