Commit daa2adbd authored by Davis King's avatar Davis King

Added solve_qp_box_constrained_blockdiag()

parent 096ab3c8
......@@ -5,6 +5,8 @@
#include "optimization_solve_qp_using_smo_abstract.h"
#include "../matrix.h"
#include <map>
#include "../unordered_pair.h"
namespace dlib
{
......@@ -548,6 +550,254 @@ namespace dlib
return iter+1;
}
// ----------------------------------------------------------------------------------------
template <
typename T, long NR, long NC, typename MM, typename L
>
unsigned long solve_qp_box_constrained_blockdiag (
const std::vector<matrix<T,NR,NR,MM,L>>& Q_blocks,
const std::vector<matrix<T,NR,NC,MM,L>>& bs,
const std::map<unordered_pair<size_t>, matrix<T,NR,NC,MM,L>>& Q_offdiag,
std::vector<matrix<T,NR,NC,MM,L>>& alphas,
const std::vector<matrix<T,NR,NC,MM,L>>& lowers,
const std::vector<matrix<T,NR,NC,MM,L>>& uppers,
T eps,
unsigned long max_iter
)
{
// make sure requires clause is not broken
DLIB_CASSERT(Q_blocks.size() > 0);
DLIB_CASSERT(Q_blocks.size() == bs.size() &&
Q_blocks.size() == alphas.size() &&
Q_blocks.size() == lowers.size() &&
Q_blocks.size() == uppers.size(),
"Q_blocks.size(): "<< Q_blocks.size() << "\n" <<
"bs.size(): "<< bs.size() << "\n" <<
"alphas.size(): "<< alphas.size() << "\n" <<
"lowers.size(): "<< lowers.size() << "\n" <<
"uppers.size(): "<< uppers.size() << "\n"
);
for (auto& Q : Q_blocks)
{
DLIB_CASSERT(Q.nr() == Q.nc(), "All the matrices in Q_blocks have the same dimensions.");
DLIB_CASSERT(Q.nr() == Q_blocks[0].nr() && Q.nc() == Q_blocks[0].nc(), "All the matrices in Q_blocks have the same dimensions.");
}
#ifdef ENABLE_ASSERTS
for (size_t i = 0; i < alphas.size(); ++i)
{
DLIB_CASSERT(is_col_vector(bs[i]) && bs[i].size() == Q_blocks[0].nr(),
"is_col_vector(bs["<<i<<"]): " << is_col_vector(bs[i]) << "\n" <<
"bs["<<i<<"].size(): " << bs[i].size() << "\n" <<
"Q_blocks[0].nr(): " << Q_blocks[0].nr());
for (auto& Qoffdiag : Q_offdiag)
{
auto& Q_offdiag_element = Qoffdiag.second;
long r = Qoffdiag.first.first;
long c = Qoffdiag.first.second;
DLIB_CASSERT(is_col_vector(Q_offdiag_element) && Q_offdiag_element.size() == Q_blocks[0].nr(),
"is_col_vector(Q_offdiag["<<r<<","<<c<<"]): " << is_col_vector(Q_offdiag_element) << "\n" <<
"Q_offdiag["<<r<<","<<c<<"].size(): " << Q_offdiag_element.size() << "\n" <<
"Q_blocks[0].nr(): " << Q_blocks[0].nr());
}
DLIB_CASSERT(is_col_vector(alphas[i]) && alphas[i].size() == Q_blocks[0].nr(),
"is_col_vector(alphas["<<i<<"]): " << is_col_vector(alphas[i]) << "\n" <<
"alphas["<<i<<"].size(): " << alphas[i].size() << "\n" <<
"Q_blocks[0].nr(): " << Q_blocks[0].nr());
DLIB_CASSERT(is_col_vector(lowers[i]) && lowers[i].size() == Q_blocks[0].nr(),
"is_col_vector(lowers["<<i<<"]): " << is_col_vector(lowers[i]) << "\n" <<
"lowers["<<i<<"].size(): " << lowers[i].size() << "\n" <<
"Q_blocks[0].nr(): " << Q_blocks[0].nr());
DLIB_CASSERT(is_col_vector(uppers[i]) && uppers[i].size() == Q_blocks[0].nr(),
"is_col_vector(uppers["<<i<<"]): " << is_col_vector(uppers[i]) << "\n" <<
"uppers["<<i<<"].size(): " << uppers[i].size() << "\n" <<
"Q_blocks[0].nr(): " << Q_blocks[0].nr());
DLIB_CASSERT(0 <= min(alphas[i]-lowers[i]), "min(alphas["<<i<<"]-lowers["<<i<<"]): " << min(alphas[i]-lowers[i]));
DLIB_CASSERT(0 <= max(uppers[i]-alphas[i]), "max(uppers["<<i<<"]-alphas["<<i<<"]): " << max(uppers[i]-alphas[i]));
}
DLIB_CASSERT(eps > 0 && max_iter > 0, "eps: " << eps << "\nmax_iter: "<< max_iter);
#endif // ENABLE_ASSERTS
// Compute f'(alpha) (i.e. the gradient of f(alpha)) for the current alpha.
std::vector<matrix<T,NR,NC,MM,L>> df;// = Q*alpha + b;
auto compute_df = [&]()
{
df.resize(Q_blocks.size());
for (size_t i = 0; i < df.size(); ++i)
df[i] = Q_blocks[i]*alphas[i] + bs[i];
// Don't forget to include the Q_offdiag terms in the computation
for (auto& p : Q_offdiag)
{
long r = p.first.first;
long c = p.first.second;
df[r] += pointwise_multiply(p.second, alphas[c]);
if (r != c)
df[c] += pointwise_multiply(p.second, alphas[r]);
}
};
compute_df();
std::vector<matrix<T,NR,NC,MM,L>> Q_diag, Q_ggd;
std::vector<matrix<T,NR,NC,MM,L>> QQ;// = reciprocal_max(diag(Q));
QQ.resize(Q_blocks.size());
Q_diag.resize(Q_blocks.size());
Q_ggd.resize(Q_blocks.size());
// We need to get an upper bound on the Lipschitz constant for this QP. Since that
// is just the max eigenvalue of Q we can do it using Gershgorin disks.
//const T lipschitz_bound = max(diag(Q) + (sum_cols(abs(Q)) - abs(diag(Q))));
for (size_t i = 0; i < QQ.size(); ++i)
{
auto f = Q_offdiag.find(make_unordered_pair(i,i));
if (f != Q_offdiag.end())
Q_diag[i] = diag(Q_blocks[i]) + f->second;
else
Q_diag[i] = diag(Q_blocks[i]);
QQ[i] = reciprocal_max(Q_diag[i]);
Q_ggd[i] = Q_diag[i] + (sum_cols(abs(Q_blocks[i]))-abs(diag(Q_blocks[i])));
}
for (auto& p : Q_offdiag)
{
long r = p.first.first;
long c = p.first.second;
if (r != c)
{
Q_ggd[r] += abs(p.second);
Q_ggd[c] += abs(p.second);
}
}
T lipschitz_bound = -std::numeric_limits<T>::infinity();
for (auto& x : Q_ggd)
lipschitz_bound = std::max(lipschitz_bound, max(x));
const long num_variables = alphas.size()*alphas[0].size();
// First we use a coordinate descent method to initialize alpha.
double max_df = 0;
for (long iter = 0; iter < num_variables*2; ++iter)
{
max_df = 0;
long best_r =0;
size_t best_r2 =0;
// find the best alpha to optimize.
for (size_t r2 = 0; r2 < alphas.size(); ++r2)
{
auto& alpha = alphas[r2];
auto& df_ = df[r2];
auto& lower = lowers[r2];
auto& upper = uppers[r2];
for (long r = 0; r < alpha.nr(); ++r)
{
if (alpha(r) <= lower(r) && df_(r) > 0)
;//alpha(r) = lower(r);
else if (alpha(r) >= upper(r) && df_(r) < 0)
;//alpha(r) = upper(r);
else if (std::abs(df_(r)) > max_df)
{
best_r = r;
best_r2 = r2;
max_df = std::abs(df_(r));
}
}
}
// now optimize alphas[best_r2](best_r)
const long r = best_r;
auto& alpha = alphas[best_r2];
auto& lower = lowers[best_r2];
auto& upper = uppers[best_r2];
auto& df_ = df[best_r2];
const T old_alpha = alpha(r);
alpha(r) = -(df_(r)-Q_diag[best_r2](r)*alpha(r))*QQ[best_r2](r);
if (alpha(r) < lower(r))
alpha(r) = lower(r);
else if (alpha(r) > upper(r))
alpha(r) = upper(r);
const T delta = old_alpha-alpha(r);
// Now update the gradient. We will perform the equivalent of: df = Q*alpha +
// b; except we only need to compute one column of the matrix multiply because
// only one element of alpha changed.
auto& Q = Q_blocks[best_r2];
for(long k = 0; k < df_.nr(); ++k)
df_(k) -= Q(r,k)*delta;
for(size_t j = 0; j < Q_blocks.size(); ++j)
{
auto f = Q_offdiag.find(make_unordered_pair(best_r2, j));
if (f != Q_offdiag.end())
df[j](r) -= f->second(r)*delta;
}
}
std::vector<matrix<T,NR,NC,MM,L>> v(alphas), v_old(alphas.size());
for (size_t i = 0; i < v_old.size(); ++i)
v_old[i].set_size(alphas.size());
double lambda = 0;
unsigned long iter;
// Now do the main iteration block of this solver. The coordinate descent method
// we used above can improve the objective rapidly in the beginning. However,
// Nesterov's method has more rapid convergence once it gets going so this is what
// we use for the main iteration.
for (iter = 0; iter < max_iter; ++iter)
{
const double next_lambda = (1 + std::sqrt(1+4*lambda*lambda))/2;
const double gamma = (1-lambda)/next_lambda;
lambda = next_lambda;
v_old.swap(v);
//df = Q*alpha + b;
compute_df();
// now take a projected gradient step using Nesterov's method.
for (size_t j = 0; j < alphas.size(); ++j)
{
v[j] = clamp(alphas[j] - 1.0/lipschitz_bound * df[j], lowers[j], uppers[j]);
alphas[j] = clamp((1-gamma)*v[j] + gamma*v_old[j], lowers[j], uppers[j]);
}
// check for convergence every 10 iterations
if (iter%10 == 0)
{
max_df = 0;
for (size_t r2 = 0; r2 < alphas.size(); ++r2)
{
auto& alpha = alphas[r2];
auto& df_ = df[r2];
auto& lower = lowers[r2];
auto& upper = uppers[r2];
for (long r = 0; r < alpha.nr(); ++r)
{
if (alpha(r) <= lower(r) && df_(r) > 0)
;//alpha(r) = lower(r);
else if (alpha(r) >= upper(r) && df_(r) < 0)
;//alpha(r) = upper(r);
else if (std::abs(df_(r)) > max_df)
max_df = std::abs(df_(r));
}
}
if (max_df < eps)
break;
}
}
return iter+1;
}
// ----------------------------------------------------------------------------------------
template <
......
......@@ -4,6 +4,8 @@
#ifdef DLIB_OPTIMIZATION_SOLVE_QP_UsING_SMO_ABSTRACT_Hh_
#include "../matrix.h"
#include <map>
#include "../unordered_pair.h"
namespace dlib
{
......@@ -162,6 +164,74 @@ namespace dlib
converge to eps accuracy then the number returned will be max_iter+1.
!*/
// ----------------------------------------------------------------------------------------
template <
typename T, long NR, long NC, typename MM, typename L
>
unsigned long solve_qp_box_constrained_blockdiag (
const std::vector<matrix<T,NR,NR,MM,L>>& Q_blocks,
const std::vector<matrix<T,NR,NC,MM,L>>& bs,
const std::map<unordered_pair<size_t>, matrix<T,NR,NC,MM,L>>& Q_offdiag,
std::vector<matrix<T,NR,NC,MM,L>>& alphas,
const std::vector<matrix<T,NR,NC,MM,L>>& lowers,
const std::vector<matrix<T,NR,NC,MM,L>>& uppers,
T eps,
unsigned long max_iter
);
/*!
requires
- Q_blocks.size() > 0
- Q_blocks.size() == bs.size() == alphas.size() == lowers.size() == uppers.size()
- All the matrices in Q_blocks have the same dimensions. Moreover, they are square
matrices.
- All the matrices in bs, Q_offdiag, alphas, lowers, and uppers have the same
dimensions. Moreover, they are all column vectors.
- Q_blocks[0].nr() == alphas[0].size()
(i.e. the dimensionality of the column vectors in alphas must match the
dimensionality of the square matrices in Q_blocks.)
- for all valid i:
- 0 <= min(alphas[i]-lowers[i])
- 0 <= max(uppers[i]-alphas[i])
- eps > 0
- max_iter > 0
ensures
- This function solves the same QP as solve_qp_box_constrained(), except it is
optimized for the case where the Q matrix has a certain sparsity structure.
To be precise:
- Let Q1 be a block diagonal matrix with the elements of Q_blocks placed
along its diagonal, and in the order contained in Q_blocks.
- Let Q2 be a matrix with the same size as Q1, except instead of being block diagonal, it
is block structured into Q_blocks.nr() by Q_blocks.nc() blocks. If we let (r,c) be the
coordinate of each block then each block contains the matrix
diagm(Q_offdiag[make_unordered_pair(r,c)]) or the zero matrix if Q_offdiag has no entry
for the coordinate (r,c).
- Let Q == Q1+Q2
- Let b == the concatenation of all the vectors in bs into one big vector.
- Let alpha == the concatenation of all the vectors in alphas into one big vector.
- Let lower == the concatenation of all the vectors in lowers into one big vector.
- Let upper == the concatenation of all the vectors in uppers into one big vector.
- Then this function solves the following quadratic program:
Minimize: f(alpha) == 0.5*trans(alpha)*Q*alpha + trans(b)*alpha
subject to the following box constraints on alpha:
- 0 <= min(alpha-lower)
- 0 <= max(upper-alpha)
Where f is convex. This means that Q should be positive-semidefinite.
- More specifically, this function is identical to
solve_qp_box_constrained(Q, b, alpha, lower, upper, eps, max_iter),
except that it runs faster since it avoids unnecessary computation by
taking advantage of the sparsity structure in the QP.
- The solution to the above QP will be stored in #alphas.
- This function uses a combination of a SMO algorithm along with Nesterov's
method as the main iteration of the solver. It starts the algorithm with the
given alpha and it works on the problem until the derivative of f(alpha) is
smaller than eps for each element of alpha or the alpha value is at a box
constraint. So eps controls how accurate the solution is and smaller values
result in better solutions.
- At most max_iter iterations of optimization will be performed.
- returns the number of iterations performed. If this method fails to
converge to eps accuracy then the number returned will be max_iter+1.
!*/
// ----------------------------------------------------------------------------------------
template <
......
......@@ -507,6 +507,81 @@ namespace
DLIB_TEST(length(A*c1 - B*c2) < 4);
}
// ----------------------------------------------------------------------------------------
void test_solve_qp_box_constrained_blockdiag()
{
dlib::rand rnd;
for (int iter = 0; iter < 50; ++iter)
{
print_spinner();
matrix<double> Q1, Q2;
matrix<double,0,1> b1, b2;
Q1 = randm(4,4,rnd); Q1 = Q1*trans(Q1);
Q2 = randm(4,4,rnd); Q2 = Q2*trans(Q2);
b1 = gaussian_randm(4,1, iter*2+0);
b2 = gaussian_randm(4,1, iter*2+1);
std::map<unordered_pair<size_t>, matrix<double,0,1>> offdiag;
if (rnd.get_random_gaussian() > 0)
offdiag[make_unordered_pair(0,0)] = randm(4,1,rnd);
if (rnd.get_random_gaussian() > 0)
offdiag[make_unordered_pair(1,0)] = randm(4,1,rnd);
if (rnd.get_random_gaussian() > 0)
offdiag[make_unordered_pair(1,1)] = randm(4,1,rnd);
std::vector<matrix<double>> Q_blocks = {Q1, Q2};
std::vector<matrix<double,0,1>> bs = {b1, b2};
// make the single big Q and b
matrix<double> Q = join_cols(join_rows(Q1, zeros_matrix(Q1)),
join_rows(zeros_matrix(Q2),Q2));
matrix<double,0,1> b = join_cols(b1,b2);
for (auto& p : offdiag)
{
long r = p.first.first;
long c = p.first.second;
set_subm(Q, 4*r,4*c, 4,4) += diagm(p.second);
if (c != r)
set_subm(Q, 4*c,4*r, 4,4) += diagm(p.second);
}
matrix<double,0,1> alpha = zeros_matrix(b);
matrix<double,0,1> lower = -10000*ones_matrix(b);
matrix<double,0,1> upper = 10000*ones_matrix(b);
auto iters = solve_qp_box_constrained(Q, b, alpha, lower, upper, 1e-9, 10000);
dlog << LINFO << "iters: "<< iters;
dlog << LINFO << "alpha: " << trans(alpha);
dlog << LINFO;
std::vector<matrix<double,0,1>> alphas(2);
alphas[0] = zeros_matrix<double>(4,1); alphas[1] = zeros_matrix<double>(4,1);
lower = -10000*ones_matrix(alphas[0]);
upper = 10000*ones_matrix(alphas[0]);
std::vector<matrix<double,0,1>> lowers = {lower,lower}, uppers = {upper, upper};
auto iters2 = solve_qp_box_constrained_blockdiag(Q_blocks, bs, offdiag, alphas, lowers, uppers, 1e-9, 10000);
dlog << LINFO << "iters2: "<< iters2;
dlog << LINFO << "alpha: " << trans(join_cols(alphas[0],alphas[1]));
dlog << LINFO << "obj1: "<< 0.5*trans(alpha)*Q*alpha + trans(b)*alpha;
dlog << LINFO << "obj2: "<< 0.5*trans(join_cols(alphas[0],alphas[1]))*Q*join_cols(alphas[0],alphas[1]) + trans(b)*join_cols(alphas[0],alphas[1]);
dlog << LINFO << "obj1-obj2: "<<(0.5*trans(alpha)*Q*alpha + trans(b)*alpha) - (0.5*trans(join_cols(alphas[0],alphas[1]))*Q*join_cols(alphas[0],alphas[1]) + trans(b)*join_cols(alphas[0],alphas[1]));
DLIB_TEST_MSG(max(abs(alpha - join_cols(alphas[0], alphas[1]))) < 1e-6, max(abs(alpha - join_cols(alphas[0], alphas[1]))));
DLIB_TEST(iters == iters2);
}
}
// ----------------------------------------------------------------------------------------
class opt_qp_solver_tester : public tester
......@@ -566,6 +641,7 @@ namespace
test_find_gap_between_convex_hulls();
test_solve_qp_box_constrained_blockdiag();
}
double do_the_test (
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment