Commit 295ae6fc authored by Davis King

Added an optional non-negativity constraint on w to the oca optimizer.

parent 82e1df7a
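
In short: oca::operator() gains an optional third argument, require_nonnegative_w, defaulting to false so existing callers are unaffected. A minimal usage sketch (illustrative only; the problem object, the function name, and the assumption that <dlib/optimization.h> pulls in the oca solver are mine, only the new flag itself comes from this commit):

    #include <dlib/optimization.h>
    using namespace dlib;

    typedef matrix<double,0,1> column_vector;

    // my_problem is assumed to be an instance of some user-defined subclass of
    // oca_problem<column_vector>; defining one is unrelated to this commit.
    double solve_with_nonnegative_w (const oca_problem<column_vector>& my_problem)
    {
        column_vector w;
        oca solver;
        // Passing true for the new require_nonnegative_w argument constrains every
        // element of the returned w to be >= 0; omitting it (or passing false)
        // gives the old, unconstrained behavior.  The return value is the objective
        // value at the solution, as before.
        return solver(my_problem, w, true);
    }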
@@ -7,7 +7,7 @@
 #include "../matrix.h"
 #include "optimization_solve_qp_using_smo.h"
-#include <list>
+#include <vector>

 // ----------------------------------------------------------------------------------------
@@ -110,7 +110,8 @@ namespace dlib
         >
         typename matrix_type::type operator() (
             const oca_problem<matrix_type>& problem,
-            matrix_type& w
+            matrix_type& w,
+            bool require_nonnegative_w = false
         ) const
         {
             // make sure requires clause is not broken
@@ -130,10 +131,10 @@ namespace dlib
             const scalar_type C = problem.get_c();

-            std::list<vect_type> planes;
+            matrix<scalar_type,0,0,mem_manager_type, layout_type> planes;
             std::vector<scalar_type> bs, miss_count;
-            vect_type temp, alpha;
+            vect_type new_plane, alpha;

             w.set_size(problem.get_num_dimensions(), 1);
             w = 0;
@@ -154,7 +155,7 @@ namespace dlib
                 // The flat lower bounding plane is always good to have if we know
                 // what it is.
                 bs.push_back(R_lower_bound);
-                planes.push_back(zeros_matrix<scalar_type>(w.size(),1));
+                planes = zeros_matrix(w);
                 alpha = uniform_matrix<scalar_type>(1,1, C);
                 miss_count.push_back(0);
@@ -169,9 +170,12 @@ namespace dlib
                 // add the next cutting plane
                 scalar_type cur_risk;
-                planes.resize(planes.size()+1);
-                problem.get_risk(w, cur_risk, planes.back());
-                bs.push_back(cur_risk - dot(w,planes.back()));
+                problem.get_risk(w, cur_risk, new_plane);
+                if (planes.size() != 0)
+                    planes = join_rows(planes, new_plane);
+                else
+                    planes = new_plane;
+                bs.push_back(cur_risk - dot(w,new_plane));
                 miss_count.push_back(0);

                 // If alpha is empty then initialize it (we must always have sum(alpha) == C).
@@ -187,24 +191,22 @@ namespace dlib
                 // report current status
                 const scalar_type risk_gap = cur_risk - (cp_obj-wnorm)/C;
                 if (counter > 0 && problem.optimization_status(cur_obj, cur_obj - cp_obj,
-                                                               cur_risk, risk_gap, planes.size(), counter))
+                                                               cur_risk, risk_gap, planes.nc(), counter))
                 {
                     break;
                 }

                 // compute kernel matrix for all the planes
                 K.swap(Ktmp);
-                K.set_size(planes.size(), planes.size());
+                K.set_size(planes.nc(), planes.nc());
                 // copy over the old K matrix
                 set_subm(K, 0,0, Ktmp.nr(), Ktmp.nc()) = Ktmp;

                 // now add the new row and column to K
-                long rr = 0;
-                for (typename std::list<vect_type>::iterator r = planes.begin(); r != planes.end(); ++r)
+                for (long c = 0; c < planes.nc(); ++c)
                 {
-                    K(rr, Ktmp.nc()) = dot(*r, planes.back());
-                    K(Ktmp.nc(), rr) = K(rr,Ktmp.nc());
-                    ++rr;
+                    K(c, Ktmp.nc()) = dot(colm(planes,c), new_plane);
+                    K(Ktmp.nc(), c) = K(c,Ktmp.nc());
                 }
@@ -216,23 +218,22 @@ namespace dlib
                     eps = 1e-16;
                 // Note that we warm start this optimization by using the alpha from the last
                 // iteration as the starting point.
-                solve_qp_using_smo(K, vector_to_matrix(bs), alpha, eps, sub_max_iter);
+                if (require_nonnegative_w)
+                    solve_qp4_using_smo(planes, K, vector_to_matrix(bs), alpha, eps, sub_max_iter);
+                else
+                    solve_qp_using_smo(K, vector_to_matrix(bs), alpha, eps, sub_max_iter);

                 // construct the w that minimized the subproblem.
-                w = 0;
-                rr = 0;
-                for (typename std::list<vect_type>::iterator i = planes.begin(); i != planes.end(); ++i)
+                w = -(planes*alpha);
+                if (require_nonnegative_w)
+                    w = lowerbound(w,0);
+
+                for (long i = 0; i < alpha.size(); ++i)
                 {
-                    if (alpha(rr) != 0)
-                    {
-                        w -= alpha(rr)*(*i);
-                        miss_count[rr] = 0;
-                    }
+                    if (alpha(i) != 0)
+                        miss_count[i] = 0;
                     else
-                    {
-                        miss_count[rr] += 1;
-                    }
-                    ++rr;
+                        miss_count[i] += 1;
                 }

                 // Compute the lower bound on the true objective given to us by the cutting
@@ -245,13 +246,11 @@ namespace dlib
                 while (max(vector_to_matrix(miss_count)) >= inactive_thresh)
                 {
                     const long idx = index_of_max(vector_to_matrix(miss_count));
-                    typename std::list<vect_type>::iterator i0 = planes.begin();
-                    advance(i0, idx);
-                    planes.erase(i0);
                     bs.erase(bs.begin()+idx);
                     miss_count.erase(miss_count.begin()+idx);
                     K = removerc(K, idx, idx);
                     alpha = remove_row(alpha,idx);
+                    planes = remove_col(planes,idx);
                 }

                 ++counter;
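
A note for readers following the algebra in the hunks above (this is the standard cutting plane bookkeeping, not something the patch spells out): each call to problem.get_risk() returns the current risk together with a subgradient, stored as the column new_plane == a_t, and the offset pushed onto bs is b_t == R(w_t) - dot(w_t, a_t), so every stored column yields the lower bound

    R(w) >= dot(a_t, w) + b_t

The subproblem handed to the SMO solver is the dual of

    Minimize: 0.5*dot(w,w) + C*max over t of ( dot(a_t, w) + b_t )

namely a QP over alpha with alpha >= 0 and sum(alpha) == C whose quadratic term uses K(i,j) == dot(a_i, a_j), exactly the matrix maintained incrementally above, and the primal solution is recovered as w == -(planes*alpha). When require_nonnegative_w is true the patch hands the same data to solve_qp4_using_smo() instead and additionally clamps the recovered w at zero via lowerbound(w,0).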
...
@@ -124,7 +124,8 @@ namespace dlib
                 For reference, OCA solves optimization problems with the following form:
                     Minimize: f(w) == 0.5*dot(w,w) + C*R(w)

-                    Where R(w) is a user-supplied convex function and C > 0
+                    Where R(w) is a user-supplied convex function and C > 0.  Optionally,
+                    this object can also add the non-negativity constraint that min(w) >= 0.

                 For a detailed discussion you should consult the following papers
@@ -149,7 +150,8 @@ namespace dlib
             >
         typename matrix_type::type operator() (
             const oca_problem<matrix_type>& problem,
-            matrix_type& w
+            matrix_type& w,
+            bool require_nonnegative_w = false
         ) const;
         /*!
             requires
@@ -160,6 +162,10 @@ namespace dlib
                 - The optimization algorithm runs until problem.optimization_status()
                   indicates it is time to stop.
                 - returns the objective value at the solution #w
+                - if (require_nonnegative_w == true) then
+                    - Adds the constraint that every element of w be non-negative.
+                      Therefore, if this argument is true then #w won't contain any
+                      negative values.
         !*/

         void set_subproblem_epsilon (
...
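
The main mechanical change in the first file is that the bundle of cutting planes is no longer a std::list of column vectors but a single matrix whose columns are the planes, which is what allows the whole bundle to be multiplied directly in w = -(planes*alpha) and handed to solve_qp4_using_smo(). A small self-contained sketch of that column-per-plane bookkeeping, using only dlib matrix utilities that already appear in the patch (join_rows, colm, remove_col); the values are made up for illustration:

    #include <dlib/matrix.h>
    #include <iostream>
    using namespace dlib;

    int main()
    {
        matrix<double,3,1> p1, p2;
        p1 = 1, 2, 3;                      // pretend these are two cutting planes (subgradients)
        p2 = 4, 5, 6;

        matrix<double> planes;             // starts out empty, as in the patch
        planes = p1;                       // the first plane becomes the whole matrix
        planes = join_rows(planes, p2);    // each later plane is appended as a new column

        std::cout << "number of planes: " << planes.nc() << "\n";  // prints 2
        std::cout << "second plane:\n" << colm(planes, 1);         // reads one plane back out

        planes = remove_col(planes, 0);    // dropping an inactive plane removes its column
        std::cout << "planes left: " << planes.nc() << "\n";       // prints 1
        return 0;
    }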