Commit eef728d2 authored by Davis King's avatar Davis King

Simplified the oca implementation slightly. It no longer keeps track of the
best point seen so far.  This change will allow me to use the optimizer in conjunction
with methods for quickly approximating subgradients and risk values without needing
to worry about the optimizer permanently fixating on what it erroneously thinks
is the best point.  Now the optimizer will just keep solving the cutting plane
subproblem and will use that solution as the current iterate and best known
solution.

--HG--
extra : convert_revision : svn%3Afdd8eb12-d10e-0410-9acb-85c331704f74/trunk%404170
parent bc487527
...@@ -131,15 +131,14 @@ namespace dlib ...@@ -131,15 +131,14 @@ namespace dlib
std::list<vect_type> planes; std::list<vect_type> planes;
std::vector<scalar_type> bs, miss_count; std::vector<scalar_type> bs, miss_count;
vect_type temp, alpha, w_cur; vect_type temp, alpha;
w.set_size(problem.get_num_dimensions(), 1); w.set_size(problem.get_num_dimensions(), 1);
w = 0; w = 0;
w_cur = w;
// The best objective value seen so far. Note also // The current objective value. Note also that w always contains
// that w always contains the best solution seen so far. // the current solution.
scalar_type best_obj = std::numeric_limits<scalar_type>::max(); scalar_type cur_obj = std::numeric_limits<scalar_type>::max();
// This will hold the cutting plane objective value. This value is // This will hold the cutting plane objective value. This value is
// a lower bound on the true optimal objective value. // a lower bound on the true optimal objective value.
...@@ -170,8 +169,8 @@ namespace dlib ...@@ -170,8 +169,8 @@ namespace dlib
// add the next cutting plane // add the next cutting plane
scalar_type cur_risk; scalar_type cur_risk;
planes.resize(planes.size()+1); planes.resize(planes.size()+1);
problem.get_risk(w_cur, cur_risk, planes.back()); problem.get_risk(w, cur_risk, planes.back());
bs.push_back(cur_risk - dot(w_cur,planes.back())); bs.push_back(cur_risk - dot(w,planes.back()));
miss_count.push_back(0); miss_count.push_back(0);
// If alpha is empty then initialize it (we must always have sum(alpha) == C). // If alpha is empty then initialize it (we must always have sum(alpha) == C).
...@@ -181,15 +180,7 @@ namespace dlib ...@@ -181,15 +180,7 @@ namespace dlib
else else
alpha = join_cols(alpha,zeros_matrix<scalar_type>(1,1)); alpha = join_cols(alpha,zeros_matrix<scalar_type>(1,1));
// Check the objective value at w_cur and see if it is better than cur_obj = 0.5*trans(w)*w + C*cur_risk;
// the best seen so far.
const scalar_type cur_obj = 0.5*trans(w_cur)*w_cur + C*cur_risk;
if (cur_obj < best_obj)
{
best_obj = cur_obj;
// move w_cur into w
w.swap(w_cur);
}
// compute kernel matrix for all the planes // compute kernel matrix for all the planes
...@@ -208,9 +199,9 @@ namespace dlib ...@@ -208,9 +199,9 @@ namespace dlib
} }
// solve the cutting plane subproblem for the next w_cur. We solve it to an // solve the cutting plane subproblem for the next w. We solve it to an
// accuracy that is related to how big the error gap is // accuracy that is related to how big the error gap is
scalar_type eps = std::min<scalar_type>(sub_eps, 0.1*(best_obj-cp_obj)) ; scalar_type eps = std::min<scalar_type>(sub_eps, 0.1*(cur_obj-cp_obj)) ;
// just a sanity check // just a sanity check
if (eps < 1e-16) if (eps < 1e-16)
eps = 1e-16; eps = 1e-16;
...@@ -218,14 +209,14 @@ namespace dlib ...@@ -218,14 +209,14 @@ namespace dlib
// iteration as the starting point. // iteration as the starting point.
solve_qp_using_smo(K, vector_to_matrix(bs), alpha, eps, sub_max_iter); solve_qp_using_smo(K, vector_to_matrix(bs), alpha, eps, sub_max_iter);
// construct the w_cur that minimized the subproblem. // construct the w that minimized the subproblem.
w_cur = 0; w = 0;
rr = 0; rr = 0;
for (typename std::list<vect_type>::iterator i = planes.begin(); i != planes.end(); ++i) for (typename std::list<vect_type>::iterator i = planes.begin(); i != planes.end(); ++i)
{ {
if (alpha(rr) != 0) if (alpha(rr) != 0)
{ {
w_cur -= alpha(rr)*(*i); w -= alpha(rr)*(*i);
miss_count[rr] = 0; miss_count[rr] = 0;
} }
else else
...@@ -237,10 +228,10 @@ namespace dlib ...@@ -237,10 +228,10 @@ namespace dlib
// Compute the lower bound on the true objective given to us by the cutting // Compute the lower bound on the true objective given to us by the cutting
// plane subproblem. // plane subproblem.
cp_obj = -0.5*trans(w_cur)*w_cur + trans(alpha)*vector_to_matrix(bs); cp_obj = -0.5*trans(w)*w + trans(alpha)*vector_to_matrix(bs);
// report current status // report current status
if (problem.optimization_status(best_obj, best_obj - cp_obj, planes.size(), counter)) if (problem.optimization_status(cur_obj, cur_obj - cp_obj, planes.size(), counter))
break; break;
// If it has been a while since a cutting plane was an active constraint then // If it has been a while since a cutting plane was an active constraint then
...@@ -259,7 +250,7 @@ namespace dlib ...@@ -259,7 +250,7 @@ namespace dlib
} }
return best_obj; return cur_obj;
} }
private: private:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment