Commit f25ca01e authored by Davis King

The oca solver now supports taking a user-supplied prior vector. That is,
it lets you use a regularizer like ||w-prior||^2 instead of the usual ||w||^2
regularizer.
parent 2c03e133
...@@ -115,13 +115,55 @@ namespace dlib ...@@ -115,13 +115,55 @@ namespace dlib
unsigned long num_nonnegative = 0, unsigned long num_nonnegative = 0,
unsigned long force_weight_to_1 = std::numeric_limits<unsigned long>::max() unsigned long force_weight_to_1 = std::numeric_limits<unsigned long>::max()
) const ) const
{
matrix_type empty_prior;
return oca_impl(problem, w, empty_prior, false, num_nonnegative, force_weight_to_1);
}
template <
typename matrix_type
>
typename matrix_type::type operator() (
const oca_problem<matrix_type>& problem,
matrix_type& w,
const matrix_type& prior
) const
{
// make sure requires clause is not broken
DLIB_ASSERT(is_col_vector(prior) && prior.size() == problem.get_num_dimensions(),
"\t scalar_type oca::operator()"
<< "\n\t The prior vector does not have the correct dimensions."
<< "\n\t is_col_vector(prior): " << is_col_vector(prior)
<< "\n\t prior.size(): " << prior.size()
<< "\n\t problem.get_num_dimensions(): " << problem.get_num_dimensions()
<< "\n\t this: " << this
);
// disable the force weight to 1 option for this mode. We also disable the
// non-negative constraints.
unsigned long force_weight_to_1 = std::numeric_limits<unsigned long>::max();
return oca_impl(problem, w, prior, true, 0, force_weight_to_1);
}
private:
template <
typename matrix_type
>
typename matrix_type::type oca_impl (
const oca_problem<matrix_type>& problem,
matrix_type& w,
const matrix_type& prior,
bool have_prior,
unsigned long num_nonnegative,
unsigned long force_weight_to_1
) const
{ {
const unsigned long num_dims = problem.get_num_dimensions(); const unsigned long num_dims = problem.get_num_dimensions();
// make sure requires clause is not broken // make sure requires clause is not broken
DLIB_ASSERT(problem.get_c() > 0 && DLIB_ASSERT(problem.get_c() > 0 &&
problem.get_num_dimensions() > 0, problem.get_num_dimensions() > 0,
"\t void oca::operator()" "\t scalar_type oca::operator()"
<< "\n\t The oca_problem is invalid" << "\n\t The oca_problem is invalid"
<< "\n\t problem.get_c(): " << problem.get_c() << "\n\t problem.get_c(): " << problem.get_c()
<< "\n\t problem.get_num_dimensions(): " << num_dims << "\n\t problem.get_num_dimensions(): " << num_dims
...@@ -172,6 +214,7 @@ namespace dlib ...@@ -172,6 +214,7 @@ namespace dlib
K(0,0) = 0; K(0,0) = 0;
} }
const double prior_norm = have_prior ? 0.5*dot(prior,prior) : 0;
unsigned long counter = 0; unsigned long counter = 0;
while (true) while (true)
...@@ -196,6 +239,9 @@ namespace dlib ...@@ -196,6 +239,9 @@ namespace dlib
set_rowm(new_plane, range(force_weight_to_1, new_plane.size()-1)) = 0; set_rowm(new_plane, range(force_weight_to_1, new_plane.size()-1)) = 0;
} }
if (have_prior)
bs.push_back(cur_risk - dot(w,new_plane) + dot(prior,new_plane));
else
bs.push_back(cur_risk - dot(w,new_plane)); bs.push_back(cur_risk - dot(w,new_plane));
planes.add(planes.size(), new_plane); planes.add(planes.size(), new_plane);
miss_count.push_back(0); miss_count.push_back(0);
...@@ -208,10 +254,11 @@ namespace dlib ...@@ -208,10 +254,11 @@ namespace dlib
alpha = join_cols(alpha,zeros_matrix<scalar_type>(1,1)); alpha = join_cols(alpha,zeros_matrix<scalar_type>(1,1));
const scalar_type wnorm = 0.5*trans(w)*w; const scalar_type wnorm = 0.5*trans(w)*w;
cur_obj = wnorm + C*cur_risk; const double prior_part = have_prior? dot(w,prior) : 0;
cur_obj = wnorm + C*cur_risk + prior_norm-prior_part;
// report current status // report current status
const scalar_type risk_gap = cur_risk - (cp_obj-wnorm)/C; const scalar_type risk_gap = cur_risk - (cp_obj-wnorm+prior_part-prior_norm)/C;
if (counter > 0 && problem.optimization_status(cur_obj, cur_obj - cp_obj, if (counter > 0 && problem.optimization_status(cur_obj, cur_obj - cp_obj,
cur_risk, risk_gap, planes.size(), counter)) cur_risk, risk_gap, planes.size(), counter))
{ {
...@@ -273,7 +320,8 @@ namespace dlib ...@@ -273,7 +320,8 @@ namespace dlib
// Compute the lower bound on the true objective given to us by the cutting // Compute the lower bound on the true objective given to us by the cutting
// plane subproblem. // plane subproblem.
cp_obj = -0.5*trans(w)*w + trans(alpha)*mat(bs); cp_obj = -0.5*trans(w)*w + trans(alpha)*mat(bs);
if (have_prior)
w += prior;
// If it has been a while since a cutting plane was an active constraint then // If it has been a while since a cutting plane was an active constraint then
// we should throw it away. // we should throw it away.
...@@ -296,8 +344,6 @@ namespace dlib ...@@ -296,8 +344,6 @@ namespace dlib
return cur_obj; return cur_obj;
} }
private:
double sub_eps; double sub_eps;
unsigned long sub_max_iter; unsigned long sub_max_iter;
......
...@@ -19,12 +19,18 @@ namespace dlib ...@@ -19,12 +19,18 @@ namespace dlib
problems solved by the oca optimizer defined later in this file. problems solved by the oca optimizer defined later in this file.
OCA solves optimization problems with the following form: OCA solves optimization problems with the following form:
Minimize: f(w) == 0.5*dot(w,w) + C*R(w) Minimize: f(w) == 0.5*length_squared(w) + C*R(w)
Where R(w) is a user-supplied convex function and C > 0. Optionally, Where R(w) is a user-supplied convex function and C > 0. Optionally,
there can also be non-negativity constraints on some or all of the there can also be non-negativity constraints on some or all of the
elements of w. elements of w.
Or it can alternatively solve:
Minimize: f(w) == 0.5*length_squared(w-prior) + C*R(w)
Where prior is a user supplied vector and R(w) has the same
interpretation as above.
Note that the stopping condition must be provided by the user Note that the stopping condition must be provided by the user
in the form of the optimization_status() function. in the form of the optimization_status() function.
...@@ -124,12 +130,18 @@ namespace dlib ...@@ -124,12 +130,18 @@ namespace dlib
by the oca_problem abstract class. by the oca_problem abstract class.
For reference, OCA solves optimization problems with the following form: For reference, OCA solves optimization problems with the following form:
Minimize: f(w) == 0.5*dot(w,w) + C*R(w) Minimize: f(w) == 0.5*length_squared(w) + C*R(w)
Where R(w) is a user-supplied convex function and C > 0. Optionally, Where R(w) is a user-supplied convex function and C > 0. Optionally,
this object can also add non-negativity constraints to some or all this object can also add non-negativity constraints to some or all
of the elements of w. of the elements of w.
Or it can alternatively solve:
Minimize: f(w) == 0.5*length_squared(w-prior) + C*R(w)
Where prior is a user supplied vector and R(w) has the same
interpretation as above.
For a detailed discussion you should consult the following papers For a detailed discussion you should consult the following papers
from the Journal of Machine Learning Research: from the Journal of Machine Learning Research:
...@@ -162,7 +174,9 @@ namespace dlib ...@@ -162,7 +174,9 @@ namespace dlib
- problem.get_c() > 0 - problem.get_c() > 0
- problem.get_num_dimensions() > 0 - problem.get_num_dimensions() > 0
ensures ensures
- solves the given oca problem and stores the solution in #w - solves the given oca problem and stores the solution in #w. In particular,
this function solves:
Minimize: f(w) == 0.5*length_squared(w) + C*R(w)
- The optimization algorithm runs until problem.optimization_status() - The optimization algorithm runs until problem.optimization_status()
indicates it is time to stop. indicates it is time to stop.
- returns the objective value at the solution #w - returns the objective value at the solution #w
...@@ -183,6 +197,30 @@ namespace dlib ...@@ -183,6 +197,30 @@ namespace dlib
values of 0. values of 0.
!*/ !*/
template <
typename matrix_type
>
typename matrix_type::type operator() (
const oca_problem<matrix_type>& problem,
matrix_type& w,
const matrix_type& prior
) const;
/*!
requires
- problem.get_c() > 0
- problem.get_num_dimensions() > 0
- is_col_vector(prior) == true
- prior.size() == problem.get_num_dimensions()
ensures
- solves the given oca problem and stores the solution in #w.
- In this mode, we solve a version of the problem with a different
regularizer. In particular, this function solves:
Minimize: f(w) == 0.5*length_squared(w-prior) + C*R(w)
- Unlike the other overload of operator(), this mode places no
non-negativity constraints on the elements of #w and does not use the
force_weight_to_1 option.
- The optimization algorithm runs until problem.optimization_status()
indicates it is time to stop.
- returns the objective value at the solution #w
!*/
void set_subproblem_epsilon ( void set_subproblem_epsilon (
double eps double eps
); );
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment