The oca solver now supports taking a user-supplied prior vector. That is,
it lets you use a regularizer like ||w-prior||^2 instead of the usual ||w||^2 regularizer.

The oca solver now supports taking a user-supplied prior vector. That is,
it lets you use a regularizer like ||w-prior||^2 instead of the usual ||w||^2 regularizer.
f25ca01e · Davis King · 2c03e133 · f25ca01e · f25ca01e
Commit f25ca01e authored Feb 10, 2014 by Davis King
Hide whitespace changes
Inline Side-by-side

Showing with 94 additions and 10 deletions

optimization_oca.h dlib/optimization/optimization_oca.h +53 -7

optimization_oca_abstract.h dlib/optimization/optimization_oca_abstract.h +41 -3

No files found.
--- a/dlib/optimization/optimization_oca.h
+++ b/dlib/optimization/optimization_oca.h
@@ -115,13 +115,55 @@ namespace dlib
            unsigned long num_nonnegative = 0,
            unsigned long force_weight_to_1 = std::numeric_limits<unsigned long>::max()
        ) const
+        {
+            matrix_type empty_prior;
+            return oca_impl(problem, w, empty_prior, false, num_nonnegative, force_weight_to_1);
+        }
+
+        template <
+            typename matrix_type
+            >
+        typename matrix_type::type operator() (
+            const oca_problem<matrix_type>& problem,
+            matrix_type& w,
+            const matrix_type& prior
+        ) const
+        {
+            // Prior-regularized overload.  Check the requires clause first: prior must be a column vector with one element per problem dimension.
+            DLIB_ASSERT(is_col_vector(prior) && prior.size() == problem.get_num_dimensions(),
+                "\t scalar_type oca::operator()"
+                << "\n\t The prior vector does not have the correct dimensions."
+                << "\n\t is_col_vector(prior):         " << is_col_vector(prior) 
+                << "\n\t prior.size():                 " << prior.size() 
+                << "\n\t problem.get_num_dimensions(): " << problem.get_num_dimensions() 
+                << "\n\t this:                         " << this
+                );
+            // This mode supports neither optional constraint: the max() sentinel turns off the force-weight-to-1
+            // option, and the 0 passed as num_nonnegative below turns off the non-negativity constraints.
+            unsigned long force_weight_to_1 = std::numeric_limits<unsigned long>::max();
+            return oca_impl(problem, w, prior, true, 0, force_weight_to_1);
+        }
+
+    private:
+
+        template <
+            typename matrix_type
+            >
+        typename matrix_type::type oca_impl (
+            const oca_problem<matrix_type>& problem,
+            matrix_type& w,
+            const matrix_type& prior,
+            bool have_prior,
+            unsigned long num_nonnegative,
+            unsigned long force_weight_to_1
+        ) const
        {
            const unsigned long num_dims = problem.get_num_dimensions();

            // make sure requires clause is not broken
            DLIB_ASSERT(problem.get_c() > 0 &&
                        problem.get_num_dimensions() > 0,
-                "\t void oca::operator()"
+                "\t scalar_type oca::operator()"
                << "\n\t The oca_problem is invalid"
                << "\n\t problem.get_c():              " << problem.get_c() 
                << "\n\t problem.get_num_dimensions(): " << num_dims 
@@ -172,6 +214,7 @@ namespace dlib
                K(0,0) = 0;
            }

+            const double prior_norm = have_prior ?  0.5*dot(prior,prior) : 0;

            unsigned long counter = 0;
            while (true)
@@ -196,7 +239,10 @@ namespace dlib
                    set_rowm(new_plane, range(force_weight_to_1, new_plane.size()-1)) = 0;
                }

-                bs.push_back(cur_risk - dot(w,new_plane));
+                if (have_prior)
+                    bs.push_back(cur_risk - dot(w,new_plane) + dot(prior,new_plane));
+                else
+                    bs.push_back(cur_risk - dot(w,new_plane));
                planes.add(planes.size(), new_plane);
                miss_count.push_back(0);

@@ -208,10 +254,11 @@ namespace dlib
                    alpha = join_cols(alpha,zeros_matrix<scalar_type>(1,1));

                const scalar_type wnorm = 0.5*trans(w)*w;
-                cur_obj = wnorm + C*cur_risk;
+                const double prior_part = have_prior? dot(w,prior) : 0;
+                cur_obj = wnorm + C*cur_risk + prior_norm-prior_part;

                // report current status
-                const scalar_type risk_gap = cur_risk - (cp_obj-wnorm)/C;
+                const scalar_type risk_gap = cur_risk - (cp_obj-wnorm+prior_part-prior_norm)/C;
                if (counter > 0 && problem.optimization_status(cur_obj, cur_obj - cp_obj, 
                                                               cur_risk, risk_gap, planes.size(), counter))
                {
@@ -273,7 +320,8 @@ namespace dlib
                // Compute the lower bound on the true objective given to us by the cutting 
                // plane subproblem.
                cp_obj = -0.5*trans(w)*w + trans(alpha)*mat(bs);
-
+                if (have_prior)
+                    w += prior;

                // If it has been a while since a cutting plane was an active constraint then
                // we should throw it away.
@@ -296,8 +344,6 @@ namespace dlib
            return cur_obj;
        }

-    private:
-
        double sub_eps;

        unsigned long sub_max_iter;

--- a/dlib/optimization/optimization_oca_abstract.h
+++ b/dlib/optimization/optimization_oca_abstract.h
@@ -19,12 +19,18 @@ namespace dlib
                problems solved by the oca optimizer defined later in this file.

                OCA solves optimization problems with the following form:
-                    Minimize: f(w) == 0.5*dot(w,w) + C*R(w)
+                    Minimize: f(w) == 0.5*length_squared(w) + C*R(w)

                    Where R(w) is a user-supplied convex function and C > 0.  Optionally,
                    there can also be non-negativity constraints on some or all of the 
                    elements of w.

+                Or it can alternatively solve:
+                    Minimize: f(w) == 0.5*length_squared(w-prior) + C*R(w)
+
+                    Where prior is a user supplied vector and R(w) has the same
+                    interpretation as above.
+                       

                Note that the stopping condition must be provided by the user
                in the form of the optimization_status() function.
@@ -124,12 +130,18 @@ namespace dlib
                by the oca_problem abstract class.  

                For reference, OCA solves optimization problems with the following form:
-                    Minimize: f(w) == 0.5*dot(w,w) + C*R(w)
+                    Minimize: f(w) == 0.5*length_squared(w) + C*R(w)

                    Where R(w) is a user-supplied convex function and C > 0.  Optionally,
                    this object can also add non-negativity constraints to some or all
                    of the elements of w.

+                Or it can alternatively solve:
+                    Minimize: f(w) == 0.5*length_squared(w-prior) + C*R(w)
+
+                    Where prior is a user supplied vector and R(w) has the same
+                    interpretation as above.
+                       

                For a detailed discussion you should consult the following papers
                from the Journal of Machine Learning Research:
@@ -162,7 +174,9 @@ namespace dlib
                - problem.get_c() > 0
                - problem.get_num_dimensions() > 0
            ensures
-                - solves the given oca problem and stores the solution in #w
+                - solves the given oca problem and stores the solution in #w.  In particular,
+                  this function solves:
+                    Minimize: f(w) == 0.5*length_squared(w) + C*R(w)
                - The optimization algorithm runs until problem.optimization_status() 
                  indicates it is time to stop.
                - returns the objective value at the solution #w
@@ -183,6 +197,30 @@ namespace dlib
                          values of 0.
        !*/

+        template <
+            typename matrix_type
+            >
+        typename matrix_type::type operator() (
+            const oca_problem<matrix_type>& problem,
+            matrix_type& w,
+            const matrix_type& prior
+        ) const;
+        /*!
+            requires
+                - problem.get_c() > 0
+                - problem.get_num_dimensions() > 0
+                - is_col_vector(prior) == true
+                - prior.size() == problem.get_num_dimensions()
+            ensures
+                - solves the given oca problem and stores the solution in #w.
+                - In this mode the regularizer is centered on the user-supplied prior
+                  vector rather than on 0.  In particular, this function solves:
+                    Minimize: f(w) == 0.5*length_squared(w-prior) + C*R(w)
+                - The non-negativity and force_weight_to_1 options are not available in this mode.
+                - The optimization algorithm runs until problem.optimization_status() indicates it is time to stop.
+                - returns the objective value at the solution #w
+        !*/
+
        void set_subproblem_epsilon (
            double eps
        );