Fixed the distributed version of the structural svm solver to work with the

recent changes to the core solver. Also added support for the nuclear norm regularization and cache refinement options.

Fixed the distributed version of the structural svm solver to work with the
recent changes to the core solver. Also added support for the nuclear norm regularization and cache refinement options.
ea460afd · Davis King · 43fdc03f · ea460afd · ea460afd · ea460afd
Commit ea460afd authored Nov 08, 2013 by Davis King
3 changed files
--- a/dlib/svm/structural_svm_distributed.h
+++ b/dlib/svm/structural_svm_distributed.h
--- a/dlib/svm/structural_svm_distributed_abstract.h
+++ b/dlib/svm/structural_svm_distributed_abstract.h
@@ -47,6 +47,8 @@ namespace dlib
                - Note that the following parameters within the given problem are ignored:
                    - problem.get_c()
                    - problem.get_epsilon()
+                    - problem.get_cache_based_epsilon()
+                    - problem.num_nuclear_norm_regularizers()
                    - whether the problem is verbose or not
                  Instead, they are defined by the svm_struct_controller_node. Note, however,
                  that the problem.get_max_cache_size() parameter is meaningful and controls
@@ -145,6 +147,87 @@ namespace dlib
                  optimal value".
        !*/
+        double get_cache_based_epsilon (
+        ) const;
+        /*!
+            ensures
+                - if (get_max_cache_size() != 0) then
+                    - The solver will not stop when the average sample risk is within
+                      get_epsilon() of its optimal value.  Instead, it will keep running
+                      but will run the optimizer completely on the cache until the average
+                      sample risk is within #get_cache_based_epsilon() of its optimal
+                      value.  This means that it will perform this additional refinement in
+                      the solution accuracy without making any additional calls to the
+                      separation_oracle().  This is useful when using a nuclear norm
+                      regularization term because it allows you to quickly solve the
+                      optimization problem to a high precision, which in the case of a
+                      nuclear norm regularized problem means that many of the learned
+                      matrices will be low rank or very close to low rank due to the
+                      nuclear norm regularizer.  This may not happen without solving the
+                      problem to a high accuracy or their ranks may be difficult to
+                      determine, so the extra accuracy given by the cache based refinement
+                      is very useful.  Finally, note that we include the nuclear norm term
+                      as part of the "risk" for the purposes of determining when to stop.  
+                - else
+                    - The value of #get_cache_based_epsilon() has no effect.
+        !*/
+        void set_cache_based_epsilon (
+            double eps
+        );
+        /*!
+            requires
+                - eps > 0
+            ensures
+                - #get_cache_based_epsilon() == eps
+        !*/
+        void add_nuclear_norm_regularizer (
+            long first_dimension,
+            long rows,
+            long cols,
+            double regularization_strength
+        );
+        /*!
+            requires
+                - 0 <= first_dimension < number of dimensions in problem 
+                - 0 <= rows
+                - 0 <= cols
+                - first_dimension+rows*cols <= number of dimensions in problem
+                - 0 < regularization_strength
+            ensures
+                - Adds a nuclear norm regularization term to the optimization problem
+                  solved by this object.  That is, instead of solving:
+                    Minimize: h(w) == 0.5*dot(w,w) + C*R(w)
+                  this object will solve:
+                    Minimize: h(w) == 0.5*dot(w,w) + C*R(w) + regularization_strength*nuclear_norm_of(part of w)
+                  where "part of w" is the part of w indicated by the arguments to this
+                  function. In particular, the part of w included in the nuclear norm is
+                  exactly the matrix reshape(rowm(w, range(first_dimension, first_dimension+rows*cols-1)), rows, cols).
+                  Therefore, if you think of the w vector as being the concatenation of a
+                  bunch of matrices then you can use multiple calls to add_nuclear_norm_regularizer() 
+                  to add nuclear norm regularization terms to any of the matrices packed into w.
+                - #num_nuclear_norm_regularizers() == num_nuclear_norm_regularizers() + 1
+        !*/
+        unsigned long num_nuclear_norm_regularizers (
+        ) const; 
+        /*!
+            ensures
+                - returns the number of nuclear norm regularizers that are currently a part
+                  of this optimization problem.  That is, returns the number of times
+                  add_nuclear_norm_regularizer() has been called since the last call to
+                  clear_nuclear_norm_regularizers() or object construction, whichever is
+                  most recent.
+        !*/
+        void clear_nuclear_norm_regularizers (
+        );
+        /*!
+            ensures
+                - #num_nuclear_norm_regularizers() == 0
+        !*/
        void be_verbose (
        );
        /*!

--- a/dlib/svm/structural_svm_problem.h
+++ b/dlib/svm/structural_svm_problem.h
@@ -14,6 +14,19 @@
 namespace dlib
 {
+// ----------------------------------------------------------------------------------------
+    namespace impl
+    {
+        struct nuclear_norm_regularizer  // Describes one nuclear norm term applied to an nr-by-nc block of the weight vector.
+        {
+            long first_dimension;  // Index into the weight vector of the block's first element.
+            long nr;  // Row count used when the block is reshaped into a matrix.
+            long nc;  // Column count used when the block is reshaped into a matrix.
+            double regularization_strength;  // Multiplier applied to this block's nuclear norm.
+        };
+    }
 // ----------------------------------------------------------------------------------------
    template <
@@ -345,7 +358,7 @@ namespace dlib
                << "\n\t this: " << this
                );
-            nuclear_norm_regularizer temp;
+            impl::nuclear_norm_regularizer temp;
            temp.first_dimension = first_dimension;
            temp.nr = rows;
            temp.nc = cols;
@@ -464,45 +477,6 @@ namespace dlib
            return false;
        }
-        void compute_nuclear_norm_parts(
-            const matrix_type& m,
-            matrix_type& grad,
-            scalar_type& obj
-        ) const
-        {
-            obj = 0;
-            grad.set_size(m.size());
-            grad = 0;
-            matrix<double> u,v,w,f;
-            nuclear_norm_part = 0;
-            for (unsigned long i = 0; i < nuclear_norm_regularizers.size(); ++i)
-            {
-                const long nr = nuclear_norm_regularizers[i].nr;
-                const long nc = nuclear_norm_regularizers[i].nc;
-                const long size = nr*nc;
-                const long idx = nuclear_norm_regularizers[i].first_dimension;
-                const double strength = nuclear_norm_regularizers[i].regularization_strength;
-                f = matrix_cast<double>(reshape(rowm(m, range(idx, idx+size-1)), nr, nc));
-                svd3(f, u,w,v);
-                w = round_zeros(w, std::max(1e-9,max(w)*1e-7)); 
-                const double norm = sum(w);
-                obj += strength*norm;
-                nuclear_norm_part += strength*norm/C;
-                w = w>0;
-                f = u*diagm(w)*trans(v);
-                set_rowm(grad, range(idx, idx+size-1)) = matrix_cast<double>(strength*reshape_to_column_vector(f));
-            }
-            obj /= C;
-            grad /= C;
-        }
        virtual void get_risk (
            matrix_type& w,
            scalar_type& risk,
@@ -566,6 +540,46 @@ namespace dlib
        }
    protected:
+        void compute_nuclear_norm_parts(  // Evaluates every registered nuclear norm term at m; both outputs are scaled by 1/C.
+            const matrix_type& m,  // Vector whose sub-blocks are regularized.
+            matrix_type& grad,  // Output: resized to m.size(); receives each block's gradient contribution.
+            scalar_type& obj  // Output: sum over blocks of strength*norm, divided by C.
+        ) const
+        {
+            obj = 0;
+            grad.set_size(m.size());
+            grad = 0;  // Entries not covered by any regularized block remain zero.
+            matrix<double> u,v,w,f;
+            nuclear_norm_part = 0;  // Member declared outside this hunk; assigned from a const method, so presumably mutable.
+            for (unsigned long i = 0; i < nuclear_norm_regularizers.size(); ++i)
+            {
+                const long nr = nuclear_norm_regularizers[i].nr;
+                const long nc = nuclear_norm_regularizers[i].nc;
+                const long size = nr*nc;
+                const long idx = nuclear_norm_regularizers[i].first_dimension;
+                const double strength = nuclear_norm_regularizers[i].regularization_strength;
+                f = matrix_cast<double>(reshape(rowm(m, range(idx, idx+size-1)), nr, nc));  // NOTE(review): size == 0 makes the range end idx-1 -- confirm empty blocks are handled.
+                svd3(f, u,w,v);  // Presumably yields f == u*diagm(w)*trans(v) with w the singular values -- confirm against dlib's svd3 spec.
+                w = round_zeros(w, std::max(1e-9,max(w)*1e-7)); // Presumably zeroes entries of w below max(1e-9, 1e-7*largest) -- confirm round_zeros semantics.
+                const double norm = sum(w);  // Sum of the retained entries of w, used as the block's nuclear norm.
+                obj += strength*norm;
+                nuclear_norm_part += strength*norm/C;  // Running total mirrors obj but is divided by C per term.
+                w = w>0;  // Replace w with a 0/1 indicator of its positive entries.
+                f = u*diagm(w)*trans(v);  // Rebuild a matrix using only the directions whose w entry was positive.
+                set_rowm(grad, range(idx, idx+size-1)) = matrix_cast<double>(strength*reshape_to_column_vector(f));  // Store it, scaled by strength, in the same slice of grad the block was read from.
+            }
+            obj /= C;  // Final 1/C scaling of the objective value.
+            grad /= C;  // Final 1/C scaling of the gradient.
+        }
        void separation_oracle_cached (
            const long idx,
            const matrix_type& current_solution,
@@ -580,16 +594,8 @@ namespace dlib
                                                loss,
                                                psi);
        }
-    private:
-        struct nuclear_norm_regularizer
+        std::vector<impl::nuclear_norm_regularizer> nuclear_norm_regularizers;
-        {
-            long first_dimension;
-            long nr;
-            long nc;
-            double regularization_strength;
-        };
-        std::vector<nuclear_norm_regularizer> nuclear_norm_regularizers;
        mutable scalar_type saved_current_risk_gap;
        mutable matrix_type psi_true;