Added a trust region optimizer.

--HG-- extra : convert_revision : svn%3Afdd8eb12-d10e-0410-9acb-85c331704f74/trunk%403919

Added a trust region optimizer.
--HG-- extra : convert_revision : svn%3Afdd8eb12-d10e-0410-9acb-85c331704f74/trunk%403919
682f6b3d · Davis King · 571aff20 · 682f6b3d · 682f6b3d · 682f6b3d
Commit 682f6b3d authored Nov 26, 2010 by Davis King
6 changed files
--- a/dlib/optimization.h
+++ b/dlib/optimization.h
@@ -7,6 +7,7 @@
 #include "optimization/optimization_bobyqa.h"
 #include "optimization/optimization_solve_qp_using_smo.h"
 #include "optimization/optimization_oca.h"
+#include "optimization/optimization_trust_region.h"

 #endif // DLIB_OPTIMIZATIOn_HEADER


--- a/dlib/optimization/optimization_trust_region.h
+++ b/dlib/optimization/optimization_trust_region.h
--- a/dlib/optimization/optimization_trust_region_abstract.h
+++ b/dlib/optimization/optimization_trust_region_abstract.h
+// Copyright (C) 2010  Davis E. King (davis@dlib.net)
+// License: Boost Software License   See LICENSE.txt for the full license.
+#ifndef DLIB_OPTIMIZATION_TRUST_REGIoN_H__
+#define DLIB_OPTIMIZATION_TRUST_REGIoN_H__
+
+#include "../matrix.h"
+
+namespace dlib
+{
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename EXP1,
+        typename EXP2,
+        typename T, long NR, long NC, typename MM, typename L
+        >
+    unsigned long solve_trust_region_subproblem ( 
+        const matrix_exp<EXP1>& B,
+        const matrix_exp<EXP2>& g,
+        const typename EXP1::type radius,
+        matrix<T,NR,NC,MM,L>& p,
+        double eps,
+        unsigned long max_iter
+    );
+    /*!
+        requires
+            - B == trans(B)
+              (i.e. B should be a symmetric matrix)
+            - B.nr() == B.nc()
+            - is_col_vector(g) == true
+            - g.size() == B.nr()
+            - p is capable of containing a column vector the size of g
+              (i.e. p = g; should be a legal expression)
+            - radius > 0
+            - eps > 0
+            - max_iter > 0
+        ensures
+            - This function solves the following optimization problem:
+                Minimize: f(p) == 0.5*trans(p)*B*p + trans(g)*p
+                subject to the following constraint:
+                    - length(p) <= radius
+            - #p == the solution found for the above problem.
+              (NOTE(review): inferred from p being the only non-const reference
+              argument and from the use of #p below; the implementation is not
+              visible in this file, so confirm.)
+            - returns the number of iterations performed.  If this method fails to
+              converge to eps accuracy then the number returned will be max_iter+1.
+            - if this function returns 0 or 1 then we are not hitting the radius bound.
+              Otherwise, the radius constraint is active and std::abs(length(#p)-radius) <= eps.
+        notes
+            - radius is declared as EXP1::type (presumably the element type of B), 
+              which is a non-deduced template context.  The argument you pass is 
+              therefore implicitly converted to that type, so passing an integer 
+              literal such as 1 is fine.
+    !*/
+
+// ----------------------------------------------------------------------------------------
+
+    class function_model 
+    {
+        /*!
+            WHAT THIS OBJECT REPRESENTS
+                This object defines the interface for a function model
+                used by the trust-region optimizers defined below.
+
+                In particular, this object represents a function f() and
+                its associated derivative and hessian.
+
+                Everything declared here must be publicly accessible, since 
+                the optimizers refer to the nested column_vector type and call 
+                the member functions from outside the class.
+        !*/
+
+    public:
+
+        // Define the type used to represent column vectors
+        typedef matrix<double,0,1> column_vector;
+        // Define the type used to represent the hessian matrix
+        typedef matrix<double> general_matrix;
+
+        double operator() ( 
+            const column_vector& x
+        ) const;
+        /*!
+            ensures
+                - returns f(x)
+                  (i.e. evaluates this model at the given point and returns the value)
+        !*/
+
+        void get_derivative_and_hessian (
+            const column_vector& x,
+            column_vector& d,
+            general_matrix& h
+        ) const;
+        /*!
+            ensures
+                - #d == the derivative of f() at x
+                - #h == the hessian matrix of f() at x
+                - is_col_vector(#d) == true
+                - #d.size() == x.size()
+                - #h.nr() == #h.nc() == x.size()
+                - #h == trans(#h)
+        !*/
+    };
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename stop_strategy_type,
+        typename funct_model
+        >
+    double find_min_trust_region (
+        stop_strategy_type stop_strategy,
+        const funct_model& model, 
+        typename funct_model::column_vector& x, 
+        double radius = 1
+    );
+    /*!
+        requires
+            - stop_strategy == an object that defines a stop strategy such as one of 
+              the objects from dlib/optimization/optimization_stop_strategies_abstract.h
+            - is_col_vector(x) == true
+            - x is a modifiable object of exactly type funct_model::column_vector.
+              (funct_model is deduced from the model argument only; x is bound by 
+              non-const reference to the nested type, so a temporary or a matrix of 
+              a different type will not compile.)
+            - radius > 0
+            - model must be an object with an interface as defined by the function_model
+              example object shown above.
+        ensures
+            - Performs an unconstrained minimization of the function defined by model 
+              starting from the initial point x.  This function uses a trust region
+              algorithm to perform the minimization.  The radius parameter defines
+              the initial size of the trust region.
+            - The function is optimized until stop_strategy decides that an acceptable 
+              point has been found or the trust region subproblem fails to make progress.
+            - stop_strategy is passed by value, so this function works on its own copy
+              of the object you supply.
+            - #x == the value of x that was found to minimize model()
+            - returns model(#x). 
+            - When this function makes calls to model.get_derivative_and_hessian() it always 
+              does so by first calling model() and then calling model.get_derivative_and_hessian().  
+              That is, any call to model.get_derivative_and_hessian(val) will always be 
+              preceded by a call to model(val) with the same value.  This way you can reuse 
+              any redundant computations performed by model() and model.get_derivative_and_hessian()
+              as appropriate.
+    !*/
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename stop_strategy_type,
+        typename funct_model
+        >
+    double find_max_trust_region (
+        stop_strategy_type stop_strategy,
+        const funct_model& model, 
+        typename funct_model::column_vector& x, 
+        double radius = 1
+    );
+    /*!
+        requires
+            - stop_strategy == an object that defines a stop strategy such as one of 
+              the objects from dlib/optimization/optimization_stop_strategies_abstract.h
+            - is_col_vector(x) == true
+            - x is a modifiable object of exactly type funct_model::column_vector.
+              (funct_model is deduced from the model argument only; x is bound by 
+              non-const reference to the nested type, so a temporary or a matrix of 
+              a different type will not compile.)
+            - radius > 0
+            - model must be an object with an interface as defined by the function_model
+              example object shown above.
+        ensures
+            - Performs an unconstrained maximization of the function defined by model 
+              starting from the initial point x.  This function uses a trust region
+              algorithm to perform the maximization.  The radius parameter defines
+              the initial size of the trust region.
+            - The function is optimized until stop_strategy decides that an acceptable 
+              point has been found or the trust region subproblem fails to make progress.
+            - stop_strategy is passed by value, so this function works on its own copy
+              of the object you supply.
+            - #x == the value of x that was found to maximize model()
+            - returns model(#x). 
+            - When this function makes calls to model.get_derivative_and_hessian() it always 
+              does so by first calling model() and then calling model.get_derivative_and_hessian().  
+              That is, any call to model.get_derivative_and_hessian(val) will always be 
+              preceded by a call to model(val) with the same value.  This way you can reuse 
+              any redundant computations performed by model() and model.get_derivative_and_hessian()
+              as appropriate.
+            - Note that this function solves the maximization problem by converting it 
+              into a minimization problem.  Therefore, the values of model() and its derivative
+              reported to the stopping strategy will be negated.  That is, stop_strategy
+              will see -model() and -derivative.  All this really means is that the status 
+              messages from a stopping strategy in verbose mode will display a negated objective
+              value.
+    !*/
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_OPTIMIZATION_TRUST_REGIoN_H__
+
+
--- a/dlib/test/CMakeLists.txt
+++ b/dlib/test/CMakeLists.txt
@@ -83,12 +83,13 @@ set (tests
   statistics.cpp
   std_vector_c.cpp
   string.cpp
-   svm.cpp
   svm_c_linear.cpp
+   svm.cpp
   thread_pool.cpp
   threads.cpp
   timer.cpp
   tokenizer.cpp
+   trust_region.cpp
   tuple.cpp
   type_safe_union.cpp
   )

--- a/dlib/test/makefile
+++ b/dlib/test/makefile
@@ -99,6 +99,7 @@ SRC += thread_pool.cpp
 SRC += threads.cpp
 SRC += timer.cpp
 SRC += tokenizer.cpp
+SRC += trust_region.cpp
 SRC += tuple.cpp
 SRC += type_safe_union.cpp


--- a/dlib/test/trust_region.cpp
+++ b/dlib/test/trust_region.cpp
+// Copyright (C) 2010  Davis E. King (davis@dlib.net)
+// License: Boost Software License   See LICENSE.txt for the full license.
+
+
+#include <dlib/optimization.h>
+#include <cmath>
+#include <cstdlib>
+#include <ctime>
+#include <limits>
+#include <sstream>
+#include <string>
+#include <vector>
+#include "../rand.h"
+
+#include "tester.h"
+
+
+namespace  
+{
+
+    using namespace test;
+    using namespace dlib;
+    using namespace std;
+
+    // Logger used by every test in this file; messages are emitted under the
+    // name "test.trust_region".
+    logger dlog("test.trust_region");
+
+// ----------------------------------------------------------------------------------------
+
+    // Evaluates Rosenbrock's function, 100*(y - x^2)^2 + (1 - x)^2, at the
+    // point m == (x, y).
+    template <typename T>
+    T rosen ( const matrix<T,2,1>& m)
+    {
+        const T x = m(0); 
+        const T y = m(1);
+
+        // the two terms that get squared
+        const T valley = y - x*x;
+        const T offset = 1 - x;
+
+        return 100.0*pow(valley,2) + pow(offset,2);
+    }
+
+    // Returns the gradient of rosen() at the point m == (x, y) as a column
+    // vector of length 2.
+    template <typename T>
+    const matrix<T,2,1> rosen_derivative ( const matrix<T,2,1>& m)
+    {
+        const T x = m(0);
+        const T y = m(1);
+
+        // partial derivatives of rosen() with respect to x and to y
+        const T d_dx = -400*x*(y-x*x) - 2*(1-x);
+        const T d_dy = 200*(y-x*x);
+
+        matrix<T,2,1> grad(2);
+        grad(0) = d_dx;
+        grad(1) = d_dy;
+        return grad;
+    }
+
+    // Returns the 2x2 hessian (matrix of second partial derivatives) of
+    // rosen() at the point m == (x, y).
+    template <typename T>
+    const matrix<T,2,2> rosen_hessian ( const matrix<T,2,1>& m)
+    {
+        const T x = m(0);
+        const T y = m(1);
+
+        // second partial derivatives of rosen()
+        const T d_dxdx = -400*y + 3*400*x*x + 2; 
+        const T d_dxdy = -400*x;
+        const T d_dydy = 200;
+
+        // the hessian is symmetric, so both off-diagonal entries are d_dxdy
+        matrix<T,2,2> hess;
+        hess(0,0) = d_dxdx;
+        hess(1,1) = d_dydy;
+
+        hess(0,1) = d_dxdy;
+        hess(1,0) = d_dxdy;
+        return hess;
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    // Function model for Rosenbrock's function, using fixed size matrices.
+    // It follows the function_model interface expected by
+    // find_min_trust_region() and find_max_trust_region().
+    template <typename T>
+    struct rosen_model
+    {
+        // matrix types used for points/gradients and for hessians
+        typedef matrix<T,2,1> column_vector;
+        typedef matrix<T,2,2> general_matrix;
+
+        // Returns rosen(x).  The argument is taken by const reference to match
+        // the function_model interface and to avoid copying it on every call.
+        T operator() ( const column_vector& x) const
+        {
+            return static_cast<T>(rosen(x));
+        }
+
+        // Sets d to the gradient and h to the hessian of rosen() at x.
+        void get_derivative_and_hessian (
+            const column_vector& x,
+            column_vector& d,
+            general_matrix& h
+        ) const 
+        {
+            d = rosen_derivative(x);
+            h = rosen_hessian(x);
+        }
+
+    };
+
+// ----------------------------------------------------------------------------------------
+
+    // Function model for the negation of Rosenbrock's function, using
+    // dynamically sized matrices.  Maximizing this model is the same as
+    // minimizing rosen(), which is how the find_max_trust_region() test uses it.
+    template <typename T>
+    struct neg_rosen_model
+    {
+        // matrix types used for points/gradients and for hessians
+        typedef matrix<T,0,1> column_vector;
+        typedef matrix<T,0,0> general_matrix;
+
+        // Returns -rosen(x).  The argument is taken by const reference to match
+        // the function_model interface and to avoid copying a dynamically sized
+        // vector on every call.
+        T operator() ( const column_vector& x) const
+        {
+            return -static_cast<T>(rosen<T>(x));
+        }
+
+        // Sets d to the gradient and h to the hessian of -rosen() at x.
+        void get_derivative_and_hessian (
+            const column_vector& x,
+            column_vector& d,
+            general_matrix& h
+        ) const 
+        {
+            d = -matrix_cast<T>(rosen_derivative<T>(x));
+            h = -matrix_cast<T>(rosen_hessian<T>(x));
+        }
+
+    };
+
+// ----------------------------------------------------------------------------------------
+
+    // Random number generator shared by the tests below.  It supplies the
+    // random starting points in test_with_rosen() and the random matrices in
+    // test_trust_region_sub_problem().
+    dlib::rand::float_1a rnd;
+
+    // Checks that find_min_trust_region() and find_max_trust_region() locate the
+    // minimum of Rosenbrock's function, which is at (1,1) with value 0, when
+    // started from a random point with coordinates in [-50, 50].
+    template <typename T>
+    void test_with_rosen()
+    {
+        print_spinner();
+
+        // Floating point results should not be compared with ==.  Accept any
+        // answer within a tolerance scaled to the precision of T instead.
+        const T tol = std::sqrt(std::numeric_limits<T>::epsilon());
+
+        matrix<T,2,1> ans;
+        ans = 1,1;
+
+        matrix<T,2,1> p = 100*matrix_cast<T>(randm(2,1,rnd)) - 50;
+
+        T obj = find_min_trust_region(objective_delta_stop_strategy(0, 100), rosen_model<T>(), p);
+
+        DLIB_TEST_MSG(std::abs(obj) <= tol, "obj: " << obj);
+        DLIB_TEST_MSG(length(p-ans) <= tol, "length(p-ans): " << length(p-ans));
+
+        // Same problem posed as a maximization of -rosen(), this time with a
+        // dynamically sized column vector.
+        matrix<T,0,1> p2 = 100*matrix_cast<T>(randm(2,1,rnd)) - 50;
+        obj = find_max_trust_region(objective_delta_stop_strategy(0, 100), neg_rosen_model<T>(), p2);
+
+        DLIB_TEST_MSG(std::abs(obj) <= tol, "obj: " << obj);
+        DLIB_TEST_MSG(length(p2-ans) <= tol, "length(p2-ans): " << length(p2-ans));
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    // Runs solve_trust_region_subproblem() on a series of small hand-built
+    // problems and checks the returned step p against the expected answer.
+    // All cases use a trust region radius of 1.
+    void test_trust_region_sub_problem()
+    {
+        // Identity B with a zero gradient: the expected step is the zero vector.
+        // The second call repeats the solve with max_iter == 1.
+        dlog << LINFO << "subproblem test 1";
+        {
+            matrix<double,2,2> B;
+            B = 1, 0,
+                0, 1;
+
+            matrix<double,2,1> g, p, ans;
+            g = 0;
+
+            ans = 0;
+
+            solve_trust_region_subproblem(B,g,1,p, 0.001, 10);
+
+            DLIB_TEST(length(p-ans) < 1e-10);
+            solve_trust_region_subproblem(B,g,1,p, 0.001, 1);
+            DLIB_TEST(length(p-ans) < 1e-10);
+        }
+
+        // B == 0.1*identity with g == (1,1): the expected step is the unit
+        // vector pointing along -g.
+        dlog << LINFO << "subproblem test 2";
+        {
+            matrix<double,2,2> B;
+            B = 1, 0,
+                0, 1;
+
+            B *= 0.1;
+
+            matrix<double,2,1> g, p, ans;
+            g = 1;
+
+            ans = -g / length(g);
+
+            solve_trust_region_subproblem(B,g,1,p, 1e-6, 20);
+
+            DLIB_TEST(length(p-ans) < 1e-4);
+        }
+
+        // B == 0 (purely linear objective): the expected step is again the unit
+        // vector pointing along -g.
+        dlog << LINFO << "subproblem test 3";
+        {
+            matrix<double,2,2> B;
+            B = 0, 0,
+                0, 0;
+
+            matrix<double,2,1> g, p, ans;
+            g = 1;
+
+            ans = -g / length(g);
+
+            solve_trust_region_subproblem(B,g,1,p, 1e-6, 20);
+
+            dlog << LINFO << "ans: " << trans(ans);
+            dlog << LINFO << "p: " << trans(p);
+            DLIB_TEST(length(p-ans) < 1e-4);
+        }
+        // NOTE(review): this unconditional return makes subproblem tests 4-6
+        // below unreachable, so they never run.  Whether that is deliberate
+        // (e.g. the solver does not handle these cases yet) cannot be told from
+        // this file -- confirm before removing it.
+        return;
+
+        // B has a negative eigenvalue and the gradient is zero.
+        dlog << LINFO << "subproblem test 4";
+        {
+            matrix<double,2,2> B;
+            B = 2, 0,
+                0, -1;
+
+
+            matrix<double,2,1> g, p, ans;
+            g = 0;
+
+            ans = 0, -1;
+
+            solve_trust_region_subproblem(B,g,1,p, 1e-6, 20);
+
+            DLIB_TEST(length(p-ans) < 1e-4);
+        }
+
+
+        // Same B as test 4, but with g == (0,1).
+        dlog << LINFO << "subproblem test 5";
+        {
+            matrix<double,2,2> B;
+            B = 2, 0,
+                0, -1;
+
+
+            matrix<double,2,1> g, p, ans;
+            g = 0, 1;
+
+            ans = 0, -1;
+
+            solve_trust_region_subproblem(B,g,1,p, 1e-6, 20);
+
+            DLIB_TEST(length(p-ans) < 1e-4);
+        }
+
+        // Random 10x10 matrices of the form 0.01*B*trans(B) with g == 1.  Only
+        // the length of the step is checked: it must lie on the trust region
+        // boundary.  (ans is declared here but never used.)
+        dlog << LINFO << "subproblem test 6";
+        for (int i = 0; i < 10; ++i)
+        {
+            matrix<double,10,10> B;
+
+            B = randm(10,10, rnd);
+
+            B = 0.01*B*trans(B);
+
+
+            matrix<double,10,1> g, p, ans;
+            g = 1;
+
+            solve_trust_region_subproblem(B,g,1,p, 1e-6, 20);
+
+            DLIB_TEST(std::abs(length(p) - 1) < 1e-4);
+        }
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    // Test driver for the trust region optimizer.  The single instance `a`
+    // defined at the end of the class is constructed at static initialization
+    // time with the test's name and description.
+    class optimization_tester : public tester
+    {
+    public:
+        optimization_tester () :
+            tester ("test_trust_region",
+                    "Runs tests on the trust region optimization component.")
+        {
+        }
+
+        // Runs the Rosenbrock tests 50 times each for float and for double (every
+        // run starts from a new random point) and then the subproblem tests.
+        void perform_test ()
+        {
+            const int num_trials = 50;
+
+            dlog << LINFO << "test with rosen<float>";
+            for (int trial = 0; trial != num_trials; ++trial)
+            {
+                test_with_rosen<float>();
+            }
+
+            dlog << LINFO << "test with rosen<double>";
+            for (int trial = 0; trial != num_trials; ++trial)
+            {
+                test_with_rosen<double>();
+            }
+
+            test_trust_region_sub_problem();
+        }
+    } a;
+
+}
+
+