Added a trust region optimizer.

--HG-- extra : convert_revision : svn%3Afdd8eb12-d10e-0410-9acb-85c331704f74/trunk%403919

Added a trust region optimizer.
--HG-- extra : convert_revision : svn%3Afdd8eb12-d10e-0410-9acb-85c331704f74/trunk%403919
682f6b3d · Davis King · 571aff20 · 682f6b3d · 682f6b3d · 682f6b3d
Commit 682f6b3d authored Nov 26, 2010 by Davis King
6 changed files
--- a/dlib/optimization.h
+++ b/dlib/optimization.h
@@ -7,6 +7,7 @@
 #include "optimization/optimization_bobyqa.h"
 #include "optimization/optimization_solve_qp_using_smo.h"
 #include "optimization/optimization_oca.h"
+#include "optimization/optimization_trust_region.h"
 #endif // DLIB_OPTIMIZATIOn_HEADER

--- a/dlib/optimization/optimization_trust_region.h
+++ b/dlib/optimization/optimization_trust_region.h
+// Copyright (C) 2010  Davis E. King (davis@dlib.net)
+// License: Boost Software License   See LICENSE.txt for the full license.
+#ifndef DLIB_OPTIMIZATION_TRUST_REGIoN_H__
+#define DLIB_OPTIMIZATION_TRUST_REGIoN_H__
+#include "../matrix.h"
+#include "optimization_trust_region_abstract.h"
+namespace dlib
+{
+// ----------------------------------------------------------------------------------------
+    template <
+        typename EXP1,
+        typename EXP2,
+        typename T, long NR, long NC, typename MM, typename L
+        >
+    unsigned long solve_trust_region_subproblem ( 
+        const matrix_exp<EXP1>& B,
+        const matrix_exp<EXP2>& g,
+        const typename EXP1::type radius,
+        matrix<T,NR,NC,MM,L>& p,
+        double eps,
+        unsigned long max_iter
+    )
+    {
+        /*
+            This is an implementation of algorithm 4.3(Trust Region Subproblem)
+            from the book Numerical Optimization by Nocedal and Wright.  Some of
+            the details are also from Practical Methods of Optimization by Fletcher.
+        */
+        // make sure requires clause is not broken
+        DLIB_ASSERT(B.nr() == B.nc() && is_col_vector(g) && g.size() == B.nr() && equal(B,trans(B)),
+            "\t unsigned long solve_trust_region_subproblem()"
+            << "\n\t invalid arguments were given to this function"
+            << "\n\t B.nr():           " << B.nr()
+            << "\n\t B.nc():           " << B.nc()
+            << "\n\t is_col_vector(g): " << is_col_vector(g) 
+            << "\n\t g.size():         " << g.size() 
+            );
+        DLIB_ASSERT(radius > 0 && eps > 0 && max_iter > 0,
+            "\t unsigned long solve_trust_region_subproblem()"
+            << "\n\t invalid arguments were given to this function"
+            << "\n\t radius:   " << radius
+            << "\n\t eps:      " << eps 
+            << "\n\t max_iter: " << max_iter 
+            );
+        const_temp_matrix<EXP1> BB(B);
+        const_temp_matrix<EXP2> gg(g);
+        p.set_size(g.nr(),g.nc());
+        p = 0;
+        const T numeric_eps = max(abs(BB))*std::numeric_limits<T>::epsilon();
+        matrix<T,EXP1::NR,EXP2::NR,MM,L> R;
+        T lambda = 0;
+        // We need to put a bracket around lambda.  It can't go below 0.  We
+        // can get an upper bound using Gershgorin disks.  
+        // This number is a lower bound on the eigenvalues in BB
+        const T BB_min_eigenvalue = min(diag(BB) - (sum_cols(abs(BB)) - abs(diag(BB))));
+        const T g_norm = length(gg);
+        T lambda_min = 0;
+        T lambda_max = g_norm/radius - BB_min_eigenvalue;
+        // If we can tell that the minimum is at 0 then don't do anything.  Just return the answer. 
+        if (g_norm < numeric_eps && BB_min_eigenvalue > numeric_eps)
+        {
+            return 0;
+        }
+        // how much lambda has changed recently
+        T lambda_delta = 0;
+        for (unsigned long i = 0; i < max_iter; ++i)
+        {
+            R = chol(BB + lambda*identity_matrix<T>(BB.nr()));
+            // if the cholesky decomposition doesn't exist. 
+            if (R(R.nr()-1, R.nc()-1) <= numeric_eps)
+            {
+                // If B is indefinite and g is equal to 0 then we should
+                // quit this loop and go right to the eigenvalue decomposition method.
+                if (g_norm < numeric_eps)
+                    break;
+                // narrow the bracket on lambda.  Obviously the current lambda is
+                // too small.
+                lambda_min = lambda;
+                // jump towards the max value.  Eventually there will
+                // be a lambda that results in a cholesky decomposition.
+                const T alpha = 0.10;
+                lambda = (1-alpha)*lambda + alpha*lambda_max;
+                continue;
+            }
+            p = -gg;
+            // Solve RR'*p = -g for p.
+            using namespace blas_bindings;
+            // Solve R*q = -g for q where q = R'*p.
+            triangular_solver(CblasLeft, CblasLower, CblasNoTrans, CblasNonUnit, R, p);
+            const T q_norm = length(p);
+            // Solve R'*p = q for p.
+            triangular_solver(CblasLeft, CblasLower, CblasTrans, CblasNonUnit, R, p);
+            const T p_norm = length(p);
+            // check if we are done.  
+            if (lambda == 0)
+            {
+                if (p_norm < radius)
+                {
+                    // i will always be 0 in this case.  So we return 1.
+                    return i+1;
+                }
+            }
+            else
+            {
+                // if we are close enough to the solution then terminate
+                if (std::abs(p_norm - radius)/radius < eps)
+                    return i+1;
+            }
+            // shrink our bracket on lambda
+            if (p_norm < radius)
+                lambda_max = lambda;
+            else
+                lambda_min = lambda;
+            // This is only going to happen when g is basically 0 and B is indefinite.  
+            if (p_norm < numeric_eps)
+            {
+                break;
+            }
+            const T old_lambda = lambda;
+            // figure out which lambda to try next
+            lambda = lambda + std::pow(q_norm/p_norm,2)*(p_norm - radius)/radius;
+            // make sure the chosen lambda is within our bracket (but not exactly at either end).
+            const T gap = (lambda_max-lambda_min)*0.01;
+            lambda = put_in_range(lambda_min+gap, lambda_max-gap, lambda);
+            // Keep track of how much lambda is thrashing around inside the search bracket.  If it
+            // keeps moving around a whole lot then cut the search bracket in half.
+            lambda_delta += std::abs(lambda - old_lambda);
+            if (lambda_delta > 3*(lambda_max-lambda_min))
+            {
+                lambda = (lambda_min+lambda_max)/2;
+                lambda_delta = 0;
+            }
+        } // end for loop
+        // We are probably in the "hard case".   Use an eigenvalue decomposition to sort things out.
+        // Either that or the eps was just set too tight and really we are already done.
+        eigenvalue_decomposition<EXP1> ed(BB);
+        matrix<T,NR,NC,MM,L> ev = ed.get_real_eigenvalues();
+        const long min_eig_idx = index_of_min(ev);
+        ev -= min(ev);
+        // zero out any values which are basically zero
+        ev = pointwise_multiply(ev, ev > max(abs(ev))*std::numeric_limits<T>::epsilon());
+        ev = reciprocal(ev);
+        // figure out part of what p should be assuming we are in the hard case.
+        matrix<T,NR,NC,MM,L> p_hard;
+        p_hard = trans(ed.get_pseudo_v())*gg;
+        p_hard = diagm(ev)*p_hard;
+        p_hard = ed.get_pseudo_v()*p_hard;
+        // If we really are in the hard case then this if will be true.  Otherwise, the p
+        // we found in the "easy case" loop up top is the best answer.
+        if (length(p_hard) < radius && length(p_hard) >= length(p))
+        {
+            // adjust the length of p_hard by adding a component along the eigenvector associated with
+            // the smallest eigenvalue.  We want to make it the case that length(p) == radius.
+            const T tau = std::sqrt(radius*radius - length_squared(p_hard));
+            p = p_hard + tau*colm(ed.get_pseudo_v(),min_eig_idx);
+            // if we have to do an eigenvalue decomposition then say we did all the iterations
+            return max_iter;
+        }
+        // if we get this far it means we didn't converge to eps accuracy. 
+        return max_iter+1;
+    }
+// ----------------------------------------------------------------------------------------
+    template <
+        typename stop_strategy_type,
+        typename funct_model
+        >
+    double find_min_trust_region (
+        stop_strategy_type stop_strategy,
+        const funct_model& model, 
+        typename funct_model::column_vector& x, 
+        double radius = 1
+    )
+    {
+        /*
+            This is an implementation of algorithm 4.1(Trust Region)
+            from the book Numerical Optimization by Nocedal and Wright.  
+        */
+        // make sure requires clause is not broken
+        DLIB_ASSERT(is_col_vector(x) && radius > 0,
+            "\t double find_min_trust_region()"
+            << "\n\t invalid arguments were given to this function"
+            << "\n\t is_col_vector(x): " << is_col_vector(x) 
+            << "\n\t radius:           " << radius
+            );
+        typedef typename funct_model::column_vector T;
+        typedef typename T::type type;
+        typedef typename T::mem_manager_type mem_manager_type;
+        typedef typename T::layout_type layout_type;
+        typename funct_model::general_matrix h;
+        typename funct_model::column_vector g, p, d;
+        type f_value = model(x);
+        model.get_derivative_and_hessian(x,g,h);
+        // Sometimes the loop below won't modify x because the trust region step failed.
+        // This bool tells us when we are in that case.
+        bool stale_x = false;
+        while(stale_x || stop_strategy.should_continue_search(x, f_value, g))
+        {
+            // Try to scale the trust region to some reasonable elliptic shape
+            // appropriate for the current problem.
+            //d = 1,1;
+            //d = clamp(abs(g),1e-1, 1e1);
+            d = clamp(diag(abs(h)),1e-1, 1e1);
+            const unsigned long iter = solve_trust_region_subproblem(inv(diagm(d))*h*inv(diagm(d)),
+                                                                     inv(diagm(d))*g,
+                                                                     radius,
+                                                                     p, 
+                                                                     0.1, 
+                                                                     20);
+            // convert back from the spherical trust region to our elliptical one.
+            p = inv(diagm(d))*p;
+            const type new_f_value = model(x+p);
+            const type predicted_improvement = -0.5*trans(p)*h*p - trans(g)*p;
+            const type measured_improvement = (f_value - new_f_value);
+            // If the sub-problem can't find a way to improve then stop.  This only happens when p is essentially 0.
+            if (std::abs(predicted_improvement) <= std::abs(measured_improvement)*std::numeric_limits<type>::epsilon())
+                break;
+            const type rho = measured_improvement/predicted_improvement;
+            if (rho < 0.25)
+            {
+                radius *= 0.25;
+                // something has gone horribly wrong if the radius has shrunk to zero.  So just
+                // give up if that happens.
+                if (radius == 0)
+                    break;
+            }
+            else
+            {
+                // if rho > 0.75 and we are being checked by the radius 
+                if (rho > 0.75 && iter > 1)
+                {
+                    radius = std::min<type>(1000,  2*radius);
+                }
+            }
+            if (rho > 0)
+            {
+                x = x + p;
+                f_value = new_f_value;
+                model.get_derivative_and_hessian(x,g,h);
+                stale_x = false;
+            }
+            else
+            {
+                stale_x = true;
+            }
+        }
+        return f_value;
+    }
+// ----------------------------------------------------------------------------------------
+    template <typename funct_model>
+    struct negate_tr_model 
+    {
+        negate_tr_model( const funct_model& m) : model(m) {}
+        const funct_model& model;
+        typedef typename funct_model::column_vector column_vector;
+        typedef typename funct_model::general_matrix general_matrix;
+        template <typename T>
+        typename T::type operator() (const T& x) const
+        {
+            return -model(x);
+        }
+        template <typename T, typename U>
+        void get_derivative_and_hessian (
+            const T& x,
+            T& d,
+            U& h
+        ) const 
+        {
+            model.get_derivative_and_hessian(x,d,h);
+            d = -d;
+            h = -h;
+        }
+    };
+// ----------------------------------------------------------------------------------------
+    template <
+        typename stop_strategy_type,
+        typename funct_model
+        >
+    double find_max_trust_region (
+        stop_strategy_type stop_strategy,
+        const funct_model& model, 
+        typename funct_model::column_vector& x, 
+        double radius = 1
+    )
+    {
+        // make sure requires clause is not broken
+        DLIB_ASSERT(is_col_vector(x) && radius > 0,
+            "\t double find_max_trust_region()"
+            << "\n\t invalid arguments were given to this function"
+            << "\n\t is_col_vector(x): " << is_col_vector(x) 
+            << "\n\t radius:           " << radius
+            );
+        return -find_min_trust_region(stop_strategy,
+                                      negate_tr_model<funct_model>(model),
+                                      x,
+                                      radius);
+    }
+// ----------------------------------------------------------------------------------------
+}
+#endif // DLIB_OPTIMIZATION_TRUST_REGIoN_H__
--- a/dlib/optimization/optimization_trust_region_abstract.h
+++ b/dlib/optimization/optimization_trust_region_abstract.h
+// Copyright (C) 2010  Davis E. King (davis@dlib.net)
+// License: Boost Software License   See LICENSE.txt for the full license.
+#ifndef DLIB_OPTIMIZATION_TRUST_REGIoN_H__
+#define DLIB_OPTIMIZATION_TRUST_REGIoN_H__
+#include "../matrix.h"
+namespace dlib
+{
+// ----------------------------------------------------------------------------------------
+    template <
+        typename EXP1,
+        typename EXP2,
+        typename T, long NR, long NC, typename MM, typename L
+        >
+    unsigned long solve_trust_region_subproblem ( 
+        const matrix_exp<EXP1>& B,
+        const matrix_exp<EXP2>& g,
+        const typename EXP1::type radius,
+        matrix<T,NR,NC,MM,L>& p,
+        double eps,
+        unsigned long max_iter
+    );
+    /*!
+        requires
+            - B == trans(B)
+              (i.e. B should be a symmetric matrix)
+            - B.nr() == B.nc()
+            - is_col_vector(g) == true
+            - g.size() == B.nr()
+            - p is capable of containing a column vector the size of g
+              (i.e. p = g; should be a legal expression)
+            - radius > 0
+            - eps > 0
+            - max_iter > 0
+        ensures
+            - This function solves the following optimization problem:
+                Minimize: f(p) == 0.5*trans(p)*B*p + trans(g)*p
+                subject to the following constraint:
+                    - length(p) <= radius
+            - returns the number of iterations performed.  If this method fails to
+              converge to eps accuracy then the number returned will be max_iter+1.
+            - if this function returns 0 or 1 then we are not hitting the radius bound.
+              Otherwise, the radius constraint is active and std::abs(length(#p)-radius) <= eps.
+    !*/
+// ----------------------------------------------------------------------------------------
+    class function_model 
+    {
+        /*!
+            WHAT THIS OBJECT REPRESENTS
+                This object defines the interface for a function model
+                used by the trust-region optimizers defined below.
+                In particular, this object represents a function f() and
+                its associated derivative and hessian.
+        !*/
+        // Define the type used to represent column vectors
+        typedef matrix<double,0,1> column_vector;
+        // Define the type used to represent the hessian matrix
+        typedef matrix<double> general_matrix;
+        double operator() ( 
+            const column_vector& x
+        ) const;
+        /*!
+            ensures
+                - returns f(x)
+                  (i.e. evaluates this model at the given point and returns the value)
+        !*/
+        void get_derivative_and_hessian (
+            const column_vector& x,
+            column_vector& d,
+            general_matrix& h
+        ) const;
+        /*!
+            ensures
+                - #d == the derivative of f() at x
+                - #h == the hessian matrix of f() at x
+                - is_col_vector(#d) == true
+                - #d.size() == x.size()
+                - #h.nr() == #h.nc() == x.size()
+                - #h == trans(#h)
+        !*/
+    };
+// ----------------------------------------------------------------------------------------
+    template <
+        typename stop_strategy_type,
+        typename funct_model
+        >
+    double find_min_trust_region (
+        stop_strategy_type stop_strategy,
+        const funct_model& model, 
+        typename funct_model::column_vector& x, 
+        double radius = 1
+    );
+    /*!
+        requires
+            - stop_strategy == an object that defines a stop strategy such as one of 
+              the objects from dlib/optimization/optimization_stop_strategies_abstract.h
+            - is_col_vector(x) == true
+            - radius > 0
+            - model must be an object with an interface as defined by the function_model
+              example object shown above.
+        ensures
+            - Performs an unconstrained minimization of the function defined by model 
+              starting from the initial point x.  This function uses a trust region
+              algorithm to perform the minimization.  The radius parameter defines
+              the initial size of the trust region.
+            - The function is optimized until stop_strategy decides that an acceptable 
+              point has been found or the trust region subproblem fails to make progress.
+            - #x == the value of x that was found to minimize model()
+            - returns model(#x). 
+            - When this function makes calls to model.get_derivative_and_hessian() it always 
+              does so by first calling model() and then calling model.get_derivative_and_hessian().  
+              That is, any call to model.get_derivative_and_hessian(val) will always be 
+              preceded by a call to model(val) with the same value.  This way you can reuse 
+              any redundant computations performed by model() and model.get_derivative_and_hessian()
+              as appropriate.
+    !*/
+// ----------------------------------------------------------------------------------------
+    template <
+        typename stop_strategy_type,
+        typename funct_model
+        >
+    double find_max_trust_region (
+        stop_strategy_type stop_strategy,
+        const funct_model& model, 
+        typename funct_model::column_vector& x, 
+        double radius = 1
+    );
+    /*!
+        requires
+            - stop_strategy == an object that defines a stop strategy such as one of 
+              the objects from dlib/optimization/optimization_stop_strategies_abstract.h
+            - is_col_vector(x) == true
+            - radius > 0
+            - model must be an object with an interface as defined by the function_model
+              example object shown above.
+        ensures
+            - Performs an unconstrained maximization of the function defined by model 
+              starting from the initial point x.  This function uses a trust region
+              algorithm to perform the maximization.  The radius parameter defines
+              the initial size of the trust region.
+            - The function is optimized until stop_strategy decides that an acceptable 
+              point has been found or the trust region subproblem fails to make progress.
+            - #x == the value of x that was found to maximize model()
+            - returns model(#x). 
+            - When this function makes calls to model.get_derivative_and_hessian() it always 
+              does so by first calling model() and then calling model.get_derivative_and_hessian().  
+              That is, any call to model.get_derivative_and_hessian(val) will always be 
+              preceded by a call to model(val) with the same value.  This way you can reuse 
+              any redundant computations performed by model() and model.get_derivative_and_hessian()
+              as appropriate.
+            - Note that this function solves the maximization problem by converting it 
+              into a minimization problem.  Therefore, the values of model() and its derivative
+              reported to the stopping strategy will be negated.  That is, stop_strategy
+              will see -model() and -derivative.  All this really means is that the status 
+              messages from a stopping strategy in verbose mode will display a negated objective
+              value.
+    !*/
+// ----------------------------------------------------------------------------------------
+}
+#endif // DLIB_OPTIMIZATION_TRUST_REGIoN_H__
--- a/dlib/test/CMakeLists.txt
+++ b/dlib/test/CMakeLists.txt
@@ -83,12 +83,13 @@ set (tests
   statistics.cpp
   std_vector_c.cpp
   string.cpp
-   svm.cpp
   svm_c_linear.cpp
+   svm.cpp
   thread_pool.cpp
   threads.cpp
   timer.cpp
   tokenizer.cpp
+   trust_region.cpp
   tuple.cpp
   type_safe_union.cpp
   )

--- a/dlib/test/makefile
+++ b/dlib/test/makefile
@@ -99,6 +99,7 @@ SRC += thread_pool.cpp
 SRC += threads.cpp
 SRC += timer.cpp
 SRC += tokenizer.cpp
+SRC += trust_region.cpp
 SRC += tuple.cpp
 SRC += type_safe_union.cpp

--- a/dlib/test/trust_region.cpp
+++ b/dlib/test/trust_region.cpp
+// Copyright (C) 2010  Davis E. King (davis@dlib.net)
+// License: Boost Software License   See LICENSE.txt for the full license.
+#include <dlib/optimization.h>
+#include <sstream>
+#include <string>
+#include <cstdlib>
+#include <ctime>
+#include <vector>
+#include "../rand.h"
+#include "tester.h"
+namespace  
+{
+    using namespace test;
+    using namespace dlib;
+    using namespace std;
+    logger dlog("test.trust_region");
+// ----------------------------------------------------------------------------------------
+    template <typename T>
+    T rosen ( const matrix<T,2,1>& m)
+    {
+        const T x = m(0); 
+        const T y = m(1);
+        // compute Rosenbrock's function and return the result
+        return 100.0*pow(y - x*x,2) + pow(1 - x,2);
+    }
+    template <typename T>
+    const matrix<T,2,1> rosen_derivative ( const matrix<T,2,1>& m)
+    {
+        const T x = m(0);
+        const T y = m(1);
+        // make us a column vector of length 2
+        matrix<T,2,1> res(2);
+        // now compute the gradient vector
+        res(0) = -400*x*(y-x*x) - 2*(1-x); // derivative of rosen() with respect to x
+        res(1) = 200*(y-x*x);              // derivative of rosen() with respect to y
+        return res;
+    }
+    template <typename T>
+    const matrix<T,2,2> rosen_hessian ( const matrix<T,2,1>& m)
+    {
+        const T x = m(0);
+        const T y = m(1);
+        // make us a column vector of length 2
+        matrix<T,2,2> res;
+        // now compute the gradient vector
+        res(0,0) = -400*y + 3*400*x*x + 2; 
+        res(1,1) = 200;              
+        res(0,1) = -400*x;              
+        res(1,0) = -400*x;              
+        return res;
+    }
+// ----------------------------------------------------------------------------------------
+    template <typename T>
+    struct rosen_model
+    {
+        typedef matrix<T,2,1> column_vector;
+        typedef matrix<T,2,2> general_matrix;
+        T operator() ( column_vector x) const
+        {
+            return static_cast<T>(rosen(x));
+        }
+        void get_derivative_and_hessian (
+            const column_vector& x,
+            column_vector& d,
+            general_matrix& h
+        ) const 
+        {
+            d = rosen_derivative(x);
+            h = rosen_hessian(x);
+        }
+    };
+// ----------------------------------------------------------------------------------------
+    template <typename T>
+    struct neg_rosen_model
+    {
+        typedef matrix<T,0,1> column_vector;
+        typedef matrix<T,0,0> general_matrix;
+        T operator() ( column_vector x) const
+        {
+            return -static_cast<T>(rosen<T>(x));
+        }
+        void get_derivative_and_hessian (
+            const column_vector& x,
+            column_vector& d,
+            general_matrix& h
+        ) const 
+        {
+            d = -matrix_cast<T>(rosen_derivative<T>(x));
+            h = -matrix_cast<T>(rosen_hessian<T>(x));
+        }
+    };
+// ----------------------------------------------------------------------------------------
+    dlib::rand::float_1a rnd;
+    template <typename T>
+    void test_with_rosen()
+    {
+        print_spinner();
+        matrix<T,2,1> ans;
+        ans = 1,1;
+        matrix<T,2,1> p = 100*matrix_cast<T>(randm(2,1,rnd)) - 50;
+        T obj = find_min_trust_region(objective_delta_stop_strategy(0, 100), rosen_model<T>(), p);
+        DLIB_TEST_MSG(obj == 0, "obj: " << obj);
+        DLIB_TEST_MSG(length(p-ans) == 0, "length(p): " << length(p-ans));
+        matrix<T,0,1> p2 = 100*matrix_cast<T>(randm(2,1,rnd)) - 50;
+        obj = find_max_trust_region(objective_delta_stop_strategy(0, 100), neg_rosen_model<T>(), p2);
+        DLIB_TEST_MSG(obj == 0, "obj: " << obj);
+        DLIB_TEST_MSG(length(p2-ans) == 0, "length(p2): " << length(p2-ans));
+    }
+// ----------------------------------------------------------------------------------------
+    void test_trust_region_sub_problem()
+    {
+        dlog << LINFO << "subproblem test 1";
+        {
+            matrix<double,2,2> B;
+            B = 1, 0,
+                0, 1;
+            matrix<double,2,1> g, p, ans;
+            g = 0;
+            ans = 0;
+            solve_trust_region_subproblem(B,g,1,p, 0.001, 10);
+            DLIB_TEST(length(p-ans) < 1e-10);
+            solve_trust_region_subproblem(B,g,1,p, 0.001, 1);
+            DLIB_TEST(length(p-ans) < 1e-10);
+        }
+        dlog << LINFO << "subproblem test 2";
+        {
+            matrix<double,2,2> B;
+            B = 1, 0,
+                0, 1;
+            B *= 0.1;
+            matrix<double,2,1> g, p, ans;
+            g = 1;
+            ans = -g / length(g);
+            solve_trust_region_subproblem(B,g,1,p, 1e-6, 20);
+            DLIB_TEST(length(p-ans) < 1e-4);
+        }
+        dlog << LINFO << "subproblem test 3";
+        {
+            matrix<double,2,2> B;
+            B = 0, 0,
+                0, 0;
+            matrix<double,2,1> g, p, ans;
+            g = 1;
+            ans = -g / length(g);
+            solve_trust_region_subproblem(B,g,1,p, 1e-6, 20);
+            dlog << LINFO << "ans: " << trans(ans);
+            dlog << LINFO << "p: " << trans(p);
+            DLIB_TEST(length(p-ans) < 1e-4);
+        }
+        return;
+        dlog << LINFO << "subproblem test 4";
+        {
+            matrix<double,2,2> B;
+            B = 2, 0,
+                0, -1;
+            matrix<double,2,1> g, p, ans;
+            g = 0;
+            ans = 0, -1;
+            solve_trust_region_subproblem(B,g,1,p, 1e-6, 20);
+            DLIB_TEST(length(p-ans) < 1e-4);
+        }
+        dlog << LINFO << "subproblem test 5";
+        {
+            matrix<double,2,2> B;
+            B = 2, 0,
+                0, -1;
+            matrix<double,2,1> g, p, ans;
+            g = 0, 1;
+            ans = 0, -1;
+            solve_trust_region_subproblem(B,g,1,p, 1e-6, 20);
+            DLIB_TEST(length(p-ans) < 1e-4);
+        }
+        dlog << LINFO << "subproblem test 6";
+        for (int i = 0; i < 10; ++i)
+        {
+            matrix<double,10,10> B;
+            B = randm(10,10, rnd);
+            B = 0.01*B*trans(B);
+            matrix<double,10,1> g, p, ans;
+            g = 1;
+            solve_trust_region_subproblem(B,g,1,p, 1e-6, 20);
+            DLIB_TEST(std::abs(length(p) - 1) < 1e-4);
+        }
+    }
+// ----------------------------------------------------------------------------------------
+    class optimization_tester : public tester
+    {
+    public:
+        optimization_tester (
+        ) :
+            tester ("test_trust_region",
+                    "Runs tests on the trust region optimization component.")
+        {}
+        void perform_test (
+        )
+        {
+            dlog << LINFO << "test with rosen<float>";
+            for (int i = 0; i < 50; ++i)
+                test_with_rosen<float>();
+            dlog << LINFO << "test with rosen<double>";
+            for (int i = 0; i < 50; ++i)
+                test_with_rosen<double>();
+            test_trust_region_sub_problem();
+        }
+    } a;
+}