Added some functions to make it easy to do a line search on a function

of a single variable when derivatives are not available. --HG-- extra : convert_revision : svn%3Afdd8eb12-d10e-0410-9acb-85c331704f74/trunk%403229

Added some functions to make it easy to do a line search on a function
of a single variable when derivatives are not available. --HG-- extra : convert_revision : svn%3Afdd8eb12-d10e-0410-9acb-85c331704f74/trunk%403229
93bb778f · Davis King · 538a106c · 93bb778f · 93bb778f · 93bb778f
Commit 93bb778f authored Oct 05, 2009 by Davis King
3 changed files
--- a/dlib/optimization/optimization_line_search.h
+++ b/dlib/optimization/optimization_line_search.h
@@ -8,6 +8,7 @@
 #include "../matrix.h"
 #include "../algs.h"
 #include "optimization_line_search_abstract.h"
+#include <utility>

 namespace dlib
 {
@@ -192,6 +193,47 @@ namespace dlib
        return put_in_range(0,1,x);
    }

+// ----------------------------------------------------------------------------------------
+
+    inline double lagrange_poly_min_extrap (
+        double p1, 
+        double p2,
+        double p3,
+        double f1,
+        double f2,
+        double f3
+    )
+    {
+        DLIB_ASSERT(p1 < p2 && p2 < p3 && f1 >= f2 && f2 <= f3,
+                     "   p1: " << p1 
+                     << "   p2: " << p2 
+                     << "   p3: " << p3  
+                     << "   f1: " << f1 
+                     << "   f2: " << f2 
+                     << "   f3: " << f3);
+
+        // This formula is out of the book Nonlinear Optimization by Andrzej Ruszczynski.  See section 5.2.
+        double temp1 =    f1*(p3*p3 - p2*p2) + f2*(p1*p1 - p3*p3) + f3*(p2*p2 - p1*p1);
+        double temp2 = 2*(f1*(p3 - p2)       + f2*(p1 - p3)       + f3*(p2 - p1) );
+
+        if (temp2 == 0)
+        {
+            return p2;
+        }
+
+        const double result = temp1/temp2;
+
+        // do a final sanity check to make sure the result is in the right range
+        if (p1 <= result && result <= p3)
+        {
+            return result;
+        }
+        else
+        {
+            return std::min(std::max(p1,result),p3);
+        }
+    }
+
 // ----------------------------------------------------------------------------------------

    template <
@@ -396,6 +438,233 @@ namespace dlib
        }
    }

+// ----------------------------------------------------------------------------------------
+
+    template <typename funct>
+    std::pair<double,double> find_min_single_variable (
+        const funct& f,
+        const double starting_point,
+        const double begin = -1e200,
+        const double end = 1e200,
+        const double eps = 1e-3,
+        const long max_iter = 100
+    )
+    {
+        DLIB_CASSERT( eps > 0 &&
+                      max_iter > 1 &&
+                      begin <= starting_point && starting_point <= end,
+                      "eps: " << eps
+                      << "\n max_iter: "<< max_iter 
+                      << "\n begin: "<< begin 
+                      << "\n end:   "<< end 
+                      << "\n starting_point: "<< starting_point 
+        );
+
+        double p1=0, p2=0, p3=0, f1=0, f2=0, f3=0;
+        long f_evals = 1;
+
+        const std::pair<double,double> p(starting_point, f(starting_point));
+
+        if (begin == end)
+        {
+            return p;
+        }
+
+        using std::abs;
+        using std::min;
+        using std::max;
+
+        // find three bracketing points such that f1 > f2 < f3.   Do this by generating a sequence
+        // of points expanding away from 0.   Also note that, in the following code, it is always the
+        // case that p1 < p2 < p3.
+
+
+
+        // The first thing we do is get a starting set of 3 points that are inside the [begin,end] bounds
+        p1 = max(p.first-1, begin);
+        p3 = min(p.first+1, end);
+        if (p1 == p.first)
+            f1 = p.second;
+        else
+            f1 = f(p1);
+
+        if (p3 == p.first)
+            f3 = p.second;
+        else
+            f3 = f(p3);
+
+        if (p.first == p1 || p.first == p3)
+        {
+            p2 = (p1+p3)/2;
+            f2 = f(p2);
+        }
+        else
+        {
+            p2 = p.first;
+            f2 = p.second;
+        }
+
+        f_evals += 2;
+
+        // Now we have 3 points on the function.  Start looking for a bracketing set such that
+        // f1 > f2 < f3 is the case.
+        double jump_size = 1;
+        while ( !(f1 > f2 && f2 < f3))
+        {
+            // check for hitting max_iter
+            if (f_evals >= max_iter)
+            {
+                if (f1 < min(f2,f3)) return std::make_pair(p1, f1);
+                if (f2 < min(f1,f3)) return std::make_pair(p2, f2);
+
+                return std::make_pair(p3, f3);
+            }
+
+            // if f1 is small then take a step to the left
+            if (f1 < f3)
+            {
+                // check if the minimum is butting up against the bounds and return if we are.
+                if (p1 == begin)
+                    return std::make_pair(p1, f1);
+
+                p3 = p2;
+                f3 = f2;
+
+                p2 = p1;
+                f2 = f1;
+
+                p1 = max(p1 - jump_size, begin);
+                f1 = f(p1);
+
+            }
+            // otherwise f3 is small and we should take a step to the right
+            else 
+            {
+                // check if the minimum is butting up against the bounds and return if we are.
+                if (p3 == end)
+                    return std::make_pair(p3, f3);
+
+                p1 = p2;
+                f1 = f2;
+
+                p2 = p3;
+                f2 = f3;
+
+                p3 = min(p3 + jump_size, end);
+                f3 = f(p3);
+            }
+
+            ++f_evals;
+            jump_size *= 2;
+        }
+
+
+        // Loop until we have done the max allowable number of iterations or
+        // the bracketing window is smaller than eps.
+        // Within this loop we maintain the invariant that: f1 > f2 < f3 and p1 < p2 < p3
+        const double tau = 0.1;
+        while( f_evals < max_iter && p3-p1 > eps)
+        {
+            double p_min = lagrange_poly_min_extrap(p1,p2,p3, f1,f2,f3);
+
+
+            // make sure p_min isn't too close to the three points we already have
+            if (p_min < p2)
+            {
+                const double min_dist = (p2-p1)*tau;
+                if (abs(p1-p_min) < min_dist) 
+                {
+                    p_min = p1 + min_dist;
+                }
+                else if (abs(p2-p_min) < min_dist)
+                {
+                    p_min = p2 - min_dist;
+                }
+            }
+            else
+            {
+                const double min_dist = (p3-p2)*tau;
+                if (abs(p2-p_min) < min_dist) 
+                {
+                    p_min = p2 + min_dist;
+                }
+                else if (abs(p3-p_min) < min_dist)
+                {
+                    p_min = p3 - min_dist;
+                }
+            }
+
+            // make sure one side of the bracket isn't super huge compared to the other
+            // side.  If it is then contract it.
+            const double bracket_ratio = abs(p1-p2)/abs(p2-p3);
+            if ( !( bracket_ratio < 100 && bracket_ratio > 0.01) )
+            {
+                // Force p_min to be on a reasonable side.  But only if lagrange_poly_min_extrap()
+                // didn't put it on a good side already.
+                if (bracket_ratio > 1 && p_min > p2)
+                    p_min = (p1+p2)/2;
+                else if (p_min < p2)
+                    p_min = (p2+p3)/2;
+            }
+
+
+            const double f_min = f(p_min);
+
+
+            // Remove one of the endpoints of our bracket depending on where the new point falls.
+            if (p_min < p2)
+            {
+                if (f1 > f_min && f_min < f2)
+                {
+                    p3 = p2;
+                    f3 = f2;
+                    p2 = p_min;
+                    f2 = f_min;
+                }
+                else
+                {
+                    p1 = p_min;
+                    f1 = f_min;
+                }
+            }
+            else
+            {
+                if (f2 > f_min && f_min < f3)
+                {
+                    p1 = p2;
+                    f1 = f2;
+                    p2 = p_min;
+                    f2 = f_min;
+                }
+                else
+                {
+                    p3 = p_min;
+                    f3 = f_min;
+                }
+            }
+
+
+            ++f_evals;
+        }
+
+        return std::make_pair(p2, f2);
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    template <typename funct>
+    std::pair<double,double> find_max_single_variable (
+        const funct& f,
+        const double starting_point,
+        const double begin = -1e200,
+        const double end = 1e200,
+        const double eps = 1e-3,
+        const long max_iter = 100
+    )
+    {
+        return find_min_single_variable(negate_function(f), starting_point, begin, end, eps, max_iter);
+    }
+
 // ----------------------------------------------------------------------------------------

 }

--- a/dlib/optimization/optimization_line_search_abstract.h
+++ b/dlib/optimization/optimization_line_search_abstract.h
@@ -103,6 +103,27 @@ namespace dlib
            - returns the point in the range [0,1] that minimizes the polynomial c(x) 
    !*/

+// ----------------------------------------------------------------------------------------
+
+    inline double lagrange_poly_min_extrap (
+        double p1, 
+        double p2,
+        double p3,
+        double f1,
+        double f2,
+        double f3
+    );
+    /*!
+        requires
+            - f1 >= f2 <= f3
+            - p1 < p2 < p3
+        ensures
+            - let c(x) be the second order Lagrange polynomial that interpolates the
+              points p1, p2, and p3 where c(p1)==f1, c(p2)==f2, and c(p3)==f3
+            - this function returns the point in the range [p1,p3] that minimizes 
+              the polynomial c(x) 
+    !*/
+
 // ----------------------------------------------------------------------------------------

    template <
@@ -150,6 +171,66 @@ namespace dlib
        and also in the more recent book Numerical Optimization by Nocedal and Wright.
    */

+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename funct
+        >
+    std::pair<double,double> find_min_single_variable (
+        const funct& f,
+        const double starting_point,
+        const double begin = -1e200,
+        const double end = 1e200,
+        const double eps = 1e-3,
+        const long max_iter = 100
+    )
+    /*!
+        requires
+            - eps > 0
+            - max_iter > 1
+            - begin <= starting_point <= end
+            - f must be a function of a double that returns a double
+              (e.g. f(starting_point) should be a valid expression that evaluates to a double)
+        ensures
+            - Finds a point P such that:
+                - P is a local minimum of the function f().   
+                - begin <= P <= end
+            - Evaluates f() no more than max_iter times
+            - Stops searching when the window around the minimum point is smaller than eps.
+              The search will begin with the given starting_point.
+            - returns std::make_pair(P, f(P))
+    !*/
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename funct
+        >
+    std::pair<double,double> find_max_single_variable (
+        const funct& f,
+        const double starting_point,
+        const double begin = -1e200,
+        const double end = 1e200,
+        const double eps = 1e-3,
+        const long max_iter = 100
+    )
+    /*!
+        requires
+            - eps > 0
+            - max_iter > 1
+            - begin <= starting_point <= end
+            - f must be a function of a double that returns a double
+              (e.g. f(starting_point) should be a valid expression that evaluates to a double)
+        ensures
+            - Finds a point P such that:
+                - P is a local maximum of the function f().   
+                - begin <= P <= end
+            - Evaluates f() no more than max_iter times
+            - Stops searching when the window around the minimum point is smaller than eps.
+              The search will begin with the given starting_point.
+            - returns std::make_pair(P, f(P))
+    !*/
+
 // ----------------------------------------------------------------------------------------

 }

--- a/dlib/test/optimization.cpp
+++ b/dlib/test/optimization.cpp
@@ -128,6 +128,14 @@ namespace
            pow(std::sqrt(10.0)*(x(0) - x(3))*(x(0) - x(3)),2);
    }

+// ----------------------------------------------------------------------------------------
+
+// a simple function with a minimum at zero
+    double single_variable_function ( double x)
+    {
+        ++total_count;
+        return 3*x*x + 5;
+    }

 // ----------------------------------------------------------------------------------------
 // ----------------------------------------------------------------------------------------
@@ -669,6 +677,30 @@ namespace
        dlog << LINFO << "find_max_bobyqa(): got neg_rosen in " << total_count;
    }

+// ----------------------------------------------------------------------------------------
+
+    void test_single_variable_function (
+        const double p
+    )
+    {
+        const double eps = 1e-9;
+
+
+        dlog << LINFO << "testing with single_variable_function and the start point: " << p;
+
+        total_count = 0;
+        double out = find_min_single_variable(&single_variable_function, p, -1e100, 1e100, eps, 1000).first;
+        DLIB_TEST_MSG(std::abs(out) < 1e-6, out);
+        dlog << LINFO << "find_min_single_variable(): got single_variable_function in " << total_count;
+
+
+        total_count = 0;
+        out = find_max_single_variable(negate_function(&single_variable_function), p, -1e100, 1e100, eps, 1000).first;
+        DLIB_TEST_MSG(std::abs(out) < 1e-6, out);
+        dlog << LINFO << "find_max_single_variable(): got single_variable_function in " << total_count;
+
+    }
+
 // ----------------------------------------------------------------------------------------

    void optimization_test (
@@ -684,6 +716,11 @@ namespace

        p.set_size(2);

+        // test with single_variable_function
+        test_single_variable_function(0);
+        test_single_variable_function(1);
+        test_single_variable_function(-10);
+        test_single_variable_function(900.53);

        // test with the rosen function
        p(0) = 9;