Split the code up into multiple files and setup the abstracts for each of them.

--HG-- rename : dlib/optimization/optimization.h => dlib/optimization/optimization_line_search.h rename : dlib/optimization/optimization_abstract.h => dlib/optimization/optimization_line_search_abstract.h rename : dlib/optimization/optimization.h => dlib/optimization/optimization_search_strategies.h rename : dlib/optimization/optimization_abstract.h => dlib/optimization/optimization_search_strategies_abstract.h rename : dlib/optimization/optimization.h => dlib/optimization/optimization_stop_strategies.h rename : dlib/optimization/optimization_abstract.h => dlib/optimization/optimization_stop_strategies_abstract.h extra : convert_revision : svn%3Afdd8eb12-d10e-0410-9acb-85c331704f74/trunk%403190

Split the code up into multiple files and setup the abstracts for each of them.
--HG-- rename : dlib/optimization/optimization.h => dlib/optimization/optimization_line_search.h rename : dlib/optimization/optimization_abstract.h => dlib/optimization/optimization_line_search_abstract.h rename : dlib/optimization/optimization.h => dlib/optimization/optimization_search_strategies.h rename : dlib/optimization/optimization_abstract.h => dlib/optimization/optimization_search_strategies_abstract.h rename : dlib/optimization/optimization.h => dlib/optimization/optimization_stop_strategies.h rename : dlib/optimization/optimization_abstract.h => dlib/optimization/optimization_stop_strategies_abstract.h extra : convert_revision : svn%3Afdd8eb12-d10e-0410-9acb-85c331704f74/trunk%403190
9fb35897 · Davis King · 75b3839a · 9fb35897 · 9fb35897 · 9fb35897
Commit 9fb35897 authored Sep 07, 2009 by Davis King
8 changed files
--- a/dlib/optimization/optimization.h
+++ b/dlib/optimization/optimization.h
@@ -5,329 +5,14 @@

 #include <cmath>
 #include <limits>
-#include "../matrix.h"
-#include "../algs.h"
 #include "optimization_abstract.h"
-#include "../sequence.h"
+#include "optimization_search_strategies.h"
+#include "optimization_stop_strategies.h"
+#include "optimization_line_search.h"

 namespace dlib
 {

-// ----------------------------------------------------------------------------------------
-// ----------------------------------------------------------------------------------------
-// ----------------------------------------------------------------------------------------
-
-    class objective_delta_stop_strategy
-    {
-    public:
-        objective_delta_stop_strategy (
-            double min_delta = 1e-7
-        ) : _been_used(false), _min_delta(min_delta), _max_iter(0), _cur_iter(0), _prev_funct_value(0) {}
-
-        objective_delta_stop_strategy (
-            double min_delta,
-            unsigned long max_iter
-        ) : _been_used(false), _min_delta(min_delta), _max_iter(max_iter), _cur_iter(0), _prev_funct_value(0) {}
-
-        template <typename T>
-        bool should_continue_search (
-            const T& ,
-            const double funct_value,
-            const T& 
-        ) 
-        {
-            ++_cur_iter;
-            if (_been_used)
-            {
-                // Check if we have hit the max allowable number of iterations.  (but only
-                // check if _max_iter is enabled (i.e. not 0)).
-                if (_max_iter != 0 && _cur_iter > _max_iter)
-                    return false;
-
-                // check if the function change was too small
-                if (std::abs(funct_value - _prev_funct_value) < _min_delta)
-                    return false;
-            }
-
-            _been_used = true;
-            _prev_funct_value = funct_value;
-            return true;
-        }
-
-    private:
-
-        bool _been_used;
-        double _min_delta;
-        unsigned long _max_iter;
-        unsigned long _cur_iter;
-        double _prev_funct_value;
-    };
-
-// ----------------------------------------------------------------------------------------
-// ----------------------------------------------------------------------------------------
-// ----------------------------------------------------------------------------------------
-
-    class cg_search_strategy
-    {
-    public:
-        cg_search_strategy() : been_used(false) {}
-
-        double get_wolfe_rho (
-        ) const { return 0.001; }
-
-        double get_wolfe_sigma (
-        ) const { return 0.01; }
-
-        template <typename T>
-        const matrix<double,0,1>& get_next_direction (
-            const T& ,
-            const double ,
-            const T& funct_derivative
-        )
-        /*!
-            requires
-                - for some function f():
-                    - funct_value == f(x)
-                    - funct_derivative == derivative(f)(x)
-            ensures
-                - returns the next search direction (from the point x).  This
-                  direction is chosen using the Polak-Ribiere conjugate gradient
-                  method.
-        !*/
-        {
-            if (been_used == false)
-            {
-                been_used = true;
-                prev_direction = -funct_derivative;
-            }
-            else
-            {
-                // Use the Polak-Ribiere (4.1.12) conjugate gradient described by Fletcher on page 83
-                const double temp = trans(prev_derivative)*prev_derivative;
-                // If this value hits zero then just use the direction of steepest descent.
-                if (std::abs(temp) < std::numeric_limits<double>::epsilon())
-                {
-                    prev_derivative = funct_derivative;
-                    prev_direction = -funct_derivative;
-                    return prev_direction;
-                }
-
-                double b = trans(funct_derivative-prev_derivative)*funct_derivative/(temp);
-                prev_direction = -funct_derivative + b*prev_direction;
-
-            }
-
-            prev_derivative = funct_derivative;
-            return prev_direction;
-        }
-
-    private:
-        bool been_used;
-        matrix<double,0,1> prev_derivative;
-        matrix<double,0,1> prev_direction;
-    };
-
-// ----------------------------------------------------------------------------------------
-
-    class bfgs_search_strategy
-    {
-    public:
-        bfgs_search_strategy() : been_used(false), been_used_twice(false) {}
-
-        double get_wolfe_rho (
-        ) const { return 0.01; }
-
-        double get_wolfe_sigma (
-        ) const { return 0.9; }
-
-        template <typename T>
-        const matrix<double,0,1>& get_next_direction (
-            const T& x,
-            const double ,
-            const T& funct_derivative
-        )
-        {
-            if (been_used == false)
-            {
-                been_used = true;
-                H = identity_matrix<double>(x.size());
-            }
-            else
-            {
-                // update H with the BFGS formula from (3.2.12) on page 55 of Fletcher 
-                delta = (x-prev_x); 
-                gamma = funct_derivative-prev_derivative;
-
-                double dg = dot(delta,gamma);
-
-                // Try to set the initial value of the H matrix to something reasonable if we are still
-                // in the early stages of figuring out what it is.  This formula below is what is suggested
-                // in the book Numerical Optimization by Nocedal and Wright in the chapter on Quasi-Newton methods.
-                if (been_used_twice == false)
-                {
-                    double gg = trans(gamma)*gamma;
-                    if (std::abs(gg) > std::numeric_limits<double>::epsilon())
-                    {
-                        const double temp = put_in_range(0.01, 100, dg/gg);
-                        H = diagm(uniform_matrix<double>(x.size(),1, temp));
-                        been_used_twice = true;
-                    }
-                }
-
-                Hg = H*gamma;
-                gH = trans(trans(gamma)*H);
-                double gHg = trans(gamma)*H*gamma;
-                if (gHg < std::numeric_limits<double>::infinity() && dg < std::numeric_limits<double>::infinity() &&
-                    dg != 0)
-                {
-                    H += (1 + gHg/dg)*delta*trans(delta)/(dg) - (delta*trans(gH) + Hg*trans(delta))/(dg);
-                }
-                else
-                {
-                    H = identity_matrix<double>(H.nr());
-                    been_used_twice = false;
-                }
-            }
-
-            prev_x = x;
-            prev_direction = -H*funct_derivative;
-            prev_derivative = funct_derivative;
-            return prev_direction;
-        }
-
-    private:
-        bool been_used;
-        bool been_used_twice;
-        matrix<double,0,1> prev_x;
-        matrix<double,0,1> prev_derivative;
-        matrix<double,0,1> prev_direction;
-        matrix<double> H;
-        matrix<double,0,1> delta, gamma, Hg, gH;
-    };
-
-// ----------------------------------------------------------------------------------------
-
-    class lbfgs_search_strategy
-    {
-    public:
-        lbfgs_search_strategy(unsigned long max_size_) : max_size(max_size_), been_used(false) {}
-
-        lbfgs_search_strategy(const lbfgs_search_strategy& item) 
-        {
-            max_size = item.max_size;
-            been_used = item.been_used;
-            prev_x = item.prev_x;
-            prev_derivative = item.prev_derivative;
-            prev_direction = item.prev_direction;
-            alpha = item.alpha;
-            dh_temp = item.dh_temp;
-        }
-
-        double get_wolfe_rho (
-        ) const { return 0.01; }
-
-        double get_wolfe_sigma (
-        ) const { return 0.9; }
-
-        template <typename T>
-        const matrix<double,0,1>& get_next_direction (
-            const T& x,
-            const double ,
-            const T& funct_derivative
-        )
-        {
-            if (been_used == false)
-            {
-                prev_direction = -funct_derivative;
-                been_used = true;
-            }
-            else
-            {
-                // add an element into the stored data sequence
-                dh_temp.s = x - prev_x;
-                dh_temp.y = funct_derivative - prev_derivative;
-                double temp = dlib::dot(dh_temp.s, dh_temp.y);
-                // only accept this bit of data if temp isn't zero
-                if (std::abs(temp) > std::numeric_limits<double>::epsilon())
-                {
-                    dh_temp.rho = 1/temp;
-                    data.add(data.size(), dh_temp);
-                }
-                else
-                {
-                    data.clear();
-                }
-
-                if (data.size() > 0)
-                {
-                    // This block of code is from algorithm 7.4 in the Nocedal book.
-
-                    prev_direction = -funct_derivative;
-                    alpha.resize(data.size());
-                    for (unsigned long i = data.size()-1; i < data.size(); --i)
-                    {
-                        alpha[i] = data[i].rho*dot(data[i].s, prev_direction);
-                        prev_direction -= alpha[i]*data[i].y;
-                    }
-
-                    // Take a guess at what the first H matrix should be.  This formula below is what is suggested
-                    // in the book Numerical Optimization by Nocedal and Wright in the chapter on Large Scale 
-                    // Unconstrained Optimization (in the L-BFGS section).
-                    double H_0 = 1.0/data[data.size()-1].rho/dot(data[data.size()-1].y, data[data.size()-1].y);
-                    H_0 = put_in_range(0.001, 1000.0, H_0);
-                    prev_direction *= H_0;
-
-                    for (unsigned long i = 0; i < data.size(); ++i)
-                    {
-                        double beta = data[i].rho*dot(data[i].y, prev_direction);
-                        prev_direction += data[i].s * (alpha[i] - beta);
-                    }
-                }
-                else
-                {
-                    prev_derivative = -funct_derivative;
-                }
-
-            }
-
-            if (data.size() > max_size)
-            {
-                // remove the oldest element in the data sequence
-                data.remove(0, dh_temp);
-            }
-
-            prev_x = x;
-            prev_derivative = funct_derivative;
-            return prev_direction;
-        }
-
-    private:
-
-        struct data_helper
-        {
-            matrix<double,0,1> s;
-            matrix<double,0,1> y;
-            double rho;
-
-            friend void swap(data_helper& a, data_helper& b)
-            {
-                a.s.swap(b.s);
-                a.y.swap(b.y);
-                std::swap(a.rho, b.rho);
-            }
-        };
-        sequence<data_helper>::kernel_2a data;
-
-        unsigned long max_size;
-        bool been_used;
-        matrix<double,0,1> prev_x;
-        matrix<double,0,1> prev_derivative;
-        matrix<double,0,1> prev_direction;
-        std::vector<double> alpha;
-
-        data_helper dh_temp;
-    };
-
 // ----------------------------------------------------------------------------------------
 // ----------------------------------------------------------------------------------------
 //                    Functions that transform other functions  
@@ -432,393 +117,10 @@ namespace dlib
    template <typename funct>
    const negate_function_object<funct> negate_function(const funct& f) { return negate_function_object<funct>(f); }

-// ----------------------------------------------------------------------------------------
-
-    template <typename funct, typename T>
-    class line_search_funct 
-    {
-    public:
-        // You get an error on this line when you pass in a global function to this function.
-        // You have to either use a function object or pass a pointer to your global function
-        // by taking its address using the & operator.  (This check is here because gcc 4.0
-        // has a bug that causes it to silently corrupt return values from functions that
-        // invoked through a reference)
-        COMPILE_TIME_ASSERT(is_function<funct>::value == false);
-
-        line_search_funct(const funct& f_, const T& start_, const T& direction_) 
-            : f(f_),start(start_), direction(direction_), matrix_r(0), scalar_r(0)
-        {}
-
-        line_search_funct(const funct& f_, const T& start_, const T& direction_, T& r) 
-            : f(f_),start(start_), direction(direction_), matrix_r(&r), scalar_r(0)
-        {}
-
-        line_search_funct(const funct& f_, const T& start_, const T& direction_, double& r) 
-            : f(f_),start(start_), direction(direction_), matrix_r(0), scalar_r(&r)
-        {}
-
-        double operator()(const double& x) const
-        {
-            return get_value(f(start + x*direction));
-        }
-
-        // TODO figure out some requirements for these two functions and add them to the abstract
-        const T& get_last_scalar_eval (
-        ) const { return *scalar_r; }
-
-        const T& get_last_gradient_eval (
-        ) const { return *matrix_r; }
-
-    private:
-
-        double get_value (const double& r) const
-        {
-            // save a copy of this value for later
-            if (scalar_r)
-                *scalar_r = r;
-
-            return r;
-        }
-
-        template <typename U>
-        double get_value (const U& r) const
-        {
-            // U should be a matrix type
-            COMPILE_TIME_ASSERT(is_matrix<U>::value);
-
-            // save a copy of this value for later
-            if (matrix_r)
-                *matrix_r = r;
-
-            return trans(r)*direction;
-        }
-
-        const funct& f;
-        const T& start;
-        const T& direction;
-        T* matrix_r;
-        double* scalar_r;
-    };
-
-    template <typename funct, typename T>
-    const line_search_funct<funct,T> make_line_search_function(const funct& f, const T& start, const T& direction) 
-    { 
-        // You get an error on this line when you pass in a global function to this function.
-        // You have to either use a function object or pass a pointer to your global function
-        // by taking its address using the & operator.  (This check is here because gcc 4.0
-        // has a bug that causes it to silently corrupt return values from functions that
-        // invoked through a reference)
-        COMPILE_TIME_ASSERT(is_function<funct>::value == false);
-
-        COMPILE_TIME_ASSERT(is_matrix<T>::value);
-        DLIB_ASSERT (
-            is_col_vector(start) && is_col_vector(direction) && start.size() == direction.size(),
-            "\tline_search_funct make_line_search_function(f,start,direction)"
-            << "\n\tYou have to supply column vectors to this function"
-            << "\n\tstart.nc():     " << start.nc()
-            << "\n\tdirection.nc(): " << direction.nc()
-            << "\n\tstart.nr():     " << start.nr()
-            << "\n\tdirection.nr(): " << direction.nr()
-        );
-        return line_search_funct<funct,T>(f,start,direction); 
-    }
-
-// ----------------------------------------------------------------------------------------
-
-    template <typename funct, typename T>
-    const line_search_funct<funct,T> make_line_search_function(const funct& f, const T& start, const T& direction, double& f_out) 
-    { 
-        // You get an error on this line when you pass in a global function to this function.
-        // You have to either use a function object or pass a pointer to your global function
-        // by taking its address using the & operator.  (This check is here because gcc 4.0
-        // has a bug that causes it to silently corrupt return values from functions that
-        // invoked through a reference)
-        COMPILE_TIME_ASSERT(is_function<funct>::value == false);
-
-        COMPILE_TIME_ASSERT(is_matrix<T>::value);
-        DLIB_ASSERT (
-            is_col_vector(start) && is_col_vector(direction) && start.size() == direction.size(),
-            "\tline_search_funct make_line_search_function(f,start,direction)"
-            << "\n\tYou have to supply column vectors to this function"
-            << "\n\tstart.nc():     " << start.nc()
-            << "\n\tdirection.nc(): " << direction.nc()
-            << "\n\tstart.nr():     " << start.nr()
-            << "\n\tdirection.nr(): " << direction.nr()
-        );
-        return line_search_funct<funct,T>(f,start,direction, f_out); 
-    }
-
-// ----------------------------------------------------------------------------------------
-
-    template <typename funct, typename T>
-    const line_search_funct<funct,T> make_line_search_function(const funct& f, const T& start, const T& direction, T& grad_out) 
-    { 
-        // You get an error on this line when you pass in a global function to this function.
-        // You have to either use a function object or pass a pointer to your global function
-        // by taking its address using the & operator.  (This check is here because gcc 4.0
-        // has a bug that causes it to silently corrupt return values from functions that
-        // invoked through a reference)
-        COMPILE_TIME_ASSERT(is_function<funct>::value == false);
-
-        COMPILE_TIME_ASSERT(is_matrix<T>::value);
-        DLIB_ASSERT (
-            is_col_vector(start) && is_col_vector(direction) && start.size() == direction.size(),
-            "\tline_search_funct make_line_search_function(f,start,direction)"
-            << "\n\tYou have to supply column vectors to this function"
-            << "\n\tstart.nc():     " << start.nc()
-            << "\n\tdirection.nc(): " << direction.nc()
-            << "\n\tstart.nr():     " << start.nr()
-            << "\n\tdirection.nr(): " << direction.nr()
-        );
-        return line_search_funct<funct,T>(f,start,direction,grad_out); 
-    }
-
 // ----------------------------------------------------------------------------------------
 // ----------------------------------------------------------------------------------------
 //                    Functions that perform unconstrained optimization 
 // ----------------------------------------------------------------------------------------
-// ----------------------------------------------------------------------------------------
-
-    inline double poly_min_extrap (
-        double f0,
-        double d0,
-        double f1,
-        double d1
-    )
-    {
-        const double n = 3*(f1 - f0) - 2*d0 - d1;
-        const double e = d0 + d1 - 2*(f1 - f0);
-
-
-        // find the minimum of the derivative of the polynomial
-
-        double temp = std::max(n*n - 3*e*d0,0.0);
-
-        if (temp < 0)
-            return 0.5;
-
-        temp = std::sqrt(temp);
-
-        if (std::abs(e) <= std::numeric_limits<double>::epsilon())
-            return 0.5;
-
-        // figure out the two possible min values
-        double x1 = (temp - n)/(3*e);
-        double x2 = -(temp + n)/(3*e);
-
-        // compute the value of the interpolating polynomial at these two points
-        double y1 = f0 + d0*x1 + n*x1*x1 + e*x1*x1*x1;
-        double y2 = f0 + d0*x2 + n*x2*x2 + e*x2*x2*x2;
-
-        // pick the best point
-        double x;
-        if (y1 < y2)
-            x = x1;
-        else
-            x = x2;
-
-        // now make sure the minimum is within the allowed range of (0,1) 
-        return put_in_range(0,1,x);
-    }
-
-// ----------------------------------------------------------------------------------------
-
-    template <
-        typename funct, 
-        typename funct_der
-        >
-    double line_search (
-        const funct& f, 
-        const double f0,
-        const funct_der& der, 
-        const double d0,
-        double rho, 
-        double sigma, 
-        double minf
-    )
-    {
-        // You get an error on this line when you pass in a global function to this function.
-        // You have to either use a function object or pass a pointer to your global function
-        // by taking its address using the & operator.  (This check is here because gcc 4.0
-        // has a bug that causes it to silently corrupt return values from functions that
-        // invoked through a reference)
-        COMPILE_TIME_ASSERT(is_function<funct>::value == false);
-        COMPILE_TIME_ASSERT(is_function<funct_der>::value == false);
-
-        DLIB_ASSERT (
-            1 > sigma && sigma > rho && rho > 0,
-            "\tdouble line_search()"
-            << "\n\tYou have given invalid arguments to this function"
-            << "\n\tsigma: " << sigma
-            << "\n\trho:   " << rho 
-        );
-
-        // The bracketing phase of this function is implemented according to block 2.6.2 from
-        // the book Practical Methods of Optimization by R. Fletcher.   The sectioning 
-        // phase is an implementation of 2.6.4 from the same book.
-
-        // tau1 > 1. Controls the alpha jump size during the search
-        const double tau1 = 9;
-
-        // it must be the case that 0 < tau2 < tau3 <= 1/2 for the algorithm to function
-        // correctly but the specific values of tau2 and tau3 aren't super important.
-        const double tau2 = 1.0/10.0;
-        const double tau3 = 1.0/2.0;
-
-
-        // Stop right away and return a step size of 0 if the gradient is 0 at the starting point
-        if (std::abs(d0) < std::numeric_limits<double>::epsilon())
-            return 0;
-
-        // Figure out a reasonable upper bound on how large alpha can get.
-        const double mu = (minf-f0)/(rho*d0);
-
-
-        double alpha = 1;
-        if (mu < 0)
-            alpha = -alpha;
-        alpha = put_in_range(0, 0.65*mu, alpha);
-
-
-        double last_alpha = 0;
-        double last_val = f0;
-        double last_val_der = d0;
-
-        // the bracketing stage will find a find a range of points [a,b]
-        // that contains a reasonable solution to the line search
-        double a, b;
-
-        // These variables will hold the values and derivatives of f(a) and f(b)
-        double a_val, b_val, a_val_der, b_val_der;
-
-        // This thresh value represents the Wolfe curvature condition
-        const double thresh = std::abs(sigma*d0);
-
-        // do the bracketing stage to find the bracket range [a,b]
-        while (true)
-        {
-            const double val = f(alpha);
-            const double val_der = der(alpha);
-
-            // we are done with the line search since we found a value smaller
-            // than the minimum f value
-            if (val <= minf)
-                return alpha;
-
-            if (val > f0 + rho*alpha*d0 || val >= last_val)
-            {
-                a_val = last_val;
-                a_val_der = last_val_der;
-                b_val = val;
-                b_val_der = val_der;
-
-                a = last_alpha;
-                b = alpha;
-                break;
-            }
-
-            if (std::abs(val_der) <= thresh)
-                return alpha;
-
-            // if we are stuck not making progress then quit with the current alpha
-            if (last_alpha == alpha)
-                return alpha;
-
-            if (val_der >= 0)
-            {
-                a_val = val;
-                a_val_der = val_der;
-                b_val = last_val;
-                b_val_der = last_val_der;
-
-                a = alpha;
-                b = last_alpha;
-                break;
-            }
-
-            if (mu <= 2*alpha - last_alpha)
-            {
-                last_alpha = alpha;
-                alpha = mu;
-            }
-            else
-            {
-                const double temp = alpha;
-
-                double first = 2*alpha - last_alpha;
-                double last;
-                if (mu > 0)
-                    last = std::min(mu, alpha + tau1*(alpha - last_alpha));
-                else
-                    last = std::max(mu, alpha + tau1*(alpha - last_alpha));
-
-
-                // pick a point between first and last by doing some kind of interpolation
-                if (last_alpha < alpha)
-                    alpha = last_alpha + (alpha-last_alpha)*poly_min_extrap(last_val, last_val_der, val, val_der);
-                else
-                    alpha = alpha + (last_alpha-alpha)*poly_min_extrap(val, val_der, last_val, last_val_der);
-
-                alpha = put_in_range(first,last,alpha);
-
-
-                last_alpha = temp;
-            }
-
-            last_val = val;
-            last_val_der = val_der;
-
-        }
-
-
-        // Now do the sectioning phase from 2.6.4
-        while (true)
-        {
-            double first = a + tau2*(b-a);
-            double last = b - tau3*(b-a);
-
-            // use interpolation to pick alpha between first and last
-            alpha = a + (b-a)*poly_min_extrap(a_val, a_val_der, b_val, b_val_der);
-            alpha = put_in_range(first,last,alpha);
-
-            const double val = f(alpha);
-            const double val_der = der(alpha);
-
-
-            // stop if the interval gets so small that it isn't shrinking any more due to rounding error 
-            if (a == first || b == last)
-            {
-                return b;
-            }
-
-
-            if (val > f0 + rho*alpha*d0 || val >= a_val)
-            {
-                b = alpha;
-                b_val = val;
-                b_val_der = val_der;
-            }
-            else
-            {
-                if (std::abs(val_der) <= thresh)
-                    return alpha;
-
-                if ( (b-a)*val_der >= 0)
-                {
-                    b = a;
-                    b_val = a_val;
-                    b_val_der = a_val_der;
-                }
-
-                a = alpha;
-                a_val = val;
-                a_val_der = val_der;
-            }
-        }
-    }
-
-// ----------------------------------------------------------------------------------------
-// ----------------------------------------------------------------------------------------
 // ----------------------------------------------------------------------------------------

    template <
@@ -859,7 +161,7 @@ namespace dlib
        double f_value = f(x);
        g = der(x);

-        while(stop_strategy.should_continue_search(x, f_value, g))
+        while(stop_strategy.should_continue_search(x, f_value, g) && f_value > min_f)
        {
            s = search_strategy.get_next_direction(x, f_value, g);

@@ -870,6 +172,7 @@ namespace dlib
                        dot(g,s), // compute initial gradient for the line search
                        search_strategy.get_wolfe_rho(), search_strategy.get_wolfe_sigma(), min_f);

+            // Take the search step indicated by the above line search
            x += alpha*s;
        }
    }
@@ -910,10 +213,13 @@ namespace dlib

        T g, s;

+        // This function is basically just a copy of find_min() but with - put in the right places
+        // to flip things around so that it ends up looking for the max rather than the min.
+
        double f_value = -f(x);
        g = -der(x);

-        while(stop_strategy.should_continue_search(x, f_value, g))
+        while(stop_strategy.should_continue_search(x, f_value, g) && f_value > -max_f)
        {
            s = search_strategy.get_next_direction(x, f_value, g);

@@ -924,9 +230,11 @@ namespace dlib
                        dot(g,s), // compute initial gradient for the line search
                        search_strategy.get_wolfe_rho(), search_strategy.get_wolfe_sigma(), -max_f);

+            // Take the search step indicated by the above line search
            x += alpha*s;

-            // we have to negate the outputs from the line search 
+            // Don't forget to negate these outputs from the line search since they are 
+            // from the unnegated versions of f() and der()
            g *= -1;
            f_value *= -1;
        }
@@ -971,7 +279,7 @@ namespace dlib
        double f_value = f(x);
        g = derivative(f,derivative_eps)(x);

-        while(stop_strategy.should_continue_search(x, f_value, g))
+        while(stop_strategy.should_continue_search(x, f_value, g) && f_value > min_f)
        {
            s = search_strategy.get_next_direction(x, f_value, g);

@@ -983,6 +291,7 @@ namespace dlib
                        //derivative(make_line_search_function(f,x,s),derivative_eps)(0),
                        search_strategy.get_wolfe_rho(), search_strategy.get_wolfe_sigma(), min_f);

+            // Take the search step indicated by the above line search
            x += alpha*s;

            g = derivative(f,derivative_eps)(x);
@@ -1023,6 +332,7 @@ namespace dlib
            << "\n\tderivative_eps: " << derivative_eps 
        );

+        // Just negate the necessary things and call the find_min version of this function.
        find_min_using_approximate_derivatives(
            search_strategy, 
            stop_strategy, 

--- a/dlib/optimization/optimization_abstract.h
+++ b/dlib/optimization/optimization_abstract.h
@@ -7,6 +7,9 @@
 #include <limits>
 #include "../matrix/matrix_abstract.h"
 #include "../algs.h"
+#include "optimization_search_strategies_abstract.h"
+#include "optimization_stop_strategies_abstract.h"
+#include "optimization_line_search_abstract.h"


 namespace dlib
@@ -28,7 +31,8 @@ namespace dlib

        Note that if funct is a function of a double then the derivative of 
        funct is just a double but if funct is a function of a dlib::matrix (i.e. a
-        function of many variables) then its derivative is a gradient vector.
+        function of many variables) then its derivative is a gradient vector (a column
+        vector in particular).
    !*/

    template <
@@ -60,228 +64,173 @@ namespace dlib
            - returns derivative(f, 1e-7)
    !*/

-
 // ----------------------------------------------------------------------------------------

    template <
-        typename funct, 
-        typename T
+        typename funct
        >
-    class line_search_funct; 
+    class negate_function_object;
    /*!
-        This object is a function object that represents a line search function.
-
-        It represents a function with the signature:
-            double l(double x)
+        This is a function object that represents the negative of some other
+        function.   
    !*/

    template <
-        typename funct, 
-        typename T
+        typename funct
        >
-    const line_search_funct<funct,T> make_line_search_function (
-        const funct& f, 
-        const T& start, 
-        const T& direction
-    ); 
+    const negate_function_object<funct> negate_function(
+        const funct& f
+    );
    /*!
        requires
-            - is_matrix<T>::value == true (i.e. T must be a dlib::matrix)
-            - f must take a dlib::matrix that is a column vector
-            - is_col_vector(start) && is_col_vector(direction) && start.size() == direction.size() 
-              (i.e. start and direction should be column vectors of the same size)
-            - f must return either a double or a column vector the same length and
-              type as start
-            - f(start + 1.5*direction) should be a valid expression
+            - f == a function that returns a scalar
        ensures
-            - if (f returns a double) then
-                - returns a line search function that computes l(x) == f(start + x*direction)
-            - else
-                - returns a line search function that computes l(x) == trans(f(start + x*direction))*direction
-                - We assume that f is the derivative of some other function and that what
-                  f returns is a gradient vector. 
-                  So the following two expressions both create the derivative of l(x): 
-                    - derivative(make_line_search_function(funct,start,direction))
-                    - make_line_search_function(derivative(funct),start,direction)
+            - returns a function that represents the negation of f.  That is,
+              the returned function object represents g(x) == -f(x)
    !*/

 // ----------------------------------------------------------------------------------------
 // ----------------------------------------------------------------------------------------
 //                    Functions that perform unconstrained optimization 
 // ----------------------------------------------------------------------------------------
-// ----------------------------------------------------------------------------------------
-
-    inline double poly_min_extrap (
-        double f0,
-        double d0,
-        double f1,
-        double d1
-    );
-    /*!
-        ensures
-            - let c(x) be a 3rd degree polynomial such that:
-                - c(0) == f0
-                - c(1) == f1
-                - derivative of c(x) at x==0 is d0
-                - derivative of c(x) at x==1 is d1
-            - returns the point in the range [0,1] that minimizes the polynomial c(x) 
-    !*/
-
-// ----------------------------------------------------------------------------------------
-
-    template <
-        typename funct, 
-        typename funct_der
-        >
-    double line_search (
-        const funct& f, 
-        const funct_der& der, 
-        double rho, 
-        double sigma, 
-        double minf,
-        double& f0_out
-    );
-    /*!
-        requires
-            - 1 > sigma > rho > 0
-            - f and der are scalar functions of scalars
-              (e.g. line_search_funct objects)
-            - der is the derivative of f
-        ensures
-            - returns a value alpha such that f(alpha) is
-              significantly closer to the minimum of f than f(0).
-            - bigger values of sigma result in a less accurate but faster line search
-            - f0_out == f(0)
-    !*/
-
-// ----------------------------------------------------------------------------------------
-// ----------------------------------------------------------------------------------------
 // ----------------------------------------------------------------------------------------

    template <
+        typename search_strategy_type,
+        typename stop_strategy_type,
        typename funct, 
        typename funct_der, 
        typename T
        >
-    void find_min_quasi_newton (
+    void find_min (
+        search_strategy_type search_strategy,
+        stop_strategy_type stop_strategy,
        const funct& f, 
        const funct_der& der, 
        T& x, 
-        double min_f, 
-        double min_delta = 1e-7 
+        double min_f
    );
    /*!
        requires
-            - min_delta >= 0 
+            - search_strategy == an object that defines a search strategy such as one 
+              of the objects from dlib/optimization/optimization_search_strategies_abstract.h
+            - stop_strategy == an object that defines a stop strategy such as one of 
+              the objects from dlib/optimization/optimization_stop_strategies_abstract.h
            - f(x) must be a valid expression that evaluates to a double
            - der(x) must be a valid expression that evaluates to the derivative of
              f() at x.
-            - is_matrix<T>::value == true (i.e. T must be a dlib::matrix type)
-            - x.nc() == 1 (i.e. x must be a column vector)
+            - is_col_vector(x) == true
        ensures
-            - Performs an unconstrained minimization of the function f() using the BFGS 
-              quasi newton method.  The optimization stops when any of the following
-              conditions are satisfied: 
-                - the change in f() from one iteration to the next is less than min_delta
-                - f(#x) <= min_f
+            - Performs an unconstrained minimization of the function f() using the given
+              search_strategy.  
+            - The function is optimized until stop_strategy decides that an acceptable 
+              point has been found or f(#x) < min_f.
            - #x == the value of x that was found to minimize f()
    !*/

 // ----------------------------------------------------------------------------------------

    template <
+        typename search_strategy_type,
+        typename stop_strategy_type,
        typename funct, 
+        typename funct_der, 
        typename T
        >
-    void find_min_quasi_newton2 (
+    void find_max (
+        search_strategy_type search_strategy,
+        stop_strategy_type stop_strategy,
        const funct& f, 
+        const funct_der& der, 
        T& x, 
-        double min_f, 
-        double min_delta = 1e-7, 
-        const double derivative_eps = 1e-7 
+        double max_f
    );
    /*!
        requires
-            - min_delta >= 0 
-            - derivative_eps > 0 
+            - search_strategy == an object that defines a search strategy such as one 
+              of the objects from dlib/optimization/optimization_search_strategies_abstract.h
+            - stop_strategy == an object that defines a stop strategy such as one of 
+              the objects from dlib/optimization/optimization_stop_strategies_abstract.h
            - f(x) must be a valid expression that evaluates to a double
-            - is_matrix<T>::value == true (i.e. T must be a dlib::matrix type)
-            - x.nc() == 1 (i.e. x must be a column vector)
+            - der(x) must be a valid expression that evaluates to the derivative of
+              f() at x.
+            - is_col_vector(x) == true
        ensures
-            - Performs an unconstrained minimization of the function f() using a 
-              quasi newton method.  The optimization stops when any of the following
-              conditions are satisfied: 
-                - the change in f() from one iteration to the next is less than min_delta
-                - f(#x) <= min_f
-            - Uses the dlib::derivative(f,derivative_eps) function to compute gradient
-              information
-            - #x == the value of x that was found to minimize f()
+            - Performs an unconstrained maximization of the function f() using the given
+              search_strategy.  
+            - The function is optimized until stop_strategy decides that an acceptable 
+              point has been found or f(#x) > max_f.
+            - #x == the value of x that was found to maximize f()
    !*/

-// ----------------------------------------------------------------------------------------
-// ----------------------------------------------------------------------------------------
 // ----------------------------------------------------------------------------------------

    template <
-        typename funct, 
-        typename funct_der, 
+        typename search_strategy_type,
+        typename stop_strategy_type,
+        typename funct,
        typename T
        >
-    void find_min_conjugate_gradient (
-        const funct& f, 
-        const funct_der& der, 
-        T& x, 
-        double min_f, 
-        double min_delta = 1e-7
+    void find_min_using_approximate_derivatives (
+        search_strategy_type search_strategy,
+        stop_strategy_type stop_strategy,
+        const funct& f,
+        T& x,
+        double min_f,
+        double derivative_eps = 1e-7
    );
    /*!
        requires
-            - min_delta >= 0 
+            - search_strategy == an object that defines a search strategy such as one 
+              of the objects from dlib/optimization/optimization_search_strategies_abstract.h
+            - stop_strategy == an object that defines a stop strategy such as one of 
+              the objects from dlib/optimization/optimization_stop_strategies_abstract.h
            - f(x) must be a valid expression that evaluates to a double
-            - der(x) must be a valid expression that evaluates to the derivative of
-              f() at x.
-            - is_matrix<T>::value == true (i.e. T must be a dlib::matrix type)
-            - x.nc() == 1 (i.e. x must be a column vector)
+            - is_col_vector(x) == true
+            - derivative_eps > 0 
        ensures
-            - Performs an unconstrained minimization of the function f() using a 
-              conjugate gradient method.  The optimization stops when any of the following
-              conditions are satisfied: 
-                - the change in f() from one iteration to the next is less than min_delta
-                - f(#x) <= min_f
+            - Performs an unconstrained minimization of the function f() using the given
+              search_strategy.  
+            - The function is optimized until stop_strategy decides that an acceptable 
+              point has been found or f(#x) < min_f.
            - #x == the value of x that was found to minimize f()
+            - Uses the dlib::derivative(f,derivative_eps) function to compute gradient
+              information.
    !*/

 // ----------------------------------------------------------------------------------------

    template <
-        typename funct, 
+        typename search_strategy_type,
+        typename stop_strategy_type,
+        typename funct,
        typename T
        >
-    void find_min_conjugate_gradient2 (
-        const funct& f, 
-        T& x, 
-        double min_f, 
-        double min_delta = 1e-7,
-        const double derivative_eps = 1e-7 
+    void find_max_using_approximate_derivatives (
+        search_strategy_type search_strategy,
+        stop_strategy_type stop_strategy,
+        const funct& f,
+        T& x,
+        double max_f,
+        double derivative_eps = 1e-7
    );
    /*!
        requires
-            - min_delta >= 0 
-            - derivative_eps > 0
+            - search_strategy == an object that defines a search strategy such as one 
+              of the objects from dlib/optimization/optimization_search_strategies_abstract.h
+            - stop_strategy == an object that defines a stop strategy such as one of 
+              the objects from dlib/optimization/optimization_stop_strategies_abstract.h
            - f(x) must be a valid expression that evaluates to a double
-            - der(x) must be a valid expression that evaluates to the derivative of
-              f() at x.
-            - is_matrix<T>::value == true (i.e. T must be a dlib::matrix type)
-            - x.nc() == 1 (i.e. x must be a column vector)
+            - is_col_vector(x) == true
+            - derivative_eps > 0 
        ensures
-            - Performs an unconstrained minimization of the function f() using a 
-              conjugate gradient method.  The optimization stops when any of the following
-              conditions are satisfied: 
-                - the change in f() from one iteration to the next is less than min_delta
-                - f(#x) <= min_f
+            - Performs an unconstrained maximization of the function f() using the given
+              search_strategy.  
+            - The function is optimized until stop_strategy decides that an acceptable 
+              point has been found or f(#x) > max_f.
+            - #x == the value of x that was found to maximize f()
            - Uses the dlib::derivative(f,derivative_eps) function to compute gradient
-              information
-            - #x == the value of x that was found to minimize f()
+              information.
    !*/

 // ----------------------------------------------------------------------------------------

--- a/dlib/optimization/optimization_line_search.h
+++ b/dlib/optimization/optimization_line_search.h
+// Copyright (C) 2008  Davis E. King (davisking@users.sourceforge.net)
+// License: Boost Software License   See LICENSE.txt for the full license.
+#ifndef DLIB_OPTIMIZATIOn_LINE_SEARCH_H_
+#define DLIB_OPTIMIZATIOn_LINE_SEARCH_H_
+
+#include <cmath>
+#include <limits>
+#include "../matrix.h"
+#include "../algs.h"
+#include "optimization_line_search_abstract.h"
+
+namespace dlib
+{
+
+// ----------------------------------------------------------------------------------------
+
+    template <typename funct, typename T>
+    class line_search_funct 
+    {
+    public:
+        // You get an error on this line when you pass in a global function to this function.
+        // You have to either use a function object or pass a pointer to your global function
+        // by taking its address using the & operator.  (This check is here because gcc 4.0
+        // has a bug that causes it to silently corrupt return values from functions that
+        // invoked through a reference)
+        COMPILE_TIME_ASSERT(is_function<funct>::value == false);
+
+        line_search_funct(const funct& f_, const T& start_, const T& direction_) 
+            : f(f_),start(start_), direction(direction_), matrix_r(0), scalar_r(0)
+        {}
+
+        line_search_funct(const funct& f_, const T& start_, const T& direction_, T& r) 
+            : f(f_),start(start_), direction(direction_), matrix_r(&r), scalar_r(0)
+        {}
+
+        line_search_funct(const funct& f_, const T& start_, const T& direction_, double& r) 
+            : f(f_),start(start_), direction(direction_), matrix_r(0), scalar_r(&r)
+        {}
+
+        double operator()(const double& x) const
+        {
+            return get_value(f(start + x*direction));
+        }
+
+    private:
+
+        double get_value (const double& r) const
+        {
+            // save a copy of this value for later
+            if (scalar_r)
+                *scalar_r = r;
+
+            return r;
+        }
+
+        template <typename U>
+        double get_value (const U& r) const
+        {
+            // U should be a matrix type
+            COMPILE_TIME_ASSERT(is_matrix<U>::value);
+
+            // save a copy of this value for later
+            if (matrix_r)
+                *matrix_r = r;
+
+            return dot(r,direction);
+        }
+
+        const funct& f;
+        const T& start;
+        const T& direction;
+        T* matrix_r;
+        double* scalar_r;
+    };
+
+    template <typename funct, typename T>
+    const line_search_funct<funct,T> make_line_search_function(const funct& f, const T& start, const T& direction) 
+    { 
+        // You get an error on this line when you pass in a global function to this function.
+        // You have to either use a function object or pass a pointer to your global function
+        // by taking its address using the & operator.  (This check is here because gcc 4.0
+        // has a bug that causes it to silently corrupt return values from functions that
+        // invoked through a reference)
+        COMPILE_TIME_ASSERT(is_function<funct>::value == false);
+
+        COMPILE_TIME_ASSERT(is_matrix<T>::value);
+        DLIB_ASSERT (
+            is_col_vector(start) && is_col_vector(direction) && start.size() == direction.size(),
+            "\tline_search_funct make_line_search_function(f,start,direction)"
+            << "\n\tYou have to supply column vectors to this function"
+            << "\n\tstart.nc():     " << start.nc()
+            << "\n\tdirection.nc(): " << direction.nc()
+            << "\n\tstart.nr():     " << start.nr()
+            << "\n\tdirection.nr(): " << direction.nr()
+        );
+        return line_search_funct<funct,T>(f,start,direction); 
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    template <typename funct, typename T>
+    const line_search_funct<funct,T> make_line_search_function(const funct& f, const T& start, const T& direction, double& f_out) 
+    { 
+        // You get an error on this line when you pass in a global function to this function.
+        // You have to either use a function object or pass a pointer to your global function
+        // by taking its address using the & operator.  (This check is here because gcc 4.0
+        // has a bug that causes it to silently corrupt return values from functions that
+        // invoked through a reference)
+        COMPILE_TIME_ASSERT(is_function<funct>::value == false);
+
+        COMPILE_TIME_ASSERT(is_matrix<T>::value);
+        DLIB_ASSERT (
+            is_col_vector(start) && is_col_vector(direction) && start.size() == direction.size(),
+            "\tline_search_funct make_line_search_function(f,start,direction)"
+            << "\n\tYou have to supply column vectors to this function"
+            << "\n\tstart.nc():     " << start.nc()
+            << "\n\tdirection.nc(): " << direction.nc()
+            << "\n\tstart.nr():     " << start.nr()
+            << "\n\tdirection.nr(): " << direction.nr()
+        );
+        return line_search_funct<funct,T>(f,start,direction, f_out); 
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    template <typename funct, typename T>
+    const line_search_funct<funct,T> make_line_search_function(const funct& f, const T& start, const T& direction, T& grad_out) 
+    { 
+        // You get an error on this line when you pass in a global function to this function.
+        // You have to either use a function object or pass a pointer to your global function
+        // by taking its address using the & operator.  (This check is here because gcc 4.0
+        // has a bug that causes it to silently corrupt return values from functions that
+        // invoked through a reference)
+        COMPILE_TIME_ASSERT(is_function<funct>::value == false);
+
+        COMPILE_TIME_ASSERT(is_matrix<T>::value);
+        DLIB_ASSERT (
+            is_col_vector(start) && is_col_vector(direction) && start.size() == direction.size(),
+            "\tline_search_funct make_line_search_function(f,start,direction)"
+            << "\n\tYou have to supply column vectors to this function"
+            << "\n\tstart.nc():     " << start.nc()
+            << "\n\tdirection.nc(): " << direction.nc()
+            << "\n\tstart.nr():     " << start.nr()
+            << "\n\tdirection.nr(): " << direction.nr()
+        );
+        return line_search_funct<funct,T>(f,start,direction,grad_out); 
+    }
+
+// ----------------------------------------------------------------------------------------
+// ----------------------------------------------------------------------------------------
+// ----------------------------------------------------------------------------------------
+
+    inline double poly_min_extrap (
+        double f0,
+        double d0,
+        double f1,
+        double d1
+    )
+    {
+        const double n = 3*(f1 - f0) - 2*d0 - d1;
+        const double e = d0 + d1 - 2*(f1 - f0);
+
+
+        // find the minimum of the derivative of the polynomial
+
+        double temp = std::max(n*n - 3*e*d0,0.0);
+
+        if (temp < 0)
+            return 0.5;
+
+        temp = std::sqrt(temp);
+
+        if (std::abs(e) <= std::numeric_limits<double>::epsilon())
+            return 0.5;
+
+        // figure out the two possible min values
+        double x1 = (temp - n)/(3*e);
+        double x2 = -(temp + n)/(3*e);
+
+        // compute the value of the interpolating polynomial at these two points
+        double y1 = f0 + d0*x1 + n*x1*x1 + e*x1*x1*x1;
+        double y2 = f0 + d0*x2 + n*x2*x2 + e*x2*x2*x2;
+
+        // pick the best point
+        double x;
+        if (y1 < y2)
+            x = x1;
+        else
+            x = x2;
+
+        // now make sure the minimum is within the allowed range of (0,1) 
+        return put_in_range(0,1,x);
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename funct, 
+        typename funct_der
+        >
+    double line_search (
+        const funct& f, 
+        const double f0,
+        const funct_der& der, 
+        const double d0,
+        double rho, 
+        double sigma, 
+        double min_f
+    )
+    {
+        // You get an error on this line when you pass in a global function to this function.
+        // You have to either use a function object or pass a pointer to your global function
+        // by taking its address using the & operator.  (This check is here because gcc 4.0
+        // has a bug that causes it to silently corrupt return values from functions that
+        // invoked through a reference)
+        COMPILE_TIME_ASSERT(is_function<funct>::value == false);
+        COMPILE_TIME_ASSERT(is_function<funct_der>::value == false);
+
+        DLIB_ASSERT (
+            1 > sigma && sigma > rho && rho > 0,
+            "\tdouble line_search()"
+            << "\n\tYou have given invalid arguments to this function"
+            << "\n\tsigma: " << sigma
+            << "\n\trho:   " << rho 
+        );
+
+        // The bracketing phase of this function is implemented according to block 2.6.2 from
+        // the book Practical Methods of Optimization by R. Fletcher.   The sectioning 
+        // phase is an implementation of 2.6.4 from the same book.
+
+        // tau1 > 1. Controls the alpha jump size during the search
+        const double tau1 = 9;
+
+        // it must be the case that 0 < tau2 < tau3 <= 1/2 for the algorithm to function
+        // correctly but the specific values of tau2 and tau3 aren't super important.
+        const double tau2 = 1.0/10.0;
+        const double tau3 = 1.0/2.0;
+
+
+        // Stop right away and return a step size of 0 if the gradient is 0 at the starting point
+        if (std::abs(d0) < std::numeric_limits<double>::epsilon())
+            return 0;
+
+        // Stop right away if the current value is good enough according to min_f
+        if (f0 <= min_f)
+            return 0;
+
+        // Figure out a reasonable upper bound on how large alpha can get.
+        const double mu = (min_f-f0)/(rho*d0);
+
+
+        double alpha = 1;
+        if (mu < 0)
+            alpha = -alpha;
+        alpha = put_in_range(0, 0.65*mu, alpha);
+
+
+        double last_alpha = 0;
+        double last_val = f0;
+        double last_val_der = d0;
+
+        // the bracketing stage will find a find a range of points [a,b]
+        // that contains a reasonable solution to the line search
+        double a, b;
+
+        // These variables will hold the values and derivatives of f(a) and f(b)
+        double a_val, b_val, a_val_der, b_val_der;
+
+        // This thresh value represents the Wolfe curvature condition
+        const double thresh = std::abs(sigma*d0);
+
+        // do the bracketing stage to find the bracket range [a,b]
+        while (true)
+        {
+            const double val = f(alpha);
+            const double val_der = der(alpha);
+
+            // we are done with the line search since we found a value smaller
+            // than the minimum f value
+            if (val <= min_f)
+                return alpha;
+
+            if (val > f0 + rho*alpha*d0 || val >= last_val)
+            {
+                a_val = last_val;
+                a_val_der = last_val_der;
+                b_val = val;
+                b_val_der = val_der;
+
+                a = last_alpha;
+                b = alpha;
+                break;
+            }
+
+            if (std::abs(val_der) <= thresh)
+                return alpha;
+
+            // if we are stuck not making progress then quit with the current alpha
+            if (last_alpha == alpha)
+                return alpha;
+
+            if (val_der >= 0)
+            {
+                a_val = val;
+                a_val_der = val_der;
+                b_val = last_val;
+                b_val_der = last_val_der;
+
+                a = alpha;
+                b = last_alpha;
+                break;
+            }
+
+            if (mu <= 2*alpha - last_alpha)
+            {
+                last_alpha = alpha;
+                alpha = mu;
+            }
+            else
+            {
+                const double temp = alpha;
+
+                double first = 2*alpha - last_alpha;
+                double last;
+                if (mu > 0)
+                    last = std::min(mu, alpha + tau1*(alpha - last_alpha));
+                else
+                    last = std::max(mu, alpha + tau1*(alpha - last_alpha));
+
+
+                // pick a point between first and last by doing some kind of interpolation
+                if (last_alpha < alpha)
+                    alpha = last_alpha + (alpha-last_alpha)*poly_min_extrap(last_val, last_val_der, val, val_der);
+                else
+                    alpha = alpha + (last_alpha-alpha)*poly_min_extrap(val, val_der, last_val, last_val_der);
+
+                alpha = put_in_range(first,last,alpha);
+
+
+                last_alpha = temp;
+            }
+
+            last_val = val;
+            last_val_der = val_der;
+
+        }
+
+
+        // Now do the sectioning phase from 2.6.4
+        while (true)
+        {
+            double first = a + tau2*(b-a);
+            double last = b - tau3*(b-a);
+
+            // use interpolation to pick alpha between first and last
+            alpha = a + (b-a)*poly_min_extrap(a_val, a_val_der, b_val, b_val_der);
+            alpha = put_in_range(first,last,alpha);
+
+            const double val = f(alpha);
+            const double val_der = der(alpha);
+
+            // we are done with the line search since we found a value smaller
+            // than the minimum f value
+            if (val <= min_f)
+                return alpha;
+
+            // stop if the interval gets so small that it isn't shrinking any more due to rounding error 
+            if (a == first || b == last)
+            {
+                return b;
+            }
+
+
+            if (val > f0 + rho*alpha*d0 || val >= a_val)
+            {
+                b = alpha;
+                b_val = val;
+                b_val_der = val_der;
+            }
+            else
+            {
+                if (std::abs(val_der) <= thresh)
+                    return alpha;
+
+                if ( (b-a)*val_der >= 0)
+                {
+                    b = a;
+                    b_val = a_val;
+                    b_val_der = a_val_der;
+                }
+
+                a = alpha;
+                a_val = val;
+                a_val_der = val_der;
+            }
+        }
+    }
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_OPTIMIZATIOn_LINE_SEARCH_H_
+
--- a/dlib/optimization/optimization_line_search_abstract.h
+++ b/dlib/optimization/optimization_line_search_abstract.h
+// Copyright (C) 2008  Davis E. King (davisking@users.sourceforge.net)
+// License: Boost Software License   See LICENSE.txt for the full license.
+#undef DLIB_OPTIMIZATIOn_ABSTRACT_
+#ifdef DLIB_OPTIMIZATIOn_ABSTRACT_
+
+#include <cmath>
+#include <limits>
+#include "../matrix/matrix_abstract.h"
+#include "../algs.h"
+
+
+namespace dlib
+{
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename funct, 
+        typename T
+        >
+    class line_search_funct; 
+    /*!
+        This object is a function object that represents a line search function.
+
+        Moreover, it represents a function with the signature:
+            double l(double x)
+    !*/
+
+    template <
+        typename funct, 
+        typename T
+        >
+    const line_search_funct<funct,T> make_line_search_function (
+        const funct& f, 
+        const T& start, 
+        const T& direction
+    ); 
+    /*!
+        requires
+            - is_col_vector(start) && is_col_vector(direction) && start.size() == direction.size() 
+              (i.e. start and direction should be column vectors of the same size)
+            - f must return either a double or a column vector the same length as start
+            - f(start + 1.5*direction) should be a valid expression
+        ensures
+            - if (f returns a double) then
+                - returns a line search function that computes l(x) == f(start + x*direction)
+            - else
+                - returns a line search function that computes l(x) == dot(f(start + x*direction),direction).
+                  That is, we assume f is the derivative of some other function and that what
+                  f returns is a gradient vector. 
+                  So the following two expressions both create the derivative of l(x): 
+                    - derivative(make_line_search_function(funct,start,direction))
+                    - make_line_search_function(derivative(funct),start,direction)
+    !*/
+
+    template <
+        typename funct, 
+        typename T
+        >
+    const line_search_funct<funct,T> make_line_search_function (
+        const funct& f, 
+        const T& start, 
+        const T& direction,
+        double& f_out
+    ); 
+    /*!
+        This function is identical to the above three argument version of make_line_search_function() 
+        except that, if f() outputs a double, every time f() is evaluated its output is also stored 
+        into f_out.
+    !*/
+
+    template <
+        typename funct, 
+        typename T
+        >
+    const line_search_funct<funct,T> make_line_search_function (
+        const funct& f, 
+        const T& start, 
+        const T& direction,
+        T& gradient_out
+    ); 
+    /*!
+        This function is identical to the above three argument version of make_line_search_function() 
+        except that, if f() outputs a column vector, every time f() is evaluated its output is also 
+        stored into gradient_out.
+    !*/
+
+// ----------------------------------------------------------------------------------------
+
+    inline double poly_min_extrap (
+        double f0,
+        double d0,
+        double f1,
+        double d1
+    );
+    /*!
+        ensures
+            - let c(x) be a 3rd degree polynomial such that:
+                - c(0) == f0
+                - c(1) == f1
+                - derivative of c(x) at x==0 is d0
+                - derivative of c(x) at x==1 is d1
+            - returns the point in the range [0,1] that minimizes the polynomial c(x) 
+    !*/
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename funct, 
+        typename funct_der
+        >
+    double line_search (
+        const funct& f, 
+        const double f0,
+        const funct_der& der, 
+        const double d0,
+        double rho, 
+        double sigma, 
+        double min_f
+    )
+    /*!
+        requires
+            - 0 < rho < sigma < 1
+            - f and der are scalar functions of scalars
+              (e.g. line_search_funct objects)
+            - der is the derivative of f
+            - f0 == f(0)
+            - d0 == der(0)
+        ensures
+            - Performs a line search and uses the strong Wolfe conditions to decide when
+              the search can stop.  
+                - rho == the parameter of the Wolfe sufficient decrease condition
+                - sigma == the parameter of the Wolfe curvature condition
+            - returns a value alpha such that f(alpha) is significantly closer to 
+              the minimum of f than f(0).
+            - It is assumed that the minimum possible value of f(x) is min_f.  So if
+              an alpha is found such that f(alpha) <= min_f then the search stops
+              immediately.
+    !*/
+
+    /*
+        A good discussion of the Wolfe conditions and line search algorithms in 
+        general can be found in the book Practical Methods of Optimization by R. Fletcher
+        and also in the more recent book Numerical Optimization by Nocedal and Wright.
+    */
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_OPTIMIZATIOn_ABSTRACT_
+
--- a/dlib/optimization/optimization_search_strategies.h
+++ b/dlib/optimization/optimization_search_strategies.h
+// Copyright (C) 2008  Davis E. King (davisking@users.sourceforge.net)
+// License: Boost Software License   See LICENSE.txt for the full license.
+#ifndef DLIB_OPTIMIZATIOn_SEARCH_STRATEGIES_H_
+#define DLIB_OPTIMIZATIOn_SEARCH_STRATEGIES_H_
+
+#include <cmath>
+#include <limits>
+#include "../matrix.h"
+#include "../algs.h"
+#include "optimization_search_strategies_abstract.h"
+#include "../sequence.h"
+
+namespace dlib
+{
+
+// ----------------------------------------------------------------------------------------
+
+    class cg_search_strategy
+    {
+    public:
+        cg_search_strategy() : been_used(false) {}
+
+        double get_wolfe_rho (
+        ) const { return 0.001; }
+
+        double get_wolfe_sigma (
+        ) const { return 0.01; }
+
+        template <typename T>
+        const matrix<double,0,1>& get_next_direction (
+            const T& ,
+            const double ,
+            const T& funct_derivative
+        )
+        {
+            if (been_used == false)
+            {
+                been_used = true;
+                prev_direction = -funct_derivative;
+            }
+            else
+            {
+                // Use the Polak-Ribiere (4.1.12) conjugate gradient described by Fletcher on page 83
+                const double temp = trans(prev_derivative)*prev_derivative;
+                // If this value hits zero then just use the direction of steepest descent.
+                if (std::abs(temp) < std::numeric_limits<double>::epsilon())
+                {
+                    prev_derivative = funct_derivative;
+                    prev_direction = -funct_derivative;
+                    return prev_direction;
+                }
+
+                double b = trans(funct_derivative-prev_derivative)*funct_derivative/(temp);
+                prev_direction = -funct_derivative + b*prev_direction;
+
+            }
+
+            prev_derivative = funct_derivative;
+            return prev_direction;
+        }
+
+    private:
+        bool been_used;
+        matrix<double,0,1> prev_derivative;
+        matrix<double,0,1> prev_direction;
+    };
+
+// ----------------------------------------------------------------------------------------
+
+    class bfgs_search_strategy
+    {
+    public:
+        bfgs_search_strategy() : been_used(false), been_used_twice(false) {}
+
+        double get_wolfe_rho (
+        ) const { return 0.01; }
+
+        double get_wolfe_sigma (
+        ) const { return 0.9; }
+
+        template <typename T>
+        const matrix<double,0,1>& get_next_direction (
+            const T& x,
+            const double ,
+            const T& funct_derivative
+        )
+        {
+            if (been_used == false)
+            {
+                been_used = true;
+                H = identity_matrix<double>(x.size());
+            }
+            else
+            {
+                // update H with the BFGS formula from (3.2.12) on page 55 of Fletcher 
+                delta = (x-prev_x); 
+                gamma = funct_derivative-prev_derivative;
+
+                double dg = dot(delta,gamma);
+
+                // Try to set the initial value of the H matrix to something reasonable if we are still
+                // in the early stages of figuring out what it is.  This formula below is what is suggested
+                // in the book Numerical Optimization by Nocedal and Wright in the chapter on Quasi-Newton methods.
+                if (been_used_twice == false)
+                {
+                    double gg = trans(gamma)*gamma;
+                    if (std::abs(gg) > std::numeric_limits<double>::epsilon())
+                    {
+                        const double temp = put_in_range(0.01, 100, dg/gg);
+                        H = diagm(uniform_matrix<double>(x.size(),1, temp));
+                        been_used_twice = true;
+                    }
+                }
+
+                Hg = H*gamma;
+                gH = trans(trans(gamma)*H);
+                double gHg = trans(gamma)*H*gamma;
+                if (gHg < std::numeric_limits<double>::infinity() && dg < std::numeric_limits<double>::infinity() &&
+                    dg != 0)
+                {
+                    H += (1 + gHg/dg)*delta*trans(delta)/(dg) - (delta*trans(gH) + Hg*trans(delta))/(dg);
+                }
+                else
+                {
+                    H = identity_matrix<double>(H.nr());
+                    been_used_twice = false;
+                }
+            }
+
+            prev_x = x;
+            prev_direction = -H*funct_derivative;
+            prev_derivative = funct_derivative;
+            return prev_direction;
+        }
+
+    private:
+        bool been_used;
+        bool been_used_twice;
+        matrix<double,0,1> prev_x;
+        matrix<double,0,1> prev_derivative;
+        matrix<double,0,1> prev_direction;
+        matrix<double> H;
+        matrix<double,0,1> delta, gamma, Hg, gH;
+    };
+
+// ----------------------------------------------------------------------------------------
+
+    class lbfgs_search_strategy
+    {
+    public:
+        lbfgs_search_strategy(unsigned long max_size_) : max_size(max_size_), been_used(false) {}
+
+        lbfgs_search_strategy(const lbfgs_search_strategy& item) 
+        {
+            max_size = item.max_size;
+            been_used = item.been_used;
+            prev_x = item.prev_x;
+            prev_derivative = item.prev_derivative;
+            prev_direction = item.prev_direction;
+            alpha = item.alpha;
+            dh_temp = item.dh_temp;
+        }
+
+        double get_wolfe_rho (
+        ) const { return 0.01; }
+
+        double get_wolfe_sigma (
+        ) const { return 0.9; }
+
+        template <typename T>
+        const matrix<double,0,1>& get_next_direction (
+            const T& x,
+            const double ,
+            const T& funct_derivative
+        )
+        {
+            if (been_used == false)
+            {
+                prev_direction = -funct_derivative;
+                been_used = true;
+            }
+            else
+            {
+                // add an element into the stored data sequence
+                dh_temp.s = x - prev_x;
+                dh_temp.y = funct_derivative - prev_derivative;
+                double temp = dlib::dot(dh_temp.s, dh_temp.y);
+                // only accept this bit of data if temp isn't zero
+                if (std::abs(temp) > std::numeric_limits<double>::epsilon())
+                {
+                    dh_temp.rho = 1/temp;
+                    data.add(data.size(), dh_temp);
+                }
+                else
+                {
+                    data.clear();
+                }
+
+                if (data.size() > 0)
+                {
+                    // This block of code is from algorithm 7.4 in the Nocedal book.
+
+                    prev_direction = -funct_derivative;
+                    alpha.resize(data.size());
+                    for (unsigned long i = data.size()-1; i < data.size(); --i)
+                    {
+                        alpha[i] = data[i].rho*dot(data[i].s, prev_direction);
+                        prev_direction -= alpha[i]*data[i].y;
+                    }
+
+                    // Take a guess at what the first H matrix should be.  This formula below is what is suggested
+                    // in the book Numerical Optimization by Nocedal and Wright in the chapter on Large Scale 
+                    // Unconstrained Optimization (in the L-BFGS section).
+                    double H_0 = 1.0/data[data.size()-1].rho/dot(data[data.size()-1].y, data[data.size()-1].y);
+                    H_0 = put_in_range(0.001, 1000.0, H_0);
+                    prev_direction *= H_0;
+
+                    for (unsigned long i = 0; i < data.size(); ++i)
+                    {
+                        double beta = data[i].rho*dot(data[i].y, prev_direction);
+                        prev_direction += data[i].s * (alpha[i] - beta);
+                    }
+                }
+                else
+                {
+                    prev_derivative = -funct_derivative;
+                }
+
+            }
+
+            if (data.size() > max_size)
+            {
+                // remove the oldest element in the data sequence
+                data.remove(0, dh_temp);
+            }
+
+            prev_x = x;
+            prev_derivative = funct_derivative;
+            return prev_direction;
+        }
+
+    private:
+
+        struct data_helper
+        {
+            matrix<double,0,1> s;
+            matrix<double,0,1> y;
+            double rho;
+
+            friend void swap(data_helper& a, data_helper& b)
+            {
+                a.s.swap(b.s);
+                a.y.swap(b.y);
+                std::swap(a.rho, b.rho);
+            }
+        };
+        sequence<data_helper>::kernel_2a data;
+
+        unsigned long max_size;
+        bool been_used;
+        matrix<double,0,1> prev_x;
+        matrix<double,0,1> prev_derivative;
+        matrix<double,0,1> prev_direction;
+        std::vector<double> alpha;
+
+        data_helper dh_temp;
+    };
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_OPTIMIZATIOn_SEARCH_STRATEGIES_H_
+
--- a/dlib/optimization/optimization_search_strategies_abstract.h
+++ b/dlib/optimization/optimization_search_strategies_abstract.h
+// Copyright (C) 2008  Davis E. King (davisking@users.sourceforge.net)
+// License: Boost Software License   See LICENSE.txt for the full license.
+#undef DLIB_OPTIMIZATIOn_SEARCH_STRATEGIES_ABSTRACT_
+#ifdef DLIB_OPTIMIZATIOn_SEARCH_STRATEGIES_ABSTRACT_
+
+#include <cmath>
+#include <limits>
+#include "../matrix/matrix_abstract.h"
+#include "../algs.h"
+
+
+namespace dlib
+{
+    /*
+        A good discussion of the search strategies in this file can be found in the 
+        following book:  Numerical Optimization by Nocedal and Wright.
+    */
+
+// ----------------------------------------------------------------------------------------
+
+    class cg_search_strategy
+    {
+        /*!
+            WHAT THIS OBJECT REPRESENTS
+                This object represents a strategy for determining which direction
+                a line search should be carried out along.  This particular object
+                is an implementation of the Polak-Ribiere conjugate gradient method
+                for determining this direction.
+
+                This method uses an amount of memory that is linear in the number
+                of variables to be optimized.  So it is capable of handling problems
+                with a very large number of variables.  However, it is generally
+                not as good as the L-BFGS algorithm (which is defined below in
+                the lbfgs_search_strategy class).
+        !*/
+
+    public:
+        cg_search_strategy(
+        );
+        /*!
+            ensures
+                - This object is properly initialized and ready to generate
+                  search directions.
+        !*/
+
+        double get_wolfe_rho (
+        ) const;
+        /*!
+            ensures
+                - returns the value of the Wolfe rho parameter that should be used when 
+                  this search strategy is used with the line_search() function.
+        !*/
+
+        double get_wolfe_sigma (
+        ) const; 
+        /*!
+            ensures
+                - returns the value of the Wolfe sigma parameter that should be used when 
+                  this search strategy is used with the line_search() function.
+        !*/
+
+        template <typename T>
+        const matrix<double,0,1>& get_next_direction (
+            const T& x,
+            const double funct_value,
+            const T& funct_derivative
+        );
+        /*!
+            requires
+                - for some function f():
+                    - funct_value == f(x)
+                    - funct_derivative == derivative(f)(x)
+            ensures
+                - Assuming that a line search is going to be conducted starting from the point x,
+                  this function returns the direction in which the search should proceed.
+        !*/
+
+    };
+
+// ----------------------------------------------------------------------------------------
+
+    class bfgs_search_strategy
+    {
+        /*!
+            WHAT THIS OBJECT REPRESENTS
+                This object represents a strategy for determining which direction
+                a line search should be carried out along.  This particular object
+                is an implementation of the BFGS quasi-newton method for determining 
+                this direction.
+
+                This method uses an amount of memory that is quadratic in the number
+                of variables to be optimized.  It is generally very effective but 
+                if your problem has a very large number of variables then it isn't 
+                appropriate.  Instead You should try the lbfgs_search_strategy.
+        !*/
+
+    public:
+        bfgs_search_strategy(
+        );
+        /*!
+            ensures
+                - This object is properly initialized and ready to generate
+                  search directions.
+        !*/
+
+        double get_wolfe_rho (
+        ) const;
+        /*!
+            ensures
+                - returns the value of the Wolfe rho parameter that should be used when 
+                  this search strategy is used with the line_search() function.
+        !*/
+
+        double get_wolfe_sigma (
+        ) const; 
+        /*!
+            ensures
+                - returns the value of the Wolfe sigma parameter that should be used when 
+                  this search strategy is used with the line_search() function.
+        !*/
+
+        template <typename T>
+        const matrix<double,0,1>& get_next_direction (
+            const T& x,
+            const double funct_value,
+            const T& funct_derivative
+        );
+        /*!
+            requires
+                - for some function f():
+                    - funct_value == f(x)
+                    - funct_derivative == derivative(f)(x)
+            ensures
+                - Assuming that a line search is going to be conducted starting from the point x,
+                  this function returns the direction in which the search should proceed.
+        !*/
+    };
+
+// ----------------------------------------------------------------------------------------
+
+    class lbfgs_search_strategy
+    {
+        /*!
+            WHAT THIS OBJECT REPRESENTS
+                This object represents a strategy for determining which direction
+                a line search should be carried out along.  This particular object
+                is an implementation of the L-BFGS quasi-newton method for determining 
+                this direction.
+
+                This method uses an amount of memory that is linear in the number
+                of variables to be optimized.  This makes it an excellent method 
+                to use when an optimization problem has a large number of variables.
+        !*/
+    public:
+        lbfgs_search_strategy(
+            unsigned long max_size
+        ); 
+        /*!
+            requires
+                - max_size > 0
+            ensures
+                - This object is properly initialized and ready to generate
+                  search directions.
+                - L-BFGS works by remembering a certain number of position and gradient 
+                  pairs.  It uses this remembered information to compute search directions.
+                  The max_size argument determines how many of these pairs will be remembered.
+                  Typically, using between 3 and 30 pairs performs well for many problems.
+        !*/
+
+        double get_wolfe_rho (
+        ) const;
+        /*!
+            ensures
+                - returns the value of the Wolfe rho parameter that should be used when 
+                  this search strategy is used with the line_search() function.
+        !*/
+
+        double get_wolfe_sigma (
+        ) const; 
+        /*!
+            ensures
+                - returns the value of the Wolfe sigma parameter that should be used when 
+                  this search strategy is used with the line_search() function.
+        !*/
+
+        template <typename T>
+        const matrix<double,0,1>& get_next_direction (
+            const T& x,
+            const double funct_value,
+            const T& funct_derivative
+        );
+        /*!
+            requires
+                - for some function f():
+                    - funct_value == f(x)
+                    - funct_derivative == derivative(f)(x)
+            ensures
+                - Assuming that a line search is going to be conducted starting from the point x,
+                  this function returns the direction in which the search should proceed.
+        !*/
+    };
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_OPTIMIZATIOn_SEARCH_STRATEGIES_ABSTRACT_
+
--- a/dlib/optimization/optimization_stop_strategies.h
+++ b/dlib/optimization/optimization_stop_strategies.h
+// Copyright (C) 2008  Davis E. King (davisking@users.sourceforge.net)
+// License: Boost Software License   See LICENSE.txt for the full license.
+#ifndef DLIB_OPTIMIZATIOn_STOP_STRATEGIES_H_
+#define DLIB_OPTIMIZATIOn_STOP_STRATEGIES_H_
+
+#include <cmath>
+#include <limits>
+#include "../matrix.h"
+#include "../algs.h"
+#include "optimization_stop_strategies_abstract.h"
+
+namespace dlib
+{
+
+// ----------------------------------------------------------------------------------------
+
+    class objective_delta_stop_strategy
+    {
+    public:
+        objective_delta_stop_strategy (
+            double min_delta = 1e-7
+        ) : _been_used(false), _min_delta(min_delta), _max_iter(0), _cur_iter(0), _prev_funct_value(0) {}
+
+        objective_delta_stop_strategy (
+            double min_delta,
+            unsigned long max_iter
+        ) : _been_used(false), _min_delta(min_delta), _max_iter(max_iter), _cur_iter(0), _prev_funct_value(0) {}
+
+        template <typename T>
+        bool should_continue_search (
+            const T& ,
+            const double funct_value,
+            const T& 
+        ) 
+        {
+            ++_cur_iter;
+            if (_been_used)
+            {
+                // Check if we have hit the max allowable number of iterations.  (but only
+                // check if _max_iter is enabled (i.e. not 0)).
+                if (_max_iter != 0 && _cur_iter > _max_iter)
+                    return false;
+
+                // check if the function change was too small
+                if (std::abs(funct_value - _prev_funct_value) < _min_delta)
+                    return false;
+            }
+
+            _been_used = true;
+            _prev_funct_value = funct_value;
+            return true;
+        }
+
+    private:
+
+        bool _been_used;
+        double _min_delta;
+        unsigned long _max_iter;
+        unsigned long _cur_iter;
+        double _prev_funct_value;
+    };
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_OPTIMIZATIOn_STOP_STRATEGIES_H_
+
--- a/dlib/optimization/optimization_stop_strategies_abstract.h
+++ b/dlib/optimization/optimization_stop_strategies_abstract.h
+// Copyright (C) 2008  Davis E. King (davisking@users.sourceforge.net)
+// License: Boost Software License   See LICENSE.txt for the full license.
+#undef DLIB_OPTIMIZATIOn_STOP_STRATEGIES_ABSTRACT_
+#ifdef DLIB_OPTIMIZATIOn_STOP_STRATEGIES_ABSTRACT_
+
+#include <cmath>
+#include <limits>
+#include "../matrix/matrix_abstract.h"
+#include "../algs.h"
+
+
+namespace dlib
+{
+
+// ----------------------------------------------------------------------------------------
+
+    class objective_delta_stop_strategy
+    {
+        /*!
+            WHAT THIS OBJECT REPRESENTS
+                This object represents a strategy for deciding if an optimization
+                algorithm should terminate.   This particular object looks at the 
+                change in the objective function from one iteration to the next and 
+                bases its decision on how large this change is.  If the change
+                is below a user given threshold then the search stops.
+        !*/
+
+    public:
+        objective_delta_stop_strategy (
+            double min_delta = 1e-7
+        ); 
+        /*!
+            requires
+                - min_delta >= 0
+            ensures
+                - This stop strategy object will only consider a search to be complete
+                  if a change in an objective function from one iteration to the next
+                  is less than min_delta.
+        !*/
+
+        objective_delta_stop_strategy (
+            double min_delta,
+            unsigned long max_iter
+        );
+        /*!
+            requires
+                - min_delta >= 0
+                - max_iter > 0
+            ensures
+                - This stop strategy object will only consider a search to be complete
+                  if a change in an objective function from one iteration to the next
+                  is less than min_delta or more than max_iter iterations has been
+                  executed.
+        !*/
+
+        template <typename T>
+        bool should_continue_search (
+            const T& x,
+            const double funct_value,
+            const T& funct_derivative
+        );
+        /*!
+            requires
+                - for some function f():
+                    - funct_value == f(x)
+                    - funct_derivative == derivative(f)(x)
+            ensures
+                - returns true if the point x doest not satisfy the stopping condition and
+                  false otherwise.
+        !*/
+
+    };
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_OPTIMIZATIOn_STOP_STRATEGIES_ABSTRACT_
+