Commit 9fb35897 authored by Davis King's avatar Davis King

Split the code up into multiple files and set up the abstracts for each of them.

--HG--
rename : dlib/optimization/optimization.h => dlib/optimization/optimization_line_search.h
rename : dlib/optimization/optimization_abstract.h => dlib/optimization/optimization_line_search_abstract.h
rename : dlib/optimization/optimization.h => dlib/optimization/optimization_search_strategies.h
rename : dlib/optimization/optimization_abstract.h => dlib/optimization/optimization_search_strategies_abstract.h
rename : dlib/optimization/optimization.h => dlib/optimization/optimization_stop_strategies.h
rename : dlib/optimization/optimization_abstract.h => dlib/optimization/optimization_stop_strategies_abstract.h
extra : convert_revision : svn%3Afdd8eb12-d10e-0410-9acb-85c331704f74/trunk%403190
parent 75b3839a
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
// Copyright (C) 2008 Davis E. King (davisking@users.sourceforge.net)
// License: Boost Software License See LICENSE.txt for the full license.
#undef DLIB_OPTIMIZATIOn_ABSTRACT_
#ifdef DLIB_OPTIMIZATIOn_ABSTRACT_
#include <cmath>
#include <limits>
#include "../matrix/matrix_abstract.h"
#include "../algs.h"
namespace dlib
{
// ----------------------------------------------------------------------------------------
// Forward declaration of the function object type returned by the
// make_line_search_function() overloads declared in this file.
template <
typename funct,
typename T
>
class line_search_funct;
/*!
    WHAT THIS OBJECT REPRESENTS
        This object is a function object that represents a line search function.
        That is, it represents a one dimensional function with the signature:
            double l(double x)

        Instances are obtained from make_line_search_function(), which defines
        l(x) in terms of a function f, a starting point, and a search direction.
!*/
// Builds the one dimensional function l(x) obtained by restricting f to the
// line that passes through start and runs along direction.
template <
typename funct,
typename T
>
const line_search_funct<funct,T> make_line_search_function (
const funct& f,
const T& start,
const T& direction
);
/*!
    requires
        - is_col_vector(start) && is_col_vector(direction) && start.size() == direction.size()
          (i.e. start and direction should be column vectors of the same size)
        - f must return either a double or a column vector the same length as start
        - f(start + 1.5*direction) should be a valid expression
    ensures
        - if (f returns a double) then
            - returns a line search function that computes l(x) == f(start + x*direction)
        - else
            - returns a line search function that computes l(x) == dot(f(start + x*direction),direction).
              That is, we assume f is the derivative of some other function and that what
              f returns is a gradient vector.
              So the following two expressions both create the derivative of l(x):
                - derivative(make_line_search_function(f,start,direction))
                - make_line_search_function(derivative(f),start,direction)
!*/
// Overload that additionally reports the most recent scalar value of f
// through the f_out output parameter.
template <
typename funct,
typename T
>
const line_search_funct<funct,T> make_line_search_function (
const funct& f,
const T& start,
const T& direction,
double& f_out
);
/*!
    This function is identical to the above three argument version of make_line_search_function()
    except that, if f() outputs a double, every time f() is evaluated its output is also stored
    into f_out.

    NOTE(review): f_out is taken by reference, so it presumably has to stay alive for
    as long as the returned function object is used -- confirm against the implementation.
!*/
// Overload that additionally reports the most recent gradient returned by f
// through the gradient_out output parameter.
template <
typename funct,
typename T
>
const line_search_funct<funct,T> make_line_search_function (
const funct& f,
const T& start,
const T& direction,
T& gradient_out
);
/*!
    This function is identical to the above three argument version of make_line_search_function()
    except that, if f() outputs a column vector, every time f() is evaluated its output is also
    stored into gradient_out.

    NOTE(review): gradient_out is taken by reference, so it presumably has to stay alive
    for as long as the returned function object is used -- confirm against the implementation.
!*/
// ----------------------------------------------------------------------------------------
// Cubic interpolation helper: finds the minimizer, restricted to [0,1], of the
// cubic polynomial defined by two function values and two derivative values.
inline double poly_min_extrap (
double f0,
double d0,
double f1,
double d1
);
/*!
    ensures
        - let c(x) be a 3rd degree polynomial such that:
            - c(0) == f0
            - c(1) == f1
            - derivative of c(x) at x==0 is d0
            - derivative of c(x) at x==1 is d1
        - returns the point in the range [0,1] that minimizes the polynomial c(x)
          (f0 and f1 are function values, d0 and d1 are the corresponding
          derivative values at x==0 and x==1)
!*/
// ----------------------------------------------------------------------------------------
// Line search along a one dimensional function f using the strong Wolfe
// conditions as the termination test.  (Declaration only; the terminating
// semicolon was missing from this spec.)
template <
    typename funct,
    typename funct_der
    >
double line_search (
    const funct& f,
    const double f0,
    const funct_der& der,
    const double d0,
    double rho,
    double sigma,
    double min_f
);
/*!
    requires
        - 0 < rho < sigma < 1
        - f and der are scalar functions of scalars
          (e.g. line_search_funct objects)
        - der is the derivative of f
        - f0 == f(0)
        - d0 == der(0)
    ensures
        - Performs a line search and uses the strong Wolfe conditions to decide when
          the search can stop.
            - rho == the parameter of the Wolfe sufficient decrease condition
            - sigma == the parameter of the Wolfe curvature condition
        - returns a value alpha such that f(alpha) is significantly closer to
          the minimum of f than f(0).
        - It is assumed that the minimum possible value of f(x) is min_f.  So if
          an alpha is found such that f(alpha) <= min_f then the search stops
          immediately.
!*/
/*
    A good discussion of the Wolfe conditions and line search algorithms in
    general can be found in the book Practical Methods of Optimization by R. Fletcher
    and also in the more recent book Numerical Optimization by Nocedal and Wright.
*/
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_OPTIMIZATIOn_ABSTRACT_
// Copyright (C) 2008 Davis E. King (davisking@users.sourceforge.net)
// License: Boost Software License See LICENSE.txt for the full license.
#ifndef DLIB_OPTIMIZATIOn_SEARCH_STRATEGIES_H_
#define DLIB_OPTIMIZATIOn_SEARCH_STRATEGIES_H_
#include <cmath>
#include <limits>
#include "../matrix.h"
#include "../algs.h"
#include "optimization_search_strategies_abstract.h"
#include "../sequence.h"
namespace dlib
{
// ----------------------------------------------------------------------------------------
class cg_search_strategy
{
    /*
        Search direction generator based on the Polak-Ribiere conjugate
        gradient formula.  It remembers the gradient and the direction it
        produced on the previous call and combines them with the new gradient.
    */
public:
    cg_search_strategy() : been_used(false) {}

    // Wolfe sufficient decrease parameter to use with this strategy.
    double get_wolfe_rho (
    ) const { return 0.001; }

    // Wolfe curvature parameter to use with this strategy.
    double get_wolfe_sigma (
    ) const { return 0.01; }

    // Returns the next search direction given the gradient at the current
    // point.  The point itself and the function value are not used.
    template <typename T>
    const matrix<double,0,1>& get_next_direction (
        const T& ,
        const double ,
        const T& funct_derivative
    )
    {
        if (!been_used)
        {
            // Nothing is remembered yet, so start with steepest descent.
            been_used = true;
            prev_direction = -funct_derivative;
        }
        else
        {
            // Squared length of the previous gradient.  This is the denominator
            // of the Polak-Ribiere (4.1.12) formula described by Fletcher on page 83.
            const double prev_grad_sq = trans(prev_derivative)*prev_derivative;

            if (std::abs(prev_grad_sq) < std::numeric_limits<double>::epsilon())
            {
                // The denominator is effectively zero, so fall back to the
                // direction of steepest descent.
                prev_direction = -funct_derivative;
            }
            else
            {
                const double beta = trans(funct_derivative-prev_derivative)*funct_derivative/(prev_grad_sq);
                prev_direction = -funct_derivative + beta*prev_direction;
            }
        }

        // Remember the gradient for the next call.
        prev_derivative = funct_derivative;
        return prev_direction;
    }

private:
    bool been_used;                      // false until the first direction is produced
    matrix<double,0,1> prev_derivative;  // gradient seen on the previous call
    matrix<double,0,1> prev_direction;   // direction returned by the previous call
};
// ----------------------------------------------------------------------------------------
class bfgs_search_strategy
{
    /*
        Search direction generator based on the BFGS quasi-Newton method.
        It maintains H, an estimate of the inverse Hessian, and returns
        -H*gradient as the search direction.
    */
public:
    bfgs_search_strategy() : been_used(false), been_used_twice(false) {}

    // Wolfe sufficient decrease parameter to use with this strategy.
    double get_wolfe_rho (
    ) const { return 0.01; }

    // Wolfe curvature parameter to use with this strategy.
    double get_wolfe_sigma (
    ) const { return 0.9; }

    // Returns the next search direction given the current point x and the
    // gradient at x.  The function value is not used.
    template <typename T>
    const matrix<double,0,1>& get_next_direction (
        const T& x,
        const double ,
        const T& funct_derivative
    )
    {
        if (!been_used)
        {
            // First call: no curvature information yet, so H is the identity.
            been_used = true;
            H = identity_matrix<double>(x.size());
        }
        else
        {
            // update H with the BFGS formula from (3.2.12) on page 55 of Fletcher
            delta = (x-prev_x);                        // step taken since the last call
            gamma = funct_derivative-prev_derivative;  // change in gradient over that step
            const double delta_dot_gamma = dot(delta,gamma);

            // While still in the early stage, try to seed H with a reasonable
            // scaled identity.  This is the scaling suggested in the book
            // Numerical Optimization by Nocedal and Wright in the chapter on
            // Quasi-Newton methods.
            if (!been_used_twice)
            {
                const double gamma_sq = trans(gamma)*gamma;
                if (std::abs(gamma_sq) > std::numeric_limits<double>::epsilon())
                {
                    const double scale = put_in_range(0.01, 100, delta_dot_gamma/gamma_sq);
                    H = diagm(uniform_matrix<double>(x.size(),1, scale));
                    been_used_twice = true;
                }
            }

            Hg = H*gamma;
            gH = trans(trans(gamma)*H);
            const double gHg = trans(gamma)*H*gamma;

            const double inf = std::numeric_limits<double>::infinity();
            const bool update_is_usable = gHg < inf && delta_dot_gamma < inf && delta_dot_gamma != 0;
            if (update_is_usable)
            {
                H += (1 + gHg/delta_dot_gamma)*delta*trans(delta)/(delta_dot_gamma) - (delta*trans(gH) + Hg*trans(delta))/(delta_dot_gamma);
            }
            else
            {
                // The update would divide by zero or involve non-finite
                // values, so throw away the estimate and start over.
                H = identity_matrix<double>(H.nr());
                been_used_twice = false;
            }
        }

        // Remember this call's inputs and result for the next update.
        prev_x = x;
        prev_direction = -H*funct_derivative;
        prev_derivative = funct_derivative;
        return prev_direction;
    }

private:
    bool been_used;        // false until the first direction is produced
    bool been_used_twice;  // true once H has been seeded with the scaled identity
    matrix<double,0,1> prev_x;
    matrix<double,0,1> prev_derivative;
    matrix<double,0,1> prev_direction;
    matrix<double> H;      // current inverse Hessian estimate
    matrix<double,0,1> delta, gamma, Hg, gH;  // scratch vectors reused between calls
};
// ----------------------------------------------------------------------------------------
class lbfgs_search_strategy
{
public:
lbfgs_search_strategy(unsigned long max_size_) : max_size(max_size_), been_used(false) {}
lbfgs_search_strategy(const lbfgs_search_strategy& item)
{
max_size = item.max_size;
been_used = item.been_used;
prev_x = item.prev_x;
prev_derivative = item.prev_derivative;
prev_direction = item.prev_direction;
alpha = item.alpha;
dh_temp = item.dh_temp;
}
double get_wolfe_rho (
) const { return 0.01; }
double get_wolfe_sigma (
) const { return 0.9; }
template <typename T>
const matrix<double,0,1>& get_next_direction (
const T& x,
const double ,
const T& funct_derivative
)
{
if (been_used == false)
{
prev_direction = -funct_derivative;
been_used = true;
}
else
{
// add an element into the stored data sequence
dh_temp.s = x - prev_x;
dh_temp.y = funct_derivative - prev_derivative;
double temp = dlib::dot(dh_temp.s, dh_temp.y);
// only accept this bit of data if temp isn't zero
if (std::abs(temp) > std::numeric_limits<double>::epsilon())
{
dh_temp.rho = 1/temp;
data.add(data.size(), dh_temp);
}
else
{
data.clear();
}
if (data.size() > 0)
{
// This block of code is from algorithm 7.4 in the Nocedal book.
prev_direction = -funct_derivative;
alpha.resize(data.size());
for (unsigned long i = data.size()-1; i < data.size(); --i)
{
alpha[i] = data[i].rho*dot(data[i].s, prev_direction);
prev_direction -= alpha[i]*data[i].y;
}
// Take a guess at what the first H matrix should be. This formula below is what is suggested
// in the book Numerical Optimization by Nocedal and Wright in the chapter on Large Scale
// Unconstrained Optimization (in the L-BFGS section).
double H_0 = 1.0/data[data.size()-1].rho/dot(data[data.size()-1].y, data[data.size()-1].y);
H_0 = put_in_range(0.001, 1000.0, H_0);
prev_direction *= H_0;
for (unsigned long i = 0; i < data.size(); ++i)
{
double beta = data[i].rho*dot(data[i].y, prev_direction);
prev_direction += data[i].s * (alpha[i] - beta);
}
}
else
{
prev_derivative = -funct_derivative;
}
}
if (data.size() > max_size)
{
// remove the oldest element in the data sequence
data.remove(0, dh_temp);
}
prev_x = x;
prev_derivative = funct_derivative;
return prev_direction;
}
private:
struct data_helper
{
matrix<double,0,1> s;
matrix<double,0,1> y;
double rho;
friend void swap(data_helper& a, data_helper& b)
{
a.s.swap(b.s);
a.y.swap(b.y);
std::swap(a.rho, b.rho);
}
};
sequence<data_helper>::kernel_2a data;
unsigned long max_size;
bool been_used;
matrix<double,0,1> prev_x;
matrix<double,0,1> prev_derivative;
matrix<double,0,1> prev_direction;
std::vector<double> alpha;
data_helper dh_temp;
};
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_OPTIMIZATIOn_SEARCH_STRATEGIES_H_
// Copyright (C) 2008 Davis E. King (davisking@users.sourceforge.net)
// License: Boost Software License See LICENSE.txt for the full license.
#undef DLIB_OPTIMIZATIOn_SEARCH_STRATEGIES_ABSTRACT_
#ifdef DLIB_OPTIMIZATIOn_SEARCH_STRATEGIES_ABSTRACT_
#include <cmath>
#include <limits>
#include "../matrix/matrix_abstract.h"
#include "../algs.h"
namespace dlib
{
/*
A good discussion of the search strategies in this file can be found in the
following book: Numerical Optimization by Nocedal and Wright.
*/
// ----------------------------------------------------------------------------------------
class cg_search_strategy
{
/*!
    WHAT THIS OBJECT REPRESENTS
        This object represents a strategy for determining which direction
        a line search should be carried out along.  This particular object
        is an implementation of the Polak-Ribiere conjugate gradient method
        for determining this direction.

        This method uses an amount of memory that is linear in the number
        of variables to be optimized.  So it is capable of handling problems
        with a very large number of variables.  However, it is generally
        not as good as the L-BFGS algorithm (which is defined below in
        the lbfgs_search_strategy class).

        Note that this object is stateful.  Each call to get_next_direction()
        records the gradient it was given and the direction it returned, and
        the next call builds on them.
!*/
public:
cg_search_strategy(
);
/*!
    ensures
        - This object is properly initialized and ready to generate
          search directions.
!*/
double get_wolfe_rho (
) const;
/*!
    ensures
        - returns the value of the Wolfe rho parameter that should be used when
          this search strategy is used with the line_search() function.
!*/
double get_wolfe_sigma (
) const;
/*!
    ensures
        - returns the value of the Wolfe sigma parameter that should be used when
          this search strategy is used with the line_search() function.
!*/
template <typename T>
const matrix<double,0,1>& get_next_direction (
const T& x,
const double funct_value,
const T& funct_derivative
);
/*!
    requires
        - for some function f():
            - funct_value == f(x)
            - funct_derivative == derivative(f)(x)
    ensures
        - Assuming that a line search is going to be conducted starting from the point x,
          this function returns the direction in which the search should proceed.
        - The result is returned by const reference, so copy it if it needs to be
          kept independently of this object.
!*/
};
// ----------------------------------------------------------------------------------------
class bfgs_search_strategy
{
/*!
    WHAT THIS OBJECT REPRESENTS
        This object represents a strategy for determining which direction
        a line search should be carried out along.  This particular object
        is an implementation of the BFGS quasi-newton method for determining
        this direction.

        This method uses an amount of memory that is quadratic in the number
        of variables to be optimized.  It is generally very effective but
        if your problem has a very large number of variables then it isn't
        appropriate.  Instead, you should try the lbfgs_search_strategy.

        Note that this object is stateful.  Each call to get_next_direction()
        records the point and gradient it was given, and the next call uses
        them to update its internal curvature estimate.
!*/
public:
bfgs_search_strategy(
);
/*!
    ensures
        - This object is properly initialized and ready to generate
          search directions.
!*/
double get_wolfe_rho (
) const;
/*!
    ensures
        - returns the value of the Wolfe rho parameter that should be used when
          this search strategy is used with the line_search() function.
!*/
double get_wolfe_sigma (
) const;
/*!
    ensures
        - returns the value of the Wolfe sigma parameter that should be used when
          this search strategy is used with the line_search() function.
!*/
template <typename T>
const matrix<double,0,1>& get_next_direction (
const T& x,
const double funct_value,
const T& funct_derivative
);
/*!
    requires
        - for some function f():
            - funct_value == f(x)
            - funct_derivative == derivative(f)(x)
    ensures
        - Assuming that a line search is going to be conducted starting from the point x,
          this function returns the direction in which the search should proceed.
        - The result is returned by const reference, so copy it if it needs to be
          kept independently of this object.
!*/
};
// ----------------------------------------------------------------------------------------
class lbfgs_search_strategy
{
/*!
    WHAT THIS OBJECT REPRESENTS
        This object represents a strategy for determining which direction
        a line search should be carried out along.  This particular object
        is an implementation of the L-BFGS quasi-newton method for determining
        this direction.

        This method uses an amount of memory that is linear in the number
        of variables to be optimized.  This makes it an excellent method
        to use when an optimization problem has a large number of variables.

        Note that this object is stateful.  Each call to get_next_direction()
        records the point and gradient it was given, and the next call turns
        them into a new remembered position/gradient pair.
!*/
public:
lbfgs_search_strategy(
unsigned long max_size
);
/*!
    requires
        - max_size > 0
    ensures
        - This object is properly initialized and ready to generate
          search directions.
        - L-BFGS works by remembering a certain number of position and gradient
          pairs.  It uses this remembered information to compute search directions.
          The max_size argument determines how many of these pairs will be remembered.
          Typically, using between 3 and 30 pairs performs well for many problems.
!*/
double get_wolfe_rho (
) const;
/*!
    ensures
        - returns the value of the Wolfe rho parameter that should be used when
          this search strategy is used with the line_search() function.
!*/
double get_wolfe_sigma (
) const;
/*!
    ensures
        - returns the value of the Wolfe sigma parameter that should be used when
          this search strategy is used with the line_search() function.
!*/
template <typename T>
const matrix<double,0,1>& get_next_direction (
const T& x,
const double funct_value,
const T& funct_derivative
);
/*!
    requires
        - for some function f():
            - funct_value == f(x)
            - funct_derivative == derivative(f)(x)
    ensures
        - Assuming that a line search is going to be conducted starting from the point x,
          this function returns the direction in which the search should proceed.
        - The result is returned by const reference, so copy it if it needs to be
          kept independently of this object.
!*/
};
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_OPTIMIZATIOn_SEARCH_STRATEGIES_ABSTRACT_
// Copyright (C) 2008 Davis E. King (davisking@users.sourceforge.net)
// License: Boost Software License See LICENSE.txt for the full license.
#ifndef DLIB_OPTIMIZATIOn_STOP_STRATEGIES_H_
#define DLIB_OPTIMIZATIOn_STOP_STRATEGIES_H_
#include <cmath>
#include <limits>
#include "../matrix.h"
#include "../algs.h"
#include "optimization_stop_strategies_abstract.h"
namespace dlib
{
// ----------------------------------------------------------------------------------------
class objective_delta_stop_strategy
{
    /*
        Stop strategy that ends a search once the objective value changes by
        less than a threshold between two consecutive checks, or (optionally)
        once an iteration cap has been exceeded.
    */
public:
    // Stops only on a small objective change.  The iteration cap is disabled.
    objective_delta_stop_strategy (
        double min_delta = 1e-7
    ) :
        _been_used(false),
        _min_delta(min_delta),
        _max_iter(0),
        _cur_iter(0),
        _prev_funct_value(0)
    {}

    // Stops on a small objective change or after max_iter checks.
    objective_delta_stop_strategy (
        double min_delta,
        unsigned long max_iter
    ) :
        _been_used(false),
        _min_delta(min_delta),
        _max_iter(max_iter),
        _cur_iter(0),
        _prev_funct_value(0)
    {}

    // Returns true if the search should keep going.  Only the objective value
    // is inspected; the point and the gradient are ignored.
    template <typename T>
    bool should_continue_search (
        const T& ,
        const double funct_value,
        const T&
    )
    {
        // Every call counts as one iteration, including the very first one.
        ++_cur_iter;

        // There is nothing to compare against on the first call, so the
        // stopping tests only apply from the second call onward.
        const bool have_previous_value = _been_used;
        if (have_previous_value)
        {
            // A cap of 0 means "no cap".
            const bool cap_enabled = (_max_iter != 0);
            if (cap_enabled && _cur_iter > _max_iter)
            {
                return false;
            }

            // Stop when the objective barely moved since the last check.
            const double change = std::abs(funct_value - _prev_funct_value);
            if (change < _min_delta)
            {
                return false;
            }
        }

        _prev_funct_value = funct_value;
        _been_used = true;
        return true;
    }

private:
    bool _been_used;            // true once a previous objective value is stored
    double _min_delta;          // smallest change that still counts as progress
    unsigned long _max_iter;    // iteration cap, 0 disables it
    unsigned long _cur_iter;    // number of calls to should_continue_search() so far
    double _prev_funct_value;   // objective value from the last call that returned true
};
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_OPTIMIZATIOn_STOP_STRATEGIES_H_
// Copyright (C) 2008 Davis E. King (davisking@users.sourceforge.net)
// License: Boost Software License See LICENSE.txt for the full license.
#undef DLIB_OPTIMIZATIOn_STOP_STRATEGIES_ABSTRACT_
#ifdef DLIB_OPTIMIZATIOn_STOP_STRATEGIES_ABSTRACT_
#include <cmath>
#include <limits>
#include "../matrix/matrix_abstract.h"
#include "../algs.h"
namespace dlib
{
// ----------------------------------------------------------------------------------------
class objective_delta_stop_strategy
{
/*!
    WHAT THIS OBJECT REPRESENTS
        This object represents a strategy for deciding if an optimization
        algorithm should terminate.  This particular object looks at the
        change in the objective function from one iteration to the next and
        bases its decision on how large this change is.  If the change
        is below a user given threshold then the search stops.

        Note that this object is stateful.  It remembers the objective value
        from the previous call to should_continue_search() and counts how many
        times that function has been called.
!*/
public:
objective_delta_stop_strategy (
double min_delta = 1e-7
);
/*!
    requires
        - min_delta >= 0
    ensures
        - This stop strategy object will only consider a search to be complete
          if a change in an objective function from one iteration to the next
          is less than min_delta.
!*/
objective_delta_stop_strategy (
double min_delta,
unsigned long max_iter
);
/*!
    requires
        - min_delta >= 0
        - max_iter > 0
    ensures
        - This stop strategy object will only consider a search to be complete
          if a change in an objective function from one iteration to the next
          is less than min_delta or more than max_iter iterations have been
          executed.
!*/
template <typename T>
bool should_continue_search (
const T& x,
const double funct_value,
const T& funct_derivative
);
/*!
    requires
        - for some function f():
            - funct_value == f(x)
            - funct_derivative == derivative(f)(x)
    ensures
        - returns true if the point x does not satisfy the stopping condition and
          false otherwise.
        - The first call on a given object always returns true, since there is
          no previous objective value to compare against yet.
!*/
};
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_OPTIMIZATIOn_STOP_STRATEGIES_ABSTRACT_
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment