Commit d69b8d71 authored by Davis King's avatar Davis King

Added versions of the find_min_* functions that don't take

a derivative function.

--HG--
extra : convert_revision : svn%3Afdd8eb12-d10e-0410-9acb-85c331704f74/trunk%402272
parent 9717c6ca
......@@ -56,7 +56,16 @@ namespace dlib
template <typename funct>
const central_differences<funct> derivative(const funct& f) { return central_differences<funct>(f); }
template <typename funct>
const central_differences<funct> derivative(const funct& f, double eps) { return central_differences<funct>(f,eps); }
const central_differences<funct> derivative(const funct& f, double eps)
{
DLIB_ASSERT (
eps > 0,
"\tcentral_differences derivative(f,eps)"
<< "\n\tYou must give an epsilon > 0"
<< "\n\teps: " << eps
);
return central_differences<funct>(f,eps);
}
// ----------------------------------------------------------------------------------------
......@@ -484,6 +493,139 @@ namespace dlib
}
// ----------------------------------------------------------------------------------------
template <
typename funct,
typename T
>
void find_min_quasi_newton (
const funct& f,
T& x,
double min_f,
double min_delta = 1e-7,
double derivative_eps = 1e-7
)
{
COMPILE_TIME_ASSERT(is_matrix<T>::value);
DLIB_ASSERT (
min_delta >= 0 && x.nc() == 1,
"\tdouble find_min_quasi_newton()"
<< "\n\tYou have to supply column vectors to this function"
<< "\n\tmin_delta: " << min_delta
<< "\n\tx.nc(): " << x.nc()
<< "\n\tderivative_eps: " << derivative_eps
);
T g, g2, s, Hg, gH;
double alpha = 10;
matrix<double,T::NR,T::NR> H(x.nr(),x.nr());
T delta, gamma;
H = identity_matrix<double>(H.nr());
g = derivative(f,derivative_eps)(x);
double f_value = min_f - 1;
double old_f_value = 0;
// loop until the derivative is almost zero
while(std::abs(old_f_value - f_value) > min_delta)
{
old_f_value = f_value;
s = -H*g;
alpha = line_search(
make_line_search_function(f,x,s),
derivative(make_line_search_function(f,x,s),derivative_eps),
0.01, 0.9,min_f, f_value);
x += alpha*s;
g2 = derivative(f,derivative_eps)(x);
// update H with the BFGS formula from (3.2.12) on page 55 of Fletcher
delta = alpha*s;
gamma = g2-g;
Hg = H*gamma;
gH = trans(trans(gamma)*H);
double gHg = trans(gamma)*H*gamma;
double dg = trans(delta)*gamma;
if (gHg < std::numeric_limits<double>::infinity() && dg < std::numeric_limits<double>::infinity() &&
dg != 0)
{
H += (1 + gHg/dg)*delta*trans(delta)/(dg) - (delta*trans(gH) + Hg*trans(delta))/(dg);
}
else
{
H = identity_matrix<double>(H.nr());
}
g.swap(g2);
}
}
// ----------------------------------------------------------------------------------------
template <
typename funct,
typename T
>
void find_min_conjugate_gradient (
const funct& f,
T& x,
double min_f,
double min_delta = 1e-7,
double derivative_eps = 1e-7
)
{
COMPILE_TIME_ASSERT(is_matrix<T>::value);
DLIB_ASSERT (
min_delta >= 0 && x.nc() == 1 && derivative_eps > 0,
"\tdouble find_min_conjugate_gradient()"
<< "\n\tYou have to supply column vectors to this function"
<< "\n\tmin_delta: " << min_delta
<< "\n\tx.nc(): " << x.nc()
<< "\n\tderivative_eps: " << derivative_eps
);
T g, g2, s;
double alpha = 0;
g = derivative(f,derivative_eps)(x);
s = -g;
double f_value = min_f - 1;
double old_f_value = 0;
// loop until the derivative is almost zero
while(std::abs(old_f_value - f_value) > min_delta)
{
old_f_value = f_value;
alpha = line_search(
make_line_search_function(f,x,s),
derivative(make_line_search_function(f,x,s),derivative_eps),
0.001, 0.010,min_f, f_value);
x += alpha*s;
g2 = derivative(f,derivative_eps)(x);
// Use the Polak-Ribiere (4.1.12) conjugate gradient described by Fletcher on page 83
double b = trans(g2-g)*g2/(trans(g)*g);
s = -g2 + b*s;
g.swap(g2);
}
}
// ----------------------------------------------------------------------------------------
}
......
......@@ -38,6 +38,7 @@ namespace dlib
requires
- f == a function that returns a scalar
- f must take either double or a dlib::matrix that is a column vector
- eps > 0
ensures
- returns a function that represents the derivative of the function f. It
is approximated numerically by:
......@@ -142,6 +143,8 @@ namespace dlib
- f0_out == f(0)
!*/
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
template <
......@@ -173,6 +176,39 @@ namespace dlib
- #x == the value of x that was found to minimize f()
!*/
// ----------------------------------------------------------------------------------------
template <
typename funct,
typename T
>
void find_min_quasi_newton (
const funct& f,
T& x,
double min_f,
double min_delta = 1e-7,
const double derivative_eps = 1e-7
);
/*!
requires
- min_delta >= 0
- derivative_eps > 0
- f(x) must be a valid expression that evaluates to a double
- is_matrix<T>::value == true (i.e. T must be a dlib::matrix type)
- x.nc() == 1 (i.e. x must be a column vector)
ensures
- Performs an unconstrained minimization of the function f() using a
quasi newton method. The optimization stops when any of the following
conditions are satisfied:
- the change in f() from one iteration to the next is less than min_delta
- f(#x) <= min_f
- Uses the dlib::derivative(f,derivative_eps) function to compute gradient
information
- #x == the value of x that was found to minimize f()
!*/
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
template <
......@@ -204,6 +240,39 @@ namespace dlib
- #x == the value of x that was found to minimize f()
!*/
// ----------------------------------------------------------------------------------------
template <
typename funct,
typename T
>
void find_min_conjugate_gradient (
const funct& f,
T& x,
double min_f,
double min_delta = 1e-7,
const double derivative_eps = 1e-7
);
/*!
requires
- min_delta >= 0
- derivative_eps > 0
- f(x) must be a valid expression that evaluates to a double
- der(x) must be a valid expression that evaluates to the derivative of
f() at x.
- is_matrix<T>::value == true (i.e. T must be a dlib::matrix type)
- x.nc() == 1 (i.e. x must be a column vector)
ensures
- Performs an unconstrained minimization of the function f() using a
conjugate gradient method. The optimization stops when any of the following
conditions are satisfied:
- the change in f() from one iteration to the next is less than min_delta
- f(#x) <= min_f
- Uses the dlib::derivative(f,derivative_eps) function to compute gradient
information
- #x == the value of x that was found to minimize f()
!*/
// ----------------------------------------------------------------------------------------
}
......
......@@ -149,6 +149,18 @@ namespace
find_min_conjugate_gradient(apq<T>, derivative(apq<T>), x, minf, eps);
DLIB_CASSERT(dlib::equal(x,opt, 1e-5),opt-x);
dlog << LINFO << "find_min_conjugate_gradient got apq/noder in " << total_count;
total_count = 0;
x = p;
find_min_quasi_newton(apq<T>, x, minf, eps);
DLIB_CASSERT(dlib::equal(x,opt, 1e-5),opt-x);
dlog << LINFO << "find_min_quasi_newton got apq/noder2 in " << total_count;
total_count = 0;
x = p;
find_min_conjugate_gradient(apq<T>, x, minf, eps);
DLIB_CASSERT(dlib::equal(x,opt, 1e-5),opt-x);
dlog << LINFO << "find_min_conjugate_gradient got apq/noder2 in " << total_count;
}
void test_powell (
......@@ -174,6 +186,18 @@ namespace
find_min_conjugate_gradient(powell, derivative(powell,1e-10), x, minf, eps);
DLIB_CASSERT(dlib::equal(x,opt, 1e-2),opt-x);
dlog << LINFO << "find_min_conjugate_gradient got powell/noder in " << total_count;
total_count = 0;
x = p;
find_min_quasi_newton(powell, x, minf, eps);
DLIB_CASSERT(dlib::equal(x,opt, 1e-2),opt-x);
dlog << LINFO << "find_min_quasi_newton got powell/noder2 in " << total_count;
total_count = 0;
x = p;
find_min_conjugate_gradient(powell, x, minf, eps);
DLIB_CASSERT(dlib::equal(x,opt, 1e-2),opt-x);
dlog << LINFO << "find_min_conjugate_gradient got powell/noder2 in " << total_count;
}
......@@ -213,6 +237,18 @@ namespace
find_min_conjugate_gradient(simple, derivative(simple), x, minf, eps);
DLIB_CASSERT(dlib::equal(x,opt, 1e-5),opt-x);
dlog << LINFO << "find_min_conjugate_gradient got simple/noder in " << total_count;
total_count = 0;
x = p;
find_min_quasi_newton(simple, x, minf, eps);
DLIB_CASSERT(dlib::equal(x,opt, 1e-5),opt-x);
dlog << LINFO << "find_min_quasi_newton got simple/noder2 in " << total_count;
total_count = 0;
x = p;
find_min_conjugate_gradient(simple, x, minf, eps);
DLIB_CASSERT(dlib::equal(x,opt, 1e-5),opt-x);
dlog << LINFO << "find_min_conjugate_gradient got simple/noder2 in " << total_count;
}
......@@ -243,14 +279,28 @@ namespace
total_count = 0;
x = p;
find_min_quasi_newton(rosen, derivative(rosen), x, minf, eps);
DLIB_CASSERT(dlib::equal(x,opt, 1e-5),opt-x);
DLIB_CASSERT(dlib::equal(x,opt, 1e-4),opt-x);
dlog << LINFO << "find_min_quasi_newton got rosen/noder in " << total_count;
total_count = 0;
x = p;
find_min_conjugate_gradient(rosen, derivative(rosen), x, minf, eps);
DLIB_CASSERT(dlib::equal(x,opt, 1e-5),opt-x);
DLIB_CASSERT(dlib::equal(x,opt, 1e-4),opt-x);
dlog << LINFO << "find_min_conjugate_gradient got rosen/noder in " << total_count;
/* This test fails
total_count = 0;
x = p;
find_min_quasi_newton(rosen, x, minf, eps, 1e-13);
DLIB_CASSERT(dlib::equal(x,opt, 1e-2),opt-x);
dlog << LINFO << "find_min_quasi_newton got rosen/noder2 in " << total_count;
*/
total_count = 0;
x = p;
find_min_conjugate_gradient(rosen, x, minf, eps, 1e-11);
DLIB_CASSERT(dlib::equal(x,opt, 1e-4),opt-x);
dlog << LINFO << "find_min_conjugate_gradient got rosen/noder2 in " << total_count;
}
// ----------------------------------------------------------------------------------------
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment