Commit 50012d2c authored by Davis King

merged

parents 28da9a42 3e559e42
@@ -456,6 +456,11 @@ namespace dlib
         const matrix_exp<EXP2>& x_upper
     )
     {
+        /*
+            The implementation of this function is more or less based on the discussion in
+            the paper Projected Newton-type Methods in Machine Learning by Mark Schmidt, et al.
+        */
         // make sure the requires clause is not violated
         COMPILE_TIME_ASSERT(is_matrix<T>::value);
         DLIB_ASSERT (
@@ -490,6 +495,7 @@ namespace dlib
         // active constraint.
         const double gap_eps = 1e-8;
+        double last_alpha = 1;
         while(stop_strategy.should_continue_search(x, f_value, g))
         {
             s = search_strategy.get_next_direction(x, f_value, zero_bounded_variables(gap_eps, g, x, g, x_lower, x_upper));
@@ -499,10 +505,19 @@ namespace dlib
                 make_line_search_function(clamp_function(f,x_lower,x_upper), x, s, f_value),
                 f_value,
                 dot(g,s), // compute gradient for the line search
-                1,
+                last_alpha,
                 search_strategy.get_wolfe_rho(),
                 search_strategy.get_max_line_search_iterations());
+            // Do a trust-region-style update of alpha. The idea is that if we take a
+            // small step then we are likely to take another small step, so we reuse the
+            // alpha from the last iteration. But if the line search didn't shrink alpha
+            // at all, we start with a bigger alpha next time.
+            if (alpha == last_alpha)
+                last_alpha = std::min(last_alpha*10,1.0);
+            else
+                last_alpha = alpha;
             // Take the search step indicated by the above line search
             x = clamp(x + alpha*s, x_lower, x_upper);
             g = der(x);
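
As an aside, the warm-start heuristic added here is easy to exercise in isolation. Below is a minimal sketch, not dlib code: run_line_search and the acceptance thresholds are made-up stand-ins for the real line search, used only to show how last_alpha adapts across iterations:

```cpp
#include <algorithm>
#include <cstdio>

// Stand-in for dlib's line_search(): keeps halving the proposed step until it
// drops below an "acceptable" threshold. Purely illustrative scaffolding.
double run_line_search(double proposed_alpha, double acceptable_alpha)
{
    double alpha = proposed_alpha;
    while (alpha > acceptable_alpha)
        alpha /= 2;
    return alpha;
}

int main()
{
    double last_alpha = 1;
    const double thresholds[] = {0.03, 0.02, 0.02, 0.5};
    for (double t : thresholds)
    {
        const double alpha = run_line_search(last_alpha, t);
        // The heuristic from the diff: reuse a shrunken alpha on the next
        // iteration, but if the line search accepted the first proposal,
        // start bigger (capped at 1) next time.
        if (alpha == last_alpha)
            last_alpha = std::min(last_alpha*10, 1.0);
        else
            last_alpha = alpha;
        std::printf("accepted alpha = %g, next starting alpha = %g\n", alpha, last_alpha);
    }
}
```

Running this shows the intended behavior: after a string of tiny accepted steps the search starts from the previous small alpha instead of 1, and as soon as a proposal is accepted unshrunk, the starting alpha grows again.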
@@ -601,6 +616,7 @@ namespace dlib
         // active constraint.
         const double gap_eps = 1e-8;
+        double last_alpha = 1;
         while(stop_strategy.should_continue_search(x, f_value, g))
         {
             s = search_strategy.get_next_direction(x, f_value, zero_bounded_variables(gap_eps, g, x, g, x_lower, x_upper));
@@ -610,10 +626,19 @@ namespace dlib
                 negate_function(make_line_search_function(clamp_function(f,x_lower,x_upper), x, s, f_value)),
                 f_value,
                 dot(g,s), // compute gradient for the line search
-                1,
+                last_alpha,
                 search_strategy.get_wolfe_rho(),
                 search_strategy.get_max_line_search_iterations());
+            // Do a trust-region-style update of alpha. The idea is that if we take a
+            // small step then we are likely to take another small step, so we reuse the
+            // alpha from the last iteration. But if the line search didn't shrink alpha
+            // at all, we start with a bigger alpha next time.
+            if (alpha == last_alpha)
+                last_alpha = std::min(last_alpha*10,1.0);
+            else
+                last_alpha = alpha;
             // Take the search step indicated by the above line search
             x = clamp(x + alpha*s, x_lower, x_upper);
             g = -der(x);
...
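
This second hunk is the find_max_box_constrained twin of the first: the objective is handed to the line search through negate_function and the gradient is negated (g = -der(x)), so maximization reduces to the same minimizer. A toy sketch of that negate-and-project pattern, with a 1-D objective and bounds invented purely for illustration:

```cpp
#include <algorithm>
#include <cstdio>

// Invented 1-D objective to maximize: f(x) = -(x-3)^2, peak at x == 3.
double f_der(double x) { return -2*(x - 3); }

int main()
{
    // Maximize f on the box [0, 2]: descend on -f and clamp each iterate,
    // mirroring x = clamp(x + alpha*s, x_lower, x_upper) in the diff.
    const double x_lower = 0, x_upper = 2;
    double x = 0.5;
    const double alpha = 0.1;
    for (int i = 0; i < 100; ++i)
    {
        const double g = -f_der(x);   // gradient of -f, as in g = -der(x)
        const double s = -g;          // steepest-descent direction for -f
        x = std::min(std::max(x + alpha*s, x_lower), x_upper);
    }
    std::printf("box-constrained maximizer: x = %g (the active bound x == 2)\n", x);
}
```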
@@ -183,6 +183,57 @@ namespace dlib
         return put_in_range(0,1,alpha);
     }
+// ----------------------------------------------------------------------------------------
+    inline double poly_min_extrap (
+        double f0,
+        double d0,
+        double x1,
+        double f_x1,
+        double x2,
+        double f_x2
+    )
+    {
+        DLIB_ASSERT(0 < x1 && x1 < x2,"Invalid inputs were given to this function");
+        // The contents of this function follow the equations described on page 58 of the
+        // book Numerical Optimization by Nocedal and Wright, second edition.
+        matrix<double,2,2> m;
+        matrix<double,2,1> v;
+        const double aa2 = x2*x2;
+        const double aa1 = x1*x1;
+        m =  aa2,       -aa1,
+            -aa2*x2,    aa1*x1;
+        v = f_x1 - f0 - d0*x1,
+            f_x2 - f0 - d0*x2;
+        double temp = aa2*aa1*(x1-x2);
+        // just take a guess if this happens
+        if (temp == 0)
+        {
+            return x1/2.0;
+        }
+        matrix<double,2,1> temp2;
+        temp2 = m*v/temp;
+        const double a = temp2(0);
+        const double b = temp2(1);
+        temp = b*b - 3*a*d0;
+        if (temp < 0 || a == 0)
+        {
+            // This is probably a line so just pick the lowest point
+            if (f0 < f_x2)
+                return 0;
+            else
+                return x2;
+        }
+        temp = (-b + std::sqrt(temp))/(3*a);
+        return put_in_range(0, x2, temp);
+    }
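
For reference, these are the equations from page 58 of Nocedal and Wright (2nd edition) that the body above transcribes. Fitting the cubic $f(x) = a x^3 + b x^2 + d_0 x + f_0$ to the four constraints gives

```latex
\begin{bmatrix} a \\ b \end{bmatrix}
= \frac{1}{x_1^2 x_2^2 (x_1 - x_2)}
  \begin{bmatrix} x_2^2 & -x_1^2 \\ -x_2^3 & x_1^3 \end{bmatrix}
  \begin{bmatrix} f_{x_1} - f_0 - d_0 x_1 \\ f_{x_2} - f_0 - d_0 x_2 \end{bmatrix},
\qquad
x_{\min} = \frac{-b + \sqrt{b^2 - 3 a d_0}}{3a},
```

where $x_{\min}$ comes from solving $f'(x) = 3 a x^2 + 2 b x + d_0 = 0$ and is then clipped to $[0, x_2]$ by put_in_range. The singular, $a = 0$, and negative-discriminant cases fall back to the guesses visible in the code.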
// ----------------------------------------------------------------------------------------
     inline double lagrange_poly_min_extrap (
@@ -447,11 +498,17 @@ namespace dlib
             << "\n\t max_iter: " << max_iter
         );
-        // If the gradient is telling us we need to search backwards then that is what we
-        // will do.
-        if (d0 > 0 && alpha > 0)
+        // make sure alpha is going in the right direction. That is, it should be opposite
+        // the direction of the gradient.
+        if ((d0 > 0 && alpha > 0) ||
+            (d0 < 0 && alpha < 0))
+        {
             alpha *= -1;
+        }
+        bool have_prev_alpha = false;
+        double prev_alpha = 0;
+        double prev_val = 0;
         unsigned long iter = 0;
         while (true)
         {
@@ -466,12 +523,26 @@ namespace dlib
                 // Interpolate a new alpha. We also make sure the step by which we
                 // reduce alpha is not super small.
                 double step;
-                if (d0 < 0)
-                    step = put_in_range(0.1,0.9, poly_min_extrap(f0, d0, val));
+                if (!have_prev_alpha)
+                {
+                    if (d0 < 0)
+                        step = alpha*put_in_range(0.1,0.9, poly_min_extrap(f0, d0, val));
+                    else
+                        step = alpha*put_in_range(0.1,0.9, poly_min_extrap(f0, -d0, val));
+                    have_prev_alpha = true;
+                }
                 else
-                    step = put_in_range(0.1,0.9, poly_min_extrap(f0, -d0, val));
+                {
+                    if (d0 < 0)
+                        step = put_in_range(0.1*alpha,0.9*alpha, poly_min_extrap(f0, d0, alpha, val, prev_alpha, prev_val));
+                    else
+                        step = put_in_range(0.1*alpha,0.9*alpha, -poly_min_extrap(f0, -d0, -alpha, val, -prev_alpha, prev_val));
+                }
+                prev_alpha = alpha;
+                prev_val = val;
-                alpha *= step;
+                alpha = step;
             }
         }
     }
...
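
Stripped of the dlib machinery, the new step selection reads as below. This is a hedged paraphrase, not the library code: the poly_min_extrap stubs only mimic the shapes of the real quadratic and cubic fits, and only the d0 < 0 branch is shown (the d0 > 0 branch in the diff mirrors it with the signs of d0 and the alphas flipped):

```cpp
#include <algorithm>
#include <cmath>

// Stubs with the same shapes as dlib's interpolators; the bodies are trivial
// placeholders, not the real polynomial fits.
double poly_min_extrap(double, double, double) { return 0.5; }                           // quadratic, 1 sample
double poly_min_extrap(double, double, double, double, double, double) { return 0.25; } // cubic, 2 samples
double put_in_range(double lo, double hi, double v)
{
    return std::min(std::max(v, std::min(lo,hi)), std::max(lo,hi));
}

// One round of the new step selection (d0 < 0 case only).
double next_alpha(double f0, double d0, double alpha, double val,
                  bool& have_prev_alpha, double& prev_alpha, double& prev_val)
{
    double step;
    if (!have_prev_alpha)
    {
        // First failed trial: only f0, d0, and f(alpha) are known, so use the
        // quadratic fit; its result is a fraction of alpha, kept in [0.1, 0.9].
        step = alpha*put_in_range(0.1, 0.9, poly_min_extrap(f0, d0, val));
        have_prev_alpha = true;
    }
    else
    {
        // Later trials: fit the cubic through (0, f0) with slope d0 plus the
        // last two trial points; the clamp keeps alpha from collapsing.
        step = put_in_range(0.1*alpha, 0.9*alpha,
                            poly_min_extrap(f0, d0, alpha, val, prev_alpha, prev_val));
    }
    prev_alpha = alpha;
    prev_val   = val;
    return step;   // note: the loop now assigns alpha = step rather than alpha *= step
}

int main()
{
    bool have_prev = false;
    double prev_a = 0, prev_v = 0;
    double a = next_alpha(1.0, -2.0, 1.0, 1.5, have_prev, prev_a, prev_v);   // quadratic round
    a = next_alpha(1.0, -2.0, a, 0.9, have_prev, prev_a, prev_v);            // cubic round
    (void)a;
}
```

The change from alpha *= step to alpha = step goes with this: the quadratic branch already multiplies by alpha, and the cubic branch clamps to an absolute interval [0.1*alpha, 0.9*alpha], so step is now a step size rather than a shrink factor.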
@@ -119,6 +119,28 @@ namespace dlib
             - returns the point in the range [0,1] that minimizes the polynomial c(x)
     !*/
+// ----------------------------------------------------------------------------------------
+    inline double poly_min_extrap (
+        double f0,
+        double d0,
+        double x1,
+        double f_x1,
+        double x2,
+        double f_x2
+    );
+    /*!
+        requires
+            - 0 < x1 < x2
+        ensures
+            - let f(x) be a 3rd degree polynomial such that:
+                - f(0) == f0
+                - derivative of f(x) at x==0 is d0
+                - f(x1) == f_x1
+                - f(x2) == f_x2
+            - returns the point in the range [0,x2] that minimizes the polynomial f(x)
+    !*/
// ----------------------------------------------------------------------------------------
     inline double lagrange_poly_min_extrap (
...
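
A quick sanity check of this contract, assuming dlib's headers are on the include path. The sample values come from f(x) = x^3 - 3x, whose minimum on [0,2] is at x == 1, and the fit is exact for a true cubic:

```cpp
#include <dlib/optimization.h>
#include <cstdio>

int main()
{
    // Samples from f(x) = x^3 - 3x: f(0) = 0, f'(0) = -3,
    // f(0.5) = -1.375, f(2) = 2.
    const double x = dlib::poly_min_extrap(0.0, -3.0, 0.5, -1.375, 2.0, 2.0);
    std::printf("minimizer in [0, 2]: %g (expected 1)\n", x);
}
```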