Added find_max_box_constrained()

103884fc · Davis King · 5494e540 · 103884fc · 103884fc · 103884fc
Commit 103884fc authored Sep 21, 2013 by Davis King
Showing with 224 additions and 3 deletions

optimization.h dlib/optimization/optimization.h +86 -0

optimization_abstract.h dlib/optimization/optimization_abstract.h +58 -0

optimization.cpp dlib/test/optimization.cpp +80 -3

No files found.
--- a/dlib/optimization/optimization.h
+++ b/dlib/optimization/optimization.h
@@ -565,6 +565,92 @@ namespace dlib
        return f_value;
    }

+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename search_strategy_type,
+        typename stop_strategy_type,
+        typename funct, 
+        typename funct_der, 
+        typename T,
+        typename EXP1,
+        typename EXP2
+        >
+    double find_max_box_constrained (
+        search_strategy_type search_strategy,
+        stop_strategy_type stop_strategy,
+        const funct& f, 
+        const funct_der& der, 
+        T& x,
+        const matrix_exp<EXP1>& x_lower,
+        const matrix_exp<EXP2>& x_upper
+    )
+    {
+        // make sure the requires clause is not violated
+        COMPILE_TIME_ASSERT(is_matrix<T>::value);
+        DLIB_ASSERT (
+            is_col_vector(x) && is_col_vector(x_lower) && is_col_vector(x_upper) &&
+            x.size() == x_lower.size() && x.size() == x_upper.size(),
+            "\tdouble find_max_box_constrained()"
+            << "\n\t The inputs to this function must be equal length column vectors."
+            << "\n\t is_col_vector(x):       " << is_col_vector(x)
+            << "\n\t is_col_vector(x_upper): " << is_col_vector(x_upper)
+            << "\n\t is_col_vector(x_upper): " << is_col_vector(x_upper)
+            << "\n\t x.size():               " << x.size()
+            << "\n\t x_lower.size():         " << x_lower.size()
+            << "\n\t x_upper.size():         " << x_upper.size()
+        );
+        DLIB_ASSERT (
+            min(x_upper-x_lower) > 0,
+            "\tdouble find_max_box_constrained()"
+            << "\n\t You have to supply proper box constraints to this function."
+            << "\n\r min(x_upper-x_lower): " << min(x_upper-x_lower)
+        );
+
+        // This function is basically just a copy of find_min_box_constrained() but with - put 
+        // in the right places to flip things around so that it ends up looking for the max
+        // rather than the min.
+
+        T g, s;
+        double f_value = -f(x);
+        g = -der(x);
+
+        DLIB_ASSERT(is_finite(f_value), "The objective function generated non-finite outputs");
+        DLIB_ASSERT(is_finite(g), "The objective function generated non-finite outputs");
+
+        // gap_eps determines how close we have to get to a bound constraint before we
+        // start basically dropping it from the optimization and consider it to be an
+        // active constraint.
+        const double gap_eps = 1e-8;
+
+        while(stop_strategy.should_continue_search(x, f_value, g))
+        {
+            s = search_strategy.get_next_direction(x, f_value, zero_bounded_variables(gap_eps, g, x, g, x_lower, x_upper));
+            s = gap_step_assign_bounded_variables(gap_eps, s, x, g, x_lower, x_upper);
+
+            double alpha = backtracking_line_search(
+                        negate_function(make_line_search_function(clamp_function(f,x_lower,x_upper), x, s, f_value)),
+                        f_value,
+                        dot(g,s), // compute gradient for the line search
+                        1, 
+                        search_strategy.get_wolfe_rho(), 
+                        search_strategy.get_max_line_search_iterations());
+
+            // Take the search step indicated by the above line search
+            x = clamp(x + alpha*s, x_lower, x_upper);
+            g = -der(x);
+
+            // Don't forget to negate the output from the line search since it is  from the
+            // unnegated version of f() 
+            f_value *= -1;
+
+            DLIB_ASSERT(is_finite(f_value), "The objective function generated non-finite outputs");
+            DLIB_ASSERT(is_finite(g), "The objective function generated non-finite outputs");
+        }
+
+        return -f_value;
+    }
+
 // ----------------------------------------------------------------------------------------

 }

--- a/dlib/optimization/optimization_abstract.h
+++ b/dlib/optimization/optimization_abstract.h
@@ -332,6 +332,64 @@ namespace dlib
              the box constraints defined by x_lower and x_upper.
    !*/

+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename search_strategy_type,
+        typename stop_strategy_type,
+        typename funct, 
+        typename funct_der, 
+        typename T,
+        typename EXP1,
+        typename EXP2
+        >
+    double find_max_box_constrained (
+        search_strategy_type search_strategy,
+        stop_strategy_type stop_strategy,
+        const funct& f, 
+        const funct_der& der, 
+        T& x,
+        const matrix_exp<EXP1>& x_lower,
+        const matrix_exp<EXP2>& x_upper
+    );
+    /*!
+        requires
+            - search_strategy == an object that defines a search strategy such as one 
+              of the objects from dlib/optimization/optimization_search_strategies_abstract.h
+            - stop_strategy == an object that defines a stop strategy such as one of 
+              the objects from dlib/optimization/optimization_stop_strategies_abstract.h
+            - f(x) must be a valid expression that evaluates to a double
+            - der(x) must be a valid expression that evaluates to the derivative of f() at x.
+            - is_col_vector(x) == true
+            - is_col_vector(x_lower) == true
+            - is_col_vector(x_upper) == true
+            - x.size() == x_lower.size() == x_upper.size()
+              (i.e. x, x_lower, and x_upper need to all be column vectors of the same dimensionality)
+            - min(x_upper-x_lower) > 0
+              (i.e. x_upper must contain upper bounds relative to x_lower)
+        ensures
+            - Performs a box constrained maximization of the function f() using the given
+              search_strategy and starting from the initial point x.  That is, we try to
+              find the x value that maximizes f(x) but is also within the box constraints 
+              specified by x_lower and x_upper.  That is, we ensure that #x satisfies: 
+                - min(#x - x_lower) >= 0 && min(x_upper - #x) >= 0
+            - This function uses a backtracking line search along with a gradient projection
+              to handle the box constraints.
+            - The function is optimized until stop_strategy decides that an acceptable
+              point has been found. 
+            - #x == the value of x that was found to maximize f() within the given box
+              constraints.
+            - returns f(#x). 
+            - When calling f() and der(), the input passed to them will always be inside
+              the box constraints defined by x_lower and x_upper.
+            - Note that this function solves the maximization problem by converting it 
+              into a minimization problem.  Therefore, the values of f and its derivative
+              reported to the stopping strategy will be negated.  That is, stop_strategy
+              will see -f() and -der().  All this really means is that the status messages
+              from a stopping strategy in verbose mode will display a negated objective
+              value.
+    !*/
+
 }

 #endif // DLIB_OPTIMIZATIOn_ABSTRACT_

--- a/dlib/test/optimization.cpp
+++ b/dlib/test/optimization.cpp
@@ -943,6 +943,71 @@ namespace
        return unorm;
    }

+    template <typename der_funct, typename T>
+    double unconstrained_gradient_magnitude_neg_funct (
+        const der_funct& grad,
+        const T& x,
+        const T& lower,
+        const T& upper
+    )
+    {
+        T g = grad(x);
+
+        double unorm = 0;
+
+        for (long i = 0; i < g.size(); ++i)
+        {
+            if (lower(i) < x(i) && x(i) < upper(i))
+                unorm += g(i)*g(i);
+            else if (x(i) == lower(i) && g(i) > 0)
+                unorm += g(i)*g(i);
+            else if (x(i) == upper(i) && g(i) < 0)
+                unorm += g(i)*g(i);
+        }
+
+        return unorm;
+    }
+
+    template <typename search_strategy_type>
+    double test_bound_solver_neg_rosen (dlib::rand& rnd, search_strategy_type search_strategy)
+    {
+        using namespace dlib::test_functions;
+        print_spinner();
+        matrix<double,2,1> starting_point, lower, upper, x;
+
+
+        // pick random bounds
+        lower = rnd.get_random_gaussian()+1, rnd.get_random_gaussian()+1;
+        upper = rnd.get_random_gaussian()+1, rnd.get_random_gaussian()+1;
+        while (upper(0) < lower(0)) upper(0) = rnd.get_random_gaussian()+1;
+        while (upper(1) < lower(1)) upper(1) = rnd.get_random_gaussian()+1;
+
+        starting_point = rnd.get_random_double()*(upper(0)-lower(0))+lower(0), 
+                       rnd.get_random_double()*(upper(1)-lower(1))+lower(1);
+
+        dlog << LINFO << "lower: "<< trans(lower);
+        dlog << LINFO << "upper: "<< trans(upper);
+        dlog << LINFO << "starting: "<< trans(starting_point);
+
+        x = starting_point;
+        double val = find_max_box_constrained( 
+            search_strategy,
+            objective_delta_stop_strategy(1e-16, 500), 
+            neg_rosen, der_neg_rosen, x,
+            lower,  
+            upper   
+        );
+
+        DLIB_TEST(std::abs(val - neg_rosen(x)) < 1e-14);
+        dlog << LINFO << "neg_rosen solution:\n" << x;
+
+        dlog << LINFO << "neg_rosen gradient: "<< trans(der_neg_rosen(x));
+        const double gradient_residual = unconstrained_gradient_magnitude_neg_funct(der_neg_rosen, x, lower, upper);
+        dlog << LINFO << "gradient_residual: "<< gradient_residual;
+
+        return gradient_residual;
+    }
+
    template <typename search_strategy_type>
    double test_bound_solver_rosen (dlib::rand& rnd, search_strategy_type search_strategy)
    {
@@ -965,7 +1030,7 @@ namespace
        dlog << LINFO << "starting: "<< trans(starting_point);

        x = starting_point;
-        find_min_box_constrained( 
+        double val = find_min_box_constrained( 
            search_strategy,
            objective_delta_stop_strategy(1e-16, 500), 
            rosen, der_rosen, x,
@@ -973,6 +1038,7 @@ namespace
            upper   
        );

+        DLIB_TEST(std::abs(val - rosen(x)) < 1e-14);
        dlog << LINFO << "rosen solution:\n" << x;

        dlog << LINFO << "rosen gradient: "<< trans(der_rosen(x));
@@ -1012,7 +1078,7 @@ namespace
        dlog << LINFO << "starting: "<< trans(starting_point);

        x = starting_point;
-        find_min_box_constrained( 
+        double val = find_min_box_constrained( 
            search_strategy,
            objective_delta_stop_strategy(1e-16, 500), 
            brown, brown_derivative, x,
@@ -1020,6 +1086,7 @@ namespace
            upper   
        );

+        DLIB_TEST(std::abs(val - brown(x)) < 1e-14);
        dlog << LINFO << "brown solution:\n" << x;
        return unconstrained_gradient_magnitude(brown_derivative, x, lower, upper);
    }
@@ -1027,10 +1094,10 @@ namespace
    template <typename search_strategy_type>
    void test_box_constrained_optimizers(search_strategy_type search_strategy)
    {
-        dlog << LINFO << "test find_min_box_constrained() on rosen";
        dlib::rand rnd;
        running_stats<double> rs;

+        dlog << LINFO << "test find_min_box_constrained() on rosen";
        for (int i = 0; i < 1000; ++i)
            rs.add(test_bound_solver_rosen(rnd, search_strategy));
        dlog << LINFO << "mean rosen gradient: " << rs.mean();
@@ -1046,6 +1113,16 @@ namespace
        dlog << LINFO << "max brown gradient:  " << rs.max();
        DLIB_TEST(rs.mean() < 1e-6);
        DLIB_TEST(rs.max() < 1e-4);
+
+        dlog << LINFO << "test find_max_box_constrained() on neg_rosen";
+        rs.clear();
+        for (int i = 0; i < 1000; ++i)
+            rs.add(test_bound_solver_neg_rosen(rnd, search_strategy));
+        dlog << LINFO << "mean neg_rosen gradient: " << rs.mean();
+        dlog << LINFO << "max neg_rosen gradient:  " << rs.max();
+        DLIB_TEST(rs.mean() < 1e-12);
+        DLIB_TEST(rs.max() < 1e-9);
+
    }

    void test_poly_min_extract_2nd()