Added a .add() to upper_bound_function so that the upper bound can be quickly
updated without needing to re-solve the whole QP.

Added a .add() to upper_bound_function so that the upper bound can be quickly
updated without needing to re-solve the whole QP.
5529ddfb · Davis King · 7e39a527 · 5529ddfb · 5529ddfb · 5529ddfb
Commit 5529ddfb authored Nov 18, 2017 by Davis King
3 changed files
--- a/dlib/global_optimization/upper_bound_function.h
+++ b/dlib/global_optimization/upper_bound_function.h
@@ -33,21 +33,114 @@ namespace dlib
        upper_bound_function(
        ) = default;
+        upper_bound_function(
+            const double relative_noise_magnitude,
+            const double solver_eps 
+        ) : relative_noise_magnitude(relative_noise_magnitude), solver_eps(solver_eps)
+        {
+            DLIB_CASSERT(relative_noise_magnitude >= 0);
+            DLIB_CASSERT(solver_eps > 0);
+        }
        explicit upper_bound_function(
            const std::vector<function_evaluation>& _points,
            const double relative_noise_magnitude = 0.001,
            const double solver_eps = 0.0001
-        ) : points(_points)
+        ) : relative_noise_magnitude(relative_noise_magnitude), solver_eps(solver_eps), points(_points)
        {
-            DLIB_CASSERT(points.size() > 1);
-            DLIB_CASSERT(points[0].x.size() > 0, "The vectors can't be empty.");
            DLIB_CASSERT(relative_noise_magnitude >= 0);
            DLIB_CASSERT(solver_eps > 0);
+            if (points.size() > 1)
+            {
+                DLIB_CASSERT(points[0].x.size() > 0, "The vectors can't be empty.");
+                const long dims = points[0].x.size();
+                for (auto& p : points)
+                    DLIB_CASSERT(p.x.size() == dims, "All the vectors given to upper_bound_function must have the same dimensionality.");
+                learn_params();
+            }
+        }
+        void add (
+            const function_evaluation& point
+        )
+        {
+            DLIB_CASSERT(point.x.size() != 0, "The vectors can't be empty.");
+            if (points.size() == 0)
+            {
+                points.push_back(point);
+                return;
+            }
+            DLIB_CASSERT(point.x.size() == dimensionality(), "All the vectors given to upper_bound_function must have the same dimensionality.");
+            if (points.size() < 4)
+            {
+                points.push_back(point);
+                *this = upper_bound_function(points, relative_noise_magnitude, solver_eps);
+                return;
+            }
+            points.push_back(point);
+            // add constraints between the new point and the old points
+            for (size_t i = 0; i < points.size()-1; ++i)
+                active_constraints.push_back(std::make_pair(i,points.size()-1));
+            learn_params();
+        }
+        long num_points(
+        ) const 
+        { 
+            return points.size(); 
+        }
+        long dimensionality(
+        ) const
+        { 
+            if (points.size() == 0)
+                return 0;
+            else
+                return points[0].x.size();
+        }
+        const std::vector<function_evaluation>& get_points(
+        ) const 
+        { 
+            return points; 
+        }
+        double operator() (
+            const matrix<double,0,1>& x
+        ) const
+        {
+            DLIB_CASSERT(num_points() > 0);
+            DLIB_CASSERT(x.size() == dimensionality());
+            double upper_bound = std::numeric_limits<double>::infinity();
+            for (size_t i = 0; i < points.size(); ++i)
+            {
+                const double local_bound = points[i].y + std::sqrt(offsets[i] + dot(slopes, squared(x-points[i].x)));
+                upper_bound = std::min(upper_bound, local_bound);
+            }
+            return upper_bound;
+        }
+    private:
+        void learn_params (
+        )
+        {
            const long dims = points[0].x.size();
-            for (auto& p : points)
-                DLIB_CASSERT(p.x.size() == dims, "All the vectors given to upper_bound_function must have the same dimensionality.");
+            using sample_type = std::vector<std::pair<size_t,double>>;
            using sample_type = std::vector<std::pair<size_t,double>>;
            using kernel_type = sparse_linear_kernel<sample_type>;
@@ -65,8 +158,6 @@ namespace dlib
                y_rs.add(v.y);
            }
-            x.reserve(points.size()*(points.size()-1)/2);
-            y.reserve(points.size()*(points.size()-1)/2);
            // compute normalization vectors for the data.  The only reason we do this is
            // to make the optimization well conditioned.  In particular, scaling the y
@@ -80,31 +171,47 @@ namespace dlib
            for (size_t i = 0; i < xscale.size(); ++i)
                xscale[i] = 1.0/(x_rs[i].stddev()*yscale); // make it so that xscale[i]*yscale ==  1/x_rs[i].stddev()
            sample_type samp;
-            for (size_t i = 0; i < points.size(); ++i)
+            auto add_constraint = [&](long i, long j) {
+                samp.clear();
+                for (long k = 0; k < dims; ++k)
+                {
+                    double temp = (points[i].x(k) - points[j].x(k))*xscale[k]*yscale;
+                    samp.push_back(std::make_pair(k, temp*temp));
+                }
+                if (points[i].y > points[j].y)
+                    samp.push_back(std::make_pair(dims + j, relative_noise_magnitude));
+                else
+                    samp.push_back(std::make_pair(dims + i, relative_noise_magnitude));
+                const double diff = (points[i].y - points[j].y)*yscale;
+                samp.push_back(std::make_pair(dims + points.size(), 1-diff*diff));
+                x.push_back(samp);
+                y.push_back(1);
+            };
+            if (active_constraints.size() == 0)
            {
-                for (size_t j = i+1; j < points.size(); ++j)
+                x.reserve(points.size()*(points.size()-1)/2);
+                y.reserve(points.size()*(points.size()-1)/2);
+                for (size_t i = 0; i < points.size(); ++i)
                {
-                    samp.clear();
+                    for (size_t j = i+1; j < points.size(); ++j)
-                    for (long k = 0; k < dims; ++k)
                    {
-                        double temp = (points[i].x(k) - points[j].x(k))*xscale[k]*yscale;
+                        add_constraint(i,j);
-                        samp.push_back(std::make_pair(k, temp*temp));
                    }
+                }
+            }
+            else
+            {
+                for (auto& p : active_constraints)
+                    add_constraint(p.first, p.second);
+            }
-                    if (points[i].y > points[j].y)
-                        samp.push_back(std::make_pair(dims + j, relative_noise_magnitude));
-                    else
-                        samp.push_back(std::make_pair(dims + i, relative_noise_magnitude));
-                    const double diff = (points[i].y - points[j].y)*yscale;
-                    samp.push_back(std::make_pair(dims + points.size(), 1-diff*diff));
-                    x.push_back(samp);
-                    y.push_back(1);
-                }
-            }
            svm_c_linear_dcd_trainer<kernel_type> trainer;
            trainer.set_c(std::numeric_limits<double>::infinity());
@@ -112,83 +219,60 @@ namespace dlib
            trainer.force_last_weight_to_1(true);
            trainer.set_epsilon(solver_eps);
-            auto df = trainer.train(x,y);
+            svm_c_linear_dcd_trainer<kernel_type>::optimizer_state state;
+            auto df = trainer.train(x,y, state);
-            const auto& bv = df.basis_vectors(0);
+            // save the active constraints for later so we can use them inside add() to add
-            slopes.set_size(dims);
+            // new points efficiently.
-            for (long i = 0; i < dims; ++i)
+            if (active_constraints.size() == 0)
-                slopes(i) = bv[i].second*xscale[i]*xscale[i];
-            //cout << "slopes:" << trans(slopes);
-            offsets.resize(points.size());
-            auto s = x.begin();
-            for (size_t i = 0; i < points.size(); ++i)
            {
-                for (size_t j = i+1; j < points.size(); ++j)
+                long k = 0;
+                for (size_t i = 0; i < points.size(); ++i)
                {
-                    double val = df(*s);
+                    for (size_t j = i+1; j < points.size(); ++j)
-                    // If the constraint wasn't exactly satisfied then we need to adjust
-                    // the offsets so that it is satisfied.  So we check for that here
-                    if (points[i].y > points[j].y)
-                    {
-                        if (val + offsets[j] < 1)
-                            offsets[j] = 1-val;
-                    }
-                    else
                    {
-                        if (val + offsets[i] < 1)
+                        if (state.get_alpha()[k++] != 0)
-                            offsets[i] = 1-val;
+                            active_constraints.push_back(std::make_pair(i,j));
                    }
-                    ++s;
                }
            }
+            else
-            for (size_t i = 0; i < points.size(); ++i)
            {
-                offsets[i] += bv[slopes.size()+i].second*relative_noise_magnitude;
+                DLIB_CASSERT(state.get_alpha().size() == active_constraints.size());
+                new_active_constraints.clear();
+                for (size_t i = 0; i < state.get_alpha().size(); ++i)
+                {
+                    if (state.get_alpha()[i] != 0)
+                        new_active_constraints.push_back(active_constraints[i]);
+                }
+                active_constraints.swap(new_active_constraints);
            }
-        }
-        long num_points(
+            //std::cout << "points.size(): " << points.size() << std::endl;
-        ) const 
+            //std::cout << "active_constraints.size(): " << active_constraints.size() << std::endl;
-        { 
-            return points.size(); 
-        }
-        long dimensionality(
-        ) const
-        { 
-            if (points.size() == 0)
-                return 0;
-            else
-                return points[0].x.size();
-        }
-        double operator() (
+            const auto& bv = df.basis_vectors(0);
-            matrix<double,0,1> x
+            slopes.set_size(dims);
-        ) const
+            for (long i = 0; i < dims; ++i)
-        {
+                slopes(i) = bv[i].second*xscale[i]*xscale[i];
-            DLIB_CASSERT(num_points() > 0);
-            DLIB_CASSERT(x.size() == dimensionality());
+            //std::cout << "slopes:" << trans(slopes);
+            offsets.assign(points.size(),0);
-            double upper_bound = std::numeric_limits<double>::infinity();
            for (size_t i = 0; i < points.size(); ++i)
            {
-                const double local_bound = points[i].y + std::sqrt(offsets[i] + dot(slopes, squared(x-points[i].x)));
+                offsets[i] += bv[slopes.size()+i].second*relative_noise_magnitude;
-                upper_bound = std::min(upper_bound, local_bound);
            }
-            return upper_bound;
        }
-    private:
+        double relative_noise_magnitude = 0.001;
+        double solver_eps = 0.0001; 
+        std::vector<std::pair<size_t,size_t>> active_constraints, new_active_constraints;
        std::vector<function_evaluation> points;
        std::vector<double> offsets; // offsets.size() == points.size()

--- a/dlib/global_optimization/upper_bound_function_abstract.h
+++ b/dlib/global_optimization/upper_bound_function_abstract.h
@@ -104,8 +104,8 @@ namespace dlib
        );
        /*!
            requires
-                - points.size() > 1
                - all the x vectors in points must have the same non-zero dimensionality.
+                - relative_noise_magnitude >= 0
                - solver_eps > 0
            ensures
                - Creates an upper bounding function U(x), as described above, assuming that
@@ -116,16 +116,67 @@ namespace dlib
                  only do this if you know F(x) is non-stochastic and continuous
                  everywhere.
                - When solving the QP used to find the parameters of U(x), the upper
-                  bounding function, we solve the QP to solver_eps accuracy.
+                  bounding function, we solve the QP to solver_eps accuracy.   It's
+                  possible that large enough solver_eps can lead to upper bounds that don't
+                  upper bound all the supplied points.  But for reasonable epsilon values
+                  this shouldn't be a problem. 
                - #num_points() == points.size()
                - #dimensionality() == points[0].x.size()
        !*/
+        upper_bound_function(
+            const double relative_noise_magnitude,
+            const double solver_eps 
+        );
+        /*!
+            requires
+                - relative_noise_magnitude >= 0
+                - solver_eps > 0
+            ensures
+                - #num_points() == 0
+                - #dimensionality() == 0
+                - This constructor is the same as calling the above constructor with points.size()==0
+        !*/
+        void add (
+            const function_evaluation& point
+        );
+        /*!
+            requires
+                - num_points() == 0 || point.x.size() == dimensionality()
+                - point.x.size() != 0
+            ensures
+                - Adds point to get_points().
+                - Incrementally updates the upper bounding function with the given function
+                  evaluation.  That is, we assume that F(point.x)==point.y and solve the QP
+                  described above to find the new U(x) that upper bounds all the points
+                  this object knows about (i.e. all the points in get_points() and the new point).
+                - Calling add() is much faster than recreating the upper_bound_function
+                  from scratch with all the points.  This is because we warm start with the
+                  previous solution to the QP.  This is done by discarding any non-active
+                  constraints and solving the QP again with only the previously active
+                  constraints and the new constraints formed by all the pairs of the new
+                  point and the old points.  This means the QP solved by add() is much
+                  smaller than the QP that would be solved by a fresh call to the
+                  upper_bound_function constructor.
+        !*/
+        const std::vector<function_evaluation>& get_points(
+        ) const;
+        /*!
+            ensures
+                - returns the points from F(x) used to define this upper bounding function.
+                  These are all the function_evaluation objects given to this object via
+                  its constructor and add().
+        !*/
        long num_points(
        ) const;
        /*!
            ensures
                - returns the number of points used to define the upper bounding function.
+                  (i.e. returns get_points().size())
        !*/
        long dimensionality(
@@ -136,7 +187,7 @@ namespace dlib
        !*/
        double operator() (
-            matrix<double,0,1> x
+            const matrix<double,0,1>& x
        ) const;
        /*!
            requires

--- a/dlib/test/global_optimization.cpp
+++ b/dlib/test/global_optimization.cpp
@@ -38,13 +38,29 @@ namespace
        std::vector<function_evaluation> evals;
-        for (int i = 0; i < 200; ++i)
+        for (int i = 0; i < 100; ++i)
        {
            auto x = make_rnd();
            evals.emplace_back(x,rosen(x));
        }
        upper_bound_function ub(evals, relative_noise_magnitude, solver_eps);
+        DLIB_TEST(ub.num_points() == (long)evals.size());
+        DLIB_TEST(ub.dimensionality() == 2);
+        for (auto& ev : evals)
+        {
+            dlog << LINFO << ub(ev.x) - ev.y;
+            DLIB_TEST_MSG(ub(ev.x) - ev.y > -1e10, ub(ev.x) - ev.y);
+        }
+        for (int i = 0; i < 100; ++i)
+        {
+            auto x = make_rnd();
+            evals.emplace_back(x,rosen(x));
+            ub.add(evals.back());
+        }
        DLIB_TEST(ub.num_points() == (long)evals.size());
        DLIB_TEST(ub.dimensionality() == 2);