Commit 1e90fc6d authored by Davis King

Added upper_bound_function object.

parent 1bfc31de
// Copyright (C) 2017 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#ifndef DLIB_GLOBAL_OPTIMIZATIOn_HEADER
#define DLIB_GLOBAL_OPTIMIZATIOn_HEADER
#include "global_optimization/upper_bound_function.h"
#endif // DLIB_GLOBAL_OPTIMIZATIOn_HEADER
// Copyright (C) 2017 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#ifndef DLIB_UPPER_bOUND_FUNCTION_Hh_
#define DLIB_UPPER_bOUND_FUNCTION_Hh_
#include "upper_bound_function_abstract.h"
#include "../svm/svm_c_linear_dcd_trainer.h"
#include "../statistics.h"
#include <limits>
#include <utility>
namespace dlib
{
// ----------------------------------------------------------------------------------------
struct function_evaluation
{
function_evaluation() = default;
function_evaluation(const matrix<double,0,1>& x, double y) :x(x), y(y) {}
matrix<double,0,1> x;
double y = std::numeric_limits<double>::quiet_NaN();
};
// ----------------------------------------------------------------------------------------
class upper_bound_function
{
public:
upper_bound_function(
) = default;
explicit upper_bound_function(
const std::vector<function_evaluation>& _points,
const double relative_noise_magnitude = 0.001,
const double solver_eps = 0.0001
) : points(_points)
{
DLIB_CASSERT(points.size() > 1);
DLIB_CASSERT(points[0].x.size() > 0, "The vectors can't be empty.");
DLIB_CASSERT(relative_noise_magnitude >= 0);
DLIB_CASSERT(solver_eps > 0);
const long dims = points[0].x.size();
for (auto& p : points)
DLIB_CASSERT(p.x.size() == dims, "All the vectors given to upper_bound_function must have the same dimensionality.");
using sample_type = std::vector<std::pair<size_t,double>>;
using kernel_type = sparse_linear_kernel<sample_type>;
std::vector<sample_type> x;
std::vector<double> y;
// normalize the data so the values aren't extreme
std::vector<matrix<double,0,1>> temp;
for (auto& v : points)
temp.push_back(v.x);
xnormalizer.train(temp);
for (auto& v : points)
v.x = xnormalizer(v.x);
x.reserve(points.size()*(points.size()-1)/2);
y.reserve(points.size()*(points.size()-1)/2);
sample_type samp;
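// Each pair of points (i,j) becomes one training sample: the first dims entries
// hold the squared coordinate differences, the next entry selects the noise term of
// the lower-valued point (scaled by relative_noise_magnitude), and the final entry
// carries 1-diff*diff.  With that final weight later forced to 1, the SVM constraint
// w*samp >= 1 is exactly dot(slopes, squared(xi-xj)) + noise >= (yi-yj)^2, i.e. the
// local bound centered at the lower point reaches the higher value at the other point.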
for (size_t i = 0; i < points.size(); ++i)
{
for (size_t j = i+1; j < points.size(); ++j)
{
samp.clear();
for (long k = 0; k < dims; ++k)
{
double temp = points[i].x(k) - points[j].x(k);
samp.push_back(std::make_pair(k, temp*temp));
}
if (points[i].y > points[j].y)
samp.push_back(std::make_pair(dims + j, relative_noise_magnitude));
else
samp.push_back(std::make_pair(dims + i, relative_noise_magnitude));
double diff = points[i].y - points[j].y;
samp.push_back(std::make_pair(dims + points.size(), 1-diff*diff));
x.push_back(samp);
y.push_back(1);
}
}
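// Solve the resulting QP with a hard-margin linear SVM: C = infinity makes every
// pairwise constraint a hard constraint, forcing the last weight to 1 pins the
// coefficient of the final 1-diff*diff feature so the margin inequality reduces to
// the desired bound constraint, and solver_eps controls the solver accuracy.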
svm_c_linear_dcd_trainer<kernel_type> trainer;
trainer.set_c(std::numeric_limits<double>::infinity());
//trainer.be_verbose();
trainer.force_last_weight_to_1(true);
trainer.set_epsilon(solver_eps);
auto df = trainer.train(x,y);
const auto& bv = df.basis_vectors(0);
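// Layout of the learned weight vector: the first dims entries give the squared-distance
// slopes and the next points.size() entries give the per-point noise weights (the
// trailing feature's weight was pinned to 1 above).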
slopes.set_size(dims);
for (long i = 0; i < dims; ++i)
slopes(i) = bv[i].second;
//cout << "slopes:" << trans(slopes);
offsets.resize(points.size());
auto s = x.begin();
for (size_t i = 0; i < points.size(); ++i)
{
for (size_t j = i+1; j < points.size(); ++j)
{
double val = df(*s);
// If the constraint wasn't exactly satisfied then we need to adjust
// the offsets so that it is satisfied. So we check for that here
if (points[i].y > points[j].y)
{
if (val + offsets[j] < 1)
offsets[j] = 1-val;
}
else
{
if (val + offsets[i] < 1)
offsets[i] = 1-val;
}
++s;
}
}
for (size_t i = 0; i < points.size(); ++i)
{
offsets[i] += bv[slopes.size()+i].second*relative_noise_magnitude;
}
}
long num_points(
) const
{
return points.size();
}
long dimensionality(
) const
{
if (points.size() == 0)
return 0;
else
return points[0].x.size();
}
double operator() (
matrix<double,0,1> x
) const
{
DLIB_CASSERT(num_points() > 0);
DLIB_CASSERT(x.size() == dimensionality());
x = xnormalizer(x);
double upper_bound = std::numeric_limits<double>::infinity();
for (size_t i = 0; i < points.size(); ++i)
{
const double local_bound = points[i].y + std::sqrt(offsets[i] + dot(slopes, squared(x-points[i].x)));
upper_bound = std::min(upper_bound, local_bound);
}
return upper_bound;
}
private:
vector_normalizer<matrix<double,0,1>> xnormalizer;
std::vector<function_evaluation> points;
std::vector<double> offsets; // offsets.size() == points.size()
matrix<double,0,1> slopes; // slopes.size() == points[0].x.size()
};
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_UPPER_bOUND_FUNCTION_Hh_
// Copyright (C) 2017 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#undef DLIB_UPPER_bOUND_FUNCTION_ABSTRACT_Hh_
#ifdef DLIB_UPPER_bOUND_FUNCTION_ABSTRACT_Hh_
#include "../matrix.h"
#include <limits>
namespace dlib
{
// ----------------------------------------------------------------------------------------
struct function_evaluation
{
/*!
WHAT THIS OBJECT REPRESENTS
This object records the output of a real valued function in response to
some input.
In particular, if you have a function F(x) then the function_evaluation is
simply a struct that records x and the scalar value F(x).
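For example, assuming some scalar function F (hypothetical) and a 2D input:
matrix<double,0,1> x(2);
x = 1.0, 2.0;
function_evaluation ev(x, F(x)); // ev.x stores the input, ev.y stores F(x)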
!*/
function_evaluation() = default;
function_evaluation(const matrix<double,0,1>& x, double y) :x(x), y(y) {}
matrix<double,0,1> x;
double y = std::numeric_limits<double>::quiet_NaN();
};
// ----------------------------------------------------------------------------------------
class upper_bound_function
{
/*!
WHAT THIS OBJECT REPRESENTS
This object represents a non-parametric function that can be used to define
an upper bound on some more complex and unknown function. To describe this
precisely, let's assume there is a function F(x) which you are capable of
sampling from but otherwise know nothing about, and that you would like to
find an upper bounding function U(x) such that U(x) >= F(x) for any x. It
would also be good if U(x)-F(x) were minimal, i.e. we would like U(x) to be
a tight upper bound, not something vacuous like U(x) = infinity.
The upper_bound_function class is a tool for creating this kind of upper
bounding function from a set of function_evaluations of F(x). We do this
by considering only U(x) of the form:
U(x) = {
double min_ub = infinity;
for (size_t i = 0; i < POINTS.size(); ++i) {
function_evaluation p = POINTS[i];
double local_bound = p.y + sqrt(noise_terms[i] + trans(p.x-x)*M*(p.x-x));
min_ub = min(min_ub, local_bound);
}
return min_ub;
}
Where POINTS is an array of function_evaluation instances drawn from F(x),
M is a diagonal matrix, and noise_terms is an array of scalars.
To create an upper bound U(x), the upper_bound_function takes a POINTS array
containing evaluations of F(x) as input and solves the following quadratic
program to find the parameters of U(x):
min_{M,noise_terms}: sum(squared(M)) + sum(squared(noise_terms/relative_noise_magnitude))
s.t. U(POINTS[i].x) >= POINTS[i].y, for all i
noise_terms[i] >= 0
diag(M) >= 0
Therefore, the quadratic program finds the U(x) that always upper bounds
F(x) on the supplied POINTS, but is otherwise as small as possible.
The inspiration for the upper_bound_function object came from this
excellent paper:
Global optimization of Lipschitz functions
Malherbe, Cédric and Vayatis, Nicolas
International Conference on Machine Learning - 2017
In that paper, they propose to use a simpler U(x) where noise_terms is
always 0 and M is a diagonal matrix where each diagonal element is the same
value. Therefore, there is only a single scalar parameter for U(x) in
their formulation of the problem. This causes difficulties if F(x) is
stochastic or has discontinuities since, without the noise term, M will
become really huge and the upper bound becomes vacuously large. It is also
problematic if the gradient of F(x) with respect to x contains elements of
widely varying magnitude since the simpler formulation of U(x) assumes a
uniform rate of change regardless of which dimension is varying.
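As a simple illustration, here is a minimal usage sketch (the function F and the
sample points below are hypothetical, chosen only for illustration):
std::vector<function_evaluation> samples;
matrix<double,0,1> x(2);
x = 0.5, 1.0; samples.emplace_back(x, F(x));
x = -1.0, 2.0; samples.emplace_back(x, F(x));
x = 0.0, 0.0; samples.emplace_back(x, F(x));
upper_bound_function U(samples);
x = 0.25, 0.75;
double bound = U(x); // bound should be >= F(x), up to solver accuracy and the noise terms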
!*/
public:
upper_bound_function(
);
/*!
ensures
- #num_points() == 0
- #dimensionality() == 0
!*/
explicit upper_bound_function(
const std::vector<function_evaluation>& points,
const double relative_noise_magnitude = 0.001,
const double solver_eps = 0.0001
);
/*!
requires
- points.size() > 1
- all the x vectors in points must have the same non-zero dimensionality.
- relative_noise_magnitude >= 0
- solver_eps > 0
ensures
- Creates an upper bounding function U(x), as described above, assuming that
the given points are drawn from F(x).
- Uses the provided relative_noise_magnitude when solving the QP, as
described above. Note that relative_noise_magnitude can be set to 0. If
you do this then all the noise terms are constrained to 0. You should
only do this if you know F(x) is non-stochastic and continuous
everywhere.
- When solving the QP used to find the parameters of U(x), the upper
bounding function, we solve the QP to solver_eps accuracy.
- #num_points() == points.size()
- #dimensionality() == points[0].x.size()
!*/
long num_points(
) const;
/*!
ensures
- returns the number of points used to define the upper bounding function.
!*/
long dimensionality(
) const;
/*!
ensures
- returns the dimensionality of the input vectors to the upper bounding function.
!*/
double operator() (
matrix<double,0,1> x
) const;
/*!
requires
- num_points() > 0
- x.size() == dimensionality()
ensures
- returns U(x)
(i.e. returns the upper bound on F(x) at x given by our upper bounding function)
!*/
};
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_UPPER_bOUND_FUNCTION_ABSTRACT_Hh_
@@ -103,6 +103,7 @@ set (tests
one_vs_one_trainer.cpp
optimization.cpp
optimization_test_functions.cpp
global_optimization.cpp
opt_qp_solver.cpp
parallel_for.cpp
parse.cpp
...
// Copyright (C) 2017 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#include <dlib/global_optimization.h>
#include <dlib/statistics.h>
#include <sstream>
#include <string>
#include <cstdlib>
#include <ctime>
#include <vector>
#include <dlib/rand.h>
#include "tester.h"
namespace
{
using namespace test;
using namespace dlib;
using namespace std;
logger dlog("test.global_optimization");
// ----------------------------------------------------------------------------------------
void test_upper_bound_function(double relative_noise_magnitude, double solver_eps)
{
print_spinner();
dlog << LINFO << "test_upper_bound_function, relative_noise_magnitude="<< relative_noise_magnitude << ", solver_eps=" << solver_eps;
auto rosen = [](const matrix<double,0,1>& x) { return -1*( 100*std::pow(x(1) - x(0)*x(0),2.0) + std::pow(1 - x(0),2)); };
dlib::rand rnd;
auto make_rnd = [&rnd]() { matrix<double,0,1> x(2); x = 2*rnd.get_random_double(), 2*rnd.get_random_double(); return x; };
std::vector<function_evaluation> evals;
for (int i = 0; i < 200; ++i)
{
auto x = make_rnd();
evals.emplace_back(x,rosen(x));
}
upper_bound_function ub(evals, relative_noise_magnitude, solver_eps);
DLIB_TEST(ub.num_points() == (long)evals.size());
DLIB_TEST(ub.dimensionality() == 2);
for (auto& ev : evals)
{
dlog << LINFO << ub(ev.x) - ev.y;
DLIB_TEST_MSG(ub(ev.x) - ev.y > -1e10, ub(ev.x) - ev.y);
}
if (solver_eps < 0.001)
{
dlog << LINFO << "out of sample points: ";
for (int i = 0; i < 10; ++i)
{
auto x = make_rnd();
dlog << LINFO << ub(x) - rosen(x);
DLIB_TEST_MSG(ub(x) - rosen(x) > 1e-10, ub(x) - rosen(x));
}
}
}
// ----------------------------------------------------------------------------------------
class global_optimization_tester : public tester
{
public:
global_optimization_tester (
) :
tester ("test_global_optimization",
"Runs tests on the global optimization components.")
{}
void perform_test (
)
{
test_upper_bound_function(0.1, 1e-6);
test_upper_bound_function(0.0, 1e-6);
test_upper_bound_function(0.0, 1e-1);
}
} a;
}