Commit 5529ddfb authored by Davis King's avatar Davis King

Added a .add() to upper_bound_function so that the upper bound can be quickly

updated without needing to resolve the whole QP.
parent 7e39a527
......@@ -33,21 +33,114 @@ namespace dlib
upper_bound_function(
) = default;
upper_bound_function(
const double relative_noise_magnitude,
const double solver_eps
) : relative_noise_magnitude(relative_noise_magnitude), solver_eps(solver_eps)
{
DLIB_CASSERT(relative_noise_magnitude >= 0);
DLIB_CASSERT(solver_eps > 0);
}
explicit upper_bound_function(
const std::vector<function_evaluation>& _points,
const double relative_noise_magnitude = 0.001,
const double solver_eps = 0.0001
) : points(_points)
) : relative_noise_magnitude(relative_noise_magnitude), solver_eps(solver_eps), points(_points)
{
DLIB_CASSERT(points.size() > 1);
DLIB_CASSERT(points[0].x.size() > 0, "The vectors can't be empty.");
DLIB_CASSERT(relative_noise_magnitude >= 0);
DLIB_CASSERT(solver_eps > 0);
if (points.size() > 1)
{
DLIB_CASSERT(points[0].x.size() > 0, "The vectors can't be empty.");
const long dims = points[0].x.size();
for (auto& p : points)
DLIB_CASSERT(p.x.size() == dims, "All the vectors given to upper_bound_function must have the same dimensionality.");
learn_params();
}
}
void add (
const function_evaluation& point
)
{
DLIB_CASSERT(point.x.size() != 0, "The vectors can't be empty.");
if (points.size() == 0)
{
points.push_back(point);
return;
}
DLIB_CASSERT(point.x.size() == dimensionality(), "All the vectors given to upper_bound_function must have the same dimensionality.");
if (points.size() < 4)
{
points.push_back(point);
*this = upper_bound_function(points, relative_noise_magnitude, solver_eps);
return;
}
points.push_back(point);
// add constraints between the new point and the old points
for (size_t i = 0; i < points.size()-1; ++i)
active_constraints.push_back(std::make_pair(i,points.size()-1));
learn_params();
}
long num_points(
) const
{
return points.size();
}
long dimensionality(
) const
{
if (points.size() == 0)
return 0;
else
return points[0].x.size();
}
const std::vector<function_evaluation>& get_points(
) const
{
return points;
}
double operator() (
const matrix<double,0,1>& x
) const
{
DLIB_CASSERT(num_points() > 0);
DLIB_CASSERT(x.size() == dimensionality());
double upper_bound = std::numeric_limits<double>::infinity();
for (size_t i = 0; i < points.size(); ++i)
{
const double local_bound = points[i].y + std::sqrt(offsets[i] + dot(slopes, squared(x-points[i].x)));
upper_bound = std::min(upper_bound, local_bound);
}
return upper_bound;
}
private:
void learn_params (
)
{
const long dims = points[0].x.size();
using sample_type = std::vector<std::pair<size_t,double>>;
using sample_type = std::vector<std::pair<size_t,double>>;
using kernel_type = sparse_linear_kernel<sample_type>;
......@@ -65,8 +158,6 @@ namespace dlib
y_rs.add(v.y);
}
x.reserve(points.size()*(points.size()-1)/2);
y.reserve(points.size()*(points.size()-1)/2);
// compute normalization vectors for the data. The only reason we do this is
// to make the optimization well conditioned. In particular, scaling the y
......@@ -80,12 +171,8 @@ namespace dlib
for (size_t i = 0; i < xscale.size(); ++i)
xscale[i] = 1.0/(x_rs[i].stddev()*yscale); // make it so that xscale[i]*yscale == 1/x_rs[i].stddev()
sample_type samp;
for (size_t i = 0; i < points.size(); ++i)
{
for (size_t j = i+1; j < points.size(); ++j)
{
auto add_constraint = [&](long i, long j) {
samp.clear();
for (long k = 0; k < dims; ++k)
{
......@@ -103,8 +190,28 @@ namespace dlib
x.push_back(samp);
y.push_back(1);
};
if (active_constraints.size() == 0)
{
x.reserve(points.size()*(points.size()-1)/2);
y.reserve(points.size()*(points.size()-1)/2);
for (size_t i = 0; i < points.size(); ++i)
{
for (size_t j = i+1; j < points.size(); ++j)
{
add_constraint(i,j);
}
}
}
else
{
for (auto& p : active_constraints)
add_constraint(p.first, p.second);
}
svm_c_linear_dcd_trainer<kernel_type> trainer;
trainer.set_c(std::numeric_limits<double>::infinity());
......@@ -112,83 +219,60 @@ namespace dlib
trainer.force_last_weight_to_1(true);
trainer.set_epsilon(solver_eps);
auto df = trainer.train(x,y);
const auto& bv = df.basis_vectors(0);
slopes.set_size(dims);
for (long i = 0; i < dims; ++i)
slopes(i) = bv[i].second*xscale[i]*xscale[i];
//cout << "slopes:" << trans(slopes);
offsets.resize(points.size());
svm_c_linear_dcd_trainer<kernel_type>::optimizer_state state;
auto df = trainer.train(x,y, state);
auto s = x.begin();
// save the active constraints for later so we can use them inside add() to add
// new points efficiently.
if (active_constraints.size() == 0)
{
long k = 0;
for (size_t i = 0; i < points.size(); ++i)
{
for (size_t j = i+1; j < points.size(); ++j)
{
double val = df(*s);
// If the constraint wasn't exactly satisfied then we need to adjust
// the offsets so that it is satisfied. So we check for that here
if (points[i].y > points[j].y)
{
if (val + offsets[j] < 1)
offsets[j] = 1-val;
}
else
{
if (val + offsets[i] < 1)
offsets[i] = 1-val;
if (state.get_alpha()[k++] != 0)
active_constraints.push_back(std::make_pair(i,j));
}
++s;
}
}
for (size_t i = 0; i < points.size(); ++i)
else
{
offsets[i] += bv[slopes.size()+i].second*relative_noise_magnitude;
DLIB_CASSERT(state.get_alpha().size() == active_constraints.size());
new_active_constraints.clear();
for (size_t i = 0; i < state.get_alpha().size(); ++i)
{
if (state.get_alpha()[i] != 0)
new_active_constraints.push_back(active_constraints[i]);
}
active_constraints.swap(new_active_constraints);
}
long num_points(
) const
{
return points.size();
}
//std::cout << "points.size(): " << points.size() << std::endl;
//std::cout << "active_constraints.size(): " << active_constraints.size() << std::endl;
long dimensionality(
) const
{
if (points.size() == 0)
return 0;
else
return points[0].x.size();
}
double operator() (
matrix<double,0,1> x
) const
{
DLIB_CASSERT(num_points() > 0);
DLIB_CASSERT(x.size() == dimensionality());
const auto& bv = df.basis_vectors(0);
slopes.set_size(dims);
for (long i = 0; i < dims; ++i)
slopes(i) = bv[i].second*xscale[i]*xscale[i];
//std::cout << "slopes:" << trans(slopes);
offsets.assign(points.size(),0);
double upper_bound = std::numeric_limits<double>::infinity();
for (size_t i = 0; i < points.size(); ++i)
{
const double local_bound = points[i].y + std::sqrt(offsets[i] + dot(slopes, squared(x-points[i].x)));
upper_bound = std::min(upper_bound, local_bound);
offsets[i] += bv[slopes.size()+i].second*relative_noise_magnitude;
}
return upper_bound;
}
private:
double relative_noise_magnitude = 0.001;
double solver_eps = 0.0001;
std::vector<std::pair<size_t,size_t>> active_constraints, new_active_constraints;
std::vector<function_evaluation> points;
std::vector<double> offsets; // offsets.size() == points.size()
......
......@@ -104,8 +104,8 @@ namespace dlib
);
/*!
requires
- points.size() > 1
- all the x vectors in points must have the same non-zero dimensionality.
- relative_noise_magnitude >= 0
- solver_eps > 0
ensures
- Creates an upper bounding function U(x), as described above, assuming that
......@@ -116,16 +116,67 @@ namespace dlib
only do this if you know F(x) is non-stochastic and continuous
everywhere.
- When solving the QP used to find the parameters of U(x), the upper
bounding function, we solve the QP to solver_eps accuracy.
bounding function, we solve the QP to solver_eps accuracy. It's
possible that large enough solver_eps can lead to upper bounds that don't
upper bound all the supplied points. But for reasonable epsilon values
this shouldn't be a problem.
- #num_points() == points.size()
- #dimensionality() == points[0].x.size()
!*/
upper_bound_function(
const double relative_noise_magnitude,
const double solver_eps
);
/*!
requires
- relative_noise_magnitude >= 0
- solver_eps > 0
ensures
- #num_points() == 0
- #dimensionality() == 0
- This destructor is the same as calling the above constructor with points.size()==0
!*/
void add (
const function_evaluation& point
);
/*!
requires
- num_points() == 0 || point.x.size() == dimensionality()
- point.x.size() != 0
ensures
- Adds point to get_points().
- Incrementally updates the upper bounding function with the given function
evaluation. That is, we assume that F(point.x)==point.y and solve the QP
described above to find the new U(x) that upper bounds all the points
this object knows about (i.e. all the points in get_points() and the new point).
- Calling add() is much faster than recreating the upper_bound_function
from scratch with all the points. This is because we warm start with the
previous solution to the QP. This is done by discarding any non-active
constraints and solving the QP again with only the previously active
constraints and the new constraints formed by all the pairs of the new
point and the old points. This means the QP solved by add() is much
smaller than the QP that would be solved by a fresh call to the
upper_bound_function constructor.
!*/
const std::vector<function_evaluation>& get_points(
) const;
/*!
ensures
- returns the points from F(x) used to define this upper bounding function.
These are all the function_evaluation objects given to this object via
its constructor and add().
!*/
long num_points(
) const;
/*!
ensures
- returns the number of points used to define the upper bounding function.
(i.e. returns get_points().size())
!*/
long dimensionality(
......@@ -136,7 +187,7 @@ namespace dlib
!*/
double operator() (
matrix<double,0,1> x
const matrix<double,0,1>& x
) const;
/*!
requires
......
......@@ -38,13 +38,29 @@ namespace
std::vector<function_evaluation> evals;
for (int i = 0; i < 200; ++i)
for (int i = 0; i < 100; ++i)
{
auto x = make_rnd();
evals.emplace_back(x,rosen(x));
}
upper_bound_function ub(evals, relative_noise_magnitude, solver_eps);
DLIB_TEST(ub.num_points() == (long)evals.size());
DLIB_TEST(ub.dimensionality() == 2);
for (auto& ev : evals)
{
dlog << LINFO << ub(ev.x) - ev.y;
DLIB_TEST_MSG(ub(ev.x) - ev.y > -1e10, ub(ev.x) - ev.y);
}
for (int i = 0; i < 100; ++i)
{
auto x = make_rnd();
evals.emplace_back(x,rosen(x));
ub.add(evals.back());
}
DLIB_TEST(ub.num_points() == (long)evals.size());
DLIB_TEST(ub.dimensionality() == 2);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment