Commit 5529ddfb authored by Davis King's avatar Davis King

Added a .add() to upper_bound_function so that the upper bound can be quickly

updated without needing to resolve the whole QP.
parent 7e39a527
...@@ -33,21 +33,114 @@ namespace dlib ...@@ -33,21 +33,114 @@ namespace dlib
upper_bound_function( upper_bound_function(
) = default; ) = default;
upper_bound_function(
const double relative_noise_magnitude,
const double solver_eps
) : relative_noise_magnitude(relative_noise_magnitude), solver_eps(solver_eps)
{
DLIB_CASSERT(relative_noise_magnitude >= 0);
DLIB_CASSERT(solver_eps > 0);
}
explicit upper_bound_function( explicit upper_bound_function(
const std::vector<function_evaluation>& _points, const std::vector<function_evaluation>& _points,
const double relative_noise_magnitude = 0.001, const double relative_noise_magnitude = 0.001,
const double solver_eps = 0.0001 const double solver_eps = 0.0001
) : points(_points) ) : relative_noise_magnitude(relative_noise_magnitude), solver_eps(solver_eps), points(_points)
{ {
DLIB_CASSERT(points.size() > 1);
DLIB_CASSERT(points[0].x.size() > 0, "The vectors can't be empty.");
DLIB_CASSERT(relative_noise_magnitude >= 0); DLIB_CASSERT(relative_noise_magnitude >= 0);
DLIB_CASSERT(solver_eps > 0); DLIB_CASSERT(solver_eps > 0);
if (points.size() > 1)
{
DLIB_CASSERT(points[0].x.size() > 0, "The vectors can't be empty.");
const long dims = points[0].x.size(); const long dims = points[0].x.size();
for (auto& p : points) for (auto& p : points)
DLIB_CASSERT(p.x.size() == dims, "All the vectors given to upper_bound_function must have the same dimensionality."); DLIB_CASSERT(p.x.size() == dims, "All the vectors given to upper_bound_function must have the same dimensionality.");
learn_params();
}
}
void add (
const function_evaluation& point
)
{
DLIB_CASSERT(point.x.size() != 0, "The vectors can't be empty.");
if (points.size() == 0)
{
points.push_back(point);
return;
}
DLIB_CASSERT(point.x.size() == dimensionality(), "All the vectors given to upper_bound_function must have the same dimensionality.");
if (points.size() < 4)
{
points.push_back(point);
*this = upper_bound_function(points, relative_noise_magnitude, solver_eps);
return;
}
points.push_back(point);
// add constraints between the new point and the old points
for (size_t i = 0; i < points.size()-1; ++i)
active_constraints.push_back(std::make_pair(i,points.size()-1));
learn_params();
}
long num_points(
) const
{
return points.size();
}
long dimensionality(
) const
{
if (points.size() == 0)
return 0;
else
return points[0].x.size();
}
const std::vector<function_evaluation>& get_points(
) const
{
return points;
}
double operator() (
const matrix<double,0,1>& x
) const
{
DLIB_CASSERT(num_points() > 0);
DLIB_CASSERT(x.size() == dimensionality());
double upper_bound = std::numeric_limits<double>::infinity();
for (size_t i = 0; i < points.size(); ++i)
{
const double local_bound = points[i].y + std::sqrt(offsets[i] + dot(slopes, squared(x-points[i].x)));
upper_bound = std::min(upper_bound, local_bound);
}
return upper_bound;
}
private:
void learn_params (
)
{
const long dims = points[0].x.size();
using sample_type = std::vector<std::pair<size_t,double>>;
using sample_type = std::vector<std::pair<size_t,double>>; using sample_type = std::vector<std::pair<size_t,double>>;
using kernel_type = sparse_linear_kernel<sample_type>; using kernel_type = sparse_linear_kernel<sample_type>;
...@@ -65,8 +158,6 @@ namespace dlib ...@@ -65,8 +158,6 @@ namespace dlib
y_rs.add(v.y); y_rs.add(v.y);
} }
x.reserve(points.size()*(points.size()-1)/2);
y.reserve(points.size()*(points.size()-1)/2);
// compute normalization vectors for the data. The only reason we do this is // compute normalization vectors for the data. The only reason we do this is
// to make the optimization well conditioned. In particular, scaling the y // to make the optimization well conditioned. In particular, scaling the y
...@@ -80,12 +171,8 @@ namespace dlib ...@@ -80,12 +171,8 @@ namespace dlib
for (size_t i = 0; i < xscale.size(); ++i) for (size_t i = 0; i < xscale.size(); ++i)
xscale[i] = 1.0/(x_rs[i].stddev()*yscale); // make it so that xscale[i]*yscale == 1/x_rs[i].stddev() xscale[i] = 1.0/(x_rs[i].stddev()*yscale); // make it so that xscale[i]*yscale == 1/x_rs[i].stddev()
sample_type samp; sample_type samp;
for (size_t i = 0; i < points.size(); ++i) auto add_constraint = [&](long i, long j) {
{
for (size_t j = i+1; j < points.size(); ++j)
{
samp.clear(); samp.clear();
for (long k = 0; k < dims; ++k) for (long k = 0; k < dims; ++k)
{ {
...@@ -103,8 +190,28 @@ namespace dlib ...@@ -103,8 +190,28 @@ namespace dlib
x.push_back(samp); x.push_back(samp);
y.push_back(1); y.push_back(1);
};
if (active_constraints.size() == 0)
{
x.reserve(points.size()*(points.size()-1)/2);
y.reserve(points.size()*(points.size()-1)/2);
for (size_t i = 0; i < points.size(); ++i)
{
for (size_t j = i+1; j < points.size(); ++j)
{
add_constraint(i,j);
} }
} }
}
else
{
for (auto& p : active_constraints)
add_constraint(p.first, p.second);
}
svm_c_linear_dcd_trainer<kernel_type> trainer; svm_c_linear_dcd_trainer<kernel_type> trainer;
trainer.set_c(std::numeric_limits<double>::infinity()); trainer.set_c(std::numeric_limits<double>::infinity());
...@@ -112,83 +219,60 @@ namespace dlib ...@@ -112,83 +219,60 @@ namespace dlib
trainer.force_last_weight_to_1(true); trainer.force_last_weight_to_1(true);
trainer.set_epsilon(solver_eps); trainer.set_epsilon(solver_eps);
auto df = trainer.train(x,y); svm_c_linear_dcd_trainer<kernel_type>::optimizer_state state;
auto df = trainer.train(x,y, state);
const auto& bv = df.basis_vectors(0);
slopes.set_size(dims);
for (long i = 0; i < dims; ++i)
slopes(i) = bv[i].second*xscale[i]*xscale[i];
//cout << "slopes:" << trans(slopes);
offsets.resize(points.size());
auto s = x.begin(); // save the active constraints for later so we can use them inside add() to add
// new points efficiently.
if (active_constraints.size() == 0)
{
long k = 0;
for (size_t i = 0; i < points.size(); ++i) for (size_t i = 0; i < points.size(); ++i)
{ {
for (size_t j = i+1; j < points.size(); ++j) for (size_t j = i+1; j < points.size(); ++j)
{ {
double val = df(*s); if (state.get_alpha()[k++] != 0)
// If the constraint wasn't exactly satisfied then we need to adjust active_constraints.push_back(std::make_pair(i,j));
// the offsets so that it is satisfied. So we check for that here
if (points[i].y > points[j].y)
{
if (val + offsets[j] < 1)
offsets[j] = 1-val;
}
else
{
if (val + offsets[i] < 1)
offsets[i] = 1-val;
} }
++s;
} }
} }
else
for (size_t i = 0; i < points.size(); ++i)
{ {
offsets[i] += bv[slopes.size()+i].second*relative_noise_magnitude; DLIB_CASSERT(state.get_alpha().size() == active_constraints.size());
new_active_constraints.clear();
for (size_t i = 0; i < state.get_alpha().size(); ++i)
{
if (state.get_alpha()[i] != 0)
new_active_constraints.push_back(active_constraints[i]);
} }
active_constraints.swap(new_active_constraints);
} }
long num_points( //std::cout << "points.size(): " << points.size() << std::endl;
) const //std::cout << "active_constraints.size(): " << active_constraints.size() << std::endl;
{
return points.size();
}
long dimensionality(
) const
{
if (points.size() == 0)
return 0;
else
return points[0].x.size();
}
double operator() ( const auto& bv = df.basis_vectors(0);
matrix<double,0,1> x slopes.set_size(dims);
) const for (long i = 0; i < dims; ++i)
{ slopes(i) = bv[i].second*xscale[i]*xscale[i];
DLIB_CASSERT(num_points() > 0);
DLIB_CASSERT(x.size() == dimensionality());
//std::cout << "slopes:" << trans(slopes);
offsets.assign(points.size(),0);
double upper_bound = std::numeric_limits<double>::infinity();
for (size_t i = 0; i < points.size(); ++i) for (size_t i = 0; i < points.size(); ++i)
{ {
const double local_bound = points[i].y + std::sqrt(offsets[i] + dot(slopes, squared(x-points[i].x))); offsets[i] += bv[slopes.size()+i].second*relative_noise_magnitude;
upper_bound = std::min(upper_bound, local_bound);
} }
return upper_bound;
} }
private:
double relative_noise_magnitude = 0.001;
double solver_eps = 0.0001;
std::vector<std::pair<size_t,size_t>> active_constraints, new_active_constraints;
std::vector<function_evaluation> points; std::vector<function_evaluation> points;
std::vector<double> offsets; // offsets.size() == points.size() std::vector<double> offsets; // offsets.size() == points.size()
......
...@@ -104,8 +104,8 @@ namespace dlib ...@@ -104,8 +104,8 @@ namespace dlib
); );
/*! /*!
requires requires
- points.size() > 1
- all the x vectors in points must have the same non-zero dimensionality. - all the x vectors in points must have the same non-zero dimensionality.
- relative_noise_magnitude >= 0
- solver_eps > 0 - solver_eps > 0
ensures ensures
- Creates an upper bounding function U(x), as described above, assuming that - Creates an upper bounding function U(x), as described above, assuming that
...@@ -116,16 +116,67 @@ namespace dlib ...@@ -116,16 +116,67 @@ namespace dlib
only do this if you know F(x) is non-stochastic and continuous only do this if you know F(x) is non-stochastic and continuous
everywhere. everywhere.
- When solving the QP used to find the parameters of U(x), the upper - When solving the QP used to find the parameters of U(x), the upper
bounding function, we solve the QP to solver_eps accuracy. bounding function, we solve the QP to solver_eps accuracy. It's
possible that large enough solver_eps can lead to upper bounds that don't
upper bound all the supplied points. But for reasonable epsilon values
this shouldn't be a problem.
- #num_points() == points.size() - #num_points() == points.size()
- #dimensionality() == points[0].x.size() - #dimensionality() == points[0].x.size()
!*/ !*/
upper_bound_function(
const double relative_noise_magnitude,
const double solver_eps
);
/*!
requires
- relative_noise_magnitude >= 0
- solver_eps > 0
ensures
- #num_points() == 0
- #dimensionality() == 0
- This destructor is the same as calling the above constructor with points.size()==0
!*/
void add (
const function_evaluation& point
);
/*!
requires
- num_points() == 0 || point.x.size() == dimensionality()
- point.x.size() != 0
ensures
- Adds point to get_points().
- Incrementally updates the upper bounding function with the given function
evaluation. That is, we assume that F(point.x)==point.y and solve the QP
described above to find the new U(x) that upper bounds all the points
this object knows about (i.e. all the points in get_points() and the new point).
- Calling add() is much faster than recreating the upper_bound_function
from scratch with all the points. This is because we warm start with the
previous solution to the QP. This is done by discarding any non-active
constraints and solving the QP again with only the previously active
constraints and the new constraints formed by all the pairs of the new
point and the old points. This means the QP solved by add() is much
smaller than the QP that would be solved by a fresh call to the
upper_bound_function constructor.
!*/
const std::vector<function_evaluation>& get_points(
) const;
/*!
ensures
- returns the points from F(x) used to define this upper bounding function.
These are all the function_evaluation objects given to this object via
its constructor and add().
!*/
long num_points( long num_points(
) const; ) const;
/*! /*!
ensures ensures
- returns the number of points used to define the upper bounding function. - returns the number of points used to define the upper bounding function.
(i.e. returns get_points().size())
!*/ !*/
long dimensionality( long dimensionality(
...@@ -136,7 +187,7 @@ namespace dlib ...@@ -136,7 +187,7 @@ namespace dlib
!*/ !*/
double operator() ( double operator() (
matrix<double,0,1> x const matrix<double,0,1>& x
) const; ) const;
/*! /*!
requires requires
......
...@@ -38,13 +38,29 @@ namespace ...@@ -38,13 +38,29 @@ namespace
std::vector<function_evaluation> evals; std::vector<function_evaluation> evals;
for (int i = 0; i < 200; ++i) for (int i = 0; i < 100; ++i)
{ {
auto x = make_rnd(); auto x = make_rnd();
evals.emplace_back(x,rosen(x)); evals.emplace_back(x,rosen(x));
} }
upper_bound_function ub(evals, relative_noise_magnitude, solver_eps); upper_bound_function ub(evals, relative_noise_magnitude, solver_eps);
DLIB_TEST(ub.num_points() == (long)evals.size());
DLIB_TEST(ub.dimensionality() == 2);
for (auto& ev : evals)
{
dlog << LINFO << ub(ev.x) - ev.y;
DLIB_TEST_MSG(ub(ev.x) - ev.y > -1e10, ub(ev.x) - ev.y);
}
for (int i = 0; i < 100; ++i)
{
auto x = make_rnd();
evals.emplace_back(x,rosen(x));
ub.add(evals.back());
}
DLIB_TEST(ub.num_points() == (long)evals.size()); DLIB_TEST(ub.num_points() == (long)evals.size());
DLIB_TEST(ub.dimensionality() == 2); DLIB_TEST(ub.dimensionality() == 2);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment