Commit eff11e27 authored by Davis King

Renamed the support_vectors field of the decision_function and distance_function objects

to basis_vectors.  A long time ago the name support_vectors made sense, but now that these
objects are used by many algorithms that don't technically produce support vectors,
the name is confusing.  The name basis_vectors more accurately reflects how these
objects are actually used.

--HG--
extra : convert_revision : svn%3Afdd8eb12-d10e-0410-9acb-85c331704f74/trunk%403278
parent 4ddd0cae
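
For context, a minimal usage sketch of the renamed field (not part of this commit; the kernel choice, gamma, and numbers are illustrative assumptions, but the public fields and operator() it exercises are the ones shown in the diff below):

#include <dlib/svm.h>
#include <iostream>

int main()
{
    typedef dlib::matrix<double,2,1> sample_type;
    typedef dlib::radial_basis_kernel<sample_type> kernel_type;

    dlib::decision_function<kernel_type> df;
    df.kernel_function = kernel_type(0.1);
    df.b = 0;

    // Hand-filled weights and basis vectors; normally a trainer produces these.
    df.alpha.set_size(2);
    df.basis_vectors.set_size(2);
    df.alpha(0) =  0.5;  df.basis_vectors(0) =  1, 2;
    df.alpha(1) = -0.5;  df.basis_vectors(1) = -1, 0;

    sample_type x;
    x = 0.5, 0.5;
    // Evaluates the sum over i of alpha(i)*kernel_function(x, basis_vectors(i)), minus b
    std::cout << df(x) << std::endl;
    return 0;
}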
......@@ -34,7 +34,7 @@ namespace dlib
scalar_vector_type alpha;
scalar_type b;
K kernel_function;
- sample_vector_type support_vectors;
+ sample_vector_type basis_vectors;
decision_function (
) : b(0), kernel_function(K()) {}
......@@ -45,19 +45,19 @@ namespace dlib
alpha(d.alpha),
b(d.b),
kernel_function(d.kernel_function),
- support_vectors(d.support_vectors)
+ basis_vectors(d.basis_vectors)
{}
decision_function (
const scalar_vector_type& alpha_,
const scalar_type& b_,
const K& kernel_function_,
- const sample_vector_type& support_vectors_
+ const sample_vector_type& basis_vectors_
) :
alpha(alpha_),
b(b_),
kernel_function(kernel_function_),
- support_vectors(support_vectors_)
+ basis_vectors(basis_vectors_)
{}
decision_function& operator= (
......@@ -69,7 +69,7 @@ namespace dlib
alpha = d.alpha;
b = d.b;
kernel_function = d.kernel_function;
- support_vectors = d.support_vectors;
+ basis_vectors = d.basis_vectors;
}
return *this;
}
......@@ -80,7 +80,7 @@ namespace dlib
{
scalar_type temp = 0;
for (long i = 0; i < alpha.nr(); ++i)
- temp += alpha(i) * kernel_function(x,support_vectors(i));
+ temp += alpha(i) * kernel_function(x,basis_vectors(i));
return temp - b;
}
......@@ -99,7 +99,7 @@ namespace dlib
serialize(item.alpha, out);
serialize(item.b, out);
serialize(item.kernel_function, out);
- serialize(item.support_vectors, out);
+ serialize(item.basis_vectors, out);
}
catch (serialization_error e)
{
......@@ -120,7 +120,7 @@ namespace dlib
deserialize(item.alpha, in);
deserialize(item.b, in);
deserialize(item.kernel_function, in);
- deserialize(item.support_vectors, in);
+ deserialize(item.basis_vectors, in);
}
catch (serialization_error e)
{
......@@ -246,7 +246,7 @@ namespace dlib
scalar_vector_type alpha;
scalar_type b;
K kernel_function;
- sample_vector_type support_vectors;
+ sample_vector_type basis_vectors;
distance_function (
) : b(0), kernel_function(K()) {}
......@@ -257,19 +257,19 @@ namespace dlib
alpha(d.alpha),
b(d.b),
kernel_function(d.kernel_function),
- support_vectors(d.support_vectors)
+ basis_vectors(d.basis_vectors)
{}
distance_function (
const scalar_vector_type& alpha_,
const scalar_type& b_,
const K& kernel_function_,
- const sample_vector_type& support_vectors_
+ const sample_vector_type& basis_vectors_
) :
alpha(alpha_),
b(b_),
kernel_function(kernel_function_),
- support_vectors(support_vectors_)
+ basis_vectors(basis_vectors_)
{}
distance_function& operator= (
......@@ -281,7 +281,7 @@ namespace dlib
alpha = d.alpha;
b = d.b;
kernel_function = d.kernel_function;
- support_vectors = d.support_vectors;
+ basis_vectors = d.basis_vectors;
}
return *this;
}
......@@ -292,7 +292,7 @@ namespace dlib
{
scalar_type temp = 0;
for (long i = 0; i < alpha.nr(); ++i)
- temp += alpha(i) * kernel_function(x,support_vectors(i));
+ temp += alpha(i) * kernel_function(x,basis_vectors(i));
temp = b + kernel_function(x,x) - 2*temp;
if (temp > 0)
......@@ -308,7 +308,7 @@ namespace dlib
scalar_type temp = 0;
for (long i = 0; i < alpha.nr(); ++i)
for (long j = 0; j < x.alpha.nr(); ++j)
- temp += alpha(i)*x.alpha(j) * kernel_function(support_vectors(i), x.support_vectors(j));
+ temp += alpha(i)*x.alpha(j) * kernel_function(basis_vectors(i), x.basis_vectors(j));
temp = b + x.b - 2*temp;
if (temp > 0)
......@@ -331,7 +331,7 @@ namespace dlib
serialize(item.alpha, out);
serialize(item.b, out);
serialize(item.kernel_function, out);
- serialize(item.support_vectors, out);
+ serialize(item.basis_vectors, out);
}
catch (serialization_error e)
{
......@@ -352,7 +352,7 @@ namespace dlib
deserialize(item.alpha, in);
deserialize(item.b, in);
deserialize(item.kernel_function, in);
- deserialize(item.support_vectors, in);
+ deserialize(item.basis_vectors, in);
}
catch (serialization_error e)
{
......
......@@ -42,7 +42,7 @@ namespace dlib
scalar_vector_type alpha;
scalar_type b;
K kernel_function;
- sample_vector_type support_vectors;
+ sample_vector_type basis_vectors;
decision_function (
);
......@@ -50,7 +50,7 @@ namespace dlib
ensures
- #b == 0
- #alpha.nr() == 0
- - #support_vectors.nr() == 0
+ - #basis_vectors.nr() == 0
!*/
decision_function (
......@@ -65,11 +65,11 @@ namespace dlib
const scalar_vector_type& alpha_,
const scalar_type& b_,
const K& kernel_function_,
- const sample_vector_type& support_vectors_
- ) : alpha(alpha_), b(b_), kernel_function(kernel_function_), support_vectors(support_vectors_) {}
+ const sample_vector_type& basis_vectors_
+ ) : alpha(alpha_), b(b_), kernel_function(kernel_function_), basis_vectors(basis_vectors_) {}
/*!
ensures
- - populates the decision function with the given support vectors, weights(i.e. alphas),
+ - populates the decision function with the given basis vectors, weights (i.e. alphas),
b term, and kernel function.
!*/
......@@ -93,7 +93,7 @@ namespace dlib
{
scalar_type temp = 0;
for (long i = 0; i < alpha.nr(); ++i)
- temp += alpha(i) * kernel_function(x,support_vectors(i));
+ temp += alpha(i) * kernel_function(x,basis_vectors(i));
return temp - b;
}
......@@ -255,7 +255,7 @@ namespace dlib
scalar_vector_type alpha;
scalar_type b;
K kernel_function;
- sample_vector_type support_vectors;
+ sample_vector_type basis_vectors;
distance_function (
);
......@@ -263,7 +263,7 @@ namespace dlib
ensures
- #b == 0
- #alpha.nr() == 0
- - #support_vectors.nr() == 0
+ - #basis_vectors.nr() == 0
!*/
distance_function (
......@@ -278,11 +278,11 @@ namespace dlib
const scalar_vector_type& alpha_,
const scalar_type& b_,
const K& kernel_function_,
- const sample_vector_type& support_vectors_
- ) : alpha(alpha_), b(b_), kernel_function(kernel_function_), support_vectors(support_vectors_) {}
+ const sample_vector_type& basis_vectors_
+ ) : alpha(alpha_), b(b_), kernel_function(kernel_function_), basis_vectors(basis_vectors_) {}
/*!
ensures
- - populates the decision function with the given support vectors, weights(i.e. alphas),
+ - populates the distance function with the given basis vectors, weights (i.e. alphas),
b term, and kernel function.
!*/
......@@ -301,15 +301,15 @@ namespace dlib
/*!
ensures
- Let O(x) represent the point x projected into kernel induced feature space.
- - let c == sum alpha(i)*O(support_vectors(i)) == the point in kernel space that
+ - let c == sum alpha(i)*O(basis_vectors(i)) == the point in kernel space that
this object represents.
- - Then this object returns the distance between the points O(x) and c in kernel
+ - Then this object returns the distance between the point O(x) and c in kernel
space.
!*/
{
scalar_type temp = 0;
for (long i = 0; i < alpha.nr(); ++i)
- temp += alpha(i) * kernel_function(x,support_vectors(i));
+ temp += alpha(i) * kernel_function(x,basis_vectors(i));
temp = b + kernel_function(x,x) - 2*temp;
if (temp > 0)
......@@ -329,7 +329,7 @@ namespace dlib
scalar_type temp = 0;
for (long i = 0; i < alpha.nr(); ++i)
for (long j = 0; j < x.alpha.nr(); ++j)
- temp += alpha(i)*x.alpha(j) * kernel_function(support_vectors(i), x.support_vectors(j));
+ temp += alpha(i)*x.alpha(j) * kernel_function(basis_vectors(i), x.basis_vectors(j));
temp = b + x.b - 2*temp;
if (temp > 0)
......
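
As an aside on the ensures clause above: writing \(\Phi\) for the O(.) in the comment, \(v_i\) for the basis_vectors, and using that b stores the squared norm of c, the quantity being computed expands to

\[ \lVert \Phi(x) - c \rVert^2 \;=\; k(x,x) \;-\; 2\sum_i \alpha_i\, k(x, v_i) \;+\; \underbrace{\textstyle\sum_{i,j} \alpha_i \alpha_j\, k(v_i, v_j)}_{=\,b} \]

which is exactly the temp = b + kernel_function(x,x) - 2*temp line in the code. The elided branch after if (temp > 0) presumably returns the square root of temp (and zero otherwise, since rounding can push temp slightly negative). The two-argument operator() expands the same way, with the cross term alpha(i)*x.alpha(j)*k(v_i, x.v_j) and b + x.b in place of the two squared norms.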
......@@ -93,9 +93,9 @@ namespace dlib
) const;
/*!
ensures
- - returns the maximum number of dictionary vectors (i.e. support
- vectors) this object will use at a time. That is, dictionary_size()
- will never be greater than max_dictionary_size().
+ - returns the maximum number of dictionary vectors this object will
+ use at a time. That is, dictionary_size() will never be greater
+ than max_dictionary_size().
!*/
bool remove_oldest_first (
......@@ -122,7 +122,9 @@ namespace dlib
) const;
/*!
ensures
- returns the number of "support vectors" in the dictionary.
- returns the number of basis vectors in the dictionary. These are
the basis vectors used by this object to represent a point in kernel
feature space.
!*/
scalar_type samples_trained (
......@@ -140,11 +142,11 @@ namespace dlib
test used for sparsification (see the KRLS paper for details). This is
a number which governs how accurately this object will approximate the
centroid it is learning. Smaller values generally result in a more
- accurate estimate while also resulting in a bigger set of support
- vectors in the learned dictionary. Bigger tolerances values result in
- a less accurate estimate but also in less support vectors. (Note
- that in any case, the max_dictionary_size() limits the number
- of support vectors no matter the setting of the tolerance)
+ accurate estimate while also resulting in a bigger set of vectors in
+ the dictionary. Bigger tolerance values result in a less accurate
+ estimate but also in fewer dictionary vectors. (Note that in any case,
+ the max_dictionary_size() limits the number of dictionary vectors no
+ matter the setting of the tolerance.)
!*/
void clear_dictionary (
......
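
To make the tolerance / max_dictionary_size trade-off above concrete, a usage sketch (the kernel, gamma, and the values 0.001 and 16 are illustrative assumptions):

#include <dlib/svm.h>
#include <iostream>

int main()
{
    typedef dlib::matrix<double,2,1> sample_type;
    typedef dlib::radial_basis_kernel<sample_type> kernel_type;

    // A tolerance of 0.001 keeps the approximation tight (bigger dictionary);
    // the dictionary is capped at 16 vectors no matter what the tolerance is.
    dlib::kcentroid<kernel_type> kc(kernel_type(0.1), 0.001, 16);

    sample_type m;
    m = 1, 2;
    kc.train(m);                      // fold the sample into the centroid estimate
    std::cout << kc(m) << std::endl;  // distance from m to the learned centroid
    return 0;
}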
......@@ -253,8 +253,8 @@ namespace dlib
if (samples_seen > 0)
{
temp.b = squared_norm();
- temp.support_vectors.set_size(1);
- temp.support_vectors(0) = w;
+ temp.basis_vectors.set_size(1);
+ temp.basis_vectors(0) = w;
temp.alpha.set_size(1);
temp.alpha(0) = alpha;
}
......@@ -595,20 +595,20 @@ namespace dlib
if (std::abs(w_extra) > std::numeric_limits<scalar_type>::epsilon())
{
scale = (x_extra/w_extra);
- temp.support_vectors.set_size(1);
+ temp.basis_vectors.set_size(1);
temp.alpha.set_size(1);
- temp.support_vectors(0) = w*scale;
+ temp.basis_vectors(0) = w*scale;
temp.alpha(0) = alpha/scale;
}
else
{
// In this case w_extra is zero. So the only way we can get the same
- // thing in the output support vector set is by using two vectors
- temp.support_vectors.set_size(2);
+ // thing in the output basis vector set is by using two vectors
+ temp.basis_vectors.set_size(2);
temp.alpha.set_size(2);
- temp.support_vectors(0) = 2*w;
+ temp.basis_vectors(0) = 2*w;
temp.alpha(0) = alpha;
- temp.support_vectors(1) = w;
+ temp.basis_vectors(1) = w;
temp.alpha(1) = -alpha;
}
......@@ -883,8 +883,8 @@ namespace dlib
if (samples_seen > 0)
{
temp.b = squared_norm();
- temp.support_vectors.set_size(1);
- temp.support_vectors(0) = sample_type(w.begin(), w.end());
+ temp.basis_vectors.set_size(1);
+ temp.basis_vectors(0) = sample_type(w.begin(), w.end());
temp.alpha.set_size(1);
temp.alpha(0) = alpha;
}
......@@ -1220,22 +1220,22 @@ namespace dlib
if (std::abs(w_extra) > std::numeric_limits<scalar_type>::epsilon())
{
scale = (x_extra/w_extra);
- temp.support_vectors.set_size(1);
+ temp.basis_vectors.set_size(1);
temp.alpha.set_size(1);
- temp.support_vectors(0) = sample_type(w.begin(), w.end());
- sparse_vector::scale_by(temp.support_vectors(0), scale);
+ temp.basis_vectors(0) = sample_type(w.begin(), w.end());
+ sparse_vector::scale_by(temp.basis_vectors(0), scale);
temp.alpha(0) = alpha/scale;
}
else
{
// In this case w_extra is zero. So the only way we can get the same
- // thing in the output support vector set is by using two vectors
- temp.support_vectors.set_size(2);
+ // thing in the output basis vector set is by using two vectors
+ temp.basis_vectors.set_size(2);
temp.alpha.set_size(2);
- temp.support_vectors(0) = sample_type(w.begin(), w.end());
- sparse_vector::scale_by(temp.support_vectors(0), 2);
+ temp.basis_vectors(0) = sample_type(w.begin(), w.end());
+ sparse_vector::scale_by(temp.basis_vectors(0), 2);
temp.alpha(0) = alpha;
- temp.support_vectors(1) = sample_type(w.begin(), w.end());
+ temp.basis_vectors(1) = sample_type(w.begin(), w.end());
temp.alpha(1) = -alpha;
}
......
......@@ -72,9 +72,9 @@ namespace dlib
test in the KRLS algorithm. This is a number which governs how
accurately this object will approximate the decision function it is
learning. Smaller values generally result in a more accurate
- estimate while also resulting in a bigger set of support vectors in
+ estimate while also resulting in a bigger set of dictionary vectors in
the learned decision function. Bigger tolerances values result in a
- less accurate decision function but also in less support vectors.
+ less accurate decision function but also in fewer dictionary vectors.
!*/
const kernel_type& get_kernel (
......@@ -98,7 +98,7 @@ namespace dlib
/*!
ensures
- clears out all learned data
- (e.g. #get_decision_function().support_vectors.size() == 0)
+ (e.g. #get_decision_function().basis_vectors.size() == 0)
!*/
scalar_type operator() (
......@@ -135,8 +135,8 @@ namespace dlib
) const;
/*!
ensures
- returns the number of "support vectors" in the dictionary. That is,
returns a number equal to get_decision_function().support_vectors.size()
- returns the number of vectors in the dictionary. That is,
returns a number equal to get_decision_function().basis_vectors.size()
!*/
decision_function<kernel_type> get_decision_function (
......
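
The same trade-off for krls, as a sketch (kernel, gamma, tolerance, and the toy data are illustrative assumptions):

#include <dlib/svm.h>
#include <cmath>
#include <iostream>

int main()
{
    typedef dlib::matrix<double,1,1> sample_type;
    typedef dlib::radial_basis_kernel<sample_type> kernel_type;

    // Shrinking the 0.001 tolerance would grow the dictionary and sharpen the fit.
    dlib::krls<kernel_type> net(kernel_type(0.1), 0.001);

    sample_type m;
    for (double x = -10; x <= 10; x += 0.5)
    {
        m(0) = x;
        net.train(m, std::sin(x));    // learn the noiseless target y = sin(x)
    }

    // The same count as net.get_decision_function().basis_vectors.size()
    std::cout << "dictionary size: " << net.dictionary_size() << std::endl;
    return 0;
}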
......@@ -248,7 +248,7 @@ namespace dlib
) const
{
distance_function<offset_kernel<kernel_type> > df = w.get_distance_function();
- return decision_function<kernel_type>(df.alpha, -tau*sum(df.alpha), kernel, df.support_vectors);
+ return decision_function<kernel_type>(df.alpha, -tau*sum(df.alpha), kernel, df.basis_vectors);
}
void swap (
......@@ -572,7 +572,7 @@ namespace dlib
{
decision_function<kernel_type> df = my_trainer.get_decision_function();
std::cout << "\rbatch_trainer(): Percent complete: 100 " << std::endl;
std::cout << " Num sv: " << df.support_vectors.size() << std::endl;
std::cout << " Num sv: " << df.basis_vectors.size() << std::endl;
std::cout << " bias: " << df.b << std::endl;
return df;
}
......@@ -632,14 +632,14 @@ namespace dlib
cached_df = my_trainer.get_decision_function();
std::cout << "\rbatch_trainer(): Percent complete: 100 " << std::endl;
std::cout << " Num sv: " << cached_df.support_vectors.size() << std::endl;
std::cout << " Num sv: " << cached_df.basis_vectors.size() << std::endl;
std::cout << " bias: " << cached_df.b << std::endl;
return decision_function<kernel_type> (
cached_df.alpha,
cached_df.b,
trainer.get_kernel(),
- rowm(x, cached_df.support_vectors)
+ rowm(x, cached_df.basis_vectors)
);
}
else
......@@ -651,7 +651,7 @@ namespace dlib
cached_df.alpha,
cached_df.b,
trainer.get_kernel(),
- rowm(x, cached_df.support_vectors)
+ rowm(x, cached_df.basis_vectors)
);
}
}
......
......@@ -75,7 +75,7 @@ namespace dlib
) const;
/*!
ensures
- - returns the maximum number of centers (a.k.a. support_vectors in the
+ - returns the maximum number of centers (a.k.a. basis_vectors in the
trained decision_function) you will get when you train this object on data.
!*/
......
......@@ -117,7 +117,7 @@ namespace dlib
{
for (long c = 0; c < K.nc(); ++c)
{
- K(r,c) = kernel(lisf[r], dec_funct.support_vectors(c));
+ K(r,c) = kernel(lisf[r], dec_funct.basis_vectors(c));
}
}
......@@ -255,7 +255,7 @@ namespace dlib
for (long j = 0; j < dec_funct.alpha.size(); ++j)
{
bias += dec_funct.alpha(i)*dec_funct.alpha(j)*
- k(dec_funct.support_vectors(i), dec_funct.support_vectors(j));
+ k(dec_funct.basis_vectors(i), dec_funct.basis_vectors(j));
}
}
}
......@@ -332,9 +332,9 @@ namespace dlib
double temp = 0;
for (long i = 0; i < out_vectors.size(); ++i)
{
- for (long j = 0; j < dec_funct.support_vectors.nr(); ++j)
+ for (long j = 0; j < dec_funct.basis_vectors.nr(); ++j)
{
- temp -= b(i)*dec_funct.alpha(j)*k(out_vectors(i), dec_funct.support_vectors(j));
+ temp -= b(i)*dec_funct.alpha(j)*k(out_vectors(i), dec_funct.basis_vectors(j));
}
}
......@@ -436,9 +436,9 @@ namespace dlib
}
for (long i = 0; i < out_vectors.size(); ++i)
{
- for (long j = 0; j < dec_funct.support_vectors.size(); ++j)
+ for (long j = 0; j < dec_funct.basis_vectors.size(); ++j)
{
- res(i) -= dec_funct.alpha(j)*k(out_vectors(i), dec_funct.support_vectors(j));
+ res(i) -= dec_funct.alpha(j)*k(out_vectors(i), dec_funct.basis_vectors(j));
}
}
......@@ -454,9 +454,9 @@ namespace dlib
{
temp += b(j)*K_der(out_vectors(j), out_vectors(i));
}
- for (long j = 0; j < dec_funct.support_vectors.nr(); ++j)
+ for (long j = 0; j < dec_funct.basis_vectors.nr(); ++j)
{
- temp -= dec_funct.alpha(j)*K_der(dec_funct.support_vectors(j), out_vectors(i) );
+ temp -= dec_funct.alpha(j)*K_der(dec_funct.basis_vectors(j), out_vectors(i) );
}
// store the gradient for out_vectors[i] into result in the proper spot
......@@ -529,7 +529,7 @@ namespace dlib
{
for (long c = 0; c < K.nc(); ++c)
{
- K(r,c) = kernel(lisf[r], dec_funct.support_vectors(c));
+ K(r,c) = kernel(lisf[r], dec_funct.basis_vectors(c));
}
}
......@@ -556,7 +556,7 @@ namespace dlib
// Do a final reoptimization of beta just to make sure it is optimal given the new
- // set of support vectors.
+ // set of basis vectors.
for (long r = 0; r < K_inv.nr(); ++r)
{
for (long c = 0; c < K_inv.nc(); ++c)
......@@ -569,7 +569,7 @@ namespace dlib
{
for (long c = 0; c < K.nc(); ++c)
{
- K(r,c) = kernel(out_vectors(r), dec_funct.support_vectors(c));
+ K(r,c) = kernel(out_vectors(r), dec_funct.basis_vectors(c));
}
}
......
......@@ -378,9 +378,9 @@ namespace
DLIB_TEST_MSG(mean(peg_cv) > 0.9, peg_cv);
DLIB_TEST_MSG(mean(peg_c_cv) > 0.9, peg_c_cv);
- const long num_sv = trainer.train(x,y).support_vectors.size();
+ const long num_sv = trainer.train(x,y).basis_vectors.size();
print_spinner();
- const long num_rv = rvm_trainer.train(x,y).support_vectors.size();
+ const long num_rv = rvm_trainer.train(x,y).basis_vectors.size();
print_spinner();
dlog << LDEBUG << "num sv: " << num_sv;
dlog << LDEBUG << "num rv: " << num_rv;
......@@ -394,14 +394,14 @@ namespace
matrix<scalar_type> svm_reduced_error = test_binary_decision_function(df, x, y);
print_spinner();
dlog << LDEBUG << "svm reduced test error: " << svm_reduced_error;
dlog << LDEBUG << "svm reduced num sv: " << df.support_vectors.size();
dlog << LDEBUG << "svm reduced num sv: " << df.basis_vectors.size();
DLIB_TEST(mean(svm_reduced_error) > 0.9);
svm_cv = cross_validate_trainer(reduced(trainer,30), x,y, 4);
dlog << LDEBUG << "svm reduced cv: " << svm_cv;
DLIB_TEST_MSG(mean(svm_cv) > 0.9, svm_cv);
- DLIB_TEST(df.support_vectors.size() == 19);
+ DLIB_TEST(df.basis_vectors.size() == 19);
dlog << LINFO << " end test_binary_classification()";
}
......