Commit f2de334c authored by Davis King's avatar Davis King

Added some functions to the kcentroid to allow the user to

compute the inner_product() of kcentroids as well as a few
other useful things.

--HG--
extra : convert_revision : svn%3Afdd8eb12-d10e-0410-9acb-85c331704f74/trunk%402885
parent 3846bc63
......@@ -41,7 +41,9 @@ namespace dlib
) :
kernel(kernel_),
my_tolerance(tolerance_),
my_max_dictionary_size(max_dictionary_size_)
my_max_dictionary_size(max_dictionary_size_),
bias(0),
bias_is_stale(false)
{
// make sure requires clause is not broken
DLIB_ASSERT(tolerance_ >= 0,
......@@ -97,6 +99,28 @@ namespace dlib
refresh_bias();
x.refresh_bias();
scalar_type temp = x.bias + bias - 2*inner_product(x);
if (temp > 0)
return std::sqrt(temp);
else
return 0;
}
scalar_type inner_product (
const sample_type& x
) const
{
// Computes <c, phi(x)> where c is the centroid represented by this
// object, i.e. sum over all dictionary vectors d_i of
// alpha[i]*k(d_i, x).
scalar_type result = 0;
for (unsigned long idx = 0; idx < alpha.size(); ++idx)
{
result += alpha[idx]*kernel(dictionary[idx], x);
}
return result;
}
scalar_type inner_product (
const kcentroid& x
) const
{
scalar_type temp = 0;
for (unsigned long i = 0; i < alpha.size(); ++i)
{
......@@ -105,12 +129,14 @@ namespace dlib
temp += alpha[i]*x.alpha[j]*kernel(dictionary[i], x.dictionary[j]);
}
}
return temp;
}
temp = x.bias + bias - 2*temp;
if (temp > 0)
return std::sqrt(temp);
else
return 0;
scalar_type squared_norm (
) const
{
// The cached bias term holds <c,c> for the current centroid; make
// sure it is up to date before handing it back.
refresh_bias();
const scalar_type result = bias;
return result;
}
scalar_type operator() (
......@@ -120,12 +146,9 @@ namespace dlib
// make sure the bias terms are up to date
refresh_bias();
scalar_type temp = 0;
const scalar_type kxx = kernel(x,x);
for (unsigned long i = 0; i < alpha.size(); ++i)
temp += alpha[i]*kernel(dictionary[i], x);
temp = kxx + bias - 2*temp;
scalar_type temp = kxx + bias - 2*inner_product(x);
if (temp > 0)
return std::sqrt(temp);
else
......@@ -168,6 +191,16 @@ namespace dlib
return train_and_maybe_test(x,cscale,xscale,true);
}
void scale_by (
scalar_type cscale
)
{
// Multiplies the centroid vector by cscale. Since the centroid is a
// weighted sum of dictionary vectors, scaling every alpha weight by
// cscale gives: new_centroid == cscale*old_centroid.
const unsigned long num = alpha.size();
for (unsigned long idx = 0; idx < num; ++idx)
{
alpha[idx] *= cscale;
}
}
void train (
const sample_type& x,
scalar_type cscale,
......
......@@ -24,20 +24,29 @@ namespace dlib
- samples_trained() == 0
WHAT THIS OBJECT REPRESENTS
This is an implementation of an online algorithm for recursively estimating the
centroid of a sequence of training points. It uses the sparsification technique
described in the paper The Kernel Recursive Least Squares Algorithm by Yaakov Engel.
This object then allows you to compute the distance between the centroid
and any test points. So you can use this object to predict how similar a test
point is to the data this object has been trained on (larger distances from the
centroid indicate dissimilarity/anomalous points).
This object represents a weighted sum of sample points in a kernel-induced
feature space. It can be used to kernelize any algorithm that requires only
the ability to perform vector addition, subtraction, scalar multiplication,
and inner products.
An example use of this object is as an online algorithm for recursively estimating
the centroid of a sequence of training points. This object then allows you to
compute the distance between the centroid and any test points. So you can use
this object to predict how similar a test point is to the data this object has
been trained on (larger distances from the centroid indicate dissimilarity/anomalous
points).
Also note that the algorithm internally keeps a set of "dictionary vectors"
that are used to represent the centroid. You can force the algorithm to use
no more than a set number of vectors by setting the 3rd constructor argument
to whatever you want. However, note that doing this causes the algorithm
to bias its results towards more recent training examples.
This object also uses the sparsification technique described in the paper The
Kernel Recursive Least Squares Algorithm by Yaakov Engel. This technique
allows us to keep the number of dictionary vectors down to a minimum. In fact,
the object has a user selectable tolerance parameter that controls the trade off
between accuracy and number of stored dictionary vectors.
!*/
public:
......@@ -77,6 +86,13 @@ namespace dlib
greater than max_dictionary_size().
!*/
unsigned long dictionary_size (
) const;
/*!
ensures
- returns the number of "support vectors" in the dictionary.
!*/
scalar_type samples_trained (
) const;
/*!
......@@ -126,6 +142,32 @@ namespace dlib
to this object so far.
!*/
scalar_type inner_product (
const sample_type& x
) const;
/*!
ensures
- returns the inner product of the given x point and the current
estimate of the centroid of the training samples given to this object
so far.
!*/
scalar_type inner_product (
const kcentroid& x
) const;
/*!
ensures
- returns the inner product between x and this centroid object.
!*/
scalar_type squared_norm (
) const;
/*!
ensures
- returns the squared norm of the centroid vector represented by this
object. I.e. returns this->inner_product(*this)
!*/
void train (
const sample_type& x
);
......@@ -151,6 +193,17 @@ namespace dlib
you want.
!*/
void scale_by (
scalar_type cscale
);
/*!
ensures
- multiplies the current centroid vector by the given scale value.
This function is equivalent to calling train(some_x_value, cscale, 0).
So it performs:
- new_centroid == cscale*old_centroid
!*/
scalar_type test_and_train (
const sample_type& x
);
......@@ -185,13 +238,6 @@ namespace dlib
- swaps *this with item
!*/
unsigned long dictionary_size (
) const;
/*!
ensures
- returns the number of "support vectors" in the dictionary.
!*/
distance_function<kernel_type> get_distance_function (
) const;
/*!
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment