Commit 62876d4c authored by Davis King

Polished the empirical_kernel_map some more.

--HG--
extra : convert_revision : svn%3Afdd8eb12-d10e-0410-9acb-85c331704f74/trunk%403303
parent 8dbe2979
......@@ -31,22 +31,24 @@ namespace dlib
void load(
const kernel_type& kernel_,
const std::vector<sample_type>& samples
const std::vector<sample_type>& basis_samples
)
{
load(kernel_, vector_to_matrix(samples));
load(kernel_, vector_to_matrix(basis_samples));
}
template <typename EXP>
void load(
const kernel_type& kernel_,
const matrix_exp<EXP>& samples
const matrix_exp<EXP>& basis_samples
)
{
// make sure requires clause is not broken
DLIB_ASSERT(samples.size() > 0,
"\tvoid empirical_kernel_map::load(kernel,samples)"
<< "\n\t You have to give a non-empty set of samples"
DLIB_ASSERT(basis_samples.size() > 0 && is_vector(basis_samples),
"\tvoid empirical_kernel_map::load(kernel,basis_samples)"
<< "\n\t You have to give a non-empty set of basis_samples and it must be a vector"
<< "\n\t basis_samples.size(): " << basis_samples.size()
<< "\n\t is_vector(basis_samples): " << is_vector(basis_samples)
<< "\n\t this: " << this
);
......@@ -55,24 +57,24 @@ namespace dlib
weights.set_size(0,0);
kernel = kernel_;
basis.clear();
basis.reserve(samples.size());
basis.reserve(basis_samples.size());
// find out the value of the largest norm of the elements in samples.
const scalar_type max_norm = max(diag(kernel_matrix(kernel, samples)));
// find out the value of the largest norm of the elements in basis_samples.
const scalar_type max_norm = max(diag(kernel_matrix(kernel, basis_samples)));
// Copy all the samples into basis but make sure we don't copy any samples
// Copy all the basis_samples into basis but make sure we don't copy any samples
// that have length 0
for (long i = 0; i < samples.size(); ++i)
for (long i = 0; i < basis_samples.size(); ++i)
{
const scalar_type norm = kernel(samples(i), samples(i));
const scalar_type norm = kernel(basis_samples(i), basis_samples(i));
if (norm > max_norm*std::numeric_limits<scalar_type>::epsilon())
{
basis.push_back(samples(i));
basis.push_back(basis_samples(i));
}
}
if (basis.size() == 0)
throw empirical_kernel_map_error("All samples given to empirical_kernel_map::load() were zero vectors");
throw empirical_kernel_map_error("All basis_samples given to empirical_kernel_map::load() were zero vectors");
matrix<scalar_type,0,0,mem_manager_type> K(kernel_matrix(kernel, basis)), U,W,V;
......@@ -103,6 +105,7 @@ namespace dlib
++counter;
}
}
}
const kernel_type get_kernel (
......@@ -124,12 +127,48 @@ namespace dlib
return weights.nr();
}
// Builds a decision_function that represents the feature space point vect.
//
// requires
//     - out_vector_size() != 0 (i.e. load() has been called successfully)
//     - is_vector(vect) == true
//     - vect.size() == out_vector_size()
// ensures
//     - returns a decision_function constructed from the weight vector
//       trans(weights)*vect, a bias of 0, this object's kernel, and the basis
//       samples stored by load().
//
// The argument is taken as a matrix_exp<EXP>, the same way load() takes its
// samples, so that any matrix expression is accepted.  Declaring it as
// matrix<EXP> would deduce EXP as the matrix element type and would reject the
// fixed-column matrix type that project() returns.
template <typename EXP>
const decision_function<kernel_type> convert_to_decision_function (
    const matrix_exp<EXP>& vect
) const
{
    // make sure requires clause is not broken
    DLIB_ASSERT(out_vector_size() != 0 && is_vector(vect) && out_vector_size() == vect.size(),
        "\t const decision_function empirical_kernel_map::convert_to_decision_function()"
        << "\n\t Invalid inputs to this function."
        << "\n\t out_vector_size(): " << out_vector_size()
        << "\n\t is_vector(vect): " << is_vector(vect)
        << "\n\t vect.size(): " << vect.size()
        << "\n\t this: " << this
        );

    // Multiplying by trans(weights) re-expresses vect as one coefficient per
    // stored basis sample, which is the form decision_function expects.
    return decision_function<kernel_type>(trans(weights)*vect, 0, kernel, vector_to_matrix(basis));
}
// Builds a distance_function that represents the feature space point vect.
//
// requires
//     - out_vector_size() != 0 (i.e. load() has been called successfully)
//     - is_vector(vect) == true
//     - vect.size() == out_vector_size()
// ensures
//     - returns a distance_function constructed from the weight vector
//       trans(weights)*vect, the value dot(vect,vect), this object's kernel,
//       and the basis samples stored by load().
//
// The argument is taken as a matrix_exp<EXP>, the same way load() takes its
// samples, so that any matrix expression is accepted.  Declaring it as
// matrix<EXP> would deduce EXP as the matrix element type and would reject the
// fixed-column matrix type that project() returns.
template <typename EXP>
const distance_function<kernel_type> convert_to_distance_function (
    const matrix_exp<EXP>& vect
) const
{
    // make sure requires clause is not broken
    DLIB_ASSERT(out_vector_size() != 0 && is_vector(vect) && out_vector_size() == vect.size(),
        "\t const distance_function empirical_kernel_map::convert_to_distance_function()"
        << "\n\t Invalid inputs to this function."
        << "\n\t out_vector_size(): " << out_vector_size()
        << "\n\t is_vector(vect): " << is_vector(vect)
        << "\n\t vect.size(): " << vect.size()
        << "\n\t this: " << this
        );

    // trans(weights)*vect gives one coefficient per stored basis sample, and
    // dot(vect,vect) is passed along as the second constructor argument.
    return distance_function<kernel_type>(trans(weights)*vect, dot(vect,vect), kernel, vector_to_matrix(basis));
}
const matrix<scalar_type,0,1,mem_manager_type>& project (
const sample_type& samp
) const
{
// make sure requires clause is not broken
DLIB_ASSERT(out_vector_size() > 0,
DLIB_ASSERT(out_vector_size() != 0,
"\tconst matrix empirical_kernel_map::project()"
<< "\n\t You have to load this object with data before you can call this function"
<< "\n\t this: " << this
......
......@@ -26,8 +26,30 @@ namespace dlib
- out_vector_size() == 0
WHAT THIS OBJECT REPRESENTS
TODO
This object represents a map from objects of sample_type (the kind of object
the kernel function operates on) to finite dimensional column vectors which
represent points in the kernel feature space defined by whatever kernel
is used with this object.
More precisely, to use this object you supply it with a particular kernel and
a set of basis samples. After that you can present it with new samples and it
will project them into the part of kernel feature space spanned by your basis
samples.
This means the empirical_kernel_map is a tool you can use to very easily kernelize
any algorithm that operates on column vectors. All you have to do is select a
set of basis samples and then use the empirical_kernel_map to project all your
data points into the part of kernel feature space spanned by those basis samples.
Then just run your normal algorithm on the output vectors and it will be effectively
kernelized.
Regarding methods to select a set of basis samples, if you are working with only a
few thousand samples then you can just use all of them as basis samples.
Alternatively, the linearly_independent_subset_finder often works well for
selecting a basis set. Some people also find that picking a random subset
works fine.
!*/
public:
typedef kern_type kernel_type;
......@@ -44,15 +66,22 @@ namespace dlib
template <typename EXP>
void load(
const kernel_type& kernel,
const matrix_exp<EXP>& samples
const matrix_exp<EXP>& basis_samples
);
/*!
requires
- samples.size() > 0
- is_vector(basis_samples) == true
- basis_samples.size() > 0
- kernel must be capable of operating on the elements of basis_samples. That is,
expressions such as kernel(basis_samples(0), basis_samples(0)) should make sense.
ensures
- 0 < #out_vector_size() <= samples.size()
- 0 < #out_vector_size() <= basis_samples.size()
- #get_kernel() == kernel
- TODO
- This function constructs a map between normal sample_type objects and the
subspace of the kernel feature space defined by the given kernel and the
given set of basis samples. So after this function has been called you
will be able to project sample_type objects into kernel feature space
and obtain the resulting vector as a normal column matrix.
throws
- empirical_kernel_map_error
This exception is thrown if we are unable to create a kernel map.
......@@ -61,15 +90,15 @@ namespace dlib
void load(
const kernel_type& kernel,
const std::vector<sample_type>& samples
const std::vector<sample_type>& basis_samples
);
/*!
requires
- samples.size() > 0
- basis_samples.size() > 0
ensures
- performs load(kernel,vector_to_matrix(samples)). I.e. This function
- performs load(kernel,vector_to_matrix(basis_samples)). I.e. This function
does the exact same thing as the above load() function but lets you use
a std::vector of samples in addition to a row/column matrix of samples.
a std::vector of basis samples in addition to a row/column matrix of basis samples.
!*/
const kernel_type get_kernel (
......@@ -99,9 +128,63 @@ namespace dlib
requires
- out_vector_size() != 0
ensures
- takes the given sample and maps it into the kernel feature space
- takes the given sample and projects it into the kernel feature space
of out_vector_size() dimensions defined by this kernel map and
returns the resulting vector.
- in more precise terms, this function returns a vector V such that:
- V.size() == out_vector_size()
- for any sample_type object S, the following equality is approximately true:
- get_kernel()(sample,S) == dot(V, project(S)).
- The approximation error in the above equality will be zero (within rounding error)
if both sample_type objects involved are within the span of the set of basis
samples given to the load() function. If they are not then there will be some
approximation error. Note that all the basis samples are always within their
own span. So the equality is always exact for the samples given to the load()
function.
!*/
template <typename EXP>
const decision_function<kernel_type> convert_to_decision_function (
const matrix<EXP>& vect
) const;
/*!
requires
- is_vector(vect) == true
- vect.size() == out_vector_size()
- out_vector_size() != 0
ensures
- This function interprets the given vector as a point in the kernel feature space defined
by this empirical_kernel_map. The return value of this function is a decision
function, DF, that represents the given vector in the following sense:
- for all possible sample_type objects, S, it is the case that DF(S) == dot(project(S), vect)
(i.e. the returned decision function computes dot products, in kernel feature space,
between vect and any argument you give it.)
- DF.kernel_function == get_kernel()
- DF.b == 0
- DF.basis_vectors == the basis samples given to the previous call to load(). Note
that it is possible for there to be fewer basis_vectors than basis samples given to load().
NOTE(review): vect is declared as matrix<EXP>, so EXP is deduced as the element type of
the matrix, whereas load() takes a matrix_exp<EXP>. Confirm whether matrix_exp<EXP> was
intended here so that the vectors returned by project() can be passed directly.
!*/
template <typename EXP>
const distance_function<kernel_type> convert_to_distance_function (
    const matrix<EXP>& vect
) const;
/*!
    requires
        - is_vector(vect) == true
        - vect.size() == out_vector_size()
        - out_vector_size() != 0
    ensures
        - This function interprets the given vector as a point in the kernel feature space defined
          by this empirical_kernel_map. The return value of this function is a distance
          function, DF, that represents the given vector in the following sense:
            - for all possible sample_type objects, S, it is the case that DF(S) == length(project(S) - vect)
              (i.e. the returned distance function computes distances, in kernel feature space,
              between vect and any argument you give it.)
            - DF.kernel_function == get_kernel()
            - DF.b == dot(vect,vect)
            - DF.basis_vectors == the basis samples given to the previous call to load(). Note
              that it is possible for there to be fewer basis_vectors than basis samples given to load().
!*/
void swap (
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment