Commit 3d559172 authored by Davis King

Added a test for the linear_manifold_regularizer

--HG--
extra : convert_revision : svn%3Afdd8eb12-d10e-0410-9acb-85c331704f74/trunk%403636
parent 0d775acb
...@@ -44,6 +44,7 @@ set (tests ...@@ -44,6 +44,7 @@ set (tests
image.cpp image.cpp
kcentroid.cpp kcentroid.cpp
kernel_matrix.cpp kernel_matrix.cpp
linear_manifold_regularizer.cpp
lz77_buffer.cpp lz77_buffer.cpp
map.cpp map.cpp
matrix2.cpp matrix2.cpp
......
// Copyright (C) 2010 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#include "tester.h"
#include <dlib/manifold_regularization.h>
#include <dlib/svm.h>
#include <dlib/rand.h>
#include <dlib/string.h>
#include <vector>
#include <sstream>
#include <ctime>
namespace
{
using namespace test;
using namespace dlib;
using namespace std;
// Logger for this test file; do_the_test() writes its LTRACE diagnostics
// (edge count and transformation-matrix error) through it.
dlib::logger dlog("test.linear_manifold_regularizer");
class linear_manifold_regularizer_tester : public tester
{
/*!
WHAT THIS OBJECT REPRESENTS
This object represents a unit test. When it is constructed
it adds itself into the testing framework.
!*/
public:
linear_manifold_regularizer_tester (
) :
tester (
"test_linear_manifold_regularizer", // the command line argument name for this test
"Run tests on the linear_manifold_regularizer object.", // the command line argument description
0 // the number of command line arguments for this test
)
{
}
dlib::rand::float_1a rnd;
typedef matrix<double, 0, 1> sample_type;
typedef radial_basis_kernel<sample_type> kernel_type;
void do_the_test()
{
print_spinner();
std::vector<sample_type> samples;
// Declare an instance of the kernel we will be using.
const kernel_type kern(0.1);
const unsigned long num_points = 200;
// create a large dataset with two concentric circles. There will be 100000 points on each circle
// for a total of 200000 samples.
generate_circle(samples, 1, num_points); // circle of radius 1
generate_circle(samples, 5, num_points); // circle of radius 5
// Create a set of sample_pairs that tells us which samples are "close" and should thus
// be classified similarly. These edges will be used to define the manifold regularizer.
// To find these edges we use a simple function that samples point pairs randomly and
// returns the top 5% with the shortest edges.
std::vector<sample_pair> edges;
find_k_nearest_neighbors(samples, squared_euclidean_distance(), 2, edges);
dlog << LTRACE << "number of edges generated: " << edges.size();
empirical_kernel_map<kernel_type> ekm;
// Since the circles are not linearly separable we will use an empirical kernel map to
// map them into a space where they are separable. So we create an empirical_kernel_map
// using a random subset of our data samples as basis samples. Note, however, that even
// though the circles are linearly separable in this new space given by the empirical_kernel_map
// we still won't be able to correctly classify all the points given just the 2 labeled examples.
// We will need to make use of the nearest neighbor information stored in edges. To do that
// we will use the linear_manifold_regularizer next.
ekm.load(kern, randomly_subsample(samples, 100));
// Project all the samples into the span of our 50 basis samples
for (unsigned long i = 0; i < samples.size(); ++i)
samples[i] = ekm.project(samples[i]);
// Now create the manifold regularizer. The result is a transformation matrix that
// embodies the manifold assumption discussed above.
linear_manifold_regularizer<sample_type> lmr;
lmr.build(samples, edges, use_gaussian_weights(0.1));
matrix<double> T = lmr.get_transformation_matrix(10000);
print_spinner();
// generate the T matrix manually and make sure it matches. The point of this test
// is to make sure that the more complex version of this that happens inside the linear_manifold_regularizer
// is correct. It uses a tedious block of loops to do it in a way that is a lot faster for sparse
// W matrices but isn't super straight forward.
matrix<double> X(samples[0].size(), samples.size());
for (unsigned long i = 0; i < samples.size(); ++i)
set_colm(X,i) = samples[i];
matrix<double> W(samples.size(), samples.size());
W = 0;
for (unsigned long i = 0; i < edges.size(); ++i)
{
W(edges[i].index1(), edges[i].index2()) = use_gaussian_weights(0.1)(edges[i]);
W(edges[i].index2(), edges[i].index1()) = use_gaussian_weights(0.1)(edges[i]);
}
matrix<double> L = diagm(sum_rows(W)) - W;
matrix<double> trueT = inv_lower_triangular(chol(identity_matrix<double>(X.nr()) + 10000.0/edges.size()*X*L*trans(X)));
dlog << LTRACE << "T error: "<< max(abs(T - trueT));
DLIB_TEST(max(abs(T - trueT)) < 1e-7);
print_spinner();
// Apply the transformation generated by the linear_manifold_regularizer to
// all our samples.
for (unsigned long i = 0; i < samples.size(); ++i)
samples[i] = T*samples[i];
// For convenience, generate a projection_function and merge the transformation
// matrix T into it.
projection_function<kernel_type> proj = ekm.get_projection_function();
proj.weights = T*proj.weights;
// Pick 2 different labeled points. One on the inner circle and another on the outer.
// For each of these test points we will see if using the single plane that separates
// them is a good way to separate the concentric circles. Also do this a bunch
// of times with different randomly chosen points so we can see how robust the result is.
for (int itr = 0; itr < 10; ++itr)
{
print_spinner();
std::vector<sample_type> test_points;
// generate a random point from the radius 1 circle
generate_circle(test_points, 1, 1);
// generate a random point from the radius 5 circle
generate_circle(test_points, 5, 1);
// project the two test points into kernel space. Recall that this projection_function
// has the manifold regularizer incorporated into it.
const sample_type class1_point = proj(test_points[0]);
const sample_type class2_point = proj(test_points[1]);
double num_wrong = 0;
// Now attempt to classify all the data samples according to which point
// they are closest to. The output of this program shows that without manifold
// regularization this test will fail but with it it will perfectly classify
// all the points.
for (unsigned long i = 0; i < samples.size(); ++i)
{
double distance_to_class1 = length(samples[i] - class1_point);
double distance_to_class2 = length(samples[i] - class2_point);
bool predicted_as_class_1 = (distance_to_class1 < distance_to_class2);
bool really_is_class_1 = (i < num_points);
// now count how many times we make a mistake
if (predicted_as_class_1 != really_is_class_1)
++num_wrong;
}
DLIB_TEST_MSG(num_wrong == 0, num_wrong);
}
}
void generate_circle (
std::vector<sample_type>& samples,
double radius,
const long num
)
{
sample_type m(2,1);
for (long i = 0; i < num; ++i)
{
double sign = 1;
if (rnd.get_random_double() < 0.5)
sign = -1;
m(0) = 2*radius*rnd.get_random_double()-radius;
m(1) = sign*sqrt(radius*radius - m(0)*m(0));
samples.push_back(m);
}
}
void perform_test (
)
{
for (int i = 0; i < 5; ++i)
{
do_the_test();
}
}
};
// Create the one instance of the tester.  Constructing it is what inserts this
// test into the testing framework, so that happens automatically whenever this
// cpp file is linked into the project.  Note that since we are inside an
// unnamed-namespace we won't get any linker errors about the symbol a being
// defined multiple times.
linear_manifold_regularizer_tester a;
}
...@@ -54,6 +54,7 @@ SRC += hash_table.cpp ...@@ -54,6 +54,7 @@ SRC += hash_table.cpp
SRC += image.cpp SRC += image.cpp
SRC += kcentroid.cpp SRC += kcentroid.cpp
SRC += kernel_matrix.cpp SRC += kernel_matrix.cpp
SRC += linear_manifold_regularizer.cpp
SRC += lz77_buffer.cpp SRC += lz77_buffer.cpp
SRC += map.cpp SRC += map.cpp
SRC += matrix2.cpp SRC += matrix2.cpp
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment