Added a test for the linear_manifold_regularizer

--HG-- extra : convert_revision : svn%3Afdd8eb12-d10e-0410-9acb-85c331704f74/trunk%403636

Added a test for the linear_manifold_regularizer
--HG-- extra : convert_revision : svn%3Afdd8eb12-d10e-0410-9acb-85c331704f74/trunk%403636
3d559172 · Davis King · 0d775acb · 3d559172 · 3d559172 · 3d559172
Commit 3d559172 authored May 16, 2010 by Davis King
Show whitespace changes
Inline Side-by-side

Showing with 214 additions and 0 deletions

CMakeLists.txt dlib/test/CMakeLists.txt +1 -0

linear_manifold_regularizer.cpp dlib/test/linear_manifold_regularizer.cpp +212 -0

makefile dlib/test/makefile +1 -0

No files found.
--- a/dlib/test/CMakeLists.txt
+++ b/dlib/test/CMakeLists.txt
@@ -44,6 +44,7 @@ set (tests
   image.cpp
   kcentroid.cpp
   kernel_matrix.cpp
+   linear_manifold_regularizer.cpp
   lz77_buffer.cpp
   map.cpp
   matrix2.cpp

--- a/dlib/test/linear_manifold_regularizer.cpp
+++ b/dlib/test/linear_manifold_regularizer.cpp
+// Copyright (C) 2010  Davis E. King (davis@dlib.net)
+// License: Boost Software License   See LICENSE.txt for the full license.
+#include "tester.h"
+#include <dlib/manifold_regularization.h>
+#include <dlib/svm.h>
+#include <dlib/rand.h>
+#include <dlib/string.h>
+#include <vector>
+#include <sstream>
+#include <ctime>
+namespace  
+{
+    using namespace test;
+    using namespace dlib;
+    using namespace std;
+    dlib::logger dlog("test.linear_manifold_regularizer");
+    class linear_manifold_regularizer_tester : public tester
+    {
+        /*!
+            WHAT THIS OBJECT REPRESENTS
+                This object represents a unit test.  When it is constructed
+                it adds itself into the testing framework.
+
+            OVERVIEW OF MEMBERS
+                - rnd: the random number generator used by generate_circle().  It is a
+                  member and is not reseeded anywhere in this class, so successive calls
+                  to do_the_test() continue the same random stream.
+                - do_the_test(): builds two concentric circles of samples, links nearby
+                  samples with edges, projects the samples with an empirical_kernel_map,
+                  and then checks two things:
+                    1. the matrix returned by
+                       linear_manifold_regularizer::get_transformation_matrix(10000)
+                       matches a directly computed reference (trueT) to within 1e-7, and
+                    2. after applying that matrix, assigning each sample to the nearer of
+                       two projected test points (one per circle) makes zero mistakes, in
+                       each of 10 trials.
+                - generate_circle(samples, radius, num): appends num 2x1 points to samples.
+                  Each point has x = 2*radius*rnd.get_random_double()-radius and
+                  y = +/-sqrt(radius*radius - x*x), with the sign of y picked by a separate
+                  random draw.
+                - perform_test(): runs do_the_test() 5 times.  Presumably this is the hook
+                  the tester base class invokes -- confirm against tester.h.
+        !*/
+    public:
+        linear_manifold_regularizer_tester (
+        ) :
+            tester (
+                "test_linear_manifold_regularizer",       // the command line argument name for this test
+                "Run tests on the linear_manifold_regularizer object.", // the command line argument description
+                0                     // the number of command line arguments for this test
+            )
+        {
+        }
+        dlib::rand::float_1a rnd;
+        typedef matrix<double, 0, 1> sample_type;
+        typedef radial_basis_kernel<sample_type> kernel_type;
+        void do_the_test()
+        {
+            print_spinner();
+            std::vector<sample_type> samples;
+            // Declare an instance of the kernel we will be using.
+            const kernel_type kern(0.1);
+            const unsigned long num_points = 200;
+            // create a dataset with two concentric circles.  There will be num_points (200) points
+            // on each circle for a total of 2*num_points (400) samples.
+            generate_circle(samples, 1, num_points);  // circle of radius 1
+            generate_circle(samples, 5, num_points);  // circle of radius 5
+            // Create a set of sample_pairs that tells us which samples are "close" and should thus
+            // be classified similarly.  These edges will be used to define the manifold regularizer.
+            // To find these edges we call find_k_nearest_neighbors() using squared Euclidean distance
+            // and a neighbor count of 2.
+            std::vector<sample_pair> edges;
+            find_k_nearest_neighbors(samples, squared_euclidean_distance(), 2, edges);
+            dlog << LTRACE << "number of edges generated: " << edges.size();
+            empirical_kernel_map<kernel_type> ekm;
+            // Since the circles are not linearly separable we will use an empirical kernel map to
+            // map them into a space where they are separable.  So we create an empirical_kernel_map
+            // using a random subset of our data samples as basis samples.  Note, however, that even
+            // though the circles are linearly separable in this new space given by the empirical_kernel_map
+            // we still won't be able to correctly classify all the points given just the 2 labeled examples.
+            // We will need to make use of the nearest neighbor information stored in edges.  To do that
+            // we will use the linear_manifold_regularizer next.
+            ekm.load(kern, randomly_subsample(samples, 100));
+            // Project all the samples into the span of the 100 basis samples loaded above
+            for (unsigned long i = 0; i < samples.size(); ++i)
+                samples[i] = ekm.project(samples[i]);
+            // Now create the manifold regularizer.   The result is a transformation matrix that
+            // embodies the manifold assumption discussed above.
+            linear_manifold_regularizer<sample_type> lmr;
+            lmr.build(samples, edges, use_gaussian_weights(0.1));
+            matrix<double> T = lmr.get_transformation_matrix(10000);
+            print_spinner();
+            // generate the T matrix manually and make sure it matches.  The point of this test
+            // is to make sure that the more complex version of this that happens inside the linear_manifold_regularizer
+            // is correct.  It uses a tedious block of loops to do it in a way that is a lot faster for sparse
+            // W matrices but isn't super straightforward.
+            matrix<double> X(samples[0].size(), samples.size());
+            for (unsigned long i = 0; i < samples.size(); ++i)
+                set_colm(X,i) = samples[i];
+            matrix<double> W(samples.size(), samples.size());
+            W = 0;
+            for (unsigned long i = 0; i < edges.size(); ++i)
+            {
+                W(edges[i].index1(), edges[i].index2()) = use_gaussian_weights(0.1)(edges[i]);
+                W(edges[i].index2(), edges[i].index1()) = use_gaussian_weights(0.1)(edges[i]);
+            }
+            matrix<double> L = diagm(sum_rows(W)) - W;
+            matrix<double> trueT = inv_lower_triangular(chol(identity_matrix<double>(X.nr()) + 10000.0/edges.size()*X*L*trans(X)));
+            dlog << LTRACE << "T error: "<< max(abs(T - trueT));
+            DLIB_TEST(max(abs(T - trueT)) < 1e-7);
+            print_spinner();
+            // Apply the transformation generated by the linear_manifold_regularizer to
+            // all our samples.
+            for (unsigned long i = 0; i < samples.size(); ++i)
+                samples[i] = T*samples[i];
+            // For convenience, generate a projection_function and merge the transformation
+            // matrix T into it.
+            projection_function<kernel_type> proj = ekm.get_projection_function();
+            proj.weights = T*proj.weights;
+            // Pick 2 different labeled points.  One on the inner circle and another on the outer.
+            // For each of these test points we will see if using the single plane that separates
+            // them is a good way to separate the concentric circles.  Also do this a bunch
+            // of times with different randomly chosen points so we can see how robust the result is.
+            for (int itr = 0; itr < 10; ++itr)
+            {
+                print_spinner();
+                std::vector<sample_type> test_points;
+                // generate a random point from the radius 1 circle
+                generate_circle(test_points, 1, 1);
+                // generate a random point from the radius 5 circle
+                generate_circle(test_points, 5, 1);
+                // project the two test points into kernel space.  Recall that this projection_function
+                // has the manifold regularizer incorporated into it.
+                const sample_type class1_point = proj(test_points[0]);
+                const sample_type class2_point = proj(test_points[1]);
+                double num_wrong = 0;
+                // Now attempt to classify all the data samples according to which point
+                // they are closest to.  The DLIB_TEST_MSG check below requires that the
+                // manifold-regularized projection classifies every sample correctly.
+                for (unsigned long i = 0; i < samples.size(); ++i)
+                {
+                    double distance_to_class1 = length(samples[i] - class1_point);
+                    double distance_to_class2 = length(samples[i] - class2_point);
+                    bool predicted_as_class_1 = (distance_to_class1 < distance_to_class2);
+                    bool really_is_class_1 = (i < num_points);
+                    // now count how many times we make a mistake
+                    if (predicted_as_class_1 != really_is_class_1)
+                        ++num_wrong;
+                }
+                DLIB_TEST_MSG(num_wrong == 0, num_wrong);
+            }
+        }
+        void generate_circle (
+            std::vector<sample_type>& samples,
+            double radius,
+            const long num
+        )
+        {
+            sample_type m(2,1);
+            for (long i = 0; i < num; ++i)
+            {
+                double sign = 1;
+                if (rnd.get_random_double() < 0.5)
+                    sign = -1;
+                m(0) = 2*radius*rnd.get_random_double()-radius;
+                m(1) = sign*sqrt(radius*radius - m(0)*m(0));
+                samples.push_back(m);
+            }
+        }
+        void perform_test (
+        )
+        {
+            for (int i = 0; i < 5; ++i)
+            {
+                do_the_test();
+            }
+        }
+    };
+    // Create an instance of this object.  Doing this causes this test
+    // to be automatically inserted into the testing framework whenever this cpp file
+    // is linked into the project.  Note that since we are inside an unnamed namespace
+    // we won't get any linker errors about the symbol "a" being defined multiple times.
+    linear_manifold_regularizer_tester a;
+}
--- a/dlib/test/makefile
+++ b/dlib/test/makefile
@@ -54,6 +54,7 @@ SRC += hash_table.cpp
 SRC += image.cpp
 SRC += kcentroid.cpp
 SRC += kernel_matrix.cpp
+SRC += linear_manifold_regularizer.cpp
 SRC += lz77_buffer.cpp
 SRC += map.cpp
 SRC += matrix2.cpp