Added Sammon's algorithm.

46c00173 · Davis King · bf2edbec · 46c00173 · 46c00173 · 46c00173
Commit 46c00173 authored Aug 27, 2012 by Davis King
6 changed files
--- a/dlib/statistics.h
+++ b/dlib/statistics.h
@@ -7,6 +7,7 @@
 #include "statistics/dpca.h"
 #include "statistics/random_subset_selector.h"
 #include "statistics/image_feature_sampling.h"
+#include "statistics/sammon.h"

 #endif // DLIB_STATISTICs_H_ 


--- a/dlib/statistics/sammon.h
+++ b/dlib/statistics/sammon.h
--- a/dlib/statistics/sammon_abstract.h
+++ b/dlib/statistics/sammon_abstract.h
+// Copyright (C) 2012  Emanuele Cesena (emanuele.cesena@gmail.com), Davis E. King
+// License: Boost Software License   See LICENSE.txt for the full license.
+#undef DLIB_SAMMoN_ABSTRACT_H__
+#ifdef DLIB_SAMMoN_ABSTRACT_H__
+
+#include "../matrix/matrix_abstract.h"
+#include <vector>
+
+namespace dlib
+{
+
+    class sammon_projection
+    {
+        /*!
+            WHAT THIS OBJECT REPRESENTS
+                This is a function object that computes the Sammon projection of a set
+                of N points in an L-dimensional vector space onto a d-dimensional space
+                (d <= L), according to the paper:
+                    A Nonlinear Mapping for Data Structure Analysis (1969) by J.W. Sammon
+
+                The current implementation is a vectorized version of the original algorithm.
+        !*/
+
+    public:
+
+        sammon_projection(
+        );
+        /*!
+            ensures
+                - this object is properly initialized
+        !*/
+
+        template <typename matrix_type>
+        std::vector<matrix<double,0,1> > operator() (
+            const std::vector<matrix_type>& data,       
+            long num_dims                      
+        );
+        /*!
+            requires
+                - num_dims > 0
+                - matrix_type should be a kind of dlib::matrix of doubles capable
+                  of representing column vectors.
+                - for all valid i:
+                    - is_col_vector(data[i]) == true
+                    - data[0].size() == data[i].size()
+                      (i.e. all the vectors in data must have the same dimensionality)
+                - if (data.size() != 0) then
+                    - 0 < num_dims <= data[0].size()
+                      (i.e. you can't project into a higher dimension than the input data,
+                      only into the same or a lower dimension.)
+            ensures
+                - This routine computes Sammon's dimensionality reduction method based on the
+                  given input data.  It will attempt to project the contents of data into a
+                  num_dims dimensional space that preserves relative distances between the
+                  input data points.
+                - This function returns a std::vector, OUT, such that:
+                    - OUT == a set of column vectors that represent the Sammon projection of
+                      the input data vectors.
+                    - OUT.size() == data.size()
+                      (so an empty data vector yields an empty OUT)
+                    - for all valid i:
+                        - OUT[i].size() == num_dims
+                        - OUT[i] == the Sammon projection of the input vector data[i]
+        !*/
+
+        template <typename matrix_type>
+        void operator() (
+            const std::vector<matrix_type>& data,       
+            long num_dims,                     
+            std::vector<matrix<double,0,1> >& result,   
+            double &err,                                
+            unsigned long num_iters = 1000,             
+            const double err_delta = 1.0e-9            
+        );
+        /*!
+            requires
+                - num_iters > 0
+                - err_delta > 0
+                - num_dims > 0
+                - matrix_type should be a kind of dlib::matrix of doubles capable
+                  of representing column vectors.
+                - for all valid i:
+                    - is_col_vector(data[i]) == true
+                    - data[0].size() == data[i].size()
+                      (i.e. all the vectors in data must have the same dimensionality)
+                - if (data.size() != 0) then
+                    - 0 < num_dims <= data[0].size()
+                      (i.e. you can't project into a higher dimension than the input data,
+                      only into the same or a lower dimension.)
+            ensures
+                - This routine computes Sammon's dimensionality reduction method based on the
+                  given input data.  It will attempt to project the contents of data into a
+                  num_dims dimensional space that preserves relative distances between the
+                  input data points.
+                - #err == the final error value at the end of the algorithm.  The goal of Sammon's
+                  algorithm is to find a lower dimensional projection of the input data that
+                  preserves the relative distances between points.  The value in #err is a measure
+                  of the total error at the end of the algorithm.  So a smaller value indicates
+                  that a better projection was found than a larger value does.
+                - Sammon's algorithm will run until either num_iters iterations have executed
+                  or the change in error from one iteration to the next is less than err_delta.
+                  (If the caller does not supply them, num_iters defaults to 1000 and
+                  err_delta defaults to 1.0e-9.)
+                - Upon completion, the output of Sammon's projection is stored into #result, in
+                  particular, we will have:
+                    - #result == a set of column vectors that represent the Sammon projection of
+                      the input data vectors.
+                    - #result.size() == data.size()
+                    - for all valid i:
+                        - #result[i].size() == num_dims
+                        - #result[i] == the Sammon projection of the input vector data[i]
+        !*/
+
+    };
+
+} 
+
+#endif // DLIB_SAMMoN_ABSTRACT_H__
+
+
--- a/dlib/test/CMakeLists.txt
+++ b/dlib/test/CMakeLists.txt
@@ -91,6 +91,7 @@ set (tests
   read_write_mutex.cpp
   reference_counter.cpp
   rls.cpp
+   sammon.cpp
   scan_image.cpp
   sequence.cpp
   sequence_labeler.cpp

--- a/dlib/test/makefile
+++ b/dlib/test/makefile
@@ -106,6 +106,7 @@ SRC += rand.cpp
 SRC += read_write_mutex.cpp
 SRC += reference_counter.cpp
 SRC += rls.cpp
+SRC += sammon.cpp
 SRC += scan_image.cpp
 SRC += sequence.cpp
 SRC += sequence_labeler.cpp

--- a/dlib/test/sammon.cpp
+++ b/dlib/test/sammon.cpp
+// Copyright (C) 2012  Davis E. King (davis@dlib.net)
+// License: Boost Software License   See LICENSE.txt for the full license.
+
+
+#include <sstream>
+#include <string>
+#include <cstdlib>
+#include <ctime>
+#include <cmath>
+#include <dlib/statistics.h>
+
+#include "tester.h"
+
+namespace  
+{
+
+    using namespace test;
+    using namespace dlib;
+    using namespace std;
+
+    logger dlog("test.sammon");
+
+
+    std::vector<matrix<double,4,1> > make_test_data4(
+    )
+    {
+        // Builds four points in a 4D space: the origin followed by the first three
+        // unit vectors.  The 4th coordinate is 0 for every point.
+        typedef matrix<double,4,1> point_type;
+
+        std::vector<point_type> points;
+        point_type p;
+
+        p = 0,0,0,0;
+        points.push_back(p);
+
+        p = 1,0,0,0;
+        points.push_back(p);
+
+        p = 0,1,0,0;
+        points.push_back(p);
+
+        p = 0,0,1,0;
+        points.push_back(p);
+
+        return points;
+    }
+
+    std::vector<matrix<double,3,1> > make_test_data3(
+    )
+    {
+        // Builds four points in a 3D space: the origin followed by the three
+        // unit vectors, stored as statically sized 3x1 matrices.
+        typedef matrix<double,3,1> point_type;
+
+        std::vector<point_type> points;
+        point_type p;
+
+        p = 0,0,0;
+        points.push_back(p);
+
+        p = 1,0,0;
+        points.push_back(p);
+
+        p = 0,1,0;
+        points.push_back(p);
+
+        p = 0,0,1;
+        points.push_back(p);
+
+        return points;
+    }
+
+    std::vector<matrix<double> > make_test_data3d(
+    )
+    {
+        // Builds the origin followed by the three unit vectors of a 3D space.
+        // Each point is written into a statically sized 3x1 matrix and then
+        // stored in the returned vector as a matrix<double>.
+        std::vector<matrix<double> > points;
+        matrix<double,3,1> p;
+
+        p = 0,0,0;
+        points.push_back(p);
+
+        p = 1,0,0;
+        points.push_back(p);
+
+        p = 0,1,0;
+        points.push_back(p);
+
+        p = 0,0,1;
+        points.push_back(p);
+
+        return points;
+    }
+
+
+    template <typename matrix_type>
+    void check_corner_projection (
+        sammon_projection& s,
+        const std::vector<matrix_type>& data
+    )
+    /*!
+        requires
+            - data.size() == 4
+            - data[1], data[2], and data[3] are all the same distance from data[0]
+              and all the same distance from each other (e.g. the origin followed by
+              three unit vectors).
+        ensures
+            - Projects data into 2 dimensions with s and checks, via DLIB_TEST, that
+              the projection keeps both groups of distances uniform:
+                - the distances from point 0 to points 1, 2, and 3
+                - the pairwise distances between points 1, 2, and 3
+              Each group passes when its standard deviation divided by its mean is
+              less than 1e-4.
+    !*/
+    {
+        const std::vector<matrix<double, 0, 1> > projs = s(data,2);
+        running_stats<double> rs1, rs2;
+
+        // distances from the first point to each of the other three
+        rs1.add(length(projs[0] - projs[1]));
+        rs1.add(length(projs[0] - projs[2]));
+        rs1.add(length(projs[0] - projs[3]));
+
+        // pairwise distances between the other three points
+        rs2.add(length(projs[1] - projs[2]));
+        rs2.add(length(projs[2] - projs[3]));
+        rs2.add(length(projs[3] - projs[1]));
+
+        DLIB_TEST(rs1.stddev()/rs1.mean() < 1e-4);
+        DLIB_TEST(rs2.stddev()/rs2.mean() < 1e-4);
+    }
+
+    void runtest()
+    {
+        // Runs the same symmetry check on three inputs that differ only in how the
+        // points are stored: static 3x1 matrices, static 4x1 matrices, and
+        // matrix<double>.  A single sammon_projection object is reused for all
+        // three runs.
+        sammon_projection s;
+
+        check_corner_projection(s, make_test_data3());
+        check_corner_projection(s, make_test_data4());
+        check_corner_projection(s, make_test_data3d());
+    }
+
+    void runtest2()
+    {
+        // Checks the shape of the output for very small inputs: no points, a single
+        // point, and two identical points.
+        typedef std::vector<matrix<double, 0, 1> > sample_list;
+
+        sammon_projection s;
+        sample_list projs;
+        sample_list temp;
+
+        // No input points: the result must also be empty.
+        DLIB_TEST(s(projs,3).size() == 0);
+
+        matrix<double,2,1> m;
+        m = 1,2;
+
+        // One 2D point projected into 2 dimensions.
+        projs.push_back(m);
+        temp = s(projs,2);
+        DLIB_TEST(temp.size() == 1);
+        DLIB_TEST(temp[0].size() == 2);
+
+        // Two copies of the same 2D point projected into 1 dimension.
+        projs.push_back(m);
+        temp = s(projs,1);
+        DLIB_TEST(temp.size() == 2);
+        DLIB_TEST(temp[0].size() == 1);
+        DLIB_TEST(temp[1].size() == 1);
+    }
+
+    void runtest3(int num_dims)
+    {
+        // Three evenly spaced points on a line: (1,1,1), (1,2,1), (1,3,1).  After
+        // projecting into num_dims dimensions the two neighbouring gaps must be
+        // equal and the end-to-end distance must be twice one gap.
+        typedef matrix<double, 0, 1> column_vector;
+
+        sammon_projection s;
+
+        std::vector<column_vector> input;
+        matrix<double,3,1> m;
+
+        m = 1, 1, 1;
+        input.push_back(m);
+        m = 1, 2, 1;
+        input.push_back(m);
+        m = 1, 3, 1;
+        input.push_back(m);
+
+        const std::vector<column_vector> projs = s(input,num_dims);
+
+        // d1a and d1b are the gaps between neighbours, d2 spans both gaps.
+        const double d1a = length(projs[0] - projs[1]);
+        const double d1b = length(projs[1] - projs[2]);
+        const double d2  = length(projs[0] - projs[2]);
+
+        DLIB_TEST(std::abs(d1a-d1b)/d1a < 1e-8);
+        DLIB_TEST(std::abs(d2/d1a-2) < 1e-8);
+    }
+
+    void runtest4(int num_dims)
+    {
+        // Two distinct points, (1,1,1) and (1,2,1), must still be separated after
+        // being projected into num_dims dimensions.
+        typedef matrix<double, 0, 1> column_vector;
+
+        sammon_projection s;
+
+        std::vector<column_vector> input;
+        matrix<double,3,1> m;
+
+        m = 1, 1, 1;
+        input.push_back(m);
+        m = 1, 2, 1;
+        input.push_back(m);
+
+        const std::vector<column_vector> projs = s(input,num_dims);
+
+        DLIB_TEST(length(projs[0] - projs[1]) > 1e-5); 
+    }
+
+    class sammon_tester : public tester
+    {
+        /*!
+            WHAT THIS OBJECT REPRESENTS
+                The test case named "test_sammon".  Its perform_test() runs every
+                sammon_projection check defined in this file.
+        !*/
+    public:
+        sammon_tester (
+        ) :
+            tester ("test_sammon",
+                    "Runs tests on the sammon_projection component.")
+        {}
+
+        void perform_test (
+        )
+        {
+            // print_spinner() is called before every step so that each one is
+            // preceded by a progress update.
+            print_spinner();
+            runtest();
+            print_spinner();
+            runtest2();
+            print_spinner();
+            runtest3(2);
+            print_spinner();
+            runtest4(2);
+            print_spinner();
+            runtest3(1);
+            print_spinner();
+            runtest4(1);
+        }
+    } a;   // NOTE(review): presumably this file-scope instance is what makes the test
+           // visible to the test driver via the tester base class -- confirm in tester.h
+
+}
+
+
+