Commit 46c00173 authored by Davis King

Added Sammon's algorithm.

parent bf2edbec
......@@ -7,6 +7,7 @@
#include "statistics/dpca.h"
#include "statistics/random_subset_selector.h"
#include "statistics/image_feature_sampling.h"
#include "statistics/sammon.h"
#endif // DLIB_STATISTICs_H_
......
This diff is collapsed.
// Copyright (C) 2012 Emanuele Cesena (emanuele.cesena@gmail.com), Davis E. King
// License: Boost Software License See LICENSE.txt for the full license.
#undef DLIB_SAMMoN_ABSTRACT_H__
#ifdef DLIB_SAMMoN_ABSTRACT_H__
#include "../matrix/matrix_abstract.h"
#include <vector>
namespace dlib
{
class sammon_projection
{
/*!
    WHAT THIS OBJECT REPRESENTS
        This is a function object that computes the Sammon projection of a set
        of N points in an L-dimensional vector space onto a d-dimensional space
        (d <= L), according to the paper:
            A Nonlinear Mapping for Data Structure Analysis (1969) by J.W. Sammon

        The current implementation is a vectorized version of the original algorithm.
!*/
public:
sammon_projection(
);
/*!
    ensures
        - this object is properly initialized
!*/
template <typename matrix_type>
std::vector<matrix<double,0,1> > operator() (
const std::vector<matrix_type>& data,
long num_dims
);
/*!
    requires
        - num_dims > 0
        - matrix_type should be a kind of dlib::matrix of doubles capable
          of representing column vectors.
        - for all valid i:
            - is_col_vector(data[i]) == true
            - data[0].size() == data[i].size()
              (i.e. all the vectors in data must have the same dimensionality)
        - if (data.size() != 0) then
            - 0 < num_dims <= data[0].size()
              (i.e. you can't project into a higher dimension than the input data,
              only into the same or a lower dimension.)
    ensures
        - This routine computes Sammon's dimensionality reduction method based on the
          given input data.  It will attempt to project the contents of data into a
          num_dims dimensional space that preserves relative distances between the
          input data points.
        - This function returns a std::vector, OUT, such that:
            - OUT == a set of column vectors that represent the Sammon projection of
              the input data vectors.
            - OUT.size() == data.size()
              (so an empty data vector yields an empty OUT)
            - for all valid i:
                - OUT[i].size() == num_dims
                - OUT[i] == the Sammon projection of the input vector data[i]
    NOTE(review)
        - The iteration limit and convergence threshold used by this overload are
          not stated here.  Presumably they are the default num_iters and err_delta
          of the overload below -- confirm against the implementation.
!*/
template <typename matrix_type>
void operator() (
const std::vector<matrix_type>& data,
long num_dims,
std::vector<matrix<double,0,1> >& result,
double &err,
unsigned long num_iters = 1000,
const double err_delta = 1.0e-9
);
/*!
    requires
        - num_iters > 0
        - err_delta > 0
        - num_dims > 0
        - matrix_type should be a kind of dlib::matrix of doubles capable
          of representing column vectors.
        - for all valid i:
            - is_col_vector(data[i]) == true
            - data[0].size() == data[i].size()
              (i.e. all the vectors in data must have the same dimensionality)
        - if (data.size() != 0) then
            - 0 < num_dims <= data[0].size()
              (i.e. you can't project into a higher dimension than the input data,
              only into the same or a lower dimension.)
    ensures
        - This routine computes Sammon's dimensionality reduction method based on the
          given input data.  It will attempt to project the contents of data into a
          num_dims dimensional space that preserves relative distances between the
          input data points.
        - #err == the final error value at the end of the algorithm.  The goal of
          Sammon's algorithm is to find a lower dimensional projection of the input
          data that preserves the relative distances between points.  The value in
          #err measures the total error of the projection that was found, so a
          smaller #err indicates a better projection.
        - Sammon's algorithm will run until either num_iters iterations have executed
          or the change in error from one iteration to the next is less than
          err_delta.
        - Upon completion, the output of Sammon's projection is stored into #result.
          In particular, we will have:
            - #result == a set of column vectors that represent the Sammon projection
              of the input data vectors.
            - #result.size() == data.size()
            - for all valid i:
                - #result[i].size() == num_dims
                - #result[i] == the Sammon projection of the input vector data[i]
!*/
};
}
#endif // DLIB_SAMMoN_ABSTRACT_H__
......@@ -91,6 +91,7 @@ set (tests
read_write_mutex.cpp
reference_counter.cpp
rls.cpp
sammon.cpp
scan_image.cpp
sequence.cpp
sequence_labeler.cpp
......
......@@ -106,6 +106,7 @@ SRC += rand.cpp
SRC += read_write_mutex.cpp
SRC += reference_counter.cpp
SRC += rls.cpp
SRC += sammon.cpp
SRC += scan_image.cpp
SRC += sequence.cpp
SRC += sequence_labeler.cpp
......
// Copyright (C) 2012 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#include <sstream>
#include <string>
#include <cstdlib>
#include <ctime>
#include <cmath>
#include <dlib/statistics.h>
#include "tester.h"
namespace
{
using namespace test;
using namespace dlib;
using namespace std;
logger dlog("test.sammon");
// Returns the four 4-dimensional test points used by runtest(): the origin
// followed by the unit vectors along the first three axes.  The fourth
// coordinate of every point is 0.
std::vector<matrix<double,4,1> > make_test_data4(
)
{
    typedef matrix<double,4,1> point_type;

    std::vector<point_type> points;
    point_type pt;

    // origin
    pt = 0,0,0, 0;
    points.push_back(pt);

    // unit vectors along axes 0, 1 and 2
    pt = 1,0,0, 0;
    points.push_back(pt);
    pt = 0,1,0, 0;
    points.push_back(pt);
    pt = 0,0,1, 0;
    points.push_back(pt);

    return points;
}
// Returns the four 3-dimensional test points used by runtest(): the origin
// followed by the three unit vectors, stored as statically sized 3x1 matrices.
std::vector<matrix<double,3,1> > make_test_data3(
)
{
    typedef matrix<double,3,1> point_type;

    std::vector<point_type> points;
    point_type pt;

    // origin
    pt = 0,0,0;
    points.push_back(pt);

    // unit vectors along axes 0, 1 and 2
    pt = 1,0,0;
    points.push_back(pt);
    pt = 0,1,0;
    points.push_back(pt);
    pt = 0,0,1;
    points.push_back(pt);

    return points;
}
// Same points as the 3-dimensional test set (origin plus the three unit
// vectors), but returned as matrix<double> elements, i.e. without compile-time
// dimensions.  Each point is built in a fixed-size 3x1 matrix and converted
// when it is pushed into the vector.
std::vector<matrix<double> > make_test_data3d(
)
{
    std::vector<matrix<double> > points;
    matrix<double,3,1> pt;

    // origin
    pt = 0,0,0;
    points.push_back(pt);

    // unit vectors along axes 0, 1 and 2
    pt = 1,0,0;
    points.push_back(pt);
    pt = 0,1,0;
    points.push_back(pt);
    pt = 0,0,1;
    points.push_back(pt);

    return points;
}
void runtest()
{
sammon_projection s;
std::vector<matrix<double, 0, 1> > projs = s(make_test_data3(),2);
running_stats<double> rs1, rs2;
rs1.add(length(projs[0] - projs[1]));
rs1.add(length(projs[0] - projs[2]));
rs1.add(length(projs[0] - projs[3]));
rs2.add(length(projs[1] - projs[2]));
rs2.add(length(projs[2] - projs[3]));
rs2.add(length(projs[3] - projs[1]));
DLIB_TEST(rs1.stddev()/rs1.mean() < 1e-4);
DLIB_TEST(rs2.stddev()/rs2.mean() < 1e-4);
projs = s(make_test_data4(),2);
rs1.clear();
rs2.clear();
rs1.add(length(projs[0] - projs[1]));
rs1.add(length(projs[0] - projs[2]));
rs1.add(length(projs[0] - projs[3]));
rs2.add(length(projs[1] - projs[2]));
rs2.add(length(projs[2] - projs[3]));
rs2.add(length(projs[3] - projs[1]));
DLIB_TEST(rs1.stddev()/rs1.mean() < 1e-4);
DLIB_TEST(rs2.stddev()/rs2.mean() < 1e-4);
projs = s(make_test_data3d(),2);
rs1.clear();
rs2.clear();
rs1.add(length(projs[0] - projs[1]));
rs1.add(length(projs[0] - projs[2]));
rs1.add(length(projs[0] - projs[3]));
rs2.add(length(projs[1] - projs[2]));
rs2.add(length(projs[2] - projs[3]));
rs2.add(length(projs[3] - projs[1]));
DLIB_TEST(rs1.stddev()/rs1.mean() < 1e-4);
DLIB_TEST(rs2.stddev()/rs2.mean() < 1e-4);
}
void runtest2()
{
sammon_projection s;
std::vector<matrix<double, 0, 1> > projs, temp;
DLIB_TEST(s(projs,3).size() == 0);
matrix<double,2,1> m;
m = 1,2;
projs.push_back(m);
temp = s(projs,2);
DLIB_TEST(temp.size() == 1);
DLIB_TEST(temp[0].size() == 2);
projs.push_back(m);
temp = s(projs,1);
DLIB_TEST(temp.size() == 2);
DLIB_TEST(temp[0].size() == 1);
DLIB_TEST(temp[1].size() == 1);
}
void runtest3(int num_dims)
{
sammon_projection s;
std::vector<matrix<double, 0, 1> > projs;
matrix<double,3,1> m;
m = 1, 1, 1;
projs.push_back(m);
m = 1, 2, 1;
projs.push_back(m);
m = 1, 3, 1;
projs.push_back(m);
projs = s(projs,num_dims);
const double d1a = length(projs[0] - projs[1]);
const double d1b = length(projs[1] - projs[2]);
const double d2 = length(projs[0] - projs[2]);
DLIB_TEST(std::abs(d1a-d1b)/d1a < 1e-8);
DLIB_TEST(std::abs(d2/d1a-2) < 1e-8);
}
void runtest4(int num_dims)
{
sammon_projection s;
std::vector<matrix<double, 0, 1> > projs;
matrix<double,3,1> m;
m = 1, 1, 1;
projs.push_back(m);
m = 1, 2, 1;
projs.push_back(m);
projs = s(projs,num_dims);
DLIB_TEST(length(projs[0] - projs[1]) > 1e-5);
}
// Test-suite entry for the sammon_projection checks in this file.  It is
// identified to the framework by the name "test_sammon".
class sammon_tester : public tester
{
public:
    sammon_tester (
    ) :
        tester ("test_sammon",
                "Runs tests on the sammon_projection component.")
    {}

    // Runs every sub-test in this file, calling print_spinner() before each one.
    void perform_test (
    )
    {
        print_spinner();
        runtest();
        print_spinner();
        runtest2();

        // The collinearity and separation checks are run first for a 2-D
        // target space and then for a 1-D one.
        for (int num_dims = 2; num_dims >= 1; --num_dims)
        {
            print_spinner();
            runtest3(num_dims);
            print_spinner();
            runtest4(num_dims);
        }
    }
} a;  // file-local instance. NOTE(review): presumably constructing it registers
      // the test with the framework -- confirm in tester.h.
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment