Commit 4370a5f7 authored by Davis King's avatar Davis King

Split the vector_normalizer into a normal normalizer and a pca normalizer

version.  I did this mostly so that the normal version can return
matrix_type objects and thus avoid any reallocation of matrix objects
in client code.

--HG--
extra : convert_revision : svn%3Afdd8eb12-d10e-0410-9acb-85c331704f74/trunk%403154
parent 23243adf
......@@ -179,11 +179,115 @@ namespace dlib
m = mean(vector_to_matrix(samples));
sd = reciprocal(sqrt(variance(vector_to_matrix(samples))));
pca.set_size(0,0);
}
long in_vector_size (
) const
{
return m.nr();
}
long out_vector_size (
) const
{
return m.nr();
}
const matrix_type& means (
) const
{
return m;
}
const matrix_type& std_devs (
) const
{
return sd;
}
const matrix_type& operator() (
const matrix_type& x
) const
{
// make sure requires clause is not broken
DLIB_ASSERT(x.nr() == in_vector_size() && x.nc() == 1,
"\tmatrix vector_normalizer::operator()"
<< "\n\t you have given invalid arguments to this function"
<< "\n\t x.nr(): " << x.nr()
<< "\n\t in_vector_size(): " << in_vector_size()
<< "\n\t x.nc(): " << x.nc()
<< "\n\t this: " << this
);
temp_out = pointwise_multiply(x-m, sd);
return temp_out;
}
void swap (
vector_normalizer& item
)
{
m.swap(item.m);
sd.swap(item.sd);
temp_out.swap(item.temp_out);
}
friend void deserialize (
vector_normalizer& item,
std::istream& in
)
{
deserialize(item.m, in);
deserialize(item.sd, in);
// Keep deserializing the pca matrix for backwards compatibility.
matrix<double> pca;
deserialize(pca, in);
}
friend void serialize (
const vector_normalizer& item,
std::ostream& out
)
{
serialize(item.m, out);
serialize(item.sd, out);
// Keep serializing the pca matrix for backwards compatibility.
matrix<double> pca;
serialize(pca, out);
}
private:
// ------------------- private data members -------------------
matrix_type m, sd;
// This is just a temporary variable that doesn't contribute to the
// state of this object.
mutable matrix_type temp_out;
};
template <
typename matrix_type
>
inline void swap (
vector_normalizer<matrix_type>& a,
vector_normalizer<matrix_type>& b
) { a.swap(b); }
// ----------------------------------------------------------------------------------------
template <
typename matrix_type
>
class vector_normalizer_pca
{
public:
typedef typename matrix_type::mem_manager_type mem_manager_type;
typedef typename matrix_type::type scalar_type;
template <typename vector_type>
void train_pca (
void train (
const vector_type& samples,
const double eps = 0.99
)
......@@ -195,12 +299,12 @@ namespace dlib
// make sure requires clause is not broken
DLIB_ASSERT(samples.size() > 0,
"\tvoid vector_normalizer::train_pca()"
"\tvoid vector_normalizer_pca::train_pca()"
<< "\n\tyou have to give a nonempty set of samples to this function"
<< "\n\tthis: " << this
);
DLIB_ASSERT(0 < eps && eps <= 1,
"\tvoid vector_normalizer::train_pca()"
"\tvoid vector_normalizer_pca::train_pca()"
<< "\n\tyou have to give a nonempty set of samples to this function"
<< "\n\tthis: " << this
);
......@@ -216,9 +320,6 @@ namespace dlib
long out_vector_size (
) const
{
if (pca.size() == 0)
return m.nr();
else
return pca.nr();
}
......@@ -246,7 +347,7 @@ namespace dlib
{
// make sure requires clause is not broken
DLIB_ASSERT(x.nr() == in_vector_size() && x.nc() == 1,
"\tmatrix vector_normalizer::operator()"
"\tmatrix vector_normalizer_pca::operator()"
<< "\n\t you have given invalid arguments to this function"
<< "\n\t x.nr(): " << x.nr()
<< "\n\t in_vector_size(): " << in_vector_size()
......@@ -254,21 +355,15 @@ namespace dlib
<< "\n\t this: " << this
);
if (pca.size() == 0)
{
temp_out = pointwise_multiply(x-m, sd);
}
else
{
// If we have a pca transform matrix on hand then
// also apply that.
temp_out = pca*pointwise_multiply(x-m, sd);
}
return temp_out;
}
void swap (
vector_normalizer& item
vector_normalizer_pca& item
)
{
m.swap(item.m);
......@@ -278,7 +373,7 @@ namespace dlib
}
friend void deserialize (
vector_normalizer& item,
vector_normalizer_pca& item,
std::istream& in
)
{
......@@ -288,7 +383,7 @@ namespace dlib
}
friend void serialize (
const vector_normalizer& item,
const vector_normalizer_pca& item,
std::ostream& out
)
{
......@@ -365,8 +460,8 @@ namespace dlib
typename matrix_type
>
inline void swap (
vector_normalizer<matrix_type>& a,
vector_normalizer<matrix_type>& b
vector_normalizer_pca<matrix_type>& a,
vector_normalizer_pca<matrix_type>& b
) { a.swap(b); }
// ----------------------------------------------------------------------------------------
......
......@@ -164,7 +164,6 @@ namespace dlib
- out_vector_size() == 0
- means().size() == 0
- std_devs().size() == 0
- pca_matrix().size() == 0
WHAT THIS OBJECT REPRESENTS
This object represents something that can learn to normalize a set
......@@ -174,6 +173,13 @@ namespace dlib
Also, if desired, this object can use principal component
analysis for the purposes of reducing the number of elements in a
vector.
THREAD SAFETY
Note that this object contains a cached matrix object it uses
to store intermediate results for normalization. This avoids
needing to reallocate it every time this object performs normalization
but also makes it non-thread safe. So make sure you don't share
this object between threads.
!*/
public:
......@@ -197,11 +203,146 @@ namespace dlib
vectors in the given set of samples.
- #means() == mean(samples)
- #std_devs() == reciprocal(sqrt(variance(samples)));
- #pca_matrix().size() == 0
!*/
long in_vector_size (
) const;
/*!
ensures
- returns the number of rows that input vectors are
required to contain if they are to be normalized by
this object.
!*/
long out_vector_size (
) const;
/*!
ensures
- returns the number of rows in the normalized vectors
that come out of this object.
!*/
const matrix_type& means (
) const;
/*!
ensures
- returns a matrix M such that:
- M.nc() == 1
- M.nr() == in_vector_size()
- M(i) == the mean of the ith input feature shown to train()
!*/
const matrix_type& std_devs (
) const;
/*!
ensures
- returns a matrix SD such that:
- SD.nc() == 1
- SD.nr() == in_vector_size()
- SD(i) == the reciprocal of the standard deviation of the ith
input feature shown to train()
!*/
const matrix_type& operator() (
const matrix_type& x
) const;
/*!
requires
- x.nr() == in_vector_size()
- x.nc() == 1
ensures
- returns a normalized version of x, call it Z, that has the
following properties:
- Z.nr() == out_vector_size()
- Z.nc() == 1
- the expected value of each element of Z is 0
- the expected variance of each element of Z is 1
- Z == pointwise_multiply(x-means(), std_devs());
!*/
void swap (
vector_normalizer& item
);
/*!
ensures
- swaps *this and item
!*/
};
template <
typename matrix_type
>
inline void swap (
vector_normalizer<matrix_type>& a,
vector_normalizer<matrix_type>& b
) { a.swap(b); }
/*!
provides a global swap function
!*/
template <
typename matrix_type,
>
void deserialize (
vector_normalizer<matrix_type>& item,
std::istream& in
);
/*!
provides deserialization support
!*/
template <
typename matrix_type,
>
void serialize (
const vector_normalizer<matrix_type>& item,
std::ostream& out
);
/*!
provides serialization support
!*/
// ----------------------------------------------------------------------------------------
template <
typename matrix_type
>
class vector_normalizer_pca
{
/*!
REQUIREMENTS ON matrix_type
- must be a dlib::matrix object capable of representing column
vectors
INITIAL VALUE
- in_vector_size() == 0
- out_vector_size() == 0
- means().size() == 0
- std_devs().size() == 0
- pca_matrix().size() == 0
WHAT THIS OBJECT REPRESENTS
This object represents something that can learn to normalize a set
of column vectors. In particular, normalized column vectors should
have zero mean and a variance of one.
Also, this object uses principal component analysis for the purposes
of reducing the number of elements in a vector.
THREAD SAFETY
Note that this object contains a cached matrix object it uses
to store intermediate results for normalization. This avoids
needing to reallocate it every time this object performs normalization
but also makes it non-thread safe. So make sure you don't share
this object between threads.
!*/
public:
typedef typename matrix_type::mem_manager_type mem_manager_type;
typedef typename matrix_type::type scalar_type;
template <typename vector_type>
void train_pca (
void train (
const vector_type& samples,
const double eps = 0.99
);
......@@ -253,7 +394,6 @@ namespace dlib
- M.nc() == 1
- M.nr() == in_vector_size()
- M(i) == the mean of the ith input feature shown to train()
or train_pca()
!*/
const matrix<scalar_type,0,1,mem_manager_type>& std_devs (
......@@ -264,21 +404,18 @@ namespace dlib
- SD.nc() == 1
- SD.nr() == in_vector_size()
- SD(i) == the reciprocal of the standard deviation of the ith
input feature shown to train() or train_pca()
input feature shown to train()
!*/
const matrix<scalar_type,0,0,mem_manager_type>& pca_matrix (
) const;
/*!
ensures
- if (PCA is used in normalization) then
- returns a matrix PCA such that:
- PCA.nr() == out_vector_size()
- PCA.nc() == in_vector_size()
- PCA == the principal component analysis transformation
matrix
- else
- returns an empty matrix object (i.e. it has size() == 0)
!*/
const matrix<scalar_type,0,1,mem_manager_type>& operator() (
......@@ -295,14 +432,11 @@ namespace dlib
- Z.nc() == 1
- the expected value of each element of Z is 0
- the expected variance of each element of Z is 1
- if (pca_matrix().size() > 0) then
- Z == pca_matrix()*pointwise_multiply(x-means(), std_devs());
- else
- Z == pointwise_multiply(x-means(), std_devs());
!*/
void swap (
vector_normalizer& item
vector_normalizer_pca& item
);
/*!
ensures
......@@ -314,8 +448,8 @@ namespace dlib
typename matrix_type
>
inline void swap (
vector_normalizer<matrix_type>& a,
vector_normalizer<matrix_type>& b
vector_normalizer_pca<matrix_type>& a,
vector_normalizer_pca<matrix_type>& b
) { a.swap(b); }
/*!
provides a global swap function
......@@ -325,7 +459,7 @@ namespace dlib
typename matrix_type,
>
void deserialize (
vector_normalizer<matrix_type>& item,
vector_normalizer_pca<matrix_type>& item,
std::istream& in
);
/*!
......@@ -336,7 +470,7 @@ namespace dlib
typename matrix_type,
>
void serialize (
const vector_normalizer<matrix_type>& item,
const vector_normalizer_pca<matrix_type>& item,
std::ostream& out
);
/*!
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment