Added a unit test for the discriminant_pca object and also fixed a few minor bugs

and clarified a few things. Also added the ability to add discriminant_pca objects together. --HG-- extra : convert_revision : svn%3Afdd8eb12-d10e-0410-9acb-85c331704f74/trunk%403332

Added a unit test for the discriminant_pca object and also fixed a few minor bugs
and clarified a few things. Also added the ability to add discriminant_pca objects together. --HG-- extra : convert_revision : svn%3Afdd8eb12-d10e-0410-9acb-85c331704f74/trunk%403332
edd2cb4f · Davis King · 3f6e1a6d · edd2cb4f · edd2cb4f · edd2cb4f
Commit edd2cb4f authored Dec 19, 2009 by Davis King
Showing with 164 additions and 14 deletions

dpca.h dlib/statistics/dpca.h +133 -7

dpca_abstract.h dlib/statistics/dpca_abstract.h +30 -7

CMakeLists.txt dlib/test/CMakeLists.txt +1 -0

discriminant_pca.cpp dlib/test/discriminant_pca.cpp +0 -0

No files found.
--- a/dlib/statistics/dpca.h
+++ b/dlib/statistics/dpca.h
@@ -16,7 +16,7 @@ namespace dlib
 // ----------------------------------------------------------------------------------------

    template <
-        typename column_matrix
+        typename column_matrix_type
        >
    class discriminant_pca
    {
@@ -56,6 +56,7 @@ namespace dlib
            discriminant_pca_error(const std::string& message): error(message) {}
        };

+        typedef column_matrix_type column_matrix;
        typedef typename column_matrix::mem_manager_type mem_manager_type;
        typedef typename column_matrix::type scalar_type;
        typedef typename column_matrix::layout_type layout_type;
@@ -98,6 +99,14 @@ namespace dlib
            scalar_type weight
        )
        {
+            // make sure requires clause is not broken
+            DLIB_ASSERT(weight >= 0,
+                "\t void discriminant_pca::set_within_class_weight()"
+                << "\n\t You can't use negative weight values"
+                << "\n\t weight: " << weight 
+                << "\n\t this:   " << this
+                );
+
            within_weight = weight;
        }

@@ -111,6 +120,14 @@ namespace dlib
            scalar_type weight
        )
        {
+            // make sure requires clause is not broken
+            DLIB_ASSERT(weight >= 0,
+                "\t void discriminant_pca::set_between_class_weight()"
+                << "\n\t You can't use negative weight values"
+                << "\n\t weight: " << weight 
+                << "\n\t this:   " << this
+                );
+
            between_weight = weight;
        }

@@ -125,6 +142,20 @@ namespace dlib
            const column_matrix& y
        )
        {
+            // make sure requires clause is not broken
+            DLIB_ASSERT(is_col_vector(x) && is_col_vector(y) && 
+                         x.size() == y.size() &&
+                         (in_vector_size() == 0 || x.size() == in_vector_size()),
+                "\t void discriminant_pca::add_to_within_class_variance()"
+                << "\n\t Invalid inputs were given to this function"
+                << "\n\t is_col_vector(x): " << is_col_vector(x) 
+                << "\n\t is_col_vector(y): " << is_col_vector(y) 
+                << "\n\t x.size():         " << x.size() 
+                << "\n\t y.size():         " << y.size() 
+                << "\n\t in_vector_size(): " << in_vector_size() 
+                << "\n\t this:             " << this
+                );
+
            vect_size = x.size();
            if (within_count == 0)
            {
@@ -142,6 +173,20 @@ namespace dlib
            const column_matrix& y
        )
        {
+            // make sure requires clause is not broken
+            DLIB_ASSERT(is_col_vector(x) && is_col_vector(y) && 
+                         x.size() == y.size() &&
+                         (in_vector_size() == 0 || x.size() == in_vector_size()),
+                "\t void discriminant_pca::add_to_between_class_variance()"
+                << "\n\t Invalid inputs were given to this function"
+                << "\n\t is_col_vector(x): " << is_col_vector(x) 
+                << "\n\t is_col_vector(y): " << is_col_vector(y) 
+                << "\n\t x.size():         " << x.size() 
+                << "\n\t y.size():         " << y.size() 
+                << "\n\t in_vector_size(): " << in_vector_size() 
+                << "\n\t this:             " << this
+                );
+
            vect_size = x.size();
            if (between_count == 0)
            {
@@ -158,6 +203,16 @@ namespace dlib
            const column_matrix& x
        )
        {
+            // make sure requires clause is not broken
+            DLIB_ASSERT(is_col_vector(x) && (in_vector_size() == 0 || x.size() == in_vector_size()),
+                "\t void discriminant_pca::add_to_total_variance()"
+                << "\n\t Invalid inputs were given to this function"
+                << "\n\t is_col_vector(x): " << is_col_vector(x) 
+                << "\n\t in_vector_size(): " << in_vector_size() 
+                << "\n\t x.size():         " << x.size() 
+                << "\n\t this:             " << this
+                );
+
            vect_size = x.size();
            if (total_count == 0)
            {
@@ -188,6 +243,15 @@ namespace dlib
            const double eps = 0.99
        ) const
        {
+            // make sure requires clause is not broken
+            DLIB_ASSERT(0 < eps && eps <= 1 && in_vector_size() != 0,
+                "\t void discriminant_pca::dpca_matrix()"
+                << "\n\t Invalid inputs were given to this function"
+                << "\n\t eps:              " << eps 
+                << "\n\t in_vector_size(): " << in_vector_size() 
+                << "\n\t this:             " << this
+                );
+
            general_matrix cov;

            // now combine the three measures of variance into a single matrix by using the
@@ -252,7 +316,7 @@ namespace dlib
            swap(between_weight, item.between_weight);
            swap(within_cov, item.within_cov);
            swap(within_count, item.within_count);
-            swap(between_weight, item.between_weight);
+            swap(within_weight, item.within_weight);
        }

        friend void deserialize (
@@ -269,7 +333,7 @@ namespace dlib
            deserialize( item.between_weight, in);
            deserialize( item.within_cov, in);
            deserialize( item.within_count, in);
-            deserialize( item.between_weight, in);
+            deserialize( item.within_weight, in);
        }

        friend void serialize (
@@ -286,7 +350,69 @@ namespace dlib
            serialize( item.between_weight, out);
            serialize( item.within_cov, out);
            serialize( item.within_count, out);
-            serialize( item.between_weight, out);
+            serialize( item.within_weight, out);
+        }
+
+        const discriminant_pca operator+ (
+            const discriminant_pca& item
+        ) const
+        {
+            // Check the preconditions: matching vector sizes (or one side empty) and equal class weights.
+            DLIB_ASSERT((in_vector_size() == 0 || item.in_vector_size() == 0 || in_vector_size() == item.in_vector_size()) &&
+                         between_class_weight() == item.between_class_weight() &&
+                         within_class_weight() == item.within_class_weight(),
+                "\t discriminant_pca discriminant_pca::operator+()"
+                << "\n\t The two discriminant_pca objects being added must have compatible parameters"
+                << "\n\t in_vector_size():            " << in_vector_size() 
+                << "\n\t item.in_vector_size():       " << item.in_vector_size() 
+                << "\n\t between_class_weight():      " << between_class_weight() 
+                << "\n\t item.between_class_weight(): " << item.between_class_weight() 
+                << "\n\t within_class_weight():       " << within_class_weight() 
+                << "\n\t item.within_class_weight():  " << item.within_class_weight() 
+                << "\n\t this:                        " << this
+                );
+
+            discriminant_pca temp(item);
+
+            // temp starts as a copy of item.  For each statistic below: when both counts are nonzero the
+            // values are summed; when only ours is nonzero ours are copied in; otherwise temp keeps item's.  NOTE(review): temp.vect_size is not updated here -- confirm this is fine when item holds no data.
+
+            if (total_count != 0 && temp.total_count != 0)
+            {
+                temp.total_cov += total_cov;
+                temp.total_sum += total_sum;
+                temp.total_count += total_count;
+            }
+            else if (total_count != 0)
+            {
+                temp.total_cov = total_cov;
+                temp.total_sum = total_sum;
+                temp.total_count = total_count;
+            }
+
+            if (between_count != 0 && temp.between_count != 0)
+            {
+                temp.between_cov += between_cov;
+                temp.between_count += between_count;
+            }
+            else if (between_count != 0)
+            {
+                temp.between_cov = between_cov;
+                temp.between_count = between_count;
+            }
+
+            if (within_count != 0 && temp.within_count != 0)
+            {
+                temp.within_cov += within_cov;
+                temp.within_count += within_count;
+            }
+            else if (within_count != 0)
+            {
+                temp.within_cov = within_cov;
+                temp.within_count = within_count;
+            }
+
+            return temp;
        }

    private:
@@ -320,16 +446,16 @@ namespace dlib

        general_matrix total_cov;
        general_matrix total_sum;
-        long total_count;
+        scalar_type total_count;

        long vect_size;

        general_matrix between_cov;
-        long between_count;
+        scalar_type between_count;
        scalar_type between_weight;

        general_matrix within_cov;
-        long within_count;
+        scalar_type within_count;
        scalar_type within_weight;
    };


--- a/dlib/statistics/dpca_abstract.h
+++ b/dlib/statistics/dpca_abstract.h
@@ -13,12 +13,12 @@ namespace dlib
 // ----------------------------------------------------------------------------------------

    template <
-        typename column_matrix
+        typename column_matrix_type
        >
    class discriminant_pca
    {
        /*!
-            REQUIREMENTS ON column_matrix
+            REQUIREMENTS ON column_matrix_type
                Must be some type of dlib::matrix capable of representing a column vector.

            INITIAL VALUE
@@ -36,7 +36,7 @@ namespace dlib
                dimensionality reduction rule using a bunch of data that is partially labeled.  
                
                It functions by estimating three different scatter matrices.  The first is the total scatter 
-                matrix St (i.e.  the total data covariance matrix), the second is the between class scatter 
+                matrix St (i.e. the total data covariance matrix), the second is the between class scatter 
                matrix Sb (basically a measure of the variance between data of different classes) and the 
                third is the within class scatter matrix Sw (a measure of the variance of data within the 
                same classes).  
@@ -45,8 +45,8 @@ namespace dlib
                   S = St + a*Sb - b*Sw
                Where a and b are user supplied weights.  Then the largest eigenvalues of the S matrix are 
                computed and their associated eigenvectors are returned as the output of this algorithm.  
-                That is, the desired linear dimensionality reduction is given by the transformation matrix 
-                with these eigenvectors stored in its rows.
+                That is, the desired linear dimensionality reduction is given by the matrix with these 
+                eigenvectors stored in its rows.

                Note that if a and b are set to 0 (or no labeled data is provided) then the output transformation
                matrix is the same as the one produced by the classical PCA algorithm.
@@ -60,6 +60,7 @@ namespace dlib
            a DPCA matrix.
        !*/

+        typedef column_matrix_type column_matrix;
        typedef typename column_matrix::mem_manager_type mem_manager_type;
        typedef typename column_matrix::type scalar_type;
        typedef typename column_matrix::layout_type layout_type;
@@ -214,17 +215,20 @@ namespace dlib
                - in_vector_size() != 0
                  (i.e. you have to have given this object some data)
            ensures
-                - #is_col_vector(eigenvalues) == true
+                - is_col_vector(#eigenvalues) == true
                - #dpca_mat.nr() == eigenvalues.size() 
                - #dpca_mat.nc() == in_vector_size()
                - rowm(#dpca_mat,i) represents the ith eigenvector of the S matrix described
                  in the class description and its eigenvalue is given by eigenvalues(i).
                - all values in #eigenvalues are > 0.  Moreover, the eigenvalues are in
                  sorted order with the largest eigenvalue stored at eigenvalues(0).
+                - (#dpca_mat)*trans(#dpca_mat) == identity_matrix.  
+                  (i.e. the rows of the dpca_matrix are all unit length vectors and are mutually
+                  orthogonal)
                - Note that #dpca_mat is the desired linear transformation matrix.  That is, 
                  multiplying a vector by #dpca_mat performs the desired linear dimensionality 
                  reduction.
-                - sum(eigenvalues) will be equal to about eps times the total sum of all 
+                - sum(#eigenvalues) will be equal to about eps times the total sum of all 
                  positive eigenvalues in the S matrix described in this class's description.
                  This means that eps is a number that controls how "lossy" the dimensionality
                  reduction will be.  Large values of eps result in more output dimensions 
@@ -237,6 +241,23 @@ namespace dlib
                    that prevents this algorithm from working properly.
        !*/

+        const discriminant_pca operator+ (
+            const discriminant_pca& item
+        ) const;
+        /*!
+            requires
+                - in_vector_size() == 0 || item.in_vector_size() == 0 || in_vector_size() == item.in_vector_size()
+                  (i.e. the in_vector_size() of *this and item must match or one must be zero)
+                - between_class_weight() == item.between_class_weight()
+                - within_class_weight() == item.within_class_weight()
+            ensures
+                - returns a new discriminant_pca object that represents the combination of all 
+                  the measurements given to *this and item.  That is, this function returns a
+                  discriminant_pca object, R, that is equivalent to what you would obtain if all
+                  modifying calls (e.g. the add_to_*() functions) to *this and item had instead 
+                  been done to R.
+        !*/
+
        void swap (
            discriminant_pca& item
        );
@@ -247,6 +268,8 @@ namespace dlib

    };

+// ----------------------------------------------------------------------------------------
+
    template <
        typename column_matrix
        >

--- a/dlib/test/CMakeLists.txt
+++ b/dlib/test/CMakeLists.txt
@@ -31,6 +31,7 @@ set (tests
   conditioning_class.cpp
   config_reader.cpp
   directed_graph.cpp
+   discriminant_pca.cpp
   empirical_kernel_map.cpp
   entropy_coder.cpp
   entropy_encoder_model.cpp

--- a/dlib/test/discriminant_pca.cpp
+++ b/dlib/test/discriminant_pca.cpp