Added cca()

fd9ed739 · Davis King · 3dfa76a3 · fd9ed739 · fd9ed739 · fd9ed739
Commit fd9ed739 authored Jan 14, 2013 by Davis King
Hide whitespace changes
Inline Side-by-side

Showing with 259 additions and 0 deletions

statistics.h dlib/statistics.h +1 -0

cca.h dlib/statistics/cca.h +122 -0

cca_abstract.h dlib/statistics/cca_abstract.h +136 -0

No files found.
--- a/dlib/statistics.h
+++ b/dlib/statistics.h
@@ -8,6 +8,7 @@
 #include "statistics/random_subset_selector.h"
 #include "statistics/image_feature_sampling.h"
 #include "statistics/sammon.h"
+#include "statistics/cca.h"

 #endif // DLIB_STATISTICs_H_ 


--- a/dlib/statistics/cca.h
+++ b/dlib/statistics/cca.h
+// Copyright (C) 2013  Davis E. King (davis@dlib.net)
+// License: Boost Software License   See LICENSE.txt for the full license.
+#ifndef DLIB_CCA_h__
+#define DLIB_CCA_h__
+
+#include "cca_abstract.h"
+#include "../algs.h"
+#include "../matrix.h"
+
+namespace dlib
+{
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename T
+        >
+    matrix<T,0,1> compute_correlations (
+        const matrix<T>& L,
+        const matrix<T>& R
+    )
+    {
+        matrix<T> A, B, C;
+        A = diag(trans(R)*L);
+        B = sqrt(diag(trans(L)*L));
+        C = sqrt(diag(trans(R)*R));
+        A = pointwise_multiply(A , reciprocal(pointwise_multiply(B,C)));
+        return A;
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename matrix_type, 
+        typename T
+        >
+    matrix<T,0,1> impl_cca (
+        const matrix_type& L,
+        const matrix_type& R,
+        matrix<T>& Ltrans,
+        matrix<T>& Rtrans,
+        unsigned long num_correlations,
+        unsigned long extra_rank,
+        unsigned long q,
+        unsigned long num_output_correlations
+    )
+    {
+        matrix<T> Ul, Vl;
+        matrix<T> Ur, Vr;
+        matrix<T> U, V;
+        matrix<T,0,1> Dr, Dl, D;
+
+
+        // Note that we add a few more singular vectors in because it helps improve the
+        // final results if we run this part with a little higher rank than the final SVD.
+        svd_fast(L, Ul, Dl, Vl, num_correlations+extra_rank, q);
+        svd_fast(R, Ur, Dr, Vr, num_correlations+extra_rank, q);
+
+        // This matrix is really small so we can do a normal full SVD on it.
+        svd3(trans(Ul)*Ur, U, D, V);
+        // now throw away extra columns of the transformations.  We do this in a way
+        // that keeps the directions that have the highest correlations.
+        matrix<T,0,1> temp = D;
+        rsort_columns(U, temp);
+        rsort_columns(V, D);
+        U = colm(U, range(0, num_output_correlations-1));
+        V = colm(V, range(0, num_output_correlations-1));
+        D = rowm(D, range(0, num_output_correlations-1));
+
+        Ltrans = Vl*inv(diagm(Dl))*U;
+        Rtrans = Vr*inv(diagm(Dr))*V;
+
+        // Note that the D matrix contains the correlation values for the transformed
+        // vectors.  However, when the L and R matrices have rank higher than
+        // num_correlations+extra_rank then the values in D become only approximate.
+        return D; 
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    template <typename T>
+    matrix<T,0,1> cca (
+        const matrix<T>& L,
+        const matrix<T>& R,
+        matrix<T>& Ltrans,
+        matrix<T>& Rtrans,
+        unsigned long num_correlations,
+        unsigned long extra_rank = 5,
+        unsigned long q = 2
+    )
+    {
+        using std::min;
+        const unsigned long n = min(num_correlations, (unsigned long)min(R.nr(),min(L.nc(), R.nc())));
+        return impl_cca(L,R,Ltrans, Rtrans, num_correlations, extra_rank, q, n); 
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    template <typename sparse_vector_type, typename T>
+    matrix<T,0,1> cca (
+        const std::vector<sparse_vector_type>& L,
+        const std::vector<sparse_vector_type>& R,
+        matrix<T>& Ltrans,
+        matrix<T>& Rtrans,
+        unsigned long num_correlations,
+        unsigned long extra_rank = 5,
+        unsigned long q = 2
+    )
+    {
+        using std::min;
+        const unsigned long n = min(max_index_plus_one(L), max_index_plus_one(R));
+        const unsigned long num_output_correlations = min(num_correlations, min(R.size(),n));
+        return impl_cca(L,R,Ltrans, Rtrans, num_correlations, extra_rank, q, num_output_correlations); 
+    }
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_CCA_h__
+
+
--- a/dlib/statistics/cca_abstract.h
+++ b/dlib/statistics/cca_abstract.h
+// Copyright (C) 2013  Davis E. King (davis@dlib.net)
+// License: Boost Software License   See LICENSE.txt for the full license.
+#undef DLIB_CCA_AbSTRACT_H__
+#ifdef DLIB_CCA_AbSTRACT_H__
+
+#include "../matrix/matrix_la_abstract.h"
+
+namespace dlib
+{
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename T
+        >
+    matrix<T,0,1> compute_correlations (
+        const matrix<T>& L,
+        const matrix<T>& R
+    );
+    /*!
+        requires
+            - L.size() > 0 
+            - R.size() > 0 
+            - L.nr() == R.nr()
+        ensures
+            - This function treats L and R as sequences of paired row vectors.  It
+              then computes the correlation values between the elements of these 
+              row vectors.  In particular, we return a matrix COR such that:
+                - COR.size() == L.nc()
+                - for all valid i:
+                    - COR(i) == the correlation coefficient between the following 
+                      sequence of paired numbers: (L(k,i), R(k,i)) for k: 0 <= k < L.nr().
+                      Therefore, COR(i) is a value between -1 and 1 inclusive where 1
+                      indicates perfect correlation and -1 perfect anti-correlation.
+    !*/
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename T
+        >
+    matrix<T,0,1> cca (
+        const matrix<T>& L,
+        const matrix<T>& R,
+        matrix<T>& Ltrans,
+        matrix<T>& Rtrans,
+        unsigned long num_correlations,
+        unsigned long extra_rank = 5,
+        unsigned long q = 2
+    );
+    /*!
+        requires
+            - num_correlations > 0
+            - L.size() > 0 
+            - R.size() > 0 
+            - L.nr() == R.nr()
+        ensures
+            - This function performs a canonical correlation analysis between the row
+              vectors in L and R.  That is, it finds two transformation matrices, Ltrans
+              and Rtrans, such that row vectors in the transformed matrices L*Ltrans and
+              R*Rtrans are as correlated as possible.  That is, we try to find two transforms
+              such that the correlation values returned by compute_correlations(L*Ltrans, R*Rtrans)
+              would be maximized.
+            - Let N == min(num_correlations, min(R.nr(),min(L.nc(),R.nc())))
+              (This is the actual number of elements in the transformed vectors.
+              Therefore, note that you can't get more outputs than there are rows or
+              columns in the input matrices.)
+            - #Ltrans.nr() == L.nc()
+            - #Ltrans.nc() == N 
+            - #Rtrans.nr() == R.nc()
+            - #Rtrans.nc() == N 
+            - No centering is applied to the L and R matrices.  Therefore, if you want a
+              CCA relative to the centered vectors then you must apply centering yourself
+              before calling cca().
+            - This function works with reduced rank approximations of the L and R matrices.
+              This makes it fast when working with large matrices.  In particular, we use
+              the svd_fast() routine to find reduced rank representations of the input
+              matrices by calling it as follows: svd_fast(L, U,D,V, num_correlations+extra_rank, q) 
+              and similarly for R.  This means that you can use the extra_rank and q
+              arguments to cca() to influence the accuracy of the reduced rank
+              approximation.  However, the default values should work fine for most
+              problems.
+            - returns an estimate of compute_correlations(L*#Ltrans, R*#Rtrans).  The
+              returned vector should exactly match the output of compute_correlations()
+              when the reduced rank approximation to L and R is accurate.  However, when L
+              and/or R are higher rank than num_correlations+extra_rank the return value of
+              this function will deviate from compute_correlations(L*#Ltrans, R*#Rtrans).
+              This deviation can be used to check if the reduced rank approximation is
+              working or you need to increase extra_rank.
+            - A good discussion of CCA can be found in the paper "Canonical Correlation
+              Analysis" by David Weenink.  In particular, this function is implemented
+              using equations 29 and 30 from his paper.  We also use the idea of doing CCA
+              on a reduced rank approximation of L and R as suggested by Paramveer S.
+              Dhillon in his paper "Two Step CCA: A new spectral method for estimating
+              vector models of words".
+    !*/
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename sparse_vector_type, 
+        typename T
+        >
+    matrix<T,0,1> cca (
+        const std::vector<sparse_vector_type>& L,
+        const std::vector<sparse_vector_type>& R,
+        matrix<T>& Ltrans,
+        matrix<T>& Rtrans,
+        unsigned long num_correlations,
+        unsigned long extra_rank = 5,
+        unsigned long q = 2
+    );
+    /*!
+        requires
+            - num_correlations > 0
+            - L.size() == R.size()
+            - max_index_plus_one(L) > 0 && max_index_plus_one(R) > 0
+              (i.e. L and R can't be a empty matrices)
+        ensures
+            - This is just an overload of the cca() function defined above.  Except in this
+              case we take a sparse representation of the input L and R matrices rather than
+              dense matrices.  Therefore, in this case, we interpret L and R as matrices
+              with L.size() rows, where each row is defined by a sparse vector.  So this 
+              function does exactly the same thing as the above cca().
+            - Note that you can apply the output transforms to a sparse vector with the
+              following code:
+                sparse_matrix_vector_multiply(trans(Ltrans), your_sparse_vector)
+    !*/
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_CCA_AbSTRACT_H__
+
+