Use banded Cholesky factorization if possible (#857)

* Use banded Cholesky factorization if possible Computation cost from n.n.n -> n.n.b where b is band size * Tidy up whitespace * Remove typo * Escape from banded matrix detection correctly * Use LAPACK banded Cholesky factorisation where possible * Add banded chol tests * Add test for banded chol in column major layout * Use row major layout for banded chol - more efficient as we will pass to LAPACK

Use banded Cholesky factorization if possible (#857)
* Use banded Cholesky factorization if possible Computation cost from n.n.n -> n.n.b where b is band size * Tidy up whitespace * Remove typo * Escape from banded matrix detection correctly * Use LAPACK banded Cholesky factorisation where possible * Add banded chol tests * Add test for banded chol in column major layout * Use row major layout for banded chol - more efficient as we will pass to LAPACK
45ce9f7b · Sean Warren · Davis E. King · 9bc7070a · 45ce9f7b · 45ce9f7b
Commit 45ce9f7b authored Oct 24, 2017 by Sean Warren Committed by Davis E. King Oct 24, 2017
Hide whitespace changes
Inline Side-by-side

Showing with 295 additions and 2 deletions

pbtrf.h dlib/matrix/lapack/pbtrf.h +178 -0

matrix_la.h dlib/matrix/matrix_la.h +75 -2

matrix.cpp dlib/test/matrix.cpp +42 -0

No files found.
--- a/dlib/matrix/lapack/pbtrf.h
+++ b/dlib/matrix/lapack/pbtrf.h
+// Copyright (C) 2010  Davis E. King (davis@dlib.net)
+// License: Boost Software License   See LICENSE.txt for the full license.
+#ifndef DLIB_LAPACk_BDC_Hh_
+#define DLIB_LAPACk_BDC_Hh_
+
+#include "fortran_id.h"
+#include "../matrix.h"
+namespace dlib
+{
+    namespace lapack
+    {
+        namespace binding
+        {
+            extern "C"
+            {
+                void DLIB_FORTRAN_ID(dpbtrf) (const char* uplo, const integer* n, const integer* kd,
+                                              double* ab, const integer* ldab, integer* info);
+
+                void DLIB_FORTRAN_ID(spbtrf) (const char* uplo, const integer* n, const integer* kd,
+                                              float* ab, const integer* ldab, integer* info);
+
+            }
+
+            inline integer pbtrf (const char uplo, const integer n, const integer kd,
+                                  double* ab, const integer ldab)
+            {
+                integer info = 0;
+                DLIB_FORTRAN_ID(dpbtrf)(&uplo, &n, &kd, ab, &ldab, &info);
+                return info;
+            }
+
+            inline integer pbtrf (const char uplo, const integer n, const integer kd,
+                                  float* ab, const integer ldab)
+            {
+                integer info = 0;
+                DLIB_FORTRAN_ID(spbtrf)(&uplo, &n, &kd, ab, &ldab, &info);
+                return info;
+            }
+        }
+
+    // ------------------------------------------------------------------------------------
+/*  DPBTRF(l)		LAPACK routine (version	1.1)		    DPBTRF(l)
+
+NAME
+  DPBTRF - compute the Cholesky	factorization of a real	symmetric positive
+  definite band	matrix A
+
+SYNOPSIS
+
+  SUBROUTINE DPBTRF( UPLO, N, KD, AB, LDAB, INFO )
+
+      CHARACTER	     UPLO
+
+      INTEGER	     INFO, KD, LDAB, N
+
+      DOUBLE	     PRECISION AB( LDAB, * )
+
+PURPOSE
+  DPBTRF computes the Cholesky factorization of	a real symmetric positive
+  definite band	matrix A.
+
+  The factorization has	the form
+     A = U**T *	U,  if UPLO = 'U', or
+     A = L  * L**T,  if	UPLO = 'L',
+  where	U is an	upper triangular matrix	and L is lower triangular.
+
+ARGUMENTS
+
+  UPLO	  (input) CHARACTER*1
+	  = 'U':  Upper	triangle of A is stored;
+	  = 'L':  Lower	triangle of A is stored.
+
+  N	  (input) INTEGER
+	  The order of the matrix A.  N	>= 0.
+
+  KD	  (input) INTEGER
+	  The number of	superdiagonals of the matrix A if UPLO = 'U', or the
+	  number of subdiagonals if UPLO = 'L'.	 KD >= 0.
+
+  AB	  (input/output) DOUBLE	PRECISION array, dimension (LDAB,N)
+	  On entry, the	upper or lower triangle	of the symmetric band matrix
+	  A, stored in the first KD+1 rows of the array.  The j-th column of
+	  A is stored in the j-th column of the	array AB as follows: if	UPLO
+	  = 'U', AB(kd+1+i-j,j)	= A(i,j) for max(1,j-kd)<=i<=j;	if UPLO	=
+	  'L', AB(1+i-j,j)    =	A(i,j) for j<=i<=min(n,j+kd).
+
+	  On exit, if INFO = 0,	the triangular factor U	or L from the Chole-
+	  sky factorization A =	U**T*U or A = L*L**T of	the band matrix	A, in
+	  the same storage format as A.
+
+  LDAB	  (input) INTEGER
+	  The leading dimension	of the array AB.  LDAB >= KD+1.
+
+  INFO	  (output) INTEGER
+	  = 0:	successful exit
+	  < 0:	if INFO	= -i, the i-th argument	had an illegal value
+	  > 0:	if INFO	= i, the leading minor of order	i is not positive
+	  definite, and	the factorization could	not be completed.
+
+FURTHER	DETAILS
+  The band storage scheme is illustrated by the	following example, when	N =
+  6, KD	= 2, and UPLO =	'U':
+
+  On entry:			  On exit:
+
+      *	   *   a13  a24	 a35  a46      *    *	u13  u24  u35  u46
+      *	  a12  a23  a34	 a45  a56      *   u12	u23  u34  u45  u56
+     a11  a22  a33  a44	 a55  a66     u11  u22	u33  u44  u55  u66
+
+  Similarly, if	UPLO = 'L' the format of A is as follows:
+
+  On entry:			  On exit:
+
+     a11  a22  a33  a44	 a55  a66     l11  l22	l33  l44  l55  l66
+     a21  a32  a43  a54	 a65   *      l21  l32	l43  l54  l65	*
+     a31  a42  a53  a64	  *    *      l31  l42	l53  l64   *	*
+
+  Array	elements marked	* are not used by the routine.
+
+  Contributed by
+  Peter	Mayes and Giuseppe Radicati, IBM ECSEC,	Rome, March 23,	1989 */
+
+    // ------------------------------------------------------------------------------------
+
+        template <
+            typename T, 
+            long NR1, long NC1,
+            typename MM
+            >
+        int pbtrf (
+            char uplo, matrix<T,NR1,NC1,MM,column_major_layout>& ab
+        )
+        {
+            const long ldab = ab.nr();
+            const long n = ab.nc();
+            const long kd = ldab - 1; // assume fully packed 
+
+            int info = binding::pbtrf(uplo, n, kd, &ab(0,0), ldab);
+
+            return info;
+        }
+
+    // ------------------------------------------------------------------------------------
+
+
+        template <
+            typename T, 
+            long NR1, long NC1,
+            typename MM
+            >
+        int pbtrf (
+            char uplo, matrix<T,NR1,NC1,MM,row_major_layout>& ab
+        )
+        {
+            const long ldab = ab.nr();
+            const long n = ab.nc();
+            const long kd = ldab - 1; // assume fully packed 
+
+            matrix<T,NC1,NR1,MM,row_major_layout> abt = trans(ab);
+
+            int info = binding::pbtrf(uplo, n, kd, &abt(0,0), ldab);
+
+            ab = trans(abt);
+
+            return info;
+        }
+
+    // ------------------------------------------------------------------------------------
+
+    }
+
+}
+
+// ----------------------------------------------------------------------------------------
+
+#endif // DLIB_LAPACk_BDC_Hh_
+
+
--- a/dlib/matrix/matrix_la.h
+++ b/dlib/matrix/matrix_la.h
@@ -17,10 +17,13 @@

 #ifdef DLIB_USE_LAPACK
 #include "lapack/potrf.h"
+#include "lapack/pbtrf.h"
 #include "lapack/gesdd.h"
 #include "lapack/gesvd.h"
 #endif

+#include <iostream>
+
 namespace dlib
 {

@@ -1117,7 +1120,78 @@ convergence:
            );
        typename matrix_exp<EXP>::matrix_type L(A.nr(),A.nc());

-#ifdef DLIB_USE_LAPACK
+        typedef typename EXP::type T;
+
+        bool banded = false;
+        long bandwidth = 0;
+
+        if (A.nr() > 4) // Only test for banded matrix if matrix is big enough
+        {
+           // Detect if matrix is banded and, if so, matrix bandwidth
+           banded = true;
+           for (long r = 0; r < A.nr(); ++r)
+              for (long c = (r + bandwidth + 1); c < A.nc(); ++c)
+                 if (A(r, c) != 0)
+                 {
+                    bandwidth = c - r;
+                    if (bandwidth > A.nr() / 2)
+                    {
+                       banded = false;
+                       goto escape_banded_detection;
+                    }
+                 }
+        }
+escape_banded_detection:
+
+        if (banded)
+        {
+           // Store in compact form - use column major for LAPACK
+           matrix<T,0,0,default_memory_manager,column_major_layout> B(bandwidth + 1, A.nc());
+           set_all_elements(B, 0);
+
+           for (long r = 0; r < A.nr(); ++r)
+              for (long c = r; c < std::min(r + bandwidth + 1, A.nc()); ++c)
+                 B(c - r, r) = A(r, c);
+
+#ifdef DLIB_USE_LAPACK 
+
+           lapack::pbtrf('L', B);
+           
+#else
+
+           // Peform compact Cholesky
+           for (long k = 0; k < A.nr(); ++k)
+           {
+              long last = std::min(k + bandwidth, A.nr() - 1) - k;
+              for (long j = 1; j <= last; ++j)
+              {
+                 long i = k + j;
+                 for (long c = 0; c <= (last - j); ++c)
+                    B(c, i) -= B(j, k) / B(0, k) * B(c + j, k);
+              }
+              T norm = std::sqrt(B(0, k));
+              for (long i = 0; i <= bandwidth; ++i)
+                 B(i, k) /= norm;
+           }
+           for (long c = A.nc() - bandwidth + 1; c < A.nc(); ++c)
+              B(bandwidth, c) = 0;
+
+#endif
+
+           // Unpack lower triangular area
+           set_all_elements(L, 0);
+           for (long c = 0; c < A.nc(); ++c)
+              for (long i = 0; i <= bandwidth; ++i)
+              {
+                 long ind = c + i;
+                 if (ind < A.nc())
+                    L(ind, c) = B(i, c);
+              }
+
+           return L;
+        }
+
+#ifdef DLIB_USE_LAPACK        
        // Only call LAPACK if the matrix is big enough.  Otherwise,
        // our own code is faster, especially for statically dimensioned 
        // matrices.
@@ -1129,7 +1203,6 @@ convergence:
            return lowerm(L);
        }
 #endif
-        typedef typename EXP::type T;
        set_all_elements(L,0);

        // do nothing if the matrix is empty

--- a/dlib/test/matrix.cpp
+++ b/dlib/test/matrix.cpp
+
 // Copyright (C) 2006  Davis E. King (davis@dlib.net)
 // License: Boost Software License   See LICENSE.txt for the full license.

@@ -23,8 +24,27 @@ namespace
    using namespace dlib;
    using namespace std;

+    dlib::rand rnd;
+
    logger dlog("test.matrix");

+    template <typename type>
+    const matrix<type> rand_sp_banded(long n, long bw)
+    {
+        matrix<type> m = 10 * identity_matrix<type>(n);
+        for (long row = 0; row < m.nr(); ++row)
+        {
+            for (long col = row; col < min(m.nc(), row + bw); ++col)
+            {
+                type r = rnd.get_random_double();
+                m(row,col) += r;
+                m(col,row) += r; 
+            }
+        }
+
+        return m;
+    }
+
    void matrix_test (
    )
    /*!
@@ -730,6 +750,28 @@ namespace

        }

+        {
+            // Test band chol
+            matrix<double> m = rand_sp_banded<double>(10, 3);
+
+            matrix<double> L = chol(m);
+            DLIB_TEST_MSG(equal(L*trans(L), m, 1e-10), L*trans(L)-m);   
+            DLIB_TEST_MSG(equal(inv(m), inv_upper_triangular(trans(L))*inv_lower_triangular((L))), "");
+            DLIB_TEST_MSG(equal(inv(m), trans(inv_lower_triangular(L))*inv_lower_triangular((L))), ""); 
+            DLIB_TEST_MSG(equal(inv(m), trans(inv_lower_triangular(L))*trans(inv_upper_triangular(trans(L)))), "");
+        }
+
+        {
+            // Test band chol in column major layout
+            matrix<double,10,10,default_memory_manager,column_major_layout> m(rand_sp_banded<double>(10, 3));
+
+            matrix<double> L = chol(m);
+            DLIB_TEST_MSG(equal(L*trans(L), m, 1e-10), L*trans(L)-m);   
+            DLIB_TEST_MSG(equal(inv(m), inv_upper_triangular(trans(L))*inv_lower_triangular((L))), "");
+            DLIB_TEST_MSG(equal(inv(m), trans(inv_lower_triangular(L))*inv_lower_triangular((L))), ""); 
+            DLIB_TEST_MSG(equal(inv(m), trans(inv_lower_triangular(L))*trans(inv_upper_triangular(trans(L)))), "");
+        }
+
        {
            matrix<int> m(3,4), m2;
            m = 1,2,3,4,