blob: d8814cdbbc1a8304e1745dc6ce64766d329f3428 [file] [log] [blame]
 // Copyright ©2019 The Gonum Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package gonum import ( "gonum.org/v1/gonum/blas" "gonum.org/v1/gonum/blas/blas64" ) // Dpbtrf computes the Cholesky factorization of an n×n symmetric positive // definite band matrix // A = Uᵀ * U if uplo == blas.Upper // A = L * Lᵀ if uplo == blas.Lower // where U is an upper triangular band matrix and L is lower triangular. kd is // the number of super- or sub-diagonals of A. // // The band storage scheme is illustrated below when n = 6 and kd = 2. Elements // marked * are not used by the function. // // uplo == blas.Upper // On entry: On return: // a00 a01 a02 u00 u01 u02 // a11 a12 a13 u11 u12 u13 // a22 a23 a24 u22 u23 u24 // a33 a34 a35 u33 u34 u35 // a44 a45 * u44 u45 * // a55 * * u55 * * // // uplo == blas.Lower // On entry: On return: // * * a00 * * l00 // * a10 a11 * l10 l11 // a20 a21 a22 l20 l21 l22 // a31 a32 a33 l31 l32 l33 // a42 a43 a44 l42 l43 l44 // a53 a54 a55 l53 l54 l55 func (impl Implementation) Dpbtrf(uplo blas.Uplo, n, kd int, ab []float64, ldab int) (ok bool) { const nbmax = 32 switch { case uplo != blas.Upper && uplo != blas.Lower: panic(badUplo) case n < 0: panic(nLT0) case kd < 0: panic(kdLT0) case ldab < kd+1: panic(badLdA) } // Quick return if possible. if n == 0 { return true } if len(ab) < (n-1)*ldab+kd+1 { panic(shortAB) } opts := string(blas.Upper) if uplo == blas.Lower { opts = string(blas.Lower) } nb := impl.Ilaenv(1, "DPBTRF", opts, n, kd, -1, -1) // The block size must not exceed the semi-bandwidth kd, and must not // exceed the limit set by the size of the local array work. nb = min(nb, nbmax) if nb <= 1 || kd < nb { // Use unblocked code. return impl.Dpbtf2(uplo, n, kd, ab, ldab) } // Use blocked code. ldwork := nb work := make([]float64, nb*ldwork) bi := blas64.Implementation() if uplo == blas.Upper { // Compute the Cholesky factorization of a symmetric band // matrix, given the upper triangle of the matrix in band // storage. // Process the band matrix one diagonal block at a time. for i := 0; i < n; i += nb { ib := min(nb, n-i) // Factorize the diagonal block. ok := impl.Dpotf2(uplo, ib, ab[i*ldab:], ldab-1) if !ok { return false } if i+ib >= n { continue } // Update the relevant part of the trailing submatrix. // If A11 denotes the diagonal block which has just been // factorized, then we need to update the remaining // blocks in the diagram: // // A11 A12 A13 // A22 A23 // A33 // // The numbers of rows and columns in the partitioning // are ib, i2, i3 respectively. The blocks A12, A22 and // A23 are empty if ib = kd. The upper triangle of A13 // lies outside the band. i2 := min(kd-ib, n-i-ib) if i2 > 0 { // Update A12. bi.Dtrsm(blas.Left, blas.Upper, blas.Trans, blas.NonUnit, ib, i2, 1, ab[i*ldab:], ldab-1, ab[i*ldab+ib:], ldab-1) // Update A22. bi.Dsyrk(blas.Upper, blas.Trans, i2, ib, -1, ab[i*ldab+ib:], ldab-1, 1, ab[(i+ib)*ldab:], ldab-1) } i3 := min(ib, n-i-kd) if i3 > 0 { // Copy the lower triangle of A13 into the work array. for ii := 0; ii < ib; ii++ { for jj := 0; jj <= min(ii, i3-1); jj++ { work[ii*ldwork+jj] = ab[(i+ii)*ldab+kd-ii+jj] } } // Update A13 (in the work array). bi.Dtrsm(blas.Left, blas.Upper, blas.Trans, blas.NonUnit, ib, i3, 1, ab[i*ldab:], ldab-1, work, ldwork) // Update A23. if i2 > 0 { bi.Dgemm(blas.Trans, blas.NoTrans, i2, i3, ib, -1, ab[i*ldab+ib:], ldab-1, work, ldwork, 1, ab[(i+ib)*ldab+kd-ib:], ldab-1) } // Update A33. bi.Dsyrk(blas.Upper, blas.Trans, i3, ib, -1, work, ldwork, 1, ab[(i+kd)*ldab:], ldab-1) // Copy the lower triangle of A13 back into place. for ii := 0; ii < ib; ii++ { for jj := 0; jj <= min(ii, i3-1); jj++ { ab[(i+ii)*ldab+kd-ii+jj] = work[ii*ldwork+jj] } } } } } else { // Compute the Cholesky factorization of a symmetric band // matrix, given the lower triangle of the matrix in band // storage. // Process the band matrix one diagonal block at a time. for i := 0; i < n; i += nb { ib := min(nb, n-i) // Factorize the diagonal block. ok := impl.Dpotf2(uplo, ib, ab[i*ldab+kd:], ldab-1) if !ok { return false } if i+ib >= n { continue } // Update the relevant part of the trailing submatrix. // If A11 denotes the diagonal block which has just been // factorized, then we need to update the remaining // blocks in the diagram: // // A11 // A21 A22 // A31 A32 A33 // // The numbers of rows and columns in the partitioning // are ib, i2, i3 respectively. The blocks A21, A22 and // A32 are empty if ib = kd. The lowr triangle of A31 // lies outside the band. i2 := min(kd-ib, n-i-ib) if i2 > 0 { // Update A21. bi.Dtrsm(blas.Right, blas.Lower, blas.Trans, blas.NonUnit, i2, ib, 1, ab[i*ldab+kd:], ldab-1, ab[(i+ib)*ldab+kd-ib:], ldab-1) // Update A22. bi.Dsyrk(blas.Lower, blas.NoTrans, i2, ib, -1, ab[(i+ib)*ldab+kd-ib:], ldab-1, 1, ab[(i+ib)*ldab+kd:], ldab-1) } i3 := min(ib, n-i-kd) if i3 > 0 { // Copy the upper triangle of A31 into the work array. for ii := 0; ii < i3; ii++ { for jj := ii; jj < ib; jj++ { work[ii*ldwork+jj] = ab[(ii+i+kd)*ldab+jj-ii] } } // Update A31 (in the work array). bi.Dtrsm(blas.Right, blas.Lower, blas.Trans, blas.NonUnit, i3, ib, 1, ab[i*ldab+kd:], ldab-1, work, ldwork) // Update A32. if i2 > 0 { bi.Dgemm(blas.NoTrans, blas.Trans, i3, i2, ib, -1, work, ldwork, ab[(i+ib)*ldab+kd-ib:], ldab-1, 1, ab[(i+kd)*ldab+ib:], ldab-1) } // Update A33. bi.Dsyrk(blas.Lower, blas.NoTrans, i3, ib, -1, work, ldwork, 1, ab[(i+kd)*ldab+kd:], ldab-1) // Copy the upper triangle of A31 back into place. for ii := 0; ii < i3; ii++ { for jj := ii; jj < ib; jj++ { ab[(ii+i+kd)*ldab+jj-ii] = work[ii*ldwork+jj] } } } } } return true }