blob: d8814cdbbc1a8304e1745dc6ce64766d329f3428 [file] [log] [blame]
// Copyright ©2019 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package gonum
import (
"gonum.org/v1/gonum/blas"
"gonum.org/v1/gonum/blas/blas64"
)
// Dpbtrf computes the Cholesky factorization of an n×n symmetric positive
// definite band matrix
// A = Uᵀ * U if uplo == blas.Upper
// A = L * Lᵀ if uplo == blas.Lower
// where U is an upper triangular band matrix and L is lower triangular. kd is
// the number of super- or sub-diagonals of A.
//
// The band storage scheme is illustrated below when n = 6 and kd = 2. Elements
// marked * are not used by the function.
//
// uplo == blas.Upper
// On entry: On return:
// a00 a01 a02 u00 u01 u02
// a11 a12 a13 u11 u12 u13
// a22 a23 a24 u22 u23 u24
// a33 a34 a35 u33 u34 u35
// a44 a45 * u44 u45 *
// a55 * * u55 * *
//
// uplo == blas.Lower
// On entry: On return:
// * * a00 * * l00
// * a10 a11 * l10 l11
// a20 a21 a22 l20 l21 l22
// a31 a32 a33 l31 l32 l33
// a42 a43 a44 l42 l43 l44
// a53 a54 a55 l53 l54 l55
func (impl Implementation) Dpbtrf(uplo blas.Uplo, n, kd int, ab []float64, ldab int) (ok bool) {
const nbmax = 32
switch {
case uplo != blas.Upper && uplo != blas.Lower:
panic(badUplo)
case n < 0:
panic(nLT0)
case kd < 0:
panic(kdLT0)
case ldab < kd+1:
panic(badLdA)
}
// Quick return if possible.
if n == 0 {
return true
}
if len(ab) < (n-1)*ldab+kd+1 {
panic(shortAB)
}
opts := string(blas.Upper)
if uplo == blas.Lower {
opts = string(blas.Lower)
}
nb := impl.Ilaenv(1, "DPBTRF", opts, n, kd, -1, -1)
// The block size must not exceed the semi-bandwidth kd, and must not
// exceed the limit set by the size of the local array work.
nb = min(nb, nbmax)
if nb <= 1 || kd < nb {
// Use unblocked code.
return impl.Dpbtf2(uplo, n, kd, ab, ldab)
}
// Use blocked code.
ldwork := nb
work := make([]float64, nb*ldwork)
bi := blas64.Implementation()
if uplo == blas.Upper {
// Compute the Cholesky factorization of a symmetric band
// matrix, given the upper triangle of the matrix in band
// storage.
// Process the band matrix one diagonal block at a time.
for i := 0; i < n; i += nb {
ib := min(nb, n-i)
// Factorize the diagonal block.
ok := impl.Dpotf2(uplo, ib, ab[i*ldab:], ldab-1)
if !ok {
return false
}
if i+ib >= n {
continue
}
// Update the relevant part of the trailing submatrix.
// If A11 denotes the diagonal block which has just been
// factorized, then we need to update the remaining
// blocks in the diagram:
//
// A11 A12 A13
// A22 A23
// A33
//
// The numbers of rows and columns in the partitioning
// are ib, i2, i3 respectively. The blocks A12, A22 and
// A23 are empty if ib = kd. The upper triangle of A13
// lies outside the band.
i2 := min(kd-ib, n-i-ib)
if i2 > 0 {
// Update A12.
bi.Dtrsm(blas.Left, blas.Upper, blas.Trans, blas.NonUnit, ib, i2,
1, ab[i*ldab:], ldab-1, ab[i*ldab+ib:], ldab-1)
// Update A22.
bi.Dsyrk(blas.Upper, blas.Trans, i2, ib,
-1, ab[i*ldab+ib:], ldab-1, 1, ab[(i+ib)*ldab:], ldab-1)
}
i3 := min(ib, n-i-kd)
if i3 > 0 {
// Copy the lower triangle of A13 into the work array.
for ii := 0; ii < ib; ii++ {
for jj := 0; jj <= min(ii, i3-1); jj++ {
work[ii*ldwork+jj] = ab[(i+ii)*ldab+kd-ii+jj]
}
}
// Update A13 (in the work array).
bi.Dtrsm(blas.Left, blas.Upper, blas.Trans, blas.NonUnit, ib, i3,
1, ab[i*ldab:], ldab-1, work, ldwork)
// Update A23.
if i2 > 0 {
bi.Dgemm(blas.Trans, blas.NoTrans, i2, i3, ib,
-1, ab[i*ldab+ib:], ldab-1, work, ldwork,
1, ab[(i+ib)*ldab+kd-ib:], ldab-1)
}
// Update A33.
bi.Dsyrk(blas.Upper, blas.Trans, i3, ib,
-1, work, ldwork, 1, ab[(i+kd)*ldab:], ldab-1)
// Copy the lower triangle of A13 back into place.
for ii := 0; ii < ib; ii++ {
for jj := 0; jj <= min(ii, i3-1); jj++ {
ab[(i+ii)*ldab+kd-ii+jj] = work[ii*ldwork+jj]
}
}
}
}
} else {
// Compute the Cholesky factorization of a symmetric band
// matrix, given the lower triangle of the matrix in band
// storage.
// Process the band matrix one diagonal block at a time.
for i := 0; i < n; i += nb {
ib := min(nb, n-i)
// Factorize the diagonal block.
ok := impl.Dpotf2(uplo, ib, ab[i*ldab+kd:], ldab-1)
if !ok {
return false
}
if i+ib >= n {
continue
}
// Update the relevant part of the trailing submatrix.
// If A11 denotes the diagonal block which has just been
// factorized, then we need to update the remaining
// blocks in the diagram:
//
// A11
// A21 A22
// A31 A32 A33
//
// The numbers of rows and columns in the partitioning
// are ib, i2, i3 respectively. The blocks A21, A22 and
// A32 are empty if ib = kd. The lowr triangle of A31
// lies outside the band.
i2 := min(kd-ib, n-i-ib)
if i2 > 0 {
// Update A21.
bi.Dtrsm(blas.Right, blas.Lower, blas.Trans, blas.NonUnit, i2, ib,
1, ab[i*ldab+kd:], ldab-1, ab[(i+ib)*ldab+kd-ib:], ldab-1)
// Update A22.
bi.Dsyrk(blas.Lower, blas.NoTrans, i2, ib,
-1, ab[(i+ib)*ldab+kd-ib:], ldab-1, 1, ab[(i+ib)*ldab+kd:], ldab-1)
}
i3 := min(ib, n-i-kd)
if i3 > 0 {
// Copy the upper triangle of A31 into the work array.
for ii := 0; ii < i3; ii++ {
for jj := ii; jj < ib; jj++ {
work[ii*ldwork+jj] = ab[(ii+i+kd)*ldab+jj-ii]
}
}
// Update A31 (in the work array).
bi.Dtrsm(blas.Right, blas.Lower, blas.Trans, blas.NonUnit, i3, ib,
1, ab[i*ldab+kd:], ldab-1, work, ldwork)
// Update A32.
if i2 > 0 {
bi.Dgemm(blas.NoTrans, blas.Trans, i3, i2, ib,
-1, work, ldwork, ab[(i+ib)*ldab+kd-ib:], ldab-1,
1, ab[(i+kd)*ldab+ib:], ldab-1)
}
// Update A33.
bi.Dsyrk(blas.Lower, blas.NoTrans, i3, ib,
-1, work, ldwork, 1, ab[(i+kd)*ldab+kd:], ldab-1)
// Copy the upper triangle of A31 back into place.
for ii := 0; ii < i3; ii++ {
for jj := ii; jj < ib; jj++ {
ab[(ii+i+kd)*ldab+jj-ii] = work[ii*ldwork+jj]
}
}
}
}
}
return true
}