blob: a86a8a569c0bb3ec3e50b4011c43ff9e372b3be1 [file] [log] [blame]
 // Copyright ©2015 The Gonum Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package gonum import ( "gonum.org/v1/gonum/blas" "gonum.org/v1/gonum/lapack" ) // Dormlq multiplies the matrix C by the orthogonal matrix Q defined by the // slices a and tau. A and tau are as returned from Dgelqf. // C = Q * C if side == blas.Left and trans == blas.NoTrans // C = Qᵀ * C if side == blas.Left and trans == blas.Trans // C = C * Q if side == blas.Right and trans == blas.NoTrans // C = C * Qᵀ if side == blas.Right and trans == blas.Trans // If side == blas.Left, A is a matrix of side k×m, and if side == blas.Right // A is of size k×n. This uses a blocked algorithm. // // work is temporary storage, and lwork specifies the usable memory length. // At minimum, lwork >= m if side == blas.Left and lwork >= n if side == blas.Right, // and this function will panic otherwise. // Dormlq uses a block algorithm, but the block size is limited // by the temporary space available. If lwork == -1, instead of performing Dormlq, // the optimal work length will be stored into work[0]. // // tau contains the Householder scales and must have length at least k, and // this function will panic otherwise. func (impl Implementation) Dormlq(side blas.Side, trans blas.Transpose, m, n, k int, a []float64, lda int, tau, c []float64, ldc int, work []float64, lwork int) { left := side == blas.Left nw := m if left { nw = n } switch { case !left && side != blas.Right: panic(badSide) case trans != blas.Trans && trans != blas.NoTrans: panic(badTrans) case m < 0: panic(mLT0) case n < 0: panic(nLT0) case k < 0: panic(kLT0) case left && k > m: panic(kGTM) case !left && k > n: panic(kGTN) case left && lda < max(1, m): panic(badLdA) case !left && lda < max(1, n): panic(badLdA) case lwork < max(1, nw) && lwork != -1: panic(badLWork) case len(work) < max(1, lwork): panic(shortWork) } // Quick return if possible. if m == 0 || n == 0 || k == 0 { work[0] = 1 return } const ( nbmax = 64 ldt = nbmax tsize = nbmax * ldt ) opts := string(side) + string(trans) nb := min(nbmax, impl.Ilaenv(1, "DORMLQ", opts, m, n, k, -1)) lworkopt := max(1, nw)*nb + tsize if lwork == -1 { work[0] = float64(lworkopt) return } switch { case left && len(a) < (k-1)*lda+m: panic(shortA) case !left && len(a) < (k-1)*lda+n: panic(shortA) case len(tau) < k: panic(shortTau) case len(c) < (m-1)*ldc+n: panic(shortC) } nbmin := 2 if 1 < nb && nb < k { iws := nw*nb + tsize if lwork < iws { nb = (lwork - tsize) / nw nbmin = max(2, impl.Ilaenv(2, "DORMLQ", opts, m, n, k, -1)) } } if nb < nbmin || k <= nb { // Call unblocked code. impl.Dorml2(side, trans, m, n, k, a, lda, tau, c, ldc, work) work[0] = float64(lworkopt) return } t := work[:tsize] wrk := work[tsize:] ldwrk := nb notrans := trans == blas.NoTrans transt := blas.NoTrans if notrans { transt = blas.Trans } switch { case left && notrans: for i := 0; i < k; i += nb { ib := min(nb, k-i) impl.Dlarft(lapack.Forward, lapack.RowWise, m-i, ib, a[i*lda+i:], lda, tau[i:], t, ldt) impl.Dlarfb(side, transt, lapack.Forward, lapack.RowWise, m-i, n, ib, a[i*lda+i:], lda, t, ldt, c[i*ldc:], ldc, wrk, ldwrk) } case left && !notrans: for i := ((k - 1) / nb) * nb; i >= 0; i -= nb { ib := min(nb, k-i) impl.Dlarft(lapack.Forward, lapack.RowWise, m-i, ib, a[i*lda+i:], lda, tau[i:], t, ldt) impl.Dlarfb(side, transt, lapack.Forward, lapack.RowWise, m-i, n, ib, a[i*lda+i:], lda, t, ldt, c[i*ldc:], ldc, wrk, ldwrk) } case !left && notrans: for i := ((k - 1) / nb) * nb; i >= 0; i -= nb { ib := min(nb, k-i) impl.Dlarft(lapack.Forward, lapack.RowWise, n-i, ib, a[i*lda+i:], lda, tau[i:], t, ldt) impl.Dlarfb(side, transt, lapack.Forward, lapack.RowWise, m, n-i, ib, a[i*lda+i:], lda, t, ldt, c[i:], ldc, wrk, ldwrk) } case !left && !notrans: for i := 0; i < k; i += nb { ib := min(nb, k-i) impl.Dlarft(lapack.Forward, lapack.RowWise, n-i, ib, a[i*lda+i:], lda, tau[i:], t, ldt) impl.Dlarfb(side, transt, lapack.Forward, lapack.RowWise, m, n-i, ib, a[i*lda+i:], lda, t, ldt, c[i:], ldc, wrk, ldwrk) } } work[0] = float64(lworkopt) }