| // Copyright ©2017 The gonum Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| package gonum |
| |
| import ( |
| "gonum.org/v1/gonum/blas" |
| "gonum.org/v1/gonum/blas/blas64" |
| ) |
| |
| // Dgeqp3 computes a QR factorization with column pivoting of the |
| // m×n matrix A: A*P = Q*R using Level 3 BLAS. |
| // |
| // The matrix Q is represented as a product of elementary reflectors |
| // Q = H_0 H_1 . . . H_{k-1}, where k = min(m,n). |
| // Each H_i has the form |
| // H_i = I - tau * v * v^T |
| // where tau and v are real vectors with v[0:i-1] = 0 and v[i] = 1; |
| // v[i:m] is stored on exit in A[i:m, i], and tau in tau[i]. |
| // |
| // jpvt specifies a column pivot to be applied to A. If |
| // jpvt[j] is at least zero, the jth column of A is permuted |
| // to the front of A*P (a leading column), if jpvt[j] is -1 |
| // the jth column of A is a free column. If jpvt[j] < -1, Dgeqp3 |
| // will panic. On return, jpvt holds the permutation that was |
| // applied; the jth column of A*P was the jpvt[j] column of A. |
| // jpvt must have length n or Dgeqp3 will panic. |
| // |
| // tau holds the scalar factors of the elementary reflectors. |
| // It must have length min(m, n), otherwise Dgeqp3 will panic. |
| // |
| // work must have length at least max(1,lwork), and lwork must be at least |
| // 3*n+1, otherwise Dgeqp3 will panic. For optimal performance lwork must |
| // be at least 2*n+(n+1)*nb, where nb is the optimal blocksize. On return, |
| // work[0] will contain the optimal value of lwork. |
| // |
| // If lwork == -1, instead of performing Dgeqp3, only the optimal value of lwork |
| // will be stored in work[0]. |
| // |
| // Dgeqp3 is an internal routine. It is exported for testing purposes. |
| func (impl Implementation) Dgeqp3(m, n int, a []float64, lda int, jpvt []int, tau, work []float64, lwork int) { |
| const ( |
| inb = 1 |
| inbmin = 2 |
| ixover = 3 |
| ) |
| checkMatrix(m, n, a, lda) |
| |
| if len(jpvt) != n { |
| panic(badIpiv) |
| } |
| for _, v := range jpvt { |
| if v < -1 || n <= v { |
| panic("lapack: jpvt element out of range") |
| } |
| } |
| minmn := min(m, n) |
| if len(work) < max(1, lwork) { |
| panic(badWork) |
| } |
| |
| var iws, lwkopt, nb int |
| if minmn == 0 { |
| iws = 1 |
| lwkopt = 1 |
| } else { |
| iws = 3*n + 1 |
| nb = impl.Ilaenv(inb, "DGEQRF", " ", m, n, -1, -1) |
| lwkopt = 2*n + (n+1)*nb |
| } |
| work[0] = float64(lwkopt) |
| |
| if lwork == -1 { |
| return |
| } |
| |
| if len(tau) < minmn { |
| panic(badTau) |
| } |
| |
| bi := blas64.Implementation() |
| |
| // Move initial columns up front. |
| var nfxd int |
| for j := 0; j < n; j++ { |
| if jpvt[j] == -1 { |
| jpvt[j] = j |
| continue |
| } |
| if j != nfxd { |
| bi.Dswap(m, a[j:], lda, a[nfxd:], lda) |
| jpvt[j], jpvt[nfxd] = jpvt[nfxd], j |
| } else { |
| jpvt[j] = j |
| } |
| nfxd++ |
| } |
| |
| // Factorize nfxd columns. |
| // |
| // Compute the QR factorization of nfxd columns and update remaining columns. |
| if nfxd > 0 { |
| na := min(m, nfxd) |
| impl.Dgeqrf(m, na, a, lda, tau, work, lwork) |
| iws = max(iws, int(work[0])) |
| if na < n { |
| impl.Dormqr(blas.Left, blas.Trans, m, n-na, na, a, lda, tau[:na], a[na:], lda, |
| work, lwork) |
| iws = max(iws, int(work[0])) |
| } |
| } |
| |
| if nfxd >= minmn { |
| work[0] = float64(iws) |
| return |
| } |
| |
| // Factorize free columns. |
| sm := m - nfxd |
| sn := n - nfxd |
| sminmn := minmn - nfxd |
| |
| // Determine the block size. |
| nb = impl.Ilaenv(inb, "DGEQRF", " ", sm, sn, -1, -1) |
| nbmin := 2 |
| nx := 0 |
| |
| if 1 < nb && nb < sminmn { |
| // Determine when to cross over from blocked to unblocked code. |
| nx = max(0, impl.Ilaenv(ixover, "DGEQRF", " ", sm, sn, -1, -1)) |
| |
| if nx < sminmn { |
| // Determine if workspace is large enough for blocked code. |
| minws := 2*sn + (sn+1)*nb |
| iws = max(iws, minws) |
| if lwork < minws { |
| // Not enough workspace to use optimal nb. Reduce |
| // nb and determine the minimum value of nb. |
| nb = (lwork - 2*sn) / (sn + 1) |
| nbmin = max(2, impl.Ilaenv(inbmin, "DGEQRF", " ", sm, sn, -1, -1)) |
| } |
| } |
| } |
| |
| // Initialize partial column norms. |
| // The first n elements of work store the exact column norms. |
| for j := nfxd; j < n; j++ { |
| work[j] = bi.Dnrm2(sm, a[nfxd*lda+j:], lda) |
| work[n+j] = work[j] |
| } |
| j := nfxd |
| if nbmin <= nb && nb < sminmn && nx < sminmn { |
| // Use blocked code initially. |
| |
| // Compute factorization. |
| var fjb int |
| for topbmn := minmn - nx; j < topbmn; j += fjb { |
| jb := min(nb, topbmn-j) |
| |
| // Factorize jb columns among columns j:n. |
| fjb = impl.Dlaqps(m, n-j, j, jb, a[j:], lda, jpvt[j:], tau[j:], |
| work[j:n], work[j+n:2*n], work[2*n:2*n+jb], work[2*n+jb:], jb) |
| } |
| } |
| |
| // Use unblocked code to factor the last or only block. |
| if j < minmn { |
| impl.Dlaqp2(m, n-j, j, a[j:], lda, jpvt[j:], tau[j:], |
| work[j:n], work[j+n:2*n], work[2*n:]) |
| } |
| |
| work[0] = float64(iws) |
| } |