| // Copyright ©2016 The gonum Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| package gonum |
| |
| import ( |
| "gonum.org/v1/gonum/blas" |
| "gonum.org/v1/gonum/blas/blas64" |
| ) |
| |
| // Dsytrd reduces a symmetric n×n matrix A to symmetric tridiagonal form by an |
| // orthogonal similarity transformation |
| // Q^T * A * Q = T |
| // where Q is an orthonormal matrix and T is symmetric and tridiagonal. |
| // |
| // On entry, a contains the elements of the input matrix in the triangle specified |
| // by uplo. On exit, the diagonal and sub/super-diagonal are overwritten by the |
| // corresponding elements of the tridiagonal matrix T. The remaining elements in |
| // the triangle, along with the array tau, contain the data to construct Q as |
| // the product of elementary reflectors. |
| // |
| // If uplo == blas.Upper, Q is constructed with |
| // Q = H_{n-2} * ... * H_1 * H_0 |
| // where |
| // H_i = I - tau_i * v * v^T |
| // v is constructed as v[i+1:n] = 0, v[i] = 1, v[0:i-1] is stored in A[0:i-1, i+1]. |
| // The elements of A are |
| // [ d e v1 v2 v3] |
| // [ d e v2 v3] |
| // [ d e v3] |
| // [ d e] |
| // [ e] |
| // |
| // If uplo == blas.Lower, Q is constructed with |
| // Q = H_0 * H_1 * ... * H_{n-2} |
| // where |
| // H_i = I - tau_i * v * v^T |
| // v is constructed as v[0:i+1] = 0, v[i+1] = 1, v[i+2:n] is stored in A[i+2:n, i]. |
| // The elements of A are |
| // [ d ] |
| // [ e d ] |
| // [v0 e d ] |
| // [v0 v1 e d ] |
| // [v0 v1 v2 e d] |
| // |
| // d must have length n, and e and tau must have length n-1. Dsytrd will panic if |
| // these conditions are not met. |
| // |
| // work is temporary storage, and lwork specifies the usable memory length. At minimum, |
| // lwork >= 1, and Dsytrd will panic otherwise. The amount of blocking is |
| // limited by the usable length. |
| // If lwork == -1, instead of computing Dsytrd the optimal work length is stored |
| // into work[0]. |
| // |
| // Dsytrd is an internal routine. It is exported for testing purposes. |
| func (impl Implementation) Dsytrd(uplo blas.Uplo, n int, a []float64, lda int, d, e, tau, work []float64, lwork int) { |
| checkMatrix(n, n, a, lda) |
| if len(d) < n { |
| panic(badD) |
| } |
| if len(e) < n-1 { |
| panic(badE) |
| } |
| if len(tau) < n-1 { |
| panic(badTau) |
| } |
| if len(work) < lwork { |
| panic(shortWork) |
| } |
| if lwork != -1 && lwork < 1 { |
| panic(badWork) |
| } |
| |
| var upper bool |
| var opts string |
| switch uplo { |
| case blas.Upper: |
| upper = true |
| opts = "U" |
| case blas.Lower: |
| opts = "L" |
| default: |
| panic(badUplo) |
| } |
| |
| if n == 0 { |
| work[0] = 1 |
| return |
| } |
| |
| nb := impl.Ilaenv(1, "DSYTRD", opts, n, -1, -1, -1) |
| lworkopt := n * nb |
| if lwork == -1 { |
| work[0] = float64(lworkopt) |
| return |
| } |
| |
| nx := n |
| bi := blas64.Implementation() |
| var ldwork int |
| if 1 < nb && nb < n { |
| // Determine when to cross over from blocked to unblocked code. The last |
| // block is always handled by unblocked code. |
| opts := "L" |
| if upper { |
| opts = "U" |
| } |
| nx = max(nb, impl.Ilaenv(3, "DSYTRD", opts, n, -1, -1, -1)) |
| if nx < n { |
| // Determine if workspace is large enough for blocked code. |
| ldwork = nb |
| iws := n * ldwork |
| if lwork < iws { |
| // Not enough workspace to use optimal nb: determine the minimum |
| // value of nb and reduce nb or force use of unblocked code by |
| // setting nx = n. |
| nb = max(lwork/n, 1) |
| nbmin := impl.Ilaenv(2, "DSYTRD", opts, n, -1, -1, -1) |
| if nb < nbmin { |
| nx = n |
| } |
| } |
| } else { |
| nx = n |
| } |
| } else { |
| nb = 1 |
| } |
| ldwork = nb |
| |
| if upper { |
| // Reduce the upper triangle of A. Columns 0:kk are handled by the |
| // unblocked method. |
| var i int |
| kk := n - ((n-nx+nb-1)/nb)*nb |
| for i = n - nb; i >= kk; i -= nb { |
| // Reduce columns i:i+nb to tridiagonal form and form the matrix W |
| // which is needed to update the unreduced part of the matrix. |
| impl.Dlatrd(uplo, i+nb, nb, a, lda, e, tau, work, ldwork) |
| |
| // Update the unreduced submatrix A[0:i-1,0:i-1], using an update |
| // of the form A = A - V*W^T - W*V^T. |
| bi.Dsyr2k(uplo, blas.NoTrans, i, nb, -1, a[i:], lda, work, ldwork, 1, a, lda) |
| |
| // Copy superdiagonal elements back into A, and diagonal elements into D. |
| for j := i; j < i+nb; j++ { |
| a[(j-1)*lda+j] = e[j-1] |
| d[j] = a[j*lda+j] |
| } |
| } |
| // Use unblocked code to reduce the last or only block |
| // check that i == kk. |
| impl.Dsytd2(uplo, kk, a, lda, d, e, tau) |
| } else { |
| var i int |
| // Reduce the lower triangle of A. |
| for i = 0; i < n-nx; i += nb { |
| // Reduce columns 0:i+nb to tridiagonal form and form the matrix W |
| // which is needed to update the unreduced part of the matrix. |
| impl.Dlatrd(uplo, n-i, nb, a[i*lda+i:], lda, e[i:], tau[i:], work, ldwork) |
| |
| // Update the unreduced submatrix A[i+ib:n, i+ib:n], using an update |
| // of the form A = A + V*W^T - W*V^T. |
| bi.Dsyr2k(uplo, blas.NoTrans, n-i-nb, nb, -1, a[(i+nb)*lda+i:], lda, |
| work[nb*ldwork:], ldwork, 1, a[(i+nb)*lda+i+nb:], lda) |
| |
| // Copy subdiagonal elements back into A, and diagonal elements into D. |
| for j := i; j < i+nb; j++ { |
| a[(j+1)*lda+j] = e[j] |
| d[j] = a[j*lda+j] |
| } |
| } |
| // Use unblocked code to reduce the last or only block. |
| impl.Dsytd2(uplo, n-i, a[i*lda+i:], lda, d[i:], e[i:], tau[i:]) |
| } |
| work[0] = float64(lworkopt) |
| } |