| // Copyright ©2016 The gonum Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| package gonum |
| |
| import ( |
| "math" |
| |
| "gonum.org/v1/gonum/blas" |
| ) |
| |
| // Dlaqr04 computes the eigenvalues of a block of an n×n upper Hessenberg matrix |
| // H, and optionally the matrices T and Z from the Schur decomposition |
| // H = Z T Z^T |
| // where T is an upper quasi-triangular matrix (the Schur form), and Z is the |
| // orthogonal matrix of Schur vectors. |
| // |
| // wantt indicates whether the full Schur form T is required. If wantt is false, |
| // then only enough of H will be updated to preserve the eigenvalues. |
| // |
| // wantz indicates whether the n×n matrix of Schur vectors Z is required. If it |
| // is true, the orthogonal similarity transformation will be accumulated into |
| // Z[iloz:ihiz+1,ilo:ihi+1], otherwise Z will not be referenced. |
| // |
| // ilo and ihi determine the block of H on which Dlaqr04 operates. It must hold that |
| // 0 <= ilo <= ihi < n, if n > 0, |
| // ilo == 0 and ihi == -1, if n == 0, |
| // and the block must be isolated, that is, |
| // ilo == 0 or H[ilo,ilo-1] == 0, |
| // ihi == n-1 or H[ihi+1,ihi] == 0, |
| // otherwise Dlaqr04 will panic. |
| // |
| // wr and wi must have length ihi+1. |
| // |
| // iloz and ihiz specify the rows of Z to which transformations will be applied |
| // if wantz is true. It must hold that |
| // 0 <= iloz <= ilo, and ihi <= ihiz < n, |
| // otherwise Dlaqr04 will panic. |
| // |
| // work must have length at least lwork and lwork must be |
| // lwork >= 1, if n <= 11, |
| // lwork >= n, if n > 11, |
| // otherwise Dlaqr04 will panic. lwork as large as 6*n may be required for |
| // optimal performance. On return, work[0] will contain the optimal value of |
| // lwork. |
| // |
| // If lwork is -1, instead of performing Dlaqr04, the function only estimates the |
| // optimal workspace size and stores it into work[0]. Neither h nor z are |
| // accessed. |
| // |
| // recur is the non-negative recursion depth. For recur > 0, Dlaqr04 behaves |
| // as DLAQR0, for recur == 0 it behaves as DLAQR4. |
| // |
| // unconverged indicates whether Dlaqr04 computed all the eigenvalues of H[ilo:ihi+1,ilo:ihi+1]. |
| // |
| // If unconverged is zero and wantt is true, H will contain on return the upper |
| // quasi-triangular matrix T from the Schur decomposition. 2×2 diagonal blocks |
| // (corresponding to complex conjugate pairs of eigenvalues) will be returned in |
| // standard form, with H[i,i] == H[i+1,i+1] and H[i+1,i]*H[i,i+1] < 0. |
| // |
| // If unconverged is zero and if wantt is false, the contents of h on return is |
| // unspecified. |
| // |
| // If unconverged is zero, all the eigenvalues have been computed and their real |
| // and imaginary parts will be stored on return in wr[ilo:ihi+1] and |
| // wi[ilo:ihi+1], respectively. If two eigenvalues are computed as a complex |
| // conjugate pair, they are stored in consecutive elements of wr and wi, say the |
| // i-th and (i+1)th, with wi[i] > 0 and wi[i+1] < 0. If wantt is true, then the |
| // eigenvalues are stored in the same order as on the diagonal of the Schur form |
| // returned in H, with wr[i] = H[i,i] and, if H[i:i+2,i:i+2] is a 2×2 diagonal |
| // block, wi[i] = sqrt(-H[i+1,i]*H[i,i+1]) and wi[i+1] = -wi[i]. |
| // |
| // If unconverged is positive, some eigenvalues have not converged, and |
| // wr[unconverged:ihi+1] and wi[unconverged:ihi+1] will contain those |
| // eigenvalues which have been successfully computed. Failures are rare. |
| // |
| // If unconverged is positive and wantt is true, then on return |
| // (initial H)*U = U*(final H), (*) |
| // where U is an orthogonal matrix. The final H is upper Hessenberg and |
| // H[unconverged:ihi+1,unconverged:ihi+1] is upper quasi-triangular. |
| // |
| // If unconverged is positive and wantt is false, on return the remaining |
| // unconverged eigenvalues are the eigenvalues of the upper Hessenberg matrix |
| // H[ilo:unconverged,ilo:unconverged]. |
| // |
| // If unconverged is positive and wantz is true, then on return |
| // (final Z) = (initial Z)*U, |
| // where U is the orthogonal matrix in (*) regardless of the value of wantt. |
| // |
| // References: |
| // [1] K. Braman, R. Byers, R. Mathias. The Multishift QR Algorithm. Part I: |
| // Maintaining Well-Focused Shifts and Level 3 Performance. SIAM J. Matrix |
| // Anal. Appl. 23(4) (2002), pp. 929—947 |
| // URL: http://dx.doi.org/10.1137/S0895479801384573 |
| // [2] K. Braman, R. Byers, R. Mathias. The Multishift QR Algorithm. Part II: |
| // Aggressive Early Deflation. SIAM J. Matrix Anal. Appl. 23(4) (2002), pp. 948—973 |
| // URL: http://dx.doi.org/10.1137/S0895479801384585 |
| // |
| // Dlaqr04 is an internal routine. It is exported for testing purposes. |
| func (impl Implementation) Dlaqr04(wantt, wantz bool, n, ilo, ihi int, h []float64, ldh int, wr, wi []float64, iloz, ihiz int, z []float64, ldz int, work []float64, lwork int, recur int) (unconverged int) { |
| const ( |
| // Matrices of order ntiny or smaller must be processed by |
| // Dlahqr because of insufficient subdiagonal scratch space. |
| // This is a hard limit. |
| ntiny = 11 |
| // Exceptional deflation windows: try to cure rare slow |
| // convergence by varying the size of the deflation window after |
| // kexnw iterations. |
| kexnw = 5 |
| // Exceptional shifts: try to cure rare slow convergence with |
| // ad-hoc exceptional shifts every kexsh iterations. |
| kexsh = 6 |
| |
| // See https://github.com/gonum/lapack/pull/151#discussion_r68162802 |
| // and the surrounding discussion for an explanation where these |
| // constants come from. |
| // TODO(vladimir-ch): Similar constants for exceptional shifts |
| // are used also in dlahqr.go. The first constant is different |
| // there, it is equal to 3. Why? And does it matter? |
| wilk1 = 0.75 |
| wilk2 = -0.4375 |
| ) |
| |
| switch { |
| case ilo < 0 || max(0, n-1) < ilo: |
| panic(badIlo) |
| case ihi < min(ilo, n-1) || n <= ihi: |
| panic(badIhi) |
| case lwork < 1 && n <= ntiny && lwork != -1: |
| panic(badWork) |
| // TODO(vladimir-ch): Enable if and when we figure out what the minimum |
| // necessary lwork value is. Dlaqr04 says that the minimum is n which |
| // clashes with Dlaqr23's opinion about optimal work when nw <= 2 |
| // (independent of n). |
| // case lwork < n && n > ntiny && lwork != -1: |
| // panic(badWork) |
| case len(work) < lwork: |
| panic(shortWork) |
| case recur < 0: |
| panic("lapack: recur is negative") |
| } |
| if wantz { |
| if iloz < 0 || ilo < iloz { |
| panic("lapack: invalid value of iloz") |
| } |
| if ihiz < ihi || n <= ihiz { |
| panic("lapack: invalid value of ihiz") |
| } |
| } |
| if lwork != -1 { |
| checkMatrix(n, n, h, ldh) |
| if wantz { |
| checkMatrix(n, n, z, ldz) |
| } |
| switch { |
| case ilo > 0 && h[ilo*ldh+ilo-1] != 0: |
| panic("lapack: block not isolated") |
| case ihi+1 < n && h[(ihi+1)*ldh+ihi] != 0: |
| panic("lapack: block not isolated") |
| case len(wr) != ihi+1: |
| panic("lapack: bad length of wr") |
| case len(wi) != ihi+1: |
| panic("lapack: bad length of wi") |
| } |
| } |
| |
| // Quick return. |
| if n == 0 { |
| work[0] = 1 |
| return 0 |
| } |
| |
| if n <= ntiny { |
| // Tiny matrices must use Dlahqr. |
| work[0] = 1 |
| if lwork == -1 { |
| return 0 |
| } |
| return impl.Dlahqr(wantt, wantz, n, ilo, ihi, h, ldh, wr, wi, iloz, ihiz, z, ldz) |
| } |
| |
| // Use small bulge multi-shift QR with aggressive early deflation on |
| // larger-than-tiny matrices. |
| var jbcmpz string |
| if wantt { |
| jbcmpz = "S" |
| } else { |
| jbcmpz = "E" |
| } |
| if wantz { |
| jbcmpz += "V" |
| } else { |
| jbcmpz += "N" |
| } |
| |
| var fname string |
| if recur > 0 { |
| fname = "DLAQR0" |
| } else { |
| fname = "DLAQR4" |
| } |
| // nwr is the recommended deflation window size. n is greater than 11, |
| // so there is enough subdiagonal workspace for nwr >= 2 as required. |
| // (In fact, there is enough subdiagonal space for nwr >= 3.) |
| // TODO(vladimir-ch): If there is enough space for nwr >= 3, should we |
| // use it? |
| nwr := impl.Ilaenv(13, fname, jbcmpz, n, ilo, ihi, lwork) |
| nwr = max(2, nwr) |
| nwr = min(ihi-ilo+1, min((n-1)/3, nwr)) |
| |
| // nsr is the recommended number of simultaneous shifts. n is greater |
| // than 11, so there is enough subdiagonal workspace for nsr to be even |
| // and greater than or equal to two as required. |
| nsr := impl.Ilaenv(15, fname, jbcmpz, n, ilo, ihi, lwork) |
| nsr = min(nsr, min((n+6)/9, ihi-ilo)) |
| nsr = max(2, nsr&^1) |
| |
| // Workspace query call to Dlaqr23. |
| impl.Dlaqr23(wantt, wantz, n, ilo, ihi, nwr+1, nil, 0, iloz, ihiz, nil, 0, |
| nil, nil, nil, 0, n, nil, 0, n, nil, 0, work, -1, recur) |
| // Optimal workspace is max(Dlaqr5, Dlaqr23). |
| lwkopt := max(3*nsr/2, int(work[0])) |
| // Quick return in case of workspace query. |
| if lwork == -1 { |
| work[0] = float64(lwkopt) |
| return 0 |
| } |
| |
| // Dlahqr/Dlaqr04 crossover point. |
| nmin := impl.Ilaenv(12, fname, jbcmpz, n, ilo, ihi, lwork) |
| nmin = max(ntiny, nmin) |
| |
| // Nibble determines when to skip a multi-shift QR sweep (Dlaqr5). |
| nibble := impl.Ilaenv(14, fname, jbcmpz, n, ilo, ihi, lwork) |
| nibble = max(0, nibble) |
| |
| // Computation mode of far-from-diagonal orthogonal updates in Dlaqr5. |
| kacc22 := impl.Ilaenv(16, fname, jbcmpz, n, ilo, ihi, lwork) |
| kacc22 = max(0, min(kacc22, 2)) |
| |
| // nwmax is the largest possible deflation window for which there is |
| // sufficient workspace. |
| nwmax := min((n-1)/3, lwork/2) |
| nw := nwmax // Start with maximum deflation window size. |
| |
| // nsmax is the largest number of simultaneous shifts for which there is |
| // sufficient workspace. |
| nsmax := min((n+6)/9, 2*lwork/3) &^ 1 |
| |
| ndfl := 1 // Number of iterations since last deflation. |
| ndec := 0 // Deflation window size decrement. |
| |
| // Main loop. |
| var ( |
| itmax = max(30, 2*kexsh) * max(10, (ihi-ilo+1)) |
| it = 0 |
| ) |
| for kbot := ihi; kbot >= ilo; { |
| if it == itmax { |
| unconverged = kbot + 1 |
| break |
| } |
| it++ |
| |
| // Locate active block. |
| ktop := ilo |
| for k := kbot; k >= ilo+1; k-- { |
| if h[k*ldh+k-1] == 0 { |
| ktop = k |
| break |
| } |
| } |
| |
| // Select deflation window size nw. |
| // |
| // Typical Case: |
| // If possible and advisable, nibble the entire active block. |
| // If not, use size min(nwr,nwmax) or min(nwr+1,nwmax) |
| // depending upon which has the smaller corresponding |
| // subdiagonal entry (a heuristic). |
| // |
| // Exceptional Case: |
| // If there have been no deflations in kexnw or more |
| // iterations, then vary the deflation window size. At first, |
| // because larger windows are, in general, more powerful than |
| // smaller ones, rapidly increase the window to the maximum |
| // possible. Then, gradually reduce the window size. |
| nh := kbot - ktop + 1 |
| nwupbd := min(nh, nwmax) |
| if ndfl < kexnw { |
| nw = min(nwupbd, nwr) |
| } else { |
| nw = min(nwupbd, 2*nw) |
| } |
| if nw < nwmax { |
| if nw >= nh-1 { |
| nw = nh |
| } else { |
| kwtop := kbot - nw + 1 |
| if math.Abs(h[kwtop*ldh+kwtop-1]) > math.Abs(h[(kwtop-1)*ldh+kwtop-2]) { |
| nw++ |
| } |
| } |
| } |
| if ndfl < kexnw { |
| ndec = -1 |
| } else if ndec >= 0 || nw >= nwupbd { |
| ndec++ |
| if nw-ndec < 2 { |
| ndec = 0 |
| } |
| nw -= ndec |
| } |
| |
| // Split workspace under the subdiagonal of H into: |
| // - an nw×nw work array V in the lower left-hand corner, |
| // - an nw×nhv horizontal work array along the bottom edge (nhv |
| // must be at least nw but more is better), |
| // - an nve×nw vertical work array along the left-hand-edge |
| // (nhv can be any positive integer but more is better). |
| kv := n - nw |
| kt := nw |
| kwv := nw + 1 |
| nhv := n - kwv - kt |
| // Aggressive early deflation. |
| ls, ld := impl.Dlaqr23(wantt, wantz, n, ktop, kbot, nw, |
| h, ldh, iloz, ihiz, z, ldz, wr[:kbot+1], wi[:kbot+1], |
| h[kv*ldh:], ldh, nhv, h[kv*ldh+kt:], ldh, nhv, h[kwv*ldh:], ldh, work, lwork, recur) |
| |
| // Adjust kbot accounting for new deflations. |
| kbot -= ld |
| // ks points to the shifts. |
| ks := kbot - ls + 1 |
| |
| // Skip an expensive QR sweep if there is a (partly heuristic) |
| // reason to expect that many eigenvalues will deflate without |
| // it. Here, the QR sweep is skipped if many eigenvalues have |
| // just been deflated or if the remaining active block is small. |
| if ld > 0 && (100*ld > nw*nibble || kbot-ktop+1 <= min(nmin, nwmax)) { |
| // ld is positive, note progress. |
| ndfl = 1 |
| continue |
| } |
| |
| // ns is the nominal number of simultaneous shifts. This may be |
| // lowered (slightly) if Dlaqr23 did not provide that many |
| // shifts. |
| ns := min(min(nsmax, nsr), max(2, kbot-ktop)) &^ 1 |
| |
| // If there have been no deflations in a multiple of kexsh |
| // iterations, then try exceptional shifts. Otherwise use shifts |
| // provided by Dlaqr23 above or from the eigenvalues of a |
| // trailing principal submatrix. |
| if ndfl%kexsh == 0 { |
| ks = kbot - ns + 1 |
| for i := kbot; i > max(ks, ktop+1); i -= 2 { |
| ss := math.Abs(h[i*ldh+i-1]) + math.Abs(h[(i-1)*ldh+i-2]) |
| aa := wilk1*ss + h[i*ldh+i] |
| _, _, _, _, wr[i-1], wi[i-1], wr[i], wi[i], _, _ = |
| impl.Dlanv2(aa, ss, wilk2*ss, aa) |
| } |
| if ks == ktop { |
| wr[ks+1] = h[(ks+1)*ldh+ks+1] |
| wi[ks+1] = 0 |
| wr[ks] = wr[ks+1] |
| wi[ks] = wi[ks+1] |
| } |
| } else { |
| // If we got ns/2 or fewer shifts, use Dlahqr or recur |
| // into Dlaqr04 on a trailing principal submatrix to get |
| // more. Since ns <= nsmax <=(n+6)/9, there is enough |
| // space below the subdiagonal to fit an ns×ns scratch |
| // array. |
| if kbot-ks+1 <= ns/2 { |
| ks = kbot - ns + 1 |
| kt = n - ns |
| impl.Dlacpy(blas.All, ns, ns, h[ks*ldh+ks:], ldh, h[kt*ldh:], ldh) |
| if ns > nmin && recur > 0 { |
| ks += impl.Dlaqr04(false, false, ns, 1, ns-1, h[kt*ldh:], ldh, |
| wr[ks:ks+ns], wi[ks:ks+ns], 0, 0, nil, 0, work, lwork, recur-1) |
| } else { |
| ks += impl.Dlahqr(false, false, ns, 0, ns-1, h[kt*ldh:], ldh, |
| wr[ks:ks+ns], wi[ks:ks+ns], 0, 0, nil, 0) |
| } |
| // In case of a rare QR failure use eigenvalues |
| // of the trailing 2×2 principal submatrix. |
| if ks >= kbot { |
| aa := h[(kbot-1)*ldh+kbot-1] |
| bb := h[(kbot-1)*ldh+kbot] |
| cc := h[kbot*ldh+kbot-1] |
| dd := h[kbot*ldh+kbot] |
| _, _, _, _, wr[kbot-1], wi[kbot-1], wr[kbot], wi[kbot], _, _ = |
| impl.Dlanv2(aa, bb, cc, dd) |
| ks = kbot - 1 |
| } |
| } |
| |
| if kbot-ks+1 > ns { |
| // Sorting the shifts helps a little. Bubble |
| // sort keeps complex conjugate pairs together. |
| sorted := false |
| for k := kbot; k > ks; k-- { |
| if sorted { |
| break |
| } |
| sorted = true |
| for i := ks; i < k; i++ { |
| if math.Abs(wr[i])+math.Abs(wi[i]) >= math.Abs(wr[i+1])+math.Abs(wi[i+1]) { |
| continue |
| } |
| sorted = false |
| wr[i], wr[i+1] = wr[i+1], wr[i] |
| wi[i], wi[i+1] = wi[i+1], wi[i] |
| } |
| } |
| } |
| |
| // Shuffle shifts into pairs of real shifts and pairs of |
| // complex conjugate shifts using the fact that complex |
| // conjugate shifts are already adjacent to one another. |
| // TODO(vladimir-ch): The shuffling here could probably |
| // be removed but I'm not sure right now and it's safer |
| // to leave it. |
| for i := kbot; i > ks+1; i -= 2 { |
| if wi[i] == -wi[i-1] { |
| continue |
| } |
| wr[i], wr[i-1], wr[i-2] = wr[i-1], wr[i-2], wr[i] |
| wi[i], wi[i-1], wi[i-2] = wi[i-1], wi[i-2], wi[i] |
| } |
| } |
| |
| // If there are only two shifts and both are real, then use only one. |
| if kbot-ks+1 == 2 && wi[kbot] == 0 { |
| if math.Abs(wr[kbot]-h[kbot*ldh+kbot]) < math.Abs(wr[kbot-1]-h[kbot*ldh+kbot]) { |
| wr[kbot-1] = wr[kbot] |
| } else { |
| wr[kbot] = wr[kbot-1] |
| } |
| } |
| |
| // Use up to ns of the the smallest magnitude shifts. If there |
| // aren't ns shifts available, then use them all, possibly |
| // dropping one to make the number of shifts even. |
| ns = min(ns, kbot-ks+1) &^ 1 |
| ks = kbot - ns + 1 |
| |
| // Split workspace under the subdiagonal into: |
| // - a kdu×kdu work array U in the lower left-hand-corner, |
| // - a kdu×nhv horizontal work array WH along the bottom edge |
| // (nhv must be at least kdu but more is better), |
| // - an nhv×kdu vertical work array WV along the left-hand-edge |
| // (nhv must be at least kdu but more is better). |
| kdu := 3*ns - 3 |
| ku := n - kdu |
| kwh := kdu |
| kwv = kdu + 3 |
| nhv = n - kwv - kdu |
| // Small-bulge multi-shift QR sweep. |
| impl.Dlaqr5(wantt, wantz, kacc22, n, ktop, kbot, ns, |
| wr[ks:ks+ns], wi[ks:ks+ns], h, ldh, iloz, ihiz, z, ldz, |
| work, 3, h[ku*ldh:], ldh, nhv, h[kwv*ldh:], ldh, nhv, h[ku*ldh+kwh:], ldh) |
| |
| // Note progress (or the lack of it). |
| if ld > 0 { |
| ndfl = 1 |
| } else { |
| ndfl++ |
| } |
| } |
| |
| work[0] = float64(lwkopt) |
| return unconverged |
| } |