lapack/gonum/dlaqr23.go - third_party/github.com/gonum/gonum - Git at Google

 // Copyright ©2016 The gonum Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

 package gonum

 import (
 	"math"

 	"gonum.org/v1/gonum/blas"
 	"gonum.org/v1/gonum/blas/blas64"
 	"gonum.org/v1/gonum/lapack"
 )

 // Dlaqr23 performs the orthogonal similarity transformation of an n×n upper
 // Hessenberg matrix to detect and deflate fully converged eigenvalues from a
 // trailing principal submatrix using aggressive early deflation [1].
 //
 // On return, H will be overwritten by a new Hessenberg matrix that is a
 // perturbation of an orthogonal similarity transformation of H. It is hoped
 // that on output H will have many zero subdiagonal entries.
 //
 // If wantt is true, the matrix H will be fully updated so that the
 // quasi-triangular Schur factor can be computed. If wantt is false, then only
 // enough of H will be updated to preserve the eigenvalues.
 //
 // If wantz is true, the orthogonal similarity transformation will be
 // accumulated into Z[iloz:ihiz+1,ktop:kbot+1], otherwise Z is not referenced.
 //
 // ktop and kbot determine a block [ktop:kbot+1,ktop:kbot+1] along the diagonal
 // of H. It must hold that
 //  0 <= ilo <= ihi < n,     if n > 0,
 //  ilo == 0 and ihi == -1,  if n == 0,
 // and the block must be isolated, that is, it must hold that
 //  ktop == 0   or H[ktop,ktop-1] == 0,
 //  kbot == n-1 or H[kbot+1,kbot] == 0,
 // otherwise Dlaqr23 will panic.
 //
 // nw is the deflation window size. It must hold that
 //  0 <= nw <= kbot-ktop+1,
 // otherwise Dlaqr23 will panic.
 //
 // iloz and ihiz specify the rows of the n×n matrix Z to which transformations
 // will be applied if wantz is true. It must hold that
 //  0 <= iloz <= ktop,  and  kbot <= ihiz < n,
 // otherwise Dlaqr23 will panic.
 //
 // sr and si must have length kbot+1, otherwise Dlaqr23 will panic.
 //
 // v and ldv represent an nw×nw work matrix.
 // t and ldt represent an nw×nh work matrix, and nh must be at least nw.
 // wv and ldwv represent an nv×nw work matrix.
 //
 // work must have length at least lwork and lwork must be at least max(1,2*nw),
 // otherwise Dlaqr23 will panic. Larger values of lwork may result in greater
 // efficiency. On return, work[0] will contain the optimal value of lwork.
 //
 // If lwork is -1, instead of performing Dlaqr23, the function only estimates the
 // optimal workspace size and stores it into work[0]. Neither h nor z are
 // accessed.
 //
 // recur is the non-negative recursion depth. For recur > 0, Dlaqr23 behaves
 // as DLAQR3, for recur == 0 it behaves as DLAQR2.
 //
 // On return, ns and nd will contain respectively the number of unconverged
 // (i.e., approximate) eigenvalues and converged eigenvalues that are stored in
 // sr and si.
 //
 // On return, the real and imaginary parts of approximate eigenvalues that may
 // be used for shifts will be stored respectively in sr[kbot-nd-ns+1:kbot-nd+1]
 // and si[kbot-nd-ns+1:kbot-nd+1].
 //
 // On return, the real and imaginary parts of converged eigenvalues will be
 // stored respectively in sr[kbot-nd+1:kbot+1] and si[kbot-nd+1:kbot+1].
 //
 // References:
 //  [1] K. Braman, R. Byers, R. Mathias. The Multishift QR Algorithm. Part II:
 //      Aggressive Early Deflation. SIAM J. Matrix Anal. Appl 23(4) (2002), pp. 948—973
 //      URL: http://dx.doi.org/10.1137/S0895479801384585
 //
 func (impl Implementation) Dlaqr23(wantt, wantz bool, n, ktop, kbot, nw int, h []float64, ldh int, iloz, ihiz int, z []float64, ldz int, sr, si []float64, v []float64, ldv int, nh int, t []float64, ldt int, nv int, wv []float64, ldwv int, work []float64, lwork int, recur int) (ns, nd int) {
 	switch {
 	case ktop < 0 || max(0, n-1) < ktop:
 		panic("lapack: invalid value of ktop")
 	case kbot < min(ktop, n-1) || n <= kbot:
 		panic("lapack: invalid value of kbot")
 	case (nw < 0 || kbot-ktop+1 < nw) && lwork != -1:
 		panic("lapack: invalid value of nw")
 	case nh < nw:
 		panic("lapack: invalid value of nh")
 	case lwork < max(1, 2*nw) && lwork != -1:
 		panic(badWork)
 	case len(work) < lwork:
 		panic(shortWork)
 	case recur < 0:
 		panic("lapack: recur is negative")
 	}
 	if wantz {
 		switch {
 		case iloz < 0 || ktop < iloz:
 			panic("lapack: invalid value of iloz")
 		case ihiz < kbot || n <= ihiz:
 			panic("lapack: invalid value of ihiz")
 		}
 	}
 	if lwork != -1 {
 		// Check input slices only if not doing workspace query.
 		checkMatrix(n, n, h, ldh)
 		checkMatrix(nw, nw, v, ldv)
 		checkMatrix(nw, nh, t, ldt)
 		checkMatrix(nv, nw, wv, ldwv)
 		if wantz {
 			checkMatrix(n, n, z, ldz)
 		}
 		switch {
 		case ktop > 0 && h[ktop*ldh+ktop-1] != 0:
 			panic("lapack: block not isolated")
 		case kbot+1 < n && h[(kbot+1)*ldh+kbot] != 0:
 			panic("lapack: block not isolated")
 		case len(sr) != kbot+1:
 			panic("lapack: bad length of sr")
 		case len(si) != kbot+1:
 			panic("lapack: bad length of si")
 		}
 	}

 	// Quick return for zero window size.
 	if nw == 0 {
 		work[0] = 1
 		return 0, 0
 	}

 	// LAPACK code does not enforce the documented behavior
 	//  nw <= kbot-ktop+1
 	// but we do (we panic above).
 	jw := nw
 	lwkopt := max(1, 2*nw)
 	if jw > 2 {
 		// Workspace query call to Dgehrd.
 		impl.Dgehrd(jw, 0, jw-2, nil, 0, nil, work, -1)
 		lwk1 := int(work[0])
 		// Workspace query call to Dormhr.
 		impl.Dormhr(blas.Right, blas.NoTrans, jw, jw, 0, jw-2, nil, 0, nil, nil, 0, work, -1)
 		lwk2 := int(work[0])
 		if recur > 0 {
 			// Workspace query call to Dlaqr04.
 			impl.Dlaqr04(true, true, jw, 0, jw-1, nil, 0, nil, nil, 0, jw-1, nil, 0, work, -1, recur-1)
 			lwk3 := int(work[0])
 			// Optimal workspace.
 			lwkopt = max(jw+max(lwk1, lwk2), lwk3)
 		} else {
 			// Optimal workspace.
 			lwkopt = jw + max(lwk1, lwk2)
 		}
 	}
 	// Quick return in case of workspace query.
 	if lwork == -1 {
 		work[0] = float64(lwkopt)
 		return 0, 0
 	}

 	// Machine constants.
 	ulp := dlamchP
 	smlnum := float64(n) / ulp * dlamchS

 	// Setup deflation window.
 	var s float64
 	kwtop := kbot - jw + 1
 	if kwtop != ktop {
 		s = h[kwtop*ldh+kwtop-1]
 	}
 	if kwtop == kbot {
 		// 1×1 deflation window.
 		sr[kwtop] = h[kwtop*ldh+kwtop]
 		si[kwtop] = 0
 		ns = 1
 		nd = 0
 		if math.Abs(s) <= math.Max(smlnum, ulp*math.Abs(h[kwtop*ldh+kwtop])) {
 			ns = 0
 			nd = 1
 			if kwtop > ktop {
 				h[kwtop*ldh+kwtop-1] = 0
 			}
 		}
 		work[0] = 1
 		return ns, nd
 	}

 	// Convert to spike-triangular form. In case of a rare QR failure, this
 	// routine continues to do aggressive early deflation using that part of
 	// the deflation window that converged using infqr here and there to
 	// keep track.
 	impl.Dlacpy(blas.Upper, jw, jw, h[kwtop*ldh+kwtop:], ldh, t, ldt)
 	bi := blas64.Implementation()
 	bi.Dcopy(jw-1, h[(kwtop+1)*ldh+kwtop:], ldh+1, t[ldt:], ldt+1)
 	impl.Dlaset(blas.All, jw, jw, 0, 1, v, ldv)
 	nmin := impl.Ilaenv(12, "DLAQR3", "SV", jw, 0, jw-1, lwork)
 	var infqr int
 	if recur > 0 && jw > nmin {
 		infqr = impl.Dlaqr04(true, true, jw, 0, jw-1, t, ldt, sr[kwtop:], si[kwtop:], 0, jw-1, v, ldv, work, lwork, recur-1)
 	} else {
 		infqr = impl.Dlahqr(true, true, jw, 0, jw-1, t, ldt, sr[kwtop:], si[kwtop:], 0, jw-1, v, ldv)
 	}
 	// Note that ilo == 0 which conveniently coincides with the success
 	// value of infqr, that is, infqr as an index always points to the first
 	// converged eigenvalue.

 	// Dtrexc needs a clean margin near the diagonal.
 	for j := 0; j < jw-3; j++ {
 		t[(j+2)*ldt+j] = 0
 		t[(j+3)*ldt+j] = 0
 	}
 	if jw >= 3 {
 		t[(jw-1)*ldt+jw-3] = 0
 	}

 	ns = jw
 	ilst := infqr
 	// Deflation detection loop.
 	for ilst < ns {
 		bulge := false
 		if ns >= 2 {
 			bulge = t[(ns-1)*ldt+ns-2] != 0
 		}
 		if !bulge {
 			// Real eigenvalue.
 			abst := math.Abs(t[(ns-1)*ldt+ns-1])
 			if abst == 0 {
 				abst = math.Abs(s)
 			}
 			if math.Abs(s*v[ns-1]) <= math.Max(smlnum, ulp*abst) {
 				// Deflatable.
 				ns--
 			} else {
 				// Undeflatable, move it up out of the way.
 				// Dtrexc can not fail in this case.
 				_, ilst, _ = impl.Dtrexc(lapack.UpdateSchur, jw, t, ldt, v, ldv, ns-1, ilst, work)
 				ilst++
 			}
 			continue
 		}
 		// Complex conjugate pair.
 		abst := math.Abs(t[(ns-1)*ldt+ns-1]) + math.Sqrt(math.Abs(t[(ns-1)*ldt+ns-2]))*math.Sqrt(math.Abs(t[(ns-2)*ldt+ns-1]))
 		if abst == 0 {
 			abst = math.Abs(s)
 		}
 		if math.Max(math.Abs(s*v[ns-1]), math.Abs(s*v[ns-2])) <= math.Max(smlnum, ulp*abst) {
 			// Deflatable.
 			ns -= 2
 		} else {
 			// Undeflatable, move them up out of the way.
 			// Dtrexc does the right thing with ilst in case of a
 			// rare exchange failure.
 			_, ilst, _ = impl.Dtrexc(lapack.UpdateSchur, jw, t, ldt, v, ldv, ns-1, ilst, work)
 			ilst += 2
 		}
 	}

 	// Return to Hessenberg form.
 	if ns == 0 {
 		s = 0
 	}
 	if ns < jw {
 		// Sorting diagonal blocks of T improves accuracy for graded
 		// matrices. Bubble sort deals well with exchange failures.
 		sorted := false
 		i := ns
 		for !sorted {
 			sorted = true
 			kend := i - 1
 			i = infqr
 			var k int
 			if i == ns-1 || t[(i+1)*ldt+i] == 0 {
 				k = i + 1
 			} else {
 				k = i + 2
 			}
 			for k <= kend {
 				var evi float64
 				if k == i+1 {
 					evi = math.Abs(t[i*ldt+i])
 				} else {
 					evi = math.Abs(t[i*ldt+i]) + math.Sqrt(math.Abs(t[(i+1)*ldt+i]))*math.Sqrt(math.Abs(t[i*ldt+i+1]))
 				}

 				var evk float64
 				if k == kend || t[(k+1)*ldt+k] == 0 {
 					evk = math.Abs(t[k*ldt+k])
 				} else {
 					evk = math.Abs(t[k*ldt+k]) + math.Sqrt(math.Abs(t[(k+1)*ldt+k]))*math.Sqrt(math.Abs(t[k*ldt+k+1]))
 				}

 				if evi >= evk {
 					i = k
 				} else {
 					sorted = false
 					_, ilst, ok := impl.Dtrexc(lapack.UpdateSchur, jw, t, ldt, v, ldv, i, k, work)
 					if ok {
 						i = ilst
 					} else {
 						i = k
 					}
 				}
 				if i == kend || t[(i+1)*ldt+i] == 0 {
 					k = i + 1
 				} else {
 					k = i + 2
 				}
 			}
 		}
 	}

 	// Restore shift/eigenvalue array from T.
 	for i := jw - 1; i >= infqr; {
 		if i == infqr || t[i*ldt+i-1] == 0 {
 			sr[kwtop+i] = t[i*ldt+i]
 			si[kwtop+i] = 0
 			i--
 			continue
 		}
 		aa := t[(i-1)*ldt+i-1]
 		bb := t[(i-1)*ldt+i]
 		cc := t[i*ldt+i-1]
 		dd := t[i*ldt+i]
 		_, _, _, _, sr[kwtop+i-1], si[kwtop+i-1], sr[kwtop+i], si[kwtop+i], _, _ = impl.Dlanv2(aa, bb, cc, dd)
 		i -= 2
 	}

 	if ns < jw || s == 0 {
 		if ns > 1 && s != 0 {
 			// Reflect spike back into lower triangle.
 			bi.Dcopy(ns, v[:ns], 1, work[:ns], 1)
 			_, tau := impl.Dlarfg(ns, work[0], work[1:ns], 1)
 			work[0] = 1
 			impl.Dlaset(blas.Lower, jw-2, jw-2, 0, 0, t[2*ldt:], ldt)
 			impl.Dlarf(blas.Left, ns, jw, work[:ns], 1, tau, t, ldt, work[jw:])
 			impl.Dlarf(blas.Right, ns, ns, work[:ns], 1, tau, t, ldt, work[jw:])
 			impl.Dlarf(blas.Right, jw, ns, work[:ns], 1, tau, v, ldv, work[jw:])
 			impl.Dgehrd(jw, 0, ns-1, t, ldt, work[:jw-1], work[jw:], lwork-jw)
 		}

 		// Copy updated reduced window into place.
 		if kwtop > 0 {
 			h[kwtop*ldh+kwtop-1] = s * v[0]
 		}
 		impl.Dlacpy(blas.Upper, jw, jw, t, ldt, h[kwtop*ldh+kwtop:], ldh)
 		bi.Dcopy(jw-1, t[ldt:], ldt+1, h[(kwtop+1)*ldh+kwtop:], ldh+1)

 		// Accumulate orthogonal matrix in order to update H and Z, if
 		// requested.
 		if ns > 1 && s != 0 {
 			// work[:ns-1] contains the elementary reflectors stored
 			// by a call to Dgehrd above.
 			impl.Dormhr(blas.Right, blas.NoTrans, jw, ns, 0, ns-1,
 				t, ldt, work[:ns-1], v, ldv, work[jw:], lwork-jw)
 		}

 		// Update vertical slab in H.
 		var ltop int
 		if !wantt {
 			ltop = ktop
 		}
 		for krow := ltop; krow < kwtop; krow += nv {
 			kln := min(nv, kwtop-krow)
 			bi.Dgemm(blas.NoTrans, blas.NoTrans, kln, jw, jw,
 				1, h[krow*ldh+kwtop:], ldh, v, ldv,
 				0, wv, ldwv)
 			impl.Dlacpy(blas.All, kln, jw, wv, ldwv, h[krow*ldh+kwtop:], ldh)
 		}

 		// Update horizontal slab in H.
 		if wantt {
 			for kcol := kbot + 1; kcol < n; kcol += nh {
 				kln := min(nh, n-kcol)
 				bi.Dgemm(blas.Trans, blas.NoTrans, jw, kln, jw,
 					1, v, ldv, h[kwtop*ldh+kcol:], ldh,
 					0, t, ldt)
 				impl.Dlacpy(blas.All, jw, kln, t, ldt, h[kwtop*ldh+kcol:], ldh)
 			}
 		}

 		// Update vertical slab in Z.
 		if wantz {
 			for krow := iloz; krow <= ihiz; krow += nv {
 				kln := min(nv, ihiz-krow+1)
 				bi.Dgemm(blas.NoTrans, blas.NoTrans, kln, jw, jw,
 					1, z[krow*ldz+kwtop:], ldz, v, ldv,
 					0, wv, ldwv)
 				impl.Dlacpy(blas.All, kln, jw, wv, ldwv, z[krow*ldz+kwtop:], ldz)
 			}
 		}
 	}

 	// The number of deflations.
 	nd = jw - ns
 	// Shifts are converged eigenvalues that could not be deflated.
 	// Subtracting infqr from the spike length takes care of the case of a
 	// rare QR failure while calculating eigenvalues of the deflation
 	// window.
 	ns -= infqr
 	work[0] = float64(lwkopt)
 	return ns, nd
 }
	// Copyright ©2016 The gonum Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style
	// license that can be found in the LICENSE file.

	package gonum

	import (
	"math"

	"gonum.org/v1/gonum/blas"
	"gonum.org/v1/gonum/blas/blas64"
	"gonum.org/v1/gonum/lapack"
	)

	// Dlaqr23 performs the orthogonal similarity transformation of an n×n upper
	// Hessenberg matrix to detect and deflate fully converged eigenvalues from a
	// trailing principal submatrix using aggressive early deflation [1].
	//
	// On return, H will be overwritten by a new Hessenberg matrix that is a
	// perturbation of an orthogonal similarity transformation of H. It is hoped
	// that on output H will have many zero subdiagonal entries.
	//
	// If wantt is true, the matrix H will be fully updated so that the
	// quasi-triangular Schur factor can be computed. If wantt is false, then only
	// enough of H will be updated to preserve the eigenvalues.
	//
	// If wantz is true, the orthogonal similarity transformation will be
	// accumulated into Z[iloz:ihiz+1,ktop:kbot+1], otherwise Z is not referenced.
	//
	// ktop and kbot determine a block [ktop:kbot+1,ktop:kbot+1] along the diagonal
	// of H. It must hold that
	// 0 <= ilo <= ihi < n, if n > 0,
	// ilo == 0 and ihi == -1, if n == 0,
	// and the block must be isolated, that is, it must hold that
	// ktop == 0 or H[ktop,ktop-1] == 0,
	// kbot == n-1 or H[kbot+1,kbot] == 0,
	// otherwise Dlaqr23 will panic.
	//
	// nw is the deflation window size. It must hold that
	// 0 <= nw <= kbot-ktop+1,
	// otherwise Dlaqr23 will panic.
	//
	// iloz and ihiz specify the rows of the n×n matrix Z to which transformations
	// will be applied if wantz is true. It must hold that
	// 0 <= iloz <= ktop, and kbot <= ihiz < n,
	// otherwise Dlaqr23 will panic.
	//
	// sr and si must have length kbot+1, otherwise Dlaqr23 will panic.
	//
	// v and ldv represent an nw×nw work matrix.
	// t and ldt represent an nw×nh work matrix, and nh must be at least nw.
	// wv and ldwv represent an nv×nw work matrix.
	//
	// work must have length at least lwork and lwork must be at least max(1,2*nw),
	// otherwise Dlaqr23 will panic. Larger values of lwork may result in greater
	// efficiency. On return, work[0] will contain the optimal value of lwork.
	//
	// If lwork is -1, instead of performing Dlaqr23, the function only estimates the
	// optimal workspace size and stores it into work[0]. Neither h nor z are
	// accessed.
	//
	// recur is the non-negative recursion depth. For recur > 0, Dlaqr23 behaves
	// as DLAQR3, for recur == 0 it behaves as DLAQR2.
	//
	// On return, ns and nd will contain respectively the number of unconverged
	// (i.e., approximate) eigenvalues and converged eigenvalues that are stored in
	// sr and si.
	//
	// On return, the real and imaginary parts of approximate eigenvalues that may
	// be used for shifts will be stored respectively in sr[kbot-nd-ns+1:kbot-nd+1]
	// and si[kbot-nd-ns+1:kbot-nd+1].
	//
	// On return, the real and imaginary parts of converged eigenvalues will be
	// stored respectively in sr[kbot-nd+1:kbot+1] and si[kbot-nd+1:kbot+1].
	//
	// References:
	// [1] K. Braman, R. Byers, R. Mathias. The Multishift QR Algorithm. Part II:
	// Aggressive Early Deflation. SIAM J. Matrix Anal. Appl 23(4) (2002), pp. 948—973
	// URL: http://dx.doi.org/10.1137/S0895479801384585
	//
	func (impl Implementation) Dlaqr23(wantt, wantz bool, n, ktop, kbot, nw int, h []float64, ldh int, iloz, ihiz int, z []float64, ldz int, sr, si []float64, v []float64, ldv int, nh int, t []float64, ldt int, nv int, wv []float64, ldwv int, work []float64, lwork int, recur int) (ns, nd int) {
	switch {
	case ktop < 0 \|\| max(0, n-1) < ktop:
	panic("lapack: invalid value of ktop")
	case kbot < min(ktop, n-1) \|\| n <= kbot:
	panic("lapack: invalid value of kbot")
	case (nw < 0 \|\| kbot-ktop+1 < nw) && lwork != -1:
	panic("lapack: invalid value of nw")
	case nh < nw:
	panic("lapack: invalid value of nh")
	case lwork < max(1, 2*nw) && lwork != -1:
	panic(badWork)
	case len(work) < lwork:
	panic(shortWork)
	case recur < 0:
	panic("lapack: recur is negative")
	}
	if wantz {
	switch {
	case iloz < 0 \|\| ktop < iloz:
	panic("lapack: invalid value of iloz")
	case ihiz < kbot \|\| n <= ihiz:
	panic("lapack: invalid value of ihiz")
	}
	}
	if lwork != -1 {
	// Check input slices only if not doing workspace query.
	checkMatrix(n, n, h, ldh)
	checkMatrix(nw, nw, v, ldv)
	checkMatrix(nw, nh, t, ldt)
	checkMatrix(nv, nw, wv, ldwv)
	if wantz {
	checkMatrix(n, n, z, ldz)
	}
	switch {
	case ktop > 0 && h[ktop*ldh+ktop-1] != 0:
	panic("lapack: block not isolated")
	case kbot+1 < n && h[(kbot+1)*ldh+kbot] != 0:
	panic("lapack: block not isolated")
	case len(sr) != kbot+1:
	panic("lapack: bad length of sr")
	case len(si) != kbot+1:
	panic("lapack: bad length of si")
	}
	}

	// Quick return for zero window size.
	if nw == 0 {
	work[0] = 1
	return 0, 0
	}

	// LAPACK code does not enforce the documented behavior
	// nw <= kbot-ktop+1
	// but we do (we panic above).
	jw := nw
	lwkopt := max(1, 2*nw)
	if jw > 2 {
	// Workspace query call to Dgehrd.
	impl.Dgehrd(jw, 0, jw-2, nil, 0, nil, work, -1)
	lwk1 := int(work[0])
	// Workspace query call to Dormhr.
	impl.Dormhr(blas.Right, blas.NoTrans, jw, jw, 0, jw-2, nil, 0, nil, nil, 0, work, -1)
	lwk2 := int(work[0])
	if recur > 0 {
	// Workspace query call to Dlaqr04.
	impl.Dlaqr04(true, true, jw, 0, jw-1, nil, 0, nil, nil, 0, jw-1, nil, 0, work, -1, recur-1)
	lwk3 := int(work[0])
	// Optimal workspace.
	lwkopt = max(jw+max(lwk1, lwk2), lwk3)
	} else {
	// Optimal workspace.
	lwkopt = jw + max(lwk1, lwk2)
	}
	}
	// Quick return in case of workspace query.
	if lwork == -1 {
	work[0] = float64(lwkopt)
	return 0, 0
	}

	// Machine constants.
	ulp := dlamchP
	smlnum := float64(n) / ulp * dlamchS

	// Setup deflation window.
	var s float64
	kwtop := kbot - jw + 1
	if kwtop != ktop {
	s = h[kwtop*ldh+kwtop-1]
	}
	if kwtop == kbot {
	// 1×1 deflation window.
	sr[kwtop] = h[kwtop*ldh+kwtop]
	si[kwtop] = 0
	ns = 1
	nd = 0
	if math.Abs(s) <= math.Max(smlnum, ulpmath.Abs(h[kwtopldh+kwtop])) {
	ns = 0
	nd = 1
	if kwtop > ktop {
	h[kwtop*ldh+kwtop-1] = 0
	}
	}
	work[0] = 1
	return ns, nd
	}

	// Convert to spike-triangular form. In case of a rare QR failure, this
	// routine continues to do aggressive early deflation using that part of
	// the deflation window that converged using infqr here and there to
	// keep track.
	impl.Dlacpy(blas.Upper, jw, jw, h[kwtop*ldh+kwtop:], ldh, t, ldt)
	bi := blas64.Implementation()
	bi.Dcopy(jw-1, h[(kwtop+1)*ldh+kwtop:], ldh+1, t[ldt:], ldt+1)
	impl.Dlaset(blas.All, jw, jw, 0, 1, v, ldv)
	nmin := impl.Ilaenv(12, "DLAQR3", "SV", jw, 0, jw-1, lwork)
	var infqr int
	if recur > 0 && jw > nmin {
	infqr = impl.Dlaqr04(true, true, jw, 0, jw-1, t, ldt, sr[kwtop:], si[kwtop:], 0, jw-1, v, ldv, work, lwork, recur-1)
	} else {
	infqr = impl.Dlahqr(true, true, jw, 0, jw-1, t, ldt, sr[kwtop:], si[kwtop:], 0, jw-1, v, ldv)
	}
	// Note that ilo == 0 which conveniently coincides with the success
	// value of infqr, that is, infqr as an index always points to the first
	// converged eigenvalue.

	// Dtrexc needs a clean margin near the diagonal.
	for j := 0; j < jw-3; j++ {
	t[(j+2)*ldt+j] = 0
	t[(j+3)*ldt+j] = 0
	}
	if jw >= 3 {
	t[(jw-1)*ldt+jw-3] = 0
	}

	ns = jw
	ilst := infqr
	// Deflation detection loop.
	for ilst < ns {
	bulge := false
	if ns >= 2 {
	bulge = t[(ns-1)*ldt+ns-2] != 0
	}
	if !bulge {
	// Real eigenvalue.
	abst := math.Abs(t[(ns-1)*ldt+ns-1])
	if abst == 0 {
	abst = math.Abs(s)
	}
	if math.Abs(sv[ns-1]) <= math.Max(smlnum, ulpabst) {
	// Deflatable.
	ns--
	} else {
	// Undeflatable, move it up out of the way.
	// Dtrexc can not fail in this case.
	_, ilst, _ = impl.Dtrexc(lapack.UpdateSchur, jw, t, ldt, v, ldv, ns-1, ilst, work)
	ilst++
	}
	continue
	}
	// Complex conjugate pair.
	abst := math.Abs(t[(ns-1)ldt+ns-1]) + math.Sqrt(math.Abs(t[(ns-1)ldt+ns-2]))math.Sqrt(math.Abs(t[(ns-2)ldt+ns-1]))
	if abst == 0 {
	abst = math.Abs(s)
	}
	if math.Max(math.Abs(sv[ns-1]), math.Abs(sv[ns-2])) <= math.Max(smlnum, ulp*abst) {
	// Deflatable.
	ns -= 2
	} else {
	// Undeflatable, move them up out of the way.
	// Dtrexc does the right thing with ilst in case of a
	// rare exchange failure.
	_, ilst, _ = impl.Dtrexc(lapack.UpdateSchur, jw, t, ldt, v, ldv, ns-1, ilst, work)
	ilst += 2
	}
	}

	// Return to Hessenberg form.
	if ns == 0 {
	s = 0
	}
	if ns < jw {
	// Sorting diagonal blocks of T improves accuracy for graded
	// matrices. Bubble sort deals well with exchange failures.
	sorted := false
	i := ns
	for !sorted {
	sorted = true
	kend := i - 1
	i = infqr
	var k int
	if i == ns-1 \|\| t[(i+1)*ldt+i] == 0 {
	k = i + 1
	} else {
	k = i + 2
	}
	for k <= kend {
	var evi float64
	if k == i+1 {
	evi = math.Abs(t[i*ldt+i])
	} else {
	evi = math.Abs(t[ildt+i]) + math.Sqrt(math.Abs(t[(i+1)ldt+i]))math.Sqrt(math.Abs(t[ildt+i+1]))
	}

	var evk float64
	if k == kend \|\| t[(k+1)*ldt+k] == 0 {
	evk = math.Abs(t[k*ldt+k])
	} else {
	evk = math.Abs(t[kldt+k]) + math.Sqrt(math.Abs(t[(k+1)ldt+k]))math.Sqrt(math.Abs(t[kldt+k+1]))
	}

	if evi >= evk {
	i = k
	} else {
	sorted = false
	_, ilst, ok := impl.Dtrexc(lapack.UpdateSchur, jw, t, ldt, v, ldv, i, k, work)
	if ok {
	i = ilst
	} else {
	i = k
	}
	}
	if i == kend \|\| t[(i+1)*ldt+i] == 0 {
	k = i + 1
	} else {
	k = i + 2
	}
	}
	}
	}

	// Restore shift/eigenvalue array from T.
	for i := jw - 1; i >= infqr; {
	if i == infqr \|\| t[i*ldt+i-1] == 0 {
	sr[kwtop+i] = t[i*ldt+i]
	si[kwtop+i] = 0
	i--
	continue
	}
	aa := t[(i-1)*ldt+i-1]
	bb := t[(i-1)*ldt+i]
	cc := t[i*ldt+i-1]
	dd := t[i*ldt+i]
	_, _, _, _, sr[kwtop+i-1], si[kwtop+i-1], sr[kwtop+i], si[kwtop+i], _, _ = impl.Dlanv2(aa, bb, cc, dd)
	i -= 2
	}

	if ns < jw \|\| s == 0 {
	if ns > 1 && s != 0 {
	// Reflect spike back into lower triangle.
	bi.Dcopy(ns, v[:ns], 1, work[:ns], 1)
	_, tau := impl.Dlarfg(ns, work[0], work[1:ns], 1)
	work[0] = 1
	impl.Dlaset(blas.Lower, jw-2, jw-2, 0, 0, t[2*ldt:], ldt)
	impl.Dlarf(blas.Left, ns, jw, work[:ns], 1, tau, t, ldt, work[jw:])
	impl.Dlarf(blas.Right, ns, ns, work[:ns], 1, tau, t, ldt, work[jw:])
	impl.Dlarf(blas.Right, jw, ns, work[:ns], 1, tau, v, ldv, work[jw:])
	impl.Dgehrd(jw, 0, ns-1, t, ldt, work[:jw-1], work[jw:], lwork-jw)
	}

	// Copy updated reduced window into place.
	if kwtop > 0 {
	h[kwtopldh+kwtop-1] = s v[0]
	}
	impl.Dlacpy(blas.Upper, jw, jw, t, ldt, h[kwtop*ldh+kwtop:], ldh)
	bi.Dcopy(jw-1, t[ldt:], ldt+1, h[(kwtop+1)*ldh+kwtop:], ldh+1)

	// Accumulate orthogonal matrix in order to update H and Z, if
	// requested.
	if ns > 1 && s != 0 {
	// work[:ns-1] contains the elementary reflectors stored
	// by a call to Dgehrd above.
	impl.Dormhr(blas.Right, blas.NoTrans, jw, ns, 0, ns-1,
	t, ldt, work[:ns-1], v, ldv, work[jw:], lwork-jw)
	}

	// Update vertical slab in H.
	var ltop int
	if !wantt {
	ltop = ktop
	}
	for krow := ltop; krow < kwtop; krow += nv {
	kln := min(nv, kwtop-krow)
	bi.Dgemm(blas.NoTrans, blas.NoTrans, kln, jw, jw,
	1, h[krow*ldh+kwtop:], ldh, v, ldv,
	0, wv, ldwv)
	impl.Dlacpy(blas.All, kln, jw, wv, ldwv, h[krow*ldh+kwtop:], ldh)
	}

	// Update horizontal slab in H.
	if wantt {
	for kcol := kbot + 1; kcol < n; kcol += nh {
	kln := min(nh, n-kcol)
	bi.Dgemm(blas.Trans, blas.NoTrans, jw, kln, jw,
	1, v, ldv, h[kwtop*ldh+kcol:], ldh,
	0, t, ldt)
	impl.Dlacpy(blas.All, jw, kln, t, ldt, h[kwtop*ldh+kcol:], ldh)
	}
	}

	// Update vertical slab in Z.
	if wantz {
	for krow := iloz; krow <= ihiz; krow += nv {
	kln := min(nv, ihiz-krow+1)
	bi.Dgemm(blas.NoTrans, blas.NoTrans, kln, jw, jw,
	1, z[krow*ldz+kwtop:], ldz, v, ldv,
	0, wv, ldwv)
	impl.Dlacpy(blas.All, kln, jw, wv, ldwv, z[krow*ldz+kwtop:], ldz)
	}
	}
	}

	// The number of deflations.
	nd = jw - ns
	// Shifts are converged eigenvalues that could not be deflated.
	// Subtracting infqr from the spike length takes care of the case of a
	// rare QR failure while calculating eigenvalues of the deflation
	// window.
	ns -= infqr
	work[0] = float64(lwkopt)
	return ns, nd
	}