| // Copyright ©2019 The Gonum Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| package gonum |
| |
| import ( |
| "math/cmplx" |
| |
| "gonum.org/v1/gonum/blas" |
| "gonum.org/v1/gonum/internal/asm/c128" |
| ) |
| |
| var _ blas.Complex128Level3 = Implementation{} |
| |
| // Zgemm performs one of the matrix-matrix operations |
| // C = alpha * op(A) * op(B) + beta * C |
| // where op(X) is one of |
| // op(X) = X or op(X) = Xᵀ or op(X) = Xᴴ, |
| // alpha and beta are scalars, and A, B and C are matrices, with op(A) an m×k matrix, |
| // op(B) a k×n matrix and C an m×n matrix. |
| func (Implementation) Zgemm(tA, tB blas.Transpose, m, n, k int, alpha complex128, a []complex128, lda int, b []complex128, ldb int, beta complex128, c []complex128, ldc int) { |
| switch tA { |
| default: |
| panic(badTranspose) |
| case blas.NoTrans, blas.Trans, blas.ConjTrans: |
| } |
| switch tB { |
| default: |
| panic(badTranspose) |
| case blas.NoTrans, blas.Trans, blas.ConjTrans: |
| } |
| switch { |
| case m < 0: |
| panic(mLT0) |
| case n < 0: |
| panic(nLT0) |
| case k < 0: |
| panic(kLT0) |
| } |
| rowA, colA := m, k |
| if tA != blas.NoTrans { |
| rowA, colA = k, m |
| } |
| if lda < max(1, colA) { |
| panic(badLdA) |
| } |
| rowB, colB := k, n |
| if tB != blas.NoTrans { |
| rowB, colB = n, k |
| } |
| if ldb < max(1, colB) { |
| panic(badLdB) |
| } |
| if ldc < max(1, n) { |
| panic(badLdC) |
| } |
| |
| // Quick return if possible. |
| if m == 0 || n == 0 { |
| return |
| } |
| |
| // For zero matrix size the following slice length checks are trivially satisfied. |
| if len(a) < (rowA-1)*lda+colA { |
| panic(shortA) |
| } |
| if len(b) < (rowB-1)*ldb+colB { |
| panic(shortB) |
| } |
| if len(c) < (m-1)*ldc+n { |
| panic(shortC) |
| } |
| |
| // Quick return if possible. |
| if (alpha == 0 || k == 0) && beta == 1 { |
| return |
| } |
| |
| if alpha == 0 { |
| if beta == 0 { |
| for i := 0; i < m; i++ { |
| for j := 0; j < n; j++ { |
| c[i*ldc+j] = 0 |
| } |
| } |
| } else { |
| for i := 0; i < m; i++ { |
| for j := 0; j < n; j++ { |
| c[i*ldc+j] *= beta |
| } |
| } |
| } |
| return |
| } |
| |
| switch tA { |
| case blas.NoTrans: |
| switch tB { |
| case blas.NoTrans: |
| // Form C = alpha * A * B + beta * C. |
| for i := 0; i < m; i++ { |
| switch { |
| case beta == 0: |
| for j := 0; j < n; j++ { |
| c[i*ldc+j] = 0 |
| } |
| case beta != 1: |
| for j := 0; j < n; j++ { |
| c[i*ldc+j] *= beta |
| } |
| } |
| for l := 0; l < k; l++ { |
| tmp := alpha * a[i*lda+l] |
| for j := 0; j < n; j++ { |
| c[i*ldc+j] += tmp * b[l*ldb+j] |
| } |
| } |
| } |
| case blas.Trans: |
| // Form C = alpha * A * Bᵀ + beta * C. |
| for i := 0; i < m; i++ { |
| switch { |
| case beta == 0: |
| for j := 0; j < n; j++ { |
| c[i*ldc+j] = 0 |
| } |
| case beta != 1: |
| for j := 0; j < n; j++ { |
| c[i*ldc+j] *= beta |
| } |
| } |
| for l := 0; l < k; l++ { |
| tmp := alpha * a[i*lda+l] |
| for j := 0; j < n; j++ { |
| c[i*ldc+j] += tmp * b[j*ldb+l] |
| } |
| } |
| } |
| case blas.ConjTrans: |
| // Form C = alpha * A * Bᴴ + beta * C. |
| for i := 0; i < m; i++ { |
| switch { |
| case beta == 0: |
| for j := 0; j < n; j++ { |
| c[i*ldc+j] = 0 |
| } |
| case beta != 1: |
| for j := 0; j < n; j++ { |
| c[i*ldc+j] *= beta |
| } |
| } |
| for l := 0; l < k; l++ { |
| tmp := alpha * a[i*lda+l] |
| for j := 0; j < n; j++ { |
| c[i*ldc+j] += tmp * cmplx.Conj(b[j*ldb+l]) |
| } |
| } |
| } |
| } |
| case blas.Trans: |
| switch tB { |
| case blas.NoTrans: |
| // Form C = alpha * Aᵀ * B + beta * C. |
| for i := 0; i < m; i++ { |
| for j := 0; j < n; j++ { |
| var tmp complex128 |
| for l := 0; l < k; l++ { |
| tmp += a[l*lda+i] * b[l*ldb+j] |
| } |
| if beta == 0 { |
| c[i*ldc+j] = alpha * tmp |
| } else { |
| c[i*ldc+j] = alpha*tmp + beta*c[i*ldc+j] |
| } |
| } |
| } |
| case blas.Trans: |
| // Form C = alpha * Aᵀ * Bᵀ + beta * C. |
| for i := 0; i < m; i++ { |
| for j := 0; j < n; j++ { |
| var tmp complex128 |
| for l := 0; l < k; l++ { |
| tmp += a[l*lda+i] * b[j*ldb+l] |
| } |
| if beta == 0 { |
| c[i*ldc+j] = alpha * tmp |
| } else { |
| c[i*ldc+j] = alpha*tmp + beta*c[i*ldc+j] |
| } |
| } |
| } |
| case blas.ConjTrans: |
| // Form C = alpha * Aᵀ * Bᴴ + beta * C. |
| for i := 0; i < m; i++ { |
| for j := 0; j < n; j++ { |
| var tmp complex128 |
| for l := 0; l < k; l++ { |
| tmp += a[l*lda+i] * cmplx.Conj(b[j*ldb+l]) |
| } |
| if beta == 0 { |
| c[i*ldc+j] = alpha * tmp |
| } else { |
| c[i*ldc+j] = alpha*tmp + beta*c[i*ldc+j] |
| } |
| } |
| } |
| } |
| case blas.ConjTrans: |
| switch tB { |
| case blas.NoTrans: |
| // Form C = alpha * Aᴴ * B + beta * C. |
| for i := 0; i < m; i++ { |
| for j := 0; j < n; j++ { |
| var tmp complex128 |
| for l := 0; l < k; l++ { |
| tmp += cmplx.Conj(a[l*lda+i]) * b[l*ldb+j] |
| } |
| if beta == 0 { |
| c[i*ldc+j] = alpha * tmp |
| } else { |
| c[i*ldc+j] = alpha*tmp + beta*c[i*ldc+j] |
| } |
| } |
| } |
| case blas.Trans: |
| // Form C = alpha * Aᴴ * Bᵀ + beta * C. |
| for i := 0; i < m; i++ { |
| for j := 0; j < n; j++ { |
| var tmp complex128 |
| for l := 0; l < k; l++ { |
| tmp += cmplx.Conj(a[l*lda+i]) * b[j*ldb+l] |
| } |
| if beta == 0 { |
| c[i*ldc+j] = alpha * tmp |
| } else { |
| c[i*ldc+j] = alpha*tmp + beta*c[i*ldc+j] |
| } |
| } |
| } |
| case blas.ConjTrans: |
| // Form C = alpha * Aᴴ * Bᴴ + beta * C. |
| for i := 0; i < m; i++ { |
| for j := 0; j < n; j++ { |
| var tmp complex128 |
| for l := 0; l < k; l++ { |
| tmp += cmplx.Conj(a[l*lda+i]) * cmplx.Conj(b[j*ldb+l]) |
| } |
| if beta == 0 { |
| c[i*ldc+j] = alpha * tmp |
| } else { |
| c[i*ldc+j] = alpha*tmp + beta*c[i*ldc+j] |
| } |
| } |
| } |
| } |
| } |
| } |
| |
| // Zhemm performs one of the matrix-matrix operations |
| // C = alpha*A*B + beta*C if side == blas.Left |
| // C = alpha*B*A + beta*C if side == blas.Right |
| // where alpha and beta are scalars, A is an m×m or n×n hermitian matrix and B |
| // and C are m×n matrices. The imaginary parts of the diagonal elements of A are |
| // assumed to be zero. |
| func (Implementation) Zhemm(side blas.Side, uplo blas.Uplo, m, n int, alpha complex128, a []complex128, lda int, b []complex128, ldb int, beta complex128, c []complex128, ldc int) { |
| na := m |
| if side == blas.Right { |
| na = n |
| } |
| switch { |
| case side != blas.Left && side != blas.Right: |
| panic(badSide) |
| case uplo != blas.Lower && uplo != blas.Upper: |
| panic(badUplo) |
| case m < 0: |
| panic(mLT0) |
| case n < 0: |
| panic(nLT0) |
| case lda < max(1, na): |
| panic(badLdA) |
| case ldb < max(1, n): |
| panic(badLdB) |
| case ldc < max(1, n): |
| panic(badLdC) |
| } |
| |
| // Quick return if possible. |
| if m == 0 || n == 0 { |
| return |
| } |
| |
| // For zero matrix size the following slice length checks are trivially satisfied. |
| if len(a) < lda*(na-1)+na { |
| panic(shortA) |
| } |
| if len(b) < ldb*(m-1)+n { |
| panic(shortB) |
| } |
| if len(c) < ldc*(m-1)+n { |
| panic(shortC) |
| } |
| |
| // Quick return if possible. |
| if alpha == 0 && beta == 1 { |
| return |
| } |
| |
| if alpha == 0 { |
| if beta == 0 { |
| for i := 0; i < m; i++ { |
| ci := c[i*ldc : i*ldc+n] |
| for j := range ci { |
| ci[j] = 0 |
| } |
| } |
| } else { |
| for i := 0; i < m; i++ { |
| ci := c[i*ldc : i*ldc+n] |
| c128.ScalUnitary(beta, ci) |
| } |
| } |
| return |
| } |
| |
| if side == blas.Left { |
| // Form C = alpha*A*B + beta*C. |
| for i := 0; i < m; i++ { |
| atmp := alpha * complex(real(a[i*lda+i]), 0) |
| bi := b[i*ldb : i*ldb+n] |
| ci := c[i*ldc : i*ldc+n] |
| if beta == 0 { |
| for j, bij := range bi { |
| ci[j] = atmp * bij |
| } |
| } else { |
| for j, bij := range bi { |
| ci[j] = atmp*bij + beta*ci[j] |
| } |
| } |
| if uplo == blas.Upper { |
| for k := 0; k < i; k++ { |
| atmp = alpha * cmplx.Conj(a[k*lda+i]) |
| c128.AxpyUnitary(atmp, b[k*ldb:k*ldb+n], ci) |
| } |
| for k := i + 1; k < m; k++ { |
| atmp = alpha * a[i*lda+k] |
| c128.AxpyUnitary(atmp, b[k*ldb:k*ldb+n], ci) |
| } |
| } else { |
| for k := 0; k < i; k++ { |
| atmp = alpha * a[i*lda+k] |
| c128.AxpyUnitary(atmp, b[k*ldb:k*ldb+n], ci) |
| } |
| for k := i + 1; k < m; k++ { |
| atmp = alpha * cmplx.Conj(a[k*lda+i]) |
| c128.AxpyUnitary(atmp, b[k*ldb:k*ldb+n], ci) |
| } |
| } |
| } |
| } else { |
| // Form C = alpha*B*A + beta*C. |
| if uplo == blas.Upper { |
| for i := 0; i < m; i++ { |
| for j := n - 1; j >= 0; j-- { |
| abij := alpha * b[i*ldb+j] |
| aj := a[j*lda+j+1 : j*lda+n] |
| bi := b[i*ldb+j+1 : i*ldb+n] |
| ci := c[i*ldc+j+1 : i*ldc+n] |
| var tmp complex128 |
| for k, ajk := range aj { |
| ci[k] += abij * ajk |
| tmp += bi[k] * cmplx.Conj(ajk) |
| } |
| ajj := complex(real(a[j*lda+j]), 0) |
| if beta == 0 { |
| c[i*ldc+j] = abij*ajj + alpha*tmp |
| } else { |
| c[i*ldc+j] = abij*ajj + alpha*tmp + beta*c[i*ldc+j] |
| } |
| } |
| } |
| } else { |
| for i := 0; i < m; i++ { |
| for j := 0; j < n; j++ { |
| abij := alpha * b[i*ldb+j] |
| aj := a[j*lda : j*lda+j] |
| bi := b[i*ldb : i*ldb+j] |
| ci := c[i*ldc : i*ldc+j] |
| var tmp complex128 |
| for k, ajk := range aj { |
| ci[k] += abij * ajk |
| tmp += bi[k] * cmplx.Conj(ajk) |
| } |
| ajj := complex(real(a[j*lda+j]), 0) |
| if beta == 0 { |
| c[i*ldc+j] = abij*ajj + alpha*tmp |
| } else { |
| c[i*ldc+j] = abij*ajj + alpha*tmp + beta*c[i*ldc+j] |
| } |
| } |
| } |
| } |
| } |
| } |
| |
| // Zherk performs one of the hermitian rank-k operations |
| // C = alpha*A*Aᴴ + beta*C if trans == blas.NoTrans |
| // C = alpha*Aᴴ*A + beta*C if trans == blas.ConjTrans |
| // where alpha and beta are real scalars, C is an n×n hermitian matrix and A is |
| // an n×k matrix in the first case and a k×n matrix in the second case. |
| // |
| // The imaginary parts of the diagonal elements of C are assumed to be zero, and |
| // on return they will be set to zero. |
| func (Implementation) Zherk(uplo blas.Uplo, trans blas.Transpose, n, k int, alpha float64, a []complex128, lda int, beta float64, c []complex128, ldc int) { |
| var rowA, colA int |
| switch trans { |
| default: |
| panic(badTranspose) |
| case blas.NoTrans: |
| rowA, colA = n, k |
| case blas.ConjTrans: |
| rowA, colA = k, n |
| } |
| switch { |
| case uplo != blas.Lower && uplo != blas.Upper: |
| panic(badUplo) |
| case n < 0: |
| panic(nLT0) |
| case k < 0: |
| panic(kLT0) |
| case lda < max(1, colA): |
| panic(badLdA) |
| case ldc < max(1, n): |
| panic(badLdC) |
| } |
| |
| // Quick return if possible. |
| if n == 0 { |
| return |
| } |
| |
| // For zero matrix size the following slice length checks are trivially satisfied. |
| if len(a) < (rowA-1)*lda+colA { |
| panic(shortA) |
| } |
| if len(c) < (n-1)*ldc+n { |
| panic(shortC) |
| } |
| |
| // Quick return if possible. |
| if (alpha == 0 || k == 0) && beta == 1 { |
| return |
| } |
| |
| if alpha == 0 { |
| if uplo == blas.Upper { |
| if beta == 0 { |
| for i := 0; i < n; i++ { |
| ci := c[i*ldc+i : i*ldc+n] |
| for j := range ci { |
| ci[j] = 0 |
| } |
| } |
| } else { |
| for i := 0; i < n; i++ { |
| ci := c[i*ldc+i : i*ldc+n] |
| ci[0] = complex(beta*real(ci[0]), 0) |
| if i != n-1 { |
| c128.DscalUnitary(beta, ci[1:]) |
| } |
| } |
| } |
| } else { |
| if beta == 0 { |
| for i := 0; i < n; i++ { |
| ci := c[i*ldc : i*ldc+i+1] |
| for j := range ci { |
| ci[j] = 0 |
| } |
| } |
| } else { |
| for i := 0; i < n; i++ { |
| ci := c[i*ldc : i*ldc+i+1] |
| if i != 0 { |
| c128.DscalUnitary(beta, ci[:i]) |
| } |
| ci[i] = complex(beta*real(ci[i]), 0) |
| } |
| } |
| } |
| return |
| } |
| |
| calpha := complex(alpha, 0) |
| if trans == blas.NoTrans { |
| // Form C = alpha*A*Aᴴ + beta*C. |
| cbeta := complex(beta, 0) |
| if uplo == blas.Upper { |
| for i := 0; i < n; i++ { |
| ci := c[i*ldc+i : i*ldc+n] |
| ai := a[i*lda : i*lda+k] |
| switch { |
| case beta == 0: |
| // Handle the i-th diagonal element of C. |
| ci[0] = complex(alpha*real(c128.DotcUnitary(ai, ai)), 0) |
| // Handle the remaining elements on the i-th row of C. |
| for jc := range ci[1:] { |
| j := i + 1 + jc |
| ci[jc+1] = calpha * c128.DotcUnitary(a[j*lda:j*lda+k], ai) |
| } |
| case beta != 1: |
| cii := calpha*c128.DotcUnitary(ai, ai) + cbeta*ci[0] |
| ci[0] = complex(real(cii), 0) |
| for jc, cij := range ci[1:] { |
| j := i + 1 + jc |
| ci[jc+1] = calpha*c128.DotcUnitary(a[j*lda:j*lda+k], ai) + cbeta*cij |
| } |
| default: |
| cii := calpha*c128.DotcUnitary(ai, ai) + ci[0] |
| ci[0] = complex(real(cii), 0) |
| for jc, cij := range ci[1:] { |
| j := i + 1 + jc |
| ci[jc+1] = calpha*c128.DotcUnitary(a[j*lda:j*lda+k], ai) + cij |
| } |
| } |
| } |
| } else { |
| for i := 0; i < n; i++ { |
| ci := c[i*ldc : i*ldc+i+1] |
| ai := a[i*lda : i*lda+k] |
| switch { |
| case beta == 0: |
| // Handle the first i-1 elements on the i-th row of C. |
| for j := range ci[:i] { |
| ci[j] = calpha * c128.DotcUnitary(a[j*lda:j*lda+k], ai) |
| } |
| // Handle the i-th diagonal element of C. |
| ci[i] = complex(alpha*real(c128.DotcUnitary(ai, ai)), 0) |
| case beta != 1: |
| for j, cij := range ci[:i] { |
| ci[j] = calpha*c128.DotcUnitary(a[j*lda:j*lda+k], ai) + cbeta*cij |
| } |
| cii := calpha*c128.DotcUnitary(ai, ai) + cbeta*ci[i] |
| ci[i] = complex(real(cii), 0) |
| default: |
| for j, cij := range ci[:i] { |
| ci[j] = calpha*c128.DotcUnitary(a[j*lda:j*lda+k], ai) + cij |
| } |
| cii := calpha*c128.DotcUnitary(ai, ai) + ci[i] |
| ci[i] = complex(real(cii), 0) |
| } |
| } |
| } |
| } else { |
| // Form C = alpha*Aᴴ*A + beta*C. |
| if uplo == blas.Upper { |
| for i := 0; i < n; i++ { |
| ci := c[i*ldc+i : i*ldc+n] |
| switch { |
| case beta == 0: |
| for jc := range ci { |
| ci[jc] = 0 |
| } |
| case beta != 1: |
| c128.DscalUnitary(beta, ci) |
| ci[0] = complex(real(ci[0]), 0) |
| default: |
| ci[0] = complex(real(ci[0]), 0) |
| } |
| for j := 0; j < k; j++ { |
| aji := cmplx.Conj(a[j*lda+i]) |
| if aji != 0 { |
| c128.AxpyUnitary(calpha*aji, a[j*lda+i:j*lda+n], ci) |
| } |
| } |
| c[i*ldc+i] = complex(real(c[i*ldc+i]), 0) |
| } |
| } else { |
| for i := 0; i < n; i++ { |
| ci := c[i*ldc : i*ldc+i+1] |
| switch { |
| case beta == 0: |
| for j := range ci { |
| ci[j] = 0 |
| } |
| case beta != 1: |
| c128.DscalUnitary(beta, ci) |
| ci[i] = complex(real(ci[i]), 0) |
| default: |
| ci[i] = complex(real(ci[i]), 0) |
| } |
| for j := 0; j < k; j++ { |
| aji := cmplx.Conj(a[j*lda+i]) |
| if aji != 0 { |
| c128.AxpyUnitary(calpha*aji, a[j*lda:j*lda+i+1], ci) |
| } |
| } |
| c[i*ldc+i] = complex(real(c[i*ldc+i]), 0) |
| } |
| } |
| } |
| } |
| |
| // Zher2k performs one of the hermitian rank-2k operations |
| // C = alpha*A*Bᴴ + conj(alpha)*B*Aᴴ + beta*C if trans == blas.NoTrans |
| // C = alpha*Aᴴ*B + conj(alpha)*Bᴴ*A + beta*C if trans == blas.ConjTrans |
| // where alpha and beta are scalars with beta real, C is an n×n hermitian matrix |
| // and A and B are n×k matrices in the first case and k×n matrices in the second case. |
| // |
| // The imaginary parts of the diagonal elements of C are assumed to be zero, and |
| // on return they will be set to zero. |
| func (Implementation) Zher2k(uplo blas.Uplo, trans blas.Transpose, n, k int, alpha complex128, a []complex128, lda int, b []complex128, ldb int, beta float64, c []complex128, ldc int) { |
| var row, col int |
| switch trans { |
| default: |
| panic(badTranspose) |
| case blas.NoTrans: |
| row, col = n, k |
| case blas.ConjTrans: |
| row, col = k, n |
| } |
| switch { |
| case uplo != blas.Lower && uplo != blas.Upper: |
| panic(badUplo) |
| case n < 0: |
| panic(nLT0) |
| case k < 0: |
| panic(kLT0) |
| case lda < max(1, col): |
| panic(badLdA) |
| case ldb < max(1, col): |
| panic(badLdB) |
| case ldc < max(1, n): |
| panic(badLdC) |
| } |
| |
| // Quick return if possible. |
| if n == 0 { |
| return |
| } |
| |
| // For zero matrix size the following slice length checks are trivially satisfied. |
| if len(a) < (row-1)*lda+col { |
| panic(shortA) |
| } |
| if len(b) < (row-1)*ldb+col { |
| panic(shortB) |
| } |
| if len(c) < (n-1)*ldc+n { |
| panic(shortC) |
| } |
| |
| // Quick return if possible. |
| if (alpha == 0 || k == 0) && beta == 1 { |
| return |
| } |
| |
| if alpha == 0 { |
| if uplo == blas.Upper { |
| if beta == 0 { |
| for i := 0; i < n; i++ { |
| ci := c[i*ldc+i : i*ldc+n] |
| for j := range ci { |
| ci[j] = 0 |
| } |
| } |
| } else { |
| for i := 0; i < n; i++ { |
| ci := c[i*ldc+i : i*ldc+n] |
| ci[0] = complex(beta*real(ci[0]), 0) |
| if i != n-1 { |
| c128.DscalUnitary(beta, ci[1:]) |
| } |
| } |
| } |
| } else { |
| if beta == 0 { |
| for i := 0; i < n; i++ { |
| ci := c[i*ldc : i*ldc+i+1] |
| for j := range ci { |
| ci[j] = 0 |
| } |
| } |
| } else { |
| for i := 0; i < n; i++ { |
| ci := c[i*ldc : i*ldc+i+1] |
| if i != 0 { |
| c128.DscalUnitary(beta, ci[:i]) |
| } |
| ci[i] = complex(beta*real(ci[i]), 0) |
| } |
| } |
| } |
| return |
| } |
| |
| conjalpha := cmplx.Conj(alpha) |
| cbeta := complex(beta, 0) |
| if trans == blas.NoTrans { |
| // Form C = alpha*A*Bᴴ + conj(alpha)*B*Aᴴ + beta*C. |
| if uplo == blas.Upper { |
| for i := 0; i < n; i++ { |
| ci := c[i*ldc+i+1 : i*ldc+n] |
| ai := a[i*lda : i*lda+k] |
| bi := b[i*ldb : i*ldb+k] |
| if beta == 0 { |
| cii := alpha*c128.DotcUnitary(bi, ai) + conjalpha*c128.DotcUnitary(ai, bi) |
| c[i*ldc+i] = complex(real(cii), 0) |
| for jc := range ci { |
| j := i + 1 + jc |
| ci[jc] = alpha*c128.DotcUnitary(b[j*ldb:j*ldb+k], ai) + conjalpha*c128.DotcUnitary(a[j*lda:j*lda+k], bi) |
| } |
| } else { |
| cii := alpha*c128.DotcUnitary(bi, ai) + conjalpha*c128.DotcUnitary(ai, bi) + cbeta*c[i*ldc+i] |
| c[i*ldc+i] = complex(real(cii), 0) |
| for jc, cij := range ci { |
| j := i + 1 + jc |
| ci[jc] = alpha*c128.DotcUnitary(b[j*ldb:j*ldb+k], ai) + conjalpha*c128.DotcUnitary(a[j*lda:j*lda+k], bi) + cbeta*cij |
| } |
| } |
| } |
| } else { |
| for i := 0; i < n; i++ { |
| ci := c[i*ldc : i*ldc+i] |
| ai := a[i*lda : i*lda+k] |
| bi := b[i*ldb : i*ldb+k] |
| if beta == 0 { |
| for j := range ci { |
| ci[j] = alpha*c128.DotcUnitary(b[j*ldb:j*ldb+k], ai) + conjalpha*c128.DotcUnitary(a[j*lda:j*lda+k], bi) |
| } |
| cii := alpha*c128.DotcUnitary(bi, ai) + conjalpha*c128.DotcUnitary(ai, bi) |
| c[i*ldc+i] = complex(real(cii), 0) |
| } else { |
| for j, cij := range ci { |
| ci[j] = alpha*c128.DotcUnitary(b[j*ldb:j*ldb+k], ai) + conjalpha*c128.DotcUnitary(a[j*lda:j*lda+k], bi) + cbeta*cij |
| } |
| cii := alpha*c128.DotcUnitary(bi, ai) + conjalpha*c128.DotcUnitary(ai, bi) + cbeta*c[i*ldc+i] |
| c[i*ldc+i] = complex(real(cii), 0) |
| } |
| } |
| } |
| } else { |
| // Form C = alpha*Aᴴ*B + conj(alpha)*Bᴴ*A + beta*C. |
| if uplo == blas.Upper { |
| for i := 0; i < n; i++ { |
| ci := c[i*ldc+i : i*ldc+n] |
| switch { |
| case beta == 0: |
| for jc := range ci { |
| ci[jc] = 0 |
| } |
| case beta != 1: |
| c128.DscalUnitary(beta, ci) |
| ci[0] = complex(real(ci[0]), 0) |
| default: |
| ci[0] = complex(real(ci[0]), 0) |
| } |
| for j := 0; j < k; j++ { |
| aji := a[j*lda+i] |
| bji := b[j*ldb+i] |
| if aji != 0 { |
| c128.AxpyUnitary(alpha*cmplx.Conj(aji), b[j*ldb+i:j*ldb+n], ci) |
| } |
| if bji != 0 { |
| c128.AxpyUnitary(conjalpha*cmplx.Conj(bji), a[j*lda+i:j*lda+n], ci) |
| } |
| } |
| ci[0] = complex(real(ci[0]), 0) |
| } |
| } else { |
| for i := 0; i < n; i++ { |
| ci := c[i*ldc : i*ldc+i+1] |
| switch { |
| case beta == 0: |
| for j := range ci { |
| ci[j] = 0 |
| } |
| case beta != 1: |
| c128.DscalUnitary(beta, ci) |
| ci[i] = complex(real(ci[i]), 0) |
| default: |
| ci[i] = complex(real(ci[i]), 0) |
| } |
| for j := 0; j < k; j++ { |
| aji := a[j*lda+i] |
| bji := b[j*ldb+i] |
| if aji != 0 { |
| c128.AxpyUnitary(alpha*cmplx.Conj(aji), b[j*ldb:j*ldb+i+1], ci) |
| } |
| if bji != 0 { |
| c128.AxpyUnitary(conjalpha*cmplx.Conj(bji), a[j*lda:j*lda+i+1], ci) |
| } |
| } |
| ci[i] = complex(real(ci[i]), 0) |
| } |
| } |
| } |
| } |
| |
| // Zsymm performs one of the matrix-matrix operations |
| // C = alpha*A*B + beta*C if side == blas.Left |
| // C = alpha*B*A + beta*C if side == blas.Right |
| // where alpha and beta are scalars, A is an m×m or n×n symmetric matrix and B |
| // and C are m×n matrices. |
| func (Implementation) Zsymm(side blas.Side, uplo blas.Uplo, m, n int, alpha complex128, a []complex128, lda int, b []complex128, ldb int, beta complex128, c []complex128, ldc int) { |
| na := m |
| if side == blas.Right { |
| na = n |
| } |
| switch { |
| case side != blas.Left && side != blas.Right: |
| panic(badSide) |
| case uplo != blas.Lower && uplo != blas.Upper: |
| panic(badUplo) |
| case m < 0: |
| panic(mLT0) |
| case n < 0: |
| panic(nLT0) |
| case lda < max(1, na): |
| panic(badLdA) |
| case ldb < max(1, n): |
| panic(badLdB) |
| case ldc < max(1, n): |
| panic(badLdC) |
| } |
| |
| // Quick return if possible. |
| if m == 0 || n == 0 { |
| return |
| } |
| |
| // For zero matrix size the following slice length checks are trivially satisfied. |
| if len(a) < lda*(na-1)+na { |
| panic(shortA) |
| } |
| if len(b) < ldb*(m-1)+n { |
| panic(shortB) |
| } |
| if len(c) < ldc*(m-1)+n { |
| panic(shortC) |
| } |
| |
| // Quick return if possible. |
| if alpha == 0 && beta == 1 { |
| return |
| } |
| |
| if alpha == 0 { |
| if beta == 0 { |
| for i := 0; i < m; i++ { |
| ci := c[i*ldc : i*ldc+n] |
| for j := range ci { |
| ci[j] = 0 |
| } |
| } |
| } else { |
| for i := 0; i < m; i++ { |
| ci := c[i*ldc : i*ldc+n] |
| c128.ScalUnitary(beta, ci) |
| } |
| } |
| return |
| } |
| |
| if side == blas.Left { |
| // Form C = alpha*A*B + beta*C. |
| for i := 0; i < m; i++ { |
| atmp := alpha * a[i*lda+i] |
| bi := b[i*ldb : i*ldb+n] |
| ci := c[i*ldc : i*ldc+n] |
| if beta == 0 { |
| for j, bij := range bi { |
| ci[j] = atmp * bij |
| } |
| } else { |
| for j, bij := range bi { |
| ci[j] = atmp*bij + beta*ci[j] |
| } |
| } |
| if uplo == blas.Upper { |
| for k := 0; k < i; k++ { |
| atmp = alpha * a[k*lda+i] |
| c128.AxpyUnitary(atmp, b[k*ldb:k*ldb+n], ci) |
| } |
| for k := i + 1; k < m; k++ { |
| atmp = alpha * a[i*lda+k] |
| c128.AxpyUnitary(atmp, b[k*ldb:k*ldb+n], ci) |
| } |
| } else { |
| for k := 0; k < i; k++ { |
| atmp = alpha * a[i*lda+k] |
| c128.AxpyUnitary(atmp, b[k*ldb:k*ldb+n], ci) |
| } |
| for k := i + 1; k < m; k++ { |
| atmp = alpha * a[k*lda+i] |
| c128.AxpyUnitary(atmp, b[k*ldb:k*ldb+n], ci) |
| } |
| } |
| } |
| } else { |
| // Form C = alpha*B*A + beta*C. |
| if uplo == blas.Upper { |
| for i := 0; i < m; i++ { |
| for j := n - 1; j >= 0; j-- { |
| abij := alpha * b[i*ldb+j] |
| aj := a[j*lda+j+1 : j*lda+n] |
| bi := b[i*ldb+j+1 : i*ldb+n] |
| ci := c[i*ldc+j+1 : i*ldc+n] |
| var tmp complex128 |
| for k, ajk := range aj { |
| ci[k] += abij * ajk |
| tmp += bi[k] * ajk |
| } |
| if beta == 0 { |
| c[i*ldc+j] = abij*a[j*lda+j] + alpha*tmp |
| } else { |
| c[i*ldc+j] = abij*a[j*lda+j] + alpha*tmp + beta*c[i*ldc+j] |
| } |
| } |
| } |
| } else { |
| for i := 0; i < m; i++ { |
| for j := 0; j < n; j++ { |
| abij := alpha * b[i*ldb+j] |
| aj := a[j*lda : j*lda+j] |
| bi := b[i*ldb : i*ldb+j] |
| ci := c[i*ldc : i*ldc+j] |
| var tmp complex128 |
| for k, ajk := range aj { |
| ci[k] += abij * ajk |
| tmp += bi[k] * ajk |
| } |
| if beta == 0 { |
| c[i*ldc+j] = abij*a[j*lda+j] + alpha*tmp |
| } else { |
| c[i*ldc+j] = abij*a[j*lda+j] + alpha*tmp + beta*c[i*ldc+j] |
| } |
| } |
| } |
| } |
| } |
| } |
| |
| // Zsyrk performs one of the symmetric rank-k operations |
| // C = alpha*A*Aᵀ + beta*C if trans == blas.NoTrans |
| // C = alpha*Aᵀ*A + beta*C if trans == blas.Trans |
| // where alpha and beta are scalars, C is an n×n symmetric matrix and A is |
| // an n×k matrix in the first case and a k×n matrix in the second case. |
| func (Implementation) Zsyrk(uplo blas.Uplo, trans blas.Transpose, n, k int, alpha complex128, a []complex128, lda int, beta complex128, c []complex128, ldc int) { |
| var rowA, colA int |
| switch trans { |
| default: |
| panic(badTranspose) |
| case blas.NoTrans: |
| rowA, colA = n, k |
| case blas.Trans: |
| rowA, colA = k, n |
| } |
| switch { |
| case uplo != blas.Lower && uplo != blas.Upper: |
| panic(badUplo) |
| case n < 0: |
| panic(nLT0) |
| case k < 0: |
| panic(kLT0) |
| case lda < max(1, colA): |
| panic(badLdA) |
| case ldc < max(1, n): |
| panic(badLdC) |
| } |
| |
| // Quick return if possible. |
| if n == 0 { |
| return |
| } |
| |
| // For zero matrix size the following slice length checks are trivially satisfied. |
| if len(a) < (rowA-1)*lda+colA { |
| panic(shortA) |
| } |
| if len(c) < (n-1)*ldc+n { |
| panic(shortC) |
| } |
| |
| // Quick return if possible. |
| if (alpha == 0 || k == 0) && beta == 1 { |
| return |
| } |
| |
| if alpha == 0 { |
| if uplo == blas.Upper { |
| if beta == 0 { |
| for i := 0; i < n; i++ { |
| ci := c[i*ldc+i : i*ldc+n] |
| for j := range ci { |
| ci[j] = 0 |
| } |
| } |
| } else { |
| for i := 0; i < n; i++ { |
| ci := c[i*ldc+i : i*ldc+n] |
| c128.ScalUnitary(beta, ci) |
| } |
| } |
| } else { |
| if beta == 0 { |
| for i := 0; i < n; i++ { |
| ci := c[i*ldc : i*ldc+i+1] |
| for j := range ci { |
| ci[j] = 0 |
| } |
| } |
| } else { |
| for i := 0; i < n; i++ { |
| ci := c[i*ldc : i*ldc+i+1] |
| c128.ScalUnitary(beta, ci) |
| } |
| } |
| } |
| return |
| } |
| |
| if trans == blas.NoTrans { |
| // Form C = alpha*A*Aᵀ + beta*C. |
| if uplo == blas.Upper { |
| for i := 0; i < n; i++ { |
| ci := c[i*ldc+i : i*ldc+n] |
| ai := a[i*lda : i*lda+k] |
| if beta == 0 { |
| for jc := range ci { |
| j := i + jc |
| ci[jc] = alpha * c128.DotuUnitary(ai, a[j*lda:j*lda+k]) |
| } |
| } else { |
| for jc, cij := range ci { |
| j := i + jc |
| ci[jc] = beta*cij + alpha*c128.DotuUnitary(ai, a[j*lda:j*lda+k]) |
| } |
| } |
| } |
| } else { |
| for i := 0; i < n; i++ { |
| ci := c[i*ldc : i*ldc+i+1] |
| ai := a[i*lda : i*lda+k] |
| if beta == 0 { |
| for j := range ci { |
| ci[j] = alpha * c128.DotuUnitary(ai, a[j*lda:j*lda+k]) |
| } |
| } else { |
| for j, cij := range ci { |
| ci[j] = beta*cij + alpha*c128.DotuUnitary(ai, a[j*lda:j*lda+k]) |
| } |
| } |
| } |
| } |
| } else { |
| // Form C = alpha*Aᵀ*A + beta*C. |
| if uplo == blas.Upper { |
| for i := 0; i < n; i++ { |
| ci := c[i*ldc+i : i*ldc+n] |
| switch { |
| case beta == 0: |
| for jc := range ci { |
| ci[jc] = 0 |
| } |
| case beta != 1: |
| for jc := range ci { |
| ci[jc] *= beta |
| } |
| } |
| for j := 0; j < k; j++ { |
| aji := a[j*lda+i] |
| if aji != 0 { |
| c128.AxpyUnitary(alpha*aji, a[j*lda+i:j*lda+n], ci) |
| } |
| } |
| } |
| } else { |
| for i := 0; i < n; i++ { |
| ci := c[i*ldc : i*ldc+i+1] |
| switch { |
| case beta == 0: |
| for j := range ci { |
| ci[j] = 0 |
| } |
| case beta != 1: |
| for j := range ci { |
| ci[j] *= beta |
| } |
| } |
| for j := 0; j < k; j++ { |
| aji := a[j*lda+i] |
| if aji != 0 { |
| c128.AxpyUnitary(alpha*aji, a[j*lda:j*lda+i+1], ci) |
| } |
| } |
| } |
| } |
| } |
| } |
| |
| // Zsyr2k performs one of the symmetric rank-2k operations |
| // C = alpha*A*Bᵀ + alpha*B*Aᵀ + beta*C if trans == blas.NoTrans |
| // C = alpha*Aᵀ*B + alpha*Bᵀ*A + beta*C if trans == blas.Trans |
| // where alpha and beta are scalars, C is an n×n symmetric matrix and A and B |
| // are n×k matrices in the first case and k×n matrices in the second case. |
| func (Implementation) Zsyr2k(uplo blas.Uplo, trans blas.Transpose, n, k int, alpha complex128, a []complex128, lda int, b []complex128, ldb int, beta complex128, c []complex128, ldc int) { |
| var row, col int |
| switch trans { |
| default: |
| panic(badTranspose) |
| case blas.NoTrans: |
| row, col = n, k |
| case blas.Trans: |
| row, col = k, n |
| } |
| switch { |
| case uplo != blas.Lower && uplo != blas.Upper: |
| panic(badUplo) |
| case n < 0: |
| panic(nLT0) |
| case k < 0: |
| panic(kLT0) |
| case lda < max(1, col): |
| panic(badLdA) |
| case ldb < max(1, col): |
| panic(badLdB) |
| case ldc < max(1, n): |
| panic(badLdC) |
| } |
| |
| // Quick return if possible. |
| if n == 0 { |
| return |
| } |
| |
| // For zero matrix size the following slice length checks are trivially satisfied. |
| if len(a) < (row-1)*lda+col { |
| panic(shortA) |
| } |
| if len(b) < (row-1)*ldb+col { |
| panic(shortB) |
| } |
| if len(c) < (n-1)*ldc+n { |
| panic(shortC) |
| } |
| |
| // Quick return if possible. |
| if (alpha == 0 || k == 0) && beta == 1 { |
| return |
| } |
| |
| if alpha == 0 { |
| if uplo == blas.Upper { |
| if beta == 0 { |
| for i := 0; i < n; i++ { |
| ci := c[i*ldc+i : i*ldc+n] |
| for j := range ci { |
| ci[j] = 0 |
| } |
| } |
| } else { |
| for i := 0; i < n; i++ { |
| ci := c[i*ldc+i : i*ldc+n] |
| c128.ScalUnitary(beta, ci) |
| } |
| } |
| } else { |
| if beta == 0 { |
| for i := 0; i < n; i++ { |
| ci := c[i*ldc : i*ldc+i+1] |
| for j := range ci { |
| ci[j] = 0 |
| } |
| } |
| } else { |
| for i := 0; i < n; i++ { |
| ci := c[i*ldc : i*ldc+i+1] |
| c128.ScalUnitary(beta, ci) |
| } |
| } |
| } |
| return |
| } |
| |
| if trans == blas.NoTrans { |
| // Form C = alpha*A*Bᵀ + alpha*B*Aᵀ + beta*C. |
| if uplo == blas.Upper { |
| for i := 0; i < n; i++ { |
| ci := c[i*ldc+i : i*ldc+n] |
| ai := a[i*lda : i*lda+k] |
| bi := b[i*ldb : i*ldb+k] |
| if beta == 0 { |
| for jc := range ci { |
| j := i + jc |
| ci[jc] = alpha*c128.DotuUnitary(ai, b[j*ldb:j*ldb+k]) + alpha*c128.DotuUnitary(bi, a[j*lda:j*lda+k]) |
| } |
| } else { |
| for jc, cij := range ci { |
| j := i + jc |
| ci[jc] = alpha*c128.DotuUnitary(ai, b[j*ldb:j*ldb+k]) + alpha*c128.DotuUnitary(bi, a[j*lda:j*lda+k]) + beta*cij |
| } |
| } |
| } |
| } else { |
| for i := 0; i < n; i++ { |
| ci := c[i*ldc : i*ldc+i+1] |
| ai := a[i*lda : i*lda+k] |
| bi := b[i*ldb : i*ldb+k] |
| if beta == 0 { |
| for j := range ci { |
| ci[j] = alpha*c128.DotuUnitary(ai, b[j*ldb:j*ldb+k]) + alpha*c128.DotuUnitary(bi, a[j*lda:j*lda+k]) |
| } |
| } else { |
| for j, cij := range ci { |
| ci[j] = alpha*c128.DotuUnitary(ai, b[j*ldb:j*ldb+k]) + alpha*c128.DotuUnitary(bi, a[j*lda:j*lda+k]) + beta*cij |
| } |
| } |
| } |
| } |
| } else { |
| // Form C = alpha*Aᵀ*B + alpha*Bᵀ*A + beta*C. |
| if uplo == blas.Upper { |
| for i := 0; i < n; i++ { |
| ci := c[i*ldc+i : i*ldc+n] |
| switch { |
| case beta == 0: |
| for jc := range ci { |
| ci[jc] = 0 |
| } |
| case beta != 1: |
| for jc := range ci { |
| ci[jc] *= beta |
| } |
| } |
| for j := 0; j < k; j++ { |
| aji := a[j*lda+i] |
| bji := b[j*ldb+i] |
| if aji != 0 { |
| c128.AxpyUnitary(alpha*aji, b[j*ldb+i:j*ldb+n], ci) |
| } |
| if bji != 0 { |
| c128.AxpyUnitary(alpha*bji, a[j*lda+i:j*lda+n], ci) |
| } |
| } |
| } |
| } else { |
| for i := 0; i < n; i++ { |
| ci := c[i*ldc : i*ldc+i+1] |
| switch { |
| case beta == 0: |
| for j := range ci { |
| ci[j] = 0 |
| } |
| case beta != 1: |
| for j := range ci { |
| ci[j] *= beta |
| } |
| } |
| for j := 0; j < k; j++ { |
| aji := a[j*lda+i] |
| bji := b[j*ldb+i] |
| if aji != 0 { |
| c128.AxpyUnitary(alpha*aji, b[j*ldb:j*ldb+i+1], ci) |
| } |
| if bji != 0 { |
| c128.AxpyUnitary(alpha*bji, a[j*lda:j*lda+i+1], ci) |
| } |
| } |
| } |
| } |
| } |
| } |
| |
| // Ztrmm performs one of the matrix-matrix operations |
| // B = alpha * op(A) * B if side == blas.Left, |
| // B = alpha * B * op(A) if side == blas.Right, |
| // where alpha is a scalar, B is an m×n matrix, A is a unit, or non-unit, |
| // upper or lower triangular matrix and op(A) is one of |
| // op(A) = A if trans == blas.NoTrans, |
| // op(A) = Aᵀ if trans == blas.Trans, |
| // op(A) = Aᴴ if trans == blas.ConjTrans. |
| func (Implementation) Ztrmm(side blas.Side, uplo blas.Uplo, trans blas.Transpose, diag blas.Diag, m, n int, alpha complex128, a []complex128, lda int, b []complex128, ldb int) { |
| na := m |
| if side == blas.Right { |
| na = n |
| } |
| switch { |
| case side != blas.Left && side != blas.Right: |
| panic(badSide) |
| case uplo != blas.Lower && uplo != blas.Upper: |
| panic(badUplo) |
| case trans != blas.NoTrans && trans != blas.Trans && trans != blas.ConjTrans: |
| panic(badTranspose) |
| case diag != blas.Unit && diag != blas.NonUnit: |
| panic(badDiag) |
| case m < 0: |
| panic(mLT0) |
| case n < 0: |
| panic(nLT0) |
| case lda < max(1, na): |
| panic(badLdA) |
| case ldb < max(1, n): |
| panic(badLdB) |
| } |
| |
| // Quick return if possible. |
| if m == 0 || n == 0 { |
| return |
| } |
| |
| // For zero matrix size the following slice length checks are trivially satisfied. |
| if len(a) < (na-1)*lda+na { |
| panic(shortA) |
| } |
| if len(b) < (m-1)*ldb+n { |
| panic(shortB) |
| } |
| |
| // Quick return if possible. |
| if alpha == 0 { |
| for i := 0; i < m; i++ { |
| bi := b[i*ldb : i*ldb+n] |
| for j := range bi { |
| bi[j] = 0 |
| } |
| } |
| return |
| } |
| |
| noConj := trans != blas.ConjTrans |
| noUnit := diag == blas.NonUnit |
| if side == blas.Left { |
| if trans == blas.NoTrans { |
| // Form B = alpha*A*B. |
| if uplo == blas.Upper { |
| for i := 0; i < m; i++ { |
| aii := alpha |
| if noUnit { |
| aii *= a[i*lda+i] |
| } |
| bi := b[i*ldb : i*ldb+n] |
| for j := range bi { |
| bi[j] *= aii |
| } |
| for ja, aij := range a[i*lda+i+1 : i*lda+m] { |
| j := ja + i + 1 |
| if aij != 0 { |
| c128.AxpyUnitary(alpha*aij, b[j*ldb:j*ldb+n], bi) |
| } |
| } |
| } |
| } else { |
| for i := m - 1; i >= 0; i-- { |
| aii := alpha |
| if noUnit { |
| aii *= a[i*lda+i] |
| } |
| bi := b[i*ldb : i*ldb+n] |
| for j := range bi { |
| bi[j] *= aii |
| } |
| for j, aij := range a[i*lda : i*lda+i] { |
| if aij != 0 { |
| c128.AxpyUnitary(alpha*aij, b[j*ldb:j*ldb+n], bi) |
| } |
| } |
| } |
| } |
| } else { |
| // Form B = alpha*Aᵀ*B or B = alpha*Aᴴ*B. |
| if uplo == blas.Upper { |
| for k := m - 1; k >= 0; k-- { |
| bk := b[k*ldb : k*ldb+n] |
| for ja, ajk := range a[k*lda+k+1 : k*lda+m] { |
| if ajk == 0 { |
| continue |
| } |
| j := k + 1 + ja |
| if noConj { |
| c128.AxpyUnitary(alpha*ajk, bk, b[j*ldb:j*ldb+n]) |
| } else { |
| c128.AxpyUnitary(alpha*cmplx.Conj(ajk), bk, b[j*ldb:j*ldb+n]) |
| } |
| } |
| akk := alpha |
| if noUnit { |
| if noConj { |
| akk *= a[k*lda+k] |
| } else { |
| akk *= cmplx.Conj(a[k*lda+k]) |
| } |
| } |
| if akk != 1 { |
| c128.ScalUnitary(akk, bk) |
| } |
| } |
| } else { |
| for k := 0; k < m; k++ { |
| bk := b[k*ldb : k*ldb+n] |
| for j, ajk := range a[k*lda : k*lda+k] { |
| if ajk == 0 { |
| continue |
| } |
| if noConj { |
| c128.AxpyUnitary(alpha*ajk, bk, b[j*ldb:j*ldb+n]) |
| } else { |
| c128.AxpyUnitary(alpha*cmplx.Conj(ajk), bk, b[j*ldb:j*ldb+n]) |
| } |
| } |
| akk := alpha |
| if noUnit { |
| if noConj { |
| akk *= a[k*lda+k] |
| } else { |
| akk *= cmplx.Conj(a[k*lda+k]) |
| } |
| } |
| if akk != 1 { |
| c128.ScalUnitary(akk, bk) |
| } |
| } |
| } |
| } |
| } else { |
| if trans == blas.NoTrans { |
| // Form B = alpha*B*A. |
| if uplo == blas.Upper { |
| for i := 0; i < m; i++ { |
| bi := b[i*ldb : i*ldb+n] |
| for k := n - 1; k >= 0; k-- { |
| abik := alpha * bi[k] |
| if abik == 0 { |
| continue |
| } |
| bi[k] = abik |
| if noUnit { |
| bi[k] *= a[k*lda+k] |
| } |
| c128.AxpyUnitary(abik, a[k*lda+k+1:k*lda+n], bi[k+1:]) |
| } |
| } |
| } else { |
| for i := 0; i < m; i++ { |
| bi := b[i*ldb : i*ldb+n] |
| for k := 0; k < n; k++ { |
| abik := alpha * bi[k] |
| if abik == 0 { |
| continue |
| } |
| bi[k] = abik |
| if noUnit { |
| bi[k] *= a[k*lda+k] |
| } |
| c128.AxpyUnitary(abik, a[k*lda:k*lda+k], bi[:k]) |
| } |
| } |
| } |
| } else { |
| // Form B = alpha*B*Aᵀ or B = alpha*B*Aᴴ. |
| if uplo == blas.Upper { |
| for i := 0; i < m; i++ { |
| bi := b[i*ldb : i*ldb+n] |
| for j, bij := range bi { |
| if noConj { |
| if noUnit { |
| bij *= a[j*lda+j] |
| } |
| bij += c128.DotuUnitary(a[j*lda+j+1:j*lda+n], bi[j+1:n]) |
| } else { |
| if noUnit { |
| bij *= cmplx.Conj(a[j*lda+j]) |
| } |
| bij += c128.DotcUnitary(a[j*lda+j+1:j*lda+n], bi[j+1:n]) |
| } |
| bi[j] = alpha * bij |
| } |
| } |
| } else { |
| for i := 0; i < m; i++ { |
| bi := b[i*ldb : i*ldb+n] |
| for j := n - 1; j >= 0; j-- { |
| bij := bi[j] |
| if noConj { |
| if noUnit { |
| bij *= a[j*lda+j] |
| } |
| bij += c128.DotuUnitary(a[j*lda:j*lda+j], bi[:j]) |
| } else { |
| if noUnit { |
| bij *= cmplx.Conj(a[j*lda+j]) |
| } |
| bij += c128.DotcUnitary(a[j*lda:j*lda+j], bi[:j]) |
| } |
| bi[j] = alpha * bij |
| } |
| } |
| } |
| } |
| } |
| } |
| |
| // Ztrsm solves one of the matrix equations |
| // op(A) * X = alpha * B if side == blas.Left, |
| // X * op(A) = alpha * B if side == blas.Right, |
| // where alpha is a scalar, X and B are m×n matrices, A is a unit or |
| // non-unit, upper or lower triangular matrix and op(A) is one of |
| // op(A) = A if transA == blas.NoTrans, |
| // op(A) = Aᵀ if transA == blas.Trans, |
| // op(A) = Aᴴ if transA == blas.ConjTrans. |
| // On return the matrix X is overwritten on B. |
| func (Implementation) Ztrsm(side blas.Side, uplo blas.Uplo, transA blas.Transpose, diag blas.Diag, m, n int, alpha complex128, a []complex128, lda int, b []complex128, ldb int) { |
| na := m |
| if side == blas.Right { |
| na = n |
| } |
| switch { |
| case side != blas.Left && side != blas.Right: |
| panic(badSide) |
| case uplo != blas.Lower && uplo != blas.Upper: |
| panic(badUplo) |
| case transA != blas.NoTrans && transA != blas.Trans && transA != blas.ConjTrans: |
| panic(badTranspose) |
| case diag != blas.Unit && diag != blas.NonUnit: |
| panic(badDiag) |
| case m < 0: |
| panic(mLT0) |
| case n < 0: |
| panic(nLT0) |
| case lda < max(1, na): |
| panic(badLdA) |
| case ldb < max(1, n): |
| panic(badLdB) |
| } |
| |
| // Quick return if possible. |
| if m == 0 || n == 0 { |
| return |
| } |
| |
| // For zero matrix size the following slice length checks are trivially satisfied. |
| if len(a) < (na-1)*lda+na { |
| panic(shortA) |
| } |
| if len(b) < (m-1)*ldb+n { |
| panic(shortB) |
| } |
| |
| if alpha == 0 { |
| for i := 0; i < m; i++ { |
| for j := 0; j < n; j++ { |
| b[i*ldb+j] = 0 |
| } |
| } |
| return |
| } |
| |
| noConj := transA != blas.ConjTrans |
| noUnit := diag == blas.NonUnit |
| if side == blas.Left { |
| if transA == blas.NoTrans { |
| // Form B = alpha*inv(A)*B. |
| if uplo == blas.Upper { |
| for i := m - 1; i >= 0; i-- { |
| bi := b[i*ldb : i*ldb+n] |
| if alpha != 1 { |
| c128.ScalUnitary(alpha, bi) |
| } |
| for ka, aik := range a[i*lda+i+1 : i*lda+m] { |
| k := i + 1 + ka |
| if aik != 0 { |
| c128.AxpyUnitary(-aik, b[k*ldb:k*ldb+n], bi) |
| } |
| } |
| if noUnit { |
| c128.ScalUnitary(1/a[i*lda+i], bi) |
| } |
| } |
| } else { |
| for i := 0; i < m; i++ { |
| bi := b[i*ldb : i*ldb+n] |
| if alpha != 1 { |
| c128.ScalUnitary(alpha, bi) |
| } |
| for j, aij := range a[i*lda : i*lda+i] { |
| if aij != 0 { |
| c128.AxpyUnitary(-aij, b[j*ldb:j*ldb+n], bi) |
| } |
| } |
| if noUnit { |
| c128.ScalUnitary(1/a[i*lda+i], bi) |
| } |
| } |
| } |
| } else { |
| // Form B = alpha*inv(Aᵀ)*B or B = alpha*inv(Aᴴ)*B. |
| if uplo == blas.Upper { |
| for i := 0; i < m; i++ { |
| bi := b[i*ldb : i*ldb+n] |
| if noUnit { |
| if noConj { |
| c128.ScalUnitary(1/a[i*lda+i], bi) |
| } else { |
| c128.ScalUnitary(1/cmplx.Conj(a[i*lda+i]), bi) |
| } |
| } |
| for ja, aij := range a[i*lda+i+1 : i*lda+m] { |
| if aij == 0 { |
| continue |
| } |
| j := i + 1 + ja |
| if noConj { |
| c128.AxpyUnitary(-aij, bi, b[j*ldb:j*ldb+n]) |
| } else { |
| c128.AxpyUnitary(-cmplx.Conj(aij), bi, b[j*ldb:j*ldb+n]) |
| } |
| } |
| if alpha != 1 { |
| c128.ScalUnitary(alpha, bi) |
| } |
| } |
| } else { |
| for i := m - 1; i >= 0; i-- { |
| bi := b[i*ldb : i*ldb+n] |
| if noUnit { |
| if noConj { |
| c128.ScalUnitary(1/a[i*lda+i], bi) |
| } else { |
| c128.ScalUnitary(1/cmplx.Conj(a[i*lda+i]), bi) |
| } |
| } |
| for j, aij := range a[i*lda : i*lda+i] { |
| if aij == 0 { |
| continue |
| } |
| if noConj { |
| c128.AxpyUnitary(-aij, bi, b[j*ldb:j*ldb+n]) |
| } else { |
| c128.AxpyUnitary(-cmplx.Conj(aij), bi, b[j*ldb:j*ldb+n]) |
| } |
| } |
| if alpha != 1 { |
| c128.ScalUnitary(alpha, bi) |
| } |
| } |
| } |
| } |
| } else { |
| if transA == blas.NoTrans { |
| // Form B = alpha*B*inv(A). |
| if uplo == blas.Upper { |
| for i := 0; i < m; i++ { |
| bi := b[i*ldb : i*ldb+n] |
| if alpha != 1 { |
| c128.ScalUnitary(alpha, bi) |
| } |
| for j, bij := range bi { |
| if bij == 0 { |
| continue |
| } |
| if noUnit { |
| bi[j] /= a[j*lda+j] |
| } |
| c128.AxpyUnitary(-bi[j], a[j*lda+j+1:j*lda+n], bi[j+1:n]) |
| } |
| } |
| } else { |
| for i := 0; i < m; i++ { |
| bi := b[i*ldb : i*ldb+n] |
| if alpha != 1 { |
| c128.ScalUnitary(alpha, bi) |
| } |
| for j := n - 1; j >= 0; j-- { |
| if bi[j] == 0 { |
| continue |
| } |
| if noUnit { |
| bi[j] /= a[j*lda+j] |
| } |
| c128.AxpyUnitary(-bi[j], a[j*lda:j*lda+j], bi[:j]) |
| } |
| } |
| } |
| } else { |
| // Form B = alpha*B*inv(Aᵀ) or B = alpha*B*inv(Aᴴ). |
| if uplo == blas.Upper { |
| for i := 0; i < m; i++ { |
| bi := b[i*ldb : i*ldb+n] |
| for j := n - 1; j >= 0; j-- { |
| bij := alpha * bi[j] |
| if noConj { |
| bij -= c128.DotuUnitary(a[j*lda+j+1:j*lda+n], bi[j+1:n]) |
| if noUnit { |
| bij /= a[j*lda+j] |
| } |
| } else { |
| bij -= c128.DotcUnitary(a[j*lda+j+1:j*lda+n], bi[j+1:n]) |
| if noUnit { |
| bij /= cmplx.Conj(a[j*lda+j]) |
| } |
| } |
| bi[j] = bij |
| } |
| } |
| } else { |
| for i := 0; i < m; i++ { |
| bi := b[i*ldb : i*ldb+n] |
| for j, bij := range bi { |
| bij *= alpha |
| if noConj { |
| bij -= c128.DotuUnitary(a[j*lda:j*lda+j], bi[:j]) |
| if noUnit { |
| bij /= a[j*lda+j] |
| } |
| } else { |
| bij -= c128.DotcUnitary(a[j*lda:j*lda+j], bi[:j]) |
| if noUnit { |
| bij /= cmplx.Conj(a[j*lda+j]) |
| } |
| } |
| bi[j] = bij |
| } |
| } |
| } |
| } |
| } |
| } |