blob: 8bc3bf66da52cd3f6d9b9ba701d9bf2e78a8a55f [file] [log] [blame]
// Copyright ©2018 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package gonum
import (
"gonum.org/v1/gonum/blas"
"gonum.org/v1/gonum/internal/asm/f32"
"gonum.org/v1/gonum/internal/asm/f64"
)
// TODO(Kunde21): Merge these methods back into level2double/level2single when Sgemv assembly kernels are merged into f32.
// Dgemv computes
// y = alpha * A * x + beta * y if tA = blas.NoTrans
// y = alpha * Aᵀ * x + beta * y if tA = blas.Trans or blas.ConjTrans
// where A is an m×n dense matrix, x and y are vectors, and alpha and beta are scalars.
func (Implementation) Dgemv(tA blas.Transpose, m, n int, alpha float64, a []float64, lda int, x []float64, incX int, beta float64, y []float64, incY int) {
if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans {
panic(badTranspose)
}
if m < 0 {
panic(mLT0)
}
if n < 0 {
panic(nLT0)
}
if lda < max(1, n) {
panic(badLdA)
}
if incX == 0 {
panic(zeroIncX)
}
if incY == 0 {
panic(zeroIncY)
}
// Set up indexes
lenX := m
lenY := n
if tA == blas.NoTrans {
lenX = n
lenY = m
}
// Quick return if possible
if m == 0 || n == 0 {
return
}
if (incX > 0 && (lenX-1)*incX >= len(x)) || (incX < 0 && (1-lenX)*incX >= len(x)) {
panic(shortX)
}
if (incY > 0 && (lenY-1)*incY >= len(y)) || (incY < 0 && (1-lenY)*incY >= len(y)) {
panic(shortY)
}
if len(a) < lda*(m-1)+n {
panic(shortA)
}
// Quick return if possible
if alpha == 0 && beta == 1 {
return
}
if alpha == 0 {
// First form y = beta * y
if incY > 0 {
Implementation{}.Dscal(lenY, beta, y, incY)
} else {
Implementation{}.Dscal(lenY, beta, y, -incY)
}
return
}
// Form y = alpha * A * x + y
if tA == blas.NoTrans {
f64.GemvN(uintptr(m), uintptr(n), alpha, a, uintptr(lda), x, uintptr(incX), beta, y, uintptr(incY))
return
}
// Cases where a is transposed.
f64.GemvT(uintptr(m), uintptr(n), alpha, a, uintptr(lda), x, uintptr(incX), beta, y, uintptr(incY))
}
// Sgemv computes
// y = alpha * A * x + beta * y if tA = blas.NoTrans
// y = alpha * Aᵀ * x + beta * y if tA = blas.Trans or blas.ConjTrans
// where A is an m×n dense matrix, x and y are vectors, and alpha and beta are scalars.
//
// Float32 implementations are autogenerated and not directly tested.
func (Implementation) Sgemv(tA blas.Transpose, m, n int, alpha float32, a []float32, lda int, x []float32, incX int, beta float32, y []float32, incY int) {
if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans {
panic(badTranspose)
}
if m < 0 {
panic(mLT0)
}
if n < 0 {
panic(nLT0)
}
if lda < max(1, n) {
panic(badLdA)
}
if incX == 0 {
panic(zeroIncX)
}
if incY == 0 {
panic(zeroIncY)
}
// Quick return if possible.
if m == 0 || n == 0 {
return
}
// Set up indexes
lenX := m
lenY := n
if tA == blas.NoTrans {
lenX = n
lenY = m
}
if (incX > 0 && (lenX-1)*incX >= len(x)) || (incX < 0 && (1-lenX)*incX >= len(x)) {
panic(shortX)
}
if (incY > 0 && (lenY-1)*incY >= len(y)) || (incY < 0 && (1-lenY)*incY >= len(y)) {
panic(shortY)
}
if len(a) < lda*(m-1)+n {
panic(shortA)
}
// Quick return if possible.
if alpha == 0 && beta == 1 {
return
}
// First form y = beta * y
if incY > 0 {
Implementation{}.Sscal(lenY, beta, y, incY)
} else {
Implementation{}.Sscal(lenY, beta, y, -incY)
}
if alpha == 0 {
return
}
var kx, ky int
if incX < 0 {
kx = -(lenX - 1) * incX
}
if incY < 0 {
ky = -(lenY - 1) * incY
}
// Form y = alpha * A * x + y
if tA == blas.NoTrans {
if incX == 1 && incY == 1 {
for i := 0; i < m; i++ {
y[i] += alpha * f32.DotUnitary(a[lda*i:lda*i+n], x[:n])
}
return
}
iy := ky
for i := 0; i < m; i++ {
y[iy] += alpha * f32.DotInc(x, a[lda*i:lda*i+n], uintptr(n), uintptr(incX), 1, uintptr(kx), 0)
iy += incY
}
return
}
// Cases where a is transposed.
if incX == 1 && incY == 1 {
for i := 0; i < m; i++ {
tmp := alpha * x[i]
if tmp != 0 {
f32.AxpyUnitaryTo(y, tmp, a[lda*i:lda*i+n], y[:n])
}
}
return
}
ix := kx
for i := 0; i < m; i++ {
tmp := alpha * x[ix]
if tmp != 0 {
f32.AxpyInc(tmp, a[lda*i:lda*i+n], y, uintptr(n), 1, uintptr(incY), 0, uintptr(ky))
}
ix += incX
}
}