| // Copyright ©2017 The Gonum Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| package f32_test |
| |
| import ( |
| "fmt" |
| "testing" |
| |
| . "gonum.org/v1/gonum/internal/asm/f32" |
| "gonum.org/v1/gonum/internal/math32" |
| ) |
| |
| type SgemvCase struct { |
| m int |
| n int |
| A []float32 |
| x []float32 |
| y []float32 |
| |
| NoTrans []SgemvSubcase |
| Trans []SgemvSubcase |
| } |
| |
// SgemvSubcase pairs the scaling factors alpha and beta with the expected
// output vectors for one SgemvCase. want is compared when no increment is
// negative; wantRevX, wantRevY and wantRevXY are compared when the x
// increment, the y increment, or both are negative.
type SgemvSubcase struct {
	alpha, beta float32

	want, wantRevX, wantRevY, wantRevXY []float32
}
| |
| var SgemvCases = []SgemvCase{ |
| { // 1x1 |
| m: 1, |
| n: 1, |
| A: []float32{4.1}, |
| x: []float32{2.2}, |
| y: []float32{6.8}, |
| |
| NoTrans: []SgemvSubcase{ // (1x1) |
| {alpha: 0, beta: 0, |
| want: []float32{0}, |
| wantRevX: []float32{0}, |
| wantRevY: []float32{0}, |
| wantRevXY: []float32{0}, |
| }, |
| {alpha: 0, beta: 1, |
| want: []float32{6.8}, |
| wantRevX: []float32{6.8}, |
| wantRevY: []float32{6.8}, |
| wantRevXY: []float32{6.8}, |
| }, |
| {alpha: 1, beta: 0, |
| want: []float32{9.02}, |
| wantRevX: []float32{9.02}, |
| wantRevY: []float32{9.02}, |
| wantRevXY: []float32{9.02}, |
| }, |
| {alpha: 8, beta: -6, |
| want: []float32{31.36}, |
| wantRevX: []float32{31.36}, |
| wantRevY: []float32{31.36}, |
| wantRevXY: []float32{31.36}, |
| }, |
| }, |
| |
| Trans: []SgemvSubcase{ // (1x1) |
| {alpha: 0, beta: 0, |
| want: []float32{0}, |
| wantRevX: []float32{0}, |
| wantRevY: []float32{0}, |
| wantRevXY: []float32{0}, |
| }, |
| {alpha: 0, beta: 1, |
| want: []float32{2.2}, |
| wantRevX: []float32{2.2}, |
| wantRevY: []float32{2.2}, |
| wantRevXY: []float32{2.2}, |
| }, |
| {alpha: 1, beta: 0, |
| want: []float32{27.88}, |
| wantRevX: []float32{27.88}, |
| wantRevY: []float32{27.88}, |
| wantRevXY: []float32{27.88}, |
| }, |
| {alpha: 8, beta: -6, |
| want: []float32{209.84}, |
| wantRevX: []float32{209.84}, |
| wantRevY: []float32{209.84}, |
| wantRevXY: []float32{209.84}, |
| }, |
| }, |
| }, |
| |
| { // 3x2 |
| m: 3, |
| n: 2, |
| A: []float32{ |
| 4.67, 2.75, |
| 0.48, 1.21, |
| 2.28, 2.82, |
| }, |
| x: []float32{3.38, 3}, |
| y: []float32{2.8, 1.71, 2.64}, |
| |
| NoTrans: []SgemvSubcase{ // (2x2, 1x2) |
| {alpha: 0, beta: 0, |
| want: []float32{0, 0, 0}, |
| wantRevX: []float32{0, 0, 0}, |
| wantRevY: []float32{0, 0, 0}, |
| wantRevXY: []float32{0, 0, 0}, |
| }, |
| {alpha: 0, beta: 1, |
| want: []float32{2.8, 1.71, 2.64}, |
| wantRevX: []float32{2.8, 1.71, 2.64}, |
| wantRevY: []float32{2.8, 1.71, 2.64}, |
| wantRevXY: []float32{2.8, 1.71, 2.64}, |
| }, |
| {alpha: 1, beta: 0, |
| want: []float32{24.0346, 5.2524, 16.1664}, |
| wantRevX: []float32{23.305, 5.5298, 16.3716}, |
| wantRevY: []float32{16.1664, 5.2524, 24.0346}, |
| wantRevXY: []float32{16.3716, 5.5298, 23.305}, |
| }, |
| {alpha: 8, beta: -6, |
| want: []float32{175.4768, 31.7592, 113.4912}, |
| wantRevX: []float32{169.64, 33.9784, 115.1328}, |
| wantRevY: []float32{112.5312, 31.7592, 176.4368}, |
| wantRevXY: []float32{114.1728, 33.9784, 170.6}, |
| }, |
| }, |
| |
| Trans: []SgemvSubcase{ // (2x2) |
| {alpha: 0, beta: 0, |
| want: []float32{0, 0}, |
| wantRevX: []float32{0, 0}, |
| wantRevY: []float32{0, 0}, |
| wantRevXY: []float32{0, 0}, |
| }, |
| {alpha: 0, beta: 1, |
| want: []float32{3.38, 3}, |
| wantRevX: []float32{3.38, 3}, |
| wantRevY: []float32{3.38, 3}, |
| wantRevXY: []float32{3.38, 3}, |
| }, |
| {alpha: 1, beta: 0, |
| want: []float32{19.916, 17.2139}, |
| wantRevX: []float32{19.5336, 17.2251}, |
| wantRevY: []float32{17.2139, 19.916}, |
| wantRevXY: []float32{17.2251, 19.5336}, |
| }, |
| {alpha: 8, beta: -6, |
| want: []float32{139.048, 119.7112}, |
| wantRevX: []float32{135.9888, 119.8008}, |
| wantRevY: []float32{117.4312, 141.328}, |
| wantRevXY: []float32{117.5208, 138.2688}, |
| }, |
| }, |
| }, |
| |
| { // 3x3 |
| m: 3, |
| n: 3, |
| A: []float32{ |
| 4.38, 4.4, 4.26, |
| 4.18, 0.56, 2.57, |
| 2.59, 2.07, 0.46, |
| }, |
| x: []float32{4.82, 1.82, 1.12}, |
| y: []float32{0.24, 1.41, 3.45}, |
| |
| NoTrans: []SgemvSubcase{ // (2x2, 2x1, 1x2, 1x1) |
| {alpha: 0, beta: 0, |
| want: []float32{0, 0, 0}, |
| wantRevX: []float32{0, 0, 0}, |
| wantRevY: []float32{0, 0, 0}, |
| wantRevXY: []float32{0, 0, 0}, |
| }, |
| {alpha: 0, beta: 1, |
| want: []float32{0.24, 1.41, 3.45}, |
| wantRevX: []float32{0.24, 1.41, 3.45}, |
| wantRevY: []float32{0.24, 1.41, 3.45}, |
| wantRevXY: []float32{0.24, 1.41, 3.45}, |
| }, |
| {alpha: 1, beta: 0, |
| want: []float32{33.8908, 24.0452, 16.7664}, |
| wantRevX: []float32{33.4468, 18.0882, 8.8854}, |
| wantRevY: []float32{16.7664, 24.0452, 33.8908}, |
| wantRevXY: []float32{8.8854, 18.0882, 33.4468}, |
| }, |
| {alpha: 8, beta: -6, |
| want: []float32{269.6864, 183.9016, 113.4312}, |
| wantRevX: []float32{266.1344, 136.2456, 50.3832}, |
| wantRevY: []float32{132.6912, 183.9016, 250.4264}, |
| wantRevXY: []float32{69.6432, 136.2456, 246.8744}, |
| }, |
| }, |
| |
| Trans: []SgemvSubcase{ // (2x2, 1x2, 2x1, 1x1) |
| {alpha: 0, beta: 0, |
| want: []float32{0, 0, 0}, |
| wantRevX: []float32{0, 0, 0}, |
| wantRevY: []float32{0, 0, 0}, |
| wantRevXY: []float32{0, 0, 0}, |
| }, |
| {alpha: 0, beta: 1, |
| want: []float32{4.82, 1.82, 1.12}, |
| wantRevX: []float32{4.82, 1.82, 1.12}, |
| wantRevY: []float32{4.82, 1.82, 1.12}, |
| wantRevXY: []float32{4.82, 1.82, 1.12}, |
| }, |
| {alpha: 1, beta: 0, |
| want: []float32{15.8805, 8.9871, 6.2331}, |
| wantRevX: []float32{21.6264, 16.4664, 18.4311}, |
| wantRevY: []float32{6.2331, 8.9871, 15.8805}, |
| wantRevXY: []float32{18.4311, 16.4664, 21.6264}, |
| }, |
| {alpha: 8, beta: -6, |
| want: []float32{98.124, 60.9768, 43.1448}, |
| wantRevX: []float32{144.0912, 120.8112, 140.7288}, |
| wantRevY: []float32{20.9448, 60.9768, 120.324}, |
| wantRevXY: []float32{118.5288, 120.8112, 166.2912}, |
| }, |
| }, |
| }, |
| |
| { // 5x3 |
| m: 5, |
| n: 3, |
| A: []float32{ |
| 4.1, 6.2, 8.1, |
| 9.6, 3.5, 9.1, |
| 10, 7, 3, |
| 1, 1, 2, |
| 9, 2, 5, |
| }, |
| x: []float32{1, 2, 3}, |
| y: []float32{7, 8, 9, 10, 11}, |
| |
| NoTrans: []SgemvSubcase{ //(4x2, 4x1, 1x2, 1x1) |
| {alpha: 0, beta: 0, |
| want: []float32{0, 0, 0, 0, 0}, |
| wantRevX: []float32{0, 0, 0, 0, 0}, |
| wantRevY: []float32{0, 0, 0, 0, 0}, |
| wantRevXY: []float32{0, 0, 0, 0, 0}, |
| }, |
| {alpha: 0, beta: 1, |
| want: []float32{7, 8, 9, 10, 11}, |
| wantRevX: []float32{7, 8, 9, 10, 11}, |
| wantRevY: []float32{7, 8, 9, 10, 11}, |
| wantRevXY: []float32{7, 8, 9, 10, 11}, |
| }, |
| {alpha: 1, beta: 0, |
| want: []float32{40.8, 43.9, 33, 9, 28}, |
| wantRevX: []float32{32.8, 44.9, 47, 7, 36}, |
| wantRevY: []float32{28, 9, 33, 43.9, 40.8}, |
| wantRevXY: []float32{36, 7, 47, 44.9, 32.8}, |
| }, |
| {alpha: 8, beta: -6, |
| want: []float32{284.4, 303.2, 210, 12, 158}, |
| wantRevX: []float32{220.4, 311.2, 322, -4, 222}, |
| wantRevY: []float32{182, 24, 210, 291.2, 260.4}, |
| wantRevXY: []float32{246, 8, 322, 299.2, 196.4}, |
| }, |
| }, |
| |
| Trans: []SgemvSubcase{ //( 2x4, 1x4, 2x1, 1x1) |
| {alpha: 0, beta: 0, |
| want: []float32{0, 0, 0}, |
| wantRevX: []float32{0, 0, 0}, |
| wantRevY: []float32{0, 0, 0}, |
| wantRevXY: []float32{0, 0, 0}, |
| }, |
| {alpha: 0, beta: 1, |
| want: []float32{1, 2, 3}, |
| wantRevX: []float32{1, 2, 3}, |
| wantRevY: []float32{1, 2, 3}, |
| wantRevXY: []float32{1, 2, 3}, |
| }, |
| {alpha: 1, beta: 0, |
| want: []float32{304.5, 166.4, 231.5}, |
| wantRevX: []float32{302.1, 188.2, 258.1}, |
| wantRevY: []float32{231.5, 166.4, 304.5}, |
| wantRevXY: []float32{258.1, 188.2, 302.1}, |
| }, |
| {alpha: 8, beta: -6, |
| want: []float32{2430, 1319.2, 1834}, |
| wantRevX: []float32{2410.8, 1493.6, 2046.8}, |
| wantRevY: []float32{1846, 1319.2, 2418}, |
| wantRevXY: []float32{2058.8, 1493.6, 2398.8}, |
| }, |
| }, |
| }, |
| |
| { // 3x5 |
| m: 3, |
| n: 5, |
| A: []float32{ |
| 1.4, 2.34, 3.96, 0.96, 2.3, |
| 3.43, 0.62, 1.09, 0.2, 3.56, |
| 1.15, 0.58, 3.8, 1.16, 0.01, |
| }, |
| x: []float32{2.34, 2.82, 4.73, 0.22, 3.91}, |
| y: []float32{2.46, 2.22, 4.75}, |
| |
| NoTrans: []SgemvSubcase{ // (2x4, 2x1, 1x4, 1x1) |
| {alpha: 0, beta: 0, |
| want: []float32{0, 0, 0}, |
| wantRevX: []float32{0, 0, 0}, |
| wantRevY: []float32{0, 0, 0}, |
| wantRevXY: []float32{0, 0, 0}, |
| }, |
| {alpha: 0, beta: 1, |
| want: []float32{2.46, 2.22, 4.75}, |
| wantRevX: []float32{2.46, 2.22, 4.75}, |
| wantRevY: []float32{2.46, 2.22, 4.75}, |
| wantRevXY: []float32{2.46, 2.22, 4.75}, |
| }, |
| {alpha: 1, beta: 0, |
| want: []float32{37.8098, 28.8939, 22.5949}, |
| wantRevX: []float32{32.8088, 27.5978, 25.8927}, |
| wantRevY: []float32{22.5949, 28.8939, 37.8098}, |
| wantRevXY: []float32{25.8927, 27.5978, 32.8088}, |
| }, |
| {alpha: 8, beta: -6, |
| want: []float32{287.7184, 217.8312, 152.2592}, |
| wantRevX: []float32{247.7104, 207.4624, 178.6416}, |
| wantRevY: []float32{165.9992, 217.8312, 273.9784}, |
| wantRevXY: []float32{192.3816, 207.4624, 233.9704}, |
| }, |
| }, |
| |
| Trans: []SgemvSubcase{ // (4x2, 1x2, 4x1, 1x1) |
| {alpha: 0, beta: 0, |
| want: []float32{0, 0, 0, 0, 0}, |
| wantRevX: []float32{0, 0, 0, 0, 0}, |
| wantRevY: []float32{0, 0, 0, 0, 0}, |
| wantRevXY: []float32{0, 0, 0, 0, 0}, |
| }, |
| {alpha: 0, beta: 1, |
| want: []float32{2.34, 2.82, 4.73, 0.22, 3.91}, |
| wantRevX: []float32{2.34, 2.82, 4.73, 0.22, 3.91}, |
| wantRevY: []float32{2.34, 2.82, 4.73, 0.22, 3.91}, |
| wantRevXY: []float32{2.34, 2.82, 4.73, 0.22, 3.91}, |
| }, |
| {alpha: 1, beta: 0, |
| want: []float32{16.5211, 9.8878, 30.2114, 8.3156, 13.6087}, |
| wantRevX: []float32{17.0936, 13.9182, 30.5778, 7.8576, 18.8528}, |
| wantRevY: []float32{13.6087, 8.3156, 30.2114, 9.8878, 16.5211}, |
| wantRevXY: []float32{18.8528, 7.8576, 30.5778, 13.9182, 17.0936}, |
| }, |
| {alpha: 8, beta: -6, |
| want: []float32{118.1288, 62.1824, 213.3112, 65.2048, 85.4096}, |
| wantRevX: []float32{122.7088, 94.4256, 216.2424, 61.5408, 127.3624}, |
| wantRevY: []float32{94.8296, 49.6048, 213.3112, 77.7824, 108.7088}, |
| wantRevXY: []float32{136.7824, 45.9408, 216.2424, 110.0256, 113.2888}, |
| }, |
| }, |
| }, |
| |
| { // 7x7 & nan test |
| m: 7, |
| n: 7, |
| A: []float32{ |
| 0.9, 2.6, 0.5, 1.8, 2.3, 0.6, 0.2, |
| 1.6, 0.6, 1.3, 2.1, 1.4, 0.4, 0.8, |
| 2.9, 0.9, 2.3, 2.5, 1.4, 1.8, 1.6, |
| 2.6, 2.8, 2.1, 0.3, nan, 2.2, 1.3, |
| 0.2, 2.2, 1.8, 1.8, 2.1, 1.3, 1.4, |
| 1.7, 1.4, 2.3, 2., 1., 0., 1.4, |
| 2.1, 1.9, 0.8, 2.9, 1.3, 0.3, 1.3, |
| }, |
| x: []float32{0.4, 2.8, 3.5, 0.3, 0.6, 2.5, 3.1}, |
| y: []float32{3.2, 4.4, 5., 4.3, 4.1, 1.4, 0.2}, |
| |
| NoTrans: []SgemvSubcase{ // (4x4, 4x2, 4x1, 2x4, 2x2, 2x1, 1x4, 1x2, 1x1) |
| {alpha: 0, beta: 0, |
| want: []float32{0, 0, 0, nan, 0, 0, 0}, |
| wantRevX: []float32{0, 0, 0, nan, 0, 0, 0}, |
| wantRevY: []float32{0, 0, 0, nan, 0, 0, 0}, |
| wantRevXY: []float32{0, 0, 0, nan, 0, 0, 0}, |
| }, |
| {alpha: 0, beta: 1, |
| want: []float32{3.2, 4.4, 5., nan, 4.1, 1.4, 0.2}, |
| wantRevX: []float32{3.2, 4.4, 5., nan, 4.1, 1.4, 0.2}, |
| wantRevY: []float32{3.2, 4.4, 5., nan, 4.1, 1.4, 0.2}, |
| wantRevXY: []float32{3.2, 4.4, 5., nan, 4.1, 1.4, 0.2}, |
| }, |
| {alpha: 1, beta: 0, |
| want: []float32{13.43, 11.82, 22.78, nan, 21.93, 18.19, 15.39}, |
| wantRevX: []float32{19.94, 14.21, 23.95, nan, 19.29, 14.81, 18.52}, |
| wantRevY: []float32{15.39, 18.19, 21.93, nan, 22.78, 11.82, 13.43}, |
| wantRevXY: []float32{18.52, 14.81, 19.29, nan, 23.95, 14.21, 19.94}, |
| }, |
| {alpha: 8, beta: -6, |
| want: []float32{88.24, 68.16, 152.24, nan, 150.84, 137.12, 121.92}, |
| wantRevX: []float32{140.32, 87.28, 161.6, nan, 129.72, 110.08, 146.96}, |
| wantRevY: []float32{103.92, 119.12, 145.44, nan, 157.64, 86.16, 106.24}, |
| wantRevXY: []float32{128.96, 92.08, 124.32, nan, 167., 105.28, 158.32}, |
| }, |
| }, |
| |
| Trans: []SgemvSubcase{ // (4x4, 2x4, 1x4, 4x2, 2x2, 1x2, 4x1, 2x1, 1x1) |
| {alpha: 0, beta: 0, |
| want: []float32{0, 0, 0, 0, nan, 0, 0}, |
| wantRevX: []float32{0, 0, 0, 0, nan, 0, 0}, |
| wantRevY: []float32{0, 0, nan, 0, 0, 0, 0}, |
| wantRevXY: []float32{0, 0, nan, 0, 0, 0, 0}, |
| }, |
| {alpha: 0, beta: 1, |
| want: []float32{0.4, 2.8, 3.5, 0.3, nan, 2.5, 3.1}, |
| wantRevX: []float32{0.4, 2.8, 3.5, 0.3, nan, 2.5, 3.1}, |
| wantRevY: []float32{0.4, 2.8, nan, 0.3, 0.6, 2.5, 3.1}, |
| wantRevXY: []float32{0.4, 2.8, nan, 0.3, 0.6, 2.5, 3.1}, |
| }, |
| {alpha: 1, beta: 0, |
| want: []float32{39.22, 38.86, 38.61, 39.55, nan, 27.53, 25.71}, |
| wantRevX: []float32{40.69, 40.33, 42.06, 41.92, nan, 24.98, 30.63}, |
| wantRevY: []float32{25.71, 27.53, nan, 39.55, 38.61, 38.86, 39.22}, |
| wantRevXY: []float32{30.63, 24.98, nan, 41.92, 42.06, 40.33, 40.69}, |
| }, |
| {alpha: 8, beta: -6, |
| want: []float32{311.36, 294.08, 287.88, 314.6, nan, 205.24, 187.08}, |
| wantRevX: []float32{323.12, 305.84, 315.48, 333.56, nan, 184.84, 226.44}, |
| wantRevY: []float32{203.28, 203.44, nan, 314.6, 305.28, 295.88, 295.16}, |
| wantRevXY: []float32{242.64, 183.04, nan, 333.56, 332.88, 307.64, 306.92}, |
| }, |
| }, |
| }, |
| { // 11x11 |
| m: 11, |
| n: 11, |
| A: []float32{ |
| 0.4, 3., 2.5, 2., 0.4, 2., 2., 1., 0.1, 0.3, 2., |
| 1.7, 0.7, 2.6, 1.6, 0.5, 2.4, 3., 0.9, 0.1, 2.8, 1.3, |
| 1.1, 2.2, 1.5, 0.8, 2.9, 0.4, 0.5, 1.7, 0.8, 2.6, 0.7, |
| 2.2, 1.7, 0.8, 2.9, 0.7, 0.7, 1.7, 1.8, 1.9, 2.4, 1.9, |
| 0.3, 0.5, 1.6, 1.5, 1.5, 2.4, 1.7, 1.2, 1.9, 2.8, 1.2, |
| 1.4, 2.2, 1.7, 1.4, 2.7, 1.4, 0.9, 1.8, 0.5, 1.2, 1.9, |
| 0.8, 2.3, 1.7, 1.3, 2., 2.8, 2.6, 0.4, 2.5, 1.3, 0.5, |
| 2.4, 2.8, 1.1, 0.2, 0.4, 2.8, 0.5, 0.5, 0., 2.8, 1.9, |
| 2.3, 1.8, 2.3, 1.7, 1.1, 0.1, 1.4, 1.2, 1.9, 0.5, 0.6, |
| 0.6, 2.4, 1.2, 0.3, 1.4, 1.3, 2.5, 2.6, 0., 1.3, 2.6, |
| 0.7, 1.5, 0.2, 1.4, 1.1, 1.8, 0.2, 1., 1., 0.6, 1.2, |
| }, |
| x: []float32{2.5, 1.2, 0.8, 2.9, 3.4, 1.8, 4.6, 3.3, 3.8, 0.9, 1.1}, |
| y: []float32{3.8, 3.4, 1.6, 4.8, 4.3, 0.5, 2., 2.5, 1.5, 2.8, 3.9}, |
| |
| NoTrans: []SgemvSubcase{ // (4x4, 4x2, 4x1, 2x4, 2x2, 2x1, 1x4, 1x2, 1x1) |
| {alpha: 0, beta: 0, |
| want: []float32{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, |
| wantRevX: []float32{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, |
| wantRevY: []float32{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, |
| wantRevXY: []float32{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, |
| }, |
| {alpha: 0, beta: 1, |
| want: []float32{3.8, 3.4, 1.6, 4.8, 4.3, 0.5, 2., 2.5, 1.5, 2.8, 3.9}, |
| wantRevX: []float32{3.8, 3.4, 1.6, 4.8, 4.3, 0.5, 2., 2.5, 1.5, 2.8, 3.9}, |
| wantRevY: []float32{3.8, 3.4, 1.6, 4.8, 4.3, 0.5, 2., 2.5, 1.5, 2.8, 3.9}, |
| wantRevXY: []float32{3.8, 3.4, 1.6, 4.8, 4.3, 0.5, 2., 2.5, 1.5, 2.8, 3.9}, |
| }, |
| {alpha: 1, beta: 0, |
| want: []float32{32.71, 38.93, 33.55, 45.46, 39.24, 38.41, 46.23, 25.78, 37.33, 37.42, 24.63}, |
| wantRevX: []float32{39.82, 43.78, 37.73, 41.19, 40.17, 44.41, 42.75, 28.14, 35.6, 41.25, 23.9}, |
| wantRevY: []float32{24.63, 37.42, 37.33, 25.78, 46.23, 38.41, 39.24, 45.46, 33.55, 38.93, 32.71}, |
| wantRevXY: []float32{23.9, 41.25, 35.6, 28.14, 42.75, 44.41, 40.17, 41.19, 37.73, 43.78, 39.82}, |
| }, |
| {alpha: 8, beta: -6, |
| want: []float32{238.88, 291.04, 258.8, 334.88, 288.12, 304.28, 357.84, 191.24, 289.64, 282.56, 173.64}, |
| wantRevX: []float32{295.76, 329.84, 292.24, 300.72, 295.56, 352.28, 330., 210.12, 275.8, 313.2, 167.8}, |
| wantRevY: []float32{174.24, 278.96, 289.04, 177.44, 344.04, 304.28, 301.92, 348.68, 259.4, 294.64, 238.28}, |
| wantRevXY: []float32{168.4, 309.6, 275.2, 196.32, 316.2, 352.28, 309.36, 314.52, 292.84, 333.44, 295.16}, |
| }, |
| }, |
| |
| Trans: []SgemvSubcase{ // (4x4, 2x4, 1x4, 4x2, 2x2, 1x2, 4x1, 2x1, 1x1) |
| {alpha: 0, beta: 0, |
| want: []float32{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, |
| wantRevX: []float32{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, |
| wantRevY: []float32{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, |
| wantRevXY: []float32{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, |
| }, |
| {alpha: 0, beta: 1, |
| want: []float32{2.5, 1.2, 0.8, 2.9, 3.4, 1.8, 4.6, 3.3, 3.8, 0.9, 1.1}, |
| wantRevX: []float32{2.5, 1.2, 0.8, 2.9, 3.4, 1.8, 4.6, 3.3, 3.8, 0.9, 1.1}, |
| wantRevY: []float32{2.5, 1.2, 0.8, 2.9, 3.4, 1.8, 4.6, 3.3, 3.8, 0.9, 1.1}, |
| wantRevXY: []float32{2.5, 1.2, 0.8, 2.9, 3.4, 1.8, 4.6, 3.3, 3.8, 0.9, 1.1}, |
| }, |
| {alpha: 1, beta: 0, |
| want: []float32{37.07, 55.58, 46.05, 47.34, 33.88, 54.19, 50.85, 39.31, 31.29, 55.31, 46.98}, |
| wantRevX: []float32{38.11, 63.38, 46.44, 40.04, 34.63, 59.27, 50.13, 35.45, 28.26, 51.64, 46.22}, |
| wantRevY: []float32{46.98, 55.31, 31.29, 39.31, 50.85, 54.19, 33.88, 47.34, 46.05, 55.58, 37.07}, |
| wantRevXY: []float32{46.22, 51.64, 28.26, 35.45, 50.13, 59.27, 34.63, 40.04, 46.44, 63.38, 38.11}, |
| }, |
| {alpha: 8, beta: -6, |
| want: []float32{281.56, 437.44, 363.6, 361.32, 250.64, 422.72, 379.2, 294.68, 227.52, 437.08, 369.24}, |
| wantRevX: []float32{289.88, 499.84, 366.72, 302.92, 256.64, 463.36, 373.44, 263.8, 203.28, 407.72, 363.16}, |
| wantRevY: []float32{360.84, 435.28, 245.52, 297.08, 386.4, 422.72, 243.44, 358.92, 345.6, 439.24, 289.96}, |
| wantRevXY: []float32{354.76, 405.92, 221.28, 266.2, 380.64, 463.36, 249.44, 300.52, 348.72, 501.64, 298.28}, |
| }, |
| }, |
| }, |
| } |
| |
| func TestGemv(t *testing.T) { |
| for _, test := range SgemvCases { |
| t.Run(fmt.Sprintf("(%vx%v)", test.m, test.n), func(tt *testing.T) { |
| for i, cas := range test.NoTrans { |
| tt.Run(fmt.Sprintf("NoTrans case %v", i), func(st *testing.T) { |
| sgemvcomp(st, test, false, cas, i) |
| }) |
| } |
| for i, cas := range test.Trans { |
| tt.Run(fmt.Sprintf("Trans case %v", i), func(st *testing.T) { |
| sgemvcomp(st, test, true, cas, i) |
| }) |
| } |
| }) |
| } |
| } |
| |
| func sgemvcomp(t *testing.T, test SgemvCase, trans bool, cas SgemvSubcase, i int) { |
| const ( |
| tol = 1e-6 |
| |
| xGdVal, yGdVal, aGdVal = 0.5, 1.5, 10 |
| gdLn = 4 |
| ) |
| if trans { |
| test.x, test.y = test.y, test.x |
| } |
| prefix := fmt.Sprintf("Test (%vx%v) t:%v (a:%v,b:%v)", test.m, test.n, trans, cas.alpha, cas.beta) |
| xg, yg := guardVector(test.x, xGdVal, gdLn), guardVector(test.y, yGdVal, gdLn) |
| x, y := xg[gdLn:len(xg)-gdLn], yg[gdLn:len(yg)-gdLn] |
| ag := guardVector(test.A, aGdVal, gdLn) |
| a := ag[gdLn : len(ag)-gdLn] |
| |
| lda := uintptr(test.n) |
| if trans { |
| GemvT(uintptr(test.m), uintptr(test.n), cas.alpha, a, lda, x, 1, cas.beta, y, 1) |
| } else { |
| GemvN(uintptr(test.m), uintptr(test.n), cas.alpha, a, lda, x, 1, cas.beta, y, 1) |
| } |
| for i := range cas.want { |
| if !sameApprox(y[i], cas.want[i], tol) { |
| t.Errorf(msgVal, prefix, i, y[i], cas.want[i]) |
| } |
| } |
| |
| if !isValidGuard(xg, xGdVal, gdLn) { |
| t.Errorf(msgGuard, prefix, "x", xg[:gdLn], xg[len(xg)-gdLn:]) |
| } |
| if !isValidGuard(yg, yGdVal, gdLn) { |
| t.Errorf(msgGuard, prefix, "y", yg[:gdLn], yg[len(yg)-gdLn:]) |
| } |
| if !isValidGuard(ag, aGdVal, gdLn) { |
| t.Errorf(msgGuard, prefix, "a", ag[:gdLn], ag[len(ag)-gdLn:]) |
| } |
| if !equalStrided(test.x, x, 1) { |
| t.Errorf(msgReadOnly, prefix, "x") |
| } |
| if !equalStrided(test.A, a, 1) { |
| t.Errorf(msgReadOnly, prefix, "a") |
| } |
| |
| for _, inc := range newIncSet(-1, 1, 2, 3, 90) { |
| incPrefix := fmt.Sprintf("%s inc(x:%v, y:%v)", prefix, inc.x, inc.y) |
| want, incY := cas.want, inc.y |
| switch { |
| case inc.x < 0 && inc.y < 0: |
| want = cas.wantRevXY |
| incY = -inc.y |
| case inc.x < 0: |
| want = cas.wantRevX |
| case inc.y < 0: |
| want = cas.wantRevY |
| incY = -inc.y |
| } |
| xg, yg := guardIncVector(test.x, xGdVal, inc.x, gdLn), guardIncVector(test.y, yGdVal, inc.y, gdLn) |
| x, y := xg[gdLn:len(xg)-gdLn], yg[gdLn:len(yg)-gdLn] |
| ag := guardVector(test.A, aGdVal, gdLn) |
| a := ag[gdLn : len(ag)-gdLn] |
| |
| if trans { |
| GemvT(uintptr(test.m), uintptr(test.n), cas.alpha, |
| a, lda, x, uintptr(inc.x), |
| cas.beta, y, uintptr(inc.y)) |
| } else { |
| GemvN(uintptr(test.m), uintptr(test.n), cas.alpha, |
| a, lda, x, uintptr(inc.x), |
| cas.beta, y, uintptr(inc.y)) |
| } |
| for i := range want { |
| if !sameApprox(y[i*incY], want[i], tol) { |
| t.Errorf(msgVal, incPrefix, i, y[i*incY], want[i]) |
| t.Error(y[i*incY] - want[i]) |
| } |
| } |
| |
| checkValidIncGuard(t, xg, xGdVal, inc.x, gdLn) |
| checkValidIncGuard(t, yg, yGdVal, inc.y, gdLn) |
| if !isValidGuard(ag, aGdVal, gdLn) { |
| t.Errorf(msgGuard, incPrefix, "a", ag[:gdLn], ag[len(ag)-gdLn:]) |
| } |
| if !equalStrided(test.x, x, inc.x) { |
| t.Errorf(msgReadOnly, incPrefix, "x") |
| } |
| if !equalStrided(test.A, a, 1) { |
| t.Errorf(msgReadOnly, incPrefix, "a") |
| } |
| } |
| } |
| |
| // equalStrided returns true if the strided vector x contains elements of the |
| // dense vector ref at indices i*inc, false otherwise. |
| func equalStrided(ref, x []float32, inc int) bool { |
| if inc < 0 { |
| inc = -inc |
| } |
| for i, v := range ref { |
| if !scalarSame(x[i*inc], v) { |
| return false |
| } |
| } |
| return true |
| } |
| |
| func scalarSame(a, b float32) bool { |
| return a == b || (math32.IsNaN(a) && math32.IsNaN(b)) |
| } |