Always test all four components of output

More thorough testing: add four-component value generators and per-channel
CPU reference functions so that every channel of each output value is
verified, instead of only the first.

Signed-off-by: Wladimir J. van der Laan <laanwj@gmail.com>
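---

For illustration, a minimal standalone sketch of the kind of check this change enables: it rebuilds the add.u32 reference the way CPU_COMPUTE_FUNC1_PAD does (result in component 0, padding pattern in the remaining channels) and compares all four components of every element. The check_all_components() helper and the main() harness are hypothetical and only assume the 4-uint32_t-per-element, row-major layout used by these tests; they are not part of etnaviv_verifyops.c.

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Compare every component of every element; report the first mismatch. */
static int check_all_components(const uint32_t *expected, const uint32_t *actual,
                                size_t width, size_t height)
{
    for (size_t y = 0; y < height; ++y) {
        for (size_t x = 0; x < width; ++x) {
            for (size_t c = 0; c < 4; ++c) {
                size_t idx = (y * width + x) * 4 + c;
                if (expected[idx] != actual[idx]) {
                    printf("mismatch at (%zu,%zu) component %zu: "
                           "expected %08" PRIx32 ", got %08" PRIx32 "\n",
                           x, y, c, expected[idx], actual[idx]);
                    return 0;
                }
            }
        }
    }
    return 1;
}

int main(void)
{
    enum { W = 4, H = 4 };
    uint32_t a[W * 4], b[H * 4], expected[W * H * 4], actual[W * H * 4];

    /* Same per-component input pattern as i32_generate_values_h4/_v4 with seed 0. */
    for (size_t x = 0; x < W; ++x)
        for (size_t c = 0; c < 4; ++c)
            a[x * 4 + c] = (uint32_t)x + (uint32_t)c * 0x010000;
    for (size_t y = 0; y < H; ++y)
        for (size_t c = 0; c < 4; ++c)
            b[y * 4 + c] = (uint32_t)y + (uint32_t)c * 0x010000;

    /* CPU reference for add.u32: only component 0 carries the result; the
     * other three must keep the padding pattern (as in CPU_COMPUTE_FUNC1_PAD). */
    for (size_t y = 0; y < H; ++y) {
        for (size_t x = 0; x < W; ++x) {
            expected[(y * W + x) * 4 + 0] = a[x * 4] + b[y * 4];
            expected[(y * W + x) * 4 + 1] = 0x55555555;
            expected[(y * W + x) * 4 + 2] = 0xaaaaaaaa;
            expected[(y * W + x) * 4 + 3] = 0x55555555;
        }
    }

    /* Stand-in for the GPU result: here it simply matches the reference. */
    memcpy(actual, expected, sizeof(actual));
    return check_all_components(expected, actual, W, H) ? EXIT_SUCCESS : EXIT_FAILURE;
}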
diff --git a/src/etnaviv_verifyops.c b/src/etnaviv_verifyops.c
index 77fd470..f00ef94 100644
--- a/src/etnaviv_verifyops.c
+++ b/src/etnaviv_verifyops.c
@@ -211,6 +211,17 @@
}
}
 
+void i32_generate_values_h4(size_t seed, void *a, size_t width)
+{
+ uint32_t base = seed * width;
+ for (size_t x=0; x<width; ++x) {
+ ((uint32_t*)a)[x*4+0] = base + x;
+ ((uint32_t*)a)[x*4+1] = base + x + 0x010000;
+ ((uint32_t*)a)[x*4+2] = base + x + 0x020000;
+ ((uint32_t*)a)[x*4+3] = base + x + 0x030000;
+ }
+}
+
void i32_generate_values_v(size_t seed, void *b, size_t height)
{
uint32_t base = seed * height;
@@ -222,10 +233,25 @@
}
}
 
+void i32_generate_values_v4(size_t seed, void *b, size_t height)
+{
+ uint32_t base = seed * height;
+ for (size_t y=0; y<height; ++y) {
+ ((uint32_t*)b)[y*4+0] = base + y;
+ ((uint32_t*)b)[y*4+1] = base + y + 0x010000;
+ ((uint32_t*)b)[y*4+2] = base + y + 0x020000;
+ ((uint32_t*)b)[y*4+3] = base + y + 0x030000;
+ }
+}
+
+/* shorthand for component 0 of the source values */
#define A (a[x*4])
#define B (b[y*4])
+/* source value, selected component */
+#define AI(i) (a[x*4+(i)])
+#define BI(i) (b[y*4+(i)])
/* Scalar computations broadcasted to all channels */
-#define CPU_COMPUTE_FUNC1(_name, _type, _expr) \
+#define CPU_COMPUTE_FUNC1_BCAST(_name, _type, _expr) \
static void _name(_type *out, const _type *a, const _type *b, size_t width, size_t height) \
{ \
for(size_t y=0; y<height; ++y) { \
@@ -234,6 +260,19 @@
} \
} \
}
+/* Scalar computation on one channel only; the remaining channels keep the padding pattern */
+#define CPU_COMPUTE_FUNC1_PAD(_name, _type, _expr) \
+ static void _name(_type *out, const _type *a, const _type *b, size_t width, size_t height) \
+ { \
+ for(size_t y=0; y<height; ++y) { \
+ for(size_t x=0; x<width; ++x) { \
+ out[(y*width+x)*4+0] = (_expr); \
+ out[(y*width+x)*4+1] = 0x55555555; \
+ out[(y*width+x)*4+2] = 0xaaaaaaaa; \
+ out[(y*width+x)*4+3] = 0x55555555; \
+ } \
+ } \
+ }
/* Independent expressions for channels */
#define CPU_COMPUTE_FUNC4(_name, _type, _expr0, _expr1, _expr2, _expr3) \
static void _name(_type *out, const _type *a, const _type *b, size_t width, size_t height) \
@@ -249,24 +288,27 @@
}
CPU_COMPUTE_FUNC4(nop_compute_cpu, uint32_t, 0xaaaaaaaa, 0x55555555, 0xaaaaaaaa, 0x55555555);
/* u32 */
-CPU_COMPUTE_FUNC1(addu32_compute_cpu, uint32_t, A + B);
-CPU_COMPUTE_FUNC1(mulu32_compute_cpu, uint32_t, A * B);
-CPU_COMPUTE_FUNC1(mulhu32_compute_cpu, uint32_t, ((uint64_t)A * (uint64_t)B)>>32);
-CPU_COMPUTE_FUNC1(madu32_compute_cpu, uint32_t, A * B + 0x12345678);
-CPU_COMPUTE_FUNC1(lshiftu32_compute_cpu, uint32_t, A << (B&31));
-CPU_COMPUTE_FUNC1(rshiftu32_compute_cpu, uint32_t, A >> (B&31));
-CPU_COMPUTE_FUNC1(rotateu32_compute_cpu, uint32_t, (A << (B&31)) | (A >> ((32-B)&31)));
-CPU_COMPUTE_FUNC1(oru32_compute_cpu, uint32_t, A | B);
-CPU_COMPUTE_FUNC1(andu32_compute_cpu, uint32_t, A & B);
-CPU_COMPUTE_FUNC1(xoru32_compute_cpu, uint32_t, A ^ B);
-CPU_COMPUTE_FUNC1(notu32_compute_cpu, uint32_t, ~A);
-CPU_COMPUTE_FUNC1(leadzerou32_compute_cpu, uint32_t, (A != 0) ? __builtin_clz(A) : 0x20);
+CPU_COMPUTE_FUNC1_PAD(addu32_single_compute_cpu, uint32_t, A + B);
+CPU_COMPUTE_FUNC1_BCAST(addu32_compute_cpu, uint32_t, A + B);
+CPU_COMPUTE_FUNC1_BCAST(mulu32_compute_cpu, uint32_t, A * B);
+CPU_COMPUTE_FUNC1_BCAST(mulhu32_compute_cpu, uint32_t, ((uint64_t)A * (uint64_t)B)>>32);
+CPU_COMPUTE_FUNC1_BCAST(madu32_compute_cpu, uint32_t, A * B + 0x12345678);
+CPU_COMPUTE_FUNC1_BCAST(lshiftu32_compute_cpu, uint32_t, A << (B&31));
+CPU_COMPUTE_FUNC1_BCAST(rshiftu32_compute_cpu, uint32_t, A >> (B&31));
+CPU_COMPUTE_FUNC1_BCAST(rotateu32_compute_cpu, uint32_t, (A << (B&31)) | (A >> ((32-B)&31)));
+CPU_COMPUTE_FUNC1_BCAST(oru32_compute_cpu, uint32_t, A | B);
+CPU_COMPUTE_FUNC1_BCAST(andu32_compute_cpu, uint32_t, A & B);
+CPU_COMPUTE_FUNC1_BCAST(xoru32_compute_cpu, uint32_t, A ^ B);
+CPU_COMPUTE_FUNC1_BCAST(notu32_compute_cpu, uint32_t, ~A);
+CPU_COMPUTE_FUNC1_BCAST(leadzerou32_compute_cpu, uint32_t, (A != 0) ? __builtin_clz(A) : 0x20);
/* float */
-CPU_COMPUTE_FUNC1(addf32_compute_cpu, float, A + B);
-CPU_COMPUTE_FUNC1(mulf32_compute_cpu, float, A * B);
+CPU_COMPUTE_FUNC4(addf32_compute_cpu, float, AI(0) + BI(0), AI(1) + BI(1), AI(2) + BI(2), AI(3) + BI(3));
+CPU_COMPUTE_FUNC4(mulf32_compute_cpu, float, AI(0) * BI(0), AI(1) * BI(1), AI(2) * BI(2), AI(3) * BI(3));
#undef A
#undef B
+#undef AI
+#undef BI
#undef CPU_COMPUTE
/* Tests GPU code must take from a[x] t2 and b[y] t3, and output to t4.
@@ -279,7 +321,7 @@
}))
},
/* Add will only output one element at a time */
- {"add.u32", 1, CT_INT32, i32_generate_values_h, i32_generate_values_v, (void*)addu32_compute_cpu,
+ {"add.u32", 4, CT_INT32, i32_generate_values_h, i32_generate_values_v, (void*)addu32_single_compute_cpu,
GPU_CODE(((uint32_t[]){
0x00841001, 0x00202800, 0x80000000, 0x00000038, /* add.u32 t4, t2, void, t3 */
}))
@@ -351,7 +393,7 @@
// add.u16 does nothing
// 0x00801001, 0x15402800, 0xc0000000, 0x00000018, /* add.u16 t0.x___, t2.yyyy, void, t1.xxxx */
// Need an effective way of comparing these
- {"add.f32", 1, CT_FLOAT32, i32_generate_values_h, i32_generate_values_v, (void*)addf32_compute_cpu,
+ {"add.f32", 4, CT_FLOAT32, i32_generate_values_h4, i32_generate_values_v4, (void*)addf32_compute_cpu,
GPU_CODE(((uint32_t[]){
0x07841001, 0x39002800, 0x00000000, 0x00390038, /* add t4, t2, void, t3 */
}))