verifyops: Add testing for i2f/f2i
Signed-off-by: Wladimir J. van der Laan <laanwj@gmail.com>
diff --git a/src/etnaviv_verifyops.c b/src/etnaviv_verifyops.c
index b625b94..7ff0fb6 100644
--- a/src/etnaviv_verifyops.c
+++ b/src/etnaviv_verifyops.c
@@ -21,6 +21,7 @@
#include "drm_setup.h"
#include "cmdstream.h"
#include "memutil.h"
+#include "float_helpers.h"
#include "state.xml.h"
#include "state_3d.xml.h"
@@ -201,6 +202,7 @@
etna_set_state(stream, VIVS_GL_FLUSH_CACHE, VIVS_GL_FLUSH_CACHE_TEXTURE | VIVS_GL_FLUSH_CACHE_SHADER_L1);
}
+/** Generate horizontal and vertical testing table values */
void i32_generate_values_h(size_t seed, void *a, size_t width)
{
uint32_t base = seed * width;
@@ -245,6 +247,41 @@
}
}
+/** CPU-side helper emulation functions */
+
+/* Float to signed integer conversion emulation for GC2000. Unlike on GC3000,
+   NaN is converted to 0x80000000/0x7fffffff (by sign bit) instead of 0x00000000.
+   Out-of-range inputs are clamped explicitly, as the bare cast is undefined in C;
+   this is what ARM yields for it - presumably GC2000 too, confirm on hardware. */
+static inline int32_t f2i_s32_gc2000(float f)
+{
+    if (isnan(f)) { /* sign bit set: "negative NaN", clear: "positive NaN" */
+        return (fui(f) & 0x80000000) ? INT32_MIN : INT32_MAX;
+    }
+    if (f >= 2147483648.0f) { /* 2^31 and above, including +Inf */
+        return INT32_MAX;
+    }
+    if (f <= -2147483648.0f) { /* -2^31 and below, including -Inf */
+        return INT32_MIN;
+    }
+    return (int32_t)f; /* in range: truncates toward zero */
+}
+/* Unsigned counterpart for GC2000: NaN is converted to 0x00000000/0xffffffff (by
+   sign bit); out-of-range inputs are clamped as that cast is undefined in C. */
+static inline uint32_t f2i_u32_gc2000(float f)
+{
+    if (isnan(f)) { /* sign bit set: "negative NaN", clear: "positive NaN" */
+        return (fui(f) & 0x80000000) ? 0x00000000 : 0xffffffff;
+    }
+    if (f <= 0.0f) { /* zero, negatives and -Inf all end up as 0 */
+        return 0x00000000;
+    }
+    /* 2^32 and above (including +Inf) clamps to all-ones, the rest truncates */
+    return (f >= 4294967296.0f) ? 0xffffffff : (uint32_t)f;
+}
+
+/** Testing macros for generating CPU implementations */
+
/* shortcut for source value 0 */
#define A (a[x*4])
#define B (b[y*4])
@@ -261,6 +298,16 @@
} \
} \
}
+/* Type-converting scalar computation: _expr is evaluated once per (x,y) cell and stored in all four channels */
+#define CPU_COMPUTE_FUNC1_CVT_BCAST(_name, _typeo, _typei, _expr) \
+    static void _name(_typeo *out, const _typei *a, const _typei *b, size_t width, size_t height) \
+    { \
+        for (size_t y = 0; y < height; ++y) { \
+            for (size_t x = 0; x < width; ++x, out += 4) { /* out steps one 4-channel cell at a time */ \
+                out[0] = out[1] = out[2] = out[3] = (_expr); \
+            } \
+        } \
+    }
/* Scalar computation per channel (use index i) */
#define CPU_COMPUTE_FUNC1_MULTI(_name, _type, _expr) \
static void _name(_type *out, const _type *a, const _type *b, size_t width, size_t height) \
@@ -273,6 +320,18 @@
} \
} \
}
+/* Type-converting computation per channel: _expr is evaluated separately for each channel index i */
+#define CPU_COMPUTE_FUNC1_CVT_MULTI(_name, _typeo, _typei, _expr) \
+    static void _name(_typeo *out, const _typei *a, const _typei *b, size_t width, size_t height) \
+    { \
+        for (size_t y = 0; y < height; ++y) { \
+            for (size_t x = 0; x < width; ++x, out += 4) { /* out steps one 4-channel cell at a time */ \
+                for (size_t i = 0; i < 4; ++i) { \
+                    out[i] = (_expr); \
+                } \
+            } \
+        } \
+    }
/* Scalar computation on one channel only, rest will stay at padding pattern */
#define CPU_COMPUTE_FUNC1_PAD(_name, _type, _expr) \
static void _name(_type *out, const _type *a, const _type *b, size_t width, size_t height) \
@@ -315,6 +374,7 @@
CPU_COMPUTE_FUNC1_BCAST(notu32_compute_cpu, uint32_t, ~A);
CPU_COMPUTE_FUNC1_BCAST(leadzerou32_compute_cpu, uint32_t, (A != 0) ? __builtin_clz(A) : 0x20);
/* 4-wide u32 (GC3000) */
+CPU_COMPUTE_FUNC1_MULTI(addu32_4w_compute_cpu, uint32_t, AI(i) + BI(i));
CPU_COMPUTE_FUNC1_MULTI(lshiftu32_4w_compute_cpu, uint32_t, AI(i) << (BI(i)&31));
CPU_COMPUTE_FUNC1_MULTI(rshiftu32_4w_compute_cpu, uint32_t, AI(i) >> (BI(i)&31));
CPU_COMPUTE_FUNC1_MULTI(rotateu32_4w_compute_cpu, uint32_t, (AI(i) << (BI(i)&31)) | (AI(i) >> ((32-BI(i))&31)));
@@ -326,7 +386,16 @@
/* float */
CPU_COMPUTE_FUNC1_MULTI(addf32_compute_cpu, float, AI(i) + BI(i));
CPU_COMPUTE_FUNC1_MULTI(mulf32_compute_cpu, float, AI(i) * BI(i));
-
+/* conversion between float and int (GC2000): one result broadcast to all channels, f2i via the NaN-aware gc2000 helpers */
+CPU_COMPUTE_FUNC1_CVT_BCAST(f2i_s32_compute_cpu, int32_t, float, f2i_s32_gc2000(A));
+CPU_COMPUTE_FUNC1_CVT_BCAST(f2i_u32_compute_cpu, uint32_t, float, f2i_u32_gc2000(A));
+CPU_COMPUTE_FUNC1_CVT_BCAST(i2f_s32_compute_cpu, float, int32_t, A);
+CPU_COMPUTE_FUNC1_CVT_BCAST(i2f_u32_compute_cpu, float, uint32_t, A);
+/* 4-wide conversion (GC3000): a plain C conversion per channel - seems to match ARM semantics */
+CPU_COMPUTE_FUNC1_CVT_MULTI(f2i_s32_4w_compute_cpu, int32_t, float, AI(i));
+CPU_COMPUTE_FUNC1_CVT_MULTI(f2i_u32_4w_compute_cpu, uint32_t, float, AI(i));
+CPU_COMPUTE_FUNC1_CVT_MULTI(i2f_s32_4w_compute_cpu, float, int32_t, AI(i));
+CPU_COMPUTE_FUNC1_CVT_MULTI(i2f_u32_4w_compute_cpu, float, uint32_t, AI(i));
#undef A
#undef B
#undef AI
@@ -334,7 +403,7 @@
#undef CPU_COMPUTE
/* Tests GPU code must take from a[x] t2 and b[y] t3, and output to t4.
- * It can also take an ancillary argument in u3, taken from auxin.
+ * It can also take an ancillary argument in u3, taken from the auxin field.
*/
struct op_test op_tests[] = {
{"nop", HWT_ALL, CT_INT32, i32_generate_values_h, i32_generate_values_v, (void*)nop_compute_cpu,
@@ -342,12 +411,7 @@
0x00000000, 0x00000000, 0x00000000, 0x00000000, /* nop */
}))
},
- /* Add will only output one element at a time */
- {"add.u32", HWT_ALL, CT_INT32, i32_generate_values_h, i32_generate_values_v, (void*)addu32_single_compute_cpu,
- GPU_CODE(((uint32_t[]){
- 0x00841001, 0x00202800, 0x80000000, 0x00000038, /* add.u32 t4, t2, void, t3 */
- }))
- },
+ /* Pretty much arbitrary test for multiple instructions */
{"add4.u32", HWT_ALL, CT_INT32_BCAST, i32_generate_values_h, i32_generate_values_v, (void*)addu32_compute_cpu,
GPU_CODE(((uint32_t[]){
0x00841001, 0x00202800, 0x80000000, 0x00000038, /* add.u32 t4.x___, t2.xxxx, void, t3.xxxx */
@@ -356,6 +420,7 @@
0x04041001, 0x00202800, 0x80000000, 0x00000038, /* add.u32 t4.___w, t2.xxxx, void, t3.xxxx */
}))
},
+ /** These are scalar and broadcast the result on any known hw */
{"imullo0.u32", HWT_ALL, CT_INT32_BCAST, i32_generate_values_h, i32_generate_values_v, (void*)mulu32_compute_cpu,
GPU_CODE(((uint32_t[]){
0x0784103c, 0x39202800, 0x81c801c0, 0x00000000, /* imullo0.u32 t4, t2, t3, void */
@@ -374,6 +439,11 @@
},
/** GC2000 behavior of bitwise instructions */
+ {"add.u32", HWT_GC2000, CT_INT32_BCAST, i32_generate_values_h, i32_generate_values_v, (void*)addu32_compute_cpu,
+ GPU_CODE(((uint32_t[]){
+ 0x07841001, 0x39202800, 0x80000000, 0x00390038, /* add.u32 t4, t2, void, t3 */
+ }))
+ },
{"lshift.u32", HWT_GC2000, CT_INT32_BCAST, i32_generate_values_h, i32_generate_values_v, (void*)lshiftu32_compute_cpu,
GPU_CODE(((uint32_t[]){
0x07841019, 0x39202800, 0x80010000, 0x00390038, /* lshift.u32 t4, t2, void, t3 */
@@ -404,18 +474,47 @@
0x0784101e, 0x39202800, 0x80010000, 0x00390038, /* xor.u32 t4, t2, void, t3 */
}))
},
- {"not.u32", HWT_GC2000, CT_INT32_BCAST, i32_generate_values_h, i32_generate_values_v, (void*)notu32_compute_cpu,
+ {"not.u32", HWT_GC2000, CT_INT32_BCAST, i32_generate_values_h, NULL, (void*)notu32_compute_cpu,
GPU_CODE(((uint32_t[]){
0x0784101f, 0x00200000, 0x80010000, 0x00390028, /* not.u32 t4, void, void, t2 */
}))
},
- {"leadzero.u32", HWT_GC2000, CT_INT32_BCAST, i32_generate_values_h, i32_generate_values_v, (void*)leadzerou32_compute_cpu,
+ {"leadzero.u32", HWT_GC2000, CT_INT32_BCAST, i32_generate_values_h, NULL, (void*)leadzerou32_compute_cpu,
GPU_CODE(((uint32_t[]){
0x07841018, 0x00200000, 0x80010000, 0x00390028, /* leadzero.u32 t4, void, void, t2 */
}))
},
+ /** Conversion instructions - GC2000 */
+ {"f2i.s32", HWT_GC2000, CT_INT32_BCAST, i32_generate_values_h, NULL, (void*)f2i_s32_compute_cpu,
+ GPU_CODE(((uint32_t[]){
+ 0x0784102e, 0x39002800, 0x40000000, 0x00000000, /* f2i.s32 t4, t2, void, void */
+ })), {}
+ },
+ {"f2i.u32", HWT_GC2000, CT_INT32_BCAST, i32_generate_values_h, NULL, (void*)f2i_u32_compute_cpu,
+ GPU_CODE(((uint32_t[]){
+ 0x0784102e, 0x39202800, 0x80000000, 0x00000000, /* f2i.u32 t4, t2, void, void */
+ })), {}
+ },
+ /* Need to use "imprecise" float comparison here, as unlike on GC3000 the
+ output will, for some values, be off-by-one compared to ARM.
+ */
+ {"i2f.s32", HWT_GC2000, CT_FLOAT32_BCAST, i32_generate_values_h, NULL, (void*)i2f_s32_compute_cpu,
+ GPU_CODE(((uint32_t[]){
+ 0x0784102d, 0x39002800, 0x40000000, 0x00000000, /* i2f.s32 t4, t2, void, void */
+ })), {}
+ },
+ {"i2f.u32", HWT_GC2000, CT_FLOAT32_BCAST, i32_generate_values_h, NULL, (void*)i2f_u32_compute_cpu,
+ GPU_CODE(((uint32_t[]){
+ 0x0784102d, 0x39202800, 0x80000000, 0x00000000, /* i2f.u32 t4, t2, void, void */
+ })), {}
+ },
- /** GC3000 behavior of bitwise instructions */
+ /** GC3000 behavior of bitwise and some ALU instructions */
+ {"add.u32", HWT_GC3000, CT_INT32, i32_generate_values_h, i32_generate_values_v, (void*)addu32_4w_compute_cpu,
+ GPU_CODE(((uint32_t[]){
+ 0x07841001, 0x39202800, 0x80000000, 0x00390038, /* add.u32 t4, t2, void, t3 */
+ }))
+ },
{"lshift.u32", HWT_GC3000, CT_INT32, i32_generate_values_h4, i32_generate_values_v4, (void*)lshiftu32_4w_compute_cpu,
GPU_CODE(((uint32_t[]){
0x07841019, 0x39202800, 0x80010000, 0x00390038, /* lshift.u32 t4, t2, void, t3 */
@@ -446,19 +545,41 @@
0x0784101e, 0x39202800, 0x80010000, 0x00390038, /* xor.u32 t4, t2, void, t3 */
}))
},
- {"not.u32", HWT_GC3000, CT_INT32, i32_generate_values_h4, i32_generate_values_v4, (void*)notu32_4w_compute_cpu,
+ {"not.u32", HWT_GC3000, CT_INT32, i32_generate_values_h4, NULL, (void*)notu32_4w_compute_cpu,
GPU_CODE(((uint32_t[]){
0x0784101f, 0x00200000, 0x80010000, 0x00390028, /* not.u32 t4, void, void, t2 */
}))
},
- {"leadzero.u32", HWT_GC3000, CT_INT32, i32_generate_values_h4, i32_generate_values_v4, (void*)leadzerou32_4w_compute_cpu,
+ {"leadzero.u32", HWT_GC3000, CT_INT32, i32_generate_values_h4, NULL, (void*)leadzerou32_4w_compute_cpu,
GPU_CODE(((uint32_t[]){
0x07841018, 0x00200000, 0x80010000, 0x00390028, /* leadzero.u32 t4, void, void, t2 */
}))
},
+
+ /** Conversion instructions - GC3000 */
+ {"f2i.s32", HWT_GC3000, CT_INT32, i32_generate_values_h, NULL, (void*)f2i_s32_4w_compute_cpu,
+ GPU_CODE(((uint32_t[]){
+ 0x0784102e, 0x39002800, 0x40000000, 0x00000000, /* f2i.s32 t4, t2, void, void */
+ })), {}
+ },
+ {"f2i.u32", HWT_GC3000, CT_INT32, i32_generate_values_h, NULL, (void*)f2i_u32_4w_compute_cpu,
+ GPU_CODE(((uint32_t[]){
+ 0x0784102e, 0x39202800, 0x80000000, 0x00000000, /* f2i.u32 t4, t2, void, void */
+ })), {}
+ },
+ {"i2f.s32", HWT_GC3000, CT_INT32, i32_generate_values_h, NULL, (void*)i2f_s32_4w_compute_cpu,
+ GPU_CODE(((uint32_t[]){
+ 0x0784102d, 0x39002800, 0x40000000, 0x00000000, /* i2f.s32 t4, t2, void, void */
+ })), {}
+ },
+ {"i2f.u32", HWT_GC3000, CT_INT32, i32_generate_values_h, NULL, (void*)i2f_u32_4w_compute_cpu,
+ GPU_CODE(((uint32_t[]){
+ 0x0784102d, 0x39202800, 0x80000000, 0x00000000, /* i2f.u32 t4, t2, void, void */
+ })), {}
+ },
// add.u16 does nothing
- // 0x00801001, 0x15402800, 0xc0000000, 0x00000018, /* add.u16 t0.x___, t2.yyyy, void, t1.xxxx */
- // Need an effective way of comparing these
+
+ /** Float ALU instructions */
{"add.f32", HWT_ALL, CT_FLOAT32, i32_generate_values_h4, i32_generate_values_v4, (void*)addf32_compute_cpu,
GPU_CODE(((uint32_t[]){
0x07841001, 0x39002800, 0x00000000, 0x00390038, /* add t4, t2, void, t3 */
@@ -538,7 +659,8 @@
seedx = rand();
seedy = rand();
cur_test->generate_values_h(seedx, a_cpu, width);
- cur_test->generate_values_v(seedy, b_cpu, height);
+ if (cur_test->generate_values_v)
+ cur_test->generate_values_v(seedy, b_cpu, height);
cur_test->compute_cpu(out_cpu, a_cpu, b_cpu, width, height);
memset(etna_bo_map(bo_out), 0, out_size);
@@ -630,10 +752,19 @@
fprintf(stderr, "Do not know how to handle GPU model %08x\n", (uint32_t)val);
goto error;
}
+ /* TODO real argument parsing */
+ const char *only_test = NULL;
+ int reps = 100;
+ if (argc > 2) {
+ only_test = argv[2];
+ reps = 1000; /* do more rounds if running only one test */
+ }
for (unsigned t=0; t<ARRAY_SIZE(op_tests); ++t)
{
+ if (only_test && strcmp(only_test, op_tests[t].op_name))
+ continue;
if (op_tests[t].hardware_type & hwt) {
- perform_test(hwt, info, &op_tests[t], 100);
+ perform_test(hwt, info, &op_tests[t], reps);
} else {
printf("%s: (skipped)\n", op_tests[t].op_name);
}
diff --git a/src/float_helpers.h b/src/float_helpers.h
new file mode 100644
index 0000000..a453877
--- /dev/null
+++ b/src/float_helpers.h
@@ -0,0 +1,65 @@
+/* Floating point helpers - from Mesa u_math.h */
+#ifndef H_FLOAT_HELPERS
+#define H_FLOAT_HELPERS
+#include <stdbool.h>
+#include <stdint.h>
+
+union fi { /* overlay used to view the storage of a float as an integer and back */
+ float f; /* floating-point view */
+ int32_t i; /* same storage as a signed 32-bit integer */
+ uint32_t ui; /* same storage as an unsigned 32-bit integer */
+};
+
+/**
+ * Return the raw bit pattern of a float (reinterpretation, not a numeric cast).
+ */
+static inline unsigned
+fui(float f)
+{
+    /* Store through the float member, read back through the unsigned one. */
+    const union fi bits = { .f = f };
+    return bits.ui;
+}
+
+/* Inverse of fui(): return the float whose bit pattern is 'ui'. */
+static inline float
+uif(uint32_t ui)
+{
+    const union fi bits = { .ui = ui };
+    return bits.f;
+}
+
+/**
+ * Single-float: true when all exponent bits are set, i.e. x is Inf or NaN.
+ */
+static inline bool
+util_is_inf_or_nan(float x)
+{
+    const uint32_t exponent_mask = 0x7f800000;
+    const union fi bits = { .f = x };
+    return (bits.ui & exponent_mask) == exponent_mask;
+}
+
+
+/* True for NaN: with the sign bit masked off, the bits exceed the Inf pattern. */
+static inline bool
+util_is_nan(float x)
+{
+    const union fi bits = { .f = x };
+    return (bits.ui & 0x7fffffff) > 0x7f800000;
+}
+
+
+/* Sign of an infinity: -1 for -Inf, 1 for +Inf, 0 for every other value. */
+static inline int
+util_inf_sign(float x)
+{
+    const union fi bits = { .f = x };
+    const bool is_inf = (bits.ui & 0x7fffffff) == 0x7f800000;
+    if (is_inf) {
+        return (x < 0) ? -1 : 1;
+    }
+    return 0;
+}
+
+#endif