GC3000 support in etnaviv_verifyops
This makes the base framework run, although some specific instructions do fail.
The significant differences with GC2000:
- CL seems to run on PS, not on VS
- PA/RA must be set up differently
- Use unified uniforms instead of VS uniforms
- Code must be in memory, not loaded into the GPU
- GC3000 has some extra state that must be set up
Signed-off-by: Wladimir J. van der Laan <laanwj@gmail.com>
diff --git a/src/drm_setup.c b/src/drm_setup.c
index 711799d..e232c3e 100644
--- a/src/drm_setup.c
+++ b/src/drm_setup.c
@@ -38,7 +38,7 @@
}
/* TODO: we assume that core 1 is a 3D+CL capable one.
- * This is pretty much only true for GC2000.
+ * This is pretty much only true for i.MX6q(p).
* If the tests don't work on your hardware check this carefully.
*/
info->gpu = etna_gpu_new(info->dev, 1);
diff --git a/src/etnaviv_verifyops.c b/src/etnaviv_verifyops.c
index 952edc3..77fd470 100644
--- a/src/etnaviv_verifyops.c
+++ b/src/etnaviv_verifyops.c
@@ -68,12 +68,18 @@
static const char *COMPS = "xyzw";
+typedef enum {
+ HWT_GC2000 = 1,
+ HWT_GC3000 = 2,
+} HardwareType;
+
#define MAX_INST 1024
-static void gen_cmd_stream(struct etna_cmd_stream *stream, struct gpu_code *gpu_code, struct etna_bo *out, struct etna_bo *in0, struct etna_bo *in1, uint32_t *auxin)
+static void gen_cmd_stream(HardwareType hwt, struct etna_cmd_stream *stream, struct gpu_code *gpu_code, struct etna_bo *bo_code, struct etna_bo *out, struct etna_bo *in0, struct etna_bo *in1, uint32_t *auxin)
{
unsigned num_inst;
uint32_t code[MAX_INST*4];
unsigned code_ptr = 0;
+ unsigned uniform_base = 0;
for (unsigned i=0; i<prelude.size; ++i)
code[code_ptr++] = prelude.code[i];
@@ -83,39 +89,56 @@
code[code_ptr++] = postlude.code[i];
assert((code_ptr & 3)==0);
num_inst = code_ptr / 4; /* number of instructions including final nop */
+ memcpy(etna_bo_map(bo_code), code, code_ptr * 4); /* upload for gc3000 */
etna_set_state(stream, VIVS_PA_SYSTEM_MODE, VIVS_PA_SYSTEM_MODE_UNK0 | VIVS_PA_SYSTEM_MODE_UNK4);
etna_set_state(stream, VIVS_GL_API_MODE, VIVS_GL_API_MODE_OPENCL);
- /* Need to write *something* to VS input registers before writing shader uniforms and code. Otherwise
- * the whole thing will hang when running this first after boot.
- */
- etna_set_state(stream, VIVS_VS_INPUT_COUNT, VIVS_VS_INPUT_COUNT_COUNT(1) | VIVS_VS_INPUT_COUNT_UNK8(31));
- etna_set_state(stream, VIVS_VS_INPUT(0), VIVS_VS_INPUT_I0(0) | VIVS_VS_INPUT_I1(1) | VIVS_VS_INPUT_I2(2) | VIVS_VS_INPUT_I3(3));
+ if (hwt == HWT_GC2000) {
+ /* Need to write *something* to VS input registers before writing shader uniforms and code. Otherwise
+ * the whole thing will hang when running this first after boot.
+ */
+ etna_set_state(stream, VIVS_VS_INPUT_COUNT, VIVS_VS_INPUT_COUNT_COUNT(1) | VIVS_VS_INPUT_COUNT_UNK8(31));
+ etna_set_state(stream, VIVS_VS_INPUT(0), VIVS_VS_INPUT_I0(0) | VIVS_VS_INPUT_I1(1) | VIVS_VS_INPUT_I2(2) | VIVS_VS_INPUT_I3(3));
+ }
- etna_set_state_from_bo(stream, VIVS_VS_UNIFORMS(0), out, ETNA_RELOC_WRITE); /* u0.x */
- etna_set_state_from_bo(stream, VIVS_VS_UNIFORMS(1), in0, ETNA_RELOC_READ); /* u0.y */
- etna_set_state_from_bo(stream, VIVS_VS_UNIFORMS(2), in1, ETNA_RELOC_READ); /* u0.z */
- etna_set_state(stream, VIVS_VS_UNIFORMS(3), 0x4); /* u0.w Left-shift */
- etna_set_state(stream, VIVS_VS_UNIFORMS(4), 0x10); /* u1.x Row stride */
- etna_set_state(stream, VIVS_VS_UNIFORMS(5), 0x0); /* u1.y Unused */
- etna_set_state(stream, VIVS_VS_UNIFORMS(6), 0x0); /* u1.z Unused */
- etna_set_state(stream, VIVS_VS_UNIFORMS(7), 0x0); /* u1.w Unused */
- etna_set_state(stream, VIVS_VS_UNIFORMS(8), 0xaaaaaaaa); /* u2.x Default output (if GPU program generates no output in t4) */
- etna_set_state(stream, VIVS_VS_UNIFORMS(9), 0x55555555); /* u2.y */
- etna_set_state(stream, VIVS_VS_UNIFORMS(10), 0xaaaaaaaa); /* u2.z */
- etna_set_state(stream, VIVS_VS_UNIFORMS(11), 0x55555555); /* u2.w */
- etna_set_state(stream, VIVS_VS_UNIFORMS(12), auxin[0]); /* u3.x Ancillary input for testing three-operand instructions */
- etna_set_state(stream, VIVS_VS_UNIFORMS(13), auxin[1]); /* u3.y */
- etna_set_state(stream, VIVS_VS_UNIFORMS(14), auxin[2]); /* u3.z */
- etna_set_state(stream, VIVS_VS_UNIFORMS(15), auxin[3]); /* u3.w */
+ if (hwt == HWT_GC3000) {
+ /* GC3000: unified uniforms, shader instructions in memory */
+ uniform_base = VIVS_SH_UNIFORMS(0);
+ etna_set_state_from_bo(stream, VIVS_PS_INST_ADDR, bo_code, ETNA_RELOC_READ);
- for (unsigned i=0; i<code_ptr; ++i)
- etna_set_state(stream, VIVS_SH_INST_MEM(i), code[i]);
+ } else if (hwt == HWT_GC2000) {
+ /* GC2000: VS uniforms, shader instructions on-chip */
+ uniform_base = VIVS_VS_UNIFORMS(0);
+ for (unsigned i=0; i<code_ptr; ++i)
+ etna_set_state(stream, VIVS_SH_INST_MEM(i), code[i]);
+ }
+ /* Set uniforms */
+ etna_set_state_from_bo(stream, uniform_base + 0*4, out, ETNA_RELOC_WRITE); /* u0.x */
+ etna_set_state_from_bo(stream, uniform_base + 1*4, in0, ETNA_RELOC_READ); /* u0.y */
+ etna_set_state_from_bo(stream, uniform_base + 2*4, in1, ETNA_RELOC_READ); /* u0.z */
+ etna_set_state(stream, uniform_base + 3*4, 0x4); /* u0.w Left-shift */
+ etna_set_state(stream, uniform_base + 4*4, 0x10); /* u1.x Row stride */
+ etna_set_state(stream, uniform_base + 5*4, 0x0); /* u1.y Unused */
+ etna_set_state(stream, uniform_base + 6*4, 0x0); /* u1.z Unused */
+ etna_set_state(stream, uniform_base + 7*4, 0x0); /* u1.w Unused */
+ etna_set_state(stream, uniform_base + 8*4, 0xaaaaaaaa); /* u2.x Default output (if GPU program generates no output in t4) */
+ etna_set_state(stream, uniform_base + 9*4, 0x55555555); /* u2.y */
+ etna_set_state(stream, uniform_base + 10*4, 0xaaaaaaaa); /* u2.z */
+ etna_set_state(stream, uniform_base + 11*4, 0x55555555); /* u2.w */
+ etna_set_state(stream, uniform_base + 12*4, auxin[0]); /* u3.x Ancillary input for testing three-operand instructions */
+ etna_set_state(stream, uniform_base + 13*4, auxin[1]); /* u3.y */
+ etna_set_state(stream, uniform_base + 14*4, auxin[2]); /* u3.z */
+ etna_set_state(stream, uniform_base + 15*4, auxin[3]); /* u3.w */
etna_set_state(stream, VIVS_VS_INPUT_COUNT, VIVS_VS_INPUT_COUNT_COUNT(1) | VIVS_VS_INPUT_COUNT_UNK8(1));
etna_set_state(stream, VIVS_VS_TEMP_REGISTER_CONTROL, VIVS_VS_TEMP_REGISTER_CONTROL_NUM_TEMPS(10));
etna_set_state(stream, VIVS_VS_OUTPUT(0), VIVS_VS_OUTPUT_O0(0) | VIVS_VS_OUTPUT_O1(0) | VIVS_VS_OUTPUT_O2(0) | VIVS_VS_OUTPUT_O3(0));
- etna_set_state(stream, VIVS_VS_NEW_UNK00860, 0x0);
+ /* Unknown state set differently for GC2000 and GC3000 */
+ if (hwt == HWT_GC3000) {
+ etna_set_state(stream, VIVS_VS_NEW_UNK00860, 0x1011); /* PS/VS units? */
+ } else if (hwt == HWT_GC2000) {
+ etna_set_state(stream, VIVS_VS_NEW_UNK00860, 0x0);
+ }
etna_set_state(stream, VIVS_VS_RANGE, VIVS_VS_RANGE_LOW(0x0) | VIVS_VS_RANGE_HIGH(num_inst - 2));
etna_set_state(stream, VIVS_VS_LOAD_BALANCING, VIVS_VS_LOAD_BALANCING_A(0x42) | VIVS_VS_LOAD_BALANCING_B(0x5) | VIVS_VS_LOAD_BALANCING_C(0x3f) | VIVS_VS_LOAD_BALANCING_D(0xf));
etna_set_state(stream, VIVS_VS_OUTPUT_COUNT, 1);
@@ -129,7 +152,16 @@
etna_set_state(stream, VIVS_PS_CONTROL, 0);
etna_set_state(stream, VIVS_PS_UNK01030, 0x0);
- etna_set_state(stream, VIVS_PA_ATTRIBUTE_ELEMENT_COUNT, VIVS_PA_ATTRIBUTE_ELEMENT_COUNT_UNK0(0x0) | VIVS_PA_ATTRIBUTE_ELEMENT_COUNT_COUNT(0x0));
+ if (hwt == HWT_GC3000) {
+ /* GC3000: Needs some PA state */
+ etna_set_state(stream, VIVS_PA_SHADER_ATTRIBUTES(0), VIVS_PA_SHADER_ATTRIBUTES_UNK4(0x0) | VIVS_PA_SHADER_ATTRIBUTES_UNK8(0x2));
+ etna_set_state(stream, VIVS_PA_ATTRIBUTE_ELEMENT_COUNT, VIVS_PA_ATTRIBUTE_ELEMENT_COUNT_UNK0(0x0) | VIVS_PA_ATTRIBUTE_ELEMENT_COUNT_COUNT(0x1));
+
+ } else if (hwt == HWT_GC2000) {
+
+ /* GC2000: Disable PA */
+ etna_set_state(stream, VIVS_PA_ATTRIBUTE_ELEMENT_COUNT, VIVS_PA_ATTRIBUTE_ELEMENT_COUNT_UNK0(0x0) | VIVS_PA_ATTRIBUTE_ELEMENT_COUNT_COUNT(0x0));
+ }
etna_set_state(stream, VIVS_CL_UNK00924, 0x0);
etna_set_state(stream, VIVS_CL_CONFIG, VIVS_CL_CONFIG_DIMENSIONS(0x2) | VIVS_CL_CONFIG_TRAVERSE_ORDER(0x0) | VIVS_CL_CONFIG_SWATH_SIZE_X(0x0) | VIVS_CL_CONFIG_SWATH_SIZE_Y(0x0) | VIVS_CL_CONFIG_SWATH_SIZE_Z(0x0) | VIVS_CL_CONFIG_VALUE_ORDER(0x3));
@@ -141,6 +173,27 @@
etna_set_state(stream, VIVS_CL_WORKGROUP_Z, VIVS_CL_WORKGROUP_Z_SIZE(0x3ff) | VIVS_CL_WORKGROUP_Z_COUNT(0xffff));
etna_set_state(stream, VIVS_CL_THREAD_ALLOCATION, 0x4);
+ if (hwt == HWT_GC3000) {
+
+ /* GC3000-only unknown state */
+ etna_set_state(stream, VIVS_RA_CONTROL, VIVS_RA_CONTROL_UNK0);
+ etna_set_state(stream, VIVS_PS_UNK01024, 0x0);
+ etna_set_state(stream, VIVS_VS_UNK00868, 0x21);
+ /* GC3000 uses the PS_RANGE instead of VS_RANGE for marking the CL shader instruction range */
+ etna_set_state(stream, VIVS_PS_RANGE, VIVS_PS_RANGE_LOW(0x0) | VIVS_PS_RANGE_HIGH(num_inst - 2));
+ /* GC3000: Needs PS output register */
+ etna_set_state(stream, VIVS_PS_OUTPUT_REG, 0x0);
+ /* Load balancing set differently for GC3000 */
+ etna_set_state(stream, VIVS_VS_LOAD_BALANCING, VIVS_VS_LOAD_BALANCING_A(0x0) | VIVS_VS_LOAD_BALANCING_B(0x0) | VIVS_VS_LOAD_BALANCING_C(0x3f) | VIVS_VS_LOAD_BALANCING_D(0xf));
+ /* GC3000: Extra registers that seem to mirror CL_GLOBAL and CL_WORKGROUP */
+ etna_set_state(stream, VIVS_CL_UNK00940, 0x1);
+ etna_set_state(stream, VIVS_CL_UNK00944, 0x1);
+ etna_set_state(stream, VIVS_CL_UNK00948, 0xffffffff);
+ etna_set_state(stream, VIVS_CL_UNK0094C, 0x7);
+ etna_set_state(stream, VIVS_CL_UNK00950, 0x7);
+ etna_set_state(stream, VIVS_CL_UNK00954, 0x3ff);
+ }
+
/* Kick off program */
etna_set_state(stream, VIVS_CL_KICKER, 0xbadabeeb);
/* Flush caches so that we can see the output */
@@ -340,19 +393,20 @@
return false;
}
-int perform_test(struct drm_test_info *info, struct op_test *cur_test, int repeats)
+int perform_test(HardwareType hwt, struct drm_test_info *info, struct op_test *cur_test, int repeats)
{
int retval = -1;
const size_t unit_size = 16; /* vec4 of any 32-bit type */
const size_t width = 16;
const size_t height = 16;
size_t seedx, seedy;
- struct etna_bo *bo_out=0, *bo_in0=0, *bo_in1=0;
+ struct etna_bo *bo_out=0, *bo_in0=0, *bo_in1=0, *bo_code=0;
unsigned int errors = 0;
size_t out_size = width * height * unit_size;
size_t in0_size = width * unit_size;
size_t in1_size = height * unit_size;
+ size_t max_code_size = MAX_INST * 16;
void *out_cpu = malloc(out_size);
void *a_cpu = malloc(in0_size);
@@ -367,6 +421,7 @@
bo_out = etna_bo_new(info->dev, out_size, DRM_ETNA_GEM_CACHE_UNCACHED);
bo_in0 = etna_bo_new(info->dev, in0_size, DRM_ETNA_GEM_CACHE_UNCACHED);
bo_in1 = etna_bo_new(info->dev, in1_size, DRM_ETNA_GEM_CACHE_UNCACHED);
+ bo_code = etna_bo_new(info->dev, max_code_size, DRM_ETNA_GEM_CACHE_UNCACHED);
if (!bo_in0 || !bo_in1 || !bo_out) {
fprintf(stderr, "Unable to allocate buffer\n");
goto out;
@@ -383,7 +438,7 @@
memcpy(etna_bo_map(bo_in1), b_cpu, in1_size);
/* generate command sequence */
- gen_cmd_stream(info->stream, &cur_test->gpu_code, bo_out, bo_in0, bo_in1, cur_test->auxin);
+ gen_cmd_stream(hwt, info->stream, &cur_test->gpu_code, bo_code, bo_out, bo_in0, bo_in1, cur_test->auxin);
/* execute command sequence */
etna_cmd_stream_finish(info->stream);
@@ -451,12 +506,25 @@
{
srand(time(NULL));
struct drm_test_info *info;
+ uint64_t val;
+ HardwareType hwt = HWT_GC2000;
if ((info = drm_test_setup(argc, argv)) == NULL) {
return 1;
}
+ if (etna_gpu_get_param(info->gpu, ETNA_GPU_MODEL, &val)) {
+ fprintf(stderr, "Could not get GPU model\n");
+ return 1;
+ }
+ switch (val) {
+ case 0x2000: printf(" Model: GC2000\n"); hwt = HWT_GC2000; break;
+ case 0x3000: printf(" Model: GC3000\n"); hwt = HWT_GC3000; break;
+ default:
+ fprintf(stderr, "Do not know how to handle GPU model %08x\n", (uint32_t)val);
+ return 1;
+ }
for (unsigned t=0; t<ARRAY_SIZE(op_tests); ++t)
{
- perform_test(info, &op_tests[t], 100);
+ perform_test(hwt, info, &op_tests[t], 100);
}
drm_test_teardown(info);