| /* |
| * Copyright 2022 Alyssa Rosenzweig |
| * Copyright 2021 Collabora, Ltd. |
| * SPDX-License-Identifier: MIT |
| */ |
| |
| #include "util/compiler.h" |
| #include "agx_compiler.h" |
| #include "agx_debug.h" |
| #include "agx_opcodes.h" |
| |
| /* Validatation doesn't make sense in release builds */ |
| #ifndef NDEBUG |
| |
| #define agx_validate_assert(stmt) \ |
| if (!(stmt)) { \ |
| return false; \ |
| } |
| |
| /* |
| * If a block contains phi nodes, they must come at the start of the block. If a |
| * block contains control flow, it must come at the beginning/end as applicable. |
| * Therefore the form of a valid block is: |
| * |
| * Control flow instructions (else) |
| * Phi nodes |
| * General instructions |
| * Control flow instructions (except else) |
| * |
| * Validate that this form is satisfied. |
| */ |
| enum agx_block_state { |
| AGX_BLOCK_STATE_CF_ELSE = 0, |
| AGX_BLOCK_STATE_PHI = 1, |
| AGX_BLOCK_STATE_BODY = 2, |
| AGX_BLOCK_STATE_CF = 3 |
| }; |
| |
| static bool |
| agx_validate_block_form(agx_block *block) |
| { |
| enum agx_block_state state = AGX_BLOCK_STATE_CF_ELSE; |
| |
| agx_foreach_instr_in_block(block, I) { |
| switch (I->op) { |
| case AGX_OPCODE_PRELOAD: |
| case AGX_OPCODE_ELSE_ICMP: |
| case AGX_OPCODE_ELSE_FCMP: |
| agx_validate_assert(state == AGX_BLOCK_STATE_CF_ELSE); |
| break; |
| |
| case AGX_OPCODE_PHI: |
| agx_validate_assert(state == AGX_BLOCK_STATE_CF_ELSE || |
| state == AGX_BLOCK_STATE_PHI); |
| |
| state = AGX_BLOCK_STATE_PHI; |
| break; |
| |
| case AGX_OPCODE_EXPORT: |
| agx_validate_assert(agx_num_successors(block) == 0); |
| state = AGX_BLOCK_STATE_CF; |
| break; |
| |
| default: |
| if (instr_after_logical_end(I)) { |
| state = AGX_BLOCK_STATE_CF; |
| } else { |
| agx_validate_assert(state != AGX_BLOCK_STATE_CF); |
| state = AGX_BLOCK_STATE_BODY; |
| } |
| break; |
| } |
| } |
| |
| return true; |
| } |
| |
| /* |
| * Only moves and phis use stack. Phis cannot use moves due to their |
| * parallel nature, so we allow phis to take memory, later lowered to moves. |
| */ |
| static bool |
| is_stack_valid(agx_instr *I) |
| { |
| return (I->op == AGX_OPCODE_MOV) || (I->op == AGX_OPCODE_PHI); |
| } |
| |
| static bool |
| agx_validate_sources(agx_instr *I) |
| { |
| agx_foreach_src(I, s) { |
| agx_index src = I->src[s]; |
| |
| if (src.type == AGX_INDEX_IMMEDIATE) { |
| agx_validate_assert(!src.kill); |
| agx_validate_assert(!src.cache); |
| agx_validate_assert(!src.discard); |
| |
| bool ldst = agx_allows_16bit_immediate(I); |
| |
| /* Immediates are encoded as 8-bit (16-bit for memory load/store). For |
| * integers, they extend to 16-bit. For floating point, they are 8-bit |
| * minifloats. The 8-bit minifloats are a strict subset of 16-bit |
| * standard floats, so we treat them as such in the IR, with an |
| * implicit f16->f32 for 32-bit floating point operations. |
| */ |
| agx_validate_assert(src.size == AGX_SIZE_16); |
| agx_validate_assert(src.value < (1 << (ldst ? 16 : 8))); |
| } else if (I->op == AGX_OPCODE_COLLECT && !agx_is_null(src)) { |
| agx_validate_assert(src.size == I->src[0].size); |
| } else if (I->op == AGX_OPCODE_PHI) { |
| agx_validate_assert(src.size == I->dest[0].size); |
| agx_validate_assert(!agx_is_null(src)); |
| } |
| |
| agx_validate_assert(!src.memory || is_stack_valid(I)); |
| } |
| |
| return true; |
| } |
| |
| static bool |
| agx_validate_defs(agx_instr *I, BITSET_WORD *defs) |
| { |
| agx_foreach_ssa_src(I, s) { |
| /* Skip phis, they're special in loop headers */ |
| if (I->op == AGX_OPCODE_PHI) |
| break; |
| |
| /* Sources must be defined before their use */ |
| if (!BITSET_TEST(defs, I->src[s].value)) |
| return false; |
| } |
| |
| agx_foreach_ssa_dest(I, d) { |
| /* Static single assignment */ |
| if (BITSET_TEST(defs, I->dest[d].value)) |
| return false; |
| |
| BITSET_SET(defs, I->dest[d].value); |
| |
| if (I->dest[d].memory && !is_stack_valid(I)) |
| return false; |
| } |
| |
| return true; |
| } |
| |
| /** Returns number of registers written by an instruction */ |
| static unsigned |
| agx_write_registers(const agx_instr *I, unsigned d) |
| { |
| unsigned size = agx_size_align_16(I->dest[d].size); |
| |
| switch (I->op) { |
| case AGX_OPCODE_MOV: |
| case AGX_OPCODE_PHI: |
| /* Tautological */ |
| return agx_index_size_16(I->dest[d]); |
| |
| case AGX_OPCODE_ITER: |
| case AGX_OPCODE_ITERPROJ: |
| assert(1 <= I->channels && I->channels <= 4); |
| return I->channels * size; |
| |
| case AGX_OPCODE_IMAGE_LOAD: |
| case AGX_OPCODE_TEXTURE_LOAD: |
| case AGX_OPCODE_TEXTURE_SAMPLE: |
| /* Even when masked out, these clobber 4 registers. |
| * |
| * TODO: Figure out the sparse interaction. |
| */ |
| return (I->sparse ? 8 : 4) * size; |
| |
| case AGX_OPCODE_DEVICE_LOAD: |
| case AGX_OPCODE_LOCAL_LOAD: |
| case AGX_OPCODE_STACK_LOAD: |
| case AGX_OPCODE_LD_TILE: |
| /* Can write 16-bit or 32-bit. Anything logically 64-bit is already |
| * expanded to 32-bit in the mask. |
| */ |
| return util_bitcount(I->mask) * MIN2(size, 2); |
| |
| case AGX_OPCODE_LDCF: |
| return 6; |
| case AGX_OPCODE_COLLECT: |
| return I->nr_srcs * agx_size_align_16(I->src[0].size); |
| default: |
| return size; |
| } |
| } |
| |
| struct dim_info { |
| unsigned comps; |
| bool array; |
| }; |
| |
| static struct dim_info |
| agx_dim_info(enum agx_dim dim) |
| { |
| switch (dim) { |
| case AGX_DIM_1D: |
| return (struct dim_info){1, false}; |
| case AGX_DIM_1D_ARRAY: |
| return (struct dim_info){1, true}; |
| case AGX_DIM_2D: |
| return (struct dim_info){2, false}; |
| case AGX_DIM_2D_ARRAY: |
| return (struct dim_info){2, true}; |
| case AGX_DIM_2D_MS: |
| return (struct dim_info){3, false}; |
| case AGX_DIM_3D: |
| return (struct dim_info){3, false}; |
| case AGX_DIM_CUBE: |
| return (struct dim_info){3, false}; |
| case AGX_DIM_CUBE_ARRAY: |
| return (struct dim_info){3, true}; |
| case AGX_DIM_2D_MS_ARRAY: |
| return (struct dim_info){2, true}; |
| default: |
| unreachable("invalid dim"); |
| } |
| } |
| |
| /* |
| * Return number of registers required for coordinates for a texture/image |
| * instruction. We handle layer + sample index as 32-bit even when only the |
| * lower 16-bits are present. LOD queries do not take a layer. |
| */ |
| static unsigned |
| agx_coordinate_registers(const agx_instr *I) |
| { |
| struct dim_info dim = agx_dim_info(I->dim); |
| bool has_array = !I->query_lod; |
| |
| return 2 * (dim.comps + (has_array && dim.array)); |
| } |
| |
| static unsigned |
| agx_read_registers(const agx_instr *I, unsigned s) |
| { |
| unsigned size = agx_size_align_16(I->src[s].size); |
| |
| switch (I->op) { |
| case AGX_OPCODE_MOV: |
| case AGX_OPCODE_EXPORT: |
| /* Tautological */ |
| return agx_index_size_16(I->src[0]); |
| |
| case AGX_OPCODE_PHI: |
| if (I->src[s].type == AGX_INDEX_IMMEDIATE) |
| return size; |
| else |
| return agx_index_size_16(I->dest[0]); |
| |
| case AGX_OPCODE_SPLIT: |
| return I->nr_dests * agx_size_align_16(agx_split_width(I)); |
| |
| case AGX_OPCODE_UNIFORM_STORE: |
| if (s == 0) |
| return util_bitcount(I->mask) * size; |
| else |
| return size; |
| |
| case AGX_OPCODE_DEVICE_STORE: |
| case AGX_OPCODE_LOCAL_STORE: |
| case AGX_OPCODE_STACK_STORE: |
| case AGX_OPCODE_ST_TILE: |
| /* See agx_write_registers */ |
| if (s == 0) |
| return util_bitcount(I->mask) * MIN2(size, 2); |
| else if (s == 2 && I->explicit_coords) |
| return 2; |
| else |
| return size; |
| |
| case AGX_OPCODE_ZS_EMIT: |
| if (s == 1) { |
| /* Depth (bit 0) is fp32, stencil (bit 1) is u16 in the hw but we pad |
| * up to u32 for simplicity |
| */ |
| bool z = !!(I->zs & 1); |
| bool s = !!(I->zs & 2); |
| assert(z || s); |
| |
| return (z && s) ? 4 : z ? 2 : 1; |
| } else { |
| return 1; |
| } |
| |
| case AGX_OPCODE_IMAGE_WRITE: |
| if (s == 0) |
| return 4 * size /* data */; |
| else if (s == 1) |
| return agx_coordinate_registers(I); |
| else |
| return size; |
| |
| case AGX_OPCODE_IMAGE_LOAD: |
| case AGX_OPCODE_TEXTURE_LOAD: |
| case AGX_OPCODE_TEXTURE_SAMPLE: |
| if (s == 0) { |
| return agx_coordinate_registers(I); |
| } else if (s == 1) { |
| /* LOD */ |
| if (I->lod_mode == AGX_LOD_MODE_LOD_GRAD || |
| I->lod_mode == AGX_LOD_MODE_LOD_GRAD_MIN) { |
| |
| /* Technically only 16-bit but we model as 32-bit to keep the IR |
| * simple, since the gradient is otherwise 32-bit. |
| */ |
| unsigned min = I->lod_mode == AGX_LOD_MODE_LOD_GRAD_MIN ? 2 : 0; |
| |
| switch (I->dim) { |
| case AGX_DIM_1D: |
| case AGX_DIM_1D_ARRAY: |
| return (2 * 2 * 1) + min; |
| case AGX_DIM_2D: |
| case AGX_DIM_2D_ARRAY: |
| case AGX_DIM_2D_MS_ARRAY: |
| case AGX_DIM_2D_MS: |
| return (2 * 2 * 2) + min; |
| case AGX_DIM_CUBE: |
| case AGX_DIM_CUBE_ARRAY: |
| case AGX_DIM_3D: |
| return (2 * 2 * 3) + min; |
| } |
| |
| unreachable("Invalid texture dimension"); |
| } else if (I->lod_mode == AGX_LOD_MODE_AUTO_LOD_BIAS_MIN) { |
| return 2; |
| } else { |
| return 1; |
| } |
| } else if (s == 5) { |
| /* Compare/offset */ |
| return 2 * ((!!I->shadow) + (!!I->offset)); |
| } else { |
| return size; |
| } |
| |
| case AGX_OPCODE_BLOCK_IMAGE_STORE: |
| if (s == 3 && I->explicit_coords) |
| return agx_coordinate_registers(I); |
| else |
| return size; |
| |
| case AGX_OPCODE_ATOMIC: |
| case AGX_OPCODE_LOCAL_ATOMIC: |
| if (s == 0 && I->atomic_opc == AGX_ATOMIC_OPC_CMPXCHG) |
| return size * 2; |
| else |
| return size; |
| |
| default: |
| return size; |
| } |
| } |
| |
| /* Type check the dimensionality of sources and destinations. */ |
| static bool |
| agx_validate_width(agx_context *ctx) |
| { |
| bool succ = true; |
| enum agx_size *sizes = calloc(ctx->alloc, sizeof(*sizes)); |
| |
| agx_foreach_instr_global(ctx, I) { |
| agx_foreach_dest(I, d) { |
| unsigned exp = agx_write_registers(I, d); |
| unsigned act = |
| agx_channels(I->dest[d]) * agx_size_align_16(I->dest[d].size); |
| |
| if (exp != act) { |
| succ = false; |
| fprintf(stderr, "destination %u, expected width %u, got width %u\n", |
| d, exp, act); |
| agx_print_instr(I, stderr); |
| fprintf(stderr, "\n"); |
| } |
| |
| if (I->dest[d].type == AGX_INDEX_NORMAL) |
| sizes[I->dest[d].value] = I->dest[d].size; |
| } |
| |
| agx_foreach_src(I, s) { |
| if (I->src[s].type == AGX_INDEX_NULL) |
| continue; |
| |
| unsigned exp = agx_read_registers(I, s); |
| unsigned act = |
| agx_channels(I->src[s]) * agx_size_align_16(I->src[s].size); |
| |
| if (exp != act) { |
| succ = false; |
| fprintf(stderr, "source %u, expected width %u, got width %u\n", s, |
| exp, act); |
| agx_print_instr(I, stderr); |
| fprintf(stderr, "\n"); |
| } |
| } |
| } |
| |
| /* Check sources after all defs processed for proper backedge handling */ |
| agx_foreach_instr_global(ctx, I) { |
| agx_foreach_ssa_src(I, s) { |
| if (sizes[I->src[s].value] != I->src[s].size) { |
| succ = false; |
| fprintf(stderr, "source %u, expected el size %u, got el size %u\n", |
| s, agx_size_align_16(sizes[I->src[s].value]), |
| agx_size_align_16(I->src[s].size)); |
| agx_print_instr(I, stderr); |
| fprintf(stderr, "\n"); |
| } |
| } |
| } |
| |
| free(sizes); |
| return succ; |
| } |
| |
| static bool |
| agx_validate_predecessors(agx_block *block) |
| { |
| /* Loop headers (only) have predecessors that are later in source form */ |
| bool has_later_preds = false; |
| |
| agx_foreach_predecessor(block, pred) { |
| if ((*pred)->index >= block->index) |
| has_later_preds = true; |
| } |
| |
| if (has_later_preds && !block->loop_header) |
| return false; |
| |
| /* Successors and predecessors are found together */ |
| agx_foreach_predecessor(block, pred) { |
| bool found = false; |
| |
| agx_foreach_successor((*pred), succ) { |
| if (succ == block) |
| found = true; |
| } |
| |
| if (!found) |
| return false; |
| } |
| |
| return true; |
| } |
| |
| static bool |
| agx_validate_sr(const agx_instr *I) |
| { |
| bool none = (I->op == AGX_OPCODE_GET_SR); |
| bool coverage = (I->op == AGX_OPCODE_GET_SR_COVERAGE); |
| bool barrier = (I->op == AGX_OPCODE_GET_SR_BARRIER); |
| |
| /* Filter get_sr instructions */ |
| if (!(none || coverage || barrier)) |
| return true; |
| |
| switch (I->sr) { |
| case AGX_SR_ACTIVE_THREAD_INDEX_IN_QUAD: |
| case AGX_SR_ACTIVE_THREAD_INDEX_IN_SUBGROUP: |
| case AGX_SR_TOTAL_ACTIVE_THREADS_IN_QUAD: |
| case AGX_SR_TOTAL_ACTIVE_THREADS_IN_SUBGROUP: |
| case AGX_SR_COVERAGE_MASK: |
| case AGX_SR_IS_ACTIVE_THREAD: |
| return coverage; |
| |
| case AGX_SR_HELPER_OP: |
| case AGX_SR_HELPER_ARG_L: |
| case AGX_SR_HELPER_ARG_H: |
| return barrier; |
| |
| default: |
| return none; |
| } |
| } |
| |
| void |
| agx_validate(agx_context *ctx, const char *after) |
| { |
| bool fail = false; |
| |
| if (agx_compiler_debug & AGX_DBG_NOVALIDATE) |
| return; |
| |
| int last_index = -1; |
| |
| agx_foreach_block(ctx, block) { |
| if ((int)block->index < last_index) { |
| fprintf(stderr, "Out-of-order block index %d vs %d after %s\n", |
| block->index, last_index, after); |
| agx_print_block(block, stderr); |
| fail = true; |
| } |
| |
| last_index = block->index; |
| |
| if (!agx_validate_block_form(block)) { |
| fprintf(stderr, "Invalid block form after %s\n", after); |
| agx_print_block(block, stderr); |
| fail = true; |
| } |
| |
| if (!agx_validate_predecessors(block)) { |
| fprintf(stderr, "Invalid loop header flag after %s\n", after); |
| agx_print_block(block, stderr); |
| fail = true; |
| } |
| } |
| |
| { |
| BITSET_WORD *defs = calloc(sizeof(BITSET_WORD), BITSET_WORDS(ctx->alloc)); |
| |
| agx_foreach_instr_global(ctx, I) { |
| if (!agx_validate_defs(I, defs)) { |
| fprintf(stderr, "Invalid defs after %s\n", after); |
| agx_print_instr(I, stderr); |
| fail = true; |
| } |
| } |
| |
| /* agx_validate_defs skips phi sources, so validate them now */ |
| agx_foreach_block(ctx, block) { |
| agx_foreach_phi_in_block(block, phi) { |
| agx_foreach_ssa_src(phi, s) { |
| if (!BITSET_TEST(defs, phi->src[s].value)) { |
| fprintf(stderr, "Undefined phi source %u after %s\n", |
| phi->src[s].value, after); |
| agx_print_instr(phi, stderr); |
| fail = true; |
| } |
| } |
| } |
| } |
| |
| free(defs); |
| } |
| |
| agx_foreach_instr_global(ctx, I) { |
| if (!agx_validate_sources(I)) { |
| fprintf(stderr, "Invalid sources form after %s\n", after); |
| agx_print_instr(I, stderr); |
| fail = true; |
| } |
| |
| if (!agx_validate_sr(I)) { |
| fprintf(stderr, "Invalid SR after %s\n", after); |
| agx_print_instr(I, stdout); |
| fail = true; |
| } |
| } |
| |
| if (!agx_validate_width(ctx)) { |
| fprintf(stderr, "Invalid vectors after %s\n", after); |
| fail = true; |
| } |
| |
| if (fail) { |
| agx_print_shader(ctx, stderr); |
| exit(1); |
| } |
| } |
| |
| #endif /* NDEBUG */ |