/*
* Copyright 2022 Alyssa Rosenzweig
* Copyright 2021 Collabora, Ltd.
* SPDX-License-Identifier: MIT
*/
#include "util/compiler.h"
#include "agx_compiler.h"
#include "agx_debug.h"
#include "agx_opcodes.h"
/* Validation doesn't make sense in release builds */
#ifndef NDEBUG
#define agx_validate_assert(stmt) \
if (!(stmt)) { \
return false; \
}
/*
* If a block contains phi nodes, they must come at the start of the block. If a
* block contains control flow, it must come at the beginning/end as applicable.
* Therefore the form of a valid block is:
*
* Control flow instructions (else)
* Phi nodes
* General instructions
* Control flow instructions (except else)
*
* Validate that this form is satisfied.
*/
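/*
 * For instance, a hypothetical well-formed block (opcode choices
 * illustrative) would order its instructions as:
 *
 *    else_icmp ...   (control flow allowed only at the top)
 *    phi ...         (phis before any general instruction)
 *    fadd ...        (general body)
 *
 * followed only by control flow; a phi appearing after the fadd would fail
 * validation.
 */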
enum agx_block_state {
AGX_BLOCK_STATE_CF_ELSE = 0,
AGX_BLOCK_STATE_PHI = 1,
AGX_BLOCK_STATE_BODY = 2,
AGX_BLOCK_STATE_CF = 3
};
static bool
agx_validate_block_form(agx_block *block)
{
enum agx_block_state state = AGX_BLOCK_STATE_CF_ELSE;
agx_foreach_instr_in_block(block, I) {
switch (I->op) {
case AGX_OPCODE_PRELOAD:
case AGX_OPCODE_ELSE_ICMP:
case AGX_OPCODE_ELSE_FCMP:
agx_validate_assert(state == AGX_BLOCK_STATE_CF_ELSE);
break;
case AGX_OPCODE_PHI:
agx_validate_assert(state == AGX_BLOCK_STATE_CF_ELSE ||
state == AGX_BLOCK_STATE_PHI);
state = AGX_BLOCK_STATE_PHI;
break;
case AGX_OPCODE_EXPORT:
agx_validate_assert(agx_num_successors(block) == 0);
state = AGX_BLOCK_STATE_CF;
break;
default:
if (instr_after_logical_end(I)) {
state = AGX_BLOCK_STATE_CF;
} else {
agx_validate_assert(state != AGX_BLOCK_STATE_CF);
state = AGX_BLOCK_STATE_BODY;
}
break;
}
}
return true;
}
/*
 * Only moves and phis may use the stack. Phis cannot be implemented with
 * ordinary moves due to their parallel-copy semantics, so we allow phis to
 * take memory operands directly; these are lowered to moves later.
 */
static bool
is_stack_valid(agx_instr *I)
{
return (I->op == AGX_OPCODE_MOV) || (I->op == AGX_OPCODE_PHI);
}
static bool
agx_validate_sources(agx_instr *I)
{
agx_foreach_src(I, s) {
agx_index src = I->src[s];
if (src.type == AGX_INDEX_IMMEDIATE) {
agx_validate_assert(!src.kill);
agx_validate_assert(!src.cache);
agx_validate_assert(!src.discard);
bool ldst = agx_allows_16bit_immediate(I);
         /* Immediates are encoded as 8-bit (16-bit for memory load/store).
          * For integers, they extend to 16 bits. For floating point, they
          * are 8-bit minifloats. The 8-bit minifloats are a strict subset
          * of 16-bit standard floats, so we treat them as such in the IR,
          * with an implicit f16->f32 conversion for 32-bit floating point
          * operations.
          */
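         /* Worked example: an integer immediate of 255 (0xff) fits the
          * 8-bit encoding and validates for any instruction, while 256
          * would only validate for the memory load/store forms that accept
          * 16-bit immediates.
          */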
agx_validate_assert(src.size == AGX_SIZE_16);
agx_validate_assert(src.value < (1 << (ldst ? 16 : 8)));
} else if (I->op == AGX_OPCODE_COLLECT && !agx_is_null(src)) {
agx_validate_assert(src.size == I->src[0].size);
} else if (I->op == AGX_OPCODE_PHI) {
agx_validate_assert(src.size == I->dest[0].size);
agx_validate_assert(!agx_is_null(src));
}
agx_validate_assert(!src.memory || is_stack_valid(I));
}
return true;
}
static bool
agx_validate_defs(agx_instr *I, BITSET_WORD *defs)
{
agx_foreach_ssa_src(I, s) {
      /* Skip phi sources: in loop headers they may reference values defined
       * later (backedges), so they are validated separately once all defs
       * have been recorded.
       */
if (I->op == AGX_OPCODE_PHI)
break;
/* Sources must be defined before their use */
if (!BITSET_TEST(defs, I->src[s].value))
return false;
}
agx_foreach_ssa_dest(I, d) {
/* Static single assignment */
if (BITSET_TEST(defs, I->dest[d].value))
return false;
BITSET_SET(defs, I->dest[d].value);
if (I->dest[d].memory && !is_stack_valid(I))
return false;
}
return true;
}
/** Returns the number of 16-bit registers written to destination d */
static unsigned
agx_write_registers(const agx_instr *I, unsigned d)
{
unsigned size = agx_size_align_16(I->dest[d].size);
switch (I->op) {
case AGX_OPCODE_MOV:
case AGX_OPCODE_PHI:
/* Tautological */
return agx_index_size_16(I->dest[d]);
case AGX_OPCODE_ITER:
case AGX_OPCODE_ITERPROJ:
assert(1 <= I->channels && I->channels <= 4);
return I->channels * size;
case AGX_OPCODE_IMAGE_LOAD:
case AGX_OPCODE_TEXTURE_LOAD:
case AGX_OPCODE_TEXTURE_SAMPLE:
/* Even when masked out, these clobber 4 registers.
*
* TODO: Figure out the sparse interaction.
*/
return (I->sparse ? 8 : 4) * size;
case AGX_OPCODE_DEVICE_LOAD:
case AGX_OPCODE_LOCAL_LOAD:
case AGX_OPCODE_STACK_LOAD:
case AGX_OPCODE_LD_TILE:
/* Can write 16-bit or 32-bit. Anything logically 64-bit is already
* expanded to 32-bit in the mask.
*/
return util_bitcount(I->mask) * MIN2(size, 2);
case AGX_OPCODE_LDCF:
return 6;
case AGX_OPCODE_COLLECT:
return I->nr_srcs * agx_size_align_16(I->src[0].size);
default:
return size;
}
}
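/*
 * Worked example for the switch above: a non-sparse texture_sample with a
 * 32-bit destination writes (I->sparse ? 8 : 4) * agx_size_align_16(size) =
 * 4 * 2 = 8 registers in 16-bit units, regardless of the write mask, since
 * masked-out channels are still clobbered.
 */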
struct dim_info {
unsigned comps;
bool array;
};
static struct dim_info
agx_dim_info(enum agx_dim dim)
{
switch (dim) {
case AGX_DIM_1D:
return (struct dim_info){1, false};
case AGX_DIM_1D_ARRAY:
return (struct dim_info){1, true};
case AGX_DIM_2D:
return (struct dim_info){2, false};
case AGX_DIM_2D_ARRAY:
return (struct dim_info){2, true};
case AGX_DIM_2D_MS:
return (struct dim_info){3, false};
case AGX_DIM_3D:
return (struct dim_info){3, false};
case AGX_DIM_CUBE:
return (struct dim_info){3, false};
case AGX_DIM_CUBE_ARRAY:
return (struct dim_info){3, true};
case AGX_DIM_2D_MS_ARRAY:
return (struct dim_info){2, true};
default:
unreachable("invalid dim");
}
}
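/*
 * Note from the table above: 2D_MS counts the sample index as a third
 * coordinate component, while 2D_MS_ARRAY is modeled as two components plus
 * an array index.
 */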
/*
 * Return the number of registers required for the coordinates of a
 * texture/image instruction. We handle the layer + sample index as 32-bit
 * even when only the lower 16 bits are present. LOD queries do not take a
 * layer.
 */
static unsigned
agx_coordinate_registers(const agx_instr *I)
{
struct dim_info dim = agx_dim_info(I->dim);
bool has_array = !I->query_lod;
return 2 * (dim.comps + (has_array && dim.array));
}
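/*
 * Worked example: a 2D array texture has dim.comps = 2 and dim.array = true,
 * so any access other than an LOD query reads 2 * (2 + 1) = 6 registers of
 * coordinates: x, y, and the layer, each held as a 32-bit pair of 16-bit
 * registers.
 */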
static unsigned
agx_read_registers(const agx_instr *I, unsigned s)
{
unsigned size = agx_size_align_16(I->src[s].size);
switch (I->op) {
case AGX_OPCODE_MOV:
case AGX_OPCODE_EXPORT:
/* Tautological */
return agx_index_size_16(I->src[0]);
case AGX_OPCODE_PHI:
if (I->src[s].type == AGX_INDEX_IMMEDIATE)
return size;
else
return agx_index_size_16(I->dest[0]);
case AGX_OPCODE_SPLIT:
return I->nr_dests * agx_size_align_16(agx_split_width(I));
case AGX_OPCODE_UNIFORM_STORE:
if (s == 0)
return util_bitcount(I->mask) * size;
else
return size;
case AGX_OPCODE_DEVICE_STORE:
case AGX_OPCODE_LOCAL_STORE:
case AGX_OPCODE_STACK_STORE:
case AGX_OPCODE_ST_TILE:
/* See agx_write_registers */
if (s == 0)
return util_bitcount(I->mask) * MIN2(size, 2);
else if (s == 2 && I->explicit_coords)
return 2;
else
return size;
   case AGX_OPCODE_ZS_EMIT:
      if (s == 1) {
         /* Depth (bit 0) is fp32; stencil (bit 1) is u16 in the hardware,
          * but we pad it up to u32 for simplicity.
          */
         bool z = !!(I->zs & 1);
         bool stencil = !!(I->zs & 2);
         assert(z || stencil);
         return (z && stencil) ? 4 : z ? 2 : 1;
      } else {
         return 1;
      }
case AGX_OPCODE_IMAGE_WRITE:
if (s == 0)
return 4 * size /* data */;
else if (s == 1)
return agx_coordinate_registers(I);
else
return size;
case AGX_OPCODE_IMAGE_LOAD:
case AGX_OPCODE_TEXTURE_LOAD:
case AGX_OPCODE_TEXTURE_SAMPLE:
if (s == 0) {
return agx_coordinate_registers(I);
} else if (s == 1) {
/* LOD */
if (I->lod_mode == AGX_LOD_MODE_LOD_GRAD ||
I->lod_mode == AGX_LOD_MODE_LOD_GRAD_MIN) {
/* Technically only 16-bit but we model as 32-bit to keep the IR
* simple, since the gradient is otherwise 32-bit.
*/
unsigned min = I->lod_mode == AGX_LOD_MODE_LOD_GRAD_MIN ? 2 : 0;
switch (I->dim) {
case AGX_DIM_1D:
case AGX_DIM_1D_ARRAY:
return (2 * 2 * 1) + min;
case AGX_DIM_2D:
case AGX_DIM_2D_ARRAY:
case AGX_DIM_2D_MS_ARRAY:
case AGX_DIM_2D_MS:
return (2 * 2 * 2) + min;
case AGX_DIM_CUBE:
case AGX_DIM_CUBE_ARRAY:
case AGX_DIM_3D:
return (2 * 2 * 3) + min;
}
unreachable("Invalid texture dimension");
} else if (I->lod_mode == AGX_LOD_MODE_AUTO_LOD_BIAS_MIN) {
return 2;
} else {
return 1;
}
} else if (s == 5) {
/* Compare/offset */
return 2 * ((!!I->shadow) + (!!I->offset));
} else {
return size;
}
case AGX_OPCODE_BLOCK_IMAGE_STORE:
if (s == 3 && I->explicit_coords)
return agx_coordinate_registers(I);
else
return size;
case AGX_OPCODE_ATOMIC:
case AGX_OPCODE_LOCAL_ATOMIC:
if (s == 0 && I->atomic_opc == AGX_ATOMIC_OPC_CMPXCHG)
return size * 2;
else
return size;
default:
return size;
}
}
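/*
 * Worked example: a 32-bit cmpxchg atomic reads agx_size_align_16(size) * 2
 * = 2 * 2 = 4 registers for source 0, accounting for both the comparison
 * value and the value to exchange.
 */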
/* Type check the dimensionality of sources and destinations. */
static bool
agx_validate_width(agx_context *ctx)
{
bool succ = true;
enum agx_size *sizes = calloc(ctx->alloc, sizeof(*sizes));
agx_foreach_instr_global(ctx, I) {
agx_foreach_dest(I, d) {
unsigned exp = agx_write_registers(I, d);
unsigned act =
agx_channels(I->dest[d]) * agx_size_align_16(I->dest[d].size);
if (exp != act) {
succ = false;
fprintf(stderr, "destination %u, expected width %u, got width %u\n",
d, exp, act);
agx_print_instr(I, stderr);
fprintf(stderr, "\n");
}
if (I->dest[d].type == AGX_INDEX_NORMAL)
sizes[I->dest[d].value] = I->dest[d].size;
}
agx_foreach_src(I, s) {
if (I->src[s].type == AGX_INDEX_NULL)
continue;
unsigned exp = agx_read_registers(I, s);
unsigned act =
agx_channels(I->src[s]) * agx_size_align_16(I->src[s].size);
if (exp != act) {
succ = false;
fprintf(stderr, "source %u, expected width %u, got width %u\n", s,
exp, act);
agx_print_instr(I, stderr);
fprintf(stderr, "\n");
}
}
}
   /* Check sources after all defs are processed, for proper backedge handling */
agx_foreach_instr_global(ctx, I) {
agx_foreach_ssa_src(I, s) {
if (sizes[I->src[s].value] != I->src[s].size) {
succ = false;
fprintf(stderr, "source %u, expected el size %u, got el size %u\n",
s, agx_size_align_16(sizes[I->src[s].value]),
agx_size_align_16(I->src[s].size));
agx_print_instr(I, stderr);
fprintf(stderr, "\n");
}
}
}
free(sizes);
return succ;
}
static bool
agx_validate_predecessors(agx_block *block)
{
   /* Only loop headers may have predecessors that come later in source order */
bool has_later_preds = false;
agx_foreach_predecessor(block, pred) {
if ((*pred)->index >= block->index)
has_later_preds = true;
}
if (has_later_preds && !block->loop_header)
return false;
   /* Successors and predecessors must stay consistent: every predecessor of
    * this block must list it as a successor.
    */
agx_foreach_predecessor(block, pred) {
bool found = false;
agx_foreach_successor((*pred), succ) {
if (succ == block)
found = true;
}
if (!found)
return false;
}
return true;
}
static bool
agx_validate_sr(const agx_instr *I)
{
bool none = (I->op == AGX_OPCODE_GET_SR);
bool coverage = (I->op == AGX_OPCODE_GET_SR_COVERAGE);
bool barrier = (I->op == AGX_OPCODE_GET_SR_BARRIER);
   /* Only the get_sr family of instructions is validated here */
if (!(none || coverage || barrier))
return true;
switch (I->sr) {
case AGX_SR_ACTIVE_THREAD_INDEX_IN_QUAD:
case AGX_SR_ACTIVE_THREAD_INDEX_IN_SUBGROUP:
case AGX_SR_TOTAL_ACTIVE_THREADS_IN_QUAD:
case AGX_SR_TOTAL_ACTIVE_THREADS_IN_SUBGROUP:
case AGX_SR_COVERAGE_MASK:
case AGX_SR_IS_ACTIVE_THREAD:
return coverage;
case AGX_SR_HELPER_OP:
case AGX_SR_HELPER_ARG_L:
case AGX_SR_HELPER_ARG_H:
return barrier;
default:
return none;
}
}
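/*
 * Validator entry point, compiled out in release builds. As a usage sketch
 * (the pass name is illustrative), a pass pipeline would call
 *
 *    agx_validate(ctx, "register allocation");
 *
 * after each transformation, so a failure report names the offending pass.
 */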
void
agx_validate(agx_context *ctx, const char *after)
{
bool fail = false;
if (agx_compiler_debug & AGX_DBG_NOVALIDATE)
return;
int last_index = -1;
agx_foreach_block(ctx, block) {
if ((int)block->index < last_index) {
fprintf(stderr, "Out-of-order block index %d vs %d after %s\n",
block->index, last_index, after);
agx_print_block(block, stderr);
fail = true;
}
last_index = block->index;
if (!agx_validate_block_form(block)) {
fprintf(stderr, "Invalid block form after %s\n", after);
agx_print_block(block, stderr);
fail = true;
}
if (!agx_validate_predecessors(block)) {
fprintf(stderr, "Invalid loop header flag after %s\n", after);
agx_print_block(block, stderr);
fail = true;
}
}
{
      BITSET_WORD *defs = calloc(BITSET_WORDS(ctx->alloc), sizeof(BITSET_WORD));
agx_foreach_instr_global(ctx, I) {
if (!agx_validate_defs(I, defs)) {
fprintf(stderr, "Invalid defs after %s\n", after);
agx_print_instr(I, stderr);
fail = true;
}
}
/* agx_validate_defs skips phi sources, so validate them now */
agx_foreach_block(ctx, block) {
agx_foreach_phi_in_block(block, phi) {
agx_foreach_ssa_src(phi, s) {
if (!BITSET_TEST(defs, phi->src[s].value)) {
fprintf(stderr, "Undefined phi source %u after %s\n",
phi->src[s].value, after);
agx_print_instr(phi, stderr);
fail = true;
}
}
}
}
free(defs);
}
agx_foreach_instr_global(ctx, I) {
if (!agx_validate_sources(I)) {
fprintf(stderr, "Invalid sources form after %s\n", after);
agx_print_instr(I, stderr);
fail = true;
}
if (!agx_validate_sr(I)) {
fprintf(stderr, "Invalid SR after %s\n", after);
         agx_print_instr(I, stderr);
fail = true;
}
}
if (!agx_validate_width(ctx)) {
fprintf(stderr, "Invalid vectors after %s\n", after);
fail = true;
}
if (fail) {
agx_print_shader(ctx, stderr);
exit(1);
}
}
#endif /* NDEBUG */