| /* |
| * Copyright © 2014 Broadcom |
| * |
| * Permission is hereby granted, free of charge, to any person obtaining a |
| * copy of this software and associated documentation files (the "Software"), |
| * to deal in the Software without restriction, including without limitation |
| * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
| * and/or sell copies of the Software, and to permit persons to whom the |
| * Software is furnished to do so, subject to the following conditions: |
| * |
| * The above copyright notice and this permission notice (including the next |
| * paragraph) shall be included in all copies or substantial portions of the |
| * Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
| * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
| * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
| * IN THE SOFTWARE. |
| */ |
| |
| /** |
| * @file |
| * |
| * Validates the QPU instruction sequence after register allocation and |
| * scheduling. |
| */ |
| |
| #include <assert.h> |
| #include <stdio.h> |
| #include <stdlib.h> |
| #include "v3d_compiler.h" |
| #include "qpu/qpu_disasm.h" |
| |
/**
 * Running state carried across instructions while a program is validated.
 */
struct v3d_qpu_validate_state {
        /* Compile whose instruction stream is being checked. */
        struct v3d_compile *c;

        /* Instruction visited just before the current one, or NULL when
         * nothing has been visited yet.
         */
        const struct v3d_qpu_instr *last;

        /* Index of the instruction currently being validated; advanced once
         * per instruction.
         */
        int ip;

        /* ip of the most recent instruction that performed an SFU write. */
        int last_sfu_write;

        /* ip recorded for the most recent branch instruction. */
        int last_branch_ip;

        /* ip of the most recent THRSW that was not merely the second of two
         * back-to-back THRSW signals.
         */
        int last_thrsw_ip;

        /* Number of THRSWs recorded in last_thrsw_ip so far. */
        int thrsw_count;

        /* Set when we've found the last-THRSW signal, or if we were started
         * in single-segment mode.
         */
        bool last_thrsw_found;

        /* Set when we've found the THRSW after the last THRSW */
        bool thrend_found;
};
| |
| static void |
| fail_instr(struct v3d_qpu_validate_state *state, const char *msg) |
| { |
| struct v3d_compile *c = state->c; |
| |
| fprintf(stderr, "v3d_qpu_validate at ip %d: %s:\n", state->ip, msg); |
| |
| int dump_ip = 0; |
| vir_for_each_inst_inorder(inst, c) { |
| v3d_qpu_dump(c->devinfo, &inst->qpu); |
| |
| if (dump_ip++ == state->ip) |
| fprintf(stderr, " *** ERROR ***"); |
| |
| fprintf(stderr, "\n"); |
| } |
| |
| fprintf(stderr, "\n"); |
| abort(); |
| } |
| |
| static bool |
| in_branch_delay_slots(struct v3d_qpu_validate_state *state) |
| { |
| return (state->ip - state->last_branch_ip) < 3; |
| } |
| |
| static bool |
| in_thrsw_delay_slots(struct v3d_qpu_validate_state *state) |
| { |
| return (state->ip - state->last_thrsw_ip) < 3; |
| } |
| |
| static bool |
| qpu_magic_waddr_matches(const struct v3d_qpu_instr *inst, |
| bool (*predicate)(enum v3d_qpu_waddr waddr)) |
| { |
| if (inst->type == V3D_QPU_INSTR_TYPE_ALU) |
| return false; |
| |
| if (inst->alu.add.op != V3D_QPU_A_NOP && |
| inst->alu.add.magic_write && |
| predicate(inst->alu.add.waddr)) |
| return true; |
| |
| if (inst->alu.mul.op != V3D_QPU_M_NOP && |
| inst->alu.mul.magic_write && |
| predicate(inst->alu.mul.waddr)) |
| return true; |
| |
| return false; |
| } |
| |
| static void |
| qpu_validate_inst(struct v3d_qpu_validate_state *state, struct qinst *qinst) |
| { |
| const struct v3d_device_info *devinfo = state->c->devinfo; |
| const struct v3d_qpu_instr *inst = &qinst->qpu; |
| |
| if (inst->type != V3D_QPU_INSTR_TYPE_ALU) |
| return; |
| |
| /* LDVARY writes r5 two instructions later and LDUNIF writes |
| * r5 one instruction later, which is illegal to have |
| * together. |
| */ |
| if (state->last && state->last->sig.ldvary && |
| (inst->sig.ldunif || inst->sig.ldunifa)) { |
| fail_instr(state, "LDUNIF after a LDVARY"); |
| } |
| |
| /* GFXH-1633 (fixed since V3D 4.2.14, which is Rpi4) |
| * |
| * FIXME: This would not check correctly for V3D 4.2 versions lower |
| * than V3D 4.2.14, but that is not a real issue because the simulator |
| * will still catch this, and we are not really targetting any such |
| * versions anyway. |
| */ |
| if (state->c->devinfo->ver < 42) { |
| bool last_reads_ldunif = (state->last && (state->last->sig.ldunif || |
| state->last->sig.ldunifrf)); |
| bool last_reads_ldunifa = (state->last && (state->last->sig.ldunifa || |
| state->last->sig.ldunifarf)); |
| bool reads_ldunif = inst->sig.ldunif || inst->sig.ldunifrf; |
| bool reads_ldunifa = inst->sig.ldunifa || inst->sig.ldunifarf; |
| if ((last_reads_ldunif && reads_ldunifa) || |
| (last_reads_ldunifa && reads_ldunif)) { |
| fail_instr(state, |
| "LDUNIF and LDUNIFA can't be next to each other"); |
| } |
| } |
| |
| int tmu_writes = 0; |
| int sfu_writes = 0; |
| int vpm_writes = 0; |
| int tlb_writes = 0; |
| int tsy_writes = 0; |
| |
| if (inst->alu.add.op != V3D_QPU_A_NOP) { |
| if (inst->alu.add.magic_write) { |
| if (v3d_qpu_magic_waddr_is_tmu(state->c->devinfo, |
| inst->alu.add.waddr)) { |
| tmu_writes++; |
| } |
| if (v3d_qpu_magic_waddr_is_sfu(inst->alu.add.waddr)) |
| sfu_writes++; |
| if (v3d_qpu_magic_waddr_is_vpm(inst->alu.add.waddr)) |
| vpm_writes++; |
| if (v3d_qpu_magic_waddr_is_tlb(inst->alu.add.waddr)) |
| tlb_writes++; |
| if (v3d_qpu_magic_waddr_is_tsy(inst->alu.add.waddr)) |
| tsy_writes++; |
| } |
| } |
| |
| if (inst->alu.mul.op != V3D_QPU_M_NOP) { |
| if (inst->alu.mul.magic_write) { |
| if (v3d_qpu_magic_waddr_is_tmu(state->c->devinfo, |
| inst->alu.mul.waddr)) { |
| tmu_writes++; |
| } |
| if (v3d_qpu_magic_waddr_is_sfu(inst->alu.mul.waddr)) |
| sfu_writes++; |
| if (v3d_qpu_magic_waddr_is_vpm(inst->alu.mul.waddr)) |
| vpm_writes++; |
| if (v3d_qpu_magic_waddr_is_tlb(inst->alu.mul.waddr)) |
| tlb_writes++; |
| if (v3d_qpu_magic_waddr_is_tsy(inst->alu.mul.waddr)) |
| tsy_writes++; |
| } |
| } |
| |
| if (in_thrsw_delay_slots(state)) { |
| /* There's no way you want to start SFU during the THRSW delay |
| * slots, since the result would land in the other thread. |
| */ |
| if (sfu_writes) { |
| fail_instr(state, |
| "SFU write started during THRSW delay slots "); |
| } |
| |
| if (inst->sig.ldvary) |
| fail_instr(state, "LDVARY during THRSW delay slots"); |
| } |
| |
| (void)qpu_magic_waddr_matches; /* XXX */ |
| |
| /* SFU r4 results come back two instructions later. No doing |
| * r4 read/writes or other SFU lookups until it's done. |
| */ |
| if (state->ip - state->last_sfu_write < 2) { |
| if (v3d_qpu_uses_mux(inst, V3D_QPU_MUX_R4)) |
| fail_instr(state, "R4 read too soon after SFU"); |
| |
| if (v3d_qpu_writes_r4(devinfo, inst)) |
| fail_instr(state, "R4 write too soon after SFU"); |
| |
| if (sfu_writes) |
| fail_instr(state, "SFU write too soon after SFU"); |
| } |
| |
| /* XXX: The docs say VPM can happen with the others, but the simulator |
| * disagrees. |
| */ |
| if (tmu_writes + |
| sfu_writes + |
| vpm_writes + |
| tlb_writes + |
| tsy_writes + |
| inst->sig.ldtmu + |
| inst->sig.ldtlb + |
| inst->sig.ldvpm + |
| inst->sig.ldtlbu > 1) { |
| fail_instr(state, |
| "Only one of [TMU, SFU, TSY, TLB read, VPM] allowed"); |
| } |
| |
| if (sfu_writes) |
| state->last_sfu_write = state->ip; |
| |
| if (inst->sig.thrsw) { |
| if (in_branch_delay_slots(state)) |
| fail_instr(state, "THRSW in a branch delay slot."); |
| |
| if (state->last_thrsw_found) |
| state->thrend_found = true; |
| |
| if (state->last_thrsw_ip == state->ip - 1) { |
| /* If it's the second THRSW in a row, then it's just a |
| * last-thrsw signal. |
| */ |
| if (state->last_thrsw_found) |
| fail_instr(state, "Two last-THRSW signals"); |
| state->last_thrsw_found = true; |
| } else { |
| if (in_thrsw_delay_slots(state)) { |
| fail_instr(state, |
| "THRSW too close to another THRSW."); |
| } |
| state->thrsw_count++; |
| state->last_thrsw_ip = state->ip; |
| } |
| } |
| |
| if (state->thrend_found && |
| state->last_thrsw_ip - state->ip <= 2 && |
| inst->type == V3D_QPU_INSTR_TYPE_ALU) { |
| if ((inst->alu.add.op != V3D_QPU_A_NOP && |
| !inst->alu.add.magic_write)) { |
| fail_instr(state, "RF write after THREND"); |
| } |
| |
| if ((inst->alu.mul.op != V3D_QPU_M_NOP && |
| !inst->alu.mul.magic_write)) { |
| fail_instr(state, "RF write after THREND"); |
| } |
| |
| if (v3d_qpu_sig_writes_address(devinfo, &inst->sig) && |
| !inst->sig_magic) { |
| fail_instr(state, "RF write after THREND"); |
| } |
| |
| /* GFXH-1625: No TMUWT in the last instruction */ |
| if (state->last_thrsw_ip - state->ip == 2 && |
| inst->alu.add.op == V3D_QPU_A_TMUWT) |
| fail_instr(state, "TMUWT in last instruction"); |
| } |
| |
| if (inst->type == V3D_QPU_INSTR_TYPE_BRANCH) { |
| if (in_branch_delay_slots(state)) |
| fail_instr(state, "branch in a branch delay slot."); |
| if (in_thrsw_delay_slots(state)) |
| fail_instr(state, "branch in a THRSW delay slot."); |
| state->last_branch_ip = state->ip; |
| } |
| } |
| |
| static void |
| qpu_validate_block(struct v3d_qpu_validate_state *state, struct qblock *block) |
| { |
| vir_for_each_inst(qinst, block) { |
| qpu_validate_inst(state, qinst); |
| |
| state->last = &qinst->qpu; |
| state->ip++; |
| } |
| } |
| |
| /** |
| * Checks for the instruction restrictions from page 37 ("Summary of |
| * Instruction Restrictions"). |
| */ |
| void |
| qpu_validate(struct v3d_compile *c) |
| { |
| /* We don't want to do validation in release builds, but we want to |
| * keep compiling the validation code to make sure it doesn't get |
| * broken. |
| */ |
| #ifndef DEBUG |
| return; |
| #endif |
| |
| struct v3d_qpu_validate_state state = { |
| .c = c, |
| .last_sfu_write = -10, |
| .last_thrsw_ip = -10, |
| .last_branch_ip = -10, |
| .ip = 0, |
| |
| .last_thrsw_found = !c->last_thrsw, |
| }; |
| |
| vir_for_each_block(block, c) { |
| qpu_validate_block(&state, block); |
| } |
| |
| if (state.thrsw_count > 1 && !state.last_thrsw_found) { |
| fail_instr(&state, |
| "thread switch found without last-THRSW in program"); |
| } |
| |
| if (!state.thrend_found) |
| fail_instr(&state, "No program-end THRSW found"); |
| } |