| /* |
| * Copyright (C) 2019 Connor Abbott <cwabbott0@gmail.com> |
| * Copyright (C) 2019 Lyude Paul <thatslyude@gmail.com> |
| * Copyright (C) 2019 Ryan Houdek <Sonicadvance1@gmail.com> |
| * |
| * Permission is hereby granted, free of charge, to any person obtaining a |
| * copy of this software and associated documentation files (the "Software"), |
| * to deal in the Software without restriction, including without limitation |
| * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
| * and/or sell copies of the Software, and to permit persons to whom the |
| * Software is furnished to do so, subject to the following conditions: |
| * |
| * The above copyright notice and this permission notice (including the next |
| * paragraph) shall be included in all copies or substantial portions of the |
| * Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
| * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
| * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
| * SOFTWARE. |
| */ |
| |
| #include <stdbool.h> |
| #include <stdio.h> |
| #include <stdint.h> |
| #include <assert.h> |
| #include <inttypes.h> |
| #include <string.h> |
| |
| #include "bifrost.h" |
| #include "disassemble.h" |
| #include "bi_print_common.h" |
| #include "util/compiler.h" |
| #include "util/macros.h" |
| |
| // return bits (high, lo] |
| static uint64_t bits(uint32_t word, unsigned lo, unsigned high) |
| { |
| if (high == 32) |
| return word >> lo; |
| return (word & ((1 << high) - 1)) >> lo; |
| } |
| |
| // each of these structs represents an instruction that's dispatched in one |
| // cycle. Note that these instructions are packed in funny ways within the |
| // clause, hence the need for a separate struct. |
| struct bifrost_alu_inst { |
| uint32_t fma_bits; |
| uint32_t add_bits; |
| uint64_t reg_bits; |
| }; |
| |
| static unsigned get_reg0(struct bifrost_regs regs) |
| { |
| if (regs.ctrl == 0) |
| return regs.reg0 | ((regs.reg1 & 0x1) << 5); |
| |
| return regs.reg0 <= regs.reg1 ? regs.reg0 : 63 - regs.reg0; |
| } |
| |
| static unsigned get_reg1(struct bifrost_regs regs) |
| { |
| return regs.reg0 <= regs.reg1 ? regs.reg1 : 63 - regs.reg1; |
| } |
| |
| // this represents the decoded version of the ctrl register field. |
| struct bifrost_reg_ctrl { |
| bool read_reg0; |
| bool read_reg1; |
| struct bifrost_reg_ctrl_23 slot23; |
| }; |
| |
| static void dump_header(FILE *fp, struct bifrost_header header, bool verbose) |
| { |
| fprintf(fp, "ds(%u) ", header.dependency_slot); |
| |
| if (header.staging_barrier) |
| fprintf(fp, "osrb "); |
| |
| fprintf(fp, "%s ", bi_flow_control_name(header.flow_control)); |
| |
| if (header.suppress_inf) |
| fprintf(fp, "inf_suppress "); |
| if (header.suppress_nan) |
| fprintf(fp, "nan_suppress "); |
| |
| if (header.flush_to_zero == BIFROST_FTZ_DX11) |
| fprintf(fp, "ftz_dx11 "); |
| else if (header.flush_to_zero == BIFROST_FTZ_ALWAYS) |
| fprintf(fp, "ftz_hsa "); |
| if (header.flush_to_zero == BIFROST_FTZ_ABRUPT) |
| fprintf(fp, "ftz_au "); |
| |
| assert(!header.zero1); |
| assert(!header.zero2); |
| |
| if (header.float_exceptions == BIFROST_EXCEPTIONS_DISABLED) |
| fprintf(fp, "fpe_ts "); |
| else if (header.float_exceptions == BIFROST_EXCEPTIONS_PRECISE_DIVISION) |
| fprintf(fp, "fpe_pd "); |
| else if (header.float_exceptions == BIFROST_EXCEPTIONS_PRECISE_SQRT) |
| fprintf(fp, "fpe_psqr "); |
| |
| if (header.message_type) |
| fprintf(fp, "%s ", bi_message_type_name(header.message_type)); |
| |
| if (header.terminate_discarded_threads) |
| fprintf(fp, "td "); |
| |
| if (header.next_clause_prefetch) |
| fprintf(fp, "ncph "); |
| |
| if (header.next_message_type) |
| fprintf(fp, "next_%s ", bi_message_type_name(header.next_message_type)); |
| if (header.dependency_wait != 0) { |
| fprintf(fp, "dwb("); |
| bool first = true; |
| for (unsigned i = 0; i < 8; i++) { |
| if (header.dependency_wait & (1 << i)) { |
| if (!first) { |
| fprintf(fp, ", "); |
| } |
| fprintf(fp, "%u", i); |
| first = false; |
| } |
| } |
| fprintf(fp, ") "); |
| } |
| |
| fprintf(fp, "\n"); |
| } |
| |
| static struct bifrost_reg_ctrl DecodeRegCtrl(FILE *fp, struct bifrost_regs regs, bool first) |
| { |
| struct bifrost_reg_ctrl decoded = {}; |
| unsigned ctrl; |
| if (regs.ctrl == 0) { |
| ctrl = regs.reg1 >> 2; |
| decoded.read_reg0 = !(regs.reg1 & 0x2); |
| decoded.read_reg1 = false; |
| } else { |
| ctrl = regs.ctrl; |
| decoded.read_reg0 = decoded.read_reg1 = true; |
| } |
| |
| /* Modify control based on state */ |
| if (first) |
| ctrl = (ctrl & 0x7) | ((ctrl & 0x8) << 1); |
| else if (regs.reg2 == regs.reg3) |
| ctrl += 16; |
| |
| decoded.slot23 = bifrost_reg_ctrl_lut[ctrl]; |
| ASSERTED struct bifrost_reg_ctrl_23 reserved = { 0 }; |
| assert(memcmp(&decoded.slot23, &reserved, sizeof(reserved))); |
| |
| return decoded; |
| } |
| |
| static void dump_regs(FILE *fp, struct bifrost_regs srcs, bool first) |
| { |
| struct bifrost_reg_ctrl ctrl = DecodeRegCtrl(fp, srcs, first); |
| fprintf(fp, " # "); |
| if (ctrl.read_reg0) |
| fprintf(fp, "slot 0: r%u ", get_reg0(srcs)); |
| if (ctrl.read_reg1) |
| fprintf(fp, "slot 1: r%u ", get_reg1(srcs)); |
| |
| const char *slot3_fma = ctrl.slot23.slot3_fma ? "FMA" : "ADD"; |
| |
| if (ctrl.slot23.slot2 == BIFROST_OP_WRITE) |
| fprintf(fp, "slot 2: r%u (write FMA) ", srcs.reg2); |
| else if (ctrl.slot23.slot2 == BIFROST_OP_WRITE_LO) |
| fprintf(fp, "slot 2: r%u (write lo FMA) ", srcs.reg2); |
| else if (ctrl.slot23.slot2 == BIFROST_OP_WRITE_HI) |
| fprintf(fp, "slot 2: r%u (write hi FMA) ", srcs.reg2); |
| else if (ctrl.slot23.slot2 == BIFROST_OP_READ) |
| fprintf(fp, "slot 2: r%u (read) ", srcs.reg2); |
| |
| if (ctrl.slot23.slot3 == BIFROST_OP_WRITE) |
| fprintf(fp, "slot 3: r%u (write %s) ", srcs.reg3, slot3_fma); |
| else if (ctrl.slot23.slot3 == BIFROST_OP_WRITE_LO) |
| fprintf(fp, "slot 3: r%u (write lo %s) ", srcs.reg3, slot3_fma); |
| else if (ctrl.slot23.slot3 == BIFROST_OP_WRITE_HI) |
| fprintf(fp, "slot 3: r%u (write hi %s) ", srcs.reg3, slot3_fma); |
| |
| if (srcs.fau_idx) |
| fprintf(fp, "fau %X ", srcs.fau_idx); |
| |
| fprintf(fp, "\n"); |
| } |
| |
| static void |
| bi_disasm_dest_mask(FILE *fp, enum bifrost_reg_op op) |
| { |
| if (op == BIFROST_OP_WRITE_LO) |
| fprintf(fp, ".h0"); |
| else if (op == BIFROST_OP_WRITE_HI) |
| fprintf(fp, ".h1"); |
| } |
| |
| void |
| bi_disasm_dest_fma(FILE *fp, struct bifrost_regs *next_regs, bool last) |
| { |
| /* If this is the last instruction, next_regs points to the first reg entry. */ |
| struct bifrost_reg_ctrl ctrl = DecodeRegCtrl(fp, *next_regs, last); |
| if (ctrl.slot23.slot2 >= BIFROST_OP_WRITE) { |
| fprintf(fp, "r%u:t0", next_regs->reg2); |
| bi_disasm_dest_mask(fp, ctrl.slot23.slot2); |
| } else if (ctrl.slot23.slot3 >= BIFROST_OP_WRITE && ctrl.slot23.slot3_fma) { |
| fprintf(fp, "r%u:t0", next_regs->reg3); |
| bi_disasm_dest_mask(fp, ctrl.slot23.slot3); |
| } else |
| fprintf(fp, "t0"); |
| } |
| |
| void |
| bi_disasm_dest_add(FILE *fp, struct bifrost_regs *next_regs, bool last) |
| { |
| /* If this is the last instruction, next_regs points to the first reg entry. */ |
| struct bifrost_reg_ctrl ctrl = DecodeRegCtrl(fp, *next_regs, last); |
| |
| if (ctrl.slot23.slot3 >= BIFROST_OP_WRITE && !ctrl.slot23.slot3_fma) { |
| fprintf(fp, "r%u:t1", next_regs->reg3); |
| bi_disasm_dest_mask(fp, ctrl.slot23.slot3); |
| } else |
| fprintf(fp, "t1"); |
| } |
| |
| static void dump_const_imm(FILE *fp, uint32_t imm) |
| { |
| union { |
| float f; |
| uint32_t i; |
| } fi; |
| fi.i = imm; |
| fprintf(fp, "0x%08x /* %f */", imm, fi.f); |
| } |
| |
| static void |
| dump_pc_imm(FILE *fp, uint64_t imm, unsigned branch_offset, enum bi_constmod mod, bool high32) |
| { |
| if (mod == BI_CONSTMOD_PC_HI && !high32) { |
| dump_const_imm(fp, imm); |
| return; |
| } |
| |
| /* 60-bit sign-extend */ |
| uint64_t zx64 = (imm << 4); |
| int64_t sx64 = zx64; |
| sx64 >>= 4; |
| |
| /* 28-bit sign extend x 2 */ |
| uint32_t imm32[2] = { (uint32_t) imm, (uint32_t) (imm >> 32) }; |
| uint32_t zx32[2] = { imm32[0] << 4, imm32[1] << 4 }; |
| int32_t sx32[2] = { zx32[0], zx32[1] }; |
| sx32[0] >>= 4; |
| sx32[1] >>= 4; |
| |
| int64_t offs = 0; |
| |
| switch (mod) { |
| case BI_CONSTMOD_PC_LO: |
| offs = sx64; |
| break; |
| case BI_CONSTMOD_PC_HI: |
| offs = sx32[1]; |
| break; |
| case BI_CONSTMOD_PC_LO_HI: |
| offs = sx32[high32]; |
| break; |
| default: |
| unreachable("Invalid PC modifier"); |
| } |
| |
| assert((offs & 15) == 0); |
| fprintf(fp, "clause_%" PRId64, branch_offset + (offs / 16)); |
| |
| if (mod == BI_CONSTMOD_PC_LO && high32) |
| fprintf(fp, " >> 32"); |
| |
| /* While technically in spec, referencing the current clause as (pc + |
| * 0) likely indicates an unintended infinite loop */ |
| if (offs == 0) |
| fprintf(fp, " /* XXX: likely an infinite loop */"); |
| } |
| |
| /* Convert an index to an embedded constant in FAU-RAM to the index of the |
| * embedded constant. No, it's not in order. Yes, really. */ |
| |
| static unsigned |
| const_fau_to_idx(unsigned fau_value) |
| { |
| unsigned map[8] = { |
| ~0, ~0, 4, 5, 0, 1, 2, 3 |
| }; |
| |
| assert(map[fau_value] < 6); |
| return map[fau_value]; |
| } |
| |
| static void dump_fau_src(FILE *fp, struct bifrost_regs srcs, unsigned branch_offset, struct bi_constants *consts, bool high32) |
| { |
| if (srcs.fau_idx & 0x80) { |
| unsigned uniform = (srcs.fau_idx & 0x7f); |
| fprintf(fp, "u%u.w%u", uniform, high32); |
| } else if (srcs.fau_idx >= 0x20) { |
| unsigned idx = const_fau_to_idx(srcs.fau_idx >> 4); |
| uint64_t imm = consts->raw[idx]; |
| imm |= (srcs.fau_idx & 0xf); |
| if (consts->mods[idx] != BI_CONSTMOD_NONE) |
| dump_pc_imm(fp, imm, branch_offset, consts->mods[idx], high32); |
| else if (high32) |
| dump_const_imm(fp, imm >> 32); |
| else |
| dump_const_imm(fp, imm); |
| } else { |
| switch (srcs.fau_idx) { |
| case 0: |
| fprintf(fp, "#0"); |
| break; |
| case 1: |
| fprintf(fp, "lane_id"); |
| break; |
| case 2: |
| fprintf(fp, "warp_id"); |
| break; |
| case 3: |
| fprintf(fp, "core_id"); |
| break; |
| case 4: |
| fprintf(fp, "framebuffer_size"); |
| break; |
| case 5: |
| fprintf(fp, "atest_datum"); |
| break; |
| case 6: |
| fprintf(fp, "sample"); |
| break; |
| case 8: |
| case 9: |
| case 10: |
| case 11: |
| case 12: |
| case 13: |
| case 14: |
| case 15: |
| fprintf(fp, "blend_descriptor_%u", (unsigned) srcs.fau_idx - 8); |
| break; |
| default: |
| fprintf(fp, "XXX - reserved%u", (unsigned) srcs.fau_idx); |
| break; |
| } |
| |
| if (high32) |
| fprintf(fp, ".y"); |
| else |
| fprintf(fp, ".x"); |
| } |
| } |
| |
| void |
| dump_src(FILE *fp, unsigned src, struct bifrost_regs srcs, unsigned branch_offset, struct bi_constants *consts, bool isFMA) |
| { |
| switch (src) { |
| case 0: |
| fprintf(fp, "r%u", get_reg0(srcs)); |
| break; |
| case 1: |
| fprintf(fp, "r%u", get_reg1(srcs)); |
| break; |
| case 2: |
| fprintf(fp, "r%u", srcs.reg2); |
| break; |
| case 3: |
| if (isFMA) |
| fprintf(fp, "#0"); |
| else |
| fprintf(fp, "t"); // i.e. the output of FMA this cycle |
| break; |
| case 4: |
| dump_fau_src(fp, srcs, branch_offset, consts, false); |
| break; |
| case 5: |
| dump_fau_src(fp, srcs, branch_offset, consts, true); |
| break; |
| case 6: |
| fprintf(fp, "t0"); |
| break; |
| case 7: |
| fprintf(fp, "t1"); |
| break; |
| } |
| } |
| |
| /* Tables for decoding M0, or if M0 == 7, M1 respectively. |
| * |
| * XXX: It's not clear if the third entry of M1_table corresponding to (7, 2) |
| * should have PC_LO_HI in the EC1 slot, or it's a weird hybrid mode? I would |
| * say this needs testing but no code should ever actually use this mode. |
| */ |
| |
| static const enum bi_constmod M1_table[7][2] = { |
| { BI_CONSTMOD_NONE, BI_CONSTMOD_NONE }, |
| { BI_CONSTMOD_PC_LO, BI_CONSTMOD_NONE }, |
| { BI_CONSTMOD_PC_LO, BI_CONSTMOD_PC_LO }, |
| { ~0, ~0 }, |
| { BI_CONSTMOD_PC_HI, BI_CONSTMOD_NONE }, |
| { BI_CONSTMOD_PC_HI, BI_CONSTMOD_PC_HI }, |
| { BI_CONSTMOD_PC_LO, BI_CONSTMOD_NONE }, |
| }; |
| |
| static const enum bi_constmod M2_table[4][2] = { |
| { BI_CONSTMOD_PC_LO_HI, BI_CONSTMOD_NONE }, |
| { BI_CONSTMOD_PC_LO_HI, BI_CONSTMOD_PC_HI }, |
| { BI_CONSTMOD_PC_LO_HI, BI_CONSTMOD_PC_LO_HI }, |
| { BI_CONSTMOD_PC_LO_HI, BI_CONSTMOD_PC_HI }, |
| }; |
| |
| static void |
| decode_M(enum bi_constmod *mod, unsigned M1, unsigned M2, bool single) |
| { |
| if (M1 >= 8) { |
| mod[0] = BI_CONSTMOD_NONE; |
| |
| if (!single) |
| mod[1] = BI_CONSTMOD_NONE; |
| |
| return; |
| } else if (M1 == 7) { |
| assert(M2 < 4); |
| memcpy(mod, M2_table[M2], sizeof(*mod) * (single ? 1 : 2)); |
| } else { |
| assert(M1 != 3); |
| memcpy(mod, M1_table[M1], sizeof(*mod) * (single ? 1 : 2)); |
| } |
| } |
| |
| static void dump_clause(FILE *fp, uint32_t *words, unsigned *size, unsigned offset, bool verbose) |
| { |
| // State for a decoded clause |
| struct bifrost_alu_inst instrs[8] = {}; |
| struct bi_constants consts = {}; |
| unsigned num_instrs = 0; |
| unsigned num_consts = 0; |
| uint64_t header_bits = 0; |
| |
| unsigned i; |
| for (i = 0; ; i++, words += 4) { |
| if (verbose) { |
| fprintf(fp, "# "); |
| for (int j = 0; j < 4; j++) |
| fprintf(fp, "%08x ", words[3 - j]); // low bit on the right |
| fprintf(fp, "\n"); |
| } |
| unsigned tag = bits(words[0], 0, 8); |
| |
| // speculatively decode some things that are common between many formats, so we can share some code |
| struct bifrost_alu_inst main_instr = {}; |
| // 20 bits |
| main_instr.add_bits = bits(words[2], 2, 32 - 13); |
| // 23 bits |
| main_instr.fma_bits = bits(words[1], 11, 32) | bits(words[2], 0, 2) << (32 - 11); |
| // 35 bits |
| main_instr.reg_bits = ((uint64_t) bits(words[1], 0, 11)) << 24 | (uint64_t) bits(words[0], 8, 32); |
| |
| uint64_t const0 = bits(words[0], 8, 32) << 4 | (uint64_t) words[1] << 28 | bits(words[2], 0, 4) << 60; |
| uint64_t const1 = bits(words[2], 4, 32) << 4 | (uint64_t) words[3] << 32; |
| |
| /* Z-bit */ |
| bool stop = tag & 0x40; |
| |
| if (verbose) { |
| fprintf(fp, "# tag: 0x%02x\n", tag); |
| } |
| if (tag & 0x80) { |
| /* Format 5 or 10 */ |
| unsigned idx = stop ? 5 : 2; |
| main_instr.add_bits |= ((tag >> 3) & 0x7) << 17; |
| instrs[idx + 1] = main_instr; |
| instrs[idx].add_bits = bits(words[3], 0, 17) | ((tag & 0x7) << 17); |
| instrs[idx].fma_bits |= bits(words[2], 19, 32) << 10; |
| consts.raw[0] = bits(words[3], 17, 32) << 4; |
| } else { |
| bool done = false; |
| switch ((tag >> 3) & 0x7) { |
| case 0x0: |
| switch (tag & 0x7) { |
| case 0x3: |
| /* Format 1 */ |
| main_instr.add_bits |= bits(words[3], 29, 32) << 17; |
| instrs[1] = main_instr; |
| num_instrs = 2; |
| done = stop; |
| break; |
| case 0x4: |
| /* Format 3 */ |
| instrs[2].add_bits = bits(words[3], 0, 17) | bits(words[3], 29, 32) << 17; |
| instrs[2].fma_bits |= bits(words[2], 19, 32) << 10; |
| consts.raw[0] = const0; |
| decode_M(&consts.mods[0], bits(words[2], 4, 8), bits(words[2], 8, 12), true); |
| num_instrs = 3; |
| num_consts = 1; |
| done = stop; |
| break; |
| case 0x1: |
| case 0x5: |
| /* Format 4 */ |
| instrs[2].add_bits = bits(words[3], 0, 17) | bits(words[3], 29, 32) << 17; |
| instrs[2].fma_bits |= bits(words[2], 19, 32) << 10; |
| main_instr.add_bits |= bits(words[3], 26, 29) << 17; |
| instrs[3] = main_instr; |
| if ((tag & 0x7) == 0x5) { |
| num_instrs = 4; |
| done = stop; |
| } |
| break; |
| case 0x6: |
| /* Format 8 */ |
| instrs[5].add_bits = bits(words[3], 0, 17) | bits(words[3], 29, 32) << 17; |
| instrs[5].fma_bits |= bits(words[2], 19, 32) << 10; |
| consts.raw[0] = const0; |
| decode_M(&consts.mods[0], bits(words[2], 4, 8), bits(words[2], 8, 12), true); |
| num_instrs = 6; |
| num_consts = 1; |
| done = stop; |
| break; |
| case 0x7: |
| /* Format 9 */ |
| instrs[5].add_bits = bits(words[3], 0, 17) | bits(words[3], 29, 32) << 17; |
| instrs[5].fma_bits |= bits(words[2], 19, 32) << 10; |
| main_instr.add_bits |= bits(words[3], 26, 29) << 17; |
| instrs[6] = main_instr; |
| num_instrs = 7; |
| done = stop; |
| break; |
| default: |
| unreachable("[INSTR_INVALID_ENC] Invalid tag bits"); |
| } |
| break; |
| case 0x2: |
| case 0x3: { |
| /* Format 6 or 11 */ |
| unsigned idx = ((tag >> 3) & 0x7) == 2 ? 4 : 7; |
| main_instr.add_bits |= (tag & 0x7) << 17; |
| instrs[idx] = main_instr; |
| consts.raw[0] |= (bits(words[2], 19, 32) | ((uint64_t) words[3] << 13)) << 19; |
| num_consts = 1; |
| num_instrs = idx + 1; |
| done = stop; |
| break; |
| } |
| case 0x4: { |
| /* Format 2 */ |
| unsigned idx = stop ? 4 : 1; |
| main_instr.add_bits |= (tag & 0x7) << 17; |
| instrs[idx] = main_instr; |
| instrs[idx + 1].fma_bits |= bits(words[3], 22, 32); |
| instrs[idx + 1].reg_bits = bits(words[2], 19, 32) | (bits(words[3], 0, 22) << (32 - 19)); |
| break; |
| } |
| case 0x1: |
| /* Format 0 - followed by constants */ |
| num_instrs = 1; |
| done = stop; |
| FALLTHROUGH; |
| case 0x5: |
| /* Format 0 - followed by instructions */ |
| header_bits = bits(words[2], 19, 32) | ((uint64_t) words[3] << (32 - 19)); |
| main_instr.add_bits |= (tag & 0x7) << 17; |
| instrs[0] = main_instr; |
| break; |
| case 0x6: |
| case 0x7: { |
| /* Format 12 */ |
| unsigned pos = tag & 0xf; |
| |
| struct { |
| unsigned const_idx; |
| unsigned nr_tuples; |
| } pos_table[0x10] = { |
| { 0, 1 }, |
| { 0, 2 }, |
| { 0, 4 }, |
| { 1, 3 }, |
| { 1, 5 }, |
| { 2, 4 }, |
| { 0, 7 }, |
| { 1, 6 }, |
| { 3, 5 }, |
| { 1, 8 }, |
| { 2, 7 }, |
| { 3, 6 }, |
| { 3, 8 }, |
| { 4, 7 }, |
| { 5, 6 }, |
| { ~0, ~0 } |
| }; |
| |
| ASSERTED bool valid_count = pos_table[pos].nr_tuples == num_instrs; |
| assert(valid_count && "INSTR_INVALID_ENC"); |
| |
| unsigned const_idx = pos_table[pos].const_idx; |
| |
| if (num_consts < const_idx + 2) |
| num_consts = const_idx + 2; |
| |
| consts.raw[const_idx] = const0; |
| consts.raw[const_idx + 1] = const1; |
| |
| /* Calculate M values from A, B and 4-bit |
| * unsigned arithmetic. Mathematically it |
| * should be (A - B) % 16 but we use this |
| * alternate form to avoid sign issues */ |
| |
| unsigned A1 = bits(words[2], 0, 4); |
| unsigned B1 = bits(words[3], 28, 32); |
| unsigned A2 = bits(words[1], 0, 4); |
| unsigned B2 = bits(words[2], 28, 32); |
| |
| unsigned M1 = (16 + A1 - B1) & 0xF; |
| unsigned M2 = (16 + A2 - B2) & 0xF; |
| |
| decode_M(&consts.mods[const_idx], M1, M2, false); |
| |
| done = stop; |
| break; |
| } |
| default: |
| break; |
| } |
| |
| if (done) |
| break; |
| } |
| } |
| |
| *size = i + 1; |
| |
| if (verbose) { |
| fprintf(fp, "# header: %012" PRIx64 "\n", header_bits); |
| } |
| |
| struct bifrost_header header; |
| memcpy((char *) &header, (char *) &header_bits, sizeof(struct bifrost_header)); |
| dump_header(fp, header, verbose); |
| |
| fprintf(fp, "{\n"); |
| for (i = 0; i < num_instrs; i++) { |
| struct bifrost_regs regs, next_regs; |
| if (i + 1 == num_instrs) { |
| memcpy((char *) &next_regs, (char *) &instrs[0].reg_bits, |
| sizeof(next_regs)); |
| } else { |
| memcpy((char *) &next_regs, (char *) &instrs[i + 1].reg_bits, |
| sizeof(next_regs)); |
| } |
| |
| memcpy((char *) ®s, (char *) &instrs[i].reg_bits, sizeof(regs)); |
| |
| if (verbose) { |
| fprintf(fp, " # regs: %016" PRIx64 "\n", instrs[i].reg_bits); |
| dump_regs(fp, regs, i == 0); |
| } |
| |
| bi_disasm_fma(fp, instrs[i].fma_bits, ®s, &next_regs, |
| header.staging_register, offset, &consts, |
| i + 1 == num_instrs); |
| |
| bi_disasm_add(fp, instrs[i].add_bits, ®s, &next_regs, |
| header.staging_register, offset, &consts, |
| i + 1 == num_instrs); |
| } |
| fprintf(fp, "}\n"); |
| |
| if (verbose) { |
| for (unsigned i = 0; i < num_consts; i++) { |
| fprintf(fp, "# const%d: %08" PRIx64 "\n", 2 * i, consts.raw[i] & 0xffffffff); |
| fprintf(fp, "# const%d: %08" PRIx64 "\n", 2 * i + 1, consts.raw[i] >> 32); |
| } |
| } |
| |
| fprintf(fp, "\n"); |
| return; |
| } |
| |
| void disassemble_bifrost(FILE *fp, uint8_t *code, size_t size, bool verbose) |
| { |
| uint32_t *words = (uint32_t *) code; |
| uint32_t *words_end = words + (size / 4); |
| // used for displaying branch targets |
| unsigned offset = 0; |
| while (words != words_end) { |
| /* Shaders have zero bytes at the end for padding; stop |
| * disassembling when we hit them. */ |
| if (*words == 0) |
| break; |
| |
| fprintf(fp, "clause_%u:\n", offset); |
| |
| unsigned size; |
| dump_clause(fp, words, &size, offset, verbose); |
| |
| words += size * 4; |
| offset += size; |
| } |
| } |
| |