| """ |
| Copyright (C) 2021 Alyssa Rosenzweig <alyssa@rosenzweig.io> |
| |
| Permission is hereby granted, free of charge, to any person obtaining a |
| copy of this software and associated documentation files (the "Software"), |
| to deal in the Software without restriction, including without limitation |
| the rights to use, copy, modify, merge, publish, distribute, sublicense, |
| and/or sell copies of the Software, and to permit persons to whom the |
| Software is furnished to do so, subject to the following conditions: |
| |
| The above copyright notice and this permission notice (including the next |
| paragraph) shall be included in all copies or substantial portions of the |
| Software. |
| |
| THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
| THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
| OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
| SOFTWARE. |
| """ |
| |
| opcodes = {} |
| immediates = {} |
| enums = {} |
| |
| VARIABLE = ~0 |
| |
| class Opcode(object): |
| def __init__(self, name, dests, srcs, imms, is_float, can_eliminate, |
| encoding_16, encoding_32): |
| self.name = name |
| self.dests = dests if dests != VARIABLE else 0 |
| self.srcs = srcs if srcs != VARIABLE else 0 |
| self.variable_srcs = (srcs == VARIABLE) |
| self.variable_dests = (dests == VARIABLE) |
| self.imms = imms |
| self.is_float = is_float |
| self.can_eliminate = can_eliminate |
| self.encoding_16 = encoding_16 |
| self.encoding_32 = encoding_32 |
| |
| class Immediate(object): |
| def __init__(self, name, ctype): |
| self.name = name |
| self.ctype = ctype |
| |
| class Encoding(object): |
| def __init__(self, description): |
| (exact, mask, length_short, length_long) = description |
| |
| # Convenience |
| if length_long is None: |
| length_long = length_short |
| |
| self.exact = exact |
| self.mask = mask |
| self.length_short = length_short |
| self.extensible = length_short != length_long |
| |
| if self.extensible: |
| assert(length_long == length_short + (4 if length_short > 8 else 2)) |
| |
| def op(name, encoding_32, dests = 1, srcs = 0, imms = [], is_float = False, |
| can_eliminate = True, encoding_16 = None): |
| encoding_16 = Encoding(encoding_16) if encoding_16 is not None else None |
| encoding_32 = Encoding(encoding_32) if encoding_32 is not None else None |
| |
| opcodes[name] = Opcode(name, dests, srcs, imms, is_float, can_eliminate, encoding_16, encoding_32) |
| |
| def immediate(name, ctype = "uint32_t"): |
| imm = Immediate(name, ctype) |
| immediates[name] = imm |
| return imm |
| |
| def enum(name, value_dict): |
| enums[name] = value_dict |
| return immediate(name, "enum agx_" + name) |
| |
| L = (1 << 15) |
| _ = None |
| |
| FORMAT = immediate("format", "enum agx_format") |
| IMM = immediate("imm") |
| WRITEOUT = immediate("writeout") |
| INDEX = immediate("index") |
| COMPONENT = immediate("component") |
| CHANNELS = immediate("channels") |
| TRUTH_TABLE = immediate("truth_table") |
| ROUND = immediate("round", "enum agx_round") |
| SHIFT = immediate("shift") |
| MASK = immediate("mask") |
| BFI_MASK = immediate("bfi_mask") |
| LOD_MODE = immediate("lod_mode", "enum agx_lod_mode") |
| |
| DIM = enum("dim", { |
| 0: '1d', |
| 1: '1d_array', |
| 2: '2d', |
| 3: '2d_array', |
| 4: '2d_ms', |
| 5: '3d', |
| 6: 'cube', |
| 7: 'cube_array' |
| }) |
| |
| OFFSET = immediate("offset", "bool") |
| SHADOW = immediate("shadow", "bool") |
| SCOREBOARD = immediate("scoreboard") |
| ICOND = immediate("icond", "enum agx_icond") |
| FCOND = immediate("fcond", "enum agx_fcond") |
| NEST = immediate("nest") |
| INVERT_COND = immediate("invert_cond") |
| NEST = immediate("nest") |
| TARGET = immediate("target", "agx_block *") |
| PERSPECTIVE = immediate("perspective", "bool") |
| SR = enum("sr", { |
| 0: 'threadgroup_position_in_grid.x', |
| 1: 'threadgroup_position_in_grid.y', |
| 2: 'threadgroup_position_in_grid.z', |
| 4: 'threads_per_threadgroup.x', |
| 5: 'threads_per_threadgroup.y', |
| 6: 'threads_per_threadgroup.z', |
| 8: 'dispatch_threads_per_threadgroup.x', |
| 9: 'dispatch_threads_per_threadgroup.y', |
| 10: 'dispatch_threads_per_threadgroup.z', |
| 48: 'thread_position_in_threadgroup.x', |
| 49: 'thread_position_in_threadgroup.y', |
| 50: 'thread_position_in_threadgroup.z', |
| 51: 'thread_index_in_threadgroup', |
| 52: 'thread_index_in_subgroup', |
| 53: 'subgroup_index_in_threadgroup', |
| 56: 'active_thread_index_in_quad', |
| 58: 'active_thread_index_in_subgroup', |
| 62: 'backfacing', |
| 80: 'thread_position_in_grid.x', |
| 81: 'thread_position_in_grid.y', |
| 82: 'thread_position_in_grid.z', |
| }) |
| |
| FUNOP = lambda x: (x << 28) |
| FUNOP_MASK = FUNOP((1 << 14) - 1) |
| |
| def funop(name, opcode): |
| op(name, (0x0A | L | (opcode << 28), |
| 0x3F | L | (((1 << 14) - 1) << 28), 6, _), |
| srcs = 1, is_float = True) |
| |
| # Listing of opcodes |
| funop("floor", 0b000000) |
| funop("srsqrt", 0b000001) |
| funop("dfdx", 0b000100) |
| funop("dfdy", 0b000110) |
| funop("rcp", 0b001000) |
| funop("rsqrt", 0b001001) |
| funop("sin_pt_1", 0b001010) |
| funop("log2", 0b001100) |
| funop("exp2", 0b001101) |
| funop("sin_pt_2", 0b001110) |
| funop("ceil", 0b010000) |
| funop("trunc", 0b100000) |
| funop("roundeven", 0b110000) |
| |
| op("fadd", |
| encoding_16 = (0x26 | L, 0x3F | L, 6, _), |
| encoding_32 = (0x2A | L, 0x3F | L, 6, _), |
| srcs = 2, is_float = True) |
| |
| op("fma", |
| encoding_16 = (0x36, 0x3F, 6, 8), |
| encoding_32 = (0x3A, 0x3F, 6, 8), |
| srcs = 3, is_float = True) |
| |
| op("fmul", |
| encoding_16 = ((0x16 | L), (0x3F | L), 6, _), |
| encoding_32 = ((0x1A | L), (0x3F | L), 6, _), |
| srcs = 2, is_float = True) |
| |
| op("mov_imm", |
| encoding_32 = (0x62, 0xFF, 6, 8), |
| encoding_16 = (0x62, 0xFF, 4, 6), |
| imms = [IMM]) |
| |
| op("iadd", |
| encoding_32 = (0x0E, 0x3F | L, 8, _), |
| srcs = 2, imms = [SHIFT]) |
| |
| op("imad", |
| encoding_32 = (0x1E, 0x3F | L, 8, _), |
| srcs = 3, imms = [SHIFT]) |
| |
| op("bfi", |
| encoding_32 = (0x2E, 0x7F | (0x3 << 26), 8, _), |
| srcs = 3, imms = [BFI_MASK]) |
| |
| op("bfeil", |
| encoding_32 = (0x2E | L, 0x7F | L | (0x3 << 26), 8, _), |
| srcs = 3, imms = [BFI_MASK]) |
| |
| op("asr", |
| encoding_32 = (0x2E | L | (0x1 << 26), 0x7F | L | (0x3 << 26), 8, _), |
| srcs = 2) |
| |
| op("icmpsel", |
| encoding_32 = (0x12, 0x7F, 8, 10), |
| srcs = 4, imms = [ICOND]) |
| |
| op("fcmpsel", |
| encoding_32 = (0x02, 0x7F, 8, 10), |
| srcs = 4, imms = [FCOND]) |
| |
| # sources are coordinates, LOD, texture, sampler, shadow/offset |
| # TODO: anything else? |
| op("texture_sample", |
| encoding_32 = (0x31, 0x7F, 8, 10), # XXX WRONG SIZE |
| srcs = 5, imms = [DIM, LOD_MODE, MASK, SCOREBOARD, OFFSET, SHADOW]) |
| op("texture_load", |
| encoding_32 = (0x71, 0x7F, 8, 10), # XXX WRONG SIZE |
| srcs = 5, imms = [DIM, LOD_MODE, MASK, SCOREBOARD, OFFSET]) |
| |
| # sources are base, index |
| op("device_load", |
| encoding_32 = (0x05, 0x7F, 6, 8), |
| srcs = 2, imms = [FORMAT, MASK, SCOREBOARD]) |
| |
| # sources are value, index |
| # TODO: Consider permitting the short form |
| op("uniform_store", |
| encoding_32 = ((0b111 << 27) | 0b1000101 | (1 << 47), 0, 8, _), |
| dests = 0, srcs = 2, can_eliminate = False) |
| |
| op("wait", (0x38, 0xFF, 2, _), dests = 0, |
| can_eliminate = False, imms = [SCOREBOARD]) |
| |
| op("get_sr", (0x72, 0x7F | L, 4, _), dests = 1, imms = [SR]) |
| |
| op("sample_mask", (0x7fc1, 0xffff, 6, _), dests = 0, srcs = 1, can_eliminate = False) |
| |
| # Essentially same encoding |
| op("ld_tile", (0x49, 0x7F, 8, _), dests = 1, srcs = 0, imms = [FORMAT, MASK]) |
| |
| op("st_tile", (0x09, 0x7F, 8, _), dests = 0, srcs = 1, |
| can_eliminate = False, imms = [FORMAT, MASK]) |
| |
| for (name, exact) in [("any", 0xC000), ("none", 0xC200)]: |
| op("jmp_exec_" + name, (exact, (1 << 16) - 1, 6, _), dests = 0, srcs = 0, |
| can_eliminate = False, imms = [TARGET]) |
| |
| # TODO: model implicit r0l destinations |
| op("pop_exec", (0x52 | (0x3 << 9), ((1 << 48) - 1) ^ (0x3 << 7) ^ (0x3 << 11), 6, _), |
| dests = 0, srcs = 0, can_eliminate = False, imms = [NEST]) |
| |
| for is_float in [False, True]: |
| mod_mask = 0 if is_float else (0x3 << 26) | (0x3 << 38) |
| |
| for (cf, cf_op) in [("if", 0), ("else", 1), ("while", 2)]: |
| name = "{}_{}cmp".format(cf, "f" if is_float else "i") |
| exact = 0x42 | (0x0 if is_float else 0x10) | (cf_op << 9) |
| mask = 0x7F | (0x3 << 9) | mod_mask | (0x3 << 44) |
| imms = [NEST, FCOND if is_float else ICOND, INVERT_COND] |
| |
| op(name, (exact, mask, 6, _), dests = 0, srcs = 2, can_eliminate = False, |
| imms = imms, is_float = is_float) |
| |
| op("bitop", (0x7E, 0x7F, 6, _), srcs = 2, imms = [TRUTH_TABLE]) |
| op("convert", (0x3E | L, 0x7F | L | (0x3 << 38), 6, _), srcs = 2, imms = [ROUND]) |
| op("iter", (0x21, 0xBF, 8, _), srcs = 2, imms = [CHANNELS, PERSPECTIVE]) |
| op("ldcf", (0xA1, 0xBF, 8, _), srcs = 1, imms = [CHANNELS]) |
| op("st_vary", None, dests = 0, srcs = 2, can_eliminate = False) |
| op("stop", (0x88, 0xFFFF, 2, _), dests = 0, can_eliminate = False) |
| op("trap", (0x08, 0xFFFF, 2, _), dests = 0, can_eliminate = False) |
| op("writeout", (0x48, 0xFF, 4, _), dests = 0, imms = [WRITEOUT], can_eliminate = False) |
| |
| # Convenient aliases. |
| op("mov", _, srcs = 1) |
| op("not", _, srcs = 1) |
| op("xor", _, srcs = 2) |
| op("and", _, srcs = 2) |
| op("or", _, srcs = 2) |
| |
| # Indicates the logical end of the block, before final branches/control flow |
| op("logical_end", _, dests = 0, srcs = 0, can_eliminate = False) |
| |
| op("collect", _, srcs = VARIABLE) |
| op("split", _, srcs = 1, dests = VARIABLE) |
| op("phi", _, srcs = VARIABLE) |
| |
| op("unit_test", _, dests = 0, srcs = 1, can_eliminate = False) |
| |
| # Like mov, but takes a register and can only appear at the start. Gauranteed |
| # to be coalesced during RA, rather than lowered to a real move. |
| op("preload", _, srcs = 1) |
| |
| # Set the nesting counter. Lowers to mov r0l, x after RA. |
| op("nest", _, dests = 0, srcs = 1, can_eliminate = False) |