| <?xml version="1.0" encoding="UTF-8"?> |
| <database xmlns="http://nouveau.freedesktop.org/" |
| xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" |
| xsi:schemaLocation="http://nouveau.freedesktop.org/ rules-ng.xsd"> |
| |
| <enum name="vgt_event_type" varset="chip"> <!-- Event type ids. A numeric value may be listed more than once; the variants attribute says which chip generations each name applies to. --> |
| <value name="VS_DEALLOC" value="0"/> |
| <value name="PS_DEALLOC" value="1"/> |
| <value name="VS_DONE_TS" value="2"/> |
| <value name="PS_DONE_TS" value="3"/> |
| <value name="CACHE_FLUSH_TS" value="4"/> |
| <value name="CONTEXT_DONE" value="5"/> |
| <value name="CACHE_FLUSH" value="6"/> |
| <value name="VIZQUERY_START" value="7" variants="A2XX"/> |
| <value name="HLSQ_FLUSH" value="7" variants="A3XX-A4XX"/> |
| <value name="VIZQUERY_END" value="8" variants="A2XX"/> |
| <value name="SC_WAIT_WC" value="9" variants="A2XX"/> |
| <value name="WRITE_PRIMITIVE_COUNTS" value="9" variants="A6XX"/> |
| <value name="START_PRIMITIVE_CTRS" value="11" variants="A6XX"/> |
| <value name="STOP_PRIMITIVE_CTRS" value="12" variants="A6XX"/> |
| <!-- Not sure that these 4 events don't have the same meaning as on A5XX+ --> |
| <value name="RST_PIX_CNT" value="13" variants="A2XX-A4XX"/> |
| <value name="RST_VTX_CNT" value="14" variants="A2XX-A4XX"/> |
| <value name="TILE_FLUSH" value="15" variants="A2XX-A4XX"/> |
| <value name="STAT_EVENT" value="16" variants="A2XX-A4XX"/> |
| <value name="CACHE_FLUSH_AND_INV_TS_EVENT" value="20" variants="A2XX-A4XX"/> |
| <value name="ZPASS_DONE" value="21"/> |
| <value name="CACHE_FLUSH_AND_INV_EVENT" value="22" variants="A2XX"/> |
| <value name="RB_DONE_TS" value="22" variants="A3XX-"/> |
| <value name="PERFCOUNTER_START" value="23" variants="A2XX-A4XX"/> |
| <value name="PERFCOUNTER_STOP" value="24" variants="A2XX-A4XX"/> |
| <value name="VS_FETCH_DONE" value="27"/> |
| <value name="FACENESS_FLUSH" value="28" variants="A2XX-A4XX"/> |
| |
| <!-- a5xx events --> |
| <value name="WT_DONE_TS" value="8" variants="A5XX-"/> |
| <value name="START_FRAGMENT_CTRS" value="13" variants="A5XX-"/> |
| <value name="STOP_FRAGMENT_CTRS" value="14" variants="A5XX-"/> |
| <value name="START_COMPUTE_CTRS" value="15" variants="A5XX-"/> |
| <value name="STOP_COMPUTE_CTRS" value="16" variants="A5XX-"/> |
| <value name="FLUSH_SO_0" value="17" variants="A5XX-"/> |
| <value name="FLUSH_SO_1" value="18" variants="A5XX-"/> |
| <value name="FLUSH_SO_2" value="19" variants="A5XX-"/> |
| <value name="FLUSH_SO_3" value="20" variants="A5XX-"/> |
| <value name="PC_CCU_INVALIDATE_DEPTH" value="24" variants="A5XX-"/> |
| <value name="PC_CCU_INVALIDATE_COLOR" value="25" variants="A5XX-"/> |
| <value name="PC_CCU_RESOLVE_TS" value="26" variants="A6XX"/> |
| <value name="PC_CCU_FLUSH_DEPTH_TS" value="28" variants="A5XX-"/> |
| <value name="PC_CCU_FLUSH_COLOR_TS" value="29" variants="A5XX-"/> |
| <value name="BLIT" value="30" variants="A5XX-"/> |
| <doc> |
| Clears based on GRAS_LRZ_CNTL configuration, could clear |
| fast-clear buffer or LRZ direction. |
| LRZ direction is stored at lrz_fc_offset + 0x200, has 1 byte which |
| could be expressed by enum: |
| CUR_DIR_DISABLED = 0x0 |
| CUR_DIR_GE = 0x1 |
| CUR_DIR_LE = 0x2 |
| CUR_DIR_UNSET = 0x3 |
| Clear of direction means setting the direction to CUR_DIR_UNSET. |
| </doc> |
| <value name="LRZ_CLEAR" value="37" variants="A5XX-"/> <!-- described by the doc element directly above --> |
| <value name="LRZ_FLUSH" value="38" variants="A5XX-"/> |
| <value name="BLIT_OP_FILL_2D" value="39" variants="A5XX-"/> |
| <value name="BLIT_OP_COPY_2D" value="40" variants="A5XX-"/> |
| <value name="BLIT_OP_SCALE_2D" value="42" variants="A5XX-"/> |
| <value name="CONTEXT_DONE_2D" value="43" variants="A5XX-"/> |
| <value name="UNK_2C" value="44" variants="A5XX-"/> |
| <value name="UNK_2D" value="45" variants="A5XX-"/> |
| |
| <!-- a6xx events --> |
| <value name="CACHE_INVALIDATE" value="49" variants="A6XX"/> |
| <value name="LABEL" value="63" variants="A6XX-"/> |
| |
| <!-- note, some of these are the same as a6xx, just named differently --> |
| <value name="CCU_INVALIDATE_DEPTH" value="24" variants="A7XX"/> <!-- NOTE(review): value 24 is also PC_CCU_INVALIDATE_DEPTH with variants="A5XX-"; if that open-ended range covers A7XX, two names match the same value (same for 25, 28, 29, 30); confirm which name decoders should report --> |
| <value name="CCU_INVALIDATE_COLOR" value="25" variants="A7XX"/> |
| <value name="CCU_RESOLVE_CLEAN" value="26" variants="A7XX"/> |
| <value name="CCU_FLUSH_DEPTH" value="28" variants="A7XX"/> |
| <value name="CCU_FLUSH_COLOR" value="29" variants="A7XX"/> |
| <value name="CCU_RESOLVE" value="30" variants="A7XX"/> |
| <value name="CCU_END_RESOLVE_GROUP" value="31" variants="A7XX"/> |
| <value name="CCU_CLEAN_DEPTH" value="32" variants="A7XX"/> |
| <value name="CCU_CLEAN_COLOR" value="33" variants="A7XX"/> |
| <value name="CACHE_RESET" value="48" variants="A7XX"/> |
| <value name="CACHE_CLEAN" value="49" variants="A7XX"/> |
| <!-- TODO: deal with name conflicts with other gens --> |
| <value name="CACHE_FLUSH7" value="50" variants="A7XX"/> |
| <value name="CACHE_INVALIDATE7" value="51" variants="A7XX"/> |
| </enum> |
| |
| <enum name="pc_di_primtype"> <!-- Primitive types for the PRIM_TYPE field (bits 0-5) of vgt_draw_initiator and vgt_draw_initiator_a4xx. --> |
| <value name="DI_PT_NONE" value="0"/> |
| <!-- POINTLIST_PSIZE is used on a3xx/a4xx when gl_PointSize is written: --> |
| <value name="DI_PT_POINTLIST_PSIZE" value="1"/> |
| <value name="DI_PT_LINELIST" value="2"/> |
| <value name="DI_PT_LINESTRIP" value="3"/> |
| <value name="DI_PT_TRILIST" value="4"/> |
| <value name="DI_PT_TRIFAN" value="5"/> |
| <value name="DI_PT_TRISTRIP" value="6"/> |
| <value name="DI_PT_LINELOOP" value="7"/> <!-- a22x, a3xx --> |
| <value name="DI_PT_RECTLIST" value="8"/> |
| <value name="DI_PT_POINTLIST" value="9"/> |
| <value name="DI_PT_LINE_ADJ" value="0xa"/> |
| <value name="DI_PT_LINESTRIP_ADJ" value="0xb"/> |
| <value name="DI_PT_TRI_ADJ" value="0xc"/> |
| <value name="DI_PT_TRISTRIP_ADJ" value="0xd"/> |
| |
| <value name="DI_PT_PATCHES0" value="0x1f"/> <!-- DI_PT_PATCHESn is encoded as 0x1f + n, for n = 0..31 --> |
| <value name="DI_PT_PATCHES1" value="0x20"/> |
| <value name="DI_PT_PATCHES2" value="0x21"/> |
| <value name="DI_PT_PATCHES3" value="0x22"/> |
| <value name="DI_PT_PATCHES4" value="0x23"/> |
| <value name="DI_PT_PATCHES5" value="0x24"/> |
| <value name="DI_PT_PATCHES6" value="0x25"/> |
| <value name="DI_PT_PATCHES7" value="0x26"/> |
| <value name="DI_PT_PATCHES8" value="0x27"/> |
| <value name="DI_PT_PATCHES9" value="0x28"/> |
| <value name="DI_PT_PATCHES10" value="0x29"/> |
| <value name="DI_PT_PATCHES11" value="0x2a"/> |
| <value name="DI_PT_PATCHES12" value="0x2b"/> |
| <value name="DI_PT_PATCHES13" value="0x2c"/> |
| <value name="DI_PT_PATCHES14" value="0x2d"/> |
| <value name="DI_PT_PATCHES15" value="0x2e"/> |
| <value name="DI_PT_PATCHES16" value="0x2f"/> |
| <value name="DI_PT_PATCHES17" value="0x30"/> |
| <value name="DI_PT_PATCHES18" value="0x31"/> |
| <value name="DI_PT_PATCHES19" value="0x32"/> |
| <value name="DI_PT_PATCHES20" value="0x33"/> |
| <value name="DI_PT_PATCHES21" value="0x34"/> |
| <value name="DI_PT_PATCHES22" value="0x35"/> |
| <value name="DI_PT_PATCHES23" value="0x36"/> |
| <value name="DI_PT_PATCHES24" value="0x37"/> |
| <value name="DI_PT_PATCHES25" value="0x38"/> |
| <value name="DI_PT_PATCHES26" value="0x39"/> |
| <value name="DI_PT_PATCHES27" value="0x3a"/> |
| <value name="DI_PT_PATCHES28" value="0x3b"/> |
| <value name="DI_PT_PATCHES29" value="0x3c"/> |
| <value name="DI_PT_PATCHES30" value="0x3d"/> |
| <value name="DI_PT_PATCHES31" value="0x3e"/> |
| </enum> |
| |
| <enum name="pc_di_src_sel"> <!-- Values for the SOURCE_SELECT field (bits 6-7) of vgt_draw_initiator and vgt_draw_initiator_a4xx. --> |
| <value name="DI_SRC_SEL_DMA" value="0"/> |
| <value name="DI_SRC_SEL_IMMEDIATE" value="1"/> |
| <value name="DI_SRC_SEL_AUTO_INDEX" value="2"/> |
| <value name="DI_SRC_SEL_AUTO_XFB" value="3"/> |
| </enum> |
| |
| <enum name="pc_di_face_cull_sel"> <!-- Face-cull selector values (0..3). NOTE(review): no field in this part of the file references this enum; confirm where it is consumed. --> |
| <value name="DI_FACE_CULL_NONE" value="0"/> |
| <value name="DI_FACE_CULL_FETCH" value="1"/> |
| <value name="DI_FACE_BACKFACE_CULL" value="2"/> |
| <value name="DI_FACE_FRONTFACE_CULL" value="3"/> |
| </enum> |
| |
| <enum name="pc_di_index_size"> <!-- Index-size values; type of the INDEX_SIZE field (pos 11) of vgt_draw_initiator. --> |
| <value name="INDEX_SIZE_IGN" value="0"/> |
| <value name="INDEX_SIZE_16_BIT" value="0"/> <!-- shares value 0 with INDEX_SIZE_IGN --> |
| <value name="INDEX_SIZE_32_BIT" value="1"/> |
| <value name="INDEX_SIZE_8_BIT" value="2"/> <!-- NOTE(review): 2 does not fit in the single-bit INDEX_SIZE field of vgt_draw_initiator; presumably conveyed together with the SMALL_INDEX bit; confirm --> |
| <value name="INDEX_SIZE_INVALID"/> <!-- declared without a value attribute --> |
| </enum> |
| |
| <enum name="pc_di_vis_cull_mode"> <!-- Values for the VIS_CULL field of vgt_draw_initiator (bits 9-10) and vgt_draw_initiator_a4xx (bits 8-9). --> |
| <value name="IGNORE_VISIBILITY" value="0"/> |
| <value name="USE_VISIBILITY" value="1"/> |
| </enum> |
| |
| <enum name="adreno_pm4_packet_type"> <!-- PM4 packet type constants, written as full 32-bit values. --> |
| <value name="CP_TYPE0_PKT" value="0x00000000"/> |
| <value name="CP_TYPE1_PKT" value="0x40000000"/> |
| <value name="CP_TYPE2_PKT" value="0x80000000"/> |
| <value name="CP_TYPE3_PKT" value="0xc0000000"/> |
| <value name="CP_TYPE4_PKT" value="0x40000000"/> <!-- same value as CP_TYPE1_PKT; NOTE(review): presumably the two are used on different generations (PKT4 is described as a5xx+ in adreno_pm4_type3_packets); confirm --> |
| <value name="CP_TYPE7_PKT" value="0x70000000"/> |
| </enum> |
| |
| <!-- |
| Note that in some cases, the same packet id is recycled on a later |
| generation, so variants attribute is used to distinguish. They |
| may not be completely accurate, we would probably have to analyze |
| the pfp and me/pm4 firmware to verify the packet is actually |
| handled on a particular generation. But it is at least enough to |
| disambiguate the packet-id's that were re-used for different |
| packets starting with a5xx. |
| --> |
| <enum name="adreno_pm4_type3_packets" varset="chip"> <!-- PM4 type-3 packet opcodes; a doc element documents the value that directly follows it. --> |
| <doc>initialize CP's micro-engine</doc> |
| <value name="CP_ME_INIT" value="0x48"/> |
| <doc>skip N 32-bit words to get to the next packet</doc> |
| <value name="CP_NOP" value="0x10"/> |
| <value name="CP_PREEMPT_ENABLE" value="0x1c"/> |
| <value name="CP_PREEMPT_TOKEN" value="0x1e"/> |
| <doc> |
| indirect buffer dispatch. prefetch parser uses this packet |
| type to determine whether to pre-fetch the IB |
| </doc> |
| <value name="CP_INDIRECT_BUFFER" value="0x3f"/> |
| <doc> |
| Takes the same arguments as CP_INDIRECT_BUFFER, but jumps to |
| another buffer at the same level. Must be at the end of IB, and |
| doesn't work with draw state IB's. |
| </doc> |
| <value name="CP_INDIRECT_BUFFER_CHAIN" value="0x57" variants="A5XX-"/> |
| <doc>indirect buffer dispatch. same as IB, but init is pipelined</doc> |
| <value name="CP_INDIRECT_BUFFER_PFD" value="0x37"/> |
| <doc>wait for the IDLE state of the engine</doc> |
| <value name="CP_WAIT_FOR_IDLE" value="0x26"/> |
| <doc>wait until a register or memory location is a specific value</doc> |
| <value name="CP_WAIT_REG_MEM" value="0x3c"/> |
| <doc>wait until a register location is equal to a specific value</doc> |
| <value name="CP_WAIT_REG_EQ" value="0x52"/> |
| <doc>wait until a register location is >= a specific value</doc> |
| <value name="CP_WAIT_REG_GTE" value="0x53" variants="A2XX-A4XX"/> |
| <doc>wait until a read completes</doc> |
| <value name="CP_WAIT_UNTIL_READ" value="0x5c" variants="A2XX-A4XX"/> |
| <doc>wait until all base/size writes from an IB_PFD packet have completed</doc> |
| <value name="CP_WAIT_IB_PFD_COMPLETE" value="0x5d"/> |
| <doc>register read/modify/write</doc> |
| <value name="CP_REG_RMW" value="0x21"/> |
| <doc>Set binning configuration registers</doc> |
| <value name="CP_SET_BIN_DATA" value="0x2f" variants="A2XX-A4XX"/> |
| <value name="CP_SET_BIN_DATA5" value="0x2f" variants="A5XX-"/> |
| <doc>reads register in chip and writes to memory</doc> |
| <value name="CP_REG_TO_MEM" value="0x3e"/> |
| <doc>write N 32-bit words to memory</doc> |
| <value name="CP_MEM_WRITE" value="0x3d"/> |
| <doc>write CP_PROG_COUNTER value to memory</doc> |
| <value name="CP_MEM_WRITE_CNTR" value="0x4f"/> |
| <doc>conditional execution of a sequence of packets</doc> |
| <value name="CP_COND_EXEC" value="0x44"/> |
| <doc>conditional write to memory or register</doc> |
| <value name="CP_COND_WRITE" value="0x45" variants="A2XX-A4XX"/> |
| <value name="CP_COND_WRITE5" value="0x45" variants="A5XX-"/> |
| <doc>generate an event that creates a write to memory when completed</doc> |
| <value name="CP_EVENT_WRITE" value="0x46"/> |
| <doc>generate a VS|PS_done event</doc> |
| <value name="CP_EVENT_WRITE_SHD" value="0x58"/> |
| <doc>generate a cache flush done event</doc> |
| <value name="CP_EVENT_WRITE_CFL" value="0x59"/> |
| <doc>generate a z_pass done event</doc> |
| <value name="CP_EVENT_WRITE_ZPD" value="0x5b"/> |
| <doc> |
| not sure the real name, but this seems to be what is used for |
| opencl, instead of CP_DRAW_INDX.. |
| </doc> |
| <value name="CP_RUN_OPENCL" value="0x31"/> |
| <doc>initiate fetch of index buffer and draw</doc> |
| <value name="CP_DRAW_INDX" value="0x22"/> |
| <doc>draw using supplied indices in packet</doc> |
| <value name="CP_DRAW_INDX_2" value="0x36" variants="A2XX-A4XX"/> <!-- this is something different on a6xx and unused on a5xx --> |
| <doc>initiate fetch of index buffer and binIDs and draw</doc> |
| <value name="CP_DRAW_INDX_BIN" value="0x34" variants="A2XX-A4XX"/> |
| <doc>initiate fetch of bin IDs and draw using supplied indices</doc> |
| <value name="CP_DRAW_INDX_2_BIN" value="0x35" variants="A2XX-A4XX"/> |
| <doc>begin/end initiator for viz query extent processing</doc> |
| <value name="CP_VIZ_QUERY" value="0x23" variants="A2XX-A4XX"/> |
| <doc>fetch state sub-blocks and initiate shader code DMAs</doc> |
| <value name="CP_SET_STATE" value="0x25"/> |
| <doc>load constant into chip and to memory</doc> |
| <value name="CP_SET_CONSTANT" value="0x2d"/> |
| <doc>load sequencer instruction memory (pointer-based)</doc> |
| <value name="CP_IM_LOAD" value="0x27"/> |
| <doc>load sequencer instruction memory (code embedded in packet)</doc> |
| <value name="CP_IM_LOAD_IMMEDIATE" value="0x2b"/> |
| <doc>load constants from a location in memory</doc> |
| <value name="CP_LOAD_CONSTANT_CONTEXT" value="0x2e" variants="A2XX"/> |
| <doc>selective invalidation of state pointers</doc> |
| <value name="CP_INVALIDATE_STATE" value="0x3b"/> |
| <doc>dynamically changes shader instruction memory partition</doc> |
| <value name="CP_SET_SHADER_BASES" value="0x4a" variants="A2XX-A4XX"/> |
| <doc>sets the 64-bit BIN_MASK register in the PFP</doc> |
| <value name="CP_SET_BIN_MASK" value="0x50" variants="A2XX-A4XX"/> |
| <doc>sets the 64-bit BIN_SELECT register in the PFP</doc> |
| <value name="CP_SET_BIN_SELECT" value="0x51" variants="A2XX-A4XX"/> |
| <doc>updates the current context, if needed</doc> |
| <value name="CP_CONTEXT_UPDATE" value="0x5e"/> |
| <doc>generate interrupt from the command stream</doc> |
| <value name="CP_INTERRUPT" value="0x40"/> |
| <doc>copy sequencer instruction memory to system memory</doc> |
| <value name="CP_IM_STORE" value="0x2c" variants="A2XX"/> |
| |
| <!-- For a20x --> |
| <!-- TODO handle variants.. |
| <doc> |
| Program an offset that will be added to the BIN_BASE value of |
| the 3D_DRAW_INDX_BIN packet |
| </doc> |
| <value name="CP_SET_BIN_BASE_OFFSET" value="0x4b"/> |
| --> |
| |
| <!-- for a22x --> |
| <doc> |
| sets draw initiator flags register in PFP, gets bitwise-ORed into |
| every draw initiator |
| </doc> |
| <value name="CP_SET_DRAW_INIT_FLAGS" value="0x4b"/> |
| <doc>sets the register protection mode</doc> |
| <value name="CP_SET_PROTECTED_MODE" value="0x5f"/> |
| |
| <value name="CP_BOOTSTRAP_UCODE" value="0x6f"/> |
| |
| <!-- for a3xx --> |
| <doc>load high level sequencer command</doc> |
| <value name="CP_LOAD_STATE" value="0x30" variants="A3XX"/> |
| <value name="CP_LOAD_STATE4" value="0x30" variants="A4XX-A5XX"/> |
| <doc>Conditionally load an IB based on a flag, prefetch enabled</doc> |
| <value name="CP_COND_INDIRECT_BUFFER_PFE" value="0x3a"/> |
| <doc>Conditionally load an IB based on a flag, prefetch disabled</doc> |
| <value name="CP_COND_INDIRECT_BUFFER_PFD" value="0x32" variants="A3XX"/> |
| <doc>Load a buffer with pre-fetch enabled</doc> |
| <value name="CP_INDIRECT_BUFFER_PFE" value="0x3f" variants="A5XX"/> |
| <doc>Set bin (?)</doc> |
| <value name="CP_SET_BIN" value="0x4c" variants="A2XX"/> |
| |
| <doc>test 2 memory locations to dword values specified</doc> |
| <value name="CP_TEST_TWO_MEMS" value="0x71"/> |
| |
| <doc>Write register, ignoring context state for context sensitive registers</doc> |
| <value name="CP_REG_WR_NO_CTXT" value="0x78"/> |
| |
| <doc>Record the real-time when this packet is processed by PFP</doc> |
| <value name="CP_RECORD_PFP_TIMESTAMP" value="0x11"/> |
| |
| <!-- Used to switch GPU between secure and non-secure modes --> |
| <value name="CP_SET_SECURE_MODE" value="0x66"/> |
| |
| <doc>PFP waits until the FIFO between the PFP and the ME is empty</doc> |
| <value name="CP_WAIT_FOR_ME" value="0x13"/> |
| |
| <!-- for a4xx --> |
| <doc> |
| Used a bit like CP_SET_CONSTANT on a2xx, but can write multiple |
| groups of registers. Looks like it can be used to create state |
| objects in GPU memory, and on state change only emit pointer |
| (via CP_SET_DRAW_STATE), which should be nice for reducing CPU |
| overhead: |
| |
| (A4x) save PM4 stream pointers to execute upon a visible draw |
| </doc> |
| <value name="CP_SET_DRAW_STATE" value="0x43" variants="A4XX-"/> |
| <value name="CP_DRAW_INDX_OFFSET" value="0x38"/> |
| <value name="CP_DRAW_INDIRECT" value="0x28" variants="A4XX-"/> |
| <value name="CP_DRAW_INDX_INDIRECT" value="0x29" variants="A4XX-"/> |
| <value name="CP_DRAW_INDIRECT_MULTI" value="0x2a" variants="A6XX"/> |
| <value name="CP_DRAW_AUTO" value="0x24"/> |
| |
| <doc> |
| Enable or disable predication globally. Also resets the |
| predicate to "passing" and the local bit to enabled when |
| enabling global predication. |
| </doc> |
| <value name="CP_DRAW_PRED_ENABLE_GLOBAL" value="0x19"/> |
| |
| <doc> |
| Enable or disable predication locally. Unlike globally enabling |
| predication, this packet doesn't touch any other state. |
| Predication only happens when enabled globally and locally and a |
| predicate has been set. This should be used for internal draws |
| which aren't supposed to use the predication state: |
| |
| CP_DRAW_PRED_ENABLE_LOCAL(0) |
| ... do draw... |
| CP_DRAW_PRED_ENABLE_LOCAL(1) |
| </doc> |
| <value name="CP_DRAW_PRED_ENABLE_LOCAL" value="0x1a"/> |
| |
| <doc> |
| Latch a draw predicate into the internal register. |
| </doc> |
| <value name="CP_DRAW_PRED_SET" value="0x4e"/> |
| |
| <doc> |
| for A4xx |
| Write to register with address that does not fit into type-0 pkt |
| </doc> |
| <value name="CP_WIDE_REG_WRITE" value="0x74" variants="A4XX"/> |
| |
| <doc>copy from ME scratch RAM to a register</doc> |
| <value name="CP_SCRATCH_TO_REG" value="0x4d"/> |
| |
| <doc>Copy from REG to ME scratch RAM</doc> |
| <value name="CP_REG_TO_SCRATCH" value="0x4a"/> |
| |
| <doc>Wait for memory writes to complete</doc> |
| <value name="CP_WAIT_MEM_WRITES" value="0x12"/> |
| |
| <doc>Conditional execution based on register comparison</doc> |
| <value name="CP_COND_REG_EXEC" value="0x47"/> |
| |
| <doc>Memory to REG copy</doc> |
| <value name="CP_MEM_TO_REG" value="0x42"/> |
| |
| <value name="CP_EXEC_CS_INDIRECT" value="0x41" variants="A4XX-"/> |
| <value name="CP_EXEC_CS" value="0x33"/> |
| |
| <doc> |
| for a5xx |
| </doc> |
| <value name="CP_PERFCOUNTER_ACTION" value="0x50" variants="A5XX"/> |
| <!-- switches SMMU pagetable, used on a5xx+ only --> |
| <value name="CP_SMMU_TABLE_UPDATE" value="0x53" variants="A5XX-"/> |
| <!-- for a6xx --> |
| <doc>Tells CP the current mode of GPU operation</doc> |
| <value name="CP_SET_MARKER" value="0x65" variants="A6XX"/> |
| <doc>Instruct CP to set a few internal CP registers</doc> |
| <value name="CP_SET_PSEUDO_REG" value="0x56" variants="A6XX"/> |
| <!-- |
| pairs of regid and value.. seems to be used to program some TF |
| related regs: |
| --> |
| <value name="CP_CONTEXT_REG_BUNCH" value="0x5c" variants="A5XX-"/> |
| <!-- A5XX Enable yield in RB only --> |
| <value name="CP_YIELD_ENABLE" value="0x1c" variants="A5XX"/> |
| <doc> |
| Enables IB2 skipping. If both GLOBAL and LOCAL are 1 and |
| nothing is left in the visibility stream, then |
| CP_INDIRECT_BUFFER will be skipped, and draws will early return |
| from their IB. |
| </doc> |
| <value name="CP_SKIP_IB2_ENABLE_GLOBAL" value="0x1d" variants="A5XX-"/> |
| <value name="CP_SKIP_IB2_ENABLE_LOCAL" value="0x23" variants="A5XX-"/> |
| <value name="CP_SET_SUBDRAW_SIZE" value="0x35" variants="A5XX-"/> |
| <value name="CP_WHERE_AM_I" value="0x62" variants="A5XX-"/> |
| <value name="CP_SET_VISIBILITY_OVERRIDE" value="0x64" variants="A5XX-"/> |
| <!-- Enable/Disable/Defer A5x global preemption model --> |
| <value name="CP_PREEMPT_ENABLE_GLOBAL" value="0x69" variants="A5XX"/> |
| <!-- Enable/Disable A5x local preemption model --> |
| <value name="CP_PREEMPT_ENABLE_LOCAL" value="0x6a" variants="A5XX"/> |
| <!-- Yield token on a5xx similar to CP_PREEMPT on a4xx --> |
| <value name="CP_CONTEXT_SWITCH_YIELD" value="0x6b" variants="A5XX"/> |
| <!-- Inform CP about current render mode (needed for a5xx preemption) --> |
| <value name="CP_SET_RENDER_MODE" value="0x6c" variants="A5XX"/> |
| <value name="CP_COMPUTE_CHECKPOINT" value="0x6e" variants="A5XX"/> |
| <!-- check if this works on earlier.. --> |
| <value name="CP_MEM_TO_MEM" value="0x73" variants="A5XX-"/> |
| <value name="CP_BLIT" value="0x2c" variants="A5XX-"/> |
| |
| <!-- Test specified bit in specified register and set predicate --> |
| <value name="CP_REG_TEST" value="0x39" variants="A5XX-"/> |
| |
| <!-- |
| Seems to set the mode flags which control which CP_SET_DRAW_STATE |
| packets are executed, based on their ENABLE_MASK values |
| |
| CP_SET_MODE w/ payload of 0x1 seems to cause CP_SET_DRAW_STATE |
| packets w/ ENABLE_MASK & 0x6 to execute immediately |
| --> |
| <value name="CP_SET_MODE" value="0x63" variants="A6XX"/> |
| |
| <!-- |
| Seems like there are now separate blocks of state for VS vs FS/CS |
| (probably these amount to geometry vs fragments, so that the geometry |
| stage of the pipeline for the next draw can start while the fragment |
| stage of the current draw is still running). The format of the payload |
| of the packets is the same, the only difference is the offsets of the |
| regs the firmware code that handles the packet writes. |
| |
| Note that for CL, starting with a6xx, the preferred # of local |
| threads is no longer the same as the max, implying that the shader |
| core can now run warps from unrelated shaders (ie. |
| CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE vs |
| CL_KERNEL_WORK_GROUP_SIZE) |
| --> |
| <value name="CP_LOAD_STATE6_GEOM" value="0x32" variants="A6XX"/> |
| <value name="CP_LOAD_STATE6_FRAG" value="0x34" variants="A6XX"/> |
| <!-- |
| Note: For IBO state (Image/SSBOs) which have shared state across |
| shader stages, for 3d pipeline CP_LOAD_STATE6 is used. But for |
| compute shaders, CP_LOAD_STATE6_FRAG is used. Possibly they are |
| interchangeable. |
| --> |
| <value name="CP_LOAD_STATE6" value="0x36" variants="A6XX"/> |
| |
| <!-- internal packets: --> |
| <value name="IN_IB_PREFETCH_END" value="0x17" variants="A2XX"/> |
| <value name="IN_SUBBLK_PREFETCH" value="0x1f" variants="A2XX"/> |
| <value name="IN_INSTR_PREFETCH" value="0x20" variants="A2XX"/> |
| <value name="IN_INSTR_MATCH" value="0x47" variants="A2XX"/> |
| <value name="IN_CONST_PREFETCH" value="0x49" variants="A2XX"/> |
| <value name="IN_INCR_UPDT_STATE" value="0x55" variants="A2XX"/> |
| <value name="IN_INCR_UPDT_CONST" value="0x56" variants="A2XX"/> |
| <value name="IN_INCR_UPDT_INSTR" value="0x57" variants="A2XX"/> |
| |
| <!-- jmptable entry used to handle type4 packet on a5xx+: --> |
| <value name="PKT4" value="0x04" variants="A5XX-"/> |
| |
| <!-- TODO do these exist on A5xx? --> |
| <value name="CP_SCRATCH_WRITE" value="0x4c" variants="A6XX"/> |
| <value name="CP_REG_TO_MEM_OFFSET_MEM" value="0x74" variants="A6XX"/> |
| <value name="CP_REG_TO_MEM_OFFSET_REG" value="0x72" variants="A6XX"/> |
| <value name="CP_WAIT_MEM_GTE" value="0x14" variants="A6XX"/> |
| <value name="CP_WAIT_TWO_REGS" value="0x70" variants="A6XX"/> |
| <value name="CP_MEMCPY" value="0x75" variants="A6XX"/> |
| <value name="CP_SET_BIN_DATA5_OFFSET" value="0x2e" variants="A6XX"/> |
| <!-- Note, kgsl calls this CP_SET_AMBLE: --> |
| <value name="CP_SET_CTXSWITCH_IB" value="0x55" variants="A6XX"/> |
| |
| <!-- |
| Seems to always have the payload: |
| 00000002 00008801 00004010 |
| or: |
| 00000002 00008801 00004090 |
| or: |
| 00000002 00008801 00000010 |
| 00000002 00008801 00010010 |
| 00000002 00008801 00d64010 |
| ... |
| Note set for compute shaders.. |
| Is 0x8801 a register offset? |
| This appears to be a special sort of register write packet |
| more or less, but the firmware has some special handling.. |
| Seems like it intercepts/modifies certain register offsets, |
| but others are treated like a normal PKT4 reg write. I |
| guess there are some registers that the fw controls certain |
| bits. |
| --> |
| <value name="CP_REG_WRITE" value="0x6d" variants="A6XX"/> |
| |
| <doc> |
| These first appear in a650_sqe.bin. They can in theory be used |
| to loop any sequence of IB1 commands, but in practice they are |
| used to loop over bins. There is a fixed-size per-iteration |
| prefix, used to set per-bin state, and then the following IB1 |
| commands are executed until CP_END_BIN which are always the same |
| for each iteration and usually contain a list of |
| CP_INDIRECT_BUFFER calls to IB2 commands which setup state and |
| execute restore/draw/save commands. This replaces the previous |
| technique of just repeating the CP_INDIRECT_BUFFER calls and |
| "unrolling" the loop. |
| </doc> |
| <value name="CP_START_BIN" value="0x50" variants="A6XX"/> |
| <value name="CP_END_BIN" value="0x51" variants="A6XX"/> |
| |
| <value name="CP_WAIT_TIMESTAMP" value="0x14" variants="A7XX-"/> |
| <value name="CP_THREAD_CONTROL" value="0x17" variants="A7XX-"/> |
| </enum> |
| |
| |
| <domain name="CP_LOAD_STATE" width="32"> <!-- Payload layout of the CP_LOAD_STATE packet (opcode 0x30, variants="A3XX" in adreno_pm4_type3_packets): two 32-bit words. --> |
| <doc>Load state, a3xx (and later?)</doc> |
| <enum name="adreno_state_block"> <!-- type of STATE_BLOCK (word 0, bits 19-21) --> |
| <value name="SB_VERT_TEX" value="0"/> |
| <value name="SB_VERT_MIPADDR" value="1"/> |
| <value name="SB_FRAG_TEX" value="2"/> |
| <value name="SB_FRAG_MIPADDR" value="3"/> |
| <value name="SB_VERT_SHADER" value="4"/> |
| <value name="SB_GEOM_SHADER" value="5"/> |
| <value name="SB_FRAG_SHADER" value="6"/> |
| <value name="SB_COMPUTE_SHADER" value="7"/> |
| </enum> |
| <enum name="adreno_state_type"> <!-- type of STATE_TYPE (word 1, bits 0-1) --> |
| <value name="ST_SHADER" value="0"/> |
| <value name="ST_CONSTANTS" value="1"/> |
| </enum> |
| <enum name="adreno_state_src"> <!-- type of STATE_SRC (word 0, bits 16-18); values 1 and 7 are not named here --> |
| <value name="SS_DIRECT" value="0"> |
| <doc>inline with the CP_LOAD_STATE packet</doc> |
| </value> |
| <value name="SS_INVALID_ALL_IC" value="2"/> |
| <value name="SS_INVALID_PART_IC" value="3"/> |
| <value name="SS_INDIRECT" value="4"> |
| <doc>in buffer pointed to by EXT_SRC_ADDR</doc> |
| </value> |
| <value name="SS_INDIRECT_TCM" value="5"/> |
| <value name="SS_INDIRECT_STM" value="6"/> |
| </enum> |
| <reg32 offset="0" name="0"> <!-- word 0: destination offset, state source, state block and unit count --> |
| <bitfield name="DST_OFF" low="0" high="15" type="uint"/> |
| <bitfield name="STATE_SRC" low="16" high="18" type="adreno_state_src"/> |
| <bitfield name="STATE_BLOCK" low="19" high="21" type="adreno_state_block"/> |
| <bitfield name="NUM_UNIT" low="22" high="31" type="uint"/> |
| </reg32> |
| <reg32 offset="1" name="1"> <!-- word 1: state type in bits 0-1, EXT_SRC_ADDR in bits 2-31 with shr="2" --> |
| <bitfield name="STATE_TYPE" low="0" high="1" type="adreno_state_type"/> |
| <bitfield name="EXT_SRC_ADDR" low="2" high="31" shr="2"/> |
| </reg32> |
| </domain> |
| |
| <domain name="CP_LOAD_STATE4" width="32" varset="chip"> <!-- Payload layout of the CP_LOAD_STATE4 packet (opcode 0x30, variants="A4XX-A5XX" in adreno_pm4_type3_packets). --> |
| <doc>Load state, a4xx+</doc> |
| <enum name="a4xx_state_block"> <!-- type of STATE_BLOCK (word 0, bits 18-21); values 0x6 and 0x7 are not named, see the note below --> |
| <!-- |
| unknown: 0x7 and 0xf <- seen in compute shader |
| |
| STATE_BLOCK = 0x6, STATE_TYPE = 0x2 possibly used for preemption? |
| Seen in some GL shaders. Payload is NUM_UNIT dwords, and it contains |
| the gpuaddr of the following shader constants block. DST_OFF seems |
| to specify which shader stage: |
| |
| 16 -> vert |
| 36 -> tcs |
| 56 -> tes |
| 76 -> geom |
| 96 -> frag |
| |
| Example: |
| |
| opcode: CP_LOAD_STATE4 (30) (12 dwords) |
| { DST_OFF = 16 | STATE_SRC = SS4_DIRECT | STATE_BLOCK = 0x6 | NUM_UNIT = 4 } |
| { STATE_TYPE = 0x2 | EXT_SRC_ADDR = 0 } |
| { EXT_SRC_ADDR_HI = 0 } |
| 0000: c0264100 00000000 00000000 00000000 |
| 0000: 70b0000b 01180010 00000002 00000000 c0264100 00000000 00000000 00000000 |
| |
| opcode: CP_LOAD_STATE4 (30) (4 dwords) |
| { DST_OFF = 16 | STATE_SRC = SS4_INDIRECT | STATE_BLOCK = SB4_VS_SHADER | NUM_UNIT = 4 } |
| { STATE_TYPE = ST4_CONSTANTS | EXT_SRC_ADDR = 0xc0264100 } |
| { EXT_SRC_ADDR_HI = 0 } |
| 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 |
| 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 |
| 0000: 00000040 0000000c 00000000 00000000 00000000 00000000 00000000 00000000 |
| |
| STATE_BLOCK = 0x6, STATE_TYPE = 0x1, seen in compute shader. NUM_UNITS * 2 dwords. |
| |
| --> |
| <value name="SB4_VS_TEX" value="0x0"/> |
| <value name="SB4_HS_TEX" value="0x1"/> <!-- aka. TCS --> |
| <value name="SB4_DS_TEX" value="0x2"/> <!-- aka. TES --> |
| <value name="SB4_GS_TEX" value="0x3"/> |
| <value name="SB4_FS_TEX" value="0x4"/> |
| <value name="SB4_CS_TEX" value="0x5"/> |
| <value name="SB4_VS_SHADER" value="0x8"/> |
| <value name="SB4_HS_SHADER" value="0x9"/> |
| <value name="SB4_DS_SHADER" value="0xa"/> |
| <value name="SB4_GS_SHADER" value="0xb"/> |
| <value name="SB4_FS_SHADER" value="0xc"/> |
| <value name="SB4_CS_SHADER" value="0xd"/> |
| <!-- |
| for SSBO, STATE_TYPE=0 appears to be addresses (four dwords each), |
| STATE_TYPE=1 sizes, STATE_TYPE=2 addresses again (two dwords each) |
| |
| Compute has its own dedicated SSBO state, it seems, but the rest |
| of the stages share state |
| --> |
| <value name="SB4_SSBO" value="0xe"/> |
| <value name="SB4_CS_SSBO" value="0xf"/> |
| </enum> |
| <enum name="a4xx_state_type"> <!-- type of STATE_TYPE (word 1, bits 0-1) --> |
| <value name="ST4_SHADER" value="0"/> |
| <value name="ST4_CONSTANTS" value="1"/> |
| <value name="ST4_UBO" value="2"/> |
| </enum> |
| <enum name="a4xx_state_src"> <!-- type of STATE_SRC (word 0, bits 16-17); value 1 is not named here --> |
| <value name="SS4_DIRECT" value="0"/> |
| <value name="SS4_INDIRECT" value="2"/> |
| </enum> |
| <reg32 offset="0" name="0"> <!-- word 0; bits 14-15 are not assigned in this layout --> |
| <bitfield name="DST_OFF" low="0" high="13" type="uint"/> |
| <bitfield name="STATE_SRC" low="16" high="17" type="a4xx_state_src"/> |
| <bitfield name="STATE_BLOCK" low="18" high="21" type="a4xx_state_block"/> |
| <bitfield name="NUM_UNIT" low="22" high="31" type="uint"/> |
| </reg32> |
| <reg32 offset="1" name="1"> <!-- word 1: state type in bits 0-1, EXT_SRC_ADDR in bits 2-31 with shr="2" --> |
| <bitfield name="STATE_TYPE" low="0" high="1" type="a4xx_state_type"/> |
| <bitfield name="EXT_SRC_ADDR" low="2" high="31" shr="2"/> |
| </reg32> |
| <reg32 offset="2" name="2" varset="chip" variants="A5XX-"> <!-- word 2 is declared only for A5XX and later; presumably the upper 32 bits of EXT_SRC_ADDR; TODO confirm --> |
| <bitfield name="EXT_SRC_ADDR_HI" low="0" high="31" shr="0"/> |
| </reg32> |
| </domain> |
| |
| <!-- looks basically the same as CP_LOAD_STATE4 --> |
| <domain name="CP_LOAD_STATE6" width="32" varset="chip"> <!-- Payload layout for the a6xx load-state packets; a note in adreno_pm4_type3_packets says CP_LOAD_STATE6_GEOM and CP_LOAD_STATE6_FRAG use the same payload format. --> |
| <doc>Load state, a6xx+</doc> |
| <enum name="a6xx_state_block"> <!-- type of STATE_BLOCK (word 0, bits 18-21); values 0x6 and 0x7 are not named --> |
| <value name="SB6_VS_TEX" value="0x0"/> |
| <value name="SB6_HS_TEX" value="0x1"/> <!-- aka. TCS --> |
| <value name="SB6_DS_TEX" value="0x2"/> <!-- aka. TES --> |
| <value name="SB6_GS_TEX" value="0x3"/> |
| <value name="SB6_FS_TEX" value="0x4"/> |
| <value name="SB6_CS_TEX" value="0x5"/> |
| <value name="SB6_VS_SHADER" value="0x8"/> |
| <value name="SB6_HS_SHADER" value="0x9"/> |
| <value name="SB6_DS_SHADER" value="0xa"/> |
| <value name="SB6_GS_SHADER" value="0xb"/> |
| <value name="SB6_FS_SHADER" value="0xc"/> |
| <value name="SB6_CS_SHADER" value="0xd"/> |
| <value name="SB6_IBO" value="0xe"/> |
| <value name="SB6_CS_IBO" value="0xf"/> |
| </enum> |
| <enum name="a6xx_state_type"> <!-- type of STATE_TYPE (word 0, bits 14-15) --> |
| <value name="ST6_SHADER" value="0"/> |
| <value name="ST6_CONSTANTS" value="1"/> |
| <value name="ST6_UBO" value="2"/> |
| <value name="ST6_IBO" value="3"/> |
| </enum> |
| <enum name="a6xx_state_src"> <!-- type of STATE_SRC (word 0, bits 16-17) --> |
| <value name="SS6_DIRECT" value="0"/> |
| <value name="SS6_BINDLESS" value="1"/> <!-- TODO does this exist on a4xx/a5xx? --> |
| <value name="SS6_INDIRECT" value="2"/> |
| <doc> |
| SS6_UBO used by the a6xx vulkan blob with tesselation constants |
| in this case, EXT_SRC_ADDR is (ubo_id shl 16 | offset) |
| to load constants from a UBO loaded with DST_OFF = 14 and offset 0, |
| EXT_SRC_ADDR = 0xe0000 |
| (offset is a guess, should be in bytes given that maxUniformBufferRange=64k) |
| </doc> |
| <value name="SS6_UBO" value="3"/> |
| </enum> |
| <reg32 offset="0" name="0"> |
| <bitfield name="DST_OFF" low="0" high="13" type="uint"/> |
| <bitfield name="STATE_TYPE" low="14" high="15" type="a6xx_state_type"/> <!-- STATE_TYPE sits in word 0 here; CP_LOAD_STATE4 keeps it in word 1, bits 0-1 --> |
| <bitfield name="STATE_SRC" low="16" high="17" type="a6xx_state_src"/> |
| <bitfield name="STATE_BLOCK" low="18" high="21" type="a6xx_state_block"/> |
| <bitfield name="NUM_UNIT" low="22" high="31" type="uint"/> |
| </reg32> |
| <reg32 offset="1" name="1"> |
| <bitfield name="EXT_SRC_ADDR" low="2" high="31" shr="2"/> |
| </reg32> |
| <reg32 offset="2" name="2"> |
| <bitfield name="EXT_SRC_ADDR_HI" low="0" high="31" shr="0"/> |
| </reg32> |
| <reg64 offset="1" name="EXT_SRC_ADDR" type="address"/> <!-- 64-bit address view declared at the same offset as words 1 and 2 above --> |
| </domain> |
| |
| <bitset name="vgt_draw_initiator" inline="yes"> |
| <!-- |
| Draw initiator dword, used as dword 1 of CP_DRAW_INDX and |
| CP_DRAW_INDX_2. Bit 8 and bits 15..23 are not covered by any field. |
| --> |
| <bitfield name="PRIM_TYPE" low="0" high="5" type="pc_di_primtype"/> |
| <bitfield name="SOURCE_SELECT" low="6" high="7" type="pc_di_src_sel"/> |
| <bitfield name="VIS_CULL" low="9" high="10" type="pc_di_vis_cull_mode"/> |
| <bitfield name="INDEX_SIZE" pos="11" type="pc_di_index_size"/> |
| <bitfield name="NOT_EOP" pos="12" type="boolean"/> |
| <bitfield name="SMALL_INDEX" pos="13" type="boolean"/> |
| <bitfield name="PRE_DRAW_INITIATOR_ENABLE" pos="14" type="boolean"/> |
| <bitfield name="NUM_INSTANCES" low="24" high="31" type="uint"/> |
| </bitset> |
| |
| <!-- changed on a4xx: --> |
| <enum name="a4xx_index_size"> |
| <!-- values for the 2-bit INDEX_SIZE field of vgt_draw_initiator_a4xx; value 3 is not assigned --> |
| <value name="INDEX4_SIZE_8_BIT" value="0"/> |
| <value name="INDEX4_SIZE_16_BIT" value="1"/> |
| <value name="INDEX4_SIZE_32_BIT" value="2"/> |
| </enum> |
| |
| <enum name="a6xx_patch_type"> |
| <!-- values for the 2-bit PATCH_TYPE field of vgt_draw_initiator_a4xx; value 3 is not assigned --> |
| <value name="TESS_QUADS" value="0"/> |
| <value name="TESS_TRIANGLES" value="1"/> |
| <value name="TESS_ISOLINES" value="2"/> |
| </enum> |
| |
| <bitset name="vgt_draw_initiator_a4xx" inline="yes"> |
| <!-- |
| Draw initiator dword, used as dword 0 of CP_DRAW_INDX_OFFSET, |
| CP_DRAW_INDIRECT, CP_DRAW_INDX_INDIRECT and CP_DRAW_INDIRECT_MULTI. |
| Bits 14..15 and 18..31 are not covered by any field. |
| --> |
| <!-- When the 0x20 bit is set, it's the number of patch vertices - 1 --> |
| <bitfield name="PRIM_TYPE" low="0" high="5" type="pc_di_primtype"/> |
| <bitfield name="SOURCE_SELECT" low="6" high="7" type="pc_di_src_sel"/> |
| <bitfield name="VIS_CULL" low="8" high="9" type="pc_di_vis_cull_mode"/> |
| <bitfield name="INDEX_SIZE" low="10" high="11" type="a4xx_index_size"/> |
| <bitfield name="PATCH_TYPE" low="12" high="13" type="a6xx_patch_type"/> |
| <bitfield name="GS_ENABLE" pos="16" type="boolean"/> |
| <bitfield name="TESS_ENABLE" pos="17" type="boolean"/> |
| </bitset> |
| |
| <domain name="CP_DRAW_INDX" width="32"> |
| <!-- five-dword payload; dword 1 is decoded with the vgt_draw_initiator bitset --> |
| <reg32 offset="0" name="0"> |
| <bitfield name="VIZ_QUERY" low="0" high="31"/> |
| </reg32> |
| <reg32 offset="1" name="1" type="vgt_draw_initiator"/> |
| <reg32 offset="2" name="2"> |
| <bitfield name="NUM_INDICES" low="0" high="31" type="uint"/> |
| </reg32> |
| <!-- NOTE(review): INDX_BASE and INDX_SIZE carry no type attribute; presumably an address and a size, confirm units --> |
| <reg32 offset="3" name="3"> |
| <bitfield name="INDX_BASE" low="0" high="31"/> |
| </reg32> |
| <reg32 offset="4" name="4"> |
| <bitfield name="INDX_SIZE" low="0" high="31"/> |
| </reg32> |
| </domain> |
| |
| <domain name="CP_DRAW_INDX_2" width="32"> |
| <!-- same first three dwords as CP_DRAW_INDX; the index data follows inline in the packet --> |
| <reg32 offset="0" name="0"> |
| <bitfield name="VIZ_QUERY" low="0" high="31"/> |
| </reg32> |
| <reg32 offset="1" name="1" type="vgt_draw_initiator"/> |
| <reg32 offset="2" name="2"> |
| <bitfield name="NUM_INDICES" low="0" high="31" type="uint"/> |
| </reg32> |
| <!-- followed by NUM_INDICES indices.. --> |
| </domain> |
| |
| <domain name="CP_DRAW_INDX_OFFSET" width="32"> |
| <!-- dwords 0..3 are common to all variants; dword 0 is decoded with vgt_draw_initiator_a4xx --> |
| <reg32 offset="0" name="0" type="vgt_draw_initiator_a4xx"/> |
| <reg32 offset="1" name="1"> |
| <bitfield name="NUM_INSTANCES" low="0" high="31" type="uint"/> |
| </reg32> |
| <reg32 offset="2" name="2"> |
| <bitfield name="NUM_INDICES" low="0" high="31" type="uint"/> |
| </reg32> |
| <reg32 offset="3" name="3"> |
| <bitfield name="FIRST_INDX" low="0" high="31"/> |
| </reg32> |
| |
| <!-- layout for the A5XX- variant range: 64-bit index base (dwords 4 and 5) plus element count (dword 6) --> |
| <stripe varset="chip" variants="A5XX-"> |
| <reg32 offset="4" name="4"> |
| <bitfield name="INDX_BASE_LO" low="0" high="31"/> |
| </reg32> |
| <reg32 offset="5" name="5"> |
| <bitfield name="INDX_BASE_HI" low="0" high="31"/> |
| </reg32> |
| <reg64 offset="4" name="INDX_BASE" type="address"/> |
| <reg32 offset="6" name="6"> |
| <!-- max # of elements in index buffer --> |
| <bitfield name="MAX_INDICES" low="0" high="31"/> |
| </reg32> |
| </stripe> |
| |
| <!-- |
| NOTE(review): offsets 4 and 5 are declared twice in this domain, once |
| inside the A5XX- stripe above and once unconditionally below. The |
| unconditional pair presumably describes chips outside that stripe; |
| confirm before relying on either set of names. |
| --> |
| <reg32 offset="4" name="4"> |
| <bitfield name="INDX_BASE" low="0" high="31" type="address"/> |
| </reg32> |
| |
| <reg32 offset="5" name="5"> |
| <bitfield name="INDX_SIZE" low="0" high="31" type="uint"/> |
| </reg32> |
| </domain> |
| |
| <domain name="CP_DRAW_INDIRECT" width="32" varset="chip" prefix="chip" variants="A4XX-"> |
| <!-- dword 0 is the draw initiator; the INDIRECT pointer is one dword on A4XX and two dwords in the A5XX- range --> |
| <reg32 offset="0" name="0" type="vgt_draw_initiator_a4xx"/> |
| <stripe varset="chip" variants="A4XX"> |
| <reg32 offset="1" name="1"> |
| <bitfield name="INDIRECT" low="0" high="31"/> |
| </reg32> |
| </stripe> |
| <stripe varset="chip" variants="A5XX-"> |
| <reg32 offset="1" name="1"> |
| <bitfield name="INDIRECT_LO" low="0" high="31"/> |
| </reg32> |
| <reg32 offset="2" name="2"> |
| <bitfield name="INDIRECT_HI" low="0" high="31"/> |
| </reg32> |
| <!-- 64-bit view of dwords 1 and 2 --> |
| <reg64 offset="1" name="INDIRECT" type="address"/> |
| </stripe> |
| </domain> |
| |
| <domain name="CP_DRAW_INDX_INDIRECT" width="32" varset="chip" prefix="chip" variants="A4XX-"> |
| <!-- |
| dword 0 is the draw initiator. The A4XX stripe uses 32-bit pointers |
| and a byte-sized index limit; the A5XX- stripe uses 64-bit pointers |
| and an element-count limit (see the per-field comments). |
| --> |
| <reg32 offset="0" name="0" type="vgt_draw_initiator_a4xx"/> |
| <stripe varset="chip" variants="A4XX"> |
| <reg32 offset="1" name="1"> |
| <bitfield name="INDX_BASE" low="0" high="31"/> |
| </reg32> |
| <reg32 offset="2" name="2"> |
| <!-- max # of bytes in index buffer --> |
| <bitfield name="INDX_SIZE" low="0" high="31" type="uint"/> |
| </reg32> |
| <reg32 offset="3" name="3"> |
| <bitfield name="INDIRECT" low="0" high="31"/> |
| </reg32> |
| </stripe> |
| <stripe varset="chip" variants="A5XX-"> |
| <reg32 offset="1" name="1"> |
| <bitfield name="INDX_BASE_LO" low="0" high="31"/> |
| </reg32> |
| <reg32 offset="2" name="2"> |
| <bitfield name="INDX_BASE_HI" low="0" high="31"/> |
| </reg32> |
| <!-- 64-bit view of dwords 1 and 2 --> |
| <reg64 offset="1" name="INDX_BASE" type="address"/> |
| <reg32 offset="3" name="3"> |
| <!-- max # of elements in index buffer --> |
| <bitfield name="MAX_INDICES" low="0" high="31" type="uint"/> |
| </reg32> |
| <reg32 offset="4" name="4"> |
| <bitfield name="INDIRECT_LO" low="0" high="31"/> |
| </reg32> |
| <reg32 offset="5" name="5"> |
| <bitfield name="INDIRECT_HI" low="0" high="31"/> |
| </reg32> |
| <!-- 64-bit view of dwords 4 and 5 --> |
| <reg64 offset="4" name="INDIRECT" type="address"/> |
| </stripe> |
| </domain> |
| |
| <domain name="CP_DRAW_INDIRECT_MULTI" width="32" varset="chip" prefix="chip" variants="A6XX-"> |
| <!-- |
| Dwords 0..2 are common. The OPCODE field in dword 1 is marked |
| addvariant="yes" and each stripe below is keyed on one |
| a6xx_draw_indirect_opcode value, so the layout from dword 3 onward |
| depends on OPCODE. |
| --> |
| <enum name="a6xx_draw_indirect_opcode"> |
| <value name="INDIRECT_OP_NORMAL" value="0x2"/> |
| <value name="INDIRECT_OP_INDEXED" value="0x4"/> |
| <value name="INDIRECT_OP_INDIRECT_COUNT" value="0x6"/> |
| <value name="INDIRECT_OP_INDIRECT_COUNT_INDEXED" value="0x7"/> |
| </enum> |
| <reg32 offset="0" name="0" type="vgt_draw_initiator_a4xx"/> |
| <reg32 offset="1" name="1"> |
| <bitfield name="OPCODE" low="0" high="3" type="a6xx_draw_indirect_opcode" addvariant="yes"/> |
| <doc> |
| DST_OFF same as in CP_LOAD_STATE6 - vec4 VS const at this offset will |
| be updated for each draw to {draw_id, first_vertex, first_instance, 0} |
| value of 0 disables it |
| </doc> |
| <bitfield name="DST_OFF" low="8" high="21" type="hex"/> |
| </reg32> |
| <reg32 offset="2" name="DRAW_COUNT" type="uint"/> |
| <!-- OPCODE = INDIRECT_OP_NORMAL: indirect buffer address and stride --> |
| <stripe varset="a6xx_draw_indirect_opcode" variants="INDIRECT_OP_NORMAL"> |
| <reg64 offset="3" name="INDIRECT" type="address"/> |
| <reg32 offset="5" name="STRIDE" type="uint"/> |
| </stripe> |
| <!-- OPCODE = INDIRECT_OP_INDEXED: index buffer address and limit precede the indirect buffer --> |
| <stripe varset="a6xx_draw_indirect_opcode" variants="INDIRECT_OP_INDEXED" prefix="INDEXED"> |
| <reg64 offset="3" name="INDEX" type="address"/> |
| <reg32 offset="5" name="MAX_INDICES" type="uint"/> |
| <reg64 offset="6" name="INDIRECT" type="address"/> |
| <reg32 offset="8" name="STRIDE" type="uint"/> |
| </stripe> |
| <!-- OPCODE = INDIRECT_OP_INDIRECT_COUNT: adds an INDIRECT_COUNT address after the indirect buffer --> |
| <stripe varset="a6xx_draw_indirect_opcode" variants="INDIRECT_OP_INDIRECT_COUNT" prefix="INDIRECT"> |
| <reg64 offset="3" name="INDIRECT" type="address"/> |
| <reg64 offset="5" name="INDIRECT_COUNT" type="address"/> |
| <reg32 offset="7" name="STRIDE" type="uint"/> |
| </stripe> |
| <!-- OPCODE = INDIRECT_OP_INDIRECT_COUNT_INDEXED: index buffer, indirect buffer and INDIRECT_COUNT address --> |
| <stripe varset="a6xx_draw_indirect_opcode" variants="INDIRECT_OP_INDIRECT_COUNT_INDEXED" prefix="INDIRECT_INDEXED"> |
| <reg64 offset="3" name="INDEX" type="address"/> |
| <reg32 offset="5" name="MAX_INDICES" type="uint"/> |
| <reg64 offset="6" name="INDIRECT" type="address"/> |
| <reg64 offset="8" name="INDIRECT_COUNT" type="address"/> |
| <reg32 offset="10" name="STRIDE" type="uint"/> |
| </stripe> |
| </domain> |
| |
| <domain name="CP_DRAW_PRED_ENABLE_GLOBAL" width="32" varset="chip"> |
| <!-- single-dword payload; only bit 0 (ENABLE) is described --> |
| <reg32 offset="0" name="0"> |
| <bitfield name="ENABLE" pos="0" type="boolean"/> |
| </reg32> |
| </domain> |
| |
| <domain name="CP_DRAW_PRED_ENABLE_LOCAL" width="32" varset="chip"> |
| <!-- single-dword payload with the same layout as CP_DRAW_PRED_ENABLE_GLOBAL: only bit 0 (ENABLE) is described --> |
| <reg32 offset="0" name="0"> |
| <bitfield name="ENABLE" pos="0" type="boolean"/> |
| </reg32> |
| </domain> |
| |
| <domain name="CP_DRAW_PRED_SET" width="32" varset="chip"> |
| <!-- control dword 0 (predicate source and test) followed by a 64-bit memory address at offset 1 --> |
| <enum name="cp_draw_pred_src"> |
| <!-- |
| Sources 1-4 seem to be about combining reading |
| SO/primitive queries and setting the predicate, which is |
| a DX11-specific optimization (since in DX11 you can only |
| predicate on the result of queries). |
| --> |
| <value name="PRED_SRC_MEM" value="5"> |
| <doc> |
| Read a 64-bit value at the given address and |
| test if it equals/doesn't equal 0. |
| </doc> |
| </value> |
| </enum> |
| <!-- values for the 1-bit TEST field --> |
| <enum name="cp_draw_pred_test"> |
| <value name="NE_0_PASS" value="0"/> |
| <value name="EQ_0_PASS" value="1"/> |
| </enum> |
| <!-- dword 0: only bits 4..8 are described --> |
| <reg32 offset="0" name="0"> |
| <bitfield name="SRC" low="4" high="7" type="cp_draw_pred_src"/> |
| <bitfield name="TEST" pos="8" type="cp_draw_pred_test"/> |
| </reg32> |
| <reg64 offset="1" name="MEM_ADDR" type="address"/> |
| </domain> |
| |
| <domain name="CP_SET_DRAW_STATE" width="32" varset="chip" variants="A4XX-"> |
| <!-- array of up to 100 entries, three dwords per entry (stride 3) --> |
| <array offset="0" stride="3" length="100"> |
| <!-- entry dword 0: count, flags and group id; bit 23 and bits 29..31 are not described --> |
| <reg32 offset="0" name="0"> |
| <bitfield name="COUNT" low="0" high="15" type="uint"/> |
| <bitfield name="DIRTY" pos="16" type="boolean"/> |
| <bitfield name="DISABLE" pos="17" type="boolean"/> |
| <bitfield name="DISABLE_ALL_GROUPS" pos="18" type="boolean"/> |
| <bitfield name="LOAD_IMMED" pos="19" type="boolean"/> |
| <!-- the next three flags are only defined for the A6XX- variant range --> |
| <bitfield name="BINNING" pos="20" varset="chip" variants="A6XX-" type="boolean"/> |
| <bitfield name="GMEM" pos="21" varset="chip" variants="A6XX-" type="boolean"/> |
| <bitfield name="SYSMEM" pos="22" varset="chip" variants="A6XX-" type="boolean"/> |
| <bitfield name="GROUP_ID" low="24" high="28" type="uint"/> |
| </reg32> |
| <reg32 offset="1" name="1"> |
| <bitfield name="ADDR_LO" low="0" high="31" type="hex"/> |
| </reg32> |
| <!-- the high address dword is only defined for the A5XX- variant range --> |
| <reg32 offset="2" name="2" varset="chip" variants="A5XX-"> |
| <bitfield name="ADDR_HI" low="0" high="31" type="hex"/> |
| </reg32> |
| </array> |
| </domain> |
| |
| <domain name="CP_SET_BIN" width="32"> |
| <doc>value at offset 0 always seems to be 0x00000000..</doc> |
| <!-- dwords 1 and 2 each pack two 16-bit coordinates: (X1, Y1) and (X2, Y2) --> |
| <reg32 offset="0" name="0"/> |
| <reg32 offset="1" name="1"> |
| <bitfield name="X1" low="0" high="15" type="uint"/> |
| <bitfield name="Y1" low="16" high="31" type="uint"/> |
| </reg32> |
| <reg32 offset="2" name="2"> |
| <bitfield name="X2" low="0" high="15" type="uint"/> |
| <bitfield name="Y2" low="16" high="31" type="uint"/> |
| </reg32> |
| </domain> |
| |
| <domain name="CP_SET_BIN_DATA" width="32"> |
| <!-- two-dword payload: 32-bit bin data address and 32-bit bin size address --> |
| <reg32 offset="0" name="0"> |
| <!-- corresponds to VSC_PIPE[n].DATA_ADDR --> |
| <bitfield name="BIN_DATA_ADDR" low="0" high="31" type="hex"/> |
| </reg32> |
| <reg32 offset="1" name="1"> |
| <!-- seems to correspond to VSC_SIZE_ADDRESS --> |
| <bitfield name="BIN_SIZE_ADDRESS" low="0" high="31"/> |
| </reg32> |
| </domain> |
| |
| <domain name="CP_SET_BIN_DATA5" width="32"> |
| <!-- seven-dword payload: control dword 0, then three addresses each split into LO/HI dwords --> |
| <!-- dword 0: only bits 16..26 are described --> |
| <reg32 offset="0" name="0"> |
| <!-- equiv to PC_VSTREAM_CONTROL.SIZE on a3xx/a4xx: --> |
| <bitfield name="VSC_SIZE" low="16" high="21" type="uint"/> |
| <!-- equiv to PC_VSTREAM_CONTROL.N on a3xx/a4xx: --> |
| <bitfield name="VSC_N" low="22" high="26" type="uint"/> |
| </reg32> |
| <!-- BIN_DATA_ADDR -> VSC_PIPE[p].DATA_ADDRESS --> |
| <reg32 offset="1" name="1"> |
| <bitfield name="BIN_DATA_ADDR_LO" low="0" high="31" type="hex"/> |
| </reg32> |
| <reg32 offset="2" name="2"> |
| <bitfield name="BIN_DATA_ADDR_HI" low="0" high="31" type="hex"/> |
| </reg32> |
| <!-- BIN_SIZE_ADDRESS -> VSC_SIZE_ADDRESS + (p * 4)--> |
| <reg32 offset="3" name="3"> |
| <bitfield name="BIN_SIZE_ADDRESS_LO" low="0" high="31"/> |
| </reg32> |
| <reg32 offset="4" name="4"> |
| <bitfield name="BIN_SIZE_ADDRESS_HI" low="0" high="31"/> |
| </reg32> |
| <!-- new on a6xx, where BIN_DATA_ADDR is the DRAW_STRM: --> |
| <reg32 offset="5" name="5"> |
| <bitfield name="BIN_PRIM_STRM_LO" low="0" high="31"/> |
| </reg32> |
| <reg32 offset="6" name="6"> |
| <bitfield name="BIN_PRIM_STRM_HI" low="0" high="31"/> |
| </reg32> |
| </domain> |
| |
| <domain name="CP_SET_BIN_DATA5_OFFSET" width="32"> |
| <doc> |
| Like CP_SET_BIN_DATA5, but set the pointers as offsets from the |
| pointers stored in VSC_PIPE_{DATA,DATA2,SIZE}_ADDRESS. Useful |
| for Vulkan where these values aren't known when the command |
| stream is recorded. |
| </doc> |
| <!-- dword 0 has the same VSC_SIZE / VSC_N layout as dword 0 of CP_SET_BIN_DATA5 --> |
| <reg32 offset="0" name="0"> |
| <!-- equiv to PC_VSTREAM_CONTROL.SIZE on a3xx/a4xx: --> |
| <bitfield name="VSC_SIZE" low="16" high="21" type="uint"/> |
| <!-- equiv to PC_VSTREAM_CONTROL.N on a3xx/a4xx: --> |
| <bitfield name="VSC_N" low="22" high="26" type="uint"/> |
| </reg32> |
| <!-- dwords 1..3 each hold one 32-bit offset instead of a LO/HI address pair --> |
| <!-- BIN_DATA_ADDR -> VSC_PIPE[p].DATA_ADDRESS --> |
| <reg32 offset="1" name="1"> |
| <bitfield name="BIN_DATA_OFFSET" low="0" high="31" type="uint"/> |
| </reg32> |
| <!-- BIN_SIZE_ADDRESS -> VSC_SIZE_ADDRESS + (p * 4)--> |
| <reg32 offset="2" name="2"> |
| <bitfield name="BIN_SIZE_OFFSET" low="0" high="31" type="uint"/> |
| </reg32> |
| <!-- BIN_DATA2_ADDR -> VSC_PIPE[p].DATA2_ADDRESS --> |
| <reg32 offset="3" name="3"> |
| <bitfield name="BIN_DATA2_OFFSET" low="0" high="31" type="uint"/> |
| </reg32> |
| </domain> |
| |
| <domain name="CP_REG_RMW" width="32"> |
| <doc> |
| Modifies DST_REG using two sources that can either be registers |
| or immediates. If SRC1_ADD is set, then do the following: |
| |
| $dst = (($dst &amp; $src0) rot $rotate) + $src1 |
| |
| Otherwise: |
| |
| $dst = (($dst &amp; $src0) rot $rotate) | $src1 |
| |
| Here "rot" means rotate left. |
| </doc> |
| <!-- |
| The ampersand in the formulas above is written as an entity reference |
| because a bare ampersand in element text is not well-formed XML. |
| --> |
| <!-- dword 0: destination register, rotate amount and flags; bits 18..23 are not described --> |
| <reg32 offset="0" name="0"> |
| <bitfield name="DST_REG" low="0" high="17" type="hex"/> |
| <bitfield name="ROTATE" low="24" high="28" type="uint"/> |
| <bitfield name="SRC1_ADD" pos="29" type="boolean"/> |
| <bitfield name="SRC1_IS_REG" pos="30" type="boolean"/> |
| <bitfield name="SRC0_IS_REG" pos="31" type="boolean"/> |
| </reg32> |
| <!-- dwords 1 and 2: the two sources; presumably read as a register or an immediate according to SRC0_IS_REG / SRC1_IS_REG --> |
| <reg32 offset="1" name="1"> |
| <bitfield name="SRC0" low="0" high="31" type="uint"/> |
| </reg32> |
| <reg32 offset="2" name="2"> |
| <bitfield name="SRC1" low="0" high="31" type="uint"/> |
| </reg32> |
| </domain> |
| |
| <domain name="CP_REG_TO_MEM" width="32"> |
| <!-- control dword 0, then the destination address; the high address dword exists only for the A5XX- variant range --> |
| <reg32 offset="0" name="0"> |
| <bitfield name="REG" low="0" high="17" type="hex"/> |
| <!-- number of registers/dwords copied is max(CNT, 1). --> |
| <bitfield name="CNT" low="18" high="29" type="uint"/> |
| <bitfield name="64B" pos="30" type="boolean"/> |
| <bitfield name="ACCUMULATE" pos="31" type="boolean"/> |
| </reg32> |
| <reg32 offset="1" name="1"> |
| <bitfield name="DEST" low="0" high="31"/> |
| </reg32> |
| <reg32 offset="2" name="2" varset="chip" variants="A5XX-"> |
| <bitfield name="DEST_HI" low="0" high="31"/> |
| </reg32> |
| </domain> |
| |
| <domain name="CP_REG_TO_MEM_OFFSET_REG" width="32"> |
| <doc> |
| Like CP_REG_TO_MEM, but the memory address to write to can be |
| offsetted using either one or two registers or scratch |
| registers. |
| </doc> |
| <!-- dwords 0..2 have the same layout as CP_REG_TO_MEM; dword 3 adds the first offset source --> |
| <reg32 offset="0" name="0"> |
| <bitfield name="REG" low="0" high="17" type="hex"/> |
| <!-- number of registers/dwords copied is max(CNT, 1). --> |
| <bitfield name="CNT" low="18" high="29" type="uint"/> |
| <bitfield name="64B" pos="30" type="boolean"/> |
| <bitfield name="ACCUMULATE" pos="31" type="boolean"/> |
| </reg32> |
| <reg32 offset="1" name="1"> |
| <bitfield name="DEST" low="0" high="31"/> |
| </reg32> |
| <reg32 offset="2" name="2" varset="chip" variants="A5XX-"> |
| <bitfield name="DEST_HI" low="0" high="31"/> |
| </reg32> |
| <!-- dword 3: bit 18 and bits 20..31 are not described --> |
| <reg32 offset="3" name="3"> |
| <bitfield name="OFFSET0" low="0" high="17" type="hex"/> |
| <bitfield name="OFFSET0_SCRATCH" pos="19" type="boolean"/> |
| </reg32> |
| <!-- followed by an optional identical OFFSET1 dword --> |
| </domain> |
| |
| <domain name="CP_REG_TO_MEM_OFFSET_MEM" width="32"> |
| <doc> |
| Like CP_REG_TO_MEM, but the memory address to write to can be |
| offsetted using a DWORD in memory. |
| </doc> |
| <!-- dwords 0..2 have the same layout as CP_REG_TO_MEM; dwords 3 and 4 hold OFFSET_LO / OFFSET_HI --> |
| <reg32 offset="0" name="0"> |
| <bitfield name="REG" low="0" high="17" type="hex"/> |
| <!-- number of registers/dwords copied is max(CNT, 1). --> |
| <bitfield name="CNT" low="18" high="29" type="uint"/> |
| <bitfield name="64B" pos="30" type="boolean"/> |
| <bitfield name="ACCUMULATE" pos="31" type="boolean"/> |
| </reg32> |
| <reg32 offset="1" name="1"> |
| <bitfield name="DEST" low="0" high="31"/> |
| </reg32> |
| <reg32 offset="2" name="2" varset="chip" variants="A5XX-"> |
| <bitfield name="DEST_HI" low="0" high="31"/> |
| </reg32> |
| <!-- NOTE(review): presumably the 64-bit address of the offset DWORD mentioned in the doc above; confirm --> |
| <reg32 offset="3" name="3"> |
| <bitfield name="OFFSET_LO" low="0" high="31" type="hex"/> |
| </reg32> |
| <reg32 offset="4" name="4"> |
| <bitfield name="OFFSET_HI" low="0" high="31" type="hex"/> |
| </reg32> |
| </domain> |
| |
| <domain name="CP_MEM_TO_REG" width="32"> |
| <!-- control dword 0, then the source address; the high address dword exists only for the A5XX- variant range --> |
| <!-- dword 0: bit 18 is not described here (CNT starts at bit 19, unlike CP_REG_TO_MEM where it starts at bit 18) --> |
| <reg32 offset="0" name="0"> |
| <bitfield name="REG" low="0" high="17" type="hex"/> |
| <!-- number of registers/dwords copied is max(CNT, 1). --> |
| <bitfield name="CNT" low="19" high="29" type="uint"/> |
| <!-- shift each DWORD left by 2 while copying --> |
| <bitfield name="SHIFT_BY_2" pos="30" type="boolean"/> |
| <!-- does the same thing as CP_MEM_TO_MEM::UNK31 --> |
| <bitfield name="UNK31" pos="31" type="boolean"/> |
| </reg32> |
| <reg32 offset="1" name="1"> |
| <bitfield name="SRC" low="0" high="31"/> |
| </reg32> |
| <reg32 offset="2" name="2" varset="chip" variants="A5XX-"> |
| <bitfield name="SRC_HI" low="0" high="31"/> |
| </reg32> |
| </domain> |
| |
| <domain name="CP_MEM_TO_MEM" width="32"> |
| <!-- only control dword 0 is described as a register; the address list that follows is documented in the trailing comment --> |
| <reg32 offset="0" name="0"> |
| <!-- |
| not sure how many src operands we have, but the low |
| bits negate the n'th src argument. |
| --> |
| <bitfield name="NEG_A" pos="0" type="boolean"/> |
| <bitfield name="NEG_B" pos="1" type="boolean"/> |
| <bitfield name="NEG_C" pos="2" type="boolean"/> |
| |
| <!-- if set treat src/dst as 64bit values --> |
| <bitfield name="DOUBLE" pos="29" type="boolean"/> |
| <!-- execute CP_WAIT_FOR_MEM_WRITES beforehand --> |
| <bitfield name="WAIT_FOR_MEM_WRITES" pos="30" type="boolean"/> |
| <!-- some other kind of wait --> |
| <bitfield name="UNK31" pos="31" type="boolean"/> |
| </reg32> |
| <!-- |
| followed by sequence of addresses.. the first is the |
| destination and the rest are N src addresses which are |
| summed (after being negated if NEG_x bit set) allowing |
| to do things like 'result += end - start' (which turns |
| out to be useful for queries and accumulating results |
| across multiple tiles) |
| --> |
| </domain> |
| |
| <domain name="CP_MEMCPY" width="32"> |
| <!-- five-dword payload: dword count, then source and destination addresses as LO/HI pairs --> |
| <reg32 offset="0" name="0"> |
| <bitfield name="DWORDS" low="0" high="31" type="uint"/> |
| </reg32> |
| <reg32 offset="1" name="1"> |
| <bitfield name="SRC_LO" low="0" high="31" type="hex"/> |
| </reg32> |
| <reg32 offset="2" name="2"> |
| <bitfield name="SRC_HI" low="0" high="31" type="hex"/> |
| </reg32> |
| <reg32 offset="3" name="3"> |
| <bitfield name="DST_LO" low="0" high="31" type="hex"/> |
| </reg32> |
| <reg32 offset="4" name="4"> |
| <bitfield name="DST_HI" low="0" high="31" type="hex"/> |
| </reg32> |
| </domain> |
| |
| <domain name="CP_REG_TO_SCRATCH" width="32"> |
| <!-- single-dword payload: register offset, 3-bit scratch index and 3-bit count --> |
| <reg32 offset="0" name="0"> |
| <bitfield name="REG" low="0" high="17" type="hex"/> |
| <bitfield name="SCRATCH" low="20" high="22" type="uint"/> |
| <!-- number of registers/dwords copied is CNT + 1. --> |
| <bitfield name="CNT" low="24" high="26" type="uint"/> |
| </reg32> |
| </domain> |
| |
| <domain name="CP_SCRATCH_TO_REG" width="32"> |
| <!-- single-dword payload with the same REG / SCRATCH / CNT positions as CP_REG_TO_SCRATCH, plus the unknown bit 18 --> |
| <reg32 offset="0" name="0"> |
| <bitfield name="REG" low="0" high="17" type="hex"/> |
| <!-- note: CP_MEM_TO_REG always sets this when writing to the register --> |
| <bitfield name="UNK18" pos="18" type="boolean"/> |
| <bitfield name="SCRATCH" low="20" high="22" type="uint"/> |
| <!-- number of registers/dwords copied is CNT + 1. --> |
| <bitfield name="CNT" low="24" high="26" type="uint"/> |
| </reg32> |
| </domain> |
| |
| <domain name="CP_SCRATCH_WRITE" width="32"> |
| <!-- dword 0 carries only the 3-bit SCRATCH index, at the same bit position as in CP_REG_TO_SCRATCH --> |
| <reg32 offset="0" name="0"> |
| <bitfield name="SCRATCH" low="20" high="22" type="uint"/> |
| </reg32> |
| <!-- followed by one or more DWORDs to write to scratch registers --> |
| </domain> |
| |
| <domain name="CP_MEM_WRITE" width="32"> |
| <!-- destination address as a LO/HI dword pair, then the data to write --> |
| <reg32 offset="0" name="0"> |
| <bitfield name="ADDR_LO" low="0" high="31"/> |
| </reg32> |
| <reg32 offset="1" name="1"> |
| <bitfield name="ADDR_HI" low="0" high="31"/> |
| </reg32> |
| <!-- followed by the DWORDs to write --> |
| </domain> |
| |
| <enum name="cp_cond_function"> |
| <!-- |
| Comparison selector for the 3-bit FUNCTION field of CP_COND_WRITE, |
| CP_COND_WRITE5 and CP_WAIT_REG_MEM; value 7 is not assigned. |
| --> |
| <value name="WRITE_ALWAYS" value="0"/> |
| <value name="WRITE_LT" value="1"/> |
| <value name="WRITE_LE" value="2"/> |
| <value name="WRITE_EQ" value="3"/> |
| <value name="WRITE_NE" value="4"/> |
| <value name="WRITE_GE" value="5"/> |
| <value name="WRITE_GT" value="6"/> |
| </enum> |
| |
| <domain name="CP_COND_WRITE" width="32"> |
| <!-- six-dword payload with 32-bit poll and write addresses; CP_COND_WRITE5 is the variant with LO/HI address pairs --> |
| <!-- dword 0: bit 3, bits 5..7 and bits 9..31 are not described --> |
| <reg32 offset="0" name="0"> |
| <bitfield name="FUNCTION" low="0" high="2" type="cp_cond_function"/> |
| <bitfield name="POLL_MEMORY" pos="4" type="boolean"/> |
| <bitfield name="WRITE_MEMORY" pos="8" type="boolean"/> |
| </reg32> |
| <reg32 offset="1" name="1"> |
| <bitfield name="POLL_ADDR" low="0" high="31" type="hex"/> |
| </reg32> |
| <reg32 offset="2" name="2"> |
| <bitfield name="REF" low="0" high="31"/> |
| </reg32> |
| <reg32 offset="3" name="3"> |
| <bitfield name="MASK" low="0" high="31"/> |
| </reg32> |
| <reg32 offset="4" name="4"> |
| <bitfield name="WRITE_ADDR" low="0" high="31" type="hex"/> |
| </reg32> |
| <reg32 offset="5" name="5"> |
| <bitfield name="WRITE_DATA" low="0" high="31"/> |
| </reg32> |
| </domain> |
| |
| <domain name="CP_COND_WRITE5" width="32"> |
| <!-- eight-dword payload: like CP_COND_WRITE but with LO/HI address pairs and the extra SIGNED_COMPARE / POLL_SCRATCH flags --> |
| <reg32 offset="0" name="0"> |
| <bitfield name="FUNCTION" low="0" high="2" type="cp_cond_function"/> |
| <bitfield name="SIGNED_COMPARE" pos="3" type="boolean"/> |
| <!-- if both POLL_MEMORY and POLL_SCRATCH are false, it polls a register at POLL_ADDR_LO instead. --> |
| <bitfield name="POLL_MEMORY" pos="4" type="boolean"/> |
| <bitfield name="POLL_SCRATCH" pos="5" type="boolean"/> |
| <bitfield name="WRITE_MEMORY" pos="8" type="boolean"/> |
| </reg32> |
| <reg32 offset="1" name="1"> |
| <bitfield name="POLL_ADDR_LO" low="0" high="31" type="hex"/> |
| </reg32> |
| <reg32 offset="2" name="2"> |
| <bitfield name="POLL_ADDR_HI" low="0" high="31" type="hex"/> |
| </reg32> |
| <reg32 offset="3" name="3"> |
| <bitfield name="REF" low="0" high="31"/> |
| </reg32> |
| <reg32 offset="4" name="4"> |
| <bitfield name="MASK" low="0" high="31"/> |
| </reg32> |
| <reg32 offset="5" name="5"> |
| <bitfield name="WRITE_ADDR_LO" low="0" high="31" type="hex"/> |
| </reg32> |
| <reg32 offset="6" name="6"> |
| <bitfield name="WRITE_ADDR_HI" low="0" high="31" type="hex"/> |
| </reg32> |
| <reg32 offset="7" name="7"> |
| <bitfield name="WRITE_DATA" low="0" high="31"/> |
| </reg32> |
| </domain> |
| |
| <domain name="CP_WAIT_MEM_GTE" width="32"> |
| <doc> |
| Wait until a memory value is greater than or equal to the |
| reference, using signed comparison. |
| </doc> |
| <!-- four-dword payload: reserved dword, poll address as a LO/HI pair, then the reference value --> |
| <reg32 offset="0" name="0"> |
| <!-- Reserved for flags, presumably? Unused in FW --> |
| <bitfield name="RESERVED" low="0" high="31" type="hex"/> |
| </reg32> |
| <reg32 offset="1" name="1"> |
| <bitfield name="POLL_ADDR_LO" low="0" high="31" type="hex"/> |
| </reg32> |
| <reg32 offset="2" name="2"> |
| <bitfield name="POLL_ADDR_HI" low="0" high="31" type="hex"/> |
| </reg32> |
| <reg32 offset="3" name="3"> |
| <bitfield name="REF" low="0" high="31"/> |
| </reg32> |
| </domain> |
| |
| <domain name="CP_WAIT_REG_MEM" width="32"> |
| <doc> |
| This uses the same internal comparison as CP_COND_WRITE, |
| but waits until the comparison is true instead. It busy-loops in |
| the CP for the given number of cycles before trying again. |
| </doc> |
| <!-- dwords 0..4 have the same layout as dwords 0..4 of CP_COND_WRITE5; dword 5 holds the retry delay instead of a write address --> |
| <reg32 offset="0" name="0"> |
| <bitfield name="FUNCTION" low="0" high="2" type="cp_cond_function"/> |
| <bitfield name="SIGNED_COMPARE" pos="3" type="boolean"/> |
| <bitfield name="POLL_MEMORY" pos="4" type="boolean"/> |
| <bitfield name="POLL_SCRATCH" pos="5" type="boolean"/> |
| <bitfield name="WRITE_MEMORY" pos="8" type="boolean"/> |
| </reg32> |
| <reg32 offset="1" name="1"> |
| <bitfield name="POLL_ADDR_LO" low="0" high="31" type="hex"/> |
| </reg32> |
| <reg32 offset="2" name="2"> |
| <bitfield name="POLL_ADDR_HI" low="0" high="31" type="hex"/> |
| </reg32> |
| <reg32 offset="3" name="3"> |
| <bitfield name="REF" low="0" high="31"/> |
| </reg32> |
| <reg32 offset="4" name="4"> |
| <bitfield name="MASK" low="0" high="31"/> |
| </reg32> |
| <reg32 offset="5" name="5"> |
| <bitfield name="DELAY_LOOP_CYCLES" low="0" high="31"/> |
| </reg32> |
| </domain> |
| |
| <domain name="CP_WAIT_TWO_REGS" width="32"> |
| <doc> |
| Waits for REG0 to not be 0 or REG1 to not equal REF |
| </doc> |
| <!-- three-dword payload: two 18-bit register offsets followed by the 32-bit reference value --> |
| <reg32 offset="0" name="0"> |
| <bitfield name="REG0" low="0" high="17" type="hex"/> |
| </reg32> |
| <reg32 offset="1" name="1"> |
| <bitfield name="REG1" low="0" high="17" type="hex"/> |
| </reg32> |
| <reg32 offset="2" name="2"> |
| <bitfield name="REF" low="0" high="31" type="uint"/> |
| </reg32> |
| </domain> |
| |
| <domain name="CP_DISPATCH_COMPUTE" width="32"> |
| <!-- dword 0 has no described fields; dwords 1..3 hold X, Y and Z (NOTE(review): presumably dispatch dimensions, confirm units) --> |
| <reg32 offset="0" name="0"/> |
| <reg32 offset="1" name="1"> |
| <bitfield name="X" low="0" high="31"/> |
| </reg32> |
| <reg32 offset="2" name="2"> |
| <bitfield name="Y" low="0" high="31"/> |
| </reg32> |
| <reg32 offset="3" name="3"> |
| <bitfield name="Z" low="0" high="31"/> |
| </reg32> |
| </domain> |
| |
| <domain name="CP_SET_RENDER_MODE" width="32"> |
| <!-- eight-dword payload: mode selector, first buffer address, flags, one undescribed dword, then length and address of a second buffer --> |
| <enum name="render_mode_cmd"> |
| <value name="BYPASS" value="1"/> |
| <value name="BINNING" value="2"/> |
| <value name="GMEM" value="3"/> |
| <value name="BLIT2D" value="5"/> |
| <!-- placeholder name.. used when CP_BLIT packets with BLIT_OP_SCALE?? --> |
| <value name="BLIT2DSCALE" value="7"/> |
| <!-- 8 set before going back to BYPASS exiting 2D --> |
| <value name="END2D" value="8"/> |
| </enum> |
| <reg32 offset="0" name="0"> |
| <bitfield name="MODE" low="0" high="8" type="render_mode_cmd"/> |
| <!-- |
| normally 0x1/0x3, sometimes see 0x5/0x8 with unknown registers in |
| 0x21xx range.. possibly (at least some) a5xx variants have a |
| 2d core? |
| --> |
| </reg32> |
| <!-- I think first buffer is for GPU to save context in case of ctx switch? --> |
| <reg32 offset="1" name="1"> |
| <bitfield name="ADDR_0_LO" low="0" high="31"/> |
| </reg32> |
| <reg32 offset="2" name="2"> |
| <bitfield name="ADDR_0_HI" low="0" high="31"/> |
| </reg32> |
| <reg32 offset="3" name="3"> |
| <!-- |
| set when in GMEM.. maybe indicates GMEM contents need to be |
| preserved on ctx switch? |
| --> |
| <bitfield name="VSC_ENABLE" pos="3" type="boolean"/> |
| <bitfield name="GMEM_ENABLE" pos="4" type="boolean"/> |
| </reg32> |
| <!-- dword 4 has no described fields --> |
| <reg32 offset="4" name="4"/> |
| <!-- second buffer looks like some cmdstream.. length in dwords: --> |
| <reg32 offset="5" name="5"> |
| <bitfield name="ADDR_1_LEN" low="0" high="31" type="uint"/> |
| </reg32> |
| <reg32 offset="6" name="6"> |
| <bitfield name="ADDR_1_LO" low="0" high="31"/> |
| </reg32> |
| <reg32 offset="7" name="7"> |
| <bitfield name="ADDR_1_HI" low="0" high="31"/> |
| </reg32> |
| </domain> |
| |
| <!-- this looks fairly similar to CP_SET_RENDER_MODE minus first dword --> |
| <domain name="CP_COMPUTE_CHECKPOINT" width="32"> |
| <!-- eight-dword payload; dwords 2, 4 and 7 have no described fields --> |
| <!-- I think first buffer is for GPU to save context in case of ctx switch? --> |
| <reg32 offset="0" name="0"> |
| <bitfield name="ADDR_0_LO" low="0" high="31"/> |
| </reg32> |
| <reg32 offset="1" name="1"> |
| <bitfield name="ADDR_0_HI" low="0" high="31"/> |
| </reg32> |
| <reg32 offset="2" name="2"> |
| </reg32> |
| <!-- second buffer looks like some cmdstream.. length in dwords: --> |
| <reg32 offset="3" name="3"> |
| <bitfield name="ADDR_1_LEN" low="0" high="31" type="uint"/> |
| </reg32> |
| <reg32 offset="4" name="4"/> |
| <reg32 offset="5" name="5"> |
| <bitfield name="ADDR_1_LO" low="0" high="31"/> |
| </reg32> |
| <reg32 offset="6" name="6"> |
| <bitfield name="ADDR_1_HI" low="0" high="31"/> |
| </reg32> |
| <reg32 offset="7" name="7"/> |
| </domain> |
| |
| <domain name="CP_PERFCOUNTER_ACTION" width="32"> |
| <!-- three-dword payload: dword 0 has no described fields, dwords 1 and 2 hold an address as a LO/HI pair --> |
| <reg32 offset="0" name="0"/> |
| <reg32 offset="1" name="1"> |
| <bitfield name="ADDR_0_LO" low="0" high="31"/> |
| </reg32> |
| <reg32 offset="2" name="2"> |
| <bitfield name="ADDR_0_HI" low="0" high="31"/> |
| </reg32> |
| </domain> |
| |
| <domain name="CP_EVENT_WRITE" width="32"> |
| <!-- dword 0 selects the event (vgt_event_type) plus two flag bits; bits 8..29 are not described --> |
| <reg32 offset="0" name="0"> |
| <bitfield name="EVENT" low="0" high="7" type="vgt_event_type"/> |
| <!-- when set, write back timestamp instead of value from packet: --> |
| <bitfield name="TIMESTAMP" pos="30" type="boolean"/> |
| <bitfield name="IRQ" pos="31" type="boolean"/> |
| </reg32> |
| <!-- |
| TODO what is gpuaddr for, seems to be all 0's.. maybe needed for |
| context switch? |
| --> |
| <reg32 offset="1" name="1"> |
| <bitfield name="ADDR_0_LO" low="0" high="31"/> |
| </reg32> |
| <reg32 offset="2" name="2"> |
| <bitfield name="ADDR_0_HI" low="0" high="31"/> |
| </reg32> |
| <!-- dword 3 has no described fields --> |
| <reg32 offset="3" name="3"> |
| <!-- ??? --> |
| </reg32> |
| </domain> |
| |
| <domain name="CP_BLIT" width="32"> |
| <!-- five-dword payload: operation selector, then source and destination corners, each coordinate a 14-bit field --> |
| <enum name="cp_blit_cmd"> |
| <value name="BLIT_OP_FILL" value="0"/> |
| <value name="BLIT_OP_COPY" value="1"/> |
| <value name="BLIT_OP_SCALE" value="3"/> <!-- used for mipmap generation --> |
| </enum> |
| <reg32 offset="0" name="0"> |
| <bitfield name="OP" low="0" high="3" type="cp_blit_cmd"/> |
| </reg32> |
| <!-- source corners: (SRC_X1, SRC_Y1) and (SRC_X2, SRC_Y2) --> |
| <reg32 offset="1" name="1"> |
| <bitfield name="SRC_X1" low="0" high="13" type="uint"/> |
| <bitfield name="SRC_Y1" low="16" high="29" type="uint"/> |
| </reg32> |
| <reg32 offset="2" name="2"> |
| <bitfield name="SRC_X2" low="0" high="13" type="uint"/> |
| <bitfield name="SRC_Y2" low="16" high="29" type="uint"/> |
| </reg32> |
| <!-- destination corners: (DST_X1, DST_Y1) and (DST_X2, DST_Y2) --> |
| <reg32 offset="3" name="3"> |
| <bitfield name="DST_X1" low="0" high="13" type="uint"/> |
| <bitfield name="DST_Y1" low="16" high="29" type="uint"/> |
| </reg32> |
| <reg32 offset="4" name="4"> |
| <bitfield name="DST_X2" low="0" high="13" type="uint"/> |
| <bitfield name="DST_Y2" low="16" high="29" type="uint"/> |
| </reg32> |
| </domain> |
| |
| <domain name="CP_EXEC_CS" width="32"> |
| <!-- four-dword payload: dword 0 has no described fields, dwords 1..3 hold the X, Y and Z group counts --> |
| <reg32 offset="0" name="0"/> |
| <reg32 offset="1" name="1"> |
| <bitfield name="NGROUPS_X" low="0" high="31" type="uint"/> |
| </reg32> |
| <reg32 offset="2" name="2"> |
| <bitfield name="NGROUPS_Y" low="0" high="31" type="uint"/> |
| </reg32> |
| <reg32 offset="3" name="3"> |
| <bitfield name="NGROUPS_Z" low="0" high="31" type="uint"/> |
| </reg32> |
| </domain> |
| |
| <domain name="CP_EXEC_CS_INDIRECT" width="32" varset="chip" prefix="chip" variants="A4XX-"> |
| <!-- |
| dword 0 has no described fields. The address is one dword on A4XX and |
| a LO/HI pair in the A5XX- range, which moves the local-size dword |
| from offset 2 to offset 3. |
| --> |
| <reg32 offset="0" name="0"> |
| </reg32> |
| <stripe varset="chip" variants="A4XX"> |
| <reg32 offset="1" name="1"> |
| <bitfield name="ADDR" low="0" high="31"/> |
| </reg32> |
| <!-- bits 0..1 of the local-size dword are not described --> |
| <reg32 offset="2" name="2"> |
| <!-- localsize is value minus one: --> |
| <bitfield name="LOCALSIZEX" low="2" high="11" type="uint"/> |
| <bitfield name="LOCALSIZEY" low="12" high="21" type="uint"/> |
| <bitfield name="LOCALSIZEZ" low="22" high="31" type="uint"/> |
| </reg32> |
| </stripe> |
| <stripe varset="chip" variants="A5XX-"> |
| <reg32 offset="1" name="1"> |
| <bitfield name="ADDR_LO" low="0" high="31"/> |
| </reg32> |
| <reg32 offset="2" name="2"> |
| <bitfield name="ADDR_HI" low="0" high="31"/> |
| </reg32> |
| <!-- same field positions as the A4XX local-size dword --> |
| <reg32 offset="3" name="3"> |
| <!-- localsize is value minus one: --> |
| <bitfield name="LOCALSIZEX" low="2" high="11" type="uint"/> |
| <bitfield name="LOCALSIZEY" low="12" high="21" type="uint"/> |
| <bitfield name="LOCALSIZEZ" low="22" high="31" type="uint"/> |
| </reg32> |
| </stripe> |
| </domain> |
| |
| <!-- |
| CP_SET_MARKER payload (A6XX and later): a single dword whose low bits |
| select one of the a6xx_marker values defined below. |
| --> |
| <domain name="CP_SET_MARKER" width="32" varset="chip" prefix="chip" variants="A6XX-"> |
| <doc>Tell CP the current operation mode, indicates save and restore procedure</doc> |
| <!-- |
| Values 0x1-0xe fit in b0-b3. The two IFPC values (0x100, 0x101) |
| additionally have b8 set, which is why the register below needs a |
| field that reaches up to b8. |
| --> |
| <enum name="a6xx_marker"> |
| <value value="1" name="RM6_BYPASS"/> |
| <value value="2" name="RM6_BINNING"/> |
| <value value="4" name="RM6_GMEM"/> |
| <value value="5" name="RM6_ENDVIS"/> |
| <value value="6" name="RM6_RESOLVE"/> |
| <value value="7" name="RM6_YIELD"/> |
| <value value="8" name="RM6_COMPUTE"/> |
| <value value="0xc" name="RM6_BLIT2DSCALE"/> <!-- no-op (at least on current sqe fw) --> |
| |
| <!-- |
| These values come from a6xx_set_marker() in the |
| downstream kernel, and they can only be set by the kernel |
| --> |
| <value value="0xd" name="RM6_IB1LIST_START"/> |
| <value value="0xe" name="RM6_IB1LIST_END"/> |
| <!-- IFPC - inter-frame power collapse --> |
| <value value="0x100" name="RM6_IFPC_ENABLE"/> |
| <value value="0x101" name="RM6_IFPC_DISABLE"/> |
| </enum> |
| <reg32 offset="0" name="0"> |
| <!-- |
| NOTE: blob driver and some versions of freedreno/turnip set |
| b4, which is unused (at least by current sqe fw), but interferes |
| with parsing if we extend the size of the bitfield to include |
| b8 (only sent by kernel mode driver). Really, the way the |
| parsing works in the firmware, only b0-b3 are considered, but |
| if b8 is set, the low bits are interpreted differently. To |
| model this, without getting confused by spurious b4, this is |
| described as two overlapping bitfields: |
| --> |
| <!-- MODE: b0-b8, wide enough for the b8 (0x100 / 0x101) values --> |
| <bitfield name="MODE" low="0" high="8" type="a6xx_marker"/> |
| <!-- MARKER: b0-b3 only, so a stray b4 does not affect the decoded value --> |
| <bitfield name="MARKER" low="0" high="3" type="a6xx_marker"/> |
| </reg32> |
| </domain> |
| |
| <!-- |
| CP_SET_PSEUDO_REG payload (A6XX and later): a repeated group of three |
| dwords, each group naming one pseudo register and supplying a LO / HI |
| pair for it. |
| --> |
| <domain name="CP_SET_PSEUDO_REG" width="32" varset="chip" prefix="chip" variants="A6XX-"> |
| <doc>Set internal CP registers, used to indicate context save data addresses</doc> |
| <!-- selector values accepted in the PSEUDO_REG field below --> |
| <enum name="pseudo_reg"> |
| <value value="0" name="SMMU_INFO"/> |
| <value value="1" name="NON_SECURE_SAVE_ADDR"/> |
| <value value="2" name="SECURE_SAVE_ADDR"/> |
| <value value="3" name="NON_PRIV_SAVE_ADDR"/> |
| <value value="4" name="COUNTER"/> |
| </enum> |
| <!-- |
| stride="3": every entry is three dwords (selector, LO, HI). |
| length="100" is the number of entries this description allows; |
| NOTE(review): whether 100 reflects a real limit or is just a |
| generous upper bound cannot be told from here. |
| --> |
| <array offset="0" stride="3" length="100"> |
| <reg32 offset="0" name="0"> |
| <!-- 3-bit selector; only values 0-4 are named in pseudo_reg --> |
| <bitfield name="PSEUDO_REG" low="0" high="2" type="pseudo_reg"/> |
| </reg32> |
| <!-- presumably the low and high halves of one 64-bit value; confirm --> |
| <reg32 offset="1" name="1"> |
| <bitfield name="LO" low="0" high="31"/> |
| </reg32> |
| <reg32 offset="2" name="2"> |
| <bitfield name="HI" low="0" high="31"/> |
| </reg32> |
| </array> |
| </domain> |
| |
| <!-- |
| CP_REG_TEST payload (A6XX and later): one dword naming a register and a |
| bit index within it, plus two flag bits. |
| --> |
| <domain name="CP_REG_TEST" width="32" varset="chip" prefix="chip" variants="A6XX-"> |
| <doc> |
| Tests bit in specified register and sets predicate for CP_COND_REG_EXEC. |
| So: |
| |
| opcode: CP_REG_TEST (39) (2 dwords) |
| { REG = 0xc10 | BIT = 0 } |
| 0000: 70b90001 00000c10 |
| opcode: CP_COND_REG_EXEC (47) (3 dwords) |
| 0000: 70c70002 10000000 00000004 |
| opcode: CP_INDIRECT_BUFFER (3f) (4 dwords) |
| |
| Will execute the CP_INDIRECT_BUFFER only if b0 in the register at |
| offset 0x0c10 is 1 |
| </doc> |
| <!-- b18-b19 and b26-b30 of this dword are not described --> |
| <reg32 offset="0" name="0"> |
| <!-- the register to test --> |
| <bitfield name="REG" low="0" high="17"/> |
| <!-- the bit to test (5-bit index, so 0-31) --> |
| <bitfield name="BIT" low="20" high="24" type="uint"/> |
| <!-- execute CP_WAIT_FOR_ME beforehand --> |
| <bitfield name="WAIT_FOR_ME" pos="25" type="boolean"/> |
| <!-- |
| Appears only in: |
| opcode: CP_REG_TEST (39) (4 dwords) |
| { REG = 0 | BIT = 0 | WAIT_FOR_ME | UNK31 } |
| Seems to force CP_REG_TEST to write false |
| --> |
| <bitfield name="UNK31" pos="31" type="boolean"/> |
| </reg32> |
| </domain> |
| |
| <!-- I *think* this existed at least as far back as a4xx --> |
| <!-- |
| CP_COND_REG_EXEC payload: dword 0 carries the comparison mode in its |
| top four bits plus mode-specific operands in the lower bits; the |
| DWORDS field gives a count (presumably the number of following dwords |
| that are executed conditionally; confirm). |
| --> |
| <domain name="CP_COND_REG_EXEC" width="32"> |
| <!-- values for the MODE field (b28-b31 of dword 0); 0 is not named --> |
| <enum name="compare_mode"> |
| <!-- use the predicate bit set by CP_REG_TEST --> |
| <value value="1" name="PRED_TEST"/> |
| <!-- compare two registers directly for equality --> |
| <value value="2" name="REG_COMPARE"/> |
| <!-- test if certain render modes are set via CP_SET_MARKER --> |
| <value value="3" name="RENDER_MODE" varset="chip" variants="A6XX-"/> |
| </enum> |
| <reg32 offset="0" name="0"> |
| <!-- 18-bit register offset, decoded as hex --> |
| <bitfield name="REG0" low="0" high="17" type="hex"/> |
| |
| <!-- |
| Blob uses them for vkCmdClearAttachments in gmem mode. Examples: |
| opcode: CP_COND_REG_EXEC (47) (3 dwords) |
| { REG0 = 0 | MODE = PRED_TEST | 0x140000 } |
| opcode: CP_COND_REG_EXEC (47) (3 dwords) |
| { REG0 = 0 | MODE = PRED_TEST | 0x100000 } |
| --> |
| <!-- |
| In the examples above 0x100000 is b20 and 0x040000 is b18, so |
| 0x140000 has both UNK18 and UNK20 set and 0x100000 only UNK20. |
| --> |
| <bitfield name="UNK18" pos="18" varset="chip" variants="A6XX-" type="boolean"/> |
| <bitfield name="UNK20" pos="20" varset="chip" variants="A6XX-" type="boolean"/> |
| |
| <!-- |
| Note: these bits have the same meaning, and use the same |
| internal mechanism as the bits in CP_SET_DRAW_STATE. |
| When RENDER_MODE is selected, they're used as |
| a bitmask of which modes pass the test. |
| --> |
| |
| <!-- RM6_BINNING --> |
| <bitfield name="BINNING" pos="25" varset="chip" variants="A6XX-" type="boolean"/> |
| <!-- all others --> |
| <bitfield name="GMEM" pos="26" varset="chip" variants="A6XX-" type="boolean"/> |
| <!-- RM6_BYPASS --> |
| <bitfield name="SYSMEM" pos="27" varset="chip" variants="A6XX-" type="boolean"/> |
| |
| <!-- selects how the rest of this dword is interpreted; see compare_mode --> |
| <bitfield name="MODE" low="28" high="31" type="compare_mode"/> |
| </reg32> |
| |
| <!-- in REG_COMPARE mode, there's an extra DWORD here with REG1 --> |
| <!-- |
| NOTE(review): given the extra dword mentioned above, the offset of |
| DWORDS below presumably only holds for the other modes; confirm. |
| --> |
| |
| <reg32 offset="1" name="1"> |
| <bitfield name="DWORDS" low="0" high="31" type="uint"/> |
| </reg32> |
| </domain> |
| |
| <!-- |
| CP_COND_EXEC payload, six dwords: two addresses (each split into a |
| LO / HI dword pair), a reference value, and a dword count. |
| --> |
| <domain name="CP_COND_EXEC" width="32"> |
| <doc> |
| Executes the following DWORDs of commands if the dword at ADDR0 |
| is not equal to 0 and the dword at ADDR1 is less than REF |
| (signed comparison). |
| </doc> |
| <!-- dwords 0-1: ADDR0, whose target dword must be non-zero --> |
| <reg32 offset="0" name="0"> |
| <bitfield name="ADDR0_LO" low="0" high="31"/> |
| </reg32> |
| <reg32 offset="1" name="1"> |
| <bitfield name="ADDR0_HI" low="0" high="31"/> |
| </reg32> |
| <!-- dwords 2-3: ADDR1, whose target dword is compared against REF --> |
| <reg32 offset="2" name="2"> |
| <bitfield name="ADDR1_LO" low="0" high="31"/> |
| </reg32> |
| <reg32 offset="3" name="3"> |
| <bitfield name="ADDR1_HI" low="0" high="31"/> |
| </reg32> |
| <!-- dword 4: reference value for the signed less-than test --> |
| <reg32 offset="4" name="4"> |
| <bitfield name="REF" low="0" high="31"/> |
| </reg32> |
| <!-- dword 5: presumably how many following dwords are conditional; confirm --> |
| <reg32 offset="5" name="5"> |
| <bitfield name="DWORDS" low="0" high="31" type="uint"/> |
| </reg32> |
| </domain> |
| |
| <!-- |
| CP_SET_CTXSWITCH_IB payload, three dwords: an address split into |
| ADDR_LO / ADDR_HI, then a dword packing a size (DWORDS) and the kind |
| of IB being registered (TYPE). |
| --> |
| <domain name="CP_SET_CTXSWITCH_IB" width="32"> |
| <doc> |
| Used by the userspace driver to set various IB's which are |
| executed during context save/restore for handling |
| state that isn't restored by the |
| context switch routine itself. |
| </doc> |
| <!-- values for the 2-bit TYPE field; all four encodings (0-3) are named --> |
| <enum name="ctxswitch_ib"> |
| <value name="RESTORE_IB" value="0"> |
| <doc>Executed unconditionally when switching back to the context.</doc> |
| </value> |
| <value name="YIELD_RESTORE_IB" value="1"> |
| <doc> |
| Executed when switching back after switching |
| away during execution of |
| a CP_SET_MARKER packet with RM6_YIELD as the |
| payload *and* the normal save routine was |
| bypassed for a shorter one. I think this is |
| connected to the "skipsaverestore" bit set by |
| the kernel when preempting. |
| </doc> |
| </value> |
| <value name="SAVE_IB" value="2"> |
| <doc> |
| Executed when switching away from the context, |
| except for context switches initiated via |
| CP_YIELD. |
| </doc> |
| </value> |
| <value name="RB_SAVE_IB" value="3"> |
| <doc> |
| This can only be set by the RB (i.e. the kernel) |
| and executes with protected mode off, but |
| is otherwise similar to SAVE_IB. |
| |
| Note, kgsl calls this CP_KMD_AMBLE_TYPE |
| </doc> |
| </value> |
| </enum> |
| <!-- dwords 0-1: address of the IB (presumably low / high halves; confirm) --> |
| <reg32 offset="0" name="0"> |
| <bitfield name="ADDR_LO" low="0" high="31"/> |
| </reg32> |
| <reg32 offset="1" name="1"> |
| <bitfield name="ADDR_HI" low="0" high="31"/> |
| </reg32> |
| <!-- dword 2: b0-b19 size, b20-b21 IB type; b22-b31 are not described --> |
| <reg32 offset="2" name="2"> |
| <bitfield name="DWORDS" low="0" high="19" type="uint"/> |
| <bitfield name="TYPE" low="20" high="21" type="ctxswitch_ib"/> |
| </reg32> |
| </domain> |
| |
| <!-- |
| CP_REG_WRITE payload: only dword 0 is described here, and of it only |
| the 4-bit TRACKER field. |
| --> |
| <domain name="CP_REG_WRITE" width="32"> |
| <!-- |
| Layout note: inside this enum each <doc> is a sibling that describes |
| the <value> immediately following it (it is not nested inside the |
| value). The four values are the single bits 0x1, 0x2, 0x4 and 0x8. |
| NOTE(review): whether several bits may be set at once is not |
| shown here. |
| --> |
| <enum name="reg_tracker"> |
| <doc> |
| Keep shadow copies of these registers and only set them |
| when drawing, avoiding redundant writes: |
| - VPC_CNTL_0 |
| - HLSQ_CONTROL_1_REG |
| - HLSQ_UNKNOWN_B980 |
| </doc> |
| <value name="TRACK_CNTL_REG" value="0x1"/> |
| <doc> |
| Track RB_RENDER_CNTL, and insert a WFI in the following |
| situation: |
| - There is a write that disables binning |
| - There was a draw with binning left enabled, but in |
| BYPASS mode |
| Presumably this is a hang workaround? |
| </doc> |
| <value name="TRACK_RENDER_CNTL" value="0x2"/> |
| <doc> |
| Do a mysterious CP_EVENT_WRITE 0x3f when the low bit of |
| the data to write is 0. Used by the Vulkan blob with |
| PC_MULTIVIEW_CNTL, but this isn't predicated on particular |
| register(s) like the others. |
| </doc> |
| <value name="UNK_EVENT_WRITE" value="0x4"/> |
| <doc> |
| Tracks GRAS_LRZ_CNTL::GREATER, GRAS_LRZ_CNTL::DIR, and |
| GRAS_LRZ_DEPTH_VIEW with previous values, and if one of |
| the following is true: |
| - GRAS_LRZ_CNTL::GREATER has changed |
| - GRAS_LRZ_CNTL::DIR has changed, the old value is not |
| CUR_DIR_GE, and the new value is not CUR_DIR_DISABLED |
| - GRAS_LRZ_DEPTH_VIEW has changed |
| then it does a LRZ_FLUSH with GRAS_LRZ_CNTL::ENABLE |
| forced to 1. |
| Only exists in a650_sqe.fw. |
| </doc> |
| <value name="TRACK_LRZ" value="0x8"/> |
| </enum> |
| <reg32 offset="0" name="0"> |
| <!-- b0-b3, one bit per reg_tracker value; b4-b31 are not described --> |
| <bitfield name="TRACKER" low="0" high="3" type="reg_tracker"/> |
| </reg32> |
| </domain> |
| |
| <!-- |
| CP_SMMU_TABLE_UPDATE payload, four dwords: TTBR0 (low 32 bits in |
| dword 0, 16 more bits in dword 1), ASID, CONTEXTIDR and CONTEXTBANK. |
| --> |
| <domain name="CP_SMMU_TABLE_UPDATE" width="32"> |
| <doc> |
| Note that the SMMU's definition of TTBRn can take different forms |
| depending on the pgtable format. But a5xx+ only uses aarch64 |
| format. |
| </doc> |
| <reg32 offset="0" name="0"> |
| <bitfield name="TTBR0_LO" low="0" high="31"/> |
| </reg32> |
| <!-- dword 1 is shared: b0-b15 continue TTBR0, b16-b31 hold the ASID --> |
| <reg32 offset="1" name="1"> |
| <bitfield name="TTBR0_HI" low="0" high="15"/> |
| <bitfield name="ASID" low="16" high="31"/> |
| </reg32> |
| <reg32 offset="2" name="2"> |
| <doc>Unused, does not apply to aarch64 pgtable format</doc> |
| <bitfield name="CONTEXTIDR" low="0" high="31"/> |
| </reg32> |
| <!-- NOTE(review): presumably selects the SMMU context bank; confirm --> |
| <reg32 offset="3" name="3"> |
| <bitfield name="CONTEXTBANK" low="0" high="31"/> |
| </reg32> |
| </domain> |
| |
| <!-- |
| CP_START_BIN payload, five dwords: a bin count, a 64-bit prefix |
| address, the per-bin prefix size, and the size of the body that runs |
| up to CP_END_BIN. |
| --> |
| <domain name="CP_START_BIN" width="32"> |
| <reg32 offset="0" name="BIN_COUNT" type="uint"/> |
| <!-- reg64 occupies dwords 1 and 2, which is why the next register is at offset 3 --> |
| <reg64 offset="1" name="PREFIX_ADDR" type="address"/> |
| <!-- |
| NOTE(review): PREFIX_DWORDS and BODY_DWORDS carry no type attribute, |
| unlike BIN_COUNT; confirm whether that is intentional. |
| --> |
| <reg32 offset="3" name="PREFIX_DWORDS"> |
| <doc> |
| Size of prefix for each bin. For each bin index i, the |
| prefix commands at PREFIX_ADDR + i * PREFIX_DWORDS are |
| executed in an IB2 before the IB1 commands following |
| this packet. |
| </doc> |
| </reg32> |
| <reg32 offset="4" name="BODY_DWORDS"> |
| <doc>Number of dwords after this packet until CP_END_BIN</doc> |
| </reg32> |
| </domain> |
| |
| <!-- |
| CP_THREAD_CONTROL payload: one dword with a 2-bit thread selector in |
| b0-b1 and two flag bits at b27 and b31. The remaining bits are not |
| described. |
| --> |
| <domain name="CP_THREAD_CONTROL" width="32"> |
| <!-- |
| Selector values for THREAD. BOTH (3) is the bitwise OR of BR (1) |
| and BV (2); 0 is not named. |
| --> |
| <enum name="cp_thread"> |
| <value name="CP_SET_THREAD_BR" value="1"/> |
| <value name="CP_SET_THREAD_BV" value="2"/> |
| <value name="CP_SET_THREAD_BOTH" value="3"/> |
| </enum> |
| <reg32 offset="0" name="0"> |
| <bitfield name="THREAD" low="0" high="1" type="cp_thread"/> |
| <bitfield name="CONCURRENT_BIN_DISABLE" pos="27" type="boolean"/> |
| <bitfield name="SYNC_THREADS" pos="31" type="boolean"/> |
| </reg32> |
| </domain> |
| |
| </database> |
| |