| -- SPDX-License-Identifier: MIT |
| -- |
| -- Parse cmdstream dump and analyse blits and batches |
| |
| --local posix = require "posix" |
| |
--- Format a message and write it to the default output.
-- @param fmt  format string understood by string.format
-- @param ...  values substituted into fmt
-- @return     whatever io.write returns
function printf(fmt, ...)
  local text = string.format(fmt, ...)
  return io.write(text)
end
| |
--- Debug trace hook, currently a no-op.
-- Uncomment the printf call in the body to get verbose tracing.
-- @param fmt  format string (ignored while tracing is disabled)
-- @param ...  format arguments (ignored while tracing is disabled)
function dbg(fmt, ...)
  -- printf(fmt, ...)
  return
end
| |
-- Banner written once, when the script is loaded.
printf("%s", "Analyzing Data...\n")

-- Handle used for every `r.<REGISTER>` lookup below.  `rnn` is not defined
-- in this file; presumably the host tool provides it -- TODO confirm.
local r = rnn.init("a630")
| |
-- Within one submit, all draws target the same N MRTs.  This table holds
-- the render targets of the batch being accumulated, keyed by base address;
-- reset() replaces it with a fresh table.
local mrts = {}
-- Same records, but reset() does not clear this one, so it also includes
-- historical render targets.
local allmrts = {}

--- Record a render target for the current batch.
-- The record is stored under `base` in both mrts and allmrts, replacing
-- any earlier record with the same base.
-- @param fmt      format of the target
-- @param w        width
-- @param h        height
-- @param samples  sample-count value
-- @param base     base address, used as the table key
-- @param flag     flag-buffer address
-- @param gmem     GMEM base (finish() uses it to look up clear/restore/resolve)
function push_mrt(fmt, w, h, samples, base, flag, gmem)
  dbg("MRT: %s %ux%u 0x%x\n", fmt, w, h, base)

  local target = {
    format = fmt,
    w = w,
    h = h,
    samples = samples,
    base = base,
    flag = flag,
    gmem = gmem,
  }

  mrts[base] = target
  allmrts[base] = target
end
| |
-- And each draw will read from M sources/textures.  Keyed by base address;
-- reset() replaces this table with a fresh one.
local sources = {}

--- Record a source (texture) read by the current batch.
-- A later call with the same `base` replaces the earlier record.
-- @param fmt      format of the source
-- @param w        width
-- @param h        height
-- @param samples  sample-count value
-- @param base     base address, used as the table key
-- @param flag     flag-buffer address
function push_source(fmt, w, h, samples, base, flag)
  dbg("SRC: %s %ux%u 0x%x\n", fmt, w, h, base)

  local texture = {
    format = fmt,
    w = w,
    h = h,
    samples = samples,
    base = base,
    flag = flag,
  }

  sources[base] = texture
end
| |
-- Per-batch bookkeeping shared by the handlers below.  Only the two
-- counters have an initial value here; reset() initialises the tracking
-- tables, the depth/stencil flags and drawmode.

-- Bin geometry, recorded by draw() for RM6_BIN_RENDER_START batches.
local binw, binh, nbins
-- Number of blits / draws seen in the current batch.
local blits, draws = 0, 0
-- Mode string of the current batch ("BLIT" for blits).
local drawmode
-- Tables keyed by buffer address; an entry is set to 1 once the buffer was
-- cleared / restored / resolved.
local cleared, restored, resolved
-- True from start_submit() until a draw or CCU_RESOLVE event is seen.
local nullbatch
-- Set once any draw in the batch enables the corresponding state.
local depthtest, depthwrite
local stenciltest, stencilwrite
| |
--- Clear the per-batch state so a new batch can be accumulated.
-- Empties the MRT/source tables and the cleared/restored/resolved sets,
-- zeroes the draw and blit counters, clears the depth/stencil flags and
-- forgets the current draw mode.  allmrts, nullbatch and the bin geometry
-- are not touched here.
function reset()
  dbg("reset\n")
  mrts = {}
  sources = {}
  draws = 0
  blits = 0
  cleared = {}
  restored = {}
  resolved = {}
  depthtest = false
  depthwrite = false
  stenciltest = false
  stencilwrite = false
  -- Use the nil literal.  The previous `Nil` was a read of an undefined
  -- global that only happened to evaluate to nil.
  drawmode = nil
end
| |
--- Hook run at the start of a submit.
-- Resets the per-batch state and marks the submit as a potential null
-- batch until draw() or CP_EVENT_WRITE() clears the flag.
function start_submit()
  dbg("start_submit\n")

  reset()

  nullbatch = true
end
| |
--- Print a summary of the batch (or blit) accumulated so far, then reset.
-- Output, in order: a blank line; either "NULL BATCH!" (nothing recorded
-- and nullbatch set), a "Batch:" header with draw count, mode and
-- depth/stencil usage, or a "Blit:" header; the MRTs sorted by base
-- address; and the sources sorted by base address.  When nothing was
-- recorded the function returns early without calling reset().
function finish()
  dbg("finish\n")

  printf("\n")

  -- TODO we get false-positives for 'NULL BATCH!' because we don't have
  -- a really good way to differentiate between submits and cmds.  Ie.
  -- with growable cmdstream, and a large # of tiles, IB1 can get split
  -- across multiple buffers.  Since we ignore GMEM draws for window-
  -- offset != 0,0, the later cmds will appear as null batches
  if draws == 0 and blits == 0 then
    if nullbatch then
      printf("NULL BATCH!\n")
    end
    return
  end

  if draws > 0 then
    printf("Batch:\n")
    printf("-------\n")
    printf(" # of draws: %u\n", draws)
    printf(" mode: %s\n", drawmode)
    if drawmode == "RM6_BIN_RENDER_START" then
      printf(" bin size: %ux%u (%u bins)\n", binw, binh, nbins)
    end
    if depthtest or depthwrite then
      printf(" ")
      if depthtest then
        printf("DEPTHTEST ")
      end
      if depthwrite then
        printf("DEPTHWRITE")
      end
      printf("\n")
    end
    if stenciltest or stencilwrite then
      printf(" ")
      if stenciltest then
        printf("STENCILTEST ")
      end
      if stencilwrite then
        printf("STENCILWRITE")
      end
      printf("\n")
    end
  else
    printf("Blit:\n")
    printf("-----\n")
  end

  -- pairs() order is unspecified, so sort the bases for stable output.
  local mrt_bases = {}
  for base in pairs(mrts) do
    mrt_bases[#mrt_bases + 1] = base
  end
  table.sort(mrt_bases)

  for _, base in ipairs(mrt_bases) do
    local mrt = mrts[base]
    printf(" MRT[0x%x:0x%x]:\t%ux%u\t\t%s (%s)", base, mrt.flag, mrt.w, mrt.h, mrt.format, mrt.samples)
    if drawmode == "RM6_BIN_RENDER_START" then
      -- In this mode the status tables are looked up by GMEM base.
      if cleared[mrt.gmem] then
        printf("\tCLEARED")
      end
      if restored[mrt.gmem] then
        printf("\tRESTORED")
      end
      if resolved[mrt.gmem] then
        printf("\tRESOLVED")
      end
    else
      -- Otherwise clears are looked up by the target's own base.
      if cleared[mrt.base] then
        printf("\tCLEARED")
      end
    end
    printf("\n")
  end

  -- Local helper: it used to be assigned to a global on every call.
  local function print_source(source)
    printf(" SRC[0x%x:0x%x]:\t%ux%u\t\t%s (%s)\n", source.base, source.flag, source.w, source.h, source.format, source.samples)
  end

  -- Sorted like the MRTs so the listing no longer depends on pairs() order.
  local source_bases = {}
  for base in pairs(sources) do
    source_bases[#source_bases + 1] = base
  end
  table.sort(source_bases)

  for _, base in ipairs(source_bases) do
    local source = sources[base]
    -- only show sources that have been previously rendered to, other
    -- textures are less interesting.  Possibly this should be an
    -- option somehow.  Batches with fewer than 10 draws (which includes
    -- blits, where draws == 0) list every source.
    if draws < 10
        or allmrts[base]
        or (source.flag and allmrts[source.flag]) then
      print_source(source)
    end
  end
  reset()
end
| |
--- Hook run at the end of a submit: report whatever has been accumulated.
function end_submit()
  dbg("end_submit\n")

  finish()
end
| |
-- Most recent MODE seen in a CP_SET_MARKER packet; "" until the first one.
local mode = ""

--- Packet hook: remember the mode carried by a CP_SET_MARKER packet.
-- @param pkt   decoded packet; only pkt[0].MODE is read
-- @param size  unused
function CP_SET_MARKER(pkt, size)
  local marker = pkt[0]
  mode = marker.MODE
  dbg("mode: %s\n", mode)
end
| |
--- Packet hook: track clears, restores and resolves signalled by
-- CCU_RESOLVE events.  Any other event is ignored.
--
-- In RM6_BIN_RENDER_START mode the event is recorded as a restore when
-- RB_RESOLVE_OPERATION.CLEAR_MASK is 0 and as a clear otherwise, and the
-- affected buffer is pushed as an MRT.  In RM6_BIN_RESOLVE mode it is
-- recorded as a resolve.  In any other mode a warning is printed.
-- @param pkt   decoded packet; only pkt[0].EVENT is read
-- @param size  unused
function CP_EVENT_WRITE(pkt, size)
  if tostring(pkt[0].EVENT) ~= "CCU_RESOLVE" then
    return
  end
  nullbatch = false

  local curmode = tostring(mode)

  if curmode == "RM6_BIN_RESOLVE" then
    resolved[r.RB_RESOLVE_GMEM_BUFFER_BASE] = 1
    return
  end

  if curmode ~= "RM6_BIN_RENDER_START" then
    printf("I am confused!!!\n")
    return
  end

  local gmem_base = r.RB_RESOLVE_GMEM_BUFFER_BASE

  -- either clear or restore:
  if r.RB_RESOLVE_OPERATION.CLEAR_MASK == 0 then
    restored[gmem_base] = 1
  else
    cleared[gmem_base] = 1
  end

  -- push_mrt() because we could have GMEM
  -- passes with only a clear and no draws.
  -- Try to match up the GMEM addr with the MRT/DEPTH state, to avoid
  -- relying on RB_RESOLVE_SYSTEM_BUFFER_BASE also getting written:
  local sysmem, flag = 0, 0
  for n = 0, r.RB_PS_MRT_CNTL.MRT - 1 do
    local target = r.RB_MRT[n]
    if target.BASE_GMEM == gmem_base then
      sysmem = target.BASE
      flag = r.RB_COLOR_FLAG_BUFFER[n].ADDR
      break
    end
  end

  -- No color target matched: see whether it is the depth buffer.
  if sysmem == 0 and gmem_base == r.RB_DEPTH_GMEM_BASE then
    sysmem = r.RB_DEPTH_BUFFER_BASE
    flag = r.RB_DEPTH_FLAG_BUFFER_BASE
  end

  --NOTE this can get confused by previous blits:
  --if sysmem == 0 then
  --  -- fallback:
  --  sysmem = r.RB_RESOLVE_SYSTEM_BUFFER_BASE
  --  flag = r.RB_RESOLVE_SYSTEM_FLAG_BUFFER_BASE
  --end

  local sysinfo = r.RB_RESOLVE_SYSTEM_BUFFER_INFO
  if not sysinfo.FLAGS then
    flag = 0
  end

  -- TODO maybe just emit RB_RESOLVE_SYSTEM_BUFFER_BASE/HI for clears..
  -- otherwise we get confused by stale values in registers.. not sure
  -- if this is a problem w/ blob
  local extent = r.RB_RESOLVE_CNTL_2
  push_mrt(sysinfo.COLOR_FORMAT,
    extent.X + 1,
    extent.Y + 1,
    sysinfo.SAMPLES,
    sysmem,
    flag,
    gmem_base)
end
| |
--- Hook for A6XX_TEX_MEMOBJ state: record the texture as a source.
-- The base and flag addresses are each assembled from a LO dword and a
-- HI dword shifted left by 32.
-- @param pkt   decoded descriptor dwords (indices 0, 1, 4, 5, 7, 8 are read)
-- @param size  unused
function A6XX_TEX_MEMOBJ(pkt, size)
  local dword0, dword1 = pkt[0], pkt[1]
  local base = pkt[4].BASE_LO | (pkt[5].BASE_HI << 32)
  local flag = pkt[7].FLAG_LO | (pkt[8].FLAG_HI << 32)

  push_source(dword0.FMT, dword1.WIDTH, dword1.HEIGHT, dword0.SAMPLES, base, flag)
end
| |
--- Account for a 2D blit and report it as its own "Blit" entry.
-- A blit whose source base is 0x100000 and which is not a solid-color
-- fill is treated as a resolve and only marks resolved[0].  Otherwise any
-- pending draws are reported first, then the destination is pushed as an
-- MRT, the source is pushed (or the destination marked cleared for a
-- solid-color fill), and the result is reported right away.
function handle_blit()
  local solid_fill = r.RB_A2D_BLT_CNTL.SOLID_COLOR

  -- blob sometimes uses CP_BLIT for resolves, so filter those out:
  -- TODO it would be nice to not hard-code GMEM addr:
  -- TODO I guess the src can be an offset from GMEM addr..
  if not solid_fill and r.TPL1_A2D_SRC_TEXTURE_BASE == 0x100000 then
    resolved[0] = 1
    return
  end

  -- Report whatever draws came before this blit.
  if draws > 0 then
    finish()
  end
  reset()
  drawmode = "BLIT"

  -- This kinda assumes that we are doing full img blits, which is maybe
  -- not completely legit.  We could perhaps instead just track pitch and
  -- size/pitch??  Or maybe the size doesn't matter much
  local dst_base = r.RB_A2D_DEST_BUFFER_BASE
  local dst_extent = r.GRAS_A2D_DEST_BR
  push_mrt(r.RB_A2D_DEST_BUFFER_INFO.COLOR_FORMAT,
    dst_extent.X + 1,
    dst_extent.Y + 1,
    "MSAA_ONE",
    dst_base,
    r.RB_A2D_DEST_FLAG_BUFFER_BASE,
    -1)

  if solid_fill then
    dbg("CLEAR=%x\n", dst_base)
    cleared[dst_base] = 1
  else
    push_source(r.SP_2D_SRC_FORMAT.COLOR_FORMAT,
      r.GRAS_A2D_SRC_XMAX.X + 1,
      r.GRAS_A2D_SRC_YMAX.Y + 1,
      "MSAA_ONE",
      r.TPL1_A2D_SRC_TEXTURE_BASE,
      r.TPL1_A2D_SRC_TEXTURE_FLAG_BASE)
  end

  blits = blits + 1
  finish()
end
| |
--- Tell whether a mode change stays within the same render pass.
-- Only RM6_BIN_VISIBILITY -> RM6_BIN_RENDER_START and
-- RM6_BIN_RENDER_START -> RM6_BIN_RESOLVE qualify.
-- @param curmode  mode of the batch so far
-- @param newmode  mode of the incoming draw
-- @return true for the two transitions above, false otherwise
function valid_transition(curmode, newmode)
  local binning_to_render =
    curmode == "RM6_BIN_VISIBILITY" and newmode == "RM6_BIN_RENDER_START"
  local render_to_resolve =
    curmode == "RM6_BIN_RENDER_START" and newmode == "RM6_BIN_RESOLVE"
  return binning_to_render or render_to_resolve
end
| |
--- Draw hook: fold one draw into the batch being accumulated.
-- Blits are handed to handle_blit() and CCU_RESOLVE events are ignored.
-- A mode change that valid_transition() rejects closes the current batch.
-- Only RM6_BIN_RENDER_START and RM6_DIRECT_RENDER draws are counted; a
-- counted draw records its color/depth targets and depth/stencil usage.
-- @param primtype  primitive or event name
-- @param nindx     unused
function draw(primtype, nindx)
  dbg("draw: %s (%s)\n", primtype, mode)
  nullbatch = false

  if primtype == "BLIT_OP_SCALE" then
    handle_blit()
    return
  end
  if primtype == "EVENT:CCU_RESOLVE" then
    return
  end

  local curmode = tostring(mode)
  local gmem_pass = (curmode == "RM6_BIN_RENDER_START")

  -- detect changes in drawmode which indicate a different
  -- pass.. BINNING->GMEM means same pass, but other
  -- transitions mean different pass:
  if drawmode and curmode ~= drawmode then
    dbg("%s -> %s transition\n", drawmode, curmode)
    if not valid_transition(drawmode, curmode) then
      dbg("invalid transition, new render pass!\n")
      finish()
      reset()
    end
  end

  -- Anything that is not a rendering mode is never counted as a draw.
  if not gmem_pass and curmode ~= "RM6_DIRECT_RENDER" then
    local expected_event =
      (curmode == "RM6_BIN_RESOLVE" and primtype == "EVENT:BLIT")
      or (curmode == "RM6_BLIT2DSCALE" and primtype == "EVENT:LRZ_CLEAR")
    if curmode == "RM6_BIN_VISIBILITY" then
      drawmode = curmode
    elseif not expected_event then
      printf("unknown MODE %s for primtype %s\n", curmode, primtype)
    end
    return
  end

  -- Only count the first tile for GMEM mode to avoid counting
  -- each draw for each tile
  if gmem_pass then
    local offset = r.RB_WINDOW_OFFSET
    if not (offset.X == 0 and offset.Y == 0) then
      return
    end
  end

  drawmode = curmode

  -- Shared tail of both push_mrt() calls: the size comes from screen
  -- scissor 0 and the sample count from RB_RESOLVE_GMEM_BUFFER_INFO.
  local function record_target(format, base, flag, gmem)
    local br = r.GRAS_SC_SCREEN_SCISSOR[0].BR
    push_mrt(format,
      br.X + 1,
      br.Y + 1,
      r.RB_RESOLVE_GMEM_BUFFER_INFO.SAMPLES,
      base,
      flag,
      gmem)
  end

  -- Per-target output masks RT0..RT7, indexed 0..7.
  local output_mask = r.RB_PS_OUTPUT_MASK
  local render_components = {}
  for i = 0, 7 do
    render_components[i] = output_mask["RT" .. i]
  end

  -- Skip color targets whose output mask is zero.
  for n = 0, r.RB_PS_MRT_CNTL.MRT - 1 do
    if render_components[n] ~= 0 then
      local target = r.RB_MRT[n]
      record_target(target.BUF_INFO.COLOR_FORMAT,
        target.BASE,
        r.RB_COLOR_FLAG_BUFFER[n].ADDR,
        target.BASE_GMEM)
    end
  end

  local depthbase = r.RB_DEPTH_BUFFER_BASE
  if depthbase ~= 0 then
    record_target(r.RB_DEPTH_BUFFER_INFO.DEPTH_FORMAT,
      depthbase,
      r.RB_DEPTH_FLAG_BUFFER_BASE,
      r.RB_DEPTH_GMEM_BASE)
  end

  local depth_cntl = r.RB_DEPTH_CNTL
  if depth_cntl.Z_WRITE_ENABLE then
    depthwrite = true
  end
  if depth_cntl.Z_TEST_ENABLE then
    depthtest = true
  end

  -- clearly 0 != false.. :-/
  if r.RB_STENCIL_WRITE_MASK.WRMASK ~= 0 then
    stencilwrite = true
  end
  if r.RB_STENCIL_CNTL.STENCIL_ENABLE then
    stenciltest = true
  end

  -- TODO should also check for stencil buffer for z32+s8 case

  if gmem_pass then
    local bin_size = r.VSC_BIN_SIZE
    local bin_cntl = r.VSC_EXPANDED_BIN_CNTL
    binw = bin_size.BINW
    binh = bin_size.BINH
    nbins = bin_cntl.NX * bin_cntl.NY
  end

  draws = draws + 1
end
| |