| #version 450 |
| |
| #extension GL_EXT_buffer_reference : require |
| #extension GL_EXT_buffer_reference_uvec2 : require |
| |
// Workgroup dimensions; y and z are spelled out at their default of 1.
layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;

// Parameters supplied by the host. Addresses are passed as uvec2 and are
// converted to buffer references (GL_EXT_buffer_reference_uvec2).
layout(push_constant) uniform _push {
    uvec2 image_address;    // Image buffer: tile headers and tile bodies.
    uvec2 scratch_address;  // Staging buffer for the tiles of one superblock.
    uvec2 aux_address;      // Buffer that receives the resulting size.
    uint  body_offset;      // Byte offset of the unpacked body area in the image.
    uint  block_count;      // Number of superblocks to process.
};

// View of a buffer as an array of 16 byte elements. Used for both the image
// and the scratch buffer.
layout(buffer_reference, std430, buffer_reference_align = 16) buffer image_buffer {
    uvec4 data[];
};

// Single 32-bit output value written once processing is done.
layout(buffer_reference, std430, buffer_reference_align = 4) buffer aux_buffer {
    uint size_bytes;
};
| |
// AFBC layout constants.

// Tile dimensions in pixels, and the resulting pixel count per tile.
const uint kAfbcTilePixelWidth = 16u;
const uint kAfbcTilePixelHeight = 16u;
const uint kAfbcTilePixels = kAfbcTilePixelWidth * kAfbcTilePixelHeight;

// Number of uvec4 elements that hold one unpacked tile.
const uint kAfbcPixelsPerUvec4 = 4u;
const uint kAfbcTileUvec4s = kAfbcTilePixels / kAfbcPixelsPerUvec4;

// Superblock dimensions in tiles, and the resulting tile count per superblock.
const uint kAfbcSuperblockTileWidth = 8u;
const uint kAfbcSuperblockTileHeight = 8u;
const uint kAfbcSuperblockTileCount = kAfbcSuperblockTileWidth * kAfbcSuperblockTileHeight;

// Per-tile body sizes (in uvec4 units) of the superblock being processed.
// Filled by stage 1 and consumed by stage 2.
shared uint body_sizes[kAfbcSuperblockTileCount];
| |
// Converts a 6-bit subtile size field from a tile header into a byte count.
// A field value of 1 stands for 64 bytes; every other value is returned as is.
uint subtileSize(uint subtile)
{
    if (subtile == 1u)
    {
        return 64u;
    }
    return subtile;
}
| |
// Stage 1 for one tile: works out how large the tile's packed body is, copies
// the body into the tile's scratch slot and records the size in body_sizes[].
//
// tile_base_idx: index of the first tile header of the superblock.
// i:             index of the tile within the superblock; selects the scratch
//                slot and the body_sizes[] entry.
void determineBodySizeAndUnpack(uint tile_base_idx, uint i)
{
    restrict image_buffer image = image_buffer(image_address);

    // Header layout: x holds the byte offset of the body, y/z/w hold sixteen
    // consecutive 6-bit subtile size fields.
    uvec4 header = image.data[tile_base_idx + i];

    // Body position in uvec4 units.
    uint src_start = header.x / 16u;

    // Solid tiles have no body: nothing to copy.
    if (src_start == 0u)
    {
        body_sizes[i] = 0u;
        return;
    }

    // Sum the sizes of the sixteen subtiles. The fields are packed back to
    // back, so two of them (at bits 30 and 60) straddle a word boundary.
    uvec3 size_words = header.yzw;
    uint byte_count = 0u;
    for (uint s = 0u; s < 16u; s++)
    {
        uint bit = s * 6u;
        uint word = bit / 32u;
        uint shift = bit % 32u;

        uint field = size_words[word] >> shift;
        if (shift > 26u)
        {
            // Fewer than 6 bits remain in this word; take the rest from the
            // low bits of the next one.
            field |= size_words[word + 1u] << (32u - shift);
        }

        byte_count += subtileSize(field & 0x3fu);
    }

    // Round the byte count up to whole uvec4 elements.
    uint uvec4_count = (byte_count + 15u) / 16u;

    // Every tile owns a fixed-size slot in scratch memory.
    restrict image_buffer scratch = image_buffer(scratch_address);
    uint dst_start = i * kAfbcTileUvec4s;

    for (uint k = 0u; k < uvec4_count; k++)
    {
        scratch.data[dst_start + k] = image.data[src_start + k];
    }

    // Publish the size for the write-out stage.
    body_sizes[i] = uvec4_count;
}
| |
// Stage 2 for one tile: copies the tile body from its scratch slot to the
// tile's fixed-stride location in the image and rewrites the tile header so
// that its start field refers to that location.
//
// tile_base_idx: index of the first tile header of the superblock.
// i:             index of the tile within the superblock; selects the scratch
//                slot and the body_sizes[] entry.
//
// Expects determineBodySizeAndUnpack() to have run for the same tile, so that
// body_sizes[i] and the scratch slot are populated.
void writeOut(uint tile_base_idx, uint i)
{
    restrict image_buffer image = image_buffer(image_address);

    uint tile_idx = tile_base_idx + i;

    // Get tile header.
    uvec4 header = image.data[tile_idx];

    // Load body size (in uvec4 units) from shared memory.
    uint body_size = body_sizes[i];

    // Tiles without a body keep a start of zero.
    uint image_tile_start = 0u;
    if (body_size != 0u)
    {
        restrict image_buffer scratch = image_buffer(scratch_address);

        // Destination of the body in uvec4 units. This must assign to the
        // variable declared above: redeclaring it here would shadow it and
        // the header written below would always get a start of zero.
        image_tile_start = body_offset / 16u + tile_idx * kAfbcTileUvec4s;

        // Source slot in scratch memory.
        uint scratch_tile_start = i * kAfbcTileUvec4s;

        // Copy tile body.
        for (uint j = 0u; j < body_size; j++)
        {
            image.data[image_tile_start + j] = scratch.data[scratch_tile_start + j];
        }
    }

    // Write the tile start (as a byte offset) and keep the rest of the header.
    image.data[tile_idx] = uvec4(image_tile_start * 16u, header.yzw);
}
| |
// Unpacks all tiles of one superblock in two stages separated by a workgroup
// barrier. The tiles are distributed over the invocations of the workgroup.
//
// block: index of the superblock to process.
void processBlock(uint block)
{
    uint first_tile = block * kAfbcSuperblockTileCount;
    uint lane = gl_LocalInvocationID.x;
    uint lane_count = gl_WorkGroupSize.x;

    // Stage 1: measure each tile body and copy it into staging memory.
    for (uint t = lane; t < kAfbcSuperblockTileCount; t += lane_count)
    {
        determineBodySizeAndUnpack(first_tile, t);
    }

    // Every invocation must have finished stage 1 before any invocation
    // starts writing to the image in stage 2.
    barrier();

    // Stage 2: copy the tile bodies from staging memory to the image.
    for (uint t = lane; t < kAfbcSuperblockTileCount; t += lane_count)
    {
        writeOut(first_tile, t);
    }
}
| |
// Entry point: unpacks every superblock and then reports the resulting size
// in bytes through the aux buffer.
void main()
{
    // Walk the superblocks from the last one down to the first.
    // NOTE(review): presumably this order keeps unpacked bodies from
    // overwriting packed data that has not been read yet - confirm.
    for (uint remaining = block_count; remaining > 0u; remaining--)
    {
        processBlock(remaining - 1u);
    }

    // Only one invocation reports the size.
    if (gl_LocalInvocationID.x == 0u)
    {
        // Each unpacked tile takes kAfbcTilePixels * 4 bytes.
        const uint tile_bytes = kAfbcTilePixels * 4u;
        uint tile_total = block_count * kAfbcSuperblockTileCount;

        restrict aux_buffer aux = aux_buffer(aux_address);
        aux.size_bytes = body_offset + tile_total * tile_bytes;
    }
}