// blob: 3ef88906cd65496d19c43149fe51c0cb12b8e49a [file] [log] [blame] [edit]
#version 450
#extension GL_EXT_buffer_reference : require
#extension GL_EXT_buffer_reference_uvec2 : require
layout(local_size_x = 1) in;
// Push constants supplied by the host for one dispatch.
// The *_address members are buffer device addresses passed as two 32-bit
// words (GL_EXT_buffer_reference_uvec2) and converted to buffer references
// inside the shader.
layout(push_constant) uniform _push {
    uvec2 image_address;    // Cast to image_buffer: tile headers and tile bodies.
    uvec2 scratch_address;  // Cast to image_buffer: staging space for one superblock.
    uvec2 aux_address;      // Cast to aux_buffer: receives the resulting size.
    uint body_offset;       // Byte offset in the image where written-out bodies start.
    uint block_count;       // Number of superblocks processed by main().
};
// Buffer reference that views memory as an array of 16-byte uvec4 words.
// Used for both the image and the scratch buffer.
layout(buffer_reference, std430, buffer_reference_align = 16) buffer image_buffer {
    uvec4 data[];
};
// Buffer reference for the auxiliary output written by main().
layout(buffer_reference, std430, buffer_reference_align = 4) buffer aux_buffer {
    uint size_bytes;  // body_offset plus the size of all written-out tile bodies.
};
// AFBC layout constants.

// A tile is 16x16 pixels.
const uint kAfbcTilePixelWidth = 16u;
const uint kAfbcTilePixelHeight = 16u;
const uint kAfbcTilePixels = kAfbcTilePixelHeight * kAfbcTilePixelWidth;

// Number of pixels held by one 16-byte uvec4, and the resulting number of
// uvec4 words reserved for one tile body.
const uint kAfbcPixelsPerUvec4 = 4u;
const uint kAfbcTileUvec4s = kAfbcTilePixels / kAfbcPixelsPerUvec4;

// A superblock is 8x8 tiles.
const uint kAfbcSuperblockTileWidth = 8u;
const uint kAfbcSuperblockTileHeight = 8u;
const uint kAfbcSuperblockTileCount = kAfbcSuperblockTileHeight * kAfbcSuperblockTileWidth;
// Shared memory used for staging: one body size (in uvec4 units) per tile of
// the superblock being processed. Written by determineBodySizeAndUnpack()
// and read back by writeOut().
shared uint body_sizes[kAfbcSuperblockTileCount];
// Converts a 6-bit subtile size field from a tile header into a byte count.
// A field value of 1 stands for 64 bytes; any other value is the size itself.
uint subtileSize(uint subtile)
{
    return (subtile == 1u) ? 64u : subtile;
}
// Stage 1 worker for a single tile of a superblock.
//
// Reads the header of tile `tile_base_idx + i` from the image, computes the
// size of the tile's packed body, copies that body into slot `i` of the
// scratch buffer and records the size (in uvec4 units) in body_sizes[i].
// Tiles whose header start is zero (solid tiles) have no body; they record a
// size of 0 and nothing is copied.
//
// tile_base_idx: index of the first tile of the superblock.
// i:             index of the tile within the superblock; also selects the
//                scratch slot and the body_sizes entry.
void determineBodySizeAndUnpack(uint tile_base_idx, uint i)
{
    restrict image_buffer image = image_buffer(image_address);

    uint tile_idx = tile_base_idx + i;

    // Get tile start from header. The header holds a byte offset; dividing
    // by 16 turns it into an index into the uvec4 array.
    uint packed_tile_start = image.data[tile_idx].x / 16;

    uint body_size = 0;

    // Determine size of body for non-solid tiles.
    if (packed_tile_start != 0)
    {
        restrict image_buffer scratch = image_buffer(scratch_address);

        // The remaining 96 header bits hold sixteen 6-bit subtile sizes.
        uint h1 = image.data[tile_idx].y;
        uint h2 = image.data[tile_idx].z;
        uint h3 = image.data[tile_idx].w;

        // Extract the size of each subtile from the header. Fields 5 and 10
        // straddle a 32-bit word boundary and are stitched from two words.
        body_size += subtileSize(h1 & 0x3f);
        body_size += subtileSize((h1 >> 6) & 0x3f);
        body_size += subtileSize((h1 >> 12) & 0x3f);
        body_size += subtileSize((h1 >> 18) & 0x3f);
        body_size += subtileSize((h1 >> 24) & 0x3f);
        body_size += subtileSize((h1 >> 30) | (h2 & 0xf) << 2);
        body_size += subtileSize((h2 >> 4) & 0x3f);
        body_size += subtileSize((h2 >> 10) & 0x3f);
        body_size += subtileSize((h2 >> 16) & 0x3f);
        body_size += subtileSize((h2 >> 22) & 0x3f);
        body_size += subtileSize((h2 >> 28) | (h3 & 0x3) << 4);
        body_size += subtileSize((h3 >> 2) & 0x3f);
        body_size += subtileSize((h3 >> 8) & 0x3f);
        body_size += subtileSize((h3 >> 14) & 0x3f);
        body_size += subtileSize((h3 >> 20) & 0x3f);
        body_size += subtileSize((h3 >> 26) & 0x3f);

        // Roundup to number of uvec4s.
        body_size = (body_size + 15) / 16;

        // Unpacked tile start at fixed offset.
        uint unpacked_tile_start = i * kAfbcTileUvec4s;

        // Copy tile body from the image into the scratch slot.
        for (uint j = 0; j < body_size; j++)
        {
            scratch.data[unpacked_tile_start + j] = image.data[packed_tile_start + j];
        }
    }

    // Store body size in shared memory.
    body_sizes[i] = body_size;
}
// Stage 2 worker for a single tile of a superblock.
//
// Copies the body staged in scratch slot `i` back into the image at the
// tile's fixed location (body_offset plus tile_idx * kAfbcTileUvec4s uvec4
// words) and rewrites the first header word with the new byte offset. Tiles
// with a recorded body size of 0 get a tile start of 0 and no body copy.
//
// tile_base_idx: index of the first tile of the superblock.
// i:             index of the tile within the superblock.
void writeOut(uint tile_base_idx, uint i)
{
    restrict image_buffer image = image_buffer(image_address);

    uint tile_idx = tile_base_idx + i;

    // Get tile header.
    uvec4 header = image.data[tile_idx];

    // Load body size from shared memory.
    uint body_size = body_sizes[i];

    uint image_tile_start = 0;

    if (body_size != 0)
    {
        restrict image_buffer scratch = image_buffer(scratch_address);

        // Image tile start. This assigns the outer variable: redeclaring it
        // here would shadow it and leave the header written below with a
        // tile start of 0.
        image_tile_start = body_offset / 16 + tile_idx * kAfbcTileUvec4s;

        // Scratch tile start.
        uint scratch_tile_start = i * kAfbcTileUvec4s;

        // Copy tile body.
        for (uint j = 0; j < body_size; j++)
        {
            image.data[image_tile_start + j] = scratch.data[scratch_tile_start + j];
        }
    }

    // Write tile start (converted back to bytes) and rest of header.
    image.data[tile_idx] = uvec4(image_tile_start * 16, header.yzw);
}
// Processes one superblock in two stages. The tiles of the superblock are
// distributed over the invocations of the work group with a stride of
// gl_WorkGroupSize.x, using the same tile-to-invocation mapping in both
// stages.
//
// block: index of the superblock; its first tile is
//        block * kAfbcSuperblockTileCount.
void processBlock(uint block)
{
    uint start_tile = block * kAfbcSuperblockTileCount;

    // Stage 1: Determine body sizes and unpack tiles into staging memory.
    for (uint i = gl_LocalInvocationID.x; i < kAfbcSuperblockTileCount; i += gl_WorkGroupSize.x)
    {
        determineBodySizeAndUnpack(start_tile, i);
    }

    // Ensure all threads in work group have executed statements
    // above before we proceed to next stage. Stage 2 writes into the image
    // that stage 1 reads from.
    barrier();

    // Stage 2: Write out tiles to image from staging memory.
    for (uint i = gl_LocalInvocationID.x; i < kAfbcSuperblockTileCount; i += gl_WorkGroupSize.x)
    {
        writeOut(start_tile, i);
    }
}
// Entry point. Runs processBlock() for every superblock, last to first, then
// has invocation 0 publish the resulting size through the aux buffer.
void main()
{
    // Process tiles from back to front.
    // NOTE(review): presumably so that bodies written out in place never
    // overwrite packed data that has not been read yet - confirm.
    uint i = block_count;
    while (i-- > 0)
    {
        processBlock(i);
    }

    // A single invocation reports the size: body_offset plus one full-size
    // body per tile. The factor 4 is presumably bytes per pixel.
    if (gl_LocalInvocationID.x == 0)
    {
        restrict aux_buffer aux = aux_buffer(aux_address);
        aux.size_bytes = body_offset + block_count * kAfbcSuperblockTileCount * kAfbcTilePixels * 4;
    }
}