// blob: 50ad9416eef5c33b2f32390c06dcffd8136adf99 [file] [log] [blame]
// Copyright 2019 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef SRC_GRAPHICS_LIB_COMPUTE_SPINEL_CORE_H_
#define SRC_GRAPHICS_LIB_COMPUTE_SPINEL_CORE_H_
//
// clang-format off
//
// Expands to nothing.
#define SPN_EMPTY

// All 32 bits set.
#define SPN_UINT_MAX          0xFFFFFFFF

//
// TILE SIZE
//
// Width is a power-of-2 of height
//
// Both dimensions are powers of two computed from the
// SPN_DEVICE_TILE_{WIDTH,HEIGHT}_LOG2 exponents, which are defined
// outside this header.
//
#define SPN_TILE_WIDTH        (1 << SPN_DEVICE_TILE_WIDTH_LOG2)
#define SPN_TILE_HEIGHT       (1 << SPN_DEVICE_TILE_HEIGHT_LOG2)

// SPN_TILE_HEIGHT - 1: every bit below the tile-height bit is set.
#define SPN_TILE_HEIGHT_MASK  (SPN_TILE_HEIGHT - 1)
//
// TAGGED BLOCK ID
//
// 0 5 31
// | TAG | BLOCK ID |
// | | SUBBLOCK | BLOCK |
// +-----+----------+----------+
// | 5 | SUB | 27 - SUB |
//
// BLOCK ID
//
// 0 27 31
// | BLOCK ID | |
// | SUBBLOCK | BLOCK | N/A |
// +----------+----------+-----+
// | SUB | 27 - SUB | 5 |
//
//
// There are 27 bits of subblocks and 5 bits of tag.
//
// The block pool vends block ids.
//
// There are (2^S) subblocks in a block.
//
// There are at least 2 subblocks per block.
//
//
// Field widths of a 32-bit tagged block id: the tag sits in the low
// bits and the block id in the bits above it.
//
#define SPN_TAGGED_BLOCK_ID_BITS_ID        27 // this size is cast in stone
#define SPN_TAGGED_BLOCK_ID_BITS_TAG       5  // which leaves 5 bits of tag

#define SPN_TAGGED_BLOCK_ID_INVALID        SPN_UINT_MAX
#define SPN_TAGGED_BLOCK_ID_MASK_TAG       SPN_BITS_TO_MASK(SPN_TAGGED_BLOCK_ID_BITS_TAG)

// Field accessors: the tag is masked out of the low bits, the id is
// extracted from the bits starting at SPN_TAGGED_BLOCK_ID_BITS_TAG.
#define SPN_TAGGED_BLOCK_ID_GET_TAG(tbid_) ((tbid_) & SPN_TAGGED_BLOCK_ID_MASK_TAG)
#define SPN_TAGGED_BLOCK_ID_GET_ID(tbid_)  SPN_BITFIELD_EXTRACT(tbid_,SPN_TAGGED_BLOCK_ID_BITS_TAG,SPN_TAGGED_BLOCK_ID_BITS_ID)

// Untagged block ids.
#define SPN_BLOCK_ID_MAX                   SPN_BITS_TO_MASK(SPN_TAGGED_BLOCK_ID_BITS_ID)
#define SPN_BLOCK_ID_INVALID               SPN_UINT_MAX

//
// Path primitive tags.  The tag value doubles as the index of the
// primitive's counter in the path header.
//
#define SPN_BLOCK_ID_TAG_PATH_LINE         0 // 0 -- 4 segments
#define SPN_BLOCK_ID_TAG_PATH_QUAD         1 // 1 -- 6 segments
#define SPN_BLOCK_ID_TAG_PATH_CUBIC        2 // 2 -- 8 segments
#define SPN_BLOCK_ID_TAG_PATH_RAT_QUAD     3 // 3 -- 7 segments : 6 + w1 -- w0 = w2 = 1
#define SPN_BLOCK_ID_TAG_PATH_RAT_CUBIC    4 // 4 -- 10 segments : 8 + w1 + w2 -- w0 = w3 = 1
#define SPN_BLOCK_ID_TAG_PATH_RESERVED_5   5
#define SPN_BLOCK_ID_TAG_PATH_RESERVED_6   6
#define SPN_BLOCK_ID_TAG_PATH_RESERVED_7   7
//
// ... tags 8-29 are available ...
//
#define SPN_BLOCK_ID_TAG_PATH_COUNT        5 // how many path types? can share same value with PATH_NEXT
#define SPN_BLOCK_ID_TAG_PATH_NEXT         (SPN_TAGGED_BLOCK_ID_MASK_TAG - 1) // 30 : 0x1E
#define SPN_BLOCK_ID_TAG_INVALID           SPN_TAGGED_BLOCK_ID_MASK_TAG // 31 : 0x1F
//
// BLOCK POOL
//
//
// Block and subblock sizes in dwords, with matching low-bit masks.  The
// two *_DWORDS_LOG2 exponents are defined outside this header.
//
#define SPN_BLOCK_POOL_BLOCK_DWORDS              (1 << SPN_DEVICE_BLOCK_POOL_BLOCK_DWORDS_LOG2)
#define SPN_BLOCK_POOL_SUBBLOCK_DWORDS           (1 << SPN_DEVICE_BLOCK_POOL_SUBBLOCK_DWORDS_LOG2)
#define SPN_BLOCK_POOL_BLOCK_DWORDS_MASK         SPN_BITS_TO_MASK(SPN_DEVICE_BLOCK_POOL_BLOCK_DWORDS_LOG2)
#define SPN_BLOCK_POOL_SUBBLOCK_DWORDS_MASK      SPN_BITS_TO_MASK(SPN_DEVICE_BLOCK_POOL_SUBBLOCK_DWORDS_LOG2)

// Number of subblocks in one block.
#define SPN_BLOCK_POOL_SUBBLOCKS_PER_BLOCK_LOG2  (SPN_DEVICE_BLOCK_POOL_BLOCK_DWORDS_LOG2 - SPN_DEVICE_BLOCK_POOL_SUBBLOCK_DWORDS_LOG2)
#define SPN_BLOCK_POOL_SUBBLOCKS_PER_BLOCK       (1 << SPN_BLOCK_POOL_SUBBLOCKS_PER_BLOCK_LOG2)
#define SPN_BLOCK_POOL_SUBBLOCKS_PER_BLOCK_MASK  SPN_BITS_TO_MASK(SPN_BLOCK_POOL_SUBBLOCKS_PER_BLOCK_LOG2)

// The same sizes expressed in qwords (2 dwords) ...
#define SPN_BLOCK_POOL_BLOCK_QWORDS_LOG2         (SPN_DEVICE_BLOCK_POOL_BLOCK_DWORDS_LOG2 - 1)
#define SPN_BLOCK_POOL_BLOCK_QWORDS              (1 << SPN_BLOCK_POOL_BLOCK_QWORDS_LOG2)
#define SPN_BLOCK_POOL_BLOCK_QWORDS_MASK         SPN_BITS_TO_MASK(SPN_BLOCK_POOL_BLOCK_QWORDS_LOG2)
#define SPN_BLOCK_POOL_SUBBLOCK_QWORDS_LOG2      (SPN_DEVICE_BLOCK_POOL_SUBBLOCK_DWORDS_LOG2 - 1)
#define SPN_BLOCK_POOL_SUBBLOCK_QWORDS           (1 << SPN_BLOCK_POOL_SUBBLOCK_QWORDS_LOG2)

// ... and in owords (4 dwords).
#define SPN_BLOCK_POOL_SUBBLOCK_OWORDS_LOG2      (SPN_DEVICE_BLOCK_POOL_SUBBLOCK_DWORDS_LOG2 - 2)
#define SPN_BLOCK_POOL_SUBBLOCK_OWORDS           (1 << SPN_BLOCK_POOL_SUBBLOCK_OWORDS_LOG2)

// Indices of the block pool's two atomic counters.
#define SPN_BLOCK_POOL_ATOMICS_READS             0
#define SPN_BLOCK_POOL_ATOMICS_WRITES            1
//
//
//
//
// Subgroup-relative block pool geometry.
//
// Every macro parameter is parenthesized so that an expression argument
// (e.g. `a + b`) is evaluated as a unit before the division or
// subtraction is applied.
//

// Number of subblocks covered by one subgroup of `subgroup_size_` lanes.
#define SPN_BLOCK_POOL_SUBBLOCKS_PER_SUBGROUP(subgroup_size_) \
  ((subgroup_size_) / SPN_BLOCK_POOL_SUBBLOCK_DWORDS)

// log2 of the number of subgroup-sized spans in one block.
#define SPN_BLOCK_POOL_SUBGROUPS_PER_BLOCK_LOG2(subgroup_size_log2_) \
  (SPN_DEVICE_BLOCK_POOL_BLOCK_DWORDS_LOG2 - (subgroup_size_log2_))

// Number of subgroup-sized spans in one block.
#define SPN_BLOCK_POOL_SUBGROUPS_PER_BLOCK(subgroup_size_log2_) \
  (1 << SPN_BLOCK_POOL_SUBGROUPS_PER_BLOCK_LOG2(subgroup_size_log2_))

// Low-bit mask with SPN_BLOCK_POOL_SUBGROUPS_PER_BLOCK_LOG2() bits set.
#define SPN_BLOCK_POOL_SUBGROUPS_PER_BLOCK_MASK(subgroup_size_log2_) \
  SPN_BITS_TO_MASK(SPN_BLOCK_POOL_SUBGROUPS_PER_BLOCK_LOG2(subgroup_size_log2_))

//
// True when the subblock bits of a block id are all zero.
//
#define SPN_BLOCK_ID_IS_BLOCK(bid_) (((bid_) & SPN_BLOCK_POOL_SUBBLOCKS_PER_BLOCK_MASK) == 0)
//
// PATH HEAD
//
// struct spinel_path_header
// {
// uint32_t handle; // host handle
// uint32_t blocks; // total number of blocks in entire path object -- includes nodes and segments
// uint32_t nodes; // number of trailing path node blocks -- not including head
//
// struct {
// uint32_t lines; // count of segments
// uint32_t quads; // count of segments
// uint32_t cubics; // count of segments
// uint32_t rat_quads; // count of segments
// uint32_t rat_cubics; // count of segments
// } prims;
//
// struct {
// float x0;
// float y0;
// float x1;
// float y1;
// } bounds; // float4: bounds
// };
//
// Size of the path header in dwords / qwords, plus its size rounded up
// to a power of two.
#define SPN_PATH_HEAD_DWORDS             12
#define SPN_PATH_HEAD_QWORDS             (SPN_PATH_HEAD_DWORDS / 2)
#define SPN_PATH_HEAD_DWORDS_POW2_RU     16

// Dword offsets of the leading header fields.
#define SPN_PATH_HEAD_OFFSET_HANDLE      0
#define SPN_PATH_HEAD_OFFSET_BLOCKS      1
#define SPN_PATH_HEAD_OFFSET_NODES       2

// The primitive counters start at PRIMS; each counter's offset is PRIMS
// plus the primitive's block id tag.
#define SPN_PATH_HEAD_OFFSET_PRIMS       3
#define SPN_PATH_HEAD_OFFSET_LINES       (SPN_PATH_HEAD_OFFSET_PRIMS + SPN_BLOCK_ID_TAG_PATH_LINE)
#define SPN_PATH_HEAD_OFFSET_QUADS       (SPN_PATH_HEAD_OFFSET_PRIMS + SPN_BLOCK_ID_TAG_PATH_QUAD)
#define SPN_PATH_HEAD_OFFSET_CUBICS      (SPN_PATH_HEAD_OFFSET_PRIMS + SPN_BLOCK_ID_TAG_PATH_CUBIC)
#define SPN_PATH_HEAD_OFFSET_RAT_QUADS   (SPN_PATH_HEAD_OFFSET_PRIMS + SPN_BLOCK_ID_TAG_PATH_RAT_QUAD)
#define SPN_PATH_HEAD_OFFSET_RAT_CUBICS  (SPN_PATH_HEAD_OFFSET_PRIMS + SPN_BLOCK_ID_TAG_PATH_RAT_CUBIC)

// Dword offset of the bounds.
#define SPN_PATH_HEAD_OFFSET_BOUNDS      8
//
// PATH HEAD COMPILE-TIME PREDICATES
//
//
// Parameters:
//
//   sgsz_ : subgroup size
//   x_    : element count being tested
//   i_    : index of a subgroup-sized span
//
// All parameters are parenthesized so that expression arguments such as
// `a + b` multiply and compare as a unit.
//

// True when x_ is at least i_ subgroup-sized spans.
#define SPN_PATH_HEAD_ELEM_GTE(sgsz_,x_,i_) \
  ((x_) >= (i_) * (sgsz_))

// True when x_ falls in the half-open range [i_ * sgsz_, (i_ + 1) * sgsz_).
#define SPN_PATH_HEAD_ELEM_IN_RANGE(sgsz_,x_,i_) \
  (SPN_PATH_HEAD_ELEM_GTE(sgsz_,x_,i_) && !SPN_PATH_HEAD_ELEM_GTE(sgsz_,x_,(i_)+1))

// True when the whole of span i_ lies inside the path header.
#define SPN_PATH_HEAD_ENTIRELY_HEADER(sgsz_,i_) \
  SPN_PATH_HEAD_ELEM_GTE(sgsz_,SPN_PATH_HEAD_DWORDS,(i_)+1)

// True when the end of the path header falls inside span i_.
#define SPN_PATH_HEAD_PARTIALLY_HEADER(sgsz_,i_) \
  SPN_PATH_HEAD_ELEM_IN_RANGE(sgsz_,SPN_PATH_HEAD_DWORDS,i_)

// True when the current invocation's dword in span i_ is a header dword.
#define SPN_PATH_HEAD_IS_HEADER(sgsz_,i_) \
  (gl_SubgroupInvocationID + (i_) * (sgsz_) < SPN_PATH_HEAD_DWORDS)
//
// RASTERIZATION TYPES
//
// Note that the projective rasterization types precede the integral and
// rational path primitives in order to exploit a coalesced uvec4[2] path header
// load.
//
// Projective types: lines and integral beziers with projective
// transforms applied are rationals without weights.
#define SPN_RAST_TYPE_PROJ_LINE   0
#define SPN_RAST_TYPE_PROJ_QUAD   1
#define SPN_RAST_TYPE_PROJ_CUBIC  2

// Integral and rational path primitives.
#define SPN_RAST_TYPE_LINE        3
#define SPN_RAST_TYPE_QUAD        4
#define SPN_RAST_TYPE_CUBIC       5
#define SPN_RAST_TYPE_RAT_QUAD    6
#define SPN_RAST_TYPE_RAT_CUBIC   7

// Number of rasterization types.
#define SPN_RAST_TYPE_COUNT       8
//
// FILL COMMANDS
//
//
// A fill command is expanded into one or more rasterize commands.
//
// The rasterize command points to a specific dword of a block.
//
// For GLSL we will use a uvec4 laid out as follows:
//
// union {
//
// uvec4 u32v4;
//
// struct spinel_cmd_fill {
// uint32_t path_h; // host id
// uint32_t na : 16; // unused
// uint32_t cohort : 15; // cohort is 8-11 bits
// uint32_t transform_type : 1; // transform type: 0=affine,1=projective
// uint32_t transform; // transform index
// uint32_t clip; // clip index
// } fill;
//
// struct spinel_cmd_rast {
// uint32_t node_id; // device block id
// uint32_t node_dword : 16; // block dword offset
// uint32_t cohort : 15; // cohort is 8-11 bits
// uint32_t transform_type : 1; // transform type: 0=affine,1=projective
// uint32_t transform; // transform index
// uint32_t clip; // clip index
// } rast;
//
// };
//
// NOTE(allanmac): We can pack the transform and clip indices down to a
// more practical 16 bits in case we want to add additional
// rasterization command indices or flags.
//
//
// Values of the 1-bit transform type field (bit 31 of dword 1).
//
#define SPN_CMD_FILL_TRANSFORM_TYPE_AFFINE                  0
#define SPN_CMD_FILL_TRANSFORM_TYPE_PROJECTIVE              1

//
// Fill command accessors.  `c_` is the 4-dword command.
//
// The command argument is parenthesized before indexing so that an
// expression argument (e.g. `flag ? a : b`) is indexed as a whole.
//
#define SPN_CMD_FILL_GET_PATH_H(c_)                         (c_)[0]
#define SPN_CMD_FILL_GET_COHORT(c_)                         SPN_BITFIELD_EXTRACT((c_)[1],16,15)
#define SPN_CMD_FILL_GET_TRANSFORM_TYPE(c_)                 SPN_BITFIELD_EXTRACT((c_)[1],31,1)
#define SPN_CMD_FILL_GET_TRANSFORM(c_)                      (c_)[2]
#define SPN_CMD_FILL_GET_CLIP(c_)                           (c_)[3]
#define SPN_CMD_FILL_IS_TRANSFORM_TYPE_AFFINE(c_)           (((c_)[1] & SPN_BITS_TO_MASK_AT(31,1)) == 0)
#define SPN_CMD_FILL_IS_TRANSFORM_TYPE_PROJECTIVE(c_)       (((c_)[1] & SPN_BITS_TO_MASK_AT(31,1)) != 0)

//
// Rasterize command accessors.  The cohort, transform and clip fields
// are read through the fill command accessors.
//
#define SPN_CMD_RASTERIZE_GET_COHORT(c_)                    SPN_CMD_FILL_GET_COHORT(c_)
#define SPN_CMD_RASTERIZE_GET_TRANSFORM_TYPE(c_)            SPN_CMD_FILL_GET_TRANSFORM_TYPE(c_)
#define SPN_CMD_RASTERIZE_GET_TRANSFORM(c_)                 SPN_CMD_FILL_GET_TRANSFORM(c_)
#define SPN_CMD_RASTERIZE_GET_CLIP(c_)                      SPN_CMD_FILL_GET_CLIP(c_)
#define SPN_CMD_RASTERIZE_IS_TRANSFORM_TYPE_AFFINE(c_)      SPN_CMD_FILL_IS_TRANSFORM_TYPE_AFFINE(c_)
#define SPN_CMD_RASTERIZE_IS_TRANSFORM_TYPE_PROJECTIVE(c_)  SPN_CMD_FILL_IS_TRANSFORM_TYPE_PROJECTIVE(c_)

// Dword 0 holds the node id; the low 16 bits of dword 1 hold the node
// dword offset.
#define SPN_CMD_RASTERIZE_GET_NODE_ID(c_)                   (c_)[0]
#define SPN_CMD_RASTERIZE_GET_NODE_DWORD(c_)                SPN_BITFIELD_EXTRACT((c_)[1],0,16)
#define SPN_CMD_RASTERIZE_SET_NODE_ID(c_,n_id_)             (c_)[0] = (n_id_)
#define SPN_CMD_RASTERIZE_SET_NODE_DWORD(c_,n_lo_)          (c_)[1] = SPN_BITFIELD_INSERT((c_)[1],(n_lo_),0,16)
//
// Spinel supports a projective transformation matrix with the
// requirement that w2 is implicitly 1.0.
//
// A---------B----+
// | sx shx | tx |
// | shy sy | ty |
// C---------D----+
// | w0 w1 | 1 |
// +---------+----+
//
// The transformation matrix can be initialized with the array:
//
// { sx shx shy sy tx ty w0 w1 }
//
// struct spinel_transform
// {
// SPN_TYPE_MAT2X2 a; // { { sx shx } {shy sy } } -- rotate
// SPN_TYPE_VEC2 b; // { tx ty } -- translate
// SPN_TYPE_VEC2 c; // { w0 w1 } -- project
// };
//
// struct spinel_transform_lo
// {
// SPN_TYPE_MAT2X2 a; // { { sx shx } {shy sy } } -- rotate
// };
//
// struct spinel_transform_hi
// {
// SPN_TYPE_VEC2 b; // { tx ty } -- translate
// SPN_TYPE_VEC2 c; // { w0 w1 } -- project
// };
//
//
// Note that the raster builder is storing the transform as two
// float[4] quads.
//
// The rasterization shaders then load these vec4 quads as mat2
// matrices.
//
// Component indices within the "lo" quad: { sx shx shy sy }.
#define SPN_TRANSFORM_LO_INDEX_SX   0
#define SPN_TRANSFORM_LO_INDEX_SHX  1
#define SPN_TRANSFORM_LO_INDEX_SHY  2
#define SPN_TRANSFORM_LO_INDEX_SY   3

// Component indices within the "hi" quad: { tx ty w0 w1 }.
#define SPN_TRANSFORM_HI_INDEX_TX   0
#define SPN_TRANSFORM_HI_INDEX_TY   1
#define SPN_TRANSFORM_HI_INDEX_W0   2
#define SPN_TRANSFORM_HI_INDEX_W1   3
//
// PATHS COPY COMMANDS
//
// The PATH COPY command is simply a 32-bit tagged block id with a
// host-controlled rolling counter stuffed into the id field.
//
// Command types, stored in the tag field of the command.
#define SPN_PATHS_COPY_CMD_TYPE_SEGS      0
#define SPN_PATHS_COPY_CMD_TYPE_NODE      1
#define SPN_PATHS_COPY_CMD_TYPE_HEAD      2

// The command type is read with the tagged block id tag accessor.
#define SPN_PATHS_COPY_CMD_GET_TYPE(cmd)  SPN_TAGGED_BLOCK_ID_GET_TAG(cmd)
//
// RASTER HEAD
//
// The raster header and nodes use a strided layout so that the block is
// split in two with the low dword of the 64-bit keys stored in the
// first half of the block and the high dword in the second half.
//
// Note: a simple 32-bit .pkidx implies a 16 GB limit to the block pool.
//
// Note: we could interpret the 32-bit .pkidx as the low bits indexing
// the dwords in the low half of the block and the high bits indexing
// qwords. This will index a 32 GB block pool.
//
// raster head block
// {
// struct spinel_raster_header.lo
// {
// uint32_t nodes; // # of nodes -- not including header
// uint32_t ttsks; // # of ttsks
// uint32_t ttpks; // # of ttpks
// uint32_t pkidx; // block pool dword of first ttpk.lo
// uint32_t blocks; // # of blocks -- head+node+skb+pkb
//
// ... TTXK.lo ...
// };
//
// struct spinel_raster_header.hi
// {
// int32_t x0; // axis-aligned bounding box
// int32_t x1; // axis-aligned bounding box
// int32_t y0; // axis-aligned bounding box
// int32_t y1; // axis-aligned bounding box
// uint32_t na0; // reserved
//
// ... TTXK.hi ...
// };
// }
//
// Usage:
//
// - RASTERS_RECLAIM: this shader only needs to load the low dwords of
// each block because only the block and node counts and the TTXB id
// of each key are required.
//
// - RASTERS_PREFIX: this shader needs to vector load the values
// calculated by RASTERS_ALLOC and write them back to the block.
//
// - PLACE_TT*K: these shaders need to efficiently load the raster
// header.
//
// A raster node holds one block's worth of qwords.
#define SPN_RASTER_NODE_QWORDS            SPN_BLOCK_POOL_BLOCK_QWORDS

// The raster header is 10 dwords: 5 in the "lo" half, 5 in the "hi" half.
#define SPN_RASTER_HEAD_DWORDS            10
#define SPN_RASTER_HEAD_QWORDS            (SPN_RASTER_HEAD_DWORDS / 2)

// Dword offsets into spinel_raster_header.lo
#define SPN_RASTER_HEAD_LO_OFFSET_NODES   0
#define SPN_RASTER_HEAD_LO_OFFSET_TTSKS   1
#define SPN_RASTER_HEAD_LO_OFFSET_TTPKS   2
#define SPN_RASTER_HEAD_LO_OFFSET_PKIDX   3
#define SPN_RASTER_HEAD_LO_OFFSET_BLOCKS  4

// Dword offsets into spinel_raster_header.hi
#define SPN_RASTER_HEAD_HI_OFFSET_X0      0
#define SPN_RASTER_HEAD_HI_OFFSET_X1      1
#define SPN_RASTER_HEAD_HI_OFFSET_Y0      2
#define SPN_RASTER_HEAD_HI_OFFSET_Y1      3
#define SPN_RASTER_HEAD_HI_OFFSET_NA0     4
//
// RASTER HEAD COMPILE-TIME PREDICATES
//
//
// Parameters:
//
//   sgsz_ : subgroup size
//   x_    : element count being tested
//   i_    : index of a subgroup-sized span
//
// All parameters are parenthesized so that expression arguments such as
// `a + b` multiply and compare as a unit.
//

// True when x_ is at least i_ subgroup-sized spans.
#define SPN_RASTER_HEAD_ELEM_GTE(sgsz_,x_,i_) \
  ((x_) >= (i_) * (sgsz_))

// True when x_ falls in the half-open range [i_ * sgsz_, (i_ + 1) * sgsz_).
#define SPN_RASTER_HEAD_ELEM_IN_RANGE(sgsz_,x_,i_) \
  (SPN_RASTER_HEAD_ELEM_GTE(sgsz_,x_,i_) && \
   !SPN_RASTER_HEAD_ELEM_GTE(sgsz_,x_,(i_)+1))

// True when the whole of span i_ lies inside the raster header.
#define SPN_RASTER_HEAD_ENTIRELY_HEADER(sgsz_,i_) \
  SPN_RASTER_HEAD_ELEM_GTE(sgsz_,SPN_RASTER_HEAD_QWORDS,(i_)+1)

// True when the end of the raster header falls inside span i_.
#define SPN_RASTER_HEAD_PARTIALLY_HEADER(sgsz_,i_) \
  SPN_RASTER_HEAD_ELEM_IN_RANGE(sgsz_,SPN_RASTER_HEAD_QWORDS,i_)

// True when the current invocation's qword in span i_ is a header qword.
#define SPN_RASTER_HEAD_IS_HEADER(sgsz_,i_) \
  (gl_SubgroupInvocationID + (i_) * (sgsz_) < SPN_RASTER_HEAD_QWORDS)
//
// Hard requirements:
//
// - A TTXB "block pool" extent that is at least 1GB.
//
// - A virtual surface of at least 8K x 8K
//
// - A very large physical surface because it's advantageous to tile the
// physical surface since it's likely to shrink the post-place TTCK sorting
// step.
//
// TTXB BITS
// EXTENT +------------------------------------+
// SIZE (MB) | 22 23 24 25 26 27 |
// +----+------------------------------------+
// TTXB | 8 | 128 256 512 1024 2048 4096 |
// DWORDS | 16 | 256 512 1024 2048 4096 8192 |
// +----+------------------------------------+
//
//
// X/Y BITS
// SURFACE DIM +------------------------------------------------------+
// | 5 6 7 8* 9* 10 11 12 13 |
// +----+------------------------------------------------------+
// TILE | 2 | 128 256 512 1024 2048 4096 8192 16384 32768 |
// AXIS | 3 | 256 512 1024 2048 4096 8192 16384 32768 65536 |
// LOG2 | 4 | 512 1024 2048 4096 8192 16384 32768 65536 128K |
// +----+------------------------------------------------------+
// TILES^2 | 1024 4096 16384 65536 256K 1M 4M 16M 64M |
// +------------------------------------------------------+
//
// The following values should be pretty future-proof across all GPUs:
//
// - The minimum addressable subblock size is 16 dwords (64 bytes)
// to ensure there is enough space for a path or raster header and
// its payload.
//
// - Blocks are power-of-2 multiples of subblocks. Larger blocks can
// reduce allocation activity (fewer atomic adds).
//
// - 27 bits of TTXB_ID space implies a max of 4GB-32GB of
// rasterized paths depending on the size of the TTXB block.
// This could enable interesting use cases.
//
// - A virtual rasterization surface that's from +/-16K to +/-128K
// depending on the size of the TTXB block.
//
// - Keys that (optionally) only require a 32-bit high word
// comparison.
//
// - Support for a minimum of 256K layers. This can be practically
// raised to 1m or 2m layers.
//
//
// The size of the cohort determines the max number of rasters that can
// be submitted to the GPU in a single dispatch. We want this number to
// be as large as possible. A dispatch of 2048 subgroups is very large
// but there is potential to push this to 8192 with modifications to the
// segmenter and possibly an auxiliary extent.
//
// The max cohort id is reserved as it indicates an invalid TTRK.
//
// Each cohort member launches one subgroup per block of common path
// geometry.
//
// The rasterizer produces TTRK keys:
//
// TTRK (64-BIT COMPARE)
//
// 0 63
// | TTSB_ID | NEW_Y | NEW_X | X_LO | X_HI | Y | RASTER COHORT ID |
// +---------+--------+--------+------+------+------+------------------+
// | 27 | 1 (=0) | 1 (=0) | 3 | 9 | 12 | 11 |
//
// After segmentation the cohort id can be ignored as we've gathered
// enough statistics on the cohort to execute the prefix kernel.
//
//
// Field widths.  LO_ / HI_ name the low / high dword of the 64-bit key;
// LO_HI_ is the total width of a field that straddles both dwords.
//
#define SPN_TTRK_LO_BITS_TTSB_ID        SPN_TAGGED_BLOCK_ID_BITS_ID
#define SPN_TTRK_LO_HI_BITS_X           12
#define SPN_TTRK_LO_BITS_X              3
#define SPN_TTRK_HI_BITS_X              9
#define SPN_TTRK_HI_BITS_Y              12
#define SPN_TTRK_HI_BITS_COHORT         11

// Combined widths.
#define SPN_TTRK_BITS_XY                (SPN_TTRK_LO_HI_BITS_X + SPN_TTRK_HI_BITS_Y)
#define SPN_TTRK_LO_BITS_XY             SPN_TTRK_LO_BITS_X
#define SPN_TTRK_HI_BITS_XY             (SPN_TTRK_HI_BITS_X + SPN_TTRK_HI_BITS_Y)
#define SPN_TTRK_BITS_XY_COHORT         (SPN_TTRK_BITS_XY + SPN_TTRK_HI_BITS_COHORT)

// Bit offsets within the low and high dwords.
#define SPN_TTRK_LO_OFFSET_NEW_Y        SPN_TTRK_LO_BITS_TTSB_ID
#define SPN_TTRK_LO_OFFSET_NEW_X        (SPN_TTRK_LO_OFFSET_NEW_Y + 1)
#define SPN_TTRK_LO_OFFSET_X            (SPN_TTRK_LO_OFFSET_NEW_X + 1)
#define SPN_TTRK_HI_OFFSET_Y            SPN_TTRK_HI_BITS_X
#define SPN_TTRK_HI_OFFSET_COHORT       (32 - SPN_TTRK_HI_BITS_COHORT)

// In-place field masks.
#define SPN_TTRK_LO_MASK_NEW_Y          SPN_BITS_TO_MASK_AT(SPN_TTRK_LO_OFFSET_NEW_Y,1)
#define SPN_TTRK_LO_MASK_NEW_X          SPN_BITS_TO_MASK_AT(SPN_TTRK_LO_OFFSET_NEW_X,1)
#define SPN_TTRK_LO_MASK_X              SPN_BITS_TO_MASK_AT(SPN_TTRK_LO_OFFSET_X,SPN_TTRK_LO_BITS_X)
#define SPN_TTRK_HI_MASK_X              SPN_BITS_TO_MASK(SPN_TTRK_HI_BITS_X)
#define SPN_TTRK_HI_MASK_Y              SPN_BITS_TO_MASK_AT(SPN_TTRK_HI_OFFSET_Y,SPN_TTRK_HI_BITS_Y)
#define SPN_TTRK_HI_MASK_Y_COHORT       SPN_BITS_TO_MASK_AT(SPN_TTRK_HI_OFFSET_Y,SPN_TTRK_HI_BITS_Y + SPN_TTRK_HI_BITS_COHORT)
#define SPN_TTRK_HI_MASK_COHORT         SPN_BITS_TO_MASK_AT(SPN_TTRK_HI_OFFSET_COHORT,SPN_TTRK_HI_BITS_COHORT)

// Accessors taking a single dword (t_lo_ / t_hi_).
#define SPN_TTRK_LO_GET_TTSB_ID(t_lo_)  SPN_BITFIELD_EXTRACT(t_lo_,0,SPN_TTRK_LO_BITS_TTSB_ID)
#define SPN_TTRK_HI_GET_COHORT(t_hi_)   SPN_BITFIELD_EXTRACT(t_hi_,SPN_TTRK_HI_OFFSET_COHORT,SPN_TTRK_HI_BITS_COHORT)

// Accessors taking the two-dword key (t_).
#define SPN_TTRK_GET_COHORT(t_)         SPN_TTRK_HI_GET_COHORT(t_.y)
#define SPN_TTRK_SET_XY(t_,xy_)         SPN_GLSL_INSERT_UVEC2_UINT(t_,xy_,SPN_TTRK_LO_OFFSET_X,SPN_TTRK_BITS_XY)
#define SPN_TTRK_SET_COHORT(t_,c_)      (t_)[1] = SPN_BITFIELD_INSERT((t_)[1],c_,SPN_TTRK_HI_OFFSET_COHORT,SPN_TTRK_HI_BITS_COHORT)
#define SPN_TTRK_IS_NEW_X(t_)           (SPN_BITFIELD_EXTRACT((t_)[0],SPN_TTRK_LO_OFFSET_NEW_X,1) != 0)
#define SPN_TTRK_IS_NEW_Y(t_)           (SPN_BITFIELD_EXTRACT((t_)[0],SPN_TTRK_LO_OFFSET_NEW_Y,1) != 0)

// True when every cohort bit is set.
//
// NOTE(review): unlike the IS_NEW_* predicates this masks `t_` directly
// with the high-dword cohort mask instead of indexing it -- presumably
// it expects the high dword; confirm against callers.
#define SPN_TTRK_IS_INVALID_COHORT(t_)  (((t_) & SPN_TTRK_HI_MASK_COHORT) == SPN_TTRK_HI_MASK_COHORT)
//
// TTSK v1 ( DEFAULT )
//
// 0 63
// | TTSB_ID | SPAN | X | Y |
// +---------+---------+----+----+
// | 27 | 13 [<0] | 12 | 12 |
//
//
// TTPK v2 ( DEFAULT )
//
// 0 63
// | TTPB_ID | SPAN | X | Y |
// +---------+---------------+----+----+
// | 27 | 13 [+1,+4095] | 12 | 12 |
//
//
// A TTSK.SPAN inherits the TTRK[0] dword unmodified (in flux).
//
// A TTPK.SPAN has a range of [+1,+4095].
//
// A TTXK.SPAN of 0 indicates either:
//
// - an invalid key
// - a TTXK key pointing to all TTS_INVALID values
// - a TTPK key pointing to all zero values
//
// In all cases, this key can be skipped during rendering.
//
// TTXK.Y and TTXK.X are signed but stored as biased unsigned.
//
// An invalid TTXK has a span of zero and a TTXB_ID of all 1's.
//
// A TTSK reuses the TTRK NEW_X / NEW_Y predicates.
#define SPN_TTSK_IS_NEW_X(t_)           SPN_TTRK_IS_NEW_X(t_)
#define SPN_TTSK_IS_NEW_Y(t_)           SPN_TTRK_IS_NEW_Y(t_)

//
// Field widths.  LO_ / HI_ name the low / high dword of the 64-bit key;
// LO_HI_ is the total width of a field that straddles both dwords.
//
#define SPN_TTXK_LO_BITS_TTXB_ID        SPN_TTRK_LO_BITS_TTSB_ID
#define SPN_TTXK_LO_HI_BITS_SPAN        13
#define SPN_TTXK_LO_BITS_SPAN           5 // straddles a
#define SPN_TTXK_HI_BITS_SPAN           8 // word boundary
#define SPN_TTXK_HI_BITS_X              SPN_TTRK_LO_HI_BITS_X
#define SPN_TTXK_HI_BITS_Y              SPN_TTRK_HI_BITS_Y
#define SPN_TTXK_HI_BITS_XY             (SPN_TTXK_HI_BITS_Y + SPN_TTXK_HI_BITS_X)

// Bit offsets within the low and high dwords.
#define SPN_TTXK_LO_OFFSET_SPAN         SPN_TTXK_LO_BITS_TTXB_ID
#define SPN_TTXK_HI_OFFSET_X            (32 - SPN_TTXK_HI_BITS_XY) // 8
#define SPN_TTXK_HI_OFFSET_Y            (32 - SPN_TTXK_HI_BITS_Y) // 20
#define SPN_TTXK_HI_OFFSET_XY           (32 - SPN_TTXK_HI_BITS_XY) // 8

// In-place field masks.
#define SPN_TTXK_LO_MASK_TTXB_ID        SPN_BITS_TO_MASK(SPN_TTXK_LO_BITS_TTXB_ID)
#define SPN_TTXK_LO_MASK_SPAN           SPN_BITS_TO_MASK_AT(SPN_TTXK_LO_OFFSET_SPAN,SPN_TTXK_LO_BITS_SPAN)
#define SPN_TTXK_HI_MASK_SPAN           SPN_BITS_TO_MASK(SPN_TTXK_HI_BITS_SPAN)
#define SPN_TTXK_HI_MASK_X              SPN_BITS_TO_MASK_AT(SPN_TTXK_HI_OFFSET_X,SPN_TTXK_HI_BITS_X)
#define SPN_TTXK_HI_MASK_Y              SPN_BITS_TO_MASK_AT(SPN_TTXK_HI_OFFSET_Y,SPN_TTXK_HI_BITS_Y)
#define SPN_TTXK_HI_MASK_XY             SPN_BITS_TO_MASK_AT(SPN_TTXK_HI_OFFSET_X,SPN_TTXK_HI_BITS_XY)

// An X increment of one, positioned at the X field of the high dword.
#define SPN_TTXK_HI_ONE_X               (1u << SPN_TTXK_HI_OFFSET_X)

// Accessors taking a single dword (t_lo_ / t_hi_).
#define SPN_TTXK_LO_GET_TTXB_ID(t_lo_)  SPN_BITFIELD_EXTRACT(t_lo_,0,SPN_TTXK_LO_BITS_TTXB_ID)
#define SPN_TTXK_HI_GET_XY(t_hi_)       SPN_BITFIELD_EXTRACT(t_hi_,SPN_TTXK_HI_OFFSET_XY,SPN_TTXK_HI_BITS_XY)

// Getters taking the two-dword key (t_).
#define SPN_TTXK_GET_MASKED_XY(t_)      ((t_)[1] & SPN_TTXK_HI_MASK_XY)
#define SPN_TTXK_GET_TTXB_ID(t_)        SPN_TTXK_LO_GET_TTXB_ID((t_)[0])
#define SPN_TTXK_GET_SPAN(t_)           SPN_GLSL_EXTRACT_UVEC2_INT((t_),SPN_TTXK_LO_OFFSET_SPAN,SPN_TTXK_LO_HI_BITS_SPAN)
#define SPN_TTXK_GET_X(t_)              SPN_BITFIELD_EXTRACT((t_)[1],SPN_TTXK_HI_OFFSET_X,SPN_TTXK_HI_BITS_X)
#define SPN_TTXK_GET_Y(t_)              SPN_BITFIELD_EXTRACT((t_)[1],SPN_TTXK_HI_OFFSET_Y,SPN_TTXK_HI_BITS_Y)
#define SPN_TTXK_GET_XY(t_)             SPN_TTXK_HI_GET_XY((t_)[1])

// Setters taking the two-dword key (t_).  The key argument is
// parenthesized before indexing, matching the getters above, so that
// an expression argument is indexed as a whole.
#define SPN_TTXK_SET_TTXB_ID(t_,i_)     (t_)[0] = SPN_BITFIELD_INSERT((t_)[0],(i_),0,SPN_TTXK_LO_BITS_TTXB_ID)
#define SPN_TTXK_SET_SPAN(t_,s_)        SPN_GLSL_INSERT_UVEC2_UINT((t_),(s_),SPN_TTXK_LO_OFFSET_SPAN,SPN_TTXK_LO_HI_BITS_SPAN)
#define SPN_TTXK_SET_XY(t_,i_)          (t_)[1] = SPN_BITFIELD_INSERT((t_)[1],(i_),SPN_TTXK_HI_OFFSET_XY,SPN_TTXK_HI_BITS_XY)

// Invalid key: a TTXB_ID of all 1's and zero in every other bit.
#define SPN_TTXK_INVALID                uvec2(SPN_TTXK_LO_MASK_TTXB_ID,0)
//
// XY
//
// 0 31
// | X | Y |
// +----+----+
// | 12 | 20 |
//
// A few shaders probe the XY value.
//
// The max value of X is 4095.
//
// Y is every bit of the dword above the X field.
#define SPN_XY_GET_Y(xy_)  SPN_BITFIELD_EXTRACT(xy_,SPN_TTXK_HI_BITS_X,32 - SPN_TTXK_HI_BITS_X)

// X is the low SPN_TTXK_HI_BITS_X bits.
#define SPN_XY_X_MASK      SPN_BITS_TO_MASK(SPN_TTXK_HI_BITS_X)
//
// TTCK (64-BIT COMPARE) -- DEFAULT
//
// TODO(fxbug.dev/112827): The TTSB encoding should encode TTP values between
// -1073741824 to 1073741823 (signed 31 bit integer). LAYER bits rise to 19.
//
// 0 63
// | PAYLOAD/TTSB/TTPB_ID | PREFIX | LAYER | X | Y |
// +----------------------+--------+-------+-----+-----+
// | 27 | 1 | 18 | 9 | 9 |
//
// 0 31 63
// | PAYLOAD/TTSB/TTPB_ID | PREFIX | LAYER_LO | LAYER_HI | X | Y |
// +----------------------+--------+----------+----------+-----+-----+
// | 27 | 1 | 4 | 14 | 9 | 9 |
//
//
// TTCK.X and TTCK.Y are unsigned
//
// +-----------+-------------+
// | TILE SIZE | MAX SURFACE |
// +-----------+-------------+
// | 16x16 | 8K x 8K | NVIDIA, AMD
// | 8x8 | 4K x 4K | INTEL GEN+, Mali G52+
// | 4x4 | 2K x 2K | Mali G31, SwiftShader
// +-----------+-------------+
//
//
// Field widths.  LO_ / HI_ name the low / high dword of the 64-bit key;
// LO_HI_ is the total width of a field that straddles both dwords.
//
#define SPN_TTCK_LO_BITS_TTXB_ID        SPN_TAGGED_BLOCK_ID_BITS_ID
#define SPN_TTCK_LO_BITS_PREFIX         1
#define SPN_TTCK_LO_HI_BITS_LAYER       18
#define SPN_TTCK_LO_BITS_LAYER          4
#define SPN_TTCK_HI_BITS_LAYER          14
#define SPN_TTCK_HI_BITS_X              9
#define SPN_TTCK_HI_BITS_Y              9
#define SPN_TTCK_HI_BITS_XY             (SPN_TTCK_HI_BITS_X + SPN_TTCK_HI_BITS_Y)

// NOTE(review): this sums the full 18-bit layer width (LO_HI), so it
// evaluates to 36 -- confirm that a total spanning both dwords is
// intended despite the HI_ prefix.
#define SPN_TTCK_HI_BITS_LXY            (SPN_TTCK_LO_HI_BITS_LAYER + SPN_TTCK_HI_BITS_X + SPN_TTCK_HI_BITS_Y)

// Bit offsets within the low and high dwords.
#define SPN_TTCK_LO_OFFSET_PREFIX       SPN_TTCK_LO_BITS_TTXB_ID
#define SPN_TTCK_LO_OFFSET_LAYER        (SPN_TTCK_LO_OFFSET_PREFIX + SPN_TTCK_LO_BITS_PREFIX)
#define SPN_TTCK_HI_OFFSET_X            (32 - SPN_TTCK_HI_BITS_XY)
#define SPN_TTCK_HI_OFFSET_Y            (32 - SPN_TTCK_HI_BITS_Y)
#define SPN_TTCK_HI_OFFSET_XY           (32 - SPN_TTCK_HI_BITS_XY)

// In-place field masks.
#define SPN_TTCK_LO_MASK_TTXB_ID        SPN_BITS_TO_MASK(SPN_TTCK_LO_BITS_TTXB_ID)
#define SPN_TTCK_LO_MASK_PREFIX         SPN_BITS_TO_MASK_AT(SPN_TTCK_LO_OFFSET_PREFIX,SPN_TTCK_LO_BITS_PREFIX)
#define SPN_TTCK_LO_MASK_LAYER          SPN_BITS_TO_MASK_AT(SPN_TTCK_LO_OFFSET_LAYER,SPN_TTCK_LO_BITS_LAYER)
#define SPN_TTCK_HI_MASK_LAYER          SPN_BITS_TO_MASK(SPN_TTCK_HI_BITS_LAYER)
#define SPN_TTCK_HI_MASK_XY             SPN_BITS_TO_MASK_AT(SPN_TTCK_HI_OFFSET_XY,SPN_TTCK_HI_BITS_XY)

//
// Accessors.  `t_` is the two-dword key, `t_lo_` is the low dword.
//
// Both are parenthesized before being indexed or masked so that an
// expression argument (e.g. `a | b`) is treated as a single operand.
//
#define SPN_TTCK_GET_TTXB_ID(t_)        ((t_)[0] & SPN_TTCK_LO_MASK_TTXB_ID)
#define SPN_TTCK_LO_GET_TTXB_ID(t_lo_)  ((t_lo_) & SPN_TTCK_LO_MASK_TTXB_ID)
#define SPN_TTCK_IS_PREFIX(t_)          (((t_)[0] & SPN_TTCK_LO_MASK_PREFIX) != 0)
#define SPN_TTCK_LO_IS_PREFIX(t_lo_)    (((t_lo_) & SPN_TTCK_LO_MASK_PREFIX) != 0)
#define SPN_TTCK_GET_LAYER(t_)          SPN_GLSL_EXTRACT_UVEC2_UINT(t_,SPN_TTCK_LO_OFFSET_LAYER,SPN_TTCK_LO_HI_BITS_LAYER)
#define SPN_TTCK_SET_LAYER(t_,l_)       SPN_GLSL_INSERT_UVEC2_UINT(t_,l_,SPN_TTCK_LO_OFFSET_LAYER,SPN_TTCK_LO_HI_BITS_LAYER)
#define SPN_TTCK_GET_Y(t_)              SPN_BITFIELD_EXTRACT((t_)[1],SPN_TTCK_HI_OFFSET_Y,SPN_TTCK_HI_BITS_Y)
#define SPN_TTCK_GET_X(t_)              SPN_BITFIELD_EXTRACT((t_)[1],SPN_TTCK_HI_OFFSET_X,SPN_TTCK_HI_BITS_X)

// Adds d_ to the X field in place by adding it, shifted, to the high dword.
#define SPN_TTCK_ADD_X(t_,d_)           ((t_)[1] += ((d_) << SPN_TTCK_HI_OFFSET_X))

// Largest layer value: all 18 layer bits set.
#define SPN_TTCK_LAYER_MAX              SPN_BITS_TO_MASK(SPN_TTCK_LO_HI_BITS_LAYER)
//
// 16x16 TILE TRACE SUBPIXEL
//
// 0 31
// | TX | DX | TY | DY |
// +----+----+----+----+
// | 9 | 7 | 9 | 7 |
//
// 8x8 TILE TRACE SUBPIXEL (BIAS X)
//
// 0 31
// | TX | DX | TY | DY |
// +----+----+----+----+
// | 9 | 8 | 8 | 7 |
//
// 8x8 TILE TRACE SUBPIXEL (BIAS Y)
//
// 0 31
// | TX | DX | TY | DY |
// +----+----+----+----+
// | 8 | 7 | 9 | 8 |
//
// 4x4 TILE TRACE SUBPIXEL
//
// 0 31
// | TX | DX | TY | DY |
// +----+----+----+----+
// | 8 | 8 | 8 | 8 |
//
// A 32-bit encoding of a subpixel-resolution line segment in a tile up to 16x16
// (WxH) pixels.
//
// Subpixel resolution is 5 bits.
//
// We're using this representation across all target
// architectures.
//
// A 16x16 tile X is encoded as:
//
// TX : 9 : unsigned min(x0,x1) tile subpixel coordinate with a range of
// [0,511].
//
// DX : 7 : signed subpixel delta x1-x0. The range of the delta is [-32,32]
// including 0. Note that with 7 signed bits the range of the
// bitfield is [-64,63]. An "invalid" TTS relies on DX being an
// infeasible value.
//
// A 16x16 tile Y is encoded as:
//
// TY : 9 : unsigned min(y0,y1) tile subpixel coordinate with a range of
// [0,511].
//
// DY : 7 : signed subpixel delta (y1-y0). The range of delta is [-32,-1] or
// [+1,+32] because horizontal lines are not encoded.
//
// NOTE(allanmac): There are assumptions in the shaders that the X and Y
// subpixel resolutions are the same. Despite this, let's keep the X and Y
// definitions separated.
//
// NOTE(allanmac): The subpixel resolution *could* be increased on devices with
// tiles smaller than 16x16. Just beware of the floating point multiplication
// using the SPN_TTS_FILL_MAX_AREA_RCP_F32 reciprocal -- other approaches can be
// considered.
//
#if defined(SPN_DEVICE_TILE_WIDTH_LOG2) && defined(SPN_DEVICE_TILE_HEIGHT_LOG2)
//
// As tile sizes shrink, bits become available.
//
// On the 8x8 tile, arbitrarily default to providing more "Y" resolution because
// the virtual workspace is reflected.
//
// Define SPN_DEVICE_TILE_HEIGHT_BIAS or SPN_DEVICE_TILE_WIDTH_BIAS to
// explicitly steer where the extra bits are assigned.
//
//
// Select the TTS bitfield widths (TX/DX/TY/DY always total 32 bits) and
// the subpixel resolution from the device tile size.
//
// 16x16 tile
//
#if (SPN_DEVICE_TILE_WIDTH_LOG2 == 4) && (SPN_DEVICE_TILE_HEIGHT_LOG2 == 4)
#define SPN_TTS_BITS_TX 9 // [ 0, 511]
#define SPN_TTS_BITS_DX 7 // [ -64, 63] -> [-32,32]
#define SPN_TTS_BITS_TY 9 // [ 0, 511]
#define SPN_TTS_BITS_DY 7 // [ -64, 63] -> [-32,32]
#define SPN_TTS_SUBPIXEL_X_LOG2 5
#define SPN_TTS_SUBPIXEL_Y_LOG2 5
//
// 8x8 tile with neither bias macro defined: same layout as the 16x16 tile
//
#elif (SPN_DEVICE_TILE_WIDTH_LOG2 == 3) && (SPN_DEVICE_TILE_HEIGHT_LOG2 == 3) && !defined(SPN_DEVICE_TILE_WIDTH_BIAS) \
&& !defined(SPN_DEVICE_TILE_HEIGHT_BIAS)
#define SPN_TTS_BITS_TX 9 // [ 0, 511]
#define SPN_TTS_BITS_DX 7 // [ -64, 63] -> [-32,32]
#define SPN_TTS_BITS_TY 9 // [ 0, 511]
#define SPN_TTS_BITS_DY 7 // [ -64, 63] -> [-32,32]
#define SPN_TTS_SUBPIXEL_X_LOG2 5
#define SPN_TTS_SUBPIXEL_Y_LOG2 5
//
// 8x8 tile with SPN_DEVICE_TILE_WIDTH_BIAS: rejected by the #error below.
// If both bias macros are defined, this branch is the one taken.
//
#elif (SPN_DEVICE_TILE_WIDTH_LOG2 == 3) && (SPN_DEVICE_TILE_HEIGHT_LOG2 == 3) && defined(SPN_DEVICE_TILE_WIDTH_BIAS) // (BIAS X)
#error "No support for SPN_DEVICE_TILE_WIDTH_BIAS or SPN_DEVICE_TILE_HEIGHT_BIAS until the octant logic is updated."
#define SPN_TTS_BITS_TX 9 // [ 0, 511]
#define SPN_TTS_BITS_DX 8 // [-128, 127] -> [-64,64]
#define SPN_TTS_BITS_TY 8 // [ 0, 255]
#define SPN_TTS_BITS_DY 7 // [ -64, 63] -> [-32,32]
#define SPN_TTS_SUBPIXEL_X_LOG2 6
#define SPN_TTS_SUBPIXEL_Y_LOG2 5
//
// 8x8 tile with SPN_DEVICE_TILE_HEIGHT_BIAS: rejected by the #error below.
//
#elif (SPN_DEVICE_TILE_WIDTH_LOG2 == 3) && (SPN_DEVICE_TILE_HEIGHT_LOG2 == 3) && defined(SPN_DEVICE_TILE_HEIGHT_BIAS) // (BIAS Y)
#error "No support for SPN_DEVICE_TILE_WIDTH_BIAS or SPN_DEVICE_TILE_HEIGHT_BIAS until the octant logic is updated."
#define SPN_TTS_BITS_TX 8 // [ 0, 255]
#define SPN_TTS_BITS_DX 7 // [ -64, 63] -> [-32,32]
#define SPN_TTS_BITS_TY 9 // [ 0, 511]
#define SPN_TTS_BITS_DY 8 // [-128, 127] -> [-64,64]
#define SPN_TTS_SUBPIXEL_X_LOG2 5
#define SPN_TTS_SUBPIXEL_Y_LOG2 6
//
// 4x4 tile
//
#elif (SPN_DEVICE_TILE_WIDTH_LOG2 == 2) && (SPN_DEVICE_TILE_HEIGHT_LOG2 == 2)
#define SPN_TTS_BITS_TX 8 // [ 0, 255]
#define SPN_TTS_BITS_DX 8 // [-128, 127] -> [-64,64]
#define SPN_TTS_BITS_TY 8 // [ 0, 255]
#define SPN_TTS_BITS_DY 8 // [-128, 127] -> [-64,64]
#define SPN_TTS_SUBPIXEL_X_LOG2 6
#define SPN_TTS_SUBPIXEL_Y_LOG2 6
//
// NOTE(review): this chain sits inside a guard that already requires
// both macros to be defined, so this branch is reached when the
// width/height combination is unsupported (e.g. non-square), not when a
// macro is undefined -- the message below is misleading.
//
#else
#error "SPN_DEVICE_TILE_WIDTH_LOG2 or SPN_DEVICE_TILE_HEIGHT_LOG2 not defined!"
#endif
//
//
//
// Subpixels per pixel along each axis.
#define SPN_TTS_SUBPIXEL_X_SIZE        (1 << SPN_TTS_SUBPIXEL_X_LOG2)
#define SPN_TTS_SUBPIXEL_Y_SIZE        (1 << SPN_TTS_SUBPIXEL_Y_LOG2)

// Bits of TX / TY that remain above the subpixel bits.
#define SPN_TTS_PIXEL_X_LOG2           (SPN_TTS_BITS_TX - SPN_TTS_SUBPIXEL_X_LOG2)
#define SPN_TTS_PIXEL_Y_LOG2           (SPN_TTS_BITS_TY - SPN_TTS_SUBPIXEL_Y_LOG2)

// Floating point subpixel resolution and the pixel <-> subpixel scale
// factors derived from it.
#define SPN_TTS_SUBPIXEL_X_RESL        float(SPN_TTS_SUBPIXEL_X_SIZE)
#define SPN_TTS_SUBPIXEL_Y_RESL        float(SPN_TTS_SUBPIXEL_Y_SIZE)
#define SPN_TTS_SUBPIXEL_X_SCALE_UP    SPN_TTS_SUBPIXEL_X_RESL
#define SPN_TTS_SUBPIXEL_Y_SCALE_UP    SPN_TTS_SUBPIXEL_Y_RESL
#define SPN_TTS_SUBPIXEL_X_SCALE_DOWN  (1.0f / SPN_TTS_SUBPIXEL_X_RESL)
#define SPN_TTS_SUBPIXEL_Y_SCALE_DOWN  (1.0f / SPN_TTS_SUBPIXEL_Y_RESL)

//
// TTXK.X and .Y are biased and unsigned
//
// Tile extent in subpixels along each axis.
#define SPN_TILE_SUBPIXEL_X_BITS_LOG2  (SPN_DEVICE_TILE_WIDTH_LOG2 + SPN_TTS_SUBPIXEL_X_LOG2)
#define SPN_TILE_SUBPIXEL_Y_BITS_LOG2  (SPN_DEVICE_TILE_HEIGHT_LOG2 + SPN_TTS_SUBPIXEL_Y_LOG2)
#define SPN_TILE_SUBPIXEL_X_SIZE       (1 << SPN_TILE_SUBPIXEL_X_BITS_LOG2)
#define SPN_TILE_SUBPIXEL_Y_SIZE       (1 << SPN_TILE_SUBPIXEL_Y_BITS_LOG2)

// Biases: half of the representable range, in subpixel units ...
#define SPN_TTXK_X_BIAS                (1 << (SPN_TTXK_HI_BITS_X + SPN_TILE_SUBPIXEL_X_BITS_LOG2 - 1))
#define SPN_TTXK_Y_BIAS                (1 << (SPN_TTXK_HI_BITS_Y + SPN_TILE_SUBPIXEL_Y_BITS_LOG2 - 1))

// ... and in tile units.
#define SPN_TTXK_TILE_X_BIAS           (1 << (SPN_TTXK_HI_BITS_X - 1))
#define SPN_TTXK_TILE_Y_BIAS           (1 << (SPN_TTXK_HI_BITS_Y - 1))

// Both subpixel biases as a vector.
#define SPN_TTXK_XY_BIAS               ivec2(SPN_TTXK_X_BIAS, SPN_TTXK_Y_BIAS)
//
//
//
//
// Bit offsets of the TTS fields: TX, DX, TY and DY are packed in that
// order starting at bit 0.
//
#define SPN_TTS_OFFSET_TX              0
#define SPN_TTS_OFFSET_DX              (SPN_TTS_OFFSET_TX + SPN_TTS_BITS_TX)
#define SPN_TTS_OFFSET_TY              (SPN_TTS_OFFSET_DX + SPN_TTS_BITS_DX)
#define SPN_TTS_OFFSET_DY              (SPN_TTS_OFFSET_TY + SPN_TTS_BITS_TY)

// Offsets of the pixel part of TX / TY: the bits above the subpixel bits.
#define SPN_TTS_OFFSET_TX_PIXEL        (SPN_TTS_OFFSET_TX + SPN_TTS_SUBPIXEL_X_LOG2)
#define SPN_TTS_OFFSET_TY_PIXEL        (SPN_TTS_OFFSET_TY + SPN_TTS_SUBPIXEL_Y_LOG2)

//
// In-place field masks.
//
// SPN_BITS_TO_MASK_AT is invoked as (offset, bits) at every other use in
// this header, so the DX and TY masks pass the field offset first and
// the field width second.
//
#define SPN_TTS_MASK_TX                SPN_BITS_TO_MASK(SPN_TTS_BITS_TX)
#define SPN_TTS_MASK_DX                SPN_BITS_TO_MASK_AT(SPN_TTS_OFFSET_DX,SPN_TTS_BITS_DX)
#define SPN_TTS_MASK_TY                SPN_BITS_TO_MASK_AT(SPN_TTS_OFFSET_TY,SPN_TTS_BITS_TY)

// DX and DY are extracted from an int() conversion of the TTS because
// they are signed fields.
#define SPN_TTS_GET_DX(tts_)           SPN_BITFIELD_EXTRACT(int(tts_),SPN_TTS_OFFSET_DX,SPN_TTS_BITS_DX)
#define SPN_TTS_GET_DY(tts_)           SPN_BITFIELD_EXTRACT(int(tts_),SPN_TTS_OFFSET_DY,SPN_TTS_BITS_DY)

// TX and TY are unsigned and can be read whole, or as their subpixel
// (low) and pixel (high) parts.
#define SPN_TTS_GET_TX_SUBPIXEL(tts_)  SPN_BITFIELD_EXTRACT(uint(tts_),SPN_TTS_OFFSET_TX,SPN_TTS_SUBPIXEL_X_LOG2)
#define SPN_TTS_GET_TY_SUBPIXEL(tts_)  SPN_BITFIELD_EXTRACT(uint(tts_),SPN_TTS_OFFSET_TY,SPN_TTS_SUBPIXEL_Y_LOG2)
#define SPN_TTS_GET_TX_PIXEL(tts_)     SPN_BITFIELD_EXTRACT(uint(tts_),SPN_TTS_OFFSET_TX_PIXEL,SPN_TTS_PIXEL_X_LOG2)
#define SPN_TTS_GET_TY_PIXEL(tts_)     SPN_BITFIELD_EXTRACT(uint(tts_),SPN_TTS_OFFSET_TY_PIXEL,SPN_TTS_PIXEL_Y_LOG2)
#define SPN_TTS_GET_TX(tts_)           SPN_BITFIELD_EXTRACT(uint(tts_),SPN_TTS_OFFSET_TX,SPN_TTS_BITS_TX)
#define SPN_TTS_GET_TY(tts_)           SPN_BITFIELD_EXTRACT(uint(tts_),SPN_TTS_OFFSET_TY,SPN_TTS_BITS_TY)
//
// Use impossible DX/DY values for TTS_INVALID
//
// The invalid marker is the all-zero word, so every TTS field in it --
// including DX and DY -- is made of zero bits.
//
// NOTE(review): this relies on a real segment never encoding DX == DY == 0
// -- confirm with the code that emits TTS words.
//
#define SPN_TTS_INVALID 0
//
// Note that for a subpixel resolution of 5 bits, 2048.0 can be represented
// exactly with fp16... fortuitous!
//
// MAX_AREA = 2 * X_SIZE * Y_SIZE (2 * 32 * 32 = 2048 at 5 bits)
// MAX_AREA_2 = 2 * MAX_AREA
// EVEN_ODD_MASK = MAX_AREA_2 - 1
// MAX_AREA_RCP_F32 = 1 / MAX_AREA, evaluated as a float
//
// NOTE(review): EVEN_ODD_MASK is an all-ones low-bit mask only if both
// subpixel sizes are powers of 2; they are defined outside this section --
// confirm.
//
#define SPN_TTS_FILL_MAX_AREA (2 * SPN_TTS_SUBPIXEL_X_SIZE * SPN_TTS_SUBPIXEL_Y_SIZE)
#define SPN_TTS_FILL_MAX_AREA_2 (2 * SPN_TTS_FILL_MAX_AREA)
#define SPN_TTS_FILL_EVEN_ODD_MASK (SPN_TTS_FILL_MAX_AREA_2 - 1)
#define SPN_TTS_FILL_MAX_AREA_RCP_F32 (1.0f / SPN_TTS_FILL_MAX_AREA)
//
//
//
#endif // defined(SPN_DEVICE_TILE_WIDTH_LOG2) && defined(SPN_DEVICE_TILE_HEIGHT_LOG2)
//
// RASTER COHORT METADATA
//
// NOTE: Don't trim array even though the last entry in the pow2 array is not
// used because there are aligned structure members following the metadata
// table.
//
// FIXME(allanmac): the UINT64_MAX key is reserved in this segmenting phase.
// This implies that the cohort id of all 1's needs to be reserved. TL;DR: the
// raster builder must only build (SPN_RASTER_COHORT_METAS_SIZE-1) rasters.
//
// FIXME(allanmac): split RKOFF from UVEC4/alloc
//
// struct spinel_rc_meta
// {
// SPN_TYPE_U32VEC2 alloc [SPN_RASTER_COHORT_METAS_SIZE]; // block pool reads -- uninitialized
// SPN_TYPE_U32 rk_off[SPN_RASTER_COHORT_METAS_SIZE]; // offset of rk keys -- zeroed
// SPN_TYPE_U32 blocks[SPN_RASTER_COHORT_METAS_SIZE]; // number of blocks -- zeroed
// SPN_TYPE_U32 ttpks [SPN_RASTER_COHORT_METAS_SIZE]; // number of TTPK keys -- zeroed
// SPN_TYPE_U32 ttrks [SPN_RASTER_COHORT_METAS_SIZE]; // number of TTRK keys -- zeroed
// //
// // FIXME(allanmac): the signed bounding box will be added to the meta
// // using the atomic signed min/max trick.
// //
// };
//
//
// The metadata arrays hold a power-of-2 number of entries whose log2 is
// SPN_TTRK_HI_BITS_COHORT.
//
#define SPN_RASTER_COHORT_METAS_SIZE_LOG2 SPN_TTRK_HI_BITS_COHORT
#define SPN_RASTER_COHORT_METAS_SIZE (1 << SPN_RASTER_COHORT_METAS_SIZE_LOG2)
//
// Component indices into a cohort's 2-component alloc[] entry.
//
#define SPN_RASTER_COHORT_META_ALLOC_OFFSET_SK_READS 0 // alloc[0] - block holding first ttsk (head)
#define SPN_RASTER_COHORT_META_ALLOC_OFFSET_PK_READS 1 // alloc[1] - block holding first ttpk (head/node)
//
// One fewer than the number of table entries.
//
// NOTE(review): presumably this is the limit that keeps the all-1's cohort id
// reserved, as described in the FIXME on the UINT64_MAX key -- confirm.
//
#define SPN_RASTER_COHORT_MAX_SIZE (SPN_RASTER_COHORT_METAS_SIZE - 1)
//
// STYLING STRUCTS
//
//
// LAYER
//
// | LAYER |
// +---------------+
// | cmds | parent |
// +------+--------+
// 0 1 2
//
// GROUP
//
// | GROUP |
// +--------------+---------+---------------+
// | parents | range | cmds |
// | depth | base | lo | hi | enter | leave |
// +-------+------+----+----+-------+-------+
// 0 1 2 3 4 5 6
//
//
// It's simpler to define the group as a uvec2[3]:
//
// struct spinel_group_node
// {
// spinel_group_parents parents; // path of parent groups leading back to root
// spinel_group_range range; // range of layers enclosed by this group
// spinel_group_cmds cmds; // enter/leave command indices
// };
//
// The RENDER kernel lays out the current layer node, group node and
// flags in either registers or shared memory:
//
// LGF -- layer / group / flags
// optional
// | current layer | current group | | | |
// +---------------+------------+-------+-------------+.......+.......+.......+....
// | layer | parents | range | cmds | layer | group | flags | ...
// | cmds parent | depth base | lo hi | enter leave | id | id | |
// +------+--------+------+-----+---+---+------+------+.......+.......+.......+....
// 0 1 2 3 4 5 6 7 8 9 10 11
//
//
// struct spinel_layer_node
// {
// uint32_t cmds; // starting index of sequence of command dwords
// uint32_t parent; // index of parent group
// };
//
// struct spinel_group_parents
// {
// uint32_t depth;
// uint32_t base;
// };
//
// struct spinel_group_range
// {
// uint32_t lo; // first layer
// uint32_t hi; // last layer
// };
//
// struct spinel_group_cmds
// {
// uint32_t enter; // starting index of sequence of command dwords
// uint32_t leave; // starting index of sequence of command dwords
// };
//
//
//
//
//
// LAYER NODE -- dword offsets of each member and the node size in dwords.
//
// Matches struct spinel_layer_node: { cmds, parent }.
//
#define SPN_STYLING_LAYER_OFFSET_CMDS 0
#define SPN_STYLING_LAYER_OFFSET_PARENT 1
#define SPN_STYLING_LAYER_COUNT_DWORDS 2
//
// GROUP NODE -- dword offsets of each member and the node size in dwords.
//
// Matches struct spinel_group_node flattened to six dwords:
// parents { depth, base }, range { lo, hi }, cmds { enter, leave }.
//
#define SPN_STYLING_GROUP_OFFSET_PARENTS_DEPTH 0
#define SPN_STYLING_GROUP_OFFSET_PARENTS_BASE 1
#define SPN_STYLING_GROUP_OFFSET_RANGE_LO 2
#define SPN_STYLING_GROUP_OFFSET_RANGE_HI 3
#define SPN_STYLING_GROUP_OFFSET_CMDS_ENTER 4
#define SPN_STYLING_GROUP_OFFSET_CMDS_LEAVE 5
#define SPN_STYLING_GROUP_COUNT_DWORDS 6
//
//
//
//
// STYLING CMDS WORD
//
// A 32-bit cmds word is split into two bitfields:
//
// 0 29 32
// | BASE | COUNT |
// +--------+-------+
// | 29 | 3 |
//
// COUNT takes the top 3 bits and BASE takes whatever is left below it.
//
// BASE field
//
#define SPN_STYLING_CMDS_BITS_BASE (32 - SPN_STYLING_CMDS_BITS_COUNT)
#define SPN_STYLING_CMDS_OFFSET_BASE 0
#define SPN_STYLING_CMDS_MAX_BASE ((1 << SPN_STYLING_CMDS_BITS_BASE) - 1)
#define SPN_STYLING_CMDS_GET_BASE(c_) SPN_BITFIELD_EXTRACT(c_, SPN_STYLING_CMDS_OFFSET_BASE, SPN_STYLING_CMDS_BITS_BASE)
//
// COUNT field -- starts where BASE ends
//
#define SPN_STYLING_CMDS_BITS_COUNT 3
#define SPN_STYLING_CMDS_OFFSET_COUNT SPN_STYLING_CMDS_BITS_BASE
#define SPN_STYLING_CMDS_MAX_COUNT ((1 << SPN_STYLING_CMDS_BITS_COUNT) - 1)
#define SPN_STYLING_CMDS_GET_COUNT(c_) SPN_BITFIELD_EXTRACT(c_, SPN_STYLING_CMDS_OFFSET_COUNT, SPN_STYLING_CMDS_BITS_COUNT)
//
// DISABLED: this union is compiled out by the "#if 0" guard.
//
// It overlays three views of the same storage: a 4-component float vector,
// four named floats (dx, p0, dy, denom) and an array of four slopes. The
// member types still use the "skc_" prefix.
//
#if 0
union spinel_gradient_vector
{
skc_float4 f32v4;
struct {
skc_float dx;
skc_float p0;
skc_float dy;
skc_float denom;
};
union skc_gradient_slope slopes[4];
};
#endif
//
// FIXME -- will eventually need to know if this gradient is
// perspective transformed and if so additional values will need to be
// encoded
//
// VERSION 1
// =============================================================
//
// LINEAR GRADIENT HEADER FOR N STOPS
//
// +----------+----------+------------+----------+-------------+
// | HEADER | INFO | LUTS | FLOORS | COLORS |
// +----------+----------+------------+----------+-------------+
// | uintv4 | u32v2[1] | f32v2[N-1] | f32[N-2] | ushort2[4N] |
// +----------+----------+------------+----------+-------------+
//
// COLOR PAIR WORD EXPANSION TOTAL
// +------------+---------------------------------+--------+-------------------------+
// | ushort2 | 4 + 2 + 2*(N-1) + N - 2 + 4*N | 7N + 2 | = 7(N-1+1)+2 = 7(N-1)+9 |
// +------------+---------------------------------+--------+-------------------------+
//
// COLOR LAYOUT:
//
// R[0]R[1], R[1]R[2], ... R[N-1]R[N-1]
// G[0]G[1], G[1]G[2], ... G[N-1]G[N-1]
// B[0]B[1], B[1]B[2], ... B[N-1]B[N-1]
// A[0]A[1], A[1]A[2], ... A[N-1]A[N-1]
//
//
// MINIMUM DWORDS: N=2 --> 16
//
//
// VERSION 2
// =============================================================
//
// LINEAR GRADIENT DESCRIPTOR FOR N STOPS
//
// +--------------- REMOVE ME LATER
// v
// +--------+------+-------+---+----------+-----------+
// | VECTOR | TYPE | COUNT | N | SLOPES | COLORS |
// +--------+------+-------+---+----------+-----------+
// | f32v4 | 1 | 1 | 1 | f32[N-1] | f16v2[4N] |
// +--------+------+-------+---+----------+-----------+
//
// COLOR PAIR WORD EXPANSION TOTAL
// +------------+--------------------------------+--------+
// | f16v2 | 4 + 1 + 1 + 1 + [N-1] + [4*N] | 5N + 6 |
// +------------+--------------------------------+--------+
//
// COLOR LAYOUT:
//
// R[0]R[1], R[1]R[2], ... R[N-1]R[N-1] <-------------------------- FIXME -- USE HERB'S SINGLE FMA REPRESENTATION
// G[0]G[1], G[1]G[2], ... G[N-1]G[N-1] <-------------------------- FIXME -- USE HERB'S SINGLE FMA REPRESENTATION
// B[0]B[1], B[1]B[2], ... B[N-1]B[N-1] <-------------------------- FIXME -- USE HERB'S SINGLE FMA REPRESENTATION
// A[0]A[1], A[1]A[2], ... A[N-1]A[N-1] <-------------------------- FIXME -- USE HERB'S SINGLE FMA REPRESENTATION
//
//
// MINIMUM DWORDS: N=2 --> 16
//
//
// VERSION 3+
// =============================================================
//
// FIXME -- will probably want to try using the sampler/texture
// hardware to interpolate colors.
//
// This will require that the colors are laid out in sampler-friendly
// order:
//
// RGBA[0]RGBA[1], RGBA[1]RGBA[2], ..., RGBA[N-1]RGBA[N-1]
//
//
//
// DISABLED: these macros are compiled out by the "#if 0" guard.
//
// SPN_GRADIENT_HEADER_DWORDS_TOTAL() takes (N-1) and yields 7(N-1)+9, the
// last form in the VERSION 1 "TOTAL" column.
//
// The SPN_GRADIENT_CMD_DWORDS_V1() defined here adds one more dword, giving
// 7n + 3. That differs from the live definition directly below this guard,
// which is 7n + 2, so enabling this section would redefine the macro with a
// different value.
//
#if 0
#define SPN_GRADIENT_HEADER_DWORDS_LUTS_OFFSET 4
#define SPN_GRADIENT_HEADER_DWORDS_TOTAL(n_minus_1) (7 * (n_minus_1) + 9)
#define SPN_GRADIENT_HEADER_DWORDS_MIN SPN_GRADIENT_HEADER_DWORDS_TOTAL(1)
#define SPN_GRADIENT_CMD_DWORDS_V1(n) (1 + SPN_GRADIENT_HEADER_DWORDS_TOTAL(n-1))
#endif
//
// GRADIENT COMMAND SIZES
//
// Dword count of a linear gradient with n stops:
//
// V1 : 7n + 2
// V2 : 5n + 6
//
// V2_ADJUST(v1,v2) is the V1 size for v1 stops less (v2 + 6).
//
#define SPN_GRADIENT_CMD_DWORDS_V1(n) (2 + 7 * (n))
#define SPN_GRADIENT_CMD_DWORDS_V2(n) (6 + 5 * (n))
#define SPN_GRADIENT_CMD_DWORDS_V2_ADJUST(v1,v2) (SPN_GRADIENT_CMD_DWORDS_V1(v1) - (6 + (v2)))
//
// clang-format on
//
#endif // SRC_GRAPHICS_LIB_COMPUTE_SPINEL_CORE_H_