| // Copyright 2021 The Fuchsia Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #version 460 |
| |
| // |
| // Each workgroup fills up to RS_FILL_BLOCK_DWORDS dwords |
| // |
| |
| // clang-format off |
| #extension GL_GOOGLE_include_directive : require |
| #extension GL_EXT_control_flow_attributes : require |
| // clang-format on |
| |
| // |
| // Load arch/keyval configuration |
| // |
| #include "config.h" |
| |
| // |
| // Buffer reference macros and push constants |
| // |
| #include "bufref.h" |
| #include "push.h" |
| |
| // |
| // Subgroup uniform support |
| // |
| // RS_SUBGROUP_UNIFORM expands to `subgroupuniformEXT` when the branch below |
| // is taken and to nothing otherwise, so declarations can always use it. |
| // |
| // The behavior token of an #extension directive must be one of |
| // require, enable, warn or disable. |
| // |
| // NOTE(review): the qualifier is switched on when a macro named |
| // RS_SCATTER_SUBGROUP_UNIFORM_DISABLE is defined. That reads as inverted and |
| // carries a SCATTER prefix inside the fill shader -- confirm the intended |
| // condition against the build configuration before changing it. |
| // |
| #if defined(RS_SCATTER_SUBGROUP_UNIFORM_DISABLE) && defined(GL_EXT_subgroupuniform_qualifier) |
| #extension GL_EXT_subgroupuniform_qualifier : require |
| #define RS_SUBGROUP_UNIFORM subgroupuniformEXT |
| #else |
| #define RS_SUBGROUP_UNIFORM |
| #endif |
| |
| // |
| // Declare the push constants; main() reads push.devaddr_info, push.devaddr_dwords and push.dword |
| // |
| RS_STRUCT_PUSH_FILL(); |
| |
| layout(push_constant) uniform block_push |
| { |
| rs_push_fill push; |
| }; |
| |
| // |
| // The "init" shader configures the fill info structure, which main() loads through push.devaddr_info. |
| // |
| RS_STRUCT_INDIRECT_INFO_FILL(); |
| |
| // |
| // Check all switches are defined: compilation stops with #error if a required macro is missing |
| // RS_FILL_WORKGROUP_SIZE_LOG2: log2 of the workgroup size (see RS_WORKGROUP_SIZE) |
| #ifndef RS_FILL_WORKGROUP_SIZE_LOG2 |
| #error "Undefined: RS_FILL_WORKGROUP_SIZE_LOG2" |
| #endif |
| |
| // RS_FILL_BLOCK_ROWS: maximum number of dwords written by one invocation (loop bound in main) |
| #ifndef RS_FILL_BLOCK_ROWS |
| #error "Undefined: RS_FILL_BLOCK_ROWS" |
| #endif |
| |
| // |
| // Local macros: RS_WORKGROUP_SIZE invocations each cover RS_FILL_BLOCK_ROWS dwords of a block |
| // NOTE(review): RS_RADIX_MASK is not referenced in the visible part of this shader -- confirm it is needed |
| // clang-format off |
| #define RS_WORKGROUP_SIZE (1 << RS_FILL_WORKGROUP_SIZE_LOG2) |
| #define RS_FILL_BLOCK_DWORDS (RS_FILL_BLOCK_ROWS * RS_WORKGROUP_SIZE) |
| #define RS_RADIX_MASK ((1 << RS_RADIX_LOG2) - 1) |
| // clang-format on |
| |
| // |
| // A workgroup is RS_WORKGROUP_SIZE invocations along x |
| // |
| layout(local_size_x = RS_WORKGROUP_SIZE) in; |
| |
| // |
| // Buffer reference wrapping the single rs_indirect_info_fill record that main() loads |
| // |
| layout(buffer_reference, std430) buffer buffer_rs_indirect_info_fill |
| { |
| rs_indirect_info_fill info; |
| }; |
| |
| // Destination of the fill, addressed as an unsized array of uint32_t. |
| // TODO(https://fxbug.dev/42055401): Consider filling using uvec2 or uvec4 for |
| // improved performance. |
| // |
| layout(buffer_reference, std430) buffer buffer_rs_dwords |
| { |
| uint32_t extent[]; |
| }; |
| |
| // |
| // Entry point: every invocation stores push.dword into at most |
| // RS_FILL_BLOCK_ROWS dwords that are RS_WORKGROUP_SIZE elements apart. |
| // |
| void |
| main() |
| { |
|   // |
|   // Load the fill parameters through a read-only buffer reference. |
|   // |
|   readonly RS_BUFREF_DEFINE(buffer_rs_indirect_info_fill, rs_info, push.devaddr_info); |
| |
|   RS_SUBGROUP_UNIFORM const rs_indirect_info_fill fill = rs_info.info; |
| |
|   // |
|   // Index of the first dword handled by this invocation. |
|   // |
|   // The index is assumed to fit in 32 bits; the byte offset is then |
|   // formed with an extended (32x32 -> 64 bit) multiply by 4. |
|   // |
|   // Assumes push.devaddr_dwords is suitably aligned for filling |
|   // RS_FILL_BLOCK_DWORDS. Aligning at a subgroup or transaction size |
|   // is fine. |
|   // |
|   const uint32_t block_idx  = fill.block_offset + gl_WorkGroupID.x; |
|   const uint32_t dword_base = block_idx * RS_FILL_BLOCK_DWORDS + gl_LocalInvocationID.x; |
| |
|   // |
|   // umulExtended() returns the high word first: .y = msb, .x = lsb |
|   // |
|   u32vec2 byte_offset; |
| |
|   umulExtended(dword_base, 4, byte_offset.y, byte_offset.x); |
| |
|   // |
|   // Write-only view of the destination starting at this invocation's dword. |
|   // |
|   writeonly RS_BUFREF_DEFINE_AT_OFFSET_U32VEC2(buffer_rs_dwords, rs_dwords, push.devaddr_dwords, byte_offset); |
| |
|   // |
|   // Fills are always aligned to RS_FILL_BLOCK_DWORDS. |
|   // |
|   // A single unsigned compare performs the range check: |
|   // |
|   //   ((v >= min) && (v < max)) == ((v - min) < (max - min)) |
|   // |
|   const uint32_t rel_idx = dword_base - fill.dword_offset_min; |
| |
|   [[unroll]] for (uint32_t row = 0; row < RS_FILL_BLOCK_ROWS; row++) |
|   { |
|     const uint32_t row_offset = row * RS_WORKGROUP_SIZE; |
| |
|     if (rel_idx + row_offset < fill.dword_offset_max_minus_min) |
|     { |
|       rs_dwords.extent[row_offset] = push.dword; |
|     } |
|   } |
| } |
| |
| // |
| // |
| // |