blob: 920eb05a1d173a4a380b8b2d760f2e3c813f9530 [file] [log] [blame]
// Copyright 2021 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#version 460
//
// Each workgroup fills up to RS_FILL_BLOCK_DWORDS dwords
//
// clang-format off
#extension GL_GOOGLE_include_directive : require
#extension GL_EXT_control_flow_attributes : require
// clang-format on
//
// Load arch/keyval configuration
//
#include "config.h"
//
// Buffer reference macros and push constants
//
#include "bufref.h"
#include "push.h"
//
// Subgroup uniform support
//
// RS_SUBGROUP_UNIFORM expands to `subgroupuniformEXT` only when both the
// switch below and the extension's feature macro are defined; otherwise it
// expands to nothing and declarations that use it are left unqualified.
//
// The `#extension` behavior must be one of require/enable/warn/disable, so
// `require` is used here, matching the other `#extension` lines in this file.
//
// NOTE(review): the switch is named for the scatter shader and reads as a
// "disable" flag even though defining it turns the qualifier on -- confirm
// this is the intended switch for the fill shader.
//
#if defined(RS_SCATTER_SUBGROUP_UNIFORM_DISABLE) && defined(GL_EXT_subgroupuniform_qualifier)
#extension GL_EXT_subgroupuniform_qualifier : require
#define RS_SUBGROUP_UNIFORM subgroupuniformEXT
#else
#define RS_SUBGROUP_UNIFORM
#endif
//
// Declare the push constants
//
// RS_STRUCT_PUSH_FILL() presumably declares the `rs_push_fill` struct type
// used below (macro not defined in this file -- confirm against push.h).
RS_STRUCT_PUSH_FILL();
// Push constant block with a single member, `push`. main() reads
// push.devaddr_info, push.devaddr_dwords and push.dword from it.
layout(push_constant) uniform block_push
{
rs_push_fill push;
};
//
// The "init" shader configures the fill info structure.
//
// RS_STRUCT_INDIRECT_INFO_FILL() presumably declares the
// `rs_indirect_info_fill` struct type referenced later in this file (macro
// not defined here -- confirm against the included headers).
RS_STRUCT_INDIRECT_INFO_FILL();
//
// Check all switches are defined
//
// log2 of the workgroup size; RS_WORKGROUP_SIZE is derived from it.
#if !defined(RS_FILL_WORKGROUP_SIZE_LOG2)
#error "Undefined: RS_FILL_WORKGROUP_SIZE_LOG2"
#endif
//
// Number of dwords each invocation may write; sets the fill loop trip count.
#if !defined(RS_FILL_BLOCK_ROWS)
#error "Undefined: RS_FILL_BLOCK_ROWS"
#endif
//
// Local macros
//
// clang-format off
// Invocations per workgroup (used as local_size_x).
#define RS_WORKGROUP_SIZE (1 << RS_FILL_WORKGROUP_SIZE_LOG2)
// Dwords covered by one workgroup: RS_FILL_BLOCK_ROWS rows of RS_WORKGROUP_SIZE dwords.
#define RS_FILL_BLOCK_DWORDS (RS_FILL_BLOCK_ROWS * RS_WORKGROUP_SIZE)
// Mask with the low RS_RADIX_LOG2 bits set. Not referenced elsewhere in this
// file; RS_RADIX_LOG2 is presumably supplied by config.h -- confirm.
#define RS_RADIX_MASK ((1 << RS_RADIX_LOG2) - 1)
// clang-format on
//
//
//
// One-dimensional workgroup of RS_WORKGROUP_SIZE invocations.
layout(local_size_x = RS_WORKGROUP_SIZE) in;
//
//
//
// Buffer reference (std430) holding a single rs_indirect_info_fill. main()
// constructs it from push.devaddr_info and reads the `info` member.
layout(buffer_reference, std430) buffer buffer_rs_indirect_info_fill
{
rs_indirect_info_fill info;
};
//
// TODO(https://fxbug.dev/42055401): Consider filling using uvec2 or uvec4 for
// improved performance.
//
// Buffer reference (std430) exposing the fill target as an unsized array of
// 32-bit words. main() writes push.dword through `extent`.
layout(buffer_reference, std430) buffer buffer_rs_dwords
{
uint32_t extent[];
};
//
//
//
void
main()
{
  //
  // Load the fill parameters through the device address carried in the
  // push constants.
  //
  readonly RS_BUFREF_DEFINE(buffer_rs_indirect_info_fill, rs_info, push.devaddr_info);

  RS_SUBGROUP_UNIFORM const rs_indirect_info_fill info = rs_info.info;

  //
  // Index of the first dword this invocation covers: the workgroup's block
  // (offset by info.block_offset) times the block size, plus the lane.
  //
  // The dword index is assumed to fit in 32 bits. It is widened to a 64-bit
  // byte offset with an extended multiply by the dword size (4 bytes).
  //
  // Assumes push.devaddr_dwords is suitably aligned for filling
  // RS_FILL_BLOCK_DWORDS. Aligning at a subgroup or transaction size is
  // fine.
  //
  const uint32_t wg_block    = info.block_offset + gl_WorkGroupID.x;
  const uint32_t first_dword = wg_block * RS_FILL_BLOCK_DWORDS + gl_LocalInvocationID.x;

  // .x receives the low 32 bits of the product, .y the high 32 bits.
  u32vec2 byte_offset;

  umulExtended(first_dword, 4, byte_offset.y, byte_offset.x);

  writeonly RS_BUFREF_DEFINE_AT_OFFSET_U32VEC2(buffer_rs_dwords, rs_dwords, push.devaddr_dwords, byte_offset);

  //
  // Fills are always aligned to RS_FILL_BLOCK_DWORDS.
  //
  // A single unsigned compare performs the range test:
  //
  //   ((v >= min) && (v < max)) == ((v - min) < (max - min))
  //
  // The subtraction wraps for v < min, which makes the compare fail.
  //
  const uint32_t rel_dword = first_dword - info.dword_offset_min;

  [[unroll]] for (uint32_t row = 0; row < RS_FILL_BLOCK_ROWS; row++)
  {
    // Successive rows written by one invocation are a workgroup apart.
    const uint32_t row_stride = row * RS_WORKGROUP_SIZE;

    if (rel_dword + row_stride < info.dword_offset_max_minus_min)
    {
      rs_dwords.extent[row_stride] = push.dword;
    }
  }
}
//
//
//