| // Copyright 2019 The Fuchsia Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #version 460 |
| |
| // |
| // FILL DISPATCH |
| // |
| // This fixes up the 8 path primitive counts so they can be used by |
| // vkCmdDispatchIndirect(). |
| // |
| // It also computes the exclusive prefix sum of the counts so each |
| // rasterization workgroup type (lines, quads, etc.) knows where to |
| // begin in the cmd_rast[] buffer. |
| // |
| // The sum is stored in the 4th element of each quad. |
| // |
| |
| // |
| // |
| // |
| #extension GL_GOOGLE_include_directive : require |
| #extension GL_KHR_shader_subgroup_basic : require |
| #extension GL_KHR_shader_subgroup_ballot : require |
| #extension GL_KHR_shader_subgroup_arithmetic : require |
| |
| // |
| // |
| // |
| #extension GL_EXT_debug_printf : enable |
| |
| // |
| // |
| // |
| #include "config.h" |
| #include "push.h" |
| |
| // |
| // Local defines: lane count used for the workgroup width and to select |
| // the SIMD4 vs. SIMD8+ code path in main(). |
| #define SPN_FILL_DISPATCH_SUBGROUP_SIZE (1 << SPN_DEVICE_FILL_DISPATCH_SUBGROUP_SIZE_LOG2) |
| |
| // |
| // Workgroup size: local_size_x is the configured subgroup size; main() |
| // does not yet support multi-subgroup workgroups. |
| layout(local_size_x = SPN_FILL_DISPATCH_SUBGROUP_SIZE) in; |
| |
| // |
| // Push constants: main() reads push.devaddr_rasterize_fill_scan; the |
| // macro (not defined in this file) is presumably where `push` is declared. |
| SPN_PUSH_LAYOUT_FILL_DISPATCH(); |
| |
| // |
| // Buffer references: main() reads counts[] and writes dispatch[]; how the |
| // qualifier arguments map to members is set by the macro -- TODO confirm. |
| SPN_BUFFER_DEFINE_RASTERIZE_FILL_SCAN(writeonly, readonly, noaccess); |
| |
| // |
| // FIXME(allanmac): Support computing multi-subgroup workgroups. |
| // |
| void |
| main() |
| { |
|   // |
|   // `fill_scan` is the name used below to read counts[] and write |
|   // dispatch[]. The macro presumably binds it to the buffer at |
|   // push.devaddr_rasterize_fill_scan -- confirm against its definition. |
|   // |
|   SPN_BUFREF_DEFINE(SPN_BUFFER_TYPE(rasterize_fill_scan), |
|                     fill_scan, |
|                     push.devaddr_rasterize_fill_scan); |
| |
|   // |
|   // Each lane handles the entry (or entries) indexed by its lane id. |
|   // |
|   const uint lane = gl_SubgroupInvocationID; |
| |
| #if (SPN_FILL_DISPATCH_SUBGROUP_SIZE == 4) |
|   // |
|   // SIMD4 -- ARM Bifrost4 and SwiftShader |
|   // |
|   // With only 4 lanes, every lane owns two of the 8 entries: one in the |
|   // lower half [0,4) and one in the upper half [4,8). |
|   // |
|   const uint lane_lo = lane; |
|   const uint lane_hi = lane + 4; |
| |
|   // |
|   // Entry layout: (count, 1, 1, exclusive prefix sum). |
|   // |
|   u32vec4 entry_lo = u32vec4(0, 1, 1, 0); |
|   u32vec4 entry_hi = u32vec4(0, 1, 1, 0); |
| |
|   // |
|   // Fetch this lane's two primitive counts. |
|   // |
|   entry_lo.x = fill_scan.counts[lane_lo]; |
|   entry_hi.x = fill_scan.counts[lane_hi]; |
| |
|   // |
|   // Inclusive scan-add over each half. Lane 3's inclusive sum of the |
|   // lower half is the lower half's total, which offsets the upper half. |
|   // |
|   entry_lo.w = subgroupInclusiveAdd(entry_lo.x); |
|   entry_hi.w = subgroupInclusiveAdd(entry_hi.x) + subgroupBroadcast(entry_lo.w, 3); |
| |
|   // |
|   // Subtract each lane's own count to turn the inclusive sums into |
|   // exclusive sums. |
|   // |
|   entry_lo.w -= entry_lo.x; |
|   entry_hi.w -= entry_hi.x; |
| |
|   // |
|   // Write out all 8 entries. |
|   // |
|   fill_scan.dispatch[lane_lo] = entry_lo; |
|   fill_scan.dispatch[lane_hi] = entry_hi; |
| |
| #elif (SPN_FILL_DISPATCH_SUBGROUP_SIZE >= 8) |
|   // |
|   // SIMD8+ -- every other GPU I'm aware of... |
|   // |
|   // Entry layout: (count, 1, 1, exclusive prefix sum). |
|   // |
|   u32vec4 entry = u32vec4(0, 1, 1, 0); |
| |
|   // |
|   // Fetch the primitive count. When the subgroup is wider than 8, only |
|   // lanes below SPN_RAST_TYPE_COUNT load; the rest keep a count of 0. |
|   // |
|   // TODO(https://fxbug.dev/42052237): This eventually needs to setup the |
|   // dispatch based on rounded-up workgroup size and not subgroup |
|   // size. |
|   // |
| #if (SPN_FILL_DISPATCH_SUBGROUP_SIZE > 8) |
|   const bool has_count = (lane < SPN_RAST_TYPE_COUNT); |
| |
|   if (has_count) |
| #endif |
|     { |
|       entry.x = fill_scan.counts[lane]; |
|     } |
| |
|   // |
|   // Exclusive scan-add of the counts. Every lane executes the scan; |
|   // lanes without a count contribute 0. |
|   // |
|   entry.w = subgroupExclusiveAdd(entry.x); |
| |
|   // |
|   // Write out the entries from the lanes that loaded a count. |
|   // |
| #if (SPN_FILL_DISPATCH_SUBGROUP_SIZE > 8) |
|   if (has_count) |
| #endif |
|     { |
|       fill_scan.dispatch[lane] = entry; |
|     } |
| |
| #else |
| #error "Unexpected subgroup size!" |
| #endif |
| } |
| |
| // |
| // |
| // |