src/graphics/lib/compute/spinel/platforms/vk/shaders/fill_dispatch.comp - fuchsia - Git at Google

 // Copyright 2019 The Fuchsia Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.

 #version 460

 //
 // FILL DISPATCH
 //
 // This fixes up the 8 path primitive counts so they can be used by
 // vkCmdDispatchIndirect().
 //
 // It also computes the exclusive prefix sum of the counts so each
 // rasterization workgroup type (lines, quads, etc.) knows where its
 // commands begin in the cmd_rast[] buffer.
 //
 // The sum is stored in the 4th element of each quad.
 //

 //
 //
 //
 #extension GL_GOOGLE_include_directive : require
 #extension GL_KHR_shader_subgroup_basic : require
 #extension GL_KHR_shader_subgroup_ballot : require
 #extension GL_KHR_shader_subgroup_arithmetic : require

 //
 //
 //
 #extension GL_EXT_debug_printf : enable

 //
 //
 //
 #include "config.h"
 #include "push.h"

 //
 // Local defines
 //
 // SPN_FILL_DISPATCH_SUBGROUP_SIZE is a lane count, expanded from the log2
 // value SPN_DEVICE_FILL_DISPATCH_SUBGROUP_SIZE_LOG2.  That log2 macro is
 // not defined in this file -- presumably it comes from "config.h"; confirm.
 //
 #define SPN_FILL_DISPATCH_SUBGROUP_SIZE (1 << SPN_DEVICE_FILL_DISPATCH_SUBGROUP_SIZE_LOG2)

 //
 // Workgroup size
 //
 // The workgroup is SPN_FILL_DISPATCH_SUBGROUP_SIZE invocations wide in x.
 // main() indexes everything by gl_SubgroupInvocationID, so this appears to
 // assume exactly one subgroup per workgroup (see the FIXME above main()).
 //
 layout(local_size_x = SPN_FILL_DISPATCH_SUBGROUP_SIZE) in;

 //
 // Push constants
 //
 // main() reads `push.devaddr_rasterize_fill_scan`.  The layout macro is
 // defined outside this file (presumably in "push.h" -- confirm).
 //
 SPN_PUSH_LAYOUT_FILL_DISPATCH();

 //
 // Buffer references
 //
 // Presumably declares the rasterize_fill_scan buffer type that main()
 // accesses as `fill_scan`.
 //
 // NOTE(review): the three arguments look like per-member access
 // qualifiers.  main() only writes fill_scan.dispatch[] and only reads
 // fill_scan.counts[] -- confirm the argument order against the macro
 // definition.
 //
 SPN_BUFFER_DEFINE_RASTERIZE_FILL_SCAN(writeonly, readonly, noaccess);

 //
 // FIXME(allanmac): Support computing multi-subgroup workgroups.
 //
 void
 main()
 {
   //
   // Expose the rasterize_fill_scan buffer as `fill_scan`, using the
   // device address supplied through the push constants.
   //
   SPN_BUFREF_DEFINE(SPN_BUFFER_TYPE(rasterize_fill_scan),
                     fill_scan,
                     push.devaddr_rasterize_fill_scan);

   //
   // Every load and store below is indexed by the subgroup lane.
   //
   const uint sg_lane = gl_SubgroupInvocationID;

 #if (SPN_FILL_DISPATCH_SUBGROUP_SIZE == 4)
   //
   // SIMD4 -- ARM Bifrost4 and SwiftShader
   //
   // Four lanes cover eight counts, so each lane carries two dispatch
   // records: `lo` for entries [0,4) and `hi` for entries [4,8).
   //
   // Record layout: x = count, y = z = 1, w = exclusive prefix sum.
   //
   u32vec4 lo = u32vec4(0, 1, 1, 0);
   u32vec4 hi = u32vec4(0, 1, 1, 0);

   //
   // Fetch this lane's two primitive counts.
   //
   lo.x = fill_scan.counts[0 + sg_lane];
   hi.x = fill_scan.counts[4 + sg_lane];

   //
   // Inclusive scan-add of each half.  Lane 3's inclusive sum of the low
   // half is the total of entries [0,4); it is added to carry the scan
   // into the high half.
   //
   lo.w = subgroupInclusiveAdd(lo.x);
   hi.w = subgroupInclusiveAdd(hi.x) + subgroupBroadcast(lo.w, 3);

   //
   // Remove each lane's own count to turn the inclusive sums into
   // exclusive sums.
   //
   lo.w -= lo.x;
   hi.w -= hi.x;

   //
   // Write back the eight dispatch records.
   //
   fill_scan.dispatch[0 + sg_lane] = lo;
   fill_scan.dispatch[4 + sg_lane] = hi;

 #elif (SPN_FILL_DISPATCH_SUBGROUP_SIZE >= 8)
   //
   // SIMD8+ -- every other GPU I'm aware of...
   //
   // One dispatch record per lane.
   //
   // Record layout: x = count, y = z = 1, w = exclusive prefix sum.
   //
   u32vec4 cmd = u32vec4(0, 1, 1, 0);

   //
   // Fetch this lane's primitive count.  With more than 8 lanes, only
   // lanes below SPN_RAST_TYPE_COUNT load; the remaining lanes keep a
   // zero count and so add nothing to the scan.
   //
 #if (SPN_FILL_DISPATCH_SUBGROUP_SIZE > 8)
   const bool in_range = (sg_lane < SPN_RAST_TYPE_COUNT);

   if (in_range)
 #endif
     {
       //
       // TODO(https://fxbug.dev/42052237): This eventually needs to setup the
       // dispatch based on rounded-up workgroup size and not subgroup
       // size.
       //
       cmd.x = fill_scan.counts[sg_lane];
     }

   //
   // Exclusive scan-add of the counts.  This runs on every lane, outside
   // the in-range guard.
   //
   cmd.w = subgroupExclusiveAdd(cmd.x);

   //
   // Write back the dispatch records (in-range lanes only when the
   // subgroup is wider than 8).
   //
 #if (SPN_FILL_DISPATCH_SUBGROUP_SIZE > 8)
   if (in_range)
 #endif
     {
       fill_scan.dispatch[sg_lane] = cmd;
     }

 #else
 #error "Unexpected subgroup size!"
 #endif
 }

 //
 //
 //
	// Copyright 2019 The Fuchsia Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style license that can be
	// found in the LICENSE file.

	#version 460

	//
	// FILL DISPATCH
	//
	// This fixes up the 8 path primitive counts so they can be used by
	// vkCmdDispatchIndirect().
	//
	// It also computes the exclusive prefix sum of the counts so each
	// rasterization workgroup type (lines, quads, etc.) knows where its
	// commands begin in the cmd_rast[] buffer.
	//
	// The sum is stored in the 4th element of each quad.
	//

	//
	//
	//
	#extension GL_GOOGLE_include_directive : require
	#extension GL_KHR_shader_subgroup_basic : require
	#extension GL_KHR_shader_subgroup_ballot : require
	#extension GL_KHR_shader_subgroup_arithmetic : require

	//
	//
	//
	#extension GL_EXT_debug_printf : enable

	//
	//
	//
	#include "config.h"
	#include "push.h"

	//
	// Local defines
	//
	// SPN_FILL_DISPATCH_SUBGROUP_SIZE is a lane count, expanded from the log2
	// value SPN_DEVICE_FILL_DISPATCH_SUBGROUP_SIZE_LOG2.  That log2 macro is
	// not defined in this file -- presumably it comes from "config.h"; confirm.
	//
	#define SPN_FILL_DISPATCH_SUBGROUP_SIZE (1 << SPN_DEVICE_FILL_DISPATCH_SUBGROUP_SIZE_LOG2)

	//
	// Workgroup size
	//
	// The workgroup is SPN_FILL_DISPATCH_SUBGROUP_SIZE invocations wide in x.
	// main() indexes everything by gl_SubgroupInvocationID, so this appears to
	// assume exactly one subgroup per workgroup (see the FIXME above main()).
	//
	layout(local_size_x = SPN_FILL_DISPATCH_SUBGROUP_SIZE) in;

	//
	// Push constants
	//
	// main() reads `push.devaddr_rasterize_fill_scan`.  The layout macro is
	// defined outside this file (presumably in "push.h" -- confirm).
	//
	SPN_PUSH_LAYOUT_FILL_DISPATCH();

	//
	// Buffer references
	//
	// Presumably declares the rasterize_fill_scan buffer type that main()
	// accesses as `fill_scan`.
	//
	// NOTE(review): the three arguments look like per-member access
	// qualifiers.  main() only writes fill_scan.dispatch[] and only reads
	// fill_scan.counts[] -- confirm the argument order against the macro
	// definition.
	//
	SPN_BUFFER_DEFINE_RASTERIZE_FILL_SCAN(writeonly, readonly, noaccess);

	//
	// FIXME(allanmac): Support computing multi-subgroup workgroups.
	//
	void
	main()
	{
	  //
	  // Expose the rasterize_fill_scan buffer as `fill_scan`, using the
	  // device address supplied through the push constants.
	  //
	  SPN_BUFREF_DEFINE(SPN_BUFFER_TYPE(rasterize_fill_scan),
	                    fill_scan,
	                    push.devaddr_rasterize_fill_scan);

	  //
	  // Every load and store below is indexed by the subgroup lane.
	  //
	  const uint sg_lane = gl_SubgroupInvocationID;

	#if (SPN_FILL_DISPATCH_SUBGROUP_SIZE == 4)
	  //
	  // SIMD4 -- ARM Bifrost4 and SwiftShader
	  //
	  // Four lanes cover eight counts, so each lane carries two dispatch
	  // records: `lo` for entries [0,4) and `hi` for entries [4,8).
	  //
	  // Record layout: x = count, y = z = 1, w = exclusive prefix sum.
	  //
	  u32vec4 lo = u32vec4(0, 1, 1, 0);
	  u32vec4 hi = u32vec4(0, 1, 1, 0);

	  //
	  // Fetch this lane's two primitive counts.
	  //
	  lo.x = fill_scan.counts[0 + sg_lane];
	  hi.x = fill_scan.counts[4 + sg_lane];

	  //
	  // Inclusive scan-add of each half.  Lane 3's inclusive sum of the low
	  // half is the total of entries [0,4); it is added to carry the scan
	  // into the high half.
	  //
	  lo.w = subgroupInclusiveAdd(lo.x);
	  hi.w = subgroupInclusiveAdd(hi.x) + subgroupBroadcast(lo.w, 3);

	  //
	  // Remove each lane's own count to turn the inclusive sums into
	  // exclusive sums.
	  //
	  lo.w -= lo.x;
	  hi.w -= hi.x;

	  //
	  // Write back the eight dispatch records.
	  //
	  fill_scan.dispatch[0 + sg_lane] = lo;
	  fill_scan.dispatch[4 + sg_lane] = hi;

	#elif (SPN_FILL_DISPATCH_SUBGROUP_SIZE >= 8)
	  //
	  // SIMD8+ -- every other GPU I'm aware of...
	  //
	  // One dispatch record per lane.
	  //
	  // Record layout: x = count, y = z = 1, w = exclusive prefix sum.
	  //
	  u32vec4 cmd = u32vec4(0, 1, 1, 0);

	  //
	  // Fetch this lane's primitive count.  With more than 8 lanes, only
	  // lanes below SPN_RAST_TYPE_COUNT load; the remaining lanes keep a
	  // zero count and so add nothing to the scan.
	  //
	#if (SPN_FILL_DISPATCH_SUBGROUP_SIZE > 8)
	  const bool in_range = (sg_lane < SPN_RAST_TYPE_COUNT);

	  if (in_range)
	#endif
	    {
	      //
	      // TODO(https://fxbug.dev/42052237): This eventually needs to setup the
	      // dispatch based on rounded-up workgroup size and not subgroup
	      // size.
	      //
	      cmd.x = fill_scan.counts[sg_lane];
	    }

	  //
	  // Exclusive scan-add of the counts.  This runs on every lane, outside
	  // the in-range guard.
	  //
	  cmd.w = subgroupExclusiveAdd(cmd.x);

	  //
	  // Write back the dispatch records (in-range lanes only when the
	  // subgroup is wider than 8).
	  //
	#if (SPN_FILL_DISPATCH_SUBGROUP_SIZE > 8)
	  if (in_range)
	#endif
	    {
	      fill_scan.dispatch[sg_lane] = cmd;
	    }

	#else
	#error "Unexpected subgroup size!"
	#endif
	}

	//
	//
	//