/* src/vulkan/runtime/bvh/leaf.h - third_party/mesa - Git at Google */

 /*
  * Copyright © 2022 Konstantin Seurer
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
  * to deal in the Software without restriction, including without limitation
  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  * and/or sell copies of the Software, and to permit persons to whom the
  * Software is furnished to do so, subject to the following conditions:
  *
  * The above copyright notice and this permission notice (including the next
  * paragraph) shall be included in all copies or substantial portions of the
  * Software.
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  * IN THE SOFTWARE.
  */

 #extension GL_EXT_shader_explicit_arithmetic_types_int8 : require
 #extension GL_EXT_shader_explicit_arithmetic_types_int16 : require
 #extension GL_EXT_shader_explicit_arithmetic_types_int32 : require
 #extension GL_EXT_shader_explicit_arithmetic_types_int64 : require
 #extension GL_EXT_shader_explicit_arithmetic_types_float16 : require
 #extension GL_EXT_scalar_block_layout : require
 #extension GL_EXT_buffer_reference : require
 #extension GL_EXT_buffer_reference2 : require
 #extension GL_KHR_shader_subgroup_vote : require
 #extension GL_KHR_shader_subgroup_arithmetic : require
 #extension GL_KHR_shader_subgroup_ballot : require

 #include "vk_build_interface.h"

 /* The workgroup size along X is supplied through the specialization constant with id
  * SUBGROUP_SIZE_ID; Y and Z are fixed to 1. Each invocation processes one primitive (see main()).
  * NOTE(review): the constant's name suggests one workgroup == one subgroup -- confirm against
  * the pipeline setup.
  */
 layout(local_size_x_id = SUBGROUP_SIZE_ID, local_size_y = 1, local_size_z = 1) in;

 /* Per-dispatch build arguments. main() reads args.geom_data, args.ids, args.bvh and
  * args.header from this block.
  */
 layout(push_constant) uniform CONSTS {
    leaf_args args;
 };

 /* Builds the IR triangle node for triangle global_id of geom_data at dst_ptr.
  *
  * bounds receives the AABB of the (optionally transformed) triangle.
  *
  * Returns true if the triangle is active. An inactive triangle is skipped without writing
  * anything (bounds is left untouched) unless VK_BUILD_FLAG_ALWAYS_ACTIVE is set, in which case
  * the node is still emitted and false is returned.
  */
 bool
 build_triangle(inout vk_aabb bounds, VOID_REF dst_ptr, vk_bvh_geometry_data geom_data, uint32_t global_id)
 {
    triangle_indices tri_indices = load_indices(geom_data.indices, geom_data.index_format, global_id);
    triangle_vertices tri =
       load_vertices(geom_data.data, tri_indices, geom_data.vertex_format, geom_data.stride);

    /* An inactive triangle is one for which the first (X) component of any vertex is NaN. If any
     * other vertex component is NaN, and the first is not, the behavior is undefined. If the vertex
     * format does not have a NaN representation, then all triangles are considered active.
     */
    bool has_nan_x = isnan(tri.vertex[0].x) || isnan(tri.vertex[1].x) || isnan(tri.vertex[2].x);
    bool is_valid = !has_nan_x;
    if (!is_valid && !VK_BUILD_FLAG(VK_BUILD_FLAG_ALWAYS_ACTIVE))
       return false;

    if (geom_data.transform != NULL) {
       /* The transform is read as 3 rows of 4 floats (element index = col + row * 4). The fourth
        * row keeps the identity's (0, 0, 0, 1).
        */
       mat4 xform = mat4(1.0);

       for (uint32_t col = 0; col < 4; col++) {
          for (uint32_t row = 0; row < 3; row++)
             xform[col][row] = DEREF(INDEX(float, geom_data.transform, row * 4 + col));
       }

       for (uint32_t v = 0; v < 3; v++)
          tri.vertex[v] = xform * tri.vertex[v];
    }

    REF(vk_ir_triangle_node) node = REF(vk_ir_triangle_node)(dst_ptr);

    /* Start from an empty box and grow it around the three vertices while storing them. */
    bounds.min = vec3(INFINITY);
    bounds.max = vec3(-INFINITY);

    for (uint32_t v = 0; v < 3; v++) {
       for (uint32_t axis = 0; axis < 3; axis++) {
          float value = tri.vertex[v][axis];

          DEREF(node).coords[v][axis] = value;
          bounds.min[axis] = min(bounds.min[axis], value);
          bounds.max[axis] = max(bounds.max[axis], value);
       }
    }

    DEREF(node).base.aabb = bounds;
    DEREF(node).triangle_id = global_id;
    DEREF(node).geometry_id_and_flags = geom_data.geometry_id;
    /* NOTE(review): the meaning of the constant 9 is not visible in this file -- confirm against
     * the vk_ir_triangle_node definition.
     */
    DEREF(node).id = 9;

    return is_valid;
 }

 /* Builds the IR AABB node for primitive global_id at dst_ptr from the box stored at src_ptr.
  *
  * bounds receives the six floats read from src_ptr (min.xyz followed by max.xyz); it is written
  * even when the AABB turns out to be inactive.
  *
  * Returns true if the AABB is active. An inactive AABB does not get a node unless
  * VK_BUILD_FLAG_ALWAYS_ACTIVE is set, in which case the node is still emitted and false is
  * returned.
  */
 bool
 build_aabb(inout vk_aabb bounds, VOID_REF src_ptr, VOID_REF dst_ptr, uint32_t geometry_id, uint32_t global_id)
 {
    for (uint32_t axis = 0; axis < 3; axis++)
       bounds.min[axis] = DEREF(INDEX(float, src_ptr, axis));

    for (uint32_t axis = 0; axis < 3; axis++)
       bounds.max[axis] = DEREF(INDEX(float, src_ptr, axis + 3));

    /* An inactive AABB is one for which the minimum X coordinate is NaN. If any other component is
     * NaN, and the first is not, the behavior is undefined.
     */
    bool is_valid = !isnan(bounds.min.x);
    if (!is_valid && !VK_BUILD_FLAG(VK_BUILD_FLAG_ALWAYS_ACTIVE))
       return false;

    REF(vk_ir_aabb_node) node = REF(vk_ir_aabb_node)(dst_ptr);

    DEREF(node).base.aabb = bounds;
    DEREF(node).primitive_id = global_id;
    DEREF(node).geometry_id_and_flags = geometry_id;

    return is_valid;
 }

 /* Returns the axis-aligned box that encloses blas_aabb after applying otw_matrix.
  *
  * otw_matrix[i] is used as the i-th row of the affine transform: elements 0..2 are the linear
  * part and element 3 is the translation. For every output axis the result starts at the
  * translation and adds, per input axis, the smaller (for min) or larger (for max) of the two
  * products with the box's min and max coordinate.
  */
 vk_aabb
 calculate_instance_node_bounds(vk_aabb blas_aabb, mat3x4 otw_matrix)
 {
    vk_aabb result;

    for (uint32_t axis = 0; axis < 3; ++axis) {
       vec4 row = otw_matrix[axis];
       float lo = row[3];
       float hi = row[3];

       for (uint32_t k = 0; k < 3; ++k) {
          float from_min = row[k] * blas_aabb.min[k];
          float from_max = row[k] * blas_aabb.max[k];

          lo += min(from_min, from_max);
          hi += max(from_min, from_max);
       }

       result.min[axis] = lo;
       result.max[axis] = hi;
    }

    return result;
 }

 /* Builds the IR instance node for instance global_id at dst_ptr from the
  * AccelerationStructureInstance stored at src_ptr.
  *
  * bounds receives the world-space AABB of the instance. It is only written when the function
  * returns true.
  *
  * Returns false if the instance is skipped: null acceleration structure reference, an instance
  * mask of zero, or NaN bounds in the referenced acceleration structure.
  */
 bool
 build_instance(inout vk_aabb bounds, VOID_REF src_ptr, VOID_REF dst_ptr, uint32_t global_id)
 {
    AccelerationStructureInstance instance = DEREF(REF(AccelerationStructureInstance)(src_ptr));

    /* An inactive instance is one whose acceleration structure handle is VK_NULL_HANDLE. Since the
     * active terminology is only relevant for BVH updates, which we do not implement, we can also
     * skip instances with mask == 0 (the mask occupies the bits from bit 24 upwards).
     */
    bool has_blas = instance.accelerationStructureReference != 0;
    bool has_visible_mask = instance.custom_instance_and_mask >= (1u << 24u);
    if (!has_blas || !has_visible_mask)
       return false;

    REF(vk_ir_instance_node) node = REF(vk_ir_instance_node)(dst_ptr);
    mat4 transform = mat4(instance.transform);
    mat3x4 otw = mat3x4(transform);

    DEREF(node).base_ptr = instance.accelerationStructureReference;
    DEREF(node).otw_matrix = otw;

    /* The bounds of the referenced acceleration structure are read at offset BVH_BOUNDS_OFFSET
     * from its reference.
     */
    vk_aabb blas_aabb = DEREF(REF(vk_aabb)(instance.accelerationStructureReference + BVH_BOUNDS_OFFSET));

    bool blas_bounds_are_nan = any(isnan(blas_aabb.min)) || any(isnan(blas_aabb.max));
    if (blas_bounds_are_nan)
       return false;

    bounds = calculate_instance_node_bounds(blas_aabb, otw);

 #ifdef CALCULATE_FINE_INSTANCE_NODE_BOUNDS
    vec3 blas_extent = blas_aabb.max - blas_aabb.min;
    float blas_volume = blas_extent.x * blas_extent.y * blas_extent.z;
    blas_volume *= abs(determinant(mat3(transform)));

    vec3 coarse_extent = bounds.max - bounds.min;
    float coarse_volume = coarse_extent.x * coarse_extent.y * coarse_extent.z;

    /* Only try calculating finer-grained instance node bounds if the volume of the transformed
     * instance AABB is significantly higher than the volume of the BLAS without transformations
     * applied. Otherwise, the finer-grained bounds won't be much smaller and the additional overhead
     * wouldn't be worth it.
     */
    if (coarse_volume > 1.4f * blas_volume)
       bounds = CALCULATE_FINE_INSTANCE_NODE_BOUNDS(instance.accelerationStructureReference, otw);
 #endif

    DEREF(node).base.aabb = bounds;
    DEREF(node).custom_instance_and_mask = instance.custom_instance_and_mask;
    DEREF(node).sbt_offset_and_flags = instance.sbt_offset_and_flags;
    DEREF(node).instance_id = global_id;

    if (VK_BUILD_FLAG(VK_BUILD_FLAG_PROPAGATE_CULL_FLAGS)) {
       /* The instance flags sit above bit 24 of sbt_offset_and_flags. A forced opacity flag on the
        * instance overrides the root flags stored in the referenced acceleration structure.
        */
       uint32_t root_flags = 0;

       if ((instance.sbt_offset_and_flags & (VK_GEOMETRY_INSTANCE_FORCE_OPAQUE_BIT_KHR << 24)) != 0) {
          root_flags = VK_BVH_BOX_FLAG_ONLY_OPAQUE;
       } else if ((instance.sbt_offset_and_flags & (VK_GEOMETRY_INSTANCE_FORCE_NO_OPAQUE_BIT_KHR << 24)) != 0) {
          root_flags = VK_BVH_BOX_FLAG_NO_OPAQUE;
       } else {
          root_flags = DEREF(REF(uint32_t)(instance.accelerationStructureReference + ROOT_FLAGS_OFFSET));
       }

       DEREF(node).root_flags = root_flags;
    }

    return true;
 }

 /* Entry point: one invocation builds the IR leaf node for one primitive of the geometry
  * described by args.geom_data, stores the node id (or VK_BVH_INVALID_NODE) in args.ids, and
  * accumulates the active leaf count and the min/max bounds in args.header.
  */
 void
 main(void)
 {
    uint32_t global_id = gl_GlobalInvocationID.x;
    uint32_t primitive_id = args.geom_data.first_id + global_id;

    REF(key_id_pair) id_ptr = INDEX(key_id_pair, args.ids, primitive_id);
    uint32_t src_offset = global_id * args.geom_data.stride;

    /* Pick the IR node layout matching the geometry type. Anything that is neither triangles nor
     * AABBs is treated as instances.
     */
    uint32_t dst_stride;
    uint32_t node_type;
    if (args.geom_data.geometry_type == VK_GEOMETRY_TYPE_TRIANGLES_KHR) {
       dst_stride = SIZEOF(vk_ir_triangle_node);
       node_type = vk_ir_node_triangle;
    } else if (args.geom_data.geometry_type == VK_GEOMETRY_TYPE_AABBS_KHR) {
       dst_stride = SIZEOF(vk_ir_aabb_node);
       node_type = vk_ir_node_aabb;
    } else {
       dst_stride = SIZEOF(vk_ir_instance_node);
       node_type = vk_ir_node_instance;
    }

    uint32_t dst_offset = primitive_id * dst_stride;
    VOID_REF dst_ptr = OFFSET(args.bvh, dst_offset);

    vk_aabb bounds;
    bool is_active;
    if (args.geom_data.geometry_type == VK_GEOMETRY_TYPE_TRIANGLES_KHR) {
       is_active = build_triangle(bounds, dst_ptr, args.geom_data, global_id);
    } else if (args.geom_data.geometry_type == VK_GEOMETRY_TYPE_AABBS_KHR) {
       VOID_REF src_ptr = OFFSET(args.geom_data.data, src_offset);
       is_active = build_aabb(bounds, src_ptr, dst_ptr, args.geom_data.geometry_id, global_id);
    } else {
       VOID_REF src_ptr = OFFSET(args.geom_data.data, src_offset);
       /* arrayOfPointers: with a stride of 8 the source element holds the address of the
        * instance rather than the instance itself.
        */
       if (args.geom_data.stride == 8) {
          src_ptr = DEREF(REF(VOID_REF)(src_ptr));
       }

       is_active = build_instance(bounds, src_ptr, dst_ptr, global_id);
    }

    /* With ALWAYS_ACTIVE, inactive triangles and AABBs are kept as leaves. Their bounds are
     * replaced by a degenerate box at the origin for the header reduction below.
     */
    if (VK_BUILD_FLAG(VK_BUILD_FLAG_ALWAYS_ACTIVE) &&
        !is_active && args.geom_data.geometry_type != VK_GEOMETRY_TYPE_INSTANCES_KHR) {
       bounds.min = vec3(0.0);
       bounds.max = vec3(0.0);
       is_active = true;
    }

    DEREF(id_ptr).id = is_active ? pack_ir_node_id(dst_offset, node_type) : VK_BVH_INVALID_NODE;

    /* Count the active leaves of the whole subgroup with a single atomic. */
    uvec4 ballot = subgroupBallot(is_active);
    if (subgroupElect())
       atomicAdd(DEREF(args.header).active_leaf_count, subgroupBallotBitCount(ballot));

    /* Only active leaves may contribute to the header bounds. For an inactive primitive the build
     * helpers can return before assigning bounds (or leave NaN in it), so its value must not be
     * fed into the reduction.
     */
    if (is_active) {
       atomicMin(DEREF(args.header).min_bounds[0], to_emulated_float(bounds.min.x));
       atomicMin(DEREF(args.header).min_bounds[1], to_emulated_float(bounds.min.y));
       atomicMin(DEREF(args.header).min_bounds[2], to_emulated_float(bounds.min.z));
       atomicMax(DEREF(args.header).max_bounds[0], to_emulated_float(bounds.max.x));
       atomicMax(DEREF(args.header).max_bounds[1], to_emulated_float(bounds.max.y));
       atomicMax(DEREF(args.header).max_bounds[2], to_emulated_float(bounds.max.z));
    }
 }
	/*
	* Copyright © 2022 Konstantin Seurer
	*
	* Permission is hereby granted, free of charge, to any person obtaining a
	* copy of this software and associated documentation files (the "Software"),
	* to deal in the Software without restriction, including without limitation
	* the rights to use, copy, modify, merge, publish, distribute, sublicense,
	* and/or sell copies of the Software, and to permit persons to whom the
	* Software is furnished to do so, subject to the following conditions:
	*
	* The above copyright notice and this permission notice (including the next
	* paragraph) shall be included in all copies or substantial portions of the
	* Software.
	*
	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
	* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
	* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
	* IN THE SOFTWARE.
	*/

	#extension GL_EXT_shader_explicit_arithmetic_types_int8 : require
	#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require
	#extension GL_EXT_shader_explicit_arithmetic_types_int32 : require
	#extension GL_EXT_shader_explicit_arithmetic_types_int64 : require
	#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require
	#extension GL_EXT_scalar_block_layout : require
	#extension GL_EXT_buffer_reference : require
	#extension GL_EXT_buffer_reference2 : require
	#extension GL_KHR_shader_subgroup_vote : require
	#extension GL_KHR_shader_subgroup_arithmetic : require
	#extension GL_KHR_shader_subgroup_ballot : require

	#include "vk_build_interface.h"

	/* The workgroup size along X is supplied through the specialization constant with id
	 * SUBGROUP_SIZE_ID; Y and Z are fixed to 1. Each invocation processes one primitive (see main()).
	 * NOTE(review): the constant's name suggests one workgroup == one subgroup -- confirm against
	 * the pipeline setup.
	 */
	layout(local_size_x_id = SUBGROUP_SIZE_ID, local_size_y = 1, local_size_z = 1) in;

	/* Per-dispatch build arguments. main() reads args.geom_data, args.ids, args.bvh and
	 * args.header from this block.
	 */
	layout(push_constant) uniform CONSTS {
	leaf_args args;
	};

	/* Builds the IR triangle node for triangle global_id of geom_data at dst_ptr.
	 *
	 * bounds receives the AABB of the (optionally transformed) triangle.
	 *
	 * Returns true if the triangle is active. An inactive triangle is skipped without writing
	 * anything (bounds is left untouched) unless VK_BUILD_FLAG_ALWAYS_ACTIVE is set, in which case
	 * the node is still emitted and false is returned.
	 */
	bool
	build_triangle(inout vk_aabb bounds, VOID_REF dst_ptr, vk_bvh_geometry_data geom_data, uint32_t global_id)
	{
	   triangle_indices tri_indices = load_indices(geom_data.indices, geom_data.index_format, global_id);
	   triangle_vertices tri =
	      load_vertices(geom_data.data, tri_indices, geom_data.vertex_format, geom_data.stride);

	   /* An inactive triangle is one for which the first (X) component of any vertex is NaN. If any
	    * other vertex component is NaN, and the first is not, the behavior is undefined. If the vertex
	    * format does not have a NaN representation, then all triangles are considered active.
	    */
	   bool has_nan_x = isnan(tri.vertex[0].x) || isnan(tri.vertex[1].x) || isnan(tri.vertex[2].x);
	   bool is_valid = !has_nan_x;
	   if (!is_valid && !VK_BUILD_FLAG(VK_BUILD_FLAG_ALWAYS_ACTIVE))
	      return false;

	   if (geom_data.transform != NULL) {
	      /* The transform is read as 3 rows of 4 floats (element index = col + row * 4). The fourth
	       * row keeps the identity's (0, 0, 0, 1).
	       */
	      mat4 xform = mat4(1.0);

	      for (uint32_t col = 0; col < 4; col++) {
	         for (uint32_t row = 0; row < 3; row++)
	            xform[col][row] = DEREF(INDEX(float, geom_data.transform, row * 4 + col));
	      }

	      for (uint32_t v = 0; v < 3; v++)
	         tri.vertex[v] = xform * tri.vertex[v];
	   }

	   REF(vk_ir_triangle_node) node = REF(vk_ir_triangle_node)(dst_ptr);

	   /* Start from an empty box and grow it around the three vertices while storing them. */
	   bounds.min = vec3(INFINITY);
	   bounds.max = vec3(-INFINITY);

	   for (uint32_t v = 0; v < 3; v++) {
	      for (uint32_t axis = 0; axis < 3; axis++) {
	         float value = tri.vertex[v][axis];

	         DEREF(node).coords[v][axis] = value;
	         bounds.min[axis] = min(bounds.min[axis], value);
	         bounds.max[axis] = max(bounds.max[axis], value);
	      }
	   }

	   DEREF(node).base.aabb = bounds;
	   DEREF(node).triangle_id = global_id;
	   DEREF(node).geometry_id_and_flags = geom_data.geometry_id;
	   /* NOTE(review): the meaning of the constant 9 is not visible in this file -- confirm against
	    * the vk_ir_triangle_node definition.
	    */
	   DEREF(node).id = 9;

	   return is_valid;
	}

	/* Builds the IR AABB node for primitive global_id at dst_ptr from the box stored at src_ptr.
	 *
	 * bounds receives the six floats read from src_ptr (min.xyz followed by max.xyz); it is written
	 * even when the AABB turns out to be inactive.
	 *
	 * Returns true if the AABB is active. An inactive AABB does not get a node unless
	 * VK_BUILD_FLAG_ALWAYS_ACTIVE is set, in which case the node is still emitted and false is
	 * returned.
	 */
	bool
	build_aabb(inout vk_aabb bounds, VOID_REF src_ptr, VOID_REF dst_ptr, uint32_t geometry_id, uint32_t global_id)
	{
	   for (uint32_t axis = 0; axis < 3; axis++)
	      bounds.min[axis] = DEREF(INDEX(float, src_ptr, axis));

	   for (uint32_t axis = 0; axis < 3; axis++)
	      bounds.max[axis] = DEREF(INDEX(float, src_ptr, axis + 3));

	   /* An inactive AABB is one for which the minimum X coordinate is NaN. If any other component is
	    * NaN, and the first is not, the behavior is undefined.
	    */
	   bool is_valid = !isnan(bounds.min.x);
	   if (!is_valid && !VK_BUILD_FLAG(VK_BUILD_FLAG_ALWAYS_ACTIVE))
	      return false;

	   REF(vk_ir_aabb_node) node = REF(vk_ir_aabb_node)(dst_ptr);

	   DEREF(node).base.aabb = bounds;
	   DEREF(node).primitive_id = global_id;
	   DEREF(node).geometry_id_and_flags = geometry_id;

	   return is_valid;
	}

	/* Returns the axis-aligned box that encloses blas_aabb after applying otw_matrix.
	 *
	 * otw_matrix[i] is used as the i-th row of the affine transform: elements 0..2 are the linear
	 * part and element 3 is the translation. For every output axis the result starts at the
	 * translation and adds, per input axis, the smaller (for min) or larger (for max) of the two
	 * products with the box's min and max coordinate.
	 */
	vk_aabb
	calculate_instance_node_bounds(vk_aabb blas_aabb, mat3x4 otw_matrix)
	{
	   vk_aabb result;

	   for (uint32_t axis = 0; axis < 3; ++axis) {
	      vec4 row = otw_matrix[axis];
	      float lo = row[3];
	      float hi = row[3];

	      for (uint32_t k = 0; k < 3; ++k) {
	         float from_min = row[k] * blas_aabb.min[k];
	         float from_max = row[k] * blas_aabb.max[k];

	         lo += min(from_min, from_max);
	         hi += max(from_min, from_max);
	      }

	      result.min[axis] = lo;
	      result.max[axis] = hi;
	   }

	   return result;
	}

	/* Builds the IR instance node for instance global_id at dst_ptr from the
	 * AccelerationStructureInstance stored at src_ptr.
	 *
	 * bounds receives the world-space AABB of the instance. It is only written when the function
	 * returns true.
	 *
	 * Returns false if the instance is skipped: null acceleration structure reference, an instance
	 * mask of zero, or NaN bounds in the referenced acceleration structure.
	 */
	bool
	build_instance(inout vk_aabb bounds, VOID_REF src_ptr, VOID_REF dst_ptr, uint32_t global_id)
	{
	   AccelerationStructureInstance instance = DEREF(REF(AccelerationStructureInstance)(src_ptr));

	   /* An inactive instance is one whose acceleration structure handle is VK_NULL_HANDLE. Since the
	    * active terminology is only relevant for BVH updates, which we do not implement, we can also
	    * skip instances with mask == 0 (the mask occupies the bits from bit 24 upwards).
	    */
	   bool has_blas = instance.accelerationStructureReference != 0;
	   bool has_visible_mask = instance.custom_instance_and_mask >= (1u << 24u);
	   if (!has_blas || !has_visible_mask)
	      return false;

	   REF(vk_ir_instance_node) node = REF(vk_ir_instance_node)(dst_ptr);
	   mat4 transform = mat4(instance.transform);
	   mat3x4 otw = mat3x4(transform);

	   DEREF(node).base_ptr = instance.accelerationStructureReference;
	   DEREF(node).otw_matrix = otw;

	   /* The bounds of the referenced acceleration structure are read at offset BVH_BOUNDS_OFFSET
	    * from its reference.
	    */
	   vk_aabb blas_aabb = DEREF(REF(vk_aabb)(instance.accelerationStructureReference + BVH_BOUNDS_OFFSET));

	   bool blas_bounds_are_nan = any(isnan(blas_aabb.min)) || any(isnan(blas_aabb.max));
	   if (blas_bounds_are_nan)
	      return false;

	   bounds = calculate_instance_node_bounds(blas_aabb, otw);

	#ifdef CALCULATE_FINE_INSTANCE_NODE_BOUNDS
	   vec3 blas_extent = blas_aabb.max - blas_aabb.min;
	   float blas_volume = blas_extent.x * blas_extent.y * blas_extent.z;
	   blas_volume *= abs(determinant(mat3(transform)));

	   vec3 coarse_extent = bounds.max - bounds.min;
	   float coarse_volume = coarse_extent.x * coarse_extent.y * coarse_extent.z;

	   /* Only try calculating finer-grained instance node bounds if the volume of the transformed
	    * instance AABB is significantly higher than the volume of the BLAS without transformations
	    * applied. Otherwise, the finer-grained bounds won't be much smaller and the additional overhead
	    * wouldn't be worth it.
	    */
	   if (coarse_volume > 1.4f * blas_volume)
	      bounds = CALCULATE_FINE_INSTANCE_NODE_BOUNDS(instance.accelerationStructureReference, otw);
	#endif

	   DEREF(node).base.aabb = bounds;
	   DEREF(node).custom_instance_and_mask = instance.custom_instance_and_mask;
	   DEREF(node).sbt_offset_and_flags = instance.sbt_offset_and_flags;
	   DEREF(node).instance_id = global_id;

	   if (VK_BUILD_FLAG(VK_BUILD_FLAG_PROPAGATE_CULL_FLAGS)) {
	      /* The instance flags sit above bit 24 of sbt_offset_and_flags. A forced opacity flag on the
	       * instance overrides the root flags stored in the referenced acceleration structure.
	       */
	      uint32_t root_flags = 0;

	      if ((instance.sbt_offset_and_flags & (VK_GEOMETRY_INSTANCE_FORCE_OPAQUE_BIT_KHR << 24)) != 0) {
	         root_flags = VK_BVH_BOX_FLAG_ONLY_OPAQUE;
	      } else if ((instance.sbt_offset_and_flags & (VK_GEOMETRY_INSTANCE_FORCE_NO_OPAQUE_BIT_KHR << 24)) != 0) {
	         root_flags = VK_BVH_BOX_FLAG_NO_OPAQUE;
	      } else {
	         root_flags = DEREF(REF(uint32_t)(instance.accelerationStructureReference + ROOT_FLAGS_OFFSET));
	      }

	      DEREF(node).root_flags = root_flags;
	   }

	   return true;
	}

	/* Entry point: one invocation builds the IR leaf node for one primitive of the geometry
	 * described by args.geom_data, stores the node id (or VK_BVH_INVALID_NODE) in args.ids, and
	 * accumulates the active leaf count and the min/max bounds in args.header.
	 */
	void
	main(void)
	{
	   uint32_t global_id = gl_GlobalInvocationID.x;
	   uint32_t primitive_id = args.geom_data.first_id + global_id;

	   REF(key_id_pair) id_ptr = INDEX(key_id_pair, args.ids, primitive_id);
	   uint32_t src_offset = global_id * args.geom_data.stride;

	   /* Pick the IR node layout matching the geometry type. Anything that is neither triangles nor
	    * AABBs is treated as instances.
	    */
	   uint32_t dst_stride;
	   uint32_t node_type;
	   if (args.geom_data.geometry_type == VK_GEOMETRY_TYPE_TRIANGLES_KHR) {
	      dst_stride = SIZEOF(vk_ir_triangle_node);
	      node_type = vk_ir_node_triangle;
	   } else if (args.geom_data.geometry_type == VK_GEOMETRY_TYPE_AABBS_KHR) {
	      dst_stride = SIZEOF(vk_ir_aabb_node);
	      node_type = vk_ir_node_aabb;
	   } else {
	      dst_stride = SIZEOF(vk_ir_instance_node);
	      node_type = vk_ir_node_instance;
	   }

	   uint32_t dst_offset = primitive_id * dst_stride;
	   VOID_REF dst_ptr = OFFSET(args.bvh, dst_offset);

	   vk_aabb bounds;
	   bool is_active;
	   if (args.geom_data.geometry_type == VK_GEOMETRY_TYPE_TRIANGLES_KHR) {
	      is_active = build_triangle(bounds, dst_ptr, args.geom_data, global_id);
	   } else if (args.geom_data.geometry_type == VK_GEOMETRY_TYPE_AABBS_KHR) {
	      VOID_REF src_ptr = OFFSET(args.geom_data.data, src_offset);
	      is_active = build_aabb(bounds, src_ptr, dst_ptr, args.geom_data.geometry_id, global_id);
	   } else {
	      VOID_REF src_ptr = OFFSET(args.geom_data.data, src_offset);
	      /* arrayOfPointers: with a stride of 8 the source element holds the address of the
	       * instance rather than the instance itself.
	       */
	      if (args.geom_data.stride == 8) {
	         src_ptr = DEREF(REF(VOID_REF)(src_ptr));
	      }

	      is_active = build_instance(bounds, src_ptr, dst_ptr, global_id);
	   }

	   /* With ALWAYS_ACTIVE, inactive triangles and AABBs are kept as leaves. Their bounds are
	    * replaced by a degenerate box at the origin for the header reduction below.
	    */
	   if (VK_BUILD_FLAG(VK_BUILD_FLAG_ALWAYS_ACTIVE) &&
	       !is_active && args.geom_data.geometry_type != VK_GEOMETRY_TYPE_INSTANCES_KHR) {
	      bounds.min = vec3(0.0);
	      bounds.max = vec3(0.0);
	      is_active = true;
	   }

	   DEREF(id_ptr).id = is_active ? pack_ir_node_id(dst_offset, node_type) : VK_BVH_INVALID_NODE;

	   /* Count the active leaves of the whole subgroup with a single atomic. */
	   uvec4 ballot = subgroupBallot(is_active);
	   if (subgroupElect())
	      atomicAdd(DEREF(args.header).active_leaf_count, subgroupBallotBitCount(ballot));

	   /* Only active leaves may contribute to the header bounds. For an inactive primitive the build
	    * helpers can return before assigning bounds (or leave NaN in it), so its value must not be
	    * fed into the reduction.
	    */
	   if (is_active) {
	      atomicMin(DEREF(args.header).min_bounds[0], to_emulated_float(bounds.min.x));
	      atomicMin(DEREF(args.header).min_bounds[1], to_emulated_float(bounds.min.y));
	      atomicMin(DEREF(args.header).min_bounds[2], to_emulated_float(bounds.min.z));
	      atomicMax(DEREF(args.header).max_bounds[0], to_emulated_float(bounds.max.x));
	      atomicMax(DEREF(args.header).max_bounds[1], to_emulated_float(bounds.max.y));
	      atomicMax(DEREF(args.header).max_bounds[2], to_emulated_float(bounds.max.z));
	   }
	}