| /* |
| * Copyright © 2021 Bas Nieuwenhuizen |
| * Copyright © 2023 Valve Corporation |
| * |
| * Permission is hereby granted, free of charge, to any person obtaining a |
| * copy of this software and associated documentation files (the "Software"), |
| * to deal in the Software without restriction, including without limitation |
| * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
| * and/or sell copies of the Software, and to permit persons to whom the |
| * Software is furnished to do so, subject to the following conditions: |
| * |
| * The above copyright notice and this permission notice (including the next |
| * paragraph) shall be included in all copies or substantial portions of the |
| * Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
| * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
| * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
| * IN THE SOFTWARE. |
| */ |
| |
| #include "vk_acceleration_structure.h" |
| |
| #include "vk_alloc.h" |
| #include "vk_common_entrypoints.h" |
| #include "vk_device.h" |
| #include "vk_command_buffer.h" |
| #include "vk_log.h" |
| #include "vk_meta.h" |
| |
| #include "bvh/vk_build_interface.h" |
| #include "bvh/vk_bvh.h" |
| |
| #include "radix_sort/common/vk/barrier.h" |
| #include "radix_sort/shaders/push.h" |
| |
| #include "util/u_string.h" |
| |
| static const uint32_t leaf_spv[] = { |
| #include "bvh/leaf.spv.h" |
| }; |
| |
| static const uint32_t morton_spv[] = { |
| #include "bvh/morton.spv.h" |
| }; |
| |
| static const uint32_t lbvh_main_spv[] = { |
| #include "bvh/lbvh_main.spv.h" |
| }; |
| |
| static const uint32_t lbvh_generate_ir_spv[] = { |
| #include "bvh/lbvh_generate_ir.spv.h" |
| }; |
| |
| static const uint32_t ploc_spv[] = { |
| #include "bvh/ploc_internal.spv.h" |
| }; |
| |
| VKAPI_ATTR VkResult VKAPI_CALL |
| vk_common_CreateAccelerationStructureKHR(VkDevice _device, |
| const VkAccelerationStructureCreateInfoKHR *pCreateInfo, |
| const VkAllocationCallbacks *pAllocator, |
| VkAccelerationStructureKHR *pAccelerationStructure) |
| { |
| VK_FROM_HANDLE(vk_device, device, _device); |
| VK_FROM_HANDLE(vk_buffer, buffer, pCreateInfo->buffer); |
| |
| struct vk_acceleration_structure *accel_struct = vk_object_alloc( |
| device, pAllocator, sizeof(struct vk_acceleration_structure), |
| VK_OBJECT_TYPE_ACCELERATION_STRUCTURE_KHR); |
| |
| if (!accel_struct) |
| return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); |
| |
| accel_struct->buffer = buffer; |
| accel_struct->offset = pCreateInfo->offset; |
| accel_struct->size = pCreateInfo->size; |
| |
| if (pCreateInfo->deviceAddress && |
| vk_acceleration_structure_get_va(accel_struct) != pCreateInfo->deviceAddress) { |
| vk_object_free(device, pAllocator, accel_struct); |
| return vk_error(device, VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS); |
| } |
| |
| *pAccelerationStructure = vk_acceleration_structure_to_handle(accel_struct); |
| return VK_SUCCESS; |
| } |
| |
| VKAPI_ATTR void VKAPI_CALL |
| vk_common_DestroyAccelerationStructureKHR(VkDevice _device, |
| VkAccelerationStructureKHR accelerationStructure, |
| const VkAllocationCallbacks *pAllocator) |
| { |
| VK_FROM_HANDLE(vk_device, device, _device); |
| VK_FROM_HANDLE(vk_acceleration_structure, accel_struct, accelerationStructure); |
| |
| if (!accel_struct) |
| return; |
| |
| vk_object_free(device, pAllocator, accel_struct); |
| } |
| |
| VKAPI_ATTR VkDeviceAddress VKAPI_CALL |
| vk_common_GetAccelerationStructureDeviceAddressKHR( |
| VkDevice _device, const VkAccelerationStructureDeviceAddressInfoKHR *pInfo) |
| { |
| VK_FROM_HANDLE(vk_acceleration_structure, accel_struct, pInfo->accelerationStructure); |
| return vk_acceleration_structure_get_va(accel_struct); |
| } |
| |
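| /* Each keyval handed to the radix sort pairs a Morton code with its leaf ID |
| * (KEY_ID_PAIR_SIZE bytes per pair); MORTON_BIT_SIZE is the number of Morton |
| * key bits that actually participate in the sort. */ |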
| #define KEY_ID_PAIR_SIZE 8 |
| #define MORTON_BIT_SIZE 24 |
| |
| static void |
| vk_acceleration_structure_build_state_init(struct vk_acceleration_structure_build_state *state, |
| struct vk_device *device, uint32_t leaf_count, |
| const VkAccelerationStructureBuildGeometryInfoKHR *build_info, |
| const struct vk_acceleration_structure_build_args *args) |
| { |
| state->build_info = build_info; |
| state->leaf_node_count = leaf_count; |
| |
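| /* Heuristic: tiny builds always use LBVH; TLAS builds and BLAS builds that |
| * neither prefer fast build nor allow updates use PLOC for better tree |
| * quality; everything else falls back to the faster LBVH builder. */ |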
| if (leaf_count <= 4) |
| state->config.internal_type = VK_INTERNAL_BUILD_TYPE_LBVH; |
| else if (build_info->type == VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR) |
| state->config.internal_type = VK_INTERNAL_BUILD_TYPE_PLOC; |
| else if (!(build_info->flags & VK_BUILD_ACCELERATION_STRUCTURE_PREFER_FAST_BUILD_BIT_KHR) && |
| !(build_info->flags & VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_UPDATE_BIT_KHR)) |
| state->config.internal_type = VK_INTERNAL_BUILD_TYPE_PLOC; |
| else |
| state->config.internal_type = VK_INTERNAL_BUILD_TYPE_LBVH; |
| |
| if (build_info->mode == VK_BUILD_ACCELERATION_STRUCTURE_MODE_UPDATE_KHR && |
| build_info->type == VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR && |
| device->as_build_ops->update_as[0]) |
| state->config.internal_type = VK_INTERNAL_BUILD_TYPE_UPDATE; |
| |
| if ((build_info->flags & VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_UPDATE_BIT_KHR) && |
| build_info->type == VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR && |
| device->as_build_ops->update_as[0]) |
| state->config.updateable = true; |
| |
| if (device->as_build_ops->get_build_config) |
| device->as_build_ops->get_build_config(vk_device_to_handle(device), state); |
| |
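| /* A binary BVH over N leaves has N - 1 internal nodes; clamp so that |
| * degenerate builds still reserve space for a root node. */ |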
| uint32_t internal_count = MAX2(leaf_count, 2) - 1; |
| |
| radix_sort_vk_memory_requirements_t requirements = { |
| 0, |
| }; |
| radix_sort_vk_get_memory_requirements(args->radix_sort, leaf_count, |
| &requirements); |
| |
| uint32_t ir_leaf_size; |
| switch (vk_get_as_geometry_type(build_info)) { |
| case VK_GEOMETRY_TYPE_TRIANGLES_KHR: |
| ir_leaf_size = sizeof(struct vk_ir_triangle_node); |
| break; |
| case VK_GEOMETRY_TYPE_AABBS_KHR: |
| ir_leaf_size = sizeof(struct vk_ir_aabb_node); |
| break; |
| case VK_GEOMETRY_TYPE_INSTANCES_KHR: |
| ir_leaf_size = sizeof(struct vk_ir_instance_node); |
| break; |
| default: |
| unreachable("Unknown VkGeometryTypeKHR"); |
| } |
| |
| uint32_t offset = 0; |
| |
| uint32_t ploc_scratch_space = 0; |
| uint32_t lbvh_node_space = 0; |
| |
| if (state->config.internal_type == VK_INTERNAL_BUILD_TYPE_PLOC) |
| ploc_scratch_space = DIV_ROUND_UP(leaf_count, PLOC_WORKGROUP_SIZE) * sizeof(struct ploc_prefix_scan_partition); |
| else |
| lbvh_node_space = sizeof(struct lbvh_node_info) * internal_count; |
| |
| uint32_t encode_scratch_size = 0; |
| if (device->as_build_ops->get_encode_scratch_size) |
| encode_scratch_size = device->as_build_ops->get_encode_scratch_size(vk_device_to_handle(device), state); |
| |
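| /* Scratch layout, in the order the offsets are assigned below: |
| * |
| * header |
| * sort keyvals (even) |
| * sort keyvals (odd) |
| * sort internal data, aliased with PLOC partitions / LBVH node info |
| * IR leaf nodes |
| * IR internal nodes |
| * |
| * Driver-specific encode scratch, if any, reuses the sort-related region; |
| * the MAX2() below keeps it from overlapping the IR nodes. */ |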
| state->scratch.header_offset = offset; |
| offset += sizeof(struct vk_ir_header); |
| |
| /* The encode passes should not need node sorting state. Reuse the space reserved for node sorting. */ |
| uint32_t encode_scratch_end = offset + encode_scratch_size; |
| |
| state->scratch.sort_buffer_offset[0] = offset; |
| offset += requirements.keyvals_size; |
| |
| state->scratch.sort_buffer_offset[1] = offset; |
| offset += requirements.keyvals_size; |
| |
| state->scratch.sort_internal_offset = offset; |
| /* The radix sort's internal data is no longer needed by the time the |
| * PLOC/LBVH kernels run, so alias their per-build scratch with it to save space. */ |
| state->scratch.ploc_prefix_sum_partition_offset = offset; |
| state->scratch.lbvh_node_offset = offset; |
| offset += MAX3(requirements.internal_size, ploc_scratch_space, lbvh_node_space); |
| |
| /* Make sure encode scratch space does not overlap the BVH. */ |
| offset = MAX2(offset, encode_scratch_end); |
| |
| state->scratch.ir_offset = offset; |
| offset += ir_leaf_size * leaf_count; |
| |
| state->scratch.internal_node_offset = offset; |
| offset += sizeof(struct vk_ir_box_node) * internal_count; |
| |
| state->scratch.size = offset; |
| |
| if (build_info->type == VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR && |
| device->as_build_ops->update_as[0]) { |
| state->scratch.update_size = device->as_build_ops->get_update_scratch_size(vk_device_to_handle(device), state); |
| } else { |
| state->scratch.update_size = offset; |
| } |
| } |
| |
| struct bvh_state { |
| struct vk_acceleration_structure_build_state vk; |
| |
| uint32_t scratch_offset; |
| |
| uint32_t internal_node_count; |
| |
| /* Radix sort state */ |
| uint32_t scatter_blocks; |
| uint32_t count_ru_scatter; |
| uint32_t histo_blocks; |
| uint32_t count_ru_histo; |
| struct rs_push_scatter push_scatter; |
| |
| uint32_t last_encode_pass; |
| }; |
| |
| struct bvh_batch_state { |
| bool any_updateable; |
| bool any_non_updateable; |
| bool any_ploc; |
| bool any_lbvh; |
| bool any_update; |
| }; |
| |
| struct vk_bvh_build_pipeline_layout_key { |
| enum vk_meta_object_key_type type; |
| uint32_t size; |
| }; |
| |
| struct vk_bvh_build_pipeline_key { |
| enum vk_meta_object_key_type type; |
| uint32_t flags; |
| }; |
| |
| VkResult |
| vk_get_bvh_build_pipeline_layout(struct vk_device *device, struct vk_meta_device *meta, |
| unsigned push_constant_size, VkPipelineLayout *layout) |
| { |
| struct vk_bvh_build_pipeline_layout_key key = { |
| .type = VK_META_OBJECT_KEY_BVH_PIPELINE_LAYOUT, |
| .size = push_constant_size, |
| }; |
| |
| VkPushConstantRange push_constant_range = { |
| .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, |
| .size = push_constant_size, |
| }; |
| |
| return vk_meta_get_pipeline_layout( |
| device, meta, NULL, &push_constant_range, &key, sizeof(key), layout); |
| } |
| |
| VkResult |
| vk_get_bvh_build_pipeline_spv(struct vk_device *device, struct vk_meta_device *meta, |
| enum vk_meta_object_key_type type, const uint32_t *spv, |
| uint32_t spv_size, unsigned push_constant_size, |
| const struct vk_acceleration_structure_build_args *args, |
| uint32_t flags, VkPipeline *pipeline) |
| { |
| VkPipelineLayout layout; |
| VkResult result = vk_get_bvh_build_pipeline_layout(device, meta, push_constant_size, &layout); |
| if (result != VK_SUCCESS) |
| return result; |
| |
| struct vk_bvh_build_pipeline_key key = { |
| .type = type, |
| .flags = flags, |
| }; |
| |
| VkPipeline pipeline_from_cache = vk_meta_lookup_pipeline(meta, &key, sizeof(key)); |
| if (pipeline_from_cache != VK_NULL_HANDLE) { |
| *pipeline = pipeline_from_cache; |
| return VK_SUCCESS; |
| } |
| |
| VkShaderModuleCreateInfo module_info = { |
| .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, |
| .pNext = NULL, |
| .flags = 0, |
| .codeSize = spv_size, |
| .pCode = spv, |
| }; |
| |
| VkSpecializationMapEntry spec_map[4] = { |
| { |
| .constantID = SUBGROUP_SIZE_ID, |
| .offset = 0, |
| .size = sizeof(args->subgroup_size), |
| }, |
| { |
| .constantID = BVH_BOUNDS_OFFSET_ID, |
| .offset = sizeof(args->subgroup_size), |
| .size = sizeof(args->bvh_bounds_offset), |
| }, |
| { |
| .constantID = BUILD_FLAGS_ID, |
| .offset = sizeof(args->subgroup_size) + sizeof(args->bvh_bounds_offset), |
| .size = sizeof(flags), |
| }, |
| { |
| .constantID = ROOT_FLAGS_OFFSET_ID, |
| .offset = sizeof(args->subgroup_size) + sizeof(args->bvh_bounds_offset) + |
| sizeof(flags), |
| .size = sizeof(args->root_flags_offset), |
| } |
| }; |
| |
| uint32_t spec_constants[4] = { |
| args->subgroup_size, |
| args->bvh_bounds_offset, |
| flags, |
| args->root_flags_offset, |
| }; |
| |
| VkSpecializationInfo spec_info = { |
| .mapEntryCount = ARRAY_SIZE(spec_map), |
| .pMapEntries = spec_map, |
| .dataSize = sizeof(spec_constants), |
| .pData = spec_constants, |
| }; |
| |
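| /* The SPIR-V is supplied inline by chaining VkShaderModuleCreateInfo through |
| * the required-subgroup-size struct into the stage info, so no separate |
| * VkShaderModule object is created. */ |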
| VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT rssci = { |
| .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT, |
| .pNext = &module_info, |
| .requiredSubgroupSize = args->subgroup_size, |
| }; |
| |
| VkPipelineShaderStageCreateInfo shader_stage = { |
| .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, |
| .pNext = &rssci, |
| .flags = VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT_EXT, |
| .stage = VK_SHADER_STAGE_COMPUTE_BIT, |
| .pName = "main", |
| .pSpecializationInfo = &spec_info, |
| }; |
| |
| VkComputePipelineCreateInfo pipeline_info = { |
| .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, |
| .stage = shader_stage, |
| .flags = 0, |
| .layout = layout, |
| }; |
| |
| return vk_meta_create_compute_pipeline(device, meta, &pipeline_info, |
| &key, sizeof(key), pipeline); |
| } |
| |
| static uint32_t |
| pack_geometry_id_and_flags(uint32_t geometry_id, uint32_t flags) |
| { |
| uint32_t geometry_id_and_flags = geometry_id; |
| if (flags & VK_GEOMETRY_OPAQUE_BIT_KHR) |
| geometry_id_and_flags |= VK_GEOMETRY_OPAQUE; |
| |
| return geometry_id_and_flags; |
| } |
| |
| struct vk_bvh_geometry_data |
| vk_fill_geometry_data(VkAccelerationStructureTypeKHR type, uint32_t first_id, uint32_t geom_index, |
| const VkAccelerationStructureGeometryKHR *geometry, |
| const VkAccelerationStructureBuildRangeInfoKHR *build_range_info) |
| { |
| struct vk_bvh_geometry_data data = { |
| .first_id = first_id, |
| .geometry_id = pack_geometry_id_and_flags(geom_index, geometry->flags), |
| .geometry_type = geometry->geometryType, |
| }; |
| |
| switch (geometry->geometryType) { |
| case VK_GEOMETRY_TYPE_TRIANGLES_KHR: |
| assert(type == VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR); |
| |
| data.data = geometry->geometry.triangles.vertexData.deviceAddress + |
| build_range_info->firstVertex * geometry->geometry.triangles.vertexStride; |
| data.indices = geometry->geometry.triangles.indexData.deviceAddress; |
| |
| if (geometry->geometry.triangles.indexType == VK_INDEX_TYPE_NONE_KHR) |
| data.data += build_range_info->primitiveOffset; |
| else |
| data.indices += build_range_info->primitiveOffset; |
| |
| data.transform = geometry->geometry.triangles.transformData.deviceAddress; |
| if (data.transform) |
| data.transform += build_range_info->transformOffset; |
| |
| data.stride = geometry->geometry.triangles.vertexStride; |
| data.vertex_format = geometry->geometry.triangles.vertexFormat; |
| data.index_format = geometry->geometry.triangles.indexType; |
| break; |
| case VK_GEOMETRY_TYPE_AABBS_KHR: |
| assert(type == VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR); |
| |
| data.data = geometry->geometry.aabbs.data.deviceAddress + build_range_info->primitiveOffset; |
| data.stride = geometry->geometry.aabbs.stride; |
| break; |
| case VK_GEOMETRY_TYPE_INSTANCES_KHR: |
| assert(type == VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR); |
| |
| data.data = geometry->geometry.instances.data.deviceAddress + build_range_info->primitiveOffset; |
| |
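| /* With arrayOfPointers, each element is a 64-bit device pointer to a |
| * VkAccelerationStructureInstanceKHR rather than the struct itself. */ |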
| if (geometry->geometry.instances.arrayOfPointers) |
| data.stride = 8; |
| else |
| data.stride = sizeof(VkAccelerationStructureInstanceKHR); |
| break; |
| default: |
| unreachable("Unknown geometryType"); |
| } |
| |
| return data; |
| } |
| |
| void |
| vk_accel_struct_cmd_begin_debug_marker(VkCommandBuffer commandBuffer, |
| enum vk_acceleration_structure_build_step step, |
| const char *format, ...) |
| { |
| VK_FROM_HANDLE(vk_command_buffer, cmd_buffer, commandBuffer); |
| struct vk_device *device = cmd_buffer->base.device; |
| |
| va_list ap; |
| va_start(ap, format); |
| |
| char *name; |
| if (vasprintf(&name, format, ap) == -1) { |
| va_end(ap); |
| return; |
| } |
| |
| va_end(ap); |
| |
| VkDebugMarkerMarkerInfoEXT marker = { |
| .sType = VK_STRUCTURE_TYPE_DEBUG_MARKER_MARKER_INFO_EXT, |
| .pMarkerName = name, |
| }; |
| |
| device->dispatch_table.CmdDebugMarkerBeginEXT(commandBuffer, &marker); |
| } |
| |
| void |
| vk_accel_struct_cmd_end_debug_marker(VkCommandBuffer commandBuffer) |
| { |
| VK_FROM_HANDLE(vk_command_buffer, cmd_buffer, commandBuffer); |
| struct vk_device *device = cmd_buffer->base.device; |
| |
| device->dispatch_table.CmdDebugMarkerEndEXT(commandBuffer); |
| } |
| |
| static VkResult |
| build_leaves(VkCommandBuffer commandBuffer, |
| struct vk_device *device, struct vk_meta_device *meta, |
| const struct vk_acceleration_structure_build_args *args, |
| uint32_t infoCount, |
| const VkAccelerationStructureBuildGeometryInfoKHR *pInfos, |
| const VkAccelerationStructureBuildRangeInfoKHR *const *ppBuildRangeInfos, |
| struct bvh_state *bvh_states, |
| bool updateable) |
| { |
| VkPipeline pipeline; |
| VkPipelineLayout layout; |
| |
| /* Many apps are broken and will make inactive primitives active when |
| * updating, even though this is disallowed by the spec. To handle this, |
| * we use a different variant for updateable acceleration structures when |
| * the driver implements an update pass. This passes through inactive leaf |
| * nodes as if they were active, with an empty bounding box. It's then the |
| * driver or HW's responsibility to filter out inactive nodes. |
| */ |
| const uint32_t *spirv = leaf_spv; |
| size_t spirv_size = sizeof(leaf_spv); |
| |
| if (device->as_build_ops->leaf_spirv_override) { |
| spirv = device->as_build_ops->leaf_spirv_override; |
| spirv_size = device->as_build_ops->leaf_spirv_override_size; |
| } |
| |
| uint32_t flags = 0; |
| if (updateable) |
| flags |= VK_BUILD_FLAG_ALWAYS_ACTIVE; |
| if (args->propagate_cull_flags) |
| flags |= VK_BUILD_FLAG_PROPAGATE_CULL_FLAGS; |
| VkResult result = vk_get_bvh_build_pipeline_spv(device, meta, VK_META_OBJECT_KEY_LEAF, |
| spirv, spirv_size, sizeof(struct leaf_args), |
| args, flags, |
| &pipeline); |
| if (result != VK_SUCCESS) |
| return result; |
| |
| result = vk_get_bvh_build_pipeline_layout(device, meta, sizeof(struct leaf_args), &layout); |
| if (result != VK_SUCCESS) |
| return result; |
| |
| if (args->emit_markers) { |
| device->as_build_ops->begin_debug_marker(commandBuffer, |
| VK_ACCELERATION_STRUCTURE_BUILD_STEP_BUILD_LEAVES, |
| "build_leaves"); |
| } |
| |
| const struct vk_device_dispatch_table *disp = &device->dispatch_table; |
| disp->CmdBindPipeline( |
| commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); |
| |
| for (uint32_t i = 0; i < infoCount; ++i) { |
| if (bvh_states[i].vk.config.internal_type == VK_INTERNAL_BUILD_TYPE_UPDATE) |
| continue; |
| if (bvh_states[i].vk.config.updateable != updateable) |
| continue; |
| |
| struct leaf_args leaf_consts = { |
| .bvh = pInfos[i].scratchData.deviceAddress + bvh_states[i].vk.scratch.ir_offset, |
| .header = pInfos[i].scratchData.deviceAddress + bvh_states[i].vk.scratch.header_offset, |
| .ids = pInfos[i].scratchData.deviceAddress + bvh_states[i].vk.scratch.sort_buffer_offset[0], |
| }; |
| |
| for (unsigned j = 0; j < pInfos[i].geometryCount; ++j) { |
| const VkAccelerationStructureGeometryKHR *geom = |
| pInfos[i].pGeometries ? &pInfos[i].pGeometries[j] : pInfos[i].ppGeometries[j]; |
| |
| const VkAccelerationStructureBuildRangeInfoKHR *build_range_info = &ppBuildRangeInfos[i][j]; |
| |
| if (build_range_info->primitiveCount == 0) |
| continue; |
| |
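| /* leaf_node_count doubles as the running first_id for this geometry's |
| * primitives; it is advanced by primitiveCount after the dispatch. */ |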
| leaf_consts.geom_data = vk_fill_geometry_data(pInfos[i].type, bvh_states[i].vk.leaf_node_count, j, geom, build_range_info); |
| |
| disp->CmdPushConstants(commandBuffer, layout, |
| VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(leaf_consts), &leaf_consts); |
| device->cmd_dispatch_unaligned(commandBuffer, build_range_info->primitiveCount, 1, 1); |
| |
| bvh_states[i].vk.leaf_node_count += build_range_info->primitiveCount; |
| } |
| } |
| |
| if (args->emit_markers) |
| device->as_build_ops->end_debug_marker(commandBuffer); |
| |
| return VK_SUCCESS; |
| } |
| |
| static VkResult |
| morton_generate(VkCommandBuffer commandBuffer, struct vk_device *device, |
| struct vk_meta_device *meta, |
| const struct vk_acceleration_structure_build_args *args, |
| uint32_t infoCount, |
| const VkAccelerationStructureBuildGeometryInfoKHR *pInfos, |
| struct bvh_state *bvh_states) |
| { |
| VkPipeline pipeline; |
| VkPipelineLayout layout; |
| |
| VkResult result = vk_get_bvh_build_pipeline_spv(device, meta, VK_META_OBJECT_KEY_MORTON, |
| morton_spv, sizeof(morton_spv), |
| sizeof(struct morton_args), args, 0, |
| &pipeline); |
| if (result != VK_SUCCESS) |
| return result; |
| |
| result = vk_get_bvh_build_pipeline_layout(device, meta, sizeof(struct morton_args), &layout); |
| if (result != VK_SUCCESS) |
| return result; |
| |
| if (args->emit_markers) { |
| device->as_build_ops->begin_debug_marker(commandBuffer, |
| VK_ACCELERATION_STRUCTURE_BUILD_STEP_MORTON_GENERATE, |
| "morton_generate"); |
| } |
| |
| const struct vk_device_dispatch_table *disp = &device->dispatch_table; |
| disp->CmdBindPipeline( |
| commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); |
| |
| for (uint32_t i = 0; i < infoCount; ++i) { |
| if (bvh_states[i].vk.config.internal_type == VK_INTERNAL_BUILD_TYPE_UPDATE) |
| continue; |
| const struct morton_args consts = { |
| .bvh = pInfos[i].scratchData.deviceAddress + bvh_states[i].vk.scratch.ir_offset, |
| .header = pInfos[i].scratchData.deviceAddress + bvh_states[i].vk.scratch.header_offset, |
| .ids = pInfos[i].scratchData.deviceAddress + bvh_states[i].vk.scratch.sort_buffer_offset[0], |
| }; |
| |
| disp->CmdPushConstants(commandBuffer, layout, |
| VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(consts), &consts); |
| device->cmd_dispatch_unaligned(commandBuffer, bvh_states[i].vk.leaf_node_count, 1, 1); |
| } |
| |
| if (args->emit_markers) |
| device->as_build_ops->end_debug_marker(commandBuffer); |
| |
| return VK_SUCCESS; |
| } |
| |
| static void |
| morton_sort(VkCommandBuffer commandBuffer, struct vk_device *device, |
| const struct vk_acceleration_structure_build_args *args, |
| uint32_t infoCount, |
| const VkAccelerationStructureBuildGeometryInfoKHR *pInfos, struct bvh_state *bvh_states) |
| { |
| const struct vk_device_dispatch_table *disp = &device->dispatch_table; |
| |
| if (args->emit_markers) { |
| device->as_build_ops->begin_debug_marker(commandBuffer, |
| VK_ACCELERATION_STRUCTURE_BUILD_STEP_MORTON_SORT, |
| "morton_sort"); |
| } |
| |
| /* Copyright 2019 The Fuchsia Authors. */ |
| const radix_sort_vk_t *rs = args->radix_sort; |
| |
| /* |
| * OVERVIEW |
| * |
| * 1. Pad the keyvals in `scatter_even`. |
| * 2. Zero the `histograms` and `partitions`. |
| * --- BARRIER --- |
| * 3. HISTOGRAM is dispatched before PREFIX. |
| * --- BARRIER --- |
| * 4. PREFIX is dispatched before the first SCATTER. |
| * --- BARRIER --- |
| * 5. One or more SCATTER dispatches. |
| * |
| * Note that the `partitions` buffer can be zeroed anytime before the first |
| * scatter. |
| */ |
| |
| /* How many passes? */ |
| uint32_t keyval_bytes = rs->config.keyval_dwords * (uint32_t)sizeof(uint32_t); |
| uint32_t keyval_bits = keyval_bytes * 8; |
| uint32_t key_bits = MIN2(MORTON_BIT_SIZE, keyval_bits); |
| uint32_t passes = (key_bits + RS_RADIX_LOG2 - 1) / RS_RADIX_LOG2; |
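| /* e.g. MORTON_BIT_SIZE == 24 with an 8-bit radix digit (RS_RADIX_LOG2 == 8) |
| * gives 3 scatter passes. */ |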
| |
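| /* The scatter passes ping-pong between the even/odd keyval buffers, starting |
| * from the even one, so the sorted IDs end up in sort_buffer_offset[passes & 1]. |
| * Empty builds never sort and keep using buffer 0. */ |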
| for (uint32_t i = 0; i < infoCount; ++i) { |
| if (bvh_states[i].vk.leaf_node_count) |
| bvh_states[i].scratch_offset = bvh_states[i].vk.scratch.sort_buffer_offset[passes & 1]; |
| else |
| bvh_states[i].scratch_offset = bvh_states[i].vk.scratch.sort_buffer_offset[0]; |
| } |
| |
| /* |
| * PAD KEYVALS AND ZERO HISTOGRAM/PARTITIONS |
| * |
| * Pad fractional blocks with max-valued keyvals. |
| * |
| * Zero the histograms and partitions buffer. |
| * |
| * This assumes the partitions follow the histograms. |
| */ |
| |
| /* FIXME(allanmac): Consider precomputing some of these values and hang them off `rs`. */ |
| |
| /* How many scatter blocks? */ |
| uint32_t scatter_wg_size = 1 << rs->config.scatter.workgroup_size_log2; |
| uint32_t scatter_block_kvs = scatter_wg_size * rs->config.scatter.block_rows; |
| |
| /* |
| * How many histogram blocks? |
| * |
| * Note that it's OK to have more max-valued digits counted by the histogram |
| * than sorted by the scatters because the sort is stable. |
| */ |
| uint32_t histo_wg_size = 1 << rs->config.histogram.workgroup_size_log2; |
| uint32_t histo_block_kvs = histo_wg_size * rs->config.histogram.block_rows; |
| |
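| /* Only the top `passes` of the keyval_bytes digit positions are sorted, so |
| * start the pass index at the first digit that needs a scatter. */ |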
| uint32_t pass_idx = (keyval_bytes - passes); |
| |
| for (uint32_t i = 0; i < infoCount; ++i) { |
| if (!bvh_states[i].vk.leaf_node_count) |
| continue; |
| if (bvh_states[i].vk.config.internal_type == VK_INTERNAL_BUILD_TYPE_UPDATE) |
| continue; |
| |
| uint64_t keyvals_even_addr = pInfos[i].scratchData.deviceAddress + bvh_states[i].vk.scratch.sort_buffer_offset[0]; |
| uint64_t internal_addr = pInfos[i].scratchData.deviceAddress + bvh_states[i].vk.scratch.sort_internal_offset; |
| |
| bvh_states[i].scatter_blocks = (bvh_states[i].vk.leaf_node_count + scatter_block_kvs - 1) / scatter_block_kvs; |
| bvh_states[i].count_ru_scatter = bvh_states[i].scatter_blocks * scatter_block_kvs; |
| |
| bvh_states[i].histo_blocks = (bvh_states[i].count_ru_scatter + histo_block_kvs - 1) / histo_block_kvs; |
| bvh_states[i].count_ru_histo = bvh_states[i].histo_blocks * histo_block_kvs; |
| |
| /* Fill with max values */ |
| if (bvh_states[i].count_ru_histo > bvh_states[i].vk.leaf_node_count) { |
| device->cmd_fill_buffer_addr(commandBuffer, keyvals_even_addr + |
| bvh_states[i].vk.leaf_node_count * keyval_bytes, |
| (bvh_states[i].count_ru_histo - bvh_states[i].vk.leaf_node_count) * keyval_bytes, |
| 0xFFFFFFFF); |
| } |
| |
| /* |
| * Zero histograms and invalidate partitions. |
| * |
| * Note that the partition invalidation only needs to be performed once |
| * because the even/odd scatter dispatches rely on the previous pass to |
| * leave the partitions in an invalid state. |
| * |
| * Note that the last workgroup doesn't read/write a partition so it doesn't |
| * need to be initialized. |
| */ |
| uint32_t histo_partition_count = passes + bvh_states[i].scatter_blocks - 1; |
| |
| uint32_t fill_base = pass_idx * (RS_RADIX_SIZE * sizeof(uint32_t)); |
| |
| device->cmd_fill_buffer_addr(commandBuffer, |
| internal_addr + rs->internal.histograms.offset + fill_base, |
| histo_partition_count * (RS_RADIX_SIZE * sizeof(uint32_t)) + keyval_bytes * sizeof(uint32_t), 0); |
| } |
| |
| /* |
| * Pipeline: HISTOGRAM |
| * |
| * TODO(allanmac): All subgroups should try to process approximately the same |
| * number of blocks in order to minimize tail effects. This was implemented |
| * and reverted but should be reimplemented and benchmarked later. |
| */ |
| vk_barrier_transfer_w_to_compute_r(commandBuffer); |
| |
| disp->CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, |
| rs->pipelines.named.histogram); |
| |
| for (uint32_t i = 0; i < infoCount; ++i) { |
| if (!bvh_states[i].vk.leaf_node_count) |
| continue; |
| if (bvh_states[i].vk.config.internal_type == VK_INTERNAL_BUILD_TYPE_UPDATE) |
| continue; |
| |
| uint64_t keyvals_even_addr = pInfos[i].scratchData.deviceAddress + bvh_states[i].vk.scratch.sort_buffer_offset[0]; |
| uint64_t internal_addr = pInfos[i].scratchData.deviceAddress + bvh_states[i].vk.scratch.sort_internal_offset; |
| |
| /* Dispatch histogram */ |
| struct rs_push_histogram push_histogram = { |
| .devaddr_histograms = internal_addr + rs->internal.histograms.offset, |
| .devaddr_keyvals = keyvals_even_addr, |
| .passes = passes, |
| }; |
| |
| disp->CmdPushConstants(commandBuffer, rs->pipeline_layouts.named.histogram, VK_SHADER_STAGE_COMPUTE_BIT, 0, |
| sizeof(push_histogram), &push_histogram); |
| |
| disp->CmdDispatch(commandBuffer, bvh_states[i].histo_blocks, 1, 1); |
| } |
| |
| /* |
| * Pipeline: PREFIX |
| * |
| * Launch one workgroup per pass. |
| */ |
| vk_barrier_compute_w_to_compute_r(commandBuffer); |
| |
| disp->CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, |
| rs->pipelines.named.prefix); |
| |
| for (uint32_t i = 0; i < infoCount; ++i) { |
| if (!bvh_states[i].vk.leaf_node_count) |
| continue; |
| if (bvh_states[i].vk.config.internal_type == VK_INTERNAL_BUILD_TYPE_UPDATE) |
| continue; |
| |
| uint64_t internal_addr = pInfos[i].scratchData.deviceAddress + bvh_states[i].vk.scratch.sort_internal_offset; |
| |
| struct rs_push_prefix push_prefix = { |
| .devaddr_histograms = internal_addr + rs->internal.histograms.offset, |
| }; |
| |
| disp->CmdPushConstants(commandBuffer, rs->pipeline_layouts.named.prefix, VK_SHADER_STAGE_COMPUTE_BIT, 0, |
| sizeof(push_prefix), &push_prefix); |
| |
| disp->CmdDispatch(commandBuffer, passes, 1, 1); |
| } |
| |
| /* Pipeline: SCATTER */ |
| vk_barrier_compute_w_to_compute_r(commandBuffer); |
| |
| uint32_t histogram_offset = pass_idx * (RS_RADIX_SIZE * sizeof(uint32_t)); |
| |
| for (uint32_t i = 0; i < infoCount; i++) { |
| uint64_t keyvals_even_addr = pInfos[i].scratchData.deviceAddress + bvh_states[i].vk.scratch.sort_buffer_offset[0]; |
| uint64_t keyvals_odd_addr = pInfos[i].scratchData.deviceAddress + bvh_states[i].vk.scratch.sort_buffer_offset[1]; |
| uint64_t internal_addr = pInfos[i].scratchData.deviceAddress + bvh_states[i].vk.scratch.sort_internal_offset; |
| |
| bvh_states[i].push_scatter = (struct rs_push_scatter){ |
| .devaddr_keyvals_even = keyvals_even_addr, |
| .devaddr_keyvals_odd = keyvals_odd_addr, |
| .devaddr_partitions = internal_addr + rs->internal.partitions.offset, |
| .devaddr_histograms = internal_addr + rs->internal.histograms.offset + histogram_offset, |
| }; |
| } |
| |
| bool is_even = true; |
| |
| while (true) { |
| uint32_t pass_dword = pass_idx / 4; |
| |
| /* Bind new pipeline */ |
| VkPipeline p = |
| is_even ? rs->pipelines.named.scatter[pass_dword].even : rs->pipelines.named.scatter[pass_dword].odd; |
| disp->CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, p); |
| |
| /* Update push constants that changed */ |
| VkPipelineLayout pl = is_even ? rs->pipeline_layouts.named.scatter[pass_dword].even |
| : rs->pipeline_layouts.named.scatter[pass_dword].odd; |
| |
| for (uint32_t i = 0; i < infoCount; i++) { |
| if (!bvh_states[i].vk.leaf_node_count) |
| continue; |
| if (bvh_states[i].vk.config.internal_type == VK_INTERNAL_BUILD_TYPE_UPDATE) |
| continue; |
| |
| bvh_states[i].push_scatter.pass_offset = (pass_idx & 3) * RS_RADIX_LOG2; |
| |
| disp->CmdPushConstants(commandBuffer, pl, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(struct rs_push_scatter), |
| &bvh_states[i].push_scatter); |
| |
| disp->CmdDispatch(commandBuffer, bvh_states[i].scatter_blocks, 1, 1); |
| |
| bvh_states[i].push_scatter.devaddr_histograms += (RS_RADIX_SIZE * sizeof(uint32_t)); |
| } |
| |
| /* Continue? */ |
| if (++pass_idx >= keyval_bytes) |
| break; |
| |
| vk_barrier_compute_w_to_compute_r(commandBuffer); |
| |
| is_even ^= true; |
| } |
| |
| if (args->emit_markers) |
| device->as_build_ops->end_debug_marker(commandBuffer); |
| } |
| |
| static VkResult |
| lbvh_build_internal(VkCommandBuffer commandBuffer, |
| struct vk_device *device, struct vk_meta_device *meta, |
| const struct vk_acceleration_structure_build_args *args, |
| uint32_t infoCount, |
| const VkAccelerationStructureBuildGeometryInfoKHR *pInfos, struct bvh_state *bvh_states) |
| { |
| VkPipeline pipeline; |
| VkPipelineLayout layout; |
| |
| uint32_t flags = 0; |
| if (args->propagate_cull_flags) |
| flags |= VK_BUILD_FLAG_PROPAGATE_CULL_FLAGS; |
| |
| VkResult result = vk_get_bvh_build_pipeline_spv(device, meta, VK_META_OBJECT_KEY_LBVH_MAIN, |
| lbvh_main_spv, sizeof(lbvh_main_spv), |
| sizeof(struct lbvh_main_args), args, flags, |
| &pipeline); |
| if (result != VK_SUCCESS) |
| return result; |
| |
| result = vk_get_bvh_build_pipeline_layout(device, meta, sizeof(struct lbvh_main_args), &layout); |
| if (result != VK_SUCCESS) |
| return result; |
| |
| if (args->emit_markers) { |
| device->as_build_ops->begin_debug_marker(commandBuffer, |
| VK_ACCELERATION_STRUCTURE_BUILD_STEP_LBVH_BUILD_INTERNAL, |
| "lbvh_build_internal"); |
| } |
| |
| const struct vk_device_dispatch_table *disp = &device->dispatch_table; |
| disp->CmdBindPipeline( |
| commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); |
| |
| for (uint32_t i = 0; i < infoCount; ++i) { |
| if (bvh_states[i].vk.config.internal_type != VK_INTERNAL_BUILD_TYPE_LBVH) |
| continue; |
| |
| uint32_t src_scratch_offset = bvh_states[i].scratch_offset; |
| uint32_t internal_node_count = MAX2(bvh_states[i].vk.leaf_node_count, 2) - 1; |
| |
| const struct lbvh_main_args consts = { |
| .bvh = pInfos[i].scratchData.deviceAddress + bvh_states[i].vk.scratch.ir_offset, |
| .src_ids = pInfos[i].scratchData.deviceAddress + src_scratch_offset, |
| .node_info = pInfos[i].scratchData.deviceAddress + bvh_states[i].vk.scratch.lbvh_node_offset, |
| .id_count = bvh_states[i].vk.leaf_node_count, |
| .internal_node_base = bvh_states[i].vk.scratch.internal_node_offset - bvh_states[i].vk.scratch.ir_offset, |
| }; |
| |
| disp->CmdPushConstants(commandBuffer, layout, |
| VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(consts), &consts); |
| device->cmd_dispatch_unaligned(commandBuffer, internal_node_count, 1, 1); |
| bvh_states[i].internal_node_count = internal_node_count; |
| } |
| |
| vk_barrier_compute_w_to_compute_r(commandBuffer); |
| |
| result = vk_get_bvh_build_pipeline_spv(device, meta, VK_META_OBJECT_KEY_LBVH_GENERATE_IR, |
| lbvh_generate_ir_spv, sizeof(lbvh_generate_ir_spv), |
| sizeof(struct lbvh_generate_ir_args), args, flags, |
| &pipeline); |
| if (result != VK_SUCCESS) |
| return result; |
| |
| result = vk_get_bvh_build_pipeline_layout(device, meta, sizeof(struct lbvh_generate_ir_args), &layout); |
| if (result != VK_SUCCESS) |
| return result; |
| |
| disp->CmdBindPipeline( |
| commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); |
| |
| for (uint32_t i = 0; i < infoCount; ++i) { |
| if (bvh_states[i].vk.config.internal_type != VK_INTERNAL_BUILD_TYPE_LBVH) |
| continue; |
| |
| const struct lbvh_generate_ir_args consts = { |
| .bvh = pInfos[i].scratchData.deviceAddress + bvh_states[i].vk.scratch.ir_offset, |
| .node_info = pInfos[i].scratchData.deviceAddress + bvh_states[i].vk.scratch.lbvh_node_offset, |
| .header = pInfos[i].scratchData.deviceAddress + bvh_states[i].vk.scratch.header_offset, |
| .internal_node_base = bvh_states[i].vk.scratch.internal_node_offset - bvh_states[i].vk.scratch.ir_offset, |
| }; |
| |
| disp->CmdPushConstants(commandBuffer, layout, |
| VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(consts), &consts); |
| device->cmd_dispatch_unaligned(commandBuffer, bvh_states[i].internal_node_count, 1, 1); |
| } |
| |
| if (args->emit_markers) |
| device->as_build_ops->end_debug_marker(commandBuffer); |
| |
| return VK_SUCCESS; |
| } |
| |
| static VkResult |
| ploc_build_internal(VkCommandBuffer commandBuffer, |
| struct vk_device *device, struct vk_meta_device *meta, |
| const struct vk_acceleration_structure_build_args *args, |
| uint32_t infoCount, |
| const VkAccelerationStructureBuildGeometryInfoKHR *pInfos, struct bvh_state *bvh_states) |
| { |
| VkPipeline pipeline; |
| VkPipelineLayout layout; |
| |
| uint32_t flags = 0; |
| if (args->propagate_cull_flags) |
| flags |= VK_BUILD_FLAG_PROPAGATE_CULL_FLAGS; |
| |
| VkResult result = vk_get_bvh_build_pipeline_spv(device, meta, VK_META_OBJECT_KEY_PLOC, ploc_spv, |
| sizeof(ploc_spv), sizeof(struct ploc_args), |
| args, flags, &pipeline); |
| if (result != VK_SUCCESS) |
| return result; |
| |
| result = vk_get_bvh_build_pipeline_layout(device, meta, sizeof(struct ploc_args), &layout); |
| if (result != VK_SUCCESS) |
| return result; |
| |
| if (args->emit_markers) { |
| device->as_build_ops->begin_debug_marker(commandBuffer, |
| VK_ACCELERATION_STRUCTURE_BUILD_STEP_PLOC_BUILD_INTERNAL, |
| "ploc_build_internal"); |
| } |
| |
| const struct vk_device_dispatch_table *disp = &device->dispatch_table; |
| disp->CmdBindPipeline( |
| commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); |
| |
| for (uint32_t i = 0; i < infoCount; ++i) { |
| if (bvh_states[i].vk.config.internal_type != VK_INTERNAL_BUILD_TYPE_PLOC) |
| continue; |
| |
| uint32_t src_scratch_offset = bvh_states[i].scratch_offset; |
| uint32_t dst_scratch_offset = (src_scratch_offset == bvh_states[i].vk.scratch.sort_buffer_offset[0]) |
| ? bvh_states[i].vk.scratch.sort_buffer_offset[1] |
| : bvh_states[i].vk.scratch.sort_buffer_offset[0]; |
| |
| const struct ploc_args consts = { |
| .bvh = pInfos[i].scratchData.deviceAddress + bvh_states[i].vk.scratch.ir_offset, |
| .header = pInfos[i].scratchData.deviceAddress + bvh_states[i].vk.scratch.header_offset, |
| .ids_0 = pInfos[i].scratchData.deviceAddress + src_scratch_offset, |
| .ids_1 = pInfos[i].scratchData.deviceAddress + dst_scratch_offset, |
| .prefix_scan_partitions = |
| pInfos[i].scratchData.deviceAddress + bvh_states[i].vk.scratch.ploc_prefix_sum_partition_offset, |
| .internal_node_offset = bvh_states[i].vk.scratch.internal_node_offset - bvh_states[i].vk.scratch.ir_offset, |
| }; |
| |
| disp->CmdPushConstants(commandBuffer, layout, |
| VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(consts), &consts); |
| disp->CmdDispatch(commandBuffer, MAX2(DIV_ROUND_UP(bvh_states[i].vk.leaf_node_count, PLOC_WORKGROUP_SIZE), 1), 1, 1); |
| } |
| |
| if (args->emit_markers) |
| device->as_build_ops->end_debug_marker(commandBuffer); |
| |
| return VK_SUCCESS; |
| } |
| |
| void |
| vk_cmd_build_acceleration_structures(VkCommandBuffer commandBuffer, |
| struct vk_device *device, |
| struct vk_meta_device *meta, |
| uint32_t infoCount, |
| const VkAccelerationStructureBuildGeometryInfoKHR *pInfos, |
| const VkAccelerationStructureBuildRangeInfoKHR *const *ppBuildRangeInfos, |
| const struct vk_acceleration_structure_build_args *args) |
| { |
| VK_FROM_HANDLE(vk_command_buffer, cmd_buffer, commandBuffer); |
| const struct vk_acceleration_structure_build_ops *ops = device->as_build_ops; |
| |
| struct bvh_batch_state batch_state = {0}; |
| |
| struct bvh_state *bvh_states = calloc(infoCount, sizeof(struct bvh_state)); |
| if (!bvh_states) { |
| vk_command_buffer_set_error(cmd_buffer, VK_ERROR_OUT_OF_HOST_MEMORY); |
| return; |
| } |
| |
| if (args->emit_markers) { |
| uint32_t num_of_blas = 0; |
| uint32_t num_of_tlas = 0; |
| for (uint32_t i = 0; i < infoCount; ++i) { |
| switch (pInfos[i].type) { |
| case VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR: |
| num_of_tlas++; |
| break; |
| case VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR: |
| num_of_blas++; |
| break; |
| default: |
| break; |
| } |
| } |
| ops->begin_debug_marker(commandBuffer, |
| VK_ACCELERATION_STRUCTURE_BUILD_STEP_TOP, |
| "vkCmdBuildAccelerationStructuresKHR() TLAS(%u) BLAS(%u)", |
| num_of_tlas, num_of_blas); |
| } |
| |
| for (uint32_t i = 0; i < infoCount; ++i) { |
| uint32_t leaf_node_count = 0; |
| for (uint32_t j = 0; j < pInfos[i].geometryCount; ++j) { |
| leaf_node_count += ppBuildRangeInfos[i][j].primitiveCount; |
| } |
| |
| vk_acceleration_structure_build_state_init(&bvh_states[i].vk, cmd_buffer->base.device, leaf_node_count, |
| pInfos + i, args); |
| |
| bvh_states[i].vk.build_range_infos = ppBuildRangeInfos[i]; |
| /* The leaf node dispatch code uses leaf_node_count as a base index. */ |
| bvh_states[i].vk.leaf_node_count = 0; |
| |
| if (bvh_states[i].vk.config.updateable) |
| batch_state.any_updateable = true; |
| else |
| batch_state.any_non_updateable = true; |
| |
| if (bvh_states[i].vk.config.internal_type == VK_INTERNAL_BUILD_TYPE_PLOC) { |
| batch_state.any_ploc = true; |
| } else if (bvh_states[i].vk.config.internal_type == VK_INTERNAL_BUILD_TYPE_LBVH) { |
| batch_state.any_lbvh = true; |
| } else if (bvh_states[i].vk.config.internal_type == VK_INTERNAL_BUILD_TYPE_UPDATE) { |
| batch_state.any_update = true; |
| /* For updates, the leaf node pass never runs, so set leaf_node_count here. */ |
| bvh_states[i].vk.leaf_node_count = leaf_node_count; |
| } else { |
| unreachable("Unknown internal_build_type"); |
| } |
| |
| if (bvh_states[i].vk.config.internal_type != VK_INTERNAL_BUILD_TYPE_UPDATE) { |
| /* The internal node count is updated in lbvh_build_internal for LBVH |
| * and from the PLOC shader for PLOC. */ |
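| /* Bounds start out at extreme sentinel values so the build shaders can |
| * min/max them as leaves are processed. */ |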
| struct vk_ir_header header = { |
| .min_bounds = {0x7fffffff, 0x7fffffff, 0x7fffffff}, |
| .max_bounds = {0x80000000, 0x80000000, 0x80000000}, |
| .dispatch_size_y = 1, |
| .dispatch_size_z = 1, |
| .sync_data = |
| { |
| .current_phase_end_counter = TASK_INDEX_INVALID, |
| /* Will be updated by the first PLOC shader invocation */ |
| .task_counts = {TASK_INDEX_INVALID, TASK_INDEX_INVALID}, |
| }, |
| }; |
| |
| device->write_buffer_cp(commandBuffer, pInfos[i].scratchData.deviceAddress + bvh_states[i].vk.scratch.header_offset, |
| &header, sizeof(header)); |
| } else { |
| ops->init_update_scratch(commandBuffer, &bvh_states[i].vk); |
| } |
| } |
| |
| /* Wait for the write_buffer_cp to land before using in compute shaders */ |
| device->flush_buffer_write_cp(commandBuffer); |
| device->dispatch_table.CmdPipelineBarrier(commandBuffer, |
| VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, |
| VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, |
| 0, /* dependencyFlags */ |
| 1, |
| &(VkMemoryBarrier) { |
| .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, |
| .srcAccessMask = 0, |
| .dstAccessMask = VK_ACCESS_SHADER_READ_BIT, |
| }, 0, NULL, 0, NULL); |
| |
| if (batch_state.any_lbvh || batch_state.any_ploc) { |
| VkResult result; |
| |
| if (batch_state.any_non_updateable) { |
| result = |
| build_leaves(commandBuffer, device, meta, args, infoCount, pInfos, |
| ppBuildRangeInfos, bvh_states, false); |
| |
| if (result != VK_SUCCESS) { |
| free(bvh_states); |
| vk_command_buffer_set_error(cmd_buffer, result); |
| return; |
| } |
| } |
| |
| if (batch_state.any_updateable) { |
| result = |
| build_leaves(commandBuffer, device, meta, args, infoCount, pInfos, |
| ppBuildRangeInfos, bvh_states, true); |
| |
| if (result != VK_SUCCESS) { |
| free(bvh_states); |
| vk_command_buffer_set_error(cmd_buffer, result); |
| return; |
| } |
| } |
| |
| vk_barrier_compute_w_to_compute_r(commandBuffer); |
| |
| result = |
| morton_generate(commandBuffer, device, meta, args, infoCount, pInfos, bvh_states); |
| |
| if (result != VK_SUCCESS) { |
| free(bvh_states); |
| vk_command_buffer_set_error(cmd_buffer, result); |
| return; |
| } |
| |
| vk_barrier_compute_w_to_compute_r(commandBuffer); |
| |
| morton_sort(commandBuffer, device, args, infoCount, pInfos, bvh_states); |
| |
| vk_barrier_compute_w_to_compute_r(commandBuffer); |
| |
| if (batch_state.any_lbvh) { |
| result = |
| lbvh_build_internal(commandBuffer, device, meta, args, infoCount, pInfos, bvh_states); |
| |
| if (result != VK_SUCCESS) { |
| free(bvh_states); |
| vk_command_buffer_set_error(cmd_buffer, result); |
| return; |
| } |
| } |
| |
| if (batch_state.any_ploc) { |
| result = |
| ploc_build_internal(commandBuffer, device, meta, args, infoCount, pInfos, bvh_states); |
| |
| if (result != VK_SUCCESS) { |
| free(bvh_states); |
| vk_command_buffer_set_error(cmd_buffer, result); |
| return; |
| } |
| } |
| |
| vk_barrier_compute_w_to_compute_r(commandBuffer); |
| vk_barrier_compute_w_to_indirect_compute_r(commandBuffer); |
| } |
| |
| /* Calculate number of leaves and internal nodes to encode */ |
| uint32_t num_leaves = 0; |
| uint32_t num_internal_node = 0; |
| for (uint32_t i = 0; i < infoCount; i++) { |
| num_leaves += bvh_states[i].vk.leaf_node_count; |
| num_internal_node += bvh_states[i].internal_node_count; |
| } |
| |
| if (args->emit_markers) |
| device->as_build_ops->begin_debug_marker(commandBuffer, |
| VK_ACCELERATION_STRUCTURE_BUILD_STEP_ENCODE, |
| "encode_leaves=%u encode_ir_node=%u", |
| num_leaves, num_internal_node); |
| |
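| /* Encode/update passes are batched per pipeline: the first un-encoded build |
| * found binds its pipeline and fixes the (encode_key, update_key, update) |
| * combination for this sweep; every following build that matches it is |
| * dispatched with the same binding. The do/while keeps sweeping until all |
| * builds have run the current pass. */ |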
| for (unsigned pass = 0; pass < ARRAY_SIZE(ops->encode_as); pass++) { |
| if (!ops->encode_as[pass] && !ops->update_as[pass]) |
| break; |
| |
| bool progress; |
| do { |
| progress = false; |
| |
| bool update; |
| uint32_t encode_key = 0; |
| uint32_t update_key = 0; |
| for (uint32_t i = 0; i < infoCount; ++i) { |
| if (bvh_states[i].last_encode_pass == pass + 1) |
| continue; |
| |
| if (!progress) { |
| update = (bvh_states[i].vk.config.internal_type == |
| VK_INTERNAL_BUILD_TYPE_UPDATE); |
| if (update && !ops->update_as[pass]) |
| continue; |
| if (!update && !ops->encode_as[pass]) |
| continue; |
| encode_key = bvh_states[i].vk.config.encode_key[pass]; |
| update_key = bvh_states[i].vk.config.update_key[pass]; |
| progress = true; |
| if (update) |
| ops->update_bind_pipeline[pass](commandBuffer, &bvh_states[i].vk); |
| else |
| ops->encode_bind_pipeline[pass](commandBuffer, &bvh_states[i].vk); |
| } else { |
| if (update != (bvh_states[i].vk.config.internal_type == |
| VK_INTERNAL_BUILD_TYPE_UPDATE) || |
| encode_key != bvh_states[i].vk.config.encode_key[pass] || |
| update_key != bvh_states[i].vk.config.update_key[pass]) |
| continue; |
| } |
| |
| if (update) |
| ops->update_as[pass](commandBuffer, &bvh_states[i].vk); |
| else |
| ops->encode_as[pass](commandBuffer, &bvh_states[i].vk); |
| |
| bvh_states[i].last_encode_pass = pass + 1; |
| } |
| } while (progress); |
| } |
| |
| if (args->emit_markers) |
| device->as_build_ops->end_debug_marker(commandBuffer); |
| |
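| /* This second marker closes the top-level vkCmdBuildAccelerationStructuresKHR() |
| * marker opened at the start of the function. */ |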
| if (args->emit_markers) |
| device->as_build_ops->end_debug_marker(commandBuffer); |
| |
| free(bvh_states); |
| } |
| |
| void |
| vk_get_as_build_sizes(VkDevice _device, VkAccelerationStructureBuildTypeKHR buildType, |
| const VkAccelerationStructureBuildGeometryInfoKHR *pBuildInfo, |
| const uint32_t *pMaxPrimitiveCounts, |
| VkAccelerationStructureBuildSizesInfoKHR *pSizeInfo, |
| const struct vk_acceleration_structure_build_args *args) |
| { |
| VK_FROM_HANDLE(vk_device, device, _device); |
| |
| uint32_t leaf_count = 0; |
| for (uint32_t i = 0; i < pBuildInfo->geometryCount; i++) |
| leaf_count += pMaxPrimitiveCounts[i]; |
| |
| struct vk_acceleration_structure_build_state state = { 0 }; |
| vk_acceleration_structure_build_state_init(&state, device, leaf_count, pBuildInfo, args); |
| |
| pSizeInfo->accelerationStructureSize = device->as_build_ops->get_as_size(_device, &state); |
| pSizeInfo->updateScratchSize = state.scratch.update_size; |
| pSizeInfo->buildScratchSize = state.scratch.size; |
| } |
| |
| /* Return true if the common framework supports using this format for loading |
| * vertices. Must match the formats handled by load_vertices() on the GPU. |
| */ |
| bool |
| vk_acceleration_struct_vtx_format_supported(VkFormat format) |
| { |
| switch (format) { |
| case VK_FORMAT_R32G32_SFLOAT: |
| case VK_FORMAT_R32G32B32_SFLOAT: |
| case VK_FORMAT_R32G32B32A32_SFLOAT: |
| case VK_FORMAT_R16G16_SFLOAT: |
| case VK_FORMAT_R16G16B16_SFLOAT: |
| case VK_FORMAT_R16G16B16A16_SFLOAT: |
| case VK_FORMAT_R16G16_SNORM: |
| case VK_FORMAT_R16G16_UNORM: |
| case VK_FORMAT_R16G16B16A16_SNORM: |
| case VK_FORMAT_R16G16B16A16_UNORM: |
| case VK_FORMAT_R8G8_SNORM: |
| case VK_FORMAT_R8G8_UNORM: |
| case VK_FORMAT_R8G8B8A8_SNORM: |
| case VK_FORMAT_R8G8B8A8_UNORM: |
| case VK_FORMAT_A2B10G10R10_UNORM_PACK32: |
| return true; |
| default: |
| return false; |
| } |
| } |
| |
| /* Stubs of optional functions for drivers that don't implement them. */ |
| |
| VKAPI_ATTR void VKAPI_CALL |
| vk_common_CmdBuildAccelerationStructuresIndirectKHR(VkCommandBuffer commandBuffer, |
| uint32_t infoCount, |
| const VkAccelerationStructureBuildGeometryInfoKHR *pInfos, |
| const VkDeviceAddress *pIndirectDeviceAddresses, |
| const uint32_t *pIndirectStrides, |
| const uint32_t *const *ppMaxPrimitiveCounts) |
| { |
| unreachable("Unimplemented"); |
| } |
| |
| VKAPI_ATTR VkResult VKAPI_CALL |
| vk_common_WriteAccelerationStructuresPropertiesKHR(VkDevice _device, uint32_t accelerationStructureCount, |
| const VkAccelerationStructureKHR *pAccelerationStructures, |
| VkQueryType queryType, |
| size_t dataSize, |
| void *pData, |
| size_t stride) |
| { |
| VK_FROM_HANDLE(vk_device, device, _device); |
| unreachable("Unimplemented"); |
| return vk_error(device, VK_ERROR_FEATURE_NOT_PRESENT); |
| } |
| |
| VKAPI_ATTR VkResult VKAPI_CALL |
| vk_common_BuildAccelerationStructuresKHR(VkDevice _device, |
| VkDeferredOperationKHR deferredOperation, |
| uint32_t infoCount, |
| const VkAccelerationStructureBuildGeometryInfoKHR *pInfos, |
| const VkAccelerationStructureBuildRangeInfoKHR *const *ppBuildRangeInfos) |
| { |
| VK_FROM_HANDLE(vk_device, device, _device); |
| unreachable("Unimplemented"); |
| return vk_error(device, VK_ERROR_FEATURE_NOT_PRESENT); |
| } |
| |
| VKAPI_ATTR VkResult VKAPI_CALL |
| vk_common_CopyAccelerationStructureKHR(VkDevice _device, |
| VkDeferredOperationKHR deferredOperation, |
| const VkCopyAccelerationStructureInfoKHR *pInfo) |
| { |
| VK_FROM_HANDLE(vk_device, device, _device); |
| unreachable("Unimplemented"); |
| return vk_error(device, VK_ERROR_FEATURE_NOT_PRESENT); |
| } |
| |
| VKAPI_ATTR VkResult VKAPI_CALL |
| vk_common_CopyMemoryToAccelerationStructureKHR(VkDevice _device, |
| VkDeferredOperationKHR deferredOperation, |
| const VkCopyMemoryToAccelerationStructureInfoKHR *pInfo) |
| { |
| VK_FROM_HANDLE(vk_device, device, _device); |
| unreachable("Unimplemented"); |
| return vk_error(device, VK_ERROR_FEATURE_NOT_PRESENT); |
| } |
| |
| VKAPI_ATTR VkResult VKAPI_CALL |
| vk_common_CopyAccelerationStructureToMemoryKHR(VkDevice _device, |
| VkDeferredOperationKHR deferredOperation, |
| const VkCopyAccelerationStructureToMemoryInfoKHR *pInfo) |
| { |
| VK_FROM_HANDLE(vk_device, device, _device); |
| unreachable("Unimplemented"); |
| return vk_error(device, VK_ERROR_FEATURE_NOT_PRESENT); |
| } |