| /* |
| * Copyright © 2021 Google |
| * |
| * Permission is hereby granted, free of charge, to any person obtaining a |
| * copy of this software and associated documentation files (the "Software"), |
| * to deal in the Software without restriction, including without limitation |
| * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
| * and/or sell copies of the Software, and to permit persons to whom the |
| * Software is furnished to do so, subject to the following conditions: |
| * |
| * The above copyright notice and this permission notice (including the next |
| * paragraph) shall be included in all copies or substantial portions of the |
| * Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
| * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
| * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
| * IN THE SOFTWARE. |
| */ |
| |
| #include "radv_acceleration_structure.h" |
| #include "radv_debug.h" |
| #include "radv_meta.h" |
| #include "radv_private.h" |
| #include "radv_rt_common.h" |
| #include "radv_shader.h" |
| |
| #include "nir/nir.h" |
| #include "nir/nir_builder.h" |
| #include "nir/nir_builtin_builder.h" |
| |
/* Traversal stack size. This stack is put in LDS and, experimentally, 16 entries gives the best
 * performance. */
| #define MAX_STACK_ENTRY_COUNT 16 |
| |
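/* Flatten pCreateInfo and all of its pipeline libraries into a single create info: the stage and
 * group arrays are concatenated, and the shader indices inside each library's groups are rebased
 * onto the merged stage array. The caller owns the returned pStages/pGroups arrays and has to
 * free them. */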
| static VkRayTracingPipelineCreateInfoKHR |
| radv_create_merged_rt_create_info(const VkRayTracingPipelineCreateInfoKHR *pCreateInfo) |
| { |
| VkRayTracingPipelineCreateInfoKHR local_create_info = *pCreateInfo; |
| uint32_t total_stages = pCreateInfo->stageCount; |
| uint32_t total_groups = pCreateInfo->groupCount; |
| |
| if (pCreateInfo->pLibraryInfo) { |
| for (unsigned i = 0; i < pCreateInfo->pLibraryInfo->libraryCount; ++i) { |
| RADV_FROM_HANDLE(radv_pipeline, pipeline, pCreateInfo->pLibraryInfo->pLibraries[i]); |
| struct radv_library_pipeline *library_pipeline = radv_pipeline_to_library(pipeline); |
| |
| total_stages += library_pipeline->stage_count; |
| total_groups += library_pipeline->group_count; |
| } |
| } |
| VkPipelineShaderStageCreateInfo *stages = NULL; |
| VkRayTracingShaderGroupCreateInfoKHR *groups = NULL; |
| local_create_info.stageCount = total_stages; |
| local_create_info.groupCount = total_groups; |
| local_create_info.pStages = stages = |
| malloc(sizeof(VkPipelineShaderStageCreateInfo) * total_stages); |
| local_create_info.pGroups = groups = |
| malloc(sizeof(VkRayTracingShaderGroupCreateInfoKHR) * total_groups); |
| if (!local_create_info.pStages || !local_create_info.pGroups) |
| return local_create_info; |
| |
| total_stages = pCreateInfo->stageCount; |
| total_groups = pCreateInfo->groupCount; |
| for (unsigned j = 0; j < pCreateInfo->stageCount; ++j) |
| stages[j] = pCreateInfo->pStages[j]; |
| for (unsigned j = 0; j < pCreateInfo->groupCount; ++j) |
| groups[j] = pCreateInfo->pGroups[j]; |
| |
| if (pCreateInfo->pLibraryInfo) { |
| for (unsigned i = 0; i < pCreateInfo->pLibraryInfo->libraryCount; ++i) { |
| RADV_FROM_HANDLE(radv_pipeline, pipeline, pCreateInfo->pLibraryInfo->pLibraries[i]); |
| struct radv_library_pipeline *library_pipeline = radv_pipeline_to_library(pipeline); |
| |
| for (unsigned j = 0; j < library_pipeline->stage_count; ++j) |
| stages[total_stages + j] = library_pipeline->stages[j]; |
| for (unsigned j = 0; j < library_pipeline->group_count; ++j) { |
| VkRayTracingShaderGroupCreateInfoKHR *dst = &groups[total_groups + j]; |
| *dst = library_pipeline->groups[j]; |
| if (dst->generalShader != VK_SHADER_UNUSED_KHR) |
| dst->generalShader += total_stages; |
| if (dst->closestHitShader != VK_SHADER_UNUSED_KHR) |
| dst->closestHitShader += total_stages; |
| if (dst->anyHitShader != VK_SHADER_UNUSED_KHR) |
| dst->anyHitShader += total_stages; |
| if (dst->intersectionShader != VK_SHADER_UNUSED_KHR) |
| dst->intersectionShader += total_stages; |
| } |
| total_stages += library_pipeline->stage_count; |
| total_groups += library_pipeline->group_count; |
| } |
| } |
| return local_create_info; |
| } |
| |
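/* Create a pipeline library. Nothing is compiled here; we only store deep copies of the merged
 * stages (or their module identifiers when no module is provided) and groups, so that pipelines
 * created against this library can compile them later. */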
| static VkResult |
| radv_rt_pipeline_library_create(VkDevice _device, VkPipelineCache _cache, |
| const VkRayTracingPipelineCreateInfoKHR *pCreateInfo, |
| const VkAllocationCallbacks *pAllocator, VkPipeline *pPipeline) |
| { |
| RADV_FROM_HANDLE(radv_device, device, _device); |
| struct radv_library_pipeline *pipeline; |
| |
| pipeline = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*pipeline), 8, |
| VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); |
| if (pipeline == NULL) |
| return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); |
| |
| radv_pipeline_init(device, &pipeline->base, RADV_PIPELINE_LIBRARY); |
| |
| VkRayTracingPipelineCreateInfoKHR local_create_info = |
| radv_create_merged_rt_create_info(pCreateInfo); |
| if (!local_create_info.pStages || !local_create_info.pGroups) |
| goto fail; |
| |
| if (local_create_info.stageCount) { |
| pipeline->stage_count = local_create_info.stageCount; |
| |
| size_t size = sizeof(VkPipelineShaderStageCreateInfo) * local_create_info.stageCount; |
| pipeline->stages = malloc(size); |
| if (!pipeline->stages) |
| goto fail; |
| |
| memcpy(pipeline->stages, local_create_info.pStages, size); |
| |
| pipeline->hashes = malloc(sizeof(*pipeline->hashes) * local_create_info.stageCount); |
| if (!pipeline->hashes) |
| goto fail; |
| |
| pipeline->identifiers = malloc(sizeof(*pipeline->identifiers) * local_create_info.stageCount); |
| if (!pipeline->identifiers) |
| goto fail; |
| |
| for (uint32_t i = 0; i < local_create_info.stageCount; i++) { |
| RADV_FROM_HANDLE(vk_shader_module, module, pipeline->stages[i].module); |
| |
| const VkPipelineShaderStageModuleIdentifierCreateInfoEXT *iinfo = |
| vk_find_struct_const(local_create_info.pStages[i].pNext, |
| PIPELINE_SHADER_STAGE_MODULE_IDENTIFIER_CREATE_INFO_EXT); |
| |
| if (module) { |
| struct vk_shader_module *new_module = vk_shader_module_clone(NULL, module); |
| pipeline->stages[i].module = vk_shader_module_to_handle(new_module); |
| pipeline->stages[i].pNext = NULL; |
| } else { |
| assert(iinfo); |
| pipeline->identifiers[i].identifierSize = |
| MIN2(iinfo->identifierSize, sizeof(pipeline->hashes[i].sha1)); |
| memcpy(pipeline->hashes[i].sha1, iinfo->pIdentifier, |
| pipeline->identifiers[i].identifierSize); |
| pipeline->stages[i].module = VK_NULL_HANDLE; |
| pipeline->stages[i].pNext = &pipeline->identifiers[i]; |
| pipeline->identifiers[i].sType = |
| VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_MODULE_IDENTIFIER_CREATE_INFO_EXT; |
| pipeline->identifiers[i].pNext = NULL; |
| pipeline->identifiers[i].pIdentifier = pipeline->hashes[i].sha1; |
| } |
| } |
| } |
| |
| if (local_create_info.groupCount) { |
| size_t size = sizeof(VkRayTracingShaderGroupCreateInfoKHR) * local_create_info.groupCount; |
| pipeline->group_count = local_create_info.groupCount; |
| pipeline->groups = malloc(size); |
| if (!pipeline->groups) |
| goto fail; |
| memcpy(pipeline->groups, local_create_info.pGroups, size); |
| } |
| |
| *pPipeline = radv_pipeline_to_handle(&pipeline->base); |
| |
| free((void *)local_create_info.pGroups); |
| free((void *)local_create_info.pStages); |
| return VK_SUCCESS; |
fail:
   free(pipeline->groups);
   free(pipeline->stages);
   free(pipeline->hashes);
   free(pipeline->identifiers);
   free((void *)local_create_info.pGroups);
   free((void *)local_create_info.pStages);
   /* Tear down the partially initialized pipeline object so it is not leaked on failure. */
   vk_object_base_finish(&pipeline->base.base);
   vk_free2(&device->vk.alloc, pAllocator, pipeline);
   return VK_ERROR_OUT_OF_HOST_MEMORY;
| } |
| |
| /* |
| * Global variables for an RT pipeline |
| */ |
| struct rt_variables { |
| const VkRayTracingPipelineCreateInfoKHR *create_info; |
| |
| /* idx of the next shader to run in the next iteration of the main loop. |
| * During traversal, idx is used to store the SBT index and will contain |
| * the correct resume index upon returning. |
| */ |
| nir_variable *idx; |
| |
| /* scratch offset of the argument area relative to stack_ptr */ |
| nir_variable *arg; |
| |
| nir_variable *stack_ptr; |
| |
| /* global address of the SBT entry used for the shader */ |
| nir_variable *shader_record_ptr; |
| |
| /* trace_ray arguments */ |
| nir_variable *accel_struct; |
| nir_variable *flags; |
| nir_variable *cull_mask; |
| nir_variable *sbt_offset; |
| nir_variable *sbt_stride; |
| nir_variable *miss_index; |
| nir_variable *origin; |
| nir_variable *tmin; |
| nir_variable *direction; |
| nir_variable *tmax; |
| |
| /* Properties of the primitive currently being visited. */ |
| nir_variable *primitive_id; |
| nir_variable *geometry_id_and_flags; |
| nir_variable *instance_addr; |
| nir_variable *hit_kind; |
| nir_variable *opaque; |
| |
| /* Output variables for intersection & anyhit shaders. */ |
| nir_variable *ahit_accept; |
| nir_variable *ahit_terminate; |
| |
   /* Array of stack-size structs recording the maximum stack size for each shader group. */
| struct radv_pipeline_shader_stack_size *stack_sizes; |
| unsigned stage_idx; |
| }; |
| |
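/* Record `size` as a lower bound on the stack usage of every group that contains the current
 * stage: general and closest-hit shaders count towards the recursive size, while any-hit and
 * intersection shaders count towards the non-recursive size. */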
| static void |
| reserve_stack_size(struct rt_variables *vars, uint32_t size) |
| { |
| for (uint32_t group_idx = 0; group_idx < vars->create_info->groupCount; group_idx++) { |
| const VkRayTracingShaderGroupCreateInfoKHR *group = vars->create_info->pGroups + group_idx; |
| |
| if (vars->stage_idx == group->generalShader || vars->stage_idx == group->closestHitShader) |
| vars->stack_sizes[group_idx].recursive_size = |
| MAX2(vars->stack_sizes[group_idx].recursive_size, size); |
| |
| if (vars->stage_idx == group->anyHitShader || vars->stage_idx == group->intersectionShader) |
| vars->stack_sizes[group_idx].non_recursive_size = |
| MAX2(vars->stack_sizes[group_idx].non_recursive_size, size); |
| } |
| } |
| |
| static struct rt_variables |
| create_rt_variables(nir_shader *shader, const VkRayTracingPipelineCreateInfoKHR *create_info, |
| struct radv_pipeline_shader_stack_size *stack_sizes) |
| { |
| struct rt_variables vars = { |
| .create_info = create_info, |
| }; |
| vars.idx = nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "idx"); |
| vars.arg = nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "arg"); |
| vars.stack_ptr = nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "stack_ptr"); |
| vars.shader_record_ptr = |
| nir_variable_create(shader, nir_var_shader_temp, glsl_uint64_t_type(), "shader_record_ptr"); |
| |
| const struct glsl_type *vec3_type = glsl_vector_type(GLSL_TYPE_FLOAT, 3); |
| vars.accel_struct = |
| nir_variable_create(shader, nir_var_shader_temp, glsl_uint64_t_type(), "accel_struct"); |
| vars.flags = nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "ray_flags"); |
| vars.cull_mask = nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "cull_mask"); |
| vars.sbt_offset = |
| nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "sbt_offset"); |
| vars.sbt_stride = |
| nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "sbt_stride"); |
| vars.miss_index = |
| nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "miss_index"); |
| vars.origin = nir_variable_create(shader, nir_var_shader_temp, vec3_type, "ray_origin"); |
| vars.tmin = nir_variable_create(shader, nir_var_shader_temp, glsl_float_type(), "ray_tmin"); |
| vars.direction = nir_variable_create(shader, nir_var_shader_temp, vec3_type, "ray_direction"); |
| vars.tmax = nir_variable_create(shader, nir_var_shader_temp, glsl_float_type(), "ray_tmax"); |
| |
| vars.primitive_id = |
| nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "primitive_id"); |
| vars.geometry_id_and_flags = |
| nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "geometry_id_and_flags"); |
| vars.instance_addr = |
| nir_variable_create(shader, nir_var_shader_temp, glsl_uint64_t_type(), "instance_addr"); |
| vars.hit_kind = nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "hit_kind"); |
| vars.opaque = nir_variable_create(shader, nir_var_shader_temp, glsl_bool_type(), "opaque"); |
| |
| vars.ahit_accept = |
| nir_variable_create(shader, nir_var_shader_temp, glsl_bool_type(), "ahit_accept"); |
| vars.ahit_terminate = |
| nir_variable_create(shader, nir_var_shader_temp, glsl_bool_type(), "ahit_terminate"); |
| |
| vars.stack_sizes = stack_sizes; |
| return vars; |
| } |
| |
| /* |
| * Remap all the variables between the two rt_variables struct for inlining. |
| */ |
| static void |
| map_rt_variables(struct hash_table *var_remap, struct rt_variables *src, |
| const struct rt_variables *dst) |
| { |
| src->create_info = dst->create_info; |
| |
| _mesa_hash_table_insert(var_remap, src->idx, dst->idx); |
| _mesa_hash_table_insert(var_remap, src->arg, dst->arg); |
| _mesa_hash_table_insert(var_remap, src->stack_ptr, dst->stack_ptr); |
| _mesa_hash_table_insert(var_remap, src->shader_record_ptr, dst->shader_record_ptr); |
| |
| _mesa_hash_table_insert(var_remap, src->accel_struct, dst->accel_struct); |
| _mesa_hash_table_insert(var_remap, src->flags, dst->flags); |
| _mesa_hash_table_insert(var_remap, src->cull_mask, dst->cull_mask); |
| _mesa_hash_table_insert(var_remap, src->sbt_offset, dst->sbt_offset); |
| _mesa_hash_table_insert(var_remap, src->sbt_stride, dst->sbt_stride); |
| _mesa_hash_table_insert(var_remap, src->miss_index, dst->miss_index); |
| _mesa_hash_table_insert(var_remap, src->origin, dst->origin); |
| _mesa_hash_table_insert(var_remap, src->tmin, dst->tmin); |
| _mesa_hash_table_insert(var_remap, src->direction, dst->direction); |
| _mesa_hash_table_insert(var_remap, src->tmax, dst->tmax); |
| |
| _mesa_hash_table_insert(var_remap, src->primitive_id, dst->primitive_id); |
| _mesa_hash_table_insert(var_remap, src->geometry_id_and_flags, dst->geometry_id_and_flags); |
| _mesa_hash_table_insert(var_remap, src->instance_addr, dst->instance_addr); |
| _mesa_hash_table_insert(var_remap, src->hit_kind, dst->hit_kind); |
| _mesa_hash_table_insert(var_remap, src->opaque, dst->opaque); |
| _mesa_hash_table_insert(var_remap, src->ahit_accept, dst->ahit_accept); |
| _mesa_hash_table_insert(var_remap, src->ahit_terminate, dst->ahit_terminate); |
| |
| src->stack_sizes = dst->stack_sizes; |
| src->stage_idx = dst->stage_idx; |
| } |
| |
| /* |
| * Create a copy of the global rt variables where the primitive/instance related variables are |
| * independent.This is needed as we need to keep the old values of the global variables around |
| * in case e.g. an anyhit shader reject the collision. So there are inner variables that get copied |
| * to the outer variables once we commit to a better hit. |
| */ |
| static struct rt_variables |
| create_inner_vars(nir_builder *b, const struct rt_variables *vars) |
| { |
| struct rt_variables inner_vars = *vars; |
| inner_vars.idx = |
| nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "inner_idx"); |
| inner_vars.shader_record_ptr = nir_variable_create( |
| b->shader, nir_var_shader_temp, glsl_uint64_t_type(), "inner_shader_record_ptr"); |
| inner_vars.primitive_id = |
| nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "inner_primitive_id"); |
| inner_vars.geometry_id_and_flags = nir_variable_create( |
| b->shader, nir_var_shader_temp, glsl_uint_type(), "inner_geometry_id_and_flags"); |
| inner_vars.tmax = |
| nir_variable_create(b->shader, nir_var_shader_temp, glsl_float_type(), "inner_tmax"); |
| inner_vars.instance_addr = nir_variable_create(b->shader, nir_var_shader_temp, |
| glsl_uint64_t_type(), "inner_instance_addr"); |
| inner_vars.hit_kind = |
| nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "inner_hit_kind"); |
| |
| return inner_vars; |
| } |
| |
/* The hit attributes are stored on the stack. This is the offset of the hit-attribute storage
 * relative to the current stack pointer (effectively negative: the value wraps around in the
 * uint32_t, and the 32-bit address arithmetic wraps it back when it is added). */
const uint32_t RADV_HIT_ATTRIB_OFFSET = -(16 + RADV_MAX_HIT_ATTRIB_SIZE);
| |
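/* Pop the resume index that the call site pushed onto the scratch stack and make it the next
 * shader to run, undoing the 16-byte stack bump performed at the call. */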
| static void |
| insert_rt_return(nir_builder *b, const struct rt_variables *vars) |
| { |
| nir_store_var(b, vars->stack_ptr, nir_iadd_imm(b, nir_load_var(b, vars->stack_ptr), -16), 1); |
| nir_store_var(b, vars->idx, |
| nir_load_scratch(b, 1, 32, nir_load_var(b, vars->stack_ptr), .align_mul = 16), 1); |
| } |
| |
| enum sbt_type { |
| SBT_RAYGEN = offsetof(VkTraceRaysIndirectCommand2KHR, raygenShaderRecordAddress), |
| SBT_MISS = offsetof(VkTraceRaysIndirectCommand2KHR, missShaderBindingTableAddress), |
| SBT_HIT = offsetof(VkTraceRaysIndirectCommand2KHR, hitShaderBindingTableAddress), |
| SBT_CALLABLE = offsetof(VkTraceRaysIndirectCommand2KHR, callableShaderBindingTableAddress), |
| }; |
| |
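/* Compute the address of an SBT record. In the VkTraceRaysIndirectCommand2KHR layout, each
 * table's stride field sits 16 bytes after its address field, except for raygen, which only has
 * a record size 8 bytes after the address; the stride_offset computation below relies on that. */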
| static nir_ssa_def * |
| get_sbt_ptr(nir_builder *b, nir_ssa_def *idx, enum sbt_type binding) |
| { |
| nir_ssa_def *desc_base_addr = nir_load_sbt_base_amd(b); |
| |
| nir_ssa_def *desc = |
| nir_pack_64_2x32(b, nir_build_load_smem_amd(b, 2, desc_base_addr, nir_imm_int(b, binding))); |
| |
| nir_ssa_def *stride_offset = nir_imm_int(b, binding + (binding == SBT_RAYGEN ? 8 : 16)); |
| nir_ssa_def *stride = |
| nir_pack_64_2x32(b, nir_build_load_smem_amd(b, 2, desc_base_addr, stride_offset)); |
| |
| return nir_iadd(b, desc, nir_imul(b, nir_u2u64(b, idx), stride)); |
| } |
| |
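/* Load the 32-bit shader index stored at `offset` inside the SBT record into vars->idx, and
 * point vars->shader_record_ptr at the application-visible record data that follows the
 * RADV_RT_HANDLE_SIZE bytes of shader handle. */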
| static void |
| load_sbt_entry(nir_builder *b, const struct rt_variables *vars, nir_ssa_def *idx, |
| enum sbt_type binding, unsigned offset) |
| { |
| nir_ssa_def *addr = get_sbt_ptr(b, idx, binding); |
| |
| nir_ssa_def *load_addr = nir_iadd_imm(b, addr, offset); |
| nir_ssa_def *v_idx = nir_build_load_global(b, 1, 32, load_addr); |
| |
| nir_store_var(b, vars->idx, v_idx, 1); |
| |
| nir_ssa_def *record_addr = nir_iadd_imm(b, addr, RADV_RT_HANDLE_SIZE); |
| nir_store_var(b, vars->shader_record_ptr, record_addr, 1); |
| } |
| |
| /* This lowers all the RT instructions that we do not want to pass on to the combined shader and |
| * that we can implement using the variables from the shader we are going to inline into. */ |
| static void |
| lower_rt_instructions(nir_shader *shader, struct rt_variables *vars, unsigned call_idx_base) |
| { |
| nir_builder b_shader; |
| nir_builder_init(&b_shader, nir_shader_get_entrypoint(shader)); |
| |
| nir_foreach_block (block, nir_shader_get_entrypoint(shader)) { |
| nir_foreach_instr_safe (instr, block) { |
| switch (instr->type) { |
| case nir_instr_type_intrinsic: { |
| b_shader.cursor = nir_before_instr(instr); |
| nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); |
| nir_ssa_def *ret = NULL; |
| |
| switch (intr->intrinsic) { |
| case nir_intrinsic_rt_execute_callable: { |
| uint32_t size = align(nir_intrinsic_stack_size(intr), 16) + RADV_MAX_HIT_ATTRIB_SIZE; |
| uint32_t ret_idx = call_idx_base + nir_intrinsic_call_idx(intr) + 1; |
| |
| nir_store_var( |
| &b_shader, vars->stack_ptr, |
| nir_iadd_imm(&b_shader, nir_load_var(&b_shader, vars->stack_ptr), size), 1); |
| nir_store_scratch(&b_shader, nir_imm_int(&b_shader, ret_idx), |
| nir_load_var(&b_shader, vars->stack_ptr), .align_mul = 16); |
| |
| nir_store_var(&b_shader, vars->stack_ptr, |
| nir_iadd_imm(&b_shader, nir_load_var(&b_shader, vars->stack_ptr), 16), |
| 1); |
| load_sbt_entry(&b_shader, vars, intr->src[0].ssa, SBT_CALLABLE, 0); |
| |
| nir_store_var(&b_shader, vars->arg, |
| nir_iadd_imm(&b_shader, intr->src[1].ssa, -size - 16), 1); |
| |
| reserve_stack_size(vars, size + 16); |
| break; |
| } |
| case nir_intrinsic_rt_trace_ray: { |
| uint32_t size = align(nir_intrinsic_stack_size(intr), 16) + RADV_MAX_HIT_ATTRIB_SIZE; |
| uint32_t ret_idx = call_idx_base + nir_intrinsic_call_idx(intr) + 1; |
| |
| nir_store_var( |
| &b_shader, vars->stack_ptr, |
| nir_iadd_imm(&b_shader, nir_load_var(&b_shader, vars->stack_ptr), size), 1); |
| nir_store_scratch(&b_shader, nir_imm_int(&b_shader, ret_idx), |
| nir_load_var(&b_shader, vars->stack_ptr), .align_mul = 16); |
| |
| nir_store_var(&b_shader, vars->stack_ptr, |
| nir_iadd_imm(&b_shader, nir_load_var(&b_shader, vars->stack_ptr), 16), |
| 1); |
| |
| nir_store_var(&b_shader, vars->idx, nir_imm_int(&b_shader, 1), 1); |
| nir_store_var(&b_shader, vars->arg, |
| nir_iadd_imm(&b_shader, intr->src[10].ssa, -size - 16), 1); |
| |
| reserve_stack_size(vars, size + 16); |
| |
| /* Per the SPIR-V extension spec we have to ignore some bits for some arguments. */ |
| nir_store_var(&b_shader, vars->accel_struct, intr->src[0].ssa, 0x1); |
| nir_store_var(&b_shader, vars->flags, intr->src[1].ssa, 0x1); |
| nir_store_var(&b_shader, vars->cull_mask, |
| nir_iand_imm(&b_shader, intr->src[2].ssa, 0xff), 0x1); |
| nir_store_var(&b_shader, vars->sbt_offset, |
| nir_iand_imm(&b_shader, intr->src[3].ssa, 0xf), 0x1); |
| nir_store_var(&b_shader, vars->sbt_stride, |
| nir_iand_imm(&b_shader, intr->src[4].ssa, 0xf), 0x1); |
| nir_store_var(&b_shader, vars->miss_index, |
| nir_iand_imm(&b_shader, intr->src[5].ssa, 0xffff), 0x1); |
| nir_store_var(&b_shader, vars->origin, intr->src[6].ssa, 0x7); |
| nir_store_var(&b_shader, vars->tmin, intr->src[7].ssa, 0x1); |
| nir_store_var(&b_shader, vars->direction, intr->src[8].ssa, 0x7); |
| nir_store_var(&b_shader, vars->tmax, intr->src[9].ssa, 0x1); |
| break; |
| } |
| case nir_intrinsic_rt_resume: { |
| uint32_t size = align(nir_intrinsic_stack_size(intr), 16) + RADV_MAX_HIT_ATTRIB_SIZE; |
| |
| nir_store_var( |
| &b_shader, vars->stack_ptr, |
| nir_iadd_imm(&b_shader, nir_load_var(&b_shader, vars->stack_ptr), -size), 1); |
| break; |
| } |
| case nir_intrinsic_rt_return_amd: { |
| if (shader->info.stage == MESA_SHADER_RAYGEN) { |
| nir_store_var(&b_shader, vars->idx, nir_imm_int(&b_shader, 0), 1); |
| break; |
| } |
| insert_rt_return(&b_shader, vars); |
| break; |
| } |
| case nir_intrinsic_load_scratch: { |
| nir_instr_rewrite_src_ssa( |
| instr, &intr->src[0], |
| nir_iadd(&b_shader, nir_load_var(&b_shader, vars->stack_ptr), intr->src[0].ssa)); |
| continue; |
| } |
| case nir_intrinsic_store_scratch: { |
| nir_instr_rewrite_src_ssa( |
| instr, &intr->src[1], |
| nir_iadd(&b_shader, nir_load_var(&b_shader, vars->stack_ptr), intr->src[1].ssa)); |
| continue; |
| } |
| case nir_intrinsic_load_rt_arg_scratch_offset_amd: { |
| ret = nir_load_var(&b_shader, vars->arg); |
| break; |
| } |
| case nir_intrinsic_load_shader_record_ptr: { |
| ret = nir_load_var(&b_shader, vars->shader_record_ptr); |
| break; |
| } |
| case nir_intrinsic_load_ray_launch_id: { |
| ret = nir_load_global_invocation_id(&b_shader, 32); |
| break; |
| } |
| case nir_intrinsic_load_ray_launch_size: { |
               nir_ssa_def *launch_size_addr = nir_load_ray_launch_size_addr_amd(&b_shader);

               nir_ssa_def *xy =
                  nir_build_load_smem_amd(&b_shader, 2, launch_size_addr, nir_imm_int(&b_shader, 0));
               nir_ssa_def *z =
                  nir_build_load_smem_amd(&b_shader, 1, launch_size_addr, nir_imm_int(&b_shader, 8));
| |
| nir_ssa_def *xyz[3] = { |
| nir_channel(&b_shader, xy, 0), |
| nir_channel(&b_shader, xy, 1), |
| z, |
| }; |
| ret = nir_vec(&b_shader, xyz, 3); |
| break; |
| } |
| case nir_intrinsic_load_ray_t_min: { |
| ret = nir_load_var(&b_shader, vars->tmin); |
| break; |
| } |
| case nir_intrinsic_load_ray_t_max: { |
| ret = nir_load_var(&b_shader, vars->tmax); |
| break; |
| } |
| case nir_intrinsic_load_ray_world_origin: { |
| ret = nir_load_var(&b_shader, vars->origin); |
| break; |
| } |
| case nir_intrinsic_load_ray_world_direction: { |
| ret = nir_load_var(&b_shader, vars->direction); |
| break; |
| } |
| case nir_intrinsic_load_ray_instance_custom_index: { |
| nir_ssa_def *instance_node_addr = nir_load_var(&b_shader, vars->instance_addr); |
| nir_ssa_def *custom_instance_and_mask = nir_build_load_global( |
| &b_shader, 1, 32, |
| nir_iadd_imm(&b_shader, instance_node_addr, |
| offsetof(struct radv_bvh_instance_node, custom_instance_and_mask))); |
| ret = nir_iand_imm(&b_shader, custom_instance_and_mask, 0xFFFFFF); |
| break; |
| } |
| case nir_intrinsic_load_primitive_id: { |
| ret = nir_load_var(&b_shader, vars->primitive_id); |
| break; |
| } |
| case nir_intrinsic_load_ray_geometry_index: { |
| ret = nir_load_var(&b_shader, vars->geometry_id_and_flags); |
| ret = nir_iand_imm(&b_shader, ret, 0xFFFFFFF); |
| break; |
| } |
| case nir_intrinsic_load_instance_id: { |
| nir_ssa_def *instance_node_addr = nir_load_var(&b_shader, vars->instance_addr); |
| ret = nir_build_load_global( |
| &b_shader, 1, 32, |
| nir_iadd_imm(&b_shader, instance_node_addr, |
| offsetof(struct radv_bvh_instance_node, instance_id))); |
| break; |
| } |
| case nir_intrinsic_load_ray_flags: { |
| ret = nir_load_var(&b_shader, vars->flags); |
| break; |
| } |
| case nir_intrinsic_load_ray_hit_kind: { |
| ret = nir_load_var(&b_shader, vars->hit_kind); |
| break; |
| } |
| case nir_intrinsic_load_ray_world_to_object: { |
| unsigned c = nir_intrinsic_column(intr); |
| nir_ssa_def *instance_node_addr = nir_load_var(&b_shader, vars->instance_addr); |
| nir_ssa_def *wto_matrix[3]; |
| nir_build_wto_matrix_load(&b_shader, instance_node_addr, wto_matrix); |
| |
| nir_ssa_def *vals[3]; |
| for (unsigned i = 0; i < 3; ++i) |
| vals[i] = nir_channel(&b_shader, wto_matrix[i], c); |
| |
| ret = nir_vec(&b_shader, vals, 3); |
| break; |
| } |
| case nir_intrinsic_load_ray_object_to_world: { |
| unsigned c = nir_intrinsic_column(intr); |
| nir_ssa_def *instance_node_addr = nir_load_var(&b_shader, vars->instance_addr); |
| nir_ssa_def *rows[3]; |
| for (unsigned r = 0; r < 3; ++r) |
| rows[r] = nir_build_load_global( |
| &b_shader, 4, 32, |
| nir_iadd_imm(&b_shader, instance_node_addr, |
| offsetof(struct radv_bvh_instance_node, otw_matrix) + r * 16)); |
| ret = |
| nir_vec3(&b_shader, nir_channel(&b_shader, rows[0], c), |
| nir_channel(&b_shader, rows[1], c), nir_channel(&b_shader, rows[2], c)); |
| break; |
| } |
| case nir_intrinsic_load_ray_object_origin: { |
| nir_ssa_def *instance_node_addr = nir_load_var(&b_shader, vars->instance_addr); |
| nir_ssa_def *wto_matrix[3]; |
| nir_build_wto_matrix_load(&b_shader, instance_node_addr, wto_matrix); |
| ret = nir_build_vec3_mat_mult(&b_shader, nir_load_var(&b_shader, vars->origin), |
| wto_matrix, true); |
| break; |
| } |
| case nir_intrinsic_load_ray_object_direction: { |
| nir_ssa_def *instance_node_addr = nir_load_var(&b_shader, vars->instance_addr); |
| nir_ssa_def *wto_matrix[3]; |
| nir_build_wto_matrix_load(&b_shader, instance_node_addr, wto_matrix); |
| ret = nir_build_vec3_mat_mult( |
| &b_shader, nir_load_var(&b_shader, vars->direction), wto_matrix, false); |
| break; |
| } |
| case nir_intrinsic_load_intersection_opaque_amd: { |
| ret = nir_load_var(&b_shader, vars->opaque); |
| break; |
| } |
| case nir_intrinsic_load_cull_mask: { |
| ret = nir_load_var(&b_shader, vars->cull_mask); |
| break; |
| } |
| case nir_intrinsic_ignore_ray_intersection: { |
| nir_store_var(&b_shader, vars->ahit_accept, nir_imm_false(&b_shader), 0x1); |
| |
| /* The if is a workaround to avoid having to fix up control flow manually */ |
| nir_push_if(&b_shader, nir_imm_true(&b_shader)); |
| nir_jump(&b_shader, nir_jump_return); |
| nir_pop_if(&b_shader, NULL); |
| break; |
| } |
| case nir_intrinsic_terminate_ray: { |
| nir_store_var(&b_shader, vars->ahit_accept, nir_imm_true(&b_shader), 0x1); |
| nir_store_var(&b_shader, vars->ahit_terminate, nir_imm_true(&b_shader), 0x1); |
| |
| /* The if is a workaround to avoid having to fix up control flow manually */ |
| nir_push_if(&b_shader, nir_imm_true(&b_shader)); |
| nir_jump(&b_shader, nir_jump_return); |
| nir_pop_if(&b_shader, NULL); |
| break; |
| } |
| case nir_intrinsic_report_ray_intersection: { |
| nir_push_if( |
| &b_shader, |
| nir_iand( |
| &b_shader, |
| nir_fge(&b_shader, nir_load_var(&b_shader, vars->tmax), intr->src[0].ssa), |
| nir_fge(&b_shader, intr->src[0].ssa, nir_load_var(&b_shader, vars->tmin)))); |
| { |
| nir_store_var(&b_shader, vars->ahit_accept, nir_imm_true(&b_shader), 0x1); |
| nir_store_var(&b_shader, vars->tmax, intr->src[0].ssa, 1); |
| nir_store_var(&b_shader, vars->hit_kind, intr->src[1].ssa, 1); |
| } |
| nir_pop_if(&b_shader, NULL); |
| break; |
| } |
| case nir_intrinsic_load_sbt_offset_amd: { |
| ret = nir_load_var(&b_shader, vars->sbt_offset); |
| break; |
| } |
| case nir_intrinsic_load_sbt_stride_amd: { |
| ret = nir_load_var(&b_shader, vars->sbt_stride); |
| break; |
| } |
| case nir_intrinsic_load_accel_struct_amd: { |
| ret = nir_load_var(&b_shader, vars->accel_struct); |
| break; |
| } |
| case nir_intrinsic_execute_closest_hit_amd: { |
| nir_store_var(&b_shader, vars->tmax, intr->src[1].ssa, 0x1); |
| nir_store_var(&b_shader, vars->primitive_id, intr->src[2].ssa, 0x1); |
| nir_store_var(&b_shader, vars->instance_addr, intr->src[3].ssa, 0x1); |
| nir_store_var(&b_shader, vars->geometry_id_and_flags, intr->src[4].ssa, 0x1); |
| nir_store_var(&b_shader, vars->hit_kind, intr->src[5].ssa, 0x1); |
| load_sbt_entry(&b_shader, vars, intr->src[0].ssa, SBT_HIT, 0); |
| |
| nir_ssa_def *should_return = |
| nir_ior(&b_shader, |
| nir_test_mask(&b_shader, nir_load_var(&b_shader, vars->flags), |
| SpvRayFlagsSkipClosestHitShaderKHRMask), |
| nir_ieq_imm(&b_shader, nir_load_var(&b_shader, vars->idx), 0)); |
| |
| /* should_return is set if we had a hit but we won't be calling the closest hit |
| * shader and hence need to return immediately to the calling shader. */ |
| nir_push_if(&b_shader, should_return); |
| insert_rt_return(&b_shader, vars); |
| nir_pop_if(&b_shader, NULL); |
| break; |
| } |
| case nir_intrinsic_execute_miss_amd: { |
| nir_store_var(&b_shader, vars->tmax, intr->src[0].ssa, 0x1); |
| nir_ssa_def *undef = nir_ssa_undef(&b_shader, 1, 32); |
| nir_store_var(&b_shader, vars->primitive_id, undef, 0x1); |
| nir_store_var(&b_shader, vars->instance_addr, nir_ssa_undef(&b_shader, 1, 64), 0x1); |
| nir_store_var(&b_shader, vars->geometry_id_and_flags, undef, 0x1); |
| nir_store_var(&b_shader, vars->hit_kind, undef, 0x1); |
| nir_ssa_def *miss_index = nir_load_var(&b_shader, vars->miss_index); |
| load_sbt_entry(&b_shader, vars, miss_index, SBT_MISS, 0); |
| |
| /* In case of a NULL miss shader, do nothing and just return. */ |
| nir_push_if(&b_shader, nir_ieq_imm(&b_shader, nir_load_var(&b_shader, vars->idx), 0)); |
| insert_rt_return(&b_shader, vars); |
| nir_pop_if(&b_shader, NULL); |
| break; |
| } |
| default: |
| continue; |
| } |
| |
| if (ret) |
| nir_ssa_def_rewrite_uses(&intr->dest.ssa, ret); |
| nir_instr_remove(instr); |
| break; |
| } |
| case nir_instr_type_jump: { |
| nir_jump_instr *jump = nir_instr_as_jump(instr); |
| if (jump->type == nir_jump_halt) { |
| b_shader.cursor = nir_instr_remove(instr); |
| nir_jump(&b_shader, nir_jump_return); |
| } |
| break; |
| } |
| default: |
| break; |
| } |
| } |
| } |
| |
| nir_metadata_preserve(nir_shader_get_entrypoint(shader), nir_metadata_none); |
| } |
| |
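/* Wrap `shader` in an `if (idx == call_idx)` block and inline it into the shader being built.
 * By convention, index 0 means "no shader to run" and index 1 is the traversal shader, so the
 * inlined group shaders and resume points use indices starting at 2. */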
| static void |
| insert_rt_case(nir_builder *b, nir_shader *shader, struct rt_variables *vars, nir_ssa_def *idx, |
| uint32_t call_idx_base, uint32_t call_idx) |
| { |
| struct hash_table *var_remap = _mesa_pointer_hash_table_create(NULL); |
| |
| nir_opt_dead_cf(shader); |
| |
| struct rt_variables src_vars = create_rt_variables(shader, vars->create_info, vars->stack_sizes); |
| map_rt_variables(var_remap, &src_vars, vars); |
| |
| NIR_PASS_V(shader, lower_rt_instructions, &src_vars, call_idx_base); |
| |
| NIR_PASS(_, shader, nir_opt_remove_phis); |
| NIR_PASS(_, shader, nir_lower_returns); |
| NIR_PASS(_, shader, nir_opt_dce); |
| |
| reserve_stack_size(vars, shader->scratch_size); |
| |
| nir_push_if(b, nir_ieq_imm(b, idx, call_idx)); |
| nir_inline_function_impl(b, nir_shader_get_entrypoint(shader), NULL, var_remap); |
| nir_pop_if(b, NULL); |
| |
| /* Adopt the instructions from the source shader, since they are merely moved, not cloned. */ |
| ralloc_adopt(ralloc_context(b->shader), ralloc_context(shader)); |
| |
| ralloc_free(var_remap); |
| } |
| |
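/* Rewrite shader_call_data and ray_hit_attrib derefs into function_temp derefs so that they end
 * up in scratch: call data lives at the incoming argument offset, hit attributes at the fixed
 * RADV_HIT_ATTRIB_OFFSET relative to the stack pointer. */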
| static bool |
| lower_rt_derefs(nir_shader *shader) |
| { |
| nir_function_impl *impl = nir_shader_get_entrypoint(shader); |
| |
| bool progress = false; |
| |
| nir_builder b; |
| nir_builder_init(&b, impl); |
| |
| b.cursor = nir_before_cf_list(&impl->body); |
| nir_ssa_def *arg_offset = nir_load_rt_arg_scratch_offset_amd(&b); |
| |
| nir_foreach_block (block, impl) { |
| nir_foreach_instr_safe (instr, block) { |
| if (instr->type != nir_instr_type_deref) |
| continue; |
| |
| nir_deref_instr *deref = nir_instr_as_deref(instr); |
| b.cursor = nir_before_instr(&deref->instr); |
| |
| nir_deref_instr *replacement = NULL; |
| if (nir_deref_mode_is(deref, nir_var_shader_call_data)) { |
| deref->modes = nir_var_function_temp; |
| progress = true; |
| |
| if (deref->deref_type == nir_deref_type_var) |
| replacement = |
| nir_build_deref_cast(&b, arg_offset, nir_var_function_temp, deref->var->type, 0); |
| } else if (nir_deref_mode_is(deref, nir_var_ray_hit_attrib)) { |
| deref->modes = nir_var_function_temp; |
| progress = true; |
| |
| if (deref->deref_type == nir_deref_type_var) |
| replacement = nir_build_deref_cast(&b, nir_imm_int(&b, RADV_HIT_ATTRIB_OFFSET), |
| nir_var_function_temp, deref->type, 0); |
| } |
| |
| if (replacement != NULL) { |
| nir_ssa_def_rewrite_uses(&deref->dest.ssa, &replacement->dest.ssa); |
| nir_instr_remove(&deref->instr); |
| } |
| } |
| } |
| |
| if (progress) |
| nir_metadata_preserve(impl, nir_metadata_block_index | nir_metadata_dominance); |
| else |
| nir_metadata_preserve(impl, nir_metadata_all); |
| |
| return progress; |
| } |
| |
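/* Compile a ray tracing stage to NIR: run the SPIR-V front-end, append an explicit rt_return
 * for stages that return to a caller, and lower the RT storage classes to explicit 32-bit
 * scratch offsets. */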
| static nir_shader * |
| parse_rt_stage(struct radv_device *device, const VkPipelineShaderStageCreateInfo *sinfo) |
| { |
| struct radv_pipeline_key key; |
| memset(&key, 0, sizeof(key)); |
| |
| struct radv_pipeline_stage rt_stage; |
| |
| radv_pipeline_stage_init(sinfo, &rt_stage, vk_to_mesa_shader_stage(sinfo->stage)); |
| |
| nir_shader *shader = radv_shader_spirv_to_nir(device, &rt_stage, &key); |
| |
| if (shader->info.stage == MESA_SHADER_RAYGEN || shader->info.stage == MESA_SHADER_CLOSEST_HIT || |
| shader->info.stage == MESA_SHADER_CALLABLE || shader->info.stage == MESA_SHADER_MISS) { |
| nir_block *last_block = nir_impl_last_block(nir_shader_get_entrypoint(shader)); |
| nir_builder b_inner; |
| nir_builder_init(&b_inner, nir_shader_get_entrypoint(shader)); |
| b_inner.cursor = nir_after_block(last_block); |
| nir_rt_return_amd(&b_inner); |
| } |
| |
| NIR_PASS(_, shader, nir_lower_vars_to_explicit_types, |
| nir_var_function_temp | nir_var_shader_call_data | nir_var_ray_hit_attrib, |
| glsl_get_natural_size_align_bytes); |
| |
| NIR_PASS(_, shader, lower_rt_derefs); |
| |
| NIR_PASS(_, shader, nir_lower_explicit_io, nir_var_function_temp, |
| nir_address_format_32bit_offset); |
| |
| return shader; |
| } |
| |
| static nir_function_impl * |
| lower_any_hit_for_intersection(nir_shader *any_hit) |
| { |
| nir_function_impl *impl = nir_shader_get_entrypoint(any_hit); |
| |
| /* Any-hit shaders need three parameters */ |
| assert(impl->function->num_params == 0); |
| nir_parameter params[] = { |
| { |
| /* A pointer to a boolean value for whether or not the hit was |
| * accepted. |
| */ |
| .num_components = 1, |
| .bit_size = 32, |
| }, |
| { |
| /* The hit T value */ |
| .num_components = 1, |
| .bit_size = 32, |
| }, |
| { |
| /* The hit kind */ |
| .num_components = 1, |
| .bit_size = 32, |
| }, |
| }; |
| impl->function->num_params = ARRAY_SIZE(params); |
| impl->function->params = ralloc_array(any_hit, nir_parameter, ARRAY_SIZE(params)); |
| memcpy(impl->function->params, params, sizeof(params)); |
| |
| nir_builder build; |
| nir_builder_init(&build, impl); |
| nir_builder *b = &build; |
| |
| b->cursor = nir_before_cf_list(&impl->body); |
| |
| nir_ssa_def *commit_ptr = nir_load_param(b, 0); |
| nir_ssa_def *hit_t = nir_load_param(b, 1); |
| nir_ssa_def *hit_kind = nir_load_param(b, 2); |
| |
| nir_deref_instr *commit = |
| nir_build_deref_cast(b, commit_ptr, nir_var_function_temp, glsl_bool_type(), 0); |
| |
| nir_foreach_block_safe (block, impl) { |
| nir_foreach_instr_safe (instr, block) { |
| switch (instr->type) { |
| case nir_instr_type_intrinsic: { |
| nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); |
| switch (intrin->intrinsic) { |
| case nir_intrinsic_ignore_ray_intersection: |
| b->cursor = nir_instr_remove(&intrin->instr); |
| /* We put the newly emitted code inside a dummy if because it's |
| * going to contain a jump instruction and we don't want to |
| * deal with that mess here. It'll get dealt with by our |
| * control-flow optimization passes. |
| */ |
| nir_store_deref(b, commit, nir_imm_false(b), 0x1); |
| nir_push_if(b, nir_imm_true(b)); |
| nir_jump(b, nir_jump_return); |
| nir_pop_if(b, NULL); |
| break; |
| |
| case nir_intrinsic_terminate_ray: |
| /* The "normal" handling of terminateRay works fine in |
| * intersection shaders. |
| */ |
| break; |
| |
| case nir_intrinsic_load_ray_t_max: |
| nir_ssa_def_rewrite_uses(&intrin->dest.ssa, hit_t); |
| nir_instr_remove(&intrin->instr); |
| break; |
| |
| case nir_intrinsic_load_ray_hit_kind: |
| nir_ssa_def_rewrite_uses(&intrin->dest.ssa, hit_kind); |
| nir_instr_remove(&intrin->instr); |
| break; |
| |
| default: |
| break; |
| } |
| break; |
| } |
| case nir_instr_type_jump: { |
| nir_jump_instr *jump = nir_instr_as_jump(instr); |
| if (jump->type == nir_jump_halt) { |
| b->cursor = nir_instr_remove(instr); |
| nir_jump(b, nir_jump_return); |
| } |
| break; |
| } |
| |
| default: |
| break; |
| } |
| } |
| } |
| |
| nir_validate_shader(any_hit, "after initial any-hit lowering"); |
| |
| nir_lower_returns_impl(impl); |
| |
| nir_validate_shader(any_hit, "after lowering returns"); |
| |
| return impl; |
| } |
| |
/* Inline the any-hit shader into the intersection shader so we don't have to implement yet
 * another shader call interface here, nor deal with any recursion.
 */
| static void |
| nir_lower_intersection_shader(nir_shader *intersection, nir_shader *any_hit) |
| { |
| void *dead_ctx = ralloc_context(intersection); |
| |
| nir_function_impl *any_hit_impl = NULL; |
| struct hash_table *any_hit_var_remap = NULL; |
| if (any_hit) { |
| any_hit = nir_shader_clone(dead_ctx, any_hit); |
| NIR_PASS(_, any_hit, nir_opt_dce); |
| any_hit_impl = lower_any_hit_for_intersection(any_hit); |
| any_hit_var_remap = _mesa_pointer_hash_table_create(dead_ctx); |
| } |
| |
| nir_function_impl *impl = nir_shader_get_entrypoint(intersection); |
| |
| nir_builder build; |
| nir_builder_init(&build, impl); |
| nir_builder *b = &build; |
| |
| b->cursor = nir_before_cf_list(&impl->body); |
| |
| nir_variable *commit = nir_local_variable_create(impl, glsl_bool_type(), "ray_commit"); |
| nir_store_var(b, commit, nir_imm_false(b), 0x1); |
| |
| nir_foreach_block_safe (block, impl) { |
| nir_foreach_instr_safe (instr, block) { |
| if (instr->type != nir_instr_type_intrinsic) |
| continue; |
| |
| nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); |
| if (intrin->intrinsic != nir_intrinsic_report_ray_intersection) |
| continue; |
| |
| b->cursor = nir_instr_remove(&intrin->instr); |
| nir_ssa_def *hit_t = nir_ssa_for_src(b, intrin->src[0], 1); |
| nir_ssa_def *hit_kind = nir_ssa_for_src(b, intrin->src[1], 1); |
| nir_ssa_def *min_t = nir_load_ray_t_min(b); |
| nir_ssa_def *max_t = nir_load_ray_t_max(b); |
| |
| /* bool commit_tmp = false; */ |
| nir_variable *commit_tmp = nir_local_variable_create(impl, glsl_bool_type(), "commit_tmp"); |
| nir_store_var(b, commit_tmp, nir_imm_false(b), 0x1); |
| |
| nir_push_if(b, nir_iand(b, nir_fge(b, hit_t, min_t), nir_fge(b, max_t, hit_t))); |
| { |
| /* Any-hit defaults to commit */ |
| nir_store_var(b, commit_tmp, nir_imm_true(b), 0x1); |
| |
| if (any_hit_impl != NULL) { |
| nir_push_if(b, nir_inot(b, nir_load_intersection_opaque_amd(b))); |
| { |
| nir_ssa_def *params[] = { |
| &nir_build_deref_var(b, commit_tmp)->dest.ssa, |
| hit_t, |
| hit_kind, |
| }; |
| nir_inline_function_impl(b, any_hit_impl, params, any_hit_var_remap); |
| } |
| nir_pop_if(b, NULL); |
| } |
| |
| nir_push_if(b, nir_load_var(b, commit_tmp)); |
| { |
| nir_report_ray_intersection(b, 1, hit_t, hit_kind); |
| } |
| nir_pop_if(b, NULL); |
| } |
| nir_pop_if(b, NULL); |
| |
| nir_ssa_def *accepted = nir_load_var(b, commit_tmp); |
| nir_ssa_def_rewrite_uses(&intrin->dest.ssa, accepted); |
| } |
| } |
| |
| /* We did some inlining; have to re-index SSA defs */ |
| nir_index_ssa_defs(impl); |
| |
| /* Eliminate the casts introduced for the commit return of the any-hit shader. */ |
| NIR_PASS(_, intersection, nir_opt_deref); |
| |
| ralloc_free(dead_ctx); |
| } |
| |
/* Variables used only internally to ray traversal. This is data that describes the current state
 * of the traversal vs. what we'd give to a shader, e.g. the instance we are currently visiting
 * vs. the instance of the closest hit. */
| struct rt_traversal_vars { |
| nir_variable *origin; |
| nir_variable *dir; |
| nir_variable *inv_dir; |
| nir_variable *sbt_offset_and_flags; |
| nir_variable *instance_addr; |
| nir_variable *hit; |
| nir_variable *bvh_base; |
| nir_variable *stack; |
| nir_variable *top_stack; |
| nir_variable *stack_base; |
| nir_variable *current_node; |
| nir_variable *previous_node; |
| nir_variable *instance_top_node; |
| nir_variable *instance_bottom_node; |
| }; |
| |
| static struct rt_traversal_vars |
| init_traversal_vars(nir_builder *b) |
| { |
| const struct glsl_type *vec3_type = glsl_vector_type(GLSL_TYPE_FLOAT, 3); |
| struct rt_traversal_vars ret; |
| |
| ret.origin = nir_variable_create(b->shader, nir_var_shader_temp, vec3_type, "traversal_origin"); |
| ret.dir = nir_variable_create(b->shader, nir_var_shader_temp, vec3_type, "traversal_dir"); |
| ret.inv_dir = |
| nir_variable_create(b->shader, nir_var_shader_temp, vec3_type, "traversal_inv_dir"); |
| ret.sbt_offset_and_flags = nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), |
| "traversal_sbt_offset_and_flags"); |
| ret.instance_addr = |
| nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint64_t_type(), "instance_addr"); |
| ret.hit = nir_variable_create(b->shader, nir_var_shader_temp, glsl_bool_type(), "traversal_hit"); |
| ret.bvh_base = nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint64_t_type(), |
| "traversal_bvh_base"); |
| ret.stack = |
| nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "traversal_stack_ptr"); |
| ret.top_stack = nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), |
| "traversal_top_stack_ptr"); |
| ret.stack_base = |
| nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "traversal_stack_base"); |
   ret.current_node =
      nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "current_node");
| ret.previous_node = |
| nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "previous_node"); |
| ret.instance_top_node = |
| nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "instance_top_node"); |
| ret.instance_bottom_node = |
| nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "instance_bottom_node"); |
| return ret; |
| } |
| |
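/* Emit one case per triangle hit group that has an any-hit shader, dispatching on the SBT index
 * previously loaded into vars->idx (index 0 means there is no any-hit shader to run). */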
| static void |
| visit_any_hit_shaders(struct radv_device *device, |
| const VkRayTracingPipelineCreateInfoKHR *pCreateInfo, nir_builder *b, |
| struct rt_variables *vars) |
| { |
| nir_ssa_def *sbt_idx = nir_load_var(b, vars->idx); |
| |
| nir_push_if(b, nir_ine_imm(b, sbt_idx, 0)); |
| for (unsigned i = 0; i < pCreateInfo->groupCount; ++i) { |
| const VkRayTracingShaderGroupCreateInfoKHR *group_info = &pCreateInfo->pGroups[i]; |
| uint32_t shader_id = VK_SHADER_UNUSED_KHR; |
| |
| switch (group_info->type) { |
| case VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR: |
| shader_id = group_info->anyHitShader; |
| break; |
| default: |
| break; |
| } |
| if (shader_id == VK_SHADER_UNUSED_KHR) |
| continue; |
| |
| const VkPipelineShaderStageCreateInfo *stage = &pCreateInfo->pStages[shader_id]; |
| nir_shader *nir_stage = parse_rt_stage(device, stage); |
| |
| vars->stage_idx = shader_id; |
| insert_rt_case(b, nir_stage, vars, sbt_idx, 0, i + 2); |
| } |
| nir_pop_if(b, NULL); |
| } |
| |
| struct traversal_data { |
| struct radv_device *device; |
| const VkRayTracingPipelineCreateInfoKHR *createInfo; |
| struct rt_variables *vars; |
| struct rt_traversal_vars *trav_vars; |
| }; |
| |
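/* Candidate-triangle callback for the ray traversal. The SBT record index follows the hit group
 * indexing rule of the Vulkan spec: instance SBT offset + ray SBT offset + geometry index *
 * ray SBT stride. For non-opaque candidates, the any-hit shaders run before the hit is
 * committed. */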
| static void |
| handle_candidate_triangle(nir_builder *b, struct radv_triangle_intersection *intersection, |
| const struct radv_ray_traversal_args *args) |
| { |
| struct traversal_data *data = args->data; |
| |
| nir_ssa_def *geometry_id = nir_iand_imm(b, intersection->base.geometry_id_and_flags, 0xfffffff); |
| nir_ssa_def *sbt_idx = nir_iadd( |
| b, |
| nir_iadd(b, nir_load_var(b, data->vars->sbt_offset), |
| nir_iand_imm(b, nir_load_var(b, data->trav_vars->sbt_offset_and_flags), 0xffffff)), |
| nir_imul(b, nir_load_var(b, data->vars->sbt_stride), geometry_id)); |
| |
| nir_ssa_def *hit_kind = |
| nir_bcsel(b, intersection->frontface, nir_imm_int(b, 0xFE), nir_imm_int(b, 0xFF)); |
| |
| nir_ssa_def *barycentrics_addr = |
| nir_iadd_imm(b, nir_load_var(b, data->vars->stack_ptr), RADV_HIT_ATTRIB_OFFSET); |
| nir_ssa_def *prev_barycentrics = nir_load_scratch(b, 2, 32, barycentrics_addr, .align_mul = 16); |
| nir_store_scratch(b, intersection->barycentrics, barycentrics_addr, .align_mul = 16); |
| |
| nir_store_var(b, data->vars->ahit_accept, nir_imm_true(b), 0x1); |
| nir_store_var(b, data->vars->ahit_terminate, nir_imm_false(b), 0x1); |
| |
| nir_push_if(b, nir_inot(b, intersection->base.opaque)); |
| { |
| struct rt_variables inner_vars = create_inner_vars(b, data->vars); |
| |
| nir_store_var(b, inner_vars.primitive_id, intersection->base.primitive_id, 1); |
| nir_store_var(b, inner_vars.geometry_id_and_flags, intersection->base.geometry_id_and_flags, |
| 1); |
| nir_store_var(b, inner_vars.tmax, intersection->t, 0x1); |
| nir_store_var(b, inner_vars.instance_addr, nir_load_var(b, data->trav_vars->instance_addr), |
| 0x1); |
| nir_store_var(b, inner_vars.hit_kind, hit_kind, 0x1); |
| |
| load_sbt_entry(b, &inner_vars, sbt_idx, SBT_HIT, 4); |
| |
| visit_any_hit_shaders(data->device, data->createInfo, b, &inner_vars); |
| |
| nir_push_if(b, nir_inot(b, nir_load_var(b, data->vars->ahit_accept))); |
| { |
| nir_store_scratch(b, prev_barycentrics, barycentrics_addr, .align_mul = 16); |
| nir_jump(b, nir_jump_continue); |
| } |
| nir_pop_if(b, NULL); |
| } |
| nir_pop_if(b, NULL); |
| |
| nir_store_var(b, data->vars->primitive_id, intersection->base.primitive_id, 1); |
| nir_store_var(b, data->vars->geometry_id_and_flags, intersection->base.geometry_id_and_flags, 1); |
| nir_store_var(b, data->vars->tmax, intersection->t, 0x1); |
| nir_store_var(b, data->vars->instance_addr, nir_load_var(b, data->trav_vars->instance_addr), |
| 0x1); |
| nir_store_var(b, data->vars->hit_kind, hit_kind, 0x1); |
| |
| nir_store_var(b, data->vars->idx, sbt_idx, 1); |
| nir_store_var(b, data->trav_vars->hit, nir_imm_true(b), 1); |
| |
| nir_ssa_def *terminate_on_first_hit = |
| nir_test_mask(b, args->flags, SpvRayFlagsTerminateOnFirstHitKHRMask); |
| nir_ssa_def *ray_terminated = nir_load_var(b, data->vars->ahit_terminate); |
| nir_push_if(b, nir_ior(b, terminate_on_first_hit, ray_terminated)); |
| { |
| nir_jump(b, nir_jump_break); |
| } |
| nir_pop_if(b, NULL); |
| } |
| |
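/* Candidate-AABB callback. This runs the hit group's intersection shader (with its any-hit
 * shader already inlined); if the SBT record contains a null intersection shader, the AABB
 * itself is intersected with a slab test and the entry distance is accepted as the hit t. */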
| static void |
| handle_candidate_aabb(nir_builder *b, struct radv_leaf_intersection *intersection, |
| const struct radv_ray_traversal_args *args) |
| { |
| struct traversal_data *data = args->data; |
| |
| nir_ssa_def *geometry_id = nir_iand_imm(b, intersection->geometry_id_and_flags, 0xfffffff); |
| nir_ssa_def *sbt_idx = nir_iadd( |
| b, |
| nir_iadd(b, nir_load_var(b, data->vars->sbt_offset), |
| nir_iand_imm(b, nir_load_var(b, data->trav_vars->sbt_offset_and_flags), 0xffffff)), |
| nir_imul(b, nir_load_var(b, data->vars->sbt_stride), geometry_id)); |
| |
| struct rt_variables inner_vars = create_inner_vars(b, data->vars); |
| |
| /* For AABBs the intersection shader writes the hit kind, and only does it if it is the |
| * next closest hit candidate. */ |
| inner_vars.hit_kind = data->vars->hit_kind; |
| |
| nir_store_var(b, inner_vars.primitive_id, intersection->primitive_id, 1); |
| nir_store_var(b, inner_vars.geometry_id_and_flags, intersection->geometry_id_and_flags, 1); |
| nir_store_var(b, inner_vars.tmax, nir_load_var(b, data->vars->tmax), 0x1); |
| nir_store_var(b, inner_vars.instance_addr, nir_load_var(b, data->trav_vars->instance_addr), 0x1); |
| nir_store_var(b, inner_vars.opaque, intersection->opaque, 1); |
| |
| load_sbt_entry(b, &inner_vars, sbt_idx, SBT_HIT, 4); |
| |
| nir_store_var(b, data->vars->ahit_accept, nir_imm_false(b), 0x1); |
| nir_store_var(b, data->vars->ahit_terminate, nir_imm_false(b), 0x1); |
| |
| nir_push_if(b, nir_ine_imm(b, nir_load_var(b, inner_vars.idx), 0)); |
| for (unsigned i = 0; i < data->createInfo->groupCount; ++i) { |
| const VkRayTracingShaderGroupCreateInfoKHR *group_info = &data->createInfo->pGroups[i]; |
| uint32_t shader_id = VK_SHADER_UNUSED_KHR; |
| uint32_t any_hit_shader_id = VK_SHADER_UNUSED_KHR; |
| |
| switch (group_info->type) { |
| case VK_RAY_TRACING_SHADER_GROUP_TYPE_PROCEDURAL_HIT_GROUP_KHR: |
| shader_id = group_info->intersectionShader; |
| any_hit_shader_id = group_info->anyHitShader; |
| break; |
| default: |
| break; |
| } |
| if (shader_id == VK_SHADER_UNUSED_KHR) |
| continue; |
| |
| const VkPipelineShaderStageCreateInfo *stage = &data->createInfo->pStages[shader_id]; |
| nir_shader *nir_stage = parse_rt_stage(data->device, stage); |
| |
| nir_shader *any_hit_stage = NULL; |
| if (any_hit_shader_id != VK_SHADER_UNUSED_KHR) { |
| stage = &data->createInfo->pStages[any_hit_shader_id]; |
| any_hit_stage = parse_rt_stage(data->device, stage); |
| |
| nir_lower_intersection_shader(nir_stage, any_hit_stage); |
| ralloc_free(any_hit_stage); |
| } |
| |
| inner_vars.stage_idx = shader_id; |
| insert_rt_case(b, nir_stage, &inner_vars, nir_load_var(b, inner_vars.idx), 0, i + 2); |
| } |
| nir_push_else(b, NULL); |
| { |
| nir_ssa_def *vec3_zero = nir_channels(b, nir_imm_vec4(b, 0, 0, 0, 0), 0x7); |
| nir_ssa_def *vec3_inf = |
| nir_channels(b, nir_imm_vec4(b, INFINITY, INFINITY, INFINITY, 0), 0x7); |
| |
| nir_ssa_def *bvh_lo = |
| nir_build_load_global(b, 3, 32, nir_iadd_imm(b, intersection->node_addr, 0)); |
| nir_ssa_def *bvh_hi = |
| nir_build_load_global(b, 3, 32, nir_iadd_imm(b, intersection->node_addr, 12)); |
| |
| bvh_lo = nir_fsub(b, bvh_lo, nir_load_var(b, data->trav_vars->origin)); |
| bvh_hi = nir_fsub(b, bvh_hi, nir_load_var(b, data->trav_vars->origin)); |
| nir_ssa_def *t_vec = |
| nir_fmin(b, nir_fmul(b, bvh_lo, nir_load_var(b, data->trav_vars->inv_dir)), |
| nir_fmul(b, bvh_hi, nir_load_var(b, data->trav_vars->inv_dir))); |
| nir_ssa_def *t2_vec = |
| nir_fmax(b, nir_fmul(b, bvh_lo, nir_load_var(b, data->trav_vars->inv_dir)), |
| nir_fmul(b, bvh_hi, nir_load_var(b, data->trav_vars->inv_dir))); |
      /* If the ray is parallel to one of the AABB's faces (a direction component is zero), the
       * t range for that axis should be [0, inf) rather than [0, 0]. */
| t2_vec = nir_bcsel(b, nir_feq(b, nir_load_var(b, data->trav_vars->dir), vec3_zero), vec3_inf, |
| t2_vec); |
| |
| nir_ssa_def *t_min = nir_fmax(b, nir_channel(b, t_vec, 0), nir_channel(b, t_vec, 1)); |
| t_min = nir_fmax(b, t_min, nir_channel(b, t_vec, 2)); |
| |
| nir_ssa_def *t_max = nir_fmin(b, nir_channel(b, t2_vec, 0), nir_channel(b, t2_vec, 1)); |
| t_max = nir_fmin(b, t_max, nir_channel(b, t2_vec, 2)); |
| |
| nir_push_if(b, nir_iand(b, nir_fge(b, nir_load_var(b, data->vars->tmax), t_min), |
| nir_fge(b, t_max, nir_load_var(b, data->vars->tmin)))); |
| { |
| nir_store_var(b, data->vars->ahit_accept, nir_imm_true(b), 0x1); |
| nir_store_var(b, inner_vars.tmax, nir_fmax(b, t_min, nir_load_var(b, data->vars->tmin)), |
| 1); |
| } |
| nir_pop_if(b, NULL); |
| } |
| nir_pop_if(b, NULL); |
| |
| nir_push_if(b, nir_load_var(b, data->vars->ahit_accept)); |
| { |
| nir_store_var(b, data->vars->primitive_id, intersection->primitive_id, 1); |
| nir_store_var(b, data->vars->geometry_id_and_flags, intersection->geometry_id_and_flags, 1); |
| nir_store_var(b, data->vars->tmax, nir_load_var(b, inner_vars.tmax), 0x1); |
| nir_store_var(b, data->vars->instance_addr, nir_load_var(b, data->trav_vars->instance_addr), |
| 0x1); |
| |
| nir_store_var(b, data->vars->idx, sbt_idx, 1); |
| nir_store_var(b, data->trav_vars->hit, nir_imm_true(b), 1); |
| |
| nir_ssa_def *terminate_on_first_hit = |
| nir_test_mask(b, args->flags, SpvRayFlagsTerminateOnFirstHitKHRMask); |
| nir_ssa_def *ray_terminated = nir_load_var(b, data->vars->ahit_terminate); |
| nir_push_if(b, nir_ior(b, terminate_on_first_hit, ray_terminated)); |
| { |
| nir_jump(b, nir_jump_break); |
| } |
| nir_pop_if(b, NULL); |
| } |
| nir_pop_if(b, NULL); |
| } |
| |
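/* The traversal stack lives in LDS: each lane owns one 32-bit slot per stack level, and the
 * levels of the different lanes are interleaved with a stride of rt_wave_size * 4 bytes (see
 * the stack_stride and shared_size setup in build_traversal_shader). */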
| static void |
| store_stack_entry(nir_builder *b, nir_ssa_def *index, nir_ssa_def *value, |
| const struct radv_ray_traversal_args *args) |
| { |
| nir_store_shared(b, value, index, .base = 0, .align_mul = 4); |
| } |
| |
| static nir_ssa_def * |
| load_stack_entry(nir_builder *b, nir_ssa_def *index, const struct radv_ray_traversal_args *args) |
| { |
| return nir_load_shared(b, 1, 32, index, .base = 0, .align_mul = 4); |
| } |
| |
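/* Build the compute shader that performs the BVH traversal for a trace_ray call. It is sized to
 * one wave per workgroup, consumes the trace_ray arguments through the *_amd intrinsics, walks
 * the acceleration structure, and finally executes either the closest-hit or the miss shader. */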
| static nir_shader * |
| build_traversal_shader(struct radv_device *device, |
| const VkRayTracingPipelineCreateInfoKHR *pCreateInfo, |
| struct radv_pipeline_shader_stack_size *stack_sizes) |
| { |
| nir_builder b = radv_meta_init_shader(device, MESA_SHADER_COMPUTE, "rt_traversal"); |
| b.shader->info.internal = false; |
| b.shader->info.workgroup_size[0] = 8; |
| b.shader->info.workgroup_size[1] = device->physical_device->rt_wave_size == 64 ? 8 : 4; |
| b.shader->info.shared_size = |
| device->physical_device->rt_wave_size * MAX_STACK_ENTRY_COUNT * sizeof(uint32_t); |
| struct rt_variables vars = create_rt_variables(b.shader, pCreateInfo, stack_sizes); |
| |
| /* initialize trace_ray arguments */ |
| nir_ssa_def *accel_struct = nir_load_accel_struct_amd(&b); |
| nir_store_var(&b, vars.flags, nir_load_ray_flags(&b), 0x1); |
| nir_store_var(&b, vars.cull_mask, nir_load_cull_mask(&b), 0x1); |
| nir_store_var(&b, vars.sbt_offset, nir_load_sbt_offset_amd(&b), 0x1); |
| nir_store_var(&b, vars.sbt_stride, nir_load_sbt_stride_amd(&b), 0x1); |
| nir_store_var(&b, vars.origin, nir_load_ray_world_origin(&b), 0x7); |
| nir_store_var(&b, vars.tmin, nir_load_ray_t_min(&b), 0x1); |
| nir_store_var(&b, vars.direction, nir_load_ray_world_direction(&b), 0x7); |
| nir_store_var(&b, vars.tmax, nir_load_ray_t_max(&b), 0x1); |
| nir_store_var(&b, vars.arg, nir_load_rt_arg_scratch_offset_amd(&b), 0x1); |
| nir_store_var(&b, vars.stack_ptr, nir_imm_int(&b, 0), 0x1); |
| |
| struct rt_traversal_vars trav_vars = init_traversal_vars(&b); |
| |
| nir_store_var(&b, trav_vars.hit, nir_imm_false(&b), 1); |
| |
| nir_push_if(&b, nir_ine_imm(&b, accel_struct, 0)); |
| { |
| nir_ssa_def *bvh_offset = nir_build_load_global( |
| &b, 1, 32, |
| nir_iadd_imm(&b, accel_struct, offsetof(struct radv_accel_struct_header, bvh_offset)), |
| .access = ACCESS_NON_WRITEABLE); |
| nir_ssa_def *root_bvh_base = nir_iadd(&b, accel_struct, nir_u2u64(&b, bvh_offset)); |
| root_bvh_base = build_addr_to_node(&b, root_bvh_base); |
| |
| nir_store_var(&b, trav_vars.bvh_base, root_bvh_base, 1); |
| |
| nir_ssa_def *vec3ones = nir_channels(&b, nir_imm_vec4(&b, 1.0, 1.0, 1.0, 1.0), 0x7); |
| |
| nir_store_var(&b, trav_vars.origin, nir_load_var(&b, vars.origin), 7); |
| nir_store_var(&b, trav_vars.dir, nir_load_var(&b, vars.direction), 7); |
| nir_store_var(&b, trav_vars.inv_dir, nir_fdiv(&b, vec3ones, nir_load_var(&b, trav_vars.dir)), |
| 7); |
| nir_store_var(&b, trav_vars.sbt_offset_and_flags, nir_imm_int(&b, 0), 1); |
| nir_store_var(&b, trav_vars.instance_addr, nir_imm_int64(&b, 0), 1); |
| |
      nir_store_var(&b, trav_vars.stack,
                    nir_imul_imm(&b, nir_load_local_invocation_index(&b), sizeof(uint32_t)), 1);
| nir_store_var(&b, trav_vars.stack_base, nir_load_var(&b, trav_vars.stack), 1); |
| nir_store_var(&b, trav_vars.current_node, nir_imm_int(&b, RADV_BVH_ROOT_NODE), 0x1); |
| nir_store_var(&b, trav_vars.previous_node, nir_imm_int(&b, RADV_BVH_INVALID_NODE), 0x1); |
| nir_store_var(&b, trav_vars.instance_top_node, nir_imm_int(&b, RADV_BVH_INVALID_NODE), 0x1); |
      nir_store_var(&b, trav_vars.instance_bottom_node,
                    nir_imm_int(&b, RADV_BVH_NO_INSTANCE_ROOT), 0x1);
| |
| nir_store_var(&b, trav_vars.top_stack, nir_imm_int(&b, -1), 1); |
| |
| struct radv_ray_traversal_vars trav_vars_args = { |
| .tmax = nir_build_deref_var(&b, vars.tmax), |
| .origin = nir_build_deref_var(&b, trav_vars.origin), |
| .dir = nir_build_deref_var(&b, trav_vars.dir), |
| .inv_dir = nir_build_deref_var(&b, trav_vars.inv_dir), |
| .bvh_base = nir_build_deref_var(&b, trav_vars.bvh_base), |
| .stack = nir_build_deref_var(&b, trav_vars.stack), |
| .top_stack = nir_build_deref_var(&b, trav_vars.top_stack), |
| .stack_base = nir_build_deref_var(&b, trav_vars.stack_base), |
| .current_node = nir_build_deref_var(&b, trav_vars.current_node), |
| .previous_node = nir_build_deref_var(&b, trav_vars.previous_node), |
| .instance_top_node = nir_build_deref_var(&b, trav_vars.instance_top_node), |
| .instance_bottom_node = nir_build_deref_var(&b, trav_vars.instance_bottom_node), |
| .instance_addr = nir_build_deref_var(&b, trav_vars.instance_addr), |
| .sbt_offset_and_flags = nir_build_deref_var(&b, trav_vars.sbt_offset_and_flags), |
| }; |
| |
| struct traversal_data data = { |
| .device = device, |
| .createInfo = pCreateInfo, |
| .vars = &vars, |
| .trav_vars = &trav_vars, |
| }; |
| |
| struct radv_ray_traversal_args args = { |
| .root_bvh_base = root_bvh_base, |
| .flags = nir_load_var(&b, vars.flags), |
| .cull_mask = nir_load_var(&b, vars.cull_mask), |
| .origin = nir_load_var(&b, vars.origin), |
| .tmin = nir_load_var(&b, vars.tmin), |
| .dir = nir_load_var(&b, vars.direction), |
| .vars = trav_vars_args, |
| .stack_stride = device->physical_device->rt_wave_size * sizeof(uint32_t), |
| .stack_entries = MAX_STACK_ENTRY_COUNT, |
| .stack_store_cb = store_stack_entry, |
| .stack_load_cb = load_stack_entry, |
| .aabb_cb = (pCreateInfo->flags & VK_PIPELINE_CREATE_RAY_TRACING_SKIP_AABBS_BIT_KHR) |
| ? NULL |
| : handle_candidate_aabb, |
| .triangle_cb = (pCreateInfo->flags & VK_PIPELINE_CREATE_RAY_TRACING_SKIP_TRIANGLES_BIT_KHR) |
| ? NULL |
| : handle_candidate_triangle, |
| .data = &data, |
| }; |
| |
| radv_build_ray_traversal(device, &b, &args); |
| } |
| nir_pop_if(&b, NULL); |
| |
| /* Dispatch the follow-up shader: closest-hit if traversal recorded a hit, miss otherwise. */
| nir_push_if(&b, nir_load_var(&b, trav_vars.hit)); |
| { |
| nir_execute_closest_hit_amd( |
| &b, nir_load_var(&b, vars.idx), nir_load_var(&b, vars.tmax), |
| nir_load_var(&b, vars.primitive_id), nir_load_var(&b, vars.instance_addr), |
| nir_load_var(&b, vars.geometry_id_and_flags), nir_load_var(&b, vars.hit_kind)); |
| } |
| nir_push_else(&b, NULL); |
| { |
| /* Only load the miss shader if we actually miss. It is valid to not specify an SBT pointer |
| * for miss shaders if none of the rays miss. */ |
| nir_execute_miss_amd(&b, nir_load_var(&b, vars.tmax)); |
| } |
| nir_pop_if(&b, NULL); |
| |
| /* Shaders were inlined above; re-index the SSA defs and dirty all metadata. */
| nir_index_ssa_defs(nir_shader_get_entrypoint(b.shader)); |
| nir_metadata_preserve(nir_shader_get_entrypoint(b.shader), nir_metadata_none); |
| |
| /* Lower and clean up variables. */
| NIR_PASS_V(b.shader, nir_lower_global_vars_to_local); |
| NIR_PASS_V(b.shader, nir_lower_vars_to_ssa); |
| |
| return b.shader; |
| } |
| |
| static unsigned |
| compute_rt_stack_size(const VkRayTracingPipelineCreateInfoKHR *pCreateInfo, |
| const struct radv_pipeline_shader_stack_size *stack_sizes) |
| { |
| unsigned raygen_size = 0; |
| unsigned callable_size = 0; |
| unsigned chit_size = 0; |
| unsigned miss_size = 0; |
| unsigned non_recursive_size = 0; |
| |
| for (unsigned i = 0; i < pCreateInfo->groupCount; ++i) { |
| non_recursive_size = MAX2(stack_sizes[i].non_recursive_size, non_recursive_size); |
| |
| const VkRayTracingShaderGroupCreateInfoKHR *group_info = &pCreateInfo->pGroups[i]; |
| uint32_t shader_id = VK_SHADER_UNUSED_KHR; |
| unsigned size = stack_sizes[i].recursive_size; |
| |
| switch (group_info->type) { |
| case VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR: |
| shader_id = group_info->generalShader; |
| break; |
| case VK_RAY_TRACING_SHADER_GROUP_TYPE_PROCEDURAL_HIT_GROUP_KHR: |
| case VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR: |
| shader_id = group_info->closestHitShader; |
| break; |
| default: |
| break; |
| } |
| if (shader_id == VK_SHADER_UNUSED_KHR) |
| continue; |
| |
| const VkPipelineShaderStageCreateInfo *stage = &pCreateInfo->pStages[shader_id]; |
| switch (stage->stage) { |
| case VK_SHADER_STAGE_RAYGEN_BIT_KHR: |
| raygen_size = MAX2(raygen_size, size); |
| break; |
| case VK_SHADER_STAGE_MISS_BIT_KHR: |
| miss_size = MAX2(miss_size, size); |
| break; |
| case VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR: |
| chit_size = MAX2(chit_size, size); |
| break; |
| case VK_SHADER_STAGE_CALLABLE_BIT_KHR: |
| callable_size = MAX2(callable_size, size); |
| break; |
| default: |
| unreachable("Invalid stage type in RT shader"); |
| } |
| } |
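| /* This mirrors the default pipeline stack size computation from the Vulkan
| * spec: the raygen frame at the bottom, one recursion level that may also need
| * the non-recursive (any-hit/intersection) scratch, the remaining recursion
| * levels with only closest-hit/miss frames, and two levels of callables. */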
| return raygen_size + |
| MIN2(pCreateInfo->maxPipelineRayRecursionDepth, 1) * |
| MAX2(MAX2(chit_size, miss_size), non_recursive_size) + |
| MAX2(0, (int)(pCreateInfo->maxPipelineRayRecursionDepth) - 1) * |
| MAX2(chit_size, miss_size) + |
| 2 * callable_size; |
| } |
| |
| bool |
| radv_rt_pipeline_has_dynamic_stack_size(const VkRayTracingPipelineCreateInfoKHR *pCreateInfo) |
| { |
| if (!pCreateInfo->pDynamicState) |
| return false; |
| |
| for (unsigned i = 0; i < pCreateInfo->pDynamicState->dynamicStateCount; ++i) { |
| if (pCreateInfo->pDynamicState->pDynamicStates[i] == |
| VK_DYNAMIC_STATE_RAY_TRACING_PIPELINE_STACK_SIZE_KHR) |
| return true; |
| } |
| |
| return false; |
| } |
| |
| static bool |
| should_move_rt_instruction(nir_intrinsic_op intrinsic) |
| { |
| switch (intrinsic) { |
| case nir_intrinsic_load_rt_arg_scratch_offset_amd: |
| case nir_intrinsic_load_ray_flags: |
| case nir_intrinsic_load_ray_object_origin: |
| case nir_intrinsic_load_ray_world_origin: |
| case nir_intrinsic_load_ray_t_min: |
| case nir_intrinsic_load_ray_object_direction: |
| case nir_intrinsic_load_ray_world_direction: |
| case nir_intrinsic_load_ray_t_max: |
| return true; |
| default: |
| return false; |
| } |
| } |
| |
| static void |
| move_rt_instructions(nir_shader *shader) |
| { |
| nir_cursor target = nir_before_cf_list(&nir_shader_get_entrypoint(shader)->body); |
| |
| nir_foreach_block (block, nir_shader_get_entrypoint(shader)) { |
| nir_foreach_instr_safe (instr, block) { |
| if (instr->type != nir_instr_type_intrinsic) |
| continue; |
| |
| nir_intrinsic_instr *intrinsic = nir_instr_as_intrinsic(instr); |
| |
| if (!should_move_rt_instruction(intrinsic->intrinsic)) |
| continue; |
| |
| nir_instr_move(target, instr); |
| } |
| } |
| |
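| /* Moving instructions only invalidates the instruction indices. */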
| nir_metadata_preserve(nir_shader_get_entrypoint(shader), |
| nir_metadata_all & (~nir_metadata_instr_index)); |
| } |
| |
| static nir_shader * |
| create_rt_shader(struct radv_device *device, const VkRayTracingPipelineCreateInfoKHR *pCreateInfo, |
| struct radv_pipeline_shader_stack_size *stack_sizes) |
| { |
| nir_builder b = radv_meta_init_shader(device, MESA_SHADER_COMPUTE, "rt_combined"); |
| b.shader->info.internal = false; |
| b.shader->info.workgroup_size[0] = 8; |
| b.shader->info.workgroup_size[1] = device->physical_device->rt_wave_size == 64 ? 8 : 4; |
| |
| struct rt_variables vars = create_rt_variables(b.shader, pCreateInfo, stack_sizes); |
| load_sbt_entry(&b, &vars, nir_imm_int(&b, 0), SBT_RAYGEN, 0); |
| if (radv_rt_pipeline_has_dynamic_stack_size(pCreateInfo)) |
| nir_store_var(&b, vars.stack_ptr, nir_load_rt_dynamic_callable_stack_base_amd(&b), 0x1); |
| else |
| nir_store_var(&b, vars.stack_ptr, nir_imm_int(&b, 0), 0x1); |
| |
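| /* The combined shader is one big dispatch loop: each iteration looks at
| * vars.idx and executes the matching inlined shader case; idx 0 breaks out of
| * the loop. */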
| nir_loop *loop = nir_push_loop(&b); |
| |
| nir_push_if(&b, nir_ieq_imm(&b, nir_load_var(&b, vars.idx), 0)); |
| nir_jump(&b, nir_jump_break); |
| nir_pop_if(&b, NULL); |
| |
| nir_ssa_def *idx = nir_load_var(&b, vars.idx); |
| |
| /* Insert traversal shader */ |
| nir_shader *traversal = build_traversal_shader(device, pCreateInfo, stack_sizes); |
| assert(b.shader->info.shared_size == 0); |
| b.shader->info.shared_size = traversal->info.shared_size; |
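| /* Keep the traversal stack within the shared-memory budget of a workgroup. */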
| assert(b.shader->info.shared_size <= 32768); |
| insert_rt_case(&b, traversal, &vars, idx, 0, 1); |
| |
| /* Trick for indexing the shader cases: the shader for stage i always gets case
| * id i + 2 (id 0 terminates the loop and id 1 is the traversal shader), while
| * the resume-shader ids all come after the stage ids. This keeps the shader
| * group handles independent of how the stages get split during compilation. */
| unsigned call_idx_base = pCreateInfo->stageCount + 1; |
| for (unsigned i = 0; i < pCreateInfo->stageCount; ++i) { |
| const VkPipelineShaderStageCreateInfo *stage = &pCreateInfo->pStages[i]; |
| gl_shader_stage type = vk_to_mesa_shader_stage(stage->stage); |
| if (type != MESA_SHADER_RAYGEN && type != MESA_SHADER_CALLABLE && |
| type != MESA_SHADER_CLOSEST_HIT && type != MESA_SHADER_MISS) |
| continue; |
| |
| nir_shader *nir_stage = parse_rt_stage(device, stage); |
| |
| /* Move the ray-tracing system values that rt_trace_ray sets to the top of the
| * shader, so that subsequent rt_trace_ray calls cannot overwrite them.
| */
| NIR_PASS_V(nir_stage, move_rt_instructions); |
| |
| const nir_lower_shader_calls_options opts = { |
| .address_format = nir_address_format_32bit_offset, |
| .stack_alignment = 16, |
| .localized_loads = true |
| }; |
| uint32_t num_resume_shaders = 0; |
| nir_shader **resume_shaders = NULL; |
| nir_lower_shader_calls(nir_stage, &opts, &resume_shaders, |
| &num_resume_shaders, nir_stage); |
| |
| vars.stage_idx = i; |
| insert_rt_case(&b, nir_stage, &vars, idx, call_idx_base, i + 2); |
| for (unsigned j = 0; j < num_resume_shaders; ++j) { |
| insert_rt_case(&b, resume_shaders[j], &vars, idx, call_idx_base, call_idx_base + 1 + j); |
| } |
| call_idx_base += num_resume_shaders; |
| } |
| |
| nir_pop_loop(&b, loop); |
| |
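| /* With a dynamic stack size, the real amount is provided at record time via
| * vkCmdSetRayTracingPipelineStackSizeKHR; 16 bytes are set below merely to make
| * sure scratch gets allocated at all. */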
| if (radv_rt_pipeline_has_dynamic_stack_size(pCreateInfo)) |
| b.shader->scratch_size = 16; /* To enable scratch. */ |
| else |
| b.shader->scratch_size += compute_rt_stack_size(pCreateInfo, stack_sizes); |
| |
| /* Shader cases were inlined above; re-index the SSA defs and dirty all metadata. */
| nir_index_ssa_defs(nir_shader_get_entrypoint(b.shader)); |
| nir_metadata_preserve(nir_shader_get_entrypoint(b.shader), nir_metadata_none); |
| |
| return b.shader; |
| } |
| |
| static struct radv_pipeline_key |
| radv_generate_rt_pipeline_key(const struct radv_ray_tracing_pipeline *pipeline, |
| VkPipelineCreateFlags flags) |
| { |
| struct radv_pipeline_key key = radv_generate_pipeline_key(&pipeline->base.base, flags); |
| key.cs.compute_subgroup_size = pipeline->base.base.device->physical_device->rt_wave_size; |
| |
| return key; |
| } |
| |
| static VkResult |
| radv_rt_pipeline_create(VkDevice _device, VkPipelineCache _cache, |
| const VkRayTracingPipelineCreateInfoKHR *pCreateInfo, |
| const VkAllocationCallbacks *pAllocator, VkPipeline *pPipeline) |
| { |
| RADV_FROM_HANDLE(radv_device, device, _device); |
| RADV_FROM_HANDLE(radv_pipeline_cache, cache, _cache); |
| RADV_FROM_HANDLE(radv_pipeline_layout, pipeline_layout, pCreateInfo->layout); |
| VkResult result; |
| struct radv_ray_tracing_pipeline *rt_pipeline = NULL; |
| uint8_t hash[20]; |
| nir_shader *shader = NULL; |
| bool keep_statistic_info = |
| (pCreateInfo->flags & VK_PIPELINE_CREATE_CAPTURE_STATISTICS_BIT_KHR) || |
| (device->instance->debug_flags & RADV_DEBUG_DUMP_SHADER_STATS) || device->keep_shader_info; |
| |
| if (pCreateInfo->flags & VK_PIPELINE_CREATE_LIBRARY_BIT_KHR) |
| return radv_rt_pipeline_library_create(_device, _cache, pCreateInfo, pAllocator, pPipeline); |
| |
| VkRayTracingPipelineCreateInfoKHR local_create_info = |
| radv_create_merged_rt_create_info(pCreateInfo); |
| if (!local_create_info.pStages || !local_create_info.pGroups) { |
| result = VK_ERROR_OUT_OF_HOST_MEMORY; |
| goto fail; |
| } |
| |
| radv_hash_rt_shaders(hash, &local_create_info, radv_get_hash_flags(device, keep_statistic_info)); |
| struct vk_shader_module module = {.base.type = VK_OBJECT_TYPE_SHADER_MODULE}; |
| |
| VkPipelineShaderStageCreateInfo stage = { |
| .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, |
| .pNext = NULL, |
| .stage = VK_SHADER_STAGE_COMPUTE_BIT, |
| .module = vk_shader_module_to_handle(&module), |
| .pName = "main", |
| }; |
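| /* Force "fail on compile required" so that the first radv_create_shaders call
| * below only probes the cache; the NIR is built and compiled for real only if
| * that probe misses. */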
| VkPipelineCreateFlags flags = |
| pCreateInfo->flags | VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT; |
| |
| rt_pipeline = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*rt_pipeline), 8, |
| VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); |
| if (rt_pipeline == NULL) { |
| result = VK_ERROR_OUT_OF_HOST_MEMORY; |
| goto fail; |
| } |
| |
| radv_pipeline_init(device, &rt_pipeline->base.base, RADV_PIPELINE_RAY_TRACING); |
| rt_pipeline->group_count = local_create_info.groupCount; |
| |
| const VkPipelineCreationFeedbackCreateInfo *creation_feedback = |
| vk_find_struct_const(pCreateInfo->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO); |
| |
| struct radv_pipeline_key key = radv_generate_rt_pipeline_key(rt_pipeline, pCreateInfo->flags); |
| UNUSED gl_shader_stage last_vgt_api_stage = MESA_SHADER_NONE; |
| |
| /* First check if we can get things from the cache before we take the expensive step of |
| * generating the nir. */ |
| result = radv_create_shaders( |
| &rt_pipeline->base.base, pipeline_layout, device, cache, &key, &stage, 1, flags, hash, |
| creation_feedback, &rt_pipeline->stack_sizes, &rt_pipeline->group_count, 0, &last_vgt_api_stage); |
| |
| if (result != VK_SUCCESS && result != VK_PIPELINE_COMPILE_REQUIRED) |
| goto pipeline_fail; |
| |
| if (result == VK_PIPELINE_COMPILE_REQUIRED) { |
| if (pCreateInfo->flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT) |
| goto pipeline_fail; |
| |
| rt_pipeline->stack_sizes =
| calloc(local_create_info.groupCount, sizeof(*rt_pipeline->stack_sizes));
| if (!rt_pipeline->stack_sizes) { |
| result = VK_ERROR_OUT_OF_HOST_MEMORY; |
| goto pipeline_fail; |
| } |
| |
| shader = create_rt_shader(device, &local_create_info, rt_pipeline->stack_sizes); |
| module.nir = shader; |
| result = radv_create_shaders(&rt_pipeline->base.base, pipeline_layout, device, cache, &key, |
| &stage, 1, pCreateInfo->flags, hash, creation_feedback, |
| &rt_pipeline->stack_sizes, &rt_pipeline->group_count, 0, |
| &last_vgt_api_stage); |
| if (result != VK_SUCCESS) |
| goto shader_fail; |
| } |
| |
| radv_compute_pipeline_init(&rt_pipeline->base, pipeline_layout); |
| |
| rt_pipeline->group_handles =
| calloc(local_create_info.groupCount, sizeof(*rt_pipeline->group_handles));
| if (!rt_pipeline->group_handles) { |
| result = VK_ERROR_OUT_OF_HOST_MEMORY; |
| goto shader_fail; |
| } |
| |
| rt_pipeline->dynamic_stack_size = radv_rt_pipeline_has_dynamic_stack_size(pCreateInfo); |
| |
| /* For general and closest-hit shaders we can use the shader ID (offset by 2,
| * matching the case ids above) directly as the handle. Intersection shaders use
| * the group ID instead, because their (potentially different) any-hit shaders
| * are inlined into them.
| */
| for (unsigned i = 0; i < local_create_info.groupCount; ++i) { |
| const VkRayTracingShaderGroupCreateInfoKHR *group_info = &local_create_info.pGroups[i]; |
| switch (group_info->type) { |
| case VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR: |
| if (group_info->generalShader != VK_SHADER_UNUSED_KHR) |
| rt_pipeline->group_handles[i].handles[0] = group_info->generalShader + 2; |
| break; |
| case VK_RAY_TRACING_SHADER_GROUP_TYPE_PROCEDURAL_HIT_GROUP_KHR: |
| if (group_info->intersectionShader != VK_SHADER_UNUSED_KHR) |
| rt_pipeline->group_handles[i].handles[1] = i + 2; |
| FALLTHROUGH; |
| case VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR: |
| if (group_info->closestHitShader != VK_SHADER_UNUSED_KHR) |
| rt_pipeline->group_handles[i].handles[0] = group_info->closestHitShader + 2; |
| if (group_info->anyHitShader != VK_SHADER_UNUSED_KHR) |
| rt_pipeline->group_handles[i].handles[1] = i + 2; |
| break; |
| case VK_RAY_TRACING_SHADER_GROUP_TYPE_MAX_ENUM_KHR:
| unreachable("VK_RAY_TRACING_SHADER_GROUP_TYPE_MAX_ENUM_KHR");
| } |
| } |
| |
| *pPipeline = radv_pipeline_to_handle(&rt_pipeline->base.base); |
| |
| shader_fail: |
| ralloc_free(shader); |
| pipeline_fail: |
| if (result != VK_SUCCESS) |
| radv_pipeline_destroy(device, &rt_pipeline->base.base, pAllocator); |
| fail: |
| free((void *)local_create_info.pGroups); |
| free((void *)local_create_info.pStages); |
| return result; |
| } |
| |
| VKAPI_ATTR VkResult VKAPI_CALL |
| radv_CreateRayTracingPipelinesKHR(VkDevice _device, VkDeferredOperationKHR deferredOperation, |
| VkPipelineCache pipelineCache, uint32_t count, |
| const VkRayTracingPipelineCreateInfoKHR *pCreateInfos, |
| const VkAllocationCallbacks *pAllocator, VkPipeline *pPipelines) |
| { |
| VkResult result = VK_SUCCESS; |
| |
| unsigned i = 0; |
| for (; i < count; i++) { |
| VkResult r; |
| r = radv_rt_pipeline_create(_device, pipelineCache, &pCreateInfos[i], pAllocator, |
| &pPipelines[i]); |
| if (r != VK_SUCCESS) { |
| result = r; |
| pPipelines[i] = VK_NULL_HANDLE; |
| |
| if (pCreateInfos[i].flags & VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT) |
| break; |
| } |
| } |
| |
| for (; i < count; ++i) |
| pPipelines[i] = VK_NULL_HANDLE; |
| |
| return result; |
| } |
| |
| VKAPI_ATTR VkResult VKAPI_CALL |
| radv_GetRayTracingShaderGroupHandlesKHR(VkDevice device, VkPipeline _pipeline, uint32_t firstGroup, |
| uint32_t groupCount, size_t dataSize, void *pData) |
| { |
| RADV_FROM_HANDLE(radv_pipeline, pipeline, _pipeline); |
| struct radv_ray_tracing_pipeline *rt_pipeline = radv_pipeline_to_ray_tracing(pipeline); |
| char *data = pData; |
| |
| STATIC_ASSERT(sizeof(*rt_pipeline->group_handles) <= RADV_RT_HANDLE_SIZE); |
| |
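| /* The handle struct may be smaller than RADV_RT_HANDLE_SIZE; zero the output
| * first so the padding bytes are well defined. */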
| memset(data, 0, groupCount * RADV_RT_HANDLE_SIZE); |
| |
| for (uint32_t i = 0; i < groupCount; ++i) { |
| memcpy(data + i * RADV_RT_HANDLE_SIZE, &rt_pipeline->group_handles[firstGroup + i], |
| sizeof(*rt_pipeline->group_handles)); |
| } |
| |
| return VK_SUCCESS; |
| } |
| |
| VKAPI_ATTR VkDeviceSize VKAPI_CALL |
| radv_GetRayTracingShaderGroupStackSizeKHR(VkDevice device, VkPipeline _pipeline, uint32_t group, |
| VkShaderGroupShaderKHR groupShader) |
| { |
| RADV_FROM_HANDLE(radv_pipeline, pipeline, _pipeline); |
| struct radv_ray_tracing_pipeline *rt_pipeline = radv_pipeline_to_ray_tracing(pipeline); |
| const struct radv_pipeline_shader_stack_size *stack_size = &rt_pipeline->stack_sizes[group]; |
| |
| if (groupShader == VK_SHADER_GROUP_SHADER_ANY_HIT_KHR || |
| groupShader == VK_SHADER_GROUP_SHADER_INTERSECTION_KHR) |
| return stack_size->non_recursive_size; |
| else |
| return stack_size->recursive_size; |
| } |
| |
| VKAPI_ATTR VkResult VKAPI_CALL |
| radv_GetRayTracingCaptureReplayShaderGroupHandlesKHR(VkDevice _device, VkPipeline pipeline, |
| uint32_t firstGroup, uint32_t groupCount, |
| size_t dataSize, void *pData) |
| { |
| RADV_FROM_HANDLE(radv_device, device, _device); |
| unreachable("Unimplemented"); |
| return vk_error(device, VK_ERROR_FEATURE_NOT_PRESENT); |
| } |