| /* |
| * Copyright © 2016 Red Hat. |
| * Copyright © 2016 Bas Nieuwenhuizen |
| * |
| * based in part on anv driver which is: |
| * Copyright © 2015 Intel Corporation |
| * |
| * SPDX-License-Identifier: MIT |
| */ |
| |
| #include "radv_pipeline.h" |
| #include "meta/radv_meta.h" |
| #include "nir/nir.h" |
| #include "nir/radv_nir.h" |
| #include "spirv/nir_spirv.h" |
| #include "util/disk_cache.h" |
| #include "util/os_time.h" |
| #include "util/u_atomic.h" |
| #include "radv_cs.h" |
| #include "radv_debug.h" |
| #include "radv_descriptors.h" |
| #include "radv_pipeline_rt.h" |
| #include "radv_rmv.h" |
| #include "radv_shader.h" |
| #include "radv_shader_args.h" |
| #include "vk_pipeline.h" |
| #include "vk_render_pass.h" |
| #include "vk_util.h" |
| |
| #include "util/u_debug.h" |
| #include "ac_binary.h" |
| #include "ac_nir.h" |
| #include "ac_shader_util.h" |
| #include "aco_interface.h" |
| #include "sid.h" |
| #include "vk_format.h" |
| |
| bool |
| radv_pipeline_capture_shaders(const struct radv_compiler_info *compiler_info, VkPipelineCreateFlags2 flags) |
| { |
| return (flags & VK_PIPELINE_CREATE_2_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR) || |
| compiler_info->debug.capture_shaders; |
| } |
| |
| bool |
| radv_pipeline_capture_shader_stats(const struct radv_compiler_info *compiler_info, VkPipelineCreateFlags2 flags) |
| { |
| return (flags & VK_PIPELINE_CREATE_2_CAPTURE_STATISTICS_BIT_KHR) || compiler_info->debug.capture_shader_stats; |
| } |
| |
| bool |
| radv_pipeline_skip_shaders_cache(const struct radv_device *device, const struct radv_pipeline *pipeline) |
| { |
| const struct radv_physical_device *pdev = radv_device_physical(device); |
| const struct radv_instance *instance = radv_physical_device_instance(pdev); |
| |
| /* Skip the shaders cache when any of the below are true: |
| * - trap handler is present |
| * - shaders are dumped for debugging (RADV_DEBUG=shaders) |
| * - shaders IR are captured (NIR, backend IR and ASM) |
| * - binaries are captured (driver shouldn't store data to an internal cache) |
| */ |
| return device->trap_handler_shader || (instance->debug_flags & RADV_DEBUG_DUMP_SHADERS) || |
| (pipeline->create_flags & |
| (VK_PIPELINE_CREATE_2_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR | VK_PIPELINE_CREATE_2_CAPTURE_DATA_BIT_KHR)); |
| } |
| |
| void |
| radv_pipeline_init(struct radv_device *device, struct radv_pipeline *pipeline, enum radv_pipeline_type type) |
| { |
| vk_object_base_init(&device->vk, &pipeline->base, VK_OBJECT_TYPE_PIPELINE); |
| |
| pipeline->type = type; |
| } |
| |
/* Destroy a pipeline object: drop the cache-object reference, run the
 * type-specific destructor (which frees per-type state), then release the
 * common object memory itself.
 */
void
radv_pipeline_destroy(struct radv_device *device, struct radv_pipeline *pipeline,
                      const VkAllocationCallbacks *allocator)
{
   if (pipeline->cache_object)
      vk_pipeline_cache_object_unref(&device->vk, pipeline->cache_object);

   /* Per-type teardown; each destructor only frees the state it owns,
    * the common state below is freed here. */
   switch (pipeline->type) {
   case RADV_PIPELINE_GRAPHICS:
      radv_destroy_graphics_pipeline(device, radv_pipeline_to_graphics(pipeline));
      break;
   case RADV_PIPELINE_GRAPHICS_LIB:
      radv_destroy_graphics_lib_pipeline(device, radv_pipeline_to_graphics_lib(pipeline));
      break;
   case RADV_PIPELINE_COMPUTE:
      radv_destroy_compute_pipeline(device, radv_pipeline_to_compute(pipeline));
      break;
   case RADV_PIPELINE_RAY_TRACING:
      radv_destroy_ray_tracing_pipeline(device, radv_pipeline_to_ray_tracing(pipeline));
      break;
   default:
      UNREACHABLE("invalid pipeline type");
   }

   /* Log the resource destruction (RMV tracing) before the handle becomes invalid. */
   radv_rmv_log_resource_destroy(device, (uint64_t)radv_pipeline_to_handle(pipeline));
   vk_object_base_finish(&pipeline->base);
   vk_free2(&device->vk.alloc, allocator, pipeline);
}
| |
| VKAPI_ATTR void VKAPI_CALL |
| radv_DestroyPipeline(VkDevice _device, VkPipeline _pipeline, const VkAllocationCallbacks *pAllocator) |
| { |
| VK_FROM_HANDLE(radv_device, device, _device); |
| VK_FROM_HANDLE(radv_pipeline, pipeline, _pipeline); |
| |
| if (!_pipeline) |
| return; |
| |
| radv_pipeline_destroy(device, pipeline, pAllocator); |
| } |
| |
| struct radv_shader_stage_key |
| radv_pipeline_get_shader_key(const struct radv_device *device, const VkPipelineShaderStageCreateInfo *stage, |
| VkPipelineCreateFlags2 flags, const void *pNext) |
| { |
| const struct radv_compiler_info *compiler_info = &device->compiler_info; |
| mesa_shader_stage s = vk_to_mesa_shader_stage(stage->stage); |
| struct vk_pipeline_robustness_state rs; |
| struct radv_shader_stage_key key = {0}; |
| |
| key.keep_statistic_info = radv_pipeline_capture_shader_stats(compiler_info, flags); |
| key.keep_executable_info = radv_pipeline_capture_shaders(compiler_info, flags); |
| |
| if (flags & VK_PIPELINE_CREATE_2_DISABLE_OPTIMIZATION_BIT) |
| key.optimisations_disabled = 1; |
| |
| if (flags & VK_PIPELINE_CREATE_2_VIEW_INDEX_FROM_DEVICE_INDEX_BIT) |
| key.view_index_from_device_index = 1; |
| |
| if (flags & VK_PIPELINE_CREATE_2_INDIRECT_BINDABLE_BIT_EXT) |
| key.indirect_bindable = 1; |
| |
| if (flags & VK_PIPELINE_CREATE_2_DESCRIPTOR_HEAP_BIT_EXT) |
| key.descriptor_heap = 1; |
| |
| if (stage->stage & RADV_GRAPHICS_STAGE_BITS) { |
| key.version = compiler_info->override_graphics_shader_version; |
| } else if (stage->stage & RADV_RT_STAGE_BITS) { |
| key.version = compiler_info->override_ray_tracing_shader_version; |
| } else { |
| assert(stage->stage == VK_SHADER_STAGE_COMPUTE_BIT); |
| key.version = compiler_info->override_compute_shader_version; |
| } |
| |
| vk_pipeline_robustness_state_fill(&device->vk.robustness_state, &rs, pNext, stage->pNext); |
| |
| radv_set_stage_key_robustness(&rs, s, &key); |
| |
| const VkPipelineShaderStageRequiredSubgroupSizeCreateInfo *const subgroup_size = |
| vk_find_struct_const(stage->pNext, PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO); |
| |
| if (subgroup_size) { |
| if (subgroup_size->requiredSubgroupSize == 32) |
| key.subgroup_required_size = RADV_REQUIRED_WAVE32; |
| else if (subgroup_size->requiredSubgroupSize == 64) |
| key.subgroup_required_size = RADV_REQUIRED_WAVE64; |
| else |
| UNREACHABLE("Unsupported required subgroup size."); |
| } |
| |
| if (stage->flags & VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT) { |
| key.subgroup_require_full = 1; |
| } |
| |
| if (stage->flags & VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT) { |
| key.subgroup_allow_varying = 1; |
| } |
| |
| return key; |
| } |
| |
| void |
| radv_pipeline_stage_init(VkPipelineCreateFlags2 pipeline_flags, const VkPipelineShaderStageCreateInfo *sinfo, |
| const struct radv_pipeline_layout *pipeline_layout, |
| const struct radv_shader_stage_key *stage_key, struct radv_shader_stage *out_stage) |
| { |
| const VkShaderModuleCreateInfo *minfo = vk_find_struct_const(sinfo->pNext, SHADER_MODULE_CREATE_INFO); |
| const VkPipelineShaderStageModuleIdentifierCreateInfoEXT *iinfo = |
| vk_find_struct_const(sinfo->pNext, PIPELINE_SHADER_STAGE_MODULE_IDENTIFIER_CREATE_INFO_EXT); |
| |
| if (sinfo->module == VK_NULL_HANDLE && !minfo && !iinfo) |
| return; |
| |
| memset(out_stage, 0, sizeof(*out_stage)); |
| |
| out_stage->stage = vk_to_mesa_shader_stage(sinfo->stage); |
| out_stage->next_stage = MESA_SHADER_NONE; |
| out_stage->entrypoint = sinfo->pName; |
| out_stage->spec_info = sinfo->pSpecializationInfo; |
| out_stage->feedback.flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT; |
| out_stage->key = *stage_key; |
| |
| if (sinfo->module != VK_NULL_HANDLE) { |
| struct vk_shader_module *module = vk_shader_module_from_handle(sinfo->module); |
| |
| out_stage->spirv.data = module->data; |
| out_stage->spirv.size = module->size; |
| out_stage->spirv.object = &module->base; |
| |
| if (module->nir) |
| out_stage->internal_nir = module->nir; |
| } else if (minfo) { |
| out_stage->spirv.data = (const char *)minfo->pCode; |
| out_stage->spirv.size = minfo->codeSize; |
| } |
| |
| const VkShaderDescriptorSetAndBindingMappingInfoEXT *mapping = |
| vk_find_struct_const(sinfo->pNext, SHADER_DESCRIPTOR_SET_AND_BINDING_MAPPING_INFO_EXT); |
| out_stage->layout.mapping = mapping; |
| |
| radv_shader_layout_init(pipeline_layout, out_stage->stage, &out_stage->layout); |
| |
| vk_pipeline_hash_shader_stage(pipeline_flags, sinfo, NULL, out_stage->shader_blake3); |
| } |
| |
| void |
| radv_pipeline_stage_finish(struct radv_shader_stage *stage) |
| { |
| ralloc_free(stage->nir); |
| vk_sampler_state_array_finish(&stage->layout.embedded_samplers); |
| } |
| |
| void |
| radv_shader_layout_init(const struct radv_pipeline_layout *pipeline_layout, mesa_shader_stage stage, |
| struct radv_shader_layout *layout) |
| { |
| if (!pipeline_layout) |
| return; |
| |
| layout->num_sets = pipeline_layout->num_sets; |
| for (unsigned i = 0; i < pipeline_layout->num_sets; i++) { |
| layout->set[i].layout = pipeline_layout->set[i].layout; |
| layout->set[i].dynamic_offset_start = pipeline_layout->set[i].dynamic_offset_start; |
| } |
| |
| layout->use_dynamic_descriptors = pipeline_layout->dynamic_offset_count && |
| (pipeline_layout->dynamic_shader_stages & mesa_to_vk_shader_stage(stage)); |
| layout->independent_sets = pipeline_layout->independent_sets; |
| } |
| |
| static nir_component_mask_t |
| non_uniform_access_callback(const nir_src *src, void *_) |
| { |
| if (src->ssa->num_components == 1) |
| return 0x1; |
| return nir_chase_binding(*src).success ? 0x2 : 0x3; |
| } |
| |
/* Run the late, RADV-specific NIR lowering/optimization pipeline on a shader
 * stage, taking it from "optimized generic NIR" to NIR the backend (ACO or
 * LLVM) can consume.
 *
 * The pass ordering in here is deliberate and fragile — several passes have
 * explicit before/after requirements noted inline. Do not reorder casually.
 *
 * compiler_info: device-wide compiler configuration and debug knobs.
 * gfx_state:     graphics pipeline key; may be NULL (see the has_ls_vgpr_init_bug
 *                check below which guards against it) — e.g. for non-graphics stages.
 * stage:         in/out; stage->nir is rewritten and stage->info is updated.
 */
void
radv_postprocess_nir(const struct radv_compiler_info *compiler_info, const struct radv_graphics_state_key *gfx_state,
                     struct radv_shader_stage *stage)
{
   enum amd_gfx_level gfx_level = compiler_info->ac->gfx_level;
   const bool use_llvm = compiler_info->debug.use_llvm;
   bool progress;

   /* Wave and workgroup size should already be filled. */
   assert(stage->info.wave_size && stage->info.workgroup_size);

   if (stage->stage == MESA_SHADER_FRAGMENT) {
      if (!stage->key.optimisations_disabled) {
         NIR_PASS(_, stage->nir, nir_opt_cse);
      }
      NIR_PASS(_, stage->nir, radv_nir_lower_fs_intrinsics, stage, gfx_state);
   }

   /* LLVM could support more of these in theory. */
   radv_nir_opt_tid_function_options tid_options = {
      .use_masked_swizzle_amd = true,
      .use_dpp16_shift_amd = !use_llvm && gfx_level >= GFX8,
      .use_clustered_rotate = !use_llvm,
      .hw_subgroup_size = stage->info.wave_size,
      .hw_ballot_bit_size = stage->info.wave_size,
      .hw_ballot_num_comp = 1,
   };
   NIR_PASS(_, stage->nir, radv_nir_opt_tid_function, &tid_options);

   NIR_PASS(_, stage->nir, ac_nir_flag_smem_for_loads, gfx_level, use_llvm);

   NIR_PASS(_, stage->nir, nir_lower_memory_model);

   nir_load_store_vectorize_options vectorize_opts = {
      .modes = nir_var_mem_ssbo | nir_var_mem_ubo | nir_var_mem_push_const | nir_var_mem_shared | nir_var_mem_global |
               nir_var_shader_temp,
      .callback = ac_nir_mem_vectorize_callback,
      .cb_data = &(struct ac_nir_config){gfx_level, !use_llvm},
      .robust_modes = 0,
      .bounds_checked_modes = nir_var_mem_ssbo | nir_var_mem_ubo | nir_var_mem_shared,
      /* Only vectorize shared2 during late optimizations. */
      .has_shared2_amd = false,
   };

   /* Robust access disables vectorization across potentially-OOB boundaries. */
   if (stage->key.uniform_robustness2)
      vectorize_opts.robust_modes |= nir_var_mem_ubo;

   if (stage->key.storage_robustness2)
      vectorize_opts.robust_modes |= nir_var_mem_ssbo;

   /* Track whether passes below may have split push-constant loads, which
    * requires re-running constant folding before radv_nir_lower_descriptors
    * (see the comment at the nir_opt_constant_folding call further down).
    */
   bool constant_fold_for_push_const = false;
   if (!stage->key.optimisations_disabled) {
      progress = false;
      NIR_PASS(progress, stage->nir, nir_opt_load_store_vectorize, &vectorize_opts);
      if (progress) {
         NIR_PASS(_, stage->nir, nir_opt_copy_prop);
         NIR_PASS(_, stage->nir, nir_opt_shrink_stores, !compiler_info->cache_key->disable_shrink_image_store);

         constant_fold_for_push_const = true;
      }
   }

   enum nir_lower_non_uniform_access_type lower_non_uniform_access_types =
      nir_lower_non_uniform_ubo_access | nir_lower_non_uniform_ssbo_access | nir_lower_non_uniform_texture_access |
      nir_lower_non_uniform_image_access | nir_lower_non_uniform_texture_query | nir_lower_non_uniform_image_query;

   /* In practice, most shaders do not have non-uniform-qualified
    * accesses (see
    * https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17558#note_1475069)
    * thus a cheaper and likely to fail check is run first.
    */
   if (nir_has_non_uniform_access(stage->nir, lower_non_uniform_access_types)) {
      if (!stage->key.optimisations_disabled) {
         NIR_PASS(_, stage->nir, nir_opt_non_uniform_access);
      }

      if (!use_llvm) {
         nir_lower_non_uniform_access_options options = {
            .types = lower_non_uniform_access_types,
            .callback = &non_uniform_access_callback,
            .callback_data = NULL,
         };
         NIR_PASS(_, stage->nir, nir_lower_non_uniform_access, &options);
      }
   }

   /* Lowering access bit sizes may also split push-constant loads. */
   progress = false;
   NIR_PASS(progress, stage->nir, ac_nir_lower_mem_access_bit_sizes, gfx_level, use_llvm);
   if (progress)
      constant_fold_for_push_const = true;

   NIR_PASS(_, stage->nir, ac_nir_lower_image_tex,
            &(ac_nir_lower_image_tex_options){
               .gfx_level = gfx_level,
               .lower_array_layer_round_even =
                  !compiler_info->ac->conformant_trunc_coord && !compiler_info->cache_key->disable_trunc_coord,
               .fix_derivs_in_divergent_cf = stage->stage == MESA_SHADER_FRAGMENT && !use_llvm,
               .max_wqm_vgprs = 64, // TODO: improve spiller and RA support for linear VGPRs
            });

   if (stage->nir->info.uses_resource_info_query)
      NIR_PASS(_, stage->nir, ac_nir_lower_resinfo, gfx_level);

   /* Ensure split load_push_constant still have constant offsets, for radv_nir_lower_descriptors. */
   if (constant_fold_for_push_const && stage->args.ac.inline_push_const_mask)
      NIR_PASS(_, stage->nir, nir_opt_constant_folding);

   /* Optimize NIR before NGG culling */
   bool is_last_vgt_stage = radv_is_last_vgt_stage(stage);
   bool lowered_ngg = stage->info.is_ngg && is_last_vgt_stage;
   if (lowered_ngg && stage->nir->info.stage != MESA_SHADER_GEOMETRY && stage->info.has_ngg_culling)
      radv_optimize_nir_algebraic_early(stage->nir);

   /* This has to be done after nir_opt_algebraic for best descriptor vectorization, but also before
    * NGG culling.
    */
   NIR_PASS(_, stage->nir, radv_nir_lower_descriptors, compiler_info, stage);

   NIR_PASS(_, stage->nir, nir_lower_alu_width, ac_nir_opt_vectorize_cb, &gfx_level);

   nir_move_options sink_opts = nir_move_const_undef | nir_move_copies | nir_dont_move_byte_word_vecs;

   if (!stage->key.optimisations_disabled) {
      NIR_PASS(_, stage->nir, nir_opt_licm);

      if (stage->stage == MESA_SHADER_VERTEX) {
         /* Always load all VS inputs at the top to eliminate needless VMEM->s_wait->VMEM sequences.
          * Each s_wait can cost 1000 cycles, so make sure all VS input loads are grouped.
          */
         NIR_PASS(_, stage->nir, nir_opt_move_to_top, nir_move_to_top_input_loads);
         NIR_PASS(_, stage->nir, nir_opt_sink, sink_opts);
         NIR_PASS(_, stage->nir, nir_opt_move, sink_opts);
      } else {
         if (stage->stage != MESA_SHADER_FRAGMENT || !compiler_info->cache_key->disable_sinking_load_input_fs)
            sink_opts |= nir_move_load_input | nir_move_load_frag_coord;

         NIR_PASS(_, stage->nir, nir_opt_sink, sink_opts);
         NIR_PASS(_, stage->nir, nir_opt_move, sink_opts | nir_move_load_input | nir_move_load_frag_coord);
      }
   }

   /* Lower VS inputs. We need to do this after nir_opt_sink, because
    * load_input can be reordered, but buffer loads can't.
    */
   if (stage->stage == MESA_SHADER_VERTEX) {
      NIR_PASS(_, stage->nir, radv_nir_lower_vs_inputs, compiler_info, stage, gfx_state);
   }

   /* Lower I/O intrinsics to memory instructions. */
   bool io_to_mem = radv_nir_lower_io_to_mem(compiler_info, stage);
   if (lowered_ngg) {
      radv_lower_ngg(compiler_info, stage, gfx_state);
   } else if (is_last_vgt_stage) {
      /* Legacy (non-NGG) VS/TES/GS output lowering. */
      if (stage->stage != MESA_SHADER_GEOMETRY) {
         NIR_PASS(_, stage->nir, ac_nir_lower_legacy_vs, gfx_level,
                  stage->info.outinfo.clip_dist_mask | stage->info.outinfo.cull_dist_mask, false,
                  stage->info.outinfo.vs_output_param_offset, stage->info.outinfo.param_exports,
                  stage->info.outinfo.export_prim_id, false, stage->info.force_vrs_per_vertex);

      } else {
         ac_nir_lower_legacy_gs_options options = {
            .has_gen_prim_query = false,
            .has_pipeline_stats_query = false,
            .gfx_level = gfx_level,
            .export_clipdist_mask = stage->info.outinfo.clip_dist_mask | stage->info.outinfo.cull_dist_mask,
            .param_offsets = stage->info.outinfo.vs_output_param_offset,
            .has_param_exports = stage->info.outinfo.param_exports,
            .force_vrs = stage->info.force_vrs_per_vertex,
         };
         ac_nir_legacy_gs_info info = {0};

         NIR_PASS(_, stage->nir, ac_nir_lower_legacy_gs, &options, &stage->gs_copy_shader, &info);

         for (unsigned i = 0; i < 4; i++)
            stage->info.gs.num_components_per_stream[i] = info.num_components_per_stream[i];
      }
   } else if (stage->stage == MESA_SHADER_FRAGMENT) {
      ac_nir_lower_ps_late_options late_options = {
         .gfx_level = gfx_level,
         .use_aco = !use_llvm,
         .bc_optimize_for_persp = G_0286CC_PERSP_CENTER_ENA(stage->info.ps.spi_ps_input_ena) &&
                                  G_0286CC_PERSP_CENTROID_ENA(stage->info.ps.spi_ps_input_ena),
         .bc_optimize_for_linear = G_0286CC_LINEAR_CENTER_ENA(stage->info.ps.spi_ps_input_ena) &&
                                   G_0286CC_LINEAR_CENTROID_ENA(stage->info.ps.spi_ps_input_ena),
         .uses_discard = stage->info.ps.can_discard,
         .dcc_decompress_gfx11 = gfx_state->dcc_decompress_gfx11,
         .no_color_export = stage->info.ps.has_epilog,
         .no_depth_export = stage->info.ps.exports_mrtz_via_epilog,

      };

      /* When color exports are done here (no PS epilog), the export format
       * state comes from the graphics pipeline key.
       */
      if (!late_options.no_color_export) {
         late_options.dual_src_blend = gfx_state->ps.epilog.mrt0_is_dual_src;
         late_options.color_is_int8 = gfx_state->ps.epilog.color_is_int8;
         late_options.color_is_int10 = gfx_state->ps.epilog.color_is_int10;
         late_options.enable_mrt_output_nan_fixup =
            gfx_state->ps.epilog.enable_mrt_output_nan_fixup && !stage->nir->info.internal;
         /* Need to filter out unwritten color slots. */
         late_options.spi_shader_col_format =
            gfx_state->ps.epilog.spi_shader_col_format & stage->info.ps.colors_written;
         late_options.alpha_to_one = gfx_state->ps.epilog.alpha_to_one;
      }

      if (!late_options.no_depth_export) {
         /* Compared to gfx_state.ps.alpha_to_coverage_via_mrtz,
          * radv_shader_info.ps.writes_mrt0_alpha need any depth/stencil/sample_mask exist.
          * ac_nir_lower_ps() require this field to reflect whether alpha via mrtz is really
          * present.
          */
         late_options.alpha_to_coverage_via_mrtz = stage->info.ps.writes_mrt0_alpha;
      }

      NIR_PASS(_, stage->nir, ac_nir_lower_ps_late, &late_options);
   }

   if (radv_shader_should_clear_lds(compiler_info, stage->nir)) {
      const unsigned chunk_size = 16; /* max single store size */
      const unsigned shared_size = align(stage->nir->info.shared_size, chunk_size);
      NIR_PASS(_, stage->nir, nir_clear_shared_memory, shared_size, chunk_size);
   }

   /* This must be after lowering resources to descriptor loads and before lowering intrinsics
    * to args and lowering int64.
    */
   if (!use_llvm)
      ac_nir_optimize_uniform_atomics(stage->nir);

   NIR_PASS(_, stage->nir, nir_opt_uniform_subgroup,
            &(struct nir_lower_subgroups_options){
               .subgroup_size = stage->info.wave_size,
               .ballot_bit_size = stage->info.wave_size,
               .ballot_components = 1,
               .lower_ballot_bit_count_to_mbcnt_amd = true,
            });

   NIR_PASS(_, stage->nir, nir_opt_idiv_const, 8);

   NIR_PASS(_, stage->nir, nir_lower_idiv,
            &(nir_lower_idiv_options){
               .allow_fp16 = gfx_level >= GFX9,
            });

   NIR_PASS(
      _, stage->nir, ac_nir_lower_intrinsics_to_args, &stage->args.ac,
      &(ac_nir_lower_intrinsics_to_args_options){
         .gfx_level = gfx_level,
         .has_ls_vgpr_init_bug = compiler_info->ac->has_ls_vgpr_init_bug && gfx_state && !gfx_state->vs.has_prolog,
         .hw_stage = radv_select_hw_stage(&stage->info, gfx_level),
         .wave_size = stage->info.wave_size,
         .workgroup_size = stage->info.workgroup_size,
         .use_llvm = use_llvm,
         .load_grid_size_from_user_sgpr = compiler_info->load_grid_size_from_user_sgpr,
      });
   NIR_PASS(_, stage->nir, radv_nir_lower_abi, gfx_level, stage, gfx_state, compiler_info->hw.address32_hi);

   /* Late cleanup/optimization round after the big lowering passes above. */
   if (!stage->key.optimisations_disabled) {
      NIR_PASS(_, stage->nir, nir_opt_dce);

      NIR_PASS(_, stage->nir, nir_opt_copy_prop);
      NIR_PASS(_, stage->nir, nir_opt_constant_folding);
      NIR_PASS(_, stage->nir, nir_opt_cse);
      NIR_PASS(_, stage->nir, nir_opt_if, nir_opt_if_optimize_phi_true_false);
      NIR_PASS(_, stage->nir, nir_opt_shrink_vectors, true);

      NIR_PASS(_, stage->nir, ac_nir_flag_smem_for_loads, gfx_level, use_llvm);
      NIR_PASS(_, stage->nir, ac_nir_lower_mem_access_bit_sizes, gfx_level, use_llvm);

      nir_load_store_vectorize_options late_vectorize_opts = {
         .modes =
            nir_var_mem_global | nir_var_mem_shared | nir_var_shader_out | nir_var_mem_task_payload | nir_var_shader_in,
         .callback = ac_nir_mem_vectorize_callback,
         .cb_data = &(struct ac_nir_config){gfx_level, !use_llvm},
         .robust_modes = 0,
         .bounds_checked_modes = nir_var_mem_ssbo | nir_var_mem_ubo | nir_var_mem_shared,
         .has_shared2_amd = true,
      };
      NIR_PASS(_, stage->nir, nir_opt_load_store_vectorize, &late_vectorize_opts);
   }

   NIR_PASS(_, stage->nir, ac_nir_lower_mem_access_bit_sizes, gfx_level, use_llvm);
   NIR_PASS(_, stage->nir, ac_nir_lower_global_access);
   NIR_PASS(_, stage->nir, nir_lower_int64);

   if (compiler_info->cache_key->mitigate_smem_oob)
      NIR_PASS(_, stage->nir, ac_nir_fixup_mem_access_gfx6, &stage->args.ac, 4096, true, true);

   bool opt_intrinsics = false;
   if (gfx_level >= GFX11)
      NIR_PASS(opt_intrinsics, stage->nir, ac_nir_opt_flip_if_for_mem_loads);
   if (opt_intrinsics) /* optimize inot(inverse_ballot) */
      NIR_PASS(_, stage->nir, nir_opt_intrinsics);

   radv_optimize_nir_algebraic(
      stage->nir, io_to_mem || lowered_ngg || stage->stage == MESA_SHADER_COMPUTE || stage->stage == MESA_SHADER_TASK,
      gfx_level >= GFX8, gfx_level);

   if (stage->nir->info.cs.has_cooperative_matrix)
      NIR_PASS(_, stage->nir, radv_nir_opt_cooperative_matrix, gfx_level);

   NIR_PASS(_, stage->nir, nir_lower_fp16_casts, nir_lower_fp16_split_fp64);

   if (ac_nir_might_lower_bit_size(stage->nir)) {
      /* The bit-size lowering callback needs divergence info on GFX8+. */
      if (gfx_level >= GFX8)
         nir_divergence_analysis(stage->nir);

      if (nir_lower_bit_size(stage->nir, ac_nir_lower_bit_size_callback, &gfx_level)) {
         NIR_PASS(_, stage->nir, nir_opt_constant_folding);
      }
   }
   if (gfx_level >= GFX9) {
      bool separate_g16 = gfx_level >= GFX10;
      struct nir_opt_tex_srcs_options opt_srcs_options[] = {
         {
            .sampler_dims = ~(BITFIELD_BIT(GLSL_SAMPLER_DIM_CUBE) | BITFIELD_BIT(GLSL_SAMPLER_DIM_BUF)),
            .src_types = (1 << nir_tex_src_coord) | (1 << nir_tex_src_lod) | (1 << nir_tex_src_bias) |
                         (1 << nir_tex_src_min_lod) | (1 << nir_tex_src_ms_index) |
                         (separate_g16 ? 0 : (1 << nir_tex_src_ddx) | (1 << nir_tex_src_ddy)),
         },
         {
            .sampler_dims = ~BITFIELD_BIT(GLSL_SAMPLER_DIM_CUBE),
            .src_types = (1 << nir_tex_src_ddx) | (1 << nir_tex_src_ddy),
         },
      };
      struct nir_opt_16bit_tex_image_options opt_16bit_options = {
         .rounding_mode = nir_rounding_mode_undef,
         .opt_tex_dest_types = nir_type_float | nir_type_int | nir_type_uint,
         .opt_image_dest_types = nir_type_float | nir_type_int | nir_type_uint,
         .integer_dest_saturates = true,
         .opt_image_store_data = true,
         .opt_image_srcs = true,
         .opt_srcs_options_count = separate_g16 ? 2 : 1,
         .opt_srcs_options = opt_srcs_options,
      };
      bool run_copy_prop = false;
      NIR_PASS(run_copy_prop, stage->nir, nir_opt_16bit_tex_image, &opt_16bit_options);

      /* Optimizing 16bit texture/image dests leaves scalar moves that need to be removed
       * before the next alu nir_lower_alu_width, otherwise we might end up with invalid swizzles
       * in the backend.
       * It also allows nir_opt_vectorize to make more progress.
       */
      if (run_copy_prop) {
         NIR_PASS(_, stage->nir, nir_opt_copy_prop);
         NIR_PASS(_, stage->nir, nir_opt_dce);
      }

      if (!stage->key.optimisations_disabled) {
         NIR_PASS(_, stage->nir, nir_opt_vectorize, ac_nir_opt_vectorize_cb, &gfx_level);
      }
   }

   /* cleanup passes */
   NIR_PASS(_, stage->nir, nir_lower_alu_width, ac_nir_opt_vectorize_cb, &gfx_level);

   NIR_PASS(_, stage->nir, nir_lower_load_const_to_scalar);
   NIR_PASS(_, stage->nir, nir_opt_copy_prop);
   NIR_PASS(_, stage->nir, nir_opt_dce);

   if (!stage->key.optimisations_disabled) {
      sink_opts |= nir_move_comparisons | nir_move_load_ubo | nir_move_load_ssbo | nir_move_alu;
      NIR_PASS(_, stage->nir, nir_opt_sink, sink_opts);

      nir_move_options move_opts = nir_move_const_undef | nir_move_load_ubo | nir_move_load_input |
                                   nir_move_load_frag_coord | nir_move_comparisons | nir_move_copies |
                                   nir_dont_move_byte_word_vecs | nir_move_alu;
      NIR_PASS(_, stage->nir, nir_opt_move, move_opts);

      /* Run nir_opt_move again to make sure that comparision are as close as possible to the first use to prevent SCC
       * spilling.
       */
      NIR_PASS(_, stage->nir, nir_opt_move, nir_move_comparisons);
   }

   /* Record the final LDS usage for later shader-info consumers. */
   stage->info.nir_shared_size = stage->nir->info.shared_size;
}
| |
| bool |
| radv_shader_should_clear_lds(const struct radv_compiler_info *compiler_info, const nir_shader *shader) |
| { |
| return (shader->info.stage == MESA_SHADER_COMPUTE || shader->info.stage == MESA_SHADER_MESH || |
| shader->info.stage == MESA_SHADER_TASK) && |
| shader->info.shared_size > 0 && compiler_info->cache_key->clear_lds; |
| } |
| |
| static uint32_t |
| radv_get_executable_count(struct radv_pipeline *pipeline) |
| { |
| uint32_t ret = 0; |
| |
| if (pipeline->type == RADV_PIPELINE_RAY_TRACING) { |
| struct radv_ray_tracing_pipeline *rt_pipeline = radv_pipeline_to_ray_tracing(pipeline); |
| for (uint32_t i = 0; i < rt_pipeline->stage_count; i++) |
| ret += rt_pipeline->stages[i].shader ? 1 : 0; |
| for (uint32_t i = 0; i < rt_pipeline->group_count; i++) |
| ret += rt_pipeline->groups[i].ahit_isec_shader ? 1 : 0; |
| } |
| |
| for (int i = 0; i < MESA_VULKAN_SHADER_STAGES; ++i) { |
| if (!pipeline->shaders[i]) |
| continue; |
| |
| ret += 1u; |
| if (i == MESA_SHADER_GEOMETRY && pipeline->gs_copy_shader) { |
| ret += 1u; |
| } |
| } |
| |
| return ret; |
| } |
| |
| struct radv_shader * |
| radv_get_shader_from_executable_index(struct radv_pipeline *pipeline, int index, mesa_shader_stage *stage) |
| { |
| if (pipeline->type == RADV_PIPELINE_RAY_TRACING) { |
| struct radv_ray_tracing_pipeline *rt_pipeline = radv_pipeline_to_ray_tracing(pipeline); |
| for (uint32_t i = 0; i < rt_pipeline->stage_count; i++) { |
| struct radv_ray_tracing_stage *rt_stage = &rt_pipeline->stages[i]; |
| if (!rt_stage->shader) |
| continue; |
| |
| if (!index) { |
| *stage = rt_stage->stage; |
| return rt_stage->shader; |
| } |
| |
| index--; |
| } |
| for (uint32_t i = 0; i < rt_pipeline->group_count; i++) { |
| struct radv_ray_tracing_group *rt_group = &rt_pipeline->groups[i]; |
| if (!rt_group->ahit_isec_shader) |
| continue; |
| |
| if (!index) { |
| *stage = |
| rt_group->intersection_shader != VK_SHADER_UNUSED_KHR ? MESA_SHADER_INTERSECTION : MESA_SHADER_ANY_HIT; |
| return rt_group->ahit_isec_shader; |
| } |
| |
| index--; |
| } |
| } |
| |
| for (int i = 0; i < MESA_VULKAN_SHADER_STAGES; ++i) { |
| if (!pipeline->shaders[i]) |
| continue; |
| if (!index) { |
| *stage = i; |
| return pipeline->shaders[i]; |
| } |
| |
| --index; |
| |
| if (i == MESA_SHADER_GEOMETRY && pipeline->gs_copy_shader) { |
| if (!index) { |
| *stage = i; |
| return pipeline->gs_copy_shader; |
| } |
| --index; |
| } |
| } |
| |
| *stage = -1; |
| return NULL; |
| } |
| |
/* Entry point for vkGetPipelineExecutablePropertiesKHR
 * (VK_KHR_pipeline_executable_properties): enumerates each compiled shader as
 * an executable with a human-readable name/description. Merged HW stages
 * (e.g. VS+TCS on GFX9+) are reported as a single executable covering both
 * Vulkan stages.
 */
VKAPI_ATTR VkResult VKAPI_CALL
radv_GetPipelineExecutablePropertiesKHR(VkDevice _device, const VkPipelineInfoKHR *pPipelineInfo,
                                        uint32_t *pExecutableCount, VkPipelineExecutablePropertiesKHR *pProperties)
{
   VK_FROM_HANDLE(radv_pipeline, pipeline, pPipelineInfo->pipeline);
   VK_OUTARRAY_MAKE_TYPED(VkPipelineExecutablePropertiesKHR, out, pProperties, pExecutableCount);

   const uint32_t count = radv_get_executable_count(pipeline);
   for (uint32_t executable_idx = 0; executable_idx < count; executable_idx++) {
      VkPipelineExecutablePropertiesKHR *props = vk_outarray_next_typed(VkPipelineExecutablePropertiesKHR, &out);
      if (!props)
         continue;

      mesa_shader_stage stage;
      struct radv_shader *shader = radv_get_shader_from_executable_index(pipeline, executable_idx, &stage);

      props->stages = mesa_to_vk_shader_stage(stage);

      const char *name = _mesa_shader_stage_to_string(stage);
      const char *description = NULL;
      switch (stage) {
      case MESA_SHADER_VERTEX:
         description = "Vulkan Vertex Shader";
         break;
      case MESA_SHADER_TESS_CTRL:
         /* No separate VS shader means VS was merged into TCS. */
         if (!pipeline->shaders[MESA_SHADER_VERTEX]) {
            props->stages |= VK_SHADER_STAGE_VERTEX_BIT;
            name = "vertex + tessellation control";
            description = "Combined Vulkan Vertex and Tessellation Control Shaders";
         } else {
            description = "Vulkan Tessellation Control Shader";
         }
         break;
      case MESA_SHADER_TESS_EVAL:
         description = "Vulkan Tessellation Evaluation Shader";
         break;
      case MESA_SHADER_GEOMETRY:
         if (shader->info.type == RADV_SHADER_TYPE_GS_COPY) {
            name = "geometry copy";
            description = "Extra shader stage that loads the GS output ringbuffer into the rasterizer";
            break;
         }

         /* Handle the merged-stage combinations (TES+GS or VS+GS). */
         if (pipeline->shaders[MESA_SHADER_TESS_CTRL] && !pipeline->shaders[MESA_SHADER_TESS_EVAL]) {
            props->stages |= VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT;
            name = "tessellation evaluation + geometry";
            description = "Combined Vulkan Tessellation Evaluation and Geometry Shaders";
         } else if (!pipeline->shaders[MESA_SHADER_TESS_CTRL] && !pipeline->shaders[MESA_SHADER_VERTEX]) {
            props->stages |= VK_SHADER_STAGE_VERTEX_BIT;
            name = "vertex + geometry";
            description = "Combined Vulkan Vertex and Geometry Shaders";
         } else {
            description = "Vulkan Geometry Shader";
         }
         break;
      case MESA_SHADER_FRAGMENT:
         description = "Vulkan Fragment Shader";
         break;
      case MESA_SHADER_COMPUTE:
         description = "Vulkan Compute Shader";
         break;
      case MESA_SHADER_MESH:
         description = "Vulkan Mesh Shader";
         break;
      case MESA_SHADER_TASK:
         description = "Vulkan Task Shader";
         break;
      case MESA_SHADER_RAYGEN:
         description = "Vulkan Ray Generation Shader";
         break;
      case MESA_SHADER_ANY_HIT:
         description = "Vulkan Any-Hit Shader";
         break;
      case MESA_SHADER_CLOSEST_HIT:
         description = "Vulkan Closest-Hit Shader";
         break;
      case MESA_SHADER_MISS:
         description = "Vulkan Miss Shader";
         break;
      case MESA_SHADER_INTERSECTION:
         if (shader->info.type == RADV_SHADER_TYPE_RT_TRAVERSAL)
            description = "Shader responsible for traversing the acceleration structure";
         else
            description = "Vulkan Intersection Shader";
         break;
      case MESA_SHADER_CALLABLE:
         description = "Vulkan Callable Shader";
         break;
      default:
         UNREACHABLE("Unsupported shader stage");
      }

      props->subgroupSize = shader->info.wave_size;
      VK_COPY_STR(props->name, name);
      VK_COPY_STR(props->description, description);
   }

   return vk_outarray_status(&out);
}
| |
/* vkGetPipelineExecutableStatisticsKHR: report per-executable statistics
 * (register usage, spilling, code size, LDS, scratch, max waves and IO
 * slot counts) for one shader of a pipeline, selected by executableIndex.
 */
VKAPI_ATTR VkResult VKAPI_CALL
radv_GetPipelineExecutableStatisticsKHR(VkDevice _device, const VkPipelineExecutableInfoKHR *pExecutableInfo,
                                        uint32_t *pStatisticCount, VkPipelineExecutableStatisticKHR *pStatistics)
{
   VK_FROM_HANDLE(radv_device, device, _device);
   VK_FROM_HANDLE(radv_pipeline, pipeline, pExecutableInfo->pipeline);
   mesa_shader_stage stage;
   struct radv_shader *shader =
      radv_get_shader_from_executable_index(pipeline, pExecutableInfo->executableIndex, &stage);

   const struct radv_physical_device *pdev = radv_device_physical(device);
   const enum amd_gfx_level gfx_level = pdev->info.gfx_level;

   VK_OUTARRAY_MAKE_TYPED(VkPipelineExecutableStatisticKHR, out, pStatistics, pStatisticCount);

   /* Seed from the compiler-captured statistics when they were recorded
    * (shader->dbg.statistics is only populated when stats capture is on),
    * then overwrite the fields we can always derive from the shader config. */
   struct amd_stats stats = {0};
   if (shader->dbg.statistics)
      stats = *shader->dbg.statistics;
   stats.driverhash = pipeline->pipeline_hash;
   stats.sgprs = shader->config.num_sgprs;
   stats.vgprs = shader->config.num_vgprs;
   stats.spillsgprs = shader->config.spilled_sgprs;
   stats.spillvgprs = shader->config.spilled_vgprs;
   stats.codesize = shader->exec_size;
   /* LDS usage is rounded up to the hardware allocation granularity. */
   stats.lds = align(shader->config.lds_size, ac_shader_get_lds_alloc_granularity(gfx_level));
   stats.scratch = shader->config.scratch_bytes_per_wave;
   stats.maxwaves = shader->max_waves;

   /* Input slot accounting. Which input counts apply depends on how stages
    * are merged on this gfx_level (e.g. VS runs merged into TCS/GS on GFX9+),
    * hence the per-stage conditions below. */
   switch (stage) {
   case MESA_SHADER_VERTEX:
      if (gfx_level <= GFX8 || (!shader->info.vs.as_es && !shader->info.vs.as_ls)) {
         /* VS inputs when VS is a separate stage */
         stats.inputs += util_bitcount(shader->info.vs.input_slot_usage_mask);
      }
      break;

   case MESA_SHADER_TESS_CTRL:
      if (gfx_level >= GFX9) {
         /* VS inputs when pipeline has tess */
         stats.inputs += util_bitcount(shader->info.vs.input_slot_usage_mask);
      }

      /* VS -> TCS inputs */
      stats.inputs += shader->info.tcs.num_linked_inputs;
      break;

   case MESA_SHADER_TESS_EVAL:
      if (gfx_level <= GFX8 || !shader->info.tes.as_es) {
         /* TCS -> TES inputs when TES is a separate stage */
         stats.inputs += shader->info.tes.num_linked_inputs + shader->info.tes.num_linked_patch_inputs;
      }
      break;

   case MESA_SHADER_GEOMETRY:
      /* The IO stats of the GS copy shader are already reflected by GS and FS, so leave it empty. */
      if (shader->info.type == RADV_SHADER_TYPE_GS_COPY)
         break;

      if (gfx_level >= GFX9) {
         if (shader->info.gs.es_type == MESA_SHADER_VERTEX) {
            /* VS inputs when pipeline has GS but no tess */
            stats.inputs += util_bitcount(shader->info.vs.input_slot_usage_mask);
         } else if (shader->info.gs.es_type == MESA_SHADER_TESS_EVAL) {
            /* TCS -> TES inputs when pipeline has GS */
            stats.inputs += shader->info.tes.num_linked_inputs + shader->info.tes.num_linked_patch_inputs;
         }
      }

      /* VS -> GS or TES -> GS inputs */
      stats.inputs += shader->info.gs.num_linked_inputs;
      break;

   case MESA_SHADER_FRAGMENT:
      stats.inputs += shader->info.ps.num_inputs;
      break;

   default:
      /* Other stages don't have IO or we are not interested in them. */
      break;
   }

   /* Output slot accounting — mirrors the input logic above: outputs of a
    * merged stage are counted on the shader that actually exports them. */
   switch (stage) {
   case MESA_SHADER_VERTEX:
      if (!shader->info.vs.as_ls && !shader->info.vs.as_es) {
         /* VS -> FS outputs. */
         stats.outputs += shader->info.outinfo.param_exports + shader->info.outinfo.prim_param_exports;
      } else if (gfx_level <= GFX8) {
         /* VS -> TCS, VS -> GS outputs on GFX6-8 */
         stats.outputs += shader->info.vs.num_linked_outputs;
      }
      break;

   case MESA_SHADER_TESS_CTRL:
      if (gfx_level >= GFX9) {
         /* VS -> TCS outputs on GFX9+ */
         stats.outputs += shader->info.vs.num_linked_outputs;
      }

      /* TCS -> TES outputs */
      stats.outputs += shader->info.tcs.io_info.highest_remapped_vram_output +
                       shader->info.tcs.io_info.highest_remapped_vram_patch_output;
      break;

   case MESA_SHADER_TESS_EVAL:
      if (!shader->info.tes.as_es) {
         /* TES -> FS outputs */
         stats.outputs += shader->info.outinfo.param_exports + shader->info.outinfo.prim_param_exports;
      } else if (gfx_level <= GFX8) {
         /* TES -> GS outputs on GFX6-8 */
         stats.outputs += shader->info.tes.num_linked_outputs;
      }
      break;

   case MESA_SHADER_GEOMETRY:
      /* The IO stats of the GS copy shader are already reflected by GS and FS, so leave it empty. */
      if (shader->info.type == RADV_SHADER_TYPE_GS_COPY)
         break;

      if (gfx_level >= GFX9) {
         if (shader->info.gs.es_type == MESA_SHADER_VERTEX) {
            /* VS -> GS outputs on GFX9+ */
            stats.outputs += shader->info.vs.num_linked_outputs;
         } else if (shader->info.gs.es_type == MESA_SHADER_TESS_EVAL) {
            /* TES -> GS outputs on GFX9+ */
            stats.outputs += shader->info.tes.num_linked_outputs;
         }
      }

      if (shader->info.is_ngg) {
         /* GS -> FS outputs (GFX10+ NGG) */
         stats.outputs += shader->info.outinfo.param_exports + shader->info.outinfo.prim_param_exports;
      } else {
         /* GS -> FS outputs (GFX6-10.3 legacy): sum the per-stream component
          * counts, times 4 bytes per component, in 16-byte slots. */
         stats.outputs += DIV_ROUND_UP(((uint32_t)shader->info.gs.num_components_per_stream[0] +
                                        (uint32_t)shader->info.gs.num_components_per_stream[1] +
                                        (uint32_t)shader->info.gs.num_components_per_stream[2] +
                                        (uint32_t)shader->info.gs.num_components_per_stream[3]) *
                                          4,
                                       16);
      }
      break;

   case MESA_SHADER_MESH:
      /* MS -> FS outputs */
      stats.outputs += shader->info.outinfo.param_exports + shader->info.outinfo.prim_param_exports;
      break;

   case MESA_SHADER_FRAGMENT:
      /* One slot per group of 4 written color channels (colors_written is
       * presumably a per-channel mask — TODO confirm), plus one slot each
       * for Z, stencil, sample-mask and MRT0-alpha exports when written. */
      stats.outputs += DIV_ROUND_UP(util_bitcount(shader->info.ps.colors_written), 4) + !!shader->info.ps.writes_z +
                       !!shader->info.ps.writes_stencil + !!shader->info.ps.writes_sample_mask +
                       !!shader->info.ps.writes_mrt0_alpha;
      break;

   default:
      /* Other stages don't have IO or we are not interested in them. */
      break;
   }

   vk_add_amd_stats(out, &stats);

   return vk_outarray_status(&out);
}
| |
| static VkResult |
| radv_copy_representation(void *data, size_t *data_size, const char *src) |
| { |
| size_t total_size = strlen(src) + 1; |
| |
| if (!data) { |
| *data_size = total_size; |
| return VK_SUCCESS; |
| } |
| |
| size_t size = MIN2(total_size, *data_size); |
| |
| memcpy(data, src, size); |
| if (size) |
| *((char *)data + size - 1) = 0; |
| return size < total_size ? VK_INCOMPLETE : VK_SUCCESS; |
| } |
| |
| VKAPI_ATTR VkResult VKAPI_CALL |
| radv_GetPipelineExecutableInternalRepresentationsKHR( |
| VkDevice _device, const VkPipelineExecutableInfoKHR *pExecutableInfo, uint32_t *pInternalRepresentationCount, |
| VkPipelineExecutableInternalRepresentationKHR *pInternalRepresentations) |
| { |
| VK_FROM_HANDLE(radv_device, device, _device); |
| VK_FROM_HANDLE(radv_pipeline, pipeline, pExecutableInfo->pipeline); |
| const struct radv_physical_device *pdev = radv_device_physical(device); |
| mesa_shader_stage stage; |
| struct radv_shader *shader = |
| radv_get_shader_from_executable_index(pipeline, pExecutableInfo->executableIndex, &stage); |
| |
| VkPipelineExecutableInternalRepresentationKHR *p = pInternalRepresentations; |
| VkPipelineExecutableInternalRepresentationKHR *end = |
| p + (pInternalRepresentations ? *pInternalRepresentationCount : 0); |
| VkResult result = VK_SUCCESS; |
| /* optimized NIR */ |
| if (p < end) { |
| p->isText = true; |
| VK_COPY_STR(p->name, "NIR Shader(s)"); |
| VK_COPY_STR(p->description, "The optimized NIR shader(s)"); |
| if (radv_copy_representation(p->pData, &p->dataSize, shader->dbg.nir_string) != VK_SUCCESS) |
| result = VK_INCOMPLETE; |
| } |
| ++p; |
| |
| /* backend IR */ |
| if (p < end) { |
| p->isText = true; |
| if (pdev->use_llvm) { |
| VK_COPY_STR(p->name, "LLVM IR"); |
| VK_COPY_STR(p->description, "The LLVM IR after some optimizations"); |
| } else { |
| VK_COPY_STR(p->name, "ACO IR"); |
| VK_COPY_STR(p->description, "The ACO IR after some optimizations"); |
| } |
| if (radv_copy_representation(p->pData, &p->dataSize, shader->dbg.ir_string) != VK_SUCCESS) |
| result = VK_INCOMPLETE; |
| } |
| ++p; |
| |
| /* Disassembler */ |
| if (p < end && shader->dbg.disasm_string) { |
| p->isText = true; |
| VK_COPY_STR(p->name, "Assembly"); |
| VK_COPY_STR(p->description, "Final Assembly"); |
| if (radv_copy_representation(p->pData, &p->dataSize, shader->dbg.disasm_string) != VK_SUCCESS) |
| result = VK_INCOMPLETE; |
| } |
| ++p; |
| |
| if (!pInternalRepresentations) |
| *pInternalRepresentationCount = p - pInternalRepresentations; |
| else if (p > end) { |
| result = VK_INCOMPLETE; |
| *pInternalRepresentationCount = end - pInternalRepresentations; |
| } else { |
| *pInternalRepresentationCount = p - pInternalRepresentations; |
| } |
| |
| return result; |
| } |
| |
| static void |
| vk_shader_module_finish(void *_module) |
| { |
| struct vk_shader_module *module = _module; |
| vk_object_base_finish(&module->base); |
| } |
| |
| VkShaderDescriptorSetAndBindingMappingInfoEXT * |
| radv_copy_descriptor_heap_mapping_info(const VkShaderDescriptorSetAndBindingMappingInfoEXT *mapping, void *mem_ctx) |
| { |
| VkShaderDescriptorSetAndBindingMappingInfoEXT *new_mapping = |
| ralloc(mem_ctx, VkShaderDescriptorSetAndBindingMappingInfoEXT); |
| if (!new_mapping) |
| return NULL; |
| |
| new_mapping->sType = mapping->sType; |
| new_mapping->pNext = NULL; |
| new_mapping->mappingCount = mapping->mappingCount; |
| |
| const uint32_t mappings_size = sizeof(VkDescriptorSetAndBindingMappingEXT) * mapping->mappingCount; |
| new_mapping->pMappings = ralloc_size(mem_ctx, mappings_size); |
| if (!new_mapping->pMappings) |
| return NULL; |
| |
| memcpy((void *)new_mapping->pMappings, mapping->pMappings, mappings_size); |
| |
| return new_mapping; |
| } |
| |
/* Deep-copy an array of VkPipelineShaderStageCreateInfo into mem_ctx so the
 * stage descriptions outlive the caller's create-info. Shader modules
 * (handle-referenced or inline VkShaderModuleCreateInfo), specialization
 * info and descriptor set/binding mapping structs are all duplicated into
 * mem_ctx; copied modules get a ralloc destructor that finishes their
 * vk_object_base. Returns NULL on any allocation failure (everything
 * already allocated remains owned by mem_ctx).
 */
VkPipelineShaderStageCreateInfo *
radv_copy_shader_stage_create_info(struct radv_device *device, uint32_t stageCount,
                                   const VkPipelineShaderStageCreateInfo *pStages, void *mem_ctx)
{
   VkPipelineShaderStageCreateInfo *new_stages;

   size_t size = sizeof(VkPipelineShaderStageCreateInfo) * stageCount;
   new_stages = ralloc_size(mem_ctx, size);
   if (!new_stages)
      return NULL;

   /* Shallow copy first; pointer members are deep-copied below.
    * (Guarded: memcpy with a size of 0 from a possibly-NULL source is UB.) */
   if (size)
      memcpy(new_stages, pStages, size);

   for (uint32_t i = 0; i < stageCount; i++) {
      VK_FROM_HANDLE(vk_shader_module, module, new_stages[i].module);

      /* Inline SPIR-V chained on the stage instead of a module handle. */
      const VkShaderModuleCreateInfo *minfo = vk_find_struct_const(pStages[i].pNext, SHADER_MODULE_CREATE_INFO);

      if (module) {
         /* Duplicate the referenced module, including its SPIR-V payload. */
         struct vk_shader_module *new_module = ralloc_size(mem_ctx, sizeof(struct vk_shader_module) + module->size);
         if (!new_module)
            return NULL;

         /* Finish the vk_object_base when mem_ctx is freed. */
         ralloc_set_destructor(new_module, vk_shader_module_finish);
         vk_object_base_init(&device->vk, &new_module->base, VK_OBJECT_TYPE_SHADER_MODULE);

         new_module->nir = NULL;
         memcpy(new_module->hash, module->hash, sizeof(module->hash));
         new_module->size = module->size;
         memcpy(new_module->data, module->data, module->size);

         module = new_module;
      } else if (minfo) {
         /* Wrap the inline SPIR-V in a mem_ctx-owned module. */
         module = ralloc_size(mem_ctx, sizeof(struct vk_shader_module) + minfo->codeSize);
         if (!module)
            return NULL;

         vk_shader_module_init(&device->vk, module, minfo);
      }

      if (module) {
         /* Deep-copy the specialization constants (map entries + data blob). */
         const VkSpecializationInfo *spec = new_stages[i].pSpecializationInfo;
         if (spec) {
            VkSpecializationInfo *new_spec = ralloc(mem_ctx, VkSpecializationInfo);
            if (!new_spec)
               return NULL;

            new_spec->mapEntryCount = spec->mapEntryCount;
            uint32_t map_entries_size = sizeof(VkSpecializationMapEntry) * spec->mapEntryCount;
            new_spec->pMapEntries = ralloc_size(mem_ctx, map_entries_size);
            if (!new_spec->pMapEntries)
               return NULL;
            memcpy((void *)new_spec->pMapEntries, spec->pMapEntries, map_entries_size);

            new_spec->dataSize = spec->dataSize;
            new_spec->pData = ralloc_size(mem_ctx, spec->dataSize);
            if (!new_spec->pData)
               return NULL;
            memcpy((void *)new_spec->pData, spec->pData, spec->dataSize);

            new_stages[i].pSpecializationInfo = new_spec;
         }

         new_stages[i].module = vk_shader_module_to_handle(module);
         new_stages[i].pName = ralloc_strdup(mem_ctx, new_stages[i].pName);
         if (!new_stages[i].pName)
            return NULL;
         /* Drop the caller's pNext chain; only the descriptor mapping info
          * (re-attached below) survives the copy. */
         new_stages[i].pNext = NULL;
      }

      /* NOTE(review): when neither a module handle nor inline SPIR-V is
       * present, pNext still points at the caller's original chain here —
       * confirm that retaining it is intended. */
      const VkShaderDescriptorSetAndBindingMappingInfoEXT *mapping =
         vk_find_struct_const(pStages[i].pNext, SHADER_DESCRIPTOR_SET_AND_BINDING_MAPPING_INFO_EXT);
      if (mapping) {
         VkShaderDescriptorSetAndBindingMappingInfoEXT *copied_mapping =
            radv_copy_descriptor_heap_mapping_info(mapping, mem_ctx);
         if (!copied_mapping)
            return NULL;

         new_stages[i].pNext = copied_mapping;
      }
   }

   return new_stages;
}
| |
| void |
| radv_pipeline_hash(const struct radv_device *device, const struct radv_pipeline_layout *pipeline_layout, |
| blake3_hasher *ctx) |
| { |
| _mesa_blake3_update(ctx, device->cache_hash, sizeof(device->cache_hash)); |
| if (pipeline_layout) |
| _mesa_blake3_update(ctx, pipeline_layout->hash, sizeof(pipeline_layout->hash)); |
| } |
| |
| void |
| radv_pipeline_hash_shader_stage(VkPipelineCreateFlags2 pipeline_flags, const VkPipelineShaderStageCreateInfo *sinfo, |
| const struct radv_shader_stage_key *stage_key, blake3_hasher *ctx) |
| { |
| unsigned char shader_blake3[BLAKE3_KEY_LEN]; |
| |
| vk_pipeline_hash_shader_stage(pipeline_flags, sinfo, NULL, shader_blake3); |
| |
| _mesa_blake3_update(ctx, shader_blake3, sizeof(shader_blake3)); |
| _mesa_blake3_update(ctx, stage_key, sizeof(*stage_key)); |
| } |
| |
| static void |
| radv_print_pso_history(const struct radv_pipeline *pipeline, const struct radv_shader *shader, FILE *output) |
| { |
| const uint64_t start_addr = radv_shader_get_va(shader) & ((1ull << 48) - 1); |
| const uint64_t end_addr = start_addr + shader->code_size; |
| |
| fprintf(output, "pipeline_hash=%.16llx, VA=%.16llx-%.16llx, stage=%s\n", (long long)pipeline->pipeline_hash, |
| (long long)start_addr, (long long)end_addr, _mesa_shader_stage_to_string(shader->info.stage)); |
| fflush(output); |
| } |
| |
| void |
| radv_pipeline_report_pso_history(const struct radv_device *device, struct radv_pipeline *pipeline) |
| { |
| const struct radv_physical_device *pdev = radv_device_physical(device); |
| const struct radv_instance *instance = radv_physical_device_instance(pdev); |
| FILE *output = instance->pso_history_logfile ? instance->pso_history_logfile : stderr; |
| |
| if (!(instance->debug_flags & RADV_DEBUG_PSO_HISTORY)) |
| return; |
| |
| /* Only report PSO history for application pipelines. */ |
| if (pipeline->is_internal) |
| return; |
| |
| switch (pipeline->type) { |
| case RADV_PIPELINE_GRAPHICS: |
| for (uint32_t i = 0; i < MESA_VULKAN_SHADER_STAGES; i++) { |
| const struct radv_shader *shader = pipeline->shaders[i]; |
| |
| if (shader) |
| radv_print_pso_history(pipeline, shader, output); |
| } |
| |
| if (pipeline->gs_copy_shader) |
| radv_print_pso_history(pipeline, pipeline->gs_copy_shader, output); |
| break; |
| case RADV_PIPELINE_COMPUTE: |
| radv_print_pso_history(pipeline, pipeline->shaders[MESA_SHADER_COMPUTE], output); |
| break; |
| case RADV_PIPELINE_RAY_TRACING: { |
| struct radv_ray_tracing_pipeline *rt_pipeline = radv_pipeline_to_ray_tracing(pipeline); |
| |
| if (rt_pipeline->prolog) |
| radv_print_pso_history(pipeline, rt_pipeline->prolog, output); |
| |
| if (pipeline->shaders[MESA_SHADER_INTERSECTION]) |
| radv_print_pso_history(pipeline, pipeline->shaders[MESA_SHADER_INTERSECTION], output); |
| |
| for (uint32_t i = 0; i < rt_pipeline->non_imported_stage_count; i++) { |
| const struct radv_shader *shader = rt_pipeline->stages[i].shader; |
| |
| if (shader) |
| radv_print_pso_history(pipeline, shader, output); |
| } |
| break; |
| } |
| default: |
| break; |
| } |
| } |