| /* |
| * Copyright © 2025 Intel Corporation |
| * SPDX-License-Identifier: MIT |
| */ |
| |
| #include "brw_nir.h" |
| #include "intel_nir.h" |
| |
| #include "intel_nir.h" |
| #include "compiler/nir/nir_builder.h" |
| #include "compiler/spirv/nir_spirv.h" |
| #include "compiler/spirv/spirv_info.h" |
| #include "dev/intel_debug.h" |
| #include "util/u_dynarray.h" |
| |
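/* Run the standard NIR cleanup passes to a fixed point, i.e. until a full
 * iteration over the pass list makes no further progress.
 */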
| static void |
| optimize(nir_shader *nir) |
| { |
| bool progress; |
| do { |
| progress = false; |
| |
| NIR_PASS(progress, nir, nir_split_var_copies); |
| NIR_PASS(progress, nir, nir_split_struct_vars, nir_var_function_temp); |
| NIR_PASS(progress, nir, nir_lower_var_copies); |
| NIR_PASS(progress, nir, nir_lower_vars_to_ssa); |
| |
| NIR_PASS(progress, nir, nir_copy_prop); |
| NIR_PASS(progress, nir, nir_opt_remove_phis); |
| NIR_PASS(progress, nir, nir_lower_all_phis_to_scalar); |
| NIR_PASS(progress, nir, nir_opt_dce); |
| NIR_PASS(progress, nir, nir_opt_dead_cf); |
| NIR_PASS(progress, nir, nir_opt_cse); |
| |
| nir_opt_peephole_select_options peephole_select_options = { |
| .limit = 64, |
| .expensive_alu_ok = true, |
| }; |
| NIR_PASS(progress, nir, nir_opt_peephole_select, &peephole_select_options); |
| NIR_PASS(progress, nir, nir_opt_phi_precision); |
| NIR_PASS(progress, nir, nir_opt_algebraic); |
| NIR_PASS(progress, nir, nir_opt_constant_folding); |
| |
| NIR_PASS(progress, nir, nir_opt_deref); |
| NIR_PASS(progress, nir, nir_opt_copy_prop_vars); |
| NIR_PASS(progress, nir, nir_opt_undef); |
| NIR_PASS(progress, nir, nir_lower_undef_to_zero); |
| |
| NIR_PASS(progress, nir, nir_opt_shrink_vectors, true); |
| NIR_PASS(progress, nir, nir_opt_loop_unroll); |
| |
| } while (progress); |
| } |
| |
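/* Compile an OpenCL SPIR-V module into a NIR library shader.  The result is
 * ralloc-parented to mem_ctx and built as a library (create_library), so its
 * functions remain callable and kernels can later be linked against it.
 */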
| nir_shader * |
| brw_nir_from_spirv(void *mem_ctx, const uint32_t *spirv, size_t spirv_size) |
| { |
| static const struct spirv_capabilities spirv_caps = { |
| .Addresses = true, |
| .Float16 = true, |
| .Float64 = true, |
| .Groups = true, |
| .StorageImageWriteWithoutFormat = true, |
| .Int8 = true, |
| .Int16 = true, |
| .Int64 = true, |
| .Int64Atomics = true, |
| .Kernel = true, |
.Linkage = true, /* We receive a linked kernel from clc */
| .DenormFlushToZero = true, |
| .DenormPreserve = true, |
| .SignedZeroInfNanPreserve = true, |
| .RoundingModeRTE = true, |
| .RoundingModeRTZ = true, |
| .GenericPointer = true, |
| .GroupNonUniform = true, |
| .GroupNonUniformArithmetic = true, |
| .GroupNonUniformClustered = true, |
| .GroupNonUniformBallot = true, |
| .GroupNonUniformQuad = true, |
| .GroupNonUniformShuffle = true, |
| .GroupNonUniformVote = true, |
| .SubgroupDispatch = true, |
| }; |
| struct spirv_to_nir_options spirv_options = { |
| .environment = NIR_SPIRV_OPENCL, |
| .capabilities = &spirv_caps, |
| .printf = true, |
| .shared_addr_format = nir_address_format_62bit_generic, |
| .global_addr_format = nir_address_format_62bit_generic, |
| .temp_addr_format = nir_address_format_62bit_generic, |
| .constant_addr_format = nir_address_format_64bit_global, |
| .create_library = true, |
| }; |
| |
| assert(spirv_size % 4 == 0); |
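/* A SPIR-V binary is a stream of 32-bit words, so its size in bytes must be
 * a multiple of 4; spirv_to_nir takes a word count, hence spirv_size / 4
 * below.
 */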
| |
| const nir_shader_compiler_options *nir_options = &brw_scalar_nir_options; |
| |
| nir_shader *nir = |
| spirv_to_nir(spirv, spirv_size / 4, NULL, 0, MESA_SHADER_KERNEL, |
| "library", &spirv_options, nir_options); |
| nir_validate_shader(nir, "after spirv_to_nir"); |
| nir_validate_ssa_dominance(nir, "after spirv_to_nir"); |
| ralloc_steal(mem_ctx, nir); |
| nir->info.name = ralloc_strdup(nir, "library"); |
| |
| nir_fixup_is_exported(nir); |
| |
| NIR_PASS(_, nir, nir_lower_system_values); |
| NIR_PASS(_, nir, nir_lower_calls_to_builtins); |
| |
| NIR_PASS(_, nir, nir_lower_variable_initializers, nir_var_function_temp); |
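/* Returns must be lowered to structured control flow before
 * nir_inline_functions can splice callee bodies into their call sites.
 */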
| NIR_PASS(_, nir, nir_lower_returns); |
| NIR_PASS(_, nir, nir_inline_functions); |
| //nir_remove_non_exported(nir); |
| NIR_PASS(_, nir, nir_copy_prop); |
| NIR_PASS(_, nir, nir_opt_deref); |
| |
/* We can't deal with constant data, so get rid of it here. */
| nir_lower_constant_to_temp(nir); |
| |
/* We can go ahead and lower the rest of the constant initializers. We do
 * this here so that nir_remove_dead_variables below sees the corresponding
 * stores.
 */
| NIR_PASS(_, nir, nir_lower_variable_initializers, ~0); |
| |
/* LLVM loves to take advantage of the fact that vec3s in OpenCL are 16B
 * aligned and so it can just read/write them as vec4s. This results in a
 * LOT of vec4->vec3 casts on loads and stores. One solution to this
 * problem is to get rid of all vec3 variables.
 */
| NIR_PASS(_, nir, nir_lower_vec3_to_vec4, |
| nir_var_shader_temp | nir_var_function_temp | nir_var_mem_shared | |
| nir_var_mem_global | nir_var_mem_constant); |
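/* A sketch of the pattern this removes (illustrative, not exact NIR
 * output): a load of a vec3 variable otherwise shows up as a vec4-typed
 * deref cast of the vec3 deref, a vec4-wide load_deref, and a trim back
 * down to three components.  Widening every vec3 variable to vec4 makes
 * those casts no-ops.
 */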
| |
| /* We assign explicit types early so that the optimizer can take advantage |
| * of that information and hopefully get rid of some of our memcpys. |
| */ |
| NIR_PASS(_, nir, nir_lower_vars_to_explicit_types, |
| nir_var_uniform | nir_var_shader_temp | nir_var_function_temp | |
| nir_var_mem_shared | nir_var_mem_global, |
| glsl_get_cl_type_size_align); |
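/* glsl_get_cl_type_size_align applies OpenCL layout rules, under which a
 * 3-component vector takes the size and alignment of the 4-component one:
 * e.g. float3 has size 16 and alignment 16.  This is also what makes the
 * vec3->vec4 lowering above safe.
 */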
| |
| optimize(nir); |
| |
| NIR_PASS(_, nir, nir_remove_dead_variables, nir_var_all, NULL); |
| |
/* Lower again, this time after dead-variable removal, to get more compact
 * variable layouts.
 */
| NIR_PASS(_, nir, nir_lower_vars_to_explicit_types, |
| nir_var_shader_temp | nir_var_function_temp | nir_var_mem_shared | |
| nir_var_mem_global | nir_var_mem_constant, |
| glsl_get_cl_type_size_align); |
| assert(nir->constant_data_size == 0); |
| |
| NIR_PASS(_, nir, nir_lower_memcpy); |
| |
| NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_constant, |
| nir_address_format_64bit_global); |
| |
| NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_uniform, |
| nir_address_format_64bit_global); |
| |
/* Note: we cannot lower explicit I/O here, because we need derefs intact
 * for function calls into the library to work.
 */
| |
| NIR_PASS(_, nir, nir_lower_convert_alu_types, NULL); |
| NIR_PASS(_, nir, nir_opt_if, 0); |
| NIR_PASS(_, nir, nir_opt_idiv_const, 16); |
| |
| optimize(nir); |
| |
| return nir; |
| } |