/*
* Copyright © 2025 Intel Corporation
* SPDX-License-Identifier: MIT
*/
#include "brw_nir.h"
#include "intel_nir.h"
#include "intel_nir.h"
#include "compiler/nir/nir_builder.h"
#include "compiler/spirv/nir_spirv.h"
#include "compiler/spirv/spirv_info.h"
#include "dev/intel_debug.h"
#include "util/u_dynarray.h"
static void
optimize(nir_shader *nir)
{
bool progress;
do {
progress = false;
NIR_PASS(progress, nir, nir_split_var_copies);
NIR_PASS(progress, nir, nir_split_struct_vars, nir_var_function_temp);
NIR_PASS(progress, nir, nir_lower_var_copies);
NIR_PASS(progress, nir, nir_lower_vars_to_ssa);
NIR_PASS(progress, nir, nir_copy_prop);
NIR_PASS(progress, nir, nir_opt_remove_phis);
NIR_PASS(progress, nir, nir_lower_all_phis_to_scalar);
NIR_PASS(progress, nir, nir_opt_dce);
NIR_PASS(progress, nir, nir_opt_dead_cf);
NIR_PASS(progress, nir, nir_opt_cse);
nir_opt_peephole_select_options peephole_select_options = {
.limit = 64,
.expensive_alu_ok = true,
};
NIR_PASS(progress, nir, nir_opt_peephole_select, &peephole_select_options);
NIR_PASS(progress, nir, nir_opt_phi_precision);
NIR_PASS(progress, nir, nir_opt_algebraic);
NIR_PASS(progress, nir, nir_opt_constant_folding);
NIR_PASS(progress, nir, nir_opt_deref);
NIR_PASS(progress, nir, nir_opt_copy_prop_vars);
NIR_PASS(progress, nir, nir_opt_undef);
NIR_PASS(progress, nir, nir_lower_undef_to_zero);
NIR_PASS(progress, nir, nir_opt_shrink_vectors, true);
NIR_PASS(progress, nir, nir_opt_loop_unroll);
} while (progress);
}
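/* Translate a pre-linked SPIR-V library, as produced by clc, into a NIR
 * shader usable by the Intel backend.
 */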
nir_shader *
brw_nir_from_spirv(void *mem_ctx, const uint32_t *spirv, size_t spirv_size)
{
static const struct spirv_capabilities spirv_caps = {
.Addresses = true,
.Float16 = true,
.Float64 = true,
.Groups = true,
.StorageImageWriteWithoutFormat = true,
.Int8 = true,
.Int16 = true,
.Int64 = true,
.Int64Atomics = true,
.Kernel = true,
.Linkage = true, /* We receive a linked kernel from clc */
.DenormFlushToZero = true,
.DenormPreserve = true,
.SignedZeroInfNanPreserve = true,
.RoundingModeRTE = true,
.RoundingModeRTZ = true,
.GenericPointer = true,
.GroupNonUniform = true,
.GroupNonUniformArithmetic = true,
.GroupNonUniformClustered = true,
.GroupNonUniformBallot = true,
.GroupNonUniformQuad = true,
.GroupNonUniformShuffle = true,
.GroupNonUniformVote = true,
.SubgroupDispatch = true,
};
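/* Use a generic 64-bit address format for everything a generic pointer can
 * point to (global, shared, and temp); constant data uses plain 64-bit
 * global addresses.
 */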
struct spirv_to_nir_options spirv_options = {
.environment = NIR_SPIRV_OPENCL,
.capabilities = &spirv_caps,
.printf = true,
.shared_addr_format = nir_address_format_62bit_generic,
.global_addr_format = nir_address_format_62bit_generic,
.temp_addr_format = nir_address_format_62bit_generic,
.constant_addr_format = nir_address_format_64bit_global,
.create_library = true,
};
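/* SPIR-V is a stream of 32-bit words, so the binary size must be a
 * multiple of 4 bytes.
 */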
assert(spirv_size % 4 == 0);
const nir_shader_compiler_options *nir_options = &brw_scalar_nir_options;
nir_shader *nir =
spirv_to_nir(spirv, spirv_size / 4, NULL, 0, MESA_SHADER_KERNEL,
"library", &spirv_options, nir_options);
nir_validate_shader(nir, "after spirv_to_nir");
nir_validate_ssa_dominance(nir, "after spirv_to_nir");
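/* Reparent the shader onto the caller-provided memory context so it
 * survives beyond this function.
 */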
ralloc_steal(mem_ctx, nir);
nir->info.name = ralloc_strdup(nir, "library");
nir_fixup_is_exported(nir);
NIR_PASS(_, nir, nir_lower_system_values);
NIR_PASS(_, nir, nir_lower_calls_to_builtins);
NIR_PASS(_, nir, nir_lower_variable_initializers, nir_var_function_temp);
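/* Returns have to be lowered before nir_inline_functions can inline
 * callees into their call sites.
 */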
NIR_PASS(_, nir, nir_lower_returns);
NIR_PASS(_, nir, nir_inline_functions);
//nir_remove_non_exported(nir);
NIR_PASS(_, nir, nir_copy_prop);
NIR_PASS(_, nir, nir_opt_deref);
/* We can't deal with constant data, so get rid of it. */
nir_lower_constant_to_temp(nir);
/* We can go ahead and lower the rest of the constant initializers. We do
* this here so that nir_remove_dead_variables below sees the corresponding
* stores.
*/
NIR_PASS(_, nir, nir_lower_variable_initializers, ~0);
/* LLVM loves to take advantage of the fact that vec3s in OpenCL are 16B
* aligned and so it can just read/write them as vec4s. This results in a
* LOT of vec4->vec3 casts on loads and stores. One solution to this
* problem is to get rid of all vec3 variables.
*/
NIR_PASS(_, nir, nir_lower_vec3_to_vec4,
nir_var_shader_temp | nir_var_function_temp | nir_var_mem_shared |
nir_var_mem_global | nir_var_mem_constant);
/* We assign explicit types early so that the optimizer can take advantage
* of that information and hopefully get rid of some of our memcpys.
*/
NIR_PASS(_, nir, nir_lower_vars_to_explicit_types,
nir_var_uniform | nir_var_shader_temp | nir_var_function_temp |
nir_var_mem_shared | nir_var_mem_global,
glsl_get_cl_type_size_align);
optimize(nir);
NIR_PASS(_, nir, nir_remove_dead_variables, nir_var_all, NULL);
/* Lower again, this time after dead-variables to get more compact variable
* layouts.
*/
NIR_PASS(_, nir, nir_lower_vars_to_explicit_types,
nir_var_shader_temp | nir_var_function_temp | nir_var_mem_shared |
nir_var_mem_global | nir_var_mem_constant,
glsl_get_cl_type_size_align);
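/* nir_lower_constant_to_temp above got rid of the constant data, so there
 * should be none left at this point.
 */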
assert(nir->constant_data_size == 0);
NIR_PASS(_, nir, nir_lower_memcpy);
NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_constant,
nir_address_format_64bit_global);
NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_uniform,
nir_address_format_64bit_global);
/* Note: we cannot lower explicit I/O here, because we need derefs intact
* for function calls into the library to work.
*/
NIR_PASS(_, nir, nir_lower_convert_alu_types, NULL);
NIR_PASS(_, nir, nir_opt_if, 0);
NIR_PASS(_, nir, nir_opt_idiv_const, 16);
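/* One more optimization round to clean up after the lowering passes above. */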
optimize(nir);
return nir;
}