/*
* Copyright © 2025 Intel Corporation
* SPDX-License-Identifier: MIT
*/
#include "brw_nir.h"
#include "intel_nir.h"
#include "intel_nir.h"
#include "compiler/nir/nir_builder.h"
#include "compiler/spirv/nir_spirv.h"
#include "compiler/spirv/spirv_info.h"
#include "dev/intel_debug.h"
#include "util/u_dynarray.h"
static void
optimize(nir_shader *nir)
{
bool progress;
do {
progress = false;
NIR_PASS(progress, nir, nir_split_var_copies);
NIR_PASS(progress, nir, nir_split_struct_vars, nir_var_function_temp);
NIR_PASS(progress, nir, nir_lower_var_copies);
NIR_PASS(progress, nir, nir_lower_vars_to_ssa);
NIR_PASS(progress, nir, nir_copy_prop);
NIR_PASS(progress, nir, nir_opt_remove_phis);
NIR_PASS(progress, nir, nir_lower_all_phis_to_scalar);
NIR_PASS(progress, nir, nir_opt_dce);
NIR_PASS(progress, nir, nir_opt_dead_cf);
NIR_PASS(progress, nir, nir_opt_cse);
nir_opt_peephole_select_options peephole_select_options = {
.limit = 64,
.expensive_alu_ok = true,
};
NIR_PASS(progress, nir, nir_opt_peephole_select, &peephole_select_options);
NIR_PASS(progress, nir, nir_opt_phi_precision);
NIR_PASS(progress, nir, nir_opt_algebraic);
NIR_PASS(progress, nir, nir_opt_constant_folding);
NIR_PASS(progress, nir, nir_opt_deref);
NIR_PASS(progress, nir, nir_opt_copy_prop_vars);
NIR_PASS(progress, nir, nir_opt_undef);
NIR_PASS(progress, nir, nir_lower_undef_to_zero);
NIR_PASS(progress, nir, nir_opt_shrink_vectors, true);
NIR_PASS(progress, nir, nir_opt_loop_unroll);
} while (progress);
}
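/* Translate a pre-linked SPIR-V library, as produced by clc, into a NIR
 * shader usable by the Intel backend.
 */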
nir_shader *
brw_nir_from_spirv(void *mem_ctx, const uint32_t *spirv, size_t spirv_size)
{
static const struct spirv_capabilities spirv_caps = {
.Addresses = true,
.Float16 = true,
.Float64 = true,
.Groups = true,
.StorageImageWriteWithoutFormat = true,
.Int8 = true,
.Int16 = true,
.Int64 = true,
.Int64Atomics = true,
.Kernel = true,
.Linkage = true, /* We receive a linked kernel from clc */
.DenormFlushToZero = true,
.DenormPreserve = true,
.SignedZeroInfNanPreserve = true,
.RoundingModeRTE = true,
.RoundingModeRTZ = true,
.GenericPointer = true,
.GroupNonUniform = true,
.GroupNonUniformArithmetic = true,
.GroupNonUniformClustered = true,
.GroupNonUniformBallot = true,
.GroupNonUniformQuad = true,
.GroupNonUniformShuffle = true,
.GroupNonUniformVote = true,
.SubgroupDispatch = true,
};
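/* Use a generic 64-bit address format for everything a generic pointer can
 * point to (global, shared, and temp); constant data uses plain 64-bit
 * global addresses.
 */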
struct spirv_to_nir_options spirv_options = {
.environment = NIR_SPIRV_OPENCL,
.capabilities = &spirv_caps,
.printf = true,
.shared_addr_format = nir_address_format_62bit_generic,
.global_addr_format = nir_address_format_62bit_generic,
.temp_addr_format = nir_address_format_62bit_generic,
.constant_addr_format = nir_address_format_64bit_global,
.create_library = true,
};
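/* SPIR-V is a stream of 32-bit words, so the binary size must be a
 * multiple of 4 bytes.
 */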
assert(spirv_size % 4 == 0);
const nir_shader_compiler_options *nir_options = &brw_scalar_nir_options;
nir_shader *nir =
spirv_to_nir(spirv, spirv_size / 4, NULL, 0, MESA_SHADER_KERNEL,
"library", &spirv_options, nir_options);
nir_validate_shader(nir, "after spirv_to_nir");
nir_validate_ssa_dominance(nir, "after spirv_to_nir");
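/* Reparent the shader onto the caller-provided memory context so it
 * survives beyond this function.
 */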
ralloc_steal(mem_ctx, nir);
nir->info.name = ralloc_strdup(nir, "library");
nir_fixup_is_exported(nir);
NIR_PASS(_, nir, nir_lower_system_values);
NIR_PASS(_, nir, nir_lower_calls_to_builtins);
NIR_PASS(_, nir, nir_lower_variable_initializers, nir_var_function_temp);
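/* Returns have to be lowered before nir_inline_functions can inline
 * callees into their call sites.
 */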
NIR_PASS(_, nir, nir_lower_returns);
NIR_PASS(_, nir, nir_inline_functions);
//nir_remove_non_exported(nir);
NIR_PASS(_, nir, nir_copy_prop);
NIR_PASS(_, nir, nir_opt_deref);
/* We can't deal with constant data, so get rid of it. */
nir_lower_constant_to_temp(nir);
/* We can go ahead and lower the rest of the constant initializers. We do
* this here so that nir_remove_dead_variables below sees the corresponding
* stores.
*/
NIR_PASS(_, nir, nir_lower_variable_initializers, ~0);
/* LLVM loves to take advantage of the fact that vec3s in OpenCL are 16B
* aligned and so it can just read/write them as vec4s. This results in a
* LOT of vec4->vec3 casts on loads and stores. One solution to this
* problem is to get rid of all vec3 variables.
*/
NIR_PASS(_, nir, nir_lower_vec3_to_vec4,
nir_var_shader_temp | nir_var_function_temp | nir_var_mem_shared |
nir_var_mem_global | nir_var_mem_constant);
/* We assign explicit types early so that the optimizer can take advantage
* of that information and hopefully get rid of some of our memcpys.
*/
NIR_PASS(_, nir, nir_lower_vars_to_explicit_types,
nir_var_uniform | nir_var_shader_temp | nir_var_function_temp |
nir_var_mem_shared | nir_var_mem_global,
glsl_get_cl_type_size_align);
optimize(nir);
NIR_PASS(_, nir, nir_remove_dead_variables, nir_var_all, NULL);
/* Lower again, this time after dead-variables to get more compact variable
* layouts.
*/
NIR_PASS(_, nir, nir_lower_vars_to_explicit_types,
nir_var_shader_temp | nir_var_function_temp | nir_var_mem_shared |
nir_var_mem_global | nir_var_mem_constant,
glsl_get_cl_type_size_align);
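/* nir_lower_constant_to_temp above got rid of the constant data, so there
 * should be none left at this point.
 */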
assert(nir->constant_data_size == 0);
NIR_PASS(_, nir, nir_lower_memcpy);
NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_constant,
nir_address_format_64bit_global);
NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_uniform,
nir_address_format_64bit_global);
/* Note: we cannot lower explicit I/O here, because we need derefs intact
* for function calls into the library to work.
*/
NIR_PASS(_, nir, nir_lower_convert_alu_types, NULL);
NIR_PASS(_, nir, nir_opt_if, 0);
NIR_PASS(_, nir, nir_opt_idiv_const, 16);
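/* One more optimization round to clean up after the lowering passes above. */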
optimize(nir);
return nir;
}