| /* |
| * Copyright © 2014 Connor Abbott |
| * SPDX-License-Identifier: MIT |
| */ |
| |
| #ifndef NIR_SHADER_COMPILER_OPTIONS_H |
| #define NIR_SHADER_COMPILER_OPTIONS_H |
| |
| #include "util/macros.h" |
| #include "nir_defines.h" |
| #include <stdbool.h> |
| #include <stdint.h> |
| |
| #ifdef __cplusplus |
| extern "C" { |
| #endif |
| |
| typedef enum { |
| nir_lower_imul64 = (1 << 0), |
| nir_lower_isign64 = (1 << 1), |
| /** Lower all int64 modulus and division opcodes */ |
| nir_lower_divmod64 = (1 << 2), |
| /** Lower all 64-bit umul_high and imul_high opcodes */ |
| nir_lower_imul_high64 = (1 << 3), |
| nir_lower_bcsel64 = (1 << 4), |
| nir_lower_icmp64 = (1 << 5), |
| nir_lower_iadd64 = (1 << 6), |
| nir_lower_iabs64 = (1 << 7), |
| nir_lower_ineg64 = (1 << 8), |
| nir_lower_logic64 = (1 << 9), |
| nir_lower_minmax64 = (1 << 10), |
| nir_lower_shift64 = (1 << 11), |
| nir_lower_imul_2x32_64 = (1 << 12), |
| nir_lower_extract64 = (1 << 13), |
| nir_lower_ufind_msb64 = (1 << 14), |
| nir_lower_bit_count64 = (1 << 15), |
| nir_lower_subgroup_shuffle64 = (1 << 16), |
| nir_lower_scan_reduce_bitwise64 = (1 << 17), |
| nir_lower_scan_reduce_iadd64 = (1 << 18), |
| nir_lower_vote_ieq64 = (1 << 19), |
| nir_lower_usub_sat64 = (1 << 20), |
| nir_lower_iadd_sat64 = (1 << 21), |
| nir_lower_find_lsb64 = (1 << 22), |
| nir_lower_conv64 = (1 << 23), |
| nir_lower_uadd_sat64 = (1 << 24), |
| nir_lower_iadd3_64 = (1 << 25), |
| nir_lower_bitfield_reverse64 = (1 << 26), |
| nir_lower_bitfield_extract64 = (1 << 27), |
| } nir_lower_int64_options; |
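| |
| /* |
| * Illustrative sketch only (not part of the original header): a backend |
| * with native 64-bit add/compare but no 64-bit multiply or divide might |
| * combine these flags when filling |
| * nir_shader_compiler_options::lower_int64_options: |
| * |
| *    .lower_int64_options = |
| *       nir_lower_imul64 | nir_lower_imul_high64 | nir_lower_divmod64, |
| */ |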
| |
| typedef enum { |
| nir_lower_drcp = (1 << 0), |
| nir_lower_dsqrt = (1 << 1), |
| nir_lower_drsq = (1 << 2), |
| nir_lower_dtrunc = (1 << 3), |
| nir_lower_dfloor = (1 << 4), |
| nir_lower_dceil = (1 << 5), |
| nir_lower_dfract = (1 << 6), |
| nir_lower_dround_even = (1 << 7), |
| nir_lower_dmod = (1 << 8), |
| nir_lower_dsub = (1 << 9), |
| nir_lower_ddiv = (1 << 10), |
| nir_lower_dsign = (1 << 11), |
| nir_lower_dminmax = (1 << 12), |
| nir_lower_dsat = (1 << 13), |
| nir_lower_fp64_full_software = (1 << 14), |
| } nir_lower_doubles_options; |
| |
| typedef enum { |
| nir_divergence_single_prim_per_subgroup = (1 << 0), |
| nir_divergence_single_patch_per_tcs_subgroup = (1 << 1), |
| nir_divergence_single_patch_per_tes_subgroup = (1 << 2), |
| nir_divergence_view_index_uniform = (1 << 3), |
| nir_divergence_single_frag_shading_rate_per_subgroup = (1 << 4), |
| nir_divergence_multiple_workgroup_per_compute_subgroup = (1 << 5), |
| nir_divergence_shader_record_ptr_uniform = (1 << 6), |
| nir_divergence_uniform_load_tears = (1 << 7), |
| /* If used, this allows phis for divergent merges with undef and a uniform source to be considered uniform */ |
| nir_divergence_ignore_undef_if_phi_srcs = (1 << 8), |
| } nir_divergence_options; |
| |
| /** An instruction filtering callback |
| * |
| * Returns true if the instruction should be processed and false otherwise. |
| */ |
| typedef bool (*nir_instr_filter_cb)(const nir_instr *, const void *); |
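| |
| /* |
| * A minimal sketch of such a callback (hypothetical, not part of NIR): |
| * process only ALU instructions, e.g. when used as the |
| * lower_to_scalar_filter below to restrict scalarization. |
| * |
| *    static bool |
| *    only_scalarize_alu(const nir_instr *instr, const void *data) |
| *    { |
| *       (void)data; |
| *       return instr->type == nir_instr_type_alu; |
| *    } |
| */ |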
| |
| typedef enum { |
| /** |
| * Whether a fragment shader can interpolate the same input multiple times |
| * with different modes (smooth, noperspective) and locations (pixel, |
| * centroid, sample, at_offset, at_sample), excluding the flat mode. |
| * |
| * This matches AMD GPU flexibility and limitations and is a superset of |
| * the GL4 requirement that each input can be interpolated at its specified |
| * location, and then also as centroid, at_offset, and at_sample. |
| */ |
| nir_io_has_flexible_input_interpolation_except_flat = BITFIELD_BIT(0), |
| |
| /** |
| * nir_opt_varyings compacts (relocates) components of varyings by |
| * rewriting their locations completely, effectively moving components of |
| * varyings between slots. This option forces nir_opt_varyings to make |
| * VARYING_SLOT_POS unused by moving its contents to VARn if the consumer |
| * is not FS. If this option is not set and POS is unused, it moves |
| * components of VARn to POS until it's fully used. |
| */ |
| nir_io_dont_use_pos_for_non_fs_varyings = BITFIELD_BIT(1), |
| |
| nir_io_16bit_input_output_support = BITFIELD_BIT(2), |
| |
| /** |
| * Implement mediump inputs and outputs as normal 32-bit IO. |
| * Causes the mediump flag not to be set for IO semantics, essentially |
| * destroying any mediump-related IO information in the shader. |
| */ |
| nir_io_mediump_is_32bit = BITFIELD_BIT(3), |
| |
| /** |
| * Whether nir_opt_vectorize_io should ignore FS inputs. |
| */ |
| nir_io_prefer_scalar_fs_inputs = BITFIELD_BIT(4), |
| |
| /** |
| * Whether interpolated fragment shader vec4 slots can use load_input for |
| * a subset of their components to skip interpolation for those components. |
| * The result of such load_input is a value from a random (not necessarily |
| * provoking) vertex. If a value from the provoking vertex is required, |
| * the vec4 slot should have no load_interpolated_input instructions. |
| * |
| * This exposes the AMD capability that allows packing flat inputs with |
| * interpolated inputs in a limited number of cases. Normally, flat |
| * components must be in a separate vec4 slot to get the value from |
| * the provoking vertex. If the compiler can prove that all per-vertex |
| * values are equal (convergent, i.e. the provoking vertex doesn't matter), |
| * it can put such flat components into any interpolated vec4 slot. |
| * |
| * It should also be set if the hw can mix flat and interpolated components |
| * in the same vec4 slot. |
| * |
| * This causes nir_opt_varyings to skip interpolation for all varyings |
| * that are convergent, and enables better compaction and inter-shader code |
| * motion for convergent varyings. |
| */ |
| nir_io_mix_convergent_flat_with_interpolated = BITFIELD_BIT(5), |
| |
| /** |
| * Whether src_type and dest_type of IO intrinsics are irrelevant and |
| * should be ignored by nir_opt_vectorize_io. All drivers that always treat |
| * load_input and store_output as untyped and load_interpolated_input as |
| * float##bit_size should set this. |
| */ |
| nir_io_vectorizer_ignores_types = BITFIELD_BIT(6), |
| |
| /** |
| * Whether nir_opt_varyings should never promote convergent FS inputs |
| * to flat. |
| */ |
| nir_io_always_interpolate_convergent_fs_inputs = BITFIELD_BIT(7), |
| |
| /** |
| * Whether the first assigned color channel component should be equal to |
| * the first unused VARn component. |
| * |
| * For example, if the first unused VARn channel is VAR0.z, color channels |
| * are assigned in this order: |
| * COL0.z, COL0.w, COL0.x, COL0.y, COL1.z, COL1.w, COL1.x, COL1.y |
| * |
| * This allows certain drivers to merge outputs if each output sets |
| * different components; for example, 2 outputs writing VAR0.xy and COL0.z |
| * will use only 1 HW output. |
| */ |
| nir_io_compaction_rotates_color_channels = BITFIELD_BIT(8), |
| |
| /** |
| * Whether to group TES inputs as follows: |
| * - inputs used to compute only POS/CLIP outputs are first |
| * - inputs used to compute both POS/CLIP outputs and other outputs are next |
| * - inputs used to compute only other outputs are last |
| */ |
| nir_io_compaction_groups_tes_inputs_into_pos_and_var_groups = BITFIELD_BIT(9), |
| |
| /** |
| * RADV expects that high 16 bits of outputs set component >= 4. That's not |
| * legal in NIR, but RADV unfortunately relies on it because it's not |
| * validated. |
| */ |
| nir_io_radv_intrinsic_component_workaround = BITFIELD_BIT(10), |
| |
| /* Options affecting the GLSL compiler or Gallium are below. */ |
| |
| /** |
| * Lower load_deref/store_deref to load_input/store_output/etc. intrinsics. |
| * This only affects GLSL compilation and Gallium. |
| */ |
| nir_io_has_intrinsics = BITFIELD_BIT(16), |
| |
| /** |
| * Whether clip and cull distance arrays should be separate. If this is not |
| * set, cull distances will be moved into VARYING_SLOT_CLIP_DISTn after clip |
| * distances, and shader_info::clip_distance_array_size will be the index |
| * of the first cull distance. nir_lower_clip_cull_distance_array_vars does |
| * that. |
| */ |
| nir_io_separate_clip_cull_distance_arrays = BITFIELD_BIT(17), |
| } nir_io_options; |
| |
| typedef enum { |
| nir_lower_packing_op_pack_64_2x32, |
| nir_lower_packing_op_unpack_64_2x32, |
| nir_lower_packing_op_pack_64_4x16, |
| nir_lower_packing_op_unpack_64_4x16, |
| nir_lower_packing_op_pack_32_2x16, |
| nir_lower_packing_op_unpack_32_2x16, |
| nir_lower_packing_op_pack_32_4x8, |
| nir_lower_packing_op_unpack_32_4x8, |
| nir_lower_packing_num_ops, |
| } nir_lower_packing_op; |
| |
| typedef struct nir_shader_compiler_options { |
| bool lower_fdiv; |
| bool lower_ffma16; |
| bool lower_ffma32; |
| bool lower_ffma64; |
| bool fuse_ffma16; |
| bool fuse_ffma32; |
| bool fuse_ffma64; |
| bool lower_flrp16; |
| bool lower_flrp32; |
| /** Lowers flrp64 when the backend does not support doubles */ |
| bool lower_flrp64; |
| bool lower_fpow; |
| bool lower_fsat; |
| bool lower_fsqrt; |
| bool lower_sincos; |
| bool lower_fmod; |
| /** Lowers ibitfield_extract/ubitfield_extract for 8, 16 & 32 bits. */ |
| bool lower_bitfield_extract8; |
| bool lower_bitfield_extract16; |
| bool lower_bitfield_extract; |
| /** Lowers bitfield_insert. */ |
| bool lower_bitfield_insert; |
| /** Lowers bitfield_reverse to shifts. */ |
| bool lower_bitfield_reverse; |
| /** Lowers bit_count to shifts. */ |
| bool lower_bit_count; |
| /** Lowers ifind_msb. */ |
| bool lower_ifind_msb; |
| /** Lowers ufind_msb. */ |
| bool lower_ufind_msb; |
| /** Lowers find_lsb to ufind_msb and logic ops */ |
| bool lower_find_lsb; |
| bool lower_uadd_carry; |
| bool lower_usub_borrow; |
| /** Lowers imul_high/umul_high to 16-bit multiplies and carry operations. */ |
| bool lower_mul_high; |
| bool lower_mul_high16; |
| /** lowers fneg to fmul(x, -1.0). Driver must call nir_opt_algebraic_late() */ |
| bool lower_fneg; |
| /** lowers ineg to isub. Driver must call nir_opt_algebraic_late(). */ |
| bool lower_ineg; |
| /** lowers fisnormal to alu ops. */ |
| bool lower_fisnormal; |
| |
| /* lower {slt,sge,seq,sne} to {flt,fge,feq,fneu} + b2f: */ |
| bool lower_scmp; |
| |
| /* lower {b,f}all_equalN/{b,f}any_nequalN (e.g. fany_nequal4 to sne+fdot4+fsat) */ |
| bool lower_vector_cmp; |
| |
| /** enable rules to avoid bit ops */ |
| bool lower_bitops; |
| |
| /** enables rules to lower isign to imin+imax */ |
| bool lower_isign; |
| |
| /** enables rules to lower fsign to fsub and flt */ |
| bool lower_fsign; |
| |
| /** enables rules to lower iabs to ineg+imax */ |
| bool lower_iabs; |
| |
| /** enable rules that avoid generating umax from signed integer ops */ |
| bool lower_umax; |
| |
| /** enable rules that avoid generating umin from signed integer ops */ |
| bool lower_umin; |
| |
| /* lower fmin/fmax with signed zero preserve to fmin/fmax with |
| * no_signed_zero, for backends whose fmin/fmax implementations do not |
| * implement IEEE-754-2019 semantics for signed zero. |
| */ |
| bool lower_fminmax_signed_zero; |
| |
| /* lower fdph to fdot4 */ |
| bool lower_fdph; |
| |
| /* Does the native fdot instruction replicate its result for four |
| * components? If so, then opt_algebraic_late will turn all fdotN |
| * instructions into fdotN_replicated instructions. |
| */ |
| bool fdot_replicates; |
| |
| /** lowers ffloor to fsub+ffract: */ |
| bool lower_ffloor; |
| |
| /** lowers ffract to fsub+ffloor: */ |
| bool lower_ffract; |
| |
| /** lowers fceil to fneg+ffloor+fneg: */ |
| bool lower_fceil; |
| |
| bool lower_ftrunc; |
| |
| /** Lowers fround_even to ffract+feq+csel. |
| * |
| * Not correct in that it doesn't correctly handle the "_even" part of the |
| * rounding, but good enough for DX9 array indexing handling on DX9-class |
| * hardware. |
| */ |
| bool lower_fround_even; |
| |
| bool lower_ldexp; |
| |
| bool lower_pack_half_2x16; |
| bool lower_pack_unorm_2x16; |
| bool lower_pack_snorm_2x16; |
| bool lower_pack_unorm_4x8; |
| bool lower_pack_snorm_4x8; |
| bool lower_pack_64_2x32; |
| bool lower_pack_64_4x16; |
| bool lower_pack_32_2x16; |
| bool lower_pack_64_2x32_split; |
| bool lower_pack_32_2x16_split; |
| bool lower_unpack_half_2x16; |
| bool lower_unpack_unorm_2x16; |
| bool lower_unpack_snorm_2x16; |
| bool lower_unpack_unorm_4x8; |
| bool lower_unpack_snorm_4x8; |
| bool lower_unpack_64_2x32_split; |
| bool lower_unpack_32_2x16_split; |
| |
| bool lower_pack_split; |
| |
| bool lower_extract_byte; |
| bool lower_extract_word; |
| bool lower_insert_byte; |
| bool lower_insert_word; |
| |
| /* Indicates that the driver only has zero-based vertex id */ |
| bool vertex_id_zero_based; |
| |
| /** |
| * If enabled, gl_BaseVertex will be lowered as: |
| * is_indexed_draw (~0/0) & firstvertex |
| */ |
| bool lower_base_vertex; |
| |
| /* Indicates that gl_InstanceIndex already includes base index |
| * and doesn't require further lowering. |
| */ |
| bool instance_id_includes_base_index; |
| |
| /** |
| * If enabled, gl_HelperInvocation will be lowered as: |
| * |
| * !((1 << sample_id) & sample_mask_in) |
| * |
| * This relies on hardware implementation details that may not hold for |
| * all hardware, in particular that the FS is only executed for covered |
| * samples or for helper invocations. So, do not blindly enable this |
| * option. |
| * |
| * Note: See also issue #22 in ARB_shader_image_load_store |
| */ |
| bool lower_helper_invocation; |
| |
| /** |
| * Convert gl_SampleMaskIn to gl_HelperInvocation as follows: |
| * |
| * gl_SampleMaskIn == 0 ---> gl_HelperInvocation |
| * gl_SampleMaskIn != 0 ---> !gl_HelperInvocation |
| */ |
| bool optimize_sample_mask_in; |
| |
| /** |
| * Optimize load_front_face ? a : -a to load_front_face_fsign * a |
| */ |
| bool optimize_load_front_face_fsign; |
| |
| /** |
| * Optimize boolean reductions of quad broadcasts. This should only be enabled if |
| * nir_intrinsic_reduce supports INCLUDE_HELPERS. |
| */ |
| bool optimize_quad_vote_to_reduce; |
| |
| bool lower_cs_local_index_to_id; |
| bool lower_cs_local_id_to_index; |
| |
| /* Prevents lowering global_invocation_id to be in terms of workgroup_id */ |
| bool has_cs_global_id; |
| |
| bool lower_device_index_to_zero; |
| |
| /* Set if nir_lower_pntc_ytransform() should invert gl_PointCoord, |
| * either when the framebuffer is flipped or when |
| * GL_POINT_SPRITE_COORD_ORIGIN is GL_LOWER_LEFT. |
| */ |
| bool lower_wpos_pntc; |
| |
| /** |
| * Set if nir_op_[iu]hadd and nir_op_[iu]rhadd instructions should be |
| * lowered to simple arithmetic. |
| * |
| * If this flag is set, the lowering will be applied to all bit-sizes of |
| * these instructions. |
| * |
| * :c:member:`lower_hadd64` |
| */ |
| bool lower_hadd; |
| |
| /** |
| * Set if only 64-bit nir_op_[iu]hadd and nir_op_[iu]rhadd instructions |
| * should be lowered to simple arithmetic. |
| * |
| * If this flag is set, the lowering will be applied to only 64-bit |
| * versions of these instructions. |
| * |
| * :c:member:`lower_hadd` |
| */ |
| bool lower_hadd64; |
| |
| /** |
| * Set if nir_op_uadd_sat should be lowered to simple arithmetic. |
| * |
| * If this flag is set, the lowering will be applied to all bit-sizes of |
| * these instructions. |
| */ |
| bool lower_uadd_sat; |
| |
| /** |
| * Set if nir_op_usub_sat should be lowered to simple arithmetic. |
| * |
| * If this flag is set, the lowering will be applied to all bit-sizes of |
| * these instructions. |
| */ |
| bool lower_usub_sat; |
| |
| /** |
| * Set if nir_op_iadd_sat and nir_op_isub_sat should be lowered to simple |
| * arithmetic. |
| * |
| * If this flag is set, the lowering will be applied to all bit-sizes of |
| * these instructions. |
| */ |
| bool lower_iadd_sat; |
| |
| /** |
| * Set if imul_32x16 and umul_32x16 should be lowered to simple |
| * arithmetic. |
| */ |
| bool lower_mul_32x16; |
| |
| /** |
| * Set if bf2f and f2bf should be lowered to arithmetic. |
| */ |
| bool lower_bfloat16_conversions; |
| |
| bool vectorize_tess_levels; |
| bool lower_to_scalar; |
| nir_instr_filter_cb lower_to_scalar_filter; |
| |
| /** |
| * Disables potentially harmful algebraic transformations for architectures |
| * with SIMD-within-a-register semantics. |
| * |
| * Note, to actually vectorize 16bit instructions, use nir_opt_vectorize() |
| * with a suitable callback function. |
| */ |
| bool vectorize_vec2_16bit; |
| |
| /** |
| * Should the linker unify inputs_read/outputs_written between adjacent |
| * shader stages which are linked into a single program? |
| */ |
| bool unify_interfaces; |
| |
| /** |
| * Whether nir_lower_io() will lower interpolateAt functions to |
| * load_interpolated_input intrinsics. |
| * |
| * Unlike nir_lower_io_use_interpolated_input_intrinsics this will only |
| * lower these functions and leave input load intrinsics untouched. |
| */ |
| bool lower_interpolate_at; |
| |
| /* Lowers [iu]mul_2x32_64 when 32x32->64-bit multiplication is not supported */ |
| bool lower_mul_2x32_64; |
| |
| /* Indicates that urol and uror are supported at the given bit sizes */ |
| bool has_rotate8; |
| bool has_rotate16; |
| bool has_rotate32; |
| |
| /** Backend supports shfr */ |
| bool has_shfr32; |
| |
| /** Backend supports ternary addition */ |
| bool has_iadd3; |
| |
| /** |
| * Backend supports amul and would like them generated whenever |
| * possible. This is stronger than has_imul24 for amul, but does not imply |
| * support for imul24. |
| */ |
| bool has_amul; |
| |
| /** |
| * Backend supports imul24, and would like to use it (when possible) |
| * for address/offset calculation. If true, driver should call |
| * nir_lower_amul(). (If not set, amul will automatically be lowered |
| * to imul.) |
| */ |
| bool has_imul24; |
| |
| /** Backend supports umul24, if not set umul24 will automatically be lowered |
| * to imul with masked inputs */ |
| bool has_umul24; |
| |
| /** Backend supports imul24_relaxed and umul24_relaxed, if not set they will be lowered |
| * to imul24, umul24 or imul. |
| */ |
| bool has_mul24_relaxed; |
| |
| /** Backend supports 32-bit imad */ |
| bool has_imad32; |
| |
| /** Backend supports umad24, if not set umad24 will automatically be lowered |
| * to imul with masked inputs and iadd */ |
| bool has_umad24; |
| |
| /* Backend supports fused compare against zero and csel */ |
| bool has_fused_comp_and_csel; |
| /* Backend supports fused int eq/ne against zero and csel. */ |
| bool has_icsel_eqz64; |
| bool has_icsel_eqz32; |
| bool has_icsel_eqz16; |
| |
| /* Backend supports fneo, fequ, fltu, fgeu. */ |
| bool has_fneo_fcmpu; |
| |
| /* Backend supports ford and funord. */ |
| bool has_ford_funord; |
| |
| /** Backend supports fsub, if not set fsub will automatically be lowered to |
| * fadd(x, fneg(y)). If true, driver should call nir_opt_algebraic_late(). */ |
| bool has_fsub; |
| |
| /** Backend supports isub, if not set isub will automatically be lowered to |
| * iadd(x, ineg(y)). If true, driver should call nir_opt_algebraic_late(). */ |
| bool has_isub; |
| |
| /** Backend supports pack_32_4x8 or pack_32_4x8_split. */ |
| bool has_pack_32_4x8; |
| |
| /** Backend supports nir_load_texture_scale and prefers it over txs for nir |
| * lowerings. */ |
| bool has_texture_scaling; |
| |
| /** Backend supports sdot_4x8_iadd. */ |
| bool has_sdot_4x8; |
| |
| /** Backend supports udot_4x8_uadd. */ |
| bool has_udot_4x8; |
| |
| /** Backend supports sudot_4x8_iadd. */ |
| bool has_sudot_4x8; |
| |
| /** Backend supports sdot_4x8_iadd_sat. */ |
| bool has_sdot_4x8_sat; |
| |
| /** Backend supports udot_4x8_uadd_sat. */ |
| bool has_udot_4x8_sat; |
| |
| /** Backend supports sudot_4x8_iadd_sat. */ |
| bool has_sudot_4x8_sat; |
| |
| /** Backend supports sdot_2x16 and udot_2x16 opcodes. */ |
| bool has_dot_2x16; |
| |
| /** Backend supports bfdot2_bfadd opcode. */ |
| bool has_bfdot2_bfadd; |
| |
| /** Backend supports fmulz (and ffmaz if lower_ffma32=false) */ |
| bool has_fmulz; |
| |
| /** |
| * Backend supports fmulz (and ffmaz if lower_ffma32=false) but only if |
| * FLOAT_CONTROLS_DENORM_PRESERVE_FP32 is not set |
| */ |
| bool has_fmulz_no_denorms; |
| |
| /** Backend supports 32bit ufind_msb_rev and ifind_msb_rev. */ |
| bool has_find_msb_rev; |
| |
| /** Backend supports pack_half_2x16_rtz_split. */ |
| bool has_pack_half_2x16_rtz; |
| |
| /** Backend supports bitz/bitnz. */ |
| bool has_bit_test; |
| |
| /** Backend supports ubfe/ibfe. */ |
| bool has_bfe; |
| |
| /** Backend supports bfm. */ |
| bool has_bfm; |
| |
| /** Backend supports bfi. */ |
| bool has_bfi; |
| |
| /** Backend supports bitfield_select. */ |
| bool has_bitfield_select; |
| |
| /** Backend supports uclz. */ |
| bool has_uclz; |
| |
| /** Backend supports msad_u4x8. */ |
| bool has_msad; |
| |
| /** Backend supports f2e4m3fn_satfn */ |
| bool has_f2e4m3fn_satfn; |
| |
| /** Backend supports load_global_bounded intrinsics. */ |
| bool has_load_global_bounded; |
| |
| /** |
| * Is this the Intel vec4 backend? |
| * |
| * Used to inhibit algebraic optimizations that are known to be harmful on |
| * the Intel vec4 backend. This is generally applicable to any |
| * optimization that might cause more immediate values to be used in |
| * 3-source (e.g., ffma and flrp) instructions. |
| */ |
| bool intel_vec4; |
| |
| /** |
| * On most Intel GPUs, ternary operations such as FMA and BFE cannot take |
| * immediate sources, so two to three instructions may eventually be needed. |
| */ |
| bool avoid_ternary_with_two_constants; |
| |
| /** Whether 8-bit ALU is supported. */ |
| bool support_8bit_alu; |
| |
| /** Whether 16-bit ALU is supported. */ |
| bool support_16bit_alu; |
| |
| unsigned max_unroll_iterations; |
| unsigned max_unroll_iterations_aggressive; |
| unsigned max_unroll_iterations_fp64; |
| |
| bool lower_uniforms_to_ubo; |
| |
| /* Specifies if indirect sampler array access will trigger forced loop |
| * unrolling. |
| */ |
| bool force_indirect_unrolling_sampler; |
| |
| /* Some older drivers don't support GLSL versions with the concept of flat |
| * varyings and also don't support integers. This setting helps us avoid |
| * marking varyings as flat and potentially having them changed to ints via |
| * varying packing. |
| */ |
| bool no_integers; |
| |
| /** |
| * Specifies which type of indirectly accessed variables should force |
| * loop unrolling. |
| */ |
| nir_variable_mode force_indirect_unrolling; |
| |
| bool driver_functions; |
| |
| /** |
| * If true, the driver will call nir_lower_int64 itself and the frontend |
| * should not do so. This may enable better optimization around address |
| * modes. |
| */ |
| bool late_lower_int64; |
| nir_lower_int64_options lower_int64_options; |
| nir_lower_doubles_options lower_doubles_options; |
| nir_divergence_options divergence_analysis_options; |
| |
| /** |
| * The masks of shader stages that support indirect indexing with |
| * load_input and store_output intrinsics. They are used by |
| * nir_lower_io_passes. |
| */ |
| uint8_t support_indirect_inputs; |
| uint8_t support_indirect_outputs; |
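| |
| /* |
| * These are presumably bitmasks indexed by gl_shader_stage. For example |
| * (illustrative only), a driver that supports indirect indexing only for |
| * FS inputs and VS outputs might set: |
| * |
| *    .support_indirect_inputs = BITFIELD_BIT(MESA_SHADER_FRAGMENT), |
| *    .support_indirect_outputs = BITFIELD_BIT(MESA_SHADER_VERTEX), |
| */ |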
| |
| /** Store the variable offset into the intrinsic range_base instead |
| * of adding it to the image index. |
| */ |
| bool lower_image_offset_to_range_base; |
| |
| /** Store the variable offset into the intrinsic range_base instead |
| * of adding it to the atomic source |
| */ |
| bool lower_atomic_offset_to_range_base; |
| |
| /** Don't convert medium-precision casts (e.g. f2fmp) into concrete |
| * type casts (e.g. f2f16). |
| */ |
| bool preserve_mediump; |
| |
| /** lowers fquantize2f16 to alu ops. */ |
| bool lower_fquantize2f16; |
| |
| /** Lower f2f16 to f2f16_rtz when execution mode is not rtne. */ |
| bool force_f2f16_rtz; |
| |
| /** Lower VARYING_SLOT_LAYER in FS to SYSTEM_VALUE_LAYER_ID. */ |
| bool lower_layer_fs_input_to_sysval; |
| |
| /** clip/cull distance and tess level arrays use compact semantics */ |
| bool compact_arrays; |
| |
| /** |
| * Whether discard gets emitted as nir_intrinsic_demote. |
| * Otherwise, nir_intrinsic_terminate is used. |
| */ |
| bool discard_is_demote; |
| |
| /** |
| * Whether the new-style derivative intrinsics are supported. If false, |
| * legacy ALU derivative ops will be emitted. This transitional option will |
| * be removed once all drivers are converted to derivative intrinsics. |
| */ |
| bool has_ddx_intrinsics; |
| |
| /** Whether derivative intrinsics must be scalarized. */ |
| bool scalarize_ddx; |
| |
| /** |
| * Assign a range of driver locations to per-view outputs, with unique |
| * slots for each view. If unset, per-view outputs will be treated |
| * similarly to other arrayed IO, and only slots for one view will be |
| * assigned. Regardless of this setting, per-view outputs are only assigned |
| * slots for one value in var->data.location. |
| */ |
| bool per_view_unique_driver_locations; |
| |
| /** |
| * Emit nir_intrinsic_store_per_view_output with compacted view indices |
| * rather than absolute view indices. When using compacted indices, the Nth |
| * index refers to the Nth enabled view, not the Nth absolute view. For |
| * example, with view mask 0b1010, compacted index 0 is absolute index 1, |
| * and compacted index 1 is absolute index 3. Note that compacted view |
| * indices do not correspond directly to gl_ViewIndex. |
| * |
| * If compact_view_index is unset, per-view indices must be constant before |
| * nir_lower_io. This can be guaranteed by calling nir_lower_io_temporaries |
| * first. |
| */ |
| bool compact_view_index; |
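| |
| /* |
| * For reference, the absolute-to-compacted mapping described above can be |
| * computed as a popcount of the lower view-mask bits (sketch, assuming the |
| * util_bitcount and BITFIELD_MASK helpers from src/util): |
| * |
| *    unsigned compacted = util_bitcount(view_mask & BITFIELD_MASK(abs_idx)); |
| * |
| * With view_mask = 0b1010 this maps absolute index 1 to 0 and absolute |
| * index 3 to 1, matching the example above. |
| */ |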
| |
| /** Options determining lowering and behavior of inputs and outputs. */ |
| nir_io_options io_options; |
| |
| /** |
| * Bit mask of nir_lower_packing_op to skip lowering some nir ops in |
| * nir_lower_packing(). |
| */ |
| unsigned skip_lower_packing_ops; |
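| |
| /* |
| * Illustrative example only: a driver with native 64 <-> 2x32 packing but |
| * no native 32 <-> 2x16 packing might set: |
| * |
| *    .skip_lower_packing_ops = |
| *       BITFIELD_BIT(nir_lower_packing_op_pack_64_2x32) |
| *       | BITFIELD_BIT(nir_lower_packing_op_unpack_64_2x32), |
| */ |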
| |
| /** Driver callback that defines how to lower mediump IO. |
| * Used by nir_lower_io_passes. |
| */ |
| void (*lower_mediump_io)(struct nir_shader *nir); |
| |
| /** |
| * Return the maximum cost of an expression that's written to a shader |
| * output that can be moved into the next shader to remove that output. |
| * |
| * Currently only uniform expressions are moved. A uniform expression is |
| * any ALU expression sourcing only constants, uniforms, and UBO loads. |
| * |
| * Set to NULL or return 0 if you only want to propagate constants from |
| * outputs to inputs. |
| * |
| * Drivers can set the maximum cost based on the types of consecutive |
| * shaders or shader SHA1s. |
| * |
| * Drivers should also set "varying_estimate_instr_cost". |
| */ |
| unsigned (*varying_expression_max_cost)(struct nir_shader *consumer, |
| struct nir_shader *producer); |
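| |
| /* |
| * A minimal sketch of such a callback (hypothetical driver code): allow |
| * moving small uniform expressions only across a VS -> FS interface and |
| * nothing elsewhere. |
| * |
| *    static unsigned |
| *    foo_varying_expression_max_cost(struct nir_shader *consumer, |
| *                                    struct nir_shader *producer) |
| *    { |
| *       return producer->info.stage == MESA_SHADER_VERTEX && |
| *              consumer->info.stage == MESA_SHADER_FRAGMENT ? 4 : 0; |
| *    } |
| */ |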
| |
| /** |
| * Return the cost of an instruction that could be moved into the next |
| * shader. If the cost of all instructions in an expression is <= |
| * varying_expression_max_cost(), the instruction is moved. |
| * |
| * When this callback isn't set, nir_opt_varyings uses its own version. |
| */ |
| unsigned (*varying_estimate_instr_cost)(struct nir_instr *instr); |
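| |
| /* |
| * Sketch of a custom cost function (hypothetical): count every ALU |
| * instruction as 1 and everything else as 0, so only pure ALU expressions |
| * are considered for code motion. |
| * |
| *    static unsigned |
| *    foo_varying_estimate_instr_cost(struct nir_instr *instr) |
| *    { |
| *       return instr->type == nir_instr_type_alu ? 1 : 0; |
| *    } |
| */ |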
| |
| /** |
| * When the varying_expression_max_cost callback isn't set, this specifies |
| * the maximum cost of a uniform expression that is allowed to be moved |
| * from output stores into the next shader stage to eliminate those output |
| * stores and corresponding inputs. |
| * |
| * 0 only allows propagating constants written to output stores to |
| * the next shader. |
| * |
| * At least 2 is required for moving a uniform stored in an output into |
| * the next shader according to default_varying_estimate_instr_cost. |
| */ |
| unsigned max_varying_expression_cost; |
| } nir_shader_compiler_options; |
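| |
| /* |
| * Typical usage (illustrative only; the driver name and field values are |
| * made up): a driver defines one static, immutable instance of this struct |
| * per hardware generation and hands it to the frontend. |
| * |
| *    static const nir_shader_compiler_options foo_nir_options = { |
| *       .lower_fdiv = true, |
| *       .lower_scmp = true, |
| *       .has_iadd3 = true, |
| *       .max_unroll_iterations = 32, |
| *       .io_options = nir_io_has_intrinsics, |
| *    }; |
| */ |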
| |
| #ifdef __cplusplus |
| } |
| #endif |
| |
| #endif /* NIR_SHADER_COMPILER_OPTIONS_H */ |