/*
* Copyright © 2014 Connor Abbott
* SPDX-License-Identifier: MIT
*/
#ifndef NIR_SHADER_COMPILER_OPTIONS_H
#define NIR_SHADER_COMPILER_OPTIONS_H
#include "util/macros.h"
#include "nir_defines.h"
#include <stdbool.h>
#include <stdint.h>
#ifdef __cplusplus
extern "C" {
#endif
typedef enum {
nir_lower_imul64 = (1 << 0),
nir_lower_isign64 = (1 << 1),
/** Lower all int64 modulus and division opcodes */
nir_lower_divmod64 = (1 << 2),
/** Lower all 64-bit umul_high and imul_high opcodes */
nir_lower_imul_high64 = (1 << 3),
nir_lower_bcsel64 = (1 << 4),
nir_lower_icmp64 = (1 << 5),
nir_lower_iadd64 = (1 << 6),
nir_lower_iabs64 = (1 << 7),
nir_lower_ineg64 = (1 << 8),
nir_lower_logic64 = (1 << 9),
nir_lower_minmax64 = (1 << 10),
nir_lower_shift64 = (1 << 11),
nir_lower_imul_2x32_64 = (1 << 12),
nir_lower_extract64 = (1 << 13),
nir_lower_ufind_msb64 = (1 << 14),
nir_lower_bit_count64 = (1 << 15),
nir_lower_subgroup_shuffle64 = (1 << 16),
nir_lower_scan_reduce_bitwise64 = (1 << 17),
nir_lower_scan_reduce_iadd64 = (1 << 18),
nir_lower_vote_ieq64 = (1 << 19),
nir_lower_usub_sat64 = (1 << 20),
nir_lower_iadd_sat64 = (1 << 21),
nir_lower_find_lsb64 = (1 << 22),
nir_lower_conv64 = (1 << 23),
nir_lower_uadd_sat64 = (1 << 24),
nir_lower_iadd3_64 = (1 << 25),
nir_lower_bitfield_reverse64 = (1 << 26),
nir_lower_bitfield_extract64 = (1 << 27),
} nir_lower_int64_options;
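/*
 * Example (illustrative only, not taken from any real driver): these flags
 * are ORed together into nir_shader_compiler_options::lower_int64_options.
 * A backend with native 64-bit add/compare but no 64-bit multiply or
 * division might request just the corresponding lowerings:
 *
 *    options->lower_int64_options = nir_lower_imul64 |
 *                                   nir_lower_imul_high64 |
 *                                   nir_lower_divmod64;
 *
 * nir_lower_int64() only rewrites the opcodes selected by this mask.
 */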
typedef enum {
nir_lower_drcp = (1 << 0),
nir_lower_dsqrt = (1 << 1),
nir_lower_drsq = (1 << 2),
nir_lower_dtrunc = (1 << 3),
nir_lower_dfloor = (1 << 4),
nir_lower_dceil = (1 << 5),
nir_lower_dfract = (1 << 6),
nir_lower_dround_even = (1 << 7),
nir_lower_dmod = (1 << 8),
nir_lower_dsub = (1 << 9),
nir_lower_ddiv = (1 << 10),
nir_lower_dsign = (1 << 11),
nir_lower_dminmax = (1 << 12),
nir_lower_dsat = (1 << 13),
nir_lower_fp64_full_software = (1 << 14),
} nir_lower_doubles_options;
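/*
 * Example (illustrative, hypothetical backend): a backend with full fp64
 * ALU support but no native double reciprocal/sqrt could set
 *
 *    options->lower_doubles_options = nir_lower_drcp |
 *                                     nir_lower_dsqrt |
 *                                     nir_lower_drsq;
 *
 * while a backend with no fp64 hardware at all would set
 * nir_lower_fp64_full_software to get a full software emulation.
 */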
typedef enum {
nir_divergence_single_prim_per_subgroup = (1 << 0),
nir_divergence_single_patch_per_tcs_subgroup = (1 << 1),
nir_divergence_single_patch_per_tes_subgroup = (1 << 2),
nir_divergence_view_index_uniform = (1 << 3),
nir_divergence_single_frag_shading_rate_per_subgroup = (1 << 4),
nir_divergence_multiple_workgroup_per_compute_subgroup = (1 << 5),
nir_divergence_shader_record_ptr_uniform = (1 << 6),
nir_divergence_uniform_load_tears = (1 << 7),
/* If set, a phi at a divergent merge whose sources are undef and a uniform value may still be considered uniform */
nir_divergence_ignore_undef_if_phi_srcs = (1 << 8),
} nir_divergence_options;
/** An instruction filtering callback
*
* Returns true if the instruction should be processed and false otherwise.
*/
typedef bool (*nir_instr_filter_cb)(const nir_instr *, const void *);
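/*
 * Sketch of a filter callback (hypothetical, names are illustrative): a
 * driver could pass something like this as lower_to_scalar_filter below to
 * restrict scalarization to ALU instructions:
 *
 *    static bool
 *    only_alu_filter(const nir_instr *instr, const void *data)
 *    {
 *       return instr->type == nir_instr_type_alu;
 *    }
 */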
typedef enum {
/**
* Whether a fragment shader can interpolate the same input multiple times
* with different modes (smooth, noperspective) and locations (pixel,
* centroid, sample, at_offset, at_sample), excluding the flat mode.
*
* This matches AMD GPU flexibility and limitations and is a superset of
* the GL4 requirement that each input can be interpolated at its specified
* location, and then also as centroid, at_offset, and at_sample.
*/
nir_io_has_flexible_input_interpolation_except_flat = BITFIELD_BIT(0),
/**
* nir_opt_varyings compacts (relocates) components of varyings by
* rewriting their locations completely, effectively moving components of
* varyings between slots. This option forces nir_opt_varyings to make
* VARYING_SLOT_POS unused by moving its contents to VARn if the consumer
* is not FS. If this option is not set and POS is unused, it moves
* components of VARn to POS until it's fully used.
*/
nir_io_dont_use_pos_for_non_fs_varyings = BITFIELD_BIT(1),
nir_io_16bit_input_output_support = BITFIELD_BIT(2),
/**
* Implement mediump inputs and outputs as normal 32-bit IO.
* Causes the mediump flag not to be set for IO semantics, essentially
* destroying any mediump-related IO information in the shader.
*/
nir_io_mediump_is_32bit = BITFIELD_BIT(3),
/**
* Whether nir_opt_vectorize_io should ignore FS inputs.
*/
nir_io_prefer_scalar_fs_inputs = BITFIELD_BIT(4),
/**
* Whether interpolated fragment shader vec4 slots can use load_input for
* a subset of its components to skip interpolation for those components.
* The result of such load_input is a value from a random (not necessarily
* provoking) vertex. If a value from the provoking vertex is required,
* the vec4 slot should have no load_interpolated_input instructions.
*
* This exposes the AMD capability that allows packing flat inputs with
* interpolated inputs in a limited number of cases. Normally, flat
* components must be in a separate vec4 slot to get the value from
* the provoking vertex. If the compiler can prove that all per-vertex
* values are equal (convergent, i.e. the provoking vertex doesn't matter),
* it can put such flat components into any interpolated vec4 slot.
*
* It should also be set if the hw can mix flat and interpolated components
* in the same vec4 slot.
*
* This causes nir_opt_varyings to skip interpolation for all varyings
* that are convergent, and enables better compaction and inter-shader code
* motion for convergent varyings.
*/
nir_io_mix_convergent_flat_with_interpolated = BITFIELD_BIT(5),
/**
* Whether src_type and dest_type of IO intrinsics are irrelevant and
* should be ignored by nir_opt_vectorize_io. All drivers that always treat
* load_input and store_output as untyped and load_interpolated_input as
* float##bit_size should set this.
*/
nir_io_vectorizer_ignores_types = BITFIELD_BIT(6),
/**
* Whether nir_opt_varyings should never promote convergent FS inputs
* to flat.
*/
nir_io_always_interpolate_convergent_fs_inputs = BITFIELD_BIT(7),
/**
* Whether the first assigned color channel component should be equal to
* the first unused VARn component.
*
* For example, if the first unused VARn channel is VAR0.z, color channels
* are assigned in this order:
* COL0.z, COL0.w, COL0.x, COL0.y, COL1.z, COL1.w, COL1.x, COL1.y
*
* This allows certain drivers to merge outputs if each output sets
* different components, for example 2 outputs writing VAR0.xy and COL0.z
* will only use 1 HW output.
*/
nir_io_compaction_rotates_color_channels = BITFIELD_BIT(8),
/**
* Whether to group TES inputs as follows:
* - inputs used to compute only POS/CLIP outputs are first
* - inputs used to compute both POS/CLIP outputs and other outputs are next
* - inputs used to compute only other outputs are last
*/
nir_io_compaction_groups_tes_inputs_into_pos_and_var_groups = BITFIELD_BIT(9),
/**
* RADV expects outputs that store the high 16 bits to set component >= 4. That's not
* legal in NIR, but RADV unfortunately relies on it because it's not
* validated.
*/
nir_io_radv_intrinsic_component_workaround = BITFIELD_BIT(10),
/* Options affecting the GLSL compiler or Gallium are below. */
/**
* Lower load_deref/store_deref to load_input/store_output/etc. intrinsics.
* This only affects GLSL compilation and Gallium.
*/
nir_io_has_intrinsics = BITFIELD_BIT(16),
/**
* Whether clip and cull distance arrays should be separate. If this is not
* set, cull distances will be moved into VARYING_SLOT_CLIP_DISTn after clip
* distances, and shader_info::clip_distance_array_size will be the index
* of the first cull distance. nir_lower_clip_cull_distance_array_vars does
* that.
*/
nir_io_separate_clip_cull_distance_arrays = BITFIELD_BIT(17),
} nir_io_options;
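/*
 * Example (illustrative, hypothetical backend): these flags are ORed into
 * nir_shader_compiler_options::io_options, e.g.:
 *
 *    options->io_options = nir_io_16bit_input_output_support |
 *                          nir_io_vectorizer_ignores_types |
 *                          nir_io_has_intrinsics;
 */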
typedef enum {
nir_lower_packing_op_pack_64_2x32,
nir_lower_packing_op_unpack_64_2x32,
nir_lower_packing_op_pack_64_4x16,
nir_lower_packing_op_unpack_64_4x16,
nir_lower_packing_op_pack_32_2x16,
nir_lower_packing_op_unpack_32_2x16,
nir_lower_packing_op_pack_32_4x8,
nir_lower_packing_op_unpack_32_4x8,
nir_lower_packing_num_ops,
} nir_lower_packing_op;
typedef struct nir_shader_compiler_options {
bool lower_fdiv;
bool lower_ffma16;
bool lower_ffma32;
bool lower_ffma64;
bool fuse_ffma16;
bool fuse_ffma32;
bool fuse_ffma64;
bool lower_flrp16;
bool lower_flrp32;
/** Lowers 64-bit flrp for backends that do not support doubles. */
bool lower_flrp64;
bool lower_fpow;
bool lower_fsat;
bool lower_fsqrt;
bool lower_sincos;
bool lower_fmod;
/** Lowers ibitfield_extract/ubitfield_extract for 8, 16 & 32 bits. */
bool lower_bitfield_extract8;
bool lower_bitfield_extract16;
bool lower_bitfield_extract;
/** Lowers bitfield_insert. */
bool lower_bitfield_insert;
/** Lowers bitfield_reverse to shifts. */
bool lower_bitfield_reverse;
/** Lowers bit_count to shifts. */
bool lower_bit_count;
/** Lowers ifind_msb. */
bool lower_ifind_msb;
/** Lowers ufind_msb. */
bool lower_ufind_msb;
/** Lowers find_lsb to ufind_msb and logic ops */
bool lower_find_lsb;
bool lower_uadd_carry;
bool lower_usub_borrow;
/** Lowers imul_high/umul_high to 16-bit multiplies and carry operations. */
bool lower_mul_high;
bool lower_mul_high16;
/** lowers fneg to fmul(x, -1.0). Driver must call nir_opt_algebraic_late() */
bool lower_fneg;
/** lowers ineg to isub. Driver must call nir_opt_algebraic_late(). */
bool lower_ineg;
/** lowers fisnormal to alu ops. */
bool lower_fisnormal;
/* lower {slt,sge,seq,sne} to {flt,fge,feq,fneu} + b2f: */
bool lower_scmp;
/* lower b/fall_equalN and b/fany_nequalN (e.g. fany_nequal4 to sne+fdot4+fsat) */
bool lower_vector_cmp;
/** enable rules to avoid bit ops */
bool lower_bitops;
/** enables rules to lower isign to imin+imax */
bool lower_isign;
/** enables rules to lower fsign to fsub and flt */
bool lower_fsign;
/** enables rules to lower iabs to ineg+imax */
bool lower_iabs;
/** enable rules that avoid generating umax from signed integer ops */
bool lower_umax;
/** enable rules that avoid generating umin from signed integer ops */
bool lower_umin;
/* lower fmin/fmax with signed zero preserve to fmin/fmax with
* no_signed_zero, for backends whose fmin/fmax implementations do not
* implement IEEE-754-2019 semantics for signed zero.
*/
bool lower_fminmax_signed_zero;
/* lower fdph to fdot4 */
bool lower_fdph;
/* Does the native fdot instruction replicate its result for four
* components? If so, then opt_algebraic_late will turn all fdotN
* instructions into fdotN_replicated instructions.
*/
bool fdot_replicates;
/** lowers ffloor to fsub+ffract: */
bool lower_ffloor;
/** lowers ffract to fsub+ffloor: */
bool lower_ffract;
/** lowers fceil to fneg+ffloor+fneg: */
bool lower_fceil;
bool lower_ftrunc;
/** Lowers fround_even to ffract+feq+csel.
*
* Not correct in that it doesn't correctly handle the "_even" part of the
* rounding, but good enough for DX9 array indexing handling on DX9-class
* hardware.
*/
bool lower_fround_even;
bool lower_ldexp;
bool lower_pack_half_2x16;
bool lower_pack_unorm_2x16;
bool lower_pack_snorm_2x16;
bool lower_pack_unorm_4x8;
bool lower_pack_snorm_4x8;
bool lower_pack_64_2x32;
bool lower_pack_64_4x16;
bool lower_pack_32_2x16;
bool lower_pack_64_2x32_split;
bool lower_pack_32_2x16_split;
bool lower_unpack_half_2x16;
bool lower_unpack_unorm_2x16;
bool lower_unpack_snorm_2x16;
bool lower_unpack_unorm_4x8;
bool lower_unpack_snorm_4x8;
bool lower_unpack_64_2x32_split;
bool lower_unpack_32_2x16_split;
bool lower_pack_split;
bool lower_extract_byte;
bool lower_extract_word;
bool lower_insert_byte;
bool lower_insert_word;
/* Indicates that the driver only has zero-based vertex id */
bool vertex_id_zero_based;
/**
* If enabled, gl_BaseVertex will be lowered as:
* is_indexed_draw (~0/0) & firstvertex
*/
bool lower_base_vertex;
/* Indicates that gl_InstanceIndex already includes base index
* and doesn't require further lowering.
*/
bool instance_id_includes_base_index;
/**
* If enabled, gl_HelperInvocation will be lowered as:
*
* !((1 << sample_id) & sample_mask_in)
*
* This depends on hw implementation details that may not hold for all
* hw, in particular that the FS is only executed for covered samples or
* for helper invocations. So, do not blindly enable this option.
*
* Note: See also issue #22 in ARB_shader_image_load_store
*/
bool lower_helper_invocation;
/**
* Convert gl_SampleMaskIn to gl_HelperInvocation as follows:
*
* gl_SampleMaskIn == 0 ---> gl_HelperInvocation
* gl_SampleMaskIn != 0 ---> !gl_HelperInvocation
*/
bool optimize_sample_mask_in;
/**
* Optimize load_front_face ? a : -a to load_front_face_fsign * a
*/
bool optimize_load_front_face_fsign;
/**
* Optimize boolean reductions of quad broadcasts. This should only be enabled if
* nir_intrinsic_reduce supports INCLUDE_HELPERS.
*/
bool optimize_quad_vote_to_reduce;
bool lower_cs_local_index_to_id;
bool lower_cs_local_id_to_index;
/* Prevents lowering global_invocation_id to be in terms of workgroup_id */
bool has_cs_global_id;
bool lower_device_index_to_zero;
/* Set if nir_lower_pntc_ytransform() should invert gl_PointCoord.
* This applies either when the frame buffer is flipped or when
* GL_POINT_SPRITE_COORD_ORIGIN is GL_LOWER_LEFT.
*/
bool lower_wpos_pntc;
/**
* Set if nir_op_[iu]hadd and nir_op_[iu]rhadd instructions should be
* lowered to simple arithmetic.
*
* If this flag is set, the lowering will be applied to all bit-sizes of
* these instructions.
*
* :c:member:`lower_hadd64`
*/
bool lower_hadd;
/**
* Set if only 64-bit nir_op_[iu]hadd and nir_op_[iu]rhadd instructions
* should be lowered to simple arithmetic.
*
* If this flag is set, the lowering will be applied to only 64-bit
* versions of these instructions.
*
* :c:member:`lower_hadd`
*/
bool lower_hadd64;
/**
* Set if nir_op_uadd_sat should be lowered to simple arithmetic.
*
* If this flag is set, the lowering will be applied to all bit-sizes of
* these instructions.
*/
bool lower_uadd_sat;
/**
* Set if nir_op_usub_sat should be lowered to simple arithmetic.
*
* If this flag is set, the lowering will be applied to all bit-sizes of
* these instructions.
*/
bool lower_usub_sat;
/**
* Set if nir_op_iadd_sat and nir_op_isub_sat should be lowered to simple
* arithmetic.
*
* If this flag is set, the lowering will be applied to all bit-sizes of
* these instructions.
*/
bool lower_iadd_sat;
/**
* Set if imul_32x16 and umul_32x16 should be lowered to simple
* arithmetic.
*/
bool lower_mul_32x16;
/**
* Set if bf2f and f2bf should be lowered to arithmetic.
*/
bool lower_bfloat16_conversions;
bool vectorize_tess_levels;
bool lower_to_scalar;
nir_instr_filter_cb lower_to_scalar_filter;
/**
* Disables potentially harmful algebraic transformations for architectures
* with SIMD-within-a-register semantics.
*
* Note, to actually vectorize 16bit instructions, use nir_opt_vectorize()
* with a suitable callback function.
*/
bool vectorize_vec2_16bit;
/**
* Should the linker unify inputs_read/outputs_written between adjacent
* shader stages which are linked into a single program?
*/
bool unify_interfaces;
/**
* Whether nir_lower_io() will lower interpolateAt functions to
* load_interpolated_input intrinsics.
*
* Unlike nir_lower_io_use_interpolated_input_intrinsics this will only
* lower these functions and leave input load intrinsics untouched.
*/
bool lower_interpolate_at;
/* Lower imul_2x32_64/umul_2x32_64 when 32x32->64-bit multiplication is not supported */
bool lower_mul_2x32_64;
/* Indicates that urol and uror are supported */
bool has_rotate8;
bool has_rotate16;
bool has_rotate32;
/** Backend supports shfr */
bool has_shfr32;
/** Backend supports ternary addition */
bool has_iadd3;
/**
* Backend supports amul and would like them generated whenever
* possible. This is stronger than has_imul24 for amul, but does not imply
* support for imul24.
*/
bool has_amul;
/**
* Backend supports imul24, and would like to use it (when possible)
* for address/offset calculation. If true, driver should call
* nir_lower_amul(). (If not set, amul will automatically be lowered
* to imul.)
*/
bool has_imul24;
/** Backend supports umul24, if not set umul24 will automatically be lowered
* to imul with masked inputs */
bool has_umul24;
/** Backend supports imul24_relaxed and umul24_relaxed, if not set they will be lowered
* to imul24, umul24 or imul.
*/
bool has_mul24_relaxed;
/** Backend supports 32-bit imad */
bool has_imad32;
/** Backend supports umad24, if not set umad24 will automatically be lowered
* to imul with masked inputs and iadd */
bool has_umad24;
/* Backend supports fused compare against zero and csel */
bool has_fused_comp_and_csel;
/* Backend supports fused int eq/ne against zero and csel. */
bool has_icsel_eqz64;
bool has_icsel_eqz32;
bool has_icsel_eqz16;
/* Backend supports fneo, fequ, fltu, fgeu. */
bool has_fneo_fcmpu;
/* Backend supports ford and funord. */
bool has_ford_funord;
/** Backend supports fsub, if not set fsub will automatically be lowered to
* fadd(x, fneg(y)). If true, driver should call nir_opt_algebraic_late(). */
bool has_fsub;
/** Backend supports isub, if not set isub will automatically be lowered to
* iadd(x, ineg(y)). If true, driver should call nir_opt_algebraic_late(). */
bool has_isub;
/** Backend supports pack_32_4x8 or pack_32_4x8_split. */
bool has_pack_32_4x8;
/** Backend supports nir_load_texture_scale and prefers it over txs for nir
* lowerings. */
bool has_texture_scaling;
/** Backend supports sdot_4x8_iadd. */
bool has_sdot_4x8;
/** Backend supports udot_4x8_uadd. */
bool has_udot_4x8;
/** Backend supports sudot_4x8_iadd. */
bool has_sudot_4x8;
/** Backend supports sdot_4x8_iadd_sat. */
bool has_sdot_4x8_sat;
/** Backend supports udot_4x8_uadd_sat. */
bool has_udot_4x8_sat;
/** Backend supports sudot_4x8_iadd_sat. */
bool has_sudot_4x8_sat;
/** Backend supports sdot_2x16 and udot_2x16 opcodes. */
bool has_dot_2x16;
/** Backend supports bfdot2_bfadd opcode. */
bool has_bfdot2_bfadd;
/** Backend supports fmulz (and ffmaz if lower_ffma32=false) */
bool has_fmulz;
/**
* Backend supports fmulz (and ffmaz if lower_ffma32=false) but only if
* FLOAT_CONTROLS_DENORM_PRESERVE_FP32 is not set
*/
bool has_fmulz_no_denorms;
/** Backend supports 32bit ufind_msb_rev and ifind_msb_rev. */
bool has_find_msb_rev;
/** Backend supports pack_half_2x16_rtz_split. */
bool has_pack_half_2x16_rtz;
/** Backend supports bitz/bitnz. */
bool has_bit_test;
/** Backend supports ubfe/ibfe. */
bool has_bfe;
/** Backend supports bfm. */
bool has_bfm;
/** Backend supports bfi. */
bool has_bfi;
/** Backend supports bitfield_select. */
bool has_bitfield_select;
/** Backend supports uclz. */
bool has_uclz;
/** Backend supports msad_u4x8. */
bool has_msad;
/** Backend supports f2e4m3fn_satfn */
bool has_f2e4m3fn_satfn;
/** Backend supports load_global_bounded intrinsics. */
bool has_load_global_bounded;
/**
* Is this the Intel vec4 backend?
*
* Used to inhibit algebraic optimizations that are known to be harmful on
* the Intel vec4 backend. This is generally applicable to any
* optimization that might cause more immediate values to be used in
* 3-source (e.g., ffma and flrp) instructions.
*/
bool intel_vec4;
/**
* On most Intel GPUs, ternary operations such as FMA and BFE cannot take
* immediate operands, so two to three instructions may eventually be needed.
*/
bool avoid_ternary_with_two_constants;
/** Whether 8-bit ALU is supported. */
bool support_8bit_alu;
/** Whether 16-bit ALU is supported. */
bool support_16bit_alu;
unsigned max_unroll_iterations;
unsigned max_unroll_iterations_aggressive;
unsigned max_unroll_iterations_fp64;
bool lower_uniforms_to_ubo;
/* Specifies if indirect sampler array access will trigger forced loop
* unrolling.
*/
bool force_indirect_unrolling_sampler;
/* Some older drivers don't support GLSL versions with the concept of flat
* varyings and also don't support integers. This setting helps us avoid
* marking varyings as flat and potentially having them changed to ints via
* varying packing.
*/
bool no_integers;
/**
* Specifies which type of indirectly accessed variables should force
* loop unrolling.
*/
nir_variable_mode force_indirect_unrolling;
bool driver_functions;
/**
* If true, the driver will call nir_lower_int64 itself and the frontend
* should not do so. This may enable better optimization around address
* modes.
*/
bool late_lower_int64;
nir_lower_int64_options lower_int64_options;
nir_lower_doubles_options lower_doubles_options;
nir_divergence_options divergence_analysis_options;
/**
* The masks of shader stages that support indirect indexing with
* load_input and store_output intrinsics. It's used by
* nir_lower_io_passes.
*/
uint8_t support_indirect_inputs;
uint8_t support_indirect_outputs;
/** Store the variable offset into the intrinsic's range_base instead
* of adding it to the image index.
*/
bool lower_image_offset_to_range_base;
/** Store the variable offset into the intrinsic's range_base instead
* of adding it to the atomic source.
*/
bool lower_atomic_offset_to_range_base;
/** Don't convert medium-precision casts (e.g. f2fmp) into concrete
* type casts (e.g. f2f16).
*/
bool preserve_mediump;
/** lowers fquantize2f16 to alu ops. */
bool lower_fquantize2f16;
/** Lower f2f16 to f2f16_rtz when execution mode is not rtne. */
bool force_f2f16_rtz;
/** Lower VARYING_SLOT_LAYER in FS to SYSTEM_VALUE_LAYER_ID. */
bool lower_layer_fs_input_to_sysval;
/** clip/cull distance and tess level arrays use compact semantics */
bool compact_arrays;
/**
* Whether discard gets emitted as nir_intrinsic_demote.
* Otherwise, nir_intrinsic_terminate is used.
*/
bool discard_is_demote;
/**
* Whether the new-style derivative intrinsics are supported. If false,
* legacy ALU derivative ops will be emitted. This transitional option will
* be removed once all drivers are converted to derivative intrinsics.
*/
bool has_ddx_intrinsics;
/** Whether derivative intrinsics must be scalarized. */
bool scalarize_ddx;
/**
* Assign a range of driver locations to per-view outputs, with unique
* slots for each view. If unset, per-view outputs will be treated
* similarly to other arrayed IO, and only slots for one view will be
* assigned. Regardless of this setting, per-view outputs are only assigned
* slots for one value in var->data.location.
*/
bool per_view_unique_driver_locations;
/**
* Emit nir_intrinsic_store_per_view_output with compacted view indices
* rather than absolute view indices. When using compacted indices, the Nth
* index refers to the Nth enabled view, not the Nth absolute view. For
* example, with view mask 0b1010, compacted index 0 is absolute index 1,
* and compacted index 1 is absolute index 3. Note that compacted view
* indices do not correspond directly to gl_ViewIndex.
*
* If compact_view_index is unset, per-view indices must be constant before
* nir_lower_io. This can be guaranteed by calling nir_lower_io_temporaries
* first.
*/
bool compact_view_index;
/** Options determining lowering and behavior of inputs and outputs. */
nir_io_options io_options;
/**
* Bit mask of nir_lower_packing_op to skip lowering some nir ops in
* nir_lower_packing().
*/
unsigned skip_lower_packing_ops;
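/* Example (illustrative, hypothetical backend): a backend with native
 * 64-bit pack/unpack could skip those lowerings in nir_lower_packing():
 *
 *    options->skip_lower_packing_ops =
 *       BITFIELD_BIT(nir_lower_packing_op_pack_64_2x32) |
 *       BITFIELD_BIT(nir_lower_packing_op_unpack_64_2x32);
 */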
/** Driver callback that defines how to lower mediump IO.
* Used by nir_lower_io_passes.
*/
void (*lower_mediump_io)(struct nir_shader *nir);
/**
* Return the maximum cost of an expression that's written to a shader
* output that can be moved into the next shader to remove that output.
*
* Currently only uniform expressions are moved. A uniform expression is
* any ALU expression sourcing only constants, uniforms, and UBO loads.
*
* Set to NULL or return 0 if you only want to propagate constants from
* outputs to inputs.
*
* Drivers can set the maximum cost based on the types of consecutive
* shaders or shader SHA1s.
*
* Drivers should also set "varying_estimate_instr_cost".
*/
unsigned (*varying_expression_max_cost)(struct nir_shader *consumer,
struct nir_shader *producer);
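/*
 * Sketch of such a callback (hypothetical, the cost value is arbitrary):
 * a driver that is fine with moving small uniform expressions from the
 * producer into the consumer could simply return a constant:
 *
 *    static unsigned
 *    example_varying_expression_max_cost(struct nir_shader *consumer,
 *                                        struct nir_shader *producer)
 *    {
 *       return 4;
 *    }
 */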
/**
* Return the cost of an instruction that could be moved into the next
* shader. If the cost of all instructions in an expression is <=
* varying_expression_max_cost(), the instruction is moved.
*
* When this callback isn't set, nir_opt_varyings uses its own version.
*/
unsigned (*varying_estimate_instr_cost)(struct nir_instr *instr);
/**
* When the varying_expression_max_cost callback isn't set, this specifies
* the maximum cost of a uniform expression that is allowed to be moved
* from output stores into the next shader stage to eliminate those output
* stores and corresponding inputs.
*
* 0 only allows propagating constants written to output stores to
* the next shader.
*
* At least 2 is required for moving a uniform stored in an output into
* the next shader according to default_varying_estimate_instr_cost.
*/
unsigned max_varying_expression_cost;
} nir_shader_compiler_options;
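/*
 * Putting it together - a minimal, purely illustrative configuration
 * (hypothetical driver; the field values only show the shape of the
 * struct and are not a recommendation):
 *
 *    static const nir_shader_compiler_options example_options = {
 *       .lower_fdiv = true,
 *       .lower_flrp64 = true,
 *       .fuse_ffma32 = true,
 *       .max_unroll_iterations = 32,
 *       .force_indirect_unrolling = nir_var_function_temp,
 *       .lower_int64_options = nir_lower_imul64 | nir_lower_divmod64,
 *    };
 *
 * How the struct is plumbed to NIR passes is driver-specific; most drivers
 * keep one such per-generation struct on their screen/device object.
 */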
#ifdef __cplusplus
}
#endif
#endif /* NIR_SHADER_COMPILER_OPTIONS_H */