| /* |
| * Copyright © 2016 Intel Corporation |
| * |
| * Permission is hereby granted, free of charge, to any person obtaining a |
| * copy of this software and associated documentation files (the "Software"), |
| * to deal in the Software without restriction, including without limitation |
| * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
| * and/or sell copies of the Software, and to permit persons to whom the |
| * Software is furnished to do so, subject to the following conditions: |
| * |
| * The above copyright notice and this permission notice (including the next |
| * paragraph) shall be included in all copies or substantial portions of the |
| * Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
| * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
| * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
| * IN THE SOFTWARE. |
| * |
| */ |
| |
| #ifndef SHADER_INFO_H |
| #define SHADER_INFO_H |
| |
| #include "util/bitset.h" |
| #include "util/sha1/sha1.h" |
| #include "shader_enums.h" |
| #include <stdint.h> |
| |
| #ifdef __cplusplus |
| extern "C" { |
| #endif |
| |
| #define MAX_XFB_BUFFERS 4 /* Number of entries in shader_info.xfb_stride[] */ |
| #define MAX_INLINABLE_UNIFORMS 4 /* Number of entries in shader_info.inlinable_uniform_dw_offsets[] */ |
| |
| struct spirv_supported_capabilities { /* Set of boolean flags, one per named |
| * SPIR-V capability or extension feature. NOTE(review): presumably filled in |
| * by the driver and consulted by the SPIR-V front end -- neither side is |
| * visible in this header, confirm against the users of this struct. |
| */ |
| bool address; |
| bool atomic_storage; |
| bool demote_to_helper_invocation; |
| bool derivative_group; |
| bool descriptor_array_dynamic_indexing; |
| bool descriptor_array_non_uniform_indexing; |
| bool descriptor_indexing; |
| bool device_group; |
| bool draw_parameters; |
| bool float16_atomic_add; |
| bool float16_atomic_min_max; |
| bool float32_atomic_add; |
| bool float32_atomic_min_max; |
| bool float64; |
| bool float64_atomic_add; |
| bool float64_atomic_min_max; |
| bool fragment_shader_sample_interlock; |
| bool fragment_shader_pixel_interlock; |
| bool fragment_shading_rate; |
| bool generic_pointers; |
| bool geometry_streams; |
| bool groups; |
| bool image_ms_array; |
| bool image_read_without_format; |
| bool image_write_without_format; |
| bool image_atomic_int64; |
| bool int8; |
| bool int16; |
| bool int64; |
| bool int64_atomics; |
| bool integer_functions2; |
| bool kernel; |
| bool kernel_image; |
| bool kernel_image_read_write; |
| bool linkage; |
| bool literal_sampler; |
| bool mesh_shading; |
| bool mesh_shading_nv; |
| bool min_lod; |
| bool multiview; |
| bool per_view_attributes_nv; |
| bool physical_storage_buffer_address; |
| bool post_depth_coverage; |
| bool printf; |
| bool ray_cull_mask; |
| bool ray_tracing; |
| bool ray_query; |
| bool ray_traversal_primitive_culling; |
| bool runtime_descriptor_array; |
| bool float_controls; |
| bool shader_clock; |
| bool shader_viewport_index_layer; |
| bool shader_viewport_mask_nv; |
| bool sparse_residency; |
| bool stencil_export; |
| bool storage_8bit; |
| bool storage_16bit; |
| bool storage_image_ms; |
| bool subgroup_arithmetic; |
| bool subgroup_ballot; |
| bool subgroup_basic; |
| bool subgroup_dispatch; |
| bool subgroup_quad; |
| bool subgroup_shuffle; |
| bool subgroup_uniform_control_flow; |
| bool subgroup_vote; |
| bool tessellation; |
| bool transform_feedback; |
| bool variable_pointers; |
| bool vk_memory_model; |
| bool vk_memory_model_device_scope; |
| bool workgroup_memory_explicit_layout; |
| bool float16; |
| bool amd_fragment_mask; /* amd_* members: AMD vendor features, going by the prefix */ |
| bool amd_gcn_shader; |
| bool amd_shader_ballot; |
| bool amd_trinary_minmax; |
| bool amd_image_read_write_lod; |
| bool amd_shader_explicit_vertex_parameter; |
| bool amd_image_gather_bias_lod; |
| |
| bool intel_subgroup_shuffle; /* intel_* members: Intel vendor features, going by the prefix */ |
| bool intel_subgroup_buffer_block_io; |
| }; |
| |
| typedef struct shader_info { /* Metadata describing a single shader: identity, |
| * resource counts, I/O usage masks, feature flags, and a trailing anonymous |
| * union holding stage-specific data (vs, gs, fs, cs, tess, mesh). |
| */ |
| const char *name; /* Shader name. NOTE(review): nullability/ownership not visible in this header. */ |
| |
| /* Descriptive name provided by the client; may be NULL */ |
| const char *label; |
| |
| /* Shader is internal, and should be ignored by things like NIR_DEBUG=print */ |
| bool internal; |
| |
| /* SHA1 of the original source, used by shader detection in drivers. */ |
| uint8_t source_sha1[SHA1_DIGEST_LENGTH]; |
| |
| /** The shader stage, such as MESA_SHADER_VERTEX. */ |
| gl_shader_stage stage:8; |
| |
| /** The shader stage in a non SSO linked program that follows this stage, |
| * such as MESA_SHADER_FRAGMENT. |
| */ |
| gl_shader_stage next_stage:8; |
| |
| /* Number of textures used by this shader */ |
| uint8_t num_textures; |
| /* Number of uniform buffers used by this shader */ |
| uint8_t num_ubos; |
| /* Number of atomic buffers used by this shader */ |
| uint8_t num_abos; |
| /* Number of shader storage buffers (max .driver_location + 1) used by this |
| * shader. In the case of nir_lower_atomics_to_ssbo being used, this will |
| * be the number of actual SSBOs in gl_program->info, and the lowered SSBOs |
| * and atomic counters in nir_shader->info. |
| */ |
| uint8_t num_ssbos; |
| /* Number of images used by this shader */ |
| uint8_t num_images; |
| |
| /* Which inputs are actually read */ |
| uint64_t inputs_read; |
| /* Which outputs are actually written */ |
| uint64_t outputs_written; |
| /* Which outputs are actually read */ |
| uint64_t outputs_read; |
| /* Which system values are actually read */ |
| BITSET_DECLARE(system_values_read, SYSTEM_VALUE_MAX); |
| |
| /* Which I/O is per-primitive. These masks only classify the slots; combine |
| * them with the read/written masks above to learn which are actually used. |
| */ |
| uint64_t per_primitive_inputs; |
| uint64_t per_primitive_outputs; |
| |
| /* Which outputs are per-view */ |
| uint64_t per_view_outputs; |
| |
| /* Which 16-bit inputs and outputs are used corresponding to |
| * VARYING_SLOT_VARn_16BIT. |
| */ |
| uint16_t inputs_read_16bit; |
| uint16_t outputs_written_16bit; |
| uint16_t outputs_read_16bit; |
| uint16_t inputs_read_indirectly_16bit; |
| uint16_t outputs_accessed_indirectly_16bit; |
| |
| /* Which patch inputs are actually read */ |
| uint32_t patch_inputs_read; |
| /* Which patch outputs are actually written */ |
| uint32_t patch_outputs_written; |
| /* Which patch outputs are read */ |
| uint32_t patch_outputs_read; |
| |
| /* Which inputs are read indirectly (subset of inputs_read) */ |
| uint64_t inputs_read_indirectly; |
| /* Which outputs are read or written indirectly */ |
| uint64_t outputs_accessed_indirectly; |
| /* Which patch inputs are read indirectly (subset of patch_inputs_read) */ |
| uint64_t patch_inputs_read_indirectly; |
| /* Which patch outputs are read or written indirectly */ |
| uint64_t patch_outputs_accessed_indirectly; |
| |
| /** Bitfield of which textures are used */ |
| BITSET_DECLARE(textures_used, 128); |
| |
| /** Bitfield of which textures are used by texelFetch() */ |
| BITSET_DECLARE(textures_used_by_txf, 128); |
| |
| /** Bitfield of which samplers are used */ |
| BITSET_DECLARE(samplers_used, 32); |
| |
| /** Bitfield of which images are used */ |
| BITSET_DECLARE(images_used, 64); |
| /** Bitfield of which images are buffers. */ |
| BITSET_DECLARE(image_buffers, 64); |
| /** Bitfield of which images are MSAA. */ |
| BITSET_DECLARE(msaa_images, 64); |
| |
| /* SPV_KHR_float_controls: execution mode for floating point ops */ |
| uint16_t float_controls_execution_mode; |
| |
| /** |
| * Size of shared variables accessed by compute/task/mesh shaders. |
| */ |
| unsigned shared_size; |
| |
| /** |
| * Size of task payload variables accessed by task/mesh shaders. |
| */ |
| unsigned task_payload_size; |
| |
| /** |
| * Number of ray tracing queries in the shader (counts all elements of all |
| * variables). |
| */ |
| unsigned ray_queries; |
| |
| /** |
| * Local workgroup size used by compute/task/mesh shaders. |
| */ |
| uint16_t workgroup_size[3]; |
| |
| enum gl_subgroup_size subgroup_size; /* Subgroup size selection; enum gl_subgroup_size is declared outside this header. */ |
| |
| /** |
| * Uses subgroup intrinsics which can communicate across a quad. |
| * |
| * NOTE(review): going by the field name this presumably means across quad |
| * boundaries (i.e. wider than a single quad) -- confirm where it is set. |
| */ |
| bool uses_wide_subgroup_intrinsics; |
| |
| /* Transform feedback buffer strides in dwords. Being a uint8_t, each |
| * stride is at most 255 dwords, i.e. 1020 bytes (1K - 4). |
| */ |
| uint8_t xfb_stride[MAX_XFB_BUFFERS]; |
| |
| uint16_t inlinable_uniform_dw_offsets[MAX_INLINABLE_UNIFORMS]; /* presumably dword offsets of the uniforms eligible for inlining -- confirm */ |
| uint8_t num_inlinable_uniforms:4; /* presumably the number of valid entries in inlinable_uniform_dw_offsets -- confirm */ |
| |
| /* The size of the gl_ClipDistance[] array, if declared. */ |
| uint8_t clip_distance_array_size:4; |
| |
| /* The size of the gl_CullDistance[] array, if declared. */ |
| uint8_t cull_distance_array_size:4; |
| |
| /* Whether or not this shader ever uses textureGather() */ |
| bool uses_texture_gather:1; |
| |
| /* Whether texture size, levels, or samples is queried. */ |
| bool uses_resource_info_query:1; |
| |
| /** |
| * True if this shader uses the fddx/fddy opcodes. |
| * |
| * Note that this does not include the "fine" and "coarse" variants. |
| */ |
| bool uses_fddx_fddy:1; |
| |
| /** Has divergence analysis ever been run? */ |
| bool divergence_analysis_run:1; |
| |
| /* Bitmask of bit-sizes used with ALU instructions. */ |
| uint8_t bit_sizes_float; |
| uint8_t bit_sizes_int; |
| |
| /* Whether the first UBO is the default uniform buffer, i.e. uniforms. */ |
| bool first_ubo_is_default_ubo:1; |
| |
| /* Whether or not separate shader objects were used */ |
| bool separate_shader:1; |
| |
| /** Was this shader linked with any transform feedback varyings? */ |
| bool has_transform_feedback_varyings:1; |
| |
| /* Whether flrp has been lowered. */ |
| bool flrp_lowered:1; |
| |
| /* Whether nir_lower_io has been called to lower derefs. |
| * nir_variables for inputs and outputs might not be present in the IR. |
| */ |
| bool io_lowered:1; |
| |
| /* Whether the shader writes memory, including transform feedback. */ |
| bool writes_memory:1; |
| |
| /* Whether gl_Layer is viewport-relative */ |
| bool layer_viewport_relative:1; |
| |
| /* Whether explicit barriers are used */ |
| bool uses_control_barrier : 1; |
| bool uses_memory_barrier : 1; |
| |
| /* Whether ARB_bindless_texture ops or variables are used */ |
| bool uses_bindless : 1; |
| |
| /** |
| * Shared memory types have explicit layout set. Used for |
| * SPV_KHR_workgroup_storage_explicit_layout. |
| */ |
| bool shared_memory_explicit_layout:1; |
| |
| /** |
| * Used for VK_KHR_zero_initialize_workgroup_memory. |
| */ |
| bool zero_initialize_shared_memory:1; |
| |
| /** |
| * Used for ARB_compute_variable_group_size. |
| */ |
| bool workgroup_size_variable:1; |
| |
| /** |
| * Set if this shader uses legacy (DX9 or ARB assembly) math rules. |
| * |
| * From the ARB_fragment_program specification: |
| * |
| * "The following rules apply to multiplication: |
| * |
| * 1. <x> * <y> == <y> * <x>, for all <x> and <y>. |
| * 2. +/-0.0 * <x> = +/-0.0, at least for all <x> that correspond to |
| * representable numbers (IEEE "not a number" and "infinity" |
| * encodings may be exceptions). |
| * 3. +1.0 * <x> = <x>, for all <x>." |
| * |
| * However, in effect this was due to DX9 semantics implying that 0*x=0 even |
| * for inf/nan if the hardware generated them instead of float_min/max. So, |
| * you should not have an exception for inf/nan to rule 2 above. |
| * |
| * One implementation of this behavior would be to flush all generated NaNs |
| * to zero, at which point 0*Inf=Nan=0. Most DX9/ARB-asm hardware did not |
| * generate NaNs, and the only way the GPU saw one was to possibly feed it |
| * in as a uniform. |
| */ |
| bool use_legacy_math_rules; |
| |
| union { /* Stage-specific data. The members share storage, so only one is meaningful at a time; presumably selected by 'stage' -- confirm. */ |
| struct { |
| /* Which inputs are doubles */ |
| uint64_t double_inputs; |
| |
| /* For AMD-specific driver-internal shaders. It replaces vertex |
| * buffer loads with code generating VS inputs from scalar registers. |
| * |
| * Valid values: SI_VS_BLIT_SGPRS_POS_* |
| */ |
| uint8_t blit_sgprs_amd:4; |
| |
| /* True if the shader writes position in window space coordinates pre-transform */ |
| bool window_space_position:1; |
| |
| /** Is an edge flag input needed? */ |
| bool needs_edge_flag:1; |
| } vs; |
| |
| struct { |
| /** The output primitive type */ |
| uint16_t output_primitive; |
| |
| /** The input primitive type */ |
| uint16_t input_primitive; |
| |
| /** The maximum number of vertices the geometry shader might write. */ |
| uint16_t vertices_out; |
| |
| /** 1 .. MAX_GEOMETRY_SHADER_INVOCATIONS */ |
| uint8_t invocations; |
| |
| /** The number of vertices received per input primitive (max. 6) */ |
| uint8_t vertices_in:3; |
| |
| /** Whether or not this shader uses EndPrimitive */ |
| bool uses_end_primitive:1; |
| |
| /** The streams used in this shader (max. 4) */ |
| uint8_t active_stream_mask:4; |
| } gs; |
| |
| struct { |
| bool uses_discard:1; |
| bool uses_demote:1; |
| bool uses_fbfetch_output:1; |
| bool color_is_dual_source:1; |
| |
| /** |
| * True if this fragment shader requires helper invocations. This |
| * can be caused by the use of ALU derivative ops, texture |
| * instructions which do implicit derivatives, and the use of quad |
| * subgroup operations. |
| */ |
| bool needs_quad_helper_invocations:1; |
| |
| /** |
| * True if this fragment shader requires helper invocations for |
| * all subgroup operations, not just quad ops and derivatives. |
| */ |
| bool needs_all_helper_invocations:1; |
| |
| /** |
| * Whether any inputs are declared with the "sample" qualifier. |
| */ |
| bool uses_sample_qualifier:1; |
| |
| /** |
| * Whether sample shading is used. |
| */ |
| bool uses_sample_shading:1; |
| |
| /** |
| * Whether early fragment tests are enabled as defined by |
| * ARB_shader_image_load_store. |
| */ |
| bool early_fragment_tests:1; |
| |
| /** |
| * Defined by INTEL_conservative_rasterization. |
| */ |
| bool inner_coverage:1; |
| |
| bool post_depth_coverage:1; |
| |
| /** |
| * \name ARB_fragment_coord_conventions |
| * @{ |
| */ |
| bool pixel_center_integer:1; |
| bool origin_upper_left:1; |
| /*@}*/ |
| |
| bool pixel_interlock_ordered:1; |
| bool pixel_interlock_unordered:1; |
| bool sample_interlock_ordered:1; |
| bool sample_interlock_unordered:1; |
| |
| /** |
| * Flags whether NIR's base types on the FS color outputs should be |
| * ignored. |
| * |
| * GLSL requires that fragment shader output base types match the |
| * render target's base types for the behavior to be defined. From |
| * the GL 4.6 spec: |
| * |
| * "If the values written by the fragment shader do not match the |
| * format(s) of the corresponding color buffer(s), the result is |
| * undefined." |
| * |
| * However, for NIR shaders translated from TGSI, we don't have the |
| * output types any more, so the driver will need to do whatever |
| * fixups are necessary to handle effectively untyped data being |
| * output from the FS. |
| */ |
| bool untyped_color_outputs:1; |
| |
| /** gl_FragDepth layout for ARB_conservative_depth. */ |
| enum gl_frag_depth_layout depth_layout:3; |
| |
| /** |
| * Interpolation qualifiers for drivers that lower color inputs |
| * to system values. |
| */ |
| unsigned color0_interp:3; /* glsl_interp_mode */ |
| bool color0_sample:1; |
| bool color0_centroid:1; |
| unsigned color1_interp:3; /* glsl_interp_mode */ |
| bool color1_sample:1; |
| bool color1_centroid:1; |
| |
| /* Bitmask of gl_advanced_blend_mode values that may be used with this |
| * shader. |
| */ |
| unsigned advanced_blend_modes; |
| } fs; |
| |
| struct { |
| uint16_t workgroup_size_hint[3]; /* Three-entry hint, same shape as workgroup_size[3] above; NOTE(review): exact semantics not visible here. */ |
| |
| uint8_t user_data_components_amd:3; |
| |
| /* |
| * Arrangement of invocations used to calculate derivatives in a compute |
| * shader. From NV_compute_shader_derivatives. |
| */ |
| enum gl_derivative_group derivative_group:2; |
| |
| /** |
| * pointer size is: |
| * AddressingModelLogical: 0 (default) |
| * AddressingModelPhysical32: 32 |
| * AddressingModelPhysical64: 64 |
| */ |
| unsigned ptr_size; |
| } cs; |
| |
| /* Applies to both TCS and TES. */ |
| struct { |
| enum tess_primitive_mode _primitive_mode; |
| |
| /** The number of vertices in the TCS output patch. */ |
| uint8_t tcs_vertices_out; |
| unsigned spacing:2; /*gl_tess_spacing*/ |
| |
| /** Is the vertex order counterclockwise? */ |
| bool ccw:1; |
| bool point_mode:1; |
| |
| /* Bit mask of TCS per-vertex inputs (VS outputs) that are used |
| * with a vertex index that is NOT the invocation id |
| */ |
| uint64_t tcs_cross_invocation_inputs_read; |
| |
| /* Bit mask of TCS per-vertex outputs that are used |
| * with a vertex index that is NOT the invocation id |
| */ |
| uint64_t tcs_cross_invocation_outputs_read; |
| } tess; |
| |
| /* Applies to MESH. */ |
| struct { |
| /* Bit mask of MS outputs that are used |
| * with an index that is NOT the local invocation index. |
| */ |
| uint64_t ms_cross_invocation_output_access; |
| |
| uint16_t max_vertices_out; |
| uint16_t max_primitives_out; |
| uint16_t primitive_type; /* GL_POINTS, GL_LINES or GL_TRIANGLES. */ |
| |
| /* TODO: remove this when we stop supporting NV_mesh_shader. */ |
| bool nv; |
| } mesh; |
| }; |
| } shader_info; |
| |
| #ifdef __cplusplus |
| } |
| #endif |
| |
| #endif /* SHADER_INFO_H */ |