| # Copyright © 2024 Intel Corporation |
| |
| # Permission is hereby granted, free of charge, to any person obtaining a |
| # copy of this software and associated documentation files (the "Software"), |
| # to deal in the Software without restriction, including without limitation |
| # the rights to use, copy, modify, merge, publish, distribute, sublicense, |
| # and/or sell copies of the Software, and to permit persons to whom the |
| # Software is furnished to do so, subject to the following conditions: |
| |
| # The above copyright notice and this permission notice (including the next |
| # paragraph) shall be included in all copies or substantial portions of the |
| # Software. |
| |
| # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
| # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
| # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
| # IN THE SOFTWARE. |
| |
| from textwrap import dedent |
| |
# Every Define, Enum and Struct appends itself to TYPES when it is constructed,
# so this list holds the declarations in the order they are made in this file.
TYPES = []

# Lookup of declarations by name.  Enum and Struct store themselves here under
# their name when constructed; Define does not.
TYPES_BY_NAME = {}
| |
class Define:
    """Describes a C macro definition.

    Creating an instance appends it to the module-level TYPES list.  Unlike
    Enum and Struct, a Define is not entered into TYPES_BY_NAME.
    """

    def __init__(self, name, value, comment=None):
        """Record the macro and register it in TYPES.

        name    -- identifier of the macro
        value   -- value associated with the macro
        comment -- optional descriptive text; None when there is none
        """
        self.name, self.value, self.comment = name, value, comment
        TYPES.append(self)
| |
class EnumValue:
    """A single enumerator that can carry an explicit value and a comment.

    group_begin and group_end are optional labels stored as given; in this
    file they are set on the first and last enumerator of a family of related
    entries.
    """

    def __init__(self, name, value=None, comment=None,
                 group_begin=None, group_end=None):
        """Store the enumerator's fields unchanged.

        name        -- identifier of the enumerator
        value       -- explicit value, or None when none is given
        comment     -- optional descriptive text
        group_begin -- optional group label marking the start of a group
        group_end   -- optional group label marking the end of a group
        """
        self.name = name
        self.value, self.comment = value, comment
        self.group_begin, self.group_end = group_begin, group_end

    def __str__(self):
        """Return the enumerator's name."""
        return self.name
| |
class Enum:
    """Holds what is needed to declare and serialize an enumeration.

    Creating an instance appends it to TYPES and stores it in TYPES_BY_NAME
    under its name.
    """

    def __init__(self, name, values, external=False):
        """Normalize the enumerators and register the enumeration.

        name     -- name of the enumeration, also its key in TYPES_BY_NAME
        values   -- iterable of enumerators; each item is either an EnumValue
                    or a bare object that is passed to EnumValue() as the name
        external -- flag stored unchanged on the instance
        """
        self.name = name
        self.external = external
        # After this, every element of self.values is an EnumValue.
        self.values = [
            entry if isinstance(entry, EnumValue) else EnumValue(entry)
            for entry in values
        ]
        TYPES.append(self)
        TYPES_BY_NAME[name] = self
| |
class Member:
    """Holds what is needed to declare and serialize one member of a struct."""

    def __init__(self, member_type, name, array=None,
                 compiler_field=False, ray_tracing_field=False,
                 comment=None):
        """Store the member description unchanged.

        member_type       -- type name of the member
        name              -- member name
        array             -- array length (in this file an int or an
                             expression string), or None for a non-array member
        compiler_field    -- see the comment on the attribute below
        ray_tracing_field -- flag stored unchanged on the instance
        comment           -- optional descriptive text
        """
        self.member_type, self.name = member_type, name
        self.array = array
        self.comment = comment
        # compiler_field indicates whether this field is used by the compiler,
        # and whether it should be included in the shader compiler cache hash
        # function.
        self.compiler_field = compiler_field
        self.ray_tracing_field = ray_tracing_field
| |
class Struct:
    """Holds what is needed to declare and serialize a struct.

    Creating an instance appends it to TYPES and stores it in TYPES_BY_NAME
    under its name.
    """

    def __init__(self, name, members):
        """Record the struct and register it.

        name    -- name of the struct, also its key in TYPES_BY_NAME
        members -- the struct's members; the object is stored as passed,
                   without copying
        """
        self.name, self.members = name, members
        TYPES.append(self)
        TYPES_BY_NAME[name] = self
| |
# Names of the integer types.
INT_TYPES = {
    "uint8_t",
    "uint16_t",
    "uint32_t",
    "uint64_t",
    "unsigned",
    "int",
}

# The integer types plus char and bool.
FUNDAMENTAL_TYPES = {"char", "bool"} | INT_TYPES
| |
# Size limits; the intel_device_info members further down in this file use
# these names in their array-length expressions.
Define("INTEL_DEVICE_MAX_NAME_SIZE", value=64)
Define("INTEL_DEVICE_MAX_SLICES", value=8)
Define("INTEL_DEVICE_MAX_SUBSLICES", value=8, comment="Maximum on gfx11")
Define("INTEL_DEVICE_MAX_EUS_PER_SUBSLICE", value=16, comment="Maximum on gfx11")
Define("INTEL_DEVICE_MAX_PIXEL_PIPES", value=16, comment="Maximum on DG2")
| |
# Platform identifiers.  Only the first enumerator is given an explicit value.
# group_begin/group_end label the first and last enumerator of each
# multi-variant family (DG2, ATSM, MTL, ARL).
Enum(
    name="intel_platform",
    values=[
        EnumValue("INTEL_PLATFORM_GFX3", value=1),
        "INTEL_PLATFORM_I965",
        "INTEL_PLATFORM_ILK",
        "INTEL_PLATFORM_G4X",
        "INTEL_PLATFORM_SNB",
        "INTEL_PLATFORM_IVB",
        "INTEL_PLATFORM_BYT",
        "INTEL_PLATFORM_HSW",
        "INTEL_PLATFORM_BDW",
        "INTEL_PLATFORM_CHV",
        "INTEL_PLATFORM_SKL",
        "INTEL_PLATFORM_BXT",
        "INTEL_PLATFORM_KBL",
        "INTEL_PLATFORM_GLK",
        "INTEL_PLATFORM_CFL",
        "INTEL_PLATFORM_ICL",
        "INTEL_PLATFORM_EHL",
        "INTEL_PLATFORM_TGL",
        "INTEL_PLATFORM_RKL",
        "INTEL_PLATFORM_DG1",
        "INTEL_PLATFORM_ADL",
        "INTEL_PLATFORM_RPL",
        # DG2 family
        EnumValue("INTEL_PLATFORM_DG2_G10", group_begin="DG2"),
        "INTEL_PLATFORM_DG2_G11",
        EnumValue("INTEL_PLATFORM_DG2_G12", group_end="DG2"),
        # ATSM family
        EnumValue("INTEL_PLATFORM_ATSM_G10", group_begin="ATSM"),
        EnumValue("INTEL_PLATFORM_ATSM_G11", group_end="ATSM"),
        # MTL family
        EnumValue("INTEL_PLATFORM_MTL_U", group_begin="MTL"),
        EnumValue("INTEL_PLATFORM_MTL_H", group_end="MTL"),
        # ARL family
        EnumValue("INTEL_PLATFORM_ARL_U", group_begin="ARL"),
        EnumValue("INTEL_PLATFORM_ARL_H", group_end="ARL"),
        "INTEL_PLATFORM_LNL",
        "INTEL_PLATFORM_BMG",
        "INTEL_PLATFORM_PTL",
        "INTEL_PLATFORM_WCL",
    ],
)
| |
# A (class, instance) pair of ints.
Struct(
    name="intel_memory_class_instance",
    members=[
        Member("int", "klass",
               comment="Kernel backend specific class value, no translation needed yet"),
        Member("int", "instance"),
    ],
)
| |
# mmap modes.  Enumerators that need neither a value nor a comment are given
# as bare strings; Enum wraps them into EnumValue itself.
Enum(
    name="intel_device_info_mmap_mode",
    values=[
        EnumValue("INTEL_DEVICE_INFO_MMAP_MODE_UC", value=0),
        "INTEL_DEVICE_INFO_MMAP_MODE_WC",
        "INTEL_DEVICE_INFO_MMAP_MODE_WB",
        EnumValue(
            "INTEL_DEVICE_INFO_MMAP_MODE_XD",
            comment=("Xe2+ only. Only supported in GPU side and used for displayable\n"
                     "buffers."),
        ),
    ],
)
| |
# One PAT entry: an index together with the mmap mode to use with it.
Struct(
    name="intel_device_info_pat_entry",
    members=[
        Member("uint8_t", "index"),
        Member(
            "intel_device_info_mmap_mode", "mmap",
            comment=("This tells KMD what caching mode the CPU mapping should use.\n"
                     "It has nothing to do with any PAT cache modes."),
        ),
    ],
)
| |
# Cooperative matrix scope; NONE is pinned to 0.
Enum(
    name="intel_cmat_scope",
    values=[
        EnumValue("INTEL_CMAT_SCOPE_NONE", value=0),
        "INTEL_CMAT_SCOPE_SUBGROUP",
    ],
)
| |
# Component types usable for the a/b/c/result matrices of
# intel_cooperative_matrix_configuration.
Enum(
    name="intel_cooperative_matrix_component_type",
    values=[
        "INTEL_CMAT_FLOAT16",
        "INTEL_CMAT_FLOAT32",
        "INTEL_CMAT_SINT32",
        "INTEL_CMAT_SINT8",
        "INTEL_CMAT_UINT32",
        "INTEL_CMAT_UINT8",
        "INTEL_CMAT_BFLOAT16",
    ],
)
| |
# Engine classes.  INTEL_ENGINE_CLASS_INVALID is listed last and is also used
# as an array length by intel_device_info members in this file.
Enum(
    name="intel_engine_class",
    values=[
        "INTEL_ENGINE_CLASS_RENDER",
        "INTEL_ENGINE_CLASS_COPY",
        "INTEL_ENGINE_CLASS_VIDEO",
        "INTEL_ENGINE_CLASS_VIDEO_ENHANCE",
        "INTEL_ENGINE_CLASS_COMPUTE",
        "INTEL_ENGINE_CLASS_INVALID",
    ],
)
| |
# One cooperative matrix configuration: scope, the m/n/k dimensions and the
# component type of each matrix.
Struct(
    name="intel_cooperative_matrix_configuration",
    members=[
        Member("intel_cmat_scope", "scope",
               comment=dedent("""\
                   Matrix A is MxK.
                   Matrix B is KxN.
                   Matrix C and Matrix Result are MxN.

                   Result = A * B + C;""")),
        Member("uint8_t", "m"),
        Member("uint8_t", "n"),
        Member("uint8_t", "k"),
        Member("intel_cooperative_matrix_component_type", "a"),
        Member("intel_cooperative_matrix_component_type", "b"),
        Member("intel_cooperative_matrix_component_type", "c"),
        Member("intel_cooperative_matrix_component_type", "result"),
    ],
)
| |
# KMD types; this is the only enumeration in this file created with
# external=True.
Enum(
    name="intel_kmd_type",
    values=[
        "INTEL_KMD_TYPE_INVALID",
        "INTEL_KMD_TYPE_I915",
        "INTEL_KMD_TYPE_XE",
        "INTEL_KMD_TYPE_STUB",
        "INTEL_KMD_TYPE_LAST",
    ],
    external=True,
)
| |
# A memory region described by two 64-bit counters: size and free.
Struct(
    name="intel_device_info_mem_region",
    members=[
        Member("uint64_t", "size"),
        Member("uint64_t", "free"),
    ],
)
| |
# A memory class instance with its mappable and unmappable regions.
Struct(
    name="intel_device_info_ram_desc",
    members=[
        Member("intel_memory_class_instance", "mem"),
        Member("intel_device_info_mem_region", "mappable"),
        Member("intel_device_info_mem_region", "unmappable"),
    ],
)
| |
# Memory description: one ram_desc each for sram and vram, plus the
# use_class_instance flag.
Struct(
    name="intel_device_info_mem_desc",
    members=[
        Member("bool", "use_class_instance"),
        Member("intel_device_info_ram_desc", "sram"),
        Member("intel_device_info_ram_desc", "vram"),
    ],
)
| |
# URB description: a size plus four-element min/max entry arrays.
Struct(
    name="intel_device_info_urb_desc",
    members=[
        Member("int", "size"),
        Member("int", "min_entries", array=4),
        Member("int", "max_entries", array=4),
    ],
)
| |
# The PAT entries, one member per usage.
Struct(
    name="intel_device_info_pat_desc",
    members=[
        Member(
            "intel_device_info_pat_entry", "cached_coherent",
            comment="To be used when CPU access is frequent, WB + 1 or 2 way coherent",
        ),
        Member(
            "intel_device_info_pat_entry", "scanout",
            comment="scanout and external BOs",
        ),
        Member(
            "intel_device_info_pat_entry", "compressed_scanout",
            comment="Only supported in Xe2, compressed + WC for displayable resources",
        ),
        Member(
            "intel_device_info_pat_entry", "compressed",
            comment="Only supported in Xe2, compressed + WC for non-displayable resources",
        ),
        Member(
            "intel_device_info_pat_entry", "writeback_incoherent",
            comment=("BOs without special needs, can be WB not coherent "
                     "or WC it depends on the platforms and KMD"),
        ),
        Member("intel_device_info_pat_entry", "writecombining"),
    ],
)
| |
# The top-level device description.  Members flagged compiler_field=True are
# the ones used by the compiler (see Member); subslice_masks is the only member
# flagged ray_tracing_field=True.
Struct(
    name="intel_device_info",
    members=[
        Member("intel_kmd_type", "kmd_type"),

        Member("int", "ver",
               compiler_field=True,
               comment="Driver internal numbers used to differentiate platforms."),

        Member("int", "verx10", compiler_field=True),

        Member("uint32_t", "gfx_ip_ver",
               compiler_field=True,
               comment=dedent("""\
                   This is the run-time hardware GFX IP version that may be more specific
                   than ver/verx10. ver/verx10 may be more useful for comparing a class
                   of devices whereas gfx_ip_ver may be more useful for precisely
                   checking for a graphics ip type. GFX_IP_VER(major, minor) should be
                   used to compare IP versions.""")),

        Member("int", "revision",
               comment=dedent("""\
                   This revision is queried from KMD unlike
                   pci_revision_id from drm device. Its value is not always
                   same as the pci_revision_id.
                   For LNL+ this is the stepping of GT IP/GMD RevId.""")),

        Member("int", "gt"),

        # PCI identification
        Member("uint16_t", "pci_domain", comment="PCI info"),
        Member("uint8_t", "pci_bus"),
        Member("uint8_t", "pci_dev"),
        Member("uint8_t", "pci_func"),
        Member("uint16_t", "pci_device_id"),
        Member("uint8_t", "pci_revision_id"),

        Member("intel_platform", "platform", compiler_field=True),

        # Feature flags
        Member("bool", "has_hiz_and_separate_stencil"),
        Member("bool", "has_sample_with_hiz"),
        Member("bool", "has_bit6_swizzle"),
        Member("bool", "has_llc"),
        Member("bool", "has_pln", compiler_field=True),
        Member("bool", "has_64bit_float", compiler_field=True),
        Member("bool", "has_64bit_float_via_math_pipe", compiler_field=True),
        Member("bool", "has_64bit_int", compiler_field=True),
        Member("bool", "has_bfloat16", compiler_field=True),
        Member("bool", "has_integer_dword_mul", compiler_field=True),
        Member("bool", "has_systolic", compiler_field=True),
        Member("bool", "supports_simd16_3src", compiler_field=True),
        Member("bool", "disable_ccs_repack"),

        Member("bool", "has_illegal_ccs_values",
               comment="True if CCS needs to be initialized before use."),

        Member("bool", "has_flat_ccs",
               comment=dedent("""\
                   True if CCS uses a flat virtual address translation to a memory
                   carve-out, rather than aux map translations, or additional surfaces.""")),

        Member("bool", "has_aux_map"),
        Member("bool", "has_caching_uapi"),
        Member("bool", "has_tiling_uapi"),
        Member("bool", "has_ray_tracing", compiler_field=True),
        Member("bool", "has_ray_query"),
        Member("bool", "has_local_mem"),
        Member("bool", "has_lsc", compiler_field=True),
        Member("bool", "has_mesh_shading"),
        Member("bool", "has_mmap_offset"),
        Member("bool", "has_partial_mmap_offset"),
        Member("bool", "has_userptr_probe"),
        Member("bool", "has_context_isolation"),
        Member("bool", "has_set_pat_uapi"),
        Member("bool", "has_indirect_unroll"),

        Member("bool", "has_coarse_pixel_primitive_and_cb",
               compiler_field=True,
               comment=dedent("""\
                   Whether this platform supports fragment shading rate controlled by a
                   primitive in geometry shaders and by a control buffer.""")),

        Member("bool", "has_compute_engine",
               comment="Whether this platform has compute engine"),

        Member("bool", "needs_null_push_constant_tbimr_workaround",
               comment=dedent("""\
                   Whether the platform needs an undocumented workaround for a hardware bug
                   that affects draw calls with a pixel shader that has 0 push constant cycles
                   when TBIMR is enabled, which has been seen to lead to hangs. To avoid the
                   issue we simply pad the push constant payload to be at least 1 register.""")),

        # Topology and hardware limits
        Member("unsigned", "num_slices",
               comment=dedent("""\
                   GPU hardware limits

                   In general, you can find shader thread maximums by looking at the "Maximum
                   Number of Threads" field in the Intel PRM description of the 3DSTATE_VS,
                   3DSTATE_GS, 3DSTATE_HS, 3DSTATE_DS, and 3DSTATE_PS commands. URB entry
                   limits come from the "Number of URB Entries" field in the
                   3DSTATE_URB_VS command and friends.

                   These fields are used to calculate the scratch space to allocate. The
                   amount of scratch space can be larger without being harmful on modern
                   GPUs, however, prior to Haswell, programming the maximum number of threads
                   to greater than the hardware maximum would cause GPU performance to tank.

                   Total number of slices present on the device whether or not they've been
                   fused off.

                   XXX: CS thread counts are limited by the inability to do cross subslice
                   communication. It is the effectively the number of logical threads which
                   can be executed in a subslice. Fuse configurations may cause this number
                   to change, so we program @max_cs_threads as the lower maximum.""")),

        Member("unsigned", "max_slices",
               compiler_field=True,
               comment=dedent("""\
                   Maximum number of slices present on this device (can be more than
                   num_slices if some slices are fused).""")),

        Member("unsigned", "num_subslices",
               array="INTEL_DEVICE_MAX_SLICES",
               comment="Number of subslices for each slice (used to be uniform until CNL)."),

        Member("unsigned", "max_subslices_per_slice",
               compiler_field=True,
               comment=dedent("""\
                   Maximum number of subslices per slice present on this device (can be
                   more than the maximum value in the num_subslices[] array if some
                   subslices are fused).

                   This is GT_SS_PER_SLICE in SKU.""")),

        Member("unsigned", "ppipe_subslices",
               array="INTEL_DEVICE_MAX_PIXEL_PIPES",
               comment="Number of subslices on each pixel pipe (ICL)."),

        Member("unsigned", "max_eus_per_subslice",
               compiler_field=True,
               comment="Maximum number of EUs per subslice (some EUs can be fused off)."),

        Member("unsigned", "num_thread_per_eu",
               compiler_field=True,
               comment="Number of threads per eu, varies between 4 and 8 between generations."),

        Member("uint8_t", "grf_size",
               comment="Size of a register from the EU GRF file in bytes."),

        Member("uint8_t", "slice_masks",
               comment="A bit mask of the slices available."),

        Member("uint8_t", "subslice_masks",
               array="INTEL_DEVICE_MAX_SLICES * DIV_ROUND_UP(INTEL_DEVICE_MAX_SUBSLICES, 8)",
               compiler_field=True,
               ray_tracing_field=True,
               comment=dedent("""\
                   An array of bit mask of the subslices available, use subslice_slice_stride
                   to access this array.""")),

        Member("unsigned", "subslice_total",
               comment=dedent("""\
                   The number of enabled subslices (considering fusing). For exactly which
                   subslices are enabled, see subslice_masks[].""")),

        Member("uint8_t", "eu_masks",
               array=("INTEL_DEVICE_MAX_SLICES * INTEL_DEVICE_MAX_SUBSLICES * "
                      "DIV_ROUND_UP(INTEL_DEVICE_MAX_EUS_PER_SUBSLICE, 8)"),
               comment=dedent("""\
                   An array of bit mask of EUs available, use eu_slice_stride &
                   eu_subslice_stride to access this array.""")),

        Member("uint16_t", "subslice_slice_stride",
               compiler_field=True,
               comment="Stride to access subslice_masks[]."),

        Member("uint16_t", "eu_slice_stride",
               comment="Strides to access eu_masks[]."),

        Member("uint16_t", "eu_subslice_stride"),
        Member("unsigned", "l3_banks"),

        # Per-stage thread limits
        Member("unsigned", "max_vs_threads",
               comment="Maximum Vertex Shader threads"),

        Member("unsigned", "max_tcs_threads",
               comment="Maximum Hull Shader threads"),

        Member("unsigned", "max_tes_threads",
               comment="Maximum Domain Shader threads"),

        Member("unsigned", "max_gs_threads",
               comment="Maximum Geometry Shader threads"),

        Member("unsigned", "max_wm_threads",
               comment=dedent("""\
                   Theoretical maximum number of Pixel Shader threads.

                   PSD means Pixel Shader Dispatcher. On modern Intel GPUs, hardware will
                   automatically scale pixel shader thread count, based on a single value
                   programmed into 3DSTATE_PS.

                   To calculate the maximum number of threads for Gfx8 beyond (which have
                   multiple Pixel Shader Dispatchers):

                   - Look up 3DSTATE_PS and find "Maximum Number of Threads Per PSD"
                   - Usually there's only one PSD per subslice, so use the number of
                   subslices for number of PSDs.
                   - For max_wm_threads, the total should be PSD threads * #PSDs.""")),

        Member("unsigned", "max_threads_per_psd"),

        Member("unsigned", "max_cs_threads",
               comment=dedent("""\
                   Maximum Compute Shader threads per subslice.
                   Actual maximum compute shader threads is max_cs_threads * subslices.

                   Thread count * number of EUs per subslice""")),

        Member("unsigned", "max_cs_workgroup_threads",
               compiler_field=True,
               comment=dedent("""\
                   Maximum number of threads per workgroup supported by the GPGPU_WALKER or
                   COMPUTE_WALKER command.

                   This may be smaller than max_cs_threads as it takes into account added
                   restrictions on the GPGPU/COMPUTE_WALKER commands. While max_cs_threads
                   expresses the total parallelism of the GPU, this expresses the maximum
                   number of threads we can dispatch in a single workgroup.""")),

        Member("unsigned", "max_scratch_ids",
               array="MESA_SHADER_STAGES",
               compiler_field=True,
               comment=dedent("""\
                   The maximum number of potential scratch ids. Due to hardware
                   implementation details, the range of scratch ids may be larger than the
                   number of subslices.""")),

        Member("uint32_t", "max_scratch_size_per_thread", compiler_field=True),

        Member("intel_device_info_urb_desc", "urb"),
        Member("unsigned", "max_constant_urb_size_kb"),
        Member("unsigned", "mesh_max_constant_urb_size_kb"),
        Member("unsigned", "engine_class_prefetch",
               array="INTEL_ENGINE_CLASS_INVALID"),
        Member("unsigned", "engine_class_supported_count",
               array="INTEL_ENGINE_CLASS_INVALID"),
        Member("unsigned", "mem_alignment"),
        Member("uint64_t", "timestamp_frequency"),
        Member("uint64_t", "aperture_bytes"),
        Member("uint64_t", "gtt_size"),
        Member("int", "simulator_id"),
        Member("char", "name", array="INTEL_DEVICE_MAX_NAME_SIZE"),
        Member("bool", "no_hw"),
        Member("bool", "probe_forced", comment="Device needed INTEL_FORCE_PROBE"),
        Member("intel_device_info_mem_desc", "mem"),
        Member("intel_device_info_pat_desc", "pat"),
        Member("intel_cooperative_matrix_configuration",
               "cooperative_matrix_configurations",
               array=16),
    ],
)