| /* |
| * Copyright 2024 Advanced Micro Devices, Inc. |
| * |
| * SPDX-License-Identifier: MIT |
| */ |
| |
| #ifndef AC_NIR_META_H |
| #define AC_NIR_META_H |
| |
| #include "ac_gpu_info.h" |
| #include "nir_defines.h" |
| #include "util/box.h" |
| |
| union ac_ps_resolve_key { |
| struct { |
| bool use_aco:1; |
| bool src_is_array:1; |
| uint8_t log_samples:2; |
| uint8_t last_src_channel:2; /* this shouldn't be greater than last_dst_channel */ |
| uint8_t last_dst_channel:2; |
| bool x_clamp_to_edge:1; |
| bool y_clamp_to_edge:1; |
| bool a16:1; |
| bool d16:1; |
| }; |
| uint64_t key; /* use with hash_table_u64 */ |
| }; |
| |
| /* Only immutable settings. */ |
| struct ac_ps_resolve_options { |
| const nir_shader_compiler_options *nir_options; |
| const struct radeon_info *info; |
| bool use_aco; /* global driver setting */ |
| bool no_fmask; /* FMASK disabled by a debug option, ignored on GFX11+ */ |
| bool print_key; /* print ac_ps_resolve_key into stderr */ |
| }; |
| |
| nir_shader * |
| ac_create_resolve_ps(const struct ac_ps_resolve_options *options, |
| const union ac_ps_resolve_key *key); |
| |
| /* Universal optimized compute shader for image blits and clears. */ |
| #define SI_MAX_COMPUTE_BLIT_LANE_SIZE 16 |
| #define SI_MAX_COMPUTE_BLIT_SAMPLES 8 |
| |
| /* This describes all possible variants of the compute blit shader. */ |
| union ac_cs_blit_key { |
| struct { |
| bool use_aco:1; |
| /* Workgroup settings. */ |
| uint8_t wg_dim:2; /* 1, 2, or 3 */ |
| bool has_start_xyz:1; |
| /* The size of a block of pixels that a single thread will process. */ |
| uint8_t log_lane_width:3; |
| uint8_t log_lane_height:2; |
| uint8_t log_lane_depth:2; |
| /* Declaration modifiers. */ |
| bool is_clear:1; |
| bool src_is_1d:1; |
| bool dst_is_1d:1; |
| bool src_is_msaa:1; |
| bool dst_is_msaa:1; |
| bool src_has_z:1; |
| bool dst_has_z:1; |
| bool a16:1; |
| bool d16:1; |
| uint8_t log_samples:2; |
| bool sample0_only:1; /* src is MSAA, dst is not MSAA, log2_samples is ignored */ |
| /* Source coordinate modifiers. */ |
| bool x_clamp_to_edge:1; |
| bool y_clamp_to_edge:1; |
| bool flip_x:1; |
| bool flip_y:1; |
| /* Output modifiers. */ |
| bool sint_to_uint:1; |
| bool uint_to_sint:1; |
| bool dst_is_srgb:1; |
| bool use_integer_one:1; |
| uint8_t last_src_channel:2; /* this shouldn't be greater than last_dst_channel */ |
| uint8_t last_dst_channel:2; |
| }; |
| uint64_t key; |
| }; |
| |
| struct ac_cs_blit_options { |
| /* Global options. */ |
| const nir_shader_compiler_options *nir_options; |
| const struct radeon_info *info; |
| bool use_aco; /* global driver setting */ |
| bool no_fmask; /* FMASK disabled by a global debug option, ignored on GFX11+ */ |
| bool print_key; /* print ac_ps_resolve_key into stderr */ |
| bool fail_if_slow; /* fail if a gfx blit is faster, set to false on compute queues */ |
| |
| bool is_nested; /* for internal use, don't set */ |
| }; |
| |
| struct ac_cs_blit_description |
| { |
| struct { |
| struct radeon_surf *surf; |
| uint8_t dim; /* 1 = 1D texture, 2 = 2D texture, 3 = 3D texture */ |
| bool is_array; /* array or cube texture */ |
| unsigned width0; /* level 0 width */ |
| unsigned height0; /* level 0 height */ |
| uint8_t num_samples; |
| uint8_t level; |
| struct pipe_box box; /* negative width, height only legal for src */ |
| enum pipe_format format; /* format reinterpretation */ |
| } dst, src; |
| |
| bool is_gfx_queue; |
| bool dst_has_dcc; |
| bool sample0_only; /* copy sample 0 instead of resolving */ |
| union pipe_color_union clear_color; /* if src.surf == NULL, this is the clear color */ |
| }; |
| |
| /* Dispatch parameters generated by the blit. */ |
| struct ac_cs_blit_dispatch { |
| union ac_cs_blit_key shader_key; |
| uint32_t user_data[8]; /* for nir_intrinsic_load_user_data_amd */ |
| |
| unsigned wg_size[3]; /* variable workgroup size (NUM_THREAD_FULL) */ |
| unsigned last_wg_size[3]; /* workgroup size of the last workgroup (NUM_THREAD_PARTIAL) */ |
| unsigned num_workgroups[3]; /* DISPATCH_DIRECT parameters */ |
| }; |
| |
| struct ac_cs_blit_dispatches { |
| unsigned num_dispatches; |
| struct ac_cs_blit_dispatch dispatches[7]; |
| }; |
| |
| nir_shader * |
| ac_create_blit_cs(const struct ac_cs_blit_options *options, const union ac_cs_blit_key *key); |
| |
| bool |
| ac_prepare_compute_blit(const struct ac_cs_blit_options *options, |
| const struct ac_cs_blit_description *blit, |
| struct ac_cs_blit_dispatches *dispatches); |
| |
| /* clear_buffer/copy_buffer compute shader. */ |
| union ac_cs_clear_copy_buffer_key { |
| struct { |
| bool is_clear:1; |
| unsigned dwords_per_thread:3; /* 1..4 allowed */ |
| bool clear_value_size_is_12:1; |
| bool src_is_sparse:1; |
| /* Unaligned clears and copies. */ |
| unsigned src_align_offset:2; /* how much is the source address unaligned */ |
| unsigned dst_align_offset:4; /* the first thread shouldn't write this many bytes */ |
| unsigned dst_last_thread_bytes:4; /* if non-zero, the last thread should write this many bytes */ |
| bool dst_single_thread_unaligned:1; /* only 1 thread executes, both previous fields apply */ |
| bool has_start_thread:1; /* whether the first few threads should be skipped, making later |
| waves start on a 256B boundary */ |
| }; |
| uint64_t key; |
| }; |
| |
| struct ac_cs_clear_copy_buffer_options { |
| const nir_shader_compiler_options *nir_options; |
| const struct radeon_info *info; |
| bool print_key; /* print the shader key into stderr */ |
| bool fail_if_slow; /* fail if a gfx blit is faster, set to false on compute queues */ |
| }; |
| |
| struct ac_cs_clear_copy_buffer_info { |
| unsigned dst_offset; |
| unsigned src_offset; |
| unsigned size; |
| unsigned clear_value_size; |
| uint32_t clear_value[4]; |
| unsigned dwords_per_thread; /* Set to 0 to let the code choose the optimal value. */ |
| bool render_condition_enabled; |
| bool dst_is_vram; |
| bool src_is_vram; |
| bool src_is_sparse; |
| }; |
| |
| struct ac_cs_clear_copy_buffer_dispatch { |
| union ac_cs_clear_copy_buffer_key shader_key; |
| uint32_t user_data[6]; /* for nir_intrinsic_load_user_data_amd */ |
| unsigned num_ssbos; |
| unsigned workgroup_size; |
| unsigned num_threads; |
| |
| struct { |
| unsigned offset; |
| unsigned size; |
| } ssbo[2]; |
| }; |
| |
| nir_shader * |
| ac_create_clear_copy_buffer_cs(struct ac_cs_clear_copy_buffer_options *options, |
| union ac_cs_clear_copy_buffer_key *key); |
| |
| bool |
| ac_prepare_cs_clear_copy_buffer(const struct ac_cs_clear_copy_buffer_options *options, |
| const struct ac_cs_clear_copy_buffer_info *info, |
| struct ac_cs_clear_copy_buffer_dispatch *out); |
| |
| #endif |