blob: dd3edad902f804984508022d8211795af21a94a8 [file] [log] [blame]
/*
* Copyright 2024 Advanced Micro Devices, Inc.
*
* SPDX-License-Identifier: MIT
*/
#ifndef AC_NIR_META_H
#define AC_NIR_META_H
#include "ac_gpu_info.h"
#include "nir_defines.h"
#include "util/box.h"
union ac_ps_resolve_key {
struct {
bool use_aco:1;
bool src_is_array:1;
uint8_t log_samples:2;
uint8_t last_src_channel:2; /* this shouldn't be greater than last_dst_channel */
uint8_t last_dst_channel:2;
bool x_clamp_to_edge:1;
bool y_clamp_to_edge:1;
bool a16:1;
bool d16:1;
};
uint64_t key; /* use with hash_table_u64 */
};
/* Only immutable settings. */
struct ac_ps_resolve_options {
const nir_shader_compiler_options *nir_options;
const struct radeon_info *info;
bool use_aco; /* global driver setting */
bool no_fmask; /* FMASK disabled by a debug option, ignored on GFX11+ */
bool print_key; /* print ac_ps_resolve_key into stderr */
};
nir_shader *
ac_create_resolve_ps(const struct ac_ps_resolve_options *options,
const union ac_ps_resolve_key *key);
/* Universal optimized compute shader for image blits and clears. */
#define SI_MAX_COMPUTE_BLIT_LANE_SIZE 16
#define SI_MAX_COMPUTE_BLIT_SAMPLES 8
/* This describes all possible variants of the compute blit shader. */
union ac_cs_blit_key {
struct {
bool use_aco:1;
/* Workgroup settings. */
uint8_t wg_dim:2; /* 1, 2, or 3 */
bool has_start_xyz:1;
/* The size of a block of pixels that a single thread will process. */
uint8_t log_lane_width:3;
uint8_t log_lane_height:2;
uint8_t log_lane_depth:2;
/* Declaration modifiers. */
bool is_clear:1;
bool src_is_1d:1;
bool dst_is_1d:1;
bool src_is_msaa:1;
bool dst_is_msaa:1;
bool src_has_z:1;
bool dst_has_z:1;
bool a16:1;
bool d16:1;
uint8_t log_samples:2;
bool sample0_only:1; /* src is MSAA, dst is not MSAA, log2_samples is ignored */
/* Source coordinate modifiers. */
bool x_clamp_to_edge:1;
bool y_clamp_to_edge:1;
bool flip_x:1;
bool flip_y:1;
/* Output modifiers. */
bool sint_to_uint:1;
bool uint_to_sint:1;
bool dst_is_srgb:1;
bool use_integer_one:1;
uint8_t last_src_channel:2; /* this shouldn't be greater than last_dst_channel */
uint8_t last_dst_channel:2;
};
uint64_t key;
};
struct ac_cs_blit_options {
/* Global options. */
const nir_shader_compiler_options *nir_options;
const struct radeon_info *info;
bool use_aco; /* global driver setting */
bool no_fmask; /* FMASK disabled by a global debug option, ignored on GFX11+ */
bool print_key; /* print ac_ps_resolve_key into stderr */
bool fail_if_slow; /* fail if a gfx blit is faster, set to false on compute queues */
bool is_nested; /* for internal use, don't set */
};
struct ac_cs_blit_description
{
struct {
struct radeon_surf *surf;
uint8_t dim; /* 1 = 1D texture, 2 = 2D texture, 3 = 3D texture */
bool is_array; /* array or cube texture */
unsigned width0; /* level 0 width */
unsigned height0; /* level 0 height */
uint8_t num_samples;
uint8_t level;
struct pipe_box box; /* negative width, height only legal for src */
enum pipe_format format; /* format reinterpretation */
} dst, src;
bool is_gfx_queue;
bool dst_has_dcc;
bool sample0_only; /* copy sample 0 instead of resolving */
union pipe_color_union clear_color; /* if src.surf == NULL, this is the clear color */
};
/* Dispatch parameters generated by the blit. */
struct ac_cs_blit_dispatch {
union ac_cs_blit_key shader_key;
uint32_t user_data[8]; /* for nir_intrinsic_load_user_data_amd */
unsigned wg_size[3]; /* variable workgroup size (NUM_THREAD_FULL) */
unsigned last_wg_size[3]; /* workgroup size of the last workgroup (NUM_THREAD_PARTIAL) */
unsigned num_workgroups[3]; /* DISPATCH_DIRECT parameters */
};
struct ac_cs_blit_dispatches {
unsigned num_dispatches;
struct ac_cs_blit_dispatch dispatches[7];
};
nir_shader *
ac_create_blit_cs(const struct ac_cs_blit_options *options, const union ac_cs_blit_key *key);
bool
ac_prepare_compute_blit(const struct ac_cs_blit_options *options,
const struct ac_cs_blit_description *blit,
struct ac_cs_blit_dispatches *dispatches);
/* clear_buffer/copy_buffer compute shader. */
union ac_cs_clear_copy_buffer_key {
struct {
bool is_clear:1;
unsigned dwords_per_thread:3; /* 1..4 allowed */
bool clear_value_size_is_12:1;
bool src_is_sparse:1;
/* Unaligned clears and copies. */
unsigned src_align_offset:2; /* how much is the source address unaligned */
unsigned dst_align_offset:4; /* the first thread shouldn't write this many bytes */
unsigned dst_last_thread_bytes:4; /* if non-zero, the last thread should write this many bytes */
bool dst_single_thread_unaligned:1; /* only 1 thread executes, both previous fields apply */
bool has_start_thread:1; /* whether the first few threads should be skipped, making later
waves start on a 256B boundary */
};
uint64_t key;
};
struct ac_cs_clear_copy_buffer_options {
const nir_shader_compiler_options *nir_options;
const struct radeon_info *info;
bool print_key; /* print the shader key into stderr */
bool fail_if_slow; /* fail if a gfx blit is faster, set to false on compute queues */
};
struct ac_cs_clear_copy_buffer_info {
unsigned dst_offset;
unsigned src_offset;
unsigned size;
unsigned clear_value_size;
uint32_t clear_value[4];
unsigned dwords_per_thread; /* Set to 0 to let the code choose the optimal value. */
bool render_condition_enabled;
bool dst_is_vram;
bool src_is_vram;
bool src_is_sparse;
};
struct ac_cs_clear_copy_buffer_dispatch {
union ac_cs_clear_copy_buffer_key shader_key;
uint32_t user_data[6]; /* for nir_intrinsic_load_user_data_amd */
unsigned num_ssbos;
unsigned workgroup_size;
unsigned num_threads;
struct {
unsigned offset;
unsigned size;
} ssbo[2];
};
nir_shader *
ac_create_clear_copy_buffer_cs(struct ac_cs_clear_copy_buffer_options *options,
union ac_cs_clear_copy_buffer_key *key);
bool
ac_prepare_cs_clear_copy_buffer(const struct ac_cs_clear_copy_buffer_options *options,
const struct ac_cs_clear_copy_buffer_info *info,
struct ac_cs_clear_copy_buffer_dispatch *out);
#endif