/*
* Copyright © 2016 Red Hat.
* Copyright © 2016 Bas Nieuwenhuizen
*
* based in part on anv driver which is:
* Copyright © 2015 Intel Corporation
*
* SPDX-License-Identifier: MIT
*/
#ifndef RADV_DEVICE_H
#define RADV_DEVICE_H
#include "ac_descriptors.h"
#include "ac_spm.h"
#include "ac_sqtt.h"
#include "util/mesa-blake3.h"
#include "radv_pipeline.h"
#include "radv_printf.h"
#include "radv_queue.h"
#include "radv_radeon_winsys.h"
#include "radv_rra.h"
#include "radv_shader.h"
#include "vk_acceleration_structure.h"
#include "vk_device.h"
#include "vk_meta.h"
#include "vk_texcompress_astc.h"
#include "vk_texcompress_etc2.h"
#define RADV_NUM_HW_CTX (RADEON_CTX_PRIORITY_REALTIME + 1)
struct radv_image_view;
enum radv_dispatch_table {
RADV_DEVICE_DISPATCH_TABLE,
RADV_ANNOTATE_DISPATCH_TABLE,
RADV_APP_DISPATCH_TABLE,
RADV_RGP_DISPATCH_TABLE,
RADV_RRA_DISPATCH_TABLE,
RADV_RMV_DISPATCH_TABLE,
RADV_CTX_ROLL_DISPATCH_TABLE,
RADV_DISPATCH_TABLE_COUNT,
};
struct radv_layer_dispatch_tables {
struct vk_device_dispatch_table annotate;
struct vk_device_dispatch_table app;
struct vk_device_dispatch_table rgp;
struct vk_device_dispatch_table rra;
struct vk_device_dispatch_table rmv;
struct vk_device_dispatch_table ctx_roll;
};
struct radv_device_cache_key {
uint32_t keep_shader_info : 1;
uint32_t image_2d_view_of_3d : 1;
uint32_t mesh_shader_queries : 1;
uint32_t primitives_generated_query : 1;
uint32_t trap_excp_flags : 4;
uint32_t reserved : 24;
};
enum radv_force_vrs {
RADV_FORCE_VRS_1x1 = 0,
RADV_FORCE_VRS_2x2,
RADV_FORCE_VRS_2x1,
RADV_FORCE_VRS_1x2,
};
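/* Watches the RADV_FORCE_VRS config file for runtime changes; the fd/watch
 * pair suggests inotify, serviced by the dedicated thread below. */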
struct radv_notifier {
int fd;
int watch;
bool quit;
thrd_t thread;
};
struct radv_meta_state {
VkAllocationCallbacks alloc;
VkPipelineCache cache;
uint32_t initial_cache_entries;
/*
 * Serializes on-demand pipeline creation so that only one thread builds a
 * given meta pipeline at a time (see the sketch below this struct).
 */
mtx_t mtx;
struct {
struct radix_sort_vk *radix_sort;
struct vk_acceleration_structure_build_ops build_ops;
struct vk_acceleration_structure_build_args build_args;
struct {
VkBuffer buffer;
VkDeviceMemory memory;
VkAccelerationStructureKHR accel_struct;
} null;
} accel_struct_build;
struct vk_texcompress_etc2_state etc_decode;
struct vk_texcompress_astc_state *astc_decode;
struct vk_meta_device device;
};
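/*
 * Minimal sketch (hypothetical helper, not part of the driver) of how
 * meta_state.mtx serializes on-demand pipeline creation: the cached handle
 * is re-checked under the lock, so only the first thread actually builds.
 */
static inline VkResult
radv_meta_build_pipeline_once_example(struct radv_meta_state *state, VkPipeline *cached_pipeline,
                                      VkResult (*build_cb)(void *ctx), void *ctx)
{
   VkResult result = VK_SUCCESS;
   mtx_lock(&state->mtx);
   if (*cached_pipeline == VK_NULL_HANDLE)
      result = build_cb(ctx); /* expected to set *cached_pipeline on success */
   mtx_unlock(&state->mtx);
   return result;
}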
struct radv_memory_trace_data {
/* ID of the PTE update event in ftrace data */
uint16_t ftrace_update_ptes_id;
uint32_t num_cpus;
int *pipe_fds;
};
struct radv_sqtt_timestamp {
uint8_t *map;
unsigned offset;
uint64_t size;
struct radeon_winsys_bo *bo;
struct list_head list;
};
#define RADV_BORDER_COLOR_COUNT 4096
#define RADV_BORDER_COLOR_BUFFER_SIZE (sizeof(VkClearColorValue) * RADV_BORDER_COLOR_COUNT)
struct radv_device_border_color_data {
bool used[RADV_BORDER_COLOR_COUNT];
struct radeon_winsys_bo *bo;
VkClearColorValue *colors_gpu_ptr;
/* A mutex is required to guarantee vkCreateSampler thread safety, because we
 * both scan the occupancy array and write the GPU-visible buffer (see the
 * sketch below this struct). */
mtx_t mutex;
};
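/*
 * Minimal sketch (hypothetical helper, not part of the driver) of border
 * color slot allocation: the occupancy scan and the write through
 * colors_gpu_ptr must happen atomically with respect to concurrent
 * vkCreateSampler calls, hence the mutex.
 */
static inline int
radv_border_color_alloc_slot_example(struct radv_device_border_color_data *data,
                                     const VkClearColorValue *value)
{
   int slot = -1;
   mtx_lock(&data->mutex);
   for (unsigned i = 0; i < RADV_BORDER_COLOR_COUNT; i++) {
      if (!data->used[i]) {
         data->used[i] = true;
         data->colors_gpu_ptr[i] = *value; /* write into the GPU-visible table */
         slot = (int)i;
         break;
      }
   }
   mtx_unlock(&data->mutex);
   return slot; /* -1 if all slots are taken */
}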
struct radv_pso_cache_stats {
uint32_t hits;
uint32_t misses;
};
struct radv_device {
struct vk_device vk;
struct radeon_winsys *ws;
struct radv_layer_dispatch_tables layer_dispatch;
struct radeon_winsys_ctx *hw_ctx[RADV_NUM_HW_CTX];
struct radv_meta_state meta_state;
struct radv_queue *queues[RADV_MAX_QUEUE_FAMILIES];
int queue_count[RADV_MAX_QUEUE_FAMILIES];
bool pbb_allowed;
uint32_t scratch_waves;
uint32_t dispatch_initiator;
uint32_t dispatch_initiator_task;
/* MSAA sample locations.
* The first index is the sample index.
* The second index is the coordinate: X, Y. */
float sample_locations_1x[1][2];
float sample_locations_2x[2][2];
float sample_locations_4x[4][2];
float sample_locations_8x[8][2];
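/* e.g. the Y offset of sample 2 at 8x MSAA is sample_locations_8x[2][1]. */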
/* GFX7 and later */
uint32_t gfx_init_size_dw;
struct radeon_winsys_bo *gfx_init;
struct radeon_winsys_bo *trace_bo;
struct radv_trace_data *trace_data;
/* Whether to keep shader debug info, for debugging. */
bool keep_shader_info;
/* Backup in-memory cache to be used if the app doesn't provide one */
struct vk_pipeline_cache *mem_cache;
/*
 * Use different counters so MSAA MRTs get consecutive surface indices,
 * even if FMASK is allocated in between.
 */
uint32_t image_mrt_offset_counter;
uint32_t fmask_mrt_offset_counter;
struct list_head shader_arenas;
struct hash_table_u64 *capture_replay_arena_vas;
unsigned shader_arena_shift;
uint8_t shader_free_list_mask;
struct radv_shader_free_list shader_free_list;
struct radv_shader_free_list capture_replay_free_list;
struct list_head shader_block_obj_pool;
mtx_t shader_arena_mutex;
mtx_t shader_upload_hw_ctx_mutex;
struct radeon_winsys_ctx *shader_upload_hw_ctx;
VkSemaphore shader_upload_sem;
uint64_t shader_upload_seq;
struct list_head shader_dma_submissions;
mtx_t shader_dma_submission_list_mutex;
cnd_t shader_dma_submission_list_cond;
/* Whether to DMA shaders to invisible VRAM or to upload directly through BAR. */
bool shader_use_invisible_vram;
/* Whether to inline the compute dispatch size in user SGPRs. */
bool load_grid_size_from_user_sgpr;
/* Whether the driver uses a global BO list. */
bool use_global_bo_list;
/* Whether anisotropy is forced with RADV_TEX_ANISO (-1 is disabled). */
int force_aniso;
/* Always disable TRUNC_COORD. */
bool disable_trunc_coord;
struct radv_device_border_color_data border_color_data;
/* Thread trace. */
struct ac_sqtt sqtt;
bool sqtt_enabled;
bool sqtt_triggered;
/* SQTT timestamps for queue events. */
simple_mtx_t sqtt_timestamp_mtx;
struct radv_sqtt_timestamp sqtt_timestamp;
/* SQTT timed cmd buffers. */
simple_mtx_t sqtt_command_pool_mtx;
struct vk_command_pool *sqtt_command_pool[2];
/* Memory trace. */
struct radv_memory_trace_data memory_trace;
/* SPM. */
struct ac_spm spm;
/* Radeon Raytracing Analyzer trace. */
struct radv_rra_trace_data rra_trace;
FILE *ctx_roll_file;
simple_mtx_t ctx_roll_mtx;
/* Trap handler. */
struct radv_shader *trap_handler_shader;
struct radeon_winsys_bo *tma_bo; /* Trap Memory Address */
uint32_t *tma_ptr;
/* Overallocation. */
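/* Allocations are accounted per heap under overallocation_mutex; when
 * overallocation_disallowed is set, requests beyond the heap size fail. */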
bool overallocation_disallowed;
uint64_t allocated_memory_size[VK_MAX_MEMORY_HEAPS];
mtx_t overallocation_mutex;
/* RADV_FORCE_VRS. */
struct radv_notifier notifier;
enum radv_force_vrs force_vrs;
/* Depth image for VRS when not bound by the app. */
struct {
struct radv_image *image;
struct radv_buffer *buffer; /* HTILE */
struct radv_device_memory *mem;
} vrs;
/* PRIME blit SDMA queue. */
struct radv_queue *private_sdma_queue;
struct radv_shader_part_cache vs_prologs;
struct radv_shader_part *simple_vs_prologs[MAX_VERTEX_ATTRIBS];
struct radv_shader_part *instance_rate_vs_prologs[816];
struct radv_shader_part_cache ps_epilogs;
simple_mtx_t trace_mtx;
/* Whether per-vertex VRS is forced. */
bool force_vrs_enabled;
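/* Stable power-state (pstate) refcount, used while profiling. */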
simple_mtx_t pstate_mtx;
unsigned pstate_cnt;
/* BO containing performance counter helpers:
 * - a lock for profiling command buffers,
 * - a temporary fence for end-of-query synchronization,
 * - the pass to use for profiling (as an array of bools).
 */
struct radeon_winsys_bo *perf_counter_bo;
/* Interleaved lock/unlock command buffers for perf counter passes. */
struct radeon_cmdbuf **perf_counter_lock_cs;
bool uses_shadow_regs;
struct hash_table *rt_handles;
simple_mtx_t rt_handles_mtx;
struct radv_printf_data printf;
struct radv_device_cache_key cache_key;
blake3_hash cache_hash;
/* Not NULL if a GPU hang report has been generated for VK_EXT_device_fault. */
char *gpu_hang_report;
/* PSO cache stats */
simple_mtx_t pso_cache_stats_mtx;
struct radv_pso_cache_stats pso_cache_stats[RADV_PIPELINE_TYPE_COUNT];
struct radv_address_binding_tracker *addr_binding_tracker;
};
VK_DEFINE_HANDLE_CASTS(radv_device, vk.base, VkDevice, VK_OBJECT_TYPE_DEVICE)
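/* The cast macro above generates radv_device_from_handle()/_to_handle();
 * entry points typically recover the driver struct via
 * VK_FROM_HANDLE(radv_device, device, _device). */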
static inline struct radv_physical_device *
radv_device_physical(const struct radv_device *dev)
{
return (struct radv_physical_device *)dev->vk.physical;
}
static inline bool
radv_uses_primitives_generated_query(const struct radv_device *device)
{
return device->vk.enabled_features.primitivesGeneratedQuery ||
device->vk.enabled_features.primitivesGeneratedQueryWithRasterizerDiscard ||
device->vk.enabled_features.primitivesGeneratedQueryWithNonZeroStreams;
}
static inline bool
radv_uses_image_float32_atomics(const struct radv_device *device)
{
return device->vk.enabled_features.shaderImageFloat32Atomics ||
device->vk.enabled_features.sparseImageFloat32Atomics ||
device->vk.enabled_features.shaderImageFloat32AtomicMinMax ||
device->vk.enabled_features.sparseImageFloat32AtomicMinMax;
}
VkResult radv_device_init_vrs_state(struct radv_device *device);
unsigned radv_get_default_max_sample_dist(int log_samples);
void radv_emit_default_sample_locations(const struct radv_physical_device *pdev, struct radeon_cmdbuf *cs,
int nr_samples);
struct radv_color_buffer_info {
struct ac_cb_surface ac;
};
struct radv_ds_buffer_info {
struct ac_ds_surface ac;
uint32_t db_render_override2;
uint32_t db_render_control;
};
void radv_initialise_color_surface(struct radv_device *device, struct radv_color_buffer_info *cb,
struct radv_image_view *iview);
void radv_initialise_vrs_surface(struct radv_image *image, struct radv_buffer *htile_buffer,
struct radv_ds_buffer_info *ds);
void radv_initialise_ds_surface(const struct radv_device *device, struct radv_ds_buffer_info *ds,
struct radv_image_view *iview, VkImageAspectFlags ds_aspects);
void radv_gfx11_set_db_render_control(const struct radv_device *device, unsigned num_samples,
unsigned *db_render_control);
bool radv_device_set_pstate(struct radv_device *device, bool enable);
bool radv_device_acquire_performance_counters(struct radv_device *device);
void radv_device_release_performance_counters(struct radv_device *device);
bool radv_device_should_clear_vram(const struct radv_device *device);
#endif /* RADV_DEVICE_H */