blob: d692d1c3ebb51e944690fd23af35553ff7bf041d [file] [edit]
/*
* Copyright 2021 Valve Corporation
*
* SPDX-License-Identifier: MIT
*/
#ifndef AC_SPM_H
#define AC_SPM_H
#include <stdint.h>
#include "ac_perfcounter.h"
#include "util/u_dynarray.h"
struct ac_cmdbuf;
#define AC_SPM_MAX_COUNTER_PER_BLOCK 16
#define AC_SPM_GLOBAL_TIMESTAMP_COUNTERS 4 /* in unit of 16-bit counters*/
#define AC_SPM_NUM_COUNTER_PER_MUXSEL 16 /* 16 16-bit counters per muxsel */
#define AC_SPM_MUXSEL_LINE_SIZE ((AC_SPM_NUM_COUNTER_PER_MUXSEL * 2) / 4) /* in dwords */
#define AC_SPM_NUM_PERF_SEL 4
#define AC_SPM_RING_BASE_ALIGN 32
/* GFX10+ */
enum ac_spm_global_block {
AC_SPM_GLOBAL_BLOCK_CPG,
AC_SPM_GLOBAL_BLOCK_CPC,
AC_SPM_GLOBAL_BLOCK_CPF,
AC_SPM_GLOBAL_BLOCK_GDS,
AC_SPM_GLOBAL_BLOCK_GCR,
AC_SPM_GLOBAL_BLOCK_PH,
AC_SPM_GLOBAL_BLOCK_GE,
AC_SPM_GLOBAL_BLOCK_GE1 = AC_SPM_GLOBAL_BLOCK_GE,
AC_SPM_GLOBAL_BLOCK_GL2A,
AC_SPM_GLOBAL_BLOCK_GL2C,
AC_SPM_GLOBAL_BLOCK_SDMA,
AC_SPM_GLOBAL_BLOCK_GUS,
AC_SPM_GLOBAL_BLOCK_GCEA,
AC_SPM_GLOBAL_BLOCK_GCEA_CPWD = AC_SPM_GLOBAL_BLOCK_GCEA,
AC_SPM_GLOBAL_BLOCK_GCEA_SE = AC_SPM_GLOBAL_BLOCK_GCEA,
AC_SPM_GLOBAL_BLOCK_CHA,
AC_SPM_GLOBAL_BLOCK_CHC,
AC_SPM_GLOBAL_BLOCK_CHCG,
AC_SPM_GLOBAL_BLOCK_GPUVMATCL2,
AC_SPM_GLOBAL_BLOCK_GPUVMVML2,
AC_SPM_GLOBAL_BLOCK_GE2SE, /* Per-SE counters */
AC_SPM_GLOBAL_BLOCK_GE2DIST,
/* GFX11+ */
/* gap */
AC_SPM_GLOBAL_BLOCK_RSPM = 31,
};
enum ac_spm_se_block {
AC_SPM_SE_BLOCK_CB,
AC_SPM_SE_BLOCK_DB,
AC_SPM_SE_BLOCK_PA,
AC_SPM_SE_BLOCK_SX,
AC_SPM_SE_BLOCK_SC,
AC_SPM_SE_BLOCK_TA,
AC_SPM_SE_BLOCK_TD,
AC_SPM_SE_BLOCK_TCP,
AC_SPM_SE_BLOCK_SPI,
AC_SPM_SE_BLOCK_SQG,
AC_SPM_SE_BLOCK_GL1A,
AC_SPM_SE_BLOCK_RMI,
AC_SPM_SE_BLOCK_GL1C,
AC_SPM_SE_BLOCK_GL1CG,
/* GFX11+ */
AC_SPM_SE_BLOCK_CBR,
AC_SPM_SE_BLOCK_DBR,
AC_SPM_SE_BLOCK_GL1H,
AC_SPM_SE_BLOCK_SQC,
AC_SPM_SE_BLOCK_PC,
AC_SPM_SE_BLOCK_EA,
AC_SPM_SE_BLOCK_GE,
AC_SPM_SE_BLOCK_GL2A,
AC_SPM_SE_BLOCK_GL2C,
AC_SPM_SE_BLOCK_WGS,
AC_SPM_SE_BLOCK_GL1XA,
AC_SPM_SE_BLOCK_GL1XC,
AC_SPM_SE_BLOCK_UTCL1,
/* gap */
AC_SPM_SE_BLOCK_SE_RPM = 31,
};
enum ac_spm_segment_type {
AC_SPM_SEGMENT_TYPE_SE0,
AC_SPM_SEGMENT_TYPE_SE1,
AC_SPM_SEGMENT_TYPE_SE2,
AC_SPM_SEGMENT_TYPE_SE3,
AC_SPM_SEGMENT_TYPE_SE4,
AC_SPM_SEGMENT_TYPE_SE5,
AC_SPM_SEGMENT_TYPE_GLOBAL,
AC_SPM_SEGMENT_TYPE_COUNT,
};
enum ac_spm_raw_counter_id {
AC_SPM_TCP_PERF_SEL_REQ = 0,
AC_SPM_TCP_PERF_SEL_REQ_MISS,
AC_SPM_SQC_PERF_SEL_DCACHE_HITS,
AC_SPM_SQC_PERF_SEL_DCACHE_MISSES,
AC_SPM_SQC_PERF_SEL_DCACHE_MISSES_DUPLICATE,
AC_SPM_SQC_PERF_SEL_ICACHE_HITS,
AC_SPM_SQC_PERF_SEL_ICACHE_MISSES,
AC_SPM_SQC_PERF_SEL_ICACHE_MISSES_DUPLICATE,
AC_SPM_GL1C_PERF_SEL_REQ,
AC_SPM_GL1C_PERF_SEL_REQ_MISS,
AC_SPM_GL2C_PERF_SEL_REQ,
AC_SPM_GL2C_PERF_SEL_MISS,
AC_SPM_CPF_PERF_SEL_STAT_BUSY,
AC_SPM_SQC_PERF_SEL_LDS_BANK_CONFLICT,
AC_SPM_GL2C_PERF_SEL_EA_RDREQ_32B,
AC_SPM_GL2C_PERF_SEL_EA_RDREQ_64B,
AC_SPM_GL2C_PERF_SEL_EA_RDREQ_96B,
AC_SPM_GL2C_PERF_SEL_EA_RDREQ_128B,
AC_SPM_GL2C_PERF_SEL_EA_RDREQ_256B,
AC_SPM_GL2C_PERF_SEL_EA_WRREQ,
AC_SPM_GL2C_PERF_SEL_EA_WRREQ_64B,
AC_SPM_GCEA_PERF_SEL_SARB_DRAM_SIZED_REQUESTS,
AC_SPM_GCEA_CPWD_PERF_SEL_SARB_DRAM_RD_SIZE_REQ,
AC_SPM_GCEA_CPWD_PERF_SEL_SARB_DRAM_WR_SIZE_REQ,
AC_SPM_GCEA_PERF_SEL_SARB_IO_SIZED_REQUESTS,
AC_SPM_GCEA_SE_PERF_SEL_SARB_IO_RD_SIZE_REQ,
AC_SPM_GCEA_SE_PERF_SEL_SARB_IO_WR_SIZE_REQ,
AC_SPM_TA_PERF_SEL_TA_BUSY,
AC_SPM_TCP_PERF_SEL_TCP_TA_REQ_STALL,
AC_SPM_TD_PERF_SEL_RAY_TRACING_BVH4_TRI_NODE,
AC_SPM_TD_PERF_SEL_RAY_TRACING_BVH4_FP16_BOX_NODE,
AC_SPM_TD_PERF_SEL_RAY_TRACING_BVH4_FP32_BOX_NODE,
AC_SPM_GL2C_PERF_SEL_EA_WRREQ_STALL,
AC_SPM_RAW_COUNTER_ID_COUNT,
};
enum ac_spm_raw_counter_op {
AC_SPM_RAW_COUNTER_OP_SUM = 0,
AC_SPM_RAW_COUNTER_OP_MAX,
};
struct ac_spm_counter_descr {
enum ac_spm_raw_counter_id id;
enum ac_pc_gpu_block gpu_block;
uint32_t event_id;
};
struct ac_spm_counter_create_info {
struct ac_spm_counter_descr *b;
uint32_t instance;
};
union ac_spm_muxsel {
struct {
uint16_t counter : 6;
uint16_t block : 4;
uint16_t shader_array : 1; /* 0: SA0, 1: SA1 */
uint16_t instance : 5;
} gfx10;
struct {
uint16_t counter : 5;
uint16_t instance : 5;
uint16_t shader_array : 1;
uint16_t block : 5;
} gfx11;
uint16_t value;
};
struct ac_spm_muxsel_line {
union ac_spm_muxsel muxsel[AC_SPM_NUM_COUNTER_PER_MUXSEL];
};
struct ac_spm_counter_info {
/* General info. */
enum ac_spm_raw_counter_id id;
enum ac_pc_gpu_block gpu_block;
uint32_t instance;
uint32_t event_id;
/* Muxsel info. */
enum ac_spm_segment_type segment_type;
bool is_even;
union ac_spm_muxsel muxsel;
/* Output info. */
uint64_t offset;
};
struct ac_spm_counter_select {
uint8_t active; /* mask of used 16-bit counters. */
uint32_t sel0;
uint32_t sel1;
};
struct ac_spm_block_instance {
uint32_t grbm_gfx_index;
uint32_t num_counters;
struct ac_spm_counter_select counters[AC_SPM_MAX_COUNTER_PER_BLOCK];
};
struct ac_spm_block_select {
const struct ac_pc_block *b;
uint32_t num_instances;
struct ac_spm_block_instance *instances;
};
struct ac_spm {
/* struct radeon_winsys_bo or struct si_resource */
void *bo;
void *ptr;
uint8_t ptr_granularity;
uint32_t buffer_size;
uint16_t sample_interval;
/* Enabled counters. */
unsigned num_counters;
struct ac_spm_counter_info *counters;
/* Block/counters selection. */
uint32_t num_block_sel;
struct ac_spm_block_select *block_sel;
struct {
uint32_t num_counters;
struct ac_spm_counter_select counters[16];
} sqg[AC_SPM_SEGMENT_TYPE_GLOBAL];
struct {
uint32_t grbm_gfx_index;
uint32_t num_counters;
struct ac_spm_counter_select counters[16];
} sq_wgp[AMD_MAX_WGP];
/* Muxsel lines. */
unsigned num_muxsel_lines[AC_SPM_SEGMENT_TYPE_COUNT];
struct ac_spm_muxsel_line *muxsel_lines[AC_SPM_SEGMENT_TYPE_COUNT];
unsigned max_se_muxsel_lines;
};
struct ac_spm_trace {
void *ptr;
uint16_t sample_interval;
unsigned num_counters;
struct ac_spm_counter_info *counters;
uint32_t sample_size_in_bytes;
uint32_t num_samples;
};
enum ac_spm_group_id {
AC_SPM_GROUP_CACHE,
AC_SPM_GROUP_LDS,
AC_SPM_GROUP_MEMORY_BYTES,
AC_SPM_GROUP_MEMORY_PERCENTAGE,
AC_SPM_GROUP_RT,
AC_SPM_GROUP_COUNT,
};
enum ac_spm_counter_id {
AC_SPM_COUNTER_INST_CACHE_HIT,
AC_SPM_COUNTER_SCALAR_CACHE_HIT,
AC_SPM_COUNTER_L0_CACHE_HIT,
AC_SPM_COUNTER_L1_CACHE_HIT, /* < GFX12 */
AC_SPM_COUNTER_L2_CACHE_HIT,
AC_SPM_COUNTER_CS_LDS_BANK_CONFLICT,
AC_SPM_COUNTER_FETCH_SIZE,
AC_SPM_COUNTER_WRITE_SIZE,
AC_SPM_COUNTER_LOCAL_VID_MEM_BYTES,
AC_SPM_COUNTER_PCIE_BYTES,
AC_SPM_COUNTER_MEM_UNIT_BUSY,
AC_SPM_COUNTER_MEM_UNIT_STALLED,
AC_SPM_COUNTER_RAY_BOX_TESTS,
AC_SPM_COUNTER_RAY_TRI_TESTS,
AC_SPM_COUNTER_WRITE_UNIT_STALLED, /* GFX12+ */
AC_SPM_COUNTER_COUNT,
};
enum ac_spm_component_id {
AC_SPM_COMPONENT_INST_CACHE_REQUEST_COUNT,
AC_SPM_COMPONENT_INST_CACHE_HIT_COUNT,
AC_SPM_COMPONENT_INST_CACHE_MISS_COUNT,
AC_SPM_COMPONENT_SCALAR_CACHE_REQUEST_COUNT,
AC_SPM_COMPONENT_SCALAR_CACHE_HIT_COUNT,
AC_SPM_COMPONENT_SCALAR_CACHE_MISS_COUNT,
AC_SPM_COMPONENT_L0_CACHE_REQUEST_COUNT,
AC_SPM_COMPONENT_L0_CACHE_HIT_COUNT,
AC_SPM_COMPONENT_L0_CACHE_MISS_COUNT,
AC_SPM_COMPONENT_L1_CACHE_REQUEST_COUNT, /* < GFX12 */
AC_SPM_COMPONENT_L1_CACHE_HIT_COUNT, /* < GFX12 */
AC_SPM_COMPONENT_L1_CACHE_MISS_COUNT, /* < GFX12 */
AC_SPM_COMPONENT_L2_CACHE_REQUEST_COUNT,
AC_SPM_COMPONENT_L2_CACHE_HIT_COUNT,
AC_SPM_COMPONENT_L2_CACHE_MISS_COUNT,
AC_SPM_COMPONENT_GPU_BUSY_CYCLES,
AC_SPM_COMPONENT_CS_LDS_BANK_CONFLICT_CYCLES,
AC_SPM_COMPONENT_MEM_UNIT_BUSY_CYCLES,
AC_SPM_COMPONENT_MEM_UNIT_STALLED_CYCLES,
AC_SPM_COMPONENT_WRITE_UNIT_STALLED_CYCLES, /* GFX12+ */
AC_SPM_COMPONENT_COUNT,
};
enum ac_spm_usage_type {
AC_SPM_USAGE_PERCENTAGE = 1,
AC_SPM_USAGE_CYCLES = 2,
AC_SPM_USAGE_BYTES = 4,
AC_SPM_USAGE_ITEMS = 5,
};
#define AC_SPM_MAX_COMPONENTS_PER_COUNTER 3
#define AC_SPM_MAX_COUNTERS_PER_GROUP 5
struct ac_spm_derived_component_descr {
enum ac_spm_component_id id;
enum ac_spm_counter_id counter_id;
const char *name;
enum ac_spm_usage_type usage;
};
struct ac_spm_derived_counter_descr {
enum ac_spm_counter_id id;
enum ac_spm_group_id group_id;
const char *name;
const char *desc;
enum ac_spm_usage_type usage;
uint32_t num_components;
struct ac_spm_derived_component_descr *components[AC_SPM_MAX_COMPONENTS_PER_COUNTER];
};
struct ac_spm_derived_group_descr {
enum ac_spm_group_id id;
const char *name;
uint32_t num_counters;
struct ac_spm_derived_counter_descr *counters[AC_SPM_MAX_COUNTERS_PER_GROUP];
};
struct ac_spm_derived_group {
const struct ac_spm_derived_group_descr *descr;
uint32_t counter_ids[AC_SPM_MAX_COUNTERS_PER_GROUP];
};
struct ac_spm_derived_counter {
const struct ac_spm_derived_counter_descr *descr;
uint32_t component_ids[AC_SPM_MAX_COMPONENTS_PER_COUNTER];
struct util_dynarray values;
};
struct ac_spm_derived_component {
const struct ac_spm_derived_component_descr *descr;
struct util_dynarray values;
};
struct ac_spm_derived_trace {
uint32_t num_timestamps;
uint64_t *timestamps;
uint32_t num_groups;
struct ac_spm_derived_group groups[AC_SPM_GROUP_COUNT];
uint32_t num_counters;
struct ac_spm_derived_counter counters[AC_SPM_COUNTER_COUNT];
uint32_t num_components;
struct ac_spm_derived_component components[AC_SPM_COMPONENT_COUNT];
uint32_t sample_interval;
};
bool ac_init_spm(const struct radeon_info *info,
const struct ac_perfcounters *pc,
struct ac_spm *spm);
void ac_destroy_spm(struct ac_spm *spm);
bool ac_spm_get_trace(const struct ac_spm *spm, struct ac_spm_trace *trace);
struct ac_spm_derived_trace *
ac_spm_get_derived_trace(const struct radeon_info *info,
const struct ac_spm_trace *spm_trace);
void
ac_spm_destroy_derived_trace(struct ac_spm_derived_trace *spm_derived_trace);
void
ac_emit_spm_setup(struct ac_cmdbuf *cs, enum amd_gfx_level gfx_level,
enum amd_ip_type ip_type, const struct ac_spm *spm,
uint64_t va);
void
ac_emit_spm_start(struct ac_cmdbuf *cs, enum amd_ip_type ip_type,
const struct radeon_info *info);
void
ac_emit_spm_stop(struct ac_cmdbuf *cs, enum amd_ip_type ip_type,
const struct radeon_info *info);
void
ac_emit_spm_reset(struct ac_cmdbuf *cs);
#endif