// blob: 3d064bc6d2e0d8f86d0d2256fe910ced58ea10f3 (code-browser header residue, not part of the original source)
// Copyright 2017 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#pragma once
#include <stdbool.h>
#include <stdint.h>
#include <zircon/compiler.h>
#ifdef __Fuchsia__
#include <stddef.h>
#include <zircon/device/ioctl.h>
#include <zircon/device/ioctl-wrapper.h>
#include <zircon/types.h>
#endif
__BEGIN_CDECLS
#if !defined(__x86_64__)
#error "unsupported architecture"
#endif
// MSRs
// Builds a mask of |width| consecutive one bits whose lowest bit sits at bit
// position |lsb|. The arithmetic is done in unsigned long long, so |width|
// and |lsb| must each be below 64 (a shift by 64 or more is undefined in C).
#define IPM_MSR_BITS(width, lsb) (((1ULL << (width)) - 1) << (lsb))
// Bit fields of the IA32_PERFEVTSELx MSRs.
// Every field FOO is described by three macros: FOO_SHIFT is the position of
// its lowest bit, FOO_LEN is its width in bits, and FOO_MASK is the in-place
// mask built from those two with IPM_MSR_BITS.

// EVENT_SELECT: bits 7:0.
#define IA32_PERFEVTSEL_EVENT_SELECT_SHIFT (0)
#define IA32_PERFEVTSEL_EVENT_SELECT_LEN (8)
#define IA32_PERFEVTSEL_EVENT_SELECT_MASK \
    IPM_MSR_BITS(IA32_PERFEVTSEL_EVENT_SELECT_LEN, IA32_PERFEVTSEL_EVENT_SELECT_SHIFT)

// UMASK: bits 15:8.
#define IA32_PERFEVTSEL_UMASK_SHIFT (8)
#define IA32_PERFEVTSEL_UMASK_LEN (8)
#define IA32_PERFEVTSEL_UMASK_MASK \
    IPM_MSR_BITS(IA32_PERFEVTSEL_UMASK_LEN, IA32_PERFEVTSEL_UMASK_SHIFT)

// USR: bit 16.
#define IA32_PERFEVTSEL_USR_SHIFT (16)
#define IA32_PERFEVTSEL_USR_LEN (1)
#define IA32_PERFEVTSEL_USR_MASK \
    IPM_MSR_BITS(IA32_PERFEVTSEL_USR_LEN, IA32_PERFEVTSEL_USR_SHIFT)

// OS: bit 17.
#define IA32_PERFEVTSEL_OS_SHIFT (17)
#define IA32_PERFEVTSEL_OS_LEN (1)
#define IA32_PERFEVTSEL_OS_MASK \
    IPM_MSR_BITS(IA32_PERFEVTSEL_OS_LEN, IA32_PERFEVTSEL_OS_SHIFT)

// E: bit 18.
#define IA32_PERFEVTSEL_E_SHIFT (18)
#define IA32_PERFEVTSEL_E_LEN (1)
#define IA32_PERFEVTSEL_E_MASK \
    IPM_MSR_BITS(IA32_PERFEVTSEL_E_LEN, IA32_PERFEVTSEL_E_SHIFT)

// PC: bit 19.
#define IA32_PERFEVTSEL_PC_SHIFT (19)
#define IA32_PERFEVTSEL_PC_LEN (1)
#define IA32_PERFEVTSEL_PC_MASK \
    IPM_MSR_BITS(IA32_PERFEVTSEL_PC_LEN, IA32_PERFEVTSEL_PC_SHIFT)

// INT: bit 20.
#define IA32_PERFEVTSEL_INT_SHIFT (20)
#define IA32_PERFEVTSEL_INT_LEN (1)
#define IA32_PERFEVTSEL_INT_MASK \
    IPM_MSR_BITS(IA32_PERFEVTSEL_INT_LEN, IA32_PERFEVTSEL_INT_SHIFT)

// ANY: bit 21.
#define IA32_PERFEVTSEL_ANY_SHIFT (21)
#define IA32_PERFEVTSEL_ANY_LEN (1)
#define IA32_PERFEVTSEL_ANY_MASK \
    IPM_MSR_BITS(IA32_PERFEVTSEL_ANY_LEN, IA32_PERFEVTSEL_ANY_SHIFT)

// EN: bit 22.
#define IA32_PERFEVTSEL_EN_SHIFT (22)
#define IA32_PERFEVTSEL_EN_LEN (1)
#define IA32_PERFEVTSEL_EN_MASK \
    IPM_MSR_BITS(IA32_PERFEVTSEL_EN_LEN, IA32_PERFEVTSEL_EN_SHIFT)

// INV: bit 23.
#define IA32_PERFEVTSEL_INV_SHIFT (23)
#define IA32_PERFEVTSEL_INV_LEN (1)
#define IA32_PERFEVTSEL_INV_MASK \
    IPM_MSR_BITS(IA32_PERFEVTSEL_INV_LEN, IA32_PERFEVTSEL_INV_SHIFT)

// CMASK: bits 31:24.
#define IA32_PERFEVTSEL_CMASK_SHIFT (24)
#define IA32_PERFEVTSEL_CMASK_LEN (8)
#define IA32_PERFEVTSEL_CMASK_MASK \
    IPM_MSR_BITS(IA32_PERFEVTSEL_CMASK_LEN, IA32_PERFEVTSEL_CMASK_SHIFT)
// Bit fields of the IA32_FIXED_CTR_CTRL MSR.
// Each fixed counter |ctr| owns a group of four bits starting at bit 4 * ctr;
// the macros below take |ctr| and locate a field inside that group.

// EN: the two low bits of the group.
#define IA32_FIXED_CTR_CTRL_EN_SHIFT(ctr) (0 + (ctr) * 4)
#define IA32_FIXED_CTR_CTRL_EN_LEN (2)
#define IA32_FIXED_CTR_CTRL_EN_MASK(ctr) \
    IPM_MSR_BITS(IA32_FIXED_CTR_CTRL_EN_LEN, IA32_FIXED_CTR_CTRL_EN_SHIFT(ctr))

// ANY: bit 2 of the group.
#define IA32_FIXED_CTR_CTRL_ANY_SHIFT(ctr) (2 + (ctr) * 4)
#define IA32_FIXED_CTR_CTRL_ANY_LEN (1)
#define IA32_FIXED_CTR_CTRL_ANY_MASK(ctr) \
    IPM_MSR_BITS(IA32_FIXED_CTR_CTRL_ANY_LEN, IA32_FIXED_CTR_CTRL_ANY_SHIFT(ctr))

// PMI: bit 3 of the group.
#define IA32_FIXED_CTR_CTRL_PMI_SHIFT(ctr) (3 + (ctr) * 4)
#define IA32_FIXED_CTR_CTRL_PMI_LEN (1)
#define IA32_FIXED_CTR_CTRL_PMI_MASK(ctr) \
    IPM_MSR_BITS(IA32_FIXED_CTR_CTRL_PMI_LEN, IA32_FIXED_CTR_CTRL_PMI_SHIFT(ctr))
// Bit fields of the IA32_PERF_GLOBAL_CTRL MSR.

// PMC_EN: one bit per programmable counter, at bit |ctr|.
#define IA32_PERF_GLOBAL_CTRL_PMC_EN_SHIFT(ctr) (ctr)
#define IA32_PERF_GLOBAL_CTRL_PMC_EN_LEN (1)
#define IA32_PERF_GLOBAL_CTRL_PMC_EN_MASK(ctr) \
    IPM_MSR_BITS(IA32_PERF_GLOBAL_CTRL_PMC_EN_LEN, IA32_PERF_GLOBAL_CTRL_PMC_EN_SHIFT(ctr))

// FIXED_EN: one bit per fixed counter, at bit 32 + |ctr|.
#define IA32_PERF_GLOBAL_CTRL_FIXED_EN_SHIFT(ctr) (32 + (ctr))
#define IA32_PERF_GLOBAL_CTRL_FIXED_EN_LEN (1)
#define IA32_PERF_GLOBAL_CTRL_FIXED_EN_MASK(ctr) \
    IPM_MSR_BITS(IA32_PERF_GLOBAL_CTRL_FIXED_EN_LEN, IA32_PERF_GLOBAL_CTRL_FIXED_EN_SHIFT(ctr))
// Bit fields of the IA32_PERF_GLOBAL_STATUS MSR.
// Note: These same values also apply to IA32_PERF_GLOBAL_STATUS_RESET and
// IA32_PERF_GLOBAL_STATUS_SET.

// PMC_OVF: one bit per programmable counter, at bit |ctr|.
#define IA32_PERF_GLOBAL_STATUS_PMC_OVF_SHIFT(ctr) (ctr)
#define IA32_PERF_GLOBAL_STATUS_PMC_OVF_LEN (1)
#define IA32_PERF_GLOBAL_STATUS_PMC_OVF_MASK(ctr) \
    IPM_MSR_BITS(IA32_PERF_GLOBAL_STATUS_PMC_OVF_LEN, IA32_PERF_GLOBAL_STATUS_PMC_OVF_SHIFT(ctr))

// FIXED_OVF: one bit per fixed counter, at bit 32 + |ctr|.
#define IA32_PERF_GLOBAL_STATUS_FIXED_OVF_SHIFT(ctr) (32 + (ctr))
#define IA32_PERF_GLOBAL_STATUS_FIXED_OVF_LEN (1)
#define IA32_PERF_GLOBAL_STATUS_FIXED_OVF_MASK(ctr) \
    IPM_MSR_BITS(IA32_PERF_GLOBAL_STATUS_FIXED_OVF_LEN, IA32_PERF_GLOBAL_STATUS_FIXED_OVF_SHIFT(ctr))

// TRACE_TOPA_PMI: bit 55.
#define IA32_PERF_GLOBAL_STATUS_TRACE_TOPA_PMI_SHIFT (55)
#define IA32_PERF_GLOBAL_STATUS_TRACE_TOPA_PMI_LEN (1)
#define IA32_PERF_GLOBAL_STATUS_TRACE_TOPA_PMI_MASK \
    IPM_MSR_BITS(IA32_PERF_GLOBAL_STATUS_TRACE_TOPA_PMI_LEN, IA32_PERF_GLOBAL_STATUS_TRACE_TOPA_PMI_SHIFT)

// LBR_FRZ: bit 58.
#define IA32_PERF_GLOBAL_STATUS_LBR_FRZ_SHIFT (58)
#define IA32_PERF_GLOBAL_STATUS_LBR_FRZ_LEN (1)
#define IA32_PERF_GLOBAL_STATUS_LBR_FRZ_MASK \
    IPM_MSR_BITS(IA32_PERF_GLOBAL_STATUS_LBR_FRZ_LEN, IA32_PERF_GLOBAL_STATUS_LBR_FRZ_SHIFT)

// CTR_FRZ: bit 59.
#define IA32_PERF_GLOBAL_STATUS_CTR_FRZ_SHIFT (59)
#define IA32_PERF_GLOBAL_STATUS_CTR_FRZ_LEN (1)
#define IA32_PERF_GLOBAL_STATUS_CTR_FRZ_MASK \
    IPM_MSR_BITS(IA32_PERF_GLOBAL_STATUS_CTR_FRZ_LEN, IA32_PERF_GLOBAL_STATUS_CTR_FRZ_SHIFT)

// ASCI: bit 60.
#define IA32_PERF_GLOBAL_STATUS_ASCI_SHIFT (60)
#define IA32_PERF_GLOBAL_STATUS_ASCI_LEN (1)
#define IA32_PERF_GLOBAL_STATUS_ASCI_MASK \
    IPM_MSR_BITS(IA32_PERF_GLOBAL_STATUS_ASCI_LEN, IA32_PERF_GLOBAL_STATUS_ASCI_SHIFT)

// UNCORE_OVF: bit 61.
#define IA32_PERF_GLOBAL_STATUS_UNCORE_OVF_SHIFT (61)
#define IA32_PERF_GLOBAL_STATUS_UNCORE_OVF_LEN (1)
#define IA32_PERF_GLOBAL_STATUS_UNCORE_OVF_MASK \
    IPM_MSR_BITS(IA32_PERF_GLOBAL_STATUS_UNCORE_OVF_LEN, IA32_PERF_GLOBAL_STATUS_UNCORE_OVF_SHIFT)

// DS_BUFFER_OVF: bit 62.
#define IA32_PERF_GLOBAL_STATUS_DS_BUFFER_OVF_SHIFT (62)
#define IA32_PERF_GLOBAL_STATUS_DS_BUFFER_OVF_LEN (1)
#define IA32_PERF_GLOBAL_STATUS_DS_BUFFER_OVF_MASK \
    IPM_MSR_BITS(IA32_PERF_GLOBAL_STATUS_DS_BUFFER_OVF_LEN, IA32_PERF_GLOBAL_STATUS_DS_BUFFER_OVF_SHIFT)

// COND_CHGD: bit 63.
#define IA32_PERF_GLOBAL_STATUS_COND_CHGD_SHIFT (63)
#define IA32_PERF_GLOBAL_STATUS_COND_CHGD_LEN (1)
#define IA32_PERF_GLOBAL_STATUS_COND_CHGD_MASK \
    IPM_MSR_BITS(IA32_PERF_GLOBAL_STATUS_COND_CHGD_LEN, IA32_PERF_GLOBAL_STATUS_COND_CHGD_SHIFT)
// Bit fields of the IA32_PERF_GLOBAL_INUSE MSR.
// The macro names carry a STATUS_INUSE infix even though the register being
// described is IA32_PERF_GLOBAL_INUSE.

// PERFEVTSEL: one bit per programmable event-select register, at bit |ctr|.
#define IA32_PERF_GLOBAL_STATUS_INUSE_PERFEVTSEL_SHIFT(ctr) (ctr)
#define IA32_PERF_GLOBAL_STATUS_INUSE_PERFEVTSEL_LEN (1)
#define IA32_PERF_GLOBAL_STATUS_INUSE_PERFEVTSEL_MASK(ctr) \
    IPM_MSR_BITS(IA32_PERF_GLOBAL_STATUS_INUSE_PERFEVTSEL_LEN, IA32_PERF_GLOBAL_STATUS_INUSE_PERFEVTSEL_SHIFT(ctr))

// FIXED_CTR: one bit per fixed counter, at bit 32 + |ctr|.
#define IA32_PERF_GLOBAL_STATUS_INUSE_FIXED_CTR_SHIFT(ctr) (32 + (ctr))
#define IA32_PERF_GLOBAL_STATUS_INUSE_FIXED_CTR_LEN (1)
#define IA32_PERF_GLOBAL_STATUS_INUSE_FIXED_CTR_MASK(ctr) \
    IPM_MSR_BITS(IA32_PERF_GLOBAL_STATUS_INUSE_FIXED_CTR_LEN, IA32_PERF_GLOBAL_STATUS_INUSE_FIXED_CTR_SHIFT(ctr))

// PMI: bit 63.
#define IA32_PERF_GLOBAL_STATUS_INUSE_PMI_SHIFT (63)
#define IA32_PERF_GLOBAL_STATUS_INUSE_PMI_LEN (1)
#define IA32_PERF_GLOBAL_STATUS_INUSE_PMI_MASK \
    IPM_MSR_BITS(IA32_PERF_GLOBAL_STATUS_INUSE_PMI_LEN, IA32_PERF_GLOBAL_STATUS_INUSE_PMI_SHIFT)
// Bit fields of the IA32_PERF_GLOBAL_OVF_CTRL MSR.
// Every field FOO is described by FOO_SHIFT (position of its lowest bit),
// FOO_LEN (width in bits) and FOO_MASK (in-place mask built with
// IPM_MSR_BITS).

// PMC_CLR_OVF: one bit per programmable counter, at bit |ctr|.
// The shift has to follow |ctr| (as IA32_PERF_GLOBAL_STATUS_PMC_OVF_SHIFT
// does); with a constant shift the mask for every counter would alias
// counter 0's bit.
#define IA32_PERF_GLOBAL_OVF_CTRL_PMC_CLR_OVF_SHIFT(ctr) (ctr)
#define IA32_PERF_GLOBAL_OVF_CTRL_PMC_CLR_OVF_LEN (1)
#define IA32_PERF_GLOBAL_OVF_CTRL_PMC_CLR_OVF_MASK(ctr) \
    IPM_MSR_BITS(IA32_PERF_GLOBAL_OVF_CTRL_PMC_CLR_OVF_LEN, IA32_PERF_GLOBAL_OVF_CTRL_PMC_CLR_OVF_SHIFT(ctr))

// FIXED_CTR_CLR_OVF: one bit per fixed counter, at bit 32 + |ctr|.
#define IA32_PERF_GLOBAL_OVF_CTRL_FIXED_CTR_CLR_OVF_SHIFT(ctr) (32 + (ctr))
#define IA32_PERF_GLOBAL_OVF_CTRL_FIXED_CTR_CLR_OVF_LEN (1)
#define IA32_PERF_GLOBAL_OVF_CTRL_FIXED_CTR_CLR_OVF_MASK(ctr) \
    IPM_MSR_BITS(IA32_PERF_GLOBAL_OVF_CTRL_FIXED_CTR_CLR_OVF_LEN, IA32_PERF_GLOBAL_OVF_CTRL_FIXED_CTR_CLR_OVF_SHIFT(ctr))

// UNCORE_CLR_OVF: bit 61.
#define IA32_PERF_GLOBAL_OVF_CTRL_UNCORE_CLR_OVF_SHIFT (61)
#define IA32_PERF_GLOBAL_OVF_CTRL_UNCORE_CLR_OVF_LEN (1)
#define IA32_PERF_GLOBAL_OVF_CTRL_UNCORE_CLR_OVF_MASK \
    IPM_MSR_BITS(IA32_PERF_GLOBAL_OVF_CTRL_UNCORE_CLR_OVF_LEN, IA32_PERF_GLOBAL_OVF_CTRL_UNCORE_CLR_OVF_SHIFT)

// DS_BUFFER_CLR_OVF: bit 62.
#define IA32_PERF_GLOBAL_OVF_CTRL_DS_BUFFER_CLR_OVF_SHIFT (62)
#define IA32_PERF_GLOBAL_OVF_CTRL_DS_BUFFER_CLR_OVF_LEN (1)
#define IA32_PERF_GLOBAL_OVF_CTRL_DS_BUFFER_CLR_OVF_MASK \
    IPM_MSR_BITS(IA32_PERF_GLOBAL_OVF_CTRL_DS_BUFFER_CLR_OVF_LEN, IA32_PERF_GLOBAL_OVF_CTRL_DS_BUFFER_CLR_OVF_SHIFT)

// CLR_COND_CHGD: bit 63.
#define IA32_PERF_GLOBAL_OVF_CTRL_CLR_COND_CHGD_SHIFT (63)
#define IA32_PERF_GLOBAL_OVF_CTRL_CLR_COND_CHGD_LEN (1)
#define IA32_PERF_GLOBAL_OVF_CTRL_CLR_COND_CHGD_MASK \
    IPM_MSR_BITS(IA32_PERF_GLOBAL_OVF_CTRL_CLR_COND_CHGD_LEN, IA32_PERF_GLOBAL_OVF_CTRL_CLR_COND_CHGD_SHIFT)
// Bit fields of the IA32_DEBUGCTL MSR.
// Every field FOO is described by FOO_SHIFT (position of its lowest bit),
// FOO_LEN (width in bits) and FOO_MASK (in-place mask built with
// IPM_MSR_BITS). Bits 2-5 and 13 have no definitions here.

// LBR: bit 0.
#define IA32_DEBUGCTL_LBR_SHIFT (0)
#define IA32_DEBUGCTL_LBR_LEN (1)
#define IA32_DEBUGCTL_LBR_MASK \
    IPM_MSR_BITS(IA32_DEBUGCTL_LBR_LEN, IA32_DEBUGCTL_LBR_SHIFT)

// BTF: bit 1.
#define IA32_DEBUGCTL_BTF_SHIFT (1)
#define IA32_DEBUGCTL_BTF_LEN (1)
#define IA32_DEBUGCTL_BTF_MASK \
    IPM_MSR_BITS(IA32_DEBUGCTL_BTF_LEN, IA32_DEBUGCTL_BTF_SHIFT)

// TR: bit 6.
#define IA32_DEBUGCTL_TR_SHIFT (6)
#define IA32_DEBUGCTL_TR_LEN (1)
#define IA32_DEBUGCTL_TR_MASK \
    IPM_MSR_BITS(IA32_DEBUGCTL_TR_LEN, IA32_DEBUGCTL_TR_SHIFT)

// BTS: bit 7.
#define IA32_DEBUGCTL_BTS_SHIFT (7)
#define IA32_DEBUGCTL_BTS_LEN (1)
#define IA32_DEBUGCTL_BTS_MASK \
    IPM_MSR_BITS(IA32_DEBUGCTL_BTS_LEN, IA32_DEBUGCTL_BTS_SHIFT)

// BTINT: bit 8.
#define IA32_DEBUGCTL_BTINT_SHIFT (8)
#define IA32_DEBUGCTL_BTINT_LEN (1)
#define IA32_DEBUGCTL_BTINT_MASK \
    IPM_MSR_BITS(IA32_DEBUGCTL_BTINT_LEN, IA32_DEBUGCTL_BTINT_SHIFT)

// BTS_OFF_OS: bit 9.
#define IA32_DEBUGCTL_BTS_OFF_OS_SHIFT (9)
#define IA32_DEBUGCTL_BTS_OFF_OS_LEN (1)
#define IA32_DEBUGCTL_BTS_OFF_OS_MASK \
    IPM_MSR_BITS(IA32_DEBUGCTL_BTS_OFF_OS_LEN, IA32_DEBUGCTL_BTS_OFF_OS_SHIFT)

// BTS_OFF_USR: bit 10.
#define IA32_DEBUGCTL_BTS_OFF_USR_SHIFT (10)
#define IA32_DEBUGCTL_BTS_OFF_USR_LEN (1)
#define IA32_DEBUGCTL_BTS_OFF_USR_MASK \
    IPM_MSR_BITS(IA32_DEBUGCTL_BTS_OFF_USR_LEN, IA32_DEBUGCTL_BTS_OFF_USR_SHIFT)

// FREEZE_LBRS_ON_PMI: bit 11.
#define IA32_DEBUGCTL_FREEZE_LBRS_ON_PMI_SHIFT (11)
#define IA32_DEBUGCTL_FREEZE_LBRS_ON_PMI_LEN (1)
#define IA32_DEBUGCTL_FREEZE_LBRS_ON_PMI_MASK \
    IPM_MSR_BITS(IA32_DEBUGCTL_FREEZE_LBRS_ON_PMI_LEN, IA32_DEBUGCTL_FREEZE_LBRS_ON_PMI_SHIFT)

// FREEZE_PERFMON_ON_PMI: bit 12.
#define IA32_DEBUGCTL_FREEZE_PERFMON_ON_PMI_SHIFT (12)
#define IA32_DEBUGCTL_FREEZE_PERFMON_ON_PMI_LEN (1)
#define IA32_DEBUGCTL_FREEZE_PERFMON_ON_PMI_MASK \
    IPM_MSR_BITS(IA32_DEBUGCTL_FREEZE_PERFMON_ON_PMI_LEN, IA32_DEBUGCTL_FREEZE_PERFMON_ON_PMI_SHIFT)

// FREEZE_WHILE_SMM_EN: bit 14.
#define IA32_DEBUGCTL_FREEZE_WHILE_SMM_EN_SHIFT (14)
#define IA32_DEBUGCTL_FREEZE_WHILE_SMM_EN_LEN (1)
#define IA32_DEBUGCTL_FREEZE_WHILE_SMM_EN_MASK \
    IPM_MSR_BITS(IA32_DEBUGCTL_FREEZE_WHILE_SMM_EN_LEN, IA32_DEBUGCTL_FREEZE_WHILE_SMM_EN_SHIFT)

// RTM: bit 15.
#define IA32_DEBUGCTL_RTM_SHIFT (15)
#define IA32_DEBUGCTL_RTM_LEN (1)
#define IA32_DEBUGCTL_RTM_MASK \
    IPM_MSR_BITS(IA32_DEBUGCTL_RTM_LEN, IA32_DEBUGCTL_RTM_SHIFT)
// Upper bound on the number of programmable counters.
#define IPM_MAX_PROGRAMMABLE_COUNTERS    8

// Upper bound on the number of fixed-use counters.
#define IPM_MAX_FIXED_COUNTERS           3

// API version number (useful when doing incompatible upgrades).
#define IPM_API_VERSION                  1

// Buffer format versions, one per collection mode.
#define IPM_BUFFER_COUNTING_MODE_VERSION 0
#define IPM_BUFFER_SAMPLING_MODE_VERSION 0
// The HW PERF pseudo register sets.
// These are accessed via mtrace for now.
// Snapshot of the current state of data collection.
typedef struct {
    // S/W API version (some future proofing).
    // NOTE(review): this was documented as "always zero for now", yet
    // IPM_API_VERSION is 1 -- confirm which value is actually reported.
    uint32_t api_version;

    // Version of the H/W Performance Monitor.
    uint32_t pm_version;

    // How many fixed counters there are.
    uint32_t num_fixed_counters;

    // How many programmable counters there are.
    uint32_t num_programmable_counters;

    // Contents of the PERF_CAPABILITIES MSR.
    uint64_t perf_capabilities;

    // Set once MTRACE_IPM_ALLOC has been done.
    bool alloced;

    // Set once MTRACE_IPM_START has been done.
    bool started;
} zx_x86_ipm_state_t;
// Buffer specification handed to the kernel (for setting up the debug store
// MSRs, or for directly writing in "counting mode").
// TODO(dje): First pass. Pass vmo? Need to rework for tracing jobs/processes
// anyway.
typedef struct {
    // Handle of the vmo backing the buffer.
    zx_handle_t vmo;
    // NOTE(review): presumably byte offsets into |vmo| delimiting the usable
    // region -- units and inclusiveness are not stated here; confirm.
    uint64_t start_offset;
    uint64_t end_offset;
} zx_x86_ipm_buffer_t;
// Performance monitor configuration: one member per MSR named in the member
// comments, plus the sampling frequency.
typedef struct {
    // IA32_PERF_GLOBAL_CTRL
    uint64_t global_ctrl;

    // IA32_PERFEVTSEL_*, one entry per programmable counter.
    uint64_t programmable_events[IPM_MAX_PROGRAMMABLE_COUNTERS];

    // IA32_FIXED_CTR_CTRL
    uint64_t fixed_counter_ctrl;

    // IA32_DEBUGCTL
    uint64_t debug_ctrl;

    // Sampling frequency: an interrupt is generated when a counter gets this
    // many hits. Zero selects simple counting instead (collect a tally of
    // all counts and report at the end).
    uint32_t sample_freq;

    // TODO(dje): Add initial counter values here instead of always resetting
    // to zero?
} zx_x86_ipm_perf_config_t;
// Header found at the start of each data buffer.
typedef struct {
    // Format version number (some future proofing, always zero for now).
    uint32_t version;

    // Explicit filler so the 64-bit members that follow are 8-byte aligned.
    uint32_t padding;

    // NOTE(review): presumably the tick rate needed to interpret the time
    // values of the records in the buffer -- confirm.
    uint64_t ticks_per_second;

    // NOTE(review): presumably the offset in the buffer where capture
    // stopped -- confirm.
    uint64_t capture_end;
} zx_x86_ipm_buffer_info_t;
// Layout of the data stored in the trace buffer in "counting mode".
typedef struct {
    // IA32_PERF_GLOBAL_STATUS
    uint64_t status;

    // NOTE(review): presumably when the counters were read -- confirm.
    zx_time_t time;

    // IA32_PMC_*, one entry per programmable counter.
    uint64_t programmable_counters[IPM_MAX_PROGRAMMABLE_COUNTERS];

    // IA32_FIXED_CTR*, one entry per fixed counter.
    uint64_t fixed_counters[IPM_MAX_FIXED_COUNTERS];
} zx_x86_ipm_counters_t;
// OR'd into zx_x86_ipm_sample_record_t.counter to indicate a fixed counter.
#define IPM_COUNTER_NUMBER_FIXED 0x100

// One sampling-mode record in the buffer.
// The counter value (e.g., the sample frequency) is deliberately left out to
// keep the record small: the user should know what value was configured for
// each counter (currently they all get the same value but that could change).
typedef struct {
    zx_time_t time;

    // Counter number, with IPM_COUNTER_NUMBER_FIXED OR'd in for a fixed
    // counter.
    uint32_t counter;

    uint32_t padding_reserved;
    uint64_t pc;
} zx_x86_ipm_sample_record_t;
///////////////////////////////////////////////////////////////////////////////
// Flags for the counters in intel-pm.inc.
// See for example Intel Volume 3, Table 19-3.
// "Non-Architectural Performance Events of the Processor Core Supported by
// Skylake Microarchitecture and Kaby Lake Microarchitecture"

// Flags for non-architectural counters.

// CounterMask values; they occupy the low byte of the flags.
#define IPM_REG_FLAG_CMSK_MASK 0xff
#define IPM_REG_FLAG_CMSK1     1
#define IPM_REG_FLAG_CMSK2     2
#define IPM_REG_FLAG_CMSK4     4
#define IPM_REG_FLAG_CMSK5     5
#define IPM_REG_FLAG_CMSK6     6
#define IPM_REG_FLAG_CMSK8     8
#define IPM_REG_FLAG_CMSK10    10
#define IPM_REG_FLAG_CMSK12    12
#define IPM_REG_FLAG_CMSK16    16
#define IPM_REG_FLAG_CMSK20    20

// AnyThread = 1 required
#define IPM_REG_FLAG_ANYT      0x100
// Invert = 1 required
#define IPM_REG_FLAG_INV       0x200
// Edge = 1 required
#define IPM_REG_FLAG_EDG       0x400
// Also supports PEBS and DataLA
#define IPM_REG_FLAG_PSDLA     0x800
// Also supports PEBS
#define IPM_REG_FLAG_PS        0x1000

// Extra flags.

// Architectural event
#define IPM_REG_FLAG_ARCH      0x10000

// Fixed counters, one flag each.
#define IPM_REG_FLAG_FIXED0    0x100000
#define IPM_REG_FLAG_FIXED1    0x200000
#define IPM_REG_FLAG_FIXED2    0x400000
///////////////////////////////////////////////////////////////////////////////
// Predefined trace categories. The trace client uses categories to
// distinguish what to trace, so a fixed set is provided to keep it simple.
// Note that there are only up to 11 counters that can be active at once,
// so we cannot provide an "all" category.
// TODO(dje): Provide one or more user-defined categories and allow user to
// specify what gets collected (say via scripting language used by
// cpuperf_provider).

// Programmable categories: only one can be selected at a time.
// Anything more complex can't use ioctl_ipm_stage_simple_perf_config.
// The value is the "id" specified in intel-pm-categories.inc.
#define IPM_CATEGORY_PROGRAMMABLE_MASK 0xff
#define IPM_CATEGORY_PROGRAMMABLE_MAX  (IPM_CATEGORY_PROGRAMMABLE_MASK)

// Fixed counters: separate and fixed-purpose, any combination may be used.
#define IPM_CATEGORY_FIXED_MASK        0xf000
#define IPM_CATEGORY_FIXED_CTR0        0x1000
#define IPM_CATEGORY_FIXED_CTR1        0x2000
#define IPM_CATEGORY_FIXED_CTR2        0x4000

// One or both of these must be added.
// If both are elided then no data collection is done.
#define IPM_CATEGORY_OS                0x10000
#define IPM_CATEGORY_USR               0x20000

// Collection mode: only one of the following may be specified.
// A better way would be to provide numeric arguments to categories, but
// this is for the "simple mode" of driving the device, so we KISS for now.
#define IPM_CATEGORY_MODE_MASK         0xff00000
#define IPM_CATEGORY_TALLY             0x0000000 // cpu:tally
#define IPM_CATEGORY_SAMPLE_1000       0x0100000 // cpu:sample-1000
#define IPM_CATEGORY_SAMPLE_5000       0x0200000 // cpu:sample-5000
#define IPM_CATEGORY_SAMPLE_10000      0x0300000 // cpu:sample-10000
#define IPM_CATEGORY_SAMPLE_50000      0x0400000 // cpu:sample-50000
#define IPM_CATEGORY_SAMPLE_100000     0x0500000 // cpu:sample-100000
#define IPM_CATEGORY_SAMPLE_500000     0x0600000 // cpu:sample-500000
#define IPM_CATEGORY_SAMPLE_1000000    0x0700000 // cpu:sample-1000000

// Aliases that map the old macro names onto the current ones.
// TODO(dje): Provide values for old versions of macros.
#define IPM_CATEGORY_COUNT              IPM_CATEGORY_TALLY
#define IPM_CATEGORY_FIXED              IPM_CATEGORY_FIXED_MASK
#define EVENT_INSTRUCTIONS_RETIRED      EVENT_ARCH_INSTRUCTIONS_RETIRED
#define EVENT_UNHALTED_CORE_CYCLES      EVENT_ARCH_UNHALTED_CORE_CYCLES
#define EVENT_UNHALTED_REFERENCE_CYCLES EVENT_ARCH_UNHALTED_REFERENCE_CYCLES
// Only one of the programmable categories can be chosen.
// See intel-pm-categories.inc for how this translates to actual registers.
// Each DEF_CATEGORY entry in that file contributes its |symbol| as one
// enumerator, in file order; IPM_CATEGORY_MAX comes after the last entry.
typedef enum {
// Only |symbol| is needed here. The remaining arguments are swallowed with the
// standard "..." form rather than the GNU-only named form ("counters...").
#define DEF_CATEGORY(symbol, id, name, ...) symbol,
#include <zircon/device/cpu-trace/intel-pm-categories.inc>
// Keep the helper macro from leaking to includers; this is harmless if the
// .inc file has already undefined it.
#undef DEF_CATEGORY
    IPM_CATEGORY_MAX
} ipm_perf_event_category_t;
///////////////////////////////////////////////////////////////////////////////
#ifdef __Fuchsia__
// ioctls
// Fetch the state of data collection.
// Must be called prior to STAGE_CPU_DATA and after any intermediate FREE.
// NOTE(review): no STAGE_CPU_DATA ioctl is declared in this header; this
// presumably means IOCTL_IPM_STAGE_PERF_CONFIG -- confirm.
// Output: zx_x86_ipm_state_t
// TODO(dje): Not entirely happy with the use of the names "state" and "config"
// here. Swapping them feels a bit better.
#define IOCTL_IPM_GET_STATE \
IOCTL(IOCTL_KIND_DEFAULT, IOCTL_FAMILY_IPM, 0)
// Wrapper named ioctl_ipm_get_state for the ioctl above (IOCTL_WRAPPER_OUT
// comes from ioctl-wrapper.h).
IOCTL_WRAPPER_OUT(ioctl_ipm_get_state, IOCTL_IPM_GET_STATE,
zx_x86_ipm_state_t);
// Configuration of a data collection run.
// Supplying it is generally the first call made, to allocate resources for a
// trace; "trace" is used generically here: == "data collection run".
// TODO(dje): At the moment we only support one active trace. Will relax in
// time once things are working (e.g., so different data collections can be
// going on at the same time for, say, different processes or jobs).
typedef struct {
    // Number of buffers; must be #cpus for now.
    uint32_t num_buffers;

    // Size of each buffer.
    // NOTE(review): presumably in bytes -- confirm.
    uint32_t buffer_size;

    // TODO(dje): Later provide ability to request other resources needed
    // for the trace. For now, give client access to full data collection
    // capabilities provided by h/w.
    // Also provide ability to specify "trace entire system" vs "trace this
    // process/job". Maybe even just a particular cpu - dunno.
} ioctl_ipm_trace_config_t;
// Create a trace, allocating the needed trace buffers and other resources.
// Think open(O_CREAT|...) of a file.
// For "counting mode" this is just a page per cpu to hold resulting
// counter values. TODO(dje): constrain buffer_size.
// For "sampling mode" this is #cpus buffers each of size buffer_size.
// "other resources" is basically a catch-all for other things that will
// be needed.
// TODO(dje): Return a descriptor for the trace so that different clients
// can make different requests and potentially have them all be active
// (e.g., different traces for different processes/jobs, assuming various
// factors like them being sufficiently compatible for whatever definition
// of "compatible" ultimately arises).
// Input: ioctl_ipm_trace_config_t
#define IOCTL_IPM_ALLOC_TRACE \
IOCTL(IOCTL_KIND_DEFAULT, IOCTL_FAMILY_IPM, 1)
// Wrapper named ioctl_ipm_alloc_trace for the ioctl above (IOCTL_WRAPPER_IN
// comes from ioctl-wrapper.h).
IOCTL_WRAPPER_IN(ioctl_ipm_alloc_trace, IOCTL_IPM_ALLOC_TRACE,
ioctl_ipm_trace_config_t);
// Free all trace buffers and any other resources allocated for the trace.
// Should be the last thing called (e.g., think close() of an fd).
// Takes no input and produces no output.
// TODO(dje): See IOCTL_IPM_ALLOC_TRACE.
#define IOCTL_IPM_FREE_TRACE \
IOCTL(IOCTL_KIND_DEFAULT, IOCTL_FAMILY_IPM, 2)
// Wrapper named ioctl_ipm_free_trace for the ioctl above (IOCTL_WRAPPER comes
// from ioctl-wrapper.h).
IOCTL_WRAPPER(ioctl_ipm_free_trace, IOCTL_IPM_FREE_TRACE);
// Return config data for a trace buffer.
// Output: ioctl_ipm_trace_config_t
#define IOCTL_IPM_GET_TRACE_CONFIG \
IOCTL(IOCTL_KIND_DEFAULT, IOCTL_FAMILY_IPM, 3)
// Wrapper named ioctl_ipm_get_trace_config for the ioctl above
// (IOCTL_WRAPPER_OUT comes from ioctl-wrapper.h).
IOCTL_WRAPPER_OUT(ioctl_ipm_get_trace_config, IOCTL_IPM_GET_TRACE_CONFIG,
ioctl_ipm_trace_config_t);
// Full-featured perf-data trace configuration.
// A thin wrapper whose sole member is a zx_x86_ipm_perf_config_t.
typedef struct {
    zx_x86_ipm_perf_config_t config;
} ioctl_ipm_perf_config_t;
// Stage performance monitor configuration for a cpu.
// Must be called with data collection off and after INIT.
// NOTE(review): no INIT ioctl is declared in this header; "INIT" presumably
// means IOCTL_IPM_ALLOC_TRACE -- confirm.
// Note: This doesn't actually configure the counters, this just stages
// the values for subsequent use by START.
// Input: ioctl_ipm_perf_config_t
// TODO(dje): Provide a more abstract way to configure the hardware.
#define IOCTL_IPM_STAGE_PERF_CONFIG \
IOCTL(IOCTL_KIND_DEFAULT, IOCTL_FAMILY_IPM, 4)
// Wrapper named ioctl_ipm_stage_perf_config for the ioctl above
// (IOCTL_WRAPPER_IN comes from ioctl-wrapper.h).
IOCTL_WRAPPER_IN(ioctl_ipm_stage_perf_config, IOCTL_IPM_STAGE_PERF_CONFIG,
ioctl_ipm_perf_config_t);
// Lets clients request particular counters in a simple way, without having
// to deal with the details.
typedef struct {
    // Sampling frequency: an interrupt is generated when a counter gets this
    // many hits. Zero selects simple counting (tally) instead.
    uint32_t sample_freq;

    // A mask of IPM_CATEGORY_* values.
    uint32_t categories;
} ioctl_ipm_simple_perf_config_t;
// Specify what to trace using "categories".
// Must be called with data collection off and after ALLOC.
// Note: This doesn't actually configure the counters, this just stages
// the values for subsequent use by START.
// Input: ioctl_ipm_simple_perf_config_t
#define IOCTL_IPM_STAGE_SIMPLE_PERF_CONFIG \
IOCTL(IOCTL_KIND_DEFAULT, IOCTL_FAMILY_IPM, 5)
// Wrapper named ioctl_ipm_stage_simple_perf_config for the ioctl above
// (IOCTL_WRAPPER_IN comes from ioctl-wrapper.h).
IOCTL_WRAPPER_IN(ioctl_ipm_stage_simple_perf_config,
IOCTL_IPM_STAGE_SIMPLE_PERF_CONFIG,
ioctl_ipm_simple_perf_config_t);
// Fetch performance monitor configuration for a cpu.
// Must be called with data collection off and after INIT.
// NOTE(review): no INIT ioctl is declared in this header; "INIT" presumably
// means IOCTL_IPM_ALLOC_TRACE -- confirm.
// Output: ioctl_ipm_perf_config_t
#define IOCTL_IPM_GET_PERF_CONFIG \
IOCTL(IOCTL_KIND_DEFAULT, IOCTL_FAMILY_IPM, 6)
// Wrapper named ioctl_ipm_get_perf_config for the ioctl above
// (IOCTL_WRAPPER_OUT comes from ioctl-wrapper.h).
IOCTL_WRAPPER_OUT(ioctl_ipm_get_perf_config, IOCTL_IPM_GET_PERF_CONFIG,
ioctl_ipm_perf_config_t);
// Run-time produced information about a buffer: not the trace data itself,
// just info about the data.
typedef struct {
    // Offset in the buffer where tracing stopped.
    uint64_t capture_end;
} ioctl_ipm_buffer_info_t;
// Get trace data associated with the buffer.
// Input: trace buffer descriptor (0, 1, 2, ..., |num_buffers|-1), passed as
// a uint32_t
// Output: ioctl_ipm_buffer_info_t
#define IOCTL_IPM_GET_BUFFER_INFO \
IOCTL(IOCTL_KIND_DEFAULT, IOCTL_FAMILY_IPM, 7)
// Wrapper named ioctl_ipm_get_buffer_info for the ioctl above
// (IOCTL_WRAPPER_INOUT comes from ioctl-wrapper.h).
IOCTL_WRAPPER_INOUT(ioctl_ipm_get_buffer_info, IOCTL_IPM_GET_BUFFER_INFO,
uint32_t, ioctl_ipm_buffer_info_t);
// Request argument of IOCTL_IPM_GET_BUFFER_HANDLE.
typedef struct {
    // Which trace buffer to return the handle of.
    uint32_t descriptor;
} ioctl_ipm_buffer_handle_req_t;
// Return a handle of a trace buffer.
// There is no API to get N handles, we have to get them one at a time.
// [There's no point in trying to micro-optimize this and, say, get 3 at
// a time.]
// Input: trace buffer descriptor (0, 1, 2, ..., |num_buffers|-1), wrapped in
// an ioctl_ipm_buffer_handle_req_t
// Output: handle of the vmo of the buffer
// Unlike the other ioctls in this header, this one is declared with
// IOCTL_KIND_GET_HANDLE rather than IOCTL_KIND_DEFAULT.
#define IOCTL_IPM_GET_BUFFER_HANDLE \
IOCTL(IOCTL_KIND_GET_HANDLE, IOCTL_FAMILY_IPM, 8)
// Wrapper named ioctl_ipm_get_buffer_handle for the ioctl above
// (IOCTL_WRAPPER_INOUT comes from ioctl-wrapper.h).
IOCTL_WRAPPER_INOUT(ioctl_ipm_get_buffer_handle, IOCTL_IPM_GET_BUFFER_HANDLE,
ioctl_ipm_buffer_handle_req_t, zx_handle_t);
// Turn on data collection.
// Must be called after INIT and with data collection off.
// NOTE(review): no INIT ioctl is declared in this header; "INIT" presumably
// means IOCTL_IPM_ALLOC_TRACE -- confirm.
// The ioctl number jumps from 8 to 10 here; number 9 is not assigned in this
// header.
#define IOCTL_IPM_START \
IOCTL(IOCTL_KIND_DEFAULT, IOCTL_FAMILY_IPM, 10)
// Wrapper named ioctl_ipm_start for the ioctl above (IOCTL_WRAPPER comes from
// ioctl-wrapper.h).
IOCTL_WRAPPER(ioctl_ipm_start, IOCTL_IPM_START);
// Turn off data collection.
// May be called before INIT.
// NOTE(review): no INIT ioctl is declared in this header; "INIT" presumably
// means IOCTL_IPM_ALLOC_TRACE -- confirm.
// May be called multiple times.
#define IOCTL_IPM_STOP \
IOCTL(IOCTL_KIND_DEFAULT, IOCTL_FAMILY_IPM, 11)
// Wrapper named ioctl_ipm_stop for the ioctl above (IOCTL_WRAPPER comes from
// ioctl-wrapper.h).
IOCTL_WRAPPER(ioctl_ipm_stop, IOCTL_IPM_STOP);
#endif // __Fuchsia__
__END_CDECLS