blob: 6b86801b69376d91020faff0543a98cca69aa8a4 [file] [log] [blame] [edit]
// Copyright 2020 The Fuchsia Authors
//
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file or at
// https://opensource.org/licenses/MIT
#ifndef ZIRCON_KERNEL_LIB_ARCH_INCLUDE_LIB_ARCH_X86_LBR_H_
#define ZIRCON_KERNEL_LIB_ARCH_INCLUDE_LIB_ARCH_X86_LBR_H_
#include <lib/arch/x86/cpuid.h>
#include <lib/arch/x86/trace.h>
#include <zircon/types.h>
#include <optional>
#include <type_traits>
#include <utility>
namespace arch {
// [intel/vol3]: Table 17-13. MSR_LBR_SELECT for Intel® microarchitecture
// code name Haswell.
//
// Register controlling LBR filtering.
//
// Though the referenced section is for Haswell, the layout is generically
// accurate, modulo EN_CALLSTACK (see note below).
struct LbrSelectMsr : public hwreg::RegisterBase<LbrSelectMsr, uint64_t> {
// Typed view of the MSR_LBR_SELECT value: each field below names one bit of
// the register, from the most significant downwards.
//
// Bits [63:10] are declared reserved-as-zero.
DEF_RSVDZ_FIELD(63, 10);
// This field is only present on Atom microarchitectures post-Goldmont and
// Core microarchitectures post-Haswell; it is otherwise reserved. Care should
// be taken only to use this field when present.
DEF_BIT(9, en_callstack);
DEF_BIT(8, far_branch);
DEF_BIT(7, near_rel_jmp);
DEF_BIT(6, near_ind_jmp);
DEF_BIT(5, near_ret);
DEF_BIT(4, near_ind_call);
DEF_BIT(3, near_rel_call);
DEF_BIT(2, jcc);
DEF_BIT(1, cpl_neq_0);
DEF_BIT(0, cpl_eq_0);
// Returns the register address object for MSR_LBR_SELECT.
static auto Get() {
return hwreg::RegisterAddr<LbrSelectMsr>(static_cast<uint32_t>(X86Msr::MSR_LBR_SELECT));
}
};
// [intel/vol3]: 17.4.8 LBR Stack.
//
// Points to the "top" of the LBR stack.
struct LbrTopOfStackMsr : public hwreg::RegisterBase<LbrTopOfStackMsr, uint64_t> {
  // Gives the index of the most recent branch record, taken from the low bits
  // of the register value. The value is masked with `stack_size - 1`, so
  // `stack_size` is expected to be a power of two. We unfortunately cannot
  // trust the higher bits to be reserved as zero as that is not expressly
  // documented, hence the explicit mask.
  //
  // This only reads the register value, so it is callable on const objects.
  size_t top(size_t stack_size) const {
    return static_cast<size_t>(reg_value()) & (stack_size - 1);
  }

  // Returns the register address object for MSR_LASTBRANCH_TOS.
  static auto Get() {
    return hwreg::RegisterAddr<LbrTopOfStackMsr>(static_cast<uint32_t>(X86Msr::MSR_LASTBRANCH_TOS));
  }
};
// [intel/vol3]: 17.4.8.1 LBR Stack and Intel® 64 Processors.
//
// Points to the source instruction in a branch, possibly along with metadata.
struct LbrFromIpMsr : public hwreg::RegisterBase<LbrFromIpMsr, uint64_t> {
  // Yields the register address object for MSR_LASTBRANCH_N_FROM_IP, i.e., the
  // MSR located N past MSR_LASTBRANCH_0_FROM_IP.
  static auto Get(size_t N) {
    const uint32_t first = static_cast<uint32_t>(X86Msr::MSR_LASTBRANCH_0_FROM_IP);
    const uint32_t index = static_cast<uint32_t>(N);
    return hwreg::RegisterAddr<LbrFromIpMsr>(first + index);
  }

  // The register's value as interpreted under the given LBR format (defined
  // out of line).
  uint64_t value(LbrFormat format);

  // The remaining accessors expose branch metadata. Each returns an optional,
  // since the corresponding data is only present in particular (older)
  // formats; on newer microarchitectures it is expected that this information
  // can be found in the MSR_LBR_INFO_* MSRs instead.
  std::optional<bool> mispredicted(LbrFormat format);
  std::optional<bool> in_tsx(LbrFormat format);
  std::optional<bool> tsx_abort(LbrFormat format);
};
// [intel/vol3]: 17.4.8.1 LBR Stack and Intel® 64 Processors.
//
// Points to the destination instruction in a branch, possibly along with
// metadata.
struct LbrToIpMsr : public hwreg::RegisterBase<LbrToIpMsr, uint64_t> {
  // Yields the register address object for MSR_LASTBRANCH_N_TO_IP, i.e., the
  // MSR located N past MSR_LASTBRANCH_0_TO_IP.
  static auto Get(size_t N) {
    const uint32_t first = static_cast<uint32_t>(X86Msr::MSR_LASTBRANCH_0_TO_IP);
    const uint32_t index = static_cast<uint32_t>(N);
    return hwreg::RegisterAddr<LbrToIpMsr>(first + index);
  }

  // The register's value as interpreted under the given LBR format (defined
  // out of line).
  uint64_t value(LbrFormat format);

  // Returns an optional, as this field is only present in particular (older)
  // formats; on newer microarchitectures it is expected that this information
  // can be found in the MSR_LBR_INFO_* MSRs instead.
  std::optional<uint16_t> cycle_count(LbrFormat format);
};
// [intel/vol3]: Table 17-16. MSR_LBR_INFO_x.
//
// Additional branch metadata.
//
// Though the referenced section is for Haswell, the layout is generically
// accurate.
struct LbrInfoMsr : public hwreg::RegisterBase<LbrInfoMsr, uint64_t> {
// Whether the branch was mispredicted.
DEF_BIT(63, mispred);
// Whether the branch entry occurred in a TSX region.
DEF_BIT(62, in_tsx);
// Whether a transaction was aborted.
DEF_BIT(61, tsx_abort);
// Bits [60:16] are zero.
// Elapsed core clocks since the last update to the LBR stack.
DEF_FIELD(15, 0, cycle_count);
// Returns the register address object for MSR_LBR_INFO_N, i.e., the MSR
// located N past MSR_LBR_INFO_0.
static auto Get(size_t N) {
return hwreg::RegisterAddr<LbrInfoMsr>(static_cast<uint32_t>(X86Msr::MSR_LBR_INFO_0) +
static_cast<uint32_t>(N));
}
};
// A simple synthesis of the information provided by the TO, FROM, and INFO
// MSRs.
struct LastBranchRecord {
  // Address of the branch instruction (the source of the branch). Zero until
  // populated, so that a default-initialized record never carries an
  // indeterminate address.
  zx_vaddr_t from = 0;

  // Address the branch transferred control to (the destination). Zero until
  // populated, as above.
  zx_vaddr_t to = 0;

  // The metadata below is optional: whether each piece is available depends
  // on the LBR format in use.

  // Whether the branch was mispredicted.
  std::optional<bool> mispredicted;

  // Elapsed core clocks since the last update to the LBR stack.
  std::optional<uint16_t> cycle_count;

  // Whether the branch entry occurred in a TSX (Transactional Synchronization
  // Extension) region.
  std::optional<bool> in_tsx;

  // As above, but also whether a transaction was aborted.
  std::optional<bool> tsx_abort;
};
// LbrStack provides access to the underlying Last Branch Record stack for the
// current CPU. It provides means of enabling, disabling, and iterating over
// the current records. The lifetime of this class has no bearing on that of
// the hardware feature. In principle, the same `LbrStack` could be used to
// access branch records on multiple CPUs.
//
// This abstraction provides no thread safety; it is a thin wrapper around
// access to the current LBR stack's hardware interface.
//
// Example usage (dumping kernel branch records):
// ```
// hwreg::X86MsrIo msr_io;
// LbrStack lbr_stack(cpuid_io);  // `cpuid_io`: any CPUID I/O provider.
// DEBUG_ASSERT(lbr_stack.is_supported());
// DEBUG_ASSERT(lbr_stack.is_enabled(msr_io)); // Previously enabled.
//
// PrintfSymbolizerContext(stdout);
// printf("Last kernel branch records:\n");
// lbr_stack.ForEachRecord(msr_io, [](const LastBranchRecord& lbr) {
// // Only include branches that end in the kernel.
// if (is_kernel_address(lbr.to)) {
// printf("from: {{{pc:%#" PRIxPTR "}}}\n", lbr.from);
// printf("to: {{{pc:%#" PRIxPTR "}}}\n", lbr.to);
// }
// });
// ```
class LbrStack {
public:
template <typename CpuidIoProvider>
explicit LbrStack(CpuidIoProvider&& io) {
// While it would have been possible to create a chain of deferred
// constructors to initialize the members as const in an initializer list,
// that would have been won at the minor expense of some readability.
bool supports_pdcm = io.template Read<CpuidFeatureFlagsC>().pdcm();
Initialize(GetMicroarchitecture(std::forward<CpuidIoProvider>(io)), supports_pdcm);
}
LbrStack() = delete;
LbrStack(const LbrStack&) = default;
LbrStack(LbrStack&&) = default;
// Gives the size (or depth) of the LBR stack, which is the maximum number
// of records that can be stored.
size_t size() const { return size_; }
bool is_supported() const { return supported_; }
template <typename MsrIo>
bool is_enabled(MsrIo io) const {
return supported_ && DebugControlMsr::Get().ReadFrom(&io).lbr();
}
// Enables the recording of LBRs on the current CPU with a set of default
// options (e.g., for callstack profiling when available). If |for_user| is
// true, only records that end in CPL > 0 will be recorded. This operation is
// idempotent.
template <typename MsrIo>
void Enable(MsrIo io, bool for_user) const {
ZX_ASSERT(supported_);
DebugControlMsr::Get().ReadFrom(&io).set_lbr(1).set_freeze_lbr_on_pmi(1).WriteTo(&io);
GetDefaultSettings(for_user).WriteTo(&io);
}
// Disables the recording of LBRs on the current CPU. This operation is
// idempotent.
template <typename MsrIo>
void Disable(MsrIo io) const {
ZX_ASSERT(supported_);
DebugControlMsr::Get().ReadFrom(&io).set_lbr(0).WriteTo(&io);
}
template <typename MsrIo, typename LbrCallback>
void ForEachRecord(MsrIo io, LbrCallback&& callback) const {
static_assert(std::is_invocable_r_v<void, LbrCallback, const LastBranchRecord&>);
ZX_ASSERT(supported_);
LbrFormat format = PerfCapabilitiesMsr::Get().ReadFrom(&io).lbr_fmt();
size_t top = LbrTopOfStackMsr::Get().ReadFrom(&io).top(size_);
for (size_t i = 0; i < size_; ++i) {
size_t idx = (top + i) % size_;
LbrFromIpMsr from = LbrFromIpMsr::Get(idx).ReadFrom(&io);
LbrToIpMsr to = LbrToIpMsr::Get(idx).ReadFrom(&io);
LastBranchRecord record{
.from = static_cast<zx_vaddr_t>(from.value(format)),
.to = static_cast<zx_vaddr_t>(to.value(format)),
.mispredicted = from.mispredicted(format),
.cycle_count = to.cycle_count(format),
.in_tsx = from.in_tsx(format),
.tsx_abort = from.tsx_abort(format),
};
// The *Info formats expect all metadata to be found in the info MSRs;
// defer evaluation until we are in such a case, as only then will we
// know that the latter are supported.
if (format == LbrFormat::k64BitEipWithFlagsInfo ||
format == LbrFormat::k64BitLipWithFlagsInfo) {
LbrInfoMsr info = LbrInfoMsr::Get(idx).ReadFrom(&io);
record.mispredicted = info.mispred();
record.cycle_count = info.cycle_count();
record.in_tsx = info.in_tsx();
record.tsx_abort = info.tsx_abort();
}
std::forward<LbrCallback>(callback)(record);
}
}
private:
// Initializes the stack abstraction. `supports_pdcm` gives whether the
// IA32_PERF_CAPABILITIES MSR is accessible, which is how one determines the
// format of the LBRs.
void Initialize(Microarchitecture microarch, bool supports_pdcm);
// A reasonable set of default settings (e.g., excluding returns and other
// information implicitly found in a backtrace), enablind callstack profiling
// (see description of |callstack_profiling_| when appropriate). Revisit this
// set of choices when we have use-cases for variations.
LbrSelectMsr GetDefaultSettings(bool for_user) const;
// Members are properly initialized in Initialize().
size_t size_ = 0;
bool supported_ = true;
// Whether we can automatically flush records from the on-chip registers (in
// a LIFO manner) when return instructions are executed, discarding branch
// information relative to leaf functions. [x86/v3/17.11] gives the
// description.
bool callstack_profiling_ = false;
};
} // namespace arch
#endif // ZIRCON_KERNEL_LIB_ARCH_INCLUDE_LIB_ARCH_X86_LBR_H_