// Copyright 2016 The Fuchsia Authors
// Copyright (c) 2016 Travis Geiselbrecht
//
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file or at
// https://opensource.org/licenses/MIT
#include "arch/x86/mp.h"
#include <assert.h>
#include <debug.h>
#include <lib/arch/x86/boot-cpuid.h>
#include <lib/arch/x86/bug.h>
#include <lib/arch/x86/descriptor-regs.h>
#include <lib/console.h>
#include <lib/ktrace.h>
#include <platform.h>
#include <stdio.h>
#include <string.h>
#include <trace.h>
#include <zircon/compiler.h>
#include <zircon/errors.h>
#include <zircon/types.h>
#include <new>
#include <arch/mp.h>
#include <arch/mp_unplug_event.h>
#include <arch/ops.h>
#include <arch/x86.h>
#include <arch/x86/apic.h>
#include <arch/x86/descriptor.h>
#include <arch/x86/feature.h>
#include <arch/x86/idle_states.h>
#include <arch/x86/interrupts.h>
#include <arch/x86/mmu.h>
#include <arch/x86/mwait_monitor.h>
#include <dev/hw_rng.h>
#include <dev/interrupt.h>
#include <hwreg/x86msr.h>
#include <kernel/auto_preempt_disabler.h>
#include <kernel/cpu.h>
#include <kernel/timer.h>
#include <ktl/algorithm.h>
#include <ktl/align.h>
// Enable/disable ktraces local to this file.
#define LOCAL_KTRACE_ENABLE 0
struct x86_percpu* ap_percpus;
uint8_t x86_num_cpus = 1;
static bool use_monitor = false;
extern struct idt _idt;
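// With -fsanitize=safe-stack the compiler splits each stack in two; address-taken locals go on a
// separate "unsafe" stack. This static buffer is assumed to serve as the boot CPU's unsafe stack
// during early boot, before per-thread stacks are available (see kernel_unsafe_sp below).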
#if __has_feature(safe_stack)
static uint8_t unsafe_kstack[PAGE_SIZE] __ALIGNED(16);
#define unsafe_kstack_end (&unsafe_kstack[sizeof(unsafe_kstack)])
#else
#define unsafe_kstack_end nullptr
#endif
// Holds an array of MwaitMonitor objects used to signal that a CPU is about-to-enter or
// should-wake-from the idle thread.
MwaitMonitorArray gMwaitMonitorArray;
// Fake monitor to use until smp is initialized. The size of the memory range doesn't matter, since
// it won't actually get used in a non-smp environment.
MwaitMonitor gFakeMonitor;
// For use with gMwaitMonitorArray.
constexpr uint8_t kTargetStateNotIdle = 0;
constexpr uint8_t kTargetStateIdle = 1;
// Also set up a fake table of idle states.
x86_idle_states_t fake_supported_idle_states = {
.states = {X86_CSTATE_C1(0)},
.default_state_mask = kX86IdleStateMaskC1Only,
};
X86IdleStates fake_idle_states = X86IdleStates(&fake_supported_idle_states);
// Pre-initialize the per cpu structure for the boot cpu. Referenced by early boot code before it
// can be initialized programmatically.
struct x86_percpu bp_percpu = {
.direct = &bp_percpu,
.current_thread = {},
.stack_guard = {},
.kernel_unsafe_sp = (uintptr_t)unsafe_kstack_end,
.saved_user_sp = {},
.blocking_disallowed = {},
.monitor = &gFakeMonitor,
.halt_interlock = {},
.idle_states = &fake_idle_states,
// Start with an invalid ID until we know the local APIC is set up.
.apic_id = INVALID_APIC_ID,
.gpf_return_target = {},
.cpu_num = 0,
.num_spinlocks = 0,
.last_user_aspace = nullptr,
.high_level_percpu = {},
.default_tss = {},
.interrupt_stacks = {},
};
zx_status_t x86_allocate_ap_structures(uint32_t* apic_ids, uint8_t cpu_count) {
ASSERT(ap_percpus == nullptr);
DEBUG_ASSERT(cpu_count >= 1);
if (cpu_count == 0) {
return ZX_ERR_INVALID_ARGS;
}
if (cpu_count > 1) {
size_t len = sizeof(*ap_percpus) * (cpu_count - 1);
ap_percpus = (x86_percpu*)memalign(MAX_CACHE_LINE, len);
if (ap_percpus == nullptr) {
return ZX_ERR_NO_MEMORY;
}
memset(ap_percpus, 0, len);
// TODO(maniscalco): There's a data race here that we should fix. We could be racing with the
// idle thread on this CPU. Consider reworking the monitor initialization sequence or perhaps
// upgrading this to an atomic. Same goes for the assignment to |bp_percpu.monitor| below.
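// Use MWAIT/MONITOR for idle wakeups only if the boot CPU advertises the MONITOR feature and the
// MONITOR/MWAIT CPUID leaf, and the microarchitecture doesn't prefer HLT for idle.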
use_monitor = arch::BootCpuid<arch::CpuidFeatureFlagsC>().monitor() &&
arch::BootCpuidSupports<arch::CpuidMonitorMwaitB>() &&
!x86_get_microarch_config()->idle_prefer_hlt;
if (use_monitor) {
printf("initializing mwait/monitor for idle threads\n");
zx_status_t status = gMwaitMonitorArray.Init(cpu_count);
if (status != ZX_OK) {
return status;
}
bp_percpu.monitor = &gMwaitMonitorArray.GetForCpu(BOOT_CPU_ID);
for (cpu_num_t i = 1; i < cpu_count; ++i) {
ap_percpus[i - 1].monitor = &gMwaitMonitorArray.GetForCpu(i);
}
uint16_t idle_states_size = sizeof(X86IdleStates);
if (idle_states_size < MAX_CACHE_LINE) {
idle_states_size = MAX_CACHE_LINE;
}
X86IdleStates* idle_states =
static_cast<X86IdleStates*>(memalign(idle_states_size, idle_states_size * cpu_count));
if (idle_states == nullptr) {
return ZX_ERR_NO_MEMORY;
}
const x86_idle_states_t* supported_idle_states = x86_get_idle_states();
bp_percpu.idle_states = idle_states;
// Placement new the BP idle-states table.
new (bp_percpu.idle_states) X86IdleStates(supported_idle_states);
for (uint i = 1; i < cpu_count; ++i) {
ap_percpus[i - 1].idle_states = reinterpret_cast<X86IdleStates*>(
reinterpret_cast<uintptr_t>(idle_states) + (i * idle_states_size));
// Placement new the other idle-states tables.
new (ap_percpus[i - 1].idle_states) X86IdleStates(supported_idle_states);
}
}
}
uint32_t bootstrap_ap = apic_local_id();
DEBUG_ASSERT(bootstrap_ap == apic_bsp_id());
uint apic_idx = 0;
for (uint i = 0; i < cpu_count; ++i) {
if (apic_ids[i] == bootstrap_ap) {
continue;
}
DEBUG_ASSERT(apic_idx != (uint)(cpu_count - 1));
if (apic_idx == (uint)cpu_count - 1) {
/* Never found bootstrap CPU in apic id list */
return ZX_ERR_BAD_STATE;
}
ap_percpus[apic_idx].cpu_num = apic_idx + 1;
ap_percpus[apic_idx].apic_id = apic_ids[i];
ap_percpus[apic_idx].direct = &ap_percpus[apic_idx];
apic_idx++;
}
x86_num_cpus = cpu_count;
return ZX_OK;
}
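// Map a cpu number to its per-cpu structure: cpu 0 is the statically allocated bp_percpu, while
// cpu N (N >= 1) lives at ap_percpus[N - 1].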
static struct x86_percpu* x86_percpu_for(cpu_num_t cpu_num) {
return (cpu_num == 0) ? &bp_percpu : &ap_percpus[cpu_num - 1];
}
void x86_init_percpu(cpu_num_t cpu_num) {
struct x86_percpu* const percpu = x86_percpu_for(cpu_num);
DEBUG_ASSERT(percpu->cpu_num == cpu_num);
DEBUG_ASSERT(percpu->direct == percpu);
// Assembly code has already set up %gs.base so that this function's own code can use it
// implicitly for stack-protector or safe-stack.
DEBUG_ASSERT(read_msr(X86_MSR_IA32_GS_BASE) == (uintptr_t)percpu);
/* set the KERNEL_GS_BASE MSR to 0 */
/* when we enter user space, this will be populated via a swapgs */
write_msr(X86_MSR_IA32_KERNEL_GS_BASE, 0);
x86_feature_early_init_percpu();
x86_extended_register_init();
x86_extended_register_enable_feature(X86_EXTENDED_REGISTER_SSE);
x86_extended_register_enable_feature(X86_EXTENDED_REGISTER_AVX);
gdt_load(gdt_get());
// Disable the LDT so userspace cannot make segment selectors that point to it. See
// https://fxbug.dev/42159255
arch::DisableLdt();
x86_initialize_percpu_tss();
// Setup the post early boot IDT
if (cpu_num == 0) {
idt_setup(&_idt);
// Setup alternate stacks to guarantee stack consistency when handling these interrupts.
idt_set_ist_index(&_idt, X86_INT_NMI, NMI_IST_INDEX);
idt_set_ist_index(&_idt, X86_INT_MACHINE_CHECK, MCE_IST_INDEX);
idt_set_ist_index(&_idt, X86_INT_DOUBLE_FAULT, DBF_IST_INDEX);
idt_load(&_idt);
} else {
// Load the read-only IDT setup on arch initialization.
idt_load(idt_get_readonly());
}
/* load the syscall entry point */
write_msr(X86_MSR_IA32_LSTAR, (uint64_t)&x86_syscall);
/* set the STAR MSR to load the appropriate kernel code selector on syscall
* and the appropriate user code selector on return.
* on syscall entry the following are loaded into segment registers:
* CS = CODE_64_SELECTOR (STAR[47:32])
* SS = DATA_SELECTOR (STAR[47:32] + 0x8)
* on syscall exit:
* CS = USER_CODE_64_SELECTOR (STAR[63:48] + 0x10)
* SS = USER_DATA_SELECTOR (STAR[63:48] + 0x8)
*/
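// As a worked illustration (the selector values below are placeholders, not this kernel's GDT
// layout): if CODE_64_SELECTOR were 0x08 and USER_CODE_SELECTOR were 0x30, STAR would be
// 0x0030'0008'0000'0000; SYSCALL would then load CS=0x08/SS=0x10, and 64-bit SYSRET would load
// CS=0x30+0x10=0x40 and SS=0x30+0x8=0x38 (with RPL forced to 3 by the CPU).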
write_msr(X86_MSR_IA32_STAR,
(uint64_t)USER_CODE_SELECTOR << 48 | (uint64_t)CODE_64_SELECTOR << 32);
// Set the FMASK register to mask off certain bits in RFLAGS on syscall
// entry. See docs/kernel_invariants.md.
uint64_t mask = X86_FLAGS_AC | /* disable alignment check/access control (this
* prevents ring 0 from performing data access
* to ring 3 if SMAP is available) */
X86_FLAGS_NT | /* clear nested task */
X86_FLAGS_IOPL_MASK | /* set iopl to 0 */
X86_FLAGS_STATUS_MASK; /* clear all status flags, interrupt disabled, trap flag */
write_msr(X86_MSR_IA32_FMASK, mask);
// Apply the same mask to our current flags, to ensure that flags are set to known-good values,
// because some flags may be inherited by later kernel threads. We do this just in case any bad
// values were left behind by firmware or the bootloader.
x86_restore_flags(x86_save_flags() & ~mask);
/* enable syscall instruction */
uint64_t efer_msr = read_msr(X86_MSR_IA32_EFER);
efer_msr |= X86_EFER_SCE;
write_msr(X86_MSR_IA32_EFER, efer_msr);
uint64_t cr4 = x86_get_cr4();
// Enable {rd,wr}{fs,gs}base instructions.
if (x86_feature_test(X86_FEATURE_FSGSBASE)) {
cr4 |= X86_CR4_FSGSBASE;
}
if (x86_feature_test(X86_FEATURE_UMIP)) {
cr4 |= X86_CR4_UMIP;
}
x86_set_cr4(cr4);
// Store the processor number in IA32_TSC_AUX, so RDTSCP/RDPID can efficiently get the current CPU
// from userspace.
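// (For example, userspace can recover the CPU number from the ECX output of RDTSCP, e.g. via the
// compiler's __rdtscp(&aux) intrinsic, or directly with RDPID where supported.)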
if (x86_feature_test(X86_FEATURE_RDTSCP)) {
write_msr(X86_MSR_IA32_TSC_AUX, cpu_num);
}
switch (x86_vendor) {
case X86_VENDOR_INTEL:
x86_intel_init_percpu();
break;
case X86_VENDOR_AMD:
x86_amd_init_percpu();
break;
default:
break;
}
arch::ApplyX86ErrataWorkarounds(arch::BootCpuidIo{}, hwreg::X86MsrIo{});
}
void x86_set_local_apic_id(uint32_t apic_id) {
struct x86_percpu* percpu = x86_get_percpu();
DEBUG_ASSERT(percpu->cpu_num == 0);
percpu->apic_id = apic_id;
}
int x86_apic_id_to_cpu_num(uint32_t apic_id) {
if (bp_percpu.apic_id == apic_id) {
return (int)bp_percpu.cpu_num;
}
for (uint i = 0; i < (uint)x86_num_cpus - 1; ++i) {
if (ap_percpus[i].apic_id == apic_id) {
return (int)ap_percpus[i].cpu_num;
}
}
return -1;
}
void arch_mp_reschedule(cpu_mask_t mask) {
cpu_mask_t needs_ipi = 0;
if (use_monitor) {
while (mask) {
cpu_num_t cpu_id = lowest_cpu_set(mask);
cpu_mask_t cpu_mask = cpu_num_to_mask(cpu_id);
struct x86_percpu* percpu = cpu_id ? &ap_percpus[cpu_id - 1] : &bp_percpu;
// When a cpu sees that it is about to start the idle thread, it sets its own monitor flag.
// When a cpu is rescheduling another cpu, if it sees the monitor flag set, it can clear the
// flag to wake up the other cpu w/o an IPI. When the other cpu wakes up, the idle thread sees
// the cleared flag and preempts itself. Both of these operations are under the scheduler
// lock, so there are no races where the wrong signal can be sent.
const uint8_t old_target_state = percpu->monitor->Exchange(kTargetStateNotIdle);
if (old_target_state != kTargetStateIdle) {
// CPU was not idle. We'll need to send it an IPI.
needs_ipi |= cpu_mask;
}
mask &= ~cpu_mask;
}
} else {
needs_ipi = mask;
// We are attempting to wake up the set of CPUs in |mask| and cause them to schedule a new thread.
// A target CPU spins for a short time before executing halt; before it spins, it sets the
// |halt_interlock| flag to '1'. Before a target CPU executes the halt instruction, it sets the
// |halt_interlock| flag to '2' and skips the halt if the flag was cleared while spinning. Try
// to clear the |halt_interlock| flag from 1 -> 0. If we do so, we can skip sending an IPI and
// prevent an unnecessary halt instruction.
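// Illustrative summary of the |halt_interlock| states (values are used ad hoc here, not an enum):
//   0: running, or fast-woken while spinning    1: spinning before HLT
//   2: committed to HLT; waking it requires an IPI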
while (mask) {
cpu_num_t cpu_id = lowest_cpu_set(mask);
cpu_mask_t cpu_mask = cpu_num_to_mask(cpu_id);
struct x86_percpu* percpu = cpu_id ? &ap_percpus[cpu_id - 1] : &bp_percpu;
uint32_t expect_spin = 1;
bool did_fast_wakeup = percpu->halt_interlock.compare_exchange_strong(expect_spin, 0);
if (did_fast_wakeup) {
needs_ipi &= ~cpu_mask;
}
mask &= ~cpu_mask;
}
}
if (needs_ipi) {
arch_mp_send_ipi(MP_IPI_TARGET_MASK, needs_ipi, MP_IPI_RESCHEDULE);
}
}
void arch_idle_enter(zx_duration_t max_latency) {
struct x86_percpu* percpu = x86_get_percpu();
const cpu_mask_t local_reschedule_mask = cpu_num_to_mask(arch_curr_cpu_num());
PreemptionState& preemption_state = Thread::Current::preemption_state();
if (use_monitor) {
bool rsb_maybe_empty = false;
// It's critical that the monitor only indicates this CPU is idle when this thread cannot be
// preempted. If we are preempted while "showing idle", the signaling CPU may see we're idle and
// elide the IPI, resulting in a lost reschedule event. Prior to re-enabling preemption (i.e.
// prior to destroying this RAII object), we must set the monitor to "not idle".
AutoPreemptDisabler preempt_disabled;
percpu->monitor->Write(kTargetStateIdle);
while (percpu->monitor->Read() == kTargetStateIdle && !preemption_state.preempts_pending()) {
X86IdleState* next_state = percpu->idle_states->PickIdleState();
rsb_maybe_empty |= x86_intel_idle_state_may_empty_rsb(next_state);
ktrace::Scope trace = KTRACE_CPU_BEGIN_SCOPE_ENABLE(
LOCAL_KTRACE_ENABLE, "kernel:sched", "idle", ("mwait hint", next_state->MwaitHint()));
// 1) Disable interrupts 2) Arm the monitor 3) Check our monitor flag and whether or not we
// have pending preemptions 4) Re-enable interrupts as we drop into mwait.
//
// We perform the final check in step #3 to make sure that no one ended up writing to
// percpu->monitor just before we managed to arm the monitor in step #2. We keep interrupts
// disabled during this sequence in order to make sure that we don't take an interrupt between
// steps #3 and #4 and then fail to drop out of mwait as a result. Interrupts will be
// re-enabled on the instruction immediately before the mwait instruction, placing it in the
// interrupt shadow and guaranteeing that we enter the mwait before any interrupts can
// actually fire.
//
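// PrepareForWait is assumed to execute MONITOR on this CPU's dedicated cache line, so the
// subsequent MWAIT wakes either when another CPU writes that line (see arch_mp_reschedule) or
// when an interrupt arrives.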
arch_disable_ints();
percpu->monitor->PrepareForWait();
if (percpu->monitor->Read() == kTargetStateIdle && !preemption_state.preempts_pending()) {
auto start = current_time();
// AMD-SB-1045: Clear the RAS before a thread enters MWAIT to prevent paired hyperthreads
// from consuming this thread's RAS entries.
if (x86_cpu_vulnerable_to_rsb_cross_thread()) {
x86_ras_fill();
}
x86_enable_ints_and_mwait(next_state->MwaitHint());
auto duration = zx_time_sub_time(current_time(), start);
percpu->idle_states->RecordDuration(duration);
next_state->RecordDuration(duration);
next_state->CountEntry();
} else {
arch_enable_ints();
}
}
// Spectre V2: If we enter a deep sleep state, fill the RSB before RET-ing from this function.
// (CVE-2017-5715, see Intel "Deep Dive: Retpoline: A Branch Target Injection Mitigation").
if (x86_cpu_vulnerable_to_rsb_underflow() && rsb_maybe_empty) {
x86_ras_fill();
}
// At this point, we woke up either because another CPU poked us, or because we have a local
// preempt pending. When we exit this block, our AutoPreemptDisabler will destruct and trigger
// a preempt operation, but only if we have a local preemption pending. This may not be
// the case if we woke up from being poked instead of because of an interrupt causing a thread
// to be assigned to this core.
//
// So, simply unconditionally force there to be a local preempt pending and let the APD
// destructor take care of things for us. We are about to re-enable preemption, so it is critical
// that we update our state to Not-Idle to avoid the possibility of a lost reschedule event.
// See the related comment earlier in this function where the |AutoPreemptDisabler| is
// constructed.
preemption_state.preempts_pending_add(local_reschedule_mask);
percpu->monitor->Write(kTargetStateNotIdle);
} else {
AutoPreemptDisabler preempt_disabled;
// Set the halt_interlock flag and spin for a little bit, in case a wakeup happens very shortly
// before we decide to go to sleep. If the halt_interlock flag is changed, another CPU has woken
// us; avoid the halt instruction.
ktrace::Scope trace =
KTRACE_CPU_BEGIN_SCOPE_ENABLE(LOCAL_KTRACE_ENABLE, "kernel:sched", "idle");
constexpr int kPauseIterations = 3000;
uint32_t halt_interlock_spinning = 1;
percpu->halt_interlock.store(1, ktl::memory_order_relaxed);
for (int i = 0; i < kPauseIterations && !preemption_state.preempts_pending(); i++) {
arch::Yield();
if (percpu->halt_interlock.load(ktl::memory_order_relaxed) != 1) {
break;
}
}
// Compare-exchange halt_interlock from 1 -> 2, to indicate we are no longer spinning. If the
// halt_interlock flag was changed, another CPU must have done it; avoid HLT and switch to a new
// runnable thread. Otherwise, setting it to '2' re-enables reschedule IPIs.
bool no_fast_wakeup =
percpu->halt_interlock.compare_exchange_strong(halt_interlock_spinning, 2);
if (no_fast_wakeup && !preemption_state.preempts_pending()) {
arch_disable_ints();
// AMD-SB-1045: Clear the RAS before a thread enters HLT to prevent paired hyperthreads from
// consuming this thread's RAS entries.
if (x86_cpu_vulnerable_to_rsb_cross_thread()) {
x86_ras_fill();
}
if (!preemption_state.preempts_pending()) {
x86_enable_ints_and_hlt();
} else {
// Re-enable interrupts if a reschedule IPI, timer tick, or other PreemptSetPending happened
// and we didn't call x86_enable_ints_and_hlt.
arch_enable_ints();
}
}
// See the comment above in the monitor/mwait version of this loop. Make sure we have a local
// preempt pending before we drop our auto-preempt disabler.
preemption_state.preempts_pending_add(local_reschedule_mask);
}
}
void arch_mp_send_ipi(mp_ipi_target_t target, cpu_mask_t mask, mp_ipi_t ipi) {
uint8_t vector = 0;
switch (ipi) {
case MP_IPI_GENERIC:
vector = X86_INT_IPI_GENERIC;
break;
case MP_IPI_RESCHEDULE:
vector = X86_INT_IPI_RESCHEDULE;
break;
case MP_IPI_INTERRUPT:
vector = X86_INT_IPI_INTERRUPT;
break;
case MP_IPI_HALT:
vector = X86_INT_IPI_HALT;
break;
default:
panic("Unexpected MP IPI value: %u", static_cast<uint32_t>(ipi));
}
switch (target) {
case MP_IPI_TARGET_ALL_BUT_LOCAL:
apic_send_broadcast_ipi(vector, DELIVERY_MODE_FIXED);
break;
case MP_IPI_TARGET_ALL:
apic_send_broadcast_self_ipi(vector, DELIVERY_MODE_FIXED);
break;
case MP_IPI_TARGET_MASK:
apic_send_mask_ipi(vector, mask, DELIVERY_MODE_FIXED);
break;
default:
panic("Unexpected MP IPI target: %u", static_cast<uint32_t>(target));
}
}
void x86_ipi_halt_handler(void*) {
printf("halting cpu %u\n", arch_curr_cpu_num());
platform_halt_cpu();
for (;;) {
x86_cli();
x86_hlt();
}
}
// Forcibly stops all other CPUs except the current one and the BSP (which is cpu 0)
void x86_force_halt_all_but_local_and_bsp(void) {
cpu_num_t self = arch_curr_cpu_num();
for (cpu_num_t i = 1; i < x86_num_cpus; ++i) {
if (i == self) {
continue;
}
uint32_t dst_apic_id = ap_percpus[i - 1].apic_id;
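// Sending an INIT IPI (the vector is ignored) drops the target core into the wait-for-SIPI
// state, which effectively halts it until it is deliberately restarted.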
apic_send_ipi(0, static_cast<uint8_t>(dst_apic_id), DELIVERY_MODE_INIT);
}
}
zx_status_t arch_mp_prep_cpu_unplug(cpu_num_t cpu_id) {
if (cpu_id == 0 || cpu_id >= x86_num_cpus) {
return ZX_ERR_INVALID_ARGS;
}
return ZX_OK;
}
zx_status_t arch_mp_cpu_unplug(cpu_num_t cpu_id) {
/* we do not allow unplugging the bootstrap processor */
if (cpu_id == 0 || cpu_id >= x86_num_cpus) {
return ZX_ERR_INVALID_ARGS;
}
uint32_t dst_apic_id = ap_percpus[cpu_id - 1].apic_id;
if (dst_apic_id == INVALID_APIC_ID) {
/* This is a transient state that can occur during CPU onlining */
return ZX_ERR_UNAVAILABLE;
}
DEBUG_ASSERT(dst_apic_id < UINT8_MAX);
apic_send_ipi(0, (uint8_t)dst_apic_id, DELIVERY_MODE_INIT);
return ZX_OK;
}
zx_status_t arch_mp_cpu_hotplug(cpu_num_t cpu_id) {
if (cpu_id >= x86_num_cpus) {
return ZX_ERR_INVALID_ARGS;
}
if (mp_is_cpu_online(cpu_id)) {
return ZX_ERR_BAD_STATE;
}
DEBUG_ASSERT(cpu_id != 0);
if (cpu_id == 0) {
/* We shouldn't be able to shutoff the bootstrap CPU, so
* no reason to be able to bring it back via this route. */
return ZX_ERR_INVALID_ARGS;
}
struct x86_percpu* percpu = &ap_percpus[cpu_id - 1];
DEBUG_ASSERT(percpu->apic_id != INVALID_APIC_ID);
return x86_bringup_aps(&percpu->apic_id, 1);
}
/* Used to suspend work on a CPU until it is further shut down */
void arch_flush_state_and_halt(MpUnplugEvent* flush_done) {
DEBUG_ASSERT(arch_ints_disabled());
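// Write back and invalidate this CPU's caches before signaling that its state is flushed and
// parking it in the cli/hlt loop below.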
__asm__ volatile("wbinvd" : : : "memory");
Thread::Current::Get()->preemption_state().PreemptDisable();
flush_done->Signal();
while (1) {
__asm__ volatile("cli; hlt" : : : "memory");
}
}
void arch_setup_percpu(cpu_num_t cpu_num, struct percpu* percpu) {
x86_percpu* arch_percpu = x86_percpu_for(cpu_num);
DEBUG_ASSERT(arch_percpu != nullptr);
DEBUG_ASSERT(arch_percpu->high_level_percpu == nullptr ||
arch_percpu->high_level_percpu == percpu);
arch_percpu->high_level_percpu = percpu;
}
static void reset_idle_counters(X86IdleStates* idle_states) {
for (unsigned i = 0; i < idle_states->NumStates(); ++i) {
idle_states->States()[i].ResetCounters();
}
}
static void report_idlestates(cpu_num_t cpu_num, const X86IdleStates& idle_states) {
printf("CPU %u:\n", cpu_num);
const X86IdleState* states = idle_states.ConstStates();
for (unsigned i = 0; i < idle_states.NumStates(); ++i) {
const auto& state = states[i];
printf(" %4s (MWAIT %02X): %lu entries, %lu ns avg duration (%ld ns total)\n", state.Name(),
state.MwaitHint(), state.TimesEntered(),
state.TimesEntered() > 0 ? state.CumulativeDuration() / (state.TimesEntered()) : 0l,
state.CumulativeDuration());
}
}
static int cmd_idlestates(int argc, const cmd_args* argv, uint32_t flags) {
if (argc < 2) {
usage:
printf("Usage: %s (printstats | resetstats | setmask)\n", argv[0].str);
return ZX_ERR_INVALID_ARGS;
}
if (!use_monitor) {
printf("%s is only supported on systems with X86_FEATURE_MON\n", argv[0].str);
return ZX_ERR_NOT_SUPPORTED;
}
if (!strcmp(argv[1].str, "resetstats")) {
reset_idle_counters(bp_percpu.idle_states);
for (cpu_num_t i = 1; i < x86_num_cpus; ++i) {
reset_idle_counters(ap_percpus[i - 1].idle_states);
}
} else if (!strcmp(argv[1].str, "printstats")) {
report_idlestates(0, *bp_percpu.idle_states);
for (cpu_num_t i = 1; i < x86_num_cpus; ++i) {
report_idlestates(i, *ap_percpus[i - 1].idle_states);
}
} else if (!strcmp(argv[1].str, "setmask")) {
if (argc < 3) {
printf("Usage: %s setmask $mask\n", argv[0].str);
return ZX_ERR_INVALID_ARGS;
}
bp_percpu.idle_states->SetStateMask(static_cast<uint32_t>(argv[2].u));
for (unsigned i = 1; i < x86_num_cpus; ++i) {
ap_percpus[i - 1].idle_states->SetStateMask(static_cast<uint32_t>(argv[2].u));
}
} else {
goto usage;
}
return ZX_OK;
}
STATIC_COMMAND_START
STATIC_COMMAND("idlestates", "control or report on CPU idle state selection", &cmd_idlestates)
STATIC_COMMAND_END(idlestates)
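// Example usage from the kernel debug console (assuming the usual `k` command prefix):
//   k idlestates printstats
//   k idlestates setmask 1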