// Copyright 2016 The Fuchsia Authors
// Copyright (c) 2013, Google Inc. All rights reserved.
//
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file or at
// https://opensource.org/licenses/MIT
#include <assert.h>
#include <inttypes.h>
#include <lib/affine/ratio.h>
#include <lib/arch/intrin.h>
#include <lib/arch/ticks.h>
#include <lib/boot-options/boot-options.h>
#include <lib/counters.h>
#include <lib/fit/defer.h>
#include <lib/fixed_point.h>
#include <lib/unittest/unittest.h>
#include <lib/zbi-format/driver-config.h>
#include <platform.h>
#include <pow2.h>
#include <trace.h>
#include <zircon/types.h>
#include <arch/interrupt.h>
#include <arch/quirks.h>
#include <dev/interrupt.h>
#include <dev/timer/arm_generic.h>
#include <kernel/scheduler.h>
#include <ktl/atomic.h>
#include <ktl/limits.h>
#include <lk/init.h>
#include <phys/handoff.h>
#include <platform/boot_timestamps.h>
#include <platform/timer.h>
#include <ktl/enforce.h>
#define LOCAL_TRACE 0
// AArch64 timer control registers
#define TIMER_REG_CNTKCTL "cntkctl_el1"
#define TIMER_REG_CNTFRQ "cntfrq_el0"
// CNTP AArch64 registers
#define TIMER_REG_CNTP_CTL "cntp_ctl_el0"
#define TIMER_REG_CNTP_CVAL "cntp_cval_el0"
#define TIMER_REG_CNTP_TVAL "cntp_tval_el0"
#define TIMER_REG_CNTPCT "cntpct_el0"
// CNTPS "AArch64" registers
#define TIMER_REG_CNTPS_CTL "cntps_ctl_el1"
#define TIMER_REG_CNTPS_CVAL "cntps_cval_el1"
#define TIMER_REG_CNTPS_TVAL "cntps_tval_el1"
// CNTV "AArch64" registers
#define TIMER_REG_CNTV_CTL "cntv_ctl_el0"
#define TIMER_REG_CNTV_CVAL "cntv_cval_el0"
#define TIMER_REG_CNTV_TVAL "cntv_tval_el0"
#define TIMER_REG_CNTVCT "cntvct_el0"
arch::EarlyTicks kernel_entry_ticks;
arch::EarlyTicks kernel_virtual_entry_ticks;
KCOUNTER(platform_timer_set_counter, "platform.timer.set")
KCOUNTER(platform_timer_cancel_counter, "platform.timer.cancel")
namespace {
// Counter-timer Kernel Control Register, EL1.
static constexpr uint64_t CNTKCTL_EL1_ENABLE_PHYSICAL_COUNTER = 1 << 0;
static constexpr uint64_t CNTKCTL_EL1_ENABLE_VIRTUAL_COUNTER = 1 << 1;
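// These are the EL0PCTEN (bit 0) and EL0VCTEN (bit 1) fields of CNTKCTL_EL1,
// which gate EL0 access to the physical and virtual counters respectively.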
// Global saved config state
int timer_irq;
uint32_t timer_cntfrq; // Timer tick rate in Hz.
enum timer_irq_assignment {
IRQ_PHYS,
IRQ_VIRT,
IRQ_SPHYS,
};
timer_irq_assignment timer_assignment;
// event stream state
uint32_t event_stream_shift;
uint32_t event_stream_freq;
// Definition of the function signature we use to fetch the value of the chosen
// reference counter.
using ReadArmCounterFunc = uint64_t();
} // anonymous namespace
static uint32_t read_cntp_ctl() { return __arm_rsr(TIMER_REG_CNTP_CTL); }
static uint32_t read_cntv_ctl() { return __arm_rsr(TIMER_REG_CNTV_CTL); }
static uint32_t read_cntps_ctl() { return __arm_rsr(TIMER_REG_CNTPS_CTL); }
static void write_cntp_ctl(uint32_t val) {
LTRACEF_LEVEL(3, "cntp_ctl: 0x%x %x\n", val, read_cntp_ctl());
__arm_wsr(TIMER_REG_CNTP_CTL, val);
__isb(ARM_MB_SY);
}
static void write_cntv_ctl(uint32_t val) {
LTRACEF_LEVEL(3, "cntv_ctl: 0x%x %x\n", val, read_cntv_ctl());
__arm_wsr(TIMER_REG_CNTV_CTL, val);
__isb(ARM_MB_SY);
}
static void write_cntps_ctl(uint32_t val) {
LTRACEF_LEVEL(3, "cntps_ctl: 0x%x %x\n", val, read_cntps_ctl());
__arm_wsr(TIMER_REG_CNTPS_CTL, val);
__isb(ARM_MB_SY);
}
static void write_cntp_cval(uint64_t val) {
LTRACEF_LEVEL(3, "cntp_cval: 0x%016" PRIx64 ", %" PRIu64 "\n", val, val);
__arm_wsr64(TIMER_REG_CNTP_CVAL, val);
__isb(ARM_MB_SY);
}
static void write_cntv_cval(uint64_t val) {
LTRACEF_LEVEL(3, "cntv_cval: 0x%016" PRIx64 ", %" PRIu64 "\n", val, val);
__arm_wsr64(TIMER_REG_CNTV_CVAL, val);
__isb(ARM_MB_SY);
}
static void write_cntps_cval(uint64_t val) {
LTRACEF_LEVEL(3, "cntps_cval: 0x%016" PRIx64 ", %" PRIu64 "\n", val, val);
__arm_wsr64(TIMER_REG_CNTPS_CVAL, val);
__isb(ARM_MB_SY);
}
static void write_cntp_tval(int32_t val) {
LTRACEF_LEVEL(3, "cntp_tval: %d\n", val);
__arm_wsr(TIMER_REG_CNTP_TVAL, val);
__isb(ARM_MB_SY);
}
static void write_cntv_tval(int32_t val) {
LTRACEF_LEVEL(3, "cntv_tval: %d\n", val);
__arm_wsr(TIMER_REG_CNTV_TVAL, val);
__isb(ARM_MB_SY);
}
static void write_cntps_tval(int32_t val) {
LTRACEF_LEVEL(3, "cntps_tval: %d\n", val);
__arm_wsr(TIMER_REG_CNTPS_TVAL, val);
__isb(ARM_MB_SY);
}
// Forward declarations to ensure that the counter read functions all match the
// signature defined by ReadArmCounterFunc.
static ReadArmCounterFunc read_zero;
static ReadArmCounterFunc read_cntpct_a73;
static ReadArmCounterFunc read_cntvct_a73;
static ReadArmCounterFunc read_cntpct;
static ReadArmCounterFunc read_cntvct;
static uint64_t read_zero() { return 0; }
static uint64_t read_cntpct_a73() {
// Workaround for Cortex-A73 erratum 858921.
// The workaround is applied on all cores, since two consecutive reads should
// be cheaper than checking whether the core is an A73 and branching before
// every read.
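// Bit 32 of two back-to-back reads can only differ across a roll-over of the
// low 32 bits, which is exactly the window in which the erratum can corrupt a
// read; in that case the first value is returned, otherwise the second.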
const uint64_t old_read = __arm_rsr64(TIMER_REG_CNTPCT);
const uint64_t new_read = __arm_rsr64(TIMER_REG_CNTPCT);
return (((old_read ^ new_read) >> 32) & 1) ? old_read : new_read;
}
static uint64_t read_cntvct_a73() {
// Workaround for Cortex-A73 erratum 858921.
// The workaround is applied on all cores, since two consecutive reads should
// be cheaper than checking whether the core is an A73 and branching before
// every read.
const uint64_t old_read = __arm_rsr64(TIMER_REG_CNTVCT);
const uint64_t new_read = __arm_rsr64(TIMER_REG_CNTVCT);
return (((old_read ^ new_read) >> 32) & 1) ? old_read : new_read;
}
static uint64_t read_cntpct() { return __arm_rsr64(TIMER_REG_CNTPCT); }
static uint64_t read_cntvct() { return __arm_rsr64(TIMER_REG_CNTVCT); }
struct timer_reg_procs {
void (*write_ctl)(uint32_t val);
void (*write_cval)(uint64_t val);
void (*write_tval)(int32_t val);
uint64_t arch::EarlyTicks::* early_ticks;
};
[[maybe_unused]] static const struct timer_reg_procs cntp_procs = {
.write_ctl = write_cntp_ctl,
.write_cval = write_cntp_cval,
.write_tval = write_cntp_tval,
.early_ticks = &arch::EarlyTicks::cntpct_el0,
};
[[maybe_unused]] static const struct timer_reg_procs cntv_procs = {
.write_ctl = write_cntv_ctl,
.write_cval = write_cntv_cval,
.write_tval = write_cntv_tval,
.early_ticks = &arch::EarlyTicks::cntvct_el0,
};
[[maybe_unused]] static const struct timer_reg_procs cntps_procs = {
.write_ctl = write_cntps_ctl,
.write_cval = write_cntps_cval,
.write_tval = write_cntps_tval,
.early_ticks = &arch::EarlyTicks::cntpct_el0,
};
// Notes about the `read_arm_counter` function pointer:
//
// At startup time, we have not yet initialized our timer hardware, and
// therefore must return zero as required by platform_current_raw_ticks_synchronized
// (see the comments around that function for more details). Therefore, this
// function pointer initially points to a function that always returns zero.
//
// Once we figure out what CPUs we are running on, identify their HW, and check
// in with the rest of the system, we can pick which version of the
// read_arm_counter function we need to use. Certain ARM Cortex-A73 CPUs have a
// bug which can lead to a bad read of either the VCT or PCT counter. It is
// documented as erratum 858921
// ( https://documentation-service.arm.com/static/5fa29fa7b209f547eebd3613 )
// Thus, on systems with A73 cores, we must use a function that accounts for
// this erratum. On systems without A73 cores, we select a faster implementation.
//
// To make this switch without taking any locks, the function pointer must be
// declared as an atomic. Otherwise, we could be writing to the pointer while
// someone else is reading it (in order to read the clock), which would be a
// formal data race. Note that we don't need
// any ordering of the loads and stores of the function pointer beyond
// `memory_order_relaxed`. It is not important that we establish a specific
// order of the pointer's value relative to other memory accesses in the system.
// We just need to make sure that _if_ we decide to switch to the faster version
// of the counter read, that all of the CPUs _eventually_ see the pointer
// update (which they will do because of unavoidable synchronizing events like
// taking exceptions).
//
#if (TIMER_ARM_GENERIC_SELECTED_CNTV)
static struct timer_reg_procs reg_procs = cntv_procs;
#else
static struct timer_reg_procs reg_procs = cntp_procs;
#endif
static ktl::atomic<ReadArmCounterFunc*> read_arm_counter{read_zero};
static inline void write_ctl(uint32_t val) { reg_procs.write_ctl(val); }
static inline void write_cval(uint64_t val) { reg_procs.write_cval(val); }
[[maybe_unused]] static inline void write_tval(uint32_t val) { reg_procs.write_tval(val); }
static void platform_tick() {
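// Disable the timer (clear the ENABLE bit of the selected CNTx_CTL register) so
// it stops asserting its interrupt, then let the generic kernel tick handler
// decide whether and when to re-arm it.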
write_ctl(0);
timer_tick();
}
template <GetTicksSyncFlag Flags>
inline zx_ticks_t platform_current_raw_ticks_synchronized() {
// Make certain that any reads of the raw system timer are guaranteed to take
// place in a region defined by the template |Flags|. Note that the
// methodology used here was defined in
//
// 'Arm Architecture Reference Manual for A-profile architecture'
// revision 'ARM DDI 0487K.a'
//
// In particular, please refer to examples D12-3 and D12-4. Note that we
// chose to use the "DMB and Branch Dependency" approach (instead of a DSB) to
// ensure that timer reads take place after all previous memory accesses, and
// we use a load dependent on the value of the timer load to ensure that the
// timer load takes place before subsequent memory accesses. Additionally, we
// do not make any attempt to use self-synchronizing timer register accesses.
// Refer to the cited examples for the potentially valid sequences.
uint64_t temp;
// Do we need to guarantee that this clock read occurs after previous loads,
// stores, or both?
//
// If so, we need to implement the solution described in Example D12-4 of the
// ARM ARM, referenced above.
//
// We need our timer read to happen after all previous memory accesses.
// This is implemented as a DMB followed by read from any valid memory
// location with a "branch" which depends on that read. Note that
// "branch" is in air quotes because its target is the next instruction,
// so whether or not the branch gets taken, the result is the same (to
// execute the next instruction). Finally, the sequence ends with an ISB.
//
// Also note that we need the DMB approach here, even when we only care about
// previous loads, because we are attempting to ensure that our counter read
// takes place after _all_ previous loads. If we were interested in only
// ensuring that the counter observation took place after all previous loads
// of a _specific_ variable (call it `X`), there would be another option to
// consider. It is shown in example D12-3 of the ARM ARM, and involves
// creating a branch which depends on the previously loaded value of `X`,
// followed by an ISB. Since the spec of this function is to ensure that the
// counter observation follow _all_ previous loads, it is not a technique we
// can use here.
//
// TODO(johngro): Consider adding an API for a raw counter read which would
// allow for this. There are two key cases (reading a synthetic kernel clock,
// and reading the monotonic or boot timeline) where we don't actually need to
// ensure that our counter read takes place after all previous loads, just the
// previous load of a specific variable (the generation counter in the case of
// a synthetic clock, and the offset in the case of a mono/boot timeline
// read). That said, designing such an API presents some challenges as not
// all of the specific variable reads have the same requirements. For
// example, in the case of synthetic kernel clocks, the extra variable needs
// to be read with acquire semantics. In the case of mono/boot reads, relaxed
// semantics are all that are needed.
//
constexpr bool must_read_after_all_previous_accesses =
(Flags & (GetTicksSyncFlag::kAfterPreviousLoads | GetTicksSyncFlag::kAfterPreviousStores)) !=
GetTicksSyncFlag::kNone;
if constexpr (must_read_after_all_previous_accesses) {
// As described above: a DMB, then a load from any valid memory location
// feeding a "branch" whose target is simply the next instruction, then an
// ISB (Example D12-4 in the ARM ARM referenced above).
__asm__ volatile(
"dmb sy;"
"ldr %[temp], [sp];"
"cbz %[temp], 1f;"
"1: isb;"
: [temp] "=r"(temp) // outputs : we overwrite the register selected for "temp"
: // inputs : we have no inputs
: "memory"); // clobbers : nothing, however we specify "memory" in order to
// prevent re-ordering, as a signal fence would do.
}
// Now actually read from the configured system timer.
const zx_ticks_t ret = read_arm_counter.load(ktl::memory_order_relaxed)();
// Do we need to guarantee that this clock read occurs before subsequent loads,
// stores, or both? If so, the recipe is the same in all cases. We introduce
// a load operation which has data dependency on ret, forcing the timer read
// to finish before the dependent load can occur.
//
// Refer to Example D12-4 in the ARM ARM referenced above.
//
constexpr bool must_read_before_any_subsequent_access =
(Flags & (GetTicksSyncFlag::kBeforeSubsequentLoads |
GetTicksSyncFlag::kBeforeSubsequentStores)) != GetTicksSyncFlag::kNone;
if constexpr (must_read_before_any_subsequent_access) {
__asm__ volatile(
"eor %[temp], %[ret], %[ret];"
"ldr %[temp], [sp, %[temp]];"
: [temp] "=&r"(temp) // outputs : we overwrite the register selected for "temp"
: [ret] "r"(ret) // inputs : we consume the register holding |ret|
: "cc", "memory"); // clobbers : EOR will clobber our flags, "memory" to prevent
// re-ordering.
}
return ret;
}
// Explicit instantiation of all of the forms of synchronized tick access.
#define EXPAND_PLATFORM_CURRENT_RAW_TICKS_SYNCHRONIZED(flags) \
template zx_ticks_t \
platform_current_raw_ticks_synchronized<static_cast<GetTicksSyncFlag>(flags)>()
EXPAND_PLATFORM_CURRENT_RAW_TICKS_SYNCHRONIZED(0);
EXPAND_PLATFORM_CURRENT_RAW_TICKS_SYNCHRONIZED(1);
EXPAND_PLATFORM_CURRENT_RAW_TICKS_SYNCHRONIZED(2);
EXPAND_PLATFORM_CURRENT_RAW_TICKS_SYNCHRONIZED(3);
EXPAND_PLATFORM_CURRENT_RAW_TICKS_SYNCHRONIZED(4);
EXPAND_PLATFORM_CURRENT_RAW_TICKS_SYNCHRONIZED(5);
EXPAND_PLATFORM_CURRENT_RAW_TICKS_SYNCHRONIZED(6);
EXPAND_PLATFORM_CURRENT_RAW_TICKS_SYNCHRONIZED(7);
EXPAND_PLATFORM_CURRENT_RAW_TICKS_SYNCHRONIZED(8);
EXPAND_PLATFORM_CURRENT_RAW_TICKS_SYNCHRONIZED(9);
EXPAND_PLATFORM_CURRENT_RAW_TICKS_SYNCHRONIZED(10);
EXPAND_PLATFORM_CURRENT_RAW_TICKS_SYNCHRONIZED(11);
EXPAND_PLATFORM_CURRENT_RAW_TICKS_SYNCHRONIZED(12);
EXPAND_PLATFORM_CURRENT_RAW_TICKS_SYNCHRONIZED(13);
EXPAND_PLATFORM_CURRENT_RAW_TICKS_SYNCHRONIZED(14);
EXPAND_PLATFORM_CURRENT_RAW_TICKS_SYNCHRONIZED(15);
#undef EXPAND_PLATFORM_CURRENT_RAW_TICKS_SYNCHRONIZED
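// Illustrative usage (not taken from this file): a caller that needs the tick
// observation ordered after its previous stores and before its subsequent
// loads would write, e.g.
//
//   const zx_ticks_t t = platform_current_raw_ticks_synchronized<
//       GetTicksSyncFlag::kAfterPreviousStores | GetTicksSyncFlag::kBeforeSubsequentLoads>();
//
// which resolves to one of the sixteen instantiations expanded above (assuming,
// as the 0-15 expansion implies, that the four sync flags occupy the low four
// bits of GetTicksSyncFlag).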
zx_ticks_t platform_convert_early_ticks(arch::EarlyTicks sample) {
// Early tick timestamps are always raw ticks. We need to convert back to
// ticks by subtracting the raw_ticks to ticks offset.
return sample.*reg_procs.early_ticks + timer_get_mono_ticks_offset();
}
zx_status_t platform_set_oneshot_timer(zx_ticks_t deadline) {
DEBUG_ASSERT(arch_ints_disabled());
if (deadline < 0) {
deadline = 0;
}
// Even if the deadline has already passed, the ARMv8-A timer will fire the
// interrupt.
write_cval(deadline);
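// CNTx_CTL: ENABLE (bit 0) = 1 and IMASK (bit 1) = 0, i.e. enable the timer
// with its interrupt unmasked.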
write_ctl(1);
kcounter_add(platform_timer_set_counter, 1);
return 0;
}
void platform_stop_timer() {
write_ctl(0);
kcounter_add(platform_timer_cancel_counter, 1);
}
void platform_shutdown_timer() {
DEBUG_ASSERT(arch_ints_disabled());
mask_interrupt(timer_irq);
}
zx_status_t platform_suspend_timer_curr_cpu() {
DEBUG_ASSERT(arch_ints_disabled());
// Save the cntkctl_el1 register, which includes the event stream state and
// whether EL0 can read PCT.
percpu::GetCurrent().resume_state.cntkctl_el1 = __arm_rsr64("cntkctl_el1");
write_ctl(0);
return mask_interrupt(timer_irq);
}
zx_status_t platform_resume_timer_curr_cpu() {
DEBUG_ASSERT(arch_ints_disabled());
__arm_wsr64("cntkctl_el1", percpu::GetCurrent().resume_state.cntkctl_el1);
unmask_interrupt(timer_irq);
// Kick the timer to get things going again.
//
// TODO(https://fxbug.dev/417558115): Remove/merge this with the logic in
// IdlePowerThread::UpdateMonotonicClock such that we don't kick the platform
// timer twice.
return platform_set_oneshot_timer(0);
}
bool platform_usermode_can_access_tick_registers() {
// We always use the ARM generic timer for the tick counter, and these
// registers are accessible from usermode.
return true;
}
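// Build the ratio used to convert ticks of the ARM counter into nanoseconds.
// Worked example (hypothetical frequency): cntfrq = 62'500'000 Hz gives the
// ratio 1'000'000'000 / 62'500'000, i.e. 16 ns per tick.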
template <bool AllowDebugPrint = false>
static inline affine::Ratio arm_generic_timer_compute_conversion_factors(uint32_t cntfrq) {
affine::Ratio cntpct_to_nsec = {ZX_SEC(1), cntfrq};
if constexpr (AllowDebugPrint) {
dprintf(SPEW, "arm generic timer cntpct_per_nsec: %u/%u\n", cntpct_to_nsec.numerator(),
cntpct_to_nsec.denominator());
}
return cntpct_to_nsec;
}
// Run once on the boot cpu to decide if we want to start an event stream on each
// cpu and at what rate.
static void event_stream_init(uint32_t cntfrq) {
if (!gBootOptions->arm64_event_stream_enabled) {
return;
}
// Compute the closest power of two from the timer frequency to get to the target.
//
// The mechanism to select the rate of the event stream is to select which bit in the virtual
// counter it should watch for a transition from 0->1 or 1->0 on. This effectively divides the
// virtual counter frequency by two raised to the selected bit position plus one.
//
// There's no real out of range value here. Everything gets clamped to a shift value of [0, 15].
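// Worked example (hypothetical numbers): with cntfrq = 19.2 MHz and a target of 10 kHz, the
// loop below settles on shift = 10, giving an event stream rate of 19'200'000 >> 11 = 9375 Hz,
// just under the requested target.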
uint shift;
for (shift = 0; shift <= 14; shift++) {
// Find a matching shift to the target frequency within range. If the target frequency is too
// large even for shift 0 then it'll just pick shift 0 because of the <=.
if (log2_uint_floor(cntfrq >> (shift + 1)) <=
log2_uint_floor(gBootOptions->arm64_event_stream_freq_hz)) {
break;
}
}
// If we ran off the end of the for loop, 15 is the max shift, which is okay.
DEBUG_ASSERT(shift <= 15);
// Save the computed state
event_stream_shift = shift;
event_stream_freq = (cntfrq >> (event_stream_shift + 1));
dprintf(INFO, "arm generic timer will enable event stream on all cpus: shift %u, %u Hz\n",
event_stream_shift, event_stream_freq);
}
static void event_stream_enable_percpu() {
if (!gBootOptions->arm64_event_stream_enabled) {
return;
}
DEBUG_ASSERT(event_stream_shift <= 15);
// Enable the event stream
uint64_t cntkctl = __arm_rsr64(TIMER_REG_CNTKCTL);
// Select which counter bit triggers events (EVNTI, field 7:4)
cntkctl &= ~(0xfUL << 4);
cntkctl |= event_stream_shift << 4; // EVNTI
// Clear the transition direction bit (trigger on 0->1 transitions)
cntkctl &= ~(1 << 3); // EVNTDIR
// Enable the stream
cntkctl |= (1 << 2); // EVNTEN
__arm_wsr64(TIMER_REG_CNTKCTL, cntkctl);
dprintf(INFO, "arm generic timer cpu-%u: event stream enabled\n", arch_curr_cpu_num());
}
static void arm_generic_timer_init(uint32_t freq_override) {
if (freq_override == 0) {
// Read the firmware supplied cntfrq register. Note: it may not be correct
// in buggy firmware situations, so always provide a mechanism to override it.
timer_cntfrq = __arm_rsr(TIMER_REG_CNTFRQ);
LTRACEF("cntfrq: %#08x, %u\n", timer_cntfrq, timer_cntfrq);
} else {
timer_cntfrq = freq_override;
}
dprintf(INFO, "arm generic timer freq %u Hz\n", timer_cntfrq);
// No way to reasonably continue. Just hard stop.
ASSERT(timer_cntfrq != 0);
timer_set_ticks_to_time_ratio(arm_generic_timer_compute_conversion_factors<true>(timer_cntfrq));
// Set up the hardware timer irq handler for this vector. Use the permanent irq handler
// registration scheme, since the handler is enabled on all cpus and needs no locking
// for reentrancy or deregistration purposes.
zx_status_t status = register_permanent_int_handler(timer_irq, &platform_tick);
DEBUG_ASSERT(status == ZX_OK);
// At this point in time, we expect that the `cntkctl_el1` timer register has
// the bit which permits EL0 access to the VCT to be set, and perhaps also the
// bit which allows access to the PCT if that happens to be the time reference
// we have decided to use.
//
// *None* of the other timer HW access bits should be set. EL0 only gets to
// look at the counter, and nothing more.
//
// ASSERT this now.
[[maybe_unused]] const uint64_t expected =
CNTKCTL_EL1_ENABLE_VIRTUAL_COUNTER |
(ArmUsePhysTimerInVdso() ? CNTKCTL_EL1_ENABLE_PHYSICAL_COUNTER : 0);
ASSERT_MSG(const uint64_t current = __arm_rsr64(TIMER_REG_CNTKCTL);
current == expected,
"CNTKCTL_EL1 register does not match reference counter selection (%016lx != %016lx)",
current, expected);
// Determine and compute values for the event stream if requested
event_stream_init(timer_cntfrq);
// try to enable the event stream if requested
event_stream_enable_percpu();
// enable the IRQ on the boot cpu
LTRACEF("unmask irq %d on cpu %u\n", timer_irq, arch_curr_cpu_num());
unmask_interrupt(timer_irq);
}
static void arm_generic_timer_init_secondary_cpu(uint level) {
// try to enable the event stream if requested
event_stream_enable_percpu();
LTRACEF("unmask irq %d on cpu %u\n", timer_irq, arch_curr_cpu_num());
unmask_interrupt(timer_irq);
}
// Secondary CPUs initialize the timer just before the kernel starts with interrupts enabled.
LK_INIT_HOOK_FLAGS(arm_generic_timer_init_secondary_cpu, arm_generic_timer_init_secondary_cpu,
LK_INIT_LEVEL_THREADING - 1, LK_INIT_FLAG_SECONDARY_CPUS)
void ArmGenericTimerInit(const zbi_dcfg_arm_generic_timer_driver_t& config) {
uint32_t irq_phys = config.irq_phys;
uint32_t irq_virt = config.irq_virt;
uint32_t irq_sphys = config.irq_sphys;
// If boot-options have been configured to force us to use the physical
// counter as our reference, drop the virtual timer IRQ so that selection falls
// through to the physical timer. If we don't have an interrupt configured for using
// the physical timer hardware (either PHYS or SPHYS), we are going to end up
// panicking.
if (gBootOptions->arm64_force_pct) {
dprintf(INFO,
"arm generic timer forcing use of PCT. IRQs provided were "
"(virt %u, phys %u, sphys %u)\n",
irq_virt, irq_phys, irq_sphys);
irq_virt = 0;
}
// Always prefer to use the virtual timer if we have the option to do so.
// Additionally, always start by using the versions of the timer read which
// have the A73 errata workaround.
//
// Currently, we have not had a chance to boot all of our CPUs and determine
// if we have any A73's in the mix. Until we know, it is safe to use the A73
// versions of the reads, just a small bit slower. Later on, if we know it is
// safe to do so, we can switch to using the workaround-free version.
const char* timer_str = "";
if (irq_virt) {
timer_str = "virt";
timer_irq = irq_virt;
timer_assignment = IRQ_VIRT;
reg_procs = cntv_procs;
read_arm_counter.store(read_cntvct_a73, ktl::memory_order_relaxed);
} else if (irq_phys) {
timer_str = "phys";
timer_irq = irq_phys;
timer_assignment = IRQ_PHYS;
reg_procs = cntp_procs;
read_arm_counter.store(read_cntpct_a73, ktl::memory_order_relaxed);
arm64_allow_pct_in_el0();
} else if (irq_sphys) {
timer_str = "sphys";
timer_irq = irq_sphys;
timer_assignment = IRQ_SPHYS;
reg_procs = cntps_procs;
read_arm_counter.store(read_cntpct_a73, ktl::memory_order_relaxed);
arm64_allow_pct_in_el0();
} else {
panic("no irqs set in arm_generic_timer_pdev_init\n");
}
ZX_ASSERT(reg_procs.early_ticks);
// We cannot actually reset the value on the ticks timer, so instead we use
// the time of clock selection (now) to define the zero point on our ticks
// timeline moving forward.
timer_set_initial_ticks_offset(-read_arm_counter.load(ktl::memory_order_relaxed)());
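// The full barrier below makes the selections above (reg_procs, the counter
// read function, and the initial ticks offset) visible to other CPUs before
// initialization continues.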
arch::ThreadMemoryBarrier();
dprintf(INFO, "arm generic timer using %s timer, irq %d\n", timer_str, timer_irq);
arm_generic_timer_init(config.freq_override);
}
bool ArmUsePhysTimerInVdso() { return timer_assignment != IRQ_VIRT; }
static void late_update_keep_or_disable_a73_timer_workaround(uint) {
// By the time we make it to LK_INIT_LEVEL_SMP_READY we should have started
// and sync'ed up with all of our secondary CPUs. If not, something has gone
// terribly wrong, and we should continue to use the A73 workaround out of an
// abundance of caution.
//
zx_status_t status = mp_wait_for_all_cpus_ready(Deadline::no_slack(0));
if (status != ZX_OK) {
dprintf(ALWAYS,
"At least one CPU has failed to check in by INIT_LEVEL_SMP_READY in the "
"init sequence. Keeping A73 counter workarounds in place.\n");
} else {
if (arch_quirks_needs_arm_erratum_858921_mitigation() == false) {
// If all of our CPUs have checked in, and we have not discovered any A73
// CPUs, we can switch to using the simple register read instead of the
// double-read required by the A73 workaround.
ReadArmCounterFunc& thunk = ArmUsePhysTimerInVdso() ? read_cntpct : read_cntvct;
read_arm_counter.store(thunk, ktl::memory_order_relaxed);
} else {
dprintf(INFO, "A73 cores detected. Keeping arm generic timer A73 workaround\n");
}
}
}
LK_INIT_HOOK(late_update_keep_or_disable_a73_timer_workaround,
&late_update_keep_or_disable_a73_timer_workaround, LK_INIT_LEVEL_SMP_READY)
/********************************************************************************
*
* Tests
*
********************************************************************************/
namespace {
[[maybe_unused]] constexpr uint32_t kMinTestFreq = 1;
[[maybe_unused]] constexpr uint32_t kMaxTestFreq = ktl::numeric_limits<uint32_t>::max();
[[maybe_unused]] constexpr uint32_t kCurTestFreq = 0;
inline uint64_t abs_int64(int64_t a) { return (a > 0) ? a : static_cast<int64_t>(-a); }
bool test_time_conversion_check_result(uint64_t a, uint64_t b, uint64_t limit) {
BEGIN_TEST;
if (a != b) {
uint64_t diff = abs_int64(a - b);
ASSERT_LE(diff, limit);
}
END_TEST;
}
bool test_time_to_ticks(uint32_t cntfrq) {
BEGIN_TEST;
affine::Ratio time_to_ticks;
if (cntfrq == kCurTestFreq) {
uint64_t tps = ticks_per_second();
ASSERT_LE(tps, ktl::numeric_limits<uint32_t>::max());
cntfrq = static_cast<uint32_t>(tps);
time_to_ticks = timer_get_ticks_to_time_ratio().Inverse();
} else {
time_to_ticks = arm_generic_timer_compute_conversion_factors(cntfrq).Inverse();
}
constexpr uint64_t VECTORS[] = {
0,
1,
60 * 60 * 24,
60 * 60 * 24 * 365,
60 * 60 * 24 * (365 * 10 + 2),
60ULL * 60 * 24 * (365 * 100 + 2),
};
for (auto vec : VECTORS) {
uint64_t cntpct = time_to_ticks.Scale(vec);
constexpr uint32_t nanos_per_sec = ZX_SEC(1);
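// The reference value converts |vec| (interpreted as nanoseconds) to ticks by
// hand, rounding to the nearest tick.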
uint64_t expected_cntpct = ((uint64_t)cntfrq * vec + (nanos_per_sec / 2)) / nanos_per_sec;
if (!test_time_conversion_check_result(cntpct, expected_cntpct, 1)) {
printf("FAIL: zx_time_to_ticks(%" PRIu64 "): got %" PRIu64 ", expect %" PRIu64 "\n", vec,
cntpct, expected_cntpct);
ASSERT_TRUE(false);
}
}
END_TEST;
}
bool test_ticks_to_time(uint32_t cntfrq) {
BEGIN_TEST;
affine::Ratio ticks_to_time;
if (cntfrq == kCurTestFreq) {
uint64_t tps = ticks_per_second();
ASSERT_LE(tps, ktl::numeric_limits<uint32_t>::max());
cntfrq = static_cast<uint32_t>(tps);
ticks_to_time = timer_get_ticks_to_time_ratio();
} else {
ticks_to_time = arm_generic_timer_compute_conversion_factors(cntfrq);
}
constexpr uint64_t VECTORS[] = {
1,
60 * 60 * 24,
60 * 60 * 24 * 365,
60 * 60 * 24 * (365 * 10 + 2),
60ULL * 60 * 24 * (365 * 50 + 2),
};
for (auto vec : VECTORS) {
zx_time_t expected_zx_time = ZX_SEC(vec);
uint64_t cntpct = (uint64_t)cntfrq * vec;
zx_time_t zx_time = ticks_to_time.Scale(cntpct);
const uint64_t limit = (1000 * 1000 + cntfrq - 1) / cntfrq;
if (!test_time_conversion_check_result(zx_time, expected_zx_time, limit)) {
printf("ticks_to_zx_time(0x%" PRIx64 "): got 0x%" PRIx64 ", expect 0x%" PRIx64 "\n", cntpct,
static_cast<uint64_t>(zx_time), static_cast<uint64_t>(expected_zx_time));
ASSERT_TRUE(false);
}
}
END_TEST;
}
// Verify that the event stream will break CPUs out of WFE.
//
// Start one thread for each CPU that's online and active. Each thread will then disable
// interrupts and issue a series of WFEs. If the event stream is working as expected, each thread
// will eventually complete its series of WFEs and terminate. If the event stream is not working
// as expected, one or more threads will hang.
bool test_event_stream() {
BEGIN_TEST;
if (!gBootOptions->arm64_event_stream_enabled) {
printf("event stream disabled, skipping test\n");
END_TEST;
}
struct Args {
ktl::atomic<uint32_t> waiting{0};
};
auto func = [](void* args_) -> int {
auto* args = reinterpret_cast<Args*>(args_);
{
InterruptDisableGuard guard;
// Signal that we are ready.
args->waiting.fetch_sub(1);
// Wait until everyone else is ready.
while (args->waiting.load() > 0) {
}
// If the event stream is working, it (or something else) will break us out on each iteration.
for (int i = 0; i < 1000; ++i) {
// The SEVL sets the event flag for this CPU. The first WFE consumes the now set event
// flag. By setting then consuming, we can be sure the second WFE will actually wait for an
// event.
__asm__ volatile("sevl;wfe;wfe");
}
}
printf("cpu-%u done\n", arch_curr_cpu_num());
return 0;
};
Args args;
Thread* threads[SMP_MAX_CPUS]{};
// How many online+active CPUs do we have?
uint32_t num_cpus = ktl::popcount(mp_get_online_mask() & Scheduler::PeekActiveMask());
args.waiting.store(num_cpus);
// Create a thread bound to each online+active CPU, but don't start them just yet.
cpu_num_t last = 0;
for (cpu_num_t i = 0; i < percpu::processor_count(); ++i) {
if (mp_is_cpu_online(i) && Scheduler::PeekIsActive(i)) {
threads[i] = Thread::Create("test_event_stream", func, &args, DEFAULT_PRIORITY);
threads[i]->SetCpuAffinity(cpu_num_to_mask(i));
last = i;
}
}
// Because these threads have hard affinity and will disable interrupts we need to take care in
// how we start them. If we start one that's bound to our current CPU, we may get preempted
// and deadlock. To avoid this, bind the current thread to the *last* online+active CPU.
const cpu_mask_t orig_mask = Thread::Current::Get()->GetCpuAffinity();
Thread::Current::Get()->SetCpuAffinity(cpu_num_to_mask(last));
auto restore_mask =
fit::defer([&orig_mask]() { Thread::Current::Get()->SetCpuAffinity(orig_mask); });
// Now that we're running on the last online+active CPU we can simply start them in order.
for (cpu_num_t i = 0; i < percpu::processor_count(); ++i) {
if (threads[i] != nullptr) {
threads[i]->Resume();
}
}
// Finally, wait for them to complete.
for (size_t i = 0; i < percpu::processor_count(); ++i) {
if (threads[i] != nullptr) {
threads[i]->Join(nullptr, ZX_TIME_INFINITE);
}
}
END_TEST;
}
} // namespace
UNITTEST_START_TESTCASE(arm_clock_tests)
UNITTEST("Time --> Ticks (min freq)", []() -> bool { return test_time_to_ticks(kMinTestFreq); })
UNITTEST("Time --> Ticks (max freq)", []() -> bool { return test_time_to_ticks(kMaxTestFreq); })
UNITTEST("Time --> Ticks (cur freq)", []() -> bool { return test_time_to_ticks(kCurTestFreq); })
UNITTEST("Ticks --> Time (min freq)", []() -> bool { return test_ticks_to_time(kMinTestFreq); })
UNITTEST("Ticks --> Time (max freq)", []() -> bool { return test_ticks_to_time(kMaxTestFreq); })
UNITTEST("Ticks --> Time (cur freq)", []() -> bool { return test_ticks_to_time(kCurTestFreq); })
UNITTEST("Event Stream", test_event_stream)
UNITTEST_END_TESTCASE(arm_clock_tests, "arm_clock", "Tests for ARM tick count and current time")