// Copyright 2020 The Fuchsia Authors
//
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file or at
// https://opensource.org/licenses/MIT

#include "arch/x86/pv.h"

#include <lib/arch/intrin.h>
#include <zircon/types.h>

#include <arch/ops.h>
#include <arch/x86.h>
#include <arch/x86/feature.h>
#include <arch/x86/platform_access.h>
#include <arch/x86/registers.h>
#include <ktl/atomic.h>
#include <vm/physmap.h>
#include <vm/pmm.h>

#include <ktl/enforce.h>

// Paravirtual interfaces for performing certain operations in a
// hypervisor-assisted way. The paravirtual optimizations in this file are
// implemented by KVM/QEMU.

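// Shared pages that the hypervisor updates: |boot_time| holds the wall-clock
// time at boot, and |system_time| is periodically refreshed with the guest's
// current system time and TSC scaling parameters.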
static volatile pv_clock_boot_time* boot_time = nullptr;
static volatile pv_clock_system_time* system_time = nullptr;

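// Bit 0 of the KVM system-time MSR; setting it asks the hypervisor to start
// updating the registered system-time page.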
static constexpr uint64_t kSystemTimeEnable = 1u;

zx_status_t pv_clock_init(void) {
  if (boot_time != nullptr || system_time != nullptr) {
    return ZX_ERR_BAD_STATE;
  }

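  // Allocate and pin a page for the boot-time (wall clock) structure, then
  // hand its physical address to the hypervisor.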
  paddr_t pa;
  vm_page_t* page = nullptr;
  zx_status_t status = pmm_alloc_page(0, &page, &pa);
  if (status != ZX_OK) {
    return status;
  }
  page->set_state(vm_page_state::WIRED);
  arch_zero_page(paddr_to_physmap(pa));
  boot_time = static_cast<pv_clock_boot_time*>(paddr_to_physmap(pa));
  write_msr(kKvmBootTime, pa);

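  // Allocate and pin a second page for the system-time structure.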
  status = pmm_alloc_page(0, &page, &pa);
  if (status != ZX_OK) {
    return status;
  }
  page->set_state(vm_page_state::WIRED);
  arch_zero_page(paddr_to_physmap(pa));
  system_time = static_cast<pv_clock_system_time*>(paddr_to_physmap(pa));

  // Note: We're setting up one system-wide PV clock rather than per-CPU system
  // clocks. This is OK because
  // - the PV clock is only used if it's stable,
  // - we assume an invariant TSC if the clock is stable, and
  // - we don't read the clock's tsc_timestamp; we use rdtsc directly.
  write_msr(kKvmSystemTimeMsr, pa | kSystemTimeEnable);

  return ZX_OK;
}
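
// A sketch of the expected call order (the real call sites live in platform
// startup code): pv_clock_init() once during early boot, pv_clock_is_stable()
// to decide whether the PV clock can be trusted, pv_clock_get_tsc_freq() for
// TSC calibration, and pv_clock_shutdown() on the boot CPU when shutting down.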

void pv_clock_shutdown() {
  DEBUG_ASSERT(arch_curr_cpu_num() == 0);

  // Tell our hypervisor to stop updating the clock.
  write_msr(kKvmSystemTimeMsr, 0);
}

bool pv_clock_is_stable() {
  bool is_stable = (system_time->flags & kKvmSystemTimeStable) ||
                   x86_feature_test(X86_FEATURE_KVM_PV_CLOCK_STABLE);
  printf("pv_clock: Clocksource is %sstable\n", (is_stable ? "" : "not "));
  return is_stable;
}

uint64_t pv_clock_get_tsc_freq() {
  printf("pv_clock: Fetching TSC frequency\n");
  uint32_t tsc_mul = 0;
  int8_t tsc_shift = 0;
  uint32_t pre_version = 0, post_version = 0;
  ktl::atomic_ref<volatile uint32_t> version(system_time->version);
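  // The hypervisor uses |version| as a seqlock: it increments the field to an
  // odd value before updating the page and to an even value afterwards. Spin
  // until we read the same even version before and after reading the scaling
  // parameters, which guarantees a consistent snapshot.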
  do {
    pre_version = version.load();
    if (pre_version % 2 != 0) {
      arch::Yield();
      continue;
    }
    tsc_mul = system_time->tsc_mul;
    tsc_shift = system_time->tsc_shift;
    post_version = version.load();
  } while (pre_version != post_version);

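  // |tsc_mul| is a 32.32 fixed-point factor that converts TSC ticks,
  // pre-scaled by 2^tsc_shift, to nanoseconds. Inverting it gives the
  // frequency in kHz: tsc_khz = ((10^6 << 32) / tsc_mul), adjusted by
  // tsc_shift. For example (illustrative values), a 2 GHz TSC could be
  // described by tsc_mul = 0x80000000 and tsc_shift = 0:
  // (10^6 << 32) / 0x80000000 = 2000000 kHz.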
  uint64_t tsc_khz = 1000000ULL << 32;
  tsc_khz = tsc_khz / tsc_mul;
  if (tsc_shift > 0) {
    tsc_khz >>= tsc_shift;
  } else {
    tsc_khz <<= -tsc_shift;
  }
  return tsc_khz * 1000;
}

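// Sends an IPI to the set of vCPUs whose APIC IDs are set in the bitmap formed
// by (mask_low, mask_high), interpreted relative to start_id, using the
// KVM_HC_SEND_IPI (10) hypercall. |icr| holds the interrupt command, including
// the vector. The hypercall number is passed in RAX and its arguments in
// RBX/RCX/RDX/RSI; the result, the number of vCPUs the IPI was delivered to or
// a negative error code, comes back in RAX.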
int pv_ipi(uint64_t mask_low, uint64_t mask_high, uint64_t start_id, uint64_t icr) {
  static constexpr uint32_t kPvIpiNum = 10;

  int ret;
  switch (x86_vendor) {
    case X86_VENDOR_INTEL:
      __asm__ __volatile__("vmcall"
                           : "=a"(ret)
                           : "a"(kPvIpiNum), "b"(mask_low), "c"(mask_high), "d"(start_id), "S"(icr)
                           : "memory");
      break;
    case X86_VENDOR_AMD:
      __asm__ __volatile__("vmmcall"
                           : "=a"(ret)
                           : "a"(kPvIpiNum), "b"(mask_low), "c"(mask_high), "d"(start_id), "S"(icr)
                           : "memory");
      break;
    default:
      PANIC_UNIMPLEMENTED;
  }
  return ret;
}

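// Per-CPU PV EOI state, indexed by CPU number.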
static PvEoi g_pv_eoi[SMP_MAX_CPUS];

void PvEoi::InitAll() {
  for (PvEoi& pv : g_pv_eoi) {
    pv.Init();
  }
}

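// Records the physical address of this instance's |state_| word so that
// Enable(), which must not block, does not need to perform the translation
// itself.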
void PvEoi::Init() {
  ZX_DEBUG_ASSERT(!arch_blocking_disallowed());
  ZX_DEBUG_ASSERT(!enabled_.load());
  ZX_DEBUG_ASSERT(state_paddr_ == 0);

  state_paddr_ = vaddr_to_paddr(&state_);
  ZX_DEBUG_ASSERT(state_paddr_ != 0);
  ZX_DEBUG_ASSERT(state_paddr_ % alignof(decltype(PvEoi::state_)) == 0);
}

PvEoi* PvEoi::get() { return &g_pv_eoi[arch_curr_cpu_num()]; }

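// Turns the PV EOI protocol on for this CPU by writing the physical address of
// |state_|, with the enable bit set, to the PV EOI MSR.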
void PvEoi::Enable(MsrAccess* msr) {
  // It is critical that this method does not block, as it may be called early during boot, prior
  // to the calling CPU being marked active.

  ZX_DEBUG_ASSERT(!enabled_.load());
  ZX_DEBUG_ASSERT(state_paddr_ != 0);

  msr->write_msr(X86_MSR_KVM_PV_EOI_EN, state_paddr_ | X86_MSR_KVM_PV_EOI_EN_ENABLE);
  enabled_.store(true, ktl::memory_order_release);
}

void PvEoi::Disable(MsrAccess* msr) {
  // It is critical that this method does not block, as it may be called while the current CPU is
  // being shut down.

  // Mark the feature as disabled before writing to the MSR; otherwise an interrupt arriving in
  // the window between the two operations could fail to EOI via the legacy mechanism.
  enabled_.store(false, ktl::memory_order_release);
  msr->write_msr(X86_MSR_KVM_PV_EOI_EN, 0);
}

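// Attempts to perform an EOI via the paravirtual protocol. If the hypervisor
// set our |state_| flag, atomically clearing it completes the EOI and the
// caller may skip the APIC EOI register write; a return value of false means
// the caller must fall back to the legacy mechanism.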
bool PvEoi::Eoi() {
  if (!enabled_.load(ktl::memory_order_relaxed)) {
    return false;
  }

  uint64_t old_val = state_.exchange(0);
  return old_val != 0;
}

PvEoi::~PvEoi() { ZX_DEBUG_ASSERT(!enabled_.load()); }