// Copyright 2016 The Fuchsia Authors
//
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file or at
// https://opensource.org/licenses/MIT
/****************************************************************************
* This file handles detection of supported extended register saving
* mechanisms. Of those detected, we prefer them in the following order,
* from best to worst:
*
* 1) XSAVES (performs modified+init optimizations, uses compressed register
* form, and can save supervisor-only registers)
* 2) XSAVEOPT (performs modified+init optimizations)
* 3) XSAVE (no optimizations/compression, but can save all supported extended
* registers)
* 4) FXSAVE (can only save FPU/SSE registers)
* 5) none (will not save any extended registers, will not allow enabling
* features that use extended registers.)
****************************************************************************/
#include <arch/ops.h>
#include <arch/x86.h>
#include <arch/x86/mp.h>
#include <arch/x86/feature.h>
#include <arch/x86/proc_trace.h>
#include <arch/x86/registers.h>
#include <inttypes.h>
#include <zircon/compiler.h>
#include <kernel/spinlock.h>
#include <kernel/thread.h>
#include <fbl/auto_call.h>
#include <string.h>
#include <trace.h>
#define LOCAL_TRACE 0
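/* MSR that selects the supervisor state components managed by XSAVES/XRSTORS */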
#define IA32_XSS_MSR 0xDA0
/* Offset in the xsave area at which components >= 2 start */
#define XSAVE_EXTENDED_AREA_OFFSET 576
/* Bits 2 through 62 of the state vector can optionally be set */
#define XSAVE_MAX_EXT_COMPONENTS 61
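/* Bit 63 of XCOMP_BV selects the compacted format of the extended region */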
#define XSAVE_XCOMP_BV_COMPACT (1ULL<<63)
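/* State component index of Processor Trace state (a supervisor component) */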
#define XSAVE_STATE_PT_BIT 8
#define XSAVE_STATE_MAX_BIT 62
static void fxsave(void *register_state);
static void fxrstor(void *register_state);
static void xrstor(void *register_state, uint64_t feature_mask);
static void xrstors(void *register_state, uint64_t feature_mask);
static void xsave(void *register_state, uint64_t feature_mask);
static void xsaveopt(void *register_state, uint64_t feature_mask);
static void xsaves(void *register_state, uint64_t feature_mask);
static void read_xsave_state_info(void);
static void recompute_state_size(void);
static struct {
/* Total size of this component in bytes */
uint32_t size;
/* If true, this component must be aligned to a 64-byte boundary */
bool align64;
} state_components[XSAVE_MAX_EXT_COMPONENTS];
/* Supported bits in XCR0 (each corresponds to a state component) */
static uint64_t xcr0_component_bitmap = 0;
/* Supported bits in IA32_XSS (each corresponds to a state component) */
static uint64_t xss_component_bitmap = 0;
/* Maximum total size for xsave, if all features are enabled */
static size_t xsave_max_area_size = 0;
/* Does this processor support the XSAVES instruction */
static bool xsaves_supported = false;
/* Does this processor support the XSAVEOPT instruction */
static bool xsaveopt_supported = false;
/* Does this processor support the XGETBV instruction with ecx=1 */
static bool xgetbv_1_supported = false;
/* Does this processor support the XSAVE instruction */
static bool xsave_supported = false;
/* Does this processor support FXSAVE */
static bool fxsave_supported = false;
/* Maximum register state size */
static size_t register_state_size = 0;
/* Spinlock to guard register state size changes */
static spin_lock_t state_lock = SPIN_LOCK_INITIAL_VALUE;
/* For FXSAVE/FXRSTOR, we need 512 bytes for the state. For XSAVE-based
* mechanisms, we only need 512 + 64 bytes for the initial state, since
* our initial state only needs to specify some SSE state (masking exceptions),
* and XSAVE doesn't require space for any disabled register groups after
* the last enabled one. */
static uint8_t __ALIGNED(64)
extended_register_init_state[512 + 64] = {0};
/* Format described in Intel 3A section 13.4 */
struct xsave_area {
/* legacy region */
uint8_t legacy_region_0[24];
uint32_t mxcsr;
uint8_t legacy_region_1[484];
/* xsave_header */
uint64_t xstate_bv;
uint64_t xcomp_bv;
uint8_t reserved[48];
uint8_t extended_region[];
} __PACKED;
static void x86_extended_register_cpu_init(void)
{
if (likely(xsave_supported)) {
ulong cr4 = x86_get_cr4();
/* Enable XSAVE feature set */
x86_set_cr4(cr4 | X86_CR4_OSXSAVE);
/* Put xcr0 into a known state (X87 must be enabled in this register) */
x86_xsetbv(0, X86_XSAVE_STATE_X87);
}
/* Enable the FPU */
__UNUSED bool enabled = x86_extended_register_enable_feature(
X86_EXTENDED_REGISTER_X87);
DEBUG_ASSERT(enabled);
}
/* Figure out what forms of register saving this machine supports and
* select the best one */
void x86_extended_register_init(void)
{
/* Have we already read the CPU support info? */
static bool info_initialized = false;
bool initialized_cpu_already = false;
if (!info_initialized) {
DEBUG_ASSERT(arch_curr_cpu_num() == 0);
read_xsave_state_info();
info_initialized = true;
/* We currently assume that if xsave isn't supported, fxsave is */
fxsave_supported = x86_feature_test(X86_FEATURE_FXSR);
/* Set up initial states */
if (likely(fxsave_supported || xsave_supported)) {
x86_extended_register_cpu_init();
initialized_cpu_already = true;
/* Intel Vol 3 section 13.5.4 describes the XSAVE initialization. */
if (xsave_supported) {
/* The only change we want to make to the init state is having
* SIMD exceptions masked */
struct xsave_area *area =
(struct xsave_area *)extended_register_init_state;
area->xstate_bv |= X86_XSAVE_STATE_SSE;
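/* MXCSR bits 7-12 are the six exception mask bits; set them all */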
area->mxcsr = 0x3f << 7;
/* If xsaves is being used, then make the saved state be in
* compact form. xrstors will GPF if it is not. */
if (xsaves_supported) {
area->xcomp_bv |= XSAVE_XCOMP_BV_COMPACT;
area->xcomp_bv |= area->xstate_bv;
}
} else {
fxsave(&extended_register_init_state);
}
}
if (likely(xsave_supported)) {
recompute_state_size();
} else if (fxsave_supported) {
register_state_size = 512;
}
}
/* Ensure that xsaves_supported == true implies xsave_supported == true */
DEBUG_ASSERT(!xsaves_supported || xsave_supported);
/* Ensure that xsaveopt_supported == true implies xsave_supported == true */
DEBUG_ASSERT(!xsaveopt_supported || xsave_supported);
if (!initialized_cpu_already) {
x86_extended_register_cpu_init();
}
}
bool x86_extended_register_enable_feature(
enum x86_extended_register_feature feature)
{
/* We currently assume this is only called during initialization.
* We rely on interrupts being disabled so xgetbv/xsetbv will not be
* racy */
DEBUG_ASSERT(arch_ints_disabled());
switch (feature) {
case X86_EXTENDED_REGISTER_X87: {
if (unlikely(!x86_feature_test(X86_FEATURE_FPU) ||
(!fxsave_supported && !xsave_supported))) {
return false;
}
/* No x87 emul, monitor co-processor */
ulong cr0 = x86_get_cr0();
cr0 &= ~X86_CR0_EM;
cr0 |= X86_CR0_NE;
cr0 |= X86_CR0_MP;
x86_set_cr0(cr0);
/* Init x87, starts with exceptions masked */
__asm__ __volatile__ ("finit" : : : "memory");
if (likely(xsave_supported)) {
x86_xsetbv(0, x86_xgetbv(0) | X86_XSAVE_STATE_X87);
}
break;
}
case X86_EXTENDED_REGISTER_SSE: {
if (unlikely(
!x86_feature_test(X86_FEATURE_SSE) ||
!x86_feature_test(X86_FEATURE_FXSR))) {
return false;
}
/* Init SSE */
ulong cr4 = x86_get_cr4();
cr4 |= X86_CR4_OSXMMEXPT;
cr4 |= X86_CR4_OSFXSR;
x86_set_cr4(cr4);
/* mask all exceptions */
uint32_t mxcsr = 0;
__asm__ __volatile__("stmxcsr %0" : "=m" (mxcsr));
mxcsr = (0x3f << 7);
__asm__ __volatile__("ldmxcsr %0" : : "m" (mxcsr));
if (likely(xsave_supported)) {
x86_xsetbv(0, x86_xgetbv(0) | X86_XSAVE_STATE_SSE);
}
break;
}
case X86_EXTENDED_REGISTER_AVX: {
if (!xsave_supported ||
!(xcr0_component_bitmap & X86_XSAVE_STATE_AVX)) {
return false;
}
/* Enable SIMD exceptions */
ulong cr4 = x86_get_cr4();
cr4 |= X86_CR4_OSXMMEXPT;
x86_set_cr4(cr4);
x86_xsetbv(0, x86_xgetbv(0) | X86_XSAVE_STATE_AVX);
break;
}
case X86_EXTENDED_REGISTER_MPX: {
/* Currently unsupported */
return false;
}
case X86_EXTENDED_REGISTER_AVX512: {
const uint64_t xsave_avx512 =
X86_XSAVE_STATE_AVX512_OPMASK |
X86_XSAVE_STATE_AVX512_LOWERZMM_HIGH |
X86_XSAVE_STATE_AVX512_HIGHERZMM;
if (!xsave_supported ||
(xcr0_component_bitmap & xsave_avx512) != xsave_avx512) {
return false;
}
x86_xsetbv(0, x86_xgetbv(0) | xsave_avx512);
break;
}
case X86_EXTENDED_REGISTER_PT: {
if (!xsaves_supported ||
!(xss_component_bitmap & X86_XSAVE_STATE_PT)) {
return false;
}
x86_set_extended_register_pt_state(true);
break;
}
case X86_EXTENDED_REGISTER_PKRU: {
/* Currently unsupported */
return false;
}
default:
return false;
}
recompute_state_size();
return true;
}
size_t x86_extended_register_size(void) {
return register_state_size;
}
void x86_extended_register_init_state(void *register_state)
{
// Copy the initialization state; this overcopies on systems that fall back
// to fxsave, but the buffer is required to be large enough.
memcpy(register_state, extended_register_init_state, sizeof(extended_register_init_state));
}
void x86_extended_register_save_state(void *register_state)
{
/* The idle threads have no extended register state */
if (unlikely(!register_state)) {
return;
}
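/* Pass ~0 as the requested-feature bitmap; the hardware only saves the
* components that are enabled in XCR0 (plus IA32_XSS for XSAVES). */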
if (xsaves_supported) {
xsaves(register_state, ~0ULL);
} else if (xsaveopt_supported) {
xsaveopt(register_state, ~0ULL);
} else if (xsave_supported) {
xsave(register_state, ~0ULL);
} else if (fxsave_supported) {
fxsave(register_state);
}
}
void x86_extended_register_restore_state(void *register_state)
{
/* The idle threads have no extended register state */
if (unlikely(!register_state)) {
return;
}
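/* The restore instructions re-initialize any requested component whose bit
* is clear in the save area's XSTATE_BV. */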
if (xsaves_supported) {
xrstors(register_state, ~0ULL);
} else if (xsave_supported) {
xrstor(register_state, ~0ULL);
} else if (fxsave_supported) {
fxrstor(register_state);
}
}
void x86_extended_register_context_switch(
thread_t *old_thread, thread_t *new_thread)
{
if (likely(old_thread)) {
x86_extended_register_save_state(old_thread->arch.extended_register_state);
}
x86_extended_register_restore_state(new_thread->arch.extended_register_state);
}
static void read_xsave_state_info(void)
{
xsave_supported = x86_feature_test(X86_FEATURE_XSAVE);
if (!xsave_supported) {
LTRACEF("xsave not supported\n");
return;
}
/* If we bail out early, mark everything as unsupported */
auto ac = fbl::MakeAutoCall([]() {
xsave_supported = false;
xsaves_supported = false;
xsaveopt_supported = false;
});
/* This procedure is described in Intel Vol 1 section 13.2 */
/* Read feature support from subleaves 0 and 1 */
struct cpuid_leaf leaf;
if (!x86_get_cpuid_subleaf(X86_CPUID_XSAVE, 0, &leaf)) {
LTRACEF("could not find xsave leaf\n");
return;
}
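/* CPUID.(EAX=0xD,ECX=0):EDX:EAX enumerates the valid bits of XCR0 */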
xcr0_component_bitmap = ((uint64_t)leaf.d << 32) | leaf.a;
size_t max_area = XSAVE_EXTENDED_AREA_OFFSET;
x86_get_cpuid_subleaf(X86_CPUID_XSAVE, 1, &leaf);
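/* CPUID.(EAX=0xD,ECX=1):EAX bit 0 = XSAVEOPT, bit 2 = XGETBV with ECX=1,
* bit 3 = XSAVES/XRSTORS and IA32_XSS */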
xgetbv_1_supported = !!(leaf.a & (1<<2));
xsaves_supported = !!(leaf.a & (1<<3));
xsaveopt_supported = !!(leaf.a & (1<<0));
xss_component_bitmap = ((uint64_t)leaf.d << 32) | leaf.c;
LTRACEF("xcr0 bitmap: %016" PRIx64 "\n", xcr0_component_bitmap);
LTRACEF("xss bitmap: %016" PRIx64 "\n", xss_component_bitmap);
/* Sanity check; all CPUs that support xsave support components 0 and 1 */
DEBUG_ASSERT((xcr0_component_bitmap & 0x3) == 0x3);
if ((xcr0_component_bitmap & 0x3) != 0x3) {
LTRACEF("unexpected xcr0 bitmap %016" PRIx64 "\n",
xcr0_component_bitmap);
return;
}
/* we're okay from here on out */
ac.cancel();
/* Read info about the state components */
for (uint i = 0; i < XSAVE_MAX_EXT_COMPONENTS; ++i) {
uint idx = i + 2;
if (!(xcr0_component_bitmap & (1ULL << idx)) &&
!(xss_component_bitmap & (1ULL << idx))) {
continue;
}
x86_get_cpuid_subleaf(X86_CPUID_XSAVE, idx, &leaf);
bool align64 = !!(leaf.c & 0x2);
state_components[i].size = leaf.a;
state_components[i].align64 = align64;
LTRACEF("component %u size: %u (xcr0 %d)\n",
idx, state_components[i].size,
!!(xcr0_component_bitmap & (1ULL << idx)));
if (align64) {
max_area = ROUNDUP(max_area, 64);
}
max_area += leaf.a;
}
xsave_max_area_size = max_area;
LTRACEF("total xsave size: %zu\n", max_area);
return;
}
static void recompute_state_size(void) {
if (!xsave_supported) {
return;
}
size_t new_size = 0;
/* If we're in a compacted form, compute the total size. The algorithm
* for this is defined in Intel Vol 1 section 13.4.3 */
if (xsaves_supported) {
new_size = XSAVE_EXTENDED_AREA_OFFSET;
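/* Both user (XCR0) and supervisor (IA32_XSS) components occupy space in
* the compacted area */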
uint64_t enabled_features = x86_xgetbv(0) | read_msr(IA32_XSS_MSR);
for (uint i = 0; i < XSAVE_MAX_EXT_COMPONENTS; ++i) {
uint idx = i + 2;
if (!(enabled_features & (1ULL << idx))) {
continue;
}
if (state_components[i].align64) {
new_size = ROUNDUP(new_size, 64);
}
new_size += state_components[i].size;
}
} else {
/* Otherwise, use CPUID.(EAX=0xD,ECX=0):EBX, which reports the size
* required to save all states currently enabled in XCR0 */
struct cpuid_leaf leaf;
x86_get_cpuid_subleaf(X86_CPUID_XSAVE, 0, &leaf);
new_size = leaf.b;
}
spin_lock(&state_lock);
/* Only allow size to increase; all CPUs should converge to the same value,
* but for sanity let's keep it monotonically increasing */
if (new_size > register_state_size) {
register_state_size = new_size;
DEBUG_ASSERT(register_state_size <= X86_MAX_EXTENDED_REGISTER_SIZE);
}
spin_unlock(&state_lock);
}
static void fxsave(void *register_state)
{
__asm__ __volatile__("fxsave %0"
: "=m" (*(uint8_t *)register_state)
:
: "memory");
}
static void fxrstor(void *register_state)
{
__asm__ __volatile__("fxrstor %0"
:
: "m" (*(uint8_t *)register_state)
: "memory");
}
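/* The XSAVE-family instructions below take the requested-feature bitmap in
* edx:eax */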
static void xrstor(void *register_state, uint64_t feature_mask)
{
__asm__ volatile("xrstor %0"
:
: "m"(*(uint8_t *)register_state),
"d"((uint32_t)(feature_mask >> 32)),
"a"((uint32_t)feature_mask)
: "memory");
}
static void xrstors(void *register_state, uint64_t feature_mask)
{
__asm__ volatile("xrstors %0"
:
: "m"(*(uint8_t *)register_state),
"d"((uint32_t)(feature_mask >> 32)),
"a"((uint32_t)feature_mask)
: "memory");
}
static void xsave(void *register_state, uint64_t feature_mask)
{
__asm__ volatile("xsave %0"
: "+m"(*(uint8_t *)register_state)
: "d"((uint32_t)(feature_mask >> 32)),
"a"((uint32_t)feature_mask)
: "memory");
}
static void xsaveopt(void *register_state, uint64_t feature_mask)
{
__asm__ volatile("xsaveopt %0"
: "+m"(*(uint8_t *)register_state)
: "d"((uint32_t)(feature_mask >> 32)),
"a"((uint32_t)feature_mask)
: "memory");
}
static void xsaves(void *register_state, uint64_t feature_mask)
{
__asm__ volatile("xsaves %0"
: "+m"(*(uint8_t *)register_state)
: "d"((uint32_t)(feature_mask >> 32)),
"a"((uint32_t)feature_mask)
: "memory");
}
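/* Read extended control register |reg|; XGETBV returns its value in edx:eax */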
uint64_t x86_xgetbv(uint32_t reg)
{
uint32_t hi, lo;
__asm__ volatile("xgetbv"
: "=d" (hi), "=a" (lo)
: "c"(reg)
: "memory");
return ((uint64_t)hi << 32) | lo;
}
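/* Write |val| to extended control register |reg| */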
void x86_xsetbv(uint32_t reg, uint64_t val)
{
__asm__ volatile("xsetbv"
:
: "c"(reg), "d"((uint32_t)(val >> 32)), "a"((uint32_t)val)
: "memory");
}
// Set the extended register PT mode to trace either cpus (!threads)
// or threads.
// WARNING: All PT MSRs should be set to init values before changing the mode.
// See x86_ipt_set_mode_task.
void x86_set_extended_register_pt_state(bool threads) {
if (!xsaves_supported || !(xss_component_bitmap & X86_XSAVE_STATE_PT))
return;
uint64_t xss = read_msr(IA32_XSS_MSR);
if (threads)
xss |= X86_XSAVE_STATE_PT;
else
xss &= ~(uint64_t)X86_XSAVE_STATE_PT;
write_msr(IA32_XSS_MSR, xss);
}