// Copyright 2016 The Fuchsia Authors
//
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file or at
// https://opensource.org/licenses/MIT
#include "arch/x86/feature.h"
#include <assert.h>
#include <bits.h>
#include <lib/arch/x86/boot-cpuid.h>
#include <lib/arch/x86/bug.h>
#include <lib/arch/x86/cache.h>
#include <lib/arch/x86/extension.h>
#include <lib/arch/x86/feature.h>
#include <lib/arch/x86/power.h>
#include <lib/arch/x86/speculation.h>
#include <lib/boot-options/boot-options.h>
#include <lib/system-topology.h>
#include <stdint.h>
#include <string.h>
#include <trace.h>
#include <arch/ops.h>
#include <arch/x86/cpuid.h>
#include <arch/x86/hwp.h>
#include <arch/x86/mmu.h>
#include <arch/x86/platform_access.h>
#include <arch/x86/pv.h>
#include <fbl/algorithm.h>
#include <hwreg/x86msr.h>
#include <ktl/atomic.h>
#include <ktl/iterator.h>
#include <platform/pc/bootbyte.h>
#include <ktl/enforce.h>
#define LOCAL_TRACE 0
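// Cached raw CPUID leaves (basic, hypervisor, and extended ranges), populated once by
// x86_feature_early_init_percpu() on the first CPU to run it.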
struct cpuid_leaf _cpuid[MAX_SUPPORTED_CPUID + 1];
struct cpuid_leaf _cpuid_hyp[MAX_SUPPORTED_CPUID_HYP - X86_CPUID_HYP_BASE + 1];
struct cpuid_leaf _cpuid_ext[MAX_SUPPORTED_CPUID_EXT - X86_CPUID_EXT_BASE + 1];
uint32_t max_cpuid = 0;
uint32_t max_hyp_cpuid = 0;
uint32_t max_ext_cpuid = 0;
enum x86_vendor_list x86_vendor;
const x86_microarch_config_t* x86_microarch_config;
static struct x86_model_info model_info;
bool g_x86_feature_fsgsbase;
bool g_x86_feature_invpcid;
bool g_x86_feature_pcid_enabled;
bool g_x86_feature_has_smap;
bool g_has_meltdown;
bool g_has_l1tf;
bool g_l1d_flush_on_vmentry;
bool g_has_mds_taa;
bool g_has_swapgs_bug;
bool g_has_ssb;
bool g_has_ssbd;
bool g_ssb_mitigated;
bool g_has_md_clear;
bool g_md_clear_on_user_return;
bool g_has_spec_ctrl;
bool g_has_ibpb;
bool g_should_ibpb_on_ctxt_switch;
bool g_ras_fill_on_ctxt_switch;
bool g_cpu_vulnerable_to_rsb_underflow;
bool g_cpu_vulnerable_to_rsb_cross_thread;
bool g_has_enhanced_ibrs;
bool g_has_retbleed;
bool g_stibp_enabled;
enum x86_hypervisor_list x86_hypervisor;
bool g_hypervisor_has_pv_clock;
bool g_hypervisor_has_pv_eoi;
bool g_hypervisor_has_pv_ipi;
static ktl::atomic<bool> g_cpuid_initialized;
static enum x86_hypervisor_list get_hypervisor();
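// Invoked early on every CPU; only the first caller (normally the boot CPU) populates the
// CPUID caches above. Subsequent callers return immediately.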
void x86_feature_early_init_percpu(void) {
if (g_cpuid_initialized.exchange(true)) {
return;
}
/* test for cpuid count */
cpuid(0, &_cpuid[0].a, &_cpuid[0].b, &_cpuid[0].c, &_cpuid[0].d);
max_cpuid = _cpuid[0].a;
if (max_cpuid > MAX_SUPPORTED_CPUID)
max_cpuid = MAX_SUPPORTED_CPUID;
LTRACEF("max cpuid 0x%x\n", max_cpuid);
/* figure out the vendor */
union {
uint32_t vendor_id[3];
char vendor_string[12];
} vu;
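/* basic leaf 0 reports the vendor string across EBX, EDX, ECX, in that order
 * ("Genu", "ineI", "ntel") */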
vu.vendor_id[0] = _cpuid[0].b;
vu.vendor_id[1] = _cpuid[0].d;
vu.vendor_id[2] = _cpuid[0].c;
if (!memcmp(vu.vendor_string, "GenuineIntel", sizeof(vu.vendor_string))) {
x86_vendor = X86_VENDOR_INTEL;
} else if (!memcmp(vu.vendor_string, "AuthenticAMD", sizeof(vu.vendor_string))) {
x86_vendor = X86_VENDOR_AMD;
} else {
x86_vendor = X86_VENDOR_UNKNOWN;
}
/* read in the base cpuids */
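/* only sub-leaf 0 of each leaf is cached here; other sub-leaves are read on demand via
 * x86_get_cpuid_subleaf() */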
for (uint32_t i = 1; i <= max_cpuid; i++) {
cpuid_c(i, 0, &_cpuid[i].a, &_cpuid[i].b, &_cpuid[i].c, &_cpuid[i].d);
}
/* test for extended cpuid count */
cpuid(X86_CPUID_EXT_BASE, &_cpuid_ext[0].a, &_cpuid_ext[0].b, &_cpuid_ext[0].c, &_cpuid_ext[0].d);
max_ext_cpuid = _cpuid_ext[0].a;
LTRACEF("max extended cpuid 0x%x\n", max_ext_cpuid);
if (max_ext_cpuid > MAX_SUPPORTED_CPUID_EXT)
max_ext_cpuid = MAX_SUPPORTED_CPUID_EXT;
/* read in the extended cpuids */
for (uint32_t i = X86_CPUID_EXT_BASE + 1; i - 1 < max_ext_cpuid; i++) {
uint32_t index = i - X86_CPUID_EXT_BASE;
cpuid_c(i, 0, &_cpuid_ext[index].a, &_cpuid_ext[index].b, &_cpuid_ext[index].c,
&_cpuid_ext[index].d);
}
/* read in the hypervisor cpuids. the maximum leaf is reported at X86_CPUID_HYP_BASE. */
cpuid(X86_CPUID_HYP_VENDOR, &_cpuid_hyp[0].a, &_cpuid_hyp[0].b, &_cpuid_hyp[0].c,
&_cpuid_hyp[0].d);
max_hyp_cpuid = _cpuid_hyp[0].a;
if (max_hyp_cpuid > MAX_SUPPORTED_CPUID_HYP)
max_hyp_cpuid = MAX_SUPPORTED_CPUID_HYP;
for (uint32_t i = X86_CPUID_HYP_BASE; i <= max_hyp_cpuid; i++) {
uint32_t index = i - X86_CPUID_HYP_BASE;
cpuid(i, &_cpuid_hyp[index].a, &_cpuid_hyp[index].b, &_cpuid_hyp[index].c,
&_cpuid_hyp[index].d);
}
/* populate the model info */
const struct cpuid_leaf* leaf = x86_get_cpuid_leaf(X86_CPUID_MODEL_FEATURES);
if (leaf) {
model_info.processor_type = (uint8_t)BITS_SHIFT(leaf->a, 13, 12);
model_info.family = (uint8_t)BITS_SHIFT(leaf->a, 11, 8);
model_info.model = (uint8_t)BITS_SHIFT(leaf->a, 7, 4);
model_info.stepping = (uint8_t)BITS_SHIFT(leaf->a, 3, 0);
model_info.display_family = model_info.family;
model_info.display_model = model_info.model;
if (model_info.family == 0xf) {
model_info.display_family += BITS_SHIFT(leaf->a, 27, 20);
}
if (model_info.family == 0xf || model_info.family == 0x6) {
model_info.display_model += BITS_SHIFT(leaf->a, 19, 16) << 4;
}
}
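/* example: a leaf-1 EAX of 0x000906ea decodes to family 0x6, model 0xe, extended model 0x9,
 * stepping 0xa, giving a display model of 0x9e (matched against the model table in
 * get_microarch_config() below) */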
cpu_id::CpuId cpuid;
x86_microarch_config = get_microarch_config(&cpuid);
x86_hypervisor = get_hypervisor();
g_hypervisor_has_pv_clock =
x86_hypervisor == X86_HYPERVISOR_KVM && x86_feature_test(X86_FEATURE_KVM_PV_CLOCK);
g_hypervisor_has_pv_eoi =
x86_hypervisor == X86_HYPERVISOR_KVM && x86_feature_test(X86_FEATURE_KVM_PV_EOI);
g_hypervisor_has_pv_ipi =
x86_hypervisor == X86_HYPERVISOR_KVM && x86_feature_test(X86_FEATURE_KVM_PV_IPI);
g_x86_feature_has_smap = x86_feature_test(X86_FEATURE_SMAP);
g_x86_feature_fsgsbase = x86_feature_test(X86_FEATURE_FSGSBASE);
g_x86_feature_invpcid = x86_feature_test(X86_FEATURE_INVPCID);
}
// Invoked on the boot CPU during boot, after platform is available.
void x86_cpu_feature_init() {
DEBUG_ASSERT(arch_curr_cpu_num() == 0);
arch::BootCpuidIo cpuid;
hwreg::X86MsrIo msr;
// TODO(61093): Replace with newer lib/arch and hwreg counterparts.
cpu_id::CpuId cpuid_old;
MsrAccess msr_old;
// Get microcode patch level
switch (x86_vendor) {
case X86_VENDOR_INTEL:
model_info.patch_level = x86_intel_get_patch_level();
break;
case X86_VENDOR_AMD:
model_info.patch_level = x86_amd_get_patch_level();
break;
default:
break;
}
g_has_swapgs_bug = arch::HasX86SwapgsBug(cpuid);
// If mitigations are enabled, try to disable TSX. Disabling TSX prevents exploitation of
// TAA/CacheOut attacks and potential future exploits. It also avoids the need for MD_CLEAR
// flushes on CPUs without MDS.
//
// WARNING: If we disable TSX, we must do so before we determine whether we are affected by
// TAA/CacheOut; otherwise the TAA/CacheOut determination code will run before the TSX
// CPUID bit is masked.
if (!gBootOptions->x86_disable_spec_mitigations && arch::DisableTsx(cpuid, msr)) {
// If successful, repopulate the boot CPU's CPUID cache in order to reflect
// the disabling.
arch::InitializeBootCpuid();
}
g_has_md_clear = cpuid.Read<arch::CpuidExtendedFeatureFlagsD>().md_clear();
g_has_mds_taa = arch::HasX86MdsTaaBugs(cpuid, msr);
g_md_clear_on_user_return = !gBootOptions->x86_disable_spec_mitigations && g_has_mds_taa &&
g_has_md_clear && gBootOptions->x86_md_clear_on_user_return;
g_has_spec_ctrl = arch::SpeculationControlMsr::IsSupported(cpuid);
g_has_ssb = arch::HasX86SsbBug(cpuid, msr);
g_has_ssbd = arch::CanMitigateX86SsbBug(cpuid);
g_ssb_mitigated = !gBootOptions->x86_disable_spec_mitigations && g_has_ssb && g_has_ssbd &&
gBootOptions->x86_spec_store_bypass_disable;
g_has_ibpb = arch::HasIbpb(cpuid);
g_has_enhanced_ibrs = arch::HasIbrs(cpuid, msr, /*always_on_mode=*/true);
g_has_meltdown = arch::HasX86MeltdownBug(cpuid, msr);
g_has_l1tf = arch::HasX86L1tfBug(cpuid, msr);
g_l1d_flush_on_vmentry = !gBootOptions->x86_disable_spec_mitigations && g_has_l1tf &&
arch::BootCpuid<arch::CpuidExtendedFeatureFlagsD>().l1d_flush();
g_ras_fill_on_ctxt_switch = !gBootOptions->x86_disable_spec_mitigations;
g_cpu_vulnerable_to_rsb_underflow = !gBootOptions->x86_disable_spec_mitigations &&
(x86_vendor == X86_VENDOR_INTEL) &&
x86_intel_cpu_has_rsb_fallback(&cpuid_old, &msr_old);
// TODO(https://fxbug.dev/42108888, https://fxbug.dev/42072538): Consider whether a process can
// opt out of an IBPB on switch, either on switch-in (ex: it's compiled with a retpoline) or
// switch-out (ex: it promises not to attack the next process).
// TODO(https://fxbug.dev/42108888, https://fxbug.dev/42072538): Should we have an individual knob
// for IBPB?
g_should_ibpb_on_ctxt_switch = !gBootOptions->x86_disable_spec_mitigations && g_has_ibpb;
switch (x86_vendor) {
case X86_VENDOR_INTEL:
g_has_retbleed = false; // TODO: Enumerate Intel CPUs affected by RETBLEED.
break;
case X86_VENDOR_AMD:
g_has_retbleed = x86_amd_has_retbleed();
break;
case X86_VENDOR_UNKNOWN:
break;
}
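// Only use PCID when INVPCID is also present (INVPCID allows invalidating translations for an
// individual PCID without reloading CR3) and the boot option requests it.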
g_x86_feature_pcid_enabled =
x86_feature_test(X86_FEATURE_PCID) && g_x86_feature_invpcid && gBootOptions->x86_enable_pcid;
}
// Invoked on each CPU during boot, after platform init has taken place.
void x86_cpu_feature_late_init_percpu(void) {
const bool on_boot_cpu = arch_curr_cpu_num() == 0;
const system_topology::Graph& topology = system_topology::Graph::GetSystemTopology();
const bool ht_disabled = (topology.processor_count() == topology.logical_processor_count()) ||
!gBootOptions->smp_ht_enabled;
arch::BootCpuidIo cpuid;
hwreg::X86MsrIo msr;
// Same reasoning as in x86_cpu_feature_init() on the boot CPU.
if (!gBootOptions->x86_disable_spec_mitigations && !on_boot_cpu) {
arch::DisableTsx(cpuid, msr);
}
// Spectre v2 hardware-related mitigations; retpolines may further be used,
// which is taken care of by the code-patching engine.
bool stibp_enabled = false;
if (!gBootOptions->x86_disable_spec_mitigations) {
auto spectre_v2_mitigation = arch::GetPreferredSpectreV2Mitigation(cpuid, msr);
if ((spectre_v2_mitigation == arch::SpectreV2Mitigation::kIbpbRetpolineStibp) && ht_disabled) {
spectre_v2_mitigation = arch::SpectreV2Mitigation::kIbpbRetpoline;
}
switch (spectre_v2_mitigation) {
case arch::SpectreV2Mitigation::kIbrs: // Enhanced IBRS
arch::EnableIbrs(cpuid, msr);
break;
case arch::SpectreV2Mitigation::kIbpbRetpoline:
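// Nothing to enable here: retpolines are applied by the code-patching engine (see the
// comment above).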
break;
case arch::SpectreV2Mitigation::kIbpbRetpolineStibp:
// Enable STIBP for added cross-hyperthread security.
stibp_enabled = true;
arch::EnableStibp(cpuid, msr);
break;
}
}
// AMD-SB-1045: On certain processors (Zen, Zen2), when a thread halts, its Return Stack Buffer
// entries become available to the other hyperthread; the contents of the entries aren't
// cleared, however. On these processors, clear the return stack buffer before entering MWAIT or
// halt to prevent the hyperbuddy from consuming our (stale) RSB entries.
if (!gBootOptions->x86_disable_spec_mitigations) {
if (x86_vendor == X86_VENDOR_AMD && model_info.display_family == 0x17 && !ht_disabled) {
g_cpu_vulnerable_to_rsb_cross_thread = true;
}
}
// RETbleed mitigations
// Some RETbleed mitigations may overlap with Spectre V2 mitigations.
if (!gBootOptions->x86_disable_spec_mitigations && g_has_retbleed) {
if (x86_vendor == X86_VENDOR_AMD) {
if (arch::HasStibp(cpuid, false) && !stibp_enabled && !ht_disabled) {
stibp_enabled = true;
arch::EnableStibp(cpuid, msr);
}
x86_amd_zen2_retbleed_mitigation(model_info);
}
if (x86_vendor == X86_VENDOR_INTEL) {
// TODO: Mitigate RETbleed on Intel processors.
}
}
g_stibp_enabled |= stibp_enabled;
// Mitigate Spectre v4 (Speculative Store Bypass) if requested.
if (x86_cpu_should_mitigate_ssb()) {
if (!arch::MitigateX86SsbBug(cpuid, msr)) {
printf("failed to mitigate SSB (Speculative Store Bypass) vulnerability\n");
}
}
// Enable/disable Turbo on the processor.
if (arch::SetX86CpuTurboState(cpuid, msr, gBootOptions->x86_turbo)) {
// Since IA32_MISC_ENABLE may have been updated and CPUID leaf 0x6 reflects its state,
// repopulate the boot CPUID cache.
if (on_boot_cpu) {
arch::InitializeBootCpuid();
printf("Turbo performance boost: %s\n", gBootOptions->x86_turbo ? "enabled" : "disabled");
}
} else if (on_boot_cpu) {
printf("Turbo performance boost: unsupported\n");
}
// TODO(https://fxbug.dev/42139346): Replace with newer lib/arch and hwreg counterparts.
cpu_id::CpuId cpuid_old;
MsrAccess msr_old;
// Set up hardware-controlled performance states.
if (gBootOptions->x86_hwp) {
x86::IntelHwpInit(&cpuid_old, &msr_old, gBootOptions->x86_hwp_policy);
}
// If we are running under a hypervisor and paravirtual EOI (PV_EOI) is available, enable it.
if (x86_hypervisor_has_pv_eoi()) {
PvEoi::get()->Enable(&msr_old);
}
}
static enum x86_hypervisor_list get_hypervisor() {
if (!x86_feature_test(X86_FEATURE_HYPERVISOR)) {
return X86_HYPERVISOR_NONE;
}
uint32_t a, b, c, d;
cpuid(X86_CPUID_HYP_VENDOR, &a, &b, &c, &d);
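// Unlike basic leaf 0 (EBX, EDX, ECX), the hypervisor vendor leaf reports its signature in
// EBX, ECX, EDX order.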
union {
uint32_t vendor_id[3];
char vendor_string[12];
} vu;
vu.vendor_id[0] = b;
vu.vendor_id[1] = c;
vu.vendor_id[2] = d;
if (a >= X86_CPUID_KVM_FEATURES &&
!memcmp(vu.vendor_string, "KVMKVMKVM\0\0\0", sizeof(vu.vendor_string))) {
return X86_HYPERVISOR_KVM;
} else {
return X86_HYPERVISOR_UNKNOWN;
}
}
bool x86_get_cpuid_subleaf(enum x86_cpuid_leaf_num num, uint32_t subleaf, struct cpuid_leaf* leaf) {
if (num < X86_CPUID_EXT_BASE) {
if (num > max_cpuid)
return false;
} else if (num > max_ext_cpuid) {
return false;
}
cpuid_c((uint32_t)num, subleaf, &leaf->a, &leaf->b, &leaf->c, &leaf->d);
return true;
}
const struct x86_model_info* x86_get_model(void) { return &model_info; }
// Printable registers can take up quite a bit of unsafe stack space. By
// constructing them as temporary variables within a separate, non-inlinable
// function, we ensure that only one such register lives on the
// stack at a given time across consecutive calls to print their fields.
template <typename RegisterType, typename PrintCallback>
[[gnu::noinline]] void PrintFields(PrintCallback& print_cb) {
arch::BootCpuid<RegisterType>().ForEachField(print_cb);
}
void x86_feature_debug(void) {
// Allows us to take advantage of custom print format specifiers, which the
// compiler would otherwise complain about.
auto Printf = [](const char* fmt, ...) {
va_list args;
va_start(args, fmt);
vprintf(fmt, args);
va_end(args);
};
Printf("\n");
arch::BootCpuidIo io;
{
arch::CpuCacheInfo caches(io);
Printf("==== X86 CACHE INFO ====\n");
Printf("%-5s | %-11s | %-10s | %-5s | %-6s |\n", "Level", "Type", "Size (KiB)", "Sets",
"Assoc.");
for (const auto& cache : caches) {
Printf("L%-4zu | %-11V | %-10zu | %-5zu | %-6zu |\n", cache.level, arch::ToString(cache.type),
cache.size_kb, cache.number_of_sets, cache.ways_of_associativity);
}
Printf("\n");
}
Printf("Vendor: %V\n", arch::ToString(arch::GetVendor(io)));
Printf("Microarchitecture: %V\n", arch::ToString(arch::GetMicroarchitecture(io)));
Printf("Processor: %V\n", arch::ProcessorName(io).name());
{
ktl::string_view hypervisor = arch::HypervisorName(io).name();
Printf("Hypervisor: %V\n", hypervisor.empty() ? "None" : hypervisor);
}
const auto version = io.Read<arch::CpuidVersionInfo>();
Printf("Family/Model/Stepping: %#x/%#x/%#x\n", version.family(), version.model(),
version.stepping());
Printf("Patch level: %x\n", model_info.patch_level);
auto print_feature = [col = size_t{0}](const char* name, auto value, auto, auto) mutable {
if (name && value) {
col += printf("%s%s", col ? ", " : "", name);
if (col >= 80) {
printf("\n");
col = 0;
}
}
};
Printf("\nFeatures:\n");
PrintFields<arch::CpuidFeatureFlagsC>(print_feature);
PrintFields<arch::CpuidFeatureFlagsD>(print_feature);
PrintFields<arch::CpuidExtendedFeatureFlagsB>(print_feature);
// TODO(https://fxbug.dev/42147424): Print when we can afford to.
// io.Read<arch::CpuidAmdFeatureFlagsC>().ForEachField(print_feature);
Printf("\n");
// Print synthetic 'features'/properties.
auto print_property = [col = size_t{0}](const char* property, bool print = true) mutable {
if (print) {
col += printf("%s%s", col ? ", " : "", property);
if (col >= 80) {
printf("\n");
col = 0;
}
}
};
Printf("\nProperties:\n");
print_property("meltdown", g_has_meltdown);
print_property("l1tf", g_has_l1tf);
print_property("mds/taa", g_has_mds_taa);
print_property("md_clear", g_has_md_clear);
print_property("md_clear_user_return", g_md_clear_on_user_return);
print_property("swapgs_bug", g_has_swapgs_bug);
print_property("pcid_good", g_x86_feature_pcid_enabled);
print_property("spec_ctrl", g_has_spec_ctrl);
print_property("ssb", g_has_ssb);
print_property("ssbd", g_has_ssbd);
print_property("ssb_mitigated", g_ssb_mitigated);
print_property("ibpb", g_has_ibpb);
print_property("l1d_flush_on_vmentry", g_l1d_flush_on_vmentry);
print_property("ibpb_ctxt_switch", g_should_ibpb_on_ctxt_switch);
print_property("ras_fill", g_ras_fill_on_ctxt_switch);
print_property("enhanced_ibrs", g_has_enhanced_ibrs);
#ifdef KERNEL_RETPOLINE
print_property("retpoline");
#endif
#ifdef X64_KERNEL_JCC_WORKAROUND
print_property("jcc_fix");
#endif
#ifdef HARDEN_SLS
print_property("harden_sls");
#endif
print_property("retbleed", g_has_retbleed);
print_property("stibp_enabled", g_stibp_enabled);
#ifdef ZERO_CALL_USED_REGS
print_property("zero_call_used_regs");
#endif
print_property("cpu_vulnerable_to_rsb_cross_thread", g_cpu_vulnerable_to_rsb_cross_thread);
if (arch::BootCpuidSupports<arch::CpuidPerformanceMonitoringA>()) {
const arch::CpuidPerformanceMonitoringA eax = io.Read<arch::CpuidPerformanceMonitoringA>();
const arch::CpuidPerformanceMonitoringD edx = io.Read<arch::CpuidPerformanceMonitoringD>();
if (eax.version() > 0) {
printf("\narch_pmu version %u general purpose counters %u fixed counters %u\n", eax.version(),
eax.num_general_counters(), edx.num_fixed_counters());
}
}
Printf("\n\n");
}
// The highest-priority mechanism for determining the APIC frequency.
static uint64_t default_apic_freq() {
// The APIC frequency is the core crystal clock frequency if it is
// enumerated in CPUID leaf 0x15 (ECX, in Hz); otherwise it is the
// processor's bus clock frequency, which we cannot determine here, so return 0.
const struct cpuid_leaf* tsc_leaf = x86_get_cpuid_leaf(X86_CPUID_TSC);
if (tsc_leaf && tsc_leaf->c != 0) {
return tsc_leaf->c;
}
return 0;
}
// From Intel SDMv3 section 19.7.3 (Determining the Processor Base Frequency).
// For cores that have a hard-coded bus frequency or crystal clock,
// fall back to this value if CPUID leaf 0x15 doesn't fully specify it and we're not
// running in a hypervisor.
static uint64_t apic_freq_constant_fallback(const uint64_t hardcoded_apic_freq) {
uint64_t v = default_apic_freq();
if (v != 0) {
return v;
}
if (x86_feature_test(X86_FEATURE_HYPERVISOR)) {
return 0;
}
return hardcoded_apic_freq;
}
// From Intel SDMv3 section 19.7.3 (Determining the Processor Base Frequency).
static uint64_t skl_apic_freq() { return apic_freq_constant_fallback(24ul * 1000 * 1000); }
// From Intel SDMv3 section 19.7.3 (Determining the Processor Base Frequency).
static uint64_t skl_x_apic_freq() { return apic_freq_constant_fallback(25ul * 1000 * 1000); }
// From Intel SDMv3 section 19.7.3 (Determining the Processor Base Frequency).
static uint64_t bdw_apic_freq() { return apic_freq_constant_fallback(100ul * 1000 * 1000); }
static uint64_t bulldozer_apic_freq() {
// The Family 15h BKDG documents that it is 100 MHz.
return apic_freq_constant_fallback(100ul * 1000 * 1000);
}
static uint64_t unknown_freq() { return 0; }
static uint64_t intel_tsc_freq() {
const uint64_t core_crystal_clock_freq = x86_get_microarch_config()->get_apic_freq();
// If this leaf is present, then 19.7.3 (Determining the Processor Base
// Frequency) documents this as the nominal TSC frequency.
const struct cpuid_leaf* tsc_leaf = x86_get_cpuid_leaf(X86_CPUID_TSC);
if (tsc_leaf && tsc_leaf->a) {
return (core_crystal_clock_freq * tsc_leaf->b) / tsc_leaf->a;
}
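// Illustrative arithmetic only: a 24 MHz crystal with EBX/EAX = 250/2 would yield
// 24 MHz * 250 / 2 = 3 GHz as the nominal TSC frequency.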
return 0;
}
static uint64_t amd_compute_p_state_clock(uint64_t p_state_msr) {
// bit 63 (PstateEn): is this P-state defined and enabled?
if (!BIT(p_state_msr, 63))
return 0;
// different AMD microarchitectures use slightly different formulas to compute
// the effective clock rate of a P-state
uint64_t clock = 0;
switch (x86_microarch_config->x86_microarch) {
case X86_MICROARCH_AMD_BULLDOZER:
case X86_MICROARCH_AMD_JAGUAR: {
uint64_t did = BITS_SHIFT(p_state_msr, 8, 6);
uint64_t fid = BITS(p_state_msr, 5, 0);
clock = (100 * (fid + 0x10) / (1 << did)) * 1000 * 1000;
break;
}
case X86_MICROARCH_AMD_ZEN: {
uint64_t fid = BITS(p_state_msr, 7, 0);
clock = (fid * 25) * 1000 * 1000;
break;
}
default:
break;
}
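// Worked examples (illustrative register values only): on Zen, FID = 0x90 (144) gives
// 144 * 25 MHz = 3600 MHz; on Bulldozer/Jaguar, FID = 0x10 and DID = 0 give
// 100 * (0x10 + 0x10) / 1 MHz = 3200 MHz.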
return clock;
}
static uint64_t zen_tsc_freq() {
const uint32_t p0_state_msr = 0xc0010064; // base P-state MSR
// According to the Family 17h PPR, the first P-state MSR corresponds to the P0
// state, and that matches what we observe experimentally.
uint64_t p0_state;
if (read_msr_safe(p0_state_msr, &p0_state) != ZX_OK)
return 0;
return amd_compute_p_state_clock(p0_state);
}
static void unknown_reboot_system() {}
static void unknown_reboot_reason(uint64_t reason) {
// Write the boot reason to cmos, which should be present on all PCs.
bootbyte_set_reason(reason);
}
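// Reset Control Register (I/O port 0xcf9) bit layout, per Intel chipset datasheets:
//   bit 1 (SYS_RST)  - select a full system reset rather than a CPU-only reset
//   bit 2 (RST_CPU)  - a 0 -> 1 transition initiates the reset
//   bit 3 (FULL_RST) - select a power-cycle ("full") reset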
static void cf9_reboot_system() {
// 100-Series Chipset Reset Control Register: CPU + SYS Reset
// AMD Bulldozer+ defines this the same way
outp(0xcf9, 0x06);
}
static void cf9_reboot_reason(uint64_t reason) {
// Write the boot reason to cmos, which should be present on all PCs.
bootbyte_set_reason(reason);
// 100-Series Chipset Reset Control Register: CPU + SYS Reset
// AMD Bulldozer+ defines this the same way
// clear PCI reset sequence
outp(0xcf9, 0x02);
// discarded reads acting as a small delay on the bus
(void)inp(0xcf9);
(void)inp(0xcf9);
outp(0xcf9, 0x04);
}
// Intel microarches
static const x86_microarch_config_t icelake_config{
.x86_microarch = X86_MICROARCH_INTEL_ICELAKE,
.get_apic_freq = default_apic_freq,
.get_tsc_freq = intel_tsc_freq,
.reboot_system = cf9_reboot_system,
.reboot_reason = cf9_reboot_reason,
.disable_c1e = true,
.idle_prefer_hlt = false,
.idle_states =
{
.states = {X86_CSTATE_C1(0)},
.default_state_mask = kX86IdleStateMaskC1Only,
},
};
static const x86_microarch_config_t tiger_lake_config{
.x86_microarch = X86_MICROARCH_INTEL_TIGERLAKE,
.get_apic_freq = default_apic_freq,
.get_tsc_freq = intel_tsc_freq,
.reboot_system = cf9_reboot_system,
.reboot_reason = cf9_reboot_reason,
.disable_c1e = true,
.idle_prefer_hlt = false,
.idle_states =
{
.states =
{
// TODO(https://fxbug.dev/42053624): fill this in.
X86_CSTATE_C1(0),
},
.default_state_mask = kX86IdleStateMaskC1Only,
},
};
static const x86_microarch_config_t alder_lake_config{
.x86_microarch = X86_MICROARCH_INTEL_ALDERLAKE,
.get_apic_freq = default_apic_freq,
.get_tsc_freq = intel_tsc_freq,
.reboot_system = cf9_reboot_system,
.reboot_reason = cf9_reboot_reason,
.disable_c1e = true,
.idle_prefer_hlt = false,
.idle_states =
{
.states =
{
// TODO(https://fxbug.dev/42053624): fill this in.
X86_CSTATE_C1(0),
},
.default_state_mask = kX86IdleStateMaskC1Only,
},
};
static const x86_microarch_config_t cannon_lake_config{
.x86_microarch = X86_MICROARCH_INTEL_CANNONLAKE,
.get_apic_freq = default_apic_freq,
.get_tsc_freq = intel_tsc_freq,
.reboot_system = cf9_reboot_system,
.reboot_reason = cf9_reboot_reason,
.disable_c1e = true,
.idle_prefer_hlt = false,
.idle_states =
{
.states =
{// TODO: Read exit_latency from IRTL registers
{.name = "C6", .mwait_hint = 0x20, .exit_latency = 120, .flushes_tlb = true},
X86_CSTATE_C1(0)},
.default_state_mask = kX86IdleStateMaskC1Only,
},
};
static const x86_microarch_config_t skylake_config{
.x86_microarch = X86_MICROARCH_INTEL_SKYLAKE,
.get_apic_freq = skl_apic_freq,
.get_tsc_freq = intel_tsc_freq,
.reboot_system = cf9_reboot_system,
.reboot_reason = cf9_reboot_reason,
.disable_c1e = true,
.idle_prefer_hlt = false,
.idle_states =
{
.states =
{
{.name = "C10", .mwait_hint = 0x60, .exit_latency = 890, .flushes_tlb = true},
{.name = "C9", .mwait_hint = 0x50, .exit_latency = 480, .flushes_tlb = true},
{.name = "C8", .mwait_hint = 0x40, .exit_latency = 200, .flushes_tlb = true},
{.name = "C7s", .mwait_hint = 0x33, .exit_latency = 124, .flushes_tlb = true},
{.name = "C6", .mwait_hint = 0x20, .exit_latency = 85, .flushes_tlb = true},
{.name = "C3", .mwait_hint = 0x10, .exit_latency = 70, .flushes_tlb = true},
X86_CSTATE_C1(0),
},
.default_state_mask = kX86IdleStateMaskC1Only,
},
};
static const x86_microarch_config_t skylake_x_config{
.x86_microarch = X86_MICROARCH_INTEL_SKYLAKE,
.get_apic_freq = skl_x_apic_freq,
.get_tsc_freq = intel_tsc_freq,
.reboot_system = cf9_reboot_system,
.reboot_reason = cf9_reboot_reason,
.disable_c1e = true,
.idle_prefer_hlt = false,
.idle_states =
{
.states = {X86_CSTATE_C1(0)},
.default_state_mask = kX86IdleStateMaskC1Only,
},
};
static const x86_microarch_config_t broadwell_config{
.x86_microarch = X86_MICROARCH_INTEL_BROADWELL,
.get_apic_freq = bdw_apic_freq,
.get_tsc_freq = intel_tsc_freq,
.reboot_system = cf9_reboot_system,
.reboot_reason = cf9_reboot_reason,
.disable_c1e = true,
.idle_prefer_hlt = false,
.idle_states =
{
.states = {X86_CSTATE_C1(0)},
.default_state_mask = kX86IdleStateMaskC1Only,
},
};
static const x86_microarch_config_t haswell_config{
.x86_microarch = X86_MICROARCH_INTEL_HASWELL,
.get_apic_freq = bdw_apic_freq,
.get_tsc_freq = intel_tsc_freq,
.reboot_system = cf9_reboot_system,
.reboot_reason = cf9_reboot_reason,
.disable_c1e = true,
.idle_prefer_hlt = false,
.idle_states =
{
.states = {X86_CSTATE_C1(0)},
.default_state_mask = kX86IdleStateMaskC1Only,
},
};
static const x86_microarch_config_t ivybridge_config{
.x86_microarch = X86_MICROARCH_INTEL_IVY_BRIDGE,
.get_apic_freq = bdw_apic_freq,
.get_tsc_freq = intel_tsc_freq,
.reboot_system = unknown_reboot_system,
.reboot_reason = unknown_reboot_reason,
.disable_c1e = true,
.idle_prefer_hlt = false,
.idle_states =
{
.states = {X86_CSTATE_C1(0)},
.default_state_mask = kX86IdleStateMaskC1Only,
},
};
static const x86_microarch_config_t sandybridge_config{
.x86_microarch = X86_MICROARCH_INTEL_SANDY_BRIDGE,
.get_apic_freq = bdw_apic_freq,
.get_tsc_freq = intel_tsc_freq,
.reboot_system = unknown_reboot_system,
.reboot_reason = unknown_reboot_reason,
.disable_c1e = true,
.idle_prefer_hlt = false,
.idle_states =
{
.states = {X86_CSTATE_C1(0)},
.default_state_mask = kX86IdleStateMaskC1Only,
},
};
static const x86_microarch_config_t westmere_config{
.x86_microarch = X86_MICROARCH_INTEL_WESTMERE,
.get_apic_freq = default_apic_freq,
.get_tsc_freq = intel_tsc_freq,
.reboot_system = unknown_reboot_system,
.reboot_reason = unknown_reboot_reason,
.disable_c1e = true,
.idle_prefer_hlt = false,
.idle_states =
{
.states = {X86_CSTATE_C1(0)},
.default_state_mask = kX86IdleStateMaskC1Only,
},
};
static const x86_microarch_config_t nehalem_config{
.x86_microarch = X86_MICROARCH_INTEL_NEHALEM,
.get_apic_freq = default_apic_freq,
.get_tsc_freq = intel_tsc_freq,
.reboot_system = unknown_reboot_system,
.reboot_reason = unknown_reboot_reason,
.disable_c1e = true,
.idle_prefer_hlt = false,
.idle_states =
{
.states = {X86_CSTATE_C1(0)},
.default_state_mask = kX86IdleStateMaskC1Only,
},
};
static const x86_microarch_config_t silvermont_config{
.x86_microarch = X86_MICROARCH_INTEL_SILVERMONT,
.get_apic_freq = default_apic_freq,
.get_tsc_freq = intel_tsc_freq,
.reboot_system = unknown_reboot_system,
.reboot_reason = unknown_reboot_reason,
.disable_c1e = false,
.idle_prefer_hlt = false,
.idle_states =
{
.states = {X86_CSTATE_C1(0)},
.default_state_mask = kX86IdleStateMaskC1Only,
},
};
static const x86_microarch_config_t goldmont_config{
.x86_microarch = X86_MICROARCH_INTEL_GOLDMONT,
.get_apic_freq = default_apic_freq,
.get_tsc_freq = intel_tsc_freq,
.reboot_system = cf9_reboot_system,
.reboot_reason = cf9_reboot_reason,
.disable_c1e = false,
// [APL30] Apollo Lake SOCs (Goldmont) have an erratum that causes stores to not always wake
// MWAIT-ing cores. Prefer HLT to avoid the issue.
.idle_prefer_hlt = true,
.idle_states =
{
.states = {X86_CSTATE_C1(0)},
.default_state_mask = kX86IdleStateMaskC1Only,
},
};
static const x86_microarch_config_t goldmont_plus_config{
.x86_microarch = X86_MICROARCH_INTEL_GOLDMONT_PLUS,
.get_apic_freq = default_apic_freq,
.get_tsc_freq = intel_tsc_freq,
.reboot_system = unknown_reboot_system,
.reboot_reason = unknown_reboot_reason,
.disable_c1e = false,
.idle_prefer_hlt = false,
.idle_states =
{
.states =
{
// TODO(https://fxbug.dev/42110877): Read C6 and deeper latency from IRTL
// registers
{.name = "C10", .mwait_hint = 0x60, .exit_latency = 10000, .flushes_tlb = true},
{.name = "C9", .mwait_hint = 0x50, .exit_latency = 2000, .flushes_tlb = true},
{.name = "C8", .mwait_hint = 0x40, .exit_latency = 1000, .flushes_tlb = true},
{.name = "C7s", .mwait_hint = 0x31, .exit_latency = 155, .flushes_tlb = true},
{.name = "C6", .mwait_hint = 0x20, .exit_latency = 133, .flushes_tlb = true},
{.name = "C1E", .mwait_hint = 0x01, .exit_latency = 10, .flushes_tlb = false},
X86_CSTATE_C1(0),
},
.default_state_mask = kX86IdleStateMaskC1Only,
},
};
static const x86_microarch_config_t intel_default_config{
.x86_microarch = X86_MICROARCH_UNKNOWN,
.get_apic_freq = default_apic_freq,
.get_tsc_freq = intel_tsc_freq,
.reboot_system = unknown_reboot_system,
.reboot_reason = unknown_reboot_reason,
.disable_c1e = false,
.idle_prefer_hlt = false,
.idle_states =
{
.states = {X86_CSTATE_C1(0)},
.default_state_mask = kX86IdleStateMaskC1Only,
},
};
// AMD microarches
static const x86_microarch_config_t zen_config{
.x86_microarch = X86_MICROARCH_AMD_ZEN,
.get_apic_freq = unknown_freq,
.get_tsc_freq = zen_tsc_freq,
.reboot_system = cf9_reboot_system,
.reboot_reason = cf9_reboot_reason,
.disable_c1e = false,
// Zen SOCs save substantial power using HLT instead of MWAIT.
// TODO(https://fxbug.dev/42139534): Use a predictor/selection to use mwait for short sleeps.
.idle_prefer_hlt = true,
.idle_states =
{
.states = {X86_CSTATE_C1(0)},
.default_state_mask = kX86IdleStateMaskC1Only,
},
};
static const x86_microarch_config_t jaguar_config{
.x86_microarch = X86_MICROARCH_AMD_JAGUAR,
.get_apic_freq = unknown_freq,
.get_tsc_freq = unknown_freq,
.reboot_system = cf9_reboot_system,
.reboot_reason = cf9_reboot_reason,
.disable_c1e = false,
.idle_prefer_hlt = false,
.idle_states =
{
.states = {X86_CSTATE_C1(0)},
.default_state_mask = kX86IdleStateMaskC1Only,
},
};
static const x86_microarch_config_t bulldozer_config{
.x86_microarch = X86_MICROARCH_AMD_BULLDOZER,
.get_apic_freq = bulldozer_apic_freq,
.get_tsc_freq = unknown_freq,
.reboot_system = cf9_reboot_system,
.reboot_reason = cf9_reboot_reason,
.disable_c1e = false,
// Excavator SOCs in particular save substantial power using HLT instead of MWAIT.
.idle_prefer_hlt = true,
.idle_states =
{
.states = {X86_CSTATE_C1(0)},
.default_state_mask = kX86IdleStateMaskC1Only,
},
};
static const x86_microarch_config_t amd_default_config{
.x86_microarch = X86_MICROARCH_UNKNOWN,
.get_apic_freq = unknown_freq,
.get_tsc_freq = unknown_freq,
.reboot_system = cf9_reboot_system,
.reboot_reason = cf9_reboot_reason,
.disable_c1e = false,
.idle_prefer_hlt = false,
.idle_states =
{
.states = {X86_CSTATE_C1(0)},
.default_state_mask = kX86IdleStateMaskC1Only,
},
};
// Unknown vendor config
static const x86_microarch_config_t unknown_vendor_config{
.x86_microarch = X86_MICROARCH_UNKNOWN,
.get_apic_freq = unknown_freq,
.get_tsc_freq = unknown_freq,
.reboot_system = unknown_reboot_system,
.reboot_reason = unknown_reboot_reason,
.disable_c1e = false,
.idle_prefer_hlt = false,
.idle_states =
{
.states = {X86_CSTATE_C1(0)},
.default_state_mask = kX86IdleStateMaskC1Only,
},
};
const x86_microarch_config_t* get_microarch_config(const cpu_id::CpuId* cpuid) {
auto vendor = cpuid->ReadManufacturerInfo();
auto processor_id = cpuid->ReadProcessorId();
if (vendor.manufacturer() == cpu_id::ManufacturerInfo::INTEL && processor_id.family() == 0x6) {
/* Table largely from https://en.wikichip.org/wiki/intel/cpuid */
switch (processor_id.model()) {
/* Big cores */
case 0x1a: /* Nehalem */
case 0x1e: /* Nehalem */
case 0x1f: /* Nehalem */
case 0x2e: /* Nehalem */
return &nehalem_config;
case 0x25: /* Westmere */
case 0x2c: /* Westmere */
case 0x2f: /* Westmere */
return &westmere_config;
case 0x2a: /* Sandy Bridge */
case 0x2d: /* Sandy Bridge EP */
return &sandybridge_config;
case 0x3a: /* Ivy Bridge */
case 0x3e: /* Ivy Bridge EP */
return &ivybridge_config;
case 0x3c: /* Haswell DT */
case 0x3f: /* Haswell MB */
case 0x45: /* Haswell ULT */
case 0x46: /* Haswell ULX */
return &haswell_config;
case 0x3d: /* Broadwell */
case 0x47: /* Broadwell H */
case 0x56: /* Broadwell EP */
case 0x4f: /* Broadwell EX */
return &broadwell_config;
case 0x4e: /* Skylake Y/U */
case 0x5e: /* Skylake H/S */
case 0x8e: /* Kaby Lake Y/U, Coffee Lake, Whiskey Lake, Amber Lake, Comet Lake U */
case 0x9e: /* Kaby Lake H/S, Coffee Lake, Whiskey Lake */
case 0xa5: /* Comet Lake S/H */
return &skylake_config;
case 0x55: /* Skylake X/SP, Cascade Lake */
return &skylake_x_config;
case 0x66: /* Cannon Lake U */
return &cannon_lake_config;
case 0x6a: /* Ice Lake-SP */
case 0x6c: /* Ice Lake-DE */
case 0x7d: /* Ice Lake Y */
case 0x7e: /* Ice Lake U */
return &icelake_config;
case 0x8c: /* Tiger Lake UP */
case 0x8d: /* Tiger Lake H */
case 0xa7: /* Rocket Lake S */
return &tiger_lake_config;
case 0x97: /* Alder Lake S */
case 0x9a: /* Alder Lake H/P/U */
case 0xb7: /* Raptor Lake S */
return &alder_lake_config;
/* Small cores */
case 0x37: /* Silvermont */
case 0x4a: /* Silvermont "Cherry View" */
case 0x4d: /* Silvermont "Avoton", "Rangeley" */
case 0x5a: /* Silvermont "Anniedale" */
case 0x5d: /* Silvermont "SoFIA" */
case 0x4c: /* Airmont "Braswell", "Cherry Trail" */
return &silvermont_config;
case 0x5c: /* Goldmont (Apollo Lake) */
case 0x5f: /* Goldmont (Denverton) */
return &goldmont_config;
case 0x7a: /* Goldmont Plus (Gemini Lake) */
case 0x8a: /* Tremont (Lakefield) */
case 0x96: /* Tremont (Elkhart Lake) */
case 0x9c: /* Tremont (Jasper Lake) */
return &goldmont_plus_config;
default:
return &intel_default_config;
}
} else if (vendor.manufacturer() == cpu_id::ManufacturerInfo::AMD) {
switch (processor_id.family()) {
case 0x15:
return &bulldozer_config;
case 0x16:
return &jaguar_config;
case 0x17: /* Zen 1, 2 */
case 0x19: /* Zen 3, 4 */
return &zen_config;
default:
return &amd_default_config;
}
}
return &unknown_vendor_config;
}
extern "C" {
void x86_cpu_maybe_l1d_flush(zx_status_t syscall_return) {
if (gBootOptions->x86_disable_spec_mitigations) {
return;
}
// Spectre V1: If we are returning from a syscall with one of these errors, flush the entire
// L1D cache. This prevents hostile code from reading any data the kernel brought in to cache,
// even speculatively.
//
// We only flush on these errors as they are not expected in the steady state and cover most
// expected Spectre V1 attack constructions. Most attacks will either pass in invalid indexes
// or invalid handles, to leak table contents; ZX_ERR_INVALID_ARGS and ZX_ERR_BAD_HANDLE cover
// those cases.
//
// Allowing a process to cause an L1D cache flush is low risk; the process could cycle enough
// data through the L1 to evict + replace all data very quickly. Allowing a process to cause
// a WBINVD, however, would be higher-risk - it flushes every cache in the system, which could
// be very disruptive to other work; therefore we don't fall back from IA32_FLUSH_CMD to WBINVD.
if (syscall_return == ZX_ERR_INVALID_ARGS || syscall_return == ZX_ERR_BAD_HANDLE) {
if (x86_feature_test(X86_FEATURE_L1D_FLUSH)) {
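// Writing bit 0 (L1D_FLUSH) of IA32_FLUSH_CMD writes back and invalidates the L1 data cache.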
write_msr(X86_MSR_IA32_FLUSH_CMD, 1);
}
}
}
}