[kernel][x86] Spectre V2: Flush RAS on context switches, not every kernel entry

x86 processors consult a hardware return address stack (RAS, also
called the RSB) to predict the target addresses of RET instructions.
Its behavior is well-documented, which is why retpolines use it as the
core structure of Spectre V2 indirect branch target poisoning defenses;
however, the RSB itself can be poisoned and, on some processors, is
susceptible to underflow (SpectreRSB and Skylake underflow).
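
For context, the generic retpoline construct replaces an indirect
branch with a CALL/RET pair so that target prediction comes from an
RSB entry the kernel plants rather than from the poisonable indirect
branch predictor. A minimal sketch of the standard thunk (not Zircon's
exact code; the macro name is illustrative):

    .macro RETPOLINE_JMP_RAX
        call  .Lset_target\@
    .Lcapture\@:
        pause                 /* speculation lands here and spins harmlessly */
        lfence
        jmp   .Lcapture\@
    .Lset_target\@:
        mov   %rax, (%rsp)    /* replace the return address with the real target */
        ret                   /* predicted from the RSB entry planted by the CALL */
    .endm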

To protect against RSB attacks (cross-process and user->kernel), we
conservatively overwrote the RSB on all kernel entries in an earlier
commit.

That was unnecessary: potentially poisoned RSB entries can only be
consumed by RETs that are not paired with CALLs, and that situation can
only arise in the context switch code and on #VMExits. Move the RSB
flush out of the kernel entry paths and into the context switch code.
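
The fill itself is the usual RSB-stuffing pattern already implemented
by x86_ras_fill in ops.S: a run of CALLs whose only job is to plant
benign return targets, so any unpaired or mispredicted RET speculates
into a harmless trap. A sketch of the pattern (not the exact kernel
code):

        push  %rcx
        mov   $16, %ecx
    1:  call  2f              /* each CALL plants a benign RSB entry */
    .Ltrap_a:
        pause
        lfence
        jmp   .Ltrap_a        /* speculated RETs land here and spin */
    2:  call  3f
    .Ltrap_b:
        pause
        lfence
        jmp   .Ltrap_b
    3:  dec   %ecx
        jnz   1b              /* 16 iterations x 2 CALLs = 32 RSB entries */
        add   $256, %rsp      /* discard the 32 return addresses pushed above */
        pop   %rcx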

Also add a TODO to the x86_mwait() idle code: certain idle states
clear the RSB, which opens the door to underflow attacks.

Bug: 33667 Spectre mitigations?
Bug: 12540 Speculative Execution Mitigations.

Change-Id: I67d1d0e18eb64581f8faa77920f486b457ee4da1
diff --git a/zircon/kernel/arch/x86/exceptions.S b/zircon/kernel/arch/x86/exceptions.S
index a5a2845..0749080 100644
--- a/zircon/kernel/arch/x86/exceptions.S
+++ b/zircon/kernel/arch/x86/exceptions.S
@@ -104,16 +104,6 @@
 
     /* swap gs to kernel space */
     swapgs
-
-    /* Spectre V2: Overwrite the Return Address Stack to ensure its not poisoned/can't underflow */
-.Lmaybe_exception_ras_fill:
-    jmp   1f
-    .rept 51
-    nop
-    .endr
-    APPLY_CODE_PATCH_FUNC_WITH_DEFAULT(x86_ras_fill_select, .Lmaybe_exception_ras_fill, 53)
-
-    .align 16
 1:
     /* Certain processors may speculative past a conditional swapgs, with a caller-controlled
      * GS_BASE register. This may allow callers to infer the contents of kernel offsets or data.
diff --git a/zircon/kernel/arch/x86/feature.cc b/zircon/kernel/arch/x86/feature.cc
index d974c201..e998526 100644
--- a/zircon/kernel/arch/x86/feature.cc
+++ b/zircon/kernel/arch/x86/feature.cc
@@ -51,7 +51,7 @@
 bool g_md_clear_on_user_return;
 bool g_has_ibpb;
 bool g_should_ibpb_on_ctxt_switch;
-bool g_ras_fill_on_entry;
+bool g_ras_fill_on_ctxt_switch;
 bool g_has_enhanced_ibrs;
 bool g_enhanced_ibrs_enabled;
 bool g_amd_retpoline;
@@ -192,6 +192,7 @@
     // See "Indirect Branch Control Extension" Revision 4.10.18, Extended Usage Models.
     g_has_enhanced_ibrs = x86_amd_cpu_has_ibrs_always_on(&cpuid);
   }
+  g_ras_fill_on_ctxt_switch = !x86_get_disable_spec_mitigations();
   // TODO(fxb/33667, fxb/12150): Consider whether a process can opt-out of an IBPB on switch,
   // either on switch-in (ex: its compiled with a retpoline) or switch-out (ex: it promises
   // not to attack the next process).
@@ -425,7 +426,7 @@
     printf("l1d_flush_on_vmentry ");
   if (g_should_ibpb_on_ctxt_switch)
     printf("ibpb_ctxt_switch ");
-  if (g_ras_fill_on_entry)
+  if (g_ras_fill_on_ctxt_switch)
     printf("ras_fill ");
   if (g_has_enhanced_ibrs)
     printf("enhanced_ibrs ");
@@ -950,22 +951,6 @@
   }
 }
 
-extern "C" void x86_ras_fill_select(const CodePatchInfo* patch) {
-  const size_t kSize = 53;
-  DEBUG_ASSERT(patch->dest_size == kSize);
-
-  // TODO(fxb/33667): Consider whether Enhanced IBRS + SMEP + SMAP requires filling RAS.
-  // Copy the contents of x86_ras_fill to the kernel syscall / exception entry point,
-  // except for the final RET.
-  if (!g_disable_spec_mitigations) {
-     extern char x86_ras_fill_start;
-     extern char x86_ras_fill_end;
-     DEBUG_ASSERT(&x86_ras_fill_end - &x86_ras_fill_start == kSize);
-     memcpy(patch->dest_addr, (void*) x86_ras_fill, kSize);
-     g_ras_fill_on_entry = true;
-  }
-}
-
 void x86_retpoline_select(const CodePatchInfo* patch) {
   if (g_disable_spec_mitigations || g_enhanced_ibrs_enabled) {
     const size_t kSize = 3;
diff --git a/zircon/kernel/arch/x86/include/arch/x86/feature.h b/zircon/kernel/arch/x86/include/arch/x86/feature.h
index 38b799a..e3beb77 100644
--- a/zircon/kernel/arch/x86/include/arch/x86/feature.h
+++ b/zircon/kernel/arch/x86/include/arch/x86/feature.h
@@ -345,6 +345,11 @@
   return g_disable_spec_mitigations;
 }
 
+static inline bool x86_cpu_should_ras_fill_on_ctxt_switch(void) {
+  extern bool g_ras_fill_on_ctxt_switch;
+  return g_ras_fill_on_ctxt_switch;
+}
+
 static inline bool x86_cpu_should_ibpb_on_ctxt_switch(void) {
   extern bool g_should_ibpb_on_ctxt_switch;
   return g_should_ibpb_on_ctxt_switch;
diff --git a/zircon/kernel/arch/x86/mp.cc b/zircon/kernel/arch/x86/mp.cc
index 3399977..c3b36b8 100644
--- a/zircon/kernel/arch/x86/mp.cc
+++ b/zircon/kernel/arch/x86/mp.cc
@@ -361,6 +361,8 @@
           next_state->CountEntry();
         }
       }
+      // TODO(fxb/33667, fxb/12540): Spectre V2 (Skylake+ only): If we enter a deep sleep state,
+      // fill the RSB before RET-ing from this function.
       thread_preempt();
     }
   } else {
diff --git a/zircon/kernel/arch/x86/ops.S b/zircon/kernel/arch/x86/ops.S
index 8ac64dd..a2b8350 100644
--- a/zircon/kernel/arch/x86/ops.S
+++ b/zircon/kernel/arch/x86/ops.S
@@ -144,8 +144,6 @@
 //
 //    Overwriting the RAS prevents those attacks.
 FUNCTION(x86_ras_fill)
-.global x86_ras_fill_start
-x86_ras_fill_start:
     pushq %rcx
     // Execute a loop of 32 calls; each call creates a return address stack
     // (RAS/RSB) entry pointing at the predicted return address, |trap|.
@@ -177,7 +175,5 @@
 
     addq $256, %rsp
     popq %rcx
-.global x86_ras_fill_end
-x86_ras_fill_end:
     ret
 END_FUNCTION(x86_ras_fill)
diff --git a/zircon/kernel/arch/x86/syscall.S b/zircon/kernel/arch/x86/syscall.S
index cc88390..210c688 100644
--- a/zircon/kernel/arch/x86/syscall.S
+++ b/zircon/kernel/arch/x86/syscall.S
@@ -299,16 +299,6 @@
     push_value %r11 /* user RFLAGS */
     push_value %rcx /* user RIP */
 
-    /* Spectre V2: Overwrite the Return Address Stack to ensure its not poisoned/can't underflow */
-.Lmaybe_syscall_ras_fill:
-    jmp 1f
-    .rept 51
-    nop
-    .endr
-    APPLY_CODE_PATCH_FUNC_WITH_DEFAULT(x86_ras_fill_select, .Lmaybe_syscall_ras_fill, 53)
-
-    .align 16
-1:
     // Any changes to the stack here need to be reflected in
     // pre_push and post_pop macros above to maintain alignment.
     // Verify the syscall is in range and jump to it.
diff --git a/zircon/kernel/arch/x86/thread.cc b/zircon/kernel/arch/x86/thread.cc
index a342c1f..868d7d2 100644
--- a/zircon/kernel/arch/x86/thread.cc
+++ b/zircon/kernel/arch/x86/thread.cc
@@ -93,6 +93,12 @@
 }
 
 __NO_SAFESTACK static void x86_aspace_context_switch(thread_t* oldthread, thread_t* newthread) {
+  // Spectre V2: Overwrite the Return Address Stack to ensure it's not poisoned
+  // TODO(fxb/33667, fxb/12540): On CPUs without RSB Alternate, we can skip this fill when
+  // |oldthread| is a kernel thread.
+  if (likely(x86_cpu_should_ras_fill_on_ctxt_switch())) {
+    x86_ras_fill();
+  }
   auto* const percpu = x86_get_percpu();
   // Flush Indirect Branch Predictor State, if:
   // 1) We are switching from a user address space to another user address space OR