[x86] Correct halting cores during panic

Previously, each CPU issued an INIT IPI to itself, which is invalid.
Instead, have each CPU signal when it is safe to receive an INIT IPI, and
have the panic orchestrator send the INIT.

ZX-1828 #done

Change-Id: I80b6fcee63f705f6405bed62a6d428fbe8ebfcc2
diff --git a/kernel/arch/x86/include/arch/x86/mp.h b/kernel/arch/x86/include/arch/x86/mp.h
index 593f7e0..233d6d0 100644
--- a/kernel/arch/x86/include/arch/x86/mp.h
+++ b/kernel/arch/x86/include/arch/x86/mp.h
@@ -129,6 +129,7 @@
 void x86_ipi_reschedule_handler(void);
 void x86_ipi_halt_handler(void) __NO_RETURN;
 void x86_secondary_entry(volatile int *aps_still_booting, thread_t *thread);
+void x86_force_halt_all_but_local_and_bsp(void);
 
 __END_CDECLS
 
diff --git a/kernel/arch/x86/mp.cpp b/kernel/arch/x86/mp.cpp
index 236c871..e120b8a 100644
--- a/kernel/arch/x86/mp.cpp
+++ b/kernel/arch/x86/mp.cpp
@@ -354,6 +354,20 @@
     }
 }
 
+// Forcibly stops every CPU except the caller and the BSP (cpu 0) by sending
+// each remaining AP an INIT IPI.
+void x86_force_halt_all_but_local_and_bsp(void) {
+    cpu_num_t self = arch_curr_cpu_num();
+    for (cpu_num_t i = 1; i < x86_num_cpus; ++i) {  // start at 1: skip the BSP
+        if (i == self) {
+            continue;
+        }
+        uint32_t dst_apic_id = ap_percpus[i - 1].apic_id;  // cpu i is entry i - 1
+        apic_send_ipi(0, static_cast<uint8_t>(dst_apic_id),
+                      DELIVERY_MODE_INIT);
+    }
+}
+
 zx_status_t arch_mp_prep_cpu_unplug(uint cpu_id) {
     if (cpu_id == 0 || cpu_id >= x86_num_cpus) {
         return ZX_ERR_INVALID_ARGS;
diff --git a/kernel/platform/pc/power.cpp b/kernel/platform/pc/power.cpp
index a5daaa3..c8a767d 100644
--- a/kernel/platform/pc/power.cpp
+++ b/kernel/platform/pc/power.cpp
@@ -5,16 +5,17 @@
 // https://opensource.org/licenses/MIT
 //
 
+#include <arch/x86/apic.h>
 #include <arch/mp.h>
 #include <arch/x86.h>
 #include <arch/x86/mp.h>
+#include <fbl/atomic.h>
 #include <stdio.h>
 #include <string.h>
 
 #include <platform.h>
 #include <platform/keyboard.h>
 
-#include <arch/x86/apic.h>
 #include <lib/console.h>
 #include <lib/version.h>
 
@@ -30,33 +31,43 @@
     outp(0xCF9, 0x06);
 }
 
-static volatile int panic_started;
+static fbl::atomic<cpu_mask_t> halted_cpus(0);
 
 static void halt_other_cpus(void) {
-    static volatile int halted = 0;
+    static fbl::atomic<int> halted(0);  // only the first caller does the work
 
-    if (atomic_swap(&halted, 1) == 0) {
+    if (halted.exchange(1) == 0) {
         // stop the other cpus
         printf("stopping other cpus\n");
         arch_mp_send_ipi(MP_IPI_TARGET_ALL_BUT_LOCAL, 0, MP_IPI_HALT);
 
+        cpu_mask_t targets = mp_get_online_mask() & ~cpu_num_to_mask(arch_curr_cpu_num());
         // spin for a while
         // TODO: find a better way to spin at this low level
         for (volatile int i = 0; i < 100000000; i++) {
+            if (halted_cpus.load() == targets) {  // every target has reported in
+                break;
+            }
             __asm volatile("nop");
         }
+
+        // Send INIT to the other APs but never to the BSP, since an INIT IPI to
+        // the BSP may cause the system to reboot
+        x86_force_halt_all_but_local_and_bsp();
     }
 }
 
 void platform_halt_cpu(void) {
-    apic_send_self_ipi(0x00, DELIVERY_MODE_INIT);
+    // Signal that this CPU is in its halt loop by setting its bit in halted_cpus
+    halted_cpus.fetch_or(cpu_num_to_mask(arch_curr_cpu_num()));
 }
 
 void platform_panic_start(void) {
     platform_debug_panic_start();
     arch_disable_ints();
 
-    if (atomic_swap(&panic_started, 1) == 0) {
+    static fbl::atomic<int> panic_started(0);
+    if (panic_started.exchange(1) == 0) {
 #if WITH_LIB_DEBUGLOG
         dlog_bluescreen_init();
 #endif