zircon/kernel/lib/lockup_detector/lockup_detector.cc - fuchsia - Git at Google

 // Copyright 2020 The Fuchsia Authors
 //
 // Use of this source code is governed by a MIT-style
 // license that can be found in the LICENSE file or at
 // https://opensource.org/licenses/MIT

 #include "lib/lockup_detector.h"

 #include <inttypes.h>
 #include <lib/affine/ratio.h>
 #include <lib/boot-options/boot-options.h>
 #include <lib/console.h>
 #include <lib/counters.h>
 #include <lib/crashlog.h>
 #include <lib/lockup_detector/inline_impl.h>
 #include <lib/lockup_detector/state.h>
 #include <lib/relaxed_atomic.h>
 #include <lib/version.h>
 #include <lib/zircon-internal/macros.h>
 #include <platform.h>
 #include <zircon/time.h>

 #include <dev/hw_watchdog.h>
 #include <fbl/algorithm.h>
 #include <kernel/auto_preempt_disabler.h>
 #include <kernel/mp.h>
 #include <kernel/percpu.h>
 #include <kernel/thread.h>
 #include <ktl/array.h>
 #include <ktl/atomic.h>
 #include <ktl/iterator.h>
 #include <object/process_dispatcher.h>
 #include <object/thread_dispatcher.h>

 #if defined(__aarch64__)
 #include <arch/arm64/dap.h>
 #endif

 // Counter for the number of lockups detected.
 KCOUNTER(counter_lockup_cs_count, "lockup_detector.critical_section.count")

 // Counters for number of lockups exceeding a given duration.
 KCOUNTER(counter_lockup_cs_exceeding_10ms, "lockup_detector.critical_section.exceeding_ms.10")
 KCOUNTER(counter_lockup_cs_exceeding_1000ms, "lockup_detector.critical_section.exceeding_ms.1000")
 KCOUNTER(counter_lockup_cs_exceeding_100000ms,
          "lockup_detector.critical_section.exceeding_ms.100000")

 // Counts the number of times the lockup detector has emitted a "no heartbeat" oops.
 KCOUNTER(counter_lockup_no_heartbeat_oops, "lockup_detector.no_heartbeat_oops")

 LockupDetectorState gLockupDetectorPerCpuState[SMP_MAX_CPUS];

 namespace {

 enum class FailureSeverity { Oops, Fatal };

 inline zx_duration_t TicksToDuration(zx_ticks_t ticks) {
   return platform_get_ticks_to_time_ratio().Scale(ticks);
 }

 inline zx_ticks_t DurationToTicks(zx_duration_t duration) {
   return platform_get_ticks_to_time_ratio().Inverse().Scale(duration);
 }

 #if defined(__aarch64__)
 void DumpRegistersAndBacktrace(cpu_num_t cpu, FILE* output_target) {
   arm64_dap_processor_state state;
   zx_status_t result = arm64_dap_read_processor_state(cpu, &state);

   if (result != ZX_OK) {
     fprintf(output_target, "Failed to read DAP state (res %d)\n", result);
     return;
   }

   fprintf(output_target, "DAP state:\n");
   state.Dump(output_target);
   fprintf(output_target, "\n");

   if constexpr (__has_feature(shadow_call_stack)) {
     constexpr const char* bt_fmt = "{{{bt:%u:%p}}}\n";
     uint32_t n = 0;

     // Don't attempt to do any backtracking unless this looks like the thread is
     // in the kernel right now.  The PC might be completely bogus, but even if
     // it is in a legit user mode process, I'm not sure of a good way to print
     // the symbolizer context for that process, or to figure out if the process
     // is using a shadow call stack or not.
     if (state.get_el_level() != 1u) {
       fprintf(output_target, "Skipping backtrace, CPU-%u EL is %u, not 1\n", cpu,
               state.get_el_level());
       return;
     }

     // Print the symbolizer context, and then the PC as frame 0's address, and
     // the LR as frame 1's address.
     PrintSymbolizerContext(output_target);
     fprintf(output_target, bt_fmt, n++, reinterpret_cast<void*>(state.pc));
     fprintf(output_target, bt_fmt, n++, reinterpret_cast<void*>(state.r[30]));

     constexpr size_t PtrSize = sizeof(void*);
     uintptr_t ret_addr_ptr = state.r[18];
     if (ret_addr_ptr & (PtrSize - 1)) {
       fprintf(output_target, "Halting backtrace, x18 (0x%" PRIu64 ") is not %zu byte aligned.\n",
               ret_addr_ptr, PtrSize);
       return;
     }

     constexpr uint32_t MAX_BACKTRACE = 32;
     for (; n < MAX_BACKTRACE; ++n) {
       // Attempt to back up one level.  Never cross a page boundary when we do this.
       static_assert(fbl::is_pow2(static_cast<uint64_t>(PAGE_SIZE)),
                     "PAGE_SIZE is not a power of 2!  Wut??");
       if ((ret_addr_ptr & (PAGE_SIZE - 1)) == 0) {
         break;
       }

       ret_addr_ptr -= PtrSize;

       // Print out this level
       fprintf(output_target, bt_fmt, n, reinterpret_cast<void**>(ret_addr_ptr)[0]);
     }
   }
 }
 #elif defined(__x86_64__)
 void DumpRegistersAndBacktrace(cpu_num_t cpu, FILE* output_target) {
   fprintf(output_target, "Regs and Backtrace unavailable for CPU-%u on x64\n", cpu);
 }
 #else
 #error "Unknown architecture! Neither __aarch64__ nor __x86_64__ are defined"
 #endif

 void DumpCommonDiagnostics(cpu_num_t cpu, FILE* output_target, FailureSeverity severity) {
   DEBUG_ASSERT(arch_ints_disabled());

   auto& percpu = percpu::Get(cpu);
   fprintf(output_target, "timer_ints: %lu\n", percpu.stats.timer_ints);

   if (ThreadLock::Get()->lock().HolderCpu() == cpu) {
     fprintf(output_target,
             "thread lock is held by cpu %u, skipping thread and scheduler diagnostics\n", cpu);
     return;
   }

   Guard<MonitoredSpinLock, IrqSave> thread_lock_guard{ThreadLock::Get(), SOURCE_TAG};
   percpu.scheduler.Dump(output_target);
   Thread* thread = percpu.scheduler.active_thread();
   if (thread != nullptr) {
     fprintf(output_target, "thread: pid=%lu tid=%lu\n", thread->pid(), thread->tid());
     ThreadDispatcher* user_thread = thread->user_thread();
     if (user_thread != nullptr) {
       ProcessDispatcher* process = user_thread->process();
       char name[ZX_MAX_NAME_LEN]{};
       process->get_name(name);
       fprintf(output_target, "process: name=%s\n", name);
     }
   }

   if (severity == FailureSeverity::Fatal) {
     fprintf(output_target, "\n");
     DumpRegistersAndBacktrace(cpu, output_target);
   }
 }

 class TA_CAP("mutex") FatalConditionReporterRole {
  public:
   FatalConditionReporterRole() = default;
   FatalConditionReporterRole(const FatalConditionReporterRole&) = delete;
   FatalConditionReporterRole(FatalConditionReporterRole&&) = delete;
   FatalConditionReporterRole& operator=(const FatalConditionReporterRole&) = delete;
   FatalConditionReporterRole& operator=(FatalConditionReporterRole&&) = delete;

   bool Acquire() TA_TRY_ACQ(true) {
     // A fatal condition has been observed and we are on the road to rebooting.
     // Attempt to pet the watchdog one last time, then suppress all future pets.
     // If anything goes wrong from here on out which prevents us from reporting
     // the fatal condition, we want the HW WDT (if present) to reboot us.
     hw_watchdog_pet();
     hw_watchdog_suppress_petting(true);

     // Now that the WDT is armed, attempt to assume the role of the fatal
     // condition reporter.  If we fail, then someone else is already in the
     // process of reporting the fatal condition.  We will just leave them to
     // their task.  If they hang while attempting to write a crashlog and reboot
     // the system, the HW WDT will end up rebooting the system for them (if
     // present).
     const cpu_num_t current_cpu = arch_curr_cpu_num();
     cpu_num_t expected = INVALID_CPU;
     return reporter_id_.compare_exchange_strong(expected, current_cpu);
   }

   // No one should ever actually release the role of the fatal condition
   // reporter.  Even so, we need to have a call to "release" in place in order
   // to make the static thread analysis happy.  If we ever _do_ actually make it
   // to this function, it means that the CPU which was assigned the role of
   // fatal condition reporter failed to reboot the system for some bizarre
   // reason.  Do our best to panic the system in this case.
   void Release() TA_REL() { PANIC("Fatal condition reporter failed to reboot!"); }

  private:
   ktl::atomic<cpu_num_t> reporter_id_{INVALID_CPU};
 };

 FatalConditionReporterRole g_fatal_condition_reporter_role;

 class HeartbeatLockupChecker {
  public:
   static zx_duration_t period() { return period_; }
   static zx_duration_t threshold() { return threshold_; }
   static zx_duration_t fatal_threshold() { return fatal_threshold_; }

   static void InitStaticParams() {
     period_ = ZX_MSEC(gBootOptions->lockup_detector_heartbeat_period_ms);
     threshold_ = ZX_MSEC(gBootOptions->lockup_detector_heartbeat_age_threshold_ms);
     fatal_threshold_ = ZX_MSEC(gBootOptions->lockup_detector_age_fatal_threshold_ms);
   }

   // TODO(johngro): once state->current_checker_id becomes a more formal
   // spinlock, come back here and TA_REQ(state.current_checker_id)
   static void PerformCheck(LockupDetectorState& state, cpu_num_t cpu, zx_time_t now_mono);

  private:
   HeartbeatLockupChecker() = default;

   // Note that the following static parameters are non-atomic because they are
   // currently setup only once by the primary CPU before any other CPUs have
   // started, and because they cannot change after setup. If we ever turn these
   // into dynamic properties which can be adjusted while the system is running,
   // we need to come back here and make them atomics with proper memory order
   // semantics.
   //
   // The period at which CPUs emit heartbeats.  0 means heartbeats are disabled.
   static inline zx_duration_t period_{0};

   // If a CPU's most recent heartbeat is older than this threshold, it is
   // considered to be locked up and a KERNEL OOPS will be triggered.
   static inline zx_duration_t threshold_{0};

   // If a CPU's most recent heartbeat is older than this threshold, it is
   // considered to be locked up and crashlog will be generated followed by a
   // reboot.
   static inline zx_duration_t fatal_threshold_{0};
 };

 class CriticalSectionLockupChecker {
  public:
   static zx_ticks_t threshold_ticks() { return threshold_ticks_.load(); }
   static void set_threshold_ticks(zx_ticks_t val) { threshold_ticks_.store(val); }
   static zx_ticks_t fatal_threshold_ticks() { return fatal_threshold_ticks_; }

   static bool IsEnabled() { return (threshold_ticks() > 0) || (fatal_threshold_ticks() > 0); }

   static void InitStaticParams() {
     const zx_duration_t threshold_duration =
         ZX_MSEC(gBootOptions->lockup_detector_critical_section_threshold_ms);
     threshold_ticks_.store(DurationToTicks(threshold_duration));

     const zx_duration_t fatal_threshold_duration =
         ZX_MSEC(gBootOptions->lockup_detector_critical_section_fatal_threshold_ms);
     fatal_threshold_ticks_ = DurationToTicks(fatal_threshold_duration);

     worst_case_threshold_ticks_ = DurationToTicks(counter_buckets_[0].exceeding);
   }

   static void RecordCriticalSectionBucketCounters(zx_ticks_t lockup_ticks) {
     // Fast abort if the time spent in the critical sections is less than the
     // minimum bucket threshold.
     if (lockup_ticks < worst_case_threshold_ticks_) {
       return;
     }

     zx_duration_t lockup_duration = TicksToDuration(lockup_ticks);
     for (auto iter = counter_buckets_.rbegin(); iter != counter_buckets_.rend(); iter++) {
       if (lockup_duration >= iter->exceeding) {
         kcounter_add(iter->counter, 1);
         break;
       }
     }
   }

   static void PerformCheck(LockupDetectorState& state, cpu_num_t cpu, zx_ticks_t now_ticks);

  private:
   CriticalSectionLockupChecker() = default;

   // Provides histogram-like kcounter functionality.
   struct CounterBucket {
     const zx_duration_t exceeding;
     const Counter& counter;
   };

   static constexpr const ktl::array counter_buckets_{
       CounterBucket{ZX_MSEC(10), counter_lockup_cs_exceeding_10ms},
       CounterBucket{ZX_MSEC(1000), counter_lockup_cs_exceeding_1000ms},
       CounterBucket{ZX_MSEC(100000), counter_lockup_cs_exceeding_100000ms},
   };

   // These thresholds control whether critical section checking is enabled and how long is "too
   // long".
   //
   // A non-zero |threshold_ticks| enables critical section checking with a non-fatal action (log,
   // but don't reboot).
   //
   // A non-zero |fatal_threshold_ticks| enables critical section checking with a fatal action
   // (reboot).
   //
   // These values are expressed in units of ticks rather than nanoseconds because it is faster to
   // read the platform timer's tick count than to get current_time().
   //
   // These variables are atomic because, although set early during lockup detector initialization,
   // their values may change to facilitate testing (see |lockup_set_cs_threshold_ticks|).  Use
   // relaxed operations because these fields are accessed within the critical section and must be
   // fast.
   static inline RelaxedAtomic<zx_ticks_t> threshold_ticks_{0};
   static inline RelaxedAtomic<zx_ticks_t> fatal_threshold_ticks_{0};

   static inline zx_ticks_t worst_case_threshold_ticks_{ktl::numeric_limits<zx_ticks_t>::max()};
 };

 void HeartbeatLockupChecker::PerformCheck(LockupDetectorState& state, cpu_num_t cpu,
                                           zx_time_t now_mono) {
   // If the heartbeat mechanism is currently not active for this CPU, just skip
   // all of the checks.
   auto& hb_state = state.heartbeat;
   if (!hb_state.active.load()) {
     return;
   }

   // Observe each of the details we need to know to make a determination of
   // whether or not we should report a failure.
   const zx_time_t observed_last_heartbeat = hb_state.last_heartbeat.load();
   zx_duration_t observed_age = zx_time_sub_time(now_mono, observed_last_heartbeat);
   const auto& cs_state = state.critical_section;
   // Note, we're loading name with relaxed semantics so there is nothing
   // ensuring that we see the "lastest value".  Idealy we'd use
   // memory_order_acquire when reading name and memory_order_release when
   // writing.  However, doing so has a measusrable performance impact and it's
   // crucial to minimize lockup_detector overhead.  We tolerate stale values
   // because we're only using name to help us find the point where the lockup
   // occurred.
   const char* const observed_name = cs_state.name.load(ktl::memory_order_relaxed);

   // If this is the worst gap we have ever seen, record that fact now.
   if (observed_age > hb_state.max_gap.load()) {
     hb_state.max_gap.store(observed_age);
   }

   // A shared lambda used to report errors in a consistent fashion, either to
   // just stdout, or to the stdout_panic buffer in the case that this is a fatal
   // condition.
   auto ReportFailure = [&](FailureSeverity severity) {
     FILE* output_target = (severity == FailureSeverity::Fatal) ? &stdout_panic_buffer : stdout;

     // Print an OOPS header so that we properly trigger tefmo checks, but only
     // send it to stdout.  If this a fatal failure, we don't want to waste any
     // bytes saying "OOPS" in the crashlog.  It should be pretty clear from the
     // fact that we are filing a crashlog that things went pretty seriously
     // wrong.
     KERNEL_OOPS("");
     fprintf(output_target,
             "lockup_detector: no heartbeat from CPU-%u in %" PRId64 " ms, last_heartbeat=%" PRId64
             " observed now=%" PRId64 " name=%s.\nReported by [CPU-%u] (message rate limited)\n",
             cpu, observed_age / ZX_MSEC(1), observed_last_heartbeat, now_mono,
             (observed_name ? observed_name : "unknown"), arch_curr_cpu_num());
     DumpCommonDiagnostics(cpu, output_target, severity);
   };

   // If we have a fatal threshold configured, and we have exceeded that
   // threshold, then it is time to file a crashlog and reboot the system.
   if ((fatal_threshold() > 0) && (observed_age > fatal_threshold())) {
     if (g_fatal_condition_reporter_role.Acquire()) {
       platform_panic_start();
       ReportFailure(FailureSeverity::Fatal);
       platform_halt(HALT_ACTION_REBOOT, ZirconCrashReason::SoftwareWatchdog);
       g_fatal_condition_reporter_role.Release();
     }
   }

   if ((threshold() > 0) && (observed_age > threshold()) && hb_state.alert_limiter.Ready()) {
     kcounter_add(counter_lockup_no_heartbeat_oops, 1);
     ReportFailure(FailureSeverity::Oops);
   }
 }

 void CriticalSectionLockupChecker::PerformCheck(LockupDetectorState& state, cpu_num_t cpu,
                                                 zx_ticks_t now_ticks) {
   auto& cs_state = state.critical_section;

   // Observe all of the info we need to make a decision as to whether or not there
   // has been a condition violation.
   const zx_ticks_t observed_threshold_ticks = threshold_ticks();
   // Use Acquire semantics to ensure that if we observe a previously stored |begin_ticks| value we
   // will also observe stores to other fields that were issued prior to a Release on |begin_ticks|.
   const zx_ticks_t observed_begin_ticks = cs_state.begin_ticks.load(ktl::memory_order_acquire);
   const char* const observed_name = cs_state.name.load(ktl::memory_order_relaxed);
   const zx_ticks_t observed_worst_case_ticks =
       cs_state.worst_case_ticks.load(ktl::memory_order_relaxed);

   // If observed_begin_ticks is non-zero, then the CPU we are checking is currently in a
   // critical section.  Compute how long it has been in the CS and check to see
   // if it exceeds any of our configured thresholds.
   if (observed_begin_ticks > 0) {
     const zx_ticks_t age_ticks = zx_time_sub_time(now_ticks, observed_begin_ticks);

     // A shared lambda used to report errors in a consistent fashion, either to
     // just stdout, or to the stdout_panic buffer in the case that this is a fatal
     // condition.
     auto ReportFailure = [&](FailureSeverity severity) {
       FILE* output_target = (severity == FailureSeverity::Fatal) ? &stdout_panic_buffer : stdout;

       // See the comment in HeartbeatLockupChecker::PerformCheck for an explanation of why this
       // curious empty-string OOPS is here.
       KERNEL_OOPS("");
       fprintf(output_target,
               "lockup_detector: CPU-%u in critical section for %" PRId64 " ms, threshold=%" PRId64
               " ms start=%" PRId64 " now=%" PRId64
               " name=%s.\n"
               "Reported by [CPU-%u] (message rate limited)\n",
               cpu, TicksToDuration(age_ticks) / ZX_MSEC(1),
               TicksToDuration(observed_threshold_ticks) / ZX_MSEC(1),
               TicksToDuration(observed_begin_ticks), TicksToDuration(now_ticks),
               (observed_name ? observed_name : "unknown"), arch_curr_cpu_num());

       DumpCommonDiagnostics(cpu, output_target, severity);
     };

     // Check the fatal condition first.
     if ((fatal_threshold_ticks() > 0) && (age_ticks >= fatal_threshold_ticks())) {
       if (g_fatal_condition_reporter_role.Acquire()) {
         platform_panic_start();
         ReportFailure(FailureSeverity::Fatal);
         platform_halt(HALT_ACTION_REBOOT, ZirconCrashReason::SoftwareWatchdog);
         g_fatal_condition_reporter_role.Release();
       }
     }

     // Next, check to see if our "oops" threshold was exceeded.
     if ((observed_threshold_ticks > 0) && (age_ticks >= observed_threshold_ticks)) {
       // Threshold exceeded.  Record this in the kcounters if this is the first time
       // we have seen this event, and then decide whether or not to print out an
       // oops based on our rate limiter.
       if (cs_state.last_counted_begin_ticks != observed_begin_ticks) {
         kcounter_add(counter_lockup_cs_count, 1);
         cs_state.last_counted_begin_ticks = observed_begin_ticks;
       }

       if (cs_state.ongoing_call_alert_limiter.Ready()) {
         ReportFailure(FailureSeverity::Oops);
       }
     }
   }

   // Next check to see if we have a new worst case time spent in a critical
   // section to report.
   if ((observed_worst_case_ticks > worst_case_threshold_ticks_) &&
       (observed_worst_case_ticks > cs_state.reported_worst_case_ticks) &&
       cs_state.worst_case_alert_limiter.Ready()) {
     // Remember the last worst case we reported, so we don't report it multiple
     // times.
     cs_state.reported_worst_case_ticks = observed_worst_case_ticks;

     // Now go ahead and report the new worst case.
     const zx_duration_t duration = TicksToDuration(observed_worst_case_ticks);
     printf(
         "lockup_detector: CPU-%u encountered a new worst case critical section section time of "
         "%" PRId64 " usec. Reported by [CPU-%u] (message rate limited)\n",
         cpu, duration / ZX_USEC(1), arch_curr_cpu_num());
   }
 }

 // Return an absolute deadline |duration| nanoseconds from now with a jitter of +/- |percent|%.
 Deadline DeadlineWithJitterAfter(zx_duration_t duration, uint32_t percent) {
   DEBUG_ASSERT(percent <= 100);
   const zx_duration_t delta = affine::Ratio{(rand() / 100) * percent, RAND_MAX}.Scale(duration);
   return Deadline::after(zx_duration_add_duration(duration, delta));
 }

 // Record that the current CPU is still alive by having it update its last
 // heartbeat.  Then, check all of current CPU's peers to see if they have
 // tripped any of our low level lockup detectors. This currently consists of:
 //
 // 1) The heartbeat detector (verifies that CPU timers are working)
 // 2) The critical section detector (verifies that no CPU spends too long in a
 //    critical section of code, such as an SMC call).
 //
 void DoHeartbeatAndCheckPeerCpus(Timer* timer, zx_time_t now_mono, void* arg) {
   const zx_ticks_t now_ticks = current_ticks();
   const cpu_num_t current_cpu = arch_curr_cpu_num();

   // Record that we are still alive.
   auto& checker_state = *(reinterpret_cast<LockupDetectorState*>(arg));
   checker_state.heartbeat.last_heartbeat.store(now_mono);

   // Pet the HW WDT, but only if we have a fatal heartbeat threshold
   // configured. We don't want the heartbeat checkers to be petting the dog if
   // they don't plan to reboot the system if things start to get really bad.
   if (HeartbeatLockupChecker::fatal_threshold() > 0) {
     hw_watchdog_pet();
   }

   // Now, check each of the lockup conditions for each of our peers.
   for (cpu_num_t cpu = 0; cpu < percpu::processor_count(); ++cpu) {
     if (cpu == current_cpu || !mp_is_cpu_online(cpu) || !mp_is_cpu_active(cpu)) {
       continue;
     }
     LockupDetectorState& state = gLockupDetectorPerCpuState[cpu];

     // Attempt to claim the role of the "checker" for this CPU.  If we fail to
     // do so, then another CPU is checking this CPU already, so we will just
     // skip our checks this time.  Note that this leaves a small gap in
     // detection ability.
     //
     // If the other checker has discovered no trouble and is just about to drop
     // the role of checker, but time has progressed to the point where a failure
     // would now be detected.  In this case, we would have reported the problem
     // had we been able to assume the checker role, but since it had not been
     // released yet, we will miss it.
     //
     // This gap is an acknowledged limitation.  Never stalling in these threads
     // is a more important property to maintain then having perfect gap free
     // coverage.  Presumably, some other core will check again in a short while
     // (or, we will do so ourselves next time around).
     //
     // TODO(johngro): either just replace this with a spin-try-lock, or spend
     // some time reviewing the memory order here.  CST seems like overkill, but
     // then again, checks are currently only performed once per second, so I
     // would rather be correct than fast for the time being.
     cpu_num_t expected = INVALID_CPU;
     if (state.current_checker_id.compare_exchange_strong(expected, current_cpu)) {
       // Now that we are the assigned "checker", perform the checks.  Start with
       // the CriticalSection check.  If there is a fatal condition to be
       // reported, we would rather start with the CriticalSection fatal
       // condition as it will can provide more specific details about the lockup
       // than the heartbeat checker can.
       CriticalSectionLockupChecker::PerformCheck(state, cpu, now_ticks);
       HeartbeatLockupChecker::PerformCheck(state, cpu, now_mono);

       // Next, release our role as checker for this CPU.
       state.current_checker_id.store(INVALID_CPU);
     }
   }

   // If heartbeats are still enabled for this core, schedule the next check.
   if (checker_state.heartbeat.active.load()) {
     timer->Set(Deadline::after(HeartbeatLockupChecker::period()), DoHeartbeatAndCheckPeerCpus, arg);
   }
 }

 // Stop the process of having the current CPU recording heartbeats and checking
 // in on other CPUs.
 void stop_heartbeats() {
   LockupDetectorState& state = gLockupDetectorPerCpuState[arch_curr_cpu_num()];
   state.heartbeat.active.store(false);
   percpu::GetCurrent().lockup_detector_timer.Cancel();
 }

 // Start the process of recording heartbeats and checking in on other CPUs on
 // the current CPU.
 void start_heartbeats() {
   if (HeartbeatLockupChecker::period() <= 0) {
     stop_heartbeats();
     return;
   }

   // To be safe, make sure we have a recent last heartbeat before activating.
   LockupDetectorState& state = gLockupDetectorPerCpuState[arch_curr_cpu_num()];
   auto& hb_state = state.heartbeat;
   const zx_time_t now = current_time();
   hb_state.last_heartbeat.store(now);
   hb_state.active.store(true);

   // Use a deadline with some jitter to avoid having all CPUs heartbeat at the same time.
   const Deadline deadline = DeadlineWithJitterAfter(HeartbeatLockupChecker::period(), 10);
   percpu::GetCurrent().lockup_detector_timer.Set(deadline, DoHeartbeatAndCheckPeerCpus, &state);
 }

 }  // namespace

 void lockup_primary_init() {
   // Initialize parameters for the heartbeat checks.
   HeartbeatLockupChecker::InitStaticParams();

   dprintf(INFO,
           "lockup_detector: heartbeats %s, period is %" PRId64 " ms, threshold is %" PRId64
           " ms, fatal threshold is %" PRId64 " ms\n",
           (HeartbeatLockupChecker::period() > 0) ? "enabled" : "disabled",
           HeartbeatLockupChecker::period() / ZX_MSEC(1),
           HeartbeatLockupChecker::threshold() / ZX_MSEC(1),
           HeartbeatLockupChecker::fatal_threshold() / ZX_MSEC(1));

   // Initialize parameters for the critical section checks, but only if the
   // heartbeat mechanism is enabled.  If the heartbeat mechanism is disabled, no
   // checks will ever be performed.
   //
   // TODO(johngro): relax this.  There is no strong reason to not do our
   // periodic checking if any of the check conditions are enabled.
   if constexpr (LOCKUP_CRITICAL_SECTION_ENALBED) {
     if (HeartbeatLockupChecker::period() > 0) {
       CriticalSectionLockupChecker::InitStaticParams();

       if (CriticalSectionLockupChecker::IsEnabled()) {
         dprintf(
             INFO,
             "lockup_detector: critical section threshold is %" PRId64
             " ms, fatal threshold is %" PRId64 " ms\n",
             TicksToDuration(CriticalSectionLockupChecker::threshold_ticks()) / ZX_MSEC(1),
             TicksToDuration(CriticalSectionLockupChecker::fatal_threshold_ticks()) / ZX_MSEC(1));
       } else {
         dprintf(INFO, "lockup_detector: critical section detection disabled by threshold\n");
       }
     } else {
       dprintf(
           INFO,
           "lockup_detector: critical section detection disabled because heartbeats are disabled\n");
     }
   } else {
     dprintf(INFO, "lockup_detector: critical section detection disabled by build\n");
   }

   // Kick off heartbeats on this CPU, if they are enabled.
   start_heartbeats();
 }

 void lockup_secondary_init() { start_heartbeats(); }
 void lockup_secondary_shutdown() { stop_heartbeats(); }

 // TODO(johngro): Make the definition of the various checkers available (perhaps
 // in a "lockup_detector" namespace) so that things like tests outside of this
 // translational unit can directly query stuff like this, instead of needing to
 // bound through a functions like this.
 zx_ticks_t lockup_get_cs_threshold_ticks() {
   return CriticalSectionLockupChecker::threshold_ticks();
 }
 void lockup_set_cs_threshold_ticks(zx_ticks_t val) {
   CriticalSectionLockupChecker::set_threshold_ticks(val);
 }

 void lockup_timed_begin(const char* name) {
   LockupDetectorState& state = gLockupDetectorPerCpuState[arch_curr_cpu_num()];
   const bool outermost = lockup_internal::Enter(state, name);
   if (likely(outermost)) {
     auto& cs_state = state.critical_section;
     // We're using memory_order_relaxed instead of memory_order_release to
     // minimize performance impact.  As a result, HeartbeatLockupChecker may see
     // stale name values because there is nothing for it to synchronize-with.
     // However, if CriticalSectionLockupChecker is enabled, then the begin_ticks
     // store with release semantics will ensure the CriticalSectionLockupChecker
     // sees the latest value.
     cs_state.name.store(name, ktl::memory_order_relaxed);
     if (CriticalSectionLockupChecker::IsEnabled()) {
       const zx_ticks_t now = current_ticks();
       // Use release semantics to ensure that if an observer sees this store to |begin_ticks|,
       // they will also see the stores that preceded it.
       cs_state.begin_ticks.store(now, ktl::memory_order_release);
     }
   }
 }

 void lockup_timed_end() {
   LockupDetectorState& state = gLockupDetectorPerCpuState[arch_curr_cpu_num()];
   lockup_internal::CallIfOuterAndLeave(state, [](LockupDetectorState& state) {
     // Is this a new worst for us?
     const zx_ticks_t now_ticks = current_ticks();
     auto& cs_state = state.critical_section;
     const zx_ticks_t begin = cs_state.begin_ticks.load(ktl::memory_order_relaxed);
     zx_ticks_t delta = zx_time_sub_time(now_ticks, begin);

     // Update our counters.
     CriticalSectionLockupChecker::RecordCriticalSectionBucketCounters(delta);

     if (delta > cs_state.worst_case_ticks.load(ktl::memory_order_relaxed)) {
       cs_state.worst_case_ticks.store(delta, ktl::memory_order_relaxed);
     }

     // See comment in lockup_timed_begin at the point where name is stored.
     cs_state.name.store(nullptr, ktl::memory_order_relaxed);

     // We are done with the CS now.  Clear the begin time to indicate that we are not in any
     // critical section.
     //
     // Use release semantics to ensure that if an observer sees this store to |begin_ticks|, they
     // will also see any of our previous stores.
     cs_state.begin_ticks.store(0, ktl::memory_order_release);
   });
 }

 int64_t lockup_get_critical_section_oops_count() { return counter_lockup_cs_count.Value(); }

 int64_t lockup_get_no_heartbeat_oops_count() { return counter_lockup_no_heartbeat_oops.Value(); }

 namespace {

 void lockup_status() {
   const zx_ticks_t ticks = CriticalSectionLockupChecker::threshold_ticks();
   printf("critical section threshold is %" PRId64 " ticks (%" PRId64 " ms)\n", ticks,
          TicksToDuration(ticks) / ZX_MSEC(1));
   if (ticks != 0) {
     for (cpu_num_t i = 0; i < percpu::processor_count(); i++) {
       if (!mp_is_cpu_active(i)) {
         printf("CPU-%u is not active, skipping\n", i);
         continue;
       }

       const auto& cs_state = gLockupDetectorPerCpuState[i].critical_section;
       const zx_ticks_t begin_ticks = cs_state.begin_ticks.load(ktl::memory_order_acquire);
       const char* name = cs_state.name.load(ktl::memory_order_relaxed);
       const zx_ticks_t now = current_ticks();
       const int64_t worst_case_usec =
           TicksToDuration(cs_state.worst_case_ticks.load(ktl::memory_order_relaxed)) / ZX_USEC(1);
       if (begin_ticks == 0) {
         printf("CPU-%u not in critical section (worst case %" PRId64 " uSec)\n", i,
                worst_case_usec);
       } else {
         zx_duration_t duration = TicksToDuration(now - begin_ticks);
         printf("CPU-%u in critical section (%s) for %" PRId64 " ms (worst case %" PRId64 " uSec)\n",
                i, (name != nullptr ? name : "unknown"), duration / ZX_MSEC(1), worst_case_usec);
       }
     }
   }

   printf("heartbeat period is %" PRId64 " ms, heartbeat threshold is %" PRId64 " ms\n",
          HeartbeatLockupChecker::period() / ZX_MSEC(1),
          HeartbeatLockupChecker::threshold() / ZX_MSEC(1));

   for (cpu_num_t cpu = 0; cpu < percpu::processor_count(); ++cpu) {
     if (!mp_is_cpu_online(cpu) | !mp_is_cpu_active(cpu)) {
       continue;
     }

     const auto& hb_state = gLockupDetectorPerCpuState[cpu].heartbeat;
     if (!hb_state.active.load()) {
       printf("CPU-%u heartbeats disabled\n", cpu);
       continue;
     }
     const zx_time_t last_heartbeat = hb_state.last_heartbeat.load();
     const zx_duration_t age = zx_time_sub_time(current_time(), last_heartbeat);
     const zx_duration_t max_gap = hb_state.max_gap.load();
     printf("CPU-%u last heartbeat at %" PRId64 " ms, age is %" PRId64 " ms, max gap is %" PRId64
            " ms\n",
            cpu, last_heartbeat / ZX_MSEC(1), age / ZX_MSEC(1), max_gap / ZX_MSEC(1));
   }
 }

 // Runs |func| on |cpu|, passing |duration| as an argument.
 void run_lockup_func(cpu_num_t cpu, zx_duration_t duration, thread_start_routine func) {
   Thread* t = Thread::Create("lockup-test", func, &duration, DEFAULT_PRIORITY);
   t->SetCpuAffinity(cpu_num_to_mask(cpu));
   t->Resume();
   t->Join(nullptr, ZX_TIME_INFINITE);
 }

 // Trigger a temporary lockup of |cpu| by holding a spinlock for |duration|.
 void lockup_trigger_spinlock(cpu_num_t cpu, zx_duration_t duration) {
   run_lockup_func(cpu, duration, [](void* arg) -> int {
     const zx_duration_t duration = *reinterpret_cast<zx_duration_t*>(arg);
     // Acquire a spinlock and hold it for |duration|.
     DECLARE_SINGLETON_SPINLOCK_WITH_TYPE(lockup_test_lock, MonitoredSpinLock);
     Guard<MonitoredSpinLock, IrqSave> guard{lockup_test_lock::Get(), SOURCE_TAG};
     const zx_time_t deadline = zx_time_add_duration(current_time(), duration);
     while (current_time() < deadline) {
       arch::Yield();
     }
     return 0;
   });
 }

 // Trigger a temporary lockup of |cpu| by remaining in a critical section for |duration|.
 void lockup_trigger_critical_section(cpu_num_t cpu, zx_duration_t duration) {
   run_lockup_func(cpu, duration, [](void* arg) -> int {
     const zx_duration_t duration = *reinterpret_cast<zx_duration_t*>(arg);
     AutoPreemptDisabler preempt_disable;
     LOCKUP_TIMED_BEGIN("trigger-tool");
     const zx_time_t deadline = zx_time_add_duration(current_time(), duration);
     while (current_time() < deadline) {
       arch::Yield();
     }
     LOCKUP_TIMED_END();
     return 0;
   });
 }

 int cmd_lockup(int argc, const cmd_args* argv, uint32_t flags) {
   auto usage = [cmd_name = argv[0].str]() -> int {
     printf("usage:\n");
     printf("%s status                                 : print lockup detector status\n", cmd_name);
     printf("%s test_spinlock <cpu> <num msec>         : hold spinlock on <cpu> for <num msec>\n",
            cmd_name);
     printf(
         "%s test_critical_section <cpu> <num msec> : hold critical section on <cpu> for <num "
         "msec>\n",
         cmd_name);
     return ZX_ERR_INTERNAL;
   };

   if (argc < 2) {
     printf("not enough arguments\n");
     return usage();
   }

   if (!strcmp(argv[1].str, "status")) {
     lockup_status();
   } else if (!strcmp(argv[1].str, "test_spinlock")) {
     if (argc < 4) {
       return usage();
     }
     const auto cpu = static_cast<cpu_num_t>(argv[2].u);
     const auto ms = static_cast<uint32_t>(argv[3].u);
     printf("test_spinlock: locking up CPU %u for %u ms\n", cpu, ms);
     lockup_trigger_spinlock(cpu, ZX_MSEC(ms));
     printf("done\n");
   } else if (!strcmp(argv[1].str, "test_critical_section")) {
     if (argc < 4) {
       return usage();
     }
     const auto cpu = static_cast<cpu_num_t>(argv[2].u);
     const auto ms = static_cast<uint32_t>(argv[3].u);
     printf("test_critical_section: locking up CPU %u for %u ms\n", cpu, ms);
     lockup_trigger_critical_section(cpu, ZX_MSEC(ms));
     printf("done\n");
   } else {
     printf("unknown command\n");
     return usage();
   }

   return ZX_OK;
 }

 }  // namespace

 STATIC_COMMAND_START
 STATIC_COMMAND("lockup", "lockup detector commands", &cmd_lockup)
 STATIC_COMMAND_END(lockup)