blob: 5a00b73f5742eade0e0001831cd2f7efa62993b5 [file] [log] [blame]
// Copyright 2020 The Fuchsia Authors
//
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file or at
// https://opensource.org/licenses/MIT
#include <assert.h>
#include <debug.h>
#include <platform.h>
#include <stdio.h>
#include <stdlib.h>
#include <kernel/lockdep.h>
#include <kernel/spinlock.h>
#include <kernel/timer.h>
#include <ktl/limits.h>
#include <platform/crashlog.h>
#include <ram-crashlog/ram-crashlog.h>
#include <vm/physmap.h>
namespace {
// Location and size of the RAM crashlog region.  Both are assigned by
// platform_set_ram_crashlog_location, and are otherwise left zero-initialized,
// which is what platform_has_ram_crashlog uses to decide whether a crashlog
// region exists.
void* ram_crashlog_vaddr;
size_t ram_crashlog_size;
// Output of the ram_crashlog_recover call made in
// platform_set_ram_crashlog_location, along with the status that call
// returned.  The status stays at ZX_ERR_INTERNAL if no recovery was attempted.
recovered_ram_crashlog_t recovered_log;
zx_status_t log_recovery_result = ZX_ERR_INTERNAL;
// Lock protecting the state of the periodic uptime updater: the timer used to
// schedule the next update, and the flag which says whether updates are
// currently enabled.
DECLARE_SINGLETON_SPINLOCK(uptime_updater_lock);
Timer uptime_updater_timer TA_GUARDED(uptime_updater_lock::Get());
bool uptime_updater_enabled TA_GUARDED(uptime_updater_lock::Get()) = false;
// One-shot latch which ensures that the crashlog status is reported to the
// klog a single time, regardless of how often recover_crashlog is called.
ktl::atomic<bool> crashlog_status_printed_to_klog{false};

// Returns true for the first caller to get here, and false for every call
// made after that.
inline bool should_print_crashlog_status() {
  // exchange hands back the value the latch held before we set it.  Only the
  // caller who observes |false| is the one who actually flipped the latch.
  const bool already_printed = crashlog_status_printed_to_klog.exchange(true);
  return !already_printed;
}
// Default implementation of the platform_stow_crashlog hook.
//
// Hands |reason|, the value of current_time(), and up to 4GB - 1 bytes of
// |log| to the RAM crashlog library for storage in the region described by
// ram_crashlog_vaddr/ram_crashlog_size.
void default_platform_stow_crashlog(zircon_crash_reason_t reason, const void* log, size_t len) {
  // The library takes a 32 bit payload length.  Anything which does not fit is
  // simply truncated; nobody is going to be storing 4GB of payload.
  constexpr size_t kMaxPayloadLen = ktl::numeric_limits<uint32_t>::max();
  const uint32_t payload_len =
      static_cast<uint32_t>((len > kMaxPayloadLen) ? kMaxPayloadLen : len);

  // There is no need to validate |log| or the length before calling; the RAM
  // crashlog library gracefully handles a nullptr or a length of 0.
  ram_crashlog_stow(ram_crashlog_vaddr, ram_crashlog_size, log, payload_len, reason,
                    current_time());
}
size_t default_platform_recover_crashlog(size_t len, void* cookie,
void (*func)(const void* data, size_t off, size_t len,
void* cookie)) {
// If we failed to recover any crashlog, simply report the size as 0.
ZbiHwRebootReason hw_reason = platform_hw_reboot_reason();
const char* str_hw_reason;
char str_hw_reason_buf[16];
switch (hw_reason) {
case ZbiHwRebootReason::Undefined:
str_hw_reason = "UNKNOWN";
break;
case ZbiHwRebootReason::Cold:
str_hw_reason = "COLD BOOT";
break;
case ZbiHwRebootReason::Warm:
str_hw_reason = "WARM BOOT";
break;
case ZbiHwRebootReason::Brownout:
str_hw_reason = "BROWNOUT";
break;
case ZbiHwRebootReason::Watchdog:
str_hw_reason = "HW WATCHDOG";
break;
default:
snprintf(str_hw_reason_buf, sizeof(str_hw_reason_buf), "0x%08x",
static_cast<uint32_t>(hw_reason));
str_hw_reason = str_hw_reason_buf;
break;
}
if (log_recovery_result != ZX_OK) {
// Do not bother to log any recovery errors if the log was "corrupt", and we
// either don't know the HW reboot reason, or we know that the reason is a
// cold boot. We don't expect to recover any log during a cold boot, and
// systems which do not report a HW reboot reason via the ZBI will always
// just tell us "unknown".
if (should_print_crashlog_status()) {
if (!((log_recovery_result == ZX_ERR_IO_DATA_INTEGRITY) &&
((hw_reason == ZbiHwRebootReason::Undefined) ||
(hw_reason == ZbiHwRebootReason::Cold)))) {
printf("Crashlog: Failed to recover crashlog. Result %d, HW Reboot Reason %s\n",
log_recovery_result, str_hw_reason);
}
}
return 0;
}
// OK, we have a log. Render the "preamble" of the log into a local stack
// buffer as part of computing the final size. Currently, the log is expected
// to be nothing but text, so we need to take the structured information we
// have access to and put it into string form. This includes:
// 1) The uptime estimate
// 2) The "software" reboot reason.
// 3) The "hardware" reboot reason (only if given to us by the bootloader).
// 4) The payload damage indicator (only if there was potential damage to the
// payload)
//
// The first few lines of text need to be structured so that they can be
// understood by the crash-log harvester up in userland. Right now, this is
// just a loose convention. Someday, it would be good to pass this data in a
// much more structured form.
const recovered_ram_crashlog_t& rlog = recovered_log;
const char* str_reason;
char preamble[256];
size_t offset = 0;
switch (rlog.reason) {
case ZirconCrashReason::Unknown:
// If we rebooted spontaneously, check to see if we have some more details
// provided by way of the bootloader and the HW reboot reason register.
switch (hw_reason) {
case ZbiHwRebootReason::Brownout:
case ZbiHwRebootReason::Watchdog:
str_reason = str_hw_reason;
break;
default:
str_reason = "UNKNOWN";
break;
}
break;
case ZirconCrashReason::Oom:
str_reason = "OOM";
break;
case ZirconCrashReason::Panic:
str_reason = "KERNEL PANIC";
break;
case ZirconCrashReason::SoftwareWatchdog:
str_reason = "SW WATCHDOG";
break;
case ZirconCrashReason::NoCrash:
str_reason = "NO CRASH";
break;
default:
str_reason = nullptr;
break;
}
if (should_print_crashlog_status()) {
// Provide some basic details about the crashlog we recovered in the kernel
// log. This can assist in debugging failure in CI/CQ where we might have
// access to serial logs, but nothing else.
int64_t uptime_msec = rlog.uptime / ZX_MSEC(1);
if (rlog.reason == ZirconCrashReason::NoCrash) {
printf("Crashlog: Clean reboot. Uptime (%" PRId64 ".%03" PRId64 " sec) HW Reason \"%s\"\n",
uptime_msec / 1000, uptime_msec % 1000, str_hw_reason);
} else {
printf("Crashlog: Uptime (%" PRId64 ".%03" PRId64
" sec) SW Reason \"%s\" HW Reason \"%s\" Payload %s PLen %u\n",
uptime_msec / 1000, uptime_msec % 1000, str_reason, str_hw_reason,
rlog.payload_valid ? "valid" : "invalid", rlog.payload_len);
}
}
// First line must give the reboot reason, and be followed by two newlines.
DEBUG_ASSERT(offset <= sizeof(preamble));
offset += snprintf(preamble + offset, sizeof(preamble) - offset, "ZIRCON REBOOT REASON (%s)\n\n",
str_reason);
// Uptime estimate comes next with a newline between the tag and the actual number
DEBUG_ASSERT(offset <= sizeof(preamble));
offset += snprintf(preamble + offset, sizeof(preamble) - offset, "UPTIME (ms)\n%ld\n",
rlog.uptime / ZX_MSEC(1));
// After this, we are basically just free form text.
DEBUG_ASSERT(offset <= sizeof(preamble));
if (str_hw_reason != nullptr) {
offset += snprintf(preamble + offset, sizeof(preamble) - offset, "HW REBOOT REASON (%s)\n",
str_hw_reason);
} else {
offset += snprintf(preamble + offset, sizeof(preamble) - offset, "HW REBOOT REASON (0x%08x)\n",
static_cast<uint32_t>(hw_reason));
}
if (rlog.payload_valid == false) {
DEBUG_ASSERT(offset <= sizeof(preamble));
offset +=
snprintf(preamble + offset, sizeof(preamble) - offset,
"WARNING - The following crashlog payload failed length/CRC sanity checks and may "
"contain errors!\n");
}
// If the user passed us a length of zero, then they just want us to tell them
// the size of a rendered log. Don't make any callbacks if this is the case.
if (len != 0) {
DEBUG_ASSERT(offset <= sizeof(preamble));
func(preamble, 0, offset, cookie);
if (rlog.payload && rlog.payload_len) {
func(rlog.payload, offset, rlog.payload_len, cookie);
}
}
// Report the total length.
return offset + rlog.payload_len;
}
// Performs one uptime update and schedules the next one.
//
// When updates are enabled, this stows a crashlog with a reason of Unknown and
// no payload, then re-arms the updater timer to run this routine again one
// update interval from now.  When updates are disabled, this does nothing, so
// a timer callback which fires after updates were turned off does not re-arm
// the timer.
//
// Must be called with the uptime updater lock held.
void update_uptime_locked() TA_REQ(uptime_updater_lock::Get()) {
  if (!uptime_updater_enabled) {
    return;
  }

  constexpr zx_duration_t kDefaultUpdateInterval = ZX_SEC(1);
  default_platform_stow_crashlog(ZirconCrashReason::Unknown, nullptr, 0);

  // The timer callback does not run with our lock held, so it needs to
  // acquire the lock itself before calling back into this routine.
  const auto on_timer = [](Timer*, zx_time_t now, void* arg) {
    Guard<SpinLock, IrqSave> guard{uptime_updater_lock::Get()};
    update_uptime_locked();
  };

  const Deadline when =
      Deadline::after(kDefaultUpdateInterval, {kDefaultUpdateInterval / 2, TIMER_SLACK_CENTER});
  uptime_updater_timer.Set(when, on_timer, nullptr);
}
// Default implementation of the platform_enable_crashlog_uptime_updates hook.
//
// Turns the periodic uptime updater on or off.  Asking for the state which is
// already in effect is a no-op.
void default_platform_enable_crashlog_uptime_updates(bool enabled) {
  // Updates can only be turned on if there is a RAM crashlog to update.
  const bool want_updates = enabled && platform_has_ram_crashlog();

  Guard<SpinLock, IrqSave> guard{uptime_updater_lock::Get()};
  if (uptime_updater_enabled == want_updates) {
    return;
  }

  uptime_updater_enabled = want_updates;
  if (want_updates) {
    // Perform the first update right now; it arms the timer for the next one.
    update_uptime_locked();
  } else {
    uptime_updater_timer.Cancel();
  }
}
} // namespace
// Crashlog hooks.  Each of these is a mutable function pointer which starts
// out pointing at the RAM crashlog based default implementation defined above.
// NOTE(review): presumably these are non-const so that platform specific code
// can install its own implementation -- confirm against the users of these
// pointers.

// Stores |len| bytes of |log| along with the crash |reason|.
void (*platform_stow_crashlog)(zircon_crash_reason_t reason, const void* log,
                               size_t len) = default_platform_stow_crashlog;
// Renders the recovered crashlog through |func| and returns its total size.  A
// |len| of 0 requests only the size; no calls to |func| are made in that case.
size_t (*platform_recover_crashlog)(size_t len, void* cookie,
                                    void (*func)(const void* data, size_t off, size_t len,
                                                 void* cookie)) = default_platform_recover_crashlog;
// Enables or disables the periodic update of the stored crashlog.
void (*platform_enable_crashlog_uptime_updates)(bool enabled) =
    default_platform_enable_crashlog_uptime_updates;
// Records where the RAM crashlog region lives and immediately attempts to
// recover a log from it.
//
// |phys| : Physical address of the region.
// |len|  : Size of the region in bytes.
//
// If either argument is zero the call is ignored and no state is changed.
void platform_set_ram_crashlog_location(paddr_t phys, size_t len) {
  if (!phys || !len) {
    return;
  }

  ram_crashlog_vaddr = paddr_to_physmap(phys);
  ram_crashlog_size = len;

  // "Recover" the log right away.  All this does is verify the various CRCs
  // and extract the results if everything checks out.  The outcome is cached
  // in |recovered_log| and |log_recovery_result| since we do not want to do
  // this more than once.
  const zx_status_t status =
      ram_crashlog_recover(ram_crashlog_vaddr, ram_crashlog_size, &recovered_log);
  log_recovery_result = status;
}
// Returns true once a RAM crashlog region with a non-null address and a
// non-zero size has been recorded.
bool platform_has_ram_crashlog() {
  const bool have_address = (ram_crashlog_vaddr != nullptr);
  const bool have_size = (ram_crashlog_size != 0);
  return have_address && have_size;
}