// Copyright 2016 The Fuchsia Authors
//
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file or at
// https://opensource.org/licenses/MIT
#include <debug.h>
#include <lib/boot-options/boot-options.h>
#include <lib/fit/defer.h>
#include <lib/fxt/fields.h>
#include <lib/fxt/interned_category.h>
#include <lib/fxt/interned_string.h>
#include <lib/ktrace.h>
#include <lib/syscalls/zx-syscall-numbers.h>
#include <lib/zircon-internal/thread_annotations.h>
#include <platform.h>
#include <string.h>
#include <zircon/errors.h>
#include <zircon/types.h>
#include <arch/ops.h>
#include <arch/user_copy.h>
#include <fbl/alloc_checker.h>
#include <hypervisor/ktrace.h>
#include <kernel/koid.h>
#include <kernel/mp.h>
#include <ktl/atomic.h>
#include <ktl/iterator.h>
#include <lk/init.h>
#include <object/thread_dispatcher.h>
#include <vm/fault.h>
#include <vm/vm_aspace.h>
#include <ktl/enforce.h>
namespace {
using fxt::operator""_category;

struct CategoryEntry {
  uint32_t index;
  const fxt::InternedCategory& category;
};
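
// Maps each legacy KTRACE_GRP_*_BIT to its FXT interned category. SetupCategoryBits() below
// assigns the bit position as the category index so that (1u << index) reproduces the legacy
// group mask for that category.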
const CategoryEntry kCategories[] = {
    {KTRACE_GRP_META_BIT, "kernel:meta"_category},
    {KTRACE_GRP_MEMORY_BIT, "kernel:memory"_category},
    {KTRACE_GRP_SCHEDULER_BIT, "kernel:sched"_category},
    {KTRACE_GRP_TASKS_BIT, "kernel:tasks"_category},
    {KTRACE_GRP_IPC_BIT, "kernel:ipc"_category},
    {KTRACE_GRP_IRQ_BIT, "kernel:irq"_category},
    {KTRACE_GRP_PROBE_BIT, "kernel:probe"_category},
    {KTRACE_GRP_ARCH_BIT, "kernel:arch"_category},
    {KTRACE_GRP_SYSCALL_BIT, "kernel:syscall"_category},
    {KTRACE_GRP_VM_BIT, "kernel:vm"_category},
    {KTRACE_GRP_RESTRICTED_BIT, "kernel:restricted"_category},
};

void SetupCategoryBits() {
  for (const CategoryEntry& entry : kCategories) {
    if (entry.category.index() == fxt::InternedCategory::kInvalidIndex) {
      entry.category.SetIndex(entry.index);
    } else {
      dprintf(INFO, "Found category \"%s\" already initialized to 0x%04x!\n",
              entry.category.string(), (1u << entry.category.index()));
    }
  }

  // If debug assertions are enabled, validate that all interned categories have been initialized.
  if constexpr (DEBUG_ASSERT_IMPLEMENTED) {
    for (const fxt::InternedCategory& category : fxt::InternedCategory::Iterate()) {
      DEBUG_ASSERT_MSG(category.index() != fxt::InternedCategory::kInvalidIndex,
                       "Interned category %s was not initialized\n", category.string());
    }
  }
}

}  // namespace

KTrace KTrace::instance_;
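
// Allocates a committed, read/write region of the kernel address space to back a single
// per-CPU trace buffer, returning nullptr if the allocation fails.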
ktl::byte* KTrace::KernelAspaceAllocator::Allocate(uint32_t size) {
  VmAspace* kaspace = VmAspace::kernel_aspace();
  char name[32] = "ktrace-percpu-buffer";
  void* ptr;
  const zx_status_t status = kaspace->Alloc(name, size, &ptr, 0, VmAspace::VMM_FLAG_COMMIT,
                                            ARCH_MMU_FLAG_PERM_READ | ARCH_MMU_FLAG_PERM_WRITE);
  if (status != ZX_OK) {
    return nullptr;
  }
  return static_cast<ktl::byte*>(ptr);
}
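
// Frees a buffer previously returned by Allocate by releasing its region of the kernel address
// space. Passing nullptr is a no-op.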
void KTrace::KernelAspaceAllocator::Free(ktl::byte* ptr) {
  if (ptr != nullptr) {
    VmAspace* kaspace = VmAspace::kernel_aspace();
    kaspace->FreeRegion(reinterpret_cast<vaddr_t>(ptr));
  }
}

zx_status_t KTrace::Allocate() {
  if (percpu_buffers_) {
    return ZX_OK;
  }

  // The number of buffers to initialize and their size should be set before this method is called.
  DEBUG_ASSERT(num_buffers_ != 0);
  DEBUG_ASSERT(buffer_size_ != 0);

  // Allocate the per-CPU SPSC buffer data structures.
  // Initially, store the unique pointer in a local variable. This ensures that the buffers are
  // destroyed if initialization fails below.
  fbl::AllocChecker ac;
  ktl::unique_ptr buffers = ktl::make_unique<PerCpuBuffer[]>(&ac, num_buffers_);
  if (!ac.check()) {
    return ZX_ERR_NO_MEMORY;
  }

  // Initialize each per-CPU buffer by allocating the storage used to back it.
  for (uint32_t i = 0; i < num_buffers_; i++) {
    const zx_status_t status = buffers[i].Init(buffer_size_);
    if (status != ZX_OK) {
      // Any allocated buffers will be destructed when we return.
      DiagsPrintf(INFO, "ktrace: cannot alloc buffer %u: %d\n", i, status);
      return ZX_ERR_NO_MEMORY;
    }
  }

  // Take ownership of the newly created per-CPU buffers.
  percpu_buffers_ = ktl::move(buffers);
  return ZX_OK;
}

zx::result<KTrace::Reservation> KTrace::Reserve(uint64_t header) {
  DEBUG_ASSERT(arch_ints_disabled());

  // Compute the number of bytes we need to reserve from the provided fxt header.
  const uint32_t num_words = fxt::RecordFields::RecordSize::Get<uint32_t>(header);
  const uint32_t num_bytes = num_words * sizeof(uint64_t);
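  // The FXT record size field is expressed in 64-bit words; for example, a header whose size
  // field is 4 reserves 4 * 8 = 32 bytes of buffer space.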

  // If writes are disabled, then return an error. We return ZX_ERR_BAD_STATE, because this means
  // that tracing was disabled.
  //
  // It is valid for writes to be disabled immediately after this check. This is ok because Stop,
  // which disables writes, will follow up with an IPI to all cores and wait for those IPIs to
  // return. Because interrupts have been disabled prior to this check, that IPI will not return
  // until this write operation is complete.
  if (!WritesEnabled()) {
    return zx::error(ZX_ERR_BAD_STATE);
  }

  // Check which CPU we're running on and Reserve a slot in the appropriate SPSC buffer.
  const cpu_num_t cpu_num = arch_curr_cpu_num();
  DEBUG_ASSERT(percpu_buffers_ != nullptr);
  zx::result<PerCpuBuffer::Reservation> result = percpu_buffers_[cpu_num].Reserve(num_bytes);
  if (result.is_error()) {
    return result.take_error();
  }

  Reservation res(ktl::move(result.value()), header);
  return zx::ok(ktl::move(res));
}

void KTrace::ReportMetadata() {
  // Emit the FXT metadata records. These must be emitted on the boot CPU to ensure that they
  // are read at the very beginning of the trace.
  auto emit_starting_records = [](void* arg) {
    DEBUG_ASSERT(arch_ints_disabled());

    // Emit the magic and initialization records.
    KTrace* ktrace = static_cast<KTrace*>(arg);
    zx_status_t status = fxt::WriteMagicNumberRecord(ktrace);
    DEBUG_ASSERT(status == ZX_OK);
    status = fxt::WriteInitializationRecord(ktrace, ticks_per_second());
    DEBUG_ASSERT(status == ZX_OK);

    // Emit strings needed to improve readability, such as syscall names, to the trace buffer.
    for (const fxt::InternedString& interned_string : fxt::InternedString::Iterate()) {
      fxt::WriteStringRecord(
          ktrace, interned_string.id(), interned_string.string(),
          strnlen(interned_string.string(), fxt::InternedString::kMaxStringLength));
    }

    // Emit the KOIDs of each CPU to the trace buffer.
    const uint32_t max_cpus = arch_max_num_cpus();
    char name[32];
    for (uint32_t i = 0; i < max_cpus; i++) {
      snprintf(name, sizeof(name), "cpu-%u", i);
      fxt::WriteKernelObjectRecord(ktrace, fxt::Koid(ktrace->cpu_context_map_.GetCpuKoid(i)),
                                   ZX_OBJ_TYPE_THREAD, fxt::StringRef{name},
                                   fxt::Argument{"process"_intern, kNoProcess});
    }
  };
  const cpu_mask_t target_mask = cpu_num_to_mask(BOOT_CPU_ID);
  mp_sync_exec(mp_ipi_target::MASK, target_mask, emit_starting_records, &GetInstance());

  // Emit the names of all live processes and threads to the trace buffer. Note that these records
  // will be inserted into the buffer associated with the CPU we're running on, which may not be
  // the boot CPU. Fortunately for us, these process and thread names, unlike the other metadata
  // records, do not need to exist before any records that reference them are emitted.
  ktrace_report_live_processes();
  ktrace_report_live_threads();
}

zx_status_t KTrace::Start(uint32_t, uint32_t categories) {
  // Allocate the buffers. This will be a no-op if the buffers are already initialized.
  if (zx_status_t status = Allocate(); status != ZX_OK) {
    return status;
  }

  // If writes are already enabled, then a trace session is already in progress and all we need to
  // do is set the categories bitmask and return.
  if (WritesEnabled()) {
    set_categories_bitmask(categories);
    return ZX_OK;
  }

  // Otherwise, enable writes.
  EnableWrites();

  // Report static metadata before setting the categories bitmask.
  // These metadata records must be emitted before we enable arbitrary categories, otherwise
  // generic trace records may fill up the buffer and cause these metadata records to be dropped,
  // which could make the trace unreadable.
  ReportMetadata();
  set_categories_bitmask(categories);

  DiagsPrintf(INFO, "Enabled category mask: 0x%03x\n", categories);
  DiagsPrintf(INFO, "Trace category states:\n");
  for (const fxt::InternedCategory& category : fxt::InternedCategory::Iterate()) {
    DiagsPrintf(INFO, " %-20s : 0x%03x : %s\n", category.string(), (1u << category.index()),
                IsCategoryEnabled(category) ? "enabled" : "disabled");
  }

  return ZX_OK;
}

void KTrace::Init(uint32_t bufsize, uint32_t initial_grpmask) {
  Guard<Mutex> guard{&lock_};
  ASSERT_MSG(buffer_size_ == 0, "KTrace::Init called twice");

  // Allocate the KOIDs used to annotate CPU trace records.
  cpu_context_map_.Init();

  // Compute the per-CPU buffer size, ensuring that the resulting value is a power of two.
  num_buffers_ = arch_max_num_cpus();
  const uint32_t raw_percpu_bufsize = bufsize / num_buffers_;
  DEBUG_ASSERT(raw_percpu_bufsize > 0);
  const int leading_zeros = __builtin_clz(raw_percpu_bufsize);
  buffer_size_ = 1u << (31 - leading_zeros);
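  // For example, a 32 MiB request on a 6-CPU system yields a raw per-CPU size of ~5.3 MiB,
  // which rounds down to the nearest power of two, 4 MiB.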

  // If the initial_grpmask was zero, then we can delay allocation of the KTrace buffer.
  if (initial_grpmask == 0) {
    return;
  }

  // Otherwise, begin tracing immediately.
  Start(KTRACE_ACTION_START, initial_grpmask);
}

zx_status_t KTrace::Stop() {
  // Calling Stop on an uninitialized KTrace buffer is a no-op.
  if (!percpu_buffers_) {
    return ZX_OK;
  }

  // Clear the categories bitmask and disable writes. This prevents any new writes from starting.
  set_categories_bitmask(0u);
  DisableWrites();

  // Wait for any in-progress writes to complete and emit any dropped record statistics.
  // We accomplish this by sending an IPI to all cores that instructs them to EmitDropStats.
  // Since writes run with interrupts disabled, the mere fact that a core is able to process this
  // IPI means that it is not performing any other concurrent writes. Additionally, mp_sync_exec
  // issues a memory barrier that ensures that every other core will see that writes are disabled
  // after processing the IPI.
  auto emit_drop_stats = [](void* arg) {
    const cpu_num_t curr_cpu = arch_curr_cpu_num();
    PerCpuBuffer* percpu_buffers = static_cast<PerCpuBuffer*>(arg);
    PerCpuBuffer& curr_cpu_buffer = percpu_buffers[curr_cpu];

    // We do not require that this call succeeds. If the trace buffer still doesn't have enough
    // space to contain the dropped record statistics, this will fail, but there's not much we can
    // do about that.
    curr_cpu_buffer.EmitDropStats();
  };
  mp_sync_exec(mp_ipi_target::ALL, 0, emit_drop_stats, percpu_buffers_.get());

  return ZX_OK;
}

zx_status_t KTrace::Rewind() {
  // Calling Rewind on an uninitialized KTrace buffer is a no-op.
  if (!percpu_buffers_) {
    return ZX_OK;
  }

  // Rewind calls Drain on each per-CPU buffer. As mentioned in the doc comments of that method,
  // it is invalid to call Drain concurrently with a Read, and the method is only guaranteed to
  // fully empty the buffer if there are no concurrent Write operations. We ensure that these
  // prerequisites are met by:
  // 1. Holding the lock_, ensuring that there can be no other readers.
  // 2. Ensuring writes are disabled to prevent any future writes from starting.
  // 3. Performing the Drain within an IPI on each core, ensuring that this operation does not
  //    race with any in-progress writes.
  // Rewind also resets the dropped record statistics on every buffer to prepare for the next
  // tracing session.
  DisableWrites();
  auto run_drain = [](void* arg) {
    const cpu_num_t curr_cpu = arch_curr_cpu_num();
    PerCpuBuffer* percpu_buffers = static_cast<PerCpuBuffer*>(arg);
    PerCpuBuffer& curr_cpu_buffer = percpu_buffers[curr_cpu];
    curr_cpu_buffer.Drain();
    curr_cpu_buffer.ResetDropStats();
  };
  mp_sync_exec(mp_ipi_target::ALL, 0, run_drain, percpu_buffers_.get());

  return ZX_OK;
}

zx::result<size_t> KTrace::ReadUser(user_out_ptr<void> ptr, uint32_t offset, size_t len) {
  // Reads must be serialized with respect to all other non-write operations.
  Guard<Mutex> guard{&lock_};

  // If the passed in ptr is nullptr, then return the buffer size needed to read all of the
  // per-CPU buffers' contents.
  if (!ptr) {
    return zx::ok(buffer_size_ * num_buffers_);
  }

  // If the per-CPU buffers have not been initialized, there's nothing to do, so return early.
  if (!percpu_buffers_) {
    return zx::ok(0);
  }

  // Eventually, this should support users passing in buffers smaller than the sum of the size of
  // all per-CPU buffers, but for now we do not allow this.
  if (len < (buffer_size_ * num_buffers_)) {
    return zx::error(ZX_ERR_INVALID_ARGS);
  }

  // Iterate through each per-CPU buffer and read its contents.
  size_t bytes_read = 0;
  user_out_ptr<ktl::byte> byte_ptr = ptr.reinterpret<ktl::byte>();
  auto copy_fn = [&](uint32_t byte_offset, ktl::span<ktl::byte> src) {
    // This is safe to do while holding the lock_ because the KTrace lock is a leaf lock that is
    // not acquired during the course of a page fault.
    zx_status_t status = ZX_ERR_BAD_STATE;
    guard.CallUntracked([&]() {
      // Compute the destination address for this segment.
      user_out_ptr out_ptr = byte_ptr.byte_offset(bytes_read + byte_offset);

      // Prepare the destination range of this segment to improve efficiency by
      // coalescing potential page faults into a bulk operation.
      // TODO(eieio): This could be improved further by restructuring the copy
      // out operation so that the full destination range can be determined and
      // soft-faulted in a single operation.
      Thread::Current::SoftFaultInRange(reinterpret_cast<vaddr_t>(out_ptr.get()),
                                        VMM_PF_FLAG_USER | VMM_PF_FLAG_WRITE, src.size());

      // Copy the trace data to the user segment.
      status = out_ptr.copy_array_to_user(src.data(), src.size());
    });
    return status;
  };

  for (uint32_t i = 0; i < num_buffers_; i++) {
    const zx::result<size_t> result = percpu_buffers_[i].Read(copy_fn, static_cast<uint32_t>(len));
    if (result.is_error()) {
      DiagsPrintf(INFO, "failed to copy out ktrace data: %d\n", result.status_value());

      // If we copied some data from a previous buffer, we have to return the fact that we did so
      // here. Otherwise, that data will be lost.
      if (bytes_read != 0) {
        return zx::ok(bytes_read);
      }

      // Otherwise, return the error.
      return zx::error(result.status_value());
    }
    bytes_read += result.value();
  }

  return zx::ok(bytes_read);
}

void KTrace::InitHook(unsigned) {
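  // ktrace_bufsize is expressed in MiB; shifting left by 20 converts it to bytes.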
  const uint32_t bufsize = gBootOptions->ktrace_bufsize << 20;
  const uint32_t initial_grpmask = gBootOptions->ktrace_grpmask;

  dprintf(INFO, "ktrace_init: bufsize=%u grpmask=%x\n", bufsize, initial_grpmask);
  if (!bufsize) {
    dprintf(INFO, "ktrace: disabled\n");
    return;
  }

  // Coerce the category ids to match the pre-defined bit mappings of the legacy ktrace interface.
  // TODO(eieio): Remove this when the kernel migrates to IOB-based tracing with extensible
  // categories.
  SetupCategoryBits();

  dprintf(INFO, "Trace categories: \n");
  for (const fxt::InternedCategory& category : fxt::InternedCategory::Iterate()) {
    dprintf(INFO, " %-20s : 0x%03x\n", category.string(), (1u << category.index()));
  }

  if (!initial_grpmask) {
    dprintf(INFO, "ktrace: delaying buffer allocation\n");
  }

  // Initialize the singleton data structures.
  GetInstance().Init(bufsize, initial_grpmask);
}
// Finish initialization before starting userspace (i.e. before debug syscalls can occur).
LK_INIT_HOOK(ktrace, KTrace::InitHook, LK_INIT_LEVEL_USER - 1)