// Copyright 2020 The Fuchsia Authors
//
// Use of this source code is governed by an MIT-style
// license that can be found in the LICENSE file or at
// https://opensource.org/licenses/MIT
#ifndef ZIRCON_KERNEL_INCLUDE_KERNEL_TASK_RUNTIME_STATS_H_
#define ZIRCON_KERNEL_INCLUDE_KERNEL_TASK_RUNTIME_STATS_H_
#include <lib/arch/intrin.h>
#include <lib/kconcurrent/copy.h>
#include <lib/kconcurrent/seqlock.h>
#include <lib/relaxed_atomic.h>
#include <zircon/syscalls/object.h>
#include <zircon/time.h>
#include <kernel/lockdep.h>
#include <kernel/scheduler_state.h>
#include <ktl/array.h>
//
// Types and utilities for efficiently accumulating and aggregating task runtime
// stats.
//
// Runtime stats are maintained at three levels: thread, process, and job.
// Threads maintain and update their own runtime stats as they change state.
// Terminating threads roll their stats up into their owning process, and
// terminating processes roll up into their owning job. However, queries of
// process and job stats must sum the stats of all currently running threads
// with the rolled-up stats of terminated threads.
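//
// As an illustrative sketch (|rolled_up| and |live_threads| are hypothetical
// names, not actual kernel fields), a process-level query conceptually
// computes:
//
//   TaskRuntimeStats total = rolled_up;  // terminated threads' stats
//   for (const ThreadRuntimeStats& t : live_threads) {
//     total += t.GetCompensatedTaskRuntimeStats();
//   }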
//
// Per-thread stats are maintained by ThreadRuntimeStats, which provides a
// sequence-locked snapshot of the runtime stats, with an affordance to
// compensate for unaccounted run-time/queue-time when a thread is in a
// runnable state (i.e. ready or running).
//
// Runtime stats of a thread, process, or job.
//
// Not safe for concurrent use by multiple threads.
struct TaskRuntimeStats {
// The total duration (in ticks) spent running on a CPU.
zx_ticks_t cpu_ticks = 0;
// The total duration (in ticks) spent ready to start running.
zx_ticks_t queue_ticks = 0;
// The total duration (in ticks) spent handling page faults.
zx_ticks_t page_fault_ticks = 0;
// The total duration (in ticks) spent contended on kernel locks.
zx_ticks_t lock_contention_ticks = 0;
// Adds another TaskRuntimeStats to this one.
constexpr TaskRuntimeStats& operator+=(const TaskRuntimeStats& other) {
cpu_ticks = zx_ticks_add_ticks(cpu_ticks, other.cpu_ticks);
queue_ticks = zx_ticks_add_ticks(queue_ticks, other.queue_ticks);
page_fault_ticks = zx_ticks_add_ticks(page_fault_ticks, other.page_fault_ticks);
lock_contention_ticks = zx_ticks_add_ticks(lock_contention_ticks, other.lock_contention_ticks);
return *this;
}
// Conversion to zx_info_task_runtime_t.
operator zx_info_task_runtime_t() const;
};
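// Illustrative usage (hypothetical variable names): per-thread stats may be
// accumulated into a total and then reported to userspace via the conversion
// operator:
//
//   TaskRuntimeStats total;
//   total += thread_a_stats;
//   total += thread_b_stats;
//   zx_info_task_runtime_t info = total;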
struct TaskRuntimeStatsTests; // fwd decl so we can be friends with our tests.
namespace task_runtime_stats::internal {
// Manages sequence-locked updates and access to per-thread runtime stats.
class ThreadRuntimeStats {
static constexpr concurrent::SyncOpt kRuntimeStatsSyncType = concurrent::SyncOpt::Fence;
template <typename>
struct LockOption {};
public:
struct ThreadStats {
// Total time (in ticks) the thread has spent running on a CPU.
zx_ticks_t total_running_ticks = 0;
// Total time (in ticks) the thread has spent ready to run.
zx_ticks_t total_ready_ticks = 0;
// When the thread entered its current state.
zx_ticks_t state_change_ticks = 0;
thread_state current_state{THREAD_INITIAL};
};
ThreadRuntimeStats() = default;
// Update must acquire the sequence lock exclusively, either saving and
// disabling interrupts (IrqSave) or leaving interrupt state untouched
// (NoIrqSave).
static constexpr LockOption<ExclusiveIrqSave> IrqSave{};
static constexpr LockOption<ExclusiveNoIrqSave> NoIrqSave{};
// Records the thread's transition to |new_state|, accumulating the time
// spent in the previous state.
template <typename ExclusiveOption>
void Update(thread_state new_state, LockOption<ExclusiveOption>) TA_EXCL(seq_lock_) {
// Enter our sequence lock and obtain a mutable reference to our payload so
// we can update the payload in-place. We are free to read the contents of
// the payload without the use of any atomics (the SeqLock behaves like a
// spinlock in this situation, ensuring proper ordering and excluding other
// writers); however, we need to make sure to use relaxed atomic stores when
// writing to the payload, as observers may be performing concurrent reads.
Guard<SeqLock<kRuntimeStatsSyncType>, ExclusiveOption> thread_guard{&seq_lock_};
ThreadStats& stats = published_stats_.BeginInPlaceUpdate();
// Skip the update if we are already in the new state.
//
// This can happen when a thread unblocks: Scheduler::Unblock changes the
// state of the thread from BLOCKED to READY and records the transition in
// the runtime stats as it does so. Eventually, we will reschedule on this
// CPU and attempt to record another state transition here, even though the
// stats were already recorded.
//
// TODO(johngro): Look into changing this behavior. It would be better if
// we only ever called this method when we _knew_ for a fact that we were
// changing states.
if (stats.current_state == new_state) {
return;
}
// Make sure that our sampling of the ticks counter takes place between the
// two stores of the sequence number, and is not allowed to move outside of
// the update transaction because of pipelined execution. We need to
// explicitly specify that this read needs to take place after previous
// stores, but we should not need to explicitly prevent it from moving
// beyond subsequent stores. This is because we have a data dependency in the
// pipeline. We store our TSC sample in our updated payload, and that store
// is not allowed to move past our store of the final updated sequence
// number.
const zx_ticks_t now =
platform_current_ticks_synchronized<GetTicksSyncFlag::kAfterPreviousStores>();
// Now go ahead and update our payload, making sure to use relaxed atomic
// stores when writing to the contents.
if (stats.current_state == THREAD_RUNNING) {
const zx_ticks_t delta = zx_ticks_sub_ticks(now, stats.state_change_ticks);
ktl::atomic_ref(stats.total_running_ticks)
.store(zx_ticks_add_ticks(stats.total_running_ticks, delta), ktl::memory_order_relaxed);
} else if (stats.current_state == THREAD_READY) {
const zx_ticks_t delta = zx_ticks_sub_ticks(now, stats.state_change_ticks);
ktl::atomic_ref(stats.total_ready_ticks)
.store(zx_ticks_add_ticks(stats.total_ready_ticks, delta), ktl::memory_order_relaxed);
}
ktl::atomic_ref(stats.state_change_ticks).store(now, ktl::memory_order_relaxed);
ktl::atomic_ref(stats.current_state).store(new_state, ktl::memory_order_relaxed);
}
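// Illustrative call sites (hypothetical; actual scheduler code differs): a
// path that already runs with interrupts disabled would pass NoIrqSave,
// while one that may run with interrupts enabled would pass IrqSave:
//
//   runtime_stats.Update(THREAD_RUNNING, ThreadRuntimeStats::NoIrqSave);
//   runtime_stats.Update(THREAD_BLOCKED, ThreadRuntimeStats::IrqSave);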
// Updates the page fault / lock contention ticks with the given deltas. These values do not
// require relative coherence with other state.
void AddPageFaultTicks(zx_ticks_t delta) { page_fault_ticks_.fetch_add(delta); }
void AddLockContentionTicks(zx_ticks_t delta) { lock_contention_ticks_.fetch_add(delta); }
// Returns the instantaneous runtime stats for the thread, including the time
// the thread has spent in its current state (if that state is either READY or
// RUNNING).
TaskRuntimeStats GetCompensatedTaskRuntimeStats() const TA_EXCL(seq_lock_) {
ReadResult res = Read();
// Adjust for the current time if the thread was in a state that we track
// when we queried its stats.
if ((res.stats.current_state == THREAD_RUNNING) || (res.stats.current_state == THREAD_READY)) {
const zx_ticks_t delta = zx_ticks_sub_ticks(res.now, res.stats.state_change_ticks);
zx_ticks_t& counter = (res.stats.current_state == THREAD_RUNNING)
? res.stats.total_running_ticks
: res.stats.total_ready_ticks;
counter = zx_ticks_add_ticks(counter, delta);
}
return TaskRuntimeStats{.cpu_ticks = res.stats.total_running_ticks,
.queue_ticks = res.stats.total_ready_ticks,
.page_fault_ticks = page_fault_ticks_,
.lock_contention_ticks = lock_contention_ticks_};
}
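// For example (hypothetical numbers): if a thread entered THREAD_RUNNING at
// state_change_ticks = 1000 with total_running_ticks = 500, and the snapshot
// samples now = 1300, the compensated cpu_ticks is 500 + (1300 - 1000) = 800.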
private:
friend struct ::TaskRuntimeStatsTests;
struct ReadResult {
ThreadStats stats;
zx_ticks_t now{};
};
// Returns a coherent snapshot of the ThreadStats state.
ReadResult Read() const TA_EXCL(seq_lock_) {
ReadResult ret;
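// Retry loop: the shared guard begins a read transaction; on exit it sets
// |success| to true only if the sequence number was stable for the duration
// of the read (i.e. no writer intervened). Otherwise, we yield and retry.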
for (bool success = false; !success; arch::Yield()) {
// TODO(johngro): Look into doing this without disabling ints. Right
// now, the following is possible:
//
// 1) Code enters the lock-guard, "holding" the SeqLock for read.
// 2) Before exiting, the preemption timer fires.
// 3) The scheduler selects a new thread during preemption, and needs to
// update stats for the old thread/process.
// 4) It calls into Update which attempts to hold the SeqLock exclusively.
// 5) Lockdep asserts, because it looks like we are attempting to enter
// the same lock class multiple times.
//
// Typically, this would be an error, but in the case of a seqlock, it
// actually isn't. The read operation (holding the lock with shared
// access) cannot block the write operation (obtaining the lock
// exclusively). The write during the preempt would simply cause the
// initial read transaction to fail and try again. Lockdep does not
// know this, however, so it complains.
//
// In the short term, we just turn off interrupts during the read.
// Moving forward, it would be better to teach lockdep about the proper
// SeqLock semantics, and only have it complain if code attempts to
// enter the lock exclusively multiple times.
//
Guard<SeqLock<kRuntimeStatsSyncType>, SharedIrqSave> guard{&seq_lock_, success};
// Make sure that our sampling of the ticks counter takes place between
// the two reads of the sequence number, and is not allowed to move
// outside of the region because of pipelined execution.
ret.now = platform_current_ticks_synchronized<GetTicksSyncFlag::kAfterPreviousLoads |
GetTicksSyncFlag::kBeforeSubsequentLoads>();
published_stats_.Read(ret.stats);
}
return ret;
}
mutable DECLARE_SEQLOCK_EXPLICIT_SYNC(ThreadRuntimeStats, kRuntimeStatsSyncType) seq_lock_;
SeqLockPayload<ThreadStats, decltype(seq_lock_)> published_stats_ TA_GUARDED(seq_lock_){};
RelaxedAtomic<zx_ticks_t> page_fault_ticks_{0};
RelaxedAtomic<zx_ticks_t> lock_contention_ticks_{0};
};
} // namespace task_runtime_stats::internal
using ThreadRuntimeStats = task_runtime_stats::internal::ThreadRuntimeStats;
#endif // ZIRCON_KERNEL_INCLUDE_KERNEL_TASK_RUNTIME_STATS_H_