// Copyright 2020 The Fuchsia Authors
//
// Use of this source code is governed by an MIT-style
// license that can be found in the LICENSE file or at
// https://opensource.org/licenses/MIT
#ifndef ZIRCON_KERNEL_INCLUDE_KERNEL_TASK_RUNTIME_STATS_H_
#define ZIRCON_KERNEL_INCLUDE_KERNEL_TASK_RUNTIME_STATS_H_
#include <lib/arch/intrin.h>
#include <lib/kconcurrent/copy.h>
#include <lib/kconcurrent/seqlock.h>
#include <lib/relaxed_atomic.h>
#include <zircon/syscalls/object.h>
#include <zircon/time.h>
#include <kernel/lockdep.h>
#include <kernel/scheduler_state.h>
#include <ktl/array.h>
//
// Types and utilities for efficiently accumulating and aggregating task runtime
// stats.
//
// Runtime stats are maintained at three levels: thread, process, and job.
// Threads maintain and update their own runtime stats as they change state.
// Terminating threads roll their stats up into their owning process, and
// terminating processes roll up into their owning job. However, queries of
// process and job stats must sum the stats of all currently running threads
// with the rolled-up stats of terminated threads.
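//
// As an illustrative sketch (|rolled_up| and |live_threads| are hypothetical
// names, not actual kernel fields), a process-level query conceptually
// computes:
//
//   TaskRuntimeStats total = rolled_up;  // terminated threads' stats
//   for (const ThreadRuntimeStats& t : live_threads) {
//     total += t.GetCompensatedTaskRuntimeStats();
//   }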
//
// Per-thread stats are maintained by ThreadRuntimeStats, which provides a
// sequence-locked snapshot of the runtime stats, with an affordance to
// compensate for unaccounted run-time/queue-time when a thread is in a
// runnable state (i.e. ready or running).
//
// Runtime stats of a thread, process, or job.
//
// Not safe for concurrent use by multiple threads.
struct TaskRuntimeStats {
// The total duration (in ticks) spent running on a CPU.
zx_ticks_t cpu_ticks = 0;
// The total duration (in ticks) spent ready to start running.
zx_ticks_t queue_ticks = 0;
// The total duration (in ticks) spent handling page faults.
zx_ticks_t page_fault_ticks = 0;
// The total duration (in ticks) spent contended on kernel locks.
zx_ticks_t lock_contention_ticks = 0;
// Adds another TaskRuntimeStats to this one.
constexpr TaskRuntimeStats& operator+=(const TaskRuntimeStats& other) {
cpu_ticks = zx_ticks_add_ticks(cpu_ticks, other.cpu_ticks);
queue_ticks = zx_ticks_add_ticks(queue_ticks, other.queue_ticks);
page_fault_ticks = zx_ticks_add_ticks(page_fault_ticks, other.page_fault_ticks);
lock_contention_ticks = zx_ticks_add_ticks(lock_contention_ticks, other.lock_contention_ticks);
return *this;
}
// Conversion to zx_info_task_runtime_t.
operator zx_info_task_runtime_t() const;
};
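// Illustrative usage (hypothetical variable names): per-thread stats may be
// accumulated into a total and then reported to userspace via the conversion
// operator:
//
//   TaskRuntimeStats total;
//   total += thread_a_stats;
//   total += thread_b_stats;
//   zx_info_task_runtime_t info = total;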
struct TaskRuntimeStatsTests; // fwd decl so we can be friends with our tests.
namespace task_runtime_stats::internal {
// Manages sequence-locked updates and access to per-thread runtime stats.
class ThreadRuntimeStats {
static constexpr concurrent::SyncOpt kRuntimeStatsSyncType = concurrent::SyncOpt::Fence;
template <typename>
struct LockOption {};
public:
struct ThreadStats {
// Total time (in ticks) the thread has spent running on a CPU.
zx_ticks_t total_running_ticks = 0;
// Total time (in ticks) the thread has spent ready to run.
zx_ticks_t total_ready_ticks = 0;
// When the thread entered its current state.
zx_ticks_t state_change_ticks = 0;
thread_state current_state{THREAD_INITIAL};
};
ThreadRuntimeStats() = default;
// Update must acquire the sequence lock exclusively, either saving and
// disabling interrupts (IrqSave) or leaving interrupt state untouched
// (NoIrqSave).
static constexpr LockOption<ExclusiveIrqSave> IrqSave{};
static constexpr LockOption<ExclusiveNoIrqSave> NoIrqSave{};
// Records the thread's transition to |new_state|, accumulating the time
// spent in the previous state.
template <typename ExclusiveOption>
void Update(thread_state new_state, LockOption<ExclusiveOption>) TA_EXCL(seq_lock_) {
// Enter our sequence lock and obtain a mutable reference to our payload so
// we can update the payload in-place. We are free to read the contents of
// the payload without the use of any atomics (the SeqLock behaves like a
// spinlock in this situation, ensuring proper ordering and excluding other
// writers); however, we need to make sure to use relaxed atomic stores when
// writing to the payload, as observers may be performing concurrent reads.
Guard<SeqLock<kRuntimeStatsSyncType>, ExclusiveOption> thread_guard{&seq_lock_};
ThreadStats& stats = published_stats_.BeginInPlaceUpdate();
// Skip the update if we are already in the new state.
//
// This can happen when a thread unblocks: Scheduler::Unblock changes the
// state of the thread from BLOCKED to READY and records the transition in
// the runtime stats as it does so. Eventually, we will reschedule on this
// CPU and attempt to record another state transition here, even though the
// stats were already recorded.
//
// TODO(johngro): Look into changing this behavior. It would be better if
// we only ever called this method when we _knew_ for a fact that we were
// changing states.
if (stats.current_state == new_state) {
return;
}
// Make sure that our sampling of the ticks counter takes place between the
// two stores of the sequence number, and is not allowed to move outside of
// the update transaction because of pipelined execution. We need to
// explicitly specify that this read needs to take place after previous
// stores, but we should not need to explicitly prevent it from moving
// beyond subsequent stores. This is because we have a data dependency in the
// pipeline. We store our TSC sample in our updated payload, and that store
// is not allowed to move past our store of the final updated sequence
// number.
const zx_ticks_t now =
platform_current_ticks_synchronized<GetTicksSyncFlag::kAfterPreviousStores>();
// Now go ahead and update our payload, making sure to use relaxed atomic
// stores when writing to the contents.
if (stats.current_state == THREAD_RUNNING) {
const zx_ticks_t delta = zx_ticks_sub_ticks(now, stats.state_change_ticks);
ktl::atomic_ref(stats.total_running_ticks)
.store(zx_ticks_add_ticks(stats.total_running_ticks, delta), ktl::memory_order_relaxed);
} else if (stats.current_state == THREAD_READY) {
const zx_ticks_t delta = zx_ticks_sub_ticks(now, stats.state_change_ticks);
ktl::atomic_ref(stats.total_ready_ticks)
.store(zx_ticks_add_ticks(stats.total_ready_ticks, delta), ktl::memory_order_relaxed);
}
ktl::atomic_ref(stats.state_change_ticks).store(now, ktl::memory_order_relaxed);
ktl::atomic_ref(stats.current_state).store(new_state, ktl::memory_order_relaxed);
}
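// Illustrative call sites (hypothetical; actual scheduler code differs): a
// path that already runs with interrupts disabled would pass NoIrqSave,
// while one that may run with interrupts enabled would pass IrqSave:
//
//   runtime_stats.Update(THREAD_RUNNING, ThreadRuntimeStats::NoIrqSave);
//   runtime_stats.Update(THREAD_BLOCKED, ThreadRuntimeStats::IrqSave);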
// Updates the page fault / lock contention ticks with the given deltas. These values do not
// require relative coherence with other state.
void AddPageFaultTicks(zx_ticks_t delta) { page_fault_ticks_.fetch_add(delta); }
void AddLockContentionTicks(zx_ticks_t delta) { lock_contention_ticks_.fetch_add(delta); }
// Returns the instantaneous runtime stats for the thread, including the time
// the thread has spent in its current state (if that state is either READY or
// RUNNING).
TaskRuntimeStats GetCompensatedTaskRuntimeStats() const TA_EXCL(seq_lock_) {
ReadResult res = Read();
// Adjust for the current time if the thread was in a state that we track
// when we queried its stats.
if ((res.stats.current_state == THREAD_RUNNING) || (res.stats.current_state == THREAD_READY)) {
const zx_ticks_t delta = zx_ticks_sub_ticks(res.now, res.stats.state_change_ticks);
zx_ticks_t& counter = (res.stats.current_state == THREAD_RUNNING)
? res.stats.total_running_ticks
: res.stats.total_ready_ticks;
counter = zx_ticks_add_ticks(counter, delta);
}
return TaskRuntimeStats{.cpu_ticks = res.stats.total_running_ticks,
.queue_ticks = res.stats.total_ready_ticks,
.page_fault_ticks = page_fault_ticks_,
.lock_contention_ticks = lock_contention_ticks_};
}
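// For example (hypothetical numbers): if a thread entered THREAD_RUNNING at
// state_change_ticks = 1000 with total_running_ticks = 500, and the snapshot
// samples now = 1300, the compensated cpu_ticks is 500 + (1300 - 1000) = 800.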
private:
friend struct ::TaskRuntimeStatsTests;
struct ReadResult {
ThreadStats stats;
zx_ticks_t now{};
};
// Returns a coherent snapshot of the ThreadStats state.
ReadResult Read() const TA_EXCL(seq_lock_) {
ReadResult ret;
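// Retry loop: the shared guard begins a read transaction; on exit it sets
// |success| to true only if the sequence number was stable for the duration
// of the read (i.e. no writer intervened). Otherwise, we yield and retry.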
for (bool success = false; !success; arch::Yield()) {
// TODO(johngro): Look into doing this without disabling ints. Right
// now, the following is possible:
//
// 1) Code enters the lock-guard, "holding" the SeqLock for read.
// 2) Before exiting, the preemption timer fires.
// 3) The scheduler selects a new thread during preemption, and needs to
// update stats for the old thread/process.
// 4) It calls into Update which attempts to hold the SeqLock exclusively.
// 5) Lockdep asserts, because it looks like we are attempting to enter
// the same lock class multiple times.
//
// Typically, this would be an error, but in the case of a seqlock, it
// actually isn't. The read operation (holding the lock with shared
// access) cannot block the write operation (obtaining the lock
// exclusively). The write during the preempt would simply cause the
// initial read transaction to fail and try again. Lockdep does not
// know this, however, so it complains.
//
// In the short term, we just turn off interrupts during the read.
// Moving forward, it would be better to teach lockdep about the proper
// SeqLock semantics, and only have it complain if code attempts to
// enter the lock exclusively multiple times.
//
Guard<SeqLock<kRuntimeStatsSyncType>, SharedIrqSave> guard{&seq_lock_, success};
// Make sure that our sampling of the ticks counter takes place between
// the two reads of the sequence number, and is not allowed to move
// outside of the region because of pipelined execution.
ret.now = platform_current_ticks_synchronized<GetTicksSyncFlag::kAfterPreviousLoads |
GetTicksSyncFlag::kBeforeSubsequentLoads>();
published_stats_.Read(ret.stats);
}
return ret;
}
mutable DECLARE_SEQLOCK_EXPLICIT_SYNC(ThreadRuntimeStats, kRuntimeStatsSyncType) seq_lock_;
SeqLockPayload<ThreadStats, decltype(seq_lock_)> published_stats_ TA_GUARDED(seq_lock_){};
RelaxedAtomic<zx_ticks_t> page_fault_ticks_{0};
RelaxedAtomic<zx_ticks_t> lock_contention_ticks_{0};
};
} // namespace task_runtime_stats::internal
using ThreadRuntimeStats = task_runtime_stats::internal::ThreadRuntimeStats;
#endif // ZIRCON_KERNEL_INCLUDE_KERNEL_TASK_RUNTIME_STATS_H_