// Copyright 2016 The Fuchsia Authors
// Copyright (c) 2008-2015 Travis Geiselbrecht
//
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file or at
// https://opensource.org/licenses/MIT
#include "kernel/wait.h"
#include <lib/ktrace.h>
#include <platform.h>
#include <trace.h>
#include <zircon/errors.h>
#include <kernel/auto_preempt_disabler.h>
#include <kernel/owned_wait_queue.h>
#include <kernel/scheduler.h>
#include <kernel/thread.h>
#include <kernel/timer.h>
#include <ktl/algorithm.h>
#include <ktl/move.h>
#include "kernel/wait_queue_internal.h"
#include <ktl/enforce.h>
#define LOCAL_TRACE 0
#ifndef WAIT_QUEUE_DEPTH_TRACING_ENABLED
#define WAIT_QUEUE_DEPTH_TRACING_ENABLED false
#endif
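// Depth tracing is disabled by default; it can be enabled by defining
// WAIT_QUEUE_DEPTH_TRACING_ENABLED to true at compile time (the exact
// mechanism, e.g. a -D compiler flag or a build arg, depends on the build
// configuration).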
static inline void WqTraceDepth(const WaitQueueCollection* collection, uint32_t depth) {
if constexpr (WAIT_QUEUE_DEPTH_TRACING_ENABLED) {
ktrace_probe(TraceEnabled<true>{}, TraceContext::Cpu, "wq_depth"_stringref,
reinterpret_cast<uint64_t>(collection), static_cast<uint64_t>(depth));
}
}
// add expensive code to do a full validation of the wait queue at various entry points
// to this module.
#define WAIT_QUEUE_VALIDATION (0 || (LK_DEBUGLEVEL > 2))
// Wait queues come in two flavors (traditional and owned), distinguished by
// their magic number. When DEBUG_ASSERT checking the magic number, check
// against both of the possible valid magic numbers.
#define DEBUG_ASSERT_MAGIC_CHECK(_queue) \
DEBUG_ASSERT_MSG( \
((_queue)->magic_ == kMagic) || ((_queue)->magic_ == OwnedWaitQueue::kOwnedMagic), \
"magic 0x%08x", ((_queue)->magic_));
// Wait queues are building blocks that other locking primitives use to handle
// blocking threads.
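//
// A rough usage sketch (a hypothetical caller, not part of this file): a
// primitive built on a WaitQueue blocks and wakes threads while holding the
// thread lock, along these lines:
//
//   zx_status_t Wait(WaitQueue& wq) TA_REQ(thread_lock) {
//     // Block until another thread wakes the queue; the status passed to
//     // WakeOne()/WakeAll() becomes our return value.
//     return wq.BlockEtc(Deadline::infinite(), 0, ResourceOwnership::Normal,
//                        Interruptible::No);
//   }
//
//   void Signal(WaitQueue& wq) TA_REQ(thread_lock) {
//     // Wake a single waiter, handing it ZX_OK as its blocked status.
//     wq.WakeOne(ZX_OK);
//   }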
void WaitQueue::TimeoutHandler(Timer* timer, zx_time_t now, void* arg) {
Thread* thread = static_cast<Thread*>(arg);
thread->canary().Assert();
// Spin trylocking on the thread lock, since the routine that set up this
// callback, BlockEtc, may be trying to simultaneously cancel this timer
// while holding the thread_lock.
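// TrylockOrCancel spins, alternating between attempting to acquire the lock
// and checking whether the timer has been canceled; it returns true (without
// the lock held) if the timer was canceled, avoiding the deadlock that an
// unconditional spin-acquire would create here.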
if (timer->TrylockOrCancel(&thread_lock)) {
return;
}
AnnotatedAutoPreemptDisabler aapd;
UnblockThread(thread, ZX_ERR_TIMED_OUT);
thread_lock.Release();
}
// Deal with the consequences of a change of maximum priority across the set of
// waiters in a wait queue.
void WaitQueue::UpdatePriority(int old_prio) TA_REQ(thread_lock, preempt_disabled_token) {
// If this is an owned wait queue, and the maximum priority of its set of
// waiters has changed, make sure to apply any needed priority inheritance.
if ((magic_ == OwnedWaitQueue::kOwnedMagic) && (old_prio != BlockedPriority())) {
static_cast<OwnedWaitQueue*>(this)->WaitersPriorityChanged(old_prio);
}
}
// Remove a thread from a wait queue, maintain the wait queue's internal count,
// and update the WaitQueue specific bookkeeping in the thread in the process.
void WaitQueue::Dequeue(Thread* t, zx_status_t wait_queue_error) TA_REQ(thread_lock) {
DEBUG_ASSERT(t != nullptr);
DEBUG_ASSERT(t->wait_queue_state().InWaitQueue());
DEBUG_ASSERT(t->state() == THREAD_BLOCKED || t->state() == THREAD_BLOCKED_READ_LOCK);
DEBUG_ASSERT(t->wait_queue_state().blocking_wait_queue_ == this);
collection_.Remove(t);
t->wait_queue_state().blocked_status_ = wait_queue_error;
t->wait_queue_state().blocking_wait_queue_ = nullptr;
}
Thread* WaitQueueCollection::Peek(zx_time_t signed_now) {
// Find the "best" thread in the queue to run at time |now|. See the comments
// in thread.h, immediately above the definition of WaitQueueCollection for
// details of how the data structure and this algorithm work.
// If the collection is empty, there is nothing to do.
if (threads_.is_empty()) {
return nullptr;
}
// If the front of the collection has a key with the fair thread bit set in
// it, then there are no deadline threads in the collection, and the front of
// the queue is the proper choice.
Thread& front = threads_.front();
const uint64_t front_key = front.wait_queue_state().blocked_threads_tree_sort_key_;
if ((front_key & kFairThreadSortKeyBit) != 0) {
// Front of the queue is a fair thread, which means that there are no
// deadline threads in the queue. This thread is our best choice.
return &front;
}
// Looks like we have deadline threads waiting in the queue. Is the absolute
// deadline of the front of the queue in the future? If so, then this is our
// best choice.
//
// TODO(johngro): Is it actually worth this optimistic check, or would it be
// better to simply do the search every time?
DEBUG_ASSERT(signed_now >= 0);
const uint64_t now = static_cast<uint64_t>(signed_now);
if (front_key > now) {
return &front;
}
// Actually search the tree for the deadline thread with the smallest
// absolute deadline which is still in the future relative to now.
auto best_deadline = threads_.upper_bound({now, 0});
if (best_deadline.IsValid()) {
return &*best_deadline;
}
// Looks like we have deadline threads, but all of their deadlines have
// expired. Choose the thread with the minimum relative deadline in the tree.
Thread* min_relative = threads_.root()->wait_queue_state().subtree_min_rel_deadline_thread_;
DEBUG_ASSERT(min_relative != nullptr);
return min_relative;
}
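// An illustrative walk through Peek()'s three cases (times invented for
// clarity): with now = 150 and deadline threads whose sort keys (absolute
// deadlines) are {100, 170}, the front key 100 is not in the future, so the
// upper_bound({150, 0}) search selects the thread with deadline 170. Had the
// front key been 160 instead, the early-out would have returned it directly.
// And if every deadline had already expired, the augmented tree's
// subtree_min_rel_deadline_thread_ bookkeeping supplies the thread with the
// minimum relative deadline.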
void WaitQueueCollection::Insert(Thread* thread) {
WqTraceDepth(this, Count() + 1);
auto& wq_state = thread->wait_queue_state();
DEBUG_ASSERT(wq_state.blocked_threads_tree_sort_key_ == 0);
DEBUG_ASSERT(wq_state.subtree_min_rel_deadline_thread_ == nullptr);
// Pre-compute our sort key so that it does not have to be done every time we
// need to compare our node against another node while we exist in the tree.
//
// See the comments in thread.h, immediately above the definition of
// WaitQueueCollection for details of why we compute the key in this fashion.
static_assert(SchedTime::Format::FractionalBits == 0,
"WaitQueueCollection assumes that the raw_value() of a SchedTime is always a whole "
"number of nanoseconds");
static_assert(SchedDuration::Format::FractionalBits == 0,
"WaitQueueCollection assumes that the raw_value() of a SchedDuration is always a "
"whole number of nanoseconds");
const auto& sched_state = thread->scheduler_state();
if (sched_state.discipline() == SchedDiscipline::Fair) {
// Statically assert that the offset we are going to add to a fair thread's
// start time to form its virtual start time can never be the equivalent of
// something more than ~1 year. If the resolution of SchedWeight becomes
// too fine, it could drive the sum of the thread's virtual start time into
// saturation for low weight threads, making the key useless for sorting.
// By putting a limit of 1 year on the offset, we know that the
// current_time() of the system would need to be greater than 2^63
// nanoseconds minus one year, or about 291 years, before this can happen.
constexpr SchedWeight kMinPosWeight{ffl::FromRatio<int64_t>(1, SchedWeight::Format::Power)};
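// Note: 86400 * 365245 milliseconds is 365.245 days, i.e. one year.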
constexpr SchedDuration OneYear{SchedMs(zx_duration_t(1) * 86400 * 365245)};
static_assert(OneYear >= (Scheduler::kDefaultTargetLatency / kMinPosWeight),
"SchedWeight resolution is too fine");
SchedTime key =
sched_state.start_time() + (Scheduler::kDefaultTargetLatency / sched_state.fair().weight);
wq_state.blocked_threads_tree_sort_key_ =
static_cast<uint64_t>(key.raw_value()) | kFairThreadSortKeyBit;
} else {
wq_state.blocked_threads_tree_sort_key_ =
static_cast<uint64_t>(sched_state.finish_time().raw_value());
}
threads_.insert(thread);
}
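// A worked example of the keys computed by Insert() (numbers invented for
// clarity): a fair thread with start_time() == 1'000'000 and
// (kDefaultTargetLatency / weight) == 16'000'000 receives the key
// (17'000'000 | kFairThreadSortKeyBit), while a deadline thread with
// finish_time() == 5'000'000 receives the key 5'000'000. Because the fair
// bit is the highest bit of the key, every deadline thread sorts ahead of
// every fair thread, which is the invariant Peek() relies on.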
void WaitQueueCollection::Remove(Thread* thread) {
WqTraceDepth(this, Count() - 1);
threads_.erase(*thread);
// In a debug build, zero out the sort key now that we have left the
// collection. This can help to find bugs by allowing us to assert that the
// value is zero during insertion, however it is not strictly needed in a
// production build and can be skipped.
#ifdef DEBUG_ASSERT_IMPLEMENTED
auto& wq_state = thread->wait_queue_state();
wq_state.blocked_threads_tree_sort_key_ = 0;
#endif
}
void WaitQueue::ValidateQueue() TA_REQ(thread_lock) {
DEBUG_ASSERT_MAGIC_CHECK(this);
thread_lock.AssertHeld();
}
////////////////////////////////////////////////////////////////////////////////
//
// Begin user facing API
//
////////////////////////////////////////////////////////////////////////////////
// return the numeric priority of the highest priority thread queued
int WaitQueue::BlockedPriority() const {
// TODO(johngro): Remove this, as well as the concept of "priority" from all
// of the OwnedWaitQueue and profile inheritance code. The wait queue
// ordering no longer depends on the deprecated concept of priority, and there
// is no point in maintaining the system of inheriting the "maximum priority"
// during inheritance events.
//
// Instead, PI will be switched over to inheriting the sum of the weights of
// all of the upstream threads, modeling the weight of a deadline thread as
// the weight of a "max priority" thread (as is done today). This will be a
// temporary stepping stone on the way to implementing generalized deadline
// inheritance, which depends on knowing the minimum relative deadline across
// a set of waiting threads, something which is already being maintained using
// the WaitQueueCollection's augmented binary tree.
int ret = -1;
for (const Thread& t : collection_.threads_) {
ret = ktl::max(ret, t.scheduler_state().effective_priority());
}
return ret;
}
/**
* @brief Block until a wait queue is notified, ignoring existing signals
* in |signal_mask|.
*
* This function puts the current thread on a wait queue and then
* blocks until some other thread wakes the queue up again.
*
* @param deadline The time at which to abort the wait
* @param slack The amount of time it is acceptable to deviate from deadline
* @param signal_mask Mask of existing signals to ignore
* @param reason Reason for the block
* @param interruptible Whether the block can be interrupted
*
* If the deadline is zero, this function returns immediately with
* ZX_ERR_TIMED_OUT. If the deadline is ZX_TIME_INFINITE, this function
* waits indefinitely. Otherwise, this function returns with
* ZX_ERR_TIMED_OUT when the deadline elapses.
*
* @return ZX_ERR_TIMED_OUT on timeout, else returns the return
* value specified when the queue was woken by WakeOne() or WakeAll().
*/
zx_status_t WaitQueue::BlockEtc(const Deadline& deadline, uint signal_mask,
ResourceOwnership reason, Interruptible interruptible)
TA_REQ(thread_lock) {
Thread* current_thread = Thread::Current::Get();
DEBUG_ASSERT_MAGIC_CHECK(this);
DEBUG_ASSERT(current_thread->state() == THREAD_RUNNING);
// Any time a thread blocks, it should be holding exactly one spinlock, and it
// should be the thread lock. If a thread blocks while holding another spin
// lock, something has gone very wrong.
thread_lock.AssertHeld();
DEBUG_ASSERT(arch_num_spinlocks_held() == 1);
if (WAIT_QUEUE_VALIDATION) {
ValidateQueue();
}
zx_status_t res = BlockEtcPreamble(deadline, signal_mask, reason, interruptible);
if (res != ZX_OK) {
return res;
}
return BlockEtcPostamble(deadline);
}
/**
* @brief Wake up one thread sleeping on a wait queue
*
* This function removes one thread (if any) from the wait queue and
* makes it executable. The woken thread will be placed in the run queue.
*
* @param wait_queue_error The return value which the woken thread will receive
* from BlockEtc().
*
* @return Whether a thread was woken
*/
bool WaitQueue::WakeOne(zx_status_t wait_queue_error) {
Thread* t;
bool woke = false;
// Note(johngro): No one should ever call WakeOne() on an instance of an
// OwnedWaitQueue. OwnedWaitQueues need to deal with priority inheritance,
// and all wake operations on an OwnedWaitQueue should go through their
// interface instead.
DEBUG_ASSERT(magic_ == kMagic);
thread_lock.AssertHeld();
if (WAIT_QUEUE_VALIDATION) {
ValidateQueue();
}
t = Peek(current_time());
if (t) {
Dequeue(t, wait_queue_error);
ktrace_ptr(TAG_KWAIT_WAKE, this, 0, 0);
// Wake up the new thread, putting it in a run queue on a cpu.
Scheduler::Unblock(t);
woke = true;
}
return woke;
}
void WaitQueue::DequeueThread(Thread* t, zx_status_t wait_queue_error) {
DEBUG_ASSERT_MAGIC_CHECK(this);
thread_lock.AssertHeld();
if (WAIT_QUEUE_VALIDATION) {
ValidateQueue();
}
Dequeue(t, wait_queue_error);
}
void WaitQueue::MoveThread(WaitQueue* source, WaitQueue* dest, Thread* t) {
DEBUG_ASSERT_MAGIC_CHECK(source);
DEBUG_ASSERT_MAGIC_CHECK(dest);
thread_lock.AssertHeld();
if (WAIT_QUEUE_VALIDATION) {
source->ValidateQueue();
dest->ValidateQueue();
}
DEBUG_ASSERT(t != nullptr);
DEBUG_ASSERT(t->wait_queue_state().InWaitQueue());
DEBUG_ASSERT(t->state() == THREAD_BLOCKED || t->state() == THREAD_BLOCKED_READ_LOCK);
DEBUG_ASSERT(t->wait_queue_state().blocking_wait_queue_ == source);
DEBUG_ASSERT(source->collection_.Count() > 0);
source->collection_.Remove(t);
dest->collection_.Insert(t);
t->wait_queue_state().blocking_wait_queue_ = dest;
}
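// A sketch of a requeue-style operation built on MoveThread() (hypothetical;
// real requeue primitives such as futexes use OwnedWaitQueue and carry more
// bookkeeping): move one waiter from |from| to |to| without waking it.
//
//   void RequeueOne(WaitQueue& from, WaitQueue& to) TA_REQ(thread_lock) {
//     if (Thread* t = from.Peek(current_time())) {
//       WaitQueue::MoveThread(&from, &to, t);
//     }
//   }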
/**
 * @brief Wake all threads sleeping on a wait queue
 *
 * This function removes all threads (if any) from the wait queue and
 * makes them executable. The woken threads will be placed in the run
 * queue.
 *
 * @param wait_queue_error The return value which the woken threads will
 * receive from BlockEtc().
 */
void WaitQueue::WakeAll(zx_status_t wait_queue_error) {
Thread* t;
// Note(johngro): See the note in WakeOne(). No one should ever call this
// method on an OwnedWaitQueue.
DEBUG_ASSERT(magic_ == kMagic);
thread_lock.AssertHeld();
if (WAIT_QUEUE_VALIDATION) {
ValidateQueue();
}
if (collection_.Count() == 0) {
return;
}
// pop all the threads off the wait queue into the run queue
// TODO(johngro): Look into ways to optimize this. There is no real reason to:
//
// 1) Remove the threads from the collection in wake order.
// 2) Rebalance the tree's wait queue collection during the process of
// removing the nodes.
// 3) Have separate node storage in the thread for existing on the unblock
// list.
//
Thread::UnblockList list;
zx_time_t now = current_time();
while ((t = Peek(now))) {
Dequeue(t, wait_queue_error);
list.push_back(t);
}
DEBUG_ASSERT(collection_.Count() == 0);
ktrace_ptr(TAG_KWAIT_WAKE, this, 0, 0);
// Wake up the new threads, putting them in a run queue on a cpu.
Scheduler::Unblock(ktl::move(list));
}
bool WaitQueue::IsEmpty() const {
DEBUG_ASSERT_MAGIC_CHECK(this);
thread_lock.AssertHeld();
return collection_.Count() == 0;
}
/**
* @brief Tear down a wait queue
*
* This panics if any threads were waiting on this queue, because that
* would indicate a race condition for most uses of wait queues. If a
* thread is currently waiting, it could have been scheduled later, in
* which case it would have called Block() on an invalid wait
* queue.
*/
WaitQueue::~WaitQueue() {
DEBUG_ASSERT_MAGIC_CHECK(this);
if (collection_.Count() != 0) {
panic("~WaitQueue() called on non-empty WaitQueue\n");
}
magic_ = 0;
}
/**
* @brief Wake a specific thread in a wait queue
*
* This function extracts a specific thread from a wait queue, wakes it,
* places it in the run queue, and reschedules if necessary.
*
* @param t The thread to wake
* @param wait_queue_error The return value which the woken thread will receive
* from BlockEtc().
*
* @return ZX_OK on success, or ZX_ERR_BAD_STATE if the thread was not in any
* wait queue.
*/
zx_status_t WaitQueue::UnblockThread(Thread* t, zx_status_t wait_queue_error) {
t->canary().Assert();
thread_lock.AssertHeld();
if (t->state() != THREAD_BLOCKED && t->state() != THREAD_BLOCKED_READ_LOCK) {
return ZX_ERR_BAD_STATE;
}
WaitQueue* wq = t->wait_queue_state().blocking_wait_queue_;
DEBUG_ASSERT(wq != nullptr);
DEBUG_ASSERT_MAGIC_CHECK(wq);
DEBUG_ASSERT(t->wait_queue_state().InWaitQueue());
if (WAIT_QUEUE_VALIDATION) {
wq->ValidateQueue();
}
int old_wq_prio = wq->BlockedPriority();
wq->Dequeue(t, wait_queue_error);
wq->UpdatePriority(old_wq_prio);
Scheduler::Unblock(t);
return ZX_OK;
}
void WaitQueue::PriorityChanged(Thread* t, int old_prio, PropagatePI propagate) {
t->canary().Assert();
thread_lock.AssertHeld();
DEBUG_ASSERT(t->state() == THREAD_BLOCKED || t->state() == THREAD_BLOCKED_READ_LOCK);
DEBUG_ASSERT(t->wait_queue_state().blocking_wait_queue_ == this);
DEBUG_ASSERT_MAGIC_CHECK(this);
LTRACEF("%p %d -> %d\n", t, old_prio, t->scheduler_state().effective_priority());
// |t|'s effective priority has already been re-calculated. If |t| is
// currently at the head of this WaitQueue, then |t|'s old priority is the
// previous priority of the WaitQueue. Otherwise, it is the priority of
// the WaitQueue as it stands before we re-insert |t|.
const int old_wq_prio = (Peek(current_time()) == t) ? old_prio : BlockedPriority();
// simple algorithm: remove the thread from the queue and add it back
// TODO: implement an optimal algorithm depending on all the different edge
// cases of how the thread was previously queued and what priority it is
// switching to.
collection_.Remove(t);
collection_.Insert(t);
if (propagate == PropagatePI::Yes) {
UpdatePriority(old_wq_prio);
}
if (WAIT_QUEUE_VALIDATION) {
ValidateQueue();
}
}