// Copyright 2016 The Fuchsia Authors
// Copyright (c) 2008-2015 Travis Geiselbrecht
//
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file or at
// https://opensource.org/licenses/MIT
#include "kernel/wait.h"
#include <lib/ktrace.h>
#include <platform.h>
#include <trace.h>
#include <zircon/errors.h>
#include <kernel/auto_preempt_disabler.h>
#include <kernel/owned_wait_queue.h>
#include <kernel/scheduler.h>
#include <kernel/thread.h>
#include <kernel/timer.h>
#include <ktl/algorithm.h>
#include <ktl/move.h>
#include "kernel/wait_queue_internal.h"
#include <ktl/enforce.h>
#define LOCAL_TRACE 0
#ifndef WAIT_QUEUE_DEPTH_TRACING_ENABLED
#define WAIT_QUEUE_DEPTH_TRACING_ENABLED false
#endif
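// Depth tracing is disabled by default; it can be enabled by defining
// WAIT_QUEUE_DEPTH_TRACING_ENABLED to true at compile time (the exact
// mechanism, e.g. a -D compiler flag or a build arg, depends on the build
// configuration).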
static inline void WqTraceDepth(const WaitQueueCollection* collection, uint32_t depth) {
if constexpr (WAIT_QUEUE_DEPTH_TRACING_ENABLED) {
ktrace_probe(TraceEnabled<true>{}, TraceContext::Cpu, "wq_depth"_stringref,
reinterpret_cast<uint64_t>(collection), static_cast<uint64_t>(depth));
}
}
// add expensive code to do a full validation of the wait queue at various entry points
// to this module.
#define WAIT_QUEUE_VALIDATION (0 || (LK_DEBUGLEVEL > 2))
// Wait queues come in two flavors (traditional and owned), distinguished by
// their magic number. When DEBUG_ASSERT checking the magic number, check
// against both of the possible valid magic numbers.
#define DEBUG_ASSERT_MAGIC_CHECK(_queue) \
DEBUG_ASSERT_MSG( \
((_queue)->magic_ == kMagic) || ((_queue)->magic_ == OwnedWaitQueue::kOwnedMagic), \
"magic 0x%08x", ((_queue)->magic_));
// Wait queues are building blocks that other locking primitives use to handle
// blocking threads.
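//
// A rough usage sketch (a hypothetical caller, not part of this file): a
// primitive built on a WaitQueue blocks and wakes threads while holding the
// thread lock, along these lines:
//
//   zx_status_t Wait(WaitQueue& wq) TA_REQ(thread_lock) {
//     // Block until another thread wakes the queue; the status passed to
//     // WakeOne()/WakeAll() becomes our return value.
//     return wq.BlockEtc(Deadline::infinite(), 0, ResourceOwnership::Normal,
//                        Interruptible::No);
//   }
//
//   void Signal(WaitQueue& wq) TA_REQ(thread_lock) {
//     // Wake a single waiter, handing it ZX_OK as its blocked status.
//     wq.WakeOne(ZX_OK);
//   }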
void WaitQueue::TimeoutHandler(Timer* timer, zx_time_t now, void* arg) {
Thread* thread = static_cast<Thread*>(arg);
thread->canary().Assert();
// Spin trylocking on the thread lock, since the routine that set up this
// callback, BlockEtc, may be trying to simultaneously cancel this timer
// while holding the thread_lock.
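// TrylockOrCancel spins, alternating between attempting to acquire the lock
// and checking whether the timer has been canceled; it returns true (without
// the lock held) if the timer was canceled, avoiding the deadlock that an
// unconditional spin-acquire would create here.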
if (timer->TrylockOrCancel(&thread_lock)) {
return;
}
AnnotatedAutoPreemptDisabler aapd;
UnblockThread(thread, ZX_ERR_TIMED_OUT);
thread_lock.Release();
}
// Deal with the consequences of a change of maximum priority across the set of
// waiters in a wait queue.
void WaitQueue::UpdatePriority(int old_prio) TA_REQ(thread_lock, preempt_disabled_token) {
// If this is an owned wait queue, and the maximum priority of its set of
// waiters has changed, make sure to apply any needed priority inheritance.
if ((magic_ == OwnedWaitQueue::kOwnedMagic) && (old_prio != BlockedPriority())) {
static_cast<OwnedWaitQueue*>(this)->WaitersPriorityChanged(old_prio);
}
}
// Remove a thread from a wait queue, maintain the wait queue's internal count,
// and update the WaitQueue specific bookkeeping in the thread in the process.
void WaitQueue::Dequeue(Thread* t, zx_status_t wait_queue_error) TA_REQ(thread_lock) {
DEBUG_ASSERT(t != nullptr);
DEBUG_ASSERT(t->wait_queue_state().InWaitQueue());
DEBUG_ASSERT(t->state() == THREAD_BLOCKED || t->state() == THREAD_BLOCKED_READ_LOCK);
DEBUG_ASSERT(t->wait_queue_state().blocking_wait_queue_ == this);
collection_.Remove(t);
t->wait_queue_state().blocked_status_ = wait_queue_error;
t->wait_queue_state().blocking_wait_queue_ = nullptr;
}
Thread* WaitQueueCollection::Peek(zx_time_t signed_now) {
// Find the "best" thread in the queue to run at time |now|. See the comments
// in thread.h, immediately above the definition of WaitQueueCollection for
// details of how the data structure and this algorithm work.
// If the collection is empty, there is nothing to do.
if (threads_.is_empty()) {
return nullptr;
}
// If the front of the collection has a key with the fair thread bit set in
// it, then there are no deadline threads in the collection, and the front of
// the queue is the proper choice.
Thread& front = threads_.front();
const uint64_t front_key = front.wait_queue_state().blocked_threads_tree_sort_key_;
if ((front_key & kFairThreadSortKeyBit) != 0) {
// Front of the queue is a fair thread, which means that there are no
// deadline threads in the queue. This thread is our best choice.
return &front;
}
// Looks like we have deadline threads waiting in the queue. Is the absolute
// deadline of the front of the queue in the future? If so, then this is our
// best choice.
//
// TODO(johngro): Is it actually worth this optimistic check, or would it be
// better to simply do the search every time?
DEBUG_ASSERT(signed_now >= 0);
const uint64_t now = static_cast<uint64_t>(signed_now);
if (front_key > now) {
return &front;
}
// Actually search the tree for the deadline thread with the smallest
// absolute deadline which is still in the future relative to now.
auto best_deadline = threads_.upper_bound({now, 0});
if (best_deadline.IsValid()) {
return &*best_deadline;
}
// Looks like we have deadline threads, but all of their deadlines have
// expired. Choose the thread with the minimum relative deadline in the tree.
Thread* min_relative = threads_.root()->wait_queue_state().subtree_min_rel_deadline_thread_;
DEBUG_ASSERT(min_relative != nullptr);
return min_relative;
}
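// An illustrative walk through Peek()'s three cases (times invented for
// clarity): with now = 150 and deadline threads whose sort keys (absolute
// deadlines) are {100, 170}, the front key 100 is not in the future, so the
// upper_bound({150, 0}) search selects the thread with deadline 170. Had the
// front key been 160 instead, the early-out would have returned it directly.
// And if every deadline had already expired, the augmented tree's
// subtree_min_rel_deadline_thread_ bookkeeping supplies the thread with the
// minimum relative deadline.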
void WaitQueueCollection::Insert(Thread* thread) {
WqTraceDepth(this, Count() + 1);
auto& wq_state = thread->wait_queue_state();
DEBUG_ASSERT(wq_state.blocked_threads_tree_sort_key_ == 0);
DEBUG_ASSERT(wq_state.subtree_min_rel_deadline_thread_ == nullptr);
// Pre-compute our sort key so that it does not have to be done every time we
// need to compare our node against another node while we exist in the tree.
//
// See the comments in thread.h, immediately above the definition of
// WaitQueueCollection for details of why we compute the key in this fashion.
static_assert(SchedTime::Format::FractionalBits == 0,
"WaitQueueCollection assumes that the raw_value() of a SchedTime is always a whole "
"number of nanoseconds");
static_assert(SchedDuration::Format::FractionalBits == 0,
"WaitQueueCollection assumes that the raw_value() of a SchedDuration is always a "
"whole number of nanoseconds");
const auto& sched_state = thread->scheduler_state();
if (sched_state.discipline() == SchedDiscipline::Fair) {
// Statically assert that the offset we are going to add to a fair thread's
// start time to form its virtual start time can never be the equivalent of
// something more than ~1 year. If the resolution of SchedWeight becomes
// too fine, it could drive the sum of the thread's virtual start time into
// saturation for low weight threads, making the key useless for sorting.
// By putting a limit of 1 year on the offset, we know that the
// current_time() of the system would need to be greater than 2^63
// nanoseconds minus one year, or about 291 years, before this can happen.
constexpr SchedWeight kMinPosWeight{ffl::FromRatio<int64_t>(1, SchedWeight::Format::Power)};
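// Note: 86400 * 365245 milliseconds is 365.245 days, i.e. one year.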
constexpr SchedDuration OneYear{SchedMs(zx_duration_t(1) * 86400 * 365245)};
static_assert(OneYear >= (Scheduler::kDefaultTargetLatency / kMinPosWeight),
"SchedWeight resolution is too fine");
SchedTime key =
sched_state.start_time() + (Scheduler::kDefaultTargetLatency / sched_state.fair().weight);
wq_state.blocked_threads_tree_sort_key_ =
static_cast<uint64_t>(key.raw_value()) | kFairThreadSortKeyBit;
} else {
wq_state.blocked_threads_tree_sort_key_ =
static_cast<uint64_t>(sched_state.finish_time().raw_value());
}
threads_.insert(thread);
}
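// A worked example of the keys computed by Insert() (numbers invented for
// clarity): a fair thread with start_time() == 1'000'000 and
// (kDefaultTargetLatency / weight) == 16'000'000 receives the key
// (17'000'000 | kFairThreadSortKeyBit), while a deadline thread with
// finish_time() == 5'000'000 receives the key 5'000'000. Because the fair
// bit is the highest bit of the key, every deadline thread sorts ahead of
// every fair thread, which is the invariant Peek() relies on.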
void WaitQueueCollection::Remove(Thread* thread) {
WqTraceDepth(this, Count() - 1);
threads_.erase(*thread);
// In a debug build, zero out the sort key now that we have left the
// collection. This can help to find bugs by allowing us to assert that the
// value is zero during insertion, however it is not strictly needed in a
// production build and can be skipped.
#ifdef DEBUG_ASSERT_IMPLEMENTED
auto& wq_state = thread->wait_queue_state();
wq_state.blocked_threads_tree_sort_key_ = 0;
#endif
}
void WaitQueue::ValidateQueue() TA_REQ(thread_lock) {
DEBUG_ASSERT_MAGIC_CHECK(this);
thread_lock.AssertHeld();
}
////////////////////////////////////////////////////////////////////////////////
//
// Begin user facing API
//
////////////////////////////////////////////////////////////////////////////////
// return the numeric priority of the highest priority thread queued
int WaitQueue::BlockedPriority() const {
// TODO(johngro): Remove this, as well as the concept of "priority" from all
// of the OwnedWaitQueue and profile inheritance code. The wait queue
// ordering no longer depends on the deprecated concept of priority, and there
// is no point in maintaining the system of inheriting the "maximum priority"
// during inheritance events.
//
// Instead, PI will be switched over to inheriting the sum of the weights of
// all of the upstream threads, modeling the weight of a deadline thread as
// the weight of a "max priority" thread (as is done today). This will be a
// temporary stepping stone on the way to implementing generalized deadline
// inheritance, which depends on knowing the minimum relative deadline across
// a set of waiting threads, something which is already being maintained using
// the WaitQueueCollection's augmented binary tree.
int ret = -1;
for (const Thread& t : collection_.threads_) {
ret = ktl::max(ret, t.scheduler_state().effective_priority());
}
return ret;
}
/**
* @brief Block until a wait queue is notified, ignoring existing signals
* in |signal_mask|.
*
* This function puts the current thread on a wait queue and then
* blocks until some other thread wakes the queue up again.
*
* @param deadline The time at which to abort the wait
* @param slack The amount of time it is acceptable to deviate from deadline
* @param signal_mask Mask of existing signals to ignore
* @param reason Reason for the block
* @param interruptible Whether the block can be interrupted
*
* If the deadline is zero, this function returns immediately with
* ZX_ERR_TIMED_OUT. If the deadline is ZX_TIME_INFINITE, this function
* waits indefinitely. Otherwise, this function returns with
* ZX_ERR_TIMED_OUT when the deadline elapses.
*
* @return ZX_ERR_TIMED_OUT on timeout, else returns the return
* value specified when the queue was woken by WakeOne() or WakeAll().
*/
zx_status_t WaitQueue::BlockEtc(const Deadline& deadline, uint signal_mask,
ResourceOwnership reason, Interruptible interruptible)
TA_REQ(thread_lock) {
Thread* current_thread = Thread::Current::Get();
DEBUG_ASSERT_MAGIC_CHECK(this);
DEBUG_ASSERT(current_thread->state() == THREAD_RUNNING);
// Any time a thread blocks, it should be holding exactly one spinlock, and it
// should be the thread lock. If a thread blocks while holding another spin
// lock, something has gone very wrong.
thread_lock.AssertHeld();
DEBUG_ASSERT(arch_num_spinlocks_held() == 1);
if (WAIT_QUEUE_VALIDATION) {
ValidateQueue();
}
zx_status_t res = BlockEtcPreamble(deadline, signal_mask, reason, interruptible);
if (res != ZX_OK) {
return res;
}
return BlockEtcPostamble(deadline);
}
/**
* @brief Wake up one thread sleeping on a wait queue
*
* This function removes one thread (if any) from the wait queue and
* makes it executable. The woken thread will be placed in the run queue.
*
* @param wait_queue_error The return value which the woken thread will receive
* from BlockEtc().
*
* @return Whether a thread was woken
*/
bool WaitQueue::WakeOne(zx_status_t wait_queue_error) {
Thread* t;
bool woke = false;
// Note(johngro): No one should ever call WakeOne() on an instance of an
// OwnedWaitQueue. OwnedWaitQueues need to deal with priority inheritance,
// and all wake operations on an OwnedWaitQueue should go through their
// interface instead.
DEBUG_ASSERT(magic_ == kMagic);
thread_lock.AssertHeld();
if (WAIT_QUEUE_VALIDATION) {
ValidateQueue();
}
t = Peek(current_time());
if (t) {
Dequeue(t, wait_queue_error);
ktrace_ptr(TAG_KWAIT_WAKE, this, 0, 0);
// Wake up the new thread, putting it in a run queue on a cpu.
Scheduler::Unblock(t);
woke = true;
}
return woke;
}
void WaitQueue::DequeueThread(Thread* t, zx_status_t wait_queue_error) {
DEBUG_ASSERT_MAGIC_CHECK(this);
thread_lock.AssertHeld();
if (WAIT_QUEUE_VALIDATION) {
ValidateQueue();
}
Dequeue(t, wait_queue_error);
}
void WaitQueue::MoveThread(WaitQueue* source, WaitQueue* dest, Thread* t) {
DEBUG_ASSERT_MAGIC_CHECK(source);
DEBUG_ASSERT_MAGIC_CHECK(dest);
thread_lock.AssertHeld();
if (WAIT_QUEUE_VALIDATION) {
source->ValidateQueue();
dest->ValidateQueue();
}
DEBUG_ASSERT(t != nullptr);
DEBUG_ASSERT(t->wait_queue_state().InWaitQueue());
DEBUG_ASSERT(t->state() == THREAD_BLOCKED || t->state() == THREAD_BLOCKED_READ_LOCK);
DEBUG_ASSERT(t->wait_queue_state().blocking_wait_queue_ == source);
DEBUG_ASSERT(source->collection_.Count() > 0);
source->collection_.Remove(t);
dest->collection_.Insert(t);
t->wait_queue_state().blocking_wait_queue_ = dest;
}
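// A sketch of a requeue-style operation built on MoveThread() (hypothetical;
// real requeue primitives such as futexes use OwnedWaitQueue and carry more
// bookkeeping): move one waiter from |from| to |to| without waking it.
//
//   void RequeueOne(WaitQueue& from, WaitQueue& to) TA_REQ(thread_lock) {
//     if (Thread* t = from.Peek(current_time())) {
//       WaitQueue::MoveThread(&from, &to, t);
//     }
//   }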
/**
 * @brief Wake all threads sleeping on a wait queue
 *
 * This function removes all threads (if any) from the wait queue and
 * makes them executable. The woken threads will be placed in the run
 * queue.
 *
 * @param wait_queue_error The return value which the woken threads will
 * receive from BlockEtc().
 */
void WaitQueue::WakeAll(zx_status_t wait_queue_error) {
Thread* t;
// Note(johngro): See the note in WakeOne(). No one should ever call this
// method on an OwnedWaitQueue.
DEBUG_ASSERT(magic_ == kMagic);
thread_lock.AssertHeld();
if (WAIT_QUEUE_VALIDATION) {
ValidateQueue();
}
if (collection_.Count() == 0) {
return;
}
// pop all the threads off the wait queue into the run queue
// TODO(johngro): Look into ways to optimize this. There is no real reason to:
//
// 1) Remove the threads from the collection in wake order.
// 2) Rebalance the tree's wait queue collection during the process of
// removing the nodes.
// 3) Have separate node storage in the thread for existing on the unblock
// list.
//
Thread::UnblockList list;
zx_time_t now = current_time();
while ((t = Peek(now))) {
Dequeue(t, wait_queue_error);
list.push_back(t);
}
DEBUG_ASSERT(collection_.Count() == 0);
ktrace_ptr(TAG_KWAIT_WAKE, this, 0, 0);
// Wake up the new threads, putting them in a run queue on a cpu.
Scheduler::Unblock(ktl::move(list));
}
bool WaitQueue::IsEmpty() const {
DEBUG_ASSERT_MAGIC_CHECK(this);
thread_lock.AssertHeld();
return collection_.Count() == 0;
}
/**
* @brief Tear down a wait queue
*
* This panics if any threads were waiting on this queue, because that
* would indicate a race condition for most uses of wait queues. If a
* thread is currently waiting, it could have been scheduled later, in
* which case it would have called Block() on an invalid wait
* queue.
*/
WaitQueue::~WaitQueue() {
DEBUG_ASSERT_MAGIC_CHECK(this);
if (collection_.Count() != 0) {
panic("~WaitQueue() called on non-empty WaitQueue\n");
}
magic_ = 0;
}
/**
* @brief Wake a specific thread in a wait queue
*
* This function extracts a specific thread from a wait queue, wakes it,
* places it in the run queue, and reschedules if necessary.
*
* @param t The thread to wake
* @param wait_queue_error The return value which the woken thread will receive
* from BlockEtc().
*
* @return ZX_OK on success, or ZX_ERR_BAD_STATE if the thread was not in any
* wait queue.
*/
zx_status_t WaitQueue::UnblockThread(Thread* t, zx_status_t wait_queue_error) {
t->canary().Assert();
thread_lock.AssertHeld();
if (t->state() != THREAD_BLOCKED && t->state() != THREAD_BLOCKED_READ_LOCK) {
return ZX_ERR_BAD_STATE;
}
WaitQueue* wq = t->wait_queue_state().blocking_wait_queue_;
DEBUG_ASSERT(wq != nullptr);
DEBUG_ASSERT_MAGIC_CHECK(wq);
DEBUG_ASSERT(t->wait_queue_state().InWaitQueue());
if (WAIT_QUEUE_VALIDATION) {
wq->ValidateQueue();
}
int old_wq_prio = wq->BlockedPriority();
wq->Dequeue(t, wait_queue_error);
wq->UpdatePriority(old_wq_prio);
Scheduler::Unblock(t);
return ZX_OK;
}
void WaitQueue::PriorityChanged(Thread* t, int old_prio, PropagatePI propagate) {
t->canary().Assert();
thread_lock.AssertHeld();
DEBUG_ASSERT(t->state() == THREAD_BLOCKED || t->state() == THREAD_BLOCKED_READ_LOCK);
DEBUG_ASSERT(t->wait_queue_state().blocking_wait_queue_ == this);
DEBUG_ASSERT_MAGIC_CHECK(this);
LTRACEF("%p %d -> %d\n", t, old_prio, t->scheduler_state().effective_priority());
// |t|'s effective priority has already been re-calculated. If |t| is
// currently at the head of this WaitQueue, then |t|'s old priority is the
// previous priority of the WaitQueue. Otherwise, it is the priority of
// the WaitQueue as it stands before we re-insert |t|.
const int old_wq_prio = (Peek(current_time()) == t) ? old_prio : BlockedPriority();
// simple algorithm: remove the thread from the queue and add it back
// TODO: implement an optimal algorithm depending on all the different edge
// cases of how the thread was previously queued and what priority it is
// switching to.
collection_.Remove(t);
collection_.Insert(t);
if (propagate == PropagatePI::Yes) {
UpdatePriority(old_wq_prio);
}
if (WAIT_QUEUE_VALIDATION) {
ValidateQueue();
}
}