blob: 1277b3a7b78294c7cfe860a8429449706dac2f14 [file] [log] [blame]
// Copyright 2016 The Fuchsia Authors
// Copyright (c) 2008-2015 Travis Geiselbrecht
//
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file or at
// https://opensource.org/licenses/MIT
/**
* @file
* @brief Kernel threading
*
* This file is the core kernel threading interface.
*
* @defgroup thread Threads
* @{
*/
#include "kernel/thread.h"
#include <assert.h>
#include <debug.h>
#include <inttypes.h>
#include <lib/arch/intrin.h>
#include <lib/counters.h>
#include <lib/fit/defer.h>
#include <lib/fxt/interned_string.h>
#include <lib/heap.h>
#include <lib/ktrace.h>
#include <lib/lazy_init/lazy_init.h>
#include <lib/thread_sampler/thread_sampler.h>
#include <lib/version.h>
#include <lib/zircon-internal/macros.h>
#include <platform.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <zircon/errors.h>
#include <zircon/listnode.h>
#include <zircon/time.h>
#include <zircon/types.h>
#include <arch/debugger.h>
#include <arch/exception.h>
#include <arch/interrupt.h>
#include <arch/ops.h>
#include <kernel/auto_preempt_disabler.h>
#include <kernel/cpu.h>
#include <kernel/dpc.h>
#include <kernel/idle_power_thread.h>
#include <kernel/lockdep.h>
#include <kernel/mp.h>
#include <kernel/percpu.h>
#include <kernel/restricted.h>
#include <kernel/scheduler.h>
#include <kernel/stats.h>
#include <kernel/thread_lock.h>
#include <kernel/timer.h>
#include <ktl/algorithm.h>
#include <ktl/atomic.h>
#include <lk/main.h>
#include <lockdep/lockdep.h>
#include <object/process_dispatcher.h>
#include <object/thread_dispatcher.h>
#include <pretty/hexdump.h>
#include <vm/kstack.h>
#include <vm/vm.h>
#include <vm/vm_address_region.h>
#include <vm/vm_aspace.h>
#include "lib/zx/time.h"
#include <ktl/enforce.h>
#define LOCAL_TRACE 0
// Kernel counters.
// The counters below never decrease.
//
// Counts the number of Threads successfully created.
KCOUNTER(thread_create_count, "thread.create")
// Counts the number of detached Threads that exited.
KCOUNTER(thread_detached_exit_count, "thread.detached_exit")
// Counts the number of Threads joined.
KCOUNTER(thread_join_count, "thread.join")
// Counts the number of calls to Suspend() that succeeded.
KCOUNTER(thread_suspend_count, "thread.suspend")
// Counts the number of calls to Resume() that did not find the thread already dead.
KCOUNTER(thread_resume_count, "thread.resume")
// Counts the number of times a thread's timeslice extension was activated (see
// |PreemptionState::SetTimesliceExtension|).
KCOUNTER(thread_timeslice_extended, "thread.timeslice_extended")
// Counts the number of calls to RestrictedKick() that succeeded.
KCOUNTER(thread_restricted_kick_count, "thread.restricted_kick")
// Counts the number of stack samples that failed with an error other than
// ZX_ERR_NOT_SUPPORTED.
KCOUNTER(thread_sampling_failed, "thread.sampling_failed")
// The global thread list. This is a lazy_init type, since initial thread code
// manipulates the list before global constructors are run. This is initialized by
// thread_init_early.
static lazy_init::LazyInit<Thread::List> thread_list;
// List of the threads that currently have a migrate function installed (threads
// are added and removed in Thread::SetMigrateFnLocked).
Thread::MigrateList Thread::migrate_list_;
// Master thread spinlock.
MonitoredSpinLock thread_lock __CPU_ALIGN_EXCLUSIVE{"thread_lock"_intern};
// The global preempt disabled token singleton.
PreemptDisabledToken preempt_disabled_token;
// Returns a short, human-readable label for |state|. Values that do not match
// any known thread state map to "[unknown]".
const char* ToString(enum thread_state state) {
  const char* label = "[unknown]";
  switch (state) {
    case THREAD_INITIAL:
      label = "initial";
      break;
    case THREAD_READY:
      label = "ready";
      break;
    case THREAD_RUNNING:
      label = "running";
      break;
    case THREAD_BLOCKED:
      label = "blocked";
      break;
    case THREAD_BLOCKED_READ_LOCK:
      label = "blocked read lock";
      break;
    case THREAD_SLEEPING:
      label = "sleeping";
      break;
    case THREAD_SUSPENDED:
      label = "suspended";
      break;
    case THREAD_DEATH:
      label = "death";
      break;
    default:
      break;
  }
  return label;
}
// Initializes the lockdep bookkeeping embedded in |t|. Compiles to an empty
// function when WITH_LOCK_DEP is not enabled.
static void init_thread_lock_state(Thread* t) {
#if WITH_LOCK_DEP
  auto& per_thread_lock_state = t->lock_state();
  lockdep::SystemInitThreadLockState(&per_thread_lock_state);
#endif
}
// Records how this thread may be woken (|interruptible|) and the initial
// blocked status, then hands control to the scheduler via Scheduler::Block().
// Once Scheduler::Block() returns, the thread is marked non-interruptible again.
void WaitQueueCollection::ThreadState::Block(Interruptible interruptible, zx_status_t status) {
  // Both fields must be in place before entering the scheduler.
  interruptible_ = interruptible;
  blocked_status_ = status;

  Scheduler::Block();

  interruptible_ = Interruptible::No;
}
// Unblocks |thread| from its wait queue with |status|, but only if the thread
// blocked interruptibly. A non-interruptible waiter is left untouched.
void WaitQueueCollection::ThreadState::UnblockIfInterruptible(Thread* thread, zx_status_t status) {
  if (interruptible_ != Interruptible::Yes) {
    return;
  }
  WaitQueue::UnblockThread(thread, status);
}
// Wakes a sleeping |thread|: stores |status| as the blocked status and then asks
// the scheduler to unblock the thread.
void WaitQueueCollection::ThreadState::Unsleep(Thread* thread, zx_status_t status) {
// Record the wake reason before the scheduler is told about the thread.
blocked_status_ = status;
Scheduler::Unblock(thread);
}
// Wakes a sleeping |thread| with |status|, but only if the sleep was marked
// interruptible. Otherwise this is a no-op.
void WaitQueueCollection::ThreadState::UnsleepIfInterruptible(Thread* thread, zx_status_t status) {
  if (interruptible_ != Interruptible::Yes) {
    return;
  }
  Unsleep(thread, status);
}
// Destructor. In debug builds, verifies that the thread is no longer recorded as
// blocking in any wait queue.
WaitQueueCollection::ThreadState::~ThreadState() {
DEBUG_ASSERT(blocking_wait_queue_ == nullptr);
// owned_wait_queues_ is a fbl:: list of unmanaged pointers. It will debug
// assert if it is not empty when it destructs; we do not need to do so
// here.
}
// Default constructor. Performs no work beyond default member initialization;
// it is invoked via placement new from construct_thread().
Thread::Thread() {}
// Destructor. Callers are expected to have unlinked the thread from the global
// lists beforehand; this is only checked in debug builds.
Thread::~Thread() {
// At this point, the thread must not be on the global thread list or migrate
// list.
DEBUG_ASSERT(!thread_list_node_.InContainer());
DEBUG_ASSERT(!migrate_list_node_.InContainer());
}
// Stores |name| as this thread's name. Names longer than ZX_MAX_NAME_LEN - 1
// bytes are truncated, and the remainder of the name buffer is zero-filled so
// the stored name is always NUL-terminated.
void Thread::set_name(ktl::string_view name) {
  // Leave room for the trailing NUL.
  const ktl::string_view clipped = name.substr(0, ZX_MAX_NAME_LEN - 1);
  const size_t copied = clipped.size();

  memcpy(name_, clipped.data(), copied);
  // Zero everything after the copied bytes, including the terminator.
  memset(name_ + copied, 0, ZX_MAX_NAME_LEN - copied);
}
// Constructs a Thread object in the storage pointed to by |t|, assigns it
// |name|, and initializes its lock-dependency state.
void construct_thread(Thread* t, const char* name) {
  // Use placement new so that any special construction requirements of the
  // Thread structure are honored.
  //
  // TODO(johngro): now that we have converted Thread over to C++, consider
  // switching to using C++ constructors/destructors and new/delete to handle
  // all of this instead of using construct_thread and free_thread_resources
  Thread* const constructed = new (t) Thread();

  constructed->set_name(name);
  init_thread_lock_state(constructed);
}
// Records the entry point and its argument for later use when the thread starts
// (Thread::Trampoline calls entry()(arg())).
void TaskState::Init(thread_start_routine entry, void* arg) {
entry_ = entry;
arg_ = arg;
}
// Blocks the caller, non-interruptibly, on the return-code wait queue until
// woken or until |deadline|. Returns the status produced by the wait queue.
zx_status_t TaskState::Join(zx_time_t deadline) {
  const zx_status_t wait_status = retcode_wait_queue_.Block(deadline, Interruptible::No);
  return wait_status;
}
// Wakes every thread blocked in TaskState::Join(), handing each of them |status|.
void TaskState::WakeJoiners(zx_status_t status) {
  retcode_wait_queue_.WakeAll(status);
}
// Destroys |t| and, if the thread reports that its structure should be freed,
// releases the storage with free().
static void free_thread_resources(Thread* t) {
  // Query ownership first: the object must not be inspected after its
  // destructor has run.
  const bool release_storage = t->free_struct();

  // Manually trigger the struct's destructor so that DEBUG_ASSERTs present in
  // the owned_wait_queues member get triggered.
  t->~Thread();

  if (!release_storage) {
    return;
  }
  free(t);
}
// Resolves a fault of kind |type| at virtual address |va| for the current
// thread by dispatching to the address space that contains |va|.
//
// Returns ZX_ERR_NOT_FOUND for kernel addresses; otherwise returns whatever the
// selected address space's fault handler returns.
zx_status_t Thread::Current::Fault(Thread::Current::FaultType type, vaddr_t va, uint flags) {
  // Kernel addresses should never fault.
  if (is_kernel_address(va)) {
    return ZX_ERR_NOT_FOUND;
  }

  Thread* const current = Thread::Current::Get();
  const bool restricted_active =
      current->restricted_state_ && current->restricted_state_->in_restricted();

  // A user thread running in normal mode must consult its process'
  // aspace_at() to find the right address space.
  //
  // In every other case `aspace_` is used directly:
  //  - a kernel thread must be running a unit test that set `aspace_`
  //    explicitly;
  //  - a user thread in restricted mode has `aspace_` set to the restricted
  //    address space.
  VmAspace* target_aspace;
  if (current->user_thread_ && !restricted_active) {
    target_aspace = current->user_thread_->process()->aspace_at(va);
  } else {
    target_aspace = current->aspace_;
  }

  // Invoke the handler matching the fault type.
  switch (type) {
    case Thread::Current::FaultType::PageFault:
      return target_aspace->PageFault(va, flags);
    case Thread::Current::FaultType::SoftFault:
      return target_aspace->SoftFault(va, flags);
    case Thread::Current::FaultType::AccessedFault:
      DEBUG_ASSERT(flags == 0);
      return target_aspace->AccessedFault(va);
  }

  // This should be unreachable, and is here mainly to satisfy GCC.
  return ZX_ERR_NOT_FOUND;
}
void Thread::Trampoline() {
// Release the incoming lock held across reschedule.
Scheduler::LockHandoff();
arch_enable_ints();
Thread* ct = Thread::Current::Get();
int ret = ct->task_state_.entry()(ct->task_state_.arg());
Thread::Current::Exit(ret);
}
/**
 * @brief Create a new thread
 *
 * This function creates a new thread. The thread is initially suspended, so you
 * need to call Thread::Resume() to execute it.
 *
 * @param t               If not nullptr, use the supplied Thread. Otherwise a
 *                        Thread is allocated with memalign() and marked with
 *                        THREAD_FLAG_FREE_STRUCT.
 * @param name            Name of thread
 * @param entry           Entry point of thread
 * @param arg             Arbitrary argument passed to entry(). It can be null.
 * @param profile         Base scheduler profile for the thread.
 * @param alt_trampoline  If not nullptr, an alternate trampoline for the thread
 *                        to start on.
 *
 * When the profile is built from a priority (see Thread::Create), thread
 * priority is an integer from 0 (lowest) to 31 (highest). Some standard
 * priorities are defined in <kernel/thread.h>:
 *
 * HIGHEST_PRIORITY
 * DPC_PRIORITY
 * HIGH_PRIORITY
 * DEFAULT_PRIORITY
 * LOW_PRIORITY
 * IDLE_PRIORITY
 * LOWEST_PRIORITY
 *
 * Stack size is set to DEFAULT_STACK_SIZE
 *
 * @return Pointer to thread object, or nullptr on failure.
 */
Thread* Thread::CreateEtc(Thread* t, const char* name, thread_start_routine entry, void* arg,
                          const SchedulerState::BaseProfile& profile,
                          thread_trampoline_routine alt_trampoline) {
  unsigned int flags = 0;

  if (!t) {
    t = static_cast<Thread*>(memalign(alignof(Thread), sizeof(Thread)));
    if (!t) {
      return nullptr;
    }
    flags |= THREAD_FLAG_FREE_STRUCT;
  }

  // assert that t is at least as aligned as the Thread is supposed to be
  DEBUG_ASSERT(IS_ALIGNED(t, alignof(Thread)));

  construct_thread(t, name);

  t->task_state_.Init(entry, arg);
  Scheduler::InitializeThread(t, profile);

  // Save whether or not we need to free the thread struct *before* any step
  // that can fail. free_thread_resources() asks the Thread itself whether its
  // storage must be freed, so recording the flags only after the stack was set
  // up would leak a heap-allocated Thread when stack initialization fails.
  t->flags_ = flags;

  zx_status_t status = t->stack_.Init();
  if (status != ZX_OK) {
    free_thread_resources(t);
    return nullptr;
  }

  if (likely(alt_trampoline == nullptr)) {
    alt_trampoline = &Thread::Trampoline;
  }

  // set up the initial stack frame
  arch_thread_initialize(t, (vaddr_t)alt_trampoline);

  // add it to the global thread list
  {
    Guard<MonitoredSpinLock, IrqSave> guard{ThreadLock::Get(), SOURCE_TAG};
    thread_list->push_front(t);
  }

  kcounter_add(thread_create_count, 1);
  return t;
}
// Convenience wrapper around CreateEtc(): allocates a new Thread, builds its
// base profile from |priority|, and uses the default trampoline.
Thread* Thread::Create(const char* name, thread_start_routine entry, void* arg, int priority) {
  const SchedulerState::BaseProfile profile_from_priority{priority};
  return Thread::CreateEtc(nullptr, name, entry, arg, profile_from_priority, nullptr);
}
// Convenience wrapper around CreateEtc(): allocates a new Thread that uses the
// supplied base |profile| and the default trampoline.
Thread* Thread::Create(const char* name, thread_start_routine entry, void* arg,
                       const SchedulerState::BaseProfile& profile) {
  Thread* const created = Thread::CreateEtc(nullptr, name, entry, arg, profile, nullptr);
  return created;
}
/**
* @brief Make a suspended thread executable.
*
* This function is called to start a thread which has just been
* created with Thread::Create() or which has been suspended with
* Thread::Suspend(). It can not fail.
*
* Resuming a dead thread is a no-op. For any other thread the pending suspend
* signal is cleared, and the thread is handed to the scheduler if it is in the
* INITIAL or SUSPENDED state.
*/
void Thread::Resume() {
canary_.Assert();
// We cannot allow a resume to happen if we are holding any spinlocks, unless
// local preemption has been disabled. If we have local preemption enabled,
// and a spinlock held, then it is theoretically possible for our current CPU
// to be chosen as the target for the thread being resumed, triggering a local
// preemption event. This is illegal; being preempted while holding a
// spinlock means that we might lose our CPU while holding a spinlock.
//
// So, assert this here. Either we have no spinlocks, or preemption has
// already been disabled (presumably to the point where we have dropped all of
// our spinlocks)
DEBUG_ASSERT_MSG((arch_num_spinlocks_held() == 0) ||
(Thread::Current::Get()->preemption_state().PreemptIsEnabled() == false),
"It is illegal to Resume a thread when any spinlocks are held unless local "
"preemption is disabled. (spinlocks held %u, preemption enabled %d)",
arch_num_spinlocks_held(),
Thread::Current::Get()->preemption_state().PreemptIsEnabled());
Guard<MonitoredSpinLock, IrqSave> guard{ThreadLock::Get(), SOURCE_TAG};
if (state() == THREAD_DEATH) {
// The thread is dead, resuming it is a no-op.
return;
}
// Emit the thread metadata the first time the thread is resumed so that trace
// events written by this thread have the correct name and process association.
if (state() == THREAD_INITIAL) {
KTRACE_KERNEL_OBJECT("kernel:meta", tid(), ZX_OBJ_TYPE_THREAD, name(),
("process", ktrace::Koid(pid())));
}
// Clear the suspend signal in case there is a pending suspend
signals_.fetch_and(~THREAD_SIGNAL_SUSPEND, ktl::memory_order_relaxed);
if (state() == THREAD_INITIAL || state() == THREAD_SUSPENDED) {
// Wake up the new thread, putting it in a run queue on a cpu.
Scheduler::Unblock(this);
}
// Counted for every non-dead thread, even if no unblock was necessary.
kcounter_add(thread_resume_count, 1);
}
// Detaches this thread and, if that succeeds, resumes it. Returns the error
// from Detach() without resuming when detaching fails; otherwise ZX_OK.
zx_status_t Thread::DetachAndResume() {
  if (const zx_status_t detach_status = Detach(); detach_status != ZX_OK) {
    return detach_status;
  }

  Resume();
  return ZX_OK;
}
/**
* @brief Suspend an initialized/ready/running thread
*
* Sets THREAD_SIGNAL_SUSPEND on the thread and then nudges the thread, in a way
* that depends on its current state, so that it notices the signal.
*
* Must not be called on an idle thread (checked in debug builds).
*
* @return ZX_OK on success, ZX_ERR_BAD_STATE if the thread is dead
*/
zx_status_t Thread::Suspend() {
canary_.Assert();
DEBUG_ASSERT(!IsIdle());
// Disable preemption to defer rescheduling until the end of this scope.
AnnotatedAutoPreemptDisabler preempt_disable;
Guard<MonitoredSpinLock, IrqSave> guard{ThreadLock::Get(), SOURCE_TAG};
if (state() == THREAD_DEATH) {
return ZX_ERR_BAD_STATE;
}
signals_.fetch_or(THREAD_SIGNAL_SUSPEND, ktl::memory_order_relaxed);
switch (state()) {
case THREAD_DEATH:
// This should be unreachable because this state was handled above.
panic("Unexpected thread state");
case THREAD_INITIAL:
// Thread hasn't been started yet, add it to the run queue to transition
// properly through the INITIAL -> READY state machine first, then it
// will see the signal and go to SUSPEND before running user code.
//
// Though the state here is still INITIAL, the higher-level code has
// already executed ThreadDispatcher::Start() so all the userspace
// entry data has been initialized and will be ready to go as soon as
// the thread is unsuspended.
Scheduler::Unblock(this);
break;
case THREAD_READY:
// thread is ready to run and not blocked or suspended.
// will wake up and deal with the signal soon.
break;
case THREAD_RUNNING:
// thread is running (on another cpu)
// The following call is not essential. It just makes the
// thread suspension happen sooner rather than at the next
// timer interrupt or syscall.
mp_interrupt(MP_IPI_TARGET_MASK, cpu_num_to_mask(scheduler_state_.curr_cpu_));
break;
case THREAD_SUSPENDED:
// thread is suspended already
break;
case THREAD_BLOCKED:
case THREAD_BLOCKED_READ_LOCK:
// thread is blocked on something; wake it only if it is marked interruptible
wait_queue_state_.UnblockIfInterruptible(this, ZX_ERR_INTERNAL_INTR_RETRY);
break;
case THREAD_SLEEPING:
// thread is sleeping; wake it only if the sleep is marked interruptible
wait_queue_state_.UnsleepIfInterruptible(this, ZX_ERR_INTERNAL_INTR_RETRY);
break;
}
kcounter_add(thread_suspend_count, 1);
return ZX_OK;
}
// Sets THREAD_SIGNAL_RESTRICTED_KICK on this thread and, if the thread is
// running and is not the caller, interrupts its CPU so the signal is processed
// promptly.
//
// Returns ZX_ERR_BAD_STATE if the thread is dead, ZX_OK otherwise. Must not be
// called on an idle thread (checked in debug builds).
zx_status_t Thread::RestrictedKick() {
  LTRACE_ENTRY;
  canary_.Assert();
  DEBUG_ASSERT(!IsIdle());

  // Determined before taking the thread lock, as in the original flow.
  const bool target_is_other_thread = (Thread::Current::Get() != this);

  Guard<MonitoredSpinLock, IrqSave> guard{ThreadLock::Get(), SOURCE_TAG};

  if (state() == THREAD_DEATH) {
    return ZX_ERR_BAD_STATE;
  }

  signals_.fetch_or(THREAD_SIGNAL_RESTRICTED_KICK, ktl::memory_order_relaxed);

  if (state() == THREAD_RUNNING && target_is_other_thread) {
    // thread is running (on another cpu)
    // Send an interrupt to make sure that the thread processes the new signals.
    // If the thread executes a regular syscall or is rescheduled the signals will
    // also be processed then, but there's no upper bound on how long that could take.
    const cpu_mask_t target_cpu_mask = cpu_num_to_mask(scheduler_state_.curr_cpu_);
    mp_interrupt(MP_IPI_TARGET_MASK, target_cpu_mask);
  }

  kcounter_add(thread_restricted_kick_count, 1);
  return ZX_OK;
}
// Signal an exception on the current thread, to be handled when the
// current syscall exits. Unlike other signals, this is synchronous, in
// the sense that a thread signals itself. This exists primarily so that
// we can unwind the stack in order to get the state of userland's
// callee-saved registers at the point where userland invoked the
// syscall.
void Thread::Current::SignalPolicyException(uint32_t policy_exception_code,
uint32_t policy_exception_data) {
Thread* t = Thread::Current::Get();
Guard<MonitoredSpinLock, IrqSave> guard{ThreadLock::Get(), SOURCE_TAG};
t->signals_.fetch_or(THREAD_SIGNAL_POLICY_EXCEPTION, ktl::memory_order_relaxed);
t->extra_policy_exception_code_ = policy_exception_code;
t->extra_policy_exception_data_ = policy_exception_data;
}
// Unlinks this thread from the global thread list and, if it is currently a
// member, from the migrate list as well.
void Thread::EraseFromListsLocked() {
  thread_list->erase(*this);

  // Only threads with a migrate function installed are on the migrate list.
  if (!migrate_list_node_.InContainer()) {
    return;
  }
  migrate_list_.erase(*this);
}
// Waits for this thread to die, optionally retrieves its return code, removes
// it from the global lists and releases its resources.
//
// |out_retcode| may be nullptr, in which case the return code is discarded.
// |deadline| bounds the wait for the thread to reach the DEATH state.
//
// Returns ZX_ERR_BAD_STATE if the thread is detached, the wait error if waiting
// for the thread to die fails, and ZX_OK once the thread has been reclaimed.
zx_status_t Thread::Join(int* out_retcode, zx_time_t deadline) {
canary_.Assert();
{
Guard<MonitoredSpinLock, IrqSave> guard{ThreadLock::Get(), SOURCE_TAG};
if (flags_ & THREAD_FLAG_DETACHED) {
// the thread is detached, go ahead and exit
return ZX_ERR_BAD_STATE;
}
// wait for the thread to die
if (state() != THREAD_DEATH) {
zx_status_t status = task_state_.Join(deadline);
if (status != ZX_OK) {
return status;
}
}
canary_.Assert();
DEBUG_ASSERT(state() == THREAD_DEATH);
wait_queue_state_.AssertNotBlocked();
// save the return code
if (out_retcode) {
*out_retcode = task_state_.retcode();
}
// remove it from global lists
EraseFromListsLocked();
// Our canary_ will be cleared out in free_thread_resources, which
// explicitly invokes ~Thread.
}
// The thread lock has been dropped by this point; tear the thread down.
free_thread_resources(this);
kcounter_add(thread_join_count, 1);
return ZX_OK;
}
// Marks this thread as detached. Any thread currently blocked in Join() on this
// thread is woken with ZX_ERR_BAD_STATE.
//
// If the thread is already dead, it is reclaimed immediately via Join() and the
// result of that call is returned. Otherwise THREAD_FLAG_DETACHED is set and
// ZX_OK is returned.
zx_status_t Thread::Detach() {
canary_.Assert();
Guard<MonitoredSpinLock, IrqSave> guard{ThreadLock::Get(), SOURCE_TAG};
// if another thread is blocked inside Join() on this thread,
// wake them up with a specific return code
task_state_.WakeJoiners(ZX_ERR_BAD_STATE);
// if it's already dead, then just do what join would have and exit
if (state() == THREAD_DEATH) {
flags_ &= ~THREAD_FLAG_DETACHED; // makes sure Join continues
// Join() acquires the thread lock itself, so drop it first.
guard.Release();
return Join(nullptr, 0);
} else {
flags_ |= THREAD_FLAG_DETACHED;
return ZX_OK;
}
}
// Called back in the DPC worker thread to free the stack and/or the thread structure
// itself for a thread that is exiting on its own.
//
// |dpc| carries the exiting Thread as its argument; that thread must already be
// in the DEATH state (checked in debug builds).
void Thread::FreeDpc(Dpc* dpc) {
Thread* t = dpc->arg<Thread>();
t->canary_.Assert();
DEBUG_ASSERT(t->state() == THREAD_DEATH);
// grab and release the thread lock, which effectively serializes us with
// the thread that is queuing itself for destruction.
{
Guard<MonitoredSpinLock, IrqSave> guard{ThreadLock::Get(), SOURCE_TAG};
ktl::atomic_signal_fence(ktl::memory_order_seq_cst);
}
free_thread_resources(t);
}
// Final stage of exiting the current thread; must be called with the thread
// lock held and never returns.
//
// Moves the thread to the dead state, records |retcode|, runs the migrate
// function for the Exiting stage, disowns any owned wait queues, and then
// either queues a DPC to free the thread (detached threads) or wakes joiners
// (joinable threads) before performing the final reschedule.
__NO_RETURN void Thread::Current::ExitLocked(int retcode) TA_REQ(thread_lock) {
Thread* current_thread = Thread::Current::Get();
// create a dpc on the stack to queue up a free.
// must be put at top scope in this function to force the compiler to keep it from
// reusing the stack before the function exits
Dpc free_dpc;
// enter the dead state
current_thread->set_death();
current_thread->task_state_.set_retcode(retcode);
current_thread->CallMigrateFnLocked(Thread::MigrateStage::Exiting);
// Make sure that we have released any wait queues we may have owned when we
// exited. TODO(johngro): Should we log a warning or take any other
// actions here? Normally, if a thread exits while owning a wait queue, it
// means that it exited while holding some sort of mutex or other
// synchronization object which will now never be released. This is usually
// Very Bad. If any of the OwnedWaitQueues are being used for user-mode
// futexes, who can say what the right thing to do is. In the case of a
// kernel mode mutex, it might be time to panic.
OwnedWaitQueue::DisownAllQueues(current_thread);
// Disable preemption to keep from switching to the DPC thread until the final
// reschedule.
current_thread->preemption_state().PreemptDisable();
// if we're detached, then do our teardown here
if (current_thread->flags_ & THREAD_FLAG_DETACHED) {
kcounter_add(thread_detached_exit_count, 1);
// remove it from global lists
current_thread->EraseFromListsLocked();
// queue a dpc to free the stack and, optionally, the thread structure
if (current_thread->stack_.base() || (current_thread->flags_ & THREAD_FLAG_FREE_STRUCT)) {
free_dpc = Dpc(&Thread::FreeDpc, current_thread);
zx_status_t status = free_dpc.QueueThreadLocked();
DEBUG_ASSERT(status == ZX_OK);
}
} else {
// signal if anyone is waiting
current_thread->task_state_.WakeJoiners(ZX_OK);
}
// Final reschedule.
Scheduler::RescheduleInternal();
// The reschedule above is not expected to return to a dead thread.
panic("somehow fell through thread_exit()\n");
}
/**
* @brief Remove this thread from the scheduler, discarding
* its execution state.
*
* This is almost certainly not the function you want. In the general case,
* this is incredibly unsafe.
*
* This will free any resources allocated by thread_create.
*
* Must not be called on the current thread, and the thread must not be in a
* wait queue (both checked in debug builds).
*/
void Thread::Forget() {
{
Guard<MonitoredSpinLock, IrqSave> guard{ThreadLock::Get(), SOURCE_TAG};
[[maybe_unused]] Thread* current_thread = Thread::Current::Get();
DEBUG_ASSERT(current_thread != this);
// Unlink from the global lists while the thread lock is held.
EraseFromListsLocked();
}
DEBUG_ASSERT(!wait_queue_state_.InWaitQueue());
free_thread_resources(this);
}
/**
* @brief Terminate the current thread
*
* Current thread exits with the specified return code.
*
* If the thread has an associated user thread, that object is notified via
* ExitingCurrent() before the thread lock is taken.
*
* This function does not return.
*/
void Thread::Current::Exit(int retcode) {
Thread* current_thread = Thread::Current::Get();
current_thread->canary_.Assert();
DEBUG_ASSERT(current_thread->state() == THREAD_RUNNING);
DEBUG_ASSERT(!current_thread->IsIdle());
if (current_thread->user_thread_) {
DEBUG_ASSERT(!arch_ints_disabled() || !thread_lock.IsHeld());
current_thread->user_thread_->ExitingCurrent();
}
// ExitLocked() requires the thread lock and never returns.
Guard<MonitoredSpinLock, IrqSave> guard{ThreadLock::Get(), SOURCE_TAG};
Thread::Current::ExitLocked(retcode);
}
// Delivers the kill signal to the current thread by forwarding to
// Thread::Kill(). The current thread must be running and must not be an idle
// thread (checked in debug builds).
void Thread::Current::Kill() {
Thread* current_thread = Thread::Current::Get();
current_thread->canary_.Assert();
DEBUG_ASSERT(current_thread->state() == THREAD_RUNNING);
DEBUG_ASSERT(!current_thread->IsIdle());
current_thread->Kill();
}
// Kill a thread.
//
// Sets THREAD_SIGNAL_KILL on this thread and then, unless the thread is killing
// itself, nudges it according to its current state so that it notices the
// signal.
void Thread::Kill() {
canary_.Assert();
// Disable preemption to defer rescheduling until the end of this scope.
AnnotatedAutoPreemptDisabler preempt_disable;
Guard<MonitoredSpinLock, IrqSave> guard{ThreadLock::Get(), SOURCE_TAG};
// deliver a signal to the thread.
signals_.fetch_or(THREAD_SIGNAL_KILL, ktl::memory_order_relaxed);
// we are killing ourself; nothing needs to be woken.
if (this == Thread::Current::Get()) {
return;
}
// general logic is to wake up the thread so it notices it had a signal delivered to it
switch (state()) {
case THREAD_INITIAL:
// thread hasn't been started yet.
// not really safe to wake it up, since it's only in this state because it's under
// construction by the creator thread.
break;
case THREAD_READY:
// thread is ready to run and not blocked or suspended.
// will wake up and deal with the signal soon.
// TODO: short circuit if it was blocked from user space
break;
case THREAD_RUNNING:
// thread is running (on another cpu).
// The following call is not essential. It just makes the
// thread termination happen sooner rather than at the next
// timer interrupt or syscall.
mp_interrupt(MP_IPI_TARGET_MASK, cpu_num_to_mask(scheduler_state_.curr_cpu_));
break;
case THREAD_SUSPENDED:
// thread is suspended, resume it so it can get the kill signal
Scheduler::Unblock(this);
break;
case THREAD_BLOCKED:
case THREAD_BLOCKED_READ_LOCK:
// thread is blocked on something; wake it only if it is marked interruptible
wait_queue_state_.UnblockIfInterruptible(this, ZX_ERR_INTERNAL_INTR_KILLED);
break;
case THREAD_SLEEPING:
// thread is sleeping; wake it only if the sleep is marked interruptible
wait_queue_state_.UnsleepIfInterruptible(this, ZX_ERR_INTERNAL_INTR_KILLED);
break;
case THREAD_DEATH:
// thread is already dead
return;
}
}
// Returns this thread's hard CPU affinity mask, read under the thread lock.
cpu_mask_t Thread::GetCpuAffinity() const {
  canary_.Assert();

  Guard<MonitoredSpinLock, IrqSave> guard{ThreadLock::Get(), SOURCE_TAG};
  const cpu_mask_t hard_mask = scheduler_state_.hard_affinity();
  return hard_mask;
}
// Replaces this thread's hard CPU affinity mask with |affinity| and returns the
// mask that was in effect before the call.
//
// |affinity| must overlap the set of active CPUs (checked in debug builds).
cpu_mask_t Thread::SetCpuAffinity(cpu_mask_t affinity) {
  canary_.Assert();
  DEBUG_ASSERT_MSG(
      (affinity & mp_get_active_mask()) != 0,
      "Attempted to set affinity mask to %#x, which has no overlap of active CPUs %#x.", affinity,
      mp_get_active_mask());

  Guard<MonitoredSpinLock, IrqSave> guard{ThreadLock::Get(), SOURCE_TAG};

  const cpu_mask_t old_mask = scheduler_state_.hard_affinity();
  scheduler_state_.hard_affinity_ = affinity;

  // Migrate to a different CPU if the current is no longer in the affinity mask.
  // NOTE(review): "current" here is the CPU executing this call
  // (arch_curr_cpu_num()), not necessarily the CPU this thread last ran on --
  // confirm that is intended when the target is not the calling thread.
  const cpu_mask_t executing_cpu_bit = cpu_num_to_mask(arch_curr_cpu_num());
  const bool executing_cpu_excluded = (affinity & executing_cpu_bit) == 0;
  if (executing_cpu_excluded) {
    Scheduler::Migrate(this);
  }

  return old_mask;
}
// Replaces this thread's soft CPU affinity mask with |affinity| and returns the
// mask that was in effect before the call.
cpu_mask_t Thread::SetSoftCpuAffinity(cpu_mask_t affinity) {
  canary_.Assert();

  Guard<MonitoredSpinLock, IrqSave> guard{ThreadLock::Get(), SOURCE_TAG};

  const cpu_mask_t old_mask = scheduler_state_.soft_affinity();
  scheduler_state_.soft_affinity_ = affinity;

  // Migrate to a different CPU if the current is no longer in the affinity mask.
  // NOTE(review): "current" here is the CPU executing this call
  // (arch_curr_cpu_num()) -- confirm that is intended when the target is not
  // the calling thread.
  const cpu_mask_t executing_cpu_bit = cpu_num_to_mask(arch_curr_cpu_num());
  const bool executing_cpu_excluded = (affinity & executing_cpu_bit) == 0;
  if (executing_cpu_excluded) {
    Scheduler::Migrate(this);
  }

  return old_mask;
}
// Returns this thread's soft CPU affinity mask, read under the thread lock.
cpu_mask_t Thread::GetSoftCpuAffinity() const {
  canary_.Assert();

  Guard<MonitoredSpinLock, IrqSave> guard{ThreadLock::Get(), SOURCE_TAG};
  const cpu_mask_t soft_mask = scheduler_state_.soft_affinity_;
  return soft_mask;
}
void Thread::Current::MigrateToCpu(const cpu_num_t target_cpu) {
Thread::Current::Get()->SetCpuAffinity(cpu_num_to_mask(target_cpu));
}
// Installs (or clears) this thread's migrate function. Acquires the thread lock
// and forwards to SetMigrateFnLocked().
void Thread::SetMigrateFn(MigrateFn migrate_fn) {
canary_.Assert();
Guard<MonitoredSpinLock, IrqSave> guard{ThreadLock::Get(), SOURCE_TAG};
SetMigrateFnLocked(ktl::move(migrate_fn));
}
// Installs (or clears, when |migrate_fn| is empty) this thread's migrate
// function and keeps |migrate_list_| membership in sync with it.
//
// A non-empty |migrate_fn| must not be installed while a migration is pending
// (checked in debug builds).
void Thread::SetMigrateFnLocked(MigrateFn migrate_fn) {
DEBUG_ASSERT(!migrate_fn || !migrate_pending_);
canary_.Assert();
// If |migrate_fn_| was previously set, remove |this| from |migrate_list_|.
if (migrate_fn_) {
migrate_list_.erase(*this);
}
migrate_fn_ = ktl::move(migrate_fn);
// Clear stale state when (un) setting the migrate fn.
// TODO(https://fxbug.dev/42164826): Cleanup the migrate fn feature and associated state
// and clearly define and check invariants.
scheduler_state().next_cpu_ = INVALID_CPU;
migrate_pending_ = false;
// If |migrate_fn_| is valid, add |this| to |migrate_list_|.
if (migrate_fn_) {
migrate_list_.push_front(this);
}
}
// Invokes this thread's migrate function, if one is installed, for |stage|.
//
// Before/After calls are paired through |migrate_pending_|: Before fires only
// when no migration is pending, After fires only when one is. Exiting always
// fires.
void Thread::CallMigrateFnLocked(MigrateStage stage) {
  // The common case: no migrate function installed.
  if (likely(!migrate_fn_)) {
    return;
  }

  switch (stage) {
    case MigrateStage::Before:
      // We are leaving our last CPU and calling our migration function as we
      // go. Assert that we are running on the proper CPU, and clear our last
      // cpu bookkeeping to indicate that the migration has started.
      DEBUG_ASSERT_MSG(scheduler_state().last_cpu_ == arch_curr_cpu_num(),
                       "Attempting to run Before stage of migration on a CPU "
                       "which is not the last CPU the thread ran on (last cpu = "
                       "%u, curr cpu = %u)\n",
                       scheduler_state().last_cpu_, arch_curr_cpu_num());
      scheduler_state().last_cpu_ = INVALID_CPU;
      if (migrate_pending_) {
        break;
      }
      migrate_pending_ = true;
      migrate_fn_(this, stage);
      break;

    case MigrateStage::After:
      if (!migrate_pending_) {
        break;
      }
      migrate_pending_ = false;
      migrate_fn_(this, stage);
      break;

    case MigrateStage::Exiting:
      migrate_fn_(this, stage);
      break;
  }
}
// Runs the Before migration stage for every thread on the migrate list that is
// not in the READY state and whose last CPU is |cpu|.
void Thread::CallMigrateFnForCpuLocked(cpu_num_t cpu) {
  for (auto& candidate : migrate_list_) {
    if (candidate.state() == THREAD_READY) {
      continue;
    }
    if (candidate.scheduler_state().last_cpu_ != cpu) {
      continue;
    }
    candidate.CallMigrateFnLocked(Thread::MigrateStage::Before);
  }
}
// Installs this thread's context switch function. Acquires the thread lock and
// forwards to SetContextSwitchFnLocked().
void Thread::SetContextSwitchFn(ContextSwitchFn context_switch_fn) {
canary_.Assert();
Guard<MonitoredSpinLock, IrqSave> guard{ThreadLock::Get(), SOURCE_TAG};
SetContextSwitchFnLocked(ktl::move(context_switch_fn));
}
// Replaces this thread's context switch function with |context_switch_fn|.
// Presumably requires the thread lock to be held, per the "Locked" suffix and
// the locking done by SetContextSwitchFn().
void Thread::SetContextSwitchFnLocked(ContextSwitchFn context_switch_fn) {
canary_.Assert();
context_switch_fn_ = ktl::move(context_switch_fn);
}
// Returns true if THREAD_SIGNAL_KILL is pending on this thread, false
// otherwise. The thread lock must be held.
bool Thread::CheckKillSignal() {
  thread_lock.AssertHeld();

  if ((signals() & THREAD_SIGNAL_KILL) == 0) {
    return false;
  }

  // Ensure we don't recurse into thread_exit.
  DEBUG_ASSERT(state() != THREAD_DEATH);
  return true;
}
// Maps this thread's pending signals to a status code. Kill takes precedence
// over suspend:
//   - ZX_ERR_INTERNAL_INTR_KILLED if THREAD_SIGNAL_KILL is set,
//   - ZX_ERR_INTERNAL_INTR_RETRY if THREAD_SIGNAL_SUSPEND is set,
//   - ZX_OK otherwise.
zx_status_t Thread::CheckKillOrSuspendSignal() const {
  // Sample the signals once so both checks see the same snapshot.
  const auto pending = signals();

  if (unlikely(pending & THREAD_SIGNAL_KILL)) {
    return ZX_ERR_INTERNAL_INTR_KILLED;
  }

  return unlikely(pending & THREAD_SIGNAL_SUSPEND) ? ZX_ERR_INTERNAL_INTR_RETRY : ZX_OK;
}
// Finish suspending the current thread.
//
// Notifies the user thread (if any) that it is suspending, parks the thread in
// the suspended state if the suspend signal is still pending, and notifies the
// user thread that it is resuming once the thread runs again. If a kill signal
// is observed at either check, the thread exits instead of resuming.
void Thread::Current::DoSuspend() {
Thread* current_thread = Thread::Current::Get();
// Note: After calling this callback, we must not return without
// calling the callback with THREAD_USER_STATE_RESUME. That is
// because those callbacks act as barriers which control when it is
// safe for the zx_thread_read_state()/zx_thread_write_state()
// syscalls to access the userland register state kept by Thread.
if (current_thread->user_thread_) {
DEBUG_ASSERT(!arch_ints_disabled() || !thread_lock.IsHeld());
current_thread->user_thread_->Suspending();
}
{
Guard<MonitoredSpinLock, IrqSave> guard{ThreadLock::Get(), SOURCE_TAG};
// make sure we haven't been killed while the lock was dropped for the user callback
if (current_thread->CheckKillSignal()) {
// Exit() takes the thread lock itself, so it must be released first.
guard.Release();
Thread::Current::Exit(0);
}
// Make sure the suspend signal wasn't cleared while we were running the
// callback.
if (current_thread->signals() & THREAD_SIGNAL_SUSPEND) {
current_thread->set_suspended();
current_thread->signals_.fetch_and(~THREAD_SIGNAL_SUSPEND, ktl::memory_order_relaxed);
// directly invoke the context switch, since we've already manipulated this thread's state
Scheduler::RescheduleInternal();
// If the thread was killed, we should not allow it to resume. We
// shouldn't call user_callback() with THREAD_USER_STATE_RESUME in
// this case, because there might not have been any request to
// resume the thread.
if (current_thread->CheckKillSignal()) {
guard.Release();
Thread::Current::Exit(0);
}
}
}
if (current_thread->user_thread_) {
DEBUG_ASSERT(!arch_ints_disabled() || !thread_lock.IsHeld());
current_thread->user_thread_->Resuming();
}
}
// Timer callback that requests a stack sample of whichever thread is current
// and re-arms the sampling timer.
//
// |per_cpu_state| points at the sampler's PerCpuState for this timer; the
// |timer| and time arguments are not used.
void Thread::SignalSampleStack(Timer* timer, zx_time_t, void* per_cpu_state) {
  // Regardless of if the thread is marked to be sampled or not we'll set the sample_stack thread
  // signal. This reduces the time we spend in the interrupt context and means we don't need to grab
  // a lock here. When we handle the thread signal in ProcessPendingSignals we'll check if the
  // thread actually needs to be sampled.
  Thread* const running = Thread::Current::Get();
  running->canary_.Assert();
  running->signals_.fetch_or(THREAD_SIGNAL_SAMPLE_STACK, ktl::memory_order_relaxed);

  // We set the timer here as opposed to when we handle the THREAD_SIGNAL_SAMPLE_STACK as a thread
  // could be suspended or killed before the sample signal is handled.
  auto* const sampler_state = static_cast<sampler::internal::PerCpuState*>(per_cpu_state);
  sampler_state->SetTimer();
}
// Handles a pending THREAD_SIGNAL_SAMPLE_STACK on the current thread: clears
// the signal and, for threads with a user thread, asks the thread sampler to
// record a sample using |source| and |gregs|.
//
// Interrupts must be enabled (checked in debug builds).
void Thread::Current::DoSampleStack(GeneralRegsSource source, void* gregs) {
  DEBUG_ASSERT(!arch_ints_disabled());

  Thread* const self = Thread::Current::Get();

  // Nothing to do unless the sample signal is still pending.
  if (!(self->signals() & THREAD_SIGNAL_SAMPLE_STACK)) {
    return;
  }
  self->signals_.fetch_and(~THREAD_SIGNAL_SAMPLE_STACK, ktl::memory_order_relaxed);

  // There's no user thread to sample, just move on.
  if (self->user_thread() == nullptr) {
    return;
  }

  const uint64_t expected_sampler = self->user_thread()->SamplerId();

  // If a thread was marked to be sampled but was first suspended, it may now be long after the
  // sampling session has ended. sampler::SampleThread grabs the global state, checks if it's
  // valid and if the session is the one we were expecting to sample to before attempting a
  // sample.
  auto sampler_result = sampler::ThreadSamplerDispatcher::SampleThread(
      self->pid(), self->tid(), source, gregs, expected_sampler);

  // ZX_ERR_NOT_SUPPORTED indicates that we didn't take a sample, but that was intentional and we
  // should move on.
  const bool hard_failure =
      sampler_result.is_error() && sampler_result.error_value() != ZX_ERR_NOT_SUPPORTED;
  if (!hard_failure) {
    return;
  }

  // Any other error means sampling failed for this thread and likely won't succeed in the
  // future. Likely either the global sampler is now disabled or the per cpu buffer is full.
  // Disable future attempts to sample.
  kcounter_add(thread_sampling_failed, 1);
  self->user_thread()->DisableStackSampling();
}
// Saves the current thread's user state via arch_save_user_state() unless it is already saved.
//
// Returns true if this call performed the save, false if the state had been saved earlier.
// Asserts that the thread lock is held, that |this| is the current thread, and that it has a user
// thread.
bool Thread::SaveUserStateLocked() {
  thread_lock.AssertHeld();
  DEBUG_ASSERT(this == Thread::Current::Get());
  DEBUG_ASSERT(user_thread_ != nullptr);

  const bool needs_save = !user_state_saved_;
  if (needs_save) {
    // Mark the state as saved before handing off to the arch layer.
    user_state_saved_ = true;
    arch_save_user_state(this);
  }
  return needs_save;
}
// Restores the current thread's user state via arch_restore_user_state() and clears the
// "user state saved" flag set by SaveUserStateLocked().
//
// Asserts that the thread lock is held, that |this| is the current thread, that it has a user
// thread, and that user state is currently marked as saved.
void Thread::RestoreUserStateLocked() {
thread_lock.AssertHeld();
DEBUG_ASSERT(this == Thread::Current::Get());
DEBUG_ASSERT(user_thread_ != nullptr);
DEBUG_ASSERT(user_state_saved_);
// Clear the flag before handing off to the arch layer.
user_state_saved_ = false;
arch_restore_user_state(this);
}
// Installs |context| as the current thread's exception context and saves the thread's user state,
// both while holding the thread lock. The results of the two calls are recorded so that the
// destructor only undoes the work this object actually performed.
ScopedThreadExceptionContext::ScopedThreadExceptionContext(const arch_exception_context_t* context)
: thread_(Thread::Current::Get()), context_(context) {
Guard<MonitoredSpinLock, IrqSave> guard{ThreadLock::Get(), SOURCE_TAG};
// It's possible that the context and state have been installed/saved earlier in the call chain.
// If so, then it's some other object's responsibility to remove/restore.
need_to_remove_ = arch_install_exception_context(thread_, context_);
need_to_restore_ = thread_->SaveUserStateLocked();
}
// Undoes, under the thread lock, whatever the constructor did: restores the user state if this
// object saved it, then removes the exception context if this object installed it.
ScopedThreadExceptionContext::~ScopedThreadExceptionContext() {
Guard<MonitoredSpinLock, IrqSave> guard{ThreadLock::Get(), SOURCE_TAG};
// Did we save the state? If so, then it's our job to restore it.
if (need_to_restore_) {
thread_->RestoreUserStateLocked();
}
// Did we install the exception context? If so, then it's our job to remove it.
if (need_to_remove_) {
arch_remove_exception_context(thread_);
}
}
// Processes every signal pending on the current thread, looping until none remain.
//
// |source| identifies what kind of register structure |gregs| points at; both are passed to the
// arch layer to expose general registers to a debugger, to the stack sampler, and are used when
// leaving restricted mode.
//
// Signals are handled in this order on each pass: KILL (never returns), POLICY_EXCEPTION,
// SUSPEND, RESTRICTED_KICK, SAMPLE_STACK. If a restricted kick was observed while in restricted
// mode, the thread leaves restricted mode after all signals have been processed, and that path
// does not return.
void Thread::Current::ProcessPendingSignals(GeneralRegsSource source, void* gregs) {
// Prior to calling this method, interrupts must be disabled. This method may enable interrupts,
// but if it does, it will disable them prior to returning.
//
// TODO(maniscalco): Refer the reader to the to-be-written theory of operations documentation for
// kernel thread signals.
DEBUG_ASSERT(arch_ints_disabled());
Thread* const current_thread = Thread::Current::Get();
// If we're currently in restricted mode, we may decide to exit to normal mode instead as part
// of signal processing. Store this information in a local so that we can process all signals
// before actually exiting.
bool exit_to_normal_mode = false;
// It's possible for certain signals to be asserted, processed, and then re-asserted during this
// method so loop until there are no pending signals.
unsigned int signals;
while ((signals = current_thread->signals()) != 0) {
// THREAD_SIGNAL_KILL
//
// We check this signal first because if asserted, the thread will terminate so there's no point
// in checking other signals before kill.
if (signals & THREAD_SIGNAL_KILL) {
// We're going to call Exit, which generates a user-visible exception on user threads. If
// this thread has a user mode component, call arch_set_suspended_general_regs() to make the
// general registers available to a debugger during the exception.
if (current_thread->user_thread_) {
// TODO(https://fxbug.dev/42076855): Do we need to hold the thread lock here?
Guard<MonitoredSpinLock, NoIrqSave> guard{ThreadLock::Get(), SOURCE_TAG};
arch_set_suspended_general_regs(current_thread, source, gregs);
}
// Thread::Current::Exit may only be called with interrupts enabled. Note, this thread is
// committed to terminating and will never return so we don't need to worry about disabling
// interrupts post-Exit.
arch_enable_ints();
Thread::Current::Exit(0);
__UNREACHABLE;
}
const bool has_user_thread = current_thread->user_thread_ != nullptr;
// THREAD_SIGNAL_POLICY_EXCEPTION
//
// Clear the signal, snapshot the exception code/data under the thread lock, then dispatch the
// policy exception with interrupts enabled.
if (signals & THREAD_SIGNAL_POLICY_EXCEPTION) {
DEBUG_ASSERT(has_user_thread);
// TODO(https://fxbug.dev/42077109): Consider wrapping this up in a method
// (e.g. Thread::Current::ClearSignals) and think hard about whether relaxed is sufficient.
current_thread->signals_.fetch_and(~THREAD_SIGNAL_POLICY_EXCEPTION,
ktl::memory_order_relaxed);
uint32_t policy_exception_code;
uint32_t policy_exception_data;
{
// TODO(https://fxbug.dev/42076855): Do we need to hold the thread lock here?
Guard<MonitoredSpinLock, NoIrqSave> guard{ThreadLock::Get(), SOURCE_TAG};
// Policy exceptions are user-visible so must make the general register state available to
// a debugger.
arch_set_suspended_general_regs(current_thread, source, gregs);
policy_exception_code = current_thread->extra_policy_exception_code_;
policy_exception_data = current_thread->extra_policy_exception_data_;
}
// Call arch_dispatch_user_policy_exception with interrupts enabled, but be sure to disable
// them afterwards.
arch_enable_ints();
zx_status_t status =
arch_dispatch_user_policy_exception(policy_exception_code, policy_exception_data);
ZX_ASSERT_MSG(status == ZX_OK, "arch_dispatch_user_policy_exception() failed: status=%d\n",
status);
arch_disable_ints();
{
// TODO(https://fxbug.dev/42076855): Do we need to hold the thread lock here?
Guard<MonitoredSpinLock, NoIrqSave> guard{ThreadLock::Get(), SOURCE_TAG};
arch_reset_suspended_general_regs(current_thread);
}
}
// THREAD_SIGNAL_SUSPEND
//
// Note that this branch does not clear the signal itself before calling DoSuspend().
if (signals & THREAD_SIGNAL_SUSPEND) {
if (has_user_thread) {
bool saved;
{
// TODO(https://fxbug.dev/42076855): Do we need to hold the thread lock here?
Guard<MonitoredSpinLock, NoIrqSave> guard{ThreadLock::Get(), SOURCE_TAG};
// This thread has been asked to suspend. When a user thread is suspended, its full
// register state (not just the general purpose registers) is accessible to a debugger.
arch_set_suspended_general_regs(current_thread, source, gregs);
saved = current_thread->SaveUserStateLocked();
}
// The enclosing function is called at the boundary of kernel and user mode (e.g. just
// before returning from a syscall, timer interrupt, or architectural exception/fault).
// We've just attempted a save. If the save failed (returned false), then we likely have a
// mismatched save/restore pair, which is a bug.
DEBUG_ASSERT(saved);
arch_enable_ints();
Thread::Current::DoSuspend();
arch_disable_ints();
{
// TODO(https://fxbug.dev/42076855): Do we need to hold the thread lock here?
Guard<MonitoredSpinLock, NoIrqSave> guard{ThreadLock::Get(), SOURCE_TAG};
// Only restore if the save above was actually performed by us.
if (saved) {
current_thread->RestoreUserStateLocked();
}
arch_reset_suspended_general_regs(current_thread);
}
} else {
// No user mode component so we don't need to save any register state.
arch_enable_ints();
Thread::Current::DoSuspend();
arch_disable_ints();
}
}
// THREAD_SIGNAL_RESTRICTED_KICK
//
if (signals & THREAD_SIGNAL_RESTRICTED_KICK) {
current_thread->signals_.fetch_and(~THREAD_SIGNAL_RESTRICTED_KICK, ktl::memory_order_relaxed);
if (arch_get_restricted_flag()) {
exit_to_normal_mode = true;
} else {
// If we aren't currently in restricted mode, remember that we have a restricted kick pending
// for the next time we try to enter restricted mode.
current_thread->set_restricted_kick_pending(true);
}
}
// THREAD_SIGNAL_SAMPLE_STACK
//
if (signals & THREAD_SIGNAL_SAMPLE_STACK) {
// Sampling the user stack may page fault as we try to do usercopies.
arch_enable_ints();
Thread::Current::DoSampleStack(source, gregs);
arch_disable_ints();
}
}
// Interrupts must remain disabled for the remainder of this function. If for any reason we need
// to enable interrupts in handling we have to re-enter the loop above in order to process signals
// that may have been raised.
if (exit_to_normal_mode) {
switch (source) {
case GeneralRegsSource::Iframe: {
const iframe_t* iframe = reinterpret_cast<const iframe_t*>(gregs);
RestrictedLeaveIframe(iframe, ZX_RESTRICTED_REASON_KICK);
__UNREACHABLE;
}
#if defined(__x86_64__)
case GeneralRegsSource::Syscall: {
const syscall_regs_t* syscall_regs = reinterpret_cast<const syscall_regs_t*>(gregs);
RestrictedLeaveSyscall(syscall_regs, ZX_RESTRICTED_REASON_KICK);
__UNREACHABLE;
}
#endif // defined(__x86_64__)
default:
DEBUG_ASSERT_MSG(false, "invalid source %u\n", static_cast<uint32_t>(source));
}
}
}
bool Thread::Current::CheckForRestrictedKick() {
LTRACE_ENTRY;
DEBUG_ASSERT(arch_ints_disabled());
Thread* current_thread = Thread::Current::Get();
if (current_thread->restricted_kick_pending()) {
current_thread->set_restricted_kick_pending(false);
return true;
}
return false;
}
/**
 * @brief Yield the cpu to another thread
 *
 * This function places the current thread at the end of the run queue
 * and yields the cpu to another waiting thread (if any.)
 *
 * This function will return at some later time. Possibly immediately if
 * no other threads are waiting to execute.
 *
 * The current thread must be in the THREAD_RUNNING state and blocking must be
 * allowed; both are checked with debug assertions.
 */
void Thread::Current::Yield() {
// Marked [[maybe_unused]] because the pointer is only consumed by assertions.
[[maybe_unused]] Thread* current_thread = Thread::Current::Get();
current_thread->canary_.Assert();
DEBUG_ASSERT(current_thread->state() == THREAD_RUNNING);
DEBUG_ASSERT(!arch_blocking_disallowed());
// The yield counter is bumped and the scheduler is entered with the thread lock held.
Guard<MonitoredSpinLock, IrqSave> guard{ThreadLock::Get(), SOURCE_TAG};
CPU_STATS_INC(yields);
Scheduler::Yield();
}
/**
 * @brief Preempt the current thread from an interrupt
 *
 * This function places the current thread at the head of the run
 * queue and then yields the cpu to another thread.
 *
 * The current thread must be in the THREAD_RUNNING state and blocking must be
 * allowed; both are checked with debug assertions.
 */
void Thread::Current::Preempt() {
Thread* current_thread = Thread::Current::Get();
current_thread->canary_.Assert();
DEBUG_ASSERT(current_thread->state() == THREAD_RUNNING);
DEBUG_ASSERT(!arch_blocking_disallowed());
if (!current_thread->IsIdle()) {
// only track when a meaningful preempt happens
CPU_STATS_INC(irq_preempts);
}
// Enter the scheduler with the thread lock held.
Guard<MonitoredSpinLock, IrqSave> guard{ThreadLock::Get(), SOURCE_TAG};
Scheduler::Preempt();
}
/**
 * @brief Reevaluate the run queue on the current cpu.
 *
 * This function places the current thread at the head of the run
 * queue and then yields the cpu to another thread. Similar to
 * Thread::Current::Preempt, but intended to be used at non interrupt context.
 *
 * The current thread must be in the THREAD_RUNNING state and blocking must be
 * allowed; both are checked with debug assertions.
 */
void Thread::Current::Reschedule() {
Thread* current_thread = Thread::Current::Get();
current_thread->canary_.Assert();
DEBUG_ASSERT(current_thread->state() == THREAD_RUNNING);
DEBUG_ASSERT(!arch_blocking_disallowed());
// Enter the scheduler with the thread lock held.
Guard<MonitoredSpinLock, IrqSave> guard{ThreadLock::Get(), SOURCE_TAG};
Scheduler::Reschedule();
}
// Resets the current CPU's preemption timer to |deadline| and bumps the
// thread_timeslice_extended counter.
void PreemptionState::SetPreemptionTimerForExtension(zx_time_t deadline) {
// Interrupts must be disabled when calling PreemptReset.
InterruptDisableGuard interrupt_disable;
percpu::Get(arch_curr_cpu_num()).timer_queue.PreemptReset(deadline);
kcounter_add(thread_timeslice_extended, 1);
}
// Flushes pending preemptions recorded in |preempts_pending_|, as selected by |flush|.
//
// If a local preemption is pending and FlushLocal is requested, the local pending bit is cleared
// and Scheduler::Preempt() is called. Otherwise, if FlushRemote is requested, the pending CPUs
// are sent a reschedule via mp_reschedule() and every bit except the current CPU's is cleared.
//
// The work is done with interrupts disabled and the thread lock held; the lock is acquired here
// only if the caller does not already hold it.
void PreemptionState::FlushPendingContinued(Flush flush) {
// If we're flushing the local CPU, make sure OK to block since flushing
// local may trigger a reschedule.
DEBUG_ASSERT(((flush & FlushLocal) == 0) || !arch_blocking_disallowed());
const auto do_flush = [this, flush]() TA_REQ(thread_lock) {
// Recheck, pending preemptions could have been flushed by a context switch
// before interrupts were disabled.
const cpu_mask_t pending_mask = preempts_pending_;
// If there is a pending local preemption the scheduler will take care of
// flushing all pending reschedules.
const cpu_mask_t current_cpu_mask = cpu_num_to_mask(arch_curr_cpu_num());
if ((pending_mask & current_cpu_mask) != 0 && (flush & FlushLocal) != 0) {
// Clear the local preempt pending flag before calling preempt. Failure
// to do this can cause recursion during Scheduler::Preempt if any code
// (such as debug tracing code) attempts to disable and re-enable
// preemption during the scheduling operation.
preempts_pending_ &= ~current_cpu_mask;
Scheduler::Preempt();
} else if ((flush & FlushRemote) != 0) {
// The current cpu is ignored by mp_reschedule if present in the mask.
mp_reschedule(pending_mask, 0);
// Keep only the current CPU's pending bit (if set); all remote bits are now flushed.
preempts_pending_ &= current_cpu_mask;
}
};
// This method may be called with interrupts enabled or disabled and with or
// without holding the thread lock.
InterruptDisableGuard interrupt_disable;
if (thread_lock.IsHeld()) {
// AssertHeld() here satisfies the TA_REQ(thread_lock) annotation on do_flush.
thread_lock.AssertHeld();
do_flush();
} else {
Guard<MonitoredSpinLock, IrqSave> guard{ThreadLock::Get(), SOURCE_TAG};
do_flush();
}
}
// Timer callback used to wake up a sleeping thread.
//
// |arg| is the Thread that armed the timer; the actual work is delegated to HandleSleep().
void Thread::SleepHandler(Timer* timer, zx_time_t now, void* arg) {
  Thread* const sleeper = static_cast<Thread*>(arg);
  sleeper->canary_.Assert();
  sleeper->HandleSleep(timer, now);
}
// Wakes this thread from a sleep once its sleep timer fires.
//
// Does nothing if the timer was canceled while we were trying to take the thread lock, or if the
// thread is no longer in the THREAD_SLEEPING state.
void Thread::HandleSleep(Timer* timer, zx_time_t now) {
  // Spin trylocking on the thread lock: the routine that set up the callback may be trying to
  // cancel this timer at the same time while holding the thread_lock.
  const bool canceled = timer->TrylockOrCancel(&thread_lock);
  if (canceled) {
    return;
  }

  // From here on the thread lock is held and must be released on every path.
  if (state() == THREAD_SLEEPING) {
    // Unblock the thread, regardless of whether the sleep was interruptible.
    wait_queue_state_.Unsleep(this, ZX_OK);
  }
  thread_lock.Release();
}
#define MIN_SLEEP_SLACK ZX_USEC(1)
#define MAX_SLEEP_SLACK ZX_SEC(1)
#define DIV_SLEEP_SLACK 10u
// Computes the amount of slack the thread_sleep timer will use.
//
// The slack is the time remaining until |deadline| divided by DIV_SLEEP_SLACK, limited to the
// range [MIN_SLEEP_SLACK, MAX_SLEEP_SLACK]. A deadline already in the past yields
// MIN_SLEEP_SLACK.
static zx_duration_t sleep_slack(zx_time_t deadline, zx_time_t now) {
  if (deadline < now) {
    return MIN_SLEEP_SLACK;
  }

  const zx_duration_t proportional = zx_time_sub_time(deadline, now) / DIV_SLEEP_SLACK;
  if (proportional < MIN_SLEEP_SLACK) {
    return MIN_SLEEP_SLACK;
  }
  if (proportional > MAX_SLEEP_SLACK) {
    return MAX_SLEEP_SLACK;
  }
  return proportional;
}
/**
 * @brief Put thread to sleep; deadline specified in ns
 *
 * This function puts the current thread to sleep until the specified
 * deadline has occurred.
 *
 * Note that this function could continue to sleep after the specified deadline
 * if other threads are running. When the deadline occurs, this thread will
 * be placed at the head of the run queue.
 *
 * interruptible argument allows this routine to return early if the thread was signaled
 * for something.
 *
 * @param deadline      When to wake up.
 * @param interruptible Whether pending thread signals may abort or interrupt the sleep.
 * @param now           The caller's notion of the current time; if the deadline is not later
 *                      than this the function returns immediately.
 *
 * @return ZX_OK if the deadline had already passed on entry;
 *         ZX_ERR_INTERNAL_INTR_KILLED or ZX_ERR_INTERNAL_INTR_RETRY if the sleep is interruptible
 *         and a signal was already pending on entry; otherwise the blocked status recorded in
 *         the thread's wait queue state.
 */
zx_status_t Thread::Current::SleepEtc(const Deadline& deadline, Interruptible interruptible,
zx_time_t now) {
Thread* current_thread = Thread::Current::Get();
current_thread->canary_.Assert();
DEBUG_ASSERT(current_thread->state() == THREAD_RUNNING);
DEBUG_ASSERT(!current_thread->IsIdle());
DEBUG_ASSERT(!arch_blocking_disallowed());
// Skip all of the work if the deadline has already passed.
if (deadline.when() <= now) {
return ZX_OK;
}
// The timer is declared before the guard, so it is destroyed after the thread lock is released.
Timer timer;
Guard<MonitoredSpinLock, IrqSave> guard{ThreadLock::Get(), SOURCE_TAG};
// If any signal is pending and the sleep is interruptible, abort here: report "killed" for
// THREAD_SIGNAL_KILL and "retry" for anything else.
if (interruptible == Interruptible::Yes && unlikely((current_thread->signals()))) {
if (current_thread->signals() & THREAD_SIGNAL_KILL) {
return ZX_ERR_INTERNAL_INTR_KILLED;
} else {
return ZX_ERR_INTERNAL_INTR_RETRY;
}
}
// set a one shot timer to wake us up and reschedule
timer.Set(deadline, &Thread::SleepHandler, current_thread);
current_thread->set_sleeping();
current_thread->wait_queue_state_.Block(interruptible, ZX_OK);
// always cancel the timer, since we may be racing with the timer tick on other cpus
timer.Cancel();
return current_thread->wait_queue_state_.BlockedStatus();
}
// Sleeps, uninterruptibly and with no timer slack, until the absolute time |deadline|.
// Returns whatever SleepEtc() returns.
zx_status_t Thread::Current::Sleep(zx_time_t deadline) {
  const zx_time_t start = current_time();
  const Deadline wake_deadline = Deadline::no_slack(deadline);
  return SleepEtc(wake_deadline, Interruptible::No, start);
}
// Sleeps, uninterruptibly and with no timer slack, for |delay| measured from the current time.
// Returns whatever SleepEtc() returns.
zx_status_t Thread::Current::SleepRelative(zx_duration_t delay) {
  const zx_time_t start = current_time();
  const zx_time_t wake_time = zx_time_add_duration(start, delay);
  return SleepEtc(Deadline::no_slack(wake_time), Interruptible::No, start);
}
// Sleeps interruptibly until the absolute time |deadline|, using late timer slack whose amount is
// computed by sleep_slack(). Returns whatever SleepEtc() returns.
zx_status_t Thread::Current::SleepInterruptible(zx_time_t deadline) {
  const zx_time_t start = current_time();
  const zx_duration_t slack_amount = sleep_slack(deadline, start);
  const TimerSlack late_slack(slack_amount, TIMER_SLACK_LATE);
  const Deadline wake_deadline(deadline, late_slack);
  return SleepEtc(wake_deadline, Interruptible::Yes, start);
}
/**
 * @brief Return the number of nanoseconds a thread has been running for.
 *
 * The thread_lock is held for the whole computation so that the accumulated
 * runtime and the thread state are observed consistently. For a thread that
 * is currently running, the time since it last started running is added to
 * the accumulated total.
 */
zx_duration_t Thread::Runtime() const {
  Guard<MonitoredSpinLock, IrqSave> guard{ThreadLock::Get(), SOURCE_TAG};

  const zx_duration_t accumulated = scheduler_state_.runtime_ns();
  if (state() != THREAD_RUNNING) {
    return accumulated;
  }

  const zx_duration_t in_flight =
      zx_time_sub_time(current_time(), scheduler_state_.last_started_running());
  return zx_duration_add_duration(accumulated, in_flight);
}
/**
 * @brief Get the last CPU the given thread was run on, or INVALID_CPU if the
 * thread has never run.
 *
 * Acquires the thread lock while reading the scheduler state.
 */
cpu_num_t Thread::LastCpu() const {
Guard<MonitoredSpinLock, IrqSave> guard{ThreadLock::Get(), SOURCE_TAG};
return scheduler_state_.last_cpu_;
}
/**
 * @brief Get the last CPU the given thread was run on, or INVALID_CPU if the
 * thread has never run.
 *
 * Unlike LastCpu(), this variant does not acquire the thread lock itself.
 * NOTE(review): presumably the caller must already hold the thread lock, as
 * the "Locked" suffix suggests -- confirm against the declaration.
 */
cpu_num_t Thread::LastCpuLocked() const { return scheduler_state_.last_cpu_; }
/**
 * @brief Construct a thread t around the current running state
 *
 * This should be called once per CPU initialization. It will create
 * a thread that is pinned to the current CPU and running at the
 * highest priority.
 *
 * Interrupts must be disabled. The thread is marked detached, starts with
 * preemption disabled, and is added to the front of the global thread list.
 *
 * @param t    Storage for the thread to construct.
 * @param name Name to give the thread.
 */
void thread_construct_first(Thread* t, const char* name) {
DEBUG_ASSERT(arch_ints_disabled());
construct_thread(t, name);
t->set_detached(true);
// Setup the scheduler state.
Scheduler::InitializeFirstThread(t);
// Start out with preemption disabled to avoid attempts to reschedule until
// threading is fully enabled. This simplifies code paths shared between
// initialization and runtime (e.g. logging). Preemption is enabled when the
// idle thread for the current CPU is ready.
t->preemption_state().PreemptDisable();
arch_thread_construct_first(t);
// Take care not to touch any locks when invoked by early init code that runs
// before global ctors are called. The thread_list is safe to mutate before
// global ctors are run.
if (lk_global_constructors_called()) {
Guard<MonitoredSpinLock, IrqSave> guard{ThreadLock::Get(), SOURCE_TAG};
thread_list->push_front(t);
} else {
// The lambda exists only to opt this unlocked list mutation out of thread safety analysis.
[t]() TA_NO_THREAD_SAFETY_ANALYSIS { thread_list->push_front(t); }();
}
}
/**
 * @brief Initialize threading system
 *
 * This function is called once, from kmain()
 *
 * Must run on CPU 0. Initializes the global thread list and the boot percpu
 * data, then constructs the "bootstrap" thread in CPU 0's idle/power thread
 * storage to represent the currently running context.
 */
void thread_init_early() {
DEBUG_ASSERT(arch_curr_cpu_num() == 0);
// Initialize the thread list. This needs to be done manually now, since initial thread code
// manipulates the list before global constructors are run.
thread_list.Initialize();
// Init the boot percpu data.
percpu::InitializeBoot();
// create a thread to cover the current running state
Thread* t = &percpu::Get(0).idle_power_thread.thread();
thread_construct_first(t, "bootstrap");
}
/**
* @brief Change name of current thread
*/
void Thread::Current::SetName(const char* name) {
Thread* current_thread = Thread::Current::Get();
strlcpy(current_thread->name_, name, sizeof(current_thread->name_));
}
/**
 * @brief Change the base profile of current thread
 *
 * Changes the base profile of the thread to the base profile supplied by the
 * users, dealing with any side effects in the process.
 *
 * Preemption is disabled and the thread lock is held for the duration of the
 * change.
 *
 * @param profile The base profile to apply to the thread.
 */
void Thread::SetBaseProfile(const SchedulerState::BaseProfile& profile) {
canary_.Assert();
// It is not sufficient to simply hold the thread lock while changing the
// profile of a thread. Doing so runs the risk that a change to a PI graph
// results in another thread becoming "more runnable" than we are, and then
// immediately context switching to that thread.
//
// Basically, when we interact with the scheduler, we cannot always think of
// the thread lock as a lock. While we cannot take any interrupts, and no
// other threads can access our object's state, we _can_ accidentally give up
// our timeslice to another thread, and the thread lock as well in the
// process. That thread can then (rarely) end up calling back into object
// state we are modifying (like, an OwnedWaitQueue) which could end up being
// Very Bad.
//
// By adding an AutoPreemptDisabler, we can make the thread_lock behave more
// like a real lock (at least for the OWQ state). Interactions with the
// scheduler might result in another thread needing to run, but at least we
// will have deferred that until we are finished interacting with our
// queue and have dropped the thread lock.
AnnotatedAutoPreemptDisabler apd;
Guard<MonitoredSpinLock, IrqSave> guard{ThreadLock::Get(), SOURCE_TAG};
this->get_lock().AssertHeld();
OwnedWaitQueue::SetThreadBaseProfileAndPropagate(*this, profile);
}
/**
 * @brief Set the pointer to the user-mode thread, this will receive callbacks:
 * ThreadDispatcher::Exiting()
 * ThreadDispatcher::Suspending() / Resuming()
 *
 * This also caches the associated koids of the thread and process
 * dispatchers associated with the given ThreadDispatcher.
 *
 * May only be called while the thread is in THREAD_INITIAL and has no user
 * thread yet; both are checked with debug assertions.
 */
void Thread::SetUsermodeThread(fbl::RefPtr<ThreadDispatcher> user_thread) {
canary_.Assert();
DEBUG_ASSERT(state() == THREAD_INITIAL);
DEBUG_ASSERT(!user_thread_);
// Take ownership of the reference first; the koids below are read through |user_thread_|.
user_thread_ = ktl::move(user_thread);
tid_ = user_thread_->get_koid();
pid_ = user_thread_->process()->get_koid();
// All user mode threads are detached since they are responsible for cleaning themselves up.
// We can set this directly because we've checked that we are in the initial state.
flags_ |= THREAD_FLAG_DETACHED;
}
/**
 * @brief Become an idle thread
 *
 * This function marks the current thread as the idle thread -- the one which
 * executes when there is nothing else to do. This function does not return.
 * This function is called once at boot on the first cpu.
 *
 * Interrupts must be disabled on entry; they are enabled just before dropping
 * into the idle routine.
 */
void Thread::Current::BecomeIdle() {
DEBUG_ASSERT(arch_ints_disabled());
Thread* t = Thread::Current::Get();
cpu_num_t curr_cpu = arch_curr_cpu_num();
// Set our name
char name[16];
snprintf(name, sizeof(name), "idle %u", curr_cpu);
Thread::Current::SetName(name);
// Mark ourself as idle
t->flags_ |= THREAD_FLAG_IDLE;
// Now that we are the idle thread, make sure that we drop out of the
// scheduler's bookkeeping altogether.
{
Guard<MonitoredSpinLock, IrqSave> guard{ThreadLock::Get(), SOURCE_TAG};
Scheduler::RemoveFirstThread(t);
}
t->set_running();
// Cpu is active.
mp_set_curr_cpu_active(true);
mp_set_cpu_idle(curr_cpu);
// Pend a preemption to ensure a reschedule. Blocking is marked as disallowed only for the
// duration of the PreemptSetPending() call.
arch_set_blocking_disallowed(true);
t->preemption_state().PreemptSetPending();
arch_set_blocking_disallowed(false);
mp_signal_curr_cpu_ready();
// Enable preemption to start scheduling. Preemption is disabled during early
// threading startup on each CPU to prevent incidental thread wakeups (e.g.
// due to logging) from rescheduling on the local CPU before the idle thread
// is ready.
t->preemption_state().PreemptReenable();
DEBUG_ASSERT(t->preemption_state().PreemptIsEnabled());
// We're now properly in the idle routine. Reenable interrupts and drop
// into the idle routine, never return.
arch_enable_ints();
IdlePowerThread::Run(nullptr);
__UNREACHABLE;
}
/**
 * @brief Create a thread around the current execution context, preserving |t|'s stack
 *
 * Prior to calling, |t->stack| must be properly constructed. See |vm_allocate_kstack|.
 *
 * Interrupts must be disabled. Blocking is marked as disallowed here and is
 * not re-allowed by this function.
 */
void Thread::SecondaryCpuInitEarly() {
DEBUG_ASSERT(arch_ints_disabled());
DEBUG_ASSERT(stack_.base() != 0);
DEBUG_ASSERT(IS_ALIGNED(this, alignof(Thread)));
// At this point, the CPU isn't far enough along to allow threads to block. Set blocking
// disallowed to catch bugs where code might block before we're ready.
arch_set_blocking_disallowed(true);
percpu::InitializeSecondaryFinish();
// snprintf bounds the write to the buffer, so a long CPU number is truncated rather than
// overflowing |name|.
char name[16];
snprintf(name, sizeof(name), "cpu_init %u", arch_curr_cpu_num());
thread_construct_first(this, name);
// Emitting the thread metadata usually happens during Thread::Resume(), however, cpu_init threads
// are never resumed. Emit the metadata here so that the thread name is associated with its tid.
KTRACE_KERNEL_OBJECT("kernel:meta", this->tid(), ZX_OBJ_TYPE_THREAD, this->name(),
("process", ktrace::Koid(this->pid())));
}
/**
 * @brief The last routine called on the secondary cpu's bootstrap thread.
 *
 * Marks the CPU active, brings the idle/power thread back to the active state
 * (reviving it when the CPU is returning from offline), initializes the DPC
 * queue for this CPU, removes the bootstrap thread from the scheduler's
 * bookkeeping, signals that the CPU is ready, and finally exits the bootstrap
 * thread.
 *
 * Blocking must still be disallowed on entry.
 */
void thread_secondary_cpu_entry() {
DEBUG_ASSERT(arch_blocking_disallowed());
mp_set_curr_cpu_active(true);
percpu& current_cpu = percpu::GetCurrent();
// Signal the idle/power thread to transition to active but don't wait for it, since it cannot run
// until this thread either blocks or exits below. The idle thread will run immediately upon exit
// and complete the transition, if necessary.
const IdlePowerThread::TransitionResult result =
current_cpu.idle_power_thread.TransitionOfflineToActive(ZX_TIME_INFINITE_PAST);
// The first time a secondary CPU becomes active after boot the CPU power thread is already in the
// active state. If the CPU power thread is not in its initial active state, it is being returned
// to active from offline and needs to be revived to resume its normal function.
if (result.starting_state != IdlePowerThread::State::Active) {
Thread::ReviveIdlePowerThread(arch_curr_cpu_num());
}
// CAREFUL: This must happen after the idle/power thread is revived, since creating the DPC thread
// can contend on VM locks and could cause this CPU to go idle.
current_cpu.dpc_queue.InitForCurrentCpu();
// Remove ourselves from the Scheduler's bookkeeping.
{
Guard<MonitoredSpinLock, IrqSave> guard{ThreadLock::Get(), SOURCE_TAG};
Scheduler::RemoveFirstThread(Thread::Current::Get());
}
mp_signal_curr_cpu_ready();
// Exit from our bootstrap thread, and enter the scheduler on this cpu.
Thread::Current::Exit(0);
}
/**
 * @brief Create an idle thread for a secondary CPU
 *
 * The thread is created in the idle/power thread storage of |cpu_num|'s percpu
 * data, named "idle <cpu_num>", runs IdlePowerThread::Run, is flagged idle and
 * detached, has its hard affinity set to |cpu_num| only, and is handed to the
 * scheduler via Scheduler::UnblockIdle().
 *
 * @param cpu_num The secondary CPU; must be non-zero and less than SMP_MAX_CPUS.
 * @return The created thread, or nullptr if Thread::CreateEtc() failed.
 */
Thread* Thread::CreateIdleThread(cpu_num_t cpu_num) {
DEBUG_ASSERT(cpu_num != 0 && cpu_num < SMP_MAX_CPUS);
char name[16];
snprintf(name, sizeof(name), "idle %u", cpu_num);
Thread* t = Thread::CreateEtc(&percpu::Get(cpu_num).idle_power_thread.thread(), name,
IdlePowerThread::Run, nullptr,
SchedulerState::BaseProfile{IDLE_PRIORITY}, nullptr);
if (t == nullptr) {
return t;
}
t->flags_ |= THREAD_FLAG_IDLE | THREAD_FLAG_DETACHED;
t->scheduler_state_.hard_affinity_ = cpu_num_to_mask(cpu_num);
// The scheduler is entered with the thread lock held.
Guard<MonitoredSpinLock, IrqSave> guard{ThreadLock::Get(), SOURCE_TAG};
Scheduler::UnblockIdle(t);
return t;
}
// Re-initializes the idle/power thread of secondary CPU |cpu_num| so that it starts executing
// again from Thread::Trampoline, and resets its preemption state.
//
// |cpu_num| must be non-zero and less than SMP_MAX_CPUS. The thread is expected to already be
// flagged idle, to have its hard affinity set to |cpu_num| only, and to have IdlePowerThread::Run
// as its entry point; all of these are checked with debug assertions.
void Thread::ReviveIdlePowerThread(cpu_num_t cpu_num) {
DEBUG_ASSERT(cpu_num != 0 && cpu_num < SMP_MAX_CPUS);
Thread* thread = &percpu::Get(cpu_num).idle_power_thread.thread();
DEBUG_ASSERT(thread->flags() & THREAD_FLAG_IDLE);
DEBUG_ASSERT(thread->scheduler_state().hard_affinity() == cpu_num_to_mask(cpu_num));
DEBUG_ASSERT(thread->task_state().entry() == IdlePowerThread::Run);
arch_thread_initialize(thread, reinterpret_cast<vaddr_t>(Thread::Trampoline));
thread->preemption_state().Reset();
}
/**
* @brief Return the name of the "owner" of the thread.
*
* Returns "kernel" if there is no owner.
*/
void Thread::OwnerName(char (&out_name)[ZX_MAX_NAME_LEN]) const {
if (user_thread_) {
[[maybe_unused]] zx_status_t status = user_thread_->process()->get_name(out_name);
DEBUG_ASSERT(status == ZX_OK);
return;
}
memcpy(out_name, "kernel", 7);
}
// Maps a thread state to the short label used in thread dumps. Both blocked states share the
// label "blok"; any value not listed maps to "unkn".
static const char* thread_state_to_str(enum thread_state state) {
  const char* label = "unkn";
  switch (state) {
    case THREAD_INITIAL:
      label = "init";
      break;
    case THREAD_SUSPENDED:
      label = "susp";
      break;
    case THREAD_READY:
      label = "rdy";
      break;
    case THREAD_RUNNING:
      label = "run";
      break;
    case THREAD_BLOCKED:
    case THREAD_BLOCKED_READ_LOCK:
      label = "blok";
      break;
    case THREAD_SLEEPING:
      label = "slep";
      break;
    case THREAD_DEATH:
      label = "deth";
      break;
    default:
      break;
  }
  return label;
}
/**
 * @brief Dump debugging info about the specified thread.
 *
 * @param t         The thread to describe. A thread with an invalid canary is
 *                  still dumped, after printing a warning.
 * @param full_dump When true, emit the multi-line description via dprintf(INFO, ...);
 *                  otherwise print a single summary line via printf().
 */
void ThreadDumper::DumpLocked(const Thread* t, bool full_dump) {
if (!t->canary().Valid()) {
dprintf(INFO, "dump_thread WARNING: thread at %p has bad magic\n", t);
}
// Total runtime: the accumulated runtime plus, for a running thread, the time since it last
// started running.
zx_duration_t runtime = t->scheduler_state().runtime_ns();
if (t->state() == THREAD_RUNNING) {
zx_duration_t recent =
zx_time_sub_time(current_time(), t->scheduler_state().last_started_running());
runtime = zx_duration_add_duration(runtime, recent);
}
char oname[ZX_MAX_NAME_LEN];
t->OwnerName(oname);
// Human readable description of the effective profile: the weight for fair threads, or the
// capacity and deadline for deadline threads.
char profile_str[64]{0};
if (const SchedulerState::EffectiveProfile ep =
t->scheduler_state().SnapshotEffectiveProfileLocked();
ep.IsFair()) {
snprintf(profile_str, sizeof(profile_str), "Fair (w %ld)", ep.fair.weight.raw_value());
} else {
DEBUG_ASSERT(ep.IsDeadline());
snprintf(profile_str, sizeof(profile_str), "Deadline (c,d = %ld,%ld)",
ep.deadline.capacity_ns.raw_value(), ep.deadline.deadline_ns.raw_value());
}
if (full_dump) {
dprintf(INFO, "dump_thread: t %p (%s:%s)\n", t, oname, t->name());
dprintf(INFO,
"\tstate %s, curr/last cpu %d/%d, hard_affinity %#x, soft_cpu_affinity %#x, "
"%s, remaining time slice %" PRIi64 "\n",
thread_state_to_str(t->state()), (int)t->scheduler_state().curr_cpu(),
(int)t->scheduler_state().last_cpu(), t->scheduler_state().hard_affinity(),
t->scheduler_state().soft_affinity(), profile_str,
t->scheduler_state().time_slice_ns());
// The runtime is reported both in nanoseconds and in whole seconds.
dprintf(INFO, "\truntime_ns %" PRIi64 ", runtime_s %" PRIi64 "\n", runtime,
runtime / 1000000000);
t->stack().DumpInfo(INFO);
// Flags are shown both as a raw hex value and as two-letter tags for the flags that are set.
dprintf(INFO, "\tentry %p, arg %p, flags 0x%x %s%s%s%s\n", t->task_state_.entry_,
t->task_state_.arg_, t->flags_, (t->flags_ & THREAD_FLAG_DETACHED) ? "Dt" : "",
(t->flags_ & THREAD_FLAG_FREE_STRUCT) ? "Ft" : "",
(t->flags_ & THREAD_FLAG_IDLE) ? "Id" : "", (t->flags_ & THREAD_FLAG_VCPU) ? "Vc" : "");
dprintf(INFO, "\twait queue %p, blocked_status %d, interruptible %s, wait queues owned %s\n",
t->wait_queue_state().blocking_wait_queue_, t->wait_queue_state().blocked_status_,
t->wait_queue_state().interruptible_ == Interruptible::Yes ? "yes" : "no",
t->wait_queue_state().owned_wait_queues_.is_empty() ? "no" : "yes");
dprintf(INFO, "\taspace %p\n", t->aspace_);
dprintf(INFO, "\tuser_thread %p, pid %" PRIu64 ", tid %" PRIu64 "\n", t->user_thread_.get(),
t->pid(), t->tid());
arch_dump_thread(t);
} else {
// Note: the summary line goes through printf() rather than dprintf(INFO, ...).
printf("thr %p st %4s owq %d %s pid %" PRIu64 " tid %" PRIu64 " (%s:%s)\n", t,
thread_state_to_str(t->state()), !t->wait_queue_state().owned_wait_queues_.is_empty(),
profile_str, t->pid(), t->tid(), oname, t->name());
}
}
// Dumps debugging info about this thread while holding the thread lock. |full| selects the
// multi-line dump over the single-line summary.
void Thread::Dump(bool full) const {
Guard<MonitoredSpinLock, IrqSave> guard{ThreadLock::Get(), SOURCE_TAG};
ThreadDumper::DumpLocked(this, full);
}
/**
 * @brief Dump debugging info about all threads
 *
 * Walks the global thread list in order. The walk stops at the first thread
 * whose canary is invalid, after hexdumping that thread structure.
 */
void Thread::DumpAllLocked(bool full) {
  for (const Thread& thread : thread_list.Get()) {
    if (thread.canary().Valid()) {
      ThreadDumper::DumpLocked(&thread, full);
      continue;
    }

    // A corrupted entry means the rest of the list cannot be trusted; report it and stop.
    dprintf(INFO, "bad magic on thread struct %p, aborting.\n", &thread);
    hexdump(&thread, sizeof(Thread));
    break;
  }
}
// Dumps debugging info about all threads while holding the thread lock. |full| selects the
// multi-line dump over the single-line summary.
void Thread::DumpAll(bool full) {
Guard<MonitoredSpinLock, IrqSave> guard{ThreadLock::Get(), SOURCE_TAG};
DumpAllLocked(full);
}
// Dumps debugging info about every thread whose tid equals |tid| while holding the thread lock.
// |full| selects the multi-line dump over the single-line summary.
void Thread::DumpTid(zx_koid_t tid, bool full) {
Guard<MonitoredSpinLock, IrqSave> guard{ThreadLock::Get(), SOURCE_TAG};
DumpTidLocked(tid, full);
}
// Dumps debugging info about every thread in the global thread list whose tid equals |tid|.
//
// The canary is only checked for threads whose tid matches; the walk stops at the first matching
// thread with an invalid canary, after hexdumping that thread structure.
void Thread::DumpTidLocked(zx_koid_t tid, bool full) {
  for (const Thread& thread : thread_list.Get()) {
    if (thread.tid() == tid) {
      if (!thread.canary().Valid()) {
        dprintf(INFO, "bad magic on thread struct %p, aborting.\n", &thread);
        hexdump(&thread, sizeof(Thread));
        break;
      }
      ThreadDumper::DumpLocked(&thread, full);
    }
  }
}
// Looks up a thread by tid with a linear walk of the global thread list, performed under the
// thread lock. Returns the first thread whose tid equals |tid|, or nullptr if there is none.
Thread* thread_id_to_thread_slow(zx_koid_t tid) {
  Guard<MonitoredSpinLock, IrqSave> guard{ThreadLock::Get(), SOURCE_TAG};

  Thread* match = nullptr;
  for (Thread& candidate : thread_list.Get()) {
    if (candidate.tid() == tid) {
      match = &candidate;
      break;
    }
  }
  return match;
}
/** @} */
// Used by ktrace at the start of a trace to ensure that all
// the running threads, processes, and their names are known
//
// Walks the global thread list under the thread lock and emits one kernel object record per
// thread carrying its tid, its name, and the koid of its process.
void ktrace_report_live_threads() {
Guard<MonitoredSpinLock, IrqSave> guard{ThreadLock::Get(), SOURCE_TAG};
for (Thread& t : thread_list.Get()) {
t.canary().Assert();
KTRACE_KERNEL_OBJECT_ALWAYS(t.tid(), ZX_OBJ_TYPE_THREAD, t.name(),
("process", ktrace::Koid(t.pid())));
}
}
// Forwards a state change to the user thread's runtime stats. Threads without a user thread are
// ignored.
void Thread::UpdateRuntimeStats(thread_state new_state) {
  if (!user_thread_) {
    return;
  }
  user_thread_->UpdateRuntimeStats(new_state);
}
namespace {

// TODO(maniscalco): Consider moving this method to the KernelStack class.
// That's probably a better home for it.
//
// Copies |sz| bytes starting at |ptr| into |out|, provided |ptr| is a kernel address that is no
// lower than the base of |thread|'s stack and no higher than |sz| bytes below its top.
//
// Returns ZX_OK on success, or ZX_ERR_NOT_FOUND (without touching |out|) if the check fails.
zx_status_t ReadStack(Thread* thread, vaddr_t ptr, vaddr_t* out, size_t sz) {
  if (!is_kernel_address(ptr)) {
    return ZX_ERR_NOT_FOUND;
  }

  const bool below_stack = ptr < thread->stack().base();
  if (below_stack || ptr > (thread->stack().top() - sz)) {
    return ZX_ERR_NOT_FOUND;
  }

  memcpy(out, reinterpret_cast<const void*>(ptr), sz);
  return ZX_OK;
}

// Walks the frame pointer chain starting at |fp| on |thread|'s stack and records each return
// address in |out_bt|, up to Backtrace::kMaxSize entries.
//
// Every path out of this function leaves |out_bt| either properly filled in or empty.
void GetBacktraceCommon(Thread* thread, vaddr_t fp, Backtrace& out_bt) {
  out_bt.reset();

  // Without frame pointers, don't even try. The compiler should optimize out the body of all the
  // callers if they are not present.
  if (!WITH_FRAME_POINTERS) {
    return;
  }

  // Perhaps we don't yet have a thread context, or there is no frame to start from.
  if (thread == nullptr || fp == 0) {
    return;
  }

  size_t frames = 0;
  while (frames < Backtrace::kMaxSize) {
    vaddr_t frame_base = fp;
    // RISC-V has a nonstandard frame pointer which points to the CFA instead of the previous
    // frame pointer. Since the frame pointer and return address are always just below the CFA,
    // subtract 16 bytes to get to the actual frame pointer.
#if __riscv
    frame_base -= 16;
#endif

    // The return address is read from 8 bytes above the saved frame pointer.
    vaddr_t return_address;
    if (ReadStack(thread, frame_base + 8, &return_address, sizeof(vaddr_t)) != ZX_OK) {
      break;
    }
    out_bt.push_back(return_address);

    // Follow the chain to the caller's frame.
    if (ReadStack(thread, frame_base, &fp, sizeof(vaddr_t)) != ZX_OK) {
      break;
    }
    frames++;
  }
}

}  // namespace
// Fills |out_bt| with a backtrace of the current thread, starting from this function's own frame.
void Thread::Current::GetBacktrace(Backtrace& out_bt) {
auto fp = reinterpret_cast<vaddr_t>(__GET_FRAME(0));
GetBacktraceCommon(Thread::Current::Get(), fp, out_bt);
// (https://fxbug.dev/42179766): Force the function to not tail call GetBacktraceCommon.
// This will make sure the frame pointer we grabbed at the top
// of the function is still valid across the call.
asm("");
}
// Fills |out_bt| with a backtrace of the current thread, starting from the caller-supplied frame
// pointer |fp|.
void Thread::Current::GetBacktrace(vaddr_t fp, Backtrace& out_bt) {
GetBacktraceCommon(Thread::Current::Get(), fp, out_bt);
}
// Fills |out_bt| with a backtrace of this thread, taken under the thread lock.
//
// A backtrace is only attempted when the thread is blocked, blocked on a read lock, sleeping, or
// suspended. In any other state |out_bt| is reset so the caller doesn't inadvertently use a
// previous value.
void Thread::GetBacktrace(Backtrace& out_bt) {
  Guard<MonitoredSpinLock, IrqSave> guard{ThreadLock::Get(), SOURCE_TAG};

  const auto current_state = state();
  const bool is_parked = current_state == THREAD_BLOCKED ||
                         current_state == THREAD_BLOCKED_READ_LOCK ||
                         current_state == THREAD_SLEEPING || current_state == THREAD_SUSPENDED;
  if (!is_parked) {
    // Not in a valid state, can't get a backtrace.
    out_bt.reset();
    return;
  }

  // The thread isn't running, so ask the arch code for a starting frame pointer.
  const vaddr_t fp = arch_thread_get_blocked_fp(this);
  GetBacktraceCommon(this, fp, out_bt);
}