// Copyright 2016 The Fuchsia Authors
// Copyright (c) 2008-2015 Travis Geiselbrecht
//
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file or at
// https://opensource.org/licenses/MIT
/**
* @file
* @brief Kernel threading
*
* This file is the core kernel threading interface.
*
* @defgroup thread Threads
* @{
*/
#include <kernel/thread.h>
#include <arch/exception.h>
#include <assert.h>
#include <debug.h>
#include <err.h>
#include <inttypes.h>
#include <kernel/atomic.h>
#include <kernel/dpc.h>
#include <kernel/mp.h>
#include <kernel/percpu.h>
#include <kernel/sched.h>
#include <kernel/stats.h>
#include <kernel/timer.h>
#include <lib/counters.h>
#include <lib/heap.h>
#include <lib/ktrace.h>
#include <list.h>
#include <malloc.h>
#include <object/c_user_thread.h>
#include <platform.h>
#include <printf.h>
#include <string.h>
#include <target.h>
#include <vm/vm.h>
#include <zircon/types.h>
// kernel counters. TODO(cpu): remove LK-era counters
// The counters below never decrease.
//
// counts the number of thread_t successfully created.
KCOUNTER(thread_create_count, "kernel.thread.create");
// counts the number of thread_t joined.
KCOUNTER(thread_join_count, "kernel.thread.join");
// counts the number of calls to suspend() that succeeded.
KCOUNTER(thread_suspend_count, "kernel.thread.suspend");
// counts the number of calls to resume() that succeeded.
KCOUNTER(thread_resume_count, "kernel.thread.resume");
// global thread list
static struct list_node thread_list = LIST_INITIAL_VALUE(thread_list);
// master thread spinlock
spin_lock_t thread_lock __CPU_ALIGN_EXCLUSIVE = SPIN_LOCK_INITIAL_VALUE;
// local routines
static void thread_exit_locked(thread_t* current_thread, int retcode) __NO_RETURN;
static void thread_do_suspend(void);
static void init_thread_struct(thread_t* t, const char* name) {
memset(t, 0, sizeof(thread_t));
t->magic = THREAD_MAGIC;
strlcpy(t->name, name, sizeof(t->name));
wait_queue_init(&t->retcode_wait_queue);
}
static void initial_thread_func(void) TA_REQ(thread_lock) __NO_RETURN;
static void initial_thread_func(void) {
int ret;
// release the thread lock that was implicitly held across the reschedule
spin_unlock(&thread_lock);
arch_enable_ints();
thread_t* ct = get_current_thread();
ret = ct->entry(ct->arg);
thread_exit(ret);
}
// Invoke |t|'s user_callback with |new_state|.
//
// Since user_callback may call into the scheduler it's crucial that the scheduler lock
// (thread_lock) is not held when calling this function. Otherwise, we risk recursive deadlock.
static void invoke_user_callback(thread_t* t, enum thread_user_state_change new_state)
TA_EXCL(thread_lock) {
DEBUG_ASSERT(!arch_ints_disabled() || !spin_lock_held(&thread_lock));
if (t->user_callback) {
t->user_callback(new_state, t->user_thread);
}
}
/**
* @brief Create a new thread
*
* This function creates a new thread. The thread is initially suspended, so you
* need to call thread_resume() to execute it.
*
* @param name Name of thread
* @param entry Entry point of thread
* @param arg Arbitrary argument passed to entry()
* @param priority Execution priority for the thread.
* @param stack_size Stack size for the thread.
* @param alt_trampoline If not NULL, an alternate trampoline for the thread
* to start on.
*
 * Thread priority is an integer from 0 (lowest) to 31 (highest). Some standard
 * priorities are defined in <kernel/thread.h>:
*
* HIGHEST_PRIORITY
* DPC_PRIORITY
* HIGH_PRIORITY
* DEFAULT_PRIORITY
* LOW_PRIORITY
* IDLE_PRIORITY
* LOWEST_PRIORITY
*
 * Stack size is typically set to DEFAULT_STACK_SIZE.
*
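 * A minimal usage sketch via the thread_create() wrapper below (the worker
 * entry point and its argument are illustrative only):
 *
 * @code
 *   static int worker(void* arg) {
 *       // ... do work with |arg| ...
 *       return 0;
 *   }
 *
 *   thread_t* t = thread_create("worker", worker, NULL,
 *                               DEFAULT_PRIORITY, DEFAULT_STACK_SIZE);
 *   if (t != NULL) {
 *       thread_resume(t);
 *   }
 * @endcode
 *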
* @return Pointer to thread object, or NULL on failure.
*/
thread_t* thread_create_etc(
thread_t* t,
const char* name,
thread_start_routine entry, void* arg,
int priority,
void* stack, void* unsafe_stack, size_t stack_size,
thread_trampoline_routine alt_trampoline) {
unsigned int flags = 0;
if (!t) {
t = static_cast<thread_t*>(malloc(sizeof(thread_t)));
if (!t)
return NULL;
flags |= THREAD_FLAG_FREE_STRUCT;
}
init_thread_struct(t, name);
t->entry = entry;
t->arg = arg;
t->state = THREAD_INITIAL;
t->signals = 0;
t->blocking_wait_queue = NULL;
t->blocked_status = ZX_OK;
t->interruptable = false;
t->curr_cpu = INVALID_CPU;
t->last_cpu = INVALID_CPU;
t->cpu_affinity = CPU_MASK_ALL;
t->retcode = 0;
wait_queue_init(&t->retcode_wait_queue);
sched_init_thread(t, priority);
// create the stack
if (!stack) {
if (THREAD_STACK_BOUNDS_CHECK) {
stack_size += THREAD_STACK_PADDING_SIZE;
flags |= THREAD_FLAG_DEBUG_STACK_BOUNDS_CHECK;
}
t->stack = malloc(stack_size);
if (!t->stack) {
if (flags & THREAD_FLAG_FREE_STRUCT)
free(t);
return NULL;
}
flags |= THREAD_FLAG_FREE_STACK;
if (THREAD_STACK_BOUNDS_CHECK) {
memset(t->stack, STACK_DEBUG_BYTE, THREAD_STACK_PADDING_SIZE);
}
} else {
t->stack = stack;
}
#if __has_feature(safe_stack)
if (!unsafe_stack) {
DEBUG_ASSERT(!stack);
DEBUG_ASSERT(flags & THREAD_FLAG_FREE_STACK);
t->unsafe_stack = malloc(stack_size);
if (!t->unsafe_stack) {
free(t->stack);
if (flags & THREAD_FLAG_FREE_STRUCT)
free(t);
return NULL;
}
if (THREAD_STACK_BOUNDS_CHECK) {
memset(t->unsafe_stack, STACK_DEBUG_BYTE, THREAD_STACK_PADDING_SIZE);
}
} else {
DEBUG_ASSERT(stack);
t->unsafe_stack = unsafe_stack;
}
#else
DEBUG_ASSERT(!unsafe_stack);
#endif
t->stack_size = stack_size;
// save whether or not we need to free the thread struct and/or stack
t->flags = flags;
if (likely(alt_trampoline == NULL)) {
alt_trampoline = initial_thread_func;
}
// set up the initial stack frame
arch_thread_initialize(t, (vaddr_t)alt_trampoline);
// add it to the global thread list
THREAD_LOCK(state);
list_add_head(&thread_list, &t->thread_list_node);
THREAD_UNLOCK(state);
kcounter_add(thread_create_count, 1);
return t;
}
thread_t* thread_create(const char* name, thread_start_routine entry, void* arg, int priority, size_t stack_size) {
return thread_create_etc(NULL, name, entry, arg, priority,
NULL, NULL, stack_size, NULL);
}
static void free_thread_resources(thread_t* t) {
// free its stack and the thread structure itself
if (t->flags & THREAD_FLAG_FREE_STACK) {
if (t->stack)
free(t->stack);
#if __has_feature(safe_stack)
if (t->unsafe_stack)
free(t->unsafe_stack);
#endif
}
    // call the tls callback for each slot, as long as there is one
for (uint ix = 0; ix != THREAD_MAX_TLS_ENTRY; ++ix) {
if (t->tls_callback[ix]) {
t->tls_callback[ix](t->tls[ix]);
}
}
t->magic = 0;
if (t->flags & THREAD_FLAG_FREE_STRUCT)
free(t);
}
/**
* @brief Flag a thread as real time
*
* @param t Thread to flag
*
* @return ZX_OK on success
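 *
 * Real-time threads opt out of the periodic preemption timer (note the
 * timer_cancel() below), e.g.:
 *
 * @code
 *   thread_set_real_time(get_current_thread());
 * @endcode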
*/
zx_status_t thread_set_real_time(thread_t* t) {
if (!t)
return ZX_ERR_INVALID_ARGS;
DEBUG_ASSERT(t->magic == THREAD_MAGIC);
THREAD_LOCK(state);
if (t == get_current_thread()) {
// if we're currently running, cancel the preemption timer.
timer_cancel(&percpu[arch_curr_cpu_num()].preempt_timer);
}
t->flags |= THREAD_FLAG_REAL_TIME;
THREAD_UNLOCK(state);
return ZX_OK;
}
/**
* @brief Make a suspended thread executable.
*
* This function is called to start a thread which has just been
* created with thread_create() or which has been suspended with
 * thread_suspend(). It cannot fail.
*
* @param t Thread to resume
*/
void thread_resume(thread_t* t) {
DEBUG_ASSERT(t->magic == THREAD_MAGIC);
bool ints_disabled = arch_ints_disabled();
bool resched = false;
    if (!ints_disabled) // HACK, don't resched into the bootstrap thread before the idle thread is set up
resched = true;
THREAD_LOCK(state);
if (t->state == THREAD_DEATH) {
THREAD_UNLOCK(state);
// The thread is dead, resuming it is a no-op.
return;
}
// Clear the suspend signal in case there is a pending suspend
t->signals &= ~THREAD_SIGNAL_SUSPEND;
if (t->state == THREAD_INITIAL || t->state == THREAD_SUSPENDED) {
// wake up the new thread, putting it in a run queue on a cpu. reschedule if the local
// cpu run queue was modified
bool local_resched = sched_unblock(t);
if (resched && local_resched)
sched_reschedule();
}
THREAD_UNLOCK(state);
kcounter_add(thread_resume_count, 1);
}
zx_status_t thread_detach_and_resume(thread_t* t) {
zx_status_t err;
err = thread_detach(t);
if (err < 0)
return err;
thread_resume(t);
return ZX_OK;
}
/**
* @brief Suspend a ready/running thread
*
* @param t Thread to suspend
*
* @return ZX_OK on success.
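 *
 * Suspension is asynchronous: this call raises THREAD_SIGNAL_SUSPEND and the
 * target parks itself at its next preemption point or interruptable wait. A
 * round-trip sketch, assuming |t| was created and resumed earlier:
 *
 * @code
 *   thread_suspend(t);   // request the suspend; t may keep running briefly
 *   // ... t eventually transitions to THREAD_SUSPENDED ...
 *   thread_resume(t);    // clear the pending suspend and unblock t
 * @endcode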
*/
zx_status_t thread_suspend(thread_t* t) {
DEBUG_ASSERT(t->magic == THREAD_MAGIC);
DEBUG_ASSERT(!thread_is_idle(t));
THREAD_LOCK(state);
if (t->state == THREAD_INITIAL || t->state == THREAD_DEATH) {
THREAD_UNLOCK(state);
return ZX_ERR_BAD_STATE;
}
t->signals |= THREAD_SIGNAL_SUSPEND;
bool local_resched = false;
switch (t->state) {
case THREAD_INITIAL:
case THREAD_DEATH:
// This should be unreachable because these two states were handled
// above.
panic("Unexpected thread state");
case THREAD_READY:
// thread is ready to run and not blocked or suspended.
// will wake up and deal with the signal soon.
break;
case THREAD_RUNNING:
// thread is running (on another cpu)
// The following call is not essential. It just makes the
// thread suspension happen sooner rather than at the next
// timer interrupt or syscall.
mp_reschedule(cpu_num_to_mask(t->curr_cpu), 0);
break;
case THREAD_SUSPENDED:
// thread is suspended already
break;
case THREAD_BLOCKED:
// thread is blocked on something and marked interruptable
if (t->interruptable)
wait_queue_unblock_thread(t, ZX_ERR_INTERNAL_INTR_RETRY);
break;
case THREAD_SLEEPING:
// thread is sleeping
if (t->interruptable) {
t->blocked_status = ZX_ERR_INTERNAL_INTR_RETRY;
local_resched = sched_unblock(t);
}
break;
}
// reschedule if the local cpu run queue was modified
if (local_resched)
sched_reschedule();
THREAD_UNLOCK(state);
kcounter_add(thread_suspend_count, 1);
return ZX_OK;
}
// Signal an exception on the current thread, to be handled when the
// current syscall exits. Unlike other signals, this is synchronous, in
// the sense that a thread signals itself. This exists primarily so that
// we can unwind the stack in order to get the state of userland's
// callee-saved registers at the point where userland invoked the
// syscall.
void thread_signal_policy_exception(void) {
thread_t* t = get_current_thread();
THREAD_LOCK(state);
t->signals |= THREAD_SIGNAL_POLICY_EXCEPTION;
THREAD_UNLOCK(state);
}
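/**
 * @brief Wait for a thread to die and reclaim its resources
 *
 * A minimal lifecycle sketch (|worker| is an illustrative entry point):
 *
 * @code
 *   thread_t* t = thread_create("worker", worker, NULL,
 *                               DEFAULT_PRIORITY, DEFAULT_STACK_SIZE);
 *   thread_resume(t);
 *   int retcode;
 *   thread_join(t, &retcode, ZX_TIME_INFINITE);
 * @endcode
 */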
zx_status_t thread_join(thread_t* t, int* retcode, zx_time_t deadline) {
DEBUG_ASSERT(t->magic == THREAD_MAGIC);
THREAD_LOCK(state);
if (t->flags & THREAD_FLAG_DETACHED) {
        // the thread is detached, so it can't be joined; bail out
THREAD_UNLOCK(state);
return ZX_ERR_BAD_STATE;
}
// wait for the thread to die
if (t->state != THREAD_DEATH) {
zx_status_t err = wait_queue_block(&t->retcode_wait_queue, deadline);
if (err < 0) {
THREAD_UNLOCK(state);
return err;
}
}
DEBUG_ASSERT(t->magic == THREAD_MAGIC);
DEBUG_ASSERT(t->state == THREAD_DEATH);
DEBUG_ASSERT(t->blocking_wait_queue == NULL);
DEBUG_ASSERT(!list_in_list(&t->queue_node));
// save the return code
if (retcode)
*retcode = t->retcode;
// remove it from the master thread list
list_delete(&t->thread_list_node);
// clear the structure's magic
t->magic = 0;
THREAD_UNLOCK(state);
free_thread_resources(t);
kcounter_add(thread_join_count, 1);
return ZX_OK;
}
zx_status_t thread_detach(thread_t* t) {
DEBUG_ASSERT(t->magic == THREAD_MAGIC);
THREAD_LOCK(state);
// if another thread is blocked inside thread_join() on this thread,
// wake them up with a specific return code
wait_queue_wake_all(&t->retcode_wait_queue, false, ZX_ERR_BAD_STATE);
// if it's already dead, then just do what join would have and exit
if (t->state == THREAD_DEATH) {
t->flags &= ~THREAD_FLAG_DETACHED; // makes sure thread_join continues
THREAD_UNLOCK(state);
return thread_join(t, NULL, 0);
} else {
t->flags |= THREAD_FLAG_DETACHED;
THREAD_UNLOCK(state);
return ZX_OK;
}
}
// called back in the DPC worker thread to free the stack and/or the thread structure
// itself for a thread that is exiting on its own.
static void thread_free_dpc(struct dpc* dpc) {
thread_t* t = (thread_t*)dpc->arg;
DEBUG_ASSERT(t->magic == THREAD_MAGIC);
DEBUG_ASSERT(t->state == THREAD_DEATH);
// grab and release the thread lock, which effectively serializes us with
// the thread that is queuing itself for destruction.
THREAD_LOCK(state);
atomic_signal_fence();
THREAD_UNLOCK(state);
free_thread_resources(t);
}
__NO_RETURN static void thread_exit_locked(thread_t* current_thread,
int retcode) TA_REQ(thread_lock) {
    // create a dpc on the stack to queue up a free.
    // it must be declared at the top scope of this function so the compiler
    // can't reuse its stack slot before the function exits
dpc_t free_dpc;
// enter the dead state
current_thread->state = THREAD_DEATH;
current_thread->retcode = retcode;
// if we're detached, then do our teardown here
if (current_thread->flags & THREAD_FLAG_DETACHED) {
// remove it from the master thread list
list_delete(&current_thread->thread_list_node);
// if we have to do any freeing of either the stack or the thread structure, queue
// a dpc to do the cleanup
if ((current_thread->flags & THREAD_FLAG_FREE_STACK && current_thread->stack) ||
current_thread->flags & THREAD_FLAG_FREE_STRUCT) {
free_dpc.func = thread_free_dpc;
free_dpc.arg = (void*)current_thread;
dpc_queue_thread_locked(&free_dpc);
}
} else {
// signal if anyone is waiting
wait_queue_wake_all(&current_thread->retcode_wait_queue, false, 0);
}
// reschedule
sched_resched_internal();
panic("somehow fell through thread_exit()\n");
}
/**
* @brief Remove this thread from the scheduler, discarding
* its execution state.
*
* This is almost certainly not the function you want. In the general case,
* this is incredibly unsafe.
*
* This will free any resources allocated by thread_create.
*/
void thread_forget(thread_t* t) {
THREAD_LOCK(state);
__UNUSED thread_t* current_thread = get_current_thread();
DEBUG_ASSERT(current_thread != t);
list_delete(&t->thread_list_node);
THREAD_UNLOCK(state);
DEBUG_ASSERT(!list_in_list(&t->queue_node));
free_thread_resources(t);
}
/**
* @brief Terminate the current thread
*
* Current thread exits with the specified return code.
*
* This function does not return.
*/
void thread_exit(int retcode) {
thread_t* current_thread = get_current_thread();
DEBUG_ASSERT(current_thread->magic == THREAD_MAGIC);
DEBUG_ASSERT(current_thread->state == THREAD_RUNNING);
DEBUG_ASSERT(!thread_is_idle(current_thread));
invoke_user_callback(current_thread, THREAD_USER_STATE_EXIT);
THREAD_LOCK(state);
thread_exit_locked(current_thread, retcode);
}
// kill a thread
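// A typical caller reaps the killed thread afterwards; a minimal sketch,
// assuming |t| is joinable (i.e. not detached):
//
//   thread_kill(t);
//   thread_join(t, NULL, ZX_TIME_INFINITE);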
void thread_kill(thread_t* t) {
DEBUG_ASSERT(t->magic == THREAD_MAGIC);
THREAD_LOCK(state);
// deliver a signal to the thread.
    // NOTE: it's not important to do this atomically, since we're inside
    // the thread lock, but go ahead and flush it out to memory to reduce
    // races if another thread is looking at this.
t->signals |= THREAD_SIGNAL_KILL;
smp_mb();
bool local_resched = false;
    // we are killing ourselves
if (t == get_current_thread())
goto done;
// general logic is to wake up the thread so it notices it had a signal delivered to it
switch (t->state) {
case THREAD_INITIAL:
// thread hasn't been started yet.
// not really safe to wake it up, since it's only in this state because it's under
// construction by the creator thread.
break;
case THREAD_READY:
// thread is ready to run and not blocked or suspended.
// will wake up and deal with the signal soon.
// TODO: short circuit if it was blocked from user space
break;
case THREAD_RUNNING:
// thread is running (on another cpu).
// The following call is not essential. It just makes the
// thread termination happen sooner rather than at the next
// timer interrupt or syscall.
mp_reschedule(cpu_num_to_mask(t->curr_cpu), 0);
break;
case THREAD_SUSPENDED:
// thread is suspended, resume it so it can get the kill signal
local_resched = sched_unblock(t);
break;
case THREAD_BLOCKED:
// thread is blocked on something and marked interruptable
if (t->interruptable)
wait_queue_unblock_thread(t, ZX_ERR_INTERNAL_INTR_KILLED);
break;
case THREAD_SLEEPING:
// thread is sleeping
if (t->interruptable) {
t->blocked_status = ZX_ERR_INTERNAL_INTR_KILLED;
local_resched = sched_unblock(t);
}
break;
case THREAD_DEATH:
// thread is already dead
goto done;
}
if (local_resched) {
// reschedule if the local cpu run queue was modified
sched_reschedule();
}
done:
THREAD_UNLOCK(state);
}
// Sets the cpu affinity mask of a thread to the passed-in mask and migrates
// the thread if it is active.
void thread_set_cpu_affinity(thread_t* t, cpu_mask_t affinity) {
DEBUG_ASSERT(t->magic == THREAD_MAGIC);
THREAD_LOCK(state);
    // make sure the passed-in mask is valid and that at least one cpu can run the thread
if (affinity & mp_get_active_mask()) {
// set the affinity mask
t->cpu_affinity = affinity;
// let the scheduler deal with it
sched_migrate(t);
}
THREAD_UNLOCK(state);
}
void thread_migrate_to_cpu(const cpu_num_t target_cpu) {
thread_set_cpu_affinity(get_current_thread(), cpu_num_to_mask(target_cpu));
}
// thread_lock must be held when calling this function. This function will
// not return if it decides to kill the thread.
static void check_kill_signal(thread_t* current_thread,
spin_lock_saved_state_t state) TA_REQ(thread_lock) {
DEBUG_ASSERT(arch_ints_disabled());
DEBUG_ASSERT(spin_lock_held(&thread_lock));
if (current_thread->signals & THREAD_SIGNAL_KILL) {
// Ensure we don't recurse into thread_exit.
DEBUG_ASSERT(current_thread->state != THREAD_DEATH);
THREAD_UNLOCK(state);
thread_exit(0);
// Unreachable.
}
}
// finish suspending the current thread
static void thread_do_suspend(void) {
thread_t* current_thread = get_current_thread();
// Note: After calling this callback, we must not return without
// calling the callback with THREAD_USER_STATE_RESUME. That is
// because those callbacks act as barriers which control when it is
// safe for the zx_thread_read_state()/zx_thread_write_state()
// syscalls to access the userland register state kept by thread_t.
invoke_user_callback(current_thread, THREAD_USER_STATE_SUSPEND);
THREAD_LOCK(state);
// make sure we haven't been killed while the lock was dropped for the user callback
check_kill_signal(current_thread, state);
// Make sure the suspend signal wasn't cleared while we were running the
// callback.
if (current_thread->signals & THREAD_SIGNAL_SUSPEND) {
current_thread->state = THREAD_SUSPENDED;
current_thread->signals &= ~THREAD_SIGNAL_SUSPEND;
// directly invoke the context switch, since we've already manipulated this thread's state
sched_resched_internal();
// If the thread was killed, we should not allow it to resume. We
// shouldn't call user_callback() with THREAD_USER_STATE_RESUME in
// this case, because there might not have been any request to
// resume the thread.
check_kill_signal(current_thread, state);
}
THREAD_UNLOCK(state);
invoke_user_callback(current_thread, THREAD_USER_STATE_RESUME);
}
// check for any pending signals and handle them
void thread_process_pending_signals(void) {
thread_t* current_thread = get_current_thread();
if (likely(current_thread->signals == 0))
return;
// grab the thread lock so we can safely look at the signal mask
THREAD_LOCK(state);
check_kill_signal(current_thread, state);
// Report exceptions raised by syscalls
if (current_thread->signals & THREAD_SIGNAL_POLICY_EXCEPTION) {
current_thread->signals &= ~THREAD_SIGNAL_POLICY_EXCEPTION;
THREAD_UNLOCK(state);
zx_status_t status = arch_dispatch_user_policy_exception();
if (status != ZX_OK) {
panic("arch_dispatch_user_policy_exception() failed: status=%d\n",
status);
}
return;
}
if (current_thread->signals & THREAD_SIGNAL_SUSPEND) {
// transition the thread to the suspended state
DEBUG_ASSERT(current_thread->state == THREAD_RUNNING);
THREAD_UNLOCK(state);
thread_do_suspend();
} else {
THREAD_UNLOCK(state);
}
}
/**
* @brief Yield the cpu to another thread
*
* This function places the current thread at the end of the run queue
* and yields the cpu to another waiting thread (if any.)
*
* This function will return at some later time. Possibly immediately if
* no other threads are waiting to execute.
*/
void thread_yield(void) {
__UNUSED thread_t* current_thread = get_current_thread();
DEBUG_ASSERT(current_thread->magic == THREAD_MAGIC);
DEBUG_ASSERT(current_thread->state == THREAD_RUNNING);
DEBUG_ASSERT(!arch_in_int_handler());
THREAD_LOCK(state);
CPU_STATS_INC(yields);
sched_yield();
THREAD_UNLOCK(state);
}
/**
* @brief Preempt the current thread from an interrupt
*
* This function places the current thread at the head of the run
* queue and then yields the cpu to another thread.
*/
void thread_preempt(void) {
thread_t* current_thread = get_current_thread();
DEBUG_ASSERT(current_thread->magic == THREAD_MAGIC);
DEBUG_ASSERT(current_thread->state == THREAD_RUNNING);
DEBUG_ASSERT(!arch_in_int_handler());
if (!thread_is_idle(current_thread)) {
// only track when a meaningful preempt happens
CPU_STATS_INC(irq_preempts);
}
THREAD_LOCK(state);
sched_preempt();
THREAD_UNLOCK(state);
}
/**
* @brief Reevaluate the run queue on the current cpu.
*
* This function places the current thread at the head of the run
* queue and then yields the cpu to another thread. Similar to
 * thread_preempt, but intended to be used in non-interrupt context.
*/
void thread_reschedule(void) {
thread_t* current_thread = get_current_thread();
DEBUG_ASSERT(current_thread->magic == THREAD_MAGIC);
DEBUG_ASSERT(current_thread->state == THREAD_RUNNING);
DEBUG_ASSERT(!arch_in_int_handler());
THREAD_LOCK(state);
sched_reschedule();
THREAD_UNLOCK(state);
}
void thread_check_preempt_pending(void) {
thread_t* current_thread = get_current_thread();
// First check preempt_pending without the expense of taking the lock.
// At this point, interrupts could be enabled, so an interrupt handler
// might preempt us and set preempt_pending to false after we read it.
if (unlikely(current_thread->preempt_pending)) {
THREAD_LOCK(state);
// Recheck preempt_pending just in case it got set to false after
// our earlier check. Its value now cannot change because
// interrupts are now disabled.
if (likely(current_thread->preempt_pending)) {
// This will set preempt_pending = false for us.
sched_reschedule();
}
THREAD_UNLOCK(state);
}
}
// timer callback to wake up a sleeping thread
static void thread_sleep_handler(timer_t* timer, zx_time_t now, void* arg) {
thread_t* t = (thread_t*)arg;
DEBUG_ASSERT(t->magic == THREAD_MAGIC);
// spin trylocking on the thread lock since the routine that set up the callback,
// thread_sleep_etc, may be trying to simultaneously cancel this timer while holding the
// thread_lock.
if (timer_trylock_or_cancel(timer, &thread_lock))
return;
if (t->state != THREAD_SLEEPING) {
spin_unlock(&thread_lock);
return;
}
t->blocked_status = ZX_OK;
// unblock the thread
if (sched_unblock(t))
sched_reschedule();
spin_unlock(&thread_lock);
}
#define MIN_SLEEP_SLACK ZX_USEC(1)
#define MAX_SLEEP_SLACK ZX_SEC(1)
#define DIV_SLEEP_SLACK 10u
// computes the amount of slack the thread_sleep timer will use
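// (e.g. a deadline 100ms in the future gets 100ms / 10 = 10ms of slack;
// results are always clamped to [MIN_SLEEP_SLACK, MAX_SLEEP_SLACK])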
static uint64_t sleep_slack(zx_time_t deadline, zx_time_t now) {
if (deadline < now)
return MIN_SLEEP_SLACK;
zx_duration_t slack = (deadline - now) / DIV_SLEEP_SLACK;
return MAX(MIN_SLEEP_SLACK, MIN(slack, MAX_SLEEP_SLACK));
}
/**
* @brief Put thread to sleep; deadline specified in ns
*
* This function puts the current thread to sleep until the specified
* deadline has expired.
*
* Note that this function could continue to sleep after the specified deadline
* if other threads are running. When the deadline expires, this thread will
* be placed at the head of the run queue.
*
 * The interruptable argument allows this routine to return early if the
 * thread is signaled.
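 *
 * A typical caller sleeps relative to the current time, e.g. (a sketch,
 * assuming a ZX_MSEC() helper alongside the ZX_USEC()/ZX_SEC() used above):
 *
 * @code
 *   thread_sleep_relative(ZX_MSEC(10)); // sleep for roughly 10ms
 * @endcode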
*/
zx_status_t thread_sleep_etc(zx_time_t deadline, bool interruptable) {
thread_t* current_thread = get_current_thread();
zx_time_t now = current_time();
zx_status_t blocked_status;
DEBUG_ASSERT(current_thread->magic == THREAD_MAGIC);
DEBUG_ASSERT(current_thread->state == THREAD_RUNNING);
DEBUG_ASSERT(!thread_is_idle(current_thread));
DEBUG_ASSERT(!arch_in_int_handler());
// Skip all of the work if the deadline has already passed.
if (deadline <= now) {
return ZX_OK;
}
timer_t timer;
timer_init(&timer);
THREAD_LOCK(state);
    // if we're interruptable and have already been signaled, abort here
if (interruptable && unlikely((current_thread->signals))) {
if (current_thread->signals & THREAD_SIGNAL_KILL) {
blocked_status = ZX_ERR_INTERNAL_INTR_KILLED;
} else {
blocked_status = ZX_ERR_INTERNAL_INTR_RETRY;
}
goto out;
}
// set a one shot timer to wake us up and reschedule
timer_set(&timer, deadline,
TIMER_SLACK_LATE, sleep_slack(deadline, now), thread_sleep_handler, current_thread);
current_thread->state = THREAD_SLEEPING;
current_thread->blocked_status = ZX_OK;
current_thread->interruptable = interruptable;
sched_block();
current_thread->interruptable = false;
blocked_status = current_thread->blocked_status;
// always cancel the timer, since we may be racing with the timer tick on other cpus
timer_cancel(&timer);
out:
THREAD_UNLOCK(state);
return blocked_status;
}
zx_status_t thread_sleep_relative(zx_duration_t delay) {
if (delay != ZX_TIME_INFINITE) {
delay += current_time();
}
return thread_sleep(delay);
}
/**
* @brief Return the number of nanoseconds a thread has been running for.
*
* This takes the thread_lock to ensure there are no races while calculating the
* runtime of the thread.
*/
zx_duration_t thread_runtime(const thread_t* t) {
THREAD_LOCK(state);
zx_duration_t runtime = t->runtime_ns;
if (t->state == THREAD_RUNNING) {
runtime += current_time() - t->last_started_running;
}
THREAD_UNLOCK(state);
return runtime;
}
/**
* @brief Construct a thread t around the current running state
*
* This should be called once per CPU initialization. It will create
* a thread that is pinned to the current CPU and running at the
* highest priority.
*/
void thread_construct_first(thread_t* t, const char* name) {
DEBUG_ASSERT(arch_ints_disabled());
cpu_num_t cpu = arch_curr_cpu_num();
init_thread_struct(t, name);
t->state = THREAD_RUNNING;
t->flags = THREAD_FLAG_DETACHED;
t->signals = 0;
t->curr_cpu = cpu;
t->last_cpu = cpu;
t->cpu_affinity = cpu_num_to_mask(cpu);
sched_init_thread(t, HIGHEST_PRIORITY);
arch_thread_construct_first(t);
set_current_thread(t);
THREAD_LOCK(state);
list_add_head(&thread_list, &t->thread_list_node);
THREAD_UNLOCK(state);
}
/**
* @brief Initialize threading system
*
* This function is called once, from kmain()
*/
void thread_init_early(void) {
DEBUG_ASSERT(arch_curr_cpu_num() == 0);
// create a thread to cover the current running state
thread_t* t = &percpu[0].idle_thread;
thread_construct_first(t, "bootstrap");
sched_init_early();
}
/**
* @brief Complete thread initialization
*
* This function is called once at boot time
*/
void thread_init(void) {
for (uint i = 0; i < SMP_MAX_CPUS; i++) {
timer_init(&percpu[i].preempt_timer);
}
}
/**
* @brief Change name of current thread
*/
void thread_set_name(const char* name) {
thread_t* current_thread = get_current_thread();
strlcpy(current_thread->name, name, sizeof(current_thread->name));
}
/**
* @brief Set the callback pointer to a function called on user thread state
* changes (e.g. exit, suspend, resume)
*/
void thread_set_user_callback(thread_t* t, thread_user_callback_t cb) {
DEBUG_ASSERT(t->state == THREAD_INITIAL);
t->user_callback = cb;
}
/**
 * @brief Change the priority of a thread
*
* See thread_create() for a discussion of priority values.
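 *
 * Out-of-range values are clamped into (IDLE_PRIORITY, HIGHEST_PRIORITY], e.g.:
 *
 * @code
 *   thread_set_priority(get_current_thread(), HIGH_PRIORITY);
 * @endcode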
*/
void thread_set_priority(thread_t* t, int priority) {
DEBUG_ASSERT(t->magic == THREAD_MAGIC);
THREAD_LOCK(state);
if (priority <= IDLE_PRIORITY)
priority = IDLE_PRIORITY + 1;
if (priority > HIGHEST_PRIORITY)
priority = HIGHEST_PRIORITY;
sched_change_priority(t, priority);
THREAD_UNLOCK(state);
}
/**
* @brief Become an idle thread
*
* This function marks the current thread as the idle thread -- the one which
* executes when there is nothing else to do. This function does not return.
* This function is called once at boot time.
*/
void thread_become_idle(void) {
DEBUG_ASSERT(arch_ints_disabled());
thread_t* t = get_current_thread();
char name[16];
snprintf(name, sizeof(name), "idle %u", arch_curr_cpu_num());
thread_set_name(name);
// mark ourself as idle
t->flags |= THREAD_FLAG_IDLE;
cpu_num_t curr_cpu = arch_curr_cpu_num();
t->last_cpu = curr_cpu;
t->curr_cpu = curr_cpu;
t->cpu_affinity = cpu_num_to_mask(curr_cpu);
sched_init_thread(t, IDLE_PRIORITY);
mp_set_curr_cpu_active(true);
mp_set_cpu_idle(arch_curr_cpu_num());
// enable interrupts and start the scheduler
arch_enable_ints();
thread_reschedule();
arch_idle_thread_routine(NULL);
}
/**
* @brief Create a thread around the current execution context
*/
void thread_secondary_cpu_init_early(thread_t* t) {
DEBUG_ASSERT(arch_ints_disabled());
char name[16];
snprintf(name, sizeof(name), "cpu_init %u", arch_curr_cpu_num());
thread_construct_first(t, name);
}
void thread_secondary_cpu_entry(void) {
uint cpu = arch_curr_cpu_num();
mp_set_curr_cpu_active(true);
mp_set_cpu_idle(cpu);
dpc_init_for_cpu();
// Exit from our bootstrap thread, and enter the scheduler on this cpu
thread_exit(0);
}
/**
* @brief Create an idle thread for a secondary CPU
*/
thread_t* thread_create_idle_thread(cpu_num_t cpu_num) {
DEBUG_ASSERT(cpu_num != 0 && cpu_num < SMP_MAX_CPUS);
// Shouldn't be initialized yet
DEBUG_ASSERT(percpu[cpu_num].idle_thread.magic != THREAD_MAGIC);
char name[16];
snprintf(name, sizeof(name), "idle %u", cpu_num);
thread_t* t = thread_create_etc(
&percpu[cpu_num].idle_thread, name,
arch_idle_thread_routine, NULL,
IDLE_PRIORITY,
NULL, NULL, DEFAULT_STACK_SIZE,
NULL);
if (t == NULL) {
return t;
}
t->flags |= THREAD_FLAG_IDLE | THREAD_FLAG_DETACHED;
t->cpu_affinity = cpu_num_to_mask(cpu_num);
THREAD_LOCK(state);
sched_unblock_idle(t);
THREAD_UNLOCK(state);
return t;
}
/**
* @brief Return the name of the "owner" of the thread.
*
* Returns "kernel" if there is no owner.
*/
void thread_owner_name(thread_t* t, char out_name[THREAD_NAME_LENGTH]) {
if (t->user_thread) {
get_user_thread_process_name(t->user_thread, out_name);
return;
}
memcpy(out_name, "kernel", 7);
}
static const char* thread_state_to_str(enum thread_state state) {
switch (state) {
case THREAD_INITIAL:
return "init";
case THREAD_SUSPENDED:
return "susp";
case THREAD_READY:
return "rdy";
case THREAD_RUNNING:
return "run";
case THREAD_BLOCKED:
return "blok";
case THREAD_SLEEPING:
return "slep";
case THREAD_DEATH:
return "deth";
default:
return "unkn";
}
}
/**
* @brief Dump debugging info about the specified thread.
*/
void dump_thread(thread_t* t, bool full_dump) {
if (t->magic != THREAD_MAGIC) {
dprintf(INFO, "dump_thread WARNING: thread at %p has bad magic\n", t);
}
zx_duration_t runtime = t->runtime_ns;
if (t->state == THREAD_RUNNING) {
runtime += current_time() - t->last_started_running;
}
char oname[THREAD_NAME_LENGTH];
thread_owner_name(t, oname);
if (full_dump) {
dprintf(INFO, "dump_thread: t %p (%s:%s)\n", t, oname, t->name);
dprintf(INFO, "\tstate %s, curr/last cpu %d/%d, cpu_affinity %#x, priority %d [%d:%d,%d], "
"remaining time slice %" PRIu64 "\n",
thread_state_to_str(t->state), (int)t->curr_cpu, (int)t->last_cpu, t->cpu_affinity,
t->effec_priority, t->base_priority,
t->priority_boost, t->inherited_priority, t->remaining_time_slice);
dprintf(INFO, "\truntime_ns %" PRIu64 ", runtime_s %" PRIu64 "\n",
runtime, runtime / 1000000000);
dprintf(INFO, "\tstack %p, stack_size %zu\n", t->stack, t->stack_size);
dprintf(INFO, "\tentry %p, arg %p, flags 0x%x %s%s%s%s%s%s\n", t->entry, t->arg, t->flags,
(t->flags & THREAD_FLAG_DETACHED) ? "Dt" : "",
(t->flags & THREAD_FLAG_FREE_STACK) ? "Fs" : "",
(t->flags & THREAD_FLAG_FREE_STRUCT) ? "Ft" : "",
(t->flags & THREAD_FLAG_REAL_TIME) ? "Rt" : "",
(t->flags & THREAD_FLAG_IDLE) ? "Id" : "",
(t->flags & THREAD_FLAG_DEBUG_STACK_BOUNDS_CHECK) ? "Sc" : "");
dprintf(INFO, "\twait queue %p, blocked_status %d, interruptable %d, mutexes held %d\n",
t->blocking_wait_queue, t->blocked_status, t->interruptable, t->mutexes_held);
dprintf(INFO, "\taspace %p\n", t->aspace);
dprintf(INFO, "\tuser_thread %p, pid %" PRIu64 ", tid %" PRIu64 "\n",
t->user_thread, t->user_pid, t->user_tid);
arch_dump_thread(t);
} else {
printf("thr %p st %4s m %d pri %2d [%d:%d,%d] pid %" PRIu64 " tid %" PRIu64 " (%s:%s)\n",
t, thread_state_to_str(t->state), t->mutexes_held, t->effec_priority, t->base_priority,
t->priority_boost, t->inherited_priority, t->user_pid,
t->user_tid, oname, t->name);
}
}
/**
* @brief Dump debugging info about all threads
*/
void dump_all_threads(bool full) {
THREAD_LOCK(state);
dump_all_threads_locked(full);
THREAD_UNLOCK(state);
}
void dump_all_threads_locked(bool full) {
thread_t* t;
list_for_every_entry (&thread_list, t, thread_t, thread_list_node) {
if (t->magic != THREAD_MAGIC) {
dprintf(INFO, "bad magic on thread struct %p, aborting.\n", t);
hexdump(t, sizeof(thread_t));
break;
}
dump_thread(t, full);
}
}
void dump_thread_user_tid(uint64_t tid, bool full) {
THREAD_LOCK(state);
dump_thread_user_tid_locked(tid, full);
THREAD_UNLOCK(state);
}
void dump_thread_user_tid_locked(uint64_t tid, bool full) {
thread_t* t;
list_for_every_entry (&thread_list, t, thread_t, thread_list_node) {
if (t->user_tid != tid) {
continue;
}
if (t->magic != THREAD_MAGIC) {
dprintf(INFO, "bad magic on thread struct %p, aborting.\n", t);
hexdump(t, sizeof(thread_t));
break;
}
dump_thread(t, full);
}
}
thread_t* thread_id_to_thread_slow(uint64_t tid) {
thread_t* t;
list_for_every_entry (&thread_list, t, thread_t, thread_list_node) {
if (t->user_tid == tid) {
return t;
}
}
return NULL;
}
/** @} */
#if WITH_LIB_KTRACE
// Used by ktrace at the start of a trace to ensure that all
// the running threads, processes, and their names are known
void ktrace_report_live_threads(void) {
thread_t* t;
THREAD_LOCK(state);
list_for_every_entry (&thread_list, t, thread_t, thread_list_node) {
DEBUG_ASSERT(t->magic == THREAD_MAGIC);
if (t->user_tid) {
ktrace_name(TAG_THREAD_NAME,
static_cast<uint32_t>(t->user_tid), static_cast<uint32_t>(t->user_pid), t->name);
} else {
ktrace_name(TAG_KTHREAD_NAME,
static_cast<uint32_t>(reinterpret_cast<uintptr_t>(t)), 0, t->name);
}
}
THREAD_UNLOCK(state);
}
#endif
#define THREAD_BACKTRACE_DEPTH 16
typedef struct thread_backtrace {
void* pc[THREAD_BACKTRACE_DEPTH];
} thread_backtrace_t;
static zx_status_t thread_read_stack(thread_t* t, void* ptr, void* out, size_t sz) {
if (!is_kernel_address((uintptr_t)ptr) ||
(ptr < t->stack) ||
(ptr > (static_cast<char*>(t->stack) + t->stack_size - sizeof(void*)))) {
return ZX_ERR_NOT_FOUND;
}
memcpy(out, ptr, sz);
return ZX_OK;
}
static size_t thread_get_backtrace(thread_t* t, void* fp, thread_backtrace_t* tb) {
    // without frame pointers, don't even try;
    // the compiler should optimize out the body of all the callers if frame pointers are not enabled
if (!WITH_FRAME_POINTERS)
return 0;
void* pc;
if (t == NULL) {
return 0;
}
size_t n = 0;
    for (; n < THREAD_BACKTRACE_DEPTH; n++) {
        // the return address is saved just above the frame pointer (fp + 8 on
        // these 64-bit targets)
        if (thread_read_stack(t, static_cast<char*>(fp) + 8, &pc, sizeof(void*))) {
            break;
        }
        tb->pc[n] = pc;
        // chase the saved frame pointer to walk into the caller's frame
        if (thread_read_stack(t, fp, &fp, sizeof(void*))) {
            break;
        }
}
return n;
}
static zx_status_t _thread_print_backtrace(thread_t* t, void* fp) {
if (!t || !fp) {
return ZX_ERR_BAD_STATE;
}
thread_backtrace_t tb;
size_t count = thread_get_backtrace(t, fp, &tb);
if (count == 0) {
return ZX_ERR_BAD_STATE;
}
for (size_t n = 0; n < count; n++) {
printf("bt#%02zu: %p\n", n, tb.pc[n]);
}
printf("bt#%02zu: end\n", count);
return ZX_OK;
}
// print the backtrace of the current thread, at the current spot
void thread_print_current_backtrace(void) {
_thread_print_backtrace(get_current_thread(), __GET_FRAME(0));
}
// print the backtrace of a passed in thread, if possible
zx_status_t thread_print_backtrace(thread_t* t) {
// get the starting point if it's in a usable state
    void* fp = NULL;
switch (t->state) {
case THREAD_BLOCKED:
case THREAD_SLEEPING:
case THREAD_SUSPENDED:
// thread is blocked, so ask the arch code to get us a starting point
fp = arch_thread_get_blocked_fp(t);
break;
// we can't deal with every other state
default:
return ZX_ERR_BAD_STATE;
}
return _thread_print_backtrace(t, fp);
}