WIP priority inheritance
Change-Id: Id73ee904ed3681b9fe96ca1e69bc74a3d0576c0b
diff --git a/kernel/include/kernel/sched.h b/kernel/include/kernel/sched.h
index 598975a..640c82a 100644
--- a/kernel/include/kernel/sched.h
+++ b/kernel/include/kernel/sched.h
@@ -15,6 +15,7 @@
/* not intended to be used by regular kernel code */
void sched_init_early(void);
+void sched_init_thread(thread_t* t, int priority);
void sched_block(void);
void sched_yield(void);
void sched_preempt(void);
@@ -23,6 +24,11 @@
void sched_unblock_idle(thread_t* t);
void sched_migrate(thread_t* t);
+/* set the inherited priority of a thread and report, via local_resched, whether the
+ * caller should locally reschedule. pri should be < MAX_PRIORITY, negative values
+ * disable priority inheritance */
+void sched_inheirit_priority(thread_t* t, int pri, bool* local_resched);
+
/* return true if the thread was placed on the current cpu's run queue */
/* this usually means the caller should locally reschedule soon */
bool sched_unblock(thread_t* t) __WARN_UNUSED_RESULT;
diff --git a/kernel/include/kernel/thread.h b/kernel/include/kernel/thread.h
index 158c370..a6c63ff 100644
--- a/kernel/include/kernel/thread.h
+++ b/kernel/include/kernel/thread.h
@@ -95,8 +95,11 @@
* left the scheduler. */
zx_duration_t runtime_ns;
+ /* priority */
+ int effec_priority;
int base_priority;
int priority_boost;
+ int inheirited_priority;
/* current cpu the thread is either running on or in the ready queue, undefined otherwise */
cpu_num_t curr_cpu;
@@ -115,6 +118,9 @@
/* are we allowed to be interrupted on the current thing we're blocked/sleeping on */
bool interruptable;
+ /* number of mutexes we currently hold */
+ int mutexes_held;
+
/* pointer to the kernel address space this thread is associated with */
struct vmm_aspace* aspace;
diff --git a/kernel/include/kernel/wait.h b/kernel/include/kernel/wait.h
index 48ffbfd..819b467 100644
--- a/kernel/include/kernel/wait.h
+++ b/kernel/include/kernel/wait.h
@@ -56,6 +56,11 @@
zx_status_t wait_queue_block_with_mask(wait_queue_t*, zx_time_t deadline,
uint signal_mask);
+/* returns the highest priority of all the blocked threads on this wait queue.
+ * returns -1 if no threads are blocked.
+ */
+int wait_queue_blocked_priority(wait_queue_t*);
+
/*
* release one or more threads from the wait queue.
* reschedule = should the system reschedule if any is released.
diff --git a/kernel/kernel/mutex.c b/kernel/kernel/mutex.c
index 9088f4c..6c10cb6 100644
--- a/kernel/kernel/mutex.c
+++ b/kernel/kernel/mutex.c
@@ -74,6 +74,7 @@
oldval = 0;
if (likely(atomic_cmpxchg_u64(&m->val, &oldval, (uintptr_t)ct))) {
// acquired it cleanly
+ ct->mutexes_held++;
return;
}
@@ -100,6 +101,11 @@
goto retry;
}
+    // have the holder inherit our priority
+ // discard the local reschedule flag because we're just about to block anyway
+ bool unused;
+ sched_inheirit_priority(mutex_holder(m), ct->effec_priority, &unused);
+
// we have signalled that we're blocking, so drop into the wait queue
zx_status_t ret = wait_queue_block(&m->wait, ZX_TIME_INFINITE);
if (unlikely(ret < ZX_OK)) {
@@ -112,6 +118,9 @@
// someone must have woken us up, we should own the mutex now
DEBUG_ASSERT(ct == mutex_holder(m));
+ // record that we hold it
+ ct->mutexes_held++;
+
THREAD_UNLOCK(state);
}
@@ -121,13 +130,33 @@
thread_t* ct = get_current_thread();
uintptr_t oldval;
+ // we're going to release it, mark as such
+ ct->mutexes_held--;
+
// in case there's no contention, try the fast path
oldval = (uintptr_t)ct;
if (likely(atomic_cmpxchg_u64(&m->val, &oldval, 0))) {
// we're done, exit
+        // if we had inherited any priorities, undo it if we are no longer holding any mutexes
+ if (unlikely(ct->inheirited_priority >= 0) && ct->mutexes_held == 0) {
+ spin_lock_saved_state_t state;
+ if (!thread_lock_held)
+ spin_lock_irqsave(&thread_lock, state);
+
+ bool local_resched = false;
+ sched_inheirit_priority(ct, -1, &local_resched);
+ if (reschedule && local_resched) {
+ sched_reschedule();
+ }
+
+ if (!thread_lock_held)
+ spin_unlock_irqrestore(&thread_lock, state);
+ }
return;
}
+ DEBUG_ASSERT(ct->mutexes_held >= 0);
+
// must have been some contention, try the slow release
#if LK_DEBUGLEVEL > 0
@@ -159,9 +188,22 @@
ktrace(TAG_KWAIT_WAKE, (uintptr_t)&m->wait >> 32, (uintptr_t)&m->wait, 1, 0);
+ // boost the priority of the new thread we're waking
+ // if the wait queue is empty, it'll return -1 and we'll deboost the thread if
+ // it's not already holding a mutex
+ bool local_resched = false;
+ int blocked_priority = wait_queue_blocked_priority(&m->wait);
+ if (blocked_priority >= 0 || t->mutexes_held == 0) {
+ sched_inheirit_priority(t, blocked_priority, &local_resched);
+ }
+
+ // deboost ourself if this is the last mutex we held
+ if (ct->inheirited_priority >= 0 && ct->mutexes_held == 0)
+ sched_inheirit_priority(ct, -1, &local_resched);
+
// wake up the new thread, putting it in a run queue on a cpu. reschedule if the local
// cpu run queue was modified
- bool local_resched = sched_unblock(t);
+ local_resched |= sched_unblock(t);
if (reschedule && local_resched)
sched_reschedule();
diff --git a/kernel/kernel/sched.c b/kernel/kernel/sched.c
index 87e1a52..f0bdf89 100644
--- a/kernel/kernel/sched.c
+++ b/kernel/kernel/sched.c
@@ -55,10 +55,14 @@
static bool local_migrate_if_needed(thread_t* curr_thread);
/* compute the effective priority of a thread */
-static int effec_priority(const thread_t* t) {
+static void compute_effec_priority(thread_t* t) {
int ep = t->base_priority + t->priority_boost;
+ if (t->inheirited_priority > ep)
+ ep = t->inheirited_priority;
+
DEBUG_ASSERT(ep >= LOWEST_PRIORITY && ep <= HIGHEST_PRIORITY);
- return ep;
+
+ t->effec_priority = ep;
}
/* boost the priority of the thread by +1 */
@@ -72,6 +76,7 @@
if (t->priority_boost < MAX_PRIORITY_ADJ &&
likely((t->base_priority + t->priority_boost) < HIGHEST_PRIORITY)) {
t->priority_boost++;
+ compute_effec_priority(t);
}
}
@@ -106,6 +111,7 @@
/* drop a level */
t->priority_boost--;
+ compute_effec_priority(t);
}
/* pick a 'random' cpu out of the passed in mask of cpus */
@@ -198,10 +204,8 @@
static void insert_in_run_queue_head(cpu_num_t cpu, thread_t* t) {
DEBUG_ASSERT(!list_in_list(&t->queue_node));
- int ep = effec_priority(t);
-
- list_add_head(&percpu[cpu].run_queue[ep], &t->queue_node);
- percpu[cpu].run_queue_bitmap |= (1u << ep);
+ list_add_head(&percpu[cpu].run_queue[t->effec_priority], &t->queue_node);
+ percpu[cpu].run_queue_bitmap |= (1u << t->effec_priority);
/* mark the cpu as busy since the run queue now has at least one item in it */
mp_set_cpu_busy(cpu);
@@ -210,10 +214,8 @@
static void insert_in_run_queue_tail(cpu_num_t cpu, thread_t* t) {
DEBUG_ASSERT(!list_in_list(&t->queue_node));
- int ep = effec_priority(t);
-
- list_add_tail(&percpu[cpu].run_queue[ep], &t->queue_node);
- percpu[cpu].run_queue_bitmap |= (1u << ep);
+ list_add_tail(&percpu[cpu].run_queue[t->effec_priority], &t->queue_node);
+ percpu[cpu].run_queue_bitmap |= (1u << t->effec_priority);
/* mark the cpu as busy since the run queue now has at least one item in it */
mp_set_cpu_busy(cpu);
@@ -248,6 +250,13 @@
return &c->idle_thread;
}
+void sched_init_thread(thread_t* t, int priority) {
+    t->priority_boost = 0;
+    t->base_priority = priority;
+    t->inheirited_priority = -1;  // negative means no inherited priority is active
+    compute_effec_priority(t);    // derive effec_priority from the fields above
+}
+
void sched_block(void) {
DEBUG_ASSERT(spin_lock_held(&thread_lock));
@@ -528,9 +537,8 @@
DEBUG_ASSERT(is_valid_cpu_num(t->curr_cpu));
struct percpu* c = &percpu[t->curr_cpu];
- int pri = effec_priority(t);
- if (list_is_empty(&c->run_queue[pri])) {
- c->run_queue_bitmap &= ~(1u << pri);
+ if (list_is_empty(&c->run_queue[t->effec_priority])) {
+ c->run_queue_bitmap &= ~(1u << t->effec_priority);
}
find_cpu_and_insert(t, &local_resched, &accum_cpu_mask);
@@ -549,6 +557,75 @@
}
}
+/* set the priority to the higher value of what it was before and the newly inherited value */
+/* pri < 0 disables priority inheritance and goes back to the naturally computed values */
+void sched_inheirit_priority(thread_t* t, int pri, bool* local_resched) {
+    DEBUG_ASSERT(spin_lock_held(&thread_lock));
+
+    // clamp the inherited priority to the legal range
+    if (pri > HIGHEST_PRIORITY)
+        pri = HIGHEST_PRIORITY;
+
+    // if the new priority is real and no higher than the current inherited one, nothing to do
+    if (pri >= 0 && pri <= t->inheirited_priority)
+        return;
+
+    // adjust the priority and remember the old value
+    t->inheirited_priority = pri;
+    int old_ep = t->effec_priority;
+    compute_effec_priority(t);
+    if (old_ep == t->effec_priority) {
+        // same effective priority, nothing to do
+        return;
+    }
+
+    // see if we need to do something based on the state of the thread
+    cpu_mask_t accum_cpu_mask = 0;
+    switch (t->state) {
+        case THREAD_RUNNING:
+            if (t->effec_priority < old_ep) {
+                // we're currently running and dropped our effective priority, might want to resched
+                if (t == get_current_thread()) {
+                    // plain store: callers may pass an uninitialized flag, so don't read it first
+                    *local_resched = true;
+                } else {
+                    accum_cpu_mask = cpu_num_to_mask(t->curr_cpu);
+                }
+            }
+            break;
+        case THREAD_READY:
+            // it's sitting in a run queue somewhere, remove and add back to the proper queue on that cpu
+            DEBUG_ASSERT_MSG(list_in_list(&t->queue_node), "thread %p name %s curr_cpu %u\n", t, t->name, t->curr_cpu);
+            list_delete(&t->queue_node);
+
+            DEBUG_ASSERT(is_valid_cpu_num(t->curr_cpu));
+
+            struct percpu* c = &percpu[t->curr_cpu];
+            if (list_is_empty(&c->run_queue[old_ep])) {
+                c->run_queue_bitmap &= ~(1u << old_ep);
+            }
+
+            if (t->effec_priority > old_ep) {
+                insert_in_run_queue_head(t->curr_cpu, t);
+                if (t->curr_cpu == arch_curr_cpu_num()) {
+                    // plain store, see above
+                    *local_resched = true;
+                } else {
+                    accum_cpu_mask = cpu_num_to_mask(t->curr_cpu);
+                }
+            } else {
+                insert_in_run_queue_tail(t->curr_cpu, t);
+            }
+
+            break;
+        default:
+            // the other states do not matter, exit
+            return;
+    }
+
+    // kick any remote cpu that now needs to reschedule
+    if (accum_cpu_mask) {
+        mp_reschedule(MP_IPI_TARGET_MASK, accum_cpu_mask, 0);
+    }
+}
+
/* preemption timer that is set whenever a thread is scheduled */
static enum handler_return sched_timer_tick(struct timer* t, zx_time_t now, void* arg) {
/* if the preemption timer went off on the idle or a real time thread, ignore it */
@@ -692,10 +769,13 @@
/* set some optional target debug leds */
target_set_debug_led(0, !thread_is_idle(newthread));
- TRACE_CONTEXT_SWITCH("cpu %u, old %p (%s, pri %d:%d, flags 0x%x), new %p (%s, pri %d:%d, flags 0x%x)\n",
- cpu, oldthread, oldthread->name, oldthread->base_priority, oldthread->priority_boost,
+ TRACE_CONTEXT_SWITCH("cpu %u old %p (%s, pri %d [%d:%d], flags 0x%x) "
+ "new %p (%s, pri %d [%d:%d], flags 0x%x)\n",
+ cpu, oldthread, oldthread->name, oldthread->effec_priority,
+ oldthread->base_priority, oldthread->priority_boost,
oldthread->flags, newthread, newthread->name,
- newthread->base_priority, newthread->priority_boost, newthread->flags);
+ newthread->effec_priority, newthread->base_priority,
+ newthread->priority_boost, newthread->flags);
/* check that the old thread has not blown its stack just before pushing its context */
if (THREAD_STACK_BOUNDS_CHECK &&
diff --git a/kernel/kernel/thread.c b/kernel/kernel/thread.c
index 121ede0..6612c32 100644
--- a/kernel/kernel/thread.c
+++ b/kernel/kernel/thread.c
@@ -122,8 +122,6 @@
t->entry = entry;
t->arg = arg;
- t->base_priority = priority;
- t->priority_boost = 0;
t->state = THREAD_INITIAL;
t->signals = 0;
t->blocking_wait_queue = NULL;
@@ -136,6 +134,8 @@
t->retcode = 0;
wait_queue_init(&t->retcode_wait_queue);
+ sched_init_thread(t, priority);
+
/* create the stack */
if (!stack) {
if (THREAD_STACK_BOUNDS_CHECK) {
@@ -1168,10 +1168,11 @@
if (full_dump) {
dprintf(INFO, "dump_thread: t %p (%s:%s)\n", t, oname, t->name);
- dprintf(INFO, "\tstate %s, curr/last cpu %d/%d, cpu_affinity %#x, priority %d:%d, "
+ dprintf(INFO, "\tstate %s, curr/last cpu %d/%d, cpu_affinity %#x, priority %d [%d:%d,%d], "
"remaining time slice %" PRIu64 "\n",
- thread_state_to_str(t->state), (int)t->curr_cpu, (int)t->last_cpu, t->cpu_affinity, t->base_priority,
- t->priority_boost, t->remaining_time_slice);
+ thread_state_to_str(t->state), (int)t->curr_cpu, (int)t->last_cpu, t->cpu_affinity,
+ t->effec_priority, t->base_priority,
+ t->priority_boost, t->inheirited_priority, t->remaining_time_slice);
dprintf(INFO, "\truntime_ns %" PRIu64 ", runtime_s %" PRIu64 "\n",
runtime, runtime / 1000000000);
dprintf(INFO, "\tstack %p, stack_size %zu\n", t->stack, t->stack_size);
@@ -1182,15 +1183,16 @@
(t->flags & THREAD_FLAG_REAL_TIME) ? "Rt" : "",
(t->flags & THREAD_FLAG_IDLE) ? "Id" : "",
(t->flags & THREAD_FLAG_DEBUG_STACK_BOUNDS_CHECK) ? "Sc" : "");
- dprintf(INFO, "\twait queue %p, blocked_status %d, interruptable %d\n",
- t->blocking_wait_queue, t->blocked_status, t->interruptable);
+ dprintf(INFO, "\twait queue %p, blocked_status %d, interruptable %d, mutexes held %d\n",
+ t->blocking_wait_queue, t->blocked_status, t->interruptable, t->mutexes_held);
dprintf(INFO, "\taspace %p\n", t->aspace);
dprintf(INFO, "\tuser_thread %p, pid %" PRIu64 ", tid %" PRIu64 "\n",
t->user_thread, t->user_pid, t->user_tid);
arch_dump_thread(t);
} else {
- printf("thr %p st %4s pri %2d:%d pid %" PRIu64 " tid %" PRIu64 " (%s:%s)\n",
- t, thread_state_to_str(t->state), t->base_priority, t->priority_boost, t->user_pid,
+ printf("thr %p st %4s m %d pri %2d [%d:%d,%d] pid %" PRIu64 " tid %" PRIu64 " (%s:%s)\n",
+ t, thread_state_to_str(t->state), t->mutexes_held, t->effec_priority, t->base_priority,
+ t->priority_boost, t->inheirited_priority, t->user_pid,
t->user_tid, oname, t->name);
}
}
diff --git a/kernel/kernel/wait.c b/kernel/kernel/wait.c
index c8bda5b..f1b59d9 100644
--- a/kernel/kernel/wait.c
+++ b/kernel/kernel/wait.c
@@ -48,17 +48,17 @@
list_initialize(&t->queue_node);
list_add_head(&wait->heads, &t->wait_queue_heads_node);
} else {
- int pri = t->base_priority; // TODO: get the effective priority from sched.c
+ int pri = t->effec_priority;
// walk through the sorted list of wait queue heads
thread_t* temp;
list_for_every_entry(&wait->heads, temp, thread_t, wait_queue_heads_node) {
- if (pri > temp->base_priority) {
+ if (pri > temp->effec_priority) {
// insert ourself here as a new queue head
list_initialize(&t->queue_node);
list_add_before(&temp->wait_queue_heads_node, &t->wait_queue_heads_node);
return;
- } else if (temp->base_priority == pri) {
+ } else if (temp->effec_priority == pri) {
// same priority, add ourself to the tail of this queue
list_add_tail(&temp->queue_node, &t->queue_node);
list_clear_node(&t->wait_queue_heads_node);
@@ -114,6 +114,14 @@
}
}
+int wait_queue_blocked_priority(wait_queue_t* wait) {
+    // the heads list is kept sorted by descending effective priority, so the front entry
+    // (if any) is the highest-priority blocked thread
+    thread_t* head = list_peek_head_type(&wait->heads, thread_t, wait_queue_heads_node);
+    if (head == NULL)
+        return -1;  // no threads are blocked on this queue
+    return head->effec_priority;
+}
+
static zx_status_t wait_queue_block_worker(wait_queue_t* wait, zx_time_t deadline,
uint signal_mask) {
timer_t timer;