[kernel][sched] Tweak fair priority curve and preemption / latency.
- Switch to an exponential priority-to-weight curve to yield a ~65%
bandwidth increase to priority 24 (high priority) over priority 16
(default priority), and a constant ~10% difference between adjacent
priorities.
- Tweak the bandwidth adjustment to use the start of the current time
slice when changing the task rate so that preemptions happen faster
when high priority work is available.
- Adjust the target latency to provide a time slice more similar to the
previous scheduler when a high priority thread competes with lower
priority threads.
- Minor refactor to remove weight-to-priority conversions, which would
require a fixed-point logarithm to compute the inverse of the
exponential term.
- Optimize the priority-to-weight conversion using a constant table.
Bug: MTWN-269
Bug: ZX-3504
Test: Detailed tracing of synthetic workload; observe ratios of
bandwidth between priority 16 and priority 24 tasks.
Change-Id: I596c45cf3d8c94f93c986508f9ec1007624ac9f3
diff --git a/zircon/kernel/include/kernel/fair_scheduler.h b/zircon/kernel/include/kernel/fair_scheduler.h
index 83cebd4..fdead45 100644
--- a/zircon/kernel/include/kernel/fair_scheduler.h
+++ b/zircon/kernel/include/kernel/fair_scheduler.h
@@ -34,10 +34,12 @@
static constexpr SchedDuration kDefaultMinimumGranularity = SchedUs(750);
// Default target latency for a scheduling period.
- static constexpr SchedDuration kDefaultTargetLatency = SchedMs(6);
+ static constexpr SchedDuration kDefaultTargetLatency = SchedMs(16);
// Default peak latency for a scheduling period.
- static constexpr SchedDuration kDefaultPeakLatency = SchedMs(10);
+ static constexpr SchedDuration kDefaultPeakLatency = SchedMs(24);
+
+ static_assert(kDefaultPeakLatency >= kDefaultTargetLatency);
FairScheduler() = default;
~FairScheduler() = default;
@@ -80,7 +82,7 @@
friend void sched_preempt_timer_tick(zx_time_t now);
// Static scheduler methods called by the wrapper API above.
- static void InitializeThread(thread_t* thread, SchedWeight weight);
+ static void InitializeThread(thread_t* thread, int priority);
static void Block() TA_REQ(thread_lock);
static void Yield() TA_REQ(thread_lock);
static void Preempt() TA_REQ(thread_lock);
@@ -91,9 +93,9 @@
static void UnblockIdle(thread_t* idle_thread) TA_REQ(thread_lock);
static void Migrate(thread_t* thread) TA_REQ(thread_lock);
static void MigrateUnpinnedThreads(cpu_num_t current_cpu) TA_REQ(thread_lock);
- static void ChangeWeight(thread_t* thread, SchedWeight weight,
+ static void ChangeWeight(thread_t* thread, int priority,
cpu_mask_t* cpus_to_reschedule_mask) TA_REQ(thread_lock);
- static void InheritWeight(thread_t* thread, SchedWeight weight,
+ static void InheritWeight(thread_t* thread, int priority,
cpu_mask_t* cpus_to_reschedule_mask) TA_REQ(thread_lock);
static void TimerTick(SchedTime now);
@@ -129,7 +131,6 @@
cpu_mask_t* cpus_to_reschedule_mask,
PropagatePI propagate) TA_REQ(thread_lock);
-
// Common logic for reschedule API.
void RescheduleCommon(SchedTime now) TA_REQ(thread_lock);
diff --git a/zircon/kernel/include/kernel/fair_task_state.h b/zircon/kernel/include/kernel/fair_task_state.h
index 5c2adeb..304137e 100644
--- a/zircon/kernel/include/kernel/fair_task_state.h
+++ b/zircon/kernel/include/kernel/fair_task_state.h
@@ -17,16 +17,21 @@
// Forward declaration.
typedef struct thread thread_t;
-// Fixed-point task weight/priority. The 5bit fractional component supports 32
-// priority levels (1/32 through 32/32), while the 26bit integer component
-// supports sums of ~64M threads with weight 1.0.
+// Fixed-point task weight.
//
-// Weights should not be negative however, the value is signed for consistency
+// The 16bit fractional component accommodates the exponential curve defining
+// the priority-to-weight relation:
+//
+// Weight = 1.225^(Priority - 31)
+//
+// This yields roughly 10% bandwidth difference between adjacent priorities.
+//
+// Weights should not be negative; however, the value is signed for consistency
// with zx_time_t (SchedTime) and zx_duration_t (SchedDuration), which are the
// primary types used in conjunction with SchedWeight. This is to make it less
// likely that expressions involving weights are accidentally promoted to
// unsigned.
-using SchedWeight = ffl::Fixed<int32_t, 5>;
+using SchedWeight = ffl::Fixed<int64_t, 16>;
// Fixed-point types wrapping time and duration types to make time expressions
// cleaner in the scheduler code.
diff --git a/zircon/kernel/kernel/debug.cpp b/zircon/kernel/kernel/debug.cpp
index 055c678..ea6db11 100644
--- a/zircon/kernel/kernel/debug.cpp
+++ b/zircon/kernel/kernel/debug.cpp
@@ -137,7 +137,7 @@
printf("\ttimer interrupts: %lu\n", percpu.stats.timer_ints);
printf("\ttimers: %lu\n", percpu.stats.timers);
#if WITH_FAIR_SCHEDULER
- printf("\ttotal weight: %d\n", percpu.fair_runqueue.GetTotalWeight().raw_value());
+ printf("\ttotal weight: %" PRIi64 "\n", percpu.fair_runqueue.GetTotalWeight().raw_value());
printf("\trunnable tasks: %zu\n", percpu.fair_runqueue.GetRunnableTasks());
#endif
}
diff --git a/zircon/kernel/kernel/fair_scheduler.cpp b/zircon/kernel/kernel/fair_scheduler.cpp
index 62ab612..6a608f2 100644
--- a/zircon/kernel/kernel/fair_scheduler.cpp
+++ b/zircon/kernel/kernel/fair_scheduler.cpp
@@ -64,20 +64,34 @@
namespace {
-// Converts from kernel priority value in the range [0, 31] to weight.
+// Conversion table entry. Scales the integer argument to a fixed-point weight
+// in the interval (0.0, 1.0].
+struct WeightTableEntry {
+ constexpr WeightTableEntry(int64_t value)
+ : value{FromRatio<int64_t>(value, SchedWeight::Format::Power)} {}
+ constexpr operator SchedWeight() const { return value; }
+ const SchedWeight value;
+};
+
+// Table of fixed-point constants converting from kernel priority to fair
+// scheduler weight.
+constexpr WeightTableEntry kPriorityToWeightTable[] = {
+ 121, 149, 182, 223, 273, 335, 410, 503,
+ 616, 754, 924, 1132, 1386, 1698, 2080, 2549,
+ 3122, 3825, 4685, 5739, 7030, 8612, 10550, 12924,
+ 15832, 19394, 23757, 29103, 35651, 43672, 53499, 65536};
+
+// Converts from kernel priority value in the interval [0, 31] to weight in the
+// interval (0.0, 1.0]. See the definition of SchedWeight for an explanation of
+// the weight distribution.
constexpr SchedWeight PriorityToWeight(int priority) {
- return FromRatio(priority + 1, NUM_PRIORITIES);
+ return kPriorityToWeightTable[priority];
}
// The minimum possible weight and its reciprocal.
constexpr SchedWeight kMinWeight = PriorityToWeight(LOWEST_PRIORITY);
constexpr SchedWeight kReciprocalMinWeight = 1 / kMinWeight;
-// Converts from weight to kernel priority value in the range [0, 31].
-constexpr int WeightToPriority(SchedWeight weight) {
- return Round<int>(kReciprocalMinWeight * weight) - 1;
-}
-
// Utility operator to make expressions more succinct that update thread times
// and durations of basic types using the fixed-point counterparts.
constexpr zx_time_t& operator+=(zx_time_t& value, SchedDuration delta) {
@@ -168,10 +182,10 @@
return &percpu::Get(cpu).fair_runqueue;
}
-void FairScheduler::InitializeThread(thread_t* thread, SchedWeight weight) {
- new (&thread->fair_task_state) FairTaskState{weight};
- thread->base_priority = WeightToPriority(weight);
- thread->effec_priority = WeightToPriority(weight);
+void FairScheduler::InitializeThread(thread_t* thread, int priority) {
+ new (&thread->fair_task_state) FairTaskState{PriorityToWeight(priority)};
+ thread->base_priority = priority;
+ thread->effec_priority = priority;
thread->inherited_priority = -1;
thread->priority_boost = 0;
}
@@ -322,7 +336,7 @@
// Adjust the rate of the current thread when demand changes. Changes in
// demand could be due to threads entering or leaving the run queue, or due
// to weights changing in the current or enqueued threads.
- if (weight_total_ > 0 && weight_total_ != scheduled_weight_total_) {
+ if (weight_total_ > SchedWeight{0} && weight_total_ != scheduled_weight_total_) {
LOCAL_KTRACE_DURATION trace_adjust_rate{"adjust_rate"_stringref};
scheduled_weight_total_ = weight_total_;
@@ -333,7 +347,7 @@
// Update the preemption timer if necessary.
if (timeslice_changed && timeslice_remaining) {
const SchedTime absolute_deadline_ns =
- current_thread->last_started_running + time_slice_ns;
+ start_of_current_time_slice_ns_ + time_slice_ns;
timer_preempt_reset(absolute_deadline_ns.raw_value());
}
@@ -464,9 +478,15 @@
FairTaskState* const state = &thread->fair_task_state;
// Calculate the relative portion of the scheduling period.
- const Fixed<int64_t, 5> time_slice_grans = scheduling_period_grans_ *
- state->weight_ / weight_total_;
- const SchedDuration time_slice_ns = time_slice_grans.Ceiling() *
+ const SchedWeight proportional_time_slice_grans = scheduling_period_grans_ *
+ state->weight_ / weight_total_;
+
+ // Ensure that the time slice is at least the minimum granularity.
+ const int64_t time_slice_grans = Round<int64_t>(proportional_time_slice_grans);
+ const int64_t minimum_time_slice_grans = time_slice_grans > 0 ? time_slice_grans : 1;
+
+ // Calculate the time slice in nanoseconds.
+ const SchedDuration time_slice_ns = minimum_time_slice_grans *
minimum_granularity_ns_;
trace.End(state->weight_.raw_value(), weight_total_.raw_value());
@@ -482,7 +502,6 @@
FairTaskState* const state = &thread->fair_task_state;
state->time_slice_ns_ = CalculateTimeslice(thread);
- DEBUG_ASSERT(state->time_slice_ns_ >= minimum_granularity_ns_);
SCHED_LTRACEF("name=%s weight_total=%#x weight=%#x time_slice_ns=%ld\n",
thread->name,
@@ -510,8 +529,8 @@
const SchedDuration scheduling_period_ns = scheduling_period_grans_ *
minimum_granularity_ns_;
- const SchedDuration delta_norm = scheduling_period_ns /
- (kReciprocalMinWeight * state->weight_);
+ const SchedWeight rate = kReciprocalMinWeight * state->weight_;
+ const SchedDuration delta_norm = scheduling_period_ns / rate;
state->virtual_finish_time_ = state->virtual_start_time_ + delta_norm;
DEBUG_ASSERT_MSG(state->virtual_start_time_ < state->virtual_finish_time_,
@@ -916,18 +935,15 @@
}
}
-void FairScheduler::ChangeWeight(thread_t* thread, SchedWeight weight,
+void FairScheduler::ChangeWeight(thread_t* thread, int priority,
cpu_mask_t* cpus_to_reschedule_mask) {
LOCAL_KTRACE_DURATION trace{"sched_change_weight"_stringref};
DEBUG_ASSERT(spin_lock_held(&thread_lock));
- SCHED_LTRACEF("thread={%s, %s} base=%d effective=%d inherited=%d "
- "current_weight=%d new_weight=%d\n",
+ SCHED_LTRACEF("thread={%s, %s} base=%d effective=%d inherited=%d\n",
thread->name, ToString(thread->state),
thread->base_priority, thread->effec_priority,
- thread->inherited_priority,
- WeightToPriority(thread->fair_task_state.weight()),
- WeightToPriority(weight));
+ thread->inherited_priority);
if (thread_is_idle(thread) || thread->state == THREAD_DEATH) {
return;
@@ -938,20 +954,20 @@
// deadline scheduler is available and remove this conversion once the
// kernel uses the abstraction throughout.
const int original_priority = thread->effec_priority;
- thread->base_priority = WeightToPriority(weight);
+ thread->base_priority = priority;
thread->priority_boost = 0;
// Adjust the effective priority for inheritance, if necessary.
if (thread->inherited_priority > thread->base_priority) {
thread->effec_priority = thread->inherited_priority;
- weight = PriorityToWeight(thread->effec_priority);
} else {
thread->effec_priority = thread->base_priority;
}
// Perform the state-specific updates if the effective priority changed.
if (thread->effec_priority != original_priority) {
- UpdateWeightCommon(thread, original_priority, weight,
+ UpdateWeightCommon(thread, original_priority,
+ PriorityToWeight(thread->effec_priority),
cpus_to_reschedule_mask,
PropagatePI::Yes);
}
@@ -959,20 +975,16 @@
trace.End(original_priority, thread->effec_priority);
}
-void FairScheduler::InheritWeight(thread_t* thread, SchedWeight weight,
+void FairScheduler::InheritWeight(thread_t* thread, int priority,
cpu_mask_t* cpus_to_reschedule_mask) {
LOCAL_KTRACE_DURATION trace{"sched_inherit_weight"_stringref};
DEBUG_ASSERT(spin_lock_held(&thread_lock));
- SCHED_LTRACEF("thread={%s, %s} base=%d effective=%d inherited=%d "
- "current_weight=%d new_weight=%d\n",
+ SCHED_LTRACEF("thread={%s, %s} base=%d effective=%d inherited=%d\n",
thread->name, ToString(thread->state),
thread->base_priority, thread->effec_priority,
- thread->inherited_priority,
- WeightToPriority(thread->fair_task_state.weight()),
- WeightToPriority(weight));
+ thread->inherited_priority);
- const int priority = WeightToPriority(weight);
const int original_priority = thread->effec_priority;
thread->inherited_priority = priority;
thread->priority_boost = 0;
@@ -981,12 +993,12 @@
thread->effec_priority = thread->inherited_priority;
} else {
thread->effec_priority = thread->base_priority;
- weight = PriorityToWeight(thread->effec_priority);
}
// Perform the state-specific updates if the effective priority changed.
if (thread->effec_priority != original_priority) {
- UpdateWeightCommon(thread, original_priority, weight,
+ UpdateWeightCommon(thread, original_priority,
+ PriorityToWeight(thread->effec_priority),
cpus_to_reschedule_mask,
PropagatePI::No);
}
@@ -1002,7 +1014,7 @@
// Temporary compatibility with the thread layer.
void sched_init_thread(thread_t* thread, int priority) {
- FairScheduler::InitializeThread(thread, PriorityToWeight(priority));
+ FairScheduler::InitializeThread(thread, priority);
}
void sched_block() {
@@ -1048,8 +1060,7 @@
void sched_inherit_priority(thread_t* thread, int priority,
bool* local_reschedule,
cpu_mask_t* cpus_to_reschedule_mask) {
- FairScheduler::InheritWeight(thread, PriorityToWeight(priority),
- cpus_to_reschedule_mask);
+ FairScheduler::InheritWeight(thread, priority, cpus_to_reschedule_mask);
const cpu_mask_t current_cpu_mask = cpu_num_to_mask(arch_curr_cpu_num());
if (*cpus_to_reschedule_mask & current_cpu_mask) {
@@ -1059,8 +1070,7 @@
void sched_change_priority(thread_t* thread, int priority) {
cpu_mask_t cpus_to_reschedule_mask = 0;
- FairScheduler::ChangeWeight(thread, PriorityToWeight(priority),
- &cpus_to_reschedule_mask);
+ FairScheduler::ChangeWeight(thread, priority, &cpus_to_reschedule_mask);
const cpu_mask_t current_cpu_mask = cpu_num_to_mask(arch_curr_cpu_num());
if (cpus_to_reschedule_mask & current_cpu_mask) {