| // Copyright 2018 The Fuchsia Authors |
| // |
| // Use of this source code is governed by a MIT-style |
| // license that can be found in the LICENSE file or at |
| // https://opensource.org/licenses/MIT |
| #ifndef ZIRCON_KERNEL_INCLUDE_KERNEL_SCHEDULER_H_ |
| #define ZIRCON_KERNEL_INCLUDE_KERNEL_SCHEDULER_H_ |
| |
| #include <platform.h> |
| #include <stdint.h> |
| #include <zircon/syscalls/scheduler.h> |
| #include <zircon/types.h> |
| |
| #include <fbl/function.h> |
| #include <fbl/intrusive_pointer_traits.h> |
| #include <fbl/intrusive_wavl_tree.h> |
| #include <ffl/fixed.h> |
| #include <kernel/sched.h> |
| #include <kernel/scheduler_state.h> |
| #include <kernel/thread.h> |
| #include <kernel/wait.h> |
| #include <lib/relaxed_atomic.h> |
| |
| // Forward declaration. |
| struct percpu; |
| |
| // Implements fair and deadline scheduling algorithms and manages the associated |
| // per-CPU state. |
| class Scheduler { |
| public: |
| // Default minimum granularity of time slices. |
| static constexpr SchedDuration kDefaultMinimumGranularity = SchedUs(750); |
| |
| // Default target latency for a scheduling period. |
| static constexpr SchedDuration kDefaultTargetLatency = SchedMs(16); |
| |
| // Default peak latency for a scheduling period. |
| static constexpr SchedDuration kDefaultPeakLatency = SchedMs(24); |
| |
| static_assert(kDefaultPeakLatency >= kDefaultTargetLatency); |
| |
| // The adjustment rate of the exponential moving average tracking the expected |
| // runtime of each thread. |
| static constexpr size_t kExpectedRuntimeAdjustmentRateShift = 8; |
| |
| Scheduler() = default; |
| ~Scheduler() = default; |
| |
| Scheduler(const Scheduler&) = delete; |
| Scheduler& operator=(const Scheduler&) = delete; |
| |
| // Accessors for total weight and number of runnable tasks. |
| SchedWeight GetTotalWeight() const TA_EXCL(thread_lock); |
| size_t GetRunnableTasks() const TA_EXCL(thread_lock); |
| |
| // Dumps the state of the run queue to the debug log. |
| void Dump() TA_REQ(thread_lock); |
| |
| // Returns the number of the CPU this scheduler instance is associated with. |
| cpu_num_t this_cpu() const { return this_cpu_; } |
| |
| zx_duration_t predicted_queue_time_ns() const { |
| return total_expected_runtime_ns_.load().raw_value(); |
| } |
| |
| private: |
| // Allow percpu to init our cpu number. |
| friend struct percpu; |
| |
| // Befriend the sched API wrappers to enable calling the static methods |
| // below. |
| friend void sched_init_thread(Thread* thread, int priority); |
| friend void sched_block(); |
| friend void sched_yield(); |
| friend void sched_preempt(); |
| friend void sched_reschedule(); |
| friend void sched_resched_internal(); |
| friend void sched_unblock_idle(Thread* thread); |
| friend void sched_migrate(Thread* thread); |
| friend void sched_inherit_priority(Thread* thread, int priority, bool* local_reschedule, |
| cpu_mask_t* cpus_to_reschedule_mask); |
| friend void sched_change_priority(Thread* thread, int priority); |
| friend void sched_change_deadline(Thread* thread, const zx_sched_deadline_params_t& params); |
| friend bool sched_unblock(Thread* t); |
| friend bool sched_unblock_list(struct list_node* list); |
| friend void sched_transition_off_cpu(); |
| friend void sched_preempt_timer_tick(zx_time_t now); |
| |
| // Static scheduler methods called by the wrapper API above. |
| static void InitializeThread(Thread* thread, int priority); |
| static void InitializeThread(Thread* thread, const zx_sched_deadline_params_t& params); |
| static void Block() TA_REQ(thread_lock); |
| static void Yield() TA_REQ(thread_lock); |
| static void Preempt() TA_REQ(thread_lock); |
| static void Reschedule() TA_REQ(thread_lock); |
| static void RescheduleInternal() TA_REQ(thread_lock); |
| static bool Unblock(Thread* thread) __WARN_UNUSED_RESULT TA_REQ(thread_lock); |
| static bool Unblock(list_node* thread_list) __WARN_UNUSED_RESULT TA_REQ(thread_lock); |
| static void UnblockIdle(Thread* idle_thread) TA_REQ(thread_lock); |
| static void Migrate(Thread* thread) TA_REQ(thread_lock); |
| static void MigrateUnpinnedThreads() TA_REQ(thread_lock); |
| static void ChangeWeight(Thread* thread, int priority, cpu_mask_t* cpus_to_reschedule_mask) |
| TA_REQ(thread_lock); |
| static void ChangeDeadline(Thread* thread, const SchedDeadlineParams& params, |
| cpu_mask_t* cpus_to_reschedule_mask) TA_REQ(thread_lock); |
| static void InheritWeight(Thread* thread, int priority, cpu_mask_t* cpus_to_reschedule_mask) |
| TA_REQ(thread_lock); |
| static void TimerTick(SchedTime now); |
| |
| // Specifies how to place a thread in the virtual timeline and run queue. |
| enum class Placement { |
| // Selects a place in the queue based on the current insertion time and |
| // thread weight or deadline. |
| Insertion, |
| |
| // Selects a place in the queue based on the original insertion time and |
| // the updated (inherited or changed) weight or deadline. |
| Adjustment, |
| |
| // Selects a place in the queue based on the original insertion time and |
| // the updated time slice due to being preempted by another thread. |
| Preemption, |
| }; |
| |
| // Returns the current system time as a SchedTime value. |
| static SchedTime CurrentTime() { return SchedTime{current_time()}; } |
| |
| // Returns the Scheduler instance for the current CPU. |
| static Scheduler* Get(); |
| |
| // Returns the Scheduler instance for the given CPU. |
| static Scheduler* Get(cpu_num_t cpu); |
| |
| // Returns a CPU to run the given thread on. |
| static cpu_num_t FindTargetCpu(Thread* thread) TA_REQ(thread_lock); |
| |
| // Updates the system load metrics. |
| void UpdateCounters(SchedDuration queue_time_ns) TA_REQ(thread_lock); |
| |
| // Updates the thread's weight and updates state-dependent bookkeeping. |
| static void UpdateWeightCommon(Thread* thread, int original_priority, SchedWeight weight, |
| cpu_mask_t* cpus_to_reschedule_mask, PropagatePI propagate) |
| TA_REQ(thread_lock); |
| |
| // Updates the thread's deadline and updates state-dependent bookkeeping. |
| static void UpdateDeadlineCommon(Thread* thread, int original_priority, |
| const SchedDeadlineParams& params, |
| cpu_mask_t* cpus_to_reschedule_mask, PropagatePI propagate) |
| TA_REQ(thread_lock); |
| |
| using EndTraceCallback = fbl::InlineFunction<void(), sizeof(void*)>; |
| |
| // Common logic for reschedule API. |
| void RescheduleCommon(SchedTime now, EndTraceCallback end_outer_trace = nullptr) |
| TA_REQ(thread_lock); |
| |
| // Evaluates the schedule and returns the thread that should execute, |
| // updating the run queue as necessary. |
| Thread* EvaluateNextThread(SchedTime now, Thread* current_thread, bool timeslice_expired, |
| SchedDuration total_runtime_ns) TA_REQ(thread_lock); |
| |
| // Adds a thread to the run queue tree. The thread must be active on this |
| // CPU. |
| void QueueThread(Thread* thread, Placement placement, SchedTime now = SchedTime{0}, |
| SchedDuration total_runtime_ns = SchedDuration{0}) TA_REQ(thread_lock); |
| |
| // Removes the thread at the head of the first eligible run queue. |
| Thread* DequeueThread(SchedTime now) TA_REQ(thread_lock); |
| |
| // Removes the thread at the head of the fair run queue and returns it. |
| Thread* DequeueFairThread() TA_REQ(thread_lock); |
| |
| // Removes the eligible thread with the earliest deadline in the deadline run |
| // queue and returns it. |
| Thread* DequeueDeadlineThread(SchedTime eligible_time) TA_REQ(thread_lock); |
| |
| // Returns the eligible thread in the run queue with a deadline earlier than |
| // the given deadline, or nullptr if one does not exist. |
| Thread* FindEarlierDeadlineThread(SchedTime eligible_time, SchedTime finish_time) |
| TA_REQ(thread_lock); |
| |
| // Removes the eligible thread with a deadline earlier than the given deadline |
| // and returns it or nullptr if one does not exist. |
| Thread* DequeueEarlierDeadlineThread(SchedTime eligible_time, SchedTime finish_time) |
| TA_REQ(thread_lock); |
| |
| // Returns the time that the next deadline task will become eligible or infinite |
| // if there are no ready deadline tasks. |
| SchedTime GetNextEligibleTime() TA_REQ(thread_lock); |
| |
| // Calculates the timeslice of the thread based on the current run queue |
| // state. |
| SchedDuration CalculateTimeslice(Thread* thread) TA_REQ(thread_lock); |
| |
| // Returns the completion time clamped to the start of the earliest deadline |
| // thread that will become eligible in that time frame. |
| SchedTime ClampToDeadline(SchedTime completion_time) TA_REQ(thread_lock); |
| |
| // Returns the completion time clamped to the start of the earliest deadline |
| // thread that will become eligible in that time frame and also has an earlier |
| // deadline than the given finish time. |
| SchedTime ClampToEarlierDeadline(SchedTime completion_time, SchedTime finish_time) |
| TA_REQ(thread_lock); |
| |
| // Updates the timeslice of the thread based on the current run queue state. |
| // Returns the absolute deadline for the next time slice, which may be earlier |
| // than the completion of the time slice if other threads could preempt the |
| // given thread before the time slice is exhausted. |
| SchedTime NextThreadTimeslice(Thread* thread, SchedTime now) TA_REQ(thread_lock); |
| |
| // Updates the scheduling period based on the number of active threads. |
| void UpdatePeriod() TA_REQ(thread_lock); |
| |
| // Updates the global virtual timeline. |
| void UpdateTimeline(SchedTime now) TA_REQ(thread_lock); |
| |
| // Makes a thread active on this CPU's scheduler and inserts it into the |
| // run queue tree. |
| void Insert(SchedTime now, Thread* thread) TA_REQ(thread_lock); |
| |
| // Removes the thread from this CPU's scheduler. The thread must not be in |
| // the run queue tree. |
| void Remove(Thread* thread) TA_REQ(thread_lock); |
| |
| // Returns true if there is at least one eligible deadline thread in the |
| // run queue. |
| inline bool IsDeadlineThreadEligible(SchedTime eligible_time) TA_REQ(thread_lock) { |
| return !deadline_run_queue_.is_empty() && |
| deadline_run_queue_.front().scheduler_state_.start_time_ <= eligible_time; |
| } |
| |
| // Traits type to adapt the WAVLTree to Thread with node state in the |
| // scheduler_state member. |
| struct TaskTraits { |
| using KeyType = SchedulerState::KeyType; |
| static KeyType GetKey(const Thread& thread) { return thread.scheduler_state_.key(); } |
| static bool LessThan(KeyType a, KeyType b) { return a < b; } |
| static bool EqualTo(KeyType a, KeyType b) { return a == b; } |
| static auto& node_state(Thread& thread) { return thread.scheduler_state_.run_queue_node_; } |
| }; |
| |
| // Observer that maintains the subtree invariant min_finish_time as nodes are |
| // added to and removed from the run queue. |
| struct SubtreeMinObserver { |
| template <typename Iter> |
| static void RecordInsert(Iter node); |
| template <typename Iter> |
| static void RecordInsertCollision(Thread* node, Iter collision); |
| template <typename Iter> |
| static void RecordInsertReplace(Iter node, Thread* replacement); |
| template <typename Iter> |
| static void RecordInsertTraverse(Thread* node, Iter ancestor); |
| template <typename Iter> |
| static void RecordRotation(Iter pivot, Iter lr_child, Iter rl_child, Iter parent, Iter sibling); |
| template <typename Iter> |
| static void RecordErase(Thread* node, Iter invalidated); |
| |
| static void RecordInsertPromote() {} |
| static void RecordInsertRotation() {} |
| static void RecordInsertDoubleRotation() {} |
| static void RecordEraseDemote() {} |
| static void RecordEraseRotation() {} |
| static void RecordEraseDoubleRotation() {} |
| }; |
| |
| // Alias of the WAVLTree type for the run queue. |
| using RunQueue = fbl::WAVLTree<SchedTime, Thread*, TaskTraits, TaskTraits, fbl::DefaultObjectTag, |
| SubtreeMinObserver>; |
| |
| // Finds the next eligible thread in the given run queue. |
| static Thread* FindEarliestEligibleThread(RunQueue* run_queue, SchedTime eligible_time) |
| TA_REQ(thread_lock); |
| |
| // Returns the run queue for the given thread's scheduling discipline. |
| inline RunQueue& GetRunQueue(Thread* thread) { |
| return thread->scheduler_state_.discipline() == SchedDiscipline::Fair ? fair_run_queue_ |
| : deadline_run_queue_; |
| } |
| |
| // The run queue of fair scheduled threads ready to run, but not currently running. |
| TA_GUARDED(thread_lock) |
| RunQueue fair_run_queue_; |
| |
| // The run queue of deadline scheduled threads ready to run, but not currently running. |
| TA_GUARDED(thread_lock) |
| RunQueue deadline_run_queue_; |
| |
| // Pointer to the thread actively running on this CPU. |
| TA_GUARDED(thread_lock) |
| Thread* active_thread_{nullptr}; |
| |
| // Monotonically increasing counter to break ties when queuing tasks with |
| // the same key. This has the effect of placing newly queued tasks behind |
| // already queued tasks with the same key. This is also necessary to |
| // guarantee uniqueness of the key as required by the WAVLTree container. |
| TA_GUARDED(thread_lock) |
| uint64_t generation_count_{0}; |
| |
| // Count of the fair threads running on this CPU, including threads in the run |
| // queue and the currently running thread. Does not include the idle thread. |
| TA_GUARDED(thread_lock) |
| int32_t runnable_fair_task_count_{0}; |
| |
| // Count of the deadline threads running on this CPU, including threads in the |
| // run queue and the currently running thread. Does not include the idle |
| // thread. |
| TA_GUARDED(thread_lock) |
| int32_t runnable_deadline_task_count_{0}; |
| |
| // Total weights of threads running on this CPU, including threads in the |
| // run queue and the currently running thread. Does not include the idle |
| // thread. |
| TA_GUARDED(thread_lock) |
| SchedWeight weight_total_{0}; |
| |
| // The value of |weight_total_| when the current thread was scheduled. |
| // Provides a reference for determining whether the total weights changed |
| // since the last reschedule. |
| TA_GUARDED(thread_lock) |
| SchedWeight scheduled_weight_total_{0}; |
| |
| // The global virtual time of this run queue. |
| TA_GUARDED(thread_lock) |
| SchedTime virtual_time_{0}; |
| |
| // The system time since the last update to the global virtual time. |
| TA_GUARDED(thread_lock) |
| SchedTime last_update_time_ns_{0}; |
| |
| // The system time that the current time slice started. |
| TA_GUARDED(thread_lock) |
| SchedTime start_of_current_time_slice_ns_{0}; |
| |
| // The system time that the current thread should be preempted. |
| TA_GUARDED(thread_lock) |
| SchedTime absolute_deadline_ns_{0}; |
| |
| // The sum of the expected runtimes of all active threads on this CPU. This |
| // value is an estimate of the average queuimg time for this CPU, given the |
| // current set of active threads. |
| RelaxedAtomic<SchedDuration> total_expected_runtime_ns_{SchedNs(0)}; |
| |
| // The sum of the worst case utilization of all active deadline threads on |
| // this CPU. |
| TA_GUARDED(thread_lock) |
| SchedUtilization total_deadline_utilization_{0}; |
| |
| // Scheduling period in which every runnable task executes once in units of |
| // minimum granularity. |
| TA_GUARDED(thread_lock) |
| SchedDuration scheduling_period_grans_{kDefaultTargetLatency / kDefaultMinimumGranularity}; |
| |
| // The smallest timeslice a thread is allocated in a single round. |
| TA_GUARDED(thread_lock) |
| SchedDuration minimum_granularity_ns_{kDefaultMinimumGranularity}; |
| |
| // The target scheduling period. The scheduling period is set to this value |
| // when the number of tasks is low enough for the sum of all timeslices to |
| // fit within this duration. This has the effect of increasing the size of |
| // the timeslices under nominal load to reduce scheduling overhead. |
| TA_GUARDED(thread_lock) |
| SchedDuration target_latency_grans_{kDefaultTargetLatency / kDefaultMinimumGranularity}; |
| |
| // The scheduling period threshold over which the CPU is considered |
| // oversubscribed. |
| TA_GUARDED(thread_lock) |
| SchedDuration peak_latency_grans_{kDefaultPeakLatency / kDefaultMinimumGranularity}; |
| |
| // The CPU this scheduler instance is associated with. |
| // NOTE: This member is not initialized to prevent clobbering the value set |
| // by sched_early_init(), which is called before the global ctors that |
| // initialize the rest of the members of this class. |
| // TODO(eieio): Figure out a better long-term solution to determine which |
| // CPU is associated with each instance of this class. This is needed by |
| // non-static methods that are called from arbitrary CPUs, namely Insert(). |
| cpu_num_t this_cpu_; |
| }; |
| |
| #endif // ZIRCON_KERNEL_INCLUDE_KERNEL_SCHEDULER_H_ |