// Copyright 2020 The Fuchsia Authors
//
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file or at
// https://opensource.org/licenses/MIT

#ifndef ZIRCON_KERNEL_VM_INCLUDE_VM_PAGE_QUEUES_H_
#define ZIRCON_KERNEL_VM_INCLUDE_VM_PAGE_QUEUES_H_

#include <lib/fitx/result.h>
#include <sys/types.h>
#include <zircon/listnode.h>

#include <fbl/algorithm.h>
#include <fbl/macros.h>
#include <kernel/event.h>
#include <kernel/lockdep.h>
#include <kernel/mutex.h>
#include <kernel/semaphore.h>
#include <ktl/array.h>
#include <ktl/optional.h>
#include <vm/page.h>

class VmCowPages;
class VmCowPagesContainer;
// Allocated pages that are part of the cow pages in a VmObjectPaged can be placed in a page queue.
// The page queues provide a way to
// * Classify and group pages across VMO boundaries
// * Retrieve the VMO that a page is contained in (via a back reference stored in the vm_page_t)
// Once a page has been placed in a page queue its queue_node becomes owned by the page queue and
// must not be used until the page has been Remove'd. It is not sufficient to call list_delete on
// the queue_node yourself as this operation is not atomic and needs to be performed whilst holding
// the PageQueues::lock_.
class PageQueues {
 public:
  // The number of pager backed queues is slightly arbitrary, but to be useful you want at least 3
  // representing
  // * Very new pages that you probably don't want to evict as doing so probably implies you are
  //   in swap death
  // * Slightly old pages that could be evicted if needed
  // * Very old pages that you'd be happy to evict
  // With two active queues, 8 page queues are used so that there is some fidelity of information
  // in the inactive queues. Additional queues have reduced value as sufficiently old pages quickly
  // become equally unlikely to be used in the future.
  static constexpr size_t kNumPagerBacked = 8;

  // Two active queues are used to allow for better fidelity of active information. This prevents
  // a race between aging once and needing to collect/harvest age information.
  static constexpr size_t kNumActiveQueues = 2;

  static_assert(kNumPagerBacked > kNumActiveQueues, "Needs to be at least one non-active queue");

  // In addition to active and inactive, we want to consider some of the queues as 'oldest' to
  // provide an additional way to limit eviction. Presently the processing of the LRU queue to make
  // room for aging is not integrated with the Evictor and so will not trigger eviction. Therefore,
  // for a non-zero number of pages to ever appear in an oldest queue for eviction, the last two
  // queues are considered the oldest.
  static constexpr size_t kNumOldestQueues = 2;
  static_assert(kNumOldestQueues + kNumActiveQueues <= kNumPagerBacked);
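
  // For illustration (not normative): with the constants above, and taking age 0 to be the queue
  // holding the most recently accessed pages, the pager backed queues split as
  //   ages 0-1: active   (kNumActiveQueues = 2)
  //   ages 2-7: inactive
  //   ages 6-7: oldest   (kNumOldestQueues = 2, a subset of the inactive queues)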

  static constexpr zx_duration_t kDefaultMinMruRotateTime = ZX_SEC(5);
  static constexpr zx_duration_t kDefaultMaxMruRotateTime = ZX_SEC(5);

  // This is presently an arbitrary constant: since the min and max mru rotate times are currently
  // fixed at the same value, the active ratio cannot presently trigger, or prevent, aging.
  static constexpr uint64_t kDefaultActiveRatioMultiplier = 0;

  PageQueues();
  ~PageQueues();

  DISALLOW_COPY_ASSIGN_AND_MOVE(PageQueues);

  // This is a specialized version of MarkAccessed designed to be called during accessed
  // harvesting. It does not update active/inactive counts, and this needs to be done separately
  // once harvesting is complete. It is only permitted to call this in between BeginAccessScan and
  // EndAccessScan calls.
  void MarkAccessedDeferredCount(vm_page_t* page) {
    // Ensure that the page queues are returning the cached counts at the moment, otherwise we
    // might race.
    DEBUG_ASSERT(use_cached_queue_counts_.load(ktl::memory_order_relaxed));
    auto queue_ref = page->object.get_page_queue_ref();
    uint8_t old_gen = queue_ref.load(ktl::memory_order_relaxed);
    // Between loading the mru_gen and finally storing it in the queue_ref it's possible for our
    // calculated target_queue to become invalid. This is extremely unlikely as it would require
    // us to stall for long enough for the lru_gen to pass this point, but if it does happen then
    // ProcessDontNeedAndLruQueues will notice our queue is invalid and correct our age to be that
    // of lru_gen.
    const uint32_t target_queue = mru_gen_to_queue();
    do {
      // If we ever find old_gen to not be in the active/inactive range then this means the page
      // has either been racily removed from, or was never in, the pager backed queues. In which
      // case we can return as there's nothing to be marked accessed.
      if (!queue_is_pager_backed(static_cast<PageQueue>(old_gen))) {
        return;
      }
    } while (!queue_ref.compare_exchange_weak(old_gen, static_cast<uint8_t>(target_queue),
                                              ktl::memory_order_relaxed));
    page_queue_counts_[old_gen].fetch_sub(1, ktl::memory_order_relaxed);
    page_queue_counts_[target_queue].fetch_add(1, ktl::memory_order_relaxed);
  }

  // Place page in the wired queue. Must not already be in a page queue.
  void SetWired(vm_page_t* page);
  // Moves page from whichever queue it is currently in, to the wired queue.
  void MoveToWired(vm_page_t* page);
  // Moves page from whichever queue it is currently in, to the wired queue, and also sets the
  // backlink information.
  void MoveToWired(vm_page_t* page, VmCowPages* object, uint64_t page_offset);
  // Place page in the unswappable queue. Must not already be in a page queue.
  void SetUnswappable(vm_page_t* page);
  // Moves page from whichever queue it is currently in, to the unswappable queue.
  void MoveToUnswappable(vm_page_t* page);
  // Place page in the pager backed queue. Must not already be in a page queue. Sets the back
  // reference information. If the page is removed from the referenced object (especially if it's
  // due to the object being destroyed) then this back reference *must* be updated, either by
  // calling Remove or calling MoveToPagerBacked with the new object information.
  void SetPagerBacked(vm_page_t* page, VmCowPages* object, uint64_t page_offset);
  // Moves page from whichever queue it is currently in, to the pager backed queue. Same rules on
  // keeping the back reference up to date as given in SetPagerBacked apply.
  void MoveToPagerBacked(vm_page_t* page, VmCowPages* object, uint64_t page_offset);
  // Moves page from whichever queue it is currently in, to the DontNeed pager backed queue. The
  // object back reference information must have already been set by a previous call to
  // SetPagerBacked or MoveToPagerBacked. Same rules on keeping the back reference up to date as
  // given in SetPagerBacked apply.
  void MoveToPagerBackedDontNeed(vm_page_t* page);
  // Place page in the Dirty pager backed queue. Must not already be in a page queue. Sets the back
  // reference information. Same rules on keeping the back reference up to date as given in
  // SetPagerBacked apply.
  void SetPagerBackedDirty(vm_page_t* page, VmCowPages* object, uint64_t page_offset);
  // Moves page from whichever queue it is currently in, to the Dirty pager backed queue. The
  // object back reference information must have already been set by a previous call to
  // SetPagerBacked or MoveToPagerBacked. Same rules on keeping the back reference up to date as
  // given in SetPagerBacked apply.
  void MoveToPagerBackedDirty(vm_page_t* page, VmCowPages* object, uint64_t page_offset);
  // Place page in the unswappable zero forked queue. Must not already be in a page queue. Same
  // rules for back pointers apply as for SetPagerBacked.
  void SetUnswappableZeroFork(vm_page_t* page, VmCowPages* object, uint64_t page_offset);
  // Moves page from whichever queue it is currently in, to the unswappable zero forked queue. Same
  // rules for back pointers apply as for SetPagerBacked.
  void MoveToUnswappableZeroFork(vm_page_t* page, VmCowPages* object, uint64_t page_offset);

  // Removes the page from any page list and returns ownership of the queue_node.
  void Remove(vm_page_t* page);
  // Batched version of Remove that also places all the removed pages in the specified list.
  void RemoveArrayIntoList(vm_page_t** pages, size_t count, list_node_t* out_list);

  // Variation on MoveToUnswappable that allows for already holding the lock.
  void MoveToUnswappableLocked(vm_page_t* page) TA_REQ(lock_);

  // Tells the page queue this page has been accessed, and it should have its position in the
  // queues updated. This method will take the internal page queues lock and should not be used for
  // accessed harvesting, where MarkAccessedDeferredCount should be used instead.
  void MarkAccessed(vm_page_t* page);

  // Provides access to the underlying lock, allowing _Locked variants to be called. Use of this is
  // highly discouraged as the underlying lock is a CriticalMutex which disables preemption.
  // Preferably *Array variations should be used, but this provides a higher performance mechanism
  // when needed.
  Lock<CriticalMutex>* get_lock() TA_RET_CAP(lock_) { return &lock_; }

  // Used to identify the reason that aging is triggered, mostly for debugging and informational
  // purposes.
  enum class AgeReason {
    // There is no current age reason.
    None,
    // Aging occurred due to the maximum timeout being reached before any other reason could
    // trigger it.
    Timeout,
    // The allowable ratio of active versus inactive pages was exceeded.
    ActiveRatio,
    // An explicit call to RotatePagerBackedQueues caused aging. This would typically occur due to
    // test code or via the kernel debug console.
    Manual,
  };
  static const char* string_from_age_reason(PageQueues::AgeReason reason);

  // Rotates the pager backed queues to perform aging. Every existing queue is now considered to be
  // one epoch older. To achieve this, two things are done:
  // 1. A new queue, representing the current epoch, needs to be allocated to put pages that get
  //    accessed from here into. This just involves incrementing the MRU generation.
  // 2. As there is a limited number of page queues, 'allocating' one might involve cleaning up an
  //    old queue. See the description of ProcessDontNeedAndLruQueues for how this process works.
  void RotatePagerBackedQueues(AgeReason reason = AgeReason::Manual);
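
  // As a worked illustration (not normative): with kNumPagerBacked = 8 and the initial state of
  // lru_gen_ = 0 and mru_gen_ = 7, all 8 queues are already in use. Incrementing the MRU
  // generation to 8 would map, via gen_to_queue, onto the same queue as generation 0, so the LRU
  // queue must first be processed to advance lru_gen_ to at least 1, freeing that queue to become
  // the new MRU queue.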

  // Used to represent and return page backlink information acquired whilst holding the page queue
  // lock. The contained cow reference could be null if the refptr could not be upgraded,
  // indicating that the vmo was being destroyed whilst trying to construct the backlink.
  // The page and offset contained here are not synchronized and must be separately validated
  // before use. This can be done by acquiring the returned vmo's lock and then validating that the
  // page is still contained at the offset.
  struct VmoBacklink {
    fbl::RefPtr<VmCowPages> cow;
    vm_page_t* page = nullptr;
    uint64_t offset = 0;
  };

  // Moves a page from the unswappable zero fork queue into the unswappable queue and returns the
  // backlink information. If the zero fork queue is empty then a nullopt is returned, otherwise if
  // it has_value the cow field may be null to indicate that the vmo is running its destructor (see
  // VmoBacklink for more details).
  ktl::optional<VmoBacklink> PopUnswappableZeroFork();

  // Looks at the pager_backed queues and returns backlink information of the first page found. The
  // queues themselves are walked from the current LRU queue up to the queue that is at most
  // |lowest_queue| epochs from the most recent. |lowest_queue| therefore represents the youngest
  // age that would be accepted. If no page was found a nullopt is returned, otherwise if it
  // has_value the cow field may be null to indicate that the vmo is running its destructor (see
  // VmoBacklink for more details). If a page is returned its location in the pager_backed queue is
  // not modified.
  ktl::optional<VmoBacklink> PeekPagerBacked(size_t lowest_queue);
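
  // A hedged usage sketch (illustrative only, not the actual Evictor logic; pmm_page_queues() is
  // assumed here to return the global PageQueues instance):
  //
  //   // Only consider pages old enough to be in the oldest queues.
  //   auto backlink =
  //       pmm_page_queues()->PeekPagerBacked(kNumPagerBacked - kNumOldestQueues);
  //   if (backlink.has_value() && backlink->cow) {
  //     // The page and offset are unsynchronized: re-validate under the cow's lock that the page
  //     // is still at the offset before acting on it.
  //   }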

  // Not all methods are safe to call via a referenced VmoContainerBacklink since the VmCowPages
  // refcount may already be 0, but RemovePageForEviction() is. For loaned page reclaim we don't
  // have the option of just recognizing that the VmCowPages is being deleted and moving on - we
  // must get the page.
  struct VmoContainerBacklink {
    fbl::RefPtr<VmCowPagesContainer> cow_container;
    vm_page_t* page = nullptr;
    uint64_t offset = 0;
  };

  // Called while the loaning VmCowPages is known referenced, so the loaning VmCowPages won't be
  // running its destructor. The owning_cow parameter can be nullptr, if the caller doesn't care
  // to exclude the owning cow from being returned, or if there isn't an owning cow. We use a
  // VmoContainerBacklink instead of VmoBacklink so that it remains possible to get a backlink
  // until _after_ all the pages have been removed from the VmCowPages and have become FREE. Not
  // all methods are safe to call via a referenced VmoContainerBacklink, but
  // RemovePageForEviction() is.
  ktl::optional<VmoContainerBacklink> GetCowWithReplaceablePage(vm_page_t* page,
                                                                VmCowPages* owning_cow);

  // Helper struct to group pager-backed queue length counts returned by GetPagerQueueCounts.
  struct PagerCounts {
    size_t total = 0;
    size_t newest = 0;
    size_t oldest = 0;
  };

  // Returns just the pager-backed queue counts. Called from the zx_object_get_info() syscall.
  PagerCounts GetPagerQueueCounts() const;

  // Helper struct to group queue length counts returned by QueueCounts.
  struct Counts {
    ktl::array<size_t, kNumPagerBacked> pager_backed = {0};
    size_t pager_backed_dont_need = 0;
    size_t unswappable = 0;
    size_t wired = 0;
    size_t unswappable_zero_fork = 0;

    bool operator==(const Counts& other) const {
      return pager_backed == other.pager_backed &&
             pager_backed_dont_need == other.pager_backed_dont_need &&
             unswappable == other.unswappable && wired == other.wired &&
             unswappable_zero_fork == other.unswappable_zero_fork;
    }
    bool operator!=(const Counts& other) const { return !(*this == other); }
  };

  Counts QueueCounts() const;

  struct ActiveInactiveCounts {
    // Whether the returned counts were cached values, or the current 'true' values. Cached values
    // are returned if an accessed scan is ongoing, as the true values cannot be determined in a
    // race free way.
    bool cached = false;
    // Pages that would normally be available for eviction, but are presently considered active and
    // so will not be evicted.
    size_t active = 0;
    // Pages that are available for eviction due to not presently being considered active.
    size_t inactive = 0;

    bool operator==(const ActiveInactiveCounts& other) const {
      return cached == other.cached && active == other.active && inactive == other.inactive;
    }
    bool operator!=(const ActiveInactiveCounts& other) const { return !(*this == other); }
  };

  // Retrieves the current active/inactive counts, or a cache of the last known good ones if
  // accessed harvesting is happening. This method is guaranteed to return in a small window of
  // time due to only needing to acquire a single lock that has very short critical sections.
  // However, this means it may have to return old values if accessed scanning is happening. If
  // blocking and waiting is acceptable then |scanner_synchronized_active_inactive_counts| should
  // be used, which calls this when it knows accessed scanning is not happening, guaranteeing a
  // live value.
  ActiveInactiveCounts GetActiveInactiveCounts() const TA_EXCL(lock_);

  void Dump() TA_EXCL(lock_);

  // These query functions are marked Debug as they are generally a racy way to determine a page's
  // state; they are exposed for the purpose of writing tests or asserts against the page queues.

  // This takes an optional output parameter that, if the function returns true, will contain the
  // index of the queue that the page was in.
  bool DebugPageIsPagerBacked(const vm_page_t* page, size_t* queue = nullptr) const;
  bool DebugPageIsPagerBackedDontNeed(const vm_page_t* page) const;
  bool DebugPageIsPagerBackedDirty(const vm_page_t* page) const;
  bool DebugPageIsUnswappable(const vm_page_t* page) const;
  bool DebugPageIsUnswappableZeroFork(const vm_page_t* page) const;
  bool DebugPageIsAnyUnswappable(const vm_page_t* page) const;
  bool DebugPageIsWired(const vm_page_t* page) const;

  // These methods are public so that the scanner can call them. Once the scanner is an object that
  // can be friended, and not a collection of anonymous functions, these can be made private.

  // Creates any threads for queue management. This needs to be done separately from construction
  // as there is a recursive dependency where creating threads will need to manipulate pages, which
  // will call back into the page queues.
  // Delaying thread creation is fine as these threads are purely for aging and eviction
  // management, which is not needed during early kernel boot.
  // Failure to start the threads may cause operations such as RotatePagerBackedQueues to block
  // indefinitely as they might attempt to offload work to a nonexistent thread. This issue is only
  // relevant for unittests that may wish to avoid starting the threads for some tests.
  // It is the responsibility of the caller to only call this once, otherwise it will panic.
  void StartThreads(zx_duration_t min_mru_rotate_time, zx_duration_t max_mru_rotate_time);

  // Sets the active ratio multiplier.
  void SetActiveRatioMultiplier(uint32_t multiplier);

  // Controls to enable and disable the active aging system. These must be called alternately and
  // not in parallel. That is, it is an error to call DisableAging twice without calling
  // EnableAging in between, and similarly for EnableAging.
  void DisableAging();
  void EnableAging();

  // Called by the scanner to indicate the beginning of an accessed scan. This permits calls to
  // MarkAccessedDeferredCount, and will cause the active/inactive counts returned by
  // GetActiveInactiveCounts to remain unchanged until the accessed scan is complete.
  void BeginAccessScan();
  void EndAccessScan();
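
  // A minimal sketch of the intended protocol (illustrative only; the collection of pages with
  // their accessed bits set is a hypothetical stand-in for the scanner's real harvesting walk):
  //
  //   queues->BeginAccessScan();
  //   for (vm_page_t* page : pages_with_accessed_bit_set) {
  //     queues->MarkAccessedDeferredCount(page);
  //   }
  //   queues->EndAccessScan();  // Active/inactive counts are recalculated here.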

 private:
  // Specifies the indices for both the page_queues_ and the page_queue_counts_.
  enum PageQueue : uint8_t {
    PageQueueNone = 0,
    PageQueueUnswappable,
    PageQueueWired,
    PageQueueUnswappableZeroFork,
    PageQueuePagerBackedDirty,
    PageQueuePagerBackedDontNeed,
    PageQueuePagerBackedBase,
    PageQueuePagerBackedLast = PageQueuePagerBackedBase + kNumPagerBacked - 1,
    PageQueueNumQueues,
  };

  // Ensure that the pager-backed queues are always at the end.
  static_assert(PageQueuePagerBackedLast + 1 == PageQueueNumQueues);

  // The page queue index, unlike the full generation count, needs to be able to fit inside a
  // uint8_t in the vm_page_t.
  static_assert(PageQueueNumQueues < 256);

  // Converts free running generation to pager backed queue.
  static constexpr PageQueue gen_to_queue(uint64_t gen) {
    return static_cast<PageQueue>((gen % kNumPagerBacked) + PageQueuePagerBackedBase);
  }
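
  // For example (assuming kNumPagerBacked = 8): gen 0 maps to PageQueuePagerBackedBase, gen 7 maps
  // to PageQueuePagerBackedLast, and gen 8 wraps back around to PageQueuePagerBackedBase.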

  // Checks if a candidate pager backed page queue would be valid given a specific lru and mru
  // queue.
  static constexpr bool queue_is_valid(PageQueue page_queue, PageQueue lru, PageQueue mru) {
    DEBUG_ASSERT(page_queue >= PageQueuePagerBackedBase);
    if (lru <= mru) {
      return page_queue >= lru && page_queue <= mru;
    } else {
      return page_queue <= mru || page_queue >= lru;
    }
  }
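
  // For example, if the generations have wrapped such that mru = PageQueuePagerBackedBase + 3 and
  // lru = PageQueuePagerBackedBase + 6, then Base..Base+3 and Base+6..Base+7 are valid queues,
  // whilst Base+4 and Base+5 are not.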

  // Returns whether this queue is pager backed, and hence can be active or inactive. If this
  // returns false then it is guaranteed that both |queue_is_active| and |queue_is_inactive| would
  // return false.
  static constexpr bool queue_is_pager_backed(PageQueue page_queue) {
    // We check against the DontNeed queue and not the base queue so that accessing a page can
    // move it from the DontNeed list into the LRU queues. To keep this case efficient we require
    // that the DontNeed queue be directly before the LRU queues.
    static_assert(PageQueuePagerBackedDontNeed + 1 == PageQueuePagerBackedBase);

    // Ensure that the Dirty queue comes before the smallest queue that would return true for this
    // function. This function is used for computing active/inactive sets for the purpose of
    // eviction, and dirty pages cannot be evicted. The Dirty queue also needs to come before the
    // DontNeed queue so that MarkAccessed does not try to move the page to the MRU queue on
    // access. All pager-backed queues except the Dirty queue contain evictable pages.
    static_assert(PageQueuePagerBackedDirty < PageQueuePagerBackedDontNeed);

    return page_queue >= PageQueuePagerBackedDontNeed;
  }

  // Calculates the age of a queue against a given mru, with 0 meaning page_queue == mru.
  // This is only meaningful to call on pager backed queues.
  static constexpr uint queue_age(PageQueue page_queue, PageQueue mru) {
    DEBUG_ASSERT(page_queue >= PageQueuePagerBackedBase);
    if (page_queue <= mru) {
      return mru - page_queue;
    } else {
      return (static_cast<uint>(kNumPagerBacked) - page_queue) + mru;
    }
  }
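
  // For example (assuming kNumPagerBacked = 8): with mru = PageQueuePagerBackedBase + 1 and
  // page_queue = PageQueuePagerBackedBase + 7 the queues have wrapped, and the age is
  // (8 - (Base + 7)) + (Base + 1) = 2, i.e. the page is two epochs older than the mru.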

  // Returns whether the given page queue would be considered active against a given mru. This is
  // valid to call on any page queue, not just pager backed ones, and as such this returning false
  // does not imply the queue is inactive.
  static constexpr bool queue_is_active(PageQueue page_queue, PageQueue mru) {
    if (page_queue < PageQueuePagerBackedBase) {
      return false;
    }
    return queue_age(page_queue, mru) < kNumActiveQueues;
  }

  // Returns whether the given page queue would be considered inactive against a given mru. This is
  // valid to call on any page queue, not just pager backed ones, and as such this returning false
  // does not imply the queue is active.
  static constexpr bool queue_is_inactive(PageQueue page_queue, PageQueue mru) {
    // The DontNeed queue does not have an age, and so we cannot call queue_age on it, but it
    // should definitely be considered part of the inactive set.
    if (page_queue == PageQueuePagerBackedDontNeed) {
      return true;
    }
    if (page_queue < PageQueuePagerBackedBase) {
      return false;
    }
    return queue_age(page_queue, mru) >= kNumActiveQueues;
  }
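
  // Continuing the example above (assuming kNumActiveQueues = 2): with
  // mru = PageQueuePagerBackedBase + 1, queues Base and Base+1 (ages 1 and 0) are active, every
  // other pager backed queue, including the DontNeed queue, is inactive, and non pager backed
  // queues are neither.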

  PageQueue mru_gen_to_queue() const {
    return gen_to_queue(mru_gen_.load(ktl::memory_order_relaxed));
  }

  PageQueue lru_gen_to_queue() const {
    return gen_to_queue(lru_gen_.load(ktl::memory_order_relaxed));
  }

  // This processes the DontNeed queue and the LRU queue.
  // For the DontNeed queue the pages are first placed in a temporary list (if |peek| is false),
  // and then processed to ensure termination (see comment on dont_need_processing_list_ for full
  // details). If |peek| is true we will either return a page from the DontNeed queue, or it will
  // be empty, and so no temporary list is needed. For the LRU queue, the aim is to advance
  // lru_gen_ to the passed in target_gen. It achieves this by walking all the pages in the queue
  // and doing one of two things:
  // 1. Pages that have a newer accessed time, and so are in the wrong queue, are moved into the
  //    correct queue.
  // 2. Pages that are in the correct queue are either returned (if |peek| is true), or moved to
  //    another queue - pages in the DontNeed queue are moved between dont_need_processing_list_
  //    and the regular list, although this is in some ways the same queue as the pages do not
  //    actually have their queue updated, and pages in the LRU queue have their age effectively
  //    decreased by being moved to the next queue.
  // In the second case for LRU, pages get moved into the next queue so that the LRU queue can
  // become empty, allowing the gen to be incremented to eventually reach the |target_gen|. The
  // mechanism of freeing up the LRU queue is necessary to make room for new MRU queues. When
  // |peek| is false, this always returns a nullopt and guarantees that it moved lru_gen_ to at
  // least target_gen. If |peek| is true, then the first time it hits a page in case (2), it
  // returns it instead of decreasing its age.
  ktl::optional<PageQueues::VmoBacklink> ProcessDontNeedAndLruQueues(uint64_t target_gen,
                                                                     bool peek);

  enum ProcessingQueue {
    DontNeed,
    Lru,
  };
  // Helper used by ProcessDontNeedAndLruQueues. |processing_queue| indicates whether the LRU queue
  // should be processed or the DontNeed queue. |target_gen| controls whether the function needs to
  // return early in the face of multiple concurrent calls, each of which acquire and drop the
  // lock_. For the LRU queue, |target_gen| is the minimum value lru_gen_ should advance to. For
  // the DontNeed queue |target_gen| is ignored. If |peek| is true, the first page encountered in
  // the respective queue whose age does not need to be fixed up is returned.
  ktl::optional<PageQueues::VmoBacklink> ProcessQueueHelper(ProcessingQueue processing_queue,
                                                            uint64_t target_gen, bool peek);

  // Helpers for adding and removing to the queues. All of the public Set/Move/Remove operations
  // are convenience wrappers around these.
  void RemoveLocked(vm_page_t* page) TA_REQ(lock_);
  void SetQueueLocked(vm_page_t* page, PageQueue queue) TA_REQ(lock_);
  void MoveToQueueLocked(vm_page_t* page, PageQueue queue) TA_REQ(lock_);
  void SetQueueBacklinkLocked(vm_page_t* page, void* object, uintptr_t page_offset, PageQueue queue)
      TA_REQ(lock_);
  void MoveToQueueBacklinkLocked(vm_page_t* page, void* object, uintptr_t page_offset,
                                 PageQueue queue) TA_REQ(lock_);

  // Updates the active/inactive counts assuming a single page has moved from |old_queue| to
  // |new_queue|. Either of these can be PageQueueNone to simulate pages being added or removed.
  void UpdateActiveInactiveLocked(PageQueue old_queue, PageQueue new_queue) TA_REQ(lock_);

  // Recalculates |active_queue_count_| and |inactive_queue_count_|. This is pulled into a helper
  // method as this needs to be done both when accessed scanning completes and when the mru_gen_
  // is changed.
  void RecalculateActiveInactiveLocked() TA_REQ(lock_);

  // Internal locked version of GetActiveInactiveCounts.
  ActiveInactiveCounts GetActiveInactiveCountsLocked() const TA_REQ(lock_);

  // Internal helper for shutting down any threads created in |StartThreads|.
  void StopThreads();

  // Entry point for the thread that performs aging and increments the mru generation.
  void MruThread();
  void MaybeTriggerAging() TA_EXCL(lock_);
  void MaybeTriggerAgingLocked() TA_REQ(lock_);
  AgeReason GetAgeReason() const TA_EXCL(lock_);
  AgeReason GetAgeReasonLocked() const TA_REQ(lock_);

  void LruThread();
  void MaybeTriggerLruProcessing();
  bool NeedsLruProcessing() const;

  // The lock_ is needed to protect the linked list queues as these cannot be implemented with
  // atomics.
  DECLARE_CRITICAL_MUTEX(PageQueues) mutable lock_;

  // This Event is a binary semaphore and is used to control aging. It is acquired by the aging
  // thread when it performs aging, and can be acquired separately to block aging. For this purpose
  // it needs to start as being initially signalled.
  AutounsignalEvent aging_token_ = AutounsignalEvent(true);
  // Flag used to catch programming errors related to double enabling or disabling aging.
  ktl::atomic<bool> aging_disabled_ = false;

  // Time at which the mru_gen_ was last incremented.
  ktl::atomic<zx_time_t> last_age_time_ = ZX_TIME_INFINITE_PAST;
  // Reason the last aging event happened. This is purely for informational/debugging purposes.
  AgeReason last_age_reason_ TA_GUARDED(lock_) = AgeReason::None;
  // Used to signal the aging thread that it should wake up and see if it needs to do anything.
  AutounsignalEvent aging_event_;
  // Used to signal the lru thread that it should wake up and check if the lru queue needs
  // processing.
  AutounsignalEvent lru_event_;

  // The page queues are placed into an array, indexed by page queue, for consistency and
  // uniformity of access. This does mean that the list for PageQueueNone does not actually have
  // any pages in it, and should always be empty.
  // The pager backed queues are the most complicated as, unlike the other categories, pages can be
  // in any one of the queues, and can move around. The pager backed queues themselves store pages
  // that are roughly grouped by their last access time. The relationship is not precise as pages
  // are not moved between queues unless it becomes strictly necessary. This is in contrast to the
  // queue counts that are always up to date.
  //
  // What this means is that the vm_page::page_queue index is always up to date, and the
  // page_queue_counts_ represent an accurate count of pages with that vm_page::page_queue index,
  // but counting the pages actually in the linked list may not yield the correct number.
  //
  // New pager backed pages are always placed into the queue associated with the MRU generation. If
  // they get accessed the vm_page_t::page_queue gets updated along with the counts. At some point
  // the LRU queue will get processed (see |ProcessDontNeedAndLruQueues|) and this will cause pages
  // to get relocated to their correct list.
  //
  // Consider the following example:
  //
  //  LRU  MRU            LRU  MRU            LRU     MRU          LRU     MRU          MRU LRU
  //   |    |              |    |              |       |            |       |            |   |
  //   |    |    Insert A  |    |      Age     |       |  Touch A   |       |    Age     |   |
  //   V    v    Queue=2   v    v    Queue=2   v       v  Queue=3   v       v  Queue=3   v   v
  //  [][ ][ ][] -------> [][ ][a][] -------> [][ ][a][ ] -------> [][ ][a][ ] -------> [ ][ ][a][]
  //
  // At this point page A, in its vm_page_t, has its queue marked as 3, and the page_queue_counts
  // are {0,0,1,0}, but the page itself remains in the linked list for queue 2. If the LRU queue is
  // then processed to increment it, we would see:
  //
  //  MRU LRU              MRU  LRU           MRU     LRU
  //   |  |                 |    |             |       |
  //   |  |       Move LRU  |    |    Move LRU |       |
  //   V  v       Queue=3   v    v    Queue=3  v       v
  //  [ ][ ][a][] -------> [ ][][a][] -------> [][ ][][a]
  //
  // In the second processing of the LRU queue it gets noticed that the page, based on
  // vm_page_t::page_queue, is in the wrong queue and gets moved into the correct one.
  //
  // For specifics on how LRU and MRU generations map to LRU and MRU queues, see comments on
  // |lru_gen_| and |mru_gen_|.
  ktl::array<list_node_t, PageQueueNumQueues> page_queues_ TA_GUARDED(lock_);

  // The generation counts are monotonically increasing counters used to represent the effective
  // age of the oldest and newest pager backed queues. The page queues themselves are treated as a
  // fixed size circular buffer that the generations map onto (see definition of |gen_to_queue|).
  // This means all pages in the system have an age somewhere in [lru_gen_, mru_gen_] and so the
  // lru and mru generations cannot drift apart by kNumPagerBacked or more, otherwise there would
  // not be enough queues.
  // A page's age being between [lru_gen_, mru_gen_] is not an invariant as MarkAccessed can race
  // and mark pages as being in an invalid queue. This race will get noticed by
  // ProcessDontNeedAndLruQueues and the page will get updated at that point to have a valid queue.
  // Importantly, whilst pages can think they are in a queue that is invalid, only valid linked
  // lists in the page_queues_ will ever have pages in them. This invariant is easy to enforce as
  // the page_queues_ are updated under a lock.
  ktl::atomic<uint64_t> lru_gen_ = 0;
  ktl::atomic<uint64_t> mru_gen_ = kNumPagerBacked - 1;
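
  // As an illustrative check of these initial values: mru_gen_ - lru_gen_ = kNumPagerBacked - 1,
  // so every queue is in use from the start and there is no room for the mru to advance until the
  // lru is processed (hence mru_semaphore_ below starts at 0).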

  // This semaphore counts the amount of space remaining for the mru to grow before it would
  // overlap with the lru. Having this as a semaphore (even though it can always be calculated from
  // lru_gen_ and mru_gen_ above) provides a way for the aging thread to block when it needs to
  // wait for eviction/lru processing to happen. This allows eviction/lru processing to be
  // happening concurrently in a different thread, without requiring it to happen in-line in the
  // aging thread. Without this the aging thread would need to process the LRU queue directly if it
  // needed to make space. Initially, with the lru_gen_ and mru_gen_ definitions above, we start
  // with no space for the mru to grow, so initialize this to 0.
  Semaphore mru_semaphore_ = Semaphore(0);

  // Tracks the counts of pages in each queue in O(1) time complexity. As pages are moved between
  // queues, the corresponding source and destination counts are decremented and incremented,
  // respectively.
  //
  // The first entry of the array is left special: it logically represents pages not in any queue.
  // For simplicity, it is initialized to zero rather than the total number of pages in the system.
  // Consequently, the value of this entry is a negative number with absolute value equal to the
  // total number of pages in all queues. This approach avoids unnecessary branches when updating
  // counts.
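  // For example (illustrative only): when a page that is in no queue is added to a queue |q|, the
  // transition is counted as PageQueueNone -> q, so page_queue_counts_[PageQueueNone] is
  // decremented (logically becoming -1, wrapped in the unsigned entry) and page_queue_counts_[q]
  // is incremented, with no special case needed for the "no queue" source.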
  ktl::array<ktl::atomic<size_t>, PageQueueNumQueues> page_queue_counts_ = {};

  // These are the continuously updated active/inactive queue counts. Continuous here means updated
  // by all page queue methods except for MarkAccessedDeferredCount. Due to races whilst accessed
  // harvesting is happening, these could be inaccurate or even become negative, and should not be
  // read from whilst use_cached_queue_counts_ is true, and need to be completely recalculated
  // prior to setting |use_cached_queue_counts_| back to false.
  int64_t active_queue_count_ TA_GUARDED(lock_) = 0;
  int64_t inactive_queue_count_ TA_GUARDED(lock_) = 0;
  // When accessed harvesting is happening these hold the last known 'good' values of the
  // active/inactive queue counts.
  uint64_t cached_active_queue_count_ TA_GUARDED(lock_) = 0;
  uint64_t cached_inactive_queue_count_ TA_GUARDED(lock_) = 0;
  // Indicates whether the cached counts should be returned in queries or not. This also indicates
  // whether the page queues expect accessed harvesting to be happening. This is only an atomic
  // so that MarkAccessedDeferredCount can reference it in a DEBUG_ASSERT without triggering
  // memory safety issues.
  ktl::atomic<bool> use_cached_queue_counts_ = false;

  // Track the mru and lru threads and have a signalling mechanism to shut them down.
  ktl::atomic<bool> shutdown_threads_ = false;
  Thread* mru_thread_ TA_GUARDED(lock_) = nullptr;
  Thread* lru_thread_ TA_GUARDED(lock_) = nullptr;

  // Queue rotation parameters. These are not locked as they are only read by the mru thread, and
  // are set before the mru thread is started.
  zx_duration_t min_mru_rotate_time_;
  zx_duration_t max_mru_rotate_time_;

  // Current active ratio multiplier.
  uint64_t active_ratio_multiplier_ TA_GUARDED(lock_);

  // This list is a temporary list used when updating the ages of pages in the DontNeed queue.
  // Logically it can be considered the same queue as PageQueuePagerBackedDontNeed, just a
  // different physical list; hence whether a page is here or in the regular
  // page_queues_[PageQueuePagerBackedDontNeed] list, the
  // page_queue_counts_[PageQueuePagerBackedDontNeed] count is the same.
  // The way this list is used is when processing the LRU queue, all the pages in the regular
  // DontNeed queue are moved here, and then this list is processed until it is empty by moving
  // them back to the DontNeed list. By doing this it can be guaranteed that every original page is
  // checked, without new DontNeed pages being able to cause the process to not terminate.
  // Due to the way list_node_t's work, whether a page is in this specific list or the
  // page_queues_ list, they can still be removed with list_delete, which is why it's safe to move
  // the pages here.
  // Note that we cannot guard this list with the dont_need_processing_lock_ since peek actions,
  // which do not take that lock, also need to be able to remove items from this list.
  list_node_t dont_need_processing_list_ TA_GUARDED(lock_) = LIST_INITIAL_CLEARED_VALUE;

  // Processing works by moving all the DontNeed pages to the dont_need_processing_list_ and then
  // iterating over them, which means it only makes sense for one of these operations to be in
  // progress at a time. Since only one LRU rotation will realistically happen at a time, this lock
  // in practice should never actually block anything.
  DECLARE_MUTEX(PageQueues) dont_need_processing_lock_;
};

#endif  // ZIRCON_KERNEL_VM_INCLUDE_VM_PAGE_QUEUES_H_