| // Copyright 2021 The Fuchsia Authors |
| // |
| // Use of this source code is governed by a MIT-style |
| // license that can be found in the LICENSE file or at |
| // https://opensource.org/licenses/MIT |
| |
| #include <lib/boot-options/boot-options.h> |
| #include <lib/counters.h> |
| #include <lib/fit/defer.h> |
| #include <lib/zircon-internal/macros.h> |
| |
| #include <cassert> |
| #include <cstdint> |
| |
| #include <kernel/lockdep.h> |
| #include <ktl/algorithm.h> |
| #include <vm/evictor.h> |
| #include <vm/pmm.h> |
| #include <vm/scanner.h> |
| #include <vm/stack_owned_loaned_pages_interval.h> |
| #include <vm/vm_cow_pages.h> |
| |
| #include "pmm_node.h" |
| |
| #include <ktl/enforce.h> |
| |
| namespace { |
| |
| KCOUNTER(pager_backed_pages_evicted, "vm.reclamation.pages_evicted_pager_backed") |
| KCOUNTER(discardable_pages_evicted, "vm.reclamation.pages_evicted_discardable") |
| |
| inline void CheckedIncrement(uint64_t* a, uint64_t b) { |
| uint64_t result; |
| bool overflow = add_overflow(*a, b, &result); |
| DEBUG_ASSERT(!overflow); |
| *a = result; |
| } |
| |
| } // namespace |
| |
| Evictor::Evictor(PmmNode* node) : pmm_node_(node), page_queues_(node->GetPageQueues()) {} |
| |
| Evictor::Evictor(PmmNode* node, PageQueues* queues) : pmm_node_(node), page_queues_(queues) {} |
| |
| Evictor::~Evictor() { DisableEviction(); } |
| |
| bool Evictor::IsEvictionEnabled() const { |
| Guard<MonitoredSpinLock, IrqSave> guard{&lock_, SOURCE_TAG}; |
| return eviction_enabled_; |
| } |
| |
| void Evictor::EnableEviction() { |
| { |
| Guard<MonitoredSpinLock, IrqSave> guard{&lock_, SOURCE_TAG}; |
| // It's an error to call this whilst the eviction thread is still exiting. |
| ASSERT(!eviction_thread_exiting_); |
| eviction_enabled_ = true; |
| |
| if (eviction_thread_) { |
| return; |
| } |
| } |
| |
| // Set up the eviction thread to process asynchronous one-shot and continuous eviction requests. |
| auto eviction_thread = [](void* arg) -> int { |
| Evictor* evictor = reinterpret_cast<Evictor*>(arg); |
| return evictor->EvictionThreadLoop(); |
| }; |
| eviction_thread_ = Thread::Create("eviction-thread", eviction_thread, this, LOW_PRIORITY); |
| DEBUG_ASSERT(eviction_thread_); |
| eviction_thread_->Resume(); |
| } |
| |
| void Evictor::DisableEviction() { |
| Thread* eviction_thread = nullptr; |
| { |
| // Grab the lock and update any state. We cannot actually wait for the eviction thread to |
| // complete whilst the lock is held, however. |
| Guard<MonitoredSpinLock, IrqSave> guard{&lock_, SOURCE_TAG}; |
| if (!eviction_thread_) { |
| return; |
| } |
| // It's an error to call this in parallel with another DisableEviction call. |
| ASSERT(!eviction_thread_exiting_); |
| eviction_thread = eviction_thread_; |
| eviction_thread_exiting_ = true; |
| eviction_signal_.Signal(); |
| } |
| // Now with the lock dropped wait for the thread to complete. Use a locally cached copy of the |
| // pointer so that even if the scanner performs a concurrent EnableEviction call we should not |
| // crash or have races, although the eviction thread may fail to join. |
| int res = 0; |
| eviction_thread->Join(&res, ZX_TIME_INFINITE); |
| DEBUG_ASSERT(res == 0); |
| { |
| Guard<MonitoredSpinLock, IrqSave> guard{&lock_, SOURCE_TAG}; |
| // Now update the state to indicate that eviction is disabled. |
| eviction_thread_ = nullptr; |
| eviction_enabled_ = false; |
| eviction_thread_exiting_ = false; |
| } |
| } |
| |
| void Evictor::SetDiscardableEvictionsPercent(uint32_t discardable_percent) { |
| Guard<MonitoredSpinLock, IrqSave> guard{&lock_, SOURCE_TAG}; |
| if (discardable_percent <= 100) { |
| discardable_evictions_percent_ = discardable_percent; |
| } |
| } |
| |
| void Evictor::DebugSetMinDiscardableAge(zx_time_t age) { |
| Guard<MonitoredSpinLock, IrqSave> guard{&lock_, SOURCE_TAG}; |
| min_discardable_age_ = age; |
| } |
| |
| void Evictor::SetContinuousEvictionInterval(zx_time_t eviction_interval) { |
| Guard<MonitoredSpinLock, IrqSave> guard{&lock_, SOURCE_TAG}; |
| default_eviction_interval_ = eviction_interval; |
| } |
| |
| Evictor::EvictionTarget Evictor::DebugGetOneShotEvictionTarget() const { |
| Guard<MonitoredSpinLock, IrqSave> guard{&lock_, SOURCE_TAG}; |
| return one_shot_eviction_target_; |
| } |
| |
| void Evictor::SetOneShotEvictionTarget(EvictionTarget target) { |
| Guard<MonitoredSpinLock, IrqSave> guard{&lock_, SOURCE_TAG}; |
| one_shot_eviction_target_ = target; |
| } |
| |
| void Evictor::CombineOneShotEvictionTarget(EvictionTarget target) { |
| Guard<MonitoredSpinLock, IrqSave> guard{&lock_, SOURCE_TAG}; |
| one_shot_eviction_target_.pending = one_shot_eviction_target_.pending || target.pending; |
| one_shot_eviction_target_.level = ktl::max(one_shot_eviction_target_.level, target.level); |
| CheckedIncrement(&one_shot_eviction_target_.min_pages_to_free, target.min_pages_to_free); |
| one_shot_eviction_target_.free_pages_target = |
| ktl::max(one_shot_eviction_target_.free_pages_target, target.free_pages_target); |
| one_shot_eviction_target_.print_counts = |
| one_shot_eviction_target_.print_counts || target.print_counts; |
| } |
| |
| Evictor::EvictedPageCounts Evictor::EvictOneShotFromPreloadedTarget() { |
| EvictedPageCounts total_evicted_counts = {}; |
| |
| // Create a local copy of the eviction target to operate against. |
| EvictionTarget target; |
| { |
| Guard<MonitoredSpinLock, IrqSave> guard{&lock_, SOURCE_TAG}; |
| target = one_shot_eviction_target_; |
| one_shot_eviction_target_ = {}; |
| } |
| if (!target.pending) { |
| return total_evicted_counts; |
| } |
| |
| uint64_t free_pages_before = pmm_node_->CountFreePages(); |
| |
| total_evicted_counts = |
| EvictUntilTargetsMet(target.min_pages_to_free, target.free_pages_target, target.level); |
| |
| if (target.print_counts) { |
| printf("[EVICT]: Free memory before eviction was %zuMB and after eviction is %zuMB\n", |
| free_pages_before * PAGE_SIZE / MB, pmm_node_->CountFreePages() * PAGE_SIZE / MB); |
| if (total_evicted_counts.pager_backed > 0) { |
| printf("[EVICT]: Evicted %lu user pager backed pages\n", total_evicted_counts.pager_backed); |
| } |
| if (total_evicted_counts.discardable > 0) { |
| printf("[EVICT]: Evicted %lu pages from discardable vmos\n", |
| total_evicted_counts.discardable); |
| } |
| } |
| |
| return total_evicted_counts; |
| } |
| |
| uint64_t Evictor::EvictOneShotSynchronous(uint64_t min_mem_to_free, EvictionLevel eviction_level, |
| Output output) { |
| if (!IsEvictionEnabled()) { |
| return 0; |
| } |
| SetOneShotEvictionTarget(EvictionTarget{ |
| .pending = true, |
| // No target free pages to get to. Evict based only on the min pages requested to evict. |
| .free_pages_target = 0, |
| // For synchronous eviction, set the eviction level and min target as requested. |
| .min_pages_to_free = min_mem_to_free / PAGE_SIZE, |
| .level = eviction_level, |
| .print_counts = (output == Output::Print), |
| }); |
| |
| auto evicted_counts = EvictOneShotFromPreloadedTarget(); |
| return evicted_counts.pager_backed + evicted_counts.discardable; |
| } |
| |
| void Evictor::EvictOneShotAsynchronous(uint64_t min_mem_to_free, uint64_t free_mem_target, |
| Evictor::EvictionLevel eviction_level, |
| Evictor::Output output) { |
| if (!IsEvictionEnabled()) { |
| return; |
| } |
| CombineOneShotEvictionTarget(Evictor::EvictionTarget{ |
| .pending = true, |
| .free_pages_target = free_mem_target / PAGE_SIZE, |
| .min_pages_to_free = min_mem_to_free / PAGE_SIZE, |
| .level = eviction_level, |
| .print_counts = (output == Output::Print), |
| }); |
| // Unblock the eviction thread. |
| eviction_signal_.Signal(); |
| } |
| |
| Evictor::EvictedPageCounts Evictor::EvictUntilTargetsMet(uint64_t min_pages_to_evict, |
| uint64_t free_pages_target, |
| EvictionLevel level) { |
| EvictedPageCounts total_evicted_counts = {}; |
| if (!IsEvictionEnabled()) { |
| return total_evicted_counts; |
| } |
| |
| // Wait until no eviction attempts are ongoing, so that we don't overshoot the free pages target. |
| no_ongoing_eviction_.Wait(Deadline::infinite()); |
| auto signal_cleanup = fit::defer([&]() { |
| // Unblock any waiting eviction requests. |
| no_ongoing_eviction_.Signal(); |
| }); |
| |
| uint64_t total_non_loaned_pages_freed = 0; |
| |
| DEBUG_ASSERT(pmm_node_); |
| |
| while (true) { |
| const uint64_t free_pages = pmm_node_->CountFreePages(); |
| uint64_t pages_to_free = 0; |
| if (total_non_loaned_pages_freed < min_pages_to_evict) { |
| pages_to_free = min_pages_to_evict - total_non_loaned_pages_freed; |
| } else if (free_pages < free_pages_target) { |
| pages_to_free = free_pages_target - free_pages; |
| } else { |
| // The targets have been met. No more eviction is required right now. |
| break; |
| } |
| |
| // Compute the desired number of discardable pages to free (vs pager-backed). |
| uint64_t pages_to_free_discardable = 0; |
| if (level == EvictionLevel::IncludeNewest) { |
| // If we're including newest pages too, first try to reclaim as much from discardable VMOs as |
| // possible. |
| pages_to_free_discardable = pages_to_free; |
| } else { |
| Guard<MonitoredSpinLock, IrqSave> guard{&lock_, SOURCE_TAG}; |
| DEBUG_ASSERT(discardable_evictions_percent_ <= 100); |
| pages_to_free_discardable = pages_to_free * discardable_evictions_percent_ / 100; |
| } |
| |
| uint64_t pages_freed = EvictDiscardable(pages_to_free_discardable); |
| total_evicted_counts.discardable += pages_freed; |
| total_non_loaned_pages_freed += pages_freed; |
| |
| // If we've already met the current target, continue to the next iteration of the loop. |
| if (pages_freed >= pages_to_free) { |
| continue; |
| } |
| DEBUG_ASSERT(pages_to_free > pages_freed); |
| // Free pager backed memory to get to |pages_to_free|. |
| uint64_t pages_to_free_pager_backed = pages_to_free - pages_freed; |
| |
| EvictedPageCounts pages_freed_pager_backed = |
| EvictPagerBacked(pages_to_free_pager_backed, level); |
| total_evicted_counts.pager_backed += pages_freed_pager_backed.pager_backed; |
| total_evicted_counts.pager_backed_loaned += pages_freed_pager_backed.pager_backed_loaned; |
| total_non_loaned_pages_freed += pages_freed_pager_backed.pager_backed; |
| |
| pages_freed += pages_freed_pager_backed.pager_backed; |
| |
| // Should we fail to free any pages then we give up and consider the eviction request complete. |
| if (pages_freed == 0) { |
| break; |
| } |
| } |
| |
| return total_evicted_counts; |
| } |
| |
| uint64_t Evictor::EvictDiscardable(uint64_t target_pages) const { |
| if (!IsEvictionEnabled()) { |
| return 0; |
| } |
| |
| list_node_t freed_list; |
| list_initialize(&freed_list); |
| |
| // Reclaim |target_pages| from discardable vmos that have been reclaimable for at least |
| // |min_discardable_age_|. |
| zx_time_t min_age; |
| { |
| Guard<MonitoredSpinLock, IrqSave> guard{&lock_, SOURCE_TAG}; |
| min_age = min_discardable_age_; |
| } |
| uint64_t count = VmCowPages::ReclaimPagesFromDiscardableVmos(target_pages, min_age, &freed_list); |
| |
| DEBUG_ASSERT(pmm_node_); |
| pmm_node_->FreeList(&freed_list); |
| |
| discardable_pages_evicted.Add(count); |
| return count; |
| } |
| |
| Evictor::EvictedPageCounts Evictor::EvictPagerBacked(uint64_t target_pages, |
| EvictionLevel eviction_level) const { |
| EvictedPageCounts counts = {}; |
| |
| if (!IsEvictionEnabled()) { |
| return counts; |
| } |
| |
| list_node_t freed_list; |
| list_initialize(&freed_list); |
| |
| // Avoid evicting from the newest queue to prevent thrashing. |
| const size_t lowest_evict_queue = |
| eviction_level == EvictionLevel::IncludeNewest |
| ? PageQueues::kNumActiveQueues |
| : PageQueues::kNumPagerBacked - PageQueues::kNumOldestQueues; |
| |
| // TODO(fxbug.dev/85056): Always follow the hint for now, i.e. protect hinted pages from eviction |
| // even in the face of OOM. |
| // |
| // Desired future behavior: |
| // If we're going to include newest pages, ignore eviction hints as well, i.e. also consider |
| // evicting pages with always_need set if we encounter them in LRU order. |
| const VmCowPages::EvictionHintAction hint_action = VmCowPages::EvictionHintAction::Follow; |
| |
| // We stack-own loaned pages from RemovePageForEviction() to FreeList() below. |
| __UNINITIALIZED StackOwnedLoanedPagesInterval raii_interval; |
| |
| DEBUG_ASSERT(page_queues_); |
| while (counts.pager_backed < target_pages) { |
| // TODO(rashaeqbal): The sequence of actions in PeekPagerBacked() and RemovePageForEviction() |
| // implicitly guarantee forward progress in this loop, so that we're not stuck trying to evict |
| // the same page (i.e. PeekPagerBacked keeps returning the same page). It would be nice to have |
| // some explicit checks here (or in PageQueues) to guarantee forward progress. Or we might want |
| // to use cursors to iterate the queues instead of peeking the tail each time. |
| if (ktl::optional<PageQueues::VmoBacklink> backlink = |
| page_queues_->PeekPagerBacked(lowest_evict_queue)) { |
| if (!backlink->cow) { |
| continue; |
| } |
| if (backlink->cow->RemovePageForEviction(backlink->page, backlink->offset, hint_action)) { |
| list_add_tail(&freed_list, &backlink->page->queue_node); |
| if (pmm_is_loaned(backlink->page)) { |
| counts.pager_backed_loaned++; |
| } else { |
| counts.pager_backed++; |
| } |
| } |
| } else { |
| break; |
| } |
| } |
| |
| DEBUG_ASSERT(pmm_node_); |
| pmm_node_->FreeList(&freed_list); |
| |
| pager_backed_pages_evicted.Add(counts.pager_backed + counts.pager_backed_loaned); |
| return counts; |
| } |
| |
| void Evictor::EnableContinuousEviction(uint64_t min_mem_to_free, uint64_t free_mem_target, |
| EvictionLevel eviction_level, Output output) { |
| { |
| Guard<MonitoredSpinLock, IrqSave> guard{&lock_, SOURCE_TAG}; |
| // Combine min target with previously outstanding min target. |
| CheckedIncrement(&continuous_eviction_target_.min_pages_to_free, min_mem_to_free / PAGE_SIZE); |
| continuous_eviction_target_.free_pages_target = free_mem_target / PAGE_SIZE; |
| continuous_eviction_target_.level = eviction_level; |
| continuous_eviction_target_.print_counts = (output == Output::Print); |
| // .pending has no relevance here since eviction is controlled by the eviction interval. |
| |
| // Configure eviction to occur at intervals of |default_eviction_interval_|. |
| next_eviction_interval_ = default_eviction_interval_; |
| } |
| // Unblock the eviction thread. |
| eviction_signal_.Signal(); |
| } |
| |
| void Evictor::DisableContinuousEviction() { |
| Guard<MonitoredSpinLock, IrqSave> guard{&lock_, SOURCE_TAG}; |
| continuous_eviction_target_ = {}; |
| // In the next iteration of the eviction thread loop, we will see this value and block |
| // indefinitely. |
| next_eviction_interval_ = ZX_TIME_INFINITE; |
| } |
| |
| int Evictor::EvictionThreadLoop() { |
| while (!eviction_thread_exiting_) { |
| // Block until |next_eviction_interval_| is elapsed. |
| zx_time_t wait_interval; |
| { |
| Guard<MonitoredSpinLock, IrqSave> guard{&lock_, SOURCE_TAG}; |
| wait_interval = next_eviction_interval_; |
| } |
| eviction_signal_.Wait(Deadline::no_slack(zx_time_add_duration(current_time(), wait_interval))); |
| |
| if (eviction_thread_exiting_) { |
| break; |
| } |
| |
| // Process a one-shot target if there is one. This is a no-op and no pages are evicted if no |
| // one-shot target is pending. |
| auto evicted = EvictOneShotFromPreloadedTarget(); |
| |
| // In practice either one-shot eviction or continuous eviction will be enabled at a time. We can |
| // skip the rest of the loop if we evicted something here, and go back to wait for another |
| // request. If both one-shot and continuous modes are used together, at worst we will wait for |
| // |next_eviction_interval_| before evicting as required by the continuous mode, which should |
| // still be fine. |
| if (evicted.discardable + evicted.pager_backed > 0) { |
| continue; |
| } |
| |
| // Read control parameters into local variables under the lock. |
| EvictionTarget target; |
| { |
| Guard<MonitoredSpinLock, IrqSave> guard{&lock_, SOURCE_TAG}; |
| target = continuous_eviction_target_; |
| } |
| |
| uint64_t free_pages_before = pmm_node_->CountFreePages(); |
| |
| evicted = |
| EvictUntilTargetsMet(target.min_pages_to_free, target.free_pages_target, target.level); |
| |
| if (target.print_counts) { |
| printf("[EVICT]: Free memory before eviction was %zuMB and after eviction is %zuMB\n", |
| free_pages_before * PAGE_SIZE / MB, pmm_node_->CountFreePages() * PAGE_SIZE / MB); |
| if (evicted.pager_backed > 0) { |
| printf("[EVICT]: Evicted %lu user pager backed pages\n", evicted.pager_backed); |
| } |
| if (evicted.discardable > 0) { |
| printf("[EVICT]: Evicted %lu pages from discardable vmos\n", evicted.discardable); |
| } |
| } |
| |
| uint64_t total_evicted = evicted.discardable + evicted.pager_backed; |
| // If no pages were evicted, we don't have anything to decrement from the min pages target. Skip |
| // the rest of the loop. |
| if (total_evicted == 0) { |
| continue; |
| } |
| |
| { |
| // Update min pages target based on the number of pages evicted. |
| Guard<MonitoredSpinLock, IrqSave> guard{&lock_, SOURCE_TAG}; |
| if (total_evicted < continuous_eviction_target_.min_pages_to_free) { |
| continuous_eviction_target_.min_pages_to_free -= total_evicted; |
| } else { |
| continuous_eviction_target_.min_pages_to_free = 0; |
| } |
| } |
| } |
| return 0; |
| } |