// blob: ba291e1391937b8b5755f1795f40f65007ed3d1b [file] [log] [blame]
// Copyright 2021 The Fuchsia Authors
//
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file or at
// https://opensource.org/licenses/MIT
#include <lib/boot-options/boot-options.h>
#include <lib/counters.h>
#include <lib/fit/defer.h>
#include <lib/zircon-internal/macros.h>
#include <cassert>
#include <cstdint>
#include <kernel/lockdep.h>
#include <ktl/algorithm.h>
#include <vm/evictor.h>
#include <vm/pmm.h>
#include <vm/scanner.h>
#include <vm/stack_owned_loaned_pages_interval.h>
#include <vm/vm_cow_pages.h>
#include "pmm_node.h"
#include <ktl/enforce.h>
namespace {

// Kernel counters tracking how many pages each eviction path has reclaimed.
KCOUNTER(pager_backed_pages_evicted, "vm.reclamation.pages_evicted_pager_backed")
KCOUNTER(discardable_pages_evicted, "vm.reclamation.pages_evicted_discardable")

// Adds |b| to the value pointed to by |a| and stores the sum back through |a|.
// Debug-asserts that the addition did not overflow.
inline void CheckedIncrement(uint64_t* a, uint64_t b) {
  uint64_t sum = 0;
  const bool overflow = add_overflow(*a, b, &sum);
  DEBUG_ASSERT(!overflow);
  *a = sum;
}

}  // namespace
// Constructs an evictor for |node| that operates on the page queues owned by that node.
Evictor::Evictor(PmmNode* node) : Evictor(node, node->GetPageQueues()) {}
Evictor::Evictor(PmmNode* node, PageQueues* queues) : pmm_node_(node), page_queues_(queues) {}
// Disables eviction on destruction; DisableEviction() joins the eviction thread if one exists.
Evictor::~Evictor() {
  this->DisableEviction();
}
// Returns the current value of |eviction_enabled_|, sampled while holding |lock_|.
bool Evictor::IsEvictionEnabled() const {
  bool enabled;
  {
    Guard<MonitoredSpinLock, IrqSave> guard{&lock_, SOURCE_TAG};
    enabled = eviction_enabled_;
  }
  return enabled;
}
void Evictor::EnableEviction() {
{
Guard<MonitoredSpinLock, IrqSave> guard{&lock_, SOURCE_TAG};
// It's an error to call this whilst the eviction thread is still exiting.
ASSERT(!eviction_thread_exiting_);
eviction_enabled_ = true;
if (eviction_thread_) {
return;
}
}
// Set up the eviction thread to process asynchronous one-shot and continuous eviction requests.
auto eviction_thread = [](void* arg) -> int {
Evictor* evictor = reinterpret_cast<Evictor*>(arg);
return evictor->EvictionThreadLoop();
};
eviction_thread_ = Thread::Create("eviction-thread", eviction_thread, this, LOW_PRIORITY);
DEBUG_ASSERT(eviction_thread_);
eviction_thread_->Resume();
}
// Stops the eviction thread and marks eviction as disabled. Blocks until the eviction thread has
// been joined. If there is no eviction thread this returns immediately without modifying any
// state.
void Evictor::DisableEviction() {
Thread* eviction_thread = nullptr;
{
// Grab the lock and update any state. We cannot actually wait for the eviction thread to
// complete whilst the lock is held, however.
Guard<MonitoredSpinLock, IrqSave> guard{&lock_, SOURCE_TAG};
if (!eviction_thread_) {
return;
}
// It's an error to call this in parallel with another DisableEviction call.
ASSERT(!eviction_thread_exiting_);
eviction_thread = eviction_thread_;
eviction_thread_exiting_ = true;
// Wake the eviction thread in case it is blocked waiting on |eviction_signal_|, so that it can
// observe |eviction_thread_exiting_| and leave its loop.
eviction_signal_.Signal();
}
// Now with the lock dropped wait for the thread to complete. Use a locally cached copy of the
// pointer so that even if the scanner performs a concurrent EnableEviction call we should not
// crash or have races, although the eviction thread may fail to join.
int res = 0;
eviction_thread->Join(&res, ZX_TIME_INFINITE);
// EvictionThreadLoop() always returns 0.
DEBUG_ASSERT(res == 0);
{
Guard<MonitoredSpinLock, IrqSave> guard{&lock_, SOURCE_TAG};
// Now update the state to indicate that eviction is disabled.
eviction_thread_ = nullptr;
eviction_enabled_ = false;
eviction_thread_exiting_ = false;
}
}
// Updates the percentage of each eviction request that should be satisfied from discardable
// memory. Values above 100 are ignored and the existing setting is kept.
void Evictor::SetDiscardableEvictionsPercent(uint32_t discardable_percent) {
  Guard<MonitoredSpinLock, IrqSave> guard{&lock_, SOURCE_TAG};
  if (discardable_percent > 100) {
    return;
  }
  discardable_evictions_percent_ = discardable_percent;
}
// Overrides |min_discardable_age_|, the minimum age passed to discardable VMO reclamation in
// EvictDiscardable(). The update is made under |lock_|.
void Evictor::DebugSetMinDiscardableAge(zx_time_t age) {
  {
    Guard<MonitoredSpinLock, IrqSave> guard{&lock_, SOURCE_TAG};
    min_discardable_age_ = age;
  }
}
// Sets |default_eviction_interval_|, the interval that EnableContinuousEviction() installs as the
// eviction thread's wait interval. The update is made under |lock_|.
void Evictor::SetContinuousEvictionInterval(zx_time_t eviction_interval) {
  {
    Guard<MonitoredSpinLock, IrqSave> guard{&lock_, SOURCE_TAG};
    default_eviction_interval_ = eviction_interval;
  }
}
// Returns a copy of the currently stored one-shot eviction target, taken while holding |lock_|.
Evictor::EvictionTarget Evictor::DebugGetOneShotEvictionTarget() const {
  EvictionTarget snapshot;
  {
    Guard<MonitoredSpinLock, IrqSave> guard{&lock_, SOURCE_TAG};
    snapshot = one_shot_eviction_target_;
  }
  return snapshot;
}
// Replaces the stored one-shot eviction target with |target|, discarding whatever was stored
// before. The update is made under |lock_|.
void Evictor::SetOneShotEvictionTarget(EvictionTarget target) {
  {
    Guard<MonitoredSpinLock, IrqSave> guard{&lock_, SOURCE_TAG};
    one_shot_eviction_target_ = target;
  }
}
// Merges |target| into the stored one-shot eviction target instead of replacing it:
//  - |pending| and |print_counts| become set if either side has them set,
//  - |level| and |free_pages_target| take the larger of the two values,
//  - |min_pages_to_free| accumulates (debug-asserting on overflow).
void Evictor::CombineOneShotEvictionTarget(EvictionTarget target) {
  Guard<MonitoredSpinLock, IrqSave> guard{&lock_, SOURCE_TAG};

  // Boolean fields: sticky once set by any request.
  if (target.pending) {
    one_shot_eviction_target_.pending = true;
  }
  if (target.print_counts) {
    one_shot_eviction_target_.print_counts = true;
  }

  // Fields combined by taking the maximum.
  if (one_shot_eviction_target_.level < target.level) {
    one_shot_eviction_target_.level = target.level;
  }
  if (one_shot_eviction_target_.free_pages_target < target.free_pages_target) {
    one_shot_eviction_target_.free_pages_target = target.free_pages_target;
  }

  // The minimum number of pages to free is additive across requests.
  CheckedIncrement(&one_shot_eviction_target_.min_pages_to_free, target.min_pages_to_free);
}
// Consumes the stored one-shot eviction target and performs the eviction it describes.
// The stored target is reset under |lock_| regardless of whether it was pending. Returns the
// counts of evicted pages, which are all zero when no target was pending.
Evictor::EvictedPageCounts Evictor::EvictOneShotFromPreloadedTarget() {
  // Take a private copy of the request and clear the shared one in a single critical section.
  EvictionTarget request;
  {
    Guard<MonitoredSpinLock, IrqSave> guard{&lock_, SOURCE_TAG};
    request = one_shot_eviction_target_;
    one_shot_eviction_target_ = {};
  }

  if (!request.pending) {
    return EvictedPageCounts{};
  }

  uint64_t free_pages_before = pmm_node_->CountFreePages();
  EvictedPageCounts evicted =
      EvictUntilTargetsMet(request.min_pages_to_free, request.free_pages_target, request.level);

  if (!request.print_counts) {
    return evicted;
  }

  // Report the effect of this eviction pass.
  printf("[EVICT]: Free memory before eviction was %zuMB and after eviction is %zuMB\n",
         free_pages_before * PAGE_SIZE / MB, pmm_node_->CountFreePages() * PAGE_SIZE / MB);
  if (evicted.pager_backed > 0) {
    printf("[EVICT]: Evicted %lu user pager backed pages\n", evicted.pager_backed);
  }
  if (evicted.discardable > 0) {
    printf("[EVICT]: Evicted %lu pages from discardable vmos\n", evicted.discardable);
  }
  return evicted;
}
uint64_t Evictor::EvictOneShotSynchronous(uint64_t min_mem_to_free, EvictionLevel eviction_level,
Output output) {
if (!IsEvictionEnabled()) {
return 0;
}
SetOneShotEvictionTarget(EvictionTarget{
.pending = true,
// No target free pages to get to. Evict based only on the min pages requested to evict.
.free_pages_target = 0,
// For synchronous eviction, set the eviction level and min target as requested.
.min_pages_to_free = min_mem_to_free / PAGE_SIZE,
.level = eviction_level,
.print_counts = (output == Output::Print),
});
auto evicted_counts = EvictOneShotFromPreloadedTarget();
return evicted_counts.pager_backed + evicted_counts.discardable;
}
void Evictor::EvictOneShotAsynchronous(uint64_t min_mem_to_free, uint64_t free_mem_target,
Evictor::EvictionLevel eviction_level,
Evictor::Output output) {
if (!IsEvictionEnabled()) {
return;
}
CombineOneShotEvictionTarget(Evictor::EvictionTarget{
.pending = true,
.free_pages_target = free_mem_target / PAGE_SIZE,
.min_pages_to_free = min_mem_to_free / PAGE_SIZE,
.level = eviction_level,
.print_counts = (output == Output::Print),
});
// Unblock the eviction thread.
eviction_signal_.Signal();
}
// Evicts pages in rounds until both targets are met or a round frees nothing:
//  - at least |min_pages_to_evict| non-loaned pages have been freed by this call, and
//  - the PMM node reports at least |free_pages_target| free pages.
// Each round first evicts from discardable VMOs and then, if that was not enough, from
// pager-backed memory at the given |level|. Returns the per-category counts of evicted pages; all
// counts are zero if eviction is disabled.
Evictor::EvictedPageCounts Evictor::EvictUntilTargetsMet(uint64_t min_pages_to_evict,
uint64_t free_pages_target,
EvictionLevel level) {
EvictedPageCounts total_evicted_counts = {};
if (!IsEvictionEnabled()) {
return total_evicted_counts;
}
// Wait until no eviction attempts are ongoing, so that we don't overshoot the free pages target.
no_ongoing_eviction_.Wait(Deadline::infinite());
// The deferred action runs on every exit path from this function once the wait has completed.
auto signal_cleanup = fit::defer([&]() {
// Unblock any waiting eviction requests.
no_ongoing_eviction_.Signal();
});
// Loaned pager-backed pages are tracked in the returned counts but are not added to this total.
uint64_t total_non_loaned_pages_freed = 0;
DEBUG_ASSERT(pmm_node_);
while (true) {
const uint64_t free_pages = pmm_node_->CountFreePages();
uint64_t pages_to_free = 0;
// The minimum-pages requirement takes priority; the free pages target is only considered once
// the minimum has been satisfied.
if (total_non_loaned_pages_freed < min_pages_to_evict) {
pages_to_free = min_pages_to_evict - total_non_loaned_pages_freed;
} else if (free_pages < free_pages_target) {
pages_to_free = free_pages_target - free_pages;
} else {
// The targets have been met. No more eviction is required right now.
break;
}
// Compute the desired number of discardable pages to free (vs pager-backed).
uint64_t pages_to_free_discardable = 0;
if (level == EvictionLevel::IncludeNewest) {
// If we're including newest pages too, first try to reclaim as much from discardable VMOs as
// possible.
pages_to_free_discardable = pages_to_free;
} else {
// Otherwise only the configured percentage of this round is requested from discardable VMOs.
Guard<MonitoredSpinLock, IrqSave> guard{&lock_, SOURCE_TAG};
DEBUG_ASSERT(discardable_evictions_percent_ <= 100);
pages_to_free_discardable = pages_to_free * discardable_evictions_percent_ / 100;
}
uint64_t pages_freed = EvictDiscardable(pages_to_free_discardable);
total_evicted_counts.discardable += pages_freed;
total_non_loaned_pages_freed += pages_freed;
// If we've already met the current target, continue to the next iteration of the loop.
if (pages_freed >= pages_to_free) {
continue;
}
DEBUG_ASSERT(pages_to_free > pages_freed);
// Free pager backed memory to get to |pages_to_free|.
uint64_t pages_to_free_pager_backed = pages_to_free - pages_freed;
EvictedPageCounts pages_freed_pager_backed =
EvictPagerBacked(pages_to_free_pager_backed, level);
total_evicted_counts.pager_backed += pages_freed_pager_backed.pager_backed;
total_evicted_counts.pager_backed_loaned += pages_freed_pager_backed.pager_backed_loaned;
total_non_loaned_pages_freed += pages_freed_pager_backed.pager_backed;
// |pages_freed| now covers this round's discardable and non-loaned pager-backed pages.
pages_freed += pages_freed_pager_backed.pager_backed;
// Should we fail to free any pages then we give up and consider the eviction request complete.
if (pages_freed == 0) {
break;
}
}
return total_evicted_counts;
}
uint64_t Evictor::EvictDiscardable(uint64_t target_pages) const {
if (!IsEvictionEnabled()) {
return 0;
}
list_node_t freed_list;
list_initialize(&freed_list);
// Reclaim |target_pages| from discardable vmos that have been reclaimable for at least
// |min_discardable_age_|.
zx_time_t min_age;
{
Guard<MonitoredSpinLock, IrqSave> guard{&lock_, SOURCE_TAG};
min_age = min_discardable_age_;
}
uint64_t count = VmCowPages::ReclaimPagesFromDiscardableVmos(target_pages, min_age, &freed_list);
DEBUG_ASSERT(pmm_node_);
pmm_node_->FreeList(&freed_list);
discardable_pages_evicted.Add(count);
return count;
}
// Evicts pager-backed pages from the page queues until |target_pages| non-loaned pages have been
// evicted or the queues offer no further candidate. |eviction_level| selects how far towards the
// newest queues eviction may reach. Evicted pages are freed back to the PMM node in one batch.
// Returns the number of non-loaned (|pager_backed|) and loaned (|pager_backed_loaned|) pages
// evicted; both are zero if eviction is disabled.
Evictor::EvictedPageCounts Evictor::EvictPagerBacked(uint64_t target_pages,
EvictionLevel eviction_level) const {
EvictedPageCounts counts = {};
if (!IsEvictionEnabled()) {
return counts;
}
list_node_t freed_list;
list_initialize(&freed_list);
// Avoid evicting from the newest queue to prevent thrashing.
const size_t lowest_evict_queue =
eviction_level == EvictionLevel::IncludeNewest
? PageQueues::kNumActiveQueues
: PageQueues::kNumPagerBacked - PageQueues::kNumOldestQueues;
// TODO(fxbug.dev/85056): Always follow the hint for now, i.e. protect hinted pages from eviction
// even in the face of OOM.
//
// Desired future behavior:
// If we're going to include newest pages, ignore eviction hints as well, i.e. also consider
// evicting pages with always_need set if we encounter them in LRU order.
const VmCowPages::EvictionHintAction hint_action = VmCowPages::EvictionHintAction::Follow;
// We stack-own loaned pages from RemovePageForEviction() to FreeList() below.
__UNINITIALIZED StackOwnedLoanedPagesInterval raii_interval;
DEBUG_ASSERT(page_queues_);
// Only non-loaned pages count towards |target_pages|; loaned pages are evicted and tallied
// separately.
while (counts.pager_backed < target_pages) {
// TODO(rashaeqbal): The sequence of actions in PeekPagerBacked() and RemovePageForEviction()
// implicitly guarantee forward progress in this loop, so that we're not stuck trying to evict
// the same page (i.e. PeekPagerBacked keeps returning the same page). It would be nice to have
// some explicit checks here (or in PageQueues) to guarantee forward progress. Or we might want
// to use cursors to iterate the queues instead of peeking the tail each time.
if (ktl::optional<PageQueues::VmoBacklink> backlink =
page_queues_->PeekPagerBacked(lowest_evict_queue)) {
// The backlink carries no VmCowPages to evict from; peek again.
if (!backlink->cow) {
continue;
}
if (backlink->cow->RemovePageForEviction(backlink->page, backlink->offset, hint_action)) {
list_add_tail(&freed_list, &backlink->page->queue_node);
if (pmm_is_loaned(backlink->page)) {
counts.pager_backed_loaned++;
} else {
counts.pager_backed++;
}
}
} else {
// No candidate page was returned, so there is nothing further to evict.
break;
}
}
DEBUG_ASSERT(pmm_node_);
pmm_node_->FreeList(&freed_list);
// The kernel counter tracks all evicted pager-backed pages, loaned or not.
pager_backed_pages_evicted.Add(counts.pager_backed + counts.pager_backed_loaned);
return counts;
}
// Configures continuous eviction and wakes the eviction thread. |min_mem_to_free| (bytes,
// converted to whole pages) is added to any outstanding minimum; |free_mem_target| (bytes,
// converted to whole pages), |eviction_level| and |output| replace the previous settings.
void Evictor::EnableContinuousEviction(uint64_t min_mem_to_free, uint64_t free_mem_target,
                                       EvictionLevel eviction_level, Output output) {
  // Convert the request into the units stored in the target before taking the lock.
  const uint64_t additional_min_pages = min_mem_to_free / PAGE_SIZE;
  const uint64_t target_free_pages = free_mem_target / PAGE_SIZE;
  const bool should_print = (output == Output::Print);

  {
    Guard<MonitoredSpinLock, IrqSave> guard{&lock_, SOURCE_TAG};
    // Combine min target with previously outstanding min target.
    CheckedIncrement(&continuous_eviction_target_.min_pages_to_free, additional_min_pages);
    continuous_eviction_target_.free_pages_target = target_free_pages;
    continuous_eviction_target_.level = eviction_level;
    continuous_eviction_target_.print_counts = should_print;
    // .pending has no relevance here since eviction is controlled by the eviction interval.
    // Configure eviction to occur at intervals of |default_eviction_interval_|.
    next_eviction_interval_ = default_eviction_interval_;
  }

  // Unblock the eviction thread.
  eviction_signal_.Signal();
}
void Evictor::DisableContinuousEviction() {
Guard<MonitoredSpinLock, IrqSave> guard{&lock_, SOURCE_TAG};
continuous_eviction_target_ = {};
// In the next iteration of the eviction thread loop, we will see this value and block
// indefinitely.
next_eviction_interval_ = ZX_TIME_INFINITE;
}
// Body of the eviction thread created by EnableEviction(). Repeatedly waits on
// |eviction_signal_| (with a timeout of |next_eviction_interval_|), then processes any pending
// one-shot target and, if that evicted nothing, the continuous eviction target. Exits when
// |eviction_thread_exiting_| is observed to be set. Always returns 0.
int Evictor::EvictionThreadLoop() {
while (!eviction_thread_exiting_) {
// Block until |next_eviction_interval_| is elapsed.
zx_time_t wait_interval;
{
Guard<MonitoredSpinLock, IrqSave> guard{&lock_, SOURCE_TAG};
wait_interval = next_eviction_interval_;
}
eviction_signal_.Wait(Deadline::no_slack(zx_time_add_duration(current_time(), wait_interval)));
// DisableEviction() sets the exiting flag and then signals, so re-check it after waking.
if (eviction_thread_exiting_) {
break;
}
// Process a one-shot target if there is one. This is a no-op and no pages are evicted if no
// one-shot target is pending.
auto evicted = EvictOneShotFromPreloadedTarget();
// In practice either one-shot eviction or continuous eviction will be enabled at a time. We can
// skip the rest of the loop if we evicted something here, and go back to wait for another
// request. If both one-shot and continuous modes are used together, at worst we will wait for
// |next_eviction_interval_| before evicting as required by the continuous mode, which should
// still be fine.
if (evicted.discardable + evicted.pager_backed > 0) {
continue;
}
// Read control parameters into local variables under the lock.
EvictionTarget target;
{
Guard<MonitoredSpinLock, IrqSave> guard{&lock_, SOURCE_TAG};
target = continuous_eviction_target_;
}
uint64_t free_pages_before = pmm_node_->CountFreePages();
evicted =
EvictUntilTargetsMet(target.min_pages_to_free, target.free_pages_target, target.level);
if (target.print_counts) {
printf("[EVICT]: Free memory before eviction was %zuMB and after eviction is %zuMB\n",
free_pages_before * PAGE_SIZE / MB, pmm_node_->CountFreePages() * PAGE_SIZE / MB);
if (evicted.pager_backed > 0) {
printf("[EVICT]: Evicted %lu user pager backed pages\n", evicted.pager_backed);
}
if (evicted.discardable > 0) {
printf("[EVICT]: Evicted %lu pages from discardable vmos\n", evicted.discardable);
}
}
// Loaned pager-backed pages are not included in this total.
uint64_t total_evicted = evicted.discardable + evicted.pager_backed;
// If no pages were evicted, we don't have anything to decrement from the min pages target. Skip
// the rest of the loop.
if (total_evicted == 0) {
continue;
}
{
// Update min pages target based on the number of pages evicted.
Guard<MonitoredSpinLock, IrqSave> guard{&lock_, SOURCE_TAG};
// The shared target is re-read here rather than using the local copy, since it may have been
// modified while the lock was dropped; clamp at zero instead of underflowing.
if (total_evicted < continuous_eviction_target_.min_pages_to_free) {
continuous_eviction_target_.min_pages_to_free -= total_evicted;
} else {
continuous_eviction_target_.min_pages_to_free = 0;
}
}
}
return 0;
}