// Copyright 2018 The Fuchsia Authors
//
// Use of this source code is governed by an MIT-style
// license that can be found in the LICENSE file or at
// https://opensource.org/licenses/MIT
#include "pmm_node.h"
#include <align.h>
#include <assert.h>
#include <inttypes.h>
#include <lib/boot-options/boot-options.h>
#include <lib/counters.h>
#include <lib/instrumentation/asan.h>
#include <lib/zircon-internal/macros.h>
#include <trace.h>
#include <new>
#include <fbl/algorithm.h>
#include <kernel/auto_preempt_disabler.h>
#include <kernel/mp.h>
#include <kernel/thread.h>
#include <pretty/cpp/sizes.h>
#include <vm/bootalloc.h>
#include <vm/physmap.h>
#include <vm/pmm.h>
#include <vm/pmm_checker.h>
#include <vm/stack_owned_loaned_pages_interval.h>
#include "vm_priv.h"
#define LOCAL_TRACE VM_GLOBAL_TRACE(0)
using pretty::FormattedBytes;
// The number of PMM allocation calls that have failed.
KCOUNTER(pmm_alloc_failed, "vm.pmm.alloc.failed")
KCOUNTER(pmm_alloc_delayed, "vm.pmm.alloc.delayed")
namespace {
void noop_callback(void* context, uint8_t idx) {}
// Indicates whether a PMM alloc call has ever failed with ZX_ERR_NO_MEMORY. Used to trigger an OOM
// response. See |MemoryWatchdog::WorkerThread|.
ktl::atomic<bool> alloc_failed_no_mem;
} // namespace
// Poison a page |p| with value |value|. Accesses to a poisoned page via the physmap are not
// allowed and may cause faults or kASAN check failures.
void PmmNode::AsanPoisonPage(vm_page_t* p, uint8_t value) {
#if __has_feature(address_sanitizer)
asan_poison_shadow(reinterpret_cast<uintptr_t>(paddr_to_physmap(p->paddr())), PAGE_SIZE, value);
#endif // __has_feature(address_sanitizer)
}
// Unpoison a page |p|. Accesses to an unpoisoned page will not cause kASAN check failures.
void PmmNode::AsanUnpoisonPage(vm_page_t* p) {
#if __has_feature(address_sanitizer)
asan_unpoison_shadow(reinterpret_cast<uintptr_t>(paddr_to_physmap(p->paddr())), PAGE_SIZE);
#endif // __has_feature(address_sanitizer)
}
PmmNode::PmmNode() : evictor_(this) {
// Initialize the reclamation watermarks such that the system never
// falls into a low memory state.
uint64_t default_watermark = 0;
InitReclamation(&default_watermark, 1, 0, nullptr, noop_callback);
}
PmmNode::~PmmNode() {}
// We disable thread safety analysis here, since this function is only called
// during early boot before threading exists.
zx_status_t PmmNode::AddArena(const pmm_arena_info_t* info) TA_NO_THREAD_SAFETY_ANALYSIS {
dprintf(INFO, "PMM: adding arena %p name '%s' base %#" PRIxPTR " size %#zx\n", info, info->name,
info->base, info->size);
// Make sure we're in early boot (ints disabled and no active CPUs according
// to the scheduler).
DEBUG_ASSERT(mp_get_active_mask() == 0);
DEBUG_ASSERT(arch_ints_disabled());
DEBUG_ASSERT(IS_PAGE_ALIGNED(info->base));
DEBUG_ASSERT(IS_PAGE_ALIGNED(info->size));
DEBUG_ASSERT(info->size > 0);
// allocate a C++ arena object
PmmArena* arena = new (boot_alloc_mem(sizeof(PmmArena))) PmmArena();
// initialize the object
auto status = arena->Init(info, this);
if (status != ZX_OK) {
// Note: this leaks the boot allocator memory backing |arena|.
arena->~PmmArena();
printf("PMM: pmm_add_arena failed to initialize arena\n");
return status;
}
// walk the arena list, inserting in ascending order of arena base address
for (auto& a : arena_list_) {
if (a.base() > arena->base()) {
arena_list_.insert(a, arena);
goto done_add;
}
}
// walked off the end, add it to the end of the list
arena_list_.push_back(arena);
done_add:
arena_cumulative_size_ += info->size;
return ZX_OK;
}
size_t PmmNode::NumArenas() const {
Guard<Mutex> guard{&lock_};
return arena_list_.size();
}
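// Copies |count| arena descriptors, starting at index |i|, into |buffer|. Fails if the requested
// range is out of bounds or |buffer_size| is too small to hold the result.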
zx_status_t PmmNode::GetArenaInfo(size_t count, uint64_t i, pmm_arena_info_t* buffer,
size_t buffer_size) {
Guard<Mutex> guard{&lock_};
if ((count == 0) || (count + i > arena_list_.size()) || (i >= arena_list_.size())) {
return ZX_ERR_OUT_OF_RANGE;
}
const size_t size_required = count * sizeof(pmm_arena_info_t);
if (buffer_size < size_required) {
return ZX_ERR_BUFFER_TOO_SMALL;
}
// Skip the first |i| elements.
auto iter = arena_list_.begin();
for (uint64_t j = 0; j < i; j++) {
iter++;
}
// Copy the next |count| elements.
for (uint64_t j = 0; j < count; j++) {
buffer[j] = iter->info();
iter++;
}
return ZX_OK;
}
// called at boot time as arenas are brought online; no locks are acquired
void PmmNode::AddFreePages(list_node* list) TA_NO_THREAD_SAFETY_ANALYSIS {
LTRACEF("list %p\n", list);
uint64_t free_count = 0;
vm_page *temp, *page;
list_for_every_entry_safe (list, page, temp, vm_page, queue_node) {
list_delete(&page->queue_node);
DEBUG_ASSERT(!page->loaned);
DEBUG_ASSERT(!page->loan_cancelled);
DEBUG_ASSERT(page->is_free());
list_add_tail(&free_list_, &page->queue_node);
++free_count;
}
free_count_.fetch_add(free_count);
ASSERT(free_count_);
free_pages_evt_.Signal();
LTRACEF("free count now %" PRIu64 "\n", free_count_.load(ktl::memory_order_relaxed));
}
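// If free-fill is enabled, writes the fill pattern into every free page (regular and loaned) and
// then arms the checker so subsequent allocations can verify the pattern.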
void PmmNode::FillFreePagesAndArm() {
Guard<Mutex> guard{&lock_};
if (!free_fill_enabled_) {
return;
}
vm_page* page;
list_for_every_entry (&free_list_, page, vm_page, queue_node) {
checker_.FillPattern(page);
}
list_for_every_entry (&free_loaned_list_, page, vm_page, queue_node) {
checker_.FillPattern(page);
}
// Now that every page has been filled, we can arm the checker.
checker_.Arm();
checker_.PrintStatus(stdout);
}
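// If the checker is armed, verifies the fill pattern on every free page (a mismatch means
// something wrote to a page after it was freed) and cross-checks the walked page counts against
// the free counters.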
void PmmNode::CheckAllFreePages() {
Guard<Mutex> guard{&lock_};
if (!checker_.IsArmed()) {
return;
}
uint64_t free_page_count = 0;
uint64_t free_loaned_page_count = 0;
vm_page* page;
list_for_every_entry (&free_list_, page, vm_page, queue_node) {
checker_.AssertPattern(page);
++free_page_count;
}
list_for_every_entry (&free_loaned_list_, page, vm_page, queue_node) {
checker_.AssertPattern(page);
++free_loaned_page_count;
}
ASSERT(free_page_count == free_count_.load(ktl::memory_order_relaxed));
ASSERT(free_loaned_page_count == free_loaned_count_.load(ktl::memory_order_relaxed));
}
#if __has_feature(address_sanitizer)
void PmmNode::PoisonAllFreePages() {
Guard<Mutex> guard{&lock_};
vm_page* page;
list_for_every_entry (&free_list_, page, vm_page, queue_node) {
AsanPoisonPage(page, kAsanPmmFreeMagic);
};
list_for_every_entry (&free_loaned_list_, page, vm_page, queue_node) {
AsanPoisonPage(page, kAsanPmmFreeMagic);
};
}
#endif // __has_feature(address_sanitizer)
void PmmNode::EnableFreePageFilling(size_t fill_size, PmmChecker::Action action) {
Guard<Mutex> guard{&lock_};
checker_.SetFillSize(fill_size);
checker_.SetAction(action);
free_fill_enabled_ = true;
}
void PmmNode::DisableChecker() {
Guard<Mutex> guard{&lock_};
checker_.Disarm();
free_fill_enabled_ = false;
}
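// Common page-allocation bookkeeping: unpoisons the page, tags loaned pages with the current
// stack owner (with a release fence so the owner is visible before the state change), moves the
// page to the ALLOC state, and verifies the free-fill pattern when free-fill is enabled.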
void PmmNode::AllocPageHelperLocked(vm_page_t* page) {
LTRACEF("allocating page %p, pa %#" PRIxPTR ", prev state %s\n", page, page->paddr(),
page_state_to_string(page->state()));
AsanUnpoisonPage(page);
DEBUG_ASSERT(page->is_free());
DEBUG_ASSERT(!page->object.is_stack_owned());
if (page->is_loaned()) {
page->object.set_stack_owner(&StackOwnedLoanedPagesInterval::current());
// We want the set_stack_owner() to be visible before set_state(), but we don't need to make
// set_state() a release just for the benefit of loaned pages, so we use this fence.
ktl::atomic_thread_fence(ktl::memory_order_release);
}
page->set_state(vm_page_state::ALLOC);
if (unlikely(free_fill_enabled_)) {
checker_.AssertPattern(page);
}
}
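// Allocates a single page, preferring the loaned free list when borrowing is both globally
// enabled and requested via PMM_ALLOC_FLAG_CAN_BORROW. Returns ZX_ERR_SHOULD_WAIT for
// PMM_ALLOC_FLAG_CAN_WAIT allocations made while in the OOM state (unless ShouldWait returns
// have been disabled), and ZX_ERR_NO_MEMORY when the chosen free list is empty.
//
// Illustrative call sequence (|node| is a hypothetical PmmNode reference, not part of this
// file):
//
//   vm_page_t* page = nullptr;
//   paddr_t pa = 0;
//   if (node.AllocPage(0, &page, &pa) == ZX_OK) {
//     // ... use |page| / |pa| ...
//     node.FreePage(page);
//   }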
zx_status_t PmmNode::AllocPage(uint alloc_flags, vm_page_t** page_out, paddr_t* pa_out) {
DEBUG_ASSERT(Thread::Current::memory_allocation_state().IsEnabled());
AutoPreemptDisabler preempt_disable;
Guard<Mutex> guard{&lock_};
// If the caller sets PMM_ALLOC_FLAG_MUST_BORROW, the caller must also set
// PMM_ALLOC_FLAG_CAN_BORROW, and must not set PMM_ALLOC_FLAG_CAN_WAIT.
DEBUG_ASSERT(
!(alloc_flags & PMM_ALLOC_FLAG_MUST_BORROW) ||
((alloc_flags & PMM_ALLOC_FLAG_CAN_BORROW) && !((alloc_flags & PMM_ALLOC_FLAG_CAN_WAIT))));
const bool can_borrow = pmm_physical_page_borrowing_config()->is_any_borrowing_enabled() &&
!!(alloc_flags & PMM_ALLOC_FLAG_CAN_BORROW);
const bool must_borrow = can_borrow && !!(alloc_flags & PMM_ALLOC_FLAG_MUST_BORROW);
const bool use_loaned_list = can_borrow && (!list_is_empty(&free_loaned_list_) || must_borrow);
list_node* const which_list = use_loaned_list ? &free_loaned_list_ : &free_list_;
// Note that we do not care whether the allocation would come from the loaned list or not: even
// in the OOM state we still prefer to reserve the loaned pages for allocations that cannot be
// delayed.
if ((alloc_flags & PMM_ALLOC_FLAG_CAN_WAIT) && InOomStateLocked() && !never_return_should_wait_) {
pmm_alloc_delayed.Add(1);
return ZX_ERR_SHOULD_WAIT;
}
vm_page* page = list_remove_head_type(which_list, vm_page, queue_node);
if (!page) {
if (!must_borrow) {
// Allocation failures from the regular free list are likely to become user-visible.
ReportAllocFailure();
}
return ZX_ERR_NO_MEMORY;
}
DEBUG_ASSERT(can_borrow || !page->is_loaned());
AllocPageHelperLocked(page);
if (use_loaned_list) {
DecrementFreeLoanedCountLocked(1);
} else {
DecrementFreeCountLocked(1);
}
if (pa_out) {
*pa_out = page->paddr();
}
if (page_out) {
*page_out = page;
}
return ZX_OK;
}
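// Allocates |count| pages onto |list|, preferring loaned pages when borrowing is allowed. The
// single-page case is forwarded to AllocPage(); larger requests are carved off the free lists in
// bulk by splitting and splicing list nodes rather than removing pages one at a time.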
zx_status_t PmmNode::AllocPages(size_t count, uint alloc_flags, list_node* list) {
LTRACEF("count %zu\n", count);
DEBUG_ASSERT(Thread::Current::memory_allocation_state().IsEnabled());
// list must be initialized prior to calling this
DEBUG_ASSERT(list);
if (unlikely(count == 0)) {
return ZX_OK;
} else if (count == 1) {
vm_page* page;
zx_status_t status = AllocPage(alloc_flags, &page, nullptr);
if (likely(status == ZX_OK)) {
list_add_tail(list, &page->queue_node);
}
return status;
}
// If the caller sets PMM_ALLOC_FLAG_MUST_BORROW, the caller must also set
// PMM_ALLOC_FLAG_CAN_BORROW, and must not set PMM_ALLOC_FLAG_CAN_WAIT.
DEBUG_ASSERT(
!(alloc_flags & PMM_ALLOC_FLAG_MUST_BORROW) ||
((alloc_flags & PMM_ALLOC_FLAG_CAN_BORROW) && !((alloc_flags & PMM_ALLOC_FLAG_CAN_WAIT))));
const bool can_borrow = pmm_physical_page_borrowing_config()->is_any_borrowing_enabled() &&
!!(alloc_flags & PMM_ALLOC_FLAG_CAN_BORROW);
const bool must_borrow = can_borrow && !!(alloc_flags & PMM_ALLOC_FLAG_MUST_BORROW);
AutoPreemptDisabler preempt_disable;
Guard<Mutex> guard{&lock_};
uint64_t free_count;
if (must_borrow) {
free_count = 0;
} else {
free_count = free_count_.load(ktl::memory_order_relaxed);
}
uint64_t available_count = free_count;
uint64_t free_loaned_count = 0;
if (can_borrow) {
free_loaned_count = free_loaned_count_.load(ktl::memory_order_relaxed);
available_count += free_loaned_count;
}
if (unlikely(count > available_count)) {
if ((alloc_flags & PMM_ALLOC_FLAG_CAN_WAIT) && !never_return_should_wait_) {
pmm_alloc_delayed.Add(1);
return ZX_ERR_SHOULD_WAIT;
}
if (!must_borrow) {
// Allocation failures from the regular free list are likely to become user-visible.
ReportAllocFailure();
}
return ZX_ERR_NO_MEMORY;
}
// Prefer to allocate from the loaned list when this allocation allows it. If loaned pages are
// not allowed for this allocation, free_loaned_count will be zero here.
DEBUG_ASSERT(can_borrow || !free_loaned_count);
DEBUG_ASSERT(!must_borrow || !free_count);
uint64_t from_loaned_free = ktl::min(count, free_loaned_count);
uint64_t from_free = count - from_loaned_free;
DecrementFreeCountLocked(from_free);
// For simplicity of OOM state detection we do this check after decrementing the free count,
// since the error case is unlikely and not performance critical.
// Even if no pages are being requested from the regular free list (because loaned pages can
// cover the request), we still fail in the OOM state, since we would prefer those loaned pages
// to be used to fulfill allocations that cannot be delayed.
if ((alloc_flags & PMM_ALLOC_FLAG_CAN_WAIT) && InOomStateLocked() && !never_return_should_wait_) {
IncrementFreeCountLocked(from_free);
pmm_alloc_delayed.Add(1);
return ZX_ERR_SHOULD_WAIT;
}
DecrementFreeLoanedCountLocked(from_loaned_free);
do {
DEBUG_ASSERT(count == from_loaned_free + from_free);
list_node* which_list;
size_t which_count;
if (can_borrow && !list_is_empty(&free_loaned_list_)) {
which_list = &free_loaned_list_;
which_count = from_loaned_free;
from_loaned_free = 0;
} else {
DEBUG_ASSERT(!must_borrow);
which_list = &free_list_;
which_count = from_free;
from_free = 0;
}
count -= which_count;
DEBUG_ASSERT(which_count > 0);
auto node = which_list;
while (which_count > 0) {
node = list_next(which_list, node);
DEBUG_ASSERT(can_borrow || !containerof(node, vm_page, queue_node)->is_loaned());
AllocPageHelperLocked(containerof(node, vm_page, queue_node));
--which_count;
}
list_node tmp_list = LIST_INITIAL_VALUE(tmp_list);
list_split_after(which_list, node, &tmp_list);
if (list_is_empty(list)) {
list_move(which_list, list);
} else {
list_splice_after(which_list, list_peek_tail(list));
}
list_move(&tmp_list, which_list);
DEBUG_ASSERT(count == from_loaned_free + from_free);
} while (count > 0);
return ZX_OK;
}
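// Allocates the specific physical range [address, address + count * PAGE_SIZE). Fails with
// ZX_ERR_NOT_FOUND unless every page in the range is currently free and not loaned; on failure
// any pages already taken are returned to the free list.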
zx_status_t PmmNode::AllocRange(paddr_t address, size_t count, list_node* list) {
LTRACEF("address %#" PRIxPTR ", count %zu\n", address, count);
DEBUG_ASSERT(Thread::Current::memory_allocation_state().IsEnabled());
// list must be initialized prior to calling this
DEBUG_ASSERT(list);
// In error scenarios we will free the list, so make sure the caller didn't leave anything in
// there.
DEBUG_ASSERT(list_is_empty(list));
size_t allocated = 0;
if (count == 0) {
return ZX_OK;
}
address = ROUNDDOWN(address, PAGE_SIZE);
AutoPreemptDisabler preempt_disable;
Guard<Mutex> guard{&lock_};
// walk through the arenas, looking to see if the physical page belongs to it
for (auto& a : arena_list_) {
for (; allocated < count && a.address_in_arena(address); address += PAGE_SIZE) {
vm_page_t* page = a.FindSpecific(address);
if (!page) {
break;
}
if (!page->is_free()) {
break;
}
// We never allocate loaned pages for callers of AllocRange().
if (page->loaned) {
break;
}
list_delete(&page->queue_node);
AllocPageHelperLocked(page);
list_add_tail(list, &page->queue_node);
allocated++;
DecrementFreeCountLocked(1);
}
if (allocated == count) {
break;
}
}
if (allocated != count) {
// we were not able to allocate the entire run, free these pages
FreeListLocked(list);
return ZX_ERR_NOT_FOUND;
}
return ZX_OK;
}
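// Allocates |count| physically contiguous pages aligned to 2^alignment_log2, searching each
// arena in turn for a free run. The borrowing and waiting flags are not supported here, and
// loaned pages are never part of the returned run.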
zx_status_t PmmNode::AllocContiguous(const size_t count, uint alloc_flags, uint8_t alignment_log2,
paddr_t* pa, list_node* list) {
DEBUG_ASSERT(Thread::Current::memory_allocation_state().IsEnabled());
LTRACEF("count %zu, align %u\n", count, alignment_log2);
if (count == 0) {
return ZX_OK;
}
if (alignment_log2 < PAGE_SIZE_SHIFT) {
alignment_log2 = PAGE_SIZE_SHIFT;
}
DEBUG_ASSERT(!(alloc_flags & (PMM_ALLOC_FLAG_CAN_BORROW | PMM_ALLOC_FLAG_MUST_BORROW |
PMM_ALLOC_FLAG_CAN_WAIT)));
// pa and list must be valid pointers
DEBUG_ASSERT(pa);
DEBUG_ASSERT(list);
AutoPreemptDisabler preempt_disable;
Guard<Mutex> guard{&lock_};
for (auto& a : arena_list_) {
vm_page_t* p = a.FindFreeContiguous(count, alignment_log2);
if (!p) {
continue;
}
*pa = p->paddr();
// remove the pages from the run out of the free list
for (size_t i = 0; i < count; i++, p++) {
DEBUG_ASSERT_MSG(p->is_free(), "p %p state %u\n", p, static_cast<uint32_t>(p->state()));
// Loaned pages are never returned by FindFreeContiguous() above.
DEBUG_ASSERT(!p->loaned);
DEBUG_ASSERT(list_in_list(&p->queue_node));
list_delete(&p->queue_node);
p->set_state(vm_page_state::ALLOC);
DecrementFreeCountLocked(1);
AsanUnpoisonPage(p);
checker_.AssertPattern(p);
list_add_tail(list, &p->queue_node);
}
return ZX_OK;
}
// We could potentially move contents of non-pinned pages out of the way for critical contiguous
// allocations, but for now...
LTRACEF("couldn't find run\n");
return ZX_ERR_NOT_FOUND;
}
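// Common page-free bookkeeping: moves the page to the FREE state, clears any stack owner (see
// the detailed comment below), optionally writes the free-fill pattern, and poisons the page for
// kASAN. Does not touch the free lists or counters; callers handle those.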
void PmmNode::FreePageHelperLocked(vm_page* page) {
LTRACEF("page %p state %zu paddr %#" PRIxPTR "\n", page, VmPageStateIndex(page->state()),
page->paddr());
DEBUG_ASSERT(!page->is_free());
DEBUG_ASSERT(page->state() != vm_page_state::OBJECT || page->object.pin_count == 0);
// mark it free
page->set_state(vm_page_state::FREE);
// Coming from OBJECT or ALLOC, this will only be true if the page was loaned (it may still be
// loaned, but need not be if the contiguous VMO the page was loaned from was deleted during
// stack ownership).
//
// Coming from any other state, this currently can't be true. If it were to become true in the
// future, it would only be because some other state gained a field overlapping this one, in
// which case we would still want to clear the invalid stack owner pointer value. Clearing that
// invalid stack owner after setting FREE above (instead of before) is safe because the stack
// owner is only read elsewhere for pages with an underlying contiguous VMO owner (whether
// actually loaned at the time or not), and such pages can only be in the FREE, ALLOC, or OBJECT
// states, which all have this field; so an invalid stack owner pointer is never read elsewhere
// (there's a magic number canary just in case, though). We could instead clear any invalid
// stack owner pointer before setting FREE above and have a shorter comment here, but there's no
// actual need for the extra "if", so we just let this "if" handle it (especially since this
// whole paragraph describes a hypothetical future; there are no overlapping fields yet as of
// this comment).
if (page->object.is_stack_owned()) {
// Make FREE visible before lack of stack owner.
ktl::atomic_thread_fence(ktl::memory_order_release);
page->object.clear_stack_owner();
}
if (unlikely(free_fill_enabled_)) {
checker_.FillPattern(page);
}
AsanPoisonPage(page, kAsanPmmFreeMagic);
}
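// Frees a single page, returning it to the appropriate free list: the regular list for ordinary
// pages, the loaned list for loaned pages, or no list at all while the loan is cancelled.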
void PmmNode::FreePage(vm_page* page) {
AutoPreemptDisabler preempt_disable;
Guard<Mutex> guard{&lock_};
// pages freed individually shouldn't be in a queue
DEBUG_ASSERT(!list_in_list(&page->queue_node));
FreePageHelperLocked(page);
list_node* which_list = nullptr;
if (!page->loaned) {
IncrementFreeCountLocked(1);
which_list = &free_list_;
} else if (!page->loan_cancelled) {
IncrementFreeLoanedCountLocked(1);
which_list = &free_loaned_list_;
}
// Add the page to the appropriate free queue, unless loan_cancelled. The loan_cancelled pages
// don't go in any free queue because they shouldn't get re-used until reclaimed by their
// underlying contiguous VMO or until that underlying contiguous VMO is deleted.
DEBUG_ASSERT(which_list || page->loan_cancelled);
if (which_list) {
if constexpr (!__has_feature(address_sanitizer)) {
list_add_head(which_list, &page->queue_node);
} else {
// If address sanitizer is enabled, put the page at the tail to maximize reuse distance.
list_add_tail(which_list, &page->queue_node);
}
}
}
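// Frees every page on |list|, routing loaned pages (and loan-cancelled pages, which go on no
// list) separately from regular pages, then splices the remainder onto the free lists in bulk.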
void PmmNode::FreeListLocked(list_node* list) {
DEBUG_ASSERT(list);
// process list backwards so the head is as hot as possible
uint64_t count = 0;
uint64_t loaned_count = 0;
list_node freed_loaned_list = LIST_INITIAL_VALUE(freed_loaned_list);
{ // scope page
vm_page* page = list_peek_tail_type(list, vm_page_t, queue_node);
while (page) {
FreePageHelperLocked(page);
vm_page_t* next_page = list_prev_type(list, &page->queue_node, vm_page_t, queue_node);
if (page->loaned) {
// Remove from |list| and possibly put on freed_loaned_list instead, to route to the correct
// free list, or no free list if loan_cancelled.
list_delete(&page->queue_node);
if (!page->loan_cancelled) {
list_add_head(&freed_loaned_list, &page->queue_node);
++loaned_count;
}
} else {
count++;
}
page = next_page;
}
} // end scope page
if constexpr (!__has_feature(address_sanitizer)) {
// splice |list| at the head of free_list_ and |freed_loaned_list| at the head of
// free_loaned_list_.
list_splice_after(list, &free_list_);
list_splice_after(&freed_loaned_list, &free_loaned_list_);
} else {
// If address sanitizer is enabled, put the pages at the tail to maximize reuse distance.
if (!list_is_empty(&free_list_)) {
list_splice_after(list, list_peek_tail(&free_list_));
} else {
list_splice_after(list, &free_list_);
}
if (!list_is_empty(&free_loaned_list_)) {
list_splice_after(&freed_loaned_list, list_peek_tail(&free_loaned_list_));
} else {
list_splice_after(&freed_loaned_list, &free_loaned_list_);
}
}
IncrementFreeCountLocked(count);
IncrementFreeLoanedCountLocked(loaned_count);
}
void PmmNode::FreeList(list_node* list) {
AutoPreemptDisabler preempt_disable;
Guard<Mutex> guard{&lock_};
FreeListLocked(list);
}
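// Returns true if the node is in the most-constrained memory state (index 0). In
// DEBUG_ASSERT_IMPLEMENTED builds this can also randomly report true to exercise the
// delayed-allocation path when pmm_alloc_random_should_wait is set.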
bool PmmNode::InOomStateLocked() {
if (mem_avail_state_cur_index_ == 0) {
return true;
}
// See pmm_check_alloc_random_should_wait in pmm.cc for an assertion that random should wait is
// only enabled if DEBUG_ASSERT_IMPLEMENTED.
if constexpr (DEBUG_ASSERT_IMPLEMENTED) {
// Randomly try to make 10% of allocations delayed allocations.
if (gBootOptions->pmm_alloc_random_should_wait && rand() < (RAND_MAX / 10)) {
return true;
}
}
return false;
}
uint64_t PmmNode::CountFreePages() const TA_NO_THREAD_SAFETY_ANALYSIS {
return free_count_.load(ktl::memory_order_relaxed);
}
uint64_t PmmNode::CountLoanedFreePages() const TA_NO_THREAD_SAFETY_ANALYSIS {
return free_loaned_count_.load(ktl::memory_order_relaxed);
}
uint64_t PmmNode::CountLoanedNotFreePages() const TA_NO_THREAD_SAFETY_ANALYSIS {
AutoPreemptDisabler preempt_disable;
Guard<Mutex> guard{&lock_};
return loaned_count_.load(ktl::memory_order_relaxed) -
free_loaned_count_.load(ktl::memory_order_relaxed);
}
uint64_t PmmNode::CountLoanedPages() const TA_NO_THREAD_SAFETY_ANALYSIS {
return loaned_count_.load(ktl::memory_order_relaxed);
}
uint64_t PmmNode::CountLoanCancelledPages() const TA_NO_THREAD_SAFETY_ANALYSIS {
return loan_cancelled_count_.load(ktl::memory_order_relaxed);
}
uint64_t PmmNode::CountTotalBytes() const TA_NO_THREAD_SAFETY_ANALYSIS {
return arena_cumulative_size_;
}
void PmmNode::DumpFree() const TA_NO_THREAD_SAFETY_ANALYSIS {
auto megabytes_free = CountFreePages() * PAGE_SIZE / MB;
printf(" %zu free MBs\n", megabytes_free);
}
void PmmNode::Dump(bool is_panic) const {
// No lock analysis here, as we want to just go for it in the panic case without the lock.
auto dump = [this]() TA_NO_THREAD_SAFETY_ANALYSIS {
uint64_t free_count = free_count_.load(ktl::memory_order_relaxed);
uint64_t free_loaned_count = free_loaned_count_.load(ktl::memory_order_relaxed);
printf(
"pmm node %p: free_count %zu (%zu bytes), free_loaned_count: %zu (%zu bytes), total size "
"%zu\n",
this, free_count, free_count * PAGE_SIZE, free_loaned_count, free_loaned_count * PAGE_SIZE,
arena_cumulative_size_);
for (auto& a : arena_list_) {
a.Dump(false, false);
}
};
if (is_panic) {
dump();
} else {
Guard<Mutex> guard{&lock_};
dump();
}
}
zx_status_t PmmNode::InitReclamation(const uint64_t* watermarks, uint8_t watermark_count,
uint64_t debounce, void* context,
mem_avail_state_updated_callback_t callback) {
if (watermark_count > MAX_WATERMARK_COUNT) {
return ZX_ERR_INVALID_ARGS;
}
AutoPreemptDisabler preempt_disable;
Guard<Mutex> guard{&lock_};
uint64_t tmp[MAX_WATERMARK_COUNT];
uint64_t tmp_debounce = fbl::round_up(debounce, static_cast<uint64_t>(PAGE_SIZE)) / PAGE_SIZE;
for (uint8_t i = 0; i < watermark_count; i++) {
tmp[i] = watermarks[i] / PAGE_SIZE;
if (i > 0) {
if (tmp[i] <= tmp[i - 1]) {
return ZX_ERR_INVALID_ARGS;
}
} else {
if (tmp[i] < tmp_debounce) {
return ZX_ERR_INVALID_ARGS;
}
}
}
mem_avail_state_watermark_count_ = watermark_count;
mem_avail_state_debounce_ = tmp_debounce;
mem_avail_state_context_ = context;
mem_avail_state_callback_ = callback;
memcpy(mem_avail_state_watermarks_, tmp, sizeof(mem_avail_state_watermarks_));
static_assert(sizeof(tmp) == sizeof(mem_avail_state_watermarks_));
UpdateMemAvailStateLocked();
return ZX_OK;
}
void PmmNode::UpdateMemAvailStateLocked() {
// Find the smallest watermark which is greater than the number of free pages.
uint8_t target = mem_avail_state_watermark_count_;
for (uint8_t i = 0; i < mem_avail_state_watermark_count_; i++) {
if (mem_avail_state_watermarks_[i] > free_count_.load(ktl::memory_order_relaxed)) {
target = i;
break;
}
}
SetMemAvailStateLocked(target);
}
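// Enters memory availability state |mem_avail_state|, recomputes the debounced lower/upper
// free-page bounds that trigger the next transition, and invokes the registered callback.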
void PmmNode::SetMemAvailStateLocked(uint8_t mem_avail_state) {
mem_avail_state_cur_index_ = mem_avail_state;
if (mem_avail_state_cur_index_ == 0) {
if (likely(!never_return_should_wait_)) {
free_pages_evt_.Unsignal();
}
} else {
free_pages_evt_.Signal();
}
if (mem_avail_state_cur_index_ > 0) {
// If there is a smaller watermark, then we transition into that state when the
// number of free pages drops more than |mem_avail_state_debounce_| pages into that state.
mem_avail_state_lower_bound_ =
mem_avail_state_watermarks_[mem_avail_state_cur_index_ - 1] - mem_avail_state_debounce_;
} else {
// There is no smaller state, so we can't ever transition down.
mem_avail_state_lower_bound_ = 0;
}
if (mem_avail_state_cur_index_ < mem_avail_state_watermark_count_) {
// If there is a larger watermark, then we transition out of the current state when
// the number of free pages exceeds the current state's watermark by at least
// |mem_avail_state_debounce_|.
mem_avail_state_upper_bound_ =
mem_avail_state_watermarks_[mem_avail_state_cur_index_] + mem_avail_state_debounce_;
} else {
// There is no larger state, so we can't ever transition up.
mem_avail_state_upper_bound_ = UINT64_MAX / PAGE_SIZE;
}
mem_avail_state_callback_(mem_avail_state_context_, mem_avail_state_cur_index_);
}
void PmmNode::DumpMemAvailState() const {
Guard<Mutex> guard{&lock_};
printf("watermarks: [");
for (unsigned i = 0; i < mem_avail_state_watermark_count_; i++) {
printf("%s%s", FormattedBytes(mem_avail_state_watermarks_[i] * PAGE_SIZE).c_str(),
i + 1 == mem_avail_state_watermark_count_ ? "]\n" : ", ");
}
printf("debounce: %s\n", FormattedBytes(mem_avail_state_debounce_ * PAGE_SIZE).c_str());
printf("current state: %u\n", mem_avail_state_cur_index_);
printf("current bounds: [%s, %s]\n",
FormattedBytes(mem_avail_state_lower_bound_ * PAGE_SIZE).c_str(),
FormattedBytes(mem_avail_state_upper_bound_ * PAGE_SIZE).c_str());
printf("free memory: %s\n", FormattedBytes(free_count_ * PAGE_SIZE).c_str());
}
uint64_t PmmNode::DebugNumPagesTillMemState(uint8_t mem_state_idx) const {
Guard<Mutex> guard{&lock_};
if (mem_avail_state_cur_index_ <= mem_state_idx) {
// Already in mem_state_idx, or in a state with less available memory than mem_state_idx.
return 0;
}
// We need to either get free_pages below mem_avail_state_watermarks_[mem_state_idx] or, if we are
// in state (mem_state_idx + 1), we also need to clear the debounce amount. For simplicity we just
// always allocate the debounce amount as well.
uint64_t trigger = mem_avail_state_watermarks_[mem_state_idx] - mem_avail_state_debounce_;
return (free_count_ - trigger);
}
uint8_t PmmNode::DebugMaxMemAvailState() const {
Guard<Mutex> guard{&lock_};
return mem_avail_state_watermark_count_;
}
void PmmNode::DebugMemAvailStateCallback(uint8_t mem_state_idx) const {
Guard<Mutex> guard{&lock_};
if (mem_state_idx >= mem_avail_state_watermark_count_) {
return;
}
// Invoke callback for the requested state without allocating additional memory, or messing with
// any of the internal memory state tracking counters.
mem_avail_state_callback_(mem_avail_state_context_, mem_state_idx);
}
void PmmNode::StopReturningShouldWait() {
Guard<Mutex> guard{&lock_};
never_return_should_wait_ = true;
free_pages_evt_.Signal();
}
int64_t PmmNode::get_alloc_failed_count() { return pmm_alloc_failed.SumAcrossAllCpus(); }
bool PmmNode::has_alloc_failed_no_mem() {
return alloc_failed_no_mem.load(ktl::memory_order_relaxed);
}
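// Marks every page on |page_list| as loaned and then frees the pages, making them available to
// the rest of the system via the loaned free list.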
void PmmNode::BeginLoan(list_node* page_list) {
DEBUG_ASSERT(page_list);
AutoPreemptDisabler preempt_disable;
Guard<Mutex> guard{&lock_};
uint64_t loaned_count = 0;
vm_page* page;
list_for_every_entry (page_list, page, vm_page, queue_node) {
DEBUG_ASSERT(!page->loaned);
DEBUG_ASSERT(!page->is_free());
page->loaned = true;
++loaned_count;
DEBUG_ASSERT(!page->loan_cancelled);
}
IncrementLoanedCountLocked(loaned_count);
// Callers of BeginLoan() generally won't want the pages loaned back to them; the intent is to
// loan to the rest of the system, so go ahead and free them as well. Some callers are
// essentially choosing between pmm_begin_loan() and pmm_free().
FreeListLocked(page_list);
}
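// Marks the loaned pages in [address, address + count * PAGE_SIZE) as loan_cancelled, pulling
// any currently-free ones off the loaned free list so they can't be handed out before EndLoan().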
void PmmNode::CancelLoan(paddr_t address, size_t count) {
AutoPreemptDisabler preempt_disable;
Guard<Mutex> guard{&lock_};
DEBUG_ASSERT(IS_PAGE_ALIGNED(address));
paddr_t end = address + count * PAGE_SIZE;
DEBUG_ASSERT(address <= end);
uint64_t loan_cancelled_count = 0;
uint64_t no_longer_free_loaned_count = 0;
ForPagesInPhysRangeLocked(address, count,
[&loan_cancelled_count, &no_longer_free_loaned_count](vm_page_t* page) {
// We can assert this because of PageSource's overlapping request
// handling.
DEBUG_ASSERT(page->is_loaned());
bool was_cancelled = page->loan_cancelled;
// We can assert this because of PageSource's overlapping request
// handling.
DEBUG_ASSERT(!was_cancelled);
page->loan_cancelled = true;
++loan_cancelled_count;
if (page->is_free()) {
// Currently in free_loaned_list_.
DEBUG_ASSERT(list_in_list(&page->queue_node));
// Remove from free_loaned_list_ to prevent any new use until
// after EndLoan.
list_delete(&page->queue_node);
no_longer_free_loaned_count++;
}
});
IncrementLoanCancelledCountLocked(loan_cancelled_count);
DecrementFreeLoanedCountLocked(no_longer_free_loaned_count);
}
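// Reclaims the previously loan-cancelled, free pages in the range for the underlying contiguous
// VMO: clears the loaned/loan_cancelled flags and allocates the pages onto |page_list|.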
void PmmNode::EndLoan(paddr_t address, size_t count, list_node* page_list) {
AutoPreemptDisabler preempt_disable;
Guard<Mutex> guard{&lock_};
DEBUG_ASSERT(IS_PAGE_ALIGNED(address));
paddr_t end = address + count * PAGE_SIZE;
DEBUG_ASSERT(address <= end);
uint64_t loan_ended_count = 0;
ForPagesInPhysRangeLocked(address, count, [this, &page_list, &loan_ended_count](vm_page_t* page) {
AssertHeld(lock_);
// PageSource serializes requests so that only one request per page is in flight to the
// PageProvider at a time; that is the main reason we can assert these conditions instead of
// having to check them.
DEBUG_ASSERT(page->is_loaned());
DEBUG_ASSERT(page->is_loan_cancelled());
DEBUG_ASSERT(page->is_free());
// Already not in free_loaned_list_ (because loan_cancelled already).
DEBUG_ASSERT(!list_in_list(&page->queue_node));
page->loaned = false;
page->loan_cancelled = false;
++loan_ended_count;
AllocPageHelperLocked(page);
list_add_tail(page_list, &page->queue_node);
});
DecrementLoanCancelledCountLocked(loan_ended_count);
DecrementLoanedCountLocked(loan_ended_count);
}
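// Called when the lending (contiguous) VMO is deleted: un-loans every page in the range, moving
// any free pages from the loaned free list to the regular free list and undoing any pending
// loan cancellations.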
void PmmNode::DeleteLender(paddr_t address, size_t count) {
AutoPreemptDisabler preempt_disable;
Guard<Mutex> guard{&lock_};
DEBUG_ASSERT(IS_PAGE_ALIGNED(address));
paddr_t end = address + count * PAGE_SIZE;
DEBUG_ASSERT(address <= end);
uint64_t removed_free_loaned_count = 0;
uint64_t added_free_count = 0;
uint64_t loan_ended_count = 0;
uint64_t loan_un_cancelled_count = 0;
ForPagesInPhysRangeLocked(address, count,
[this, &removed_free_loaned_count, &loan_un_cancelled_count,
&added_free_count, &loan_ended_count](vm_page_t* page) {
DEBUG_ASSERT(page->loaned);
if (page->is_free() && !page->loan_cancelled) {
// Remove from free_loaned_list_.
list_delete(&page->queue_node);
++removed_free_loaned_count;
}
if (page->loan_cancelled) {
++loan_un_cancelled_count;
}
if (page->is_free()) {
// add it to the free queue
if constexpr (!__has_feature(address_sanitizer)) {
list_add_head(&free_list_, &page->queue_node);
} else {
// If address sanitizer is enabled, put the page at the tail to
// maximize reuse distance.
list_add_tail(&free_list_, &page->queue_node);
}
added_free_count++;
}
page->loan_cancelled = false;
page->loaned = false;
++loan_ended_count;
});
DecrementFreeLoanedCountLocked(removed_free_loaned_count);
IncrementFreeCountLocked(added_free_count);
DecrementLoanedCountLocked(loan_ended_count);
DecrementLoanCancelledCountLocked(loan_un_cancelled_count);
}
bool PmmNode::IsLoaned(vm_page_t* page) {
AutoPreemptDisabler preempt_disable;
Guard<Mutex> guard{&lock_};
return page->loaned;
}
template <typename F>
void PmmNode::ForPagesInPhysRangeLocked(paddr_t start, size_t count, F func) {
DEBUG_ASSERT(IS_PAGE_ALIGNED(start));
// We only intend ForPagesInPhysRangeLocked() to be used after arenas have been added to the
// global pmm_node.
DEBUG_ASSERT(mp_get_active_mask() != 0);
if (unlikely(arena_list_.is_empty())) {
// We're in a unit test, using ManagedPmmNode which has no arenas. So fall back to the global
// pmm_node (which has at least one arena) to find the actual vm_page_t for each page.
//
// TODO: Make ManagedPmmNode have a more real arena, possibly by allocating a contiguous VMO and
// creating an arena from that.
paddr_t end = start + count * PAGE_SIZE;
for (paddr_t iter = start; iter < end; iter += PAGE_SIZE) {
vm_page_t* page = paddr_to_vm_page(iter);
func(page);
}
return;
}
// We have at least one arena, so use arena_list_ directly.
paddr_t end = start + count * PAGE_SIZE;
DEBUG_ASSERT(start <= end);
paddr_t page_addr = start;
for (auto& a : arena_list_) {
for (; page_addr < end && a.address_in_arena(page_addr); page_addr += PAGE_SIZE) {
vm_page_t* page = a.FindSpecific(page_addr);
DEBUG_ASSERT(page);
DEBUG_ASSERT(page_addr == page->paddr());
func(page);
}
if (page_addr == end) {
break;
}
}
DEBUG_ASSERT(page_addr == end);
}
void PmmNode::ReportAllocFailure() {
kcounter_add(pmm_alloc_failed, 1);
// Update before signaling the MemoryWatchdog to ensure it observes the update.
//
// |alloc_failed_no_mem| latches, so we only need to invoke the callback once. We could call it on
// every failure, but that's wasteful and we don't want to spam any underlying Event (or the
// thread lock or the MemoryWatchdog).
const bool first_time = !alloc_failed_no_mem.exchange(true, ktl::memory_order_relaxed);
if (first_time) {
// Note, the |cur_state| value passed to the callback doesn't really matter because all we're
// trying to do here is signal and unblock the MemoryWatchdog's worker thread.
mem_avail_state_callback_(mem_avail_state_context_, mem_avail_state_cur_index_);
}
}