// Copyright 2018 The Fuchsia Authors
//
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file or at
// https://opensource.org/licenses/MIT
#include "pmm_node.h"
#include <align.h>
#include <assert.h>
#include <inttypes.h>
#include <lib/boot-options/boot-options.h>
#include <lib/counters.h>
#include <lib/crypto/global_prng.h>
#include <lib/instrumentation/asan.h>
#include <lib/zircon-internal/macros.h>
#include <trace.h>
#include <new>
#include <fbl/algorithm.h>
#include <kernel/auto_preempt_disabler.h>
#include <kernel/mp.h>
#include <kernel/thread.h>
#include <pretty/cpp/sizes.h>
#include <vm/physmap.h>
#include <vm/pmm.h>
#include <vm/pmm_checker.h>
#include <vm/stack_owned_loaned_pages_interval.h>
#include "vm/pmm.h"
#include "vm_priv.h"
#define LOCAL_TRACE VM_GLOBAL_TRACE(0)
using pretty::FormattedBytes;
// The number of PMM allocation calls that have failed.
KCOUNTER(pmm_alloc_failed, "vm.pmm.alloc.failed")
KCOUNTER(pmm_alloc_delayed, "vm.pmm.alloc.delayed")
namespace {
// Indicates whether a PMM alloc call has ever failed with ZX_ERR_NO_MEMORY. Used to trigger an OOM
// response. See |MemoryWatchdog::WorkerThread|.
ktl::atomic<bool> alloc_failed_no_mem;
// Poison a page |p| with value |value|. Accesses to a poisoned page via the physmap are not
// allowed and may fault or trigger kASAN check failures.
void AsanPoisonPage(vm_page_t* p, uint8_t value) {
#if __has_feature(address_sanitizer)
asan_poison_shadow(reinterpret_cast<uintptr_t>(paddr_to_physmap(p->paddr())), PAGE_SIZE, value);
#endif // __has_feature(address_sanitizer)
}
// Unpoison a page |p|. Accesses to unpoisoned pages will not cause kASAN check failures.
void AsanUnpoisonPage(vm_page_t* p) {
#if __has_feature(address_sanitizer)
asan_unpoison_shadow(reinterpret_cast<uintptr_t>(paddr_to_physmap(p->paddr())), PAGE_SIZE);
#endif // __has_feature(address_sanitizer)
}
} // namespace
// We disable thread safety analysis here, since this function is only called
// during early boot before threading exists.
zx_status_t PmmNode::AddArena(const pmm_arena_info_t* info) TA_NO_THREAD_SAFETY_ANALYSIS {
dprintf(INFO, "PMM: adding arena %p name '%s' base %#" PRIxPTR " size %#zx\n", info, info->name,
info->base, info->size);
// Make sure we're in early boot (ints disabled and no active CPUs according
// to the scheduler).
DEBUG_ASSERT(mp_get_active_mask() == 0);
DEBUG_ASSERT(arch_ints_disabled());
DEBUG_ASSERT(IS_PAGE_ALIGNED(info->base));
DEBUG_ASSERT(IS_PAGE_ALIGNED(info->size));
DEBUG_ASSERT(info->size > 0);
// Allocate an arena object out of the array inside PmmNode
if (used_arena_count_ >= kArenaCount) {
printf("PMM: pmm_add_arena failed to allocate arena\n");
return ZX_ERR_NO_MEMORY;
}
PmmArena* arena = &arenas_[used_arena_count_++];
// Initialize the object.
auto status = arena->Init(info, this);
if (status != ZX_OK) {
used_arena_count_--;
printf("PMM: pmm_add_arena failed to initialize arena\n");
return status;
}
arena_cumulative_size_ += info->size;
return ZX_OK;
}
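// Copies the info for |count| arenas, starting at index |i|, into |buffer|. |buffer_size| is the
// size of |buffer| in bytes and must be large enough to hold |count| entries.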
zx_status_t PmmNode::GetArenaInfo(size_t count, uint64_t i, pmm_arena_info_t* buffer,
size_t buffer_size) {
Guard<Mutex> guard{&lock_};
if ((count == 0) || (count + i > active_arenas().size()) || (i >= active_arenas().size())) {
return ZX_ERR_OUT_OF_RANGE;
}
const size_t size_required = count * sizeof(pmm_arena_info_t);
if (buffer_size < size_required) {
return ZX_ERR_BUFFER_TOO_SMALL;
}
// Skip the first |i| elements.
auto iter = active_arenas().begin();
for (uint64_t j = 0; j < i; j++) {
iter++;
}
// Copy the next |count| elements.
for (uint64_t j = 0; j < count; j++) {
buffer[j] = iter->info();
iter++;
}
return ZX_OK;
}
// Called at boot time as arenas are brought online; no locks are acquired.
void PmmNode::AddFreePages(list_node* list) TA_NO_THREAD_SAFETY_ANALYSIS {
LTRACEF("list %p\n", list);
uint64_t free_count = 0;
vm_page *temp, *page;
list_for_every_entry_safe (list, page, temp, vm_page, queue_node) {
list_delete(&page->queue_node);
DEBUG_ASSERT(!page->is_loaned());
DEBUG_ASSERT(!page->is_loan_cancelled());
DEBUG_ASSERT(page->is_free());
list_add_tail(&free_list_, &page->queue_node);
++free_count;
}
free_count_.fetch_add(free_count);
ASSERT(free_count_);
free_pages_evt_.Signal();
LTRACEF("free count now %" PRIu64 "\n", free_count_.load(ktl::memory_order_relaxed));
}
void PmmNode::FillFreePagesAndArm() {
Guard<Mutex> guard{&lock_};
if (!free_fill_enabled_) {
return;
}
vm_page* page;
list_for_every_entry (&free_list_, page, vm_page, queue_node) {
checker_.FillPattern(page);
}
list_for_every_entry (&free_loaned_list_, page, vm_page, queue_node) {
checker_.FillPattern(page);
}
// Now that every page has been filled, we can arm the checker.
checker_.Arm();
all_free_pages_filled_ = true;
checker_.PrintStatus(stdout);
}
void PmmNode::CheckAllFreePages() {
Guard<Mutex> guard{&lock_};
if (!checker_.IsArmed()) {
return;
}
uint64_t free_page_count = 0;
uint64_t free_loaned_page_count = 0;
vm_page* page;
list_for_every_entry (&free_list_, page, vm_page, queue_node) {
checker_.AssertPattern(page);
++free_page_count;
}
list_for_every_entry (&free_loaned_list_, page, vm_page, queue_node) {
checker_.AssertPattern(page);
++free_loaned_page_count;
}
ASSERT(free_page_count == free_count_.load(ktl::memory_order_relaxed));
ASSERT(free_loaned_page_count == free_loaned_count_.load(ktl::memory_order_relaxed));
}
#if __has_feature(address_sanitizer)
void PmmNode::PoisonAllFreePages() {
Guard<Mutex> guard{&lock_};
vm_page* page;
list_for_every_entry (&free_list_, page, vm_page, queue_node) {
AsanPoisonPage(page, kAsanPmmFreeMagic);
};
list_for_every_entry (&free_loaned_list_, page, vm_page, queue_node) {
AsanPoisonPage(page, kAsanPmmFreeMagic);
};
}
#endif // __has_feature(address_sanitizer)
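// Enables free page filling with the given fill size and checker action. Returns false if free
// page filling was already enabled, in which case the arguments are ignored.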
bool PmmNode::EnableFreePageFilling(size_t fill_size, PmmChecker::Action action) {
Guard<Mutex> guard{&lock_};
if (free_fill_enabled_) {
// Checker is already enabled.
return false;
}
checker_.SetFillSize(fill_size);
checker_.SetAction(action);
// As free_fill_enabled_ may be examined outside of the lock, ensure the manipulations to checker_
// complete first by performing a release. See IsFreeFillEnabledRacy for where the acquire is
// performed.
free_fill_enabled_.store(true, ktl::memory_order_release);
return true;
}
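// Performs the common bookkeeping for taking a single |page| out of the free state: unpoisons it,
// records a stack owner for loaned pages, and transitions it to the ALLOC state. Free list removal
// and count updates are handled by callers.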
void PmmNode::AllocPageHelperLocked(vm_page_t* page) {
LTRACEF("allocating page %p, pa %#" PRIxPTR ", prev state %s\n", page, page->paddr(),
page_state_to_string(page->state()));
AsanUnpoisonPage(page);
DEBUG_ASSERT(page->is_free());
DEBUG_ASSERT(!page->object.is_stack_owned());
if (page->is_loaned()) {
page->object.set_stack_owner(&StackOwnedLoanedPagesInterval::current());
// We want the set_stack_owner() to be visible before set_state(), but we don't need to make
// set_state() a release just for the benefit of loaned pages, so we use this fence.
ktl::atomic_thread_fence(ktl::memory_order_release);
}
// Here we transition the page from FREE->ALLOC, completing the transfer of ownership from the
// PmmNode to the stack. This must be done under lock_, and more specifically the same lock_
// acquisition that removes the page from the free list, as being in the free list and being in
// the ALLOC state both indicate ownership by the PmmNode.
page->set_state(vm_page_state::ALLOC);
}
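// Allocates a single page, honoring |alloc_flags|, returning it via |page_out| and/or |pa_out|
// (either may be null). May return ZX_ERR_SHOULD_WAIT for PMM_ALLOC_FLAG_CAN_WAIT allocations
// when the node is in the OOM state.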
zx_status_t PmmNode::AllocPage(uint alloc_flags, vm_page_t** page_out, paddr_t* pa_out) {
DEBUG_ASSERT(Thread::Current::memory_allocation_state().IsEnabled());
vm_page* page = nullptr;
bool free_list_had_fill_pattern = false;
{
AutoPreemptDisabler preempt_disable;
Guard<Mutex> guard{&lock_};
free_list_had_fill_pattern = all_free_pages_filled_;
// The PMM_ALLOC_FLAG_LOANED flag is not compatible with PMM_ALLOC_FLAG_CAN_WAIT.
DEBUG_ASSERT(
!((alloc_flags & PMM_ALLOC_FLAG_LOANED) && (alloc_flags & PMM_ALLOC_FLAG_CAN_WAIT)));
const bool use_loaned_list = pmm_physical_page_borrowing_config()->is_any_borrowing_enabled() &&
(alloc_flags & PMM_ALLOC_FLAG_LOANED);
list_node* const which_list = use_loaned_list ? &free_loaned_list_ : &free_list_;
// Note that we do not care whether the allocation is happening from the loaned list or not,
// since if we are in the OOM state we would still prefer to give loaned pages to allocations
// that cannot be delayed.
if ((alloc_flags & PMM_ALLOC_FLAG_CAN_WAIT) && InOomStateLocked() &&
!never_return_should_wait_) {
pmm_alloc_delayed.Add(1);
return ZX_ERR_SHOULD_WAIT;
}
page = list_remove_head_type(which_list, vm_page, queue_node);
if (!page) {
if (!use_loaned_list) {
// Allocation failures from the regular free list are likely to become user-visible.
ReportAllocFailureLocked();
}
return ZX_ERR_NO_MEMORY;
}
DEBUG_ASSERT(use_loaned_list || !page->is_loaned());
AllocPageHelperLocked(page);
if (use_loaned_list) {
DecrementFreeLoanedCountLocked(1);
} else {
DecrementFreeCountLocked(1);
}
}
if (free_list_had_fill_pattern) {
checker_.AssertPattern(page);
}
if (pa_out) {
*pa_out = page->paddr();
}
if (page_out) {
*page_out = page;
}
return ZX_OK;
}
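// Allocates |count| (not necessarily contiguous) pages and appends them to |list|, falling back
// to the single page path when |count| is one.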
zx_status_t PmmNode::AllocPages(size_t count, uint alloc_flags, list_node* list) {
LTRACEF("count %zu\n", count);
DEBUG_ASSERT(Thread::Current::memory_allocation_state().IsEnabled());
// list must be initialized prior to calling this
DEBUG_ASSERT(list);
if (unlikely(count == 0)) {
return ZX_OK;
} else if (count == 1) {
vm_page* page;
zx_status_t status = AllocPage(alloc_flags, &page, nullptr);
if (likely(status == ZX_OK)) {
list_add_tail(list, &page->queue_node);
}
return status;
}
bool free_list_had_fill_pattern = false;
{
AutoPreemptDisabler preempt_disable;
Guard<Mutex> guard{&lock_};
free_list_had_fill_pattern = all_free_pages_filled_;
// The PMM_ALLOC_FLAG_LOANED flag is not compatible with PMM_ALLOC_FLAG_CAN_WAIT.
DEBUG_ASSERT(
!((alloc_flags & PMM_ALLOC_FLAG_LOANED) && (alloc_flags & PMM_ALLOC_FLAG_CAN_WAIT)));
const bool use_loaned_list = pmm_physical_page_borrowing_config()->is_any_borrowing_enabled() &&
(alloc_flags & PMM_ALLOC_FLAG_LOANED);
list_node* const which_list = use_loaned_list ? &free_loaned_list_ : &free_list_;
uint64_t free_count = use_loaned_list ? free_loaned_count_.load(ktl::memory_order_relaxed)
: free_count_.load(ktl::memory_order_relaxed);
if (unlikely(count > free_count)) {
if ((alloc_flags & PMM_ALLOC_FLAG_CAN_WAIT) && !never_return_should_wait_) {
pmm_alloc_delayed.Add(1);
return ZX_ERR_SHOULD_WAIT;
}
if (!use_loaned_list) {
// Allocation failures from the regular free list are likely to become user-visible.
ReportAllocFailureLocked();
}
return ZX_ERR_NO_MEMORY;
}
// For simplicity of OOM state detection we decrement the free count and then check whether we
// should wait or not. The error case is unlikely, and hence not performance critical, so
// having to redundantly re-increment is not a big deal.
if (use_loaned_list) {
DecrementFreeLoanedCountLocked(count);
} else {
DecrementFreeCountLocked(count);
}
if ((alloc_flags & PMM_ALLOC_FLAG_CAN_WAIT) && InOomStateLocked() &&
!never_return_should_wait_) {
// Loaned allocations do not support waiting, so we never have to undo the loaned count.
DEBUG_ASSERT(!use_loaned_list);
IncrementFreeCountLocked(count);
pmm_alloc_delayed.Add(1);
return ZX_ERR_SHOULD_WAIT;
}
auto node = which_list;
while (count > 0) {
node = list_next(which_list, node);
DEBUG_ASSERT(use_loaned_list || !containerof(node, vm_page, queue_node)->is_loaned());
AllocPageHelperLocked(containerof(node, vm_page, queue_node));
--count;
}
list_node tmp_list = LIST_INITIAL_VALUE(tmp_list);
list_split_after(which_list, node, &tmp_list);
if (list_is_empty(list)) {
list_move(which_list, list);
} else {
list_splice_after(which_list, list_peek_tail(list));
}
list_move(&tmp_list, which_list);
}
if (free_list_had_fill_pattern) {
vm_page* page;
list_for_every_entry (list, page, vm_page, queue_node) {
checker_.AssertPattern(page);
}
}
return ZX_OK;
}
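// Allocates the specific run of |count| pages starting at |address|, provided every page in the
// range is currently free and not loaned. On failure nothing is allocated and ZX_ERR_NOT_FOUND is
// returned.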
zx_status_t PmmNode::AllocRange(paddr_t address, size_t count, list_node* list) {
LTRACEF("address %#" PRIxPTR ", count %zu\n", address, count);
DEBUG_ASSERT(Thread::Current::memory_allocation_state().IsEnabled());
// list must be initialized prior to calling this
DEBUG_ASSERT(list);
// On error scenarios we will free the list, so make sure the caller didn't leave anything in
// there.
DEBUG_ASSERT(list_is_empty(list));
size_t allocated = 0;
if (count == 0) {
return ZX_OK;
}
address = ROUNDDOWN(address, PAGE_SIZE);
bool free_list_had_fill_pattern = false;
{
AutoPreemptDisabler preempt_disable;
Guard<Mutex> guard{&lock_};
free_list_had_fill_pattern = all_free_pages_filled_;
// Walk through the arenas, looking to see if the physical pages belong to one of them.
for (auto& a : active_arenas()) {
for (; allocated < count && a.address_in_arena(address); address += PAGE_SIZE) {
vm_page_t* page = a.FindSpecific(address);
if (!page) {
break;
}
// As we hold lock_, we can assume that any page in the FREE state is owned by us and
// protected by lock_, so if is_free() is true we may assume the page is in the free list,
// remove it from that list, and allocate it.
if (!page->is_free()) {
break;
}
// We never allocate loaned pages for callers of AllocRange().
if (page->is_loaned()) {
break;
}
list_delete(&page->queue_node);
AllocPageHelperLocked(page);
list_add_tail(list, &page->queue_node);
allocated++;
DecrementFreeCountLocked(1);
}
if (allocated == count) {
break;
}
}
if (allocated != count) {
// We were not able to allocate the entire run; free these pages. As we allocated these pages
// under this lock acquisition, their fill status is whatever it was before, i.e. the status of
// whether all free pages have been filled.
FreeListLocked(list, all_free_pages_filled_);
return ZX_ERR_NOT_FOUND;
}
}
if (free_list_had_fill_pattern) {
vm_page* page;
list_for_every_entry (list, page, vm_page, queue_node) {
checker_.AssertPattern(page);
}
}
return ZX_OK;
}
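// Allocates a physically contiguous run of |count| pages with the requested alignment, returning
// the base address in |pa| and the pages in |list|. Loaned and waiting allocations are not
// supported here.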
zx_status_t PmmNode::AllocContiguous(const size_t count, uint alloc_flags, uint8_t alignment_log2,
paddr_t* pa, list_node* list) {
DEBUG_ASSERT(Thread::Current::memory_allocation_state().IsEnabled());
LTRACEF("count %zu, align %u\n", count, alignment_log2);
if (count == 0) {
return ZX_OK;
}
if (alignment_log2 < PAGE_SIZE_SHIFT) {
alignment_log2 = PAGE_SIZE_SHIFT;
}
DEBUG_ASSERT(!(alloc_flags & (PMM_ALLOC_FLAG_LOANED | PMM_ALLOC_FLAG_CAN_WAIT)));
// pa and list must be valid pointers
DEBUG_ASSERT(pa);
DEBUG_ASSERT(list);
AutoPreemptDisabler preempt_disable;
Guard<Mutex> guard{&lock_};
for (auto& a : active_arenas()) {
// FindFreeContiguous will search the arena for FREE pages. As we hold lock_, any pages in the
// FREE state are assumed to be owned by us, and would only be modified if lock_ were held.
vm_page_t* p = a.FindFreeContiguous(count, alignment_log2);
if (!p) {
continue;
}
*pa = p->paddr();
// Remove the pages in the run from the free list.
for (size_t i = 0; i < count; i++, p++) {
DEBUG_ASSERT_MSG(p->is_free(), "p %p state %u\n", p, static_cast<uint32_t>(p->state()));
// Loaned pages are never returned by FindFreeContiguous() above.
DEBUG_ASSERT(!p->is_loaned());
DEBUG_ASSERT(list_in_list(&p->queue_node));
// Atomically (that is, in a single lock acquisition) remove this page from the free list and
// take it out of the FREE state, ensuring it is owned by us.
list_delete(&p->queue_node);
p->set_state(vm_page_state::ALLOC);
DecrementFreeCountLocked(1);
AsanUnpoisonPage(p);
checker_.AssertPattern(p);
list_add_tail(list, &p->queue_node);
}
return ZX_OK;
}
// We could potentially move contents of non-pinned pages out of the way for critical contiguous
// allocations, but for now...
LTRACEF("couldn't find run\n");
return ZX_ERR_NOT_FOUND;
}
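// Performs the common bookkeeping for returning a single |page| to the FREE state: transitions it
// to FREE, clears any stack owner, fills the check pattern unless |already_filled|, and poisons
// the page for kASAN. Free list placement and count updates are handled by callers.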
void PmmNode::FreePageHelperLocked(vm_page* page, bool already_filled) {
LTRACEF("page %p state %zu paddr %#" PRIxPTR "\n", page, VmPageStateIndex(page->state()),
page->paddr());
DEBUG_ASSERT(!page->is_free());
DEBUG_ASSERT(page->state() != vm_page_state::OBJECT || page->object.pin_count == 0);
// Mark it free. This makes the page owned by the PmmNode, even though it may not be in any page
// list, since the page is findable via the arena, and so we must ensure to:
// 1. Perform the set_state here under lock_.
// 2. Place the page in the free list and cease referring to the page before ever dropping lock_.
page->set_state(vm_page_state::FREE);
// Coming from OBJECT or ALLOC, this will only be true if the page was loaned (and may still be
// loaned, but doesn't have to be currently loaned if the contiguous VMO the page was loaned from
// was deleted during stack ownership).
//
// Coming from a state other than OBJECT or ALLOC, this currently won't be true, but if it were
// true in future, it would only be because a state other than OBJECT or ALLOC has a (future)
// field overlapping, in which case we do want to clear the invalid stack owner pointer value.
// We'll be ok to clear this invalid stack owner after setting FREE previously (instead of
// clearing before) because the stack owner is only read elsewhere for pages with an underlying
// contiguous VMO owner (whether actually loaned at the time or not), and pages with an underlying
// contiguous VMO owner can only be in FREE, ALLOC, OBJECT states, which all have this field, so
// reading an invalid stack owner pointer elsewhere won't happen (there's a magic number canary
// just in case though). We could instead clear out any invalid stack owner pointer before
// setting FREE above and have a shorter comment here, but there's no actual need for the extra
// "if", so we just let this "if" handle it (especially since this whole paragraph is a
// hypothetical future since there aren't any overlapping fields yet as of this comment).
if (page->object.is_stack_owned()) {
// Make FREE visible before lack of stack owner.
ktl::atomic_thread_fence(ktl::memory_order_release);
page->object.clear_stack_owner();
}
// The caller may have called IsFreeFillEnabledRacy and potentially already filled a pattern;
// however, if it raced with enabling of free filling we may still need to fill the pattern. This
// should be unlikely, and since free filling can never be turned back off there is no race in the
// other direction. As we hold the lock we can safely perform a relaxed read.
if (!already_filled && free_fill_enabled_.load(ktl::memory_order_relaxed)) {
checker_.FillPattern(page);
}
AsanPoisonPage(page, kAsanPmmFreeMagic);
}
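// Frees a single |page|, routing it to the regular or loaned free list as appropriate. Pages
// whose loan has been cancelled are not placed on any free list.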
void PmmNode::FreePage(vm_page* page) {
AutoPreemptDisabler preempt_disable;
const bool fill = IsFreeFillEnabledRacy();
if (fill) {
checker_.FillPattern(page);
}
Guard<Mutex> guard{&lock_};
// pages freed individually shouldn't be in a queue
DEBUG_ASSERT(!list_in_list(&page->queue_node));
FreePageHelperLocked(page, fill);
list_node* which_list = nullptr;
if (!page->is_loaned()) {
IncrementFreeCountLocked(1);
which_list = &free_list_;
} else if (!page->is_loan_cancelled()) {
IncrementFreeLoanedCountLocked(1);
which_list = &free_loaned_list_;
}
// Add the page to the appropriate free queue, unless loan_cancelled. The loan_cancelled pages
// don't go in any free queue because they shouldn't get re-used until reclaimed by their
// underlying contiguous VMO or until that underlying contiguous VMO is deleted.
DEBUG_ASSERT(which_list || page->is_loan_cancelled());
if (which_list) {
if constexpr (!__has_feature(address_sanitizer)) {
list_add_head(which_list, &page->queue_node);
} else {
// If address sanitizer is enabled, put the page at the tail to maximize reuse distance.
list_add_tail(which_list, &page->queue_node);
}
}
}
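// Frees every page on |list|, splicing non-loaned pages onto free_list_ and non-cancelled loaned
// pages onto free_loaned_list_. |already_filled| indicates whether the caller has already written
// the checker's fill pattern into the pages.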
void PmmNode::FreeListLocked(list_node* list, bool already_filled) {
DEBUG_ASSERT(list);
// process list backwards so the head is as hot as possible
uint64_t count = 0;
uint64_t loaned_count = 0;
list_node freed_loaned_list = LIST_INITIAL_VALUE(freed_loaned_list);
{ // scope page
vm_page* page = list_peek_tail_type(list, vm_page_t, queue_node);
while (page) {
FreePageHelperLocked(page, already_filled);
vm_page_t* next_page = list_prev_type(list, &page->queue_node, vm_page_t, queue_node);
if (page->is_loaned()) {
// Remove from |list| and possibly put on freed_loaned_list instead, to route to the correct
// free list, or no free list if loan_cancelled.
list_delete(&page->queue_node);
if (!page->is_loan_cancelled()) {
list_add_head(&freed_loaned_list, &page->queue_node);
++loaned_count;
}
} else {
count++;
}
page = next_page;
}
} // end scope page
if constexpr (!__has_feature(address_sanitizer)) {
// Splice |list| onto the head of free_list_ and |freed_loaned_list| onto the head of
// free_loaned_list_.
list_splice_after(list, &free_list_);
list_splice_after(&freed_loaned_list, &free_loaned_list_);
} else {
// If address sanitizer is enabled, put the pages at the tail to maximize reuse distance.
if (!list_is_empty(&free_list_)) {
list_splice_after(list, list_peek_tail(&free_list_));
} else {
list_splice_after(list, &free_list_);
}
if (!list_is_empty(&free_loaned_list_)) {
list_splice_after(&freed_loaned_list, list_peek_tail(&free_loaned_list_));
} else {
list_splice_after(&freed_loaned_list, &free_loaned_list_);
}
}
IncrementFreeCountLocked(count);
IncrementFreeLoanedCountLocked(loaned_count);
}
void PmmNode::FreeList(list_node* list) {
AutoPreemptDisabler preempt_disable;
const bool fill = IsFreeFillEnabledRacy();
if (fill) {
vm_page* page;
list_for_every_entry (list, page, vm_page, queue_node) {
checker_.FillPattern(page);
}
}
Guard<Mutex> guard{&lock_};
FreeListLocked(list, fill);
}
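// Returns true if the node is currently in the lowest (OOM) memory availability state. When the
// pmm_alloc_random_should_wait boot option is enabled this also randomly reports true to exercise
// the delayed allocation path.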
bool PmmNode::InOomStateLocked() {
if (mem_avail_state_cur_index_ == 0) {
return true;
}
// See pmm_check_alloc_random_should_wait in pmm.cc for an assertion that random should wait is
// only enabled if DEBUG_ASSERT_IMPLEMENTED.
if constexpr (DEBUG_ASSERT_IMPLEMENTED) {
// Randomly try to make 10% of allocations delayed allocations.
if (gBootOptions->pmm_alloc_random_should_wait &&
rand_r(&random_should_wait_seed_) < (RAND_MAX / 10)) {
return true;
}
}
return false;
}
uint64_t PmmNode::CountFreePages() const TA_NO_THREAD_SAFETY_ANALYSIS {
return free_count_.load(ktl::memory_order_relaxed);
}
uint64_t PmmNode::CountLoanedFreePages() const TA_NO_THREAD_SAFETY_ANALYSIS {
return free_loaned_count_.load(ktl::memory_order_relaxed);
}
uint64_t PmmNode::CountLoanedNotFreePages() const TA_NO_THREAD_SAFETY_ANALYSIS {
AutoPreemptDisabler preempt_disable;
Guard<Mutex> guard{&lock_};
return loaned_count_.load(ktl::memory_order_relaxed) -
free_loaned_count_.load(ktl::memory_order_relaxed);
}
uint64_t PmmNode::CountLoanedPages() const TA_NO_THREAD_SAFETY_ANALYSIS {
return loaned_count_.load(ktl::memory_order_relaxed);
}
uint64_t PmmNode::CountLoanCancelledPages() const TA_NO_THREAD_SAFETY_ANALYSIS {
return loan_cancelled_count_.load(ktl::memory_order_relaxed);
}
uint64_t PmmNode::CountTotalBytes() const TA_NO_THREAD_SAFETY_ANALYSIS {
return arena_cumulative_size_;
}
void PmmNode::DumpFree() const TA_NO_THREAD_SAFETY_ANALYSIS {
auto megabytes_free = CountFreePages() * PAGE_SIZE / MB;
printf(" %zu free MBs\n", megabytes_free);
}
void PmmNode::Dump(bool is_panic) const {
// No lock analysis here, as we want to just go for it in the panic case without the lock.
auto dump = [this]() TA_NO_THREAD_SAFETY_ANALYSIS {
uint64_t free_count = free_count_.load(ktl::memory_order_relaxed);
uint64_t free_loaned_count = free_loaned_count_.load(ktl::memory_order_relaxed);
printf(
"pmm node %p: free_count %zu (%zu bytes), free_loaned_count: %zu (%zu bytes), total size "
"%zu\n",
this, free_count, free_count * PAGE_SIZE, free_loaned_count, free_loaned_count * PAGE_SIZE,
arena_cumulative_size_);
for (const auto& a : active_arenas()) {
a.Dump(false, false);
}
};
if (is_panic) {
dump();
} else {
Guard<Mutex> guard{&lock_};
dump();
}
}
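// Installs the memory availability watermarks, debounce amount (both in bytes) and callback used
// for reclamation, then evaluates the current state against them. Watermarks must be strictly
// increasing and the lowest watermark must be at least the debounce amount.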
zx_status_t PmmNode::InitReclamation(const uint64_t* watermarks, uint8_t watermark_count,
uint64_t debounce, void* context,
mem_avail_state_updated_callback_t callback) {
if (watermark_count > MAX_WATERMARK_COUNT) {
return ZX_ERR_INVALID_ARGS;
}
AutoPreemptDisabler preempt_disable;
Guard<Mutex> guard{&lock_};
uint64_t tmp[MAX_WATERMARK_COUNT];
uint64_t tmp_debounce = fbl::round_up(debounce, static_cast<uint64_t>(PAGE_SIZE)) / PAGE_SIZE;
for (uint8_t i = 0; i < watermark_count; i++) {
tmp[i] = watermarks[i] / PAGE_SIZE;
if (i > 0) {
if (tmp[i] <= tmp[i - 1]) {
return ZX_ERR_INVALID_ARGS;
}
} else {
if (tmp[i] < tmp_debounce) {
return ZX_ERR_INVALID_ARGS;
}
}
}
mem_avail_state_watermark_count_ = watermark_count;
mem_avail_state_debounce_ = tmp_debounce;
mem_avail_state_context_ = context;
mem_avail_state_callback_ = callback;
memcpy(mem_avail_state_watermarks_, tmp, sizeof(mem_avail_state_watermarks_));
static_assert(sizeof(tmp) == sizeof(mem_avail_state_watermarks_));
UpdateMemAvailStateLocked();
return ZX_OK;
}
void PmmNode::UpdateMemAvailStateLocked() {
// Find the smallest watermark which is greater than the number of free pages.
uint8_t target = mem_avail_state_watermark_count_;
for (uint8_t i = 0; i < mem_avail_state_watermark_count_; i++) {
if (mem_avail_state_watermarks_[i] > free_count_.load(ktl::memory_order_relaxed)) {
target = i;
break;
}
}
SetMemAvailStateLocked(target);
}
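// Transitions to the given memory availability state index, recomputing the free page bounds at
// which the next state change occurs and invoking the registered callback.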
void PmmNode::SetMemAvailStateLocked(uint8_t mem_avail_state) {
mem_avail_state_cur_index_ = mem_avail_state;
if (mem_avail_state_cur_index_ == 0) {
if (likely(!never_return_should_wait_)) {
free_pages_evt_.Unsignal();
}
} else {
free_pages_evt_.Signal();
}
if (mem_avail_state_cur_index_ > 0) {
// If there is a smaller watermark, then we transition into that state when the
// number of free pages drops more than |mem_avail_state_debounce_| pages into that state.
mem_avail_state_lower_bound_ =
mem_avail_state_watermarks_[mem_avail_state_cur_index_ - 1] - mem_avail_state_debounce_;
} else {
// There is no smaller state, so we can't ever transition down.
mem_avail_state_lower_bound_ = 0;
}
if (mem_avail_state_cur_index_ < mem_avail_state_watermark_count_) {
// If there is a larger watermark, then we transition out of the current state when
// the number of free pages exceeds the current state's watermark by at least
// |mem_avail_state_debounce_|.
mem_avail_state_upper_bound_ =
mem_avail_state_watermarks_[mem_avail_state_cur_index_] + mem_avail_state_debounce_;
} else {
// There is no larger state, so we can't ever transition up.
mem_avail_state_upper_bound_ = UINT64_MAX / PAGE_SIZE;
}
mem_avail_state_callback_(mem_avail_state_context_, mem_avail_state_cur_index_);
}
void PmmNode::DumpMemAvailState() const {
Guard<Mutex> guard{&lock_};
printf("watermarks: [");
for (unsigned i = 0; i < mem_avail_state_watermark_count_; i++) {
printf("%s%s", FormattedBytes(mem_avail_state_watermarks_[i] * PAGE_SIZE).c_str(),
i + 1 == mem_avail_state_watermark_count_ ? "]\n" : ", ");
}
printf("debounce: %s\n", FormattedBytes(mem_avail_state_debounce_ * PAGE_SIZE).c_str());
printf("current state: %u\n", mem_avail_state_cur_index_);
printf("current bounds: [%s, %s]\n",
FormattedBytes(mem_avail_state_lower_bound_ * PAGE_SIZE).c_str(),
FormattedBytes(mem_avail_state_upper_bound_ * PAGE_SIZE).c_str());
printf("free memory: %s\n", FormattedBytes(free_count_ * PAGE_SIZE).c_str());
}
uint64_t PmmNode::DebugNumPagesTillMemState(uint8_t mem_state_idx) const {
Guard<Mutex> guard{&lock_};
if (mem_avail_state_cur_index_ <= mem_state_idx) {
// Already in mem_state_idx, or in a state with less available memory than mem_state_idx.
return 0;
}
// We need to get free_pages below mem_avail_state_watermarks_[mem_state_idx]; if we are in state
// (mem_state_idx + 1) we additionally need to clear the debounce amount. For simplicity we just
// always allocate the debounce amount as well.
uint64_t trigger = mem_avail_state_watermarks_[mem_state_idx] - mem_avail_state_debounce_;
return (free_count_ - trigger);
}
uint8_t PmmNode::DebugMaxMemAvailState() const {
Guard<Mutex> guard{&lock_};
return mem_avail_state_watermark_count_;
}
void PmmNode::DebugMemAvailStateCallback(uint8_t mem_state_idx) const {
Guard<Mutex> guard{&lock_};
if (mem_state_idx >= mem_avail_state_watermark_count_) {
return;
}
// Invoke callback for the requested state without allocating additional memory, or messing with
// any of the internal memory state tracking counters.
mem_avail_state_callback_(mem_avail_state_context_, mem_state_idx);
}
void PmmNode::StopReturningShouldWait() {
Guard<Mutex> guard{&lock_};
never_return_should_wait_ = true;
free_pages_evt_.Signal();
}
int64_t PmmNode::get_alloc_failed_count() { return pmm_alloc_failed.SumAcrossAllCpus(); }
bool PmmNode::has_alloc_failed_no_mem() {
return alloc_failed_no_mem.load(ktl::memory_order_relaxed);
}
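// Marks every page on |page_list| as loaned and then frees the pages so the rest of the system
// can use them. The pages must currently be allocated (not free) and not already loaned.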
void PmmNode::BeginLoan(list_node* page_list) {
DEBUG_ASSERT(page_list);
AutoPreemptDisabler preempt_disable;
const bool fill = IsFreeFillEnabledRacy();
if (fill) {
vm_page* page;
list_for_every_entry (page_list, page, vm_page, queue_node) {
checker_.FillPattern(page);
}
}
Guard<Mutex> guard{&lock_};
uint64_t loaned_count = 0;
vm_page* page;
list_for_every_entry (page_list, page, vm_page, queue_node) {
DEBUG_ASSERT(!page->is_loaned());
DEBUG_ASSERT(!page->is_free());
page->set_is_loaned();
++loaned_count;
DEBUG_ASSERT(!page->is_loan_cancelled());
}
IncrementLoanedCountLocked(loaned_count);
// Callers of BeginLoan() generally won't want the pages loaned to them; the intent is to loan to
// the rest of the system, so go ahead and free also. Some callers will basically choose between
// pmm_begin_loan() and pmm_free().
FreeListLocked(page_list, fill);
}
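// Marks the |count| loaned pages starting at |address| as loan-cancelled, pulling any that are
// currently free off free_loaned_list_ so they cannot be handed out before EndLoan reclaims them.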
void PmmNode::CancelLoan(paddr_t address, size_t count) {
AutoPreemptDisabler preempt_disable;
Guard<Mutex> guard{&lock_};
DEBUG_ASSERT(IS_PAGE_ALIGNED(address));
paddr_t end = address + count * PAGE_SIZE;
DEBUG_ASSERT(address <= end);
uint64_t loan_cancelled_count = 0;
uint64_t no_longer_free_loaned_count = 0;
ForPagesInPhysRangeLocked(address, count,
[&loan_cancelled_count, &no_longer_free_loaned_count](vm_page_t* page) {
// We can assert this because of PageSource's overlapping request
// handling.
DEBUG_ASSERT(page->is_loaned());
bool was_cancelled = page->is_loan_cancelled();
// We can assert this because of PageSource's overlapping request
// handling.
DEBUG_ASSERT(!was_cancelled);
page->set_is_loan_cancelled();
++loan_cancelled_count;
if (page->is_free()) {
// Currently in free_loaned_list_.
DEBUG_ASSERT(list_in_list(&page->queue_node));
// Remove from free_loaned_list_ to prevent any new use until
// after EndLoan.
list_delete(&page->queue_node);
no_longer_free_loaned_count++;
}
});
IncrementLoanCancelledCountLocked(loan_cancelled_count);
DecrementFreeLoanedCountLocked(no_longer_free_loaned_count);
}
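// Reclaims the |count| previously loan-cancelled pages starting at |address|, clearing their
// loaned state and returning them to the caller on |page_list| in the ALLOC state.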
void PmmNode::EndLoan(paddr_t address, size_t count, list_node* page_list) {
bool free_list_had_fill_pattern = false;
{
AutoPreemptDisabler preempt_disable;
Guard<Mutex> guard{&lock_};
free_list_had_fill_pattern = all_free_pages_filled_;
DEBUG_ASSERT(IS_PAGE_ALIGNED(address));
paddr_t end = address + count * PAGE_SIZE;
DEBUG_ASSERT(address <= end);
uint64_t loan_ended_count = 0;
ForPagesInPhysRangeLocked(address, count,
[this, &page_list, &loan_ended_count](vm_page_t* page) {
AssertHeld(lock_);
// Because PageSource serializes such that there's only one request to
// the PageProvider in flight at a time for any given page, we can
// assert these conditions instead of needing to check them.
DEBUG_ASSERT(page->is_loaned());
DEBUG_ASSERT(page->is_loan_cancelled());
DEBUG_ASSERT(page->is_free());
// Already not in free_loaned_list_ (because loan_cancelled
// already).
DEBUG_ASSERT(!list_in_list(&page->queue_node));
page->clear_is_loaned();
page->clear_is_loan_cancelled();
++loan_ended_count;
AllocPageHelperLocked(page);
list_add_tail(page_list, &page->queue_node);
});
DecrementLoanCancelledCountLocked(loan_ended_count);
DecrementLoanedCountLocked(loan_ended_count);
}
if (free_list_had_fill_pattern) {
vm_page* page;
list_for_every_entry (page_list, page, vm_page, queue_node) {
checker_.AssertPattern(page);
}
}
}
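// Ends the loan of the |count| pages starting at |address| when their lender goes away: the pages
// stop being loaned, and any that are free move from the loaned free list to the regular free
// list.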
void PmmNode::DeleteLender(paddr_t address, size_t count) {
AutoPreemptDisabler preempt_disable;
Guard<Mutex> guard{&lock_};
DEBUG_ASSERT(IS_PAGE_ALIGNED(address));
paddr_t end = address + count * PAGE_SIZE;
DEBUG_ASSERT(address <= end);
uint64_t removed_free_loaned_count = 0;
uint64_t added_free_count = 0;
uint64_t loan_ended_count = 0;
uint64_t loan_un_cancelled_count = 0;
ForPagesInPhysRangeLocked(address, count,
[this, &removed_free_loaned_count, &loan_un_cancelled_count,
&added_free_count, &loan_ended_count](vm_page_t* page) {
DEBUG_ASSERT(page->is_loaned());
if (page->is_free() && !page->is_loan_cancelled()) {
// Remove from free_loaned_list_.
list_delete(&page->queue_node);
++removed_free_loaned_count;
}
if (page->is_loan_cancelled()) {
++loan_un_cancelled_count;
}
if (page->is_free()) {
// add it to the free queue
if constexpr (!__has_feature(address_sanitizer)) {
list_add_head(&free_list_, &page->queue_node);
} else {
// If address sanitizer is enabled, put the page at the tail to
// maximize reuse distance.
list_add_tail(&free_list_, &page->queue_node);
}
added_free_count++;
}
page->clear_is_loan_cancelled();
page->clear_is_loaned();
++loan_ended_count;
});
DecrementFreeLoanedCountLocked(removed_free_loaned_count);
IncrementFreeCountLocked(added_free_count);
DecrementLoanedCountLocked(loan_ended_count);
DecrementLoanCancelledCountLocked(loan_un_cancelled_count);
}
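// Invokes |func| on the vm_page_t for each page in the physical range
// [start, start + count * PAGE_SIZE), resolving pages via this node's arenas (or via the global
// pmm_node when this node has no arenas, as happens in unit tests).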
template <typename F>
void PmmNode::ForPagesInPhysRangeLocked(paddr_t start, size_t count, F func) {
DEBUG_ASSERT(IS_PAGE_ALIGNED(start));
// We only intend ForPagesInPhysRangeLocked() to be used after arenas have been added to the
// global pmm_node.
DEBUG_ASSERT(mp_get_active_mask() != 0);
if (unlikely(active_arenas().empty())) {
// We're in a unit test, using ManagedPmmNode which has no arenas. So fall back to the global
// pmm_node (which has at least one arena) to find the actual vm_page_t for each page.
//
// TODO: Make ManagedPmmNode have a more real arena, possibly by allocating a contiguous VMO and
// creating an arena from that.
paddr_t end = start + count * PAGE_SIZE;
for (paddr_t iter = start; iter < end; iter += PAGE_SIZE) {
vm_page_t* page = paddr_to_vm_page(iter);
func(page);
}
return;
}
// We have at least one arena, so use active_arenas() directly.
paddr_t end = start + count * PAGE_SIZE;
DEBUG_ASSERT(start <= end);
paddr_t page_addr = start;
for (auto& a : active_arenas()) {
for (; page_addr < end && a.address_in_arena(page_addr); page_addr += PAGE_SIZE) {
vm_page_t* page = a.FindSpecific(page_addr);
DEBUG_ASSERT(page);
DEBUG_ASSERT(page_addr == page->paddr());
func(page);
}
if (page_addr == end) {
break;
}
}
DEBUG_ASSERT(page_addr == end);
}
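// Records an allocation failure from the regular free list and, on the first such failure,
// signals the MemoryWatchdog via the registered callback.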
void PmmNode::ReportAllocFailureLocked() {
kcounter_add(pmm_alloc_failed, 1);
// Update before signaling the MemoryWatchdog to ensure it observes the update.
//
// |alloc_failed_no_mem| latches so we only need to invoke the callback once. We could call it on
// every failure, but that's wasteful and we don't want to spam any underlying Event (or the
// thread lock or the MemoryWatchdog).
const bool first_time = !alloc_failed_no_mem.exchange(true, ktl::memory_order_relaxed);
if (first_time) {
// Note, the |cur_state| value passed to the callback doesn't really matter because all we're
// trying to do here is signal and unblock the MemoryWatchdog's worker thread.
mem_avail_state_callback_(mem_avail_state_context_, mem_avail_state_cur_index_);
}
}
void PmmNode::ReportAllocFailure() {
Guard<Mutex> guard{&lock_};
ReportAllocFailureLocked();
}
void PmmNode::SeedRandomShouldWait() {
if constexpr (DEBUG_ASSERT_IMPLEMENTED) {
Guard<Mutex> guard{&lock_};
crypto::global_prng::GetInstance()->Draw(&random_should_wait_seed_,
sizeof(random_should_wait_seed_));
}
}
zx_status_t PmmNode::SetPageCompression(fbl::RefPtr<VmCompression> compression) {
Guard<Mutex> guard{&compression_lock_};
if (page_compression_) {
return ZX_ERR_ALREADY_EXISTS;
}
page_compression_ = ktl::move(compression);
return ZX_OK;
}