// Copyright 2018 The Fuchsia Authors
//
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file or at
// https://opensource.org/licenses/MIT
#include "vm/pmm_node.h"
#include <align.h>
#include <assert.h>
#include <inttypes.h>
#include <lib/boot-options/boot-options.h>
#include <lib/counters.h>
#include <lib/crypto/global_prng.h>
#include <lib/instrumentation/asan.h>
#include <lib/memalloc/range.h>
#include <lib/zircon-internal/macros.h>
#include <trace.h>
#include <fbl/algorithm.h>
#include <kernel/auto_preempt_disabler.h>
#include <kernel/mp.h>
#include <kernel/scheduler.h>
#include <kernel/thread.h>
#include <phys/handoff.h>
#include <pretty/cpp/sizes.h>
#include <vm/compression.h>
#include <vm/phys/arena.h>
#include <vm/physmap.h>
#include <vm/pmm.h>
#include <vm/pmm_checker.h>
#include "vm_priv.h"
#define LOCAL_TRACE VM_GLOBAL_TRACE(0)
// The number of PMM allocation calls that have failed.
KCOUNTER(pmm_alloc_failed, "vm.pmm.alloc.failed")
KCOUNTER(pmm_alloc_delayed, "vm.pmm.alloc.delayed")
namespace {
// Indicates whether a PMM alloc call has ever failed with ZX_ERR_NO_MEMORY. Used to trigger an
// OOM response. See |MemoryWatchdog::WorkerThread|.
ktl::atomic<bool> alloc_failed_no_mem;
// Poison a page |p| with value |value|. Accesses to a poisoned page via the physmap are not
// allowed and may cause faults or kASAN checks.
void AsanPoisonPage(vm_page_t* p, uint8_t value) {
#if __has_feature(address_sanitizer)
asan_poison_shadow(reinterpret_cast<uintptr_t>(paddr_to_physmap(p->paddr())), PAGE_SIZE, value);
#endif // __has_feature(address_sanitizer)
}
// Unpoison a page |p|. Accesses to an unpoisoned page will not cause kASAN check failures.
void AsanUnpoisonPage(vm_page_t* p) {
#if __has_feature(address_sanitizer)
asan_unpoison_shadow(reinterpret_cast<uintptr_t>(paddr_to_physmap(p->paddr())), PAGE_SIZE);
#endif // __has_feature(address_sanitizer)
}
void ReturnPagesToFreeList(list_node* target_list, list_node* to_free) {
if constexpr (!__has_feature(address_sanitizer)) {
// Splice the list at the head of the target list (free_list_ or free_loaned_list_).
list_splice_after(to_free, target_list);
} else {
// If address sanitizer is enabled, put the pages at the tail to maximize reuse distance.
if (!list_is_empty(target_list)) {
list_splice_after(to_free, list_peek_tail(target_list));
} else {
list_splice_after(to_free, target_list);
}
}
}
} // namespace
// We disable thread safety analysis here, since this function is only called
// during early boot before threading exists.
zx_status_t PmmNode::Init(ktl::span<const memalloc::Range> ranges) TA_NO_THREAD_SAFETY_ANALYSIS {
// Make sure we're in early boot (ints disabled and no active Schedulers)
DEBUG_ASSERT(Scheduler::PeekActiveMask() == 0);
DEBUG_ASSERT(arch_ints_disabled());
zx_status_t status = ZX_OK;
auto init_arena = [&status, this](const PmmArenaSelection& selected) {
if (status == ZX_ERR_NO_MEMORY) {
return;
}
zx_status_t init_status = InitArena(selected);
if (status == ZX_OK) {
status = init_status;
}
};
bool allocation_excluded = false;
auto record_error = [&allocation_excluded](const PmmArenaSelectionError& error) {
bool allocated = memalloc::IsAllocatedType(error.range.type);
allocation_excluded = allocation_excluded || allocated;
// If we have to throw out fewer than two pages of free RAM, don't regard
// that as a full-blown error.
const char* error_type =
error.type == PmmArenaSelectionError::Type::kTooSmall && !allocated ? "warning" : "error";
ktl::string_view reason = PmmArenaSelectionError::ToString(error.type);
ktl::string_view range_type = memalloc::ToString(error.range.type);
printf("PMM: %s: unable to include [%#" PRIx64 ", %#" PRIx64 ") (%.*s) in arena: %.*s\n",
error_type, //
error.range.addr, error.range.end(), //
static_cast<int>(range_type.size()), range_type.data(), //
static_cast<int>(reason.size()), reason.data());
};
SelectPmmArenas<PAGE_SIZE>(ranges, init_arena, record_error);
if (status != ZX_OK) {
return status;
}
// Failing to include a pre-PMM allocation in an arena could be disastrous in
// unpredictable/hard-to-debug ways, so fail hard early.
ZX_ASSERT(!allocation_excluded);
// Now mark all pre-PMM allocations and holes within our arenas as reserved.
ktl::span arenas = active_arenas();
auto reserve_range = [this, arena{arenas.begin()},
end{arenas.end()}](const memalloc::Range& range) mutable {
// Find the first arena encompassing this range.
//
// Note that trying to include `range` in an arena may have resulted in an
// error during the selection process. If we do encounter a range not in an
// arena, just skip it.
while (arena != end && arena->end() <= range.addr) {
++arena;
}
if (arena == end) {
// In this case the tail of ranges did not end up in any arenas, so we can
// just short-circuit.
return false;
}
if (!arena->address_in_arena(range.addr)) {
return true;
}
DEBUG_ASSERT(arena->address_in_arena(range.end() - 1));
InitReservedRange(range);
return true;
};
ForEachAlignedAllocationOrHole<PAGE_SIZE>(ranges, reserve_range);
return ZX_OK;
}
void PmmNode::EndHandoff() {
FreeList(&phys_handoff_temporary_list_);
ZX_ASSERT(list_is_empty(&phys_handoff_vmo_list_));
}
zx_status_t PmmNode::GetArenaInfo(size_t count, uint64_t i, pmm_arena_info_t* buffer,
size_t buffer_size) {
Guard<Mutex> guard{&lock_};
if ((count == 0) || (count + i > active_arenas().size()) || (i >= active_arenas().size())) {
return ZX_ERR_OUT_OF_RANGE;
}
const size_t size_required = count * sizeof(pmm_arena_info_t);
if (buffer_size < size_required) {
return ZX_ERR_BUFFER_TOO_SMALL;
}
// Skip the first |i| elements.
auto iter = active_arenas().begin();
for (uint64_t j = 0; j < i; j++) {
iter++;
}
// Copy the next |count| elements.
for (uint64_t j = 0; j < count; j++) {
buffer[j] = iter->info();
iter++;
}
return ZX_OK;
}
// Called at boot time as arenas are brought online; no locks are acquired.
void PmmNode::AddFreePages(list_node* list) TA_NO_THREAD_SAFETY_ANALYSIS {
LTRACEF("list %p\n", list);
uint64_t free_count = 0;
vm_page *temp, *page;
list_for_every_entry_safe (list, page, temp, vm_page, queue_node) {
list_delete(&page->queue_node);
DEBUG_ASSERT(!page->is_loaned());
DEBUG_ASSERT(!page->is_loan_cancelled());
DEBUG_ASSERT(page->is_free());
list_add_tail(&free_list_, &page->queue_node);
++free_count;
}
free_count_.fetch_add(free_count);
ASSERT(free_count_);
may_allocate_evt_.Signal();
LTRACEF("free count now %" PRIu64 "\n", free_count_.load(ktl::memory_order_relaxed));
}
void PmmNode::FillFreePagesAndArm() {
// Require both locks so we can process both of the free lists and modify all_free_pages_filled_.
Guard<Mutex> loaned_guard{&loaned_list_lock_};
Guard<Mutex> free_guard{&lock_};
if (!free_fill_enabled_) {
return;
}
vm_page* page;
list_for_every_entry (&free_list_, page, vm_page, queue_node) {
checker_.FillPattern(page);
}
list_for_every_entry (&free_loaned_list_, page, vm_page, queue_node) {
checker_.FillPattern(page);
}
// Now that every page has been filled, we can arm the checker.
checker_.Arm();
all_free_pages_filled_ = true;
checker_.PrintStatus(stdout);
}
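// A hedged sketch of the expected boot-time ordering (inferred from the early return above;
// |fill_size| and |action| are caller-supplied and shown for illustration only):
//
//   pmm_node.EnableFreePageFilling(fill_size, action);  // configures and enables the checker
//   pmm_node.FillFreePagesAndArm();                     // fills every free page, then arms it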
void PmmNode::CheckAllFreePages() {
// Require both locks so we can process both of the free lists. This is an infrequent manual
// operation and does not need to be optimized to avoid holding both locks at once.
Guard<Mutex> loaned_guard{&loaned_list_lock_};
Guard<Mutex> free_guard{&lock_};
if (!checker_.IsArmed()) {
return;
}
uint64_t free_page_count = 0;
uint64_t free_loaned_page_count = 0;
vm_page* page;
list_for_every_entry (&free_list_, page, vm_page, queue_node) {
checker_.AssertPattern(page);
++free_page_count;
}
list_for_every_entry (&free_loaned_list_, page, vm_page, queue_node) {
checker_.AssertPattern(page);
++free_loaned_page_count;
}
ASSERT(free_page_count == free_count_.load(ktl::memory_order_relaxed));
ASSERT(free_loaned_page_count == free_loaned_count_.load(ktl::memory_order_relaxed));
}
#if __has_feature(address_sanitizer)
void PmmNode::PoisonAllFreePages() {
// Require both locks so we can process both of the free lists. This is an infrequent manual
// operation and does not need to be optimized to avoid holding both locks at once.
Guard<Mutex> loaned_guard{&loaned_list_lock_};
Guard<Mutex> free_guard{&lock_};
vm_page* page;
list_for_every_entry (&free_list_, page, vm_page, queue_node) {
AsanPoisonPage(page, kAsanPmmFreeMagic);
};
list_for_every_entry (&free_loaned_list_, page, vm_page, queue_node) {
AsanPoisonPage(page, kAsanPmmFreeMagic);
};
}
#endif // __has_feature(address_sanitizer)
bool PmmNode::EnableFreePageFilling(size_t fill_size, CheckFailAction action) {
// Require both locks so we can manipulate free_fill_enabled_.
Guard<Mutex> loaned_guard{&loaned_list_lock_};
Guard<Mutex> free_guard{&lock_};
if (free_fill_enabled_) {
// Checker is already enabled.
return false;
}
checker_.SetFillSize(fill_size);
checker_.SetAction(action);
// As free_fill_enabled_ may be examined outside of the lock, ensure the manipulations to checker_
// complete first by performing a release. See IsFreeFillEnabledRacy for where the acquire is
// performed.
free_fill_enabled_.store(true, ktl::memory_order_release);
return true;
}
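// A minimal sketch of the matching acquire side, shown for illustration only (the real
// IsFreeFillEnabledRacy is declared in the header; this is an assumption about its shape):
//
//   bool PmmNode::IsFreeFillEnabledRacy() const {
//     // Acquire pairs with the release store above so that any thread observing |true|
//     // also observes the checker_ configuration performed before the store.
//     return free_fill_enabled_.load(ktl::memory_order_acquire);
//   }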
void PmmNode::AllocPageHelperLocked(vm_page_t* page) {
LTRACEF("allocating page %p, pa %#" PRIxPTR ", prev state %s\n", page, page->paddr(),
page_state_to_string(page->state()));
AsanUnpoisonPage(page);
DEBUG_ASSERT(page->is_free() && !page->is_loaned());
// Here we transition the page from FREE->ALLOC, completing the transfer of ownership from the
// PmmNode to the stack. This must be done under lock_, and more specifically the same lock_
// acquisition that removes the page from the free list, as both being in the free list and
// being in the FREE state indicate ownership by the PmmNode.
page->set_state(vm_page_state::ALLOC);
// Used by the FLPH for loaned pages, but cleared here for consistency to ensure no stale pointers
// that could be accidentally referenced.
page->alloc.owner = nullptr;
}
void PmmNode::AllocLoanedPageHelperLocked(vm_page_t* page) {
LTRACEF("allocating loaned page %p, pa %#" PRIxPTR ", prev state %s\n", page, page->paddr(),
page_state_to_string(page->state()));
AsanUnpoisonPage(page);
DEBUG_ASSERT(page->is_free_loaned() && page->is_loaned());
// Here we transition the page from FREE_LOANED->ALLOC, completing the transfer of ownership from
// the PmmNode to the stack. This must be done under loaned_list_lock_, and more specifically the
// same loaned_list_lock_ acquisition that removes the page from the free list, as both being in
// the free list and being in the FREE_LOANED state indicate ownership by the PmmNode.
page->set_state(vm_page_state::ALLOC);
page->alloc.owner = nullptr;
}
zx::result<vm_page_t*> PmmNode::AllocLoanedPage(
fit::inline_function<void(vm_page_t*), 32> allocated) {
DEBUG_ASSERT(Thread::Current::memory_allocation_state().IsEnabled());
AutoPreemptDisabler preempt_disable;
bool free_list_had_fill_pattern = false;
vm_page* page = nullptr;
{
Guard<Mutex> guard{&loaned_list_lock_};
free_list_had_fill_pattern = FreePagesFilledLoanedLocked();
page = list_remove_head_type(&free_loaned_list_, vm_page, queue_node);
if (!page) {
// Does not count as out of memory, so do not report an allocation failure, just tell the
// caller we are out of resources.
return zx::error(ZX_ERR_NO_RESOURCES);
}
AllocLoanedPageHelperLocked(page);
DecrementFreeLoanedCountLocked(1);
// Run the callback while still holding the lock.
allocated(page);
// Before we drop the loaned list lock the page is expected to be in the object state with a
// back pointer.
DEBUG_ASSERT(page->state() == vm_page_state::OBJECT && page->object.get_object());
}
if (free_list_had_fill_pattern) {
checker_.AssertPattern(page);
}
return zx::ok(page);
}
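// A hedged usage sketch (the VMO method name is hypothetical): the |allocated| callback runs
// under loaned_list_lock_ and must install an object backlink before returning, e.g.:
//
//   zx::result<vm_page_t*> result = node.AllocLoanedPage([&vmo](vm_page_t* p) {
//     // Move the page into the OBJECT state with |vmo| as its back pointer.
//     vmo->AddLoanedPageLocked(p);
//   });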
zx::result<vm_page_t*> PmmNode::AllocPage(uint alloc_flags) {
DEBUG_ASSERT(Thread::Current::memory_allocation_state().IsEnabled());
vm_page* page = nullptr;
bool free_list_had_fill_pattern = false;
{
AutoPreemptDisabler preempt_disable;
Guard<Mutex> guard{&lock_};
free_list_had_fill_pattern = FreePagesFilledLocked();
if ((alloc_flags & PMM_ALLOC_FLAG_CAN_WAIT) && ShouldDelayAllocationLocked()) {
pmm_alloc_delayed.Add(1);
return zx::error(ZX_ERR_SHOULD_WAIT);
}
page = list_remove_head_type(&free_list_, vm_page, queue_node);
if (!page) {
// Allocation failures from the regular free list are likely to become user-visible.
ReportAllocFailureLocked(AllocFailure{.type = AllocFailure::Type::Pmm, .size = 1});
return zx::error(ZX_ERR_NO_MEMORY);
}
AllocPageHelperLocked(page);
DecrementFreeCountLocked(1);
}
if (free_list_had_fill_pattern) {
checker_.AssertPattern(page);
}
return zx::ok(page);
}
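// A minimal, hedged usage sketch (caller code shown for illustration only):
//
//   zx::result<vm_page_t*> page = node.AllocPage(0);
//   if (page.is_error()) {
//     // Without PMM_ALLOC_FLAG_CAN_WAIT the only failure path above is ZX_ERR_NO_MEMORY.
//     return page.status_value();
//   }
//   // ...use page.value()...
//   node.FreePage(page.value());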
zx_status_t PmmNode::AllocPages(size_t count, uint alloc_flags, list_node* list) {
LTRACEF("count %zu\n", count);
DEBUG_ASSERT(Thread::Current::memory_allocation_state().IsEnabled());
// list must be initialized prior to calling this
DEBUG_ASSERT(list);
if (unlikely(count == 0)) {
return ZX_OK;
} else if (count == 1) {
zx::result<vm_page_t*> result = AllocPage(alloc_flags);
if (result.is_ok()) {
vm_page_t* page = result.value();
list_add_tail(list, &page->queue_node);
}
return result.status_value();
}
bool free_list_had_fill_pattern = false;
// Holds the pages that we pull out of the PMM's free list. These pages may still need to have
// their pattern checked (based on the bool above) before being appended to |list| and returned to
// the caller.
list_node_t alloc_list = LIST_INITIAL_VALUE(alloc_list);
{
AutoPreemptDisabler preempt_disable;
Guard<Mutex> guard{&lock_};
free_list_had_fill_pattern = FreePagesFilledLocked();
uint64_t free_count = free_count_.load(ktl::memory_order_relaxed);
// Check up front that the free list has enough pages to satisfy the entire request.
if (unlikely(count > free_count)) {
if ((alloc_flags & PMM_ALLOC_FLAG_CAN_WAIT) && should_wait_ != ShouldWaitState::Never) {
pmm_alloc_delayed.Add(1);
return ZX_ERR_SHOULD_WAIT;
}
// Allocation failures from the regular free list are likely to become user-visible.
ReportAllocFailureLocked(AllocFailure{.type = AllocFailure::Type::Pmm, .size = count});
return ZX_ERR_NO_MEMORY;
}
DecrementFreeCountLocked(count);
if ((alloc_flags & PMM_ALLOC_FLAG_CAN_WAIT) && ShouldDelayAllocationLocked()) {
IncrementFreeCountLocked(count);
pmm_alloc_delayed.Add(1);
return ZX_ERR_SHOULD_WAIT;
}
list_node_t* node = &free_list_;
while (count > 0) {
node = list_next(&free_list_, node);
AllocPageHelperLocked(containerof(node, vm_page, queue_node));
--count;
}
// Want to take the pages ranging from the start of free_list_ up to and including node, and
// place them in alloc_list. Due to how the list_node operations work, it's easier to move the
// entire list into alloc_list, then split the pages that we are not allocating back into
// free_list_.
list_move(&free_list_, &alloc_list);
list_split_after(&alloc_list, node, &free_list_);
}
// Check the pages we are allocating before appending them into the user's allocation list. Do
// this check first since we must not check existing pages in the user's allocation list, as they
// are completely arbitrary pages and there's no reason to expect a fill pattern in them.
if (free_list_had_fill_pattern) {
vm_page* page;
list_for_every_entry (&alloc_list, page, vm_page, queue_node) {
checker_.AssertPattern(page);
}
}
// Append the checked list onto the user provided list.
if (list_is_empty(list)) {
list_move(&alloc_list, list);
} else {
list_splice_after(&alloc_list, list_peek_tail(list));
}
return ZX_OK;
}
zx_status_t PmmNode::AllocRange(paddr_t address, size_t count, list_node* list) {
LTRACEF("address %#" PRIxPTR ", count %zu\n", address, count);
DEBUG_ASSERT(Thread::Current::memory_allocation_state().IsEnabled());
// list must be initialized prior to calling this
DEBUG_ASSERT(list);
// On error scenarios we will free the list, so make sure the caller didn't leave anything in
// there.
DEBUG_ASSERT(list_is_empty(list));
size_t allocated = 0;
if (count == 0) {
return ZX_OK;
}
address = ROUNDDOWN_PAGE_SIZE(address);
bool free_list_had_fill_pattern = false;
{
AutoPreemptDisabler preempt_disable;
Guard<Mutex> guard{&lock_};
free_list_had_fill_pattern = FreePagesFilledLocked();
// walk through the arenas, looking to see if the physical page belongs to it
for (auto& a : active_arenas()) {
for (; allocated < count && a.address_in_arena(address); address += PAGE_SIZE) {
vm_page_t* page = a.FindSpecific(address);
if (!page) {
break;
}
// As we hold lock_, we can assume that any page in the FREE state is owned by us and
// protected by lock_; so if is_free() is true we may assume the page is in the free list,
// remove it from that list, and allocate it.
if (!page->is_free()) {
break;
}
// We never allocate loaned pages for callers of AllocRange().
if (page->is_loaned()) {
break;
}
list_delete(&page->queue_node);
AllocPageHelperLocked(page);
list_add_tail(list, &page->queue_node);
allocated++;
DecrementFreeCountLocked(1);
}
if (allocated == count) {
break;
}
}
if (allocated != count) {
// We were not able to allocate the entire run, so free these pages. As we allocated them
// under this lock acquisition, the fill status is whatever it was before, i.e. the status of
// whether all free pages have been filled.
FreeListLocked(list, FreePagesFilledLocked());
return ZX_ERR_NOT_FOUND;
}
}
if (free_list_had_fill_pattern) {
vm_page* page;
list_for_every_entry (list, page, vm_page, queue_node) {
checker_.AssertPattern(page);
}
}
return ZX_OK;
}
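// A hedged usage sketch (caller code shown for illustration only): AllocRange targets a
// specific physical range and fails with ZX_ERR_NOT_FOUND unless every page in the run is
// currently free and unloaned:
//
//   list_node_t pages = LIST_INITIAL_VALUE(pages);
//   if (node.AllocRange(paddr, num_pages, &pages) == ZX_OK) {
//     // All |num_pages| pages starting at ROUNDDOWN_PAGE_SIZE(paddr) now belong to the caller.
//   }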
zx_status_t PmmNode::AllocContiguous(const size_t count, uint alloc_flags, uint8_t alignment_log2,
paddr_t* pa, list_node* list) {
DEBUG_ASSERT(Thread::Current::memory_allocation_state().IsEnabled());
LTRACEF("count %zu, align %u\n", count, alignment_log2);
if (count == 0) {
return ZX_OK;
}
if (alignment_log2 < PAGE_SIZE_SHIFT) {
alignment_log2 = PAGE_SIZE_SHIFT;
}
DEBUG_ASSERT(!(alloc_flags & PMM_ALLOC_FLAG_CAN_WAIT));
// pa and list must be valid pointers
DEBUG_ASSERT(pa);
DEBUG_ASSERT(list);
AutoPreemptDisabler preempt_disable;
Guard<Mutex> guard{&lock_};
for (auto& a : active_arenas()) {
// FindFreeContiguous will search the arena for FREE pages. As we hold lock_, any pages in the
// FREE state are assumed to be owned by us, and would only be modified if lock_ were held.
vm_page_t* p = a.FindFreeContiguous(count, alignment_log2);
if (!p) {
continue;
}
*pa = p->paddr();
// remove the pages from the run out of the free list
for (size_t i = 0; i < count; i++, p++) {
DEBUG_ASSERT_MSG(p->is_free(), "p %p state %u\n", p, static_cast<uint32_t>(p->state()));
// Loaned pages are never returned by FindFreeContiguous() above.
DEBUG_ASSERT(!p->is_loaned());
DEBUG_ASSERT(list_in_list(&p->queue_node));
// Atomically (that is, in a single lock acquisition) remove this page from the free list and
// transition it out of the FREE state, ensuring it is owned by us.
list_delete(&p->queue_node);
p->set_state(vm_page_state::ALLOC);
DecrementFreeCountLocked(1);
AsanUnpoisonPage(p);
checker_.AssertPattern(p);
list_add_tail(list, &p->queue_node);
}
return ZX_OK;
}
// We could potentially move contents of non-pinned pages out of the way for critical contiguous
// allocations, but for now...
LTRACEF("couldn't find run\n");
return ZX_ERR_NOT_FOUND;
}
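// A hedged usage sketch (caller code shown for illustration only): request a physically
// contiguous run of pages aligned to 1 << alignment_log2:
//
//   paddr_t pa = 0;
//   list_node_t pages = LIST_INITIAL_VALUE(pages);
//   zx_status_t status = node.AllocContiguous(count, 0, PAGE_SIZE_SHIFT, &pa, &pages);
//   // On ZX_OK, |pages| holds |count| pages starting at physical address |pa|.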
// We disable thread safety analysis here, since this function is only called
// during early boot before threading exists.
zx_status_t PmmNode::InitArena(const PmmArenaSelection& selected) TA_NO_THREAD_SAFETY_ANALYSIS {
if (used_arena_count_ >= kArenaCount) {
return ZX_ERR_NOT_SUPPORTED;
}
if (selected.arena.size > (kMaxPagesPerArena * PAGE_SIZE)) {
// We have this limit since we need to compress a vm_page_t pointer to a 24-bit integer.
return ZX_ERR_NOT_SUPPORTED;
}
arenas_[used_arena_count_++].Init(selected, this);
arena_cumulative_size_ += selected.arena.size;
return ZX_OK;
}
void PmmNode::InitReservedRange(const memalloc::Range& range) {
DEBUG_ASSERT(IS_PAGE_ROUNDED(range.addr));
DEBUG_ASSERT(IS_PAGE_ROUNDED(range.size));
ktl::string_view what =
range.type == memalloc::Type::kReserved ? "hole in RAM"sv : memalloc::ToString(range.type);
list_node reserved = LIST_INITIAL_VALUE(reserved);
zx_status_t status = pmm_alloc_range(range.addr, range.size / PAGE_SIZE, &reserved);
if (status != ZX_OK) {
dprintf(INFO, "PMM: unable to reserve [%#" PRIx64 ", %#" PRIx64 "): %.*s: %d\n", range.addr,
range.end(), static_cast<int>(what.size()), what.data(), status);
return; // this is probably fatal but go ahead and continue
}
dprintf(INFO, "PMM: reserved [%#" PRIx64 ", %#" PRIx64 "): %.*s\n", range.addr, range.end(),
static_cast<int>(what.size()), what.data());
// Kernel page tables belong to the arch-specific VM backend, just as they'd
// be if they were created post-Physboot.
if (range.type == memalloc::Type::kKernelPageTables) {
ArchVmAspace::HandoffPageTablesFromPhysboot(&reserved);
return;
}
// Otherwise, mark it as wired and merge it into the appropriate reserved
// list.
vm_page_t* p;
list_for_every_entry (&reserved, p, vm_page_t, queue_node) {
p->set_state(vm_page_state::WIRED);
}
list_node_t* list;
if (range.type == memalloc::Type::kTemporaryPhysHandoff) {
list = &phys_handoff_temporary_list_;
} else if (PhysHandoff::IsPhysVmoType(range.type)) {
list = &phys_handoff_vmo_list_;
} else {
list = &permanently_reserved_list_;
}
if (list_is_empty(list)) {
list_move(&reserved, list);
} else {
list_splice_after(&reserved, list_peek_tail(list));
}
}
void PmmNode::FreePageHelperLocked(vm_page* page, bool already_filled) {
LTRACEF("page %p state %zu paddr %#" PRIxPTR "\n", page, VmPageStateIndex(page->state()),
page->paddr());
DEBUG_ASSERT(!page->is_free());
DEBUG_ASSERT(!page->is_free_loaned());
DEBUG_ASSERT(page->state() != vm_page_state::OBJECT ||
(page->object.pin_count == 0 && page->object.get_object() == nullptr));
// Mark it free. This makes the page owned by the PmmNode, even though it may not be in any page
// list, since the page is findable via the arena, and so we must ensure we:
// 1. Perform set_state here under lock_
// 2. Place the page in the free list and cease referring to the page before ever dropping lock_
page->set_state(vm_page_state::FREE);
// This page cannot be loaned.
DEBUG_ASSERT(!page->is_loaned());
// The caller may have called IsFreeFillEnabledRacy and potentially already filled a pattern;
// however, if it raced with the enabling of free filling we may still need to fill the pattern.
// This should be unlikely, and since free filling can never be turned back off there is no race
// in the other direction.
if (FreeFillEnabledLocked() && !already_filled) {
checker_.FillPattern(page);
}
AsanPoisonPage(page, kAsanPmmFreeMagic);
}
void PmmNode::FreeLoanedPageHelperLocked(vm_page* page, bool already_filled) {
LTRACEF("page %p state %zu paddr %#" PRIxPTR "\n", page, VmPageStateIndex(page->state()),
page->paddr());
DEBUG_ASSERT(!page->is_free());
DEBUG_ASSERT(page->state() != vm_page_state::OBJECT || page->object.pin_count == 0);
DEBUG_ASSERT(page->state() != vm_page_state::ALLOC || page->alloc.owner == nullptr);
// Mark it free. This makes the page owned by the PmmNode, and even though it may not be in any
// page list, since the page is findable via the arena we must ensure the following happens:
// 1. We hold loaned_list_lock_ preventing pages from transition to/from loaned
// 2. This page is loaned and hence will not be considered by an arena traversal that holds lock_
// 3. Perform set_state here under the loaned_list_lock_
// 4. Place the page in the loaned_free_list_ and cease referring to the page before ever dropping
// the loaned_list_lock_.
page->set_state(vm_page_state::FREE_LOANED);
// The caller may have called IsFreeFillEnabledRacy and potentially already filled a pattern;
// however, if it raced with the enabling of free filling we may still need to fill the pattern.
// This should be unlikely, and since free filling can never be turned back off there is no race
// in the other direction. As we hold the lock we can safely perform a relaxed read.
if (!already_filled && FreeFillEnabledLoanedLocked()) {
checker_.FillPattern(page);
}
AsanPoisonPage(page, kAsanPmmFreeMagic);
}
void PmmNode::BeginFreeLoanedPage(vm_page_t* page,
fit::inline_function<void(vm_page_t*)> release_page,
FreeLoanedPagesHolder& flph) {
AutoPreemptDisabler preempt_disable;
DEBUG_ASSERT(page->is_loaned());
// On entry we require that the page has a valid backlink.
DEBUG_ASSERT(page->state() == vm_page_state::OBJECT && page->object.get_object());
Guard<Mutex> guard{&loaned_list_lock_};
release_page(page);
// pages freed individually shouldn't be in a queue
DEBUG_ASSERT(!list_in_list(&page->queue_node));
DEBUG_ASSERT(!flph.used_);
page->set_state(vm_page_state::ALLOC);
page->alloc.owner = &flph;
list_add_head(&flph.pages_, &page->queue_node);
}
void PmmNode::FinishFreeLoanedPages(FreeLoanedPagesHolder& flph) {
if (list_is_empty(&flph.pages_)) {
return;
}
const bool fill = IsFreeFillEnabledRacy();
if (fill) {
vm_page_t* p;
list_for_every_entry (&flph.pages_, p, vm_page_t, queue_node) {
checker_.FillPattern(p);
}
}
AutoPreemptDisabler preempt_disable;
Guard<Mutex> guard{&loaned_list_lock_};
DEBUG_ASSERT(!flph.used_);
flph.used_ = true;
FreeLoanedListLocked(&flph.pages_, fill, [&](vm_page_t* page) {
DEBUG_ASSERT(page->state() == vm_page_state::ALLOC);
DEBUG_ASSERT(page->alloc.owner == &flph);
page->alloc.owner = nullptr;
});
// With the page owners all cleared, no more waiters can come along, so we can wake all the
// existing ones up.
while (!flph.waiters_.is_empty()) {
FreeLoanedPagesHolder::Waiter* waiter = flph.waiters_.pop_front();
waiter->event.Signal();
}
}
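// A hedged sketch of the two-phase loaned-page free protocol (caller code is illustrative):
//
//   FreeLoanedPagesHolder flph;
//   node.BeginFreeLoanedPage(page, [](vm_page_t* p) { /* drop the object backlink */ }, flph);
//   // ...perform any other work that must happen before the pages are reused...
//   node.FinishFreeLoanedPages(flph);  // fills patterns, returns the pages, wakes waiters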
void PmmNode::WithLoanedPage(vm_page_t* page, fit::inline_function<void(vm_page_t*)> with_page) {
// Technically users could race with |WithLoanedPage| and re-allocate the page after it gets
// migrated to the PmmNode, and then place it back in a new FLPH before a stable state can be
// observed. Such behavior almost certainly represents a kernel bug, so if we detect multiple
// iterations to track the page down we generate a warning.
for (int iterations = 0;; iterations++) {
// Intentionally allocate a new waiter every iteration so that its destructor can detect if it
// has been left in a list incorrectly between iterations.
FreeLoanedPagesHolder::Waiter waiter;
{
AutoPreemptDisabler preempt_disable;
Guard<Mutex> guard{&loaned_list_lock_};
DEBUG_ASSERT(page->is_loaned());
if (page->state() != vm_page_state::ALLOC || !page->alloc.owner) {
with_page(page);
return;
}
page->alloc.owner->waiters_.push_front(&waiter);
// After placing waiter in the list and dropping the loaned_list_lock_ we must not manipulate
// the intrusive list node in the Waiter, as it is now owned by the FLPH.
}
if (iterations > 0) {
printf("WARNING: Required multiple attempts (%d) to track down loaned page %p\n", iterations,
page);
}
// Now that the lock is dropped, wait on the event.
waiter.event.Wait();
// Grab the loaned_list_lock_ to ensure that the FinishFreeLoanedPages path has finished holding
// any reference to our event.
Guard<Mutex> guard{&loaned_list_lock_};
}
}
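// A hedged usage sketch (caller code shown for illustration only): observe a loaned page in a
// stable state even if it is currently parked in a FreeLoanedPagesHolder:
//
//   node.WithLoanedPage(page, [](vm_page_t* p) {
//     // Runs under loaned_list_lock_ once |p| is not owned by any FLPH.
//     DEBUG_ASSERT(p->is_loaned());
//   });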
void PmmNode::FreePage(vm_page* page) {
AutoPreemptDisabler preempt_disable;
DEBUG_ASSERT(!page->is_loaned());
const bool fill = IsFreeFillEnabledRacy();
if (fill) {
checker_.FillPattern(page);
}
Guard<Mutex> guard{&lock_};
// pages freed individually shouldn't be in a queue
DEBUG_ASSERT(!list_in_list(&page->queue_node));
FreePageHelperLocked(page, fill);
IncrementFreeCountLocked(1);
if constexpr (!__has_feature(address_sanitizer)) {
list_add_head(&free_list_, &page->queue_node);
} else {
// If address sanitizer is enabled, put the page at the tail to maximize reuse distance.
list_add_tail(&free_list_, &page->queue_node);
}
}
template <typename F>
void PmmNode::FreeLoanedListLocked(list_node* list, bool already_filled, F validator) {
DEBUG_ASSERT(list);
uint64_t count = 0;
{ // scope page
vm_page* page;
vm_page* temp;
list_for_every_entry_safe (list, page, temp, vm_page_t, queue_node) {
validator(page);
DEBUG_ASSERT(page->is_loaned());
FreeLoanedPageHelperLocked(page, already_filled);
if (page->is_loan_cancelled()) {
// Loan-cancelled pages do not go back on the free list.
list_delete(&page->queue_node);
} else {
count++;
}
}
} // end scope page
ReturnPagesToFreeList(&free_loaned_list_, list);
IncrementFreeLoanedCountLocked(count);
}
void PmmNode::FreeListLocked(list_node* list, bool already_filled) {
DEBUG_ASSERT(list);
uint64_t count = 0;
{ // scope page
vm_page* page;
vm_page* temp;
list_for_every_entry_safe (list, page, temp, vm_page_t, queue_node) {
DEBUG_ASSERT(!page->is_loaned());
FreePageHelperLocked(page, already_filled);
count++;
}
} // end scope page
ReturnPagesToFreeList(&free_list_, list);
IncrementFreeCountLocked(count);
}
void PmmNode::BeginFreeLoanedArray(
vm_page_t** pages, size_t count,
fit::inline_function<void(vm_page_t**, size_t, list_node_t*)> release_list,
FreeLoanedPagesHolder& flph) {
AutoPreemptDisabler preempt_disable;
// On entry we expect all pages to have a backlink.
DEBUG_ASSERT(ktl::all_of(&pages[0], &pages[count], [](vm_page_t* p) {
return p->state() == vm_page_state::OBJECT && p->object.get_object();
}));
Guard<Mutex> guard{&loaned_list_lock_};
DEBUG_ASSERT(!flph.used_);
list_node_t free_list = LIST_INITIAL_VALUE(free_list);
release_list(pages, count, &free_list);
// Transfer ownership of each page to the FLPH and validate that the callback populated the free
// list in the same order as |pages|.
vm_page_t* p;
size_t expected = 0;
list_for_every_entry (&free_list, p, vm_page_t, queue_node) {
p->set_state(vm_page_state::ALLOC);
p->alloc.owner = &flph;
DEBUG_ASSERT(pages[expected] == p);
expected++;
}
DEBUG_ASSERT(expected == count);
list_splice_after(&free_list, &flph.pages_);
}
void PmmNode::FreeList(list_node* list) {
AutoPreemptDisabler preempt_disable;
const bool fill = IsFreeFillEnabledRacy();
if (fill) {
vm_page* page;
list_for_every_entry (list, page, vm_page, queue_node) {
checker_.FillPattern(page);
}
}
Guard<Mutex> guard{&lock_};
FreeListLocked(list, fill);
}
void PmmNode::UnwirePage(vm_page* page) {
Guard<Mutex> guard{&lock_};
ASSERT(page->state() == vm_page_state::WIRED);
list_delete(&page->queue_node);
page->set_state(vm_page_state::ALLOC);
}
bool PmmNode::ShouldDelayAllocationLocked() {
if (should_wait_ == ShouldWaitState::UntilReset) {
return true;
}
if (should_wait_ == ShouldWaitState::Never) {
return false;
}
// See pmm_check_alloc_random_should_wait in pmm.cc for an assertion that random should wait is
// only enabled if DEBUG_ASSERT_IMPLEMENTED.
if constexpr (DEBUG_ASSERT_IMPLEMENTED) {
// Randomly try to make 10% of allocations delayed allocations.
if (gBootOptions->pmm_alloc_random_should_wait &&
rand_r(&random_should_wait_seed_) < (RAND_MAX / 10)) {
return true;
}
}
return false;
}
uint64_t PmmNode::CountFreePages() const TA_NO_THREAD_SAFETY_ANALYSIS {
return free_count_.load(ktl::memory_order_relaxed);
}
uint64_t PmmNode::CountLoanedFreePages() const TA_NO_THREAD_SAFETY_ANALYSIS {
return free_loaned_count_.load(ktl::memory_order_relaxed);
}
uint64_t PmmNode::CountLoanedNotFreePages() const TA_NO_THREAD_SAFETY_ANALYSIS {
AutoPreemptDisabler preempt_disable;
// Require both locks to examine both counts.
Guard<Mutex> loaned_guard{&loaned_list_lock_};
Guard<Mutex> free_guard{&lock_};
return loaned_count_.load(ktl::memory_order_relaxed) -
free_loaned_count_.load(ktl::memory_order_relaxed);
}
uint64_t PmmNode::CountLoanedPages() const TA_NO_THREAD_SAFETY_ANALYSIS {
return loaned_count_.load(ktl::memory_order_relaxed);
}
uint64_t PmmNode::CountLoanCancelledPages() const TA_NO_THREAD_SAFETY_ANALYSIS {
return loan_cancelled_count_.load(ktl::memory_order_relaxed);
}
uint64_t PmmNode::CountTotalBytes() const TA_NO_THREAD_SAFETY_ANALYSIS {
return arena_cumulative_size_;
}
void PmmNode::DumpFree() const TA_NO_THREAD_SAFETY_ANALYSIS {
auto megabytes_free = CountFreePages() * PAGE_SIZE / MB;
printf(" %zu free MBs\n", megabytes_free);
}
void PmmNode::Dump(bool is_panic) const {
// No lock analysis here, as we want to just go for it in the panic case without the lock.
auto dump = [this]() TA_NO_THREAD_SAFETY_ANALYSIS {
uint64_t free_count = free_count_.load(ktl::memory_order_relaxed);
uint64_t free_loaned_count = free_loaned_count_.load(ktl::memory_order_relaxed);
printf(
"pmm node %p: free_count %zu (%zu bytes), free_loaned_count: %zu (%zu bytes), total size "
"%zu\n",
this, free_count, free_count * PAGE_SIZE, free_loaned_count, free_loaned_count * PAGE_SIZE,
arena_cumulative_size_);
PmmStateCount count_sum = {};
for (const auto& a : active_arenas()) {
a.Dump(false, false, &count_sum);
}
printf("Totals\n");
PrintPageStateCounts(count_sum);
};
if (is_panic) {
dump();
} else {
Guard<Mutex> guard{&lock_};
dump();
}
}
void PmmNode::TripFreePagesLevelLocked() {
if (should_wait_ == ShouldWaitState::OnceLevelTripped) {
should_wait_ = ShouldWaitState::UntilReset;
may_allocate_evt_.Unsignal();
}
}
bool PmmNode::SetFreeMemorySignal(uint64_t free_lower_bound, uint64_t free_upper_bound,
uint64_t delay_allocations_pages, Event* event) {
Guard<Mutex> guard{&lock_};
// Ensure delay_allocations_pages is valid.
DEBUG_ASSERT(delay_allocations_pages <= free_lower_bound ||
delay_allocations_pages == UINT64_MAX);
const uint64_t free_count = CountFreePages();
if (free_count < free_lower_bound || free_count > free_upper_bound) {
return false;
}
if (delay_allocations_pages == UINT64_MAX) {
TripFreePagesLevelLocked();
} else if (should_wait_ == ShouldWaitState::UntilReset) {
may_allocate_evt_.Signal();
should_wait_ = ShouldWaitState::OnceLevelTripped;
}
should_wait_free_pages_level_ = delay_allocations_pages;
mem_signal_lower_bound_ = free_lower_bound;
mem_signal_upper_bound_ = free_upper_bound;
mem_signal_ = event;
return true;
}
void PmmNode::SignalFreeMemoryChangeLocked() {
DEBUG_ASSERT(mem_signal_);
mem_signal_->Signal();
mem_signal_ = nullptr;
}
void PmmNode::StopReturningShouldWait() {
Guard<Mutex> guard{&lock_};
should_wait_ = ShouldWaitState::Never;
may_allocate_evt_.Signal();
}
int64_t PmmNode::get_alloc_failed_count() { return pmm_alloc_failed.SumAcrossAllCpus(); }
bool PmmNode::has_alloc_failed_no_mem() {
return alloc_failed_no_mem.load(ktl::memory_order_relaxed);
}
void PmmNode::BeginLoan(list_node* page_list) {
DEBUG_ASSERT(page_list);
AutoPreemptDisabler preempt_disable;
const bool fill = IsFreeFillEnabledRacy();
if (fill) {
vm_page* page;
list_for_every_entry (page_list, page, vm_page, queue_node) {
checker_.FillPattern(page);
}
}
Guard<Mutex> guard{&loaned_list_lock_};
uint64_t loaned_count = 0;
vm_page* page;
list_for_every_entry (page_list, page, vm_page, queue_node) {
DEBUG_ASSERT(!page->is_loaned());
DEBUG_ASSERT(!page->is_free());
page->set_is_loaned();
++loaned_count;
DEBUG_ASSERT(!page->is_loan_cancelled());
}
IncrementLoanedCountLocked(loaned_count);
// Callers of BeginLoan() generally won't want the pages loaned to them; the intent is to loan to
// the rest of the system, so go ahead and free also. Some callers will basically choose between
// pmm_begin_loan() and pmm_free().
FreeLoanedListLocked(page_list, fill, [](vm_page_t* p) {});
}
void PmmNode::CancelLoan(vm_page_t* page) {
AutoPreemptDisabler preempt_disable;
// Require both locks in order to iterate the arenas and manipulate the loaned list.
Guard<Mutex> loaned_guard{&loaned_list_lock_};
Guard<Mutex> arena_guard{&lock_};
DEBUG_ASSERT(page->is_loaned());
DEBUG_ASSERT(!page->is_free());
bool was_cancelled = page->is_loan_cancelled();
// We can assert this because of PageSource's overlapping request
// handling.
DEBUG_ASSERT(!was_cancelled);
page->set_is_loan_cancelled();
IncrementLoanCancelledCountLocked(1);
if (page->is_free_loaned()) {
// Currently in free_loaned_list_.
DEBUG_ASSERT(list_in_list(&page->queue_node));
// Remove from free_loaned_list_ to prevent any new use until
// after EndLoan.
list_delete(&page->queue_node);
DecrementFreeLoanedCountLocked(1);
}
}
void PmmNode::EndLoan(vm_page_t* page) {
bool free_list_had_fill_pattern = false;
{
AutoPreemptDisabler preempt_disable;
// Require both locks in order to manipulate loaned pages and the regular free list.
Guard<Mutex> loaned_guard{&loaned_list_lock_};
Guard<Mutex> free_guard{&lock_};
free_list_had_fill_pattern = FreePagesFilledLoanedLocked();
// PageSource serializes requests so that there is only one request to the PageProvider in
// flight at a time for any given page, which is the main reason we can assert these
// conditions instead of needing to check them.
DEBUG_ASSERT(page->is_loaned());
DEBUG_ASSERT(page->is_loan_cancelled());
DEBUG_ASSERT(page->is_free_loaned());
// Already not in free_loaned_list_ (because loan_cancelled
// already).
DEBUG_ASSERT(!list_in_list(&page->queue_node));
page->clear_is_loaned();
page->clear_is_loan_cancelled();
// Change the state to regular FREE. When this page was made
// FREE_LOANED all of the pmm checker filling and asan work was
// done, so we are safe to just change the state without using a
// helper.
page->set_state(vm_page_state::FREE);
AllocPageHelperLocked(page);
DecrementLoanCancelledCountLocked(1);
DecrementLoanedCountLocked(1);
}
if (free_list_had_fill_pattern) {
checker_.AssertPattern(page);
}
}
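// A hedged sketch of the loan lifecycle implemented by the three functions above (caller code
// is illustrative):
//
//   node.BeginLoan(&page_list);  // pages become loaned and join free_loaned_list_
//   node.CancelLoan(page);       // marks the loan cancelled; the page can no longer be handed out
//   node.EndLoan(page);          // once the page is free again, it returns to the caller as ALLOC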
void PmmNode::ReportAllocFailureLocked(AllocFailure failure) {
kcounter_add(pmm_alloc_failed, 1);
// Update before signaling the MemoryWatchdog to ensure it observes the update.
//
// |alloc_failed_no_mem| latches, so we only need to invoke the callback once. We could call it on
// every failure, but that's wasteful and we don't want to spam any underlying Event (or the
// thread lock or the MemoryWatchdog).
const bool first_time = !alloc_failed_no_mem.exchange(true, ktl::memory_order_relaxed);
if (first_time) {
first_alloc_failure_ = failure;
first_alloc_failure_.free_count = free_count_;
}
if (first_time && mem_signal_) {
SignalFreeMemoryChangeLocked();
}
}
void PmmNode::ReportAllocFailure(AllocFailure failure) {
Guard<Mutex> guard{&lock_};
ReportAllocFailureLocked(failure);
}
PmmNode::AllocFailure PmmNode::GetFirstAllocFailure() {
Guard<Mutex> guard{&lock_};
return first_alloc_failure_;
}
void PmmNode::SeedRandomShouldWait() {
if constexpr (DEBUG_ASSERT_IMPLEMENTED) {
Guard<Mutex> guard{&lock_};
crypto::global_prng::GetInstance()->Draw(&random_should_wait_seed_,
sizeof(random_should_wait_seed_));
}
}
zx_status_t PmmNode::SetPageCompression(fbl::RefPtr<VmCompression> compression) {
Guard<Mutex> guard{&compression_lock_};
if (page_compression_) {
return ZX_ERR_ALREADY_EXISTS;
}
page_compression_ = ktl::move(compression);
return ZX_OK;
}
const char* PmmNode::AllocFailure::TypeToString(Type type) {
switch (type) {
case Type::None:
return "None";
case Type::Pmm:
return "PMM";
case Type::Heap:
return "Heap";
case Type::Handle:
return "Handle";
case Type::Other:
return "Other";
}
return "UNKNOWN";
}
zx::result<vm_page_t*> PmmNode::WaitForSinglePageAllocation(Deadline deadline) {
zx_status_t wait_result = may_allocate_evt_.Wait(deadline);
// Let the caller handle the error and retry if necessary.
// This could be `ZX_ERR_TIMED_OUT`, `ZX_ERR_INTERNAL_INTR_KILLED` (thread killed),
// or `ZX_ERR_INTERNAL_INTR_RETRY` (thread suspended).
//
// TODO(fxbug.dev/443281947): Handle thread suspension.
if (wait_result != ZX_OK) {
return zx::error(wait_result);
}
// Try to allocate the page now. It may still fail sporadically, since there is no guarantee
// that pages are still available by the time we attempt the allocation.
zx::result<vm_page_t*> res = AllocPage(PMM_ALLOC_FLAG_CAN_WAIT);
// Normally we would only signal in the `ZX_OK` case, but in order to address
// `pmm_alloc_random_should_wait` we also signal on `ZX_ERR_SHOULD_WAIT`, to simplify the logic.
// This leads to at most one extra spurious wake-up.
//
// This is because, when this random wait mode is active, we may block in a non-low-memory state,
// which would leave threads blocked with no one kicking them out. The unblocking chain is
// normally triggered by the system moving OUT of a low memory state, which signals the event.
bool may_signal = res.is_ok() || res.status_value() == ZX_ERR_SHOULD_WAIT;
bool should_signal = false;
if (may_signal) {
Guard<Mutex> g(&lock_);
// We may only signal the event when we are in an allocatable state; otherwise we would wake
// up another thread just for it to receive `ZX_ERR_SHOULD_WAIT` and go back to waiting on the
// event, which we would have signaled below before returning.
//
// We still allow `ZX_ERR_SHOULD_WAIT` to signal threads, to accommodate random waits.
should_signal = may_signal && (should_wait_ != ShouldWaitState::UntilReset);
}
if (should_signal) {
may_allocate_evt_.Signal();
}
if (res.is_error()) {
ZX_DEBUG_ASSERT(res.error_value() == ZX_ERR_SHOULD_WAIT);
return res.take_error();
}
return res;
}
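// A hedged sketch of the expected caller retry loop (illustrative only; real callers also
// handle ZX_ERR_TIMED_OUT and the interrupt statuses noted above):
//
//   zx::result<vm_page_t*> page = zx::error(ZX_ERR_SHOULD_WAIT);
//   while (page.is_error() && page.error_value() == ZX_ERR_SHOULD_WAIT) {
//     page = node.WaitForSinglePageAllocation(Deadline::infinite());
//   }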