| // Copyright 2018 The Fuchsia Authors |
| // |
| // Use of this source code is governed by an MIT-style |
| // license that can be found in the LICENSE file or at |
| // https://opensource.org/licenses/MIT |
| #include "pmm_node.h" |
| |
| #include <align.h> |
| #include <assert.h> |
| #include <inttypes.h> |
| #include <lib/boot-options/boot-options.h> |
| #include <lib/counters.h> |
| #include <lib/instrumentation/asan.h> |
| #include <lib/zircon-internal/macros.h> |
| #include <trace.h> |
| |
| #include <new> |
| |
| #include <fbl/algorithm.h> |
| #include <kernel/auto_preempt_disabler.h> |
| #include <kernel/mp.h> |
| #include <kernel/thread.h> |
| #include <pretty/cpp/sizes.h> |
| #include <vm/bootalloc.h> |
| #include <vm/physmap.h> |
| #include <vm/pmm.h> |
| #include <vm/pmm_checker.h> |
| #include <vm/stack_owned_loaned_pages_interval.h> |
| |
| #include "vm/pmm.h" |
| #include "vm_priv.h" |
| |
| #define LOCAL_TRACE VM_GLOBAL_TRACE(0) |
| |
| using pretty::FormattedBytes; |
| |
| // The number of PMM allocation calls that have failed. |
| KCOUNTER(pmm_alloc_failed, "vm.pmm.alloc.failed") |
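| // The number of PMM allocation calls that have been deferred with ZX_ERR_SHOULD_WAIT because |
| // the caller set PMM_ALLOC_FLAG_CAN_WAIT and the request could not be satisfied immediately. |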
| KCOUNTER(pmm_alloc_delayed, "vm.pmm.alloc.delayed") |
| |
| namespace { |
| |
| void noop_callback(void* context, uint8_t idx) {} |
| |
| // Indicates whether a PMM alloc call has ever failed with ZX_ERR_NO_MEMORY. Used to trigger an OOM |
| // response. See |MemoryWatchdog::WorkerThread|. |
| ktl::atomic<bool> alloc_failed_no_mem; |
| |
| } // namespace |
| |
| // Poison a page |p| with value |value|. Accesses to a poisoned page via the physmap are not |
| // allowed and may cause faults or kASAN check failures. |
| void PmmNode::AsanPoisonPage(vm_page_t* p, uint8_t value) { |
| #if __has_feature(address_sanitizer) |
| asan_poison_shadow(reinterpret_cast<uintptr_t>(paddr_to_physmap(p->paddr())), PAGE_SIZE, value); |
| #endif // __has_feature(address_sanitizer) |
| } |
| |
| // Unpoison a page |p|. Accesses to an unpoisoned page will not cause kASAN check failures. |
| void PmmNode::AsanUnpoisonPage(vm_page_t* p) { |
| #if __has_feature(address_sanitizer) |
| asan_unpoison_shadow(reinterpret_cast<uintptr_t>(paddr_to_physmap(p->paddr())), PAGE_SIZE); |
| #endif // __has_feature(address_sanitizer) |
| } |
| |
| PmmNode::PmmNode() : evictor_(this) { |
| // Initialize the reclamation watermarks such that the system never |
| // falls into a low memory state. |
| uint64_t default_watermark = 0; |
| InitReclamation(&default_watermark, 1, 0, nullptr, noop_callback); |
| } |
| |
| PmmNode::~PmmNode() {} |
| |
| // We disable thread safety analysis here, since this function is only called |
| // during early boot before threading exists. |
| zx_status_t PmmNode::AddArena(const pmm_arena_info_t* info) TA_NO_THREAD_SAFETY_ANALYSIS { |
| dprintf(INFO, "PMM: adding arena %p name '%s' base %#" PRIxPTR " size %#zx\n", info, info->name, |
| info->base, info->size); |
| |
| // Make sure we're in early boot (ints disabled and no active CPUs according |
| // to the scheduler). |
| DEBUG_ASSERT(mp_get_active_mask() == 0); |
| DEBUG_ASSERT(arch_ints_disabled()); |
| |
| DEBUG_ASSERT(IS_PAGE_ALIGNED(info->base)); |
| DEBUG_ASSERT(IS_PAGE_ALIGNED(info->size)); |
| DEBUG_ASSERT(info->size > 0); |
| |
| // allocate a c++ arena object |
| PmmArena* arena = new (boot_alloc_mem(sizeof(PmmArena))) PmmArena(); |
| |
| // initialize the object |
| auto status = arena->Init(info, this); |
| if (status != ZX_OK) { |
| // leaks boot allocator memory |
| arena->~PmmArena(); |
| printf("PMM: pmm_add_arena failed to initialize arena\n"); |
| return status; |
| } |
| |
| // walk the arena list, inserting in ascending order of arena base address |
| for (auto& a : arena_list_) { |
| if (a.base() > arena->base()) { |
| arena_list_.insert(a, arena); |
| goto done_add; |
| } |
| } |
| |
| // walked off the end, add it to the end of the list |
| arena_list_.push_back(arena); |
| |
| done_add: |
| arena_cumulative_size_ += info->size; |
| |
| return ZX_OK; |
| } |
| |
| size_t PmmNode::NumArenas() const { |
| Guard<Mutex> guard{&lock_}; |
| return arena_list_.size(); |
| } |
| |
| zx_status_t PmmNode::GetArenaInfo(size_t count, uint64_t i, pmm_arena_info_t* buffer, |
| size_t buffer_size) { |
| Guard<Mutex> guard{&lock_}; |
| |
| if ((count == 0) || (count + i > arena_list_.size()) || (i >= arena_list_.size())) { |
| return ZX_ERR_OUT_OF_RANGE; |
| } |
| const size_t size_required = count * sizeof(pmm_arena_info_t); |
| if (buffer_size < size_required) { |
| return ZX_ERR_BUFFER_TOO_SMALL; |
| } |
| |
| // Skip the first |i| elements. |
| auto iter = arena_list_.begin(); |
| for (uint64_t j = 0; j < i; j++) { |
| iter++; |
| } |
| |
| // Copy the next |count| elements. |
| for (uint64_t j = 0; j < count; j++) { |
| buffer[j] = iter->info(); |
| iter++; |
| } |
| |
| return ZX_OK; |
| } |
| |
| // Called at boot time as arenas are brought online; no locks are acquired. |
| void PmmNode::AddFreePages(list_node* list) TA_NO_THREAD_SAFETY_ANALYSIS { |
| LTRACEF("list %p\n", list); |
| |
| uint64_t free_count = 0; |
| vm_page *temp, *page; |
| list_for_every_entry_safe (list, page, temp, vm_page, queue_node) { |
| list_delete(&page->queue_node); |
| DEBUG_ASSERT(!page->loaned); |
| DEBUG_ASSERT(!page->loan_cancelled); |
| DEBUG_ASSERT(page->is_free()); |
| list_add_tail(&free_list_, &page->queue_node); |
| ++free_count; |
| } |
| free_count_.fetch_add(free_count); |
| ASSERT(free_count_); |
| free_pages_evt_.Signal(); |
| |
| LTRACEF("free count now %" PRIu64 "\n", free_count_.load(ktl::memory_order_relaxed)); |
| } |
| |
| void PmmNode::FillFreePagesAndArm() { |
| Guard<Mutex> guard{&lock_}; |
| |
| if (!free_fill_enabled_) { |
| return; |
| } |
| |
| vm_page* page; |
| list_for_every_entry (&free_list_, page, vm_page, queue_node) { |
| checker_.FillPattern(page); |
| } |
| list_for_every_entry (&free_loaned_list_, page, vm_page, queue_node) { |
| checker_.FillPattern(page); |
| } |
| |
| // Now that every page has been filled, we can arm the checker. |
| checker_.Arm(); |
| |
| checker_.PrintStatus(stdout); |
| } |
| |
| void PmmNode::CheckAllFreePages() { |
| Guard<Mutex> guard{&lock_}; |
| |
| if (!checker_.IsArmed()) { |
| return; |
| } |
| |
| uint64_t free_page_count = 0; |
| uint64_t free_loaned_page_count = 0; |
| vm_page* page; |
| list_for_every_entry (&free_list_, page, vm_page, queue_node) { |
| checker_.AssertPattern(page); |
| ++free_page_count; |
| } |
| list_for_every_entry (&free_loaned_list_, page, vm_page, queue_node) { |
| checker_.AssertPattern(page); |
| ++free_loaned_page_count; |
| } |
| |
| ASSERT(free_page_count == free_count_.load(ktl::memory_order_relaxed)); |
| ASSERT(free_loaned_page_count == free_loaned_count_.load(ktl::memory_order_relaxed)); |
| } |
| |
| #if __has_feature(address_sanitizer) |
| void PmmNode::PoisonAllFreePages() { |
| Guard<Mutex> guard{&lock_}; |
| |
| vm_page* page; |
| list_for_every_entry (&free_list_, page, vm_page, queue_node) { |
| AsanPoisonPage(page, kAsanPmmFreeMagic); |
| }; |
| list_for_every_entry (&free_loaned_list_, page, vm_page, queue_node) { |
| AsanPoisonPage(page, kAsanPmmFreeMagic); |
| }; |
| } |
| #endif // __has_feature(address_sanitizer) |
| |
| void PmmNode::EnableFreePageFilling(size_t fill_size, PmmChecker::Action action) { |
| Guard<Mutex> guard{&lock_}; |
| checker_.SetFillSize(fill_size); |
| checker_.SetAction(action); |
| free_fill_enabled_ = true; |
| } |
| |
| void PmmNode::DisableChecker() { |
| Guard<Mutex> guard{&lock_}; |
| checker_.Disarm(); |
| free_fill_enabled_ = false; |
| } |
| |
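| // Common bookkeeping for a page coming off a free list: unpoisons the page, records a stack |
| // owner for loaned pages, moves it to the ALLOC state and, when free-fill checking is enabled, |
| // verifies the fill pattern. |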
| void PmmNode::AllocPageHelperLocked(vm_page_t* page) { |
| LTRACEF("allocating page %p, pa %#" PRIxPTR ", prev state %s\n", page, page->paddr(), |
| page_state_to_string(page->state())); |
| |
| AsanUnpoisonPage(page); |
| |
| DEBUG_ASSERT(page->is_free()); |
| DEBUG_ASSERT(!page->object.is_stack_owned()); |
| |
| if (page->is_loaned()) { |
| page->object.set_stack_owner(&StackOwnedLoanedPagesInterval::current()); |
| // We want the set_stack_owner() to be visible before set_state(), but we don't need to make |
| // set_state() a release just for the benefit of loaned pages, so we use this fence. |
| ktl::atomic_thread_fence(ktl::memory_order_release); |
| } |
| |
| page->set_state(vm_page_state::ALLOC); |
| |
| if (unlikely(free_fill_enabled_)) { |
| checker_.AssertPattern(page); |
| } |
| } |
| |
| zx_status_t PmmNode::AllocPage(uint alloc_flags, vm_page_t** page_out, paddr_t* pa_out) { |
| DEBUG_ASSERT(Thread::Current::memory_allocation_state().IsEnabled()); |
| AutoPreemptDisabler preempt_disable; |
| Guard<Mutex> guard{&lock_}; |
| |
| // If the caller sets PMM_ALLOC_FLAG_MUST_BORROW, the caller must also set |
| // PMM_ALLOC_FLAG_CAN_BORROW, and must not set PMM_ALLOC_FLAG_CAN_WAIT. |
| DEBUG_ASSERT( |
| !(alloc_flags & PMM_ALLOC_FLAG_MUST_BORROW) || |
| ((alloc_flags & PMM_ALLOC_FLAG_CAN_BORROW) && !((alloc_flags & PMM_ALLOC_FLAG_CAN_WAIT)))); |
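| // Borrowing is only attempted when both the allocation flags and the global borrowing config |
| // allow it. A must-borrow allocation uses the loaned list even when it is empty, so that it |
| // fails rather than falling back to the regular free list. |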
| const bool can_borrow = pmm_physical_page_borrowing_config()->is_any_borrowing_enabled() && |
| !!(alloc_flags & PMM_ALLOC_FLAG_CAN_BORROW); |
| const bool must_borrow = can_borrow && !!(alloc_flags & PMM_ALLOC_FLAG_MUST_BORROW); |
| const bool use_loaned_list = can_borrow && (!list_is_empty(&free_loaned_list_) || must_borrow); |
| list_node* const which_list = use_loaned_list ? &free_loaned_list_ : &free_list_; |
| |
| // Note that we do not care whether the allocation is coming from the loaned list or not: if we |
| // are in the OOM state we still want to reserve those loaned pages for allocations that cannot |
| // be delayed. |
| if ((alloc_flags & PMM_ALLOC_FLAG_CAN_WAIT) && InOomStateLocked() && !never_return_should_wait_) { |
| pmm_alloc_delayed.Add(1); |
| return ZX_ERR_SHOULD_WAIT; |
| } |
| |
| vm_page* page = list_remove_head_type(which_list, vm_page, queue_node); |
| if (!page) { |
| if (!must_borrow) { |
| // Allocation failures from the regular free list are likely to become user-visible. |
| ReportAllocFailure(); |
| } |
| return ZX_ERR_NO_MEMORY; |
| } |
| |
| DEBUG_ASSERT(can_borrow || !page->is_loaned()); |
| AllocPageHelperLocked(page); |
| |
| if (use_loaned_list) { |
| DecrementFreeLoanedCountLocked(1); |
| } else { |
| DecrementFreeCountLocked(1); |
| } |
| |
| if (pa_out) { |
| *pa_out = page->paddr(); |
| } |
| |
| if (page_out) { |
| *page_out = page; |
| } |
| |
| return ZX_OK; |
| } |
| |
| zx_status_t PmmNode::AllocPages(size_t count, uint alloc_flags, list_node* list) { |
| LTRACEF("count %zu\n", count); |
| |
| DEBUG_ASSERT(Thread::Current::memory_allocation_state().IsEnabled()); |
| // list must be initialized prior to calling this |
| DEBUG_ASSERT(list); |
| |
| if (unlikely(count == 0)) { |
| return ZX_OK; |
| } else if (count == 1) { |
| vm_page* page; |
| zx_status_t status = AllocPage(alloc_flags, &page, nullptr); |
| if (likely(status == ZX_OK)) { |
| list_add_tail(list, &page->queue_node); |
| } |
| return status; |
| } |
| |
| // If the caller sets PMM_ALLOC_FLAG_MUST_BORROW, the caller must also set |
| // PMM_ALLOC_FLAG_CAN_BORROW, and must not set PMM_ALLOC_FLAG_CAN_WAIT. |
| DEBUG_ASSERT( |
| !(alloc_flags & PMM_ALLOC_FLAG_MUST_BORROW) || |
| ((alloc_flags & PMM_ALLOC_FLAG_CAN_BORROW) && !((alloc_flags & PMM_ALLOC_FLAG_CAN_WAIT)))); |
| const bool can_borrow = pmm_physical_page_borrowing_config()->is_any_borrowing_enabled() && |
| !!(alloc_flags & PMM_ALLOC_FLAG_CAN_BORROW); |
| const bool must_borrow = can_borrow && !!(alloc_flags & PMM_ALLOC_FLAG_MUST_BORROW); |
| |
| AutoPreemptDisabler preempt_disable; |
| Guard<Mutex> guard{&lock_}; |
| |
| uint64_t free_count; |
| if (must_borrow) { |
| free_count = 0; |
| } else { |
| free_count = free_count_.load(ktl::memory_order_relaxed); |
| } |
| uint64_t available_count = free_count; |
| uint64_t free_loaned_count = 0; |
| if (can_borrow) { |
| free_loaned_count = free_loaned_count_.load(ktl::memory_order_relaxed); |
| available_count += free_loaned_count; |
| } |
| |
| if (unlikely(count > available_count)) { |
| if ((alloc_flags & PMM_ALLOC_FLAG_CAN_WAIT) && !never_return_should_wait_) { |
| pmm_alloc_delayed.Add(1); |
| return ZX_ERR_SHOULD_WAIT; |
| } |
| if (!must_borrow) { |
| // Allocation failures from the regular free list are likely to become user-visible. |
| ReportAllocFailure(); |
| } |
| return ZX_ERR_NO_MEMORY; |
| } |
| // Prefer to allocate from the loaned list if this allocation allows it. If loaned pages are not |
| // allowed for this allocation, free_loaned_count will be zero here. |
| DEBUG_ASSERT(can_borrow || !free_loaned_count); |
| DEBUG_ASSERT(!must_borrow || !free_count); |
| uint64_t from_loaned_free = ktl::min(count, free_loaned_count); |
| uint64_t from_free = count - from_loaned_free; |
| |
| DecrementFreeCountLocked(from_free); |
| |
| // For simplicity of OOM state detection we do this check after decrementing the free count, |
| // since the error case is unlikely and not performance critical. |
| // Even if no pages are being requested from the regular free list (because loaned pages can |
| // cover the request) we still return ZX_ERR_SHOULD_WAIT in the OOM state, since we would prefer |
| // those loaned pages to be used for allocations that cannot be delayed. |
| if ((alloc_flags & PMM_ALLOC_FLAG_CAN_WAIT) && InOomStateLocked() && !never_return_should_wait_) { |
| IncrementFreeCountLocked(from_free); |
| pmm_alloc_delayed.Add(1); |
| return ZX_ERR_SHOULD_WAIT; |
| } |
| |
| DecrementFreeLoanedCountLocked(from_loaned_free); |
| |
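| // Pull the pages off in batches: first from the loaned free list (when borrowing is allowed and |
| // it is non-empty), then from the regular free list, splicing each batch onto the caller's list. |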
| do { |
| DEBUG_ASSERT(count == from_loaned_free + from_free); |
| list_node* which_list; |
| size_t which_count; |
| if (can_borrow && !list_is_empty(&free_loaned_list_)) { |
| which_list = &free_loaned_list_; |
| which_count = from_loaned_free; |
| from_loaned_free = 0; |
| } else { |
| DEBUG_ASSERT(!must_borrow); |
| which_list = &free_list_; |
| which_count = from_free; |
| from_free = 0; |
| } |
| count -= which_count; |
| |
| DEBUG_ASSERT(which_count > 0); |
| auto node = which_list; |
| while (which_count > 0) { |
| node = list_next(which_list, node); |
| DEBUG_ASSERT(can_borrow || !containerof(node, vm_page, queue_node)->is_loaned()); |
| AllocPageHelperLocked(containerof(node, vm_page, queue_node)); |
| --which_count; |
| } |
| |
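| // |node| now points at the last page taken from |which_list|. Split the remaining free pages |
| // off after it, append the allocated run to the caller's |list|, and return the remainder to |
| // |which_list|. |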
| list_node tmp_list = LIST_INITIAL_VALUE(tmp_list); |
| list_split_after(which_list, node, &tmp_list); |
| if (list_is_empty(list)) { |
| list_move(which_list, list); |
| } else { |
| list_splice_after(which_list, list_peek_tail(list)); |
| } |
| list_move(&tmp_list, which_list); |
| DEBUG_ASSERT(count == from_loaned_free + from_free); |
| } while (count > 0); |
| |
| return ZX_OK; |
| } |
| |
| zx_status_t PmmNode::AllocRange(paddr_t address, size_t count, list_node* list) { |
| LTRACEF("address %#" PRIxPTR ", count %zu\n", address, count); |
| |
| DEBUG_ASSERT(Thread::Current::memory_allocation_state().IsEnabled()); |
| // list must be initialized prior to calling this |
| DEBUG_ASSERT(list); |
| // On error scenarios we will free the list, so make sure the caller didn't leave anything in |
| // there. |
| DEBUG_ASSERT(list_is_empty(list)); |
| |
| size_t allocated = 0; |
| if (count == 0) { |
| return ZX_OK; |
| } |
| |
| address = ROUNDDOWN(address, PAGE_SIZE); |
| |
| AutoPreemptDisabler preempt_disable; |
| Guard<Mutex> guard{&lock_}; |
| |
| // walk through the arenas, looking to see if the physical page belongs to one of them |
| for (auto& a : arena_list_) { |
| for (; allocated < count && a.address_in_arena(address); address += PAGE_SIZE) { |
| vm_page_t* page = a.FindSpecific(address); |
| if (!page) { |
| break; |
| } |
| |
| if (!page->is_free()) { |
| break; |
| } |
| |
| // We never allocate loaned pages for callers of AllocRange(). |
| if (page->loaned) { |
| break; |
| } |
| |
| list_delete(&page->queue_node); |
| |
| AllocPageHelperLocked(page); |
| |
| list_add_tail(list, &page->queue_node); |
| |
| allocated++; |
| DecrementFreeCountLocked(1); |
| } |
| |
| if (allocated == count) { |
| break; |
| } |
| } |
| |
| if (allocated != count) { |
| // we were not able to allocate the entire run, free these pages |
| FreeListLocked(list); |
| return ZX_ERR_NOT_FOUND; |
| } |
| |
| return ZX_OK; |
| } |
| |
| zx_status_t PmmNode::AllocContiguous(const size_t count, uint alloc_flags, uint8_t alignment_log2, |
| paddr_t* pa, list_node* list) { |
| DEBUG_ASSERT(Thread::Current::memory_allocation_state().IsEnabled()); |
| LTRACEF("count %zu, align %u\n", count, alignment_log2); |
| |
| if (count == 0) { |
| return ZX_OK; |
| } |
| if (alignment_log2 < PAGE_SIZE_SHIFT) { |
| alignment_log2 = PAGE_SIZE_SHIFT; |
| } |
| |
| DEBUG_ASSERT(!(alloc_flags & (PMM_ALLOC_FLAG_CAN_BORROW | PMM_ALLOC_FLAG_MUST_BORROW | |
| PMM_ALLOC_FLAG_CAN_WAIT))); |
| // pa and list must be valid pointers |
| DEBUG_ASSERT(pa); |
| DEBUG_ASSERT(list); |
| |
| AutoPreemptDisabler preempt_disable; |
| Guard<Mutex> guard{&lock_}; |
| |
| for (auto& a : arena_list_) { |
| vm_page_t* p = a.FindFreeContiguous(count, alignment_log2); |
| if (!p) { |
| continue; |
| } |
| |
| *pa = p->paddr(); |
| |
| // remove the pages from the run out of the free list |
| for (size_t i = 0; i < count; i++, p++) { |
| DEBUG_ASSERT_MSG(p->is_free(), "p %p state %u\n", p, static_cast<uint32_t>(p->state())); |
| // Loaned pages are never returned by FindFreeContiguous() above. |
| DEBUG_ASSERT(!p->loaned); |
| DEBUG_ASSERT(list_in_list(&p->queue_node)); |
| |
| list_delete(&p->queue_node); |
| p->set_state(vm_page_state::ALLOC); |
| |
| DecrementFreeCountLocked(1); |
| AsanUnpoisonPage(p); |
| checker_.AssertPattern(p); |
| |
| list_add_tail(list, &p->queue_node); |
| } |
| |
| return ZX_OK; |
| } |
| |
| // We could potentially move contents of non-pinned pages out of the way for critical contiguous |
| // allocations, but for now... |
| LTRACEF("couldn't find run\n"); |
| return ZX_ERR_NOT_FOUND; |
| } |
| |
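| // Common bookkeeping for returning a page to a free list: moves it to the FREE state, clears |
| // any stack owner, refills the free-fill pattern when enabled, and poisons the page for kASAN. |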
| void PmmNode::FreePageHelperLocked(vm_page* page) { |
| LTRACEF("page %p state %zu paddr %#" PRIxPTR "\n", page, VmPageStateIndex(page->state()), |
| page->paddr()); |
| |
| DEBUG_ASSERT(!page->is_free()); |
| DEBUG_ASSERT(page->state() != vm_page_state::OBJECT || page->object.pin_count == 0); |
| |
| // mark it free |
| page->set_state(vm_page_state::FREE); |
| |
| // Coming from OBJECT or ALLOC, this will only be true if the page was loaned (it may still be |
| // loaned, but doesn't have to be if the contiguous VMO the page was loaned from was deleted |
| // during stack ownership). |
| // |
| // Coming from a state other than OBJECT or ALLOC, this currently won't be true. If it were true |
| // in the future, it would only be because some other state gained an overlapping field, in which |
| // case we would still want to clear the invalid stack owner pointer value. Clearing it after |
| // setting FREE above (instead of before) is safe because the stack owner is only read elsewhere |
| // for pages with an underlying contiguous VMO owner (whether actually loaned at the time or |
| // not), and such pages can only be in the FREE, ALLOC, or OBJECT states, all of which have this |
| // field, so an invalid stack owner pointer is never read elsewhere (there's a magic number |
| // canary just in case though). We could instead clear any invalid stack owner before setting |
| // FREE above and shorten this comment, but there's no actual need for the extra "if", so we let |
| // this "if" handle it (especially since this whole paragraph describes a hypothetical future; |
| // there are no overlapping fields yet as of this writing). |
| if (page->object.is_stack_owned()) { |
| // Make FREE visible before lack of stack owner. |
| ktl::atomic_thread_fence(ktl::memory_order_release); |
| page->object.clear_stack_owner(); |
| } |
| |
| if (unlikely(free_fill_enabled_)) { |
| checker_.FillPattern(page); |
| } |
| |
| AsanPoisonPage(page, kAsanPmmFreeMagic); |
| } |
| |
| void PmmNode::FreePage(vm_page* page) { |
| AutoPreemptDisabler preempt_disable; |
| Guard<Mutex> guard{&lock_}; |
| |
| // pages freed individually shouldn't be in a queue |
| DEBUG_ASSERT(!list_in_list(&page->queue_node)); |
| |
| FreePageHelperLocked(page); |
| |
| list_node* which_list = nullptr; |
| if (!page->loaned) { |
| IncrementFreeCountLocked(1); |
| which_list = &free_list_; |
| } else if (!page->loan_cancelled) { |
| IncrementFreeLoanedCountLocked(1); |
| which_list = &free_loaned_list_; |
| } |
| |
| // Add the page to the appropriate free queue, unless loan_cancelled. The loan_cancelled pages |
| // don't go in any free queue because they shouldn't get re-used until reclaimed by their |
| // underlying contiguous VMO or until that underlying contiguous VMO is deleted. |
| DEBUG_ASSERT(which_list || page->loan_cancelled); |
| if (which_list) { |
| if constexpr (!__has_feature(address_sanitizer)) { |
| list_add_head(which_list, &page->queue_node); |
| } else { |
| // If address sanitizer is enabled, put the page at the tail to maximize reuse distance. |
| list_add_tail(which_list, &page->queue_node); |
| } |
| } |
| } |
| |
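| // Frees every page on |list|, routing loaned pages to free_loaned_list_ (or to no list at all |
| // if their loan was cancelled) and all other pages to free_list_. |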
| void PmmNode::FreeListLocked(list_node* list) { |
| DEBUG_ASSERT(list); |
| |
| // process list backwards so the head is as hot as possible |
| uint64_t count = 0; |
| uint64_t loaned_count = 0; |
| list_node freed_loaned_list = LIST_INITIAL_VALUE(freed_loaned_list); |
| { // scope page |
| vm_page* page = list_peek_tail_type(list, vm_page_t, queue_node); |
| while (page) { |
| FreePageHelperLocked(page); |
| vm_page_t* next_page = list_prev_type(list, &page->queue_node, vm_page_t, queue_node); |
| if (page->loaned) { |
| // Remove from |list| and possibly put on freed_loaned_list instead, to route to the correct |
| // free list, or no free list if loan_cancelled. |
| list_delete(&page->queue_node); |
| if (!page->loan_cancelled) { |
| list_add_head(&freed_loaned_list, &page->queue_node); |
| ++loaned_count; |
| } |
| } else { |
| count++; |
| } |
| page = next_page; |
| } |
| } // end scope page |
| |
| if constexpr (!__has_feature(address_sanitizer)) { |
| // Splice the regular pages onto the head of free_list_ and the loaned pages onto the head of |
| // free_loaned_list_. |
| list_splice_after(list, &free_list_); |
| list_splice_after(&freed_loaned_list, &free_loaned_list_); |
| } else { |
| // If address sanitizer is enabled, put the pages at the tail to maximize reuse distance. |
| if (!list_is_empty(&free_list_)) { |
| list_splice_after(list, list_peek_tail(&free_list_)); |
| } else { |
| list_splice_after(list, &free_list_); |
| } |
| if (!list_is_empty(&free_loaned_list_)) { |
| list_splice_after(&freed_loaned_list, list_peek_tail(&free_loaned_list_)); |
| } else { |
| list_splice_after(&freed_loaned_list, &free_loaned_list_); |
| } |
| } |
| |
| IncrementFreeCountLocked(count); |
| IncrementFreeLoanedCountLocked(loaned_count); |
| } |
| |
| void PmmNode::FreeList(list_node* list) { |
| AutoPreemptDisabler preempt_disable; |
| Guard<Mutex> guard{&lock_}; |
| |
| FreeListLocked(list); |
| } |
| |
| bool PmmNode::InOomStateLocked() { |
| if (mem_avail_state_cur_index_ == 0) { |
| return true; |
| } |
| // See pmm_check_alloc_random_should_wait in pmm.cc for an assertion that random should wait is |
| // only enabled if DEBUG_ASSERT_IMPLEMENTED. |
| if constexpr (DEBUG_ASSERT_IMPLEMENTED) { |
| // Randomly try to make 10% of allocations delayed allocations. |
| if (gBootOptions->pmm_alloc_random_should_wait && rand() < (RAND_MAX / 10)) { |
| return true; |
| } |
| } |
| return false; |
| } |
| |
| uint64_t PmmNode::CountFreePages() const TA_NO_THREAD_SAFETY_ANALYSIS { |
| return free_count_.load(ktl::memory_order_relaxed); |
| } |
| |
| uint64_t PmmNode::CountLoanedFreePages() const TA_NO_THREAD_SAFETY_ANALYSIS { |
| return free_loaned_count_.load(ktl::memory_order_relaxed); |
| } |
| |
| uint64_t PmmNode::CountLoanedNotFreePages() const TA_NO_THREAD_SAFETY_ANALYSIS { |
| AutoPreemptDisabler preempt_disable; |
| Guard<Mutex> guard{&lock_}; |
| return loaned_count_.load(ktl::memory_order_relaxed) - |
| free_loaned_count_.load(ktl::memory_order_relaxed); |
| } |
| |
| uint64_t PmmNode::CountLoanedPages() const TA_NO_THREAD_SAFETY_ANALYSIS { |
| return loaned_count_.load(ktl::memory_order_relaxed); |
| } |
| |
| uint64_t PmmNode::CountLoanCancelledPages() const TA_NO_THREAD_SAFETY_ANALYSIS { |
| return loan_cancelled_count_.load(ktl::memory_order_relaxed); |
| } |
| |
| uint64_t PmmNode::CountTotalBytes() const TA_NO_THREAD_SAFETY_ANALYSIS { |
| return arena_cumulative_size_; |
| } |
| |
| void PmmNode::DumpFree() const TA_NO_THREAD_SAFETY_ANALYSIS { |
| auto megabytes_free = CountFreePages() * PAGE_SIZE / MB; |
| printf(" %zu free MBs\n", megabytes_free); |
| } |
| |
| void PmmNode::Dump(bool is_panic) const { |
| // No lock analysis here, as we want to just go for it in the panic case without the lock. |
| auto dump = [this]() TA_NO_THREAD_SAFETY_ANALYSIS { |
| uint64_t free_count = free_count_.load(ktl::memory_order_relaxed); |
| uint64_t free_loaned_count = free_loaned_count_.load(ktl::memory_order_relaxed); |
| printf( |
| "pmm node %p: free_count %zu (%zu bytes), free_loaned_count: %zu (%zu bytes), total size " |
| "%zu\n", |
| this, free_count, free_count * PAGE_SIZE, free_loaned_count, free_loaned_count * PAGE_SIZE, |
| arena_cumulative_size_); |
| for (auto& a : arena_list_) { |
| a.Dump(false, false); |
| } |
| }; |
| |
| if (is_panic) { |
| dump(); |
| } else { |
| Guard<Mutex> guard{&lock_}; |
| dump(); |
| } |
| } |
| |
| zx_status_t PmmNode::InitReclamation(const uint64_t* watermarks, uint8_t watermark_count, |
| uint64_t debounce, void* context, |
| mem_avail_state_updated_callback_t callback) { |
| if (watermark_count > MAX_WATERMARK_COUNT) { |
| return ZX_ERR_INVALID_ARGS; |
| } |
| |
| AutoPreemptDisabler preempt_disable; |
| Guard<Mutex> guard{&lock_}; |
| |
| uint64_t tmp[MAX_WATERMARK_COUNT]; |
| uint64_t tmp_debounce = fbl::round_up(debounce, static_cast<uint64_t>(PAGE_SIZE)) / PAGE_SIZE; |
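| // Convert the watermarks from bytes to pages and check that they are strictly increasing, with |
| // the lowest watermark at least the debounce amount. |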
| for (uint8_t i = 0; i < watermark_count; i++) { |
| tmp[i] = watermarks[i] / PAGE_SIZE; |
| if (i > 0) { |
| if (tmp[i] <= tmp[i - 1]) { |
| return ZX_ERR_INVALID_ARGS; |
| } |
| } else { |
| if (tmp[i] < tmp_debounce) { |
| return ZX_ERR_INVALID_ARGS; |
| } |
| } |
| } |
| |
| mem_avail_state_watermark_count_ = watermark_count; |
| mem_avail_state_debounce_ = tmp_debounce; |
| mem_avail_state_context_ = context; |
| mem_avail_state_callback_ = callback; |
| memcpy(mem_avail_state_watermarks_, tmp, sizeof(mem_avail_state_watermarks_)); |
| static_assert(sizeof(tmp) == sizeof(mem_avail_state_watermarks_)); |
| |
| UpdateMemAvailStateLocked(); |
| |
| return ZX_OK; |
| } |
| |
| void PmmNode::UpdateMemAvailStateLocked() { |
| // Find the smallest watermark which is greater than the number of free pages. |
| uint8_t target = mem_avail_state_watermark_count_; |
| for (uint8_t i = 0; i < mem_avail_state_watermark_count_; i++) { |
| if (mem_avail_state_watermarks_[i] > free_count_.load(ktl::memory_order_relaxed)) { |
| target = i; |
| break; |
| } |
| } |
| SetMemAvailStateLocked(target); |
| } |
| |
| void PmmNode::SetMemAvailStateLocked(uint8_t mem_avail_state) { |
| mem_avail_state_cur_index_ = mem_avail_state; |
| |
| if (mem_avail_state_cur_index_ == 0) { |
| if (likely(!never_return_should_wait_)) { |
| free_pages_evt_.Unsignal(); |
| } |
| } else { |
| free_pages_evt_.Signal(); |
| } |
| |
| if (mem_avail_state_cur_index_ > 0) { |
| // If there is a smaller watermark, then we transition into that state when the |
| // number of free pages drops more than |mem_avail_state_debounce_| pages into that state. |
| mem_avail_state_lower_bound_ = |
| mem_avail_state_watermarks_[mem_avail_state_cur_index_ - 1] - mem_avail_state_debounce_; |
| } else { |
| // There is no smaller state, so we can't ever transition down. |
| mem_avail_state_lower_bound_ = 0; |
| } |
| |
| if (mem_avail_state_cur_index_ < mem_avail_state_watermark_count_) { |
| // If there is a larger watermark, then we transition out of the current state when |
| // the number of free pages exceeds the current state's watermark by at least |
| // |mem_avail_state_debounce_|. |
| mem_avail_state_upper_bound_ = |
| mem_avail_state_watermarks_[mem_avail_state_cur_index_] + mem_avail_state_debounce_; |
| } else { |
| // There is no larger state, so we can't ever transition up. |
| mem_avail_state_upper_bound_ = UINT64_MAX / PAGE_SIZE; |
| } |
| |
| mem_avail_state_callback_(mem_avail_state_context_, mem_avail_state_cur_index_); |
| } |
| |
| void PmmNode::DumpMemAvailState() const { |
| Guard<Mutex> guard{&lock_}; |
| |
| printf("watermarks: ["); |
| for (unsigned i = 0; i < mem_avail_state_watermark_count_; i++) { |
| printf("%s%s", FormattedBytes(mem_avail_state_watermarks_[i] * PAGE_SIZE).c_str(), |
| i + 1 == mem_avail_state_watermark_count_ ? "]\n" : ", "); |
| } |
| printf("debounce: %s\n", FormattedBytes(mem_avail_state_debounce_ * PAGE_SIZE).c_str()); |
| printf("current state: %u\n", mem_avail_state_cur_index_); |
| printf("current bounds: [%s, %s]\n", |
| FormattedBytes(mem_avail_state_lower_bound_ * PAGE_SIZE).c_str(), |
| FormattedBytes(mem_avail_state_upper_bound_ * PAGE_SIZE).c_str()); |
| printf("free memory: %s\n", FormattedBytes(free_count_ * PAGE_SIZE).c_str()); |
| } |
| |
| uint64_t PmmNode::DebugNumPagesTillMemState(uint8_t mem_state_idx) const { |
| Guard<Mutex> guard{&lock_}; |
| if (mem_avail_state_cur_index_ <= mem_state_idx) { |
| // Already in mem_state_idx, or in a state with less available memory than mem_state_idx. |
| return 0; |
| } |
| // We need to get the free page count below mem_avail_state_watermarks_[mem_state_idx]; if we are |
| // currently in state (mem_state_idx + 1) we also need to clear the debounce amount. For |
| // simplicity we just always include the debounce amount. |
| uint64_t trigger = mem_avail_state_watermarks_[mem_state_idx] - mem_avail_state_debounce_; |
| return (free_count_ - trigger); |
| } |
| |
| uint8_t PmmNode::DebugMaxMemAvailState() const { |
| Guard<Mutex> guard{&lock_}; |
| return mem_avail_state_watermark_count_; |
| } |
| |
| void PmmNode::DebugMemAvailStateCallback(uint8_t mem_state_idx) const { |
| Guard<Mutex> guard{&lock_}; |
| if (mem_state_idx >= mem_avail_state_watermark_count_) { |
| return; |
| } |
| // Invoke callback for the requested state without allocating additional memory, or messing with |
| // any of the internal memory state tracking counters. |
| mem_avail_state_callback_(mem_avail_state_context_, mem_state_idx); |
| } |
| |
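| // After this is called the PMM will never again return ZX_ERR_SHOULD_WAIT from allocation |
| // calls; free_pages_evt_ is signaled so that any current waiters wake up. |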
| void PmmNode::StopReturningShouldWait() { |
| Guard<Mutex> guard{&lock_}; |
| never_return_should_wait_ = true; |
| free_pages_evt_.Signal(); |
| } |
| |
| int64_t PmmNode::get_alloc_failed_count() { return pmm_alloc_failed.SumAcrossAllCpus(); } |
| |
| bool PmmNode::has_alloc_failed_no_mem() { |
| return alloc_failed_no_mem.load(ktl::memory_order_relaxed); |
| } |
| |
| void PmmNode::BeginLoan(list_node* page_list) { |
| DEBUG_ASSERT(page_list); |
| AutoPreemptDisabler preempt_disable; |
| Guard<Mutex> guard{&lock_}; |
| |
| uint64_t loaned_count = 0; |
| vm_page* page; |
| list_for_every_entry (page_list, page, vm_page, queue_node) { |
| DEBUG_ASSERT(!page->loaned); |
| DEBUG_ASSERT(!page->is_free()); |
| page->loaned = true; |
| ++loaned_count; |
| DEBUG_ASSERT(!page->loan_cancelled); |
| } |
| IncrementLoanedCountLocked(loaned_count); |
| |
| // Callers of BeginLoan() generally won't want the pages loaned to them; the intent is to loan |
| // them to the rest of the system, so free them here as well. Some callers will essentially be |
| // choosing between pmm_begin_loan() and pmm_free(). |
| FreeListLocked(page_list); |
| } |
| |
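| // Cancels the loan on every page in the physical range [address, address + count * PAGE_SIZE): |
| // marks each page loan_cancelled and removes any free ones from free_loaned_list_ so they cannot |
| // be handed out again before EndLoan(). |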
| void PmmNode::CancelLoan(paddr_t address, size_t count) { |
| AutoPreemptDisabler preempt_disable; |
| Guard<Mutex> guard{&lock_}; |
| DEBUG_ASSERT(IS_PAGE_ALIGNED(address)); |
| paddr_t end = address + count * PAGE_SIZE; |
| DEBUG_ASSERT(address <= end); |
| |
| uint64_t loan_cancelled_count = 0; |
| uint64_t no_longer_free_loaned_count = 0; |
| |
| ForPagesInPhysRangeLocked(address, count, |
| [&loan_cancelled_count, &no_longer_free_loaned_count](vm_page_t* page) { |
| // We can assert this because of PageSource's overlapping request |
| // handling. |
| DEBUG_ASSERT(page->is_loaned()); |
| bool was_cancelled = page->loan_cancelled; |
| // We can assert this because of PageSource's overlapping request |
| // handling. |
| DEBUG_ASSERT(!was_cancelled); |
| page->loan_cancelled = true; |
| ++loan_cancelled_count; |
| if (page->is_free()) { |
| // Currently in free_loaned_list_. |
| DEBUG_ASSERT(list_in_list(&page->queue_node)); |
| // Remove from free_loaned_list_ to prevent any new use until |
| // after EndLoan. |
| list_delete(&page->queue_node); |
| no_longer_free_loaned_count++; |
| } |
| }); |
| |
| IncrementLoanCancelledCountLocked(loan_cancelled_count); |
| DecrementFreeLoanedCountLocked(no_longer_free_loaned_count); |
| } |
| |
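| // Completes loan cancellation for the given range: each page must already be free and |
| // loan_cancelled; the pages stop being loaned and are returned to the caller on |page_list|. |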
| void PmmNode::EndLoan(paddr_t address, size_t count, list_node* page_list) { |
| AutoPreemptDisabler preempt_disable; |
| Guard<Mutex> guard{&lock_}; |
| DEBUG_ASSERT(IS_PAGE_ALIGNED(address)); |
| paddr_t end = address + count * PAGE_SIZE; |
| DEBUG_ASSERT(address <= end); |
| |
| uint64_t loan_ended_count = 0; |
| |
| ForPagesInPhysRangeLocked(address, count, [this, &page_list, &loan_ended_count](vm_page_t* page) { |
| AssertHeld(lock_); |
| |
| // Because PageSource serializes requests so that at most one request to the PageProvider is in |
| // flight at a time for any given page, we can assert these conditions instead of having to |
| // check them. |
| DEBUG_ASSERT(page->is_loaned()); |
| DEBUG_ASSERT(page->is_loan_cancelled()); |
| DEBUG_ASSERT(page->is_free()); |
| |
| // Already not in free_loaned_list_ (because loan_cancelled already). |
| DEBUG_ASSERT(!list_in_list(&page->queue_node)); |
| |
| page->loaned = false; |
| page->loan_cancelled = false; |
| ++loan_ended_count; |
| |
| AllocPageHelperLocked(page); |
| list_add_tail(page_list, &page->queue_node); |
| }); |
| |
| DecrementLoanCancelledCountLocked(loan_ended_count); |
| DecrementLoanedCountLocked(loan_ended_count); |
| } |
| |
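| // Called when the lending (contiguous) VMO is deleted: pages in the range stop being loaned and |
| // any free ones move from free_loaned_list_ to the regular free_list_. |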
| void PmmNode::DeleteLender(paddr_t address, size_t count) { |
| AutoPreemptDisabler preempt_disable; |
| Guard<Mutex> guard{&lock_}; |
| DEBUG_ASSERT(IS_PAGE_ALIGNED(address)); |
| paddr_t end = address + count * PAGE_SIZE; |
| DEBUG_ASSERT(address <= end); |
| uint64_t removed_free_loaned_count = 0; |
| uint64_t added_free_count = 0; |
| |
| uint64_t loan_ended_count = 0; |
| uint64_t loan_un_cancelled_count = 0; |
| |
| ForPagesInPhysRangeLocked(address, count, |
| [this, &removed_free_loaned_count, &loan_un_cancelled_count, |
| &added_free_count, &loan_ended_count](vm_page_t* page) { |
| DEBUG_ASSERT(page->loaned); |
| if (page->is_free() && !page->loan_cancelled) { |
| // Remove from free_loaned_list_. |
| list_delete(&page->queue_node); |
| ++removed_free_loaned_count; |
| } |
| if (page->loan_cancelled) { |
| ++loan_un_cancelled_count; |
| } |
| if (page->is_free()) { |
| // add it to the free queue |
| if constexpr (!__has_feature(address_sanitizer)) { |
| list_add_head(&free_list_, &page->queue_node); |
| } else { |
| // If address sanitizer is enabled, put the page at the tail to |
| // maximize reuse distance. |
| list_add_tail(&free_list_, &page->queue_node); |
| } |
| added_free_count++; |
| } |
| page->loan_cancelled = false; |
| page->loaned = false; |
| ++loan_ended_count; |
| }); |
| |
| DecrementFreeLoanedCountLocked(removed_free_loaned_count); |
| IncrementFreeCountLocked(added_free_count); |
| DecrementLoanedCountLocked(loan_ended_count); |
| DecrementLoanCancelledCountLocked(loan_un_cancelled_count); |
| } |
| |
| bool PmmNode::IsLoaned(vm_page_t* page) { |
| AutoPreemptDisabler preempt_disable; |
| Guard<Mutex> guard{&lock_}; |
| return page->loaned; |
| } |
| |
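| // Invokes |func| on the vm_page_t for every page in the physical range |
| // [start, start + count * PAGE_SIZE), walking the arena list (or falling back to the global |
| // pmm_node's pages when this node has no arenas, as in unit tests). |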
| template <typename F> |
| void PmmNode::ForPagesInPhysRangeLocked(paddr_t start, size_t count, F func) { |
| DEBUG_ASSERT(IS_PAGE_ALIGNED(start)); |
| // We only intend ForPagesInPhysRangeLocked() to be used after arenas have been added to the |
| // global pmm_node. |
| DEBUG_ASSERT(mp_get_active_mask() != 0); |
| |
| if (unlikely(arena_list_.is_empty())) { |
| // We're in a unit test, using ManagedPmmNode which has no arenas. So fall back to the global |
| // pmm_node (which has at least one arena) to find the actual vm_page_t for each page. |
| // |
| // TODO: Make ManagedPmmNode have a more real arena, possibly by allocating a contiguous VMO and |
| // creating an arena from that. |
| paddr_t end = start + count * PAGE_SIZE; |
| for (paddr_t iter = start; iter < end; iter += PAGE_SIZE) { |
| vm_page_t* page = paddr_to_vm_page(iter); |
| func(page); |
| } |
| return; |
| } |
| |
| // We have at least one arena, so use arena_list_ directly. |
| paddr_t end = start + count * PAGE_SIZE; |
| DEBUG_ASSERT(start <= end); |
| paddr_t page_addr = start; |
| for (auto& a : arena_list_) { |
| for (; page_addr < end && a.address_in_arena(page_addr); page_addr += PAGE_SIZE) { |
| vm_page_t* page = a.FindSpecific(page_addr); |
| DEBUG_ASSERT(page); |
| DEBUG_ASSERT(page_addr == page->paddr()); |
| func(page); |
| } |
| if (page_addr == end) { |
| break; |
| } |
| } |
| DEBUG_ASSERT(page_addr == end); |
| } |
| |
| void PmmNode::ReportAllocFailure() { |
| kcounter_add(pmm_alloc_failed, 1); |
| |
| // Update before signaling the MemoryWatchdog to ensure it observes the update. |
| // |
| // |alloc_failed_no_mem| latches, so we only need to invoke the callback once. We could call it |
| // on every failure, but that would be wasteful and we don't want to spam any underlying Event |
| // (or the thread lock or the MemoryWatchdog). |
| const bool first_time = !alloc_failed_no_mem.exchange(true, ktl::memory_order_relaxed); |
| if (first_time) { |
| // Note, the |cur_state| value passed to the callback doesn't really matter because all we're |
| // trying to do here is signal and unblock the MemoryWatchdog's worker thread. |
| mem_avail_state_callback_(mem_avail_state_context_, mem_avail_state_cur_index_); |
| } |
| } |