// Copyright 2016 The Fuchsia Authors
//
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file or at
// https://opensource.org/licenses/MIT
#include "vm/vm_object_paged.h"
#include <align.h>
#include <assert.h>
#include <inttypes.h>
#include <lib/console.h>
#include <lib/counters.h>
#include <lib/fit/defer.h>
#include <stdlib.h>
#include <string.h>
#include <trace.h>
#include <zircon/compiler.h>
#include <zircon/errors.h>
#include <zircon/types.h>
#include <arch/ops.h>
#include <fbl/alloc_checker.h>
#include <ktl/algorithm.h>
#include <ktl/array.h>
#include <ktl/utility.h>
#include <vm/discardable_vmo_tracker.h>
#include <vm/fault.h>
#include <vm/page_source.h>
#include <vm/physical_page_provider.h>
#include <vm/physmap.h>
#include <vm/vm.h>
#include <vm/vm_address_region.h>
#include <vm/vm_cow_pages.h>
#include "vm_priv.h"
#include <ktl/enforce.h>
#define LOCAL_TRACE VM_GLOBAL_TRACE(0)
namespace {
KCOUNTER(vmo_attribution_queries, "vm.attributed_memory.object.queries")
} // namespace
VmObjectPaged::VmObjectPaged(uint32_t options, fbl::RefPtr<VmCowPages> cow_pages, VmCowRange range)
: VmObject(options | kPaged), cow_pages_(ktl::move(cow_pages)), cow_range_(range) {
LTRACEF("%p\n", this);
}
VmObjectPaged::VmObjectPaged(uint32_t options, fbl::RefPtr<VmCowPages> cow_pages)
: VmObjectPaged(options, ktl::move(cow_pages), VmCowRange(0, UINT64_MAX)) {}
VmObjectPaged::~VmObjectPaged() {
canary_.Assert();
LTRACEF("%p\n", this);
// VmObjectPaged initialization must always complete and is not allowed to fail, so this object
// should always end up in the global list.
DEBUG_ASSERT(InGlobalList());
DestructorHelper();
}
void VmObjectPaged::DestructorHelper() {
RemoveFromGlobalList();
if (options_ & kAlwaysPinned) {
Unpin(0, size());
}
fbl::RefPtr<VmCowPages> deferred;
{
Guard<CriticalMutex> guard{lock()};
// Only clear the backlink if we are not a reference. A reference does not "own" the VmCowPages,
// so in the typical case, the VmCowPages will not have its backlink set to a reference. There
// does exist an edge case where the backlink can be a reference, which is handled by the else
// block below.
if (!is_reference()) {
cow_pages_locked()->set_paged_backlink_locked(nullptr);
} else {
// If this is a reference, we need to remove it from the original (parent) VMO's reference
// list.
VmObjectPaged* root_ref = cow_pages_locked()->get_paged_backlink_locked();
// The VmCowPages will have a valid backlink, either to the original VmObjectPaged or a
// reference VmObjectPaged, as long as there is a reference that is alive. We know that this
// is a reference.
DEBUG_ASSERT(root_ref);
if (likely(root_ref != this)) {
AssertHeld(root_ref->lock_ref());
VmObjectPaged* removed = root_ref->reference_list_.erase(*this);
DEBUG_ASSERT(removed == this);
} else {
// It is possible for the backlink to point to |this| if the original parent went away at
// some point and the rest of the reference list had to be re-homed to |this|, and the
// backlink set to |this|. The VmCowPages was pointing to us, so clear the backlink. The
// backlink will get reset below if other references remain.
cow_pages_locked()->set_paged_backlink_locked(nullptr);
}
}
// If this VMO had references, pick one of the references as the paged backlink from the shared
// VmCowPages. Also, move the remainder of the reference list to the chosen reference. Note that
// we're only moving the reference list over without adding the references to the children list;
// we do not want these references to be counted as children of the chosen VMO. We simply want a
// safe way to propagate mapping updates and VmCowPages changes on hidden node addition.
if (!reference_list_.is_empty()) {
// We should only be attempting to reset the backlink if the owner is going away and has reset
// the backlink above.
DEBUG_ASSERT(cow_pages_locked()->get_paged_backlink_locked() == nullptr);
VmObjectPaged* paged_backlink = reference_list_.pop_front();
cow_pages_locked()->set_paged_backlink_locked(paged_backlink);
AssertHeld(paged_backlink->lock_ref());
paged_backlink->reference_list_.splice(paged_backlink->reference_list_.end(),
reference_list_);
}
DEBUG_ASSERT(reference_list_.is_empty());
deferred = cow_pages_;
}
while (deferred) {
deferred = deferred->MaybeDeadTransition();
}
fbl::RefPtr<VmObjectPaged> maybe_parent;
// Re-home all our children with any parent that we have.
{
Guard<CriticalMutex> child_guard{ChildListLock::Get()};
while (!children_list_.is_empty()) {
VmObject* c = &children_list_.front();
children_list_.pop_front();
VmObjectPaged* child = reinterpret_cast<VmObjectPaged*>(c);
child->parent_ = parent_;
if (parent_) {
// Ignore the return since 'this' is a child so we know we are not transitioning from 0->1
// children.
[[maybe_unused]] bool notify = parent_->AddChildLocked(child);
DEBUG_ASSERT(!notify);
}
}
if (parent_) {
// As parent_ is a raw pointer we must ensure that it lives long enough if we call a method on
// it. To do so we attempt to upgrade it to a refptr, which could fail if it's
// already slated for deletion.
maybe_parent = fbl::MakeRefPtrUpgradeFromRaw(parent_, child_guard);
if (maybe_parent) {
// Holding refptr, can safely pass in the guard to RemoveChild.
parent_->RemoveChild(this, child_guard.take());
} else {
// The parent is up for deletion, so there's no need to use RemoveChild; there is no user
// dispatcher to notify anyway, so just drop ourselves to keep the hierarchy correct.
parent_->DropChildLocked(this);
}
}
}
if (maybe_parent) {
// As we constructed a RefPtr to our parent, and we are in our own destructor, there is now
// the potential for recursive destruction if we need to delete the parent due to holding the
// last ref, hit this same path, etc.
VmDeferredDeleter<VmObjectPaged>::DoDeferredDelete(ktl::move(maybe_parent));
}
}
zx_status_t VmObjectPaged::HintRange(uint64_t offset, uint64_t len, EvictionHint hint) {
canary_.Assert();
if (can_block_on_page_requests() && hint == EvictionHint::AlwaysNeed) {
lockdep::AssertNoLocksHeld();
}
// Ignore hints for non user-pager-backed VMOs. We choose to silently ignore hints for
// incompatible combinations instead of failing. This is because the kernel does not make any
// explicit guarantees on hints; since they are just hints, the kernel is always free to ignore
// them.
if (!cow_pages_->can_root_source_evict()) {
return ZX_OK;
}
auto cow_range = GetCowRange(offset, len);
if (!cow_range) {
return ZX_ERR_OUT_OF_RANGE;
}
switch (hint) {
case EvictionHint::DontNeed: {
return cow_pages_->PromoteRangeForReclamation(*cow_range);
}
case EvictionHint::AlwaysNeed: {
// Hints are best effort, so ignore any errors in the page-in process.
return cow_pages_->ProtectRangeFromReclamation(*cow_range, /*set_always_need=*/true,
/*ignore_errors=*/true);
}
}
return ZX_OK;
}
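// Illustrative (hypothetical) use of the hinting API above; |vmo| is an assumed
// fbl::RefPtr<VmObjectPaged> to a pager-backed VMO, not a name from this file, and the
// EvictionHint enum qualification is elided:
//
//   // Ask the kernel to keep the first page resident if possible.
//   vmo->HintRange(0, PAGE_SIZE, EvictionHint::AlwaysNeed);
//   // Later, mark the same page as a good reclamation candidate again.
//   vmo->HintRange(0, PAGE_SIZE, EvictionHint::DontNeed);
//
// If the VMO's root is not user-pager-backed, both calls simply return ZX_OK, per the comment
// above.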
zx_status_t VmObjectPaged::PrefetchRange(uint64_t offset, uint64_t len) {
canary_.Assert();
if (can_block_on_page_requests()) {
lockdep::AssertNoLocksHeld();
}
// Round offset and len to be page aligned. Use a sub-scope to validate that temporary end
// calculations cannot be accidentally used later on.
{
uint64_t end;
if (add_overflow(offset, len, &end)) {
return ZX_ERR_OUT_OF_RANGE;
}
const uint64_t end_page = ROUNDUP_PAGE_SIZE(end);
if (end_page < end) {
return ZX_ERR_OUT_OF_RANGE;
}
DEBUG_ASSERT(end_page >= offset);
offset = ROUNDDOWN_PAGE_SIZE(offset);
len = end_page - offset;
}
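// Worked example of the rounding above (assuming 4KiB pages): offset 0x1100 and len 0x200 give
// end 0x1300, which rounds up to 0x2000; offset rounds down to 0x1000, so the prefetched range
// becomes [0x1000, 0x2000) with len 0x1000.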
auto cow_range = GetCowRange(offset, len);
if (!cow_range) {
return ZX_ERR_OUT_OF_RANGE;
}
// Cannot overflow otherwise IsBoundedBy would have failed.
DEBUG_ASSERT(cow_range->is_page_aligned());
if (cow_pages_->is_root_source_user_pager_backed()) {
return cow_pages_->ProtectRangeFromReclamation(*cow_range,
/*set_always_need=*/false,
/*ignore_errors=*/false);
}
// Committing high priority pages is best effort, so ignore any errors from decompressing.
return cow_pages_->DecompressInRange(*cow_range);
}
void VmObjectPaged::CommitHighPriorityPages(uint64_t offset, uint64_t len) {
{
Guard<CriticalMutex> guard{lock()};
if (!cow_pages_locked()->is_high_memory_priority_locked()) {
return;
}
}
// Ignore the result of the prefetch, high priority commit is best effort.
PrefetchRange(offset, len);
}
bool VmObjectPaged::CanDedupZeroPagesLocked() {
canary_.Assert();
// Skip uncached VMOs as we cannot efficiently scan them.
if ((self_locked()->GetMappingCachePolicyLocked() & ZX_CACHE_POLICY_MASK) !=
ZX_CACHE_POLICY_CACHED) {
return false;
}
// Okay to dedup from this VMO.
return true;
}
zx_status_t VmObjectPaged::CreateCommon(uint32_t pmm_alloc_flags, uint32_t options, uint64_t size,
fbl::RefPtr<VmObjectPaged>* obj) {
DEBUG_ASSERT(!(options & (kContiguous | kCanBlockOnPageRequests)));
// Cannot be resizable and pinned, otherwise we will lose track of the pinned range.
if ((options & kResizable) && (options & kAlwaysPinned)) {
return ZX_ERR_INVALID_ARGS;
}
if (pmm_alloc_flags & PMM_ALLOC_FLAG_CAN_WAIT) {
options |= kCanBlockOnPageRequests;
}
// make sure size is page aligned
if (!IS_PAGE_ROUNDED(size)) {
return ZX_ERR_INVALID_ARGS;
}
if (size > MAX_SIZE) {
return ZX_ERR_OUT_OF_RANGE;
}
fbl::AllocChecker ac;
ktl::unique_ptr<DiscardableVmoTracker> discardable = nullptr;
if (options & kDiscardable) {
discardable = ktl::make_unique<DiscardableVmoTracker>(&ac);
if (!ac.check()) {
return ZX_ERR_NO_MEMORY;
}
}
// This function isn't used to create slices or pager-backed VMOs, so VmCowPageOptions can be
// kNone.
fbl::RefPtr<VmCowPages> cow_pages;
zx_status_t status = VmCowPages::Create(VmCowPagesOptions::kNone, pmm_alloc_flags, size,
ktl::move(discardable), &cow_pages);
if (status != ZX_OK) {
return status;
}
// If this VMO will always be pinned, allocate and pin the pages in the VmCowPages prior to
// creating the VmObjectPaged. This ensures the VmObjectPaged destructor can assume that the pages
// are committed and pinned.
if (options & kAlwaysPinned) {
list_node_t prealloc_pages;
list_initialize(&prealloc_pages);
status = pmm_alloc_pages(size / PAGE_SIZE, pmm_alloc_flags, &prealloc_pages);
if (status != ZX_OK) {
return status;
}
Guard<CriticalMutex> guard{cow_pages->lock()};
// Add all the preallocated pages to the object; this takes ownership of all pages regardless
// of the outcome. This is a new VMO, but this call could fail due to OOM.
status = cow_pages->AddNewPagesLocked(0, &prealloc_pages, VmCowPages::CanOverwriteContent::Zero,
true, nullptr);
if (status != ZX_OK) {
return status;
}
// With all the pages in place, pin them.
status = cow_pages->PinRangeLocked(VmCowRange(0, size));
ASSERT(status == ZX_OK);
}
auto vmo = fbl::AdoptRef<VmObjectPaged>(new (&ac) VmObjectPaged(options, ktl::move(cow_pages)));
if (!ac.check()) {
if (options & kAlwaysPinned) {
Guard<CriticalMutex> guard{cow_pages->lock()};
cow_pages->UnpinLocked(VmCowRange(0, size), nullptr);
}
return ZX_ERR_NO_MEMORY;
}
// This creation has succeeded. Must wire up the cow pages and *then* place in the global list.
{
Guard<CriticalMutex> guard{vmo->lock()};
vmo->cow_pages_locked()->set_paged_backlink_locked(vmo.get());
vmo->cow_pages_locked()->TransitionToAliveLocked();
}
vmo->AddToGlobalList();
*obj = ktl::move(vmo);
return ZX_OK;
}
zx_status_t VmObjectPaged::Create(uint32_t pmm_alloc_flags, uint32_t options, uint64_t size,
fbl::RefPtr<VmObjectPaged>* obj) {
if (options & (kContiguous | kCanBlockOnPageRequests)) {
// Force callers to use CreateContiguous() instead.
return ZX_ERR_INVALID_ARGS;
}
return CreateCommon(pmm_alloc_flags, options, size, obj);
}
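// Caller-side sketch of Create() above (illustrative only; the variable names are hypothetical
// and this is not a call site from this file):
//
//   fbl::RefPtr<VmObjectPaged> vmo;
//   zx_status_t status = VmObjectPaged::Create(PMM_ALLOC_FLAG_ANY, /*options=*/0u,
//                                              4ul * PAGE_SIZE, &vmo);
//   if (status != ZX_OK) {
//     return status;
//   }
//   // |vmo| is now a non-resizable anonymous VMO of four pages, backed by an implicitly zero
//   // VmCowPages and already present in the global VMO list.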
zx_status_t VmObjectPaged::CreateContiguous(uint32_t pmm_alloc_flags, uint64_t size,
uint8_t alignment_log2,
fbl::RefPtr<VmObjectPaged>* obj) {
DEBUG_ASSERT(alignment_log2 < sizeof(uint64_t) * 8);
// make sure size is page aligned
if (!IS_PAGE_ROUNDED(size)) {
return ZX_ERR_INVALID_ARGS;
}
if (size > MAX_SIZE) {
return ZX_ERR_OUT_OF_RANGE;
}
fbl::AllocChecker ac;
// For contiguous VMOs, we need a PhysicalPageProvider to reclaim specific loaned physical pages
// on commit.
auto page_provider = fbl::AdoptRef(new (&ac) PhysicalPageProvider(size));
if (!ac.check()) {
return ZX_ERR_NO_MEMORY;
}
PhysicalPageProvider* physical_page_provider_ptr = page_provider.get();
fbl::RefPtr<PageSource> page_source =
fbl::AdoptRef(new (&ac) PageSource(ktl::move(page_provider)));
if (!ac.check()) {
return ZX_ERR_NO_MEMORY;
}
auto* page_source_ptr = page_source.get();
fbl::RefPtr<VmObjectPaged> vmo;
zx_status_t status =
CreateWithSourceCommon(page_source, pmm_alloc_flags, kContiguous, size, &vmo);
if (status != ZX_OK) {
// Make sure to close the page source we created, as it will not get closed by the VmCowPages since
// that creation failed.
page_source->Close();
return status;
}
if (size == 0) {
*obj = ktl::move(vmo);
return ZX_OK;
}
// allocate the pages
list_node page_list;
list_initialize(&page_list);
size_t num_pages = size / PAGE_SIZE;
paddr_t pa;
status = pmm_alloc_contiguous(num_pages, pmm_alloc_flags, alignment_log2, &pa, &page_list);
if (status != ZX_OK) {
LTRACEF("failed to allocate enough pages (asked for %zu)\n", num_pages);
return ZX_ERR_NO_MEMORY;
}
Guard<CriticalMutex> guard{vmo->lock()};
// Add them to the appropriate range of the object; this takes ownership of all the pages
// regardless of outcome.
// This is a newly created VMO with a page source, so we don't expect to be overwriting anything
// in its page list.
status = vmo->cow_pages_locked()->AddNewPagesLocked(
0, &page_list, VmCowPages::CanOverwriteContent::None, true, nullptr);
if (status != ZX_OK) {
return status;
}
physical_page_provider_ptr->Init(vmo->cow_pages_locked(), page_source_ptr, pa);
*obj = ktl::move(vmo);
return ZX_OK;
}
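// Caller-side sketch of CreateContiguous() above (illustrative only; names are hypothetical, and
// the use of alignment_log2 == 0 to mean "no alignment beyond natural page alignment" is an
// assumption based on the usual convention rather than anything in this file):
//
//   fbl::RefPtr<VmObjectPaged> vmo;
//   zx_status_t status =
//       VmObjectPaged::CreateContiguous(PMM_ALLOC_FLAG_ANY, 16ul * PAGE_SIZE,
//                                       /*alignment_log2=*/0, &vmo);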
zx_status_t VmObjectPaged::CreateFromWiredPages(const void* data, size_t size, bool exclusive,
fbl::RefPtr<VmObjectPaged>* obj) {
LTRACEF("data %p, size %zu\n", data, size);
fbl::RefPtr<VmObjectPaged> vmo;
zx_status_t status = CreateCommon(PMM_ALLOC_FLAG_ANY, 0, size, &vmo);
if (status != ZX_OK) {
return status;
}
if (size > 0) {
ASSERT(IS_PAGE_ROUNDED(size));
ASSERT(IS_PAGE_ROUNDED(reinterpret_cast<uintptr_t>(data)));
// Do a direct lookup of the physical pages backing the range of
// the kernel that these addresses belong to and jam them directly
// into the VMO.
//
// NOTE: This relies on the kernel not otherwise owning the pages.
// If the setup of the kernel's address space changes so that the
// pages are attached to a kernel VMO, this will need to change.
paddr_t start_paddr = vaddr_to_paddr(data);
ASSERT(start_paddr != 0);
Guard<CriticalMutex> guard{vmo->lock()};
for (size_t count = 0; count < size / PAGE_SIZE; count++) {
paddr_t pa = start_paddr + count * PAGE_SIZE;
vm_page_t* page = paddr_to_vm_page(pa);
ASSERT(page);
if (page->state() == vm_page_state::WIRED) {
pmm_unwire_page(page);
} else {
// This function is only valid for memory in the boot image,
// which should all be wired.
panic("page used to back static vmo in unusable state: paddr %#" PRIxPTR " state %zu\n", pa,
VmPageStateIndex(page->state()));
}
// This is a newly created anonymous VMO, so we expect to be overwriting zeros. A newly
// created anonymous VMO with no committed pages has all its content implicitly zero.
status = vmo->cow_pages_locked()->AddNewPageLocked(
count * PAGE_SIZE, page, VmCowPages::CanOverwriteContent::Zero, nullptr, false, nullptr);
ASSERT_MSG(status == ZX_OK,
"AddNewPageLocked failed on page %zu of %zu at %#" PRIx64 " from [%#" PRIx64
", %#" PRIx64 ")",
count, size / PAGE_SIZE, pa, start_paddr, start_paddr + size);
DEBUG_ASSERT(!page->is_loaned());
}
if (exclusive && !is_physmap_addr(data)) {
// unmap it from the kernel
// NOTE: this means the image can no longer be referenced from the original pointer
status = VmAspace::kernel_aspace()->arch_aspace().Unmap(
reinterpret_cast<vaddr_t>(data), size / PAGE_SIZE,
ArchVmAspaceInterface::ArchUnmapOptions::None);
ASSERT(status == ZX_OK);
}
if (!exclusive) {
// Pin all the pages as we must never decommit any of them since they are shared elsewhere.
ASSERT(vmo->cow_range_.offset == 0);
status = vmo->cow_pages_locked()->PinRangeLocked(VmCowRange(0, size));
ASSERT(status == ZX_OK);
}
}
*obj = ktl::move(vmo);
return ZX_OK;
}
zx_status_t VmObjectPaged::CreateExternal(fbl::RefPtr<PageSource> src, uint32_t options,
uint64_t size, fbl::RefPtr<VmObjectPaged>* obj) {
if (options & (kDiscardable | kCanBlockOnPageRequests | kAlwaysPinned)) {
return ZX_ERR_INVALID_ARGS;
}
// make sure size is page aligned
if (!IS_PAGE_ROUNDED(size)) {
return ZX_ERR_INVALID_ARGS;
}
if (size > MAX_SIZE) {
return ZX_ERR_OUT_OF_RANGE;
}
// External VMOs always support delayed PMM allocations, since they already have to tolerate
// arbitrary waits for pages due to the PageSource.
return CreateWithSourceCommon(ktl::move(src), PMM_ALLOC_FLAG_ANY | PMM_ALLOC_FLAG_CAN_WAIT,
options | kCanBlockOnPageRequests, size, obj);
}
zx_status_t VmObjectPaged::CreateWithSourceCommon(fbl::RefPtr<PageSource> src,
uint32_t pmm_alloc_flags, uint32_t options,
uint64_t size, fbl::RefPtr<VmObjectPaged>* obj) {
// Caller must check that size is page aligned.
DEBUG_ASSERT(IS_PAGE_ROUNDED(size));
DEBUG_ASSERT(!(options & kAlwaysPinned));
fbl::AllocChecker ac;
// The cow pages will have a page source, so blocking is always possible.
options |= kCanBlockOnPageRequests;
VmCowPagesOptions cow_options = VmCowPagesOptions::kNone;
cow_options |= VmCowPagesOptions::kPageSourceRoot;
if (options & kContiguous) {
cow_options |= VmCowPagesOptions::kCannotDecommitZeroPages;
}
if (src->properties().is_user_pager) {
cow_options |= VmCowPagesOptions::kUserPagerBackedRoot;
}
if (src->properties().is_preserving_page_content) {
cow_options |= VmCowPagesOptions::kPreservingPageContentRoot;
}
fbl::RefPtr<VmCowPages> cow_pages;
zx_status_t status = VmCowPages::CreateExternal(ktl::move(src), cow_options, size, &cow_pages);
if (status != ZX_OK) {
return status;
}
auto vmo = fbl::AdoptRef<VmObjectPaged>(new (&ac) VmObjectPaged(options, ktl::move(cow_pages)));
if (!ac.check()) {
return ZX_ERR_NO_MEMORY;
}
// This creation has succeeded. Must wire up the cow pages and *then* place in the global list.
{
Guard<CriticalMutex> guard{vmo->lock()};
vmo->cow_pages_locked()->set_paged_backlink_locked(vmo.get());
vmo->cow_pages_locked()->TransitionToAliveLocked();
}
vmo->AddToGlobalList();
*obj = ktl::move(vmo);
return ZX_OK;
}
zx_status_t VmObjectPaged::CreateChildSlice(uint64_t offset, uint64_t size, bool copy_name,
fbl::RefPtr<VmObject>* child_vmo) {
LTRACEF("vmo %p offset %#" PRIx64 " size %#" PRIx64 "\n", this, offset, size);
canary_.Assert();
// Offset must be page aligned.
if (!IS_PAGE_ROUNDED(offset)) {
return ZX_ERR_INVALID_ARGS;
}
// Make sure size is page aligned.
if (!IS_PAGE_ROUNDED(size)) {
return ZX_ERR_INVALID_ARGS;
}
if (size > MAX_SIZE) {
return ZX_ERR_OUT_OF_RANGE;
}
// Slice must be wholly contained. |size()| will read the size holding the lock. This extra
// acquisition is correct as we must drop the lock in order to perform the allocations.
VmCowRange range;
{
Guard<CriticalMutex> guard{lock()};
auto cow_range = GetCowRangeSizeCheckLocked(offset, size);
if (!cow_range) {
return ZX_ERR_INVALID_ARGS;
}
range = *cow_range;
}
// Forbid creating children of resizable VMOs. This restriction may be lifted in the future.
if (is_resizable()) {
return ZX_ERR_NOT_SUPPORTED;
}
uint32_t options = kSlice;
if (is_contiguous()) {
options |= kContiguous;
}
// If this VMO is contiguous then we allow creating an uncached slice. When zeroing pages that
// are reclaimed from having been loaned from a contiguous VMO, we will zero the pages and flush
// the zeroes to RAM.
const bool allow_uncached = is_contiguous();
return CreateChildReferenceCommon(options, range, allow_uncached, copy_name, nullptr, child_vmo);
}
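// Illustrative sketch of creating a slice via CreateChildSlice() above (names hypothetical;
// |parent| is assumed to be a non-resizable, cached VmObjectPaged):
//
//   fbl::RefPtr<VmObject> slice;
//   zx_status_t status =
//       parent->CreateChildSlice(/*offset=*/0, /*size=*/PAGE_SIZE, /*copy_name=*/true, &slice);
//
// The slice shares the parent's VmCowPages and only restricts the visible range, which is why it
// is routed through CreateChildReferenceCommon below with a narrowed VmCowRange.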
zx_status_t VmObjectPaged::CreateChildReference(Resizability resizable, uint64_t offset,
uint64_t size, bool copy_name, bool* first_child,
fbl::RefPtr<VmObject>* child_vmo) {
LTRACEF("vmo %p offset %#" PRIx64 " size %#" PRIx64 "\n", this, offset, size);
canary_.Assert();
// A reference spans the entirety of the parent. The specified range has no meaning, so require it
// to be zero.
if (offset != 0 || size != 0) {
return ZX_ERR_INVALID_ARGS;
}
if (is_slice()) {
return ZX_ERR_NOT_SUPPORTED;
}
ASSERT(cow_range_.offset == 0);
// Not supported for contiguous VMOs. Can use slices instead as contiguous VMOs are non-resizable
// and support slices.
if (is_contiguous()) {
return ZX_ERR_NOT_SUPPORTED;
}
if (resizable == Resizability::Resizable) {
// Cannot create a resizable reference from a non-resizable VMO.
if (!is_resizable()) {
return ZX_ERR_NOT_SUPPORTED;
}
}
uint32_t options = 0;
// Reference inherits resizability from parent.
if (is_resizable()) {
options |= kResizable;
}
return CreateChildReferenceCommon(options, VmCowRange(0, UINT64_MAX), false, copy_name,
first_child, child_vmo);
}
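// Illustrative sketch of CreateChildReference() above (names hypothetical; the non-resizable
// Resizability enumerator name is an assumption, as only Resizability::Resizable appears in this
// file). Offset and size must be zero because a reference always spans the whole parent:
//
//   fbl::RefPtr<VmObject> ref;
//   bool first_child = false;
//   zx_status_t status = parent->CreateChildReference(Resizability::NonResizable, 0, 0,
//                                                     /*copy_name=*/false, &first_child, &ref);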
zx_status_t VmObjectPaged::CreateChildReferenceCommon(uint32_t options, VmCowRange range,
bool allow_uncached, bool copy_name,
bool* first_child,
fbl::RefPtr<VmObject>* child_vmo) {
canary_.Assert();
options |= kReference;
if (can_block_on_page_requests()) {
options |= kCanBlockOnPageRequests;
}
// Reference shares the same VmCowPages as the parent.
fbl::RefPtr<VmObjectPaged> vmo;
{
Guard<CriticalMutex> guard{lock()};
// Unless the caller explicitly allows uncached parents (contiguous slices), require the cache
// policy to be cached.
if (self_locked()->GetMappingCachePolicyLocked() != ARCH_MMU_FLAG_CACHED && !allow_uncached) {
return ZX_ERR_BAD_STATE;
}
// Once all fallible checks are performed, construct the VmObjectPaged.
fbl::AllocChecker ac;
vmo = fbl::AdoptRef<VmObjectPaged>(new (&ac) VmObjectPaged(options, cow_pages_, range));
if (!ac.check()) {
return ZX_ERR_NO_MEMORY;
}
AssertHeld(vmo->lock_ref());
// There's no good way to convince the static analysis that the vmo->lock() that we hold is
// also the VmObject::lock() in vmo, and so we disable analysis to set the cache_policy_.
[&vmo, cache_policy = GetMappingCachePolicyLocked()]() TA_REQ(vmo->lock())
TA_NO_THREAD_SAFETY_ANALYSIS { vmo->cache_policy_ = cache_policy; }();
{
Guard<CriticalMutex> child_guard{ChildListLock::Get()};
vmo->parent_ = this;
const bool first = AddChildLocked(vmo.get());
if (first_child) {
*first_child = first;
}
}
// Also insert into the reference list. The reference should only be inserted in the list of the
// object that the cow_pages_locked() has the backlink to, i.e. the notional "owner" of the
// VmCowPages.
// As a consequence of this, in the case of nested references, the reference relationship can
// look different from the parent->child relationship, which instead mirrors the child creation
// calls as specified by the user (this is true for all child types).
VmObjectPaged* paged_owner = cow_pages_locked()->get_paged_backlink_locked();
// The VmCowPages we point to should have a valid backlink, either to us or to our parent (if we
// are a reference).
DEBUG_ASSERT(paged_owner);
// If this object is not a reference, the |paged_owner| we computed should be the same as
// |this|.
DEBUG_ASSERT(is_reference() || paged_owner == this);
AssertHeld(paged_owner->lock_ref());
paged_owner->reference_list_.push_back(vmo.get());
if (copy_name) {
vmo->name_ = name_;
}
}
// Add to the global list now that fully initialized.
vmo->AddToGlobalList();
*child_vmo = ktl::move(vmo);
return ZX_OK;
}
zx_status_t VmObjectPaged::CreateClone(Resizability resizable, SnapshotType type, uint64_t offset,
uint64_t size, bool copy_name,
fbl::RefPtr<VmObject>* child_vmo) {
LTRACEF("vmo %p offset %#" PRIx64 " size %#" PRIx64 "\n", this, offset, size);
canary_.Assert();
// Copy-on-write clones of contiguous VMOs do not have meaningful semantics, so forbid them.
if (is_contiguous()) {
return ZX_ERR_INVALID_ARGS;
}
// offset must be page aligned
if (!IS_PAGE_ROUNDED(offset)) {
return ZX_ERR_INVALID_ARGS;
}
// size must be page aligned and not too large.
if (!IS_PAGE_ROUNDED(size)) {
return ZX_ERR_INVALID_ARGS;
}
if (size > MAX_SIZE) {
return ZX_ERR_OUT_OF_RANGE;
}
auto cow_range = GetCowRange(offset, size);
if (!cow_range) {
return ZX_ERR_OUT_OF_RANGE;
}
fbl::RefPtr<VmObjectPaged> vmo;
{
VmCowPages::DeferredOps deferred(cow_pages_.get());
Guard<CriticalMutex> guard{lock()};
// check that we're not uncached in some way
if (self_locked()->GetMappingCachePolicyLocked() != ARCH_MMU_FLAG_CACHED) {
return ZX_ERR_BAD_STATE;
}
// If we are a slice we require a unidirectional clone, as performing a bi-directional clone
// through a slice does not yet have defined semantics.
const bool require_unidirection = is_slice();
auto result =
cow_pages_locked()->CreateCloneLocked(type, require_unidirection, *cow_range, deferred);
if (result.is_error()) {
return result.error_value();
}
uint32_t options = 0;
if (resizable == Resizability::Resizable) {
options |= kResizable;
}
if (can_block_on_page_requests()) {
options |= kCanBlockOnPageRequests;
}
fbl::AllocChecker ac;
auto [child, child_lock] = (*result).take();
vmo = fbl::AdoptRef<VmObjectPaged>(new (&ac) VmObjectPaged(options, ktl::move(child)));
if (!ac.check()) {
return ZX_ERR_NO_MEMORY;
}
Guard<CriticalMutex> child_guard{AdoptLock, vmo->lock(), ktl::move(child_lock)};
DEBUG_ASSERT(vmo->self_locked()->GetMappingCachePolicyLocked() == ARCH_MMU_FLAG_CACHED);
// Now that everything has succeeded we can wire up cow pages references. VMO will be placed in
// the global list later once lock has been dropped.
vmo->cow_pages_locked()->set_paged_backlink_locked(vmo.get());
vmo->cow_pages_locked()->TransitionToAliveLocked();
// Install the parent.
{
Guard<CriticalMutex> list_guard{ChildListLock::Get()};
vmo->parent_ = this;
// add the new vmo as a child before we do anything, since its
// dtor expects to find it in its parent's child list
AddChildLocked(vmo.get());
}
if (copy_name) {
vmo->name_ = name_;
}
}
// Add to the global list now that fully initialized.
vmo->AddToGlobalList();
*child_vmo = ktl::move(vmo);
return ZX_OK;
}
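// Illustrative sketch of CreateClone() above (names hypothetical; the Resizability and
// SnapshotType enumerators shown are assumed for illustration only and are not taken from this
// file):
//
//   fbl::RefPtr<VmObject> clone;
//   zx_status_t status = parent->CreateClone(Resizability::NonResizable,
//                                            SnapshotType::Full /* assumed name */,
//                                            /*offset=*/0, parent->size(),
//                                            /*copy_name=*/true, &clone);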
void VmObjectPaged::DumpLocked(uint depth, bool verbose) const {
canary_.Assert();
uint64_t parent_id = 0;
// Cache the parent value as a void* as it's not safe to dereference once the ChildListLock is
// dropped, but we can still print out its value.
void* parent;
{
Guard<CriticalMutex> guard{ChildListLock::Get()};
parent = parent_;
if (parent_) {
parent_id = parent_->user_id();
}
}
for (uint i = 0; i < depth; ++i) {
printf(" ");
}
printf("vmo %p/k%" PRIu64 " ref %d parent %p/k%" PRIu64 "\n", this, user_id_.load(),
ref_count_debug(), parent, parent_id);
char name[ZX_MAX_NAME_LEN];
get_name(name, sizeof(name));
if (strlen(name) > 0) {
for (uint i = 0; i < depth + 1; ++i) {
printf(" ");
}
printf("name %s\n", name);
}
cow_pages_locked()->DumpLocked(depth, verbose);
}
VmObject::AttributionCounts VmObjectPaged::GetAttributedMemoryInRangeLocked(
uint64_t offset_bytes, uint64_t len_bytes) const {
vmo_attribution_queries.Add(1);
// A reference never has memory attributed to it. It points to the parent's VmCowPages, and we
// need to hold the invariant that we don't double-count attributed memory.
//
// TODO(https://fxbug.dev/42069078): Consider attributing memory to the current VmCowPages
// backlink for the case where the parent has gone away.
if (is_reference()) {
return AttributionCounts{};
}
ASSERT(cow_range_.offset == 0);
uint64_t new_len_bytes;
if (!TrimRange(offset_bytes, len_bytes, size_locked(), &new_len_bytes)) {
return AttributionCounts{};
}
auto cow_range = GetCowRange(offset_bytes, new_len_bytes);
return cow_pages_locked()->GetAttributedMemoryInRangeLocked(*cow_range);
}
zx_status_t VmObjectPaged::CommitRangeInternal(uint64_t offset, uint64_t len, bool pin,
bool write) {
canary_.Assert();
LTRACEF("offset %#" PRIx64 ", len %#" PRIx64 "\n", offset, len);
if (can_block_on_page_requests()) {
lockdep::AssertNoLocksHeld();
}
// We only expect write to be set if this is a pin. All non-pin commits are reads.
DEBUG_ASSERT(!write || pin);
// Child slices of VMOs are currently not resizable, nor can they be made
// from resizable parents. If this ever changes, the logic surrounding what
// to do if a VMO gets resized during a Commit or Pin operation will need to
// be revisited. Right now, we can just rely on the fact that the initial
// vetting/trimming of the offset and length of the operation will never
// change if the operation is being executed against a child slice.
DEBUG_ASSERT(!is_resizable() || !is_slice());
// Round offset and len to be page aligned. Use a sub-scope to validate that temporary end
// calculations cannot be accidentally used later on.
{
uint64_t end;
if (add_overflow(offset, len, &end)) {
return ZX_ERR_OUT_OF_RANGE;
}
const uint64_t end_page = ROUNDUP_PAGE_SIZE(end);
if (end_page < end) {
return ZX_ERR_OUT_OF_RANGE;
}
DEBUG_ASSERT(end_page >= offset);
offset = ROUNDDOWN_PAGE_SIZE(offset);
len = end_page - offset;
}
// Although the length, for the non-pin ranges, is allowed to end up outside the VMO range during
// the operation, at least initially it must be within range.
{
Guard<CriticalMutex> guard{lock()};
if (unlikely(!InRange(offset, len, size_locked()))) {
return ZX_ERR_OUT_OF_RANGE;
}
}
if (len == 0) {
// If pinning we explicitly forbid zero length pins as we cannot guarantee consistent semantics.
// For example pinning a zero length range outside the range of the VMO is an error, and so
// pinning a zero length range inside the vmo and then resizing the VMO smaller than the pin
// region should also be an error. The simplest way to enforce this, without adding new metadata
// to track zero length pin regions, is to just forbid them. Note that the user entry points for
// pinning already forbid zero length ranges.
return pin ? ZX_ERR_INVALID_ARGS : ZX_OK;
}
// Tracks the end of the pinned range to unpin in case of failure. The |offset| might lag behind
// the pinned range, as it tracks the range that has been completely processed, which would
// also include dirtying the page after pinning in case of a write.
uint64_t pinned_end_offset = offset;
// Should any errors occur we need to unpin everything. If we were asked to write, we need to mark
// the VMO modified if any pages were committed.
auto deferred_cleanup =
fit::defer([this, pinned_start_offset = offset, &pinned_end_offset, &len, &write]() {
// If we were not able to pin the entire range, i.e. len is not 0, we need to unpin
// everything. Regardless of any resizes or other things that may have happened any pinned
// pages *must* still be within a valid range, and so we know Unpin should succeed. The edge
// case is if we had failed to pin *any* pages and so our original offset may be outside the
// current range of the vmo. Additionally, as pinning a zero length range is invalid, so is
// unpinning, and so we must avoid attempting a zero length unpin.
if (pinned_end_offset > pinned_start_offset) {
if (len > 0) {
auto cow_range =
GetCowRange(pinned_start_offset, pinned_end_offset - pinned_start_offset);
Guard<CriticalMutex> guard{AssertOrderedLock, lock(), cow_pages_->lock_order()};
cow_pages_locked()->UnpinLocked(*cow_range, nullptr);
} else if (write) {
Guard<CriticalMutex> guard{AssertOrderedLock, lock(), cow_pages_->lock_order()};
mark_modified_locked();
}
}
});
__UNINITIALIZED MultiPageRequest page_request;
// As we may need to wait on arbitrary page requests we just keep running this as long as there is
// a non-zero range to process.
uint64_t to_dirty_len = 0;
while (len > 0) {
zx_status_t status = ZX_OK;
uint64_t committed_len = 0;
if (to_dirty_len > 0) {
Guard<CriticalMutex> guard{AssertOrderedLock, lock(), cow_pages_->lock_order()};
// The to_dirty_len *must* be within range, even though we just grabbed the lock and a resize
// could have happened, since the dirtied range is pinned. As such, any resize could not have
// removed the in progress dirty range.
DEBUG_ASSERT(InRange(offset, to_dirty_len, size_locked()));
uint64_t dirty_len = 0;
status = cow_pages_locked()->PrepareForWriteLocked(
*GetCowRange(offset, to_dirty_len), page_request.GetLazyDirtyRequest(), &dirty_len);
DEBUG_ASSERT(dirty_len <= to_dirty_len);
if (status == ZX_ERR_SHOULD_WAIT) {
page_request.MadeDirtyRequest();
}
// Account for the pages that were dirtied during this attempt.
to_dirty_len -= dirty_len;
committed_len = dirty_len;
} else {
__UNINITIALIZED VmCowPages::DeferredOps deferred(cow_pages_.get());
Guard<CriticalMutex> guard{AssertOrderedLock, lock(), cow_pages_->lock_order()};
uint64_t new_len = len;
if (!TrimRange(offset, len, size_locked(), &new_len)) {
return pin ? ZX_ERR_OUT_OF_RANGE : ZX_OK;
}
if (new_len != len) {
if (pin) {
return ZX_ERR_OUT_OF_RANGE;
}
len = new_len;
if (len == 0) {
break;
}
}
status = cow_pages_locked()->CommitRangeLocked(*GetCowRange(offset, len), deferred,
&committed_len, &page_request);
DEBUG_ASSERT(committed_len <= len);
// If we're required to pin, try to pin the committed range before waiting on the
// page_request, which has been populated to request pages beyond the committed range. Even
// though the page_request has already been initialized, we choose to first completely process
// the committed range, which could end up canceling the already initialized page request.
// This allows us to keep making forward progress as we will potentially pin a few pages
// before trying to fault in further pages, thereby preventing the already committed (and
// pinned) pages from being evicted while we wait with the lock dropped.
if (pin && committed_len > 0) {
uint64_t non_loaned_len = 0;
if (cow_pages_locked()->can_borrow() &&
PhysicalPageBorrowingConfig::Get().is_loaning_enabled()) {
// We need to replace any loaned pages in the committed range with non-loaned pages first,
// since pinning expects all pages to be non-loaned. Replacing loaned pages requires a
// page request too. At any time we'll only be able to wait on a single page request, and
// after the wait the conditions that resulted in the previous request might have changed,
// so we can just cancel and reuse the existing page_request.
// TODO: consider not canceling this and the other request below. The issue with not
// canceling is that without early wake support, i.e. being able to reinitialize an
// existing initialized request, I think this code will not work without canceling.
page_request.CancelRequests();
status = cow_pages_locked()->ReplacePagesWithNonLoanedLocked(
*GetCowRange(offset, committed_len), deferred, page_request.GetAnonymous(),
&non_loaned_len);
DEBUG_ASSERT(non_loaned_len <= committed_len);
} else {
// Either the VMO does not support borrowing, or loaning is not enabled so we know there
// are no loaned pages.
non_loaned_len = committed_len;
}
// We can safely pin the non-loaned range before waiting on the page request.
if (non_loaned_len > 0) {
// Verify that we are starting the pin after the previously pinned range, as we do not
// want to repeatedly pin the same pages.
ASSERT(pinned_end_offset == offset);
zx_status_t pin_status =
cow_pages_locked()->PinRangeLocked(*GetCowRange(offset, non_loaned_len));
if (pin_status != ZX_OK) {
return pin_status;
}
}
// At this point we have successfully committed and pinned non_loaned_len.
uint64_t pinned_len = non_loaned_len;
pinned_end_offset = offset + pinned_len;
// If this is a write and the VMO supports dirty tracking, we also need to mark the pinned
// pages Dirty.
// We pin the pages first before marking them dirty in order to guarantee forward progress.
// Pinning the pages will prevent them from getting decommitted while we are waiting on the
// dirty page request without the lock held.
if (write && pinned_len > 0 && is_dirty_tracked()) {
// Prepare the committed range for writing. We need a page request for this too, so cancel
// any existing one and reuse it.
page_request.CancelRequests();
// We want to dirty the entire pinned range.
to_dirty_len = pinned_len;
continue;
}
committed_len = pinned_len;
}
}
if (status == ZX_ERR_SHOULD_WAIT) {
status = page_request.Wait();
}
if (status != ZX_OK) {
if (status == ZX_ERR_TIMED_OUT) {
Dump(0, false);
}
return status;
}
offset += committed_len;
len -= committed_len;
}
return ZX_OK;
}
zx_status_t VmObjectPaged::DecommitRange(uint64_t offset, uint64_t len) {
canary_.Assert();
LTRACEF("offset %#" PRIx64 ", len %#" PRIx64 "\n", offset, len);
if (is_contiguous() && !PhysicalPageBorrowingConfig::Get().is_loaning_enabled()) {
return ZX_ERR_NOT_SUPPORTED;
}
auto cow_range = GetCowRange(offset, len);
if (!cow_range) {
return ZX_ERR_OUT_OF_RANGE;
}
// Decommit of pages from a contiguous VMO relies on contiguous VMOs not being resizable.
DEBUG_ASSERT(!is_resizable() || !is_contiguous());
return cow_pages_->DecommitRange(*cow_range);
}
zx_status_t VmObjectPaged::ZeroPartialPage(uint64_t page_base_offset, uint64_t zero_start_offset,
uint64_t zero_end_offset) {
DEBUG_ASSERT(zero_start_offset <= zero_end_offset);
DEBUG_ASSERT(zero_end_offset <= PAGE_SIZE);
DEBUG_ASSERT(IS_PAGE_ROUNDED(page_base_offset));
{
Guard<CriticalMutex> guard{lock()};
if (page_base_offset >= size_locked()) {
return ZX_ERR_OUT_OF_RANGE;
}
// TODO: Consider replacing this with a more appropriate generic API when one is available.
if (cow_pages_locked()->PageWouldReadZeroLocked(page_base_offset)) {
// This is already considered zero so no need to redundantly zero again.
return ZX_OK;
}
}
// Need to actually zero out bytes in the page.
return ReadWriteInternal(page_base_offset + zero_start_offset,
zero_end_offset - zero_start_offset, true,
VmObjectReadWriteOptions::None,
[](void* dst, size_t offset, size_t len) -> UserCopyCaptureFaultsResult {
// We're memsetting the *kernel* address of an allocated page, so we
// know that this cannot fault. memset may not be the most efficient,
// but we don't expect to be doing this very often.
memset(dst, 0, len);
return UserCopyCaptureFaultsResult{ZX_OK};
})
.first;
}
zx_status_t VmObjectPaged::ZeroRangeInternal(uint64_t offset, uint64_t len, bool dirty_track) {
canary_.Assert();
if (can_block_on_page_requests()) {
lockdep::AssertNoLocksHeld();
}
// May need to zero in chunks across multiple different lock acquisitions so loop until nothing
// left to do.
while (len > 0) {
// Check for any non-page aligned start and handle separately.
if (!IS_PAGE_ROUNDED(offset)) {
// We're doing partial page writes, so we should be dirty tracking.
DEBUG_ASSERT(dirty_track);
const uint64_t page_base = ROUNDDOWN_PAGE_SIZE(offset);
const uint64_t zero_start_offset = offset - page_base;
const uint64_t zero_len = ktl::min(PAGE_SIZE - zero_start_offset, len);
zx_status_t status =
ZeroPartialPage(page_base, zero_start_offset, zero_start_offset + zero_len);
if (status != ZX_OK) {
return status;
}
// Advance over the length we zeroed and then, since the lock might have been dropped, go
// around the loop to redo the checks.
offset += zero_len;
len -= zero_len;
continue;
}
// The start is page aligned, so if the remaining length is not a page size then perform the
// final sub-page zero.
if (len < PAGE_SIZE) {
DEBUG_ASSERT(dirty_track);
return ZeroPartialPage(offset, 0, len);
}
// First try and do the more efficient decommit. We prefer decommit as it performs work in the
// order of the number of committed pages, instead of work in the order of the size of the range. An
// error from DecommitRangeLocked indicates that the VMO is not of a form that decommit can
// safely be performed without exposing data that we shouldn't between children and parents, but
// no actual state will have been changed. Should decommit succeed we are done, otherwise we
// will have to handle each offset individually.
//
// Zeroing doesn't decommit pages of contiguous VMOs.
if (!is_contiguous()) {
ktl::optional<VmCowRange> cow_range = GetCowRange(offset, ROUNDDOWN_PAGE_SIZE(len));
if (!cow_range) {
return ZX_ERR_OUT_OF_RANGE;
}
zx_status_t status = cow_pages_->DecommitRange(*cow_range);
if (status == ZX_OK) {
offset += cow_range->len;
len -= cow_range->len;
continue;
}
}
// We might need a page request if the VMO is backed by a page source.
__UNINITIALIZED MultiPageRequest page_request;
uint64_t zeroed_len = 0;
zx_status_t status;
{
__UNINITIALIZED VmCowPages::DeferredOps deferred(cow_pages_.get());
Guard<CriticalMutex> guard{lock()};
// Zeroing a range behaves as if it were an efficient zx_vmo_write. As we cannot write to an
// uncached vmo, we also cannot zero an uncached vmo.
if (self_locked()->GetMappingCachePolicyLocked() != ARCH_MMU_FLAG_CACHED) {
return ZX_ERR_BAD_STATE;
}
// Offset is page aligned, and we have at least one full page to process, so find the page
// aligned length to hand over to the cow pages zero method.
ktl::optional<VmCowRange> cow_range =
GetCowRangeSizeCheckLocked(offset, ROUNDDOWN_PAGE_SIZE(len));
if (!cow_range) {
return ZX_ERR_OUT_OF_RANGE;
}
#if DEBUG_ASSERT_IMPLEMENTED
// Currently we want ZeroPagesLocked() to not decommit any pages from a contiguous VMO. In
// debug we can assert that (not a super fast assert, but seems worthwhile; it's debug only).
uint64_t page_count_before =
is_contiguous() ? cow_pages_locked()->DebugGetPageCountLocked() : 0;
#endif
// Now that we have a page aligned range we can hand over to the cow pages zero method.
status = cow_pages_locked()->ZeroPagesLocked(*cow_range, dirty_track, deferred, &page_request,
&zeroed_len);
if (zeroed_len != 0) {
// Mark modified since we wrote zeros.
mark_modified_locked();
}
#if DEBUG_ASSERT_IMPLEMENTED
if (is_contiguous()) {
uint64_t page_count_after = cow_pages_locked()->DebugGetPageCountLocked();
DEBUG_ASSERT(page_count_after == page_count_before);
}
#endif
}
// Wait on any page request, which is the only non-fatal error case.
if (status == ZX_ERR_SHOULD_WAIT) {
status = page_request.Wait();
if (status == ZX_ERR_TIMED_OUT) {
Dump(0, false);
}
}
if (status != ZX_OK) {
return status;
}
// Advance over pages that had already been zeroed.
offset += zeroed_len;
len -= zeroed_len;
}
return ZX_OK;
}
zx_status_t VmObjectPaged::Resize(uint64_t s) {
canary_.Assert();
LTRACEF("vmo %p, size %" PRIu64 "\n", this, s);
DEBUG_ASSERT(!is_contiguous() || !is_resizable());
// Also rejects contiguous VMOs.
if (!is_resizable()) {
return ZX_ERR_UNAVAILABLE;
}
// ensure the size is valid and that we will not wrap.
if (!IS_PAGE_ROUNDED(s)) {
return ZX_ERR_INVALID_ARGS;
}
if (s > MAX_SIZE) {
return ZX_ERR_OUT_OF_RANGE;
}
return cow_pages_->Resize(s);
}
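// Illustrative sketch of Resize() above (names hypothetical; |vmo| must have been created
// resizable). The new size must be page aligned and no larger than MAX_SIZE:
//
//   zx_status_t status = vmo->Resize(vmo->size() + PAGE_SIZE);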
// perform some sort of copy in/out on a range of the object using a passed in lambda for the copy
// routine. The copy routine has the expected type signature of: (void *ptr, uint64_t offset,
// uint64_t len) -> UserCopyCaptureFaultsResult.
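// For illustration, a minimal copy routine in the expected shape (a sketch; the real routines
// used in this file are the memcpy-based lambdas in Read()/Write() and the memset lambda in
// ZeroPartialPage()):
//
//   auto fill_routine = [](void* dst, size_t offset, size_t len) -> UserCopyCaptureFaultsResult {
//     memset(dst, 0, len);  // Writing the kernel mapping of the page cannot fault.
//     return UserCopyCaptureFaultsResult{ZX_OK};
//   };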
template <typename T>
ktl::pair<zx_status_t, size_t> VmObjectPaged::ReadWriteInternal(uint64_t offset, size_t len,
bool write,
VmObjectReadWriteOptions options,
T copyfunc) {
canary_.Assert();
uint64_t end_offset;
if (add_overflow(offset, len, &end_offset)) {
return {ZX_ERR_OUT_OF_RANGE, 0};
}
// Track our two offsets.
uint64_t src_offset = offset;
size_t dest_offset = 0;
// The PageRequest is a non-trivial object so we declare it outside the loop to avoid having to
// construct and deconstruct it each iteration. It is tolerant of being reused and will
// reinitialize itself if needed.
// Ideally we can wake up early from the page request to begin processing any partially supplied
// ranges. However, if performing a write to a dirty tracked VMO this is not presently possible as
// we need to first read in the range and then dirty it, and we cannot have both a read and dirty
// request outstanding at one time.
__UNINITIALIZED MultiPageRequest page_request(!write);
do {
zx_status_t status;
__UNINITIALIZED UserCopyCaptureFaultsResult copy_result(ZX_OK);
{
__UNINITIALIZED VmCowPages::DeferredOps deferred(cow_pages_.get());
Guard<CriticalMutex> guard{AssertOrderedLock, lock(), cow_pages_->lock_order()};
if (self_locked()->GetMappingCachePolicyLocked() != ARCH_MMU_FLAG_CACHED) {
return {ZX_ERR_BAD_STATE, src_offset - offset};
}
if (end_offset > size_locked()) {
if (!!(options & VmObjectReadWriteOptions::TrimLength)) {
if (src_offset >= size_locked()) {
return {ZX_OK, src_offset - offset};
}
end_offset = size_locked();
} else {
return {ZX_ERR_OUT_OF_RANGE, src_offset - offset};
}
} else if (src_offset >= end_offset) {
return {ZX_OK, src_offset - offset};
}
const size_t first_page_offset = ROUNDDOWN_PAGE_SIZE(src_offset);
const size_t last_page_offset = ROUNDDOWN_PAGE_SIZE(end_offset - 1);
size_t remaining_pages = (last_page_offset - first_page_offset) / PAGE_SIZE + 1;
size_t pages_since_last_unlock = 0;
bool modified = false;
__UNINITIALIZED zx::result<VmCowPages::LookupCursor> cursor =
GetLookupCursorLocked(first_page_offset, remaining_pages * PAGE_SIZE);
if (cursor.is_error()) {
return {cursor.status_value(), src_offset - offset};
}
// Performing explicit accesses by request of the user, so disable zero forking.
cursor->DisableZeroFork();
AssertHeld(cursor->lock_ref());
while (remaining_pages > 0) {
const size_t page_offset = src_offset % PAGE_SIZE;
const size_t tocopy = ktl::min(PAGE_SIZE - page_offset, end_offset - src_offset);
// If we need to wait on pages then we would like to wait on as many as possible, up to the
// actual limit of the read/write operation. For a read we can wake up once some pages are
// received, minimizing the latency before we start making progress, but as this is not true
// for writes we cap the maximum number requested.
constexpr uint64_t kMaxWriteWaitPages = 16;
const uint64_t max_wait_pages = write ? kMaxWriteWaitPages : UINT64_MAX;
const uint64_t max_waitable_pages = ktl::min(remaining_pages, max_wait_pages);
// Attempt to lookup a page
__UNINITIALIZED zx::result<VmCowPages::LookupCursor::RequireResult> result =
cursor->RequirePage(write, static_cast<uint>(max_waitable_pages), deferred,
&page_request);
status = result.status_value();
if (status != ZX_OK) {
break;
}
// Compute the kernel mapping of this page.
const paddr_t pa = result->page->paddr();
char* page_ptr = reinterpret_cast<char*>(paddr_to_physmap(pa));
// Call the copy routine. If the copy was successful then ZX_OK is returned, otherwise
// ZX_ERR_SHOULD_WAIT may be returned to indicate the copy failed but we can retry it.
copy_result = copyfunc(page_ptr + page_offset, dest_offset, tocopy);
// If a fault has actually occurred, then we will have captured fault info that we can use
// to handle the fault.
if (copy_result.fault_info.has_value()) {
break;
}
// If we encounter _any_ unrecoverable error from the copy operation which
// produced no fault address, squash the error down to just "NOT_FOUND".
// This is what the SoftFault error would have told us if we did try to
// handle the fault and could not.
if (copy_result.status != ZX_OK) {
status = ZX_ERR_NOT_FOUND;
break;
}
// Advance the copy location.
src_offset += tocopy;
dest_offset += tocopy;
remaining_pages--;
modified = write;
// Periodically yield the lock in order to allow other read or write
// operations to advance sooner than they otherwise would.
constexpr size_t kPagesBetweenUnlocks = 16;
if (unlikely(++pages_since_last_unlock == kPagesBetweenUnlocks)) {
pages_since_last_unlock = 0;
if (guard.lock()->IsContested()) {
break;
}
}
}
// Before dropping the lock, check if any pages were modified and update the VMO state
// accordingly.
if (modified) {
mark_modified_locked();
}
}
// If there was a fault while copying, then handle it now that the lock is dropped.
if (copy_result.fault_info.has_value()) {
auto& info = *copy_result.fault_info;
uint64_t to_fault = len - dest_offset;
status = Thread::Current::SoftFaultInRange(info.pf_va, info.pf_flags, to_fault);
} else if (status == ZX_ERR_SHOULD_WAIT) {
// RequirePage 'failed', but told us that it had filled out the page request, so we should
// wait on it.
DEBUG_ASSERT(can_block_on_page_requests());
status = page_request.Wait();
if (status == ZX_ERR_TIMED_OUT) {
Dump(0, false);
}
}
if (status != ZX_OK) {
return {status, src_offset - offset};
}
} while (src_offset < end_offset);
return {ZX_OK, src_offset - offset};
}
zx_status_t VmObjectPaged::Read(void* _ptr, uint64_t offset, size_t len) {
canary_.Assert();
// test to make sure this is a kernel pointer
if (!is_kernel_address(reinterpret_cast<vaddr_t>(_ptr))) {
DEBUG_ASSERT_MSG(0, "non kernel pointer passed\n");
return ZX_ERR_INVALID_ARGS;
}
// read routine that just uses a memcpy
char* ptr = reinterpret_cast<char*>(_ptr);
auto read_routine = [ptr](const void* src, size_t offset,
size_t len) -> UserCopyCaptureFaultsResult {
memcpy(ptr + offset, src, len);
return UserCopyCaptureFaultsResult{ZX_OK};
};
if (can_block_on_page_requests()) {
lockdep::AssertNoLocksHeld();
}
return ReadWriteInternal(offset, len, false, VmObjectReadWriteOptions::None, read_routine).first;
}
zx_status_t VmObjectPaged::Write(const void* _ptr, uint64_t offset, size_t len) {
canary_.Assert();
// test to make sure this is a kernel pointer
if (!is_kernel_address(reinterpret_cast<vaddr_t>(_ptr))) {
DEBUG_ASSERT_MSG(0, "non kernel pointer passed\n");
return ZX_ERR_INVALID_ARGS;
}
// write routine that just uses a memcpy
const char* ptr = reinterpret_cast<const char*>(_ptr);
auto write_routine = [ptr](void* dst, size_t offset, size_t len) -> UserCopyCaptureFaultsResult {
memcpy(dst, ptr + offset, len);
return UserCopyCaptureFaultsResult{ZX_OK};
};
if (can_block_on_page_requests()) {
lockdep::AssertNoLocksHeld();
}
return ReadWriteInternal(offset, len, true, VmObjectReadWriteOptions::None, write_routine).first;
}
zx_status_t VmObjectPaged::CacheOp(uint64_t offset, uint64_t len, CacheOpType type) {
canary_.Assert();
if (unlikely(len == 0)) {
return ZX_ERR_INVALID_ARGS;
}
Guard<CriticalMutex> guard{lock()};
// verify that the range is within the object
auto cow_range = GetCowRangeSizeCheckLocked(offset, len);
if (!cow_range) {
return ZX_ERR_OUT_OF_RANGE;
}
// This cannot overflow as we already checked the range.
const uint64_t cow_end = cow_range->end();
// For syncing instruction caches there may be work that is more efficient to batch together, and
// so we use an abstract consistency manager to optimize it for the given architecture.
ArchVmICacheConsistencyManager sync_cm;
return cow_pages_locked()->LookupReadableLocked(
*cow_range,
[&sync_cm, cow_offset = cow_range->offset, cow_end, type](uint64_t page_offset, paddr_t pa) {
// This cannot overflow due to the maximum possible size of a VMO.
const uint64_t page_end = page_offset + PAGE_SIZE;
// Determine our start and end in terms of vmo offset
const uint64_t start = ktl::max(page_offset, cow_offset);
const uint64_t end = ktl::min(cow_end, page_end);
// Translate to inter-page offset
DEBUG_ASSERT(start >= page_offset);
const uint64_t op_start_offset = start - page_offset;
DEBUG_ASSERT(op_start_offset < PAGE_SIZE);
DEBUG_ASSERT(end > start);
const uint64_t op_len = end - start;
CacheOpPhys(pa + op_start_offset, op_len, type, sync_cm);
return ZX_ERR_NEXT;
});
}
zx_status_t VmObjectPaged::Lookup(uint64_t offset, uint64_t len,
VmObject::LookupFunction lookup_fn) {
canary_.Assert();
VmCowRange range(offset, len);
auto cow_range = GetCowRange(offset, len);
if (!cow_range) {
return ZX_ERR_OUT_OF_RANGE;
}
Guard<CriticalMutex> guard{lock()};
return cow_pages_locked()->LookupLocked(
*cow_range, [&lookup_fn, undo_offset = cow_range_.offset](uint64_t offset, paddr_t pa) {
// Need to undo the parent_offset before forwarding to the lookup_fn, who is ignorant of
// slices.
return lookup_fn(offset - undo_offset, pa);
});
}
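// Illustrative use of Lookup() above (a sketch; |vmo| and the counting are hypothetical). The
// callback is invoked for each page found in the range and returns ZX_ERR_NEXT to continue:
//
//   size_t committed_pages = 0;
//   zx_status_t status = vmo->Lookup(0, vmo->size(),
//                                    [&committed_pages](uint64_t offset, paddr_t pa) {
//                                      committed_pages++;
//                                      return ZX_ERR_NEXT;
//                                    });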
zx_status_t VmObjectPaged::LookupContiguous(uint64_t offset, uint64_t len, paddr_t* out_paddr) {
canary_.Assert();
// We should consider having the callers round up to page boundaries and then check whether the
// length is page-aligned.
if (unlikely(len == 0 || !IS_PAGE_ROUNDED(offset))) {
return ZX_ERR_INVALID_ARGS;
}
Guard<CriticalMutex> guard{lock()};
auto cow_range = GetCowRangeSizeCheckLocked(offset, len);
if (!cow_range) {
return ZX_ERR_OUT_OF_RANGE;
}
if (unlikely(!is_contiguous() && (cow_range->len != PAGE_SIZE))) {
// Multi-page lookup only supported for contiguous VMOs.
return ZX_ERR_BAD_STATE;
}
// Verify that all pages are present, and assert that the present pages are contiguous since we
// only support len > PAGE_SIZE for contiguous VMOs.
bool page_seen = false;
uint64_t first_offset = 0;
paddr_t first_paddr = 0;
uint64_t count = 0;
// This has to work for child slices with non-zero cow_range_.offset also, which means even if all
// pages are present, the first cur_offset can be offset + cow_range_.offset.
zx_status_t status = cow_pages_locked()->LookupLocked(
*cow_range,
[&page_seen, &first_offset, &first_paddr, &count](uint64_t cur_offset, paddr_t pa) mutable {
++count;
if (!page_seen) {
first_offset = cur_offset;
first_paddr = pa;
page_seen = true;
}
ASSERT(first_paddr + (cur_offset - first_offset) == pa);
return ZX_ERR_NEXT;
});
ASSERT(status == ZX_OK);
if (count != cow_range->len / PAGE_SIZE) {
return ZX_ERR_NOT_FOUND;
}
if (out_paddr) {
*out_paddr = first_paddr;
}
return ZX_OK;
}
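// Illustrative use of LookupContiguous() above (a sketch; |vmo| is hypothetical). For a
// contiguous VMO this returns the physical base of the whole committed run; for any other VMO
// only single-page lookups are allowed:
//
//   paddr_t base = 0;
//   zx_status_t status = vmo->LookupContiguous(/*offset=*/0, PAGE_SIZE, &base);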
ktl::pair<zx_status_t, size_t> VmObjectPaged::ReadUser(user_out_ptr<char> ptr, uint64_t offset,
size_t len,
VmObjectReadWriteOptions options) {
canary_.Assert();
// read routine that uses copy_to_user
auto read_routine = [ptr](const char* src, size_t offset,
size_t len) -> UserCopyCaptureFaultsResult {
return ptr.byte_offset(offset).copy_array_to_user_capture_faults(src, len);
};
if (can_block_on_page_requests()) {
lockdep::AssertNoLocksHeld();
}
return ReadWriteInternal(offset, len, false, options, read_routine);
}
ktl::pair<zx_status_t, size_t> VmObjectPaged::WriteUser(
user_in_ptr<const char> ptr, uint64_t offset, size_t len, VmObjectReadWriteOptions options,
const OnWriteBytesTransferredCallback& on_bytes_transferred) {
canary_.Assert();
// write routine that uses copy_from_user
auto write_routine = [ptr, base_vmo_offset = offset, &on_bytes_transferred](
char* dst, size_t offset, size_t len) -> UserCopyCaptureFaultsResult {
__UNINITIALIZED auto copy_result =
ptr.byte_offset(offset).copy_array_from_user_capture_faults(dst, len);
if (copy_result.status == ZX_OK) {
if (on_bytes_transferred) {
on_bytes_transferred(base_vmo_offset + offset, len);
}
}
return copy_result;
};
if (can_block_on_page_requests()) {
lockdep::AssertNoLocksHeld();
}
return ReadWriteInternal(offset, len, true, options, write_routine);
}
ktl::pair<zx_status_t, size_t> VmObjectPaged::ReadUserVector(user_out_iovec_t vec, uint64_t offset,
size_t len) {
if (len == 0u) {
return {ZX_OK, 0};
}
if (len > UINT64_MAX - offset) {
return {ZX_ERR_OUT_OF_RANGE, 0};
}
size_t total = 0;
zx_status_t status = vec.ForEach([&](user_out_ptr<char> ptr, size_t capacity) {
if (capacity > len) {
capacity = len;
}
auto [read_status, chunk_actual] =
ReadUser(ptr, offset, capacity, VmObjectReadWriteOptions::None);
// Always add |chunk_actual| since some bytes may have been transferred, even on error
total += chunk_actual;
if (read_status != ZX_OK) {
return read_status;
}
DEBUG_ASSERT(chunk_actual == capacity);
offset += chunk_actual;
len -= chunk_actual;
return len > 0 ? ZX_ERR_NEXT : ZX_ERR_STOP;
});
// Return |ZX_ERR_BUFFER_TOO_SMALL| if all of |len| was not transferred.
status = (status == ZX_OK && len > 0) ? ZX_ERR_BUFFER_TOO_SMALL : status;
return {status, total};
}
ktl::pair<zx_status_t, size_t> VmObjectPaged::WriteUserVector(
user_in_iovec_t vec, uint64_t offset, size_t len,
const OnWriteBytesTransferredCallback& on_bytes_transferred) {
if (len == 0u) {
return {ZX_OK, 0};
}
if (len > UINT64_MAX - offset) {
return {ZX_ERR_OUT_OF_RANGE, 0};
}
size_t total = 0;
zx_status_t status = vec.ForEach([&](user_in_ptr<const char> ptr, size_t capacity) {
if (capacity > len) {
capacity = len;
}
auto [write_status, chunk_actual] =
WriteUser(ptr, offset, capacity, VmObjectReadWriteOptions::None, on_bytes_transferred);
// Always add |chunk_actual| since some bytes may have been transferred, even on error
total += chunk_actual;
if (write_status != ZX_OK) {
return write_status;
}
DEBUG_ASSERT(chunk_actual == capacity);
offset += chunk_actual;
len -= chunk_actual;
return len > 0 ? ZX_ERR_NEXT : ZX_ERR_STOP;
});
// Return |ZX_ERR_BUFFER_TOO_SMALL| if all of |len| was not transferred.
status = (status == ZX_OK && len > 0) ? ZX_ERR_BUFFER_TOO_SMALL : status;
return {status, total};
}
zx_status_t VmObjectPaged::TakePages(uint64_t offset, uint64_t len, VmPageSpliceList* pages) {
canary_.Assert();
// TODO: Check that the region is locked once locking is implemented
if (is_contiguous()) {
return ZX_ERR_NOT_SUPPORTED;
}
auto cow_range = GetCowRange(offset, len);
if (!cow_range) {
return ZX_ERR_OUT_OF_RANGE;
}
auto range = *cow_range;
// Initialize the splice list to the right size.
pages->Initialize(range.len);
uint64_t splice_offset = 0;
__UNINITIALIZED MultiPageRequest page_request;
while (!range.is_empty()) {
uint64_t taken_len = 0;
zx_status_t status =
cow_pages_->TakePages(range, splice_offset, pages, &taken_len, &page_request);
if (status != ZX_ERR_SHOULD_WAIT && status != ZX_OK) {
return status;
}
// We could only have failed to take anything if the status was not ZX_OK, and the only non-OK
// status that can reach this point is ZX_ERR_SHOULD_WAIT.
DEBUG_ASSERT(taken_len > 0 || status == ZX_ERR_SHOULD_WAIT);
// We should have taken the entire range requested if the status was ZX_OK.
DEBUG_ASSERT(status != ZX_OK || taken_len == range.len);
// We should not have taken any more than the requested range.
DEBUG_ASSERT(taken_len <= range.len);
splice_offset += taken_len;
// Trim the completed portion from the front of the remaining range.
range = range.TrimedFromStart(taken_len);
if (status == ZX_ERR_SHOULD_WAIT) {
status = page_request.Wait();
if (status != ZX_OK) {
return status;
}
}
}
return ZX_OK;
}
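// Supplies pages from |pages| into the range [offset, offset + len), looping until the entire
// range has been supplied and waiting on |page_request| whenever the underlying VmCowPages
// reports ZX_ERR_SHOULD_WAIT.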
zx_status_t VmObjectPaged::SupplyPages(uint64_t offset, uint64_t len, VmPageSpliceList* pages,
SupplyOptions options) {
canary_.Assert();
// We need this check here instead of in SupplyPagesLocked, because that function is also used to
// provide pages to contiguous VMOs.
if (is_contiguous()) {
return ZX_ERR_NOT_SUPPORTED;
}
auto cow_range = GetCowRange(offset, len);
if (!cow_range) {
return ZX_ERR_OUT_OF_RANGE;
}
auto range = *cow_range;
__UNINITIALIZED MultiPageRequest page_request;
while (!range.is_empty()) {
uint64_t supply_len = 0;
zx_status_t status;
{
__UNINITIALIZED VmCowPages::DeferredOps deferred(cow_pages_.get());
Guard<CriticalMutex> guard{lock()};
status = cow_pages_locked()->SupplyPagesLocked(range, pages, options, &supply_len, deferred,
&page_request);
}
if (status != ZX_ERR_SHOULD_WAIT && status != ZX_OK) {
return status;
}
// We could only have failed to supply anything if the status was not ZX_OK, and the only non-OK
// status that can reach this point is ZX_ERR_SHOULD_WAIT.
DEBUG_ASSERT(supply_len > 0 || status == ZX_ERR_SHOULD_WAIT);
// We should have supplied the entire range requested if the status was ZX_OK.
DEBUG_ASSERT(status != ZX_OK || supply_len == range.len);
// We should not have supplied any more than the requested range.
DEBUG_ASSERT(supply_len <= range.len);
// Trim the completed portion from the front of the remaining range.
range = range.TrimedFromStart(supply_len);
if (status == ZX_ERR_SHOULD_WAIT) {
status = page_request.Wait();
if (status != ZX_OK) {
return status;
}
}
}
return ZX_OK;
}
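// Marks the pages in [offset, offset + len) as dirty, retrying on delayed PMM allocations until
// the underlying VmCowPages can dirty the entire range.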
zx_status_t VmObjectPaged::DirtyPages(uint64_t offset, uint64_t len) {
// It is possible to encounter delayed PMM allocations, which requires waiting on the
// page_request.
__UNINITIALIZED AnonymousPageRequest page_request;
auto cow_range = GetCowRange(offset, len);
if (!cow_range) {
return ZX_ERR_OUT_OF_RANGE;
}
// Initialize a list that DirtyPages will allocate any new pages into before inserting them in the
// VMO. The allocated pages can then be carried across multiple calls to DirtyPages: rather than
// allocating and freeing pages each time DirtyPages cannot dirty the entire range atomically, we
// hold on to the allocated pages and reuse them on the next call. This ensures forward progress is
// made with each successive call to DirtyPages.
list_node alloc_list;
list_initialize(&alloc_list);
auto alloc_list_cleanup = fit::defer([&alloc_list, this]() -> void {
if (!list_is_empty(&alloc_list)) {
cow_pages_->FreePages(&alloc_list);
}
});
while (true) {
zx_status_t status = cow_pages_->DirtyPages(*cow_range, &alloc_list, &page_request);
if (status == ZX_OK) {
return ZX_OK;
}
if (status == ZX_ERR_SHOULD_WAIT) {
status = page_request.Allocate().status_value();
}
if (status != ZX_OK) {
return status;
}
// If the wait was successful, loop around and try the call again, which will re-validate any
// state that might have changed when the lock was dropped.
}
}
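// Enumerates the dirty ranges within [offset, offset + len), invoking |dirty_range_fn| with
// offsets translated back from cow space into this object's address space.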
zx_status_t VmObjectPaged::EnumerateDirtyRanges(uint64_t offset, uint64_t len,
DirtyRangeEnumerateFunction&& dirty_range_fn) {
Guard<CriticalMutex> guard{lock()};
if (auto cow_range = GetCowRange(offset, len)) {
// Need to wrap the callback to translate the cow pages offsets back into offsets as seen by
// this object.
return cow_pages_locked()->EnumerateDirtyRangesLocked(
*cow_range, [&dirty_range_fn, undo_offset = cow_range_.offset](
uint64_t range_offset, uint64_t range_len, bool range_is_zero) {
return dirty_range_fn(range_offset - undo_offset, range_len, range_is_zero);
});
}
return ZX_ERR_OUT_OF_RANGE;
}
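// Changes the cache policy applied to mappings of this VMO. Only permitted on a VMO with no pinned
// pages, no mappings, no children or parent, and no references; for pager-backed or discardable
// VMOs only the default cached policy is accepted.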
zx_status_t VmObjectPaged::SetMappingCachePolicy(const uint32_t cache_policy) {
// Is it a valid cache flag?
if (cache_policy & ~ZX_CACHE_POLICY_MASK) {
return ZX_ERR_INVALID_ARGS;
}
Guard<CriticalMutex> guard{lock()};
// conditions for allowing the cache policy to be set:
// 1) vmo has no pinned pages
// 2) vmo has no mappings
// 3) vmo has no children
// 4) vmo is not a child
if (cow_pages_locked()->pinned_page_count_locked() > 0) {
return ZX_ERR_BAD_STATE;
}
if (self_locked()->num_mappings_locked() != 0) {
return ZX_ERR_BAD_STATE;
}
// The ChildListLock needs to be held to inspect the children/parent pointers; however, we do not
// need to hold it over the remainder of this method, as the main VMO lock is held and creating a
// new child happens under that lock as well, since the creation path must check cache_policy_ and
// create the child in a single lock acquisition.
{
Guard<CriticalMutex> child_guard{ChildListLock::Get()};
if (!children_list_.is_empty()) {
return ZX_ERR_BAD_STATE;
}
if (parent_) {
return ZX_ERR_BAD_STATE;
}
}
// Forbid if there are references, or if this object is a reference itself. We do not want cache
// policies to diverge across references. Note that this check is required in addition to the
// children_list_ and parent_ check, because it is possible for a non-reference parent to go away,
// which will trigger the election of a reference as the new owner for the remaining
// reference_list_, and also reset the parent_.
if (!reference_list_.is_empty()) {
return ZX_ERR_BAD_STATE;
}
if (is_reference()) {
return ZX_ERR_BAD_STATE;
}
// It does not make sense for a pager-backed or discardable VMO to be uncached.
if (is_user_pager_backed() || is_discardable()) {
DEBUG_ASSERT(GetMappingCachePolicyLocked() == ARCH_MMU_FLAG_CACHED);
return cache_policy == ARCH_MMU_FLAG_CACHED ? ZX_OK : ZX_ERR_BAD_STATE;
}
// Set the cache policy before informing the VmCowPages, as it may make decisions based on the
// final cache policy.
// There's no good way to convince the static analysis that the lock() we hold is also the
// VmObject::lock(), so we disable analysis to set cache_policy_.
[this, &cache_policy]() TA_REQ(lock())
TA_NO_THREAD_SAFETY_ANALYSIS { cache_policy_ = cache_policy; }();
// Ask the cow pages to perform any internal transitions and, most importantly, to clean and
// invalidate any committed pages. In the case of going from cached->uncached, the clean+invalidate
// ensures that any modifications are cleaned back to RAM so that an uncached mapping sees any
// modifications made prior to changing the cache policy. When going from uncached->cached, due to
// the cached physmap there could be cache lines holding stale data for pages that were modified
// via an uncached mapping. As these cache lines are, by definition, clean, a clean+invalidate will
// simply invalidate them without writing them back, ensuring that a future access via a cached
// mapping sees the up-to-date value.
// Note that uncached here refers to any of the uncached policies: device, write combining, etc.
// Transitioning between different uncached policies does not require a cache operation for
// correctness, but it is also harmless and not a case we attempt to optimize for.
cow_pages_locked()->FinishCachePolicyTransitionLocked();
return ZX_OK;
}
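// Applies |op| to any mappings that intersect |range| (given in cow space) and then forwards the
// update to all reference children, which share the full range of this VMO.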
void VmObjectPaged::RangeChangeUpdateLocked(VmCowRange range, RangeChangeOp op) {
canary_.Assert();
// Offsets for VMOs need not be page aligned, but VMARs use aligned offsets.
uint64_t aligned_offset = ROUNDDOWN_PAGE_SIZE(range.offset);
uint64_t aligned_len = ROUNDUP_PAGE_SIZE(range.end()) - aligned_offset;
if (GetIntersect(cow_range_.offset, cow_range_.len, aligned_offset, aligned_len, &aligned_offset,
&aligned_len)) {
// Found the intersection in cow space, convert back to object space.
aligned_offset -= cow_range_.offset;
self_locked()->RangeChangeUpdateMappingsLocked(aligned_offset, aligned_len, op);
}
// Propagate the change to reference children as well. This is done regardless of intersection
// because we may have become the holder of the reference list even if the references were not
// originally made against us, and so their cow views might be different.
for (auto& ref : reference_list_) {
AssertHeld(ref.lock_ref());
// Use the same offset and len. References span the entirety of the parent VMO and hence share
// all offsets.
ref.RangeChangeUpdateLocked(range, op);
}
}
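// Translates [offset, len) into cow space and forwards the range change to whichever
// VmObjectPaged currently holds the paged backlink of the shared VmCowPages.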
void VmObjectPaged::ForwardRangeChangeUpdateLocked(uint64_t offset, uint64_t len,
RangeChangeOp op) {
canary_.Assert();
// Call RangeChangeUpdateLocked on the owner of the CowPages.
AssertHeld(cow_pages_locked()->get_paged_backlink_locked()->lock_ref());
if (auto cow_range = GetCowRange(offset, len)) {
cow_pages_locked()->get_paged_backlink_locked()->RangeChangeUpdateLocked(*cow_range, op);
}
}
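// The three discardable range operations below (LockRange, TryLockRange, UnlockRange) translate
// the caller-supplied range into cow space and forward to the corresponding VmCowPages methods;
// they are only supported on discardable VMOs.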
zx_status_t VmObjectPaged::LockRange(uint64_t offset, uint64_t len,
zx_vmo_lock_state_t* lock_state_out) {
if (!is_discardable()) {
return ZX_ERR_NOT_SUPPORTED;
}
auto cow_range = GetCowRange(offset, len);
if (!cow_range) {
return ZX_ERR_OUT_OF_RANGE;
}
Guard<CriticalMutex> guard{lock()};
return cow_pages_locked()->LockRangeLocked(*cow_range, lock_state_out);
}
zx_status_t VmObjectPaged::TryLockRange(uint64_t offset, uint64_t len) {
if (!is_discardable()) {
return ZX_ERR_NOT_SUPPORTED;
}
auto cow_range = GetCowRange(offset, len);
if (!cow_range) {
return ZX_ERR_OUT_OF_RANGE;
}
Guard<CriticalMutex> guard{lock()};
return cow_pages_locked()->TryLockRangeLocked(*cow_range);
}
zx_status_t VmObjectPaged::UnlockRange(uint64_t offset, uint64_t len) {
if (!is_discardable()) {
return ZX_ERR_NOT_SUPPORTED;
}
auto cow_range = GetCowRange(offset, len);
if (!cow_range) {
return ZX_ERR_OUT_OF_RANGE;
}
Guard<CriticalMutex> guard{lock()};
return cow_pages_locked()->UnlockRangeLocked(*cow_range);
}
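// Looks up the page at |offset|, returning it and/or its physical address through |page| and
// |pa|. Without a fault flag in |pf_flags| this only returns an already present page; with a
// fault flag the page is requested from the lookup cursor, which may return ZX_ERR_SHOULD_WAIT
// for the caller to resolve via |page_request|.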
zx_status_t VmObjectPaged::GetPage(uint64_t offset, uint pf_flags, list_node* alloc_list,
MultiPageRequest* page_request, vm_page_t** page, paddr_t* pa) {
__UNINITIALIZED VmCowPages::DeferredOps deferred(cow_pages_.get());
Guard<CriticalMutex> guard{lock()};
const bool write = pf_flags & VMM_PF_FLAG_WRITE;
zx::result<VmCowPages::LookupCursor> cursor = GetLookupCursorLocked(offset, PAGE_SIZE);
if (cursor.is_error()) {
return cursor.error_value();
}
AssertHeld(cursor->lock_ref());
// Hardware faults update access times separately, so disable the cursor's default access-time
// update for them; all other lookup reasons keep the default update.
if (pf_flags & VMM_PF_FLAG_HW_FAULT) {
cursor->DisableMarkAccessed();
}
if (!(pf_flags & VMM_PF_FLAG_FAULT_MASK)) {
vm_page_t* p = cursor->MaybePage(write);
if (!p) {
return ZX_ERR_NOT_FOUND;
}
if (page) {
*page = p;
}
if (pa) {
*pa = p->paddr();
}
return ZX_OK;
}
auto result = cursor->RequirePage(write, PAGE_SIZE, deferred, page_request);
if (result.is_error()) {
return result.error_value();
}
if (page) {
*page = result->page;
}
if (pa) {
*pa = result->page->paddr();
}
return ZX_OK;
}