// zircon/kernel/vm/vm_object_paged.cc - fuchsia - Git at Google

 // Copyright 2016 The Fuchsia Authors
 //
 // Use of this source code is governed by a MIT-style
 // license that can be found in the LICENSE file or at
 // https://opensource.org/licenses/MIT
 #include "vm/vm_object_paged.h"

 #include <align.h>
 #include <assert.h>
 #include <err.h>
 #include <inttypes.h>
 #include <lib/console.h>
 #include <lib/counters.h>
 #include <stdlib.h>
 #include <string.h>
 #include <trace.h>
 #include <zircon/types.h>

 #include <arch/ops.h>
 #include <fbl/alloc_checker.h>
 #include <fbl/auto_call.h>
 #include <ktl/algorithm.h>
 #include <ktl/array.h>
 #include <ktl/move.h>
 #include <vm/bootreserve.h>
 #include <vm/fault.h>
 #include <vm/page_source.h>
 #include <vm/physmap.h>
 #include <vm/vm.h>
 #include <vm/vm_address_region.h>
 #include <vm/vm_cow_pages.h>

 #include "vm_priv.h"

 #define LOCAL_TRACE VM_GLOBAL_TRACE(0)

 namespace {

 // Kernel counters describing how VmObjectPaged::AttributedPagesInRangeLocked() is used and how
 // often its cached whole-object page count can be reused.

 // Bumped for every attribution query on a non-hidden VMO whose range is non-empty after trimming.
 KCOUNTER(vmo_attribution_queries_all, "vm.object.attribution.queries_all")
 // Bumped for queries that cover the entire VMO; only those consult the cached value.
 KCOUNTER(vmo_attribution_queries_entire_object, "vm.object.attribution.queries_entire_object")
 // Bumped when a whole-VMO query is answered from the cached page count.
 KCOUNTER(vmo_attribution_cache_hits, "vm.object.attribution.cache_hits")
 // Bumped when a whole-VMO query finds a stale cache entry and has to recompute the page count.
 KCOUNTER(vmo_attribution_cache_misses, "vm.object.attribution.cache_misses")

 }  // namespace

 // Constructs a paged VMO that takes ownership of |cow_pages| and shares |hierarchy_state| with the
 // rest of its hierarchy. |options| is stored verbatim in |options_|.
 //
 // The constructor registers |this| as the paged backlink of the cow pages and then publishes the
 // object on the global VMO list.
 VmObjectPaged::VmObjectPaged(uint32_t options, fbl::RefPtr<VmHierarchyState> hierarchy_state,
                              fbl::RefPtr<VmCowPages> cow_pages)
     : VmObject(ktl::move(hierarchy_state)), options_(options), cow_pages_(ktl::move(cow_pages)) {
   LTRACEF("%p\n", this);

   {
     // The backlink setter is a *_locked method, so take our lock just for this call.
     Guard<Mutex> guard{&lock_};
     cow_pages_locked()->set_paged_backlink_locked(this);
   }

   // Adding to the global list needs to be done at the end of the ctor, since
   // calls can be made into this object as soon as it is in that list.
   AddToGlobalList();
 }

 // Records |parent| as this VMO's parent and remembers the parent's user id as the original parent
 // id. May only be called while no parent has been set yet (checked in debug builds). |offset| is
 // accepted for interface symmetry but is not used here.
 void VmObjectPaged::InitializeOriginalParentLocked(fbl::RefPtr<VmObjectPaged> parent,
                                                    uint64_t offset) {
   // Neither the parent pointer nor the original parent id may have been initialized before.
   DEBUG_ASSERT(parent_ == nullptr);
   DEBUG_ASSERT(original_parent_user_id_ == 0);

   // Reading the parent's user id requires its lock to be held.
   AssertHeld(parent->lock_);
   const auto parent_user_id = parent->user_id_locked();
   original_parent_user_id_ = parent_user_id;

   // Take over the caller's reference.
   parent_ = ktl::move(parent);
 }

 // Tears the VMO down: takes it off the global list, unpins the pages of a successfully created
 // (non-slice) contiguous VMO, clears the backlink stored in the cow pages and, for non-hidden
 // VMOs, detaches from the parent.
 VmObjectPaged::~VmObjectPaged() {
   canary_.Assert();

   LTRACEF("%p\n", this);

   RemoveFromGlobalList();

   Guard<Mutex> guard{&lock_};

   if (is_contiguous() && !is_slice()) {
     // A contiguous VMO either has all its pages committed and pinned or, if creation failed, no
     // pages committed and pinned. Check if we are in the failure case by looking up the first page.
     if (GetPageLocked(0, 0, nullptr, nullptr, nullptr, nullptr) == ZX_OK) {
       cow_pages_locked()->UnpinLocked(0, size_locked());
     }
   }

   // Use the cow_pages_locked() accessor, like every other *Locked call on the cow pages in this
   // file, instead of dereferencing cow_pages_ directly.
   cow_pages_locked()->set_paged_backlink_locked(nullptr);

   if (!is_hidden()) {
     // If we're not a hidden vmo, then we need to remove ourself from our parent. This needs
     // to be done before emptying the page list so that a hidden parent can't merge into this
     // vmo and repopulate the page list.
     //
     // To prevent races with a hidden parent merging itself into this vmo, it is necessary
     // to hold the lock over the parent_ check and into the subsequent removal call.
     if (parent_) {
       LTRACEF("removing ourself from our parent %p\n", parent_.get());
       // RemoveChild adopts our guard, so the lock is handed over rather than released here.
       parent_->RemoveChild(this, guard.take());
       // Avoid recursing destructors when we delete our parent by using the deferred deletion
       // method. See the comment in the else branch below for why we can avoid this on a hidden
       // parent.
       if (!parent_->is_hidden()) {
         hierarchy_state_ptr_->DoDeferredDelete(ktl::move(parent_));
       }
     }
   } else {
     // Most of the hidden vmo's state should have already been cleaned up when it merged
     // itself into its child in ::RemoveChild.
     DEBUG_ASSERT(children_list_len_ == 0);
     // Even though we are hidden we might have a parent. Unlike in the other branch of this if we
     // do not need to perform any deferred deletion. The reason for this is that the deferred
     // deletion mechanism is intended to resolve the scenario where there is a chain of 'one ref'
     // parent pointers that will chain delete. However, with hidden parents we *know* that a hidden
     // parent has two children (and hence at least one other ref to it) and so we cannot be in a
     // one ref chain. Even if N threads all tried to remove children from the hierarchy at once,
     // this would ultimately get serialized through the lock and the hierarchy would go from
     //
     //          [..]
     //           /
     //          A                             [..]
     //         / \                             /
     //        B   E           TO         B    A
     //       / \                        /    / \.
     //      C   D                      C    D   E
     //
     // And so each serialized deletion breaks off a discrete two VMO chain that can be safely
     // finalized with one recursive step.
   }
 }

 // Walks every user-aspace mapping of this VMO and harvests the accessed bits over the whole VMO
 // range. For each reported page (other than the zero page) the page is looked up again through
 // the cow pages, without requesting any fault or write behavior. Does nothing when the hierarchy
 // has no root page source.
 void VmObjectPaged::HarvestAccessedBits() {
   canary_.Assert();

   Guard<Mutex> guard{lock()};
   // If there is no root page source, then we have nothing worth harvesting bits from.
   if (cow_pages_locked()->GetRootPageSourceLocked() == nullptr) {
     return;
   }

   // Per-page callback handed to the mappings: returns false for the zero page and true for every
   // other page after looking it up.
   fbl::Function<bool(vm_page_t*, uint64_t)> f = [this](vm_page_t* p, uint64_t offset) {
     AssertHeld(lock_);
     // Skip the zero page as we are never going to evict it and initial zero pages will not be
     // returned by GetPageLocked down below.
     if (p == vm_get_zero_page()) {
       return false;
     }
     // Use GetPageLocked to perform page lookup. Pass neither software fault, hardware fault or
     // write to prevent any committing or copy-on-write behavior. This will just cause the page to
     // be looked up, and its location in any pager_backed queues updated.
     __UNUSED vm_page_t* out;
     __UNUSED zx_status_t result =
         cow_pages_locked()->GetPageLocked(offset, 0, nullptr, nullptr, &out, nullptr);
     // We are in this callback because there is a physical page mapped into the hardware page table
     // attributed to this vmo. If we cannot find it, or it isn't the page we expect, then something
     // has gone horribly wrong.
     DEBUG_ASSERT(result == ZX_OK);
     DEBUG_ASSERT(out == p);
     return true;
   };
   // Only mappings that live in a user address space are harvested; others are skipped.
   for (auto& m : mapping_list_) {
     if (m.aspace()->is_user()) {
       AssertHeld(*m.object_lock());
       __UNUSED zx_status_t result = m.HarvestAccessVmoRangeLocked(0, size_locked(), f);
       // There's no way we should be harvesting an invalid range as that would imply that this
       // mapping is invalid.
       DEBUG_ASSERT(result == ZX_OK);
     }
   }
 }

 // Asks the cow pages to de-duplicate |page| at |offset| against the zero page.
 //
 // Returns true when the cow pages report that the page was de-duplicated; in that case the
 // eviction event count and the hierarchy generation count are bumped. Returns false when the VMO
 // is not cached, when any mapping lives outside a user address space, or when the cow pages
 // decline.
 bool VmObjectPaged::DedupZeroPage(vm_page_t* page, uint64_t offset) {
   canary_.Assert();

   Guard<Mutex> guard{&lock_};

   // Uncached VMOs cannot be scanned efficiently, so leave them alone.
   const bool is_cached = (cache_policy_ & ZX_CACHE_POLICY_MASK) == ZX_CACHE_POLICY_CACHED;
   if (!is_cached) {
     return false;
   }

   // A non-user mapping means the kernel itself is using this VMO: write permissions cannot be
   // safely removed from such a mapping, and de-duplicating would likely be undesirable anyway.
   for (auto& mapping : mapping_list_) {
     if (!mapping.aspace()->is_user()) {
       return false;
     }
   }

   if (!cow_pages_locked()->DedupZeroPageLocked(page, offset)) {
     return false;
   }

   // The page went away, so record the eviction and invalidate cached attribution counts.
   eviction_event_count_++;
   IncrementHierarchyGenerationCountLocked();
   return true;
 }

 // Scans the VMO for zero pages and returns the count reported by the cow pages. |reclaim| is
 // forwarded to the cow pages scan; when it is set and the count is non-zero the hierarchy
 // generation count is bumped.
 //
 // Returns 0 without scanning for uncached VMOs and for VMOs with a non-user mapping. Note that
 // mappings visited before a non-user mapping is found have already had write access removed.
 uint32_t VmObjectPaged::ScanForZeroPages(bool reclaim) {
   canary_.Assert();

   Guard<Mutex> guard{lock()};

   // Uncached VMOs cannot be scanned efficiently, so leave them alone.
   const bool is_cached = (cache_policy_ & ZX_CACHE_POLICY_MASK) == ZX_CACHE_POLICY_CACHED;
   if (!is_cached) {
     return 0;
   }

   for (auto& mapping : mapping_list_) {
     // A non-user mapping means the kernel itself is using this VMO: write permissions cannot be
     // safely removed from it, and de-duplication would likely be undesirable anyway.
     if (!mapping.aspace()->is_user()) {
       return 0;
     }
     // Strip write access so the contents cannot change underneath the scan.
     AssertHeld(*mapping.object_lock());
     mapping.RemoveWriteVmoRangeLocked(0, size_locked());
   }

   const uint32_t zero_pages = cow_pages_locked()->ScanForZeroPagesLocked(reclaim);
   if (zero_pages > 0 && reclaim) {
     IncrementHierarchyGenerationCountLocked();
   }
   return zero_pages;
 }

 // Shared creation path: builds a fresh hierarchy state, a VmCowPages of |size| bytes (after
 // RoundSize) and a VmObjectPaged wrapping both, and stores the result in |obj|.
 //
 // Returns the RoundSize or VmCowPages::Create failure status, ZX_ERR_NO_MEMORY when an allocation
 // fails, and ZX_OK otherwise. |obj| is only written on success.
 zx_status_t VmObjectPaged::CreateCommon(uint32_t pmm_alloc_flags, uint32_t options, uint64_t size,
                                         fbl::RefPtr<VmObjectPaged>* obj) {
   // Page-align the requested size first.
   if (zx_status_t round_status = RoundSize(size, &size); round_status != ZX_OK) {
     return round_status;
   }

   fbl::AllocChecker ac;
   auto hierarchy_state = fbl::MakeRefCountedChecked<VmHierarchyState>(&ac);
   if (!ac.check()) {
     return ZX_ERR_NO_MEMORY;
   }

   fbl::RefPtr<VmCowPages> pages;
   if (zx_status_t cow_status = VmCowPages::Create(hierarchy_state, pmm_alloc_flags, size, &pages);
       cow_status != ZX_OK) {
     return cow_status;
   }

   // The new VMO takes over both the hierarchy state and the cow pages.
   auto created = fbl::AdoptRef<VmObjectPaged>(
       new (&ac) VmObjectPaged(options, ktl::move(hierarchy_state), ktl::move(pages)));
   if (!ac.check()) {
     return ZX_ERR_NO_MEMORY;
   }

   *obj = ktl::move(created);
   return ZX_OK;
 }

 // Creates a non-contiguous paged VMO via CreateCommon().
 //
 // Returns ZX_ERR_INVALID_ARGS if |options| contains kContiguous, since contiguous VMOs must be
 // created through CreateContiguous(); otherwise returns whatever CreateCommon() returns.
 zx_status_t VmObjectPaged::Create(uint32_t pmm_alloc_flags, uint32_t options, uint64_t size,
                                   fbl::RefPtr<VmObjectPaged>* obj) {
   const bool wants_contiguous = (options & kContiguous) != 0;
   if (!wants_contiguous) {
     return CreateCommon(pmm_alloc_flags, options, size, obj);
   }

   // Force callers to use CreateContiguous() instead.
   return ZX_ERR_INVALID_ARGS;
 }

 // Creates a contiguous VMO of |size| bytes (after RoundSize) whose pages come from a single
 // pmm_alloc_contiguous() allocation aligned to 2^|alignment_log2|, and pins all of them.
 //
 // Returns the RoundSize/CreateCommon/AddNewPagesLocked/CommitRangeInternal failure status,
 // ZX_ERR_NO_MEMORY when the contiguous allocation fails, and ZX_OK otherwise. A |size| of zero
 // yields an empty contiguous VMO without allocating pages. |obj| is only written on success.
 zx_status_t VmObjectPaged::CreateContiguous(uint32_t pmm_alloc_flags, uint64_t size,
                                             uint8_t alignment_log2,
                                             fbl::RefPtr<VmObjectPaged>* obj) {
   DEBUG_ASSERT(alignment_log2 < sizeof(uint64_t) * 8);
   // make sure size is page aligned
   zx_status_t status = RoundSize(size, &size);
   if (status != ZX_OK) {
     return status;
   }

   fbl::RefPtr<VmObjectPaged> vmo;
   status = CreateCommon(pmm_alloc_flags, kContiguous, size, &vmo);
   if (status != ZX_OK) {
     return status;
   }

   if (size == 0) {
     *obj = ktl::move(vmo);
     return ZX_OK;
   }

   // allocate the pages
   list_node page_list;
   list_initialize(&page_list);

   size_t num_pages = size / PAGE_SIZE;
   paddr_t pa;
   status = pmm_alloc_contiguous(num_pages, pmm_alloc_flags, alignment_log2, &pa, &page_list);
   if (status != ZX_OK) {
     LTRACEF("failed to allocate enough pages (asked for %zu)\n", num_pages);
     return ZX_ERR_NO_MEMORY;
   }
   Guard<Mutex> guard{&vmo->lock_};
   // add them to the appropriate range of the object, this takes ownership of all the pages
   // regardless of outcome.
   status = vmo->cow_pages_locked()->AddNewPagesLocked(0, &page_list);
   if (status != ZX_OK) {
     return status;
   }

   // We already added the pages, so this will just cause them to be pinned. CommitRangeInternal
   // adopts the guard, so the lock is no longer held once it returns; use take() so the hand-off
   // is explicit.
   status = vmo->CommitRangeInternal(0, size, true, guard.take());
   if (status != ZX_OK) {
     // Decommit the range so the destructor doesn't attempt to unpin. The lock was released by
     // CommitRangeInternal, and the VMO is already on the global list, so it has to be re-acquired
     // before calling the *Locked variant.
     Guard<Mutex> decommit_guard{&vmo->lock_};
     vmo->DecommitRangeLocked(0, size);
     return status;
   }

   *obj = ktl::move(vmo);
   return ZX_OK;
 }

 // Creates a VMO backed directly by the wired physical pages underlying the kernel range
 // [|data|, |data| + |size|) and stores it in |obj|.
 //
 // For a non-zero |size|, both |data| and |size| must be page aligned (ASSERTed), and every page in
 // the range must be in the WIRED state; any other state panics. When |exclusive| is set and |data|
 // is not a physmap address, the range is also unmapped from the kernel address space.
 //
 // Returns the CreateCommon() failure status, or ZX_OK.
 zx_status_t VmObjectPaged::CreateFromWiredPages(const void* data, size_t size, bool exclusive,
                                                 fbl::RefPtr<VmObjectPaged>* obj) {
   LTRACEF("data %p, size %zu\n", data, size);

   fbl::RefPtr<VmObjectPaged> vmo;
   zx_status_t status = CreateCommon(PMM_ALLOC_FLAG_ANY, 0, size, &vmo);
   if (status != ZX_OK) {
     return status;
   }

   if (size > 0) {
     ASSERT(IS_PAGE_ALIGNED(size));
     ASSERT(IS_PAGE_ALIGNED(reinterpret_cast<uintptr_t>(data)));

     // Do a direct lookup of the physical pages backing the range of
     // the kernel that these addresses belong to and jam them directly
     // into the VMO.
     //
     // NOTE: This relies on the kernel not otherwise owning the pages.
     // If the setup of the kernel's address space changes so that the
     // pages are attached to a kernel VMO, this will need to change.

     paddr_t start_paddr = vaddr_to_paddr(data);
     ASSERT(start_paddr != 0);

     Guard<Mutex> guard{&vmo->lock_};

     // Pages are derived from |start_paddr| by offset, i.e. the range is treated as physically
     // contiguous.
     for (size_t count = 0; count < size / PAGE_SIZE; count++) {
       paddr_t pa = start_paddr + count * PAGE_SIZE;
       vm_page_t* page = paddr_to_vm_page(pa);
       ASSERT(page);

       if (page->state() == VM_PAGE_STATE_WIRED) {
         boot_reserve_unwire_page(page);
       } else {
         // This function is only valid for memory in the boot image,
         // which should all be wired.
         panic("page used to back static vmo in unusable state: paddr %#" PRIxPTR " state %u\n", pa,
               page->state());
       }
       status = vmo->cow_pages_locked()->AddNewPageLocked(count * PAGE_SIZE, page, false, false);
       ASSERT(status == ZX_OK);
     }

     if (exclusive && !is_physmap_addr(data)) {
       // unmap it from the kernel
       // NOTE: this means the image can no longer be referenced from original pointer
       status = VmAspace::kernel_aspace()->arch_aspace().Unmap(reinterpret_cast<vaddr_t>(data),
                                                               size / PAGE_SIZE, nullptr);
       ASSERT(status == ZX_OK);
     }
   }

   *obj = ktl::move(vmo);

   return ZX_OK;
 }

 // Creates a VMO of |size| bytes (after RoundSize) whose cow pages are backed by the page source
 // |src|, and stores it in |obj|.
 //
 // Returns the RoundSize or VmCowPages::CreateExternal failure status, ZX_ERR_NO_MEMORY when an
 // allocation fails, and ZX_OK otherwise. |obj| is only written on success.
 zx_status_t VmObjectPaged::CreateExternal(fbl::RefPtr<PageSource> src, uint32_t options,
                                           uint64_t size, fbl::RefPtr<VmObjectPaged>* obj) {
   // Page-align the requested size first.
   if (zx_status_t round_status = RoundSize(size, &size); round_status != ZX_OK) {
     return round_status;
   }

   fbl::AllocChecker ac;
   auto hierarchy_state = fbl::AdoptRef<VmHierarchyState>(new (&ac) VmHierarchyState);
   if (!ac.check()) {
     return ZX_ERR_NO_MEMORY;
   }

   // The page source is handed over to the cow pages.
   fbl::RefPtr<VmCowPages> pages;
   if (zx_status_t cow_status =
           VmCowPages::CreateExternal(ktl::move(src), hierarchy_state, size, &pages);
       cow_status != ZX_OK) {
     return cow_status;
   }

   auto created = fbl::AdoptRef<VmObjectPaged>(
       new (&ac) VmObjectPaged(options, ktl::move(hierarchy_state), ktl::move(pages)));
   if (!ac.check()) {
     return ZX_ERR_NO_MEMORY;
   }

   *obj = ktl::move(created);
   return ZX_OK;
 }

 // Splices |hidden_parent| into the hierarchy directly above |this|: it takes over our place under
 // our current parent (if any), becomes our new parent, and inherits our user id. If we were the
 // root, the hierarchy generation count is moved up into |hidden_parent|.
 void VmObjectPaged::InsertHiddenParentLocked(fbl::RefPtr<VmObjectPaged>&& hidden_parent) {
   AssertHeld(hidden_parent->lock_);
   // Insert the new VmObject |hidden_parent| between |this| and |parent_|.
   cow_pages_locked()->InsertHiddenParentLocked(hidden_parent->cow_pages_);
   if (parent_) {
     AssertHeld(parent_->lock_ref());
     hidden_parent->InitializeOriginalParentLocked(parent_, 0);
     parent_->ReplaceChildLocked(this, hidden_parent.get());
   } else {
     // The |hidden_parent| is now the root of this vmo hierarchy. Move the
     // |hierarchy_generation_count_| into the |hidden_parent|.
     hidden_parent->hierarchy_generation_count_ = hierarchy_generation_count_;
     hierarchy_generation_count_ = kGenerationCountInitial;
   }
   hidden_parent->AddChildLocked(this);
   // Copy (not move) the reference: the caller's |hidden_parent| is still used just below.
   parent_ = hidden_parent;

   // We use the user_id to walk the tree looking for the right child observer. This
   // is set after adding the hidden parent into the tree since that's not really
   // a 'real' child.
   hidden_parent->user_id_ = user_id_;
 }

 // Creates a slice child covering [|offset|, |offset| + |size|) of this VMO and stores it in
 // |child_vmo|. The child gets the kSlice option (plus kContiguous if this VMO is contiguous),
 // inherits this VMO's cache policy and, when |copy_name| is set, its name.
 //
 // Returns:
 //   ZX_ERR_INVALID_ARGS   if |offset| is not page aligned or the range is not within this VMO.
 //   ZX_ERR_NOT_SUPPORTED  if this VMO is resizable.
 //   ZX_ERR_BAD_STATE      if this VMO is not cached and not contiguous.
 //   ZX_ERR_NO_MEMORY      if allocating the child fails.
 //   Any failure from RoundSize() or VmCowPages::CreateChildSlice().
 zx_status_t VmObjectPaged::CreateChildSlice(uint64_t offset, uint64_t size, bool copy_name,
                                             fbl::RefPtr<VmObject>* child_vmo) {
   LTRACEF("vmo %p offset %#" PRIx64 " size %#" PRIx64 "\n", this, offset, size);

   canary_.Assert();

   // Offset must be page aligned.
   if (!IS_PAGE_ALIGNED(offset)) {
     return ZX_ERR_INVALID_ARGS;
   }

   // Make sure size is page aligned.
   zx_status_t status = RoundSize(size, &size);
   if (status != ZX_OK) {
     return status;
   }

   // Slice must be wholly contained. |size()| will read the size holding the lock. This extra
   // acquisition is correct as we must drop the lock in order to perform the allocations.
   uint64_t our_size = this->size();
   if (!InRange(offset, size, our_size)) {
     return ZX_ERR_INVALID_ARGS;
   }

   // Forbid creating children of resizable VMOs. This restriction may be lifted in the future.
   if (is_resizable()) {
     return ZX_ERR_NOT_SUPPORTED;
   }

   uint32_t options = kSlice;
   if (is_contiguous()) {
     options |= kContiguous;
   }

   // There are two reasons for declaring/allocating the clones outside of the vmo's lock. First,
   // the dtor might require taking the lock, so we need to ensure that it isn't called until
   // after the lock is released. Second, diagnostics code makes calls into vmos while holding
   // the global vmo lock. Since the VmObject ctor takes the global lock, we can't construct
   // any vmos under any vmo lock.
   fbl::RefPtr<VmCowPages> cow_pages;
   status = cow_pages_->CreateChildSlice(offset, size, &cow_pages);
   if (status != ZX_OK) {
     return status;
   }

   fbl::AllocChecker ac;
   auto vmo = fbl::AdoptRef<VmObjectPaged>(
       new (&ac) VmObjectPaged(options, hierarchy_state_ptr_, ktl::move(cow_pages)));
   if (!ac.check()) {
     return ZX_ERR_NO_MEMORY;
   }

   // Set under the lock below; the notification itself is issued after the lock is dropped.
   bool notify_one_child;
   {
     Guard<Mutex> guard{&lock_};
     AssertHeld(vmo->lock_);

     // If this VMO is contiguous then we allow creating an uncached slice as we will never
     // have to perform zeroing of pages. Pages will never be zeroed since contiguous VMOs have
     // all of their pages allocated (and so COW of the zero page will never happen). The VMO is
     // also not allowed to be resizable and so will never have to allocate new pages (and zero
     // them).
     if (cache_policy_ != ARCH_MMU_FLAG_CACHED && !is_contiguous()) {
       return ZX_ERR_BAD_STATE;
     }
     vmo->cache_policy_ = cache_policy_;

     // Initialize the parents for both parallel hierarchies.
     vmo->InitializeOriginalParentLocked(fbl::RefPtr(this), offset);
     vmo->cow_pages_locked()->InitializeOriginalParentLocked(cow_pages_, offset);

     // add the new vmo as a child before we do anything, since its
     // dtor expects to find it in its parent's child list
     notify_one_child = AddChildLocked(vmo.get());
     cow_pages_locked()->AddChildLocked(vmo->cow_pages_.get());

     if (copy_name) {
       vmo->name_ = name_;
     }
     // The hierarchy changed shape, so invalidate cached attribution counts.
     IncrementHierarchyGenerationCountLocked();
   }

   if (notify_one_child) {
     NotifyOneChild();
   }

   *child_vmo = ktl::move(vmo);

   return ZX_OK;
 }

 // Creates a copy-on-write clone of [|offset|, |offset| + |size|) of this VMO and stores it in
 // |child_vmo|. |resizable| selects whether the clone gets the kResizable option, and |copy_name|
 // whether it inherits this VMO's name.
 //
 // For CloneType::Snapshot a hidden parent is inserted above this VMO and the clone becomes a
 // child of that hidden parent; for CloneType::PrivatePagerCopy the clone becomes a direct child
 // of this VMO.
 //
 // Returns:
 //   ZX_ERR_INVALID_ARGS   if this VMO is contiguous or |offset| is not page aligned.
 //   ZX_ERR_NOT_SUPPORTED  if a Snapshot is requested but the cow pages are not COW clonable, or a
 //                         PrivatePagerCopy is requested but there is no root page source.
 //   ZX_ERR_BAD_STATE      if a Snapshot is requested while pages are pinned, or this VMO is not
 //                         cached.
 //   ZX_ERR_NO_MEMORY      if allocating the clone or the hidden parent fails.
 //   Any failure from RoundSize(), VmCowPages::CreateClone() or VmCowPages::CreateHidden().
 zx_status_t VmObjectPaged::CreateClone(Resizability resizable, CloneType type, uint64_t offset,
                                        uint64_t size, bool copy_name,
                                        fbl::RefPtr<VmObject>* child_vmo) {
   LTRACEF("vmo %p offset %#" PRIx64 " size %#" PRIx64 "\n", this, offset, size);

   canary_.Assert();

   // Copy-on-write clones of contiguous VMOs do not have meaningful semantics, so forbid them.
   if (is_contiguous()) {
     return ZX_ERR_INVALID_ARGS;
   }

   // offset must be page aligned
   if (!IS_PAGE_ALIGNED(offset)) {
     return ZX_ERR_INVALID_ARGS;
   }

   // make sure size is page aligned
   zx_status_t status = RoundSize(size, &size);
   if (status != ZX_OK) {
     return status;
   }

   auto options = resizable == Resizability::Resizable ? kResizable : 0u;
   // There are two reasons for declaring/allocating the clones outside of the vmo's lock. First,
   // the dtor might require taking the lock, so we need to ensure that it isn't called until
   // after the lock is released. Second, diagnostics code makes calls into vmos while holding
   // the global vmo lock. Since the VmObject ctor takes the global lock, we can't construct
   // any vmos under any vmo lock.
   fbl::RefPtr<VmCowPages> cow_pages;
   status = cow_pages_->CreateClone(offset, size, &cow_pages);
   if (status != ZX_OK) {
     return status;
   }

   fbl::AllocChecker ac;
   auto vmo = fbl::AdoptRef<VmObjectPaged>(
       new (&ac) VmObjectPaged(options, hierarchy_state_ptr_, ktl::move(cow_pages)));
   if (!ac.check()) {
     return ZX_ERR_NO_MEMORY;
   }

   fbl::RefPtr<VmObjectPaged> hidden_parent;
   // Optimistically create the hidden parent early as we want to do it outside the lock, but we
   // need to hold the lock to validate invariants.
   if (type == CloneType::Snapshot) {
     // The initial size is 0. It will be initialized as part of the atomic
     // insertion into the child tree.
     // |cow_pages| was moved into |vmo| above and is reused here as the out parameter.
     status = cow_pages_->CreateHidden(&cow_pages);
     if (status != ZX_OK) {
       return status;
     }
     hidden_parent = fbl::AdoptRef<VmObjectPaged>(
         new (&ac) VmObjectPaged(kHidden, hierarchy_state_ptr_, ktl::move(cow_pages)));
     if (!ac.check()) {
       return ZX_ERR_NO_MEMORY;
     }
   }

   // Set under the lock below; the notification itself is issued after the lock is dropped.
   bool notify_one_child;
   {
     Guard<Mutex> guard{&lock_};
     AssertHeld(vmo->lock_);
     // Validate the per-clone-type preconditions before anything in the hierarchy is modified.
     switch (type) {
       case CloneType::Snapshot: {
         // To create an eager copy-on-write clone, the kernel creates an artificial parent vmo
         // called a 'hidden vmo'. The content of the original vmo is moved into the hidden
         // vmo, and the original vmo becomes a child of the hidden vmo. Then a second child
         // is created, which is the userspace visible clone.
         //
         // Hidden vmos are an implementation detail that are not exposed to userspace.

         if (!cow_pages_locked()->IsCowClonableLocked()) {
           return ZX_ERR_NOT_SUPPORTED;
         }

         // If this is non-zero, that means that there are pages which hardware can
         // touch, so the vmo can't be safely cloned.
         // TODO: consider immediately forking these pages.
         if (cow_pages_locked()->pinned_page_count_locked()) {
           return ZX_ERR_BAD_STATE;
         }

         break;
       }
       case CloneType::PrivatePagerCopy:
         if (!cow_pages_locked()->GetRootPageSourceLocked()) {
           return ZX_ERR_NOT_SUPPORTED;
         }
         break;
     }

     // check that we're not uncached in some way
     if (cache_policy_ != ARCH_MMU_FLAG_CACHED) {
       return ZX_ERR_BAD_STATE;
     }

     VmObjectPaged* clone_parent;
     if (type == CloneType::Snapshot) {
       // Grab the raw pointer first: |hidden_parent| is passed as an rvalue on the next line.
       clone_parent = hidden_parent.get();

       InsertHiddenParentLocked(ktl::move(hidden_parent));

       // Invalidate everything the clone will be able to see. They're COW pages now,
       // so any existing mappings can no longer directly write to the pages.
       // This should be being done by VmCowPages, but as we are temporarily responsible for
       // construction of the hierarchy it's easier for us to do it for the moment.
       cow_pages_locked()->RangeChangeUpdateLocked(offset, size, RangeChangeOp::RemoveWrite);
     } else {
       clone_parent = this;
     }
     AssertHeld(clone_parent->lock_);

     // Initialize the parents for both parallel hierarchies.
     vmo->InitializeOriginalParentLocked(fbl::RefPtr(clone_parent), offset);
     vmo->cow_pages_locked()->InitializeOriginalParentLocked(clone_parent->cow_pages_, offset);

     // add the new vmo as a child before we do anything, since its
     // dtor expects to find it in its parent's child list
     notify_one_child = clone_parent->AddChildLocked(vmo.get());
     clone_parent->cow_pages_locked()->AddChildLocked(vmo->cow_pages_.get());

     if (copy_name) {
       vmo->name_ = name_;
     }
     // The hierarchy changed shape, so invalidate cached attribution counts.
     IncrementHierarchyGenerationCountLocked();
   }

   if (notify_one_child) {
     NotifyOneChild();
   }

   *child_vmo = ktl::move(vmo);

   return ZX_OK;
 }

 // Child-added hook. Non-hidden VMOs defer to VmObject::OnChildAddedLocked(). A hidden VMO forwards
 // the event to whichever of its children shares its user id, and returns that child's result; it
 // returns false while its own user id has not been assigned yet. Panics if an initialized hidden
 // VMO has no child with a matching user id.
 bool VmObjectPaged::OnChildAddedLocked() {
   if (!is_hidden()) {
     return VmObject::OnChildAddedLocked();
   }

   if (user_id_ == ZX_KOID_INVALID) {
     // The original vmo is added as a child of the hidden vmo before setting
     // the user id to prevent counting as its own child.
     return false;
   }

   // After initialization, hidden vmos always have two children - the vmo on which
   // zx_vmo_create_child was invoked and the vmo which that syscall created.
   DEBUG_ASSERT(children_list_len_ == 2);

   // Reaching into the children confuses analysis
   for (auto& c : children_list_) {
     DEBUG_ASSERT(c.is_paged());
     VmObjectPaged& child = static_cast<VmObjectPaged&>(c);
     AssertHeld(child.lock_);
     if (child.user_id_ == user_id_) {
       // May recurse through further hidden VMOs until a non-hidden one is reached.
       return child.OnChildAddedLocked();
     }
   }

   // One of the children should always have a matching user_id.
   panic("no child with matching user_id: %" PRIx64 "\n", user_id_);
 }

 // Removes |removed| from this VMO's children. |adopt| must be a guard that holds this VMO's lock
 // (checked in debug builds); ownership of the lock is taken over by this function.
 //
 // The hierarchy generation count is bumped and the corresponding cow pages child is removed
 // first. A non-hidden VMO then defers to VmObject::RemoveChild(). A hidden VMO drops both of its
 // children, splices the remaining child into its own place under its parent (handing the
 // generation count down if it was the root), and finally forwards the child-removed notification
 // to the non-hidden descendant that shares its user id, if one can be found.
 void VmObjectPaged::RemoveChild(VmObject* removed, Guard<Mutex>&& adopt) {
   DEBUG_ASSERT(adopt.wraps_lock(lock_ref().lock()));

   // This is scoped before guard to ensure the guard is dropped first, see comment where child_ref
   // is assigned for more details.
   fbl::RefPtr<VmObject> child_ref;

   Guard<Mutex> guard{AdoptLock, ktl::move(adopt)};

   IncrementHierarchyGenerationCountLocked();

   // Remove the child in our parallel hierarchy, resulting in any necessary merging with the
   // hidden parent to happen.
   cow_pages_locked()->RemoveChildLocked(static_cast<VmObjectPaged*>(removed)->cow_pages_.get());

   if (!is_hidden()) {
     VmObject::RemoveChild(removed, guard.take());
     return;
   }

   // Hidden vmos always have 0 or 2 children, but we can't be here with 0 children.
   DEBUG_ASSERT(children_list_len_ == 2);
   // A hidden vmo must be fully initialized to have 2 children.
   DEBUG_ASSERT(user_id_ != ZX_KOID_INVALID);

   DropChildLocked(removed);

   // With |removed| gone, the front of the list is the one remaining child.
   VmObject* child = &children_list_.front();
   DEBUG_ASSERT(child);

   // Attempt to upgrade our raw pointer to a ref ptr. This upgrade can fail in the scenario that
   // the child's refcount has dropped to zero and is also attempting to delete itself. If this
   // happens, as we hold the vmo lock we know our child cannot complete its destructor, and so we
   // can still modify pieces of it until we drop the lock. It is now possible that after we upgrade
   // we become the sole holder of a refptr, and the refptr *must* be destroyed after we release the
   // VMO lock to prevent a deadlock.
   child_ref = fbl::MakeRefPtrUpgradeFromRaw(child, guard);

   // Our children must be paged.
   DEBUG_ASSERT(child->is_paged());
   VmObjectPaged* typed_child = static_cast<VmObjectPaged*>(child);
   AssertHeld(typed_child->lock_);

   // Merging of most information happened when we asked our cow_pages parallel node to remove itself
   // as a child. The only item we are responsible for merging is the hierarchy_generation_count.
   if (!parent_) {
     typed_child->hierarchy_generation_count_ = hierarchy_generation_count_;
   }

   // The child which removed itself and led to the invocation should have a reference
   // to us, in addition to child.parent_ which we are about to clear.
   DEBUG_ASSERT(ref_count_debug() >= 2);

   // Drop the child from our list, but don't recurse back into this function. Then
   // remove ourselves from the clone tree.
   DropChildLocked(typed_child);
   if (parent_) {
     AssertHeld(parent_->lock_ref());
     parent_->ReplaceChildLocked(this, typed_child);
   }
   // Hand our parent reference (possibly null) to the remaining child.
   typed_child->parent_ = ktl::move(parent_);

   // To use child here we need to ensure that it will live long enough. Up until here even if child
   // was waiting to be destroyed, we knew it would stay alive as long as we held the lock. Since we
   // give away the guard in the call to OnUserChildRemoved, we can only perform the call if we can
   // separately guarantee the child stays alive by having a refptr to it.
   // In the scenario where the refptr does not exist, that means the upgrade failed and there is no
   // user object to signal anyway.
   if (child_ref) {
     // We need to proxy the closure down to the original user-visible vmo. To find
     // that, we can walk down the clone tree following the user_id_.
     VmObjectPaged* descendant = typed_child;
     AssertHeld(descendant->lock_);
     while (descendant && descendant->user_id_ == user_id_) {
       if (!descendant->is_hidden()) {
         descendant->OnUserChildRemoved(guard.take());
         return;
       }
       // |descendant| is hidden: continue with whichever of its two children carries our user id.
       VmObjectPaged* left = static_cast<VmObjectPaged*>(&descendant->children_list_.front());
       VmObjectPaged* right = static_cast<VmObjectPaged*>(&descendant->children_list_.back());
       AssertHeld(left->lock_ref());
       AssertHeld(right->lock_ref());
       if (left->user_id_locked() == user_id_) {
         descendant = left;
       } else if (right->user_id_locked() == user_id_) {
         descendant = right;
       } else {
         descendant = nullptr;
       }
     }
   }
 }

 // Prints a description of this VMO, indented by |depth| levels of two spaces: its address, user
 // id, ref count and parent, followed by its name (if non-empty) one level deeper. The cow pages
 // are then asked to dump themselves with the same |depth| and |verbose| values.
 void VmObjectPaged::DumpLocked(uint depth, bool verbose) const {
   canary_.Assert();

   // Emits two spaces per requested nesting level.
   const auto print_indent = [](uint levels) {
     for (uint level = 0; level < levels; ++level) {
       printf("  ");
     }
   };

   const uint64_t parent_koid = original_parent_user_id_;

   print_indent(depth);
   printf("vmo %p/k%" PRIu64 " ref %d parent %p/k%" PRIu64 "\n", this, user_id_, ref_count_debug(),
          parent_.get(), parent_koid);

   char name[ZX_MAX_NAME_LEN];
   get_name(name, sizeof(name));
   // Only print the name line when the name is not the empty string.
   if (name[0] != '\0') {
     print_indent(depth + 1);
     printf("name %s\n", name);
   }

   cow_pages_locked()->DumpLocked(depth, verbose);
 }

 // Bumps the hierarchy generation count, which is stored on the root of the hierarchy (the VMO
 // reached by following |parent_| until it is null). Non-root VMOs are expected to keep their own
 // count at |kGenerationCountInitial|. When the count would exceed UINT32_MAX it wraps back to
 // |kGenerationCountInitial| instead.
 void VmObjectPaged::IncrementHierarchyGenerationCountLocked() {
   auto vmo = this;
   AssertHeld(vmo->lock_);
   // Walk up to the root of the hierarchy.
   while (vmo->parent_) {
     DEBUG_ASSERT(vmo->hierarchy_generation_count_ == kGenerationCountInitial);
     vmo = vmo->parent_.get();
     DEBUG_ASSERT(vmo);
   }

   if (vmo->hierarchy_generation_count_ == UINT32_MAX) {
     // Wrap around to the initial value in case of overflow.
     vmo->hierarchy_generation_count_ = kGenerationCountInitial;
     // Invalidate the cached page attribution value (resetting its generation count to
     // |kGenerationCountUnset|), thereby forcing a recompute on the next
     // |AttributedPagesInRangeLocked()| call. This handles the corner case where the cached
     // |generation_count| was |kGenerationCountInitial|, resulting in a false cache hit.
     // NOTE(review): this resets the cache of |this| only, not of the root or of any other VMO in
     // the hierarchy - confirm that is intended.
     cached_page_attribution_ = {};
   } else {
     ++vmo->hierarchy_generation_count_;
   }
   DEBUG_ASSERT(vmo->hierarchy_generation_count_ != kGenerationCountUnset);
 }

 uint32_t VmObjectPaged::GetHierarchyGenerationCountLocked() const {
   auto vmo = this;
   AssertHeld(vmo->lock_);
   while (vmo->parent_) {
     DEBUG_ASSERT(vmo->hierarchy_generation_count_ == kGenerationCountInitial);
     vmo = vmo->parent_.get();
     DEBUG_ASSERT(vmo);
   }
   DEBUG_ASSERT(vmo->hierarchy_generation_count_ != kGenerationCountUnset);
   return vmo->hierarchy_generation_count_;
 }

 // Returns the number of pages attributed to this VMO within [|offset|, |offset| + |len|), as
 // reported by the cow pages after the range has been trimmed to the VMO size.
 //
 // Hidden VMOs and ranges that TrimRange() rejects yield 0. Queries that span the entire VMO are
 // served from |cached_page_attribution_| while the hierarchy generation count is unchanged, and
 // refresh that cache otherwise; partial-range queries never touch the cache.
 size_t VmObjectPaged::AttributedPagesInRangeLocked(uint64_t offset, uint64_t len) const {
   if (is_hidden()) {
     return 0;
   }

   uint64_t trimmed_len;
   if (!TrimRange(offset, len, size_locked(), &trimmed_len)) {
     return 0;
   }

   vmo_attribution_queries_all.Add(1);

   // Partial ranges are always computed from scratch.
   const bool covers_whole_object = (offset == 0 && trimmed_len == size_locked());
   if (!covers_whole_object) {
     return cow_pages_locked()->AttributedPagesInRangeLocked(offset, trimmed_len);
   }

   vmo_attribution_queries_entire_object.Add(1);
   const uint32_t current_gen = GetHierarchyGenerationCountLocked();

   // An unchanged generation count means the cached answer from the last query is still valid.
   if (cached_page_attribution_.generation_count == current_gen) {
     vmo_attribution_cache_hits.Add(1);
     return cached_page_attribution_.page_count;
   }
   vmo_attribution_cache_misses.Add(1);

   const size_t attributed = cow_pages_locked()->AttributedPagesInRangeLocked(offset, trimmed_len);

   // Remember the fresh count together with the generation it was computed for.
   DEBUG_ASSERT(cached_page_attribution_.generation_count != current_gen);
   cached_page_attribution_.generation_count = current_gen;
   cached_page_attribution_.page_count = attributed;

   return attributed;
 }

 // Commits (and, when |pin| is set, pins) the range [offset, offset + len) using a lock guard
 // that the caller has already acquired.
 //
 // |adopt| must wrap this object's lock (asserted below). The held lock is adopted into a local
 // guard, which is then handed on to the cow pages CommitRange() together with |offset|, |len|
 // and |pin|. The status of that call is returned unchanged.
 zx_status_t VmObjectPaged::CommitRangeInternal(uint64_t offset, uint64_t len, bool pin,
                                                Guard<Mutex>&& adopt) {
   canary_.Assert();
   LTRACEF("offset %#" PRIx64 ", len %#" PRIx64 "\n", offset, len);

   // Take over the caller's guard; from here on |guard| owns the held lock.
   DEBUG_ASSERT(adopt.wraps_lock(lock_ref().lock()));
   Guard<Mutex> guard{AdoptLock, ktl::move(adopt)};
   // Convince the static analysis that we now do actually hold lock_.
   AssertHeld(lock_);

   // Child slices of VMOs are currently not resizable, nor can they be made
   // from resizable parents.  If this ever changes, the logic surrounding what
   // to do if a VMO gets resized during a Commit or Pin operation will need to
   // be revisited.  Right now, we can just rely on the fact that the initial
   // vetting/trimming of the offset and length of the operation will never
   // change if the operation is being executed against a child slice.
   DEBUG_ASSERT(!is_resizable() || !is_slice());

   return cow_pages_locked()->CommitRange(offset, len, pin, ktl::move(guard));
 }

 // Decommits the range [offset, offset + len) of this VMO.
 //
 // Returns ZX_ERR_NOT_SUPPORTED for contiguous VMOs; otherwise acquires the lock and returns the
 // result of DecommitRangeLocked().
 zx_status_t VmObjectPaged::DecommitRange(uint64_t offset, uint64_t len) {
   canary_.Assert();
   LTRACEF("offset %#" PRIx64 ", len %#" PRIx64 "\n", offset, len);
   // Contiguous VMOs are rejected before the lock is even taken.
   if (is_contiguous()) {
     return ZX_ERR_NOT_SUPPORTED;
   }
   Guard<Mutex> guard{&lock_};
   return DecommitRangeLocked(offset, len);
 }

 // Decommits [offset, offset + len) through the cow pages. The hierarchy generation count is
 // incremented only when the decommit reports success; a failure status is passed straight back.
 zx_status_t VmObjectPaged::DecommitRangeLocked(uint64_t offset, uint64_t len) {
   canary_.Assert();

   const zx_status_t decommit_status = cow_pages_locked()->DecommitRangeLocked(offset, len);
   if (decommit_status != ZX_OK) {
     return decommit_status;
   }

   IncrementHierarchyGenerationCountLocked();
   return ZX_OK;
 }

 // Zeroes the bytes [zero_start_offset, zero_end_offset) of the page that begins at
 // |page_base_offset|. Both zero offsets are relative to the start of that page.
 //
 // Nothing is written when the cow pages report that the page would already read as zero.
 // Otherwise the bytes are cleared through ReadWriteInternalLocked(), whose status is returned.
 zx_status_t VmObjectPaged::ZeroPartialPage(uint64_t page_base_offset, uint64_t zero_start_offset,
                                            uint64_t zero_end_offset, Guard<Mutex>* guard) {
   DEBUG_ASSERT(zero_start_offset <= zero_end_offset);
   DEBUG_ASSERT(zero_end_offset <= PAGE_SIZE);
   DEBUG_ASSERT(IS_PAGE_ALIGNED(page_base_offset));
   DEBUG_ASSERT(page_base_offset < size_locked());

   // TODO: Consider replacing this with a more appropriate generic API when one is available.
   const bool reads_as_zero = cow_pages_locked()->PageWouldReadZeroLocked(page_base_offset);
   if (reads_as_zero) {
     // This is already considered zero so no need to redundantly zero again.
     return ZX_OK;
   }

   // Copy routine that clears the destination bytes. It is handed the *kernel* address of an
   // allocated page, so the memset cannot fault. memset may not be the most efficient, but we
   // don't expect to be doing this very often.
   auto zero_fill = [](void* dst, size_t offset, size_t len, Guard<Mutex>* guard) -> zx_status_t {
     memset(dst, 0, len);
     return ZX_OK;
   };

   // Need to actually zero out bytes in the page.
   const uint64_t zero_begin = page_base_offset + zero_start_offset;
   const uint64_t zero_size = zero_end_offset - zero_start_offset;
   return ReadWriteInternalLocked(zero_begin, zero_size, true, zero_fill, guard);
 }

 // Zeroes the byte range [offset, offset + len) of this VMO.
 //
 // An unaligned head and/or tail is zeroed with ZeroPartialPage(); whatever page-aligned range
 // remains is handed to the cow pages ZeroPagesLocked(). Returns ZX_ERR_BAD_STATE if the VMO is
 // not cached and ZX_ERR_OUT_OF_RANGE if TrimRange() rejects the range; otherwise the status of
 // the zeroing steps is returned.
 zx_status_t VmObjectPaged::ZeroRange(uint64_t offset, uint64_t len) {
   canary_.Assert();
   Guard<Mutex> guard{&lock_};

   // Zeroing a range behaves as if it were an efficient zx_vmo_write. As we cannot write to an
   // uncached vmo, we also cannot zero an uncached vmo.
   if (cache_policy_ != ARCH_MMU_FLAG_CACHED) {
     return ZX_ERR_BAD_STATE;
   }

   // Trim the size and validate it is in range of the vmo.
   uint64_t new_len;
   if (!TrimRange(offset, len, size_locked(), &new_len)) {
     return ZX_ERR_OUT_OF_RANGE;
   }

   // Construct our initial range. Already checked the range above so we know it cannot overflow.
   uint64_t start = offset;
   uint64_t end = start + new_len;

   // Helper that checks and establishes our invariants. We use this after calling functions that
   // may have temporarily released the lock. Note that |end| is captured by value here, so the
   // check is always made against the end of the originally requested range.
   auto establish_invariants = [this, end]() TA_REQ(lock_) {
     if (end > size_locked()) {
       return ZX_ERR_BAD_STATE;
     }
     if (cache_policy_ != ARCH_MMU_FLAG_CACHED) {
       return ZX_ERR_BAD_STATE;
     }
     return ZX_OK;
   };

   uint64_t start_page_base = ROUNDDOWN(start, PAGE_SIZE);
   uint64_t end_page_base = ROUNDDOWN(end, PAGE_SIZE);

   if (unlikely(start_page_base != start)) {
     // Need to handle the case where end is unaligned and on the same page as start.
     if (unlikely(start_page_base == end_page_base)) {
       return ZeroPartialPage(start_page_base, start - start_page_base, end - start_page_base,
                              &guard);
     }
     // Zero from |start| to the end of its page, then continue from the next page boundary.
     zx_status_t status =
         ZeroPartialPage(start_page_base, start - start_page_base, PAGE_SIZE, &guard);
     if (status == ZX_OK) {
       status = establish_invariants();
     }
     if (status != ZX_OK) {
       return status;
     }
     start = start_page_base + PAGE_SIZE;
   }

   if (unlikely(end_page_base != end)) {
     // Zero the leading part of the final page and pull |end| back to its page boundary.
     zx_status_t status = ZeroPartialPage(end_page_base, 0, end - end_page_base, &guard);
     if (status == ZX_OK) {
       status = establish_invariants();
     }
     if (status != ZX_OK) {
       return status;
     }
     end = end_page_base;
   }

   // Now that we have a page aligned range we can try hand over to the cow pages zero method.
   // Always increment the gen count as it's possible for ZeroPagesLocked to fail part way through
   // and it doesn't unroll its actions.
   IncrementHierarchyGenerationCountLocked();

   return cow_pages_locked()->ZeroPagesLocked(start, end);
 }

 // Resizes the VMO to |s| bytes, rounded up to the next page boundary.
 //
 // Returns ZX_ERR_UNAVAILABLE if the VMO is not resizable, the RoundSize() error if rounding
 // fails, or the status of the cow pages ResizeLocked(). The hierarchy generation count is
 // incremented only after a successful resize.
 zx_status_t VmObjectPaged::Resize(uint64_t s) {
   canary_.Assert();

   LTRACEF("vmo %p, size %" PRIu64 "\n", this, s);

   if (!is_resizable()) {
     return ZX_ERR_UNAVAILABLE;
   }

   // Round up the size to the next page size boundary and make sure we don't wrap.
   if (zx_status_t round_status = RoundSize(s, &s); round_status != ZX_OK) {
     return round_status;
   }

   Guard<Mutex> guard{&lock_};

   const zx_status_t resize_status = cow_pages_locked()->ResizeLocked(s);
   if (resize_status == ZX_OK) {
     IncrementHierarchyGenerationCountLocked();
   }
   return resize_status;
 }

 // Perform some sort of copy in/out on a range of the object using a passed in lambda for the
 // copy routine. The range [offset, offset + len) is processed in pieces that never cross a page
 // boundary, and for each piece the copy routine is invoked as:
 //   copyfunc(char* page_ptr, size_t buffer_offset, size_t len, Guard<Mutex>* guard) -> zx_status_t
 // where |page_ptr| is the kernel address of the first byte of the piece inside the VMO's page,
 // |buffer_offset| is the number of bytes already processed (i.e. the position of this piece in
 // the caller's buffer) and |len| is the size of the piece. The routine may declare |page_ptr|
 // with any parameter type that a char* converts to.
 // The passed in guard may have its CallUnlocked member used, but if it does then ZX_OK must not be
 // the return value. A return of ZX_ERR_SHOULD_WAIT implies that the attempted copy should be tried
 // again at the exact same offsets.
 //
 // |write| selects whether each page is faulted in for writing. Returns ZX_ERR_OUT_OF_RANGE if
 // offset + len overflows or extends past the end of the VMO, ZX_ERR_BAD_STATE if the VMO is not
 // cached, and otherwise the first non-OK status from faulting in a page or from |copyfunc|.
 template <typename T>
 zx_status_t VmObjectPaged::ReadWriteInternalLocked(uint64_t offset, size_t len, bool write,
                                                    T copyfunc, Guard<Mutex>* guard) {
   canary_.Assert();

   uint64_t end_offset;
   if (add_overflow(offset, len, &end_offset)) {
     return ZX_ERR_OUT_OF_RANGE;
   }

   // Declare a lambda that will check any object properties we require to be true. We place these
   // in a lambda so that we can perform them any time the lock is dropped.
   auto check = [this, &end_offset]() -> zx_status_t {
     AssertHeld(lock_);
     if (cache_policy_ != ARCH_MMU_FLAG_CACHED) {
       return ZX_ERR_BAD_STATE;
     }
     if (end_offset > size_locked()) {
       return ZX_ERR_OUT_OF_RANGE;
     }
     return ZX_OK;
   };

   // Perform initial check.
   if (zx_status_t status = check(); status != ZX_OK) {
     return status;
   }

   // Track our two offsets: |src_offset| is the position within the VMO, |dest_offset| is the
   // position within the caller's buffer that is passed to |copyfunc|.
   uint64_t src_offset = offset;
   size_t dest_offset = 0;
   while (len > 0) {
     // Never copy past the end of the current page in a single step.
     const size_t page_offset = src_offset % PAGE_SIZE;
     const size_t tocopy = ktl::min(PAGE_SIZE - page_offset, len);

     // fault in the page
     PageRequest page_request;
     paddr_t pa;
     zx_status_t status =
         GetPageLocked(src_offset, VMM_PF_FLAG_SW_FAULT | (write ? VMM_PF_FLAG_WRITE : 0), nullptr,
                       &page_request, nullptr, &pa);
     if (status == ZX_ERR_SHOULD_WAIT) {
       // Must block on asynchronous page requests whilst not holding the lock.
       guard->CallUnlocked([&status, &page_request]() { status = page_request.Wait(); });
       if (status != ZX_OK) {
         if (status == ZX_ERR_TIMED_OUT) {
           DumpLocked(0, false);
         }
         return status;
       }
       // Recheck properties and if all is good go back to the top of the loop to attempt to fault in
       // the page again.
       status = check();
       if (status == ZX_OK) {
         continue;
       }
     }
     if (status != ZX_OK) {
       return status;
     }
     // Compute the kernel mapping of this page.
     char* page_ptr = reinterpret_cast<char*>(paddr_to_physmap(pa));

     // Call the copy routine. If the copy was successful then ZX_OK is returned, otherwise
     // ZX_ERR_SHOULD_WAIT may be returned to indicate the copy failed but we can retry it.
     status = copyfunc(page_ptr + page_offset, dest_offset, tocopy, guard);

     if (status == ZX_ERR_SHOULD_WAIT) {
       // Recheck properties. If all is good we cannot simply retry the copy as the underlying page
       // could have changed, so we retry the loop from the top.
       status = check();
       if (status == ZX_OK) {
         continue;
       }
     }
     if (status != ZX_OK) {
       return status;
     }

     // Advance the copy location.
     src_offset += tocopy;
     dest_offset += tocopy;
     len -= tocopy;
   }

   return ZX_OK;
 }

 // Copies |len| bytes starting at |offset| in the VMO into the kernel buffer |_ptr|.
 //
 // Returns ZX_ERR_INVALID_ARGS (after a debug assert) if |_ptr| is not a kernel address;
 // otherwise returns the status of ReadWriteInternalLocked().
 zx_status_t VmObjectPaged::Read(void* _ptr, uint64_t offset, size_t len) {
   canary_.Assert();

   // Only kernel pointers may be used as the destination.
   const bool is_kernel_ptr = is_kernel_address(reinterpret_cast<vaddr_t>(_ptr));
   if (!is_kernel_ptr) {
     DEBUG_ASSERT_MSG(0, "non kernel pointer passed\n");
     return ZX_ERR_INVALID_ARGS;
   }

   // Copy routine: a plain memcpy from the VMO page into the destination buffer.
   char* const dest_base = static_cast<char*>(_ptr);
   auto copy_from_page = [dest_base](const void* page_src, size_t dest_offset, size_t count,
                                     Guard<Mutex>* /*guard*/) -> zx_status_t {
     memcpy(dest_base + dest_offset, page_src, count);
     return ZX_OK;
   };

   Guard<Mutex> guard{&lock_};

   return ReadWriteInternalLocked(offset, len, false, copy_from_page, &guard);
 }

 // Copies |len| bytes from the kernel buffer |_ptr| into the VMO starting at |offset|.
 //
 // Returns ZX_ERR_INVALID_ARGS (after a debug assert) if |_ptr| is not a kernel address;
 // otherwise returns the status of ReadWriteInternalLocked().
 zx_status_t VmObjectPaged::Write(const void* _ptr, uint64_t offset, size_t len) {
   canary_.Assert();

   // Only kernel pointers may be used as the source.
   const bool is_kernel_ptr = is_kernel_address(reinterpret_cast<vaddr_t>(_ptr));
   if (!is_kernel_ptr) {
     DEBUG_ASSERT_MSG(0, "non kernel pointer passed\n");
     return ZX_ERR_INVALID_ARGS;
   }

   // Copy routine: a plain memcpy from the source buffer into the VMO page.
   const char* const src_base = static_cast<const char*>(_ptr);
   auto copy_to_page = [src_base](void* page_dst, size_t src_offset, size_t count,
                                  Guard<Mutex>* /*guard*/) -> zx_status_t {
     memcpy(page_dst, src_base + src_offset, count);
     return ZX_OK;
   };

   Guard<Mutex> guard{&lock_};

   return ReadWriteInternalLocked(offset, len, true, copy_to_page, &guard);
 }

 // Forwards the range [offset, offset + len), together with |lookup_fn| and |context|, to the cow
 // pages LookupLocked() while holding the lock.
 //
 // Returns ZX_ERR_INVALID_ARGS when |len| is zero; otherwise returns whatever LookupLocked()
 // returns.
 zx_status_t VmObjectPaged::Lookup(uint64_t offset, uint64_t len, vmo_lookup_fn_t lookup_fn,
                                   void* context) {
   canary_.Assert();
   // An empty range is rejected before the lock is taken.
   if (unlikely(len == 0)) {
     return ZX_ERR_INVALID_ARGS;
   }

   Guard<Mutex> guard{&lock_};

   return cow_pages_locked()->LookupLocked(offset, len, lookup_fn, context);
 }

 // Looks up the physical address of the page at |offset| and, if |out_paddr| is non-null, stores
 // it there.
 //
 // Returns ZX_ERR_INVALID_ARGS if |len| is zero or |offset| is not page aligned,
 // ZX_ERR_OUT_OF_RANGE if the range is not within the VMO, and ZX_ERR_BAD_STATE if the VMO is not
 // contiguous and |len| is anything other than a single page. Otherwise returns the status of the
 // cow pages LookupLocked().
 zx_status_t VmObjectPaged::LookupContiguous(uint64_t offset, uint64_t len, paddr_t* out_paddr) {
   canary_.Assert();

   if (unlikely(len == 0)) {
     return ZX_ERR_INVALID_ARGS;
   }
   if (unlikely(!IS_PAGE_ALIGNED(offset))) {
     return ZX_ERR_INVALID_ARGS;
   }

   Guard<Mutex> guard{&lock_};

   if (unlikely(!InRange(offset, len, size_locked()))) {
     return ZX_ERR_OUT_OF_RANGE;
   }

   if (!is_contiguous()) {
     // Multi-page lookup only supported for contiguous VMOs.
     if (unlikely(len != PAGE_SIZE)) {
       return ZX_ERR_BAD_STATE;
     }
   } else {
     // Already checked that the entire requested range is valid, and since we know all our pages
     // are contiguous we can simply lookup one page.
     len = PAGE_SIZE;
   }

   // Callback that records the physical address it is given, when the caller asked for it.
   auto record_paddr = [](void* arg, uint64_t page_offset, size_t index, paddr_t pa) {
     if (arg) {
       *static_cast<paddr_t*>(arg) = pa;
     }
     return ZX_OK;
   };

   // Lookup the one page / first page of contiguous VMOs.
   return cow_pages_locked()->LookupLocked(offset, len, record_paddr, out_paddr);
 }

 // Copies |len| bytes starting at |offset| in the VMO out to the user buffer |ptr|.
 //
 // The per-piece copy captures any fault instead of taking it. A captured fault is resolved via
 // |current_aspace|->SoftFault() with the lock dropped, after which the piece is retried by
 // ReadWriteInternalLocked(). Returns the status of ReadWriteInternalLocked().
 zx_status_t VmObjectPaged::ReadUser(VmAspace* current_aspace, user_out_ptr<char> ptr,
                                     uint64_t offset, size_t len) {
   canary_.Assert();

   // read routine that uses copy_to_user
   auto read_routine = [ptr, current_aspace](const char* src, size_t dest_offset, size_t count,
                                             Guard<Mutex>* guard) -> zx_status_t {
     auto copy_result =
         ptr.byte_offset(dest_offset).copy_array_to_user_capture_faults(src, count);

     if (!copy_result.fault_info.has_value()) {
       // No fault address was produced. If we encounter _any_ unrecoverable error from the copy
       // operation, squash the error down to just "NOT_FOUND". This is what the SoftFault error
       // would have told us if we did try to handle the fault and could not.
       if (copy_result.status != ZX_OK) {
         return ZX_ERR_NOT_FOUND;
       }
       return ZX_OK;
     }

     // A fault has actually occurred, so we have captured fault info that we can use to handle
     // the fault. This must be done without holding the lock.
     zx_status_t result;
     guard->CallUnlocked([&info = *copy_result.fault_info, &result, current_aspace] {
       result = current_aspace->SoftFault(info.pf_va, info.pf_flags);
     });
     if (result != ZX_OK) {
       return result;
     }
     // If we handled the fault, tell the upper level to try again.
     return ZX_ERR_SHOULD_WAIT;
   };

   Guard<Mutex> guard{&lock_};

   return ReadWriteInternalLocked(offset, len, false, read_routine, &guard);
 }

 // Copies |len| bytes from the user buffer |ptr| into the VMO starting at |offset|.
 //
 // The per-piece copy captures any fault instead of taking it. A captured fault is resolved via
 // |current_aspace|->SoftFault() with the lock dropped, after which the piece is retried by
 // ReadWriteInternalLocked(). Returns the status of ReadWriteInternalLocked().
 zx_status_t VmObjectPaged::WriteUser(VmAspace* current_aspace, user_in_ptr<const char> ptr,
                                      uint64_t offset, size_t len) {
   canary_.Assert();

   // write routine that uses copy_from_user
   //
   // |current_aspace| is a plain pointer, so it is captured by value (as ReadUser does) rather
   // than by reference to this function's parameter.
   auto write_routine = [ptr, current_aspace](char* dst, size_t offset, size_t len,
                                              Guard<Mutex>* guard) -> zx_status_t {
     auto copy_result = ptr.byte_offset(offset).copy_array_from_user_capture_faults(dst, len);

     // If a fault has actually occurred, then we will have captured fault info that we can use to
     // handle the fault.
     if (copy_result.fault_info.has_value()) {
       zx_status_t result;
       // The fault must be handled without holding the lock.
       guard->CallUnlocked([&info = *copy_result.fault_info, &result, current_aspace] {
         result = current_aspace->SoftFault(info.pf_va, info.pf_flags);
       });
       // If we handled the fault, tell the upper level to try again.
       return result == ZX_OK ? ZX_ERR_SHOULD_WAIT : result;
     }

     // If we encounter _any_ unrecoverable error from the copy operation which
     // produced no fault address, squash the error down to just "NOT_FOUND".
     // This is what the SoftFault error would have told us if we did try to
     // handle the fault and could not.
     return copy_result.status == ZX_OK ? ZX_OK : ZX_ERR_NOT_FOUND;
   };

   Guard<Mutex> guard{&lock_};

   return ReadWriteInternalLocked(offset, len, true, write_routine, &guard);
 }

 // Takes the pages in [offset, offset + len) out of this VMO into |pages| via the cow pages
 // TakePagesLocked().
 //
 // Returns ZX_ERR_BAD_STATE if the VMO has any mappings or children; otherwise returns the status
 // of TakePagesLocked(). The hierarchy generation count is incremented only on success.
 zx_status_t VmObjectPaged::TakePages(uint64_t offset, uint64_t len, VmPageSpliceList* pages) {
   canary_.Assert();

   Guard<Mutex> src_guard{&lock_};

   // This is only used by the userpager API, which has significant restrictions on
   // what sorts of vmos are acceptable. If splice starts being used in more places,
   // then this restriction might need to be lifted.
   // TODO: Check that the region is locked once locking is implemented
   const bool has_users = (mapping_list_len_ != 0) || (children_list_len_ != 0);
   if (has_users) {
     return ZX_ERR_BAD_STATE;
   }

   const zx_status_t take_status = cow_pages_locked()->TakePagesLocked(offset, len, pages);
   if (take_status != ZX_OK) {
     return take_status;
   }

   IncrementHierarchyGenerationCountLocked();
   return ZX_OK;
 }

 // Supplies |pages| for the range [offset, offset + len) through the cow pages
 // SupplyPagesLocked() while holding the lock, and returns its status.
 //
 // The hierarchy generation count is incremented before the supply is attempted, regardless of
 // whether it then succeeds.
 zx_status_t VmObjectPaged::SupplyPages(uint64_t offset, uint64_t len, VmPageSpliceList* pages) {
   canary_.Assert();

   Guard<Mutex> guard{&lock_};

   // It is possible that supply pages fails and we increment the gen count needlessly, but the user
   // is certainly expecting it to succeed.
   IncrementHierarchyGenerationCountLocked();

   return cow_pages_locked()->SupplyPagesLocked(offset, len, pages);
 }

 // Changes the cache policy used for mappings of this VMO to |cache_policy|.
 //
 // Returns ZX_ERR_INVALID_ARGS if |cache_policy| has bits outside ZX_CACHE_POLICY_MASK, and
 // ZX_ERR_BAD_STATE if any of the conditions listed in the body is violated. When moving away
 // from ARCH_MMU_FLAG_CACHED, every page that can be looked up is cleaned and invalidated before
 // the new policy is stored.
 zx_status_t VmObjectPaged::SetMappingCachePolicy(const uint32_t cache_policy) {
   // Is it a valid cache flag?
   if ((cache_policy & ~ZX_CACHE_POLICY_MASK) != 0) {
     return ZX_ERR_INVALID_ARGS;
   }

   Guard<Mutex> guard{&lock_};

   // conditions for allowing the cache policy to be set:
   // 1) vmo either has no pages committed currently or is transitioning from being cached
   // 2) vmo has no pinned pages
   // 3) vmo has no mappings
   // 4) vmo has no children
   // 5) vmo is not a child
   // Counting attributed pages does a sufficient job of checking for committed pages since we also
   // require no children and no parent, so attribution == precisely our pages.
   const bool has_committed_pages =
       cow_pages_locked()->AttributedPagesInRangeLocked(0, size_locked()) != 0;
   if (has_committed_pages && cache_policy_ != ARCH_MMU_FLAG_CACHED) {
     // We forbid to transitioning committed pages from any kind of uncached->cached policy as we do
     // not currently have a story for dealing with the speculative loads that may have happened
     // against the cached physmap. That is, whilst a page was uncached the cached physmap version
     // may have been loaded and sitting in cache. If we switch to cached mappings we may then use
     // stale data out of the cache.
     // This isn't a problem if going *from* an cached state, as we can safely clean+invalidate.
     // Similarly it's not a problem if there aren't actually any committed pages.
     return ZX_ERR_BAD_STATE;
   }

   // If we are contiguous we 'pre pinned' all the pages, but this doesn't count for pinning as far
   // as the user and potential DMA is concerned. Take this into account when checking if the user
   // pinned any pages.
   uint64_t expected_pin_count = 0;
   if (is_contiguous()) {
     expected_pin_count = size_locked() / PAGE_SIZE;
   }
   if (cow_pages_locked()->pinned_page_count_locked() > expected_pin_count) {
     return ZX_ERR_BAD_STATE;
   }

   // Conditions 3-5: no mappings, no children, and not a child itself.
   if (!mapping_list_.is_empty() || !children_list_.is_empty() || parent_) {
     return ZX_ERR_BAD_STATE;
   }

   // If transitioning from a cached policy we must clean/invalidate all the pages as the kernel may
   // have written to them on behalf of the user.
   // TODO: To avoid iterating the whole offset range VmCowPages needs an appropriate interface for
   // iterating on committed pages. Lookup is presently unsuitable as it performs a lookup for read,
   // which could return pages in our parent, instead of a lookup for write.
   const bool leaving_cached =
       (cache_policy_ == ARCH_MMU_FLAG_CACHED) && (cache_policy != ARCH_MMU_FLAG_CACHED);
   if (leaving_cached) {
     for (uint64_t page_offset = 0; page_offset < size_locked(); page_offset += PAGE_SIZE) {
       paddr_t page_paddr;
       const zx_status_t lookup_status =
           GetPageLocked(page_offset, 0, nullptr, nullptr, nullptr, &page_paddr);
       // Offsets for which no page could be obtained are simply skipped.
       if (likely(lookup_status == ZX_OK)) {
         arch_clean_invalidate_cache_range(
             reinterpret_cast<vaddr_t>(paddr_to_physmap(page_paddr)), PAGE_SIZE);
       }
     }
   }

   cache_policy_ = cache_policy;

   return ZX_OK;
 }

 // Applies |op| to every mapping in |mapping_list_| for the page-aligned range that covers
 // [offset, offset + len). Panics if a mapping is visited with an |op| that is neither Unmap nor
 // RemoveWrite.
 void VmObjectPaged::RangeChangeUpdateLocked(uint64_t offset, uint64_t len, RangeChangeOp op) {
   canary_.Assert();

   // offsets for vmos needn't be aligned, but vmars use aligned offsets
   const uint64_t aligned_offset = ROUNDDOWN(offset, PAGE_SIZE);
   const uint64_t aligned_len = ROUNDUP(offset + len, PAGE_SIZE) - aligned_offset;

   for (auto& mapping : mapping_list_) {
     AssertHeld(*mapping.object_lock());
     switch (op) {
       case RangeChangeOp::Unmap:
         mapping.UnmapVmoRangeLocked(aligned_offset, aligned_len);
         break;
       case RangeChangeOp::RemoveWrite:
         mapping.RemoveWriteVmoRangeLocked(aligned_offset, aligned_len);
         break;
       default:
         panic("Unknown RangeChangeOp %d\n", static_cast<int>(op));
     }
   }
 }

 // Asks the cow pages to evict |page| at |offset| while holding the lock and returns whether the
 // eviction happened. A successful eviction bumps |eviction_event_count_| and the hierarchy
 // generation count.
 bool VmObjectPaged::EvictPage(vm_page_t* page, uint64_t offset) {
   Guard<Mutex> guard{&lock_};

   if (!cow_pages_locked()->EvictPageLocked(page, offset)) {
     return false;
   }

   eviction_event_count_++;
   IncrementHierarchyGenerationCountLocked();
   return true;
 }