[kernel][vm] Introduce VmCowPages

This creates a parallel hierarchy to VmObjectPaged in VmCowPages. The
logic for manipulating copy-on-write pages through the VmPageList is
handled by VmCowPages, while VmObjectPaged remains responsible for
handling VmMappings and providing the user API.

On its own this commit is a temporary regression in memory use and
performance, because VmObjectPaged still creates hidden parents that
serve no purpose. The regression is accepted in order to keep this
commit as close to "moving code" as possible, without additional
complex changes hidden inside. It lays the groundwork for successive
commits to perform the 'interesting' transformations that end up in a
more efficient hierarchy.
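
As a rough sketch of the resulting delegation pattern (mirroring the
vm_object_paged.h changes below), VmObjectPaged methods now take the
shared hierarchy lock and forward to the VmCowPages node, e.g.:

    uint64_t VmObjectPaged::size() const {
      Guard<Mutex> guard{&lock_};
      // The size now lives in the copy-on-write node.
      return cow_pages_locked()->size_locked();
    }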

Change-Id: I32ee7e8f1a158aa3e8cfa3cdb87435ec129588e9
Reviewed-on: https://fuchsia-review.googlesource.com/c/fuchsia/+/437341
Commit-Queue: Adrian Danis <adanis@google.com>
Reviewed-by: Rasha Eqbal <rashaeqbal@google.com>
Testability-Review: Rasha Eqbal <rashaeqbal@google.com>
diff --git a/zircon/kernel/vm/BUILD.zircon.gn b/zircon/kernel/vm/BUILD.zircon.gn
index a06969c..cbbeec8 100644
--- a/zircon/kernel/vm/BUILD.zircon.gn
+++ b/zircon/kernel/vm/BUILD.zircon.gn
@@ -26,6 +26,7 @@
     "vm_address_region.cc",
     "vm_address_region_or_mapping.cc",
     "vm_aspace.cc",
+    "vm_cow_pages.cc",
     "vm_mapping.cc",
     "vm_object.cc",
     "vm_object_paged.cc",
diff --git a/zircon/kernel/vm/include/vm/vm_cow_pages.h b/zircon/kernel/vm/include/vm/vm_cow_pages.h
new file mode 100644
index 0000000..23c3a58
--- /dev/null
+++ b/zircon/kernel/vm/include/vm/vm_cow_pages.h
@@ -0,0 +1,485 @@
+// Copyright 2020 The Fuchsia Authors
+//
+// Use of this source code is governed by a MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT
+
+#ifndef ZIRCON_KERNEL_VM_INCLUDE_VM_VM_COW_PAGES_H_
+#define ZIRCON_KERNEL_VM_INCLUDE_VM_VM_COW_PAGES_H_
+
+#include <assert.h>
+#include <lib/user_copy/user_ptr.h>
+#include <lib/zircon-internal/thread_annotations.h>
+#include <stdint.h>
+#include <zircon/listnode.h>
+#include <zircon/types.h>
+
+#include <fbl/array.h>
+#include <fbl/canary.h>
+#include <fbl/intrusive_double_list.h>
+#include <fbl/macros.h>
+#include <fbl/ref_counted.h>
+#include <fbl/ref_ptr.h>
+#include <kernel/mutex.h>
+#include <vm/page_source.h>
+#include <vm/pmm.h>
+#include <vm/vm.h>
+#include <vm/vm_aspace.h>
+#include <vm/vm_object.h>
+#include <vm/vm_page_list.h>
+
+// Forward declare these so VmCowPages helpers can accept references.
+class BatchPQRemove;
+class VmObjectPaged;
+
+// Implements a copy-on-write hierarchy of pages in a VmPageList.
+// Currently this is treated as a private helper class of VmObjectPaged, with VmObjectPaged being
+// responsible for using it correctly. Once the hierarchy in VmObjectPaged is changed this class
+// will become more independent and responsible for its own correctness. The specific ways it
+// currently relies on VmObjectPaged are:
+//  1. The backlink must be set via set_paged_backlink_locked and be non-null at all times.
+//  2. Setting up the hidden node hierarchy is performed by VmObjectPaged manually doing
+//     ::CreateHidden and inserting the correct children/parents.
+class VmCowPages final : public VmHierarchyBase,
+                         public fbl::ContainableBaseClasses<
+                             fbl::TaggedDoublyLinkedListable<VmCowPages*, internal::ChildListTag>> {
+ public:
+  // All create functions currently are close mirrors of the VmObjectPaged create functions and
+  // exist for VmObjectPaged to create appropriate nodes in the VmCowPages hierarchy.
+  static zx_status_t Create(fbl::RefPtr<VmHierarchyState> root_lock, uint32_t pmm_alloc_flags,
+                            uint64_t size, fbl::RefPtr<VmCowPages>* cow_pages);
+
+  static zx_status_t CreateExternal(fbl::RefPtr<PageSource> src,
+                                    fbl::RefPtr<VmHierarchyState> root_lock, uint64_t size,
+                                    fbl::RefPtr<VmCowPages>* cow_pages);
+
+  zx_status_t CreateHidden(fbl::RefPtr<VmCowPages>* hidden_cow);
+
+  zx_status_t CreateClone(uint64_t offset, uint64_t size, fbl::RefPtr<VmCowPages>* child_cow);
+
+  // Creates a child that looks back to this VmCowPages for all operations. Once a child slice is
+  // created this node should not ever be Resized.
+  zx_status_t CreateChildSlice(uint64_t offset, uint64_t size, fbl::RefPtr<VmCowPages>* cow_slice);
+
+  // Returns the size in bytes of this cow pages range. This will always be a multiple of the page
+  // size.
+  uint64_t size_locked() const TA_REQ(lock_) { return size_; }
+
+  // Returns whether this cow pages node is ultimately backed by a user pager to fulfill initial
+  // content, and not zero pages.
+  bool is_pager_backed() const {
+    Guard<Mutex> guard{&lock_};
+    return GetRootPageSourceLocked() != nullptr;
+  }
+
+  // When attributing pages, hidden nodes must be attributed to either their left or right
+  // descendants. The attribution IDs of all involved determine where attribution goes. For
+  // historical and practical reasons actual user ids are used, although any consistent naming
+  // scheme will have the same effect.
+  void set_page_attribution_user_id_locked(uint64_t id) TA_REQ(lock_) {
+    page_attribution_user_id_ = id;
+  }
+
+  // See description on |pinned_page_count_| for meaning.
+  uint64_t pinned_page_count_locked() const TA_REQ(lock_) { return pinned_page_count_; }
+
+  // Sets the VmObjectPaged backlink for this copy-on-write node. This object does not track
+  // mappings itself, but understands that they exist. When it manipulates pages in a way that
+  // could affect mappings it uses the backlink to notify the VmObjectPaged.
+  // Currently it is assumed that all nodes always have backlinks with the 1:1 hierarchy mapping.
+  void set_paged_backlink_locked(VmObjectPaged* ref) TA_REQ(lock_) { paged_ref_ = ref; }
+
+  uint64_t HeapAllocationBytes() const {
+    Guard<Mutex> guard{&lock_};
+    return page_list_.HeapAllocationBytes();
+  }
+
+  fbl::RefPtr<PageSource> GetRootPageSourceLocked() const TA_REQ(lock_);
+
+  void DetachSource() {
+    DEBUG_ASSERT(page_source_);
+    page_source_->Detach();
+  }
+
+  // Resizes the range of this cow pages. |size| must be a multiple of the page size and this must
+  // not be called on slices or nodes with slice children.
+  zx_status_t ResizeLocked(uint64_t size) TA_REQ(lock_);
+
+  // See VmObject::Lookup
+  zx_status_t LookupLocked(uint64_t offset, uint64_t len, vmo_lookup_fn_t lookup_fn, void* context)
+      TA_REQ(lock_);
+
+  // See VmObject::TakePages
+  zx_status_t TakePagesLocked(uint64_t offset, uint64_t len, VmPageSpliceList* pages) TA_REQ(lock_);
+
+  // See VmObject::SupplyPages
+  zx_status_t SupplyPagesLocked(uint64_t offset, uint64_t len, VmPageSpliceList* pages)
+      TA_REQ(lock_);
+
+  // See VmObject::FailPageRequests
+  zx_status_t FailPageRequests(uint64_t offset, uint64_t len, zx_status_t error_status);
+
+  // See VmObject::GetPageLocked
+  // The pages returned from this are assumed to be used in the following ways.
+  //  * Our VmObjectPaged backlink, or any of our children's backlinks, are allowed to have
+  //    readable mappings, and will be informed to unmap via the backlinks when needed.
+  //  * Our VmObjectPaged backlink and our *slice* children are allowed to have writable mappings,
+  //    and will be informed to either unmap or remove writability when needed.
+  zx_status_t GetPageLocked(uint64_t offset, uint pf_flags, list_node* free_list,
+                            PageRequest* page_request, vm_page_t**, paddr_t*) TA_REQ(lock_);
+
+  // Adds an allocated page to this cow pages node at the specified offset. The page can
+  // optionally be zeroed and any mappings invalidated. If an error is returned the caller retains
+  // ownership of |page|. Offset must be page aligned.
+  zx_status_t AddNewPageLocked(uint64_t offset, vm_page_t* page, bool zero = true,
+                               bool do_range_update = true) TA_REQ(lock_);
+
+  // Adds a set of pages consecutively starting from the given offset. Ownership of the pages is
+  // taken regardless of the return result. Pages are assumed to be in the ALLOC state and can be
+  // optionally zeroed before inserting. start_offset must be page aligned.
+  zx_status_t AddNewPagesLocked(uint64_t start_offset, list_node_t* pages, bool zero = true,
+                                bool do_range_update = true) TA_REQ(lock_);
+
+  // Attempts to release pages in the page list, causing the range to become copy-on-write again.
+  // For consistency, this will fail if there is a parent or a backing page source such that the
+  // range would not explicitly copy-on-write the zero page. Use ZeroPagesLocked for an operation
+  // that is guaranteed to succeed, but may not release memory.
+  zx_status_t DecommitRangeLocked(uint64_t offset, uint64_t len) TA_REQ(lock_);
+
+  // After successful completion the range of pages will all read as zeros. The mechanism used to
+  // achieve this is not guaranteed to decommit, but it will try to.
+  // |page_start_base| and |page_end_base| must be page aligned offsets within the range of the
+  // object.
+  zx_status_t ZeroPagesLocked(uint64_t page_start_base, uint64_t page_end_base) TA_REQ(lock_);
+
+  // Unified function that implements both VmObject::CommitRange and VmObject::CommitRangePinned
+  zx_status_t CommitRange(uint64_t offset, uint64_t len, bool pin, Guard<Mutex>&& adopt);
+
+  // See VmObject::Unpin
+  void UnpinLocked(uint64_t offset, uint64_t len) TA_REQ(lock_);
+
+  // Returns true if a page is not currently committed, and if the offset were to be read from, it
+  // would be read as zero. Requested offset must be page aligned and within range.
+  bool PageWouldReadZeroLocked(uint64_t page_offset) TA_REQ(lock_);
+
+  // Returns whether this node is currently suitable for having a copy-on-write child made of it.
+  bool IsCowClonableLocked() const TA_REQ(lock_);
+
+  // see VmObjectPaged::AttributedPagesInRange
+  size_t AttributedPagesInRangeLocked(uint64_t offset, uint64_t len) const TA_REQ(lock_);
+
+  // See VmObject::ScanForZeroPages
+  uint32_t ScanForZeroPagesLocked(bool reclaim) TA_REQ(lock_);
+
+  // See VmObject::EvictPage
+  bool EvictPageLocked(vm_page_t* page, uint64_t offset) TA_REQ(lock_);
+
+  // See VmObjectPaged::DedupZeroPage
+  bool DedupZeroPageLocked(vm_page_t* page, uint64_t offset) TA_REQ(lock_);
+
+  void DumpLocked(uint depth, bool verbose) const TA_REQ(lock_);
+  bool DebugValidatePageSplitsLocked() const TA_REQ(lock_);
+
+  // Different operations that RangeChangeUpdate* can perform against any VmMappings that are found.
+  enum class RangeChangeOp {
+    Unmap,
+    RemoveWrite,
+  };
+  // Apply the specified operation to all mappings in the given range. This is applied to all
+  // descendants within the range.
+  void RangeChangeUpdateLocked(uint64_t offset, uint64_t len, RangeChangeOp op) TA_REQ(lock_);
+
+  // These helper functions exist for VmObjectPaged to manipulate the hierarchy. They are temporary
+  // until this is cleaned up and the 1:1 equivalence of hierarchies is removed.
+  void InsertHiddenParentLocked(fbl::RefPtr<VmCowPages> hidden_parent) TA_REQ(lock_);
+  void RemoveChildLocked(VmCowPages* child) TA_REQ(lock_);
+  void InitializeOriginalParentLocked(fbl::RefPtr<VmCowPages> parent, uint64_t offset)
+      TA_REQ(lock_);
+  void AddChildLocked(VmCowPages* o) TA_REQ(lock_);
+
+ private:
+  // private constructor (use Create())
+  VmCowPages(fbl::RefPtr<VmHierarchyState> root_lock, uint32_t options, uint32_t pmm_alloc_flags,
+             uint64_t size, fbl::RefPtr<PageSource> page_source);
+
+  // private destructor, only called from refptr
+  ~VmCowPages() override;
+  friend fbl::RefPtr<VmCowPages>;
+
+  DISALLOW_COPY_ASSIGN_AND_MOVE(VmCowPages);
+
+  bool is_hidden() const { return (options_ & kHidden); }
+  bool is_slice() const { return options_ & kSlice; }
+
+  // Add a page to the object. This operation unmaps the corresponding
+  // offset from any existing mappings.
+  // If |do_range_update| is false, this function will skip updating mappings.
+  // On success the page to add is moved out of `*p`, otherwise it is left there.
+  zx_status_t AddPageLocked(VmPageOrMarker* p, uint64_t offset, bool do_range_update = true)
+      TA_REQ(lock_);
+
+  // internal check if any pages in a range are pinned
+  bool AnyPagesPinnedLocked(uint64_t offset, size_t len) TA_REQ(lock_);
+
+  // Helper function for ::AttributedPagesInRangeLocked. Counts the number of pages in ancestors'
+  // vmos that should be attributed to this vmo for the specified range. It is an error to pass in a
+  // range that does not need attributing (i.e. offset must be < parent_limit_), although |len| is
+  // permitted to be sized such that the range exceeds parent_limit_.
+  // The return value is the length of the processed region, which will be <= |size| and is
+  // guaranteed to be > 0. The |count| is the number of pages in this region that should be
+  // attributed to this vmo, versus some other vmo.
+  uint64_t CountAttributedAncestorPagesLocked(uint64_t offset, uint64_t size, uint64_t* count) const
+      TA_REQ(lock_);
+
+  // Searches for the initial content for |this| at |offset|. The result could be used to
+  // initialize a commit, or compare an existing commit with the original. The initial content
+  // is a reference to a VmPageOrMarker as there could be an explicit vm_page of content, an
+  // explicit zero page of content via a marker, or no initial content. Determining the meaning of
+  // no initial content (i.e. whether it is zero or something else) is left up to the caller.
+  //
+  // If an ancestor has a committed page which corresponds to |offset|, returns that page
+  // as well as the VmCowPages and offset which own the page. If no ancestor has a committed
+  // page for the offset, returns null as well as the VmCowPages/offset which need to be queried
+  // to populate the page.
+  VmPageOrMarker* FindInitialPageContentLocked(uint64_t offset, VmCowPages** owner_out,
+                                               uint64_t* owner_offset_out) TA_REQ(lock_);
+
+  // GetPageLocked helper function that 'forks' the page at |offset| of the current vmo. If
+  // this function successfully inserts a page into |offset| of the current vmo, it returns
+  // a pointer to the corresponding vm_page_t struct. The only failure condition is memory
+  // allocation failure, in which case this function returns null.
+  //
+  // The source page that is being forked has already been calculated - it is |page|, which
+  // is currently in |page_owner| at offset |owner_offset|.
+  //
+  // This function is responsible for ensuring that COW clones never result in worse memory
+  // consumption than simply creating a new vmo and memcpying the content. It does this by
+  // migrating a page from a hidden vmo into one child if that page is not 'accessible' to the
+  // other child (instead of allocating a new page into the child and making the hidden vmo's
+  // page inaccessible).
+  //
+  // Whether a particular page in a hidden vmo is 'accessible' to a particular child is
+  // determined by a combination of two factors. First, if the page lies outside of the range
+  // in the hidden vmo the child can see (specified by parent_offset_ and parent_limit_), then
+  // the page is not accessible. Second, if the page has already been copied into the child,
+  // then the page in the hidden vmo is not accessible to that child. This is tracked by the
+  // cow_X_split bits in the vm_page_t structure.
+  //
+  // To handle memory allocation failure, this function performs the fork operation from the
+  // root vmo towards the leaf vmo. This allows the COW invariants to always be preserved.
+  //
+  // |page| must not be the zero-page, as there is no need to do the complex page
+  // fork logic to reduce memory consumption in that case.
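+  //
+  // As an illustrative example: suppose hidden vmo H owns page P with children A and B. When A
+  // needs its own copy of P, this function copies P into A (setting A's split bit on P) if B can
+  // still access P; if B cannot access P (it lies outside B's visible range, or B already has its
+  // own copy as recorded by the split bits), then P is migrated directly into A instead.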
+  vm_page_t* CloneCowPageLocked(uint64_t offset, list_node_t* free_list, VmCowPages* page_owner,
+                                vm_page_t* page, uint64_t owner_offset) TA_REQ(lock_);
+
+  // This is an optimized wrapper around CloneCowPageLocked for when an initial content page needs
+  // to be forked to preserve the COW invariant, but you know you are immediately going to overwrite
+  // the forked page with zeros.
+  //
+  // The optimization it can make is that it can fork the page up to the parent and then, instead
+  // of forking here and then having to immediately free the page, it can insert a marker here and
+  // set the split bits in the parent page as if it had been forked.
+  zx_status_t CloneCowPageAsZeroLocked(uint64_t offset, list_node_t* free_list,
+                                       VmCowPages* page_owner, vm_page_t* page,
+                                       uint64_t owner_offset) TA_REQ(lock_);
+
+  // Returns true if |page| (located at |offset| in this vmo) is only accessible by one
+  // child, where 'accessible' is defined by ::CloneCowPageLocked.
+  bool IsUniAccessibleLocked(vm_page_t* page, uint64_t offset) const TA_REQ(lock_);
+
+  // Releases this vmo's reference to any ancestor vmo's COW pages, for the range [start, end)
+  // in this vmo. This is done by either setting the pages' split bits (if something else
+  // can access the pages) or by freeing the pages onto |free_list| (if nothing else can
+  // access the pages).
+  //
+  // This function recursively invokes itself for regions of the parent vmo which are
+  // not accessible by the sibling vmo.
+  void ReleaseCowParentPagesLocked(uint64_t start, uint64_t end, BatchPQRemove* page_remover)
+      TA_REQ(lock_);
+
+  // Helper function for ReleaseCowParentPagesLocked that processes pages which are visible
+  // to at least this VMO, and possibly its sibling, as well as updates parent_(offset_)limit_.
+  void ReleaseCowParentPagesLockedHelper(uint64_t start, uint64_t end, bool sibling_visible,
+                                         BatchPQRemove* page_remover) TA_REQ(lock_);
+
+  // Updates the parent limits of all children so that they will never be able to
+  // see above |new_size| in this vmo, even if the vmo is enlarged in the future.
+  void UpdateChildParentLimitsLocked(uint64_t new_size) TA_REQ(lock_);
+
+  // When cleaning up a hidden vmo, merges the hidden vmo's content (e.g. page list, view
+  // of the parent) into the remaining child.
+  void MergeContentWithChildLocked(VmCowPages* removed, bool removed_left) TA_REQ(lock_);
+
+  // Only valid to be called when is_slice() is true and returns the first parent of this
+  // hierarchy that is not a slice. The offset of this slice within that VmCowPages is set as
+  // the output.
+  VmCowPages* PagedParentOfSliceLocked(uint64_t* offset) TA_REQ(lock_);
+
+  // Unpins a page and potentially moves it into a different page queue should its pin
+  // count reach zero.
+  void UnpinPage(vm_page_t* page, uint64_t offset) TA_REQ(lock_);
+
+  // Updates the page queue of an existing page, moving it to whichever non wired queue
+  // is appropriate.
+  void MoveToNotWired(vm_page_t* page, uint64_t offset) TA_REQ(lock_);
+
+  // Places a newly added page into the appropriate non wired page queue.
+  void SetNotWired(vm_page_t* page, uint64_t offset) TA_REQ(lock_);
+
+  // Updates any metadata for accessing a page. Currently this moves pager backed pages around in
+  // the page queue to track which ones were recently accessed for the purposes of eviction. In
+  // terms of functional correctness this never has to be called.
+  void UpdateOnAccessLocked(vm_page_t* page, uint64_t offset) TA_REQ(lock_);
+
+  // Outside of initialization/destruction, hidden vmos always have two children. For
+  // clarity, whichever child is first in the list is the 'left' child, and whichever
+  // child is second is the 'right' child. Children of a paged vmo will always be paged
+  // vmos themselves.
+  VmCowPages& left_child_locked() TA_REQ(lock_) TA_ASSERT(left_child_locked().lock()) {
+    DEBUG_ASSERT(is_hidden());
+    DEBUG_ASSERT(children_list_len_ == 2);
+
+    auto& ret = children_list_.front();
+    AssertHeld(ret.lock_);
+    return ret;
+  }
+  VmCowPages& right_child_locked() TA_REQ(lock_) TA_ASSERT(right_child_locked().lock()) {
+    DEBUG_ASSERT(is_hidden());
+    DEBUG_ASSERT(children_list_len_ == 2);
+    auto& ret = children_list_.back();
+    AssertHeld(ret.lock_);
+    return ret;
+  }
+  const VmCowPages& left_child_locked() const TA_REQ(lock_) TA_ASSERT(left_child_locked().lock()) {
+    DEBUG_ASSERT(is_hidden());
+    DEBUG_ASSERT(children_list_len_ == 2);
+    const auto& ret = children_list_.front();
+    AssertHeld(ret.lock_);
+    return ret;
+  }
+  const VmCowPages& right_child_locked() const TA_REQ(lock_)
+      TA_ASSERT(right_child_locked().lock()) {
+    DEBUG_ASSERT(is_hidden());
+    DEBUG_ASSERT(children_list_len_ == 2);
+    const auto& ret = children_list_.back();
+    AssertHeld(ret.lock_);
+    return ret;
+  }
+
+  void ReplaceChildLocked(VmCowPages* old, VmCowPages* new_child) TA_REQ(lock_);
+
+  void DropChildLocked(VmCowPages* c) TA_REQ(lock_);
+
+  // Types for an additional linked list over the VmCowPages for use when doing a
+  // RangeChangeUpdate.
+  //
+  // To avoid unbounded stack growth we need to reserve the memory to exist on a
+  // RangeChange list in our object so that we can have a flat iteration over a
+  // work list. RangeChangeLists should only be used by the RangeChangeUpdate
+  // code.
+  using RangeChangeNodeState = fbl::SinglyLinkedListNodeState<VmCowPages*>;
+  struct RangeChangeTraits {
+    static RangeChangeNodeState& node_state(VmCowPages& cow) { return cow.range_change_state_; }
+  };
+  using RangeChangeList =
+      fbl::SinglyLinkedListCustomTraits<VmCowPages*, VmCowPages::RangeChangeTraits>;
+  friend struct RangeChangeTraits;
+
+  // Given an initial list of VmCowPages performs RangeChangeUpdate on it until the list is empty.
+  static void RangeChangeUpdateListLocked(RangeChangeList* list, RangeChangeOp op);
+
+  void RangeChangeUpdateFromParentLocked(uint64_t offset, uint64_t len, RangeChangeList* list)
+      TA_REQ(lock_);
+
+  // magic value
+  fbl::Canary<fbl::magic("VMCP")> canary_;
+
+  // |options_| is a bitmask of:
+  static constexpr uint32_t kHidden = (1u << 2);
+  static constexpr uint32_t kSlice = (1u << 3);
+  const uint32_t options_;
+
+  uint64_t size_ TA_GUARDED(lock_);
+  // Offset in the *parent* where this object starts.
+  uint64_t parent_offset_ TA_GUARDED(lock_) = 0;
+  // Offset in *this object* above which accesses will no longer access the parent.
+  uint64_t parent_limit_ TA_GUARDED(lock_) = 0;
+  // Offset in *this object* below which this vmo stops referring to its parent. This field
+  // is only useful for hidden vmos, where it is used by ::ReleaseCowParentPagesLocked
+  // together with parent_limit_ to reduce how often page split bits need to be set. It is
+  // effectively a summary of the parent_offset_ values of all descendants - unlike
+  // parent_limit_, this value does not directly impact page lookup. See partial_cow_release_ flag
+  // for more details on usage of this limit.
+  uint64_t parent_start_limit_ TA_GUARDED(lock_) = 0;
+  // Offset in our root parent where this object would start if projected onto it. This value is
+  // used as an efficient summation of accumulated offsets to ensure that an offset projected all
+  // the way to the root would not overflow a 64-bit integer. Although actual page resolution
+  // would never reach the root in such a case, a child's full range projected onto its parent is
+  // used to simplify some operations and so this invariant of not overflowing accumulated offsets
+  // needs to be maintained.
+  uint64_t root_parent_offset_ TA_GUARDED(lock_) = 0;
+  const uint32_t pmm_alloc_flags_;
+
+  // Flag which is true if there was a call to ::ReleaseCowParentPagesLocked which was
+  // not able to update the parent limits. When this is not set, it is sometimes
+  // possible for ::MergeContentWithChildLocked to do significantly less work. This flag then acts
+  // as a proxy for how precise the parent_limit_ and parent_start_limit_ are. It is always an
+  // absolute guarantee that descendants cannot see outside of the limits, but when this flag is
+  // true there is a possibility that there is a sub range inside the limits that they also cannot
+  // see.
+  // Imagine two siblings that see the parent range [0x1000-0x2000) and [0x3000-0x4000)
+  // respectively. The parent can have the start_limit of 0x1000 and limit of 0x4000, but without
+  // additional allocations it cannot track the free region 0x2000-0x3000, and so
+  // partial_cow_release_ must be set to indicate in the future we need to do more expensive
+  // processing to check for such free regions.
+  bool partial_cow_release_ TA_GUARDED(lock_) = false;
+
+  // parent pointer (may be null)
+  fbl::RefPtr<VmCowPages> parent_ TA_GUARDED(lock_);
+
+  // list of every child
+  fbl::TaggedDoublyLinkedList<VmCowPages*, internal::ChildListTag> children_list_ TA_GUARDED(lock_);
+
+  // length of children_list_
+  uint32_t children_list_len_ TA_GUARDED(lock_) = 0;
+
+  // Flag used for walking back up clone tree without recursion. See ::CloneCowPageLocked.
+  enum class StackDir : bool {
+    Left,
+    Right,
+  };
+  struct {
+    uint64_t scratch : 63;
+    StackDir dir_flag : 1;
+  } stack_ TA_GUARDED(lock_);
+
+  // This value is used when determining against which user-visible vmo a hidden vmo's
+  // pages should be attributed. It serves as a tie-breaker for pages that are accessible by
+  // multiple user-visible vmos. See ::CountAttributedAncestorPagesLocked for more details.
+  //
+  // For non-hidden vmobjects, this always equals user_id_. For hidden vmobjects, this
+  // is the page_attribution_user_id_ of one of their children (i.e. the user_id_ of one
+  // of their non-hidden descendants).
+  uint64_t page_attribution_user_id_ TA_GUARDED(lock_) = 0;
+
+  // Counts the total number of pages pinned by ::CommitRange. If one page is pinned n times, it
+  // contributes n to this count.
+  uint64_t pinned_page_count_ TA_GUARDED(lock_) = 0;
+
+  // The page source, if any.
+  const fbl::RefPtr<PageSource> page_source_;
+
+  // a tree of pages
+  VmPageList page_list_ TA_GUARDED(lock_);
+
+  RangeChangeNodeState range_change_state_;
+  uint64_t range_change_offset_ TA_GUARDED(lock_);
+  uint64_t range_change_len_ TA_GUARDED(lock_);
+
+  // optional reference back to a VmObjectPaged so that we can perform mapping updates. This is a
+  // raw pointer to avoid circular references; the VmObjectPaged destructor needs to update it.
+  VmObjectPaged* paged_ref_ TA_GUARDED(lock_) = nullptr;
+};
+
+#endif  // ZIRCON_KERNEL_VM_INCLUDE_VM_VM_COW_PAGES_H_
diff --git a/zircon/kernel/vm/include/vm/vm_object.h b/zircon/kernel/vm/include/vm/vm_object.h
index 83413ed..e038370 100644
--- a/zircon/kernel/vm/include/vm/vm_object.h
+++ b/zircon/kernel/vm/include/vm/vm_object.h
@@ -132,7 +132,7 @@
   // public API
   virtual zx_status_t Resize(uint64_t size) { return ZX_ERR_NOT_SUPPORTED; }
 
-  virtual uint64_t size() const { return 0; }
+  virtual uint64_t size() const TA_EXCL(lock_) { return 0; }
   virtual uint32_t create_options() const { return 0; }
 
   // Returns true if the object is backed by RAM.
diff --git a/zircon/kernel/vm/include/vm/vm_object_paged.h b/zircon/kernel/vm/include/vm/vm_object_paged.h
index 72d1c10..57cdf43 100644
--- a/zircon/kernel/vm/include/vm/vm_object_paged.h
+++ b/zircon/kernel/vm/include/vm/vm_object_paged.h
@@ -25,13 +25,10 @@
 #include <vm/pmm.h>
 #include <vm/vm.h>
 #include <vm/vm_aspace.h>
+#include <vm/vm_cow_pages.h>
 #include <vm/vm_object.h>
-#include <vm/vm_page_list.h>
 
-// Forward declare this so VmObjectPaged helpers can accept references.
-class BatchPQRemove;
-
-// the main VM object type, holding a list of pages
+// the main VM object type, based on a copy-on-write set of pages.
 class VmObjectPaged final : public VmObject {
  public:
   // |options_| is a bitmask of:
@@ -65,20 +62,14 @@
                                     fbl::RefPtr<VmObjectPaged>* vmo);
 
   zx_status_t Resize(uint64_t size) override;
-  uint32_t create_options() const override { return options_; }
-  uint64_t size() const override
-      // TODO: Figure out whether it's safe to lock here without causing
-      // any deadlocks.
-      TA_NO_THREAD_SAFETY_ANALYSIS {
-    return size_;
+  uint64_t size() const override TA_EXCL(lock_) {
+    Guard<Mutex> guard{&lock_};
+    return cow_pages_locked()->size_locked();
   }
   bool is_paged() const override { return true; }
   bool is_contiguous() const override { return (options_ & kContiguous); }
   bool is_resizable() const override { return (options_ & kResizable); }
-  bool is_pager_backed() const override {
-    Guard<Mutex> guard{&lock_};
-    return GetRootPageSourceLocked() != nullptr;
-  }
+  bool is_pager_backed() const override { return cow_pages_->is_pager_backed(); }
   bool is_hidden() const override { return (options_ & kHidden); }
   ChildType child_type() const override {
     if (is_slice()) {
@@ -95,20 +86,20 @@
   void set_user_id(uint64_t user_id) override {
     VmObject::set_user_id(user_id);
     Guard<Mutex> guard{&lock_};
-    page_attribution_user_id_ = user_id;
+    cow_pages_locked()->set_page_attribution_user_id_locked(user_id);
   }
 
-  uint64_t HeapAllocationBytes() const override {
-    Guard<Mutex> guard{&lock_};
-    return page_list_.HeapAllocationBytes();
-  }
+  uint64_t HeapAllocationBytes() const override { return cow_pages_->HeapAllocationBytes(); }
 
   uint64_t EvictedPagedCount() const override {
     Guard<Mutex> guard{&lock_};
     return eviction_event_count_;
   }
 
-  size_t AttributedPagesInRange(uint64_t offset, uint64_t len) const override;
+  size_t AttributedPagesInRange(uint64_t offset, uint64_t len) const override {
+    Guard<Mutex> guard{&lock_};
+    return AttributedPagesInRangeLocked(offset, len);
+  }
 
   zx_status_t CommitRange(uint64_t offset, uint64_t len) override {
     Guard<Mutex> guard{&lock_};
@@ -121,7 +112,10 @@
   zx_status_t DecommitRange(uint64_t offset, uint64_t len) override;
   zx_status_t ZeroRange(uint64_t offset, uint64_t len) override;
 
-  void Unpin(uint64_t offset, uint64_t len) override;
+  void Unpin(uint64_t offset, uint64_t len) override {
+    Guard<Mutex> guard{&lock_};
+    cow_pages_locked()->UnpinLocked(offset, len);
+  }
 
   zx_status_t Read(void* ptr, uint64_t offset, size_t len) override;
   zx_status_t Write(const void* ptr, uint64_t offset, size_t len) override;
@@ -136,7 +130,9 @@
 
   zx_status_t TakePages(uint64_t offset, uint64_t len, VmPageSpliceList* pages) override;
   zx_status_t SupplyPages(uint64_t offset, uint64_t len, VmPageSpliceList* pages) override;
-  zx_status_t FailPageRequests(uint64_t offset, uint64_t len, zx_status_t error_status) override;
+  zx_status_t FailPageRequests(uint64_t offset, uint64_t len, zx_status_t error_status) override {
+    return cow_pages_->FailPageRequests(offset, len, error_status);
+  }
 
   void Dump(uint depth, bool verbose) override {
     Guard<Mutex> guard{&lock_};
@@ -144,8 +140,11 @@
   }
 
   zx_status_t GetPageLocked(uint64_t offset, uint pf_flags, list_node* free_list,
-                            PageRequest* page_request, vm_page_t**, paddr_t*) override
-      TA_REQ(lock_);
+                            PageRequest* page_request, vm_page_t** out_page,
+                            paddr_t* out_paddr) override TA_REQ(lock_) {
+    return cow_pages_locked()->GetPageLocked(offset, pf_flags, free_list, page_request, out_page,
+                                             out_paddr);
+  }
 
   zx_status_t CreateClone(Resizability resizable, CloneType type, uint64_t offset, uint64_t size,
                           bool copy_name, fbl::RefPtr<VmObject>* child_vmo) override;
@@ -153,16 +152,17 @@
   // must have the same lock.
   void InsertHiddenParentLocked(fbl::RefPtr<VmObjectPaged>&& hidden_parent) TA_REQ(lock_);
 
-  uint32_t GetMappingCachePolicy() const override;
+  uint32_t GetMappingCachePolicy() const override {
+    Guard<Mutex> guard{&lock_};
+    return GetMappingCachePolicyLocked();
+  }
+  uint32_t GetMappingCachePolicyLocked() const TA_REQ(lock_) { return cache_policy_; }
   zx_status_t SetMappingCachePolicy(const uint32_t cache_policy) override;
 
   void RemoveChild(VmObject* child, Guard<Mutex>&& guard) override TA_REQ(lock_);
   bool OnChildAddedLocked() override TA_REQ(lock_);
 
-  void DetachSource() override {
-    DEBUG_ASSERT(page_source_);
-    page_source_->Detach();
-  }
+  void DetachSource() override { cow_pages_->DetachSource(); }
 
   zx_status_t CreateChildSlice(uint64_t offset, uint64_t size, bool copy_name,
                                fbl::RefPtr<VmObject>* child_vmo) override;
@@ -189,7 +189,7 @@
   // if false is returned this may write various additional information to the debuglog.
   bool DebugValidatePageSplits() const {
     Guard<Mutex> guard{&lock_};
-    return DebugValidatePageSplitsLocked();
+    return cow_pages_locked()->DebugValidatePageSplitsLocked();
   }
 
   // Used to cache the page attribution count for this VMO. Also tracks the hierarchy generation
@@ -211,10 +211,21 @@
     return GetHierarchyGenerationCountLocked();
   }
 
+  using RangeChangeOp = VmCowPages::RangeChangeOp;
+  // Apply the specified operation to all mappings in the given range.
+  void RangeChangeUpdateLocked(uint64_t offset, uint64_t len, RangeChangeOp op) TA_REQ(lock_);
+
+  // Increment the generation count of the VMO hierarchy this VMO is a part of. Walks up the VMO
+  // tree to the root.
+  //
+  // This should be called whenever a change is made to the VMO tree or the VMO's page list, that
+  // could result in page attribution counts to change for any VMO in this tree.
+  void IncrementHierarchyGenerationCountLocked() TA_REQ(lock_);
+
  private:
   // private constructor (use Create())
-  VmObjectPaged(uint32_t options, uint32_t pmm_alloc_flags, uint64_t size,
-                fbl::RefPtr<VmHierarchyState> root_state, fbl::RefPtr<PageSource> page_source);
+  VmObjectPaged(uint32_t options, fbl::RefPtr<VmHierarchyState> root_state,
+                fbl::RefPtr<VmCowPages> cow_pages);
 
   // Initializes the original parent state of the vmo. |offset| is the offset of
   // this vmo in |parent|.
@@ -233,150 +244,24 @@
 
   DISALLOW_COPY_ASSIGN_AND_MOVE(VmObjectPaged);
 
-  // Add a page to the object. This operation unmaps the corresponding
-  // offset from any existing mappings.
-  zx_status_t AddPage(vm_page_t* p, uint64_t offset);
-  // If |do_range_update| is false, this function will skip updating mappings.
-  // On success the page to add is moved out of `*p`, otherwise it is left there.
-  zx_status_t AddPageLocked(VmPageOrMarker* p, uint64_t offset, bool do_range_update = true)
-      TA_REQ(lock_);
-
-  // internal page list routine
-  void AddPageToArray(size_t index, vm_page_t* p);
-
   // Unified function that implements both CommitRange and CommitRangePinned
   zx_status_t CommitRangeInternal(uint64_t offset, uint64_t len, bool pin, Guard<Mutex>&& adopt);
 
-  void UnpinLocked(uint64_t offset, uint64_t len) TA_REQ(lock_);
-
-  // Internal decommit range helper that expects the lock to be held. On success it will populate
-  // the past in page list with any pages that should be freed.
-  zx_status_t DecommitRangeLocked(uint64_t offset, uint64_t len, list_node_t& free_list)
-      TA_REQ(lock_);
-  zx_status_t ZeroRangeLocked(uint64_t offset, uint64_t len, list_node_t* free_list,
-                              Guard<Mutex>* guard) TA_REQ(lock_);
-
-  fbl::RefPtr<PageSource> GetRootPageSourceLocked() const TA_REQ(lock_);
-
-  bool IsCowClonableLocked() const TA_REQ(lock_);
-
-  // internal check if any pages in a range are pinned
-  bool AnyPagesPinnedLocked(uint64_t offset, size_t len) TA_REQ(lock_);
+  // Internal decommit range helper that expects the lock to be held.
+  zx_status_t DecommitRangeLocked(uint64_t offset, uint64_t len) TA_REQ(lock_);
 
   // Get the current generation count of the VMO hierarchy this VMO is a part of. Walks up the VMO
   // tree to the root.
   uint32_t GetHierarchyGenerationCountLocked() const TA_REQ(lock_);
 
-  // Increment the generation count of the VMO hierarchy this VMO is a part of. Walks up the VMO
-  // tree to the root.
-  //
-  // This should be called whenever a change is made to the VMO tree or the VMO's page list, that
-  // could result in page attribution counts to change for any VMO in this tree.
-  void IncrementHierarchyGenerationCountLocked() TA_REQ(lock_);
-
   // see AttributedPagesInRange
   size_t AttributedPagesInRangeLocked(uint64_t offset, uint64_t len) const TA_REQ(lock_);
-  // Helper function for ::AllocatedPagesInRangeLocked. Counts the number of pages in ancestor's
-  // vmos that should be attributed to this vmo for the specified range. It is an error to pass in a
-  // range that does not need attributing (i.e. offset must be < parent_limit_), although |len| is
-  // permitted to be sized such that the range exceeds parent_limit_.
-  // The return value is the length of the processed region, which will be <= |size| and is
-  // guaranteed to be > 0. The |count| is the number of pages in this region that should be
-  // attributed to this vmo, versus some other vmo.
-  uint64_t CountAttributedAncestorPagesLocked(uint64_t offset, uint64_t size, uint64_t* count) const
-      TA_REQ(lock_);
 
   // internal read/write routine that takes a templated copy function to help share some code
   template <typename T>
   zx_status_t ReadWriteInternalLocked(uint64_t offset, size_t len, bool write, T copyfunc,
                                       Guard<Mutex>* guard) TA_REQ(lock_);
 
-  // Searches for the the initial content for |this| at |offset|. The result could be used to
-  // initialize a commit, or compare an existing commit with the original. The initial content
-  // is a reference to a VmPageOrMarker as there could be an explicit vm_page of content, an
-  // explicit zero page of content via a marker, or no initial content. Determining the meaning of
-  // no initial content (i.e. whether it is zero or something else) is left up to the caller.
-  //
-  // If an ancestor has a committed page which corresponds to |offset|, returns that page
-  // as well as the VmObjectPaged and offset which own the page. If no ancestor has a committed
-  // page for the offset, returns null as well as the VmObjectPaged/offset which need to be queried
-  // to populate the page.
-  VmPageOrMarker* FindInitialPageContentLocked(uint64_t offset, VmObjectPaged** owner_out,
-                                               uint64_t* owner_offset_out, uint64_t* owner_id_out)
-      TA_REQ(lock_);
-
-  // GetPageLocked helper function that 'forks' the page at |offset| of the current vmo. If
-  // this function successfully inserts a page into |offset| of the current vmo, it returns
-  // a pointer to the corresponding vm_page_t struct. The only failure condition is memory
-  // allocation failure, in which case this function returns null.
-  //
-  // The source page that is being forked has already been calculated - it is |page|, which
-  // is currently in |page_owner| at offset |owner_offset|.
-  //
-  // This function is responsible for ensuring that COW clones never result in worse memory
-  // consumption than simply creating a new vmo and memcpying the content. It does this by
-  // migrating a page from a hidden vmo into one child if that page is not 'accessible' to the
-  // other child (instead of allocating a new page into the child and making the hidden vmo's
-  // page inaccessible).
-  //
-  // Whether a particular page in a hidden vmo is 'accessible' to a particular child is
-  // determined by a combination of two factors. First, if the page lies outside of the range
-  // in the hidden vmo the child can see (specified by parent_offset_ and parent_limit_), then
-  // the page is not accessible. Second, if the page has already been copied into the child,
-  // then the page in the hidden vmo is not accessible to that child. This is tracked by the
-  // cow_X_split bits in the vm_page_t structure.
-  //
-  // To handle memory allocation failure, this function performs the fork operation from the
-  // root vmo towards the leaf vmo. This allows the COW invariants to always be preserved.
-  //
-  // |page| must not be the zero-page, as there is no need to do the complex page
-  // fork logic to reduce memory consumption in that case.
-  vm_page_t* CloneCowPageLocked(uint64_t offset, list_node_t* free_list, VmObjectPaged* page_owner,
-                                vm_page_t* page, uint64_t owner_offset) TA_REQ(lock_);
-
-  // This is an optimized wrapper around CloneCowPageLocked for when an initial content page needs
-  // to be forked to preserve the COW invariant, but you know you are immediately going to overwrite
-  // the forked page with zeros.
-  //
-  // The optimization it can make is that it can fork the page up to the parent and then, instead
-  // of forking here and then having to immediately free the page, it can insert a marker here and
-  // set the split bits in the parent page as if it had been forked.
-  zx_status_t CloneCowPageAsZeroLocked(uint64_t offset, list_node_t* free_list,
-                                       VmObjectPaged* page_owner, vm_page_t* page,
-                                       uint64_t owner_offset) TA_REQ(lock_);
-
-  // Returns true if |page| (located at |offset| in this vmo) is only accessible by one
-  // child, where 'accessible' is defined by ::CloneCowPageLocked.
-  bool IsUniAccessibleLocked(vm_page_t* page, uint64_t offset) const TA_REQ(lock_);
-
-  // Releases this vmo's reference to any ancestor vmo's COW pages, for the range [start, end)
-  // in this vmo. This is done by either setting the pages' split bits (if something else
-  // can access the pages) or by freeing the pages onto |free_list| (if nothing else can
-  // access the pages).
-  //
-  // This function recursively invokes itself for regions of the parent vmo which are
-  // not accessible by the sibling vmo.
-  void ReleaseCowParentPagesLocked(uint64_t start, uint64_t end, BatchPQRemove* page_remover)
-      TA_REQ(lock_);
-
-  // Helper function for ReleaseCowParentPagesLocked that processes pages which are visible
-  // to at least this VMO, and possibly its sibling, as well as updates parent_(offset_)limit_.
-  void ReleaseCowParentPagesLockedHelper(uint64_t start, uint64_t end, bool sibling_visible,
-                                         BatchPQRemove* page_remover) TA_REQ(lock_);
-
-  // Updates the parent limits of all children so that they will never be able to
-  // see above |new_size| in this vmo, even if the vmo is enlarged in the future.
-  void UpdateChildParentLimitsLocked(uint64_t new_size) TA_REQ(lock_);
-
-  // When cleaning up a hidden vmo, merges the hidden vmo's content (e.g. page list, view
-  // of the parent) into the remaining child.
-  void MergeContentWithChildLocked(VmObjectPaged* removed, bool removed_left) TA_REQ(lock_);
-
-  // Only valid to be called when is_slice() is true and returns the first parent of this
-  // hierarchy that is not a slice. The offset of this slice within that VmObjectPaged is set as
-  // the output.
-  VmObjectPaged* PagedParentOfSliceLocked(uint64_t* offset) TA_REQ(lock_);
-
   // Zeroes a partial range in a page. May use CallUnlocked on the passed in guard. The page to zero
   // is looked up using page_base_offset, and will be committed if needed. The range of
   // [zero_start_offset, zero_end_offset) is relative to the page and so [0, PAGE_SIZE) would zero
@@ -384,160 +269,27 @@
   zx_status_t ZeroPartialPage(uint64_t page_base_offset, uint64_t zero_start_offset,
                               uint64_t zero_end_offset, Guard<Mutex>* guard) TA_REQ(lock_);
 
-  // Unpins a page and potentially moves it into a different page queue should its pin
-  // count reach zero.
-  void UnpinPage(vm_page_t* page, uint64_t offset);
-
-  // Updates the page queue of an existing page, moving it to whichever non wired queue
-  // is appropriate.
-  void MoveToNotWired(vm_page_t* page, uint64_t offset);
-
-  // Places a newly added page into the appropriate non wired page queue.
-  void SetNotWired(vm_page_t* page, uint64_t offset);
-
-  // Updates any meta data for accessing a page. Currently this moves pager backed pages around in
-  // the page queue to track which ones were recently accessed for the purposes of eviction. In
-  // terms of functional correctness this never has to be called.
-  void UpdateOnAccessLocked(vm_page_t* page, uint64_t offset) TA_REQ(lock_);
-
-  // Outside of initialization/destruction, hidden vmos always have two children. For
-  // clarity, whichever child is first in the list is the 'left' child, and whichever
-  // child is second is the 'right' child. Children of a paged vmo will always be paged
-  // vmos themselves.
-  VmObjectPaged& left_child_locked() TA_REQ(lock_) TA_ASSERT(left_child_locked().lock()) {
-    DEBUG_ASSERT(is_hidden());
-    DEBUG_ASSERT(children_list_len_ == 2);
-    DEBUG_ASSERT(children_list_.front().is_paged());
-
-    auto& ret = static_cast<VmObjectPaged&>(children_list_.front());
-    AssertHeld(ret.lock_);
-    return ret;
-  }
-  VmObjectPaged& right_child_locked() TA_REQ(lock_) TA_ASSERT(right_child_locked().lock()) {
-    DEBUG_ASSERT(is_hidden());
-    DEBUG_ASSERT(children_list_len_ == 2);
-    DEBUG_ASSERT(children_list_.back().is_paged());
-    auto& ret = static_cast<VmObjectPaged&>(children_list_.back());
-    AssertHeld(ret.lock_);
-    return ret;
-  }
-  const VmObjectPaged& left_child_locked() const TA_REQ(lock_)
-      TA_ASSERT(left_child_locked().lock()) {
-    DEBUG_ASSERT(is_hidden());
-    DEBUG_ASSERT(children_list_len_ == 2);
-    DEBUG_ASSERT(children_list_.front().is_paged());
-    const auto& ret = static_cast<const VmObjectPaged&>(children_list_.front());
-    AssertHeld(ret.lock_);
-    return ret;
-  }
-  const VmObjectPaged& right_child_locked() const TA_REQ(lock_)
-      TA_ASSERT(right_child_locked().lock()) {
-    DEBUG_ASSERT(is_hidden());
-    DEBUG_ASSERT(children_list_len_ == 2);
-    DEBUG_ASSERT(children_list_.back().is_paged());
-    const auto& ret = static_cast<const VmObjectPaged&>(children_list_.back());
-    AssertHeld(ret.lock_);
-    return ret;
-  }
-
   // Internal implementations that assume lock is already held.
   void DumpLocked(uint depth, bool verbose) const TA_REQ(lock_);
-  bool DebugValidatePageSplitsLocked() const TA_REQ(lock_);
 
-  // Different operations that RangeChangeUpdate* can perform against any VmMappings that are found.
-  enum class RangeChangeOp {
-    Unmap,
-    RemoveWrite,
-  };
+  // Convenience wrapper that returns cow_pages_ whilst asserting that the lock is held.
+  VmCowPages* cow_pages_locked() const TA_REQ(lock_) TA_ASSERT(cow_pages_locked()->lock()) {
+    AssertHeld(cow_pages_->lock_ref());
+    return cow_pages_.get();
+  }
 
-  // Types for an additional linked list over the VmObjectPaged for use when doing a
-  // RangeChangeUpdate.
-  //
-  // To avoid unbounded stack growth we need to reserve the memory to exist on a
-  // RangeChange list in our object so that we can have a flat iteration over a
-  // work list. RangeChangeLists should only be used by the RangeChangeUpdate
-  // code.
-  using RangeChangeNodeState = fbl::SinglyLinkedListNodeState<VmObjectPaged*>;
-  struct RangeChangeTraits {
-    static RangeChangeNodeState& node_state(VmObjectPaged& foo) { return foo.range_change_state_; }
-  };
-  using RangeChangeList =
-      fbl::SinglyLinkedListCustomTraits<VmObjectPaged*, VmObjectPaged::RangeChangeTraits>;
-  friend struct RangeChangeTraits;
-
-  // Apply the specified operation to all mappings in the given range. This is applied to all
-  // descendants within the range.
-  void RangeChangeUpdateLocked(uint64_t offset, uint64_t len, RangeChangeOp op) TA_REQ(lock_);
-
-  // Given an initial list of VmObject's performs RangeChangeUpdate on it until the list is empty.
-  static void RangeChangeUpdateListLocked(RangeChangeList* list, RangeChangeOp op);
-
-  void RangeChangeUpdateFromParentLocked(uint64_t offset, uint64_t len, RangeChangeList* list)
-      TA_REQ(lock_);
+  uint64_t size_locked() const TA_REQ(lock_) { return cow_pages_locked()->size_locked(); }
 
   // members
   const uint32_t options_;
-  uint64_t size_ TA_GUARDED(lock_) = 0;
-  // Offset in the *parent* where this object starts.
-  uint64_t parent_offset_ TA_GUARDED(lock_) = 0;
-  // Offset in *this object* above which accesses will no longer access the parent.
-  uint64_t parent_limit_ TA_GUARDED(lock_) = 0;
-  // Offset in *this object* below which this vmo stops referring to its parent. This field
-  // is only useful for hidden vmos, where it is used by ::ReleaseCowPagesParentLocked
-  // together with parent_limit_ to reduce how often page split bits need to be set. It is
-  // effectively a summary of the parent_offset_ values of all descendants - unlike
-  // parent_limit_, this value does not directly impact page lookup. See partial_cow_release_ flag
-  // for more details on usage of this limit.
-  uint64_t parent_start_limit_ TA_GUARDED(lock_) = 0;
-  // Offset in our root parent where this object would start if projected onto it. This value is
-  // used as an efficient summation of accumulated offsets to ensure that an offset projected all
-  // the way to the root would not overflow a 64-bit integer. Although actual page resolution
-  // would never reach the root in such a case, a childs full range projected onto its parent is
-  // used to simplify some operations and so this invariant of not overflowing accumulated offsets
-  // needs to be maintained.
-  uint64_t root_parent_offset_ TA_GUARDED(lock_) = 0;
-  const uint32_t pmm_alloc_flags_ = PMM_ALLOC_FLAG_ANY;
   uint32_t cache_policy_ TA_GUARDED(lock_) = ARCH_MMU_FLAG_CACHED;
 
-  // Flag which is true if there was a call to ::ReleaseCowParentPagesLocked which was
-  // not able to update the parent limits. When this is not set, it is sometimes
-  // possible for ::MergeContentWithChildLocked to do significantly less work. This flag acts as a
-  // proxy then for how precise the parent_limit_ and parent_start_limit_ are. It is always an
-  // absolute guarantee that descendants cannot see outside of the limits, but when this flag is
-  // true there is a possibility that there is a sub range inside the limits that they also cannot
-  // see.
-  // Imagine a two siblings that see the parent range [0x1000-0x2000) and [0x3000-0x4000)
-  // respectively. The parent can have the start_limit of 0x1000 and limit of 0x4000, but without
-  // additional allocations it cannot track the free region 0x2000-0x3000, and so
-  // partial_cow_release_ must be set to indicate in the future we need to do more expensive
-  // processing to check for such free regions.
-  bool partial_cow_release_ TA_GUARDED(lock_) = false;
-
   // parent pointer (may be null)
   fbl::RefPtr<VmObjectPaged> parent_ TA_GUARDED(lock_);
   // Record the user_id_ of the original parent, in case we make
   // a bidirectional clone and end up changing parent_.
   uint64_t original_parent_user_id_ TA_GUARDED(lock_) = 0;
 
-  // Flag used for walking back up clone tree without recursion. See ::CloneCowPageLocked.
-  enum class StackDir : bool {
-    Left,
-    Right,
-  };
-  struct {
-    uint64_t scratch : 63;
-    StackDir dir_flag : 1;
-  } stack_ TA_GUARDED(lock_);
-
-  // This value is used when determining against which user-visible vmo a hidden vmo's
-  // pages should be attributed. It serves as a tie-breaker for pages that are accessible by
-  // multiple user-visible vmos. See ::HasAttributedAncestorPageLocked for more details.
-  //
-  // For non-hidden vmobjects, this always equals user_id_. For hidden vmobjects, this
-  // is the page_attribution_user_id_ of one of their children (i.e. the user_id_ of one
-  // of their non-hidden descendants).
-  uint64_t page_attribution_user_id_ TA_GUARDED(lock_) = 0;
-
   static constexpr uint32_t kGenerationCountUnset = 0;
   static constexpr uint32_t kGenerationCountInitial = 1;
 
@@ -557,23 +309,11 @@
   // Tracks the last cached page attribution count.
   mutable CachedPageAttribution cached_page_attribution_ TA_GUARDED(lock_) = {};
 
-  // Counts the total number of pages pinned by ::Pin. If one page is pinned n times, it
-  // contributes n to this count. However, this does not include pages pinned when creating
-  // a contiguous vmo.
-  uint64_t pinned_page_count_ TA_GUARDED(lock_) = 0;
-
   // Count eviction events so that we can report them to the user.
   uint64_t eviction_event_count_ TA_GUARDED(lock_) = 0;
 
-  // The page source, if any.
-  const fbl::RefPtr<PageSource> page_source_;
-
-  RangeChangeNodeState range_change_state_;
-  uint64_t range_change_offset_ TA_GUARDED(lock_);
-  uint64_t range_change_len_ TA_GUARDED(lock_);
-
-  // a tree of pages
-  VmPageList page_list_ TA_GUARDED(lock_);
+  // Reference to our pages; this should never be modified and is only dropped in the destructor.
+  fbl::RefPtr<VmCowPages> const cow_pages_;
 };
 
 #endif  // ZIRCON_KERNEL_VM_INCLUDE_VM_VM_OBJECT_PAGED_H_
diff --git a/zircon/kernel/vm/vm_cow_pages.cc b/zircon/kernel/vm/vm_cow_pages.cc
new file mode 100644
index 0000000..de1c595
--- /dev/null
+++ b/zircon/kernel/vm/vm_cow_pages.cc
@@ -0,0 +1,2865 @@
+// Copyright 2020 The Fuchsia Authors
+//
+// Use of this source code is governed by a MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT
+
+#include "vm/vm_cow_pages.h"
+
+#include <lib/counters.h>
+#include <trace.h>
+
+#include <fbl/auto_call.h>
+#include <kernel/range_check.h>
+#include <ktl/move.h>
+#include <vm/fault.h>
+#include <vm/physmap.h>
+#include <vm/vm_object_paged.h>
+
+#include "vm_priv.h"
+
+#define LOCAL_TRACE VM_GLOBAL_TRACE(0)
+
+namespace {
+
+void ZeroPage(paddr_t pa) {
+  void* ptr = paddr_to_physmap(pa);
+  DEBUG_ASSERT(ptr);
+
+  arch_zero_page(ptr);
+}
+
+void ZeroPage(vm_page_t* p) {
+  paddr_t pa = p->paddr();
+  ZeroPage(pa);
+}
+
+bool IsZeroPage(vm_page_t* p) {
+  uint64_t* base = (uint64_t*)paddr_to_physmap(p->paddr());
+  for (int i = 0; i < PAGE_SIZE / (int)sizeof(uint64_t); i++) {
+    if (base[i] != 0)
+      return false;
+  }
+  return true;
+}
+
+void InitializeVmPage(vm_page_t* p) {
+  DEBUG_ASSERT(p->state() == VM_PAGE_STATE_ALLOC);
+  p->set_state(VM_PAGE_STATE_OBJECT);
+  p->object.pin_count = 0;
+  p->object.cow_left_split = 0;
+  p->object.cow_right_split = 0;
+}
+
+// Allocates a new page and populates it with the data at |parent_paddr|.
+bool AllocateCopyPage(uint32_t pmm_alloc_flags, paddr_t parent_paddr, list_node_t* free_list,
+                      vm_page_t** clone) {
+  paddr_t pa_clone;
+  vm_page_t* p_clone = nullptr;
+  if (free_list) {
+    p_clone = list_remove_head_type(free_list, vm_page, queue_node);
+    if (p_clone) {
+      pa_clone = p_clone->paddr();
+    }
+  }
+  if (!p_clone) {
+    zx_status_t status = pmm_alloc_page(pmm_alloc_flags, &p_clone, &pa_clone);
+    if (!p_clone) {
+      DEBUG_ASSERT(status == ZX_ERR_NO_MEMORY);
+      return false;
+    }
+    DEBUG_ASSERT(status == ZX_OK);
+  }
+
+  InitializeVmPage(p_clone);
+
+  void* dst = paddr_to_physmap(pa_clone);
+  DEBUG_ASSERT(dst);
+
+  if (parent_paddr != vm_get_zero_page_paddr()) {
+    // do a direct copy of the two pages
+    const void* src = paddr_to_physmap(parent_paddr);
+    DEBUG_ASSERT(src);
+    memcpy(dst, src, PAGE_SIZE);
+  } else {
+    // avoid pointless fetches by directly zeroing dst
+    arch_zero_page(dst);
+  }
+
+  *clone = p_clone;
+
+  return true;
+}
+
+bool SlotHasPinnedPage(VmPageOrMarker* slot) {
+  return slot && slot->IsPage() && slot->Page()->object.pin_count > 0;
+}
+
+inline uint64_t CheckedAdd(uint64_t a, uint64_t b) {
+  uint64_t result;
+  bool overflow = add_overflow(a, b, &result);
+  DEBUG_ASSERT(!overflow);
+  return result;
+}
+
+}  // namespace
+
+// Helper class for collecting pages on which to perform batched Removes from the page queue,
+// avoiding its spinlock overhead for every single page. Pages that it removes from the page queue
+// get placed into a provided list. Note that pages are not moved into the list until *after*
+// Flush has been called, and Flush must be called prior to object destruction.
+class BatchPQRemove {
+ public:
+  BatchPQRemove(list_node_t* free_list) : free_list_(free_list) {}
+  ~BatchPQRemove() { DEBUG_ASSERT(count_ == 0); }
+  DISALLOW_COPY_AND_ASSIGN_ALLOW_MOVE(BatchPQRemove);
+
+  // Add a page to the batch set. Automatically calls |Flush| if the limit is reached.
+  void Push(vm_page_t* page) {
+    DEBUG_ASSERT(page);
+    pages_[count_] = page;
+    count_++;
+    if (count_ == kMaxPages) {
+      Flush();
+    }
+  }
+
+  // Performs |Remove| on any pending pages. After this returns all removed pages are guaranteed to
+  // be in the provided free list, so operations can be performed on that list.
+  void Flush() {
+    if (count_ > 0) {
+      pmm_page_queues()->RemoveArrayIntoList(pages_.data(), count_, free_list_);
+      count_ = 0;
+    }
+  }
+
+  // Produces a callback suitable for passing to VmPageList::RemovePages that will |Push| any pages
+  // it removes.
+  auto RemovePagesCallback() {
+    return [this](VmPageOrMarker* p, uint64_t off) {
+      if (p->IsPage()) {
+        vm_page_t* page = p->ReleasePage();
+        Push(page);
+      }
+      *p = VmPageOrMarker::Empty();
+      return ZX_ERR_NEXT;
+    };
+  }
+
+ private:
+  // The value of 64 was chosen as only minimal performance gains were originally measured when
+  // using higher values. There is an incentive to keep this as small as possible since these
+  // objects are typically created on the stack, and our stack space is limited.
+  static constexpr size_t kMaxPages = 64;
+
+  size_t count_ = 0;
+  ktl::array<vm_page_t*, kMaxPages> pages_;
+  list_node_t* free_list_ = nullptr;
+};
+
+VmCowPages::VmCowPages(fbl::RefPtr<VmHierarchyState> hierarchy_state_ptr, uint32_t options,
+                       uint32_t pmm_alloc_flags, uint64_t size, fbl::RefPtr<PageSource> page_source)
+    : VmHierarchyBase(ktl::move(hierarchy_state_ptr)),
+      options_(options),
+      size_(size),
+      pmm_alloc_flags_(pmm_alloc_flags),
+      page_source_(ktl::move(page_source)) {
+  DEBUG_ASSERT(IS_PAGE_ALIGNED(size));
+}
+
+void VmCowPages::InitializeOriginalParentLocked(fbl::RefPtr<VmCowPages> parent, uint64_t offset) {
+  DEBUG_ASSERT(parent_ == nullptr);
+
+  AssertHeld(parent->lock_);
+  page_list_.InitializeSkew(parent->page_list_.GetSkew(), offset);
+
+  AssertHeld(parent->lock_ref());
+  parent_ = ktl::move(parent);
+}
+
+VmCowPages::~VmCowPages() {
+  canary_.Assert();
+
+  // The hierarchy of VmCowPages, although it mirrors VmObjectPaged right now, is maintained
+  // slightly differently. The VmObjectPaged destructor will explicitly clean up any hidden
+  // hierarchies before it drops the reference to us. This means we do not need to do anything
+  // with removing ourselves from our parent etc, and can literally just free the page lists.
+  // All of this will change once VmObjectPaged has its hierarchy simplified, and the logic that
+  // is currently there will be moved into here.
+  if (!is_hidden()) {
+    DEBUG_ASSERT(!parent_);
+  } else {
+    DEBUG_ASSERT(children_list_len_ == 0);
+    DEBUG_ASSERT(page_list_.HasNoPages());
+  }
+
+  // Cleanup page lists and page sources.
+  list_node_t list;
+  list_initialize(&list);
+
+  BatchPQRemove page_remover(&list);
+  // free all of the pages attached to us
+  page_list_.RemoveAllPages([&page_remover](vm_page_t* page) {
+    ASSERT(page->object.pin_count == 0);
+    page_remover.Push(page);
+  });
+
+  if (page_source_) {
+    page_source_->Close();
+  }
+  page_remover.Flush();
+
+  pmm_free(&list);
+}
+
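+// Attempts to replace |page|, expected to be at |offset|, with a zero page marker. Returns true if
+// the page's contents were zero and it was freed; returns false if the page has moved, is pinned,
+// or is not zero.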
+bool VmCowPages::DedupZeroPageLocked(vm_page_t* page, uint64_t offset) {
+  canary_.Assert();
+
+  // Check this page is still a part of this VMO. object.page_offset could be complete garbage,
+  // but there's no harm in looking up a random slot as we'll then notice it's the wrong page.
+  VmPageOrMarker* page_or_marker = page_list_.Lookup(offset);
+  if (!page_or_marker || !page_or_marker->IsPage() || page_or_marker->Page() != page ||
+      page->object.pin_count > 0) {
+    return false;
+  }
+
+  // We expect most pages to not be zero; as such we will first do a 'racy' zero page check where
+  // we leave write permissions on the page. If the page isn't zero, which is our hope, then we
+  // haven't paid the price of modifying page tables.
+  if (!IsZeroPage(page_or_marker->Page())) {
+    return false;
+  }
+
+  RangeChangeUpdateLocked(offset, PAGE_SIZE, RangeChangeOp::RemoveWrite);
+
+  if (IsZeroPage(page_or_marker->Page())) {
+    RangeChangeUpdateLocked(offset, PAGE_SIZE, RangeChangeOp::Unmap);
+    vm_page_t* page = page_or_marker->ReleasePage();
+    pmm_page_queues()->Remove(page);
+    DEBUG_ASSERT(!list_in_list(&page->queue_node));
+    pmm_free_page(page);
+    *page_or_marker = VmPageOrMarker::Marker();
+    return true;
+  }
+  return false;
+}
+
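+// Counts the committed, unpinned pages in this vmo whose contents are entirely zero. If |reclaim|
+// is true those pages are also unmapped, freed and replaced with zero page markers.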
+uint32_t VmCowPages::ScanForZeroPagesLocked(bool reclaim) {
+  canary_.Assert();
+
+  // Check if we have any slice children. Slice children may have writable mappings to our pages,
+  // and so we need to also remove any mappings from them. Non-slice children could only have
+  // read-only mappings, which is the state we already want, and so we don't need to touch them.
+  for (auto& child : children_list_) {
+    if (child.is_slice()) {
+      // Slices are strict subsets of their parents so we don't need to bother looking at parent
+      // limits etc and can just operate on the entire range.
+      AssertHeld(child.lock_);
+      child.RangeChangeUpdateLocked(0, child.size_, RangeChangeOp::RemoveWrite);
+    }
+  }
+
+  list_node_t free_list;
+  list_initialize(&free_list);
+
+  uint32_t count = 0;
+  page_list_.RemovePages(
+      [&count, &free_list, reclaim, this](VmPageOrMarker* p, uint64_t off) {
+        // Pinned pages cannot be decommitted so do not consider them.
+        if (p->IsPage() && p->Page()->object.pin_count == 0 && IsZeroPage(p->Page())) {
+          count++;
+          if (reclaim) {
+            // Need to remove all mappings (including read-only ones) to this range before we
+            // remove the page.
+            AssertHeld(this->lock_);
+            RangeChangeUpdateLocked(off, PAGE_SIZE, RangeChangeOp::Unmap);
+            vm_page_t* page = p->ReleasePage();
+            pmm_page_queues()->Remove(page);
+            DEBUG_ASSERT(!list_in_list(&page->queue_node));
+            list_add_tail(&free_list, &page->queue_node);
+            *p = VmPageOrMarker::Marker();
+          }
+        }
+        return ZX_ERR_NEXT;
+      },
+      0, VmPageList::MAX_SIZE);
+
+  pmm_free(&free_list);
+  return count;
+}
+
+zx_status_t VmCowPages::Create(fbl::RefPtr<VmHierarchyState> root_lock, uint32_t pmm_alloc_flags,
+                               uint64_t size, fbl::RefPtr<VmCowPages>* cow_pages) {
+  fbl::AllocChecker ac;
+  auto cow = fbl::AdoptRef<VmCowPages>(
+      new (&ac) VmCowPages(ktl::move(root_lock), 0, pmm_alloc_flags, size, nullptr));
+  if (!ac.check()) {
+    return ZX_ERR_NO_MEMORY;
+  }
+  *cow_pages = ktl::move(cow);
+  return ZX_OK;
+}
+
+zx_status_t VmCowPages::CreateExternal(fbl::RefPtr<PageSource> src,
+                                       fbl::RefPtr<VmHierarchyState> root_lock, uint64_t size,
+                                       fbl::RefPtr<VmCowPages>* cow_pages) {
+  fbl::AllocChecker ac;
+  auto cow = fbl::AdoptRef<VmCowPages>(
+      new (&ac) VmCowPages(ktl::move(root_lock), 0, PMM_ALLOC_FLAG_ANY, size, ktl::move(src)));
+  if (!ac.check()) {
+    return ZX_ERR_NO_MEMORY;
+  }
+
+  *cow_pages = ktl::move(cow);
+
+  return ZX_OK;
+}
+
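+// The following child list helpers only manipulate the intrusive children_list_ and its length;
+// updating the children's parent_ pointers is the caller's responsibility.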
+void VmCowPages::ReplaceChildLocked(VmCowPages* old, VmCowPages* new_child) {
+  canary_.Assert();
+  children_list_.replace(*old, new_child);
+}
+
+void VmCowPages::DropChildLocked(VmCowPages* c) {
+  canary_.Assert();
+  DEBUG_ASSERT(children_list_len_ > 0);
+  children_list_.erase(*c);
+  --children_list_len_;
+}
+
+void VmCowPages::AddChildLocked(VmCowPages* o) {
+  canary_.Assert();
+  children_list_.push_front(o);
+  children_list_len_++;
+}
+
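+// Inserts |hidden_parent| between |this| and our current parent (if any), moving all of our pages
+// into it so that they can later be shared copy-on-write with a sibling.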
+void VmCowPages::InsertHiddenParentLocked(fbl::RefPtr<VmCowPages> hidden_parent) {
+  AssertHeld(hidden_parent->lock_);
+  // Insert the new VmCowPages |hidden_parent| between |this| and |parent_|.
+  if (parent_) {
+    AssertHeld(parent_->lock_ref());
+    hidden_parent->InitializeOriginalParentLocked(parent_, 0);
+    parent_->ReplaceChildLocked(this, hidden_parent.get());
+  }
+  hidden_parent->AddChildLocked(this);
+  parent_ = hidden_parent;
+
+  // The hidden parent starts off attributed to us, as we are really the original user-created
+  // parent and our sibling is our child. As such all the pages in the hidden parent were just
+  // previously our pages, so keep attributing them to us.
+  hidden_parent->page_attribution_user_id_ = page_attribution_user_id_;
+
+  // The hidden parent should have the same view as we had into its parent, and this vmo has a
+  // full view into the hidden vmo.
+  hidden_parent->parent_offset_ = parent_offset_;
+  hidden_parent->parent_limit_ = parent_limit_;
+  // Although we are inserting the hidden parent between this and parent_ they share the same
+  // root_parent_offset_.
+  hidden_parent->root_parent_offset_ = root_parent_offset_;
+  parent_offset_ = 0;
+  parent_limit_ = size_;
+
+  // This method should only ever be called on leaf vmos (i.e. non-hidden),
+  // so this flag should never be set.
+  DEBUG_ASSERT(!partial_cow_release_);
+  DEBUG_ASSERT(parent_start_limit_ == 0);  // Should only ever be set for hidden vmos
+
+  // Moving our page list would be bad if we had a page source and potentially have pages with
+  // links back to this object.
+  DEBUG_ASSERT(!page_source_);
+  // Move everything into the hidden parent, for immutability
+  hidden_parent->page_list_ = ktl::move(page_list_);
+
+  // As we are moving pages between objects we need to make sure no backlinks are broken. We know
+  // there's no page_source_ and hence no pages will be in the pager_backed queue, but we could
+  // have pages in the unswappable_zero_forked queue. We do know that pages in this queue cannot
+  // have been pinned, so we can just move (or potentially re-move) any page that is not pinned
+  // into the regular unswappable queue.
+  {
+    PageQueues* pq = pmm_page_queues();
+    Guard<SpinLock, IrqSave> guard{pq->get_lock()};
+    hidden_parent->page_list_.ForEveryPage([pq](auto* p, uint64_t off) {
+      if (p->IsPage()) {
+        vm_page_t* page = p->Page();
+        if (page->object.pin_count == 0) {
+          AssertHeld<Lock<SpinLock>, IrqSave>(*pq->get_lock());
+          pq->MoveToUnswappableLocked(page);
+        }
+      }
+      return ZX_ERR_NEXT;
+    });
+  }
+  hidden_parent->size_ = size_;
+}
+
+zx_status_t VmCowPages::CreateChildSlice(uint64_t offset, uint64_t size,
+                                         fbl::RefPtr<VmCowPages>* cow_slice) {
+  LTRACEF("vmo %p offset %#" PRIx64 " size %#" PRIx64 "\n", this, offset, size);
+
+  canary_.Assert();
+
+  fbl::AllocChecker ac;
+  // Slices just need the slice option and default alloc flags since they will propagate any
+  // operation up to a parent and use their options and alloc flags.
+  auto slice = fbl::AdoptRef<VmCowPages>(
+      new (&ac) VmCowPages(hierarchy_state_ptr_, kSlice, PMM_ALLOC_FLAG_ANY, size, nullptr));
+  if (!ac.check()) {
+    return ZX_ERR_NO_MEMORY;
+  }
+
+  Guard<Mutex> guard{&lock_};
+  AssertHeld(slice->lock_);
+
+  slice->parent_offset_ = offset;
+  slice->parent_limit_ = size;
+  // As our slice must be in range of the parent it is impossible to have the accumulated parent
+  // offset overflow.
+  slice->root_parent_offset_ = CheckedAdd(offset, slice->root_parent_offset_);
+  CheckedAdd(slice->root_parent_offset_, size);
+
+  // Currently rely on VmObjectPaged code to setup the hierarchy and call
+  // InitializeOriginalParentLocked etc. This will be changed once the VmObjectPaged hierarchy is
+  // simplified.
+
+  *cow_slice = slice;
+  return ZX_OK;
+}
+
+zx_status_t VmCowPages::CreateClone(uint64_t offset, uint64_t size,
+                                    fbl::RefPtr<VmCowPages>* cow_child) {
+  LTRACEF("vmo %p offset %#" PRIx64 " size %#" PRIx64 "\n", this, offset, size);
+
+  canary_.Assert();
+
+  // There are two reasons for declaring/allocating the clones outside of the vmo's lock. First,
+  // the dtor might require taking the lock, so we need to ensure that it isn't called until
+  // after the lock is released. Second, diagnostics code makes calls into vmos while holding
+  // the global vmo lock. Since the VmObject ctor takes the global lock, we can't construct
+  // any vmos under any vmo lock.
+  fbl::AllocChecker ac;
+  auto cow_pages = fbl::AdoptRef<VmCowPages>(
+      new (&ac) VmCowPages(hierarchy_state_ptr_, 0, pmm_alloc_flags_, size, nullptr));
+  if (!ac.check()) {
+    return ZX_ERR_NO_MEMORY;
+  }
+
+  Guard<Mutex> guard{&lock_};
+  AssertHeld(cow_pages->lock_);
+
+  bool overflow;
+  overflow = add_overflow(offset, root_parent_offset_, &cow_pages->root_parent_offset_);
+  if (overflow) {
+    return ZX_ERR_INVALID_ARGS;
+  }
+  uint64_t temp;
+  overflow = add_overflow(cow_pages->root_parent_offset_, size, &temp);
+  if (overflow) {
+    return ZX_ERR_INVALID_ARGS;
+  }
+
+  cow_pages->parent_offset_ = offset;
+  if (offset > size_) {
+    cow_pages->parent_limit_ = 0;
+  } else {
+    cow_pages->parent_limit_ = ktl::min(size, size_ - offset);
+  }
+
+  *cow_child = ktl::move(cow_pages);
+  return ZX_OK;
+}
+
+zx_status_t VmCowPages::CreateHidden(fbl::RefPtr<VmCowPages>* hidden_cow) {
+  fbl::AllocChecker ac;
+  auto cow_pages = fbl::AdoptRef<VmCowPages>(
+      new (&ac) VmCowPages(hierarchy_state_ptr_, kHidden, pmm_alloc_flags_, 0, nullptr));
+  if (!ac.check()) {
+    return ZX_ERR_NO_MEMORY;
+  }
+  *hidden_cow = ktl::move(cow_pages);
+  return ZX_OK;
+}
+
+void VmCowPages::RemoveChildLocked(VmCowPages* removed) {
+  canary_.Assert();
+
+  AssertHeld(removed->lock_);
+  removed->parent_.reset();
+
+  if (!is_hidden()) {
+    DropChildLocked(removed);
+    return;
+  }
+
+  // Hidden vmos always have 0 or 2 children, but we can't be here with 0 children.
+  DEBUG_ASSERT(children_list_len_ == 2);
+  bool removed_left = &left_child_locked() == removed;
+
+  DropChildLocked(removed);
+
+  VmCowPages* child = &children_list_.front();
+  DEBUG_ASSERT(child);
+
+  MergeContentWithChildLocked(removed, removed_left);
+
+  // The child which removed itself and led to the invocation should have a reference
+  // to us, in addition to child.parent_ which we are about to clear.
+  DEBUG_ASSERT(ref_count_debug() >= 2);
+
+  AssertHeld(child->lock_);
+  if (child->page_attribution_user_id_ != page_attribution_user_id_) {
+    // If the attribution user id of this vmo doesn't match that of its remaining child,
+    // then the vmo with the matching attribution user id was just closed. In that case, we
+    // need to reattribute the pages of any ancestor hidden vmos to vmos that still exist.
+    //
+    // The syscall API doesn't specify how pages are to be attributed among a group of COW
+    // clones. One option is to pick a remaining vmo 'arbitrarily' and attribute everything to
+    // that vmo. However, it seems fairer to reattribute each remaining hidden vmo with
+    // its child whose user id doesn't match the vmo that was just closed. So walk up the
+    // clone chain and attribute each hidden vmo to the vmo we didn't just walk through.
+    auto cur = this;
+    AssertHeld(cur->lock_);
+    uint64_t user_id_to_skip = page_attribution_user_id_;
+    while (cur->parent_ != nullptr) {
+      DEBUG_ASSERT(cur->parent_->is_hidden());
+      auto parent = cur->parent_.get();
+      AssertHeld(parent->lock_);
+
+      if (parent->page_attribution_user_id_ == page_attribution_user_id_) {
+        uint64_t new_user_id = parent->left_child_locked().page_attribution_user_id_;
+        if (new_user_id == user_id_to_skip) {
+          new_user_id = parent->right_child_locked().page_attribution_user_id_;
+        }
+        // Although user IDs can be unset for VMOs that do not have a dispatcher, copy-on-write
+        // VMOs always have user level dispatchers, and should have a valid user-id set, hence we
+        // should never end up re-attributing a hidden parent with an unset id.
+        DEBUG_ASSERT(new_user_id != 0);
+        // The 'if' above should mean that the new_user_id isn't the ID we are trying to remove
+        // and isn't one we just used. For this to fail we either need a corrupt VMO hierarchy, or
+        // to have labeled two leaf nodes with the same user_id, which would also be incorrect as
+        // leaf nodes have unique dispatchers and hence unique ids.
+        DEBUG_ASSERT(new_user_id != page_attribution_user_id_ && new_user_id != user_id_to_skip);
+        parent->page_attribution_user_id_ = new_user_id;
+        user_id_to_skip = new_user_id;
+
+        cur = parent;
+      } else {
+        break;
+      }
+    }
+  }
+
+  // Drop the child from our list, but don't recurse back into this function. Then
+  // remove ourselves from the clone tree.
+  DropChildLocked(child);
+  if (parent_) {
+    AssertHeld(parent_->lock_ref());
+    parent_->ReplaceChildLocked(this, child);
+  }
+  child->parent_ = ktl::move(parent_);
+}
+
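+// Merges this hidden vmo's content into its single remaining child after |removed| has been
+// dropped from the children list, freeing any pages that are no longer reachable by the child.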
+void VmCowPages::MergeContentWithChildLocked(VmCowPages* removed, bool removed_left) {
+  DEBUG_ASSERT(children_list_len_ == 1);
+  VmCowPages& child = children_list_.front();
+  AssertHeld(child.lock_);
+  AssertHeld(removed->lock_);
+
+  list_node freed_pages;
+  list_initialize(&freed_pages);
+  BatchPQRemove page_remover(&freed_pages);
+
+  const uint64_t visibility_start_offset = child.parent_offset_ + child.parent_start_limit_;
+  const uint64_t merge_start_offset = child.parent_offset_;
+  const uint64_t merge_end_offset = child.parent_offset_ + child.parent_limit_;
+
+  // Hidden parents are not supposed to have page sources, but we assert it here anyway because a
+  // page source would make the way we move pages between objects incorrect, as we would break any
+  // potential back links.
+  DEBUG_ASSERT(!page_source_);
+
+  page_list_.RemovePages(page_remover.RemovePagesCallback(), 0, visibility_start_offset);
+  page_list_.RemovePages(page_remover.RemovePagesCallback(), merge_end_offset,
+                         VmPageList::MAX_SIZE);
+
+  if (child.parent_offset_ + child.parent_limit_ > parent_limit_) {
+    // Update the child's parent limit to ensure that it won't be able to see more
+    // of its new parent than this hidden vmo was able to see.
+    if (parent_limit_ < child.parent_offset_) {
+      child.parent_limit_ = 0;
+      child.parent_start_limit_ = 0;
+    } else {
+      child.parent_limit_ = parent_limit_ - child.parent_offset_;
+      child.parent_start_limit_ = ktl::min(child.parent_start_limit_, child.parent_limit_);
+    }
+  } else {
+    // The child will be able to see less of its new parent than this hidden vmo was
+    // able to see, so release any parent pages in that range.
+    ReleaseCowParentPagesLocked(merge_end_offset, parent_limit_, &page_remover);
+  }
+
+  if (removed->parent_offset_ + removed->parent_start_limit_ < visibility_start_offset) {
+    // If the removed former child has a smaller offset, then there are retained
+    // ancestor pages that will no longer be visible and thus should be freed.
+    ReleaseCowParentPagesLocked(removed->parent_offset_ + removed->parent_start_limit_,
+                                visibility_start_offset, &page_remover);
+  }
+
+  // Adjust the child's offset so it will still see the correct range.
+  bool overflow = add_overflow(parent_offset_, child.parent_offset_, &child.parent_offset_);
+  // Overflow here means that something went wrong when setting up parent limits.
+  DEBUG_ASSERT(!overflow);
+
+  if (child.is_hidden()) {
+    // After the merge, either |child| can't see anything in parent (in which case
+    // the parent limits could be anything), or |child|'s first visible offset will be
+    // at least as large as |this|'s first visible offset.
+    DEBUG_ASSERT(child.parent_start_limit_ == child.parent_limit_ ||
+                 parent_offset_ + parent_start_limit_ <=
+                     child.parent_offset_ + child.parent_start_limit_);
+  } else {
+    // non-hidden vmos should always have zero parent_start_limit_
+    DEBUG_ASSERT(child.parent_start_limit_ == 0);
+  }
+
+  // As we are moving pages between objects we need to make sure no backlinks are broken. We know
+  // there's no page_source_ and hence no pages will be in the pager_backed queue, but we could
+  // have pages in the unswappable_zero_forked queue. We do know that pages in this queue cannot
+  // have been pinned, so we can just move (or potentially re-move) any page that is not pinned
+  // into the unswappable queue.
+  {
+    PageQueues* pq = pmm_page_queues();
+    Guard<SpinLock, IrqSave> guard{pq->get_lock()};
+    page_list_.ForEveryPage([pq](auto* p, uint64_t off) {
+      if (p->IsPage()) {
+        vm_page_t* page = p->Page();
+        if (page->object.pin_count == 0) {
+          AssertHeld<Lock<SpinLock>, IrqSave>(*pq->get_lock());
+          pq->MoveToUnswappableLocked(page);
+        }
+      }
+      return ZX_ERR_NEXT;
+    });
+  }
+
+  // At this point, we need to merge |this|'s page list and |child|'s page list.
+  //
+  // In general, COW clones are expected to share most of their pages (i.e. to fork a relatively
+  // small number of pages). Because of this, it is preferable to do work proportional to the
+  // number of pages which were forked into |removed|. However, there are a few things that can
+  // prevent this:
+  //   - If |child|'s offset is non-zero then the offsets of all of |this|'s pages will
+  //     need to be updated when they are merged into |child|.
+  //   - If there has been a call to ReleaseCowParentPagesLocked which was not able to
+  //     update the parent limits, then there can exist pages in this vmo's page list
+  //     which are not visible to |child| but can't be easily freed based on its parent
+  //     limits. Finding these pages requires examining the split bits of all pages.
+  //   - If |child| is hidden, then there can exist pages in this vmo which were split into
+  //     |child|'s subtree and then migrated out of |child|. Those pages need to be freed, and
+  //     the simplest way to find those pages is to examine the split bits.
+  bool fast_merge = merge_start_offset == 0 && !partial_cow_release_ && !child.is_hidden();
+
+  if (fast_merge) {
+    // Only leaf vmos can be directly removed, so this must always be true. This guarantees
+    // that there are no pages that were split into |removed| that have since been migrated
+    // to its children.
+    DEBUG_ASSERT(!removed->is_hidden());
+
+    // Before merging, find any pages that are present in both |removed| and |this|. Those
+    // pages are visible to |child| but haven't been written to through |child|, so
+    // their split bits need to be cleared. Note that ::ReleaseCowParentPagesLocked ensures
+    // that pages outside of the parent limit range won't have their split bits set.
+    removed->page_list_.ForEveryPageInRange(
+        [removed_offset = removed->parent_offset_, this](auto* page, uint64_t offset) {
+          AssertHeld(lock_);
+          if (page->IsMarker()) {
+            return ZX_ERR_NEXT;
+          }
+          VmPageOrMarker* page_or_mark = page_list_.Lookup(offset + removed_offset);
+          if (page_or_mark && page_or_mark->IsPage()) {
+            vm_page* p_page = page_or_mark->Page();
+            // The page is definitely forked into |removed|, but
+            // shouldn't be forked twice.
+            DEBUG_ASSERT(p_page->object.cow_left_split ^ p_page->object.cow_right_split);
+            p_page->object.cow_left_split = 0;
+            p_page->object.cow_right_split = 0;
+          }
+          return ZX_ERR_NEXT;
+        },
+        removed->parent_start_limit_, removed->parent_limit_);
+
+    list_node covered_pages;
+    list_initialize(&covered_pages);
+    BatchPQRemove covered_remover(&covered_pages);
+
+    // Now merge |child|'s pages into |this|, overwriting any pages present in |this|, and
+    // then move that list to |child|.
+
+    child.page_list_.MergeOnto(page_list_,
+                               [&covered_remover](vm_page_t* p) { covered_remover.Push(p); });
+    child.page_list_ = ktl::move(page_list_);
+
+    vm_page_t* p;
+    covered_remover.Flush();
+    list_for_every_entry (&covered_pages, p, vm_page_t, queue_node) {
+      // The page was already present in |child|, so it should be split at least
+      // once. And being split twice is obviously bad.
+      ASSERT(p->object.cow_left_split ^ p->object.cow_right_split);
+      ASSERT(p->object.pin_count == 0);
+    }
+    list_splice_after(&covered_pages, &freed_pages);
+  } else {
+    // Merge our page list into the child page list and update all the necessary metadata.
+    child.page_list_.MergeFrom(
+        page_list_, merge_start_offset, merge_end_offset,
+        [&page_remover](vm_page* page, uint64_t offset) { page_remover.Push(page); },
+        [&page_remover, removed_left](VmPageOrMarker* page_or_marker, uint64_t offset) {
+          DEBUG_ASSERT(page_or_marker->IsPage());
+          vm_page_t* page = page_or_marker->Page();
+          DEBUG_ASSERT(page->object.pin_count == 0);
+
+          if (removed_left ? page->object.cow_right_split : page->object.cow_left_split) {
+            // This happens when the page was already migrated into child but then
+            // was migrated further into child's descendants. The page can be freed.
+            page = page_or_marker->ReleasePage();
+            page_remover.Push(page);
+          } else {
+            // Since we recursively fork on write, if the child doesn't have the
+            // page, then neither of its children do.
+            page->object.cow_left_split = 0;
+            page->object.cow_right_split = 0;
+          }
+        });
+  }
+
+  page_remover.Flush();
+  if (!list_is_empty(&freed_pages)) {
+    pmm_free(&freed_pages);
+  }
+}
+
+void VmCowPages::DumpLocked(uint depth, bool verbose) const {
+  canary_.Assert();
+
+  size_t count = 0;
+  page_list_.ForEveryPage([&count](const auto* p, uint64_t) {
+    if (p->IsPage()) {
+      count++;
+    }
+    return ZX_ERR_NEXT;
+  });
+
+  for (uint i = 0; i < depth; ++i) {
+    printf("  ");
+  }
+  printf("cow_pages %p size %#" PRIx64 " offset %#" PRIx64 " start limit %#" PRIx64
+         " limit %#" PRIx64 " pages %zu ref %d parent %p\n",
+         this, size_, parent_offset_, parent_start_limit_, parent_limit_, count, ref_count_debug(),
+         parent_.get());
+
+  if (page_source_) {
+    for (uint i = 0; i < depth + 1; ++i) {
+      printf("  ");
+    }
+    page_source_->Dump();
+  }
+
+  if (verbose) {
+    auto f = [depth](const auto* p, uint64_t offset) {
+      for (uint i = 0; i < depth + 1; ++i) {
+        printf("  ");
+      }
+      if (p->IsMarker()) {
+        printf("offset %#" PRIx64 " zero page marker\n", offset);
+      } else {
+        vm_page_t* page = p->Page();
+        printf("offset %#" PRIx64 " page %p paddr %#" PRIxPTR "(%c%c)\n", offset, page,
+               page->paddr(), page->object.cow_left_split ? 'L' : '.',
+               page->object.cow_right_split ? 'R' : '.');
+      }
+      return ZX_ERR_NEXT;
+    };
+    page_list_.ForEveryPage(f);
+  }
+}
+
+size_t VmCowPages::AttributedPagesInRangeLocked(uint64_t offset, uint64_t len) const {
+  canary_.Assert();
+
+  if (is_hidden()) {
+    return 0;
+  }
+
+  size_t page_count = 0;
+  // TODO: Decide who pages should actually be attributed to.
+  page_list_.ForEveryPageAndGapInRange(
+      [&page_count](const auto* p, uint64_t off) {
+        if (p->IsPage()) {
+          page_count++;
+        }
+        return ZX_ERR_NEXT;
+      },
+      [this, &page_count](uint64_t gap_start, uint64_t gap_end) {
+        AssertHeld(lock_);
+
+        // If there's no parent, there's no pages to care about. If there is a non-hidden
+        // parent, then that owns any pages in the gap, not us.
+        if (!parent_ || !parent_->is_hidden()) {
+          return ZX_ERR_NEXT;
+        }
+
+        // Count any ancestor pages that should be attributed to us in the range. Ideally the whole
+        // range gets processed in one attempt, but in order to prevent unbounded stack growth with
+        // recursion we instead process partial ranges and recalculate the intermediate results.
+        // As a result instead of being O(n) in the number of committed pages it could
+        // pathologically become O(nd) where d is our depth in the vmo hierarchy.
+        uint64_t off = gap_start;
+        while (off < parent_limit_ && off < gap_end) {
+          uint64_t local_count = 0;
+          uint64_t attributed =
+              CountAttributedAncestorPagesLocked(off, gap_end - off, &local_count);
+          // |CountAttributedAncestorPagesLocked| guarantees that it will make progress.
+          DEBUG_ASSERT(attributed > 0);
+          off += attributed;
+          page_count += local_count;
+        }
+
+        return ZX_ERR_NEXT;
+      },
+      offset, offset + len);
+
+  return page_count;
+}
+
+uint64_t VmCowPages::CountAttributedAncestorPagesLocked(uint64_t offset, uint64_t size,
+                                                        uint64_t* count) const TA_REQ(lock_) {
+  // We need to walk up the ancestor chain to see if there are any pages that should be attributed
+  // to this vmo. We attempt to operate on the entire range given to us, but should we need to
+  // query the next parent for a range we trim our operating range. Trimming the range is necessary
+  // as we cannot recurse and otherwise have no way to remember where we were up to after
+  // processing the range in the parent. The solution then is to return all the way back up to the
+  // caller with a partial range and then effectively recompute the metadata at the point we were
+  // up to.
+
+  // Note that we cannot stop just because the page_attribution_user_id_ changes. This is because
+  // there might still be a forked page at the offset in question which should be attributed to
+  // this vmo. Whenever the attribution user id changes while walking up the ancestors, we need
+  // to determine if there is a 'closer' vmo in the sibling subtree to which the offset in
+  // question can be attributed, or if it should still be attributed to the current vmo.
+
+  DEBUG_ASSERT(offset < parent_limit_);
+  const VmCowPages* cur = this;
+  AssertHeld(cur->lock_);
+  uint64_t cur_offset = offset;
+  uint64_t cur_size = size;
+  // Count of how many pages we attributed as being owned by this vmo.
+  uint64_t attributed_ours = 0;
+  // Count how much we've processed. This is needed to remember where we are up to when we iterate
+  // up the parent list at an offset.
+  uint64_t attributed = 0;
+  while (cur_offset < cur->parent_limit_) {
+    // For cur->parent_limit_ to be non-zero, it must have a parent.
+    DEBUG_ASSERT(cur->parent_);
+
+    const auto parent = cur->parent_.get();
+    AssertHeld(parent->lock_);
+    uint64_t parent_offset;
+    bool overflowed = add_overflow(cur->parent_offset_, cur_offset, &parent_offset);
+    DEBUG_ASSERT(!overflowed);                     // vmo creation should have failed
+    DEBUG_ASSERT(parent_offset <= parent->size_);  // parent_limit_ prevents this
+
+    const bool left = cur == &parent->left_child_locked();
+    const auto& sib = left ? parent->right_child_locked() : parent->left_child_locked();
+
+    // Work out how much of the desired size is actually visible to us in the parent; we just use
+    // this to walk the correct amount of the page_list_.
+    const uint64_t parent_size = ktl::min(cur_size, cur->parent_limit_ - cur_offset);
+
+    // By default we expect to process the entire range, hence our next_size is 0. Should we need to
+    // iterate up the stack then these will be set by one of the callbacks.
+    uint64_t next_parent_offset = parent_offset + cur_size;
+    uint64_t next_size = 0;
+    parent->page_list_.ForEveryPageAndGapInRange(
+        [&parent, &cur, &attributed_ours, &sib](const auto* p, uint64_t off) {
+          AssertHeld(cur->lock_);
+          AssertHeld(sib.lock_);
+          AssertHeld(parent->lock_);
+          if (p->IsMarker()) {
+            return ZX_ERR_NEXT;
+          }
+          vm_page* page = p->Page();
+          if (
+              // Page is explicitly owned by us
+              (parent->page_attribution_user_id_ == cur->page_attribution_user_id_) ||
+              // If page has already been split and we can see it, then we know
+              // the sibling subtree can't see the page and thus it should be
+              // attributed to this vmo.
+              (page->object.cow_left_split || page->object.cow_right_split) ||
+              // If the sibling cannot access this page then it's ours, otherwise we know there's
+              // a vmo in the sibling subtree which is 'closer' to this offset, and to which we
+              // will attribute the page.
+              !(sib.parent_offset_ + sib.parent_start_limit_ <= off &&
+                off < sib.parent_offset_ + sib.parent_limit_)) {
+            attributed_ours++;
+          }
+          return ZX_ERR_NEXT;
+        },
+        [&parent, &cur, &next_parent_offset, &next_size, &sib](uint64_t gap_start,
+                                                               uint64_t gap_end) {
+          // Process a gap in the parent VMO.
+          //
+          // A gap in the parent VMO doesn't necessarily mean there are no pages
+          // in this range: our parent's ancestors may have pages, so we need to
+          // walk up the tree to find out.
+          //
+          // We don't always need to walk the tree though: in this gap, both this VMO
+          // and our sibling VMO will share the same set of ancestor pages. However, the
+          // pages will only be accounted to one of the two VMOs.
+          //
+          // If the parent page_attribution_user_id is the same as us, we need to
+          // keep walking up the tree to perform a more accurate count.
+          //
+          // If the parent page_attribution_user_id is our sibling, however, we
+          // can just ignore the overlapping range: pages may or may not exist in
+          // the range --- but either way, they would be accounted to our sibling.
+          // Instead, we need only walk up ranges not visible to our sibling.
+          AssertHeld(cur->lock_);
+          AssertHeld(sib.lock_);
+          AssertHeld(parent->lock_);
+          uint64_t gap_size = gap_end - gap_start;
+          if (parent->page_attribution_user_id_ == cur->page_attribution_user_id_) {
+            // don't need to consider siblings as we own this range, but we do need to
+            // keep looking up the stack to find any actual pages.
+            next_parent_offset = gap_start;
+            next_size = gap_size;
+            return ZX_ERR_STOP;
+          }
+          // For this entire range we know that the offset is visible to the current vmo, and there
+          // are no committed or migrated pages. We need to check though for what portion of this
+          // range we should attribute to the sibling. Any range that we can attribute to the
+          // sibling we can skip, otherwise we have to keep looking up the stack to see if there are
+          // any pages that could be attributed to us.
+          uint64_t sib_offset, sib_len;
+          if (!GetIntersect(gap_start, gap_size, sib.parent_offset_ + sib.parent_start_limit_,
+                            sib.parent_limit_ - sib.parent_start_limit_, &sib_offset, &sib_len)) {
+            // No sibling ownership, so need to look at the whole range in the parent to find any
+            // pages.
+            next_parent_offset = gap_start;
+            next_size = gap_size;
+            return ZX_ERR_STOP;
+          }
+          // If the whole range is owned by the sibling, any pages that might be in
+          // it won't be accounted to us anyway. Skip the segment.
+          if (sib_len == gap_size) {
+            DEBUG_ASSERT(sib_offset == gap_start);
+            return ZX_ERR_NEXT;
+          }
+
+          // Otherwise, inspect the range not visible to our sibling.
+          if (sib_offset == gap_start) {
+            next_parent_offset = sib_offset + sib_len;
+            next_size = gap_end - next_parent_offset;
+          } else {
+            next_parent_offset = gap_start;
+            next_size = sib_offset - gap_start;
+          }
+          return ZX_ERR_STOP;
+        },
+        parent_offset, parent_offset + parent_size);
+    if (next_size == 0) {
+      // If next_size wasn't set then we don't need to keep looking up the chain as we successfully
+      // looked at the entire range.
+      break;
+    }
+    // Count anything up to the next starting point as being processed.
+    attributed += next_parent_offset - parent_offset;
+    // Size should have been reduced by at least the amount we just attributed.
+    DEBUG_ASSERT(next_size <= cur_size &&
+                 cur_size - next_size >= next_parent_offset - parent_offset);
+
+    cur = parent;
+    cur_offset = next_parent_offset;
+    cur_size = next_size;
+  }
+  // Exiting the loop means we either ceased finding a relevant parent for the range, or we were
+  // able to process the entire range without needing to look up to a parent; in either case we
+  // can consider the entire range as attributed.
+  //
+  // The cur_size can be larger than the value of parent_size from the last loop iteration. This is
+  // fine, as we trivially know that range has zero pages in it, and therefore has no pages whose
+  // attribution needs to be determined.
+  attributed += cur_size;
+
+  *count = attributed_ours;
+  return attributed;
+}
+
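+// Moves the page or marker in |p| into the page list at |offset|. Markers and empty slots are
+// overwritten, but an existing page causes ZX_ERR_ALREADY_EXISTS to be returned.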
+zx_status_t VmCowPages::AddPageLocked(VmPageOrMarker* p, uint64_t offset, bool do_range_update) {
+  canary_.Assert();
+
+  if (p->IsPage()) {
+    LTRACEF("vmo %p, offset %#" PRIx64 ", page %p (%#" PRIxPTR ")\n", this, offset, p->Page(),
+            p->Page()->paddr());
+  } else {
+    DEBUG_ASSERT(p->IsMarker());
+    LTRACEF("vmo %p, offset %#" PRIx64 ", marker\n", this, offset);
+  }
+
+  if (offset >= size_) {
+    return ZX_ERR_OUT_OF_RANGE;
+  }
+
+  VmPageOrMarker* page = page_list_.LookupOrAllocate(offset);
+  if (!page) {
+    return ZX_ERR_NO_MEMORY;
+  }
+  // Only fail on pages, we overwrite markers and empty slots.
+  if (page->IsPage()) {
+    return ZX_ERR_ALREADY_EXISTS;
+  }
+  // If this is actually a real page, we need to place it into the appropriate queue.
+  if (p->IsPage()) {
+    vm_page_t* page = p->Page();
+    DEBUG_ASSERT(page->state() == VM_PAGE_STATE_OBJECT);
+    DEBUG_ASSERT(page->object.pin_count == 0);
+    SetNotWired(page, offset);
+  }
+  *page = ktl::move(*p);
+
+  if (do_range_update) {
+    // other mappings may have covered this offset into the vmo, so unmap those ranges
+    RangeChangeUpdateLocked(offset, PAGE_SIZE, RangeChangeOp::Unmap);
+  }
+
+  return ZX_OK;
+}
+
+zx_status_t VmCowPages::AddNewPageLocked(uint64_t offset, vm_page_t* page, bool zero,
+                                         bool do_range_update) {
+  canary_.Assert();
+
+  DEBUG_ASSERT(IS_PAGE_ALIGNED(offset));
+
+  InitializeVmPage(page);
+  if (zero) {
+    ZeroPage(page);
+  }
+
+  VmPageOrMarker p = VmPageOrMarker::Page(page);
+  zx_status_t status = AddPageLocked(&p, offset, false);
+
+  if (status != ZX_OK) {
+    // Release the page from 'p'; as we are returning failure, 'page' is still owned by the caller.
+    p.ReleasePage();
+  }
+  return status;
+}
+
+zx_status_t VmCowPages::AddNewPagesLocked(uint64_t start_offset, list_node_t* pages, bool zero,
+                                          bool do_range_update) {
+  canary_.Assert();
+
+  DEBUG_ASSERT(IS_PAGE_ALIGNED(start_offset));
+
+  uint64_t offset = start_offset;
+  while (vm_page_t* p = list_remove_head_type(pages, vm_page_t, queue_node)) {
+    // Defer the range change update by passing false as we will do it in bulk at the end if needed.
+    zx_status_t status = AddNewPageLocked(offset, p, zero, false);
+    if (status != ZX_OK) {
+      // Put the page back on the list so that someone owns it and it'll get freed.
+      list_add_head(pages, &p->queue_node);
+      // Decommit any pages we already placed.
+      if (offset > start_offset) {
+        DecommitRangeLocked(start_offset, offset - start_offset);
+      }
+
+      // Free all the pages back as we had ownership of them.
+      pmm_free(pages);
+      return status;
+    }
+    offset += PAGE_SIZE;
+  }
+
+  if (do_range_update) {
+    // other mappings may have covered this offset into the vmo, so unmap those ranges
+    RangeChangeUpdateLocked(start_offset, offset - start_offset, RangeChangeOp::Unmap);
+  }
+  return ZX_OK;
+}
+
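+// Returns true if |page|, which must be in our page list at |offset|, is accessible by at most one
+// of our two children, either because a split bit is already set or because the offset lies
+// outside a child's visible range of this vmo.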
+bool VmCowPages::IsUniAccessibleLocked(vm_page_t* page, uint64_t offset) const {
+  DEBUG_ASSERT(page_list_.Lookup(offset)->Page() == page);
+
+  if (page->object.cow_right_split || page->object.cow_left_split) {
+    return true;
+  }
+
+  if (offset < left_child_locked().parent_offset_ + left_child_locked().parent_start_limit_ ||
+      offset >= left_child_locked().parent_offset_ + left_child_locked().parent_limit_) {
+    return true;
+  }
+
+  if (offset < right_child_locked().parent_offset_ + right_child_locked().parent_start_limit_ ||
+      offset >= right_child_locked().parent_offset_ + right_child_locked().parent_limit_) {
+    return true;
+  }
+
+  return false;
+}
+
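+// Ensures a private copy of |page|, currently provided by |page_owner| at |owner_offset|, exists
+// at |offset| in this vmo by forking or migrating pages down through the intermediate hidden vmos.
+// Returns the resulting page, or nullptr if an allocation failed part way through.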
+vm_page_t* VmCowPages::CloneCowPageLocked(uint64_t offset, list_node_t* free_list,
+                                          VmCowPages* page_owner, vm_page_t* page,
+                                          uint64_t owner_offset) {
+  DEBUG_ASSERT(page != vm_get_zero_page());
+  DEBUG_ASSERT(parent_);
+
+  // To avoid the need for rollback logic on allocation failure, we start the forking
+  // process from the root-most vmo and work our way towards the leaf vmo. This allows
+  // us to maintain the hidden vmo invariants through the whole operation, so that we
+  // can stop at any point.
+  //
+  // To set this up, walk from the leaf to |page_owner|, and keep track of the
+  // path via |stack_.dir_flag|.
+  VmCowPages* cur = this;
+  do {
+    AssertHeld(cur->lock_);
+    VmCowPages* next = cur->parent_.get();
+    // We can't make COW clones of physical vmos, so this can only happen if we
+    // somehow don't find |page_owner| in the ancestor chain.
+    DEBUG_ASSERT(next);
+    AssertHeld(next->lock_);
+
+    next->stack_.dir_flag = &next->left_child_locked() == cur ? StackDir::Left : StackDir::Right;
+    if (next->stack_.dir_flag == StackDir::Right) {
+      DEBUG_ASSERT(&next->right_child_locked() == cur);
+    }
+    cur = next;
+  } while (cur != page_owner);
+  uint64_t cur_offset = owner_offset;
+
+  // |target_page| is the page we're considering for migration. Cache it
+  // across loop iterations.
+  vm_page_t* target_page = page;
+
+  bool alloc_failure = false;
+
+  // As long as we're simply migrating |page|, there's no need to update any vmo mappings, since
+  // that means the other side of the clone tree has already covered |page| and the current side
+  // of the clone tree will still see |page|. As soon as we insert a new page, we'll need to
+  // update all mappings at or below that level.
+  bool skip_range_update = true;
+  do {
+    // |target_page| is always located in |cur| at |cur_offset| at the start of the loop.
+    VmCowPages* target_page_owner = cur;
+    AssertHeld(target_page_owner->lock_);
+    uint64_t target_page_offset = cur_offset;
+
+    cur = cur->stack_.dir_flag == StackDir::Left ? &cur->left_child_locked()
+                                                 : &cur->right_child_locked();
+    DEBUG_ASSERT(cur_offset >= cur->parent_offset_);
+    cur_offset -= cur->parent_offset_;
+
+    if (target_page_owner->IsUniAccessibleLocked(target_page, target_page_offset)) {
+      // If the page we're covering in the parent is uni-accessible, then we
+      // can directly move the page.
+
+      // Assert that we're not trying to split the page the same direction two times. Either
+      // some tracking state got corrupted or a page in the subtree we're trying to
+      // migrate to got improperly migrated/freed. If we did this migration, then the
+      // opposite subtree would lose access to this page.
+      DEBUG_ASSERT(!(target_page_owner->stack_.dir_flag == StackDir::Left &&
+                     target_page->object.cow_left_split));
+      DEBUG_ASSERT(!(target_page_owner->stack_.dir_flag == StackDir::Right &&
+                     target_page->object.cow_right_split));
+
+      target_page->object.cow_left_split = 0;
+      target_page->object.cow_right_split = 0;
+      VmPageOrMarker removed = target_page_owner->page_list_.RemovePage(target_page_offset);
+      vm_page* removed_page = removed.ReleasePage();
+      pmm_page_queues()->Remove(removed_page);
+      DEBUG_ASSERT(removed_page == target_page);
+    } else {
+      // Otherwise we need to fork the page.
+      vm_page_t* cover_page;
+      alloc_failure = !AllocateCopyPage(pmm_alloc_flags_, page->paddr(), free_list, &cover_page);
+      if (unlikely(alloc_failure)) {
+        // TODO: plumb through PageRequest once anonymous page source is implemented.
+        break;
+      }
+
+      // We're going to cover target_page with cover_page, so set appropriate split bit.
+      if (target_page_owner->stack_.dir_flag == StackDir::Left) {
+        target_page->object.cow_left_split = 1;
+        DEBUG_ASSERT(target_page->object.cow_right_split == 0);
+      } else {
+        target_page->object.cow_right_split = 1;
+        DEBUG_ASSERT(target_page->object.cow_left_split == 0);
+      }
+      target_page = cover_page;
+
+      skip_range_update = false;
+    }
+
+    // Skip the automatic range update so we can do it ourselves more efficiently.
+    VmPageOrMarker add_page = VmPageOrMarker::Page(target_page);
+    zx_status_t status = cur->AddPageLocked(&add_page, cur_offset, false);
+    DEBUG_ASSERT(status == ZX_OK);
+
+    if (!skip_range_update) {
+      if (cur != this) {
+        // In this case, cur is a hidden vmo and has no direct mappings. Also, its
+        // descendents along the page stack will be dealt with by subsequent iterations
+        // of this loop. That means that any mappings that need to be touched now are
+        // owned by the children on the opposite side of stack_.dir_flag.
+        VmCowPages& other = cur->stack_.dir_flag == StackDir::Left ? cur->right_child_locked()
+                                                                   : cur->left_child_locked();
+        AssertHeld(other.lock_);
+        RangeChangeList list;
+        other.RangeChangeUpdateFromParentLocked(cur_offset, PAGE_SIZE, &list);
+        RangeChangeUpdateListLocked(&list, RangeChangeOp::Unmap);
+      } else {
+        // In this case, cur is the last vmo being changed, so update its whole subtree.
+        DEBUG_ASSERT(offset == cur_offset);
+        RangeChangeUpdateLocked(offset, PAGE_SIZE, RangeChangeOp::Unmap);
+      }
+    }
+  } while (cur != this);
+  DEBUG_ASSERT(alloc_failure || cur_offset == offset);
+
+  if (unlikely(alloc_failure)) {
+    return nullptr;
+  } else {
+    return target_page;
+  }
+}
+
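+// Makes |offset| in this vmo read as zero by ensuring |page| is forked no further than our parent,
+// then either freeing the parent's copy (if only we can see it) or setting our split bit on it,
+// and finally inserting a zero page marker into our own page list.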
+zx_status_t VmCowPages::CloneCowPageAsZeroLocked(uint64_t offset, list_node_t* free_list,
+                                                 VmCowPages* page_owner, vm_page_t* page,
+                                                 uint64_t owner_offset) {
+  DEBUG_ASSERT(parent_);
+
+  // Ensure we have a slot as we'll need it later.
+  VmPageOrMarker* slot = page_list_.LookupOrAllocate(offset);
+
+  if (!slot) {
+    return ZX_ERR_NO_MEMORY;
+  }
+
+  // We cannot be forking a page to here if there's already something.
+  DEBUG_ASSERT(slot->IsEmpty());
+
+  // Need to make sure the page is duplicated as far as our parent. Then we can pretend
+  // that we have forked it into us by setting the marker.
+  AssertHeld(parent_->lock_);
+  if (page_owner != parent_.get()) {
+    // Do not pass free_list here, as this call wants a free_list to allocate from, whereas our
+    // free_list is for placing old pages on.
+    page = parent_->CloneCowPageLocked(offset + parent_offset_, nullptr, page_owner, page,
+                                       owner_offset);
+    if (page == nullptr) {
+      return ZX_ERR_NO_MEMORY;
+    }
+  }
+
+  bool left = this == &(parent_->left_child_locked());
+  // Page is in our parent. Check if it's uni-accessible; if so we can free it.
+  if (parent_->IsUniAccessibleLocked(page, offset + parent_offset_)) {
+    // Make sure we didn't already merge the page in this direction.
+    DEBUG_ASSERT(!(left && page->object.cow_left_split));
+    DEBUG_ASSERT(!(!left && page->object.cow_right_split));
+    vm_page* removed = parent_->page_list_.RemovePage(offset + parent_offset_).ReleasePage();
+    DEBUG_ASSERT(removed == page);
+    pmm_page_queues()->Remove(removed);
+    DEBUG_ASSERT(!list_in_list(&removed->queue_node));
+    list_add_tail(free_list, &removed->queue_node);
+  } else {
+    if (left) {
+      page->object.cow_left_split = 1;
+    } else {
+      page->object.cow_right_split = 1;
+    }
+  }
+  // Insert the zero marker.
+  *slot = VmPageOrMarker::Marker();
+  return ZX_OK;
+}
+
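+// Walks up the parent chain looking for the page or marker that provides the initial content for
+// |offset|. The owning vmo and the corresponding offset within it are always returned via the out
+// parameters, even if no committed content was found.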
+VmPageOrMarker* VmCowPages::FindInitialPageContentLocked(uint64_t offset, VmCowPages** owner_out,
+                                                         uint64_t* owner_offset_out) {
+  // Search up the clone chain for any committed pages. cur_offset is the offset
+  // into cur we care about. The loop terminates either when that offset contains
+  // a committed page or when that offset can't reach into the parent.
+  VmPageOrMarker* page = nullptr;
+  VmCowPages* cur = this;
+  AssertHeld(cur->lock_);
+  uint64_t cur_offset = offset;
+  while (cur_offset < cur->parent_limit_) {
+    VmCowPages* parent = cur->parent_.get();
+    // If there's no parent, then parent_limit_ is 0 and we'll never enter the loop
+    DEBUG_ASSERT(parent);
+    AssertHeld(parent->lock_ref());
+
+    uint64_t parent_offset;
+    bool overflowed = add_overflow(cur->parent_offset_, cur_offset, &parent_offset);
+    ASSERT(!overflowed);
+    if (parent_offset >= parent->size_) {
+      // The offset is off the end of the parent, so cur is the VmCowPages
+      // which will provide the page.
+      break;
+    }
+
+    cur = parent;
+    cur_offset = parent_offset;
+    VmPageOrMarker* p = cur->page_list_.Lookup(parent_offset);
+    if (p && !p->IsEmpty()) {
+      page = p;
+      break;
+    }
+  }
+
+  *owner_out = cur;
+  *owner_offset_out = cur_offset;
+
+  return page;
+}
+
+void VmCowPages::UpdateOnAccessLocked(vm_page_t* page, uint64_t offset) {
+  // The only kind of page where there is anything to update on access is a pager-backed page.
+  // To that end we first want to determine, with certainty, that the provided page is in fact in
+  // the pager backed queue.
+
+  if (page == vm_get_zero_page()) {
+    return;
+  }
+  // Check if we have a page_source_. If we don't have one then none of our pages can be pager
+  // backed, so we can abort.
+  if (!page_source_) {
+    return;
+  }
+  // We know there is a page source and so most of the pages will be in the pager backed queue, with
+  // the exception of any pages that are pinned; those will be in the wired queue and so we need to
+  // skip them.
+  if (page->object.pin_count != 0) {
+    return;
+  }
+
+  // Currently there is a 1:1 correspondence between the VmObjectPaged hierarchy and us, so we can
+  // assume there is a backlink.
+  DEBUG_ASSERT(paged_ref_);
+
+  // These asserts are for sanity; the above checks should have caused us to abort if these aren't
+  // true.
+  DEBUG_ASSERT(page->object.get_object() == reinterpret_cast<void*>(paged_ref_));
+  DEBUG_ASSERT(page->object.get_page_offset() == offset);
+  // Although the page is already in the pager backed queue, this move causes it to be moved to the
+  // front of the first queue, indicating that it was recently accessed.
+  pmm_page_queues()->MoveToPagerBacked(page, paged_ref_, offset);
+}
+
+// Looks up the page at the requested offset, faulting it in if requested and necessary.  If
+// this VMO has a parent and the requested page isn't found, the parent will be searched.
+//
+// |free_list|, if not NULL, is a list of allocated but unused vm_page_t that
+// this function may allocate from.  This function will need at most one entry,
+// and will not fail if |free_list| is a non-empty list, faulting in was requested,
+// and offset is in range.
+zx_status_t VmCowPages::GetPageLocked(uint64_t offset, uint pf_flags, list_node* free_list,
+                                      PageRequest* page_request, vm_page_t** const page_out,
+                                      paddr_t* const pa_out) {
+  canary_.Assert();
+  DEBUG_ASSERT(!is_hidden());
+
+  if (offset >= size_) {
+    return ZX_ERR_OUT_OF_RANGE;
+  }
+
+  offset = ROUNDDOWN(offset, PAGE_SIZE);
+
+  if (is_slice()) {
+    uint64_t parent_offset;
+    VmCowPages* parent = PagedParentOfSliceLocked(&parent_offset);
+    AssertHeld(parent->lock_);
+    return parent->GetPageLocked(offset + parent_offset, pf_flags, free_list, page_request,
+                                 page_out, pa_out);
+  }
+
+  VmPageOrMarker* page_or_mark = page_list_.Lookup(offset);
+  vm_page* p = nullptr;
+  VmCowPages* page_owner;
+  uint64_t owner_offset;
+  if (page_or_mark && page_or_mark->IsPage()) {
+    // This is the common case where we have the page and don't need to do anything more, so
+    // return it straight away.
+    vm_page_t* p = page_or_mark->Page();
+    UpdateOnAccessLocked(p, offset);
+    if (page_out) {
+      *page_out = p;
+    }
+    if (pa_out) {
+      *pa_out = p->paddr();
+    }
+    return ZX_OK;
+  }
+
+  // Get content from the parent if available, otherwise accept that we are the owner of the
+  // yet-to-exist page.
+  if ((!page_or_mark || page_or_mark->IsEmpty()) && parent_) {
+    page_or_mark = FindInitialPageContentLocked(offset, &page_owner, &owner_offset);
+  } else {
+    page_owner = this;
+    owner_offset = offset;
+  }
+
+  // At this point we might not have an actual page, but we should at least have a notional owner.
+  DEBUG_ASSERT(page_owner);
+
+  __UNUSED char pf_string[5];
+  LTRACEF("vmo %p, offset %#" PRIx64 ", pf_flags %#x (%s)\n", this, offset, pf_flags,
+          vmm_pf_flags_to_string(pf_flags, pf_string));
+
+  // We need to turn this potential page or marker into a real vm_page_t. This means failing cases
+  // that we cannot handle, determining whether we can substitute the zero_page and potentially
+  // consulting a page_source.
+  if (page_or_mark && page_or_mark->IsPage()) {
+    p = page_or_mark->Page();
+  } else {
+    // If we don't have a real page and we're not sw or hw faulting in the page, return not found.
+    if ((pf_flags & VMM_PF_FLAG_FAULT_MASK) == 0) {
+      return ZX_ERR_NOT_FOUND;
+    }
+
+    // We need to get a real page as our initial content. At this point we are either starting from
+    // the zero page, or something supplied from a page source. The page source only fills in if we
+    // have a true absence of content.
+    if ((page_or_mark && page_or_mark->IsMarker()) || !page_owner->page_source_) {
+      // Either no relevant page source or this is a known marker, in which case the content is
+      // the zero page.
+      p = vm_get_zero_page();
+    } else {
+      AssertHeld(page_owner->lock_);
+      DEBUG_ASSERT(page_owner->paged_ref_);
+      AssertHeld(page_owner->paged_ref_->lock_ref());
+      VmoDebugInfo vmo_debug_info = {.vmo_ptr = reinterpret_cast<uintptr_t>(page_owner->paged_ref_),
+                                     .vmo_id = page_owner->paged_ref_->user_id_locked()};
+      zx_status_t status = page_owner->page_source_->GetPage(owner_offset, page_request,
+                                                             vmo_debug_info, &p, nullptr);
+      // Pager page sources will never synchronously return a page.
+      DEBUG_ASSERT(status != ZX_OK);
+
+      if (page_owner != this && status == ZX_ERR_NOT_FOUND) {
+        // The default behavior of clones of detached pager VMOs is to fault in zero
+        // pages instead of propagating the pager's fault.
+        // TODO: Add an arg to zx_vmo_create_child to optionally fault here.
+        p = vm_get_zero_page();
+      } else {
+        return status;
+      }
+    }
+  }
+
+  // If we made it this far we must have some valid vm_page in |p|. Although this may be the zero
+  // page, the rest of this function is tolerant towards correctly forking it.
+  DEBUG_ASSERT(p);
+  // It's possible that we are going to fork the page, and the user isn't actually going to directly
+  // use `p`, but creating the fork still uses `p` so we want to consider it accessed.
+  AssertHeld(page_owner->lock_);
+  page_owner->UpdateOnAccessLocked(p, owner_offset);
+
+  if ((pf_flags & VMM_PF_FLAG_WRITE) == 0) {
+    // If we're read-only faulting, return the page so they can map or read from it directly.
+    if (page_out) {
+      *page_out = p;
+    }
+    if (pa_out) {
+      *pa_out = p->paddr();
+    }
+    LTRACEF("read only faulting in page %p, pa %#" PRIxPTR " from parent\n", p, p->paddr());
+    return ZX_OK;
+  }
+
+  vm_page_t* res_page;
+  if (!page_owner->is_hidden() || p == vm_get_zero_page()) {
+    // If the vmo isn't hidden, we can't move the page. If the page is the zero
+    // page, there's no need to try to move the page. In either case, we need to
+    // allocate a writable page for this vmo.
+    if (!AllocateCopyPage(pmm_alloc_flags_, p->paddr(), free_list, &res_page)) {
+      return ZX_ERR_NO_MEMORY;
+    }
+    VmPageOrMarker insert = VmPageOrMarker::Page(res_page);
+    zx_status_t status = AddPageLocked(&insert, offset);
+    if (status != ZX_OK) {
+      // AddPageLocked failing for any other reason is a programming error.
+      DEBUG_ASSERT_MSG(status == ZX_ERR_NO_MEMORY, "status=%d\n", status);
+      pmm_free_page(insert.ReleasePage());
+      return status;
+    }
+    // Interpret a software fault as an explicit desire to have potential zero pages and don't
+    // consider them for cleaning; this is an optimization.
+    // We explicitly must *not* place pages from a page_source_ into the zero scanning queue.
+    if (p == vm_get_zero_page() && !page_source_ && !(pf_flags & VMM_PF_FLAG_SW_FAULT)) {
+      DEBUG_ASSERT(paged_ref_);
+      pmm_page_queues()->MoveToUnswappableZeroFork(res_page, paged_ref_, offset);
+    }
+
+    // This is the only path where we can allocate a new page without being a clone (clones are
+    // always cached). So we check here if we are not fully cached and if so perform a
+    // clean/invalidate to flush our zeroes. After doing this we will not touch the page via the
+    // physmap and so we can pretend there isn't an aliased mapping.
+    DEBUG_ASSERT(paged_ref_);
+    AssertHeld(paged_ref_->lock_ref());
+    if (paged_ref_->GetMappingCachePolicyLocked() != ARCH_MMU_FLAG_CACHED) {
+      arch_clean_invalidate_cache_range((vaddr_t)paddr_to_physmap(res_page->paddr()), PAGE_SIZE);
+    }
+  } else {
+    // We need a writable page; let ::CloneCowPageLocked handle inserting one.
+    res_page = CloneCowPageLocked(offset, free_list, page_owner, p, owner_offset);
+    if (res_page == nullptr) {
+      return ZX_ERR_NO_MEMORY;
+    }
+  }
+
+  LTRACEF("faulted in page %p, pa %#" PRIxPTR "\n", res_page, res_page->paddr());
+
+  if (page_out) {
+    *page_out = res_page;
+  }
+  if (pa_out) {
+    *pa_out = res_page->paddr();
+  }
+
+  // If we made it here, we committed a new page in this VMO.
+  // TODO: have a better page lookup API between VmObjectPaged and VmCowPages to relocate this
+  // logic.
+  DEBUG_ASSERT(paged_ref_);
+  AssertHeld(paged_ref_->lock_ref());
+  paged_ref_->IncrementHierarchyGenerationCountLocked();
+
+  return ZX_OK;
+}
+
+zx_status_t VmCowPages::CommitRange(uint64_t offset, uint64_t len, bool pin, Guard<Mutex>&& adopt) {
+  canary_.Assert();
+  LTRACEF("offset %#" PRIx64 ", len %#" PRIx64 "\n", offset, len);
+
+  DEBUG_ASSERT(adopt.wraps_lock(lock_.lock()));
+  Guard<Mutex> guard{AdoptLock, ktl::move(adopt)};
+  // Convince the static analysis that we now do actually hold lock_.
+  AssertHeld(lock_);
+
+  // If a pin is requested the entire range must exist and be valid,
+  // otherwise we can commit a partial range.
+  uint64_t new_len = len;
+  if (pin) {
+    // If pinning we explicitly forbid zero length pins as we cannot guarantee consistent semantics.
+    // For example pinning a zero length range outside the range of the VMO is an error, and so
+    // pinning a zero length range inside the vmo and then resizing the VMO smaller than the pin
+    // region should also be an error. The simplest way to enforce this without adding new
+    // metadata to track zero length pin regions is to just forbid them. Note that the user entry
+    // points for pinning already forbid zero length ranges.
+    if (len == 0) {
+      return ZX_ERR_INVALID_ARGS;
+    }
+    // verify that the range is within the object
+    if (unlikely(!InRange(offset, len, size_))) {
+      return ZX_ERR_OUT_OF_RANGE;
+    }
+  } else {
+    if (!TrimRange(offset, len, size_, &new_len)) {
+      return ZX_ERR_OUT_OF_RANGE;
+    }
+    // was in range, just zero length
+    if (new_len == 0) {
+      return ZX_OK;
+    }
+  }
+
+  if (is_slice()) {
+    uint64_t parent_offset;
+    VmCowPages* parent = PagedParentOfSliceLocked(&parent_offset);
+    AssertHeld(parent->lock_);
+
+    // PagedParentOfSliceLocked will walk all of the way up the VMO hierarchy
+    // until it hits a non-slice VMO.  This guarantees that we should only ever
+    // recurse once instead of an unbounded number of times.  DEBUG_ASSERT this so
+    // that we don't actually end up with unbounded recursion just in case the
+    // property changes.
+    DEBUG_ASSERT(!parent->is_slice());
+
+    return parent->CommitRange(offset + parent_offset, new_len, pin, guard.take());
+  }
+
+  // compute a page aligned end to do our searches in to make sure we cover all the pages
+  uint64_t end = ROUNDUP_PAGE_SIZE(offset + new_len);
+  DEBUG_ASSERT(end > offset);
+  offset = ROUNDDOWN(offset, PAGE_SIZE);
+
+  fbl::RefPtr<PageSource> root_source = GetRootPageSourceLocked();
+
+  // If this vmo has a direct page source, then the source will provide the backing memory. For
+  // children that eventually depend on a page source, we skip preallocating memory to avoid
+  // potentially overallocating pages if something else touches the vmo while we're blocked on the
+  // request. Otherwise we optimize things by preallocating all the pages.
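+  // For example (assuming 4KiB pages), committing a 64KiB range that already has three resident
+  // pages leaves count == 16 - 3 == 13 pages to preallocate from the pmm below.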
+  list_node page_list;
+  list_initialize(&page_list);
+  if (root_source == nullptr) {
+    // make a pass through the list to find out how many pages we need to allocate
+    size_t count = (end - offset) / PAGE_SIZE;
+    page_list_.ForEveryPageInRange(
+        [&count](const auto* p, auto off) {
+          if (p->IsPage()) {
+            count--;
+          }
+          return ZX_ERR_NEXT;
+        },
+        offset, end);
+
+    if (count == 0 && !pin) {
+      return ZX_OK;
+    }
+
+    zx_status_t status = pmm_alloc_pages(count, pmm_alloc_flags_, &page_list);
+    if (status != ZX_OK) {
+      return status;
+    }
+  }
+
+  auto list_cleanup = fbl::MakeAutoCall([&page_list]() {
+    if (!list_is_empty(&page_list)) {
+      pmm_free(&page_list);
+    }
+  });
+
+  // Should any errors occur we need to unpin everything.
+  auto pin_cleanup = fbl::MakeAutoCall([this, original_offset = offset, &offset, pin]() {
+    // Regardless of any resizes or other things that may have happened any pinned pages *must*
+    // still be within a valid range, and so we know Unpin should succeed. The edge case is if we
+    // had failed to pin *any* pages and so our original offset may be outside the current range of
+    // the vmo. Additionally, as pinning a zero length range is invalid, so is unpinning, and so
+    // we must avoid doing so.
+    if (pin && offset > original_offset) {
+      AssertHeld(*lock());
+      UnpinLocked(original_offset, offset - original_offset);
+    }
+  });
+
+  bool retry = false;
+  PageRequest page_request(true);
+  do {
+    if (retry) {
+      // If there was a page request that couldn't be fulfilled, we need to wait on the
+      // request and retry the commit. Note that when we retry the loop, offset is
+      // updated past the portion of the vmo that we successfully committed.
+      zx_status_t status = ZX_OK;
+      guard.CallUnlocked([&page_request, &status]() mutable { status = page_request.Wait(); });
+      if (status != ZX_OK) {
+        if (status == ZX_ERR_TIMED_OUT) {
+          DumpLocked(0, false);
+        }
+        return status;
+      }
+      retry = false;
+
+      // Re-run the range checks, since size_ could have changed while we were blocked. This
+      // is not a failure, since the arguments were valid when the syscall was made. It's as
+      // if the commit was successful but then the pages were thrown away. Unless we are pinning,
+      // in which case pages being thrown away is explicitly an error.
+      new_len = len;
+      if (pin) {
+        // verify that the range is within the object
+        if (unlikely(!InRange(offset, len, size_))) {
+          return ZX_ERR_OUT_OF_RANGE;
+        }
+      } else {
+        if (!TrimRange(offset, len, size_, &new_len)) {
+          pin_cleanup.cancel();
+          return ZX_OK;
+        }
+        if (new_len == 0) {
+          pin_cleanup.cancel();
+          return ZX_OK;
+        }
+      }
+
+      end = ROUNDUP_PAGE_SIZE(offset + new_len);
+      DEBUG_ASSERT(end > offset);
+    }
+
+    // Remember what our offset was prior to attempting to commit.
+    const uint64_t prev_offset = offset;
+
+    // cur_offset tracks how far we've made page requests, even if they're not done.
+    uint64_t cur_offset = offset;
+    while (cur_offset < end) {
+      // Don't commit if we already have this page
+      VmPageOrMarker* p = page_list_.Lookup(cur_offset);
+      vm_page_t* page = nullptr;
+      if (!p || !p->IsPage()) {
+        // Check if our parent has the page
+        const uint flags = VMM_PF_FLAG_SW_FAULT | VMM_PF_FLAG_WRITE;
+        zx_status_t res =
+            GetPageLocked(cur_offset, flags, &page_list, &page_request, &page, nullptr);
+        if (res == ZX_ERR_NEXT || res == ZX_ERR_SHOULD_WAIT) {
+          // In either case we'll need to wait on the request and retry, but if we get
+          // ZX_ERR_NEXT we keep faulting until we eventually see ZX_ERR_SHOULD_WAIT.
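+          // Roughly: ZX_ERR_NEXT means the request is still batching offsets and we keep
+          // faulting, while ZX_ERR_SHOULD_WAIT means the request is ready to be waited on
+          // (outside the lock, at the top of this do/while loop). If we reach |end| while still
+          // batching, the FinalizeRequest call further below closes out the request.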
+          retry = true;
+          if (res == ZX_ERR_SHOULD_WAIT) {
+            break;
+          }
+        } else if (res != ZX_OK) {
+          return res;
+        }
+      } else {
+        page = p->Page();
+      }
+
+      if (!retry) {
+        // As long as we're not in the retry state cur_offset and offset should track.
+        DEBUG_ASSERT(offset == cur_offset);
+        // Pin the page if needed and then formally commit by increasing our working offset.
+        if (pin) {
+          DEBUG_ASSERT(page->state() == VM_PAGE_STATE_OBJECT);
+          if (page->object.pin_count == VM_PAGE_OBJECT_MAX_PIN_COUNT) {
+            return ZX_ERR_UNAVAILABLE;
+          }
+
+          page->object.pin_count++;
+          if (page->object.pin_count == 1) {
+            pmm_page_queues()->MoveToWired(page);
+          }
+          // Pinning every page in the largest vmo possible as many times as possible can't overflow
+          static_assert(VmPageList::MAX_SIZE / PAGE_SIZE <
+                        UINT64_MAX / VM_PAGE_OBJECT_MAX_PIN_COUNT);
+          pinned_page_count_++;
+        }
+        offset += PAGE_SIZE;
+        len -= PAGE_SIZE;
+      }
+      cur_offset += PAGE_SIZE;
+    }
+
+    // Unmap all of the pages in the range we touched. This may end up unmapping non-present
+    // ranges or unmapping things multiple times, but it's necessary to ensure that we unmap
+    // everything that actually is present before anything else sees it.
+    if (cur_offset - prev_offset) {
+      RangeChangeUpdateLocked(offset, cur_offset - prev_offset, RangeChangeOp::Unmap);
+    }
+
+    if (retry && cur_offset == end) {
+      zx_status_t res = root_source->FinalizeRequest(&page_request);
+      if (res != ZX_ERR_SHOULD_WAIT) {
+        return res;
+      }
+    }
+  } while (retry);
+
+  pin_cleanup.cancel();
+  return ZX_OK;
+}
+
+zx_status_t VmCowPages::DecommitRangeLocked(uint64_t offset, uint64_t len) {
+  canary_.Assert();
+
+  // Trim the size and perform our zero-length hot-path check before we recurse
+  // up to our top-level ancestor.  Size bounding needs to take place relative
+  // to the child the operation was originally targeted against.
+  uint64_t new_len;
+  if (!TrimRange(offset, len, size_, &new_len)) {
+    return ZX_ERR_OUT_OF_RANGE;
+  }
+
+  // was in range, just zero length
+  if (new_len == 0) {
+    return ZX_OK;
+  }
+
+  // If this is a child slice of a VMO, then find our way up to our root
+  // ancestor (taking our offset into account as we do), and then recurse,
+  // running the operation against our ancestor.  Note that
+  // PagedParentOfSliceLocked will iteratively walk all the way up to our
+  // non-slice ancestor, not just our immediate parent, so we are guaranteed
+  // bounded recursion.
+  if (is_slice()) {
+    uint64_t parent_offset;
+    VmCowPages* parent = PagedParentOfSliceLocked(&parent_offset);
+    AssertHeld(parent->lock_);
+    DEBUG_ASSERT(!parent->is_slice());  // assert bounded recursion.
+    return parent->DecommitRangeLocked(offset + parent_offset, new_len);
+  }
+
+  if (parent_ || GetRootPageSourceLocked()) {
+    return ZX_ERR_NOT_SUPPORTED;
+  }
+
+  // Demand offset and length be correctly aligned to not give surprising user semantics.
+  if (!IS_PAGE_ALIGNED(offset) || !IS_PAGE_ALIGNED(len)) {
+    return ZX_ERR_INVALID_ARGS;
+  }
+
+  LTRACEF("start offset %#" PRIx64 ", end %#" PRIx64 "\n", offset, offset + new_len);
+
+  // TODO(teisenbe): Allow decommitting of pages pinned by
+  // CommitRangeContiguous
+
+  if (AnyPagesPinnedLocked(offset, new_len)) {
+    return ZX_ERR_BAD_STATE;
+  }
+
+  // unmap all of the pages in this range on all the mapping regions
+  RangeChangeUpdateLocked(offset, new_len, RangeChangeOp::Unmap);
+
+  list_node_t free_list;
+  list_initialize(&free_list);
+
+  BatchPQRemove page_remover(&free_list);
+
+  page_list_.RemovePages(page_remover.RemovePagesCallback(), offset, offset + new_len);
+  page_remover.Flush();
+  pmm_free(&free_list);
+
+  return ZX_OK;
+}
+
+bool VmCowPages::PageWouldReadZeroLocked(uint64_t page_offset) {
+  DEBUG_ASSERT(IS_PAGE_ALIGNED(page_offset));
+  DEBUG_ASSERT(page_offset < size_);
+  VmPageOrMarker* slot = page_list_.Lookup(page_offset);
+  if (slot && slot->IsMarker()) {
+    // This is already considered zero as there's a marker.
+    return true;
+  }
+  // If we don't have a committed page we need to check our parent.
+  if (!slot || !slot->IsPage()) {
+    VmCowPages* page_owner;
+    uint64_t owner_offset;
+    if (!FindInitialPageContentLocked(page_offset, &page_owner, &owner_offset)) {
+      // Parent doesn't have a page either, so would also read as zero, assuming no page source.
+      return GetRootPageSourceLocked() == nullptr;
+    }
+  }
+  // There is content either locally or in our parent; assume it is non-zero and return false.
+  return false;
+}
+
+zx_status_t VmCowPages::ZeroPagesLocked(uint64_t page_start_base, uint64_t page_end_base) {
+  canary_.Assert();
+
+  DEBUG_ASSERT(page_start_base <= page_end_base);
+  DEBUG_ASSERT(page_end_base <= size_);
+  DEBUG_ASSERT(IS_PAGE_ALIGNED(page_start_base));
+  DEBUG_ASSERT(IS_PAGE_ALIGNED(page_end_base));
+
+  // Forward any operations on slices up to the original non slice parent.
+  if (is_slice()) {
+    uint64_t parent_offset;
+    VmCowPages* parent = PagedParentOfSliceLocked(&parent_offset);
+    AssertHeld(parent->lock_);
+    return parent->ZeroPagesLocked(page_start_base + parent_offset, page_end_base + parent_offset);
+  }
+
+  // First try and do the more efficient decommit. We prefer decommit as it performs work in the
+  // order of the number of committed pages, instead of the size of the range. An
+  // error from DecommitRangeLocked indicates that the VMO is not of a form that decommit can safely
+  // be performed without exposing data that we shouldn't between children and parents, but no
+  // actual state will have been changed. Should decommit succeed we are done, otherwise we will
+  // have to handle each offset individually.
+  zx_status_t status = DecommitRangeLocked(page_start_base, page_end_base - page_start_base);
+  if (status == ZX_OK) {
+    return ZX_OK;
+  }
+
+  // Unmap any page that is touched by this range in any of our, or our children's, mapping regions.
+  // We do this on the assumption we are going to be able to free pages either completely or by
+  // turning them into markers and it's more efficient to unmap once in bulk here.
+  RangeChangeUpdateLocked(page_start_base, page_end_base - page_start_base, RangeChangeOp::Unmap);
+
+  list_node_t free_list;
+  list_initialize(&free_list);
+
+  auto auto_free = fbl::MakeAutoCall([&free_list]() {
+    if (!list_is_empty(&free_list)) {
+      pmm_free(&free_list);
+    }
+  });
+
+  // Give us easier names for our range.
+  uint64_t start = page_start_base;
+  uint64_t end = page_end_base;
+
+  // If we're zeroing at the end of our parent range we can update our parent limit to reflect
+  // this, similarly to a resize. This does not work if we are a slice, but we checked for that
+  // earlier. Whilst this does not actually zero the range in question, it makes future zeroing of
+  // the range far more efficient, which is why we do it first.
+  // parent_limit_ is a page aligned offset and so we can only reduce it to a rounded up value of
+  // start.
+  uint64_t rounded_start = ROUNDUP_PAGE_SIZE(start);
+  if (rounded_start < parent_limit_ && end >= parent_limit_) {
+    if (parent_ && parent_->is_hidden()) {
+      // Release any COW pages that are no longer necessary. This will also
+      // update the parent limit.
+      BatchPQRemove page_remover(&free_list);
+      ReleaseCowParentPagesLocked(rounded_start, parent_limit_, &page_remover);
+      page_remover.Flush();
+    } else {
+      parent_limit_ = rounded_start;
+    }
+  }
+
+  for (uint64_t offset = start; offset < end; offset += PAGE_SIZE) {
+    VmPageOrMarker* slot = page_list_.Lookup(offset);
+
+    const bool can_see_parent = parent_ && offset < parent_limit_;
+
+    // This is a lambda as it only makes sense to talk about parent mutability when we have a parent
+    // for this offset.
+    auto parent_immutable = [can_see_parent, this]() TA_REQ(lock_) {
+      DEBUG_ASSERT(can_see_parent);
+      return parent_->is_hidden();
+    };
+
+    // Finding the initial page content is expensive, but we only need to call it
+    // under certain circumstances scattered in the code below. The lambda
+    // get_initial_page_content() will lazily fetch and cache the details. This
+    // avoids us calling it when we don't need to, or calling it more than once.
+    struct InitialPageContent {
+      bool inited = false;
+      VmCowPages* page_owner;
+      uint64_t owner_offset;
+      vm_page_t* page;
+    } initial_content_;
+    auto get_initial_page_content = [&initial_content_, can_see_parent, this, offset]()
+                                        TA_REQ(lock_) -> const InitialPageContent& {
+      if (!initial_content_.inited) {
+        DEBUG_ASSERT(can_see_parent);
+        VmPageOrMarker* page_or_marker = FindInitialPageContentLocked(
+            offset, &initial_content_.page_owner, &initial_content_.owner_offset);
+        // We only care about the parent having a 'true' vm_page for content. If the parent has a
+        // marker then it's as if the parent has no content since that's a zero page anyway, which
+        // is what we are trying to achieve.
+        initial_content_.page =
+            page_or_marker && page_or_marker->IsPage() ? page_or_marker->Page() : nullptr;
+        initial_content_.inited = true;
+      }
+      return initial_content_;
+    };
+
+    auto parent_has_content = [get_initial_page_content]() TA_REQ(lock_) {
+      return get_initial_page_content().page != nullptr;
+    };
+
+    // Ideally we just collect up pages and hand them over to the pmm all at the end, but if we need
+    // to allocate any pages then we would like to ensure that we do not cause total memory to peak
+    // higher due to squirreling these pages away.
+    auto free_any_pages = [&free_list] {
+      if (!list_is_empty(&free_list)) {
+        pmm_free(&free_list);
+      }
+    };
+
+    // If there's already a marker then we can avoid any second guessing and leave the marker alone.
+    if (slot && slot->IsMarker()) {
+      continue;
+    }
+
+    // In the ideal case we can zero by making there be an Empty slot in our page list, so first
+    // see if we can do that. This is true when there is nothing pinned and either:
+    //  * This offset does not relate to our parent
+    //  * This offset does relate to our parent, but our parent is immutable and is currently zero
+    //    at this offset.
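+    // For example, an anonymous VMO with no parent can always zero an (unpinned) offset by simply
+    // freeing any committed page and leaving the slot empty, since for such a VMO an empty slot
+    // reads back as zero.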
+    if (!SlotHasPinnedPage(slot) &&
+        (!can_see_parent || (parent_immutable() && !parent_has_content()))) {
+      if (slot && slot->IsPage()) {
+        vm_page_t* page = page_list_.RemovePage(offset).ReleasePage();
+        pmm_page_queues()->Remove(page);
+        DEBUG_ASSERT(!list_in_list(&page->queue_node));
+        list_add_tail(&free_list, &page->queue_node);
+      }
+      continue;
+    }
+    // The only time we would reach here and *not* have a parent is if the page is pinned.
+    DEBUG_ASSERT(SlotHasPinnedPage(slot) || parent_);
+
+    // Now we know that we need to do something active to make this zero, either through a marker or
+    // a page. First make sure we have a slot to modify.
+    if (!slot) {
+      slot = page_list_.LookupOrAllocate(offset);
+      if (unlikely(!slot)) {
+        return ZX_ERR_NO_MEMORY;
+      }
+    }
+
+    // Ideally we will use a marker, but we can only do this if we can point to a committed page
+    // to justify the allocation of the marker (i.e. we cannot allocate infinite markers with no
+    // committed pages). A committed page in this case exists if the parent has any content.
+    if (SlotHasPinnedPage(slot) || !parent_has_content()) {
+      if (slot->IsPage()) {
+        // Zero the existing page.
+        ZeroPage(slot->Page());
+        continue;
+      }
+      // Allocate a new page, it will be zeroed in the process.
+      vm_page_t* p;
+      free_any_pages();
+      // Do not pass our free_list here as this takes a list to allocate from, whereas our list is
+      // for collecting things to free.
+      bool result = AllocateCopyPage(pmm_alloc_flags_, vm_get_zero_page_paddr(), nullptr, &p);
+      if (!result) {
+        return ZX_ERR_NO_MEMORY;
+      }
+      SetNotWired(p, offset);
+      *slot = VmPageOrMarker::Page(p);
+      continue;
+    }
+    DEBUG_ASSERT(parent_ && parent_has_content());
+
+    // We are able to insert a marker, but if our page content is from a hidden owner we need to
+    // perform slightly more complex cow forking.
+    const InitialPageContent& content = get_initial_page_content();
+    if (slot->IsEmpty() && content.page_owner->is_hidden()) {
+      free_any_pages();
+      zx_status_t result = CloneCowPageAsZeroLocked(offset, &free_list, content.page_owner,
+                                                    content.page, content.owner_offset);
+      if (result != ZX_OK) {
+        return result;
+      }
+      continue;
+    }
+
+    // Remove any page that could be hanging around in the slot before we make it a marker.
+    if (slot->IsPage()) {
+      vm_page_t* page = slot->ReleasePage();
+      pmm_page_queues()->Remove(page);
+      DEBUG_ASSERT(!list_in_list(&page->queue_node));
+      list_add_tail(&free_list, &page->queue_node);
+    }
+    *slot = VmPageOrMarker::Marker();
+  }
+
+  return ZX_OK;
+}
+
+void VmCowPages::MoveToNotWired(vm_page_t* page, uint64_t offset) {
+  if (page_source_) {
+    DEBUG_ASSERT(paged_ref_);
+    pmm_page_queues()->MoveToPagerBacked(page, paged_ref_, offset);
+  } else {
+    pmm_page_queues()->MoveToUnswappable(page);
+  }
+}
+
+void VmCowPages::SetNotWired(vm_page_t* page, uint64_t offset) {
+  if (page_source_) {
+    DEBUG_ASSERT(paged_ref_);
+    pmm_page_queues()->SetPagerBacked(page, paged_ref_, offset);
+  } else {
+    pmm_page_queues()->SetUnswappable(page);
+  }
+}
+
+void VmCowPages::UnpinPage(vm_page_t* page, uint64_t offset) {
+  DEBUG_ASSERT(page->state() == VM_PAGE_STATE_OBJECT);
+  ASSERT(page->object.pin_count > 0);
+  page->object.pin_count--;
+  if (page->object.pin_count == 0) {
+    MoveToNotWired(page, offset);
+  }
+}
+
+void VmCowPages::UnpinLocked(uint64_t offset, uint64_t len) {
+  canary_.Assert();
+
+  // verify that the range is within the object
+  ASSERT(InRange(offset, len, size_));
+  // forbid zero length unpins as zero length pins return errors.
+  ASSERT(len != 0);
+
+  if (is_slice()) {
+    uint64_t parent_offset;
+    VmCowPages* parent = PagedParentOfSliceLocked(&parent_offset);
+    AssertHeld(parent->lock_);
+    return parent->UnpinLocked(offset + parent_offset, len);
+  }
+
+  const uint64_t start_page_offset = ROUNDDOWN(offset, PAGE_SIZE);
+  const uint64_t end_page_offset = ROUNDUP(offset + len, PAGE_SIZE);
+
+  zx_status_t status = page_list_.ForEveryPageAndGapInRange(
+      [this](const auto* page, uint64_t off) {
+        if (page->IsMarker()) {
+          return ZX_ERR_NOT_FOUND;
+        }
+        AssertHeld(lock_);
+        UnpinPage(page->Page(), off);
+        return ZX_ERR_NEXT;
+      },
+      [](uint64_t gap_start, uint64_t gap_end) { return ZX_ERR_NOT_FOUND; }, start_page_offset,
+      end_page_offset);
+  ASSERT_MSG(status == ZX_OK, "Tried to unpin an uncommitted page");
+
+  bool overflow = sub_overflow(
+      pinned_page_count_, (end_page_offset - start_page_offset) / PAGE_SIZE, &pinned_page_count_);
+  ASSERT(!overflow);
+
+  return;
+}
+
+bool VmCowPages::AnyPagesPinnedLocked(uint64_t offset, size_t len) {
+  canary_.Assert();
+  DEBUG_ASSERT(lock_.lock().IsHeld());
+  DEBUG_ASSERT(IS_PAGE_ALIGNED(offset));
+  DEBUG_ASSERT(IS_PAGE_ALIGNED(len));
+
+  const uint64_t start_page_offset = offset;
+  const uint64_t end_page_offset = offset + len;
+
+  if (pinned_page_count_ == 0) {
+    return false;
+  }
+
+  bool found_pinned = false;
+  page_list_.ForEveryPageInRange(
+      [&found_pinned, start_page_offset, end_page_offset](const auto* p, uint64_t off) {
+        DEBUG_ASSERT(off >= start_page_offset && off < end_page_offset);
+        if (p->IsPage() && p->Page()->object.pin_count > 0) {
+          found_pinned = true;
+          return ZX_ERR_STOP;
+        }
+        return ZX_ERR_NEXT;
+      },
+      start_page_offset, end_page_offset);
+
+  return found_pinned;
+}
+
+// Helper function which processes the region visible by both children.
+void VmCowPages::ReleaseCowParentPagesLockedHelper(uint64_t start, uint64_t end,
+                                                   bool sibling_visible,
+                                                   BatchPQRemove* page_remover) {
+  // Compute the range in the parent that we will no longer be able to see.
+  const uint64_t parent_range_start = CheckedAdd(start, parent_offset_);
+  const uint64_t parent_range_end = CheckedAdd(end, parent_offset_);
+
+  bool skip_split_bits = true;
+  if (parent_limit_ <= end) {
+    parent_limit_ = ktl::min(start, parent_limit_);
+    if (parent_limit_ <= parent_start_limit_) {
+      // Setting both to zero is cleaner and makes some asserts easier.
+      parent_start_limit_ = 0;
+      parent_limit_ = 0;
+    }
+  } else if (start == parent_start_limit_) {
+    parent_start_limit_ = end;
+  } else if (sibling_visible) {
+    // Split bits and partial cow release are only an issue if this range is also visible to our
+    // sibling. If it's not visible then we will always be freeing all pages anyway, no need to
+    // worry about split bits. Otherwise if the vmo limits can't be updated, this function will need
+    // to use the split bits to release pages in the parent. It also means that ancestor pages in
+    // the specified range might end up being released based on their current split bits, instead of
+    // through subsequent calls to this function. Therefore parent and all ancestors need to have
+    // the partial_cow_release_ flag set to prevent fast merge issues in ::RemoveChild.
+    auto cur = this;
+    AssertHeld(cur->lock_);
+    uint64_t cur_start = start;
+    uint64_t cur_end = end;
+    while (cur->parent_ && cur_start < cur_end) {
+      auto parent = cur->parent_.get();
+      AssertHeld(parent->lock_);
+      parent->partial_cow_release_ = true;
+      cur_start = ktl::max(CheckedAdd(cur_start, cur->parent_offset_), parent->parent_start_limit_);
+      cur_end = ktl::min(CheckedAdd(cur_end, cur->parent_offset_), parent->parent_limit_);
+      cur = parent;
+    }
+    skip_split_bits = false;
+  }
+
+  // Free any pages that either aren't visible, or were already split into the other child. For
+  // pages that haven't been split into the other child, we need to ensure they become accessible
+  // to only a single child.
+  AssertHeld(parent_->lock_);
+  parent_->page_list_.RemovePages(
+      [skip_split_bits, sibling_visible, page_remover,
+       left = this == &parent_->left_child_locked()](VmPageOrMarker* page_or_mark,
+                                                     uint64_t offset) {
+        if (page_or_mark->IsMarker()) {
+          // If this marker is in a range still visible to the sibling then we just leave it, no
+          // split bits or anything to be updated. If the sibling cannot see it, then we can clear
+          // it.
+          if (!sibling_visible) {
+            *page_or_mark = VmPageOrMarker::Empty();
+          }
+          return ZX_ERR_NEXT;
+        }
+        vm_page* page = page_or_mark->Page();
+        // If the sibling can still see this page then we need to keep it around, otherwise we can
+        // free it. The sibling can see the page if this range is |sibling_visible| and if the
+        // sibling hasn't already forked the page, which is recorded in the split bits.
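+        // For example, if |this| is the left child and the page's cow_right_split bit is set, the
+        // right child has already forked the page or otherwise given up access to it, so once the
+        // left child's view is released the original is unreachable and can be pushed to
+        // |page_remover|.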
+        if (!sibling_visible ||
+            (left ? page->object.cow_right_split : page->object.cow_left_split)) {
+          page = page_or_mark->ReleasePage();
+          page_remover->Push(page);
+          return ZX_ERR_NEXT;
+        }
+        if (skip_split_bits) {
+          // If we were able to update this vmo's parent limit, that made the pages
+          // uniaccessible. We clear the split bits to allow ::RemoveChild to efficiently
+          // merge vmos without having to worry about pages above parent_limit_.
+          page->object.cow_left_split = 0;
+          page->object.cow_right_split = 0;
+        } else {
+          // Otherwise set the appropriate split bit to make the page uniaccessible.
+          if (left) {
+            page->object.cow_left_split = 1;
+          } else {
+            page->object.cow_right_split = 1;
+          }
+        }
+        return ZX_ERR_NEXT;
+      },
+      parent_range_start, parent_range_end);
+}
+
+void VmCowPages::ReleaseCowParentPagesLocked(uint64_t start, uint64_t end,
+                                             BatchPQRemove* page_remover) {
+  // This function releases |this| references to any ancestor vmo's COW pages.
+  //
+  // To do so, we divide |this| parent into three (possibly 0-length) regions: the region
+  // which |this| sees but before what the sibling can see, the region where both |this|
+  // and its sibling can see, and the region |this| can see but after what the sibling can
+  // see. Processing the 2nd region only requires touching the direct parent, since the sibling
+  // can see ancestor pages in the region. However, processing the 1st and 3rd regions requires
+  // recursively releasing |this| parent's ancestor pages, since those pages are no longer
+  // visible through |this| parent.
+  //
+  // This function processes region 3 (incl. recursively processing the parent), then region 2,
+  // then region 1 (incl. recursively processing the parent). Processing is done in reverse order
+  // to ensure parent_limit_ is reduced correctly. When processing either regions of type 1 or 3 we
+  //  1. walk up the parent and find the largest common slice that all nodes in the hierarchy see
+  //     as being of the same type.
+  //  2. walk back down (using stack_ direction flags) applying the range update using that final
+  //     calculated size
+  //  3. reduce the range we are operating on to not include the section we just processed
+  //  4. repeat steps 1-3 until range is empty
+  // In the worst case it is possible for this algorithm to be O(N^2) in the depth of the tree.
+  // More optimal algorithms probably exist, but this algorithm is sufficient for now as
+  // these suboptimal scenarios do not occur in practice.
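+  // A rough sketch (offsets in the parent), where T is the range of the parent |this| can see and
+  // S is the range the sibling can see:
+  //
+  //   T:   [..........................................]
+  //   S:              [..................]
+  //        |-- (1) --||------- (2) ------||--- (3) ---|
+  //
+  // Regions 1 and 3 are only reachable via |this|, so pages there may need to be released
+  // recursively further up the ancestor chain; region 2 is shared with the sibling and only
+  // requires touching the direct parent.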
+
+  // At the top level we continuously attempt to process the range until it is empty.
+  while (end > start) {
+    // cur_start / cur_end get adjusted as cur moves up/down the parent chain.
+    uint64_t cur_start = start;
+    uint64_t cur_end = end;
+    VmCowPages* cur = this;
+
+    AssertHeld(cur->lock_);
+    // First walk up the parent chain as long as there is a visible parent that does not overlap
+    // with its sibling.
+    while (cur->parent_ && cur->parent_start_limit_ < cur_end && cur_start < cur->parent_limit_) {
+      if (cur_end > cur->parent_limit_) {
+        // Part of the range sees the parent, and part of it doesn't. As we only process ranges of
+        // a single type we first trim the range down to the portion that doesn't see the parent,
+        // then next time around the top level loop we will process the portion that does see
+        cur_start = cur->parent_limit_;
+        DEBUG_ASSERT(cur_start < cur_end);
+        break;
+      }
+      // Trim the start to the portion of the parent it can see.
+      cur_start = ktl::max(cur_start, cur->parent_start_limit_);
+      DEBUG_ASSERT(cur_start < cur_end);
+
+      // Work out what the overlap with our sibling is
+      auto parent = cur->parent_.get();
+      AssertHeld(parent->lock_);
+      bool left = cur == &parent->left_child_locked();
+      auto& other = left ? parent->right_child_locked() : parent->left_child_locked();
+      AssertHeld(other.lock_);
+
+      // Project our operating range into our parent.
+      const uint64_t our_parent_start = CheckedAdd(cur_start, cur->parent_offset_);
+      const uint64_t our_parent_end = CheckedAdd(cur_end, cur->parent_offset_);
+      // Project our sibling's full range into our parent.
+      const uint64_t other_parent_start =
+          CheckedAdd(other.parent_offset_, other.parent_start_limit_);
+      const uint64_t other_parent_end = CheckedAdd(other.parent_offset_, other.parent_limit_);
+
+      if (other_parent_end >= our_parent_end && other_parent_start < our_parent_end) {
+        // At least some of the end of our range overlaps with the sibling. First move up our start
+        // to ensure our range is 100% overlapping.
+        if (other_parent_start > our_parent_start) {
+          cur_start = CheckedAdd(cur_start, other_parent_start - our_parent_start);
+          DEBUG_ASSERT(cur_start < cur_end);
+        }
+        // Free the range that overlaps with the sibling, then we are done walking up as this is the
+        // type 2 kind of region. It is safe to process this right now since we are in a terminal
+        // state and are leaving the loop, thus we know that this is the final size of the region.
+        cur->ReleaseCowParentPagesLockedHelper(cur_start, cur_end, true, page_remover);
+        break;
+      }
+      // End of our range does not see the sibling. First move up our start to ensure we are dealing
+      // with a range that is 100% no sibling, and then keep on walking up.
+      if (other_parent_end > our_parent_start && other_parent_end < our_parent_end) {
+        DEBUG_ASSERT(other_parent_end < our_parent_end);
+        cur_start = CheckedAdd(cur_start, other_parent_end - our_parent_start);
+        DEBUG_ASSERT(cur_start < cur_end);
+      }
+
+      // Record the direction so we can walk back down later.
+      parent->stack_.dir_flag = left ? StackDir::Left : StackDir::Right;
+      // Don't use our_parent_start as we may have updated cur_start
+      cur_start = CheckedAdd(cur_start, cur->parent_offset_);
+      cur_end = our_parent_end;
+      DEBUG_ASSERT(cur_start < cur_end);
+      cur = parent;
+    }
+
+    // Every parent that we walked up had no overlap with its siblings. Now that we know the size
+    // of the range that we can process we just walk back down processing.
+    while (cur != this) {
+      // Although we free pages in the parent we operate on the *child*, as that is whose limits
+      // we will actually adjust. The ReleaseCowParentPagesLockedHelper will then reach back up to
+      // the parent to actually free any pages.
+      cur = cur->stack_.dir_flag == StackDir::Left ? &cur->left_child_locked()
+                                                   : &cur->right_child_locked();
+      AssertHeld(cur->lock_);
+      DEBUG_ASSERT(cur_start >= cur->parent_offset_);
+      DEBUG_ASSERT(cur_end >= cur->parent_offset_);
+      cur_start -= cur->parent_offset_;
+      cur_end -= cur->parent_offset_;
+
+      cur->ReleaseCowParentPagesLockedHelper(cur_start, cur_end, false, page_remover);
+    }
+
+    // Update the end with the portion we managed to process, asserting some basic sanity of the
+    // range; most importantly that we processed a non-zero portion to guarantee progress.
+    DEBUG_ASSERT(cur_start >= start);
+    DEBUG_ASSERT(cur_start < end);
+    DEBUG_ASSERT(cur_end == end);
+    end = cur_start;
+  }
+}
+
+zx_status_t VmCowPages::ResizeLocked(uint64_t s) {
+  canary_.Assert();
+
+  LTRACEF("vmcp %p, size %" PRIu64 "\n", this, s);
+
+  // make sure everything is aligned before we get started
+  DEBUG_ASSERT(IS_PAGE_ALIGNED(size_));
+  DEBUG_ASSERT(IS_PAGE_ALIGNED(s));
+  DEBUG_ASSERT(!is_slice());
+
+  list_node_t free_list;
+  list_initialize(&free_list);
+
+  BatchPQRemove page_remover(&free_list);
+
+  // see if we're shrinking or expanding the vmo
+  if (s < size_) {
+    // shrinking
+    uint64_t start = s;
+    uint64_t end = size_;
+    uint64_t len = end - start;
+
+    // bail if there are any pinned pages in the range we're trimming
+    if (AnyPagesPinnedLocked(start, len)) {
+      return ZX_ERR_BAD_STATE;
+    }
+
+    // unmap all of the pages in this range on all the mapping regions
+    RangeChangeUpdateLocked(start, len, RangeChangeOp::Unmap);
+
+    if (page_source_) {
+      // Tell the page source that any non-resident pages that are now out-of-bounds
+      // were supplied, to ensure that any reads of those pages get woken up.
+      zx_status_t status = page_list_.ForEveryPageAndGapInRange(
+          [](const auto* p, uint64_t off) { return ZX_ERR_NEXT; },
+          [&](uint64_t gap_start, uint64_t gap_end) {
+            page_source_->OnPagesSupplied(gap_start, gap_end);
+            return ZX_ERR_NEXT;
+          },
+          start, end);
+      DEBUG_ASSERT(status == ZX_OK);
+    }
+
+    if (parent_ && parent_->is_hidden()) {
+      // Release any COW pages that are no longer necessary. This will also
+      // update the parent limit.
+      ReleaseCowParentPagesLocked(start, end, &page_remover);
+      // Validate that the parent limit was correctly updated as it should never remain larger than
+      // our actual size.
+      DEBUG_ASSERT(parent_limit_ <= s);
+    } else {
+      parent_limit_ = ktl::min(parent_limit_, s);
+    }
+    // If the tail of a parent disappears, the children shouldn't be able to see that region
+    // again, even if the parent is later re-enlarged. So update the child parent limits.
+    UpdateChildParentLimitsLocked(s);
+
+    page_list_.RemovePages(page_remover.RemovePagesCallback(), start, end);
+  } else if (s > size_) {
+    uint64_t temp;
+    // Check that this VMO's new size would not cause it to overflow if projected onto the root.
+    bool overflow = add_overflow(root_parent_offset_, s, &temp);
+    if (overflow) {
+      return ZX_ERR_INVALID_ARGS;
+    }
+    // expanding
+    // figure the starting and ending page offset that is affected
+    uint64_t start = size_;
+    uint64_t end = s;
+    uint64_t len = end - start;
+
+    // inform all our children and mappings that there are new bits
+    RangeChangeUpdateLocked(start, len, RangeChangeOp::Unmap);
+  }
+
+  // save bytewise size
+  size_ = s;
+
+  page_remover.Flush();
+  pmm_free(&free_list);
+
+  return ZX_OK;
+}
+
+void VmCowPages::UpdateChildParentLimitsLocked(uint64_t new_size) {
+  // Note that a child's parent_limit_ will limit that child's descendants' views into
+  // this vmo, so this method only needs to touch the direct children.
+  for (auto& child : children_list_) {
+    AssertHeld(child.lock_);
+    if (new_size < child.parent_offset_) {
+      child.parent_limit_ = 0;
+    } else {
+      child.parent_limit_ = ktl::min(child.parent_limit_, new_size - child.parent_offset_);
+    }
+  }
+}
+
+zx_status_t VmCowPages::LookupLocked(uint64_t offset, uint64_t len, vmo_lookup_fn_t lookup_fn,
+                                     void* context) {
+  canary_.Assert();
+  if (unlikely(len == 0)) {
+    return ZX_ERR_INVALID_ARGS;
+  }
+
+  // verify that the range is within the object
+  if (unlikely(!InRange(offset, len, size_))) {
+    return ZX_ERR_OUT_OF_RANGE;
+  }
+
+  const uint64_t start_page_offset = ROUNDDOWN(offset, PAGE_SIZE);
+  const uint64_t end_page_offset = ROUNDUP(offset + len, PAGE_SIZE);
+
+  zx_status_t status = page_list_.ForEveryPageAndGapInRange(
+      [lookup_fn, context, start_page_offset](const auto* p, uint64_t off) {
+        if (p->IsMarker()) {
+          return ZX_ERR_NO_MEMORY;
+        }
+        const size_t index = (off - start_page_offset) / PAGE_SIZE;
+        paddr_t pa = p->Page()->paddr();
+        zx_status_t status = lookup_fn(context, off, index, pa);
+        if (status != ZX_OK) {
+          if (unlikely(status == ZX_ERR_NEXT || status == ZX_ERR_STOP)) {
+            status = ZX_ERR_INTERNAL;
+          }
+          return status;
+        }
+        return ZX_ERR_NEXT;
+      },
+      [this, lookup_fn, context, start_page_offset](uint64_t gap_start, uint64_t gap_end) {
+        AssertHeld(this->lock_);
+        // If some page was missing from our list, run the more expensive
+        // GetPageLocked to see if our parent has it.
+        for (uint64_t off = gap_start; off < gap_end; off += PAGE_SIZE) {
+          paddr_t pa;
+          zx_status_t status = this->GetPageLocked(off, 0, nullptr, nullptr, nullptr, &pa);
+          if (status != ZX_OK) {
+            return ZX_ERR_NO_MEMORY;
+          }
+          const size_t index = (off - start_page_offset) / PAGE_SIZE;
+          status = lookup_fn(context, off, index, pa);
+          if (status != ZX_OK) {
+            if (unlikely(status == ZX_ERR_NEXT || status == ZX_ERR_STOP)) {
+              status = ZX_ERR_INTERNAL;
+            }
+            return status;
+          }
+        }
+        return ZX_ERR_NEXT;
+      },
+      start_page_offset, end_page_offset);
+  if (status != ZX_OK) {
+    return status;
+  }
+
+  return ZX_OK;
+}
+
+zx_status_t VmCowPages::TakePagesLocked(uint64_t offset, uint64_t len, VmPageSpliceList* pages) {
+  canary_.Assert();
+
+  DEBUG_ASSERT(IS_PAGE_ALIGNED(offset));
+  DEBUG_ASSERT(IS_PAGE_ALIGNED(len));
+
+  if (!InRange(offset, len, size_)) {
+    return ZX_ERR_OUT_OF_RANGE;
+  }
+
+  if (AnyPagesPinnedLocked(offset, len) || parent_ || page_source_) {
+    return ZX_ERR_BAD_STATE;
+  }
+
+  // This is only used by the userpager API, which has significant restrictions on
+  // what sorts of vmos are acceptable. If splice starts being used in more places,
+  // then this restriction might need to be lifted.
+  // TODO: Check that the region is locked once locking is implemented
+  if (children_list_len_) {
+    return ZX_ERR_BAD_STATE;
+  }
+
+  page_list_.ForEveryPageInRange(
+      [](const auto* p, uint64_t off) {
+        if (p->IsPage()) {
+          pmm_page_queues()->Remove(p->Page());
+        }
+        return ZX_ERR_NEXT;
+      },
+      offset, offset + len);
+
+  *pages = page_list_.TakePages(offset, len);
+
+  return ZX_OK;
+}
+
+zx_status_t VmCowPages::SupplyPagesLocked(uint64_t offset, uint64_t len, VmPageSpliceList* pages) {
+  canary_.Assert();
+
+  DEBUG_ASSERT(IS_PAGE_ALIGNED(offset));
+  DEBUG_ASSERT(IS_PAGE_ALIGNED(len));
+
+  ASSERT(page_source_);
+
+  if (!InRange(offset, len, size_)) {
+    return ZX_ERR_OUT_OF_RANGE;
+  }
+  uint64_t end = offset + len;
+
+  list_node free_list;
+  list_initialize(&free_list);
+
+  // [new_pages_start, new_pages_start + new_pages_len) tracks the current run of
+  // consecutive new pages added to this vmo.
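+  // For example, supplying four pages where the third offset is already populated results in two
+  // OnPagesSupplied() notifications: one covering the first two pages (when the existing page is
+  // encountered) and one covering the final page (after the loop).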
+  uint64_t new_pages_start = offset;
+  uint64_t new_pages_len = 0;
+  zx_status_t status = ZX_OK;
+  while (!pages->IsDone()) {
+    VmPageOrMarker src_page = pages->Pop();
+
+    // The pager API does not allow the source VMO of supply pages to have a page source, so we can
+    // assume that any empty pages are zeroes and insert explicit markers here. We need to insert
+    // explicit markers to actually resolve the pager fault.
+    if (src_page.IsEmpty()) {
+      src_page = VmPageOrMarker::Marker();
+    }
+
+    status = AddPageLocked(&src_page, offset);
+    if (status == ZX_OK) {
+      new_pages_len += PAGE_SIZE;
+    } else {
+      if (src_page.IsPage()) {
+        vm_page_t* page = src_page.ReleasePage();
+        DEBUG_ASSERT(!list_in_list(&page->queue_node));
+        list_add_tail(&free_list, &page->queue_node);
+      }
+
+      if (likely(status == ZX_ERR_ALREADY_EXISTS)) {
+        status = ZX_OK;
+
+        // We hit the end of a run of absent pages, so notify the pager source
+        // of any new pages that were added and reset the tracking variables.
+        if (new_pages_len) {
+          page_source_->OnPagesSupplied(new_pages_start, new_pages_len);
+        }
+        new_pages_start = offset + PAGE_SIZE;
+        new_pages_len = 0;
+      } else {
+        break;
+      }
+    }
+    offset += PAGE_SIZE;
+
+    DEBUG_ASSERT(new_pages_start + new_pages_len <= end);
+  }
+  if (new_pages_len) {
+    page_source_->OnPagesSupplied(new_pages_start, new_pages_len);
+  }
+
+  if (!list_is_empty(&free_list)) {
+    pmm_free(&free_list);
+  }
+
+  return status;
+}
+
+// This is a transient operation used only to fail currently outstanding page requests. It does not
+// alter the state of the VMO, or any pages that might have already been populated within the
+// specified range.
+//
+// If certain pages in this range are populated, we must have done so via a previous SupplyPages()
+// call that succeeded. So it might be fine for clients to continue accessing them, despite the
+// larger range having failed.
+//
+// TODO(rashaeqbal): If we support a more permanent failure mode in the future, we will need to free
+// populated pages in the specified range, and possibly detach the VMO from the page source.
+zx_status_t VmCowPages::FailPageRequests(uint64_t offset, uint64_t len, zx_status_t error_status) {
+  canary_.Assert();
+
+  DEBUG_ASSERT(IS_PAGE_ALIGNED(offset));
+  DEBUG_ASSERT(IS_PAGE_ALIGNED(len));
+
+  // |error_status| must have already been validated by the PagerDispatcher.
+  DEBUG_ASSERT(PageSource::IsValidFailureCode(error_status));
+
+  Guard<Mutex> guard{&lock_};
+  ASSERT(page_source_);
+
+  if (!InRange(offset, len, size_)) {
+    return ZX_ERR_OUT_OF_RANGE;
+  }
+
+  page_source_->OnPagesFailed(offset, len, error_status);
+  return ZX_OK;
+}
+
+fbl::RefPtr<PageSource> VmCowPages::GetRootPageSourceLocked() const {
+  auto cow_pages = this;
+  AssertHeld(cow_pages->lock_);
+  while (cow_pages->parent_) {
+    cow_pages = cow_pages->parent_.get();
+    if (!cow_pages) {
+      return nullptr;
+    }
+  }
+  return cow_pages->page_source_;
+}
+
+bool VmCowPages::IsCowClonableLocked() const {
+  // Copy-on-write clones of pager vmos aren't supported as we can't
+  // efficiently make an immutable snapshot.
+  if (page_source_) {
+    return false;
+  }
+
+  // Copy-on-write clones of slices aren't supported at the moment due to the resulting VMO chains
+  // having non-hidden VMOs between hidden VMOs. This case cannot be handled by CloneCowPageLocked
+  // at the moment and so we forbid the construction of such cases.
+  // Bug: 36841
+  if (is_slice()) {
+    return false;
+  }
+
+  // vmos descended from paged/physical vmos can't be eagerly cloned.
+  auto parent = parent_.get();
+  while (parent) {
+    if (parent->page_source_) {
+      return false;
+    }
+    AssertHeld(parent->lock_);
+    parent = parent->parent_.get();
+  }
+  return true;
+}
+
+VmCowPages* VmCowPages::PagedParentOfSliceLocked(uint64_t* offset) {
+  DEBUG_ASSERT(is_slice());
+  VmCowPages* cur = this;
+  uint64_t off = 0;
+  while (cur->is_slice()) {
+    AssertHeld(cur->lock_);
+    off += cur->parent_offset_;
+    DEBUG_ASSERT(cur->parent_);
+    cur = cur->parent_.get();
+  }
+  *offset = off;
+  return cur;
+}
+
+void VmCowPages::RangeChangeUpdateFromParentLocked(const uint64_t offset, const uint64_t len,
+                                                   RangeChangeList* list) {
+  canary_.Assert();
+
+  LTRACEF("offset %#" PRIx64 " len %#" PRIx64 " p_offset %#" PRIx64 " size_ %#" PRIx64 "\n", offset,
+          len, parent_offset_, size_);
+
+  // our parent is notifying that a range of theirs changed, see where it intersects
+  // with our offset into the parent and pass it on
+  uint64_t offset_new;
+  uint64_t len_new;
+  if (!GetIntersect(parent_offset_, size_, offset, len, &offset_new, &len_new)) {
+    return;
+  }
+
+  // if they intersect with us, then by definition the new offset must be >= parent_offset_
+  DEBUG_ASSERT(offset_new >= parent_offset_);
+
+  // subtract our offset
+  offset_new -= parent_offset_;
+
+  // verify that it's still within range of us
+  DEBUG_ASSERT(offset_new + len_new <= size_);
+
+  LTRACEF("new offset %#" PRIx64 " new len %#" PRIx64 "\n", offset_new, len_new);
+
+  // pass it on. to prevent unbounded recursion we package up our desired offset and len and add
+  // ourselves to the list. UpdateRangeLocked will then get called on it later.
+  // TODO: optimize by not passing on ranges that are completely covered by pages local to this vmo
+  range_change_offset_ = offset_new;
+  range_change_len_ = len_new;
+  list->push_front(this);
+}
+
+void VmCowPages::RangeChangeUpdateListLocked(RangeChangeList* list, RangeChangeOp op) {
+  while (!list->is_empty()) {
+    VmCowPages* object = list->pop_front();
+    AssertHeld(object->lock_);
+
+    // Check if there is an associated backlink, and if so pass the operation over.
+    if (object->paged_ref_) {
+      AssertHeld(object->paged_ref_->lock_ref());
+      object->paged_ref_->RangeChangeUpdateLocked(object->range_change_offset_,
+                                                  object->range_change_len_, op);
+    }
+
+    // inform all our children of this as well, so they can inform their mappings
+    for (auto& child : object->children_list_) {
+      AssertHeld(child.lock_);
+      child.RangeChangeUpdateFromParentLocked(object->range_change_offset_,
+                                              object->range_change_len_, list);
+    }
+  }
+}
+
+void VmCowPages::RangeChangeUpdateLocked(uint64_t offset, uint64_t len, RangeChangeOp op) {
+  canary_.Assert();
+
+  RangeChangeList list;
+  this->range_change_offset_ = offset;
+  this->range_change_len_ = len;
+  list.push_front(this);
+  RangeChangeUpdateListLocked(&list, op);
+}
+
+bool VmCowPages::EvictPageLocked(vm_page_t* page, uint64_t offset) {
+  // Without a page source to bring the page back in we cannot even think about eviction.
+  if (!page_source_) {
+    return false;
+  }
+
+  // Check this page is still a part of this VMO.
+  VmPageOrMarker* page_or_marker = page_list_.Lookup(offset);
+  if (!page_or_marker || !page_or_marker->IsPage() || page_or_marker->Page() != page) {
+    return false;
+  }
+
+  // Pinned pages could be in use by DMA so we cannot safely evict them.
+  if (page->object.pin_count != 0) {
+    return false;
+  }
+
+  // Remove any mappings to this page before we remove it.
+  RangeChangeUpdateLocked(offset, PAGE_SIZE, RangeChangeOp::Unmap);
+
+  // Use RemovePage over just writing to page_or_marker so that the page list has the opportunity
+  // to release any now empty intermediate nodes.
+  vm_page_t* p = page_list_.RemovePage(offset).ReleasePage();
+  DEBUG_ASSERT(p == page);
+  pmm_page_queues()->Remove(page);
+
+  // |page| is now owned by the caller.
+  return true;
+}
+
+bool VmCowPages::DebugValidatePageSplitsLocked() const {
+  canary_.Assert();
+
+  if (!is_hidden()) {
+    // Nothing to validate on a leaf vmo.
+    return true;
+  }
+  // Assume this is valid until we prove otherwise.
+  bool valid = true;
+  page_list_.ForEveryPage([this, &valid](const VmPageOrMarker* page, uint64_t offset) {
+    if (!page->IsPage()) {
+      return ZX_ERR_NEXT;
+    }
+    vm_page_t* p = page->Page();
+    AssertHeld(this->lock_);
+    // We found a page in the hidden VMO. If it has been forked in either direction then we
+    // expect that, if we search down that path, we will find the forked page and that no
+    // descendant can 'see' back to this page.
+    const VmCowPages* expected = nullptr;
+    if (p->object.cow_left_split) {
+      expected = &left_child_locked();
+    } else if (p->object.cow_right_split) {
+      expected = &right_child_locked();
+    } else {
+      return ZX_ERR_NEXT;
+    }
+
+    // We know this must be true as this is a hidden vmo and so left_child_locked and
+    // right_child_locked will never have returned null.
+    DEBUG_ASSERT(expected);
+
+    // No leaf VMO in expected should be able to 'see' this page and potentially re-fork it. To
+    // validate this we need to walk the entire sub tree.
+    const VmCowPages* cur = expected;
+    uint64_t off = offset;
+    // We start with cur being an immediate child of 'this', so we can perform subtree traversal
+    // until we end up back in 'this'.
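+    // In effect this is an iterative pre-order walk of the subtree rooted at |expected|: descend
+    // into left children first, and when walking back up cross over from a left child to its
+    // right sibling exactly once before continuing upwards.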
+    while (cur != this) {
+      AssertHeld(cur->lock_);
+      // Check that we can see this page in the parent. Importantly this first checks if
+      // |off < cur->parent_offset_| allowing us to safely perform that subtraction from then on.
+      if (off < cur->parent_offset_ || off - cur->parent_offset_ < cur->parent_start_limit_ ||
+          off - cur->parent_offset_ >= cur->parent_limit_) {
+        // This blank case is used to capture the scenario where current does not see the target
+        // offset in the parent, in which case there is no point traversing into the children.
+      } else if (cur->is_hidden()) {
+        // A hidden VMO *may* have the page, but not necessarily if both children forked it out.
+        const VmPageOrMarker* l = cur->page_list_.Lookup(off - cur->parent_offset_);
+        if (!l || l->IsEmpty()) {
+          // Page not found, we need to recurse down into our children.
+          off -= cur->parent_offset_;
+          cur = &cur->left_child_locked();
+          continue;
+        }
+      } else {
+        // We already checked in the first 'if' branch that this offset was visible, and so this
+        // leaf VMO *must* have a page or marker to prevent it 'seeing' the already forked original.
+        const VmPageOrMarker* l = cur->page_list_.Lookup(off - cur->parent_offset_);
+        if (!l || l->IsEmpty()) {
+          printf("Failed to find fork of page %p (off %p) from %p in leaf node %p (off %p)\n", p,
+                 (void*)offset, this, cur, (void*)(off - cur->parent_offset_));
+          cur->DumpLocked(1, true);
+          this->DumpLocked(1, true);
+          valid = false;
+          return ZX_ERR_STOP;
+        }
+      }
+
+      // Find our next node by walking up until we see we have come from a left path, then go right.
+      do {
+        VmCowPages* next = cur->parent_.get();
+        AssertHeld(next->lock_);
+        off += next->parent_offset_;
+        if (next == this) {
+          cur = next;
+          break;
+        }
+
+        // If we came from the left, go back down on the right, otherwise just keep going up.
+        if (cur == &next->left_child_locked()) {
+          off -= next->parent_offset_;
+          cur = &next->right_child_locked();
+          break;
+        }
+        cur = next;
+      } while (1);
+    }
+    return ZX_ERR_NEXT;
+  });
+  return valid;
+}
diff --git a/zircon/kernel/vm/vm_object.cc b/zircon/kernel/vm/vm_object.cc
index 3cee5d7..fb88930 100644
--- a/zircon/kernel/vm/vm_object.cc
+++ b/zircon/kernel/vm/vm_object.cc
@@ -359,10 +359,6 @@
 
   Guard<Mutex> guard{&lock_};
 
-  if (unlikely(!InRange(start_offset, len, size()))) {
-    return ZX_ERR_OUT_OF_RANGE;
-  }
-
   const size_t end_offset = static_cast<size_t>(start_offset + len);
   size_t op_start_offset = static_cast<size_t>(start_offset);
 
@@ -413,6 +409,8 @@
           arch_sync_cache_range(cache_op_addr, cache_op_len);
           break;
       }
+    } else if (status == ZX_ERR_OUT_OF_RANGE) {
+      return status;
     }
 
     op_start_offset += cache_op_len;
diff --git a/zircon/kernel/vm/vm_object_paged.cc b/zircon/kernel/vm/vm_object_paged.cc
index 60a01a7..f16d622 100644
--- a/zircon/kernel/vm/vm_object_paged.cc
+++ b/zircon/kernel/vm/vm_object_paged.cc
@@ -28,6 +28,7 @@
 #include <vm/physmap.h>
 #include <vm/vm.h>
 #include <vm/vm_address_region.h>
+#include <vm/vm_cow_pages.h>
 
 #include "vm_priv.h"
 
@@ -35,86 +36,6 @@
 
 namespace {
 
-void ZeroPage(paddr_t pa) {
-  void* ptr = paddr_to_physmap(pa);
-  DEBUG_ASSERT(ptr);
-
-  arch_zero_page(ptr);
-}
-
-void ZeroPage(vm_page_t* p) {
-  paddr_t pa = p->paddr();
-  ZeroPage(pa);
-}
-
-bool IsZeroPage(vm_page_t* p) {
-  uint64_t* base = (uint64_t*)paddr_to_physmap(p->paddr());
-  for (int i = 0; i < PAGE_SIZE / (int)sizeof(uint64_t); i++) {
-    if (base[i] != 0)
-      return false;
-  }
-  return true;
-}
-
-void InitializeVmPage(vm_page_t* p) {
-  DEBUG_ASSERT(p->state() == VM_PAGE_STATE_ALLOC);
-  p->set_state(VM_PAGE_STATE_OBJECT);
-  p->object.pin_count = 0;
-  p->object.cow_left_split = 0;
-  p->object.cow_right_split = 0;
-}
-
-// Allocates a new page and populates it with the data at |parent_paddr|.
-bool AllocateCopyPage(uint32_t pmm_alloc_flags, paddr_t parent_paddr, list_node_t* free_list,
-                      vm_page_t** clone) {
-  paddr_t pa_clone;
-  vm_page_t* p_clone = nullptr;
-  if (free_list) {
-    p_clone = list_remove_head_type(free_list, vm_page, queue_node);
-    if (p_clone) {
-      pa_clone = p_clone->paddr();
-    }
-  }
-  if (!p_clone) {
-    zx_status_t status = pmm_alloc_page(pmm_alloc_flags, &p_clone, &pa_clone);
-    if (!p_clone) {
-      DEBUG_ASSERT(status == ZX_ERR_NO_MEMORY);
-      return false;
-    }
-    DEBUG_ASSERT(status == ZX_OK);
-  }
-
-  InitializeVmPage(p_clone);
-
-  void* dst = paddr_to_physmap(pa_clone);
-  DEBUG_ASSERT(dst);
-
-  if (parent_paddr != vm_get_zero_page_paddr()) {
-    // do a direct copy of the two pages
-    const void* src = paddr_to_physmap(parent_paddr);
-    DEBUG_ASSERT(src);
-    memcpy(dst, src, PAGE_SIZE);
-  } else {
-    // avoid pointless fetches by directly zeroing dst
-    arch_zero_page(dst);
-  }
-
-  *clone = p_clone;
-
-  return true;
-}
-
-bool SlotHasPinnedPage(VmPageOrMarker* slot) {
-  return slot && slot->IsPage() && slot->Page()->object.pin_count > 0;
-}
-
-inline uint64_t CheckedAdd(uint64_t a, uint64_t b) {
-  uint64_t result;
-  bool overflow = add_overflow(a, b, &result);
-  DEBUG_ASSERT(!overflow);
-  return result;
-}
-
 KCOUNTER(vmo_attribution_queries_all, "vm.object.attribution.queries_all")
 KCOUNTER(vmo_attribution_queries_entire_object, "vm.object.attribution.queries_entire_object")
 KCOUNTER(vmo_attribution_cache_hits, "vm.object.attribution.cache_hits")
@@ -122,69 +43,15 @@
 
 }  // namespace
 
-// Helper class for collecting pages to performed batched Removes from the page queue to not incur
-// its spinlock overhead for every single page. Pages that it removes from the page queue get placed
-// into a provided list. Note that pages are not moved into the list until *after* Flush has been
-// called and Flush must be called prior to object destruction.
-class BatchPQRemove {
- public:
-  BatchPQRemove(list_node_t* free_list) : free_list_(free_list) {}
-  ~BatchPQRemove() { DEBUG_ASSERT(count_ == 0); }
-  DISALLOW_COPY_AND_ASSIGN_ALLOW_MOVE(BatchPQRemove);
-
-  // Add a page to the batch set. Automatically calls |Flush| if the limit is reached.
-  void Push(vm_page_t* page) {
-    DEBUG_ASSERT(page);
-    pages_[count_] = page;
-    count_++;
-    if (count_ == kMaxPages) {
-      Flush();
-    }
-  }
-
-  // Performs |Remove| on any pending pages. This allows you to know that all pages are in the
-  // original list so that you can do operations on the list.
-  void Flush() {
-    if (count_ > 0) {
-      pmm_page_queues()->RemoveArrayIntoList(pages_.data(), count_, free_list_);
-      count_ = 0;
-    }
-  }
-
-  // Produces a callback suitable for passing to VmPageList::RemovePages that will |Push| any pages
-  auto RemovePagesCallback() {
-    return [this](VmPageOrMarker* p, uint64_t off) {
-      if (p->IsPage()) {
-        vm_page_t* page = p->ReleasePage();
-        Push(page);
-      }
-      *p = VmPageOrMarker::Empty();
-      return ZX_ERR_NEXT;
-    };
-  }
-
- private:
-  // The value of 64 was chosen as there is minimal performance gains originally measured by using
-  // higher values. There is an incentive on this being as small as possible due to this typically
-  // being created on the stack, and our stack space is limited.
-  static constexpr size_t kMaxPages = 64;
-
-  size_t count_ = 0;
-  ktl::array<vm_page_t*, kMaxPages> pages_;
-  list_node_t* free_list_ = nullptr;
-};
-
-VmObjectPaged::VmObjectPaged(uint32_t options, uint32_t pmm_alloc_flags, uint64_t size,
-                             fbl::RefPtr<VmHierarchyState> hierarchy_state,
-                             fbl::RefPtr<PageSource> page_source)
-    : VmObject(ktl::move(hierarchy_state)),
-      options_(options),
-      size_(size),
-      pmm_alloc_flags_(pmm_alloc_flags),
-      page_source_(ktl::move(page_source)) {
+VmObjectPaged::VmObjectPaged(uint32_t options, fbl::RefPtr<VmHierarchyState> hierarchy_state,
+                             fbl::RefPtr<VmCowPages> cow_pages)
+    : VmObject(ktl::move(hierarchy_state)), options_(options), cow_pages_(ktl::move(cow_pages)) {
   LTRACEF("%p\n", this);
 
-  DEBUG_ASSERT(IS_PAGE_ALIGNED(size_));
+  {
+    Guard<Mutex> guard{&lock_};
+    cow_pages_locked()->set_paged_backlink_locked(this);
+  }
 
   // Adding to the global list needs to be done at the end of the ctor, since
   // calls can be made into this object as soon as it is in that list.
@@ -197,9 +64,6 @@
   DEBUG_ASSERT(original_parent_user_id_ == 0);
 
   AssertHeld(parent->lock_);
-  page_list_.InitializeSkew(parent->page_list_.GetSkew(), offset);
-
-  AssertHeld(parent->lock_ref());
   original_parent_user_id_ = parent->user_id_locked();
   parent_ = ktl::move(parent);
 }
@@ -211,6 +75,18 @@
 
   RemoveFromGlobalList();
 
+  Guard<Mutex> guard{&lock_};
+
+  if (is_contiguous() && !is_slice()) {
+    // A contiguous VMO either has all its pages committed and pinned or, if creation failed, no
+    // pages committed and pinned. Check if we are in the failure case by looking up the first page.
+    if (GetPageLocked(0, 0, nullptr, nullptr, nullptr, nullptr) == ZX_OK) {
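+      // The lookup succeeded, so creation succeeded and every page in the range is pinned; undo
+      // those pins now.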
+      cow_pages_locked()->UnpinLocked(0, size_locked());
+    }
+  }
+
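+  // Clear the backlink so |cow_pages_| cannot call back into this object while it is being
+  // destroyed.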
+  cow_pages_->set_paged_backlink_locked(nullptr);
+
   if (!is_hidden()) {
     // If we're not a hidden vmo, then we need to remove ourself from our parent. This needs
     // to be done before emptying the page list so that a hidden parent can't merge into this
@@ -218,7 +94,6 @@
     //
     // To prevent races with a hidden parent merging itself into this vmo, it is necessary
     // to hold the lock over the parent_ check and into the subsequent removal call.
-    Guard<Mutex> guard{&lock_};
     if (parent_) {
       LTRACEF("removing ourself from our parent %p\n", parent_.get());
       parent_->RemoveChild(this, guard.take());
@@ -232,7 +107,6 @@
     // Most of the hidden vmo's state should have already been cleaned up when it merged
     // itself into its child in ::RemoveChild.
     DEBUG_ASSERT(children_list_len_ == 0);
-    DEBUG_ASSERT(page_list_.HasNoPages());
     // Even though we are hidden we might have a parent. Unlike in the other branch of this if we
     // do not need to perform any deferred deletion. The reason for this is that the deferred
     // deletion mechanism is intended to resolve the scenario where there is a chain of 'one ref'
@@ -252,38 +126,19 @@
     // And so each serialized deletion breaks of a discrete two VMO chain that can be safely
     // finalized with one recursive step.
   }
-
-  list_node_t list;
-  list_initialize(&list);
-
-  BatchPQRemove page_remover(&list);
-  // free all of the pages attached to us
-  page_list_.RemoveAllPages([this, &page_remover](vm_page_t* page) {
-    page_remover.Push(page);
-    if (this->is_contiguous()) {
-      // Don't use unpin page since we already removed it from the page queue.
-      page->object.pin_count--;
-    }
-    ASSERT(page->object.pin_count == 0);
-  });
-
-  if (page_source_) {
-    page_source_->Close();
-  }
-  page_remover.Flush();
-
-  pmm_free(&list);
 }
 
 void VmObjectPaged::HarvestAccessedBits() {
+  canary_.Assert();
+
   Guard<Mutex> guard{lock()};
   // If there is no root page source, then we have nothing worth harvesting bits from.
-  if (GetRootPageSourceLocked() == nullptr) {
+  if (cow_pages_locked()->GetRootPageSourceLocked() == nullptr) {
     return;
   }
 
   fbl::Function<bool(vm_page_t*, uint64_t)> f = [this](vm_page_t* p, uint64_t offset) {
-    AssertHeld(*lock());
+    AssertHeld(lock_);
     // Skip the zero page as we are never going to evict it and initial zero pages will not be
     // returned by GetPageLocked down below.
     if (p == vm_get_zero_page()) {
@@ -293,7 +148,8 @@
     // write to prevent any committing or copy-on-write behavior. This will just cause the page to
     // be looked up, and its location in any pager_backed queues updated.
     __UNUSED vm_page_t* out;
-    __UNUSED zx_status_t result = GetPageLocked(offset, 0, nullptr, nullptr, &out, nullptr);
+    __UNUSED zx_status_t result =
+        cow_pages_locked()->GetPageLocked(offset, 0, nullptr, nullptr, &out, nullptr);
     // We are in this callback because there is a physical page mapped into the hardware page table
     // attributed to this vmo. If we cannot find it, or it isn't the page we expect, then something
     // has gone horribly wrong.
@@ -304,7 +160,7 @@
   for (auto& m : mapping_list_) {
     if (m.aspace()->is_user()) {
       AssertHeld(*m.object_lock());
-      __UNUSED zx_status_t result = m.HarvestAccessVmoRangeLocked(0, size(), f);
+      __UNUSED zx_status_t result = m.HarvestAccessVmoRangeLocked(0, size_locked(), f);
       // There's no way we should be harvesting an invalid range as that would imply that this
       // mapping is invalid.
       DEBUG_ASSERT(result == ZX_OK);
@@ -313,15 +169,9 @@
 }
 
 bool VmObjectPaged::DedupZeroPage(vm_page_t* page, uint64_t offset) {
-  Guard<Mutex> guard{&lock_};
+  canary_.Assert();
 
-  // Check this page is still a part of this VMO. object.page_offset could be complete garbage,
-  // but there's no harm in looking up a random slot as we'll then notice it's the wrong page.
-  VmPageOrMarker* page_or_marker = page_list_.Lookup(offset);
-  if (!page_or_marker || !page_or_marker->IsPage() || page_or_marker->Page() != page ||
-      page->object.pin_count > 0) {
-    return false;
-  }
+  Guard<Mutex> guard{&lock_};
 
   // Skip uncached VMOs as we cannot efficiently scan them.
   if ((cache_policy_ & ZX_CACHE_POLICY_MASK) != ZX_CACHE_POLICY_CACHED) {
@@ -337,22 +187,7 @@
     }
   }
 
-  // We expect most pages to not be zero, as such we will first do a 'racy' zero page check where
-  // we leave write permissions on the page. If the page isn't zero, which is our hope, then we
-  // haven't paid the price of modifying page tables.
-  if (!IsZeroPage(page_or_marker->Page())) {
-    return false;
-  }
-
-  RangeChangeUpdateLocked(offset, PAGE_SIZE, RangeChangeOp::RemoveWrite);
-
-  if (IsZeroPage(page_or_marker->Page())) {
-    RangeChangeUpdateLocked(offset, PAGE_SIZE, RangeChangeOp::Unmap);
-    vm_page_t* page = page_or_marker->ReleasePage();
-    pmm_page_queues()->Remove(page);
-    DEBUG_ASSERT(!list_in_list(&page->queue_node));
-    pmm_free_page(page);
-    *page_or_marker = VmPageOrMarker::Marker();
+  if (cow_pages_locked()->DedupZeroPageLocked(page, offset)) {
     eviction_event_count_++;
     IncrementHierarchyGenerationCountLocked();
     return true;
@@ -361,8 +196,8 @@
 }
 
 uint32_t VmObjectPaged::ScanForZeroPages(bool reclaim) {
-  list_node_t free_list;
-  list_initialize(&free_list);
+  canary_.Assert();
+
   Guard<Mutex> guard{lock()};
 
   // Skip uncached VMOs as we cannot efficiently scan them.
@@ -379,52 +214,15 @@
     }
     // Remove write from the mapping to ensure it's not being concurrently modified.
     AssertHeld(*m.object_lock());
-    m.RemoveWriteVmoRangeLocked(0, size());
+    m.RemoveWriteVmoRangeLocked(0, size_locked());
   }
 
-  // Check if we have any slice children. Slice children may have writable mappings to our pages,
-  // and so we need to also remove any mappings from them. Non-slice children could only have
-  // read-only mappings, which is the state we already want, and so we don't need to touch them.
-  for (auto& child : children_list_) {
-    DEBUG_ASSERT(child.is_paged());
-    VmObjectPaged& typed_child = static_cast<VmObjectPaged&>(child);
-    if (typed_child.is_slice()) {
-      // Slices are strict subsets of their parents so we don't need to bother looking at parent
-      // limits etc and can just operate on the entire range.
-      AssertHeld(typed_child.lock_);
-      typed_child.RangeChangeUpdateLocked(0, typed_child.size(), RangeChangeOp::RemoveWrite);
-    }
-  }
-
-  uint32_t count = 0;
-  page_list_.RemovePages(
-      [&count, &free_list, reclaim, this](VmPageOrMarker* p, uint64_t off) {
-        // Pinned pages cannot be decommitted so do not consider them.
-        if (p->IsPage() && p->Page()->object.pin_count == 0 && IsZeroPage(p->Page())) {
-          count++;
-          if (reclaim) {
-            // Need to remove all mappings (include read) ones to this range before we remove the
-            // page.
-            AssertHeld(this->lock_);
-            RangeChangeUpdateLocked(off, PAGE_SIZE, RangeChangeOp::Unmap);
-            vm_page_t* page = p->ReleasePage();
-            pmm_page_queues()->Remove(page);
-            DEBUG_ASSERT(!list_in_list(&page->queue_node));
-            list_add_tail(&free_list, &page->queue_node);
-            *p = VmPageOrMarker::Marker();
-          }
-        }
-        return ZX_ERR_NEXT;
-      },
-      0, VmObjectPaged::MAX_SIZE);
+  uint32_t count = cow_pages_locked()->ScanForZeroPagesLocked(reclaim);
 
   if (reclaim && count > 0) {
     IncrementHierarchyGenerationCountLocked();
   }
 
-  // Release the guard so we can free any pages.
-  guard.Release();
-  pmm_free(&free_list);
   return count;
 }
 
@@ -442,8 +240,14 @@
     return ZX_ERR_NO_MEMORY;
   }
 
+  fbl::RefPtr<VmCowPages> cow_pages;
+  status = VmCowPages::Create(state, pmm_alloc_flags, size, &cow_pages);
+  if (status != ZX_OK) {
+    return status;
+  }
+
   auto vmo = fbl::AdoptRef<VmObjectPaged>(
-      new (&ac) VmObjectPaged(options, pmm_alloc_flags, size, ktl::move(state), nullptr));
+      new (&ac) VmObjectPaged(options, ktl::move(state), ktl::move(cow_pages)));
   if (!ac.check()) {
     return ZX_ERR_NO_MEMORY;
   }
@@ -495,38 +299,22 @@
     LTRACEF("failed to allocate enough pages (asked for %zu)\n", num_pages);
     return ZX_ERR_NO_MEMORY;
   }
-  auto cleanup_phys_pages = fbl::MakeAutoCall([&page_list]() { pmm_free(&page_list); });
-
-  // add them to the appropriate range of the object
-  VmObjectPaged* vmop = static_cast<VmObjectPaged*>(vmo.get());
-  Guard<Mutex> guard{&vmop->lock_};
-  for (uint64_t off = 0; off < size; off += PAGE_SIZE) {
-    VmPageOrMarker* slot = vmop->page_list_.LookupOrAllocate(off);
-    if (!slot) {
-      return ZX_ERR_NO_MEMORY;
-    }
-    if (!slot->IsEmpty()) {
-      return ZX_ERR_ALREADY_EXISTS;
-    }
-
-    vm_page_t* p = list_remove_head_type(&page_list, vm_page_t, queue_node);
-    ASSERT(p);
-
-    InitializeVmPage(p);
-
-    // TODO: remove once pmm returns zeroed pages
-    ZeroPage(p);
-
-    // Mark the pages as pinned, so they can't be physically rearranged
-    // underneath us.
-    DEBUG_ASSERT(p->object.pin_count == 0);
-    p->object.pin_count++;
-    pmm_page_queues()->SetWired(p);
-
-    *slot = VmPageOrMarker::Page(p);
+  Guard<Mutex> guard{&vmo->lock_};
+  // add them to the appropriate range of the object; this takes ownership of all the pages
+  // regardless of outcome.
+  status = vmo->cow_pages_locked()->AddNewPagesLocked(0, &page_list);
+  if (status != ZX_OK) {
+    return status;
   }
 
-  cleanup_phys_pages.cancel();
+  // We already added the pages, so this will just cause them to be pinned.
+  status = vmo->CommitRangeInternal(0, size, true, ktl::move(guard));
+  if (status != ZX_OK) {
+    // Decommit the range so the destructor doesn't attempt to unpin.
+    vmo->DecommitRangeLocked(0, size);
+    return status;
+  }
+
   *obj = ktl::move(vmo);
   return ZX_OK;
 }
@@ -556,6 +344,8 @@
     paddr_t start_paddr = vaddr_to_paddr(data);
     ASSERT(start_paddr != 0);
 
+    Guard<Mutex> guard{&vmo->lock_};
+
     for (size_t count = 0; count < size / PAGE_SIZE; count++) {
       paddr_t pa = start_paddr + count * PAGE_SIZE;
       vm_page_t* page = paddr_to_vm_page(pa);
@@ -569,11 +359,8 @@
         panic("page used to back static vmo in unusable state: paddr %#" PRIxPTR " state %u\n", pa,
               page->state());
       }
-      InitializeVmPage(page);
-
-      // XXX hack to work around the ref pointer to the base class
-      auto vmo2 = static_cast<VmObjectPaged*>(vmo.get());
-      vmo2->AddPage(page, count * PAGE_SIZE);
+      status = vmo->cow_pages_locked()->AddNewPageLocked(count * PAGE_SIZE, page, false, false);
+      ASSERT(status == ZX_OK);
     }
 
     if (exclusive && !is_physmap_addr(data)) {
@@ -604,8 +391,14 @@
     return ZX_ERR_NO_MEMORY;
   }
 
+  fbl::RefPtr<VmCowPages> cow_pages;
+  status = VmCowPages::CreateExternal(ktl::move(src), state, size, &cow_pages);
+  if (status != ZX_OK) {
+    return status;
+  }
+
   auto vmo = fbl::AdoptRef<VmObjectPaged>(
-      new (&ac) VmObjectPaged(options, PMM_ALLOC_FLAG_ANY, size, ktl::move(state), ktl::move(src)));
+      new (&ac) VmObjectPaged(options, ktl::move(state), ktl::move(cow_pages)));
   if (!ac.check()) {
     return ZX_ERR_NO_MEMORY;
   }
@@ -618,6 +411,7 @@
 void VmObjectPaged::InsertHiddenParentLocked(fbl::RefPtr<VmObjectPaged>&& hidden_parent) {
   AssertHeld(hidden_parent->lock_);
   // Insert the new VmObject |hidden_parent| between between |this| and |parent_|.
+  cow_pages_locked()->InsertHiddenParentLocked(hidden_parent->cow_pages_);
   if (parent_) {
     AssertHeld(parent_->lock_ref());
     hidden_parent->InitializeOriginalParentLocked(parent_, 0);
@@ -635,49 +429,6 @@
   // is set after adding the hidden parent into the tree since that's not really
   // a 'real' child.
   hidden_parent->user_id_ = user_id_;
-  hidden_parent->page_attribution_user_id_ = user_id_;
-
-  // The hidden parent should have the same view as we had into
-  // its parent, and this vmo has a full view into the hidden vmo
-  hidden_parent->parent_offset_ = parent_offset_;
-  hidden_parent->parent_limit_ = parent_limit_;
-  // Although we are inserting the hidden parent between this and parent_ they share the same
-  // root_parent_offset_.
-  hidden_parent->root_parent_offset_ = root_parent_offset_;
-  parent_offset_ = 0;
-  parent_limit_ = size_;
-
-  // This method should only ever be called on leaf vmos (i.e. non-hidden),
-  // so this flag should never be set.
-  DEBUG_ASSERT(!partial_cow_release_);
-  DEBUG_ASSERT(parent_start_limit_ == 0);  // Should only ever be set for hidden vmos
-
-  // Moving our page list would be bad if we had a page source and potentially have pages with
-  // links back to this object.
-  DEBUG_ASSERT(!page_source_);
-  // Move everything into the hidden parent, for immutability
-  hidden_parent->page_list_ = ktl::move(page_list_);
-
-  // As we are moving pages between objects we need to make sure no backlinks are broken. We know
-  // there's no page_source_ and hence no pages will be in the pager_backed queue, but we could
-  // have pages in the unswappable_zero_forked queue. We do know that pages in this queue cannot
-  // have been pinned, so we can just move (or re-move potentially) any page that is not pinned
-  // into the regular unswappable queue.
-  {
-    PageQueues* pq = pmm_page_queues();
-    Guard<SpinLock, IrqSave> guard{pq->get_lock()};
-    hidden_parent->page_list_.ForEveryPage([pq](auto* p, uint64_t off) {
-      if (p->IsPage()) {
-        vm_page_t* page = p->Page();
-        if (page->object.pin_count == 0) {
-          AssertHeld<Lock<SpinLock>, IrqSave>(*pq->get_lock());
-          pq->MoveToUnswappableLocked(page);
-        }
-      }
-      return ZX_ERR_NEXT;
-    });
-  }
-  hidden_parent->size_ = size_;
 }
 
 zx_status_t VmObjectPaged::CreateChildSlice(uint64_t offset, uint64_t size, bool copy_name,
@@ -697,16 +448,9 @@
     return status;
   }
 
-  // Slice must be wholly contained.
-  uint64_t our_size;
-  {
-    // size_ is not an atomic variable and although it should not be changing, as we are not
-    // allowing this operation on resizable vmo's, we should still be holding the lock to
-    // correctly read size_. Unfortunately we must also drop then drop the lock in order to
-    // perform the allocation.
-    Guard<Mutex> guard{&lock_};
-    our_size = size_;
-  }
+  // Slice must be wholly contained. |size()| will read the size holding the lock. This extra
+  // acquisition is correct as we must drop the lock in order to perform the allocations.
+  uint64_t our_size = this->size();
   if (!InRange(offset, size, our_size)) {
     return ZX_ERR_INVALID_ARGS;
   }
@@ -726,9 +470,15 @@
   // after the lock is released. Second, diagnostics code makes calls into vmos while holding
   // the global vmo lock. Since the VmObject ctor takes the global lock, we can't construct
   // any vmos under any vmo lock.
+  fbl::RefPtr<VmCowPages> cow_pages;
+  status = cow_pages_->CreateChildSlice(offset, size, &cow_pages);
+  if (status != ZX_OK) {
+    return status;
+  }
+
   fbl::AllocChecker ac;
   auto vmo = fbl::AdoptRef<VmObjectPaged>(
-      new (&ac) VmObjectPaged(options, pmm_alloc_flags_, size, hierarchy_state_ptr_, nullptr));
+      new (&ac) VmObjectPaged(options, hierarchy_state_ptr_, ktl::move(cow_pages)));
   if (!ac.check()) {
     return ZX_ERR_NO_MEMORY;
   }
@@ -747,18 +497,15 @@
       return ZX_ERR_BAD_STATE;
     }
     vmo->cache_policy_ = cache_policy_;
-    vmo->parent_offset_ = offset;
-    vmo->parent_limit_ = size;
-    // As our slice must be in range of the parent it is impossible to have the accumulated parent
-    // offset overflow.
-    vmo->root_parent_offset_ = CheckedAdd(offset, root_parent_offset_);
-    CheckedAdd(vmo->root_parent_offset_, size);
 
+    // Initialize the parents for both parallel hierarchies.
     vmo->InitializeOriginalParentLocked(fbl::RefPtr(this), offset);
+    vmo->cow_pages_locked()->InitializeOriginalParentLocked(cow_pages_, offset);
 
     // add the new vmo as a child before we do anything, since its
     // dtor expects to find it in its parent's child list
     notify_one_child = AddChildLocked(vmo.get());
+    cow_pages_locked()->AddChildLocked(vmo->cow_pages_.get());
 
     if (copy_name) {
       vmo->name_ = name_;
@@ -804,9 +551,15 @@
   // after the lock is released. Second, diagnostics code makes calls into vmos while holding
   // the global vmo lock. Since the VmObject ctor takes the global lock, we can't construct
   // any vmos under any vmo lock.
+  fbl::RefPtr<VmCowPages> cow_pages;
+  status = cow_pages_->CreateClone(offset, size, &cow_pages);
+  if (status != ZX_OK) {
+    return status;
+  }
+
   fbl::AllocChecker ac;
   auto vmo = fbl::AdoptRef<VmObjectPaged>(
-      new (&ac) VmObjectPaged(options, pmm_alloc_flags_, size, hierarchy_state_ptr_, nullptr));
+      new (&ac) VmObjectPaged(options, hierarchy_state_ptr_, ktl::move(cow_pages)));
   if (!ac.check()) {
     return ZX_ERR_NO_MEMORY;
   }
@@ -817,8 +570,12 @@
   if (type == CloneType::Snapshot) {
     // The initial size is 0. It will be initialized as part of the atomic
     // insertion into the child tree.
+    status = cow_pages_->CreateHidden(&cow_pages);
+    if (status != ZX_OK) {
+      return status;
+    }
     hidden_parent = fbl::AdoptRef<VmObjectPaged>(
-        new (&ac) VmObjectPaged(kHidden, pmm_alloc_flags_, 0, hierarchy_state_ptr_, nullptr));
+        new (&ac) VmObjectPaged(kHidden, hierarchy_state_ptr_, ktl::move(cow_pages)));
     if (!ac.check()) {
       return ZX_ERR_NO_MEMORY;
     }
@@ -837,21 +594,21 @@
         //
         // Hidden vmos are an implementation detail that are not exposed to userspace.
 
-        if (!IsCowClonableLocked()) {
+        if (!cow_pages_locked()->IsCowClonableLocked()) {
           return ZX_ERR_NOT_SUPPORTED;
         }
 
         // If this is non-zero, that means that there are pages which hardware can
         // touch, so the vmo can't be safely cloned.
         // TODO: consider immediately forking these pages.
-        if (pinned_page_count_) {
+        if (cow_pages_locked()->pinned_page_count_locked()) {
           return ZX_ERR_BAD_STATE;
         }
 
         break;
       }
       case CloneType::PrivatePagerCopy:
-        if (!GetRootPageSourceLocked()) {
+        if (!cow_pages_locked()->GetRootPageSourceLocked()) {
           return ZX_ERR_NOT_SUPPORTED;
         }
         break;
@@ -862,26 +619,6 @@
       return ZX_ERR_BAD_STATE;
     }
 
-    // Check that the full range of this VMO does not overflow if projected back onto the root
-    // parent. Record our accumulated offset up to this point in the process.
-    bool overflow;
-    overflow = add_overflow(offset, root_parent_offset_, &vmo->root_parent_offset_);
-    if (overflow) {
-      return ZX_ERR_INVALID_ARGS;
-    }
-    uint64_t temp;
-    overflow = add_overflow(vmo->root_parent_offset_, size, &temp);
-    if (overflow) {
-      return ZX_ERR_INVALID_ARGS;
-    }
-
-    vmo->parent_offset_ = offset;
-    if (offset > size_) {
-      vmo->parent_limit_ = 0;
-    } else {
-      vmo->parent_limit_ = ktl::min(size, size_ - offset);
-    }
-
     VmObjectPaged* clone_parent;
     if (type == CloneType::Snapshot) {
       clone_parent = hidden_parent.get();
@@ -890,17 +627,22 @@
 
       // Invalidate everything the clone will be able to see. They're COW pages now,
       // so any existing mappings can no longer directly write to the pages.
-      RangeChangeUpdateLocked(vmo->parent_offset_, vmo->parent_limit_, RangeChangeOp::RemoveWrite);
+      // This should be done by VmCowPages, but as we are temporarily responsible for
+      // constructing the hierarchy it's easier for us to do it here for the moment.
+      cow_pages_locked()->RangeChangeUpdateLocked(offset, size, RangeChangeOp::RemoveWrite);
     } else {
       clone_parent = this;
     }
     AssertHeld(clone_parent->lock_);
 
+    // Initialize the parents for both parallel hierarchies.
     vmo->InitializeOriginalParentLocked(fbl::RefPtr(clone_parent), offset);
+    vmo->cow_pages_locked()->InitializeOriginalParentLocked(clone_parent->cow_pages_, offset);
 
     // add the new vmo as a child before we do anything, since its
     // dtor expects to find it in its parent's child list
     notify_one_child = clone_parent->AddChildLocked(vmo.get());
+    clone_parent->cow_pages_locked()->AddChildLocked(vmo->cow_pages_.get());
 
     if (copy_name) {
       vmo->name_ = name_;
@@ -957,6 +699,10 @@
 
   IncrementHierarchyGenerationCountLocked();
 
+  // Remove the child from our parallel hierarchy, which causes any necessary merging with the
+  // hidden parent to happen.
+  cow_pages_locked()->RemoveChildLocked(static_cast<VmObjectPaged*>(removed)->cow_pages_.get());
+
   if (!is_hidden()) {
     VmObject::RemoveChild(removed, guard.take());
     return;
@@ -966,7 +712,6 @@
   DEBUG_ASSERT(children_list_len_ == 2);
   // A hidden vmo must be fully initialized to have 2 children.
   DEBUG_ASSERT(user_id_ != ZX_KOID_INVALID);
-  bool removed_left = &left_child_locked() == removed;
 
   DropChildLocked(removed);
 
@@ -986,56 +731,16 @@
   VmObjectPaged* typed_child = static_cast<VmObjectPaged*>(child);
   AssertHeld(typed_child->lock_);
 
-  // Merge this vmo's content into the remaining child.
-  DEBUG_ASSERT(removed->is_paged());
-  MergeContentWithChildLocked(static_cast<VmObjectPaged*>(removed), removed_left);
+  // Most of the merging already happened when we asked our parallel cow_pages node to remove the
+  // child. The only item we are responsible for merging is the hierarchy_generation_count_.
+  if (!parent_) {
+    typed_child->hierarchy_generation_count_ = hierarchy_generation_count_;
+  }
 
   // The child which removed itself and led to the invocation should have a reference
   // to us, in addition to child.parent_ which we are about to clear.
   DEBUG_ASSERT(ref_count_debug() >= 2);
 
-  if (typed_child->page_attribution_user_id_ != page_attribution_user_id_) {
-    // If the attribution user id of this vmo doesn't match that of its remaining child,
-    // then the vmo with the matching attribution user id  was just closed. In that case, we
-    // need to reattribute the pages of any ancestor hidden vmos to vmos that still exist.
-    //
-    // The syscall API doesn't specify how pages are to be attributed among a group of COW
-    // clones. One option is to pick a remaining vmo 'arbitrarily' and attribute everything to
-    // that vmo. However, it seems fairer to reattribute each remaining hidden vmo with
-    // its child whose user id doesn't match the vmo that was just closed. So walk up the
-    // clone chain and attribute each hidden vmo to the vmo we didn't just walk through.
-    auto cur = this;
-    AssertHeld(cur->lock_);
-    uint64_t user_id_to_skip = page_attribution_user_id_;
-    while (cur->parent_ != nullptr) {
-      DEBUG_ASSERT(cur->parent_->is_hidden());
-      auto parent = cur->parent_.get();
-      AssertHeld(parent->lock_);
-
-      if (parent->page_attribution_user_id_ == page_attribution_user_id_) {
-        uint64_t new_user_id = parent->left_child_locked().page_attribution_user_id_;
-        if (new_user_id == user_id_to_skip) {
-          new_user_id = parent->right_child_locked().page_attribution_user_id_;
-        }
-        // Although user IDs can be unset for VMOs that do not have a dispatcher, copy-on-write
-        // VMOs always have user level dispatchers, and should have a valid user-id set, hence we
-        // should never end up re-attributing a hidden parent with an unset id.
-        DEBUG_ASSERT(new_user_id != 0);
-        // The 'if' above should mean that the new_user_id isn't the ID we are trying to remove
-        // and isn't one we just used. For this to fail we either need a corrupt VMO hierarchy, or
-        // to have labeled two leaf nodes with the same user_id, which would also be incorrect as
-        // leaf nodes have unique dispatchers and hence unique ids.
-        DEBUG_ASSERT(new_user_id != page_attribution_user_id_ && new_user_id != user_id_to_skip);
-        parent->page_attribution_user_id_ = new_user_id;
-        user_id_to_skip = new_user_id;
-
-        cur = parent;
-      } else {
-        break;
-      }
-    }
-  }
-
   // Drop the child from our list, but don't recurse back into this function. Then
   // remove ourselves from the clone tree.
   DropChildLocked(typed_child);
@@ -1061,10 +766,14 @@
         descendant->OnUserChildRemoved(guard.take());
         return;
       }
-      if (descendant->left_child_locked().user_id_ == user_id_) {
-        descendant = &descendant->left_child_locked();
-      } else if (descendant->right_child_locked().user_id_ == user_id_) {
-        descendant = &descendant->right_child_locked();
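+      // |descendant| is hidden here, so it has exactly two children: the front and back of its
+      // children list are the left and right children respectively.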
+      VmObjectPaged* left = static_cast<VmObjectPaged*>(&descendant->children_list_.front());
+      VmObjectPaged* right = static_cast<VmObjectPaged*>(&descendant->children_list_.back());
+      AssertHeld(left->lock_ref());
+      AssertHeld(right->lock_ref());
+      if (left->user_id_locked() == user_id_) {
+        descendant = left;
+      } else if (right->user_id_locked() == user_id_) {
+        descendant = right;
       } else {
         descendant = nullptr;
       }
@@ -1072,211 +781,16 @@
   }
 }
 
-void VmObjectPaged::MergeContentWithChildLocked(VmObjectPaged* removed, bool removed_left) {
-  DEBUG_ASSERT(children_list_len_ == 1);
-  DEBUG_ASSERT(children_list_.front().is_paged());
-  VmObjectPaged& child = static_cast<VmObjectPaged&>(children_list_.front());
-  AssertHeld(child.lock_);
-  AssertHeld(removed->lock_);
-
-  list_node freed_pages;
-  list_initialize(&freed_pages);
-  BatchPQRemove page_remover(&freed_pages);
-
-  const uint64_t visibility_start_offset = child.parent_offset_ + child.parent_start_limit_;
-  const uint64_t merge_start_offset = child.parent_offset_;
-  const uint64_t merge_end_offset = child.parent_offset_ + child.parent_limit_;
-
-  // Hidden parents are not supposed to have page sources, but we assert it here anyway because a
-  // page source would make the way we move pages between objects incorrect, as we would break any
-  // potential back links.
-  DEBUG_ASSERT(!page_source_);
-
-  // If the hidden parent was the root of this vmo hierarchy, move the |hierarchy_generation_count_|
-  // into the remaining |child|.
-  if (!parent_) {
-    child.hierarchy_generation_count_ = hierarchy_generation_count_;
-  }
-
-  page_list_.RemovePages(page_remover.RemovePagesCallback(), 0, visibility_start_offset);
-  page_list_.RemovePages(page_remover.RemovePagesCallback(), merge_end_offset, MAX_SIZE);
-
-  if (child.parent_offset_ + child.parent_limit_ > parent_limit_) {
-    // Update the child's parent limit to ensure that it won't be able to see more
-    // of its new parent than this hidden vmo was able to see.
-    if (parent_limit_ < child.parent_offset_) {
-      child.parent_limit_ = 0;
-      child.parent_start_limit_ = 0;
-    } else {
-      child.parent_limit_ = parent_limit_ - child.parent_offset_;
-      child.parent_start_limit_ = ktl::min(child.parent_start_limit_, child.parent_limit_);
-    }
-  } else {
-    // The child will be able to see less of its new parent than this hidden vmo was
-    // able to see, so release any parent pages in that range.
-    ReleaseCowParentPagesLocked(merge_end_offset, parent_limit_, &page_remover);
-  }
-
-  if (removed->parent_offset_ + removed->parent_start_limit_ < visibility_start_offset) {
-    // If the removed former child has a smaller offset, then there are retained
-    // ancestor pages that will no longer be visible and thus should be freed.
-    ReleaseCowParentPagesLocked(removed->parent_offset_ + removed->parent_start_limit_,
-                                visibility_start_offset, &page_remover);
-  }
-
-  // Adjust the child's offset so it will still see the correct range.
-  bool overflow = add_overflow(parent_offset_, child.parent_offset_, &child.parent_offset_);
-  // Overflow here means that something went wrong when setting up parent limits.
-  DEBUG_ASSERT(!overflow);
-
-  if (child.is_hidden()) {
-    // After the merge, either |child| can't see anything in parent (in which case
-    // the parent limits could be anything), or |child|'s first visible offset will be
-    // at least as large as |this|'s first visible offset.
-    DEBUG_ASSERT(child.parent_start_limit_ == child.parent_limit_ ||
-                 parent_offset_ + parent_start_limit_ <=
-                     child.parent_offset_ + child.parent_start_limit_);
-  } else {
-    // non-hidden vmos should always have zero parent_start_limit_
-    DEBUG_ASSERT(child.parent_start_limit_ == 0);
-  }
-
-  // As we are moving pages between objects we need to make sure no backlinks are broken. We know
-  // there's no page_source_ and hence no pages will be in the pager_backed queue, but we could
-  // have pages in the unswappable_zero_forked queue. We do know that pages in this queue cannot
-  // have been pinned, so we can just move (or re-move potentially) any page that is not pinned
-  // into the unswappable queue.
-  {
-    PageQueues* pq = pmm_page_queues();
-    Guard<SpinLock, IrqSave> guard{pq->get_lock()};
-    page_list_.ForEveryPage([pq](auto* p, uint64_t off) {
-      if (p->IsPage()) {
-        vm_page_t* page = p->Page();
-        if (page->object.pin_count == 0) {
-          AssertHeld<Lock<SpinLock>, IrqSave>(*pq->get_lock());
-          pq->MoveToUnswappableLocked(page);
-        }
-      }
-      return ZX_ERR_NEXT;
-    });
-  }
-
-  // At this point, we need to merge |this|'s page list and |child|'s page list.
-  //
-  // In general, COW clones are expected to share most of their pages (i.e. to fork a relatively
-  // small number of pages). Because of this, it is preferable to do work proportional to the
-  // number of pages which were forked into |removed|. However, there are a few things that can
-  // prevent this:
-  //   - If |child|'s offset is non-zero then the offsets of all of |this|'s pages will
-  //     need to be updated when they are merged into |child|.
-  //   - If there has been a call to ReleaseCowParentPagesLocked which was not able to
-  //     update the parent limits, then there can exist pages in this vmo's page list
-  //     which are not visible to |child| but can't be easily freed based on its parent
-  //     limits. Finding these pages requires examining the split bits of all pages.
-  //   - If |child| is hidden, then there can exist pages in this vmo which were split into
-  //     |child|'s subtree and then migrated out of |child|. Those pages need to be freed, and
-  //     the simplest way to find those pages is to examine the split bits.
-  bool fast_merge = merge_start_offset == 0 && !partial_cow_release_ && !child.is_hidden();
-
-  if (fast_merge) {
-    // Only leaf vmos can be directly removed, so this must always be true. This guarantees
-    // that there are no pages that were split into |removed| that have since been migrated
-    // to its children.
-    DEBUG_ASSERT(!removed->is_hidden());
-
-    // Before merging, find any pages that are present in both |removed| and |this|. Those
-    // pages are visibile to |child| but haven't been written to through |child|, so
-    // their split bits need to be cleared. Note that ::ReleaseCowParentPagesLocked ensures
-    // that pages outside of the parent limit range won't have their split bits set.
-    removed->page_list_.ForEveryPageInRange(
-        [removed_offset = removed->parent_offset_, this](auto* page, uint64_t offset) {
-          AssertHeld(lock_);
-          if (page->IsMarker()) {
-            return ZX_ERR_NEXT;
-          }
-          VmPageOrMarker* page_or_mark = page_list_.Lookup(offset + removed_offset);
-          if (page_or_mark && page_or_mark->IsPage()) {
-            vm_page* p_page = page_or_mark->Page();
-            // The page is definitely forked into |removed|, but
-            // shouldn't be forked twice.
-            DEBUG_ASSERT(p_page->object.cow_left_split ^ p_page->object.cow_right_split);
-            p_page->object.cow_left_split = 0;
-            p_page->object.cow_right_split = 0;
-          }
-          return ZX_ERR_NEXT;
-        },
-        removed->parent_start_limit_, removed->parent_limit_);
-
-    list_node covered_pages;
-    list_initialize(&covered_pages);
-    BatchPQRemove covered_remover(&covered_pages);
-
-    // Now merge |child|'s pages into |this|, overwriting any pages present in |this|, and
-    // then move that list to |child|.
-
-    child.page_list_.MergeOnto(page_list_,
-                               [&covered_remover](vm_page_t* p) { covered_remover.Push(p); });
-    child.page_list_ = ktl::move(page_list_);
-
-    vm_page_t* p;
-    covered_remover.Flush();
-    list_for_every_entry (&covered_pages, p, vm_page_t, queue_node) {
-      // The page was already present in |child|, so it should be split at least
-      // once. And being split twice is obviously bad.
-      ASSERT(p->object.cow_left_split ^ p->object.cow_right_split);
-      ASSERT(p->object.pin_count == 0);
-    }
-    list_splice_after(&covered_pages, &freed_pages);
-  } else {
-    // Merge our page list into the child page list and update all the necessary metadata.
-    child.page_list_.MergeFrom(
-        page_list_, merge_start_offset, merge_end_offset,
-        [&page_remover](vm_page* page, uint64_t offset) { page_remover.Push(page); },
-        [&page_remover, removed_left](VmPageOrMarker* page_or_marker, uint64_t offset) {
-          DEBUG_ASSERT(page_or_marker->IsPage());
-          vm_page_t* page = page_or_marker->Page();
-          DEBUG_ASSERT(page->object.pin_count == 0);
-
-          if (removed_left ? page->object.cow_right_split : page->object.cow_left_split) {
-            // This happens when the pages was already migrated into child but then
-            // was migrated further into child's descendants. The page can be freed.
-            page = page_or_marker->ReleasePage();
-            page_remover.Push(page);
-          } else {
-            // Since we recursively fork on write, if the child doesn't have the
-            // page, then neither of its children do.
-            page->object.cow_left_split = 0;
-            page->object.cow_right_split = 0;
-          }
-        });
-  }
-
-  page_remover.Flush();
-  if (!list_is_empty(&freed_pages)) {
-    pmm_free(&freed_pages);
-  }
-}
-
 void VmObjectPaged::DumpLocked(uint depth, bool verbose) const {
   canary_.Assert();
 
   uint64_t parent_id = original_parent_user_id_;
 
-  size_t count = 0;
-  page_list_.ForEveryPage([&count](const auto* p, uint64_t) {
-    if (p->IsPage()) {
-      count++;
-    }
-    return ZX_ERR_NEXT;
-  });
-
   for (uint i = 0; i < depth; ++i) {
     printf("  ");
   }
-  printf("vmo %p/k%" PRIu64 " size %#" PRIx64 " offset %#" PRIx64 " start limit %#" PRIx64
-         " limit %#" PRIx64 " pages %zu ref %d parent %p/k%" PRIu64 "\n",
-         this, user_id_, size_, parent_offset_, parent_start_limit_, parent_limit_, count,
-         ref_count_debug(), parent_.get(), parent_id);
+  printf("vmo %p/k%" PRIu64 " ref %d parent %p/k%" PRIu64 "\n", this, user_id_, ref_count_debug(),
+         parent_.get(), parent_id);
 
   char name[ZX_MAX_NAME_LEN];
   get_name(name, sizeof(name));
@@ -1287,30 +801,7 @@
     printf("name %s\n", name);
   }
 
-  if (page_source_) {
-    for (uint i = 0; i < depth + 1; ++i) {
-      printf("  ");
-    }
-    page_source_->Dump();
-  }
-
-  if (verbose) {
-    auto f = [depth](const auto* p, uint64_t offset) {
-      for (uint i = 0; i < depth + 1; ++i) {
-        printf("  ");
-      }
-      if (p->IsMarker()) {
-        printf("offset %#" PRIx64 " zero page marker\n", offset);
-      } else {
-        vm_page_t* page = p->Page();
-        printf("offset %#" PRIx64 " page %p paddr %#" PRIxPTR "(%c%c)\n", offset, page,
-               page->paddr(), page->object.cow_left_split ? 'L' : '.',
-               page->object.cow_right_split ? 'R' : '.');
-      }
-      return ZX_ERR_NEXT;
-    };
-    page_list_.ForEveryPage(f);
-  }
+  cow_pages_locked()->DumpLocked(depth, verbose);
 }
 
 void VmObjectPaged::IncrementHierarchyGenerationCountLocked() {
@@ -1348,19 +839,13 @@
   return vmo->hierarchy_generation_count_;
 }
 
-size_t VmObjectPaged::AttributedPagesInRange(uint64_t offset, uint64_t len) const {
-  canary_.Assert();
-  Guard<Mutex> guard{&lock_};
-  return AttributedPagesInRangeLocked(offset, len);
-}
-
 size_t VmObjectPaged::AttributedPagesInRangeLocked(uint64_t offset, uint64_t len) const {
   if (is_hidden()) {
     return 0;
   }
 
   uint64_t new_len;
-  if (!TrimRange(offset, len, size_, &new_len)) {
+  if (!TrimRange(offset, len, size_locked(), &new_len)) {
     return 0;
   }
 
@@ -1370,7 +855,7 @@
   bool update_cached_attribution = false;
   // Use cached value if generation count has not changed since the last time we attributed pages.
   // Only applicable for attribution over the entire VMO, not a partial range.
-  if (offset == 0 && new_len == size_) {
+  if (offset == 0 && new_len == size_locked()) {
     vmo_attribution_queries_entire_object.Add(1);
     gen_count = GetHierarchyGenerationCountLocked();
 
@@ -1383,43 +868,7 @@
     }
   }
 
-  size_t page_count = 0;
-  // TODO: Decide who pages should actually be attribtued to.
-  page_list_.ForEveryPageAndGapInRange(
-      [&page_count](const auto* p, uint64_t off) {
-        if (p->IsPage()) {
-          page_count++;
-        }
-        return ZX_ERR_NEXT;
-      },
-      [this, &page_count](uint64_t gap_start, uint64_t gap_end) {
-        AssertHeld(lock_);
-
-        // If there's no parent, there's no pages to care about. If there is a non-hidden
-        // parent, then that owns any pages in the gap, not us.
-        if (!parent_ || !parent_->is_hidden()) {
-          return ZX_ERR_NEXT;
-        }
-
-        // Count any ancestor pages that should be attributed to us in the range. Ideally the whole
-        // range gets processed in one attempt, but in order to prevent unbounded stack growth with
-        // recursion we instead process partial ranges and recalculate the intermediate results.
-        // As a result instead of being O(n) in the number of committed pages it could
-        // pathologically become O(nd) where d is our depth in the vmo hierarchy.
-        uint64_t off = gap_start;
-        while (off < parent_limit_ && off < gap_end) {
-          uint64_t local_count = 0;
-          uint64_t attributed =
-              CountAttributedAncestorPagesLocked(off, gap_end - off, &local_count);
-          // |CountAttributedAncestorPagesLocked| guarantees that it will make progress.
-          DEBUG_ASSERT(attributed > 0);
-          off += attributed;
-          page_count += local_count;
-        }
-
-        return ZX_ERR_NEXT;
-      },
-      offset, offset + new_len);
+  size_t page_count = cow_pages_locked()->AttributedPagesInRangeLocked(offset, new_len);
 
   if (update_cached_attribution) {
     // Cache attributed page count along with current generation count.
@@ -1431,666 +880,6 @@
   return page_count;
 }
 
-uint64_t VmObjectPaged::CountAttributedAncestorPagesLocked(uint64_t offset, uint64_t size,
-                                                           uint64_t* count) const TA_REQ(lock_) {
-  // We need to walk up the ancestor chain to see if there are any pages that should be attributed
-  // to this vmo. We attempt operate on the entire range given to us but should we need to query
-  // the next parent for a range we trim our operating range. Trimming the range is necessary as
-  // we cannot recurse and otherwise have no way to remember where we were up to after processing
-  // the range in the parent. The solution then is to return all the way back up to the caller with
-  // a partial range and then effectively recompute the meta data at the point we were up to.
-
-  // Note that we cannot stop just because the page_attribution_user_id_ changes. This is because
-  // there might still be a forked page at the offset in question which should be attributed to
-  // this vmo. Whenever the attribution user id changes while walking up the ancestors, we need
-  // to determine if there is a 'closer' vmo in the sibling subtree to which the offset in
-  // question can be attributed, or if it should still be attributed to the current vmo.
-
-  DEBUG_ASSERT(offset < parent_limit_);
-  const VmObjectPaged* cur = this;
-  AssertHeld(cur->lock_);
-  uint64_t cur_offset = offset;
-  uint64_t cur_size = size;
-  // Count of how many pages we attributed as being owned by this vmo.
-  uint64_t attributed_ours = 0;
-  // Count how much we've processed. This is needed to remember when we iterate up the parent list
-  // at an offset.
-  uint64_t attributed = 0;
-  while (cur_offset < cur->parent_limit_) {
-    // For cur->parent_limit_ to be non-zero, it must have a parent.
-    DEBUG_ASSERT(cur->parent_);
-
-    const auto parent = cur->parent_.get();
-    AssertHeld(parent->lock_);
-    uint64_t parent_offset;
-    bool overflowed = add_overflow(cur->parent_offset_, cur_offset, &parent_offset);
-    DEBUG_ASSERT(!overflowed);                     // vmo creation should have failed
-    DEBUG_ASSERT(parent_offset <= parent->size_);  // parent_limit_ prevents this
-
-    const bool left = cur == &parent->left_child_locked();
-    const auto& sib = left ? parent->right_child_locked() : parent->left_child_locked();
-
-    // Work out how much of the desired size is actually visible to us in the parent, we just use
-    // this to walk the correct amount of the page_list_
-    const uint64_t parent_size = ktl::min(cur_size, cur->parent_limit_ - cur_offset);
-
-    // By default we expect to process the entire range, hence our next_size is 0. Should we need to
-    // iterate up the stack then these will be set by one of the callbacks.
-    uint64_t next_parent_offset = parent_offset + cur_size;
-    uint64_t next_size = 0;
-    parent->page_list_.ForEveryPageAndGapInRange(
-        [&parent, &cur, &attributed_ours, &sib](const auto* p, uint64_t off) {
-          AssertHeld(cur->lock_);
-          AssertHeld(sib.lock_);
-          AssertHeld(parent->lock_);
-          if (p->IsMarker()) {
-            return ZX_ERR_NEXT;
-          }
-          vm_page* page = p->Page();
-          if (
-              // Page is explicitly owned by us
-              (parent->page_attribution_user_id_ == cur->page_attribution_user_id_) ||
-              // If page has already been split and we can see it, then we know
-              // the sibling subtree can't see the page and thus it should be
-              // attributed to this vmo.
-              (page->object.cow_left_split || page->object.cow_right_split) ||
-              // If the sibling cannot access this page then its ours, otherwise we know there's
-              // a vmo in the sibling subtree which is 'closer' to this offset, and to which we will
-              // attribute the page to.
-              !(sib.parent_offset_ + sib.parent_start_limit_ <= off &&
-                off < sib.parent_offset_ + sib.parent_limit_)) {
-            attributed_ours++;
-          }
-          return ZX_ERR_NEXT;
-        },
-        [&parent, &cur, &next_parent_offset, &next_size, &sib](uint64_t gap_start,
-                                                               uint64_t gap_end) {
-          // Process a gap in the parent VMO.
-          //
-          // A gap in the parent VMO doesn't necessarily mean there are no pages
-          // in this range: our parent's ancestors may have pages, so we need to
-          // walk up the tree to find out.
-          //
-          // We don't always need to walk the tree though: in this this gap, both this VMO
-          // and our sibling VMO will share the same set of ancestor pages. However, the
-          // pages will only be accounted to one of the two VMOs.
-          //
-          // If the parent page_attribution_user_id is the same as us, we need to
-          // keep walking up the tree to perform a more accurate count.
-          //
-          // If the parent page_attribution_user_id is our sibling, however, we
-          // can just ignore the overlapping range: pages may or may not exist in
-          // the range --- but either way, they would be accounted to our sibling.
-          // Instead, we need only walk up ranges not visible to our sibling.
-          AssertHeld(cur->lock_);
-          AssertHeld(sib.lock_);
-          AssertHeld(parent->lock_);
-          uint64_t gap_size = gap_end - gap_start;
-          if (parent->page_attribution_user_id_ == cur->page_attribution_user_id_) {
-            // don't need to consider siblings as we own this range, but we do need to
-            // keep looking up the stack to find any actual pages.
-            next_parent_offset = gap_start;
-            next_size = gap_size;
-            return ZX_ERR_STOP;
-          }
-          // For this entire range we know that the offset is visible to the current vmo, and there
-          // are no committed or migrated pages. We need to check though for what portion of this
-          // range we should attribute to the sibling. Any range that we can attribute to the
-          // sibling we can skip, otherwise we have to keep looking up the stack to see if there are
-          // any pages that could be attributed to us.
-          uint64_t sib_offset, sib_len;
-          if (!GetIntersect(gap_start, gap_size, sib.parent_offset_ + sib.parent_start_limit_,
-                            sib.parent_limit_ - sib.parent_start_limit_, &sib_offset, &sib_len)) {
-            // No sibling ownership, so need to look at the whole range in the parent to find any
-            // pages.
-            next_parent_offset = gap_start;
-            next_size = gap_size;
-            return ZX_ERR_STOP;
-          }
-          // If the whole range is owned by the sibling, any pages that might be in
-          // it won't be accounted to us anyway. Skip the segment.
-          if (sib_len == gap_size) {
-            DEBUG_ASSERT(sib_offset == gap_start);
-            return ZX_ERR_NEXT;
-          }
-
-          // Otherwise, inspect the range not visible to our sibling.
-          if (sib_offset == gap_start) {
-            next_parent_offset = sib_offset + sib_len;
-            next_size = gap_end - next_parent_offset;
-          } else {
-            next_parent_offset = gap_start;
-            next_size = sib_offset - gap_start;
-          }
-          return ZX_ERR_STOP;
-        },
-        parent_offset, parent_offset + parent_size);
-    if (next_size == 0) {
-      // If next_size wasn't set then we don't need to keep looking up the chain as we successfully
-      // looked at the entire range.
-      break;
-    }
-    // Count anything up to the next starting point as being processed.
-    attributed += next_parent_offset - parent_offset;
-    // Size should have been reduced by at least the amount we just attributed
-    DEBUG_ASSERT(next_size <= cur_size &&
-                 cur_size - next_size >= next_parent_offset - parent_offset);
-
-    cur = parent;
-    cur_offset = next_parent_offset;
-    cur_size = next_size;
-  }
-  // Exiting the loop means we either ceased finding a relevant parent for the range, or we were
-  // able to process the entire range without needing to look up to a parent, in either case we
-  // can consider the entire range as attributed.
-  //
-  // The cur_size can be larger than the value of parent_size from the last loop iteration. This is
-  // fine as that range we trivially know has zero pages in it, and therefore has zero pages to
-  // determine attributions off.
-  attributed += cur_size;
-
-  *count = attributed_ours;
-  return attributed;
-}
-
-zx_status_t VmObjectPaged::AddPage(vm_page_t* p, uint64_t offset) {
-  if (p->object.pin_count) {
-    return ZX_ERR_INVALID_ARGS;
-  }
-
-  Guard<Mutex> guard{&lock_};
-
-  VmPageOrMarker page = VmPageOrMarker::Page(p);
-  zx_status_t result = AddPageLocked(&page, offset);
-  if (result != ZX_OK) {
-    // Leave ownership of `p` with the caller.
-    page.ReleasePage();
-  }
-  return result;
-}
-
-zx_status_t VmObjectPaged::AddPageLocked(VmPageOrMarker* p, uint64_t offset, bool do_range_update) {
-  canary_.Assert();
-  DEBUG_ASSERT(lock_.lock().IsHeld());
-
-  if (p->IsPage()) {
-    LTRACEF("vmo %p, offset %#" PRIx64 ", page %p (%#" PRIxPTR ")\n", this, offset, p->Page(),
-            p->Page()->paddr());
-  } else {
-    DEBUG_ASSERT(p->IsMarker());
-    LTRACEF("vmo %p, offset %#" PRIx64 ", marker\n", this, offset);
-  }
-
-  if (offset >= size_) {
-    return ZX_ERR_OUT_OF_RANGE;
-  }
-
-  VmPageOrMarker* page = page_list_.LookupOrAllocate(offset);
-  if (!page) {
-    return ZX_ERR_NO_MEMORY;
-  }
-  // Only fail on pages, we overwrite markers and empty slots.
-  if (page->IsPage()) {
-    return ZX_ERR_ALREADY_EXISTS;
-  }
-  // If this is actually a real page, we need to place it into the appropriate queue.
-  if (p->IsPage()) {
-    vm_page_t* page = p->Page();
-    DEBUG_ASSERT(page->object.pin_count == 0);
-    SetNotWired(page, offset);
-  }
-  *page = ktl::move(*p);
-
-  if (do_range_update) {
-    // other mappings may have covered this offset into the vmo, so unmap those ranges
-    RangeChangeUpdateLocked(offset, PAGE_SIZE, RangeChangeOp::Unmap);
-  }
-
-  return ZX_OK;
-}
-
-bool VmObjectPaged::IsUniAccessibleLocked(vm_page_t* page, uint64_t offset) const {
-  DEBUG_ASSERT(page_list_.Lookup(offset)->Page() == page);
-
-  if (page->object.cow_right_split || page->object.cow_left_split) {
-    return true;
-  }
-
-  if (offset < left_child_locked().parent_offset_ + left_child_locked().parent_start_limit_ ||
-      offset >= left_child_locked().parent_offset_ + left_child_locked().parent_limit_) {
-    return true;
-  }
-
-  if (offset < right_child_locked().parent_offset_ + right_child_locked().parent_start_limit_ ||
-      offset >= right_child_locked().parent_offset_ + right_child_locked().parent_limit_) {
-    return true;
-  }
-
-  return false;
-}
-
-vm_page_t* VmObjectPaged::CloneCowPageLocked(uint64_t offset, list_node_t* free_list,
-                                             VmObjectPaged* page_owner, vm_page_t* page,
-                                             uint64_t owner_offset) {
-  DEBUG_ASSERT(page != vm_get_zero_page());
-  DEBUG_ASSERT(parent_);
-
-  // To avoid the need for rollback logic on allocation failure, we start the forking
-  // process from the root-most vmo and work our way towards the leaf vmo. This allows
-  // us to maintain the hidden vmo invariants through the whole operation, so that we
-  // can stop at any point.
-  //
-  // To set this up, walk from the leaf to |page_owner|, and keep track of the
-  // path via |stack_.dir_flag|.
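-  // As a rough illustration (hypothetical hierarchy): for a chain leaf L -> hidden H1 -> hidden H2
-  // where H2 is |page_owner|, this walk sets H1->stack_.dir_flag towards L and H2->stack_.dir_flag
-  // towards H1, so the loop below can retrace H2 -> H1 -> L, migrating or forking the page at each
-  // level on the way back down.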
-  VmObjectPaged* cur = this;
-  do {
-    AssertHeld(cur->lock_);
-    VmObjectPaged* next = cur->parent_.get();
-    // We can't make COW clones of physical vmos, so this can only happen if we
-    // somehow don't find |page_owner| in the ancestor chain.
-    DEBUG_ASSERT(next);
-    AssertHeld(next->lock_);
-
-    next->stack_.dir_flag = &next->left_child_locked() == cur ? StackDir::Left : StackDir::Right;
-    if (next->stack_.dir_flag == StackDir::Right) {
-      DEBUG_ASSERT(&next->right_child_locked() == cur);
-    }
-    cur = next;
-  } while (cur != page_owner);
-  uint64_t cur_offset = owner_offset;
-
-  // |target_page| is the page we're considering for migration. Cache it
-  // across loop iterations.
-  vm_page_t* target_page = page;
-
-  bool alloc_failure = false;
-
-  // As long as we're simply migrating |page|, there's no need to update any vmo mappings, since
-  // that means the other side of the clone tree has already covered |page| and the current side
-  // of the clone tree will still see |page|. As soon as we insert a new page, we'll need to
-  // update all mappings at or below that level.
-  bool skip_range_update = true;
-  do {
-    // |target_page| is always located in |cur| at |cur_offset| at the start of the loop.
-    VmObjectPaged* target_page_owner = cur;
-    AssertHeld(target_page_owner->lock_);
-    uint64_t target_page_offset = cur_offset;
-
-    cur = cur->stack_.dir_flag == StackDir::Left ? &cur->left_child_locked()
-                                                 : &cur->right_child_locked();
-    DEBUG_ASSERT(cur_offset >= cur->parent_offset_);
-    cur_offset -= cur->parent_offset_;
-
-    if (target_page_owner->IsUniAccessibleLocked(target_page, target_page_offset)) {
-      // If the page we're covering in the parent is uni-accessible, then we
-      // can directly move the page.
-
-      // Assert that we're not trying to split the page the same direction two times. Either
-      // some tracking state got corrupted or a page in the subtree we're trying to
-      // migrate to got improperly migrated/freed. If we did this migration, then the
-      // opposite subtree would lose access to this page.
-      DEBUG_ASSERT(!(target_page_owner->stack_.dir_flag == StackDir::Left &&
-                     target_page->object.cow_left_split));
-      DEBUG_ASSERT(!(target_page_owner->stack_.dir_flag == StackDir::Right &&
-                     target_page->object.cow_right_split));
-
-      target_page->object.cow_left_split = 0;
-      target_page->object.cow_right_split = 0;
-      VmPageOrMarker removed = target_page_owner->page_list_.RemovePage(target_page_offset);
-      vm_page* removed_page = removed.ReleasePage();
-      pmm_page_queues()->Remove(removed_page);
-      DEBUG_ASSERT(removed_page == target_page);
-    } else {
-      // Otherwise we need to fork the page.
-      vm_page_t* cover_page;
-      alloc_failure = !AllocateCopyPage(pmm_alloc_flags_, page->paddr(), free_list, &cover_page);
-      if (unlikely(alloc_failure)) {
-        // TODO: plumb through PageRequest once anonymous page source is implemented.
-        break;
-      }
-
-      // We're going to cover target_page with cover_page, so set appropriate split bit.
-      if (target_page_owner->stack_.dir_flag == StackDir::Left) {
-        target_page->object.cow_left_split = 1;
-        DEBUG_ASSERT(target_page->object.cow_right_split == 0);
-      } else {
-        target_page->object.cow_right_split = 1;
-        DEBUG_ASSERT(target_page->object.cow_left_split == 0);
-      }
-      target_page = cover_page;
-
-      skip_range_update = false;
-    }
-
-    // Skip the automatic range update so we can do it ourselves more efficiently.
-    VmPageOrMarker add_page = VmPageOrMarker::Page(target_page);
-    zx_status_t status = cur->AddPageLocked(&add_page, cur_offset, false);
-    DEBUG_ASSERT(status == ZX_OK);
-
-    if (!skip_range_update) {
-      if (cur != this) {
-        // In this case, cur is a hidden vmo and has no direct mappings. Also, its
-        // descendants along the page stack will be dealt with by subsequent iterations
-        // of this loop. That means that any mappings that need to be touched now are
-        // owned by the children on the opposite side of stack_.dir_flag.
-        DEBUG_ASSERT(cur->mapping_list_len_ == 0);
-        VmObjectPaged& other = cur->stack_.dir_flag == StackDir::Left ? cur->right_child_locked()
-                                                                      : cur->left_child_locked();
-        AssertHeld(other.lock_);
-        RangeChangeList list;
-        other.RangeChangeUpdateFromParentLocked(cur_offset, PAGE_SIZE, &list);
-        RangeChangeUpdateListLocked(&list, RangeChangeOp::Unmap);
-      } else {
-        // In this case, cur is the last vmo being changed, so update its whole subtree.
-        DEBUG_ASSERT(offset == cur_offset);
-        RangeChangeUpdateLocked(offset, PAGE_SIZE, RangeChangeOp::Unmap);
-      }
-    }
-  } while (cur != this);
-  DEBUG_ASSERT(alloc_failure || cur_offset == offset);
-
-  if (unlikely(alloc_failure)) {
-    return nullptr;
-  } else {
-    return target_page;
-  }
-}
-
-zx_status_t VmObjectPaged::CloneCowPageAsZeroLocked(uint64_t offset, list_node_t* free_list,
-                                                    VmObjectPaged* page_owner, vm_page_t* page,
-                                                    uint64_t owner_offset) {
-  DEBUG_ASSERT(parent_);
-
-  // Ensure we have a slot as we'll need it later.
-  VmPageOrMarker* slot = page_list_.LookupOrAllocate(offset);
-
-  if (!slot) {
-    return ZX_ERR_NO_MEMORY;
-  }
-
-  // We cannot be forking a page to here if there's already something.
-  DEBUG_ASSERT(slot->IsEmpty());
-
-  // Need to make sure the page is duplicated as far as our parent. Then we can pretend
-  // that we have forked it into us by setting the marker.
-  AssertHeld(parent_->lock_);
-  if (page_owner != parent_.get()) {
-    // Do not pass free_list here as this wants a free_list to allocate from, whereas our free_list
-    // is for collecting pages to be freed.
-    page = parent_->CloneCowPageLocked(offset + parent_offset_, nullptr, page_owner, page,
-                                       owner_offset);
-    if (page == nullptr) {
-      return ZX_ERR_NO_MEMORY;
-    }
-  }
-
-  bool left = this == &(parent_->left_child_locked());
-  // Page is in our parent. Check if it's uni-accessible; if so we can free it.
-  if (parent_->IsUniAccessibleLocked(page, offset + parent_offset_)) {
-    // Make sure we didn't already merge the page in this direction.
-    DEBUG_ASSERT(!(left && page->object.cow_left_split));
-    DEBUG_ASSERT(!(!left && page->object.cow_right_split));
-    vm_page* removed = parent_->page_list_.RemovePage(offset + parent_offset_).ReleasePage();
-    DEBUG_ASSERT(removed == page);
-    pmm_page_queues()->Remove(removed);
-    DEBUG_ASSERT(!list_in_list(&removed->queue_node));
-    list_add_tail(free_list, &removed->queue_node);
-  } else {
-    if (left) {
-      page->object.cow_left_split = 1;
-    } else {
-      page->object.cow_right_split = 1;
-    }
-  }
-  // Insert the zero marker.
-  *slot = VmPageOrMarker::Marker();
-  return ZX_OK;
-}
-
-VmPageOrMarker* VmObjectPaged::FindInitialPageContentLocked(uint64_t offset,
-                                                            VmObjectPaged** owner_out,
-                                                            uint64_t* owner_offset_out,
-                                                            uint64_t* owner_id_out) {
-  // Search up the clone chain for any committed pages. cur_offset is the offset
-  // into cur we care about. The loop terminates either when that offset contains
-  // a committed page or when that offset can't reach into the parent.
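-  // As an illustrative example (hypothetical offsets): a clone with parent_offset_ = 0x2000 looking
-  // up its own offset 0x1000 examines its parent at offset 0x3000; if that slot is empty and the
-  // limits still permit, the walk repeats with the grandparent, accumulating each parent_offset_,
-  // until a page or marker is found or the offset falls outside a parent.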
-  VmPageOrMarker* page = nullptr;
-  VmObjectPaged* cur = this;
-  AssertHeld(cur->lock_);
-  uint64_t cur_offset = offset;
-  while (cur_offset < cur->parent_limit_) {
-    VmObjectPaged* parent = cur->parent_.get();
-    // If there's no parent, then parent_limit_ is 0 and we'll never enter the loop
-    DEBUG_ASSERT(parent);
-    AssertHeld(parent->lock_ref());
-
-    uint64_t parent_offset;
-    bool overflowed = add_overflow(cur->parent_offset_, cur_offset, &parent_offset);
-    ASSERT(!overflowed);
-    if (parent_offset >= parent->size()) {
-      // The offset is off the end of the parent, so cur is the VmObjectPaged
-      // which will provide the page.
-      break;
-    }
-
-    cur = parent;
-    cur_offset = parent_offset;
-    VmPageOrMarker* p = cur->page_list_.Lookup(parent_offset);
-    if (p && !p->IsEmpty()) {
-      page = p;
-      break;
-    }
-  }
-
-  *owner_out = cur;
-  *owner_offset_out = cur_offset;
-  *owner_id_out = cur->user_id_locked();
-
-  return page;
-}
-
-void VmObjectPaged::UpdateOnAccessLocked(vm_page_t* page, uint64_t offset) {
-  // The only kind of page where there is anything to update on access is a pager backed page.
-  // To that end we first want to determine, with certainty, that the provided page is in fact in
-  // the pager backed queue.
-
-  if (page == vm_get_zero_page()) {
-    return;
-  }
-  // Check if we have a page_source_. If we don't have one then none of our pages can be pager
-  // backed, so we can abort.
-  if (!page_source_) {
-    return;
-  }
-  // We know there is a page source and so most of the pages will be in the pager backed queue, with
-  // the exception of any pages that are pinned; those will be in the wired queue, so we need to
-  // skip them.
-  if (page->object.pin_count != 0) {
-    return;
-  }
-
-  // These asserts are for sanity; the above checks should have caused us to abort if these aren't
-  // true.
-  DEBUG_ASSERT(page->object.get_object() == reinterpret_cast<void*>(this));
-  DEBUG_ASSERT(page->object.get_page_offset() == offset);
-  // Although the page is already in the pager backed queue, this move causes it to be moved to
-  // the front of the first queue, indicating that it was recently accessed.
-  pmm_page_queues()->MoveToPagerBacked(page, this, offset);
-}
-
-// Looks up the page at the requested offset, faulting it in if requested and necessary.  If
-// this VMO has a parent and the requested page isn't found, the parent will be searched.
-//
-// |free_list|, if not NULL, is a list of allocated but unused vm_page_t that
-// this function may allocate from.  This function will need at most one entry,
-// and will not fail if |free_list| is a non-empty list, faulting in was requested,
-// and offset is in range.
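-// A rough usage sketch (illustrative only, not an actual call site): with lock_ held, a caller
-// servicing a write fault might do roughly
-//   vm_page_t* page = nullptr;
-//   paddr_t pa = 0;
-//   zx_status_t st = GetPageLocked(offset, VMM_PF_FLAG_WRITE | VMM_PF_FLAG_HW_FAULT, nullptr,
-//                                  &page_request, &page, &pa);
-// and, if st is ZX_ERR_SHOULD_WAIT, wait on |page_request| and retry the fault.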
-zx_status_t VmObjectPaged::GetPageLocked(uint64_t offset, uint pf_flags, list_node* free_list,
-                                         PageRequest* page_request, vm_page_t** const page_out,
-                                         paddr_t* const pa_out) {
-  canary_.Assert();
-  DEBUG_ASSERT(!is_hidden());
-
-  if (offset >= size_) {
-    return ZX_ERR_OUT_OF_RANGE;
-  }
-
-  offset = ROUNDDOWN(offset, PAGE_SIZE);
-
-  if (is_slice()) {
-    uint64_t parent_offset;
-    VmObjectPaged* parent = PagedParentOfSliceLocked(&parent_offset);
-    AssertHeld(parent->lock_);
-    return parent->GetPageLocked(offset + parent_offset, pf_flags, free_list, page_request,
-                                 page_out, pa_out);
-  }
-
-  VmPageOrMarker* page_or_mark = page_list_.Lookup(offset);
-  vm_page* p = nullptr;
-  VmObjectPaged* page_owner;
-  uint64_t owner_offset;
-  uint64_t owner_id;
-  if (page_or_mark && page_or_mark->IsPage()) {
-    // This is the common case where we have the page and don't need to do anything more, so
-    // return it straight away.
-    vm_page_t* p = page_or_mark->Page();
-    UpdateOnAccessLocked(p, offset);
-    if (page_out) {
-      *page_out = p;
-    }
-    if (pa_out) {
-      *pa_out = p->paddr();
-    }
-    return ZX_OK;
-  }
-
-  // Get content from parent if available, otherwise accept that we are the owner of the
-  // yet-to-exist page.
-  if ((!page_or_mark || page_or_mark->IsEmpty()) && parent_) {
-    page_or_mark = FindInitialPageContentLocked(offset, &page_owner, &owner_offset, &owner_id);
-  } else {
-    page_owner = this;
-    owner_offset = offset;
-    owner_id = user_id_locked();
-  }
-
-  // At this point we might not have an actual page, but we should at least have a notional owner.
-  DEBUG_ASSERT(page_owner);
-
-  __UNUSED char pf_string[5];
-  LTRACEF("vmo %p, offset %#" PRIx64 ", pf_flags %#x (%s)\n", this, offset, pf_flags,
-          vmm_pf_flags_to_string(pf_flags, pf_string));
-
-  // We need to turn this potential page or marker into a real vm_page_t. This means failing cases
-  // that we cannot handle, determining whether we can substitute the zero_page and potentially
-  // consulting a page_source.
-  if (page_or_mark && page_or_mark->IsPage()) {
-    p = page_or_mark->Page();
-  } else {
-    // If we don't have a real page and we're not sw or hw faulting in the page, return not found.
-    if ((pf_flags & VMM_PF_FLAG_FAULT_MASK) == 0) {
-      return ZX_ERR_NOT_FOUND;
-    }
-
-    // We need to get a real page as our initial content. At this point we are either starting from
-    // the zero page, or something supplied from a page source. The page source only fills in if we
-    // have a true absence of content.
-    if ((page_or_mark && page_or_mark->IsMarker()) || !page_owner->page_source_) {
-      // Either no relevant page source or this is a known marker, in which case the content is
-      // the zero page.
-      p = vm_get_zero_page();
-    } else {
-      VmoDebugInfo vmo_debug_info = {.vmo_ptr = reinterpret_cast<uintptr_t>(page_owner),
-                                     .vmo_id = owner_id};
-      zx_status_t status = page_owner->page_source_->GetPage(owner_offset, page_request,
-                                                             vmo_debug_info, &p, nullptr);
-      // Pager page sources will never synchronously return a page.
-      DEBUG_ASSERT(status != ZX_OK);
-
-      if (page_owner != this && status == ZX_ERR_NOT_FOUND) {
-        // The default behavior of clones of detached pager VMOs is to fault in zero
-        // pages instead of propagating the pager's fault.
-        // TODO: Add an arg to zx_vmo_create_child to optionally fault here.
-        p = vm_get_zero_page();
-      } else {
-        return status;
-      }
-    }
-  }
-
-  // If we made it this far we must have some valid vm_page in |p|. Although this may be the zero
-  // page, the rest of this function is tolerant towards correctly forking it.
-  DEBUG_ASSERT(p);
-  // It's possible that we are going to fork the page, and the user isn't actually going to directly
-  // use `p`, but creating the fork still uses `p` so we want to consider it accessed.
-  AssertHeld(page_owner->lock_);
-  page_owner->UpdateOnAccessLocked(p, owner_offset);
-
-  if ((pf_flags & VMM_PF_FLAG_WRITE) == 0) {
-    // If we're read-only faulting, return the page so they can map or read from it directly.
-    if (page_out) {
-      *page_out = p;
-    }
-    if (pa_out) {
-      *pa_out = p->paddr();
-    }
-    LTRACEF("read only faulting in page %p, pa %#" PRIxPTR " from parent\n", p, p->paddr());
-    return ZX_OK;
-  }
-
-  vm_page_t* res_page;
-  if (!page_owner->is_hidden() || p == vm_get_zero_page()) {
-    // If the vmo isn't hidden, we can't move the page. If the page is the zero
-    // page, there's no need to try to move the page. In either case, we need to
-    // allocate a writable page for this vmo.
-    if (!AllocateCopyPage(pmm_alloc_flags_, p->paddr(), free_list, &res_page)) {
-      return ZX_ERR_NO_MEMORY;
-    }
-    VmPageOrMarker insert = VmPageOrMarker::Page(res_page);
-    zx_status_t status = AddPageLocked(&insert, offset);
-    if (status != ZX_OK) {
-      // AddPageLocked failing for any other reason is a programming error.
-      DEBUG_ASSERT_MSG(status == ZX_ERR_NO_MEMORY, "status=%d\n", status);
-      pmm_free_page(insert.ReleasePage());
-      return status;
-    }
-    // Interpret a software fault as an explicit desire to have potential zero pages and don't
-    // consider them for cleaning; this is an optimization.
-    // We explicitly must *not* place pages from a page_source_ into the zero scanning queue.
-    if (p == vm_get_zero_page() && !page_source_ && !(pf_flags & VMM_PF_FLAG_SW_FAULT)) {
-      pmm_page_queues()->MoveToUnswappableZeroFork(res_page, this, offset);
-    }
-
-    // This is the only path where we can allocate a new page without being a clone (clones are
-    // always cached). So we check here if we are not fully cached and if so perform a
-    // clean/invalidate to flush our zeroes. After doing this we will not touch the page via the
-    // physmap and so we can pretend there isn't an aliased mapping.
-    if (cache_policy_ != ARCH_MMU_FLAG_CACHED) {
-      arch_clean_invalidate_cache_range((vaddr_t)paddr_to_physmap(res_page->paddr()), PAGE_SIZE);
-    }
-  } else {
-    // We need a writable page; let ::CloneCowPageLocked handle inserting one.
-    res_page = CloneCowPageLocked(offset, free_list, page_owner, p, owner_offset);
-    if (res_page == nullptr) {
-      return ZX_ERR_NO_MEMORY;
-    }
-  }
-
-  LTRACEF("faulted in page %p, pa %#" PRIxPTR "\n", res_page, res_page->paddr());
-
-  if (page_out) {
-    *page_out = res_page;
-  }
-  if (pa_out) {
-    *pa_out = res_page->paddr();
-  }
-
-  // If we made it here, we committed a new page in this VMO.
-  IncrementHierarchyGenerationCountLocked();
-
-  return ZX_OK;
-}
-
 zx_status_t VmObjectPaged::CommitRangeInternal(uint64_t offset, uint64_t len, bool pin,
                                                Guard<Mutex>&& adopt) {
   canary_.Assert();
@@ -2101,33 +890,6 @@
   // Convince the static analysis that we now do actually hold lock_.
   AssertHeld(lock_);
 
-  // If a pin is requested the entire range must exist and be valid,
-  // otherwise we can commit a partial range.
-  uint64_t new_len = len;
-  if (pin) {
-    // If pinning we explicitly forbid zero length pins as we cannot guarantee consistent semantics.
-    // For example pinning a zero length range outside the range of the VMO is an error, and so
-    // pinning a zero length range inside the vmo and then resizing the VMO smaller than the pin
-    // region should also be an error. The simplest way to enforce this without new metadata to
-    // track zero length pin regions is to just forbid them. Note that the user entry points for
-    // pinning already forbid zero length ranges.
-    if (len == 0) {
-      return ZX_ERR_INVALID_ARGS;
-    }
-    // verify that the range is within the object
-    if (unlikely(!InRange(offset, len, size_))) {
-      return ZX_ERR_OUT_OF_RANGE;
-    }
-  } else {
-    if (!TrimRange(offset, len, size_, &new_len)) {
-      return ZX_ERR_OUT_OF_RANGE;
-    }
-    // was in range, just zero length
-    if (new_len == 0) {
-      return ZX_OK;
-    }
-  }
-
   // Child slices of VMOs are currently not resizable, nor can they be made
   // from resizable parents.  If this ever changes, the logic surrounding what
   // to do if a VMO gets resized during a Commit or Pin operation will need to
@@ -2136,282 +898,25 @@
   // change if the operation is being executed against a child slice.
   DEBUG_ASSERT(!is_resizable() || !is_slice());
 
-  if (is_slice()) {
-    uint64_t parent_offset;
-    VmObjectPaged* parent = PagedParentOfSliceLocked(&parent_offset);
-    AssertHeld(parent->lock_);
-
-    // PagedParentOfSliceLocked will walk all of the way up the VMO hierarchy
-    // until it hits a non-slice VMO.  This guarantees that we should only ever
-    // recurse once instead of an unbounded number of times.  DEBUG_ASSERT this so
-    // that we don't actually end up with unbounded recursion just in case the
-    // property changes.
-    DEBUG_ASSERT(!parent->is_slice());
-
-    return parent->CommitRangeInternal(offset + parent_offset, new_len, pin, guard.take());
-  }
-
-  // compute a page aligned end to do our searches in to make sure we cover all the pages
-  uint64_t end = ROUNDUP_PAGE_SIZE(offset + new_len);
-  DEBUG_ASSERT(end > offset);
-  offset = ROUNDDOWN(offset, PAGE_SIZE);
-
-  fbl::RefPtr<PageSource> root_source = GetRootPageSourceLocked();
-
-  // If this vmo has a direct page source, then the source will provide the backing memory. For
-  // children that eventually depend on a page source, we skip preallocating memory to avoid
-  // potentially overallocating pages if something else touches the vmo while we're blocked on the
-  // request. Otherwise we optimize things by preallocating all the pages.
-  list_node page_list;
-  list_initialize(&page_list);
-  if (root_source == nullptr) {
-    // make a pass through the list to find out how many pages we need to allocate
-    size_t count = (end - offset) / PAGE_SIZE;
-    page_list_.ForEveryPageInRange(
-        [&count](const auto* p, auto off) {
-          if (p->IsPage()) {
-            count--;
-          }
-          return ZX_ERR_NEXT;
-        },
-        offset, end);
-
-    if (count == 0 && !pin) {
-      return ZX_OK;
-    }
-
-    zx_status_t status = pmm_alloc_pages(count, pmm_alloc_flags_, &page_list);
-    if (status != ZX_OK) {
-      return status;
-    }
-  }
-
-  auto list_cleanup = fbl::MakeAutoCall([&page_list]() {
-    if (!list_is_empty(&page_list)) {
-      pmm_free(&page_list);
-    }
-  });
-
-  // Should any errors occur we need to unpin everything.
-  auto pin_cleanup = fbl::MakeAutoCall([this, original_offset = offset, &offset, pin]() {
-    // Regardless of any resizes or other things that may have happened any pinned pages *must*
-    // still be within a valid range, and so we know Unpin should succeed. The edge case is if we
-    // had failed to pin *any* pages and so our original offset may be outside the current range of
-    // the vmo. Additionally, as pinning a zero length range is invalid, so is unpinning, and so we
-    // must avoid doing so.
-    if (pin && offset > original_offset) {
-      AssertHeld(*lock());
-      UnpinLocked(original_offset, offset - original_offset);
-    }
-  });
-
-  bool retry = false;
-  PageRequest page_request(true);
-  do {
-    if (retry) {
-      // If there was a page request that couldn't be fulfilled, we need to wait on the
-      // request and retry the commit. Note that when we retry the loop, offset is
-      // updated past the portion of the vmo that we successfully committed.
-      zx_status_t status = ZX_OK;
-      guard.CallUnlocked([&page_request, &status]() mutable { status = page_request.Wait(); });
-      if (status != ZX_OK) {
-        if (status == ZX_ERR_TIMED_OUT) {
-          DumpLocked(0, false);
-        }
-        return status;
-      }
-      retry = false;
-
-      // Re-run the range checks, since size_ could have changed while we were blocked. This
-      // is not a failure, since the arguments were valid when the syscall was made. It's as
-      // if the commit was successful but then the pages were thrown away, unless we are pinning,
-      // in which case pages being thrown away is explicitly an error.
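-      // For example (hypothetical sizes): if the commit started against a 10 page vmo and a
-      // concurrent resize shrank it to 4 pages while we were blocked, a plain commit simply trims
-      // to the new size and reports success, whereas a pinning commit fails with
-      // ZX_ERR_OUT_OF_RANGE.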
-      new_len = len;
-      if (pin) {
-        // verify that the range is within the object
-        if (unlikely(!InRange(offset, len, size_))) {
-          return ZX_ERR_OUT_OF_RANGE;
-        }
-      } else {
-        if (!TrimRange(offset, len, size_, &new_len)) {
-          pin_cleanup.cancel();
-          return ZX_OK;
-        }
-        if (new_len == 0) {
-          pin_cleanup.cancel();
-          return ZX_OK;
-        }
-      }
-
-      end = ROUNDUP_PAGE_SIZE(offset + new_len);
-      DEBUG_ASSERT(end > offset);
-    }
-
-    // Remember what our offset was prior to attempting to commit.
-    const uint64_t prev_offset = offset;
-
-    // cur_offset tracks how far we've made page requests, even if they're not done.
-    uint64_t cur_offset = offset;
-    while (cur_offset < end) {
-      // Don't commit if we already have this page
-      VmPageOrMarker* p = page_list_.Lookup(cur_offset);
-      vm_page_t* page = nullptr;
-      if (!p || !p->IsPage()) {
-        // Check if our parent has the page
-        const uint flags = VMM_PF_FLAG_SW_FAULT | VMM_PF_FLAG_WRITE;
-        zx_status_t res =
-            GetPageLocked(cur_offset, flags, &page_list, &page_request, &page, nullptr);
-        if (res == ZX_ERR_NEXT || res == ZX_ERR_SHOULD_WAIT) {
-          // In either case we'll need to wait on the request and retry, but if we get
-          // ZX_ERR_NEXT we keep faulting until we eventually see ZX_ERR_SHOULD_WAIT.
-          retry = true;
-          if (res == ZX_ERR_SHOULD_WAIT) {
-            break;
-          }
-        } else if (res != ZX_OK) {
-          return res;
-        }
-      } else {
-        page = p->Page();
-      }
-
-      if (!retry) {
-        // As long as we're not in the retry state cur_offset and offset should track.
-        DEBUG_ASSERT(offset == cur_offset);
-        // Pin the page if needed and then formally commit by increasing our working offset.
-        if (pin) {
-          DEBUG_ASSERT(page->state() == VM_PAGE_STATE_OBJECT);
-          if (page->object.pin_count == VM_PAGE_OBJECT_MAX_PIN_COUNT) {
-            return ZX_ERR_UNAVAILABLE;
-          }
-
-          page->object.pin_count++;
-          if (page->object.pin_count == 1) {
-            pmm_page_queues()->MoveToWired(page);
-          }
-          // Pinning every page in the largest vmo possible as many times as possible can't overflow
-          static_assert(VmObjectPaged::MAX_SIZE / PAGE_SIZE <
-                        UINT64_MAX / VM_PAGE_OBJECT_MAX_PIN_COUNT);
-          pinned_page_count_++;
-        }
-        offset += PAGE_SIZE;
-        len -= PAGE_SIZE;
-      }
-      cur_offset += PAGE_SIZE;
-    }
-
-    // Unmap all of the pages in the range we touched. This may end up unmapping non-present
-    // ranges or unmapping things multiple times, but it's necessary to ensure that we unmap
-    // everything that actually is present before anything else sees it.
-    if (cur_offset - prev_offset) {
-      RangeChangeUpdateLocked(offset, cur_offset - prev_offset, RangeChangeOp::Unmap);
-    }
-
-    if (retry && cur_offset == end) {
-      zx_status_t res = root_source->FinalizeRequest(&page_request);
-      if (res != ZX_ERR_SHOULD_WAIT) {
-        return res;
-      }
-    }
-  } while (retry);
-
-  pin_cleanup.cancel();
-  return ZX_OK;
+  return cow_pages_locked()->CommitRange(offset, len, pin, ktl::move(guard));
 }
 
 zx_status_t VmObjectPaged::DecommitRange(uint64_t offset, uint64_t len) {
   canary_.Assert();
   LTRACEF("offset %#" PRIx64 ", len %#" PRIx64 "\n", offset, len);
-  list_node_t list;
-  list_initialize(&list);
-  zx_status_t status;
-  {
-    Guard<Mutex> guard{&lock_};
-    status = DecommitRangeLocked(offset, len, list);
-  }
-  if (status == ZX_OK) {
-    pmm_free(&list);
-  }
-  return status;
-}
-
-zx_status_t VmObjectPaged::DecommitRangeLocked(uint64_t offset, uint64_t len,
-                                               list_node_t& free_list) {
-  if (options_ & kContiguous) {
+  if (is_contiguous()) {
     return ZX_ERR_NOT_SUPPORTED;
   }
-
-  // Trim the size and perform our zero-length hot-path check before we recurse
-  // up to our top-level ancestor.  Size bounding needs to take place relative
-  // to the child the operation was originally targeted against.
-  uint64_t new_len;
-  if (!TrimRange(offset, len, size_, &new_len)) {
-    return ZX_ERR_OUT_OF_RANGE;
-  }
-
-  // was in range, just zero length
-  if (new_len == 0) {
-    return ZX_OK;
-  }
-
-  // If this is a child slice of a VMO, then find our way up to our root
-  // ancestor (taking our offset into account as we do), and then recurse,
-  // running the operation against our ancestor.  Note that
-  // PagedParentOfSliceLocked will iteratively walk all the way up to our
-  // non-slice ancestor, not just our immediate parent, so we can guarantee
-  // bounded recursion.
-  if (is_slice()) {
-    uint64_t parent_offset;
-    VmObjectPaged* parent = PagedParentOfSliceLocked(&parent_offset);
-    AssertHeld(parent->lock_);
-    DEBUG_ASSERT(!parent->is_slice());  // assert bounded recursion.
-    return parent->DecommitRangeLocked(offset + parent_offset, new_len, free_list);
-  }
-
-  if (parent_ || GetRootPageSourceLocked()) {
-    return ZX_ERR_NOT_SUPPORTED;
-  }
-
-  // Demand offset and length be correctly aligned to not give surprising user semantics.
-  if (!IS_PAGE_ALIGNED(offset) || !IS_PAGE_ALIGNED(len)) {
-    return ZX_ERR_INVALID_ARGS;
-  }
-
-  LTRACEF("start offset %#" PRIx64 ", end %#" PRIx64 "\n", offset, offset + new_len);
-
-  // TODO(teisenbe): Allow decommitting of pages pinned by
-  // CommitRangeContiguous
-
-  if (AnyPagesPinnedLocked(offset, new_len)) {
-    return ZX_ERR_BAD_STATE;
-  }
-
-  // unmap all of the pages in this range on all the mapping regions
-  RangeChangeUpdateLocked(offset, new_len, RangeChangeOp::Unmap);
-
-  BatchPQRemove page_remover(&free_list);
-
-  page_list_.RemovePages(page_remover.RemovePagesCallback(), offset, offset + new_len);
-  page_remover.Flush();
-
-  IncrementHierarchyGenerationCountLocked();
-
-  return ZX_OK;
+  Guard<Mutex> guard{&lock_};
+  return DecommitRangeLocked(offset, len);
 }
 
-zx_status_t VmObjectPaged::ZeroRange(uint64_t offset, uint64_t len) {
+zx_status_t VmObjectPaged::DecommitRangeLocked(uint64_t offset, uint64_t len) {
   canary_.Assert();
-  list_node_t list;
-  list_initialize(&list);
-  zx_status_t status;
-  {
-    Guard<Mutex> guard{&lock_};
-    status = ZeroRangeLocked(offset, len, &list, &guard);
-  }
+
+  zx_status_t status = cow_pages_locked()->DecommitRangeLocked(offset, len);
   if (status == ZX_OK) {
-    pmm_free(&list);
-  } else {
-    DEBUG_ASSERT(list_is_empty(&list));
+    IncrementHierarchyGenerationCountLocked();
   }
   return status;
 }
@@ -2421,23 +926,14 @@
   DEBUG_ASSERT(zero_start_offset <= zero_end_offset);
   DEBUG_ASSERT(zero_end_offset <= PAGE_SIZE);
   DEBUG_ASSERT(IS_PAGE_ALIGNED(page_base_offset));
-  DEBUG_ASSERT(page_base_offset < size_);
+  DEBUG_ASSERT(page_base_offset < size_locked());
 
-  VmPageOrMarker* slot = page_list_.Lookup(page_base_offset);
-
-  if (slot && slot->IsMarker()) {
+  // TODO: Consider replacing this with a more appropriate generic API when one is available.
+  if (cow_pages_locked()->PageWouldReadZeroLocked(page_base_offset)) {
     // This is already considered zero so no need to redundantly zero again.
     return ZX_OK;
   }
-  // If we don't have a committed page we need to check our parent.
-  if (!slot || !slot->IsPage()) {
-    VmObjectPaged* page_owner;
-    uint64_t owner_offset, owner_id;
-    if (!FindInitialPageContentLocked(page_base_offset, &page_owner, &owner_offset, &owner_id)) {
-      // Parent doesn't have a page either, so there is nothing to do; this is already zero.
-      return ZX_OK;
-    }
-  }
+
   // Need to actually zero out bytes in the page.
   return ReadWriteInternalLocked(
       page_base_offset + zero_start_offset, zero_end_offset - zero_start_offset, true,
@@ -2451,8 +947,10 @@
       guard);
 }
 
-zx_status_t VmObjectPaged::ZeroRangeLocked(uint64_t offset, uint64_t len, list_node_t* free_list,
-                                           Guard<Mutex>* guard) {
+zx_status_t VmObjectPaged::ZeroRange(uint64_t offset, uint64_t len) {
+  canary_.Assert();
+  Guard<Mutex> guard{&lock_};
+
   // Zeroing a range behaves as if it were an efficient zx_vmo_write. As we cannot write to an
   // uncached vmo, we also cannot zero an uncached vmo.
   if (cache_policy_ != ARCH_MMU_FLAG_CACHED) {
@@ -2461,56 +959,23 @@
 
   // Trim the size and validate it is in range of the vmo.
   uint64_t new_len;
-  if (!TrimRange(offset, len, size_, &new_len)) {
+  if (!TrimRange(offset, len, size_locked(), &new_len)) {
     return ZX_ERR_OUT_OF_RANGE;
   }
 
-  // Forward any operations on slices up to the original non slice parent.
-  if (is_slice()) {
-    uint64_t parent_offset;
-    VmObjectPaged* parent = PagedParentOfSliceLocked(&parent_offset);
-    AssertHeld(parent->lock_);
-    return parent->ZeroRangeLocked(offset + parent_offset, new_len, free_list, guard);
-  }
-
   // Construct our initial range. Already checked the range above so we know it cannot overflow.
   uint64_t start = offset;
   uint64_t end = start + new_len;
 
-  // Unmap any page that is touched by this range in any of our, or our children's, mapping regions.
-  // This ensures that for any pages we are able to zero through decommitting we do not have freed
-  // pages still being mapped in.
-  RangeChangeUpdateLocked(start, end - start, RangeChangeOp::Unmap);
-
-  // If we're zeroing at the end of our parent range we can update our parent limit to reflect
-  // this, similar to a resize. This does not work if we are a slice, but we checked for that
-  // earlier. Whilst this does not actually zero the range in question, it makes future zeroing of
-  // the range far more efficient, which is why we do it first.
-  // parent_limit_ is a page aligned offset and so we can only reduce it to a rounded up value of
-  // start.
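-  // For example (hypothetical values with 4KiB pages): zeroing [0x1800, 0x4000) when parent_limit_
-  // is 0x4000 gives rounded_start = 0x2000, so the limit can be pulled down to 0x2000; the
-  // unaligned head [0x1800, 0x2000) is still zeroed by the partial page handling further below.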
-  uint64_t rounded_start = ROUNDUP_PAGE_SIZE(start);
-  if (rounded_start < parent_limit_ && end >= parent_limit_) {
-    if (parent_ && parent_->is_hidden()) {
-      // Release any COW pages that are no longer necessary. This will also
-      // update the parent limit.
-      BatchPQRemove page_remover(free_list);
-      ReleaseCowParentPagesLocked(rounded_start, parent_limit_, &page_remover);
-      page_remover.Flush();
-    } else {
-      parent_limit_ = rounded_start;
-    }
-  }
-
   // Helper that checks and establishes our invariants. We use this after calling functions that
   // may have temporarily released the lock.
-  auto establish_invariants = [this, start, end]() TA_REQ(lock_) {
-    if (end > size_) {
+  auto establish_invariants = [this, end]() TA_REQ(lock_) {
+    if (end > size_locked()) {
       return ZX_ERR_BAD_STATE;
     }
     if (cache_policy_ != ARCH_MMU_FLAG_CACHED) {
       return ZX_ERR_BAD_STATE;
     }
-    RangeChangeUpdateLocked(start, end - start, RangeChangeOp::Unmap);
     return ZX_OK;
   };
 
@@ -2521,10 +986,10 @@
     // Need to handle the case where end is unaligned and on the same page as start
     if (unlikely(start_page_base == end_page_base)) {
       return ZeroPartialPage(start_page_base, start - start_page_base, end - start_page_base,
-                             guard);
+                             &guard);
     }
     zx_status_t status =
-        ZeroPartialPage(start_page_base, start - start_page_base, PAGE_SIZE, guard);
+        ZeroPartialPage(start_page_base, start - start_page_base, PAGE_SIZE, &guard);
     if (status == ZX_OK) {
       status = establish_invariants();
     }
@@ -2535,7 +1000,7 @@
   }
 
   if (unlikely(end_page_base != end)) {
-    zx_status_t status = ZeroPartialPage(end_page_base, 0, end - end_page_base, guard);
+    zx_status_t status = ZeroPartialPage(end_page_base, 0, end - end_page_base, &guard);
     if (status == ZX_OK) {
       status = establish_invariants();
     }
@@ -2545,451 +1010,12 @@
     end = end_page_base;
   }
 
-  // Now that we have a page aligned range we can try and do the more efficient decommit. We prefer
-  // decommit as it performs work in the order of the number of committed pages, instead of work in
-  // the order of size of the range. An error from DecommitRangeLocked indicates that the VMO is not
-  // of a form that decommit can safely be performed without exposing data that we shouldn't between
-  // children and parents, but no actual state will have been changed.
-  // Should decommit succeed we are done, otherwise we will have to handle each offset individually.
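-  // As a rough illustration (hypothetical numbers): zeroing a 1GiB range with only a handful of
-  // committed pages costs a few page list removals via decommit, whereas the fallback loop below
-  // has to consider each of the ~262144 page offsets individually.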
-  zx_status_t status = DecommitRangeLocked(start, end - start, *free_list);
-  if (status == ZX_OK) {
-    return ZX_OK;
-  }
-
-  for (offset = start; offset < end; offset += PAGE_SIZE) {
-    VmPageOrMarker* slot = page_list_.Lookup(offset);
-
-    const bool can_see_parent = parent_ && offset < parent_limit_;
-
-    // This is a lambda as it only makes sense to talk about parent mutability when we have a parent
-    // for this offset.
-    auto parent_immutable = [can_see_parent, this]() TA_REQ(lock_) {
-      DEBUG_ASSERT(can_see_parent);
-      return parent_->is_hidden();
-    };
-
-    // Finding the initial page content is expensive, but we only need to call it
-    // under certain circumstances scattered in the code below. The lambda
-    // get_initial_page_content() will lazily fetch and cache the details. This
-    // avoids us calling it when we don't need to, or calling it more than once.
-    struct InitialPageContent {
-      bool inited = false;
-      VmObjectPaged* page_owner;
-      uint64_t owner_offset;
-      uint64_t owner_id;
-      vm_page_t* page;
-    } initial_content_;
-    auto get_initial_page_content = [&initial_content_, can_see_parent, this, offset]()
-                                        TA_REQ(lock_) -> const InitialPageContent& {
-      if (!initial_content_.inited) {
-        DEBUG_ASSERT(can_see_parent);
-        VmPageOrMarker* page_or_marker = FindInitialPageContentLocked(
-            offset, &initial_content_.page_owner, &initial_content_.owner_offset,
-            &initial_content_.owner_id);
-        // We only care about the parent having a 'true' vm_page for content. If the parent has a
-        // marker then it's as if the parent has no content since that's a zero page anyway, which
-        // is what we are trying to achieve.
-        initial_content_.page =
-            page_or_marker && page_or_marker->IsPage() ? page_or_marker->Page() : nullptr;
-        initial_content_.inited = true;
-      }
-      return initial_content_;
-    };
-
-    auto parent_has_content = [get_initial_page_content]() TA_REQ(lock_) {
-      return get_initial_page_content().page != nullptr;
-    };
-
-    // Ideally we just collect up pages and hand them over to the pmm all at the end, but if we need
-    // to allocate any pages then we would like to ensure that we do not cause total memory to peak
-    // higher due to squirreling these pages away.
-    auto free_any_pages = [&free_list] {
-      if (!list_is_empty(free_list)) {
-        pmm_free(free_list);
-      }
-    };
-
-    // If there's already a marker then we can avoid any second guessing and leave the marker alone.
-    if (slot && slot->IsMarker()) {
-      continue;
-    }
-
-    // In the ideal case we can zero by making there be an Empty slot in our page list, so first
-    // see if we can do that. This is true when there is nothing pinned and either:
-    //  * This offset does not relate to our parent
-    //  * This offset does relate to our parent, but our parent is immutable and is currently zero
-    //    at this offset.
-    if (!SlotHasPinnedPage(slot) &&
-        (!can_see_parent || (parent_immutable() && !parent_has_content()))) {
-      if (slot && slot->IsPage()) {
-        vm_page_t* page = page_list_.RemovePage(offset).ReleasePage();
-        pmm_page_queues()->Remove(page);
-        DEBUG_ASSERT(!list_in_list(&page->queue_node));
-        list_add_tail(free_list, &page->queue_node);
-      }
-      continue;
-    }
-    // The only way we could reach this point and *not* have a parent is if the page is pinned.
-    DEBUG_ASSERT(SlotHasPinnedPage(slot) || parent_);
-
-    // Now we know that we need to do something active to make this zero, either through a marker or
-    // a page. First make sure we have a slot to modify.
-    if (!slot) {
-      slot = page_list_.LookupOrAllocate(offset);
-      if (unlikely(!slot)) {
-        return ZX_ERR_NO_MEMORY;
-      }
-    }
-
-    // Ideally we will use a marker, but we can only do this if we can point to a committed page
-    // to justify the allocation of the marker (i.e. we cannot allocate infinite markers with no
-    // committed pages). A committed page in this case exists if the parent has any content.
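-    // For example (illustrative): zeroing an offset whose content currently comes from a committed
-    // page in the parent can be recorded with a marker, but zeroing a huge range that is already
-    // implicitly zero must not allocate markers, as that would grow the page list without bound.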
-    if (SlotHasPinnedPage(slot) || !parent_has_content()) {
-      if (slot->IsPage()) {
-        // Zero the existing page.
-        ZeroPage(slot->Page());
-        continue;
-      }
-      // Allocate a new page, it will be zeroed in the process.
-      vm_page_t* p;
-      free_any_pages();
-      // Do not pass our free_list here as this takes a list to allocate from, whereas our list is
-      // for collecting things to free.
-      bool result = AllocateCopyPage(pmm_alloc_flags_, vm_get_zero_page_paddr(), nullptr, &p);
-      if (!result) {
-        return ZX_ERR_NO_MEMORY;
-      }
-      SetNotWired(p, offset);
-      *slot = VmPageOrMarker::Page(p);
-      continue;
-    }
-    DEBUG_ASSERT(parent_ && parent_has_content());
-
-    // We are able to insert a marker, but if our page content is from a hidden owner we need to
-    // perform slightly more complex cow forking.
-    const InitialPageContent& content = get_initial_page_content();
-    if (slot->IsEmpty() && content.page_owner->is_hidden()) {
-      free_any_pages();
-      zx_status_t result = CloneCowPageAsZeroLocked(offset, free_list, content.page_owner,
-                                                    content.page, content.owner_offset);
-      if (result != ZX_OK) {
-        return result;
-      }
-      continue;
-    }
-
-    // Remove any page that could be hanging around in the slot before we make it a marker.
-    if (slot->IsPage()) {
-      vm_page_t* page = slot->ReleasePage();
-      pmm_page_queues()->Remove(page);
-      DEBUG_ASSERT(!list_in_list(&page->queue_node));
-      list_add_tail(free_list, &page->queue_node);
-    }
-    *slot = VmPageOrMarker::Marker();
-  }
-
+  // Now that we have a page aligned range we can try to hand over to the cow pages zero method.
+  // Always increment the gen count as it's possible for ZeroPagesLocked to fail part way through
+  // and it doesn't unroll its actions.
   IncrementHierarchyGenerationCountLocked();
 
-  return ZX_OK;
-}
-
-void VmObjectPaged::MoveToNotWired(vm_page_t* page, uint64_t offset) {
-  if (page_source_) {
-    pmm_page_queues()->MoveToPagerBacked(page, this, offset);
-  } else {
-    pmm_page_queues()->MoveToUnswappable(page);
-  }
-}
-
-void VmObjectPaged::SetNotWired(vm_page_t* page, uint64_t offset) {
-  if (page_source_) {
-    pmm_page_queues()->SetPagerBacked(page, this, offset);
-  } else {
-    pmm_page_queues()->SetUnswappable(page);
-  }
-}
-
-void VmObjectPaged::UnpinPage(vm_page_t* page, uint64_t offset) {
-  DEBUG_ASSERT(page->state() == VM_PAGE_STATE_OBJECT);
-  ASSERT(page->object.pin_count > 0);
-  page->object.pin_count--;
-  if (page->object.pin_count == 0) {
-    MoveToNotWired(page, offset);
-  }
-}
-
-void VmObjectPaged::Unpin(uint64_t offset, uint64_t len) {
-  Guard<Mutex> guard{&lock_};
-  UnpinLocked(offset, len);
-}
-
-void VmObjectPaged::UnpinLocked(uint64_t offset, uint64_t len) {
-  canary_.Assert();
-
-  // verify that the range is within the object
-  ASSERT(InRange(offset, len, size_));
-  // forbid zero length unpins as zero length pins return errors.
-  ASSERT(len != 0);
-
-  if (is_slice()) {
-    uint64_t parent_offset;
-    VmObjectPaged* parent = PagedParentOfSliceLocked(&parent_offset);
-    AssertHeld(parent->lock_);
-    return parent->UnpinLocked(offset + parent_offset, len);
-  }
-
-  const uint64_t start_page_offset = ROUNDDOWN(offset, PAGE_SIZE);
-  const uint64_t end_page_offset = ROUNDUP(offset + len, PAGE_SIZE);
-
-  zx_status_t status = page_list_.ForEveryPageAndGapInRange(
-      [this](const auto* page, uint64_t off) {
-        if (page->IsMarker()) {
-          return ZX_ERR_NOT_FOUND;
-        }
-        UnpinPage(page->Page(), off);
-        return ZX_ERR_NEXT;
-      },
-      [](uint64_t gap_start, uint64_t gap_end) { return ZX_ERR_NOT_FOUND; }, start_page_offset,
-      end_page_offset);
-  ASSERT_MSG(status == ZX_OK, "Tried to unpin an uncommitted page");
-
-  bool overflow = sub_overflow(
-      pinned_page_count_, (end_page_offset - start_page_offset) / PAGE_SIZE, &pinned_page_count_);
-  ASSERT(!overflow);
-
-  return;
-}
-
-bool VmObjectPaged::AnyPagesPinnedLocked(uint64_t offset, size_t len) {
-  canary_.Assert();
-  DEBUG_ASSERT(lock_.lock().IsHeld());
-  DEBUG_ASSERT(IS_PAGE_ALIGNED(offset));
-  DEBUG_ASSERT(IS_PAGE_ALIGNED(len));
-
-  if (pinned_page_count_ == 0) {
-    return is_contiguous();
-  }
-
-  const uint64_t start_page_offset = offset;
-  const uint64_t end_page_offset = offset + len;
-
-  bool found_pinned = false;
-  page_list_.ForEveryPageInRange(
-      [&found_pinned, start_page_offset, end_page_offset](const auto* p, uint64_t off) {
-        DEBUG_ASSERT(off >= start_page_offset && off < end_page_offset);
-        if (p->IsPage() && p->Page()->object.pin_count > 0) {
-          found_pinned = true;
-          return ZX_ERR_STOP;
-        }
-        return ZX_ERR_NEXT;
-      },
-      start_page_offset, end_page_offset);
-
-  return found_pinned;
-}
-
-// Helper function which processes the region visible by both children.
-void VmObjectPaged::ReleaseCowParentPagesLockedHelper(uint64_t start, uint64_t end,
-                                                      bool sibling_visible,
-                                                      BatchPQRemove* page_remover) {
-  // Compute the range in the parent that cur no longer will be able to see.
-  const uint64_t parent_range_start = CheckedAdd(start, parent_offset_);
-  const uint64_t parent_range_end = CheckedAdd(end, parent_offset_);
-
-  bool skip_split_bits = true;
-  if (parent_limit_ <= end) {
-    parent_limit_ = ktl::min(start, parent_limit_);
-    if (parent_limit_ <= parent_start_limit_) {
-      // Setting both to zero is cleaner and makes some asserts easier.
-      parent_start_limit_ = 0;
-      parent_limit_ = 0;
-    }
-  } else if (start == parent_start_limit_) {
-    parent_start_limit_ = end;
-  } else if (sibling_visible) {
-    // Split bits and partial cow release are only an issue if this range is also visible to our
-    // sibling. If it's not visible then we will always be freeing all pages anyway, no need to
-    // worry about split bits. Otherwise if the vmo limits can't be updated, this function will need
-    // to use the split bits to release pages in the parent. It also means that ancestor pages in
-    // the specified range might end up being released based on their current split bits, instead of
-    // through subsequent calls to this function. Therefore parent and all ancestors need to have
-    // the partial_cow_release_ flag set to prevent fast merge issues in ::RemoveChild.
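-    // For example (illustrative): if the left child loses access to a page here while the sibling
-    // can still see it, the page stays in the parent with cow_left_split set; it only becomes
-    // freeable once the right child also forks or releases it.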
-    auto cur = this;
-    AssertHeld(cur->lock_);
-    uint64_t cur_start = start;
-    uint64_t cur_end = end;
-    while (cur->parent_ && cur_start < cur_end) {
-      auto parent = cur->parent_.get();
-      AssertHeld(parent->lock_);
-      parent->partial_cow_release_ = true;
-      cur_start = ktl::max(CheckedAdd(cur_start, cur->parent_offset_), parent->parent_start_limit_);
-      cur_end = ktl::min(CheckedAdd(cur_end, cur->parent_offset_), parent->parent_limit_);
-      cur = parent;
-    }
-    skip_split_bits = false;
-  }
-
-  // Free any pages that either aren't visible, or were already split into the other child. For
-  // pages that haven't been split into the other child, we need to ensure they're uniaccessible.
-  AssertHeld(parent_->lock_);
-  parent_->page_list_.RemovePages(
-      [skip_split_bits, sibling_visible, page_remover,
-       left = this == &parent_->left_child_locked()](VmPageOrMarker* page_or_mark,
-                                                     uint64_t offset) {
-        if (page_or_mark->IsMarker()) {
-          // If this marker is in a range still visible to the sibling then we just leave it, no
-          // split bits or anything to be updated. If the sibling cannot see it, then we can clear
-          // it.
-          if (!sibling_visible) {
-            *page_or_mark = VmPageOrMarker::Empty();
-          }
-          return ZX_ERR_NEXT;
-        }
-        vm_page* page = page_or_mark->Page();
-        // If the sibling can still see this page then we need to keep it around, otherwise we can
-        // free it. The sibling can see the page if this range is |sibling_visible| and if the
-        // sibling hasn't already forked the page, which is recorded in the split bits.
-        if (!sibling_visible || left ? page->object.cow_right_split : page->object.cow_left_split) {
-          page = page_or_mark->ReleasePage();
-          page_remover->Push(page);
-          return ZX_ERR_NEXT;
-        }
-        if (skip_split_bits) {
-          // If we were able to update this vmo's parent limit, that made the pages
-          // uniaccessible. We clear the split bits to allow ::RemoveChild to efficiently
-          // merge vmos without having to worry about pages above parent_limit_.
-          page->object.cow_left_split = 0;
-          page->object.cow_right_split = 0;
-        } else {
-          // Otherwise set the appropriate split bit to make the page uniaccessible.
-          if (left) {
-            page->object.cow_left_split = 1;
-          } else {
-            page->object.cow_right_split = 1;
-          }
-        }
-        return ZX_ERR_NEXT;
-      },
-      parent_range_start, parent_range_end);
-}
-
-void VmObjectPaged::ReleaseCowParentPagesLocked(uint64_t start, uint64_t end,
-                                                BatchPQRemove* page_remover) {
-  // This function releases |this| references to any ancestor vmo's COW pages.
-  //
-  // To do so, we divide |this| parent into three (possibly 0-length) regions: the region
-  // which |this| sees but before what the sibling can see, the region where both |this|
-  // and its sibling can see, and the region |this| can see but after what the sibling can
-  // see. Processing the 2nd region only requires touching the direct parent, since the sibling
-  // can see ancestor pages in the region. However, processing the 1st and 3rd regions requires
-  // recursively releasing |this| parent's ancestor pages, since those pages are no longer
-  // visible through |this| parent.
-  //
-  // This function processes region 3 (incl. recursively processing the parent), then region 2,
-  // then region 1 (incl. recursively processing the parent). Processing is done in reverse order
-  // to ensure parent_limit_ is reduced correctly. When processing regions of type 1 or 3 we
-  //  1. walk up the parent and find the largest common slice that all nodes in the hierarchy see
-  //     as being of the same type.
-  //  2. walk back down (using stack_ direction flags) applying the range update using that final
-  //     calculated size
-  //  3. reduce the range we are operating on to not include the section we just processed
-  //  4. repeat steps 1-3 until range is empty
-  // In the worst case it is possible for this algorithm to be O(N^2) in the depth of the tree.
-  // More optimal algorithms probably exist, but this algorithm is sufficient for the moment as
-  // these suboptimal scenarios do not occur in practice.
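-  // A concrete (illustrative) example: if |this| sees parent pages [0, 10) and the sibling sees
-  // [4, 8), then region 1 is [0, 4), region 2 is [4, 8) and region 3 is [8, 10). The loop below
-  // releases [8, 10) first (recursing through ancestors), then [4, 8) touching only the direct
-  // parent, and finally [0, 4) (again recursing through ancestors).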
-
-  // At the top level we continuously attempt to process the range until it is empty.
-  while (end > start) {
-    // cur_start / cur_end get adjusted as cur moves up/down the parent chain.
-    uint64_t cur_start = start;
-    uint64_t cur_end = end;
-    VmObjectPaged* cur = this;
-
-    AssertHeld(cur->lock_);
-    // First walk up the parent chain as long as there is a visible parent that does not overlap
-    // with its sibling.
-    while (cur->parent_ && cur->parent_start_limit_ < cur_end && cur_start < cur->parent_limit_) {
-      if (cur_end > cur->parent_limit_) {
-        // Part of the range sees the parent, and part of it doesn't. As we only process ranges of
-        // a single type we first trim the range down to the portion that doesn't see the parent,
-        // then next time around the top level loop we will process the portion that does see the
-        // parent.
-        cur_start = cur->parent_limit_;
-        DEBUG_ASSERT(cur_start < cur_end);
-        break;
-      }
-      // Trim the start to the portion of the parent it can see.
-      cur_start = ktl::max(cur_start, cur->parent_start_limit_);
-      DEBUG_ASSERT(cur_start < cur_end);
-
-      // Work out what the overlap with our sibling is
-      auto parent = cur->parent_.get();
-      AssertHeld(parent->lock_);
-      bool left = cur == &parent->left_child_locked();
-      auto& other = left ? parent->right_child_locked() : parent->left_child_locked();
-      AssertHeld(other.lock_);
-
-      // Project our operating range into our parent.
-      const uint64_t our_parent_start = CheckedAdd(cur_start, cur->parent_offset_);
-      const uint64_t our_parent_end = CheckedAdd(cur_end, cur->parent_offset_);
-      // Project our sibling's full range into our parent.
-      const uint64_t other_parent_start =
-          CheckedAdd(other.parent_offset_, other.parent_start_limit_);
-      const uint64_t other_parent_end = CheckedAdd(other.parent_offset_, other.parent_limit_);
-
-      if (other_parent_end >= our_parent_end && other_parent_start < our_parent_end) {
-        // At least some of the end of our range overlaps with the sibling. First move up our start
-        // to ensure our range is 100% overlapping.
-        if (other_parent_start > our_parent_start) {
-          cur_start = CheckedAdd(cur_start, other_parent_start - our_parent_start);
-          DEBUG_ASSERT(cur_start < cur_end);
-        }
-        // Free the range that overlaps with the sibling, then we are done walking up as this is the
-        // type 2 kind of region. It is safe to process this right now since we are in a terminal
-        // state and are leaving the loop, thus we know that this is the final size of the region.
-        cur->ReleaseCowParentPagesLockedHelper(cur_start, cur_end, true, page_remover);
-        break;
-      }
-      // End of our range does not see the sibling. First move up our start to ensure we are dealing
-      // with a range that is 100% no sibling, and then keep on walking up.
-      if (other_parent_end > our_parent_start && other_parent_end < our_parent_end) {
-        DEBUG_ASSERT(other_parent_end < our_parent_end);
-        cur_start = CheckedAdd(cur_start, other_parent_end - our_parent_start);
-        DEBUG_ASSERT(cur_start < cur_end);
-      }
-
-      // Record the direction so we can walk back down later.
-      parent->stack_.dir_flag = left ? StackDir::Left : StackDir::Right;
-      // Don't use our_parent_start as we may have updated cur_start
-      cur_start = CheckedAdd(cur_start, cur->parent_offset_);
-      cur_end = our_parent_end;
-      DEBUG_ASSERT(cur_start < cur_end);
-      cur = parent;
-    }
-
-    // Every parent that we walked up had no overlap with its siblings. Now that we know the size
-    // of the range that we can process we just walk back down processing.
-    while (cur != this) {
-      // Although we free pages in the parent we operate on the *child*, as that is whose limits
-      // we will actually adjust. The ReleaseCowParentPagesLockedHelper will then reach back up to
-      // the parent to actually free any pages.
-      cur = cur->stack_.dir_flag == StackDir::Left ? &cur->left_child_locked()
-                                                   : &cur->right_child_locked();
-      AssertHeld(cur->lock_);
-      DEBUG_ASSERT(cur_start >= cur->parent_offset_);
-      DEBUG_ASSERT(cur_end >= cur->parent_offset_);
-      cur_start -= cur->parent_offset_;
-      cur_end -= cur->parent_offset_;
-
-      cur->ReleaseCowParentPagesLockedHelper(cur_start, cur_end, false, page_remover);
-    }
-
-    // Update the end with the portion we managed to process, checking some basic sanity of the
-    // range; most importantly that we processed a non-zero portion to guarantee progress.
-    DEBUG_ASSERT(cur_start >= start);
-    DEBUG_ASSERT(cur_start < end);
-    DEBUG_ASSERT(cur_end == end);
-    end = cur_start;
-  }
+  return cow_pages_locked()->ZeroPagesLocked(start, end);
 }
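The loop removed above classifies a child range against its sibling by projecting both into parent coordinates. Below is a minimal standalone sketch of that projection and classification; the function, names, and example values are illustrative only (and plain additions with an assert stand in for the kernel's CheckedAdd).

#include <cassert>
#include <cstdint>
#include <cstdio>

struct Projection {
  bool tail_overlaps_sibling;  // terminal case: free the overlap and stop walking up
  uint64_t trimmed_start;      // start advanced so the remaining range is uniformly one case
};

// our_[start,end) are offsets in the child; our_offset projects them into the parent.
// The sibling's visible window in the parent is
// [sib_offset + sib_start_limit, sib_offset + sib_limit).
Projection ClassifyAgainstSibling(uint64_t our_start, uint64_t our_end, uint64_t our_offset,
                                  uint64_t sib_offset, uint64_t sib_start_limit,
                                  uint64_t sib_limit) {
  assert(our_start < our_end);
  const uint64_t our_parent_start = our_start + our_offset;
  const uint64_t our_parent_end = our_end + our_offset;
  const uint64_t sib_parent_start = sib_offset + sib_start_limit;
  const uint64_t sib_parent_end = sib_offset + sib_limit;

  if (sib_parent_end >= our_parent_end && sib_parent_start < our_parent_end) {
    // The tail of our range is visible to the sibling: trim the start so the whole
    // remaining range overlaps; this level is terminal.
    uint64_t start = our_start;
    if (sib_parent_start > our_parent_start) {
      start += sib_parent_start - our_parent_start;
    }
    return {true, start};
  }
  // No overlap at the tail; trim off any overlap at the head and keep walking up.
  uint64_t start = our_start;
  if (sib_parent_end > our_parent_start && sib_parent_end < our_parent_end) {
    start += sib_parent_end - our_parent_start;
  }
  return {false, start};
}

int main() {
  // Child range [0x3000, 0x8000) sits at parent offset 0x1000; the sibling sees
  // [0x6000, 0x10000) of the parent, so the tail of the range overlaps it.
  Projection p = ClassifyAgainstSibling(0x3000, 0x8000, 0x1000, 0x0, 0x6000, 0x10000);
  printf("terminal=%d trimmed_start=%#llx\n", p.tail_overlaps_sibling,
         (unsigned long long)p.trimmed_start);
  return 0;
}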
 
 zx_status_t VmObjectPaged::Resize(uint64_t s) {
@@ -2997,11 +1023,11 @@
 
   LTRACEF("vmo %p, size %" PRIu64 "\n", this, s);
 
-  if (!(options_ & kResizable)) {
+  if (!is_resizable()) {
     return ZX_ERR_UNAVAILABLE;
   }
 
-  // round up the size to the next page size boundary and make sure we dont wrap
+  // round up the size to the next page size boundary and make sure we don't wrap
   zx_status_t status = RoundSize(s, &s);
   if (status != ZX_OK) {
     return status;
@@ -3009,102 +1035,14 @@
 
   Guard<Mutex> guard{&lock_};
 
-  // make sure everything is aligned before we get started
-  DEBUG_ASSERT(IS_PAGE_ALIGNED(size_));
-  DEBUG_ASSERT(IS_PAGE_ALIGNED(s));
-
-  list_node_t free_list;
-  list_initialize(&free_list);
-
-  BatchPQRemove page_remover(&free_list);
-
-  // see if we're shrinking or expanding the vmo
-  if (s < size_) {
-    // shrinking
-    uint64_t start = s;
-    uint64_t end = size_;
-    uint64_t len = end - start;
-
-    // bail if there are any pinned pages in the range we're trimming
-    if (AnyPagesPinnedLocked(start, len)) {
-      return ZX_ERR_BAD_STATE;
-    }
-
-    // unmap all of the pages in this range on all the mapping regions
-    RangeChangeUpdateLocked(start, len, RangeChangeOp::Unmap);
-
-    if (page_source_) {
-      // Tell the page source that any non-resident pages that are now out-of-bounds
-      // were supplied, to ensure that any reads of those pages get woken up.
-      zx_status_t status = page_list_.ForEveryPageAndGapInRange(
-          [](const auto* p, uint64_t off) { return ZX_ERR_NEXT; },
-          [&](uint64_t gap_start, uint64_t gap_end) {
-            page_source_->OnPagesSupplied(gap_start, gap_end);
-            return ZX_ERR_NEXT;
-          },
-          start, end);
-      DEBUG_ASSERT(status == ZX_OK);
-    }
-
-    if (parent_ && parent_->is_hidden()) {
-      // Release any COW pages that are no longer necessary. This will also
-      // update the parent limit.
-      ReleaseCowParentPagesLocked(start, end, &page_remover);
-      // Validate that the parent limit was correctly updated as it should never remain larger than
-      // our actual size.
-      DEBUG_ASSERT(parent_limit_ <= s);
-    } else {
-      parent_limit_ = ktl::min(parent_limit_, s);
-    }
-    // If the tail of a parent disappears, the children shouldn't be able to see that region
-    // again, even if the parent is later reenlarged. So update the child parent limits.
-    UpdateChildParentLimitsLocked(s);
-
-    page_list_.RemovePages(page_remover.RemovePagesCallback(), start, end);
-  } else if (s > size_) {
-    uint64_t temp;
-    // Check that this VMO's new size would not cause it to overflow if projected onto the root.
-    bool overflow = add_overflow(root_parent_offset_, s, &temp);
-    if (overflow) {
-      return ZX_ERR_INVALID_ARGS;
-    }
-    // expanding
-    // figure the starting and ending page offset that is affected
-    uint64_t start = size_;
-    uint64_t end = s;
-    uint64_t len = end - start;
-
-    // inform all our children and mappings that there are new bits
-    RangeChangeUpdateLocked(start, len, RangeChangeOp::Unmap);
+  status = cow_pages_locked()->ResizeLocked(s);
+  if (status != ZX_OK) {
+    return status;
   }
-
-  // save bytewise size
-  size_ = s;
-
   IncrementHierarchyGenerationCountLocked();
-
-  page_remover.Flush();
-  guard.Release();
-  pmm_free(&free_list);
-
   return ZX_OK;
 }
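The resize path rejects sizes that cannot be page-rounded without wrapping and, when growing, sizes that would overflow once projected onto the root of the clone chain. A small userspace sketch of those two checks follows; kPageSize and the helper names are stand-ins, not kernel code.

#include <cstdint>
#include <cstdio>

constexpr uint64_t kPageSize = 4096;  // illustrative; the kernel uses PAGE_SIZE

// Round |size| up to a page boundary, failing instead of wrapping past UINT64_MAX.
bool RoundSizeNoWrap(uint64_t size, uint64_t* out) {
  const uint64_t rounded = (size + kPageSize - 1) & ~(kPageSize - 1);
  if (rounded < size) {
    return false;  // the addition wrapped
  }
  *out = rounded;
  return true;
}

// When growing, the new end must still be expressible as an offset into the root
// parent, i.e. root_parent_offset + new_size must not overflow.
bool FitsInRoot(uint64_t root_parent_offset, uint64_t new_size) {
  return root_parent_offset + new_size >= root_parent_offset;
}

int main() {
  uint64_t rounded = 0;
  if (!RoundSizeNoWrap(10000, &rounded) || !FitsInRoot(0x1000, rounded)) {
    printf("resize rejected\n");
    return 1;
  }
  printf("resize to %#llx accepted\n", (unsigned long long)rounded);  // 0x3000
  return 0;
}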
 
-void VmObjectPaged::UpdateChildParentLimitsLocked(uint64_t new_size) {
-  // Note that a child's parent_limit_ will limit that child's descendants' views into
-  // this vmo, so this method only needs to touch the direct children.
-  for (auto& c : children_list_) {
-    DEBUG_ASSERT(c.is_paged());
-    VmObjectPaged& child = static_cast<VmObjectPaged&>(c);
-    AssertHeld(child.lock_);
-    if (new_size < child.parent_offset_) {
-      child.parent_limit_ = 0;
-    } else {
-      child.parent_limit_ = ktl::min(child.parent_limit_, new_size - child.parent_offset_);
-    }
-  }
-}
-
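The removed UpdateChildParentLimitsLocked clamps each direct child's view when the parent shrinks, so a child can never see the trimmed tail again even if the parent is later re-enlarged. A compilable sketch of just that clamp, using a hypothetical ChildView struct in place of the real child VMO:

#include <algorithm>
#include <cstdint>

struct ChildView {
  uint64_t parent_offset;  // where the child's window starts in the parent
  uint64_t parent_limit;   // how far into the parent the child may see
};

void ClampChildToNewParentSize(ChildView& child, uint64_t new_size) {
  if (new_size < child.parent_offset) {
    // The child's whole window is beyond the new size; it can see nothing.
    child.parent_limit = 0;
  } else {
    child.parent_limit = std::min(child.parent_limit, new_size - child.parent_offset);
  }
}

int main() {
  ChildView child{/*parent_offset=*/0x2000, /*parent_limit=*/0x6000};
  ClampChildToNewParentSize(child, /*new_size=*/0x4000);
  // child.parent_limit is now 0x2000: the child may see only [0x2000, 0x4000) of the parent.
  return 0;
}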
 // perform some sort of copy in/out on a range of the object using a passed in lambda
 // for the copy routine. The copy routine has the expected type signature of:
 // (uint64_t src_offset, uint64_t dest_offset, bool write, Guard<Mutex> *guard) -> zx_status_t
@@ -3128,7 +1066,7 @@
     if (cache_policy_ != ARCH_MMU_FLAG_CACHED) {
       return ZX_ERR_BAD_STATE;
     }
-    if (end_offset > size_) {
+    if (end_offset > size_locked()) {
       return ZX_ERR_OUT_OF_RANGE;
     }
     return ZX_OK;
@@ -3250,57 +1188,7 @@
 
   Guard<Mutex> guard{&lock_};
 
-  // verify that the range is within the object
-  if (unlikely(!InRange(offset, len, size_))) {
-    return ZX_ERR_OUT_OF_RANGE;
-  }
-
-  const uint64_t start_page_offset = ROUNDDOWN(offset, PAGE_SIZE);
-  const uint64_t end_page_offset = ROUNDUP(offset + len, PAGE_SIZE);
-
-  zx_status_t status = page_list_.ForEveryPageAndGapInRange(
-      [lookup_fn, context, start_page_offset](const auto* p, uint64_t off) {
-        if (p->IsMarker()) {
-          return ZX_ERR_NO_MEMORY;
-        }
-        const size_t index = (off - start_page_offset) / PAGE_SIZE;
-        paddr_t pa = p->Page()->paddr();
-        zx_status_t status = lookup_fn(context, off, index, pa);
-        if (status != ZX_OK) {
-          if (unlikely(status == ZX_ERR_NEXT || status == ZX_ERR_STOP)) {
-            status = ZX_ERR_INTERNAL;
-          }
-          return status;
-        }
-        return ZX_ERR_NEXT;
-      },
-      [this, lookup_fn, context, start_page_offset](uint64_t gap_start, uint64_t gap_end) {
-        AssertHeld(this->lock_);
-        // If some page was missing from our list, run the more expensive
-        // GetPageLocked to see if our parent has it.
-        for (uint64_t off = gap_start; off < gap_end; off += PAGE_SIZE) {
-          paddr_t pa;
-          zx_status_t status = this->GetPageLocked(off, 0, nullptr, nullptr, nullptr, &pa);
-          if (status != ZX_OK) {
-            return ZX_ERR_NO_MEMORY;
-          }
-          const size_t index = (off - start_page_offset) / PAGE_SIZE;
-          status = lookup_fn(context, off, index, pa);
-          if (status != ZX_OK) {
-            if (unlikely(status == ZX_ERR_NEXT || status == ZX_ERR_STOP)) {
-              status = ZX_ERR_INTERNAL;
-            }
-            return status;
-          }
-        }
-        return ZX_ERR_NEXT;
-      },
-      start_page_offset, end_page_offset);
-  if (status != ZX_OK) {
-    return status;
-  }
-
-  return ZX_OK;
+  return cow_pages_locked()->LookupLocked(offset, len, lookup_fn, context);
 }
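For reference, the lookup callback in the removed code above is invoked as lookup_fn(context, offset, index, paddr) once per page. The standalone sketch below shows how a caller might use a callback of that shape to collect physical addresses; the types, the CollectPaddr helper, and the commented Lookup call are stand-ins, not the kernel API.

#include <cstddef>
#include <cstdint>
#include <vector>

using paddr_t = uint64_t;  // stand-in for the kernel's paddr_t
constexpr uint64_t kPageSize = 4096;

struct LookupContext {
  std::vector<paddr_t> paddrs;  // one slot per page of the requested range
};

// Matches the (context, offset, index, paddr) shape used by the lookup above; the int
// return stands in for zx_status_t, with 0 meaning "keep going".
int CollectPaddr(void* context, uint64_t /*offset*/, size_t index, paddr_t pa) {
  auto* ctx = static_cast<LookupContext*>(context);
  if (index >= ctx->paddrs.size()) {
    return -1;  // the caller sized the vector to the range, so this would be a bug
  }
  ctx->paddrs[index] = pa;
  return 0;
}

// A caller would size the context to the range and pass CollectPaddr as the lookup
// function, roughly:
//   LookupContext ctx{std::vector<paddr_t>(len / kPageSize)};
//   vmo->Lookup(offset, len, CollectPaddr, &ctx);

int main() { return 0; }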
 
 zx_status_t VmObjectPaged::LookupContiguous(uint64_t offset, uint64_t len, paddr_t* out_paddr) {
@@ -3311,30 +1199,30 @@
   }
 
   Guard<Mutex> guard{&lock_};
-  if (unlikely(!InRange(offset, len, size_))) {
+
+  if (unlikely(!InRange(offset, len, size_locked()))) {
     return ZX_ERR_OUT_OF_RANGE;
   }
-  if (unlikely(!is_contiguous() && len != PAGE_SIZE)) {
+
+  if (unlikely(is_contiguous())) {
+    // Already checked that the entire requested range is valid, and since we know all our pages are
+    // contiguous we can simply look up one page.
+    len = PAGE_SIZE;
+  } else if (unlikely(len != PAGE_SIZE)) {
+    // Multi-page lookup only supported for contiguous VMOs.
     return ZX_ERR_BAD_STATE;
   }
-  // If this is a slice then our pages are in our first non-slice parent. Our previous range check
-  // is still valid as slices have to be strict subsets and nothing can be resizable.
-  VmPageOrMarker* page;
-  if (is_slice()) {
-    uint64_t parent_offset;
-    VmObjectPaged* paged_parent = PagedParentOfSliceLocked(&parent_offset);
-    AssertHeld(paged_parent->lock_);
-    page = paged_parent->page_list_.Lookup(offset + parent_offset);
-  } else {
-    page = page_list_.Lookup(offset);
-  }
-  if (!page || !page->IsPage()) {
-    return ZX_ERR_BAD_STATE;
-  }
-  if (out_paddr) {
-    *out_paddr = page->Page()->paddr();
-  }
-  return ZX_OK;
+
+  // Lookup the one page / first page of contiguous VMOs.
+  return cow_pages_locked()->LookupLocked(
+      offset, len,
+      [](void* arg, uint64_t offset, size_t index, paddr_t pa) {
+        if (arg) {
+          *static_cast<paddr_t*>(arg) = pa;
+        }
+        return ZX_OK;
+      },
+      out_paddr);
 }
 
 zx_status_t VmObjectPaged::ReadUser(VmAspace* current_aspace, user_out_ptr<char> ptr,
@@ -3402,19 +1290,10 @@
 }
 
 zx_status_t VmObjectPaged::TakePages(uint64_t offset, uint64_t len, VmPageSpliceList* pages) {
-  DEBUG_ASSERT(IS_PAGE_ALIGNED(offset));
-  DEBUG_ASSERT(IS_PAGE_ALIGNED(len));
+  canary_.Assert();
 
   Guard<Mutex> src_guard{&lock_};
 
-  if (!InRange(offset, len, size_)) {
-    return ZX_ERR_OUT_OF_RANGE;
-  }
-
-  if (AnyPagesPinnedLocked(offset, len) || parent_ || page_source_) {
-    return ZX_ERR_BAD_STATE;
-  }
-
   // This is only used by the userpager API, which has significant restrictions on
   // what sorts of vmos are acceptable. If splice starts being used in more places,
   // then this restriction might need to be lifted.
@@ -3422,127 +1301,24 @@
   if (mapping_list_len_ || children_list_len_) {
     return ZX_ERR_BAD_STATE;
   }
+  zx_status_t status = cow_pages_locked()->TakePagesLocked(offset, len, pages);
 
-  page_list_.ForEveryPageInRange(
-      [](const auto* p, uint64_t off) {
-        if (p->IsPage()) {
-          pmm_page_queues()->Remove(p->Page());
-        }
-        return ZX_ERR_NEXT;
-      },
-      offset, offset + len);
-
-  *pages = page_list_.TakePages(offset, len);
-
-  IncrementHierarchyGenerationCountLocked();
-
-  return ZX_OK;
-}
-
-zx_status_t VmObjectPaged::SupplyPages(uint64_t offset, uint64_t len, VmPageSpliceList* pages) {
-  DEBUG_ASSERT(IS_PAGE_ALIGNED(offset));
-  DEBUG_ASSERT(IS_PAGE_ALIGNED(len));
-
-  Guard<Mutex> guard{&lock_};
-  ASSERT(page_source_);
-
-  if (!InRange(offset, len, size_)) {
-    return ZX_ERR_OUT_OF_RANGE;
+  if (status == ZX_OK) {
+    IncrementHierarchyGenerationCountLocked();
   }
-  uint64_t end = offset + len;
-
-  list_node free_list;
-  list_initialize(&free_list);
-
-  // [new_pages_start, new_pages_start + new_pages_len) tracks the current run of
-  // consecutive new pages added to this vmo.
-  uint64_t new_pages_start = offset;
-  uint64_t new_pages_len = 0;
-  zx_status_t status = ZX_OK;
-  while (!pages->IsDone()) {
-    VmPageOrMarker src_page = pages->Pop();
-
-    // The pager API does not allow the source VMO of supply pages to have a page source, so we can
-    // assume that any empty pages are zeroes and insert explicit markers here. We need to insert
-    // explicit markers to actually resolve the pager fault.
-    if (src_page.IsEmpty()) {
-      src_page = VmPageOrMarker::Marker();
-    }
-
-    status = AddPageLocked(&src_page, offset);
-    if (status == ZX_OK) {
-      new_pages_len += PAGE_SIZE;
-    } else {
-      if (src_page.IsPage()) {
-        vm_page_t* page = src_page.ReleasePage();
-        DEBUG_ASSERT(!list_in_list(&page->queue_node));
-        list_add_tail(&free_list, &page->queue_node);
-      }
-
-      if (likely(status == ZX_ERR_ALREADY_EXISTS)) {
-        status = ZX_OK;
-
-        // We hit the end of a run of absent pages, so notify the pager source
-        // of any new pages that were added and reset the tracking variables.
-        if (new_pages_len) {
-          page_source_->OnPagesSupplied(new_pages_start, new_pages_len);
-        }
-        new_pages_start = offset + PAGE_SIZE;
-        new_pages_len = 0;
-      } else {
-        break;
-      }
-    }
-    offset += PAGE_SIZE;
-
-    DEBUG_ASSERT(new_pages_start + new_pages_len <= end);
-  }
-  if (new_pages_len) {
-    page_source_->OnPagesSupplied(new_pages_start, new_pages_len);
-  }
-
-  IncrementHierarchyGenerationCountLocked();
-
-  if (!list_is_empty(&free_list)) {
-    pmm_free(&free_list);
-  }
-
   return status;
 }
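The removed SupplyPages body above coalesces consecutive newly-inserted pages into a single page-source notification, restarting the run after any page that already exists. A standalone sketch of that run-tracking pattern, with NotifySupplied standing in for PageSource::OnPagesSupplied:

#include <cstdint>
#include <cstdio>
#include <vector>

constexpr uint64_t kPageSize = 4096;

// Stand-in for PageSource::OnPagesSupplied.
void NotifySupplied(uint64_t start, uint64_t len) {
  printf("supplied [%#llx, %#llx)\n", (unsigned long long)start,
         (unsigned long long)(start + len));
}

// |already_present[i]| is true if the page at offset + i * kPageSize was already
// committed, mirroring the ZX_ERR_ALREADY_EXISTS case in the removed code.
void SupplyRuns(uint64_t offset, const std::vector<bool>& already_present) {
  uint64_t run_start = offset;
  uint64_t run_len = 0;
  for (bool present : already_present) {
    if (!present) {
      run_len += kPageSize;  // extend the current run of newly supplied pages
    } else {
      if (run_len) {
        NotifySupplied(run_start, run_len);  // flush the run we just finished
      }
      run_start = offset + kPageSize;  // the next run can start after this page
      run_len = 0;
    }
    offset += kPageSize;
  }
  if (run_len) {
    NotifySupplied(run_start, run_len);  // flush any trailing run
  }
}

int main() {
  // Pages 0 and 1 are new, page 2 already exists, pages 3 and 4 are new:
  // prints "supplied [0, 0x2000)" then "supplied [0x3000, 0x5000)".
  SupplyRuns(0, {false, false, true, false, false});
  return 0;
}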
 
-// This is a transient operation used only to fail currently outstanding page requests. It does not
-// alter the state of the VMO, or any pages that might have already been populated within the
-// specified range.
-//
-// If certain pages in this range are populated, we must have done so via a previous SupplyPages()
-// call that succeeded. So it might be fine for clients to continue accessing them, despite the
-// larger range having failed.
-//
-// TODO(rashaeqbal): If we support a more permanent failure mode in the future, we will need to free
-// populated pages in the specified range, and possibly detach the VMO from the page source.
-zx_status_t VmObjectPaged::FailPageRequests(uint64_t offset, uint64_t len,
-                                            zx_status_t error_status) {
-  DEBUG_ASSERT(IS_PAGE_ALIGNED(offset));
-  DEBUG_ASSERT(IS_PAGE_ALIGNED(len));
-
-  // |error_status| must have already been validated by the PagerDispatcher.
-  DEBUG_ASSERT(PageSource::IsValidFailureCode(error_status));
+zx_status_t VmObjectPaged::SupplyPages(uint64_t offset, uint64_t len, VmPageSpliceList* pages) {
+  canary_.Assert();
 
   Guard<Mutex> guard{&lock_};
-  ASSERT(page_source_);
 
-  if (!InRange(offset, len, size_)) {
-    return ZX_ERR_OUT_OF_RANGE;
-  }
+  // It is possible that supplying pages fails and we increment the gen count needlessly, but the
+  // user is certainly expecting it to succeed.
+  IncrementHierarchyGenerationCountLocked();
 
-  page_source_->OnPagesFailed(offset, len, error_status);
-  return ZX_OK;
-}
-
-uint32_t VmObjectPaged::GetMappingCachePolicy() const {
-  Guard<Mutex> guard{&lock_};
-
-  return cache_policy_;
+  return cow_pages_locked()->SupplyPagesLocked(offset, len, pages);
 }
 
 zx_status_t VmObjectPaged::SetMappingCachePolicy(const uint32_t cache_policy) {
@@ -3559,7 +1335,10 @@
   // 3) vmo has no mappings
   // 4) vmo has no children
   // 5) vmo is not a child
-  if (!page_list_.IsEmpty() && cache_policy_ != ARCH_MMU_FLAG_CACHED) {
+  // Counting attributed pages does a sufficient job of checking for committed pages since we also
+  // require no children and no parent, so attribution == precisely our pages.
+  if (cow_pages_locked()->AttributedPagesInRangeLocked(0, size_locked()) != 0 &&
+      cache_policy_ != ARCH_MMU_FLAG_CACHED) {
     // We forbid transitioning committed pages from any kind of uncached->cached policy as we do
     // not currently have a story for dealing with the speculative loads that may have happened
     // against the cached physmap. That is, whilst a page was uncached the cached physmap version
@@ -3569,7 +1348,11 @@
     // Similarly it's not a problem if there aren't actually any committed pages.
     return ZX_ERR_BAD_STATE;
   }
-  if (pinned_page_count_ > 0) {
+  // If we are contiguous we 'pre-pinned' all the pages, but this doesn't count for pinning as far
+  // as the user and potential DMA is concerned. Take this into account when checking if the user
+  // pinned any pages.
+  uint64_t expected_pin_count = (is_contiguous() ? (size_locked() / PAGE_SIZE) : 0);
+  if (cow_pages_locked()->pinned_page_count_locked() > expected_pin_count) {
     return ZX_ERR_BAD_STATE;
   }
   if (!mapping_list_.is_empty()) {
@@ -3584,14 +1367,17 @@
 
   // If transitioning from a cached policy we must clean/invalidate all the pages as the kernel may
   // have written to them on behalf of the user.
+  // TODO: To avoid iterating the whole offset range VmCowPages needs an appropriate interface for
+  // iterating on committed pages. Lookup is presently unsuitable as it performs a lookup for read,
+  // which could return pages in our parent, instead of a lookup for write.
   if (cache_policy_ == ARCH_MMU_FLAG_CACHED && cache_policy != ARCH_MMU_FLAG_CACHED) {
-    page_list_.ForEveryPage([](const auto* p, uint64_t off) {
-      if (p->IsPage()) {
-        vm_page_t* page = p->Page();
-        arch_clean_invalidate_cache_range((vaddr_t)paddr_to_physmap(page->paddr()), PAGE_SIZE);
+    for (uint64_t offset = 0; offset < size_locked(); offset += PAGE_SIZE) {
+      paddr_t pa;
+      zx_status_t status = GetPageLocked(offset, 0, nullptr, nullptr, nullptr, &pa);
+      if (likely(status == ZX_OK)) {
+        arch_clean_invalidate_cache_range((vaddr_t)paddr_to_physmap(pa), PAGE_SIZE);
       }
-      return ZX_ERR_NEXT;
-    });
+    }
   }
 
   cache_policy_ = cache_policy;
@@ -3599,258 +1385,33 @@
   return ZX_OK;
 }
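The pin check above relies on a contiguous VMO arriving with every page already pinned by the kernel, so only pins beyond size/PAGE_SIZE can have come from users or DMA clients. A tiny sketch of that accounting, with hypothetical names:

#include <cstdint>

constexpr uint64_t kPageSize = 4096;

// A contiguous VMO has size/kPageSize implicit kernel pins; anything above that
// must have come from a user pin (and hence potential DMA).
bool HasUserPins(bool is_contiguous, uint64_t size_bytes, uint64_t pinned_page_count) {
  const uint64_t implicit_pins = is_contiguous ? size_bytes / kPageSize : 0;
  return pinned_page_count > implicit_pins;
}

int main() { return 0; }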
 
-void VmObjectPaged::RangeChangeUpdateFromParentLocked(const uint64_t offset, const uint64_t len,
-                                                      RangeChangeList* list) {
-  canary_.Assert();
-
-  LTRACEF("offset %#" PRIx64 " len %#" PRIx64 " p_offset %#" PRIx64 " size_ %#" PRIx64 "\n", offset,
-          len, parent_offset_, size_);
-
-  // our parent is notifying that a range of theirs changed, see where it intersects
-  // with our offset into the parent and pass it on
-  uint64_t offset_new;
-  uint64_t len_new;
-  if (!GetIntersect(parent_offset_, size_, offset, len, &offset_new, &len_new)) {
-    return;
-  }
-
-  // if they intersect with us, then by definition the new offset must be >= parent_offset_
-  DEBUG_ASSERT(offset_new >= parent_offset_);
-
-  // subtract our offset
-  offset_new -= parent_offset_;
-
-  // verify that it's still within range of us
-  DEBUG_ASSERT(offset_new + len_new <= size_);
-
-  LTRACEF("new offset %#" PRIx64 " new len %#" PRIx64 "\n", offset_new, len_new);
-
-  // pass it on. to prevent unbounded recursion we package up our desired offset and len and add
-  // ourselves to the list. UpdateRangeLocked will then get called on it later.
-  // TODO: optimize by not passing on ranges that are completely covered by pages local to this vmo
-  range_change_offset_ = offset_new;
-  range_change_len_ = len_new;
-  list->push_front(this);
-}
-
-void VmObjectPaged::RangeChangeUpdateListLocked(RangeChangeList* list, RangeChangeOp op) {
-  while (!list->is_empty()) {
-    VmObjectPaged* object = list->pop_front();
-    AssertHeld(object->lock_);
-
-    // offsets for vmos needn't be aligned, but vmars use aligned offsets
-    const uint64_t aligned_offset = ROUNDDOWN(object->range_change_offset_, PAGE_SIZE);
-    const uint64_t aligned_len =
-        ROUNDUP(object->range_change_offset_ + object->range_change_len_, PAGE_SIZE) -
-        aligned_offset;
-
-    // other mappings may have covered this offset into the vmo, so unmap those ranges
-    for (auto& m : object->mapping_list_) {
-      AssertHeld(*m.object_lock());
-      if (op == RangeChangeOp::Unmap) {
-        m.UnmapVmoRangeLocked(aligned_offset, aligned_len);
-      } else if (op == RangeChangeOp::RemoveWrite) {
-        m.RemoveWriteVmoRangeLocked(aligned_offset, aligned_len);
-      } else {
-        panic("Unknown RangeChangeOp %d\n", static_cast<int>(op));
-      }
-    }
-
-    // inform all our children this as well, so they can inform their mappings
-    for (auto& c : object->children_list_) {
-      // All our children are paged VMOs themselves.
-      DEBUG_ASSERT(c.is_paged());
-      VmObjectPaged& child = static_cast<VmObjectPaged&>(c);
-      AssertHeld(child.lock_);
-      child.RangeChangeUpdateFromParentLocked(object->range_change_offset_,
-                                              object->range_change_len_, list);
-    }
-  }
-}
-
 void VmObjectPaged::RangeChangeUpdateLocked(uint64_t offset, uint64_t len, RangeChangeOp op) {
   canary_.Assert();
 
-  RangeChangeList list;
-  this->range_change_offset_ = offset;
-  this->range_change_len_ = len;
-  list.push_front(this);
-  RangeChangeUpdateListLocked(&list, op);
-}
+  // offsets for vmos needn't be aligned, but vmars use aligned offsets
+  const uint64_t aligned_offset = ROUNDDOWN(offset, PAGE_SIZE);
+  const uint64_t aligned_len = ROUNDUP(offset + len, PAGE_SIZE) - aligned_offset;
 
-fbl::RefPtr<PageSource> VmObjectPaged::GetRootPageSourceLocked() const {
-  auto vm_object = this;
-  AssertHeld(vm_object->lock_);
-  while (vm_object->parent_) {
-    vm_object = vm_object->parent_.get();
-    if (!vm_object) {
-      return nullptr;
+  for (auto& m : mapping_list_) {
+    AssertHeld(*m.object_lock());
+    if (op == RangeChangeOp::Unmap) {
+      m.UnmapVmoRangeLocked(aligned_offset, aligned_len);
+    } else if (op == RangeChangeOp::RemoveWrite) {
+      m.RemoveWriteVmoRangeLocked(aligned_offset, aligned_len);
+    } else {
+      panic("Unknown RangeChangeOp %d\n", static_cast<int>(op));
     }
   }
-  return vm_object->page_source_;
-}
-
-bool VmObjectPaged::IsCowClonableLocked() const {
-  // Copy-on-write clones of pager vmos aren't supported as we can't
-  // efficiently make an immutable snapshot.
-  if (page_source_) {
-    return false;
-  }
-
-  // Copy-on-write clones of slices aren't supported at the moment due to the resulting VMO chains
-  // having non-hidden VMOs between hidden VMOs. This case cannot currently be handled by
-  // CloneCowPageLocked, so we forbid the construction of such clones for now.
-  // Bug: 36841
-  if (is_slice()) {
-    return false;
-  }
-
-  // vmos descended from paged/physical vmos can't be eagerly cloned.
-  auto parent = parent_.get();
-  while (parent) {
-    if (parent->page_source_) {
-      return false;
-    }
-    AssertHeld(parent->lock_);
-    parent = parent->parent_.get();
-  }
-  return true;
-}
-
-VmObjectPaged* VmObjectPaged::PagedParentOfSliceLocked(uint64_t* offset) {
-  DEBUG_ASSERT(is_slice());
-  VmObjectPaged* cur = this;
-  uint64_t off = 0;
-  while (cur->is_slice()) {
-    AssertHeld(cur->lock_);
-    off += cur->parent_offset_;
-    DEBUG_ASSERT(cur->parent_);
-    cur = cur->parent_.get();
-  }
-  *offset = off;
-  return cur;
 }
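The removed RangeChangeUpdateListLocked avoided unbounded recursion by recording the pending range on each node and draining a work list. A generic, standalone sketch of that worklist pattern over a hypothetical Node tree follows; the real code intersects against parent_offset_ and size_ in the same way.

#include <algorithm>
#include <cstdint>
#include <deque>
#include <vector>

struct Node {
  uint64_t parent_offset = 0;  // where this node's window starts in its parent
  uint64_t size = 0;
  std::vector<Node*> children;
  // Pending work, filled in just before the node is queued.
  uint64_t change_offset = 0;
  uint64_t change_len = 0;
};

void PropagateRangeChange(Node* root, uint64_t offset, uint64_t len) {
  std::deque<Node*> work;
  root->change_offset = offset;
  root->change_len = len;
  work.push_back(root);
  while (!work.empty()) {
    Node* node = work.front();
    work.pop_front();
    // ... here the real code unmaps or write-protects node's own mappings over
    // [change_offset, change_offset + change_len) ...
    for (Node* child : node->children) {
      // Intersect the changed range with the child's window into this node.
      const uint64_t start = std::max(node->change_offset, child->parent_offset);
      const uint64_t end = std::min(node->change_offset + node->change_len,
                                    child->parent_offset + child->size);
      if (start >= end) {
        continue;  // no overlap, so neither this child nor its descendants care
      }
      child->change_offset = start - child->parent_offset;
      child->change_len = end - start;
      work.push_back(child);
    }
  }
}

int main() { return 0; }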
 
 bool VmObjectPaged::EvictPage(vm_page_t* page, uint64_t offset) {
-  // Without a page source to bring the page back in we cannot even think about eviction.
-  if (!page_source_) {
-    return false;
-  }
-
   Guard<Mutex> guard{&lock_};
 
-  // Check this page is still a part of this VMO.
-  VmPageOrMarker* page_or_marker = page_list_.Lookup(offset);
-  if (!page_or_marker || !page_or_marker->IsPage() || page_or_marker->Page() != page) {
-    return false;
+  bool evicted = cow_pages_locked()->EvictPageLocked(page, offset);
+
+  if (evicted) {
+    eviction_event_count_++;
+    IncrementHierarchyGenerationCountLocked();
   }
-
-  // Pinned pages could be in use by DMA so we cannot safely evict them.
-  if (page->object.pin_count != 0) {
-    return false;
-  }
-
-  // Remove any mappings to this page before we remove it.
-  RangeChangeUpdateLocked(offset, PAGE_SIZE, RangeChangeOp::Unmap);
-
-  // Use RemovePage over just writing to page_or_marker so that the page list has the opportunity
-  // to release any now empty intermediate nodes.
-  vm_page_t* p = page_list_.RemovePage(offset).ReleasePage();
-  DEBUG_ASSERT(p == page);
-  pmm_page_queues()->Remove(page);
-  eviction_event_count_++;
-  IncrementHierarchyGenerationCountLocked();
-
-  // |page| is now owned by the caller.
-  return true;
-}
-
-bool VmObjectPaged::DebugValidatePageSplitsLocked() const {
-  if (!is_hidden()) {
-    // Nothing to validate on a leaf vmo.
-    return true;
-  }
-  // Assume this is valid until we prove otherwise.
-  bool valid = true;
-  page_list_.ForEveryPage([this, &valid](const VmPageOrMarker* page, uint64_t offset) {
-    if (!page->IsPage()) {
-      return ZX_ERR_NEXT;
-    }
-    vm_page_t* p = page->Page();
-    AssertHeld(this->lock_);
-    // We found a page in the hidden VMO; if it has been forked in either direction then we
-    // expect that if we search down that path we will find the forked page and that no
-    // descendant can 'see' back to this page.
-    const VmObjectPaged* expected = nullptr;
-    if (p->object.cow_left_split) {
-      expected = &left_child_locked();
-    } else if (p->object.cow_right_split) {
-      expected = &right_child_locked();
-    } else {
-      return ZX_ERR_NEXT;
-    }
-
-    // We know this must be true as this is a hidden vmo and so left_child_locked and
-    // right_child_locked will never have returned null.
-    DEBUG_ASSERT(expected);
-
-    // No leaf VMO in expected should be able to 'see' this page and potentially re-fork it. To
-    // validate this we need to walk the entire sub tree.
-    const VmObjectPaged* cur = expected;
-    uint64_t off = offset;
-    // We start with cur being an immediate child of 'this', so we can perform subtree traversal
-    // until we end up back in 'this'.
-    while (cur != this) {
-      AssertHeld(cur->lock_);
-      // Check that we can see this page in the parent. Importantly this first checks if
-      // |off < cur->parent_offset_| allowing us to safely perform that subtraction from then on.
-      if (off < cur->parent_offset_ || off - cur->parent_offset_ < cur->parent_start_limit_ ||
-          off - cur->parent_offset_ >= cur->parent_limit_) {
-        // This blank case is used to capture the scenario where current does not see the target
-        // offset in the parent, in which case there is no point traversing into the children.
-      } else if (cur->is_hidden()) {
-        // A hidden VMO *may* have the page, but not necessarily if both children forked it out.
-        const VmPageOrMarker* l = cur->page_list_.Lookup(off - cur->parent_offset_);
-        if (!l || l->IsEmpty()) {
-          // Page not found, we need to recurse down into our children.
-          off -= cur->parent_offset_;
-          cur = &cur->left_child_locked();
-          continue;
-        }
-      } else {
-        // We already checked in the first 'if' branch that this offset was visible, and so this
-        // leaf VMO *must* have a page or marker to prevent it 'seeing' the already forked original.
-        const VmPageOrMarker* l = cur->page_list_.Lookup(off - cur->parent_offset_);
-        if (!l || l->IsEmpty()) {
-          printf("Failed to find fork of page %p (off %p) from %p in leaf node %p (off %p)\n", p,
-                 (void*)offset, this, cur, (void*)(off - cur->parent_offset_));
-          cur->DumpLocked(1, true);
-          this->DumpLocked(1, true);
-          valid = false;
-          return ZX_ERR_STOP;
-        }
-      }
-
-      // Find our next node by walking up until we see we have come from a left path, then go right.
-      do {
-        VmObjectPaged* next = cur->parent_.get();
-        AssertHeld(next->lock_);
-        off += next->parent_offset_;
-        if (next == this) {
-          cur = next;
-          break;
-        }
-
-        // If we came from the left, go back down on the right, otherwise just keep going up.
-        if (cur == &next->left_child_locked()) {
-          off -= next->parent_offset_;
-          cur = &next->right_child_locked();
-          break;
-        }
-        cur = next;
-      } while (1);
-    }
-    return ZX_ERR_NEXT;
-  });
-  return valid;
+  return evicted;
 }