| // Copyright 2016 The Fuchsia Authors |
| // |
| // Use of this source code is governed by a MIT-style |
| // license that can be found in the LICENSE file or at |
| // https://opensource.org/licenses/MIT |
| |
| #include "vm/vm_object_paged.h" |
| |
| #include "vm_priv.h" |
| |
| #include <arch/ops.h> |
| #include <assert.h> |
| #include <err.h> |
| #include <fbl/alloc_checker.h> |
| #include <fbl/auto_call.h> |
| #include <inttypes.h> |
| #include <ktl/move.h> |
| #include <lib/console.h> |
| #include <stdlib.h> |
| #include <string.h> |
| #include <trace.h> |
| #include <vm/bootreserve.h> |
| #include <vm/fault.h> |
| #include <vm/page_source.h> |
| #include <vm/physmap.h> |
| #include <vm/vm.h> |
| #include <vm/vm_address_region.h> |
| #include <zircon/types.h> |
| |
| #define LOCAL_TRACE MAX(VM_GLOBAL_TRACE, 0) |
| |
| namespace { |
| |
| void ZeroPage(paddr_t pa) { |
| void* ptr = paddr_to_physmap(pa); |
| DEBUG_ASSERT(ptr); |
| |
| arch_zero_page(ptr); |
| } |
| |
| void ZeroPage(vm_page_t* p) { |
| paddr_t pa = p->paddr(); |
| ZeroPage(pa); |
| } |
| |
| void InitializeVmPage(vm_page_t* p) { |
| DEBUG_ASSERT(p->state() == VM_PAGE_STATE_ALLOC); |
| p->set_state(VM_PAGE_STATE_OBJECT); |
| p->object.pin_count = 0; |
| } |
| |
| // round up the size to the next page size boundary and make sure we don't wrap |
| zx_status_t RoundSize(uint64_t size, uint64_t* out_size) { |
| *out_size = ROUNDUP_PAGE_SIZE(size); |
| if (*out_size < size) { |
| return ZX_ERR_OUT_OF_RANGE; |
| } |
| |
| // there's a max size to keep indexes within range |
| if (*out_size > VmObjectPaged::MAX_SIZE) { |
| return ZX_ERR_OUT_OF_RANGE; |
| } |
| |
| return ZX_OK; |
| } |
| |
| } // namespace |
| |
| VmObjectPaged::VmObjectPaged( |
| uint32_t options, uint32_t pmm_alloc_flags, uint64_t size, |
| fbl::RefPtr<vm_lock_t> root_lock, fbl::RefPtr<PageSource> page_source) |
| : VmObject(ktl::move(root_lock)), |
| options_(options), |
| size_(size), |
| pmm_alloc_flags_(pmm_alloc_flags), |
| page_source_(ktl::move(page_source)) { |
| LTRACEF("%p\n", this); |
| |
| DEBUG_ASSERT(IS_PAGE_ALIGNED(size_)); |
| |
| // Adding to the global list needs to be done at the end of the ctor, since |
| // calls can be made into this object as soon as it is in that list. |
| AddToGlobalList(); |
| } |
| |
| void VmObjectPaged::InitializeOriginalParentLocked(fbl::RefPtr<VmObject> parent, uint64_t offset) { |
| DEBUG_ASSERT(lock_.lock().IsHeld()); |
| DEBUG_ASSERT(parent_ == nullptr); |
| DEBUG_ASSERT(original_parent_user_id_ == 0); |
| |
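|     // Derive our page list's skew from the parent's skew and our offset into the parent, so |
|     // that the two page lists' node layouts line up (OnChildRemoved relies on this when it |
|     // merges a hidden parent's pages into its remaining child). |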
| if (parent->is_paged()) { |
| page_list_.InitializeSkew( |
| VmObjectPaged::AsVmObjectPaged(parent)->page_list_.GetSkew(), offset); |
| } |
| |
| original_parent_user_id_ = parent->user_id_locked(); |
| parent_ = ktl::move(parent); |
| } |
| |
| VmObjectPaged::~VmObjectPaged() { |
| canary_.Assert(); |
| |
| LTRACEF("%p\n", this); |
| |
| RemoveFromGlobalList(); |
| |
| if (!is_hidden()) { |
| // If we're not a hidden vmo, then we need to remove ourself from our parent. This needs |
| // to be done before emptying the page list so that a hidden parent can't merge into this |
| // vmo and repopulate the page list. |
| // |
| // To prevent races with a hidden parent merging itself into this vmo, it is necessary |
| // to hold the lock over the parent_ check and into the subsequent removal call. |
| Guard<fbl::Mutex> guard{&lock_}; |
| if (parent_) { |
| LTRACEF("removing ourself from our parent %p\n", parent_.get()); |
| parent_->RemoveChild(this, guard.take()); |
| } |
| } else { |
| // Most of the hidden vmo's state should have already been cleaned up when it merged |
| // itself into its child in ::OnChildRemoved. |
| DEBUG_ASSERT(children_list_len_ == 0); |
| DEBUG_ASSERT(page_list_.IsEmpty()); |
| } |
| |
| page_list_.ForEveryPage( |
| [this](const auto p, uint64_t off) { |
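|             // Contiguous VMOs pin their pages at creation time (see CreateContiguous), so |
|             // drop that pin before asserting that the page is no longer pinned by anyone. |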
| if (this->is_contiguous()) { |
| p->object.pin_count--; |
| } |
| ASSERT(p->object.pin_count == 0); |
| return ZX_ERR_NEXT; |
| }); |
| |
| // free all of the pages attached to us |
| list_node_t list; |
| list_initialize(&list); |
| page_list_.RemoveAllPages(&list); |
| |
| if (page_source_) { |
| page_source_->Close(); |
| } |
| |
| pmm_free(&list); |
| } |
| |
| zx_status_t VmObjectPaged::CreateCommon(uint32_t pmm_alloc_flags, |
| uint32_t options, |
| uint64_t size, fbl::RefPtr<VmObject>* obj) { |
| // make sure size is page aligned |
| zx_status_t status = RoundSize(size, &size); |
| if (status != ZX_OK) { |
| return status; |
| } |
| |
| fbl::AllocChecker ac; |
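|     // The lock lives in its own ref-counted allocation so that clones created later can |
|     // share it with this vmo (see the lock_ptr_ uses in CreateCowClone). |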
| auto lock = fbl::AdoptRef<vm_lock_t>(new (&ac) vm_lock_t); |
| if (!ac.check()) { |
| return ZX_ERR_NO_MEMORY; |
| } |
| |
| auto vmo = fbl::AdoptRef<VmObject>( |
| new (&ac) VmObjectPaged(options, pmm_alloc_flags, size, ktl::move(lock), nullptr)); |
| if (!ac.check()) { |
| return ZX_ERR_NO_MEMORY; |
| } |
| |
| *obj = ktl::move(vmo); |
| |
| return ZX_OK; |
| } |
| |
| zx_status_t VmObjectPaged::Create(uint32_t pmm_alloc_flags, |
| uint32_t options, |
| uint64_t size, fbl::RefPtr<VmObject>* obj) { |
| if (options & kContiguous) { |
| // Force callers to use CreateContiguous() instead. |
| return ZX_ERR_INVALID_ARGS; |
| } |
| |
| return CreateCommon(pmm_alloc_flags, options, size, obj); |
| } |
| |
| zx_status_t VmObjectPaged::CreateContiguous(uint32_t pmm_alloc_flags, uint64_t size, |
| uint8_t alignment_log2, fbl::RefPtr<VmObject>* obj) { |
| DEBUG_ASSERT(alignment_log2 < sizeof(uint64_t) * 8); |
| // make sure size is page aligned |
| zx_status_t status = RoundSize(size, &size); |
| if (status != ZX_OK) { |
| return status; |
| } |
| |
| fbl::RefPtr<VmObject> vmo; |
| status = CreateCommon(pmm_alloc_flags, kContiguous, size, &vmo); |
| if (status != ZX_OK) { |
| return status; |
| } |
| |
| if (size == 0) { |
| *obj = ktl::move(vmo); |
| return ZX_OK; |
| } |
| |
| // allocate the pages |
| list_node page_list; |
| list_initialize(&page_list); |
| |
| size_t num_pages = size / PAGE_SIZE; |
| paddr_t pa; |
| status = pmm_alloc_contiguous(num_pages, pmm_alloc_flags, alignment_log2, &pa, &page_list); |
| if (status != ZX_OK) { |
| LTRACEF("failed to allocate enough pages (asked for %zu)\n", num_pages); |
| return ZX_ERR_NO_MEMORY; |
| } |
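|     // From here on out, return the pages to the pmm if anything fails before they have all |
|     // been handed over to the new vmo. |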
| auto cleanup_phys_pages = fbl::MakeAutoCall([&page_list]() { |
| pmm_free(&page_list); |
| }); |
| |
| // add them to the appropriate range of the object |
| VmObjectPaged* vmop = static_cast<VmObjectPaged*>(vmo.get()); |
| for (uint64_t off = 0; off < size; off += PAGE_SIZE) { |
| vm_page_t* p = list_remove_head_type(&page_list, vm_page_t, queue_node); |
| ASSERT(p); |
| |
| InitializeVmPage(p); |
| |
| // TODO: remove once pmm returns zeroed pages |
| ZeroPage(p); |
| |
| // We don't need thread-safety analysis here, since this VMO has not |
| // been shared anywhere yet. |
| [&]() TA_NO_THREAD_SAFETY_ANALYSIS { |
| status = vmop->page_list_.AddPage(p, off); |
| }(); |
| if (status != ZX_OK) { |
| return status; |
| } |
| |
| // Mark the pages as pinned, so they can't be physically rearranged |
| // underneath us. |
| p->object.pin_count++; |
| } |
| |
| cleanup_phys_pages.cancel(); |
| *obj = ktl::move(vmo); |
| return ZX_OK; |
| } |
| |
| zx_status_t VmObjectPaged::CreateFromWiredPages(const void* data, size_t size, bool exclusive, |
| fbl::RefPtr<VmObject>* obj) { |
| LTRACEF("data %p, size %zu\n", data, size); |
| |
| fbl::RefPtr<VmObject> vmo; |
| zx_status_t status = CreateCommon(PMM_ALLOC_FLAG_ANY, 0, size, &vmo); |
| if (status != ZX_OK) { |
| return status; |
| } |
| |
| if (size > 0) { |
| ASSERT(IS_PAGE_ALIGNED(size)); |
| ASSERT(IS_PAGE_ALIGNED(reinterpret_cast<uintptr_t>(data))); |
| |
| // Do a direct lookup of the physical pages backing the range of |
| // the kernel that these addresses belong to and jam them directly |
| // into the VMO. |
| // |
| // NOTE: This relies on the kernel not otherwise owning the pages. |
| // If the setup of the kernel's address space changes so that the |
| // pages are attached to a kernel VMO, this will need to change. |
| |
| paddr_t start_paddr = vaddr_to_paddr(data); |
| ASSERT(start_paddr != 0); |
| |
| for (size_t count = 0; count < size / PAGE_SIZE; count++) { |
| paddr_t pa = start_paddr + count * PAGE_SIZE; |
| vm_page_t* page = paddr_to_vm_page(pa); |
| ASSERT(page); |
| |
| if (page->state() == VM_PAGE_STATE_WIRED) { |
| boot_reserve_unwire_page(page); |
| } else { |
| // This function is only valid for memory in the boot image, |
| // which should all be wired. |
| panic("page used to back static vmo in unusable state: paddr %#" PRIxPTR |
| " state %u\n", pa, page->state()); |
| } |
| InitializeVmPage(page); |
| |
| // XXX hack to work around the ref pointer to the base class |
| auto vmo2 = static_cast<VmObjectPaged*>(vmo.get()); |
| vmo2->AddPage(page, count * PAGE_SIZE); |
| } |
| } |
| |
| if (exclusive && !is_physmap_addr(data)) { |
| // unmap it from the kernel |
| // NOTE: this means the image can no longer be referenced from original pointer |
| status = VmAspace::kernel_aspace()->arch_aspace().Unmap( |
| reinterpret_cast<vaddr_t>(data), size / PAGE_SIZE, nullptr); |
| ASSERT(status == ZX_OK); |
| } |
| |
| *obj = ktl::move(vmo); |
| |
| return ZX_OK; |
| } |
| |
| zx_status_t VmObjectPaged::CreateExternal(fbl::RefPtr<PageSource> src, uint32_t options, |
| uint64_t size, fbl::RefPtr<VmObject>* obj) { |
| // make sure size is page aligned |
| zx_status_t status = RoundSize(size, &size); |
| if (status != ZX_OK) { |
| return status; |
| } |
| |
| fbl::AllocChecker ac; |
| auto lock = fbl::AdoptRef<vm_lock_t>(new (&ac) vm_lock_t); |
| if (!ac.check()) { |
| return ZX_ERR_NO_MEMORY; |
| } |
| |
| auto vmo = fbl::AdoptRef<VmObject>(new (&ac) VmObjectPaged( |
| options, PMM_ALLOC_FLAG_ANY, size, ktl::move(lock), ktl::move(src))); |
| if (!ac.check()) { |
| return ZX_ERR_NO_MEMORY; |
| } |
| |
| *obj = ktl::move(vmo); |
| |
| return ZX_OK; |
| } |
| |
| void VmObjectPaged::InsertHiddenParentLocked(fbl::RefPtr<VmObjectPaged>&& hidden_parent) { |
|     // Insert the new VmObject |hidden_parent| between |this| and |parent_|. |
| if (parent_) { |
| hidden_parent->InitializeOriginalParentLocked(parent_, 0); |
| parent_->ReplaceChildLocked(this, hidden_parent.get()); |
| } |
| hidden_parent->AddChildLocked(this); |
| parent_ = hidden_parent; |
| |
|     // We use the user_id to walk the tree looking for the right child observer. It is |
|     // set after the hidden parent has been inserted into the tree, since the hidden |
|     // parent is not itself a 'real' (userspace-visible) child. |
| hidden_parent->user_id_ = user_id_; |
| |
| // The hidden parent should have the same view as we had into |
| // its parent, and this vmo has a full view into the hidden vmo |
| hidden_parent->parent_offset_ = parent_offset_; |
| hidden_parent->parent_limit_ = parent_limit_; |
| parent_offset_ = 0; |
| parent_limit_ = size_; |
| |
| // Move everything into the hidden parent, for immutability |
| hidden_parent->page_list_ = std::move(page_list_); |
| hidden_parent->size_ = size_; |
| } |
| |
| zx_status_t VmObjectPaged::CreateCowClone(Resizability resizable, CloneType type, |
| uint64_t offset, uint64_t size, |
| bool copy_name, fbl::RefPtr<VmObject>* child_vmo) { |
| LTRACEF("vmo %p offset %#" PRIx64 " size %#" PRIx64 "\n", this, offset, size); |
| |
| canary_.Assert(); |
| |
| // offset must be page aligned |
| if (!IS_PAGE_ALIGNED(offset)) { |
| return ZX_ERR_INVALID_ARGS; |
| } |
| |
| // make sure size is page aligned |
| zx_status_t status = RoundSize(size, &size); |
| if (status != ZX_OK) { |
| return status; |
| } |
| |
| // TODO(stevensd): Add support for bidirectional contiguous clones. |
| if (type == CloneType::Bidirectional && is_contiguous()) { |
| return ZX_ERR_NOT_SUPPORTED; |
| } |
| |
| auto options = resizable == Resizability::Resizable ? kResizable : 0u; |
| // There are two reasons for declaring/allocating the clones outside of the vmo's lock. First, |
| // the dtor might require taking the lock, so we need to ensure that it isn't called until |
| // after the lock is released. Second, diagnostics code makes calls into vmos while holding |
| // the global vmo lock. Since the VmObject ctor takes the global lock, we can't construct |
| // any vmos under any vmo lock. |
| fbl::AllocChecker ac; |
| auto vmo = fbl::AdoptRef<VmObjectPaged>(new (&ac) VmObjectPaged( |
| options, pmm_alloc_flags_, size, lock_ptr_, nullptr)); |
| if (!ac.check()) { |
| return ZX_ERR_NO_MEMORY; |
| } |
| |
| fbl::RefPtr<VmObjectPaged> hidden_parent; |
| if (type == CloneType::Bidirectional) { |
|         // To create a bidirectional clone, the kernel creates an artificial parent vmo |
| // called a 'hidden vmo'. The content of the original vmo is moved into the hidden |
| // vmo, and the original vmo becomes a child of the hidden vmo. Then a second child |
| // is created, which is the userspace visible clone. |
| // |
| // Hidden vmos are an implementation detail that are not exposed to userspace. |
| |
| if (!IsBidirectionalClonable()) { |
| return ZX_ERR_NOT_SUPPORTED; |
| } |
| |
| // The initial size is 0. It will be initialized as part of the atomic |
| // insertion into the child tree. |
| hidden_parent = fbl::AdoptRef<VmObjectPaged>(new (&ac) VmObjectPaged( |
| kHidden, pmm_alloc_flags_, 0, lock_ptr_, nullptr)); |
| if (!ac.check()) { |
| return ZX_ERR_NO_MEMORY; |
| } |
| } |
| |
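|     // AddChildLocked() reports whether a child-added notification is needed; deliver it via |
|     // NotifyOneChild() below, after the lock has been dropped. |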
| bool notify_one_child; |
| { |
| Guard<fbl::Mutex> guard{&lock_}; |
| |
| // check that we're not uncached in some way |
| if (cache_policy_ != ARCH_MMU_FLAG_CACHED) { |
| return ZX_ERR_BAD_STATE; |
| } |
| |
| // TODO: ZX-692 make sure that the accumulated parent offset of the entire |
| // parent chain doesn't wrap 64bit space. |
| vmo->parent_offset_ = offset; |
| vmo->parent_limit_ = fbl::min(size, size_ - offset); |
| |
| VmObjectPaged* clone_parent; |
| if (type == CloneType::Bidirectional) { |
| clone_parent = hidden_parent.get(); |
| |
| InsertHiddenParentLocked(ktl::move(hidden_parent)); |
| |
| // Invalidate everything the clone will be able to see. They're COW pages now, |
| // so any existing mappings can no longer directly write to the pages. |
| // TODO: Just change the mappings to RO instead of fully unmapping. |
|             RangeChangeUpdateLocked(vmo->parent_offset_, vmo->parent_limit_); |
| } else { |
| clone_parent = this; |
| } |
| |
| vmo->InitializeOriginalParentLocked(fbl::WrapRefPtr(clone_parent), offset); |
| |
| // add the new vmo as a child before we do anything, since its |
| // dtor expects to find it in its parent's child list |
| notify_one_child = clone_parent->AddChildLocked(vmo.get()); |
| |
| if (copy_name) { |
| vmo->name_ = name_; |
| } |
| } |
| |
| if (notify_one_child) { |
| NotifyOneChild(); |
| } |
| |
| *child_vmo = ktl::move(vmo); |
| |
| return ZX_OK; |
| } |
| |
| bool VmObjectPaged::OnChildAddedLocked() { |
| if (!is_hidden()) { |
| return VmObject::OnChildAddedLocked(); |
| } |
| |
| if (user_id_ == ZX_KOID_INVALID) { |
|         // The original vmo is added as a child of the hidden vmo before the hidden vmo's |
|         // user id is set, which prevents the original vmo from being counted as its own child. |
| return false; |
| } |
| |
| // After initialization, hidden vmos always have two children - the vmo on which |
| // zx_vmo_create_child was invoked and the vmo which that syscall created. |
| DEBUG_ASSERT(children_list_len_ == 2); |
| |
| // We need to proxy the child add to the original vmo so that |
|     // it can update its clone count. |
| return [&]() TA_NO_THREAD_SAFETY_ANALYSIS -> bool { |
| // Reaching into the children confuses analysis |
| for (auto& c : children_list_) { |
| if (c.user_id_ == user_id_) { |
| return c.OnChildAddedLocked(); |
| } |
| } |
| // One of the children should always have a matching user_id. |
| panic("no child with matching user_id: %" PRIx64 "\n", user_id_); |
| }(); |
| } |
| |
| void VmObjectPaged::OnChildRemoved(Guard<fbl::Mutex>&& adopt) { |
| DEBUG_ASSERT(adopt.wraps_lock(lock_ptr_->lock.lock())); |
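|     // The caller handed us ownership of the already-held lock; adopt it so that it is |
|     // released when this guard goes out of scope (or handed onwards via guard.take()). |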
| Guard<fbl::Mutex> guard{AdoptLock, ktl::move(adopt)}; |
| |
| if (!is_hidden()) { |
| VmObject::OnChildRemoved(guard.take()); |
| return; |
| } |
| |
| if (children_list_len_ == 2) { |
| // If there are multiple eager COW clones of one vmo, we need to proxy clone closure |
| // to the original vmo to update the userspace-visible child count. In this situation, |
| // we use user_id_ to walk the tree to find the original vmo. |
| |
| // A hidden vmo must be fully initialized to have 2 children. |
| DEBUG_ASSERT(user_id_ != ZX_KOID_INVALID); |
| |
| for (auto& c : children_list_) { |
| if (c.user_id_ == user_id_) { |
| c.OnChildRemoved(guard.take()); |
| return; |
| } |
| } |
| return; |
| } |
| |
|     // Hidden vmos have at most 2 children, and this function ensures that OnChildRemoved |
| // isn't called on the last child. So at this point we know that there is exactly one child. |
| DEBUG_ASSERT(children_list_len_ == 1); |
| auto& child = children_list_.front(); |
| |
| const uint64_t merge_start_offset = child.parent_offset_; |
| const uint64_t merge_end_offset = child.parent_offset_ + child.parent_limit_; |
| |
| // Update child's parent limit so that it won't be able to see more of parent_ than |
| // this can see once it gets reparented and has its offset adjusted. |
| if (child.parent_offset_ + child.parent_limit_ < parent_limit_) { |
| // No need to update the limit since child's view is a subset of this's view. |
| // TODO(stevensd): Release ancestor pages which this can see but child can't. |
| } else { |
| // Set the limit so the child won't be able to see more of its new parent |
| // than this hidden vmo was able to see. |
| if (parent_limit_ < child.parent_offset_) { |
| child.parent_limit_ = 0; |
| } else { |
| child.parent_limit_ = parent_limit_ - child.parent_offset_; |
| } |
| } |
| |
| // TODO(stevensd): Release ancestor pages below child.parent_offset_. |
| |
| bool overflow = add_overflow(parent_offset_, child.parent_offset_, &child.parent_offset_); |
| // Overflow here means that something went wrong when setting up parent limits. |
| DEBUG_ASSERT(!overflow); |
| |
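|     // Any of our pages that MergeFrom does not migrate into the child are collected here |
|     // and freed below. |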
| list_node covered_pages; |
| list_initialize(&covered_pages); |
| |
| // Merge our page list into the child page list and update all the necessary metadata. |
| // TODO: This does work proportional to the number of pages in page_list_. Investigate what |
| // would need to be done to make the work proportional to the number of pages actually split. |
| child.page_list_.MergeFrom(page_list_, |
| merge_start_offset, merge_end_offset, |
| [](vm_page* page, uint64_t offset) { |
| // TODO(stevensd): Update per-page metadata here when available |
| }, |
| [](vm_page* page, uint64_t offset) { |
| // TODO(stevensd): Update per-page metadata here when available |
| }, &covered_pages); |
| |
| if (!list_is_empty(&covered_pages)) { |
| pmm_free(&covered_pages); |
| } |
| |
| // The child which removed itself and led to the invocation should have a reference |
| // to us, in addition to child.parent_ which we are about to clear. |
| DEBUG_ASSERT(ref_count_debug() >= 2); |
| |
| // Drop the child from our list, but don't recurse back into this function. Then |
| // remove ourselves from the clone tree. |
| DropChildLocked(&child); |
| if (parent_) { |
| parent_->ReplaceChildLocked(this, &child); |
| } |
| child.parent_ = std::move(parent_); |
| |
| if (child.user_id_ == user_id_) { |
| // Pass the removal notification towards the original vmo. |
| child.OnChildRemoved(guard.take()); |
| } |
| } |
| |
| void VmObjectPaged::Dump(uint depth, bool verbose) { |
| canary_.Assert(); |
| |
| // This can grab our lock. |
| uint64_t parent_id = parent_user_id(); |
| |
| Guard<fbl::Mutex> guard{&lock_}; |
| |
| size_t count = 0; |
| page_list_.ForEveryPage([&count](const auto p, uint64_t) { |
| count++; |
| return ZX_ERR_NEXT; |
| }); |
| |
| for (uint i = 0; i < depth; ++i) { |
| printf(" "); |
| } |
| printf("vmo %p/k%" PRIu64 " size %#" PRIx64 " offset %#" PRIx64 |
| " pages %zu ref %d parent %p/k%" PRIu64 "\n", |
| this, user_id_, size_, parent_offset_, count, |
| ref_count_debug(), parent_.get(), parent_id); |
| |
| if (verbose) { |
| auto f = [depth](const auto p, uint64_t offset) { |
| for (uint i = 0; i < depth + 1; ++i) { |
| printf(" "); |
| } |
| printf("offset %#" PRIx64 " page %p paddr %#" PRIxPTR "\n", offset, p, p->paddr()); |
| return ZX_ERR_NEXT; |
| }; |
| page_list_.ForEveryPage(f); |
| } |
| } |
| |
| size_t VmObjectPaged::AllocatedPagesInRange(uint64_t offset, uint64_t len) const { |
| canary_.Assert(); |
| Guard<fbl::Mutex> guard{&lock_}; |
| return AllocatedPagesInRangeLocked(offset, len); |
| } |
| |
| size_t VmObjectPaged::AllocatedPagesInRangeLocked(uint64_t offset, uint64_t len) const { |
| uint64_t new_len; |
| if (!TrimRange(offset, len, size_, &new_len)) { |
| return 0; |
| } |
| size_t count = 0; |
| // TODO: Figure out what to do with our parent's pages. If we're a clone, |
| // page_list_ only contains pages that we've made copies of. |
| page_list_.ForEveryPage( |
| [&count, offset, new_len](const auto p, uint64_t off) { |
| if (off >= offset && off < offset + new_len) { |
| count++; |
| } |
| return ZX_ERR_NEXT; |
| }); |
| return count; |
| } |
| |
| zx_status_t VmObjectPaged::AddPage(vm_page_t* p, uint64_t offset) { |
| Guard<fbl::Mutex> guard{&lock_}; |
| |
| return AddPageLocked(p, offset); |
| } |
| |
| zx_status_t VmObjectPaged::AddPageLocked(vm_page_t* p, uint64_t offset) { |
| canary_.Assert(); |
| DEBUG_ASSERT(lock_.lock().IsHeld()); |
| |
| LTRACEF("vmo %p, offset %#" PRIx64 ", page %p (%#" PRIxPTR ")\n", this, offset, p, p->paddr()); |
| |
| DEBUG_ASSERT(p); |
| |
| if (offset >= size_) { |
| return ZX_ERR_OUT_OF_RANGE; |
| } |
| |
| zx_status_t err = page_list_.AddPage(p, offset); |
| if (err != ZX_OK) { |
| return err; |
| } |
| |
| // other mappings may have covered this offset into the vmo, so unmap those ranges |
| RangeChangeUpdateLocked(offset, PAGE_SIZE); |
| |
| return ZX_OK; |
| } |
| |
| vm_page_t* VmObjectPaged::FindInitialPageContentLocked(uint64_t offset, uint pf_flags, |
| VmObject** owner_out, |
| uint64_t* owner_offset_out) { |
| DEBUG_ASSERT(page_list_.GetPage(offset) == nullptr); |
| |
| // Search up the clone chain for any committed pages. cur_offset is the offset |
| // into cur we care about. The loop terminates either when that offset contains |
| // a committed page or when that offset can't reach into the parent. |
| vm_page_t* page = nullptr; |
| VmObjectPaged* cur = this; |
| uint64_t cur_offset = offset; |
| while (!page && cur_offset < cur->parent_limit_) { |
| // If there's no parent, then parent_limit_ is 0 and we'll never enter the loop |
| DEBUG_ASSERT(cur->parent_); |
| |
| uint64_t parent_offset; |
| bool overflowed = add_overflow(cur->parent_offset_, cur_offset, &parent_offset); |
| ASSERT(!overflowed); |
| if (parent_offset >= cur->parent_->size()) { |
| // The offset is off the end of the parent, so cur is the VmObject |
| // which will provide the page. |
| break; |
| } |
| |
| if (!cur->parent_->is_paged()) { |
| uint parent_pf_flags = pf_flags & ~VMM_PF_FLAG_WRITE; |
| auto status = cur->parent_->GetPageLocked(parent_offset, parent_pf_flags, |
| nullptr, nullptr, &page, nullptr); |
| // The first if statement should ensure we never make an out-of-range query into a |
| // physical VMO, and physical VMOs will always return a page for all valid offsets. |
| DEBUG_ASSERT(status == ZX_OK); |
| DEBUG_ASSERT(page != nullptr); |
| |
| *owner_out = cur->parent_.get(); |
| *owner_offset_out = parent_offset; |
| return page; |
| } else { |
| cur = VmObjectPaged::AsVmObjectPaged(cur->parent_); |
| cur_offset = parent_offset; |
| page = cur->page_list_.GetPage(parent_offset); |
| } |
| } |
| |
| *owner_out = cur; |
| *owner_offset_out = cur_offset; |
| |
| return page; |
| } |
| |
| // Looks up the page at the requested offset, faulting it in if requested and necessary. If |
| // this VMO has a parent and the requested page isn't found, the parent will be searched. |
| // |
| // |free_list|, if not NULL, is a list of allocated but unused vm_page_t that |
| // this function may allocate from. This function will need at most one entry, |
| // and will not fail if |free_list| is a non-empty list, faulting in was requested, |
| // and offset is in range. |
| zx_status_t VmObjectPaged::GetPageLocked(uint64_t offset, uint pf_flags, list_node* free_list, |
| PageRequest* page_request, |
| vm_page_t** const page_out, paddr_t* const pa_out) { |
| canary_.Assert(); |
| DEBUG_ASSERT(lock_.lock().IsHeld()); |
| |
| if (offset >= size_) { |
| return ZX_ERR_OUT_OF_RANGE; |
| } |
| |
| vm_page_t* p; |
| |
| // see if we already have a page at that offset |
| p = page_list_.GetPage(offset); |
| if (p) { |
| if (page_out) { |
| *page_out = p; |
| } |
| if (pa_out) { |
| *pa_out = p->paddr(); |
| } |
| return ZX_OK; |
| } |
| |
| __UNUSED char pf_string[5]; |
| LTRACEF("vmo %p, offset %#" PRIx64 ", pf_flags %#x (%s)\n", this, offset, pf_flags, |
| vmm_pf_flags_to_string(pf_flags, pf_string)); |
| |
| VmObject* page_owner; |
| uint64_t owner_offset; |
| if (!parent_) { |
| // Avoid the function call in the common case. |
| page_owner = this; |
| owner_offset = offset; |
| } else { |
| p = FindInitialPageContentLocked(offset, pf_flags, &page_owner, &owner_offset); |
| } |
| |
| if (!p) { |
| // If we're not being asked to sw or hw fault in the page, return not found. |
| if ((pf_flags & VMM_PF_FLAG_FAULT_MASK) == 0) { |
| return ZX_ERR_NOT_FOUND; |
| } |
| |
| // Since physical VMOs always provide pages for their full range, we should |
| // never get here for physical VMOs. |
| DEBUG_ASSERT(page_owner->is_paged()); |
| VmObjectPaged* typed_owner = static_cast<VmObjectPaged*>(page_owner); |
| |
| if (typed_owner->page_source_) { |
| zx_status_t status = |
| typed_owner->page_source_->GetPage(owner_offset, page_request, &p, nullptr); |
| // Pager page sources will never synchronously return a page. |
| DEBUG_ASSERT(status != ZX_OK); |
| |
| if (typed_owner != this && status == ZX_ERR_NOT_FOUND) { |
|                 // The default behavior of clones of detached pager VMOs is to fault in |
|                 // zero pages instead of propagating the pager's fault. |
| // TODO(stevensd): Add an arg to zx_vmo_create_child to optionally fault here. |
| p = vm_get_zero_page(); |
| } else { |
| return status; |
| } |
| } else { |
| // If there's no page source, we're using an anonymous page. It's not |
| // necessary to fault a writable page directly into the owning VMO. |
| p = vm_get_zero_page(); |
| } |
| } |
| DEBUG_ASSERT(p); |
| |
| if ((pf_flags & VMM_PF_FLAG_WRITE) == 0) { |
| // If we're read-only faulting, return the page so they can map or read from it directly. |
| if (page_out) { |
| *page_out = p; |
| } |
| if (pa_out) { |
| *pa_out = p->paddr(); |
| } |
| LTRACEF("read only faulting in page %p, pa %#" PRIxPTR " from parent\n", p, p->paddr()); |
| return ZX_OK; |
| } |
| |
|     // If we're write faulting, we need to allocate a writable page into this VMO. |
| vm_page_t* new_p = nullptr; |
| paddr_t new_pa; |
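|     // Prefer a page from the caller-provided free list; otherwise allocate from the pmm. |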
| if (free_list) { |
| new_p = list_remove_head_type(free_list, vm_page, queue_node); |
| if (new_p) { |
| new_pa = new_p->paddr(); |
| } |
| } |
| if (!new_p) { |
| pmm_alloc_page(pmm_alloc_flags_, &new_p, &new_pa); |
| if (!new_p) { |
| return ZX_ERR_NO_MEMORY; |
| } |
| } |
| |
| InitializeVmPage(new_p); |
| |
| void* dst = paddr_to_physmap(new_pa); |
| DEBUG_ASSERT(dst); |
| |
| if (likely(p == vm_get_zero_page())) { |
| // avoid pointless fetches by directly zeroing dst |
| arch_zero_page(dst); |
| |
|         // If ARM and not fully cached, clean/invalidate the page after zeroing it. |
|         // This check doesn't need to be done in the other branch, since that branch |
|         // is only hit for clones, and clones are always cached. |
| #if ARCH_ARM64 |
| if (cache_policy_ != ARCH_MMU_FLAG_CACHED) { |
| arch_clean_invalidate_cache_range((addr_t) dst, PAGE_SIZE); |
| } |
| #endif |
| } else { |
| // do a direct copy of the two pages |
| const void* src = paddr_to_physmap(p->paddr()); |
| DEBUG_ASSERT(src); |
| memcpy(dst, src, PAGE_SIZE); |
| } |
| |
| // Add the new page and return it. This also is responsible for |
| // unmapping this offset in any children. |
| zx_status_t status = AddPageLocked(new_p, offset); |
| DEBUG_ASSERT(status == ZX_OK); |
| |
| LTRACEF("faulted in page %p, pa %#" PRIxPTR " copied from %p\n", new_p, new_pa, p); |
| |
| if (page_out) { |
| *page_out = new_p; |
| } |
| if (pa_out) { |
| *pa_out = new_pa; |
| } |
| |
| return ZX_OK; |
| } |
| |
| zx_status_t VmObjectPaged::CommitRange(uint64_t offset, uint64_t len) { |
| canary_.Assert(); |
| LTRACEF("offset %#" PRIx64 ", len %#" PRIx64 "\n", offset, len); |
| |
| Guard<fbl::Mutex> guard{&lock_}; |
| |
| // trim the size |
| uint64_t new_len; |
| if (!TrimRange(offset, len, size_, &new_len)) { |
| return ZX_ERR_OUT_OF_RANGE; |
| } |
| |
| // was in range, just zero length |
| if (new_len == 0) { |
| return ZX_OK; |
| } |
| |
| // compute a page aligned end to do our searches in to make sure we cover all the pages |
| uint64_t end = ROUNDUP_PAGE_SIZE(offset + new_len); |
| DEBUG_ASSERT(end > offset); |
| offset = ROUNDDOWN(offset, PAGE_SIZE); |
| |
| fbl::RefPtr<PageSource> root_source = GetRootPageSourceLocked(); |
| |
| // If this vmo has a direct page source, then the source will provide the backing memory. For |
| // children that eventually depend on a page source, we skip preallocating memory to avoid |
| // potentially overallocating pages if something else touches the vmo while we're blocked on the |
| // request. Otherwise we optimize things by preallocating all the pages. |
| list_node page_list; |
| list_initialize(&page_list); |
| if (root_source == nullptr) { |
| // make a pass through the list to find out how many pages we need to allocate |
| size_t count = (end - offset) / PAGE_SIZE; |
| page_list_.ForEveryPageInRange( |
| [&count](const auto p, auto off) { |
| count--; |
| return ZX_ERR_NEXT; |
| }, |
| offset, end); |
| |
| if (count == 0) { |
| return ZX_OK; |
| } |
| |
| zx_status_t status = pmm_alloc_pages(count, pmm_alloc_flags_, &page_list); |
| if (status != ZX_OK) { |
| return status; |
| } |
| } |
| |
| auto list_cleanup = fbl::MakeAutoCall([&page_list]() { |
| if (!list_is_empty(&page_list)) { |
| pmm_free(&page_list); |
| } |
| }); |
| |
| bool retry = false; |
| PageRequest page_request(true); |
| do { |
| if (retry) { |
|             // If there was a page request that couldn't be fulfilled, we need to wait on |
|             // the request and retry the commit. Note that when we retry the loop, offset is |
|             // updated past the portion of the vmo that we successfully committed. |
| zx_status_t status = ZX_OK; |
| guard.CallUnlocked([&page_request, &status]() mutable { |
| status = page_request.Wait(); |
| }); |
| if (status != ZX_OK) { |
| return status; |
| } |
| retry = false; |
| |
| // Re-run the range checks, since size_ could have changed while we were blocked. This |
| // is not a failure, since the arguments were valid when the syscall was made. It's as |
| // if the commit was successful but then the pages were thrown away. |
| if (!TrimRange(offset, new_len, size_, &new_len)) { |
| return ZX_OK; |
| } |
| |
| if (new_len == 0) { |
| return ZX_OK; |
| } |
| |
| end = ROUNDUP_PAGE_SIZE(offset + new_len); |
| DEBUG_ASSERT(end > offset); |
| offset = ROUNDDOWN(offset, PAGE_SIZE); |
| } |
| |
| // cur_offset tracks how far we've made page requests, even if they're not done |
| uint64_t cur_offset = offset; |
| // new_offset tracks how far we've successfully committed and is where we'll |
| // restart from if we need to retry the commit |
| uint64_t new_offset = offset; |
| while (cur_offset < end) { |
| // Don't commit if we already have this page |
| vm_page_t* p = page_list_.GetPage(cur_offset); |
| if (!p) { |
| // Check if our parent has the page |
| const uint flags = VMM_PF_FLAG_SW_FAULT | VMM_PF_FLAG_WRITE; |
| zx_status_t res = GetPageLocked(cur_offset, flags, &page_list, |
| &page_request, nullptr, nullptr); |
| if (res == ZX_ERR_NEXT || res == ZX_ERR_SHOULD_WAIT) { |
| // In either case we'll need to wait on the request and retry, but if we get |
| // ZX_ERR_NEXT we keep faulting until we eventually see ZX_ERR_SHOULD_WAIT. |
| retry = true; |
| if (res == ZX_ERR_SHOULD_WAIT) { |
| break; |
| } |
| } else if (res != ZX_OK) { |
| return res; |
| } |
| } |
| |
| cur_offset += PAGE_SIZE; |
| if (!retry) { |
|                 new_offset = cur_offset; |
| } |
| } |
| |
| // Unmap all of the pages in the range we touched. This may end up unmapping non-present |
| // ranges or unmapping things multiple times, but it's necessary to ensure that we unmap |
| // everything that actually is present before anything else sees it. |
| if (cur_offset - offset) { |
| RangeChangeUpdateLocked(offset, cur_offset - offset); |
| } |
| |
| if (retry && cur_offset == end) { |
| zx_status_t res = root_source->FinalizeRequest(&page_request); |
| if (res != ZX_ERR_SHOULD_WAIT) { |
| return res; |
| } |
| } |
| offset = new_offset; |
| } while (retry); |
| |
| return ZX_OK; |
| } |
| |
| zx_status_t VmObjectPaged::DecommitRange(uint64_t offset, uint64_t len) { |
| canary_.Assert(); |
| LTRACEF("offset %#" PRIx64 ", len %#" PRIx64 "\n", offset, len); |
| |
| if (options_ & kContiguous) { |
| return ZX_ERR_NOT_SUPPORTED; |
| } |
| |
| Guard<fbl::Mutex> guard{&lock_}; |
| |
| // trim the size |
| uint64_t new_len; |
| if (!TrimRange(offset, len, size_, &new_len)) { |
| return ZX_ERR_OUT_OF_RANGE; |
| } |
| |
| // was in range, just zero length |
| if (new_len == 0) { |
| return ZX_OK; |
| } |
| |
| // figure the starting and ending page offset |
| uint64_t start = ROUNDDOWN(offset, PAGE_SIZE); |
| uint64_t end = ROUNDUP_PAGE_SIZE(offset + new_len); |
| DEBUG_ASSERT(end > offset); |
| DEBUG_ASSERT(end > start); |
| uint64_t page_aligned_len = end - start; |
| |
|     LTRACEF("start offset %#" PRIx64 ", end %#" PRIx64 ", page_aligned_len %#" PRIx64 "\n", start, end, |
| page_aligned_len); |
| |
| // TODO(teisenbe): Allow decommitting of pages pinned by |
| // CommitRangeContiguous |
| |
| if (AnyPagesPinnedLocked(start, page_aligned_len)) { |
| return ZX_ERR_BAD_STATE; |
| } |
| |
| // unmap all of the pages in this range on all the mapping regions |
| RangeChangeUpdateLocked(start, page_aligned_len); |
| |
| list_node_t list; |
| list_initialize(&list); |
| page_list_.RemovePages(start, end, &list); |
| |
| guard.Release(); |
| |
| pmm_free(&list); |
| |
| return ZX_OK; |
| } |
| |
| zx_status_t VmObjectPaged::Pin(uint64_t offset, uint64_t len) { |
| canary_.Assert(); |
| |
| Guard<fbl::Mutex> guard{&lock_}; |
| return PinLocked(offset, len); |
| } |
| |
| zx_status_t VmObjectPaged::PinLocked(uint64_t offset, uint64_t len) { |
| canary_.Assert(); |
| |
| // verify that the range is within the object |
| if (unlikely(!InRange(offset, len, size_))) { |
| return ZX_ERR_OUT_OF_RANGE; |
| } |
| |
| if (unlikely(len == 0)) { |
| return ZX_OK; |
| } |
| |
| const uint64_t start_page_offset = ROUNDDOWN(offset, PAGE_SIZE); |
| const uint64_t end_page_offset = ROUNDUP(offset + len, PAGE_SIZE); |
| |
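|     // Track how far we've successfully pinned so that a failure partway through the range |
|     // can be unwound by the UnpinLocked call below. |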
| uint64_t pin_range_end = start_page_offset; |
| zx_status_t status = page_list_.ForEveryPageAndGapInRange( |
| [&pin_range_end](const auto p, uint64_t off) { |
| DEBUG_ASSERT(p->state() == VM_PAGE_STATE_OBJECT); |
| if (p->object.pin_count == VM_PAGE_OBJECT_MAX_PIN_COUNT) { |
| return ZX_ERR_UNAVAILABLE; |
| } |
| |
| p->object.pin_count++; |
| pin_range_end = off + PAGE_SIZE; |
| return ZX_ERR_NEXT; |
| }, |
| [](uint64_t gap_start, uint64_t gap_end) { |
| return ZX_ERR_NOT_FOUND; |
| }, |
| start_page_offset, end_page_offset); |
| |
| if (status != ZX_OK) { |
| UnpinLocked(start_page_offset, pin_range_end - start_page_offset); |
| return status; |
| } |
| |
| return ZX_OK; |
| } |
| |
| void VmObjectPaged::Unpin(uint64_t offset, uint64_t len) { |
| Guard<fbl::Mutex> guard{&lock_}; |
| UnpinLocked(offset, len); |
| } |
| |
| void VmObjectPaged::UnpinLocked(uint64_t offset, uint64_t len) { |
| canary_.Assert(); |
| DEBUG_ASSERT(lock_.lock().IsHeld()); |
| |
| // verify that the range is within the object |
| ASSERT(InRange(offset, len, size_)); |
| |
| if (unlikely(len == 0)) { |
| return; |
| } |
| |
| const uint64_t start_page_offset = ROUNDDOWN(offset, PAGE_SIZE); |
| const uint64_t end_page_offset = ROUNDUP(offset + len, PAGE_SIZE); |
| |
| zx_status_t status = page_list_.ForEveryPageAndGapInRange( |
| [](const auto p, uint64_t off) { |
| DEBUG_ASSERT(p->state() == VM_PAGE_STATE_OBJECT); |
| ASSERT(p->object.pin_count > 0); |
| p->object.pin_count--; |
| return ZX_ERR_NEXT; |
| }, |
| [](uint64_t gap_start, uint64_t gap_end) { |
| return ZX_ERR_NOT_FOUND; |
| }, |
| start_page_offset, end_page_offset); |
| ASSERT_MSG(status == ZX_OK, "Tried to unpin an uncommitted page"); |
| return; |
| } |
| |
| bool VmObjectPaged::AnyPagesPinnedLocked(uint64_t offset, size_t len) { |
| canary_.Assert(); |
| DEBUG_ASSERT(lock_.lock().IsHeld()); |
| DEBUG_ASSERT(IS_PAGE_ALIGNED(offset)); |
| DEBUG_ASSERT(IS_PAGE_ALIGNED(len)); |
| |
| const uint64_t start_page_offset = offset; |
| const uint64_t end_page_offset = offset + len; |
| |
| bool found_pinned = false; |
| page_list_.ForEveryPageInRange( |
| [&found_pinned, start_page_offset, end_page_offset](const auto p, uint64_t off) { |
| DEBUG_ASSERT(off >= start_page_offset && off < end_page_offset); |
| if (p->object.pin_count > 0) { |
| found_pinned = true; |
| return ZX_ERR_STOP; |
| } |
| return ZX_ERR_NEXT; |
| }, |
| start_page_offset, end_page_offset); |
| |
| return found_pinned; |
| } |
| |
| zx_status_t VmObjectPaged::Resize(uint64_t s) { |
| canary_.Assert(); |
| |
| LTRACEF("vmo %p, size %" PRIu64 "\n", this, s); |
| |
| if (!(options_ & kResizable)) { |
| return ZX_ERR_UNAVAILABLE; |
| } |
| |
|     // round up the size to the next page size boundary and make sure we don't wrap |
| zx_status_t status = RoundSize(s, &s); |
| if (status != ZX_OK) { |
| return status; |
| } |
| |
| Guard<fbl::Mutex> guard{&lock_}; |
| |
| // make sure everything is aligned before we get started |
| DEBUG_ASSERT(IS_PAGE_ALIGNED(size_)); |
| DEBUG_ASSERT(IS_PAGE_ALIGNED(s)); |
| |
| list_node_t free_list; |
| list_initialize(&free_list); |
| |
| // see if we're shrinking or expanding the vmo |
| if (s < size_) { |
| // shrinking |
| uint64_t start = s; |
| uint64_t end = size_; |
| uint64_t len = end - start; |
| |
| // bail if there are any pinned pages in the range we're trimming |
| if (AnyPagesPinnedLocked(start, len)) { |
| return ZX_ERR_BAD_STATE; |
| } |
| |
| // unmap all of the pages in this range on all the mapping regions |
| RangeChangeUpdateLocked(start, len); |
| |
| if (page_source_) { |
| // Tell the page source that any non-resident pages that are now out-of-bounds |
| // were supplied, to ensure that any reads of those pages get woken up. |
| zx_status_t status = page_list_.ForEveryPageAndGapInRange( |
| [](const auto p, uint64_t off) { |
| return ZX_ERR_NEXT; |
| }, |
| [&](uint64_t gap_start, uint64_t gap_end) { |
| page_source_->OnPagesSupplied(gap_start, gap_end); |
| return ZX_ERR_NEXT; |
| }, |
| start, end); |
| DEBUG_ASSERT(status == ZX_OK); |
| } |
| |
| parent_limit_ = fbl::min(parent_limit_, s); |
| |
| page_list_.RemovePages(start, end, &free_list); |
| } else if (s > size_) { |
| // expanding |
| // figure the starting and ending page offset that is affected |
| uint64_t start = size_; |
| uint64_t end = s; |
| uint64_t len = end - start; |
| |
|         // inform all our children and mappings that there are new bits |
| RangeChangeUpdateLocked(start, len); |
| } |
| |
| // save bytewise size |
| size_ = s; |
| |
| guard.Release(); |
| pmm_free(&free_list); |
| |
| return ZX_OK; |
| } |
| |
| // perform some sort of copy in/out on a range of the object using a passed in lambda |
| // for the copy routine |
| template <typename T> |
| zx_status_t VmObjectPaged::ReadWriteInternal(uint64_t offset, size_t len, bool write, T copyfunc) { |
| canary_.Assert(); |
| |
| Guard<fbl::Mutex> guard{&lock_}; |
| |
| // are we uncached? abort in this case |
| if (cache_policy_ != ARCH_MMU_FLAG_CACHED) { |
| return ZX_ERR_BAD_STATE; |
| } |
| |
| // Test if in range. If we block on a page request, then it's possible for the |
| // size to change. If that happens, then any out-of-bounds reads will be caught |
| // by GetPageLocked. |
| uint64_t end_offset; |
| if (add_overflow(offset, len, &end_offset) || end_offset > size_) { |
| return ZX_ERR_OUT_OF_RANGE; |
| } |
| |
| // Walk the list of pages and do the read/write. This is performed in |
| // a loop to deal with blocking on asynchronous page requests. |
| uint64_t src_offset = offset; |
| size_t dest_offset = 0; |
| PageRequest page_request; |
| bool need_retry = false; |
| do { |
| if (need_retry) { |
| // If we looped because of an asynchronous page request, block on it |
| // outside the lock and then resume reading/writing. |
| zx_status_t status; |
| guard.CallUnlocked([&status, &page_request]() { |
| status = page_request.Wait(); |
| }); |
| if (status != ZX_OK) { |
| return status; |
| } |
| need_retry = false; |
| } |
| |
| while (len > 0) { |
| size_t page_offset = src_offset % PAGE_SIZE; |
| size_t tocopy = fbl::min(PAGE_SIZE - page_offset, len); |
| |
| // fault in the page |
| paddr_t pa; |
| auto status = GetPageLocked(src_offset, |
| VMM_PF_FLAG_SW_FAULT | (write ? VMM_PF_FLAG_WRITE : 0), |
| nullptr, &page_request, nullptr, &pa); |
| if (status == ZX_ERR_SHOULD_WAIT) { |
| need_retry = true; |
| break; |
| } else if (status != ZX_OK) { |
| return status; |
| } |
| |
| // compute the kernel mapping of this page |
| uint8_t* page_ptr = reinterpret_cast<uint8_t*>(paddr_to_physmap(pa)); |
| |
| // call the copy routine |
| auto err = copyfunc(page_ptr + page_offset, dest_offset, tocopy); |
| if (err < 0) { |
| return err; |
| } |
| |
| src_offset += tocopy; |
| dest_offset += tocopy; |
| len -= tocopy; |
| } |
| } while (need_retry); |
| |
| return ZX_OK; |
| } |
| |
| zx_status_t VmObjectPaged::Read(void* _ptr, uint64_t offset, size_t len) { |
| canary_.Assert(); |
| // test to make sure this is a kernel pointer |
| if (!is_kernel_address(reinterpret_cast<vaddr_t>(_ptr))) { |
| DEBUG_ASSERT_MSG(0, "non kernel pointer passed\n"); |
| return ZX_ERR_INVALID_ARGS; |
| } |
| |
| // read routine that just uses a memcpy |
| uint8_t* ptr = reinterpret_cast<uint8_t*>(_ptr); |
| auto read_routine = [ptr](const void* src, size_t offset, size_t len) -> zx_status_t { |
| memcpy(ptr + offset, src, len); |
| return ZX_OK; |
| }; |
| |
| return ReadWriteInternal(offset, len, false, read_routine); |
| } |
| |
| zx_status_t VmObjectPaged::Write(const void* _ptr, uint64_t offset, size_t len) { |
| canary_.Assert(); |
| // test to make sure this is a kernel pointer |
| if (!is_kernel_address(reinterpret_cast<vaddr_t>(_ptr))) { |
| DEBUG_ASSERT_MSG(0, "non kernel pointer passed\n"); |
| return ZX_ERR_INVALID_ARGS; |
| } |
| |
| // write routine that just uses a memcpy |
| const uint8_t* ptr = reinterpret_cast<const uint8_t*>(_ptr); |
| auto write_routine = [ptr](void* dst, size_t offset, size_t len) -> zx_status_t { |
| memcpy(dst, ptr + offset, len); |
| return ZX_OK; |
| }; |
| |
| return ReadWriteInternal(offset, len, true, write_routine); |
| } |
| |
| zx_status_t VmObjectPaged::Lookup(uint64_t offset, uint64_t len, |
| vmo_lookup_fn_t lookup_fn, void* context) { |
| canary_.Assert(); |
| if (unlikely(len == 0)) { |
| return ZX_ERR_INVALID_ARGS; |
| } |
| |
| Guard<fbl::Mutex> guard{&lock_}; |
| |
| // verify that the range is within the object |
| if (unlikely(!InRange(offset, len, size_))) { |
| return ZX_ERR_OUT_OF_RANGE; |
| } |
| |
| const uint64_t start_page_offset = ROUNDDOWN(offset, PAGE_SIZE); |
| const uint64_t end_page_offset = ROUNDUP(offset + len, PAGE_SIZE); |
| |
| zx_status_t status = page_list_.ForEveryPageAndGapInRange( |
| [lookup_fn, context, start_page_offset](const auto p, uint64_t off) { |
| const size_t index = (off - start_page_offset) / PAGE_SIZE; |
| paddr_t pa = p->paddr(); |
| zx_status_t status = lookup_fn(context, off, index, pa); |
| if (status != ZX_OK) { |
| if (unlikely(status == ZX_ERR_NEXT || status == ZX_ERR_STOP)) { |
| status = ZX_ERR_INTERNAL; |
| } |
| return status; |
| } |
| return ZX_ERR_NEXT; |
| }, |
| [this, lookup_fn, context, start_page_offset](uint64_t gap_start, uint64_t gap_end) { |
| // If some page was missing from our list, run the more expensive |
| // GetPageLocked to see if our parent has it. |
| for (uint64_t off = gap_start; off < gap_end; off += PAGE_SIZE) { |
| |
| paddr_t pa; |
| zx_status_t status = this->GetPageLocked(off, 0, nullptr, nullptr, nullptr, &pa); |
| if (status != ZX_OK) { |
| return ZX_ERR_NO_MEMORY; |
| } |
| const size_t index = (off - start_page_offset) / PAGE_SIZE; |
| status = lookup_fn(context, off, index, pa); |
| if (status != ZX_OK) { |
| if (unlikely(status == ZX_ERR_NEXT || status == ZX_ERR_STOP)) { |
| status = ZX_ERR_INTERNAL; |
| } |
| return status; |
| } |
| } |
| return ZX_ERR_NEXT; |
| }, |
| start_page_offset, end_page_offset); |
| if (status != ZX_OK) { |
| return status; |
| } |
| |
| return ZX_OK; |
| } |
| |
| zx_status_t VmObjectPaged::ReadUser(user_out_ptr<void> ptr, uint64_t offset, size_t len) { |
| canary_.Assert(); |
| |
| // read routine that uses copy_to_user |
| auto read_routine = [ptr](const void* src, size_t offset, size_t len) -> zx_status_t { |
| return ptr.byte_offset(offset).copy_array_to_user(src, len); |
| }; |
| |
| return ReadWriteInternal(offset, len, false, read_routine); |
| } |
| |
| zx_status_t VmObjectPaged::WriteUser(user_in_ptr<const void> ptr, uint64_t offset, size_t len) { |
| canary_.Assert(); |
| |
| // write routine that uses copy_from_user |
| auto write_routine = [ptr](void* dst, size_t offset, size_t len) -> zx_status_t { |
| return ptr.byte_offset(offset).copy_array_from_user(dst, len); |
| }; |
| |
| return ReadWriteInternal(offset, len, true, write_routine); |
| } |
| |
| zx_status_t VmObjectPaged::TakePages(uint64_t offset, uint64_t len, VmPageSpliceList* pages) { |
| Guard<fbl::Mutex> src_guard{&lock_}; |
| uint64_t end; |
| if (add_overflow(offset, len, &end) || size() < end) { |
| return ZX_ERR_OUT_OF_RANGE; |
| } |
| |
| if (AnyPagesPinnedLocked(offset, len) || parent_ || page_source_) { |
| return ZX_ERR_BAD_STATE; |
| } |
| |
| // This is only used by the userpager API, which has significant restrictions on |
| // what sorts of vmos are acceptable. If splice starts being used in more places, |
| // then this restriction might need to be lifted. |
| // TODO: Check that the region is locked once locking is implemented |
| if (mapping_list_len_ || children_list_len_ |
|         || AllocatedPagesInRangeLocked(offset, len) != (len / PAGE_SIZE)) { |
| return ZX_ERR_BAD_STATE; |
| } |
| |
| *pages = page_list_.TakePages(offset, len); |
| |
| return ZX_OK; |
| } |
| |
| zx_status_t VmObjectPaged::SupplyPages(uint64_t offset, uint64_t len, VmPageSpliceList* pages) { |
| Guard<fbl::Mutex> guard{&lock_}; |
| ASSERT(page_source_); |
| |
| uint64_t end; |
| if (add_overflow(offset, len, &end) || size() < end) { |
| return ZX_ERR_OUT_OF_RANGE; |
| } |
| |
| list_node free_list; |
| list_initialize(&free_list); |
| |
| // [new_pages_start, new_pages_start + new_pages_len) tracks the current run of |
| // consecutive new pages added to this vmo. |
| uint64_t new_pages_start = offset; |
| uint64_t new_pages_len = 0; |
| zx_status_t status = ZX_OK; |
| while (!pages->IsDone()) { |
| vm_page* src_page = pages->Pop(); |
| status = AddPageLocked(src_page, offset); |
| if (status == ZX_OK) { |
| new_pages_len += PAGE_SIZE; |
| } else { |
| list_add_tail(&free_list, &src_page->queue_node); |
| |
| if (likely(status == ZX_ERR_ALREADY_EXISTS)) { |
| status = ZX_OK; |
| |
| // We hit the end of a run of absent pages, so notify the pager source |
| // of any new pages that were added and reset the tracking variables. |
| if (new_pages_len) { |
| page_source_->OnPagesSupplied(new_pages_start, new_pages_len); |
| } |
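|                 // The page at this offset already existed, so the next possible run of |
|                 // newly supplied pages starts after it. |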
| new_pages_start = offset + PAGE_SIZE; |
| new_pages_len = 0; |
| } else { |
| break; |
| } |
| } |
| offset += PAGE_SIZE; |
| |
| DEBUG_ASSERT(new_pages_start + new_pages_len <= end); |
| } |
| if (new_pages_len) { |
| page_source_->OnPagesSupplied(new_pages_start, new_pages_len); |
| } |
| |
| if (!list_is_empty(&free_list)) { |
| pmm_free(&free_list); |
| } |
| |
| return status; |
| } |
| |
| uint32_t VmObjectPaged::GetMappingCachePolicy() const { |
| Guard<fbl::Mutex> guard{&lock_}; |
| |
| return cache_policy_; |
| } |
| |
| zx_status_t VmObjectPaged::SetMappingCachePolicy(const uint32_t cache_policy) { |
| // Is it a valid cache flag? |
| if (cache_policy & ~ZX_CACHE_POLICY_MASK) { |
| return ZX_ERR_INVALID_ARGS; |
| } |
| |
| Guard<fbl::Mutex> guard{&lock_}; |
| |
| // conditions for allowing the cache policy to be set: |
| // 1) vmo has no pages committed currently |
| // 2) vmo has no mappings |
| // 3) vmo has no children |
| // 4) vmo is not a child |
| if (!page_list_.IsEmpty()) { |
| return ZX_ERR_BAD_STATE; |
| } |
| if (!mapping_list_.is_empty()) { |
| return ZX_ERR_BAD_STATE; |
| } |
| if (!children_list_.is_empty()) { |
| return ZX_ERR_BAD_STATE; |
| } |
| if (parent_) { |
| return ZX_ERR_BAD_STATE; |
| } |
| |
| cache_policy_ = cache_policy; |
| |
| return ZX_OK; |
| } |
| |
| void VmObjectPaged::RangeChangeUpdateFromParentLocked(const uint64_t offset, const uint64_t len) { |
| canary_.Assert(); |
| |
| LTRACEF("offset %#" PRIx64 " len %#" PRIx64 " p_offset %#" PRIx64 " size_ %#" PRIx64 "\n", |
| offset, len, parent_offset_, size_); |
| |
| // our parent is notifying that a range of theirs changed, see where it intersects |
| // with our offset into the parent and pass it on |
| uint64_t offset_new; |
| uint64_t len_new; |
| if (!GetIntersect(parent_offset_, size_, offset, len, |
| &offset_new, &len_new)) { |
| return; |
| } |
| |
| // if they intersect with us, then by definition the new offset must be >= parent_offset_ |
| DEBUG_ASSERT(offset_new >= parent_offset_); |
| |
| // subtract our offset |
| offset_new -= parent_offset_; |
| |
| // verify that it's still within range of us |
| DEBUG_ASSERT(offset_new + len_new <= size_); |
| |
| LTRACEF("new offset %#" PRIx64 " new len %#" PRIx64 "\n", |
| offset_new, len_new); |
| |
| // pass it on |
| // TODO: optimize by not passing on ranges that are completely covered by pages local to this vmo |
| RangeChangeUpdateLocked(offset_new, len_new); |
| } |
| |
| fbl::RefPtr<PageSource> VmObjectPaged::GetRootPageSourceLocked() const { |
| auto vm_object = this; |
| while (vm_object->parent_) { |
| vm_object = VmObjectPaged::AsVmObjectPaged(vm_object->parent_); |
| if (!vm_object) { |
| return nullptr; |
| } |
| } |
| return vm_object->page_source_; |
| } |
| |
| bool VmObjectPaged::IsBidirectionalClonable() const { |
| Guard<fbl::Mutex> guard{&lock_}; |
| |
| // Bidirectional clones of pager vmos aren't supported as we can't |
| // efficiently make an immutable snapshot. |
| if (page_source_) { |
| return false; |
| } |
| |
|     // vmos descended from pager-backed or physical vmos can't be eagerly cloned. |
| auto parent = parent_; |
| while (parent) { |
| auto p = VmObjectPaged::AsVmObjectPaged(parent); |
| if (!p || p->page_source_) { |
| return false; |
| } |
| parent = p->parent_; |
| } |
| return true; |
| } |