| // Copyright 2016 The Fuchsia Authors |
| // |
| // Use of this source code is governed by a MIT-style |
| // license that can be found in the LICENSE file or at |
| // https://opensource.org/licenses/MIT |
| #include "vm/vm_object_paged.h" |
| |
| #include <align.h> |
| #include <assert.h> |
| #include <inttypes.h> |
| #include <lib/console.h> |
| #include <lib/counters.h> |
| #include <lib/fit/defer.h> |
| #include <stdlib.h> |
| #include <string.h> |
| #include <trace.h> |
| #include <zircon/compiler.h> |
| #include <zircon/errors.h> |
| #include <zircon/types.h> |
| |
| #include <arch/ops.h> |
| #include <fbl/alloc_checker.h> |
| #include <ktl/algorithm.h> |
| #include <ktl/array.h> |
| #include <ktl/utility.h> |
| #include <vm/discardable_vmo_tracker.h> |
| #include <vm/fault.h> |
| #include <vm/page_source.h> |
| #include <vm/physical_page_provider.h> |
| #include <vm/physmap.h> |
| #include <vm/vm.h> |
| #include <vm/vm_address_region.h> |
| #include <vm/vm_cow_pages.h> |
| |
| #include "vm_priv.h" |
| |
| #include <ktl/enforce.h> |
| |
| #define LOCAL_TRACE VM_GLOBAL_TRACE(0) |
| |
| namespace { |
| |
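| // Counts the number of memory attribution queries performed against paged VMOs. |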
| KCOUNTER(vmo_attribution_queries, "vm.attributed_memory.object.queries") |
| |
| } // namespace |
| |
| VmObjectPaged::VmObjectPaged(uint32_t options, fbl::RefPtr<VmCowPages> cow_pages, VmCowRange range) |
| : VmObject(options | kPaged), cow_pages_(ktl::move(cow_pages)), cow_range_(range) { |
| LTRACEF("%p\n", this); |
| } |
| |
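| // A VMO created without an explicit range views the entire VmCowPages, represented here as a |
| // maximal range starting at offset 0. |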
| VmObjectPaged::VmObjectPaged(uint32_t options, fbl::RefPtr<VmCowPages> cow_pages) |
| : VmObjectPaged(options, ktl::move(cow_pages), VmCowRange(0, UINT64_MAX)) {} |
| |
| VmObjectPaged::~VmObjectPaged() { |
| canary_.Assert(); |
| |
| LTRACEF("%p\n", this); |
| |
| // VmObjectPaged initialization must always complete and is not allowed to fail, so the object |
| // should always end up in the global list. |
| DEBUG_ASSERT(InGlobalList()); |
| |
| DestructorHelper(); |
| } |
| |
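| // Performs the bulk of destruction: removes this VMO from the global list, unpins any |
| // always-pinned range, detaches from the shared VmCowPages (re-homing the reference list if |
| // needed), re-homes children to our parent and drops ourselves from the parent's child list. |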
| void VmObjectPaged::DestructorHelper() { |
| RemoveFromGlobalList(); |
| |
| if (options_ & kAlwaysPinned) { |
| Unpin(0, size()); |
| } |
| |
| fbl::RefPtr<VmCowPages> deferred; |
| { |
| Guard<CriticalMutex> guard{lock()}; |
| |
| // Only clear the backlink if we are not a reference. A reference does not "own" the VmCowPages, |
| // so in the typical case, the VmCowPages will not have its backlink set to a reference. There |
| // does exist an edge case where the backlink can be a reference, which is handled by the else |
| // block below. |
| if (!is_reference()) { |
| cow_pages_locked()->set_paged_backlink_locked(nullptr); |
| } else { |
| // If this is a reference, we need to remove it from the original (parent) VMO's reference |
| // list. |
| VmObjectPaged* root_ref = cow_pages_locked()->get_paged_backlink_locked(); |
| // The VmCowPages will have a valid backlink, either to the original VmObjectPaged or a |
| // reference VmObjectPaged, as long as there is a reference that is alive. We know that this |
| // is a reference. |
| DEBUG_ASSERT(root_ref); |
| if (likely(root_ref != this)) { |
| AssertHeld(root_ref->lock_ref()); |
| VmObjectPaged* removed = root_ref->reference_list_.erase(*this); |
| DEBUG_ASSERT(removed == this); |
| } else { |
| // It is possible for the backlink to point to |this| if the original parent went away at |
| // some point and the rest of the reference list had to be re-homed to |this|, and the |
| // backlink set to |this|. The VmCowPages was pointing to us, so clear the backlink. The |
| // backlink will get reset below if other references remain. |
| cow_pages_locked()->set_paged_backlink_locked(nullptr); |
| } |
| } |
| |
| // If this VMO had references, pick one of the references as the paged backlink from the shared |
| // VmCowPages. Also, move the remainder of the reference list to the chosen reference. Note that |
| // we're only moving the reference list over without adding the references to the children list; |
| // we do not want these references to be counted as children of the chosen VMO. We simply want a |
| // safe way to propagate mapping updates and VmCowPages changes on hidden node addition. |
| if (!reference_list_.is_empty()) { |
| // We should only be attempting to reset the backlink if the owner is going away and has reset |
| // the backlink above. |
| DEBUG_ASSERT(cow_pages_locked()->get_paged_backlink_locked() == nullptr); |
| VmObjectPaged* paged_backlink = reference_list_.pop_front(); |
| cow_pages_locked()->set_paged_backlink_locked(paged_backlink); |
| AssertHeld(paged_backlink->lock_ref()); |
| paged_backlink->reference_list_.splice(paged_backlink->reference_list_.end(), |
| reference_list_); |
| } |
| DEBUG_ASSERT(reference_list_.is_empty()); |
| deferred = cow_pages_; |
| } |
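| // Perform the dead transition outside the lock. MaybeDeadTransition can return another |
| // VmCowPages that also needs to be processed, so loop until there is nothing left. |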
| while (deferred) { |
| deferred = deferred->MaybeDeadTransition(); |
| } |
| |
| fbl::RefPtr<VmObjectPaged> maybe_parent; |
| |
| // Re-home all our children with any parent that we have. |
| { |
| Guard<CriticalMutex> child_guard{ChildListLock::Get()}; |
| while (!children_list_.is_empty()) { |
| VmObject* c = &children_list_.front(); |
| children_list_.pop_front(); |
| VmObjectPaged* child = reinterpret_cast<VmObjectPaged*>(c); |
| child->parent_ = parent_; |
| if (parent_) { |
| // Ignore the return since 'this' is a child so we know we are not transitioning from 0->1 |
| // children. |
| [[maybe_unused]] bool notify = parent_->AddChildLocked(child); |
| DEBUG_ASSERT(!notify); |
| } |
| } |
| |
| if (parent_) { |
| // As parent_ is a raw pointer we must ensure it lives long enough before calling any method on |
| // it. To do so we attempt to upgrade it to a refptr, which could fail if it's |
| // already slated for deletion. |
| maybe_parent = fbl::MakeRefPtrUpgradeFromRaw(parent_, child_guard); |
| if (maybe_parent) { |
| // Holding refptr, can safely pass in the guard to RemoveChild. |
| parent_->RemoveChild(this, child_guard.take()); |
| } else { |
| // The parent is up for deletion, so there's no need to use RemoveChild since there is no |
| // user dispatcher to notify anyway; just drop ourselves to keep the hierarchy |
| // correct. |
| parent_->DropChildLocked(this); |
| } |
| } |
| } |
| if (maybe_parent) { |
| // As we constructed a RefPtr to our parent, and we are in our own destructor, there is now |
| // the potential for recursive destruction if we need to delete the parent due to holding the |
| // last ref, hit this same path, etc. |
| VmDeferredDeleter<VmObjectPaged>::DoDeferredDelete(ktl::move(maybe_parent)); |
| } |
| } |
| |
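| // Applies a user supplied eviction hint to the given range. Hints are best effort and only |
| // apply to VMOs whose root page source supports eviction; for others they are silently ignored. |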
| zx_status_t VmObjectPaged::HintRange(uint64_t offset, uint64_t len, EvictionHint hint) { |
| canary_.Assert(); |
| |
| if (can_block_on_page_requests() && hint == EvictionHint::AlwaysNeed) { |
| lockdep::AssertNoLocksHeld(); |
| } |
| |
| // Ignore hints for non user-pager-backed VMOs. We choose to silently ignore hints for |
| // incompatible combinations instead of failing. This is because the kernel does not make any |
| // explicit guarantees on hints; since they are just hints, the kernel is always free to ignore |
| // them. |
| if (!cow_pages_->can_root_source_evict()) { |
| return ZX_OK; |
| } |
| |
| auto cow_range = GetCowRange(offset, len); |
| if (!cow_range) { |
| return ZX_ERR_OUT_OF_RANGE; |
| } |
| |
| switch (hint) { |
| case EvictionHint::DontNeed: { |
| return cow_pages_->PromoteRangeForReclamation(*cow_range); |
| } |
| case EvictionHint::AlwaysNeed: { |
| // Hints are best effort, so ignore any errors encountered while paging in. |
| return cow_pages_->ProtectRangeFromReclamation(*cow_range, /*set_always_need=*/true, |
| /*ignore_errors=*/true); |
| } |
| } |
| |
| return ZX_OK; |
| } |
| |
| zx_status_t VmObjectPaged::PrefetchRange(uint64_t offset, uint64_t len) { |
| canary_.Assert(); |
| if (can_block_on_page_requests()) { |
| lockdep::AssertNoLocksHeld(); |
| } |
| |
| // Round offset and len to be page aligned. Use a sub-scope to validate that temporary end |
| // calculations cannot be accidentally used later on. |
| { |
| uint64_t end; |
| if (add_overflow(offset, len, &end)) { |
| return ZX_ERR_OUT_OF_RANGE; |
| } |
| const uint64_t end_page = ROUNDUP_PAGE_SIZE(end); |
| if (end_page < end) { |
| return ZX_ERR_OUT_OF_RANGE; |
| } |
| DEBUG_ASSERT(end_page >= offset); |
| offset = ROUNDDOWN_PAGE_SIZE(offset); |
| len = end_page - offset; |
| } |
| |
| auto cow_range = GetCowRange(offset, len); |
| if (!cow_range) { |
| return ZX_ERR_OUT_OF_RANGE; |
| } |
| // The range cannot overflow, otherwise GetCowRange would have failed. Since offset and len were |
| // page aligned above, the resulting cow range is page aligned too. |
| DEBUG_ASSERT(cow_range->is_page_aligned()); |
| if (cow_pages_->is_root_source_user_pager_backed()) { |
| return cow_pages_->ProtectRangeFromReclamation(*cow_range, |
| /*set_always_need=*/false, |
| /*ignore_errors=*/false); |
| } |
| // For VMOs without a user pager, prefetching just needs to decompress any compressed pages in |
| // the range. |
| return cow_pages_->DecompressInRange(*cow_range); |
| } |
| |
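| // Best effort commit of the given range for VMOs marked high memory priority; returns early if |
| // the priority flag is not currently set. |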
| void VmObjectPaged::CommitHighPriorityPages(uint64_t offset, uint64_t len) { |
| { |
| Guard<CriticalMutex> guard{lock()}; |
| if (!cow_pages_locked()->is_high_memory_priority_locked()) { |
| return; |
| } |
| } |
| // Ignore the result of the prefetch, high priority commit is best effort. |
| PrefetchRange(offset, len); |
| } |
| |
| bool VmObjectPaged::CanDedupZeroPagesLocked() { |
| canary_.Assert(); |
| |
| // Skip uncached VMOs as we cannot efficiently scan them. |
| if ((self_locked()->GetMappingCachePolicyLocked() & ZX_CACHE_POLICY_MASK) != |
| ZX_CACHE_POLICY_CACHED) { |
| return false; |
| } |
| |
| // Okay to dedup from this VMO. |
| return true; |
| } |
| |
| zx_status_t VmObjectPaged::CreateCommon(uint32_t pmm_alloc_flags, uint32_t options, uint64_t size, |
| fbl::RefPtr<VmObjectPaged>* obj) { |
| DEBUG_ASSERT(!(options & (kContiguous | kCanBlockOnPageRequests))); |
| |
| // Cannot be resizable and pinned; otherwise we will lose track of the pinned range. |
| if ((options & kResizable) && (options & kAlwaysPinned)) { |
| return ZX_ERR_INVALID_ARGS; |
| } |
| |
| if (pmm_alloc_flags & PMM_ALLOC_FLAG_CAN_WAIT) { |
| options |= kCanBlockOnPageRequests; |
| } |
| |
| // make sure size is page aligned |
| if (!IS_PAGE_ROUNDED(size)) { |
| return ZX_ERR_INVALID_ARGS; |
| } |
| if (size > MAX_SIZE) { |
| return ZX_ERR_OUT_OF_RANGE; |
| } |
| |
| fbl::AllocChecker ac; |
| |
| ktl::unique_ptr<DiscardableVmoTracker> discardable = nullptr; |
| if (options & kDiscardable) { |
| discardable = ktl::make_unique<DiscardableVmoTracker>(&ac); |
| if (!ac.check()) { |
| return ZX_ERR_NO_MEMORY; |
| } |
| } |
| |
| // This function isn't used to create slices or pager-backed VMOs, so VmCowPageOptions can be |
| // kNone. |
| fbl::RefPtr<VmCowPages> cow_pages; |
| zx_status_t status = VmCowPages::Create(VmCowPagesOptions::kNone, pmm_alloc_flags, size, |
| ktl::move(discardable), &cow_pages); |
| if (status != ZX_OK) { |
| return status; |
| } |
| |
| // If this VMO will always be pinned, allocate and pin the pages in the VmCowPages prior to |
| // creating the VmObjectPaged. This ensures the VmObjectPaged destructor can assume that the pages |
| // are committed and pinned. |
| if (options & kAlwaysPinned) { |
| list_node_t prealloc_pages; |
| list_initialize(&prealloc_pages); |
| status = pmm_alloc_pages(size / PAGE_SIZE, pmm_alloc_flags, &prealloc_pages); |
| if (status != ZX_OK) { |
| return status; |
| } |
| Guard<CriticalMutex> guard{cow_pages->lock()}; |
| // Add all the preallocated pages to the object; this takes ownership of the pages regardless |
| // of the outcome. This is a new VMO, but this call could still fail due to OOM. |
| status = cow_pages->AddNewPagesLocked(0, &prealloc_pages, VmCowPages::CanOverwriteContent::Zero, |
| true, nullptr); |
| if (status != ZX_OK) { |
| return status; |
| } |
| // With all the pages in place, pin them. |
| status = cow_pages->PinRangeLocked(VmCowRange(0, size)); |
| ASSERT(status == ZX_OK); |
| } |
| |
| auto vmo = fbl::AdoptRef<VmObjectPaged>(new (&ac) VmObjectPaged(options, ktl::move(cow_pages))); |
| if (!ac.check()) { |
| if (options & kAlwaysPinned) { |
| Guard<CriticalMutex> guard{cow_pages->lock()}; |
| cow_pages->UnpinLocked(VmCowRange(0, size), nullptr); |
| } |
| return ZX_ERR_NO_MEMORY; |
| } |
| |
| // This creation has succeeded. Must wire up the cow pages and *then* place in the global list. |
| { |
| Guard<CriticalMutex> guard{vmo->lock()}; |
| vmo->cow_pages_locked()->set_paged_backlink_locked(vmo.get()); |
| vmo->cow_pages_locked()->TransitionToAliveLocked(); |
| } |
| vmo->AddToGlobalList(); |
| |
| *obj = ktl::move(vmo); |
| |
| return ZX_OK; |
| } |
| |
| zx_status_t VmObjectPaged::Create(uint32_t pmm_alloc_flags, uint32_t options, uint64_t size, |
| fbl::RefPtr<VmObjectPaged>* obj) { |
| if (options & (kContiguous | kCanBlockOnPageRequests)) { |
| // Force callers to use CreateContiguous() instead. |
| return ZX_ERR_INVALID_ARGS; |
| } |
| |
| return CreateCommon(pmm_alloc_flags, options, size, obj); |
| } |
| |
| zx_status_t VmObjectPaged::CreateContiguous(uint32_t pmm_alloc_flags, uint64_t size, |
| uint8_t alignment_log2, |
| fbl::RefPtr<VmObjectPaged>* obj) { |
| DEBUG_ASSERT(alignment_log2 < sizeof(uint64_t) * 8); |
| // make sure size is page aligned |
| if (!IS_PAGE_ROUNDED(size)) { |
| return ZX_ERR_INVALID_ARGS; |
| } |
| if (size > MAX_SIZE) { |
| return ZX_ERR_OUT_OF_RANGE; |
| } |
| |
| fbl::AllocChecker ac; |
| // For contiguous VMOs, we need a PhysicalPageProvider to reclaim specific loaned physical pages |
| // on commit. |
| auto page_provider = fbl::AdoptRef(new (&ac) PhysicalPageProvider(size)); |
| if (!ac.check()) { |
| return ZX_ERR_NO_MEMORY; |
| } |
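| // Keep raw pointers to the provider and (below) the page source; ownership moves into the |
| // PageSource and VmCowPages respectively, and the raw pointers are only used to Init() the |
| // provider once the VMO and its pages are fully set up. |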
| PhysicalPageProvider* physical_page_provider_ptr = page_provider.get(); |
| fbl::RefPtr<PageSource> page_source = |
| fbl::AdoptRef(new (&ac) PageSource(ktl::move(page_provider))); |
| if (!ac.check()) { |
| return ZX_ERR_NO_MEMORY; |
| } |
| auto* page_source_ptr = page_source.get(); |
| |
| fbl::RefPtr<VmObjectPaged> vmo; |
| zx_status_t status = |
| CreateWithSourceCommon(page_source, pmm_alloc_flags, kContiguous, size, &vmo); |
| if (status != ZX_OK) { |
| // Make sure to close the page source we created, as it will not get closed by the VmCowPages |
| // since that creation failed. |
| page_source->Close(); |
| return status; |
| } |
| |
| if (size == 0) { |
| *obj = ktl::move(vmo); |
| return ZX_OK; |
| } |
| |
| // allocate the pages |
| list_node page_list; |
| list_initialize(&page_list); |
| |
| size_t num_pages = size / PAGE_SIZE; |
| paddr_t pa; |
| status = pmm_alloc_contiguous(num_pages, pmm_alloc_flags, alignment_log2, &pa, &page_list); |
| if (status != ZX_OK) { |
| LTRACEF("failed to allocate enough pages (asked for %zu)\n", num_pages); |
| return ZX_ERR_NO_MEMORY; |
| } |
| Guard<CriticalMutex> guard{vmo->lock()}; |
| // Add them to the appropriate range of the object; this takes ownership of all the pages |
| // regardless of outcome. |
| // This is a newly created VMO with a page source, so we don't expect to be overwriting anything |
| // in its page list. |
| status = vmo->cow_pages_locked()->AddNewPagesLocked( |
| 0, &page_list, VmCowPages::CanOverwriteContent::None, true, nullptr); |
| if (status != ZX_OK) { |
| return status; |
| } |
| |
| physical_page_provider_ptr->Init(vmo->cow_pages_locked(), page_source_ptr, pa); |
| |
| *obj = ktl::move(vmo); |
| return ZX_OK; |
| } |
| |
| zx_status_t VmObjectPaged::CreateFromWiredPages(const void* data, size_t size, bool exclusive, |
| fbl::RefPtr<VmObjectPaged>* obj) { |
| LTRACEF("data %p, size %zu\n", data, size); |
| |
| fbl::RefPtr<VmObjectPaged> vmo; |
| zx_status_t status = CreateCommon(PMM_ALLOC_FLAG_ANY, 0, size, &vmo); |
| if (status != ZX_OK) { |
| return status; |
| } |
| |
| if (size > 0) { |
| ASSERT(IS_PAGE_ROUNDED(size)); |
| ASSERT(IS_PAGE_ROUNDED(reinterpret_cast<uintptr_t>(data))); |
| |
| // Do a direct lookup of the physical pages backing the range of |
| // the kernel that these addresses belong to and jam them directly |
| // into the VMO. |
| // |
| // NOTE: This relies on the kernel not otherwise owning the pages. |
| // If the setup of the kernel's address space changes so that the |
| // pages are attached to a kernel VMO, this will need to change. |
| |
| paddr_t start_paddr = vaddr_to_paddr(data); |
| ASSERT(start_paddr != 0); |
| |
| Guard<CriticalMutex> guard{vmo->lock()}; |
| |
| for (size_t count = 0; count < size / PAGE_SIZE; count++) { |
| paddr_t pa = start_paddr + count * PAGE_SIZE; |
| vm_page_t* page = paddr_to_vm_page(pa); |
| ASSERT(page); |
| |
| if (page->state() == vm_page_state::WIRED) { |
| pmm_unwire_page(page); |
| } else { |
| // This function is only valid for memory in the boot image, |
| // which should all be wired. |
| panic("page used to back static vmo in unusable state: paddr %#" PRIxPTR " state %zu\n", pa, |
| VmPageStateIndex(page->state())); |
| } |
| // This is a newly created anonymous VMO, so we expect to be overwriting zeros. A newly |
| // created anonymous VMO with no committed pages has all its content implicitly zero. |
| status = vmo->cow_pages_locked()->AddNewPageLocked( |
| count * PAGE_SIZE, page, VmCowPages::CanOverwriteContent::Zero, nullptr, false, nullptr); |
| ASSERT_MSG(status == ZX_OK, |
| "AddNewPageLocked failed on page %zu of %zu at %#" PRIx64 " from [%#" PRIx64 |
| ", %#" PRIx64 ")", |
| count, size / PAGE_SIZE, pa, start_paddr, start_paddr + size); |
| DEBUG_ASSERT(!page->is_loaned()); |
| } |
| |
| if (exclusive && !is_physmap_addr(data)) { |
| // unmap it from the kernel |
| // NOTE: this means the image can no longer be referenced from original pointer |
| status = VmAspace::kernel_aspace()->arch_aspace().Unmap( |
| reinterpret_cast<vaddr_t>(data), size / PAGE_SIZE, |
| ArchVmAspaceInterface::ArchUnmapOptions::None); |
| ASSERT(status == ZX_OK); |
| } |
| if (!exclusive) { |
| // Pin all the pages as we must never decommit any of them since they are shared elsewhere. |
| ASSERT(vmo->cow_range_.offset == 0); |
| status = vmo->cow_pages_locked()->PinRangeLocked(VmCowRange(0, size)); |
| ASSERT(status == ZX_OK); |
| } |
| } |
| |
| *obj = ktl::move(vmo); |
| |
| return ZX_OK; |
| } |
| |
| zx_status_t VmObjectPaged::CreateExternal(fbl::RefPtr<PageSource> src, uint32_t options, |
| uint64_t size, fbl::RefPtr<VmObjectPaged>* obj) { |
| if (options & (kDiscardable | kCanBlockOnPageRequests | kAlwaysPinned)) { |
| return ZX_ERR_INVALID_ARGS; |
| } |
| |
| // make sure size is page aligned |
| if (!IS_PAGE_ROUNDED(size)) { |
| return ZX_ERR_INVALID_ARGS; |
| } |
| if (size > MAX_SIZE) { |
| return ZX_ERR_OUT_OF_RANGE; |
| } |
| |
| // External VMOs always support delayed PMM allocations, since they already have to tolerate |
| // arbitrary waits for pages due to the PageSource. |
| return CreateWithSourceCommon(ktl::move(src), PMM_ALLOC_FLAG_ANY | PMM_ALLOC_FLAG_CAN_WAIT, |
| options | kCanBlockOnPageRequests, size, obj); |
| } |
| |
| zx_status_t VmObjectPaged::CreateWithSourceCommon(fbl::RefPtr<PageSource> src, |
| uint32_t pmm_alloc_flags, uint32_t options, |
| uint64_t size, fbl::RefPtr<VmObjectPaged>* obj) { |
| // Caller must check that size is page aligned. |
| DEBUG_ASSERT(IS_PAGE_ROUNDED(size)); |
| DEBUG_ASSERT(!(options & kAlwaysPinned)); |
| |
| fbl::AllocChecker ac; |
| |
| // The cow pages will have a page source, so blocking is always possible. |
| options |= kCanBlockOnPageRequests; |
| |
| VmCowPagesOptions cow_options = VmCowPagesOptions::kNone; |
| cow_options |= VmCowPagesOptions::kPageSourceRoot; |
| |
| if (options & kContiguous) { |
| cow_options |= VmCowPagesOptions::kCannotDecommitZeroPages; |
| } |
| |
| if (src->properties().is_user_pager) { |
| cow_options |= VmCowPagesOptions::kUserPagerBackedRoot; |
| } |
| |
| if (src->properties().is_preserving_page_content) { |
| cow_options |= VmCowPagesOptions::kPreservingPageContentRoot; |
| } |
| |
| fbl::RefPtr<VmCowPages> cow_pages; |
| zx_status_t status = VmCowPages::CreateExternal(ktl::move(src), cow_options, size, &cow_pages); |
| if (status != ZX_OK) { |
| return status; |
| } |
| |
| auto vmo = fbl::AdoptRef<VmObjectPaged>(new (&ac) VmObjectPaged(options, ktl::move(cow_pages))); |
| if (!ac.check()) { |
| return ZX_ERR_NO_MEMORY; |
| } |
| |
| // This creation has succeeded. Must wire up the cow pages and *then* place in the global list. |
| { |
| Guard<CriticalMutex> guard{vmo->lock()}; |
| vmo->cow_pages_locked()->set_paged_backlink_locked(vmo.get()); |
| vmo->cow_pages_locked()->TransitionToAliveLocked(); |
| } |
| vmo->AddToGlobalList(); |
| |
| *obj = ktl::move(vmo); |
| |
| return ZX_OK; |
| } |
| |
| zx_status_t VmObjectPaged::CreateChildSlice(uint64_t offset, uint64_t size, bool copy_name, |
| fbl::RefPtr<VmObject>* child_vmo) { |
| LTRACEF("vmo %p offset %#" PRIx64 " size %#" PRIx64 "\n", this, offset, size); |
| |
| canary_.Assert(); |
| |
| // Offset must be page aligned. |
| if (!IS_PAGE_ROUNDED(offset)) { |
| return ZX_ERR_INVALID_ARGS; |
| } |
| |
| // Make sure size is page aligned. |
| if (!IS_PAGE_ROUNDED(size)) { |
| return ZX_ERR_INVALID_ARGS; |
| } |
| if (size > MAX_SIZE) { |
| return ZX_ERR_OUT_OF_RANGE; |
| } |
| |
| // Slice must be wholly contained. |size()| will read the size holding the lock. This extra |
| // acquisition is correct as we must drop the lock in order to perform the allocations. |
| VmCowRange range; |
| { |
| Guard<CriticalMutex> guard{lock()}; |
| auto cow_range = GetCowRangeSizeCheckLocked(offset, size); |
| if (!cow_range) { |
| return ZX_ERR_INVALID_ARGS; |
| } |
| range = *cow_range; |
| } |
| |
| // Forbid creating children of resizable VMOs. This restriction may be lifted in the future. |
| if (is_resizable()) { |
| return ZX_ERR_NOT_SUPPORTED; |
| } |
| |
| uint32_t options = kSlice; |
| if (is_contiguous()) { |
| options |= kContiguous; |
| } |
| |
| // If this VMO is contiguous then we allow creating an uncached slice. When zeroing pages that |
| // are reclaimed from having been loaned from a contiguous VMO, we will zero the pages and flush |
| // the zeroes to RAM. |
| const bool allow_uncached = is_contiguous(); |
| return CreateChildReferenceCommon(options, range, allow_uncached, copy_name, nullptr, child_vmo); |
| } |
| |
| zx_status_t VmObjectPaged::CreateChildReference(Resizability resizable, uint64_t offset, |
| uint64_t size, bool copy_name, bool* first_child, |
| fbl::RefPtr<VmObject>* child_vmo) { |
| LTRACEF("vmo %p offset %#" PRIx64 " size %#" PRIx64 "\n", this, offset, size); |
| |
| canary_.Assert(); |
| |
| // A reference spans the entirety of the parent. The specified range has no meaning, so require |
| // it to be zero. |
| if (offset != 0 || size != 0) { |
| return ZX_ERR_INVALID_ARGS; |
| } |
| |
| if (is_slice()) { |
| return ZX_ERR_NOT_SUPPORTED; |
| } |
| ASSERT(cow_range_.offset == 0); |
| |
| // Not supported for contiguous VMOs. Can use slices instead as contiguous VMOs are non-resizable |
| // and support slices. |
| if (is_contiguous()) { |
| return ZX_ERR_NOT_SUPPORTED; |
| } |
| |
| if (resizable == Resizability::Resizable) { |
| // Cannot create a resizable reference from a non-resizable VMO. |
| if (!is_resizable()) { |
| return ZX_ERR_NOT_SUPPORTED; |
| } |
| } |
| |
| uint32_t options = 0; |
| |
| // Reference inherits resizability from parent. |
| if (is_resizable()) { |
| options |= kResizable; |
| } |
| |
| return CreateChildReferenceCommon(options, VmCowRange(0, UINT64_MAX), false, copy_name, |
| first_child, child_vmo); |
| } |
| |
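| // Shared implementation for creating slices and references: the child shares the parent's |
| // VmCowPages and differs only in its options and the cow range it views. |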
| zx_status_t VmObjectPaged::CreateChildReferenceCommon(uint32_t options, VmCowRange range, |
| bool allow_uncached, bool copy_name, |
| bool* first_child, |
| fbl::RefPtr<VmObject>* child_vmo) { |
| canary_.Assert(); |
| |
| options |= kReference; |
| |
| if (can_block_on_page_requests()) { |
| options |= kCanBlockOnPageRequests; |
| } |
| |
| // Reference shares the same VmCowPages as the parent. |
| fbl::RefPtr<VmObjectPaged> vmo; |
| { |
| Guard<CriticalMutex> guard{lock()}; |
| |
| // Children of uncached VMOs are generally forbidden; the only exception is when the caller |
| // explicitly allows it, as is done for slices of contiguous VMOs. |
| if (self_locked()->GetMappingCachePolicyLocked() != ARCH_MMU_FLAG_CACHED && !allow_uncached) { |
| return ZX_ERR_BAD_STATE; |
| } |
| |
| // Once all fallible checks are performed, construct the VmObjectPaged. |
| fbl::AllocChecker ac; |
| vmo = fbl::AdoptRef<VmObjectPaged>(new (&ac) VmObjectPaged(options, cow_pages_, range)); |
| if (!ac.check()) { |
| return ZX_ERR_NO_MEMORY; |
| } |
| AssertHeld(vmo->lock_ref()); |
| |
| // There's no good way to convince the static analysis that the vmo->lock() that we hold is |
| // also the VmObject::lock() in vmo, so we disable analysis to set the cache_policy_. |
| [&vmo, cache_policy = GetMappingCachePolicyLocked()]() TA_REQ(vmo->lock()) |
| TA_NO_THREAD_SAFETY_ANALYSIS { vmo->cache_policy_ = cache_policy; }(); |
| { |
| Guard<CriticalMutex> child_guard{ChildListLock::Get()}; |
| vmo->parent_ = this; |
| const bool first = AddChildLocked(vmo.get()); |
| if (first_child) { |
| *first_child = first; |
| } |
| } |
| |
| // Also insert into the reference list. The reference should only be inserted in the list of the |
| // object that the cow_pages_locked() has the backlink to, i.e. the notional "owner" of the |
| // VmCowPages. |
| // As a consequence of this, in the case of nested references, the reference relationship can |
| // look different from the parent->child relationship, which instead mirrors the child creation |
| // calls as specified by the user (this is true for all child types). |
| VmObjectPaged* paged_owner = cow_pages_locked()->get_paged_backlink_locked(); |
| // The VmCowPages we point to should have a valid backlink, either to us or to our parent (if we |
| // are a reference). |
| DEBUG_ASSERT(paged_owner); |
| // If this object is not a reference, the |paged_owner| we computed should be the same as |
| // |this|. |
| DEBUG_ASSERT(is_reference() || paged_owner == this); |
| AssertHeld(paged_owner->lock_ref()); |
| paged_owner->reference_list_.push_back(vmo.get()); |
| |
| if (copy_name) { |
| vmo->name_ = name_; |
| } |
| } |
| |
| // Add to the global list now that fully initialized. |
| vmo->AddToGlobalList(); |
| |
| *child_vmo = ktl::move(vmo); |
| |
| return ZX_OK; |
| } |
| |
| zx_status_t VmObjectPaged::CreateClone(Resizability resizable, SnapshotType type, uint64_t offset, |
| uint64_t size, bool copy_name, |
| fbl::RefPtr<VmObject>* child_vmo) { |
| LTRACEF("vmo %p offset %#" PRIx64 " size %#" PRIx64 "\n", this, offset, size); |
| |
| canary_.Assert(); |
| |
| // Copy-on-write clones of contiguous VMOs do not have meaningful semantics, so forbid them. |
| if (is_contiguous()) { |
| return ZX_ERR_INVALID_ARGS; |
| } |
| |
| // offset must be page aligned |
| if (!IS_PAGE_ROUNDED(offset)) { |
| return ZX_ERR_INVALID_ARGS; |
| } |
| |
| // size must be page aligned and not too large. |
| if (!IS_PAGE_ROUNDED(size)) { |
| return ZX_ERR_INVALID_ARGS; |
| } |
| if (size > MAX_SIZE) { |
| return ZX_ERR_OUT_OF_RANGE; |
| } |
| auto cow_range = GetCowRange(offset, size); |
| if (!cow_range) { |
| return ZX_ERR_OUT_OF_RANGE; |
| } |
| |
| fbl::RefPtr<VmObjectPaged> vmo; |
| |
| { |
| VmCowPages::DeferredOps deferred(cow_pages_.get()); |
| Guard<CriticalMutex> guard{lock()}; |
| // check that we're not uncached in some way |
| if (self_locked()->GetMappingCachePolicyLocked() != ARCH_MMU_FLAG_CACHED) { |
| return ZX_ERR_BAD_STATE; |
| } |
| |
| // If we are a slice we require a unidirectional clone, as performing a bidirectional clone |
| // through a slice does not yet have defined semantics. |
| const bool require_unidirection = is_slice(); |
| auto result = |
| cow_pages_locked()->CreateCloneLocked(type, require_unidirection, *cow_range, deferred); |
| if (result.is_error()) { |
| return result.error_value(); |
| } |
| |
| uint32_t options = 0; |
| if (resizable == Resizability::Resizable) { |
| options |= kResizable; |
| } |
| if (can_block_on_page_requests()) { |
| options |= kCanBlockOnPageRequests; |
| } |
| fbl::AllocChecker ac; |
| auto [child, child_lock] = (*result).take(); |
| vmo = fbl::AdoptRef<VmObjectPaged>(new (&ac) VmObjectPaged(options, ktl::move(child))); |
| if (!ac.check()) { |
| return ZX_ERR_NO_MEMORY; |
| } |
| Guard<CriticalMutex> child_guard{AdoptLock, vmo->lock(), ktl::move(child_lock)}; |
| DEBUG_ASSERT(vmo->self_locked()->GetMappingCachePolicyLocked() == ARCH_MMU_FLAG_CACHED); |
| |
| // Now that everything has succeeded we can wire up the cow pages references. The VMO will be |
| // placed in the global list later once the lock has been dropped. |
| vmo->cow_pages_locked()->set_paged_backlink_locked(vmo.get()); |
| vmo->cow_pages_locked()->TransitionToAliveLocked(); |
| |
| // Install the parent. |
| { |
| Guard<CriticalMutex> list_guard{ChildListLock::Get()}; |
| vmo->parent_ = this; |
| |
| // add the new vmo as a child before we do anything, since its |
| // dtor expects to find it in its parent's child list |
| AddChildLocked(vmo.get()); |
| } |
| |
| if (copy_name) { |
| vmo->name_ = name_; |
| } |
| } |
| |
| // Add to the global list now that fully initialized. |
| vmo->AddToGlobalList(); |
| |
| *child_vmo = ktl::move(vmo); |
| |
| return ZX_OK; |
| } |
| |
| void VmObjectPaged::DumpLocked(uint depth, bool verbose) const { |
| canary_.Assert(); |
| |
| uint64_t parent_id = 0; |
| // Cache the parent value as a void* as it's not safe to dereference once the ChildListLock is |
| // dropped, but we can still print out its value. |
| void* parent; |
| { |
| Guard<CriticalMutex> guard{ChildListLock::Get()}; |
| parent = parent_; |
| if (parent_) { |
| parent_id = parent_->user_id(); |
| } |
| } |
| |
| for (uint i = 0; i < depth; ++i) { |
| printf(" "); |
| } |
| printf("vmo %p/k%" PRIu64 " ref %d parent %p/k%" PRIu64 "\n", this, user_id_.load(), |
| ref_count_debug(), parent, parent_id); |
| |
| char name[ZX_MAX_NAME_LEN]; |
| get_name(name, sizeof(name)); |
| if (strlen(name) > 0) { |
| for (uint i = 0; i < depth + 1; ++i) { |
| printf(" "); |
| } |
| printf("name %s\n", name); |
| } |
| |
| cow_pages_locked()->DumpLocked(depth, verbose); |
| } |
| |
| VmObject::AttributionCounts VmObjectPaged::GetAttributedMemoryInRangeLocked( |
| uint64_t offset_bytes, uint64_t len_bytes) const { |
| vmo_attribution_queries.Add(1); |
| |
| // A reference never has memory attributed to it. It points to the parent's VmCowPages, and we |
| // need to hold the invariant that we don't double-count attributed memory. |
| // |
| // TODO(https://fxbug.dev/42069078): Consider attributing memory to the current VmCowPages |
| // backlink for the case where the parent has gone away. |
| if (is_reference()) { |
| return AttributionCounts{}; |
| } |
| ASSERT(cow_range_.offset == 0); |
| uint64_t new_len_bytes; |
| if (!TrimRange(offset_bytes, len_bytes, size_locked(), &new_len_bytes)) { |
| return AttributionCounts{}; |
| } |
| |
| auto cow_range = GetCowRange(offset_bytes, new_len_bytes); |
| return cow_pages_locked()->GetAttributedMemoryInRangeLocked(*cow_range); |
| } |
| |
| zx_status_t VmObjectPaged::CommitRangeInternal(uint64_t offset, uint64_t len, bool pin, |
| bool write) { |
| canary_.Assert(); |
| LTRACEF("offset %#" PRIx64 ", len %#" PRIx64 "\n", offset, len); |
| |
| if (can_block_on_page_requests()) { |
| lockdep::AssertNoLocksHeld(); |
| } |
| |
| // We only expect write to be set if this is a pin. All non-pin commits are reads. |
| DEBUG_ASSERT(!write || pin); |
| |
| // Child slices of VMOs are currently not resizable, nor can they be made |
| // from resizable parents. If this ever changes, the logic surrounding what |
| // to do if a VMO gets resized during a Commit or Pin operation will need to |
| // be revisited. Right now, we can just rely on the fact that the initial |
| // vetting/trimming of the offset and length of the operation will never |
| // change if the operation is being executed against a child slice. |
| DEBUG_ASSERT(!is_resizable() || !is_slice()); |
| |
| // Round offset and len to be page aligned. Use a sub-scope to validate that temporary end |
| // calculations cannot be accidentally used later on. |
| { |
| uint64_t end; |
| if (add_overflow(offset, len, &end)) { |
| return ZX_ERR_OUT_OF_RANGE; |
| } |
| const uint64_t end_page = ROUNDUP_PAGE_SIZE(end); |
| if (end_page < end) { |
| return ZX_ERR_OUT_OF_RANGE; |
| } |
| DEBUG_ASSERT(end_page >= offset); |
| offset = ROUNDDOWN_PAGE_SIZE(offset); |
| len = end_page - offset; |
| } |
| |
| // Although for non-pin operations the range is allowed to end up outside the VMO during the |
| // operation, it must at least initially be within range. |
| { |
| Guard<CriticalMutex> guard{lock()}; |
| if (unlikely(!InRange(offset, len, size_locked()))) { |
| return ZX_ERR_OUT_OF_RANGE; |
| } |
| } |
| |
| if (len == 0) { |
| // If pinning we explicitly forbid zero length pins as we cannot guarantee consistent semantics. |
| // For example pinning a zero length range outside the range of the VMO is an error, and so |
| // pinning a zero length range inside the vmo and then resizing the VMO smaller than the pin |
| // region should also be an error. The only way to enforce this without adding new metadata to |
| // track zero length pin regions is to just forbid them. Note that the user entry points for |
| // pinning already forbid zero length ranges. |
| return pin ? ZX_ERR_INVALID_ARGS : ZX_OK; |
| } |
| |
| // Tracks the end of the pinned range to unpin in case of failure. The |offset| might lag behind |
| // the pinned range, as it tracks the range that has been completely processed, which would |
| // also include dirtying the page after pinning in case of a write. |
| uint64_t pinned_end_offset = offset; |
| // Should any errors occur we need to unpin everything. If we were asked to write, we need to mark |
| // the VMO modified if any pages were committed. |
| auto deferred_cleanup = |
| fit::defer([this, pinned_start_offset = offset, &pinned_end_offset, &len, &write]() { |
| // If we were not able to pin the entire range, i.e. len is not 0, we need to unpin |
| // everything. Regardless of any resizes or other things that may have happened any pinned |
| // pages *must* still be within a valid range, and so we know Unpin should succeed. The edge |
| // case is if we had failed to pin *any* pages and so our original offset may be outside the |
| // current range of the vmo. Additionally, as pinning a zero length range is invalid, so is |
| // unpinning, and so we must avoid it. |
| if (pinned_end_offset > pinned_start_offset) { |
| if (len > 0) { |
| auto cow_range = |
| GetCowRange(pinned_start_offset, pinned_end_offset - pinned_start_offset); |
| Guard<CriticalMutex> guard{AssertOrderedLock, lock(), cow_pages_->lock_order()}; |
| cow_pages_locked()->UnpinLocked(*cow_range, nullptr); |
| } else if (write) { |
| Guard<CriticalMutex> guard{AssertOrderedLock, lock(), cow_pages_->lock_order()}; |
| mark_modified_locked(); |
| } |
| } |
| }); |
| |
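| // The page request is declared outside the loop below so that a single request object can be |
| // reused across retries rather than constructed each iteration. |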
| __UNINITIALIZED MultiPageRequest page_request; |
| |
| // As we may need to wait on arbitrary page requests we just keep running this as long as there is |
| // a non-zero range to process. |
| uint64_t to_dirty_len = 0; |
| while (len > 0) { |
| zx_status_t status = ZX_OK; |
| uint64_t committed_len = 0; |
| if (to_dirty_len > 0) { |
| Guard<CriticalMutex> guard{AssertOrderedLock, lock(), cow_pages_->lock_order()}; |
| // The to_dirty_len *must* be within range, even though we just grabbed the lock and a resize |
| // could have happened, since the dirtied range is pinned. As such, any resize could not have |
| // removed the in progress dirty range. |
| DEBUG_ASSERT(InRange(offset, to_dirty_len, size_locked())); |
| uint64_t dirty_len = 0; |
| status = cow_pages_locked()->PrepareForWriteLocked( |
| *GetCowRange(offset, to_dirty_len), page_request.GetLazyDirtyRequest(), &dirty_len); |
| DEBUG_ASSERT(dirty_len <= to_dirty_len); |
| if (status == ZX_ERR_SHOULD_WAIT) { |
| page_request.MadeDirtyRequest(); |
| } |
| // Account for the pages that were dirtied during this attempt. |
| to_dirty_len -= dirty_len; |
| committed_len = dirty_len; |
| } else { |
| __UNINITIALIZED VmCowPages::DeferredOps deferred(cow_pages_.get()); |
| Guard<CriticalMutex> guard{AssertOrderedLock, lock(), cow_pages_->lock_order()}; |
| uint64_t new_len = len; |
| if (!TrimRange(offset, len, size_locked(), &new_len)) { |
| return pin ? ZX_ERR_OUT_OF_RANGE : ZX_OK; |
| } |
| if (new_len != len) { |
| if (pin) { |
| return ZX_ERR_OUT_OF_RANGE; |
| } |
| len = new_len; |
| if (len == 0) { |
| break; |
| } |
| } |
| |
| status = cow_pages_locked()->CommitRangeLocked(*GetCowRange(offset, len), deferred, |
| &committed_len, &page_request); |
| DEBUG_ASSERT(committed_len <= len); |
| |
| // If we're required to pin, try to pin the committed range before waiting on the |
| // page_request, which has been populated to request pages beyond the committed range. Even |
| // though the page_request has already been initialized, we choose to first completely process |
| // the committed range, which could end up canceling the already initialized page request. |
| // This allows us to keep making forward progress as we will potentially pin a few pages |
| // before trying to fault in further pages, thereby preventing the already committed (and |
| // pinned) pages from being evicted while we wait with the lock dropped. |
| if (pin && committed_len > 0) { |
| uint64_t non_loaned_len = 0; |
| if (cow_pages_locked()->can_borrow() && |
| PhysicalPageBorrowingConfig::Get().is_loaning_enabled()) { |
| // We need to replace any loaned pages in the committed range with non-loaned pages first, |
| // since pinning expects all pages to be non-loaned. Replacing loaned pages requires a |
| // page request too. At any time we'll only be able to wait on a single page request, and |
| // after the wait the conditions that resulted in the previous request might have changed, |
| // so we can just cancel and reuse the existing page_request. |
| // TODO: consider not canceling this and the other request below. The issue with not |
| // canceling is that without early wake support, i.e. being able to reinitialize an |
| // existing initialized request, I think this code will not work without canceling. |
| page_request.CancelRequests(); |
| status = cow_pages_locked()->ReplacePagesWithNonLoanedLocked( |
| *GetCowRange(offset, committed_len), deferred, page_request.GetAnonymous(), |
| &non_loaned_len); |
| DEBUG_ASSERT(non_loaned_len <= committed_len); |
| } else { |
| // Either the VMO does not support borrowing, or loaning is not enabled so we know there |
| // are no loaned pages. |
| non_loaned_len = committed_len; |
| } |
| |
| // We can safely pin the non-loaned range before waiting on the page request. |
| if (non_loaned_len > 0) { |
| // Verify that we are starting the pin after the previously pinned range, as we do not |
| // want to repeatedly pin the same pages. |
| ASSERT(pinned_end_offset == offset); |
| zx_status_t pin_status = |
| cow_pages_locked()->PinRangeLocked(*GetCowRange(offset, non_loaned_len)); |
| if (pin_status != ZX_OK) { |
| return pin_status; |
| } |
| } |
| // At this point we have successfully committed and pinned non_loaned_len. |
| uint64_t pinned_len = non_loaned_len; |
| pinned_end_offset = offset + pinned_len; |
| |
| // If this is a write and the VMO supports dirty tracking, we also need to mark the pinned |
| // pages Dirty. |
| // We pin the pages first before marking them dirty in order to guarantee forward progress. |
| // Pinning the pages will prevent them from getting decommitted while we are waiting on the |
| // dirty page request without the lock held. |
| if (write && pinned_len > 0 && is_dirty_tracked()) { |
| // Prepare the committed range for writing. We need a page request for this too, so cancel |
| // any existing one and reuse it. |
| page_request.CancelRequests(); |
| |
| // We want to dirty the entire pinned range. |
| to_dirty_len = pinned_len; |
| continue; |
| } |
| committed_len = pinned_len; |
| } |
| } |
| if (status == ZX_ERR_SHOULD_WAIT) { |
| status = page_request.Wait(); |
| } |
| if (status != ZX_OK) { |
| if (status == ZX_ERR_TIMED_OUT) { |
| Dump(0, false); |
| } |
| return status; |
| } |
| offset += committed_len; |
| len -= committed_len; |
| } |
| return ZX_OK; |
| } |
| |
| zx_status_t VmObjectPaged::DecommitRange(uint64_t offset, uint64_t len) { |
| canary_.Assert(); |
| LTRACEF("offset %#" PRIx64 ", len %#" PRIx64 "\n", offset, len); |
| |
| if (is_contiguous() && !PhysicalPageBorrowingConfig::Get().is_loaning_enabled()) { |
| return ZX_ERR_NOT_SUPPORTED; |
| } |
| |
| auto cow_range = GetCowRange(offset, len); |
| if (!cow_range) { |
| return ZX_ERR_OUT_OF_RANGE; |
| } |
| |
| // Decommit of pages from a contiguous VMO relies on contiguous VMOs not being resizable. |
| DEBUG_ASSERT(!is_resizable() || !is_contiguous()); |
| |
| return cow_pages_->DecommitRange(*cow_range); |
| } |
| |
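| // Zeroes the sub-page range [zero_start_offset, zero_end_offset) within the page at |
| // page_base_offset, skipping the write entirely if the page already reads as zero. |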
| zx_status_t VmObjectPaged::ZeroPartialPage(uint64_t page_base_offset, uint64_t zero_start_offset, |
| uint64_t zero_end_offset) { |
| DEBUG_ASSERT(zero_start_offset <= zero_end_offset); |
| DEBUG_ASSERT(zero_end_offset <= PAGE_SIZE); |
| DEBUG_ASSERT(IS_PAGE_ROUNDED(page_base_offset)); |
| |
| { |
| Guard<CriticalMutex> guard{lock()}; |
| |
| if (page_base_offset >= size_locked()) { |
| return ZX_ERR_OUT_OF_RANGE; |
| } |
| |
| // TODO: Consider replacing this with a more appropriate generic API when one is available. |
| if (cow_pages_locked()->PageWouldReadZeroLocked(page_base_offset)) { |
| // This is already considered zero so no need to redundantly zero again. |
| return ZX_OK; |
| } |
| } |
| |
| // Need to actually zero out bytes in the page. |
| return ReadWriteInternal(page_base_offset + zero_start_offset, |
| zero_end_offset - zero_start_offset, true, |
| VmObjectReadWriteOptions::None, |
| [](void* dst, size_t offset, size_t len) -> UserCopyCaptureFaultsResult { |
| // We're memsetting the *kernel* address of an allocated page, so we |
| // know that this cannot fault. memset may not be the most efficient, |
| // but we don't expect to be doing this very often. |
| memset(dst, 0, len); |
| return UserCopyCaptureFaultsResult{ZX_OK}; |
| }) |
| .first; |
| } |
| |
| zx_status_t VmObjectPaged::ZeroRangeInternal(uint64_t offset, uint64_t len, bool dirty_track) { |
| canary_.Assert(); |
| if (can_block_on_page_requests()) { |
| lockdep::AssertNoLocksHeld(); |
| } |
| // May need to zero in chunks across multiple different lock acquisitions so loop until nothing |
| // left to do. |
| while (len > 0) { |
| // Check for any non-page aligned start and handle separately. |
| if (!IS_PAGE_ROUNDED(offset)) { |
| // We're doing partial page writes, so we should be dirty tracking. |
| DEBUG_ASSERT(dirty_track); |
| const uint64_t page_base = ROUNDDOWN_PAGE_SIZE(offset); |
| const uint64_t zero_start_offset = offset - page_base; |
| const uint64_t zero_len = ktl::min(PAGE_SIZE - zero_start_offset, len); |
| zx_status_t status = |
| ZeroPartialPage(page_base, zero_start_offset, zero_start_offset + zero_len); |
| if (status != ZX_OK) { |
| return status; |
| } |
| // Advance over the length we zeroed and then, since the lock might have been dropped, go |
| // around the loop to redo the checks. |
| offset += zero_len; |
| len -= zero_len; |
| continue; |
| } |
| // The start is page aligned, so if the remaining length is not a page size then perform the |
| // final sub-page zero. |
| if (len < PAGE_SIZE) { |
| DEBUG_ASSERT(dirty_track); |
| return ZeroPartialPage(offset, 0, len); |
| } |
| |
| // First try to do the more efficient decommit. We prefer decommit as it performs work in the |
| // order of the number of committed pages, instead of in the order of the size of the range. An |
| // error from DecommitRange indicates that the VMO is not of a form where decommit can |
| // safely be performed without exposing data that we shouldn't between children and parents, but |
| // no actual state will have been changed. Should decommit succeed we are done, otherwise we |
| // will have to handle each offset individually. |
| // |
| // Zeroing doesn't decommit pages of contiguous VMOs. |
| if (!is_contiguous()) { |
| ktl::optional<VmCowRange> cow_range = GetCowRange(offset, ROUNDDOWN_PAGE_SIZE(len)); |
| if (!cow_range) { |
| return ZX_ERR_OUT_OF_RANGE; |
| } |
| |
| zx_status_t status = cow_pages_->DecommitRange(*cow_range); |
| if (status == ZX_OK) { |
| offset += cow_range->len; |
| len -= cow_range->len; |
| continue; |
| } |
| } |
| |
| // We might need a page request if the VMO is backed by a page source. |
| __UNINITIALIZED MultiPageRequest page_request; |
| uint64_t zeroed_len = 0; |
| zx_status_t status; |
| { |
| __UNINITIALIZED VmCowPages::DeferredOps deferred(cow_pages_.get()); |
| Guard<CriticalMutex> guard{lock()}; |
| |
| // Zeroing a range behaves as if it were an efficient zx_vmo_write. As we cannot write to an |
| // uncached VMO, we also cannot zero an uncached VMO. |
| if (self_locked()->GetMappingCachePolicyLocked() != ARCH_MMU_FLAG_CACHED) { |
| return ZX_ERR_BAD_STATE; |
| } |
| |
| // Offset is page aligned, and we have at least one full page to process, so find the page |
| // aligned length to hand over to the cow pages zero method. |
| ktl::optional<VmCowRange> cow_range = |
| GetCowRangeSizeCheckLocked(offset, ROUNDDOWN_PAGE_SIZE(len)); |
| if (!cow_range) { |
| return ZX_ERR_OUT_OF_RANGE; |
| } |
| |
| #if DEBUG_ASSERT_IMPLEMENTED |
| // Currently we want ZeroPagesLocked() to not decommit any pages from a contiguous VMO. In |
| // debug we can assert that (not a super fast assert, but seems worthwhile; it's debug only). |
| uint64_t page_count_before = |
| is_contiguous() ? cow_pages_locked()->DebugGetPageCountLocked() : 0; |
| #endif |
| // Now that we have a page aligned range we can try hand over to the cow pages zero method. |
| status = cow_pages_locked()->ZeroPagesLocked(*cow_range, dirty_track, deferred, &page_request, |
| &zeroed_len); |
| if (zeroed_len != 0) { |
| // Mark modified since we wrote zeros. |
| mark_modified_locked(); |
| } |
| |
| #if DEBUG_ASSERT_IMPLEMENTED |
| if (is_contiguous()) { |
| uint64_t page_count_after = cow_pages_locked()->DebugGetPageCountLocked(); |
| DEBUG_ASSERT(page_count_after == page_count_before); |
| } |
| #endif |
| } |
| |
| // Wait on any page request, which is the only non-fatal error case. |
| if (status == ZX_ERR_SHOULD_WAIT) { |
| status = page_request.Wait(); |
| if (status == ZX_ERR_TIMED_OUT) { |
| Dump(0, false); |
| } |
| } |
| if (status != ZX_OK) { |
| return status; |
| } |
| // Advance over pages that had already been zeroed. |
| offset += zeroed_len; |
| len -= zeroed_len; |
| } |
| return ZX_OK; |
| } |
| |
| zx_status_t VmObjectPaged::Resize(uint64_t s) { |
| canary_.Assert(); |
| |
| LTRACEF("vmo %p, size %" PRIu64 "\n", this, s); |
| |
| DEBUG_ASSERT(!is_contiguous() || !is_resizable()); |
| // Also rejects contiguous VMOs. |
| if (!is_resizable()) { |
| return ZX_ERR_UNAVAILABLE; |
| } |
| |
| // ensure the size is valid and that we will not wrap. |
| if (!IS_PAGE_ROUNDED(s)) { |
| return ZX_ERR_INVALID_ARGS; |
| } |
| if (s > MAX_SIZE) { |
| return ZX_ERR_OUT_OF_RANGE; |
| } |
| |
| return cow_pages_->Resize(s); |
| } |
| |
| // Performs a copy in/out on a range of the object using a passed-in lambda for the copy |
| // routine. The copy routine has the expected type signature of: (void *ptr, uint64_t offset, |
| // uint64_t len) -> UserCopyCaptureFaultsResult. |
| template <typename T> |
| ktl::pair<zx_status_t, size_t> VmObjectPaged::ReadWriteInternal(uint64_t offset, size_t len, |
| bool write, |
| VmObjectReadWriteOptions options, |
| T copyfunc) { |
| canary_.Assert(); |
| |
| uint64_t end_offset; |
| if (add_overflow(offset, len, &end_offset)) { |
| return {ZX_ERR_OUT_OF_RANGE, 0}; |
| } |
| |
| // Track our two offsets. |
| uint64_t src_offset = offset; |
| size_t dest_offset = 0; |
| |
| // The PageRequest is a non-trivial object so we declare it outside the loop to avoid having to |
| // construct and deconstruct it each iteration. It is tolerant of being reused and will |
| // reinitialize itself if needed. |
| // Ideally we can wake up early from the page request to begin processing any partially supplied |
| // ranges. However, if performing a write to a dirty tracked VMO this is not presently possible as |
| // we need to first read in the range and then dirty it, and we cannot have both a read and dirty |
| // request outstanding at one time. |
| __UNINITIALIZED MultiPageRequest page_request(!write); |
| do { |
| zx_status_t status; |
| __UNINITIALIZED UserCopyCaptureFaultsResult copy_result(ZX_OK); |
| { |
| __UNINITIALIZED VmCowPages::DeferredOps deferred(cow_pages_.get()); |
| Guard<CriticalMutex> guard{AssertOrderedLock, lock(), cow_pages_->lock_order()}; |
| if (self_locked()->GetMappingCachePolicyLocked() != ARCH_MMU_FLAG_CACHED) { |
| return {ZX_ERR_BAD_STATE, src_offset - offset}; |
| } |
| if (end_offset > size_locked()) { |
| if (!!(options & VmObjectReadWriteOptions::TrimLength)) { |
| if (src_offset >= size_locked()) { |
| return {ZX_OK, src_offset - offset}; |
| } |
| end_offset = size_locked(); |
| } else { |
| return {ZX_ERR_OUT_OF_RANGE, src_offset - offset}; |
| } |
| } else if (src_offset >= end_offset) { |
| return {ZX_OK, src_offset - offset}; |
| } |
| |
| const size_t first_page_offset = ROUNDDOWN_PAGE_SIZE(src_offset); |
| const size_t last_page_offset = ROUNDDOWN_PAGE_SIZE(end_offset - 1); |
| size_t remaining_pages = (last_page_offset - first_page_offset) / PAGE_SIZE + 1; |
| size_t pages_since_last_unlock = 0; |
| bool modified = false; |
| |
| __UNINITIALIZED zx::result<VmCowPages::LookupCursor> cursor = |
| GetLookupCursorLocked(first_page_offset, remaining_pages * PAGE_SIZE); |
| if (cursor.is_error()) { |
| return {cursor.status_value(), src_offset - offset}; |
| } |
| // Performing explicit accesses by request of the user, so disable zero forking. |
| cursor->DisableZeroFork(); |
| AssertHeld(cursor->lock_ref()); |
| |
| while (remaining_pages > 0) { |
| const size_t page_offset = src_offset % PAGE_SIZE; |
| const size_t tocopy = ktl::min(PAGE_SIZE - page_offset, end_offset - src_offset); |
| |
| // If we need to wait on pages then we would like to wait on as many as possible, up to the |
| // actual limit of the read/write operation. For a read we can wake up once some pages are |
| // received, minimizing the latency before we start making progress, but as this is not true |
| // for writes we cap the maximum number requested. |
| constexpr uint64_t kMaxWriteWaitPages = 16; |
| const uint64_t max_wait_pages = write ? kMaxWriteWaitPages : UINT64_MAX; |
| const uint64_t max_waitable_pages = ktl::min(remaining_pages, max_wait_pages); |
| |
| // Attempt to lookup a page |
| __UNINITIALIZED zx::result<VmCowPages::LookupCursor::RequireResult> result = |
| cursor->RequirePage(write, static_cast<uint>(max_waitable_pages), deferred, |
| &page_request); |
| |
| status = result.status_value(); |
| if (status != ZX_OK) { |
| break; |
| } |
| |
| // Compute the kernel mapping of this page. |
| const paddr_t pa = result->page->paddr(); |
| char* page_ptr = reinterpret_cast<char*>(paddr_to_physmap(pa)); |
| |
| // Call the copy routine. If the copy was successful then ZX_OK is returned, otherwise |
| // ZX_ERR_SHOULD_WAIT may be returned to indicate the copy failed but we can retry it. |
| copy_result = copyfunc(page_ptr + page_offset, dest_offset, tocopy); |
| |
| // If a fault has actually occurred, then we will have captured fault info that we can use |
| // to handle the fault. |
| if (copy_result.fault_info.has_value()) { |
| break; |
| } |
| // If we encounter _any_ unrecoverable error from the copy operation which |
| // produced no fault address, squash the error down to just "NOT_FOUND". |
| // This is what the SoftFault error would have told us if we did try to |
| // handle the fault and could not. |
| if (copy_result.status != ZX_OK) { |
| status = ZX_ERR_NOT_FOUND; |
| break; |
| } |
| // Advance the copy location. |
| src_offset += tocopy; |
| dest_offset += tocopy; |
| remaining_pages--; |
| modified = write; |
| |
| // Periodically yield the lock in order to allow other read or write |
| // operations to advance sooner than they otherwise would. |
| constexpr size_t kPagesBetweenUnlocks = 16; |
| if (unlikely(++pages_since_last_unlock == kPagesBetweenUnlocks)) { |
| pages_since_last_unlock = 0; |
| if (guard.lock()->IsContested()) { |
| break; |
| } |
| } |
| } |
| // Before dropping the lock, check if any pages were modified and update the VMO state |
| // accordingly. |
| if (modified) { |
| mark_modified_locked(); |
| } |
| } |
| |
| // If there was a fault while copying, then handle it now that the lock is dropped. |
| if (copy_result.fault_info.has_value()) { |
| auto& info = *copy_result.fault_info; |
| uint64_t to_fault = len - dest_offset; |
| status = Thread::Current::SoftFaultInRange(info.pf_va, info.pf_flags, to_fault); |
| } else if (status == ZX_ERR_SHOULD_WAIT) { |
| // RequirePage 'failed', but told us that it had filled out the page request, so we should |
| // wait on it. |
| DEBUG_ASSERT(can_block_on_page_requests()); |
| status = page_request.Wait(); |
| if (status == ZX_ERR_TIMED_OUT) { |
| Dump(0, false); |
| } |
| } |
| if (status != ZX_OK) { |
| return {status, src_offset - offset}; |
| } |
| } while (src_offset < end_offset); |
| |
| return {ZX_OK, src_offset - offset}; |
| } |
| |
| zx_status_t VmObjectPaged::Read(void* _ptr, uint64_t offset, size_t len) { |
| canary_.Assert(); |
| // test to make sure this is a kernel pointer |
| if (!is_kernel_address(reinterpret_cast<vaddr_t>(_ptr))) { |
| DEBUG_ASSERT_MSG(0, "non kernel pointer passed\n"); |
| return ZX_ERR_INVALID_ARGS; |
| } |
| |
| // read routine that just uses a memcpy |
| char* ptr = reinterpret_cast<char*>(_ptr); |
| auto read_routine = [ptr](const void* src, size_t offset, |
| size_t len) -> UserCopyCaptureFaultsResult { |
| memcpy(ptr + offset, src, len); |
| return UserCopyCaptureFaultsResult{ZX_OK}; |
| }; |
| |
| if (can_block_on_page_requests()) { |
| lockdep::AssertNoLocksHeld(); |
| } |
| |
| return ReadWriteInternal(offset, len, false, VmObjectReadWriteOptions::None, read_routine).first; |
| } |
| |
| zx_status_t VmObjectPaged::Write(const void* _ptr, uint64_t offset, size_t len) { |
| canary_.Assert(); |
| // test to make sure this is a kernel pointer |
| if (!is_kernel_address(reinterpret_cast<vaddr_t>(_ptr))) { |
| DEBUG_ASSERT_MSG(0, "non kernel pointer passed\n"); |
| return ZX_ERR_INVALID_ARGS; |
| } |
| |
| // write routine that just uses a memcpy |
| const char* ptr = reinterpret_cast<const char*>(_ptr); |
| auto write_routine = [ptr](void* dst, size_t offset, size_t len) -> UserCopyCaptureFaultsResult { |
| memcpy(dst, ptr + offset, len); |
| return UserCopyCaptureFaultsResult{ZX_OK}; |
| }; |
| |
| if (can_block_on_page_requests()) { |
| lockdep::AssertNoLocksHeld(); |
| } |
| |
| return ReadWriteInternal(offset, len, true, VmObjectReadWriteOptions::None, write_routine).first; |
| } |
| |
| zx_status_t VmObjectPaged::CacheOp(uint64_t offset, uint64_t len, CacheOpType type) { |
| canary_.Assert(); |
| if (unlikely(len == 0)) { |
| return ZX_ERR_INVALID_ARGS; |
| } |
| |
| Guard<CriticalMutex> guard{lock()}; |
| |
| // verify that the range is within the object |
| auto cow_range = GetCowRangeSizeCheckLocked(offset, len); |
| if (!cow_range) { |
| return ZX_ERR_OUT_OF_RANGE; |
| } |
| |
| // This cannot overflow as we already checked the range. |
| const uint64_t cow_end = cow_range->end(); |
| |
| // For syncing instruction caches there may be work that is more efficient to batch together, and |
| // so we use an abstract consistency manager to optimize it for the given architecture. |
| ArchVmICacheConsistencyManager sync_cm; |
| |
| return cow_pages_locked()->LookupReadableLocked( |
| *cow_range, |
| [&sync_cm, cow_offset = cow_range->offset, cow_end, type](uint64_t page_offset, paddr_t pa) { |
| // This cannot overflow due to the maximum possible size of a VMO. |
| const uint64_t page_end = page_offset + PAGE_SIZE; |
| |
| // Determine our start and end in terms of vmo offset |
| const uint64_t start = ktl::max(page_offset, cow_offset); |
| const uint64_t end = ktl::min(cow_end, page_end); |
| |
| // Translate to inter-page offset |
| DEBUG_ASSERT(start >= page_offset); |
| const uint64_t op_start_offset = start - page_offset; |
| DEBUG_ASSERT(op_start_offset < PAGE_SIZE); |
| |
| DEBUG_ASSERT(end > start); |
| const uint64_t op_len = end - start; |
| |
| CacheOpPhys(pa + op_start_offset, op_len, type, sync_cm); |
| return ZX_ERR_NEXT; |
| }); |
| } |
| |
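| // Walks pages in the given range via the shared VmCowPages, handing lookup_fn offsets relative |
| // to this VMO (any slice offset is removed). |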
| zx_status_t VmObjectPaged::Lookup(uint64_t offset, uint64_t len, |
| VmObject::LookupFunction lookup_fn) { |
| canary_.Assert(); |
| auto cow_range = GetCowRange(offset, len); |
| if (!cow_range) { |
| return ZX_ERR_OUT_OF_RANGE; |
| } |
| |
| Guard<CriticalMutex> guard{lock()}; |
| |
| return cow_pages_locked()->LookupLocked( |
| *cow_range, [&lookup_fn, undo_offset = cow_range_.offset](uint64_t offset, paddr_t pa) { |
| // Need to undo the parent_offset before forwarding to the lookup_fn, who is ignorant of |
| // slices. |
| return lookup_fn(offset - undo_offset, pa); |
| }); |
| } |
| |
| zx_status_t VmObjectPaged::LookupContiguous(uint64_t offset, uint64_t len, paddr_t* out_paddr) { |
| canary_.Assert(); |
| |
| // We should consider having the callers round up to page boundaries and then check whether the |
| // length is page-aligned. |
| if (unlikely(len == 0 || !IS_PAGE_ROUNDED(offset))) { |
| return ZX_ERR_INVALID_ARGS; |
| } |
| |
| Guard<CriticalMutex> guard{lock()}; |
| |
| auto cow_range = GetCowRangeSizeCheckLocked(offset, len); |
| if (!cow_range) { |
| return ZX_ERR_OUT_OF_RANGE; |
| } |
| |
| if (unlikely(!is_contiguous() && (cow_range->len != PAGE_SIZE))) { |
| // Multi-page lookup only supported for contiguous VMOs. |
| return ZX_ERR_BAD_STATE; |
| } |
| |
| // Verify that all pages are present, and assert that the present pages are contiguous since we |
| // only support len > PAGE_SIZE for contiguous VMOs. |
| bool page_seen = false; |
| uint64_t first_offset = 0; |
| paddr_t first_paddr = 0; |
| uint64_t count = 0; |
  // This has to work for child slices with a non-zero cow_range_.offset as well, which means that
  // even when all pages are present, the first cur_offset seen can be offset + cow_range_.offset
  // rather than offset.
| zx_status_t status = cow_pages_locked()->LookupLocked( |
| *cow_range, |
      [&page_seen, &first_offset, &first_paddr, &count](uint64_t cur_offset, paddr_t pa) {
| ++count; |
| if (!page_seen) { |
| first_offset = cur_offset; |
| first_paddr = pa; |
| page_seen = true; |
| } |
| ASSERT(first_paddr + (cur_offset - first_offset) == pa); |
| return ZX_ERR_NEXT; |
| }); |
| ASSERT(status == ZX_OK); |
| if (count != cow_range->len / PAGE_SIZE) { |
| return ZX_ERR_NOT_FOUND; |
| } |
| if (out_paddr) { |
| *out_paddr = first_paddr; |
| } |
| return ZX_OK; |
| } |
| |
| ktl::pair<zx_status_t, size_t> VmObjectPaged::ReadUser(user_out_ptr<char> ptr, uint64_t offset, |
| size_t len, |
| VmObjectReadWriteOptions options) { |
| canary_.Assert(); |
| |
| // read routine that uses copy_to_user |
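  // The copy captures any user faults instead of taking them in place, so that ReadWriteInternal
  // can resolve a captured fault without holding the VMO lock and then retry the copy.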
| auto read_routine = [ptr](const char* src, size_t offset, |
| size_t len) -> UserCopyCaptureFaultsResult { |
| return ptr.byte_offset(offset).copy_array_to_user_capture_faults(src, len); |
| }; |
| |
| if (can_block_on_page_requests()) { |
| lockdep::AssertNoLocksHeld(); |
| } |
| |
| return ReadWriteInternal(offset, len, false, options, read_routine); |
| } |
| |
| ktl::pair<zx_status_t, size_t> VmObjectPaged::WriteUser( |
| user_in_ptr<const char> ptr, uint64_t offset, size_t len, VmObjectReadWriteOptions options, |
| const OnWriteBytesTransferredCallback& on_bytes_transferred) { |
| canary_.Assert(); |
| |
| // write routine that uses copy_from_user |
| auto write_routine = [ptr, base_vmo_offset = offset, &on_bytes_transferred]( |
| char* dst, size_t offset, size_t len) -> UserCopyCaptureFaultsResult { |
| __UNINITIALIZED auto copy_result = |
| ptr.byte_offset(offset).copy_array_from_user_capture_faults(dst, len); |
| |
| if (copy_result.status == ZX_OK) { |
| if (on_bytes_transferred) { |
| on_bytes_transferred(base_vmo_offset + offset, len); |
| } |
| } |
| return copy_result; |
| }; |
| |
| if (can_block_on_page_requests()) { |
| lockdep::AssertNoLocksHeld(); |
| } |
| |
| return ReadWriteInternal(offset, len, true, options, write_routine); |
| } |
| |
| ktl::pair<zx_status_t, size_t> VmObjectPaged::ReadUserVector(user_out_iovec_t vec, uint64_t offset, |
| size_t len) { |
| if (len == 0u) { |
| return {ZX_OK, 0}; |
| } |
| if (len > UINT64_MAX - offset) { |
| return {ZX_ERR_OUT_OF_RANGE, 0}; |
| } |
| |
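  // Process the iovec entries in order, clamping each chunk to the remaining |len|. Bytes already
  // transferred are reported via |total| even if a chunk fails part way through.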
| size_t total = 0; |
| zx_status_t status = vec.ForEach([&](user_out_ptr<char> ptr, size_t capacity) { |
| if (capacity > len) { |
| capacity = len; |
| } |
| |
| auto [read_status, chunk_actual] = |
| ReadUser(ptr, offset, capacity, VmObjectReadWriteOptions::None); |
| |
| // Always add |chunk_actual| since some bytes may have been transferred, even on error |
| total += chunk_actual; |
| if (read_status != ZX_OK) { |
| return read_status; |
| } |
| |
| DEBUG_ASSERT(chunk_actual == capacity); |
| |
| offset += chunk_actual; |
| len -= chunk_actual; |
| return len > 0 ? ZX_ERR_NEXT : ZX_ERR_STOP; |
| }); |
| |
  // Return |ZX_ERR_BUFFER_TOO_SMALL| if not all of |len| was transferred.
| status = (status == ZX_OK && len > 0) ? ZX_ERR_BUFFER_TOO_SMALL : status; |
| return {status, total}; |
| } |
| |
| ktl::pair<zx_status_t, size_t> VmObjectPaged::WriteUserVector( |
| user_in_iovec_t vec, uint64_t offset, size_t len, |
| const OnWriteBytesTransferredCallback& on_bytes_transferred) { |
| if (len == 0u) { |
| return {ZX_OK, 0}; |
| } |
| if (len > UINT64_MAX - offset) { |
| return {ZX_ERR_OUT_OF_RANGE, 0}; |
| } |
| |
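  // Same chunking scheme as ReadUserVector: clamp each iovec entry to the remaining |len| and
  // accumulate the bytes transferred, even when a chunk fails part way through.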
| size_t total = 0; |
| zx_status_t status = vec.ForEach([&](user_in_ptr<const char> ptr, size_t capacity) { |
| if (capacity > len) { |
| capacity = len; |
| } |
| |
| auto [write_status, chunk_actual] = |
| WriteUser(ptr, offset, capacity, VmObjectReadWriteOptions::None, on_bytes_transferred); |
| |
| // Always add |chunk_actual| since some bytes may have been transferred, even on error |
| total += chunk_actual; |
| if (write_status != ZX_OK) { |
| return write_status; |
| } |
| |
| DEBUG_ASSERT(chunk_actual == capacity); |
| |
| offset += chunk_actual; |
| len -= chunk_actual; |
| return len > 0 ? ZX_ERR_NEXT : ZX_ERR_STOP; |
| }); |
| |
  // Return |ZX_ERR_BUFFER_TOO_SMALL| if not all of |len| was transferred.
| status = (status == ZX_OK && len > 0) ? ZX_ERR_BUFFER_TOO_SMALL : status; |
| return {status, total}; |
| } |
| |
| zx_status_t VmObjectPaged::TakePages(uint64_t offset, uint64_t len, VmPageSpliceList* pages) { |
| canary_.Assert(); |
| |
| // TODO: Check that the region is locked once locking is implemented |
| if (is_contiguous()) { |
| return ZX_ERR_NOT_SUPPORTED; |
| } |
| |
| auto cow_range = GetCowRange(offset, len); |
| if (!cow_range) { |
| return ZX_ERR_OUT_OF_RANGE; |
| } |
| auto range = *cow_range; |
| |
| // Initialize the splice list to the right size. |
| pages->Initialize(range.len); |
| uint64_t splice_offset = 0; |
| |
| __UNINITIALIZED MultiPageRequest page_request; |
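  // Take pages in as large a chunk as the cow pages will allow. A ZX_ERR_SHOULD_WAIT return means a
  // page request was generated; wait on it and then retry the remainder of the range.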
| while (!range.is_empty()) { |
| uint64_t taken_len = 0; |
| zx_status_t status = |
| cow_pages_->TakePages(range, splice_offset, pages, &taken_len, &page_request); |
| if (status != ZX_ERR_SHOULD_WAIT && status != ZX_OK) { |
| return status; |
| } |
    // We would only have failed to take anything if the status was not ZX_OK, and the only non-OK
    // status that can reach this point is ZX_ERR_SHOULD_WAIT.
| DEBUG_ASSERT(taken_len > 0 || status == ZX_ERR_SHOULD_WAIT); |
| // We should have taken the entire range requested if the status was ZX_OK. |
| DEBUG_ASSERT(status != ZX_OK || taken_len == range.len); |
| // We should not have taken any more than the requested range. |
| DEBUG_ASSERT(taken_len <= range.len); |
| |
| splice_offset += taken_len; |
| |
| // Record the completed portion. |
| range = range.TrimedFromStart(taken_len); |
| |
| if (status == ZX_ERR_SHOULD_WAIT) { |
| status = page_request.Wait(); |
| if (status != ZX_OK) { |
| return status; |
| } |
| } |
| } |
| return ZX_OK; |
| } |
| |
| zx_status_t VmObjectPaged::SupplyPages(uint64_t offset, uint64_t len, VmPageSpliceList* pages, |
| SupplyOptions options) { |
| canary_.Assert(); |
| |
| // We need this check here instead of in SupplyPagesLocked, as we do use that |
| // function to provide pages to contiguous VMOs as well. |
| if (is_contiguous()) { |
| return ZX_ERR_NOT_SUPPORTED; |
| } |
| auto cow_range = GetCowRange(offset, len); |
| if (!cow_range) { |
| return ZX_ERR_OUT_OF_RANGE; |
| } |
| auto range = *cow_range; |
| |
| __UNINITIALIZED MultiPageRequest page_request; |
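  // Supply pages in chunks, holding the lock only across each SupplyPagesLocked call. A
  // ZX_ERR_SHOULD_WAIT return means a page request was generated; wait on it with no locks held and
  // then retry the remaining range.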
| while (!range.is_empty()) { |
| uint64_t supply_len = 0; |
| zx_status_t status; |
| { |
| __UNINITIALIZED VmCowPages::DeferredOps deferred(cow_pages_.get()); |
| Guard<CriticalMutex> guard{lock()}; |
| status = cow_pages_locked()->SupplyPagesLocked(range, pages, options, &supply_len, deferred, |
| &page_request); |
| } |
| if (status != ZX_ERR_SHOULD_WAIT && status != ZX_OK) { |
| return status; |
| } |
    // We would only have failed to supply anything if the status was not ZX_OK, and the only non-OK
    // status that can reach this point is ZX_ERR_SHOULD_WAIT.
| DEBUG_ASSERT(supply_len > 0 || status == ZX_ERR_SHOULD_WAIT); |
| // We should have supplied the entire range requested if the status was ZX_OK. |
| DEBUG_ASSERT(status != ZX_OK || supply_len == range.len); |
| // We should not have supplied any more than the requested range. |
| DEBUG_ASSERT(supply_len <= range.len); |
| |
| // Record the completed portion. |
| range = range.TrimedFromStart(supply_len); |
| |
| if (status == ZX_ERR_SHOULD_WAIT) { |
| status = page_request.Wait(); |
| if (status != ZX_OK) { |
| return status; |
| } |
| } |
| } |
| return ZX_OK; |
| } |
| |
| zx_status_t VmObjectPaged::DirtyPages(uint64_t offset, uint64_t len) { |
| // It is possible to encounter delayed PMM allocations, which requires waiting on the |
| // page_request. |
| __UNINITIALIZED AnonymousPageRequest page_request; |
| |
| auto cow_range = GetCowRange(offset, len); |
| if (!cow_range) { |
| return ZX_ERR_OUT_OF_RANGE; |
| } |
| |
  // Initialize a list that cow_pages_->DirtyPages will allocate any new pages into before inserting
  // them in the VMO. Allocated pages can therefore be carried across multiple calls to DirtyPages:
  // instead of allocating and freeing pages each time DirtyPages cannot dirty the entire range
  // atomically, we hold on to the allocated pages and reuse them on the next call. This ensures
  // that each successive call to DirtyPages makes forward progress.
| list_node alloc_list; |
| list_initialize(&alloc_list); |
| auto alloc_list_cleanup = fit::defer([&alloc_list, this]() -> void { |
| if (!list_is_empty(&alloc_list)) { |
| cow_pages_->FreePages(&alloc_list); |
| } |
| }); |
| while (true) { |
| zx_status_t status = cow_pages_->DirtyPages(*cow_range, &alloc_list, &page_request); |
| if (status == ZX_OK) { |
| return ZX_OK; |
| } |
| if (status == ZX_ERR_SHOULD_WAIT) { |
| status = page_request.Allocate().status_value(); |
| } |
| if (status != ZX_OK) { |
| return status; |
| } |
| // If the wait was successful, loop around and try the call again, which will re-validate any |
| // state that might have changed when the lock was dropped. |
| } |
| } |
| |
| zx_status_t VmObjectPaged::EnumerateDirtyRanges(uint64_t offset, uint64_t len, |
| DirtyRangeEnumerateFunction&& dirty_range_fn) { |
| Guard<CriticalMutex> guard{lock()}; |
| if (auto cow_range = GetCowRange(offset, len)) { |
| // Need to wrap the callback to translate the cow pages offsets back into offsets as seen by |
| // this object. |
| return cow_pages_locked()->EnumerateDirtyRangesLocked( |
| *cow_range, [&dirty_range_fn, undo_offset = cow_range_.offset]( |
| uint64_t range_offset, uint64_t range_len, bool range_is_zero) { |
| return dirty_range_fn(range_offset - undo_offset, range_len, range_is_zero); |
| }); |
| } |
| return ZX_ERR_OUT_OF_RANGE; |
| } |
| |
| zx_status_t VmObjectPaged::SetMappingCachePolicy(const uint32_t cache_policy) { |
| // Is it a valid cache flag? |
| if (cache_policy & ~ZX_CACHE_POLICY_MASK) { |
| return ZX_ERR_INVALID_ARGS; |
| } |
| |
| Guard<CriticalMutex> guard{lock()}; |
| |
| // conditions for allowing the cache policy to be set: |
| // 1) vmo has no pinned pages |
| // 2) vmo has no mappings |
| // 3) vmo has no children |
| // 4) vmo is not a child |
| if (cow_pages_locked()->pinned_page_count_locked() > 0) { |
| return ZX_ERR_BAD_STATE; |
| } |
| |
| if (self_locked()->num_mappings_locked() != 0) { |
| return ZX_ERR_BAD_STATE; |
| } |
| |
  // The ChildListLock needs to be held to inspect the children/parent pointers. However, we do not
  // need to hold it over the remainder of this method: the main VMO lock is held, and creating a
  // new child happens under that lock as well, since the creation path must check cache_policy_ and
  // create the child in a single lock acquisition.
| { |
| Guard<CriticalMutex> child_guard{ChildListLock::Get()}; |
| |
| if (!children_list_.is_empty()) { |
| return ZX_ERR_BAD_STATE; |
| } |
| if (parent_) { |
| return ZX_ERR_BAD_STATE; |
| } |
| } |
| |
| // Forbid if there are references, or if this object is a reference itself. We do not want cache |
| // policies to diverge across references. Note that this check is required in addition to the |
| // children_list_ and parent_ check, because it is possible for a non-reference parent to go away, |
| // which will trigger the election of a reference as the new owner for the remaining |
| // reference_list_, and also reset the parent_. |
| if (!reference_list_.is_empty()) { |
| return ZX_ERR_BAD_STATE; |
| } |
| if (is_reference()) { |
| return ZX_ERR_BAD_STATE; |
| } |
| |
| // It does not make sense for a pager-backed or discardable VMO to be uncached. |
| if (is_user_pager_backed() || is_discardable()) { |
| DEBUG_ASSERT(GetMappingCachePolicyLocked() == ARCH_MMU_FLAG_CACHED); |
| return cache_policy == ARCH_MMU_FLAG_CACHED ? ZX_OK : ZX_ERR_BAD_STATE; |
| } |
| |
| // Set the cache policy before informing the VmCowPages, as it may make decisions based on the |
| // final cache policy. |
  // There's no good way to convince the static analysis that the lock() we hold is also the
  // VmObject::lock(), so we disable analysis to set the cache_policy_.
| [this, &cache_policy]() TA_REQ(lock()) |
| TA_NO_THREAD_SAFETY_ANALYSIS { cache_policy_ = cache_policy; }(); |
| |
  // Ask the cow pages to perform any internal transitions and, most importantly, to clean and
  // invalidate any committed pages. In the case of going from cached->uncached, the clean+invalidate
  // ensures that any modifications are written back to RAM so that an uncached mapping sees
  // modifications made prior to changing the cache policy. When going from uncached->cached, the
  // cached physmap means there could be cache lines holding stale data for pages that were modified
  // via an uncached mapping. As these cache lines are, by definition, clean, a clean+invalidate will
  // simply invalidate them without writing them back, ensuring that a future access via a cached
  // mapping sees the up-to-date value.
| // Note that uncached here refers to any of the uncached policies: device, write combining, etc. |
| // Transitioning between different uncached policies does not require a cache operation for |
| // correctness, but it is also harmless and not a case we attempt to optimize for. |
| cow_pages_locked()->FinishCachePolicyTransitionLocked(); |
| |
| return ZX_OK; |
| } |
| |
| void VmObjectPaged::RangeChangeUpdateLocked(VmCowRange range, RangeChangeOp op) { |
| canary_.Assert(); |
| |
  // Offsets for VMOs needn't be page-aligned, but VMARs only map page-aligned ranges, so round the
  // range out to page boundaries before intersecting with our view of the cow pages.
| uint64_t aligned_offset = ROUNDDOWN_PAGE_SIZE(range.offset); |
| uint64_t aligned_len = ROUNDUP_PAGE_SIZE(range.end()) - aligned_offset; |
| if (GetIntersect(cow_range_.offset, cow_range_.len, aligned_offset, aligned_len, &aligned_offset, |
| &aligned_len)) { |
| // Found the intersection in cow space, convert back to object space. |
| aligned_offset -= cow_range_.offset; |
| self_locked()->RangeChangeUpdateMappingsLocked(aligned_offset, aligned_len, op); |
| } |
| |
  // Propagate the change to reference children as well. This is done regardless of whether the
  // range intersects, since we may have become the holder of the reference list even if these
  // references were not originally made against us, and so their cow views might be different.
| for (auto& ref : reference_list_) { |
| AssertHeld(ref.lock_ref()); |
| // Use the same offset and len. References span the entirety of the parent VMO and hence share |
| // all offsets. |
| ref.RangeChangeUpdateLocked(range, op); |
| } |
| } |
| |
| void VmObjectPaged::ForwardRangeChangeUpdateLocked(uint64_t offset, uint64_t len, |
| RangeChangeOp op) { |
| canary_.Assert(); |
| |
| // Call RangeChangeUpdateLocked on the owner of the CowPages. |
| AssertHeld(cow_pages_locked()->get_paged_backlink_locked()->lock_ref()); |
| if (auto cow_range = GetCowRange(offset, len)) { |
| cow_pages_locked()->get_paged_backlink_locked()->RangeChangeUpdateLocked(*cow_range, op); |
| } |
| } |
| |
| zx_status_t VmObjectPaged::LockRange(uint64_t offset, uint64_t len, |
| zx_vmo_lock_state_t* lock_state_out) { |
| if (!is_discardable()) { |
| return ZX_ERR_NOT_SUPPORTED; |
| } |
| auto cow_range = GetCowRange(offset, len); |
| if (!cow_range) { |
| return ZX_ERR_OUT_OF_RANGE; |
| } |
| |
| Guard<CriticalMutex> guard{lock()}; |
| return cow_pages_locked()->LockRangeLocked(*cow_range, lock_state_out); |
| } |
| |
| zx_status_t VmObjectPaged::TryLockRange(uint64_t offset, uint64_t len) { |
| if (!is_discardable()) { |
| return ZX_ERR_NOT_SUPPORTED; |
| } |
| auto cow_range = GetCowRange(offset, len); |
| if (!cow_range) { |
| return ZX_ERR_OUT_OF_RANGE; |
| } |
| |
| Guard<CriticalMutex> guard{lock()}; |
| return cow_pages_locked()->TryLockRangeLocked(*cow_range); |
| } |
| |
| zx_status_t VmObjectPaged::UnlockRange(uint64_t offset, uint64_t len) { |
| if (!is_discardable()) { |
| return ZX_ERR_NOT_SUPPORTED; |
| } |
| auto cow_range = GetCowRange(offset, len); |
| if (!cow_range) { |
| return ZX_ERR_OUT_OF_RANGE; |
| } |
| |
| Guard<CriticalMutex> guard{lock()}; |
| return cow_pages_locked()->UnlockRangeLocked(*cow_range); |
| } |
| |
| zx_status_t VmObjectPaged::GetPage(uint64_t offset, uint pf_flags, list_node* alloc_list, |
| MultiPageRequest* page_request, vm_page_t** page, paddr_t* pa) { |
| __UNINITIALIZED VmCowPages::DeferredOps deferred(cow_pages_.get()); |
| Guard<CriticalMutex> guard{lock()}; |
| const bool write = pf_flags & VMM_PF_FLAG_WRITE; |
| zx::result<VmCowPages::LookupCursor> cursor = GetLookupCursorLocked(offset, PAGE_SIZE); |
| if (cursor.is_error()) { |
| return cursor.error_value(); |
| } |
| AssertHeld(cursor->lock_ref()); |
  // Hardware faults are considered to update access times separately; all other lookup reasons
  // should do the default update of the access time.
| if (pf_flags & VMM_PF_FLAG_HW_FAULT) { |
| cursor->DisableMarkAccessed(); |
| } |
| if (!(pf_flags & VMM_PF_FLAG_FAULT_MASK)) { |
| vm_page_t* p = cursor->MaybePage(write); |
| if (!p) { |
| return ZX_ERR_NOT_FOUND; |
| } |
| if (page) { |
| *page = p; |
| } |
| if (pa) { |
| *pa = p->paddr(); |
| } |
| return ZX_OK; |
| } |
| auto result = cursor->RequirePage(write, PAGE_SIZE, deferred, page_request); |
| if (result.is_error()) { |
| return result.error_value(); |
| } |
| if (page) { |
| *page = result->page; |
| } |
| if (pa) { |
| *pa = result->page->paddr(); |
| } |
| return ZX_OK; |
| } |