| // Copyright 2016 The Fuchsia Authors |
| // |
| // Use of this source code is governed by a MIT-style |
| // license that can be found in the LICENSE file or at |
| // https://opensource.org/licenses/MIT |
| |
| #include <align.h> |
| #include <assert.h> |
| #include <inttypes.h> |
| #include <lib/counters.h> |
| #include <lib/fit/defer.h> |
| #include <lib/page/size.h> |
| #include <trace.h> |
| #include <zircon/errors.h> |
| #include <zircon/types.h> |
| |
| #include <fbl/alloc_checker.h> |
| #include <ktl/algorithm.h> |
| #include <ktl/iterator.h> |
| #include <ktl/utility.h> |
| #include <vm/fault.h> |
| #include <vm/physmap.h> |
| #include <vm/vm.h> |
| #include <vm/vm_aspace.h> |
| #include <vm/vm_object.h> |
| #include <vm/vm_object_paged.h> |
| #include <vm/vm_object_physical.h> |
| |
| #include "vm/vm_address_region.h" |
| #include "vm_priv.h" |
| |
| #include <ktl/enforce.h> |
| |
| #define LOCAL_TRACE VM_GLOBAL_TRACE(0) |
| |
| namespace { |
| |
| KCOUNTER(vm_mapping_attribution_queries, "vm.attributed_memory.mapping.queries") |
| KCOUNTER(vm_mappings_merged, "vm.aspace.mapping.merged_neighbors") |
| KCOUNTER(vm_mappings_protect_no_write, "vm.aspace.mapping.protect_without_write") |
| |
| } // namespace |
| |
| // Helper class for managing the logic of skipping certain unmap operations for in-progress faults. |
| // This is expected to be stack allocated under the object lock, and the object lock must not be |
| // dropped over its lifetime. |
| // Creating this object creates a contract where the caller will either update the mapping for this |
| // location and call MappingUpdated, or this object will automatically unmap the location if |
| // necessary. |
| class VmMapping::CurrentlyFaulting { |
| public: |
| CurrentlyFaulting(VmMapping* mapping, uint64_t object_offset, uint64_t len) |
| TA_REQ(mapping->object_->lock()) |
| : mapping_(mapping), object_offset_(object_offset), len_(len) { |
| DEBUG_ASSERT(mapping->currently_faulting_ == nullptr); |
| // CurrentlyFaulting is typically allocated on the stack and GCC's diagnostics can get confused |
| // and fail to realize that the destructor will clear the pointer, causing GCC to believe that |
| // there might be a dangling pointer. |
| #if !defined(__clang__) |
| #pragma GCC diagnostic push |
| #pragma GCC diagnostic ignored "-Wdangling-pointer" |
| #endif |
| mapping->currently_faulting_ = this; |
| #if !defined(__clang__) |
| #pragma GCC diagnostic pop |
| #endif |
| } |
| ~CurrentlyFaulting() { |
| // If the caller did not call MappingUpdated, and an unmap was skipped, then we must unmap the |
| // range ourselves. We only do the unmap here if a prior unmap was skipped to avoid needless |
| // unmaps due to transient errors such as needing to wait on a page request. |
| if (state_ == State::UnmapSkipped) { |
| vaddr_t base; |
| size_t new_len; |
| bool valid_range = mapping_->ObjectRangeToVaddrRange(object_offset_, len_, &base, &new_len); |
| ASSERT(valid_range); |
| ASSERT(new_len == len_); |
| zx_status_t status = mapping_->aspace_->arch_aspace().Unmap( |
| base, new_len / kPageSize, mapping_->aspace_->EnlargeArchUnmap()); |
| ASSERT(status == ZX_OK); |
| } |
| mapping_->currently_faulting_ = nullptr; |
| } |
| |
| // Called to say that the given range needs to be unmapped. This returns true if updating the |
| // range will be handled by the faulting thread and the unmap can therefore be skipped. |
| // Returns false if the caller should perform the unmap themselves. |
| bool UnmapRange(uint64_t object_offset, uint64_t len) { |
| DEBUG_ASSERT(state_ != State::Completed); |
| if (Intersects(object_offset, len, object_offset_, len_)) { |
| state_ = State::UnmapSkipped; |
| return true; |
| } |
| return false; |
| } |
| |
| // Called to indicate that the mapping for the fault location has been updated successfully. This |
| // acts to cancel the unmap that would otherwise happen when this object goes out of scope. |
| void MappingUpdated() { state_ = State::Completed; } |
| |
| DISALLOW_COPY_ASSIGN_AND_MOVE(CurrentlyFaulting); |
| |
| private: |
| // Reference back to the original mapping. |
| VmMapping* mapping_; |
| // The offset, in object space, of the page fault. |
| uint64_t object_offset_; |
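| // The length, in bytes, of the faulting range starting at |object_offset_|. |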
| uint64_t len_; |
| enum class State { |
| NoUnmapNeeded, |
| UnmapSkipped, |
| Completed, |
| }; |
| State state_ = State::NoUnmapNeeded; |
| }; |
| |
| VmMapping::VmMapping(VmAddressRegion& parent, bool private_clone, vaddr_t base, size_t size, |
| uint32_t vmar_flags, fbl::RefPtr<VmObject> vmo, uint64_t vmo_offset, |
| MappingProtectionRanges&& ranges, Mergeable mergeable) |
| : VmAddressRegionOrMapping(base, size, vmar_flags, parent.aspace_.get(), &parent, true), |
| mergeable_(mergeable), |
| private_clone_(private_clone), |
| object_(ktl::move(vmo)), |
| object_offset_(vmo_offset), |
| protection_ranges_(ktl::move(ranges)) { |
| LTRACEF("%p aspace %p base %#" PRIxPTR " size %#zx offset %#" PRIx64 "\n", this, aspace_.get(), |
| base_, size_, vmo_offset); |
| } |
| |
| VmMapping::VmMapping(VmAddressRegion& parent, bool private_clone, vaddr_t base, size_t size, |
| uint32_t vmar_flags, fbl::RefPtr<VmObject> vmo, uint64_t vmo_offset, |
| uint arch_mmu_flags, Mergeable mergeable) |
| : VmMapping(parent, private_clone, base, size, vmar_flags, ktl::move(vmo), vmo_offset, |
| MappingProtectionRanges(arch_mmu_flags), mergeable) {} |
| |
| VmMapping::~VmMapping() { |
| canary_.Assert(); |
| LTRACEF("%p aspace %p base %#" PRIxPTR " size %#zx\n", this, aspace_.get(), base_, size_); |
| } |
| |
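| // Returns a RefPtr to the VMO backing this mapping. This may be null if the mapping has already |
| // been destroyed. |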
| fbl::RefPtr<VmObject> VmMapping::vmo() const { |
| Guard<CriticalMutex> guard{lock()}; |
| return vmo_locked(); |
| } |
| |
| VmMapping::AttributionCounts VmMapping::GetAttributedMemoryLocked( |
| Guard<CriticalMutex>& guard) const { |
| canary_.Assert(); |
| |
| if (!IsAliveLocked()) { |
| return AttributionCounts{}; |
| } |
| |
| vm_mapping_attribution_queries.Add(1); |
| |
| // Copy out the values we need so that they can be safely used while the lock is dropped below. |
| fbl::RefPtr<VmObject> vmo = object_; |
| const uint64_t object_offset = object_offset_; |
| const uint64_t size = size_; |
| VmMapping::AttributionCounts page_counts; |
| guard.CallUnlocked( |
| [&]() { page_counts = vmo->GetAttributedMemoryInRange(object_offset, size); }); |
| return page_counts; |
| } |
| |
| VmMapping::AttributionCounts VmMapping::GetAttributedMemory() const { |
| canary_.Assert(); |
| |
| Guard<CriticalMutex> guard{lock()}; |
| return GetAttributedMemoryLocked(guard); |
| } |
| |
| void VmMapping::DumpLocked(uint depth, bool verbose) const { |
| canary_.Assert(); |
| for (uint i = 0; i < depth; ++i) { |
| printf(" "); |
| } |
| char vmo_name[32]; |
| object_->get_name(vmo_name, sizeof(vmo_name)); |
| printf("map %p [%#" PRIxPTR " %#" PRIxPTR "] sz %#zx state %d mergeable %s\n", this, base_, |
| base_ + size_ - 1, size_, (int)state_locked(), |
| mergeable_ == Mergeable::YES ? "true" : "false"); |
| EnumerateProtectionRangesLocked(base_, size_, [depth](vaddr_t base, size_t len, uint mmu_flags) { |
| for (uint i = 0; i < depth + 1; ++i) { |
| printf(" "); |
| } |
| printf(" [%#" PRIxPTR " %#" PRIxPTR "] mmufl %#x\n", base, base + len - 1, mmu_flags); |
| return ZX_ERR_NEXT; |
| }); |
| for (uint i = 0; i < depth + 1; ++i) { |
| printf(" "); |
| } |
| AttributionCounts counts = object_->GetAttributedMemoryInRange(object_offset_, size_); |
| printf("vmo %p/k%" PRIu64 " off %#" PRIx64 " bytes (%zu/%zu) ref %d '%s'\n", object_.get(), |
| object_->user_id(), object_offset_, counts.uncompressed_bytes, counts.compressed_bytes, |
| ref_count_debug(), vmo_name); |
| if (verbose) { |
| object_->Dump(depth + 1, false); |
| } |
| } |
| |
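| // Local shorthand for the arch aspace unmap option flags used below. |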
| using ArchUnmapOptions = ArchVmAspaceInterface::ArchUnmapOptions; |
| |
| // static |
| zx_status_t VmMapping::ProtectOrUnmap(const fbl::RefPtr<VmAspace>& aspace, vaddr_t base, |
| size_t size, uint new_arch_mmu_flags) { |
| // This can never be used to set a WRITE permission since it does not ask the underlying VMO to |
| // perform the copy-on-write step. The underlying VMO might also support dirty tracking, which |
| // requires write permission faults in order to track pages as dirty when written. |
| ASSERT(!(new_arch_mmu_flags & ARCH_MMU_FLAG_PERM_WRITE)); |
| // If not removing all permissions, do the protect; otherwise skip straight to unmapping the |
| // entire region. |
| if ((new_arch_mmu_flags & ARCH_MMU_FLAG_PERM_RWX_MASK) != 0) { |
| zx_status_t status = aspace->arch_aspace().Protect( |
| base, size / kPageSize, new_arch_mmu_flags, |
| aspace->can_enlarge_arch_unmap() ? ArchUnmapOptions::Enlarge : ArchUnmapOptions::None); |
| // If the protect failed and we are allowed to unmap extra portions of the aspace then fall |
| // through and unmap, otherwise return with whatever the status is. |
| if (likely(status == ZX_OK) || !aspace->can_enlarge_arch_unmap()) { |
| return status; |
| } |
| } |
| |
| return aspace->arch_aspace().Unmap(base, size / kPageSize, aspace->EnlargeArchUnmap()); |
| } |
| |
| zx_status_t VmMapping::ProtectLocked(vaddr_t base, size_t size, uint new_arch_mmu_flags) { |
| // Assert a few things that should already have been checked by the caller. |
| DEBUG_ASSERT(size != 0 && IsPageRounded(base) && IsPageRounded(size)); |
| DEBUG_ASSERT(!(new_arch_mmu_flags & ARCH_MMU_FLAG_CACHE_MASK)); |
| DEBUG_ASSERT(is_valid_mapping_flags(new_arch_mmu_flags)); |
| |
| DEBUG_ASSERT(object_); |
| // grab the lock for the vmo |
| Guard<CriticalMutex> guard{object_->lock()}; |
| |
| // Persist our current caching mode. Every protect region will have the same caching mode so we |
| // can acquire this from any region. |
| new_arch_mmu_flags |= (protection_ranges_.FirstRegionMmuFlags() & ARCH_MMU_FLAG_CACHE_MASK); |
| |
| // This will get called by UpdateProtectionRange below for every existing unique protection range |
| // that gets changed and allows us to fine tune the protect action based on the previous flags. |
| auto protect_callback = [new_arch_mmu_flags, this](vaddr_t base, size_t size, |
| uint old_arch_mmu_flags) { |
| // Perform an early return if the new and old flags are the same, as there's nothing to be done. |
| if (new_arch_mmu_flags == old_arch_mmu_flags) { |
| return; |
| } |
| |
| uint flags = new_arch_mmu_flags; |
| // Check if the new flags have the write permission. This is problematic as we cannot just |
| // change any existing hardware mappings to have the write permission, as any individual mapping |
| // may be the result of a read fault and still need to have a copy-on-write step performed. This |
| // mapping could also be of a dirty tracked VMO, which requires write permission faults to track |
| // pages as dirty when written. |
| if (new_arch_mmu_flags & ARCH_MMU_FLAG_PERM_WRITE) { |
| // Whatever happens, we're not going to be protecting the arch aspace to have write mappings, |
| // so this has to be a user aspace so that we can lazily take write faults in the future. |
| ASSERT(aspace_->is_user() || aspace_->is_guest_physical()); |
| flags &= ~ARCH_MMU_FLAG_PERM_WRITE; |
| vm_mappings_protect_no_write.Add(1); |
| // If the new flags without write permission are the same as the old flags, then skip the |
| // protect step since it will be a no-op. |
| if (flags == old_arch_mmu_flags) { |
| return; |
| } |
| } |
| |
| zx_status_t status = ProtectOrUnmap(aspace_, base, size, flags); |
| // If the protect failed then we do not have sufficient information left to roll back in order |
| // to return an error, nor can we claim success, so require the protect to have succeeded to |
| // continue. |
| ASSERT(status == ZX_OK); |
| }; |
| |
| zx_status_t status = protection_ranges_.UpdateProtectionRange( |
| base_, size_, base, size, new_arch_mmu_flags, protect_callback); |
| ASSERT(status == ZX_OK || status == ZX_ERR_NO_MEMORY); |
| return status; |
| } |
| |
| zx_status_t VmMapping::UnmapLocked(vaddr_t base, size_t size) { |
| canary_.Assert(); |
| DEBUG_ASSERT(size != 0 && IsPageRounded(size) && IsPageRounded(base)); |
| DEBUG_ASSERT(base >= base_ && base - base_ < size_); |
| DEBUG_ASSERT(size_ - (base - base_) >= size); |
| DEBUG_ASSERT(parent_); |
| |
| if (state_ != LifeCycleState::ALIVE) { |
| return ZX_ERR_BAD_STATE; |
| } |
| |
| AssertHeld(parent_->lock_ref()); |
| AssertHeld(parent_->region_lock_ref()); |
| |
| // Should never be unmapping everything, otherwise should destroy. |
| DEBUG_ASSERT(base != base_ || size != size_); |
| |
| LTRACEF("%p\n", this); |
| |
| // First create any new mappings. One or two might be required depending on whether we are |
| // unmapping from an end or from the middle. |
| fbl::RefPtr<VmMapping> left, right; |
| if (base_ != base) { |
| fbl::AllocChecker ac; |
| left = fbl::AdoptRef(new (&ac) VmMapping(*parent_, private_clone_, base_, base - base_, flags_, |
| object_, object_offset_, MappingProtectionRanges(0), |
| Mergeable::YES)); |
| if (!ac.check()) { |
| return ZX_ERR_NO_MEMORY; |
| } |
| } |
| if (base + size != base_ + size_) { |
| fbl::AllocChecker ac; |
| const vaddr_t offset = base + size - base_; |
| right = fbl::AdoptRef(new (&ac) VmMapping( |
| *parent_, private_clone_, base_ + offset, size_ - offset, flags_, object_, |
| object_offset_ + offset, MappingProtectionRanges(0), Mergeable::YES)); |
| if (!ac.check()) { |
| return ZX_ERR_NO_MEMORY; |
| } |
| } |
| |
| // Grab the lock for the vmo. This is acquired here so that it is held continuously over both the |
| // architectural unmap and removing the current mapping from the VMO. |
| DEBUG_ASSERT(object_); |
| Guard<CriticalMutex> guard{object_->lock()}; |
| |
| zx_status_t status = |
| aspace_->arch_aspace().Unmap(base, size / kPageSize, aspace_->EnlargeArchUnmap()); |
| ASSERT(status == ZX_OK); |
| |
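| // Record the current priority so that it can be propagated to the replacement mapping(s) created |
| // below, before this mapping gets destroyed. |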
| const MemoryPriority old_priority = memory_priority_; |
| auto set_priority = |
| [old_priority](VmMapping& self) TA_REQ(self.lock()) TA_REQ(self.object_->lock()) { |
| if (old_priority == VmAddressRegion::MemoryPriority::HIGH) { |
| self.SetMemoryPriorityHighAlreadyPositiveLockedObject</*SplitOnUnmap=*/true>(); |
| } else { |
| DEBUG_ASSERT(old_priority == VmAddressRegion::MemoryPriority::DEFAULT); |
| self.SetMemoryPriorityDefaultLockedObject</*SplitOnUnmap=*/true>(); |
| } |
| }; |
| |
| // Split the protection_ranges_ from this mapping into the new mapping(s). This has to be done |
| // after the mapping construction as this step is destructive and hard to roll back. |
| // |
| // Need to set memory priorities before we call DestroyLockedObject. If we have |
| // MemoryPriority::HIGH, then we need to pass that on to left and right before object_ and |
| // aspace_ suffer any dynamic reclamation. |
| if (right) { |
| AssertHeld(right->lock_ref()); |
| AssertHeld(right->object_lock_ref()); |
| MappingProtectionRanges right_prot = protection_ranges_.SplitAt(base + size); |
| right->protection_ranges_ = ktl::move(right_prot); |
| set_priority(*right); |
| } |
| if (left) { |
| AssertHeld(left->lock_ref()); |
| AssertHeld(left->object_lock_ref()); |
| protection_ranges_.DiscardAbove(base); |
| left->protection_ranges_ = ktl::move(protection_ranges_); |
| set_priority(*left); |
| } |
| |
| // Now finish destroying this mapping. |
| status = DestroyLockedObject(false); |
| ASSERT(status == ZX_OK); |
| |
| // Install the new mappings. |
| auto finish_mapping = [](fbl::RefPtr<VmMapping>& mapping) { |
| if (mapping) { |
| AssertHeld(mapping->lock_ref()); |
| AssertHeld(mapping->object_lock_ref()); |
| AssertHeld(mapping->region_lock_ref()); |
| mapping->ActivateLocked(); |
| } |
| }; |
| finish_mapping(left); |
| finish_mapping(right); |
| return ZX_OK; |
| } |
| |
| bool VmMapping::ObjectRangeToVaddrRange(uint64_t offset, uint64_t len, vaddr_t* base, |
| uint64_t* virtual_len) const { |
| DEBUG_ASSERT(IsPageRounded(offset)); |
| DEBUG_ASSERT(IsPageRounded(len)); |
| DEBUG_ASSERT(base); |
| DEBUG_ASSERT(virtual_len); |
| |
| // Zero sized ranges are considered to have no overlap. |
| if (len == 0) { |
| *base = 0; |
| *virtual_len = 0; |
| return false; |
| } |
| |
| // compute the intersection of the passed in vmo range and our mapping |
| uint64_t offset_new; |
| if (!GetIntersect(object_offset_, static_cast<uint64_t>(size()), offset, len, &offset_new, |
| virtual_len)) { |
| return false; |
| } |
| |
| DEBUG_ASSERT(*virtual_len > 0 && *virtual_len <= SIZE_MAX); |
| DEBUG_ASSERT(offset_new >= object_offset_); |
| |
| LTRACEF("intersection offset %#" PRIx64 ", len %#" PRIx64 "\n", offset_new, *virtual_len); |
| |
| // make sure the base + offset is within our address space |
| // should be, according to the range stored in base_ + size_ |
| bool overflowed = add_overflow(this->base(), offset_new - object_offset_, base); |
| ASSERT(!overflowed); |
| |
| // make sure we're only operating within our window |
| ASSERT(*base >= this->base()); |
| ASSERT((*base + *virtual_len - 1) <= (this->base() + size() - 1)); |
| |
| return true; |
| } |
| |
| void VmMapping::AspaceUnmapLockedObject(uint64_t offset, uint64_t len, UnmapOptions options) const { |
| canary_.Assert(); |
| |
| // NOTE: must be called with the vmo lock held, but doesn't need to take |
| // the address space lock, since it will not manipulate its location in the |
| // vmar tree. However, it must be held in the ALIVE state across this call. |
| // |
| // This avoids a race with DestroyLocked(), since that removes us from the VMO's |
| // mapping list with the VMO lock held before dropping this state to DEAD. The |
| // VMO can't call back to us once we're out of its list. |
| DEBUG_ASSERT(get_state_locked_object() == LifeCycleState::ALIVE); |
| |
| // |object_| itself is not accessed in this method, and we do not hold the correct lock for it, |
| // but we know that object_->lock() is held and therefore object_ is valid and will not be |
| // modified. It is therefore correct to read object_ here for the purposes of an assert, but this |
| // cannot be expressed nicely with regular annotations. |
| [&]() TA_NO_THREAD_SAFETY_ANALYSIS { DEBUG_ASSERT(object_); }(); |
| |
| // In the case of unmapping known instances of the zero page check if this range intersects with |
| // an in progress fault. If it does we can skip the unmap with the knowledge that the mapping will |
| // be updated later. This is safe since the zero page is, by definition, only mapped read only, |
| // and is never modified so delaying the update of the mapping cannot cause either any users to |
| // see incorrect data, or users to be able to modify an old mapping. |
| if ((options & UnmapOptions::OnlyHasZeroPages) && currently_faulting_ && |
| currently_faulting_->UnmapRange(offset, len)) { |
| return; |
| } |
| |
| LTRACEF("region %p obj_offset %#" PRIx64 " size %zu, offset %#" PRIx64 " len %#" PRIx64 "\n", |
| this, object_offset_, size_, offset, len); |
| |
| // See if there's an intersect. |
| vaddr_t base; |
| uint64_t new_len; |
| if (!ObjectRangeToVaddrRange(offset, len, &base, &new_len)) { |
| return; |
| } |
| |
| // If this is a kernel mapping then we should not be removing mappings out of the arch aspace, |
| // unless this mapping has explicitly opted out of this check. |
| DEBUG_ASSERT(aspace_->is_user() || aspace_->is_guest_physical() || |
| flags_ & VMAR_FLAG_DEBUG_DYNAMIC_KERNEL_MAPPING); |
| |
| auto aspace_op = aspace_->EnlargeArchUnmap(); |
| if (options & UnmapOptions::Harvest) { |
| aspace_op |= ArchUnmapOptions::Harvest; |
| } |
| |
| zx_status_t status = aspace_->arch_aspace().Unmap(base, new_len / kPageSize, aspace_op); |
| ASSERT(status == ZX_OK); |
| } |
| |
| void VmMapping::AspaceRemoveWriteLockedObject(uint64_t offset, uint64_t len) const { |
| LTRACEF("region %p obj_offset %#" PRIx64 " size %zu, offset %#" PRIx64 " len %#" PRIx64 "\n", |
| this, object_offset_, size_, offset, len); |
| |
| canary_.Assert(); |
| |
| // NOTE: must be called with the vmo lock held, but doesn't need to take |
| // the address space lock, since it will not manipulate its location in the |
| // vmar tree. However, it must be held in the ALIVE state across this call. |
| // |
| // This avoids a race with DestroyLocked(), since that removes us from the VMO's |
| // mapping list with the VMO lock held before dropping this state to DEAD. The |
| // VMO can't call back to us once we're out of its list. |
| DEBUG_ASSERT(get_state_locked_object() == LifeCycleState::ALIVE); |
| |
| // |object_| itself is not accessed in this method, and we do not hold the correct lock for it, |
| // but we know that object_->lock() is held and therefore object_ is valid and will not be |
| // modified. It is therefore correct to read object_ here for the purposes of an assert, but this |
| // cannot be expressed nicely with regular annotations. |
| [&]() TA_NO_THREAD_SAFETY_ANALYSIS { DEBUG_ASSERT(object_); }(); |
| |
| // If this doesn't support writing then nothing to be done, as we know we have no write mappings. |
| if (!(flags_ & VMAR_FLAG_CAN_MAP_WRITE)) { |
| return; |
| } |
| |
| // See if there's an intersect. |
| vaddr_t base; |
| uint64_t new_len; |
| if (!ObjectRangeToVaddrRange(offset, len, &base, &new_len)) { |
| return; |
| } |
| |
| // If this is a kernel mapping then we should not be modifying mappings in the arch aspace, |
| // unless this mapping has explicitly opted out of this check. |
| DEBUG_ASSERT_MSG(aspace_->is_user() || aspace_->is_guest_physical() || |
| flags_ & VMAR_FLAG_DEBUG_DYNAMIC_KERNEL_MAPPING, |
| "region %p obj_offset %#" PRIx64 " size %zu, offset %#" PRIx64 " len %#" PRIx64 |
| "\n", |
| this, object_offset_, size(), offset, len); |
| |
| zx_status_t status = ProtectRangesLockedObject().EnumerateProtectionRanges( |
| this->base(), size(), base, new_len, |
| [this](vaddr_t region_base, size_t region_len, uint mmu_flags) { |
| // If this range doesn't currently support being writable then we can skip. |
| if (!(mmu_flags & ARCH_MMU_FLAG_PERM_WRITE)) { |
| return ZX_ERR_NEXT; |
| } |
| |
| // Build new mmu flags without writing. |
| mmu_flags &= ~(ARCH_MMU_FLAG_PERM_WRITE); |
| |
| zx_status_t result = ProtectOrUnmap(aspace_, region_base, region_len, mmu_flags); |
| if (result == ZX_OK) { |
| return ZX_ERR_NEXT; |
| } |
| return result; |
| }); |
| ASSERT(status == ZX_OK); |
| } |
| |
| void VmMapping::AspaceDebugUnpinLockedObject(uint64_t offset, uint64_t len) const { |
| LTRACEF("region %p obj_offset %#" PRIx64 " size %zu, offset %#" PRIx64 " len %#" PRIx64 "\n", |
| this, object_offset_, size_, offset, len); |
| |
| canary_.Assert(); |
| |
| // NOTE: must be called with the vmo lock held, but doesn't need to take |
| // the address space lock, since it will not manipulate its location in the |
| // vmar tree. However, it must be held in the ALIVE state across this call. |
| // |
| // This avoids a race with DestroyLocked(), since that removes us from the VMO's |
| // mapping list with the VMO lock held before dropping this state to DEAD. The |
| // VMO can't call back to us once we're out of its list. |
| DEBUG_ASSERT(get_state_locked_object() == LifeCycleState::ALIVE); |
| |
| // See if there's an intersect. |
| vaddr_t base; |
| uint64_t new_len; |
| if (!ObjectRangeToVaddrRange(offset, len, &base, &new_len)) { |
| return; |
| } |
| |
| // This unpin is not allowed for kernel mappings, unless the mapping has specifically opted out of |
| // this debug check due to it performing its own dynamic management. |
| DEBUG_ASSERT(aspace_->is_user() || aspace_->is_guest_physical() || |
| flags_ & VMAR_FLAG_DEBUG_DYNAMIC_KERNEL_MAPPING); |
| } |
| |
| namespace { |
| |
| // Helper class for batching the installation of mappings into the arch aspace. The mapping's |
| // object lock must be held over the entirety of the lifetime of this object, without ever being |
| // released. |
| template <size_t NumPages> |
| class VmMappingCoalescer { |
| public: |
| VmMappingCoalescer(VmMapping* mapping, vaddr_t base, uint mmu_flags, |
| ArchVmAspace::ExistingEntryAction existing_entry_action) |
| TA_REQ(mapping->object_lock()); |
| ~VmMappingCoalescer(); |
| |
| // Add a page to the mapping run. |
| zx_status_t Append(vaddr_t vaddr, paddr_t paddr) { |
| // If this isn't the expected vaddr, flush the run we have first. |
| if (!can_append(vaddr)) { |
| zx_status_t status = Flush(); |
| if (status != ZX_OK) { |
| return status; |
| } |
| base_ = vaddr; |
| } |
| phys_[count_] = paddr; |
| ++count_; |
| return ZX_OK; |
| } |
| |
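| // Add a page to the mapping run, flushing any existing run first if the mmu flags differ from |
| // those of the current run. |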
| zx_status_t AppendOrAdjustMapping(vaddr_t vaddr, paddr_t paddr, uint mmu_flags) { |
| // If this isn't the expected vaddr or mmu_flags have changed, flush the run we have first. |
| if (!can_append(vaddr) || mmu_flags != mmu_flags_) { |
| zx_status_t status = Flush(); |
| if (status != ZX_OK) { |
| return status; |
| } |
| base_ = vaddr; |
| mmu_flags_ = mmu_flags; |
| } |
| |
| phys_[count_] = paddr; |
| ++count_; |
| return ZX_OK; |
| } |
| |
| // How much space remains in the phys_ array, starting from vaddr, that can be used to |
| // opportunistically map additional pages. |
| size_t ExtraPageCapacityFrom(vaddr_t vaddr) { |
| // vaddr must be appendable & the coalescer can't be empty. |
| return (can_append(vaddr) && count_ != 0) ? NumPages - count_ : 0; |
| } |
| |
| // Functions for the user to manually manage the pages array. It is up to the user to ensure that |
| // the coalescer doesn't overflow, that the page count stays correct, and that the pages are |
| // contiguous. |
| paddr_t* GetNextPageSlot() { return &phys_[count_]; } |
| |
| uint GetMmuFlags() { return mmu_flags_; } |
| |
| void IncrementCount(size_t i) { count_ += i; } |
| |
| // Submit any outstanding mappings to the MMU. |
| zx_status_t Flush(); |
| |
| size_t TotalMapped() { return total_mapped_; } |
| |
| // Drop the current outstanding mappings without sending them to the MMU. |
| void Drop() { count_ = 0; } |
| |
| private: |
| // A vaddr can be appended if it corresponds to the next free slot and the coalescer isn't full. |
| bool can_append(vaddr_t vaddr) { |
| return count_ < ktl::size(phys_) && vaddr == base_ + count_ * kPageSize; |
| } |
| |
| DISALLOW_COPY_ASSIGN_AND_MOVE(VmMappingCoalescer); |
| |
| // The mapping that the pages are being installed into. |
| VmMapping* mapping_; |
| // Base virtual address of the current run of pages. |
| vaddr_t base_; |
| // Physical addresses of the pages accumulated in the current run. |
| paddr_t phys_[NumPages]; |
| // Number of pages currently accumulated in |phys_|. |
| size_t count_; |
| // Total number of pages that have been mapped via Flush so far. |
| size_t total_mapped_ = 0; |
| // The mmu flags that the current run will be mapped with. |
| uint mmu_flags_; |
| // What the arch aspace should do if an entry already exists at a given address. |
| const ArchVmAspace::ExistingEntryAction existing_entry_action_; |
| }; |
| |
| template <size_t NumPages> |
| VmMappingCoalescer<NumPages>::VmMappingCoalescer( |
| VmMapping* mapping, vaddr_t base, uint mmu_flags, |
| ArchVmAspace::ExistingEntryAction existing_entry_action) |
| : mapping_(mapping), |
| base_(base), |
| count_(0), |
| mmu_flags_(mmu_flags), |
| existing_entry_action_(existing_entry_action) { |
| // Mapping is only valid if there is at least some access in the flags. |
| DEBUG_ASSERT(mmu_flags & ARCH_MMU_FLAG_PERM_RWX_MASK); |
| } |
| |
| template <size_t NumPages> |
| VmMappingCoalescer<NumPages>::~VmMappingCoalescer() { |
| // Make sure no outstanding mappings. |
| DEBUG_ASSERT(count_ == 0); |
| } |
| |
| template <size_t NumPages> |
| zx_status_t VmMappingCoalescer<NumPages>::Flush() { |
| if (count_ == 0) { |
| return ZX_OK; |
| } |
| |
| VM_KTRACE_DURATION(2, "map_page", ("va", ktrace::Pointer{base_}), ("count", count_), |
| ("mmu_flags", mmu_flags_)); |
| |
| // Assert that we're not accidentally mapping the zero page writable, unless called from a kernel |
| // aspace, as the zero page can be mapped writable from the kernel aspace in mexec. |
| DEBUG_ASSERT( |
| !(mmu_flags_ & ARCH_MMU_FLAG_PERM_WRITE) || |
| ktl::all_of(phys_, &phys_[count_], [](paddr_t p) { return p != vm_get_zero_page_paddr(); }) || |
| !mapping_->aspace()->is_user()); |
| |
| zx_status_t ret = mapping_->aspace()->arch_aspace().Map(base_, phys_, count_, mmu_flags_, |
| existing_entry_action_); |
| if (ret != ZX_OK) { |
| TRACEF("error %d mapping %zu pages starting at va %#" PRIxPTR "\n", ret, count_, base_); |
| } |
| base_ += count_ * kPageSize; |
| total_mapped_ += count_; |
| count_ = 0; |
| return ret; |
| } |
| |
| } // namespace |
| |
| zx_status_t VmMapping::MapRange(size_t offset, size_t len, bool commit, bool ignore_existing) { |
| Guard<CriticalMutex> aspace_guard{lock()}; |
| canary_.Assert(); |
| |
| len = RoundUpPageSize(len); |
| if (len == 0) { |
| return ZX_ERR_INVALID_ARGS; |
| } |
| |
| if (state_locked() != LifeCycleState::ALIVE) { |
| return ZX_ERR_BAD_STATE; |
| } |
| |
| LTRACEF("region %p, offset %#zx, size %#zx, commit %d\n", this, offset, len, commit); |
| |
| DEBUG_ASSERT(object_); |
| if (!IsPageRounded(offset) || !is_in_range(base_ + offset, len)) { |
| return ZX_ERR_INVALID_ARGS; |
| } |
| |
| // If this is a kernel mapping then validate that all pages being mapped are currently pinned, |
| // ensuring that they cannot be taken away for any reason, unless the mapping has specifically |
| // opted out of this debug check due to it performing its own dynamic management. |
| DEBUG_ASSERT(aspace_->is_user() || aspace_->is_guest_physical() || |
| (flags_ & VMAR_FLAG_DEBUG_DYNAMIC_KERNEL_MAPPING) || |
| object_->DebugIsRangePinned(object_offset_ + offset, len)); |
| |
| // Cache whether the object is dirty tracked, as we need to know this when computing mmu flags later. |
| const bool dirty_tracked = object_->is_dirty_tracked(); |
| |
| // The region to map could have multiple different current arch mmu flags, so we need to iterate |
| // over them to ensure we install mappings with the correct permissions. |
| return EnumerateProtectionRangesLocked( |
| base_ + offset, len, |
| [this, commit, dirty_tracked, ignore_existing](vaddr_t base, size_t len, uint mmu_flags) { |
| AssertHeld(lock_ref()); |
| |
| // Remove the write permission if this maps a vmo that supports dirty tracking, in order to |
| // trigger write permission faults when writes occur, enabling us to track when pages are |
| // dirtied. |
| if (dirty_tracked) { |
| mmu_flags &= ~ARCH_MMU_FLAG_PERM_WRITE; |
| } |
| |
| // If there are no access permissions on this region then mapping has no effect, so skip. |
| if (!(mmu_flags & ARCH_MMU_FLAG_PERM_RWX_MASK)) { |
| return ZX_ERR_NEXT; |
| } |
| |
| // In the scenario where we are committing, and calling RequireOwnedPage, we are supposed to |
| // pass in a non-null MultiPageRequest. Technically we could get away with not passing in a |
| // PageRequest since: |
| // * Only internal kernel VMOs will have the 'commit' flag passed in for their mappings |
| // * Only pager backed VMOs or VMOs that support delayed memory allocations need to fill |
| // out a PageRequest |
| // * Internal kernel VMOs are never pager backed or have the delayed memory allocation flag |
| // set. |
| // However, should these assumptions ever get violated it's better to catch this gracefully |
| // than have RequireOwnedPage error/crash internally, and it costs nothing to create and |
| // pass in. |
| __UNINITIALIZED MultiPageRequest page_request; |
| |
| const uint64_t map_offset = base - base_; |
| const uint64_t vmo_offset = object_offset_ + map_offset; |
| if (VmObjectPaged* paged = DownCastVmObject<VmObjectPaged>(object_.get()); likely(paged)) { |
| // grab the lock for the vmo |
| __UNINITIALIZED VmCowPages::DeferredOps deferred(paged->MakeDeferredOps()); |
| Guard<CriticalMutex> guard{AssertOrderedAliasedLock, paged->lock(), object_->lock(), |
| paged->lock_order()}; |
| |
| // Trim our range to the current VMO size. Our mapping might exceed the VMO in the case |
| // where the VMO is resizable, and this should not be considered an error. |
| len = TrimmedObjectRangeLocked(map_offset, len); |
| if (len == 0) { |
| return ZX_ERR_STOP; |
| } |
| |
| VmMappingCoalescer<16> coalescer(this, base, mmu_flags, |
| ignore_existing |
| ? ArchVmAspace::ExistingEntryAction::Skip |
| : ArchVmAspace::ExistingEntryAction::Error); |
| |
| const bool writing = mmu_flags & ARCH_MMU_FLAG_PERM_WRITE; |
| __UNINITIALIZED auto cursor = paged->GetLookupCursorLocked(vmo_offset, len); |
| if (cursor.is_error()) { |
| return cursor.error_value(); |
| } |
| // Do not consider pages touched when mapping in; if they are actually touched they will |
| // get an accessed bit set in the hardware. |
| cursor->DisableMarkAccessed(); |
| AssertHeld(cursor->lock_ref()); |
| for (uint64_t off = 0; off < len; off += kPageSize) { |
| vm_page_t* page = nullptr; |
| if (commit) { |
| __UNINITIALIZED zx::result<VmCowPages::LookupCursor::RequireResult> result = |
| cursor->RequireOwnedPage(writing, 1, deferred, &page_request); |
| if (result.is_error()) { |
| zx_status_t status = result.error_value(); |
| // As per the comment above page_request definition, there should never be commit |
| // + pager backed VMO and so we should never end up with a PageRequest needing to be |
| // waited on. |
| ASSERT(status != ZX_ERR_SHOULD_WAIT); |
| // fail when we can't commit every requested page |
| coalescer.Drop(); |
| return status; |
| } |
| page = result->page; |
| } else { |
| // Not committing so get a page if one exists. This increments the cursor, returning |
| // nullptr if no page. |
| page = cursor->MaybePage(writing); |
| // This page was not present; if we are in a run of absent pages we would like to |
| // skip them efficiently, instead of querying each virtual address individually. Due |
| // to the assumptions of the cursor, we cannot call SkipMissingPages if we had just |
| // requested the last page in the range of the cursor. |
| if (!page && off + kPageSize < len) { |
| // Increment |off| for any pages we skip and let the original page from |
| // MaybePage get incremented on the way around the loop before the range gets |
| // checked. |
| off += cursor->SkipMissingPages() * kPageSize; |
| } |
| } |
| if (page) { |
| zx_status_t status = coalescer.Append(base + off, page->paddr()); |
| if (status != ZX_OK) { |
| return status; |
| } |
| } |
| } |
| zx_status_t status = coalescer.Flush(); |
| return status == ZX_OK ? ZX_ERR_NEXT : status; |
| } else if (VmObjectPhysical* phys = DownCastVmObject<VmObjectPhysical>(object_.get()); |
| phys) { |
| // grab the lock for the vmo |
| Guard<CriticalMutex> object_guard{AliasedLock, phys->lock(), object_->lock()}; |
| // Physical VMOs are never resizable, so do not need to worry about trimming the range. |
| DEBUG_ASSERT(!phys->is_resizable()); |
| VmMappingCoalescer<16> coalescer(this, base, mmu_flags, |
| ignore_existing |
| ? ArchVmAspace::ExistingEntryAction::Skip |
| : ArchVmAspace::ExistingEntryAction::Error); |
| |
| // Physical VMOs are always allocated and contiguous, just need to get the paddr. |
| paddr_t phys_base = 0; |
| zx_status_t status = phys->LookupContiguousLocked(vmo_offset, len, &phys_base); |
| ASSERT(status == ZX_OK); |
| |
| for (size_t offset = 0; offset < len; offset += kPageSize) { |
| status = coalescer.Append(base + offset, phys_base + offset); |
| if (status != ZX_OK) { |
| return status; |
| } |
| } |
| status = coalescer.Flush(); |
| return status == ZX_OK ? ZX_ERR_NEXT : status; |
| } else { |
| panic("VmObject should be paged or physical"); |
| return ZX_ERR_INTERNAL; |
| } |
| }); |
| } |
| |
| zx_status_t VmMapping::DecommitRange(size_t offset, size_t len) { |
| canary_.Assert(); |
| LTRACEF("%p [%#zx+%#zx], offset %#zx, len %#zx\n", this, base_, size_, offset, len); |
| |
| Guard<CriticalMutex> guard{lock()}; |
| if (state_locked() != LifeCycleState::ALIVE) { |
| return ZX_ERR_BAD_STATE; |
| } |
| if (offset + len < offset || offset + len > size_) { |
| return ZX_ERR_OUT_OF_RANGE; |
| } |
| // VmObject::DecommitRange will typically call back into our instance's |
| // VmMapping::AspaceUnmapLockedObject. |
| return object_->DecommitRange(object_offset_ + offset, len); |
| } |
| |
| zx_status_t VmMapping::DestroyLocked() { |
| canary_.Assert(); |
| // Keep a refptr to the object_ so we know our lock remains valid. |
| fbl::RefPtr<VmObject> object(object_); |
| Guard<CriticalMutex> guard{object_->lock()}; |
| return DestroyLockedObject(true); |
| } |
| |
| zx_status_t VmMapping::DestroyLockedObject(bool unmap) { |
| // Take a reference to ourself, so that we do not get destructed after |
| // dropping our last reference in this method (e.g. when calling |
| // subregions_.erase below). |
| fbl::RefPtr<VmMapping> self(this); |
| |
| // If this is the last_fault_ then clear it before removing from the VMAR tree. Even if this |
| // destroy fails, it's always safe to clear last_fault_, so we preference doing it upfront for |
| // clarity. |
| if (aspace_->last_fault_ == this) { |
| aspace_->last_fault_ = nullptr; |
| } |
| |
| // The vDSO code mapping can never be unmapped, not even |
| // by VMAR destruction (except for process exit, of course). |
| // TODO(mcgrathr): Turn this into a policy-driven process-fatal case |
| // at some point. teisenbe@ wants to eventually make zx_vmar_destroy |
| // never fail. |
| if (aspace_->vdso_code_mapping_ == self) { |
| return ZX_ERR_ACCESS_DENIED; |
| } |
| |
| // Remove any priority. |
| SetMemoryPriorityDefaultLockedObject(); |
| |
| if (unmap) { |
| zx_status_t status = |
| aspace_->arch_aspace().Unmap(base_, size_ / kPageSize, aspace_->EnlargeArchUnmap()); |
| if (status != ZX_OK) { |
| return status; |
| } |
| } |
| protection_ranges_.clear(); |
| object_->RemoveMappingLocked(this); |
| |
| // Detach the region from the parent. |
| if (parent_) { |
| AssertHeld(parent_->lock_ref()); |
| AssertHeld(parent_->region_lock_ref()); |
| DEBUG_ASSERT(this->in_subregion_tree()); |
| parent_->subregions_.RemoveRegion(this); |
| } |
| |
| // detach from any object we have mapped. Note that we are holding the aspace_->lock() so we |
| // will not race with other threads calling vmo() |
| object_.reset(); |
| object_reset_ = true; |
| |
| // mark ourself as dead |
| parent_ = nullptr; |
| state_ = LifeCycleState::DEAD; |
| return ZX_OK; |
| } |
| |
| template <typename T> |
| ktl::pair<zx_status_t, uint32_t> VmMapping::PageFaultLockedObject(vaddr_t va, uint pf_flags, |
| size_t additional_pages, |
| T* object, |
| VmCowPages::DeferredOps* deferred, |
| MultiPageRequest* page_request) { |
| // Ensure the 'object' type is exactly the form we expect so that is_paged is calculated |
| // correctly. |
| static_assert(ktl::is_same_v<decltype(object), VmObjectPaged*> || |
| ktl::is_same_v<decltype(object), VmObjectPhysical*>); |
| constexpr bool is_paged = ktl::is_same_v<decltype(object), VmObjectPaged*>; |
| |
| // Fault batch size when num_pages > 1. |
| static constexpr uint64_t kBatchPages = 16; |
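| // The coalescer must be large enough to hold either a full fault batch or the maximum optimistic |
| // run, whichever is larger. |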
| static constexpr uint64_t coalescer_size = ktl::max(kPageFaultMaxOptimisticPages, kBatchPages); |
| |
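| // Translate the faulting virtual address into an offset within the object. |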
| const uint64_t vmo_offset = va - base_ + object_offset_; |
| |
| [[maybe_unused]] char pf_string[5]; |
| LTRACEF("%p va %#" PRIxPTR " vmo_offset %#" PRIx64 ", pf_flags %#x (%s)\n", this, va, vmo_offset, |
| pf_flags, vmm_pf_flags_to_string(pf_flags, pf_string)); |
| |
| // Need to look up the mmu flags for this virtual address, as well as how large a region those |
| // flags are for so we can cap the extra mappings we create. |
| const MappingProtectionRanges::FlagsRange range = |
| ProtectRangesLockedObject().FlagsRangeAtAddr(base_, size_, va); |
| |
| // Build the mmu flags we need to have based on the page fault. This strategy of building the |
| // flags and then comparing all at once allows the compiler to provide much better code gen. |
| uint needed_mmu_flags = 0; |
| if (pf_flags & VMM_PF_FLAG_USER) { |
| needed_mmu_flags |= ARCH_MMU_FLAG_PERM_USER; |
| } |
| const bool write = pf_flags & VMM_PF_FLAG_WRITE; |
| if (write) { |
| needed_mmu_flags |= ARCH_MMU_FLAG_PERM_WRITE; |
| } else { |
| needed_mmu_flags |= ARCH_MMU_FLAG_PERM_READ; |
| } |
| if (pf_flags & VMM_PF_FLAG_INSTRUCTION) { |
| needed_mmu_flags |= ARCH_MMU_FLAG_PERM_EXECUTE; |
| } |
| // Check that all the needed flags are present. |
| if (unlikely((range.mmu_flags & needed_mmu_flags) != needed_mmu_flags)) { |
| if ((pf_flags & VMM_PF_FLAG_USER) && !(range.mmu_flags & ARCH_MMU_FLAG_PERM_USER)) { |
| // user page fault on non user mapped region |
| LTRACEF("permission failure: user fault on non user region\n"); |
| } |
| if ((pf_flags & VMM_PF_FLAG_WRITE) && !(range.mmu_flags & ARCH_MMU_FLAG_PERM_WRITE)) { |
| // write to a non-writeable region |
| LTRACEF("permission failure: write fault on non-writable region\n"); |
| } |
| if (!(pf_flags & VMM_PF_FLAG_WRITE) && !(range.mmu_flags & ARCH_MMU_FLAG_PERM_READ)) { |
| // read to a non-readable region |
| LTRACEF("permission failure: read fault on non-readable region\n"); |
| } |
| if ((pf_flags & VMM_PF_FLAG_INSTRUCTION) && !(range.mmu_flags & ARCH_MMU_FLAG_PERM_EXECUTE)) { |
| // instruction fetch from a no execute region |
| LTRACEF("permission failure: execute fault on no execute region\n"); |
| } |
| return {ZX_ERR_ACCESS_DENIED, 0}; |
| } |
| |
| // Calculate the number of pages from va until the end of the protection range. |
| const size_t num_protection_range_pages = (range.region_top - va) / kPageSize; |
| |
| uint64_t vmo_size = object->size_locked(); |
| if constexpr (is_paged) { |
| // If fault-beyond-stream-size is set, throw exception on memory accesses past the page |
| // containing the user defined stream size. |
| if (flags_ & VMAR_FLAG_FAULT_BEYOND_STREAM_SIZE) { |
| if (auto size = object->saturating_stream_size_locked()) { |
| vmo_size = *size; |
| } |
| } |
| } |
| |
| if (vmo_offset >= vmo_size) { |
| return {ZX_ERR_OUT_OF_RANGE, 0}; |
| } |
| |
| // Calculate the maximum number of pages we can legally look at, i.e. are valid, in the vmo |
| // taking into account the protection range, which is implicitly taking into account the mapping |
| // size. |
| const size_t num_vmo_pages = (vmo_size - vmo_offset) / kPageSize; |
| const size_t num_valid_pages = ktl::min(num_protection_range_pages, num_vmo_pages); |
| |
| // Number of requested pages, trimmed to protection range & VMO. |
| const size_t num_required_pages = ktl::min(num_valid_pages, additional_pages + 1); |
| DEBUG_ASSERT(num_required_pages > 0); |
| // Helper to calculate the remaining pt pages if we need them. |
| auto calc_pt_pages = [](uint64_t va) { |
| const uint64_t next_pt_base = ArchVmAspace::NextUserPageTableOffset(va); |
| const size_t num_pt_pages = (next_pt_base - va) / kPageSize; |
| return num_pt_pages; |
| }; |
| // Number of pages we're aiming to fault. If a range > 1 page is supplied, it is assumed the |
| // user knows the appropriate range, so opportunistic pages will not be added. |
| const size_t num_fault_pages = |
| additional_pages == 0 |
| ? ktl::min({kPageFaultMaxOptimisticPages, num_valid_pages, calc_pt_pages(va)}) |
| : num_required_pages; |
| |
| // Opportunistic pages are not considered in the currently_faulting optimization, as it is not |
| // guaranteed that their mappings will be updated. |
| CurrentlyFaulting currently_faulting(this, vmo_offset, num_required_pages * kPageSize); |
| |
| __UNINITIALIZED VmMappingCoalescer<coalescer_size> coalescer( |
| this, va, range.mmu_flags, ArchVmAspace::ExistingEntryAction::Upgrade); |
| |
| if constexpr (is_paged) { |
| // fault in or grab existing pages. |
| const size_t cursor_size = num_fault_pages * kPageSize; |
| __UNINITIALIZED auto cursor = object->GetLookupCursorLocked(vmo_offset, cursor_size); |
| if (cursor.is_error()) { |
| return {cursor.error_value(), coalescer.TotalMapped()}; |
| } |
| // Do not consider pages touched when mapping in, if they are actually touched they will |
| // get an accessed bit set in the hardware. |
| cursor->DisableMarkAccessed(); |
| AssertHeld(cursor->lock_ref()); |
| |
| // Fault requested pages. |
| uint64_t offset = 0; |
| for (; offset < (num_required_pages * kPageSize); offset += kPageSize) { |
| uint curr_mmu_flags = range.mmu_flags; |
| |
| uint num_curr_pages = static_cast<uint>(num_required_pages - (offset / kPageSize)); |
| __UNINITIALIZED zx::result<VmCowPages::LookupCursor::RequireResult> result = |
| cursor->RequirePage(write, num_curr_pages, *deferred, page_request); |
| if (result.is_error()) { |
| coalescer.Flush(); |
| return {result.error_value(), coalescer.TotalMapped()}; |
| } |
| |
| DEBUG_ASSERT(!write || result->writable); |
| |
| // We looked up in order to write. Mark as modified. Only need to do this once. |
| if (write && offset == 0) { |
| object->mark_modified_locked(); |
| } |
| |
| // If we read faulted, and lookup didn't say that this is always writable, then we map or |
| // modify the page without any write permissions. This ensures we will fault again if a |
| // write is attempted so we can potentially replace this page with a copy or a new one, or |
| // update the page's dirty state. |
| if (!write && !result->writable) { |
| // we read faulted, so only map with read permissions |
| curr_mmu_flags &= ~ARCH_MMU_FLAG_PERM_WRITE; |
| } |
| |
| zx_status_t status = |
| coalescer.AppendOrAdjustMapping(va + offset, result->page->paddr(), curr_mmu_flags); |
| if (status != ZX_OK) { |
| // Flush any existing pages in the coalescer. |
| coalescer.Flush(); |
| return {status, coalescer.TotalMapped()}; |
| } |
| } |
| |
| // Fault opportunistic pages. If a range is supplied, it is assumed the user knows the |
| // appropriate range, so opportunistic pages will not be faulted. |
| if (additional_pages == 0) { |
| DEBUG_ASSERT(num_fault_pages > 0); |
| // Check how much space the coalescer has for faulting additional pages. |
| size_t extra_pages = coalescer.ExtraPageCapacityFrom(va + kPageSize); |
| extra_pages = ktl::min(extra_pages, num_fault_pages - 1); |
| |
| // Acquire any additional pages, but only if they already exist as the user has not |
| // attempted to use these pages yet. |
| if (extra_pages > 0) { |
| bool writeable = (coalescer.GetMmuFlags() & ARCH_MMU_FLAG_PERM_WRITE); |
| size_t num_extra_pages = cursor->IfExistPages(writeable, static_cast<uint>(extra_pages), |
| coalescer.GetNextPageSlot()); |
| coalescer.IncrementCount(num_extra_pages); |
| } |
| } |
| } |
| if constexpr (!is_paged) { |
| // Already validated the size, and since physical VMOs are always allocated, and not |
| // resizable, we know we can always retrieve the maximum number of pages without failure. |
| uint64_t phys_len = num_fault_pages * kPageSize; |
| paddr_t phys_base = 0; |
| zx_status_t status = object->LookupContiguousLocked(vmo_offset, phys_len, &phys_base); |
| |
| ASSERT(status == ZX_OK); |
| |
| // Extrapolate the pages from the base address. |
| for (size_t offset = 0; offset < phys_len; offset += kPageSize) { |
| status = coalescer.Append(va + offset, phys_base + offset); |
| if (status != ZX_OK) { |
| return {status, coalescer.TotalMapped()}; |
| } |
| } |
| } |
| zx_status_t status = coalescer.Flush(); |
| if (status == ZX_OK) { |
| // Mapping has been successfully updated by us. Inform the faulting helper so that it knows |
| // not to unmap the range instead. |
| currently_faulting.MappingUpdated(); |
| } |
| return {status, coalescer.TotalMapped()}; |
| } |
| |
| ktl::pair<zx_status_t, uint32_t> VmMapping::PageFault(vaddr_t va, const uint pf_flags, |
| const size_t additional_pages, |
| VmObject* object, |
| MultiPageRequest* page_request) { |
| VM_KTRACE_DURATION(2, "VmMapping::PageFault", ("user_id", object->user_id()), |
| ("va", ktrace::Pointer{va})); |
| canary_.Assert(); |
| |
| DEBUG_ASSERT(IsPageRounded(va)); |
| |
| if (VmObjectPaged* paged = DownCastVmObject<VmObjectPaged>(object); likely(paged)) { |
| __UNINITIALIZED VmCowPages::DeferredOps deferred(paged->MakeDeferredOps()); |
| Guard<CriticalMutex> guard{AssertOrderedLock, paged->lock(), paged->lock_order()}; |
| if (object_reset_) { |
| return {ZX_ERR_UNAVAILABLE, 0}; |
| } |
| // The caller was obliged to pass us the value of |object_| in as |object|, whose lock we now |
| // hold. Since we know that object_ can only hold one of two values, |object| or |nullptr|, |
| // if object_reset_ is false, i.e. |object_| is still equal to |object|, then we know that: |
| // * Our read of object_reset_ did not race, since it is written under object_->lock(), which |
| // we presently hold |
| // * object_ == object since it's not null |
| // * object_ cannot transition to null since we hold its lock. |
| assert_object_lock(); |
| return PageFaultLockedObject(va, pf_flags, additional_pages, paged, &deferred, page_request); |
| } |
| VmObjectPhysical* phys = DownCastVmObject<VmObjectPhysical>(object); |
| ASSERT(phys); |
| Guard<CriticalMutex> guard{phys->lock()}; |
| if (object_reset_) { |
| return {ZX_ERR_UNAVAILABLE, 0}; |
| } |
| // See comment in paged case for explanation. |
| assert_object_lock(); |
| return PageFaultLockedObject(va, pf_flags, additional_pages, phys, nullptr, page_request); |
| } |
| |
| ktl::pair<zx_status_t, uint32_t> VmMapping::PageFaultLocked(vaddr_t va, const uint pf_flags, |
| const size_t additional_pages, |
| MultiPageRequest* page_request) { |
| // As the aspace lock is held we can safely just use the direct raw value of object_, knowing that |
| // it cannot be destructed, and call the regular PageFault method. This is explicitly safe to call |
| // with the aspace lock held. |
| return PageFault(va, pf_flags, additional_pages, object_.get(), page_request); |
| } |
| |
| void VmMapping::ActivateLocked() { |
| DEBUG_ASSERT(state_ == LifeCycleState::NOT_READY); |
| DEBUG_ASSERT(parent_); |
| |
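| // Transition to ALIVE and register with the VMO so that it can call back into this mapping for |
| // any range change operations. |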
| state_ = LifeCycleState::ALIVE; |
| object_->AddMappingLocked(this); |
| |
| // Now that we have added a mapping to the VMO its cache policy becomes fixed, and we can read it |
| // and augment our arch_mmu_flags. |
| uint32_t cache_policy = object_->GetMappingCachePolicyLocked(); |
| uint arch_mmu_flags = protection_ranges_.FirstRegionMmuFlags(); |
| if ((arch_mmu_flags & ARCH_MMU_FLAG_CACHE_MASK) != cache_policy) { |
| // Warn in the event that we somehow receive a VMO that has a cache |
| // policy set while also holding cache policy flags within the arch |
| // flags. The only path that should be able to achieve this is if |
| // something in the kernel maps into their aspace incorrectly. |
| if ((arch_mmu_flags & ARCH_MMU_FLAG_CACHE_MASK) != 0) { |
| TRACEF( |
| "warning: mapping has conflicting cache policies: vmo %#02x " |
| "arch_mmu_flags %#02x.\n", |
| cache_policy, arch_mmu_flags & ARCH_MMU_FLAG_CACHE_MASK); |
| // Clear the existing cache policy and use the new one. |
| arch_mmu_flags &= ~ARCH_MMU_FLAG_CACHE_MASK; |
| } |
| // If we are changing the cache policy then this can only happen if this is a new mapping region |
| // and not a new mapping occurring as a result of an unmap split. In the case of a new mapping |
| // region we know there cannot yet be any protection ranges. |
| DEBUG_ASSERT(protection_ranges_.IsSingleRegion()); |
| arch_mmu_flags |= cache_policy; |
| protection_ranges_.SetFirstRegionMmuFlags(arch_mmu_flags); |
| } |
| |
| AssertHeld(parent_->lock_ref()); |
| AssertHeld(parent_->region_lock_ref()); |
| parent_->subregions_.InsertRegion(fbl::RefPtr<VmAddressRegionOrMapping>(this)); |
| } |
| |
| void VmMapping::Activate() { |
| Guard<CriticalMutex> guard{object_->lock()}; |
| ActivateLocked(); |
| } |
| |
| fbl::RefPtr<VmMapping> VmMapping::TryMergeRightNeighborLocked(VmMapping* right_candidate) { |
| AssertHeld(right_candidate->lock_ref()); |
| AssertHeld(right_candidate->region_lock_ref()); |
| |
| // This code is tolerant of many 'miss calls', such as when mappings aren't mergeable or are not |
| // neighbors, but the caller should not be attempting to merge if these mappings are not actually |
| // from the same vmar parent. Doing so indicates something structurally wrong with the hierarchy. |
| DEBUG_ASSERT(parent_ == right_candidate->parent_); |
| |
| // Should not be able to have the same parent yet have gotten a different memory priority. |
| DEBUG_ASSERT(memory_priority_ == right_candidate->memory_priority_); |
| |
| // These tests are intended to be ordered such that we fail as fast as possible. As such testing |
| // for mergeability, which we commonly expect to succeed and not fail, is done last. |
| |
| // Need to refer to the same object. |
| if (object_.get() != right_candidate->object_.get()) { |
| return nullptr; |
| } |
| DEBUG_ASSERT(private_clone_ == right_candidate->private_clone_); |
| // Aspace and VMO ranges need to be contiguous. Validate that the right candidate is actually to |
| // the right, in addition to checking that base+size lines up, for the single scenario where |
| // base_+size_ can overflow and become zero. |
| if (base_ + size_ != right_candidate->base_ || right_candidate->base_ < base_) { |
| return nullptr; |
| } |
| if (object_offset_ + size_ != right_candidate->object_offset_) { |
| return nullptr; |
| } |
| // All flags need to be consistent. |
| if (flags_ != right_candidate->flags_) { |
| return nullptr; |
| } |
| // Although we can combine the protect_region_list_rest_ of the two mappings, we require that they |
| // be of the same cacheability, as a mapping is assumed to have a single cacheability type. Since |
| // all protection regions have the same cacheability we can check any arbitrary one in each of the |
| // mappings. Note that this check is technically redundant, since a VMO can only have one kind of |
| // cacheability and we already know this is the same VMO, but some extra paranoia here does not |
| // hurt. |
| if ((ProtectRangesLocked().FirstRegionMmuFlags() & ARCH_MMU_FLAG_CACHE_MASK) != |
| (right_candidate->ProtectRangesLocked().FirstRegionMmuFlags() & ARCH_MMU_FLAG_CACHE_MASK)) { |
| return nullptr; |
| } |
| |
| // Only merge live mappings. |
| if (state_ != LifeCycleState::ALIVE || right_candidate->state_ != LifeCycleState::ALIVE) { |
| return nullptr; |
| } |
| // Both need to be mergeable. |
| if (mergeable_ == Mergeable::NO || right_candidate->mergeable_ == Mergeable::NO) { |
| return nullptr; |
| } |
| |
| fbl::AllocChecker ac; |
| fbl::RefPtr<VmMapping> new_mapping = fbl::AdoptRef( |
| new (&ac) VmMapping(*parent_, private_clone_, base_, size_ + right_candidate->size_, flags_, |
| object_, object_offset_, MappingProtectionRanges(0), Mergeable::YES)); |
| if (!ac.check()) { |
| return nullptr; |
| } |
| AssertHeld(new_mapping->lock_ref()); |
| |
| const MemoryPriority old_priority = memory_priority_; |
| // Although it is somewhat awkward and verbose, we use a lambda here instead of just a subscope to |
| // prevent the usages of `AssertHeld` from 'leaking' beyond the actual guard scope. |
| const bool failure = |
| [&]() TA_REQ(lock()) TA_REQ(right_candidate->lock()) TA_REQ(new_mapping->lock()) { |
| // Although it was safe to read size_ without holding the object lock, we need to acquire it |
| // to perform changes. |
| Guard<CriticalMutex> guard{AliasedLock, object_->lock(), right_candidate->object_->lock()}; |
| |
| // Attempt to merge the protection region lists first. This is done first as a node |
| // allocation might be needed, which could fail. If it fails we can still abort now without |
| // needing to roll back any changes. |
| zx_status_t status = protection_ranges_.MergeRightNeighbor( |
| right_candidate->protection_ranges_, right_candidate->base_); |
| if (status != ZX_OK) { |
| ASSERT(status == ZX_ERR_NO_MEMORY); |
| return true; |
| } |
| AssertHeld(region_lock_ref()); |
| |
| AssertHeld(new_mapping->object_lock_ref()); |
| new_mapping->protection_ranges_ = ktl::move(protection_ranges_); |
| |
| status = DestroyLockedObject(false); |
| ASSERT(status == ZX_OK); |
| AssertHeld(right_candidate->region_lock_ref()); |
| status = right_candidate->DestroyLockedObject(false); |
| ASSERT(status == ZX_OK); |
| AssertHeld(new_mapping->region_lock_ref()); |
| new_mapping->ActivateLocked(); |
| return false; |
| }(); |
| if (failure) { |
| return nullptr; |
| } |
| |
| new_mapping->SetMemoryPriorityLocked(old_priority); |
| |
| vm_mappings_merged.Add(1); |
| return new_mapping; |
| } |
| |
| void VmMapping::TryMergeNeighborsLocked() { |
| canary_.Assert(); |
| |
| // Check that this mapping is mergeable and is currently in the correct lifecycle state. |
| if (mergeable_ == Mergeable::NO || state_ != LifeCycleState::ALIVE) { |
| return; |
| } |
| // As a VmMapping, if we are alive we by definition have a parent. |
| DEBUG_ASSERT(parent_); |
| |
| // We expect there to be a RefPtr to us held beyond the one for the wavl tree ensuring that we |
| // cannot trigger our own destructor should we remove ourselves from the hierarchy. |
| DEBUG_ASSERT(ref_count_debug() > 1); |
| |
| AssertHeld(parent_->lock_ref()); |
| AssertHeld(parent_->region_lock_ref()); |
| |
| // Find our two merge candidates. |
| fbl::RefPtr<VmMapping> left, right; |
| if (auto left_candidate = parent_->subregions_.LeftOf(this); left_candidate.IsValid()) { |
| left = left_candidate->as_vm_mapping(); |
| } |
| if (auto right_candidate = parent_->subregions_.RightOf(this); right_candidate.IsValid()) { |
| right = right_candidate->as_vm_mapping(); |
| } |
| |
| // Attempt to merge with each candidate. Any successful merge will produce a new mapping and |
| // invalidate |this|. |
| if (right) { |
| right = TryMergeRightNeighborLocked(right.get()); |
| } |
| if (left) { |
| // We either merge the left candidate with the result of the right merge, or, if that merge was |
| // not successful, with |this|. |
| AssertHeld(left->lock_ref()); |
| AssertHeld(left->region_lock_ref()); |
| left->TryMergeRightNeighborLocked(right ? right.get() : this); |
| } |
| } |
| |
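| // Marks |mapping| as a candidate for merging with its neighbors and immediately attempts the |
| // merge. The RefPtr is taken by value so the mapping stays alive even if a successful merge |
| // removes it from the region hierarchy. |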
| void VmMapping::MarkMergeable(fbl::RefPtr<VmMapping> mapping) { |
| Guard<CriticalMutex> region_guard{mapping->region_lock()}; |
| Guard<CriticalMutex> guard{mapping->lock()}; |
| // Now that we have the lock, check that this mapping is still alive and that we have not raced |
| // with some kind of destruction. |
| if (mapping->state_ != LifeCycleState::ALIVE) { |
| return; |
| } |
| // Skip marking any vdso segments mergeable. Although there is currently only one vdso segment and |
| // so it would never actually get merged, marking it mergeable is technically incorrect. |
| if (mapping->aspace_->vdso_code_mapping_ == mapping) { |
| return; |
| } |
| mapping->mergeable_ = Mergeable::YES; |
| mapping->TryMergeNeighborsLocked(); |
| } |
| |
| template <bool SplitOnUnmap> |
| void VmMapping::SetMemoryPriorityLocked(VmAddressRegion::MemoryPriority priority) { |
| if constexpr (SplitOnUnmap) { |
| // all that's required to set our priority is to have object_ and aspace_ set up |
| DEBUG_ASSERT(state_locked() == LifeCycleState::NOT_READY && object_ && aspace_); |
| } else { |
| DEBUG_ASSERT(state_locked() == LifeCycleState::ALIVE); |
| } |
| const bool to_high = priority == VmAddressRegion::MemoryPriority::HIGH; |
| const int64_t delta = to_high ? 1 : -1; |
| if (priority == memory_priority_) { |
| return; |
| } |
| memory_priority_ = priority; |
| aspace_->ChangeHighPriorityCountLocked(delta); |
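| // Propagate the change to the backing VMO if it is paged, so the VMO-side high priority |
| // accounting stays in sync with the aspace count updated above. |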
| if (VmObjectPaged* paged = DownCastVmObject<VmObjectPaged>(object_.get()); paged) { |
| PriorityChanger pc = paged->MakePriorityChanger(delta); |
| if (priority == VmAddressRegion::MemoryPriority::HIGH) { |
| pc.PrepareMayNotAlreadyBeHighPriority(); |
| } |
| Guard<CriticalMutex> guard{AliasedLock, object_->lock(), pc.lock()}; |
| pc.ChangeHighPriorityCountLocked(); |
| } |
| } |
| |
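| // Variant of SetMemoryPriorityLocked for callers that already hold the object lock and are |
| // resetting the priority to DEFAULT, so no object lock acquisition is performed here. |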
| template <bool SplitOnUnmap> |
| void VmMapping::SetMemoryPriorityDefaultLockedObject() { |
| if constexpr (SplitOnUnmap) { |
| // all that's required to set our priority is to have object_ and aspace_ set up |
| DEBUG_ASSERT(state_locked() == LifeCycleState::NOT_READY && object_ && aspace_); |
| } else { |
| DEBUG_ASSERT(state_locked() == LifeCycleState::ALIVE); |
| } |
| if (memory_priority_ == VmAddressRegion::MemoryPriority::DEFAULT) { |
| return; |
| } |
| memory_priority_ = VmAddressRegion::MemoryPriority::DEFAULT; |
| aspace_->ChangeHighPriorityCountLocked(-1); |
| if (VmObjectPaged* paged = DownCastVmObject<VmObjectPaged>(object_.get()); paged) { |
| PriorityChanger pc = paged->MakePriorityChanger(-1); |
| AssertHeld(pc.lock_ref()); // we have the object lock |
| pc.ChangeHighPriorityCountLocked(); |
| } |
| } |
| |
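| // Variant of SetMemoryPriorityLocked for callers that already hold the object lock and are |
| // raising the priority to HIGH when, as the name suggests, the object's high priority count is |
| // presumed to already be positive. |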
| template <bool SplitOnUnmap> |
| void VmMapping::SetMemoryPriorityHighAlreadyPositiveLockedObject() { |
| if constexpr (SplitOnUnmap) { |
| // all that's required to set our priority is to have object_ and aspace_ set up |
| DEBUG_ASSERT(state_locked() == LifeCycleState::NOT_READY && object_ && aspace_); |
| } else { |
| DEBUG_ASSERT(state_locked() == LifeCycleState::ALIVE); |
| } |
| if (memory_priority_ == VmAddressRegion::MemoryPriority::HIGH) { |
| return; |
| } |
| memory_priority_ = VmAddressRegion::MemoryPriority::HIGH; |
| aspace_->ChangeHighPriorityCountLocked(1); |
| if (VmObjectPaged* paged = DownCastVmObject<VmObjectPaged>(object_.get()); paged) { |
| PriorityChanger pc = paged->MakePriorityChanger(1); |
| AssertHeld(pc.lock_ref()); // we have the object lock |
| pc.PrepareIsAlreadyHighPriorityLocked(); |
| pc.ChangeHighPriorityCountLocked(); |
| } |
| } |
| |
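| // Commits and maps the pages covered by this mapping if it is currently marked HIGH priority. |
| // Both the commit and the map are treated as best effort; failures are not propagated. |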
| void VmMapping::CommitHighMemoryPriority() { |
| fbl::RefPtr<VmObject> vmo; |
| uint64_t offset; |
| uint64_t len; |
| { |
| Guard<CriticalMutex> guard{lock()}; |
| if (state_locked() != LifeCycleState::ALIVE || memory_priority_ != MemoryPriority::HIGH) { |
| return; |
| } |
| vmo = object_; |
| offset = object_offset_; |
| len = size(); |
| } |
| DEBUG_ASSERT(vmo); |
| vmo->CommitHighPriorityPages(offset, len); |
| // Ignore the return value of MapRange as this is just best effort. |
| MapRange(offset, len, false, true); |
| } |
| |
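| // Redirects this mapping to a writable private clone of its VMO so that a forced write cannot |
| // modify pages visible through other mappings of the object. Returns the existing mapping when |
| // writing is already safe, or the newly installed replacement mapping otherwise. |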
| zx::result<fbl::RefPtr<VmMapping>> VmMapping::ForceWritable() { |
| canary_.Assert(); |
| // Take a ref to ourselves in case we drop the last one when removing from our parent. |
| fbl::RefPtr<VmMapping> self(this); |
| Guard<CriticalMutex> region_guard{region_lock()}; |
| Guard<CriticalMutex> guard{lock()}; |
| if (state_locked() != LifeCycleState::ALIVE) { |
| return zx::error{ZX_ERR_BAD_STATE}; |
| } |
| DEBUG_ASSERT(object_); |
| DEBUG_ASSERT(parent_); |
| |
| // Never allow writes to the vdso. |
| if (aspace_->vdso_code_mapping_.get() == this) { |
| return zx::error(ZX_ERR_ACCESS_DENIED); |
| } |
| // If we have already re-directed to a private clone then there is no need to do so again. |
| if (private_clone_) { |
| return zx::ok(ktl::move(self)); |
| } |
| // If the mapping could already be made writable (even if writes are currently disabled by its |
| // protections), then writing is already safe. |
| if (is_valid_mapping_flags(ARCH_MMU_FLAG_PERM_WRITE)) { |
| return zx::ok(ktl::move(self)); |
| } |
| // A physical VMO cannot be cloned and so we cannot make this safe; just allow the write. |
| if (!object_->is_paged()) { |
| return zx::ok(ktl::move(self)); |
| } |
| |
| // Create a clone of our VMO that covers the size of our mapping. |
| fbl::RefPtr<VmMapping> writable; |
| { |
| fbl::RefPtr<VmObject> clone; |
| zx_status_t status = object_->CreateClone(Resizability::NonResizable, SnapshotType::OnWrite, |
| object_offset_, size_, true, &clone); |
| if (status != ZX_OK) { |
| return zx::error(status); |
| } |
| Guard<CriticalMutex> object_guard{object_lock()}; |
| fbl::AllocChecker ac; |
| // We created the clone starting at object_offset_ in the old object, so the equivalent starting |
| // offset in the clone is 0. |
| writable = |
| fbl::AdoptRef(new (&ac) VmMapping(*parent_, true, base_, size_, flags_, ktl::move(clone), 0, |
| ktl::move(protection_ranges_), mergeable_)); |
| if (!ac.check()) { |
| return zx::error(ZX_ERR_NO_MEMORY); |
| } |
| } |
| // First transfer any memory priority from the current mapping to the new mapping. |
| AssertHeld(writable->lock_ref()); |
| AssertHeld(writable->region_lock_ref()); |
| // Use SplitOnUnmap=true because writable hasn't been activated yet. |
| writable->SetMemoryPriorityLocked</*SplitOnUnmap=*/true>(memory_priority_); |
| |
| // Now destroy the old mapping and then install the new one. As we hold the aspace lock |
| // continuously the temporary gap in a mapping being present cannot be observed. |
| zx_status_t status = DestroyLocked(); |
| ASSERT(status == ZX_OK); |
| writable->Activate(); |
| return zx::ok(ktl::move(writable)); |
| } |
| |
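| // Returns |len| trimmed so that the mapping-relative range [offset, offset + len) does not |
| // extend beyond the VMO size, or beyond the stream size for fault-beyond-stream-size mappings. |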
| uint64_t VmMapping::TrimmedObjectRangeLocked(uint64_t offset, uint64_t len) const TA_REQ(lock()) |
| TA_REQ(object_->lock()) { |
| const uint64_t vmo_offset = object_offset_ + offset; |
| const uint64_t vmo_size = object_->size_locked(); |
| if (vmo_offset >= vmo_size) { |
| return 0; |
| } |
| |
| uint64_t trim_len = vmo_size - vmo_offset; |
| |
| if (flags_ & VMAR_FLAG_FAULT_BEYOND_STREAM_SIZE) { |
| VmObjectPaged* paged = DownCastVmObject<VmObjectPaged>(object_.get()); |
| DEBUG_ASSERT(paged); |
| AssertHeld(paged->lock_ref()); |
| auto stream_size_res = paged->saturating_stream_size_locked(); |
| // Creating a fault-beyond-stream-size mapping should have allocated a CSM. |
| DEBUG_ASSERT(stream_size_res); |
| size_t stream_size = stream_size_res.value(); |
| DEBUG_ASSERT(stream_size <= vmo_size); |
| // The faulting range may lie entirely beyond the stream size; avoid underflowing the |
| // subtraction in that case and report a zero-length range instead. |
| trim_len = vmo_offset < stream_size ? stream_size - vmo_offset : 0; |
| } |
| |
| return ktl::min(trim_len, len); |
| } |
| |
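| // Applies |new_arch_mmu_flags| to [base, base + size) within the mapping described by |
| // |mapping_base| and |mapping_size|, invoking |callback| with the (start, size, flags) of every |
| // old sub-range being replaced. All allocations happen before any destructive change so that a |
| // failure leaves the existing ranges untouched. |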
| template <typename F> |
| zx_status_t MappingProtectionRanges::UpdateProtectionRange(vaddr_t mapping_base, |
| size_t mapping_size, vaddr_t base, |
| size_t size, uint new_arch_mmu_flags, |
| F callback) { |
| // If we're changing the whole mapping, just make the change. |
| if (mapping_base == base && mapping_size == size) { |
| protect_region_list_rest_.clear(); |
| callback(base, size, first_region_arch_mmu_flags_); |
| first_region_arch_mmu_flags_ = new_arch_mmu_flags; |
| return ZX_OK; |
| } |
| |
| // Find the range of nodes that will need deleting. |
| auto first = protect_region_list_rest_.lower_bound(base); |
| auto last = protect_region_list_rest_.upper_bound(base + (size - 1)); |
| |
| // Work out the flags of the regions immediately before the first/last nodes. We need to cache |
| // these flags so that, when inserting the new protection nodes, we do not insert nodes that would |
| // give two adjacent regions the same flags (which would be redundant). |
| const uint start_carry_flags = FlagsForPreviousRegion(first); |
| const uint end_carry_flags = FlagsForPreviousRegion(last); |
| |
| // Determine how many new nodes we are going to need so we can allocate up front. This ensures |
| // that after we have deleted nodes from the tree (and destroyed information) we do not have to |
| // do an allocation that might fail and leave us in an unrecoverable state. However, we would |
| // like to avoid actually performing allocations as far as possible, so do the following: |
| // 1. Count how many nodes will be needed to represent the new protection range (after the nodes |
| //    between first and last have been deleted). As a protection range has two points, a start |
| //    and an end, the most nodes we can ever possibly need is two. |
| // 2. Of these new nodes, work out how many we can reuse from the deletions. |
| // 3. Allocate the remainder. |
| ktl::optional<ktl::unique_ptr<ProtectNode>> protect_nodes[2]; |
| const uint total_nodes_needed = NodeAllocationsForRange(mapping_base, mapping_size, base, size, |
| first, last, new_arch_mmu_flags); |
| uint nodes_needed = total_nodes_needed; |
| // First see how many of the needed nodes we will be able to reuse from the nodes we are about |
| // to erase. |
| for (auto it = first; nodes_needed > 0 && it != last; it++) { |
| nodes_needed--; |
| } |
| uint nodes_available = 0; |
| // Allocate any remaining nodes_needed that will not be fulfilled by reusing deleted nodes. |
| while (nodes_available < nodes_needed) { |
| fbl::AllocChecker ac; |
| ktl::unique_ptr<ProtectNode> new_node(ktl::make_unique<ProtectNode>(&ac)); |
| if (!ac.check()) { |
| return ZX_ERR_NO_MEMORY; |
| } |
| protect_nodes[nodes_available++].emplace(ktl::move(new_node)); |
| } |
| |
| // Now that we have done all memory allocations and know that we cannot fail, start the |
| // destructive part: erase any nodes in the range and invoke the provided callback with the old |
| // data. |
| { |
| vaddr_t old_start = base; |
| uint old_flags = start_carry_flags; |
| while (first != last) { |
| // On the first iteration, if the range starts exactly at an existing node, skip the callback |
| // since we do not want to invoke it for a zero sized range. |
| if (old_start != first->region_start) { |
| callback(old_start, first->region_start - old_start, old_flags); |
| } |
| old_start = first->region_start; |
| old_flags = first->arch_mmu_flags; |
| auto node = protect_region_list_rest_.erase(first++); |
| if (nodes_available < total_nodes_needed) { |
| protect_nodes[nodes_available++].emplace(ktl::move(node)); |
| } |
| } |
| // If the range was not aligned to a node then process any remainder. |
| if (old_start <= base + (size - 1)) { |
| callback(old_start, base + size - old_start, old_flags); |
| } |
| } |
| |
| // At this point we should now have all the nodes. |
| DEBUG_ASSERT(total_nodes_needed == nodes_available); |
| |
| // Check if we are updating the implicit first node, which just involves changing |
| // first_region_arch_mmu_flags_, or if there's a protection change that requires a node insertion. |
| if (base == mapping_base) { |
| first_region_arch_mmu_flags_ = new_arch_mmu_flags; |
| } else if (start_carry_flags != new_arch_mmu_flags) { |
| ASSERT(nodes_available > 0); |
| auto node = ktl::move(protect_nodes[--nodes_available].value()); |
| node->region_start = base; |
| node->arch_mmu_flags = new_arch_mmu_flags; |
| protect_region_list_rest_.insert(ktl::move(node)); |
| } |
| |
| // To create the end of the region we first check if there is a gap between the end of this |
| // region and the start of the next region. Additionally, this needs to handle the case where |
| // there is no next node in the tree, in which case we check against the mapping limit of |
| // mapping_base + mapping_size. |
| const uint64_t next_region_start = |
| last.IsValid() ? last->region_start : (mapping_base + mapping_size); |
| if (next_region_start != base + size) { |
| // There is a gap to the next node so we need to make sure it keeps its old protection value, |
| // end_carry_flags. However, it could have ended up that these flags are what we are protecting |
| // to, in which case a new node isn't needed as we can just effectively merge the gap into this |
| // protection range. |
| if (end_carry_flags != new_arch_mmu_flags) { |
| ASSERT(nodes_available > 0); |
| auto node = ktl::move(protect_nodes[--nodes_available].value()); |
| node->region_start = base + size; |
| node->arch_mmu_flags = end_carry_flags; |
| protect_region_list_rest_.insert(ktl::move(node)); |
| // Since we are effectively moving forward a node that we previously deleted, in order to shrink |
| // the previous protection range, we know that no merging with the next node is needed. |
| DEBUG_ASSERT(!last.IsValid() || last->arch_mmu_flags != end_carry_flags); |
| } |
| } else if (last.IsValid() && last->arch_mmu_flags == new_arch_mmu_flags) { |
| // From the previous `if` block we know that if last.IsValid() is true, then the end of the |
| // region being protected is last->region_start. If this next region happens to have the same |
| // flags as what we just protected, then we need to drop this node. |
| protect_region_list_rest_.erase(last); |
| } |
| |
| // We should not have allocated more nodes than we needed; any leftover nodes indicate a bug in |
| // the calculation logic. |
| DEBUG_ASSERT(nodes_available == 0); |
| return ZX_OK; |
| } |
| |
| // Counts how many nodes would need to be allocated for a protection range. This calculation is |
| // based on whether there are actual changes in the protection type that require a node to be |
| // added. |
| uint MappingProtectionRanges::NodeAllocationsForRange(vaddr_t mapping_base, size_t mapping_size, |
| vaddr_t base, size_t size, |
| RegionList::iterator removal_start, |
| RegionList::iterator removal_end, |
| uint new_mmu_flags) const { |
| uint nodes_needed = 0; |
| // Check if we will need a node at the start. If base == mapping_base then we will just be |
| // changing first_region_arch_mmu_flags_; otherwise we need a node only if we are actually |
| // causing a protection change. |
| if (base != mapping_base && FlagsForPreviousRegion(removal_start) != new_mmu_flags) { |
| nodes_needed++; |
| } |
| // A node for the end of the region is needed only when both of the following hold: |
| // 1. There will be a non-zero gap between the end of our new region and the start of the next |
| //    existing region. |
| // 2. That non-zero sized gap is of a different protection type. |
| const uint64_t next_region_start = |
| removal_end.IsValid() ? removal_end->region_start : (mapping_base + mapping_size); |
| if (next_region_start != base + size && FlagsForPreviousRegion(removal_end) != new_mmu_flags) { |
| nodes_needed++; |
| } |
| return nodes_needed; |
| } |
| |
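| // Merges |right|, which must describe the mapping immediately following this one and beginning |
| // at |merge_addr|, into this set of ranges. On allocation failure nothing is modified. |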
| zx_status_t MappingProtectionRanges::MergeRightNeighbor(MappingProtectionRanges& right, |
| vaddr_t merge_addr) { |
| // We need to insert a node if the protection type of the end of the left mapping is not the |
| // same as the protection type of the start of the right mapping. |
| if (FlagsForPreviousRegion(protect_region_list_rest_.end()) != |
| right.first_region_arch_mmu_flags_) { |
| fbl::AllocChecker ac; |
| ktl::unique_ptr<ProtectNode> region = |
| ktl::make_unique<ProtectNode>(&ac, merge_addr, right.first_region_arch_mmu_flags_); |
| if (!ac.check()) { |
| // No state has changed yet, so although an error is returned here the caller simply abandons |
| // the merge; it is safe to just not merge and no failure is surfaced to the user. |
| TRACEF("Aborted region merge due to out of memory\n"); |
| return ZX_ERR_NO_MEMORY; |
| } |
| protect_region_list_rest_.insert(ktl::move(region)); |
| } |
| // Carry over any remaining regions. |
| while (!right.protect_region_list_rest_.is_empty()) { |
| protect_region_list_rest_.insert(right.protect_region_list_rest_.pop_front()); |
| } |
| return ZX_OK; |
| } |
| |
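| // Splits this set of protection ranges at |split|, returning a new set describing the regions |
| // from |split| onwards. The returned set starts with the flags in effect at |split| and takes |
| // ownership of all nodes above the split point. |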
| MappingProtectionRanges MappingProtectionRanges::SplitAt(vaddr_t split) { |
| // Determine the mmu flags the right mapping would start with. |
| auto right_nodes = protect_region_list_rest_.upper_bound(split); |
| const uint right_mmu_flags = FlagsForPreviousRegion(right_nodes); |
| |
| MappingProtectionRanges ranges(right_mmu_flags); |
| |
| // Move any protect regions into the right half. |
| while (right_nodes != protect_region_list_rest_.end()) { |
| ranges.protect_region_list_rest_.insert(protect_region_list_rest_.erase(right_nodes++)); |
| } |
| return ranges; |
| } |
| |
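| // Discards all protection information below |addr|. The flags of the region containing |addr| |
| // become the new first-region flags. |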
| void MappingProtectionRanges::DiscardBelow(vaddr_t addr) { |
| auto last = protect_region_list_rest_.upper_bound(addr); |
| while (protect_region_list_rest_.begin() != last) { |
| first_region_arch_mmu_flags_ = protect_region_list_rest_.pop_front()->arch_mmu_flags; |
| } |
| } |
| |
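| // Discards all protection information for regions starting at or above |addr| by erasing their |
| // nodes; the flags of earlier regions are unaffected. |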
| void MappingProtectionRanges::DiscardAbove(vaddr_t addr) { |
| for (auto it = protect_region_list_rest_.lower_bound(addr); |
| it != protect_region_list_rest_.end();) { |
| protect_region_list_rest_.erase(it++); |
| } |
| } |
| |
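| // Debug helper that checks every protection node lies within [mapping_base, mapping_base + |
| // mapping_size), returning false if any node falls outside that range. |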
| bool MappingProtectionRanges::DebugNodesWithinRange(vaddr_t mapping_base, size_t mapping_size) { |
| if (protect_region_list_rest_.is_empty()) { |
| return true; |
| } |
| if (protect_region_list_rest_.begin()->region_start < mapping_base) { |
| return false; |
| } |
| if ((--protect_region_list_rest_.end())->region_start >= mapping_base + mapping_size) { |
| return false; |
| } |
| return true; |
| } |