| // Copyright 2016 The Fuchsia Authors |
| // |
| // Use of this source code is governed by a MIT-style |
| // license that can be found in the LICENSE file or at |
| // https://opensource.org/licenses/MIT |
| |
| #ifndef ZIRCON_KERNEL_VM_INCLUDE_VM_VM_ASPACE_H_ |
| #define ZIRCON_KERNEL_VM_INCLUDE_VM_VM_ASPACE_H_ |
| |
| #include <assert.h> |
| #include <lib/crypto/prng.h> |
| #include <zircon/types.h> |
| |
| #include <arch/aspace.h> |
| #include <fbl/canary.h> |
| #include <fbl/intrusive_double_list.h> |
| #include <fbl/intrusive_wavl_tree.h> |
| #include <fbl/macros.h> |
| #include <fbl/ref_counted.h> |
| #include <fbl/ref_ptr.h> |
| #include <kernel/lockdep.h> |
| #include <kernel/mutex.h> |
| #include <vm/arch_vm_aspace.h> |
| #include <vm/vm.h> |
| |
| class VmAddressRegion; |
| class VmEnumerator; |
| class VmMapping; |
| class VmAddressRegionOrMapping; |
| |
| namespace hypervisor { |
| class GuestPhysicalAspace; |
| } // namespace hypervisor |
| |
| class VmObject; |
| |
// A virtual address space.
//
// A VmAspace manages the virtual address range [base, base + size) through a tree of
// VmAddressRegions and VmMappings rooted at |root_vmar_|, and owns the architecture-specific
// translation tables via |arch_aspace_|. Instances are created with the Create* factories and,
// although reference counted, must be explicitly torn down with Destroy() before the last
// RefPtr is dropped.
class VmAspace : public fbl::DoublyLinkedListable<VmAspace*>, public fbl::RefCounted<VmAspace> {
 public:
  enum class Type {
    User = 0,
    Kernel,
    // You probably do not want to use LowKernel. It is primarily used for SMP bootstrap or mexec
    // to allow mappings of very low memory using the standard VMM subsystem.
    LowKernel,
    // Used to construct an address space representing hypervisor guest memory.
    GuestPhysical,
  };

  // Create an address space of the type specified in |type| with name |name|.
  //
  // Although reference counted, the returned VmAspace must be explicitly destroyed via Destroy.
  //
  // Returns null on failure (e.g. due to resource starvation).
  static fbl::RefPtr<VmAspace> Create(Type type, const char* name);

  // Create an address space of the type specified in |type| with name |name|.
  //
  // The returned aspace will start at |base| and span |size|.
  //
  // If |share_opt| is ShareOpt::Shared, we're creating a shared address space, and the underlying
  // ArchVmAspace will be initialized using the `InitShared` method instead of the normal
  // `Init` method.
  //
  // If |share_opt| is ShareOpt::Restricted, we're creating a restricted address space, and the
  // underlying ArchVmAspace will be initialized using the `InitRestricted` method.
  //
  // Although reference counted, the returned VmAspace must be explicitly destroyed via Destroy.
  //
  // Returns null on failure (e.g. due to resource starvation).
  enum class ShareOpt {
    None,
    Restricted,
    Shared,
  };
  static fbl::RefPtr<VmAspace> Create(vaddr_t base, size_t size, Type type, const char* name,
                                      ShareOpt share_opt);

  // Create a unified address space that consists of the given constituent address spaces.
  //
  // The passed in address spaces must meet the following criteria:
  // 1. They must manage non-overlapping regions.
  // 2. The shared VmAspace must have been created with ShareOpt::Shared.
  //
  // Although reference counted, the returned VmAspace must be explicitly destroyed via Destroy.
  // Note that it must be Destroy()'d before the shared and restricted VmAspaces; Destroy()'ing the
  // constituent VmAspaces before Destroy()'ing this one will trigger asserts.
  //
  // Returns null on failure (e.g. due to resource starvation).
  static fbl::RefPtr<VmAspace> CreateUnified(VmAspace* shared, VmAspace* restricted,
                                             const char* name);

  // Destroy this address space.
  //
  // Destroy does not free this object, but rather allows it to be freed when the last retaining
  // RefPtr is destroyed.
  zx_status_t Destroy();

  // Replace this aspace's name with |name|.
  void Rename(const char* name);

  // simple accessors
  vaddr_t base() const { return base_; }
  size_t size() const { return size_; }
  const char* name() const { return name_; }
  ArchVmAspace& arch_aspace() { return arch_aspace_; }
  bool is_user() const { return type_ == Type::User; }
  bool is_aslr_enabled() const { return aslr_config_.enabled; }

  // Get the root VMAR (briefly acquires the aspace lock)
  // May return nullptr if the aspace has been destroyed or is not yet initialized.
  fbl::RefPtr<VmAddressRegion> RootVmar();

  // Returns true if the address space has been destroyed.
  bool is_destroyed() const;

  // accessor for singleton kernel address space
  static VmAspace* kernel_aspace() { return kernel_aspace_; }

  // set the per thread aspace pointer to this
  void AttachToThread(Thread* t);

  // Dump debugging information about this aspace; |verbose| selects more detailed output.
  void Dump(bool verbose) const;
  void DumpLocked(bool verbose) const TA_REQ(lock_);

  // Drop page tables: the static variant covers all user aspaces, the non-static variant only
  // this aspace.
  static void DropAllUserPageTables();
  void DropUserPageTables();

  static void DumpAllAspaces(bool verbose);

  // Harvests all accessed information across all user mappings and updates any page age
  // information for terminal mappings, and potentially harvests page tables depending on the
  // passed in action. This requires holding the aspaces_list_lock_ over the entire duration and
  // whilst not a commonly used lock this function should still only be called infrequently to
  // avoid monopolizing the lock.
  using NonTerminalAction = ArchVmAspace::NonTerminalAction;
  using TerminalAction = ArchVmAspace::TerminalAction;
  static void HarvestAllUserAccessedBits(NonTerminalAction non_terminal_action,
                                         TerminalAction terminal_action);

  // Traverses the VM tree rooted at this node, in depth-first pre-order. If
  // any methods of |ve| return false, the traversal stops and this method
  // returns ZX_ERR_CANCELED. If the aspace is destroyed or otherwise not
  // enumerable this returns ZX_ERR_BAD_STATE, otherwise ZX_OK is returned if
  // traversal completes successfully.
  zx_status_t EnumerateChildren(VmEnumerator* ve);

  // A collection of memory usage counts.
  struct vm_usage_t {
    // A count of bytes covered by VmMapping ranges.
    size_t mapped_bytes;

    // For the fields below, a byte is considered committed if a VmMapping
    // covers a range of a VmObject that contains that byte's page, and the page
    // has physical memory allocated to it.

    // A count of committed bytes that are only mapped into this address
    // space.
    size_t private_bytes;

    // A count of committed bytes that are mapped into this and at least
    // one other address space.
    size_t shared_bytes;

    // A number that estimates the fraction of shared_bytes that this
    // address space is responsible for keeping alive.
    //
    // An estimate of:
    //   For each shared, committed page:
    //     scaled_shared_bytes +=
    //         PAGE_SIZE / (number of address spaces mapping this page)
    //
    // This number is strictly smaller than shared_bytes.
    size_t scaled_shared_bytes;
  };

  // Counts memory usage under the VmAspace.
  zx_status_t GetMemoryUsage(vm_usage_t* usage);

  // Generates a soft fault against this aspace. This is similar to a PageFault except:
  // * This aspace may not currently be active and this does not have to be called from the
  //   hardware exception handler.
  // * May be invoked spuriously in situations where the hardware mappings would have prevented a
  //   real PageFault from occurring.
  zx_status_t SoftFault(vaddr_t va, uint flags);

  // Generates an accessed flag fault against this aspace. This is a specialized version of
  // SoftFault that will only resolve a potential missing access flag and nothing else.
  zx_status_t AccessedFault(vaddr_t va);

  // Page fault routine. Should only be called by the hypervisor or by Thread::Current::Fault.
  zx_status_t PageFault(vaddr_t va, uint flags);

  // Convenience method for traversing the tree of VMARs to find the deepest
  // VMAR in the tree that includes *va*.
  // Returns nullptr if the aspace has been destroyed or is not yet initialized.
  fbl::RefPtr<VmAddressRegionOrMapping> FindRegion(vaddr_t va);

  // For region creation routines
  static const uint VMM_FLAG_VALLOC_SPECIFIC = (1u << 0);  // allocate at specific address
  static const uint VMM_FLAG_COMMIT = (1u << 1);  // commit memory up front (no demand paging)

  // legacy functions to assist in the transition to VMARs
  // These all assume a flat VMAR structure in which all VMOs are mapped
  // as children of the root. They will all assert if used on user aspaces
  // TODO(teisenbe): remove uses of these in favor of new VMAR interfaces
  zx_status_t AllocPhysical(const char* name, size_t size, void** ptr, uint8_t align_pow2,
                            paddr_t paddr, uint vmm_flags, uint arch_mmu_flags);
  zx_status_t AllocContiguous(const char* name, size_t size, void** ptr, uint8_t align_pow2,
                              uint vmm_flags, uint arch_mmu_flags);
  zx_status_t Alloc(const char* name, size_t size, void** ptr, uint8_t align_pow2, uint vmm_flags,
                    uint arch_mmu_flags);
  zx_status_t FreeRegion(vaddr_t va);

  // Internal use function for mapping VMOs. Do not use. This is exposed in
  // the public API purely for tests.
  zx_status_t MapObjectInternal(fbl::RefPtr<VmObject> vmo, const char* name, uint64_t offset,
                                size_t size, void** ptr, uint8_t align_pow2, uint vmm_flags,
                                uint arch_mmu_flags);

  // Addresses of the vDSO mapped into this aspace, if any; see vdso_code_mapping_.
  uintptr_t vdso_base_address() const;
  uintptr_t vdso_code_address() const;

  // Helper function to test for collision with vdso_code_mapping_.
  bool IntersectsVdsoCodeLocked(vaddr_t base, size_t size) const TA_REQ(lock_);

  // Returns whether this aspace is currently set to be a high memory priority.
  bool IsHighMemoryPriority() const;

 protected:
  // Share the aspace lock with VmAddressRegion/VmMapping/GuestPhysicalAspace so they can serialize
  // changes to the aspace.
  friend class VmAddressRegionOrMapping;
  friend class VmAddressRegion;
  friend class VmMapping;
  friend class hypervisor::GuestPhysicalAspace;
  Lock<CriticalMutex>* lock() const TA_RET_CAP(lock_) { return &lock_; }
  Lock<CriticalMutex>& lock_ref() const TA_RET_CAP(lock_) { return lock_; }

  // Expose the PRNG for ASLR to VmAddressRegion
  crypto::Prng& AslrPrngLocked() TA_REQ(lock_) {
    DEBUG_ASSERT(is_aslr_enabled());
    return aslr_prng_;
  }

  uint8_t AslrEntropyBits(bool compact) const {
    return compact ? aslr_config_.compact_entropy_bits : aslr_config_.entropy_bits;
  }

 private:
  friend lazy_init::Access;

  // Represents the ASLR configuration for a VmAspace. Kept in a single struct since it is
  // const over the lifetime of a VmAspace.
  struct AslrConfig {
    bool enabled;
    uint8_t entropy_bits;
    uint8_t compact_entropy_bits;
    // We record the PRNG seed to enable reproducible debugging.
    uint8_t seed[crypto::Prng::kMinEntropy];
  };

  // can only be constructed via factory or LazyInit
  VmAspace(vaddr_t base, size_t size, Type type, AslrConfig aslr_config, const char* name);

  DISALLOW_COPY_ASSIGN_AND_MOVE(VmAspace);

  // private destructor that can only be used from the ref ptr
  ~VmAspace();
  friend fbl::RefPtr<VmAspace>;

  // complete initialization, may fail in OOM cases
  zx_status_t Init(ShareOpt share_opt);

  void InitializeAslr();

  static AslrConfig CreateAslrConfig(Type type);

  // Increments or decrements the priority count of this aspace. The high priority count is used to
  // control active page table reclamation, and applies to the whole aspace. The count is never
  // allowed to go negative and so callers must only subtract what they have already added. Further,
  // callers are required to remove any additions before the aspace is destroyed.
  void ChangeHighPriorityCountLocked(int64_t delta) TA_REQ(lock());

  // Returns whether this aspace is a guest physical address space.
  // TODO(https://fxbug.dev/42054461): Rationalize usage of `is_user` and `is_guest_physical`.
  bool is_guest_physical() const { return type_ == Type::GuestPhysical; }

  // Encodes the idea that we can always unmap from user aspaces.
  ArchVmAspace::EnlargeOperation EnlargeArchUnmap() const {
    return is_user() || is_guest_physical() ? ArchVmAspace::EnlargeOperation::Yes
                                            : ArchVmAspace::EnlargeOperation::No;
  }

  fbl::RefPtr<VmAddressRegion> RootVmarLocked() TA_REQ(lock_);

  // magic
  fbl::Canary<fbl::magic("VMAS")> canary_;

  // members
  const vaddr_t base_;
  const size_t size_;
  const Type type_;
  char name_[ZX_MAX_NAME_LEN] TA_GUARDED(lock_);
  bool aspace_destroyed_ TA_GUARDED(lock_) = false;

  // The high priority count is used to determine whether this aspace should perform page table
  // reclamation, with any non-zero count completely disabling reclamation. This is an atomic so
  // that it can be safely read outside the lock, however writes should occur inside the lock.
  ktl::atomic<int64_t> high_priority_count_ = 0;

  mutable DECLARE_CRITICAL_MUTEX(VmAspace) lock_;

  // Keep a cache of the VmMapping of the last PageFault that occurred. On a page fault this can
  // be checked to see if it matches more quickly than walking the full vmar tree. Mappings that
  // are stored here must be in the ALIVE state, implying that they are in the VMAR tree. It is
  // then the responsibility of the VmMapping to remove itself from here should it transition out
  // of ALIVE, and remove itself from the VMAR tree. A raw pointer is stored here since the
  // VmMapping must be alive and in tree anyway and if it were a RefPtr we would not be able to
  // handle being the one to drop the last ref and perform destruction.
  VmMapping* last_fault_ TA_GUARDED(lock_) = nullptr;

  // root of virtual address space
  // Access to this reference is guarded by lock_.
  fbl::RefPtr<VmAddressRegion> root_vmar_ TA_GUARDED(lock_);

  // PRNG used by VMARs for address choices. The PRNG is thread safe and does not need to be guarded
  // by the lock.
  crypto::Prng aslr_prng_;
  const AslrConfig aslr_config_;

  // architecturally specific part of the aspace. This is internally locked and does not need to be
  // guarded by lock_.
  ArchVmAspace arch_aspace_;

  fbl::RefPtr<VmMapping> vdso_code_mapping_ TA_GUARDED(lock_);

  // The number of page table reclamations attempted since last active. This is used since we need
  // to perform pt reclamation twice in a row (once to clear accessed bits, another time to
  // reclaim page tables) before the aspace is at a fixed point and we can actually stop
  // performing the harvests.
  uint32_t pt_harvest_since_active_ TA_GUARDED(AspaceListLock::Get()) = 0;

  DECLARE_SINGLETON_MUTEX(AspaceListLock);
  static fbl::DoublyLinkedList<VmAspace*> aspaces_list_ TA_GUARDED(AspaceListLock::Get());

  // initialization routines need to construct the singleton kernel address space
  // at particular points in the bootup process
  static void KernelAspaceInitPreHeap();
  static VmAspace* kernel_aspace_;
  friend void vm_init_preheap();
};
| |
| #endif // ZIRCON_KERNEL_VM_INCLUDE_VM_VM_ASPACE_H_ |