| // Copyright 2016 The Fuchsia Authors |
| // |
| // Use of this source code is governed by a MIT-style |
| // license that can be found in the LICENSE file or at |
| // https://opensource.org/licenses/MIT |
| |
| #include <assert.h> |
| #include <err.h> |
| #include <string.h> |
| #include <trace.h> |
| |
| #include <arch/arch_ops.h> |
| #include <arch/mmu.h> |
| #include <arch/x86.h> |
| #include <arch/x86/descriptor.h> |
| #include <arch/x86/feature.h> |
| #include <arch/x86/mmu.h> |
| #include <arch/x86/mmu_mem_types.h> |
| #include <fbl/auto_lock.h> |
| #include <kernel/mp.h> |
| #include <kernel/vm.h> |
| #include <vm/arch_vm_aspace.h> |
| #include <vm/pmm.h> |
| |
| #define LOCAL_TRACE 0 |
| |
| /* Default virtual/physical address widths. |
| * The actual values are read from the CPU in x86_mmu_early_init() below. */ |
| uint8_t g_vaddr_width = 48; |
| uint8_t g_paddr_width = 32; |
| |
| /* True if the system supports 1GB pages */ |
| static bool supports_huge_pages = false; |
| |
| /* top level kernel page tables, initialized in start.S */ |
| volatile pt_entry_t pml4[NO_OF_PT_ENTRIES] __ALIGNED(PAGE_SIZE); |
| volatile pt_entry_t pdp[NO_OF_PT_ENTRIES] __ALIGNED(PAGE_SIZE); /* temporary */ |
| volatile pt_entry_t pte[NO_OF_PT_ENTRIES] __ALIGNED(PAGE_SIZE); |
| |
| /* top level pdp needed to map the -512GB..0 space */ |
| volatile pt_entry_t pdp_high[NO_OF_PT_ENTRIES] __ALIGNED(PAGE_SIZE); |
| |
| /* a big pile of page tables needed to map 64GB of memory into kernel space using 2MB pages */ |
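| /* (64 GB / 2 MB) = 32768 entries, i.e. 64 pages worth of page directories */ |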
| volatile pt_entry_t linear_map_pdp[(64ULL * GB) / (2 * MB)] __ALIGNED(PAGE_SIZE); |
| |
| /* which of the above variables is the top level page table */ |
| #define KERNEL_PT pml4 |
| |
| /* physical address of the kernel's top level page table */ |
| static const paddr_t kernel_pt_phys = (vaddr_t)KERNEL_PT - KERNEL_BASE; |
| |
| /* valid EPT MMU flags */ |
| static const uint kValidEptFlags = |
| ARCH_MMU_FLAG_PERM_READ | ARCH_MMU_FLAG_PERM_WRITE | ARCH_MMU_FLAG_PERM_EXECUTE; |
| |
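| /** |
| * @brief Return the physical address of the kernel's top level page table (its CR3 value) |
| */ |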
| paddr_t x86_kernel_cr3(void) { |
| return kernel_pt_phys; |
| } |
| |
| /** |
| * @brief check if the virtual address is canonical |
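| * |
| * e.g. with a 48-bit virtual address width, 0x00007fffffffffff (top of the |
| * lower half) and 0xffff800000000000 (bottom of the upper half) are canonical, |
| * while 0x0000800000000000 falls in the non-canonical gap. |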
| */ |
| bool x86_is_vaddr_canonical(vaddr_t vaddr) { |
| uint64_t max_vaddr_lohalf, min_vaddr_hihalf; |
| |
| /* get max address in lower-half canonical addr space */ |
| /* e.g. if width is 48, then 0x00007FFF_FFFFFFFF */ |
| max_vaddr_lohalf = (1ull << (g_vaddr_width - 1)) - 1; |
| |
| /* get min address in higher-half canonical addr space */ |
| /* e.g. if width is 48, then 0xFFFF8000_00000000*/ |
| min_vaddr_hihalf = ~max_vaddr_lohalf; |
| |
| /* Check to see if the address is a canonical address */ |
| if ((vaddr > max_vaddr_lohalf) && (vaddr < min_vaddr_hihalf)) |
| return false; |
| |
| return true; |
| } |
| |
| /** |
| * @brief check if the virtual address is aligned and canonical |
| */ |
| static bool x86_mmu_check_vaddr(vaddr_t vaddr) { |
| /* Check to see if the address is PAGE aligned */ |
| if (!IS_ALIGNED(vaddr, PAGE_SIZE)) |
| return false; |
| |
| return x86_is_vaddr_canonical(vaddr); |
| } |
| |
| /** |
| * @brief check if the physical address is valid and aligned |
| */ |
| static bool x86_mmu_check_paddr(paddr_t paddr) { |
| uint64_t max_paddr; |
| |
| /* Check to see if the address is PAGE aligned */ |
| if (!IS_ALIGNED(paddr, PAGE_SIZE)) |
| return false; |
| |
| max_paddr = (1ull << g_paddr_width) - 1; |
| |
| return paddr <= max_paddr; |
| } |
| |
| /** |
| * @brief invalidate all TLB entries, including global entries |
| */ |
| static void x86_tlb_global_invalidate() { |
| /* See Intel 3A section 4.10.4.1 */ |
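| /* Toggling CR4.PGE flushes all TLB entries, including global pages; |
| * a CR3 reload only flushes the non-global entries. */ |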
| ulong cr4 = x86_get_cr4(); |
| if (likely(cr4 & X86_CR4_PGE)) { |
| x86_set_cr4(cr4 & ~X86_CR4_PGE); |
| x86_set_cr4(cr4); |
| } else { |
| x86_set_cr3(x86_get_cr3()); |
| } |
| } |
| |
| /* Task used for invalidating a TLB entry on each CPU */ |
| struct tlb_invalidate_page_context { |
| ulong target_cr3; |
| vaddr_t vaddr; |
| enum page_table_levels level; |
| bool global_page; |
| }; |
| static void tlb_invalidate_page_task(void* raw_context) { |
| DEBUG_ASSERT(arch_ints_disabled()); |
| tlb_invalidate_page_context* context = (tlb_invalidate_page_context*)raw_context; |
| |
| ulong cr3 = x86_get_cr3(); |
| if (context->target_cr3 != cr3 && !context->global_page) { |
| /* This invalidation doesn't apply to this CPU, ignore it */ |
| return; |
| } |
| |
| switch (context->level) { |
| case PML4_L: |
| x86_tlb_global_invalidate(); |
| break; |
| case PDP_L: |
| case PD_L: |
| case PT_L: |
| __asm__ volatile("invlpg %0" ::"m"(*(uint8_t*)context->vaddr)); |
| break; |
| } |
| } |
| |
| /** |
| * @brief Invalidate a single page at a given page table level |
| * |
| * @param aspace The aspace we're invalidating for (if null, the current aspace is assumed) |
| * @param vaddr The virtual address we are invalidating the TLB entry for |
| * @param level The page table level that maps this vaddr |
| * @param global_page True if we are invalidating a global mapping |
| * |
| * TODO(MG-979): Optimize this. This is horribly inefficient. |
| * We should also change this to pool invalidations from a single |
| * "transaction" and then only execute a single mp_sync_exec for that |
| * transaction, rather than one per page. |
| */ |
| static void x86_tlb_invalidate_page(X86ArchVmAspace* aspace, vaddr_t vaddr, |
| enum page_table_levels level, bool global_page) { |
| ulong cr3 = aspace ? aspace->pt_phys() : x86_get_cr3(); |
| struct tlb_invalidate_page_context task_context = { |
| .target_cr3 = cr3, .vaddr = vaddr, .level = level, .global_page = global_page, |
| }; |
| |
| /* Target only CPUs this aspace is active on. It may be the case that some |
| * other CPU will become active in it after this load, or will have left it |
| * just before this load. In the former case, it is becoming active after |
| * the write to the page table, so it will see the change. In the latter |
| * case, it will get a spurious request to flush. */ |
| mp_ipi_target_t target; |
| mp_cpu_mask_t target_mask = 0; |
| if (global_page || aspace == nullptr) { |
| target = MP_IPI_TARGET_ALL; |
| } else { |
| target = MP_IPI_TARGET_MASK; |
| target_mask = aspace->active_cpus(); |
| } |
| |
| mp_sync_exec(target, target_mask, tlb_invalidate_page_task, &task_context); |
| } |
| |
| template <int Level> |
| struct PageTableBase { |
| static constexpr page_table_levels level = static_cast<page_table_levels>(Level); |
| static_assert(Level >= 0, "Level too low"); |
| static_assert(Level < X86_PAGING_LEVELS, "Level too high"); |
| |
| /** |
| * @brief Return the page size for this level |
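| * (4 KiB at PT_L, 2 MiB at PD_L, 1 GiB at PDP_L, 512 GiB at PML4_L) |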
| */ |
| static size_t page_size() { |
| switch (Level) { |
| case PT_L: |
| return 1ULL << PT_SHIFT; |
| case PD_L: |
| return 1ULL << PD_SHIFT; |
| case PDP_L: |
| return 1ULL << PDP_SHIFT; |
| case PML4_L: |
| return 1ULL << PML4_SHIFT; |
| default: |
| panic("page_size: invalid level\n"); |
| } |
| } |
| |
| /** |
| * @brief Whether the processor supports the page size of this level |
| */ |
| static bool supports_page_size() { |
| DEBUG_ASSERT(Level != PT_L); |
| switch (Level) { |
| case PD_L: |
| return true; |
| case PDP_L: |
| return supports_huge_pages; |
| case PML4_L: |
| return false; |
| default: |
| panic("Unreachable case in supports_page_size\n"); |
| } |
| } |
| |
| /** |
| * @brief Whether an address is aligned to the page size of this level |
| */ |
| static bool page_aligned(vaddr_t vaddr) { |
| return (vaddr & (page_size() - 1)) == 0; |
| } |
| |
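| /** |
| * @brief Return the 9-bit index into this level's page table for a virtual address |
| */ |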
| static uint vaddr_to_index(vaddr_t vaddr) { |
| switch (Level) { |
| case PML4_L: |
| return VADDR_TO_PML4_INDEX(vaddr); |
| case PDP_L: |
| return VADDR_TO_PDP_INDEX(vaddr); |
| case PD_L: |
| return VADDR_TO_PD_INDEX(vaddr); |
| case PT_L: |
| return VADDR_TO_PT_INDEX(vaddr); |
| default: |
| panic("vaddr_to_index: invalid level\n"); |
| } |
| } |
| |
| /** |
| * @brief Convert a PTE to a physical address |
| */ |
| static paddr_t paddr_from_pte(pt_entry_t pte) { |
| DEBUG_ASSERT(IS_PAGE_PRESENT(pte)); |
| |
| paddr_t pa; |
| switch (Level) { |
| case PDP_L: |
| pa = (pte & X86_HUGE_PAGE_FRAME); |
| break; |
| case PD_L: |
| pa = (pte & X86_LARGE_PAGE_FRAME); |
| break; |
| case PT_L: |
| pa = (pte & X86_PG_FRAME); |
| break; |
| default: |
| panic("paddr_from_pte at unhandled level %d\n", Level); |
| } |
| |
| LTRACEF_LEVEL(2, "pte 0x%" PRIxPTE " , level %d, paddr %#" PRIxPTR "\n", pte, Level, pa); |
| |
| return pa; |
| } |
| }; |
| |
| template <int Level> |
| struct PageTable : PageTableBase<Level> { |
| using Base = PageTableBase<Level>; |
| using LowerTable = PageTable<Level - 1>; |
| using TopTable = PageTable<MAX_PAGING_LEVEL>; |
| |
| /** |
| * @brief Return x86 arch flags for intermediate tables |
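| * |
| * Intermediate entries are left maximally permissive (writable, user- |
| * accessible); since x86 intersects the permissions of every level in the |
| * walk, the effective permissions are enforced by the leaf entries. |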
| */ |
| static arch_flags_t intermediate_arch_flags() { |
| return X86_MMU_PG_RW | X86_MMU_PG_U; |
| } |
| |
| /** |
| * @brief Return x86 arch flags from generic MMU flags |
| * |
| * These are used for page mapping entries in the table. |
| */ |
| static arch_flags_t arch_flags(uint aspace_flags, uint flags) { |
| arch_flags_t arch_flags = 0; |
| |
| if (flags & ARCH_MMU_FLAG_PERM_WRITE) |
| arch_flags |= X86_MMU_PG_RW; |
| |
| if (flags & ARCH_MMU_FLAG_PERM_USER) |
| arch_flags |= X86_MMU_PG_U; |
| |
| if (aspace_flags & ARCH_ASPACE_FLAG_KERNEL) { |
| /* setting global flag for kernel pages */ |
| arch_flags |= X86_MMU_PG_G; |
| } |
| |
| if (!(flags & ARCH_MMU_FLAG_PERM_EXECUTE)) |
| arch_flags |= X86_MMU_PG_NX; |
| |
| if (Level > 0) { |
| switch (flags & ARCH_MMU_FLAG_CACHE_MASK) { |
| case ARCH_MMU_FLAG_CACHED: |
| arch_flags |= X86_MMU_LARGE_PAT_WRITEBACK; |
| break; |
| case ARCH_MMU_FLAG_UNCACHED_DEVICE: |
| case ARCH_MMU_FLAG_UNCACHED: |
| arch_flags |= X86_MMU_LARGE_PAT_UNCACHABLE; |
| break; |
| case ARCH_MMU_FLAG_WRITE_COMBINING: |
| arch_flags |= X86_MMU_LARGE_PAT_WRITE_COMBINING; |
| break; |
| default: |
| PANIC_UNIMPLEMENTED; |
| } |
| } else { |
| switch (flags & ARCH_MMU_FLAG_CACHE_MASK) { |
| case ARCH_MMU_FLAG_CACHED: |
| arch_flags |= X86_MMU_PTE_PAT_WRITEBACK; |
| break; |
| case ARCH_MMU_FLAG_UNCACHED_DEVICE: |
| case ARCH_MMU_FLAG_UNCACHED: |
| arch_flags |= X86_MMU_PTE_PAT_UNCACHABLE; |
| break; |
| case ARCH_MMU_FLAG_WRITE_COMBINING: |
| arch_flags |= X86_MMU_PTE_PAT_WRITE_COMBINING; |
| break; |
| default: |
| PANIC_UNIMPLEMENTED; |
| } |
| } |
| |
| return arch_flags; |
| } |
| |
| /** |
| * @brief Return the x86 arch flags to split a large page into smaller pages |
| */ |
| static arch_flags_t split_arch_flags(arch_flags_t arch_flags) { |
| static_assert(Level != PT_L, "tried to split PT_L"); |
| // This can't easily be a static assert without duplicating |
| // a bunch of code in the callers |
| DEBUG_ASSERT(Level != PML4_L); |
| DEBUG_ASSERT(arch_flags & X86_MMU_PG_PS); |
| if (Level == PD_L) { |
| // Note: Clear PS before the check below; the PAT bit for a PTE is |
| // the same as the PS bit for a higher table entry. |
| arch_flags &= ~X86_MMU_PG_PS; |
| |
| /* If the larger page had the PAT flag set, make sure it's |
| * transferred to the different index for a PTE */ |
| if (arch_flags & X86_MMU_PG_LARGE_PAT) { |
| arch_flags &= ~X86_MMU_PG_LARGE_PAT; |
| arch_flags |= X86_MMU_PG_PTE_PAT; |
| } |
| } |
| return arch_flags; |
| } |
| |
| /** |
| * @brief Invalidate a single page at a given page table level |
| */ |
| static void tlb_invalidate_page(X86ArchVmAspace* aspace, vaddr_t vaddr, bool global_page) { |
| x86_tlb_invalidate_page(aspace, vaddr, Base::level, global_page); |
| } |
| }; |
| |
| template <int Level> |
| struct ExtendedPageTable : PageTableBase<Level> { |
| using LowerTable = ExtendedPageTable<Level - 1>; |
| using TopTable = ExtendedPageTable<MAX_PAGING_LEVEL>; |
| |
| /** |
| * @brief Return EPT arch flags for intermediate tables |
| */ |
| static arch_flags_t intermediate_arch_flags() { |
| return X86_EPT_R | X86_EPT_W | X86_EPT_X; |
| } |
| |
| /** |
| * @brief Return EPT arch flags from generic MMU flags |
| * |
| * These are used for page mapping entries in the table. |
| */ |
| static arch_flags_t arch_flags(uint aspace_flags, uint flags) { |
| // Only the write-back memory type is supported. |
| arch_flags_t arch_flags = X86_EPT_WB; |
| |
| if (flags & ARCH_MMU_FLAG_PERM_READ) |
| arch_flags |= X86_EPT_R; |
| |
| if (flags & ARCH_MMU_FLAG_PERM_WRITE) |
| arch_flags |= X86_EPT_W; |
| |
| if (flags & ARCH_MMU_FLAG_PERM_EXECUTE) |
| arch_flags |= X86_EPT_X; |
| |
| return arch_flags; |
| } |
| |
| /** |
| * @brief Return the EPT arch flags to split a large page into smaller pages |
| */ |
| static arch_flags_t split_arch_flags(arch_flags_t arch_flags) { |
| static_assert(Level != PT_L, "tried to split PT_L"); |
| DEBUG_ASSERT(Level != PML4_L); |
| // We don't need to relocate any flags on split for EPT. |
| return arch_flags; |
| } |
| |
| /** |
| * @brief Invalidate a single page at a given page table level |
| */ |
| static void tlb_invalidate_page(X86ArchVmAspace* aspace, vaddr_t vaddr, bool global_page) { |
| // TODO(MG-981): Implement this. |
| } |
| }; |
| |
| /** |
| * @brief Return generic MMU flags from x86 arch flags |
| */ |
| static uint x86_mmu_flags(arch_flags_t flags, enum page_table_levels level) { |
| uint mmu_flags = ARCH_MMU_FLAG_PERM_READ; |
| |
| if (flags & X86_MMU_PG_RW) |
| mmu_flags |= ARCH_MMU_FLAG_PERM_WRITE; |
| |
| if (flags & X86_MMU_PG_U) |
| mmu_flags |= ARCH_MMU_FLAG_PERM_USER; |
| |
| if (!(flags & X86_MMU_PG_NX)) |
| mmu_flags |= ARCH_MMU_FLAG_PERM_EXECUTE; |
| |
| if (level > 0) { |
| switch (flags & X86_MMU_LARGE_PAT_MASK) { |
| case X86_MMU_LARGE_PAT_WRITEBACK: |
| mmu_flags |= ARCH_MMU_FLAG_CACHED; |
| break; |
| case X86_MMU_LARGE_PAT_UNCACHABLE: |
| mmu_flags |= ARCH_MMU_FLAG_UNCACHED; |
| break; |
| case X86_MMU_LARGE_PAT_WRITE_COMBINING: |
| mmu_flags |= ARCH_MMU_FLAG_WRITE_COMBINING; |
| break; |
| default: |
| PANIC_UNIMPLEMENTED; |
| } |
| } else { |
| switch (flags & X86_MMU_PTE_PAT_MASK) { |
| case X86_MMU_PTE_PAT_WRITEBACK: |
| mmu_flags |= ARCH_MMU_FLAG_CACHED; |
| break; |
| case X86_MMU_PTE_PAT_UNCACHABLE: |
| mmu_flags |= ARCH_MMU_FLAG_UNCACHED; |
| break; |
| case X86_MMU_PTE_PAT_WRITE_COMBINING: |
| mmu_flags |= ARCH_MMU_FLAG_WRITE_COMBINING; |
| break; |
| default: |
| PANIC_UNIMPLEMENTED; |
| } |
| } |
| return mmu_flags; |
| } |
| |
| /** |
| * @brief Return generic MMU flags from EPT arch flags |
| */ |
| static uint ept_mmu_flags(arch_flags_t flags, enum page_table_levels level) { |
| // Only the write-back memory type is supported. |
| uint mmu_flags = ARCH_MMU_FLAG_CACHED; |
| |
| if (flags & X86_EPT_R) |
| mmu_flags |= ARCH_MMU_FLAG_PERM_READ; |
| |
| if (flags & X86_EPT_W) |
| mmu_flags |= ARCH_MMU_FLAG_PERM_WRITE; |
| |
| if (flags & X86_EPT_X) |
| mmu_flags |= ARCH_MMU_FLAG_PERM_EXECUTE; |
| |
| return mmu_flags; |
| } |
| |
| struct MappingCursor { |
| public: |
| /** |
| * @brief Update the cursor to skip over a not-present page table entry. |
| */ |
| template <typename PageTable> |
| void SkipEntry() { |
| const size_t ps = PageTable::page_size(); |
| // Calculate the amount the cursor should skip to get to the next entry at |
| // this page table level. |
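| // e.g. at PD_L (2 MiB entries), a vaddr that is 0x1000 bytes past an |
| // entry boundary skips ps - 0x1000 bytes to reach the next boundary. |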
| const size_t skipped_size = ps - (vaddr & (ps - 1)); |
| // If our endpoint was in the middle of this range, clamp the |
| // amount we remove from the cursor |
| const size_t _size = (size > skipped_size) ? skipped_size : size; |
| |
| size -= _size; |
| vaddr += _size; |
| } |
| |
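| // paddr/vaddr track the current position; size is the number of bytes left. |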
| paddr_t paddr; |
| vaddr_t vaddr; |
| size_t size; |
| }; |
| |
| template <typename PageTable> |
| void X86ArchVmAspace::UpdateEntry(vaddr_t vaddr, volatile pt_entry_t* pte, paddr_t paddr, |
| arch_flags_t flags) { |
| DEBUG_ASSERT(pte); |
| DEBUG_ASSERT(IS_PAGE_ALIGNED(paddr)); |
| |
| pt_entry_t olde = *pte; |
| |
| /* set the new entry */ |
| *pte = paddr | flags | X86_MMU_PG_P; |
| |
| /* attempt to invalidate the page */ |
| if (IS_PAGE_PRESENT(olde)) { |
| PageTable::tlb_invalidate_page(this, vaddr, is_kernel_address(vaddr)); |
| } |
| } |
| |
| template <typename PageTable> |
| void X86ArchVmAspace::UnmapEntry(X86ArchVmAspace* aspace, vaddr_t vaddr, volatile pt_entry_t* pte) { |
| DEBUG_ASSERT(pte); |
| |
| pt_entry_t olde = *pte; |
| |
| *pte = 0; |
| |
| /* attempt to invalidate the page */ |
| if (IS_PAGE_PRESENT(olde)) { |
| PageTable::tlb_invalidate_page(aspace, vaddr, is_kernel_address(vaddr)); |
| } |
| } |
| |
| /** |
| * @brief Allocate a new page table page |
| */ |
| static volatile pt_entry_t* _map_alloc_page(void) { |
| vm_page_t* p; |
| |
| pt_entry_t* page_ptr = static_cast<pt_entry_t*>(pmm_alloc_kpage(nullptr, &p)); |
| if (!page_ptr) |
| return nullptr; |
| |
| arch_zero_page(page_ptr); |
| p->state = VM_PAGE_STATE_MMU; |
| |
| return page_ptr; |
| } |
| |
| /** |
| * @brief Split the given large page into smaller pages |
| */ |
| template <typename PageTable> |
| status_t X86ArchVmAspace::SplitLargePage(vaddr_t vaddr, volatile pt_entry_t* pte) { |
| static_assert(PageTable::level != PT_L, "tried splitting PT_L"); |
| LTRACEF_LEVEL(2, "splitting table %p at level %d\n", pte, PageTable::level); |
| |
| DEBUG_ASSERT(IS_PAGE_PRESENT(*pte) && IS_LARGE_PAGE(*pte)); |
| volatile pt_entry_t* m = _map_alloc_page(); |
| if (m == nullptr) { |
| return ZX_ERR_NO_MEMORY; |
| } |
| |
| paddr_t paddr_base = PageTable::paddr_from_pte(*pte); |
| arch_flags_t flags = PageTable::split_arch_flags(*pte & X86_LARGE_FLAGS_MASK); |
| |
| DEBUG_ASSERT(PageTable::page_aligned(vaddr)); |
| vaddr_t new_vaddr = vaddr; |
| paddr_t new_paddr = paddr_base; |
| size_t ps = PageTable::LowerTable::page_size(); |
| for (int i = 0; i < NO_OF_PT_ENTRIES; i++) { |
| volatile pt_entry_t* e = m + i; |
| // If this is a PDP_L entry (i.e. a huge page), flags will still include |
| // the PS bit, so the new PD entries will be large pages. |
| UpdateEntry<typename PageTable::LowerTable>(new_vaddr, e, new_paddr, flags); |
| new_vaddr += ps; |
| new_paddr += ps; |
| } |
| DEBUG_ASSERT(new_vaddr == vaddr + PageTable::page_size()); |
| |
| flags = PageTable::intermediate_arch_flags(); |
| UpdateEntry<PageTable>(vaddr, pte, X86_VIRT_TO_PHYS(m), flags); |
| pt_pages_++; |
| return ZX_OK; |
| } |
| |
| /** |
| * @brief Given a page table entry, return a pointer to the next page table one level down |
| */ |
| static inline volatile pt_entry_t* get_next_table_from_entry(pt_entry_t entry) { |
| if (!IS_PAGE_PRESENT(entry) || IS_LARGE_PAGE(entry)) |
| return nullptr; |
| |
| return reinterpret_cast<volatile pt_entry_t*>(X86_PHYS_TO_VIRT(entry & X86_PG_FRAME)); |
| } |
| |
| /** |
| * @brief Walk the page table structures returning the entry and level that maps the address. |
| * |
| * @param table The top-level paging structure's virtual address |
| * @param vaddr The virtual address to retrieve the mapping for |
| * @param ret_level The level of the table that defines the found mapping |
| * @param mapping The mapping that was found |
| * |
| * @return ZX_OK if mapping is found |
| * @return ZX_ERR_NOT_FOUND if mapping is not found |
| */ |
| template <typename PageTable> |
| status_t X86ArchVmAspace::GetMapping(volatile pt_entry_t* table, vaddr_t vaddr, |
| enum page_table_levels* ret_level, |
| volatile pt_entry_t** mapping) { |
| DEBUG_ASSERT(table); |
| DEBUG_ASSERT(ret_level); |
| DEBUG_ASSERT(mapping); |
| |
| LTRACEF_LEVEL(2, "table %p\n", table); |
| |
| uint index = PageTable::vaddr_to_index(vaddr); |
| volatile pt_entry_t* e = table + index; |
| pt_entry_t pt_val = *e; |
| if (!IS_PAGE_PRESENT(pt_val)) |
| return ZX_ERR_NOT_FOUND; |
| |
| /* if this is a large page, stop here */ |
| if (IS_LARGE_PAGE(pt_val)) { |
| *mapping = e; |
| *ret_level = PageTable::level; |
| return ZX_OK; |
| } |
| |
| volatile pt_entry_t* next_table = get_next_table_from_entry(pt_val); |
| return GetMapping<typename PageTable::LowerTable>(next_table, vaddr, |
| ret_level, mapping); |
| } |
| |
| template <> |
| status_t X86ArchVmAspace::GetMapping<PageTable<PT_L>>( |
| volatile pt_entry_t* table, vaddr_t vaddr, |
| enum page_table_levels* ret_level, |
| volatile pt_entry_t** mapping) { |
| return GetMappingL0<PageTable<PT_L>>(table, vaddr, ret_level, mapping); |
| } |
| |
| template <> |
| status_t X86ArchVmAspace::GetMapping<ExtendedPageTable<PT_L>>( |
| volatile pt_entry_t* table, vaddr_t vaddr, |
| enum page_table_levels* ret_level, |
| volatile pt_entry_t** mapping) { |
| return GetMappingL0<ExtendedPageTable<PT_L>>(table, vaddr, ret_level, mapping); |
| } |
| |
| template <typename PageTable> |
| status_t X86ArchVmAspace::GetMappingL0(volatile pt_entry_t* table, vaddr_t vaddr, |
| enum page_table_levels* ret_level, |
| volatile pt_entry_t** mapping) { |
| static_assert(PageTable::level == PT_L, "GetMappingL0 used with wrong level"); |
| |
| /* do the final page table lookup */ |
| uint index = PageTable::vaddr_to_index(vaddr); |
| volatile pt_entry_t* e = table + index; |
| if (!IS_PAGE_PRESENT(*e)) |
| return ZX_ERR_NOT_FOUND; |
| |
| *mapping = e; |
| *ret_level = PageTable::level; |
| return ZX_OK; |
| } |
| |
| /** |
| * @brief Unmaps the range specified by start_cursor. |
| * |
| * Level must be MAX_PAGING_LEVEL when invoked. |
| * |
| * @param table The top-level paging structure's virtual address. |
| * @param start_cursor A cursor describing the range of address space to |
| * unmap within table |
| * @param new_cursor A returned cursor describing how much work was not |
| * completed. Must be non-null. |
| * |
| * @return true if at least one page was unmapped at this level |
| */ |
| template <typename PageTable> |
| bool X86ArchVmAspace::RemoveMapping(volatile pt_entry_t* table, |
| const MappingCursor& start_cursor, |
| MappingCursor* new_cursor) { |
| DEBUG_ASSERT(table); |
| LTRACEF("L: %d, %016" PRIxPTR " %016zx\n", PageTable::level, start_cursor.vaddr, |
| start_cursor.size); |
| DEBUG_ASSERT(x86_mmu_check_vaddr(start_cursor.vaddr)); |
| |
| *new_cursor = start_cursor; |
| |
| bool unmapped = false; |
| size_t ps = PageTable::page_size(); |
| uint index = PageTable::vaddr_to_index(new_cursor->vaddr); |
| for (; index != NO_OF_PT_ENTRIES && new_cursor->size != 0; ++index) { |
| volatile pt_entry_t* e = table + index; |
| pt_entry_t pt_val = *e; |
| // If the page isn't even mapped, just skip it |
| if (!IS_PAGE_PRESENT(pt_val)) { |
| new_cursor->SkipEntry<PageTable>(); |
| DEBUG_ASSERT(new_cursor->size <= start_cursor.size); |
| continue; |
| } |
| |
| if (IS_LARGE_PAGE(pt_val)) { |
| bool vaddr_level_aligned = PageTable::page_aligned(new_cursor->vaddr); |
| // If the request covers the entire large page, just unmap it |
| if (vaddr_level_aligned && new_cursor->size >= ps) { |
| UnmapEntry<PageTable>(this, new_cursor->vaddr, e); |
| unmapped = true; |
| |
| new_cursor->vaddr += ps; |
| new_cursor->size -= ps; |
| DEBUG_ASSERT(new_cursor->size <= start_cursor.size); |
| continue; |
| } |
| // Otherwise, we need to split it |
| vaddr_t page_vaddr = new_cursor->vaddr & ~(ps - 1); |
| status_t status = SplitLargePage<PageTable>(page_vaddr, e); |
| if (status != ZX_OK) { |
| // If split fails, just unmap the whole thing, and let a |
| // subsequent page fault clean it up. |
| UnmapEntry<PageTable>(this, new_cursor->vaddr, e); |
| unmapped = true; |
| |
| new_cursor->SkipEntry<PageTable>(); |
| DEBUG_ASSERT(new_cursor->size <= start_cursor.size); |
| // The entry is now gone, so there is no lower table to descend into. |
| continue; |
| } |
| pt_val = *e; |
| } |
| |
| MappingCursor cursor; |
| volatile pt_entry_t* next_table = get_next_table_from_entry(pt_val); |
| bool lower_unmapped = RemoveMapping<typename PageTable::LowerTable>( |
| next_table, *new_cursor, &cursor); |
| |
| // If we were requesting to unmap everything in the lower page table, |
| // we know we can unmap the lower level page table. Otherwise, if |
| // we unmapped anything in the lower level, check to see if that |
| // level is now empty. |
| bool unmap_page_table = |
| PageTable::page_aligned(new_cursor->vaddr) && new_cursor->size >= ps; |
| if (!unmap_page_table && lower_unmapped) { |
| uint lower_idx; |
| for (lower_idx = 0; lower_idx < NO_OF_PT_ENTRIES; ++lower_idx) { |
| if (IS_PAGE_PRESENT(next_table[lower_idx])) { |
| break; |
| } |
| } |
| if (lower_idx == NO_OF_PT_ENTRIES) { |
| unmap_page_table = true; |
| } |
| } |
| if (unmap_page_table) { |
| paddr_t ptable_phys = X86_VIRT_TO_PHYS(next_table); |
| LTRACEF("L: %d free pt v %#" PRIxPTR " phys %#" PRIxPTR "\n", |
| PageTable::level, (uintptr_t)next_table, ptable_phys); |
| |
| UnmapEntry<PageTable>(this, new_cursor->vaddr, e); |
| vm_page_t* page = paddr_to_vm_page(ptable_phys); |
| |
| DEBUG_ASSERT(page); |
| DEBUG_ASSERT_MSG(page->state == VM_PAGE_STATE_MMU, |
| "page %p state %u, paddr %#" PRIxPTR "\n", page, page->state, |
| X86_VIRT_TO_PHYS(next_table)); |
| |
| pmm_free_page(page); |
| pt_pages_--; |
| unmapped = true; |
| } |
| *new_cursor = cursor; |
| DEBUG_ASSERT(new_cursor->size <= start_cursor.size); |
| |
| DEBUG_ASSERT(new_cursor->size == 0 || PageTable::page_aligned(new_cursor->vaddr)); |
| } |
| |
| return unmapped; |
| } |
| |
| template <> |
| bool X86ArchVmAspace::RemoveMapping<PageTable<PT_L>>(volatile pt_entry_t* table, |
| const MappingCursor& start_cursor, |
| MappingCursor* new_cursor) { |
| return RemoveMappingL0<PageTable<PT_L>>(table, start_cursor, new_cursor); |
| } |
| |
| template <> |
| bool X86ArchVmAspace::RemoveMapping<ExtendedPageTable<PT_L>>(volatile pt_entry_t* table, |
| const MappingCursor& start_cursor, |
| MappingCursor* new_cursor) { |
| return RemoveMappingL0<ExtendedPageTable<PT_L>>(table, start_cursor, new_cursor); |
| } |
| |
| // Base case of RemoveMapping for smallest page size. |
| template <typename PageTable> |
| bool X86ArchVmAspace::RemoveMappingL0(volatile pt_entry_t* table, |
| const MappingCursor& start_cursor, |
| MappingCursor* new_cursor) { |
| static_assert(PageTable::level == PT_L, "RemoveMappingL0 used with wrong level"); |
| LTRACEF("%016" PRIxPTR " %016zx\n", start_cursor.vaddr, start_cursor.size); |
| DEBUG_ASSERT(IS_PAGE_ALIGNED(start_cursor.size)); |
| |
| *new_cursor = start_cursor; |
| |
| bool unmapped = false; |
| uint index = PageTable::vaddr_to_index(new_cursor->vaddr); |
| for (; index != NO_OF_PT_ENTRIES && new_cursor->size != 0; ++index) { |
| volatile pt_entry_t* e = table + index; |
| if (IS_PAGE_PRESENT(*e)) { |
| UnmapEntry<PageTable>(this, new_cursor->vaddr, e); |
| unmapped = true; |
| } |
| |
| new_cursor->vaddr += PAGE_SIZE; |
| new_cursor->size -= PAGE_SIZE; |
| DEBUG_ASSERT(new_cursor->size <= start_cursor.size); |
| } |
| return unmapped; |
| } |
| |
| /** |
| * @brief Creates mappings for the range specified by start_cursor |
| * |
| * Level must be MAX_PAGING_LEVEL when invoked. |
| * |
| * @param table The top-level paging structure's virtual address. |
| * @param start_cursor A cursor describing the range of address space to |
| * act on within table |
| * @param new_cursor A returned cursor describing how much work was not |
| * completed. Must be non-null. |
| * |
| * @return ZX_OK if successful |
| * @return ZX_ERR_ALREADY_EXISTS if the range overlaps an existing mapping |
| * @return ZX_ERR_NO_MEMORY if intermediate page tables could not be allocated |
| */ |
| template <typename PageTable> |
| status_t X86ArchVmAspace::AddMapping(volatile pt_entry_t* table, uint mmu_flags, |
| const MappingCursor& start_cursor, |
| MappingCursor* new_cursor) { |
| DEBUG_ASSERT(table); |
| DEBUG_ASSERT(x86_mmu_check_vaddr(start_cursor.vaddr)); |
| DEBUG_ASSERT(x86_mmu_check_paddr(start_cursor.paddr)); |
| |
| status_t ret = ZX_OK; |
| *new_cursor = start_cursor; |
| |
| arch_flags_t interm_arch_flags = PageTable::intermediate_arch_flags(); |
| arch_flags_t arch_flags = PageTable::arch_flags(flags_, mmu_flags); |
| |
| size_t ps = PageTable::page_size(); |
| bool level_supports_large_pages = PageTable::supports_page_size(); |
| uint index = PageTable::vaddr_to_index(new_cursor->vaddr); |
| for (; index != NO_OF_PT_ENTRIES && new_cursor->size != 0; ++index) { |
| volatile pt_entry_t* e = table + index; |
| pt_entry_t pt_val = *e; |
| // See if there's a large page in our way |
| if (IS_PAGE_PRESENT(pt_val) && IS_LARGE_PAGE(pt_val)) { |
| ret = ZX_ERR_ALREADY_EXISTS; |
| goto err; |
| } |
| |
| // Check if this is a candidate for a new large page |
| bool level_valigned = PageTable::page_aligned(new_cursor->vaddr); |
| bool level_paligned = PageTable::page_aligned(new_cursor->paddr); |
| if (level_supports_large_pages && !IS_PAGE_PRESENT(pt_val) && level_valigned && |
| level_paligned && new_cursor->size >= ps) { |
| |
| UpdateEntry<PageTable>(new_cursor->vaddr, table + index, |
| new_cursor->paddr, |
| arch_flags | X86_MMU_PG_PS); |
| |
| new_cursor->paddr += ps; |
| new_cursor->vaddr += ps; |
| new_cursor->size -= ps; |
| DEBUG_ASSERT(new_cursor->size <= start_cursor.size); |
| } else { |
| // See if we need to create a new table |
| if (!IS_PAGE_PRESENT(pt_val)) { |
| volatile pt_entry_t* m = _map_alloc_page(); |
| if (m == nullptr) { |
| ret = ZX_ERR_NO_MEMORY; |
| goto err; |
| } |
| |
| LTRACEF_LEVEL(2, "new table %p at level %d\n", m, PageTable::level); |
| |
| UpdateEntry<PageTable>(new_cursor->vaddr, e, |
| X86_VIRT_TO_PHYS(m), interm_arch_flags); |
| pt_val = *e; |
| pt_pages_++; |
| } |
| |
| MappingCursor cursor; |
| ret = AddMapping<typename PageTable::LowerTable>( |
| get_next_table_from_entry(pt_val), mmu_flags, *new_cursor, &cursor); |
| *new_cursor = cursor; |
| DEBUG_ASSERT(new_cursor->size <= start_cursor.size); |
| if (ret != ZX_OK) { |
| goto err; |
| } |
| } |
| } |
| return ZX_OK; |
| err: |
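| // Only the top-level invocation unwinds; recursive calls just propagate |
| // the error so the cleanup below sees the full range. |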
| if (fbl::is_same<PageTable, typename PageTable::TopTable>::value) { |
| MappingCursor cursor = start_cursor; |
| MappingCursor result; |
| // new_cursor->size is how much was left unmapped, so the difference |
| // is the amount that was actually mapped and must be torn down. |
| cursor.size -= new_cursor->size; |
| if (cursor.size > 0) { |
| RemoveMapping<typename PageTable::TopTable>(table, cursor, &result); |
| DEBUG_ASSERT(result.size == 0); |
| } |
| } |
| return ret; |
| } |
| |
| template <> |
| status_t X86ArchVmAspace::AddMapping<PageTable<PT_L>>( |
| volatile pt_entry_t* table, uint mmu_flags, |
| const MappingCursor& start_cursor, MappingCursor* new_cursor) { |
| return AddMappingL0<PageTable<PT_L>>(table, mmu_flags, start_cursor, |
| new_cursor); |
| } |
| |
| template <> |
| status_t X86ArchVmAspace::AddMapping<ExtendedPageTable<PT_L>>( |
| volatile pt_entry_t* table, uint mmu_flags, |
| const MappingCursor& start_cursor, MappingCursor* new_cursor) { |
| return AddMappingL0<ExtendedPageTable<PT_L>>(table, mmu_flags, start_cursor, |
| new_cursor); |
| } |
| |
| // Base case of AddMapping for smallest page size. |
| template <typename PageTable> |
| status_t X86ArchVmAspace::AddMappingL0(volatile pt_entry_t* table, uint mmu_flags, |
| const MappingCursor& start_cursor, |
| MappingCursor* new_cursor) { |
| static_assert(PageTable::level == PT_L, "AddMappingL0 used with wrong level"); |
| DEBUG_ASSERT(IS_PAGE_ALIGNED(start_cursor.size)); |
| |
| *new_cursor = start_cursor; |
| |
| arch_flags_t arch_flags = PageTable::arch_flags(flags_, mmu_flags); |
| |
| uint index = PageTable::vaddr_to_index(new_cursor->vaddr); |
| for (; index != NO_OF_PT_ENTRIES && new_cursor->size != 0; ++index) { |
| volatile pt_entry_t* e = table + index; |
| if (IS_PAGE_PRESENT(*e)) { |
| return ZX_ERR_ALREADY_EXISTS; |
| } |
| |
| UpdateEntry<PageTable>(new_cursor->vaddr, e, new_cursor->paddr, arch_flags); |
| |
| new_cursor->paddr += PAGE_SIZE; |
| new_cursor->vaddr += PAGE_SIZE; |
| new_cursor->size -= PAGE_SIZE; |
| DEBUG_ASSERT(new_cursor->size <= start_cursor.size); |
| } |
| |
| return ZX_OK; |
| } |
| |
| /** |
| * @brief Changes the permissions/caching of the range specified by start_cursor |
| * |
| * Level must be MAX_PAGING_LEVEL when invoked. |
| * |
| * @param table The top-level paging structure's virtual address. |
| * @param start_cursor A cursor describing the range of address space to |
| * act on within table |
| * @param new_cursor A returned cursor describing how much work was not |
| * completed. Must be non-null. |
| */ |
| template <typename PageTable> |
| status_t X86ArchVmAspace::UpdateMapping(volatile pt_entry_t* table, |
| uint mmu_flags, |
| const MappingCursor& start_cursor, |
| MappingCursor* new_cursor) { |
| DEBUG_ASSERT(table); |
| LTRACEF("L: %d, %016" PRIxPTR " %016zx\n", PageTable::level, start_cursor.vaddr, |
| start_cursor.size); |
| DEBUG_ASSERT(x86_mmu_check_vaddr(start_cursor.vaddr)); |
| |
| status_t ret = ZX_OK; |
| *new_cursor = start_cursor; |
| |
| arch_flags_t arch_flags = PageTable::arch_flags(flags_, mmu_flags); |
| |
| size_t ps = PageTable::page_size(); |
| uint index = PageTable::vaddr_to_index(new_cursor->vaddr); |
| for (; index != NO_OF_PT_ENTRIES && new_cursor->size != 0; ++index) { |
| volatile pt_entry_t* e = table + index; |
| pt_entry_t pt_val = *e; |
| // Skip unmapped pages (we may encounter these due to demand paging) |
| if (!IS_PAGE_PRESENT(pt_val)) { |
| new_cursor->SkipEntry<PageTable>(); |
| continue; |
| } |
| |
| if (IS_LARGE_PAGE(pt_val)) { |
| bool vaddr_level_aligned = PageTable::page_aligned(new_cursor->vaddr); |
| // If the request covers the entire large page, just change the |
| // permissions |
| if (vaddr_level_aligned && new_cursor->size >= ps) { |
| UpdateEntry<PageTable>(new_cursor->vaddr, e, |
| PageTable::paddr_from_pte(pt_val), |
| arch_flags | X86_MMU_PG_PS); |
| |
| new_cursor->vaddr += ps; |
| new_cursor->size -= ps; |
| DEBUG_ASSERT(new_cursor->size <= start_cursor.size); |
| continue; |
| } |
| // Otherwise, we need to split it |
| vaddr_t page_vaddr = new_cursor->vaddr & ~(ps - 1); |
| ret = SplitLargePage<PageTable>(page_vaddr, e); |
| if (ret != ZX_OK) { |
| // If we failed to split the table, just unmap it. Subsequent |
| // page faults will bring it back in. |
| MappingCursor cursor; |
| cursor.vaddr = new_cursor->vaddr; |
| cursor.size = ps; |
| |
| MappingCursor tmp_cursor; |
| RemoveMapping<PageTable>(table, cursor, &tmp_cursor); |
| |
| new_cursor->SkipEntry<PageTable>(); |
| // The entry was unmapped, so there is no lower table to update. |
| continue; |
| } |
| pt_val = *e; |
| } |
| |
| MappingCursor cursor; |
| volatile pt_entry_t* next_table = get_next_table_from_entry(pt_val); |
| ret = UpdateMapping<typename PageTable::LowerTable>(next_table, mmu_flags, |
| *new_cursor, &cursor); |
| *new_cursor = cursor; |
| if (ret != ZX_OK) { |
| // Currently this can't happen |
| ASSERT(false); |
| } |
| DEBUG_ASSERT(new_cursor->size <= start_cursor.size); |
| |
| DEBUG_ASSERT(new_cursor->size == 0 || PageTable::page_aligned(new_cursor->vaddr)); |
| } |
| return ZX_OK; |
| } |
| |
| template <> |
| status_t X86ArchVmAspace::UpdateMapping<PageTable<PT_L>>( |
| volatile pt_entry_t* table, uint mmu_flags, |
| const MappingCursor& start_cursor, MappingCursor* new_cursor) { |
| return UpdateMappingL0<PageTable<PT_L>>(table, mmu_flags, |
| start_cursor, new_cursor); |
| } |
| |
| template <> |
| status_t X86ArchVmAspace::UpdateMapping<ExtendedPageTable<PT_L>>( |
| volatile pt_entry_t* table, uint mmu_flags, |
| const MappingCursor& start_cursor, MappingCursor* new_cursor) { |
| return UpdateMappingL0<ExtendedPageTable<PT_L>>(table, mmu_flags, |
| start_cursor, new_cursor); |
| } |
| |
| // Base case of UpdateMapping for smallest page size. |
| template <typename PageTable> |
| status_t X86ArchVmAspace::UpdateMappingL0(volatile pt_entry_t* table, |
| uint mmu_flags, |
| const MappingCursor& start_cursor, |
| MappingCursor* new_cursor) { |
| static_assert(PageTable::level == PT_L, "UpdateMappingL0 used with wrong level"); |
| LTRACEF("%016" PRIxPTR " %016zx\n", start_cursor.vaddr, start_cursor.size); |
| DEBUG_ASSERT(IS_PAGE_ALIGNED(start_cursor.size)); |
| |
| *new_cursor = start_cursor; |
| |
| arch_flags_t arch_flags = PageTable::arch_flags(flags_, mmu_flags); |
| |
| uint index = PageTable::vaddr_to_index(new_cursor->vaddr); |
| for (; index != NO_OF_PT_ENTRIES && new_cursor->size != 0; ++index) { |
| volatile pt_entry_t* e = table + index; |
| pt_entry_t pt_val = *e; |
| // Skip unmapped pages (we may encounter these due to demand paging) |
| if (IS_PAGE_PRESENT(pt_val)) { |
| UpdateEntry<PageTable>(new_cursor->vaddr, e, |
| PageTable::paddr_from_pte(pt_val), |
| arch_flags); |
| } |
| |
| new_cursor->vaddr += PAGE_SIZE; |
| new_cursor->size -= PAGE_SIZE; |
| DEBUG_ASSERT(new_cursor->size <= start_cursor.size); |
| } |
| DEBUG_ASSERT(new_cursor->size == 0 || PageTable::page_aligned(new_cursor->vaddr)); |
| return ZX_OK; |
| } |
| |
| template <template <int> class PageTable> |
| status_t X86ArchVmAspace::UnmapPages(vaddr_t vaddr, const size_t count, |
| size_t* unmapped) { |
| LTRACEF("aspace %p, vaddr %#" PRIxPTR ", count %#zx\n", this, vaddr, count); |
| |
| canary_.Assert(); |
| |
| if (!x86_mmu_check_vaddr(vaddr)) |
| return ZX_ERR_INVALID_ARGS; |
| if (!IsValidVaddr(vaddr)) |
| return ZX_ERR_INVALID_ARGS; |
| if (count == 0) |
| return ZX_OK; |
| |
| DEBUG_ASSERT(pt_virt_); |
| |
| MappingCursor start = { |
| .paddr = 0, .vaddr = vaddr, .size = count * PAGE_SIZE, |
| }; |
| |
| MappingCursor result; |
| RemoveMapping<PageTable<MAX_PAGING_LEVEL>>(pt_virt_, start, &result); |
| DEBUG_ASSERT(result.size == 0); |
| |
| if (unmapped) |
| *unmapped = count; |
| |
| return ZX_OK; |
| } |
| |
| status_t X86ArchVmAspace::Unmap(vaddr_t vaddr, size_t count, size_t* unmapped) { |
| fbl::AutoLock a(&lock_); |
| |
| if (flags_ & ARCH_ASPACE_FLAG_GUEST_PASPACE) { |
| return UnmapPages<ExtendedPageTable>(vaddr, count, unmapped); |
| } else { |
| return UnmapPages<PageTable>(vaddr, count, unmapped); |
| } |
| } |
| |
| template <template <int> class PageTable> |
| status_t X86ArchVmAspace::MapPages(vaddr_t vaddr, paddr_t paddr, |
| const size_t count, uint mmu_flags, |
| size_t* mapped) { |
| canary_.Assert(); |
| |
| LTRACEF("aspace %p, vaddr %#" PRIxPTR " paddr %#" PRIxPTR " count %#zx mmu_flags 0x%x\n", |
| this, vaddr, paddr, count, mmu_flags); |
| |
| if (!x86_mmu_check_paddr(paddr)) |
| return ZX_ERR_INVALID_ARGS; |
| if (!x86_mmu_check_vaddr(vaddr)) |
| return ZX_ERR_INVALID_ARGS; |
| if (!IsValidVaddr(vaddr)) |
| return ZX_ERR_INVALID_ARGS; |
| if (count == 0) |
| return ZX_OK; |
| |
| if (!(mmu_flags & ARCH_MMU_FLAG_PERM_READ)) |
| return ZX_ERR_INVALID_ARGS; |
| |
| DEBUG_ASSERT(pt_virt_); |
| |
| MappingCursor start = { |
| .paddr = paddr, .vaddr = vaddr, .size = count * PAGE_SIZE, |
| }; |
| MappingCursor result; |
| status_t status = AddMapping<PageTable<MAX_PAGING_LEVEL>>(pt_virt_, mmu_flags, |
| start, &result); |
| if (status != ZX_OK) { |
| dprintf(SPEW, "Add mapping failed with err=%d\n", status); |
| return status; |
| } |
| DEBUG_ASSERT(result.size == 0); |
| |
| if (mapped) |
| *mapped = count; |
| |
| return ZX_OK; |
| } |
| |
| status_t X86ArchVmAspace::Map(vaddr_t vaddr, paddr_t paddr, size_t count, |
| uint mmu_flags, size_t* mapped) { |
| fbl::AutoLock a(&lock_); |
| |
| if (flags_ & ARCH_ASPACE_FLAG_GUEST_PASPACE) { |
| if (mmu_flags & ~kValidEptFlags) |
| return ZX_ERR_INVALID_ARGS; |
| return MapPages<ExtendedPageTable>(vaddr, paddr, count, mmu_flags, mapped); |
| } else { |
| return MapPages<PageTable>(vaddr, paddr, count, mmu_flags, mapped); |
| } |
| } |
| |
| template <template <int> class PageTable> |
| status_t X86ArchVmAspace::ProtectPages(vaddr_t vaddr, size_t count, uint mmu_flags) { |
| canary_.Assert(); |
| |
| LTRACEF("aspace %p, vaddr %#" PRIxPTR " count %#zx mmu_flags 0x%x\n", |
| this, vaddr, count, mmu_flags); |
| |
| if (!x86_mmu_check_vaddr(vaddr)) |
| return ZX_ERR_INVALID_ARGS; |
| if (!IsValidVaddr(vaddr)) |
| return ZX_ERR_INVALID_ARGS; |
| if (count == 0) |
| return ZX_OK; |
| |
| if (!(mmu_flags & ARCH_MMU_FLAG_PERM_READ)) |
| return ZX_ERR_INVALID_ARGS; |
| |
| MappingCursor start = { |
| .paddr = 0, .vaddr = vaddr, .size = count * PAGE_SIZE, |
| }; |
| MappingCursor result; |
| status_t status = UpdateMapping<PageTable<MAX_PAGING_LEVEL>>( |
| pt_virt_, mmu_flags, start, &result); |
| if (status != ZX_OK) { |
| return status; |
| } |
| DEBUG_ASSERT(result.size == 0); |
| return ZX_OK; |
| } |
| |
| status_t X86ArchVmAspace::Protect(vaddr_t vaddr, size_t count, uint mmu_flags) { |
| fbl::AutoLock a(&lock_); |
| |
| if (flags_ & ARCH_ASPACE_FLAG_GUEST_PASPACE) { |
| if (mmu_flags & ~kValidEptFlags) |
| return ZX_ERR_INVALID_ARGS; |
| return ProtectPages<ExtendedPageTable>(vaddr, count, mmu_flags); |
| } else { |
| return ProtectPages<PageTable>(vaddr, count, mmu_flags); |
| } |
| } |
| |
| void x86_mmu_early_init() { |
| x86_mmu_mem_type_init(); |
| x86_mmu_percpu_init(); |
| |
| // Unmap the lower identity mapping. |
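| // (start.S identity-maps low memory for early boot; the mapping is no |
| // longer needed once the kernel runs from its high address.) |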
| X86ArchVmAspace::UnmapEntry<PageTable<PML4_L>>(nullptr, 0, &pml4[0]); |
| |
| /* get the address width from the CPU */ |
| uint8_t vaddr_width = x86_linear_address_width(); |
| uint8_t paddr_width = x86_physical_address_width(); |
| |
| supports_huge_pages = x86_feature_test(X86_FEATURE_HUGE_PAGE); |
| |
| /* if we got something meaningful, override the defaults. |
| * some cpu/emulator combinations return nonsense paddr |
| * widths (e.g. 1), so only accept values wider than the defaults. */ |
| if (paddr_width > g_paddr_width) |
| g_paddr_width = paddr_width; |
| if (vaddr_width > g_vaddr_width) |
| g_vaddr_width = vaddr_width; |
| |
| LTRACEF("paddr_width %u vaddr_width %u\n", g_paddr_width, g_vaddr_width); |
| } |
| |
| void x86_mmu_init(void) {} |
| |
| X86ArchVmAspace::X86ArchVmAspace() {} |
| |
| /* |
| * Fill in the high level x86 arch aspace structure and allocate a top level page table. |
| */ |
| status_t X86ArchVmAspace::Init(vaddr_t base, size_t size, uint mmu_flags) { |
| static_assert(sizeof(mp_cpu_mask_t) == sizeof(active_cpus_), "active_cpus_ size mismatch"); |
| canary_.Assert(); |
| |
| fbl::AutoLock a(&lock_); |
| |
| LTRACEF("aspace %p, base %#" PRIxPTR ", size 0x%zx, mmu_flags 0x%x\n", this, base, size, |
| mmu_flags); |
| |
| flags_ = mmu_flags; |
| base_ = base; |
| size_ = size; |
| if (mmu_flags & ARCH_ASPACE_FLAG_KERNEL) { |
| pt_phys_ = kernel_pt_phys; |
| pt_virt_ = (pt_entry_t*)X86_PHYS_TO_VIRT(pt_phys_); |
| LTRACEF("kernel aspace: pt phys %#" PRIxPTR ", virt %p\n", pt_phys_, pt_virt_); |
| } else if (mmu_flags & ARCH_ASPACE_FLAG_GUEST_PASPACE) { |
| vm_page_t* p = pmm_alloc_page(0, &pt_phys_); |
| if (p == nullptr) { |
| TRACEF("error allocating top level page directory\n"); |
| return ZX_ERR_NO_MEMORY; |
| } |
| p->state = VM_PAGE_STATE_MMU; |
| pt_virt_ = static_cast<pt_entry_t*>(paddr_to_kvaddr(pt_phys_)); |
| memset(pt_virt_, 0, sizeof(pt_entry_t) * NO_OF_PT_ENTRIES); |
| LTRACEF("guest paspace: pt phys %#" PRIxPTR ", virt %p\n", pt_phys_, pt_virt_); |
| } else { |
| /* allocate a top level page table for the new address space */ |
| paddr_t pa; |
| vm_page_t* p; |
| pt_virt_ = (pt_entry_t*)pmm_alloc_kpage(&pa, &p); |
| if (!pt_virt_) { |
| TRACEF("error allocating top level page directory\n"); |
| return ZX_ERR_NO_MEMORY; |
| } |
| pt_phys_ = pa; |
| |
| p->state = VM_PAGE_STATE_MMU; |
| |
| // Zero out the user space half of it. |
| memset(pt_virt_, 0, sizeof(pt_entry_t) * NO_OF_PT_ENTRIES / 2); |
| |
| // Copy the kernel portion of it from the master kernel pt. |
| memcpy(pt_virt_ + NO_OF_PT_ENTRIES / 2, |
| const_cast<pt_entry_t*>(&KERNEL_PT[NO_OF_PT_ENTRIES / 2]), |
| sizeof(pt_entry_t) * NO_OF_PT_ENTRIES / 2); |
| |
| LTRACEF("user aspace: pt phys %#" PRIxPTR ", virt %p\n", pt_phys_, pt_virt_); |
| } |
| pt_pages_ = 1; |
| active_cpus_ = 0; |
| |
| return ZX_OK; |
| } |
| |
| template <template <int> class PageTable> |
| status_t X86ArchVmAspace::DestroyAspace() { |
| canary_.Assert(); |
| DEBUG_ASSERT(active_cpus_ == 0); |
| |
| #if LK_DEBUGLEVEL > 1 |
| pt_entry_t* table = static_cast<pt_entry_t*>(pt_virt_); |
| uint start = PageTable<MAX_PAGING_LEVEL>::vaddr_to_index(base_); |
| uint end = PageTable<MAX_PAGING_LEVEL>::vaddr_to_index(base_ + size_ - 1); |
| |
| // Don't check start if that table is shared with another aspace. |
| if (!PageTable<MAX_PAGING_LEVEL>::page_aligned(base_)) { |
| start += 1; |
| } |
| // Do check the end if it fills out the table entry. |
| if (PageTable<MAX_PAGING_LEVEL>::page_aligned(base_ + size_)) { |
| end += 1; |
| } |
| |
| for (uint i = start; i < end; ++i) { |
| DEBUG_ASSERT(!IS_PAGE_PRESENT(table[i])); |
| } |
| #endif |
| |
| pmm_free_page(paddr_to_vm_page(pt_phys_)); |
| pt_phys_ = 0; |
| |
| return ZX_OK; |
| } |
| |
| status_t X86ArchVmAspace::Destroy() { |
| fbl::AutoLock a(&lock_); |
| |
| if (flags_ & ARCH_ASPACE_FLAG_GUEST_PASPACE) |
| return DestroyAspace<ExtendedPageTable>(); |
| else |
| return DestroyAspace<PageTable>(); |
| } |
| |
| void X86ArchVmAspace::ContextSwitch(X86ArchVmAspace* old_aspace, X86ArchVmAspace* aspace) { |
| mp_cpu_mask_t cpu_bit = 1U << arch_curr_cpu_num(); |
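| // Keep active_cpus_ up to date so TLB shootdowns (x86_tlb_invalidate_page) |
| // can target only the CPUs this aspace is active on. |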
| if (aspace != nullptr) { |
| aspace->canary_.Assert(); |
| LTRACEF_LEVEL(3, "switching to aspace %p, pt %#" PRIxPTR "\n", aspace, aspace->pt_phys_); |
| x86_set_cr3(aspace->pt_phys_); |
| |
| if (old_aspace != nullptr) { |
| atomic_and(&old_aspace->active_cpus_, ~cpu_bit); |
| } |
| atomic_or(&aspace->active_cpus_, cpu_bit); |
| } else { |
| LTRACEF_LEVEL(3, "switching to kernel aspace, pt %#" PRIxPTR "\n", kernel_pt_phys); |
| x86_set_cr3(kernel_pt_phys); |
| if (old_aspace != nullptr) { |
| atomic_and(&old_aspace->active_cpus_, ~cpu_bit); |
| } |
| } |
| |
| // Cleanup io bitmap entries from previous thread. |
| if (old_aspace) |
| x86_clear_tss_io_bitmap(old_aspace->io_bitmap()); |
| |
| // Set the io bitmap for this thread. |
| if (aspace) |
| x86_set_tss_io_bitmap(aspace->io_bitmap()); |
| } |
| |
| template <template <int> class PageTable, typename F> |
| status_t X86ArchVmAspace::QueryVaddr(vaddr_t vaddr, paddr_t* paddr, |
| uint* mmu_flags, F arch_to_mmu) { |
| canary_.Assert(); |
| |
| page_table_levels ret_level; |
| |
| LTRACEF("aspace %p, vaddr %#" PRIxPTR ", paddr %p, mmu_flags %p\n", this, vaddr, paddr, |
| mmu_flags); |
| |
| if (!IsValidVaddr(vaddr)) |
| return ZX_ERR_INVALID_ARGS; |
| |
| volatile pt_entry_t* last_valid_entry; |
| status_t status = GetMapping<PageTable<MAX_PAGING_LEVEL>>( |
| pt_virt_, vaddr, &ret_level, &last_valid_entry); |
| if (status != ZX_OK) |
| return status; |
| |
| DEBUG_ASSERT(last_valid_entry); |
| LTRACEF("last_valid_entry (%p) 0x%" PRIxPTE ", level %d\n", last_valid_entry, *last_valid_entry, |
| ret_level); |
| |
| /* based on the return level, parse the page table entry */ |
| if (paddr) { |
| switch (ret_level) { |
| case PDP_L: /* 1GB page */ |
| *paddr = PageTable<PDP_L>::paddr_from_pte(*last_valid_entry); |
| *paddr |= vaddr & PAGE_OFFSET_MASK_HUGE; |
| break; |
| case PD_L: /* 2MB page */ |
| *paddr = PageTable<PD_L>::paddr_from_pte(*last_valid_entry); |
| *paddr |= vaddr & PAGE_OFFSET_MASK_LARGE; |
| break; |
| case PT_L: /* 4K page */ |
| *paddr = PageTable<PT_L>::paddr_from_pte(*last_valid_entry); |
| *paddr |= vaddr & PAGE_OFFSET_MASK_4KB; |
| break; |
| default: |
| panic("arch_mmu_query: unhandled frame level\n"); |
| } |
| |
| LTRACEF("paddr %#" PRIxPTR "\n", *paddr); |
| } |
| |
| /* converting arch-specific flags to mmu flags */ |
| if (mmu_flags) { |
| *mmu_flags = arch_to_mmu(*last_valid_entry, ret_level); |
| } |
| |
| return ZX_OK; |
| } |
| |
| status_t X86ArchVmAspace::Query(vaddr_t vaddr, paddr_t* paddr, uint* mmu_flags) { |
| fbl::AutoLock a(&lock_); |
| |
| if (flags_ & ARCH_ASPACE_FLAG_GUEST_PASPACE) { |
| return QueryVaddr<ExtendedPageTable>(vaddr, paddr, mmu_flags, |
| ept_mmu_flags); |
| } else { |
| return QueryVaddr<PageTable>(vaddr, paddr, mmu_flags, |
| x86_mmu_flags); |
| } |
| } |
| |
| void x86_mmu_percpu_init(void) { |
| ulong cr0 = x86_get_cr0(); |
| /* Set the write protect bit in CR0 */ |
| cr0 |= X86_CR0_WP; |
| // Clear Cache disable/not write-through bits |
| cr0 &= ~(X86_CR0_NW | X86_CR0_CD); |
| x86_set_cr0(cr0); |
| |
| /* Set the SMEP & SMAP bits in CR4 */ |
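| // SMEP prevents the kernel from executing user-accessible pages; SMAP |
| // prevents inadvertent kernel reads/writes of user-accessible pages. |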
| ulong cr4 = x86_get_cr4(); |
| if (x86_feature_test(X86_FEATURE_SMEP)) |
| cr4 |= X86_CR4_SMEP; |
| if (x86_feature_test(X86_FEATURE_SMAP)) |
| cr4 |= X86_CR4_SMAP; |
| x86_set_cr4(cr4); |
| |
| // Set NXE bit in X86_MSR_IA32_EFER. |
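| // This enables the execute-disable (NX) permission bit in page table entries. |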
| uint64_t efer_msr = read_msr(X86_MSR_IA32_EFER); |
| efer_msr |= X86_EFER_NXE; |
| write_msr(X86_MSR_IA32_EFER, efer_msr); |
| } |
| |
| X86ArchVmAspace::~X86ArchVmAspace() { |
| // TODO(MG-980): check that we've destroyed the aspace. |
| } |
| |
| vaddr_t X86ArchVmAspace::PickSpot(vaddr_t base, uint prev_region_mmu_flags, |
| vaddr_t end, uint next_region_mmu_flags, |
| vaddr_t align, size_t size, uint mmu_flags) { |
| canary_.Assert(); |
| return PAGE_ALIGN(base); |
| } |