| // Copyright 2017 The Fuchsia Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #include <hypervisor/guest.h> |
| |
| #include <fcntl.h> |
| #include <limits.h> |
| #include <string.h> |
| #include <unistd.h> |
| |
| #include <fbl/alloc_checker.h> |
| #include <fbl/type_support.h> |
| #include <zircon/device/sysinfo.h> |
| #include <zircon/process.h> |
| #include <zircon/syscalls.h> |
| #include <zircon/syscalls/hypervisor.h> |
| #include <zircon/syscalls/port.h> |
| #include <zircon/threads.h> |
| |
// Path opened by guest_get_resource() to obtain the hypervisor resource.
static constexpr char kResourcePath[] = "/dev/misc/sysinfo";

// Region type codes stored in the |type| field of the E820 entries that
// guest_create_e820() writes.
static constexpr uint32_t kE820Ram = 1;
static constexpr uint32_t kE820Reserved = 2;

// Number of threads reading from the async device port.
static constexpr size_t kNumAsyncWorkers = 1;

// Largest (512 GiB) and smallest (four 4 KiB pages) memory sizes accepted by
// guest_create_page_table().
static constexpr size_t kMaxSize = 512ull << 30;
static constexpr size_t kMinSize = 4 * (4 << 10);
| |
// clang-format off

// Address boundaries used to lay out the E820 memory map. The "3500mb" and
// "4000mb" names are approximate: the values are 3.5 GiB and 4 GiB.
static constexpr uint64_t kAddr32kb   = 0x0000000000008000;  // 32 KiB
static constexpr uint64_t kAddr64kb   = 0x0000000000010000;  // 64 KiB
static constexpr uint64_t kAddr1mb    = 0x0000000000100000;  // 1 MiB
static constexpr uint64_t kAddr3500mb = 0x00000000e0000000;  // 3.5 GiB
static constexpr uint64_t kAddr4000mb = 0x0000000100000000;  // 4 GiB

// clang-format on
| |
| static zx_status_t guest_get_resource(zx_handle_t* resource) { |
| int fd = open(kResourcePath, O_RDWR); |
| if (fd < 0) |
| return ZX_ERR_IO; |
| ssize_t n = ioctl_sysinfo_get_hypervisor_resource(fd, resource); |
| close(fd); |
| return n < 0 ? ZX_ERR_IO : ZX_OK; |
| } |
| |
| zx_status_t Guest::Init(size_t mem_size) { |
| zx_status_t status = phys_mem_.Init(mem_size); |
| if (status != ZX_OK) { |
| fprintf(stderr, "Failed to create guest physical memory.\n"); |
| return status; |
| } |
| |
| zx_handle_t resource; |
| status = guest_get_resource(&resource); |
| if (status != ZX_OK) { |
| fprintf(stderr, "Failed to get hypervisor resource.\n"); |
| return status; |
| } |
| |
| status = zx_guest_create(resource, 0, phys_mem_.vmo(), &guest_); |
| if (status != ZX_OK) { |
| fprintf(stderr, "Failed to create guest.\n"); |
| return status; |
| } |
| zx_handle_close(resource); |
| |
| status = zx::port::create(0, &port_); |
| if (status != ZX_OK) { |
| fprintf(stderr, "Failed to create port.\n"); |
| return status; |
| } |
| |
| for (size_t i = 0; i < kNumAsyncWorkers; ++i) { |
| thrd_t thread; |
| auto thread_func = +[](void* arg) { return static_cast<Guest*>(arg)->IoThread(); }; |
| int ret = thrd_create_with_name(&thread, thread_func, this, "io-handler"); |
| if (ret != thrd_success) { |
| fprintf(stderr, "Failed to create io handler thread: %d\n", ret); |
| return ZX_ERR_INTERNAL; |
| } |
| |
| ret = thrd_detach(thread); |
| if (ret != thrd_success) { |
| fprintf(stderr, "Failed to detach io handler thread: %d\n", ret); |
| return ZX_ERR_INTERNAL; |
| } |
| } |
| |
| return ZX_OK; |
| } |
| |
| Guest::~Guest() { |
| zx_handle_close(guest_); |
| } |
| |
| zx_status_t Guest::IoThread() { |
| while (true) { |
| zx_port_packet_t packet; |
| zx_status_t status = port_.wait(ZX_TIME_INFINITE, &packet, 0); |
| if (status != ZX_OK) { |
| fprintf(stderr, "Failed to wait for device port %d\n", status); |
| break; |
| } |
| |
| uint64_t addr; |
| IoValue value; |
| switch (packet.type) { |
| case ZX_PKT_TYPE_GUEST_IO: |
| addr = packet.guest_io.port; |
| value.access_size = packet.guest_io.access_size; |
| static_assert(sizeof(value.data) >= sizeof(packet.guest_io.data), |
| "IoValue too small to contain zx_packet_guest_io_t."); |
| memcpy(value.data, packet.guest_io.data, sizeof(packet.guest_io.data)); |
| break; |
| case ZX_PKT_TYPE_GUEST_BELL: |
| addr = packet.guest_bell.addr; |
| value.access_size = 0; |
| value.u32 = 0; |
| break; |
| default: |
| return ZX_ERR_NOT_SUPPORTED; |
| } |
| |
| status = trap_key_to_mapping(packet.key)->Write(addr, value); |
| if (status != ZX_OK) { |
| fprintf(stderr, "Unable to handle packet for device %d\n", status); |
| break; |
| } |
| } |
| |
| return ZX_ERR_INTERNAL; |
| } |
| |
| static constexpr uint32_t trap_kind(TrapType type) { |
| switch (type) { |
| case TrapType::MMIO_SYNC: |
| return ZX_GUEST_TRAP_MEM; |
| case TrapType::MMIO_BELL: |
| return ZX_GUEST_TRAP_BELL; |
| case TrapType::PIO_SYNC: |
| case TrapType::PIO_ASYNC: |
| return ZX_GUEST_TRAP_IO; |
| default: |
| ZX_PANIC("Unhandled TrapType %d.\n", |
| static_cast<fbl::underlying_type<TrapType>::type>(type)); |
| return 0; |
| } |
| } |
| |
| static constexpr zx_handle_t get_trap_port(TrapType type, zx_handle_t port) { |
| switch (type) { |
| case TrapType::PIO_ASYNC: |
| case TrapType::MMIO_BELL: |
| return port; |
| case TrapType::PIO_SYNC: |
| case TrapType::MMIO_SYNC: |
| return ZX_HANDLE_INVALID; |
| default: |
| ZX_PANIC("Unhandled TrapType %d.\n", |
| static_cast<fbl::underlying_type<TrapType>::type>(type)); |
| return ZX_HANDLE_INVALID; |
| } |
| } |
| |
| zx_status_t Guest::CreateMapping(TrapType type, uint64_t addr, size_t size, uint64_t offset, |
| IoHandler* handler) { |
| fbl::AllocChecker ac; |
| auto mapping = fbl::make_unique_checked<IoMapping>(&ac, addr, size, offset, handler); |
| if (!ac.check()) |
| return ZX_ERR_NO_MEMORY; |
| |
| // Set a trap for the IO region. We set the 'key' to be the address of the |
| // mapping so that we get the pointer to the mapping provided to us in port |
| // packets. |
| zx_handle_t port = get_trap_port(type, port_.get()); |
| uint32_t kind = trap_kind(type); |
| uint64_t key = reinterpret_cast<uintptr_t>(mapping.get()); |
| zx_status_t status = zx_guest_set_trap(guest_, kind, addr, size, port, key); |
| if (status != ZX_OK) |
| return status; |
| |
| mappings_.push_front(fbl::move(mapping)); |
| return ZX_OK; |
| } |
| |
| #if __x86_64__ |
// Flag bits OR-ed into the page table entries built by page_table().
enum {
    X86_PTE_P = 1 << 0,  // P: valid
    X86_PTE_RW = 1 << 1, // R/W: read/write
    X86_PTE_PS = 1 << 7, // PS: page size
};
| |
| static const size_t kPml4PageSize = 512ull << 30; |
| static const size_t kPdpPageSize = 1 << 30; |
| static const size_t kPdPageSize = 2 << 20; |
| static const size_t kPtPageSize = 4 << 10; |
| static const size_t kPtesPerPage = PAGE_SIZE / sizeof(uint64_t); |
| |
| /** |
| * Create all page tables for a given page size. |
| * |
| * @param addr The mapped address of where to write the page table. Must be page-aligned. |
| * @param size The size of memory to map. |
| * @param l1_page_size The size of pages at this level. |
| * @param l1_pte_off The offset of this page table, relative to the start of memory. |
| * @param aspace_off The address space offset, used to keep track of mapped address space. |
| * @param has_page Whether this level of the page table has associated pages. |
| * @param map_flags Flags added to any descriptors directly mapping pages. |
| */ |
| static uintptr_t page_table(uintptr_t addr, size_t size, size_t l1_page_size, uintptr_t l1_pte_off, |
| uint64_t* aspace_off, bool has_page, uint64_t map_flags) { |
| size_t l1_ptes = (size + l1_page_size - 1) / l1_page_size; |
| bool has_l0_aspace = size % l1_page_size != 0; |
| size_t l1_pages = (l1_ptes + kPtesPerPage - 1) / kPtesPerPage; |
| uintptr_t l0_pte_off = l1_pte_off + l1_pages * PAGE_SIZE; |
| |
| uint64_t* pt = (uint64_t*)(addr + l1_pte_off); |
| for (size_t i = 0; i < l1_ptes; i++) { |
| if (has_page && (!has_l0_aspace || i < l1_ptes - 1)) { |
| pt[i] = *aspace_off | X86_PTE_P | X86_PTE_RW | map_flags; |
| *aspace_off += l1_page_size; |
| } else { |
| if (i > 0 && (i % kPtesPerPage == 0)) |
| l0_pte_off += PAGE_SIZE; |
| pt[i] = l0_pte_off | X86_PTE_P | X86_PTE_RW; |
| } |
| } |
| |
| return l0_pte_off; |
| } |
| #endif // __x86_64__ |
| |
| zx_status_t guest_create_page_table(uintptr_t addr, size_t size, uintptr_t* end_off) { |
| if (size % PAGE_SIZE != 0) |
| return ZX_ERR_INVALID_ARGS; |
| if (size > kMaxSize || size < kMinSize) |
| return ZX_ERR_OUT_OF_RANGE; |
| |
| #if __x86_64__ |
| uint64_t aspace_off = 0; |
| *end_off = 0; |
| *end_off = page_table(addr, size - aspace_off, kPml4PageSize, *end_off, &aspace_off, false, 0); |
| *end_off = page_table(addr, size - aspace_off, kPdpPageSize, *end_off, &aspace_off, true, X86_PTE_PS); |
| *end_off = page_table(addr, size - aspace_off, kPdPageSize, *end_off, &aspace_off, true, X86_PTE_PS); |
| *end_off = page_table(addr, size - aspace_off, kPtPageSize, *end_off, &aspace_off, true, 0); |
| return ZX_OK; |
| #else // __x86_64__ |
| return ZX_ERR_NOT_SUPPORTED; |
| #endif // __x86_64__ |
| } |
| |
| size_t guest_e820_size(size_t size) { |
| return (size > kAddr4000mb ? 6 : 5) * sizeof(e820entry_t); |
| } |
| |
| zx_status_t guest_create_e820(uintptr_t addr, size_t size, uintptr_t e820_off) { |
| if (e820_off + guest_e820_size(size) > size) |
| return ZX_ERR_BUFFER_TOO_SMALL; |
| |
| e820entry_t* entry = (e820entry_t*)(addr + e820_off); |
| // 0 to 32kb is reserved. |
| entry[0].addr = 0; |
| entry[0].size = kAddr32kb; |
| entry[0].type = kE820Reserved; |
| // 32kb to to 64kb is available (for linux's real mode trampoline). |
| entry[1].addr = kAddr32kb; |
| entry[1].size = kAddr32kb; |
| entry[1].type = kE820Ram; |
| // 64kb to 1mb is reserved. |
| entry[2].addr = kAddr64kb; |
| entry[2].size = kAddr1mb - kAddr64kb; |
| entry[2].type = kE820Reserved; |
| // 1mb to min(size, 3500mb) is available. |
| entry[3].addr = kAddr1mb; |
| entry[3].size = (size < kAddr3500mb ? size : kAddr3500mb) - kAddr1mb; |
| entry[3].type = kE820Ram; |
| // 3500mb to 4000mb is reserved. |
| entry[4].addr = kAddr3500mb; |
| entry[4].size = kAddr4000mb - kAddr3500mb; |
| entry[4].type = kE820Reserved; |
| if (size > kAddr4000mb) { |
| // If size > 4000mb, then make that region available. |
| entry[5].addr = kAddr4000mb; |
| entry[5].size = size - kAddr4000mb; |
| entry[5].type = kE820Ram; |
| } |
| |
| return ZX_OK; |
| } |