| // Copyright 2016 The Fuchsia Authors |
| // Copyright (c) 2009 Corey Tabaka |
| // Copyright (c) 2015 Intel Corporation |
| // Copyright (c) 2016 Travis Geiselbrecht |
| // |
| // Use of this source code is governed by a MIT-style |
| // license that can be found in the LICENSE file or at |
| // https://opensource.org/licenses/MIT |
| |
| #include <err.h> |
| #include <trace.h> |
| #include <arch/ops.h> |
| #include <arch/x86/apic.h> |
| #include <arch/x86/cpu_topology.h> |
| #include <arch/x86/mmu.h> |
| #include <dev/pcie_bus_driver.h> |
| #include <dev/uart.h> |
| #include <platform.h> |
| #include <mexec.h> |
| #include "platform_p.h" |
| #include <platform/pc.h> |
| #include <platform/pc/acpi.h> |
| #include <platform/pc/bootloader.h> |
| #include <platform/pc/memory.h> |
| #include <platform/console.h> |
| #include <platform/keyboard.h> |
| #include <zircon/boot/bootdata.h> |
| #include <zircon/boot/multiboot.h> |
| #include <arch/mmu.h> |
| #include <arch/mp.h> |
| #include <arch/x86.h> |
| #include <malloc.h> |
| #include <string.h> |
| #include <assert.h> |
| #include <lk/init.h> |
| #include <kernel/cmdline.h> |
| #include <vm/initial_map.h> |
| #include <vm/pmm.h> |
| #include <vm/vm_aspace.h> |
| |
| #include <lib/cksum.h> |
| |
| extern "C" { |
| #include <efi/runtime-services.h> |
| #include <efi/system-table.h> |
| }; |
| |
| #define LOCAL_TRACE 0 |
| |
| // Set to 1 to do crc32 checks on boot data |
| // KERNEL_LL_DEBUG and a uart or early gfxconsole are necessary |
| // to see the output from this debug feature |
| #define DEBUG_BOOT_DATA 0 |
| |
| extern multiboot_info_t* _multiboot_info; |
| extern bootdata_t* _bootdata_base; |
| |
| pc_bootloader_info_t bootloader; |
| |
| struct mmu_initial_mapping mmu_initial_mappings[] = { |
| /* 64GB of memory mapped where the kernel lives */ |
| { |
| .phys = MEMBASE, |
| .virt = KERNEL_ASPACE_BASE, |
| .size = 64ULL*GB, /* x86-64 maps first 64GB by default */ |
| .flags = 0, |
| .name = "memory" |
| }, |
| /* KERNEL_SIZE of memory mapped where the kernel lives. |
| * On x86-64, this only sticks around until the VM is brought up, after |
| * that this will be replaced with mappings of the correct privileges. */ |
| { |
| .phys = MEMBASE, |
| .virt = KERNEL_BASE, |
| .size = KERNEL_SIZE, /* x86 maps first KERNEL_SIZE by default */ |
| .flags = MMU_INITIAL_MAPPING_TEMPORARY, |
| .name = "kernel_temp" |
| }, |
| /* null entry to terminate the list */ |
| {} |
| }; |
| |
| static bool early_console_disabled; |
| |
| static int process_bootitem(bootdata_t* bd, void* item) { |
| switch (bd->type) { |
| case BOOTDATA_ACPI_RSDP: |
| if (bd->length >= sizeof(uint64_t)) { |
| bootloader.acpi_rsdp = *((uint64_t*)item); |
| } |
| break; |
| case BOOTDATA_EFI_SYSTEM_TABLE: |
| if (bd->length >= sizeof(uint64_t)) { |
| bootloader.efi_system_table = (void*) *((uint64_t*)item); |
| } |
| break; |
| case BOOTDATA_FRAMEBUFFER: { |
| if (bd->length >= sizeof(bootdata_swfb_t)) { |
| memcpy(&bootloader.fb, item, sizeof(bootdata_swfb_t)); |
| } |
| break; |
| } |
| case BOOTDATA_CMDLINE: |
| if (bd->length > 0) { |
| ((char*) item)[bd->length - 1] = 0; |
| cmdline_append((char*) item); |
| } |
| break; |
| case BOOTDATA_EFI_MEMORY_MAP: |
| bootloader.efi_mmap = item; |
| bootloader.efi_mmap_size = bd->length; |
| break; |
| case BOOTDATA_E820_TABLE: |
| bootloader.e820_table = item; |
| bootloader.e820_count = bd->length / sizeof(e820entry_t); |
| break; |
| case BOOTDATA_LASTLOG_NVRAM2: |
| // fallthrough: this is a legacy/typo variant |
| case BOOTDATA_LASTLOG_NVRAM: |
| if (bd->length >= sizeof(bootdata_nvram_t)) { |
| memcpy(&bootloader.nvram, item, sizeof(bootdata_nvram_t)); |
| } |
| break; |
| case BOOTDATA_DEBUG_UART: |
| if (bd->length >= sizeof(bootdata_uart_t)) { |
| memcpy(&bootloader.uart, item, sizeof(bootdata_uart_t)); |
| } |
| break; |
| case BOOTDATA_IGNORE: |
| break; |
| } |
| return 0; |
| } |
| |
| extern "C" void *boot_alloc_mem(size_t len); |
| extern "C" void boot_alloc_reserve(uintptr_t phys, size_t _len); |
| |
| static void process_bootdata(bootdata_t* hdr, uintptr_t phys, bool verify) { |
| if ((hdr->type != BOOTDATA_CONTAINER) || |
| (hdr->extra != BOOTDATA_MAGIC)) { |
| printf("bootdata: invalid %08x %08x %08x %08x\n", |
| hdr->type, hdr->length, hdr->extra, hdr->flags); |
| return; |
| } |
| |
| size_t hsz = sizeof(bootdata_t); |
| if (hdr->flags & BOOTDATA_FLAG_EXTRA) { |
| hsz += sizeof(bootextra_t); |
| } |
| |
| size_t total_len = hdr->length + hsz; |
| |
| printf("bootdata: @ %p (%zu bytes)\n", hdr, total_len); |
| |
| bootdata_t* bd = reinterpret_cast<bootdata_t*>(reinterpret_cast<char*>(hdr) + hsz); |
| |
| size_t remain = hdr->length; |
| while (remain > sizeof(bootdata_t)) { |
| remain -= sizeof(bootdata_t); |
| uintptr_t item = reinterpret_cast<uintptr_t>(bd + 1); |
| |
| #if DEBUG_BOOT_DATA |
| char tag[5]; |
| uint8_t* x = reinterpret_cast<uint8_t*>(&bd->type); |
| unsigned n; |
| for (n = 0; n < 4; n++) { |
| tag[n] = ((*x >= ' ') && (*x <= 127)) ? *x : '.'; |
| x++; |
| } |
| tag[n] = 0; |
| printf("bootdata: @ %p typ=%08x (%s) len=%08x ext=%08x flg=%08x\n", |
| bd, bd->type, tag, bd->length, bd->extra, bd->flags); |
| #endif |
| |
| // check for extra header and process if it exists |
| if (bd->flags & BOOTDATA_FLAG_EXTRA) { |
| if (remain < sizeof(bootextra_t)) { |
| printf("bootdata: truncated header\n"); |
| break; |
| } |
| bootextra_t* extra = reinterpret_cast<bootextra_t*>(item); |
| item += sizeof(bootextra_t); |
| remain -= sizeof(bootextra_t); |
| if (extra->magic != BOOTITEM_MAGIC) { |
| printf("bootdata: bad magic\n"); |
| break; |
| } |
| } |
| |
| size_t advance = BOOTDATA_ALIGN(bd->length); |
| if (advance > remain) { |
| printf("bootdata: truncated\n"); |
| break; |
| } |
| #if DEBUG_BOOT_DATA |
| if (verify && (bd->flags & BOOTDATA_FLAG_CRC32)) { |
| if (!(bd->flags & BOOTDATA_FLAG_EXTRA)) { |
| printf("bootdata: crc flag set but no extra data!\n"); |
| break; |
| } |
| bootextra_t* extra = reinterpret_cast<bootextra_t*>(item - sizeof(bootextra_t)); |
| uint32_t crc = 0; |
| uint32_t tmp = extra->crc32; |
| extra->crc32 = 0; |
| crc = crc32(crc, reinterpret_cast<uint8_t*>(bd), sizeof(bootdata_t)); |
| crc = crc32(crc, reinterpret_cast<uint8_t*>(extra), sizeof(bootextra_t)); |
| crc = crc32(crc, reinterpret_cast<uint8_t*>(item), bd->length); |
| extra->crc32 = tmp; |
| printf("bootdata: crc %08x, computed %08x: %s\n", tmp, crc, |
| (tmp == crc) ? "OKAY" : "FAIL"); |
| } |
| #endif |
| if (!verify) { |
| if (process_bootitem(bd, reinterpret_cast<void*>(item))) { |
| break; |
| } |
| } |
| bd = reinterpret_cast<bootdata_t*>(item + advance); |
| remain -= advance; |
| } |
| |
| if (!verify) { |
| boot_alloc_reserve(phys, total_len); |
| bootloader.ramdisk_base = phys; |
| bootloader.ramdisk_size = total_len; |
| } |
| } |
| |
| extern bool halt_on_panic; |
| |
| static void platform_save_bootloader_data(bool verify) { |
| if (_multiboot_info != NULL) { |
| multiboot_info_t* mi = (multiboot_info_t*) X86_PHYS_TO_VIRT(_multiboot_info); |
| printf("multiboot: info @ %p flags %#x\n", mi, mi->flags); |
| |
| if ((mi->flags & MB_INFO_CMD_LINE) && mi->cmdline && (!verify)) { |
| const char* cmdline = (const char*) X86_PHYS_TO_VIRT(mi->cmdline); |
| printf("multiboot: cmdline @ %p\n", cmdline); |
| cmdline_append(cmdline); |
| } |
| if ((mi->flags & MB_INFO_MODS) && mi->mods_addr) { |
| module_t* mod = (module_t*) X86_PHYS_TO_VIRT(mi->mods_addr); |
| if (mi->mods_count > 0) { |
| printf("multiboot: ramdisk @ %08x..%08x\n", mod->mod_start, mod->mod_end); |
| process_bootdata(reinterpret_cast<bootdata_t*>(X86_PHYS_TO_VIRT(mod->mod_start)), |
| mod->mod_start, verify); |
| } |
| } |
| } |
| if (_bootdata_base != NULL) { |
| bootdata_t* bd = (bootdata_t*) X86_PHYS_TO_VIRT(_bootdata_base); |
| process_bootdata(bd, (uintptr_t) _bootdata_base, verify); |
| } |
| |
| halt_on_panic = cmdline_get_bool("kernel.halt-on-panic", false); |
| } |
| |
| static void* ramdisk_base; |
| static size_t ramdisk_size; |
| |
| static void platform_preserve_ramdisk(void) { |
| if (bootloader.ramdisk_size == 0) { |
| return; |
| } |
| if (bootloader.ramdisk_base == 0) { |
| return; |
| } |
| struct list_node list = LIST_INITIAL_VALUE(list); |
| size_t pages = ROUNDUP_PAGE_SIZE(bootloader.ramdisk_size) / PAGE_SIZE; |
| size_t actual = pmm_alloc_range(bootloader.ramdisk_base, pages, &list); |
| if (actual != pages) { |
| panic("unable to reserve ramdisk memory range\n"); |
| } |
| |
| // mark all of the pages we allocated as WIRED |
| vm_page_t *p; |
| list_for_every_entry(&list, p, vm_page_t, free.node) { |
| p->state = VM_PAGE_STATE_WIRED; |
| } |
| |
| ramdisk_base = paddr_to_kvaddr(bootloader.ramdisk_base); |
| ramdisk_size = pages * PAGE_SIZE; |
| } |
| |
| void* platform_get_ramdisk(size_t *size) { |
| if (ramdisk_base) { |
| *size = ramdisk_size; |
| return ramdisk_base; |
| } else { |
| *size = 0; |
| return NULL; |
| } |
| } |
| |
| #include <dev/display.h> |
| #include <lib/gfxconsole.h> |
| |
| status_t display_get_info(struct display_info *info) { |
| return gfxconsole_display_get_info(info); |
| } |
| |
| static void platform_early_display_init(void) { |
| struct display_info info; |
| void *bits; |
| |
| if (bootloader.fb.base == 0) { |
| return; |
| } |
| |
| if (cmdline_get_bool("gfxconsole.early", false) == false) { |
| early_console_disabled = true; |
| return; |
| } |
| |
| // allocate an offscreen buffer of worst-case size, page aligned |
| bits = boot_alloc_mem(8192 + bootloader.fb.height * bootloader.fb.stride * 4); |
| bits = (void*) ((((uintptr_t) bits) + 4095) & (~4095)); |
| |
| memset(&info, 0, sizeof(info)); |
| info.format = bootloader.fb.format; |
| info.width = bootloader.fb.width; |
| info.height = bootloader.fb.height; |
| info.stride = bootloader.fb.stride; |
| info.flags = DISPLAY_FLAG_HW_FRAMEBUFFER; |
| info.framebuffer = (void*) X86_PHYS_TO_VIRT(bootloader.fb.base); |
| |
| gfxconsole_bind_display(&info, bits); |
| } |
| |
| /* Ensure the framebuffer is write-combining as soon as we have the VMM. |
| * Some system firmware has the MTRRs for the framebuffer set to Uncached. |
| * Since dealing with MTRRs is rather complicated, we wait for the VMM to |
| * come up so we can use PAT to manage the memory types. */ |
| static void platform_ensure_display_memtype(uint level) |
| { |
| if (bootloader.fb.base == 0) { |
| return; |
| } |
| if (early_console_disabled) { |
| return; |
| } |
| struct display_info info; |
| memset(&info, 0, sizeof(info)); |
| info.format = bootloader.fb.format; |
| info.width = bootloader.fb.width; |
| info.height = bootloader.fb.height; |
| info.stride = bootloader.fb.stride; |
| info.flags = DISPLAY_FLAG_HW_FRAMEBUFFER; |
| |
| void *addr = NULL; |
| status_t status = VmAspace::kernel_aspace()->AllocPhysical( |
| "boot_fb", |
| ROUNDUP(info.stride * info.height * 4, PAGE_SIZE), |
| &addr, |
| PAGE_SIZE_SHIFT, |
| bootloader.fb.base, |
| 0 /* vmm flags */, |
| ARCH_MMU_FLAG_WRITE_COMBINING | ARCH_MMU_FLAG_PERM_READ | |
| ARCH_MMU_FLAG_PERM_WRITE); |
| if (status != ZX_OK) { |
| TRACEF("Failed to map boot_fb: %d\n", status); |
| return; |
| } |
| |
| info.framebuffer = addr; |
| gfxconsole_bind_display(&info, NULL); |
| } |
| LK_INIT_HOOK(display_memtype, &platform_ensure_display_memtype, LK_INIT_LEVEL_VM + 1); |
| |
| static efi_guid zircon_guid = ZIRCON_VENDOR_GUID; |
| static char16_t crashlog_name[] = ZIRCON_CRASHLOG_EFIVAR; |
| |
| static fbl::RefPtr<VmAspace> efi_aspace; |
| |
| typedef struct { |
| uint64_t magic; |
| uint64_t length; |
| uint64_t nmagic; |
| uint64_t nlength; |
| } log_hdr_t; |
| |
| #define NVRAM_MAGIC (0x6f8962d66b28504fULL) |
| |
| static size_t nvram_stow_crashlog(void* log, size_t len) { |
| size_t max = bootloader.nvram.length - sizeof(log_hdr_t); |
| void* nvram = paddr_to_kvaddr(bootloader.nvram.base); |
| if (nvram == NULL) { |
| return 0; |
| } |
| |
| if (log == NULL) { |
| return max; |
| } |
| if (len > max) { |
| len = max; |
| } |
| |
| log_hdr_t hdr = { |
| .magic = NVRAM_MAGIC, |
| .length = len, |
| .nmagic = ~NVRAM_MAGIC, |
| .nlength = ~len, |
| }; |
| memcpy(nvram, &hdr, sizeof(hdr)); |
| memcpy(static_cast<char*>(nvram) + sizeof(hdr), log, len); |
| arch_clean_cache_range((uintptr_t)nvram, sizeof(hdr) + len); |
| return len; |
| } |
| |
| static size_t nvram_recover_crashlog(size_t len, void* cookie, |
| void (*func)(const void* data, size_t, size_t len, void* cookie)) { |
| size_t max = bootloader.nvram.length - sizeof(log_hdr_t); |
| void* nvram = paddr_to_kvaddr(bootloader.nvram.base); |
| if (nvram == NULL) { |
| return 0; |
| } |
| log_hdr_t hdr; |
| memcpy(&hdr, nvram, sizeof(hdr)); |
| if ((hdr.magic != NVRAM_MAGIC) || (hdr.length > max) || |
| (hdr.nmagic != ~NVRAM_MAGIC) || (hdr.nlength != ~hdr.length)) { |
| printf("nvram-crashlog: bad header: %016lx %016lx %016lx %016lx\n", |
| hdr.magic, hdr.length, hdr.nmagic, hdr.nlength); |
| return 0; |
| } |
| if (len == 0) { |
| return hdr.length; |
| } |
| if (len > hdr.length) { |
| len = hdr.length; |
| } |
| func(static_cast<char*>(nvram) + sizeof(hdr), 0, len, cookie); |
| |
| // invalidate header so we don't get a stale crashlog |
| // on future boots |
| hdr.magic = 0; |
| memcpy(nvram, &hdr, sizeof(hdr)); |
| return hdr.length; |
| } |
| |
| void platform_init_crashlog(void) { |
| if (bootloader.nvram.base && bootloader.nvram.length > sizeof(log_hdr_t)) { |
| // Nothing to do for simple nvram logs |
| return; |
| } else { |
| bootloader.nvram.base = 0; |
| bootloader.nvram.length = 0; |
| } |
| |
| if (bootloader.efi_system_table != NULL) { |
| // Create a linear mapping to use to call UEFI Runtime Services |
| efi_aspace = VmAspace::Create(VmAspace::TYPE_LOW_KERNEL, "uefi"); |
| if (!efi_aspace) { |
| return; |
| } |
| |
| //TODO: get more precise about this. This gets the job done on |
| // the platforms we're working on right now, but is probably |
| // not entirely correct. |
| void* ptr = (void*) 0; |
| zx_status_t r = efi_aspace->AllocPhysical("1:1", 16*1024*1024*1024UL, &ptr, |
| PAGE_SIZE_SHIFT, 0, |
| VmAspace::VMM_FLAG_VALLOC_SPECIFIC, |
| ARCH_MMU_FLAG_PERM_READ | |
| ARCH_MMU_FLAG_PERM_WRITE | |
| ARCH_MMU_FLAG_PERM_EXECUTE); |
| |
| if (r != ZX_OK) { |
| efi_aspace.reset(); |
| } |
| } |
| } |
| |
| // Something big enough for the panic log but not too enormous |
| // to avoid excessive pressure on efi variable storage |
| #define MAX_EFI_CRASHLOG_LEN 4096 |
| |
| static size_t efi_stow_crashlog(void* log, size_t len) { |
| if (!efi_aspace) { |
| return 0; |
| } |
| if (log == NULL) { |
| return MAX_EFI_CRASHLOG_LEN; |
| } |
| if (len > MAX_EFI_CRASHLOG_LEN) { |
| len = MAX_EFI_CRASHLOG_LEN; |
| } |
| |
| vmm_set_active_aspace(reinterpret_cast<vmm_aspace_t*>(efi_aspace.get())); |
| |
| efi_system_table* sys = static_cast<efi_system_table*>(bootloader.efi_system_table); |
| efi_runtime_services* rs = sys->RuntimeServices; |
| if (rs->SetVariable(crashlog_name, &zircon_guid, ZIRCON_CRASHLOG_EFIATTR, len, log) == 0) { |
| return len; |
| } else { |
| return 0; |
| } |
| } |
| |
| size_t platform_stow_crashlog(void* log, size_t len) { |
| if (bootloader.nvram.base) { |
| return nvram_stow_crashlog(log, len); |
| } else { |
| return efi_stow_crashlog(log, len); |
| } |
| } |
| |
| size_t platform_recover_crashlog(size_t len, void* cookie, |
| void (*func)(const void* data, size_t, size_t len, void* cookie)) { |
| if (bootloader.nvram.base != 0) { |
| return nvram_recover_crashlog(len, cookie, func); |
| } else { |
| return 0; |
| } |
| } |
| |
| typedef struct e820_walk_ctx { |
| uint8_t* buf; |
| size_t len; |
| zx_status_t ret; |
| } e820_walk_ctx_t; |
| |
| static void e820_entry_walk(uint64_t base, uint64_t size, bool is_mem, void* void_ctx) { |
| e820_walk_ctx* ctx = (e820_walk_ctx*)void_ctx; |
| |
| // Something went wrong in one of the previous calls, don't attempt to |
| // continue. |
| if (ctx->ret != ZX_OK) |
| return; |
| |
| // Make sure we have enough space in the buffer. |
| if (ctx->len < sizeof(e820entry_t)) { |
| ctx->ret = ZX_ERR_BUFFER_TOO_SMALL; |
| return; |
| } |
| |
| e820entry_t* entry = (e820entry_t*)ctx->buf; |
| entry->addr = base; |
| entry->size = size; |
| |
| // Hack: When we first parse this map we normalize each section to either |
| // memory or not-memory. When we pass it to the next kernel, we lose |
| // information about the type of "not memory" in each region. |
| entry->type = is_mem ? E820_RAM : E820_RESERVED; |
| |
| ctx->buf += sizeof(*entry); |
| ctx->len -= sizeof(*entry); |
| ctx->ret = ZX_OK; |
| } |
| |
| // Give the platform an opportunity to append any platform specific bootdata |
| // sections. |
| zx_status_t platform_mexec_patch_bootdata(uint8_t* bootdata, const size_t len) { |
| uint8_t e820buf[sizeof(e820entry_t) * 32]; |
| |
| e820_walk_ctx ctx; |
| ctx.buf = e820buf; |
| ctx.len = sizeof(e820buf); |
| ctx.ret = ZX_OK; |
| |
| zx_status_t ret = enumerate_e820(e820_entry_walk, &ctx); |
| |
| if (ret != ZX_OK) |
| return ret; |
| |
| if (ctx.ret != ZX_OK) |
| return ctx.ret; |
| |
| uint32_t section_length = (uint32_t)(sizeof(e820buf) - ctx.len); |
| |
| ret = bootdata_append_section(bootdata, len, e820buf, section_length, |
| BOOTDATA_E820_TABLE, 0, 0); |
| |
| if (ret != ZX_OK) |
| return ret; |
| |
| // Append information about the framebuffer to the bootdata |
| if (bootloader.fb.base) { |
| ret = bootdata_append_section(bootdata, len, (uint8_t*)&bootloader.fb, |
| sizeof(bootloader.fb), BOOTDATA_FRAMEBUFFER, 0, 0); |
| if (ret != ZX_OK) |
| return ret; |
| } |
| |
| if (bootloader.efi_system_table) { |
| ret = bootdata_append_section(bootdata, len, (uint8_t*)&bootloader.efi_system_table, |
| sizeof(bootloader.efi_system_table), |
| BOOTDATA_EFI_SYSTEM_TABLE, 0, 0); |
| if (ret != ZX_OK) |
| return ret; |
| } |
| |
| if (bootloader.acpi_rsdp) { |
| ret = bootdata_append_section(bootdata, len, (uint8_t*)&bootloader.acpi_rsdp, |
| sizeof(bootloader.acpi_rsdp), BOOTDATA_ACPI_RSDP, 0, 0); |
| if (ret != ZX_OK) |
| return ret; |
| } |
| |
| return ZX_OK; |
| } |
| |
| // Number of pages required to identity map 4GiB of memory. |
| const size_t kBytesToIdentityMap = 4ull * GB; |
| const size_t kNumL2PageTables = kBytesToIdentityMap / (2ull * MB * NO_OF_PT_ENTRIES); |
| const size_t kNumL3PageTables = 1; |
| const size_t kNumL4PageTables = 1; |
| const size_t kTotalPageTableCount = kNumL2PageTables + kNumL3PageTables + kNumL4PageTables; |
| |
| // Allocate `count` pages where no page has a physical address less than |
| // `lower_bound` |
| static void alloc_pages_greater_than(paddr_t lower_bound, size_t count, paddr_t* paddrs) { |
| struct list_node list = LIST_INITIAL_VALUE(list); |
| while (count) { |
| const size_t actual = pmm_alloc_range(lower_bound, count, &list); |
| |
| for (size_t i = 0; i < actual; i++) { |
| paddrs[count - (i + 1)] = lower_bound + PAGE_SIZE * i; |
| } |
| |
| count -= actual; |
| lower_bound += PAGE_SIZE * (actual + 1); |
| |
| // If we're past the 4GiB mark and still trying to allocate, just give |
| // up. |
| if (lower_bound >= (4 * GB)) { |
| panic("failed to allocate page tables for mexec"); |
| } |
| } |
| |
| // mark all of the pages we allocated as WIRED |
| vm_page_t *p; |
| list_for_every_entry(&list, p, vm_page_t, free.node) { |
| p->state = VM_PAGE_STATE_WIRED; |
| } |
| } |
| |
| void platform_mexec(mexec_asm_func mexec_assembly, memmov_ops_t* ops, |
| uintptr_t new_bootimage_addr, size_t new_bootimage_len) { |
| |
| // A hacky way to handle disabling all PCI devices until we have devhost |
| // lifecycles implemented |
| PcieBusDriver::GetDriver()->DisableBus(); |
| |
| // This code only handles one L3 and one L4 page table for now. Fail if |
| // there are more L2 page tables than can fit in one L3 page table. |
| static_assert(kNumL2PageTables <= NO_OF_PT_ENTRIES, |
| "Kexec identity map size is too large. Only one L3 PTE is supported at this time."); |
| static_assert(kNumL3PageTables == 1, "Only 1 L3 page table is supported at this time."); |
| static_assert(kNumL4PageTables == 1, "Only 1 L4 page table is supported at this time."); |
| |
| // Identity map the first 4GiB of RAM |
| fbl::RefPtr<VmAspace> identity_aspace = |
| VmAspace::Create(VmAspace::TYPE_LOW_KERNEL, "x86-64 mexec 1:1"); |
| DEBUG_ASSERT(identity_aspace); |
| |
| const uint perm_flags_rwx = ARCH_MMU_FLAG_PERM_READ | |
| ARCH_MMU_FLAG_PERM_WRITE | |
| ARCH_MMU_FLAG_PERM_EXECUTE; |
| void* identity_address = 0x0; |
| paddr_t pa = 0; |
| zx_status_t result = identity_aspace->AllocPhysical("1:1 mapping", kBytesToIdentityMap, |
| &identity_address, 0, pa, |
| VmAspace::VMM_FLAG_VALLOC_SPECIFIC, |
| perm_flags_rwx); |
| if (result != ZX_OK) { |
| panic("failed to identity map low memory"); |
| } |
| |
| vmm_set_active_aspace(reinterpret_cast<vmm_aspace_t*>(identity_aspace.get())); |
| |
| paddr_t safe_pages[kTotalPageTableCount]; |
| alloc_pages_greater_than(new_bootimage_addr + new_bootimage_len + PAGE_SIZE, |
| kTotalPageTableCount, safe_pages); |
| |
| size_t safe_page_id = 0; |
| volatile pt_entry_t* ptl4 = (pt_entry_t*)paddr_to_kvaddr(safe_pages[safe_page_id++]); |
| volatile pt_entry_t* ptl3 = (pt_entry_t*)paddr_to_kvaddr(safe_pages[safe_page_id++]); |
| |
| // Initialize these to 0 |
| for (size_t i = 0; i < NO_OF_PT_ENTRIES; i++) { |
| ptl4[i] = 0; |
| ptl3[i] = 0; |
| } |
| |
| for (size_t i = 0; i < kNumL2PageTables; i++) { |
| ptl3[i] = safe_pages[safe_page_id] | X86_KERNEL_PD_FLAGS; |
| volatile pt_entry_t* ptl2 = (pt_entry_t*)paddr_to_kvaddr(safe_pages[safe_page_id]); |
| |
| for (size_t j = 0; j < NO_OF_PT_ENTRIES; j++) { |
| ptl2[j] = (2 * MB * (i * NO_OF_PT_ENTRIES + j)) | X86_KERNEL_PD_LP_FLAGS; |
| } |
| |
| safe_page_id++; |
| } |
| |
| ptl4[0] = vaddr_to_paddr((void*)ptl3) | X86_KERNEL_PD_FLAGS; |
| |
| mexec_assembly((uintptr_t)new_bootimage_addr, vaddr_to_paddr((void*)ptl4), 0, 0, ops, 0); |
| } |
| |
| void platform_halt_secondary_cpus(void) |
| { |
| // Migrate this thread to the boot cpu. |
| thread_migrate_cpu(BOOT_CPU_ID); |
| |
| // Send a shutdown interrupt to all the other cores. |
| apic_send_broadcast_ipi(0x00, DELIVERY_MODE_INIT); |
| } |
| |
| void platform_early_init(void) |
| { |
| /* extract bootloader data while still accessible */ |
| /* this includes debug uart config, etc. */ |
| platform_save_bootloader_data(false); |
| |
| /* get the debug output working */ |
| platform_init_debug_early(); |
| |
| #if WITH_LEGACY_PC_CONSOLE |
| /* get the text console working */ |
| platform_init_console(); |
| #endif |
| |
| /* if the bootloader has framebuffer info, use it for early console */ |
| platform_early_display_init(); |
| |
| #if DEBUG_BOOT_DATA |
| // second pass to verify crc32s so we can see the results |
| platform_save_bootloader_data(true); |
| #endif |
| |
| /* initialize physical memory arenas */ |
| platform_mem_init(); |
| |
| platform_preserve_ramdisk(); |
| } |
| |
| static void platform_init_smp(void) |
| { |
| uint32_t num_cpus = 0; |
| |
| status_t status = platform_enumerate_cpus(NULL, 0, &num_cpus); |
| if (status != ZX_OK) { |
| TRACEF("failed to enumerate CPUs, disabling SMP\n"); |
| return; |
| } |
| |
| // allocate 2x the table for temporary work |
| uint32_t *apic_ids = static_cast<uint32_t *>(malloc(sizeof(*apic_ids) * num_cpus * 2)); |
| if (!apic_ids) { |
| TRACEF("failed to allocate apic_ids table, disabling SMP\n"); |
| return; |
| } |
| |
| // a temporary list used before we filter out hyperthreaded pairs |
| uint32_t *apic_ids_temp = apic_ids + num_cpus; |
| |
| // find the list of all cpu apic ids into a temporary list |
| uint32_t real_num_cpus; |
| status = platform_enumerate_cpus(apic_ids_temp, num_cpus, &real_num_cpus); |
| if (status != ZX_OK || num_cpus != real_num_cpus) { |
| TRACEF("failed to enumerate CPUs, disabling SMP\n"); |
| free(apic_ids); |
| return; |
| } |
| |
| // Filter out hyperthreads if we've been told not to init them |
| bool use_ht = cmdline_get_bool("kernel.smp.ht", true); |
| |
| // we're implicitly running on the BSP |
| uint32_t bsp_apic_id = apic_local_id(); |
| |
| // iterate over all the cores and optionally disable some of them |
| dprintf(INFO, "cpu topology:\n"); |
| uint32_t using_count = 0; |
| for (uint32_t i = 0; i < num_cpus; ++i) { |
| x86_cpu_topology_t topo; |
| x86_cpu_topology_decode(apic_ids_temp[i], &topo); |
| |
| // filter it out if it's a HT pair that we dont want to use |
| bool keep = true; |
| if (!use_ht && topo.smt_id != 0) |
| keep = false; |
| |
| dprintf(INFO, "\t%u: apic id 0x%x package %u core %u smt %u%s%s\n", |
| i, apic_ids_temp[i], topo.package_id, topo.core_id, topo.smt_id, |
| (apic_ids_temp[i] == bsp_apic_id) ? " BSP" : "", |
| keep ? "" : " (not using)"); |
| |
| if (!keep) |
| continue; |
| |
| // save this apic id into the primary list |
| apic_ids[using_count++] = apic_ids_temp[i]; |
| } |
| num_cpus = using_count; |
| |
| // Find the CPU count limit |
| uint32_t max_cpus = cmdline_get_uint32("kernel.smp.maxcpus", SMP_MAX_CPUS); |
| if (max_cpus > SMP_MAX_CPUS || max_cpus <= 0) { |
| printf("invalid kernel.smp.maxcpus value, defaulting to %d\n", SMP_MAX_CPUS); |
| max_cpus = SMP_MAX_CPUS; |
| } |
| |
| dprintf(INFO, "Found %u cpu%c\n", num_cpus, (num_cpus > 1) ? 's': ' '); |
| if (num_cpus > max_cpus) { |
| dprintf(INFO, "Clamping number of CPUs to %u\n", max_cpus); |
| num_cpus = max_cpus; |
| } |
| |
| if (num_cpus == max_cpus || !use_ht) { |
| // If we are at the max number of CPUs, or have filtered out |
| // hyperthreads, sanity check that the bootstrap processor is in the set. |
| bool found_bp = false; |
| for (unsigned int i = 0; i < num_cpus; ++i) { |
| if (apic_ids[i] == bsp_apic_id) { |
| found_bp = true; |
| break; |
| } |
| } |
| ASSERT(found_bp); |
| } |
| |
| x86_init_smp(apic_ids, num_cpus); |
| |
| // trim the boot cpu out of the apic id list before passing to the AP booting routine |
| for (uint i = 0; i < num_cpus - 1; ++i) { |
| if (apic_ids[i] == bsp_apic_id) { |
| memmove(&apic_ids[i], &apic_ids[i+1], sizeof(*apic_ids) * (num_cpus - i - 1)); |
| break; |
| } |
| } |
| |
| x86_bringup_aps(apic_ids, num_cpus - 1); |
| |
| free(apic_ids); |
| } |
| |
| status_t platform_mp_prep_cpu_unplug(uint cpu_id) |
| { |
| // TODO: Make sure the IOAPIC and PCI have nothing for this CPU |
| return arch_mp_prep_cpu_unplug(cpu_id); |
| } |
| |
| void platform_init(void) |
| { |
| platform_init_debug(); |
| |
| platform_init_crashlog(); |
| |
| #if NO_USER_KEYBOARD |
| platform_init_keyboard(&console_input_buf); |
| #endif |
| |
| platform_init_smp(); |
| } |