// Copyright 2016 The Fuchsia Authors
//
// Use of this source code is governed by an MIT-style
// license that can be found in the LICENSE file or at
// https://opensource.org/licenses/MIT

// This file provides real-mode entry points for
// 1) secondary CPU initialization
// 2) suspend-to-RAM wakeup

#include <asm.h>
#include <arch/x86/bootstrap16.h>
#include <arch/x86/descriptor.h>
#include <arch/x86/registers.h>
#include <arch/defines.h>

// This code's only non-PIC instructions are movabs, which can be fixed up
// safely (see gen-kaslr-fixups.sh). This section name is specially known
// by kernel.ld and gen-kaslr-fixups.sh.
.section .text.bootstrap16,"ax",%progbits
.balign PAGE_SIZE
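
// Everything from x86_bootstrap16_start to x86_bootstrap16_end gets copied
// into a low-memory page at runtime (see "the copied code page" below), so
// it must stay position-independent and page-aligned.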

DATA(x86_bootstrap16_start)

.code16
FUNCTION_LABEL(x86_bootstrap16_entry)
    // Enter no-fill cache mode (allegedly this is the initial state
    // according to Intel 3A, but on at least one Broadwell the APs can
    // come up with caching enabled)
    mov %cr0, %ebx
    or $X86_CR0_CD, %ebx
    and $~X86_CR0_NW, %ebx
    mov %ebx, %cr0
0:

    // We cheat a little and don't switch away from our real-mode segments
    // when entering protected mode. In both real mode and protected mode,
    // all of our code and data accesses are relative to %cs and %ss, using
    // the real mode segment calculations.

    // Set up %ds/%ss to refer to the data region
    mov %cs, %si
    add $0x100, %si
    mov %si, %ds
    mov %si, %ss

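    // (A real-mode segment base is segment << 4, so adding 0x100 above moved
    // the base up by 0x1000 bytes: the data page immediately follows this
    // code page.)
    // The GDT descriptor loaded here was staged in the data page by the
    // kernel before this CPU started; the operand is a %ds-relative offset.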
    lgdtl BCD_PHYS_GDTR_OFFSET

    // enter protected mode (but without paging)
    mov %cr0, %ebx
    or $X86_CR0_PE, %ebx
    mov %ebx, %cr0

    // clear instruction prefetch queue
    jmp 0f
0:
    // enable PAE / PGE
    mov %cr4, %ecx
    or $(X86_CR4_PAE|X86_CR4_PGE), %ecx
    mov %ecx, %cr4

    // load CR3 with the bootstrap PML4
    mov BCD_PHYS_BOOTSTRAP_PML4_OFFSET, %ecx
    mov %ecx, %cr3
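    // The bootstrap PML4 identity-maps these pages, so enabling paging
    // below does not yank the currently-executing code out from under us.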

    // Enable IA-32e mode and no-execute (NX) page support; we need the
    // latter once we switch to the real kernel address space.
    mov $X86_MSR_IA32_EFER, %ecx
    rdmsr
    or $X86_EFER_LME, %eax
    or $X86_EFER_NXE, %eax
    wrmsr

    // enable paging
    mov %cr0, %ebx
    or $X86_CR0_PG, %ebx
    mov %ebx, %cr0
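    // With LME and PG both set, the CPU is now in IA-32e mode, though it is
    // still executing 16-bit code until the far transfer below loads a
    // 64-bit %cs.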

    // Translate the data page segment into a full physical address
    mov %ds, %esi
    shl $4, %esi

    // Jump to 64-bit CS
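    // %ss:%esp is pointed at a far pointer (32-bit offset, then code
    // selector) stored in the data page, so lretl pops it into %eip:%cs.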
    mov $BCD_PHYS_LM_ENTRY_OFFSET, %esp
    lretl

// Get the secondary CPU into 64-bit mode with interrupts disabled and no TSS
.code64
FUNCTION_LABEL(_x86_secondary_cpu_long_mode_entry)
    // When we get here, %rsi should contain the absolute address of our
    // data page.
    mov $1, %rdi
    LOCK xadd %edi, BCD_CPU_COUNTER_OFFSET(%esi)
    // %rdi is now the index this CPU should use to grab resources
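    // (lock xadd atomically adds %edi to the counter and returns the
    // counter's old value in %edi; writing %edi also zero-extends into
    // %rdi.)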

    // Multiply the index by 2 (shift left by 1), since the per_cpu member
    // contains two 64-bit values, which sit at offsets 8*(2n) and 8*(2n+1)
    // relative to BCD_PER_CPU_BASE_OFFSET
    shl $1, %rdi
    // Retrieve the top of this CPU's initial kernel stack
    // Note: the stack is unusable until we switch cr3 below
    mov BCD_PER_CPU_BASE_OFFSET(%rsi, %rdi, 8), %rsp

    // Retrieve this CPU's initial thread
    add $1, %rdi
    mov BCD_PER_CPU_BASE_OFFSET(%rsi, %rdi, 8), %rdx

    // Retrieve the new PML4 address before our data page becomes unreachable
    mov BCD_PHYS_KERNEL_PML4_OFFSET(%esi), %ecx
    // Similarly for the CPU waiting mask
    mov BCD_CPU_WAITING_OFFSET(%esi), %rdi
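    // %rdi stays live from here on: it becomes the first argument to
    // x86_secondary_entry below.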

    // Switch out of the copied code page and into the kernel's
    // version of it
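    // (The movabs gets a KASLR fixup, per the section comment at the top of
    // the file, so %rbx holds .Lhighaddr's address in the kernel's own
    // mapping rather than in the copied page.)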
    movabs $.Lhighaddr, %rbx
    jmp *%rbx
.Lhighaddr:
    // Switch to the kernel's PML4
    mov %rcx, %cr3
    // As of this point, %esi is invalid

    // Reload the GDT with one based on the non-identity mapping; the GDT we
    // loaded in 16-bit code lives in the data page, which is no longer mapped
    lgdt _temp_gdtr(%rip)

    // Zero our data segments
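    // (the null selector is valid for data segments in 64-bit mode)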
    xor %eax, %eax
    mov %eax, %ds
    mov %eax, %es
    mov %eax, %fs
    mov %eax, %gs
    mov %eax, %ss

    // Load the IDT
    call load_startup_idt

    mov %rdx, %rsi
    // Do an indirect call to keep this position-independent
    // x86_secondary_entry(CPU ready counter, thread)
    movabs $x86_secondary_entry, %rbx
    call *%rbx

    // If x86_secondary_entry returns, hang.
0:
    hlt
    jmp 0b

// Get the CPU into 64-bit mode with interrupts disabled and no TSS. This
// must only be called on the bootstrap processor.
FUNCTION_LABEL(_x86_suspend_wakeup)
    // Retrieve the new PML4 address before our data page becomes unreachable
    mov BCD_PHYS_KERNEL_PML4_OFFSET(%esi), %ecx

    // Stash the register pointer so that we can read it after we change
    // address spaces
    mov RED_REGISTERS_OFFSET(%esi), %rdi
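    // %rdi now points at the saved general-purpose register block that the
    // restores below read from (offsets 8 through 128).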

    // Switch out of the copied code page and into the kernel's
    // version of it
    movabs $.Lwakeup_highaddr, %rbx
    jmp *%rbx
.Lwakeup_highaddr:
    // Switch to the kernel's PML4
    mov %rcx, %cr3
    // As of this point, %esi is invalid

    // Reload the GDT with one based on the non-identity mapping
    lgdt _temp_gdtr(%rip)

    // Zero our data segments
    xor %eax, %eax
    mov %eax, %ds
    mov %eax, %es
    mov %eax, %fs
    mov %eax, %gs
    mov %eax, %ss

    // Restore %gs.base to &bp_percpu. We need to do this before
    // returning to C code, since the C code might use safe-stack
    // and/or stack-protector.
    // TODO(teisenbe): There is a small performance gain that could be made
    // here by switching from wrmsr to wrgsbase, if wrgsbase is supported.
    // Currently this is omitted for simplicity.
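    // wrmsr writes %edx:%eax to the MSR selected by %ecx, so split the
    // 64-bit address of bp_percpu across %edx:%eax.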
    lea bp_percpu(%rip), %rax
    mov %rax, %rdx
    shr $32, %rdx
    mov $X86_MSR_IA32_GS_BASE, %ecx
    wrmsr

    // Restore the stack pointer first, so we can use the stack right now.
    mov 120(%rdi), %rsp

    // Load the IDT. Note this uses the stack and clobbers %rax, but not %rdi.
    call load_startup_idt

    mov 8(%rdi), %rsi
    mov 16(%rdi), %rbp
    mov 24(%rdi), %rbx
    mov 32(%rdi), %rdx
    mov 40(%rdi), %rcx
    mov 48(%rdi), %rax
    mov 56(%rdi), %r8
    mov 64(%rdi), %r9
    mov 72(%rdi), %r10
    mov 80(%rdi), %r11
    mov 88(%rdi), %r12
    mov 96(%rdi), %r13
    mov 104(%rdi), %r14
    mov 112(%rdi), %r15

    // Note: %rdi is not restored, but it is a caller-save register anyway.
    // If we want to restore %rdi, we could potentially use the stack here
    // to do something like "push 128(%rdi); mov (%rdi), %rdi; ret".

    // Restore RIP
    jmp *128(%rdi)

DATA(x86_bootstrap16_end)
    nop