| // Copyright 2017 The Fuchsia Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #include "register-set.h" |
| |
| #include <assert.h> |
| #include <math.h> |
| #include <stdio.h> |
| #include <string.h> |
| #include <zircon/hw/debug/arm64.h> |
| |
| #include <unittest/unittest.h> |
| |
| namespace { |
| |
| // Write a NaN double value to the given uint64_t (which is how most of the |
| // registers are stored in the structs). |
| void WriteNaNDouble(uint64_t* output) { |
| double nan_value = nan(""); |
| memcpy(output, &nan_value, sizeof(double)); |
| } |
| |
| } // namespace |
| |
| // Fill Test Values ------------------------------------------------------------------------------- |
| |
| void general_regs_fill_test_values(zx_thread_state_general_regs_t* regs) { |
| auto* bytes = reinterpret_cast<uint8_t*>(regs); |
| for (uint32_t index = 0; index < sizeof(*regs); ++index) { |
| bytes[index] = static_cast<uint8_t>(index + 1); |
| } |
| // Set various flag bits that will read back the same. |
| #if defined(__x86_64__) |
| // Here we set all flag bits that are modifiable from user space or |
| // that are not modifiable but are expected to read back as 1, with the |
| // exception of the trap flag (bit 8, which would interfere with |
| // execution if we set it). |
| // |
| // Note that setting the direction flag (bit 10) helps test whether the |
| // kernel correctly handles taking an interrupt when that flag is set |
| // (see ZX-998). |
| regs->rflags = (1 << 0) | // CF: carry flag |
| (1 << 1) | // Reserved, always 1 |
| (1 << 2) | // PF: parity flag |
| (1 << 4) | // AF: adjust flag |
| (1 << 6) | // ZF: zero flag |
| (1 << 7) | // SF: sign flag |
| (1 << 9) | // IF: interrupt enable flag (set by kernel) |
| (1 << 10) | // DF: direction flag |
| (1 << 11) | // OF: overflow flag |
| (1 << 14) | // NT: nested task flag |
| (1 << 18) | // AC: alignment check flag |
| (1 << 21); // ID: used for testing for CPUID support |
| |
| // Set these to canonical addresses so the kernel will accept them. |
| regs->fs_base = 0x0; |
| regs->gs_base = 0x0; |
| regs->rip = 0x0; |
| #elif defined(__aarch64__) |
| // Only set the 4 flag bits that are readable and writable by the |
| // instructions "msr nzcv, REG" and "mrs REG, nzcv". |
| regs->cpsr = 0xf0000000; |
| regs->tpidr = 0; |
| #endif |
| } |
| |
| void fp_regs_fill_test_values(zx_thread_state_fp_regs* regs) { |
| memset(regs, 0, sizeof(zx_thread_state_fp_regs)); |
| #if defined(__x86_64__) |
| for (size_t i = 0; i < 7; i++) |
| regs->st[i].low = i; |
| |
| // Write NaN to the last value. |
| WriteNaNDouble(®s->st[7].low); |
| #elif defined(__aarch64__) |
| // No FP struct on ARM (vector only). |
| #else |
| #error Unsupported architecture |
| #endif |
| } |
| |
| void vector_regs_fill_test_values(zx_thread_state_vector_regs* regs) { |
| memset(regs, 0, sizeof(zx_thread_state_vector_regs)); |
| #if defined(__x86_64__) |
| for (uint64_t i = 0; i < 16; i++) { |
| // Only set the low 128 bits (the first two 64-bit lanes, i.e. the XMM portion), since |
| // that's all that's guaranteed to be present. |
| regs->zmm[i].v[0] = i; |
| regs->zmm[i].v[1] = i << 8; |
| regs->zmm[i].v[2] = 0; |
| regs->zmm[i].v[3] = 0; |
| } |
| |
| // Write NaN to the last value. |
| WriteNaNDouble(®s->zmm[15].v[0]); |
| #elif defined(__aarch64__) |
| for (uint64_t i = 0; i < 32; i++) { |
| regs->v[i].low = i; |
| regs->v[i].high = i << 8; |
| } |
| |
| // Write NaN to the last value. |
| WriteNaNDouble(®s->v[31].low); |
| #else |
| #error Unsupported architecture |
| #endif |
| } |
| |
| void debug_regs_fill_test_values(zx_thread_state_debug_regs_t* to_write, |
| zx_thread_state_debug_regs_t* expected) { |
| uint64_t base = reinterpret_cast<uint64_t>(debug_regs_fill_test_values); |
| #if defined(__x86_64__) |
| // The kernel will validate that the addresses written to the debug registers are valid |
| // userspace addresses. We use values relative to this function, since it is guaranteed to be |
| // in the userspace range. |
| to_write->dr[0] = base; |
| to_write->dr[1] = base + 0x4000; |
| to_write->dr[2] = base + 0x8000; |
| to_write->dr[3] = 0x0; // Zero is also valid. |
| to_write->dr6 = 0; |
| to_write->dr7 = 0x33; // Activate all breakpoints. |
| |
| expected->dr[0] = base; |
| expected->dr[1] = base + 0x4000; |
| expected->dr[2] = base + 0x8000; |
| expected->dr[3] = 0x0; |
| expected->dr6 = 0xffff0ff0; // No breakpoint event detected. |
| expected->dr7 = 0x733; // Activate all breakpoints. |
| |
| #elif defined(__aarch64__) |
| *to_write = {}; |
| |
| // We only set two breakpoints because arm64 guarantees at least that many are present. |
| ARM64_DBGBCR_E_SET(&to_write->hw_bps[0].dbgbcr, 1); |
| ARM64_DBGBCR_E_SET(&to_write->hw_bps[1].dbgbcr, 1); |
| to_write->hw_bps[0].dbgbvr = base; |
| to_write->hw_bps[1].dbgbvr = base + 0x4000; |
| |
| ARM64_DBGWCR_E_SET(&to_write->hw_wps[0].dbgwcr, 1); |
| ARM64_DBGWCR_BAS_SET(&to_write->hw_wps[0].dbgwcr, 0xf); |
| ARM64_DBGWCR_LSC_SET(&to_write->hw_wps[0].dbgwcr, 0b11); |
| ARM64_DBGWCR_E_SET(&to_write->hw_wps[1].dbgwcr, 1); |
| ARM64_DBGWCR_BAS_SET(&to_write->hw_wps[1].dbgwcr, 0xf0); |
| to_write->hw_wps[0].dbgwvr = base; |
| to_write->hw_wps[1].dbgwvr = base + 0x4000; |
| |
| *expected = *to_write; |
| ARM64_DBGBCR_PMC_SET(&expected->hw_bps[0].dbgbcr, 0b10); |
| ARM64_DBGBCR_BAS_SET(&expected->hw_bps[0].dbgbcr, 0xf); |
| ARM64_DBGBCR_PMC_SET(&expected->hw_bps[1].dbgbcr, 0b10); |
| ARM64_DBGBCR_BAS_SET(&expected->hw_bps[1].dbgbcr, 0xf); |
| |
| ARM64_DBGWCR_PAC_SET(&expected->hw_wps[0].dbgwcr, 0b10); |
| ARM64_DBGWCR_LSC_SET(&expected->hw_wps[0].dbgwcr, 0b11); |
| ARM64_DBGWCR_SSC_SET(&expected->hw_wps[0].dbgwcr, 1); |
| ARM64_DBGWCR_PAC_SET(&expected->hw_wps[1].dbgwcr, 0b10); |
| ARM64_DBGWCR_LSC_SET(&expected->hw_wps[1].dbgwcr, 0); |
| ARM64_DBGWCR_SSC_SET(&expected->hw_wps[1].dbgwcr, 1); |
| #else |
| #error Unsupported architecture |
| #endif |
| } |
| |
| // Expect Eq Functions ---------------------------------------------------------------------------- |
| |
| bool general_regs_expect_eq(const zx_thread_state_general_regs_t& regs1, |
| const zx_thread_state_general_regs_t& regs2) { |
| BEGIN_HELPER; |
| #define CHECK_REG(FIELD) EXPECT_EQ(regs1.FIELD, regs2.FIELD, "Reg " #FIELD) |
| #if defined(__x86_64__) |
| CHECK_REG(rax); |
| CHECK_REG(rbx); |
| CHECK_REG(rcx); |
| CHECK_REG(rdx); |
| CHECK_REG(rsi); |
| CHECK_REG(rdi); |
| CHECK_REG(rbp); |
| CHECK_REG(rsp); |
| CHECK_REG(r8); |
| CHECK_REG(r9); |
| CHECK_REG(r10); |
| CHECK_REG(r11); |
| CHECK_REG(r12); |
| CHECK_REG(r13); |
| CHECK_REG(r14); |
| CHECK_REG(r15); |
| CHECK_REG(rip); |
| CHECK_REG(rflags); |
| #elif defined(__aarch64__) |
| for (int regnum = 0; regnum < 30; ++regnum) { |
| char name[10]; |
| snprintf(name, sizeof(name), "Reg r[%d]", regnum); |
| EXPECT_EQ(regs1.r[regnum], regs2.r[regnum], name); |
| } |
| CHECK_REG(lr); |
| CHECK_REG(sp); |
| CHECK_REG(pc); |
| CHECK_REG(cpsr); |
| #else |
| #error Unsupported architecture |
| #endif |
| #undef CHECK_REG |
| END_HELPER; |
| } |
| |
| bool fp_regs_expect_eq(const zx_thread_state_fp_regs_t& regs1, |
| const zx_thread_state_fp_regs_t& regs2) { |
| #if defined(__x86_64__) |
| BEGIN_HELPER; |
| |
| // This only checks the low 64 bits of each st register (the MMX aliases). |
| EXPECT_EQ(regs1.st[0].low, regs2.st[0].low, "Reg st[0].low"); |
| EXPECT_EQ(regs1.st[1].low, regs2.st[1].low, "Reg st[1].low"); |
| EXPECT_EQ(regs1.st[2].low, regs2.st[2].low, "Reg st[2].low"); |
| EXPECT_EQ(regs1.st[3].low, regs2.st[3].low, "Reg st[3].low"); |
| EXPECT_EQ(regs1.st[4].low, regs2.st[4].low, "Reg st[4].low"); |
| EXPECT_EQ(regs1.st[5].low, regs2.st[5].low, "Reg st[5].low"); |
| EXPECT_EQ(regs1.st[6].low, regs2.st[6].low, "Reg st[6].low"); |
| EXPECT_EQ(regs1.st[7].low, regs2.st[7].low, "Reg st[7].low"); |
| |
| END_HELPER; |
| #elif defined(__aarch64__) |
| // No FP regs on ARM (uses vector regs for FP). |
| (void)regs1; |
| (void)regs2; |
| return true; |
| #else |
| #error Unsupported architecture |
| #endif |
| } |
| |
| bool vector_regs_expect_unsupported_are_zero(const zx_thread_state_vector_regs_t& regs) { |
| #if defined(__x86_64__) |
| BEGIN_HELPER; |
| // For the first 16 ZMM registers, we currently support only the lowest 256 bits; everything |
| // above that should read back as 0. |
| for (int reg = 0; reg < 16; reg++) { |
| for (int i = 4; i < 8; i++) { |
| EXPECT_EQ(regs.zmm[reg].v[i], 0); |
| } |
| } |
| // The next 16 ZMM registers are unsupported. |
| for (int reg = 16; reg < 32; reg++) { |
| for (int i = 0; i < 8; i++) { |
| EXPECT_EQ(regs.zmm[reg].v[i], 0); |
| } |
| } |
| END_HELPER; |
| #elif defined(__aarch64__) |
| // All vector-register fields are supported on arm64. |
| (void)regs; |
| return true; |
| #else |
| #error Unsupported architecture |
| #endif |
| } |
| |
| bool vector_regs_expect_eq(const zx_thread_state_vector_regs_t& regs1, |
| const zx_thread_state_vector_regs_t& regs2) { |
| BEGIN_HELPER; |
| #if defined(__x86_64__) |
| // Only check the first 16 registers (guaranteed to work). |
| for (int reg = 0; reg < 16; reg++) { |
| // Only check the low 128 bits (guaranteed to work). |
| EXPECT_EQ(regs1.zmm[reg].v[0], regs2.zmm[reg].v[0]); |
| EXPECT_EQ(regs1.zmm[reg].v[1], regs2.zmm[reg].v[1]); |
| } |
| #elif defined(__aarch64__) |
| for (int i = 0; i < 32; i++) { |
| EXPECT_EQ(regs1.v[i].high, regs2.v[i].high); |
| EXPECT_EQ(regs1.v[i].low, regs2.v[i].low); |
| } |
| #else |
| #error Unsupported architecture |
| #endif |
| END_HELPER; |
| } |
| |
| bool debug_regs_expect_eq(const char* file, int line, const zx_thread_state_debug_regs_t& regs1, |
| const zx_thread_state_debug_regs_t& regs2) { |
| BEGIN_HELPER; |
| #if defined(__x86_64__) |
| char buf[1024]; |
| snprintf(buf, sizeof(buf), "%s:%d: %s", file, line, "Reg DR0"); |
| EXPECT_EQ(regs1.dr[0], regs2.dr[0], buf); |
| snprintf(buf, sizeof(buf), "%s:%d: %s", file, line, "Reg DR1"); |
| EXPECT_EQ(regs1.dr[1], regs2.dr[1], buf); |
| snprintf(buf, sizeof(buf), "%s:%d: %s", file, line, "Reg DR2"); |
| EXPECT_EQ(regs1.dr[2], regs2.dr[2], buf); |
| snprintf(buf, sizeof(buf), "%s:%d: %s", file, line, "Reg DR3"); |
| EXPECT_EQ(regs1.dr[3], regs2.dr[3], buf); |
| snprintf(buf, sizeof(buf), "%s:%d: %s", file, line, "Reg DR6"); |
| EXPECT_EQ(regs1.dr6, regs2.dr6, buf); |
| snprintf(buf, sizeof(buf), "%s:%d: %s", file, line, "Reg DR7"); |
| EXPECT_EQ(regs1.dr7, regs2.dr7, buf); |
| #elif defined(__aarch64__) |
| (void)file; |
| (void)line; |
| for (uint32_t i = 0; i < 16; i++) { |
| EXPECT_EQ(regs1.hw_bps[i].dbgbcr, regs2.hw_bps[i].dbgbcr); |
| EXPECT_EQ(regs1.hw_bps[i].dbgbvr, regs2.hw_bps[i].dbgbvr); |
| } |
| |
| for (uint32_t i = 0; i < 16; i++) { |
| EXPECT_EQ(regs1.hw_wps[i].dbgwcr, regs2.hw_wps[i].dbgwcr); |
| EXPECT_EQ(regs1.hw_wps[i].dbgwvr, regs2.hw_wps[i].dbgwvr); |
| } |
| |
| EXPECT_EQ(regs1.esr, regs2.esr); |
| EXPECT_EQ(regs1.far, regs2.far); |
| #else |
| #error Unsupported architecture |
| #endif |
| END_HELPER; |
| } |
| |
| // Spin Functions -------------------------------------------------------------------------------- |
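| // Common spin location: each spin_with_*() function ends by branching to spin_address. |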
| #if defined(__x86_64__) |
| __asm__( |
| ".global spin_address\n" |
| "spin_address:\n" |
| "jmp spin_address\n"); |
| #elif defined(__aarch64__) |
| __asm__( |
| ".global spin_address\n" |
| "spin_address:\n" |
| "b spin_address\n"); |
| #endif |
| |
| // spin_with_general_regs() function. |
| #if defined(__x86_64__) |
| static_assert(offsetof(zx_thread_state_general_regs_t, rax) == 8 * 0, ""); |
| static_assert(offsetof(zx_thread_state_general_regs_t, rbx) == 8 * 1, ""); |
| static_assert(offsetof(zx_thread_state_general_regs_t, rcx) == 8 * 2, ""); |
| static_assert(offsetof(zx_thread_state_general_regs_t, rdx) == 8 * 3, ""); |
| static_assert(offsetof(zx_thread_state_general_regs_t, rsi) == 8 * 4, ""); |
| static_assert(offsetof(zx_thread_state_general_regs_t, rdi) == 8 * 5, ""); |
| static_assert(offsetof(zx_thread_state_general_regs_t, rbp) == 8 * 6, ""); |
| static_assert(offsetof(zx_thread_state_general_regs_t, rsp) == 8 * 7, ""); |
| static_assert(offsetof(zx_thread_state_general_regs_t, r8) == 8 * 8, ""); |
| static_assert(offsetof(zx_thread_state_general_regs_t, r9) == 8 * 9, ""); |
| static_assert(offsetof(zx_thread_state_general_regs_t, r10) == 8 * 10, ""); |
| static_assert(offsetof(zx_thread_state_general_regs_t, r11) == 8 * 11, ""); |
| static_assert(offsetof(zx_thread_state_general_regs_t, r12) == 8 * 12, ""); |
| static_assert(offsetof(zx_thread_state_general_regs_t, r13) == 8 * 13, ""); |
| static_assert(offsetof(zx_thread_state_general_regs_t, r14) == 8 * 14, ""); |
| static_assert(offsetof(zx_thread_state_general_regs_t, r15) == 8 * 15, ""); |
| static_assert(offsetof(zx_thread_state_general_regs_t, rip) == 8 * 16, ""); |
| static_assert(offsetof(zx_thread_state_general_regs_t, rflags) == 8 * 17, ""); |
| static_assert(offsetof(zx_thread_state_general_regs_t, fs_base) == 8 * 18, ""); |
| static_assert(offsetof(zx_thread_state_general_regs_t, gs_base) == 8 * 19, ""); |
| static_assert(sizeof(zx_thread_state_general_regs_t) == 8 * 20, ""); |
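| // On entry %rdi points to the zx_thread_state_general_regs_t to load (field offsets are |
| // checked by the static_asserts above). |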
| __asm__( |
| ".pushsection .text, \"ax\", @progbits\n" |
| ".global spin_with_general_regs\n" |
| "spin_with_general_regs:\n" |
| // Set flags using POPF. Note that we use POPF rather than SAHF |
| // because POPF is able to set more flags than SAHF. |
| "pushq 8*17(%rdi)\n" |
| "popfq\n" |
| // Load general purpose registers. |
| "movq 8*0(%rdi), %rax\n" |
| "movq 8*1(%rdi), %rbx\n" |
| "movq 8*2(%rdi), %rcx\n" |
| "movq 8*3(%rdi), %rdx\n" |
| "movq 8*4(%rdi), %rsi\n" |
| // Skip assigning rdi here and assign it last. |
| "movq 8*6(%rdi), %rbp\n" |
| "movq 8*7(%rdi), %rsp\n" |
| "movq 8*8(%rdi), %r8\n" |
| "movq 8*9(%rdi), %r9\n" |
| "movq 8*10(%rdi), %r10\n" |
| "movq 8*11(%rdi), %r11\n" |
| "movq 8*12(%rdi), %r12\n" |
| "movq 8*13(%rdi), %r13\n" |
| "movq 8*14(%rdi), %r14\n" |
| "movq 8*15(%rdi), %r15\n" |
| "movq 8*5(%rdi), %rdi\n" |
| |
| ".global spin_address\n" |
| "jmp spin_address\n" |
| ".popsection\n"); |
| #elif defined(__aarch64__) |
| static_assert(offsetof(zx_thread_state_general_regs_t, r[0]) == 8 * 0, ""); |
| static_assert(offsetof(zx_thread_state_general_regs_t, r[1]) == 8 * 1, ""); |
| static_assert(offsetof(zx_thread_state_general_regs_t, lr) == 8 * 30, ""); |
| static_assert(offsetof(zx_thread_state_general_regs_t, sp) == 8 * 31, ""); |
| static_assert(offsetof(zx_thread_state_general_regs_t, pc) == 8 * 32, ""); |
| static_assert(offsetof(zx_thread_state_general_regs_t, cpsr) == 8 * 33, ""); |
| static_assert(sizeof(zx_thread_state_general_regs_t) == 8 * 35, ""); |
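| // On entry x0 points to the zx_thread_state_general_regs_t to load. |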
| __asm__( |
| ".pushsection .text, \"ax\", %progbits\n" |
| ".global spin_with_general_regs\n" |
| "spin_with_general_regs:\n" |
| // Load sp via a temporary register. |
| "ldr x1, [x0, #8*31]\n" |
| "mov sp, x1\n" |
| // Load NZCV flags, a subset of the PSTATE/CPSR register. |
| "ldr x1, [x0, #8*33]\n" |
| "msr nzcv, x1\n" |
| // Load general purpose registers. |
| // Skip assigning x0 and x1 here and assign them last. |
| "ldp x2, x3, [x0, #8*2]\n" |
| "ldp x4, x5, [x0, #8*4]\n" |
| "ldp x6, x7, [x0, #8*6]\n" |
| "ldp x8, x9, [x0, #8*8]\n" |
| "ldp x10, x11, [x0, #8*10]\n" |
| "ldp x12, x13, [x0, #8*12]\n" |
| "ldp x14, x15, [x0, #8*14]\n" |
| "ldp x16, x17, [x0, #8*16]\n" |
| "ldp x18, x19, [x0, #8*18]\n" |
| "ldp x20, x21, [x0, #8*20]\n" |
| "ldp x22, x23, [x0, #8*22]\n" |
| "ldp x24, x25, [x0, #8*24]\n" |
| "ldp x26, x27, [x0, #8*26]\n" |
| "ldp x28, x29, [x0, #8*28]\n" |
| "ldr x30, [x0, #8*30]\n" |
| "ldp x0, x1, [x0]\n" |
| |
| ".global spin_address\n" |
| "b spin_address\n" |
| ".popsection\n"); |
| #else |
| #error Unsupported architecture |
| #endif |
| |
| // spin_with_fp_regs() function. |
| #if defined(__x86_64__) |
| static_assert(offsetof(zx_thread_state_fp_regs_t, fcw) == 0, ""); |
| static_assert(offsetof(zx_thread_state_fp_regs_t, fsw) == 2, ""); |
| static_assert(offsetof(zx_thread_state_fp_regs_t, ftw) == 4, ""); |
| static_assert(offsetof(zx_thread_state_fp_regs_t, fop) == 6, ""); |
| static_assert(offsetof(zx_thread_state_fp_regs_t, fip) == 8, ""); |
| static_assert(offsetof(zx_thread_state_fp_regs_t, fdp) == 16, ""); |
| static_assert(offsetof(zx_thread_state_fp_regs_t, st) == 32, ""); |
| __asm__( |
| ".pushsection .text, \"ax\", @progbits\n" |
| ".global spin_with_fp_regs\n" |
| "spin_with_fp_regs:\n" |
| |
| // rdi = &zx_thread_state_fp_regs_t.st[0] |
| "lea 32(%rdi), %rdi\n" |
| |
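| // Put a recognizable constant into xmm0. |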
| "movq $0x9999, %rax\n" |
| "movq %rax, %xmm0\n" |
| |
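| // Load the low 64 bits of each 16-byte st[] slot into the corresponding MMX register. |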
| "movq 16*0(%rdi), %mm0\n" |
| "movq 16*1(%rdi), %mm1\n" |
| "movq 16*2(%rdi), %mm2\n" |
| "movq 16*3(%rdi), %mm3\n" |
| "movq 16*4(%rdi), %mm4\n" |
| "movq 16*5(%rdi), %mm5\n" |
| "movq 16*6(%rdi), %mm6\n" |
| "movq 16*7(%rdi), %mm7\n" |
| |
| ".global spin_address\n" |
| "jmp spin_address\n" |
| ".popsection\n"); |
| #elif defined(__aarch64__) |
| // Just spins and does nothing. ARM64 doesn't define separate FP state, but providing this stub |
| // keeps the rest of the code platform-independent. |
| __asm__( |
| ".pushsection .text, \"ax\", %progbits\n" |
| ".global spin_with_fp_regs\n" |
| "spin_with_fp_regs:\n" |
| |
| // Do nothing. |
| |
| ".global spin_address\n" |
| "b spin_address\n" |
| ".popsection\n"); |
| #else |
| #error Unsupported architecture |
| #endif |
| |
| // spin_with_vector_regs() function. |
| #if defined(__x86_64__) |
| __asm__( |
| ".pushsection .text, \"ax\", @progbits\n" |
| ".global spin_with_vector_regs\n" |
| "spin_with_vector_regs:\n" |
| |
| // rdi points to zmm[0] on entry. This only loads the xmm registers, which are the only ones |
| // guaranteed to exist. Each zmm entry is 512 bits = 64 bytes. |
| "movdqu 64*0(%rdi), %xmm0\n" |
| "movdqu 64*1(%rdi), %xmm1\n" |
| "movdqu 64*2(%rdi), %xmm2\n" |
| "movdqu 64*3(%rdi), %xmm3\n" |
| "movdqu 64*4(%rdi), %xmm4\n" |
| "movdqu 64*5(%rdi), %xmm5\n" |
| "movdqu 64*6(%rdi), %xmm6\n" |
| "movdqu 64*7(%rdi), %xmm7\n" |
| "movdqu 64*8(%rdi), %xmm8\n" |
| "movdqu 64*9(%rdi), %xmm9\n" |
| "movdqu 64*10(%rdi), %xmm10\n" |
| "movdqu 64*11(%rdi), %xmm11\n" |
| "movdqu 64*12(%rdi), %xmm12\n" |
| "movdqu 64*13(%rdi), %xmm13\n" |
| "movdqu 64*14(%rdi), %xmm14\n" |
| "movdqu 64*15(%rdi), %xmm15\n" |
| |
| ".global spin_address\n" |
| "jmp spin_address\n" |
| ".popsection\n"); |
| #elif defined(__aarch64__) |
| static_assert(offsetof(zx_thread_state_vector_regs_t, fpcr) == 0, ""); |
| static_assert(offsetof(zx_thread_state_vector_regs_t, fpsr) == 4, ""); |
| static_assert(offsetof(zx_thread_state_vector_regs_t, v) == 8, ""); |
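| // On entry x0 points to the zx_thread_state_vector_regs_t to load. |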
| __asm__( |
| ".pushsection .text, \"ax\", %progbits\n" |
| ".global spin_with_vector_regs\n" |
| "spin_with_vector_regs:\n" |
| |
| // FPCR and FPSR are first. |
| "ldp w1, w2, [x0]\n" |
| "msr fpcr, x1\n" |
| "msr fpsr, x2\n" |
| |
| // Skip to the vector registers. |
| "add x0, x0, 8\n" |
| |
| // Each register is 128 bits = 16 bytes, so each pair is 32 bytes. |
| "ldp q0, q1, [x0, #(0 * 32)]\n" |
| "ldp q2, q3, [x0, #(1 * 32)]\n" |
| "ldp q4, q5, [x0, #(2 * 32)]\n" |
| "ldp q6, q7, [x0, #(3 * 32)]\n" |
| "ldp q8, q9, [x0, #(4 * 32)]\n" |
| "ldp q10, q11, [x0, #(5 * 32)]\n" |
| "ldp q12, q13, [x0, #(6 * 32)]\n" |
| "ldp q14, q15, [x0, #(7 * 32)]\n" |
| "ldp q16, q17, [x0, #(8 * 32)]\n" |
| "ldp q18, q19, [x0, #(9 * 32)]\n" |
| "ldp q20, q21, [x0, #(10 * 32)]\n" |
| "ldp q22, q23, [x0, #(11 * 32)]\n" |
| "ldp q24, q25, [x0, #(12 * 32)]\n" |
| "ldp q26, q27, [x0, #(13 * 32)]\n" |
| "ldp q28, q29, [x0, #(14 * 32)]\n" |
| "ldp q30, q31, [x0, #(15 * 32)]\n" |
| |
| ".global spin_address\n" |
| "b spin_address\n" |
| ".popsection\n"); |
| #else |
| #error Unsupported architecture |
| #endif |
| |
| // spin_with_debug_regs() function. |
| #if defined(__x86_64__) |
| static_assert(offsetof(zx_thread_state_debug_regs_t, dr) == 8 * 0, ""); |
| static_assert(offsetof(zx_thread_state_debug_regs_t, dr6) == 8 * 4, ""); |
| static_assert(offsetof(zx_thread_state_debug_regs_t, dr7) == 8 * 5, ""); |
| __asm__( |
| ".pushsection .text, \"ax\", @progbits\n" |
| ".global spin_with_debug_regs\n" |
| "spin_with_debug_regs:\n" |
| |
| // Do nothing. |
| // The register state will be set through syscalls because setting the debug registers |
| // is a privileged instruction. |
| |
| ".global spin_address\n" |
| "jmp spin_address\n" |
| ".popsection\n"); |
| #elif defined(__aarch64__) |
| __asm__( |
| ".pushsection .text, \"ax\", %progbits\n" |
| ".global spin_with_debug_regs\n" |
| "spin_with_debug_regs:\n" |
| |
| // Do nothing. |
| // The register state will be set through syscalls because setting the debug registers |
| // is a privileged instruction. |
| |
| ".global spin_address\n" |
| "b spin_address\n" |
| ".popsection\n"); |
| #else |
| #error Unsupported architecture |
| #endif |
| |
| // Save and Exit Functions ------------------------------------------------------------------------ |
| |
| // save_general_regs_and_exit_thread() function. |
| #if defined(__x86_64__) |
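| // On entry the stack pointer holds the address of the output zx_thread_state_general_regs_t, |
| // so each register is stored at its struct offset relative to %rsp. |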
| __asm__( |
| ".pushsection .text,\"ax\", @progbits\n" |
| ".global save_general_regs_and_exit_thread\n" |
| "save_general_regs_and_exit_thread:\n" |
| "movq %rax, 8*0(%rsp)\n" |
| "movq %rbx, 8*1(%rsp)\n" |
| "movq %rcx, 8*2(%rsp)\n" |
| "movq %rdx, 8*3(%rsp)\n" |
| "movq %rsi, 8*4(%rsp)\n" |
| "movq %rdi, 8*5(%rsp)\n" |
| "movq %rbp, 8*6(%rsp)\n" |
| "movq %rsp, 8*7(%rsp)\n" |
| "movq %r8, 8*8(%rsp)\n" |
| "movq %r9, 8*9(%rsp)\n" |
| "movq %r10, 8*10(%rsp)\n" |
| "movq %r11, 8*11(%rsp)\n" |
| "movq %r12, 8*12(%rsp)\n" |
| "movq %r13, 8*13(%rsp)\n" |
| "movq %r14, 8*14(%rsp)\n" |
| "movq %r15, 8*15(%rsp)\n" |
| // Save the flags register. |
| "pushfq\n" |
| "popq %rax\n" |
| "movq %rax, 8*17(%rsp)\n" |
| // Fill out the rip field with known value. |
| "leaq save_general_regs_and_exit_thread(%rip), %rax\n" |
| "movq %rax, 8*16(%rsp)\n" |
| "jmp zx_thread_exit@PLT\n" |
| "ud2\n" |
| ".popsection\n"); |
| #elif defined(__aarch64__) |
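| // On entry sp holds the address of the output zx_thread_state_general_regs_t. |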
| __asm__( |
| ".pushsection .text, \"ax\", %progbits\n" |
| ".global save_general_regs_and_exit_thread\n" |
| "save_general_regs_and_exit_thread:\n" |
| "stp x0, x1, [sp, #8*0]\n" |
| "stp x2, x3, [sp, #8*2]\n" |
| "stp x4, x5, [sp, #8*4]\n" |
| "stp x6, x7, [sp, #8*6]\n" |
| "stp x8, x9, [sp, #8*8]\n" |
| "stp x10, x11, [sp, #8*10]\n" |
| "stp x12, x13, [sp, #8*12]\n" |
| "stp x14, x15, [sp, #8*14]\n" |
| "stp x16, x17, [sp, #8*16]\n" |
| "stp x18, x19, [sp, #8*18]\n" |
| "stp x20, x21, [sp, #8*20]\n" |
| "stp x22, x23, [sp, #8*22]\n" |
| "stp x24, x25, [sp, #8*24]\n" |
| "stp x26, x27, [sp, #8*26]\n" |
| "stp x28, x29, [sp, #8*28]\n" |
| "str x30, [sp, #8*30]\n" |
| // Save the sp register. |
| "mov x0, sp\n" |
| "str x0, [sp, #8*31]\n" |
| // Fill out the pc field with known value. |
| "adr x0, save_general_regs_and_exit_thread\n" |
| "str x0, [sp, #8*32]\n" |
| // Save NZCV flags, a subset of the PSTATE/CPSR register. |
| "mrs x0, nzcv\n" |
| "str x0, [sp, #8*33]\n" |
| "bl zx_thread_exit\n" |
| "brk 0\n" |
| ".popsection\n"); |
| #else |
| #error Unsupported architecture |
| #endif |
| |
| // save_fp_regs_and_exit_thread() function. |
| #if defined(__x86_64__) |
| static_assert(offsetof(zx_thread_state_fp_regs, st) == 32, ""); |
| __asm__( |
| ".pushsection .text,\"ax\", @progbits\n" |
| ".global save_fp_regs_and_exit_thread\n" |
| "save_fp_regs_and_exit_thread:\n" |
| |
| // This only saves the low 64 bits, which is the MMX register. Each slot in the struct is |
| // 128 bits, so we advance 16 bytes per register; the 32-byte offset is where st[] begins in |
| // the struct (see the static_assert above). |
| "movq %mm0, 32 + 16*0(%rsp)\n" |
| "movq %mm1, 32 + 16*1(%rsp)\n" |
| "movq %mm2, 32 + 16*2(%rsp)\n" |
| "movq %mm3, 32 + 16*3(%rsp)\n" |
| "movq %mm4, 32 + 16*4(%rsp)\n" |
| "movq %mm5, 32 + 16*5(%rsp)\n" |
| "movq %mm6, 32 + 16*6(%rsp)\n" |
| "movq %mm7, 32 + 16*7(%rsp)\n" |
| |
| "jmp zx_thread_exit@PLT\n" |
| "ud2\n" |
| ".popsection\n"); |
| #elif defined(__aarch64__) |
| __asm__( |
| ".pushsection .text, \"ax\", %progbits\n" |
| ".global save_fp_regs_and_exit_thread\n" |
| "save_fp_regs_and_exit_thread:\n" |
| |
| // Does nothing (no FP values). |
| |
| "bl zx_thread_exit\n" |
| "brk 0\n" |
| ".popsection\n"); |
| #else |
| #error Unsupported architecture |
| #endif |
| |
| // save_vector_regs_and_exit_thread() function. |
| #if defined(__x86_64__) |
| static_assert(offsetof(zx_thread_state_vector_regs, zmm) == 0, ""); |
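| // On entry the stack pointer holds the address of the output zx_thread_state_vector_regs_t |
| // (zmm[] is at offset 0, per the static_assert above). |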
| __asm__( |
| ".pushsection .text,\"ax\", @progbits\n" |
| ".global save_vector_regs_and_exit_thread\n" |
| "save_vector_regs_and_exit_thread:\n" |
| |
| // Each vector is 512 bits (64 bytes). We only save the low 128 bits (the xmm registers). |
| "movdqu %xmm0, 64*0(%rsp)\n" |
| "movdqu %xmm1, 64*1(%rsp)\n" |
| "movdqu %xmm2, 64*2(%rsp)\n" |
| "movdqu %xmm3, 64*3(%rsp)\n" |
| "movdqu %xmm4, 64*4(%rsp)\n" |
| "movdqu %xmm5, 64*5(%rsp)\n" |
| "movdqu %xmm6, 64*6(%rsp)\n" |
| "movdqu %xmm7, 64*7(%rsp)\n" |
| "movdqu %xmm8, 64*8(%rsp)\n" |
| "movdqu %xmm9, 64*9(%rsp)\n" |
| "movdqu %xmm10, 64*10(%rsp)\n" |
| "movdqu %xmm11, 64*11(%rsp)\n" |
| "movdqu %xmm12, 64*12(%rsp)\n" |
| "movdqu %xmm13, 64*13(%rsp)\n" |
| "movdqu %xmm14, 64*14(%rsp)\n" |
| "movdqu %xmm15, 64*15(%rsp)\n" |
| |
| "jmp zx_thread_exit@PLT\n" |
| "ud2\n" |
| ".popsection\n"); |
| #elif defined(__aarch64__) |
| __asm__( |
| ".pushsection .text, \"ax\", %progbits\n" |
| ".global save_vector_regs_and_exit_thread\n" |
| "save_vector_regs_and_exit_thread:\n" |
| |
| // The pointer to the output struct is passed in sp. |
| "mov x0, sp\n" |
| |
| // FPCR and FPSR. |
| "mrs x1, fpcr\n" |
| "mrs x2, fpsr\n" |
| "stp w1, w2, [x0]\n" |
| |
| // Skip to the vector registers. |
| "add x0, x0, 8\n" |
| |
| // Each register is 128 bits = 16 bytes, so each pair is 32 bytes. |
| "stp q0, q1, [x0, #(0 * 32)]\n" |
| "stp q2, q3, [x0, #(1 * 32)]\n" |
| "stp q4, q5, [x0, #(2 * 32)]\n" |
| "stp q6, q7, [x0, #(3 * 32)]\n" |
| "stp q8, q9, [x0, #(4 * 32)]\n" |
| "stp q10, q11, [x0, #(5 * 32)]\n" |
| "stp q12, q13, [x0, #(6 * 32)]\n" |
| "stp q14, q15, [x0, #(7 * 32)]\n" |
| "stp q16, q17, [x0, #(8 * 32)]\n" |
| "stp q18, q19, [x0, #(9 * 32)]\n" |
| "stp q20, q21, [x0, #(10 * 32)]\n" |
| "stp q22, q23, [x0, #(11 * 32)]\n" |
| "stp q24, q25, [x0, #(12 * 32)]\n" |
| "stp q26, q27, [x0, #(13 * 32)]\n" |
| "stp q28, q29, [x0, #(14 * 32)]\n" |
| "stp q30, q31, [x0, #(15 * 32)]\n" |
| |
| "bl zx_thread_exit\n" |
| "brk 0\n" |
| ".popsection\n"); |
| #else |
| #error Unsupported architecture |
| #endif |
| |
| // save_thread_local_regs_and_exit_thread() function. |
| #if defined(__x86_64__) |
| static_assert(offsetof(struct thread_local_regs, fs_base_value) == 8 * 0, ""); |
| static_assert(offsetof(struct thread_local_regs, gs_base_value) == 8 * 1, ""); |
| __asm__( |
| ".pushsection .text,\"ax\", @progbits\n" |
| ".global save_thread_local_regs_and_exit_thread\n" |
| "save_thread_local_regs_and_exit_thread:\n" |
| |
| // Read the values pointed to by fs_base and gs_base into the output. The test will assert |
| // that the correct values were read. |
| "movq %fs:0, %rax\n" |
| "movq %rax, 8*0(%rsp)\n" |
| "movq %gs:0, %rax\n" |
| "movq %rax, 8*1(%rsp)\n" |
| // Write constants through fs_base and gs_base. The test will assert that the correct values |
| // were written. |
| "movq $0x12345678, %fs:0\n" |
| "movq $0x7890abcd, %gs:0\n" |
| |
| "jmp zx_thread_exit@PLT\n" |
| "ud2\n" |
| ".popsection\n"); |
| #elif defined(__aarch64__) |
| __asm__( |
| ".pushsection .text,\"ax\", @progbits\n" |
| ".global save_thread_local_regs_and_exit_thread\n" |
| "save_thread_local_regs_and_exit_thread:\n" |
| |
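| // Read the value pointed to by tpidr_el0 into the output, then write the constant 0x12345678 |
| // through tpidr_el0. The test will assert that the correct values were read and written. |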
| "mrs x1, tpidr_el0\n" |
| "ldr x2, [x1]\n" |
| "str x2, [sp, #(8*0)]\n" |
| "movz x2, 0x5678\n" |
| "movk x2, 0x1234, lsl 16\n" |
| "str x2, [x1]\n" |
| |
| "bl zx_thread_exit\n" |
| "brk 0\n" |
| ".popsection\n"); |
| #else |
| #error Unsupported architecture |
| #endif |