| // Copyright 2016 The Fuchsia Authors |
| // |
| // Use of this source code is governed by a MIT-style |
| // license that can be found in the LICENSE file or at |
| // https://opensource.org/licenses/MIT |
| |
| #include <lib/affine/ratio.h> |
| #include <lib/cmdline.h> |
| #include <lib/userabi/vdso-constants.h> |
| #include <lib/userabi/vdso.h> |
| #include <lib/version.h> |
| #include <platform.h> |
| #include <zircon/types.h> |
| |
| #include <fbl/alloc_checker.h> |
| #include <object/handle.h> |
| #include <vm/pmm.h> |
| #include <vm/vm.h> |
| #include <vm/vm_aspace.h> |
| #include <vm/vm_object.h> |
| |
| #include "vdso-code.h" |
| |
// The raw vDSO image. It is defined in assembly via RODSO_IMAGE (see
// rodso-asm.h); vdso-code.h describes the image's size and layout.
extern "C" {
extern const char vdso_image[];
}
| |
| namespace { |
| |
| // Each KernelVmoWindow object represents a mapping in the kernel address |
| // space of a T object found inside a VM object. The kernel mapping exists |
| // for the lifetime of the KernelVmoWindow object. |
| template <typename T> |
| class KernelVmoWindow { |
| public: |
| static_assert(__is_pod(T), "this is for C-compatible types only!"); |
| |
| KernelVmoWindow(const char* name, fbl::RefPtr<VmObject> vmo, uint64_t offset) |
| : mapping_(nullptr) { |
| uint64_t page_offset = ROUNDDOWN(offset, PAGE_SIZE); |
| size_t offset_in_page = static_cast<size_t>(offset % PAGE_SIZE); |
| ASSERT(offset % alignof(T) == 0); |
| |
| const size_t size = offset_in_page + sizeof(T); |
| const uint arch_mmu_flags = ARCH_MMU_FLAG_PERM_READ | ARCH_MMU_FLAG_PERM_WRITE; |
| zx_status_t status = VmAspace::kernel_aspace()->RootVmar()->CreateVmMapping( |
| 0 /* ignored */, size, 0 /* align pow2 */, 0 /* vmar flags */, ktl::move(vmo), page_offset, |
| arch_mmu_flags, name, &mapping_); |
| ASSERT(status == ZX_OK); |
| data_ = reinterpret_cast<T*>(mapping_->base() + offset_in_page); |
| } |
| |
| ~KernelVmoWindow() { |
| if (mapping_) { |
| zx_status_t status = mapping_->Destroy(); |
| ASSERT(status == ZX_OK); |
| } |
| } |
| |
| T* data() const { return data_; } |
| |
| private: |
| fbl::RefPtr<VmMapping> mapping_; |
| T* data_; |
| }; |
| |
| // The .dynsym section of the vDSO, an array of ELF symbol table entries. |
| struct VDsoDynSym { |
| struct { |
| uintptr_t info, value, size; |
| } table[VDSO_DYNSYM_COUNT]; |
| }; |
| |
// Glue three tokens into one identifier. PASTE forwards to PASTE_1 so that
// its arguments are macro-expanded before the ## operators are applied.
#define PASTE(x, y, z) PASTE_1(x, y, z)
#define PASTE_1(x, y, z) x##y##z
| |
| class VDsoDynSymWindow { |
| public: |
| DISALLOW_COPY_ASSIGN_AND_MOVE(VDsoDynSymWindow); |
| |
| static_assert(sizeof(VDsoDynSym) == VDSO_DATA_END_dynsym - VDSO_DATA_START_dynsym, |
| "either VDsoDynsym or gen-rodso-code.sh is suspect"); |
| |
| explicit VDsoDynSymWindow(fbl::RefPtr<VmObject> vmo) |
| : window_("vDSO .dynsym", ktl::move(vmo), VDSO_DATA_START_dynsym) {} |
| |
| void get_symbol_entry(size_t i, uintptr_t* value, size_t* size) { |
| *value = window_.data()->table[i].value; |
| *size = window_.data()->table[i].size; |
| } |
| |
| void set_symbol_entry(size_t i, uintptr_t value, size_t size) { |
| window_.data()->table[i].value = value; |
| window_.data()->table[i].size = size; |
| } |
| |
| void localize_symbol_entry(size_t i) { |
| // The high nybble is the STB_* bits; STB_LOCAL is 0. |
| window_.data()->table[i].info &= 0xf; |
| } |
| |
| #define get_symbol(symbol, value, size) get_symbol_entry(PASTE(VDSO_DYNSYM_, symbol, ), value, size) |
| |
| #define set_symbol(symbol, target) \ |
| set_symbol_entry(PASTE(VDSO_DYNSYM_, symbol, ), PASTE(VDSO_CODE_, target, ), \ |
| PASTE(VDSO_CODE_, target, _SIZE)) |
| |
| #define localize_symbol(symbol) localize_symbol_entry(PASTE(VDSO_DYNSYM_, symbol, )) |
| |
| private: |
| KernelVmoWindow<VDsoDynSym> window_; |
| }; |
| |
| class VDsoCodeWindow { |
| public: |
| DISALLOW_COPY_ASSIGN_AND_MOVE(VDsoCodeWindow); |
| |
| using CodeBuffer = uint8_t[VDSO_CODE_END - VDSO_CODE_START]; |
| |
| explicit VDsoCodeWindow(fbl::RefPtr<VmObject> vmo) |
| : window_("vDSO code segment", ktl::move(vmo), VDSO_CODE_START) {} |
| |
| // Fill the given code region (a whole function) with safely invalid code. |
| // This code should never be run, and any attempt to use it should crash. |
| void block_execution(uintptr_t address, size_t size) { |
| ASSERT(address >= VDSO_CODE_START); |
| ASSERT(address + size < VDSO_CODE_END); |
| address -= VDSO_CODE_START; |
| |
| #if ARCH_X86 |
| |
| // Fill with the single-byte HLT instruction, so any place |
| // user-mode jumps into this code, it gets a trap. |
| memset(&Code()[address], 0xf4, size); |
| |
| #elif ARCH_ARM64 |
| |
| // Fixed-size instructions. |
| ASSERT(address % 4 == 0); |
| ASSERT(size % 4 == 0); |
| uint32_t* code = reinterpret_cast<uint32_t*>(&Code()[address]); |
| for (size_t i = 0; i < size / 4; ++i) |
| code[i] = 0xd4200020; // 'brk #1' (what __builtin_trap() emits) |
| |
| #else |
| #error what architecture? |
| #endif |
| } |
| |
| private: |
| CodeBuffer& Code() { return *window_.data(); } |
| |
| KernelVmoWindow<CodeBuffer> window_; |
| }; |
| |
// Redirect a vDSO entry point to an alternate implementation. Both the
// |symbol| entry and its underscore-prefixed alias (_<symbol>) in the
// .dynsym table seen through |dynsym_win| are rewritten via set_symbol to
// refer to |target|.
#define REDIRECT_SYSCALL(dynsym_win, symbol, target) \
  do {                                               \
    (dynsym_win).set_symbol(symbol, target);         \
    (dynsym_win).set_symbol(_##symbol, target);      \
  } while (0)
| |
// Block the named zx_* function in the vDSO seen through |dynsym_win| and
// |code_win|. Both the |symbol| entry and its _<symbol> alias are localized
// so that they become invisible to runtime symbol resolution. The two
// entries must describe the same address and size (ASSERTed); that region
// of code is then clobbered with trapping instructions.
#define BLOCK_SYSCALL(dynsym_win, code_win, symbol)        \
  do {                                                     \
    (dynsym_win).localize_symbol(symbol);                  \
    (dynsym_win).localize_symbol(_##symbol);               \
    uintptr_t address;                                     \
    size_t size;                                           \
    (dynsym_win).get_symbol(symbol, &address, &size);      \
    uintptr_t _address;                                    \
    size_t _size;                                          \
    (dynsym_win).get_symbol(_##symbol, &_address, &_size); \
    ASSERT(address == _address);                           \
    ASSERT(size == _size);                                 \
    (code_win).block_execution(address, size);             \
  } while (0)
| |
| // Random attributes in kazoo fidl files become "categories" of syscalls. |
| // For each category, define a function block_<category> to block all the |
| // syscalls in that category. These functions can be used in |
| // VDso::CreateVariant (below) to block a category of syscalls for a particular |
| // variant vDSO. |
| #define SYSCALL_CATEGORY_BEGIN(category) \ |
| [[maybe_unused]] \ |
| void block_##category##_syscalls(VDsoDynSymWindow& dynsym_window, VDsoCodeWindow& code_window) { |
| #define SYSCALL_IN_CATEGORY(syscall) BLOCK_SYSCALL(dynsym_window, code_window, zx_##syscall); |
| #define SYSCALL_CATEGORY_END(category) } |
| #include <lib/syscalls/category.inc> |
| #undef SYSCALL_CATEGORY_BEGIN |
| #undef SYSCALL_IN_CATEGORY_END |
| #undef SYSCALL_CATEGORY_END |
| |
| } // anonymous namespace |
| |
| const VDso* VDso::instance_ = NULL; |
| |
| // Private constructor, can only be called by Create (below). |
| VDso::VDso(KernelHandle<VmObjectDispatcher>* vmo_kernel_handle) |
| : RoDso("vdso/full", vdso_image, VDSO_CODE_END, VDSO_CODE_START, vmo_kernel_handle) {} |
| |
| // This is called exactly once, at boot time. |
| const VDso* VDso::Create(KernelHandle<VmObjectDispatcher>* vmo_kernel_handles) { |
| ASSERT(!instance_); |
| |
| fbl::AllocChecker ac; |
| VDso* vdso = new (&ac) VDso(&vmo_kernel_handles[0]); |
| ASSERT(ac.check()); |
| |
| // Map a window into the VMO to write the vdso_constants struct. |
| static_assert(sizeof(vdso_constants) == VDSO_DATA_CONSTANTS_SIZE, "gen-rodso-code.sh is suspect"); |
| KernelVmoWindow<vdso_constants> constants_window("vDSO constants", vdso->vmo()->vmo(), |
| VDSO_DATA_CONSTANTS); |
| zx_ticks_t per_second = ticks_per_second(); |
| |
| // Grab a copy of the ticks to mono ratio; we need this to initialize the |
| // constants window. |
| affine::Ratio ticks_to_mono_ratio = platform_get_ticks_to_time_ratio(); |
| |
| // At this point in time, we absolutely must know the rate that our tick |
| // counter is ticking at. If we don't, then something has gone horribly |
| // wrong. |
| ASSERT(per_second != 0); |
| ASSERT(ticks_to_mono_ratio.numerator() != 0); |
| ASSERT(ticks_to_mono_ratio.denominator() != 0); |
| |
| // Initialize the constants that should be visible to the vDSO. |
| // Rather than assigning each member individually, do this with |
| // struct assignment and a compound literal so that the compiler |
| // can warn if the initializer list omits any member. |
| auto constants = constants_window.data(); |
| *constants = vdso_constants{ |
| arch_max_num_cpus(), |
| { |
| arch_cpu_features(), |
| arch_get_hw_breakpoint_count(), |
| arch_get_hw_watchpoint_count(), |
| }, |
| arch_dcache_line_size(), |
| arch_icache_line_size(), |
| per_second, |
| ticks_to_mono_ratio.numerator(), |
| ticks_to_mono_ratio.denominator(), |
| pmm_count_total_bytes(), |
| strlen(version_string()), |
| "", |
| }; |
| ASSERT(constants->version_string_len < sizeof(constants->version_string)); |
| memcpy(constants->version_string, version_string(), constants->version_string_len); |
| |
| // Conditionally patch some of the entry points related to time based on |
| // platform details which get determined at runtime. |
| VDsoDynSymWindow dynsym_window(vdso->vmo()->vmo()); |
| |
| // If user mode cannot access the tick counter registers, or kernel command |
| // line arguments demand that we access the tick counter via a syscall |
| // instead of direct observation, then we need to make sure to redirect |
| // symbol in the vDSO such that we always syscall in order to query ticks. |
| // |
| // Since this can effect how clock monotonic is calculated as well, we may |
| // need to redirect zx_clock_get_monotonic as well. |
| const bool need_syscall_for_ticks = !platform_usermode_can_access_tick_registers() || |
| gCmdline.GetBool("vdso.ticks_get_force_syscall", false); |
| const bool need_syscall_for_mono = |
| gCmdline.GetBool("vdso.clock_get_monotonic_force_syscall", false); |
| |
| if (need_syscall_for_ticks) { |
| REDIRECT_SYSCALL(dynsym_window, zx_ticks_get, SYSCALL_zx_ticks_get_via_kernel); |
| } |
| |
| if (need_syscall_for_mono) { |
| // Force a syscall for zx_clock_get_monotonic if instructed to do so by the |
| // kernel command line arguments. Make sure to swap out the implementation |
| // of zx_deadline_after as well. |
| REDIRECT_SYSCALL(dynsym_window, zx_clock_get_monotonic, |
| SYSCALL_zx_clock_get_monotonic_via_kernel); |
| REDIRECT_SYSCALL(dynsym_window, zx_deadline_after, deadline_after_via_kernel_mono); |
| } else if (need_syscall_for_ticks) { |
| // If ticks must be accessed via syscall, then choose the alternate form |
| // for clock_get_monotonic which performs the scaling in user mode, but |
| // thunks into the kernel to read the ticks register. |
| REDIRECT_SYSCALL(dynsym_window, zx_clock_get_monotonic, clock_get_monotonic_via_kernel_ticks); |
| REDIRECT_SYSCALL(dynsym_window, zx_deadline_after, deadline_after_via_kernel_ticks); |
| } |
| |
| DEBUG_ASSERT(!(vdso->vmo_rights() & ZX_RIGHT_WRITE)); |
| for (size_t v = static_cast<size_t>(Variant::FULL) + 1; v < static_cast<size_t>(Variant::COUNT); |
| ++v) |
| vdso->CreateVariant(static_cast<Variant>(v), &vmo_kernel_handles[v]); |
| |
| instance_ = vdso; |
| return instance_; |
| } |
| |
| uintptr_t VDso::base_address(const fbl::RefPtr<VmMapping>& code_mapping) { |
| return code_mapping ? code_mapping->base() - VDSO_CODE_START : 0; |
| } |
| |
| // Each vDSO variant VMO is made via a COW clone of the main/default vDSO |
| // VMO. A variant can block some system calls, by syscall category. |
| // This works by modifying the symbol table entries to make the symbols |
| // invisible to dynamic linking (STB_LOCAL) and then clobbering the code |
| // with trapping instructions. In this way, all the code locations are the |
| // same across variants and the syscall entry enforcement doesn't have to |
| // care which variant is in use. The places where the blocked |
| // syscalls' syscall entry instructions would be no longer have the syscall |
| // instructions, so a process using the variant can never get into syscall |
| // entry with that PC value and hence can never pass the vDSO enforcement |
| // test. |
| void VDso::CreateVariant(Variant variant, KernelHandle<VmObjectDispatcher>* vmo_kernel_handle) { |
| DEBUG_ASSERT(variant > Variant::FULL); |
| DEBUG_ASSERT(variant < Variant::COUNT); |
| DEBUG_ASSERT(!variant_vmo_[variant_index(variant)]); |
| |
| fbl::RefPtr<VmObject> new_vmo; |
| zx_status_t status = vmo()->CreateChild(ZX_VMO_CHILD_COPY_ON_WRITE, 0, size(), false, &new_vmo); |
| ASSERT(status == ZX_OK); |
| |
| VDsoDynSymWindow dynsym_window(new_vmo); |
| VDsoCodeWindow code_window(new_vmo); |
| |
| const char* name = nullptr; |
| switch (variant) { |
| case Variant::TEST1: |
| name = "vdso/test1"; |
| block_test_category1_syscalls(dynsym_window, code_window); |
| break; |
| |
| case Variant::TEST2: |
| name = "vdso/test2"; |
| block_test_category2_syscalls(dynsym_window, code_window); |
| break; |
| |
| // No default case so the compiler will warn about new enum entries. |
| case Variant::FULL: |
| case Variant::COUNT: |
| PANIC("VDso::CreateVariant called with bad variant"); |
| } |
| |
| zx_rights_t rights; |
| status = VmObjectDispatcher::Create(ktl::move(new_vmo), vmo_kernel_handle, &rights); |
| ASSERT(status == ZX_OK); |
| |
| status = vmo_kernel_handle->dispatcher()->set_name(name, strlen(name)); |
| ASSERT(status == ZX_OK); |
| |
| variant_vmo_[variant_index(variant)] = vmo_kernel_handle->dispatcher(); |
| } |