| // Copyright 2025 The Fuchsia Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #include <lib/zx/thread.h> |
| #include <zircon/sanitizer.h> |
| |
| #include <cstdint> |
| |
| #include "../weak.h" |
| #include "thread-list.h" |
| #include "thread-storage.h" |
| #include "thread.h" |
| #include "threads_impl.h" |
| |
| // Weak redeclarations of the sanitizer runtime's thread hooks. In this file |
| // they are invoked only through the Weak<> wrappers (SanitizerCreateHook and |
| // SanitizerStartHook) defined below. |
| // NOTE(review): presumably weak so that this file still links when no |
| // sanitizer runtime provides definitions -- confirm against "../weak.h". |
| extern "C" decltype(__sanitizer_thread_create_hook) __sanitizer_thread_create_hook [[gnu::weak]]; |
| extern "C" decltype(__sanitizer_thread_start_hook) __sanitizer_thread_start_hook [[gnu::weak]]; |
| |
| namespace LIBC_NAMESPACE_DECL { |
| |
| // TODO(https://gcc.gnu.org/bugzilla/show_bug.cgi?id=123565): This has to |
| // appear to have external linkage to work around a GCC bug with the new |
| // extended asm outside of functions. The definition in assembly will still |
| // actually only define a local ELF symbol, just with different name mangling. |
| // When the GCC bug is fixed, the main declaration below will work. |
| // |
| // This declaration is used only when building for AArch64 with a compiler |
| // other than Clang; every other configuration declares AsmTrampoline inside |
| // the anonymous namespace below instead. |
| #if defined(__aarch64__) && !defined(__clang__) |
| [[noreturn]] void AsmTrampoline(uintptr_t arg1, uintptr_t arg2); |
| #endif |
| |
| namespace { |
| |
| // Wrappers through which the weak sanitizer hooks are invoked: the start hook |
| // from StartThread() on the new thread, and the create hook from ThreadStart() |
| // on the creating thread. |
| // NOTE(review): Weak<> comes from "../weak.h"; presumably its Call() does |
| // nothing when the hook symbol is undefined -- confirm there. |
| using SanitizerCreateHook = Weak<__sanitizer_thread_create_hook>; |
| using SanitizerStartHook = Weak<__sanitizer_thread_start_hook>; |
| |
| // TODO(https://fxbug.dev/478347581): Ideally &StartThread itself would be the |
| // entry PC value given to zx::thread::start. It gets two arguments in |
| // registers, which are the user's function pointer and the void* to pass it. |
| // |
| // However, the normal ABI requires that both the thread pointer and the |
| // shadow-call-stack pointer be set; and zx::thread::start only sets the PC, |
| // SP, and the two argument registers. So between those two registers and the |
| // stack, the other pointers must be communicated to the AsmTrampoline code, |
| // which must install them before tail-calling StartThread. StartTrampoline |
| // manages all that. |
| // |
| // Moreover, there is a window, between calling zx::thread::start and the |
| // thread actually getting scheduled and getting through the AsmTrampoline |
| // code, where normal invariants don't hold. In this window, the thread |
| // pointer register is zero. The thread and its registers can be seen by |
| // __sanitizer_memory_snapshot. But with the thread pointer not yet set to |
| // point to a Thread on the gAllThreads list, it will have only its registers |
| // and nothing else to take as pointer references owned by that thread. In |
| // particular, the thread's stack won't be scanned, so anything stored only |
| // there and not in the registers will be overlooked by the snapshot. |
| // |
| // The user's function pointer and void* argument for it don't get stored |
| // anywhere but in the initial register values passed to zx::thread::start. |
| // Once the creating thread's zx::thread::start call returns from the kernel, |
| // those values may no longer be visible via the creating thread's own state. |
| // So it's crucial that they go directly into the new thread's registers where |
| // they will be seen. The new Thread block doesn't yet have anything |
| // interesting in it, so it's fine if the snapshot doesn't consider _it_ yet. |
| // The same is true for the shadow call stack and the machine stack. So |
| // StartTrampoline::Prepare() transfers _those_ pointers via the stack, but |
| // keeps the user's pointers in the two available registers. |
| // |
| // In future, the zx::thread::start API should allow setting the thread pointer |
| // and shadow-call-stack registers directly. Then no trampoline would be |
| // required and the subtleties about pointers being visible to the snapshot |
| // logic would be much simpler. |
| |
| // TODO(https://gcc.gnu.org/bugzilla/show_bug.cgi?id=123565): |
| // The first branch should be unconditional; see the GCC workaround above. |
| #if !(defined(__aarch64__) && !defined(__clang__)) |
| // Forward declaration of the assembly entry point that is defined later in |
| // this file. A new thread begins executing there; that code establishes the |
| // normal ABI conditions (thread pointer and shadow call stack pointer) and |
| // then tail-calls StartThread, defined just below. |
| [[noreturn, clang::cfi_unchecked_callee]] |
| void AsmTrampoline(uintptr_t arg1, uintptr_t arg2); |
| #else |
| // The attribute here applies to StartThread. It keeps the buggy GCC from |
| // concluding that StartThread is unreferenced and eliding it; GCC would also |
| // warn about that, which breaks the build even before the undefined |
| // references could surface at link time. |
| [[gnu::used]] |
| #endif |
| // StartThread is the visible outermost frame of the new thread and the direct |
| // caller of the user's ThreadFunction. It never returns: the value returned |
| // by func(arg) is handed straight to ThreadExit. |
| [[noreturn]] void StartThread(ThreadFunction* func, void* arg) { |
|   Thread& current = *__pthread_self(); |
| |
|   // The sanitizer_hook value is not stored anywhere else and is never made |
|   // visible to __sanitizer_memory_snapshot. |
|   void* const start_hook_arg = current.sanitizer_hook; |
|   const auto c11_thread = ToC11Thread(current); |
|   SanitizerStartHook::Call(start_hook_arg, c11_thread); |
| |
|   // func and arg are only ever live in temporary registers. If this thread |
|   // is suspended any time before this point (even before it has run the |
|   // first instruction of AsmTrampoline), __sanitizer_memory_snapshot() finds |
|   // them in those registers. Once the call to func begins, the snapshot has |
|   // no way to reach them unless the user code itself makes them reachable. |
|   ThreadExit(func(arg)); |
| } |
| |
| // Returns the address just past the end of the region described by |
| // thread.safe_stack (iov_base + iov_len), viewed as a pointer to 64-bit words. |
| // StartTrampoline starts from this address and pushes downward from it. |
| uint64_t* ThreadStackLimit(Thread& thread) { |
|   const auto& stack = thread.safe_stack; |
|   std::byte* const top = reinterpret_cast<std::byte*>(stack.iov_base) + stack.iov_len; |
|   return reinterpret_cast<uint64_t*>(top); |
| } |
| |
| // Stages the initial state for a new thread and starts it at AsmTrampoline. |
| // Intended use: construct, call Prepare() once, then call Start(). |
| class StartTrampoline { |
|  public: |
|   StartTrampoline() = delete; |
| |
|   // The initial stack pointer begins at the limit of the thread's stack; |
|   // Prepare() pushes down from there. |
|   explicit StartTrampoline(Thread& thread) : thread_{thread}, sp_{ThreadStackLimit(thread)} {} |
| |
|   // Pushes the two words that AsmTrampoline consumes and records the user's |
|   // function and argument for the two start registers. The thread pointer is |
|   // pushed last, so it sits at the final SP with second_stack_value() in the |
|   // word above it. |
|   void Prepare(ThreadFunction* func, void* arg) { |
|     Push(second_stack_value()); |
|     Push(thread_pointer()); |
|     arg1_ = reinterpret_cast<uintptr_t>(func); |
|     arg2_ = reinterpret_cast<uintptr_t>(arg); |
|   } |
| |
|   // Starts the kernel thread at AsmTrampoline with the staged stack pointer |
|   // and the two argument values, returning the start status as a zx::result. |
|   zx::result<> Start() const { |
|     const auto entry_pc = reinterpret_cast<uintptr_t>(AsmTrampoline); |
|     const auto initial_sp = reinterpret_cast<uintptr_t>(sp_); |
|     const auto status = thread_handle()->start(entry_pc, initial_sp, arg1_, arg2_); |
|     return zx::make_result(status); |
|   } |
| |
|  private: |
|   // Stores one word just below the current staged stack pointer. |
|   void Push(uint64_t value) { *--sp_ = value; } |
| |
|   // The thread pointer value for this Thread, as an integer. |
|   uintptr_t thread_pointer() const { |
|     return reinterpret_cast<uintptr_t>(pthread_to_tp(&thread_)); |
|   } |
| |
|   // The machine-specific second word that is passed on the stack. |
|   uint64_t second_stack_value() const { |
| #if defined(__x86_64__) |
|     // On x86, installing the thread pointer takes a system call that needs |
|     // the thread handle, so the handle rides along on the stack. |
|     return thread_handle()->get(); |
| #else |
|     // Everywhere else this is the initial shadow call stack pointer. The |
|     // first slot is skipped and left as zero, so a backtrace can read |
|     // downwards from the current shadow-call-stack pointer and stop at the |
|     // zero slot without knowing the base address. |
|     const auto scs_base = reinterpret_cast<uintptr_t>(thread_.shadow_call_stack.iov_base); |
|     return scs_base + sizeof(uintptr_t); |
| #endif |
|   } |
| |
|   // Borrowed (unowned) view of the new thread's kernel handle. |
|   zx::unowned_thread thread_handle() const { |
|     return zx::unowned_thread{thread_.zxr_thread.handle}; |
|   } |
| |
|   Thread& thread_; |
|   uint64_t* sp_; |
|   uintptr_t arg1_ = 0; |
|   uintptr_t arg2_ = 0; |
| }; |
| |
| #if defined(__aarch64__) |
| |
| // AArch64 thread entry point. On entry SP points at the two words that |
| // StartTrampoline::Prepare() pushed: the thread pointer at [sp] and the |
| // initial shadow call stack pointer in the word above it. Both values are |
| // popped from the stack. The thread pointer is put into place in TPIDR_EL0 |
| // and the shadow call stack pointer is loaded into x18. The argument |
| // registers are not touched, so StartThread receives the values that were |
| // given to zx::thread::start. |
| #ifdef __clang__ |
| // With Clang the asm is the body of a [[gnu::naked]] function. |
| // GCC doesn't support [[gnu::naked]] functions for aarch64! But it supports |
| // extended asm _outside functions_ that can provide C++ symbol definitions! |
| // So in the GCC build the __asm__ statement below stands alone at namespace |
| // scope and defines the AsmTrampoline symbol itself. |
| [[noreturn, clang::cfi_unchecked_callee, // |
| gnu::naked, gnu::no_profile_instrument_function]] |
| void AsmTrampoline(uintptr_t arg1, uintptr_t arg2) { |
| #endif |
| __asm__( |
| #ifndef __clang__ |
| // GCC only: with no enclosing function, the section, the symbol label, and |
| // the start of the CFI region are written out by hand. |
| R"""( |
| .pushsection .text.AsmTrampoline, "ax", %%progbits |
| %cc[AsmTrampoline]: |
| .cfi_startproc |
| )""" |
| #endif |
| // Common body: pop both words (x17 = thread pointer, x18 = shadow call stack |
| // pointer), install the thread pointer, then branch to StartThread. |
| R"""( |
| .cfi_def_cfa_offset 16 |
| ldp x17, x18, [sp], #16 |
| .cfi_def_cfa_offset 0 |
| msr TPIDR_EL0, x17 |
| b %cc[StartThread] |
| )""" |
| #ifndef __clang__ |
| // GCC only: close the CFI region and restore the previous section. |
| R"""( |
| .cfi_endproc |
| .popsection |
| )""" |
| #endif |
| // No outputs. StartThread is an input so the branch can name it; the GCC form |
| // also names AsmTrampoline, which the %cc[AsmTrampoline] label above defines. |
| // NOTE(review): the "-s" and ":" constraint spellings are GCC-specific -- |
| // confirm their exact semantics against the GCC extended asm documentation. |
| : |
| : |
| #ifdef __clang__ |
| [StartThread] "X"(StartThread) |
| #else |
| [StartThread] "-s"(StartThread), [AsmTrampoline] ":"(AsmTrampoline) |
| #endif |
| ); |
| #ifdef __clang__ |
| } |
| #endif |
| |
| #elif defined(__riscv) |
| |
| // RISC-V thread entry point. This closely matches the AArch64 version above. |
| // On entry sp points at the two words pushed by StartTrampoline::Prepare(): |
| // the word at 0(sp) (the thread pointer) is loaded into tp, and the word at |
| // 8(sp) (the initial shadow call stack pointer) is loaded into gp. Then sp is |
| // moved up past both words and control tail-calls StartThread. The argument |
| // registers are not touched, so StartThread receives the values that were |
| // given to zx::thread::start. |
| [[noreturn, clang::cfi_unchecked_callee, // |
| gnu::naked, gnu::no_profile_instrument_function]] |
| void AsmTrampoline(uintptr_t arg1, uintptr_t arg2) { |
| __asm__ volatile( |
| R"""( |
| .cfi_def_cfa_offset 16 |
| ld tp, 0(sp) |
| ld gp, 8(sp) |
| add sp, sp, 16 |
| .cfi_def_cfa_offset 0 |
| tail %cc[StartThread] |
| )""" |
| : |
| : [StartThread] "s"(StartThread)); |
| } |
| |
| #elif defined(__x86_64__) |
| |
| // x86-64 thread entry point. This must call: |
| // zx_object_set_property(%edi=handle, %esi=ZX_PROP_REGISTER_FS, %rdx=&value, |
| // %ecx=sizeof(value)) |
| |
| // The starting SP points to where thread_pointer() was stored by Prepare(), so |
| // that's &value. The handle is stored above that at SP+8. The incoming %rdi |
| // and %rsi arguments need to be preserved around the system call, so those go |
| // into call-saved registers (%r12 and %r13). If the call reports a nonzero |
| // status, the thread traps on the ud2 in the cold section. Otherwise, once |
| // the arguments have been restored after the call, those two call-saved |
| // registers are rezeroed so that only the user's code might be keeping those |
| // pointers alive anywhere once we reach StartThread(), above. |
| // The calling convention expects SP to be -8 mod 16 with the return address |
| // from the call on the top of the stack. So the incoming SP is adjusted back |
| // up only one word (the pop, which discards the thread pointer word), and the |
| // TOS word (where the handle was) zeroed before the jump to reflect an |
| // apparent zero return address as is the convention for the outermost frame. |
| [[noreturn, clang::cfi_unchecked_callee, // |
| gnu::naked, gnu::no_profile_instrument_function]] |
| void AsmTrampoline(uintptr_t arg1, uintptr_t arg2) { |
| __asm volatile( |
| R"""( |
| .cfi_def_cfa_offset 16 |
| .cfi_undefined %%rip |
| mov %%rdi, %%r12 |
| mov %%rsi, %%r13 |
| mov %%rsp, %%rdx |
| mov %[sizeof_ptr], %%ecx |
| mov 8(%%rsp), %%edi |
| mov %[prop], %%esi |
| call _zx_object_set_property@PLT |
| test %%eax, %%eax |
| jnz .Lfail.%= |
| mov %%r12, %%rdi |
| mov %%r13, %%rsi |
| pop %%r12 |
| .cfi_adjust_cfa_offset -8 |
| xor %%r12, %%r12 |
| xor %%r13, %%r13 |
| mov %%r12, (%%rsp) |
| .cfi_offset %%rip, -8 |
| jmp %cc[StartThread] |
| .pushsection .text.cold, "ax?", %%progbits |
| .Lfail.%=: |
| ud2 |
| .popsection |
| )""" |
| : |
| : [prop] "i"(ZX_PROP_REGISTER_FS), [sizeof_ptr] "i"(sizeof(uintptr_t)), |
| [StartThread] "s"(StartThread)); |
| } |
| |
| #else |
| |
| #error "unsupported machine" |
| |
| #endif |
| |
| // Starts the kernel thread for `thread` so that it ends up running |
| // StartThread(func, arg), by way of the AsmTrampoline entry point. |
| // |
| // TODO(https://fxbug.dev/478347581): All of that should be replaced with: |
| // thread.thread_handle()->start(&StartThread, sp, func, arg, tp, scsp) |
| zx::result<> StartKernelThread(Thread& thread, ThreadFunction* func, void* arg) { |
|   StartTrampoline launcher{thread}; |
|   launcher.Prepare(func, arg); |
|   return launcher.Start(); |
| } |
| |
| } // namespace |
| |
| // Starts a previously created thread running func(arg). On success the |
| // Thread is released from `thread` and returned to the caller. On failure the |
| // start error is returned and `thread` remains owned by the CreatedThread. |
| zx::result<Thread*> ThreadStart(CreatedThread thread, ThreadFunction* func, void* arg) { |
|   // Capture everything needed from the Thread up front. Once the thread has |
|   // started it could exit immediately, and if it is detached the pointer |
|   // would become invalid at that point. |
|   void* const create_hook_arg = thread->sanitizer_hook; |
|   const thrd_t c11_thread = ToC11Thread(*thread); |
| |
|   // Count the new thread as running before it starts, so there is never a |
|   // window where it is running but not accounted for. |
|   __libc.thread_count.fetch_add(1); |
|   zx::result started = StartKernelThread(*thread, func, arg); |
| |
|   // This callback pairs with the before-create callback whether or not the |
|   // thread really started: the thrd_error argument tells the sanitizer to |
|   // clean up after a thread creation that never actually happened. |
|   SanitizerCreateHook::Call(create_hook_arg, c11_thread, |
|                             C11ThreadError(started.status_value())); |
| |
|   if (!started.is_error()) { |
|     return zx::ok(thread.release()); |
|   } |
| |
|   // It never really started, so take it back out of the live-thread count. |
|   [[maybe_unused]] int previous_count = __libc.thread_count.fetch_sub(1); |
|   assert(previous_count > 0); |
|   return started.take_error(); |
| } |
| |
| // Invoked when a CreatedThread is destroyed without a successful |
| // ThreadStart(). Since the thread was never started, simply closing its |
| // handle destroys the kernel thread object. The ThreadStorage is then |
| // recovered so that its destruction deallocates the stacks and thread block. |
| void CreatedThreadDeleter::operator()(Thread* thread) const { |
|   Thread& unstarted = *thread; |
| |
|   // Remove it from the list of all threads first. |
|   AllThreads().erase(unstarted); |
| |
|   // Adopt the handle into a temporary zx::thread and close it right away. |
|   zx::thread{unstarted.zxr_thread.handle}.reset(); |
| |
|   // `storage` goes out of scope at the end of this function, which is after |
|   // the explicit destructor call below has run. |
|   // NOTE(review): the meaning of the `true` argument is defined by |
|   // ThreadStorage::FromThread -- confirm there. |
|   auto storage = ThreadStorage::FromThread(unstarted, true); |
|   unstarted.~Thread(); |
| } |
| |
| } // namespace LIBC_NAMESPACE_DECL |