// blob: f89f3926b691b5938c7017f6e53d0b3048550ed3 [file] [log] [blame]
#include <elf.h>
#include <lib/processargs/processargs.h>
#include <lib/zircon-internal/unique-backtrace.h>
#include <stdatomic.h>
#include <string.h>
#include <zircon/sanitizer.h>
#include <zircon/syscalls.h>
#include <zircon/utc.h>
#include <runtime/thread.h>
#include "asan_impl.h"
#include "dynlink.h"
#include "libc.h"
#include "setjmp_impl.h"
#include "threads_impl.h"
#include "zircon_impl.h"
// Startup state carried from __libc_start_main (running on the original
// bootstrap stack) to start_main (running on the newly-allocated thread
// stack). A pointer to this lives on the original stack, which stays alive
// for the life of the process.
struct start_params {
  int (*main)(int, char**, char**);  // The application's main entry point.
  thrd_t td;                         // Initial thread's descriptor.
  uint8_t* buffer;                   // Raw processargs message bytes.
  zx_proc_args_t* procargs;          // Decoded header, points into `buffer`.
  zx_handle_t* handles;              // Startup handle table.
  uint32_t* handle_info;             // PA_HND info, parallel to `handles`.
  uint32_t nbytes, nhandles;         // Sizes of `buffer` and the handle arrays.
  zx_handle_t utc_reference;         // UTC clock handle, or ZX_HANDLE_INVALID.
  int* runtime;                      // Set to 1 once the runtime is usable.
};
// See dynlink.c for the full explanation. The compiler generates calls to
// these implicitly. They are PLT calls into the ASan runtime, which is fine
// in and of itself at this point (unlike in dynlink.c). But they might also
// use ShadowCallStack, which is not set up yet. So make sure references here
// only use the libc-internal symbols, which don't have any setup requirements.
__asan_weak_ref("memcpy")
__asan_weak_ref("memset")

// Per-architecture shadow-call-stack details: the DWARF register number of
// the shadow-call-stack pointer register, and an instruction sequence that
// pushes the return address in %[ra] onto the shadow call stack.
#if defined(__aarch64__)
// AArch64 uses x18 as the shadow-call-stack pointer; it grows up.
#define SHADOW_CALL_STACK_DWARF_REGNO 18
#define SHADOW_CALL_STACK_INIT "str %[ra], [x18], #8\n"
#elif defined(__riscv)
// RISC-V uses gp (x3, DWARF 3) as the shadow-call-stack pointer; it grows up.
#define SHADOW_CALL_STACK_DWARF_REGNO 3
#define SHADOW_CALL_STACK_INIT \
  "add gp, gp, 8\n"            \
  "sd %[ra], -8(gp)\n"
#endif
// This gets called via inline assembly below, after switching onto
// the newly-allocated (safe) stack. The __asm__("start_main") name binding
// gives it a predictable, unmangled symbol that the per-arch assembly in
// __libc_start_main can reference directly with a call/bl instruction.
static _Noreturn void start_main(const struct start_params*) __asm__("start_main")
    __attribute__((used));
// Trampoline for the indirect call into the application's main().
//
// Kept out of UBSan's function-type-mismatch instrumentation: applications
// commonly define main with a signature different from int(*)(int, char**,
// char**) — e.g. using const char** for argv — and instrumenting this call
// would falsely flag those, so suppress the "function" check here.
#if __has_feature(undefined_behavior_sanitizer)
__attribute__((no_sanitize("function")))
#endif
static inline int call_main(int argc, char** argv, char** environ,
                            int (*main_func)(int, char**, char**)) {
  int (*entry)(int, char**, char**) = main_func;
  return entry(argc, argv, environ);
}
// Second half of process startup, running on the newly-allocated thread
// stack (see the inline assembly in __libc_start_main below). Completes
// libc initialization, distributes startup handles, and never returns:
// it calls exit() on the result of the application's main().
static void start_main(const struct start_params* p) {
#if defined(SHADOW_CALL_STACK_INIT) && !__has_feature(shadow_call_stack)
  __asm__ volatile(
      // Ensure shadow-call-stack backtraces consistent with the frame pointer
      // backtraces for the initial frames, so they will stay consistent if
      // main and its callees use shadow-call-stack.
      SHADOW_CALL_STACK_INIT
      // DW_CFA_val_expression <regno>, { DW_OP_breg<regno> -8 }
      ".cfi_escape 0x16, %c[regno], 2, 0x70 + %c[regno], (-8 & 0x7f)"
      :
      : [regno] "i"(SHADOW_CALL_STACK_DWARF_REGNO), [ra] "r"(__builtin_return_address(0)));
#endif

  // Run the __sanitizer_module_loaded hook on all loaded libraries as early as
  // possible in the initial execution path. At this point, we can safely call
  // into external libraries now that the PLT and shadow call stack are setup.
  // This is useful for any library which overrides the
  // __sanitizer_module_loaded hook that needs to observe something from loaded
  // libs before we actually call .preinit_array/.init_array functions. An
  // example where this is necessary is with hwasan which will need to register
  // globals before actually calling __hwasan_init to prevent any more false
  // positives from globals in between now and when __hwasan_init is called.
  _dl_iterate_loaded_libs();

  // Now that the thread descriptor is set up, it's safe to use the
  // dlerror machinery.
  *(p->runtime) = 1;

  uint32_t argc = p->procargs->args_num;
  uint32_t envc = p->procargs->environ_num;
  uint32_t namec = p->procargs->names_num;

  // Now that it is safe to call safe-stack enabled functions, go ahead and
  // install the UTC reference clock, if one was provided to us.
  if (p->utc_reference != ZX_HANDLE_INVALID) {
    zx_handle_t old_clock = ZX_HANDLE_INVALID;
    // Success or fail, libc has consumed our clock handle. It no longer
    // belongs to us. From here on out, it is very important that nothing
    // attempts to make use of p->utc_reference.
    _zx_utc_reference_swap(p->utc_reference, &old_clock);
    // If there had been a clock previously, we now own it, but have no use for
    // it. Simply close it.
    if (old_clock != ZX_HANDLE_INVALID) {
      _zx_handle_close(old_clock);
    }
  }

  // Use a single contiguous buffer for argv and envp, with two
  // extra words of terminator on the end. In traditional Unix
  // process startup, the stack contains argv followed immediately
  // by envp and that's followed immediately by the auxiliary vector
  // (auxv), which is in two-word pairs and terminated by zero
  // words. Some crufty programs might assume some of that layout,
  // and it costs us nothing to stay consistent with it here.
  // NOTE: this VLA lives in start_main's frame; that is fine because
  // start_main never returns (it ends in exit()).
  char* args_and_environ[argc + 1 + envc + 1 + 2];
  char** argv = &args_and_environ[0];
  __environ = &args_and_environ[argc + 1];
  char** dummy_auxv = &args_and_environ[argc + 1 + envc + 1];
  dummy_auxv[0] = dummy_auxv[1] = 0;

  char* names[namec + 1];
  zx_status_t status = processargs_strings(p->buffer, p->nbytes, argv, __environ, names);
  if (status != ZX_OK) {
    // Strings could not be unpacked; present an empty argv/envp/names.
    argc = namec = 0;
    argv = __environ = NULL;
  }

  // Find the "/svc" namespace directory handle, if one was provided, so
  // sanitizer runtimes can reach services before full namespace setup.
  for (uint32_t n = 0; n < p->nhandles; n++) {
    unsigned arg = PA_HND_ARG(p->handle_info[n]);
    zx_handle_t h = p->handles[n];
    switch (PA_HND_TYPE(p->handle_info[n])) {
      case PA_NS_DIR: {
        // Avoid strcmp, because it may be instrumented, and we haven't
        // initialized the sanitizer runtime yet.
        const char* name = names[arg];
        if (name[0] == '/' && name[1] == 's' && name[2] == 'v' && name[3] == 'c' && name[4] == 0) {
          // TODO(phosek): We should ideally duplicate the handle since
          // higher layers might consume it and we want to have a guarantee
          // that it stays alive, but that's typically not possible since
          // channel handles don't have ZX_RIGHT_DUPLICATE right.
          //
          // TODO(phosek): What if the program uses bind to replace its
          // /svc, should the subsequent invocations to __sanitizer_*
          // use the startup value or reflect the live changes?
          __zircon_namespace_svc = h;
        }
        continue;
      }
    }
  }

  __sanitizer_startup_hook(argc, argv, __environ, p->td->safe_stack.iov_base,
                           p->td->safe_stack.iov_len);

  // Setup the hwasan runtime before any `__libc_extensions_init`s are called.
  // This is needed because libraries which define this function (like fdio)
  // may be instrumented and either access `__hwasan_tls` or make runtime calls.
  __hwasan_init();

  // Allow companion libraries a chance to claim handles, zeroing out
  // handles[i] and handle_info[i] for handles they claim.
  if (&__libc_extensions_init != NULL) {
    __libc_extensions_init(p->nhandles, p->handles, p->handle_info, namec, names);
  }

  // Give any unclaimed handles to zx_take_startup_handle(). This function
  // takes ownership of the data, but not the memory: it assumes that the
  // arrays are valid as long as the process is alive.
  __libc_startup_handles_init(p->nhandles, p->handles, p->handle_info);

  // Run static constructors et al.
  __libc_init_gwp_asan();
  __libc_start_init();

  // Pass control to the application.
  exit(call_main(argc, argv, __environ, p->main));
}
// Entry point into libc from the program's startup code (usually _start in
// crt1.o). Reads the processargs bootstrap message, initializes the main
// thread, then switches onto the newly-allocated thread stack via per-arch
// inline assembly and calls start_main(&p) there; it never returns.
// NO_ASAN / LIBC_NO_SAFESTACK: sanitizer and safe-stack runtimes are not set
// up yet, so this function must not rely on them.
__EXPORT NO_ASAN LIBC_NO_SAFESTACK _Noreturn void __libc_start_main(zx_handle_t bootstrap,
                                                                    int (*main)(int, char**,
                                                                                char**)) {
  // Initialize stack-protector canary value first thing. Do the setjmp
  // manglers in the same call to avoid the overhead of two system calls.
  // That means we need a temporary buffer on the stack, which we then
  // want to clear out so the values don't leak there.
  struct randoms {
    uintptr_t stack_guard;
    struct setjmp_manglers setjmp_manglers;
  } randoms;
  static_assert(sizeof(randoms) <= ZX_CPRNG_DRAW_MAX_LEN, "");
  _zx_cprng_draw(&randoms, sizeof(randoms));
  __stack_chk_guard = randoms.stack_guard;
  __setjmp_manglers = randoms.setjmp_manglers;
  // Zero the stack temporaries.
  randoms = (struct randoms){};
  // Tell the compiler that the value is used, so it doesn't optimize
  // out the zeroing as dead stores.
  __asm__("# keepalive %0" ::"m"(randoms));

  // extract process startup information from channel in arg
  struct start_params p = {.main = main, .utc_reference = ZX_HANDLE_INVALID};
  zx_status_t status = processargs_message_size(bootstrap, &p.nbytes, &p.nhandles);

  // TODO(44088): Right now, we _always_ expect to receive at least some
  // handles and some bytes in the initial startup message. Make sure that we
  // have both so that we do not accidentally end up declaring a 0-length VLA
  // on the stack (which is UB in C11). See the bug referenced in the TODO,
  // however. We do not currently formally state that this is a requirement
  // for starting a process, nor do we declare a maximum number of handles
  // which can be sent during startup. Restructuring and formalizing the
  // process-args startup protocol could help with this situation.
  if ((status == ZX_OK) && p.nbytes && p.nhandles) {
    PROCESSARGS_BUFFER(buffer, p.nbytes);
    zx_handle_t handles[p.nhandles];
    p.buffer = buffer;
    p.handles = handles;
    status = processargs_read(bootstrap, buffer, p.nbytes, handles, p.nhandles, &p.procargs,
                              &p.handle_info);
    if (status != ZX_OK) {
      CRASH_WITH_UNIQUE_BACKTRACE();
    }

    // The bootstrap channel has been fully drained; it is no longer needed.
    _zx_handle_close(bootstrap);

    zx_handle_t main_thread_handle = ZX_HANDLE_INVALID;
    processargs_extract_handles(p.nhandles, handles, p.handle_info, &__zircon_process_self,
                                &__zircon_job_default, &__zircon_vmar_root_self,
                                &main_thread_handle, &p.utc_reference);

    atomic_store(&libc.thread_count, 1);

    // This consumes the thread handle and sets up the thread pointer.
    thrd_info_t thrd_info = __init_main_thread(main_thread_handle);
    p.td = thrd_info.thread;
    p.runtime = thrd_info.runtime;

    // Switch to the allocated stack and call start_main(&p) there. The
    // original stack stays around just to hold the message buffer and handles
    // array. The new stack is whole pages, so it's sufficiently aligned.

    // The stack switching takes care to maintain valid CFI throughout so that
    // CFI-based unwinding works correctly from the start_main frame back to
    // this frame and back to its caller, which is the program's entry point
    // (usually _start in crt1.o). It also sets up both frame pointer and
    // (when available) shadow call stack state to make the basic backtrace
    // (i.e. PC list) between CFI, frame pointers, and shadow call stack
    // collection methods all consistent. For CFI, this is basically a matter
    // of correct metadata. For both frame pointers and shadow call stack, the
    // backtrace collection relies on a contiguous stack and won't see anything
    // that's not stored within those bounds. So the actual original stack
    // frame where this frame's own FP points is not available, and there is no
    // shadow call stack at all yet. Instead synthesize artificial "frames"
    // that are just enough to appear normal to basic backtrace collection by
    // each method and give the same results.

#ifdef __x86_64__
    __asm__ volatile(
        // Adjust the CFI to track the existing CFA via a different call-saved
        // register so unwinding will work after we reset the FP below. Note
        // that __builtin_frame_address(0) returns the value of the FP register
        // (as documented in the GCC manual), *not* the value of the CFA.
        // Moreover, there is no mandated relationship between the two values!
        // The compiler will tell us the value of the FP with the built-in, but
        // it won't tell us how it's calculating the CFA. Since we force frame
        // pointers on when compiling this function, we assume that the
        // compiler will have defined its CFA rule as an offset from the FP
        // register. So this CFI directive adjusts the CFA rule to refer to a
        // different register, one that's safely called-saved here, but reusing
        // the existing CFA rule's offset from the FP.
        ".cfi_def_cfa_register %[frame_address]\n"
        // Switch to the new stack.
        "lea -16(%[base], %[len], 1), %%rsp\n"
        // Synthesize a fake frame on the new stack that's sufficient for FP
        // backtrace collection. It would ignore the original real frame
        // _start pushed because that FP value is not in the recorded bounds of
        // the thread's machine stack.
        "mov %[return_address], 8(%%rsp)\n"
        "mov %%rsp, %%rbp\n"
        // Since we force frame pointers on when compiling this function, we
        // assume that the compiler will have defined its CFI rule for the
        // caller's FP register in terms of the CFA, so that's still correct
        // after we clobber it here.
        "call start_main\n"
        "ud2\n"
        "# Target receives %[arg]"
        :
        : [base] "r"(p.td->safe_stack.iov_base), [len] "r"(p.td->safe_stack.iov_len),
          [return_address] "r"(__builtin_return_address(0)),
          // The "b" constraint forces the value into the %rbx register, which
          // is call-saved so the compiler will spill it in the prologue and
          // produce CFI to read it relative to the CFA.
          [frame_address] "b"(__builtin_frame_address(0)),
          "m"(p),  // Tell the compiler p's fields are all still alive.
          [arg] "D"(&p));
#elif defined(__aarch64__)
    __asm__ volatile(
        // Adjust the CFI to track the existing CFA via a different call-saved
        // register so unwinding will work after we reset the FP below. Note
        // that __builtin_frame_address(0) returns the value of the FP register
        // (as documented in the GCC manual), *not* the value of the CFA.
        // Moreover, there is no mandated relationship between the two values!
        // The compiler will tell us the value of the FP with the built-in, but
        // it won't tell us how it's calculating the CFA. Since we force frame
        // pointers on when compiling this function, we assume that the
        // compiler will have defined its CFA rule as an offset from the FP
        // register. So this CFI directive adjusts the CFA rule to refer to a
        // different register, one that's safely called-saved here, but reusing
        // the existing CFA rule's offset from the FP.
        "mov x28, %[frame_address]\n"
        ".cfi_def_cfa_register x28\n"
        // Switch to the new stacks.
        "add sp, %[base], %[len]\n"
        "mov x18, %[shadow_call_stack]\n"
        // The starting CFI rule for x18 should have been same-value, but we're
        // not going to be able to recover the caller's x18 value any more.
        ".cfi_undefined x18\n"
        // Synthesize a backtrace frame on the new stack. Backtrace collection
        // would ignore the original real frame _start pushed because that FP
        // value is not in the recorded bounds of the thread's machine stack.
        "stp xzr, %[return_address], [sp, #-16]!\n"
        "mov x29, sp\n"
        // Since we force frame pointers on when compiling this function, we
        // assume that the compiler will have defined its CFI rule for the
        // caller's FP register in terms of the CFA, so that's still correct
        // after we clobber it here.
        // Push our own return address on the shadow call stack so it appears
        // in a backtrace just as it would if this function itself were using
        // the normal shadow-call-stack protocol. Before that, push a zero
        // return address as an end marker similar to how CFI unwinding marks
        // the base frame by having its return address column compute zero.
        "stp xzr, %[return_address], [x18], #16\n"
        // Neither sp, x29, nor x18 might be used as an input operand, but x0
        // might be. So clobber x0 last. We don't need to declare it to the
        // compiler as a clobber since we'll never come back and it's fine if
        // it's used as an input operand.
        "mov x0, %[arg]\n"
        "bl start_main\n"
        "brk #1"
        :
        : [base] "r"(p.td->safe_stack.iov_base), [len] "r"(p.td->safe_stack.iov_len),
          // Shadow call stack grows up.
          [shadow_call_stack] "r"(p.td->shadow_call_stack.iov_base),
          [return_address] "r"(__builtin_return_address(0)),
          [frame_address] "r"(__builtin_frame_address(0)),
          "m"(p),  // Tell the compiler p's fields are all still alive.
          [arg] "r"(&p)
        : "x28");
#elif defined(__riscv)
    __asm__ volatile(
        // Adjust the CFI to track the existing CFA via a different call-saved
        // register so unwinding will work after we reset the FP below. Since
        // __builtin_frame_address(0) returns the value of the FP register (as
        // documented in the GCC manual), and the RISC-V calling convention
        // defines the FP to match the CFA (SP on function entry), this value
        // should match. The compiler will tell us the value of the FP with
        // the built-in, but it won't tell us how it's calculating the CFA.
        // Since we force frame pointers on when compiling this function, we
        // assume that the compiler will have defined its CFA rule to point to
        // the FP register.
        "mv s1, %[frame_address]\n"
        ".cfi_def_cfa s1, 0\n"
        // Switch to the new machine stack.
        "add sp, %[base], %[len]\n"
        // Synthesize a backtrace frame on the new stack. Backtrace collection
        // would ignore the original real frame _start pushed because that FP
        // value is not in the recorded bounds of the thread's machine stack.
        "add sp, sp, -16\n"
        "sd %[return_address], 8(sp)\n"
        "sd zero, 0(sp)\n"
        // Since we force frame pointers on when compiling this function, we
        // assume that the compiler will have defined its CFI rule for the
        // caller's FP register in terms of the CFA, so that's still correct
        // after we clobber it here. On RISC-V, the FP points to the CFA, not
        // to the bottom of the FP, PC pair.
        "add fp, sp, 16\n"
        // Save the caller's gp in another call-saved register.
        "mv s2, gp\n"
        ".cfi_register gp, s2\n"
        // Switch to the new shadow call stack. Then push our own return
        // address on the shadow call stack so it appears in a backtrace just
        // as it would if this function itself were using the normal shadow
        // call stack protocol. Before that, push a zero return address as an
        // end marker similar to how CFI unwinding marks the base frame by
        // having its return address column compute zero.
        "add gp, %[shadow_call_stack], 16\n"
        "sd zero, -16(gp)\n"
        "sd %[return_address], -8(gp)\n"
        // Neither sp, fp, nor gp might be used as an input operand, but a0
        // might be. So clobber a0 last. We don't need to declare it to the
        // compiler as a clobber since we'll never come back and it's fine if
        // it's used as an input operand.
        "mv a0, %[arg]\n"
        "call start_main\n"
        "unimp"
        :
        : [base] "r"(p.td->safe_stack.iov_base), [len] "r"(p.td->safe_stack.iov_len),
          // Shadow call stack grows up.
          [shadow_call_stack] "r"(p.td->shadow_call_stack.iov_base),
          [return_address] "r"(__builtin_return_address(0)),
          [frame_address] "r"(__builtin_frame_address(0)),
          "m"(p),  // Tell the compiler p's fields are all still alive.
          [arg] "r"(&p)
        : "s1", "s2");
#else
#error what architecture?
#endif
  }

  // Startup message was malformed or missing; we cannot start the process.
  CRASH_WITH_UNIQUE_BACKTRACE();
}