zircon/third_party/ulib/musl/src/env/__libc_start_main.c - fuchsia - Git at Google

 #include <elf.h>
 #include <lib/processargs/processargs.h>
 #include <lib/zircon-internal/unique-backtrace.h>
 #include <stdatomic.h>
 #include <string.h>
 #include <zircon/sanitizer.h>
 #include <zircon/syscalls.h>
 #include <zircon/utc.h>

 #include <runtime/thread.h>

 #include "asan_impl.h"
 #include "dynlink.h"
 #include "libc.h"
 #include "setjmp_impl.h"
 #include "threads_impl.h"
 #include "zircon_impl.h"

 // Startup state collected by __libc_start_main on the original stack and
 // handed by pointer to start_main() after the switch to the new stack.
 struct start_params {
   // The program's main function, exactly as passed to __libc_start_main.
   int (*main)(int, char**, char**);

   // Main thread descriptor, taken from __init_main_thread()'s result.
   thrd_t td;

   // Byte buffer holding the bootstrap message, and the header and handle-info
   // table that processargs_read() located for it.
   uint8_t* buffer;
   zx_proc_args_t* procargs;

   // Handles received with the bootstrap message; handle_info[i] describes
   // handles[i].
   zx_handle_t* handles;
   uint32_t* handle_info;

   // Sizes reported by processargs_message_size(): bytes in `buffer` and
   // entries in `handles` / `handle_info`.
   uint32_t nbytes;
   uint32_t nhandles;

   // UTC clock handle from the startup handles, or ZX_HANDLE_INVALID if none
   // was provided.  start_main() hands it to _zx_utc_reference_swap().
   zx_handle_t utc_reference;

   // Flag supplied by __init_main_thread(); start_main() stores 1 through it.
   int* runtime;
 };

 // See dynlink.c for the full explanation.  The compiler generates calls to
 // these implicitly.  They are PLT calls into the ASan runtime, which is fine
 // in and of itself at this point (unlike in dynlink.c).  But they might also
 // use ShadowCallStack, which is not set up yet.  So make sure references here
 // only use the libc-internal symbols, which don't have any setup requirements.
 __asan_weak_ref("memcpy")
 __asan_weak_ref("memset")

 // Per-architecture shadow-call-stack details used by start_main() below.
 // SHADOW_CALL_STACK_DWARF_REGNO is the register number plugged into the
 // .cfi_escape there.  SHADOW_CALL_STACK_INIT is the assembly that stores the
 // %[ra] operand into the next shadow call stack slot and advances the shadow
 // call stack register by 8.  Neither macro is defined on other architectures.
 #if defined(__aarch64__)
 #define SHADOW_CALL_STACK_DWARF_REGNO 18
 // Post-indexed store: write %[ra] at [x18], then add 8 to x18.
 #define SHADOW_CALL_STACK_INIT "str %[ra], [x18], #8\n"
 #elif defined(__riscv)
 #define SHADOW_CALL_STACK_DWARF_REGNO 3
 // Bump gp first, then fill the slot just below the new gp value.
 #define SHADOW_CALL_STACK_INIT \
   "add gp, gp, 8\n"            \
   "sd %[ra], -8(gp)\n"
 #endif

 // This gets called via inline assembly below, after switching onto
 // the newly-allocated (safe) stack.  The explicit assembler name makes the
 // emitted symbol match the `start_main` that the inline assembly calls by
 // name, and `used` keeps the definition from being discarded even though no
 // C code references it.
 static _Noreturn void start_main(const struct start_params*) __asm__("start_main")
     __attribute__((used));

 // Invoke the program's main function and hand back its return value.
 //
 // Function-type-mismatch instrumentation is switched off for this helper:
 // UBSan would otherwise report an error at the indirect call whenever the
 // application defines main with a signature other than
 // int(*)(int, char**, char**).  Declaring argv as const char** is a fairly
 // common way to trigger that.
 #if __has_feature(undefined_behavior_sanitizer)
 __attribute__((no_sanitize("function")))
 #endif
 static inline int
 call_main(int argc, char** argv, char** environ, int (*main_func)(int, char**, char**)) {
   const int exit_code = (*main_func)(argc, argv, environ);
   return exit_code;
 }

 // Second-stage startup, entered from __libc_start_main's inline assembly once
 // execution is on the newly allocated stack.  It does not return: it finishes
 // by calling exit() with main's return value.
 //
 // `p` points at the start_params that __libc_start_main filled in; the
 // message buffer and handle arrays it refers to still live on the original
 // stack.
 static void start_main(const struct start_params* p) {
 #if defined(SHADOW_CALL_STACK_INIT) && !__has_feature(shadow_call_stack)
   __asm__ volatile(
       // Ensure shadow-call-stack backtraces consistent with the frame pointer
       // backtraces for the initial frames, so they will stay consistent if
       // main and its callees use shadow-call-stack.
       SHADOW_CALL_STACK_INIT
       // DW_CFA_val_expression <regno>, { DW_OP_breg<regno> -8 }
       ".cfi_escape 0x16, %c[regno], 2, 0x70 + %c[regno], (-8 & 0x7f)"
       :
       : [regno] "i"(SHADOW_CALL_STACK_DWARF_REGNO), [ra] "r"(__builtin_return_address(0)));
 #endif

   // Run the __sanitizer_module_loaded hook on all loaded libraries as early as
   // possible in the initial execution path. At this point, we can safely call
   // into external libraries now that the PLT and shadow call stack are set up.
   // This is useful for any library which overrides the
   // __sanitizer_module_loaded hook that needs to observe something from loaded
   // libs before we actually call .preinit_array/.init_array functions. An
   // example where this is necessary is with hwasan which will need to register
   // globals before actually calling __hwasan_init to prevent any more false
   // positives from globals in between now and when __hwasan_init is called.
   _dl_iterate_loaded_libs();

   // Now that the thread descriptor is set up, it's safe to use the
   // dlerror machinery.
   *(p->runtime) = 1;

   uint32_t argc = p->procargs->args_num;
   uint32_t envc = p->procargs->environ_num;
   uint32_t namec = p->procargs->names_num;

   // Now that it is safe to call safe-stack enabled functions, go ahead and
   // install the UTC reference clock, if one was provided to us.
   if (p->utc_reference != ZX_HANDLE_INVALID) {
     zx_handle_t old_clock = ZX_HANDLE_INVALID;

     // Success or fail, libc has consumed our clock handle.  It no longer
     // belongs to us.  From here on out, it is very important that nothing
     // attempts to make use of p->utc_reference.
     _zx_utc_reference_swap(p->utc_reference, &old_clock);

     // If there had been a clock previously, we now own it, but have no use for
     // it.  Simply close it.
     if (old_clock != ZX_HANDLE_INVALID) {
       _zx_handle_close(old_clock);
     }
   }

   // Use a single contiguous buffer for argv and envp, with two
   // extra words of terminator on the end.  In traditional Unix
   // process startup, the stack contains argv followed immediately
   // by envp and that's followed immediately by the auxiliary vector
   // (auxv), which is in two-word pairs and terminated by zero
   // words.  Some crufty programs might assume some of that layout,
   // and it costs us nothing to stay consistent with it here.
   char* args_and_environ[argc + 1 + envc + 1 + 2];
   char** argv = &args_and_environ[0];
   __environ = &args_and_environ[argc + 1];
   char** dummy_auxv = &args_and_environ[argc + 1 + envc + 1];
   dummy_auxv[0] = dummy_auxv[1] = 0;

   char* names[namec + 1];
   zx_status_t status = processargs_strings(p->buffer, p->nbytes, argv, __environ, names);
   if (status != ZX_OK) {
     // String extraction failed: present an empty argv/environ/names view.
     argc = namec = 0;
     argv = __environ = NULL;
   }

   // Look for the namespace entry named "/svc" and remember its handle.
   for (uint32_t n = 0; n < p->nhandles; n++) {
     const uint32_t info = p->handle_info[n];
     if (PA_HND_TYPE(info) != PA_NS_DIR) {
       continue;
     }

     // The name index comes straight from the startup message, so make sure
     // it refers to one of the namec names before touching names[].  This
     // also covers the failure path above, where namec is zero and names[]
     // may not have been filled in at all.
     const unsigned arg = PA_HND_ARG(info);
     if (arg >= namec) {
       continue;
     }

     // Avoid strcmp, because it may be instrumented, and we haven't
     // initialized the sanitizer runtime yet.
     const char* name = names[arg];
     if (name[0] == '/' && name[1] == 's' && name[2] == 'v' && name[3] == 'c' && name[4] == 0) {
       // TODO(phosek): We should ideally duplicate the handle since
       // higher layers might consume it and we want to have a guarantee
       // that it stays alive, but that's typically not possible since
       // channel handles don't have ZX_RIGHT_DUPLICATE right.
       //
       // TODO(phosek): What if the program uses bind to replace its
       // /svc, should the subsequent invocations to __sanitizer_*
       // use the startup value or reflect the live changes?
       __zircon_namespace_svc = p->handles[n];
     }
   }

   __sanitizer_startup_hook(argc, argv, __environ, p->td->safe_stack.iov_base,
                            p->td->safe_stack.iov_len);

   // Setup the hwasan runtime before any `__libc_extensions_init`s are called.
   // This is needed because libraries which define this function (like fdio)
   // may be instrumented and either access `__hwasan_tls` or make runtime calls.
   __hwasan_init();

   // Allow companion libraries a chance to claim handles, zeroing out
   // handles[i] and handle_info[i] for handles they claim.
   if (&__libc_extensions_init != NULL) {
     __libc_extensions_init(p->nhandles, p->handles, p->handle_info, namec, names);
   }

   // Give any unclaimed handles to zx_take_startup_handle(). This function
   // takes ownership of the data, but not the memory: it assumes that the
   // arrays are valid as long as the process is alive.
   __libc_startup_handles_init(p->nhandles, p->handles, p->handle_info);

   // Run static constructors et al.
   __libc_init_gwp_asan();
   __libc_start_init();

   // Pass control to the application.
   exit(call_main(argc, argv, __environ, p->main));
 }

 // First-stage process startup, called by the program's entry point (usually
 // _start in crt1.o) with the bootstrap channel handle and the program's main.
 //
 // It seeds the stack-protector guard and setjmp manglers, reads the startup
 // message from `bootstrap` into buffers on this (original) stack, extracts the
 // well-known handles, initializes the main thread, and then switches to the
 // thread's new stack and calls start_main(&p) there.  It never returns: if the
 // message size cannot be obtained, the message has no bytes or no handles, or
 // reading it fails, the process crashes via CRASH_WITH_UNIQUE_BACKTRACE().
 __EXPORT NO_ASAN LIBC_NO_SAFESTACK _Noreturn void __libc_start_main(zx_handle_t bootstrap,
                                                                     int (*main)(int, char**,
                                                                                 char**)) {
   // Initialize stack-protector canary value first thing.  Do the setjmp
   // manglers in the same call to avoid the overhead of two system calls.
   // That means we need a temporary buffer on the stack, which we then
   // want to clear out so the values don't leak there.
   struct randoms {
     uintptr_t stack_guard;
     struct setjmp_manglers setjmp_manglers;
   } randoms;
   static_assert(sizeof(randoms) <= ZX_CPRNG_DRAW_MAX_LEN, "");
   _zx_cprng_draw(&randoms, sizeof(randoms));
   __stack_chk_guard = randoms.stack_guard;
   __setjmp_manglers = randoms.setjmp_manglers;
   // Zero the stack temporaries.
   randoms = (struct randoms){};
   // Tell the compiler that the value is used, so it doesn't optimize
   // out the zeroing as dead stores.
   __asm__("# keepalive %0" ::"m"(randoms));

   // Extract process startup information from the channel in `bootstrap`.
   struct start_params p = {.main = main, .utc_reference = ZX_HANDLE_INVALID};
   zx_status_t status = processargs_message_size(bootstrap, &p.nbytes, &p.nhandles);

   // TODO(44088): Right now, we _always_ expect to receive at least some
   // handles and some bytes in the initial startup message.  Make sure that we
   // have both so that we do not accidentally end up declaring a 0-length VLA
   // on the stack (which is undefined behavior in C11).  See the bug
   // referenced in the TODO, however.  We do not currently formally state that
   // this is a requirement for starting a process, nor do we declare a maximum
   // number of handles which can be sent during startup.  Restructuring and
   // formalizing the process-args startup protocol could help with this
   // situation.
   if ((status == ZX_OK) && p.nbytes && p.nhandles) {
     PROCESSARGS_BUFFER(buffer, p.nbytes);
     zx_handle_t handles[p.nhandles];
     p.buffer = buffer;
     p.handles = handles;
     status = processargs_read(bootstrap, buffer, p.nbytes, handles, p.nhandles, &p.procargs,
                               &p.handle_info);
     if (status != ZX_OK) {
       CRASH_WITH_UNIQUE_BACKTRACE();
     }
     // The message has been read; the bootstrap channel is no longer needed.
     _zx_handle_close(bootstrap);
     zx_handle_t main_thread_handle = ZX_HANDLE_INVALID;
     processargs_extract_handles(p.nhandles, handles, p.handle_info, &__zircon_process_self,
                                 &__zircon_job_default, &__zircon_vmar_root_self,
                                 &main_thread_handle, &p.utc_reference);

     atomic_store(&libc.thread_count, 1);

     // This consumes the thread handle and sets up the thread pointer.
     thrd_info_t thrd_info = __init_main_thread(main_thread_handle);
     p.td = thrd_info.thread;
     p.runtime = thrd_info.runtime;

     // Switch to the allocated stack and call start_main(&p) there.  The
     // original stack stays around just to hold the message buffer and handles
     // array.  The new stack is whole pages, so it's sufficiently aligned.

     // The stack switching takes care to maintain valid CFI throughout so that
     // CFI-based unwinding works correctly from the start_main frame back to
     // this frame and back to its caller, which is the program's entry point
     // (usually _start in crt1.o).  It also sets up both frame pointer and
     // (when available) shadow call stack state to make the basic backtrace
     // (i.e. PC list) between CFI, frame pointers, and shadow call stack
     // collection methods all consistent.  For CFI, this is basically a matter
     // of correct metadata.  For both frame pointers and shadow call stack, the
     // backtrace collection relies on a contiguous stack and won't see anything
     // that's not stored within those bounds.  So the actual original stack
     // frame where this frame's own FP points is not available, and there is no
     // shadow call stack at all yet.  Instead synthesize artificial "frames"
     // that are just enough to appear normal to basic backtrace collection by
     // each method and give the same results.
 #ifdef __x86_64__
     __asm__ volatile(
         // Adjust the CFI to track the existing CFA via a different call-saved
         // register so unwinding will work after we reset the FP below.  Note
         // that __builtin_frame_address(0) returns the value of the FP register
         // (as documented in the GCC manual), *not* the value of the CFA.
         // Moreover, there is no mandated relationship between the two values!
         // The compiler will tell us the value of the FP with the built-in, but
         // it won't tell us how it's calculating the CFA.  Since we force frame
         // pointers on when compiling this function, we assume that the
         // compiler will have defined its CFA rule as an offset from the FP
         // register.  So this CFI directive adjusts the CFA rule to refer to a
         // different register, one that's safely call-saved here, but reusing
         // the existing CFA rule's offset from the FP.
         ".cfi_def_cfa_register %[frame_address]\n"

         // Switch to the new stack.
         "lea -16(%[base], %[len], 1), %%rsp\n"

         // Synthesize a fake frame on the new stack that's sufficient for FP
         // backtrace collection.  It would ignore the original real frame
         // _start pushed because that FP value is not in the recorded bounds of
         // the thread's machine stack.
         "mov %[return_address], 8(%%rsp)\n"
         "mov %%rsp, %%rbp\n"
         // Since we force frame pointers on when compiling this function, we
         // assume that the compiler will have defined its CFI rule for the
         // caller's FP register in terms of the CFA, so that's still correct
         // after we clobber it here.

         "call start_main\n"
         "ud2\n"
         "# Target receives %[arg]"
         :
         : [base] "r"(p.td->safe_stack.iov_base), [len] "r"(p.td->safe_stack.iov_len),
           [return_address] "r"(__builtin_return_address(0)),
           // The "b" constraint forces the value into the %rbx register, which
           // is call-saved so the compiler will spill it in the prologue and
           // produce CFI to read it relative to the CFA.
           [frame_address] "b"(__builtin_frame_address(0)),
           "m"(p),  // Tell the compiler p's fields are all still alive.
           // The "D" constraint places &p in %rdi, where start_main expects
           // its first argument.
           [arg] "D"(&p));
 #elif defined(__aarch64__)
     __asm__ volatile(
         // Adjust the CFI to track the existing CFA via a different call-saved
         // register so unwinding will work after we reset the FP below.  Note
         // that __builtin_frame_address(0) returns the value of the FP register
         // (as documented in the GCC manual), *not* the value of the CFA.
         // Moreover, there is no mandated relationship between the two values!
         // The compiler will tell us the value of the FP with the built-in, but
         // it won't tell us how it's calculating the CFA.  Since we force frame
         // pointers on when compiling this function, we assume that the
         // compiler will have defined its CFA rule as an offset from the FP
         // register.  So this CFI directive adjusts the CFA rule to refer to a
         // different register, one that's safely call-saved here, but reusing
         // the existing CFA rule's offset from the FP.
         "mov x28, %[frame_address]\n"
         ".cfi_def_cfa_register x28\n"

         // Switch to the new stacks.
         "add sp, %[base], %[len]\n"
         "mov x18, %[shadow_call_stack]\n"
         // The starting CFI rule for x18 should have been same-value, but we're
         // not going to be able to recover the caller's x18 value any more.
         ".cfi_undefined x18\n"

         // Synthesize a backtrace frame on the new stack.  Backtrace collection
         // would ignore the original real frame _start pushed because that FP
         // value is not in the recorded bounds of the thread's machine stack.
         "stp xzr, %[return_address], [sp, #-16]!\n"
         "mov x29, sp\n"
         // Since we force frame pointers on when compiling this function, we
         // assume that the compiler will have defined its CFI rule for the
         // caller's FP register in terms of the CFA, so that's still correct
         // after we clobber it here.

         // Push our own return address on the shadow call stack so it appears
         // in a backtrace just as it would if this function itself were using
         // the normal shadow-call-stack protocol.  Before that, push a zero
         // return address as an end marker similar to how CFI unwinding marks
         // the base frame by having its return address column compute zero.
         "stp xzr, %[return_address], [x18], #16\n"

         // Neither sp, x29, nor x18 might be used as an input operand, but x0
         // might be.  So clobber x0 last.  We don't need to declare it to the
         // compiler as a clobber since we'll never come back and it's fine if
         // it's used as an input operand.
         "mov x0, %[arg]\n"
         "bl start_main\n"
         "brk #1"
         :
         : [base] "r"(p.td->safe_stack.iov_base), [len] "r"(p.td->safe_stack.iov_len),
           // Shadow call stack grows up.
           [shadow_call_stack] "r"(p.td->shadow_call_stack.iov_base),
           [return_address] "r"(__builtin_return_address(0)),
           [frame_address] "r"(__builtin_frame_address(0)),
           "m"(p),  // Tell the compiler p's fields are all still alive.
           [arg] "r"(&p)
         : "x28");
 #elif defined(__riscv)
     __asm__ volatile(
         // Adjust the CFI to track the existing CFA via a different call-saved
         // register so unwinding will work after we reset the FP below.  Since
         // __builtin_frame_address(0) returns the value of the FP register (as
         // documented in the GCC manual), and the RISC-V calling convention
         // defines the FP to match the CFA (SP on function entry), this value
         // should match.  The compiler will tell us the value of the FP with
         // the built-in, but it won't tell us how it's calculating the CFA.
         // Since we force frame pointers on when compiling this function, we
         // assume that the compiler will have defined its CFA rule to point to
         // the FP register.
         "mv s1, %[frame_address]\n"
         ".cfi_def_cfa s1, 0\n"

         // Switch to the new machine stack.
         "add sp, %[base], %[len]\n"

         // Synthesize a backtrace frame on the new stack.  Backtrace collection
         // would ignore the original real frame _start pushed because that FP
         // value is not in the recorded bounds of the thread's machine stack.
         "add sp, sp, -16\n"
         "sd %[return_address], 8(sp)\n"
         "sd zero, 0(sp)\n"
         // Since we force frame pointers on when compiling this function, we
         // assume that the compiler will have defined its CFI rule for the
         // caller's FP register in terms of the CFA, so that's still correct
         // after we clobber it here.  On RISC-V, the FP points to the CFA, not
         // to the bottom of the FP, PC pair.
         "add fp, sp, 16\n"

         // Save the caller's gp in another call-saved register.
         "mv s2, gp\n"
         ".cfi_register gp, s2\n"

         // Switch to the new shadow call stack.  Then push our own return
         // address on the shadow call stack so it appears in a backtrace just
         // as it would if this function itself were using the normal shadow
         // call stack protocol.  Before that, push a zero return address as an
         // end marker similar to how CFI unwinding marks the base frame by
         // having its return address column compute zero.
         "add gp, %[shadow_call_stack], 16\n"
         "sd zero, -16(gp)\n"
         "sd %[return_address], -8(gp)\n"

         // Neither sp, fp, nor gp might be used as an input operand, but a0
         // might be.  So clobber a0 last.  We don't need to declare it to the
         // compiler as a clobber since we'll never come back and it's fine if
         // it's used as an input operand.
         "mv a0, %[arg]\n"
         "call start_main\n"
         "unimp"
         :
         : [base] "r"(p.td->safe_stack.iov_base), [len] "r"(p.td->safe_stack.iov_len),
           // Shadow call stack grows up.
           [shadow_call_stack] "r"(p.td->shadow_call_stack.iov_base),
           [return_address] "r"(__builtin_return_address(0)),
           [frame_address] "r"(__builtin_frame_address(0)),
           "m"(p),  // Tell the compiler p's fields are all still alive.
           [arg] "r"(&p)
         : "s1", "s2");
 #else
 #error what architecture?
 #endif
   }

   // Reached when the startup message could not be sized or was empty.
   CRASH_WITH_UNIQUE_BACKTRACE();
 }
	#include <elf.h>
	#include <lib/processargs/processargs.h>
	#include <lib/zircon-internal/unique-backtrace.h>
	#include <stdatomic.h>
	#include <string.h>
	#include <zircon/sanitizer.h>
	#include <zircon/syscalls.h>
	#include <zircon/utc.h>

	#include <runtime/thread.h>

	#include "asan_impl.h"
	#include "dynlink.h"
	#include "libc.h"
	#include "setjmp_impl.h"
	#include "threads_impl.h"
	#include "zircon_impl.h"

	// Startup state collected by __libc_start_main on the original stack and
	// handed by pointer to start_main() after the switch to the new stack.
	struct start_params {
	  // The program's main function, exactly as passed to __libc_start_main.
	  // This must be a pointer to function: a struct member cannot have
	  // function type, and start_main() passes it on as a function pointer.
	  int (*main)(int, char**, char**);
	  // Main thread descriptor, taken from __init_main_thread()'s result.
	  thrd_t td;
	  // Byte buffer holding the bootstrap message.
	  uint8_t* buffer;
	  // Message header located by processargs_read().
	  zx_proc_args_t* procargs;
	  // Handles received with the message; handle_info[i] describes handles[i].
	  zx_handle_t* handles;
	  uint32_t* handle_info;
	  // Sizes reported by processargs_message_size().
	  uint32_t nbytes, nhandles;
	  // UTC clock handle from the startup handles, or ZX_HANDLE_INVALID.
	  zx_handle_t utc_reference;
	  // Flag supplied by __init_main_thread(); start_main() stores 1 through it.
	  int* runtime;
	};

	// See dynlink.c for the full explanation. The compiler generates calls to
	// these implicitly. They are PLT calls into the ASan runtime, which is fine
	// in and of itself at this point (unlike in dynlink.c). But they might also
	// use ShadowCallStack, which is not set up yet. So make sure references here
	// only use the libc-internal symbols, which don't have any setup requirements.
	__asan_weak_ref("memcpy")
	__asan_weak_ref("memset")

	// Per-architecture shadow-call-stack details used by start_main() below.
	// SHADOW_CALL_STACK_DWARF_REGNO is the register number plugged into the
	// .cfi_escape there.  SHADOW_CALL_STACK_INIT is the assembly that stores the
	// %[ra] operand into the next shadow call stack slot and advances the shadow
	// call stack register by 8.  Neither macro is defined on other architectures.
	#if defined(__aarch64__)
	#define SHADOW_CALL_STACK_DWARF_REGNO 18
	// Post-indexed store: write %[ra] at [x18], then add 8 to x18.
	#define SHADOW_CALL_STACK_INIT "str %[ra], [x18], #8\n"
	#elif defined(__riscv)
	#define SHADOW_CALL_STACK_DWARF_REGNO 3
	// Bump gp first, then fill the slot just below the new gp value.
	#define SHADOW_CALL_STACK_INIT \
	"add gp, gp, 8\n" \
	"sd %[ra], -8(gp)\n"
	#endif

	// This gets called via inline assembly below, after switching onto
	// the newly-allocated (safe) stack.  The explicit assembler name makes the
	// emitted symbol match the `start_main` that the inline assembly calls by
	// name, and `used` keeps the definition from being discarded even though no
	// C code references it.
	static _Noreturn void start_main(const struct start_params*) __asm__("start_main")
	__attribute__((used));

	// Invoke the program's main function and return its result.
	//
	// Do not instrument this function with checks for function-type-mismatches.
	// UBSan will report errors on the entry to main via p->main if the application
	// happens to define main with a signature different from
	// int(*)(int, char**, char**).  It's not uncommon for users to instead use
	// const char** for argv, where this can be reported.
	//
	// argv and environ are pointer-to-pointer arrays and main_func is a pointer
	// to function; start_main() passes char** values and p->main here.
	#if __has_feature(undefined_behavior_sanitizer)
	__attribute__((no_sanitize("function")))
	#endif
	static inline int
	call_main(int argc, char** argv, char** environ, int (*main_func)(int, char**, char**)) {
	  return main_func(argc, argv, environ);
	}

	// Second-stage startup, entered from __libc_start_main's inline assembly once
	// execution is on the newly allocated stack.  It does not return: it finishes
	// by calling exit() with main's return value.
	//
	// `p` points at the start_params that __libc_start_main filled in; the
	// message buffer and handle arrays it refers to still live on the original
	// stack.
	static void start_main(const struct start_params* p) {
	#if defined(SHADOW_CALL_STACK_INIT) && !__has_feature(shadow_call_stack)
	  __asm__ volatile(
	      // Ensure shadow-call-stack backtraces consistent with the frame pointer
	      // backtraces for the initial frames, so they will stay consistent if
	      // main and its callees use shadow-call-stack.
	      SHADOW_CALL_STACK_INIT
	      // DW_CFA_val_expression <regno>, { DW_OP_breg<regno> -8 }
	      ".cfi_escape 0x16, %c[regno], 2, 0x70 + %c[regno], (-8 & 0x7f)"
	      :
	      : [regno] "i"(SHADOW_CALL_STACK_DWARF_REGNO), [ra] "r"(__builtin_return_address(0)));
	#endif

	  // Run the __sanitizer_module_loaded hook on all loaded libraries as early as
	  // possible in the initial execution path. At this point, we can safely call
	  // into external libraries now that the PLT and shadow call stack are set up.
	  // This is useful for any library which overrides the
	  // __sanitizer_module_loaded hook that needs to observe something from loaded
	  // libs before we actually call .preinit_array/.init_array functions. An
	  // example where this is necessary is with hwasan which will need to register
	  // globals before actually calling __hwasan_init to prevent any more false
	  // positives from globals in between now and when __hwasan_init is called.
	  _dl_iterate_loaded_libs();

	  // Now that the thread descriptor is set up, it's safe to use the
	  // dlerror machinery.
	  *(p->runtime) = 1;

	  uint32_t argc = p->procargs->args_num;
	  uint32_t envc = p->procargs->environ_num;
	  uint32_t namec = p->procargs->names_num;

	  // Now that it is safe to call safe-stack enabled functions, go ahead and
	  // install the UTC reference clock, if one was provided to us.
	  if (p->utc_reference != ZX_HANDLE_INVALID) {
	    zx_handle_t old_clock = ZX_HANDLE_INVALID;

	    // Success or fail, libc has consumed our clock handle. It no longer
	    // belongs to us. From here on out, it is very important that nothing
	    // attempts to make use of p->utc_reference.
	    _zx_utc_reference_swap(p->utc_reference, &old_clock);

	    // If there had been a clock previously, we now own it, but have no use for
	    // it. Simply close it.
	    if (old_clock != ZX_HANDLE_INVALID) {
	      _zx_handle_close(old_clock);
	    }
	  }

	  // Use a single contiguous buffer for argv and envp, with two
	  // extra words of terminator on the end. In traditional Unix
	  // process startup, the stack contains argv followed immediately
	  // by envp and that's followed immediately by the auxiliary vector
	  // (auxv), which is in two-word pairs and terminated by zero
	  // words. Some crufty programs might assume some of that layout,
	  // and it costs us nothing to stay consistent with it here.
	  char* args_and_environ[argc + 1 + envc + 1 + 2];
	  char** argv = &args_and_environ[0];
	  __environ = &args_and_environ[argc + 1];
	  char** dummy_auxv = &args_and_environ[argc + 1 + envc + 1];
	  dummy_auxv[0] = dummy_auxv[1] = 0;

	  char* names[namec + 1];
	  zx_status_t status = processargs_strings(p->buffer, p->nbytes, argv, __environ, names);
	  if (status != ZX_OK) {
	    // String extraction failed: present an empty argv/environ/names view.
	    argc = namec = 0;
	    argv = __environ = NULL;
	  }

	  // Look for the namespace entry named "/svc" and remember its handle.
	  for (uint32_t n = 0; n < p->nhandles; n++) {
	    const uint32_t info = p->handle_info[n];
	    if (PA_HND_TYPE(info) != PA_NS_DIR) {
	      continue;
	    }

	    // The name index comes straight from the startup message, so make sure
	    // it refers to one of the namec names before touching names[].  This
	    // also covers the failure path above, where namec is zero and names[]
	    // may not have been filled in at all.
	    const unsigned arg = PA_HND_ARG(info);
	    if (arg >= namec) {
	      continue;
	    }

	    // Avoid strcmp, because it may be instrumented, and we haven't
	    // initialized the sanitizer runtime yet.
	    const char* name = names[arg];
	    if (name[0] == '/' && name[1] == 's' && name[2] == 'v' && name[3] == 'c' && name[4] == 0) {
	      // TODO(phosek): We should ideally duplicate the handle since
	      // higher layers might consume it and we want to have a guarantee
	      // that it stays alive, but that's typically not possible since
	      // channel handles don't have ZX_RIGHT_DUPLICATE right.
	      //
	      // TODO(phosek): What if the program uses bind to replace its
	      // /svc, should the subsequent invocations to __sanitizer_*
	      // use the startup value or reflect the live changes?
	      __zircon_namespace_svc = p->handles[n];
	    }
	  }

	  __sanitizer_startup_hook(argc, argv, __environ, p->td->safe_stack.iov_base,
	                           p->td->safe_stack.iov_len);

	  // Setup the hwasan runtime before any `__libc_extensions_init`s are called.
	  // This is needed because libraries which define this function (like fdio)
	  // may be instrumented and either access `__hwasan_tls` or make runtime calls.
	  __hwasan_init();

	  // Allow companion libraries a chance to claim handles, zeroing out
	  // handles[i] and handle_info[i] for handles they claim.
	  if (&__libc_extensions_init != NULL) {
	    __libc_extensions_init(p->nhandles, p->handles, p->handle_info, namec, names);
	  }

	  // Give any unclaimed handles to zx_take_startup_handle(). This function
	  // takes ownership of the data, but not the memory: it assumes that the
	  // arrays are valid as long as the process is alive.
	  __libc_startup_handles_init(p->nhandles, p->handles, p->handle_info);

	  // Run static constructors et al.
	  __libc_init_gwp_asan();
	  __libc_start_init();

	  // Pass control to the application.
	  exit(call_main(argc, argv, __environ, p->main));
	}

	__EXPORT NO_ASAN LIBC_NO_SAFESTACK _Noreturn void __libc_start_main(zx_handle_t bootstrap,
	int (main)(int, char*,
	char**)) {
	// Initialize stack-protector canary value first thing. Do the setjmp
	// manglers in the same call to avoid the overhead of two system calls.
	// That means we need a temporary buffer on the stack, which we then
	// want to clear out so the values don't leak there.
	struct randoms {
	uintptr_t stack_guard;
	struct setjmp_manglers setjmp_manglers;
	} randoms;
	static_assert(sizeof(randoms) <= ZX_CPRNG_DRAW_MAX_LEN, "");
	_zx_cprng_draw(&randoms, sizeof(randoms));
	__stack_chk_guard = randoms.stack_guard;
	__setjmp_manglers = randoms.setjmp_manglers;
	// Zero the stack temporaries.
	randoms = (struct randoms){};
	// Tell the compiler that the value is used, so it doesn't optimize
	// out the zeroing as dead stores.
	__asm__("# keepalive %0" ::"m"(randoms));

	// extract process startup information from channel in arg
	struct start_params p = {.main = main, .utc_reference = ZX_HANDLE_INVALID};
	zx_status_t status = processargs_message_size(bootstrap, &p.nbytes, &p.nhandles);

	// TODO(44088): Right now, we _always_ expect to receive at least some
	// handles and some bytes in the initial startup message. Make sure that we
	// have both so that we do not accidentally end up declaring a 0-length VLA
	// on the stack (which is UDB in C11). See the bug referenced in the TODO,
	// however. We do not currently formally state that this is a requirement
	// for starting a process, nor do we declare a maximum number of handles
	// which can be sent during startup. Restructuring and formalizing the
	// process-args startup protocol could help with this situation.
	if ((status == ZX_OK) && p.nbytes && p.nhandles) {
	PROCESSARGS_BUFFER(buffer, p.nbytes);
	zx_handle_t handles[p.nhandles];
	p.buffer = buffer;
	p.handles = handles;
	status = processargs_read(bootstrap, buffer, p.nbytes, handles, p.nhandles, &p.procargs,
	&p.handle_info);
	if (status != ZX_OK) {
	CRASH_WITH_UNIQUE_BACKTRACE();
	}
	_zx_handle_close(bootstrap);
	zx_handle_t main_thread_handle = ZX_HANDLE_INVALID;
	processargs_extract_handles(p.nhandles, handles, p.handle_info, &__zircon_process_self,
	&__zircon_job_default, &__zircon_vmar_root_self,
	&main_thread_handle, &p.utc_reference);

	atomic_store(&libc.thread_count, 1);

	// This consumes the thread handle and sets up the thread pointer.
	thrd_info_t thrd_info = __init_main_thread(main_thread_handle);
	p.td = thrd_info.thread;
	p.runtime = thrd_info.runtime;

	// Switch to the allocated stack and call start_main(&p) there. The
	// original stack stays around just to hold the message buffer and handles
	// array. The new stack is whole pages, so it's sufficiently aligned.

	// The stack switching takes care to maintain valid CFI throughout so that
	// CFI-based unwinding works correctly from the start_main frame back to
	// this frame and back to its caller, which is the program's entry point
	// (usually _start in crt1.o). It also sets up both frame pointer and
	// (when available) shadow call stack state to make the basic backtrace
	// (i.e. PC list) between CFI, frame pointers, and shadow call stack
	// collection methods all consistent. For CFI, this is basically a matter
	// of correct metadata. For both frame pointers and shadow call stack, the
	// backtrace collection relies on a contiguous stack and won't see anything
	// that's not stored within those bounds. So the actual original stack
	// frame where this frame's own FP points is not available, and there is no
	// shadow call stack at all yet. Instead synthesize artifical "frames"
	// that are just enough to appear normal to basic backtrace collection by
	// each method and give the same results.
	#ifdef __x86_64__
	__asm__ volatile(
	// Adjust the CFI to track the existing CFA via a different call-saved
	// register so unwinding will work after we reset the FP below. Note
	// that __builtin_frame_address(0) returns the value of the FP register
	// (as documented in the GCC manual), not the value of the CFA.
	// Moreover, there is no mandated relationship between the two values!
	// The compiler will tell us the value of the FP with the built-in, but
	// it won't tell us how it's calculating the CFA. Since we force frame
	// pointers on when compiling this function, we assume that the
	// compiler will have defined its CFA rule as an offset from the FP
	// register. So this CFI directive adjusts the CFA rule to refer to a
	// different register, one that's safely call-saved here, but reusing
	// the existing CFA rule's offset from the FP.
	".cfi_def_cfa_register %[frame_address]\n"

	// Switch to the new stack.
	"lea -16(%[base], %[len], 1), %%rsp\n"

	// Synthesize a fake frame on the new stack that's sufficient for FP
	// backtrace collection. It would ignore the original real frame
	// _start pushed because that FP value is not in the recorded bounds of
	// the thread's machine stack.
	"mov %[return_address], 8(%%rsp)\n"
	"mov %%rsp, %%rbp\n"
	// Since we force frame pointers on when compiling this function, we
	// assume that the compiler will have defined its CFI rule for the
	// caller's FP register in terms of the CFA, so that's still correct
	// after we clobber it here.

	"call start_main\n"
	"ud2\n"
	"# Target receives %[arg]"
	:
	: [base] "r"(p.td->safe_stack.iov_base), [len] "r"(p.td->safe_stack.iov_len),
	[return_address] "r"(__builtin_return_address(0)),
	// The "b" constraint forces the value into the %rbx register, which
	// is call-saved so the compiler will spill it in the prologue and
	// produce CFI to read it relative to the CFA.
	[frame_address] "b"(__builtin_frame_address(0)),
	"m"(p), // Tell the compiler p's fields are all still alive.
	[arg] "D"(&p));
	#elif defined(__aarch64__)
	__asm__ volatile(
	// Adjust the CFI to track the existing CFA via a different call-saved
	// register so unwinding will work after we reset the FP below. Note
	// that __builtin_frame_address(0) returns the value of the FP register
	// (as documented in the GCC manual), not the value of the CFA.
	// Moreover, there is no mandated relationship between the two values!
	// The compiler will tell us the value of the FP with the built-in, but
	// it won't tell us how it's calculating the CFA. Since we force frame
	// pointers on when compiling this function, we assume that the
	// compiler will have defined its CFA rule as an offset from the FP
	// register. So this CFI directive adjusts the CFA rule to refer to a
	// different register, one that's safely call-saved here, but reusing
	// the existing CFA rule's offset from the FP.
	"mov x28, %[frame_address]\n"
	".cfi_def_cfa_register x28\n"

	// Switch to the new stacks.
	"add sp, %[base], %[len]\n"
	"mov x18, %[shadow_call_stack]\n"
	// The starting CFI rule for x18 should have been same-value, but we're
	// not going to be able to recover the caller's x18 value any more.
	".cfi_undefined x18\n"

	// Synthesize a backtrace frame on the new stack. Backtrace collection
	// would ignore the original real frame _start pushed because that FP
	// value is not in the recorded bounds of the thread's machine stack.
	"stp xzr, %[return_address], [sp, #-16]!\n"
	"mov x29, sp\n"
	// Since we force frame pointers on when compiling this function, we
	// assume that the compiler will have defined its CFI rule for the
	// caller's FP register in terms of the CFA, so that's still correct
	// after we clobber it here.

	// Push our own return address on the shadow call stack so it appears
	// in a backtrace just as it would if this function itself were using
	// the normal shadow-call-stack protocol. Before that, push a zero
	// return address as an end marker similar to how CFI unwinding marks
	// the base frame by having its return address column compute zero.
	"stp xzr, %[return_address], [x18], #16\n"

	// Neither sp, x29, nor x18 might be used as an input operand, but x0
	// might be. So clobber x0 last. We don't need to declare it to the
	// compiler as a clobber since we'll never come back and it's fine if
	// it's used as an input operand.
	"mov x0, %[arg]\n"
	"bl start_main\n"
	"brk #1"
	:
	: [base] "r"(p.td->safe_stack.iov_base), [len] "r"(p.td->safe_stack.iov_len),
	// Shadow call stack grows up.
	[shadow_call_stack] "r"(p.td->shadow_call_stack.iov_base),
	[return_address] "r"(__builtin_return_address(0)),
	[frame_address] "r"(__builtin_frame_address(0)),
	"m"(p), // Tell the compiler p's fields are all still alive.
	[arg] "r"(&p)
	: "x28");
	#elif defined(__riscv)
	__asm__ volatile(
	// Adjust the CFI to track the existing CFA via a different call-saved
	// register so unwinding will work after we reset the FP below. Since
	// __builtin_frame_address(0) returns the value of the FP register (as
	// documented in the GCC manual), and the RISC-V calling convention
	// defines the FP to match the CFA (SP on function entry), this value
	// should match. The compiler will tell us the value of the FP with
	// the built-in, but it won't tell us how it's calculating the CFA.
	// Since we force frame pointers on when compiling this function, we
	// assume that the compiler will have defined its CFA rule to point to
	// the FP register.
	"mv s1, %[frame_address]\n"
	".cfi_def_cfa s1, 0\n"

	// Switch to the new machine stack.
	"add sp, %[base], %[len]\n"

	// Synthesize a backtrace frame on the new stack. Backtrace collection
	// would ignore the original real frame _start pushed because that FP
	// value is not in the recorded bounds of the thread's machine stack.
	"add sp, sp, -16\n"
	"sd %[return_address], 8(sp)\n"
	"sd zero, 0(sp)\n"
	// Since we force frame pointers on when compiling this function, we
	// assume that the compiler will have defined its CFI rule for the
	// caller's FP register in terms of the CFA, so that's still correct
	// after we clobber it here. On RISC-V, the FP points to the CFA, not
	// to the bottom of the FP, PC pair.
	"add fp, sp, 16\n"

	// Save the caller's gp in another call-saved register.
	"mv s2, gp\n"
	".cfi_register gp, s2\n"

	// Switch to the new shadow call stack. Then push our own return
	// address on the shadow call stack so it appears in a backtrace just
	// as it would if this function itself were using the normal shadow
	// call stack protocol. Before that, push a zero return address as an
	// end marker similar to how CFI unwinding marks the base frame by
	// having its return address column compute zero.
	"add gp, %[shadow_call_stack], 16\n"
	"sd zero, -16(gp)\n"
	"sd %[return_address], -8(gp)\n"

	// Neither sp, fp, nor gp might be used as an input operand, but a0
	// might be. So clobber a0 last. We don't need to declare it to the
	// compiler as a clobber since we'll never come back and it's fine if
	// it's used as an input operand.
	"mv a0, %[arg]\n"
	"call start_main\n"
	"unimp"
	:
	: [base] "r"(p.td->safe_stack.iov_base), [len] "r"(p.td->safe_stack.iov_len),
	// Shadow call stack grows up.
	[shadow_call_stack] "r"(p.td->shadow_call_stack.iov_base),
	[return_address] "r"(__builtin_return_address(0)),
	[frame_address] "r"(__builtin_frame_address(0)),
	"m"(p), // Tell the compiler p's fields are all still alive.
	[arg] "r"(&p)
	: "s1", "s2");
	#else
	#error what architecture?
	#endif
	}

	CRASH_WITH_UNIQUE_BACKTRACE();
	}