// Copyright 2020 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include <assert.h>
#include <lib/zx/process.h>
#include <lib/zx/thread.h>
#include <lib/zx/vmo.h>
#include <threads.h>
#include <zircon/compiler.h>
#include <zircon/process.h>
#include <zircon/sanitizer.h>
#include <zircon/syscalls/debug.h>
#include <algorithm>
#include <mutex>
#include <utility>
#include <runtime/thread.h>
#include "../threads/thread-list.h"
#include "../weak.h"
#include "dynlink.h"
#include "threads_impl.h"
namespace LIBC_NAMESPACE_DECL {
namespace {
// TODO(https://fxbug.dev/42175677): ThreadSuspender synchronizes using
// _dl_wrlock. If a vDSO entry point used by the snapshot code is
// interposed by a version that calls dlsym, this can deadlock since dlsym
// takes the write lock too for its own arcane reasons. The known interposer
// implementations such as //src/devices/testing/fake-object only call dlsym on
// first entry to each system call (following standard dlsym-interposer
// practice). So just make an early call to each system call entry point used
// in this file, before taking any locks. That way any interposers will have
// done their initialization before we call into them. If the interposers do
// other synchronization this could still cause deadlock in other ways. So
// probably we'll need to change things eventually so that this uses only real
// vDSO entry points that can't be interposed upon.
class PrimeSyscallsBeforeTakingLocks {
public:
PrimeSyscallsBeforeTakingLocks() {
static once_flag flag;
call_once(&flag, Prime);
}
private:
static void Prime() {
zx_handle_t invalid;
uintptr_t ignored;
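// Each call below passes intentionally bogus or trivial arguments; the
// return values don't matter. The point is just to enter each vDSO entry
// point once so that any dlsym-based interposer does its one-time
// initialization now, before any of the locks in this file are held.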
(void)zx_system_get_page_size();
zx_object_get_child(_zx_process_self(), ZX_KOID_INVALID, 0, &invalid);
zx_object_get_info(_zx_process_self(), 0, nullptr, 0, nullptr, nullptr);
zx_object_wait_one(_zx_process_self(), 0, 0, nullptr);
zx_task_suspend_token(_zx_process_self(), &invalid);
zx_thread_read_state(zxr_thread_get_handle(&__pthread_self()->zxr_thread), 0, nullptr, 0);
zx_handle_t vmo = ZX_HANDLE_INVALID;
zx_vmo_create(0, 0, &vmo);
zx_vmo_set_size(vmo, 0);
zx_vmar_map(_zx_vmar_root_self(), 0, 0, vmo, 0, 0, &ignored);
zx_vmar_unmap(_zx_vmar_root_self(), 0, 0);
zx_handle_close(vmo);
}
};
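// For reference, the "standard dlsym-interposer practice" mentioned in the
// TODO above looks roughly like this (an illustrative sketch only, not the
// actual fake-object code):
//
//   extern "C" zx_status_t zx_vmo_create(uint64_t size, uint32_t options,
//                                        zx_handle_t* out) {
//     static const auto real = reinterpret_cast<decltype(&zx_vmo_create)>(
//         dlsym(RTLD_NEXT, "zx_vmo_create"));  // First call takes _dl_wrlock.
//     return real(size, options, out);
//   }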
constexpr WeakLock<_dl_rdlock, _dl_unlock> kDlLock;
constexpr WeakLock<__thread_allocation_inhibit, __thread_allocation_release> kAllocationLock;
// This is a simple container similar to std::vector but using only whole-page
// allocations in a private VMO to avoid interactions with any normal memory
// allocator. Resizing the vector may remap the data in the VMO to a new
// memory location without changing its contents, so the element type must not
// contain any pointers into itself or the like.
template <typename T>
class RelocatingPageAllocatedVector {
public:
RelocatingPageAllocatedVector(const RelocatingPageAllocatedVector&) = delete;
RelocatingPageAllocatedVector(RelocatingPageAllocatedVector&&) = delete;
RelocatingPageAllocatedVector() = default;
~RelocatingPageAllocatedVector() {
for (auto& elt : *this) {
elt.~T();
}
if (data_) {
Unmap(data_);
}
}
using size_type = size_t;
using value_type = T;
using iterator = T*;
using const_iterator = const T*;
size_type size() const { return size_; }
size_type capacity() const { return capacity_; }
T* data() { return data_; }
const T* data() const { return data_; }
iterator begin() { return data_; }
iterator end() { return &data_[size_]; }
const_iterator cbegin() const { return data_; }
const_iterator cend() const { return &data_[size_]; }
T& operator[](size_type i) {
assert(i < size_);
return data_[i];
}
const T& operator[](size_type i) const {
assert(i < size_);
return data_[i];
}
// On success, size() < capacity().
zx_status_t reserve_some_more() {
if (size_ < capacity_) {
return ZX_OK;
}
assert(sizeof(T) <= _zx_system_get_page_size());
const size_t alloc_size = AllocatedSize() + _zx_system_get_page_size();
zx_status_t status =
vmo_ ? vmo_.set_size(alloc_size) : zx::vmo::create(alloc_size, ZX_VMO_RESIZABLE, &vmo_);
if (status == ZX_OK) {
// Leave the old mapping in place while making the new mapping so that
// it's still accessible for element destruction in case of failure.
auto old = data_;
status = Map(alloc_size);
if (status == ZX_OK) {
assert(size_ < capacity_);
Unmap(old);
}
}
return status;
}
// This is like the standard resize method, but it doesn't initialize new
// elements. Instead, it's expected that the caller has already initialized
// them by writing data() elements between size() and capacity().
void resize_in_place(size_t new_size) {
assert(new_size <= capacity_);
size_ = new_size;
}
// Unlike standard containers, this never allocates and must only be called
// when capacity() > size(), e.g. after reserve_some_more().
template <typename U>
void push_back(U&& value) {
assert(size_ < capacity_);
data_[size_++] = std::forward<U>(value);
}
private:
T* data_ = nullptr;
size_t size_ = 0;
size_t capacity_ = 0;
zx::vmo vmo_;
size_t AllocatedSize() const {
size_t total = capacity_ * sizeof(T);
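// Round up to a whole number of pages. Since the page size is a power of
// two, -page_size == ~(page_size - 1); e.g. with 4096-byte pages a total of
// 5000 bytes rounds up to 8192.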
return (total + _zx_system_get_page_size() - 1) &
-static_cast<size_t>(_zx_system_get_page_size());
}
zx_status_t Map(size_t alloc_size) {
uintptr_t addr;
zx_status_t status = _zx_vmar_map(_zx_vmar_root_self(), ZX_VM_PERM_READ | ZX_VM_PERM_WRITE, 0,
vmo_.get(), 0, alloc_size, &addr);
if (status == ZX_OK) {
data_ = reinterpret_cast<T*>(addr);
capacity_ = alloc_size / sizeof(T);
}
return status;
}
void Unmap(void* data) {
_zx_vmar_unmap(_zx_vmar_root_self(), reinterpret_cast<uintptr_t>(data), AllocatedSize());
}
};
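// Typical use of the container (an illustrative sketch; ThreadSuspender and
// MemorySnapshot below follow this pattern):
//
//   RelocatingPageAllocatedVector<zx_koid_t> koids;
//   zx_status_t status = koids.reserve_some_more();
//   if (status == ZX_OK) {
//     koids.push_back(some_koid);  // Never allocates; capacity was reserved.
//   }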
// Just keeping the suspend_token handle alive is what keeps the thread
// suspended. So destruction of the Thread object implicitly resumes it.
struct SuspendedThread {
zx_koid_t koid = ZX_KOID_INVALID;
zx::thread thread;
zx::suspend_token token;
};
using SuspendedThreadVector = RelocatingPageAllocatedVector<SuspendedThread>;
class __TA_SCOPED_CAPABILITY ThreadSuspender {
public:
ThreadSuspender() __TA_ACQUIRE(gAllThreadsLock) = default;
~ThreadSuspender() __TA_RELEASE() = default;
zx_status_t Collect(SuspendedThreadVector& threads) {
zx_status_t status = Init();
if (status != ZX_OK) {
return status;
}
size_t filled, count;
bool any_new;
do {
// Prepare to handle more than the last iteration (or "some" on the
// first iteration).
status = koids_.reserve_some_more();
if (status == ZX_OK) {
// Collect all the thread KOIDs in the process.
status = process()->get_info(ZX_INFO_PROCESS_THREADS, koids_.data(),
koids_.capacity() * sizeof(zx_koid_t), &filled, &count);
}
if (status == ZX_OK) {
// Check for threads not already suspended.
koids_.resize_in_place(filled);
status = SuspendNewThreads(threads, &any_new);
}
if (status != ZX_OK) {
return status;
}
// Loop as long as either the scan found any new threads or the buffer
// didn't include all the threads in the process. Any time there is a
// newly-suspended thread, it might have just created another thread
// before being suspended, so another pass is needed to ensure all live
// threads have been caught.
} while (any_new || filled < count);
// Now wait for all the threads to have finished suspending.
for (auto& t : threads) {
zx_signals_t pending;
status = t.thread.wait_one(ZX_THREAD_SUSPENDED | ZX_THREAD_TERMINATED, zx::time::infinite(),
&pending);
if (status != ZX_OK) {
return status;
}
if (pending & ZX_THREAD_TERMINATED) {
// The thread died before getting fully suspended.
t.koid = ZX_KOID_INVALID;
} else {
assert(pending & ZX_THREAD_SUSPENDED);
}
}
return ZX_OK;
}
private:
zx::unowned_process process() { return zx::unowned_process{_zx_process_self()}; }
zx_status_t Init() {
// First determine this thread's KOID to distinguish it from siblings.
zx::unowned_thread this_thread{_zx_thread_self()};
zx_info_handle_basic_t self_info;
zx_status_t status = this_thread->get_info(ZX_INFO_HANDLE_BASIC, &self_info, sizeof(self_info),
nullptr, nullptr);
if (status == ZX_OK) {
this_thread_koid_ = self_info.koid;
}
return status;
}
// Scan koids_ for threads not already present in the vector.
// For each new thread, suspend it and push it onto the vector.
//
// TODO(mcgrathr): Performance considerations for this path:
//
// Most often this will be called exactly twice: first when the vector is
// empty, and then again when the refreshed list of threads is verified to
// exactly match the set already in the vector. It will only be called for
// additional iterations if there is a race with one of the live threads
// creating a new thread. Since the usual use of this facility is for
// shutdown-time leak checking, such races should be unlikely. However, if
// it's used in the future for more performance-sensitive cases such as
// conservative GC implementation then it may become important to minimize
// the overhead of this work in a wider variety of situations.
//
// The first pass of this function will be O(n) in the number of threads.
// The second pass will be O(n^2) in the number of threads. However, note
// that it's not safe to short-circuit that second pass in the common case
// by simply noting that the number of threads is the same as observed in
// the first pass, because it could be that some threads observed and
// suspended in the first pass died but new ones were created that haven't
// been observed and suspended yet. Again, since the usual use of this
// facility is at shutdown-time it's expected that there will not be an
// inordinate number of threads still live at that point in a program.
// However if that turns out not to be a safe enough presumption in
// practice, this could be optimized with a less trivial data structure.
// The implementation constraints here (not using normal allocators and
// non-fatal recovery from allocation failures) preclude using any
// conveniently-available data structure implementations.
//
// If this path is truly performance sensitive then the best solution would
// be a new "suspend all threads but me" facility in the kernel, which can
// straightforwardly use internal synchronization to implement a one-pass
// solution that's O(n) in the number of threads with no need to mitigate
// race conditions.
zx_status_t SuspendNewThreads(SuspendedThreadVector& threads, bool* any) {
*any = false;
for (const zx_koid_t koid : koids_) {
auto match = [koid](const SuspendedThread& t) { return t.koid == koid; };
if (koid != this_thread_koid_ && std::ranges::none_of(threads, match)) {
SuspendedThread t = {.koid = koid};
zx_status_t status =
process()->get_child(koid, ZX_RIGHT_READ | ZX_RIGHT_WRITE | ZX_RIGHT_WAIT, &t.thread);
if (status == ZX_ERR_NOT_FOUND) {
// The thread must have died in a race.
continue;
}
if (status == ZX_OK) {
status = t.thread.suspend(&t.token);
if (status == ZX_ERR_BAD_STATE) {
// The thread is already dying.
continue;
}
}
if (status == ZX_OK) {
status = threads.reserve_some_more();
}
if (status != ZX_OK) {
return status;
}
threads.push_back(std::move(t));
*any = true;
}
}
return ZX_OK;
}
// Take important locks before suspending any threads. These protect data
// structures that MemorySnapshot needs to scan. Once all threads are
// suspended, the locks are released since any potential contenders should
// be quiescent for the remainder of the snapshot, and it's inadvisable to
// call user callbacks with internal locks held.
//
// N.B. The lock order here matches dlopen_internal to avoid A/B deadlock.
// Avoid reentrancy issues with the system calls used with locks held.
[[no_unique_address]] PrimeSyscallsBeforeTakingLocks prime_syscalls_;
// The dynamic linker data structures are used to find all the global
// ranges, so they must be in a consistent state.
std::lock_guard<decltype(kDlLock)> lock_dl_{kDlLock};
// This approximately prevents thread creation. It doesn't block thread
// creation per se; rather, it prevents thrd_create or pthread_create from
// allocating new thread data structures. The lock is not held while
// actually creating the thread, however, so there is always a race with
// actual thread creation that has to be addressed by the looping logic in
// Collect, above. Also, nothing prevents racing with other direct
// zx_thread_create calls in the process that don't use the libc
// facilities.
std::lock_guard<decltype(kAllocationLock)> lock_allocation_{kAllocationLock};
// Importantly, this lock protects consistency of the global list of
// all threads so that it can be traversed safely below.
std::lock_guard<Mutex> lock_all_threads_{gAllThreadsLock};
RelocatingPageAllocatedVector<zx_koid_t> koids_;
zx_koid_t this_thread_koid_ = ZX_KOID_INVALID;
};
class MemorySnapshot {
public:
MemorySnapshot() = delete;
MemorySnapshot(void (*done)(zx_status_t, void*), void* arg)
: done_callback_(done), callback_arg_(arg) {}
~MemorySnapshot() {
if (done_callback_) {
done_callback_(status_, callback_arg_);
}
}
bool Ok() const { return status_ == ZX_OK; }
void SuspendThreads() { status_ = ThreadSuspender().Collect(threads_); }
void ReportGlobals(sanitizer_memory_snapshot_callback_t* callback) {
_dl_locked_report_globals(callback, callback_arg_);
}
void ReportThreads(sanitizer_memory_snapshot_callback_t* stacks,
sanitizer_memory_snapshot_callback_t* regs,
sanitizer_memory_snapshot_callback_t* tls) {
for (const auto& t : threads_) {
if (t.koid != ZX_KOID_INVALID) {
ReportThread(t, stacks, regs, tls);
}
}
if (tls) {
ReportInvalidTcbs(tls);
}
}
void ReportTcb(pthread* tcb, uintptr_t thread_sp,
sanitizer_memory_snapshot_callback_t* stacks_callback,
sanitizer_memory_snapshot_callback_t* tls_callback) {
if (stacks_callback) {
ReportStack(tcb->safe_stack, thread_sp, stacks_callback);
ReportStack(tcb->unsafe_stack, tcb->abi.unsafe_sp, stacks_callback);
// The shadow call stack never contains pointers to mutable data,
// so there is no reason to report its contents.
}
if (tls_callback) {
ReportTls(tcb, tls_callback);
}
}
private:
SuspendedThreadVector threads_;
void (*done_callback_)(zx_status_t, void*);
void* callback_arg_;
zx_status_t status_ = ZX_OK;
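// Per-architecture member-pointer selectors for the stack pointer and thread
// (TLS) pointer registers; ReportThread below uses them to locate each
// suspended thread's stack and TCB.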
#if defined(__aarch64__)
static constexpr auto kSpReg = &zx_thread_state_general_regs_t::sp;
static constexpr auto kThreadReg = &zx_thread_state_general_regs_t::tpidr;
#elif defined(__riscv)
static constexpr auto kSpReg = &zx_thread_state_general_regs_t::sp;
static constexpr auto kThreadReg = &zx_thread_state_general_regs_t::tp;
#elif defined(__x86_64__)
static constexpr auto kSpReg = &zx_thread_state_general_regs_t::rsp;
static constexpr auto kThreadReg = &zx_thread_state_general_regs_t::fs_base;
#else
#error "what machine?"
#endif
void ReportThread(const SuspendedThread& t, sanitizer_memory_snapshot_callback_t* stacks_callback,
sanitizer_memory_snapshot_callback_t* regs_callback,
sanitizer_memory_snapshot_callback_t* tls_callback) {
// Collect register data, which is needed to find stack and TLS locations.
zx_thread_state_general_regs_t regs;
zx_status_t status = t.thread.read_state(ZX_THREAD_STATE_GENERAL_REGS, &regs, sizeof(regs));
if (status != ZX_OK) {
return;
}
if (regs_callback) {
// Report the register data.
regs_callback(&regs, sizeof(regs), callback_arg_);
}
if (stacks_callback || tls_callback) {
// Find the TCB to determine the TLS and stack regions. But first verify
// that it's one of the live threads. If it's not there this could be a
// thread not created by libc, or a detached thread that got suspended
// while exiting (so its TCB has already been unmapped, but the thread
// pointer wasn't cleared). In either case we can't safely use the
// pointer since it might be bogus or point to a data structure we don't
// grok. So no TCB-based information (TLS, stack bounds) can be
// discovered and reported.
if (auto tcb = AllThreads().FindTp(regs.*kThreadReg)) {
ReportTcb(tcb, regs.*kSpReg, stacks_callback, tls_callback);
}
}
}
void ReportStack(const iovec& stack, uintptr_t sp,
sanitizer_memory_snapshot_callback_t* callback) {
if (!stack.iov_base || stack.iov_len == 0) {
return;
}
uintptr_t base = reinterpret_cast<uintptr_t>(stack.iov_base);
uintptr_t limit = base + stack.iov_len;
// If the current SP is not woefully misaligned and falls within the
// expected bounds, just report the currently active range. Otherwise
// assume the thread is off on some other special stack and the whole
// thread stack might actually be in use when it gets back to it.
if (sp % sizeof(uintptr_t) == 0 && sp >= base && sp <= limit) {
// Stacks grow downwards.
base = sp;
}
callback(reinterpret_cast<void*>(base), limit - base, callback_arg_);
}
void ReportTls(pthread* tcb, sanitizer_memory_snapshot_callback_t* callback) {
if (tcb->tsd_used) {
// Report all tss_set (aka pthread_setspecific) values.
callback(tcb->tsd, sizeof(tcb->tsd), callback_arg_);
}
// Report the handful of particular pointers stashed in the TCB itself.
// These are literal cached malloc allocations. Members like `start_arg` or
// `result` might be set up before the thread register is set up, so they
// can hold values that no ReportTls call will reach.
void* ptrs[] = {
tcb->locale,
tcb->dlerror_buf,
tcb->tls_dtors,
};
callback(ptrs, sizeof(ptrs), callback_arg_);
// Report each DTV element with its segment's precise address range.
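// dtv[0] holds the generation count, which bounds how many module slots are
// currently valid in this thread's DTV.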
const size_t gen = (size_t)tcb->head.dtv[0];
size_t modid = 0;
for (auto* mod = __libc.tls_head; mod && ++modid <= gen; mod = mod->next) {
callback(tcb->head.dtv[modid], mod->size, callback_arg_);
}
}
// Report internal thread objects whose threads are either not fully set up or
// have finished. Rather than a costly check for whether the TCB was found
// with a live thread, just report all threads' join values here and not in
// ReportTls (above).
void ReportInvalidTcbs(sanitizer_memory_snapshot_callback_t* callback) {
// Don't hold the lock during callbacks. It should be safe to pretend
// it's locked assuming the callback doesn't create or join threads.
constexpr auto unlocked_all_threads = []() -> ThreadList {
std::lock_guard lock(gAllThreadsLock);
return AllThreadsLocked();
};
for (auto tcb : unlocked_all_threads()) {
void* ptrs[] = {
// Report the thread's starting argument which may only be available in the internal
// pthread, or the thread's result join value which may be set once the thread
// completes.
tcb->start_arg_or_result,
};
callback(ptrs, sizeof(ptrs), callback_arg_);
}
}
};
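// Capture the calling thread's own general registers via inline asm; unlike
// the suspended threads above, a running thread can't be inspected with
// zx_thread_read_state.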
auto CurrentThreadRegs() {
zx_thread_state_general_regs_t regs;
#if defined(__aarch64__)
__asm__ volatile(
"stp x0, x1, [%1, #(8 * 0)]\n"
"stp x2, x3, [%1, #(8 * 2)]\n"
"stp x4, x5, [%1, #(8 * 4)]\n"
"stp x6, x7, [%1, #(8 * 6)]\n"
"stp x8, x9, [%1, #(8 * 8)]\n"
"stp x10, x11, [%1, #(8 * 10)]\n"
"stp x12, x13, [%1, #(8 * 12)]\n"
"stp x14, x15, [%1, #(8 * 14)]\n"
"stp x16, x17, [%1, #(8 * 16)]\n"
"stp x18, x19, [%1, #(8 * 18)]\n"
"stp x20, x21, [%1, #(8 * 20)]\n"
"stp x22, x23, [%1, #(8 * 22)]\n"
"stp x24, x25, [%1, #(8 * 24)]\n"
"stp x26, x27, [%1, #(8 * 26)]\n"
"stp x28, x29, [%1, #(8 * 28)]\n"
: "=m"(regs)
: "r"(regs.r));
regs.lr = regs.pc = reinterpret_cast<uintptr_t>(__builtin_return_address(0));
regs.sp = reinterpret_cast<uintptr_t>(__builtin_frame_address(0));
__asm__("mrs %0, nzcv" : "=r"(regs.cpsr));
__asm__("mrs %0, tpidr_el0" : "=r"(regs.tpidr));
#elif defined(__riscv)
regs.pc = regs.ra = reinterpret_cast<uintptr_t>(__builtin_return_address(0));
regs.sp = reinterpret_cast<uintptr_t>(__builtin_frame_address(0));
regs.s0 = regs.sp; // s0 is fp.
__asm__ volatile("sd gp, %0" : "=m"(regs.gp));
regs.tp = reinterpret_cast<uintptr_t>(__builtin_thread_pointer());
__asm__ volatile("sd t0, %0" : "=m"(regs.t0));
__asm__ volatile("sd t1, %0" : "=m"(regs.t1));
__asm__ volatile("sd t2, %0" : "=m"(regs.t2));
__asm__ volatile("sd s1, %0" : "=m"(regs.s1));
__asm__ volatile("sd a0, %0" : "=m"(regs.a0));
__asm__ volatile("sd a1, %0" : "=m"(regs.a1));
__asm__ volatile("sd a2, %0" : "=m"(regs.a2));
__asm__ volatile("sd a3, %0" : "=m"(regs.a3));
__asm__ volatile("sd a4, %0" : "=m"(regs.a4));
__asm__ volatile("sd a5, %0" : "=m"(regs.a5));
__asm__ volatile("sd a6, %0" : "=m"(regs.a6));
__asm__ volatile("sd a7, %0" : "=m"(regs.a7));
__asm__ volatile("sd s2, %0" : "=m"(regs.s2));
__asm__ volatile("sd s3, %0" : "=m"(regs.s3));
__asm__ volatile("sd s4, %0" : "=m"(regs.s4));
__asm__ volatile("sd s5, %0" : "=m"(regs.s5));
__asm__ volatile("sd s6, %0" : "=m"(regs.s6));
__asm__ volatile("sd s7, %0" : "=m"(regs.s7));
__asm__ volatile("sd s8, %0" : "=m"(regs.s8));
__asm__ volatile("sd s9, %0" : "=m"(regs.s9));
__asm__ volatile("sd s10, %0" : "=m"(regs.s10));
__asm__ volatile("sd s11, %0" : "=m"(regs.s11));
__asm__ volatile("sd t3, %0" : "=m"(regs.t3));
__asm__ volatile("sd t4, %0" : "=m"(regs.t4));
__asm__ volatile("sd t5, %0" : "=m"(regs.t5));
__asm__ volatile("sd t6, %0" : "=m"(regs.t6));
#elif defined(__x86_64__)
__asm__ volatile("mov %%rax, %0" : "=m"(regs.rax));
__asm__ volatile("mov %%rbx, %0" : "=m"(regs.rbx));
__asm__ volatile("mov %%rcx, %0" : "=m"(regs.rcx));
__asm__ volatile("mov %%rdx, %0" : "=m"(regs.rdx));
__asm__ volatile("mov %%rsi, %0" : "=m"(regs.rsi));
__asm__ volatile("mov %%rdi, %0" : "=m"(regs.rdi));
__asm__ volatile("mov %%rbp, %0" : "=m"(regs.rbp));
__asm__ volatile("mov %%rsp, %0" : "=m"(regs.rsp));
__asm__ volatile("mov %%r8, %0" : "=m"(regs.r8));
__asm__ volatile("mov %%r9, %0" : "=m"(regs.r9));
__asm__ volatile("mov %%r10, %0" : "=m"(regs.r10));
__asm__ volatile("mov %%r11, %0" : "=m"(regs.r11));
__asm__ volatile("mov %%r12, %0" : "=m"(regs.r12));
__asm__ volatile("mov %%r13, %0" : "=m"(regs.r13));
__asm__ volatile("mov %%r14, %0" : "=m"(regs.r14));
__asm__ volatile("mov %%r15, %0" : "=m"(regs.r15));
__asm__(
"pushf\n"
".cfi_adjust_cfa_offset 8\n"
"pop %0\n"
".cfi_adjust_cfa_offset -8\n"
: "=r"(regs.rflags));
// Proxy for fs.base since rdfsbase isn't always available.
__asm__("mov %%fs:0, %0" : "=r"(regs.fs_base));
regs.gs_base = 0; // Don't even try for gs.base.
#else
#error "what machine?"
#endif
return regs;
}
} // namespace
} // namespace LIBC_NAMESPACE_DECL
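// Public entry point: suspend every other thread in the process, report the
// requested categories of memory (globals, stacks, registers, TLS) to the
// given callbacks as (pointer, length, arg) ranges, and finally call done
// with the overall status. The suspended threads resume when their suspend
// tokens are dropped at the end of the call.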
__EXPORT [[gnu::noinline]]
void __sanitizer_memory_snapshot(sanitizer_memory_snapshot_callback_t* globals,
sanitizer_memory_snapshot_callback_t* stacks,
sanitizer_memory_snapshot_callback_t* regs,
sanitizer_memory_snapshot_callback_t* tls,
void (*done)(zx_status_t, void*), void* arg) {
// The only real reason to capture the registers this early is for the
// test case that tries to use a register it hopes won't be touched.
// This is the first thing after the test sets that register, and the
// volatile on the asms should keep the capture from being sunk down into
// the if below.
auto regdata = LIBC_NAMESPACE::CurrentThreadRegs();
LIBC_NAMESPACE::MemorySnapshot snapshot(done, arg);
snapshot.SuspendThreads();
if (snapshot.Ok() && globals) {
snapshot.ReportGlobals(globals);
}
if (snapshot.Ok() && (stacks || regs || tls)) {
// Use the boundary of this call frame itself as the stack bound, since it
// shouldn't contain any interesting pointers.
auto sp = reinterpret_cast<uintptr_t>(__builtin_frame_address(0));
snapshot.ReportTcb(__pthread_self(), sp, stacks, tls);
if (regs) {
// Report the register data.
regs(&regdata, sizeof(regdata), arg);
}
snapshot.ReportThreads(stacks, regs, tls);
}
}
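// Illustrative usage (a hedged sketch with hypothetical callback names, not
// code from this file):
//
//   void OnRange(void* mem, size_t len, void* arg) {
//     // Scan [mem, mem + len) for candidate pointers, etc.
//   }
//   void OnDone(zx_status_t status, void* arg) { /* Record the status. */ }
//
//   __sanitizer_memory_snapshot(OnRange, OnRange, OnRange, OnRange, OnDone,
//                               nullptr);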