// sdk/lib/c/dlfcn/dl/tlsdesc-runtime-dynamic.h - fuchsia - Git at Google

 // Copyright 2025 The Fuchsia Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.

 #ifndef LIB_DL_TLSDESC_RUNTIME_DYNAMIC_H_
 #define LIB_DL_TLSDESC_RUNTIME_DYNAMIC_H_

 #include <lib/elfldltl/layout.h>
 #include <lib/ld/tls.h>
 #include <lib/ld/tlsdesc.h>

 #include <algorithm>
 #include <cassert>
 #include <cstddef>
 #include <memory>
 #include <new>
 #include <span>
 #include <type_traits>
 #include <utility>

 #include <fbl/alloc_checker.h>
 #include <fbl/array.h>

 namespace [[gnu::visibility("hidden")]] dl {

 // The argument to TLSDESC hooks is `const TlsDescGot&`.
 using TlsDescGot = elfldltl::Elf<>::TlsDescGot<>;

 // For dynamic TLS modules, each thread's copy of each dynamic PT_TLS segment
 // is found by index into an array of pointers.  That array itself is found
 // as a normal thread_local variable dl::_dl_tlsdesc_runtime_dynamic_blocks
 // (owned by libdl) using a normal IE access.  These TLSDESC hooks take two
 // values: an index into that array, and an offset within that PT_TLS segment.
 // They compute `_dl_tlsdesc_runtime_dynamic_blocks[index] + offset - $tp`.
 //
 // The TLSDESC ABI provides for one address-sized word to encode the argument
 // to the TLSDESC hook (TlsDescGot::value).  There are two TLSDESC hooks that
 // encode those two values in different ways:
 //
 //  * The "split" version encodes both values directly in the word by splitting
 //    it in half bitwise.  The index is found in the high bits.  The offset is
 //    found in the low bits.  This version is used whenever each value fits
 //    into half the bits of the word.
 //
 //  * The "indirect" version uses the word as a pointer to an allocated data
 //    structure containing the index and offset (TlsdescIndirect).
 //
 // **NOTE:** There is no special provision for synchronization so far.  These
 // entry points assume that the current thread's blocks vector pointer is valid
 // for any index they can be passed.

 struct TlsdescIndirect {
   // Which element of _dl_tlsdesc_runtime_dynamic_blocks to use.
   size_t index;
   // Byte offset within that element's PT_TLS segment copy.
   size_t offset;
 };

 // DynamicTlsPtr is a smart pointer type that's interoperable with assembly
 // code accessing it as if it were a plain pointer type in memory.
 class DynamicTlsPtr;

 // For interacting with assembly code, a raw pointer to the first element of a
 // contiguous array of DynamicTlsPtr is used.
 using RawDynamicTlsArray = DynamicTlsPtr*;

 // These are used or defined in assembly (see tlsdesc-runtime-dynamic.S), so
 // they need unmangled linkage names.  From C++, they're still namespaced.
 extern "C" {

 // The runtime hooks access `_dl_tlsdesc_runtime_dynamic_blocks[index]`.
 // _dl_tlsdesc_runtime_dynamic_blocks itself must stay constinit (preferably
 // zero so it goes into .tbss) and trivially destructible to prevent ordering
 // issues that C++ thread_local constructor/destructor semantics would have.
 // It is a raw DynamicTlsPtr* (not a smart pointer) for exactly that reason,
 // and the tls_model attribute requests the initial-exec access model for it.
 extern constinit thread_local RawDynamicTlsArray _dl_tlsdesc_runtime_dynamic_blocks
     [[gnu::tls_model("initial-exec")]];

 // This hook splits the `value` field in half, with index in the high bits
 // and the offset in the low bits.
 extern ld::TlsdescCallback _dl_tlsdesc_runtime_dynamic_split;

 // This hook makes the `value` field a `const TlsdescIndirect*`, so the index
 // and offset are read from the pointed-to TlsdescIndirect instead.
 extern ld::TlsdescCallback _dl_tlsdesc_runtime_dynamic_indirect;

 }  // extern "C"

 // Given the thread pointer of any thread, including one just being allocated
 // and not actually started yet, access its _dl_tlsdesc_runtime_dynamic_blocks
 // variable as an lvalue reference.  Just accessing the thread_local variable
 // directly is the same as `TpToDynamicTlsBlocks(__builtin_thread_pointer())`.
 inline RawDynamicTlsArray& TpToDynamicTlsBlocks(void* tp) {
   // All TLS accesses use the IE model, so this variable lives at one fixed
   // offset from every thread pointer.  Start from the calling thread's own
   // copy of the variable and convert its address into that offset.  The
   // compiler would materialize the address as a GOT load plus the current
   // thread pointer, but then sees the thread pointer subtracted right back
   // out, so all that remains is the trivial GOT load.
   RawDynamicTlsArray* const current_thread_slot = &_dl_tlsdesc_runtime_dynamic_blocks;
   const auto tp_offset = ld::TpRelativeToOffset(current_thread_slot);

   // Apply the same offset to the thread pointer the caller asked about.
   return *ld::TpRelative<RawDynamicTlsArray>(tp_offset, tp);
 }

 // DynamicTlsPtr is the type of elements in _dl_tlsdesc_runtime_dynamic_blocks.
 // It's a standard-layout type that's nothing but a plain pointer, so that
 // assembly code can use it with a known precise memory layout.  Otherwise it
 // acts precisely like DynamicTlsPtr::UniquePtr.  (The only reason this type
 // needs to exist is to ensure assembly-compatible implementation internals;
 // std::unique_ptr doesn't formally guarantee that.)
 class DynamicTlsPtr {
  public:
   using TlsModule = ld::abi::Abi<>::TlsModule;

   // The std::unique_ptr to own a TLS block needs a custom deleter to use the
   // `operator delete[]` *function* directly, rather than the `delete[]`
   // *operator*.  This is to match the precise means of allocation, which has
   // to use the `operator new[]` function directly (not `new std::byte[n]`) so
   // as to use the overload that indicates (dynamic) alignment as well as size.
   // Since std::byte is both trivially-destructible and usable uninitialized,
   // there is no semantic difference between using the proper `new[]` and
   // `delete[]` operators (which in the general case ensure constructors and
   // destructors and formal C++ object lifetime rules) and using the underlying
   // allocator functions those operators call, which are called `operator
   // new[]` and `operator delete[]` to keep it confusing since they're neither
   // operators nor functions that take the same arguments as those operators.
   // But there is an important low-level difference, since the `new[]` and
   // `delete[]` operators implicitly use a hidden element count that's stored
   // as a size_t before the pointer (to allow `delete[]` to run the right
   // number of destructors); the underlying allocation includes space for this
   // hidden pointer, not just for the elements.  So it always matters to
   // manually pair the precise allocator and deallocator functions being used.
   //
   // Furthermore, which `operator delete[]` function signature is used to
   // deallocate a particular array should match which `operator new[]` function
   // signature was used to allocare it.  The compiler would generate the
   // `operator new[]` taking the dynamic alignment argument for `new T[n]` when
   // the static alignof(T) is > __STDCPP_DEFAULT_NEW_ALIGNMENT__; in that case,
   // its `delete[]` on T* would use the `operator delete[]` function that takes
   // the alignment (and it could choose or not to use the one that also takes
   // the size).  So this Deleter needs to recover the size and alignment of the
   // original allocation to call the correct `operator delete[]` signature.
   // The private BlockSizes helper class handles all this.
   struct Deleter {
     void operator()(std::byte* ptr) const {
       const TlsModule& module = BlockSizes::GetModule(ptr);
       BlockSizes{module}.Delete(ptr);
     }
   };
   using UniquePtr = std::unique_ptr<std::byte[], Deleter>;

   // Allocate a new, initialized block for the TlsModule.  This is the only way
   // a new pointer goes into a DynamicTlsPtr; otherwise only moves happen.
   // Hence, a DynamicTlsPtr always points to a block that already contains its
   // properly constinit-initialized values for some thread to start using (or,
   // later, that it is already using).
   [[nodiscard]] static DynamicTlsPtr New(fbl::AllocChecker& ac, const TlsModule& module) {
     const BlockSizes sizes{module};
     return sizes.New(ac, module);
   }

   constexpr DynamicTlsPtr() = default;
   DynamicTlsPtr(const DynamicTlsPtr&) = delete;

   constexpr DynamicTlsPtr(DynamicTlsPtr&& other) noexcept : ptr_{other.release()} {}

   DynamicTlsPtr& operator=(const DynamicTlsPtr&) = delete;
   DynamicTlsPtr& operator=(DynamicTlsPtr&& other) noexcept {
     reset();
     ptr_ = other.release();
     return *this;
   }

   void reset() { UniquePtr{release()}.reset(); }

   ~DynamicTlsPtr() { UniquePtr{ptr_}.reset(); }

   explicit constexpr operator bool() const { return ptr_; }

   // There are no get(), operator*(), or operator->() methods.  Once a TLS
   // block has been allocated, the only way to see a pointer inside it is to
   // acquire the valid span with knowledge of the TlsModule::tls_size() value
   // used to allocate this block.
   std::span<std::byte> contents(size_t tls_size) { return std::span{ptr_, tls_size}; }
   std::span<std::byte> contents(const TlsModule& module) { return contents(module.tls_size()); }

  private:
   // This helper class encapsulates all the arithmetic.  It's created by
   // extract size details from a TlsModule.  It then has enough information to
   // allocate (and initialize) or deallocate that module's TLS blocks.
   //
   // Each TLS block is allocated with extra space (before the returned pointer)
   // for its bookkeeping.  This is how the compiler's `new T[n]` and `delete[]`
   // usually work: storing the element count at `((size_t*)ptr)[-1]` by
   // allocating a slightly larger block from the underlying allocator, and
   // actually returning a pointer just inside that block.  This does the same,
   // with the same overhead: one word plus alignment padding.  But it instead
   // stores the TlsModule pointer whence both the size and the alignment of the
   // block allocated can be recomputed.  These blocks must be cleared out of
   // every thread and freed before the module can be unloaded and its TlsModule
   // pointer made invalid.
   class BlockSizes {
    public:
     using ModulePtr = const TlsModule*;

     BlockSizes() = delete;
     BlockSizes(const BlockSizes&) = default;

     // Compute sizes to allocate for this TlsModule.  Every block must be
     // aligned well enough to store the TlsModule pointer, in case that's more
     // than the requested alignment.  Its size must leave space for that
     // pointer to be before the aligned block of the requested size, so it
     // needs as much extra space as the total alignment to store the pointer.
     explicit BlockSizes(const TlsModule& module)
         : align_(std::max(sizeof(ModulePtr), module.tls_alignment())),
           size_{module.tls_size() + align_} {}

     // Do the actual allocation and initialization.  The module must be the
     // same one used in the constructor.  The returned pointer can be passed to
     // GetModule and Delete.
     DynamicTlsPtr New(fbl::AllocChecker& ac, const TlsModule& module) const {
       assert(module.tls_alignment() <= align_);
       assert(module.tls_size() <= size_ - align_);
       void* ptr = operator new[](size_, std::align_val_t{align_}, ac);
       if (!ptr) [[unlikely]] {
         return {};
       }
       DynamicTlsPtr block;
       block.ptr_ = static_cast<std::byte*>(ptr) + align_;
       ModulePointer(block.ptr_) = &module;
       ld::TlsModuleInit(module, {block.ptr_, module.tls_size()});
       return block;
     }

     // Recover the module pointer saved by New() from the DynamicTlsPtr::ptr_.
     static const TlsModule& GetModule(std::byte* ptr) {
       assert(ptr);
       return *ModulePointer(ptr);
     }

     // Given the DynamicTlsPtr::ptr_ value, recover the original pointer from
     // operator new[] and pass that to operator delete[].  The sizes recovered
     // via the saved TlsModule pointer match the operator new[] call exactly.
     void Delete(std::byte* ptr) const {
       operator delete[](ptr - align_, size_, std::align_val_t{align_});
     }

    private:
     static ModulePtr& ModulePointer(std::byte* ptr) {
       return reinterpret_cast<ModulePtr*>(ptr)[-1];
     }

     size_t align_, size_;
   };

   std::byte* release() { return std::exchange(ptr_, nullptr); }

   std::byte* ptr_ = nullptr;
 };
 static_assert(!std::is_copy_constructible_v<DynamicTlsPtr>);
 static_assert(!std::is_copy_assignable_v<DynamicTlsPtr>);
 static_assert(std::is_nothrow_move_constructible_v<DynamicTlsPtr>);
 static_assert(std::is_move_assignable_v<DynamicTlsPtr>);
 static_assert(std::is_standard_layout_v<DynamicTlsPtr>);
 static_assert(sizeof(DynamicTlsPtr) == sizeof(std::byte*));

 // An array to be installed in some thread's _dl_tlsdesc_runtime_dynamic_blocks
 // should start as a managed pointer until its elements are all fully
 // initialized with DynamicTlsPtr::New.
 using SizedDynamicTlsArray = fbl::Array<DynamicTlsPtr>;
 [[nodiscard]] inline SizedDynamicTlsArray MakeDynamicTlsArray(fbl::AllocChecker& ac, size_t n) {
   // fbl::MakeArray wants the checker by pointer; the result of the allocation
   // is reported through `ac` and the returned array.
   SizedDynamicTlsArray array = fbl::MakeArray<DynamicTlsPtr>(&ac, n);
   return array;
 }

 // When it's ready to be installed in a thread, it loses track of its size.
 // (That is, the size is no longer accessible to us; however, delete[] will
 // find the hidden size so it can run each element's destructor.)  This should
 // be the only way to modify _dl_tlsdesc_runtime_dynamic_blocks for any thread.
 // It returns an owned, but unsized, pointer to the previous array.  With the
 // thread pointer of any live thread, TLSDESC callbacks can still be accessing
 // the old array itself and/or any of the blocks it points to.  So the returned
 // old array should only be destroyed in cases where it's well-understood to be
 // safe.  Note that even moving-from (i.e. clearing) any of the old array's
 // elements could let any racing thread see a null pointer, even if the
 // array itself is kept accessible.  So great care should be taken in deciding
 // when to destroy this old array and how.  The straightforward case of just
 // letting the returned array destroy its elements is correct for thread
 // teardown (or unwinding an abortive thread creation).  At thread setup, it's
 // reasonable to use this and just assert the returned pointer is null.  There
 // should be no other uses of changing the installed pointer for a thread
 // (aside from simulated thread setup and teardown in tests) not governed by a
 // set of synchronization constraints around dangling pointer accesses.
 using UnsizedDynamicTlsArray = std::unique_ptr<DynamicTlsPtr[]>;
 [[nodiscard]] inline UnsizedDynamicTlsArray ExchangeRuntimeDynamicBlocks(  //
     SizedDynamicTlsArray blocks, void* tp = __builtin_thread_pointer()) {
   // Locate the slot belonging to the thread identified by tp.
   RawDynamicTlsArray& installed = TpToDynamicTlsBlocks(tp);

   // Install the new array (now owned by the slot as a raw pointer) and hand
   // the previously installed one back to the caller as an owning pointer.
   RawDynamicTlsArray const previous = std::exchange(installed, blocks.release());
   return UnsizedDynamicTlsArray{previous};
 }

 // In testing cases, when an old array is recovered and its size is known, turn
 // it into a SizedDynamicTlsArray again.
 [[nodiscard]] inline SizedDynamicTlsArray AdoptDynamicTlsArray(  //
     UnsizedDynamicTlsArray blocks, size_t n) {
   // Ownership passes from the unsized pointer to the sized array; the caller
   // vouches that n is the array's real element count.
   DynamicTlsPtr* const elements = blocks.release();
   return SizedDynamicTlsArray{elements, n};
 }

 // In testing cases, an old array of known size can be expanded by moving its
 // existing blocks without deleting them but then deleting the old array
 // itself.  This is only safe when it's known that no thread could be reading
 // the old array (for example, it's the current thread's own array) and then
 // it's fine if the thread does continue accessing the TLS blocks themselves
 // either through previously-acquired pointers or through the new array that
 // now owns those TLS blocks.  The old array is passed by lvalue reference so
 // it can be left untouched if the allocation of the new array fails.
 [[nodiscard]] inline SizedDynamicTlsArray EnlargeDynamicTlsArray(  //
     fbl::AllocChecker& ac, SizedDynamicTlsArray& old_array, size_t n) {
   assert(n > old_array.size());

   SizedDynamicTlsArray grown = MakeDynamicTlsArray(ac, n);
   if (!grown) [[unlikely]] {
     // Allocation failed: old_array is left exactly as it was.
     return grown;
   }

   // Transfer each existing block into the front of the larger array, then
   // free the old (now all-empty) array itself.
   std::ranges::move(old_array, grown.begin());
   old_array.reset();
   return grown;
 }

 }  // namespace dl

 #endif  // LIB_DL_TLSDESC_RUNTIME_DYNAMIC_H_
	// Copyright 2025 The Fuchsia Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style license that can be
	// found in the LICENSE file.

	#ifndef LIB_DL_TLSDESC_RUNTIME_DYNAMIC_H_
	#define LIB_DL_TLSDESC_RUNTIME_DYNAMIC_H_

	#include <lib/elfldltl/layout.h>
	#include <lib/ld/tls.h>
	#include <lib/ld/tlsdesc.h>

	#include <algorithm>
	#include <cstddef>
	#include <memory>
	#include <type_traits>

	#include <fbl/alloc_checker.h>
	#include <fbl/array.h>

	namespace [[gnu::visibility("hidden")]] dl {

	// The argument to TLSDESC hooks is `const TlsDescGot&`.
	using TlsDescGot = elfldltl::Elf<>::TlsDescGot<>;

	// For dynamic TLS modules, each thread's copy of each dynamic PT_TLS segment
	// is found by index into an array of pointers. That array itself is found
	// as a normal thread_local variable dl::_dl_tlsdesc_runtime_dynamic_blocks
	// (owned by libdl) using a normal IE access. These TLSDESC hooks take two
	// values: an index into that array, and an offset within that PT_TLS segment.
	// They compute `_dl_tlsdesc_runtime_dynamic_blocks[index] + offset - $tp`.
	//
	// The TLSDESC ABI provides for one address-sized word to encode the argument
	// to the TLSDESC hook (TlsDescGot::value). There are two TLSDESC hooks that
	// encode those two values in different ways:
	//
	// * The "split" version encodes both values directly in the word by splitting
	// it in half bitwise. The index is found in the high bits. The offset is
	// found in the low bits. This version is used whenever each value fits
	// into half the bits of the word.
	//
	// * The "indirect" version uses the word as a pointer to an allocated data
	// structure containing the index and offset (TlsdescIndirect).
	//
	// NOTE: There is no special provision for synchronization so far. These
	// entry points assume that the current thread's blocks vector pointer is valid
	// for any index they can be passed.

	struct TlsdescIndirect {
	  // Which element of _dl_tlsdesc_runtime_dynamic_blocks to use.
	  size_t index;
	  // Byte offset within that element's PT_TLS segment copy.
	  size_t offset;
	};

	// DynamicTlsPtr is a smart pointer type that's interoperable with assembly
	// code accessing it as if it were a plain pointer type in memory.
	class DynamicTlsPtr;

	// For interacting with assembly code, a raw pointer to the first element of a
	// contiguous array of DynamicTlsPtr is used.
	using RawDynamicTlsArray = DynamicTlsPtr*;

	// These are used or defined in assembly (see tlsdesc-runtime-dynamic.S), so
	// they need unmangled linkage names. From C++, they're still namespaced.
	extern "C" {

	// The runtime hooks access `_dl_tlsdesc_runtime_dynamic_blocks[index]`.
	// _dl_tlsdesc_runtime_dynamic_blocks itself must stay constinit (preferably
	// zero so it goes into .tbss) and trivially destructible to prevent ordering
	// issues that C++ thread_local constructor/destructor semantics would have.
	// It is a raw DynamicTlsPtr* (not a smart pointer) for exactly that reason,
	// and the tls_model attribute requests the initial-exec access model for it.
	extern constinit thread_local RawDynamicTlsArray _dl_tlsdesc_runtime_dynamic_blocks
	[[gnu::tls_model("initial-exec")]];

	// This hook splits the `value` field in half, with index in the high bits
	// and the offset in the low bits.
	extern ld::TlsdescCallback _dl_tlsdesc_runtime_dynamic_split;

	// This hook makes the `value` field a `const TlsdescIndirect*`, so the index
	// and offset are read from the pointed-to TlsdescIndirect instead.
	extern ld::TlsdescCallback _dl_tlsdesc_runtime_dynamic_indirect;

	} // extern "C"

	// Given the thread pointer of any thread, including one just being allocated
	// and not actually started yet, access its _dl_tlsdesc_runtime_dynamic_blocks
	// variable as an lvalue reference. Just accessing the thread_local variable
	// directly is the same as `TpToDynamicTlsBlocks(__builtin_thread_pointer())`.
	inline RawDynamicTlsArray& TpToDynamicTlsBlocks(void* tp) {
	  // All TLS accesses use the IE model, so this variable lives at one fixed
	  // offset from every thread pointer.  Start from the calling thread's own
	  // copy of the variable and convert its address into that offset.  The
	  // compiler would materialize the address as a GOT load plus the current
	  // thread pointer, but then sees the thread pointer subtracted right back
	  // out, so all that remains is the trivial GOT load.
	  RawDynamicTlsArray* const current_thread_slot = &_dl_tlsdesc_runtime_dynamic_blocks;
	  const auto tp_offset = ld::TpRelativeToOffset(current_thread_slot);

	  // Apply the same offset to the thread pointer the caller asked about.
	  return *ld::TpRelative<RawDynamicTlsArray>(tp_offset, tp);
	}

	// DynamicTlsPtr is the type of elements in _dl_tlsdesc_runtime_dynamic_blocks.
	// It's a standard-layout type that's nothing but a plain pointer, so that
	// assembly code can use it with a known precise memory layout. Otherwise it
	// acts precisely like DynamicTlsPtr::UniquePtr. (The only reason this type
	// needs to exist is to ensure assembly-compatible implementation internals;
	// std::unique_ptr doesn't formally guarantee that.)
	class DynamicTlsPtr {
	public:
	using TlsModule = ld::abi::Abi<>::TlsModule;

	// The std::unique_ptr to own a TLS block needs a custom deleter to use the
	// `operator delete[]` function directly, rather than the `delete[]`
	// operator. This is to match the precise means of allocation, which has
	// to use the `operator new[]` function directly (not `new std::byte[n]`) so
	// as to use the overload that indicates (dynamic) alignment as well as size.
	// Since std::byte is both trivially-destructible and usable uninitialized,
	// there is no semantic difference between using the proper `new[]` and
	// `delete[]` operators (which in the general case ensure constructors and
	// destructors and formal C++ object lifetime rules) and using the underlying
	// allocator functions those operators call, which are called `operator
	// new[]` and `operator delete[]` to keep it confusing since they're neither
	// operators nor functions that take the same arguments as those operators.
	// But there is an important low-level difference, since the `new[]` and
	// `delete[]` operators implicitly use a hidden element count that's stored
	// as a size_t before the pointer (to allow `delete[]` to run the right
	// number of destructors); the underlying allocation includes space for this
	// hidden pointer, not just for the elements. So it always matters to
	// manually pair the precise allocator and deallocator functions being used.
	//
	// Furthermore, which `operator delete[]` function signature is used to
	// deallocate a particular array should match which `operator new[]` function
	// signature was used to allocare it. The compiler would generate the
	// `operator new[]` taking the dynamic alignment argument for `new T[n]` when
	// the static alignof(T) is > __STDCPP_DEFAULT_NEW_ALIGNMENT__; in that case,
	// its `delete[]` on T* would use the `operator delete[]` function that takes
	// the alignment (and it could choose or not to use the one that also takes
	// the size). So this Deleter needs to recover the size and alignment of the
	// original allocation to call the correct `operator delete[]` signature.
	// The private BlockSizes helper class handles all this.
	struct Deleter {
	void operator()(std::byte* ptr) const {
	const TlsModule& module = BlockSizes::GetModule(ptr);
	BlockSizes{module}.Delete(ptr);
	}
	};
	using UniquePtr = std::unique_ptr<std::byte[], Deleter>;

	// Allocate a new, initialized block for the TlsModule. This is the only way
	// a new pointer goes into a DynamicTlsPtr; otherwise only moves happen.
	// Hence, a DynamicTlsPtr always points to a block that already contains its
	// properly constinit-initialized values for some thread to start using (or,
	// later, that it is already using).
	[[nodiscard]] static DynamicTlsPtr New(fbl::AllocChecker& ac, const TlsModule& module) {
	const BlockSizes sizes{module};
	return sizes.New(ac, module);
	}

	constexpr DynamicTlsPtr() = default;
	DynamicTlsPtr(const DynamicTlsPtr&) = delete;

	constexpr DynamicTlsPtr(DynamicTlsPtr&& other) noexcept : ptr_{other.release()} {}

	DynamicTlsPtr& operator=(const DynamicTlsPtr&) = delete;
	DynamicTlsPtr& operator=(DynamicTlsPtr&& other) noexcept {
	reset();
	ptr_ = other.release();
	return *this;
	}

	void reset() { UniquePtr{release()}.reset(); }

	~DynamicTlsPtr() { UniquePtr{ptr_}.reset(); }

	explicit constexpr operator bool() const { return ptr_; }

	// There are no get(), operator*(), or operator->() methods. Once a TLS
	// block has been allocated, the only way to see a pointer inside it is to
	// acquire the valid span with knowledge of the TlsModule::tls_size() value
	// used to allocate this block.
	std::span<std::byte> contents(size_t tls_size) { return std::span{ptr_, tls_size}; }
	std::span<std::byte> contents(const TlsModule& module) { return contents(module.tls_size()); }

	private:
	// This helper class encapsulates all the arithmetic. It's created by
	// extract size details from a TlsModule. It then has enough information to
	// allocate (and initialize) or deallocate that module's TLS blocks.
	//
	// Each TLS block is allocated with extra space (before the returned pointer)
	// for its bookkeeping. This is how the compiler's `new T[n]` and `delete[]`
	// usually work: storing the element count at `((size_t*)ptr)[-1]` by
	// allocating a slightly larger block from the underlying allocator, and
	// actually returning a pointer just inside that block. This does the same,
	// with the same overhead: one word plus alignment padding. But it instead
	// stores the TlsModule pointer whence both the size and the alignment of the
	// block allocated can be recomputed. These blocks must be cleared out of
	// every thread and freed before the module can be unloaded and its TlsModule
	// pointer made invalid.
	class BlockSizes {
	public:
	using ModulePtr = const TlsModule*;

	BlockSizes() = delete;
	BlockSizes(const BlockSizes&) = default;

	// Compute sizes to allocate for this TlsModule. Every block must be
	// aligned well enough to store the TlsModule pointer, in case that's more
	// than the requested alignment. Its size must leave space for that
	// pointer to be before the aligned block of the requested size, so it
	// needs as much extra space as the total alignment to store the pointer.
	explicit BlockSizes(const TlsModule& module)
	: align_(std::max(sizeof(ModulePtr), module.tls_alignment())),
	size_{module.tls_size() + align_} {}

	// Do the actual allocation and initialization. The module must be the
	// same one used in the constructor. The returned pointer can be passed to
	// GetModule and Delete.
	DynamicTlsPtr New(fbl::AllocChecker& ac, const TlsModule& module) const {
	assert(module.tls_alignment() <= align_);
	assert(module.tls_size() <= size_ - align_);
	void* ptr = operator new[](size_, std::align_val_t{align_}, ac);
	if (!ptr) [[unlikely]] {
	return {};
	}
	DynamicTlsPtr block;
	block.ptr_ = static_cast<std::byte*>(ptr) + align_;
	ModulePointer(block.ptr_) = &module;
	ld::TlsModuleInit(module, {block.ptr_, module.tls_size()});
	return block;
	}

	// Recover the module pointer saved by New() from the DynamicTlsPtr::ptr_.
	static const TlsModule& GetModule(std::byte* ptr) {
	assert(ptr);
	return *ModulePointer(ptr);
	}

	// Given the DynamicTlsPtr::ptr_ value, recover the original pointer from
	// operator new[] and pass that to operator delete[]. The sizes recovered
	// via the saved TlsModule pointer match the operator new[] call exactly.
	void Delete(std::byte* ptr) const {
	operator delete[](ptr - align_, size_, std::align_val_t{align_});
	}

	private:
	static ModulePtr& ModulePointer(std::byte* ptr) {
	return reinterpret_cast<ModulePtr*>(ptr)[-1];
	}

	size_t align_, size_;
	};

	std::byte* release() { return std::exchange(ptr_, nullptr); }

	std::byte* ptr_ = nullptr;
	};
	static_assert(!std::is_copy_constructible_v<DynamicTlsPtr>);
	static_assert(!std::is_copy_assignable_v<DynamicTlsPtr>);
	static_assert(std::is_nothrow_move_constructible_v<DynamicTlsPtr>);
	static_assert(std::is_move_assignable_v<DynamicTlsPtr>);
	static_assert(std::is_standard_layout_v<DynamicTlsPtr>);
	static_assert(sizeof(DynamicTlsPtr) == sizeof(std::byte*));

	// An array to be installed in some thread's _dl_tlsdesc_runtime_dynamic_blocks
	// should start as a managed pointer until its elements are all fully
	// initialized with DynamicTlsPtr::New.
	using SizedDynamicTlsArray = fbl::Array<DynamicTlsPtr>;
	[[nodiscard]] inline SizedDynamicTlsArray MakeDynamicTlsArray(fbl::AllocChecker& ac, size_t n) {
	  // fbl::MakeArray wants the checker by pointer; the result of the allocation
	  // is reported through `ac` and the returned array.
	  SizedDynamicTlsArray array = fbl::MakeArray<DynamicTlsPtr>(&ac, n);
	  return array;
	}

	// When it's ready to be installed in a thread, it loses track of its size.
	// (That is, the size is no longer accessible to us; however, delete[] will
	// find the hidden size so it can run each element's destructor.) This should
	// be the only way to modify _dl_tlsdesc_runtime_dynamic_blocks for any thread.
	// It returns an owned, but unsized, pointer to the previous array. With the
	// thread pointer of any live thread, TLSDESC callbacks can still be accessing
	// the old array itself and/or any of the blocks it points to. So the returned
	// old array should only be destroyed in cases where it's well-understood to be
	// safe. Note that even moving-from (i.e. clearing) any of the old array's
	// elements could let any racing thread see a null pointer, even if the
	// array itself is kept accessible. So great care should be taken in deciding
	// when to destroy this old array and how. The straightforward case of just
	// letting the returned array destroy its elements is correct for thread
	// teardown (or unwinding an abortive thread creation). At thread setup, it's
	// reasonable to use this and just assert the returned pointer is null. There
	// should be no other uses of changing the installed pointer for a thread
	// (aside from simulated thread setup and teardown in tests) not governed by a
	// set of synchronization constraints around dangling pointer accesses.
	using UnsizedDynamicTlsArray = std::unique_ptr<DynamicTlsPtr[]>;
	[[nodiscard]] inline UnsizedDynamicTlsArray ExchangeRuntimeDynamicBlocks(  //
	    SizedDynamicTlsArray blocks, void* tp = __builtin_thread_pointer()) {
	  // Locate the slot belonging to the thread identified by tp.
	  RawDynamicTlsArray& installed = TpToDynamicTlsBlocks(tp);

	  // Install the new array (now owned by the slot as a raw pointer) and hand
	  // the previously installed one back to the caller as an owning pointer.
	  RawDynamicTlsArray const previous = std::exchange(installed, blocks.release());
	  return UnsizedDynamicTlsArray{previous};
	}

	// In testing cases, when an old array is recovered and its size is known, turn
	// it into a SizedDynamicTlsArray again.
	[[nodiscard]] inline SizedDynamicTlsArray AdoptDynamicTlsArray(  //
	    UnsizedDynamicTlsArray blocks, size_t n) {
	  // Ownership passes from the unsized pointer to the sized array; the caller
	  // vouches that n is the array's real element count.
	  DynamicTlsPtr* const elements = blocks.release();
	  return SizedDynamicTlsArray{elements, n};
	}

	// In testing cases, an old array of known size can be expanded by moving its
	// existing blocks without deleting them but then deleting the old array
	// itself. This is only safe when it's known that no thread could be reading
	// the old array (for example, it's the current thread's own array) and then
	// it's fine if the thread does continue accessing the TLS blocks themselves
	// either through previously-acquired pointers or through the new array that
	// now owns those TLS blocks. The old array is passed by lvalue reference so
	// it can be left untouched if the allocation of the new array fails.
	[[nodiscard]] inline SizedDynamicTlsArray EnlargeDynamicTlsArray(  //
	    fbl::AllocChecker& ac, SizedDynamicTlsArray& old_array, size_t n) {
	  assert(n > old_array.size());

	  SizedDynamicTlsArray grown = MakeDynamicTlsArray(ac, n);
	  if (!grown) [[unlikely]] {
	    // Allocation failed: old_array is left exactly as it was.
	    return grown;
	  }

	  // Transfer each existing block into the front of the larger array, then
	  // free the old (now all-empty) array itself.
	  std::ranges::move(old_array, grown.begin());
	  old_array.reset();
	  return grown;
	}

	} // namespace dl

	#endif // LIB_DL_TLSDESC_RUNTIME_DYNAMIC_H_