[ld] Dynamic linker passive ABI

Add TLS details to the passive ABI: static TLS layout, per-module
PT_TLS descriptions and thread-pointer offsets in Abi<>, the
_ld_tlsdesc_runtime_static TLSDESC entry point, and a __tls_get_addr
implementation for the initial-exec set.

Change-Id: I098f021716cc28c77a501667db266ae7f84c0581
diff --git a/sdk/lib/ld/include/lib/ld/abi.h b/sdk/lib/ld/include/lib/ld/abi.h
index d2acf32..9847f7c 100644
--- a/sdk/lib/ld/include/lib/ld/abi.h
+++ b/sdk/lib/ld/include/lib/ld/abi.h
@@ -28,11 +28,15 @@
 #include <lib/elfldltl/soname.h>
 #include <lib/elfldltl/svr4-abi.h>
 #include <lib/elfldltl/symbol.h>
+#include <lib/elfldltl/tls-layout.h>
 
 #include <string_view>
 
 namespace ld::abi {
 
+// Forward declaration for tls.h.
+struct TlsGetAddrGot;
+
 template <class Elf = elfldltl::Elf<>, class AbiTraits = elfldltl::LocalAbiTraits>
 struct Abi {
   // Aliases to avoid using `typename` all over the place.
@@ -55,8 +59,9 @@
   template <template <class, class> class Class>
   using Type = Class<Elf, AbiTraits>;
 
-  // Forward declaration for type declared in module.h.
+  // Forward declarations for types declared in module.h and tls.h.
   struct Module;
+  struct TlsModule;
 
   // This lists all the initial-exec modules.  Embedded `link_map::l_prev` and
   // `link_map::l_next` form a doubly-linked list in load order, which is a
@@ -65,7 +70,33 @@
   // (except for any redundancies).
   Ptr<const Module> loaded_modules;
 
-  // TODO(fxbug.dev/128502): TLS layout details
+  // This gives the required size and alignment of the overall static TLS area.
+  // The alignment matches the max of static_tls_modules[...].tls_alignment and
+  // the psABI-specified minimum alignment.
+  elfldltl::TlsLayout<Elf> static_tls_layout;
+
+  // TLS details for initial-exec modules that have PT_TLS segments.  The entry
+  // at index `.tls_mod_id - 1` describes that module's PT_TLS.  A module with
+  // `.tls_mod_id == 0` has no PT_TLS segment.  TLS module ID numbers above
+  // static_tls_modules.size() are not used at startup but may be assigned to
+  // dynamically-loaded modules later.
+  Span<const TlsModule> static_tls_modules;
+
+  // Offset from the thread pointer to each module's segment in the static TLS
+  // block.  The entry at index `.tls_mod_id - 1` is the offset of that
+  // module's PT_TLS segment.
+  //
+  // This offset is actually a negative number on some machines like x86, but
+  // it's always calculated using address-sized unsigned arithmetic.  On some
+  // machines where it's non-negative, there is a nonempty psABI-specified
+  // reserved region right after the thread pointer, so a real offset is never
+  // zero; other machines like RISC-V do start the first module at offset zero.
+  // The ordering of offsets after the first is theoretically arbitrary but is
+  // in fact ascending.  When the main executable has a PT_TLS it must have
+  // `.tls_mod_id` of 1 and it must have the smallest offset since this is
+  // statically calculated by the linker for Local-Exec model accesses based
+  // only on the psABI's fixed offset and the PT_TLS alignment requirement.
+  Span<const Addr> static_tls_offsets;
 };
 
 // This is the DT_SONAME value representing the ABI declared in this file.
@@ -95,6 +126,30 @@
 // and exporting it makes sure they can even without debugging symbols.
 extern const Abi<>::RDebug _r_debug;
 
+// This is the function the dynamic linker uses in the PC slot of GOT entries
+// for TLSDESC references in Initial-Exec modules.  It expects the `got[1]`
+// slot to contain an exact offset (treated as signed) from the thread pointer,
+// which it simply returns.
+//
+// The initial-exec dynamic linker exports this symbol so that the libdl
+// runtime dynamic linker can use it for TLSDESC references resolved to the
+// initial-exec set (or for TLSDESC references resolved to any module whose
+// PT_TLS was placed into the static TLS area at load time).
+//
+// This symbol is not expected to be used as a normal call target in any code,
+// only for putting its address into the GOT slot for a TLSDESC reference.
+//
+// On all machines, this function does take a pointer into the GOT as an
+// argument and it does return a signed offset from the thread pointer.
+// However, the actual register calling convention is completely bespoke for
+// TLSDESC on each machine and doesn't necessarily use the first normal
+// argument register for its argument, nor the normal return value register for
+// its return value; it usually is not permitted to clobber other registers
+// that are clobbered by a normal call.  This function is always written in
+// assembly and the only calls to it are those specially generated by the
+// compiler to call it via a GOT slot produced by TLSDESC relocs.
+uintptr_t _ld_tlsdesc_runtime_static(const uintptr_t* got);
+
 }  // extern "C"
 
 }  // namespace ld::abi
diff --git a/sdk/lib/ld/include/lib/ld/dl-phdr-info.h b/sdk/lib/ld/include/lib/ld/dl-phdr-info.h
new file mode 100644
index 0000000..7e9759e
--- /dev/null
+++ b/sdk/lib/ld/include/lib/ld/dl-phdr-info.h
@@ -0,0 +1,32 @@
+// Copyright 2023 The Fuchsia Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef LIB_LD_DL_PHDR_INFO_H_
+#define LIB_LD_DL_PHDR_INFO_H_
+
+#include <link.h>
+
+#include "module.h"
+
+namespace ld {
+
+template <class Elf, class AbiTraits>
+constexpr dl_phdr_info MakeDlPhdrInfo(const abi::Abi<Elf, AbiTraits>& abi,
+                                      const typename abi::Abi<Elf, AbiTraits>::Module& module,
+                                      void* tls_data, uint64_t adds = 0, uint64_t subs = 0) {
+  return {
+      .dlpi_addr = module.link_map.addr,
+      .dlpi_name = module.link_map.name,
+      .dlpi_phdr = module.phdrs.data(),
+      .dlpi_phnum = static_cast<uint16_t>(module.phdrs.size()),
+      .dlpi_adds = adds,
+      .dlpi_subs = subs,
+      .dlpi_tls_modid = module.tls_modid,
+      .dlpi_tls_data = tls_data,
+  };
+}
+
+}  // namespace ld
+
+#endif  // LIB_LD_DL_PHDR_INFO_H_
diff --git a/sdk/lib/ld/include/lib/ld/init-fini.h b/sdk/lib/ld/include/lib/ld/init-fini.h
new file mode 100644
index 0000000..cc23547
--- /dev/null
+++ b/sdk/lib/ld/include/lib/ld/init-fini.h
@@ -0,0 +1,53 @@
+// Copyright 2023 The Fuchsia Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef LIB_LD_INIT_FINI_H_
+#define LIB_LD_INIT_FINI_H_
+
+#include <lib/elfldltl/init-fini.h>
+#include <lib/elfldltl/link-map-list.h>
+#include <lib/elfldltl/memory.h>
+#include <lib/ld/abi.h>
+#include <lib/ld/module.h>
+
+#include <algorithm>
+#include <climits>
+#include <cstdint>
+
+namespace ld {
+
+using AbiModuleList =
+    elfldltl::LinkMapList<elfldltl::DirectMemory, elfldltl::Elf<>, elfldltl::LocalAbiTraits,
+                          abi::Abi<>::Module, &abi::Abi<>::Module::link_map>;
+
+inline elfldltl::DirectMemory gLocalMemory{
+    {reinterpret_cast<std::byte*>(0), SIZE_MAX},
+    0,
+};
+
+inline AbiModuleList AbiModules(const abi::Abi<>& abi = abi::_ld_abi) {
+  return AbiModuleList{gLocalMemory, abi.loaded_modules.address()};
+}
+
+inline void InitModule(const abi::Abi<>::Module& module) {
+  module.init.CallInit(module.link_map.addr);
+}
+
+inline void FiniModule(const abi::Abi<>::Module& module) {
+  module.fini.CallFini(module.link_map.addr);
+}
+
+inline void InitAbiModules() {
+  AbiModuleList modules = AbiModules();
+  std::for_each(modules.begin(), modules.end(), InitModule);
+}
+
+inline void FiniAbiModules() {
+  AbiModuleList modules = AbiModules();
+  std::for_each(modules.rbegin(), modules.rend(), FiniModule);
+}
+
+}  // namespace ld
+
+#endif  // LIB_LD_INIT_FINI_H_
diff --git a/sdk/lib/ld/include/lib/ld/load-module.h b/sdk/lib/ld/include/lib/ld/load-module.h
index 3fa8c17..2b70fda 100644
--- a/sdk/lib/ld/include/lib/ld/load-module.h
+++ b/sdk/lib/ld/include/lib/ld/load-module.h
@@ -144,6 +144,10 @@
   constexpr size_type load_bias() const { return module().link_map.addr; }
 
   // TODO(fxbug.dev/128502): tls methods
+  constexpr bool uses_static_tls() const {
+    ZX_PANIC("Should never be called");
+    return false;
+  }
   constexpr size_t static_tls_bias() const {
     ZX_PANIC("Should never be called");
     return 0;
diff --git a/sdk/lib/ld/include/lib/ld/module.h b/sdk/lib/ld/include/lib/ld/module.h
index 8fc5556..204ee14 100644
--- a/sdk/lib/ld/include/lib/ld/module.h
+++ b/sdk/lib/ld/include/lib/ld/module.h
@@ -102,7 +102,11 @@
   // program exit or when it's dynamically unloaded (if that's possible).
   Type<elfldltl::InitFiniInfo> fini;
 
-  // TODO(fxbug.dev/128502): TLS module ID
+  // Each module that has a PT_TLS segment of its own is assigned a module ID,
+  // which is a nonzero index.  This value is zero if the module has no PT_TLS.
+  // Note that a module's code might use TLS relocations (resolved to external
+  // symbols) even if that module has no PT_TLS segment of its own.
+  Addr tls_modid = 0;
 
   // Each and every module gets a "module ID" number that's used in symbolizer
   // markup contextual elements describing the module.  These are expected to
diff --git a/sdk/lib/ld/include/lib/ld/tls.h b/sdk/lib/ld/include/lib/ld/tls.h
new file mode 100644
index 0000000..f067734
--- /dev/null
+++ b/sdk/lib/ld/include/lib/ld/tls.h
@@ -0,0 +1,172 @@
+// Copyright 2023 The Fuchsia Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef LIB_LD_TLS_H_
+#define LIB_LD_TLS_H_
+
+#include <lib/stdcompat/bit.h>
+
+#include "abi.h"
+
+namespace ld {
+namespace abi {
+
+// This describes the details gleaned from the PT_TLS header for a module.
+// These are stored in an array indexed by TLS module ID number - 1, as the
+// module ID number zero is never used.
+//
+// Note that while module ID number 1 is most often the main executable, that
+// need not always be so: if the main executable has no PT_TLS of its own, then
+// the earliest module loaded that does have a PT_TLS gets module ID 1.
+//
+// What is importantly special about the main executable is that offsets in the
+// static TLS block are chosen with the main executable first--it may have been
+// linked with LE/GE TLS access code where the linker chose its expected
+// offsets at static link time.  When the dynamic linker follows the usual
+// procedure of assigning module IDs in load order and then doing static TLS
+// layout in the same order, it always comes out the same.  But the only real
+// constraint on the runtime layout chosen is that if the main executable has a
+// PT_TLS segment, it must be first and its offset from the thread pointer must
+// be the fixed value prescribed by the psABI.  The adjacent private portions
+// of the runtime thread descriptor must be located such that both their own
+// alignment requirements and the p_align of module 1's PT_TLS are respected.
+
+template <class Elf, class AbiTraits>
+struct Abi<Elf, AbiTraits>::TlsModule {
+  constexpr size_type tls_size() const { return tls_initial_data.size() + tls_bss_size; }
+
+  // Initial data image in memory, usually a pointer into the RODATA or RELRO
+  // segment of the module's load image.
+  Span<const std::byte> tls_initial_data;
+
+  // If the module has a PT_TLS, its total size in memory (for each thread) is
+  // determined by the initial data (tls_initial_data.size_bytes(), from .tdata
+  // et al) plus this size of zero-initialized bytes (from .tbss et al).
+  Addr tls_bss_size = 0;
+
+  // The runtime memory for each thread's copy of the initialized PT_TLS data
+  // for this segment must have at least this minimum alignment (p_align).
+  // This is validated to be a power of two before the module is loaded.
+  Addr tls_alignment = 0;
+};
+
+// When the compiler generates a call to __tls_get_addr, the linker generates
+// two corresponding dynamic relocation entries applying to adjacent GOT slots
+// that form a pair describing what module and symbol resolved the reference
+// at dynamic link time.  The first slot holds the module ID, a 1-origin
+// index.  The second slot holds the offset from that module's PT_TLS segment.
+struct TlsGetAddrGot {
+  uintptr_t tls_mod_id;  // R_*_DTPMOD* et al relocations set this.
+  uintptr_t offset;      // R_*_DTPOFF* et al relocations set this.
+};
+
+// This is the symbol that compilers generate calls to for GD/LD TLS accesses
+// in the original ABI (without TLSDESC).  Its linkage name is known to the
+// compiler and the linker.  This is not actually implemented by ld.so, but
+// must be supplied by something in the dependency graph of a program that
+// uses old-style TLS.  The implementation in libc or libdl or suchlike can
+// use the `_ld_abi.static_tls_offsets` data to handle TLS module IDs in the
+// initial-exec set (see <lib/dl/tls.h>).
+extern "C" void* __tls_get_addr(const TlsGetAddrGot& got);
+
+// The standard symbol name with hash value cached statically.
+inline constexpr elfldltl::SymbolName kTlsGetAddrSymbol{"__tls_get_addr"};
+
+}  // namespace abi
+
+// Interrogate the passive ABI (e.g. ld::abi::_ld_abi) for the thread-pointer
+// offset of each thread's static TLS data area for the given TLS module ID
+// among the initial-exec set of TLS modules.
+template <class Elf, class AbiTraits>
+constexpr ptrdiff_t TlsInitialExecOffset(const typename abi::Abi<Elf, AbiTraits>& abi,
+                                         typename Elf::size_type modid) {
+  // The offset is stored as unsigned, but is actually signed.
+  const size_t offset = abi.static_tls_offsets[modid - 1];
+  return cpp20::bit_cast<ptrdiff_t>(offset);
+}
+
+// Populate a static TLS segment for the given module in one thread.  The size
+// of the segment must match .tls_size().
+template <class Module>
+constexpr void TlsModuleInit(const Module& module, cpp20::span<std::byte> segment,
+                             bool known_zero = false) {
+  // tls_initial_data is Span<const std::byte>; keep const on the element type.
+  cpp20::span<const std::byte> initial_data = module.tls_initial_data;
+  if (!initial_data.empty()) {
+    memcpy(segment.data(), initial_data.data(), initial_data.size());
+  }
+  if (module.tls_bss_size != 0 && !known_zero) {
+    memset(segment.data() + initial_data.size(), 0, module.tls_bss_size);
+  }
+
+// Populate the static TLS block with initial data and zero'd tbss regions for
+// each module that has a PT_TLS segment.  The span passed should cover the
+// whole area allocated for static TLS data for a new thread.  The offset
+// should be the location in that span where the thread pointer will point
+// (which may be at the end of the span for x86 negative TLS offsets).
+template <class Elf, class AbiTraits>
+inline void TlsInitialExecDataInit(const typename abi::Abi<Elf, AbiTraits>& abi,
+                                   cpp20::span<std::byte> block, ptrdiff_t tp_offset,
+                                   bool known_zero = false) {
+  using size_type = typename Elf::size_type;
+  for (size_t i = 0; i < abi.static_tls_modules.size(); ++i) {
+    const auto& module = abi.static_tls_modules[i];
+    const size_type modid = static_cast<size_type>(i + 1);
+    const ptrdiff_t offset = TlsInitialExecOffset(abi, modid);
+    cpp20::span segment = block.subspan(tp_offset + offset, module.tls_size());
+    TlsModuleInit(module, segment, known_zero);
+  }
+}
+
+// Fetch the current thread pointer with the given byte offset.
+inline void* TpRelative(ptrdiff_t offset = 0) {
+  char* tp;
+#if defined(__x86_64__) && defined(__clang__)
+  // This fetches %fs:0, but the compiler knows what it's doing.  LLVM knows
+  // that in the compiler ABI %fs:0 always stores the %fs.base address, and its
+  // optimizer will see through this to integrate *TpRelative(N) as a direct
+  // "mov %fs:N, ...".  Note that these special pointer types can be used to
+  // access memory, but they cannot be cast to a normal pointer type (which in
+  // the abstract should add in the base address, but the compiler doesn't know
+  // how to do that).
+  using FsRelative = char* [[clang::address_space(257)]];
+  tp = *reinterpret_cast<FsRelative*>(0);
+#elif defined(__x86_64__)
+  // TODO(mcgrathr): GCC 6 supports this syntax instead (and __seg_gs):
+  //     void* __seg_fs* fs = 0;
+  // Unfortunately, it allows it only in C and not in C++.
+  // It also requires -fasm under -std=c11 (et al), see:
+  //     https://gcc.gnu.org/bugzilla/show_bug.cgi?id=79609
+  // It's also buggy for the special case of 0, see:
+  //     https://gcc.gnu.org/bugzilla/show_bug.cgi?id=79619
+  __asm__ __volatile__("mov %%fs:0,%0" : "=r"(tp));
+#elif defined(__i386__) && defined(__clang__)
+  // Everything above applies the same on x86-32, but with %gs instead.
+  using GsRelative = char* [[clang::address_space(256)]];
+  tp = *reinterpret_cast<GsRelative*>(0);
+#elif defined(__i386__)
+  __asm__ __volatile__("mov %%gs:0,%0" : "=r"(tp));
+#else
+  tp = static_cast<char*>(__builtin_thread_pointer());
+#endif
+
+  return tp + offset;
+}
+
+// Interrogate the passive ABI (e.g. ld::abi::_ld_abi) to locate the current
+// thread's TLS data area for the given TLS module ID among the initial-exec
+// set of TLS modules.
+template <class Elf, class AbiTraits>
+inline void* TlsInitialExecData(const typename abi::Abi<Elf, AbiTraits>& abi,
+                                typename Elf::size_type modid) {
+  if (modid == 0) {
+    return nullptr;
+  }
+
+  return TpRelative(TlsInitialExecOffset(abi, modid));
+}
+
+}  // namespace ld
+
+#endif  // LIB_LD_TLS_H_
diff --git a/sdk/lib/ld/include/lib/ld/tlsdesc.h b/sdk/lib/ld/include/lib/ld/tlsdesc.h
new file mode 100644
index 0000000..ebcce7d
--- /dev/null
+++ b/sdk/lib/ld/include/lib/ld/tlsdesc.h
@@ -0,0 +1,125 @@
+// Copyright 2023 The Fuchsia Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef LIB_LD_TLSDESC_H_
+#define LIB_LD_TLSDESC_H_
+
+// These declarations relate to the TLSDESC runtime ABI.  While the particular
+// ABI details are specific to each machine, they all fit a common pattern.
+//
+// The R_*_TLSDESC relocation type directs dynamic linking to fill in a special
+// pair of adjacent GOT slots.  The first slot is unfilled at link time and
+// gets the PC of a special function provided by the dynamic linking runtime.
+// For each TLS reference, the compiler generates an indirect call via this GOT
+// slot.  The compiler also passes that address in the GOT to that function.
+//
+// This is a normal indirect call at the machine level.  However, it uses its
+// own bespoke calling convention specified in the psABI for each machine
+// rather than the standard C/C++ calling convention.  The convention for each
+// machine is similar: the use of the return address register and/or stack is
+// normal; one or two registers are designated for the argument (GOT address),
+// return value, and scratch; all other registers are preserved by the call,
+// except the condition codes.  The return value is usually either an address
+// or an offset from the psABI-specified thread-pointer register.
+//
+// This makes the impact of the runtime call on code generation very minimal.
+// The runtime implementation both can refer to the value stored in the GOT
+// slot by dynamic linking and can in theory dynamically update both slots to
+// lazily redirect to a different runtime entry point and argument data.
+//
+// The relocation's symbol and addend are meant to apply to the second GOT slot
+// of the pair.  (For DT_REL format, the addend is stored in place there.)
+// When dynamic linking chooses an entry point to store into the first GOT slot
+// it also chooses the value to store in the second slot, which is some kind of
+// offset or address that includes the addend and symbol value calculations.
+
+#ifdef __ASSEMBLER__  // clang-format off
+
+// Given standard .cfi_startproc initial state, reset CFI to indicate the
+// special ABI for the R_*_TLSDESC callback function on this machine.
+
+.macro .cfi.tlsdesc
+
+#if defined(__aarch64__)
+
+  // Almost all registers are preserved from the caller.  The integer set does
+  // not include x30 (LR) or SP, which .cfi_startproc covered.
+  .cfi.all_integer .cfi.same_value
+  .cfi.all_vectorfp .cfi.same_value
+
+  // On entry x0 contains the argument: the address of the GOT slot pair.
+  // On exit x0 contains the return value: offset from $tp (TPIDR_EL0).
+  .cfi_undefined x0
+
+#elif defined(__riscv)
+
+  // Almost all registers are preserved from the caller.  The integer set does
+  // not include sp, which .cfi_startproc covered.
+  .cfi.all_integer .cfi.same_value
+  .cfi.all_vectorfp .cfi.same_value
+
+  // The return address is in t0 rather than the usual ra, and preserved there.
+  .cfi_return_column t0
+
+  // On entry a0 contains the argument: the address of the GOT slot range.
+  // On exit a0 contains the return value: offset from $tp.
+  .cfi_undefined a0
+
+#elif defined(__x86_64__)
+
+  // Almost all registers are preserved from the caller.  The integer set does
+  // not include %rsp, which .cfi_startproc covered.
+  .cfi.all_integer .cfi.same_value
+  .cfi.all_vectorfp .cfi.same_value
+
+  // On entry %rax contains the argument: the address of the GOT slot pair.
+  // On exit %rax contains the return value: offset from $tp (%fs.base).
+  .cfi_undefined %rax
+
+#else
+
+// Not all machines have TLSDESC support specified in the psABI.
+
+#endif
+
+.endm
+
+#else  // clang-format on
+
+#include <cstdint>
+
+namespace [[gnu::visibility("hidden")]] ld {
+
+// When the compiler generates a TLSDESC-style reference to a TLS variable, it
+// loads a designated register with the address of a pair of GOT slots.  A
+// single R_*_TLSDESC_* dynamic relocation refers to the pair.  The addend
+// applies to (and for DT_REL format, is stored in) the second slot.  The
+// first slot is initialized at load time to a PC address to be called with
+// the address of the GOT (first) entry in the designated register (using an
+// otherwise bespoke calling convention rather than the machine's norm).
+struct TlsDescGot {
+  uintptr_t call;
+  uintptr_t offset;
+};
+
+// This is a callback function to be used in the TlsDescGot::call slot at
+// runtime.  Though it's declared here as a C++ function with an argument, it's
+// actually implemented in assembly code with a bespoke calling convention for
+// the argument, return value, and register usage that's different from normal
+// functions, so this cannot actually be called from C++.  This symbol name is
+// not visible anywhere outside the dynamic linking implementation itself and
+// the function is only ever called by compiler-generated TLSDESC references.
+//
+// In this minimal implementation used for PT_TLS segments in the static TLS
+// set, got.offset is always simply a fixed offset from the thread pointer.
+// Note this offset might be negative, but it's always handled as uintptr_t to
+// ensure well-defined overflow arithmetic.
+
+extern "C" uintptr_t _ld_tlsdesc_runtime_static(const TlsDescGot& got);
+
+}  // namespace ld
+
+#endif  // __ASSEMBLER__
+
+#endif  // LIB_LD_TLSDESC_H_
diff --git a/sdk/lib/ld/static-tls-get-addr.h b/sdk/lib/ld/static-tls-get-addr.h
new file mode 100644
index 0000000..3e459f2
--- /dev/null
+++ b/sdk/lib/ld/static-tls-get-addr.h
@@ -0,0 +1,25 @@
+// Copyright 2022 The Fuchsia Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef LIB_LD_STATIC_TLS_GET_ADDR_H_
+#define LIB_LD_STATIC_TLS_GET_ADDR_H_
+
+#include <lib/ld/abi.h>
+#include <lib/stdcompat/span.h>
+
+#include <cassert>
+#include <cstddef>
+
+namespace ld {
+
+// TlsGetAddrGot is declared in ld::abi (see <lib/ld/tls.h>), so it must be
+// qualified here in namespace ld.
+constexpr void* StaticTlsGetAddr(const abi::TlsGetAddrGot& got,
+                                 cpp20::span<const uintptr_t> offsets, void* tp) {
+  assert(got.tls_mod_id > 0);
+  return static_cast<std::byte*>(tp) + offsets[got.tls_mod_id - 1] + got.offset;
+
+}  // namespace ld
+
+#endif  // LIB_LD_STATIC_TLS_GET_ADDR_H_
diff --git a/sdk/lib/ld/tls_get_addr.cc b/sdk/lib/ld/tls_get_addr.cc
new file mode 100644
index 0000000..fcdbb48
--- /dev/null
+++ b/sdk/lib/ld/tls_get_addr.cc
@@ -0,0 +1,37 @@
+// Copyright 2022 The Fuchsia Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include <lib/ld/abi.h>
+#include <lib/ld/tls.h>
+
+#include "static-tls-get-addr.h"
+
+namespace ld {
+namespace {
+
+#if defined(__x86_64__)
+
+inline void* ThreadPointer() {
+#ifdef __clang__
+  return *static_cast<void* [[clang::address_space(257)]]*>(nullptr);
+#else
+  void* tp;
+  __asm__("mov %%fs:0,%0" : "=r"(tp));
+  return tp;
+#endif
+}
+
+#else
+
+inline void* ThreadPointer() { return __builtin_thread_pointer(); }
+
+#endif
+
+}  // namespace
+
+// Defines the function declared in ld::abi (abi.h declares it as taking a
+// const reference); _ld_abi also lives in ld::abi, so both need qualification.
+void* abi::__tls_get_addr(const abi::TlsGetAddrGot& got) {
+
+}  // namespace ld
diff --git a/sdk/lib/ld/tlsdesc.S b/sdk/lib/ld/tlsdesc.S
new file mode 100644
index 0000000..bd82b52
--- /dev/null
+++ b/sdk/lib/ld/tlsdesc.S
@@ -0,0 +1,98 @@
+// Copyright 2023 The Fuchsia Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include <lib/arch/asm.h>
+
+#if defined(__aarch64__)
+
+// The GOT slot contains the offset from TPIDR_EL0.  Just return it.
+//
+// Since the function is so tiny, keep it naturally-aligned (it's a mere 8
+// bytes) just to be sure it can never straddle a cache line.
+.function _ld_tlsdesc_runtime_static, export, align=8
+  .cfi.tlsdesc
+
+  // On AArch64 ILP32, GOT entries are 4 bytes, not 8.
+# ifdef _LP64
+#  define R0 x0
+#  define ADDRSIZE 8
+# else
+#  define R0 w0
+#  define ADDRSIZE 4
+# endif
+
+  // On entry x0 contains the argument: the address of the GOT slot pair.
+  // The first word holds our own PC, the second the static TLS offset.
+  ldr R0, [R0, #ADDRSIZE]
+
+  // On exit x0 contains the return value: offset from $tp (TPIDR_EL0).
+  .cfi_undefined R0
+
+  ret
+.end_function
+
+#elif defined(__x86_64__)
+
+// The GOT slot contains the offset from %fs.base.  Just return it.
+//
+// Since the function is so tiny, keep it naturally-aligned (it's actually
+// only 5 bytes) just to be sure it can never straddle a cache line.
+.function _ld_tlsdesc_runtime_static, export, align=8
+  .cfi.tlsdesc
+
+  // On entry %rax contains the argument: the address of the GOT slot pair.
+  // The first word holds our own PC, the second the static TLS offset.
+  //
+  // Note that on x86-64 ILP32, GOT entries are still 8 bytes, to facilitate
+  // use of the indirect addressing modes.
+  mov 8(%rax), %rax
+
+  // On exit %rax contains the return value: offset from $tp (%fs.base).
+  .cfi_undefined %rax
+
+  ret
+.end_function
+
+#elif defined(__riscv)
+
+.macro .cfi.tlsdesc
+  .cfi_return_column 64
+  .cfi_register 64, t0
+  .cfi_same_value t0
+// .cfi_same_value everything but a0
+.endm
+
+// The GOT slot contains the offset from tp.  Just return it.
+//
+// Since the function is so tiny, keep it naturally-aligned just to be sure it
+// can never straddle a cache line.
+.function _ld_tlsdesc_runtime_static, export, align=4, cfi=custom
+  .cfi.tlsdesc
+
+# ifdef _LP64
+#  define LOAD_ADDRSIZE ld
+#  define ADDRSIZE 8
+# else
+#  define LOAD_ADDRSIZE lw
+#  define ADDRSIZE 4
+# endif
+
+  // On entry a0 contains the argument: the address of the GOT slot range.
+  // The first word holds our own PC, the second the static TLS offset.
+  // Unlike other machines, RISC-V has more slots to spare after that.
+  // But those are only useful in the dynamic-loading case.
+  LOAD_ADDRSIZE a0, ADDRSIZE(a0)
+
+  // On exit a0 contains the return value: offset from tp.
+  .cfi_undefined a0
+
+  // The caller's return address is in t0, with ra preserved.
+  jr t0
+.end_function
+
+#else
+
+// Not all machines have TLSDESC support specified in the psABI.
+
+#endif
diff --git a/src/lib/elfldltl/include/lib/elfldltl/init-fini.h b/src/lib/elfldltl/include/lib/elfldltl/init-fini.h
index e263f1b..215d86f 100644
--- a/src/lib/elfldltl/include/lib/elfldltl/init-fini.h
+++ b/src/lib/elfldltl/include/lib/elfldltl/init-fini.h
@@ -75,7 +75,7 @@
   // true iff Addr has already been relocated.  The argument flag should be
   // true iff relocations affecting RELRO data have already been applied.
   template <typename T>
-  constexpr void VisitInit(T&& init, bool relocated) {
+  constexpr void VisitInit(T&& init, bool relocated) const {
     if (legacy_ != 0) {
       init(legacy_, false);
     }
@@ -86,7 +86,7 @@
 
   // Same as VisitInit, but in the reverse order.
   template <typename T>
-  constexpr void VisitFini(T&& fini, bool relocated) {
+  constexpr void VisitFini(T&& fini, bool relocated) const {
     for (auto it = array_.rbegin(); it != array_.rend(); ++it) {
       fini(*it, relocated);
     }
@@ -109,12 +109,12 @@
   }
 
   // Call all the functions in initialization order.
-  void CallInit(size_type bias, bool relocated = true) {
+  void CallInit(size_type bias, bool relocated = true) const {
     VisitInit(RelocatedCall(bias), relocated);
   }
 
   // Call all the functions in finalization order.
-  void CallFini(size_type bias, bool relocated = true) {
+  void CallFini(size_type bias, bool relocated = true) const {
     VisitFini(RelocatedCall(bias), relocated);
   }
 
diff --git a/src/lib/elfldltl/include/lib/elfldltl/link-map-list.h b/src/lib/elfldltl/include/lib/elfldltl/link-map-list.h
new file mode 100644
index 0000000..86a50d4
--- /dev/null
+++ b/src/lib/elfldltl/include/lib/elfldltl/link-map-list.h
@@ -0,0 +1,133 @@
+// Copyright 2023 The Fuchsia Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef SRC_LIB_ELFLDLTL_INCLUDE_LIB_ELFLDLTL_LINK_MAP_LIST_H_
+#define SRC_LIB_ELFLDLTL_INCLUDE_LIB_ELFLDLTL_LINK_MAP_LIST_H_
+
+#include <iterator>
+
+#include "layout.h"
+#include "memory.h"
+#include "svr4-abi.h"
+
+namespace elfldltl {
+
+template <class Memory, class Elf = Elf<>, class AbiTraits = LocalAbiTraits,
+          typename EntryType = typename Elf::template LinkMap<AbiTraits>,
+          auto LinkMapMember = nullptr>
+class LinkMapList {
+  template <bool Reverse>
+  class IteratorImpl;
+
+ public:
+  using value_type = EntryType;
+  using reference = value_type&;
+  using const_reference = const value_type&;
+  using difference_type = ptrdiff_t;
+  using size_type = size_t;
+
+  using iterator = IteratorImpl<false>;
+  using reverse_iterator = IteratorImpl<true>;
+  using const_iterator = iterator;
+  using const_reverse_iterator = reverse_iterator;
+
+  constexpr LinkMapList(const LinkMapList&) = default;
+
+  constexpr LinkMapList(Memory& memory, typename Elf::size_type map) : memory_(memory), map_(map) {}
+
+  iterator begin() const { return iterator(memory_, map_); }
+
+  iterator end() const { return iterator(memory_, 0); }
+
+  reverse_iterator rbegin() const { return reverse_iterator(memory_, map_); }
+
+  reverse_iterator rend() const { return reverse_iterator(memory_, 0); }
+
+ private:
+  static constexpr const auto& GetEntry(const value_type& value) {
+    if constexpr (LinkMapMember) {
+      return value.*LinkMapMember;
+    } else {
+      return value;
+    }
+  }
+
+  Memory& memory_;
+  typename Elf::size_type map_;
+};
+
+// Deduction guide.
+template <class Memory>
+LinkMapList(Memory&, Elf<>::size_type) -> LinkMapList<Memory>;
+
+template <class Memory, class Elf, class AbiTraits, typename EntryType, auto LinkMapMember>
+template <bool Reverse>
+class LinkMapList<Memory, Elf, AbiTraits, EntryType, LinkMapMember>::IteratorImpl {
+ public:
+  using iterator_category = std::bidirectional_iterator_tag;
+
+  constexpr IteratorImpl() = default;
+  constexpr IteratorImpl(const IteratorImpl&) = default;
+
+  constexpr bool operator==(const IteratorImpl& other) const { return address_ == other.address_; }
+
+  constexpr bool operator!=(const IteratorImpl& other) const { return !(*this == other); }
+
+  constexpr const value_type& operator*() const { return *value_; }
+
+  constexpr IteratorImpl& operator++() {  // prefix
+    Update<Reverse ? &LinkMapType::prev : &LinkMapType::next>();
+    return *this;
+  }
+
+  constexpr IteratorImpl operator++(int) {  // postfix
+    IteratorImpl old = *this;
+    ++*this;
+    return old;
+  }
+
+  constexpr IteratorImpl& operator--() {  // prefix
+    Update<Reverse ? &LinkMapType::next : &LinkMapType::prev>();
+    return *this;
+  }
+
+  constexpr IteratorImpl operator--(int) {  // postfix
+    IteratorImpl old = *this;
+    --*this;
+    return old;
+  }
+
+ private:
+  // The container's begin()/end() call the private constructor below.
+  friend LinkMapList;
+  using LinkMapType = std::decay_t<decltype(GetEntry(std::declval<const value_type&>()))>;
+  constexpr IteratorImpl(Memory& memory, typename Elf::size_type address)
+      : memory_(&memory), address_(address) {
+    Update();
+  }
+
+  // Read the struct from the current address pointer into value_.
+  // If the pointer can't be read, reset address_ to zero (end state).
+  template <auto Member = nullptr>
+  constexpr void Update() {
+    if constexpr (Member != nullptr) {
+      address_ = (GetEntry(*value_).*Member).address();
+    }
+    if (address_ != 0) {
+      if (auto data = memory_->template ReadArray<value_type>(address_, 1)) {
+        value_ = data->data();
+      } else {
+        value_ = nullptr;
+        address_ = 0;
+      }
+    }
+  }
+
+  Memory* memory_ = nullptr;
+  const value_type* value_ = nullptr;
+  typename Elf::size_type address_ = 0;
+};
+
+}  // namespace elfldltl
+
+#endif  // SRC_LIB_ELFLDLTL_INCLUDE_LIB_ELFLDLTL_LINK_MAP_LIST_H_
diff --git a/src/lib/elfldltl/include/lib/elfldltl/machine.h b/src/lib/elfldltl/include/lib/elfldltl/machine.h
index e572a63..c7e30b2 100644
--- a/src/lib/elfldltl/include/lib/elfldltl/machine.h
+++ b/src/lib/elfldltl/include/lib/elfldltl/machine.h
@@ -222,6 +222,62 @@
   static constexpr std::optional<uint32_t> kTlsDesc = std::nullopt;
 };
 
+// This is specialized to give the machine-specific details on dynamic
+// linking for TLS.  This is only what relocation needs to handle, not
+// the whole thread-pointer ABI for the machine.  The ElfMachine::kNone
+// specialization is an exemplar documenting the template API.
+template <class Elf, ElfMachine Machine>
+struct TlsTraits;
+
+template <class Elf>
+struct TlsTraits<Elf, ElfMachine::kNone> {
+  // Each module in the initial-exec set that has a PT_TLS segment gets
+  // assigned an offset from the thread pointer where its PT_TLS block
+  // will appear in each thread's static TLS area.  If the main
+  // executable has a PT_TLS segment, then it has module ID 1 and its
+  // Local Exec relocations were assigned statically by the linker.  The
+  // psABI sets a starting offset from the thread pointer for the main
+  // executable's PT_TLS segment; the actual offset the linker uses is
+  // that value rounded up per the p_align of that PT_TLS segment.  So
+  // the entire block is expected to be aligned such that the thread
+  // pointer's value has the maximum alignment of any PT_TLS segment in
+  // the static TLS area, and the linker aligns offsets up as needed.
+  // kTlsLocalExecOffset is the offset that the first PT_TLS segment
+  // (the executable's, if it has one) would be assigned if p_align were 1.
+  static constexpr typename Elf::size_type kTlsLocalExecOffset = 0;
+
+  // If true, TLS offsets from the thread pointer are negative.  The
+  // thread-pointer alignment calculations are the same either way: the
+  // first PT_TLS segment (the executable's, if it has one) gets the
+  // offset closest to zero that is aligned to its p_align (and, when
+  // offsets are negative, whose magnitude is >= p_memsz).
+  static constexpr bool kTlsNegative = false;
+};
+
+// AArch64 puts TLS above TP after a two-word reserved area.  (In the
+// AArch64 ELF TLS convention the thread pointer points at a reserved
+// two-pointer TCB, with the first PT_TLS block placed just past it.)
+template <class Elf>
+struct TlsTraits<Elf, ElfMachine::kAarch64> {
+  using size_type = typename Elf::size_type;
+
+  // Two pointer-size words: 16 bytes for ELF64, 8 bytes for ELF32 (ILP32).
+  static constexpr size_type kTlsLocalExecOffset = 2 * sizeof(size_type);
+  static constexpr bool kTlsNegative = false;
+};
+
+// RISC-V puts TLS above TP with no offset, as shown in the exemplar:
+// the thread pointer points directly at the first PT_TLS block.
+template <class Elf>
+struct TlsTraits<Elf, ElfMachine::kRiscv> : public TlsTraits<Elf, ElfMachine::kNone> {};
+
+// X86 puts TLS below TP: offsets from the thread pointer are negative,
+// with the first PT_TLS block ending at the thread pointer itself.
+template <class Elf>
+struct TlsTraits<Elf, ElfMachine::kX86_64> {
+  static constexpr typename Elf::size_type kTlsLocalExecOffset = 0;
+  static constexpr bool kTlsNegative = true;
+};
+
+// 32-bit x86 uses the same below-TP layout as x86-64.
+template <class Elf>
+struct TlsTraits<Elf, ElfMachine::k386> : public TlsTraits<Elf, ElfMachine::kX86_64> {};
+
 // This should list all the fully-defined specializations except for kNone.
 template <template <ElfMachine...> class Template>
 using AllSupportedMachines = Template<  //
diff --git a/src/lib/elfldltl/include/lib/elfldltl/resolve.h b/src/lib/elfldltl/include/lib/elfldltl/resolve.h
index 9659676..b8efd84 100644
--- a/src/lib/elfldltl/include/lib/elfldltl/resolve.h
+++ b/src/lib/elfldltl/include/lib/elfldltl/resolve.h
@@ -19,20 +19,26 @@
 // the `resolve` parameter for RelocateSymbolic. See link.h for more details.
 // The Module type must have the following methods:
 //
-//  * const SymbolInfo& symbol_info()
+//  * const SymbolInfo& symbol_info() const
 //    Returns the SymbolInfo type associated with this module. This is used
 //    to call SymbolInfo::Lookup().
 //
-//  * size_type load_bias()
+//  * size_type load_bias() const
 //    Returns the load bias for symbol addresses in this module.
 //
-//  * size_type tls_module_id()
+//  * size_type tls_module_id() const
 //    Returns the TLS module ID number for this module.
+//    This will be zero for a module with no PT_TLS segment.
+//    It's always one in the main executable if it has a PT_TLS segment,
+//    but may be one in a different module if the main executable has none.
 //
-//  * size_type static_tls_bias()
+//  * bool uses_static_tls() const
+//    This module may have TLS relocations for IE or LE model accesses.
+//
+//  * size_type static_tls_bias() const
 //    Returns the static TLS layout bias for the defining module.
 //
-//  * size_type tls_desc_hook(const Sym&), tls_desc_value(const Sym&)
+//  * size_type tls_desc_hook(const Sym&), tls_desc_value(const Sym&) const
 //    Returns the two values for the TLSDESC resolution.
 //
 template <class Module>
@@ -75,13 +81,6 @@
   using Definition = ResolverDefinition<Module>;
 
   return [&](const auto& ref, elfldltl::RelocateTls tls_type) -> std::optional<Definition> {
-    // TODO(fxbug.dev/118060): Support thread local symbols. For now we just use
-    // FormatError, which isn't preferable, but this is just a temporary error.
-    if (tls_type != RelocateTls::kNone) {
-      diag.FormatError("TLS not yet supported");
-      return std::nullopt;
-    }
-
     elfldltl::SymbolName name{ref_info, ref};
 
     if (name.empty()) [[unlikely]] {
@@ -91,6 +90,29 @@
 
     for (const auto& module : modules) {
       if (const auto* sym = name.Lookup(module.symbol_info())) {
+        // Check that the symbol's STT_TLS-ness matches the relocation type
+        // before accepting this definition.
+        switch (tls_type) {
+          case RelocateTls::kNone:
+            // A non-TLS relocation must not resolve to a TLS symbol.
+            if (sym->type() == ElfSymType::kTls) [[unlikely]] {
+              diag.FormatError("non-TLS relocation resolves to STT_TLS symbol: ", name);
+              return std::nullopt;
+            }
+            break;
+          case RelocateTls::kStatic:
+            // Initial Exec references require the defining module to be in
+            // the static TLS set (DF_STATIC_TLS).
+            if (!module.uses_static_tls()) [[unlikely]] {
+              diag.FormatError(
+                  "TLS Initial Exec relocation resolves to STT_TLS symbol in module without DF_STATIC_TLS: ",
+                  name);
+              return std::nullopt;
+            }
+            [[fallthrough]];
+          case RelocateTls::kDynamic:
+          case RelocateTls::kDesc:
+            // Every TLS relocation flavor must resolve to an STT_TLS symbol.
+            if (sym->type() != ElfSymType::kTls) [[unlikely]] {
+              diag.FormatError("TLS relocation resolves to non-STT_TLS symbol: ", name);
+              return std::nullopt;
+            }
+            break;
+        }
         return Definition{sym, std::addressof(module)};
       }
     }
diff --git a/src/lib/elfldltl/include/lib/elfldltl/static-pie-with-vdso.h b/src/lib/elfldltl/include/lib/elfldltl/static-pie-with-vdso.h
index d0f0c37..0fada38 100644
--- a/src/lib/elfldltl/include/lib/elfldltl/static-pie-with-vdso.h
+++ b/src/lib/elfldltl/include/lib/elfldltl/static-pie-with-vdso.h
@@ -36,16 +36,20 @@
 // Do self-relocation against the vDSO so system calls can be made normally.
 // This is the simplified all-in-one version that decodes the all vDSO details
 // from memory itself.  It returns the the program's own SymbolInfo data; see
-// <lib/elfldltl/symbol.h> for details.
-template <class Self, class DiagnosticsType>
+// <lib/elfldltl/symbol.h> for details.  Optional additional arguments are
+// passed along to DecodeDynamic as observer objects to collect information
+// other than the SymbolInfo and RelocationInfo implicitly collected here.
+template <class Self, class DiagnosticsType, typename... Observers>
 inline SymbolInfo<typename Self::Elf> LinkStaticPieWithVdso(  //
-    const Self& self, DiagnosticsType& diagnostics, const void* vdso_base);
+    const Self& self, DiagnosticsType& diagnostics, const void* vdso_base,
+    Observers&&... observers);
 
 // This version takes vDSO details already distilled separately.
-template <class Self, class DiagnosticsType>
+template <class Self, class DiagnosticsType, typename... Observers>
 inline SymbolInfo<typename Self::Elf> LinkStaticPieWithVdso(
     const Self& self, DiagnosticsType& diagnostics,
-    const SymbolInfo<typename Self::Elf>& vdso_symbols, typename Self::Elf::size_type vdso_bias) {
+    const SymbolInfo<typename Self::Elf>& vdso_symbols, typename Self::Elf::size_type vdso_bias,
+    Observers&&... observers) {
   using namespace std::literals;
   using Elf = typename Self::Elf;
   using size_type = typename Elf::size_type;
@@ -59,7 +63,8 @@
   SymbolInfo<Elf> symbol_info;
   DecodeDynamic(diagnostics, memory, Self::Dynamic(),       //
                 DynamicRelocationInfoObserver(reloc_info),  //
-                DynamicSymbolInfoObserver(symbol_info));
+                DynamicSymbolInfoObserver(symbol_info),     //
+                std::forward<Observers>(observers)...);
 
   // Apply simple fixups first, just in case anything else needs them done.
   if (RelocateRelative(diagnostics, memory, reloc_info, bias)) {
@@ -113,7 +118,7 @@
 }
 
 // This distills the vDSO symbols and load bias from the image in memory.
-template <class Elf, class DiagnosticsType>
+template <class Elf = Elf<>, class DiagnosticsType>
 inline std::pair<SymbolInfo<Elf>, uintptr_t> GetVdsoSymbols(DiagnosticsType& diagnostics,
                                                             const void* vdso_base) {
   using Ehdr = typename Elf::Ehdr;
@@ -157,16 +162,18 @@
 }
 
 // This just combines the two functions above.
-template <class Self, class DiagnosticsType>
+template <class Self, class DiagnosticsType, typename... Observers>
 inline SymbolInfo<typename Self::Elf> LinkStaticPieWithVdso(  //
-    const Self& self, DiagnosticsType& diagnostics, const void* vdso_base) {
+    const Self& self, DiagnosticsType& diagnostics, const void* vdso_base,
+    Observers&&... observers) {
   using Elf = typename Self::Elf;
 
   // Fetch the vDSO symbol table.
   auto [vdso_symbols, vdso_bias] = GetVdsoSymbols<Elf>(diagnostics, vdso_base);
 
   // The main work is done in the overload defined above.
-  return LinkStaticPieWithVdso(self, diagnostics, vdso_symbols, vdso_bias);
+  return LinkStaticPieWithVdso(self, diagnostics, vdso_symbols, vdso_bias,
+                               std::forward<Observers>(observers)...);
 }
 
 }  // namespace elfldltl
diff --git a/src/lib/elfldltl/include/lib/elfldltl/tls-layout.h b/src/lib/elfldltl/include/lib/elfldltl/tls-layout.h
new file mode 100644
index 0000000..6bf41e6
--- /dev/null
+++ b/src/lib/elfldltl/include/lib/elfldltl/tls-layout.h
@@ -0,0 +1,96 @@
+// Copyright 2023 The Fuchsia Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef SRC_LIB_ELFLDLTL_INCLUDE_LIB_ELFLDLTL_TLS_LAYOUT_H_
+#define SRC_LIB_ELFLDLTL_INCLUDE_LIB_ELFLDLTL_TLS_LAYOUT_H_
+
+#include <algorithm>
+
+#include "constants.h"
+#include "layout.h"
+#include "machine.h"
+
+namespace elfldltl {
+
+// This performs static TLS layout according to the machine's ABI rules.
+//
+// Each call to Assign takes the PT_TLS phdr for a module and returns the
+// thread pointer offset for that module's TLS block.  If there is a main
+// executable that might use the Local Exec TLS model, then its TLS block must
+// be assigned first.
+//
+// When all the static TLS modules have been assigned, then size_bytes() and
+// alignment() return the size and alignment for the static TLS area.  The size
+// is zero if there were no nonempty PT_TLS segments passed to Assign at all.
+// When nonzero, the size includes the ABI reserved area, if any: it's the
+// space that must be available immediately at (or before) the thread pointer.
+
+template <class Elf = Elf<>>
+class TlsLayout {
+ public:
+  using Addr = typename Elf::Addr;
+  using size_type = typename Elf::size_type;
+  using Phdr = typename Elf::Phdr;
+
+  // Assign a thread-pointer offset for this module's PT_TLS segment and grow
+  // the layout to cover it.  RedZone reserves extra space past the segment.
+  // On kTlsNegative machines the return value is the negated
+  // (two's-complement) distance below the thread pointer.
+  template <ElfMachine Machine = ElfMachine::kNative, size_type RedZone = 0>
+  constexpr size_type Assign(const Phdr& phdr) {
+    using Traits = TlsTraits<Elf, Machine>;
+
+    const size_type segment_size = phdr.memsz;
+    const size_type segment_alignment = PhdrAlign(phdr);
+
+    // The first module gets assigned at a fixed offset.  This isn't just made
+    // the initializer value for size_bytes_ for two reasons: to keep this type
+    // purely zero-initialized so it can live in bss; so that the state when no
+    // PT_TLS segments exist at all is always simply zero size.
+    if (size_bytes_ == 0) {
+      size_bytes_ = Traits::kTlsLocalExecOffset;
+    }
+
+    // The whole static TLS block must be at least as aligned as each segment.
+    alignment_ = std::max(alignment_, segment_alignment);
+
+    // Within the block, each segment must be aligned according to its p_align.
+    auto segment_aligned = [segment_alignment](size_type size) {
+      return AlignUp(size, segment_alignment);
+    };
+
+    // Assign an offset for this segment and update the total size.
+    if constexpr (Traits::kTlsNegative) {
+      // Below the last assignment, aligned down as needed.
+      size_type offset = segment_aligned(size_bytes_ + segment_size);
+      size_bytes_ = offset + RedZone;
+      return -offset;
+    } else {
+      // Above the last assignment, aligned up as needed.
+      size_type offset = segment_aligned(size_bytes_);
+      size_bytes_ = offset + segment_size + RedZone;
+      return offset;
+    }
+  }
+
+  // Total bytes required for the static TLS area (zero if no PT_TLS at all).
+  constexpr size_type size_bytes() const { return size_bytes_; }
+
+  // Required alignment of the static TLS area (and thus the thread pointer).
+  constexpr size_type alignment() const { return alignment_; }
+
+  // Round size up to the layout's alignment, or min_alignment if larger.
+  // The explicit std::max<size_type> instantiation is deliberate: the
+  // size_t min_alignment and the stored alignment may be different types
+  // (e.g. an Elf32 layout on an LP64 host), and mixed-type arguments to
+  // std::max fail template argument deduction.
+  constexpr size_type Align(size_type size, size_t min_alignment = 0) const {
+    return AlignUp(size, std::max<size_type>(static_cast<size_type>(min_alignment), alignment()));
+  }
+
+ private:
+  // A p_align of zero means no alignment requirement, i.e. byte alignment.
+  static constexpr size_type PhdrAlign(const Phdr& phdr) {
+    return phdr.align == 0 ? 1 : phdr.align;
+  }
+
+  // Round size up to the next multiple of alignment (a power of two).
+  static constexpr size_type AlignUp(size_type size, size_type alignment) {
+    return (size + alignment - 1) & -alignment;
+  }
+
+  // Both members are zero-initialized so the object can live in bss;
+  // presumably Addr (not size_type) keeps the type ABI-stable for the
+  // passive-ABI struct that embeds it — TODO confirm against abi.h.
+  Addr size_bytes_ = 0;
+  Addr alignment_ = 0;
+};
+
+}  // namespace elfldltl
+
+#endif  // SRC_LIB_ELFLDLTL_INCLUDE_LIB_ELFLDLTL_TLS_LAYOUT_H_
diff --git a/src/lib/elfldltl/test/resolve-tests.cc b/src/lib/elfldltl/test/resolve-tests.cc
index 662d8d4..670c64b2 100644
--- a/src/lib/elfldltl/test/resolve-tests.cc
+++ b/src/lib/elfldltl/test/resolve-tests.cc
@@ -39,6 +39,10 @@
 
     constexpr size_type load_bias() const { return 0; }
 
+    // This test module has no TLS at all: not in the static TLS set, no
+    // TLS module ID, and a zero static TLS bias.
+    constexpr bool uses_static_tls() const { return false; }
+    constexpr size_type tls_module_id() const { return 0; }
+    constexpr size_type static_tls_bias() const { return 0; }
+
     constexpr auto& file() { return file_; }
 
    private: