Merge branch 'dev'
diff --git a/COPYING b/COPYING
index 611968c..104b1f8 100644
--- a/COPYING
+++ b/COPYING
@@ -1,10 +1,10 @@
 Unless otherwise specified, files in the jemalloc source distribution are
 subject to the following license:
 --------------------------------------------------------------------------------
-Copyright (C) 2002-2015 Jason Evans <jasone@canonware.com>.
+Copyright (C) 2002-2016 Jason Evans <jasone@canonware.com>.
 All rights reserved.
 Copyright (C) 2007-2012 Mozilla Foundation.  All rights reserved.
-Copyright (C) 2009-2015 Facebook, Inc.  All rights reserved.
+Copyright (C) 2009-2016 Facebook, Inc.  All rights reserved.
 
 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are met:
diff --git a/ChangeLog b/ChangeLog
index af78615..926209e 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -4,6 +4,29 @@
 
     https://github.com/jemalloc/jemalloc
 
+* 4.2.0 (May 12, 2016)
+
+  New features:
+  - Add the arena.<i>.reset mallctl, which makes it possible to discard all of
+    an arena's allocations in a single operation.  (@jasone)
+  - Add the stats.retained and stats.arenas.<i>.retained statistics.  (@jasone)
+  - Add the --with-version configure option.  (@jasone)
+  - Support --with-lg-page values larger than actual page size.  (@jasone)
+
+  Optimizations:
+  - Use pairing heaps rather than red-black trees for various hot data
+    structures.  (@djwatson, @jasone)
+  - Streamline fast paths of rtree operations.  (@jasone)
+  - Optimize the fast paths of calloc() and [m,d,sd]allocx().  (@jasone)
+  - Decommit unused virtual memory if the OS does not overcommit.  (@jasone)
+  - Specify MAP_NORESERVE on Linux if [heuristic] overcommit is active, in order
+    to avoid unfortunate interactions during fork(2).  (@jasone)
+
+  Bug fixes:
+  - Fix chunk accounting related to triggering gdump profiles.  (@jasone)
+  - Link against librt for clock_gettime(2) if glibc < 2.17.  (@jasone)
+  - Scale leak report summary according to sampling probability.  (@jasone)
+
 * 4.1.1 (May 3, 2016)
 
   This bugfix release resolves a variety of mostly minor issues, though the
@@ -21,7 +44,7 @@
     enabled and active.  (@jasone)
   - Fix various chunk leaks in OOM code paths.  (@jasone)
   - Fix malloc_stats_print() to print opt.narenas correctly.  (@jasone)
-  - Fix MSVC-specific build/test issues.  (@rustyx, yuslepukhin)
+  - Fix MSVC-specific build/test issues.  (@rustyx, @yuslepukhin)
   - Fix a variety of test failures that were due to test fragility rather than
     core bugs.  (@jasone)
 
@@ -80,14 +103,14 @@
   Bug fixes:
   - Fix stats.cactive accounting regression.  (@rustyx, @jasone)
   - Handle unaligned keys in hash().  This caused problems for some ARM systems.
-    (@jasone, Christopher Ferris)
+    (@jasone, @cferris1000)
   - Refactor arenas array.  In addition to fixing a fork-related deadlock, this
     makes arena lookups faster and simpler.  (@jasone)
   - Move retained memory allocation out of the default chunk allocation
     function, to a location that gets executed even if the application installs
     a custom chunk allocation function.  This resolves a virtual memory leak.
     (@buchgr)
-  - Fix a potential tsd cleanup leak.  (Christopher Ferris, @jasone)
+  - Fix a potential tsd cleanup leak.  (@cferris1000, @jasone)
   - Fix run quantization.  In practice this bug had no impact unless
     applications requested memory with alignment exceeding one page.
     (@jasone, @djwatson)
diff --git a/INSTALL b/INSTALL
index 5c25054..6878716 100644
--- a/INSTALL
+++ b/INSTALL
@@ -35,6 +35,10 @@
     will cause files to be installed into /usr/local/include, /usr/local/lib,
     and /usr/local/man.
 
+--with-version=<major>.<minor>.<bugfix>-<nrev>-g<gid>
+    Use the specified version string rather than trying to generate one (if in
+    a git repository) or using the existing VERSION file (if present).
+
 --with-rpath=<colon-separated-rpath>
     Embed one or more library paths, so that libjemalloc can find the libraries
     it is linked to.  This works only on ELF-based systems.
diff --git a/Makefile.in b/Makefile.in
index 4b0e184..652f01f 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -28,7 +28,6 @@
 LDFLAGS := @LDFLAGS@
 EXTRA_LDFLAGS := @EXTRA_LDFLAGS@
 LIBS := @LIBS@
-TESTLIBS := @TESTLIBS@
 RPATH_EXTRA := @RPATH_EXTRA@
 SO := @so@
 IMPORTLIB := @importlib@
@@ -103,7 +102,8 @@
 	$(srcroot)src/tcache.c \
 	$(srcroot)src/ticker.c \
 	$(srcroot)src/tsd.c \
-	$(srcroot)src/util.c
+	$(srcroot)src/util.c \
+	$(srcroot)src/witness.c
 ifeq ($(enable_valgrind), 1)
 C_SRCS += $(srcroot)src/valgrind.c
 endif
@@ -134,7 +134,10 @@
 	$(srcroot)test/src/SFMT.c $(srcroot)test/src/test.c \
 	$(srcroot)test/src/thd.c $(srcroot)test/src/timer.c
 C_UTIL_INTEGRATION_SRCS := $(srcroot)src/nstime.c $(srcroot)src/util.c
-TESTS_UNIT := $(srcroot)test/unit/atomic.c \
+TESTS_UNIT := \
+	$(srcroot)test/unit/a0.c \
+	$(srcroot)test/unit/arena_reset.c \
+	$(srcroot)test/unit/atomic.c \
 	$(srcroot)test/unit/bitmap.c \
 	$(srcroot)test/unit/ckh.c \
 	$(srcroot)test/unit/decay.c \
@@ -148,6 +151,7 @@
 	$(srcroot)test/unit/math.c \
 	$(srcroot)test/unit/mq.c \
 	$(srcroot)test/unit/mtx.c \
+	$(srcroot)test/unit/ph.c \
 	$(srcroot)test/unit/prng.c \
 	$(srcroot)test/unit/prof_accum.c \
 	$(srcroot)test/unit/prof_active.c \
@@ -169,6 +173,7 @@
 	$(srcroot)test/unit/nstime.c \
 	$(srcroot)test/unit/tsd.c \
 	$(srcroot)test/unit/util.c \
+	$(srcroot)test/unit/witness.c \
 	$(srcroot)test/unit/zero.c
 TESTS_INTEGRATION := $(srcroot)test/integration/aligned_alloc.c \
 	$(srcroot)test/integration/allocated.c \
@@ -290,15 +295,15 @@
 
 $(objroot)test/unit/%$(EXE): $(objroot)test/unit/%.$(O) $(TESTS_UNIT_LINK_OBJS) $(C_JET_OBJS) $(C_TESTLIB_UNIT_OBJS)
 	@mkdir -p $(@D)
-	$(CC) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(LDFLAGS) $(filter-out -lm,$(LIBS)) -lm $(TESTLIBS) $(EXTRA_LDFLAGS)
+	$(CC) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(LDFLAGS) $(filter-out -lm,$(LIBS)) -lm $(EXTRA_LDFLAGS)
 
 $(objroot)test/integration/%$(EXE): $(objroot)test/integration/%.$(O) $(C_TESTLIB_INTEGRATION_OBJS) $(C_UTIL_INTEGRATION_OBJS) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB)
 	@mkdir -p $(@D)
-	$(CC) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) $(LDFLAGS) $(filter-out -lm,$(filter -lpthread,$(LIBS))) -lm $(TESTLIBS) $(EXTRA_LDFLAGS)
+	$(CC) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) $(LDFLAGS) $(filter-out -lm,$(filter -lpthread,$(LIBS))) -lm $(EXTRA_LDFLAGS)
 
 $(objroot)test/stress/%$(EXE): $(objroot)test/stress/%.$(O) $(C_JET_OBJS) $(C_TESTLIB_STRESS_OBJS) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB)
 	@mkdir -p $(@D)
-	$(CC) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) $(LDFLAGS) $(filter-out -lm,$(LIBS)) -lm $(TESTLIBS) $(EXTRA_LDFLAGS)
+	$(CC) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) $(LDFLAGS) $(filter-out -lm,$(LIBS)) -lm $(EXTRA_LDFLAGS)
 
 build_lib_shared: $(DSOS)
 build_lib_static: $(STATIC_LIBS)
diff --git a/configure.ac b/configure.ac
index eb387ed..7f19715 100644
--- a/configure.ac
+++ b/configure.ac
@@ -141,6 +141,7 @@
     JE_CFLAGS_APPEND([-Wall])
     JE_CFLAGS_APPEND([-Werror=declaration-after-statement])
     JE_CFLAGS_APPEND([-Wshorten-64-to-32])
+    JE_CFLAGS_APPEND([-Wsign-compare])
     JE_CFLAGS_APPEND([-pipe])
     JE_CFLAGS_APPEND([-g3])
   elif test "x$je_cv_msvc" = "xyes" ; then
@@ -304,6 +305,7 @@
   *-*-freebsd*)
 	CFLAGS="$CFLAGS"
 	abi="elf"
+	AC_DEFINE([JEMALLOC_SYSCTL_VM_OVERCOMMIT], [ ])
 	AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ])
 	force_lazy_lock="1"
 	;;
@@ -328,6 +330,7 @@
 	CPPFLAGS="$CPPFLAGS -D_GNU_SOURCE"
 	abi="elf"
 	AC_DEFINE([JEMALLOC_HAS_ALLOCA_H])
+	AC_DEFINE([JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY], [ ])
 	AC_DEFINE([JEMALLOC_PURGE_MADVISE_DONTNEED], [ ])
 	AC_DEFINE([JEMALLOC_THREADED_INIT], [ ])
 	AC_DEFINE([JEMALLOC_USE_CXX_THROW], [ ])
@@ -1172,27 +1175,36 @@
 dnl jemalloc configuration.
 dnl 
 
-dnl Set VERSION if source directory is inside a git repository.
-if test "x`test ! \"${srcroot}\" && cd \"${srcroot}\"; git rev-parse --is-inside-work-tree 2>/dev/null`" = "xtrue" ; then
-  dnl Pattern globs aren't powerful enough to match both single- and
-  dnl double-digit version numbers, so iterate over patterns to support up to
-  dnl version 99.99.99 without any accidental matches.
-  rm -f "${objroot}VERSION"
-  for pattern in ['[0-9].[0-9].[0-9]' '[0-9].[0-9].[0-9][0-9]' \
-                 '[0-9].[0-9][0-9].[0-9]' '[0-9].[0-9][0-9].[0-9][0-9]' \
-                 '[0-9][0-9].[0-9].[0-9]' '[0-9][0-9].[0-9].[0-9][0-9]' \
-                 '[0-9][0-9].[0-9][0-9].[0-9]' \
-                 '[0-9][0-9].[0-9][0-9].[0-9][0-9]']; do
-    if test ! -e "${objroot}VERSION" ; then
-      (test ! "${srcroot}" && cd "${srcroot}"; git describe --long --abbrev=40 --match="${pattern}") > "${objroot}VERSION.tmp" 2>/dev/null
-      if test $? -eq 0 ; then
-        mv "${objroot}VERSION.tmp" "${objroot}VERSION"
-        break
-      fi
+AC_ARG_WITH([version],
+  [AS_HELP_STRING([--with-version=<major>.<minor>.<bugfix>-<nrev>-g<gid>],
+   [Version string])],
+  [
+    echo "${with_version}" | grep ['^[0-9]\+\.[0-9]\+\.[0-9]\+-[0-9]\+-g[0-9a-f]\+$'] 2>&1 1>/dev/null
+    if test $? -ne 0 ; then
+      AC_MSG_ERROR([${with_version} does not match <major>.<minor>.<bugfix>-<nrev>-g<gid>])
     fi
-  done
-fi
-rm -f "${objroot}VERSION.tmp"
+    echo "$with_version" > "${objroot}VERSION"
+  ], [
+    dnl Set VERSION if source directory is inside a git repository.
+    if test "x`test ! \"${srcroot}\" && cd \"${srcroot}\"; git rev-parse --is-inside-work-tree 2>/dev/null`" = "xtrue" ; then
+      dnl Pattern globs aren't powerful enough to match both single- and
+      dnl double-digit version numbers, so iterate over patterns to support up
+      dnl to version 99.99.99 without any accidental matches.
+      for pattern in ['[0-9].[0-9].[0-9]' '[0-9].[0-9].[0-9][0-9]' \
+                     '[0-9].[0-9][0-9].[0-9]' '[0-9].[0-9][0-9].[0-9][0-9]' \
+                     '[0-9][0-9].[0-9].[0-9]' '[0-9][0-9].[0-9].[0-9][0-9]' \
+                     '[0-9][0-9].[0-9][0-9].[0-9]' \
+                     '[0-9][0-9].[0-9][0-9].[0-9][0-9]']; do
+        (test ! "${srcroot}" && cd "${srcroot}"; git describe --long --abbrev=40 --match="${pattern}") > "${objroot}VERSION.tmp" 2>/dev/null
+        if test $? -eq 0 ; then
+          mv "${objroot}VERSION.tmp" "${objroot}VERSION"
+          break
+        fi
+      done
+    fi
+    rm -f "${objroot}VERSION.tmp"
+  ])
+
 if test ! -e "${objroot}VERSION" ; then
   if test ! -e "${srcroot}VERSION" ; then
     AC_MSG_RESULT(
@@ -1229,13 +1241,8 @@
 
 CPPFLAGS="$CPPFLAGS -D_REENTRANT"
 
-dnl Check whether clock_gettime(2) is in libc or librt.  This function is only
-dnl used in test code, so save the result to TESTLIBS to avoid poluting LIBS.
-SAVED_LIBS="${LIBS}"
-LIBS=
-AC_SEARCH_LIBS([clock_gettime], [rt], [TESTLIBS="${LIBS}"])
-AC_SUBST([TESTLIBS])
-LIBS="${SAVED_LIBS}"
+dnl Check whether clock_gettime(2) is in libc or librt.
+AC_SEARCH_LIBS([clock_gettime], [rt])
 
 dnl Check if the GNU-specific secure_getenv function exists.
 AC_CHECK_FUNC([secure_getenv],
@@ -1741,7 +1748,6 @@
 AC_MSG_RESULT([LDFLAGS            : ${LDFLAGS}])
 AC_MSG_RESULT([EXTRA_LDFLAGS      : ${EXTRA_LDFLAGS}])
 AC_MSG_RESULT([LIBS               : ${LIBS}])
-AC_MSG_RESULT([TESTLIBS           : ${TESTLIBS}])
 AC_MSG_RESULT([RPATH_EXTRA        : ${RPATH_EXTRA}])
 AC_MSG_RESULT([])
 AC_MSG_RESULT([XSLTPROC           : ${XSLTPROC}])
diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in
index 88b003a..c4a44e3 100644
--- a/doc/jemalloc.xml.in
+++ b/doc/jemalloc.xml.in
@@ -540,8 +540,8 @@
     are smaller than four times the page size, large size classes are smaller
     than the chunk size (see the <link
     linkend="opt.lg_chunk"><mallctl>opt.lg_chunk</mallctl></link> option), and
-    huge size classes extend from the chunk size up to one size class less than
-    the full address space size.</para>
+    huge size classes extend from the chunk size up to the largest size class
+    that does not exceed <constant>PTRDIFF_MAX</constant>.</para>
 
     <para>Allocations are packed tightly together, which can be an issue for
     multi-threaded applications.  If you need to assure that allocations do not
@@ -659,7 +659,7 @@
           <entry>[1280 KiB, 1536 KiB, 1792 KiB]</entry>
         </row>
         <row>
-          <entry morerows="6">Huge</entry>
+          <entry morerows="8">Huge</entry>
           <entry>256 KiB</entry>
           <entry>[2 MiB]</entry>
         </row>
@@ -687,6 +687,14 @@
           <entry>...</entry>
           <entry>...</entry>
         </row>
+        <row>
+          <entry>512 PiB</entry>
+          <entry>[2560 PiB, 3 EiB, 3584 PiB, 4 EiB]</entry>
+        </row>
+        <row>
+          <entry>1 EiB</entry>
+          <entry>[5 EiB, 6 EiB, 7 EiB]</entry>
+        </row>
       </tbody>
       </tgroup>
     </table>
@@ -1550,6 +1558,23 @@
         details.</para></listitem>
       </varlistentry>
 
+      <varlistentry id="arena.i.reset">
+        <term>
+          <mallctl>arena.&lt;i&gt;.reset</mallctl>
+          (<type>void</type>)
+          <literal>--</literal>
+        </term>
+        <listitem><para>Discard all of the arena's extant allocations.  This
+        interface can only be used with arenas created via <link
+        linkend="arenas.extend"><mallctl>arenas.extend</mallctl></link>.  None
+        of the arena's discarded/cached allocations may be accessed afterward.  As
+        part of this requirement, all thread caches which were used to
+        allocate/deallocate in conjunction with the arena must be flushed
+        beforehand.  This interface cannot be used if running inside Valgrind,
+        nor if the <link linkend="opt.quarantine">quarantine</link> size is
+        non-zero.</para></listitem>
+      </varlistentry>
+
       <varlistentry id="arena.i.dss">
         <term>
           <mallctl>arena.&lt;i&gt;.dss</mallctl>
@@ -2161,6 +2186,25 @@
         linkend="stats.resident"><mallctl>stats.resident</mallctl></link>.</para></listitem>
       </varlistentry>
 
+      <varlistentry id="stats.retained">
+        <term>
+          <mallctl>stats.retained</mallctl>
+          (<type>size_t</type>)
+          <literal>r-</literal>
+          [<option>--enable-stats</option>]
+        </term>
+        <listitem><para>Total number of bytes in virtual memory mappings that
+        were retained rather than being returned to the operating system via
+        e.g. <citerefentry><refentrytitle>munmap</refentrytitle>
+        <manvolnum>2</manvolnum></citerefentry>.  Retained virtual memory is
+        typically untouched, decommitted, or purged, so it has no strongly
+        associated physical memory (see <link
+        linkend="arena.i.chunk_hooks">chunk hooks</link> for details).  Retained
+        memory is excluded from mapped memory statistics, e.g. <link
+        linkend="stats.mapped"><mallctl>stats.mapped</mallctl></link>.
+        </para></listitem>
+      </varlistentry>
+
       <varlistentry id="stats.arenas.i.dss">
         <term>
           <mallctl>stats.arenas.&lt;i&gt;.dss</mallctl>
@@ -2241,6 +2285,18 @@
         <listitem><para>Number of mapped bytes.</para></listitem>
       </varlistentry>
 
+      <varlistentry id="stats.arenas.i.retained">
+        <term>
+          <mallctl>stats.arenas.&lt;i&gt;.retained</mallctl>
+          (<type>size_t</type>)
+          <literal>r-</literal>
+          [<option>--enable-stats</option>]
+        </term>
+        <listitem><para>Number of retained bytes.  See <link
+        linkend="stats.retained"><mallctl>stats.retained</mallctl></link> for
+        details.</para></listitem>
+      </varlistentry>
+
       <varlistentry id="stats.arenas.i.metadata.mapped">
         <term>
           <mallctl>stats.arenas.&lt;i&gt;.metadata.mapped</mallctl>
diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h
index 42a7896..b1de2b6 100644
--- a/include/jemalloc/internal/arena.h
+++ b/include/jemalloc/internal/arena.h
@@ -36,6 +36,7 @@
 #define	DECAY_NTICKS_PER_UPDATE	1000
 
 typedef struct arena_runs_dirty_link_s arena_runs_dirty_link_t;
+typedef struct arena_avail_links_s arena_avail_links_t;
 typedef struct arena_run_s arena_run_t;
 typedef struct arena_chunk_map_bits_s arena_chunk_map_bits_t;
 typedef struct arena_chunk_map_misc_s arena_chunk_map_misc_t;
@@ -153,13 +154,13 @@
  */
 struct arena_chunk_map_misc_s {
 	/*
-	 * Linkage for run trees.  There are two disjoint uses:
+	 * Linkage for run heaps.  There are two disjoint uses:
 	 *
-	 * 1) arena_t's runs_avail tree.
+	 * 1) arena_t's runs_avail heaps.
 	 * 2) arena_run_t conceptually uses this linkage for in-use non-full
 	 *    runs, rather than directly embedding linkage.
 	 */
-	rb_node(arena_chunk_map_misc_t)		rb_link;
+	phn(arena_chunk_map_misc_t)		ph_link;
 
 	union {
 		/* Linkage for list of dirty runs. */
@@ -175,7 +176,7 @@
 		arena_run_t			run;
 	};
 };
-typedef rb_tree(arena_chunk_map_misc_t) arena_run_tree_t;
+typedef ph(arena_chunk_map_misc_t) arena_run_heap_t;
 #endif /* JEMALLOC_ARENA_STRUCTS_A */
 
 #ifdef JEMALLOC_ARENA_STRUCTS_B
@@ -272,13 +273,13 @@
 	arena_run_t		*runcur;
 
 	/*
-	 * Tree of non-full runs.  This tree is used when looking for an
+	 * Heap of non-full runs.  This heap is used when looking for an
 	 * existing run when runcur is no longer usable.  We choose the
 	 * non-full run that is lowest in memory; this policy tends to keep
 	 * objects packed well, and it can also help reduce the number of
 	 * almost-empty chunks.
 	 */
-	arena_run_tree_t	runs;
+	arena_run_heap_t	runs;
 
 	/* Bin statistics. */
 	malloc_bin_stats_t	stats;
@@ -289,10 +290,18 @@
 	unsigned		ind;
 
 	/*
-	 * Number of threads currently assigned to this arena.  This field is
-	 * synchronized via atomic operations.
+	 * Number of threads currently assigned to this arena, synchronized via
+	 * atomic operations.  Each thread has two distinct assignments, one for
+	 * application-serving allocation, and the other for internal metadata
+	 * allocation.  Internal metadata must not be allocated from arenas
+	 * created via the arenas.extend mallctl, because the arena.<i>.reset
+	 * mallctl indiscriminately discards all allocations for the affected
+	 * arena.
+	 *
+	 *   0: Application allocation.
+	 *   1: Internal metadata allocation.
 	 */
-	unsigned		nthreads;
+	unsigned		nthreads[2];
 
 	/*
 	 * There are three classes of arena operations from a locking
@@ -321,6 +330,10 @@
 
 	dss_prec_t		dss_prec;
 
+
+	/* Extant arena chunks. */
+	ql_head(extent_node_t)	achunks;
+
 	/*
 	 * In order to avoid rapid chunk allocation/deallocation when an arena
 	 * oscillates right on the cusp of needing a new chunk, cache the most
@@ -457,10 +470,10 @@
 	arena_bin_t		bins[NBINS];
 
 	/*
-	 * Quantized address-ordered trees of this arena's available runs.  The
-	 * trees are used for first-best-fit run allocation.
+	 * Quantized address-ordered heaps of this arena's available runs.  The
+	 * heaps are used for first-best-fit run allocation.
 	 */
-	arena_run_tree_t	runs_avail[1]; /* Dynamically sized. */
+	arena_run_heap_t	runs_avail[1]; /* Dynamically sized. */
 };
 
 /* Used in conjunction with tsd for fast arena-related context lookup. */
@@ -505,25 +518,28 @@
     bool cache);
 void	arena_chunk_cache_maybe_remove(arena_t *arena, extent_node_t *node,
     bool cache);
-extent_node_t	*arena_node_alloc(arena_t *arena);
-void	arena_node_dalloc(arena_t *arena, extent_node_t *node);
-void	*arena_chunk_alloc_huge(arena_t *arena, size_t usize, size_t alignment,
-    bool *zero);
-void	arena_chunk_dalloc_huge(arena_t *arena, void *chunk, size_t usize);
-void	arena_chunk_ralloc_huge_similar(arena_t *arena, void *chunk,
-    size_t oldsize, size_t usize);
-void	arena_chunk_ralloc_huge_shrink(arena_t *arena, void *chunk,
-    size_t oldsize, size_t usize);
-bool	arena_chunk_ralloc_huge_expand(arena_t *arena, void *chunk,
-    size_t oldsize, size_t usize, bool *zero);
-ssize_t	arena_lg_dirty_mult_get(arena_t *arena);
-bool	arena_lg_dirty_mult_set(arena_t *arena, ssize_t lg_dirty_mult);
-ssize_t	arena_decay_time_get(arena_t *arena);
-bool	arena_decay_time_set(arena_t *arena, ssize_t decay_time);
-void	arena_maybe_purge(arena_t *arena);
-void	arena_purge(arena_t *arena, bool all);
-void	arena_tcache_fill_small(tsd_t *tsd, arena_t *arena, tcache_bin_t *tbin,
-    szind_t binind, uint64_t prof_accumbytes);
+extent_node_t	*arena_node_alloc(tsdn_t *tsdn, arena_t *arena);
+void	arena_node_dalloc(tsdn_t *tsdn, arena_t *arena, extent_node_t *node);
+void	*arena_chunk_alloc_huge(tsdn_t *tsdn, arena_t *arena, size_t usize,
+    size_t alignment, bool *zero);
+void	arena_chunk_dalloc_huge(tsdn_t *tsdn, arena_t *arena, void *chunk,
+    size_t usize);
+void	arena_chunk_ralloc_huge_similar(tsdn_t *tsdn, arena_t *arena,
+    void *chunk, size_t oldsize, size_t usize);
+void	arena_chunk_ralloc_huge_shrink(tsdn_t *tsdn, arena_t *arena,
+    void *chunk, size_t oldsize, size_t usize);
+bool	arena_chunk_ralloc_huge_expand(tsdn_t *tsdn, arena_t *arena,
+    void *chunk, size_t oldsize, size_t usize, bool *zero);
+ssize_t	arena_lg_dirty_mult_get(tsdn_t *tsdn, arena_t *arena);
+bool	arena_lg_dirty_mult_set(tsdn_t *tsdn, arena_t *arena,
+    ssize_t lg_dirty_mult);
+ssize_t	arena_decay_time_get(tsdn_t *tsdn, arena_t *arena);
+bool	arena_decay_time_set(tsdn_t *tsdn, arena_t *arena, ssize_t decay_time);
+void	arena_purge(tsdn_t *tsdn, arena_t *arena, bool all);
+void	arena_maybe_purge(tsdn_t *tsdn, arena_t *arena);
+void	arena_reset(tsd_t *tsd, arena_t *arena);
+void	arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena,
+    tcache_bin_t *tbin, szind_t binind, uint64_t prof_accumbytes);
 void	arena_alloc_junk_small(void *ptr, arena_bin_info_t *bin_info,
     bool zero);
 #ifdef JEMALLOC_JET
@@ -536,17 +552,18 @@
 void	arena_dalloc_junk_small(void *ptr, arena_bin_info_t *bin_info);
 #endif
 void	arena_quarantine_junk_small(void *ptr, size_t usize);
-void	*arena_malloc_large(tsd_t *tsd, arena_t *arena, szind_t ind, bool zero);
-void	*arena_malloc_hard(tsd_t *tsd, arena_t *arena, size_t size, szind_t ind,
-    bool zero, tcache_t *tcache);
-void	*arena_palloc(tsd_t *tsd, arena_t *arena, size_t usize,
+void	*arena_malloc_large(tsdn_t *tsdn, arena_t *arena, szind_t ind,
+    bool zero);
+void	*arena_malloc_hard(tsdn_t *tsdn, arena_t *arena, size_t size,
+    szind_t ind, bool zero);
+void	*arena_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize,
     size_t alignment, bool zero, tcache_t *tcache);
-void	arena_prof_promoted(const void *ptr, size_t size);
-void	arena_dalloc_bin_junked_locked(arena_t *arena, arena_chunk_t *chunk,
-    void *ptr, arena_chunk_map_bits_t *bitselm);
-void	arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr,
-    size_t pageind, arena_chunk_map_bits_t *bitselm);
-void	arena_dalloc_small(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk,
+void	arena_prof_promoted(tsdn_t *tsdn, const void *ptr, size_t size);
+void	arena_dalloc_bin_junked_locked(tsdn_t *tsdn, arena_t *arena,
+    arena_chunk_t *chunk, void *ptr, arena_chunk_map_bits_t *bitselm);
+void	arena_dalloc_bin(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk,
+    void *ptr, size_t pageind, arena_chunk_map_bits_t *bitselm);
+void	arena_dalloc_small(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk,
     void *ptr, size_t pageind);
 #ifdef JEMALLOC_JET
 typedef void (arena_dalloc_junk_large_t)(void *, size_t);
@@ -554,70 +571,80 @@
 #else
 void	arena_dalloc_junk_large(void *ptr, size_t usize);
 #endif
-void	arena_dalloc_large_junked_locked(arena_t *arena, arena_chunk_t *chunk,
-    void *ptr);
-void	arena_dalloc_large(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk,
+void	arena_dalloc_large_junked_locked(tsdn_t *tsdn, arena_t *arena,
+    arena_chunk_t *chunk, void *ptr);
+void	arena_dalloc_large(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk,
     void *ptr);
 #ifdef JEMALLOC_JET
 typedef void (arena_ralloc_junk_large_t)(void *, size_t, size_t);
 extern arena_ralloc_junk_large_t *arena_ralloc_junk_large;
 #endif
-bool	arena_ralloc_no_move(tsd_t *tsd, void *ptr, size_t oldsize, size_t size,
-    size_t extra, bool zero);
+bool	arena_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize,
+    size_t size, size_t extra, bool zero);
 void	*arena_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize,
     size_t size, size_t alignment, bool zero, tcache_t *tcache);
-dss_prec_t	arena_dss_prec_get(arena_t *arena);
-bool	arena_dss_prec_set(arena_t *arena, dss_prec_t dss_prec);
+dss_prec_t	arena_dss_prec_get(tsdn_t *tsdn, arena_t *arena);
+bool	arena_dss_prec_set(tsdn_t *tsdn, arena_t *arena, dss_prec_t dss_prec);
 ssize_t	arena_lg_dirty_mult_default_get(void);
 bool	arena_lg_dirty_mult_default_set(ssize_t lg_dirty_mult);
 ssize_t	arena_decay_time_default_get(void);
 bool	arena_decay_time_default_set(ssize_t decay_time);
-void	arena_basic_stats_merge(arena_t *arena, unsigned *nthreads,
+void	arena_basic_stats_merge(tsdn_t *tsdn, arena_t *arena,
+    unsigned *nthreads, const char **dss, ssize_t *lg_dirty_mult,
+    ssize_t *decay_time, size_t *nactive, size_t *ndirty);
+void	arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads,
     const char **dss, ssize_t *lg_dirty_mult, ssize_t *decay_time,
-    size_t *nactive, size_t *ndirty);
-void	arena_stats_merge(arena_t *arena, unsigned *nthreads, const char **dss,
-    ssize_t *lg_dirty_mult, ssize_t *decay_time, size_t *nactive,
-    size_t *ndirty, arena_stats_t *astats, malloc_bin_stats_t *bstats,
-    malloc_large_stats_t *lstats, malloc_huge_stats_t *hstats);
-unsigned	arena_nthreads_get(arena_t *arena);
-void	arena_nthreads_inc(arena_t *arena);
-void	arena_nthreads_dec(arena_t *arena);
-arena_t	*arena_new(unsigned ind);
+    size_t *nactive, size_t *ndirty, arena_stats_t *astats,
+    malloc_bin_stats_t *bstats, malloc_large_stats_t *lstats,
+    malloc_huge_stats_t *hstats);
+unsigned	arena_nthreads_get(arena_t *arena, bool internal);
+void	arena_nthreads_inc(arena_t *arena, bool internal);
+void	arena_nthreads_dec(arena_t *arena, bool internal);
+arena_t	*arena_new(tsdn_t *tsdn, unsigned ind);
 bool	arena_boot(void);
-void	arena_prefork0(arena_t *arena);
-void	arena_prefork1(arena_t *arena);
-void	arena_prefork2(arena_t *arena);
-void	arena_prefork3(arena_t *arena);
-void	arena_postfork_parent(arena_t *arena);
-void	arena_postfork_child(arena_t *arena);
+void	arena_prefork0(tsdn_t *tsdn, arena_t *arena);
+void	arena_prefork1(tsdn_t *tsdn, arena_t *arena);
+void	arena_prefork2(tsdn_t *tsdn, arena_t *arena);
+void	arena_prefork3(tsdn_t *tsdn, arena_t *arena);
+void	arena_postfork_parent(tsdn_t *tsdn, arena_t *arena);
+void	arena_postfork_child(tsdn_t *tsdn, arena_t *arena);
 
 #endif /* JEMALLOC_H_EXTERNS */
 /******************************************************************************/
 #ifdef JEMALLOC_H_INLINES
 
 #ifndef JEMALLOC_ENABLE_INLINE
-arena_chunk_map_bits_t	*arena_bitselm_get(arena_chunk_t *chunk,
+arena_chunk_map_bits_t	*arena_bitselm_get_mutable(arena_chunk_t *chunk,
     size_t pageind);
-arena_chunk_map_misc_t	*arena_miscelm_get(arena_chunk_t *chunk,
+const arena_chunk_map_bits_t	*arena_bitselm_get_const(
+    const arena_chunk_t *chunk, size_t pageind);
+arena_chunk_map_misc_t	*arena_miscelm_get_mutable(arena_chunk_t *chunk,
     size_t pageind);
+const arena_chunk_map_misc_t	*arena_miscelm_get_const(
+    const arena_chunk_t *chunk, size_t pageind);
 size_t	arena_miscelm_to_pageind(const arena_chunk_map_misc_t *miscelm);
-void	*arena_miscelm_to_rpages(arena_chunk_map_misc_t *miscelm);
+void	*arena_miscelm_to_rpages(const arena_chunk_map_misc_t *miscelm);
 arena_chunk_map_misc_t	*arena_rd_to_miscelm(arena_runs_dirty_link_t *rd);
 arena_chunk_map_misc_t	*arena_run_to_miscelm(arena_run_t *run);
-size_t	*arena_mapbitsp_get(arena_chunk_t *chunk, size_t pageind);
-size_t	arena_mapbitsp_read(size_t *mapbitsp);
-size_t	arena_mapbits_get(arena_chunk_t *chunk, size_t pageind);
-size_t	arena_mapbits_size_decode(size_t mapbits);
-size_t	arena_mapbits_unallocated_size_get(arena_chunk_t *chunk,
+size_t	*arena_mapbitsp_get_mutable(arena_chunk_t *chunk, size_t pageind);
+const size_t	*arena_mapbitsp_get_const(const arena_chunk_t *chunk,
     size_t pageind);
-size_t	arena_mapbits_large_size_get(arena_chunk_t *chunk, size_t pageind);
-size_t	arena_mapbits_small_runind_get(arena_chunk_t *chunk, size_t pageind);
-szind_t	arena_mapbits_binind_get(arena_chunk_t *chunk, size_t pageind);
-size_t	arena_mapbits_dirty_get(arena_chunk_t *chunk, size_t pageind);
-size_t	arena_mapbits_unzeroed_get(arena_chunk_t *chunk, size_t pageind);
-size_t	arena_mapbits_decommitted_get(arena_chunk_t *chunk, size_t pageind);
-size_t	arena_mapbits_large_get(arena_chunk_t *chunk, size_t pageind);
-size_t	arena_mapbits_allocated_get(arena_chunk_t *chunk, size_t pageind);
+size_t	arena_mapbitsp_read(const size_t *mapbitsp);
+size_t	arena_mapbits_get(const arena_chunk_t *chunk, size_t pageind);
+size_t	arena_mapbits_size_decode(size_t mapbits);
+size_t	arena_mapbits_unallocated_size_get(const arena_chunk_t *chunk,
+    size_t pageind);
+size_t	arena_mapbits_large_size_get(const arena_chunk_t *chunk,
+    size_t pageind);
+size_t	arena_mapbits_small_runind_get(const arena_chunk_t *chunk,
+    size_t pageind);
+szind_t	arena_mapbits_binind_get(const arena_chunk_t *chunk, size_t pageind);
+size_t	arena_mapbits_dirty_get(const arena_chunk_t *chunk, size_t pageind);
+size_t	arena_mapbits_unzeroed_get(const arena_chunk_t *chunk, size_t pageind);
+size_t	arena_mapbits_decommitted_get(const arena_chunk_t *chunk,
+    size_t pageind);
+size_t	arena_mapbits_large_get(const arena_chunk_t *chunk, size_t pageind);
+size_t	arena_mapbits_allocated_get(const arena_chunk_t *chunk, size_t pageind);
 void	arena_mapbitsp_write(size_t *mapbitsp, size_t mapbits);
 size_t	arena_mapbits_size_encode(size_t size);
 void	arena_mapbits_unallocated_set(arena_chunk_t *chunk, size_t pageind,
@@ -637,29 +664,31 @@
 size_t	arena_metadata_allocated_get(arena_t *arena);
 bool	arena_prof_accum_impl(arena_t *arena, uint64_t accumbytes);
 bool	arena_prof_accum_locked(arena_t *arena, uint64_t accumbytes);
-bool	arena_prof_accum(arena_t *arena, uint64_t accumbytes);
+bool	arena_prof_accum(tsdn_t *tsdn, arena_t *arena, uint64_t accumbytes);
 szind_t	arena_ptr_small_binind_get(const void *ptr, size_t mapbits);
 szind_t	arena_bin_index(arena_t *arena, arena_bin_t *bin);
 size_t	arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info,
     const void *ptr);
-prof_tctx_t	*arena_prof_tctx_get(const void *ptr);
-void	arena_prof_tctx_set(const void *ptr, size_t usize, prof_tctx_t *tctx);
-void	arena_prof_tctx_reset(const void *ptr, size_t usize,
+prof_tctx_t	*arena_prof_tctx_get(tsdn_t *tsdn, const void *ptr);
+void	arena_prof_tctx_set(tsdn_t *tsdn, const void *ptr, size_t usize,
+    prof_tctx_t *tctx);
+void	arena_prof_tctx_reset(tsdn_t *tsdn, const void *ptr, size_t usize,
     const void *old_ptr, prof_tctx_t *old_tctx);
-void	arena_decay_ticks(tsd_t *tsd, arena_t *arena, unsigned nticks);
-void	arena_decay_tick(tsd_t *tsd, arena_t *arena);
-void	*arena_malloc(tsd_t *tsd, arena_t *arena, size_t size, szind_t ind,
+void	arena_decay_ticks(tsdn_t *tsdn, arena_t *arena, unsigned nticks);
+void	arena_decay_tick(tsdn_t *tsdn, arena_t *arena);
+void	*arena_malloc(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind,
     bool zero, tcache_t *tcache, bool slow_path);
 arena_t	*arena_aalloc(const void *ptr);
-size_t	arena_salloc(const void *ptr, bool demote);
-void	arena_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path);
-void	arena_sdalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache);
+size_t	arena_salloc(tsdn_t *tsdn, const void *ptr, bool demote);
+void	arena_dalloc(tsdn_t *tsdn, void *ptr, tcache_t *tcache, bool slow_path);
+void	arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache,
+    bool slow_path);
 #endif
 
 #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ARENA_C_))
 #  ifdef JEMALLOC_ARENA_INLINE_A
 JEMALLOC_ALWAYS_INLINE arena_chunk_map_bits_t *
-arena_bitselm_get(arena_chunk_t *chunk, size_t pageind)
+arena_bitselm_get_mutable(arena_chunk_t *chunk, size_t pageind)
 {
 
 	assert(pageind >= map_bias);
@@ -668,8 +697,15 @@
 	return (&chunk->map_bits[pageind-map_bias]);
 }
 
+JEMALLOC_ALWAYS_INLINE const arena_chunk_map_bits_t *
+arena_bitselm_get_const(const arena_chunk_t *chunk, size_t pageind)
+{
+
+	return (arena_bitselm_get_mutable((arena_chunk_t *)chunk, pageind));
+}
+
 JEMALLOC_ALWAYS_INLINE arena_chunk_map_misc_t *
-arena_miscelm_get(arena_chunk_t *chunk, size_t pageind)
+arena_miscelm_get_mutable(arena_chunk_t *chunk, size_t pageind)
 {
 
 	assert(pageind >= map_bias);
@@ -679,6 +715,13 @@
 	    (uintptr_t)map_misc_offset) + pageind-map_bias);
 }
 
+JEMALLOC_ALWAYS_INLINE const arena_chunk_map_misc_t *
+arena_miscelm_get_const(const arena_chunk_t *chunk, size_t pageind)
+{
+
+	return (arena_miscelm_get_mutable((arena_chunk_t *)chunk, pageind));
+}
+
 JEMALLOC_ALWAYS_INLINE size_t
 arena_miscelm_to_pageind(const arena_chunk_map_misc_t *miscelm)
 {
@@ -693,7 +736,7 @@
 }
 
 JEMALLOC_ALWAYS_INLINE void *
-arena_miscelm_to_rpages(arena_chunk_map_misc_t *miscelm)
+arena_miscelm_to_rpages(const arena_chunk_map_misc_t *miscelm)
 {
 	arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(miscelm);
 	size_t pageind = arena_miscelm_to_pageind(miscelm);
@@ -726,24 +769,31 @@
 }
 
 JEMALLOC_ALWAYS_INLINE size_t *
-arena_mapbitsp_get(arena_chunk_t *chunk, size_t pageind)
+arena_mapbitsp_get_mutable(arena_chunk_t *chunk, size_t pageind)
 {
 
-	return (&arena_bitselm_get(chunk, pageind)->bits);
+	return (&arena_bitselm_get_mutable(chunk, pageind)->bits);
+}
+
+JEMALLOC_ALWAYS_INLINE const size_t *
+arena_mapbitsp_get_const(const arena_chunk_t *chunk, size_t pageind)
+{
+
+	return (arena_mapbitsp_get_mutable((arena_chunk_t *)chunk, pageind));
 }
 
 JEMALLOC_ALWAYS_INLINE size_t
-arena_mapbitsp_read(size_t *mapbitsp)
+arena_mapbitsp_read(const size_t *mapbitsp)
 {
 
 	return (*mapbitsp);
 }
 
 JEMALLOC_ALWAYS_INLINE size_t
-arena_mapbits_get(arena_chunk_t *chunk, size_t pageind)
+arena_mapbits_get(const arena_chunk_t *chunk, size_t pageind)
 {
 
-	return (arena_mapbitsp_read(arena_mapbitsp_get(chunk, pageind)));
+	return (arena_mapbitsp_read(arena_mapbitsp_get_const(chunk, pageind)));
 }
 
 JEMALLOC_ALWAYS_INLINE size_t
@@ -763,7 +813,7 @@
 }
 
 JEMALLOC_ALWAYS_INLINE size_t
-arena_mapbits_unallocated_size_get(arena_chunk_t *chunk, size_t pageind)
+arena_mapbits_unallocated_size_get(const arena_chunk_t *chunk, size_t pageind)
 {
 	size_t mapbits;
 
@@ -773,7 +823,7 @@
 }
 
 JEMALLOC_ALWAYS_INLINE size_t
-arena_mapbits_large_size_get(arena_chunk_t *chunk, size_t pageind)
+arena_mapbits_large_size_get(const arena_chunk_t *chunk, size_t pageind)
 {
 	size_t mapbits;
 
@@ -784,7 +834,7 @@
 }
 
 JEMALLOC_ALWAYS_INLINE size_t
-arena_mapbits_small_runind_get(arena_chunk_t *chunk, size_t pageind)
+arena_mapbits_small_runind_get(const arena_chunk_t *chunk, size_t pageind)
 {
 	size_t mapbits;
 
@@ -795,7 +845,7 @@
 }
 
 JEMALLOC_ALWAYS_INLINE szind_t
-arena_mapbits_binind_get(arena_chunk_t *chunk, size_t pageind)
+arena_mapbits_binind_get(const arena_chunk_t *chunk, size_t pageind)
 {
 	size_t mapbits;
 	szind_t binind;
@@ -807,7 +857,7 @@
 }
 
 JEMALLOC_ALWAYS_INLINE size_t
-arena_mapbits_dirty_get(arena_chunk_t *chunk, size_t pageind)
+arena_mapbits_dirty_get(const arena_chunk_t *chunk, size_t pageind)
 {
 	size_t mapbits;
 
@@ -818,7 +868,7 @@
 }
 
 JEMALLOC_ALWAYS_INLINE size_t
-arena_mapbits_unzeroed_get(arena_chunk_t *chunk, size_t pageind)
+arena_mapbits_unzeroed_get(const arena_chunk_t *chunk, size_t pageind)
 {
 	size_t mapbits;
 
@@ -829,7 +879,7 @@
 }
 
 JEMALLOC_ALWAYS_INLINE size_t
-arena_mapbits_decommitted_get(arena_chunk_t *chunk, size_t pageind)
+arena_mapbits_decommitted_get(const arena_chunk_t *chunk, size_t pageind)
 {
 	size_t mapbits;
 
@@ -840,7 +890,7 @@
 }
 
 JEMALLOC_ALWAYS_INLINE size_t
-arena_mapbits_large_get(arena_chunk_t *chunk, size_t pageind)
+arena_mapbits_large_get(const arena_chunk_t *chunk, size_t pageind)
 {
 	size_t mapbits;
 
@@ -849,7 +899,7 @@
 }
 
 JEMALLOC_ALWAYS_INLINE size_t
-arena_mapbits_allocated_get(arena_chunk_t *chunk, size_t pageind)
+arena_mapbits_allocated_get(const arena_chunk_t *chunk, size_t pageind)
 {
 	size_t mapbits;
 
@@ -885,7 +935,7 @@
 arena_mapbits_unallocated_set(arena_chunk_t *chunk, size_t pageind, size_t size,
     size_t flags)
 {
-	size_t *mapbitsp = arena_mapbitsp_get(chunk, pageind);
+	size_t *mapbitsp = arena_mapbitsp_get_mutable(chunk, pageind);
 
 	assert((size & PAGE_MASK) == 0);
 	assert((flags & CHUNK_MAP_FLAGS_MASK) == flags);
@@ -899,7 +949,7 @@
 arena_mapbits_unallocated_size_set(arena_chunk_t *chunk, size_t pageind,
     size_t size)
 {
-	size_t *mapbitsp = arena_mapbitsp_get(chunk, pageind);
+	size_t *mapbitsp = arena_mapbitsp_get_mutable(chunk, pageind);
 	size_t mapbits = arena_mapbitsp_read(mapbitsp);
 
 	assert((size & PAGE_MASK) == 0);
@@ -911,7 +961,7 @@
 JEMALLOC_ALWAYS_INLINE void
 arena_mapbits_internal_set(arena_chunk_t *chunk, size_t pageind, size_t flags)
 {
-	size_t *mapbitsp = arena_mapbitsp_get(chunk, pageind);
+	size_t *mapbitsp = arena_mapbitsp_get_mutable(chunk, pageind);
 
 	assert((flags & CHUNK_MAP_UNZEROED) == flags);
 	arena_mapbitsp_write(mapbitsp, flags);
@@ -921,7 +971,7 @@
 arena_mapbits_large_set(arena_chunk_t *chunk, size_t pageind, size_t size,
     size_t flags)
 {
-	size_t *mapbitsp = arena_mapbitsp_get(chunk, pageind);
+	size_t *mapbitsp = arena_mapbitsp_get_mutable(chunk, pageind);
 
 	assert((size & PAGE_MASK) == 0);
 	assert((flags & CHUNK_MAP_FLAGS_MASK) == flags);
@@ -936,7 +986,7 @@
 arena_mapbits_large_binind_set(arena_chunk_t *chunk, size_t pageind,
     szind_t binind)
 {
-	size_t *mapbitsp = arena_mapbitsp_get(chunk, pageind);
+	size_t *mapbitsp = arena_mapbitsp_get_mutable(chunk, pageind);
 	size_t mapbits = arena_mapbitsp_read(mapbitsp);
 
 	assert(binind <= BININD_INVALID);
@@ -950,7 +1000,7 @@
 arena_mapbits_small_set(arena_chunk_t *chunk, size_t pageind, size_t runind,
     szind_t binind, size_t flags)
 {
-	size_t *mapbitsp = arena_mapbitsp_get(chunk, pageind);
+	size_t *mapbitsp = arena_mapbitsp_get_mutable(chunk, pageind);
 
 	assert(binind < BININD_INVALID);
 	assert(pageind - runind >= map_bias);
@@ -1007,7 +1057,7 @@
 }
 
 JEMALLOC_INLINE bool
-arena_prof_accum(arena_t *arena, uint64_t accumbytes)
+arena_prof_accum(tsdn_t *tsdn, arena_t *arena, uint64_t accumbytes)
 {
 
 	cassert(config_prof);
@@ -1018,9 +1068,9 @@
 	{
 		bool ret;
 
-		malloc_mutex_lock(&arena->lock);
+		malloc_mutex_lock(tsdn, &arena->lock);
 		ret = arena_prof_accum_impl(arena, accumbytes);
-		malloc_mutex_unlock(&arena->lock);
+		malloc_mutex_unlock(tsdn, &arena->lock);
 		return (ret);
 	}
 }
@@ -1038,12 +1088,12 @@
 		size_t pageind;
 		size_t actual_mapbits;
 		size_t rpages_ind;
-		arena_run_t *run;
+		const arena_run_t *run;
 		arena_bin_t *bin;
 		szind_t run_binind, actual_binind;
 		arena_bin_info_t *bin_info;
-		arena_chunk_map_misc_t *miscelm;
-		void *rpages;
+		const arena_chunk_map_misc_t *miscelm;
+		const void *rpages;
 
 		assert(binind != BININD_INVALID);
 		assert(binind < NBINS);
@@ -1056,7 +1106,7 @@
 		assert(arena_mapbits_allocated_get(chunk, pageind) != 0);
 		rpages_ind = pageind - arena_mapbits_small_runind_get(chunk,
 		    pageind);
-		miscelm = arena_miscelm_get(chunk, rpages_ind);
+		miscelm = arena_miscelm_get_const(chunk, rpages_ind);
 		run = &miscelm->run;
 		run_binind = run->binind;
 		bin = &arena->bins[run_binind];
@@ -1156,7 +1206,7 @@
 }
 
 JEMALLOC_INLINE prof_tctx_t *
-arena_prof_tctx_get(const void *ptr)
+arena_prof_tctx_get(tsdn_t *tsdn, const void *ptr)
 {
 	prof_tctx_t *ret;
 	arena_chunk_t *chunk;
@@ -1172,18 +1222,19 @@
 		if (likely((mapbits & CHUNK_MAP_LARGE) == 0))
 			ret = (prof_tctx_t *)(uintptr_t)1U;
 		else {
-			arena_chunk_map_misc_t *elm = arena_miscelm_get(chunk,
-			    pageind);
+			arena_chunk_map_misc_t *elm =
+			    arena_miscelm_get_mutable(chunk, pageind);
 			ret = atomic_read_p(&elm->prof_tctx_pun);
 		}
 	} else
-		ret = huge_prof_tctx_get(ptr);
+		ret = huge_prof_tctx_get(tsdn, ptr);
 
 	return (ret);
 }
 
 JEMALLOC_INLINE void
-arena_prof_tctx_set(const void *ptr, size_t usize, prof_tctx_t *tctx)
+arena_prof_tctx_set(tsdn_t *tsdn, const void *ptr, size_t usize,
+    prof_tctx_t *tctx)
 {
 	arena_chunk_t *chunk;
 
@@ -1202,7 +1253,7 @@
 
 			assert(arena_mapbits_large_get(chunk, pageind) != 0);
 
-			elm = arena_miscelm_get(chunk, pageind);
+			elm = arena_miscelm_get_mutable(chunk, pageind);
 			atomic_write_p(&elm->prof_tctx_pun, tctx);
 		} else {
 			/*
@@ -1214,12 +1265,12 @@
 			assert(arena_mapbits_large_get(chunk, pageind) == 0);
 		}
 	} else
-		huge_prof_tctx_set(ptr, tctx);
+		huge_prof_tctx_set(tsdn, ptr, tctx);
 }
 
 JEMALLOC_INLINE void
-arena_prof_tctx_reset(const void *ptr, size_t usize, const void *old_ptr,
-    prof_tctx_t *old_tctx)
+arena_prof_tctx_reset(tsdn_t *tsdn, const void *ptr, size_t usize,
+    const void *old_ptr, prof_tctx_t *old_tctx)
 {
 
 	cassert(config_prof);
@@ -1238,56 +1289,59 @@
 			    0);
 			assert(arena_mapbits_large_get(chunk, pageind) != 0);
 
-			elm = arena_miscelm_get(chunk, pageind);
+			elm = arena_miscelm_get_mutable(chunk, pageind);
 			atomic_write_p(&elm->prof_tctx_pun,
 			    (prof_tctx_t *)(uintptr_t)1U);
 		} else
-			huge_prof_tctx_reset(ptr);
+			huge_prof_tctx_reset(tsdn, ptr);
 	}
 }
 
 JEMALLOC_ALWAYS_INLINE void
-arena_decay_ticks(tsd_t *tsd, arena_t *arena, unsigned nticks)
+arena_decay_ticks(tsdn_t *tsdn, arena_t *arena, unsigned nticks)
 {
+	tsd_t *tsd;
 	ticker_t *decay_ticker;
 
-	if (unlikely(tsd == NULL))
+	if (unlikely(tsdn_null(tsdn)))
 		return;
+	tsd = tsdn_tsd(tsdn);
 	decay_ticker = decay_ticker_get(tsd, arena->ind);
 	if (unlikely(decay_ticker == NULL))
 		return;
 	if (unlikely(ticker_ticks(decay_ticker, nticks)))
-		arena_purge(arena, false);
+		arena_purge(tsdn, arena, false);
 }
 
 JEMALLOC_ALWAYS_INLINE void
-arena_decay_tick(tsd_t *tsd, arena_t *arena)
+arena_decay_tick(tsdn_t *tsdn, arena_t *arena)
 {
 
-	arena_decay_ticks(tsd, arena, 1);
+	arena_decay_ticks(tsdn, arena, 1);
 }
 
 JEMALLOC_ALWAYS_INLINE void *
-arena_malloc(tsd_t *tsd, arena_t *arena, size_t size, szind_t ind, bool zero,
+arena_malloc(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, bool zero,
     tcache_t *tcache, bool slow_path)
 {
 
+	assert(!tsdn_null(tsdn) || tcache == NULL);
 	assert(size != 0);
 
 	if (likely(tcache != NULL)) {
 		if (likely(size <= SMALL_MAXCLASS)) {
-			return (tcache_alloc_small(tsd, arena, tcache, size,
-			    ind, zero, slow_path));
+			return (tcache_alloc_small(tsdn_tsd(tsdn), arena,
+			    tcache, size, ind, zero, slow_path));
 		}
 		if (likely(size <= tcache_maxclass)) {
-			return (tcache_alloc_large(tsd, arena, tcache, size,
-			    ind, zero, slow_path));
+			return (tcache_alloc_large(tsdn_tsd(tsdn), arena,
+			    tcache, size, ind, zero, slow_path));
 		}
 		/* (size > tcache_maxclass) case falls through. */
 		assert(size > tcache_maxclass);
 	}
 
-	return (arena_malloc_hard(tsd, arena, size, ind, zero, tcache));
+	return (arena_malloc_hard(tsdn, arena, size, ind, zero));
 }
 
 JEMALLOC_ALWAYS_INLINE arena_t *
@@ -1304,7 +1358,7 @@
 
 /* Return the size of the allocation pointed to by ptr. */
 JEMALLOC_ALWAYS_INLINE size_t
-arena_salloc(const void *ptr, bool demote)
+arena_salloc(tsdn_t *tsdn, const void *ptr, bool demote)
 {
 	size_t ret;
 	arena_chunk_t *chunk;
@@ -1347,17 +1401,18 @@
 			ret = index2size(binind);
 		}
 	} else
-		ret = huge_salloc(ptr);
+		ret = huge_salloc(tsdn, ptr);
 
 	return (ret);
 }
 
 JEMALLOC_ALWAYS_INLINE void
-arena_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path)
+arena_dalloc(tsdn_t *tsdn, void *ptr, tcache_t *tcache, bool slow_path)
 {
 	arena_chunk_t *chunk;
 	size_t pageind, mapbits;
 
+	assert(!tsdn_null(tsdn) || tcache == NULL);
 	assert(ptr != NULL);
 
 	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
@@ -1370,11 +1425,12 @@
 			if (likely(tcache != NULL)) {
 				szind_t binind = arena_ptr_small_binind_get(ptr,
 				    mapbits);
-				tcache_dalloc_small(tsd, tcache, ptr, binind,
-				    slow_path);
+				tcache_dalloc_small(tsdn_tsd(tsdn), tcache, ptr,
+				    binind, slow_path);
 			} else {
-				arena_dalloc_small(tsd, extent_node_arena_get(
-				    &chunk->node), chunk, ptr, pageind);
+				arena_dalloc_small(tsdn,
+				    extent_node_arena_get(&chunk->node), chunk,
+				    ptr, pageind);
 			}
 		} else {
 			size_t size = arena_mapbits_large_size_get(chunk,
@@ -1385,22 +1441,26 @@
 
 			if (likely(tcache != NULL) && size - large_pad <=
 			    tcache_maxclass) {
-				tcache_dalloc_large(tsd, tcache, ptr, size -
-				    large_pad, slow_path);
+				tcache_dalloc_large(tsdn_tsd(tsdn), tcache, ptr,
+				    size - large_pad, slow_path);
 			} else {
-				arena_dalloc_large(tsd, extent_node_arena_get(
-				    &chunk->node), chunk, ptr);
+				arena_dalloc_large(tsdn,
+				    extent_node_arena_get(&chunk->node), chunk,
+				    ptr);
 			}
 		}
 	} else
-		huge_dalloc(tsd, ptr, tcache);
+		huge_dalloc(tsdn, ptr);
 }
 
 JEMALLOC_ALWAYS_INLINE void
-arena_sdalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache)
+arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache,
+    bool slow_path)
 {
 	arena_chunk_t *chunk;
 
+	assert(!tsdn_null(tsdn) || tcache == NULL);
+
 	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
 	if (likely(chunk != ptr)) {
 		if (config_prof && opt_prof) {
@@ -1417,34 +1477,36 @@
 				    pageind) - large_pad;
 			}
 		}
-		assert(s2u(size) == s2u(arena_salloc(ptr, false)));
+		assert(s2u(size) == s2u(arena_salloc(tsdn, ptr, false)));
 
 		if (likely(size <= SMALL_MAXCLASS)) {
 			/* Small allocation. */
 			if (likely(tcache != NULL)) {
 				szind_t binind = size2index(size);
-				tcache_dalloc_small(tsd, tcache, ptr, binind,
-				    true);
+				tcache_dalloc_small(tsdn_tsd(tsdn), tcache, ptr,
+				    binind, slow_path);
 			} else {
 				size_t pageind = ((uintptr_t)ptr -
 				    (uintptr_t)chunk) >> LG_PAGE;
-				arena_dalloc_small(tsd, extent_node_arena_get(
-				    &chunk->node), chunk, ptr, pageind);
+				arena_dalloc_small(tsdn,
+				    extent_node_arena_get(&chunk->node), chunk,
+				    ptr, pageind);
 			}
 		} else {
 			assert(config_cache_oblivious || ((uintptr_t)ptr &
 			    PAGE_MASK) == 0);
 
 			if (likely(tcache != NULL) && size <= tcache_maxclass) {
-				tcache_dalloc_large(tsd, tcache, ptr, size,
-				    true);
+				tcache_dalloc_large(tsdn_tsd(tsdn), tcache, ptr,
+				    size, slow_path);
 			} else {
-				arena_dalloc_large(tsd, extent_node_arena_get(
-				    &chunk->node), chunk, ptr);
+				arena_dalloc_large(tsdn,
+				    extent_node_arena_get(&chunk->node), chunk,
+				    ptr);
 			}
 		}
 	} else
-		huge_dalloc(tsd, ptr, tcache);
+		huge_dalloc(tsdn, ptr);
 }
 #  endif /* JEMALLOC_ARENA_INLINE_B */
 #endif
diff --git a/include/jemalloc/internal/base.h b/include/jemalloc/internal/base.h
index 39e46ee..d6b81e1 100644
--- a/include/jemalloc/internal/base.h
+++ b/include/jemalloc/internal/base.h
@@ -9,12 +9,13 @@
 /******************************************************************************/
 #ifdef JEMALLOC_H_EXTERNS
 
-void	*base_alloc(size_t size);
-void	base_stats_get(size_t *allocated, size_t *resident, size_t *mapped);
+void	*base_alloc(tsdn_t *tsdn, size_t size);
+void	base_stats_get(tsdn_t *tsdn, size_t *allocated, size_t *resident,
+    size_t *mapped);
 bool	base_boot(void);
-void	base_prefork(void);
-void	base_postfork_parent(void);
-void	base_postfork_child(void);
+void	base_prefork(tsdn_t *tsdn);
+void	base_postfork_parent(tsdn_t *tsdn);
+void	base_postfork_child(tsdn_t *tsdn);
 
 #endif /* JEMALLOC_H_EXTERNS */
 /******************************************************************************/
diff --git a/include/jemalloc/internal/bitmap.h b/include/jemalloc/internal/bitmap.h
index 0e0d247..36f38b5 100644
--- a/include/jemalloc/internal/bitmap.h
+++ b/include/jemalloc/internal/bitmap.h
@@ -17,8 +17,8 @@
 
 /*
  * Do some analysis on how big the bitmap is before we use a tree.  For a brute
- * force linear search, if we would have to call ffsl more than 2^3 times, use a
- * tree instead.
+ * force linear search, if we would have to call ffs_lu() more than 2^3 times,
+ * use a tree instead.
  */
 #if LG_BITMAP_MAXBITS - LG_BITMAP_GROUP_NBITS > 3
 #  define USE_TREE
@@ -223,7 +223,7 @@
 		i++;
 		g = bitmap[i];
 	}
-	bit = (bit - 1) + (i << LG_BITMAP_GROUP_NBITS);
+	bit = (i << LG_BITMAP_GROUP_NBITS) + (bit - 1);
 #endif
 	bitmap_set(bitmap, binfo, bit);
 	return (bit);
diff --git a/include/jemalloc/internal/chunk.h b/include/jemalloc/internal/chunk.h
index d800478..c9fd4ec 100644
--- a/include/jemalloc/internal/chunk.h
+++ b/include/jemalloc/internal/chunk.h
@@ -48,28 +48,32 @@
 
 extern const chunk_hooks_t	chunk_hooks_default;
 
-chunk_hooks_t	chunk_hooks_get(arena_t *arena);
-chunk_hooks_t	chunk_hooks_set(arena_t *arena,
+chunk_hooks_t	chunk_hooks_get(tsdn_t *tsdn, arena_t *arena);
+chunk_hooks_t	chunk_hooks_set(tsdn_t *tsdn, arena_t *arena,
     const chunk_hooks_t *chunk_hooks);
 
-bool	chunk_register(const void *chunk, const extent_node_t *node);
+bool	chunk_register(tsdn_t *tsdn, const void *chunk,
+    const extent_node_t *node);
 void	chunk_deregister(const void *chunk, const extent_node_t *node);
 void	*chunk_alloc_base(size_t size);
-void	*chunk_alloc_cache(arena_t *arena, chunk_hooks_t *chunk_hooks,
-    void *new_addr, size_t size, size_t alignment, bool *zero,
-    bool dalloc_node);
-void	*chunk_alloc_wrapper(arena_t *arena, chunk_hooks_t *chunk_hooks,
-    void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit);
-void	chunk_dalloc_cache(arena_t *arena, chunk_hooks_t *chunk_hooks,
-    void *chunk, size_t size, bool committed);
-void	chunk_dalloc_wrapper(arena_t *arena, chunk_hooks_t *chunk_hooks,
-    void *chunk, size_t size, bool zeroed, bool committed);
-bool	chunk_purge_wrapper(arena_t *arena, chunk_hooks_t *chunk_hooks,
-    void *chunk, size_t size, size_t offset, size_t length);
+void	*chunk_alloc_cache(tsdn_t *tsdn, arena_t *arena,
+    chunk_hooks_t *chunk_hooks, void *new_addr, size_t size, size_t alignment,
+    bool *zero, bool dalloc_node);
+void	*chunk_alloc_wrapper(tsdn_t *tsdn, arena_t *arena,
+    chunk_hooks_t *chunk_hooks, void *new_addr, size_t size, size_t alignment,
+    bool *zero, bool *commit);
+void	chunk_dalloc_cache(tsdn_t *tsdn, arena_t *arena,
+    chunk_hooks_t *chunk_hooks, void *chunk, size_t size, bool committed);
+void	chunk_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena,
+    chunk_hooks_t *chunk_hooks, void *chunk, size_t size, bool zeroed,
+    bool committed);
+bool	chunk_purge_wrapper(tsdn_t *tsdn, arena_t *arena,
+    chunk_hooks_t *chunk_hooks, void *chunk, size_t size, size_t offset,
+    size_t length);
 bool	chunk_boot(void);
-void	chunk_prefork(void);
-void	chunk_postfork_parent(void);
-void	chunk_postfork_child(void);
+void	chunk_prefork(tsdn_t *tsdn);
+void	chunk_postfork_parent(tsdn_t *tsdn);
+void	chunk_postfork_child(tsdn_t *tsdn);
 
 #endif /* JEMALLOC_H_EXTERNS */
 /******************************************************************************/
diff --git a/include/jemalloc/internal/chunk_dss.h b/include/jemalloc/internal/chunk_dss.h
index 388f46b..724fa57 100644
--- a/include/jemalloc/internal/chunk_dss.h
+++ b/include/jemalloc/internal/chunk_dss.h
@@ -21,15 +21,15 @@
 /******************************************************************************/
 #ifdef JEMALLOC_H_EXTERNS
 
-dss_prec_t	chunk_dss_prec_get(void);
-bool	chunk_dss_prec_set(dss_prec_t dss_prec);
-void	*chunk_alloc_dss(arena_t *arena, void *new_addr, size_t size,
-    size_t alignment, bool *zero, bool *commit);
-bool	chunk_in_dss(void *chunk);
+dss_prec_t	chunk_dss_prec_get(tsdn_t *tsdn);
+bool	chunk_dss_prec_set(tsdn_t *tsdn, dss_prec_t dss_prec);
+void	*chunk_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr,
+    size_t size, size_t alignment, bool *zero, bool *commit);
+bool	chunk_in_dss(tsdn_t *tsdn, void *chunk);
 bool	chunk_dss_boot(void);
-void	chunk_dss_prefork(void);
-void	chunk_dss_postfork_parent(void);
-void	chunk_dss_postfork_child(void);
+void	chunk_dss_prefork(tsdn_t *tsdn);
+void	chunk_dss_postfork_parent(tsdn_t *tsdn);
+void	chunk_dss_postfork_child(tsdn_t *tsdn);
 
 #endif /* JEMALLOC_H_EXTERNS */
 /******************************************************************************/
diff --git a/include/jemalloc/internal/ckh.h b/include/jemalloc/internal/ckh.h
index f75ad90..46e151c 100644
--- a/include/jemalloc/internal/ckh.h
+++ b/include/jemalloc/internal/ckh.h
@@ -64,13 +64,13 @@
 /******************************************************************************/
 #ifdef JEMALLOC_H_EXTERNS
 
-bool	ckh_new(tsd_t *tsd, ckh_t *ckh, size_t minitems, ckh_hash_t *hash,
+bool	ckh_new(tsdn_t *tsdn, ckh_t *ckh, size_t minitems, ckh_hash_t *hash,
     ckh_keycomp_t *keycomp);
-void	ckh_delete(tsd_t *tsd, ckh_t *ckh);
+void	ckh_delete(tsdn_t *tsdn, ckh_t *ckh);
 size_t	ckh_count(ckh_t *ckh);
 bool	ckh_iter(ckh_t *ckh, size_t *tabind, void **key, void **data);
-bool	ckh_insert(tsd_t *tsd, ckh_t *ckh, const void *key, const void *data);
-bool	ckh_remove(tsd_t *tsd, ckh_t *ckh, const void *searchkey, void **key,
+bool	ckh_insert(tsdn_t *tsdn, ckh_t *ckh, const void *key, const void *data);
+bool	ckh_remove(tsdn_t *tsdn, ckh_t *ckh, const void *searchkey, void **key,
     void **data);
 bool	ckh_search(ckh_t *ckh, const void *searchkey, void **key, void **data);
 void	ckh_string_hash(const void *key, size_t r_hash[2]);
diff --git a/include/jemalloc/internal/ctl.h b/include/jemalloc/internal/ctl.h
index 9c5e932..af0f6d7 100644
--- a/include/jemalloc/internal/ctl.h
+++ b/include/jemalloc/internal/ctl.h
@@ -21,13 +21,14 @@
 	/* If (nchildren == 0), this is a terminal node. */
 	unsigned		nchildren;
 	const			ctl_node_t *children;
-	int			(*ctl)(const size_t *, size_t, void *, size_t *,
-	    void *, size_t);
+	int			(*ctl)(tsd_t *, const size_t *, size_t, void *,
+	    size_t *, void *, size_t);
 };
 
 struct ctl_indexed_node_s {
 	struct ctl_node_s	node;
-	const ctl_named_node_t	*(*index)(const size_t *, size_t, size_t);
+	const ctl_named_node_t	*(*index)(tsdn_t *, const size_t *, size_t,
+	    size_t);
 };
 
 struct ctl_arena_stats_s {
@@ -60,6 +61,7 @@
 	size_t			metadata;
 	size_t			resident;
 	size_t			mapped;
+	size_t			retained;
 	unsigned		narenas;
 	ctl_arena_stats_t	*arenas;	/* (narenas + 1) elements. */
 };
@@ -68,16 +70,17 @@
 /******************************************************************************/
 #ifdef JEMALLOC_H_EXTERNS
 
-int	ctl_byname(const char *name, void *oldp, size_t *oldlenp, void *newp,
-    size_t newlen);
-int	ctl_nametomib(const char *name, size_t *mibp, size_t *miblenp);
-
-int	ctl_bymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
+int	ctl_byname(tsd_t *tsd, const char *name, void *oldp, size_t *oldlenp,
     void *newp, size_t newlen);
+int	ctl_nametomib(tsdn_t *tsdn, const char *name, size_t *mibp,
+    size_t *miblenp);
+
+int	ctl_bymib(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
+    size_t *oldlenp, void *newp, size_t newlen);
 bool	ctl_boot(void);
-void	ctl_prefork(void);
-void	ctl_postfork_parent(void);
-void	ctl_postfork_child(void);
+void	ctl_prefork(tsdn_t *tsdn);
+void	ctl_postfork_parent(tsdn_t *tsdn);
+void	ctl_postfork_child(tsdn_t *tsdn);
 
 #define	xmallctl(name, oldp, oldlenp, newp, newlen) do {		\
 	if (je_mallctl(name, oldp, oldlenp, newp, newlen)		\
diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h
index 386d50e..49d76a5 100644
--- a/include/jemalloc/internal/extent.h
+++ b/include/jemalloc/internal/extent.h
@@ -48,7 +48,7 @@
 		/* Linkage for the size/address-ordered tree. */
 		rb_node(extent_node_t)	szad_link;
 
-		/* Linkage for arena's huge and node_cache lists. */
+		/* Linkage for arena's achunks, huge, and node_cache lists. */
 		ql_elm(extent_node_t)	ql_link;
 	};
 
diff --git a/include/jemalloc/internal/huge.h b/include/jemalloc/internal/huge.h
index cb6f69e..b5fa9e6 100644
--- a/include/jemalloc/internal/huge.h
+++ b/include/jemalloc/internal/huge.h
@@ -9,24 +9,23 @@
 /******************************************************************************/
 #ifdef JEMALLOC_H_EXTERNS
 
-void	*huge_malloc(tsd_t *tsd, arena_t *arena, size_t usize, bool zero,
-    tcache_t *tcache);
-void	*huge_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment,
-    bool zero, tcache_t *tcache);
-bool	huge_ralloc_no_move(tsd_t *tsd, void *ptr, size_t oldsize,
+void	*huge_malloc(tsdn_t *tsdn, arena_t *arena, size_t usize, bool zero);
+void	*huge_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize,
+    size_t alignment, bool zero);
+bool	huge_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize,
     size_t usize_min, size_t usize_max, bool zero);
 void	*huge_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize,
     size_t usize, size_t alignment, bool zero, tcache_t *tcache);
 #ifdef JEMALLOC_JET
-typedef void (huge_dalloc_junk_t)(void *, size_t);
+typedef void (huge_dalloc_junk_t)(tsdn_t *, void *, size_t);
 extern huge_dalloc_junk_t *huge_dalloc_junk;
 #endif
-void	huge_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache);
+void	huge_dalloc(tsdn_t *tsdn, void *ptr);
 arena_t	*huge_aalloc(const void *ptr);
-size_t	huge_salloc(const void *ptr);
-prof_tctx_t	*huge_prof_tctx_get(const void *ptr);
-void	huge_prof_tctx_set(const void *ptr, prof_tctx_t *tctx);
-void	huge_prof_tctx_reset(const void *ptr);
+size_t	huge_salloc(tsdn_t *tsdn, const void *ptr);
+prof_tctx_t	*huge_prof_tctx_get(tsdn_t *tsdn, const void *ptr);
+void	huge_prof_tctx_set(tsdn_t *tsdn, const void *ptr, prof_tctx_t *tctx);
+void	huge_prof_tctx_reset(tsdn_t *tsdn, const void *ptr);
 
 #endif /* JEMALLOC_H_EXTERNS */
 /******************************************************************************/
diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in
index 3f54391..51bf897 100644
--- a/include/jemalloc/internal/jemalloc_internal.h.in
+++ b/include/jemalloc/internal/jemalloc_internal.h.in
@@ -161,6 +161,7 @@
 #include <malloc/malloc.h>
 #endif
 
+#include "jemalloc/internal/ph.h"
 #define	RB_COMPACT
 #include "jemalloc/internal/rb.h"
 #include "jemalloc/internal/qr.h"
@@ -257,6 +258,9 @@
 #  ifdef __powerpc__
 #    define LG_QUANTUM		4
 #  endif
+#  ifdef __riscv__
+#    define LG_QUANTUM		4
+#  endif
 #  ifdef __s390__
 #    define LG_QUANTUM		4
 #  endif
@@ -367,6 +371,7 @@
 #include "jemalloc/internal/smoothstep.h"
 #include "jemalloc/internal/stats.h"
 #include "jemalloc/internal/ctl.h"
+#include "jemalloc/internal/witness.h"
 #include "jemalloc/internal/mutex.h"
 #include "jemalloc/internal/tsd.h"
 #include "jemalloc/internal/mb.h"
@@ -398,6 +403,7 @@
 #include "jemalloc/internal/smoothstep.h"
 #include "jemalloc/internal/stats.h"
 #include "jemalloc/internal/ctl.h"
+#include "jemalloc/internal/witness.h"
 #include "jemalloc/internal/mutex.h"
 #include "jemalloc/internal/mb.h"
 #include "jemalloc/internal/bitmap.h"
@@ -440,6 +446,9 @@
 /* Number of CPUs. */
 extern unsigned	ncpus;
 
+/* Number of arenas used for automatic multiplexing of threads and arenas. */
+extern unsigned	narenas_auto;
+
 /*
  * Arenas that are used to service external requests.  Not all elements of the
  * arenas array are necessarily used; arenas are created lazily as needed.
@@ -463,14 +472,14 @@
 void	*bootstrap_malloc(size_t size);
 void	*bootstrap_calloc(size_t num, size_t size);
 void	bootstrap_free(void *ptr);
-arena_t	*arenas_extend(unsigned ind);
 unsigned	narenas_total_get(void);
-arena_t	*arena_init(unsigned ind);
+arena_t	*arena_init(tsdn_t *tsdn, unsigned ind);
 arena_tdata_t	*arena_tdata_get_hard(tsd_t *tsd, unsigned ind);
-arena_t	*arena_choose_hard(tsd_t *tsd);
+arena_t	*arena_choose_hard(tsd_t *tsd, bool internal);
 void	arena_migrate(tsd_t *tsd, unsigned oldind, unsigned newind);
 void	thread_allocated_cleanup(tsd_t *tsd);
 void	thread_deallocated_cleanup(tsd_t *tsd);
+void	iarena_cleanup(tsd_t *tsd);
 void	arena_cleanup(tsd_t *tsd);
 void	arenas_tdata_cleanup(tsd_t *tsd);
 void	narenas_tdata_cleanup(tsd_t *tsd);
@@ -490,6 +499,7 @@
 #include "jemalloc/internal/smoothstep.h"
 #include "jemalloc/internal/stats.h"
 #include "jemalloc/internal/ctl.h"
+#include "jemalloc/internal/witness.h"
 #include "jemalloc/internal/mutex.h"
 #include "jemalloc/internal/mb.h"
 #include "jemalloc/internal/bitmap.h"
@@ -521,8 +531,9 @@
 #include "jemalloc/internal/smoothstep.h"
 #include "jemalloc/internal/stats.h"
 #include "jemalloc/internal/ctl.h"
-#include "jemalloc/internal/mutex.h"
 #include "jemalloc/internal/tsd.h"
+#include "jemalloc/internal/witness.h"
+#include "jemalloc/internal/mutex.h"
 #include "jemalloc/internal/mb.h"
 #include "jemalloc/internal/extent.h"
 #include "jemalloc/internal/base.h"
@@ -542,10 +553,12 @@
 size_t	s2u_lookup(size_t size);
 size_t	s2u(size_t size);
 size_t	sa2u(size_t size, size_t alignment);
+arena_t	*arena_choose_impl(tsd_t *tsd, arena_t *arena, bool internal);
 arena_t	*arena_choose(tsd_t *tsd, arena_t *arena);
+arena_t	*arena_ichoose(tsdn_t *tsdn, arena_t *arena);
 arena_tdata_t	*arena_tdata_get(tsd_t *tsd, unsigned ind,
     bool refresh_if_missing);
-arena_t	*arena_get(unsigned ind, bool init_if_missing);
+arena_t	*arena_get(tsdn_t *tsdn, unsigned ind, bool init_if_missing);
 ticker_t	*decay_ticker_get(tsd_t *tsd, unsigned ind);
 #endif
 
@@ -741,7 +754,7 @@
 		 * Calculate the size of the over-size run that arena_palloc()
 		 * would need to allocate in order to guarantee the alignment.
 		 */
-		if (usize + large_pad + alignment - PAGE <= arena_maxrun)
+		if (usize + large_pad + alignment <= arena_maxrun)
 			return (usize);
 	}
 
@@ -771,7 +784,7 @@
 	 * Calculate the multi-chunk mapping that huge_palloc() would need in
 	 * order to guarantee the alignment.
 	 */
-	if (usize + alignment - PAGE < usize) {
+	if (usize + alignment < usize) {
 		/* size_t overflow. */
 		return (0);
 	}
@@ -780,19 +793,38 @@
 
 /* Choose an arena based on a per-thread value. */
 JEMALLOC_INLINE arena_t *
-arena_choose(tsd_t *tsd, arena_t *arena)
+arena_choose_impl(tsd_t *tsd, arena_t *arena, bool internal)
 {
 	arena_t *ret;
 
 	if (arena != NULL)
 		return (arena);
 
-	if (unlikely((ret = tsd_arena_get(tsd)) == NULL))
-		ret = arena_choose_hard(tsd);
+	ret = internal ? tsd_iarena_get(tsd) : tsd_arena_get(tsd);
+	if (unlikely(ret == NULL))
+		ret = arena_choose_hard(tsd, internal);
 
 	return (ret);
 }
 
+JEMALLOC_INLINE arena_t *
+arena_choose(tsd_t *tsd, arena_t *arena)
+{
+
+	return (arena_choose_impl(tsd, arena, false));
+}
+
+JEMALLOC_INLINE arena_t *
+arena_ichoose(tsdn_t *tsdn, arena_t *arena)
+{
+
+	assert(!tsdn_null(tsdn) || arena != NULL);
+
+	if (!tsdn_null(tsdn))
+		return (arena_choose_impl(tsdn_tsd(tsdn), NULL, true));
+	return (arena);
+}
+
 JEMALLOC_INLINE arena_tdata_t *
 arena_tdata_get(tsd_t *tsd, unsigned ind, bool refresh_if_missing)
 {
@@ -819,7 +851,7 @@
 }
 
 JEMALLOC_INLINE arena_t *
-arena_get(unsigned ind, bool init_if_missing)
+arena_get(tsdn_t *tsdn, unsigned ind, bool init_if_missing)
 {
 	arena_t *ret;
 
@@ -829,7 +861,7 @@
 	if (unlikely(ret == NULL)) {
 		ret = atomic_read_p((void *)&arenas[ind]);
 		if (init_if_missing && unlikely(ret == NULL))
-			ret = arena_init(ind);
+			ret = arena_init(tsdn, ind);
 	}
 	return (ret);
 }
@@ -863,30 +895,27 @@
 
 #ifndef JEMALLOC_ENABLE_INLINE
 arena_t	*iaalloc(const void *ptr);
-size_t	isalloc(const void *ptr, bool demote);
-void	*iallocztm(tsd_t *tsd, size_t size, szind_t ind, bool zero,
+size_t	isalloc(tsdn_t *tsdn, const void *ptr, bool demote);
+void	*iallocztm(tsdn_t *tsdn, size_t size, szind_t ind, bool zero,
     tcache_t *tcache, bool is_metadata, arena_t *arena, bool slow_path);
-void	*imalloct(tsd_t *tsd, size_t size, szind_t ind, tcache_t *tcache,
-    arena_t *arena);
-void	*imalloc(tsd_t *tsd, size_t size, szind_t ind, bool slow_path);
-void	*icalloct(tsd_t *tsd, size_t size, szind_t ind, tcache_t *tcache,
-    arena_t *arena);
-void	*icalloc(tsd_t *tsd, size_t size, szind_t ind);
-void	*ipallocztm(tsd_t *tsd, size_t usize, size_t alignment, bool zero,
+void	*ialloc(tsd_t *tsd, size_t size, szind_t ind, bool zero,
+    bool slow_path);
+void	*ipallocztm(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero,
     tcache_t *tcache, bool is_metadata, arena_t *arena);
-void	*ipalloct(tsd_t *tsd, size_t usize, size_t alignment, bool zero,
+void	*ipalloct(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero,
     tcache_t *tcache, arena_t *arena);
 void	*ipalloc(tsd_t *tsd, size_t usize, size_t alignment, bool zero);
-size_t	ivsalloc(const void *ptr, bool demote);
+size_t	ivsalloc(tsdn_t *tsdn, const void *ptr, bool demote);
 size_t	u2rz(size_t usize);
-size_t	p2rz(const void *ptr);
-void	idalloctm(tsd_t *tsd, void *ptr, tcache_t *tcache, bool is_metadata,
+size_t	p2rz(tsdn_t *tsdn, const void *ptr);
+void	idalloctm(tsdn_t *tsdn, void *ptr, tcache_t *tcache, bool is_metadata,
     bool slow_path);
-void	idalloct(tsd_t *tsd, void *ptr, tcache_t *tcache);
 void	idalloc(tsd_t *tsd, void *ptr);
 void	iqalloc(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path);
-void	isdalloct(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache);
-void	isqalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache);
+void	isdalloct(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache,
+    bool slow_path);
+void	isqalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache,
+    bool slow_path);
 void	*iralloct_realign(tsd_t *tsd, void *ptr, size_t oldsize, size_t size,
     size_t extra, size_t alignment, bool zero, tcache_t *tcache,
     arena_t *arena);
@@ -894,7 +923,7 @@
     size_t alignment, bool zero, tcache_t *tcache, arena_t *arena);
 void	*iralloc(tsd_t *tsd, void *ptr, size_t oldsize, size_t size,
     size_t alignment, bool zero);
-bool	ixalloc(tsd_t *tsd, void *ptr, size_t oldsize, size_t size,
+bool	ixalloc(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size,
     size_t extra, size_t alignment, bool zero);
 #endif
 
@@ -910,102 +939,85 @@
 
 /*
  * Typical usage:
+ *   tsdn_t *tsdn = [...]
  *   void *ptr = [...]
- *   size_t sz = isalloc(ptr, config_prof);
+ *   size_t sz = isalloc(tsdn, ptr, config_prof);
  */
 JEMALLOC_ALWAYS_INLINE size_t
-isalloc(const void *ptr, bool demote)
+isalloc(tsdn_t *tsdn, const void *ptr, bool demote)
 {
 
 	assert(ptr != NULL);
 	/* Demotion only makes sense if config_prof is true. */
 	assert(config_prof || !demote);
 
-	return (arena_salloc(ptr, demote));
+	return (arena_salloc(tsdn, ptr, demote));
 }
 
 JEMALLOC_ALWAYS_INLINE void *
-iallocztm(tsd_t *tsd, size_t size, szind_t ind, bool zero, tcache_t *tcache,
+iallocztm(tsdn_t *tsdn, size_t size, szind_t ind, bool zero, tcache_t *tcache,
     bool is_metadata, arena_t *arena, bool slow_path)
 {
 	void *ret;
 
 	assert(size != 0);
+	assert(!is_metadata || tcache == NULL);
+	assert(!is_metadata || arena == NULL || arena->ind < narenas_auto);
 
-	ret = arena_malloc(tsd, arena, size, ind, zero, tcache, slow_path);
+	ret = arena_malloc(tsdn, arena, size, ind, zero, tcache, slow_path);
 	if (config_stats && is_metadata && likely(ret != NULL)) {
-		arena_metadata_allocated_add(iaalloc(ret), isalloc(ret,
-		    config_prof));
+		arena_metadata_allocated_add(iaalloc(ret),
+		    isalloc(tsdn, ret, config_prof));
 	}
 	return (ret);
 }
 
 JEMALLOC_ALWAYS_INLINE void *
-imalloct(tsd_t *tsd, size_t size, szind_t ind, tcache_t *tcache, arena_t *arena)
+ialloc(tsd_t *tsd, size_t size, szind_t ind, bool zero, bool slow_path)
 {
 
-	return (iallocztm(tsd, size, ind, false, tcache, false, arena, true));
+	return (iallocztm(tsd_tsdn(tsd), size, ind, zero, tcache_get(tsd, true),
+	    false, NULL, slow_path));
 }
 
 JEMALLOC_ALWAYS_INLINE void *
-imalloc(tsd_t *tsd, size_t size, szind_t ind, bool slow_path)
-{
-
-	return (iallocztm(tsd, size, ind, false, tcache_get(tsd, true), false,
-	    NULL, slow_path));
-}
-
-JEMALLOC_ALWAYS_INLINE void *
-icalloct(tsd_t *tsd, size_t size, szind_t ind, tcache_t *tcache, arena_t *arena)
-{
-
-	return (iallocztm(tsd, size, ind, true, tcache, false, arena, true));
-}
-
-JEMALLOC_ALWAYS_INLINE void *
-icalloc(tsd_t *tsd, size_t size, szind_t ind)
-{
-
-	return (iallocztm(tsd, size, ind, true, tcache_get(tsd, true), false,
-	    NULL, true));
-}
-
-JEMALLOC_ALWAYS_INLINE void *
-ipallocztm(tsd_t *tsd, size_t usize, size_t alignment, bool zero,
+ipallocztm(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero,
     tcache_t *tcache, bool is_metadata, arena_t *arena)
 {
 	void *ret;
 
 	assert(usize != 0);
 	assert(usize == sa2u(usize, alignment));
+	assert(!is_metadata || tcache == NULL);
+	assert(!is_metadata || arena == NULL || arena->ind < narenas_auto);
 
-	ret = arena_palloc(tsd, arena, usize, alignment, zero, tcache);
+	ret = arena_palloc(tsdn, arena, usize, alignment, zero, tcache);
 	assert(ALIGNMENT_ADDR2BASE(ret, alignment) == ret);
 	if (config_stats && is_metadata && likely(ret != NULL)) {
-		arena_metadata_allocated_add(iaalloc(ret), isalloc(ret,
+		arena_metadata_allocated_add(iaalloc(ret), isalloc(tsdn, ret,
 		    config_prof));
 	}
 	return (ret);
 }
 
 JEMALLOC_ALWAYS_INLINE void *
-ipalloct(tsd_t *tsd, size_t usize, size_t alignment, bool zero,
+ipalloct(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero,
     tcache_t *tcache, arena_t *arena)
 {
 
-	return (ipallocztm(tsd, usize, alignment, zero, tcache, false, arena));
+	return (ipallocztm(tsdn, usize, alignment, zero, tcache, false, arena));
 }
 
 JEMALLOC_ALWAYS_INLINE void *
 ipalloc(tsd_t *tsd, size_t usize, size_t alignment, bool zero)
 {
 
-	return (ipallocztm(tsd, usize, alignment, zero, tcache_get(tsd, true),
-	    false, NULL));
+	return (ipallocztm(tsd_tsdn(tsd), usize, alignment, zero,
+	    tcache_get(tsd, true), false, NULL));
 }
 
 JEMALLOC_ALWAYS_INLINE size_t
-ivsalloc(const void *ptr, bool demote)
+ivsalloc(tsdn_t *tsdn, const void *ptr, bool demote)
 {
 	extent_node_t *node;
 
@@ -1017,7 +1029,7 @@
 	assert(extent_node_addr_get(node) == ptr ||
 	    extent_node_achunk_get(node));
 
-	return (isalloc(ptr, demote));
+	return (isalloc(tsdn, ptr, demote));
 }
 
 JEMALLOC_INLINE size_t
@@ -1035,39 +1047,34 @@
 }
 
 JEMALLOC_INLINE size_t
-p2rz(const void *ptr)
+p2rz(tsdn_t *tsdn, const void *ptr)
 {
-	size_t usize = isalloc(ptr, false);
+	size_t usize = isalloc(tsdn, ptr, false);
 
 	return (u2rz(usize));
 }
 
 JEMALLOC_ALWAYS_INLINE void
-idalloctm(tsd_t *tsd, void *ptr, tcache_t *tcache, bool is_metadata,
+idalloctm(tsdn_t *tsdn, void *ptr, tcache_t *tcache, bool is_metadata,
     bool slow_path)
 {
 
 	assert(ptr != NULL);
+	assert(!is_metadata || tcache == NULL);
+	assert(!is_metadata || iaalloc(ptr)->ind < narenas_auto);
 	if (config_stats && is_metadata) {
-		arena_metadata_allocated_sub(iaalloc(ptr), isalloc(ptr,
+		arena_metadata_allocated_sub(iaalloc(ptr), isalloc(tsdn, ptr,
 		    config_prof));
 	}
 
-	arena_dalloc(tsd, ptr, tcache, slow_path);
-}
-
-JEMALLOC_ALWAYS_INLINE void
-idalloct(tsd_t *tsd, void *ptr, tcache_t *tcache)
-{
-
-	idalloctm(tsd, ptr, tcache, false, true);
+	arena_dalloc(tsdn, ptr, tcache, slow_path);
 }
 
 JEMALLOC_ALWAYS_INLINE void
 idalloc(tsd_t *tsd, void *ptr)
 {
 
-	idalloctm(tsd, ptr, tcache_get(tsd, false), false, true);
+	idalloctm(tsd_tsdn(tsd), ptr, tcache_get(tsd, false), false, true);
 }
 
 JEMALLOC_ALWAYS_INLINE void
@@ -1077,24 +1084,25 @@
 	if (slow_path && config_fill && unlikely(opt_quarantine))
 		quarantine(tsd, ptr);
 	else
-		idalloctm(tsd, ptr, tcache, false, slow_path);
+		idalloctm(tsd_tsdn(tsd), ptr, tcache, false, slow_path);
 }
 
 JEMALLOC_ALWAYS_INLINE void
-isdalloct(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache)
+isdalloct(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache,
+    bool slow_path)
 {
 
-	arena_sdalloc(tsd, ptr, size, tcache);
+	arena_sdalloc(tsdn, ptr, size, tcache, slow_path);
 }
 
 JEMALLOC_ALWAYS_INLINE void
-isqalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache)
+isqalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache, bool slow_path)
 {
 
-	if (config_fill && unlikely(opt_quarantine))
+	if (slow_path && config_fill && unlikely(opt_quarantine))
 		quarantine(tsd, ptr);
 	else
-		isdalloct(tsd, ptr, size, tcache);
+		isdalloct(tsd_tsdn(tsd), ptr, size, tcache, slow_path);
 }
 
 JEMALLOC_ALWAYS_INLINE void *
@@ -1107,7 +1115,7 @@
 	usize = sa2u(size + extra, alignment);
 	if (unlikely(usize == 0 || usize > HUGE_MAXCLASS))
 		return (NULL);
-	p = ipalloct(tsd, usize, alignment, zero, tcache, arena);
+	p = ipalloct(tsd_tsdn(tsd), usize, alignment, zero, tcache, arena);
 	if (p == NULL) {
 		if (extra == 0)
 			return (NULL);
@@ -1115,7 +1123,8 @@
 		usize = sa2u(size, alignment);
 		if (unlikely(usize == 0 || usize > HUGE_MAXCLASS))
 			return (NULL);
-		p = ipalloct(tsd, usize, alignment, zero, tcache, arena);
+		p = ipalloct(tsd_tsdn(tsd), usize, alignment, zero, tcache,
+		    arena);
 		if (p == NULL)
 			return (NULL);
 	}
@@ -1125,7 +1134,7 @@
 	 */
 	copysize = (size < oldsize) ? size : oldsize;
 	memcpy(p, ptr, copysize);
-	isqalloc(tsd, ptr, oldsize, tcache);
+	isqalloc(tsd, ptr, oldsize, tcache, true);
 	return (p);
 }
 
@@ -1161,7 +1170,7 @@
 }
 
 JEMALLOC_ALWAYS_INLINE bool
-ixalloc(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, size_t extra,
+ixalloc(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t extra,
     size_t alignment, bool zero)
 {
 
@@ -1174,7 +1183,7 @@
 		return (true);
 	}
 
-	return (arena_ralloc_no_move(tsd, ptr, oldsize, size, extra, zero));
+	return (arena_ralloc_no_move(tsdn, ptr, oldsize, size, extra, zero));
 }
 #endif
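
Most of the prototype churn in this header is the tsd_t -> tsdn_t plumbing: a tsdn_t is either a wrapped tsd_t or NULL, callers wrap a known-good tsd with tsd_tsdn(), and callees that may run without thread-specific data test tsdn_null() before unwrapping with tsdn_tsd(). A minimal sketch of the convention (the example_* helpers are invented for illustration):

/* Illustrative helpers only; they show the calling convention, not real APIs. */
static size_t
example_usize(tsd_t *tsd, const void *ptr)
{

	/* A known-good tsd is wrapped into a tsdn for tsdn-taking callees. */
	return (isalloc(tsd_tsdn(tsd), ptr, config_prof));
}

static void
example_dalloc(tsdn_t *tsdn, void *ptr)
{

	/* With a possibly-NULL tsdn no tcache can be obtained, so pass NULL. */
	if (tsdn_null(tsdn))
		idalloctm(tsdn, ptr, NULL, false, true);
	else
		idalloc(tsdn_tsd(tsdn), ptr);
}
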
 
diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in
index 2c75371..7de0cf7 100644
--- a/include/jemalloc/internal/jemalloc_internal_defs.h.in
+++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in
@@ -215,6 +215,15 @@
 #undef JEMALLOC_ZONE_VERSION
 
 /*
+ * Methods for determining whether the OS overcommits.
+ * JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY: Linux's
+ *                                         /proc/sys/vm/overcommit_memory file.
+ * JEMALLOC_SYSCTL_VM_OVERCOMMIT: FreeBSD's vm.overcommit sysctl.
+ */
+#undef JEMALLOC_SYSCTL_VM_OVERCOMMIT
+#undef JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY
+
+/*
  * Methods for purging unused pages differ between operating systems.
  *
  *   madvise(..., MADV_DONTNEED) : On Linux, this immediately discards pages,
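
The two overcommit-detection methods documented above feed the decision to decommit unused virtual memory. As a rough sketch of the Linux probe, not the tree's actual implementation: read the overcommit_memory mode and treat modes 0 (heuristic) and 1 (always) as overcommitting:

#include <fcntl.h>
#include <stdbool.h>
#include <unistd.h>

static bool
example_os_overcommits(void)
{
	char mode;
	ssize_t nread;
	int fd = open("/proc/sys/vm/overcommit_memory", O_RDONLY);

	if (fd == -1)
		return (true);	/* Probe unavailable; assume the common default. */
	nread = read(fd, &mode, 1);
	close(fd);
	if (nread < 1)
		return (true);
	/* '0': heuristic overcommit, '1': always overcommit, '2': never. */
	return (mode == '0' || mode == '1');
}
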
diff --git a/include/jemalloc/internal/mb.h b/include/jemalloc/internal/mb.h
index 3cfa787..437c86f 100644
--- a/include/jemalloc/internal/mb.h
+++ b/include/jemalloc/internal/mb.h
@@ -42,7 +42,7 @@
 	    : /* Inputs. */
 	    : "memory" /* Clobbers. */
 	    );
-#else
+#  else
 	/*
 	 * This is hopefully enough to keep the compiler from reordering
 	 * instructions around this one.
@@ -52,7 +52,7 @@
 	    : /* Inputs. */
 	    : "memory" /* Clobbers. */
 	    );
-#endif
+#  endif
 }
 #elif (defined(__amd64__) || defined(__x86_64__))
 JEMALLOC_INLINE void
@@ -104,9 +104,9 @@
 {
 	malloc_mutex_t mtx;
 
-	malloc_mutex_init(&mtx);
-	malloc_mutex_lock(&mtx);
-	malloc_mutex_unlock(&mtx);
+	malloc_mutex_init(&mtx, "mb", WITNESS_RANK_OMIT);
+	malloc_mutex_lock(NULL, &mtx);
+	malloc_mutex_unlock(NULL, &mtx);
 }
 #endif
 #endif
diff --git a/include/jemalloc/internal/mutex.h b/include/jemalloc/internal/mutex.h
index f051f29..5221799 100644
--- a/include/jemalloc/internal/mutex.h
+++ b/include/jemalloc/internal/mutex.h
@@ -6,17 +6,21 @@
 #ifdef _WIN32
 #  define MALLOC_MUTEX_INITIALIZER
 #elif (defined(JEMALLOC_OSSPIN))
-#  define MALLOC_MUTEX_INITIALIZER {0}
+#  define MALLOC_MUTEX_INITIALIZER {0, WITNESS_INITIALIZER(WITNESS_RANK_OMIT)}
 #elif (defined(JEMALLOC_MUTEX_INIT_CB))
-#  define MALLOC_MUTEX_INITIALIZER {PTHREAD_MUTEX_INITIALIZER, NULL}
+#  define MALLOC_MUTEX_INITIALIZER					\
+    {PTHREAD_MUTEX_INITIALIZER, NULL, WITNESS_INITIALIZER(WITNESS_RANK_OMIT)}
 #else
 #  if (defined(JEMALLOC_HAVE_PTHREAD_MUTEX_ADAPTIVE_NP) &&		\
        defined(PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP))
 #    define MALLOC_MUTEX_TYPE PTHREAD_MUTEX_ADAPTIVE_NP
-#    define MALLOC_MUTEX_INITIALIZER {PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP}
+#    define MALLOC_MUTEX_INITIALIZER					\
+       {PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP,				\
+        WITNESS_INITIALIZER(WITNESS_RANK_OMIT)}
 #  else
 #    define MALLOC_MUTEX_TYPE PTHREAD_MUTEX_DEFAULT
-#    define MALLOC_MUTEX_INITIALIZER {PTHREAD_MUTEX_INITIALIZER}
+#    define MALLOC_MUTEX_INITIALIZER					\
+       {PTHREAD_MUTEX_INITIALIZER, WITNESS_INITIALIZER(WITNESS_RANK_OMIT)}
 #  endif
 #endif
 
@@ -39,6 +43,7 @@
 #else
 	pthread_mutex_t		lock;
 #endif
+	witness_t		witness;
 };
 
 #endif /* JEMALLOC_H_STRUCTS */
@@ -52,27 +57,31 @@
 #  define isthreaded true
 #endif
 
-bool	malloc_mutex_init(malloc_mutex_t *mutex);
-void	malloc_mutex_prefork(malloc_mutex_t *mutex);
-void	malloc_mutex_postfork_parent(malloc_mutex_t *mutex);
-void	malloc_mutex_postfork_child(malloc_mutex_t *mutex);
-bool	mutex_boot(void);
+bool	malloc_mutex_init(malloc_mutex_t *mutex, const char *name,
+    witness_rank_t rank);
+void	malloc_mutex_prefork(tsdn_t *tsdn, malloc_mutex_t *mutex);
+void	malloc_mutex_postfork_parent(tsdn_t *tsdn, malloc_mutex_t *mutex);
+void	malloc_mutex_postfork_child(tsdn_t *tsdn, malloc_mutex_t *mutex);
+bool	malloc_mutex_boot(void);
 
 #endif /* JEMALLOC_H_EXTERNS */
 /******************************************************************************/
 #ifdef JEMALLOC_H_INLINES
 
 #ifndef JEMALLOC_ENABLE_INLINE
-void	malloc_mutex_lock(malloc_mutex_t *mutex);
-void	malloc_mutex_unlock(malloc_mutex_t *mutex);
+void	malloc_mutex_lock(tsdn_t *tsdn, malloc_mutex_t *mutex);
+void	malloc_mutex_unlock(tsdn_t *tsdn, malloc_mutex_t *mutex);
+void	malloc_mutex_assert_owner(tsdn_t *tsdn, malloc_mutex_t *mutex);
+void	malloc_mutex_assert_not_owner(tsdn_t *tsdn, malloc_mutex_t *mutex);
 #endif
 
 #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_MUTEX_C_))
 JEMALLOC_INLINE void
-malloc_mutex_lock(malloc_mutex_t *mutex)
+malloc_mutex_lock(tsdn_t *tsdn, malloc_mutex_t *mutex)
 {
 
 	if (isthreaded) {
+		witness_assert_not_owner(tsdn, &mutex->witness);
 #ifdef _WIN32
 #  if _WIN32_WINNT >= 0x0600
 		AcquireSRWLockExclusive(&mutex->lock);
@@ -84,14 +93,16 @@
 #else
 		pthread_mutex_lock(&mutex->lock);
 #endif
+		witness_lock(tsdn, &mutex->witness);
 	}
 }
 
 JEMALLOC_INLINE void
-malloc_mutex_unlock(malloc_mutex_t *mutex)
+malloc_mutex_unlock(tsdn_t *tsdn, malloc_mutex_t *mutex)
 {
 
 	if (isthreaded) {
+		witness_unlock(tsdn, &mutex->witness);
 #ifdef _WIN32
 #  if _WIN32_WINNT >= 0x0600
 		ReleaseSRWLockExclusive(&mutex->lock);
@@ -105,6 +116,22 @@
 #endif
 	}
 }
+
+JEMALLOC_INLINE void
+malloc_mutex_assert_owner(tsdn_t *tsdn, malloc_mutex_t *mutex)
+{
+
+	if (isthreaded)
+		witness_assert_owner(tsdn, &mutex->witness);
+}
+
+JEMALLOC_INLINE void
+malloc_mutex_assert_not_owner(tsdn_t *tsdn, malloc_mutex_t *mutex)
+{
+
+	if (isthreaded)
+		witness_assert_not_owner(tsdn, &mutex->witness);
+}
 #endif
 
 #endif /* JEMALLOC_H_INLINES */
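
The mutex API now carries a tsdn_t through lock/unlock so the embedded witness can validate lock ordering and ownership. A minimal sketch under the new shape (the mutex name and the WITNESS_RANK_OMIT rank are illustrative; real call sites pass a meaningful rank so ordering is actually checked):

static malloc_mutex_t	example_mtx;

static bool
example_boot(void)
{

	/* The name and rank initialize the embedded witness. */
	return (malloc_mutex_init(&example_mtx, "example", WITNESS_RANK_OMIT));
}

static void
example_locked_op(tsdn_t *tsdn)
{

	malloc_mutex_lock(tsdn, &example_mtx);
	malloc_mutex_assert_owner(tsdn, &example_mtx);
	/* ... mutate protected state ... */
	malloc_mutex_unlock(tsdn, &example_mtx);
}
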
diff --git a/include/jemalloc/internal/nstime.h b/include/jemalloc/internal/nstime.h
index bd04f04..dc293b7 100644
--- a/include/jemalloc/internal/nstime.h
+++ b/include/jemalloc/internal/nstime.h
@@ -1,13 +1,13 @@
 /******************************************************************************/
 #ifdef JEMALLOC_H_TYPES
 
-#define JEMALLOC_CLOCK_GETTIME defined(_POSIX_MONOTONIC_CLOCK) \
+#define	JEMALLOC_CLOCK_GETTIME defined(_POSIX_MONOTONIC_CLOCK) \
     && _POSIX_MONOTONIC_CLOCK >= 0
 
 typedef struct nstime_s nstime_t;
 
 /* Maximum supported number of seconds (~584 years). */
-#define	NSTIME_SEC_MAX	18446744072
+#define	NSTIME_SEC_MAX	KQU(18446744072)
 
 #endif /* JEMALLOC_H_TYPES */
 /******************************************************************************/
diff --git a/include/jemalloc/internal/pages.h b/include/jemalloc/internal/pages.h
index da7eb96..e21effd 100644
--- a/include/jemalloc/internal/pages.h
+++ b/include/jemalloc/internal/pages.h
@@ -9,13 +9,14 @@
 /******************************************************************************/
 #ifdef JEMALLOC_H_EXTERNS
 
-void	*pages_map(void *addr, size_t size);
+void	*pages_map(void *addr, size_t size, bool *commit);
 void	pages_unmap(void *addr, size_t size);
 void	*pages_trim(void *addr, size_t alloc_size, size_t leadsize,
-    size_t size);
+    size_t size, bool *commit);
 bool	pages_commit(void *addr, size_t size);
 bool	pages_decommit(void *addr, size_t size);
 bool	pages_purge(void *addr, size_t size);
+void	pages_boot(void);
 
 #endif /* JEMALLOC_H_EXTERNS */
 /******************************************************************************/
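
pages_map() and pages_trim() now report commit state, which is what lets chunk allocation decommit unused virtual memory when the OS does not overcommit. A rough sketch of what commit/decommit amount to on an mmap-based system (illustrative only, not the tree's pages.c; the true-on-failure error convention is kept):

#include <sys/mman.h>
#include <stdbool.h>
#include <stddef.h>

static bool
example_commit(void *addr, size_t size)
{

	return (mprotect(addr, size, PROT_READ | PROT_WRITE) != 0);
}

static bool
example_decommit(void *addr, size_t size)
{

	/*
	 * Overlay the range with PROT_NONE so it stays reserved (retained)
	 * but no longer counts toward resident memory.
	 */
	return (mmap(addr, size, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS |
	    MAP_FIXED, -1, 0) == MAP_FAILED);
}
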
diff --git a/include/jemalloc/internal/ph.h b/include/jemalloc/internal/ph.h
new file mode 100644
index 0000000..4f91c33
--- /dev/null
+++ b/include/jemalloc/internal/ph.h
@@ -0,0 +1,345 @@
+/*
+ * A Pairing Heap implementation.
+ *
+ * "The Pairing Heap: A New Form of Self-Adjusting Heap"
+ * https://www.cs.cmu.edu/~sleator/papers/pairing-heaps.pdf
+ *
+ * With an auxiliary two-pass list, described in a follow-on paper.
+ *
+ * "Pairing Heaps: Experiments and Analysis"
+ * http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.106.2988&rep=rep1&type=pdf
+ *
+ *******************************************************************************
+ */
+
+#ifndef PH_H_
+#define	PH_H_
+
+/* Node structure. */
+#define	phn(a_type)							\
+struct {								\
+	a_type	*phn_prev;						\
+	a_type	*phn_next;						\
+	a_type	*phn_lchild;						\
+}
+
+/* Root structure. */
+#define	ph(a_type)							\
+struct {								\
+	a_type	*ph_root;						\
+}
+
+/* Internal utility macros. */
+#define	phn_lchild_get(a_type, a_field, a_phn)				\
+	(a_phn->a_field.phn_lchild)
+#define	phn_lchild_set(a_type, a_field, a_phn, a_lchild) do {		\
+	a_phn->a_field.phn_lchild = a_lchild;				\
+} while (0)
+
+#define	phn_next_get(a_type, a_field, a_phn)				\
+	(a_phn->a_field.phn_next)
+#define	phn_prev_set(a_type, a_field, a_phn, a_prev) do {		\
+	a_phn->a_field.phn_prev = a_prev;				\
+} while (0)
+
+#define	phn_prev_get(a_type, a_field, a_phn)				\
+	(a_phn->a_field.phn_prev)
+#define	phn_next_set(a_type, a_field, a_phn, a_next) do {		\
+	a_phn->a_field.phn_next = a_next;				\
+} while (0)
+
+#define	phn_merge_ordered(a_type, a_field, a_phn0, a_phn1, a_cmp) do {	\
+	a_type *phn0child;						\
+									\
+	assert(a_phn0 != NULL);						\
+	assert(a_phn1 != NULL);						\
+	assert(a_cmp(a_phn0, a_phn1) <= 0);				\
+									\
+	phn_prev_set(a_type, a_field, a_phn1, a_phn0);			\
+	phn0child = phn_lchild_get(a_type, a_field, a_phn0);		\
+	phn_next_set(a_type, a_field, a_phn1, phn0child);		\
+	if (phn0child != NULL)						\
+		phn_prev_set(a_type, a_field, phn0child, a_phn1);	\
+	phn_lchild_set(a_type, a_field, a_phn0, a_phn1);		\
+} while (0)
+
+#define	phn_merge(a_type, a_field, a_phn0, a_phn1, a_cmp, r_phn) do {	\
+	if (a_phn0 == NULL)						\
+		r_phn = a_phn1;						\
+	else if (a_phn1 == NULL)					\
+		r_phn = a_phn0;						\
+	else if (a_cmp(a_phn0, a_phn1) < 0) {				\
+		phn_merge_ordered(a_type, a_field, a_phn0, a_phn1,	\
+		    a_cmp);						\
+		r_phn = a_phn0;						\
+	} else {							\
+		phn_merge_ordered(a_type, a_field, a_phn1, a_phn0,	\
+		    a_cmp);						\
+		r_phn = a_phn1;						\
+	}								\
+} while (0)
+
+#define	ph_merge_siblings(a_type, a_field, a_phn, a_cmp, r_phn) do {	\
+	a_type *head = NULL;						\
+	a_type *tail = NULL;						\
+	a_type *phn0 = a_phn;						\
+	a_type *phn1 = phn_next_get(a_type, a_field, phn0);		\
+									\
+	/*								\
+	 * Multipass merge, wherein the first two elements of a FIFO	\
+	 * are repeatedly merged, and each result is appended to the	\
+	 * singly linked FIFO, until the FIFO contains only a single	\
+	 * element.  We start with a sibling list but no reference to	\
+	 * its tail, so we do a single pass over the sibling list to	\
+	 * populate the FIFO.						\
+	 */								\
+	if (phn1 != NULL) {						\
+		a_type *phnrest = phn_next_get(a_type, a_field, phn1);	\
+		if (phnrest != NULL)					\
+			phn_prev_set(a_type, a_field, phnrest, NULL);	\
+		phn_prev_set(a_type, a_field, phn0, NULL);		\
+		phn_next_set(a_type, a_field, phn0, NULL);		\
+		phn_prev_set(a_type, a_field, phn1, NULL);		\
+		phn_next_set(a_type, a_field, phn1, NULL);		\
+		phn_merge(a_type, a_field, phn0, phn1, a_cmp, phn0);	\
+		head = tail = phn0;					\
+		phn0 = phnrest;						\
+		while (phn0 != NULL) {					\
+			phn1 = phn_next_get(a_type, a_field, phn0);	\
+			if (phn1 != NULL) {				\
+				phnrest = phn_next_get(a_type, a_field,	\
+				    phn1);				\
+				if (phnrest != NULL) {			\
+					phn_prev_set(a_type, a_field,	\
+					    phnrest, NULL);		\
+				}					\
+				phn_prev_set(a_type, a_field, phn0,	\
+				    NULL);				\
+				phn_next_set(a_type, a_field, phn0,	\
+				    NULL);				\
+				phn_prev_set(a_type, a_field, phn1,	\
+				    NULL);				\
+				phn_next_set(a_type, a_field, phn1,	\
+				    NULL);				\
+				phn_merge(a_type, a_field, phn0, phn1,	\
+				    a_cmp, phn0);			\
+				phn_next_set(a_type, a_field, tail,	\
+				    phn0);				\
+				tail = phn0;				\
+				phn0 = phnrest;				\
+			} else {					\
+				phn_next_set(a_type, a_field, tail,	\
+				    phn0);				\
+				tail = phn0;				\
+				phn0 = NULL;				\
+			}						\
+		}							\
+		phn0 = head;						\
+		phn1 = phn_next_get(a_type, a_field, phn0);		\
+		if (phn1 != NULL) {					\
+			while (true) {					\
+				head = phn_next_get(a_type, a_field,	\
+				    phn1);				\
+				assert(phn_prev_get(a_type, a_field,	\
+				    phn0) == NULL);			\
+				phn_next_set(a_type, a_field, phn0,	\
+				    NULL);				\
+				assert(phn_prev_get(a_type, a_field,	\
+				    phn1) == NULL);			\
+				phn_next_set(a_type, a_field, phn1,	\
+				    NULL);				\
+				phn_merge(a_type, a_field, phn0, phn1,	\
+				    a_cmp, phn0);			\
+				if (head == NULL)			\
+					break;				\
+				phn_next_set(a_type, a_field, tail,	\
+				    phn0);				\
+				tail = phn0;				\
+				phn0 = head;				\
+				phn1 = phn_next_get(a_type, a_field,	\
+				    phn0);				\
+			}						\
+		}							\
+	}								\
+	r_phn = phn0;							\
+} while (0)
+
+#define	ph_merge_aux(a_type, a_field, a_ph, a_cmp) do {			\
+	a_type *phn = phn_next_get(a_type, a_field, a_ph->ph_root);	\
+	if (phn != NULL) {						\
+		phn_prev_set(a_type, a_field, a_ph->ph_root, NULL);	\
+		phn_next_set(a_type, a_field, a_ph->ph_root, NULL);	\
+		phn_prev_set(a_type, a_field, phn, NULL);		\
+		ph_merge_siblings(a_type, a_field, phn, a_cmp, phn);	\
+		assert(phn_next_get(a_type, a_field, phn) == NULL);	\
+		phn_merge(a_type, a_field, a_ph->ph_root, phn, a_cmp,	\
+		    a_ph->ph_root);					\
+	}								\
+} while (0)
+
+#define	ph_merge_children(a_type, a_field, a_phn, a_cmp, r_phn) do {	\
+	a_type *lchild = phn_lchild_get(a_type, a_field, a_phn);	\
+	if (lchild == NULL)						\
+		r_phn = NULL;						\
+	else {								\
+		ph_merge_siblings(a_type, a_field, lchild, a_cmp,	\
+		    r_phn);						\
+	}								\
+} while (0)
+
+/*
+ * The ph_proto() macro generates function prototypes that correspond to the
+ * functions generated by an equivalently parameterized call to ph_gen().
+ */
+#define	ph_proto(a_attr, a_prefix, a_ph_type, a_type)			\
+a_attr void	a_prefix##new(a_ph_type *ph);				\
+a_attr bool	a_prefix##empty(a_ph_type *ph);				\
+a_attr a_type	*a_prefix##first(a_ph_type *ph);			\
+a_attr void	a_prefix##insert(a_ph_type *ph, a_type *phn);		\
+a_attr a_type	*a_prefix##remove_first(a_ph_type *ph);			\
+a_attr void	a_prefix##remove(a_ph_type *ph, a_type *phn);
+
+/*
+ * The ph_gen() macro generates a type-specific pairing heap implementation,
+ * based on the above cpp macros.
+ */
+#define	ph_gen(a_attr, a_prefix, a_ph_type, a_type, a_field, a_cmp)	\
+a_attr void								\
+a_prefix##new(a_ph_type *ph)						\
+{									\
+									\
+	memset(ph, 0, sizeof(ph(a_type)));				\
+}									\
+a_attr bool								\
+a_prefix##empty(a_ph_type *ph)						\
+{									\
+									\
+	return (ph->ph_root == NULL);					\
+}									\
+a_attr a_type *								\
+a_prefix##first(a_ph_type *ph)						\
+{									\
+									\
+	if (ph->ph_root == NULL)					\
+		return (NULL);						\
+	ph_merge_aux(a_type, a_field, ph, a_cmp);			\
+	return (ph->ph_root);						\
+}									\
+a_attr void								\
+a_prefix##insert(a_ph_type *ph, a_type *phn)				\
+{									\
+									\
+	memset(&phn->a_field, 0, sizeof(phn(a_type)));			\
+									\
+	/*								\
+	 * Treat the root as an aux list during insertion, and lazily	\
+	 * merge during a_prefix##remove_first().  For elements that	\
+	 * are inserted, then removed via a_prefix##remove() before the	\
+	 * aux list is ever processed, this makes insert/remove		\
+	 * constant-time, whereas eager merging would make insert	\
+	 * O(log n).							\
+	 */								\
+	if (ph->ph_root == NULL)					\
+		ph->ph_root = phn;					\
+	else {								\
+		phn_next_set(a_type, a_field, phn, phn_next_get(a_type,	\
+		    a_field, ph->ph_root));				\
+		if (phn_next_get(a_type, a_field, ph->ph_root) !=	\
+		    NULL) {						\
+			phn_prev_set(a_type, a_field,			\
+			    phn_next_get(a_type, a_field, ph->ph_root),	\
+			    phn);					\
+		}							\
+		phn_prev_set(a_type, a_field, phn, ph->ph_root);	\
+		phn_next_set(a_type, a_field, ph->ph_root, phn);	\
+	}								\
+}									\
+a_attr a_type *								\
+a_prefix##remove_first(a_ph_type *ph)					\
+{									\
+	a_type *ret;							\
+									\
+	if (ph->ph_root == NULL)					\
+		return (NULL);						\
+	ph_merge_aux(a_type, a_field, ph, a_cmp);			\
+									\
+	ret = ph->ph_root;						\
+									\
+	ph_merge_children(a_type, a_field, ph->ph_root, a_cmp,		\
+	    ph->ph_root);						\
+									\
+	return (ret);							\
+}									\
+a_attr void								\
+a_prefix##remove(a_ph_type *ph, a_type *phn)				\
+{									\
+	a_type *replace, *parent;					\
+									\
+	/*								\
+	 * We can delete from aux list without merging it, but we need	\
+	 * to merge if we are dealing with the root node.		\
+	 */								\
+	if (ph->ph_root == phn) {					\
+		ph_merge_aux(a_type, a_field, ph, a_cmp);		\
+		if (ph->ph_root == phn) {				\
+			ph_merge_children(a_type, a_field, ph->ph_root,	\
+			    a_cmp, ph->ph_root);			\
+			return;						\
+		}							\
+	}								\
+									\
+	/* Get parent (if phn is leftmost child) before mutating. */	\
+	if ((parent = phn_prev_get(a_type, a_field, phn)) != NULL) {	\
+		if (phn_lchild_get(a_type, a_field, parent) != phn)	\
+			parent = NULL;					\
+	}								\
+	/* Find a possible replacement node, and link to parent. */	\
+	ph_merge_children(a_type, a_field, phn, a_cmp, replace);	\
+	/* Set next/prev for sibling linked list. */			\
+	if (replace != NULL) {						\
+		if (parent != NULL) {					\
+			phn_prev_set(a_type, a_field, replace, parent);	\
+			phn_lchild_set(a_type, a_field, parent,		\
+			    replace);					\
+		} else {						\
+			phn_prev_set(a_type, a_field, replace,		\
+			    phn_prev_get(a_type, a_field, phn));	\
+			if (phn_prev_get(a_type, a_field, phn) !=	\
+			    NULL) {					\
+				phn_next_set(a_type, a_field,		\
+				    phn_prev_get(a_type, a_field, phn),	\
+				    replace);				\
+			}						\
+		}							\
+		phn_next_set(a_type, a_field, replace,			\
+		    phn_next_get(a_type, a_field, phn));		\
+		if (phn_next_get(a_type, a_field, phn) != NULL) {	\
+			phn_prev_set(a_type, a_field,			\
+			    phn_next_get(a_type, a_field, phn),		\
+			    replace);					\
+		}							\
+	} else {							\
+		if (parent != NULL) {					\
+			a_type *next = phn_next_get(a_type, a_field,	\
+			    phn);					\
+			phn_lchild_set(a_type, a_field, parent, next);	\
+			if (next != NULL) {				\
+				phn_prev_set(a_type, a_field, next,	\
+				    parent);				\
+			}						\
+		} else {						\
+			assert(phn_prev_get(a_type, a_field, phn) !=	\
+			    NULL);					\
+			phn_next_set(a_type, a_field,			\
+			    phn_prev_get(a_type, a_field, phn),		\
+			    phn_next_get(a_type, a_field, phn));	\
+		}							\
+		if (phn_next_get(a_type, a_field, phn) != NULL) {	\
+			phn_prev_set(a_type, a_field,			\
+			    phn_next_get(a_type, a_field, phn),		\
+			    phn_prev_get(a_type, a_field, phn));	\
+		}							\
+	}								\
+}
+
+#endif /* PH_H_ */
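
For orientation, ph_proto()/ph_gen() are used the same way as the existing rb macros: embed phn() linkage in the node type, declare the heap type with ph(), and instantiate type-specific functions with a three-way comparator. A minimal sketch assuming the jemalloc internal environment (node_t, node_heap_, and node_cmp are invented names):

typedef struct node_s node_t;
struct node_s {
	uint64_t	key;
	phn(node_t)	link;		/* Intrusive pairing-heap linkage. */
};
typedef ph(node_t) node_heap_t;

static int
node_cmp(const node_t *a, const node_t *b)
{

	return ((a->key > b->key) - (a->key < b->key));
}

/* Generates node_heap_new/empty/first/insert/remove_first/remove(). */
ph_proto(static, node_heap_, node_heap_t, node_t)
ph_gen(static, node_heap_, node_heap_t, node_t, link, node_cmp)
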
diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt
index 30516b4..f2b6a55 100644
--- a/include/jemalloc/internal/private_symbols.txt
+++ b/include/jemalloc/internal/private_symbols.txt
@@ -5,10 +5,12 @@
 arena_basic_stats_merge
 arena_bin_index
 arena_bin_info
-arena_bitselm_get
+arena_bitselm_get_const
+arena_bitselm_get_mutable
 arena_boot
 arena_choose
 arena_choose_hard
+arena_choose_impl
 arena_chunk_alloc_huge
 arena_chunk_cache_maybe_insert
 arena_chunk_cache_maybe_remove
@@ -21,9 +23,7 @@
 arena_dalloc_bin
 arena_dalloc_bin_junked_locked
 arena_dalloc_junk_large
-arena_dalloc_junk_large_impl
 arena_dalloc_junk_small
-arena_dalloc_junk_small_impl
 arena_dalloc_large
 arena_dalloc_large_junked_locked
 arena_dalloc_small
@@ -36,6 +36,7 @@
 arena_dss_prec_get
 arena_dss_prec_set
 arena_get
+arena_ichoose
 arena_init
 arena_lg_dirty_mult_default_get
 arena_lg_dirty_mult_default_set
@@ -62,7 +63,8 @@
 arena_mapbits_unallocated_size_get
 arena_mapbits_unallocated_size_set
 arena_mapbits_unzeroed_get
-arena_mapbitsp_get
+arena_mapbitsp_get_const
+arena_mapbitsp_get_mutable
 arena_mapbitsp_read
 arena_mapbitsp_write
 arena_maxrun
@@ -71,7 +73,8 @@
 arena_metadata_allocated_get
 arena_metadata_allocated_sub
 arena_migrate
-arena_miscelm_get
+arena_miscelm_get_const
+arena_miscelm_get_mutable
 arena_miscelm_to_pageind
 arena_miscelm_to_rpages
 arena_new
@@ -102,6 +105,7 @@
 arena_ralloc_no_move
 arena_rd_to_miscelm
 arena_redzone_corruption
+arena_reset
 arena_run_regind
 arena_run_to_miscelm
 arena_salloc
@@ -287,14 +291,11 @@
 huge_ralloc_no_move
 huge_salloc
 iaalloc
+ialloc
 iallocztm
-icalloc
-icalloct
+iarena_cleanup
 idalloc
-idalloct
 idalloctm
-imalloc
-imalloct
 in_valgrind
 index2size
 index2size_compute
@@ -320,6 +321,9 @@
 lg_floor
 lg_prof_sample
 malloc_cprintf
+malloc_mutex_assert_not_owner
+malloc_mutex_assert_owner
+malloc_mutex_boot
 malloc_mutex_init
 malloc_mutex_lock
 malloc_mutex_postfork_child
@@ -341,7 +345,7 @@
 map_bias
 map_misc_offset
 mb_write
-mutex_boot
+narenas_auto
 narenas_tdata_cleanup
 narenas_total_get
 ncpus
@@ -361,7 +365,6 @@
 nstime_sec
 nstime_subtract
 nstime_update
-nstime_update_impl
 opt_abort
 opt_decay_time
 opt_dss
@@ -391,6 +394,7 @@
 opt_xmalloc
 opt_zero
 p2rz
+pages_boot
 pages_commit
 pages_decommit
 pages_map
@@ -492,8 +496,6 @@
 tcache_alloc_large
 tcache_alloc_small
 tcache_alloc_small_hard
-tcache_arena_associate
-tcache_arena_dissociate
 tcache_arena_reassociate
 tcache_bin_flush_large
 tcache_bin_flush_small
@@ -539,19 +541,23 @@
 tsd_boot0
 tsd_boot1
 tsd_booted
+tsd_booted_get
 tsd_cleanup
 tsd_cleanup_wrapper
 tsd_fetch
 tsd_get
+tsd_iarena_get
+tsd_iarena_set
+tsd_iarenap_get
+tsd_initialized
+tsd_init_check_recursion
+tsd_init_finish
+tsd_init_head
 tsd_narenas_tdata_get
 tsd_narenas_tdata_set
 tsd_narenas_tdatap_get
 tsd_wrapper_get
 tsd_wrapper_set
-tsd_initialized
-tsd_init_check_recursion
-tsd_init_finish
-tsd_init_head
 tsd_nominal
 tsd_prof_tdata_get
 tsd_prof_tdata_set
@@ -574,8 +580,33 @@
 tsd_thread_deallocatedp_get
 tsd_tls
 tsd_tsd
+tsd_tsdn
+tsd_witness_fork_get
+tsd_witness_fork_set
+tsd_witness_forkp_get
+tsd_witnesses_get
+tsd_witnesses_set
+tsd_witnessesp_get
+tsdn_fetch
+tsdn_null
+tsdn_tsd
 u2rz
 valgrind_freelike_block
 valgrind_make_mem_defined
 valgrind_make_mem_noaccess
 valgrind_make_mem_undefined
+witness_assert_lockless
+witness_assert_not_owner
+witness_assert_owner
+witness_fork_cleanup
+witness_init
+witness_lock
+witness_lock_error
+witness_lockless_error
+witness_not_owner_error
+witness_owner_error
+witness_postfork_child
+witness_postfork_parent
+witness_prefork
+witness_unlock
+witnesses_cleanup
diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h
index 48dd6cc..691e153 100644
--- a/include/jemalloc/internal/prof.h
+++ b/include/jemalloc/internal/prof.h
@@ -281,7 +281,7 @@
 extern size_t	lg_prof_sample;
 
 void	prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated);
-void	prof_malloc_sample_object(const void *ptr, size_t usize,
+void	prof_malloc_sample_object(tsdn_t *tsdn, const void *ptr, size_t usize,
     prof_tctx_t *tctx);
 void	prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_tctx_t *tctx);
 void	bt_init(prof_bt_t *bt, void **vec);
@@ -293,33 +293,33 @@
 const prof_cnt_t *prof_cnt_all(void);
 typedef int (prof_dump_open_t)(bool, const char *);
 extern prof_dump_open_t *prof_dump_open;
-typedef bool (prof_dump_header_t)(bool, const prof_cnt_t *);
+typedef bool (prof_dump_header_t)(tsdn_t *, bool, const prof_cnt_t *);
 extern prof_dump_header_t *prof_dump_header;
 #endif
-void	prof_idump(void);
-bool	prof_mdump(const char *filename);
-void	prof_gdump(void);
-prof_tdata_t	*prof_tdata_init(tsd_t *tsd);
+void	prof_idump(tsdn_t *tsdn);
+bool	prof_mdump(tsd_t *tsd, const char *filename);
+void	prof_gdump(tsdn_t *tsdn);
+prof_tdata_t	*prof_tdata_init(tsdn_t *tsdn);
 prof_tdata_t	*prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata);
-void	prof_reset(tsd_t *tsd, size_t lg_sample);
+void	prof_reset(tsdn_t *tsdn, size_t lg_sample);
 void	prof_tdata_cleanup(tsd_t *tsd);
-const char	*prof_thread_name_get(void);
-bool	prof_active_get(void);
-bool	prof_active_set(bool active);
+bool	prof_active_get(tsdn_t *tsdn);
+bool	prof_active_set(tsdn_t *tsdn, bool active);
+const char	*prof_thread_name_get(tsd_t *tsd);
 int	prof_thread_name_set(tsd_t *tsd, const char *thread_name);
-bool	prof_thread_active_get(void);
-bool	prof_thread_active_set(bool active);
-bool	prof_thread_active_init_get(void);
-bool	prof_thread_active_init_set(bool active_init);
-bool	prof_gdump_get(void);
-bool	prof_gdump_set(bool active);
+bool	prof_thread_active_get(tsd_t *tsd);
+bool	prof_thread_active_set(tsd_t *tsd, bool active);
+bool	prof_thread_active_init_get(tsdn_t *tsdn);
+bool	prof_thread_active_init_set(tsdn_t *tsdn, bool active_init);
+bool	prof_gdump_get(tsdn_t *tsdn);
+bool	prof_gdump_set(tsdn_t *tsdn, bool active);
 void	prof_boot0(void);
 void	prof_boot1(void);
-bool	prof_boot2(void);
-void	prof_prefork0(void);
-void	prof_prefork1(void);
-void	prof_postfork_parent(void);
-void	prof_postfork_child(void);
+bool	prof_boot2(tsdn_t *tsdn);
+void	prof_prefork0(tsdn_t *tsdn);
+void	prof_prefork1(tsdn_t *tsdn);
+void	prof_postfork_parent(tsdn_t *tsdn);
+void	prof_postfork_child(tsdn_t *tsdn);
 void	prof_sample_threshold_update(prof_tdata_t *tdata);
 
 #endif /* JEMALLOC_H_EXTERNS */
@@ -330,17 +330,17 @@
 bool	prof_active_get_unlocked(void);
 bool	prof_gdump_get_unlocked(void);
 prof_tdata_t	*prof_tdata_get(tsd_t *tsd, bool create);
+prof_tctx_t	*prof_tctx_get(tsdn_t *tsdn, const void *ptr);
+void	prof_tctx_set(tsdn_t *tsdn, const void *ptr, size_t usize,
+    prof_tctx_t *tctx);
+void	prof_tctx_reset(tsdn_t *tsdn, const void *ptr, size_t usize,
+    const void *old_ptr, prof_tctx_t *tctx);
 bool	prof_sample_accum_update(tsd_t *tsd, size_t usize, bool commit,
     prof_tdata_t **tdata_out);
 prof_tctx_t	*prof_alloc_prep(tsd_t *tsd, size_t usize, bool prof_active,
     bool update);
-prof_tctx_t	*prof_tctx_get(const void *ptr);
-void	prof_tctx_set(const void *ptr, size_t usize, prof_tctx_t *tctx);
-void	prof_tctx_reset(const void *ptr, size_t usize, const void *old_ptr,
+void	prof_malloc(tsdn_t *tsdn, const void *ptr, size_t usize,
     prof_tctx_t *tctx);
-void	prof_malloc_sample_object(const void *ptr, size_t usize,
-    prof_tctx_t *tctx);
-void	prof_malloc(const void *ptr, size_t usize, prof_tctx_t *tctx);
 void	prof_realloc(tsd_t *tsd, const void *ptr, size_t usize,
     prof_tctx_t *tctx, bool prof_active, bool updated, const void *old_ptr,
     size_t old_usize, prof_tctx_t *old_tctx);
@@ -384,7 +384,7 @@
 	if (create) {
 		if (unlikely(tdata == NULL)) {
 			if (tsd_nominal(tsd)) {
-				tdata = prof_tdata_init(tsd);
+				tdata = prof_tdata_init(tsd_tsdn(tsd));
 				tsd_prof_tdata_set(tsd, tdata);
 			}
 		} else if (unlikely(tdata->expired)) {
@@ -398,34 +398,34 @@
 }
 
 JEMALLOC_ALWAYS_INLINE prof_tctx_t *
-prof_tctx_get(const void *ptr)
+prof_tctx_get(tsdn_t *tsdn, const void *ptr)
 {
 
 	cassert(config_prof);
 	assert(ptr != NULL);
 
-	return (arena_prof_tctx_get(ptr));
+	return (arena_prof_tctx_get(tsdn, ptr));
 }
 
 JEMALLOC_ALWAYS_INLINE void
-prof_tctx_set(const void *ptr, size_t usize, prof_tctx_t *tctx)
+prof_tctx_set(tsdn_t *tsdn, const void *ptr, size_t usize, prof_tctx_t *tctx)
 {
 
 	cassert(config_prof);
 	assert(ptr != NULL);
 
-	arena_prof_tctx_set(ptr, usize, tctx);
+	arena_prof_tctx_set(tsdn, ptr, usize, tctx);
 }
 
 JEMALLOC_ALWAYS_INLINE void
-prof_tctx_reset(const void *ptr, size_t usize, const void *old_ptr,
+prof_tctx_reset(tsdn_t *tsdn, const void *ptr, size_t usize, const void *old_ptr,
     prof_tctx_t *old_tctx)
 {
 
 	cassert(config_prof);
 	assert(ptr != NULL);
 
-	arena_prof_tctx_reset(ptr, usize, old_ptr, old_tctx);
+	arena_prof_tctx_reset(tsdn, ptr, usize, old_ptr, old_tctx);
 }
 
 JEMALLOC_ALWAYS_INLINE bool
@@ -480,17 +480,17 @@
 }
 
 JEMALLOC_ALWAYS_INLINE void
-prof_malloc(const void *ptr, size_t usize, prof_tctx_t *tctx)
+prof_malloc(tsdn_t *tsdn, const void *ptr, size_t usize, prof_tctx_t *tctx)
 {
 
 	cassert(config_prof);
 	assert(ptr != NULL);
-	assert(usize == isalloc(ptr, true));
+	assert(usize == isalloc(tsdn, ptr, true));
 
 	if (unlikely((uintptr_t)tctx > (uintptr_t)1U))
-		prof_malloc_sample_object(ptr, usize, tctx);
+		prof_malloc_sample_object(tsdn, ptr, usize, tctx);
 	else
-		prof_tctx_set(ptr, usize, (prof_tctx_t *)(uintptr_t)1U);
+		prof_tctx_set(tsdn, ptr, usize, (prof_tctx_t *)(uintptr_t)1U);
 }
 
 JEMALLOC_ALWAYS_INLINE void
@@ -504,7 +504,7 @@
 	assert(ptr != NULL || (uintptr_t)tctx <= (uintptr_t)1U);
 
 	if (prof_active && !updated && ptr != NULL) {
-		assert(usize == isalloc(ptr, true));
+		assert(usize == isalloc(tsd_tsdn(tsd), ptr, true));
 		if (prof_sample_accum_update(tsd, usize, true, NULL)) {
 			/*
 			 * Don't sample.  The usize passed to prof_alloc_prep()
@@ -521,9 +521,9 @@
 	old_sampled = ((uintptr_t)old_tctx > (uintptr_t)1U);
 
 	if (unlikely(sampled))
-		prof_malloc_sample_object(ptr, usize, tctx);
+		prof_malloc_sample_object(tsd_tsdn(tsd), ptr, usize, tctx);
 	else
-		prof_tctx_reset(ptr, usize, old_ptr, old_tctx);
+		prof_tctx_reset(tsd_tsdn(tsd), ptr, usize, old_ptr, old_tctx);
 
 	if (unlikely(old_sampled))
 		prof_free_sampled_object(tsd, old_usize, old_tctx);
@@ -532,10 +532,10 @@
 JEMALLOC_ALWAYS_INLINE void
 prof_free(tsd_t *tsd, const void *ptr, size_t usize)
 {
-	prof_tctx_t *tctx = prof_tctx_get(ptr);
+	prof_tctx_t *tctx = prof_tctx_get(tsd_tsdn(tsd), ptr);
 
 	cassert(config_prof);
-	assert(usize == isalloc(ptr, true));
+	assert(usize == isalloc(tsd_tsdn(tsd), ptr, true));
 
 	if (unlikely((uintptr_t)tctx > (uintptr_t)1U))
 		prof_free_sampled_object(tsd, usize, tctx);
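
prof_mdump() above is the internal target of the public "prof.dump" control. A small sketch of triggering a dump from application code, assuming a build with profiling enabled and active (the wrapper name is illustrative):

#include <jemalloc/jemalloc.h>

static int
dump_heap_profile(const char *filename)
{

	/* A NULL filename falls back to the opt.prof_prefix-based default. */
	if (filename == NULL)
		return (mallctl("prof.dump", NULL, NULL, NULL, 0));
	return (mallctl("prof.dump", NULL, NULL, (void *)&filename,
	    sizeof(const char *)));
}
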
diff --git a/include/jemalloc/internal/rtree.h b/include/jemalloc/internal/rtree.h
index 28ae9d1..8d0c584 100644
--- a/include/jemalloc/internal/rtree.h
+++ b/include/jemalloc/internal/rtree.h
@@ -15,9 +15,10 @@
  * machine address width.
  */
 #define	LG_RTREE_BITS_PER_LEVEL	4
-#define	RTREE_BITS_PER_LEVEL	(ZU(1) << LG_RTREE_BITS_PER_LEVEL)
+#define	RTREE_BITS_PER_LEVEL	(1U << LG_RTREE_BITS_PER_LEVEL)
+/* Maximum rtree height. */
 #define	RTREE_HEIGHT_MAX						\
-    ((ZU(1) << (LG_SIZEOF_PTR+3)) / RTREE_BITS_PER_LEVEL)
+    ((1U << (LG_SIZEOF_PTR+3)) / RTREE_BITS_PER_LEVEL)
 
 /* Used for two-stage lock-free node initialization. */
 #define	RTREE_NODE_INITIALIZING	((rtree_node_elm_t *)0x1)
@@ -111,22 +112,25 @@
 uintptr_t	rtree_subkey(rtree_t *rtree, uintptr_t key, unsigned level);
 
 bool	rtree_node_valid(rtree_node_elm_t *node);
-rtree_node_elm_t	*rtree_child_tryread(rtree_node_elm_t *elm);
+rtree_node_elm_t	*rtree_child_tryread(rtree_node_elm_t *elm,
+    bool dependent);
 rtree_node_elm_t	*rtree_child_read(rtree_t *rtree, rtree_node_elm_t *elm,
-    unsigned level);
+    unsigned level, bool dependent);
 extent_node_t	*rtree_val_read(rtree_t *rtree, rtree_node_elm_t *elm,
     bool dependent);
 void	rtree_val_write(rtree_t *rtree, rtree_node_elm_t *elm,
     const extent_node_t *val);
-rtree_node_elm_t	*rtree_subtree_tryread(rtree_t *rtree, unsigned level);
-rtree_node_elm_t	*rtree_subtree_read(rtree_t *rtree, unsigned level);
+rtree_node_elm_t	*rtree_subtree_tryread(rtree_t *rtree, unsigned level,
+    bool dependent);
+rtree_node_elm_t	*rtree_subtree_read(rtree_t *rtree, unsigned level,
+    bool dependent);
 
 extent_node_t	*rtree_get(rtree_t *rtree, uintptr_t key, bool dependent);
 bool	rtree_set(rtree_t *rtree, uintptr_t key, const extent_node_t *val);
 #endif
 
 #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_RTREE_C_))
-JEMALLOC_INLINE unsigned
+JEMALLOC_ALWAYS_INLINE unsigned
 rtree_start_level(rtree_t *rtree, uintptr_t key)
 {
 	unsigned start_level;
@@ -140,7 +144,7 @@
 	return (start_level);
 }
 
-JEMALLOC_INLINE uintptr_t
+JEMALLOC_ALWAYS_INLINE uintptr_t
 rtree_subkey(rtree_t *rtree, uintptr_t key, unsigned level)
 {
 
@@ -149,37 +153,40 @@
 	    rtree->levels[level].bits) - 1));
 }
 
-JEMALLOC_INLINE bool
+JEMALLOC_ALWAYS_INLINE bool
 rtree_node_valid(rtree_node_elm_t *node)
 {
 
 	return ((uintptr_t)node > (uintptr_t)RTREE_NODE_INITIALIZING);
 }
 
-JEMALLOC_INLINE rtree_node_elm_t *
-rtree_child_tryread(rtree_node_elm_t *elm)
+JEMALLOC_ALWAYS_INLINE rtree_node_elm_t *
+rtree_child_tryread(rtree_node_elm_t *elm, bool dependent)
 {
 	rtree_node_elm_t *child;
 
 	/* Double-checked read (first read may be stale). */
 	child = elm->child;
-	if (!rtree_node_valid(child))
+	if (!dependent && !rtree_node_valid(child))
 		child = atomic_read_p(&elm->pun);
+	assert(!dependent || child != NULL);
 	return (child);
 }
 
-JEMALLOC_INLINE rtree_node_elm_t *
-rtree_child_read(rtree_t *rtree, rtree_node_elm_t *elm, unsigned level)
+JEMALLOC_ALWAYS_INLINE rtree_node_elm_t *
+rtree_child_read(rtree_t *rtree, rtree_node_elm_t *elm, unsigned level,
+    bool dependent)
 {
 	rtree_node_elm_t *child;
 
-	child = rtree_child_tryread(elm);
-	if (unlikely(!rtree_node_valid(child)))
+	child = rtree_child_tryread(elm, dependent);
+	if (!dependent && unlikely(!rtree_node_valid(child)))
 		child = rtree_child_read_hard(rtree, elm, level);
+	assert(!dependent || child != NULL);
 	return (child);
 }
 
-JEMALLOC_INLINE extent_node_t *
+JEMALLOC_ALWAYS_INLINE extent_node_t *
 rtree_val_read(rtree_t *rtree, rtree_node_elm_t *elm, bool dependent)
 {
 
@@ -208,54 +215,119 @@
 	atomic_write_p(&elm->pun, val);
 }
 
-JEMALLOC_INLINE rtree_node_elm_t *
-rtree_subtree_tryread(rtree_t *rtree, unsigned level)
+JEMALLOC_ALWAYS_INLINE rtree_node_elm_t *
+rtree_subtree_tryread(rtree_t *rtree, unsigned level, bool dependent)
 {
 	rtree_node_elm_t *subtree;
 
 	/* Double-checked read (first read may be stale). */
 	subtree = rtree->levels[level].subtree;
-	if (!rtree_node_valid(subtree))
+	if (!dependent && unlikely(!rtree_node_valid(subtree)))
 		subtree = atomic_read_p(&rtree->levels[level].subtree_pun);
+	assert(!dependent || subtree != NULL);
 	return (subtree);
 }
 
-JEMALLOC_INLINE rtree_node_elm_t *
-rtree_subtree_read(rtree_t *rtree, unsigned level)
+JEMALLOC_ALWAYS_INLINE rtree_node_elm_t *
+rtree_subtree_read(rtree_t *rtree, unsigned level, bool dependent)
 {
 	rtree_node_elm_t *subtree;
 
-	subtree = rtree_subtree_tryread(rtree, level);
-	if (unlikely(!rtree_node_valid(subtree)))
+	subtree = rtree_subtree_tryread(rtree, level, dependent);
+	if (!dependent && unlikely(!rtree_node_valid(subtree)))
 		subtree = rtree_subtree_read_hard(rtree, level);
+	assert(!dependent || subtree != NULL);
 	return (subtree);
 }
 
-JEMALLOC_INLINE extent_node_t *
+JEMALLOC_ALWAYS_INLINE extent_node_t *
 rtree_get(rtree_t *rtree, uintptr_t key, bool dependent)
 {
 	uintptr_t subkey;
-	unsigned i, start_level;
-	rtree_node_elm_t *node, *child;
+	unsigned start_level;
+	rtree_node_elm_t *node;
 
 	start_level = rtree_start_level(rtree, key);
 
-	for (i = start_level, node = rtree_subtree_tryread(rtree, start_level);
-	    /**/; i++, node = child) {
-		if (!dependent && unlikely(!rtree_node_valid(node)))
-			return (NULL);
-		subkey = rtree_subkey(rtree, key, i);
-		if (i == rtree->height - 1) {
-			/*
-			 * node is a leaf, so it contains values rather than
-			 * child pointers.
-			 */
-			return (rtree_val_read(rtree, &node[subkey],
-			    dependent));
-		}
-		assert(i < rtree->height - 1);
-		child = rtree_child_tryread(&node[subkey]);
+	node = rtree_subtree_tryread(rtree, start_level, dependent);
+#define	RTREE_GET_BIAS	(RTREE_HEIGHT_MAX - rtree->height)
+	switch (start_level + RTREE_GET_BIAS) {
+#define	RTREE_GET_SUBTREE(level)					\
+	case level:							\
+		assert(level < (RTREE_HEIGHT_MAX-1));			\
+		if (!dependent && unlikely(!rtree_node_valid(node)))	\
+			return (NULL);					\
+		subkey = rtree_subkey(rtree, key, level -		\
+		    RTREE_GET_BIAS);					\
+		node = rtree_child_tryread(&node[subkey], dependent);	\
+		/* Fall through. */
+#define	RTREE_GET_LEAF(level)						\
+	case level:							\
+		assert(level == (RTREE_HEIGHT_MAX-1));			\
+		if (!dependent && unlikely(!rtree_node_valid(node)))	\
+			return (NULL);					\
+		subkey = rtree_subkey(rtree, key, level -		\
+		    RTREE_GET_BIAS);					\
+		/*							\
+		 * node is a leaf, so it contains values rather than	\
+		 * child pointers.					\
+		 */							\
+		return (rtree_val_read(rtree, &node[subkey],		\
+		    dependent));
+#if RTREE_HEIGHT_MAX > 1
+	RTREE_GET_SUBTREE(0)
+#endif
+#if RTREE_HEIGHT_MAX > 2
+	RTREE_GET_SUBTREE(1)
+#endif
+#if RTREE_HEIGHT_MAX > 3
+	RTREE_GET_SUBTREE(2)
+#endif
+#if RTREE_HEIGHT_MAX > 4
+	RTREE_GET_SUBTREE(3)
+#endif
+#if RTREE_HEIGHT_MAX > 5
+	RTREE_GET_SUBTREE(4)
+#endif
+#if RTREE_HEIGHT_MAX > 6
+	RTREE_GET_SUBTREE(5)
+#endif
+#if RTREE_HEIGHT_MAX > 7
+	RTREE_GET_SUBTREE(6)
+#endif
+#if RTREE_HEIGHT_MAX > 8
+	RTREE_GET_SUBTREE(7)
+#endif
+#if RTREE_HEIGHT_MAX > 9
+	RTREE_GET_SUBTREE(8)
+#endif
+#if RTREE_HEIGHT_MAX > 10
+	RTREE_GET_SUBTREE(9)
+#endif
+#if RTREE_HEIGHT_MAX > 11
+	RTREE_GET_SUBTREE(10)
+#endif
+#if RTREE_HEIGHT_MAX > 12
+	RTREE_GET_SUBTREE(11)
+#endif
+#if RTREE_HEIGHT_MAX > 13
+	RTREE_GET_SUBTREE(12)
+#endif
+#if RTREE_HEIGHT_MAX > 14
+	RTREE_GET_SUBTREE(13)
+#endif
+#if RTREE_HEIGHT_MAX > 15
+	RTREE_GET_SUBTREE(14)
+#endif
+#if RTREE_HEIGHT_MAX > 16
+#  error Unsupported RTREE_HEIGHT_MAX
+#endif
+	RTREE_GET_LEAF(RTREE_HEIGHT_MAX-1)
+#undef RTREE_GET_SUBTREE
+#undef RTREE_GET_LEAF
+	default: not_reached();
 	}
+#undef RTREE_GET_BIAS
 	not_reached();
 }
 
@@ -268,7 +340,7 @@
 
 	start_level = rtree_start_level(rtree, key);
 
-	node = rtree_subtree_read(rtree, start_level);
+	node = rtree_subtree_read(rtree, start_level, false);
 	if (node == NULL)
 		return (true);
 	for (i = start_level; /**/; i++, node = child) {
@@ -282,7 +354,7 @@
 			return (false);
 		}
 		assert(i + 1 < rtree->height);
-		child = rtree_child_read(rtree, &node[subkey], i);
+		child = rtree_child_read(rtree, &node[subkey], i, false);
 		if (child == NULL)
 			return (true);
 	}
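
The unrolled rtree_get() above depends on RTREE_GET_BIAS being constant across the switch so each case keeps a compile-time level. A short worked example of the arithmetic, assuming a 64-bit build (LG_SIZEOF_PTR == 3):

/*
 * RTREE_BITS_PER_LEVEL == 1U << 4 == 16, so RTREE_HEIGHT_MAX == 64/16 == 4.
 * For an rtree built with height == 3, RTREE_GET_BIAS == 4 - 3 == 1: a lookup
 * whose start_level is 0 enters the switch at case 1, falls through
 * RTREE_GET_SUBTREE(1) and RTREE_GET_SUBTREE(2) (levels 0 and 1 after
 * subtracting the bias), and terminates in RTREE_GET_LEAF(RTREE_HEIGHT_MAX-1),
 * which reads the leaf at level 2.
 */
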
diff --git a/include/jemalloc/internal/stats.h b/include/jemalloc/internal/stats.h
index 705903a..b621817 100644
--- a/include/jemalloc/internal/stats.h
+++ b/include/jemalloc/internal/stats.h
@@ -103,6 +103,14 @@
 	size_t		mapped;
 
 	/*
+	 * Number of bytes currently retained as a side effect of munmap() being
+	 * disabled/bypassed.  Retained bytes are technically mapped (though
+	 * always decommitted or purged), but they are excluded from the mapped
+	 * statistic (above).
+	 */
+	size_t		retained;
+
+	/*
 	 * Total number of purge sweeps, total number of madvise calls made,
 	 * and total pages purged in order to keep dirty unused memory under
 	 * control.
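
The new retained counter is exposed through the stats.retained and stats.arenas.<i>.retained mallctls. A small sketch of reading the global value, assuming a stats-enabled build and the default unprefixed public API:

#include <stdint.h>
#include <jemalloc/jemalloc.h>

static size_t
read_retained(void)
{
	uint64_t epoch = 1;
	size_t retained, sz = sizeof(retained);

	/* Statistics are cached; bump the epoch to refresh the snapshot. */
	mallctl("epoch", NULL, NULL, &epoch, sizeof(epoch));
	if (mallctl("stats.retained", &retained, &sz, NULL, 0) != 0)
		return (0);
	return (retained);
}
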
diff --git a/include/jemalloc/internal/tcache.h b/include/jemalloc/internal/tcache.h
index 8357820..70883b1 100644
--- a/include/jemalloc/internal/tcache.h
+++ b/include/jemalloc/internal/tcache.h
@@ -130,27 +130,25 @@
  */
 extern tcaches_t	*tcaches;
 
-size_t	tcache_salloc(const void *ptr);
+size_t	tcache_salloc(tsdn_t *tsdn, const void *ptr);
 void	tcache_event_hard(tsd_t *tsd, tcache_t *tcache);
-void	*tcache_alloc_small_hard(tsd_t *tsd, arena_t *arena, tcache_t *tcache,
+void	*tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache,
     tcache_bin_t *tbin, szind_t binind, bool *tcache_success);
 void	tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin,
     szind_t binind, unsigned rem);
 void	tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind,
     unsigned rem, tcache_t *tcache);
-void	tcache_arena_associate(tcache_t *tcache, arena_t *arena);
-void	tcache_arena_reassociate(tcache_t *tcache, arena_t *oldarena,
-    arena_t *newarena);
-void	tcache_arena_dissociate(tcache_t *tcache, arena_t *arena);
+void	tcache_arena_reassociate(tsdn_t *tsdn, tcache_t *tcache,
+    arena_t *oldarena, arena_t *newarena);
 tcache_t *tcache_get_hard(tsd_t *tsd);
-tcache_t *tcache_create(tsd_t *tsd, arena_t *arena);
+tcache_t *tcache_create(tsdn_t *tsdn, arena_t *arena);
 void	tcache_cleanup(tsd_t *tsd);
 void	tcache_enabled_cleanup(tsd_t *tsd);
-void	tcache_stats_merge(tcache_t *tcache, arena_t *arena);
-bool	tcaches_create(tsd_t *tsd, unsigned *r_ind);
+void	tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena);
+bool	tcaches_create(tsdn_t *tsdn, unsigned *r_ind);
 void	tcaches_flush(tsd_t *tsd, unsigned ind);
 void	tcaches_destroy(tsd_t *tsd, unsigned ind);
-bool	tcache_boot(void);
+bool	tcache_boot(tsdn_t *tsdn);
 
 #endif /* JEMALLOC_H_EXTERNS */
 /******************************************************************************/
@@ -297,8 +295,8 @@
 		if (unlikely(arena == NULL))
 			return (NULL);
 
-		ret = tcache_alloc_small_hard(tsd, arena, tcache, tbin, binind,
-			&tcache_hard_success);
+		ret = tcache_alloc_small_hard(tsd_tsdn(tsd), arena, tcache,
+		    tbin, binind, &tcache_hard_success);
 		if (tcache_hard_success == false)
 			return (NULL);
 	}
@@ -310,7 +308,7 @@
 	 */
 	if (config_prof || (slow_path && config_fill) || unlikely(zero)) {
 		usize = index2size(binind);
-		assert(tcache_salloc(ret) == usize);
+		assert(tcache_salloc(tsd_tsdn(tsd), ret) == usize);
 	}
 
 	if (likely(!zero)) {
@@ -358,7 +356,7 @@
 		if (unlikely(arena == NULL))
 			return (NULL);
 
-		ret = arena_malloc_large(tsd, arena, binind, zero);
+		ret = arena_malloc_large(tsd_tsdn(tsd), arena, binind, zero);
 		if (ret == NULL)
 			return (NULL);
 	} else {
@@ -381,9 +379,10 @@
 		}
 		if (likely(!zero)) {
 			if (slow_path && config_fill) {
-				if (unlikely(opt_junk_alloc))
-					memset(ret, 0xa5, usize);
-				else if (unlikely(opt_zero))
+				if (unlikely(opt_junk_alloc)) {
+					memset(ret, JEMALLOC_ALLOC_JUNK,
+					    usize);
+				} else if (unlikely(opt_zero))
 					memset(ret, 0, usize);
 			}
 		} else
@@ -406,7 +405,7 @@
 	tcache_bin_t *tbin;
 	tcache_bin_info_t *tbin_info;
 
-	assert(tcache_salloc(ptr) <= SMALL_MAXCLASS);
+	assert(tcache_salloc(tsd_tsdn(tsd), ptr) <= SMALL_MAXCLASS);
 
 	if (slow_path && config_fill && unlikely(opt_junk_free))
 		arena_dalloc_junk_small(ptr, &arena_bin_info[binind]);
@@ -433,8 +432,8 @@
 	tcache_bin_info_t *tbin_info;
 
 	assert((size & PAGE_MASK) == 0);
-	assert(tcache_salloc(ptr) > SMALL_MAXCLASS);
-	assert(tcache_salloc(ptr) <= tcache_maxclass);
+	assert(tcache_salloc(tsd_tsdn(tsd), ptr) > SMALL_MAXCLASS);
+	assert(tcache_salloc(tsd_tsdn(tsd), ptr) <= tcache_maxclass);
 
 	binind = size2index(size);
 
@@ -458,8 +457,10 @@
 tcaches_get(tsd_t *tsd, unsigned ind)
 {
 	tcaches_t *elm = &tcaches[ind];
-	if (unlikely(elm->tcache == NULL))
-		elm->tcache = tcache_create(tsd, arena_choose(tsd, NULL));
+	if (unlikely(elm->tcache == NULL)) {
+		elm->tcache = tcache_create(tsd_tsdn(tsd), arena_choose(tsd,
+		    NULL));
+	}
 	return (elm->tcache);
 }
 #endif
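
The tcaches_*() functions above back the explicit thread-cache facility, and
the internal API now threads tsdn_t/tsd_t through creation and stats merging.
A hedged usage sketch from the public side; the "tcache.create" mallctl name
and the MALLOCX_TCACHE() flag macro are assumptions about the surrounding
public interface rather than part of this hunk:

    #include <stddef.h>
    #include <jemalloc/jemalloc.h>

    /* Create an explicit tcache and allocate through it. */
    static void *
    alloc_with_explicit_tcache(size_t size)
    {
    	unsigned tc;
    	size_t sz = sizeof(tc);
    	void *p;

    	if (mallctl("tcache.create", &tc, &sz, NULL, 0) != 0)
    		return (NULL);
    	p = mallocx(size, MALLOCX_TCACHE(tc));
    	/* ...use p; eventually dallocx(p, MALLOCX_TCACHE(tc)). */
    	return (p);
    }
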
diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h
index 16cc2f1..bf11341 100644
--- a/include/jemalloc/internal/tsd.h
+++ b/include/jemalloc/internal/tsd.h
@@ -13,6 +13,9 @@
 #endif
 
 typedef struct tsd_s tsd_t;
+typedef struct tsdn_s tsdn_t;
+
+#define	TSDN_NULL	((tsdn_t *)0)
 
 typedef enum {
 	tsd_state_uninitialized,
@@ -44,6 +47,7 @@
  * The result is a set of generated functions, e.g.:
  *
  *   bool example_tsd_boot(void) {...}
+ *   bool example_tsd_booted_get(void) {...}
  *   example_t *example_tsd_get() {...}
  *   void example_tsd_set(example_t *val) {...}
  *
@@ -98,6 +102,8 @@
 a_name##tsd_boot1(void);						\
 a_attr bool								\
 a_name##tsd_boot(void);							\
+a_attr bool								\
+a_name##tsd_booted_get(void);						\
 a_attr a_type *								\
 a_name##tsd_get(void);							\
 a_attr void								\
@@ -201,6 +207,12 @@
 									\
 	return (a_name##tsd_boot0());					\
 }									\
+a_attr bool								\
+a_name##tsd_booted_get(void)						\
+{									\
+									\
+	return (a_name##tsd_booted);					\
+}									\
 /* Get/set. */								\
 a_attr a_type *								\
 a_name##tsd_get(void)							\
@@ -246,6 +258,12 @@
 									\
 	return (a_name##tsd_boot0());					\
 }									\
+a_attr bool								\
+a_name##tsd_booted_get(void)						\
+{									\
+									\
+	return (a_name##tsd_booted);					\
+}									\
 /* Get/set. */								\
 a_attr a_type *								\
 a_name##tsd_get(void)							\
@@ -368,6 +386,12 @@
 	a_name##tsd_boot1();						\
 	return (false);							\
 }									\
+a_attr bool								\
+a_name##tsd_booted_get(void)						\
+{									\
+									\
+	return (a_name##tsd_booted);					\
+}									\
 /* Get/set. */								\
 a_attr a_type *								\
 a_name##tsd_get(void)							\
@@ -490,6 +514,12 @@
 	a_name##tsd_boot1();						\
 	return (false);							\
 }									\
+a_attr bool								\
+a_name##tsd_booted_get(void)						\
+{									\
+									\
+	return (a_name##tsd_booted);					\
+}									\
 /* Get/set. */								\
 a_attr a_type *								\
 a_name##tsd_get(void)							\
@@ -536,12 +566,15 @@
     O(thread_allocated,		uint64_t)				\
     O(thread_deallocated,	uint64_t)				\
     O(prof_tdata,		prof_tdata_t *)				\
+    O(iarena,			arena_t *)				\
     O(arena,			arena_t *)				\
     O(arenas_tdata,		arena_tdata_t *)			\
     O(narenas_tdata,		unsigned)				\
     O(arenas_tdata_bypass,	bool)					\
     O(tcache_enabled,		tcache_enabled_t)			\
     O(quarantine,		quarantine_t *)				\
+    O(witnesses,		witness_list_t)				\
+    O(witness_fork,		bool)					\
 
 #define	TSD_INITIALIZER {						\
     tsd_state_uninitialized,						\
@@ -551,10 +584,13 @@
     NULL,								\
     NULL,								\
     NULL,								\
+    NULL,								\
     0,									\
     false,								\
     tcache_enabled_default,						\
-    NULL								\
+    NULL,								\
+    ql_head_initializer(witnesses),					\
+    false								\
 }
 
 struct tsd_s {
@@ -565,6 +601,15 @@
 #undef O
 };
 
+/*
+ * Wrapper around tsd_t that makes it possible to avoid implicit conversion
+ * between tsd_t and tsdn_t, where tsdn_t is "nullable" and has to be
+ * explicitly converted to tsd_t, which is non-nullable.
+ */
+struct tsdn_s {
+	tsd_t	tsd;
+};
+
 static const tsd_t tsd_initializer = TSD_INITIALIZER;
 
 malloc_tsd_types(, tsd_t)
@@ -577,7 +622,7 @@
 void	malloc_tsd_dalloc(void *wrapper);
 void	malloc_tsd_no_cleanup(void *arg);
 void	malloc_tsd_cleanup_register(bool (*f)(void));
-bool	malloc_tsd_boot0(void);
+tsd_t	*malloc_tsd_boot0(void);
 void	malloc_tsd_boot1(void);
 #if (!defined(JEMALLOC_MALLOC_THREAD_CLEANUP) && !defined(JEMALLOC_TLS) && \
     !defined(_WIN32))
@@ -595,6 +640,7 @@
 malloc_tsd_protos(JEMALLOC_ATTR(unused), , tsd_t)
 
 tsd_t	*tsd_fetch(void);
+tsdn_t	*tsd_tsdn(tsd_t *tsd);
 bool	tsd_nominal(tsd_t *tsd);
 #define	O(n, t)								\
 t	*tsd_##n##p_get(tsd_t *tsd);					\
@@ -602,6 +648,9 @@
 void	tsd_##n##_set(tsd_t *tsd, t n);
 MALLOC_TSD
 #undef O
+tsdn_t	*tsdn_fetch(void);
+bool	tsdn_null(const tsdn_t *tsdn);
+tsd_t	*tsdn_tsd(tsdn_t *tsdn);
 #endif
 
 #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_TSD_C_))
@@ -628,6 +677,13 @@
 	return (tsd);
 }
 
+JEMALLOC_ALWAYS_INLINE tsdn_t *
+tsd_tsdn(tsd_t *tsd)
+{
+
+	return ((tsdn_t *)tsd);
+}
+
 JEMALLOC_INLINE bool
 tsd_nominal(tsd_t *tsd)
 {
@@ -659,6 +715,32 @@
 }
 MALLOC_TSD
 #undef O
+
+JEMALLOC_ALWAYS_INLINE tsdn_t *
+tsdn_fetch(void)
+{
+
+	if (!tsd_booted_get())
+		return (NULL);
+
+	return (tsd_tsdn(tsd_fetch()));
+}
+
+JEMALLOC_ALWAYS_INLINE bool
+tsdn_null(const tsdn_t *tsdn)
+{
+
+	return (tsdn == NULL);
+}
+
+JEMALLOC_ALWAYS_INLINE tsd_t *
+tsdn_tsd(tsdn_t *tsdn)
+{
+
+	assert(!tsdn_null(tsdn));
+
+	return (&tsdn->tsd);
+}
 #endif
 
 #endif /* JEMALLOC_H_INLINES */
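
The tsdn_t wrapper introduced above gives internal functions a nullable
handle: code that can run before TSD is booted receives a tsdn_t * (possibly
NULL from tsdn_fetch()), checks tsdn_null(), and only then converts to the
non-nullable tsd_t * via tsdn_tsd().  A minimal sketch of that calling
convention, assuming the jemalloc internal headers are in scope (the function
name is hypothetical; the real users include the witness checks added
elsewhere in this change):

    static void
    example_with_tsdn(tsdn_t *tsdn)
    {
    	tsd_t *tsd;

    	if (tsdn_null(tsdn))
    		return;	/* TSD unavailable; skip per-thread bookkeeping. */
    	tsd = tsdn_tsd(tsdn);
    	/* ...use tsd_*_get(tsd)/tsd_*_set(tsd, ...) accessors here... */
    }
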
diff --git a/include/jemalloc/internal/util.h b/include/jemalloc/internal/util.h
index b8885bf..a0c2203 100644
--- a/include/jemalloc/internal/util.h
+++ b/include/jemalloc/internal/util.h
@@ -40,6 +40,10 @@
  */
 #define	MALLOC_PRINTF_BUFSIZE	4096
 
+/* Junk fill patterns. */
+#define	JEMALLOC_ALLOC_JUNK	((uint8_t)0xa5)
+#define	JEMALLOC_FREE_JUNK	((uint8_t)0x5a)
+
 /*
  * Wrap a cpp argument that contains commas such that it isn't broken up into
  * multiple arguments.
@@ -73,12 +77,12 @@
       JEMALLOC_CLANG_HAS_BUILTIN(__builtin_unreachable)
 #	define unreachable() __builtin_unreachable()
 #  else
-#	define unreachable()
+#	define unreachable() abort()
 #  endif
 #else
 #	define likely(x)   !!(x)
 #	define unlikely(x) !!(x)
-#	define unreachable()
+#	define unreachable() abort()
 #endif
 
 #include "jemalloc/internal/assert.h"
@@ -106,9 +110,9 @@
  * malloc_vsnprintf() supports a subset of snprintf(3) that avoids floating
  * point math.
  */
-int	malloc_vsnprintf(char *str, size_t size, const char *format,
+size_t	malloc_vsnprintf(char *str, size_t size, const char *format,
     va_list ap);
-int	malloc_snprintf(char *str, size_t size, const char *format, ...)
+size_t	malloc_snprintf(char *str, size_t size, const char *format, ...)
     JEMALLOC_FORMAT_PRINTF(3, 4);
 void	malloc_vcprintf(void (*write_cb)(void *, const char *), void *cbopaque,
     const char *format, va_list ap);
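
The junk patterns are now named constants (allocation fill 0xa5, free fill
0x5a) instead of magic numbers, and unreachable() aborts rather than silently
falling through.  An illustrative, test-style check of the allocation pattern,
assuming a debug build with junk filling enabled and the internal util.h above
in scope:

    /* Returns true if usize bytes at p carry the allocation junk pattern. */
    static bool
    looks_junk_filled(const uint8_t *p, size_t usize)
    {
    	size_t i;

    	for (i = 0; i < usize; i++) {
    		if (p[i] != JEMALLOC_ALLOC_JUNK)
    			return (false);
    	}
    	return (true);
    }
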
diff --git a/include/jemalloc/internal/valgrind.h b/include/jemalloc/internal/valgrind.h
index a3380df..1a86808 100644
--- a/include/jemalloc/internal/valgrind.h
+++ b/include/jemalloc/internal/valgrind.h
@@ -30,15 +30,17 @@
  * calls must be embedded in macros rather than in functions so that when
  * Valgrind reports errors, there are no extra stack frames in the backtraces.
  */
-#define	JEMALLOC_VALGRIND_MALLOC(cond, ptr, usize, zero) do {		\
-	if (unlikely(in_valgrind && cond))				\
-		VALGRIND_MALLOCLIKE_BLOCK(ptr, usize, p2rz(ptr), zero);	\
+#define	JEMALLOC_VALGRIND_MALLOC(cond, tsdn, ptr, usize, zero) do {	\
+	if (unlikely(in_valgrind && cond)) {				\
+		VALGRIND_MALLOCLIKE_BLOCK(ptr, usize, p2rz(tsdn, ptr),	\
+		    zero);						\
+	}								\
 } while (0)
-#define	JEMALLOC_VALGRIND_REALLOC(maybe_moved, ptr, usize,		\
+#define	JEMALLOC_VALGRIND_REALLOC(maybe_moved, tsdn, ptr, usize,	\
     ptr_maybe_null, old_ptr, old_usize, old_rzsize, old_ptr_maybe_null,	\
     zero) do {								\
 	if (unlikely(in_valgrind)) {					\
-		size_t rzsize = p2rz(ptr);				\
+		size_t rzsize = p2rz(tsdn, ptr);			\
 									\
 		if (!maybe_moved || ptr == old_ptr) {			\
 			VALGRIND_RESIZEINPLACE_BLOCK(ptr, old_usize,	\
@@ -81,8 +83,8 @@
 #define	JEMALLOC_VALGRIND_MAKE_MEM_NOACCESS(ptr, usize) do {} while (0)
 #define	JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ptr, usize) do {} while (0)
 #define	JEMALLOC_VALGRIND_MAKE_MEM_DEFINED(ptr, usize) do {} while (0)
-#define	JEMALLOC_VALGRIND_MALLOC(cond, ptr, usize, zero) do {} while (0)
-#define	JEMALLOC_VALGRIND_REALLOC(maybe_moved, ptr, usize,		\
+#define	JEMALLOC_VALGRIND_MALLOC(cond, tsdn, ptr, usize, zero) do {} while (0)
+#define	JEMALLOC_VALGRIND_REALLOC(maybe_moved, tsdn, ptr, usize,	\
     ptr_maybe_null, old_ptr, old_usize, old_rzsize, old_ptr_maybe_null,	\
     zero) do {} while (0)
 #define	JEMALLOC_VALGRIND_FREE(ptr, rzsize) do {} while (0)
diff --git a/include/jemalloc/internal/witness.h b/include/jemalloc/internal/witness.h
new file mode 100644
index 0000000..d78dca2
--- /dev/null
+++ b/include/jemalloc/internal/witness.h
@@ -0,0 +1,249 @@
+/******************************************************************************/
+#ifdef JEMALLOC_H_TYPES
+
+typedef struct witness_s witness_t;
+typedef unsigned witness_rank_t;
+typedef ql_head(witness_t) witness_list_t;
+typedef int witness_comp_t (const witness_t *, const witness_t *);
+
+/*
+ * Lock ranks.  Witnesses with rank WITNESS_RANK_OMIT are completely ignored by
+ * the witness machinery.
+ */
+#define	WITNESS_RANK_OMIT		0U
+
+#define	WITNESS_RANK_INIT		1U
+#define	WITNESS_RANK_CTL		1U
+#define	WITNESS_RANK_ARENAS		2U
+
+#define	WITNESS_RANK_PROF_DUMP		3U
+#define	WITNESS_RANK_PROF_BT2GCTX	4U
+#define	WITNESS_RANK_PROF_TDATAS	5U
+#define	WITNESS_RANK_PROF_TDATA		6U
+#define	WITNESS_RANK_PROF_GCTX		7U
+
+#define	WITNESS_RANK_ARENA		8U
+#define	WITNESS_RANK_ARENA_CHUNKS	9U
+#define	WITNESS_RANK_ARENA_NODE_CACHE	10U
+
+#define	WITNESS_RANK_BASE		11U
+
+#define	WITNESS_RANK_LEAF		0xffffffffU
+#define	WITNESS_RANK_ARENA_BIN		WITNESS_RANK_LEAF
+#define	WITNESS_RANK_ARENA_HUGE		WITNESS_RANK_LEAF
+#define	WITNESS_RANK_DSS		WITNESS_RANK_LEAF
+#define	WITNESS_RANK_PROF_ACTIVE	WITNESS_RANK_LEAF
+#define	WITNESS_RANK_PROF_DUMP_SEQ	WITNESS_RANK_LEAF
+#define	WITNESS_RANK_PROF_GDUMP		WITNESS_RANK_LEAF
+#define	WITNESS_RANK_PROF_NEXT_THR_UID	WITNESS_RANK_LEAF
+#define	WITNESS_RANK_PROF_THREAD_ACTIVE_INIT	WITNESS_RANK_LEAF
+
+#define	WITNESS_INITIALIZER(rank) {"initializer", rank, NULL, {NULL, NULL}}
+
+#endif /* JEMALLOC_H_TYPES */
+/******************************************************************************/
+#ifdef JEMALLOC_H_STRUCTS
+
+struct witness_s {
+	/* Name, used for printing lock order reversal messages. */
+	const char		*name;
+
+	/*
+	 * Witness rank, where 0 is lowest and UINT_MAX is highest.  Witnesses
+	 * must be acquired in order of increasing rank.
+	 */
+	witness_rank_t		rank;
+
+	/*
+	 * If two witnesses are of equal rank and they have the same comp
+	 * function pointer, it is called as a last attempt to differentiate
+	 * between witnesses of equal rank.
+	 */
+	witness_comp_t		*comp;
+
+	/* Linkage for thread's currently owned locks. */
+	ql_elm(witness_t)	link;
+};
+
+#endif /* JEMALLOC_H_STRUCTS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_EXTERNS
+
+void	witness_init(witness_t *witness, const char *name, witness_rank_t rank,
+    witness_comp_t *comp);
+#ifdef JEMALLOC_JET
+typedef void (witness_lock_error_t)(const witness_list_t *, const witness_t *);
+extern witness_lock_error_t *witness_lock_error;
+#else
+void	witness_lock_error(const witness_list_t *witnesses,
+    const witness_t *witness);
+#endif
+#ifdef JEMALLOC_JET
+typedef void (witness_owner_error_t)(const witness_t *);
+extern witness_owner_error_t *witness_owner_error;
+#else
+void	witness_owner_error(const witness_t *witness);
+#endif
+#ifdef JEMALLOC_JET
+typedef void (witness_not_owner_error_t)(const witness_t *);
+extern witness_not_owner_error_t *witness_not_owner_error;
+#else
+void	witness_not_owner_error(const witness_t *witness);
+#endif
+#ifdef JEMALLOC_JET
+typedef void (witness_lockless_error_t)(const witness_list_t *);
+extern witness_lockless_error_t *witness_lockless_error;
+#else
+void	witness_lockless_error(const witness_list_t *witnesses);
+#endif
+
+void	witnesses_cleanup(tsd_t *tsd);
+void	witness_fork_cleanup(tsd_t *tsd);
+void	witness_prefork(tsd_t *tsd);
+void	witness_postfork_parent(tsd_t *tsd);
+void	witness_postfork_child(tsd_t *tsd);
+
+#endif /* JEMALLOC_H_EXTERNS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_INLINES
+
+#ifndef JEMALLOC_ENABLE_INLINE
+void	witness_assert_owner(tsdn_t *tsdn, const witness_t *witness);
+void	witness_assert_not_owner(tsdn_t *tsdn, const witness_t *witness);
+void	witness_assert_lockless(tsdn_t *tsdn);
+void	witness_lock(tsdn_t *tsdn, witness_t *witness);
+void	witness_unlock(tsdn_t *tsdn, witness_t *witness);
+#endif
+
+#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_MUTEX_C_))
+JEMALLOC_INLINE void
+witness_assert_owner(tsdn_t *tsdn, const witness_t *witness)
+{
+	tsd_t *tsd;
+	witness_list_t *witnesses;
+	witness_t *w;
+
+	if (!config_debug)
+		return;
+
+	if (tsdn_null(tsdn))
+		return;
+	tsd = tsdn_tsd(tsdn);
+	if (witness->rank == WITNESS_RANK_OMIT)
+		return;
+
+	witnesses = tsd_witnessesp_get(tsd);
+	ql_foreach(w, witnesses, link) {
+		if (w == witness)
+			return;
+	}
+	witness_owner_error(witness);
+}
+
+JEMALLOC_INLINE void
+witness_assert_not_owner(tsdn_t *tsdn, const witness_t *witness)
+{
+	tsd_t *tsd;
+	witness_list_t *witnesses;
+	witness_t *w;
+
+	if (!config_debug)
+		return;
+
+	if (tsdn_null(tsdn))
+		return;
+	tsd = tsdn_tsd(tsdn);
+	if (witness->rank == WITNESS_RANK_OMIT)
+		return;
+
+	witnesses = tsd_witnessesp_get(tsd);
+	ql_foreach(w, witnesses, link) {
+		if (w == witness)
+			witness_not_owner_error(witness);
+	}
+}
+
+JEMALLOC_INLINE void
+witness_assert_lockless(tsdn_t *tsdn)
+{
+	tsd_t *tsd;
+	witness_list_t *witnesses;
+	witness_t *w;
+
+	if (!config_debug)
+		return;
+
+	if (tsdn_null(tsdn))
+		return;
+	tsd = tsdn_tsd(tsdn);
+
+	witnesses = tsd_witnessesp_get(tsd);
+	w = ql_last(witnesses, link);
+	if (w != NULL)
+		witness_lockless_error(witnesses);
+}
+
+JEMALLOC_INLINE void
+witness_lock(tsdn_t *tsdn, witness_t *witness)
+{
+	tsd_t *tsd;
+	witness_list_t *witnesses;
+	witness_t *w;
+
+	if (!config_debug)
+		return;
+
+	if (tsdn_null(tsdn))
+		return;
+	tsd = tsdn_tsd(tsdn);
+	if (witness->rank == WITNESS_RANK_OMIT)
+		return;
+
+	witness_assert_not_owner(tsdn, witness);
+
+	witnesses = tsd_witnessesp_get(tsd);
+	w = ql_last(witnesses, link);
+	if (w == NULL) {
+		/* No other locks; do nothing. */
+	} else if (tsd_witness_fork_get(tsd) && w->rank <= witness->rank) {
+		/* Forking, and relaxed ranking satisfied. */
+	} else if (w->rank > witness->rank) {
+		/* Not forking, rank order reversal. */
+		witness_lock_error(witnesses, witness);
+	} else if (w->rank == witness->rank && (w->comp == NULL || w->comp !=
+	    witness->comp || w->comp(w, witness) > 0)) {
+		/*
+		 * Missing/incompatible comparison function, or comparison
+		 * function indicates rank order reversal.
+		 */
+		witness_lock_error(witnesses, witness);
+	}
+
+	ql_elm_new(witness, link);
+	ql_tail_insert(witnesses, witness, link);
+}
+
+JEMALLOC_INLINE void
+witness_unlock(tsdn_t *tsdn, witness_t *witness)
+{
+	tsd_t *tsd;
+	witness_list_t *witnesses;
+
+	if (!config_debug)
+		return;
+
+	if (tsdn_null(tsdn))
+		return;
+	tsd = tsdn_tsd(tsdn);
+	if (witness->rank == WITNESS_RANK_OMIT)
+		return;
+
+	witness_assert_owner(tsdn, witness);
+
+	witnesses = tsd_witnessesp_get(tsd);
+	ql_remove(witnesses, witness, link);
+}
+#endif
+
+#endif /* JEMALLOC_H_INLINES */
+/******************************************************************************/
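
The new witness machinery records, per thread, the locks currently held and
their ranks, and reports rank-order reversals in debug builds.  A hypothetical
integration sketch; the example_mtx_* wrappers and the raw pthread_mutex_t are
illustrative only, and the real integration (presumably in the mutex layer) is
not shown in this file:

    #include <pthread.h>

    typedef struct {
    	pthread_mutex_t	lock;
    	witness_t	witness;
    } example_mtx_t;

    /* One-time setup, e.g.:
     * witness_init(&m->witness, "example", WITNESS_RANK_LEAF, NULL); */

    static void
    example_mtx_lock(tsdn_t *tsdn, example_mtx_t *m)
    {
    	witness_assert_not_owner(tsdn, &m->witness);
    	pthread_mutex_lock(&m->lock);
    	/* Flags a reversal if a higher-ranked lock is already held. */
    	witness_lock(tsdn, &m->witness);
    }

    static void
    example_mtx_unlock(tsdn_t *tsdn, example_mtx_t *m)
    {
    	witness_unlock(tsdn, &m->witness);
    	pthread_mutex_unlock(&m->lock);
    }
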
diff --git a/include/jemalloc/jemalloc_macros.h.in b/include/jemalloc/jemalloc_macros.h.in
index 9f356f9..129240e 100644
--- a/include/jemalloc/jemalloc_macros.h.in
+++ b/include/jemalloc/jemalloc_macros.h.in
@@ -13,11 +13,11 @@
 
 #  define MALLOCX_LG_ALIGN(la)	((int)(la))
 #  if LG_SIZEOF_PTR == 2
-#    define MALLOCX_ALIGN(a)	((int)(ffs(a)-1))
+#    define MALLOCX_ALIGN(a)	((int)(ffs((int)(a))-1))
 #  else
 #    define MALLOCX_ALIGN(a)						\
-       ((int)(((a) < (size_t)INT_MAX) ? ffs((int)(a))-1 :		\
-       ffs((int)((a)>>32))+31))
+       ((int)(((size_t)(a) < (size_t)INT_MAX) ? ffs((int)(a))-1 :	\
+       ffs((int)(((size_t)(a))>>32))+31))
 #  endif
 #  define MALLOCX_ZERO	((int)0x40)
 /*
@@ -29,7 +29,7 @@
 /*
  * Bias arena index bits so that 0 encodes "use an automatically chosen arena".
  */
-#  define MALLOCX_ARENA(a)	((int)(((a)+1) << 20))
+#  define MALLOCX_ARENA(a)	((((int)(a))+1) << 20)
 
 #if defined(__cplusplus) && defined(JEMALLOC_USE_CXX_THROW)
 #  define JEMALLOC_CXX_THROW throw()
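
The MALLOCX_ALIGN() and MALLOCX_ARENA() fixes above add explicit casts so the
macros evaluate cleanly regardless of the argument's integer type.  A short
usage sketch with mallocx(); the "arenas.extend" mallctl used here to obtain
an arena index is an assumption about the public control namespace, not part
of this hunk:

    #include <stddef.h>
    #include <jemalloc/jemalloc.h>

    static void *
    aligned_alloc_from_new_arena(size_t size)
    {
    	unsigned arena_ind;
    	size_t sz = sizeof(arena_ind);

    	if (mallctl("arenas.extend", &arena_ind, &sz, NULL, 0) != 0)
    		return (NULL);
    	/* MALLOCX_ARENA() biases the index so that 0 means "auto". */
    	return (mallocx(size, MALLOCX_ALIGN(64) | MALLOCX_ARENA(arena_ind)));
    }
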
diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj
index f3f0260..9315022 100644
--- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj
+++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj
@@ -56,6 +56,7 @@
     <ClInclude Include="..\..\..\..\include\jemalloc\internal\mutex.h" />
     <ClInclude Include="..\..\..\..\include\jemalloc\internal\nstime.h" />
     <ClInclude Include="..\..\..\..\include\jemalloc\internal\pages.h" />
+    <ClInclude Include="..\..\..\..\include\jemalloc\internal\ph.h" />
     <ClInclude Include="..\..\..\..\include\jemalloc\internal\private_namespace.h" />
     <ClInclude Include="..\..\..\..\include\jemalloc\internal\private_unnamespace.h" />
     <ClInclude Include="..\..\..\..\include\jemalloc\internal\prng.h" />
@@ -250,7 +251,7 @@
       <Optimization>Disabled</Optimization>
       <PreprocessorDefinitions>_REENTRANT;_WINDLL;DLLEXPORT;JEMALLOC_DEBUG;_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <AdditionalIncludeDirectories>..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
-      <DisableSpecificWarnings>4090;4146;4244;4267;4334</DisableSpecificWarnings>
+      <DisableSpecificWarnings>4090;4146;4267;4334</DisableSpecificWarnings>
       <ProgramDataBaseFileName>$(OutputPath)$(TargetName).pdb</ProgramDataBaseFileName>
     </ClCompile>
     <Link>
@@ -267,7 +268,7 @@
       <PreprocessorDefinitions>JEMALLOC_DEBUG;_REENTRANT;JEMALLOC_EXPORT=;_DEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <AdditionalIncludeDirectories>..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
       <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
-      <DisableSpecificWarnings>4090;4146;4244;4267;4334</DisableSpecificWarnings>
+      <DisableSpecificWarnings>4090;4146;4267;4334</DisableSpecificWarnings>
       <ProgramDataBaseFileName>$(OutputPath)$(TargetName).pdb</ProgramDataBaseFileName>
     </ClCompile>
     <Link>
@@ -283,7 +284,7 @@
       <Optimization>Disabled</Optimization>
       <PreprocessorDefinitions>_REENTRANT;_WINDLL;DLLEXPORT;JEMALLOC_DEBUG;_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <AdditionalIncludeDirectories>..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
-      <DisableSpecificWarnings>4090;4146;4244;4267;4334</DisableSpecificWarnings>
+      <DisableSpecificWarnings>4090;4146;4267;4334</DisableSpecificWarnings>
       <ProgramDataBaseFileName>$(OutputPath)$(TargetName).pdb</ProgramDataBaseFileName>
     </ClCompile>
     <Link>
@@ -300,8 +301,9 @@
       <PreprocessorDefinitions>JEMALLOC_DEBUG;_REENTRANT;JEMALLOC_EXPORT=;_DEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <AdditionalIncludeDirectories>..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
       <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
-      <DisableSpecificWarnings>4090;4146;4244;4267;4334</DisableSpecificWarnings>
-      <ProgramDataBaseFileName>$(OutputPath)$(TargetName).pdb</ProgramDataBaseFileName>
+      <DisableSpecificWarnings>4090;4146;4267;4334</DisableSpecificWarnings>
+      <DebugInformationFormat>OldStyle</DebugInformationFormat>
+      <MinimalRebuild>false</MinimalRebuild>
     </ClCompile>
     <Link>
       <SubSystem>Windows</SubSystem>
@@ -318,7 +320,7 @@
       <IntrinsicFunctions>true</IntrinsicFunctions>
       <PreprocessorDefinitions>_REENTRANT;_WINDLL;DLLEXPORT;NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <AdditionalIncludeDirectories>..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
-      <DisableSpecificWarnings>4090;4146;4244;4267;4334</DisableSpecificWarnings>
+      <DisableSpecificWarnings>4090;4146;4267;4334</DisableSpecificWarnings>
       <ProgramDataBaseFileName>$(OutputPath)$(TargetName).pdb</ProgramDataBaseFileName>
     </ClCompile>
     <Link>
@@ -339,7 +341,7 @@
       <PreprocessorDefinitions>_REENTRANT;JEMALLOC_EXPORT=;NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <AdditionalIncludeDirectories>..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
       <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
-      <DisableSpecificWarnings>4090;4146;4244;4267;4334</DisableSpecificWarnings>
+      <DisableSpecificWarnings>4090;4146;4267;4334</DisableSpecificWarnings>
       <ProgramDataBaseFileName>$(OutputPath)$(TargetName).pdb</ProgramDataBaseFileName>
     </ClCompile>
     <Link>
@@ -359,7 +361,7 @@
       <IntrinsicFunctions>true</IntrinsicFunctions>
       <AdditionalIncludeDirectories>..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
       <PreprocessorDefinitions>_REENTRANT;_WINDLL;DLLEXPORT;NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
-      <DisableSpecificWarnings>4090;4146;4244;4267;4334</DisableSpecificWarnings>
+      <DisableSpecificWarnings>4090;4146;4267;4334</DisableSpecificWarnings>
       <ProgramDataBaseFileName>$(OutputPath)$(TargetName).pdb</ProgramDataBaseFileName>
     </ClCompile>
     <Link>
@@ -380,8 +382,8 @@
       <PreprocessorDefinitions>_REENTRANT;JEMALLOC_EXPORT=;NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <AdditionalIncludeDirectories>..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
       <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
-      <DisableSpecificWarnings>4090;4146;4244;4267;4334</DisableSpecificWarnings>
-      <ProgramDataBaseFileName>$(OutputPath)$(TargetName).pdb</ProgramDataBaseFileName>
+      <DisableSpecificWarnings>4090;4146;4267;4334</DisableSpecificWarnings>
+      <DebugInformationFormat>OldStyle</DebugInformationFormat>
     </ClCompile>
     <Link>
       <SubSystem>Windows</SubSystem>
diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters
index ce70632..88c15ef 100644
--- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters
+++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters
@@ -107,6 +107,9 @@
     <ClInclude Include="..\..\..\..\include\jemalloc\internal\pages.h">
       <Filter>Header Files\internal</Filter>
     </ClInclude>
+    <ClInclude Include="..\..\..\..\include\jemalloc\internal\ph.h">
+      <Filter>Header Files\internal</Filter>
+    </ClInclude>
     <ClInclude Include="..\..\..\..\include\jemalloc\internal\private_namespace.h">
       <Filter>Header Files\internal</Filter>
     </ClInclude>
diff --git a/src/arena.c b/src/arena.c
index 48e9b20..c605bcd 100644
--- a/src/arena.c
+++ b/src/arena.c
@@ -37,11 +37,12 @@
  * definition.
  */
 
-static void	arena_purge_to_limit(arena_t *arena, size_t ndirty_limit);
-static void	arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty,
-    bool cleaned, bool decommitted);
-static void	arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk,
-    arena_run_t *run, arena_bin_t *bin);
+static void	arena_purge_to_limit(tsdn_t *tsdn, arena_t *arena,
+    size_t ndirty_limit);
+static void	arena_run_dalloc(tsdn_t *tsdn, arena_t *arena, arena_run_t *run,
+    bool dirty, bool cleaned, bool decommitted);
+static void	arena_dalloc_bin_run(tsdn_t *tsdn, arena_t *arena,
+    arena_chunk_t *chunk, arena_run_t *run, arena_bin_t *bin);
 static void	arena_bin_lower_run(arena_t *arena, arena_chunk_t *chunk,
     arena_run_t *run, arena_bin_t *bin);
 
@@ -72,9 +73,9 @@
 	return ((a_miscelm > b_miscelm) - (a_miscelm < b_miscelm));
 }
 
-/* Generate red-black tree functions. */
-rb_gen(static UNUSED, arena_run_tree_, arena_run_tree_t, arena_chunk_map_misc_t,
-    rb_link, arena_run_addr_comp)
+/* Generate pairing heap functions. */
+ph_gen(static UNUSED, arena_run_heap_, arena_run_heap_t, arena_chunk_map_misc_t,
+    ph_link, arena_run_addr_comp)
 
 static size_t
 run_quantize_floor_compute(size_t size)
@@ -155,7 +156,7 @@
 
 #ifdef JEMALLOC_JET
 #undef run_quantize_floor
-#define	run_quantize_floor JEMALLOC_N(run_quantize_floor_impl)
+#define	run_quantize_floor JEMALLOC_N(n_run_quantize_floor)
 #endif
 static size_t
 run_quantize_floor(size_t size)
@@ -173,12 +174,12 @@
 #ifdef JEMALLOC_JET
 #undef run_quantize_floor
 #define	run_quantize_floor JEMALLOC_N(run_quantize_floor)
-run_quantize_t *run_quantize_floor = JEMALLOC_N(run_quantize_floor_impl);
+run_quantize_t *run_quantize_floor = JEMALLOC_N(n_run_quantize_floor);
 #endif
 
 #ifdef JEMALLOC_JET
 #undef run_quantize_ceil
-#define	run_quantize_ceil JEMALLOC_N(run_quantize_ceil_impl)
+#define	run_quantize_ceil JEMALLOC_N(n_run_quantize_ceil)
 #endif
 static size_t
 run_quantize_ceil(size_t size)
@@ -196,10 +197,10 @@
 #ifdef JEMALLOC_JET
 #undef run_quantize_ceil
 #define	run_quantize_ceil JEMALLOC_N(run_quantize_ceil)
-run_quantize_t *run_quantize_ceil = JEMALLOC_N(run_quantize_ceil_impl);
+run_quantize_t *run_quantize_ceil = JEMALLOC_N(n_run_quantize_ceil);
 #endif
 
-static arena_run_tree_t *
+static arena_run_heap_t *
 arena_runs_avail_get(arena_t *arena, szind_t ind)
 {
 
@@ -214,11 +215,11 @@
     size_t npages)
 {
 	szind_t ind = size2index(run_quantize_floor(arena_miscelm_size_get(
-	    arena_miscelm_get(chunk, pageind))));
+	    arena_miscelm_get_const(chunk, pageind))));
 	assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >>
 	    LG_PAGE));
-	arena_run_tree_insert(arena_runs_avail_get(arena, ind),
-	    arena_miscelm_get(chunk, pageind));
+	arena_run_heap_insert(arena_runs_avail_get(arena, ind),
+	    arena_miscelm_get_mutable(chunk, pageind));
 }
 
 static void
@@ -226,18 +227,19 @@
     size_t npages)
 {
 	szind_t ind = size2index(run_quantize_floor(arena_miscelm_size_get(
-	    arena_miscelm_get(chunk, pageind))));
+	    arena_miscelm_get_const(chunk, pageind))));
 	assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >>
 	    LG_PAGE));
-	arena_run_tree_remove(arena_runs_avail_get(arena, ind),
-	    arena_miscelm_get(chunk, pageind));
+	arena_run_heap_remove(arena_runs_avail_get(arena, ind),
+	    arena_miscelm_get_mutable(chunk, pageind));
 }
 
 static void
 arena_run_dirty_insert(arena_t *arena, arena_chunk_t *chunk, size_t pageind,
     size_t npages)
 {
-	arena_chunk_map_misc_t *miscelm = arena_miscelm_get(chunk, pageind);
+	arena_chunk_map_misc_t *miscelm = arena_miscelm_get_mutable(chunk,
+	    pageind);
 
 	assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >>
 	    LG_PAGE));
@@ -254,7 +256,8 @@
 arena_run_dirty_remove(arena_t *arena, arena_chunk_t *chunk, size_t pageind,
     size_t npages)
 {
-	arena_chunk_map_misc_t *miscelm = arena_miscelm_get(chunk, pageind);
+	arena_chunk_map_misc_t *miscelm = arena_miscelm_get_mutable(chunk,
+	    pageind);
 
 	assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >>
 	    LG_PAGE));
@@ -589,7 +592,8 @@
 }
 
 static bool
-arena_chunk_register(arena_t *arena, arena_chunk_t *chunk, bool zero)
+arena_chunk_register(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk,
+    bool zero)
 {
 
 	/*
@@ -600,62 +604,63 @@
 	 */
 	extent_node_init(&chunk->node, arena, chunk, chunksize, zero, true);
 	extent_node_achunk_set(&chunk->node, true);
-	return (chunk_register(chunk, &chunk->node));
+	return (chunk_register(tsdn, chunk, &chunk->node));
 }
 
 static arena_chunk_t *
-arena_chunk_alloc_internal_hard(arena_t *arena, chunk_hooks_t *chunk_hooks,
-    bool *zero, bool *commit)
+arena_chunk_alloc_internal_hard(tsdn_t *tsdn, arena_t *arena,
+    chunk_hooks_t *chunk_hooks, bool *zero, bool *commit)
 {
 	arena_chunk_t *chunk;
 
-	malloc_mutex_unlock(&arena->lock);
+	malloc_mutex_unlock(tsdn, &arena->lock);
 
-	chunk = (arena_chunk_t *)chunk_alloc_wrapper(arena, chunk_hooks, NULL,
-	    chunksize, chunksize, zero, commit);
+	chunk = (arena_chunk_t *)chunk_alloc_wrapper(tsdn, arena, chunk_hooks,
+	    NULL, chunksize, chunksize, zero, commit);
 	if (chunk != NULL && !*commit) {
 		/* Commit header. */
 		if (chunk_hooks->commit(chunk, chunksize, 0, map_bias <<
 		    LG_PAGE, arena->ind)) {
-			chunk_dalloc_wrapper(arena, chunk_hooks, (void *)chunk,
-			    chunksize, *zero, *commit);
+			chunk_dalloc_wrapper(tsdn, arena, chunk_hooks,
+			    (void *)chunk, chunksize, *zero, *commit);
 			chunk = NULL;
 		}
 	}
-	if (chunk != NULL && arena_chunk_register(arena, chunk, *zero)) {
+	if (chunk != NULL && arena_chunk_register(tsdn, arena, chunk, *zero)) {
 		if (!*commit) {
 			/* Undo commit of header. */
 			chunk_hooks->decommit(chunk, chunksize, 0, map_bias <<
 			    LG_PAGE, arena->ind);
 		}
-		chunk_dalloc_wrapper(arena, chunk_hooks, (void *)chunk,
+		chunk_dalloc_wrapper(tsdn, arena, chunk_hooks, (void *)chunk,
 		    chunksize, *zero, *commit);
 		chunk = NULL;
 	}
 
-	malloc_mutex_lock(&arena->lock);
+	malloc_mutex_lock(tsdn, &arena->lock);
 	return (chunk);
 }
 
 static arena_chunk_t *
-arena_chunk_alloc_internal(arena_t *arena, bool *zero, bool *commit)
+arena_chunk_alloc_internal(tsdn_t *tsdn, arena_t *arena, bool *zero,
+    bool *commit)
 {
 	arena_chunk_t *chunk;
 	chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER;
 
-	chunk = chunk_alloc_cache(arena, &chunk_hooks, NULL, chunksize,
+	chunk = chunk_alloc_cache(tsdn, arena, &chunk_hooks, NULL, chunksize,
 	    chunksize, zero, true);
 	if (chunk != NULL) {
-		if (arena_chunk_register(arena, chunk, *zero)) {
-			chunk_dalloc_cache(arena, &chunk_hooks, chunk,
+		if (arena_chunk_register(tsdn, arena, chunk, *zero)) {
+			chunk_dalloc_cache(tsdn, arena, &chunk_hooks, chunk,
 			    chunksize, true);
 			return (NULL);
 		}
 		*commit = true;
 	}
 	if (chunk == NULL) {
-		chunk = arena_chunk_alloc_internal_hard(arena, &chunk_hooks,
-		    zero, commit);
+		chunk = arena_chunk_alloc_internal_hard(tsdn, arena,
+		    &chunk_hooks, zero, commit);
 	}
 
 	if (config_stats && chunk != NULL) {
@@ -667,7 +672,7 @@
 }
 
 static arena_chunk_t *
-arena_chunk_init_hard(arena_t *arena)
+arena_chunk_init_hard(tsdn_t *tsdn, arena_t *arena)
 {
 	arena_chunk_t *chunk;
 	bool zero, commit;
@@ -677,14 +682,14 @@
 
 	zero = false;
 	commit = false;
-	chunk = arena_chunk_alloc_internal(arena, &zero, &commit);
+	chunk = arena_chunk_alloc_internal(tsdn, arena, &zero, &commit);
 	if (chunk == NULL)
 		return (NULL);
 
 	/*
 	 * Initialize the map to contain one maximal free untouched run.  Mark
-	 * the pages as zeroed if chunk_alloc() returned a zeroed or decommitted
-	 * chunk.
+	 * the pages as zeroed if arena_chunk_alloc_internal() returned a zeroed
+	 * or decommitted chunk.
 	 */
 	flag_unzeroed = (zero || !commit) ? 0 : CHUNK_MAP_UNZEROED;
 	flag_decommitted = commit ? 0 : CHUNK_MAP_DECOMMITTED;
@@ -696,17 +701,18 @@
 	 */
 	if (!zero) {
 		JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(
-		    (void *)arena_bitselm_get(chunk, map_bias+1),
-		    (size_t)((uintptr_t) arena_bitselm_get(chunk,
-		    chunk_npages-1) - (uintptr_t)arena_bitselm_get(chunk,
-		    map_bias+1)));
+		    (void *)arena_bitselm_get_const(chunk, map_bias+1),
+		    (size_t)((uintptr_t)arena_bitselm_get_const(chunk,
+		    chunk_npages-1) -
+		    (uintptr_t)arena_bitselm_get_const(chunk, map_bias+1)));
 		for (i = map_bias+1; i < chunk_npages-1; i++)
 			arena_mapbits_internal_set(chunk, i, flag_unzeroed);
 	} else {
 		JEMALLOC_VALGRIND_MAKE_MEM_DEFINED((void
-		    *)arena_bitselm_get(chunk, map_bias+1), (size_t)((uintptr_t)
-		    arena_bitselm_get(chunk, chunk_npages-1) -
-		    (uintptr_t)arena_bitselm_get(chunk, map_bias+1)));
+		    *)arena_bitselm_get_const(chunk, map_bias+1),
+		    (size_t)((uintptr_t)arena_bitselm_get_const(chunk,
+		    chunk_npages-1) -
+		    (uintptr_t)arena_bitselm_get_const(chunk, map_bias+1)));
 		if (config_debug) {
 			for (i = map_bias+1; i < chunk_npages-1; i++) {
 				assert(arena_mapbits_unzeroed_get(chunk, i) ==
@@ -721,26 +727,73 @@
 }
 
 static arena_chunk_t *
-arena_chunk_alloc(arena_t *arena)
+arena_chunk_alloc(tsdn_t *tsdn, arena_t *arena)
 {
 	arena_chunk_t *chunk;
 
 	if (arena->spare != NULL)
 		chunk = arena_chunk_init_spare(arena);
 	else {
-		chunk = arena_chunk_init_hard(arena);
+		chunk = arena_chunk_init_hard(tsdn, arena);
 		if (chunk == NULL)
 			return (NULL);
 	}
 
+	ql_elm_new(&chunk->node, ql_link);
+	ql_tail_insert(&arena->achunks, &chunk->node, ql_link);
 	arena_avail_insert(arena, chunk, map_bias, chunk_npages-map_bias);
 
 	return (chunk);
 }
 
 static void
-arena_chunk_dalloc(arena_t *arena, arena_chunk_t *chunk)
+arena_chunk_discard(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk)
 {
+	bool committed;
+	chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER;
+
+	chunk_deregister(chunk, &chunk->node);
+
+	committed = (arena_mapbits_decommitted_get(chunk, map_bias) == 0);
+	if (!committed) {
+		/*
+		 * Decommit the header.  Mark the chunk as decommitted even if
+		 * header decommit fails, since treating a partially committed
+		 * chunk as committed has a high potential for causing later
+		 * access of decommitted memory.
+		 */
+		chunk_hooks = chunk_hooks_get(tsdn, arena);
+		chunk_hooks.decommit(chunk, chunksize, 0, map_bias << LG_PAGE,
+		    arena->ind);
+	}
+
+	chunk_dalloc_cache(tsdn, arena, &chunk_hooks, (void *)chunk, chunksize,
+	    committed);
+
+	if (config_stats) {
+		arena->stats.mapped -= chunksize;
+		arena->stats.metadata_mapped -= (map_bias << LG_PAGE);
+	}
+}
+
+static void
+arena_spare_discard(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *spare)
+{
+
+	assert(arena->spare != spare);
+
+	if (arena_mapbits_dirty_get(spare, map_bias) != 0) {
+		arena_run_dirty_remove(arena, spare, map_bias,
+		    chunk_npages-map_bias);
+	}
+
+	arena_chunk_discard(tsdn, arena, spare);
+}
+
+static void
+arena_chunk_dalloc(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk)
+{
+	arena_chunk_t *spare;
 
 	assert(arena_mapbits_allocated_get(chunk, map_bias) == 0);
 	assert(arena_mapbits_allocated_get(chunk, chunk_npages-1) == 0);
@@ -756,43 +809,11 @@
 	/* Remove run from runs_avail, so that the arena does not use it. */
 	arena_avail_remove(arena, chunk, map_bias, chunk_npages-map_bias);
 
-	if (arena->spare != NULL) {
-		arena_chunk_t *spare = arena->spare;
-		chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER;
-		bool committed;
-
-		arena->spare = chunk;
-		if (arena_mapbits_dirty_get(spare, map_bias) != 0) {
-			arena_run_dirty_remove(arena, spare, map_bias,
-			    chunk_npages-map_bias);
-		}
-
-		chunk_deregister(spare, &spare->node);
-
-		committed = (arena_mapbits_decommitted_get(spare, map_bias) ==
-		    0);
-		if (!committed) {
-			/*
-			 * Decommit the header.  Mark the chunk as decommitted
-			 * even if header decommit fails, since treating a
-			 * partially committed chunk as committed has a high
-			 * potential for causing later access of decommitted
-			 * memory.
-			 */
-			chunk_hooks = chunk_hooks_get(arena);
-			chunk_hooks.decommit(spare, chunksize, 0, map_bias <<
-			    LG_PAGE, arena->ind);
-		}
-
-		chunk_dalloc_cache(arena, &chunk_hooks, (void *)spare,
-		    chunksize, committed);
-
-		if (config_stats) {
-			arena->stats.mapped -= chunksize;
-			arena->stats.metadata_mapped -= (map_bias << LG_PAGE);
-		}
-	} else
-		arena->spare = chunk;
+	ql_remove(&arena->achunks, &chunk->node, ql_link);
+	spare = arena->spare;
+	arena->spare = chunk;
+	if (spare != NULL)
+		arena_spare_discard(tsdn, arena, spare);
 }
 
 static void
@@ -835,6 +856,17 @@
 }
 
 static void
+arena_huge_reset_stats_cancel(arena_t *arena, size_t usize)
+{
+	szind_t index = size2index(usize) - nlclasses - NBINS;
+
+	cassert(config_stats);
+
+	arena->stats.ndalloc_huge++;
+	arena->stats.hstats[index].ndalloc--;
+}
+
+static void
 arena_huge_dalloc_stats_update_undo(arena_t *arena, size_t usize)
 {
 	szind_t index = size2index(usize) - nlclasses - NBINS;
@@ -865,63 +897,64 @@
 }
 
 extent_node_t *
-arena_node_alloc(arena_t *arena)
+arena_node_alloc(tsdn_t *tsdn, arena_t *arena)
 {
 	extent_node_t *node;
 
-	malloc_mutex_lock(&arena->node_cache_mtx);
+	malloc_mutex_lock(tsdn, &arena->node_cache_mtx);
 	node = ql_last(&arena->node_cache, ql_link);
 	if (node == NULL) {
-		malloc_mutex_unlock(&arena->node_cache_mtx);
-		return (base_alloc(sizeof(extent_node_t)));
+		malloc_mutex_unlock(tsdn, &arena->node_cache_mtx);
+		return (base_alloc(tsdn, sizeof(extent_node_t)));
 	}
 	ql_tail_remove(&arena->node_cache, extent_node_t, ql_link);
-	malloc_mutex_unlock(&arena->node_cache_mtx);
+	malloc_mutex_unlock(tsdn, &arena->node_cache_mtx);
 	return (node);
 }
 
 void
-arena_node_dalloc(arena_t *arena, extent_node_t *node)
+arena_node_dalloc(tsdn_t *tsdn, arena_t *arena, extent_node_t *node)
 {
 
-	malloc_mutex_lock(&arena->node_cache_mtx);
+	malloc_mutex_lock(tsdn, &arena->node_cache_mtx);
 	ql_elm_new(node, ql_link);
 	ql_tail_insert(&arena->node_cache, node, ql_link);
-	malloc_mutex_unlock(&arena->node_cache_mtx);
+	malloc_mutex_unlock(tsdn, &arena->node_cache_mtx);
 }
 
 static void *
-arena_chunk_alloc_huge_hard(arena_t *arena, chunk_hooks_t *chunk_hooks,
-    size_t usize, size_t alignment, bool *zero, size_t csize)
+arena_chunk_alloc_huge_hard(tsdn_t *tsdn, arena_t *arena,
+    chunk_hooks_t *chunk_hooks, size_t usize, size_t alignment, bool *zero,
+    size_t csize)
 {
 	void *ret;
 	bool commit = true;
 
-	ret = chunk_alloc_wrapper(arena, chunk_hooks, NULL, csize, alignment,
-	    zero, &commit);
+	ret = chunk_alloc_wrapper(tsdn, arena, chunk_hooks, NULL, csize,
+	    alignment, zero, &commit);
 	if (ret == NULL) {
 		/* Revert optimistic stats updates. */
-		malloc_mutex_lock(&arena->lock);
+		malloc_mutex_lock(tsdn, &arena->lock);
 		if (config_stats) {
 			arena_huge_malloc_stats_update_undo(arena, usize);
 			arena->stats.mapped -= usize;
 		}
 		arena_nactive_sub(arena, usize >> LG_PAGE);
-		malloc_mutex_unlock(&arena->lock);
+		malloc_mutex_unlock(tsdn, &arena->lock);
 	}
 
 	return (ret);
 }
 
 void *
-arena_chunk_alloc_huge(arena_t *arena, size_t usize, size_t alignment,
-    bool *zero)
+arena_chunk_alloc_huge(tsdn_t *tsdn, arena_t *arena, size_t usize,
+    size_t alignment, bool *zero)
 {
 	void *ret;
 	chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER;
 	size_t csize = CHUNK_CEILING(usize);
 
-	malloc_mutex_lock(&arena->lock);
+	malloc_mutex_lock(tsdn, &arena->lock);
 
 	/* Optimistically update stats. */
 	if (config_stats) {
@@ -930,61 +963,61 @@
 	}
 	arena_nactive_add(arena, usize >> LG_PAGE);
 
-	ret = chunk_alloc_cache(arena, &chunk_hooks, NULL, csize, alignment,
-	    zero, true);
-	malloc_mutex_unlock(&arena->lock);
+	ret = chunk_alloc_cache(tsdn, arena, &chunk_hooks, NULL, csize,
+	    alignment, zero, true);
+	malloc_mutex_unlock(tsdn, &arena->lock);
 	if (ret == NULL) {
-		ret = arena_chunk_alloc_huge_hard(arena, &chunk_hooks, usize,
-		    alignment, zero, csize);
+		ret = arena_chunk_alloc_huge_hard(tsdn, arena, &chunk_hooks,
+		    usize, alignment, zero, csize);
 	}
 
 	return (ret);
 }
 
 void
-arena_chunk_dalloc_huge(arena_t *arena, void *chunk, size_t usize)
+arena_chunk_dalloc_huge(tsdn_t *tsdn, arena_t *arena, void *chunk, size_t usize)
 {
 	chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER;
 	size_t csize;
 
 	csize = CHUNK_CEILING(usize);
-	malloc_mutex_lock(&arena->lock);
+	malloc_mutex_lock(tsdn, &arena->lock);
 	if (config_stats) {
 		arena_huge_dalloc_stats_update(arena, usize);
 		arena->stats.mapped -= usize;
 	}
 	arena_nactive_sub(arena, usize >> LG_PAGE);
 
-	chunk_dalloc_cache(arena, &chunk_hooks, chunk, csize, true);
-	malloc_mutex_unlock(&arena->lock);
+	chunk_dalloc_cache(tsdn, arena, &chunk_hooks, chunk, csize, true);
+	malloc_mutex_unlock(tsdn, &arena->lock);
 }
 
 void
-arena_chunk_ralloc_huge_similar(arena_t *arena, void *chunk, size_t oldsize,
-    size_t usize)
+arena_chunk_ralloc_huge_similar(tsdn_t *tsdn, arena_t *arena, void *chunk,
+    size_t oldsize, size_t usize)
 {
 
 	assert(CHUNK_CEILING(oldsize) == CHUNK_CEILING(usize));
 	assert(oldsize != usize);
 
-	malloc_mutex_lock(&arena->lock);
+	malloc_mutex_lock(tsdn, &arena->lock);
 	if (config_stats)
 		arena_huge_ralloc_stats_update(arena, oldsize, usize);
 	if (oldsize < usize)
 		arena_nactive_add(arena, (usize - oldsize) >> LG_PAGE);
 	else
 		arena_nactive_sub(arena, (oldsize - usize) >> LG_PAGE);
-	malloc_mutex_unlock(&arena->lock);
+	malloc_mutex_unlock(tsdn, &arena->lock);
 }
 
 void
-arena_chunk_ralloc_huge_shrink(arena_t *arena, void *chunk, size_t oldsize,
-    size_t usize)
+arena_chunk_ralloc_huge_shrink(tsdn_t *tsdn, arena_t *arena, void *chunk,
+    size_t oldsize, size_t usize)
 {
 	size_t udiff = oldsize - usize;
 	size_t cdiff = CHUNK_CEILING(oldsize) - CHUNK_CEILING(usize);
 
-	malloc_mutex_lock(&arena->lock);
+	malloc_mutex_lock(tsdn, &arena->lock);
 	if (config_stats) {
 		arena_huge_ralloc_stats_update(arena, oldsize, usize);
 		if (cdiff != 0)
@@ -997,51 +1030,52 @@
 		void *nchunk = (void *)((uintptr_t)chunk +
 		    CHUNK_CEILING(usize));
 
-		chunk_dalloc_cache(arena, &chunk_hooks, nchunk, cdiff, true);
+		chunk_dalloc_cache(tsdn, arena, &chunk_hooks, nchunk, cdiff,
+		    true);
 	}
-	malloc_mutex_unlock(&arena->lock);
+	malloc_mutex_unlock(tsdn, &arena->lock);
 }
 
 static bool
-arena_chunk_ralloc_huge_expand_hard(arena_t *arena, chunk_hooks_t *chunk_hooks,
-    void *chunk, size_t oldsize, size_t usize, bool *zero, void *nchunk,
-    size_t udiff, size_t cdiff)
+arena_chunk_ralloc_huge_expand_hard(tsdn_t *tsdn, arena_t *arena,
+    chunk_hooks_t *chunk_hooks, void *chunk, size_t oldsize, size_t usize,
+    bool *zero, void *nchunk, size_t udiff, size_t cdiff)
 {
 	bool err;
 	bool commit = true;
 
-	err = (chunk_alloc_wrapper(arena, chunk_hooks, nchunk, cdiff, chunksize,
-	    zero, &commit) == NULL);
+	err = (chunk_alloc_wrapper(tsdn, arena, chunk_hooks, nchunk, cdiff,
+	    chunksize, zero, &commit) == NULL);
 	if (err) {
 		/* Revert optimistic stats updates. */
-		malloc_mutex_lock(&arena->lock);
+		malloc_mutex_lock(tsdn, &arena->lock);
 		if (config_stats) {
 			arena_huge_ralloc_stats_update_undo(arena, oldsize,
 			    usize);
 			arena->stats.mapped -= cdiff;
 		}
 		arena_nactive_sub(arena, udiff >> LG_PAGE);
-		malloc_mutex_unlock(&arena->lock);
+		malloc_mutex_unlock(tsdn, &arena->lock);
 	} else if (chunk_hooks->merge(chunk, CHUNK_CEILING(oldsize), nchunk,
 	    cdiff, true, arena->ind)) {
-		chunk_dalloc_wrapper(arena, chunk_hooks, nchunk, cdiff, *zero,
-		    true);
+		chunk_dalloc_wrapper(tsdn, arena, chunk_hooks, nchunk, cdiff,
+		    *zero, true);
 		err = true;
 	}
 	return (err);
 }
 
 bool
-arena_chunk_ralloc_huge_expand(arena_t *arena, void *chunk, size_t oldsize,
-    size_t usize, bool *zero)
+arena_chunk_ralloc_huge_expand(tsdn_t *tsdn, arena_t *arena, void *chunk,
+    size_t oldsize, size_t usize, bool *zero)
 {
 	bool err;
-	chunk_hooks_t chunk_hooks = chunk_hooks_get(arena);
+	chunk_hooks_t chunk_hooks = chunk_hooks_get(tsdn, arena);
 	void *nchunk = (void *)((uintptr_t)chunk + CHUNK_CEILING(oldsize));
 	size_t udiff = usize - oldsize;
 	size_t cdiff = CHUNK_CEILING(usize) - CHUNK_CEILING(oldsize);
 
-	malloc_mutex_lock(&arena->lock);
+	malloc_mutex_lock(tsdn, &arena->lock);
 
 	/* Optimistically update stats. */
 	if (config_stats) {
@@ -1050,17 +1084,17 @@
 	}
 	arena_nactive_add(arena, udiff >> LG_PAGE);
 
-	err = (chunk_alloc_cache(arena, &chunk_hooks, nchunk, cdiff, chunksize,
-	    zero, true) == NULL);
-	malloc_mutex_unlock(&arena->lock);
+	err = (chunk_alloc_cache(tsdn, arena, &chunk_hooks, nchunk, cdiff,
+	    chunksize, zero, true) == NULL);
+	malloc_mutex_unlock(tsdn, &arena->lock);
 	if (err) {
-		err = arena_chunk_ralloc_huge_expand_hard(arena, &chunk_hooks,
-		    chunk, oldsize, usize, zero, nchunk, udiff,
+		err = arena_chunk_ralloc_huge_expand_hard(tsdn, arena,
+		    &chunk_hooks, chunk, oldsize, usize, zero, nchunk, udiff,
 		    cdiff);
 	} else if (chunk_hooks.merge(chunk, CHUNK_CEILING(oldsize), nchunk,
 	    cdiff, true, arena->ind)) {
-		chunk_dalloc_wrapper(arena, &chunk_hooks, nchunk, cdiff, *zero,
-		    true);
+		chunk_dalloc_wrapper(tsdn, arena, &chunk_hooks, nchunk, cdiff,
+		    *zero, true);
 		err = true;
 	}
 
@@ -1079,7 +1113,7 @@
 
 	ind = size2index(run_quantize_ceil(size));
 	for (i = ind; i < runs_avail_nclasses + runs_avail_bias; i++) {
-		arena_chunk_map_misc_t *miscelm = arena_run_tree_first(
+		arena_chunk_map_misc_t *miscelm = arena_run_heap_first(
 		    arena_runs_avail_get(arena, i));
 		if (miscelm != NULL)
 			return (&miscelm->run);
@@ -1100,7 +1134,7 @@
 }
 
 static arena_run_t *
-arena_run_alloc_large(arena_t *arena, size_t size, bool zero)
+arena_run_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t size, bool zero)
 {
 	arena_chunk_t *chunk;
 	arena_run_t *run;
@@ -1116,9 +1150,9 @@
 	/*
 	 * No usable runs.  Create a new chunk from which to allocate the run.
 	 */
-	chunk = arena_chunk_alloc(arena);
+	chunk = arena_chunk_alloc(tsdn, arena);
 	if (chunk != NULL) {
-		run = &arena_miscelm_get(chunk, map_bias)->run;
+		run = &arena_miscelm_get_mutable(chunk, map_bias)->run;
 		if (arena_run_split_large(arena, run, size, zero))
 			run = NULL;
 		return (run);
@@ -1144,7 +1178,7 @@
 }
 
 static arena_run_t *
-arena_run_alloc_small(arena_t *arena, size_t size, szind_t binind)
+arena_run_alloc_small(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t binind)
 {
 	arena_chunk_t *chunk;
 	arena_run_t *run;
@@ -1161,9 +1195,9 @@
 	/*
 	 * No usable runs.  Create a new chunk from which to allocate the run.
 	 */
-	chunk = arena_chunk_alloc(arena);
+	chunk = arena_chunk_alloc(tsdn, arena);
 	if (chunk != NULL) {
-		run = &arena_miscelm_get(chunk, map_bias)->run;
+		run = &arena_miscelm_get_mutable(chunk, map_bias)->run;
 		if (arena_run_split_small(arena, run, size, binind))
 			run = NULL;
 		return (run);
@@ -1186,28 +1220,28 @@
 }
 
 ssize_t
-arena_lg_dirty_mult_get(arena_t *arena)
+arena_lg_dirty_mult_get(tsdn_t *tsdn, arena_t *arena)
 {
 	ssize_t lg_dirty_mult;
 
-	malloc_mutex_lock(&arena->lock);
+	malloc_mutex_lock(tsdn, &arena->lock);
 	lg_dirty_mult = arena->lg_dirty_mult;
-	malloc_mutex_unlock(&arena->lock);
+	malloc_mutex_unlock(tsdn, &arena->lock);
 
 	return (lg_dirty_mult);
 }
 
 bool
-arena_lg_dirty_mult_set(arena_t *arena, ssize_t lg_dirty_mult)
+arena_lg_dirty_mult_set(tsdn_t *tsdn, arena_t *arena, ssize_t lg_dirty_mult)
 {
 
 	if (!arena_lg_dirty_mult_valid(lg_dirty_mult))
 		return (true);
 
-	malloc_mutex_lock(&arena->lock);
+	malloc_mutex_lock(tsdn, &arena->lock);
 	arena->lg_dirty_mult = lg_dirty_mult;
-	arena_maybe_purge(arena);
-	malloc_mutex_unlock(&arena->lock);
+	arena_maybe_purge(tsdn, arena);
+	malloc_mutex_unlock(tsdn, &arena->lock);
 
 	return (false);
 }
@@ -1265,7 +1299,7 @@
 	sum = 0;
 	for (i = 0; i < SMOOTHSTEP_NSTEPS; i++)
 		sum += arena->decay_backlog[i] * h_steps[i];
-	npages_limit_backlog = (sum >> SMOOTHSTEP_BFP);
+	npages_limit_backlog = (size_t)(sum >> SMOOTHSTEP_BFP);
 
 	return (npages_limit_backlog);
 }
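
The backlog sum above is a fixed-point computation: each decay_backlog[] slot
counts pages that became dirty during one decay interval, each h_steps[] entry
is a smoothstep weight with SMOOTHSTEP_BFP fractional bits, and the final
shift converts the weighted total back to whole pages.  A standalone sketch
with illustrative sizes (not jemalloc's actual tables):

    #include <stddef.h>
    #include <stdint.h>

    #define	EX_BFP		24	/* fractional bits; stands in for SMOOTHSTEP_BFP */
    #define	EX_NSTEPS	4	/* stands in for SMOOTHSTEP_NSTEPS */

    static size_t
    example_npages_limit(const size_t *backlog, const uint64_t *weights)
    {
    	uint64_t sum = 0;
    	unsigned i;

    	/* Weighted page count in fixed point, then drop the fraction. */
    	for (i = 0; i < EX_NSTEPS; i++)
    		sum += (uint64_t)backlog[i] * weights[i];
    	return ((size_t)(sum >> EX_BFP));
    }
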
@@ -1273,7 +1307,7 @@
 static void
 arena_decay_epoch_advance(arena_t *arena, const nstime_t *time)
 {
-	uint64_t nadvance;
+	uint64_t nadvance_u64;
 	nstime_t delta;
 	size_t ndirty_delta;
 
@@ -1282,27 +1316,31 @@
 
 	nstime_copy(&delta, time);
 	nstime_subtract(&delta, &arena->decay_epoch);
-	nadvance = nstime_divide(&delta, &arena->decay_interval);
-	assert(nadvance > 0);
+	nadvance_u64 = nstime_divide(&delta, &arena->decay_interval);
+	assert(nadvance_u64 > 0);
 
-	/* Add nadvance decay intervals to epoch. */
+	/* Add nadvance_u64 decay intervals to epoch. */
 	nstime_copy(&delta, &arena->decay_interval);
-	nstime_imultiply(&delta, nadvance);
+	nstime_imultiply(&delta, nadvance_u64);
 	nstime_add(&arena->decay_epoch, &delta);
 
 	/* Set a new deadline. */
 	arena_decay_deadline_init(arena);
 
 	/* Update the backlog. */
-	if (nadvance >= SMOOTHSTEP_NSTEPS) {
+	if (nadvance_u64 >= SMOOTHSTEP_NSTEPS) {
 		memset(arena->decay_backlog, 0, (SMOOTHSTEP_NSTEPS-1) *
 		    sizeof(size_t));
 	} else {
-		memmove(arena->decay_backlog, &arena->decay_backlog[nadvance],
-		    (SMOOTHSTEP_NSTEPS - nadvance) * sizeof(size_t));
-		if (nadvance > 1) {
+		size_t nadvance_z = (size_t)nadvance_u64;
+
+		assert((uint64_t)nadvance_z == nadvance_u64);
+
+		memmove(arena->decay_backlog, &arena->decay_backlog[nadvance_z],
+		    (SMOOTHSTEP_NSTEPS - nadvance_z) * sizeof(size_t));
+		if (nadvance_z > 1) {
 			memset(&arena->decay_backlog[SMOOTHSTEP_NSTEPS -
-			    nadvance], 0, (nadvance-1) * sizeof(size_t));
+			    nadvance_z], 0, (nadvance_z-1) * sizeof(size_t));
 		}
 	}
 	ndirty_delta = (arena->ndirty > arena->decay_ndirty) ? arena->ndirty -
@@ -1352,29 +1390,33 @@
 arena_decay_time_valid(ssize_t decay_time)
 {
 
-	return (decay_time >= -1 && decay_time <= NSTIME_SEC_MAX);
+	if (decay_time < -1)
+		return (false);
+	if (decay_time == -1 || (uint64_t)decay_time <= NSTIME_SEC_MAX)
+		return (true);
+	return (false);
 }
 
 ssize_t
-arena_decay_time_get(arena_t *arena)
+arena_decay_time_get(tsdn_t *tsdn, arena_t *arena)
 {
 	ssize_t decay_time;
 
-	malloc_mutex_lock(&arena->lock);
+	malloc_mutex_lock(tsdn, &arena->lock);
 	decay_time = arena->decay_time;
-	malloc_mutex_unlock(&arena->lock);
+	malloc_mutex_unlock(tsdn, &arena->lock);
 
 	return (decay_time);
 }
 
 bool
-arena_decay_time_set(arena_t *arena, ssize_t decay_time)
+arena_decay_time_set(tsdn_t *tsdn, arena_t *arena, ssize_t decay_time)
 {
 
 	if (!arena_decay_time_valid(decay_time))
 		return (true);
 
-	malloc_mutex_lock(&arena->lock);
+	malloc_mutex_lock(tsdn, &arena->lock);
 	/*
 	 * Restart decay backlog from scratch, which may cause many dirty pages
 	 * to be immediately purged.  It would conceptually be possible to map
@@ -1384,14 +1426,14 @@
 	 * arbitrary change during initial arena configuration.
 	 */
 	arena_decay_init(arena, decay_time);
-	arena_maybe_purge(arena);
-	malloc_mutex_unlock(&arena->lock);
+	arena_maybe_purge(tsdn, arena);
+	malloc_mutex_unlock(tsdn, &arena->lock);
 
 	return (false);
 }
 
 static void
-arena_maybe_purge_ratio(arena_t *arena)
+arena_maybe_purge_ratio(tsdn_t *tsdn, arena_t *arena)
 {
 
 	assert(opt_purge == purge_mode_ratio);
@@ -1414,12 +1456,12 @@
 		 */
 		if (arena->ndirty <= threshold)
 			return;
-		arena_purge_to_limit(arena, threshold);
+		arena_purge_to_limit(tsdn, arena, threshold);
 	}
 }
 
 static void
-arena_maybe_purge_decay(arena_t *arena)
+arena_maybe_purge_decay(tsdn_t *tsdn, arena_t *arena)
 {
 	nstime_t time;
 	size_t ndirty_limit;
@@ -1429,7 +1471,7 @@
 	/* Purge all or nothing if the option is disabled. */
 	if (arena->decay_time <= 0) {
 		if (arena->decay_time == 0)
-			arena_purge_to_limit(arena, 0);
+			arena_purge_to_limit(tsdn, arena, 0);
 		return;
 	}
 
@@ -1450,11 +1492,11 @@
 	 */
 	if (arena->ndirty <= ndirty_limit)
 		return;
-	arena_purge_to_limit(arena, ndirty_limit);
+	arena_purge_to_limit(tsdn, arena, ndirty_limit);
 }
 
 void
-arena_maybe_purge(arena_t *arena)
+arena_maybe_purge(tsdn_t *tsdn, arena_t *arena)
 {
 
 	/* Don't recursively purge. */
@@ -1462,9 +1504,9 @@
 		return;
 
 	if (opt_purge == purge_mode_ratio)
-		arena_maybe_purge_ratio(arena);
+		arena_maybe_purge_ratio(tsdn, arena);
 	else
-		arena_maybe_purge_decay(arena);
+		arena_maybe_purge_decay(tsdn, arena);
 }
 
 static size_t
@@ -1502,7 +1544,7 @@
 }
 
 static size_t
-arena_stash_dirty(arena_t *arena, chunk_hooks_t *chunk_hooks,
+arena_stash_dirty(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks,
     size_t ndirty_limit, arena_runs_dirty_link_t *purge_runs_sentinel,
     extent_node_t *purge_chunks_sentinel)
 {
@@ -1533,7 +1575,7 @@
 			 * dalloc_node=false argument to chunk_alloc_cache().
 			 */
 			zero = false;
-			chunk = chunk_alloc_cache(arena, chunk_hooks,
+			chunk = chunk_alloc_cache(tsdn, arena, chunk_hooks,
 			    extent_node_addr_get(chunkselm),
 			    extent_node_size_get(chunkselm), chunksize, &zero,
 			    false);
@@ -1568,7 +1610,7 @@
 			 * prior to allocation.
 			 */
 			if (chunk == arena->spare)
-				arena_chunk_alloc(arena);
+				arena_chunk_alloc(tsdn, arena);
 
 			/* Temporarily allocate the free dirty run. */
 			arena_run_split_large(arena, run, run_size, false);
@@ -1592,7 +1634,7 @@
 }
 
 static size_t
-arena_purge_stashed(arena_t *arena, chunk_hooks_t *chunk_hooks,
+arena_purge_stashed(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks,
     arena_runs_dirty_link_t *purge_runs_sentinel,
     extent_node_t *purge_chunks_sentinel)
 {
@@ -1604,7 +1646,7 @@
 		nmadvise = 0;
 	npurged = 0;
 
-	malloc_mutex_unlock(&arena->lock);
+	malloc_mutex_unlock(tsdn, &arena->lock);
 	for (rdelm = qr_next(purge_runs_sentinel, rd_link),
 	    chunkselm = qr_next(purge_chunks_sentinel, cc_link);
 	    rdelm != purge_runs_sentinel; rdelm = qr_next(rdelm, rd_link)) {
@@ -1643,7 +1685,7 @@
 				flag_unzeroed = 0;
 				flags = CHUNK_MAP_DECOMMITTED;
 			} else {
-				flag_unzeroed = chunk_purge_wrapper(arena,
+				flag_unzeroed = chunk_purge_wrapper(tsdn, arena,
 				    chunk_hooks, chunk, chunksize, pageind <<
 				    LG_PAGE, run_size) ? CHUNK_MAP_UNZEROED : 0;
 				flags = flag_unzeroed;
@@ -1674,7 +1716,7 @@
 		if (config_stats)
 			nmadvise++;
 	}
-	malloc_mutex_lock(&arena->lock);
+	malloc_mutex_lock(tsdn, &arena->lock);
 
 	if (config_stats) {
 		arena->stats.nmadvise += nmadvise;
@@ -1685,7 +1727,7 @@
 }
 
 static void
-arena_unstash_purged(arena_t *arena, chunk_hooks_t *chunk_hooks,
+arena_unstash_purged(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks,
     arena_runs_dirty_link_t *purge_runs_sentinel,
     extent_node_t *purge_chunks_sentinel)
 {
@@ -1705,10 +1747,10 @@
 			bool zeroed = extent_node_zeroed_get(chunkselm);
 			bool committed = extent_node_committed_get(chunkselm);
 			extent_node_dirty_remove(chunkselm);
-			arena_node_dalloc(arena, chunkselm);
+			arena_node_dalloc(tsdn, arena, chunkselm);
 			chunkselm = chunkselm_next;
-			chunk_dalloc_wrapper(arena, chunk_hooks, addr, size,
-			    zeroed, committed);
+			chunk_dalloc_wrapper(tsdn, arena, chunk_hooks, addr,
+			    size, zeroed, committed);
 		} else {
 			arena_chunk_t *chunk =
 			    (arena_chunk_t *)CHUNK_ADDR2BASE(rdelm);
@@ -1719,7 +1761,8 @@
 			    pageind) != 0);
 			arena_run_t *run = &miscelm->run;
 			qr_remove(rdelm, rd_link);
-			arena_run_dalloc(arena, run, false, true, decommitted);
+			arena_run_dalloc(tsdn, arena, run, false, true,
+			    decommitted);
 		}
 	}
 }
@@ -1734,9 +1777,9 @@
  *                       (arena->ndirty >= ndirty_limit)
  */
 static void
-arena_purge_to_limit(arena_t *arena, size_t ndirty_limit)
+arena_purge_to_limit(tsdn_t *tsdn, arena_t *arena, size_t ndirty_limit)
 {
-	chunk_hooks_t chunk_hooks = chunk_hooks_get(arena);
+	chunk_hooks_t chunk_hooks = chunk_hooks_get(tsdn, arena);
 	size_t npurge, npurged;
 	arena_runs_dirty_link_t purge_runs_sentinel;
 	extent_node_t purge_chunks_sentinel;
@@ -1757,14 +1800,14 @@
 	qr_new(&purge_runs_sentinel, rd_link);
 	extent_node_dirty_linkage_init(&purge_chunks_sentinel);
 
-	npurge = arena_stash_dirty(arena, &chunk_hooks, ndirty_limit,
+	npurge = arena_stash_dirty(tsdn, arena, &chunk_hooks, ndirty_limit,
 	    &purge_runs_sentinel, &purge_chunks_sentinel);
 	if (npurge == 0)
 		goto label_return;
-	npurged = arena_purge_stashed(arena, &chunk_hooks, &purge_runs_sentinel,
-	    &purge_chunks_sentinel);
+	npurged = arena_purge_stashed(tsdn, arena, &chunk_hooks,
+	    &purge_runs_sentinel, &purge_chunks_sentinel);
 	assert(npurged == npurge);
-	arena_unstash_purged(arena, &chunk_hooks, &purge_runs_sentinel,
+	arena_unstash_purged(tsdn, arena, &chunk_hooks, &purge_runs_sentinel,
 	    &purge_chunks_sentinel);
 
 	if (config_stats)
@@ -1775,15 +1818,159 @@
 }
 
 void
-arena_purge(arena_t *arena, bool all)
+arena_purge(tsdn_t *tsdn, arena_t *arena, bool all)
 {
 
-	malloc_mutex_lock(&arena->lock);
+	malloc_mutex_lock(tsdn, &arena->lock);
 	if (all)
-		arena_purge_to_limit(arena, 0);
+		arena_purge_to_limit(tsdn, arena, 0);
 	else
-		arena_maybe_purge(arena);
-	malloc_mutex_unlock(&arena->lock);
+		arena_maybe_purge(tsdn, arena);
+	malloc_mutex_unlock(tsdn, &arena->lock);
+}
+
+static void
+arena_achunk_prof_reset(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk)
+{
+	size_t pageind, npages;
+
+	cassert(config_prof);
+	assert(opt_prof);
+
+	/*
+	 * Iterate over the allocated runs and remove profiled allocations from
+	 * the sample set.
+	 */
+	for (pageind = map_bias; pageind < chunk_npages; pageind += npages) {
+		if (arena_mapbits_allocated_get(chunk, pageind) != 0) {
+			if (arena_mapbits_large_get(chunk, pageind) != 0) {
+				void *ptr = (void *)((uintptr_t)chunk + (pageind
+				    << LG_PAGE));
+				size_t usize = isalloc(tsd_tsdn(tsd), ptr,
+				    config_prof);
+
+				prof_free(tsd, ptr, usize);
+				npages = arena_mapbits_large_size_get(chunk,
+				    pageind) >> LG_PAGE;
+			} else {
+				/* Skip small run. */
+				size_t binind = arena_mapbits_binind_get(chunk,
+				    pageind);
+				arena_bin_info_t *bin_info =
+				    &arena_bin_info[binind];
+				npages = bin_info->run_size >> LG_PAGE;
+			}
+		} else {
+			/* Skip unallocated run. */
+			npages = arena_mapbits_unallocated_size_get(chunk,
+			    pageind) >> LG_PAGE;
+		}
+		assert(pageind + npages <= chunk_npages);
+	}
+}
+
+void
+arena_reset(tsd_t *tsd, arena_t *arena)
+{
+	unsigned i;
+	extent_node_t *node;
+
+	/*
+	 * Locking in this function is unintuitive.  The caller guarantees that
+	 * no concurrent operations are happening in this arena, but there are
+	 * still reasons that some locking is necessary:
+	 *
+	 * - Some of the functions in the transitive closure of calls assume
+	 *   appropriate locks are held, and in some cases these locks are
+	 *   temporarily dropped to avoid lock order reversal or deadlock due to
+	 *   reentry.
+	 * - mallctl("epoch", ...) may concurrently refresh stats.  While
+	 *   strictly speaking this is a "concurrent operation", disallowing
+	 *   stats refreshes would impose an inconvenient burden.
+	 */
+
+	/* Remove large allocations from prof sample set. */
+	if (config_prof && opt_prof) {
+		ql_foreach(node, &arena->achunks, ql_link) {
+			arena_achunk_prof_reset(tsd, arena,
+			    extent_node_addr_get(node));
+		}
+	}
+
+	/* Reset curruns for large size classes. */
+	if (config_stats) {
+		for (i = 0; i < nlclasses; i++)
+			arena->stats.lstats[i].curruns = 0;
+	}
+
+	/* Huge allocations. */
+	malloc_mutex_lock(tsd_tsdn(tsd), &arena->huge_mtx);
+	for (node = ql_last(&arena->huge, ql_link); node != NULL; node =
+	    ql_last(&arena->huge, ql_link)) {
+		void *ptr = extent_node_addr_get(node);
+		size_t usize;
+
+		malloc_mutex_unlock(tsd_tsdn(tsd), &arena->huge_mtx);
+		if (config_stats || (config_prof && opt_prof))
+			usize = isalloc(tsd_tsdn(tsd), ptr, config_prof);
+		/* Remove huge allocation from prof sample set. */
+		if (config_prof && opt_prof)
+			prof_free(tsd, ptr, usize);
+		huge_dalloc(tsd_tsdn(tsd), ptr);
+		malloc_mutex_lock(tsd_tsdn(tsd), &arena->huge_mtx);
+		/* Cancel out unwanted effects on stats. */
+		if (config_stats)
+			arena_huge_reset_stats_cancel(arena, usize);
+	}
+	malloc_mutex_unlock(tsd_tsdn(tsd), &arena->huge_mtx);
+
+	malloc_mutex_lock(tsd_tsdn(tsd), &arena->lock);
+
+	/* Bins. */
+	for (i = 0; i < NBINS; i++) {
+		arena_bin_t *bin = &arena->bins[i];
+		malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock);
+		bin->runcur = NULL;
+		arena_run_heap_new(&bin->runs);
+		if (config_stats) {
+			bin->stats.curregs = 0;
+			bin->stats.curruns = 0;
+		}
+		malloc_mutex_unlock(tsd_tsdn(tsd), &bin->lock);
+	}
+
+	/*
+	 * Re-initialize runs_dirty such that the chunks_cache and runs_dirty
+	 * chains directly correspond.
+	 */
+	qr_new(&arena->runs_dirty, rd_link);
+	for (node = qr_next(&arena->chunks_cache, cc_link);
+	    node != &arena->chunks_cache; node = qr_next(node, cc_link)) {
+		qr_new(&node->rd, rd_link);
+		qr_meld(&arena->runs_dirty, &node->rd, rd_link);
+	}
+
+	/* Arena chunks. */
+	for (node = ql_last(&arena->achunks, ql_link); node != NULL; node =
+	    ql_last(&arena->achunks, ql_link)) {
+		ql_remove(&arena->achunks, node, ql_link);
+		arena_chunk_discard(tsd_tsdn(tsd), arena,
+		    extent_node_addr_get(node));
+	}
+
+	/* Spare. */
+	if (arena->spare != NULL) {
+		arena_chunk_discard(tsd_tsdn(tsd), arena, arena->spare);
+		arena->spare = NULL;
+	}
+
+	assert(!arena->purging);
+	arena->nactive = 0;
+
+	for(i = 0; i < runs_avail_nclasses; i++)
+		arena_run_heap_new(&arena->runs_avail[i]);
+
+	malloc_mutex_unlock(tsd_tsdn(tsd), &arena->lock);
 }
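
The arena_reset() function added above discards every allocation an arena owns in one operation, under the condition spelled out in its comment: the caller must guarantee that nothing else is concurrently allocating from or deallocating into that arena. A minimal usage sketch follows, assuming this is driven through the arena.<i>.reset mallctl and that "arenas.extend" is still the control that creates a fresh arena in this version; both control names are assumptions here, not taken from the diff.

#include <stdio.h>
#include <jemalloc/jemalloc.h>

int
main(void)
{
	unsigned arena_ind;
	size_t sz = sizeof(arena_ind);
	char cmd[64];
	void *p;

	/* Create a dedicated arena so no other thread uses it. */
	if (mallctl("arenas.extend", &arena_ind, &sz, NULL, 0) != 0)
		return (1);

	/* Allocate from that arena, bypassing the thread cache so the
	 * reset below does not leave stale pointers cached elsewhere. */
	p = mallocx(4096, MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE);
	if (p == NULL)
		return (1);

	/* Discard everything the arena owns; p is invalid afterwards. */
	snprintf(cmd, sizeof(cmd), "arena.%u.reset", arena_ind);
	if (mallctl(cmd, NULL, NULL, NULL, 0) != 0)
		return (1);

	return (0);
}
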
 
 static void
@@ -1900,8 +2087,8 @@
 }
 
 static void
-arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty, bool cleaned,
-    bool decommitted)
+arena_run_dalloc(tsdn_t *tsdn, arena_t *arena, arena_run_t *run, bool dirty,
+    bool cleaned, bool decommitted)
 {
 	arena_chunk_t *chunk;
 	arena_chunk_map_misc_t *miscelm;
@@ -1961,7 +2148,7 @@
 	if (size == arena_maxrun) {
 		assert(run_ind == map_bias);
 		assert(run_pages == (arena_maxrun >> LG_PAGE));
-		arena_chunk_dalloc(arena, chunk);
+		arena_chunk_dalloc(tsdn, arena, chunk);
 	}
 
 	/*
@@ -1972,12 +2159,12 @@
 	 * chances of spuriously crossing the dirty page purging threshold.
 	 */
 	if (dirty)
-		arena_maybe_purge(arena);
+		arena_maybe_purge(tsdn, arena);
 }
 
 static void
-arena_run_trim_head(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run,
-    size_t oldsize, size_t newsize)
+arena_run_trim_head(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk,
+    arena_run_t *run, size_t oldsize, size_t newsize)
 {
 	arena_chunk_map_misc_t *miscelm = arena_run_to_miscelm(run);
 	size_t pageind = arena_miscelm_to_pageind(miscelm);
@@ -2012,12 +2199,13 @@
 	    flag_dirty | (flag_unzeroed_mask & arena_mapbits_unzeroed_get(chunk,
 	    pageind+head_npages)));
 
-	arena_run_dalloc(arena, run, false, false, (flag_decommitted != 0));
+	arena_run_dalloc(tsdn, arena, run, false, false, (flag_decommitted !=
+	    0));
 }
 
 static void
-arena_run_trim_tail(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run,
-    size_t oldsize, size_t newsize, bool dirty)
+arena_run_trim_tail(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk,
+    arena_run_t *run, size_t oldsize, size_t newsize, bool dirty)
 {
 	arena_chunk_map_misc_t *miscelm = arena_run_to_miscelm(run);
 	size_t pageind = arena_miscelm_to_pageind(miscelm);
@@ -2054,20 +2242,10 @@
 	    flag_dirty | (flag_unzeroed_mask & arena_mapbits_unzeroed_get(chunk,
 	    pageind+head_npages)));
 
-	tail_miscelm = arena_miscelm_get(chunk, pageind + head_npages);
+	tail_miscelm = arena_miscelm_get_mutable(chunk, pageind + head_npages);
 	tail_run = &tail_miscelm->run;
-	arena_run_dalloc(arena, tail_run, dirty, false, (flag_decommitted !=
-	    0));
-}
-
-static arena_run_t *
-arena_bin_runs_first(arena_bin_t *bin)
-{
-	arena_chunk_map_misc_t *miscelm = arena_run_tree_first(&bin->runs);
-	if (miscelm != NULL)
-		return (&miscelm->run);
-
-	return (NULL);
+	arena_run_dalloc(tsdn, arena, tail_run, dirty, false, (flag_decommitted
+	    != 0));
 }
 
 static void
@@ -2075,35 +2253,25 @@
 {
 	arena_chunk_map_misc_t *miscelm = arena_run_to_miscelm(run);
 
-	assert(arena_run_tree_search(&bin->runs, miscelm) == NULL);
-
-	arena_run_tree_insert(&bin->runs, miscelm);
-}
-
-static void
-arena_bin_runs_remove(arena_bin_t *bin, arena_run_t *run)
-{
-	arena_chunk_map_misc_t *miscelm = arena_run_to_miscelm(run);
-
-	assert(arena_run_tree_search(&bin->runs, miscelm) != NULL);
-
-	arena_run_tree_remove(&bin->runs, miscelm);
+	arena_run_heap_insert(&bin->runs, miscelm);
 }
 
 static arena_run_t *
 arena_bin_nonfull_run_tryget(arena_bin_t *bin)
 {
-	arena_run_t *run = arena_bin_runs_first(bin);
-	if (run != NULL) {
-		arena_bin_runs_remove(bin, run);
-		if (config_stats)
-			bin->stats.reruns++;
-	}
-	return (run);
+	arena_chunk_map_misc_t *miscelm;
+
+	miscelm = arena_run_heap_remove_first(&bin->runs);
+	if (miscelm == NULL)
+		return (NULL);
+	if (config_stats)
+		bin->stats.reruns++;
+
+	return (&miscelm->run);
 }
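
In this hunk bin->runs changes from a red-black tree of non-full runs (arena_run_tree_*) to a heap (arena_run_heap_*), so fetching the first (lowest) run becomes a single remove-first call instead of a search followed by a remove. A rough sketch of remove-first semantics on a plain array-backed binary min-heap; this only illustrates the operation, it is not jemalloc's pairing heap.

#include <stdbool.h>
#include <stddef.h>

typedef struct {
	size_t	keys[64];	/* heap-ordered: smallest key at keys[0] */
	size_t	count;
} run_heap_t;

/* Pop the smallest key; returns false if the heap is empty. */
static bool
run_heap_remove_first(run_heap_t *heap, size_t *key)
{
	size_t i = 0;

	if (heap->count == 0)
		return (false);
	*key = heap->keys[0];
	/* Move the last key to the root and sift it down. */
	heap->keys[0] = heap->keys[--heap->count];
	while (2*i + 1 < heap->count) {
		size_t child = 2*i + 1;
		size_t tmp;

		if (child + 1 < heap->count &&
		    heap->keys[child + 1] < heap->keys[child])
			child++;
		if (heap->keys[i] <= heap->keys[child])
			break;
		tmp = heap->keys[i];
		heap->keys[i] = heap->keys[child];
		heap->keys[child] = tmp;
		i = child;
	}
	return (true);
}
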
 
 static arena_run_t *
-arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin)
+arena_bin_nonfull_run_get(tsdn_t *tsdn, arena_t *arena, arena_bin_t *bin)
 {
 	arena_run_t *run;
 	szind_t binind;
@@ -2119,19 +2287,19 @@
 	bin_info = &arena_bin_info[binind];
 
 	/* Allocate a new run. */
-	malloc_mutex_unlock(&bin->lock);
+	malloc_mutex_unlock(tsdn, &bin->lock);
 	/******************************/
-	malloc_mutex_lock(&arena->lock);
-	run = arena_run_alloc_small(arena, bin_info->run_size, binind);
+	malloc_mutex_lock(tsdn, &arena->lock);
+	run = arena_run_alloc_small(tsdn, arena, bin_info->run_size, binind);
 	if (run != NULL) {
 		/* Initialize run internals. */
 		run->binind = binind;
 		run->nfree = bin_info->nregs;
 		bitmap_init(run->bitmap, &bin_info->bitmap_info);
 	}
-	malloc_mutex_unlock(&arena->lock);
+	malloc_mutex_unlock(tsdn, &arena->lock);
 	/********************************/
-	malloc_mutex_lock(&bin->lock);
+	malloc_mutex_lock(tsdn, &bin->lock);
 	if (run != NULL) {
 		if (config_stats) {
 			bin->stats.nruns++;
@@ -2154,7 +2322,7 @@
 
 /* Re-fill bin->runcur, then call arena_run_reg_alloc(). */
 static void *
-arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin)
+arena_bin_malloc_hard(tsdn_t *tsdn, arena_t *arena, arena_bin_t *bin)
 {
 	szind_t binind;
 	arena_bin_info_t *bin_info;
@@ -2163,7 +2331,7 @@
 	binind = arena_bin_index(arena, bin);
 	bin_info = &arena_bin_info[binind];
 	bin->runcur = NULL;
-	run = arena_bin_nonfull_run_get(arena, bin);
+	run = arena_bin_nonfull_run_get(tsdn, arena, bin);
 	if (bin->runcur != NULL && bin->runcur->nfree > 0) {
 		/*
 		 * Another thread updated runcur while this one ran without the
@@ -2184,9 +2352,10 @@
 			 * were just deallocated from the run.
 			 */
 			chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
-			if (run->nfree == bin_info->nregs)
-				arena_dalloc_bin_run(arena, chunk, run, bin);
-			else
+			if (run->nfree == bin_info->nregs) {
+				arena_dalloc_bin_run(tsdn, arena, chunk, run,
+				    bin);
+			} else
 				arena_bin_lower_run(arena, chunk, run, bin);
 		}
 		return (ret);
@@ -2203,7 +2372,7 @@
 }
 
 void
-arena_tcache_fill_small(tsd_t *tsd, arena_t *arena, tcache_bin_t *tbin,
+arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_bin_t *tbin,
     szind_t binind, uint64_t prof_accumbytes)
 {
 	unsigned i, nfill;
@@ -2211,10 +2380,10 @@
 
 	assert(tbin->ncached == 0);
 
-	if (config_prof && arena_prof_accum(arena, prof_accumbytes))
-		prof_idump();
+	if (config_prof && arena_prof_accum(tsdn, arena, prof_accumbytes))
+		prof_idump(tsdn);
 	bin = &arena->bins[binind];
-	malloc_mutex_lock(&bin->lock);
+	malloc_mutex_lock(tsdn, &bin->lock);
 	for (i = 0, nfill = (tcache_bin_info[binind].ncached_max >>
 	    tbin->lg_fill_div); i < nfill; i++) {
 		arena_run_t *run;
@@ -2222,7 +2391,7 @@
 		if ((run = bin->runcur) != NULL && run->nfree > 0)
 			ptr = arena_run_reg_alloc(run, &arena_bin_info[binind]);
 		else
-			ptr = arena_bin_malloc_hard(arena, bin);
+			ptr = arena_bin_malloc_hard(tsdn, arena, bin);
 		if (ptr == NULL) {
 			/*
 			 * OOM.  tbin->avail isn't yet filled down to its first
@@ -2249,30 +2418,31 @@
 		bin->stats.nfills++;
 		tbin->tstats.nrequests = 0;
 	}
-	malloc_mutex_unlock(&bin->lock);
+	malloc_mutex_unlock(tsdn, &bin->lock);
 	tbin->ncached = i;
-	arena_decay_tick(tsd, arena);
+	arena_decay_tick(tsdn, arena);
 }
 
 void
 arena_alloc_junk_small(void *ptr, arena_bin_info_t *bin_info, bool zero)
 {
 
+	size_t redzone_size = bin_info->redzone_size;
+
 	if (zero) {
-		size_t redzone_size = bin_info->redzone_size;
-		memset((void *)((uintptr_t)ptr - redzone_size), 0xa5,
-		    redzone_size);
-		memset((void *)((uintptr_t)ptr + bin_info->reg_size), 0xa5,
-		    redzone_size);
+		memset((void *)((uintptr_t)ptr - redzone_size),
+		    JEMALLOC_ALLOC_JUNK, redzone_size);
+		memset((void *)((uintptr_t)ptr + bin_info->reg_size),
+		    JEMALLOC_ALLOC_JUNK, redzone_size);
 	} else {
-		memset((void *)((uintptr_t)ptr - bin_info->redzone_size), 0xa5,
-		    bin_info->reg_interval);
+		memset((void *)((uintptr_t)ptr - redzone_size),
+		    JEMALLOC_ALLOC_JUNK, bin_info->reg_interval);
 	}
 }
 
 #ifdef JEMALLOC_JET
 #undef arena_redzone_corruption
-#define	arena_redzone_corruption JEMALLOC_N(arena_redzone_corruption_impl)
+#define	arena_redzone_corruption JEMALLOC_N(n_arena_redzone_corruption)
 #endif
 static void
 arena_redzone_corruption(void *ptr, size_t usize, bool after,
@@ -2287,7 +2457,7 @@
 #undef arena_redzone_corruption
 #define	arena_redzone_corruption JEMALLOC_N(arena_redzone_corruption)
 arena_redzone_corruption_t *arena_redzone_corruption =
-    JEMALLOC_N(arena_redzone_corruption_impl);
+    JEMALLOC_N(n_arena_redzone_corruption);
 #endif
 
 static void
@@ -2302,22 +2472,22 @@
 
 		for (i = 1; i <= redzone_size; i++) {
 			uint8_t *byte = (uint8_t *)((uintptr_t)ptr - i);
-			if (*byte != 0xa5) {
+			if (*byte != JEMALLOC_ALLOC_JUNK) {
 				error = true;
 				arena_redzone_corruption(ptr, size, false, i,
 				    *byte);
 				if (reset)
-					*byte = 0xa5;
+					*byte = JEMALLOC_ALLOC_JUNK;
 			}
 		}
 		for (i = 0; i < redzone_size; i++) {
 			uint8_t *byte = (uint8_t *)((uintptr_t)ptr + size + i);
-			if (*byte != 0xa5) {
+			if (*byte != JEMALLOC_ALLOC_JUNK) {
 				error = true;
 				arena_redzone_corruption(ptr, size, true, i,
 				    *byte);
 				if (reset)
-					*byte = 0xa5;
+					*byte = JEMALLOC_ALLOC_JUNK;
 			}
 		}
 	}
@@ -2328,7 +2498,7 @@
 
 #ifdef JEMALLOC_JET
 #undef arena_dalloc_junk_small
-#define	arena_dalloc_junk_small JEMALLOC_N(arena_dalloc_junk_small_impl)
+#define	arena_dalloc_junk_small JEMALLOC_N(n_arena_dalloc_junk_small)
 #endif
 void
 arena_dalloc_junk_small(void *ptr, arena_bin_info_t *bin_info)
@@ -2336,14 +2506,14 @@
 	size_t redzone_size = bin_info->redzone_size;
 
 	arena_redzones_validate(ptr, bin_info, false);
-	memset((void *)((uintptr_t)ptr - redzone_size), 0x5a,
+	memset((void *)((uintptr_t)ptr - redzone_size), JEMALLOC_FREE_JUNK,
 	    bin_info->reg_interval);
 }
 #ifdef JEMALLOC_JET
 #undef arena_dalloc_junk_small
 #define	arena_dalloc_junk_small JEMALLOC_N(arena_dalloc_junk_small)
 arena_dalloc_junk_small_t *arena_dalloc_junk_small =
-    JEMALLOC_N(arena_dalloc_junk_small_impl);
+    JEMALLOC_N(n_arena_dalloc_junk_small);
 #endif
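
The junk-fill hunks above replace the literal byte values 0xa5 and 0x5a with the JEMALLOC_ALLOC_JUNK and JEMALLOC_FREE_JUNK constants; the redzone layout itself is unchanged, with one junk-filled zone immediately before and one immediately after each small region. A compressed sketch of the fill/validate pattern, using local names and a fixed junk byte as stand-ins for the real constants.

#include <stdbool.h>
#include <stdint.h>
#include <string.h>

#define ALLOC_JUNK	((uint8_t)0xa5)	/* stand-in for JEMALLOC_ALLOC_JUNK */

/* Write the junk pattern into the redzones before and after a region. */
static void
redzone_fill(void *ptr, size_t reg_size, size_t redzone_size)
{
	memset((void *)((uintptr_t)ptr - redzone_size), ALLOC_JUNK,
	    redzone_size);
	memset((void *)((uintptr_t)ptr + reg_size), ALLOC_JUNK, redzone_size);
}

/* Check that neither redzone has been overwritten since it was filled. */
static bool
redzone_validate(const void *ptr, size_t reg_size, size_t redzone_size)
{
	const uint8_t *pre = (const uint8_t *)((uintptr_t)ptr - redzone_size);
	const uint8_t *post = (const uint8_t *)((uintptr_t)ptr + reg_size);
	size_t i;

	for (i = 0; i < redzone_size; i++) {
		if (pre[i] != ALLOC_JUNK || post[i] != ALLOC_JUNK)
			return (false);	/* buffer under- or overflow */
	}
	return (true);
}
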
 
 void
@@ -2362,7 +2532,7 @@
 }
 
 static void *
-arena_malloc_small(tsd_t *tsd, arena_t *arena, szind_t binind, bool zero)
+arena_malloc_small(tsdn_t *tsdn, arena_t *arena, szind_t binind, bool zero)
 {
 	void *ret;
 	arena_bin_t *bin;
@@ -2373,14 +2543,14 @@
 	bin = &arena->bins[binind];
 	usize = index2size(binind);
 
-	malloc_mutex_lock(&bin->lock);
+	malloc_mutex_lock(tsdn, &bin->lock);
 	if ((run = bin->runcur) != NULL && run->nfree > 0)
 		ret = arena_run_reg_alloc(run, &arena_bin_info[binind]);
 	else
-		ret = arena_bin_malloc_hard(arena, bin);
+		ret = arena_bin_malloc_hard(tsdn, arena, bin);
 
 	if (ret == NULL) {
-		malloc_mutex_unlock(&bin->lock);
+		malloc_mutex_unlock(tsdn, &bin->lock);
 		return (NULL);
 	}
 
@@ -2389,9 +2559,9 @@
 		bin->stats.nrequests++;
 		bin->stats.curregs++;
 	}
-	malloc_mutex_unlock(&bin->lock);
-	if (config_prof && !isthreaded && arena_prof_accum(arena, usize))
-		prof_idump();
+	malloc_mutex_unlock(tsdn, &bin->lock);
+	if (config_prof && !isthreaded && arena_prof_accum(tsdn, arena, usize))
+		prof_idump(tsdn);
 
 	if (!zero) {
 		if (config_fill) {
@@ -2411,12 +2581,12 @@
 		memset(ret, 0, usize);
 	}
 
-	arena_decay_tick(tsd, arena);
+	arena_decay_tick(tsdn, arena);
 	return (ret);
 }
 
 void *
-arena_malloc_large(tsd_t *tsd, arena_t *arena, szind_t binind, bool zero)
+arena_malloc_large(tsdn_t *tsdn, arena_t *arena, szind_t binind, bool zero)
 {
 	void *ret;
 	size_t usize;
@@ -2427,7 +2597,7 @@
 
 	/* Large allocation. */
 	usize = index2size(binind);
-	malloc_mutex_lock(&arena->lock);
+	malloc_mutex_lock(tsdn, &arena->lock);
 	if (config_cache_oblivious) {
 		uint64_t r;
 
@@ -2440,9 +2610,9 @@
 		random_offset = ((uintptr_t)r) << LG_CACHELINE;
 	} else
 		random_offset = 0;
-	run = arena_run_alloc_large(arena, usize + large_pad, zero);
+	run = arena_run_alloc_large(tsdn, arena, usize + large_pad, zero);
 	if (run == NULL) {
-		malloc_mutex_unlock(&arena->lock);
+		malloc_mutex_unlock(tsdn, &arena->lock);
 		return (NULL);
 	}
 	miscelm = arena_run_to_miscelm(run);
@@ -2460,42 +2630,45 @@
 	}
 	if (config_prof)
 		idump = arena_prof_accum_locked(arena, usize);
-	malloc_mutex_unlock(&arena->lock);
+	malloc_mutex_unlock(tsdn, &arena->lock);
 	if (config_prof && idump)
-		prof_idump();
+		prof_idump(tsdn);
 
 	if (!zero) {
 		if (config_fill) {
 			if (unlikely(opt_junk_alloc))
-				memset(ret, 0xa5, usize);
+				memset(ret, JEMALLOC_ALLOC_JUNK, usize);
 			else if (unlikely(opt_zero))
 				memset(ret, 0, usize);
 		}
 	}
 
-	arena_decay_tick(tsd, arena);
+	arena_decay_tick(tsdn, arena);
 	return (ret);
 }
 
 void *
-arena_malloc_hard(tsd_t *tsd, arena_t *arena, size_t size, szind_t ind,
-    bool zero, tcache_t *tcache)
+arena_malloc_hard(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind,
+    bool zero)
 {
 
-	arena = arena_choose(tsd, arena);
+	assert(!tsdn_null(tsdn) || arena != NULL);
+
+	if (likely(!tsdn_null(tsdn)))
+		arena = arena_choose(tsdn_tsd(tsdn), arena);
 	if (unlikely(arena == NULL))
 		return (NULL);
 
 	if (likely(size <= SMALL_MAXCLASS))
-		return (arena_malloc_small(tsd, arena, ind, zero));
+		return (arena_malloc_small(tsdn, arena, ind, zero));
 	if (likely(size <= large_maxclass))
-		return (arena_malloc_large(tsd, arena, ind, zero));
-	return (huge_malloc(tsd, arena, index2size(ind), zero, tcache));
+		return (arena_malloc_large(tsdn, arena, ind, zero));
+	return (huge_malloc(tsdn, arena, index2size(ind), zero));
 }
 
 /* Only handles large allocations that require more than page alignment. */
 static void *
-arena_palloc_large(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment,
+arena_palloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment,
     bool zero)
 {
 	void *ret;
@@ -2505,19 +2678,21 @@
 	arena_chunk_map_misc_t *miscelm;
 	void *rpages;
 
+	assert(!tsdn_null(tsdn) || arena != NULL);
 	assert(usize == PAGE_CEILING(usize));
 
-	arena = arena_choose(tsd, arena);
+	if (likely(!tsdn_null(tsdn)))
+		arena = arena_choose(tsdn_tsd(tsdn), arena);
 	if (unlikely(arena == NULL))
 		return (NULL);
 
 	alignment = PAGE_CEILING(alignment);
-	alloc_size = usize + large_pad + alignment - PAGE;
+	alloc_size = usize + large_pad + alignment;
 
-	malloc_mutex_lock(&arena->lock);
-	run = arena_run_alloc_large(arena, alloc_size, false);
+	malloc_mutex_lock(tsdn, &arena->lock);
+	run = arena_run_alloc_large(tsdn, arena, alloc_size, false);
 	if (run == NULL) {
-		malloc_mutex_unlock(&arena->lock);
+		malloc_mutex_unlock(tsdn, &arena->lock);
 		return (NULL);
 	}
 	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
@@ -2532,16 +2707,16 @@
 		arena_chunk_map_misc_t *head_miscelm = miscelm;
 		arena_run_t *head_run = run;
 
-		miscelm = arena_miscelm_get(chunk,
+		miscelm = arena_miscelm_get_mutable(chunk,
 		    arena_miscelm_to_pageind(head_miscelm) + (leadsize >>
 		    LG_PAGE));
 		run = &miscelm->run;
 
-		arena_run_trim_head(arena, chunk, head_run, alloc_size,
+		arena_run_trim_head(tsdn, arena, chunk, head_run, alloc_size,
 		    alloc_size - leadsize);
 	}
 	if (trailsize != 0) {
-		arena_run_trim_tail(arena, chunk, run, usize + large_pad +
+		arena_run_trim_tail(tsdn, arena, chunk, run, usize + large_pad +
 		    trailsize, usize + large_pad, false);
 	}
 	if (arena_run_init_large(arena, run, usize + large_pad, zero)) {
@@ -2552,8 +2727,8 @@
 		    run_ind) != 0);
 
 		assert(decommitted); /* Cause of OOM. */
-		arena_run_dalloc(arena, run, dirty, false, decommitted);
-		malloc_mutex_unlock(&arena->lock);
+		arena_run_dalloc(tsdn, arena, run, dirty, false, decommitted);
+		malloc_mutex_unlock(tsdn, &arena->lock);
 		return (NULL);
 	}
 	ret = arena_miscelm_to_rpages(miscelm);
@@ -2568,20 +2743,20 @@
 		arena->stats.lstats[index].nrequests++;
 		arena->stats.lstats[index].curruns++;
 	}
-	malloc_mutex_unlock(&arena->lock);
+	malloc_mutex_unlock(tsdn, &arena->lock);
 
 	if (config_fill && !zero) {
 		if (unlikely(opt_junk_alloc))
-			memset(ret, 0xa5, usize);
+			memset(ret, JEMALLOC_ALLOC_JUNK, usize);
 		else if (unlikely(opt_zero))
 			memset(ret, 0, usize);
 	}
-	arena_decay_tick(tsd, arena);
+	arena_decay_tick(tsdn, arena);
 	return (ret);
 }
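
arena_palloc_large() now over-allocates the run by a full `alignment` (the `- PAGE` term is dropped), then trims the unused head and tail so that exactly usize + large_pad bytes remain with the payload aligned. A small self-contained sketch of why an extra `alignment` bytes always suffices; the names are local to the example and the arithmetic only mirrors the lead/trail trimming above, it is not the file's code.

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

static uintptr_t
align_up(uintptr_t x, uintptr_t alignment)
{
	assert((alignment & (alignment - 1)) == 0);	/* power of two */
	return ((x + alignment - 1) & ~(alignment - 1));
}

/* Given a run of alloc_size = usize + large_pad + alignment bytes starting
 * at run_base, an aligned payload always fits after trimming. */
static void
carve(uintptr_t run_base, size_t usize, size_t large_pad, size_t alignment)
{
	size_t alloc_size = usize + large_pad + alignment;
	size_t leadsize = align_up(run_base + large_pad, alignment) -
	    (run_base + large_pad);
	size_t trailsize = alloc_size - leadsize - usize - large_pad;

	assert(leadsize < alignment);		/* head trim always fits */
	assert((run_base + leadsize + large_pad) % alignment == 0);
	assert(leadsize + large_pad + usize + trailsize == alloc_size);
	(void)trailsize;	/* tail trim is returned to the arena */
}
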
 
 void *
-arena_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment,
+arena_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment,
     bool zero, tcache_t *tcache)
 {
 	void *ret;
@@ -2589,7 +2764,7 @@
 	if (usize <= SMALL_MAXCLASS && (alignment < PAGE || (alignment == PAGE
 	    && (usize & PAGE_MASK) == 0))) {
 		/* Small; alignment doesn't require special run placement. */
-		ret = arena_malloc(tsd, arena, usize, size2index(usize), zero,
+		ret = arena_malloc(tsdn, arena, usize, size2index(usize), zero,
 		    tcache, true);
 	} else if (usize <= large_maxclass && alignment <= PAGE) {
 		/*
@@ -2598,26 +2773,25 @@
 		 * the base of the run, so do some bit manipulation to retrieve
 		 * the base.
 		 */
-		ret = arena_malloc(tsd, arena, usize, size2index(usize), zero,
+		ret = arena_malloc(tsdn, arena, usize, size2index(usize), zero,
 		    tcache, true);
 		if (config_cache_oblivious)
 			ret = (void *)((uintptr_t)ret & ~PAGE_MASK);
 	} else {
 		if (likely(usize <= large_maxclass)) {
-			ret = arena_palloc_large(tsd, arena, usize, alignment,
+			ret = arena_palloc_large(tsdn, arena, usize, alignment,
 			    zero);
 		} else if (likely(alignment <= chunksize))
-			ret = huge_malloc(tsd, arena, usize, zero, tcache);
+			ret = huge_malloc(tsdn, arena, usize, zero);
 		else {
-			ret = huge_palloc(tsd, arena, usize, alignment, zero,
-			    tcache);
+			ret = huge_palloc(tsdn, arena, usize, alignment, zero);
 		}
 	}
 	return (ret);
 }
 
 void
-arena_prof_promoted(const void *ptr, size_t size)
+arena_prof_promoted(tsdn_t *tsdn, const void *ptr, size_t size)
 {
 	arena_chunk_t *chunk;
 	size_t pageind;
@@ -2626,8 +2800,8 @@
 	cassert(config_prof);
 	assert(ptr != NULL);
 	assert(CHUNK_ADDR2BASE(ptr) != ptr);
-	assert(isalloc(ptr, false) == LARGE_MINCLASS);
-	assert(isalloc(ptr, true) == LARGE_MINCLASS);
+	assert(isalloc(tsdn, ptr, false) == LARGE_MINCLASS);
+	assert(isalloc(tsdn, ptr, true) == LARGE_MINCLASS);
 	assert(size <= SMALL_MAXCLASS);
 
 	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
@@ -2636,8 +2810,8 @@
 	assert(binind < NBINS);
 	arena_mapbits_large_binind_set(chunk, pageind, binind);
 
-	assert(isalloc(ptr, false) == LARGE_MINCLASS);
-	assert(isalloc(ptr, true) == size);
+	assert(isalloc(tsdn, ptr, false) == LARGE_MINCLASS);
+	assert(isalloc(tsdn, ptr, true) == size);
 }
 
 static void
@@ -2653,33 +2827,34 @@
 		    &chunk->node), bin);
 		arena_bin_info_t *bin_info = &arena_bin_info[binind];
 
+		/*
+		 * The following block's conditional is necessary because if the
+		 * run only contains one region, then it never gets inserted
+		 * into the non-full runs tree.
+		 */
 		if (bin_info->nregs != 1) {
-			/*
-			 * This block's conditional is necessary because if the
-			 * run only contains one region, then it never gets
-			 * inserted into the non-full runs tree.
-			 */
-			arena_bin_runs_remove(bin, run);
+			arena_chunk_map_misc_t *miscelm =
+			    arena_run_to_miscelm(run);
+
+			arena_run_heap_remove(&bin->runs, miscelm);
 		}
 	}
 }
 
 static void
-arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run,
-    arena_bin_t *bin)
+arena_dalloc_bin_run(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk,
+    arena_run_t *run, arena_bin_t *bin)
 {
 
 	assert(run != bin->runcur);
-	assert(arena_run_tree_search(&bin->runs, arena_run_to_miscelm(run)) ==
-	    NULL);
 
-	malloc_mutex_unlock(&bin->lock);
+	malloc_mutex_unlock(tsdn, &bin->lock);
 	/******************************/
-	malloc_mutex_lock(&arena->lock);
-	arena_run_dalloc(arena, run, true, false, false);
-	malloc_mutex_unlock(&arena->lock);
+	malloc_mutex_lock(tsdn, &arena->lock);
+	arena_run_dalloc(tsdn, arena, run, true, false, false);
+	malloc_mutex_unlock(tsdn, &arena->lock);
 	/****************************/
-	malloc_mutex_lock(&bin->lock);
+	malloc_mutex_lock(tsdn, &bin->lock);
 	if (config_stats)
 		bin->stats.curruns--;
 }
@@ -2706,8 +2881,8 @@
 }
 
 static void
-arena_dalloc_bin_locked_impl(arena_t *arena, arena_chunk_t *chunk, void *ptr,
-    arena_chunk_map_bits_t *bitselm, bool junked)
+arena_dalloc_bin_locked_impl(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk,
+    void *ptr, arena_chunk_map_bits_t *bitselm, bool junked)
 {
 	size_t pageind, rpages_ind;
 	arena_run_t *run;
@@ -2717,7 +2892,7 @@
 
 	pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE;
 	rpages_ind = pageind - arena_mapbits_small_runind_get(chunk, pageind);
-	run = &arena_miscelm_get(chunk, rpages_ind)->run;
+	run = &arena_miscelm_get_mutable(chunk, rpages_ind)->run;
 	binind = run->binind;
 	bin = &arena->bins[binind];
 	bin_info = &arena_bin_info[binind];
@@ -2728,7 +2903,7 @@
 	arena_run_reg_dalloc(run, ptr);
 	if (run->nfree == bin_info->nregs) {
 		arena_dissociate_bin_run(chunk, run, bin);
-		arena_dalloc_bin_run(arena, chunk, run, bin);
+		arena_dalloc_bin_run(tsdn, arena, chunk, run, bin);
 	} else if (run->nfree == 1 && run != bin->runcur)
 		arena_bin_lower_run(arena, chunk, run, bin);
 
@@ -2739,15 +2914,15 @@
 }
 
 void
-arena_dalloc_bin_junked_locked(arena_t *arena, arena_chunk_t *chunk, void *ptr,
-    arena_chunk_map_bits_t *bitselm)
+arena_dalloc_bin_junked_locked(tsdn_t *tsdn, arena_t *arena,
+    arena_chunk_t *chunk, void *ptr, arena_chunk_map_bits_t *bitselm)
 {
 
-	arena_dalloc_bin_locked_impl(arena, chunk, ptr, bitselm, true);
+	arena_dalloc_bin_locked_impl(tsdn, arena, chunk, ptr, bitselm, true);
 }
 
 void
-arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr,
+arena_dalloc_bin(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, void *ptr,
     size_t pageind, arena_chunk_map_bits_t *bitselm)
 {
 	arena_run_t *run;
@@ -2755,16 +2930,16 @@
 	size_t rpages_ind;
 
 	rpages_ind = pageind - arena_mapbits_small_runind_get(chunk, pageind);
-	run = &arena_miscelm_get(chunk, rpages_ind)->run;
+	run = &arena_miscelm_get_mutable(chunk, rpages_ind)->run;
 	bin = &arena->bins[run->binind];
-	malloc_mutex_lock(&bin->lock);
-	arena_dalloc_bin_locked_impl(arena, chunk, ptr, bitselm, false);
-	malloc_mutex_unlock(&bin->lock);
+	malloc_mutex_lock(tsdn, &bin->lock);
+	arena_dalloc_bin_locked_impl(tsdn, arena, chunk, ptr, bitselm, false);
+	malloc_mutex_unlock(tsdn, &bin->lock);
 }
 
 void
-arena_dalloc_small(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk, void *ptr,
-    size_t pageind)
+arena_dalloc_small(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk,
+    void *ptr, size_t pageind)
 {
 	arena_chunk_map_bits_t *bitselm;
 
@@ -2773,35 +2948,36 @@
 		assert(arena_ptr_small_binind_get(ptr, arena_mapbits_get(chunk,
 		    pageind)) != BININD_INVALID);
 	}
-	bitselm = arena_bitselm_get(chunk, pageind);
-	arena_dalloc_bin(arena, chunk, ptr, pageind, bitselm);
-	arena_decay_tick(tsd, arena);
+	bitselm = arena_bitselm_get_mutable(chunk, pageind);
+	arena_dalloc_bin(tsdn, arena, chunk, ptr, pageind, bitselm);
+	arena_decay_tick(tsdn, arena);
 }
 
 #ifdef JEMALLOC_JET
 #undef arena_dalloc_junk_large
-#define	arena_dalloc_junk_large JEMALLOC_N(arena_dalloc_junk_large_impl)
+#define	arena_dalloc_junk_large JEMALLOC_N(n_arena_dalloc_junk_large)
 #endif
 void
 arena_dalloc_junk_large(void *ptr, size_t usize)
 {
 
 	if (config_fill && unlikely(opt_junk_free))
-		memset(ptr, 0x5a, usize);
+		memset(ptr, JEMALLOC_FREE_JUNK, usize);
 }
 #ifdef JEMALLOC_JET
 #undef arena_dalloc_junk_large
 #define	arena_dalloc_junk_large JEMALLOC_N(arena_dalloc_junk_large)
 arena_dalloc_junk_large_t *arena_dalloc_junk_large =
-    JEMALLOC_N(arena_dalloc_junk_large_impl);
+    JEMALLOC_N(n_arena_dalloc_junk_large);
 #endif
 
 static void
-arena_dalloc_large_locked_impl(arena_t *arena, arena_chunk_t *chunk,
-    void *ptr, bool junked)
+arena_dalloc_large_locked_impl(tsdn_t *tsdn, arena_t *arena,
+    arena_chunk_t *chunk, void *ptr, bool junked)
 {
 	size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE;
-	arena_chunk_map_misc_t *miscelm = arena_miscelm_get(chunk, pageind);
+	arena_chunk_map_misc_t *miscelm = arena_miscelm_get_mutable(chunk,
+	    pageind);
 	arena_run_t *run = &miscelm->run;
 
 	if (config_fill || config_stats) {
@@ -2820,33 +2996,35 @@
 		}
 	}
 
-	arena_run_dalloc(arena, run, true, false, false);
+	arena_run_dalloc(tsdn, arena, run, true, false, false);
 }
 
 void
-arena_dalloc_large_junked_locked(arena_t *arena, arena_chunk_t *chunk,
+arena_dalloc_large_junked_locked(tsdn_t *tsdn, arena_t *arena,
+    arena_chunk_t *chunk, void *ptr)
+{
+
+	arena_dalloc_large_locked_impl(tsdn, arena, chunk, ptr, true);
+}
+
+void
+arena_dalloc_large(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk,
     void *ptr)
 {
 
-	arena_dalloc_large_locked_impl(arena, chunk, ptr, true);
-}
-
-void
-arena_dalloc_large(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk, void *ptr)
-{
-
-	malloc_mutex_lock(&arena->lock);
-	arena_dalloc_large_locked_impl(arena, chunk, ptr, false);
-	malloc_mutex_unlock(&arena->lock);
-	arena_decay_tick(tsd, arena);
+	malloc_mutex_lock(tsdn, &arena->lock);
+	arena_dalloc_large_locked_impl(tsdn, arena, chunk, ptr, false);
+	malloc_mutex_unlock(tsdn, &arena->lock);
+	arena_decay_tick(tsdn, arena);
 }
 
 static void
-arena_ralloc_large_shrink(arena_t *arena, arena_chunk_t *chunk, void *ptr,
-    size_t oldsize, size_t size)
+arena_ralloc_large_shrink(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk,
+    void *ptr, size_t oldsize, size_t size)
 {
 	size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE;
-	arena_chunk_map_misc_t *miscelm = arena_miscelm_get(chunk, pageind);
+	arena_chunk_map_misc_t *miscelm = arena_miscelm_get_mutable(chunk,
+	    pageind);
 	arena_run_t *run = &miscelm->run;
 
 	assert(size < oldsize);
@@ -2855,8 +3033,8 @@
 	 * Shrink the run, and make trailing pages available for other
 	 * allocations.
 	 */
-	malloc_mutex_lock(&arena->lock);
-	arena_run_trim_tail(arena, chunk, run, oldsize + large_pad, size +
+	malloc_mutex_lock(tsdn, &arena->lock);
+	arena_run_trim_tail(tsdn, arena, chunk, run, oldsize + large_pad, size +
 	    large_pad, true);
 	if (config_stats) {
 		szind_t oldindex = size2index(oldsize) - NBINS;
@@ -2874,12 +3052,12 @@
 		arena->stats.lstats[index].nrequests++;
 		arena->stats.lstats[index].curruns++;
 	}
-	malloc_mutex_unlock(&arena->lock);
+	malloc_mutex_unlock(tsdn, &arena->lock);
 }
 
 static bool
-arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk, void *ptr,
-    size_t oldsize, size_t usize_min, size_t usize_max, bool zero)
+arena_ralloc_large_grow(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk,
+    void *ptr, size_t oldsize, size_t usize_min, size_t usize_max, bool zero)
 {
 	size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE;
 	size_t npages = (oldsize + large_pad) >> LG_PAGE;
@@ -2889,7 +3067,7 @@
 	    large_pad);
 
 	/* Try to extend the run. */
-	malloc_mutex_lock(&arena->lock);
+	malloc_mutex_lock(tsdn, &arena->lock);
 	if (pageind+npages >= chunk_npages || arena_mapbits_allocated_get(chunk,
 	    pageind+npages) != 0)
 		goto label_fail;
@@ -2912,7 +3090,7 @@
 		if (splitsize == 0)
 			goto label_fail;
 
-		run = &arena_miscelm_get(chunk, pageind+npages)->run;
+		run = &arena_miscelm_get_mutable(chunk, pageind+npages)->run;
 		if (arena_run_split_large(arena, run, splitsize, zero))
 			goto label_fail;
 
@@ -2969,24 +3147,24 @@
 			arena->stats.lstats[index].nrequests++;
 			arena->stats.lstats[index].curruns++;
 		}
-		malloc_mutex_unlock(&arena->lock);
+		malloc_mutex_unlock(tsdn, &arena->lock);
 		return (false);
 	}
 label_fail:
-	malloc_mutex_unlock(&arena->lock);
+	malloc_mutex_unlock(tsdn, &arena->lock);
 	return (true);
 }
 
 #ifdef JEMALLOC_JET
 #undef arena_ralloc_junk_large
-#define	arena_ralloc_junk_large JEMALLOC_N(arena_ralloc_junk_large_impl)
+#define	arena_ralloc_junk_large JEMALLOC_N(n_arena_ralloc_junk_large)
 #endif
 static void
 arena_ralloc_junk_large(void *ptr, size_t old_usize, size_t usize)
 {
 
 	if (config_fill && unlikely(opt_junk_free)) {
-		memset((void *)((uintptr_t)ptr + usize), 0x5a,
+		memset((void *)((uintptr_t)ptr + usize), JEMALLOC_FREE_JUNK,
 		    old_usize - usize);
 	}
 }
@@ -2994,7 +3172,7 @@
 #undef arena_ralloc_junk_large
 #define	arena_ralloc_junk_large JEMALLOC_N(arena_ralloc_junk_large)
 arena_ralloc_junk_large_t *arena_ralloc_junk_large =
-    JEMALLOC_N(arena_ralloc_junk_large_impl);
+    JEMALLOC_N(n_arena_ralloc_junk_large);
 #endif
 
 /*
@@ -3002,7 +3180,7 @@
  * always fail if growing an object, and the following run is already in use.
  */
 static bool
-arena_ralloc_large(void *ptr, size_t oldsize, size_t usize_min,
+arena_ralloc_large(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t usize_min,
     size_t usize_max, bool zero)
 {
 	arena_chunk_t *chunk;
@@ -3017,15 +3195,16 @@
 	arena = extent_node_arena_get(&chunk->node);
 
 	if (oldsize < usize_max) {
-		bool ret = arena_ralloc_large_grow(arena, chunk, ptr, oldsize,
-		    usize_min, usize_max, zero);
+		bool ret = arena_ralloc_large_grow(tsdn, arena, chunk, ptr,
+		    oldsize, usize_min, usize_max, zero);
 		if (config_fill && !ret && !zero) {
 			if (unlikely(opt_junk_alloc)) {
-				memset((void *)((uintptr_t)ptr + oldsize), 0xa5,
-				    isalloc(ptr, config_prof) - oldsize);
+				memset((void *)((uintptr_t)ptr + oldsize),
+				    JEMALLOC_ALLOC_JUNK,
+				    isalloc(tsdn, ptr, config_prof) - oldsize);
 			} else if (unlikely(opt_zero)) {
 				memset((void *)((uintptr_t)ptr + oldsize), 0,
-				    isalloc(ptr, config_prof) - oldsize);
+				    isalloc(tsdn, ptr, config_prof) - oldsize);
 			}
 		}
 		return (ret);
@@ -3034,12 +3213,12 @@
 	assert(oldsize > usize_max);
 	/* Fill before shrinking in order to avoid a race. */
 	arena_ralloc_junk_large(ptr, oldsize, usize_max);
-	arena_ralloc_large_shrink(arena, chunk, ptr, oldsize, usize_max);
+	arena_ralloc_large_shrink(tsdn, arena, chunk, ptr, oldsize, usize_max);
 	return (false);
 }
 
 bool
-arena_ralloc_no_move(tsd_t *tsd, void *ptr, size_t oldsize, size_t size,
+arena_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size,
     size_t extra, bool zero)
 {
 	size_t usize_min, usize_max;
@@ -3069,32 +3248,32 @@
 		} else {
 			if (usize_max <= SMALL_MAXCLASS)
 				return (true);
-			if (arena_ralloc_large(ptr, oldsize, usize_min,
+			if (arena_ralloc_large(tsdn, ptr, oldsize, usize_min,
 			    usize_max, zero))
 				return (true);
 		}
 
 		chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
-		arena_decay_tick(tsd, extent_node_arena_get(&chunk->node));
+		arena_decay_tick(tsdn, extent_node_arena_get(&chunk->node));
 		return (false);
 	} else {
-		return (huge_ralloc_no_move(tsd, ptr, oldsize, usize_min,
+		return (huge_ralloc_no_move(tsdn, ptr, oldsize, usize_min,
 		    usize_max, zero));
 	}
 }
 
 static void *
-arena_ralloc_move_helper(tsd_t *tsd, arena_t *arena, size_t usize,
+arena_ralloc_move_helper(tsdn_t *tsdn, arena_t *arena, size_t usize,
     size_t alignment, bool zero, tcache_t *tcache)
 {
 
 	if (alignment == 0)
-		return (arena_malloc(tsd, arena, usize, size2index(usize), zero,
-		    tcache, true));
+		return (arena_malloc(tsdn, arena, usize, size2index(usize),
+		    zero, tcache, true));
 	usize = sa2u(usize, alignment);
 	if (unlikely(usize == 0 || usize > HUGE_MAXCLASS))
 		return (NULL);
-	return (ipalloct(tsd, usize, alignment, zero, tcache, arena));
+	return (ipalloct(tsdn, usize, alignment, zero, tcache, arena));
 }
 
 void *
@@ -3112,7 +3291,8 @@
 		size_t copysize;
 
 		/* Try to avoid moving the allocation. */
-		if (!arena_ralloc_no_move(tsd, ptr, oldsize, usize, 0, zero))
+		if (!arena_ralloc_no_move(tsd_tsdn(tsd), ptr, oldsize, usize, 0,
+		    zero))
 			return (ptr);
 
 		/*
@@ -3120,8 +3300,8 @@
 		 * the object.  In that case, fall back to allocating new space
 		 * and copying.
 		 */
-		ret = arena_ralloc_move_helper(tsd, arena, usize, alignment,
-		    zero, tcache);
+		ret = arena_ralloc_move_helper(tsd_tsdn(tsd), arena, usize,
+		    alignment, zero, tcache);
 		if (ret == NULL)
 			return (NULL);
 
@@ -3133,7 +3313,7 @@
 		copysize = (usize < oldsize) ? usize : oldsize;
 		JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, copysize);
 		memcpy(ret, ptr, copysize);
-		isqalloc(tsd, ptr, oldsize, tcache);
+		isqalloc(tsd, ptr, oldsize, tcache, true);
 	} else {
 		ret = huge_ralloc(tsd, arena, ptr, oldsize, usize, alignment,
 		    zero, tcache);
@@ -3142,25 +3322,25 @@
 }
 
 dss_prec_t
-arena_dss_prec_get(arena_t *arena)
+arena_dss_prec_get(tsdn_t *tsdn, arena_t *arena)
 {
 	dss_prec_t ret;
 
-	malloc_mutex_lock(&arena->lock);
+	malloc_mutex_lock(tsdn, &arena->lock);
 	ret = arena->dss_prec;
-	malloc_mutex_unlock(&arena->lock);
+	malloc_mutex_unlock(tsdn, &arena->lock);
 	return (ret);
 }
 
 bool
-arena_dss_prec_set(arena_t *arena, dss_prec_t dss_prec)
+arena_dss_prec_set(tsdn_t *tsdn, arena_t *arena, dss_prec_t dss_prec)
 {
 
 	if (!have_dss)
 		return (dss_prec != dss_prec_disabled);
-	malloc_mutex_lock(&arena->lock);
+	malloc_mutex_lock(tsdn, &arena->lock);
 	arena->dss_prec = dss_prec;
-	malloc_mutex_unlock(&arena->lock);
+	malloc_mutex_unlock(tsdn, &arena->lock);
 	return (false);
 }
 
@@ -3208,7 +3388,7 @@
     size_t *nactive, size_t *ndirty)
 {
 
-	*nthreads += arena_nthreads_get(arena);
+	*nthreads += arena_nthreads_get(arena, false);
 	*dss = dss_prec_names[arena->dss_prec];
 	*lg_dirty_mult = arena->lg_dirty_mult;
 	*decay_time = arena->decay_time;
@@ -3217,32 +3397,34 @@
 }
 
 void
-arena_basic_stats_merge(arena_t *arena, unsigned *nthreads, const char **dss,
-    ssize_t *lg_dirty_mult, ssize_t *decay_time, size_t *nactive,
-    size_t *ndirty)
+arena_basic_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads,
+    const char **dss, ssize_t *lg_dirty_mult, ssize_t *decay_time,
+    size_t *nactive, size_t *ndirty)
 {
 
-	malloc_mutex_lock(&arena->lock);
+	malloc_mutex_lock(tsdn, &arena->lock);
 	arena_basic_stats_merge_locked(arena, nthreads, dss, lg_dirty_mult,
 	    decay_time, nactive, ndirty);
-	malloc_mutex_unlock(&arena->lock);
+	malloc_mutex_unlock(tsdn, &arena->lock);
 }
 
 void
-arena_stats_merge(arena_t *arena, unsigned *nthreads, const char **dss,
-    ssize_t *lg_dirty_mult, ssize_t *decay_time, size_t *nactive,
-    size_t *ndirty, arena_stats_t *astats, malloc_bin_stats_t *bstats,
-    malloc_large_stats_t *lstats, malloc_huge_stats_t *hstats)
+arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads,
+    const char **dss, ssize_t *lg_dirty_mult, ssize_t *decay_time,
+    size_t *nactive, size_t *ndirty, arena_stats_t *astats,
+    malloc_bin_stats_t *bstats, malloc_large_stats_t *lstats,
+    malloc_huge_stats_t *hstats)
 {
 	unsigned i;
 
 	cassert(config_stats);
 
-	malloc_mutex_lock(&arena->lock);
+	malloc_mutex_lock(tsdn, &arena->lock);
 	arena_basic_stats_merge_locked(arena, nthreads, dss, lg_dirty_mult,
 	    decay_time, nactive, ndirty);
 
 	astats->mapped += arena->stats.mapped;
+	astats->retained += arena->stats.retained;
 	astats->npurge += arena->stats.npurge;
 	astats->nmadvise += arena->stats.nmadvise;
 	astats->purged += arena->stats.purged;
@@ -3268,12 +3450,12 @@
 		hstats[i].ndalloc += arena->stats.hstats[i].ndalloc;
 		hstats[i].curhchunks += arena->stats.hstats[i].curhchunks;
 	}
-	malloc_mutex_unlock(&arena->lock);
+	malloc_mutex_unlock(tsdn, &arena->lock);
 
 	for (i = 0; i < NBINS; i++) {
 		arena_bin_t *bin = &arena->bins[i];
 
-		malloc_mutex_lock(&bin->lock);
+		malloc_mutex_lock(tsdn, &bin->lock);
 		bstats[i].nmalloc += bin->stats.nmalloc;
 		bstats[i].ndalloc += bin->stats.ndalloc;
 		bstats[i].nrequests += bin->stats.nrequests;
@@ -3285,58 +3467,58 @@
 		bstats[i].nruns += bin->stats.nruns;
 		bstats[i].reruns += bin->stats.reruns;
 		bstats[i].curruns += bin->stats.curruns;
-		malloc_mutex_unlock(&bin->lock);
+		malloc_mutex_unlock(tsdn, &bin->lock);
 	}
 }
 
 unsigned
-arena_nthreads_get(arena_t *arena)
+arena_nthreads_get(arena_t *arena, bool internal)
 {
 
-	return (atomic_read_u(&arena->nthreads));
+	return (atomic_read_u(&arena->nthreads[internal]));
 }
 
 void
-arena_nthreads_inc(arena_t *arena)
+arena_nthreads_inc(arena_t *arena, bool internal)
 {
 
-	atomic_add_u(&arena->nthreads, 1);
+	atomic_add_u(&arena->nthreads[internal], 1);
 }
 
 void
-arena_nthreads_dec(arena_t *arena)
+arena_nthreads_dec(arena_t *arena, bool internal)
 {
 
-	atomic_sub_u(&arena->nthreads, 1);
+	atomic_sub_u(&arena->nthreads[internal], 1);
 }
 
 arena_t *
-arena_new(unsigned ind)
+arena_new(tsdn_t *tsdn, unsigned ind)
 {
 	arena_t *arena;
 	size_t arena_size;
 	unsigned i;
-	arena_bin_t *bin;
 
 	/* Compute arena size to incorporate sufficient runs_avail elements. */
-	arena_size = offsetof(arena_t, runs_avail) + (sizeof(arena_run_tree_t) *
+	arena_size = offsetof(arena_t, runs_avail) + (sizeof(arena_run_heap_t) *
 	    runs_avail_nclasses);
 	/*
 	 * Allocate arena, arena->lstats, and arena->hstats contiguously, mainly
 	 * because there is no way to clean up if base_alloc() OOMs.
 	 */
 	if (config_stats) {
-		arena = (arena_t *)base_alloc(CACHELINE_CEILING(arena_size) +
-		    QUANTUM_CEILING(nlclasses * sizeof(malloc_large_stats_t) +
-		    nhclasses) * sizeof(malloc_huge_stats_t));
+		arena = (arena_t *)base_alloc(tsdn,
+		    CACHELINE_CEILING(arena_size) + QUANTUM_CEILING(nlclasses *
+		    sizeof(malloc_large_stats_t) + nhclasses) *
+		    sizeof(malloc_huge_stats_t));
 	} else
-		arena = (arena_t *)base_alloc(arena_size);
+		arena = (arena_t *)base_alloc(tsdn, arena_size);
 	if (arena == NULL)
 		return (NULL);
 
 	arena->ind = ind;
-	arena->nthreads = 0;
-	if (malloc_mutex_init(&arena->lock))
+	arena->nthreads[0] = arena->nthreads[1] = 0;
+	if (malloc_mutex_init(&arena->lock, "arena", WITNESS_RANK_ARENA))
 		return (NULL);
 
 	if (config_stats) {
@@ -3369,7 +3551,9 @@
 		    (uint64_t)(uintptr_t)arena;
 	}
 
-	arena->dss_prec = chunk_dss_prec_get();
+	arena->dss_prec = chunk_dss_prec_get(tsdn);
+
+	ql_new(&arena->achunks);
 
 	arena->spare = NULL;
 
@@ -3379,7 +3563,7 @@
 	arena->ndirty = 0;
 
 	for(i = 0; i < runs_avail_nclasses; i++)
-		arena_run_tree_new(&arena->runs_avail[i]);
+		arena_run_heap_new(&arena->runs_avail[i]);
 	qr_new(&arena->runs_dirty, rd_link);
 	qr_new(&arena->chunks_cache, cc_link);
 
@@ -3387,28 +3571,32 @@
 		arena_decay_init(arena, arena_decay_time_default_get());
 
 	ql_new(&arena->huge);
-	if (malloc_mutex_init(&arena->huge_mtx))
+	if (malloc_mutex_init(&arena->huge_mtx, "arena_huge",
+	    WITNESS_RANK_ARENA_HUGE))
 		return (NULL);
 
 	extent_tree_szad_new(&arena->chunks_szad_cached);
 	extent_tree_ad_new(&arena->chunks_ad_cached);
 	extent_tree_szad_new(&arena->chunks_szad_retained);
 	extent_tree_ad_new(&arena->chunks_ad_retained);
-	if (malloc_mutex_init(&arena->chunks_mtx))
+	if (malloc_mutex_init(&arena->chunks_mtx, "arena_chunks",
+	    WITNESS_RANK_ARENA_CHUNKS))
 		return (NULL);
 	ql_new(&arena->node_cache);
-	if (malloc_mutex_init(&arena->node_cache_mtx))
+	if (malloc_mutex_init(&arena->node_cache_mtx, "arena_node_cache",
+	    WITNESS_RANK_ARENA_NODE_CACHE))
 		return (NULL);
 
 	arena->chunk_hooks = chunk_hooks_default;
 
 	/* Initialize bins. */
 	for (i = 0; i < NBINS; i++) {
-		bin = &arena->bins[i];
-		if (malloc_mutex_init(&bin->lock))
+		arena_bin_t *bin = &arena->bins[i];
+		if (malloc_mutex_init(&bin->lock, "arena_bin",
+		    WITNESS_RANK_ARENA_BIN))
 			return (NULL);
 		bin->runcur = NULL;
-		arena_run_tree_new(&bin->runs);
+		arena_run_heap_new(&bin->runs);
 		if (config_stats)
 			memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
 	}
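
Every malloc_mutex_init() call in arena_new() now passes a name and a WITNESS_RANK_* constant, which suggests debug builds can verify that locks are always acquired in a fixed rank order. A toy sketch of that kind of check follows, with made-up names and a deliberately simplified single-value tracker; a real checker would track the full set of locks held by each thread.

#include <assert.h>

typedef struct {
	const char	*name;
	unsigned	rank;	/* locks must be acquired in increasing rank */
} ranked_lock_t;

static __thread unsigned highest_rank_held;

static void
ranked_lock_acquire(ranked_lock_t *lock)
{
	/* Acquiring a lock whose rank does not exceed a rank this thread
	 * already holds would violate the global lock order. */
	assert(lock->rank > highest_rank_held);
	highest_rank_held = lock->rank;
	/* ... acquire the underlying mutex here ... */
}

static void
ranked_lock_release(ranked_lock_t *lock)
{
	/* ... release the underlying mutex here ... */
	/* Simplification: assumes locks are released in reverse order. */
	if (lock->rank == highest_rank_held)
		highest_rank_held = 0;
}
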
@@ -3537,7 +3725,7 @@
 
 	assert(small_maxrun != 0);
 
-	small_run_tab = (bool *)base_alloc(sizeof(bool) * (small_maxrun >>
+	small_run_tab = (bool *)base_alloc(NULL, sizeof(bool) * (small_maxrun >>
 	    LG_PAGE));
 	if (small_run_tab == NULL)
 		return (true);
@@ -3564,12 +3752,12 @@
 
 	run_quantize_max = chunksize + large_pad;
 
-	run_quantize_floor_tab = (size_t *)base_alloc(sizeof(size_t) *
+	run_quantize_floor_tab = (size_t *)base_alloc(NULL, sizeof(size_t) *
 	    (run_quantize_max >> LG_PAGE));
 	if (run_quantize_floor_tab == NULL)
 		return (true);
 
-	run_quantize_ceil_tab = (size_t *)base_alloc(sizeof(size_t) *
+	run_quantize_ceil_tab = (size_t *)base_alloc(NULL, sizeof(size_t) *
 	    (run_quantize_max >> LG_PAGE));
 	if (run_quantize_ceil_tab == NULL)
 		return (true);
@@ -3646,58 +3834,58 @@
 }
 
 void
-arena_prefork0(arena_t *arena)
+arena_prefork0(tsdn_t *tsdn, arena_t *arena)
 {
 
-	malloc_mutex_prefork(&arena->lock);
+	malloc_mutex_prefork(tsdn, &arena->lock);
 }
 
 void
-arena_prefork1(arena_t *arena)
+arena_prefork1(tsdn_t *tsdn, arena_t *arena)
 {
 
-	malloc_mutex_prefork(&arena->chunks_mtx);
+	malloc_mutex_prefork(tsdn, &arena->chunks_mtx);
 }
 
 void
-arena_prefork2(arena_t *arena)
+arena_prefork2(tsdn_t *tsdn, arena_t *arena)
 {
 
-	malloc_mutex_prefork(&arena->node_cache_mtx);
+	malloc_mutex_prefork(tsdn, &arena->node_cache_mtx);
 }
 
 void
-arena_prefork3(arena_t *arena)
+arena_prefork3(tsdn_t *tsdn, arena_t *arena)
 {
 	unsigned i;
 
 	for (i = 0; i < NBINS; i++)
-		malloc_mutex_prefork(&arena->bins[i].lock);
-	malloc_mutex_prefork(&arena->huge_mtx);
+		malloc_mutex_prefork(tsdn, &arena->bins[i].lock);
+	malloc_mutex_prefork(tsdn, &arena->huge_mtx);
 }
 
 void
-arena_postfork_parent(arena_t *arena)
+arena_postfork_parent(tsdn_t *tsdn, arena_t *arena)
 {
 	unsigned i;
 
-	malloc_mutex_postfork_parent(&arena->huge_mtx);
+	malloc_mutex_postfork_parent(tsdn, &arena->huge_mtx);
 	for (i = 0; i < NBINS; i++)
-		malloc_mutex_postfork_parent(&arena->bins[i].lock);
-	malloc_mutex_postfork_parent(&arena->node_cache_mtx);
-	malloc_mutex_postfork_parent(&arena->chunks_mtx);
-	malloc_mutex_postfork_parent(&arena->lock);
+		malloc_mutex_postfork_parent(tsdn, &arena->bins[i].lock);
+	malloc_mutex_postfork_parent(tsdn, &arena->node_cache_mtx);
+	malloc_mutex_postfork_parent(tsdn, &arena->chunks_mtx);
+	malloc_mutex_postfork_parent(tsdn, &arena->lock);
 }
 
 void
-arena_postfork_child(arena_t *arena)
+arena_postfork_child(tsdn_t *tsdn, arena_t *arena)
 {
 	unsigned i;
 
-	malloc_mutex_postfork_child(&arena->huge_mtx);
+	malloc_mutex_postfork_child(tsdn, &arena->huge_mtx);
 	for (i = 0; i < NBINS; i++)
-		malloc_mutex_postfork_child(&arena->bins[i].lock);
-	malloc_mutex_postfork_child(&arena->node_cache_mtx);
-	malloc_mutex_postfork_child(&arena->chunks_mtx);
-	malloc_mutex_postfork_child(&arena->lock);
+		malloc_mutex_postfork_child(tsdn, &arena->bins[i].lock);
+	malloc_mutex_postfork_child(tsdn, &arena->node_cache_mtx);
+	malloc_mutex_postfork_child(tsdn, &arena->chunks_mtx);
+	malloc_mutex_postfork_child(tsdn, &arena->lock);
 }
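
The arena_prefork0..3() and arena_postfork_*() functions above are the per-arena pieces of the allocator's fork handlers: they acquire every arena lock before fork(2) and release (or reinitialize) them in the parent and child afterwards, now with the tsdn handle threaded through. The general shape such handlers plug into is sketched below with a single example lock; this is illustrative, not jemalloc's registration code.

#include <pthread.h>

static pthread_mutex_t example_lock = PTHREAD_MUTEX_INITIALIZER;

/* Taken before fork(2) so the child never inherits a lock held by a
 * thread that does not exist in the child. */
static void
prefork(void)
{
	pthread_mutex_lock(&example_lock);
}

/* Parent: release in the reverse order of acquisition. */
static void
postfork_parent(void)
{
	pthread_mutex_unlock(&example_lock);
}

/* Child: release (a real allocator may instead reinitialize the lock). */
static void
postfork_child(void)
{
	pthread_mutex_unlock(&example_lock);
}

static void
install_fork_handlers(void)
{
	pthread_atfork(prefork, postfork_parent, postfork_child);
}
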
diff --git a/src/base.c b/src/base.c
index 7cdcfed..81b0801 100644
--- a/src/base.c
+++ b/src/base.c
@@ -13,12 +13,13 @@
 
 /******************************************************************************/
 
-/* base_mtx must be held. */
 static extent_node_t *
-base_node_try_alloc(void)
+base_node_try_alloc(tsdn_t *tsdn)
 {
 	extent_node_t *node;
 
+	malloc_mutex_assert_owner(tsdn, &base_mtx);
+
 	if (base_nodes == NULL)
 		return (NULL);
 	node = base_nodes;
@@ -27,33 +28,34 @@
 	return (node);
 }
 
-/* base_mtx must be held. */
 static void
-base_node_dalloc(extent_node_t *node)
+base_node_dalloc(tsdn_t *tsdn, extent_node_t *node)
 {
 
+	malloc_mutex_assert_owner(tsdn, &base_mtx);
+
 	JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(node, sizeof(extent_node_t));
 	*(extent_node_t **)node = base_nodes;
 	base_nodes = node;
 }
 
-/* base_mtx must be held. */
 static extent_node_t *
-base_chunk_alloc(size_t minsize)
+base_chunk_alloc(tsdn_t *tsdn, size_t minsize)
 {
 	extent_node_t *node;
 	size_t csize, nsize;
 	void *addr;
 
+	malloc_mutex_assert_owner(tsdn, &base_mtx);
 	assert(minsize != 0);
-	node = base_node_try_alloc();
+	node = base_node_try_alloc(tsdn);
 	/* Allocate enough space to also carve a node out if necessary. */
 	nsize = (node == NULL) ? CACHELINE_CEILING(sizeof(extent_node_t)) : 0;
 	csize = CHUNK_CEILING(minsize + nsize);
 	addr = chunk_alloc_base(csize);
 	if (addr == NULL) {
 		if (node != NULL)
-			base_node_dalloc(node);
+			base_node_dalloc(tsdn, node);
 		return (NULL);
 	}
 	base_mapped += csize;
@@ -76,7 +78,7 @@
  * physical memory usage.
  */
 void *
-base_alloc(size_t size)
+base_alloc(tsdn_t *tsdn, size_t size)
 {
 	void *ret;
 	size_t csize, usize;
@@ -91,14 +93,14 @@
 
 	usize = s2u(csize);
 	extent_node_init(&key, NULL, NULL, usize, false, false);
-	malloc_mutex_lock(&base_mtx);
+	malloc_mutex_lock(tsdn, &base_mtx);
 	node = extent_tree_szad_nsearch(&base_avail_szad, &key);
 	if (node != NULL) {
 		/* Use existing space. */
 		extent_tree_szad_remove(&base_avail_szad, node);
 	} else {
 		/* Try to allocate more space. */
-		node = base_chunk_alloc(csize);
+		node = base_chunk_alloc(tsdn, csize);
 	}
 	if (node == NULL) {
 		ret = NULL;
@@ -111,7 +113,7 @@
 		extent_node_size_set(node, extent_node_size_get(node) - csize);
 		extent_tree_szad_insert(&base_avail_szad, node);
 	} else
-		base_node_dalloc(node);
+		base_node_dalloc(tsdn, node);
 	if (config_stats) {
 		base_allocated += csize;
 		/*
@@ -123,28 +125,29 @@
 	}
 	JEMALLOC_VALGRIND_MAKE_MEM_DEFINED(ret, csize);
 label_return:
-	malloc_mutex_unlock(&base_mtx);
+	malloc_mutex_unlock(tsdn, &base_mtx);
 	return (ret);
 }
 
 void
-base_stats_get(size_t *allocated, size_t *resident, size_t *mapped)
+base_stats_get(tsdn_t *tsdn, size_t *allocated, size_t *resident,
+    size_t *mapped)
 {
 
-	malloc_mutex_lock(&base_mtx);
+	malloc_mutex_lock(tsdn, &base_mtx);
 	assert(base_allocated <= base_resident);
 	assert(base_resident <= base_mapped);
 	*allocated = base_allocated;
 	*resident = base_resident;
 	*mapped = base_mapped;
-	malloc_mutex_unlock(&base_mtx);
+	malloc_mutex_unlock(tsdn, &base_mtx);
 }
 
 bool
 base_boot(void)
 {
 
-	if (malloc_mutex_init(&base_mtx))
+	if (malloc_mutex_init(&base_mtx, "base", WITNESS_RANK_BASE))
 		return (true);
 	extent_tree_szad_new(&base_avail_szad);
 	base_nodes = NULL;
@@ -153,22 +156,22 @@
 }
 
 void
-base_prefork(void)
+base_prefork(tsdn_t *tsdn)
 {
 
-	malloc_mutex_prefork(&base_mtx);
+	malloc_mutex_prefork(tsdn, &base_mtx);
 }
 
 void
-base_postfork_parent(void)
+base_postfork_parent(tsdn_t *tsdn)
 {
 
-	malloc_mutex_postfork_parent(&base_mtx);
+	malloc_mutex_postfork_parent(tsdn, &base_mtx);
 }
 
 void
-base_postfork_child(void)
+base_postfork_child(tsdn_t *tsdn)
 {
 
-	malloc_mutex_postfork_child(&base_mtx);
+	malloc_mutex_postfork_child(tsdn, &base_mtx);
 }
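
In base.c the "base_mtx must be held" comments become malloc_mutex_assert_owner(tsdn, &base_mtx) calls, so the locking precondition is checked rather than merely documented. A sketch of that pattern with a hypothetical owner-tracking mutex wrapper; the fields and functions here are illustrative and do not reflect jemalloc's malloc_mutex_t layout.

#include <assert.h>
#include <pthread.h>
#include <stdbool.h>

typedef struct {
	pthread_mutex_t	lock;
	pthread_t	owner;
	bool		owned;
} checked_mutex_t;

static void
checked_mutex_lock(checked_mutex_t *m)
{
	pthread_mutex_lock(&m->lock);
	m->owner = pthread_self();
	m->owned = true;
}

static void
checked_mutex_unlock(checked_mutex_t *m)
{
	m->owned = false;
	pthread_mutex_unlock(&m->lock);
}

static void
checked_mutex_assert_owner(checked_mutex_t *m)
{
	/* Callers that previously only documented a locking precondition
	 * can assert it, so debug builds catch violations. */
	assert(m->owned && pthread_equal(m->owner, pthread_self()));
}
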
diff --git a/src/bitmap.c b/src/bitmap.c
index b1e6627..ac0f3b3 100644
--- a/src/bitmap.c
+++ b/src/bitmap.c
@@ -74,15 +74,11 @@
 void
 bitmap_info_init(bitmap_info_t *binfo, size_t nbits)
 {
-	size_t i;
 
 	assert(nbits > 0);
 	assert(nbits <= (ZU(1) << LG_BITMAP_MAXBITS));
 
-	i = nbits >> LG_BITMAP_GROUP_NBITS;
-	if (nbits % BITMAP_GROUP_NBITS != 0)
-		i++;
-	binfo->ngroups = i;
+	binfo->ngroups = BITMAP_BITS2GROUPS(nbits);
 	binfo->nbits = nbits;
 }
 
@@ -99,9 +95,10 @@
 	size_t extra;
 
 	memset(bitmap, 0xffU, bitmap_size(binfo));
-	extra = (binfo->nbits % (binfo->ngroups * BITMAP_GROUP_NBITS));
+	extra = (BITMAP_GROUP_NBITS - (binfo->nbits & BITMAP_GROUP_NBITS_MASK))
+	    & BITMAP_GROUP_NBITS_MASK;
 	if (extra != 0)
-		bitmap[binfo->ngroups - 1] >>= (BITMAP_GROUP_NBITS - extra);
+		bitmap[binfo->ngroups - 1] >>= extra;
 }
 
 #endif /* USE_TREE */
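
bitmap_info_init() now computes the group count with BITMAP_BITS2GROUPS() instead of an explicit shift-and-bump, and bitmap_init() computes the number of unused bits in the last group directly. A self-contained sketch of what those expressions evaluate to, using local stand-ins for the BITMAP_* macros; the 64-bit group width is an assumption for the example.

#include <assert.h>
#include <stddef.h>

#define LG_GROUP_NBITS		6
#define GROUP_NBITS		((size_t)1 << LG_GROUP_NBITS)
#define GROUP_NBITS_MASK	(GROUP_NBITS - 1)
/* Round nbits up to whole groups: ceil(nbits / GROUP_NBITS). */
#define BITS2GROUPS(nbits)	(((nbits) + GROUP_NBITS_MASK) >> LG_GROUP_NBITS)

static void
example(void)
{
	size_t nbits = 100;
	size_t ngroups = BITS2GROUPS(nbits);	/* (100 + 63) >> 6 == 2 */
	/* Bits of the last group that correspond to no real entry; the
	 * init path shifts them out of the all-ones fill so a search can
	 * never return one of them. */
	size_t extra = (GROUP_NBITS - (nbits & GROUP_NBITS_MASK)) &
	    GROUP_NBITS_MASK;			/* 64 - 36 == 28 */

	assert(ngroups * GROUP_NBITS - nbits == extra);
	(void)ngroups;
}
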
diff --git a/src/chunk.c b/src/chunk.c
index 304d4e5..adc666f 100644
--- a/src/chunk.c
+++ b/src/chunk.c
@@ -49,9 +49,10 @@
  * definition.
  */
 
-static void	chunk_record(arena_t *arena, chunk_hooks_t *chunk_hooks,
-    extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, bool cache,
-    void *chunk, size_t size, bool zeroed, bool committed);
+static void	chunk_record(tsdn_t *tsdn, arena_t *arena,
+    chunk_hooks_t *chunk_hooks, extent_tree_t *chunks_szad,
+    extent_tree_t *chunks_ad, bool cache, void *chunk, size_t size, bool zeroed,
+    bool committed);
 
 /******************************************************************************/
 
@@ -63,23 +64,23 @@
 }
 
 chunk_hooks_t
-chunk_hooks_get(arena_t *arena)
+chunk_hooks_get(tsdn_t *tsdn, arena_t *arena)
 {
 	chunk_hooks_t chunk_hooks;
 
-	malloc_mutex_lock(&arena->chunks_mtx);
+	malloc_mutex_lock(tsdn, &arena->chunks_mtx);
 	chunk_hooks = chunk_hooks_get_locked(arena);
-	malloc_mutex_unlock(&arena->chunks_mtx);
+	malloc_mutex_unlock(tsdn, &arena->chunks_mtx);
 
 	return (chunk_hooks);
 }
 
 chunk_hooks_t
-chunk_hooks_set(arena_t *arena, const chunk_hooks_t *chunk_hooks)
+chunk_hooks_set(tsdn_t *tsdn, arena_t *arena, const chunk_hooks_t *chunk_hooks)
 {
 	chunk_hooks_t old_chunk_hooks;
 
-	malloc_mutex_lock(&arena->chunks_mtx);
+	malloc_mutex_lock(tsdn, &arena->chunks_mtx);
 	old_chunk_hooks = arena->chunk_hooks;
 	/*
 	 * Copy each field atomically so that it is impossible for readers to
@@ -104,14 +105,14 @@
 	ATOMIC_COPY_HOOK(split);
 	ATOMIC_COPY_HOOK(merge);
 #undef ATOMIC_COPY_HOOK
-	malloc_mutex_unlock(&arena->chunks_mtx);
+	malloc_mutex_unlock(tsdn, &arena->chunks_mtx);
 
 	return (old_chunk_hooks);
 }
 
 static void
-chunk_hooks_assure_initialized_impl(arena_t *arena, chunk_hooks_t *chunk_hooks,
-    bool locked)
+chunk_hooks_assure_initialized_impl(tsdn_t *tsdn, arena_t *arena,
+    chunk_hooks_t *chunk_hooks, bool locked)
 {
 	static const chunk_hooks_t uninitialized_hooks =
 	    CHUNK_HOOKS_INITIALIZER;
@@ -119,27 +120,28 @@
 	if (memcmp(chunk_hooks, &uninitialized_hooks, sizeof(chunk_hooks_t)) ==
 	    0) {
 		*chunk_hooks = locked ? chunk_hooks_get_locked(arena) :
-		    chunk_hooks_get(arena);
+		    chunk_hooks_get(tsdn, arena);
 	}
 }
 
 static void
-chunk_hooks_assure_initialized_locked(arena_t *arena,
+chunk_hooks_assure_initialized_locked(tsdn_t *tsdn, arena_t *arena,
     chunk_hooks_t *chunk_hooks)
 {
 
-	chunk_hooks_assure_initialized_impl(arena, chunk_hooks, true);
+	chunk_hooks_assure_initialized_impl(tsdn, arena, chunk_hooks, true);
 }
 
 static void
-chunk_hooks_assure_initialized(arena_t *arena, chunk_hooks_t *chunk_hooks)
+chunk_hooks_assure_initialized(tsdn_t *tsdn, arena_t *arena,
+    chunk_hooks_t *chunk_hooks)
 {
 
-	chunk_hooks_assure_initialized_impl(arena, chunk_hooks, false);
+	chunk_hooks_assure_initialized_impl(tsdn, arena, chunk_hooks, false);
 }
 
 bool
-chunk_register(const void *chunk, const extent_node_t *node)
+chunk_register(tsdn_t *tsdn, const void *chunk, const extent_node_t *node)
 {
 
 	assert(extent_node_addr_get(node) == chunk);
@@ -159,7 +161,7 @@
 			high = atomic_read_z(&highchunks);
 		}
 		if (cur > high && prof_gdump_get_unlocked())
-			prof_gdump();
+			prof_gdump(tsdn);
 	}
 
 	return (false);
@@ -197,7 +199,7 @@
 }
 
 static void *
-chunk_recycle(arena_t *arena, chunk_hooks_t *chunk_hooks,
+chunk_recycle(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks,
     extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, bool cache,
     void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit,
     bool dalloc_node)
@@ -219,8 +221,8 @@
 	/* Beware size_t wrap-around. */
 	if (alloc_size < size)
 		return (NULL);
-	malloc_mutex_lock(&arena->chunks_mtx);
-	chunk_hooks_assure_initialized_locked(arena, chunk_hooks);
+	malloc_mutex_lock(tsdn, &arena->chunks_mtx);
+	chunk_hooks_assure_initialized_locked(tsdn, arena, chunk_hooks);
 	if (new_addr != NULL) {
 		extent_node_t key;
 		extent_node_init(&key, arena, new_addr, alloc_size, false,
@@ -232,7 +234,7 @@
 	}
 	if (node == NULL || (new_addr != NULL && extent_node_size_get(node) <
 	    size)) {
-		malloc_mutex_unlock(&arena->chunks_mtx);
+		malloc_mutex_unlock(tsdn, &arena->chunks_mtx);
 		return (NULL);
 	}
 	leadsize = ALIGNMENT_CEILING((uintptr_t)extent_node_addr_get(node),
@@ -251,7 +253,7 @@
 	if (leadsize != 0 &&
 	    chunk_hooks->split(extent_node_addr_get(node),
 	    extent_node_size_get(node), leadsize, size, false, arena->ind)) {
-		malloc_mutex_unlock(&arena->chunks_mtx);
+		malloc_mutex_unlock(tsdn, &arena->chunks_mtx);
 		return (NULL);
 	}
 	/* Remove node from the tree. */
@@ -271,20 +273,21 @@
 		if (chunk_hooks->split(ret, size + trailsize, size,
 		    trailsize, false, arena->ind)) {
 			if (dalloc_node && node != NULL)
-				arena_node_dalloc(arena, node);
-			malloc_mutex_unlock(&arena->chunks_mtx);
-			chunk_record(arena, chunk_hooks, chunks_szad, chunks_ad,
-			    cache, ret, size + trailsize, zeroed, committed);
+				arena_node_dalloc(tsdn, arena, node);
+			malloc_mutex_unlock(tsdn, &arena->chunks_mtx);
+			chunk_record(tsdn, arena, chunk_hooks, chunks_szad,
+			    chunks_ad, cache, ret, size + trailsize, zeroed,
+			    committed);
 			return (NULL);
 		}
 		/* Insert the trailing space as a smaller chunk. */
 		if (node == NULL) {
-			node = arena_node_alloc(arena);
+			node = arena_node_alloc(tsdn, arena);
 			if (node == NULL) {
-				malloc_mutex_unlock(&arena->chunks_mtx);
-				chunk_record(arena, chunk_hooks, chunks_szad,
-				    chunks_ad, cache, ret, size + trailsize,
-				    zeroed, committed);
+				malloc_mutex_unlock(tsdn, &arena->chunks_mtx);
+				chunk_record(tsdn, arena, chunk_hooks,
+				    chunks_szad, chunks_ad, cache, ret, size +
+				    trailsize, zeroed, committed);
 				return (NULL);
 			}
 		}
@@ -296,16 +299,16 @@
 		node = NULL;
 	}
 	if (!committed && chunk_hooks->commit(ret, size, 0, size, arena->ind)) {
-		malloc_mutex_unlock(&arena->chunks_mtx);
-		chunk_record(arena, chunk_hooks, chunks_szad, chunks_ad, cache,
-		    ret, size, zeroed, committed);
+		malloc_mutex_unlock(tsdn, &arena->chunks_mtx);
+		chunk_record(tsdn, arena, chunk_hooks, chunks_szad, chunks_ad,
+		    cache, ret, size, zeroed, committed);
 		return (NULL);
 	}
-	malloc_mutex_unlock(&arena->chunks_mtx);
+	malloc_mutex_unlock(tsdn, &arena->chunks_mtx);
 
 	assert(dalloc_node || node != NULL);
 	if (dalloc_node && node != NULL)
-		arena_node_dalloc(arena, node);
+		arena_node_dalloc(tsdn, arena, node);
 	if (*zero) {
 		if (!zeroed)
 			memset(ret, 0, size);
@@ -328,8 +331,8 @@
  * them if they are returned.
  */
 static void *
-chunk_alloc_core(arena_t *arena, void *new_addr, size_t size, size_t alignment,
-    bool *zero, bool *commit, dss_prec_t dss_prec)
+chunk_alloc_core(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size,
+    size_t alignment, bool *zero, bool *commit, dss_prec_t dss_prec)
 {
 	void *ret;
 
@@ -340,8 +343,8 @@
 
 	/* "primary" dss. */
 	if (have_dss && dss_prec == dss_prec_primary && (ret =
-	    chunk_alloc_dss(arena, new_addr, size, alignment, zero, commit)) !=
-	    NULL)
+	    chunk_alloc_dss(tsdn, arena, new_addr, size, alignment, zero,
+	    commit)) != NULL)
 		return (ret);
 	/* mmap. */
 	if ((ret = chunk_alloc_mmap(new_addr, size, alignment, zero, commit)) !=
@@ -349,8 +352,8 @@
 		return (ret);
 	/* "secondary" dss. */
 	if (have_dss && dss_prec == dss_prec_secondary && (ret =
-	    chunk_alloc_dss(arena, new_addr, size, alignment, zero, commit)) !=
-	    NULL)
+	    chunk_alloc_dss(tsdn, arena, new_addr, size, alignment, zero,
+	    commit)) != NULL)
 		return (ret);
 
 	/* All strategies for allocation failed. */
@@ -380,8 +383,8 @@
 }
 
 void *
-chunk_alloc_cache(arena_t *arena, chunk_hooks_t *chunk_hooks, void *new_addr,
-    size_t size, size_t alignment, bool *zero, bool dalloc_node)
+chunk_alloc_cache(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks,
+    void *new_addr, size_t size, size_t alignment, bool *zero, bool dalloc_node)
 {
 	void *ret;
 	bool commit;
@@ -392,9 +395,9 @@
 	assert((alignment & chunksize_mask) == 0);
 
 	commit = true;
-	ret = chunk_recycle(arena, chunk_hooks, &arena->chunks_szad_cached,
-	    &arena->chunks_ad_cached, true, new_addr, size, alignment, zero,
-	    &commit, dalloc_node);
+	ret = chunk_recycle(tsdn, arena, chunk_hooks,
+	    &arena->chunks_szad_cached, &arena->chunks_ad_cached, true,
+	    new_addr, size, alignment, zero, &commit, dalloc_node);
 	if (ret == NULL)
 		return (NULL);
 	assert(commit);
@@ -404,11 +407,11 @@
 }
 
 static arena_t *
-chunk_arena_get(unsigned arena_ind)
+chunk_arena_get(tsdn_t *tsdn, unsigned arena_ind)
 {
 	arena_t *arena;
 
-	arena = arena_get(arena_ind, false);
+	arena = arena_get(tsdn, arena_ind, false);
 	/*
 	 * The arena we're allocating on behalf of must have been initialized
 	 * already.
@@ -422,11 +425,13 @@
     bool *commit, unsigned arena_ind)
 {
 	void *ret;
+	tsdn_t *tsdn;
 	arena_t *arena;
 
-	arena = chunk_arena_get(arena_ind);
-	ret = chunk_alloc_core(arena, new_addr, size, alignment, zero, commit,
-	    arena->dss_prec);
+	tsdn = tsdn_fetch();
+	arena = chunk_arena_get(tsdn, arena_ind);
+	ret = chunk_alloc_core(tsdn, arena, new_addr, size, alignment, zero,
+	    commit, arena->dss_prec);
 	if (ret == NULL)
 		return (NULL);
 	if (config_valgrind)
@@ -436,29 +441,35 @@
 }
 
 static void *
-chunk_alloc_retained(arena_t *arena, chunk_hooks_t *chunk_hooks, void *new_addr,
-    size_t size, size_t alignment, bool *zero, bool *commit)
+chunk_alloc_retained(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks,
+    void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit)
 {
+	void *ret;
 
 	assert(size != 0);
 	assert((size & chunksize_mask) == 0);
 	assert(alignment != 0);
 	assert((alignment & chunksize_mask) == 0);
 
-	return (chunk_recycle(arena, chunk_hooks, &arena->chunks_szad_retained,
-	    &arena->chunks_ad_retained, false, new_addr, size, alignment, zero,
-	    commit, true));
+	ret = chunk_recycle(tsdn, arena, chunk_hooks,
+	    &arena->chunks_szad_retained, &arena->chunks_ad_retained, false,
+	    new_addr, size, alignment, zero, commit, true);
+
+	if (config_stats && ret != NULL)
+		arena->stats.retained -= size;
+
+	return (ret);
 }
 
 void *
-chunk_alloc_wrapper(arena_t *arena, chunk_hooks_t *chunk_hooks, void *new_addr,
-    size_t size, size_t alignment, bool *zero, bool *commit)
+chunk_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks,
+    void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit)
 {
 	void *ret;
 
-	chunk_hooks_assure_initialized(arena, chunk_hooks);
+	chunk_hooks_assure_initialized(tsdn, arena, chunk_hooks);
 
-	ret = chunk_alloc_retained(arena, chunk_hooks, new_addr, size,
+	ret = chunk_alloc_retained(tsdn, arena, chunk_hooks, new_addr, size,
 	    alignment, zero, commit);
 	if (ret == NULL) {
 		ret = chunk_hooks->alloc(new_addr, size, alignment, zero,
@@ -473,7 +484,7 @@
 }
 
 static void
-chunk_record(arena_t *arena, chunk_hooks_t *chunk_hooks,
+chunk_record(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks,
     extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, bool cache,
     void *chunk, size_t size, bool zeroed, bool committed)
 {
@@ -485,8 +496,8 @@
 	unzeroed = cache || !zeroed;
 	JEMALLOC_VALGRIND_MAKE_MEM_NOACCESS(chunk, size);
 
-	malloc_mutex_lock(&arena->chunks_mtx);
-	chunk_hooks_assure_initialized_locked(arena, chunk_hooks);
+	malloc_mutex_lock(tsdn, &arena->chunks_mtx);
+	chunk_hooks_assure_initialized_locked(tsdn, arena, chunk_hooks);
 	extent_node_init(&key, arena, (void *)((uintptr_t)chunk + size), 0,
 	    false, false);
 	node = extent_tree_ad_nsearch(chunks_ad, &key);
@@ -511,7 +522,7 @@
 		arena_chunk_cache_maybe_insert(arena, node, cache);
 	} else {
 		/* Coalescing forward failed, so insert a new node. */
-		node = arena_node_alloc(arena);
+		node = arena_node_alloc(tsdn, arena);
 		if (node == NULL) {
 			/*
 			 * Node allocation failed, which is an exceedingly
@@ -520,8 +531,8 @@
 			 * a virtual memory leak.
 			 */
 			if (cache) {
-				chunk_purge_wrapper(arena, chunk_hooks, chunk,
-				    size, 0, size);
+				chunk_purge_wrapper(tsdn, arena, chunk_hooks,
+				    chunk, size, 0, size);
 			}
 			goto label_return;
 		}
@@ -557,16 +568,16 @@
 		extent_tree_szad_insert(chunks_szad, node);
 		arena_chunk_cache_maybe_insert(arena, node, cache);
 
-		arena_node_dalloc(arena, prev);
+		arena_node_dalloc(tsdn, arena, prev);
 	}
 
 label_return:
-	malloc_mutex_unlock(&arena->chunks_mtx);
+	malloc_mutex_unlock(tsdn, &arena->chunks_mtx);
 }
 
 void
-chunk_dalloc_cache(arena_t *arena, chunk_hooks_t *chunk_hooks, void *chunk,
-    size_t size, bool committed)
+chunk_dalloc_cache(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks,
+    void *chunk, size_t size, bool committed)
 {
 
 	assert(chunk != NULL);
@@ -574,9 +585,9 @@
 	assert(size != 0);
 	assert((size & chunksize_mask) == 0);
 
-	chunk_record(arena, chunk_hooks, &arena->chunks_szad_cached,
+	chunk_record(tsdn, arena, chunk_hooks, &arena->chunks_szad_cached,
 	    &arena->chunks_ad_cached, true, chunk, size, false, committed);
-	arena_maybe_purge(arena);
+	arena_maybe_purge(tsdn, arena);
 }
 
 static bool
@@ -584,14 +595,14 @@
     unsigned arena_ind)
 {
 
-	if (!have_dss || !chunk_in_dss(chunk))
+	if (!have_dss || !chunk_in_dss(tsdn_fetch(), chunk))
 		return (chunk_dalloc_mmap(chunk, size));
 	return (true);
 }
 
 void
-chunk_dalloc_wrapper(arena_t *arena, chunk_hooks_t *chunk_hooks, void *chunk,
-    size_t size, bool zeroed, bool committed)
+chunk_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks,
+    void *chunk, size_t size, bool zeroed, bool committed)
 {
 
 	assert(chunk != NULL);
@@ -599,7 +610,7 @@
 	assert(size != 0);
 	assert((size & chunksize_mask) == 0);
 
-	chunk_hooks_assure_initialized(arena, chunk_hooks);
+	chunk_hooks_assure_initialized(tsdn, arena, chunk_hooks);
 	/* Try to deallocate. */
 	if (!chunk_hooks->dalloc(chunk, size, committed, arena->ind))
 		return;
@@ -610,8 +621,11 @@
 	}
 	zeroed = !committed || !chunk_hooks->purge(chunk, size, 0, size,
 	    arena->ind);
-	chunk_record(arena, chunk_hooks, &arena->chunks_szad_retained,
+	chunk_record(tsdn, arena, chunk_hooks, &arena->chunks_szad_retained,
 	    &arena->chunks_ad_retained, false, chunk, size, zeroed, committed);
+
+	if (config_stats)
+		arena->stats.retained += size;
 }
 
 static bool
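
Together with the "retained -= size" in chunk_alloc_retained() above, this "retained += size" is what feeds the new retained statistics exported through ctl.c below: bytes count as retained while a chunk sits in the retained trees and leave the count once it is recycled. A toy model of that bookkeeping (my own sketch, not jemalloc source):

/* Toy model of the retained-bytes accounting; not jemalloc code. */
#include <assert.h>
#include <stddef.h>
#include <stdio.h>

static size_t retained;

static void
record_retained(size_t size)	/* Analog of chunk_dalloc_wrapper()'s += size. */
{
	retained += size;
}

static void
recycle_retained(size_t size)	/* Analog of chunk_alloc_retained()'s -= size. */
{
	assert(retained >= size);
	retained -= size;
}

int
main(void)
{
	record_retained(4 << 20);	/* Retain a 4 MiB chunk. */
	recycle_retained(2 << 20);	/* Later recycle 2 MiB of it. */
	printf("retained: %zu bytes\n", retained);
	return (0);
}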
@@ -648,11 +662,11 @@
 }
 
 bool
-chunk_purge_wrapper(arena_t *arena, chunk_hooks_t *chunk_hooks, void *chunk,
-    size_t size, size_t offset, size_t length)
+chunk_purge_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks,
+    void *chunk, size_t size, size_t offset, size_t length)
 {
 
-	chunk_hooks_assure_initialized(arena, chunk_hooks);
+	chunk_hooks_assure_initialized(tsdn, arena, chunk_hooks);
 	return (chunk_hooks->purge(chunk, size, offset, length, arena->ind));
 }
 
@@ -673,8 +687,11 @@
 
 	if (!maps_coalesce)
 		return (true);
-	if (have_dss && chunk_in_dss(chunk_a) != chunk_in_dss(chunk_b))
-		return (true);
+	if (have_dss) {
+		tsdn_t *tsdn = tsdn_fetch();
+		if (chunk_in_dss(tsdn, chunk_a) != chunk_in_dss(tsdn, chunk_b))
+			return (true);
+	}
 
 	return (false);
 }
@@ -683,7 +700,7 @@
 chunks_rtree_node_alloc(size_t nelms)
 {
 
-	return ((rtree_node_elm_t *)base_alloc(nelms *
+	return ((rtree_node_elm_t *)base_alloc(tsdn_fetch(), nelms *
 	    sizeof(rtree_node_elm_t)));
 }
 
@@ -730,22 +747,22 @@
 }
 
 void
-chunk_prefork(void)
+chunk_prefork(tsdn_t *tsdn)
 {
 
-	chunk_dss_prefork();
+	chunk_dss_prefork(tsdn);
 }
 
 void
-chunk_postfork_parent(void)
+chunk_postfork_parent(tsdn_t *tsdn)
 {
 
-	chunk_dss_postfork_parent();
+	chunk_dss_postfork_parent(tsdn);
 }
 
 void
-chunk_postfork_child(void)
+chunk_postfork_child(tsdn_t *tsdn)
 {
 
-	chunk_dss_postfork_child();
+	chunk_dss_postfork_child(tsdn);
 }
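
chunk_prefork()/chunk_postfork_*() now forward the tsdn down to the dss handlers; the overall discipline is the usual pthread_atfork() one of acquiring the mutex before fork(2) and releasing it in both parent and child, so the child never inherits a mutex held by some other thread. A generic pthreads sketch of that pattern (not jemalloc's fork machinery):

/* Generic fork-safety sketch; build with -pthread. */
#include <pthread.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

static pthread_mutex_t mtx = PTHREAD_MUTEX_INITIALIZER;

static void prefork(void)         { pthread_mutex_lock(&mtx); }
static void postfork_parent(void) { pthread_mutex_unlock(&mtx); }
static void postfork_child(void)  { pthread_mutex_unlock(&mtx); }

int
main(void)
{
	pthread_atfork(prefork, postfork_parent, postfork_child);

	pid_t pid = fork();
	if (pid == 0) {
		pthread_mutex_lock(&mtx);	/* Safe: child got it unlocked. */
		pthread_mutex_unlock(&mtx);
		_exit(0);
	}
	waitpid(pid, NULL, 0);
	printf("fork-safe locking ok\n");
	return (0);
}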
diff --git a/src/chunk_dss.c b/src/chunk_dss.c
index 943d0e9..0b1f82b 100644
--- a/src/chunk_dss.c
+++ b/src/chunk_dss.c
@@ -41,33 +41,33 @@
 }
 
 dss_prec_t
-chunk_dss_prec_get(void)
+chunk_dss_prec_get(tsdn_t *tsdn)
 {
 	dss_prec_t ret;
 
 	if (!have_dss)
 		return (dss_prec_disabled);
-	malloc_mutex_lock(&dss_mtx);
+	malloc_mutex_lock(tsdn, &dss_mtx);
 	ret = dss_prec_default;
-	malloc_mutex_unlock(&dss_mtx);
+	malloc_mutex_unlock(tsdn, &dss_mtx);
 	return (ret);
 }
 
 bool
-chunk_dss_prec_set(dss_prec_t dss_prec)
+chunk_dss_prec_set(tsdn_t *tsdn, dss_prec_t dss_prec)
 {
 
 	if (!have_dss)
 		return (dss_prec != dss_prec_disabled);
-	malloc_mutex_lock(&dss_mtx);
+	malloc_mutex_lock(tsdn, &dss_mtx);
 	dss_prec_default = dss_prec;
-	malloc_mutex_unlock(&dss_mtx);
+	malloc_mutex_unlock(tsdn, &dss_mtx);
 	return (false);
 }
 
 void *
-chunk_alloc_dss(arena_t *arena, void *new_addr, size_t size, size_t alignment,
-    bool *zero, bool *commit)
+chunk_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size,
+    size_t alignment, bool *zero, bool *commit)
 {
 	cassert(have_dss);
 	assert(size > 0 && (size & chunksize_mask) == 0);
@@ -80,7 +80,7 @@
 	if ((intptr_t)size < 0)
 		return (NULL);
 
-	malloc_mutex_lock(&dss_mtx);
+	malloc_mutex_lock(tsdn, &dss_mtx);
 	if (dss_prev != (void *)-1) {
 
 		/*
@@ -122,7 +122,7 @@
 			if ((uintptr_t)ret < (uintptr_t)dss_max ||
 			    (uintptr_t)dss_next < (uintptr_t)dss_max) {
 				/* Wrap-around. */
-				malloc_mutex_unlock(&dss_mtx);
+				malloc_mutex_unlock(tsdn, &dss_mtx);
 				return (NULL);
 			}
 			incr = gap_size + cpad_size + size;
@@ -130,11 +130,11 @@
 			if (dss_prev == dss_max) {
 				/* Success. */
 				dss_max = dss_next;
-				malloc_mutex_unlock(&dss_mtx);
+				malloc_mutex_unlock(tsdn, &dss_mtx);
 				if (cpad_size != 0) {
 					chunk_hooks_t chunk_hooks =
 					    CHUNK_HOOKS_INITIALIZER;
-					chunk_dalloc_wrapper(arena,
+					chunk_dalloc_wrapper(tsdn, arena,
 					    &chunk_hooks, cpad, cpad_size,
 					    false, true);
 				}
@@ -149,25 +149,25 @@
 			}
 		} while (dss_prev != (void *)-1);
 	}
-	malloc_mutex_unlock(&dss_mtx);
+	malloc_mutex_unlock(tsdn, &dss_mtx);
 
 	return (NULL);
 }
 
 bool
-chunk_in_dss(void *chunk)
+chunk_in_dss(tsdn_t *tsdn, void *chunk)
 {
 	bool ret;
 
 	cassert(have_dss);
 
-	malloc_mutex_lock(&dss_mtx);
+	malloc_mutex_lock(tsdn, &dss_mtx);
 	if ((uintptr_t)chunk >= (uintptr_t)dss_base
 	    && (uintptr_t)chunk < (uintptr_t)dss_max)
 		ret = true;
 	else
 		ret = false;
-	malloc_mutex_unlock(&dss_mtx);
+	malloc_mutex_unlock(tsdn, &dss_mtx);
 
 	return (ret);
 }
@@ -178,7 +178,7 @@
 
 	cassert(have_dss);
 
-	if (malloc_mutex_init(&dss_mtx))
+	if (malloc_mutex_init(&dss_mtx, "dss", WITNESS_RANK_DSS))
 		return (true);
 	dss_base = chunk_dss_sbrk(0);
 	dss_prev = dss_base;
@@ -188,27 +188,27 @@
 }
 
 void
-chunk_dss_prefork(void)
+chunk_dss_prefork(tsdn_t *tsdn)
 {
 
 	if (have_dss)
-		malloc_mutex_prefork(&dss_mtx);
+		malloc_mutex_prefork(tsdn, &dss_mtx);
 }
 
 void
-chunk_dss_postfork_parent(void)
+chunk_dss_postfork_parent(tsdn_t *tsdn)
 {
 
 	if (have_dss)
-		malloc_mutex_postfork_parent(&dss_mtx);
+		malloc_mutex_postfork_parent(tsdn, &dss_mtx);
 }
 
 void
-chunk_dss_postfork_child(void)
+chunk_dss_postfork_child(tsdn_t *tsdn)
 {
 
 	if (have_dss)
-		malloc_mutex_postfork_child(&dss_mtx);
+		malloc_mutex_postfork_child(tsdn, &dss_mtx);
 }
 
 /******************************************************************************/
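
malloc_mutex_init() now takes a name and a WITNESS_RANK_* value ("dss" here, "ctl" in ctl_boot() below), which suggests rank-based lock-order verification in debug builds. The sketch below illustrates that general technique only; it is not jemalloc's witness code, and the names and ranks are made up for the example:

/* Illustrative rank-ordered locking; hypothetical names, build with -pthread. */
#include <assert.h>
#include <pthread.h>
#include <stdio.h>

typedef struct {
	pthread_mutex_t	mtx;
	const char	*name;
	unsigned	rank;	/* Must strictly increase along any lock path. */
} ranked_mutex_t;

static __thread unsigned highest_rank_held = 0;

static ranked_mutex_t ctl = { PTHREAD_MUTEX_INITIALIZER, "ctl", 1 };
static ranked_mutex_t dss = { PTHREAD_MUTEX_INITIALIZER, "dss", 2 };

static void
ranked_lock(ranked_mutex_t *m)
{
	/* Taking a lock of rank <= one already held indicates a potential
	 * deadlock cycle, so assert on it. */
	assert(m->rank > highest_rank_held);
	pthread_mutex_lock(&m->mtx);
	highest_rank_held = m->rank;
}

static void
ranked_unlock(ranked_mutex_t *m)
{
	pthread_mutex_unlock(&m->mtx);
	highest_rank_held = 0;	/* Simplified: real trackers restore the prior rank. */
}

int
main(void)
{
	ranked_lock(&ctl);
	ranked_lock(&dss);	/* OK: rank increases. */
	ranked_unlock(&dss);
	ranked_unlock(&ctl);
	printf("lock order respected\n");
	return (0);
}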
diff --git a/src/chunk_mmap.c b/src/chunk_mmap.c
index 56b2ee4..f95ae75 100644
--- a/src/chunk_mmap.c
+++ b/src/chunk_mmap.c
@@ -9,25 +9,23 @@
 	void *ret;
 	size_t alloc_size;
 
-	alloc_size = size + alignment - PAGE;
+	alloc_size = size + alignment;
 	/* Beware size_t wrap-around. */
 	if (alloc_size < size)
 		return (NULL);
 	do {
 		void *pages;
 		size_t leadsize;
-		pages = pages_map(NULL, alloc_size);
+		pages = pages_map(NULL, alloc_size, commit);
 		if (pages == NULL)
 			return (NULL);
 		leadsize = ALIGNMENT_CEILING((uintptr_t)pages, alignment) -
 		    (uintptr_t)pages;
-		ret = pages_trim(pages, alloc_size, leadsize, size);
+		ret = pages_trim(pages, alloc_size, leadsize, size, commit);
 	} while (ret == NULL);
 
 	assert(ret != NULL);
 	*zero = true;
-	if (!*commit)
-		*commit = pages_decommit(ret, size);
 	return (ret);
 }
 
@@ -54,7 +52,7 @@
 	assert(alignment != 0);
 	assert((alignment & chunksize_mask) == 0);
 
-	ret = pages_map(new_addr, size);
+	ret = pages_map(new_addr, size, commit);
 	if (ret == NULL || ret == new_addr)
 		return (ret);
 	assert(new_addr == NULL);
@@ -66,8 +64,6 @@
 
 	assert(ret != NULL);
 	*zero = true;
-	if (!*commit)
-		*commit = pages_decommit(ret, size);
 	return (ret);
 }
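
pages_map()/pages_trim() now take the commit flag themselves, so the explicit pages_decommit() on the !*commit path disappears from both allocation routines, and the slow path simply over-allocates size + alignment bytes. The over-allocation guarantee is plain arithmetic: the leading pad up to the next aligned address is strictly less than alignment, so an aligned run of size bytes always fits. A standalone check of that arithmetic (my own example values; the local ALIGNMENT_CEILING mirrors the macro's semantics):

/* Standalone arithmetic check; not jemalloc source. */
#include <assert.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define ALIGNMENT_CEILING(s, a) (((s) + ((a) - 1)) & ~((uintptr_t)(a) - 1))

int
main(void)
{
	uintptr_t base = 0x7f0000003000;	/* Arbitrary page-aligned address. */
	size_t size = 2 << 20;			/* 2 MiB request. */
	size_t alignment = 2 << 20;		/* 2 MiB alignment. */
	size_t alloc_size = size + alignment;
	uintptr_t aligned = ALIGNMENT_CEILING(base, alignment);
	size_t leadsize = aligned - base;

	assert(leadsize < alignment);
	assert(leadsize + size <= alloc_size);	/* Aligned run always fits. */
	printf("lead=%zx trail=%zx\n", leadsize, alloc_size - leadsize - size);
	return (0);
}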
 
diff --git a/src/ckh.c b/src/ckh.c
index 3b423aa..747c1c8 100644
--- a/src/ckh.c
+++ b/src/ckh.c
@@ -40,8 +40,8 @@
 /******************************************************************************/
 /* Function prototypes for non-inline static functions. */
 
-static bool	ckh_grow(tsd_t *tsd, ckh_t *ckh);
-static void	ckh_shrink(tsd_t *tsd, ckh_t *ckh);
+static bool	ckh_grow(tsdn_t *tsdn, ckh_t *ckh);
+static void	ckh_shrink(tsdn_t *tsdn, ckh_t *ckh);
 
 /******************************************************************************/
 
@@ -244,7 +244,7 @@
 }
 
 static bool
-ckh_grow(tsd_t *tsd, ckh_t *ckh)
+ckh_grow(tsdn_t *tsdn, ckh_t *ckh)
 {
 	bool ret;
 	ckhc_t *tab, *ttab;
@@ -270,8 +270,8 @@
 			ret = true;
 			goto label_return;
 		}
-		tab = (ckhc_t *)ipallocztm(tsd, usize, CACHELINE, true, NULL,
-		    true, NULL);
+		tab = (ckhc_t *)ipallocztm(tsdn, usize, CACHELINE, true, NULL,
+		    true, arena_ichoose(tsdn, NULL));
 		if (tab == NULL) {
 			ret = true;
 			goto label_return;
@@ -283,12 +283,12 @@
 		ckh->lg_curbuckets = lg_curcells - LG_CKH_BUCKET_CELLS;
 
 		if (!ckh_rebuild(ckh, tab)) {
-			idalloctm(tsd, tab, tcache_get(tsd, false), true, true);
+			idalloctm(tsdn, tab, NULL, true, true);
 			break;
 		}
 
 		/* Rebuilding failed, so back out partially rebuilt table. */
-		idalloctm(tsd, ckh->tab, tcache_get(tsd, false), true, true);
+		idalloctm(tsdn, ckh->tab, NULL, true, true);
 		ckh->tab = tab;
 		ckh->lg_curbuckets = lg_prevbuckets;
 	}
@@ -299,7 +299,7 @@
 }
 
 static void
-ckh_shrink(tsd_t *tsd, ckh_t *ckh)
+ckh_shrink(tsdn_t *tsdn, ckh_t *ckh)
 {
 	ckhc_t *tab, *ttab;
 	size_t usize;
@@ -314,8 +314,8 @@
 	usize = sa2u(sizeof(ckhc_t) << lg_curcells, CACHELINE);
 	if (unlikely(usize == 0 || usize > HUGE_MAXCLASS))
 		return;
-	tab = (ckhc_t *)ipallocztm(tsd, usize, CACHELINE, true, NULL, true,
-	    NULL);
+	tab = (ckhc_t *)ipallocztm(tsdn, usize, CACHELINE, true, NULL, true,
+	    arena_ichoose(tsdn, NULL));
 	if (tab == NULL) {
 		/*
 		 * An OOM error isn't worth propagating, since it doesn't
@@ -330,7 +330,7 @@
 	ckh->lg_curbuckets = lg_curcells - LG_CKH_BUCKET_CELLS;
 
 	if (!ckh_rebuild(ckh, tab)) {
-		idalloctm(tsd, tab, tcache_get(tsd, false), true, true);
+		idalloctm(tsdn, tab, NULL, true, true);
 #ifdef CKH_COUNT
 		ckh->nshrinks++;
 #endif
@@ -338,7 +338,7 @@
 	}
 
 	/* Rebuilding failed, so back out partially rebuilt table. */
-	idalloctm(tsd, ckh->tab, tcache_get(tsd, false), true, true);
+	idalloctm(tsdn, ckh->tab, NULL, true, true);
 	ckh->tab = tab;
 	ckh->lg_curbuckets = lg_prevbuckets;
 #ifdef CKH_COUNT
@@ -347,7 +347,7 @@
 }
 
 bool
-ckh_new(tsd_t *tsd, ckh_t *ckh, size_t minitems, ckh_hash_t *hash,
+ckh_new(tsdn_t *tsdn, ckh_t *ckh, size_t minitems, ckh_hash_t *hash,
     ckh_keycomp_t *keycomp)
 {
 	bool ret;
@@ -391,8 +391,8 @@
 		ret = true;
 		goto label_return;
 	}
-	ckh->tab = (ckhc_t *)ipallocztm(tsd, usize, CACHELINE, true, NULL, true,
-	    NULL);
+	ckh->tab = (ckhc_t *)ipallocztm(tsdn, usize, CACHELINE, true, NULL,
+	    true, arena_ichoose(tsdn, NULL));
 	if (ckh->tab == NULL) {
 		ret = true;
 		goto label_return;
@@ -404,7 +404,7 @@
 }
 
 void
-ckh_delete(tsd_t *tsd, ckh_t *ckh)
+ckh_delete(tsdn_t *tsdn, ckh_t *ckh)
 {
 
 	assert(ckh != NULL);
@@ -421,9 +421,9 @@
 	    (unsigned long long)ckh->nrelocs);
 #endif
 
-	idalloctm(tsd, ckh->tab, tcache_get(tsd, false), true, true);
+	idalloctm(tsdn, ckh->tab, NULL, true, true);
 	if (config_debug)
-		memset(ckh, 0x5a, sizeof(ckh_t));
+		memset(ckh, JEMALLOC_FREE_JUNK, sizeof(ckh_t));
 }
 
 size_t
@@ -456,7 +456,7 @@
 }
 
 bool
-ckh_insert(tsd_t *tsd, ckh_t *ckh, const void *key, const void *data)
+ckh_insert(tsdn_t *tsdn, ckh_t *ckh, const void *key, const void *data)
 {
 	bool ret;
 
@@ -468,7 +468,7 @@
 #endif
 
 	while (ckh_try_insert(ckh, &key, &data)) {
-		if (ckh_grow(tsd, ckh)) {
+		if (ckh_grow(tsdn, ckh)) {
 			ret = true;
 			goto label_return;
 		}
@@ -480,7 +480,7 @@
 }
 
 bool
-ckh_remove(tsd_t *tsd, ckh_t *ckh, const void *searchkey, void **key,
+ckh_remove(tsdn_t *tsdn, ckh_t *ckh, const void *searchkey, void **key,
     void **data)
 {
 	size_t cell;
@@ -502,7 +502,7 @@
 		    + LG_CKH_BUCKET_CELLS - 2)) && ckh->lg_curbuckets
 		    > ckh->lg_minbuckets) {
 			/* Ignore error due to OOM. */
-			ckh_shrink(tsd, ckh);
+			ckh_shrink(tsdn, ckh);
 		}
 
 		return (false);
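
Beyond the tsd_t to tsdn_t switch and the move away from a per-thread tcache for these internal allocations, the grow/shrink paths keep their allocate-rebuild-or-back-out shape: allocate a new table, attempt ckh_rebuild() into it, free the old table on success and restore it on failure. A generic sketch of that shape (hypothetical table_* names, not jemalloc's cuckoo hashing; like the diff, true means failure):

/* Generic grow-or-back-out sketch; not jemalloc's ckh code. */
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

typedef struct {
	int	*slots;
	size_t	nslots;
} table_t;

/* Stand-in for ckh_rebuild(): returns true on failure. */
static bool
rebuild(table_t *t, int *new_slots, size_t new_nslots)
{
	(void)t;
	(void)new_slots;
	return (new_nslots == 0);	/* Pretend only empty tables fail. */
}

static bool
table_grow(table_t *t)
{
	size_t new_nslots = t->nslots << 1;
	int *new_slots = calloc(new_nslots, sizeof(int));

	if (new_slots == NULL)
		return (true);
	if (!rebuild(t, new_slots, new_nslots)) {
		free(t->slots);		/* Success: discard the old table. */
		t->slots = new_slots;
		t->nslots = new_nslots;
		return (false);
	}
	free(new_slots);		/* Failure: back out, keep the old table. */
	return (true);
}

int
main(void)
{
	table_t t = { calloc(8, sizeof(int)), 8 };

	printf("grow %s\n", table_grow(&t) ? "failed" : "ok");
	free(t.slots);
	return (0);
}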
diff --git a/src/ctl.c b/src/ctl.c
index 17bd071..dad8008 100644
--- a/src/ctl.c
+++ b/src/ctl.c
@@ -42,25 +42,25 @@
 /* Function prototypes for non-inline static functions. */
 
 #define	CTL_PROTO(n)							\
-static int	n##_ctl(const size_t *mib, size_t miblen, void *oldp,	\
-    size_t *oldlenp, void *newp, size_t newlen);
+static int	n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen,	\
+    void *oldp, size_t *oldlenp, void *newp, size_t newlen);
 
 #define	INDEX_PROTO(n)							\
-static const ctl_named_node_t	*n##_index(const size_t *mib,		\
-    size_t miblen, size_t i);
+static const ctl_named_node_t	*n##_index(tsdn_t *tsdn,		\
+    const size_t *mib, size_t miblen, size_t i);
 
 static bool	ctl_arena_init(ctl_arena_stats_t *astats);
 static void	ctl_arena_clear(ctl_arena_stats_t *astats);
-static void	ctl_arena_stats_amerge(ctl_arena_stats_t *cstats,
+static void	ctl_arena_stats_amerge(tsdn_t *tsdn, ctl_arena_stats_t *cstats,
     arena_t *arena);
 static void	ctl_arena_stats_smerge(ctl_arena_stats_t *sstats,
     ctl_arena_stats_t *astats);
-static void	ctl_arena_refresh(arena_t *arena, unsigned i);
-static bool	ctl_grow(void);
-static void	ctl_refresh(void);
-static bool	ctl_init(void);
-static int	ctl_lookup(const char *name, ctl_node_t const **nodesp,
-    size_t *mibp, size_t *depthp);
+static void	ctl_arena_refresh(tsdn_t *tsdn, arena_t *arena, unsigned i);
+static bool	ctl_grow(tsdn_t *tsdn);
+static void	ctl_refresh(tsdn_t *tsdn);
+static bool	ctl_init(tsdn_t *tsdn);
+static int	ctl_lookup(tsdn_t *tsdn, const char *name,
+    ctl_node_t const **nodesp, size_t *mibp, size_t *depthp);
 
 CTL_PROTO(version)
 CTL_PROTO(epoch)
@@ -117,9 +117,10 @@
 CTL_PROTO(tcache_create)
 CTL_PROTO(tcache_flush)
 CTL_PROTO(tcache_destroy)
-static void	arena_i_purge(unsigned arena_ind, bool all);
+static void	arena_i_purge(tsdn_t *tsdn, unsigned arena_ind, bool all);
 CTL_PROTO(arena_i_purge)
 CTL_PROTO(arena_i_decay)
+CTL_PROTO(arena_i_reset)
 CTL_PROTO(arena_i_dss)
 CTL_PROTO(arena_i_lg_dirty_mult)
 CTL_PROTO(arena_i_decay_time)
@@ -191,6 +192,7 @@
 CTL_PROTO(stats_arenas_i_pactive)
 CTL_PROTO(stats_arenas_i_pdirty)
 CTL_PROTO(stats_arenas_i_mapped)
+CTL_PROTO(stats_arenas_i_retained)
 CTL_PROTO(stats_arenas_i_npurge)
 CTL_PROTO(stats_arenas_i_nmadvise)
 CTL_PROTO(stats_arenas_i_purged)
@@ -203,6 +205,7 @@
 CTL_PROTO(stats_metadata)
 CTL_PROTO(stats_resident)
 CTL_PROTO(stats_mapped)
+CTL_PROTO(stats_retained)
 
 /******************************************************************************/
 /* mallctl tree. */
@@ -299,6 +302,7 @@
 static const ctl_named_node_t arena_i_node[] = {
 	{NAME("purge"),		CTL(arena_i_purge)},
 	{NAME("decay"),		CTL(arena_i_decay)},
+	{NAME("reset"),		CTL(arena_i_reset)},
 	{NAME("dss"),		CTL(arena_i_dss)},
 	{NAME("lg_dirty_mult"),	CTL(arena_i_lg_dirty_mult)},
 	{NAME("decay_time"),	CTL(arena_i_decay_time)},
@@ -456,6 +460,7 @@
 	{NAME("pactive"),	CTL(stats_arenas_i_pactive)},
 	{NAME("pdirty"),	CTL(stats_arenas_i_pdirty)},
 	{NAME("mapped"),	CTL(stats_arenas_i_mapped)},
+	{NAME("retained"),	CTL(stats_arenas_i_retained)},
 	{NAME("npurge"),	CTL(stats_arenas_i_npurge)},
 	{NAME("nmadvise"),	CTL(stats_arenas_i_nmadvise)},
 	{NAME("purged"),	CTL(stats_arenas_i_purged)},
@@ -482,6 +487,7 @@
 	{NAME("metadata"),	CTL(stats_metadata)},
 	{NAME("resident"),	CTL(stats_resident)},
 	{NAME("mapped"),	CTL(stats_mapped)},
+	{NAME("retained"),	CTL(stats_retained)},
 	{NAME("arenas"),	CHILD(indexed, stats_arenas)}
 };
 
@@ -554,12 +560,12 @@
 }
 
 static void
-ctl_arena_stats_amerge(ctl_arena_stats_t *cstats, arena_t *arena)
+ctl_arena_stats_amerge(tsdn_t *tsdn, ctl_arena_stats_t *cstats, arena_t *arena)
 {
 	unsigned i;
 
 	if (config_stats) {
-		arena_stats_merge(arena, &cstats->nthreads, &cstats->dss,
+		arena_stats_merge(tsdn, arena, &cstats->nthreads, &cstats->dss,
 		    &cstats->lg_dirty_mult, &cstats->decay_time,
 		    &cstats->pactive, &cstats->pdirty, &cstats->astats,
 		    cstats->bstats, cstats->lstats, cstats->hstats);
@@ -572,8 +578,8 @@
 			cstats->nrequests_small += cstats->bstats[i].nrequests;
 		}
 	} else {
-		arena_basic_stats_merge(arena, &cstats->nthreads, &cstats->dss,
-		    &cstats->lg_dirty_mult, &cstats->decay_time,
+		arena_basic_stats_merge(tsdn, arena, &cstats->nthreads,
+		    &cstats->dss, &cstats->lg_dirty_mult, &cstats->decay_time,
 		    &cstats->pactive, &cstats->pdirty);
 	}
 }
@@ -589,6 +595,7 @@
 
 	if (config_stats) {
 		sstats->astats.mapped += astats->astats.mapped;
+		sstats->astats.retained += astats->astats.retained;
 		sstats->astats.npurge += astats->astats.npurge;
 		sstats->astats.nmadvise += astats->astats.nmadvise;
 		sstats->astats.purged += astats->astats.purged;
@@ -649,24 +656,24 @@
 }
 
 static void
-ctl_arena_refresh(arena_t *arena, unsigned i)
+ctl_arena_refresh(tsdn_t *tsdn, arena_t *arena, unsigned i)
 {
 	ctl_arena_stats_t *astats = &ctl_stats.arenas[i];
 	ctl_arena_stats_t *sstats = &ctl_stats.arenas[ctl_stats.narenas];
 
 	ctl_arena_clear(astats);
-	ctl_arena_stats_amerge(astats, arena);
+	ctl_arena_stats_amerge(tsdn, astats, arena);
 	/* Merge into sum stats as well. */
 	ctl_arena_stats_smerge(sstats, astats);
 }
 
 static bool
-ctl_grow(void)
+ctl_grow(tsdn_t *tsdn)
 {
 	ctl_arena_stats_t *astats;
 
 	/* Initialize new arena. */
-	if (arena_init(ctl_stats.narenas) == NULL)
+	if (arena_init(tsdn, ctl_stats.narenas) == NULL)
 		return (true);
 
 	/* Allocate extended arena stats. */
@@ -701,7 +708,7 @@
 }
 
 static void
-ctl_refresh(void)
+ctl_refresh(tsdn_t *tsdn)
 {
 	unsigned i;
 	VARIABLE_ARRAY(arena_t *, tarenas, ctl_stats.narenas);
@@ -713,19 +720,20 @@
 	ctl_arena_clear(&ctl_stats.arenas[ctl_stats.narenas]);
 
 	for (i = 0; i < ctl_stats.narenas; i++)
-		tarenas[i] = arena_get(i, false);
+		tarenas[i] = arena_get(tsdn, i, false);
 
 	for (i = 0; i < ctl_stats.narenas; i++) {
 		bool initialized = (tarenas[i] != NULL);
 
 		ctl_stats.arenas[i].initialized = initialized;
 		if (initialized)
-			ctl_arena_refresh(tarenas[i], i);
+			ctl_arena_refresh(tsdn, tarenas[i], i);
 	}
 
 	if (config_stats) {
 		size_t base_allocated, base_resident, base_mapped;
-		base_stats_get(&base_allocated, &base_resident, &base_mapped);
+		base_stats_get(tsdn, &base_allocated, &base_resident,
+		    &base_mapped);
 		ctl_stats.allocated =
 		    ctl_stats.arenas[ctl_stats.narenas].allocated_small +
 		    ctl_stats.arenas[ctl_stats.narenas].astats.allocated_large +
@@ -742,17 +750,19 @@
 		    ctl_stats.arenas[ctl_stats.narenas].pdirty) << LG_PAGE);
 		ctl_stats.mapped = base_mapped +
 		    ctl_stats.arenas[ctl_stats.narenas].astats.mapped;
+		ctl_stats.retained =
+		    ctl_stats.arenas[ctl_stats.narenas].astats.retained;
 	}
 
 	ctl_epoch++;
 }
 
 static bool
-ctl_init(void)
+ctl_init(tsdn_t *tsdn)
 {
 	bool ret;
 
-	malloc_mutex_lock(&ctl_mtx);
+	malloc_mutex_lock(tsdn, &ctl_mtx);
 	if (!ctl_initialized) {
 		/*
 		 * Allocate space for one extra arena stats element, which
@@ -794,19 +804,19 @@
 		ctl_stats.arenas[ctl_stats.narenas].initialized = true;
 
 		ctl_epoch = 0;
-		ctl_refresh();
+		ctl_refresh(tsdn);
 		ctl_initialized = true;
 	}
 
 	ret = false;
 label_return:
-	malloc_mutex_unlock(&ctl_mtx);
+	malloc_mutex_unlock(tsdn, &ctl_mtx);
 	return (ret);
 }
 
 static int
-ctl_lookup(const char *name, ctl_node_t const **nodesp, size_t *mibp,
-    size_t *depthp)
+ctl_lookup(tsdn_t *tsdn, const char *name, ctl_node_t const **nodesp,
+    size_t *mibp, size_t *depthp)
 {
 	int ret;
 	const char *elm, *tdot, *dot;
@@ -858,7 +868,7 @@
 			}
 
 			inode = ctl_indexed_node(node->children);
-			node = inode->index(mibp, *depthp, (size_t)index);
+			node = inode->index(tsdn, mibp, *depthp, (size_t)index);
 			if (node == NULL) {
 				ret = ENOENT;
 				goto label_return;
@@ -902,8 +912,8 @@
 }
 
 int
-ctl_byname(const char *name, void *oldp, size_t *oldlenp, void *newp,
-    size_t newlen)
+ctl_byname(tsd_t *tsd, const char *name, void *oldp, size_t *oldlenp,
+    void *newp, size_t newlen)
 {
 	int ret;
 	size_t depth;
@@ -911,19 +921,19 @@
 	size_t mib[CTL_MAX_DEPTH];
 	const ctl_named_node_t *node;
 
-	if (!ctl_initialized && ctl_init()) {
+	if (!ctl_initialized && ctl_init(tsd_tsdn(tsd))) {
 		ret = EAGAIN;
 		goto label_return;
 	}
 
 	depth = CTL_MAX_DEPTH;
-	ret = ctl_lookup(name, nodes, mib, &depth);
+	ret = ctl_lookup(tsd_tsdn(tsd), name, nodes, mib, &depth);
 	if (ret != 0)
 		goto label_return;
 
 	node = ctl_named_node(nodes[depth-1]);
 	if (node != NULL && node->ctl)
-		ret = node->ctl(mib, depth, oldp, oldlenp, newp, newlen);
+		ret = node->ctl(tsd, mib, depth, oldp, oldlenp, newp, newlen);
 	else {
 		/* The name refers to a partial path through the ctl tree. */
 		ret = ENOENT;
@@ -934,29 +944,29 @@
 }
 
 int
-ctl_nametomib(const char *name, size_t *mibp, size_t *miblenp)
+ctl_nametomib(tsdn_t *tsdn, const char *name, size_t *mibp, size_t *miblenp)
 {
 	int ret;
 
-	if (!ctl_initialized && ctl_init()) {
+	if (!ctl_initialized && ctl_init(tsdn)) {
 		ret = EAGAIN;
 		goto label_return;
 	}
 
-	ret = ctl_lookup(name, NULL, mibp, miblenp);
+	ret = ctl_lookup(tsdn, name, NULL, mibp, miblenp);
 label_return:
 	return(ret);
 }
 
 int
-ctl_bymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
-    void *newp, size_t newlen)
+ctl_bymib(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
+    size_t *oldlenp, void *newp, size_t newlen)
 {
 	int ret;
 	const ctl_named_node_t *node;
 	size_t i;
 
-	if (!ctl_initialized && ctl_init()) {
+	if (!ctl_initialized && ctl_init(tsd_tsdn(tsd))) {
 		ret = EAGAIN;
 		goto label_return;
 	}
@@ -978,7 +988,7 @@
 
 			/* Indexed element. */
 			inode = ctl_indexed_node(node->children);
-			node = inode->index(mib, miblen, mib[i]);
+			node = inode->index(tsd_tsdn(tsd), mib, miblen, mib[i]);
 			if (node == NULL) {
 				ret = ENOENT;
 				goto label_return;
@@ -988,7 +998,7 @@
 
 	/* Call the ctl function. */
 	if (node && node->ctl)
-		ret = node->ctl(mib, miblen, oldp, oldlenp, newp, newlen);
+		ret = node->ctl(tsd, mib, miblen, oldp, oldlenp, newp, newlen);
 	else {
 		/* Partial MIB. */
 		ret = ENOENT;
@@ -1002,7 +1012,7 @@
 ctl_boot(void)
 {
 
-	if (malloc_mutex_init(&ctl_mtx))
+	if (malloc_mutex_init(&ctl_mtx, "ctl", WITNESS_RANK_CTL))
 		return (true);
 
 	ctl_initialized = false;
@@ -1011,24 +1021,24 @@
 }
 
 void
-ctl_prefork(void)
+ctl_prefork(tsdn_t *tsdn)
 {
 
-	malloc_mutex_prefork(&ctl_mtx);
+	malloc_mutex_prefork(tsdn, &ctl_mtx);
 }
 
 void
-ctl_postfork_parent(void)
+ctl_postfork_parent(tsdn_t *tsdn)
 {
 
-	malloc_mutex_postfork_parent(&ctl_mtx);
+	malloc_mutex_postfork_parent(tsdn, &ctl_mtx);
 }
 
 void
-ctl_postfork_child(void)
+ctl_postfork_child(tsdn_t *tsdn)
 {
 
-	malloc_mutex_postfork_child(&ctl_mtx);
+	malloc_mutex_postfork_child(tsdn, &ctl_mtx);
 }
 
 /******************************************************************************/
@@ -1085,8 +1095,8 @@
  */
 #define	CTL_RO_CLGEN(c, l, n, v, t)					\
 static int								\
-n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,	\
-    void *newp, size_t newlen)						\
+n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,	\
+    size_t *oldlenp, void *newp, size_t newlen)				\
 {									\
 	int ret;							\
 	t oldval;							\
@@ -1094,7 +1104,7 @@
 	if (!(c))							\
 		return (ENOENT);					\
 	if (l)								\
-		malloc_mutex_lock(&ctl_mtx);				\
+		malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx);		\
 	READONLY();							\
 	oldval = (v);							\
 	READ(oldval, t);						\
@@ -1102,47 +1112,47 @@
 	ret = 0;							\
 label_return:								\
 	if (l)								\
-		malloc_mutex_unlock(&ctl_mtx);				\
+		malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx);		\
 	return (ret);							\
 }
 
 #define	CTL_RO_CGEN(c, n, v, t)						\
 static int								\
-n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,	\
-    void *newp, size_t newlen)						\
+n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,	\
+    size_t *oldlenp, void *newp, size_t newlen)				\
 {									\
 	int ret;							\
 	t oldval;							\
 									\
 	if (!(c))							\
 		return (ENOENT);					\
-	malloc_mutex_lock(&ctl_mtx);					\
+	malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx);			\
 	READONLY();							\
 	oldval = (v);							\
 	READ(oldval, t);						\
 									\
 	ret = 0;							\
 label_return:								\
-	malloc_mutex_unlock(&ctl_mtx);					\
+	malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx);			\
 	return (ret);							\
 }
 
 #define	CTL_RO_GEN(n, v, t)						\
 static int								\
-n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,	\
-    void *newp, size_t newlen)						\
+n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,	\
+    size_t *oldlenp, void *newp, size_t newlen)				\
 {									\
 	int ret;							\
 	t oldval;							\
 									\
-	malloc_mutex_lock(&ctl_mtx);					\
+	malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx);			\
 	READONLY();							\
 	oldval = (v);							\
 	READ(oldval, t);						\
 									\
 	ret = 0;							\
 label_return:								\
-	malloc_mutex_unlock(&ctl_mtx);					\
+	malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx);			\
 	return (ret);							\
 }
 
@@ -1152,8 +1162,8 @@
  */
 #define	CTL_RO_NL_CGEN(c, n, v, t)					\
 static int								\
-n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,	\
-    void *newp, size_t newlen)						\
+n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,	\
+    size_t *oldlenp, void *newp, size_t newlen)				\
 {									\
 	int ret;							\
 	t oldval;							\
@@ -1171,8 +1181,8 @@
 
 #define	CTL_RO_NL_GEN(n, v, t)						\
 static int								\
-n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,	\
-    void *newp, size_t newlen)						\
+n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,	\
+    size_t *oldlenp, void *newp, size_t newlen)				\
 {									\
 	int ret;							\
 	t oldval;							\
@@ -1188,17 +1198,15 @@
 
 #define	CTL_TSD_RO_NL_CGEN(c, n, m, t)					\
 static int								\
-n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,	\
-    void *newp, size_t newlen)						\
+n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,	\
+    size_t *oldlenp, void *newp, size_t newlen)				\
 {									\
 	int ret;							\
 	t oldval;							\
-	tsd_t *tsd;							\
 									\
 	if (!(c))							\
 		return (ENOENT);					\
 	READONLY();							\
-	tsd = tsd_fetch();						\
 	oldval = (m(tsd));						\
 	READ(oldval, t);						\
 									\
@@ -1209,8 +1217,8 @@
 
 #define	CTL_RO_CONFIG_GEN(n, t)						\
 static int								\
-n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,	\
-    void *newp, size_t newlen)						\
+n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,	\
+    size_t *oldlenp, void *newp, size_t newlen)				\
 {									\
 	int ret;							\
 	t oldval;							\
@@ -1229,21 +1237,21 @@
 CTL_RO_NL_GEN(version, JEMALLOC_VERSION, const char *)
 
 static int
-epoch_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
-    void *newp, size_t newlen)
+epoch_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
+    size_t *oldlenp, void *newp, size_t newlen)
 {
 	int ret;
 	UNUSED uint64_t newval;
 
-	malloc_mutex_lock(&ctl_mtx);
+	malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx);
 	WRITE(newval, uint64_t);
 	if (newp != NULL)
-		ctl_refresh();
+		ctl_refresh(tsd_tsdn(tsd));
 	READ(ctl_epoch, uint64_t);
 
 	ret = 0;
 label_return:
-	malloc_mutex_unlock(&ctl_mtx);
+	malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx);
 	return (ret);
 }
 
@@ -1298,20 +1306,18 @@
 /******************************************************************************/
 
 static int
-thread_arena_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
-    void *newp, size_t newlen)
+thread_arena_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
+    size_t *oldlenp, void *newp, size_t newlen)
 {
 	int ret;
-	tsd_t *tsd;
 	arena_t *oldarena;
 	unsigned newind, oldind;
 
-	tsd = tsd_fetch();
 	oldarena = arena_choose(tsd, NULL);
 	if (oldarena == NULL)
 		return (EAGAIN);
 
-	malloc_mutex_lock(&ctl_mtx);
+	malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx);
 	newind = oldind = oldarena->ind;
 	WRITE(newind, unsigned);
 	READ(oldind, unsigned);
@@ -1325,7 +1331,7 @@
 		}
 
 		/* Initialize arena if necessary. */
-		newarena = arena_get(newind, true);
+		newarena = arena_get(tsd_tsdn(tsd), newind, true);
 		if (newarena == NULL) {
 			ret = EAGAIN;
 			goto label_return;
@@ -1335,15 +1341,15 @@
 		if (config_tcache) {
 			tcache_t *tcache = tsd_tcache_get(tsd);
 			if (tcache != NULL) {
-				tcache_arena_reassociate(tcache, oldarena,
-				    newarena);
+				tcache_arena_reassociate(tsd_tsdn(tsd), tcache,
+				    oldarena, newarena);
 			}
 		}
 	}
 
 	ret = 0;
 label_return:
-	malloc_mutex_unlock(&ctl_mtx);
+	malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx);
 	return (ret);
 }
 
@@ -1357,8 +1363,8 @@
     tsd_thread_deallocatedp_get, uint64_t *)
 
 static int
-thread_tcache_enabled_ctl(const size_t *mib, size_t miblen, void *oldp,
-    size_t *oldlenp, void *newp, size_t newlen)
+thread_tcache_enabled_ctl(tsd_t *tsd, const size_t *mib, size_t miblen,
+    void *oldp, size_t *oldlenp, void *newp, size_t newlen)
 {
 	int ret;
 	bool oldval;
@@ -1382,8 +1388,8 @@
 }
 
 static int
-thread_tcache_flush_ctl(const size_t *mib, size_t miblen, void *oldp,
-    size_t *oldlenp, void *newp, size_t newlen)
+thread_tcache_flush_ctl(tsd_t *tsd, const size_t *mib, size_t miblen,
+    void *oldp, size_t *oldlenp, void *newp, size_t newlen)
 {
 	int ret;
 
@@ -1401,7 +1407,7 @@
 }
 
 static int
-thread_prof_name_ctl(const size_t *mib, size_t miblen, void *oldp,
+thread_prof_name_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
     size_t *oldlenp, void *newp, size_t newlen)
 {
 	int ret;
@@ -1412,20 +1418,16 @@
 	READ_XOR_WRITE();
 
 	if (newp != NULL) {
-		tsd_t *tsd;
-
 		if (newlen != sizeof(const char *)) {
 			ret = EINVAL;
 			goto label_return;
 		}
 
-		tsd = tsd_fetch();
-
 		if ((ret = prof_thread_name_set(tsd, *(const char **)newp)) !=
 		    0)
 			goto label_return;
 	} else {
-		const char *oldname = prof_thread_name_get();
+		const char *oldname = prof_thread_name_get(tsd);
 		READ(oldname, const char *);
 	}
 
@@ -1435,7 +1437,7 @@
 }
 
 static int
-thread_prof_active_ctl(const size_t *mib, size_t miblen, void *oldp,
+thread_prof_active_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
     size_t *oldlenp, void *newp, size_t newlen)
 {
 	int ret;
@@ -1444,13 +1446,13 @@
 	if (!config_prof)
 		return (ENOENT);
 
-	oldval = prof_thread_active_get();
+	oldval = prof_thread_active_get(tsd);
 	if (newp != NULL) {
 		if (newlen != sizeof(bool)) {
 			ret = EINVAL;
 			goto label_return;
 		}
-		if (prof_thread_active_set(*(bool *)newp)) {
+		if (prof_thread_active_set(tsd, *(bool *)newp)) {
 			ret = EAGAIN;
 			goto label_return;
 		}
@@ -1465,21 +1467,18 @@
 /******************************************************************************/
 
 static int
-tcache_create_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
-    void *newp, size_t newlen)
+tcache_create_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
+    size_t *oldlenp, void *newp, size_t newlen)
 {
 	int ret;
-	tsd_t *tsd;
 	unsigned tcache_ind;
 
 	if (!config_tcache)
 		return (ENOENT);
 
-	tsd = tsd_fetch();
-
-	malloc_mutex_lock(&ctl_mtx);
+	malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx);
 	READONLY();
-	if (tcaches_create(tsd, &tcache_ind)) {
+	if (tcaches_create(tsd_tsdn(tsd), &tcache_ind)) {
 		ret = EFAULT;
 		goto label_return;
 	}
@@ -1487,23 +1486,20 @@
 
 	ret = 0;
 label_return:
-	malloc_mutex_unlock(&ctl_mtx);
+	malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx);
 	return (ret);
 }
 
 static int
-tcache_flush_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
-    void *newp, size_t newlen)
+tcache_flush_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
+    size_t *oldlenp, void *newp, size_t newlen)
 {
 	int ret;
-	tsd_t *tsd;
 	unsigned tcache_ind;
 
 	if (!config_tcache)
 		return (ENOENT);
 
-	tsd = tsd_fetch();
-
 	WRITEONLY();
 	tcache_ind = UINT_MAX;
 	WRITE(tcache_ind, unsigned);
@@ -1519,18 +1515,15 @@
 }
 
 static int
-tcache_destroy_ctl(const size_t *mib, size_t miblen, void *oldp,
+tcache_destroy_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
     size_t *oldlenp, void *newp, size_t newlen)
 {
 	int ret;
-	tsd_t *tsd;
 	unsigned tcache_ind;
 
 	if (!config_tcache)
 		return (ENOENT);
 
-	tsd = tsd_fetch();
-
 	WRITEONLY();
 	tcache_ind = UINT_MAX;
 	WRITE(tcache_ind, unsigned);
@@ -1548,10 +1541,10 @@
 /******************************************************************************/
 
 static void
-arena_i_purge(unsigned arena_ind, bool all)
+arena_i_purge(tsdn_t *tsdn, unsigned arena_ind, bool all)
 {
 
-	malloc_mutex_lock(&ctl_mtx);
+	malloc_mutex_lock(tsdn, &ctl_mtx);
 	{
 		unsigned narenas = ctl_stats.narenas;
 
@@ -1560,43 +1553,43 @@
 			VARIABLE_ARRAY(arena_t *, tarenas, narenas);
 
 			for (i = 0; i < narenas; i++)
-				tarenas[i] = arena_get(i, false);
+				tarenas[i] = arena_get(tsdn, i, false);
 
 			/*
 			 * No further need to hold ctl_mtx, since narenas and
 			 * tarenas contain everything needed below.
 			 */
-			malloc_mutex_unlock(&ctl_mtx);
+			malloc_mutex_unlock(tsdn, &ctl_mtx);
 
 			for (i = 0; i < narenas; i++) {
 				if (tarenas[i] != NULL)
-					arena_purge(tarenas[i], all);
+					arena_purge(tsdn, tarenas[i], all);
 			}
 		} else {
 			arena_t *tarena;
 
 			assert(arena_ind < narenas);
 
-			tarena = arena_get(arena_ind, false);
+			tarena = arena_get(tsdn, arena_ind, false);
 
 			/* No further need to hold ctl_mtx. */
-			malloc_mutex_unlock(&ctl_mtx);
+			malloc_mutex_unlock(tsdn, &ctl_mtx);
 
 			if (tarena != NULL)
-				arena_purge(tarena, all);
+				arena_purge(tsdn, tarena, all);
 		}
 	}
 }
 
 static int
-arena_i_purge_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
-    void *newp, size_t newlen)
+arena_i_purge_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
+    size_t *oldlenp, void *newp, size_t newlen)
 {
 	int ret;
 
 	READONLY();
 	WRITEONLY();
-	arena_i_purge((unsigned)mib[1], true);
+	arena_i_purge(tsd_tsdn(tsd), (unsigned)mib[1], true);
 
 	ret = 0;
 label_return:
@@ -1604,14 +1597,14 @@
 }
 
 static int
-arena_i_decay_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
-    void *newp, size_t newlen)
+arena_i_decay_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
+    size_t *oldlenp, void *newp, size_t newlen)
 {
 	int ret;
 
 	READONLY();
 	WRITEONLY();
-	arena_i_purge((unsigned)mib[1], false);
+	arena_i_purge(tsd_tsdn(tsd), (unsigned)mib[1], false);
 
 	ret = 0;
 label_return:
@@ -1619,8 +1612,42 @@
 }
 
 static int
-arena_i_dss_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
-    void *newp, size_t newlen)
+arena_i_reset_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
+    size_t *oldlenp, void *newp, size_t newlen)
+{
+	int ret;
+	unsigned arena_ind;
+	arena_t *arena;
+
+	READONLY();
+	WRITEONLY();
+
+	if ((config_valgrind && unlikely(in_valgrind)) || (config_fill &&
+	    unlikely(opt_quarantine))) {
+		ret = EFAULT;
+		goto label_return;
+	}
+
+	arena_ind = (unsigned)mib[1];
+	if (config_debug) {
+		malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx);
+		assert(arena_ind < ctl_stats.narenas);
+		malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx);
+	}
+	assert(arena_ind >= opt_narenas);
+
+	arena = arena_get(tsd_tsdn(tsd), arena_ind, false);
+
+	arena_reset(tsd, arena);
+
+	ret = 0;
+label_return:
+	return (ret);
+}
+
+static int
+arena_i_dss_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
+    size_t *oldlenp, void *newp, size_t newlen)
 {
 	int ret;
 	const char *dss = NULL;
@@ -1628,7 +1655,7 @@
 	dss_prec_t dss_prec_old = dss_prec_limit;
 	dss_prec_t dss_prec = dss_prec_limit;
 
-	malloc_mutex_lock(&ctl_mtx);
+	malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx);
 	WRITE(dss, const char *);
 	if (dss != NULL) {
 		int i;
@@ -1649,20 +1676,20 @@
 	}
 
 	if (arena_ind < ctl_stats.narenas) {
-		arena_t *arena = arena_get(arena_ind, false);
+		arena_t *arena = arena_get(tsd_tsdn(tsd), arena_ind, false);
 		if (arena == NULL || (dss_prec != dss_prec_limit &&
-		    arena_dss_prec_set(arena, dss_prec))) {
+		    arena_dss_prec_set(tsd_tsdn(tsd), arena, dss_prec))) {
 			ret = EFAULT;
 			goto label_return;
 		}
-		dss_prec_old = arena_dss_prec_get(arena);
+		dss_prec_old = arena_dss_prec_get(tsd_tsdn(tsd), arena);
 	} else {
 		if (dss_prec != dss_prec_limit &&
-		    chunk_dss_prec_set(dss_prec)) {
+		    chunk_dss_prec_set(tsd_tsdn(tsd), dss_prec)) {
 			ret = EFAULT;
 			goto label_return;
 		}
-		dss_prec_old = chunk_dss_prec_get();
+		dss_prec_old = chunk_dss_prec_get(tsd_tsdn(tsd));
 	}
 
 	dss = dss_prec_names[dss_prec_old];
@@ -1670,26 +1697,26 @@
 
 	ret = 0;
 label_return:
-	malloc_mutex_unlock(&ctl_mtx);
+	malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx);
 	return (ret);
 }
 
 static int
-arena_i_lg_dirty_mult_ctl(const size_t *mib, size_t miblen, void *oldp,
-    size_t *oldlenp, void *newp, size_t newlen)
+arena_i_lg_dirty_mult_ctl(tsd_t *tsd, const size_t *mib, size_t miblen,
+    void *oldp, size_t *oldlenp, void *newp, size_t newlen)
 {
 	int ret;
 	unsigned arena_ind = (unsigned)mib[1];
 	arena_t *arena;
 
-	arena = arena_get(arena_ind, false);
+	arena = arena_get(tsd_tsdn(tsd), arena_ind, false);
 	if (arena == NULL) {
 		ret = EFAULT;
 		goto label_return;
 	}
 
 	if (oldp != NULL && oldlenp != NULL) {
-		size_t oldval = arena_lg_dirty_mult_get(arena);
+		size_t oldval = arena_lg_dirty_mult_get(tsd_tsdn(tsd), arena);
 		READ(oldval, ssize_t);
 	}
 	if (newp != NULL) {
@@ -1697,7 +1724,8 @@
 			ret = EINVAL;
 			goto label_return;
 		}
-		if (arena_lg_dirty_mult_set(arena, *(ssize_t *)newp)) {
+		if (arena_lg_dirty_mult_set(tsd_tsdn(tsd), arena,
+		    *(ssize_t *)newp)) {
 			ret = EFAULT;
 			goto label_return;
 		}
@@ -1709,21 +1737,21 @@
 }
 
 static int
-arena_i_decay_time_ctl(const size_t *mib, size_t miblen, void *oldp,
+arena_i_decay_time_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
     size_t *oldlenp, void *newp, size_t newlen)
 {
 	int ret;
 	unsigned arena_ind = (unsigned)mib[1];
 	arena_t *arena;
 
-	arena = arena_get(arena_ind, false);
+	arena = arena_get(tsd_tsdn(tsd), arena_ind, false);
 	if (arena == NULL) {
 		ret = EFAULT;
 		goto label_return;
 	}
 
 	if (oldp != NULL && oldlenp != NULL) {
-		size_t oldval = arena_decay_time_get(arena);
+		size_t oldval = arena_decay_time_get(tsd_tsdn(tsd), arena);
 		READ(oldval, ssize_t);
 	}
 	if (newp != NULL) {
@@ -1731,7 +1759,8 @@
 			ret = EINVAL;
 			goto label_return;
 		}
-		if (arena_decay_time_set(arena, *(ssize_t *)newp)) {
+		if (arena_decay_time_set(tsd_tsdn(tsd), arena,
+		    *(ssize_t *)newp)) {
 			ret = EFAULT;
 			goto label_return;
 		}
@@ -1743,24 +1772,25 @@
 }
 
 static int
-arena_i_chunk_hooks_ctl(const size_t *mib, size_t miblen, void *oldp,
-    size_t *oldlenp, void *newp, size_t newlen)
+arena_i_chunk_hooks_ctl(tsd_t *tsd, const size_t *mib, size_t miblen,
+    void *oldp, size_t *oldlenp, void *newp, size_t newlen)
 {
 	int ret;
 	unsigned arena_ind = (unsigned)mib[1];
 	arena_t *arena;
 
-	malloc_mutex_lock(&ctl_mtx);
+	malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx);
 	if (arena_ind < narenas_total_get() && (arena =
-	    arena_get(arena_ind, false)) != NULL) {
+	    arena_get(tsd_tsdn(tsd), arena_ind, false)) != NULL) {
 		if (newp != NULL) {
 			chunk_hooks_t old_chunk_hooks, new_chunk_hooks;
 			WRITE(new_chunk_hooks, chunk_hooks_t);
-			old_chunk_hooks = chunk_hooks_set(arena,
+			old_chunk_hooks = chunk_hooks_set(tsd_tsdn(tsd), arena,
 			    &new_chunk_hooks);
 			READ(old_chunk_hooks, chunk_hooks_t);
 		} else {
-			chunk_hooks_t old_chunk_hooks = chunk_hooks_get(arena);
+			chunk_hooks_t old_chunk_hooks =
+			    chunk_hooks_get(tsd_tsdn(tsd), arena);
 			READ(old_chunk_hooks, chunk_hooks_t);
 		}
 	} else {
@@ -1769,16 +1799,16 @@
 	}
 	ret = 0;
 label_return:
-	malloc_mutex_unlock(&ctl_mtx);
+	malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx);
 	return (ret);
 }
 
 static const ctl_named_node_t *
-arena_i_index(const size_t *mib, size_t miblen, size_t i)
+arena_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i)
 {
-	const ctl_named_node_t * ret;
+	const ctl_named_node_t *ret;
 
-	malloc_mutex_lock(&ctl_mtx);
+	malloc_mutex_lock(tsdn, &ctl_mtx);
 	if (i > ctl_stats.narenas) {
 		ret = NULL;
 		goto label_return;
@@ -1786,20 +1816,20 @@
 
 	ret = super_arena_i_node;
 label_return:
-	malloc_mutex_unlock(&ctl_mtx);
+	malloc_mutex_unlock(tsdn, &ctl_mtx);
 	return (ret);
 }
 
 /******************************************************************************/
 
 static int
-arenas_narenas_ctl(const size_t *mib, size_t miblen, void *oldp,
+arenas_narenas_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
     size_t *oldlenp, void *newp, size_t newlen)
 {
 	int ret;
 	unsigned narenas;
 
-	malloc_mutex_lock(&ctl_mtx);
+	malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx);
 	READONLY();
 	if (*oldlenp != sizeof(unsigned)) {
 		ret = EINVAL;
@@ -1810,18 +1840,18 @@
 
 	ret = 0;
 label_return:
-	malloc_mutex_unlock(&ctl_mtx);
+	malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx);
 	return (ret);
 }
 
 static int
-arenas_initialized_ctl(const size_t *mib, size_t miblen, void *oldp,
+arenas_initialized_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
     size_t *oldlenp, void *newp, size_t newlen)
 {
 	int ret;
 	unsigned nread, i;
 
-	malloc_mutex_lock(&ctl_mtx);
+	malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx);
 	READONLY();
 	if (*oldlenp != ctl_stats.narenas * sizeof(bool)) {
 		ret = EINVAL;
@@ -1836,13 +1866,13 @@
 		((bool *)oldp)[i] = ctl_stats.arenas[i].initialized;
 
 label_return:
-	malloc_mutex_unlock(&ctl_mtx);
+	malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx);
 	return (ret);
 }
 
 static int
-arenas_lg_dirty_mult_ctl(const size_t *mib, size_t miblen, void *oldp,
-    size_t *oldlenp, void *newp, size_t newlen)
+arenas_lg_dirty_mult_ctl(tsd_t *tsd, const size_t *mib, size_t miblen,
+    void *oldp, size_t *oldlenp, void *newp, size_t newlen)
 {
 	int ret;
 
@@ -1867,7 +1897,7 @@
 }
 
 static int
-arenas_decay_time_ctl(const size_t *mib, size_t miblen, void *oldp,
+arenas_decay_time_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
     size_t *oldlenp, void *newp, size_t newlen)
 {
 	int ret;
@@ -1901,7 +1931,7 @@
 CTL_RO_NL_GEN(arenas_bin_i_nregs, arena_bin_info[mib[2]].nregs, uint32_t)
 CTL_RO_NL_GEN(arenas_bin_i_run_size, arena_bin_info[mib[2]].run_size, size_t)
 static const ctl_named_node_t *
-arenas_bin_i_index(const size_t *mib, size_t miblen, size_t i)
+arenas_bin_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i)
 {
 
 	if (i > NBINS)
@@ -1912,7 +1942,7 @@
 CTL_RO_NL_GEN(arenas_nlruns, nlclasses, unsigned)
 CTL_RO_NL_GEN(arenas_lrun_i_size, index2size(NBINS+(szind_t)mib[2]), size_t)
 static const ctl_named_node_t *
-arenas_lrun_i_index(const size_t *mib, size_t miblen, size_t i)
+arenas_lrun_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i)
 {
 
 	if (i > nlclasses)
@@ -1924,7 +1954,7 @@
 CTL_RO_NL_GEN(arenas_hchunk_i_size, index2size(NBINS+nlclasses+(szind_t)mib[2]),
     size_t)
 static const ctl_named_node_t *
-arenas_hchunk_i_index(const size_t *mib, size_t miblen, size_t i)
+arenas_hchunk_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i)
 {
 
 	if (i > nhclasses)
@@ -1933,15 +1963,15 @@
 }
 
 static int
-arenas_extend_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
-    void *newp, size_t newlen)
+arenas_extend_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
+    size_t *oldlenp, void *newp, size_t newlen)
 {
 	int ret;
 	unsigned narenas;
 
-	malloc_mutex_lock(&ctl_mtx);
+	malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx);
 	READONLY();
-	if (ctl_grow()) {
+	if (ctl_grow(tsd_tsdn(tsd))) {
 		ret = EAGAIN;
 		goto label_return;
 	}
@@ -1950,14 +1980,40 @@
 
 	ret = 0;
 label_return:
-	malloc_mutex_unlock(&ctl_mtx);
+	malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx);
 	return (ret);
 }
 
 /******************************************************************************/
 
 static int
-prof_thread_active_init_ctl(const size_t *mib, size_t miblen, void *oldp,
+prof_thread_active_init_ctl(tsd_t *tsd, const size_t *mib, size_t miblen,
+    void *oldp, size_t *oldlenp, void *newp, size_t newlen)
+{
+	int ret;
+	bool oldval;
+
+	if (!config_prof)
+		return (ENOENT);
+
+	if (newp != NULL) {
+		if (newlen != sizeof(bool)) {
+			ret = EINVAL;
+			goto label_return;
+		}
+		oldval = prof_thread_active_init_set(tsd_tsdn(tsd),
+		    *(bool *)newp);
+	} else
+		oldval = prof_thread_active_init_get(tsd_tsdn(tsd));
+	READ(oldval, bool);
+
+	ret = 0;
+label_return:
+	return (ret);
+}
+
+static int
+prof_active_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
     size_t *oldlenp, void *newp, size_t newlen)
 {
 	int ret;
@@ -1971,9 +2027,9 @@
 			ret = EINVAL;
 			goto label_return;
 		}
-		oldval = prof_thread_active_init_set(*(bool *)newp);
+		oldval = prof_active_set(tsd_tsdn(tsd), *(bool *)newp);
 	} else
-		oldval = prof_thread_active_init_get();
+		oldval = prof_active_get(tsd_tsdn(tsd));
 	READ(oldval, bool);
 
 	ret = 0;
@@ -1982,33 +2038,8 @@
 }
 
 static int
-prof_active_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
-    void *newp, size_t newlen)
-{
-	int ret;
-	bool oldval;
-
-	if (!config_prof)
-		return (ENOENT);
-
-	if (newp != NULL) {
-		if (newlen != sizeof(bool)) {
-			ret = EINVAL;
-			goto label_return;
-		}
-		oldval = prof_active_set(*(bool *)newp);
-	} else
-		oldval = prof_active_get();
-	READ(oldval, bool);
-
-	ret = 0;
-label_return:
-	return (ret);
-}
-
-static int
-prof_dump_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
-    void *newp, size_t newlen)
+prof_dump_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
+    size_t *oldlenp, void *newp, size_t newlen)
 {
 	int ret;
 	const char *filename = NULL;
@@ -2019,7 +2050,7 @@
 	WRITEONLY();
 	WRITE(filename, const char *);
 
-	if (prof_mdump(filename)) {
+	if (prof_mdump(tsd, filename)) {
 		ret = EFAULT;
 		goto label_return;
 	}
@@ -2030,8 +2061,8 @@
 }
 
 static int
-prof_gdump_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
-    void *newp, size_t newlen)
+prof_gdump_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
+    size_t *oldlenp, void *newp, size_t newlen)
 {
 	int ret;
 	bool oldval;
@@ -2044,9 +2075,9 @@
 			ret = EINVAL;
 			goto label_return;
 		}
-		oldval = prof_gdump_set(*(bool *)newp);
+		oldval = prof_gdump_set(tsd_tsdn(tsd), *(bool *)newp);
 	} else
-		oldval = prof_gdump_get();
+		oldval = prof_gdump_get(tsd_tsdn(tsd));
 	READ(oldval, bool);
 
 	ret = 0;
@@ -2055,12 +2086,11 @@
 }
 
 static int
-prof_reset_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
-    void *newp, size_t newlen)
+prof_reset_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
+    size_t *oldlenp, void *newp, size_t newlen)
 {
 	int ret;
 	size_t lg_sample = lg_prof_sample;
-	tsd_t *tsd;
 
 	if (!config_prof)
 		return (ENOENT);
@@ -2070,9 +2100,7 @@
 	if (lg_sample >= (sizeof(uint64_t) << 3))
 		lg_sample = (sizeof(uint64_t) << 3) - 1;
 
-	tsd = tsd_fetch();
-
-	prof_reset(tsd, lg_sample);
+	prof_reset(tsd_tsdn(tsd), lg_sample);
 
 	ret = 0;
 label_return:
@@ -2090,6 +2118,7 @@
 CTL_RO_CGEN(config_stats, stats_metadata, ctl_stats.metadata, size_t)
 CTL_RO_CGEN(config_stats, stats_resident, ctl_stats.resident, size_t)
 CTL_RO_CGEN(config_stats, stats_mapped, ctl_stats.mapped, size_t)
+CTL_RO_CGEN(config_stats, stats_retained, ctl_stats.retained, size_t)
 
 CTL_RO_GEN(stats_arenas_i_dss, ctl_stats.arenas[mib[2]].dss, const char *)
 CTL_RO_GEN(stats_arenas_i_lg_dirty_mult, ctl_stats.arenas[mib[2]].lg_dirty_mult,
@@ -2101,6 +2130,8 @@
 CTL_RO_GEN(stats_arenas_i_pdirty, ctl_stats.arenas[mib[2]].pdirty, size_t)
 CTL_RO_CGEN(config_stats, stats_arenas_i_mapped,
     ctl_stats.arenas[mib[2]].astats.mapped, size_t)
+CTL_RO_CGEN(config_stats, stats_arenas_i_retained,
+    ctl_stats.arenas[mib[2]].astats.retained, size_t)
 CTL_RO_CGEN(config_stats, stats_arenas_i_npurge,
     ctl_stats.arenas[mib[2]].astats.npurge, uint64_t)
 CTL_RO_CGEN(config_stats, stats_arenas_i_nmadvise,
@@ -2157,7 +2188,8 @@
     ctl_stats.arenas[mib[2]].bstats[mib[4]].curruns, size_t)
 
 static const ctl_named_node_t *
-stats_arenas_i_bins_j_index(const size_t *mib, size_t miblen, size_t j)
+stats_arenas_i_bins_j_index(tsdn_t *tsdn, const size_t *mib, size_t miblen,
+    size_t j)
 {
 
 	if (j > NBINS)
@@ -2175,7 +2207,8 @@
     ctl_stats.arenas[mib[2]].lstats[mib[4]].curruns, size_t)
 
 static const ctl_named_node_t *
-stats_arenas_i_lruns_j_index(const size_t *mib, size_t miblen, size_t j)
+stats_arenas_i_lruns_j_index(tsdn_t *tsdn, const size_t *mib, size_t miblen,
+    size_t j)
 {
 
 	if (j > nlclasses)
@@ -2194,7 +2227,8 @@
     ctl_stats.arenas[mib[2]].hstats[mib[4]].curhchunks, size_t)
 
 static const ctl_named_node_t *
-stats_arenas_i_hchunks_j_index(const size_t *mib, size_t miblen, size_t j)
+stats_arenas_i_hchunks_j_index(tsdn_t *tsdn, const size_t *mib, size_t miblen,
+    size_t j)
 {
 
 	if (j > nhclasses)
@@ -2203,11 +2237,11 @@
 }
 
 static const ctl_named_node_t *
-stats_arenas_i_index(const size_t *mib, size_t miblen, size_t i)
+stats_arenas_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i)
 {
 	const ctl_named_node_t * ret;
 
-	malloc_mutex_lock(&ctl_mtx);
+	malloc_mutex_lock(tsdn, &ctl_mtx);
 	if (i > ctl_stats.narenas || !ctl_stats.arenas[i].initialized) {
 		ret = NULL;
 		goto label_return;
@@ -2215,6 +2249,6 @@
 
 	ret = super_stats_arenas_i_node;
 label_return:
-	malloc_mutex_unlock(&ctl_mtx);
+	malloc_mutex_unlock(tsdn, &ctl_mtx);
 	return (ret);
 }
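
The ctl.c hunks above all apply one pattern: every mallctl handler and index function gains an explicit tsd_t */tsdn_t * first argument, which is then passed to malloc_mutex_lock()/malloc_mutex_unlock(). The following is a minimal standalone sketch of that pattern, not jemalloc code; ctx_t, ctx_mutex_t and demo_ctl are hypothetical stand-ins, and the owner field only gestures at what the real witness machinery records.

#include <pthread.h>
#include <stdio.h>

typedef struct {
	const char	*thread_name;	/* stands in for tsd/tsdn state */
} ctx_t;

typedef struct {
	pthread_mutex_t	lock;
	const ctx_t	*owner;		/* recorded at lock time */
} ctx_mutex_t;

static void
ctx_mutex_lock(ctx_t *ctx, ctx_mutex_t *mtx)
{

	pthread_mutex_lock(&mtx->lock);
	mtx->owner = ctx;
}

static void
ctx_mutex_unlock(ctx_t *ctx, ctx_mutex_t *mtx)
{

	(void)ctx;
	mtx->owner = NULL;
	pthread_mutex_unlock(&mtx->lock);
}

/* A ctl-style handler: the thread context is an explicit first argument. */
static int
demo_ctl(ctx_t *ctx, ctx_mutex_t *mtx, unsigned *counter)
{
	int ret;

	ctx_mutex_lock(ctx, mtx);
	(*counter)++;
	ret = 0;
	ctx_mutex_unlock(ctx, mtx);
	return (ret);
}

int
main(void)
{
	ctx_t ctx = {"main"};
	ctx_mutex_t mtx;
	unsigned counter = 0;

	pthread_mutex_init(&mtx.lock, NULL);
	mtx.owner = NULL;
	demo_ctl(&ctx, &mtx, &counter);
	printf("counter: %u\n", counter);
	return (0);
}

Passing the context explicitly keeps the lock layer free of TLS lookups and gives debug builds a natural place to attribute each held lock to a thread.
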
diff --git a/src/huge.c b/src/huge.c
index 5f7ceaf..1aa02a0 100644
--- a/src/huge.c
+++ b/src/huge.c
@@ -15,12 +15,21 @@
 }
 
 static bool
-huge_node_set(const void *ptr, extent_node_t *node)
+huge_node_set(tsdn_t *tsdn, const void *ptr, extent_node_t *node)
 {
 
 	assert(extent_node_addr_get(node) == ptr);
 	assert(!extent_node_achunk_get(node));
-	return (chunk_register(ptr, node));
+	return (chunk_register(tsdn, ptr, node));
+}
+
+static void
+huge_node_reset(tsdn_t *tsdn, const void *ptr, extent_node_t *node)
+{
+	bool err;
+
+	err = huge_node_set(tsdn, ptr, node);
+	assert(!err);
 }
 
 static void
@@ -31,18 +40,17 @@
 }
 
 void *
-huge_malloc(tsd_t *tsd, arena_t *arena, size_t usize, bool zero,
-    tcache_t *tcache)
+huge_malloc(tsdn_t *tsdn, arena_t *arena, size_t usize, bool zero)
 {
 
 	assert(usize == s2u(usize));
 
-	return (huge_palloc(tsd, arena, usize, chunksize, zero, tcache));
+	return (huge_palloc(tsdn, arena, usize, chunksize, zero));
 }
 
 void *
-huge_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment,
-    bool zero, tcache_t *tcache)
+huge_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment,
+    bool zero)
 {
 	void *ret;
 	size_t ausize;
@@ -51,14 +59,16 @@
 
 	/* Allocate one or more contiguous chunks for this request. */
 
+	assert(!tsdn_null(tsdn) || arena != NULL);
+
 	ausize = sa2u(usize, alignment);
 	if (unlikely(ausize == 0 || ausize > HUGE_MAXCLASS))
 		return (NULL);
 	assert(ausize >= chunksize);
 
 	/* Allocate an extent node with which to track the chunk. */
-	node = ipallocztm(tsd, CACHELINE_CEILING(sizeof(extent_node_t)),
-	    CACHELINE, false, tcache, true, arena);
+	node = ipallocztm(tsdn, CACHELINE_CEILING(sizeof(extent_node_t)),
+	    CACHELINE, false, NULL, true, arena_ichoose(tsdn, arena));
 	if (node == NULL)
 		return (NULL);
 
@@ -67,34 +77,35 @@
 	 * it is possible to make correct junk/zero fill decisions below.
 	 */
 	is_zeroed = zero;
-	arena = arena_choose(tsd, arena);
-	if (unlikely(arena == NULL) || (ret = arena_chunk_alloc_huge(arena,
-	    usize, alignment, &is_zeroed)) == NULL) {
-		idalloctm(tsd, node, tcache, true, true);
+	if (likely(!tsdn_null(tsdn)))
+		arena = arena_choose(tsdn_tsd(tsdn), arena);
+	if (unlikely(arena == NULL) || (ret = arena_chunk_alloc_huge(tsdn,
+	    arena, usize, alignment, &is_zeroed)) == NULL) {
+		idalloctm(tsdn, node, NULL, true, true);
 		return (NULL);
 	}
 
 	extent_node_init(node, arena, ret, usize, is_zeroed, true);
 
-	if (huge_node_set(ret, node)) {
-		arena_chunk_dalloc_huge(arena, ret, usize);
-		idalloctm(tsd, node, tcache, true, true);
+	if (huge_node_set(tsdn, ret, node)) {
+		arena_chunk_dalloc_huge(tsdn, arena, ret, usize);
+		idalloctm(tsdn, node, NULL, true, true);
 		return (NULL);
 	}
 
 	/* Insert node into huge. */
-	malloc_mutex_lock(&arena->huge_mtx);
+	malloc_mutex_lock(tsdn, &arena->huge_mtx);
 	ql_elm_new(node, ql_link);
 	ql_tail_insert(&arena->huge, node, ql_link);
-	malloc_mutex_unlock(&arena->huge_mtx);
+	malloc_mutex_unlock(tsdn, &arena->huge_mtx);
 
 	if (zero || (config_fill && unlikely(opt_zero))) {
 		if (!is_zeroed)
 			memset(ret, 0, usize);
 	} else if (config_fill && unlikely(opt_junk_alloc))
-		memset(ret, 0xa5, usize);
+		memset(ret, JEMALLOC_ALLOC_JUNK, usize);
 
-	arena_decay_tick(tsd, arena);
+	arena_decay_tick(tsdn, arena);
 	return (ret);
 }
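
huge_palloc() above asserts !tsdn_null(tsdn) || arena != NULL and only calls arena_choose() when the tsdn is non-NULL, because a tsdn_t is allowed to be NULL during bootstrap. Below is a standalone sketch of that "tsd or NULL" convention; the demo_* names are invented and do not reflect the real tsd API.

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

typedef struct {
	unsigned	thread_id;
} demo_tsd_t;

/* Same representation as demo_tsd_t, but NULL is a legal value. */
typedef demo_tsd_t demo_tsdn_t;

static bool
demo_tsdn_null(const demo_tsdn_t *tsdn)
{

	return (tsdn == NULL);
}

static demo_tsd_t *
demo_tsdn_tsd(demo_tsdn_t *tsdn)
{

	/* Callers must check demo_tsdn_null() first. */
	return (tsdn);
}

static void
demo_alloc_path(demo_tsdn_t *tsdn)
{

	if (!demo_tsdn_null(tsdn)) {
		printf("per-thread path, thread %u\n",
		    demo_tsdn_tsd(tsdn)->thread_id);
	} else
		printf("bootstrap path, no tsd yet\n");
}

int
main(void)
{
	demo_tsd_t tsd = {1};

	demo_alloc_path(NULL);	/* analogous to TSDN_NULL during bootstrap */
	demo_alloc_path(&tsd);	/* analogous to tsd_tsdn(tsd) afterwards */
	return (0);
}
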
 
@@ -103,7 +114,7 @@
 #define	huge_dalloc_junk JEMALLOC_N(huge_dalloc_junk_impl)
 #endif
 static void
-huge_dalloc_junk(void *ptr, size_t usize)
+huge_dalloc_junk(tsdn_t *tsdn, void *ptr, size_t usize)
 {
 
 	if (config_fill && have_dss && unlikely(opt_junk_free)) {
@@ -111,8 +122,8 @@
 		 * Only bother junk filling if the chunk isn't about to be
 		 * unmapped.
 		 */
-		if (!config_munmap || (have_dss && chunk_in_dss(ptr)))
-			memset(ptr, 0x5a, usize);
+		if (!config_munmap || (have_dss && chunk_in_dss(tsdn, ptr)))
+			memset(ptr, JEMALLOC_FREE_JUNK, usize);
 	}
 }
 #ifdef JEMALLOC_JET
@@ -122,8 +133,8 @@
 #endif
 
 static void
-huge_ralloc_no_move_similar(void *ptr, size_t oldsize, size_t usize_min,
-    size_t usize_max, bool zero)
+huge_ralloc_no_move_similar(tsdn_t *tsdn, void *ptr, size_t oldsize,
+    size_t usize_min, size_t usize_max, bool zero)
 {
 	size_t usize, usize_next;
 	extent_node_t *node;
@@ -147,24 +158,28 @@
 	if (oldsize > usize) {
 		size_t sdiff = oldsize - usize;
 		if (config_fill && unlikely(opt_junk_free)) {
-			memset((void *)((uintptr_t)ptr + usize), 0x5a, sdiff);
+			memset((void *)((uintptr_t)ptr + usize),
+			    JEMALLOC_FREE_JUNK, sdiff);
 			post_zeroed = false;
 		} else {
-			post_zeroed = !chunk_purge_wrapper(arena, &chunk_hooks,
-			    ptr, CHUNK_CEILING(oldsize), usize, sdiff);
+			post_zeroed = !chunk_purge_wrapper(tsdn, arena,
+			    &chunk_hooks, ptr, CHUNK_CEILING(oldsize), usize,
+			    sdiff);
 		}
 	} else
 		post_zeroed = pre_zeroed;
 
-	malloc_mutex_lock(&arena->huge_mtx);
+	malloc_mutex_lock(tsdn, &arena->huge_mtx);
 	/* Update the size of the huge allocation. */
+	huge_node_unset(ptr, node);
 	assert(extent_node_size_get(node) != usize);
 	extent_node_size_set(node, usize);
+	huge_node_reset(tsdn, ptr, node);
 	/* Update zeroed. */
 	extent_node_zeroed_set(node, post_zeroed);
-	malloc_mutex_unlock(&arena->huge_mtx);
+	malloc_mutex_unlock(tsdn, &arena->huge_mtx);
 
-	arena_chunk_ralloc_huge_similar(arena, ptr, oldsize, usize);
+	arena_chunk_ralloc_huge_similar(tsdn, arena, ptr, oldsize, usize);
 
 	/* Fill if necessary (growing). */
 	if (oldsize < usize) {
@@ -174,14 +189,15 @@
 				    usize - oldsize);
 			}
 		} else if (config_fill && unlikely(opt_junk_alloc)) {
-			memset((void *)((uintptr_t)ptr + oldsize), 0xa5, usize -
-			    oldsize);
+			memset((void *)((uintptr_t)ptr + oldsize),
+			    JEMALLOC_ALLOC_JUNK, usize - oldsize);
 		}
 	}
 }
 
 static bool
-huge_ralloc_no_move_shrink(void *ptr, size_t oldsize, size_t usize)
+huge_ralloc_no_move_shrink(tsdn_t *tsdn, void *ptr, size_t oldsize,
+    size_t usize)
 {
 	extent_node_t *node;
 	arena_t *arena;
@@ -192,7 +208,7 @@
 	node = huge_node_get(ptr);
 	arena = extent_node_arena_get(node);
 	pre_zeroed = extent_node_zeroed_get(node);
-	chunk_hooks = chunk_hooks_get(arena);
+	chunk_hooks = chunk_hooks_get(tsdn, arena);
 
 	assert(oldsize > usize);
 
@@ -205,42 +221,45 @@
 	if (oldsize > usize) {
 		size_t sdiff = oldsize - usize;
 		if (config_fill && unlikely(opt_junk_free)) {
-			huge_dalloc_junk((void *)((uintptr_t)ptr + usize),
+			huge_dalloc_junk(tsdn, (void *)((uintptr_t)ptr + usize),
 			    sdiff);
 			post_zeroed = false;
 		} else {
-			post_zeroed = !chunk_purge_wrapper(arena, &chunk_hooks,
-			    CHUNK_ADDR2BASE((uintptr_t)ptr + usize),
-			    CHUNK_CEILING(oldsize),
+			post_zeroed = !chunk_purge_wrapper(tsdn, arena,
+			    &chunk_hooks, CHUNK_ADDR2BASE((uintptr_t)ptr +
+			    usize), CHUNK_CEILING(oldsize),
 			    CHUNK_ADDR2OFFSET((uintptr_t)ptr + usize), sdiff);
 		}
 	} else
 		post_zeroed = pre_zeroed;
 
-	malloc_mutex_lock(&arena->huge_mtx);
+	malloc_mutex_lock(tsdn, &arena->huge_mtx);
 	/* Update the size of the huge allocation. */
+	huge_node_unset(ptr, node);
 	extent_node_size_set(node, usize);
+	huge_node_reset(tsdn, ptr, node);
 	/* Update zeroed. */
 	extent_node_zeroed_set(node, post_zeroed);
-	malloc_mutex_unlock(&arena->huge_mtx);
+	malloc_mutex_unlock(tsdn, &arena->huge_mtx);
 
 	/* Zap the excess chunks. */
-	arena_chunk_ralloc_huge_shrink(arena, ptr, oldsize, usize);
+	arena_chunk_ralloc_huge_shrink(tsdn, arena, ptr, oldsize, usize);
 
 	return (false);
 }
 
 static bool
-huge_ralloc_no_move_expand(void *ptr, size_t oldsize, size_t usize, bool zero) {
+huge_ralloc_no_move_expand(tsdn_t *tsdn, void *ptr, size_t oldsize,
+    size_t usize, bool zero) {
 	extent_node_t *node;
 	arena_t *arena;
 	bool is_zeroed_subchunk, is_zeroed_chunk;
 
 	node = huge_node_get(ptr);
 	arena = extent_node_arena_get(node);
-	malloc_mutex_lock(&arena->huge_mtx);
+	malloc_mutex_lock(tsdn, &arena->huge_mtx);
 	is_zeroed_subchunk = extent_node_zeroed_get(node);
-	malloc_mutex_unlock(&arena->huge_mtx);
+	malloc_mutex_unlock(tsdn, &arena->huge_mtx);
 
 	/*
 	 * Copy zero into is_zeroed_chunk and pass the copy to chunk_alloc(), so
@@ -248,14 +267,16 @@
 	 */
 	is_zeroed_chunk = zero;
 
-	if (arena_chunk_ralloc_huge_expand(arena, ptr, oldsize, usize,
+	if (arena_chunk_ralloc_huge_expand(tsdn, arena, ptr, oldsize, usize,
 	     &is_zeroed_chunk))
 		return (true);
 
-	malloc_mutex_lock(&arena->huge_mtx);
+	malloc_mutex_lock(tsdn, &arena->huge_mtx);
 	/* Update the size of the huge allocation. */
+	huge_node_unset(ptr, node);
 	extent_node_size_set(node, usize);
-	malloc_mutex_unlock(&arena->huge_mtx);
+	huge_node_reset(tsdn, ptr, node);
+	malloc_mutex_unlock(tsdn, &arena->huge_mtx);
 
 	if (zero || (config_fill && unlikely(opt_zero))) {
 		if (!is_zeroed_subchunk) {
@@ -268,15 +289,15 @@
 			    CHUNK_CEILING(oldsize));
 		}
 	} else if (config_fill && unlikely(opt_junk_alloc)) {
-		memset((void *)((uintptr_t)ptr + oldsize), 0xa5, usize -
-		    oldsize);
+		memset((void *)((uintptr_t)ptr + oldsize), JEMALLOC_ALLOC_JUNK,
+		    usize - oldsize);
 	}
 
 	return (false);
 }
 
 bool
-huge_ralloc_no_move(tsd_t *tsd, void *ptr, size_t oldsize, size_t usize_min,
+huge_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t usize_min,
     size_t usize_max, bool zero)
 {
 
@@ -290,16 +311,16 @@
 
 	if (CHUNK_CEILING(usize_max) > CHUNK_CEILING(oldsize)) {
 		/* Attempt to expand the allocation in-place. */
-		if (!huge_ralloc_no_move_expand(ptr, oldsize, usize_max,
+		if (!huge_ralloc_no_move_expand(tsdn, ptr, oldsize, usize_max,
 		    zero)) {
-			arena_decay_tick(tsd, huge_aalloc(ptr));
+			arena_decay_tick(tsdn, huge_aalloc(ptr));
 			return (false);
 		}
 		/* Try again, this time with usize_min. */
 		if (usize_min < usize_max && CHUNK_CEILING(usize_min) >
-		    CHUNK_CEILING(oldsize) && huge_ralloc_no_move_expand(ptr,
-		    oldsize, usize_min, zero)) {
-			arena_decay_tick(tsd, huge_aalloc(ptr));
+		    CHUNK_CEILING(oldsize) && huge_ralloc_no_move_expand(tsdn,
+		    ptr, oldsize, usize_min, zero)) {
+			arena_decay_tick(tsdn, huge_aalloc(ptr));
 			return (false);
 		}
 	}
@@ -310,16 +331,17 @@
 	 */
 	if (CHUNK_CEILING(oldsize) >= CHUNK_CEILING(usize_min)
 	    && CHUNK_CEILING(oldsize) <= CHUNK_CEILING(usize_max)) {
-		huge_ralloc_no_move_similar(ptr, oldsize, usize_min, usize_max,
-		    zero);
-		arena_decay_tick(tsd, huge_aalloc(ptr));
+		huge_ralloc_no_move_similar(tsdn, ptr, oldsize, usize_min,
+		    usize_max, zero);
+		arena_decay_tick(tsdn, huge_aalloc(ptr));
 		return (false);
 	}
 
 	/* Attempt to shrink the allocation in-place. */
 	if (CHUNK_CEILING(oldsize) > CHUNK_CEILING(usize_max)) {
-		if (!huge_ralloc_no_move_shrink(ptr, oldsize, usize_max)) {
-			arena_decay_tick(tsd, huge_aalloc(ptr));
+		if (!huge_ralloc_no_move_shrink(tsdn, ptr, oldsize,
+		    usize_max)) {
+			arena_decay_tick(tsdn, huge_aalloc(ptr));
 			return (false);
 		}
 	}
@@ -327,18 +349,18 @@
 }
 
 static void *
-huge_ralloc_move_helper(tsd_t *tsd, arena_t *arena, size_t usize,
-    size_t alignment, bool zero, tcache_t *tcache)
+huge_ralloc_move_helper(tsdn_t *tsdn, arena_t *arena, size_t usize,
+    size_t alignment, bool zero)
 {
 
 	if (alignment <= chunksize)
-		return (huge_malloc(tsd, arena, usize, zero, tcache));
-	return (huge_palloc(tsd, arena, usize, alignment, zero, tcache));
+		return (huge_malloc(tsdn, arena, usize, zero));
+	return (huge_palloc(tsdn, arena, usize, alignment, zero));
 }
 
 void *
-huge_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t usize,
-    size_t alignment, bool zero, tcache_t *tcache)
+huge_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize,
+    size_t usize, size_t alignment, bool zero, tcache_t *tcache)
 {
 	void *ret;
 	size_t copysize;
@@ -347,7 +369,8 @@
 	assert(usize > 0 && usize <= HUGE_MAXCLASS);
 
 	/* Try to avoid moving the allocation. */
-	if (!huge_ralloc_no_move(tsd, ptr, oldsize, usize, usize, zero))
+	if (!huge_ralloc_no_move(tsd_tsdn(tsd), ptr, oldsize, usize, usize,
+	    zero))
 		return (ptr);
 
 	/*
@@ -355,19 +378,19 @@
 	 * different size class.  In that case, fall back to allocating new
 	 * space and copying.
 	 */
-	ret = huge_ralloc_move_helper(tsd, arena, usize, alignment, zero,
-	    tcache);
+	ret = huge_ralloc_move_helper(tsd_tsdn(tsd), arena, usize, alignment,
+	    zero);
 	if (ret == NULL)
 		return (NULL);
 
 	copysize = (usize < oldsize) ? usize : oldsize;
 	memcpy(ret, ptr, copysize);
-	isqalloc(tsd, ptr, oldsize, tcache);
+	isqalloc(tsd, ptr, oldsize, tcache, true);
 	return (ret);
 }
 
 void
-huge_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache)
+huge_dalloc(tsdn_t *tsdn, void *ptr)
 {
 	extent_node_t *node;
 	arena_t *arena;
@@ -375,17 +398,17 @@
 	node = huge_node_get(ptr);
 	arena = extent_node_arena_get(node);
 	huge_node_unset(ptr, node);
-	malloc_mutex_lock(&arena->huge_mtx);
+	malloc_mutex_lock(tsdn, &arena->huge_mtx);
 	ql_remove(&arena->huge, node, ql_link);
-	malloc_mutex_unlock(&arena->huge_mtx);
+	malloc_mutex_unlock(tsdn, &arena->huge_mtx);
 
-	huge_dalloc_junk(extent_node_addr_get(node),
+	huge_dalloc_junk(tsdn, extent_node_addr_get(node),
 	    extent_node_size_get(node));
-	arena_chunk_dalloc_huge(extent_node_arena_get(node),
+	arena_chunk_dalloc_huge(tsdn, extent_node_arena_get(node),
 	    extent_node_addr_get(node), extent_node_size_get(node));
-	idalloctm(tsd, node, tcache, true, true);
+	idalloctm(tsdn, node, NULL, true, true);
 
-	arena_decay_tick(tsd, arena);
+	arena_decay_tick(tsdn, arena);
 }
 
 arena_t *
@@ -396,7 +419,7 @@
 }
 
 size_t
-huge_salloc(const void *ptr)
+huge_salloc(tsdn_t *tsdn, const void *ptr)
 {
 	size_t size;
 	extent_node_t *node;
@@ -404,15 +427,15 @@
 
 	node = huge_node_get(ptr);
 	arena = extent_node_arena_get(node);
-	malloc_mutex_lock(&arena->huge_mtx);
+	malloc_mutex_lock(tsdn, &arena->huge_mtx);
 	size = extent_node_size_get(node);
-	malloc_mutex_unlock(&arena->huge_mtx);
+	malloc_mutex_unlock(tsdn, &arena->huge_mtx);
 
 	return (size);
 }
 
 prof_tctx_t *
-huge_prof_tctx_get(const void *ptr)
+huge_prof_tctx_get(tsdn_t *tsdn, const void *ptr)
 {
 	prof_tctx_t *tctx;
 	extent_node_t *node;
@@ -420,29 +443,29 @@
 
 	node = huge_node_get(ptr);
 	arena = extent_node_arena_get(node);
-	malloc_mutex_lock(&arena->huge_mtx);
+	malloc_mutex_lock(tsdn, &arena->huge_mtx);
 	tctx = extent_node_prof_tctx_get(node);
-	malloc_mutex_unlock(&arena->huge_mtx);
+	malloc_mutex_unlock(tsdn, &arena->huge_mtx);
 
 	return (tctx);
 }
 
 void
-huge_prof_tctx_set(const void *ptr, prof_tctx_t *tctx)
+huge_prof_tctx_set(tsdn_t *tsdn, const void *ptr, prof_tctx_t *tctx)
 {
 	extent_node_t *node;
 	arena_t *arena;
 
 	node = huge_node_get(ptr);
 	arena = extent_node_arena_get(node);
-	malloc_mutex_lock(&arena->huge_mtx);
+	malloc_mutex_lock(tsdn, &arena->huge_mtx);
 	extent_node_prof_tctx_set(node, tctx);
-	malloc_mutex_unlock(&arena->huge_mtx);
+	malloc_mutex_unlock(tsdn, &arena->huge_mtx);
 }
 
 void
-huge_prof_tctx_reset(const void *ptr)
+huge_prof_tctx_reset(tsdn_t *tsdn, const void *ptr)
 {
 
-	huge_prof_tctx_set(ptr, (prof_tctx_t *)(uintptr_t)1U);
+	huge_prof_tctx_set(tsdn, ptr, (prof_tctx_t *)(uintptr_t)1U);
 }
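
Several hunks above bracket extent_node_size_set() with huge_node_unset()/huge_node_reset(). The sketch below illustrates, with made-up demo_* names and a deliberately simplified registry, why re-registering around a size change matters when state derived at registration time (for example cumulative byte accounting maintained by chunk_register()) depends on the node's size; updating the size in place would leave that state stale.

#include <assert.h>
#include <stddef.h>
#include <stdio.h>

typedef struct {
	void	*addr;
	size_t	size;
} demo_node_t;

/* Accounting derived from nodes at registration time. */
static size_t	demo_registered_bytes;

static void
demo_node_register(demo_node_t *node)
{

	demo_registered_bytes += node->size;
}

static void
demo_node_deregister(demo_node_t *node)
{

	assert(demo_registered_bytes >= node->size);
	demo_registered_bytes -= node->size;
}

/* Mirrors the unset / update / reset order used above. */
static void
demo_node_resize(demo_node_t *node, size_t new_size)
{

	demo_node_deregister(node);
	node->size = new_size;
	demo_node_register(node);
}

int
main(void)
{
	char buf[64];
	demo_node_t node = {buf, sizeof(buf)};

	demo_node_register(&node);
	demo_node_resize(&node, 32);
	printf("registered bytes: %zu\n", demo_registered_bytes);
	demo_node_deregister(&node);
	return (0);
}
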
diff --git a/src/jemalloc.c b/src/jemalloc.c
index 7120791..40eb2ea 100644
--- a/src/jemalloc.c
+++ b/src/jemalloc.c
@@ -60,7 +60,7 @@
 arena_t			**arenas;
 static unsigned		narenas_total; /* Use narenas_total_*(). */
 static arena_t		*a0; /* arenas[0]; read-only after initialization. */
-static unsigned		narenas_auto; /* Read-only after initialization. */
+unsigned		narenas_auto; /* Read-only after initialization. */
 
 typedef enum {
 	malloc_init_uninitialized	= 3,
@@ -70,10 +70,10 @@
 } malloc_init_t;
 static malloc_init_t	malloc_init_state = malloc_init_uninitialized;
 
-/* 0 should be the common case.  Set to true to trigger initialization. */
+/* False should be the common case.  Set to true to trigger initialization. */
 static bool	malloc_slow = true;
 
-/* When malloc_slow != 0, set the corresponding bits for sanity check. */
+/* When malloc_slow is true, set the corresponding bits for sanity check. */
 enum {
 	flag_opt_junk_alloc	= (1U),
 	flag_opt_junk_free	= (1U << 1),
@@ -212,7 +212,7 @@
 	 * really only matters early in the process creation, before any
 	 * separate thread normally starts doing anything. */
 	if (!init_lock_initialized)
-		malloc_mutex_init(&init_lock);
+		malloc_mutex_init(&init_lock, "init", WITNESS_RANK_INIT);
 	init_lock_initialized = true;
 }
 
@@ -307,7 +307,7 @@
 }
 
 /*
- * The a0*() functions are used instead of i[mcd]alloc() in situations that
+ * The a0*() functions are used instead of i{d,}alloc() in situations that
  * cannot tolerate TLS variable access.
  */
 
@@ -318,15 +318,15 @@
 	if (unlikely(malloc_init_a0()))
 		return (NULL);
 
-	return (iallocztm(NULL, size, size2index(size), zero, false,
-	    is_metadata, arena_get(0, false), true));
+	return (iallocztm(TSDN_NULL, size, size2index(size), zero, NULL,
+	    is_metadata, arena_get(TSDN_NULL, 0, true), true));
 }
 
 static void
 a0idalloc(void *ptr, bool is_metadata)
 {
 
-	idalloctm(NULL, ptr, false, is_metadata, true);
+	idalloctm(TSDN_NULL, ptr, false, is_metadata, true);
 }
 
 void *
@@ -413,7 +413,7 @@
 
 /* Create a new arena and insert it into the arenas array at index ind. */
 static arena_t *
-arena_init_locked(unsigned ind)
+arena_init_locked(tsdn_t *tsdn, unsigned ind)
 {
 	arena_t *arena;
 
@@ -427,39 +427,43 @@
 	 * Another thread may have already initialized arenas[ind] if it's an
 	 * auto arena.
 	 */
-	arena = arena_get(ind, false);
+	arena = arena_get(tsdn, ind, false);
 	if (arena != NULL) {
 		assert(ind < narenas_auto);
 		return (arena);
 	}
 
 	/* Actually initialize the arena. */
-	arena = arena_new(ind);
+	arena = arena_new(tsdn, ind);
 	arena_set(ind, arena);
 	return (arena);
 }
 
 arena_t *
-arena_init(unsigned ind)
+arena_init(tsdn_t *tsdn, unsigned ind)
 {
 	arena_t *arena;
 
-	malloc_mutex_lock(&arenas_lock);
-	arena = arena_init_locked(ind);
-	malloc_mutex_unlock(&arenas_lock);
+	malloc_mutex_lock(tsdn, &arenas_lock);
+	arena = arena_init_locked(tsdn, ind);
+	malloc_mutex_unlock(tsdn, &arenas_lock);
 	return (arena);
 }
 
 static void
-arena_bind(tsd_t *tsd, unsigned ind)
+arena_bind(tsd_t *tsd, unsigned ind, bool internal)
 {
 	arena_t *arena;
 
-	arena = arena_get(ind, false);
-	arena_nthreads_inc(arena);
+	arena = arena_get(tsd_tsdn(tsd), ind, false);
+	arena_nthreads_inc(arena, internal);
 
-	if (tsd_nominal(tsd))
-		tsd_arena_set(tsd, arena);
+	if (tsd_nominal(tsd)) {
+		if (internal)
+			tsd_iarena_set(tsd, arena);
+		else
+			tsd_arena_set(tsd, arena);
+	}
 }
 
 void
@@ -467,21 +471,24 @@
 {
 	arena_t *oldarena, *newarena;
 
-	oldarena = arena_get(oldind, false);
-	newarena = arena_get(newind, false);
-	arena_nthreads_dec(oldarena);
-	arena_nthreads_inc(newarena);
+	oldarena = arena_get(tsd_tsdn(tsd), oldind, false);
+	newarena = arena_get(tsd_tsdn(tsd), newind, false);
+	arena_nthreads_dec(oldarena, false);
+	arena_nthreads_inc(newarena, false);
 	tsd_arena_set(tsd, newarena);
 }
 
 static void
-arena_unbind(tsd_t *tsd, unsigned ind)
+arena_unbind(tsd_t *tsd, unsigned ind, bool internal)
 {
 	arena_t *arena;
 
-	arena = arena_get(ind, false);
-	arena_nthreads_dec(arena);
-	tsd_arena_set(tsd, NULL);
+	arena = arena_get(tsd_tsdn(tsd), ind, false);
+	arena_nthreads_dec(arena, internal);
+	if (internal)
+		tsd_iarena_set(tsd, NULL);
+	else
+		tsd_arena_set(tsd, NULL);
 }
 
 arena_tdata_t *
@@ -562,27 +569,41 @@
 
 /* Slow path, called only by arena_choose(). */
 arena_t *
-arena_choose_hard(tsd_t *tsd)
+arena_choose_hard(tsd_t *tsd, bool internal)
 {
-	arena_t *ret;
+	arena_t *ret JEMALLOC_CC_SILENCE_INIT(NULL);
 
 	if (narenas_auto > 1) {
-		unsigned i, choose, first_null;
+		unsigned i, j, choose[2], first_null;
 
-		choose = 0;
+		/*
+		 * Determine binding for both non-internal and internal
+		 * allocation.
+		 *
+		 *   choose[0]: For application allocation.
+		 *   choose[1]: For internal metadata allocation.
+		 */
+
+		for (j = 0; j < 2; j++)
+			choose[j] = 0;
+
 		first_null = narenas_auto;
-		malloc_mutex_lock(&arenas_lock);
-		assert(arena_get(0, false) != NULL);
+		malloc_mutex_lock(tsd_tsdn(tsd), &arenas_lock);
+		assert(arena_get(tsd_tsdn(tsd), 0, false) != NULL);
 		for (i = 1; i < narenas_auto; i++) {
-			if (arena_get(i, false) != NULL) {
+			if (arena_get(tsd_tsdn(tsd), i, false) != NULL) {
 				/*
 				 * Choose the first arena that has the lowest
 				 * number of threads assigned to it.
 				 */
-				if (arena_nthreads_get(arena_get(i, false)) <
-				    arena_nthreads_get(arena_get(choose,
-				    false)))
-					choose = i;
+				for (j = 0; j < 2; j++) {
+					if (arena_nthreads_get(arena_get(
+					    tsd_tsdn(tsd), i, false), !!j) <
+					    arena_nthreads_get(arena_get(
+					    tsd_tsdn(tsd), choose[j], false),
+					    !!j))
+						choose[j] = i;
+				}
 			} else if (first_null == narenas_auto) {
 				/*
 				 * Record the index of the first uninitialized
@@ -597,27 +618,40 @@
 			}
 		}
 
-		if (arena_nthreads_get(arena_get(choose, false)) == 0
-		    || first_null == narenas_auto) {
-			/*
-			 * Use an unloaded arena, or the least loaded arena if
-			 * all arenas are already initialized.
-			 */
-			ret = arena_get(choose, false);
-		} else {
-			/* Initialize a new arena. */
-			choose = first_null;
-			ret = arena_init_locked(choose);
-			if (ret == NULL) {
-				malloc_mutex_unlock(&arenas_lock);
-				return (NULL);
+		for (j = 0; j < 2; j++) {
+			if (arena_nthreads_get(arena_get(tsd_tsdn(tsd),
+			    choose[j], false), !!j) == 0 || first_null ==
+			    narenas_auto) {
+				/*
+				 * Use an unloaded arena, or the least loaded
+				 * arena if all arenas are already initialized.
+				 */
+				if (!!j == internal) {
+					ret = arena_get(tsd_tsdn(tsd),
+					    choose[j], false);
+				}
+			} else {
+				arena_t *arena;
+
+				/* Initialize a new arena. */
+				choose[j] = first_null;
+				arena = arena_init_locked(tsd_tsdn(tsd),
+				    choose[j]);
+				if (arena == NULL) {
+					malloc_mutex_unlock(tsd_tsdn(tsd),
+					    &arenas_lock);
+					return (NULL);
+				}
+				if (!!j == internal)
+					ret = arena;
 			}
+			arena_bind(tsd, choose[j], !!j);
 		}
-		arena_bind(tsd, choose);
-		malloc_mutex_unlock(&arenas_lock);
+		malloc_mutex_unlock(tsd_tsdn(tsd), &arenas_lock);
 	} else {
-		ret = arena_get(0, false);
-		arena_bind(tsd, 0);
+		ret = arena_get(tsd_tsdn(tsd), 0, false);
+		arena_bind(tsd, 0, false);
+		arena_bind(tsd, 0, true);
 	}
 
 	return (ret);
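
arena_choose_hard() above now selects two arenas in one pass: choose[0] for application allocation and choose[1] for internal metadata, each by lowest thread count in its own category. A compilable toy version of that selection loop follows; the demo_* names and the fixed arena count are illustrative only, not the real data structures.

#include <stdio.h>

#define	DEMO_NARENAS	4

/* [arena][0: application threads, 1: internal/metadata threads] */
static unsigned	demo_nthreads[DEMO_NARENAS][2];

static void
demo_choose_both(unsigned choose[2])
{
	unsigned i, j;

	choose[0] = choose[1] = 0;
	for (i = 1; i < DEMO_NARENAS; i++) {
		for (j = 0; j < 2; j++) {
			if (demo_nthreads[i][j] < demo_nthreads[choose[j]][j])
				choose[j] = i;
		}
	}
	/* Bind the calling thread for both purposes, as arena_bind() does. */
	for (j = 0; j < 2; j++)
		demo_nthreads[choose[j]][j]++;
}

int
main(void)
{
	unsigned choose[2];

	demo_nthreads[0][0] = 2;	/* arena 0 is busy with app threads */
	demo_choose_both(choose);
	printf("app arena: %u, internal arena: %u\n", choose[0], choose[1]);
	return (0);
}
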
@@ -638,13 +672,23 @@
 }
 
 void
+iarena_cleanup(tsd_t *tsd)
+{
+	arena_t *iarena;
+
+	iarena = tsd_iarena_get(tsd);
+	if (iarena != NULL)
+		arena_unbind(tsd, iarena->ind, true);
+}
+
+void
 arena_cleanup(tsd_t *tsd)
 {
 	arena_t *arena;
 
 	arena = tsd_arena_get(tsd);
 	if (arena != NULL)
-		arena_unbind(tsd, arena->ind);
+		arena_unbind(tsd, arena->ind, false);
 }
 
 void
@@ -681,8 +725,11 @@
 {
 
 	if (config_tcache && config_stats) {
+		tsdn_t *tsdn;
 		unsigned narenas, i;
 
+		tsdn = tsdn_fetch();
+
 		/*
 		 * Merge stats from extant threads.  This is racy, since
 		 * individual threads do not lock when recording tcache stats
@@ -691,7 +738,7 @@
 		 * continue to allocate.
 		 */
 		for (i = 0, narenas = narenas_total_get(); i < narenas; i++) {
-			arena_t *arena = arena_get(i, false);
+			arena_t *arena = arena_get(tsdn, i, false);
 			if (arena != NULL) {
 				tcache_t *tcache;
 
@@ -701,11 +748,11 @@
 				 * and bin locks in the opposite order,
 				 * deadlocks may result.
 				 */
-				malloc_mutex_lock(&arena->lock);
+				malloc_mutex_lock(tsdn, &arena->lock);
 				ql_foreach(tcache, &arena->tcache_ql, link) {
-					tcache_stats_merge(tcache, arena);
+					tcache_stats_merge(tsdn, tcache, arena);
 				}
-				malloc_mutex_unlock(&arena->lock);
+				malloc_mutex_unlock(tsdn, &arena->lock);
 			}
 		}
 	}
@@ -1056,7 +1103,8 @@
 				for (i = 0; i < dss_prec_limit; i++) {
 					if (strncmp(dss_prec_names[i], v, vlen)
 					    == 0) {
-						if (chunk_dss_prec_set(i)) {
+						if (chunk_dss_prec_set(NULL,
+						   i)) {
 							malloc_conf_error(
 							    "Error setting dss",
 							    k, klen, v, vlen);
@@ -1186,7 +1234,6 @@
 	}
 }
 
-/* init_lock must be held. */
 static bool
 malloc_init_hard_needed(void)
 {
@@ -1204,9 +1251,9 @@
 	if (malloc_initializer != NO_INITIALIZER && !IS_INITIALIZER) {
 		/* Busy-wait until the initializing thread completes. */
 		do {
-			malloc_mutex_unlock(&init_lock);
+			malloc_mutex_unlock(NULL, &init_lock);
 			CPU_SPINWAIT;
-			malloc_mutex_lock(&init_lock);
+			malloc_mutex_lock(NULL, &init_lock);
 		} while (!malloc_initialized());
 		return (false);
 	}
@@ -1214,9 +1261,8 @@
 	return (true);
 }
 
-/* init_lock must be held. */
 static bool
-malloc_init_hard_a0_locked(void)
+malloc_init_hard_a0_locked()
 {
 
 	malloc_initializer = INITIALIZER;
@@ -1232,6 +1278,7 @@
 				abort();
 		}
 	}
+	pages_boot();
 	if (base_boot())
 		return (true);
 	if (chunk_boot())
@@ -1242,9 +1289,9 @@
 		prof_boot1();
 	if (arena_boot())
 		return (true);
-	if (config_tcache && tcache_boot())
+	if (config_tcache && tcache_boot(TSDN_NULL))
 		return (true);
-	if (malloc_mutex_init(&arenas_lock))
+	if (malloc_mutex_init(&arenas_lock, "arenas", WITNESS_RANK_ARENAS))
 		return (true);
 	/*
 	 * Create enough scaffolding to allow recursive allocation in
@@ -1258,9 +1305,11 @@
 	 * Initialize one arena here.  The rest are lazily created in
 	 * arena_choose_hard().
 	 */
-	if (arena_init(0) == NULL)
+	if (arena_init(TSDN_NULL, 0) == NULL)
 		return (true);
+
 	malloc_init_state = malloc_init_a0_initialized;
+
 	return (false);
 }
 
@@ -1269,30 +1318,18 @@
 {
 	bool ret;
 
-	malloc_mutex_lock(&init_lock);
+	malloc_mutex_lock(TSDN_NULL, &init_lock);
 	ret = malloc_init_hard_a0_locked();
-	malloc_mutex_unlock(&init_lock);
+	malloc_mutex_unlock(TSDN_NULL, &init_lock);
 	return (ret);
 }
 
-/*
- * Initialize data structures which may trigger recursive allocation.
- *
- * init_lock must be held.
- */
+/* Initialize data structures which may trigger recursive allocation. */
 static bool
 malloc_init_hard_recursible(void)
 {
-	bool ret = false;
 
 	malloc_init_state = malloc_init_recursible;
-	malloc_mutex_unlock(&init_lock);
-
-	/* LinuxThreads' pthread_setspecific() allocates. */
-	if (malloc_tsd_boot0()) {
-		ret = true;
-		goto label_return;
-	}
 
 	ncpus = malloc_ncpus();
 
@@ -1301,24 +1338,21 @@
 	/* LinuxThreads' pthread_atfork() allocates. */
 	if (pthread_atfork(jemalloc_prefork, jemalloc_postfork_parent,
 	    jemalloc_postfork_child) != 0) {
-		ret = true;
 		malloc_write("<jemalloc>: Error in pthread_atfork()\n");
 		if (opt_abort)
 			abort();
+		return (true);
 	}
 #endif
 
-label_return:
-	malloc_mutex_lock(&init_lock);
-	return (ret);
+	return (false);
 }
 
-/* init_lock must be held. */
 static bool
-malloc_init_hard_finish(void)
+malloc_init_hard_finish(tsdn_t *tsdn)
 {
 
-	if (mutex_boot())
+	if (malloc_mutex_boot())
 		return (true);
 
 	if (opt_narenas == 0) {
@@ -1343,7 +1377,7 @@
 	narenas_total_set(narenas_auto);
 
 	/* Allocate and initialize arenas. */
-	arenas = (arena_t **)base_alloc(sizeof(arena_t *) *
+	arenas = (arena_t **)base_alloc(tsdn, sizeof(arena_t *) *
 	    (MALLOCX_ARENA_MAX+1));
 	if (arenas == NULL)
 		return (true);
@@ -1359,38 +1393,43 @@
 static bool
 malloc_init_hard(void)
 {
+	tsd_t *tsd;
 
 #if defined(_WIN32) && _WIN32_WINNT < 0x0600
 	_init_init_lock();
 #endif
-	malloc_mutex_lock(&init_lock);
+	malloc_mutex_lock(TSDN_NULL, &init_lock);
 	if (!malloc_init_hard_needed()) {
-		malloc_mutex_unlock(&init_lock);
+		malloc_mutex_unlock(TSDN_NULL, &init_lock);
 		return (false);
 	}
 
 	if (malloc_init_state != malloc_init_a0_initialized &&
 	    malloc_init_hard_a0_locked()) {
-		malloc_mutex_unlock(&init_lock);
+		malloc_mutex_unlock(TSDN_NULL, &init_lock);
 		return (true);
 	}
 
-	if (malloc_init_hard_recursible()) {
-		malloc_mutex_unlock(&init_lock);
+	malloc_mutex_unlock(TSDN_NULL, &init_lock);
+	/* Recursive allocation relies on functional tsd. */
+	tsd = malloc_tsd_boot0();
+	if (tsd == NULL)
+		return (true);
+	if (malloc_init_hard_recursible())
+		return (true);
+	malloc_mutex_lock(tsd_tsdn(tsd), &init_lock);
+
+	if (config_prof && prof_boot2(tsd_tsdn(tsd))) {
+		malloc_mutex_unlock(tsd_tsdn(tsd), &init_lock);
 		return (true);
 	}
 
-	if (config_prof && prof_boot2()) {
-		malloc_mutex_unlock(&init_lock);
+	if (malloc_init_hard_finish(tsd_tsdn(tsd))) {
+		malloc_mutex_unlock(tsd_tsdn(tsd), &init_lock);
 		return (true);
 	}
 
-	if (malloc_init_hard_finish()) {
-		malloc_mutex_unlock(&init_lock);
-		return (true);
-	}
-
-	malloc_mutex_unlock(&init_lock);
+	malloc_mutex_unlock(tsd_tsdn(tsd), &init_lock);
 	malloc_tsd_boot1();
 	return (false);
 }
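
The malloc_init_hard() hunk above drops init_lock before malloc_tsd_boot0() and malloc_init_hard_recursible(), since those steps may allocate and re-enter the allocator, then re-acquires it for the final phase. Below is a simplified standalone sketch of that ordering; the demo_* names are invented, and the real state machine and error handling are richer.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

typedef enum {
	demo_uninitialized,
	demo_a0_initialized,
	demo_recursible,
	demo_initialized
} demo_init_t;

static pthread_mutex_t	demo_init_lock = PTHREAD_MUTEX_INITIALIZER;
static demo_init_t	demo_state = demo_uninitialized;

static bool
demo_init_a0_locked(void)
{

	demo_state = demo_a0_initialized;
	return (false);
}

static bool
demo_init_recursible(void)
{

	/* May call back into the allocator; demo_init_lock is not held. */
	demo_state = demo_recursible;
	return (false);
}

static bool
demo_init_hard(void)
{

	pthread_mutex_lock(&demo_init_lock);
	if (demo_state == demo_uninitialized && demo_init_a0_locked()) {
		pthread_mutex_unlock(&demo_init_lock);
		return (true);
	}
	pthread_mutex_unlock(&demo_init_lock);

	/* tsd boot and other recursion-prone steps happen unlocked. */
	if (demo_init_recursible())
		return (true);

	pthread_mutex_lock(&demo_init_lock);
	demo_state = demo_initialized;
	pthread_mutex_unlock(&demo_init_lock);
	return (false);
}

int
main(void)
{
	bool err = demo_init_hard();

	printf("failed: %d, state: %d\n", err, (int)demo_state);
	return (0);
}
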
@@ -1404,7 +1443,7 @@
  */
 
 static void *
-imalloc_prof_sample(tsd_t *tsd, size_t usize, szind_t ind,
+ialloc_prof_sample(tsd_t *tsd, size_t usize, szind_t ind, bool zero,
     prof_tctx_t *tctx, bool slow_path)
 {
 	void *p;
@@ -1413,44 +1452,58 @@
 		return (NULL);
 	if (usize <= SMALL_MAXCLASS) {
 		szind_t ind_large = size2index(LARGE_MINCLASS);
-		p = imalloc(tsd, LARGE_MINCLASS, ind_large, slow_path);
+		p = ialloc(tsd, LARGE_MINCLASS, ind_large, zero, slow_path);
 		if (p == NULL)
 			return (NULL);
-		arena_prof_promoted(p, usize);
+		arena_prof_promoted(tsd_tsdn(tsd), p, usize);
 	} else
-		p = imalloc(tsd, usize, ind, slow_path);
+		p = ialloc(tsd, usize, ind, zero, slow_path);
 
 	return (p);
 }
 
 JEMALLOC_ALWAYS_INLINE_C void *
-imalloc_prof(tsd_t *tsd, size_t usize, szind_t ind, bool slow_path)
+ialloc_prof(tsd_t *tsd, size_t usize, szind_t ind, bool zero, bool slow_path)
 {
 	void *p;
 	prof_tctx_t *tctx;
 
 	tctx = prof_alloc_prep(tsd, usize, prof_active_get_unlocked(), true);
 	if (unlikely((uintptr_t)tctx != (uintptr_t)1U))
-		p = imalloc_prof_sample(tsd, usize, ind, tctx, slow_path);
+		p = ialloc_prof_sample(tsd, usize, ind, zero, tctx, slow_path);
 	else
-		p = imalloc(tsd, usize, ind, slow_path);
+		p = ialloc(tsd, usize, ind, zero, slow_path);
 	if (unlikely(p == NULL)) {
 		prof_alloc_rollback(tsd, tctx, true);
 		return (NULL);
 	}
-	prof_malloc(p, usize, tctx);
+	prof_malloc(tsd_tsdn(tsd), p, usize, tctx);
 
 	return (p);
 }
 
+/*
+ * ialloc_body() is inlined so that fast and slow paths are generated separately
+ * with statically known slow_path.
+ *
+ * This function guarantees that *tsdn is non-NULL on success.
+ */
 JEMALLOC_ALWAYS_INLINE_C void *
-imalloc_body(size_t size, tsd_t **tsd, size_t *usize, bool slow_path)
+ialloc_body(size_t size, bool zero, tsdn_t **tsdn, size_t *usize,
+    bool slow_path)
 {
+	tsd_t *tsd;
 	szind_t ind;
 
-	if (slow_path && unlikely(malloc_init()))
+	if (slow_path && unlikely(malloc_init())) {
+		*tsdn = NULL;
 		return (NULL);
-	*tsd = tsd_fetch();
+	}
+
+	tsd = tsd_fetch();
+	*tsdn = tsd_tsdn(tsd);
+	witness_assert_lockless(tsd_tsdn(tsd));
+
 	ind = size2index(size);
 	if (unlikely(ind >= NSIZES))
 		return (NULL);
@@ -1462,26 +1515,32 @@
 	}
 
 	if (config_prof && opt_prof)
-		return (imalloc_prof(*tsd, *usize, ind, slow_path));
+		return (ialloc_prof(tsd, *usize, ind, zero, slow_path));
 
-	return (imalloc(*tsd, size, ind, slow_path));
+	return (ialloc(tsd, size, ind, zero, slow_path));
 }
 
 JEMALLOC_ALWAYS_INLINE_C void
-imalloc_post_check(void *ret, tsd_t *tsd, size_t usize, bool slow_path)
+ialloc_post_check(void *ret, tsdn_t *tsdn, size_t usize, const char *func,
+    bool update_errno, bool slow_path)
 {
+
+	assert(!tsdn_null(tsdn) || ret == NULL);
+
 	if (unlikely(ret == NULL)) {
 		if (slow_path && config_xmalloc && unlikely(opt_xmalloc)) {
-			malloc_write("<jemalloc>: Error in malloc(): "
-			    "out of memory\n");
+			malloc_printf("<jemalloc>: Error in %s(): out of "
+			    "memory\n", func);
 			abort();
 		}
-		set_errno(ENOMEM);
+		if (update_errno)
+			set_errno(ENOMEM);
 	}
 	if (config_stats && likely(ret != NULL)) {
-		assert(usize == isalloc(ret, config_prof));
-		*tsd_thread_allocatedp_get(tsd) += usize;
+		assert(usize == isalloc(tsdn, ret, config_prof));
+		*tsd_thread_allocatedp_get(tsdn_tsd(tsdn)) += usize;
 	}
+	witness_assert_lockless(tsdn);
 }
 
 JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN
@@ -1490,24 +1549,20 @@
 je_malloc(size_t size)
 {
 	void *ret;
-	tsd_t *tsd;
+	tsdn_t *tsdn;
 	size_t usize JEMALLOC_CC_SILENCE_INIT(0);
 
 	if (size == 0)
 		size = 1;
 
 	if (likely(!malloc_slow)) {
-		/*
-		 * imalloc_body() is inlined so that fast and slow paths are
-		 * generated separately with statically known slow_path.
-		 */
-		ret = imalloc_body(size, &tsd, &usize, false);
-		imalloc_post_check(ret, tsd, usize, false);
+		ret = ialloc_body(size, false, &tsdn, &usize, false);
+		ialloc_post_check(ret, tsdn, usize, "malloc", true, false);
 	} else {
-		ret = imalloc_body(size, &tsd, &usize, true);
-		imalloc_post_check(ret, tsd, usize, true);
+		ret = ialloc_body(size, false, &tsdn, &usize, true);
+		ialloc_post_check(ret, tsdn, usize, "malloc", true, true);
 		UTRACE(0, size, ret);
-		JEMALLOC_VALGRIND_MALLOC(ret != NULL, ret, usize, false);
+		JEMALLOC_VALGRIND_MALLOC(ret != NULL, tsdn, ret, usize, false);
 	}
 
 	return (ret);
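
je_malloc() above calls ialloc_body()/ialloc_post_check() twice with a constant slow_path argument; because the body is always-inlined, the compiler can emit a lean fast-path expansion and a separate slow-path one. A small self-contained illustration of that technique follows, using hypothetical demo_* names and assuming a GCC/Clang-style always_inline attribute.

#include <stdbool.h>
#include <stdio.h>

/* Assumes a GCC/Clang-style always_inline attribute. */
#define	DEMO_ALWAYS_INLINE	static inline __attribute__((always_inline))

DEMO_ALWAYS_INLINE int
demo_body(int x, bool slow_path)
{

	if (slow_path) {
		/* Dead code in the expansion where slow_path is false. */
		fprintf(stderr, "slow path: extra checks for %d\n", x);
	}
	return (x * 2);
}

int
demo_api(int x, bool slow)
{

	if (!slow)
		return (demo_body(x, false));	/* fast specialization */
	return (demo_body(x, true));		/* slow specialization */
}

int
main(void)
{

	printf("%d\n", demo_api(3, false));
	printf("%d\n", demo_api(4, true));
	return (0);
}
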
@@ -1526,7 +1581,7 @@
 		p = ipalloc(tsd, LARGE_MINCLASS, alignment, false);
 		if (p == NULL)
 			return (NULL);
-		arena_prof_promoted(p, usize);
+		arena_prof_promoted(tsd_tsdn(tsd), p, usize);
 	} else
 		p = ipalloc(tsd, usize, alignment, false);
 
@@ -1548,7 +1603,7 @@
 		prof_alloc_rollback(tsd, tctx, true);
 		return (NULL);
 	}
-	prof_malloc(p, usize, tctx);
+	prof_malloc(tsd_tsdn(tsd), p, usize, tctx);
 
 	return (p);
 }
@@ -1565,10 +1620,12 @@
 	assert(min_alignment != 0);
 
 	if (unlikely(malloc_init())) {
+		tsd = NULL;
 		result = NULL;
 		goto label_oom;
 	}
 	tsd = tsd_fetch();
+	witness_assert_lockless(tsd_tsdn(tsd));
 	if (size == 0)
 		size = 1;
 
@@ -1603,10 +1660,13 @@
 	ret = 0;
 label_return:
 	if (config_stats && likely(result != NULL)) {
-		assert(usize == isalloc(result, config_prof));
+		assert(usize == isalloc(tsd_tsdn(tsd), result, config_prof));
 		*tsd_thread_allocatedp_get(tsd) += usize;
 	}
 	UTRACE(0, size, result);
+	JEMALLOC_VALGRIND_MALLOC(result != NULL, tsd_tsdn(tsd), result, usize,
+	    false);
+	witness_assert_lockless(tsd_tsdn(tsd));
 	return (ret);
 label_oom:
 	assert(result == NULL);
@@ -1616,6 +1676,7 @@
 		abort();
 	}
 	ret = ENOMEM;
+	witness_assert_lockless(tsd_tsdn(tsd));
 	goto label_return;
 }
 
@@ -1623,9 +1684,10 @@
 JEMALLOC_ATTR(nonnull(1))
 je_posix_memalign(void **memptr, size_t alignment, size_t size)
 {
-	int ret = imemalign(memptr, alignment, size, sizeof(void *));
-	JEMALLOC_VALGRIND_MALLOC(ret == 0, *memptr, isalloc(*memptr,
-	    config_prof), false);
+	int ret;
+
+	ret = imemalign(memptr, alignment, size, sizeof(void *));
+
 	return (ret);
 }
 
@@ -1641,117 +1703,45 @@
 		ret = NULL;
 		set_errno(err);
 	}
-	JEMALLOC_VALGRIND_MALLOC(err == 0, ret, isalloc(ret, config_prof),
-	    false);
+
 	return (ret);
 }
 
-static void *
-icalloc_prof_sample(tsd_t *tsd, size_t usize, szind_t ind, prof_tctx_t *tctx)
-{
-	void *p;
-
-	if (tctx == NULL)
-		return (NULL);
-	if (usize <= SMALL_MAXCLASS) {
-		szind_t ind_large = size2index(LARGE_MINCLASS);
-		p = icalloc(tsd, LARGE_MINCLASS, ind_large);
-		if (p == NULL)
-			return (NULL);
-		arena_prof_promoted(p, usize);
-	} else
-		p = icalloc(tsd, usize, ind);
-
-	return (p);
-}
-
-JEMALLOC_ALWAYS_INLINE_C void *
-icalloc_prof(tsd_t *tsd, size_t usize, szind_t ind)
-{
-	void *p;
-	prof_tctx_t *tctx;
-
-	tctx = prof_alloc_prep(tsd, usize, prof_active_get_unlocked(), true);
-	if (unlikely((uintptr_t)tctx != (uintptr_t)1U))
-		p = icalloc_prof_sample(tsd, usize, ind, tctx);
-	else
-		p = icalloc(tsd, usize, ind);
-	if (unlikely(p == NULL)) {
-		prof_alloc_rollback(tsd, tctx, true);
-		return (NULL);
-	}
-	prof_malloc(p, usize, tctx);
-
-	return (p);
-}
-
 JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN
 void JEMALLOC_NOTHROW *
 JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE2(1, 2)
 je_calloc(size_t num, size_t size)
 {
 	void *ret;
-	tsd_t *tsd;
+	tsdn_t *tsdn;
 	size_t num_size;
-	szind_t ind;
 	size_t usize JEMALLOC_CC_SILENCE_INIT(0);
 
-	if (unlikely(malloc_init())) {
-		num_size = 0;
-		ret = NULL;
-		goto label_return;
-	}
-	tsd = tsd_fetch();
-
 	num_size = num * size;
 	if (unlikely(num_size == 0)) {
 		if (num == 0 || size == 0)
 			num_size = 1;
-		else {
-			ret = NULL;
-			goto label_return;
-		}
+		else
+			num_size = HUGE_MAXCLASS + 1; /* Trigger OOM. */
 	/*
 	 * Try to avoid division here.  We know that it isn't possible to
 	 * overflow during multiplication if neither operand uses any of the
 	 * most significant half of the bits in a size_t.
 	 */
 	} else if (unlikely(((num | size) & (SIZE_T_MAX << (sizeof(size_t) <<
-	    2))) && (num_size / size != num))) {
-		/* size_t overflow. */
-		ret = NULL;
-		goto label_return;
-	}
+	    2))) && (num_size / size != num)))
+		num_size = HUGE_MAXCLASS + 1; /* size_t overflow. */
 
-	ind = size2index(num_size);
-	if (unlikely(ind >= NSIZES)) {
-		ret = NULL;
-		goto label_return;
-	}
-	if (config_prof && opt_prof) {
-		usize = index2size(ind);
-		ret = icalloc_prof(tsd, usize, ind);
+	if (likely(!malloc_slow)) {
+		ret = ialloc_body(num_size, true, &tsdn, &usize, false);
+		ialloc_post_check(ret, tsdn, usize, "calloc", true, false);
 	} else {
-		if (config_stats || (config_valgrind && unlikely(in_valgrind)))
-			usize = index2size(ind);
-		ret = icalloc(tsd, num_size, ind);
+		ret = ialloc_body(num_size, true, &tsdn, &usize, true);
+		ialloc_post_check(ret, tsdn, usize, "calloc", true, true);
+		UTRACE(0, num_size, ret);
+		JEMALLOC_VALGRIND_MALLOC(ret != NULL, tsdn, ret, usize, false);
 	}
 
-label_return:
-	if (unlikely(ret == NULL)) {
-		if (config_xmalloc && unlikely(opt_xmalloc)) {
-			malloc_write("<jemalloc>: Error in calloc(): out of "
-			    "memory\n");
-			abort();
-		}
-		set_errno(ENOMEM);
-	}
-	if (config_stats && likely(ret != NULL)) {
-		assert(usize == isalloc(ret, config_prof));
-		*tsd_thread_allocatedp_get(tsd) += usize;
-	}
-	UTRACE(0, num_size, ret);
-	JEMALLOC_VALGRIND_MALLOC(ret != NULL, ret, usize, true);
 	return (ret);
 }
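
je_calloc() above keeps the cheap overflow pre-check: if neither num nor size uses the top half of size_t's bits, the product cannot wrap, so the division test is only needed when the mask test fires. Here is a standalone worked example of that check, using the standard SIZE_MAX where the code above uses jemalloc's SIZE_T_MAX.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool
demo_mul_overflows(size_t num, size_t size, size_t *result)
{
	size_t num_size = num * size;

	if (num == 0 || size == 0) {
		*result = 0;
		return (false);
	}
	/*
	 * The mask covers the high half of size_t, e.g.
	 * 0xffffffff00000000 when size_t is 64 bits.  If neither operand
	 * touches it, each is below 2^(N/2), so the N-bit product cannot
	 * wrap and the division check can be skipped.
	 */
	if (((num | size) & (SIZE_MAX << (sizeof(size_t) << 2))) &&
	    num_size / size != num)
		return (true);
	*result = num_size;
	return (false);
}

int
main(void)
{
	size_t r;

	printf("%d\n", demo_mul_overflows(1000, 1000, &r));	/* 0: fits */
	printf("%d\n", demo_mul_overflows(SIZE_MAX / 2, 3, &r));	/* 1: wraps */
	return (0);
}
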
 
@@ -1767,7 +1757,7 @@
 		p = iralloc(tsd, old_ptr, old_usize, LARGE_MINCLASS, 0, false);
 		if (p == NULL)
 			return (NULL);
-		arena_prof_promoted(p, usize);
+		arena_prof_promoted(tsd_tsdn(tsd), p, usize);
 	} else
 		p = iralloc(tsd, old_ptr, old_usize, usize, 0, false);
 
@@ -1782,7 +1772,7 @@
 	prof_tctx_t *old_tctx, *tctx;
 
 	prof_active = prof_active_get_unlocked();
-	old_tctx = prof_tctx_get(old_ptr);
+	old_tctx = prof_tctx_get(tsd_tsdn(tsd), old_ptr);
 	tctx = prof_alloc_prep(tsd, usize, prof_active, true);
 	if (unlikely((uintptr_t)tctx != (uintptr_t)1U))
 		p = irealloc_prof_sample(tsd, old_ptr, old_usize, usize, tctx);
@@ -1804,14 +1794,16 @@
 	size_t usize;
 	UNUSED size_t rzsize JEMALLOC_CC_SILENCE_INIT(0);
 
+	witness_assert_lockless(tsd_tsdn(tsd));
+
 	assert(ptr != NULL);
 	assert(malloc_initialized() || IS_INITIALIZER);
 
 	if (config_prof && opt_prof) {
-		usize = isalloc(ptr, config_prof);
+		usize = isalloc(tsd_tsdn(tsd), ptr, config_prof);
 		prof_free(tsd, ptr, usize);
 	} else if (config_stats || config_valgrind)
-		usize = isalloc(ptr, config_prof);
+		usize = isalloc(tsd_tsdn(tsd), ptr, config_prof);
 	if (config_stats)
 		*tsd_thread_deallocatedp_get(tsd) += usize;
 
@@ -1819,17 +1811,19 @@
 		iqalloc(tsd, ptr, tcache, false);
 	else {
 		if (config_valgrind && unlikely(in_valgrind))
-			rzsize = p2rz(ptr);
+			rzsize = p2rz(tsd_tsdn(tsd), ptr);
 		iqalloc(tsd, ptr, tcache, true);
 		JEMALLOC_VALGRIND_FREE(ptr, rzsize);
 	}
 }
 
 JEMALLOC_INLINE_C void
-isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache)
+isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache, bool slow_path)
 {
 	UNUSED size_t rzsize JEMALLOC_CC_SILENCE_INIT(0);
 
+	witness_assert_lockless(tsd_tsdn(tsd));
+
 	assert(ptr != NULL);
 	assert(malloc_initialized() || IS_INITIALIZER);
 
@@ -1838,8 +1832,8 @@
 	if (config_stats)
 		*tsd_thread_deallocatedp_get(tsd) += usize;
 	if (config_valgrind && unlikely(in_valgrind))
-		rzsize = p2rz(ptr);
-	isqalloc(tsd, ptr, usize, tcache);
+		rzsize = p2rz(tsd_tsdn(tsd), ptr);
+	isqalloc(tsd, ptr, usize, tcache, slow_path);
 	JEMALLOC_VALGRIND_FREE(ptr, rzsize);
 }
 
@@ -1849,13 +1843,15 @@
 je_realloc(void *ptr, size_t size)
 {
 	void *ret;
-	tsd_t *tsd JEMALLOC_CC_SILENCE_INIT(NULL);
+	tsdn_t *tsdn JEMALLOC_CC_SILENCE_INIT(NULL);
 	size_t usize JEMALLOC_CC_SILENCE_INIT(0);
 	size_t old_usize = 0;
 	UNUSED size_t old_rzsize JEMALLOC_CC_SILENCE_INIT(0);
 
 	if (unlikely(size == 0)) {
 		if (ptr != NULL) {
+			tsd_t *tsd;
+
 			/* realloc(ptr, 0) is equivalent to free(ptr). */
 			UTRACE(ptr, 0, 0);
 			tsd = tsd_fetch();
@@ -1866,13 +1862,19 @@
 	}
 
 	if (likely(ptr != NULL)) {
+		tsd_t *tsd;
+
 		assert(malloc_initialized() || IS_INITIALIZER);
 		malloc_thread_init();
 		tsd = tsd_fetch();
 
-		old_usize = isalloc(ptr, config_prof);
-		if (config_valgrind && unlikely(in_valgrind))
-			old_rzsize = config_prof ? p2rz(ptr) : u2rz(old_usize);
+		witness_assert_lockless(tsd_tsdn(tsd));
+
+		old_usize = isalloc(tsd_tsdn(tsd), ptr, config_prof);
+		if (config_valgrind && unlikely(in_valgrind)) {
+			old_rzsize = config_prof ? p2rz(tsd_tsdn(tsd), ptr) :
+			    u2rz(old_usize);
+		}
 
 		if (config_prof && opt_prof) {
 			usize = s2u(size);
@@ -1884,12 +1886,14 @@
 				usize = s2u(size);
 			ret = iralloc(tsd, ptr, old_usize, size, 0, false);
 		}
+		tsdn = tsd_tsdn(tsd);
 	} else {
 		/* realloc(NULL, size) is equivalent to malloc(size). */
 		if (likely(!malloc_slow))
-			ret = imalloc_body(size, &tsd, &usize, false);
+			ret = ialloc_body(size, false, &tsdn, &usize, false);
 		else
-			ret = imalloc_body(size, &tsd, &usize, true);
+			ret = ialloc_body(size, false, &tsdn, &usize, true);
+		assert(!tsdn_null(tsdn) || ret == NULL);
 	}
 
 	if (unlikely(ret == NULL)) {
@@ -1901,13 +1905,17 @@
 		set_errno(ENOMEM);
 	}
 	if (config_stats && likely(ret != NULL)) {
-		assert(usize == isalloc(ret, config_prof));
+		tsd_t *tsd;
+
+		assert(usize == isalloc(tsdn, ret, config_prof));
+		tsd = tsdn_tsd(tsdn);
 		*tsd_thread_allocatedp_get(tsd) += usize;
 		*tsd_thread_deallocatedp_get(tsd) += old_usize;
 	}
 	UTRACE(ptr, size, ret);
-	JEMALLOC_VALGRIND_REALLOC(true, ret, usize, true, ptr, old_usize,
+	JEMALLOC_VALGRIND_REALLOC(true, tsdn, ret, usize, true, ptr, old_usize,
 	    old_rzsize, true, false);
+	witness_assert_lockless(tsdn);
 	return (ret);
 }
 
@@ -1918,10 +1926,12 @@
 	UTRACE(ptr, 0, 0);
 	if (likely(ptr != NULL)) {
 		tsd_t *tsd = tsd_fetch();
+		witness_assert_lockless(tsd_tsdn(tsd));
 		if (likely(!malloc_slow))
 			ifree(tsd, ptr, tcache_get(tsd, false), false);
 		else
 			ifree(tsd, ptr, tcache_get(tsd, false), true);
+		witness_assert_lockless(tsd_tsdn(tsd));
 	}
 }
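
je_free() and the other entry points above gain witness_assert_lockless() calls on entry and exit. The sketch below shows the general idea with an invented per-thread lock counter; the real witness implementation tracks ranked lock ownership rather than a bare count, and the example assumes GCC/Clang __thread thread-local storage.

#include <assert.h>
#include <pthread.h>
#include <stdio.h>

/* Per-thread count of held demo locks (GCC/Clang __thread TLS). */
static __thread unsigned	demo_nlocks;

static pthread_mutex_t		demo_mtx = PTHREAD_MUTEX_INITIALIZER;

static void
demo_lock(pthread_mutex_t *mtx)
{

	pthread_mutex_lock(mtx);
	demo_nlocks++;
}

static void
demo_unlock(pthread_mutex_t *mtx)
{

	demo_nlocks--;
	pthread_mutex_unlock(mtx);
}

static void
demo_assert_lockless(void)
{

	assert(demo_nlocks == 0);
}

static void
demo_free(void)
{

	demo_assert_lockless();		/* entry: no internal locks held */
	demo_lock(&demo_mtx);
	/* ... deallocation work ... */
	demo_unlock(&demo_mtx);
	demo_assert_lockless();		/* exit: nothing leaked across */
}

int
main(void)
{

	demo_free();
	printf("lockless at the API boundary\n");
	return (0);
}
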
 
@@ -1942,7 +1952,6 @@
 	void *ret JEMALLOC_CC_SILENCE_INIT(NULL);
 	if (unlikely(imemalign(&ret, alignment, size, 1) != 0))
 		ret = NULL;
-	JEMALLOC_VALGRIND_MALLOC(ret != NULL, ret, size, false);
 	return (ret);
 }
 #endif
@@ -1956,7 +1965,6 @@
 	void *ret JEMALLOC_CC_SILENCE_INIT(NULL);
 	if (unlikely(imemalign(&ret, PAGE, size, 1) != 0))
 		ret = NULL;
-	JEMALLOC_VALGRIND_MALLOC(ret != NULL, ret, size, false);
 	return (ret);
 }
 #endif
@@ -1997,7 +2005,7 @@
  */
 
 JEMALLOC_ALWAYS_INLINE_C bool
-imallocx_flags_decode_hard(tsd_t *tsd, size_t size, int flags, size_t *usize,
+imallocx_flags_decode(tsd_t *tsd, size_t size, int flags, size_t *usize,
     size_t *alignment, bool *zero, tcache_t **tcache, arena_t **arena)
 {
 
@@ -2020,7 +2028,7 @@
 		*tcache = tcache_get(tsd, true);
 	if ((flags & MALLOCX_ARENA_MASK) != 0) {
 		unsigned arena_ind = MALLOCX_ARENA_GET(flags);
-		*arena = arena_get(arena_ind, true);
+		*arena = arena_get(tsd_tsdn(tsd), arena_ind, true);
 		if (unlikely(*arena == NULL))
 			return (true);
 	} else
@@ -2028,63 +2036,44 @@
 	return (false);
 }
 
-JEMALLOC_ALWAYS_INLINE_C bool
-imallocx_flags_decode(tsd_t *tsd, size_t size, int flags, size_t *usize,
-    size_t *alignment, bool *zero, tcache_t **tcache, arena_t **arena)
-{
-
-	if (likely(flags == 0)) {
-		*usize = s2u(size);
-		if (unlikely(*usize == 0 || *usize > HUGE_MAXCLASS))
-			return (true);
-		*alignment = 0;
-		*zero = false;
-		*tcache = tcache_get(tsd, true);
-		*arena = NULL;
-		return (false);
-	} else {
-		return (imallocx_flags_decode_hard(tsd, size, flags, usize,
-		    alignment, zero, tcache, arena));
-	}
-}
-
 JEMALLOC_ALWAYS_INLINE_C void *
-imallocx_flags(tsd_t *tsd, size_t usize, size_t alignment, bool zero,
-    tcache_t *tcache, arena_t *arena)
+imallocx_flags(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero,
+    tcache_t *tcache, arena_t *arena, bool slow_path)
 {
 	szind_t ind;
 
 	if (unlikely(alignment != 0))
-		return (ipalloct(tsd, usize, alignment, zero, tcache, arena));
+		return (ipalloct(tsdn, usize, alignment, zero, tcache, arena));
 	ind = size2index(usize);
 	assert(ind < NSIZES);
-	if (unlikely(zero))
-		return (icalloct(tsd, usize, ind, tcache, arena));
-	return (imalloct(tsd, usize, ind, tcache, arena));
+	return (iallocztm(tsdn, usize, ind, zero, tcache, false, arena,
+	    slow_path));
 }
 
 static void *
-imallocx_prof_sample(tsd_t *tsd, size_t usize, size_t alignment, bool zero,
-    tcache_t *tcache, arena_t *arena)
+imallocx_prof_sample(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero,
+    tcache_t *tcache, arena_t *arena, bool slow_path)
 {
 	void *p;
 
 	if (usize <= SMALL_MAXCLASS) {
 		assert(((alignment == 0) ? s2u(LARGE_MINCLASS) :
 		    sa2u(LARGE_MINCLASS, alignment)) == LARGE_MINCLASS);
-		p = imallocx_flags(tsd, LARGE_MINCLASS, alignment, zero, tcache,
-		    arena);
+		p = imallocx_flags(tsdn, LARGE_MINCLASS, alignment, zero,
+		    tcache, arena, slow_path);
 		if (p == NULL)
 			return (NULL);
-		arena_prof_promoted(p, usize);
-	} else
-		p = imallocx_flags(tsd, usize, alignment, zero, tcache, arena);
+		arena_prof_promoted(tsdn, p, usize);
+	} else {
+		p = imallocx_flags(tsdn, usize, alignment, zero, tcache, arena,
+		    slow_path);
+	}
 
 	return (p);
 }
 
 JEMALLOC_ALWAYS_INLINE_C void *
-imallocx_prof(tsd_t *tsd, size_t size, int flags, size_t *usize)
+imallocx_prof(tsd_t *tsd, size_t size, int flags, size_t *usize, bool slow_path)
 {
 	void *p;
 	size_t alignment;
@@ -2097,25 +2086,27 @@
 	    &zero, &tcache, &arena)))
 		return (NULL);
 	tctx = prof_alloc_prep(tsd, *usize, prof_active_get_unlocked(), true);
-	if (likely((uintptr_t)tctx == (uintptr_t)1U))
-		p = imallocx_flags(tsd, *usize, alignment, zero, tcache, arena);
-	else if ((uintptr_t)tctx > (uintptr_t)1U) {
-		p = imallocx_prof_sample(tsd, *usize, alignment, zero, tcache,
-		    arena);
+	if (likely((uintptr_t)tctx == (uintptr_t)1U)) {
+		p = imallocx_flags(tsd_tsdn(tsd), *usize, alignment, zero,
+		    tcache, arena, slow_path);
+	} else if ((uintptr_t)tctx > (uintptr_t)1U) {
+		p = imallocx_prof_sample(tsd_tsdn(tsd), *usize, alignment, zero,
+		    tcache, arena, slow_path);
 	} else
 		p = NULL;
 	if (unlikely(p == NULL)) {
 		prof_alloc_rollback(tsd, tctx, true);
 		return (NULL);
 	}
-	prof_malloc(p, *usize, tctx);
+	prof_malloc(tsd_tsdn(tsd), p, *usize, tctx);
 
 	assert(alignment == 0 || ((uintptr_t)p & (alignment - 1)) == ZU(0));
 	return (p);
 }
 
 JEMALLOC_ALWAYS_INLINE_C void *
-imallocx_no_prof(tsd_t *tsd, size_t size, int flags, size_t *usize)
+imallocx_no_prof(tsd_t *tsd, size_t size, int flags, size_t *usize,
+    bool slow_path)
 {
 	void *p;
 	size_t alignment;
@@ -2123,24 +2114,53 @@
 	tcache_t *tcache;
 	arena_t *arena;
 
+	if (unlikely(imallocx_flags_decode(tsd, size, flags, usize, &alignment,
+	    &zero, &tcache, &arena)))
+		return (NULL);
+	p = imallocx_flags(tsd_tsdn(tsd), *usize, alignment, zero, tcache,
+	    arena, slow_path);
+	assert(alignment == 0 || ((uintptr_t)p & (alignment - 1)) == ZU(0));
+	return (p);
+}
+
+/* This function guarantees that *tsdn is non-NULL on success. */
+JEMALLOC_ALWAYS_INLINE_C void *
+imallocx_body(size_t size, int flags, tsdn_t **tsdn, size_t *usize,
+    bool slow_path)
+{
+	tsd_t *tsd;
+
+	if (slow_path && unlikely(malloc_init())) {
+		*tsdn = NULL;
+		return (NULL);
+	}
+
+	tsd = tsd_fetch();
+	*tsdn = tsd_tsdn(tsd);
+	witness_assert_lockless(tsd_tsdn(tsd));
+
 	if (likely(flags == 0)) {
 		szind_t ind = size2index(size);
 		if (unlikely(ind >= NSIZES))
 			return (NULL);
-		if (config_stats || (config_valgrind &&
-		    unlikely(in_valgrind))) {
+		if (config_stats || (config_prof && opt_prof) || (slow_path &&
+		    config_valgrind && unlikely(in_valgrind))) {
 			*usize = index2size(ind);
 			assert(*usize > 0 && *usize <= HUGE_MAXCLASS);
 		}
-		return (imalloc(tsd, size, ind, true));
+
+		if (config_prof && opt_prof) {
+			return (ialloc_prof(tsd, *usize, ind, false,
+			    slow_path));
+		}
+
+		return (ialloc(tsd, size, ind, false, slow_path));
 	}
 
-	if (unlikely(imallocx_flags_decode_hard(tsd, size, flags, usize,
-	    &alignment, &zero, &tcache, &arena)))
-		return (NULL);
-	p = imallocx_flags(tsd, *usize, alignment, zero, tcache, arena);
-	assert(alignment == 0 || ((uintptr_t)p & (alignment - 1)) == ZU(0));
-	return (p);
+	if (config_prof && opt_prof)
+		return (imallocx_prof(tsd, size, flags, usize, slow_path));
+
+	return (imallocx_no_prof(tsd, size, flags, usize, slow_path));
 }
 
 JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN
@@ -2148,37 +2168,24 @@
 JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE(1)
 je_mallocx(size_t size, int flags)
 {
-	tsd_t *tsd;
+	tsdn_t *tsdn;
 	void *p;
 	size_t usize;
 
 	assert(size != 0);
 
-	if (unlikely(malloc_init()))
-		goto label_oom;
-	tsd = tsd_fetch();
-
-	if (config_prof && opt_prof)
-		p = imallocx_prof(tsd, size, flags, &usize);
-	else
-		p = imallocx_no_prof(tsd, size, flags, &usize);
-	if (unlikely(p == NULL))
-		goto label_oom;
-
-	if (config_stats) {
-		assert(usize == isalloc(p, config_prof));
-		*tsd_thread_allocatedp_get(tsd) += usize;
+	if (likely(!malloc_slow)) {
+		p = imallocx_body(size, flags, &tsdn, &usize, false);
+		ialloc_post_check(p, tsdn, usize, "mallocx", false, false);
+	} else {
+		p = imallocx_body(size, flags, &tsdn, &usize, true);
+		ialloc_post_check(p, tsdn, usize, "mallocx", false, true);
+		UTRACE(0, size, p);
+		JEMALLOC_VALGRIND_MALLOC(p != NULL, tsdn, p, usize,
+		    MALLOCX_ZERO_GET(flags));
 	}
-	UTRACE(0, size, p);
-	JEMALLOC_VALGRIND_MALLOC(true, p, usize, MALLOCX_ZERO_GET(flags));
+
 	return (p);
-label_oom:
-	if (config_xmalloc && unlikely(opt_xmalloc)) {
-		malloc_write("<jemalloc>: Error in mallocx(): out of memory\n");
-		abort();
-	}
-	UTRACE(0, size, 0);
-	return (NULL);
 }
 
 static void *
@@ -2195,7 +2202,7 @@
 		    zero, tcache, arena);
 		if (p == NULL)
 			return (NULL);
-		arena_prof_promoted(p, usize);
+		arena_prof_promoted(tsd_tsdn(tsd), p, usize);
 	} else {
 		p = iralloct(tsd, old_ptr, old_usize, usize, alignment, zero,
 		    tcache, arena);
@@ -2214,7 +2221,7 @@
 	prof_tctx_t *old_tctx, *tctx;
 
 	prof_active = prof_active_get_unlocked();
-	old_tctx = prof_tctx_get(old_ptr);
+	old_tctx = prof_tctx_get(tsd_tsdn(tsd), old_ptr);
 	tctx = prof_alloc_prep(tsd, *usize, prof_active, true);
 	if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) {
 		p = irallocx_prof_sample(tsd, old_ptr, old_usize, *usize,
@@ -2237,7 +2244,7 @@
 		 * be the same as the current usize because of in-place large
 		 * reallocation.  Therefore, query the actual value of usize.
 		 */
-		*usize = isalloc(p, config_prof);
+		*usize = isalloc(tsd_tsdn(tsd), p, config_prof);
 	}
 	prof_realloc(tsd, p, *usize, tctx, prof_active, true, old_ptr,
 	    old_usize, old_tctx);
@@ -2265,10 +2272,11 @@
 	assert(malloc_initialized() || IS_INITIALIZER);
 	malloc_thread_init();
 	tsd = tsd_fetch();
+	witness_assert_lockless(tsd_tsdn(tsd));
 
 	if (unlikely((flags & MALLOCX_ARENA_MASK) != 0)) {
 		unsigned arena_ind = MALLOCX_ARENA_GET(flags);
-		arena = arena_get(arena_ind, true);
+		arena = arena_get(tsd_tsdn(tsd), arena_ind, true);
 		if (unlikely(arena == NULL))
 			goto label_oom;
 	} else
@@ -2282,7 +2290,7 @@
 	} else
 		tcache = tcache_get(tsd, true);
 
-	old_usize = isalloc(ptr, config_prof);
+	old_usize = isalloc(tsd_tsdn(tsd), ptr, config_prof);
 	if (config_valgrind && unlikely(in_valgrind))
 		old_rzsize = u2rz(old_usize);
 
@@ -2300,7 +2308,7 @@
 		if (unlikely(p == NULL))
 			goto label_oom;
 		if (config_stats || (config_valgrind && unlikely(in_valgrind)))
-			usize = isalloc(p, config_prof);
+			usize = isalloc(tsd_tsdn(tsd), p, config_prof);
 	}
 	assert(alignment == 0 || ((uintptr_t)p & (alignment - 1)) == ZU(0));
 
@@ -2309,8 +2317,9 @@
 		*tsd_thread_deallocatedp_get(tsd) += old_usize;
 	}
 	UTRACE(ptr, size, p);
-	JEMALLOC_VALGRIND_REALLOC(true, p, usize, false, ptr, old_usize,
-	    old_rzsize, false, zero);
+	JEMALLOC_VALGRIND_REALLOC(true, tsd_tsdn(tsd), p, usize, false, ptr,
+	    old_usize, old_rzsize, false, zero);
+	witness_assert_lockless(tsd_tsdn(tsd));
 	return (p);
 label_oom:
 	if (config_xmalloc && unlikely(opt_xmalloc)) {
@@ -2318,31 +2327,32 @@
 		abort();
 	}
 	UTRACE(ptr, size, 0);
+	witness_assert_lockless(tsd_tsdn(tsd));
 	return (NULL);
 }
 
 JEMALLOC_ALWAYS_INLINE_C size_t
-ixallocx_helper(tsd_t *tsd, void *ptr, size_t old_usize, size_t size,
+ixallocx_helper(tsdn_t *tsdn, void *ptr, size_t old_usize, size_t size,
     size_t extra, size_t alignment, bool zero)
 {
 	size_t usize;
 
-	if (ixalloc(tsd, ptr, old_usize, size, extra, alignment, zero))
+	if (ixalloc(tsdn, ptr, old_usize, size, extra, alignment, zero))
 		return (old_usize);
-	usize = isalloc(ptr, config_prof);
+	usize = isalloc(tsdn, ptr, config_prof);
 
 	return (usize);
 }
 
 static size_t
-ixallocx_prof_sample(tsd_t *tsd, void *ptr, size_t old_usize, size_t size,
+ixallocx_prof_sample(tsdn_t *tsdn, void *ptr, size_t old_usize, size_t size,
     size_t extra, size_t alignment, bool zero, prof_tctx_t *tctx)
 {
 	size_t usize;
 
 	if (tctx == NULL)
 		return (old_usize);
-	usize = ixallocx_helper(tsd, ptr, old_usize, size, extra, alignment,
+	usize = ixallocx_helper(tsdn, ptr, old_usize, size, extra, alignment,
 	    zero);
 
 	return (usize);
@@ -2357,7 +2367,7 @@
 	prof_tctx_t *old_tctx, *tctx;
 
 	prof_active = prof_active_get_unlocked();
-	old_tctx = prof_tctx_get(ptr);
+	old_tctx = prof_tctx_get(tsd_tsdn(tsd), ptr);
 	/*
 	 * usize isn't knowable before ixalloc() returns when extra is non-zero.
 	 * Therefore, compute its maximum possible value and use that in
@@ -2382,11 +2392,11 @@
 	tctx = prof_alloc_prep(tsd, usize_max, prof_active, false);
 
 	if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) {
-		usize = ixallocx_prof_sample(tsd, ptr, old_usize, size, extra,
-		    alignment, zero, tctx);
+		usize = ixallocx_prof_sample(tsd_tsdn(tsd), ptr, old_usize,
+		    size, extra, alignment, zero, tctx);
 	} else {
-		usize = ixallocx_helper(tsd, ptr, old_usize, size, extra,
-		    alignment, zero);
+		usize = ixallocx_helper(tsd_tsdn(tsd), ptr, old_usize, size,
+		    extra, alignment, zero);
 	}
 	if (usize == old_usize) {
 		prof_alloc_rollback(tsd, tctx, false);
@@ -2413,8 +2423,9 @@
 	assert(malloc_initialized() || IS_INITIALIZER);
 	malloc_thread_init();
 	tsd = tsd_fetch();
+	witness_assert_lockless(tsd_tsdn(tsd));
 
-	old_usize = isalloc(ptr, config_prof);
+	old_usize = isalloc(tsd_tsdn(tsd), ptr, config_prof);
 
 	/*
 	 * The API explicitly absolves itself of protecting against (size +
@@ -2439,8 +2450,8 @@
 		usize = ixallocx_prof(tsd, ptr, old_usize, size, extra,
 		    alignment, zero);
 	} else {
-		usize = ixallocx_helper(tsd, ptr, old_usize, size, extra,
-		    alignment, zero);
+		usize = ixallocx_helper(tsd_tsdn(tsd), ptr, old_usize, size,
+		    extra, alignment, zero);
 	}
 	if (unlikely(usize == old_usize))
 		goto label_not_resized;
@@ -2449,10 +2460,11 @@
 		*tsd_thread_allocatedp_get(tsd) += usize;
 		*tsd_thread_deallocatedp_get(tsd) += old_usize;
 	}
-	JEMALLOC_VALGRIND_REALLOC(false, ptr, usize, false, ptr, old_usize,
-	    old_rzsize, false, zero);
+	JEMALLOC_VALGRIND_REALLOC(false, tsd_tsdn(tsd), ptr, usize, false, ptr,
+	    old_usize, old_rzsize, false, zero);
 label_not_resized:
 	UTRACE(ptr, size, ptr);
+	witness_assert_lockless(tsd_tsdn(tsd));
 	return (usize);
 }
 
@@ -2461,15 +2473,20 @@
 je_sallocx(const void *ptr, int flags)
 {
 	size_t usize;
+	tsdn_t *tsdn;
 
 	assert(malloc_initialized() || IS_INITIALIZER);
 	malloc_thread_init();
 
-	if (config_ivsalloc)
-		usize = ivsalloc(ptr, config_prof);
-	else
-		usize = isalloc(ptr, config_prof);
+	tsdn = tsdn_fetch();
+	witness_assert_lockless(tsdn);
 
+	if (config_ivsalloc)
+		usize = ivsalloc(tsdn, ptr, config_prof);
+	else
+		usize = isalloc(tsdn, ptr, config_prof);
+
+	witness_assert_lockless(tsdn);
 	return (usize);
 }
 
@@ -2483,6 +2500,7 @@
 	assert(malloc_initialized() || IS_INITIALIZER);
 
 	tsd = tsd_fetch();
+	witness_assert_lockless(tsd_tsdn(tsd));
 	if (unlikely((flags & MALLOCX_TCACHE_MASK) != 0)) {
 		if ((flags & MALLOCX_TCACHE_MASK) == MALLOCX_TCACHE_NONE)
 			tcache = NULL;
@@ -2492,18 +2510,25 @@
 		tcache = tcache_get(tsd, false);
 
 	UTRACE(ptr, 0, 0);
-	ifree(tsd_fetch(), ptr, tcache, true);
+	if (likely(!malloc_slow))
+		ifree(tsd, ptr, tcache, false);
+	else
+		ifree(tsd, ptr, tcache, true);
+	witness_assert_lockless(tsd_tsdn(tsd));
 }
 
 JEMALLOC_ALWAYS_INLINE_C size_t
-inallocx(size_t size, int flags)
+inallocx(tsdn_t *tsdn, size_t size, int flags)
 {
 	size_t usize;
 
+	witness_assert_lockless(tsdn);
+
 	if (likely((flags & MALLOCX_LG_ALIGN_MASK) == 0))
 		usize = s2u(size);
 	else
 		usize = sa2u(size, MALLOCX_ALIGN_GET_SPECIFIED(flags));
+	witness_assert_lockless(tsdn);
 	return (usize);
 }
 
@@ -2516,10 +2541,11 @@
 
 	assert(ptr != NULL);
 	assert(malloc_initialized() || IS_INITIALIZER);
-	usize = inallocx(size, flags);
-	assert(usize == isalloc(ptr, config_prof));
-
 	tsd = tsd_fetch();
+	usize = inallocx(tsd_tsdn(tsd), size, flags);
+	assert(usize == isalloc(tsd_tsdn(tsd), ptr, config_prof));
+
+	witness_assert_lockless(tsd_tsdn(tsd));
 	if (unlikely((flags & MALLOCX_TCACHE_MASK) != 0)) {
 		if ((flags & MALLOCX_TCACHE_MASK) == MALLOCX_TCACHE_NONE)
 			tcache = NULL;
@@ -2529,7 +2555,11 @@
 		tcache = tcache_get(tsd, false);
 
 	UTRACE(ptr, 0, 0);
-	isfree(tsd, ptr, usize, tcache);
+	if (likely(!malloc_slow))
+		isfree(tsd, ptr, usize, tcache, false);
+	else
+		isfree(tsd, ptr, usize, tcache, true);
+	witness_assert_lockless(tsd_tsdn(tsd));
 }
 
 JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW
@@ -2537,16 +2567,21 @@
 je_nallocx(size_t size, int flags)
 {
 	size_t usize;
+	tsdn_t *tsdn;
 
 	assert(size != 0);
 
 	if (unlikely(malloc_init()))
 		return (0);
 
-	usize = inallocx(size, flags);
+	tsdn = tsdn_fetch();
+	witness_assert_lockless(tsdn);
+
+	usize = inallocx(tsdn, size, flags);
 	if (unlikely(usize > HUGE_MAXCLASS))
 		return (0);
 
+	witness_assert_lockless(tsdn);
 	return (usize);
 }
 
@@ -2554,55 +2589,82 @@
 je_mallctl(const char *name, void *oldp, size_t *oldlenp, void *newp,
     size_t newlen)
 {
+	int ret;
+	tsd_t *tsd;
 
 	if (unlikely(malloc_init()))
 		return (EAGAIN);
 
-	return (ctl_byname(name, oldp, oldlenp, newp, newlen));
+	tsd = tsd_fetch();
+	witness_assert_lockless(tsd_tsdn(tsd));
+	ret = ctl_byname(tsd, name, oldp, oldlenp, newp, newlen);
+	witness_assert_lockless(tsd_tsdn(tsd));
+	return (ret);
 }
 
 JEMALLOC_EXPORT int JEMALLOC_NOTHROW
 je_mallctlnametomib(const char *name, size_t *mibp, size_t *miblenp)
 {
+	int ret;
+	tsdn_t *tsdn;
 
 	if (unlikely(malloc_init()))
 		return (EAGAIN);
 
-	return (ctl_nametomib(name, mibp, miblenp));
+	tsdn = tsdn_fetch();
+	witness_assert_lockless(tsdn);
+	ret = ctl_nametomib(tsdn, name, mibp, miblenp);
+	witness_assert_lockless(tsdn);
+	return (ret);
 }
 
 JEMALLOC_EXPORT int JEMALLOC_NOTHROW
 je_mallctlbymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
   void *newp, size_t newlen)
 {
+	int ret;
+	tsd_t *tsd;
 
 	if (unlikely(malloc_init()))
 		return (EAGAIN);
 
-	return (ctl_bymib(mib, miblen, oldp, oldlenp, newp, newlen));
+	tsd = tsd_fetch();
+	witness_assert_lockless(tsd_tsdn(tsd));
+	ret = ctl_bymib(tsd, mib, miblen, oldp, oldlenp, newp, newlen);
+	witness_assert_lockless(tsd_tsdn(tsd));
+	return (ret);
 }
 
 JEMALLOC_EXPORT void JEMALLOC_NOTHROW
 je_malloc_stats_print(void (*write_cb)(void *, const char *), void *cbopaque,
     const char *opts)
 {
+	tsdn_t *tsdn;
 
+	tsdn = tsdn_fetch();
+	witness_assert_lockless(tsdn);
 	stats_print(write_cb, cbopaque, opts);
+	witness_assert_lockless(tsdn);
 }
 
 JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW
 je_malloc_usable_size(JEMALLOC_USABLE_SIZE_CONST void *ptr)
 {
 	size_t ret;
+	tsdn_t *tsdn;
 
 	assert(malloc_initialized() || IS_INITIALIZER);
 	malloc_thread_init();
 
-	if (config_ivsalloc)
-		ret = ivsalloc(ptr, config_prof);
-	else
-		ret = (ptr == NULL) ? 0 : isalloc(ptr, config_prof);
+	tsdn = tsdn_fetch();
+	witness_assert_lockless(tsdn);
 
+	if (config_ivsalloc)
+		ret = ivsalloc(tsdn, ptr, config_prof);
+	else
+		ret = (ptr == NULL) ? 0 : isalloc(tsdn, ptr, config_prof);
+
+	witness_assert_lockless(tsdn);
 	return (ret);
 }
 
@@ -2628,6 +2690,7 @@
  * to trigger the deadlock described above, but doing so would involve forking
  * via a library constructor that runs before jemalloc's runs.
  */
+#ifndef JEMALLOC_JET
 JEMALLOC_ATTR(constructor)
 static void
 jemalloc_constructor(void)
@@ -2635,6 +2698,7 @@
 
 	malloc_init();
 }
+#endif
 
 #ifndef JEMALLOC_MUTEX_INIT_CB
 void
@@ -2644,6 +2708,7 @@
 _malloc_prefork(void)
 #endif
 {
+	tsd_t *tsd;
 	unsigned i, j, narenas;
 	arena_t *arena;
 
@@ -2653,31 +2718,41 @@
 #endif
 	assert(malloc_initialized());
 
+	tsd = tsd_fetch();
+
 	narenas = narenas_total_get();
 
+	witness_prefork(tsd);
 	/* Acquire all mutexes in a safe order. */
-	ctl_prefork();
-	malloc_mutex_prefork(&arenas_lock);
-	prof_prefork0();
+	ctl_prefork(tsd_tsdn(tsd));
+	malloc_mutex_prefork(tsd_tsdn(tsd), &arenas_lock);
+	prof_prefork0(tsd_tsdn(tsd));
 	for (i = 0; i < 3; i++) {
 		for (j = 0; j < narenas; j++) {
-			if ((arena = arena_get(j, false)) != NULL) {
+			if ((arena = arena_get(tsd_tsdn(tsd), j, false)) !=
+			    NULL) {
 				switch (i) {
-				case 0: arena_prefork0(arena); break;
-				case 1: arena_prefork1(arena); break;
-				case 2: arena_prefork2(arena); break;
+				case 0:
+					arena_prefork0(tsd_tsdn(tsd), arena);
+					break;
+				case 1:
+					arena_prefork1(tsd_tsdn(tsd), arena);
+					break;
+				case 2:
+					arena_prefork2(tsd_tsdn(tsd), arena);
+					break;
 				default: not_reached();
 				}
 			}
 		}
 	}
-	base_prefork();
-	chunk_prefork();
+	base_prefork(tsd_tsdn(tsd));
+	chunk_prefork(tsd_tsdn(tsd));
 	for (i = 0; i < narenas; i++) {
-		if ((arena = arena_get(i, false)) != NULL)
-			arena_prefork3(arena);
+		if ((arena = arena_get(tsd_tsdn(tsd), i, false)) != NULL)
+			arena_prefork3(tsd_tsdn(tsd), arena);
 	}
-	prof_prefork1();
+	prof_prefork1(tsd_tsdn(tsd));
 }
 
 #ifndef JEMALLOC_MUTEX_INIT_CB
@@ -2688,6 +2763,7 @@
 _malloc_postfork(void)
 #endif
 {
+	tsd_t *tsd;
 	unsigned i, narenas;
 
 #ifdef JEMALLOC_MUTEX_INIT_CB
@@ -2696,39 +2772,46 @@
 #endif
 	assert(malloc_initialized());
 
+	tsd = tsd_fetch();
+
+	witness_postfork_parent(tsd);
 	/* Release all mutexes, now that fork() has completed. */
-	chunk_postfork_parent();
-	base_postfork_parent();
+	chunk_postfork_parent(tsd_tsdn(tsd));
+	base_postfork_parent(tsd_tsdn(tsd));
 	for (i = 0, narenas = narenas_total_get(); i < narenas; i++) {
 		arena_t *arena;
 
-		if ((arena = arena_get(i, false)) != NULL)
-			arena_postfork_parent(arena);
+		if ((arena = arena_get(tsd_tsdn(tsd), i, false)) != NULL)
+			arena_postfork_parent(tsd_tsdn(tsd), arena);
 	}
-	prof_postfork_parent();
-	malloc_mutex_postfork_parent(&arenas_lock);
-	ctl_postfork_parent();
+	prof_postfork_parent(tsd_tsdn(tsd));
+	malloc_mutex_postfork_parent(tsd_tsdn(tsd), &arenas_lock);
+	ctl_postfork_parent(tsd_tsdn(tsd));
 }
 
 void
 jemalloc_postfork_child(void)
 {
+	tsd_t *tsd;
 	unsigned i, narenas;
 
 	assert(malloc_initialized());
 
+	tsd = tsd_fetch();
+
+	witness_postfork_child(tsd);
 	/* Release all mutexes, now that fork() has completed. */
-	chunk_postfork_child();
-	base_postfork_child();
+	chunk_postfork_child(tsd_tsdn(tsd));
+	base_postfork_child(tsd_tsdn(tsd));
 	for (i = 0, narenas = narenas_total_get(); i < narenas; i++) {
 		arena_t *arena;
 
-		if ((arena = arena_get(i, false)) != NULL)
-			arena_postfork_child(arena);
+		if ((arena = arena_get(tsd_tsdn(tsd), i, false)) != NULL)
+			arena_postfork_child(tsd_tsdn(tsd), arena);
 	}
-	prof_postfork_child();
-	malloc_mutex_postfork_child(&arenas_lock);
-	ctl_postfork_child();
+	prof_postfork_child(tsd_tsdn(tsd));
+	malloc_mutex_postfork_child(tsd_tsdn(tsd), &arenas_lock);
+	ctl_postfork_child(tsd_tsdn(tsd));
 }
 
 /******************************************************************************/
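
The _malloc_prefork()/_malloc_postfork()/jemalloc_postfork_child() hunks above keep the long-standing fork protocol (acquire every mutex in a fixed order before fork(), release or reinitialize them afterwards in parent and child) while threading tsdn through and bracketing the sequence with witness calls. A self-contained sketch of that protocol built on pthread_atfork(3); the two locks are illustrative only:

#include <pthread.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

static pthread_mutex_t ctl_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t arena_lock = PTHREAD_MUTEX_INITIALIZER;

/* Acquire all locks in a fixed order so none is mid-operation at fork(). */
static void
prefork(void)
{
	pthread_mutex_lock(&ctl_lock);
	pthread_mutex_lock(&arena_lock);
}

/* Parent: release in reverse order once fork() has completed. */
static void
postfork_parent(void)
{
	pthread_mutex_unlock(&arena_lock);
	pthread_mutex_unlock(&ctl_lock);
}

/* Child: release (or, on some platforms, reinitialize) the same locks. */
static void
postfork_child(void)
{
	pthread_mutex_unlock(&arena_lock);
	pthread_mutex_unlock(&ctl_lock);
}

int
main(void)
{
	pthread_atfork(prefork, postfork_parent, postfork_child);
	if (fork() == 0) {
		/* The locks are usable in the child. */
		pthread_mutex_lock(&ctl_lock);
		pthread_mutex_unlock(&ctl_lock);
		_exit(0);
	}
	wait(NULL);
	return (0);
}
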
diff --git a/src/mutex.c b/src/mutex.c
index 2d47af9..a1fac34 100644
--- a/src/mutex.c
+++ b/src/mutex.c
@@ -69,7 +69,7 @@
 #endif
 
 bool
-malloc_mutex_init(malloc_mutex_t *mutex)
+malloc_mutex_init(malloc_mutex_t *mutex, const char *name, witness_rank_t rank)
 {
 
 #ifdef _WIN32
@@ -103,31 +103,34 @@
 	}
 	pthread_mutexattr_destroy(&attr);
 #endif
+	if (config_debug)
+		witness_init(&mutex->witness, name, rank, NULL);
 	return (false);
 }
 
 void
-malloc_mutex_prefork(malloc_mutex_t *mutex)
+malloc_mutex_prefork(tsdn_t *tsdn, malloc_mutex_t *mutex)
 {
 
-	malloc_mutex_lock(mutex);
+	malloc_mutex_lock(tsdn, mutex);
 }
 
 void
-malloc_mutex_postfork_parent(malloc_mutex_t *mutex)
+malloc_mutex_postfork_parent(tsdn_t *tsdn, malloc_mutex_t *mutex)
 {
 
-	malloc_mutex_unlock(mutex);
+	malloc_mutex_unlock(tsdn, mutex);
 }
 
 void
-malloc_mutex_postfork_child(malloc_mutex_t *mutex)
+malloc_mutex_postfork_child(tsdn_t *tsdn, malloc_mutex_t *mutex)
 {
 
 #ifdef JEMALLOC_MUTEX_INIT_CB
-	malloc_mutex_unlock(mutex);
+	malloc_mutex_unlock(tsdn, mutex);
 #else
-	if (malloc_mutex_init(mutex)) {
+	if (malloc_mutex_init(mutex, mutex->witness.name,
+	    mutex->witness.rank)) {
 		malloc_printf("<jemalloc>: Error re-initializing mutex in "
 		    "child\n");
 		if (opt_abort)
@@ -137,7 +140,7 @@
 }
 
 bool
-mutex_boot(void)
+malloc_mutex_boot(void)
 {
 
 #ifdef JEMALLOC_MUTEX_INIT_CB
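
malloc_mutex_init() above now records a name and a witness rank so that debug builds can detect lock-order reversals at acquisition time. A simplified standalone sketch of rank-based order checking; ranked_mutex_t and held_max_rank are invented for illustration, and the real witness machinery tracks a per-thread list of held witnesses rather than a single rank:

#include <assert.h>
#include <pthread.h>
#include <stdio.h>

typedef struct {
	pthread_mutex_t	lock;
	const char	*name;
	unsigned	rank;
} ranked_mutex_t;

/* Highest rank currently held by this thread; 0 means no locks held. */
static _Thread_local unsigned held_max_rank;

static void
ranked_mutex_init(ranked_mutex_t *m, const char *name, unsigned rank)
{
	pthread_mutex_init(&m->lock, NULL);
	m->name = name;
	m->rank = rank;
}

static void
ranked_mutex_lock(ranked_mutex_t *m)
{
	/* Ranks must strictly increase along any acquisition path. */
	if (m->rank <= held_max_rank) {
		fprintf(stderr, "lock-order reversal acquiring %s (rank %u)\n",
		    m->name, m->rank);
		assert(0);
	}
	pthread_mutex_lock(&m->lock);
	held_max_rank = m->rank;
}

static void
ranked_mutex_unlock(ranked_mutex_t *m, unsigned restore_rank)
{
	pthread_mutex_unlock(&m->lock);
	held_max_rank = restore_rank;
}

int
main(void)
{
	ranked_mutex_t ctl, arenas;

	ranked_mutex_init(&ctl, "ctl", 10);
	ranked_mutex_init(&arenas, "arenas", 20);

	ranked_mutex_lock(&ctl);	/* rank 10 */
	ranked_mutex_lock(&arenas);	/* rank 20: consistent order */
	ranked_mutex_unlock(&arenas, 10);
	ranked_mutex_unlock(&ctl, 0);
	return (0);
}
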
diff --git a/src/nstime.c b/src/nstime.c
index 4cf90b5..26e49dc 100644
--- a/src/nstime.c
+++ b/src/nstime.c
@@ -99,7 +99,7 @@
 
 #ifdef JEMALLOC_JET
 #undef nstime_update
-#define	nstime_update JEMALLOC_N(nstime_update_impl)
+#define	nstime_update JEMALLOC_N(n_nstime_update)
 #endif
 bool
 nstime_update(nstime_t *time)
@@ -144,5 +144,5 @@
 #ifdef JEMALLOC_JET
 #undef nstime_update
 #define	nstime_update JEMALLOC_N(nstime_update)
-nstime_update_t *nstime_update = JEMALLOC_N(nstime_update_impl);
+nstime_update_t *nstime_update = JEMALLOC_N(n_nstime_update);
 #endif
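
The nstime.c hunk above only renames the real implementation behind the JEMALLOC_JET hook: in test builds the implementation is compiled under a private name and the public symbol becomes a writable function pointer that tests can redirect and later restore. A generic sketch of that indirection; TEST_BUILD and clock_read are invented stand-ins for JEMALLOC_JET and the JEMALLOC_N()-mangled names:

#include <stdbool.h>
#include <stdio.h>

#define	TEST_BUILD 1

#if TEST_BUILD
#define	clock_read clock_read_impl
#endif
static bool
clock_read(long *out)
{
	*out = 42;	/* stand-in for a real clock read */
	return (false);
}
#if TEST_BUILD
#undef clock_read
typedef bool (clock_read_t)(long *);
clock_read_t *clock_read = clock_read_impl;	/* redirectable hook */

static bool
clock_read_mock(long *out)
{
	*out = 0;	/* deterministic value for tests */
	return (false);
}
#endif

int
main(void)
{
	long t;

	clock_read(&t);
	printf("real: %ld\n", t);
#if TEST_BUILD
	clock_read = clock_read_mock;	/* tests swap the hook... */
	clock_read(&t);
	printf("mock: %ld\n", t);
	clock_read = clock_read_impl;	/* ...and restore it afterwards */
#endif
	return (0);
}
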
diff --git a/src/pages.c b/src/pages.c
index 83a167f..2a9b7e3 100644
--- a/src/pages.c
+++ b/src/pages.c
@@ -1,29 +1,49 @@
 #define	JEMALLOC_PAGES_C_
 #include "jemalloc/internal/jemalloc_internal.h"
 
+#ifdef JEMALLOC_SYSCTL_VM_OVERCOMMIT
+#include <sys/sysctl.h>
+#endif
+
+/******************************************************************************/
+/* Data. */
+
+#ifndef _WIN32
+#  define PAGES_PROT_COMMIT (PROT_READ | PROT_WRITE)
+#  define PAGES_PROT_DECOMMIT (PROT_NONE)
+static int	mmap_flags;
+#endif
+static bool	os_overcommits;
+
 /******************************************************************************/
 
 void *
-pages_map(void *addr, size_t size)
+pages_map(void *addr, size_t size, bool *commit)
 {
 	void *ret;
 
 	assert(size != 0);
 
+	if (os_overcommits)
+		*commit = true;
+
 #ifdef _WIN32
 	/*
 	 * If VirtualAlloc can't allocate at the given address when one is
 	 * given, it fails and returns NULL.
 	 */
-	ret = VirtualAlloc(addr, size, MEM_COMMIT | MEM_RESERVE,
+	ret = VirtualAlloc(addr, size, MEM_RESERVE | (*commit ? MEM_COMMIT : 0),
 	    PAGE_READWRITE);
 #else
 	/*
 	 * We don't use MAP_FIXED here, because it can cause the *replacement*
 	 * of existing mappings, and we only want to create new mappings.
 	 */
-	ret = mmap(addr, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON,
-	    -1, 0);
+	{
+		int prot = *commit ? PAGES_PROT_COMMIT : PAGES_PROT_DECOMMIT;
+
+		ret = mmap(addr, size, prot, mmap_flags, -1, 0);
+	}
 	assert(ret != NULL);
 
 	if (ret == MAP_FAILED)
@@ -67,7 +87,8 @@
 }
 
 void *
-pages_trim(void *addr, size_t alloc_size, size_t leadsize, size_t size)
+pages_trim(void *addr, size_t alloc_size, size_t leadsize, size_t size,
+    bool *commit)
 {
 	void *ret = (void *)((uintptr_t)addr + leadsize);
 
@@ -77,7 +98,7 @@
 		void *new_addr;
 
 		pages_unmap(addr, alloc_size);
-		new_addr = pages_map(ret, size);
+		new_addr = pages_map(ret, size, commit);
 		if (new_addr == ret)
 			return (ret);
 		if (new_addr)
@@ -101,17 +122,17 @@
 pages_commit_impl(void *addr, size_t size, bool commit)
 {
 
-#ifndef _WIN32
-	/*
-	 * The following decommit/commit implementation is functional, but
-	 * always disabled because it doesn't add value beyong improved
-	 * debugging (at the cost of extra system calls) on systems that
-	 * overcommit.
-	 */
-	if (false) {
-		int prot = commit ? (PROT_READ | PROT_WRITE) : PROT_NONE;
-		void *result = mmap(addr, size, prot, MAP_PRIVATE | MAP_ANON |
-		    MAP_FIXED, -1, 0);
+	if (os_overcommits)
+		return (true);
+
+#ifdef _WIN32
+	return (commit ? (addr != VirtualAlloc(addr, size, MEM_COMMIT,
+	    PAGE_READWRITE)) : (!VirtualFree(addr, size, MEM_DECOMMIT)));
+#else
+	{
+		int prot = commit ? PAGES_PROT_COMMIT : PAGES_PROT_DECOMMIT;
+		void *result = mmap(addr, size, prot, mmap_flags | MAP_FIXED,
+		    -1, 0);
 		if (result == MAP_FAILED)
 			return (true);
 		if (result != addr) {
@@ -125,7 +146,6 @@
 		return (false);
 	}
 #endif
-	return (true);
 }
 
 bool
@@ -171,3 +191,63 @@
 	return (unzeroed);
 }
 
+#ifdef JEMALLOC_SYSCTL_VM_OVERCOMMIT
+static bool
+os_overcommits_sysctl(void)
+{
+	int vm_overcommit;
+	size_t sz;
+
+	sz = sizeof(vm_overcommit);
+	if (sysctlbyname("vm.overcommit", &vm_overcommit, &sz, NULL, 0) != 0)
+		return (false); /* Error. */
+
+	return ((vm_overcommit & 0x3) == 0);
+}
+#endif
+
+#ifdef JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY
+static bool
+os_overcommits_proc(void)
+{
+	int fd;
+	char buf[1];
+	ssize_t nread;
+
+	fd = open("/proc/sys/vm/overcommit_memory", O_RDONLY);
+	if (fd == -1)
+		return (false); /* Error. */
+
+	nread = read(fd, &buf, sizeof(buf));
+	close(fd);
+	if (nread < 1)
+		return (false); /* Error. */
+	/*
+	 * /proc/sys/vm/overcommit_memory meanings:
+	 * 0: Heuristic overcommit.
+	 * 1: Always overcommit.
+	 * 2: Never overcommit.
+	 */
+	return (buf[0] == '0' || buf[0] == '1');
+}
+#endif
+
+void
+pages_boot(void)
+{
+
+#ifndef _WIN32
+	mmap_flags = MAP_PRIVATE | MAP_ANON;
+#endif
+
+#ifdef JEMALLOC_SYSCTL_VM_OVERCOMMIT
+	os_overcommits = os_overcommits_sysctl();
+#elif defined(JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY)
+	os_overcommits = os_overcommits_proc();
+#  ifdef MAP_NORESERVE
+	if (os_overcommits)
+		mmap_flags |= MAP_NORESERVE;
+#  endif
+#else
+	os_overcommits = false;
+#endif
+}
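
pages_boot() above decides once at startup whether the OS overcommits and only then chooses MAP_NORESERVE and whether decommit is worthwhile. A standalone, Linux-only illustration that mirrors the /proc probe used by os_overcommits_proc(); it is not jemalloc code, and on error it simply assumes no overcommit, as the patch does:

#include <fcntl.h>
#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

static bool
os_overcommits(void)
{
	int fd;
	char buf[1];
	ssize_t nread;

	fd = open("/proc/sys/vm/overcommit_memory", O_RDONLY);
	if (fd == -1)
		return (false);	/* Error: assume no overcommit. */
	nread = read(fd, buf, sizeof(buf));
	close(fd);
	if (nread < 1)
		return (false);	/* Error: assume no overcommit. */
	/* Mode 0 (heuristic) and 1 (always) overcommit; mode 2 never does. */
	return (buf[0] == '0' || buf[0] == '1');
}

int
main(void)
{
	printf("OS overcommits: %s\n", os_overcommits() ? "yes" : "no");
	return (0);
}
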
diff --git a/src/prof.c b/src/prof.c
index a92320d..c1f58d4 100644
--- a/src/prof.c
+++ b/src/prof.c
@@ -121,13 +121,13 @@
  * definition.
  */
 
-static bool	prof_tctx_should_destroy(prof_tctx_t *tctx);
+static bool	prof_tctx_should_destroy(tsdn_t *tsdn, prof_tctx_t *tctx);
 static void	prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx);
-static bool	prof_tdata_should_destroy(prof_tdata_t *tdata,
+static bool	prof_tdata_should_destroy(tsdn_t *tsdn, prof_tdata_t *tdata,
     bool even_if_attached);
-static void	prof_tdata_destroy(tsd_t *tsd, prof_tdata_t *tdata,
+static void	prof_tdata_destroy(tsdn_t *tsdn, prof_tdata_t *tdata,
     bool even_if_attached);
-static char	*prof_thread_name_alloc(tsd_t *tsd, const char *thread_name);
+static char	*prof_thread_name_alloc(tsdn_t *tsdn, const char *thread_name);
 
 /******************************************************************************/
 /* Red-black trees. */
@@ -213,22 +213,23 @@
 	}
 
 	if ((uintptr_t)tctx > (uintptr_t)1U) {
-		malloc_mutex_lock(tctx->tdata->lock);
+		malloc_mutex_lock(tsd_tsdn(tsd), tctx->tdata->lock);
 		tctx->prepared = false;
-		if (prof_tctx_should_destroy(tctx))
+		if (prof_tctx_should_destroy(tsd_tsdn(tsd), tctx))
 			prof_tctx_destroy(tsd, tctx);
 		else
-			malloc_mutex_unlock(tctx->tdata->lock);
+			malloc_mutex_unlock(tsd_tsdn(tsd), tctx->tdata->lock);
 	}
 }
 
 void
-prof_malloc_sample_object(const void *ptr, size_t usize, prof_tctx_t *tctx)
+prof_malloc_sample_object(tsdn_t *tsdn, const void *ptr, size_t usize,
+    prof_tctx_t *tctx)
 {
 
-	prof_tctx_set(ptr, usize, tctx);
+	prof_tctx_set(tsdn, ptr, usize, tctx);
 
-	malloc_mutex_lock(tctx->tdata->lock);
+	malloc_mutex_lock(tsdn, tctx->tdata->lock);
 	tctx->cnts.curobjs++;
 	tctx->cnts.curbytes += usize;
 	if (opt_prof_accum) {
@@ -236,23 +237,23 @@
 		tctx->cnts.accumbytes += usize;
 	}
 	tctx->prepared = false;
-	malloc_mutex_unlock(tctx->tdata->lock);
+	malloc_mutex_unlock(tsdn, tctx->tdata->lock);
 }
 
 void
 prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_tctx_t *tctx)
 {
 
-	malloc_mutex_lock(tctx->tdata->lock);
+	malloc_mutex_lock(tsd_tsdn(tsd), tctx->tdata->lock);
 	assert(tctx->cnts.curobjs > 0);
 	assert(tctx->cnts.curbytes >= usize);
 	tctx->cnts.curobjs--;
 	tctx->cnts.curbytes -= usize;
 
-	if (prof_tctx_should_destroy(tctx))
+	if (prof_tctx_should_destroy(tsd_tsdn(tsd), tctx))
 		prof_tctx_destroy(tsd, tctx);
 	else
-		malloc_mutex_unlock(tctx->tdata->lock);
+		malloc_mutex_unlock(tsd_tsdn(tsd), tctx->tdata->lock);
 }
 
 void
@@ -277,7 +278,7 @@
 		tdata->enq = true;
 	}
 
-	malloc_mutex_lock(&bt2gctx_mtx);
+	malloc_mutex_lock(tsd_tsdn(tsd), &bt2gctx_mtx);
 }
 
 JEMALLOC_INLINE_C void
@@ -287,7 +288,7 @@
 	cassert(config_prof);
 	assert(tdata == prof_tdata_get(tsd, false));
 
-	malloc_mutex_unlock(&bt2gctx_mtx);
+	malloc_mutex_unlock(tsd_tsdn(tsd), &bt2gctx_mtx);
 
 	if (tdata != NULL) {
 		bool idump, gdump;
@@ -300,9 +301,9 @@
 		tdata->enq_gdump = false;
 
 		if (idump)
-			prof_idump();
+			prof_idump(tsd_tsdn(tsd));
 		if (gdump)
-			prof_gdump();
+			prof_gdump(tsd_tsdn(tsd));
 	}
 }
 
@@ -546,14 +547,15 @@
 }
 
 static prof_gctx_t *
-prof_gctx_create(tsd_t *tsd, prof_bt_t *bt)
+prof_gctx_create(tsdn_t *tsdn, prof_bt_t *bt)
 {
 	/*
 	 * Create a single allocation that has space for vec of length bt->len.
 	 */
 	size_t size = offsetof(prof_gctx_t, vec) + (bt->len * sizeof(void *));
-	prof_gctx_t *gctx = (prof_gctx_t *)iallocztm(tsd, size,
-	    size2index(size), false, tcache_get(tsd, true), true, NULL, true);
+	prof_gctx_t *gctx = (prof_gctx_t *)iallocztm(tsdn, size,
+	    size2index(size), false, NULL, true, arena_get(TSDN_NULL, 0, true),
+	    true);
 	if (gctx == NULL)
 		return (NULL);
 	gctx->lock = prof_gctx_mutex_choose();
@@ -585,32 +587,33 @@
 	 * into this function.
 	 */
 	prof_enter(tsd, tdata_self);
-	malloc_mutex_lock(gctx->lock);
+	malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock);
 	assert(gctx->nlimbo != 0);
 	if (tctx_tree_empty(&gctx->tctxs) && gctx->nlimbo == 1) {
 		/* Remove gctx from bt2gctx. */
-		if (ckh_remove(tsd, &bt2gctx, &gctx->bt, NULL, NULL))
+		if (ckh_remove(tsd_tsdn(tsd), &bt2gctx, &gctx->bt, NULL, NULL))
 			not_reached();
 		prof_leave(tsd, tdata_self);
 		/* Destroy gctx. */
-		malloc_mutex_unlock(gctx->lock);
-		idalloctm(tsd, gctx, tcache_get(tsd, false), true, true);
+		malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock);
+		idalloctm(tsd_tsdn(tsd), gctx, NULL, true, true);
 	} else {
 		/*
 		 * Compensate for increment in prof_tctx_destroy() or
 		 * prof_lookup().
 		 */
 		gctx->nlimbo--;
-		malloc_mutex_unlock(gctx->lock);
+		malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock);
 		prof_leave(tsd, tdata_self);
 	}
 }
 
-/* tctx->tdata->lock must be held. */
 static bool
-prof_tctx_should_destroy(prof_tctx_t *tctx)
+prof_tctx_should_destroy(tsdn_t *tsdn, prof_tctx_t *tctx)
 {
 
+	malloc_mutex_assert_owner(tsdn, tctx->tdata->lock);
+
 	if (opt_prof_accum)
 		return (false);
 	if (tctx->cnts.curobjs != 0)
@@ -633,7 +636,6 @@
 	return (true);
 }
 
-/* tctx->tdata->lock is held upon entry, and released before return. */
 static void
 prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx)
 {
@@ -641,17 +643,19 @@
 	prof_gctx_t *gctx = tctx->gctx;
 	bool destroy_tdata, destroy_tctx, destroy_gctx;
 
+	malloc_mutex_assert_owner(tsd_tsdn(tsd), tctx->tdata->lock);
+
 	assert(tctx->cnts.curobjs == 0);
 	assert(tctx->cnts.curbytes == 0);
 	assert(!opt_prof_accum);
 	assert(tctx->cnts.accumobjs == 0);
 	assert(tctx->cnts.accumbytes == 0);
 
-	ckh_remove(tsd, &tdata->bt2tctx, &gctx->bt, NULL, NULL);
-	destroy_tdata = prof_tdata_should_destroy(tdata, false);
-	malloc_mutex_unlock(tdata->lock);
+	ckh_remove(tsd_tsdn(tsd), &tdata->bt2tctx, &gctx->bt, NULL, NULL);
+	destroy_tdata = prof_tdata_should_destroy(tsd_tsdn(tsd), tdata, false);
+	malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock);
 
-	malloc_mutex_lock(gctx->lock);
+	malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock);
 	switch (tctx->state) {
 	case prof_tctx_state_nominal:
 		tctx_tree_remove(&gctx->tctxs, tctx);
@@ -691,17 +695,19 @@
 		destroy_tctx = false;
 		destroy_gctx = false;
 	}
-	malloc_mutex_unlock(gctx->lock);
+	malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock);
 	if (destroy_gctx) {
 		prof_gctx_try_destroy(tsd, prof_tdata_get(tsd, false), gctx,
 		    tdata);
 	}
 
+	malloc_mutex_assert_not_owner(tsd_tsdn(tsd), tctx->tdata->lock);
+
 	if (destroy_tdata)
-		prof_tdata_destroy(tsd, tdata, false);
+		prof_tdata_destroy(tsd_tsdn(tsd), tdata, false);
 
 	if (destroy_tctx)
-		idalloctm(tsd, tctx, tcache_get(tsd, false), true, true);
+		idalloctm(tsd_tsdn(tsd), tctx, NULL, true, true);
 }
 
 static bool
@@ -721,17 +727,16 @@
 	prof_enter(tsd, tdata);
 	if (ckh_search(&bt2gctx, bt, &btkey.v, &gctx.v)) {
 		/* bt has never been seen before.  Insert it. */
-		gctx.p = prof_gctx_create(tsd, bt);
+		gctx.p = prof_gctx_create(tsd_tsdn(tsd), bt);
 		if (gctx.v == NULL) {
 			prof_leave(tsd, tdata);
 			return (true);
 		}
 		btkey.p = &gctx.p->bt;
-		if (ckh_insert(tsd, &bt2gctx, btkey.v, gctx.v)) {
+		if (ckh_insert(tsd_tsdn(tsd), &bt2gctx, btkey.v, gctx.v)) {
 			/* OOM. */
 			prof_leave(tsd, tdata);
-			idalloctm(tsd, gctx.v, tcache_get(tsd, false), true,
-			    true);
+			idalloctm(tsd_tsdn(tsd), gctx.v, NULL, true, true);
 			return (true);
 		}
 		new_gctx = true;
@@ -740,9 +745,9 @@
 		 * Increment nlimbo, in order to avoid a race condition with
 		 * prof_tctx_destroy()/prof_gctx_try_destroy().
 		 */
-		malloc_mutex_lock(gctx.p->lock);
+		malloc_mutex_lock(tsd_tsdn(tsd), gctx.p->lock);
 		gctx.p->nlimbo++;
-		malloc_mutex_unlock(gctx.p->lock);
+		malloc_mutex_unlock(tsd_tsdn(tsd), gctx.p->lock);
 		new_gctx = false;
 	}
 	prof_leave(tsd, tdata);
@@ -769,13 +774,12 @@
 	if (tdata == NULL)
 		return (NULL);
 
-	malloc_mutex_lock(tdata->lock);
+	malloc_mutex_lock(tsd_tsdn(tsd), tdata->lock);
 	not_found = ckh_search(&tdata->bt2tctx, bt, NULL, &ret.v);
 	if (!not_found) /* Note double negative! */
 		ret.p->prepared = true;
-	malloc_mutex_unlock(tdata->lock);
+	malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock);
 	if (not_found) {
-		tcache_t *tcache;
 		void *btkey;
 		prof_gctx_t *gctx;
 		bool new_gctx, error;
@@ -789,10 +793,9 @@
 			return (NULL);
 
 		/* Link a prof_tctx_t into gctx for this thread. */
-		tcache = tcache_get(tsd, true);
-		ret.v = iallocztm(tsd, sizeof(prof_tctx_t),
-		    size2index(sizeof(prof_tctx_t)), false, tcache, true, NULL,
-		    true);
+		ret.v = iallocztm(tsd_tsdn(tsd), sizeof(prof_tctx_t),
+		    size2index(sizeof(prof_tctx_t)), false, NULL, true,
+		    arena_ichoose(tsd_tsdn(tsd), NULL), true);
 		if (ret.p == NULL) {
 			if (new_gctx)
 				prof_gctx_try_destroy(tsd, tdata, gctx, tdata);
@@ -806,41 +809,42 @@
 		ret.p->tctx_uid = tdata->tctx_uid_next++;
 		ret.p->prepared = true;
 		ret.p->state = prof_tctx_state_initializing;
-		malloc_mutex_lock(tdata->lock);
-		error = ckh_insert(tsd, &tdata->bt2tctx, btkey, ret.v);
-		malloc_mutex_unlock(tdata->lock);
+		malloc_mutex_lock(tsd_tsdn(tsd), tdata->lock);
+		error = ckh_insert(tsd_tsdn(tsd), &tdata->bt2tctx, btkey,
+		    ret.v);
+		malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock);
 		if (error) {
 			if (new_gctx)
 				prof_gctx_try_destroy(tsd, tdata, gctx, tdata);
-			idalloctm(tsd, ret.v, tcache, true, true);
+			idalloctm(tsd_tsdn(tsd), ret.v, NULL, true, true);
 			return (NULL);
 		}
-		malloc_mutex_lock(gctx->lock);
+		malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock);
 		ret.p->state = prof_tctx_state_nominal;
 		tctx_tree_insert(&gctx->tctxs, ret.p);
 		gctx->nlimbo--;
-		malloc_mutex_unlock(gctx->lock);
+		malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock);
 	}
 
 	return (ret.p);
 }
 
+/*
+ * The bodies of this function and prof_leakcheck() are compiled out unless heap
+ * profiling is enabled, so that it is possible to compile jemalloc with
+ * floating point support completely disabled.  Avoiding floating point code is
+ * important on memory-constrained systems, but it also enables a workaround for
+ * versions of glibc that don't properly save/restore floating point registers
+ * during dynamic lazy symbol loading (which internally calls into whatever
+ * malloc implementation happens to be integrated into the application).  Note
+ * that some compilers (e.g. gcc 4.8) may use floating point registers for
+ * fast memory moves, so jemalloc must be compiled with such optimizations
+ * disabled (e.g. -mno-sse) in order for the workaround to be complete.
+ */
 void
 prof_sample_threshold_update(prof_tdata_t *tdata)
 {
-	/*
-	 * The body of this function is compiled out unless heap profiling is
-	 * enabled, so that it is possible to compile jemalloc with floating
-	 * point support completely disabled.  Avoiding floating point code is
-	 * important on memory-constrained systems, but it also enables a
-	 * workaround for versions of glibc that don't properly save/restore
-	 * floating point registers during dynamic lazy symbol loading (which
-	 * internally calls into whatever malloc implementation happens to be
-	 * integrated into the application).  Note that some compilers (e.g.
-	 * gcc 4.8) may use floating point registers for fast memory moves, so
-	 * jemalloc must be compiled with such optimizations disabled (e.g.
-	 * -mno-sse) in order for the workaround to be complete.
-	 */
 #ifdef JEMALLOC_PROF
 	uint64_t r;
 	double u;
@@ -894,11 +898,13 @@
 prof_tdata_count(void)
 {
 	size_t tdata_count = 0;
+	tsdn_t *tsdn;
 
-	malloc_mutex_lock(&tdatas_mtx);
+	tsdn = tsdn_fetch();
+	malloc_mutex_lock(tsdn, &tdatas_mtx);
 	tdata_tree_iter(&tdatas, NULL, prof_tdata_count_iter,
 	    (void *)&tdata_count);
-	malloc_mutex_unlock(&tdatas_mtx);
+	malloc_mutex_unlock(tsdn, &tdatas_mtx);
 
 	return (tdata_count);
 }
@@ -917,9 +923,9 @@
 	if (tdata == NULL)
 		return (0);
 
-	malloc_mutex_lock(&bt2gctx_mtx);
+	malloc_mutex_lock(tsd_tsdn(tsd), &bt2gctx_mtx);
 	bt_count = ckh_count(&bt2gctx);
-	malloc_mutex_unlock(&bt2gctx_mtx);
+	malloc_mutex_unlock(tsd_tsdn(tsd), &bt2gctx_mtx);
 
 	return (bt_count);
 }
@@ -1032,20 +1038,21 @@
 	return (ret);
 }
 
-/* tctx->tdata->lock is held. */
 static void
-prof_tctx_merge_tdata(prof_tctx_t *tctx, prof_tdata_t *tdata)
+prof_tctx_merge_tdata(tsdn_t *tsdn, prof_tctx_t *tctx, prof_tdata_t *tdata)
 {
 
-	malloc_mutex_lock(tctx->gctx->lock);
+	malloc_mutex_assert_owner(tsdn, tctx->tdata->lock);
+
+	malloc_mutex_lock(tsdn, tctx->gctx->lock);
 
 	switch (tctx->state) {
 	case prof_tctx_state_initializing:
-		malloc_mutex_unlock(tctx->gctx->lock);
+		malloc_mutex_unlock(tsdn, tctx->gctx->lock);
 		return;
 	case prof_tctx_state_nominal:
 		tctx->state = prof_tctx_state_dumping;
-		malloc_mutex_unlock(tctx->gctx->lock);
+		malloc_mutex_unlock(tsdn, tctx->gctx->lock);
 
 		memcpy(&tctx->dump_cnts, &tctx->cnts, sizeof(prof_cnt_t));
 
@@ -1064,11 +1071,12 @@
 	}
 }
 
-/* gctx->lock is held. */
 static void
-prof_tctx_merge_gctx(prof_tctx_t *tctx, prof_gctx_t *gctx)
+prof_tctx_merge_gctx(tsdn_t *tsdn, prof_tctx_t *tctx, prof_gctx_t *gctx)
 {
 
+	malloc_mutex_assert_owner(tsdn, gctx->lock);
+
 	gctx->cnt_summed.curobjs += tctx->dump_cnts.curobjs;
 	gctx->cnt_summed.curbytes += tctx->dump_cnts.curbytes;
 	if (opt_prof_accum) {
@@ -1077,10 +1085,12 @@
 	}
 }
 
-/* tctx->gctx is held. */
 static prof_tctx_t *
 prof_tctx_merge_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg)
 {
+	tsdn_t *tsdn = (tsdn_t *)arg;
+
+	malloc_mutex_assert_owner(tsdn, tctx->gctx->lock);
 
 	switch (tctx->state) {
 	case prof_tctx_state_nominal:
@@ -1088,7 +1098,7 @@
 		break;
 	case prof_tctx_state_dumping:
 	case prof_tctx_state_purgatory:
-		prof_tctx_merge_gctx(tctx, tctx->gctx);
+		prof_tctx_merge_gctx(tsdn, tctx, tctx->gctx);
 		break;
 	default:
 		not_reached();
@@ -1097,11 +1107,18 @@
 	return (NULL);
 }
 
-/* gctx->lock is held. */
+struct prof_tctx_dump_iter_arg_s {
+	tsdn_t	*tsdn;
+	bool	propagate_err;
+};
+
 static prof_tctx_t *
-prof_tctx_dump_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg)
+prof_tctx_dump_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *opaque)
 {
-	bool propagate_err = *(bool *)arg;
+	struct prof_tctx_dump_iter_arg_s *arg =
+	    (struct prof_tctx_dump_iter_arg_s *)opaque;
+
+	malloc_mutex_assert_owner(arg->tsdn, tctx->gctx->lock);
 
 	switch (tctx->state) {
 	case prof_tctx_state_initializing:
@@ -1110,7 +1127,7 @@
 		break;
 	case prof_tctx_state_dumping:
 	case prof_tctx_state_purgatory:
-		if (prof_dump_printf(propagate_err,
+		if (prof_dump_printf(arg->propagate_err,
 		    "  t%"FMTu64": %"FMTu64": %"FMTu64" [%"FMTu64": "
 		    "%"FMTu64"]\n", tctx->thr_uid, tctx->dump_cnts.curobjs,
 		    tctx->dump_cnts.curbytes, tctx->dump_cnts.accumobjs,
@@ -1123,12 +1140,14 @@
 	return (NULL);
 }
 
-/* tctx->gctx is held. */
 static prof_tctx_t *
 prof_tctx_finish_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg)
 {
+	tsdn_t *tsdn = (tsdn_t *)arg;
 	prof_tctx_t *ret;
 
+	malloc_mutex_assert_owner(tsdn, tctx->gctx->lock);
+
 	switch (tctx->state) {
 	case prof_tctx_state_nominal:
 		/* New since dumping started; ignore. */
@@ -1149,12 +1168,12 @@
 }
 
 static void
-prof_dump_gctx_prep(prof_gctx_t *gctx, prof_gctx_tree_t *gctxs)
+prof_dump_gctx_prep(tsdn_t *tsdn, prof_gctx_t *gctx, prof_gctx_tree_t *gctxs)
 {
 
 	cassert(config_prof);
 
-	malloc_mutex_lock(gctx->lock);
+	malloc_mutex_lock(tsdn, gctx->lock);
 
 	/*
 	 * Increment nlimbo so that gctx won't go away before dump.
@@ -1166,19 +1185,26 @@
 
 	memset(&gctx->cnt_summed, 0, sizeof(prof_cnt_t));
 
-	malloc_mutex_unlock(gctx->lock);
+	malloc_mutex_unlock(tsdn, gctx->lock);
 }
 
-static prof_gctx_t *
-prof_gctx_merge_iter(prof_gctx_tree_t *gctxs, prof_gctx_t *gctx, void *arg)
-{
-	size_t *leak_ngctx = (size_t *)arg;
+struct prof_gctx_merge_iter_arg_s {
+	tsdn_t	*tsdn;
+	size_t	leak_ngctx;
+};
 
-	malloc_mutex_lock(gctx->lock);
-	tctx_tree_iter(&gctx->tctxs, NULL, prof_tctx_merge_iter, NULL);
+static prof_gctx_t *
+prof_gctx_merge_iter(prof_gctx_tree_t *gctxs, prof_gctx_t *gctx, void *opaque)
+{
+	struct prof_gctx_merge_iter_arg_s *arg =
+	    (struct prof_gctx_merge_iter_arg_s *)opaque;
+
+	malloc_mutex_lock(arg->tsdn, gctx->lock);
+	tctx_tree_iter(&gctx->tctxs, NULL, prof_tctx_merge_iter,
+	    (void *)arg->tsdn);
 	if (gctx->cnt_summed.curobjs != 0)
-		(*leak_ngctx)++;
-	malloc_mutex_unlock(gctx->lock);
+		arg->leak_ngctx++;
+	malloc_mutex_unlock(arg->tsdn, gctx->lock);
 
 	return (NULL);
 }
@@ -1197,7 +1223,7 @@
 	 */
 	while ((gctx = gctx_tree_first(gctxs)) != NULL) {
 		gctx_tree_remove(gctxs, gctx);
-		malloc_mutex_lock(gctx->lock);
+		malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock);
 		{
 			prof_tctx_t *next;
 
@@ -1205,14 +1231,15 @@
 			do {
 				prof_tctx_t *to_destroy =
 				    tctx_tree_iter(&gctx->tctxs, next,
-				    prof_tctx_finish_iter, NULL);
+				    prof_tctx_finish_iter,
+				    (void *)tsd_tsdn(tsd));
 				if (to_destroy != NULL) {
 					next = tctx_tree_next(&gctx->tctxs,
 					    to_destroy);
 					tctx_tree_remove(&gctx->tctxs,
 					    to_destroy);
-					idalloctm(tsd, to_destroy,
-					    tcache_get(tsd, false), true, true);
+					idalloctm(tsd_tsdn(tsd), to_destroy,
+					    NULL, true, true);
 				} else
 					next = NULL;
 			} while (next != NULL);
@@ -1220,19 +1247,26 @@
 		gctx->nlimbo--;
 		if (prof_gctx_should_destroy(gctx)) {
 			gctx->nlimbo++;
-			malloc_mutex_unlock(gctx->lock);
+			malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock);
 			prof_gctx_try_destroy(tsd, tdata, gctx, tdata);
 		} else
-			malloc_mutex_unlock(gctx->lock);
+			malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock);
 	}
 }
 
-static prof_tdata_t *
-prof_tdata_merge_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, void *arg)
-{
-	prof_cnt_t *cnt_all = (prof_cnt_t *)arg;
+struct prof_tdata_merge_iter_arg_s {
+	tsdn_t		*tsdn;
+	prof_cnt_t	cnt_all;
+};
 
-	malloc_mutex_lock(tdata->lock);
+static prof_tdata_t *
+prof_tdata_merge_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata,
+    void *opaque)
+{
+	struct prof_tdata_merge_iter_arg_s *arg =
+	    (struct prof_tdata_merge_iter_arg_s *)opaque;
+
+	malloc_mutex_lock(arg->tsdn, tdata->lock);
 	if (!tdata->expired) {
 		size_t tabind;
 		union {
@@ -1244,17 +1278,17 @@
 		memset(&tdata->cnt_summed, 0, sizeof(prof_cnt_t));
 		for (tabind = 0; !ckh_iter(&tdata->bt2tctx, &tabind, NULL,
 		    &tctx.v);)
-			prof_tctx_merge_tdata(tctx.p, tdata);
+			prof_tctx_merge_tdata(arg->tsdn, tctx.p, tdata);
 
-		cnt_all->curobjs += tdata->cnt_summed.curobjs;
-		cnt_all->curbytes += tdata->cnt_summed.curbytes;
+		arg->cnt_all.curobjs += tdata->cnt_summed.curobjs;
+		arg->cnt_all.curbytes += tdata->cnt_summed.curbytes;
 		if (opt_prof_accum) {
-			cnt_all->accumobjs += tdata->cnt_summed.accumobjs;
-			cnt_all->accumbytes += tdata->cnt_summed.accumbytes;
+			arg->cnt_all.accumobjs += tdata->cnt_summed.accumobjs;
+			arg->cnt_all.accumbytes += tdata->cnt_summed.accumbytes;
 		}
 	} else
 		tdata->dumping = false;
-	malloc_mutex_unlock(tdata->lock);
+	malloc_mutex_unlock(arg->tsdn, tdata->lock);
 
 	return (NULL);
 }
@@ -1283,7 +1317,7 @@
 #define	prof_dump_header JEMALLOC_N(prof_dump_header_impl)
 #endif
 static bool
-prof_dump_header(bool propagate_err, const prof_cnt_t *cnt_all)
+prof_dump_header(tsdn_t *tsdn, bool propagate_err, const prof_cnt_t *cnt_all)
 {
 	bool ret;
 
@@ -1294,10 +1328,10 @@
 	    cnt_all->curbytes, cnt_all->accumobjs, cnt_all->accumbytes))
 		return (true);
 
-	malloc_mutex_lock(&tdatas_mtx);
+	malloc_mutex_lock(tsdn, &tdatas_mtx);
 	ret = (tdata_tree_iter(&tdatas, NULL, prof_tdata_dump_iter,
 	    (void *)&propagate_err) != NULL);
-	malloc_mutex_unlock(&tdatas_mtx);
+	malloc_mutex_unlock(tsdn, &tdatas_mtx);
 	return (ret);
 }
 #ifdef JEMALLOC_JET
@@ -1306,15 +1340,16 @@
 prof_dump_header_t *prof_dump_header = JEMALLOC_N(prof_dump_header_impl);
 #endif
 
-/* gctx->lock is held. */
 static bool
-prof_dump_gctx(bool propagate_err, prof_gctx_t *gctx, const prof_bt_t *bt,
-    prof_gctx_tree_t *gctxs)
+prof_dump_gctx(tsdn_t *tsdn, bool propagate_err, prof_gctx_t *gctx,
+    const prof_bt_t *bt, prof_gctx_tree_t *gctxs)
 {
 	bool ret;
 	unsigned i;
+	struct prof_tctx_dump_iter_arg_s prof_tctx_dump_iter_arg;
 
 	cassert(config_prof);
+	malloc_mutex_assert_owner(tsdn, gctx->lock);
 
 	/* Avoid dumping such gctx's that have no useful data. */
 	if ((!opt_prof_accum && gctx->cnt_summed.curobjs == 0) ||
@@ -1348,8 +1383,10 @@
 		goto label_return;
 	}
 
+	prof_tctx_dump_iter_arg.tsdn = tsdn;
+	prof_tctx_dump_iter_arg.propagate_err = propagate_err;
 	if (tctx_tree_iter(&gctx->tctxs, NULL, prof_tctx_dump_iter,
-	    (void *)&propagate_err) != NULL) {
+	    (void *)&prof_tctx_dump_iter_arg) != NULL) {
 		ret = true;
 		goto label_return;
 	}
@@ -1442,39 +1479,66 @@
 	return (ret);
 }
 
+/*
+ * See prof_sample_threshold_update() comment for why the body of this function
+ * is conditionally compiled.
+ */
 static void
 prof_leakcheck(const prof_cnt_t *cnt_all, size_t leak_ngctx,
     const char *filename)
 {
 
+#ifdef JEMALLOC_PROF
+	/*
+	 * Scaling is equivalent to AdjustSamples() in jeprof, but the result may
+	 * differ slightly from what jeprof reports, because here we scale the
+	 * summary values, whereas jeprof scales each context individually and
+	 * reports the sums of the scaled values.
+	 */
 	if (cnt_all->curbytes != 0) {
-		malloc_printf("<jemalloc>: Leak summary: %"FMTu64" byte%s, %"
-		    FMTu64" object%s, %zu context%s\n",
-		    cnt_all->curbytes, (cnt_all->curbytes != 1) ? "s" : "",
-		    cnt_all->curobjs, (cnt_all->curobjs != 1) ? "s" : "",
-		    leak_ngctx, (leak_ngctx != 1) ? "s" : "");
+		double sample_period = (double)((uint64_t)1 << lg_prof_sample);
+		double ratio = (((double)cnt_all->curbytes) /
+		    (double)cnt_all->curobjs) / sample_period;
+		double scale_factor = 1.0 / (1.0 - exp(-ratio));
+		uint64_t curbytes = (uint64_t)round(((double)cnt_all->curbytes)
+		    * scale_factor);
+		uint64_t curobjs = (uint64_t)round(((double)cnt_all->curobjs) *
+		    scale_factor);
+
+		malloc_printf("<jemalloc>: Leak approximation summary: ~%"FMTu64
+		    " byte%s, ~%"FMTu64" object%s, >= %zu context%s\n",
+		    curbytes, (curbytes != 1) ? "s" : "", curobjs, (curobjs !=
+		    1) ? "s" : "", leak_ngctx, (leak_ngctx != 1) ? "s" : "");
 		malloc_printf(
 		    "<jemalloc>: Run jeprof on \"%s\" for leak detail\n",
 		    filename);
 	}
+#endif
 }
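
The scaling added to prof_leakcheck() converts the sampled totals into an approximation of the true leak volume using the same exponential model as jeprof's AdjustSamples(). With made-up numbers: a 2^19-byte sample period and 1000 sampled objects totalling 64 MiB give ratio = 0.128 and a scale factor of roughly 8.3, so the summary reports approximately 8,300 objects and roughly 530 MiB. A worked example (compile with -lm):

#include <inttypes.h>
#include <math.h>
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	/* Hypothetical profile: 1000 sampled objects, 64 MiB, 2^19 B period. */
	unsigned lg_prof_sample = 19;
	uint64_t curobjs = 1000;
	uint64_t curbytes = 64 * 1024 * 1024;

	double sample_period = (double)((uint64_t)1 << lg_prof_sample);
	double ratio = (((double)curbytes) / (double)curobjs) / sample_period;
	double scale_factor = 1.0 / (1.0 - exp(-ratio));
	uint64_t bytes = (uint64_t)round((double)curbytes * scale_factor);
	uint64_t objs = (uint64_t)round((double)curobjs * scale_factor);

	printf("ratio = %.3f, scale_factor = %.3f\n", ratio, scale_factor);
	printf("~%" PRIu64 " bytes in ~%" PRIu64 " objects\n", bytes, objs);
	return (0);
}
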
 
+struct prof_gctx_dump_iter_arg_s {
+	tsdn_t	*tsdn;
+	bool	propagate_err;
+};
+
 static prof_gctx_t *
-prof_gctx_dump_iter(prof_gctx_tree_t *gctxs, prof_gctx_t *gctx, void *arg)
+prof_gctx_dump_iter(prof_gctx_tree_t *gctxs, prof_gctx_t *gctx, void *opaque)
 {
 	prof_gctx_t *ret;
-	bool propagate_err = *(bool *)arg;
+	struct prof_gctx_dump_iter_arg_s *arg =
+	    (struct prof_gctx_dump_iter_arg_s *)opaque;
 
-	malloc_mutex_lock(gctx->lock);
+	malloc_mutex_lock(arg->tsdn, gctx->lock);
 
-	if (prof_dump_gctx(propagate_err, gctx, &gctx->bt, gctxs)) {
+	if (prof_dump_gctx(arg->tsdn, arg->propagate_err, gctx, &gctx->bt,
+	    gctxs)) {
 		ret = gctx;
 		goto label_return;
 	}
 
 	ret = NULL;
 label_return:
-	malloc_mutex_unlock(gctx->lock);
+	malloc_mutex_unlock(arg->tsdn, gctx->lock);
 	return (ret);
 }
 
@@ -1482,13 +1546,14 @@
 prof_dump(tsd_t *tsd, bool propagate_err, const char *filename, bool leakcheck)
 {
 	prof_tdata_t *tdata;
-	prof_cnt_t cnt_all;
+	struct prof_tdata_merge_iter_arg_s prof_tdata_merge_iter_arg;
 	size_t tabind;
 	union {
 		prof_gctx_t	*p;
 		void		*v;
 	} gctx;
-	size_t leak_ngctx;
+	struct prof_gctx_merge_iter_arg_s prof_gctx_merge_iter_arg;
+	struct prof_gctx_dump_iter_arg_s prof_gctx_dump_iter_arg;
 	prof_gctx_tree_t gctxs;
 
 	cassert(config_prof);
@@ -1497,7 +1562,7 @@
 	if (tdata == NULL)
 		return (true);
 
-	malloc_mutex_lock(&prof_dump_mtx);
+	malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_mtx);
 	prof_enter(tsd, tdata);
 
 	/*
@@ -1506,20 +1571,24 @@
 	 */
 	gctx_tree_new(&gctxs);
 	for (tabind = 0; !ckh_iter(&bt2gctx, &tabind, NULL, &gctx.v);)
-		prof_dump_gctx_prep(gctx.p, &gctxs);
+		prof_dump_gctx_prep(tsd_tsdn(tsd), gctx.p, &gctxs);
 
 	/*
 	 * Iterate over tdatas, and for the non-expired ones snapshot their tctx
 	 * stats and merge them into the associated gctx's.
 	 */
-	memset(&cnt_all, 0, sizeof(prof_cnt_t));
-	malloc_mutex_lock(&tdatas_mtx);
-	tdata_tree_iter(&tdatas, NULL, prof_tdata_merge_iter, (void *)&cnt_all);
-	malloc_mutex_unlock(&tdatas_mtx);
+	prof_tdata_merge_iter_arg.tsdn = tsd_tsdn(tsd);
+	memset(&prof_tdata_merge_iter_arg.cnt_all, 0, sizeof(prof_cnt_t));
+	malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx);
+	tdata_tree_iter(&tdatas, NULL, prof_tdata_merge_iter,
+	    (void *)&prof_tdata_merge_iter_arg);
+	malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx);
 
 	/* Merge tctx stats into gctx's. */
-	leak_ngctx = 0;
-	gctx_tree_iter(&gctxs, NULL, prof_gctx_merge_iter, (void *)&leak_ngctx);
+	prof_gctx_merge_iter_arg.tsdn = tsd_tsdn(tsd);
+	prof_gctx_merge_iter_arg.leak_ngctx = 0;
+	gctx_tree_iter(&gctxs, NULL, prof_gctx_merge_iter,
+	    (void *)&prof_gctx_merge_iter_arg);
 
 	prof_leave(tsd, tdata);
 
@@ -1528,12 +1597,15 @@
 		goto label_open_close_error;
 
 	/* Dump profile header. */
-	if (prof_dump_header(propagate_err, &cnt_all))
+	if (prof_dump_header(tsd_tsdn(tsd), propagate_err,
+	    &prof_tdata_merge_iter_arg.cnt_all))
 		goto label_write_error;
 
 	/* Dump per gctx profile stats. */
+	prof_gctx_dump_iter_arg.tsdn = tsd_tsdn(tsd);
+	prof_gctx_dump_iter_arg.propagate_err = propagate_err;
 	if (gctx_tree_iter(&gctxs, NULL, prof_gctx_dump_iter,
-	    (void *)&propagate_err) != NULL)
+	    (void *)&prof_gctx_dump_iter_arg) != NULL)
 		goto label_write_error;
 
 	/* Dump /proc/<pid>/maps if possible. */
@@ -1544,17 +1616,18 @@
 		goto label_open_close_error;
 
 	prof_gctx_finish(tsd, &gctxs);
-	malloc_mutex_unlock(&prof_dump_mtx);
+	malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_mtx);
 
-	if (leakcheck)
-		prof_leakcheck(&cnt_all, leak_ngctx, filename);
-
+	if (leakcheck) {
+		prof_leakcheck(&prof_tdata_merge_iter_arg.cnt_all,
+		    prof_gctx_merge_iter_arg.leak_ngctx, filename);
+	}
 	return (false);
 label_write_error:
 	prof_dump_close(propagate_err);
 label_open_close_error:
 	prof_gctx_finish(tsd, &gctxs);
-	malloc_mutex_unlock(&prof_dump_mtx);
+	malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_mtx);
 	return (true);
 }
 
@@ -1594,23 +1667,23 @@
 		return;
 	tsd = tsd_fetch();
 
-	malloc_mutex_lock(&prof_dump_seq_mtx);
+	malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_seq_mtx);
 	prof_dump_filename(filename, 'f', VSEQ_INVALID);
-	malloc_mutex_unlock(&prof_dump_seq_mtx);
+	malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_seq_mtx);
 	prof_dump(tsd, false, filename, opt_prof_leak);
 }
 
 void
-prof_idump(void)
+prof_idump(tsdn_t *tsdn)
 {
 	tsd_t *tsd;
 	prof_tdata_t *tdata;
 
 	cassert(config_prof);
 
-	if (!prof_booted)
+	if (!prof_booted || tsdn_null(tsdn))
 		return;
-	tsd = tsd_fetch();
+	tsd = tsdn_tsd(tsdn);
 	tdata = prof_tdata_get(tsd, false);
 	if (tdata == NULL)
 		return;
@@ -1621,50 +1694,48 @@
 
 	if (opt_prof_prefix[0] != '\0') {
 		char filename[PATH_MAX + 1];
-		malloc_mutex_lock(&prof_dump_seq_mtx);
+		malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_seq_mtx);
 		prof_dump_filename(filename, 'i', prof_dump_iseq);
 		prof_dump_iseq++;
-		malloc_mutex_unlock(&prof_dump_seq_mtx);
+		malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_seq_mtx);
 		prof_dump(tsd, false, filename, false);
 	}
 }
 
 bool
-prof_mdump(const char *filename)
+prof_mdump(tsd_t *tsd, const char *filename)
 {
-	tsd_t *tsd;
 	char filename_buf[DUMP_FILENAME_BUFSIZE];
 
 	cassert(config_prof);
 
 	if (!opt_prof || !prof_booted)
 		return (true);
-	tsd = tsd_fetch();
 
 	if (filename == NULL) {
 		/* No filename specified, so automatically generate one. */
 		if (opt_prof_prefix[0] == '\0')
 			return (true);
-		malloc_mutex_lock(&prof_dump_seq_mtx);
+		malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_seq_mtx);
 		prof_dump_filename(filename_buf, 'm', prof_dump_mseq);
 		prof_dump_mseq++;
-		malloc_mutex_unlock(&prof_dump_seq_mtx);
+		malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_seq_mtx);
 		filename = filename_buf;
 	}
 	return (prof_dump(tsd, true, filename, false));
 }
 
 void
-prof_gdump(void)
+prof_gdump(tsdn_t *tsdn)
 {
 	tsd_t *tsd;
 	prof_tdata_t *tdata;
 
 	cassert(config_prof);
 
-	if (!prof_booted)
+	if (!prof_booted || tsdn_null(tsdn))
 		return;
-	tsd = tsd_fetch();
+	tsd = tsdn_tsd(tsdn);
 	tdata = prof_tdata_get(tsd, false);
 	if (tdata == NULL)
 		return;
@@ -1675,10 +1746,10 @@
 
 	if (opt_prof_prefix[0] != '\0') {
 		char filename[DUMP_FILENAME_BUFSIZE];
-		malloc_mutex_lock(&prof_dump_seq_mtx);
+		malloc_mutex_lock(tsdn, &prof_dump_seq_mtx);
 		prof_dump_filename(filename, 'u', prof_dump_useq);
 		prof_dump_useq++;
-		malloc_mutex_unlock(&prof_dump_seq_mtx);
+		malloc_mutex_unlock(tsdn, &prof_dump_seq_mtx);
 		prof_dump(tsd, false, filename, false);
 	}
 }
@@ -1707,31 +1778,30 @@
 }
 
 JEMALLOC_INLINE_C uint64_t
-prof_thr_uid_alloc(void)
+prof_thr_uid_alloc(tsdn_t *tsdn)
 {
 	uint64_t thr_uid;
 
-	malloc_mutex_lock(&next_thr_uid_mtx);
+	malloc_mutex_lock(tsdn, &next_thr_uid_mtx);
 	thr_uid = next_thr_uid;
 	next_thr_uid++;
-	malloc_mutex_unlock(&next_thr_uid_mtx);
+	malloc_mutex_unlock(tsdn, &next_thr_uid_mtx);
 
 	return (thr_uid);
 }
 
 static prof_tdata_t *
-prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim,
+prof_tdata_init_impl(tsdn_t *tsdn, uint64_t thr_uid, uint64_t thr_discrim,
     char *thread_name, bool active)
 {
 	prof_tdata_t *tdata;
-	tcache_t *tcache;
 
 	cassert(config_prof);
 
 	/* Initialize an empty cache for this thread. */
-	tcache = tcache_get(tsd, true);
-	tdata = (prof_tdata_t *)iallocztm(tsd, sizeof(prof_tdata_t),
-	    size2index(sizeof(prof_tdata_t)), false, tcache, true, NULL, true);
+	tdata = (prof_tdata_t *)iallocztm(tsdn, sizeof(prof_tdata_t),
+	    size2index(sizeof(prof_tdata_t)), false, NULL, true,
+	    arena_get(TSDN_NULL, 0, true), true);
 	if (tdata == NULL)
 		return (NULL);
 
@@ -1743,9 +1813,9 @@
 	tdata->expired = false;
 	tdata->tctx_uid_next = 0;
 
-	if (ckh_new(tsd, &tdata->bt2tctx, PROF_CKH_MINITEMS,
+	if (ckh_new(tsdn, &tdata->bt2tctx, PROF_CKH_MINITEMS,
 	    prof_bt_hash, prof_bt_keycomp)) {
-		idalloctm(tsd, tdata, tcache, true, true);
+		idalloctm(tsdn, tdata, NULL, true, true);
 		return (NULL);
 	}
 
@@ -1759,24 +1829,23 @@
 	tdata->dumping = false;
 	tdata->active = active;
 
-	malloc_mutex_lock(&tdatas_mtx);
+	malloc_mutex_lock(tsdn, &tdatas_mtx);
 	tdata_tree_insert(&tdatas, tdata);
-	malloc_mutex_unlock(&tdatas_mtx);
+	malloc_mutex_unlock(tsdn, &tdatas_mtx);
 
 	return (tdata);
 }
 
 prof_tdata_t *
-prof_tdata_init(tsd_t *tsd)
+prof_tdata_init(tsdn_t *tsdn)
 {
 
-	return (prof_tdata_init_impl(tsd, prof_thr_uid_alloc(), 0, NULL,
-	    prof_thread_active_init_get()));
+	return (prof_tdata_init_impl(tsdn, prof_thr_uid_alloc(tsdn), 0, NULL,
+	    prof_thread_active_init_get(tsdn)));
 }
 
-/* tdata->lock must be held. */
 static bool
-prof_tdata_should_destroy(prof_tdata_t *tdata, bool even_if_attached)
+prof_tdata_should_destroy_unlocked(prof_tdata_t *tdata, bool even_if_attached)
 {
 
 	if (tdata->attached && !even_if_attached)
@@ -1786,32 +1855,42 @@
 	return (true);
 }
 
-/* tdatas_mtx must be held. */
-static void
-prof_tdata_destroy_locked(tsd_t *tsd, prof_tdata_t *tdata,
+static bool
+prof_tdata_should_destroy(tsdn_t *tsdn, prof_tdata_t *tdata,
     bool even_if_attached)
 {
-	tcache_t *tcache;
 
-	assert(prof_tdata_should_destroy(tdata, even_if_attached));
-	assert(tsd_prof_tdata_get(tsd) != tdata);
+	malloc_mutex_assert_owner(tsdn, tdata->lock);
 
-	tdata_tree_remove(&tdatas, tdata);
-
-	tcache = tcache_get(tsd, false);
-	if (tdata->thread_name != NULL)
-		idalloctm(tsd, tdata->thread_name, tcache, true, true);
-	ckh_delete(tsd, &tdata->bt2tctx);
-	idalloctm(tsd, tdata, tcache, true, true);
+	return (prof_tdata_should_destroy_unlocked(tdata, even_if_attached));
 }
 
 static void
-prof_tdata_destroy(tsd_t *tsd, prof_tdata_t *tdata, bool even_if_attached)
+prof_tdata_destroy_locked(tsdn_t *tsdn, prof_tdata_t *tdata,
+    bool even_if_attached)
 {
 
-	malloc_mutex_lock(&tdatas_mtx);
-	prof_tdata_destroy_locked(tsd, tdata, even_if_attached);
-	malloc_mutex_unlock(&tdatas_mtx);
+	malloc_mutex_assert_owner(tsdn, &tdatas_mtx);
+
+	assert(tsdn_null(tsdn) || tsd_prof_tdata_get(tsdn_tsd(tsdn)) != tdata);
+
+	tdata_tree_remove(&tdatas, tdata);
+
+	assert(prof_tdata_should_destroy_unlocked(tdata, even_if_attached));
+
+	if (tdata->thread_name != NULL)
+		idalloctm(tsdn, tdata->thread_name, NULL, true, true);
+	ckh_delete(tsdn, &tdata->bt2tctx);
+	idalloctm(tsdn, tdata, NULL, true, true);
+}
+
+static void
+prof_tdata_destroy(tsdn_t *tsdn, prof_tdata_t *tdata, bool even_if_attached)
+{
+
+	malloc_mutex_lock(tsdn, &tdatas_mtx);
+	prof_tdata_destroy_locked(tsdn, tdata, even_if_attached);
+	malloc_mutex_unlock(tsdn, &tdatas_mtx);
 }
 
 static void
@@ -1819,9 +1898,10 @@
 {
 	bool destroy_tdata;
 
-	malloc_mutex_lock(tdata->lock);
+	malloc_mutex_lock(tsd_tsdn(tsd), tdata->lock);
 	if (tdata->attached) {
-		destroy_tdata = prof_tdata_should_destroy(tdata, true);
+		destroy_tdata = prof_tdata_should_destroy(tsd_tsdn(tsd), tdata,
+		    true);
 		/*
 		 * Only detach if !destroy_tdata, because detaching would allow
 		 * another thread to win the race to destroy tdata.
@@ -1831,9 +1911,9 @@
 		tsd_prof_tdata_set(tsd, NULL);
 	} else
 		destroy_tdata = false;
-	malloc_mutex_unlock(tdata->lock);
+	malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock);
 	if (destroy_tdata)
-		prof_tdata_destroy(tsd, tdata, true);
+		prof_tdata_destroy(tsd_tsdn(tsd), tdata, true);
 }
 
 prof_tdata_t *
@@ -1842,27 +1922,27 @@
 	uint64_t thr_uid = tdata->thr_uid;
 	uint64_t thr_discrim = tdata->thr_discrim + 1;
 	char *thread_name = (tdata->thread_name != NULL) ?
-	    prof_thread_name_alloc(tsd, tdata->thread_name) : NULL;
+	    prof_thread_name_alloc(tsd_tsdn(tsd), tdata->thread_name) : NULL;
 	bool active = tdata->active;
 
 	prof_tdata_detach(tsd, tdata);
-	return (prof_tdata_init_impl(tsd, thr_uid, thr_discrim, thread_name,
-	    active));
+	return (prof_tdata_init_impl(tsd_tsdn(tsd), thr_uid, thr_discrim,
+	    thread_name, active));
 }
 
 static bool
-prof_tdata_expire(prof_tdata_t *tdata)
+prof_tdata_expire(tsdn_t *tsdn, prof_tdata_t *tdata)
 {
 	bool destroy_tdata;
 
-	malloc_mutex_lock(tdata->lock);
+	malloc_mutex_lock(tsdn, tdata->lock);
 	if (!tdata->expired) {
 		tdata->expired = true;
 		destroy_tdata = tdata->attached ? false :
-		    prof_tdata_should_destroy(tdata, false);
+		    prof_tdata_should_destroy(tsdn, tdata, false);
 	} else
 		destroy_tdata = false;
-	malloc_mutex_unlock(tdata->lock);
+	malloc_mutex_unlock(tsdn, tdata->lock);
 
 	return (destroy_tdata);
 }
@@ -1870,35 +1950,36 @@
 static prof_tdata_t *
 prof_tdata_reset_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, void *arg)
 {
+	tsdn_t *tsdn = (tsdn_t *)arg;
 
-	return (prof_tdata_expire(tdata) ? tdata : NULL);
+	return (prof_tdata_expire(tsdn, tdata) ? tdata : NULL);
 }
 
 void
-prof_reset(tsd_t *tsd, size_t lg_sample)
+prof_reset(tsdn_t *tsdn, size_t lg_sample)
 {
 	prof_tdata_t *next;
 
 	assert(lg_sample < (sizeof(uint64_t) << 3));
 
-	malloc_mutex_lock(&prof_dump_mtx);
-	malloc_mutex_lock(&tdatas_mtx);
+	malloc_mutex_lock(tsdn, &prof_dump_mtx);
+	malloc_mutex_lock(tsdn, &tdatas_mtx);
 
 	lg_prof_sample = lg_sample;
 
 	next = NULL;
 	do {
 		prof_tdata_t *to_destroy = tdata_tree_iter(&tdatas, next,
-		    prof_tdata_reset_iter, NULL);
+		    prof_tdata_reset_iter, (void *)tsdn);
 		if (to_destroy != NULL) {
 			next = tdata_tree_next(&tdatas, to_destroy);
-			prof_tdata_destroy_locked(tsd, to_destroy, false);
+			prof_tdata_destroy_locked(tsdn, to_destroy, false);
 		} else
 			next = NULL;
 	} while (next != NULL);
 
-	malloc_mutex_unlock(&tdatas_mtx);
-	malloc_mutex_unlock(&prof_dump_mtx);
+	malloc_mutex_unlock(tsdn, &tdatas_mtx);
+	malloc_mutex_unlock(tsdn, &prof_dump_mtx);
 }
 
 void
@@ -1915,35 +1996,33 @@
 }
 
 bool
-prof_active_get(void)
+prof_active_get(tsdn_t *tsdn)
 {
 	bool prof_active_current;
 
-	malloc_mutex_lock(&prof_active_mtx);
+	malloc_mutex_lock(tsdn, &prof_active_mtx);
 	prof_active_current = prof_active;
-	malloc_mutex_unlock(&prof_active_mtx);
+	malloc_mutex_unlock(tsdn, &prof_active_mtx);
 	return (prof_active_current);
 }
 
 bool
-prof_active_set(bool active)
+prof_active_set(tsdn_t *tsdn, bool active)
 {
 	bool prof_active_old;
 
-	malloc_mutex_lock(&prof_active_mtx);
+	malloc_mutex_lock(tsdn, &prof_active_mtx);
 	prof_active_old = prof_active;
 	prof_active = active;
-	malloc_mutex_unlock(&prof_active_mtx);
+	malloc_mutex_unlock(tsdn, &prof_active_mtx);
 	return (prof_active_old);
 }
 
 const char *
-prof_thread_name_get(void)
+prof_thread_name_get(tsd_t *tsd)
 {
-	tsd_t *tsd;
 	prof_tdata_t *tdata;
 
-	tsd = tsd_fetch();
 	tdata = prof_tdata_get(tsd, true);
 	if (tdata == NULL)
 		return ("");
@@ -1951,7 +2030,7 @@
 }
 
 static char *
-prof_thread_name_alloc(tsd_t *tsd, const char *thread_name)
+prof_thread_name_alloc(tsdn_t *tsdn, const char *thread_name)
 {
 	char *ret;
 	size_t size;
@@ -1963,8 +2042,8 @@
 	if (size == 1)
 		return ("");
 
-	ret = iallocztm(tsd, size, size2index(size), false, tcache_get(tsd,
-	    true), true, NULL, true);
+	ret = iallocztm(tsdn, size, size2index(size), false, NULL, true,
+	    arena_get(TSDN_NULL, 0, true), true);
 	if (ret == NULL)
 		return (NULL);
 	memcpy(ret, thread_name, size);
@@ -1991,13 +2070,12 @@
 			return (EFAULT);
 	}
 
-	s = prof_thread_name_alloc(tsd, thread_name);
+	s = prof_thread_name_alloc(tsd_tsdn(tsd), thread_name);
 	if (s == NULL)
 		return (EAGAIN);
 
 	if (tdata->thread_name != NULL) {
-		idalloctm(tsd, tdata->thread_name, tcache_get(tsd, false),
-		    true, true);
+		idalloctm(tsd_tsdn(tsd), tdata->thread_name, NULL, true, true);
 		tdata->thread_name = NULL;
 	}
 	if (strlen(s) > 0)
@@ -2006,12 +2084,10 @@
 }
 
 bool
-prof_thread_active_get(void)
+prof_thread_active_get(tsd_t *tsd)
 {
-	tsd_t *tsd;
 	prof_tdata_t *tdata;
 
-	tsd = tsd_fetch();
 	tdata = prof_tdata_get(tsd, true);
 	if (tdata == NULL)
 		return (false);
@@ -2019,12 +2095,10 @@
 }
 
 bool
-prof_thread_active_set(bool active)
+prof_thread_active_set(tsd_t *tsd, bool active)
 {
-	tsd_t *tsd;
 	prof_tdata_t *tdata;
 
-	tsd = tsd_fetch();
 	tdata = prof_tdata_get(tsd, true);
 	if (tdata == NULL)
 		return (true);
@@ -2033,48 +2107,48 @@
 }
 
 bool
-prof_thread_active_init_get(void)
+prof_thread_active_init_get(tsdn_t *tsdn)
 {
 	bool active_init;
 
-	malloc_mutex_lock(&prof_thread_active_init_mtx);
+	malloc_mutex_lock(tsdn, &prof_thread_active_init_mtx);
 	active_init = prof_thread_active_init;
-	malloc_mutex_unlock(&prof_thread_active_init_mtx);
+	malloc_mutex_unlock(tsdn, &prof_thread_active_init_mtx);
 	return (active_init);
 }
 
 bool
-prof_thread_active_init_set(bool active_init)
+prof_thread_active_init_set(tsdn_t *tsdn, bool active_init)
 {
 	bool active_init_old;
 
-	malloc_mutex_lock(&prof_thread_active_init_mtx);
+	malloc_mutex_lock(tsdn, &prof_thread_active_init_mtx);
 	active_init_old = prof_thread_active_init;
 	prof_thread_active_init = active_init;
-	malloc_mutex_unlock(&prof_thread_active_init_mtx);
+	malloc_mutex_unlock(tsdn, &prof_thread_active_init_mtx);
 	return (active_init_old);
 }
 
 bool
-prof_gdump_get(void)
+prof_gdump_get(tsdn_t *tsdn)
 {
 	bool prof_gdump_current;
 
-	malloc_mutex_lock(&prof_gdump_mtx);
+	malloc_mutex_lock(tsdn, &prof_gdump_mtx);
 	prof_gdump_current = prof_gdump_val;
-	malloc_mutex_unlock(&prof_gdump_mtx);
+	malloc_mutex_unlock(tsdn, &prof_gdump_mtx);
 	return (prof_gdump_current);
 }
 
 bool
-prof_gdump_set(bool gdump)
+prof_gdump_set(tsdn_t *tsdn, bool gdump)
 {
 	bool prof_gdump_old;
 
-	malloc_mutex_lock(&prof_gdump_mtx);
+	malloc_mutex_lock(tsdn, &prof_gdump_mtx);
 	prof_gdump_old = prof_gdump_val;
 	prof_gdump_val = gdump;
-	malloc_mutex_unlock(&prof_gdump_mtx);
+	malloc_mutex_unlock(tsdn, &prof_gdump_mtx);
 	return (prof_gdump_old);
 }
 
@@ -2115,47 +2189,54 @@
 }
 
 bool
-prof_boot2(void)
+prof_boot2(tsdn_t *tsdn)
 {
 
 	cassert(config_prof);
 
 	if (opt_prof) {
-		tsd_t *tsd;
 		unsigned i;
 
 		lg_prof_sample = opt_lg_prof_sample;
 
 		prof_active = opt_prof_active;
-		if (malloc_mutex_init(&prof_active_mtx))
+		if (malloc_mutex_init(&prof_active_mtx, "prof_active",
+		    WITNESS_RANK_PROF_ACTIVE))
 			return (true);
 
 		prof_gdump_val = opt_prof_gdump;
-		if (malloc_mutex_init(&prof_gdump_mtx))
+		if (malloc_mutex_init(&prof_gdump_mtx, "prof_gdump",
+		    WITNESS_RANK_PROF_GDUMP))
 			return (true);
 
 		prof_thread_active_init = opt_prof_thread_active_init;
-		if (malloc_mutex_init(&prof_thread_active_init_mtx))
+		if (malloc_mutex_init(&prof_thread_active_init_mtx,
+		    "prof_thread_active_init",
+		    WITNESS_RANK_PROF_THREAD_ACTIVE_INIT))
 			return (true);
 
-		tsd = tsd_fetch();
-		if (ckh_new(tsd, &bt2gctx, PROF_CKH_MINITEMS, prof_bt_hash,
+		if (ckh_new(tsdn, &bt2gctx, PROF_CKH_MINITEMS, prof_bt_hash,
 		    prof_bt_keycomp))
 			return (true);
-		if (malloc_mutex_init(&bt2gctx_mtx))
+		if (malloc_mutex_init(&bt2gctx_mtx, "prof_bt2gctx",
+		    WITNESS_RANK_PROF_BT2GCTX))
 			return (true);
 
 		tdata_tree_new(&tdatas);
-		if (malloc_mutex_init(&tdatas_mtx))
+		if (malloc_mutex_init(&tdatas_mtx, "prof_tdatas",
+		    WITNESS_RANK_PROF_TDATAS))
 			return (true);
 
 		next_thr_uid = 0;
-		if (malloc_mutex_init(&next_thr_uid_mtx))
+		if (malloc_mutex_init(&next_thr_uid_mtx, "prof_next_thr_uid",
+		    WITNESS_RANK_PROF_NEXT_THR_UID))
 			return (true);
 
-		if (malloc_mutex_init(&prof_dump_seq_mtx))
+		if (malloc_mutex_init(&prof_dump_seq_mtx, "prof_dump_seq",
+		    WITNESS_RANK_PROF_DUMP_SEQ))
 			return (true);
-		if (malloc_mutex_init(&prof_dump_mtx))
+		if (malloc_mutex_init(&prof_dump_mtx, "prof_dump",
+		    WITNESS_RANK_PROF_DUMP))
 			return (true);
 
 		if (opt_prof_final && opt_prof_prefix[0] != '\0' &&
@@ -2165,21 +2246,23 @@
 				abort();
 		}
 
-		gctx_locks = (malloc_mutex_t *)base_alloc(PROF_NCTX_LOCKS *
-		    sizeof(malloc_mutex_t));
+		gctx_locks = (malloc_mutex_t *)base_alloc(tsdn, PROF_NCTX_LOCKS
+		    * sizeof(malloc_mutex_t));
 		if (gctx_locks == NULL)
 			return (true);
 		for (i = 0; i < PROF_NCTX_LOCKS; i++) {
-			if (malloc_mutex_init(&gctx_locks[i]))
+			if (malloc_mutex_init(&gctx_locks[i], "prof_gctx",
+			    WITNESS_RANK_PROF_GCTX))
 				return (true);
 		}
 
-		tdata_locks = (malloc_mutex_t *)base_alloc(PROF_NTDATA_LOCKS *
-		    sizeof(malloc_mutex_t));
+		tdata_locks = (malloc_mutex_t *)base_alloc(tsdn,
+		    PROF_NTDATA_LOCKS * sizeof(malloc_mutex_t));
 		if (tdata_locks == NULL)
 			return (true);
 		for (i = 0; i < PROF_NTDATA_LOCKS; i++) {
-			if (malloc_mutex_init(&tdata_locks[i]))
+			if (malloc_mutex_init(&tdata_locks[i], "prof_tdata",
+			    WITNESS_RANK_PROF_TDATA))
 				return (true);
 		}
 	}
@@ -2198,76 +2281,77 @@
 }
 
 void
-prof_prefork0(void)
+prof_prefork0(tsdn_t *tsdn)
 {
 
 	if (opt_prof) {
 		unsigned i;
 
-		malloc_mutex_prefork(&prof_dump_mtx);
-		malloc_mutex_prefork(&bt2gctx_mtx);
-		malloc_mutex_prefork(&tdatas_mtx);
+		malloc_mutex_prefork(tsdn, &prof_dump_mtx);
+		malloc_mutex_prefork(tsdn, &bt2gctx_mtx);
+		malloc_mutex_prefork(tsdn, &tdatas_mtx);
 		for (i = 0; i < PROF_NTDATA_LOCKS; i++)
-			malloc_mutex_prefork(&tdata_locks[i]);
+			malloc_mutex_prefork(tsdn, &tdata_locks[i]);
 		for (i = 0; i < PROF_NCTX_LOCKS; i++)
-			malloc_mutex_prefork(&gctx_locks[i]);
+			malloc_mutex_prefork(tsdn, &gctx_locks[i]);
 	}
 }
 
 void
-prof_prefork1(void)
+prof_prefork1(tsdn_t *tsdn)
 {
 
 	if (opt_prof) {
-		malloc_mutex_prefork(&prof_active_mtx);
-		malloc_mutex_prefork(&prof_dump_seq_mtx);
-		malloc_mutex_prefork(&prof_gdump_mtx);
-		malloc_mutex_prefork(&next_thr_uid_mtx);
-		malloc_mutex_prefork(&prof_thread_active_init_mtx);
+		malloc_mutex_prefork(tsdn, &prof_active_mtx);
+		malloc_mutex_prefork(tsdn, &prof_dump_seq_mtx);
+		malloc_mutex_prefork(tsdn, &prof_gdump_mtx);
+		malloc_mutex_prefork(tsdn, &next_thr_uid_mtx);
+		malloc_mutex_prefork(tsdn, &prof_thread_active_init_mtx);
 	}
 }
 
 void
-prof_postfork_parent(void)
+prof_postfork_parent(tsdn_t *tsdn)
 {
 
 	if (opt_prof) {
 		unsigned i;
 
-		malloc_mutex_postfork_parent(&prof_thread_active_init_mtx);
-		malloc_mutex_postfork_parent(&next_thr_uid_mtx);
-		malloc_mutex_postfork_parent(&prof_gdump_mtx);
-		malloc_mutex_postfork_parent(&prof_dump_seq_mtx);
-		malloc_mutex_postfork_parent(&prof_active_mtx);
+		malloc_mutex_postfork_parent(tsdn,
+		    &prof_thread_active_init_mtx);
+		malloc_mutex_postfork_parent(tsdn, &next_thr_uid_mtx);
+		malloc_mutex_postfork_parent(tsdn, &prof_gdump_mtx);
+		malloc_mutex_postfork_parent(tsdn, &prof_dump_seq_mtx);
+		malloc_mutex_postfork_parent(tsdn, &prof_active_mtx);
 		for (i = 0; i < PROF_NCTX_LOCKS; i++)
-			malloc_mutex_postfork_parent(&gctx_locks[i]);
+			malloc_mutex_postfork_parent(tsdn, &gctx_locks[i]);
 		for (i = 0; i < PROF_NTDATA_LOCKS; i++)
-			malloc_mutex_postfork_parent(&tdata_locks[i]);
-		malloc_mutex_postfork_parent(&tdatas_mtx);
-		malloc_mutex_postfork_parent(&bt2gctx_mtx);
-		malloc_mutex_postfork_parent(&prof_dump_mtx);
+			malloc_mutex_postfork_parent(tsdn, &tdata_locks[i]);
+		malloc_mutex_postfork_parent(tsdn, &tdatas_mtx);
+		malloc_mutex_postfork_parent(tsdn, &bt2gctx_mtx);
+		malloc_mutex_postfork_parent(tsdn, &prof_dump_mtx);
 	}
 }
 
 void
-prof_postfork_child(void)
+prof_postfork_child(tsdn_t *tsdn)
 {
 
 	if (opt_prof) {
 		unsigned i;
 
-		malloc_mutex_postfork_child(&prof_thread_active_init_mtx);
-		malloc_mutex_postfork_child(&next_thr_uid_mtx);
-		malloc_mutex_postfork_child(&prof_gdump_mtx);
-		malloc_mutex_postfork_child(&prof_dump_seq_mtx);
-		malloc_mutex_postfork_child(&prof_active_mtx);
+		malloc_mutex_postfork_child(tsdn, &prof_thread_active_init_mtx);
+		malloc_mutex_postfork_child(tsdn, &next_thr_uid_mtx);
+		malloc_mutex_postfork_child(tsdn, &prof_gdump_mtx);
+		malloc_mutex_postfork_child(tsdn, &prof_dump_seq_mtx);
+		malloc_mutex_postfork_child(tsdn, &prof_active_mtx);
 		for (i = 0; i < PROF_NCTX_LOCKS; i++)
-			malloc_mutex_postfork_child(&gctx_locks[i]);
+			malloc_mutex_postfork_child(tsdn, &gctx_locks[i]);
 		for (i = 0; i < PROF_NTDATA_LOCKS; i++)
-			malloc_mutex_postfork_child(&tdata_locks[i]);
-		malloc_mutex_postfork_child(&tdatas_mtx);
-		malloc_mutex_postfork_child(&bt2gctx_mtx);
-		malloc_mutex_postfork_child(&prof_dump_mtx);
+			malloc_mutex_postfork_child(tsdn, &tdata_locks[i]);
+		malloc_mutex_postfork_child(tsdn, &tdatas_mtx);
+		malloc_mutex_postfork_child(tsdn, &bt2gctx_mtx);
+		malloc_mutex_postfork_child(tsdn, &prof_dump_mtx);
 	}
 }
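
A note on the pattern the prof.c hunks above follow (and which recurs throughout this merge): every static mutex is now initialized with a human-readable name and a witness rank, and every lock/unlock/prefork call threads a tsdn_t through so lock ownership and rank ordering can be checked in debug builds. A minimal sketch of that pattern, using the internal API exactly as it appears in the diff; the mutex, flag, and WITNESS_RANK_EXAMPLE names are hypothetical:

#include "jemalloc/internal/jemalloc_internal.h"

/* Hypothetical rank; real callers use one of the WITNESS_RANK_* constants. */
#define	WITNESS_RANK_EXAMPLE	20U

static malloc_mutex_t	example_mtx;
static bool		example_flag;

bool
example_boot(void)
{

	/* Register the mutex with a name and witness rank at init time. */
	return (malloc_mutex_init(&example_mtx, "example",
	    WITNESS_RANK_EXAMPLE));
}

bool
example_get(tsdn_t *tsdn)
{
	bool flag;

	/* tsdn is threaded through so the witness machinery sees ownership. */
	malloc_mutex_lock(tsdn, &example_mtx);
	flag = example_flag;
	malloc_mutex_unlock(tsdn, &example_mtx);
	return (flag);
}
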
 
diff --git a/src/quarantine.c b/src/quarantine.c
index ff8801c..18903fb 100644
--- a/src/quarantine.c
+++ b/src/quarantine.c
@@ -13,24 +13,22 @@
 /* Function prototypes for non-inline static functions. */
 
 static quarantine_t	*quarantine_grow(tsd_t *tsd, quarantine_t *quarantine);
-static void	quarantine_drain_one(tsd_t *tsd, quarantine_t *quarantine);
-static void	quarantine_drain(tsd_t *tsd, quarantine_t *quarantine,
+static void	quarantine_drain_one(tsdn_t *tsdn, quarantine_t *quarantine);
+static void	quarantine_drain(tsdn_t *tsdn, quarantine_t *quarantine,
     size_t upper_bound);
 
 /******************************************************************************/
 
 static quarantine_t *
-quarantine_init(tsd_t *tsd, size_t lg_maxobjs)
+quarantine_init(tsdn_t *tsdn, size_t lg_maxobjs)
 {
 	quarantine_t *quarantine;
 	size_t size;
 
-	assert(tsd_nominal(tsd));
-
 	size = offsetof(quarantine_t, objs) + ((ZU(1) << lg_maxobjs) *
 	    sizeof(quarantine_obj_t));
-	quarantine = (quarantine_t *)iallocztm(tsd, size, size2index(size),
-	    false, tcache_get(tsd, true), true, NULL, true);
+	quarantine = (quarantine_t *)iallocztm(tsdn, size, size2index(size),
+	    false, NULL, true, arena_get(TSDN_NULL, 0, true), true);
 	if (quarantine == NULL)
 		return (NULL);
 	quarantine->curbytes = 0;
@@ -49,7 +47,7 @@
 	if (!tsd_nominal(tsd))
 		return;
 
-	quarantine = quarantine_init(tsd, LG_MAXOBJS_INIT);
+	quarantine = quarantine_init(tsd_tsdn(tsd), LG_MAXOBJS_INIT);
 	/*
 	 * Check again whether quarantine has been initialized, because
 	 * quarantine_init() may have triggered recursive initialization.
@@ -57,7 +55,7 @@
 	if (tsd_quarantine_get(tsd) == NULL)
 		tsd_quarantine_set(tsd, quarantine);
 	else
-		idalloctm(tsd, quarantine, tcache_get(tsd, false), true, true);
+		idalloctm(tsd_tsdn(tsd), quarantine, NULL, true, true);
 }
 
 static quarantine_t *
@@ -65,9 +63,9 @@
 {
 	quarantine_t *ret;
 
-	ret = quarantine_init(tsd, quarantine->lg_maxobjs + 1);
+	ret = quarantine_init(tsd_tsdn(tsd), quarantine->lg_maxobjs + 1);
 	if (ret == NULL) {
-		quarantine_drain_one(tsd, quarantine);
+		quarantine_drain_one(tsd_tsdn(tsd), quarantine);
 		return (quarantine);
 	}
 
@@ -89,18 +87,18 @@
 		memcpy(&ret->objs[ncopy_a], quarantine->objs, ncopy_b *
 		    sizeof(quarantine_obj_t));
 	}
-	idalloctm(tsd, quarantine, tcache_get(tsd, false), true, true);
+	idalloctm(tsd_tsdn(tsd), quarantine, NULL, true, true);
 
 	tsd_quarantine_set(tsd, ret);
 	return (ret);
 }
 
 static void
-quarantine_drain_one(tsd_t *tsd, quarantine_t *quarantine)
+quarantine_drain_one(tsdn_t *tsdn, quarantine_t *quarantine)
 {
 	quarantine_obj_t *obj = &quarantine->objs[quarantine->first];
-	assert(obj->usize == isalloc(obj->ptr, config_prof));
-	idalloctm(tsd, obj->ptr, NULL, false, true);
+	assert(obj->usize == isalloc(tsdn, obj->ptr, config_prof));
+	idalloctm(tsdn, obj->ptr, NULL, false, true);
 	quarantine->curbytes -= obj->usize;
 	quarantine->curobjs--;
 	quarantine->first = (quarantine->first + 1) & ((ZU(1) <<
@@ -108,24 +106,24 @@
 }
 
 static void
-quarantine_drain(tsd_t *tsd, quarantine_t *quarantine, size_t upper_bound)
+quarantine_drain(tsdn_t *tsdn, quarantine_t *quarantine, size_t upper_bound)
 {
 
 	while (quarantine->curbytes > upper_bound && quarantine->curobjs > 0)
-		quarantine_drain_one(tsd, quarantine);
+		quarantine_drain_one(tsdn, quarantine);
 }
 
 void
 quarantine(tsd_t *tsd, void *ptr)
 {
 	quarantine_t *quarantine;
-	size_t usize = isalloc(ptr, config_prof);
+	size_t usize = isalloc(tsd_tsdn(tsd), ptr, config_prof);
 
 	cassert(config_fill);
 	assert(opt_quarantine);
 
 	if ((quarantine = tsd_quarantine_get(tsd)) == NULL) {
-		idalloctm(tsd, ptr, NULL, false, true);
+		idalloctm(tsd_tsdn(tsd), ptr, NULL, false, true);
 		return;
 	}
 	/*
@@ -135,7 +133,7 @@
 	if (quarantine->curbytes + usize > opt_quarantine) {
 		size_t upper_bound = (opt_quarantine >= usize) ? opt_quarantine
 		    - usize : 0;
-		quarantine_drain(tsd, quarantine, upper_bound);
+		quarantine_drain(tsd_tsdn(tsd), quarantine, upper_bound);
 	}
 	/* Grow the quarantine ring buffer if it's full. */
 	if (quarantine->curobjs == (ZU(1) << quarantine->lg_maxobjs))
@@ -160,11 +158,11 @@
 			    && usize <= SMALL_MAXCLASS)
 				arena_quarantine_junk_small(ptr, usize);
 			else
-				memset(ptr, 0x5a, usize);
+				memset(ptr, JEMALLOC_FREE_JUNK, usize);
 		}
 	} else {
 		assert(quarantine->curbytes == 0);
-		idalloctm(tsd, ptr, NULL, false, true);
+		idalloctm(tsd_tsdn(tsd), ptr, NULL, false, true);
 	}
 }
 
@@ -178,8 +176,8 @@
 
 	quarantine = tsd_quarantine_get(tsd);
 	if (quarantine != NULL) {
-		quarantine_drain(tsd, quarantine, 0);
-		idalloctm(tsd, quarantine, tcache_get(tsd, false), true, true);
+		quarantine_drain(tsd_tsdn(tsd), quarantine, 0);
+		idalloctm(tsd_tsdn(tsd), quarantine, NULL, true, true);
 		tsd_quarantine_set(tsd, NULL);
 	}
 }
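
The quarantine_drain() path above clamps its target to zero before subtracting, so the size_t arithmetic cannot wrap, and then evicts the oldest ring-buffer entries until usage fits. A standalone sketch of that policy follows; all names are hypothetical and this is not jemalloc's implementation:

#include <stddef.h>

/* Evict oldest entries until usage leaves room for an incoming object. */
static void
ring_drain(size_t *curbytes, size_t *curobjs, size_t limit, size_t incoming,
    void (*evict_oldest)(size_t *curbytes, size_t *curobjs))
{
	/* Saturating subtraction avoids underflow when incoming > limit. */
	size_t upper_bound = (limit >= incoming) ? limit - incoming : 0;

	while (*curbytes > upper_bound && *curobjs > 0)
		evict_oldest(curbytes, curobjs);
}
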
diff --git a/src/rtree.c b/src/rtree.c
index af0d97e..3166b45 100644
--- a/src/rtree.c
+++ b/src/rtree.c
@@ -15,6 +15,8 @@
 {
 	unsigned bits_in_leaf, height, i;
 
+	assert(RTREE_HEIGHT_MAX == ((ZU(1) << (LG_SIZEOF_PTR+3)) /
+	    RTREE_BITS_PER_LEVEL));
 	assert(bits > 0 && bits <= (sizeof(uintptr_t) << 3));
 
 	bits_in_leaf = (bits % RTREE_BITS_PER_LEVEL) == 0 ? RTREE_BITS_PER_LEVEL
diff --git a/src/stats.c b/src/stats.c
index 87b09e5..073be4f 100644
--- a/src/stats.c
+++ b/src/stats.c
@@ -259,7 +259,7 @@
 	unsigned nthreads;
 	const char *dss;
 	ssize_t lg_dirty_mult, decay_time;
-	size_t page, pactive, pdirty, mapped;
+	size_t page, pactive, pdirty, mapped, retained;
 	size_t metadata_mapped, metadata_allocated;
 	uint64_t npurge, nmadvise, purged;
 	size_t small_allocated;
@@ -349,6 +349,9 @@
 	CTL_M2_GET("stats.arenas.0.mapped", i, &mapped, size_t);
 	malloc_cprintf(write_cb, cbopaque,
 	    "mapped:                  %12zu\n", mapped);
+	CTL_M2_GET("stats.arenas.0.retained", i, &retained, size_t);
+	malloc_cprintf(write_cb, cbopaque,
+	    "retained:                %12zu\n", retained);
 	CTL_M2_GET("stats.arenas.0.metadata.mapped", i, &metadata_mapped,
 	    size_t);
 	CTL_M2_GET("stats.arenas.0.metadata.allocated", i, &metadata_allocated,
@@ -597,7 +600,7 @@
 
 	if (config_stats) {
 		size_t *cactive;
-		size_t allocated, active, metadata, resident, mapped;
+		size_t allocated, active, metadata, resident, mapped, retained;
 
 		CTL_GET("stats.cactive", &cactive, size_t *);
 		CTL_GET("stats.allocated", &allocated, size_t);
@@ -605,10 +608,11 @@
 		CTL_GET("stats.metadata", &metadata, size_t);
 		CTL_GET("stats.resident", &resident, size_t);
 		CTL_GET("stats.mapped", &mapped, size_t);
+		CTL_GET("stats.retained", &retained, size_t);
 		malloc_cprintf(write_cb, cbopaque,
 		    "Allocated: %zu, active: %zu, metadata: %zu,"
-		    " resident: %zu, mapped: %zu\n",
-		    allocated, active, metadata, resident, mapped);
+		    " resident: %zu, mapped: %zu, retained: %zu\n",
+		    allocated, active, metadata, resident, mapped, retained);
 		malloc_cprintf(write_cb, cbopaque,
 		    "Current active ceiling: %zu\n",
 		    atomic_read_z(cactive));
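
The stats.c changes surface the new stats.retained counter alongside mapped. A hedged sketch of reading it from application code through the public mallctl interface; refreshing the "epoch" first is the documented way to update the statistics snapshot, and whether the functions carry a je_ prefix depends on how the library was configured:

#include <stdio.h>
#include <stdint.h>
#include <jemalloc/jemalloc.h>

int
main(void)
{
	uint64_t epoch = 1;
	size_t sz = sizeof(epoch);
	size_t retained;

	/* Refresh the statistics snapshot. */
	mallctl("epoch", &epoch, &sz, &epoch, sz);

	/* Read the new retained-memory statistic. */
	sz = sizeof(retained);
	if (mallctl("stats.retained", &retained, &sz, NULL, 0) == 0)
		printf("retained: %zu bytes\n", retained);
	return (0);
}
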
diff --git a/src/tcache.c b/src/tcache.c
index 6e32f40..175759c 100644
--- a/src/tcache.c
+++ b/src/tcache.c
@@ -23,10 +23,11 @@
 
 /******************************************************************************/
 
-size_t	tcache_salloc(const void *ptr)
+size_t
+tcache_salloc(tsdn_t *tsdn, const void *ptr)
 {
 
-	return (arena_salloc(ptr, false));
+	return (arena_salloc(tsdn, ptr, false));
 }
 
 void
@@ -70,12 +71,12 @@
 }
 
 void *
-tcache_alloc_small_hard(tsd_t *tsd, arena_t *arena, tcache_t *tcache,
+tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache,
     tcache_bin_t *tbin, szind_t binind, bool *tcache_success)
 {
 	void *ret;
 
-	arena_tcache_fill_small(tsd, arena, tbin, binind, config_prof ?
+	arena_tcache_fill_small(tsdn, arena, tbin, binind, config_prof ?
 	    tcache->prof_accumbytes : 0);
 	if (config_prof)
 		tcache->prof_accumbytes = 0;
@@ -106,12 +107,13 @@
 		arena_bin_t *bin = &bin_arena->bins[binind];
 
 		if (config_prof && bin_arena == arena) {
-			if (arena_prof_accum(arena, tcache->prof_accumbytes))
-				prof_idump();
+			if (arena_prof_accum(tsd_tsdn(tsd), arena,
+			    tcache->prof_accumbytes))
+				prof_idump(tsd_tsdn(tsd));
 			tcache->prof_accumbytes = 0;
 		}
 
-		malloc_mutex_lock(&bin->lock);
+		malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock);
 		if (config_stats && bin_arena == arena) {
 			assert(!merged_stats);
 			merged_stats = true;
@@ -128,9 +130,9 @@
 				size_t pageind = ((uintptr_t)ptr -
 				    (uintptr_t)chunk) >> LG_PAGE;
 				arena_chunk_map_bits_t *bitselm =
-				    arena_bitselm_get(chunk, pageind);
-				arena_dalloc_bin_junked_locked(bin_arena, chunk,
-				    ptr, bitselm);
+				    arena_bitselm_get_mutable(chunk, pageind);
+				arena_dalloc_bin_junked_locked(tsd_tsdn(tsd),
+				    bin_arena, chunk, ptr, bitselm);
 			} else {
 				/*
 				 * This object was allocated via a different
@@ -142,8 +144,8 @@
 				ndeferred++;
 			}
 		}
-		malloc_mutex_unlock(&bin->lock);
-		arena_decay_ticks(tsd, bin_arena, nflush - ndeferred);
+		malloc_mutex_unlock(tsd_tsdn(tsd), &bin->lock);
+		arena_decay_ticks(tsd_tsdn(tsd), bin_arena, nflush - ndeferred);
 	}
 	if (config_stats && !merged_stats) {
 		/*
@@ -151,11 +153,11 @@
 		 * arena, so the stats didn't get merged.  Manually do so now.
 		 */
 		arena_bin_t *bin = &arena->bins[binind];
-		malloc_mutex_lock(&bin->lock);
+		malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock);
 		bin->stats.nflushes++;
 		bin->stats.nrequests += tbin->tstats.nrequests;
 		tbin->tstats.nrequests = 0;
-		malloc_mutex_unlock(&bin->lock);
+		malloc_mutex_unlock(tsd_tsdn(tsd), &bin->lock);
 	}
 
 	memmove(tbin->avail - rem, tbin->avail - tbin->ncached, rem *
@@ -188,7 +190,7 @@
 
 		if (config_prof)
 			idump = false;
-		malloc_mutex_lock(&locked_arena->lock);
+		malloc_mutex_lock(tsd_tsdn(tsd), &locked_arena->lock);
 		if ((config_prof || config_stats) && locked_arena == arena) {
 			if (config_prof) {
 				idump = arena_prof_accum_locked(arena,
@@ -211,8 +213,8 @@
 			chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
 			if (extent_node_arena_get(&chunk->node) ==
 			    locked_arena) {
-				arena_dalloc_large_junked_locked(locked_arena,
-				    chunk, ptr);
+				arena_dalloc_large_junked_locked(tsd_tsdn(tsd),
+				    locked_arena, chunk, ptr);
 			} else {
 				/*
 				 * This object was allocated via a different
@@ -224,22 +226,23 @@
 				ndeferred++;
 			}
 		}
-		malloc_mutex_unlock(&locked_arena->lock);
+		malloc_mutex_unlock(tsd_tsdn(tsd), &locked_arena->lock);
 		if (config_prof && idump)
-			prof_idump();
-		arena_decay_ticks(tsd, locked_arena, nflush - ndeferred);
+			prof_idump(tsd_tsdn(tsd));
+		arena_decay_ticks(tsd_tsdn(tsd), locked_arena, nflush -
+		    ndeferred);
 	}
 	if (config_stats && !merged_stats) {
 		/*
 		 * The flush loop didn't happen to flush to this thread's
 		 * arena, so the stats didn't get merged.  Manually do so now.
 		 */
-		malloc_mutex_lock(&arena->lock);
+		malloc_mutex_lock(tsd_tsdn(tsd), &arena->lock);
 		arena->stats.nrequests_large += tbin->tstats.nrequests;
 		arena->stats.lstats[binind - NBINS].nrequests +=
 		    tbin->tstats.nrequests;
 		tbin->tstats.nrequests = 0;
-		malloc_mutex_unlock(&arena->lock);
+		malloc_mutex_unlock(tsd_tsdn(tsd), &arena->lock);
 	}
 
 	memmove(tbin->avail - rem, tbin->avail - tbin->ncached, rem *
@@ -249,34 +252,26 @@
 		tbin->low_water = tbin->ncached;
 }
 
-void
-tcache_arena_associate(tcache_t *tcache, arena_t *arena)
+static void
+tcache_arena_associate(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena)
 {
 
 	if (config_stats) {
 		/* Link into list of extant tcaches. */
-		malloc_mutex_lock(&arena->lock);
+		malloc_mutex_lock(tsdn, &arena->lock);
 		ql_elm_new(tcache, link);
 		ql_tail_insert(&arena->tcache_ql, tcache, link);
-		malloc_mutex_unlock(&arena->lock);
+		malloc_mutex_unlock(tsdn, &arena->lock);
 	}
 }
 
-void
-tcache_arena_reassociate(tcache_t *tcache, arena_t *oldarena, arena_t *newarena)
-{
-
-	tcache_arena_dissociate(tcache, oldarena);
-	tcache_arena_associate(tcache, newarena);
-}
-
-void
-tcache_arena_dissociate(tcache_t *tcache, arena_t *arena)
+static void
+tcache_arena_dissociate(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena)
 {
 
 	if (config_stats) {
 		/* Unlink from list of extant tcaches. */
-		malloc_mutex_lock(&arena->lock);
+		malloc_mutex_lock(tsdn, &arena->lock);
 		if (config_debug) {
 			bool in_ql = false;
 			tcache_t *iter;
@@ -289,11 +284,20 @@
 			assert(in_ql);
 		}
 		ql_remove(&arena->tcache_ql, tcache, link);
-		tcache_stats_merge(tcache, arena);
-		malloc_mutex_unlock(&arena->lock);
+		tcache_stats_merge(tsdn, tcache, arena);
+		malloc_mutex_unlock(tsdn, &arena->lock);
 	}
 }
 
+void
+tcache_arena_reassociate(tsdn_t *tsdn, tcache_t *tcache, arena_t *oldarena,
+    arena_t *newarena)
+{
+
+	tcache_arena_dissociate(tsdn, tcache, oldarena);
+	tcache_arena_associate(tsdn, tcache, newarena);
+}
+
 tcache_t *
 tcache_get_hard(tsd_t *tsd)
 {
@@ -307,11 +311,11 @@
 	arena = arena_choose(tsd, NULL);
 	if (unlikely(arena == NULL))
 		return (NULL);
-	return (tcache_create(tsd, arena));
+	return (tcache_create(tsd_tsdn(tsd), arena));
 }
 
 tcache_t *
-tcache_create(tsd_t *tsd, arena_t *arena)
+tcache_create(tsdn_t *tsdn, arena_t *arena)
 {
 	tcache_t *tcache;
 	size_t size, stack_offset;
@@ -325,12 +329,12 @@
 	/* Avoid false cacheline sharing. */
 	size = sa2u(size, CACHELINE);
 
-	tcache = ipallocztm(tsd, size, CACHELINE, true, false, true,
-	    arena_get(0, false));
+	tcache = ipallocztm(tsdn, size, CACHELINE, true, NULL, true,
+	    arena_get(TSDN_NULL, 0, true));
 	if (tcache == NULL)
 		return (NULL);
 
-	tcache_arena_associate(tcache, arena);
+	tcache_arena_associate(tsdn, tcache, arena);
 
 	ticker_init(&tcache->gc_ticker, TCACHE_GC_INCR);
 
@@ -357,7 +361,7 @@
 	unsigned i;
 
 	arena = arena_choose(tsd, NULL);
-	tcache_arena_dissociate(tcache, arena);
+	tcache_arena_dissociate(tsd_tsdn(tsd), tcache, arena);
 
 	for (i = 0; i < NBINS; i++) {
 		tcache_bin_t *tbin = &tcache->tbins[i];
@@ -365,9 +369,9 @@
 
 		if (config_stats && tbin->tstats.nrequests != 0) {
 			arena_bin_t *bin = &arena->bins[i];
-			malloc_mutex_lock(&bin->lock);
+			malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock);
 			bin->stats.nrequests += tbin->tstats.nrequests;
-			malloc_mutex_unlock(&bin->lock);
+			malloc_mutex_unlock(tsd_tsdn(tsd), &bin->lock);
 		}
 	}
 
@@ -376,19 +380,19 @@
 		tcache_bin_flush_large(tsd, tbin, i, 0, tcache);
 
 		if (config_stats && tbin->tstats.nrequests != 0) {
-			malloc_mutex_lock(&arena->lock);
+			malloc_mutex_lock(tsd_tsdn(tsd), &arena->lock);
 			arena->stats.nrequests_large += tbin->tstats.nrequests;
 			arena->stats.lstats[i - NBINS].nrequests +=
 			    tbin->tstats.nrequests;
-			malloc_mutex_unlock(&arena->lock);
+			malloc_mutex_unlock(tsd_tsdn(tsd), &arena->lock);
 		}
 	}
 
 	if (config_prof && tcache->prof_accumbytes > 0 &&
-	    arena_prof_accum(arena, tcache->prof_accumbytes))
-		prof_idump();
+	    arena_prof_accum(tsd_tsdn(tsd), arena, tcache->prof_accumbytes))
+		prof_idump(tsd_tsdn(tsd));
 
-	idalloctm(tsd, tcache, false, true, true);
+	idalloctm(tsd_tsdn(tsd), tcache, NULL, true, true);
 }
 
 void
@@ -412,21 +416,22 @@
 	/* Do nothing. */
 }
 
-/* Caller must own arena->lock. */
 void
-tcache_stats_merge(tcache_t *tcache, arena_t *arena)
+tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena)
 {
 	unsigned i;
 
 	cassert(config_stats);
 
+	malloc_mutex_assert_owner(tsdn, &arena->lock);
+
 	/* Merge and reset tcache stats. */
 	for (i = 0; i < NBINS; i++) {
 		arena_bin_t *bin = &arena->bins[i];
 		tcache_bin_t *tbin = &tcache->tbins[i];
-		malloc_mutex_lock(&bin->lock);
+		malloc_mutex_lock(tsdn, &bin->lock);
 		bin->stats.nrequests += tbin->tstats.nrequests;
-		malloc_mutex_unlock(&bin->lock);
+		malloc_mutex_unlock(tsdn, &bin->lock);
 		tbin->tstats.nrequests = 0;
 	}
 
@@ -440,13 +445,14 @@
 }
 
 bool
-tcaches_create(tsd_t *tsd, unsigned *r_ind)
+tcaches_create(tsdn_t *tsdn, unsigned *r_ind)
 {
+	arena_t *arena;
 	tcache_t *tcache;
 	tcaches_t *elm;
 
 	if (tcaches == NULL) {
-		tcaches = base_alloc(sizeof(tcache_t *) *
+		tcaches = base_alloc(tsdn, sizeof(tcache_t *) *
 		    (MALLOCX_TCACHE_MAX+1));
 		if (tcaches == NULL)
 			return (true);
@@ -454,7 +460,10 @@
 
 	if (tcaches_avail == NULL && tcaches_past > MALLOCX_TCACHE_MAX)
 		return (true);
-	tcache = tcache_create(tsd, arena_get(0, false));
+	arena = arena_ichoose(tsdn, NULL);
+	if (unlikely(arena == NULL))
+		return (true);
+	tcache = tcache_create(tsdn, arena);
 	if (tcache == NULL)
 		return (true);
 
@@ -500,7 +509,7 @@
 }
 
 bool
-tcache_boot(void)
+tcache_boot(tsdn_t *tsdn)
 {
 	unsigned i;
 
@@ -518,7 +527,7 @@
 	nhbins = size2index(tcache_maxclass) + 1;
 
 	/* Initialize tcache_bin_info. */
-	tcache_bin_info = (tcache_bin_info_t *)base_alloc(nhbins *
+	tcache_bin_info = (tcache_bin_info_t *)base_alloc(tsdn, nhbins *
 	    sizeof(tcache_bin_info_t));
 	if (tcache_bin_info == NULL)
 		return (true);
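
Note that the old "Caller must own arena->lock." comment on tcache_stats_merge() is replaced above by an executable precondition: the function now calls malloc_mutex_assert_owner(). A minimal sketch of that idiom in the same internal context; the surrounding function name is hypothetical:

static void
merge_stats_locked(tsdn_t *tsdn, arena_t *arena, uint64_t nrequests)
{

	/* Fail loudly in debug builds if the caller forgot to lock. */
	malloc_mutex_assert_owner(tsdn, &arena->lock);
	arena->stats.nrequests_large += nrequests;
}
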
diff --git a/src/tsd.c b/src/tsd.c
index 34c1573..aeaa5e1 100644
--- a/src/tsd.c
+++ b/src/tsd.c
@@ -77,7 +77,7 @@
 		/* Do nothing. */
 		break;
 	case tsd_state_nominal:
-#define O(n, t)								\
+#define	O(n, t)								\
 		n##_cleanup(tsd);
 MALLOC_TSD
 #undef O
@@ -106,15 +106,17 @@
 	}
 }
 
-bool
+tsd_t *
 malloc_tsd_boot0(void)
 {
+	tsd_t *tsd;
 
 	ncleanups = 0;
 	if (tsd_boot0())
-		return (true);
-	*tsd_arenas_tdata_bypassp_get(tsd_fetch()) = true;
-	return (false);
+		return (NULL);
+	tsd = tsd_fetch();
+	*tsd_arenas_tdata_bypassp_get(tsd) = true;
+	return (tsd);
 }
 
 void
@@ -169,10 +171,10 @@
 	tsd_init_block_t *iter;
 
 	/* Check whether this thread has already inserted into the list. */
-	malloc_mutex_lock(&head->lock);
+	malloc_mutex_lock(NULL, &head->lock);
 	ql_foreach(iter, &head->blocks, link) {
 		if (iter->thread == self) {
-			malloc_mutex_unlock(&head->lock);
+			malloc_mutex_unlock(NULL, &head->lock);
 			return (iter->data);
 		}
 	}
@@ -180,7 +182,7 @@
 	ql_elm_new(block, link);
 	block->thread = self;
 	ql_tail_insert(&head->blocks, block, link);
-	malloc_mutex_unlock(&head->lock);
+	malloc_mutex_unlock(NULL, &head->lock);
 	return (NULL);
 }
 
@@ -188,8 +190,8 @@
 tsd_init_finish(tsd_init_head_t *head, tsd_init_block_t *block)
 {
 
-	malloc_mutex_lock(&head->lock);
+	malloc_mutex_lock(NULL, &head->lock);
 	ql_remove(&head->blocks, block, link);
-	malloc_mutex_unlock(&head->lock);
+	malloc_mutex_unlock(NULL, &head->lock);
 }
 #endif
diff --git a/src/util.c b/src/util.c
index 02673c7..a1c4a2a 100644
--- a/src/util.c
+++ b/src/util.c
@@ -14,6 +14,7 @@
 		malloc_write("<jemalloc>: Unreachable code reached\n");	\
 		abort();						\
 	}								\
+	unreachable();							\
 } while (0)
 
 #define	not_implemented() do {						\
@@ -314,10 +315,9 @@
 	return (s);
 }
 
-int
+size_t
 malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap)
 {
-	int ret;
 	size_t i;
 	const char *f;
 
@@ -408,6 +408,8 @@
 			int prec = -1;
 			int width = -1;
 			unsigned char len = '?';
+			char *s;
+			size_t slen;
 
 			f++;
 			/* Flags. */
@@ -498,8 +500,6 @@
 			}
 			/* Conversion specifier. */
 			switch (*f) {
-				char *s;
-				size_t slen;
 			case '%':
 				/* %% */
 				APPEND_C(*f);
@@ -585,21 +585,19 @@
 		str[i] = '\0';
 	else
 		str[size - 1] = '\0';
-	assert(i < INT_MAX);
-	ret = (int)i;
 
 #undef APPEND_C
 #undef APPEND_S
 #undef APPEND_PADDED_S
 #undef GET_ARG_NUMERIC
-	return (ret);
+	return (i);
 }
 
 JEMALLOC_FORMAT_PRINTF(3, 4)
-int
+size_t
 malloc_snprintf(char *str, size_t size, const char *format, ...)
 {
-	int ret;
+	size_t ret;
 	va_list ap;
 
 	va_start(ap, format);
diff --git a/src/witness.c b/src/witness.c
new file mode 100644
index 0000000..23753f2
--- /dev/null
+++ b/src/witness.c
@@ -0,0 +1,136 @@
+#define	JEMALLOC_WITNESS_C_
+#include "jemalloc/internal/jemalloc_internal.h"
+
+void
+witness_init(witness_t *witness, const char *name, witness_rank_t rank,
+    witness_comp_t *comp)
+{
+
+	witness->name = name;
+	witness->rank = rank;
+	witness->comp = comp;
+}
+
+#ifdef JEMALLOC_JET
+#undef witness_lock_error
+#define	witness_lock_error JEMALLOC_N(n_witness_lock_error)
+#endif
+void
+witness_lock_error(const witness_list_t *witnesses, const witness_t *witness)
+{
+	witness_t *w;
+
+	malloc_printf("<jemalloc>: Lock rank order reversal:");
+	ql_foreach(w, witnesses, link) {
+		malloc_printf(" %s(%u)", w->name, w->rank);
+	}
+	malloc_printf(" %s(%u)\n", witness->name, witness->rank);
+	abort();
+}
+#ifdef JEMALLOC_JET
+#undef witness_lock_error
+#define	witness_lock_error JEMALLOC_N(witness_lock_error)
+witness_lock_error_t *witness_lock_error = JEMALLOC_N(n_witness_lock_error);
+#endif
+
+#ifdef JEMALLOC_JET
+#undef witness_owner_error
+#define	witness_owner_error JEMALLOC_N(n_witness_owner_error)
+#endif
+void
+witness_owner_error(const witness_t *witness)
+{
+
+	malloc_printf("<jemalloc>: Should own %s(%u)\n", witness->name,
+	    witness->rank);
+	abort();
+}
+#ifdef JEMALLOC_JET
+#undef witness_owner_error
+#define	witness_owner_error JEMALLOC_N(witness_owner_error)
+witness_owner_error_t *witness_owner_error = JEMALLOC_N(n_witness_owner_error);
+#endif
+
+#ifdef JEMALLOC_JET
+#undef witness_not_owner_error
+#define	witness_not_owner_error JEMALLOC_N(n_witness_not_owner_error)
+#endif
+void
+witness_not_owner_error(const witness_t *witness)
+{
+
+	malloc_printf("<jemalloc>: Should not own %s(%u)\n", witness->name,
+	    witness->rank);
+	abort();
+}
+#ifdef JEMALLOC_JET
+#undef witness_not_owner_error
+#define	witness_not_owner_error JEMALLOC_N(witness_not_owner_error)
+witness_not_owner_error_t *witness_not_owner_error =
+    JEMALLOC_N(n_witness_not_owner_error);
+#endif
+
+#ifdef JEMALLOC_JET
+#undef witness_lockless_error
+#define	witness_lockless_error JEMALLOC_N(n_witness_lockless_error)
+#endif
+void
+witness_lockless_error(const witness_list_t *witnesses)
+{
+	witness_t *w;
+
+	malloc_printf("<jemalloc>: Should not own any locks:");
+	ql_foreach(w, witnesses, link) {
+		malloc_printf(" %s(%u)", w->name, w->rank);
+	}
+	malloc_printf("\n");
+	abort();
+}
+#ifdef JEMALLOC_JET
+#undef witness_lockless_error
+#define	witness_lockless_error JEMALLOC_N(witness_lockless_error)
+witness_lockless_error_t *witness_lockless_error =
+    JEMALLOC_N(n_witness_lockless_error);
+#endif
+
+void
+witnesses_cleanup(tsd_t *tsd)
+{
+
+	witness_assert_lockless(tsd_tsdn(tsd));
+
+	/* Do nothing. */
+}
+
+void
+witness_fork_cleanup(tsd_t *tsd)
+{
+
+	/* Do nothing. */
+}
+
+void
+witness_prefork(tsd_t *tsd)
+{
+
+	tsd_witness_fork_set(tsd, true);
+}
+
+void
+witness_postfork_parent(tsd_t *tsd)
+{
+
+	tsd_witness_fork_set(tsd, false);
+}
+
+void
+witness_postfork_child(tsd_t *tsd)
+{
+#ifndef JEMALLOC_MUTEX_INIT_CB
+	witness_list_t *witnesses;
+
+	witnesses = tsd_witnessesp_get(tsd);
+	ql_new(witnesses);
+#endif
+	tsd_witness_fork_set(tsd, false);
+}
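
The new witness module backs the rank arguments passed to malloc_mutex_init() throughout this diff: each lock carries a witness, and acquiring locks out of rank order trips witness_lock_error(). A hedged sketch of registering a witness directly; the rank value is hypothetical, and passing a NULL comparison function is an assumption that rank alone orders the locks:

static witness_t example_witness;

void
example_witness_boot(void)
{

	/* Lower ranks must be acquired before higher ranks. */
	witness_init(&example_witness, "example", (witness_rank_t)10U, NULL);
}
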
diff --git a/src/zone.c b/src/zone.c
index 6859b3f..2c17123 100644
--- a/src/zone.c
+++ b/src/zone.c
@@ -56,7 +56,7 @@
 	 * not work in practice, we must check all pointers to assure that they
 	 * reside within a mapped chunk before determining size.
 	 */
-	return (ivsalloc(ptr, config_prof));
+	return (ivsalloc(tsdn_fetch(), ptr, config_prof));
 }
 
 static void *
@@ -87,7 +87,7 @@
 zone_free(malloc_zone_t *zone, void *ptr)
 {
 
-	if (ivsalloc(ptr, config_prof) != 0) {
+	if (ivsalloc(tsdn_fetch(), ptr, config_prof) != 0) {
 		je_free(ptr);
 		return;
 	}
@@ -99,7 +99,7 @@
 zone_realloc(malloc_zone_t *zone, void *ptr, size_t size)
 {
 
-	if (ivsalloc(ptr, config_prof) != 0)
+	if (ivsalloc(tsdn_fetch(), ptr, config_prof) != 0)
 		return (je_realloc(ptr, size));
 
 	return (realloc(ptr, size));
@@ -123,7 +123,7 @@
 {
 	size_t alloc_size;
 
-	alloc_size = ivsalloc(ptr, config_prof);
+	alloc_size = ivsalloc(tsdn_fetch(), ptr, config_prof);
 	if (alloc_size != 0) {
 		assert(alloc_size == size);
 		je_free(ptr);
diff --git a/test/include/test/jemalloc_test.h.in b/test/include/test/jemalloc_test.h.in
index 0a3dbea..1f36e46 100644
--- a/test/include/test/jemalloc_test.h.in
+++ b/test/include/test/jemalloc_test.h.in
@@ -19,39 +19,6 @@
 #  include <pthread.h>
 #endif
 
-/******************************************************************************/
-/*
- * Define always-enabled assertion macros, so that test assertions execute even
- * if assertions are disabled in the library code.  These definitions must
- * exist prior to including "jemalloc/internal/util.h".
- */
-#define	assert(e) do {							\
-	if (!(e)) {							\
-		malloc_printf(						\
-		    "<jemalloc>: %s:%d: Failed assertion: \"%s\"\n",	\
-		    __FILE__, __LINE__, #e);				\
-		abort();						\
-	}								\
-} while (0)
-
-#define	not_reached() do {						\
-	malloc_printf(							\
-	    "<jemalloc>: %s:%d: Unreachable code reached\n",		\
-	    __FILE__, __LINE__);					\
-	abort();							\
-} while (0)
-
-#define	not_implemented() do {						\
-	malloc_printf("<jemalloc>: %s:%d: Not implemented\n",		\
-	    __FILE__, __LINE__);					\
-	abort();							\
-} while (0)
-
-#define	assert_not_implemented(e) do {					\
-	if (!(e))							\
-		not_implemented();					\
-} while (0)
-
 #include "test/jemalloc_test_defs.h"
 
 #ifdef JEMALLOC_OSSPIN
@@ -86,6 +53,14 @@
 #  include "jemalloc/internal/jemalloc_internal_defs.h"
 #  include "jemalloc/internal/jemalloc_internal_macros.h"
 
+static const bool config_debug =
+#ifdef JEMALLOC_DEBUG
+    true
+#else
+    false
+#endif
+    ;
+
 #  define JEMALLOC_N(n) @private_namespace@##n
 #  include "jemalloc/internal/private_namespace.h"
 
@@ -149,3 +124,40 @@
 #include "test/thd.h"
 #define	MEXP 19937
 #include "test/SFMT.h"
+
+/******************************************************************************/
+/*
+ * Define always-enabled assertion macros, so that test assertions execute even
+ * if assertions are disabled in the library code.
+ */
+#undef assert
+#undef not_reached
+#undef not_implemented
+#undef assert_not_implemented
+
+#define	assert(e) do {							\
+	if (!(e)) {							\
+		malloc_printf(						\
+		    "<jemalloc>: %s:%d: Failed assertion: \"%s\"\n",	\
+		    __FILE__, __LINE__, #e);				\
+		abort();						\
+	}								\
+} while (0)
+
+#define	not_reached() do {						\
+	malloc_printf(							\
+	    "<jemalloc>: %s:%d: Unreachable code reached\n",		\
+	    __FILE__, __LINE__);					\
+	abort();							\
+} while (0)
+
+#define	not_implemented() do {						\
+	malloc_printf("<jemalloc>: %s:%d: Not implemented\n",		\
+	    __FILE__, __LINE__);					\
+	abort();							\
+} while (0)
+
+#define	assert_not_implemented(e) do {					\
+	if (!(e))							\
+		not_implemented();					\
+} while (0)
diff --git a/test/include/test/test.h b/test/include/test/test.h
index 3cf901f..c8112eb 100644
--- a/test/include/test/test.h
+++ b/test/include/test/test.h
@@ -311,6 +311,9 @@
 #define	test(...)							\
 	p_test(__VA_ARGS__, NULL)
 
+#define	test_no_malloc_init(...)					\
+	p_test_no_malloc_init(__VA_ARGS__, NULL)
+
 #define	test_skip_if(e) do {						\
 	if (e) {							\
 		test_skip("%s:%s:%d: Test skipped: (%s)",		\
@@ -324,6 +327,7 @@
 
 /* For private use by macros. */
 test_status_t	p_test(test_t *t, ...);
+test_status_t	p_test_no_malloc_init(test_t *t, ...);
 void	p_test_init(const char *name);
 void	p_test_fini(void);
 void	p_test_fail(const char *prefix, const char *message);
diff --git a/test/integration/mallocx.c b/test/integration/mallocx.c
index c185cc6..55e1a09 100644
--- a/test/integration/mallocx.c
+++ b/test/integration/mallocx.c
@@ -1,5 +1,9 @@
 #include "test/jemalloc_test.h"
 
+#ifdef JEMALLOC_FILL
+const char *malloc_conf = "junk:false";
+#endif
+
 static unsigned
 get_nsizes_impl(const char *cmd)
 {
@@ -69,7 +73,7 @@
 
 TEST_BEGIN(test_oom)
 {
-	size_t hugemax, size, alignment;
+	size_t hugemax;
 	bool oom;
 	void *ptrs[3];
 	unsigned i;
@@ -94,15 +98,16 @@
 	}
 
 #if LG_SIZEOF_PTR == 3
-	size      = ZU(0x8000000000000000);
-	alignment = ZU(0x8000000000000000);
+	assert_ptr_null(mallocx(0x8000000000000000ULL,
+	    MALLOCX_ALIGN(0x8000000000000000ULL)),
+	    "Expected OOM for mallocx()");
+	assert_ptr_null(mallocx(0x8000000000000000ULL,
+	    MALLOCX_ALIGN(0x80000000)),
+	    "Expected OOM for mallocx()");
 #else
-	size      = ZU(0x80000000);
-	alignment = ZU(0x80000000);
+	assert_ptr_null(mallocx(0x80000000UL, MALLOCX_ALIGN(0x80000000UL)),
+	    "Expected OOM for mallocx()");
 #endif
-	assert_ptr_null(mallocx(size, MALLOCX_ALIGN(alignment)),
-	    "Expected OOM for mallocx(size=%#zx, MALLOCX_ALIGN(%#zx)", size,
-	    alignment);
 }
 TEST_END
 
diff --git a/test/integration/xallocx.c b/test/integration/xallocx.c
index 5c4998b..ad292bb 100644
--- a/test/integration/xallocx.c
+++ b/test/integration/xallocx.c
@@ -1,5 +1,9 @@
 #include "test/jemalloc_test.h"
 
+#ifdef JEMALLOC_FILL
+const char *malloc_conf = "junk:false";
+#endif
+
 /*
  * Use a separate arena for xallocx() extension/contraction tests so that
  * internal allocation e.g. by heap profiling can't interpose allocations where
diff --git a/test/src/test.c b/test/src/test.c
index 8173614..d70cc75 100644
--- a/test/src/test.c
+++ b/test/src/test.c
@@ -60,32 +60,30 @@
 	malloc_printf("%s: %s\n", test_name, test_status_string(test_status));
 }
 
-test_status_t
-p_test(test_t *t, ...)
+static test_status_t
+p_test_impl(bool do_malloc_init, test_t *t, va_list ap)
 {
 	test_status_t ret;
-	va_list ap;
 
-	/*
-	 * Make sure initialization occurs prior to running tests.  Tests are
-	 * special because they may use internal facilities prior to triggering
-	 * initialization as a side effect of calling into the public API.  This
-	 * is a final safety that works even if jemalloc_constructor() doesn't
-	 * run, as for MSVC builds.
-	 */
-	if (nallocx(1, 0) == 0) {
-		malloc_printf("Initialization error");
-		return (test_status_fail);
+	if (do_malloc_init) {
+		/*
+		 * Make sure initialization occurs prior to running tests.
+		 * Tests are special because they may use internal facilities
+		 * prior to triggering initialization as a side effect of
+		 * calling into the public API.
+		 */
+		if (nallocx(1, 0) == 0) {
+			malloc_printf("Initialization error");
+			return (test_status_fail);
+		}
 	}
 
 	ret = test_status_pass;
-	va_start(ap, t);
 	for (; t != NULL; t = va_arg(ap, test_t *)) {
 		t();
 		if (test_status > ret)
 			ret = test_status;
 	}
-	va_end(ap);
 
 	malloc_printf("--- %s: %u/%u, %s: %u/%u, %s: %u/%u ---\n",
 	    test_status_string(test_status_pass),
@@ -98,6 +96,34 @@
 	return (ret);
 }
 
+test_status_t
+p_test(test_t *t, ...)
+{
+	test_status_t ret;
+	va_list ap;
+
+	ret = test_status_pass;
+	va_start(ap, t);
+	ret = p_test_impl(true, t, ap);
+	va_end(ap);
+
+	return (ret);
+}
+
+test_status_t
+p_test_no_malloc_init(test_t *t, ...)
+{
+	test_status_t ret;
+	va_list ap;
+
+	ret = test_status_pass;
+	va_start(ap, t);
+	ret = p_test_impl(false, t, ap);
+	va_end(ap);
+
+	return (ret);
+}
+
 void
 p_test_fail(const char *prefix, const char *message)
 {
diff --git a/test/src/timer.c b/test/src/timer.c
index e91b3cf..3c7e63a 100644
--- a/test/src/timer.c
+++ b/test/src/timer.c
@@ -32,9 +32,8 @@
 	uint64_t t0 = timer_usec(a);
 	uint64_t t1 = timer_usec(b);
 	uint64_t mult;
-	unsigned i = 0;
-	unsigned j;
-	int n;
+	size_t i = 0;
+	size_t j, n;
 
 	/* Whole. */
 	n = malloc_snprintf(&buf[i], buflen-i, "%"FMTu64, t0 / t1);
diff --git a/test/stress/microbench.c b/test/stress/microbench.c
index ee39fea..7dc45f8 100644
--- a/test/stress/microbench.c
+++ b/test/stress/microbench.c
@@ -1,7 +1,8 @@
 #include "test/jemalloc_test.h"
 
 JEMALLOC_INLINE_C void
-time_func(timedelta_t *timer, uint64_t nwarmup, uint64_t niter, void (*func)(void))
+time_func(timedelta_t *timer, uint64_t nwarmup, uint64_t niter,
+    void (*func)(void))
 {
 	uint64_t i;
 
diff --git a/test/unit/a0.c b/test/unit/a0.c
new file mode 100644
index 0000000..b9ba45a
--- /dev/null
+++ b/test/unit/a0.c
@@ -0,0 +1,19 @@
+#include "test/jemalloc_test.h"
+
+TEST_BEGIN(test_a0)
+{
+	void *p;
+
+	p = a0malloc(1);
+	assert_ptr_not_null(p, "Unexpected a0malloc() error");
+	a0dalloc(p);
+}
+TEST_END
+
+int
+main(void)
+{
+
+	return (test_no_malloc_init(
+	    test_a0));
+}
diff --git a/test/unit/arena_reset.c b/test/unit/arena_reset.c
new file mode 100644
index 0000000..8ba36c2
--- /dev/null
+++ b/test/unit/arena_reset.c
@@ -0,0 +1,159 @@
+#include "test/jemalloc_test.h"
+
+#ifdef JEMALLOC_PROF
+const char *malloc_conf = "prof:true,lg_prof_sample:0";
+#endif
+
+static unsigned
+get_nsizes_impl(const char *cmd)
+{
+	unsigned ret;
+	size_t z;
+
+	z = sizeof(unsigned);
+	assert_d_eq(mallctl(cmd, &ret, &z, NULL, 0), 0,
+	    "Unexpected mallctl(\"%s\", ...) failure", cmd);
+
+	return (ret);
+}
+
+static unsigned
+get_nsmall(void)
+{
+
+	return (get_nsizes_impl("arenas.nbins"));
+}
+
+static unsigned
+get_nlarge(void)
+{
+
+	return (get_nsizes_impl("arenas.nlruns"));
+}
+
+static unsigned
+get_nhuge(void)
+{
+
+	return (get_nsizes_impl("arenas.nhchunks"));
+}
+
+static size_t
+get_size_impl(const char *cmd, size_t ind)
+{
+	size_t ret;
+	size_t z;
+	size_t mib[4];
+	size_t miblen = 4;
+
+	z = sizeof(size_t);
+	assert_d_eq(mallctlnametomib(cmd, mib, &miblen),
+	    0, "Unexpected mallctlnametomib(\"%s\", ...) failure", cmd);
+	mib[2] = ind;
+	z = sizeof(size_t);
+	assert_d_eq(mallctlbymib(mib, miblen, &ret, &z, NULL, 0),
+	    0, "Unexpected mallctlbymib([\"%s\", %zu], ...) failure", cmd, ind);
+
+	return (ret);
+}
+
+static size_t
+get_small_size(size_t ind)
+{
+
+	return (get_size_impl("arenas.bin.0.size", ind));
+}
+
+static size_t
+get_large_size(size_t ind)
+{
+
+	return (get_size_impl("arenas.lrun.0.size", ind));
+}
+
+static size_t
+get_huge_size(size_t ind)
+{
+
+	return (get_size_impl("arenas.hchunk.0.size", ind));
+}
+
+TEST_BEGIN(test_arena_reset)
+{
+#define	NHUGE	4
+	unsigned arena_ind, nsmall, nlarge, nhuge, nptrs, i;
+	size_t sz, miblen;
+	void **ptrs;
+	int flags;
+	size_t mib[3];
+	tsdn_t *tsdn;
+
+	test_skip_if((config_valgrind && unlikely(in_valgrind)) || (config_fill
+	    && unlikely(opt_quarantine)));
+
+	sz = sizeof(unsigned);
+	assert_d_eq(mallctl("arenas.extend", &arena_ind, &sz, NULL, 0), 0,
+	    "Unexpected mallctl() failure");
+
+	flags = MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE;
+
+	nsmall = get_nsmall();
+	nlarge = get_nlarge();
+	nhuge = get_nhuge() > NHUGE ? NHUGE : get_nhuge();
+	nptrs = nsmall + nlarge + nhuge;
+	ptrs = (void **)malloc(nptrs * sizeof(void *));
+	assert_ptr_not_null(ptrs, "Unexpected malloc() failure");
+
+	/* Allocate objects with a wide range of sizes. */
+	for (i = 0; i < nsmall; i++) {
+		sz = get_small_size(i);
+		ptrs[i] = mallocx(sz, flags);
+		assert_ptr_not_null(ptrs[i],
+		    "Unexpected mallocx(%zu, %#x) failure", sz, flags);
+	}
+	for (i = 0; i < nlarge; i++) {
+		sz = get_large_size(i);
+		ptrs[nsmall + i] = mallocx(sz, flags);
+		assert_ptr_not_null(ptrs[nsmall + i],
+		    "Unexpected mallocx(%zu, %#x) failure", sz, flags);
+	}
+	for (i = 0; i < nhuge; i++) {
+		sz = get_huge_size(i);
+		ptrs[nsmall + nlarge + i] = mallocx(sz, flags);
+		assert_ptr_not_null(ptrs[nsmall + nlarge + i],
+		    "Unexpected mallocx(%zu, %#x) failure", sz, flags);
+	}
+
+	tsdn = tsdn_fetch();
+
+	/* Verify allocations. */
+	for (i = 0; i < nptrs; i++) {
+		assert_zu_gt(ivsalloc(tsdn, ptrs[i], false), 0,
+		    "Allocation should have queryable size");
+	}
+
+	/* Reset. */
+	miblen = sizeof(mib)/sizeof(size_t);
+	assert_d_eq(mallctlnametomib("arena.0.reset", mib, &miblen), 0,
+	    "Unexpected mallctlnametomib() failure");
+	mib[1] = (size_t)arena_ind;
+	assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, NULL, 0), 0,
+	    "Unexpected mallctlbymib() failure");
+
+	/* Verify allocations no longer exist. */
+	for (i = 0; i < nptrs; i++) {
+		assert_zu_eq(ivsalloc(tsdn, ptrs[i], false), 0,
+		    "Allocation should no longer exist");
+	}
+
+	free(ptrs);
+}
+TEST_END
+
+int
+main(void)
+{
+
+	return (test(
+	    test_arena_reset));
+}
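
The test above drives the new arena.<i>.reset mallctl through the MIB interface. A hedged sketch of the same call from application code, assuming arena_ind was obtained earlier from the "arenas.extend" mallctl; the helper name is hypothetical:

#include <jemalloc/jemalloc.h>

/* Discard all allocations backed by the given arena. */
static int
app_arena_reset(unsigned arena_ind)
{
	size_t mib[3];
	size_t miblen = sizeof(mib) / sizeof(size_t);
	int err;

	err = mallctlnametomib("arena.0.reset", mib, &miblen);
	if (err != 0)
		return (err);
	mib[1] = (size_t)arena_ind;
	return (mallctlbymib(mib, miblen, NULL, NULL, NULL, 0));
}
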
diff --git a/test/unit/bitmap.c b/test/unit/bitmap.c
index 1ab0bb8..a2dd546 100644
--- a/test/unit/bitmap.c
+++ b/test/unit/bitmap.c
@@ -101,7 +101,7 @@
 		bitmap_info_t binfo;
 		bitmap_info_init(&binfo, i);
 		{
-			ssize_t j;
+			size_t j;
 			bitmap_t *bitmap = (bitmap_t *)malloc(
 			    bitmap_size(&binfo));
 			bitmap_init(bitmap, &binfo);
@@ -119,7 +119,7 @@
 			 * Iteratively unset bits starting at the end, and
 			 * verify that bitmap_sfu() reaches the unset bits.
 			 */
-			for (j = i - 1; j >= 0; j--) {
+			for (j = i - 1; j < i; j--) { /* (i..0] */
 				bitmap_unset(bitmap, &binfo, j);
 				assert_zd_eq(bitmap_sfu(bitmap, &binfo), j,
 				    "First unset bit should the bit previously "
diff --git a/test/unit/ckh.c b/test/unit/ckh.c
index b117595..961e2ac 100644
--- a/test/unit/ckh.c
+++ b/test/unit/ckh.c
@@ -2,24 +2,24 @@
 
 TEST_BEGIN(test_new_delete)
 {
-	tsd_t *tsd;
+	tsdn_t *tsdn;
 	ckh_t ckh;
 
-	tsd = tsd_fetch();
+	tsdn = tsdn_fetch();
 
-	assert_false(ckh_new(tsd, &ckh, 2, ckh_string_hash, ckh_string_keycomp),
-	    "Unexpected ckh_new() error");
-	ckh_delete(tsd, &ckh);
+	assert_false(ckh_new(tsdn, &ckh, 2, ckh_string_hash,
+	    ckh_string_keycomp), "Unexpected ckh_new() error");
+	ckh_delete(tsdn, &ckh);
 
-	assert_false(ckh_new(tsd, &ckh, 3, ckh_pointer_hash,
+	assert_false(ckh_new(tsdn, &ckh, 3, ckh_pointer_hash,
 	    ckh_pointer_keycomp), "Unexpected ckh_new() error");
-	ckh_delete(tsd, &ckh);
+	ckh_delete(tsdn, &ckh);
 }
 TEST_END
 
 TEST_BEGIN(test_count_insert_search_remove)
 {
-	tsd_t *tsd;
+	tsdn_t *tsdn;
 	ckh_t ckh;
 	const char *strs[] = {
 	    "a string",
@@ -30,17 +30,17 @@
 	const char *missing = "A string not in the hash table.";
 	size_t i;
 
-	tsd = tsd_fetch();
+	tsdn = tsdn_fetch();
 
-	assert_false(ckh_new(tsd, &ckh, 2, ckh_string_hash, ckh_string_keycomp),
-	    "Unexpected ckh_new() error");
+	assert_false(ckh_new(tsdn, &ckh, 2, ckh_string_hash,
+	    ckh_string_keycomp), "Unexpected ckh_new() error");
 	assert_zu_eq(ckh_count(&ckh), 0,
 	    "ckh_count() should return %zu, but it returned %zu", ZU(0),
 	    ckh_count(&ckh));
 
 	/* Insert. */
 	for (i = 0; i < sizeof(strs)/sizeof(const char *); i++) {
-		ckh_insert(tsd, &ckh, strs[i], strs[i]);
+		ckh_insert(tsdn, &ckh, strs[i], strs[i]);
 		assert_zu_eq(ckh_count(&ckh), i+1,
 		    "ckh_count() should return %zu, but it returned %zu", i+1,
 		    ckh_count(&ckh));
@@ -85,7 +85,7 @@
 		vp = (i & 2) ? &v.p : NULL;
 		k.p = NULL;
 		v.p = NULL;
-		assert_false(ckh_remove(tsd, &ckh, strs[i], kp, vp),
+		assert_false(ckh_remove(tsdn, &ckh, strs[i], kp, vp),
 		    "Unexpected ckh_remove() error");
 
 		ks = (i & 1) ? strs[i] : (const char *)NULL;
@@ -101,22 +101,22 @@
 		    ckh_count(&ckh));
 	}
 
-	ckh_delete(tsd, &ckh);
+	ckh_delete(tsdn, &ckh);
 }
 TEST_END
 
 TEST_BEGIN(test_insert_iter_remove)
 {
 #define	NITEMS ZU(1000)
-	tsd_t *tsd;
+	tsdn_t *tsdn;
 	ckh_t ckh;
 	void **p[NITEMS];
 	void *q, *r;
 	size_t i;
 
-	tsd = tsd_fetch();
+	tsdn = tsdn_fetch();
 
-	assert_false(ckh_new(tsd, &ckh, 2, ckh_pointer_hash,
+	assert_false(ckh_new(tsdn, &ckh, 2, ckh_pointer_hash,
 	    ckh_pointer_keycomp), "Unexpected ckh_new() error");
 
 	for (i = 0; i < NITEMS; i++) {
@@ -128,7 +128,7 @@
 		size_t j;
 
 		for (j = i; j < NITEMS; j++) {
-			assert_false(ckh_insert(tsd, &ckh, p[j], p[j]),
+			assert_false(ckh_insert(tsdn, &ckh, p[j], p[j]),
 			    "Unexpected ckh_insert() failure");
 			assert_false(ckh_search(&ckh, p[j], &q, &r),
 			    "Unexpected ckh_search() failure");
@@ -143,13 +143,13 @@
 		for (j = i + 1; j < NITEMS; j++) {
 			assert_false(ckh_search(&ckh, p[j], NULL, NULL),
 			    "Unexpected ckh_search() failure");
-			assert_false(ckh_remove(tsd, &ckh, p[j], &q, &r),
+			assert_false(ckh_remove(tsdn, &ckh, p[j], &q, &r),
 			    "Unexpected ckh_remove() failure");
 			assert_ptr_eq(p[j], q, "Key pointer mismatch");
 			assert_ptr_eq(p[j], r, "Value pointer mismatch");
 			assert_true(ckh_search(&ckh, p[j], NULL, NULL),
 			    "Unexpected ckh_search() success");
-			assert_true(ckh_remove(tsd, &ckh, p[j], &q, &r),
+			assert_true(ckh_remove(tsdn, &ckh, p[j], &q, &r),
 			    "Unexpected ckh_remove() success");
 		}
 
@@ -184,13 +184,13 @@
 	for (i = 0; i < NITEMS; i++) {
 		assert_false(ckh_search(&ckh, p[i], NULL, NULL),
 		    "Unexpected ckh_search() failure");
-		assert_false(ckh_remove(tsd, &ckh, p[i], &q, &r),
+		assert_false(ckh_remove(tsdn, &ckh, p[i], &q, &r),
 		    "Unexpected ckh_remove() failure");
 		assert_ptr_eq(p[i], q, "Key pointer mismatch");
 		assert_ptr_eq(p[i], r, "Value pointer mismatch");
 		assert_true(ckh_search(&ckh, p[i], NULL, NULL),
 		    "Unexpected ckh_search() success");
-		assert_true(ckh_remove(tsd, &ckh, p[i], &q, &r),
+		assert_true(ckh_remove(tsdn, &ckh, p[i], &q, &r),
 		    "Unexpected ckh_remove() success");
 		dallocx(p[i], 0);
 	}
@@ -198,7 +198,7 @@
 	assert_zu_eq(ckh_count(&ckh), 0,
 	    "ckh_count() should return %zu, but it returned %zu",
 	    ZU(0), ckh_count(&ckh));
-	ckh_delete(tsd, &ckh);
+	ckh_delete(tsdn, &ckh);
 #undef NITEMS
 }
 TEST_END
diff --git a/test/unit/fork.c b/test/unit/fork.c
index c0d5642..46c815e 100644
--- a/test/unit/fork.c
+++ b/test/unit/fork.c
@@ -14,6 +14,13 @@
 	assert_ptr_not_null(p, "Unexpected malloc() failure");
 
 	pid = fork();
+
+	free(p);
+
+	p = malloc(64);
+	assert_ptr_not_null(p, "Unexpected malloc() failure");
+	free(p);
+
 	if (pid == -1) {
 		/* Error. */
 		test_fail("Unexpected fork() failure");
@@ -24,11 +31,23 @@
 		int status;
 
 		/* Parent. */
-		free(p);
-		do {
+		while (true) {
 			if (waitpid(pid, &status, 0) == -1)
 				test_fail("Unexpected waitpid() failure");
-		} while (!WIFEXITED(status) && !WIFSIGNALED(status));
+			if (WIFSIGNALED(status)) {
+				test_fail("Unexpected child termination due to "
+				    "signal %d", WTERMSIG(status));
+				break;
+			}
+			if (WIFEXITED(status)) {
+				if (WEXITSTATUS(status) != 0) {
+					test_fail(
+					    "Unexpected child exit value %d",
+					    WEXITSTATUS(status));
+				}
+				break;
+			}
+		}
 	}
 #else
 	test_skip("fork(2) is irrelevant to Windows");
diff --git a/test/unit/junk.c b/test/unit/junk.c
index b23dd1e..acddc60 100644
--- a/test/unit/junk.c
+++ b/test/unit/junk.c
@@ -29,7 +29,7 @@
 
 	arena_dalloc_junk_small_orig(ptr, bin_info);
 	for (i = 0; i < bin_info->reg_size; i++) {
-		assert_c_eq(((char *)ptr)[i], 0x5a,
+		assert_u_eq(((uint8_t *)ptr)[i], JEMALLOC_FREE_JUNK,
 		    "Missing junk fill for byte %zu/%zu of deallocated region",
 		    i, bin_info->reg_size);
 	}
@@ -44,7 +44,7 @@
 
 	arena_dalloc_junk_large_orig(ptr, usize);
 	for (i = 0; i < usize; i++) {
-		assert_c_eq(((char *)ptr)[i], 0x5a,
+		assert_u_eq(((uint8_t *)ptr)[i], JEMALLOC_FREE_JUNK,
 		    "Missing junk fill for byte %zu/%zu of deallocated region",
 		    i, usize);
 	}
@@ -53,10 +53,10 @@
 }
 
 static void
-huge_dalloc_junk_intercept(void *ptr, size_t usize)
+huge_dalloc_junk_intercept(tsdn_t *tsdn, void *ptr, size_t usize)
 {
 
-	huge_dalloc_junk_orig(ptr, usize);
+	huge_dalloc_junk_orig(tsdn, ptr, usize);
 	/*
 	 * The conditions under which junk filling actually occurs are nuanced
 	 * enough that it doesn't make sense to duplicate the decision logic in
@@ -69,7 +69,7 @@
 static void
 test_junk(size_t sz_min, size_t sz_max)
 {
-	char *s;
+	uint8_t *s;
 	size_t sz_prev, sz, i;
 
 	if (opt_junk_free) {
@@ -82,23 +82,23 @@
 	}
 
 	sz_prev = 0;
-	s = (char *)mallocx(sz_min, 0);
+	s = (uint8_t *)mallocx(sz_min, 0);
 	assert_ptr_not_null((void *)s, "Unexpected mallocx() failure");
 
 	for (sz = sallocx(s, 0); sz <= sz_max;
 	    sz_prev = sz, sz = sallocx(s, 0)) {
 		if (sz_prev > 0) {
-			assert_c_eq(s[0], 'a',
+			assert_u_eq(s[0], 'a',
 			    "Previously allocated byte %zu/%zu is corrupted",
 			    ZU(0), sz_prev);
-			assert_c_eq(s[sz_prev-1], 'a',
+			assert_u_eq(s[sz_prev-1], 'a',
 			    "Previously allocated byte %zu/%zu is corrupted",
 			    sz_prev-1, sz_prev);
 		}
 
 		for (i = sz_prev; i < sz; i++) {
 			if (opt_junk_alloc) {
-				assert_c_eq(s[i], 0xa5,
+				assert_u_eq(s[i], JEMALLOC_ALLOC_JUNK,
 				    "Newly allocated byte %zu/%zu isn't "
 				    "junk-filled", i, sz);
 			}
@@ -107,7 +107,7 @@
 
 		if (xallocx(s, sz+1, 0, 0) == sz) {
 			watch_junking(s);
-			s = (char *)rallocx(s, sz+1, 0);
+			s = (uint8_t *)rallocx(s, sz+1, 0);
 			assert_ptr_not_null((void *)s,
 			    "Unexpected rallocx() failure");
 			assert_true(!opt_junk_free || saw_junking,
@@ -244,7 +244,6 @@
 main(void)
 {
 
-	assert(!config_fill || opt_junk_alloc || opt_junk_free);
 	return (test(
 	    test_junk_small,
 	    test_junk_large,
diff --git a/test/unit/junk_alloc.c b/test/unit/junk_alloc.c
index 8db3331..a5895b5 100644
--- a/test/unit/junk_alloc.c
+++ b/test/unit/junk_alloc.c
@@ -1,3 +1,3 @@
-#define JEMALLOC_TEST_JUNK_OPT "junk:alloc"
+#define	JEMALLOC_TEST_JUNK_OPT "junk:alloc"
 #include "junk.c"
 #undef JEMALLOC_TEST_JUNK_OPT
diff --git a/test/unit/junk_free.c b/test/unit/junk_free.c
index 482a61d..bb5183c 100644
--- a/test/unit/junk_free.c
+++ b/test/unit/junk_free.c
@@ -1,3 +1,3 @@
-#define JEMALLOC_TEST_JUNK_OPT "junk:free"
+#define	JEMALLOC_TEST_JUNK_OPT "junk:free"
 #include "junk.c"
 #undef JEMALLOC_TEST_JUNK_OPT
diff --git a/test/unit/ph.c b/test/unit/ph.c
new file mode 100644
index 0000000..da442f0
--- /dev/null
+++ b/test/unit/ph.c
@@ -0,0 +1,290 @@
+#include "test/jemalloc_test.h"
+
+typedef struct node_s node_t;
+
+struct node_s {
+#define	NODE_MAGIC 0x9823af7e
+	uint32_t magic;
+	phn(node_t) link;
+	uint64_t key;
+};
+
+static int
+node_cmp(const node_t *a, const node_t *b)
+{
+	int ret;
+
+	ret = (a->key > b->key) - (a->key < b->key);
+	if (ret == 0) {
+		/*
+		 * Duplicates are not allowed in the heap, so force an
+		 * arbitrary ordering for non-identical items with equal keys.
+		 */
+		ret = (((uintptr_t)a) > ((uintptr_t)b))
+		    - (((uintptr_t)a) < ((uintptr_t)b));
+	}
+	return (ret);
+}
+
+static int
+node_cmp_magic(const node_t *a, const node_t *b) {
+
+	assert_u32_eq(a->magic, NODE_MAGIC, "Bad magic");
+	assert_u32_eq(b->magic, NODE_MAGIC, "Bad magic");
+
+	return (node_cmp(a, b));
+}
+
+typedef ph(node_t) heap_t;
+ph_gen(static, heap_, heap_t, node_t, link, node_cmp_magic);
+
+static void
+node_print(const node_t *node, unsigned depth)
+{
+	unsigned i;
+	node_t *leftmost_child, *sibling;
+
+	for (i = 0; i < depth; i++)
+		malloc_printf("\t");
+	malloc_printf("%2"FMTu64"\n", node->key);
+
+	leftmost_child = phn_lchild_get(node_t, link, node);
+	if (leftmost_child == NULL)
+		return;
+	node_print(leftmost_child, depth + 1);
+
+	for (sibling = phn_next_get(node_t, link, leftmost_child); sibling !=
+	    NULL; sibling = phn_next_get(node_t, link, sibling)) {
+		node_print(sibling, depth + 1);
+	}
+}
+
+static void
+heap_print(const heap_t *heap)
+{
+	node_t *auxelm;
+
+	malloc_printf("vvv heap %p vvv\n", heap);
+	if (heap->ph_root == NULL)
+		goto label_return;
+
+	node_print(heap->ph_root, 0);
+
+	for (auxelm = phn_next_get(node_t, link, heap->ph_root); auxelm != NULL;
+	    auxelm = phn_next_get(node_t, link, auxelm)) {
+		assert_ptr_eq(phn_next_get(node_t, link, phn_prev_get(node_t,
+		    link, auxelm)), auxelm,
+		    "auxelm's prev doesn't link to auxelm");
+		node_print(auxelm, 0);
+	}
+
+label_return:
+	malloc_printf("^^^ heap %p ^^^\n", heap);
+}
+
+static unsigned
+node_validate(const node_t *node, const node_t *parent)
+{
+	unsigned nnodes = 1;
+	node_t *leftmost_child, *sibling;
+
+	if (parent != NULL) {
+		assert_d_ge(node_cmp_magic(node, parent), 0,
+		    "Child is less than parent");
+	}
+
+	leftmost_child = phn_lchild_get(node_t, link, node);
+	if (leftmost_child == NULL)
+		return (nnodes);
+	assert_ptr_eq((void *)phn_prev_get(node_t, link, leftmost_child),
+	    (void *)node, "Leftmost child does not link to node");
+	nnodes += node_validate(leftmost_child, node);
+
+	for (sibling = phn_next_get(node_t, link, leftmost_child); sibling !=
+	    NULL; sibling = phn_next_get(node_t, link, sibling)) {
+		assert_ptr_eq(phn_next_get(node_t, link, phn_prev_get(node_t,
+		    link, sibling)), sibling,
+		    "sibling's prev doesn't link to sibling");
+		nnodes += node_validate(sibling, node);
+	}
+	return (nnodes);
+}
+
+static unsigned
+heap_validate(const heap_t *heap)
+{
+	unsigned nnodes = 0;
+	node_t *auxelm;
+
+	if (heap->ph_root == NULL)
+		goto label_return;
+
+	nnodes += node_validate(heap->ph_root, NULL);
+
+	for (auxelm = phn_next_get(node_t, link, heap->ph_root); auxelm != NULL;
+	    auxelm = phn_next_get(node_t, link, auxelm)) {
+		assert_ptr_eq(phn_next_get(node_t, link, phn_prev_get(node_t,
+		    link, auxelm)), auxelm,
+		    "auxelm's prev doesn't link to auxelm");
+		nnodes += node_validate(auxelm, NULL);
+	}
+
+label_return:
+	if (false)
+		heap_print(heap);
+	return (nnodes);
+}
+
+TEST_BEGIN(test_ph_empty)
+{
+	heap_t heap;
+
+	heap_new(&heap);
+	assert_true(heap_empty(&heap), "Heap should be empty");
+	assert_ptr_null(heap_first(&heap), "Unexpected node");
+}
+TEST_END
+
+static void
+node_remove(heap_t *heap, node_t *node)
+{
+
+	heap_remove(heap, node);
+
+	node->magic = 0;
+}
+
+static node_t *
+node_remove_first(heap_t *heap)
+{
+	node_t *node = heap_remove_first(heap);
+	node->magic = 0;
+	return (node);
+}
+
+TEST_BEGIN(test_ph_random)
+{
+#define	NNODES 25
+#define	NBAGS 250
+#define	SEED 42
+	sfmt_t *sfmt;
+	uint64_t bag[NNODES];
+	heap_t heap;
+	node_t nodes[NNODES];
+	unsigned i, j, k;
+
+	sfmt = init_gen_rand(SEED);
+	for (i = 0; i < NBAGS; i++) {
+		switch (i) {
+		case 0:
+			/* Insert in order. */
+			for (j = 0; j < NNODES; j++)
+				bag[j] = j;
+			break;
+		case 1:
+			/* Insert in reverse order. */
+			for (j = 0; j < NNODES; j++)
+				bag[j] = NNODES - j - 1;
+			break;
+		default:
+			for (j = 0; j < NNODES; j++)
+				bag[j] = gen_rand64_range(sfmt, NNODES);
+		}
+
+		for (j = 1; j <= NNODES; j++) {
+			/* Initialize heap and nodes. */
+			heap_new(&heap);
+			assert_u_eq(heap_validate(&heap), 0,
+			    "Incorrect node count");
+			for (k = 0; k < j; k++) {
+				nodes[k].magic = NODE_MAGIC;
+				nodes[k].key = bag[k];
+			}
+
+			/* Insert nodes. */
+			for (k = 0; k < j; k++) {
+				heap_insert(&heap, &nodes[k]);
+				if (i % 13 == 12) {
+					/* Trigger merging. */
+					assert_ptr_not_null(heap_first(&heap),
+					    "Heap should not be empty");
+				}
+				assert_u_eq(heap_validate(&heap), k + 1,
+				    "Incorrect node count");
+			}
+
+			assert_false(heap_empty(&heap),
+			    "Heap should not be empty");
+
+			/* Remove nodes. */
+			switch (i % 4) {
+			case 0:
+				for (k = 0; k < j; k++) {
+					assert_u_eq(heap_validate(&heap), j - k,
+					    "Incorrect node count");
+					node_remove(&heap, &nodes[k]);
+					assert_u_eq(heap_validate(&heap), j - k
+					    - 1, "Incorrect node count");
+				}
+				break;
+			case 1:
+				for (k = j; k > 0; k--) {
+					node_remove(&heap, &nodes[k-1]);
+					assert_u_eq(heap_validate(&heap), k - 1,
+					    "Incorrect node count");
+				}
+				break;
+			case 2: {
+				node_t *prev = NULL;
+				for (k = 0; k < j; k++) {
+					node_t *node = node_remove_first(&heap);
+					assert_u_eq(heap_validate(&heap), j - k
+					    - 1, "Incorrect node count");
+					if (prev != NULL) {
+						assert_d_ge(node_cmp(node,
+						    prev), 0,
+						    "Bad removal order");
+					}
+					prev = node;
+				}
+				break;
+			} case 3: {
+				node_t *prev = NULL;
+				for (k = 0; k < j; k++) {
+					node_t *node = heap_first(&heap);
+					assert_u_eq(heap_validate(&heap), j - k,
+					    "Incorrect node count");
+					if (prev != NULL) {
+						assert_d_ge(node_cmp(node,
+						    prev), 0,
+						    "Bad removal order");
+					}
+					node_remove(&heap, node);
+					assert_u_eq(heap_validate(&heap), j - k
+					    - 1, "Incorrect node count");
+					prev = node;
+				}
+				break;
+			} default:
+				not_reached();
+			}
+
+			assert_ptr_null(heap_first(&heap),
+			    "Heap should be empty");
+			assert_true(heap_empty(&heap), "Heap should be empty");
+		}
+	}
+	fini_gen_rand(sfmt);
+#undef NNODES
+#undef NBAGS
+#undef SEED
+}
+TEST_END
+
+int
+main(void)
+{
+
+	return (test(
+	    test_ph_empty,
+	    test_ph_random));
+}
diff --git a/test/unit/prof_reset.c b/test/unit/prof_reset.c
index 69983e5..5ae45fd 100644
--- a/test/unit/prof_reset.c
+++ b/test/unit/prof_reset.c
@@ -94,7 +94,8 @@
 bool prof_dump_header_intercepted = false;
 prof_cnt_t cnt_all_copy = {0, 0, 0, 0};
 static bool
-prof_dump_header_intercept(bool propagate_err, const prof_cnt_t *cnt_all)
+prof_dump_header_intercept(tsdn_t *tsdn, bool propagate_err,
+    const prof_cnt_t *cnt_all)
 {
 
 	prof_dump_header_intercepted = true;
diff --git a/test/unit/stats.c b/test/unit/stats.c
index 6e80316..a9a3981 100644
--- a/test/unit/stats.c
+++ b/test/unit/stats.c
@@ -220,11 +220,11 @@
 	if (config_stats) {
 		assert_zu_gt(allocated, 0,
 		    "allocated should be greater than zero");
-		assert_zu_gt(nmalloc, 0,
+		assert_u64_gt(nmalloc, 0,
 		    "nmalloc should be greater than zero");
-		assert_zu_ge(nmalloc, ndalloc,
+		assert_u64_ge(nmalloc, ndalloc,
 		    "nmalloc should be at least as large as ndalloc");
-		assert_zu_gt(nrequests, 0,
+		assert_u64_gt(nrequests, 0,
 		    "nrequests should be greater than zero");
 	}
 
@@ -262,9 +262,9 @@
 	if (config_stats) {
 		assert_zu_gt(allocated, 0,
 		    "allocated should be greater than zero");
-		assert_zu_gt(nmalloc, 0,
+		assert_u64_gt(nmalloc, 0,
 		    "nmalloc should be greater than zero");
-		assert_zu_ge(nmalloc, ndalloc,
+		assert_u64_ge(nmalloc, ndalloc,
 		    "nmalloc should be at least as large as ndalloc");
 	}
 
diff --git a/test/unit/tsd.c b/test/unit/tsd.c
index 8be787f..7dde4b7 100644
--- a/test/unit/tsd.c
+++ b/test/unit/tsd.c
@@ -99,6 +99,11 @@
 main(void)
 {
 
+	/* Core tsd bootstrapping must happen prior to data_tsd_boot(). */
+	if (nallocx(1, 0) == 0) {
+		malloc_printf("Initialization error\n");
+		return (test_status_fail);
+	}
 	data_tsd_boot();
 
 	return (test(
diff --git a/test/unit/util.c b/test/unit/util.c
index 2f65aad..c958dc0 100644
--- a/test/unit/util.c
+++ b/test/unit/util.c
@@ -4,27 +4,27 @@
 	unsigned i, pow2;						\
 	t x;								\
 									\
-	assert_zu_eq(pow2_ceil_##suf(0), 0, "Unexpected result");	\
+	assert_##suf##_eq(pow2_ceil_##suf(0), 0, "Unexpected result");	\
 									\
 	for (i = 0; i < sizeof(t) * 8; i++) {				\
-		assert_zu_eq(pow2_ceil_##suf(((t)1) << i), ((t)1) << i,	\
-		    "Unexpected result");				\
+		assert_##suf##_eq(pow2_ceil_##suf(((t)1) << i), ((t)1)	\
+		    << i, "Unexpected result");				\
 	}								\
 									\
 	for (i = 2; i < sizeof(t) * 8; i++) {				\
-		assert_zu_eq(pow2_ceil_##suf((((t)1) << i) - 1),	\
+		assert_##suf##_eq(pow2_ceil_##suf((((t)1) << i) - 1),	\
 		    ((t)1) << i, "Unexpected result");			\
 	}								\
 									\
 	for (i = 0; i < sizeof(t) * 8 - 1; i++) {			\
-		assert_zu_eq(pow2_ceil_##suf((((t)1) << i) + 1),	\
+		assert_##suf##_eq(pow2_ceil_##suf((((t)1) << i) + 1),	\
 		    ((t)1) << (i+1), "Unexpected result");		\
 	}								\
 									\
 	for (pow2 = 1; pow2 < 25; pow2++) {				\
 		for (x = (((t)1) << (pow2-1)) + 1; x <= ((t)1) << pow2;	\
 		    x++) {						\
-			assert_zu_eq(pow2_ceil_##suf(x),		\
+			assert_##suf##_eq(pow2_ceil_##suf(x),		\
 			    ((t)1) << pow2,				\
 			    "Unexpected result, x=%"pri, x);		\
 		}							\
@@ -160,14 +160,14 @@
 {
 #define	BUFLEN	15
 	char buf[BUFLEN];
-	int result;
+	size_t result;
 	size_t len;
-#define TEST(expected_str_untruncated, ...) do {			\
+#define	TEST(expected_str_untruncated, ...) do {			\
 	result = malloc_snprintf(buf, len, __VA_ARGS__);		\
 	assert_d_eq(strncmp(buf, expected_str_untruncated, len-1), 0,	\
 	    "Unexpected string inequality (\"%s\" vs \"%s\")",		\
-	    buf, expected_str_untruncated);		\
-	assert_d_eq(result, strlen(expected_str_untruncated),		\
+	    buf, expected_str_untruncated);				\
+	assert_zu_eq(result, strlen(expected_str_untruncated),		\
 	    "Unexpected result");					\
 } while (0)
 
@@ -193,11 +193,11 @@
 {
 #define	BUFLEN	128
 	char buf[BUFLEN];
-	int result;
+	size_t result;
 #define	TEST(expected_str, ...) do {					\
 	result = malloc_snprintf(buf, sizeof(buf), __VA_ARGS__);	\
 	assert_str_eq(buf, expected_str, "Unexpected output");		\
-	assert_d_eq(result, strlen(expected_str), "Unexpected result");	\
+	assert_zu_eq(result, strlen(expected_str), "Unexpected result");\
 } while (0)
 
 	TEST("hello", "hello");
diff --git a/test/unit/witness.c b/test/unit/witness.c
new file mode 100644
index 0000000..ed17275
--- /dev/null
+++ b/test/unit/witness.c
@@ -0,0 +1,278 @@
+#include "test/jemalloc_test.h"
+
+static witness_lock_error_t *witness_lock_error_orig;
+static witness_owner_error_t *witness_owner_error_orig;
+static witness_not_owner_error_t *witness_not_owner_error_orig;
+static witness_lockless_error_t *witness_lockless_error_orig;
+
+static bool saw_lock_error;
+static bool saw_owner_error;
+static bool saw_not_owner_error;
+static bool saw_lockless_error;
+
+static void
+witness_lock_error_intercept(const witness_list_t *witnesses,
+    const witness_t *witness)
+{
+
+	saw_lock_error = true;
+}
+
+static void
+witness_owner_error_intercept(const witness_t *witness)
+{
+
+	saw_owner_error = true;
+}
+
+static void
+witness_not_owner_error_intercept(const witness_t *witness)
+{
+
+	saw_not_owner_error = true;
+}
+
+static void
+witness_lockless_error_intercept(const witness_list_t *witnesses)
+{
+
+	saw_lockless_error = true;
+}
+
+static int
+witness_comp(const witness_t *a, const witness_t *b)
+{
+
+	assert_u_eq(a->rank, b->rank, "Witnesses should have equal rank");
+
+	return (strcmp(a->name, b->name));
+}
+
+static int
+witness_comp_reverse(const witness_t *a, const witness_t *b)
+{
+
+	assert_u_eq(a->rank, b->rank, "Witnesses should have equal rank");
+
+	return (-strcmp(a->name, b->name));
+}
+
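+/* Acquire and release two witnesses in increasing rank order; no errors. */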
+TEST_BEGIN(test_witness)
+{
+	witness_t a, b;
+	tsdn_t *tsdn;
+
+	test_skip_if(!config_debug);
+
+	tsdn = tsdn_fetch();
+
+	witness_assert_lockless(tsdn);
+
+	witness_init(&a, "a", 1, NULL);
+	witness_assert_not_owner(tsdn, &a);
+	witness_lock(tsdn, &a);
+	witness_assert_owner(tsdn, &a);
+
+	witness_init(&b, "b", 2, NULL);
+	witness_assert_not_owner(tsdn, &b);
+	witness_lock(tsdn, &b);
+	witness_assert_owner(tsdn, &b);
+
+	witness_unlock(tsdn, &a);
+	witness_unlock(tsdn, &b);
+
+	witness_assert_lockless(tsdn);
+}
+TEST_END
+
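+/*
+ * Equal-rank witnesses are ordered by comparator.  Locking a same-rank
+ * witness with a different comparator (c) or no comparator (d) must trigger
+ * the intercepted lock error; a compatible one (b) must not.
+ */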
+TEST_BEGIN(test_witness_comp)
+{
+	witness_t a, b, c, d;
+	tsdn_t *tsdn;
+
+	test_skip_if(!config_debug);
+
+	tsdn = tsdn_fetch();
+
+	witness_assert_lockless(tsdn);
+
+	witness_init(&a, "a", 1, witness_comp);
+	witness_assert_not_owner(tsdn, &a);
+	witness_lock(tsdn, &a);
+	witness_assert_owner(tsdn, &a);
+
+	witness_init(&b, "b", 1, witness_comp);
+	witness_assert_not_owner(tsdn, &b);
+	witness_lock(tsdn, &b);
+	witness_assert_owner(tsdn, &b);
+	witness_unlock(tsdn, &b);
+
+	witness_lock_error_orig = witness_lock_error;
+	witness_lock_error = witness_lock_error_intercept;
+	saw_lock_error = false;
+
+	witness_init(&c, "c", 1, witness_comp_reverse);
+	witness_assert_not_owner(tsdn, &c);
+	assert_false(saw_lock_error, "Unexpected witness lock error");
+	witness_lock(tsdn, &c);
+	assert_true(saw_lock_error, "Expected witness lock error");
+	witness_unlock(tsdn, &c);
+
+	saw_lock_error = false;
+
+	witness_init(&d, "d", 1, NULL);
+	witness_assert_not_owner(tsdn, &d);
+	assert_false(saw_lock_error, "Unexpected witness lock error");
+	witness_lock(tsdn, &d);
+	assert_true(saw_lock_error, "Expected witness lock error");
+	witness_unlock(tsdn, &d);
+
+	witness_unlock(tsdn, &a);
+
+	witness_assert_lockless(tsdn);
+
+	witness_lock_error = witness_lock_error_orig;
+}
+TEST_END
+
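+/*
+ * Acquiring a lower-ranked witness while a higher-ranked one is held must
+ * trigger the intercepted lock error.
+ */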
+TEST_BEGIN(test_witness_reversal)
+{
+	witness_t a, b;
+	tsdn_t *tsdn;
+
+	test_skip_if(!config_debug);
+
+	witness_lock_error_orig = witness_lock_error;
+	witness_lock_error = witness_lock_error_intercept;
+	saw_lock_error = false;
+
+	tsdn = tsdn_fetch();
+
+	witness_assert_lockless(tsdn);
+
+	witness_init(&a, "a", 1, NULL);
+	witness_init(&b, "b", 2, NULL);
+
+	witness_lock(tsdn, &b);
+	assert_false(saw_lock_error, "Unexpected witness lock error");
+	witness_lock(tsdn, &a);
+	assert_true(saw_lock_error, "Expected witness lock error");
+
+	witness_unlock(tsdn, &a);
+	witness_unlock(tsdn, &b);
+
+	witness_assert_lockless(tsdn);
+
+	witness_lock_error = witness_lock_error_orig;
+}
+TEST_END
+
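+/*
+ * Recursively acquiring an already-owned witness must trigger both the
+ * intercepted not-owner error and the intercepted lock error.
+ */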
+TEST_BEGIN(test_witness_recursive)
+{
+	witness_t a;
+	tsdn_t *tsdn;
+
+	test_skip_if(!config_debug);
+
+	witness_not_owner_error_orig = witness_not_owner_error;
+	witness_not_owner_error = witness_not_owner_error_intercept;
+	saw_not_owner_error = false;
+
+	witness_lock_error_orig = witness_lock_error;
+	witness_lock_error = witness_lock_error_intercept;
+	saw_lock_error = false;
+
+	tsdn = tsdn_fetch();
+
+	witness_assert_lockless(tsdn);
+
+	witness_init(&a, "a", 1, NULL);
+
+	witness_lock(tsdn, &a);
+	assert_false(saw_lock_error, "Unexpected witness lock error");
+	assert_false(saw_not_owner_error, "Unexpected witness not owner error");
+	witness_lock(tsdn, &a);
+	assert_true(saw_lock_error, "Expected witness lock error");
+	assert_true(saw_not_owner_error, "Expected witness not owner error");
+
+	witness_unlock(tsdn, &a);
+
+	witness_assert_lockless(tsdn);
+
+	witness_not_owner_error = witness_not_owner_error_orig;
+	witness_lock_error = witness_lock_error_orig;
+}
+TEST_END
+
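+/*
+ * Unlocking a witness that is not owned must trigger the intercepted owner
+ * error.
+ */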
+TEST_BEGIN(test_witness_unlock_not_owned)
+{
+	witness_t a;
+	tsdn_t *tsdn;
+
+	test_skip_if(!config_debug);
+
+	witness_owner_error_orig = witness_owner_error;
+	witness_owner_error = witness_owner_error_intercept;
+	saw_owner_error = false;
+
+	tsdn = tsdn_fetch();
+
+	witness_assert_lockless(tsdn);
+
+	witness_init(&a, "a", 1, NULL);
+
+	assert_false(saw_owner_error, "Unexpected owner error");
+	witness_unlock(tsdn, &a);
+	assert_true(saw_owner_error, "Expected owner error");
+
+	witness_assert_lockless(tsdn);
+
+	witness_owner_error = witness_owner_error_orig;
+}
+TEST_END
+
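+/*
+ * witness_assert_lockless() must trigger the intercepted lockless error once
+ * a witness is held.
+ */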
+TEST_BEGIN(test_witness_lockful)
+{
+	witness_t a;
+	tsdn_t *tsdn;
+
+	test_skip_if(!config_debug);
+
+	witness_lockless_error_orig = witness_lockless_error;
+	witness_lockless_error = witness_lockless_error_intercept;
+	saw_lockless_error = false;
+
+	tsdn = tsdn_fetch();
+
+	witness_assert_lockless(tsdn);
+
+	witness_init(&a, "a", 1, NULL);
+
+	assert_false(saw_lockless_error, "Unexpected lockless error");
+	witness_assert_lockless(tsdn);
+
+	witness_lock(tsdn, &a);
+	witness_assert_lockless(tsdn);
+	assert_true(saw_lockless_error, "Expected lockless error");
+
+	witness_unlock(tsdn, &a);
+
+	witness_assert_lockless(tsdn);
+
+	witness_lockless_error = witness_lockless_error_orig;
+}
+TEST_END
+
+int
+main(void)
+{
+
+	return (test(
+	    test_witness,
+	    test_witness_comp,
+	    test_witness_reversal,
+	    test_witness_recursive,
+	    test_witness_unlock_not_owned,
+	    test_witness_lockful));
+}
diff --git a/test/unit/zero.c b/test/unit/zero.c
index 93afc2b..30ebe37 100644
--- a/test/unit/zero.c
+++ b/test/unit/zero.c
@@ -8,39 +8,41 @@
 static void
 test_zero(size_t sz_min, size_t sz_max)
 {
-	char *s;
+	uint8_t *s;
 	size_t sz_prev, sz, i;
+#define	MAGIC	((uint8_t)0x61)
 
 	sz_prev = 0;
-	s = (char *)mallocx(sz_min, 0);
+	s = (uint8_t *)mallocx(sz_min, 0);
 	assert_ptr_not_null((void *)s, "Unexpected mallocx() failure");
 
 	for (sz = sallocx(s, 0); sz <= sz_max;
 	    sz_prev = sz, sz = sallocx(s, 0)) {
 		if (sz_prev > 0) {
-			assert_c_eq(s[0], 'a',
+			assert_u_eq(s[0], MAGIC,
 			    "Previously allocated byte %zu/%zu is corrupted",
 			    ZU(0), sz_prev);
-			assert_c_eq(s[sz_prev-1], 'a',
+			assert_u_eq(s[sz_prev-1], MAGIC,
 			    "Previously allocated byte %zu/%zu is corrupted",
 			    sz_prev-1, sz_prev);
 		}
 
 		for (i = sz_prev; i < sz; i++) {
-			assert_c_eq(s[i], 0x0,
+			assert_u_eq(s[i], 0x0,
 			    "Newly allocated byte %zu/%zu isn't zero-filled",
 			    i, sz);
-			s[i] = 'a';
+			s[i] = MAGIC;
 		}
 
 		if (xallocx(s, sz+1, 0, 0) == sz) {
-			s = (char *)rallocx(s, sz+1, 0);
+			s = (uint8_t *)rallocx(s, sz+1, 0);
 			assert_ptr_not_null((void *)s,
 			    "Unexpected rallocx() failure");
 		}
 	}
 
 	dallocx(s, 0);
+#undef MAGIC
 }
 
 TEST_BEGIN(test_zero_small)