Merge branch 'dev'
diff --git a/ChangeLog b/ChangeLog
index 98c12f2..ee1b7ea 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -4,6 +4,41 @@
 
     https://github.com/jemalloc/jemalloc
 
+* 5.0.1 (July 1, 2017)
+
+  This bugfix release fixes several issues, most of which are obscure enough
+  that typical applications are not impacted.
+
+  Bug fixes:
+  - Update decay->nunpurged before purging, in order to avoid potential update
+    races and subsequent incorrect purging volume.  (@interwq)
+  - Only abort on dlsym(3) error if the failure impacts an enabled feature (lazy
+    locking and/or background threads).  This mitigates an initialization
+    failure bug for which we still do not have a clear reproduction test case.
+    (@interwq)
+  - Modify tsd management so that it neither crashes nor leaks if a thread's
+    only allocation activity is to call free() after TLS destructors have been
+    executed.  This behavior was observed when operating with GNU libc, and is
+    unlikely to be an issue with other libc implementations.  (@interwq)
+  - Mask signals during background thread creation.  This prevents signals from
+    being inadvertently delivered to background threads.  (@jasone,
+    @davidgoldblatt, @interwq)
+  - Avoid inactivity checks within background threads, in order to prevent
+    recursive mutex acquisition.  (@interwq)
+  - Fix extent_grow_retained() to use the specified hooks when the
+    arena.<i>.extent_hooks mallctl is used to override the default hooks.
+    (@interwq)
+  - Add missing reentrancy support for custom extent hooks which allocate.
+    (@interwq)
+  - Post-fork(2), re-initialize the list of tcaches associated with each arena
+    to contain no tcaches except the forking thread's.  (@interwq)
+  - Add missing post-fork(2) mutex reinitialization for extent_grow_mtx.  This
+    fixes potential deadlocks after fork(2).  (@interwq)
+  - Enforce minimum autoconf version (currently 2.68), since 2.63 is known to
+    generate corrupt configure scripts.  (@jasone)
+  - Ensure that the configured page size (--with-lg-page) is no larger than the
+    configured huge page size (--with-lg-hugepage).  (@jasone)
+
 * 5.0.0 (June 13, 2017)
 
   Unlike all previous jemalloc releases, this release does not use naturally
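
The tsd fix in the ChangeLog above (a thread whose only allocation activity
is free() after TLS destructors have run) is easiest to picture standalone.
Below is a minimal sketch in hypothetical application code, not part of this
patch; depending on key-destructor ordering, dtor() can fire after jemalloc's
own TSD destructor, making free() the thread's sole allocator call.

#include <pthread.h>
#include <stdlib.h>

static pthread_key_t key;

static void
dtor(void *arg) {
	/* Possibly this thread's first and only allocator call. */
	free(arg);
}

static void *
thread_main(void *arg) {
	/* Store a pointer malloc()ed by the parent; allocate nothing. */
	pthread_setspecific(key, arg);
	return NULL;
}

int
main(void) {
	pthread_t thd;
	pthread_key_create(&key, dtor);
	pthread_create(&thd, NULL, thread_main, malloc(1));
	pthread_join(thd, NULL);
	return 0;
}
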
diff --git a/configure.ac b/configure.ac
index 32ae02c..1551ded 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1,4 +1,5 @@
 dnl Process this file with autoconf to produce a configure script.
+AC_PREREQ(2.68)
 AC_INIT([Makefile.in])
 
 AC_CONFIG_AUX_DIR([build-aux])
@@ -1373,6 +1374,10 @@
     je_cv_lg_hugepage=21
   fi
 fi
+if test "x${LG_PAGE}" != "xundefined" -a \
+        "${je_cv_lg_hugepage}" -lt "${LG_PAGE}" ; then
+  AC_MSG_ERROR([Huge page size (2^${je_cv_lg_hugepage}) must be at least page size (2^${LG_PAGE})])
+fi
 AC_DEFINE_UNQUOTED([LG_HUGEPAGE], [${je_cv_lg_hugepage}])
 
 AC_ARG_WITH([lg_page_sizes],
@@ -1470,6 +1475,15 @@
   if test "x${je_cv_pthread_atfork}" = "xyes" ; then
     AC_DEFINE([JEMALLOC_HAVE_PTHREAD_ATFORK], [ ])
   fi
+  dnl Check if pthread_setname_np is available with the expected API.
+  JE_COMPILABLE([pthread_setname_np(3)], [
+#include <pthread.h>
+], [
+  pthread_setname_np(pthread_self(), "setname_test");
+], [je_cv_pthread_setname_np])
+  if test "x${je_cv_pthread_setname_np}" = "xyes" ; then
+    AC_DEFINE([JEMALLOC_HAVE_PTHREAD_SETNAME_NP], [ ])
+  fi
 fi
 
 JE_APPEND_VS(CPPFLAGS, -D_REENTRANT)
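
The pthread_setname_np(3) probe above amounts to compiling roughly the
following program (a sketch of the generated test, not literal configure
output; _GNU_SOURCE is added here to keep the sketch self-contained on
glibc, where the two-argument API lives behind it).

#define _GNU_SOURCE
#include <pthread.h>

int
main(void) {
	/* Compiles only where the two-argument glibc API is available. */
	pthread_setname_np(pthread_self(), "setname_test");
	return 0;
}
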
diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h
index 3a85bcb..af16d15 100644
--- a/include/jemalloc/internal/arena_externs.h
+++ b/include/jemalloc/internal/arena_externs.h
@@ -90,6 +90,7 @@
 void arena_prefork4(tsdn_t *tsdn, arena_t *arena);
 void arena_prefork5(tsdn_t *tsdn, arena_t *arena);
 void arena_prefork6(tsdn_t *tsdn, arena_t *arena);
+void arena_prefork7(tsdn_t *tsdn, arena_t *arena);
 void arena_postfork_parent(tsdn_t *tsdn, arena_t *arena);
 void arena_postfork_child(tsdn_t *tsdn, arena_t *arena);
 
diff --git a/include/jemalloc/internal/background_thread_externs.h b/include/jemalloc/internal/background_thread_externs.h
index 7c88369..8b4b847 100644
--- a/include/jemalloc/internal/background_thread_externs.h
+++ b/include/jemalloc/internal/background_thread_externs.h
@@ -6,6 +6,7 @@
 extern atomic_b_t background_thread_enabled_state;
 extern size_t n_background_threads;
 extern background_thread_info_t *background_thread_info;
+extern bool can_enable_background_thread;
 
 bool background_thread_create(tsd_t *tsd, unsigned arena_ind);
 bool background_threads_enable(tsd_t *tsd);
diff --git a/include/jemalloc/internal/background_thread_inlines.h b/include/jemalloc/internal/background_thread_inlines.h
index fd5095f..ef50231 100644
--- a/include/jemalloc/internal/background_thread_inlines.h
+++ b/include/jemalloc/internal/background_thread_inlines.h
@@ -41,8 +41,9 @@
 }
 
 JEMALLOC_ALWAYS_INLINE void
-arena_background_thread_inactivity_check(tsdn_t *tsdn, arena_t *arena) {
-	if (!background_thread_enabled()) {
+arena_background_thread_inactivity_check(tsdn_t *tsdn, arena_t *arena,
+    bool is_background_thread) {
+	if (!background_thread_enabled() || is_background_thread) {
 		return;
 	}
 	background_thread_info_t *info =
diff --git a/include/jemalloc/internal/base_externs.h b/include/jemalloc/internal/base_externs.h
index 0a1114f..a4fd5ac 100644
--- a/include/jemalloc/internal/base_externs.h
+++ b/include/jemalloc/internal/base_externs.h
@@ -3,7 +3,7 @@
 
 base_t *b0get(void);
 base_t *base_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks);
-void base_delete(base_t *base);
+void base_delete(tsdn_t *tsdn, base_t *base);
 extent_hooks_t *base_extent_hooks_get(base_t *base);
 extent_hooks_t *base_extent_hooks_set(base_t *base,
     extent_hooks_t *extent_hooks);
diff --git a/include/jemalloc/internal/ctl.h b/include/jemalloc/internal/ctl.h
index f159383..a91c4cf 100644
--- a/include/jemalloc/internal/ctl.h
+++ b/include/jemalloc/internal/ctl.h
@@ -91,8 +91,7 @@
 
 int ctl_byname(tsd_t *tsd, const char *name, void *oldp, size_t *oldlenp,
     void *newp, size_t newlen);
-int ctl_nametomib(tsdn_t *tsdn, const char *name, size_t *mibp,
-    size_t *miblenp);
+int ctl_nametomib(tsd_t *tsd, const char *name, size_t *mibp, size_t *miblenp);
 
 int ctl_bymib(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
     size_t *oldlenp, void *newp, size_t newlen);
diff --git a/include/jemalloc/internal/jemalloc_internal_decls.h b/include/jemalloc/internal/jemalloc_internal_decls.h
index 1efdb56..8ae5ef4 100644
--- a/include/jemalloc/internal/jemalloc_internal_decls.h
+++ b/include/jemalloc/internal/jemalloc_internal_decls.h
@@ -22,6 +22,7 @@
 #    include <sys/uio.h>
 #  endif
 #  include <pthread.h>
+#  include <signal.h>
 #  ifdef JEMALLOC_OS_UNFAIR_LOCK
 #    include <os/lock.h>
 #  endif
diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in
index 2bf9dea..c0f834f 100644
--- a/include/jemalloc/internal/jemalloc_internal_defs.h.in
+++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in
@@ -98,6 +98,9 @@
 /* Defined if pthread_atfork(3) is available. */
 #undef JEMALLOC_HAVE_PTHREAD_ATFORK
 
+/* Defined if pthread_setname_np(3) is available. */
+#undef JEMALLOC_HAVE_PTHREAD_SETNAME_NP
+
 /*
  * Defined if clock_gettime(CLOCK_MONOTONIC_COARSE, ...) is available.
  */
diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_a.h b/include/jemalloc/internal/jemalloc_internal_inlines_a.h
index 854fb1e..24ea416 100644
--- a/include/jemalloc/internal/jemalloc_internal_inlines_a.h
+++ b/include/jemalloc/internal/jemalloc_internal_inlines_a.h
@@ -146,7 +146,10 @@
 }
 
 static inline void
-pre_reentrancy(tsd_t *tsd) {
+pre_reentrancy(tsd_t *tsd, arena_t *arena) {
+	/* arena is the current context.  Reentry from a0 is not allowed. */
+	assert(arena != arena_get(tsd_tsdn(tsd), 0, false));
+
 	bool fast = tsd_fast(tsd);
 	++*tsd_reentrancy_levelp_get(tsd);
 	if (fast) {
diff --git a/include/jemalloc/internal/tcache_externs.h b/include/jemalloc/internal/tcache_externs.h
index abe133f..db3e9c7 100644
--- a/include/jemalloc/internal/tcache_externs.h
+++ b/include/jemalloc/internal/tcache_externs.h
@@ -48,7 +48,7 @@
 void tcache_prefork(tsdn_t *tsdn);
 void tcache_postfork_parent(tsdn_t *tsdn);
 void tcache_postfork_child(tsdn_t *tsdn);
-void tcache_flush(void);
+void tcache_flush(tsd_t *tsd);
 bool tsd_tcache_data_init(tsd_t *tsd);
 bool tsd_tcache_enabled_data_init(tsd_t *tsd);
 
diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h
index 631fbf1..155a2ec 100644
--- a/include/jemalloc/internal/tsd.h
+++ b/include/jemalloc/internal/tsd.h
@@ -99,9 +99,10 @@
 	tsd_state_nominal_slow = 1, /* Initialized but on slow path. */
 	/* the above 2 nominal states should be lower values. */
 	tsd_state_nominal_max = 1, /* used for comparison only. */
-	tsd_state_purgatory = 2,
-	tsd_state_reincarnated = 3,
-	tsd_state_uninitialized = 4
+	tsd_state_minimal_initialized = 2,
+	tsd_state_purgatory = 3,
+	tsd_state_reincarnated = 4,
+	tsd_state_uninitialized = 5
 };
 
 /* Manually limit tsd_state_t to a single byte. */
@@ -190,7 +191,8 @@
 tsd_##n##p_get(tsd_t *tsd) {						\
 	assert(tsd->state == tsd_state_nominal ||			\
 	    tsd->state == tsd_state_nominal_slow ||			\
-	    tsd->state == tsd_state_reincarnated);			\
+	    tsd->state == tsd_state_reincarnated ||			\
+	    tsd->state == tsd_state_minimal_initialized);		\
 	return tsd_##n##p_get_unsafe(tsd);				\
 }
 MALLOC_TSD
@@ -225,7 +227,8 @@
 #define O(n, t, nt)							\
 JEMALLOC_ALWAYS_INLINE void						\
 tsd_##n##_set(tsd_t *tsd, t val) {					\
-	assert(tsd->state != tsd_state_reincarnated);			\
+	assert(tsd->state != tsd_state_reincarnated &&			\
+	    tsd->state != tsd_state_minimal_initialized);		\
 	*tsd_##n##p_get(tsd) = val;					\
 }
 MALLOC_TSD
@@ -248,7 +251,7 @@
 }
 
 JEMALLOC_ALWAYS_INLINE tsd_t *
-tsd_fetch_impl(bool init, bool internal) {
+tsd_fetch_impl(bool init, bool minimal) {
 	tsd_t *tsd = tsd_get(init);
 
 	if (!init && tsd_get_allocates() && tsd == NULL) {
@@ -257,7 +260,7 @@
 	assert(tsd != NULL);
 
 	if (unlikely(tsd->state != tsd_state_nominal)) {
-		return tsd_fetch_slow(tsd, internal);
+		return tsd_fetch_slow(tsd, minimal);
 	}
 	assert(tsd_fast(tsd));
 	tsd_assert_fast(tsd);
@@ -265,9 +268,20 @@
 	return tsd;
 }
 
+/* Get a minimal TSD that requires no cleanup.  See comments in free(). */
+JEMALLOC_ALWAYS_INLINE tsd_t *
+tsd_fetch_min(void) {
+	return tsd_fetch_impl(true, true);
+}
+
+/* For use by internal background threads only. */
 JEMALLOC_ALWAYS_INLINE tsd_t *
 tsd_internal_fetch(void) {
-	return tsd_fetch_impl(true, true);
+	tsd_t *tsd = tsd_fetch_min();
+	/* Use reincarnated state to prevent full initialization. */
+	tsd->state = tsd_state_reincarnated;
+
+	return tsd;
 }
 
 JEMALLOC_ALWAYS_INLINE tsd_t *
diff --git a/src/arena.c b/src/arena.c
index 0912df3..632fce5 100644
--- a/src/arena.c
+++ b/src/arena.c
@@ -61,7 +61,8 @@
  */
 
 static void arena_decay_to_limit(tsdn_t *tsdn, arena_t *arena,
-    arena_decay_t *decay, extents_t *extents, bool all, size_t npages_limit);
+    arena_decay_t *decay, extents_t *extents, bool all, size_t npages_limit,
+    bool is_background_thread);
 static bool arena_decay_dirty(tsdn_t *tsdn, arena_t *arena,
     bool is_background_thread, bool all);
 static void arena_dalloc_bin_slab(tsdn_t *tsdn, arena_t *arena, extent_t *slab,
@@ -378,7 +379,7 @@
 	if (arena_dirty_decay_ms_get(arena) == 0) {
 		arena_decay_dirty(tsdn, arena, false, true);
 	} else {
-		arena_background_thread_inactivity_check(tsdn, arena);
+		arena_background_thread_inactivity_check(tsdn, arena, false);
 	}
 }
 
@@ -687,10 +688,11 @@
 
 static void
 arena_decay_try_purge(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay,
-    extents_t *extents, size_t current_npages, size_t npages_limit) {
+    extents_t *extents, size_t current_npages, size_t npages_limit,
+    bool is_background_thread) {
 	if (current_npages > npages_limit) {
 		arena_decay_to_limit(tsdn, arena, decay, extents, false,
-		    npages_limit);
+		    npages_limit, is_background_thread);
 	}
 }
 
@@ -720,17 +722,19 @@
 
 static void
 arena_decay_epoch_advance(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay,
-    extents_t *extents, const nstime_t *time, bool purge) {
+    extents_t *extents, const nstime_t *time, bool is_background_thread) {
 	size_t current_npages = extents_npages_get(extents);
 	arena_decay_epoch_advance_helper(decay, time, current_npages);
 
 	size_t npages_limit = arena_decay_backlog_npages_limit(decay);
-	if (purge) {
-		arena_decay_try_purge(tsdn, arena, decay, extents,
-		    current_npages, npages_limit);
-	}
+	/* try_purge() may drop decay->mtx; update nunpurged before purging. */
 	decay->nunpurged = (npages_limit > current_npages) ? npages_limit :
 	    current_npages;
+
+	if (!background_thread_enabled() || is_background_thread) {
+		arena_decay_try_purge(tsdn, arena, decay, extents,
+		    current_npages, npages_limit, is_background_thread);
+	}
 }
 
 static void
@@ -794,7 +798,7 @@
 	if (decay_ms <= 0) {
 		if (decay_ms == 0) {
 			arena_decay_to_limit(tsdn, arena, decay, extents, false,
-			    0);
+			    0, is_background_thread);
 		}
 		return false;
 	}
@@ -829,14 +833,13 @@
 	 */
 	bool advance_epoch = arena_decay_deadline_reached(decay, &time);
 	if (advance_epoch) {
-		bool should_purge = is_background_thread ||
-		    !background_thread_enabled();
 		arena_decay_epoch_advance(tsdn, arena, decay, extents, &time,
-		    should_purge);
+		    is_background_thread);
 	} else if (is_background_thread) {
 		arena_decay_try_purge(tsdn, arena, decay, extents,
 		    extents_npages_get(extents),
-		    arena_decay_backlog_npages_limit(decay));
+		    arena_decay_backlog_npages_limit(decay),
+		    is_background_thread);
 	}
 
 	return advance_epoch;
@@ -915,7 +918,7 @@
 static size_t
 arena_decay_stashed(tsdn_t *tsdn, arena_t *arena,
     extent_hooks_t **r_extent_hooks, arena_decay_t *decay, extents_t *extents,
-    bool all, extent_list_t *decay_extents) {
+    bool all, extent_list_t *decay_extents, bool is_background_thread) {
 	UNUSED size_t nmadvise, nunmapped;
 	size_t npurged;
 
@@ -945,7 +948,7 @@
 				extents_dalloc(tsdn, arena, r_extent_hooks,
 				    &arena->extents_muzzy, extent);
 				arena_background_thread_inactivity_check(tsdn,
-				    arena);
+				    arena, is_background_thread);
 				break;
 			}
 			/* Fall through. */
@@ -984,7 +987,8 @@
  */
 static void
 arena_decay_to_limit(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay,
-    extents_t *extents, bool all, size_t npages_limit) {
+    extents_t *extents, bool all, size_t npages_limit,
+    bool is_background_thread) {
 	witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
 	    WITNESS_RANK_CORE, 1);
 	malloc_mutex_assert_owner(tsdn, &decay->mtx);
@@ -1004,7 +1008,8 @@
 	    npages_limit, &decay_extents);
 	if (npurge != 0) {
 		UNUSED size_t npurged = arena_decay_stashed(tsdn, arena,
-		    &extent_hooks, decay, extents, all, &decay_extents);
+		    &extent_hooks, decay, extents, all, &decay_extents,
+		    is_background_thread);
 		assert(npurged == npurge);
 	}
 
@@ -1017,7 +1022,8 @@
     extents_t *extents, bool is_background_thread, bool all) {
 	if (all) {
 		malloc_mutex_lock(tsdn, &decay->mtx);
-		arena_decay_to_limit(tsdn, arena, decay, extents, all, 0);
+		arena_decay_to_limit(tsdn, arena, decay, extents, all, 0,
+		    is_background_thread);
 		malloc_mutex_unlock(tsdn, &decay->mtx);
 
 		return false;
@@ -1251,7 +1257,7 @@
 	 * Destroy the base allocator, which manages all metadata ever mapped by
 	 * this arena.
 	 */
-	base_delete(arena->base);
+	base_delete(tsd_tsdn(tsd), arena->base);
 }
 
 static extent_t *
@@ -2045,7 +2051,7 @@
 		 * is done enough that we should have tsd.
 		 */
 		assert(!tsdn_null(tsdn));
-		pre_reentrancy(tsdn_tsd(tsdn));
+		pre_reentrancy(tsdn_tsd(tsdn), arena);
 		if (hooks_arena_new_hook) {
 			hooks_arena_new_hook();
 		}
@@ -2055,7 +2061,7 @@
 	return arena;
 label_error:
 	if (ind != 0) {
-		base_delete(base);
+		base_delete(tsdn, base);
 	}
 	return NULL;
 }
@@ -2081,28 +2087,33 @@
 
 void
 arena_prefork2(tsdn_t *tsdn, arena_t *arena) {
+	malloc_mutex_prefork(tsdn, &arena->extent_grow_mtx);
+}
+
+void
+arena_prefork3(tsdn_t *tsdn, arena_t *arena) {
 	extents_prefork(tsdn, &arena->extents_dirty);
 	extents_prefork(tsdn, &arena->extents_muzzy);
 	extents_prefork(tsdn, &arena->extents_retained);
 }
 
 void
-arena_prefork3(tsdn_t *tsdn, arena_t *arena) {
+arena_prefork4(tsdn_t *tsdn, arena_t *arena) {
 	malloc_mutex_prefork(tsdn, &arena->extent_avail_mtx);
 }
 
 void
-arena_prefork4(tsdn_t *tsdn, arena_t *arena) {
+arena_prefork5(tsdn_t *tsdn, arena_t *arena) {
 	base_prefork(tsdn, arena->base);
 }
 
 void
-arena_prefork5(tsdn_t *tsdn, arena_t *arena) {
+arena_prefork6(tsdn_t *tsdn, arena_t *arena) {
 	malloc_mutex_prefork(tsdn, &arena->large_mtx);
 }
 
 void
-arena_prefork6(tsdn_t *tsdn, arena_t *arena) {
+arena_prefork7(tsdn_t *tsdn, arena_t *arena) {
 	for (unsigned i = 0; i < NBINS; i++) {
 		malloc_mutex_prefork(tsdn, &arena->bins[i].lock);
 	}
@@ -2121,6 +2132,7 @@
 	extents_postfork_parent(tsdn, &arena->extents_dirty);
 	extents_postfork_parent(tsdn, &arena->extents_muzzy);
 	extents_postfork_parent(tsdn, &arena->extents_retained);
+	malloc_mutex_postfork_parent(tsdn, &arena->extent_grow_mtx);
 	malloc_mutex_postfork_parent(tsdn, &arena->decay_dirty.mtx);
 	malloc_mutex_postfork_parent(tsdn, &arena->decay_muzzy.mtx);
 	if (config_stats) {
@@ -2132,6 +2144,23 @@
 arena_postfork_child(tsdn_t *tsdn, arena_t *arena) {
 	unsigned i;
 
+	atomic_store_u(&arena->nthreads[0], 0, ATOMIC_RELAXED);
+	atomic_store_u(&arena->nthreads[1], 0, ATOMIC_RELAXED);
+	if (tsd_arena_get(tsdn_tsd(tsdn)) == arena) {
+		arena_nthreads_inc(arena, false);
+	}
+	if (tsd_iarena_get(tsdn_tsd(tsdn)) == arena) {
+		arena_nthreads_inc(arena, true);
+	}
+	if (config_stats) {
+		ql_new(&arena->tcache_ql);
+		tcache_t *tcache = tcache_get(tsdn_tsd(tsdn));
+		if (tcache != NULL && tcache->arena == arena) {
+			ql_elm_new(tcache, link);
+			ql_tail_insert(&arena->tcache_ql, tcache, link);
+		}
+	}
+
 	for (i = 0; i < NBINS; i++) {
 		malloc_mutex_postfork_child(tsdn, &arena->bins[i].lock);
 	}
@@ -2141,6 +2170,7 @@
 	extents_postfork_child(tsdn, &arena->extents_dirty);
 	extents_postfork_child(tsdn, &arena->extents_muzzy);
 	extents_postfork_child(tsdn, &arena->extents_retained);
+	malloc_mutex_postfork_child(tsdn, &arena->extent_grow_mtx);
 	malloc_mutex_postfork_child(tsdn, &arena->decay_dirty.mtx);
 	malloc_mutex_postfork_child(tsdn, &arena->decay_muzzy.mtx);
 	if (config_stats) {
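
The decay plumbing above now threads is_background_thread through every
purge path. From the application side, a purge can still be forced through
the public interface; a minimal sketch, assuming an unprefixed jemalloc
build.

#include <jemalloc/jemalloc.h>

/* Explicitly trigger decay-driven purging on arena 0. */
int
decay_arena0(void) {
	return mallctl("arena.0.decay", NULL, NULL, NULL, 0);
}
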
diff --git a/src/background_thread.c b/src/background_thread.c
index a7403b8..eb30eb5 100644
--- a/src/background_thread.c
+++ b/src/background_thread.c
@@ -20,6 +20,9 @@
 /* Thread info per-index. */
 background_thread_info_t *background_thread_info;
 
+/* False if the required runtime support (dlsym) is unavailable. */
+bool can_enable_background_thread;
+
 /******************************************************************************/
 
 #ifdef JEMALLOC_PTHREAD_CREATE_WRAPPER
@@ -313,7 +316,7 @@
 		    &background_thread_lock);
 	}
 
-	pre_reentrancy(tsd);
+	pre_reentrancy(tsd, NULL);
 	malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx);
 	bool has_thread;
 	assert(info->state != background_thread_paused);
@@ -344,6 +347,38 @@
 
 static void *background_thread_entry(void *ind_arg);
 
+static int
+background_thread_create_signals_masked(pthread_t *thread,
+    const pthread_attr_t *attr, void *(*start_routine)(void *), void *arg) {
+	/*
+	 * Block all signals during thread creation so that the new thread
+	 * inherits a fully-blocked signal mask, i.e. no signals are
+	 * delivered to it.
+	 */
+	sigset_t set;
+	sigfillset(&set);
+	sigset_t oldset;
+	int mask_err = pthread_sigmask(SIG_SETMASK, &set, &oldset);
+	if (mask_err != 0) {
+		return mask_err;
+	}
+	int create_err = pthread_create_wrapper(thread, attr, start_routine,
+	    arg);
+	/*
+	 * Restore the caller's signal mask.  Failing to do so would
+	 * silently change the program's signal-handling behavior.
+	 */
+	int restore_err = pthread_sigmask(SIG_SETMASK, &oldset, NULL);
+	if (restore_err != 0) {
+		malloc_printf("<jemalloc>: background thread creation "
+		    "failed (%d), and signal mask restoration failed "
+		    "(%d)\n", create_err, restore_err);
+		if (opt_abort) {
+			abort();
+		}
+	}
+	return create_err;
+}
+
 static void
 check_background_thread_creation(tsd_t *tsd, unsigned *n_created,
     bool *created_threads) {
@@ -373,9 +408,9 @@
 		 */
 		malloc_mutex_unlock(tsd_tsdn(tsd), &background_thread_lock);
 
-		pre_reentrancy(tsd);
-		int err = pthread_create_wrapper(&info->thread, NULL,
-		    background_thread_entry, (void *)(uintptr_t)i);
+		pre_reentrancy(tsd, NULL);
+		int err = background_thread_create_signals_masked(&info->thread,
+		    NULL, background_thread_entry, (void *)(uintptr_t)i);
 		post_reentrancy(tsd);
 
 		if (err == 0) {
@@ -464,7 +499,9 @@
 background_thread_entry(void *ind_arg) {
 	unsigned thread_ind = (unsigned)(uintptr_t)ind_arg;
 	assert(thread_ind < ncpus);
-
+#ifdef JEMALLOC_HAVE_PTHREAD_SETNAME_NP
+	pthread_setname_np(pthread_self(), "jemalloc_bg_thd");
+#endif
 	if (opt_percpu_arena != percpu_arena_disabled) {
 		set_current_thread_affinity((int)thread_ind);
 	}
@@ -520,12 +557,12 @@
 		return false;
 	}
 
-	pre_reentrancy(tsd);
+	pre_reentrancy(tsd, NULL);
 	/*
 	 * To avoid complications (besides reentrancy), create internal
 	 * background threads with the underlying pthread_create.
 	 */
-	int err = pthread_create_wrapper(&info->thread, NULL,
+	int err = background_thread_create_signals_masked(&info->thread, NULL,
 	    background_thread_entry, (void *)thread_ind);
 	post_reentrancy(tsd);
 
@@ -785,9 +822,14 @@
 #ifdef JEMALLOC_PTHREAD_CREATE_WRAPPER
 	pthread_create_fptr = dlsym(RTLD_NEXT, "pthread_create");
 	if (pthread_create_fptr == NULL) {
-		malloc_write("<jemalloc>: Error in dlsym(RTLD_NEXT, "
-		    "\"pthread_create\")\n");
-		abort();
+		can_enable_background_thread = false;
+		if (config_lazy_lock || opt_background_thread) {
+			malloc_write("<jemalloc>: Error in dlsym(RTLD_NEXT, "
+			    "\"pthread_create\")\n");
+			abort();
+		}
+	} else {
+		can_enable_background_thread = true;
 	}
 #endif
 	return false;
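
For reference, the signal-masking pattern above in isolation (illustrative
names, not jemalloc's internal API).

#include <pthread.h>
#include <signal.h>

static int
create_thread_signals_masked(pthread_t *thd, void *(*fn)(void *), void *arg) {
	sigset_t set, oldset;

	/* Block everything so the new thread starts with all signals masked. */
	sigfillset(&set);
	int err = pthread_sigmask(SIG_SETMASK, &set, &oldset);
	if (err != 0) {
		return err;
	}
	err = pthread_create(thd, NULL, fn, arg);
	/* Restore the caller's mask whether or not creation succeeded. */
	pthread_sigmask(SIG_SETMASK, &oldset, NULL);
	return err;
}
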
diff --git a/src/base.c b/src/base.c
index 8e1544f..97078b1 100644
--- a/src/base.c
+++ b/src/base.c
@@ -15,7 +15,7 @@
 /******************************************************************************/
 
 static void *
-base_map(extent_hooks_t *extent_hooks, unsigned ind, size_t size) {
+base_map(tsdn_t *tsdn, extent_hooks_t *extent_hooks, unsigned ind, size_t size) {
 	void *addr;
 	bool zero = true;
 	bool commit = true;
@@ -25,15 +25,19 @@
 	if (extent_hooks == &extent_hooks_default) {
 		addr = extent_alloc_mmap(NULL, size, PAGE, &zero, &commit);
 	} else {
+		/* No arena context as we are creating new arenas. */
+		tsd_t *tsd = tsdn_null(tsdn) ? tsd_fetch() : tsdn_tsd(tsdn);
+		pre_reentrancy(tsd, NULL);
 		addr = extent_hooks->alloc(extent_hooks, NULL, size, PAGE,
 		    &zero, &commit, ind);
+		post_reentrancy(tsd);
 	}
 
 	return addr;
 }
 
 static void
-base_unmap(extent_hooks_t *extent_hooks, unsigned ind, void *addr,
+base_unmap(tsdn_t *tsdn, extent_hooks_t *extent_hooks, unsigned ind, void *addr,
     size_t size) {
 	/*
 	 * Cascade through dalloc, decommit, purge_forced, and purge_lazy,
@@ -61,27 +65,32 @@
 		/* Nothing worked.  This should never happen. */
 		not_reached();
 	} else {
+		tsd_t *tsd = tsdn_null(tsdn) ? tsd_fetch() : tsdn_tsd(tsdn);
+		pre_reentrancy(tsd, NULL);
 		if (extent_hooks->dalloc != NULL &&
 		    !extent_hooks->dalloc(extent_hooks, addr, size, true,
 		    ind)) {
-			return;
+			goto label_done;
 		}
 		if (extent_hooks->decommit != NULL &&
 		    !extent_hooks->decommit(extent_hooks, addr, size, 0, size,
 		    ind)) {
-			return;
+			goto label_done;
 		}
 		if (extent_hooks->purge_forced != NULL &&
 		    !extent_hooks->purge_forced(extent_hooks, addr, size, 0,
 		    size, ind)) {
-			return;
+			goto label_done;
 		}
 		if (extent_hooks->purge_lazy != NULL &&
 		    !extent_hooks->purge_lazy(extent_hooks, addr, size, 0, size,
 		    ind)) {
-			return;
+			goto label_done;
 		}
 		/* Nothing worked.  That's the application's problem. */
+	label_done:
+		post_reentrancy(tsd);
+		return;
 	}
 }
 
@@ -157,7 +166,7 @@
  * On success a pointer to the initialized base_block_t header is returned.
  */
 static base_block_t *
-base_block_alloc(extent_hooks_t *extent_hooks, unsigned ind,
+base_block_alloc(tsdn_t *tsdn, extent_hooks_t *extent_hooks, unsigned ind,
     pszind_t *pind_last, size_t *extent_sn_next, size_t size,
     size_t alignment) {
 	alignment = ALIGNMENT_CEILING(alignment, QUANTUM);
@@ -179,7 +188,7 @@
 	size_t next_block_size = HUGEPAGE_CEILING(sz_pind2sz(pind_next));
 	size_t block_size = (min_block_size > next_block_size) ? min_block_size
 	    : next_block_size;
-	base_block_t *block = (base_block_t *)base_map(extent_hooks, ind,
+	base_block_t *block = (base_block_t *)base_map(tsdn, extent_hooks, ind,
 	    block_size);
 	if (block == NULL) {
 		return NULL;
@@ -207,8 +216,9 @@
 	 * called.
 	 */
 	malloc_mutex_unlock(tsdn, &base->mtx);
-	base_block_t *block = base_block_alloc(extent_hooks, base_ind_get(base),
-	    &base->pind_last, &base->extent_sn_next, size, alignment);
+	base_block_t *block = base_block_alloc(tsdn, extent_hooks,
+	    base_ind_get(base), &base->pind_last, &base->extent_sn_next, size,
+	    alignment);
 	malloc_mutex_lock(tsdn, &base->mtx);
 	if (block == NULL) {
 		return NULL;
@@ -234,8 +244,8 @@
 base_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) {
 	pszind_t pind_last = 0;
 	size_t extent_sn_next = 0;
-	base_block_t *block = base_block_alloc(extent_hooks, ind, &pind_last,
-	    &extent_sn_next, sizeof(base_t), QUANTUM);
+	base_block_t *block = base_block_alloc(tsdn, extent_hooks, ind,
+	    &pind_last, &extent_sn_next, sizeof(base_t), QUANTUM);
 	if (block == NULL) {
 		return NULL;
 	}
@@ -249,7 +259,7 @@
 	atomic_store_p(&base->extent_hooks, extent_hooks, ATOMIC_RELAXED);
 	if (malloc_mutex_init(&base->mtx, "base", WITNESS_RANK_BASE,
 	    malloc_mutex_rank_exclusive)) {
-		base_unmap(extent_hooks, ind, block, block->size);
+		base_unmap(tsdn, extent_hooks, ind, block, block->size);
 		return NULL;
 	}
 	base->pind_last = pind_last;
@@ -272,13 +282,13 @@
 }
 
 void
-base_delete(base_t *base) {
+base_delete(tsdn_t *tsdn, base_t *base) {
 	extent_hooks_t *extent_hooks = base_extent_hooks_get(base);
 	base_block_t *next = base->blocks;
 	do {
 		base_block_t *block = next;
 		next = block->next;
-		base_unmap(extent_hooks, base_ind_get(base), block,
+		base_unmap(tsdn, extent_hooks, base_ind_get(base), block,
 		    block->size);
 	} while (next != NULL);
 }
diff --git a/src/ctl.c b/src/ctl.c
index b3ae4aa..36bc8fb 100644
--- a/src/ctl.c
+++ b/src/ctl.c
@@ -622,7 +622,7 @@
 }
 
 static ctl_arena_t *
-arenas_i_impl(tsdn_t *tsdn, size_t i, bool compat, bool init) {
+arenas_i_impl(tsd_t *tsd, size_t i, bool compat, bool init) {
 	ctl_arena_t *ret;
 
 	assert(!compat || !init);
@@ -635,15 +635,15 @@
 				ctl_arena_stats_t	astats;
 			};
 			struct container_s *cont =
-			    (struct container_s *)base_alloc(tsdn, b0get(),
-			    sizeof(struct container_s), QUANTUM);
+			    (struct container_s *)base_alloc(tsd_tsdn(tsd),
+			    b0get(), sizeof(struct container_s), QUANTUM);
 			if (cont == NULL) {
 				return NULL;
 			}
 			ret = &cont->ctl_arena;
 			ret->astats = &cont->astats;
 		} else {
-			ret = (ctl_arena_t *)base_alloc(tsdn, b0get(),
+			ret = (ctl_arena_t *)base_alloc(tsd_tsdn(tsd), b0get(),
 			    sizeof(ctl_arena_t), QUANTUM);
 			if (ret == NULL) {
 				return NULL;
@@ -659,7 +659,7 @@
 
 static ctl_arena_t *
 arenas_i(size_t i) {
-	ctl_arena_t *ret = arenas_i_impl(TSDN_NULL, i, true, false);
+	ctl_arena_t *ret = arenas_i_impl(tsd_fetch(), i, true, false);
 	assert(ret != NULL);
 	return ret;
 }
@@ -863,7 +863,7 @@
 }
 
 static unsigned
-ctl_arena_init(tsdn_t *tsdn, extent_hooks_t *extent_hooks) {
+ctl_arena_init(tsd_t *tsd, extent_hooks_t *extent_hooks) {
 	unsigned arena_ind;
 	ctl_arena_t *ctl_arena;
 
@@ -876,12 +876,12 @@
 	}
 
 	/* Trigger stats allocation. */
-	if (arenas_i_impl(tsdn, arena_ind, false, true) == NULL) {
+	if (arenas_i_impl(tsd, arena_ind, false, true) == NULL) {
 		return UINT_MAX;
 	}
 
 	/* Initialize new arena. */
-	if (arena_init(tsdn, arena_ind, extent_hooks) == NULL) {
+	if (arena_init(tsd_tsdn(tsd), arena_ind, extent_hooks) == NULL) {
 		return UINT_MAX;
 	}
 
@@ -975,8 +975,9 @@
 }
 
 static bool
-ctl_init(tsdn_t *tsdn) {
+ctl_init(tsd_t *tsd) {
 	bool ret;
+	tsdn_t *tsdn = tsd_tsdn(tsd);
 
 	malloc_mutex_lock(tsdn, &ctl_mtx);
 	if (!ctl_initialized) {
@@ -1010,14 +1011,14 @@
 		 * here rather than doing it lazily elsewhere, in order
 		 * to limit when OOM-caused errors can occur.
 		 */
-		if ((ctl_sarena = arenas_i_impl(tsdn, MALLCTL_ARENAS_ALL, false,
+		if ((ctl_sarena = arenas_i_impl(tsd, MALLCTL_ARENAS_ALL, false,
 		    true)) == NULL) {
 			ret = true;
 			goto label_return;
 		}
 		ctl_sarena->initialized = true;
 
-		if ((ctl_darena = arenas_i_impl(tsdn, MALLCTL_ARENAS_DESTROYED,
+		if ((ctl_darena = arenas_i_impl(tsd, MALLCTL_ARENAS_DESTROYED,
 		    false, true)) == NULL) {
 			ret = true;
 			goto label_return;
@@ -1031,7 +1032,7 @@
 
 		ctl_arenas->narenas = narenas_total_get();
 		for (i = 0; i < ctl_arenas->narenas; i++) {
-			if (arenas_i_impl(tsdn, i, false, true) == NULL) {
+			if (arenas_i_impl(tsd, i, false, true) == NULL) {
 				ret = true;
 				goto label_return;
 			}
@@ -1156,7 +1157,7 @@
 	size_t mib[CTL_MAX_DEPTH];
 	const ctl_named_node_t *node;
 
-	if (!ctl_initialized && ctl_init(tsd_tsdn(tsd))) {
+	if (!ctl_initialized && ctl_init(tsd)) {
 		ret = EAGAIN;
 		goto label_return;
 	}
@@ -1180,15 +1181,15 @@
 }
 
 int
-ctl_nametomib(tsdn_t *tsdn, const char *name, size_t *mibp, size_t *miblenp) {
+ctl_nametomib(tsd_t *tsd, const char *name, size_t *mibp, size_t *miblenp) {
 	int ret;
 
-	if (!ctl_initialized && ctl_init(tsdn)) {
+	if (!ctl_initialized && ctl_init(tsd)) {
 		ret = EAGAIN;
 		goto label_return;
 	}
 
-	ret = ctl_lookup(tsdn, name, NULL, mibp, miblenp);
+	ret = ctl_lookup(tsd_tsdn(tsd), name, NULL, mibp, miblenp);
 label_return:
 	return(ret);
 }
@@ -1200,7 +1201,7 @@
 	const ctl_named_node_t *node;
 	size_t i;
 
-	if (!ctl_initialized && ctl_init(tsd_tsdn(tsd))) {
+	if (!ctl_initialized && ctl_init(tsd)) {
 		ret = EAGAIN;
 		goto label_return;
 	}
@@ -1522,6 +1523,13 @@
 
 		background_thread_enabled_set(tsd_tsdn(tsd), newval);
 		if (newval) {
+			if (!can_enable_background_thread) {
+				malloc_printf("<jemalloc>: Error in dlsym("
+				    "RTLD_NEXT, \"pthread_create\"). Cannot "
+				    "enable background_thread\n");
+				ret = EFAULT;
+				goto label_return;
+			}
 			if (background_threads_enable(tsd)) {
 				ret = EFAULT;
 				goto label_return;
@@ -1689,7 +1697,7 @@
 	READONLY();
 	WRITEONLY();
 
-	tcache_flush();
+	tcache_flush(tsd);
 
 	ret = 0;
 label_return:
@@ -1963,7 +1971,7 @@
 			unsigned ind = arena_ind % ncpus;
 			background_thread_info_t *info =
 			    &background_thread_info[ind];
-			assert(info->state = background_thread_paused);
+			assert(info->state == background_thread_paused);
 			malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx);
 			info->state = background_thread_started;
 			malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx);
@@ -2305,8 +2313,7 @@
 
 	extent_hooks = (extent_hooks_t *)&extent_hooks_default;
 	WRITE(extent_hooks, extent_hooks_t *);
-	if ((arena_ind = ctl_arena_init(tsd_tsdn(tsd), extent_hooks)) ==
-	    UINT_MAX) {
+	if ((arena_ind = ctl_arena_init(tsd, extent_hooks)) == UINT_MAX) {
 		ret = EAGAIN;
 		goto label_return;
 	}
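
The can_enable_background_thread guard above surfaces through the public
mallctl interface; a minimal sketch, assuming an unprefixed jemalloc build.

#include <stdbool.h>
#include <stdio.h>
#include <jemalloc/jemalloc.h>

int
main(void) {
	bool enable = true;
	/* Returns EFAULT when pthread_create was not resolvable via dlsym. */
	int err = mallctl("background_thread", NULL, NULL, &enable,
	    sizeof(enable));
	if (err != 0) {
		fprintf(stderr, "cannot enable background threads: %d\n", err);
	}
	return 0;
}
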
diff --git a/src/extent.c b/src/extent.c
index 386a7ce..fa45c84 100644
--- a/src/extent.c
+++ b/src/extent.c
@@ -1025,6 +1025,18 @@
 	    alignment, zero, commit);
 }
 
+static void
+extent_hook_pre_reentrancy(tsdn_t *tsdn, arena_t *arena) {
+	tsd_t *tsd = tsdn_null(tsdn) ? tsd_fetch() : tsdn_tsd(tsdn);
+	pre_reentrancy(tsd, arena);
+}
+
+static void
+extent_hook_post_reentrancy(tsdn_t *tsdn) {
+	tsd_t *tsd = tsdn_null(tsdn) ? tsd_fetch() : tsdn_tsd(tsdn);
+	post_reentrancy(tsd);
+}
+
 /*
  * If virtual memory is retained, create increasingly larger extents from which
  * to split requested extents in order to limit the total number of disjoint
@@ -1066,9 +1078,20 @@
 	}
 	bool zeroed = false;
 	bool committed = false;
-	void *ptr = extent_alloc_core(tsdn, arena, NULL, alloc_size, PAGE,
-	    &zeroed, &committed, (dss_prec_t)atomic_load_u(&arena->dss_prec,
-	    ATOMIC_RELAXED));
+
+	void *ptr;
+	if (*r_extent_hooks == &extent_hooks_default) {
+		ptr = extent_alloc_core(tsdn, arena, NULL, alloc_size, PAGE,
+		    &zeroed, &committed, (dss_prec_t)atomic_load_u(
+		    &arena->dss_prec, ATOMIC_RELAXED));
+	} else {
+		extent_hook_pre_reentrancy(tsdn, arena);
+		ptr = (*r_extent_hooks)->alloc(*r_extent_hooks, NULL,
+		    alloc_size, PAGE, &zeroed, &committed,
+		    arena_ind_get(arena));
+		extent_hook_post_reentrancy(tsdn);
+	}
+
 	extent_init(extent, arena, ptr, alloc_size, false, NSIZES,
 	    arena_extent_sn_next(arena), extent_state_active, zeroed,
 	    committed);
@@ -1238,8 +1261,10 @@
 		addr = extent_alloc_default_impl(tsdn, arena, new_addr, esize,
 		    alignment, zero, commit);
 	} else {
+		extent_hook_pre_reentrancy(tsdn, arena);
 		addr = (*r_extent_hooks)->alloc(*r_extent_hooks, new_addr,
 		    esize, alignment, zero, commit, arena_ind_get(arena));
+		extent_hook_post_reentrancy(tsdn);
 	}
 	if (addr == NULL) {
 		extent_dalloc(tsdn, arena, extent);
@@ -1477,10 +1502,12 @@
 		err = extent_dalloc_default_impl(extent_base_get(extent),
 		    extent_size_get(extent));
 	} else {
+		extent_hook_pre_reentrancy(tsdn, arena);
 		err = ((*r_extent_hooks)->dalloc == NULL ||
 		    (*r_extent_hooks)->dalloc(*r_extent_hooks,
 		    extent_base_get(extent), extent_size_get(extent),
 		    extent_committed_get(extent), arena_ind_get(arena)));
+		extent_hook_post_reentrancy(tsdn);
 	}
 
 	if (!err) {
@@ -1506,6 +1533,9 @@
 	}
 
 	extent_reregister(tsdn, extent);
+	if (*r_extent_hooks != &extent_hooks_default) {
+		extent_hook_pre_reentrancy(tsdn, arena);
+	}
 	/* Try to decommit; purge if that fails. */
 	bool zeroed;
 	if (!extent_committed_get(extent)) {
@@ -1527,6 +1557,9 @@
 	} else {
 		zeroed = false;
 	}
+	if (*r_extent_hooks != &extent_hooks_default) {
+		extent_hook_post_reentrancy(tsdn);
+	}
 	extent_zeroed_set(extent, zeroed);
 
 	if (config_prof) {
@@ -1570,9 +1603,11 @@
 		extent_destroy_default_impl(extent_base_get(extent),
 		    extent_size_get(extent));
 	} else if ((*r_extent_hooks)->destroy != NULL) {
+		extent_hook_pre_reentrancy(tsdn, arena);
 		(*r_extent_hooks)->destroy(*r_extent_hooks,
 		    extent_base_get(extent), extent_size_get(extent),
 		    extent_committed_get(extent), arena_ind_get(arena));
+		extent_hook_post_reentrancy(tsdn);
 	}
 
 	extent_dalloc(tsdn, arena, extent);
@@ -1593,9 +1628,15 @@
 	    WITNESS_RANK_CORE, growing_retained ? 1 : 0);
 
 	extent_hooks_assure_initialized(arena, r_extent_hooks);
+	if (*r_extent_hooks != &extent_hooks_default) {
+		extent_hook_pre_reentrancy(tsdn, arena);
+	}
 	bool err = ((*r_extent_hooks)->commit == NULL ||
 	    (*r_extent_hooks)->commit(*r_extent_hooks, extent_base_get(extent),
 	    extent_size_get(extent), offset, length, arena_ind_get(arena)));
+	if (*r_extent_hooks != &extent_hooks_default) {
+		extent_hook_post_reentrancy(tsdn);
+	}
 	extent_committed_set(extent, extent_committed_get(extent) || !err);
 	return err;
 }
@@ -1624,10 +1665,16 @@
 
 	extent_hooks_assure_initialized(arena, r_extent_hooks);
 
+	if (*r_extent_hooks != &extent_hooks_default) {
+		extent_hook_pre_reentrancy(tsdn, arena);
+	}
 	bool err = ((*r_extent_hooks)->decommit == NULL ||
 	    (*r_extent_hooks)->decommit(*r_extent_hooks,
 	    extent_base_get(extent), extent_size_get(extent), offset, length,
 	    arena_ind_get(arena)));
+	if (*r_extent_hooks != &extent_hooks_default) {
+		extent_hook_post_reentrancy(tsdn);
+	}
 	extent_committed_set(extent, extent_committed_get(extent) && err);
 	return err;
 }
@@ -1654,10 +1701,21 @@
 	    WITNESS_RANK_CORE, growing_retained ? 1 : 0);
 
 	extent_hooks_assure_initialized(arena, r_extent_hooks);
-	return ((*r_extent_hooks)->purge_lazy == NULL ||
-	    (*r_extent_hooks)->purge_lazy(*r_extent_hooks,
+
+	if ((*r_extent_hooks)->purge_lazy == NULL) {
+		return true;
+	}
+	if (*r_extent_hooks != &extent_hooks_default) {
+		extent_hook_pre_reentrancy(tsdn, arena);
+	}
+	bool err = (*r_extent_hooks)->purge_lazy(*r_extent_hooks,
 	    extent_base_get(extent), extent_size_get(extent), offset, length,
-	    arena_ind_get(arena)));
+	    arena_ind_get(arena));
+	if (*r_extent_hooks != &extent_hooks_default) {
+		extent_hook_post_reentrancy(tsdn);
+	}
+
+	return err;
 }
 
 bool
@@ -1690,10 +1748,20 @@
 	    WITNESS_RANK_CORE, growing_retained ? 1 : 0);
 
 	extent_hooks_assure_initialized(arena, r_extent_hooks);
-	return ((*r_extent_hooks)->purge_forced == NULL ||
-	    (*r_extent_hooks)->purge_forced(*r_extent_hooks,
+
+	if ((*r_extent_hooks)->purge_forced == NULL) {
+		return true;
+	}
+	if (*r_extent_hooks != &extent_hooks_default) {
+		extent_hook_pre_reentrancy(tsdn, arena);
+	}
+	bool err = (*r_extent_hooks)->purge_forced(*r_extent_hooks,
 	    extent_base_get(extent), extent_size_get(extent), offset, length,
-	    arena_ind_get(arena)));
+	    arena_ind_get(arena));
+	if (*r_extent_hooks != &extent_hooks_default) {
+		extent_hook_post_reentrancy(tsdn);
+	}
+	return err;
 }
 
 bool
@@ -1762,9 +1830,16 @@
 
 	extent_lock2(tsdn, extent, trail);
 
-	if ((*r_extent_hooks)->split(*r_extent_hooks, extent_base_get(extent),
+	if (*r_extent_hooks != &extent_hooks_default) {
+		extent_hook_pre_reentrancy(tsdn, arena);
+	}
+	bool err = (*r_extent_hooks)->split(*r_extent_hooks,
+	    extent_base_get(extent), size_a + size_b, size_a, size_b,
+	    extent_committed_get(extent), arena_ind_get(arena));
+	    arena_ind_get(arena));
+	if (*r_extent_hooks != &extent_hooks_default) {
+		extent_hook_post_reentrancy(tsdn);
+	}
+	if (err) {
 		goto label_error_c;
 	}
 
@@ -1834,10 +1909,12 @@
 		err = extent_merge_default_impl(extent_base_get(a),
 		    extent_base_get(b));
 	} else {
+		extent_hook_pre_reentrancy(tsdn, arena);
 		err = (*r_extent_hooks)->merge(*r_extent_hooks,
 		    extent_base_get(a), extent_size_get(a), extent_base_get(b),
 		    extent_size_get(b), extent_committed_get(a),
 		    arena_ind_get(arena));
+		extent_hook_post_reentrancy(tsdn);
 	}
 
 	if (err) {
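
Each hook invocation above is now bracketed by
extent_hook_pre/post_reentrancy so that hooks which themselves allocate do
not recurse into the arena. For reference, custom hooks are installed per
arena through mallctl; a sketch in which my_hooks is a hypothetical,
fully-populated application hook table.

#include <stdio.h>
#include <jemalloc/jemalloc.h>

extern extent_hooks_t my_hooks;	/* Hypothetical application hook table. */

int
install_extent_hooks(unsigned arena_ind) {
	char name[64];
	extent_hooks_t *new_hooks = &my_hooks;

	snprintf(name, sizeof(name), "arena.%u.extent_hooks", arena_ind);
	return mallctl(name, NULL, NULL, &new_hooks, sizeof(new_hooks));
}
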
diff --git a/src/jemalloc.c b/src/jemalloc.c
index 52c86aa..0ee8ad4 100644
--- a/src/jemalloc.c
+++ b/src/jemalloc.c
@@ -1476,7 +1476,7 @@
 
 	malloc_mutex_lock(tsd_tsdn(tsd), &init_lock);
 	/* Set reentrancy level to 1 during init. */
-	pre_reentrancy(tsd);
+	pre_reentrancy(tsd, NULL);
 	/* Initialize narenas before prof_boot2 (for allocation). */
 	if (malloc_init_narenas() || background_thread_boot1(tsd_tsdn(tsd))) {
 		UNLOCK_RETURN(tsd_tsdn(tsd), true, true)
@@ -1799,7 +1799,7 @@
 		 */
 		assert(dopts->tcache_ind == TCACHE_IND_AUTOMATIC ||
 		    dopts->tcache_ind == TCACHE_IND_NONE);
-		assert(dopts->arena_ind = ARENA_IND_AUTOMATIC);
+		assert(dopts->arena_ind == ARENA_IND_AUTOMATIC);
 		dopts->tcache_ind = TCACHE_IND_NONE;
 		/* We know that arena 0 has already been initialized. */
 		dopts->arena_ind = 0;
@@ -2264,7 +2264,15 @@
 je_free(void *ptr) {
 	UTRACE(ptr, 0, 0);
 	if (likely(ptr != NULL)) {
-		tsd_t *tsd = tsd_fetch();
+		/*
+		 * We avoid setting up tsd fully (e.g. tcache, arena binding)
+		 * when the only allocator activity is free() -- any other
+		 * activity triggers the minimal-to-full transition.  This is
+		 * because free() may run during thread shutdown, after TLS
+		 * deallocation: if a thread had no malloc activity before
+		 * then, a fully-set-up tsd would never be destructed properly.
+		 */
+		tsd_t *tsd = tsd_fetch_min();
 		check_entry_exit_locking(tsd_tsdn(tsd));
 
 		tcache_t *tcache;
@@ -2910,16 +2918,15 @@
 JEMALLOC_EXPORT int JEMALLOC_NOTHROW
 je_mallctlnametomib(const char *name, size_t *mibp, size_t *miblenp) {
 	int ret;
-	tsdn_t *tsdn;
 
 	if (unlikely(malloc_init())) {
 		return EAGAIN;
 	}
 
-	tsdn = tsdn_fetch();
-	check_entry_exit_locking(tsdn);
-	ret = ctl_nametomib(tsdn, name, mibp, miblenp);
-	check_entry_exit_locking(tsdn);
+	tsd_t *tsd = tsd_fetch();
+	check_entry_exit_locking(tsd_tsdn(tsd));
+	ret = ctl_nametomib(tsd, name, mibp, miblenp);
+	check_entry_exit_locking(tsd_tsdn(tsd));
 	return ret;
 }
 
@@ -3042,7 +3049,7 @@
 		background_thread_prefork1(tsd_tsdn(tsd));
 	}
 	/* Break arena prefork into stages to preserve lock order. */
-	for (i = 0; i < 7; i++) {
+	for (i = 0; i < 8; i++) {
 		for (j = 0; j < narenas; j++) {
 			if ((arena = arena_get(tsd_tsdn(tsd), j, false)) !=
 			    NULL) {
@@ -3068,6 +3075,9 @@
 				case 6:
 					arena_prefork6(tsd_tsdn(tsd), arena);
 					break;
+				case 7:
+					arena_prefork7(tsd_tsdn(tsd), arena);
+					break;
 				default: not_reached();
 				}
 			}
diff --git a/src/prof.c b/src/prof.c
index 61dfa2c..975722c 100644
--- a/src/prof.c
+++ b/src/prof.c
@@ -1633,7 +1633,7 @@
 		return true;
 	}
 
-	pre_reentrancy(tsd);
+	pre_reentrancy(tsd, NULL);
 	malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_mtx);
 
 	prof_gctx_tree_t gctxs;
diff --git a/src/tcache.c b/src/tcache.c
index 6355805..936ef31 100644
--- a/src/tcache.c
+++ b/src/tcache.c
@@ -474,8 +474,7 @@
 }
 
 void
-tcache_flush(void) {
-	tsd_t *tsd = tsd_fetch();
+tcache_flush(tsd_t *tsd) {
 	assert(tcache_available(tsd));
 	tcache_flush_cache(tsd, tsd_tcachep_get(tsd));
 }
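
tcache_flush() now takes the caller's tsd explicitly; the public route to it
is unchanged. A sketch, assuming an unprefixed build.

#include <jemalloc/jemalloc.h>

/* Flush the calling thread's cache; internally ends in tcache_flush(tsd). */
int
flush_my_tcache(void) {
	return mallctl("thread.tcache.flush", NULL, NULL, NULL, 0);
}
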
diff --git a/src/tsd.c b/src/tsd.c
index 9733033..f968992 100644
--- a/src/tsd.c
+++ b/src/tsd.c
@@ -87,7 +87,8 @@
 
 static bool
 tsd_data_init_nocleanup(tsd_t *tsd) {
-	assert(tsd->state == tsd_state_reincarnated);
+	assert(tsd->state == tsd_state_reincarnated ||
+	    tsd->state == tsd_state_minimal_initialized);
 	/*
 	 * During reincarnation, there is no guarantee that the cleanup function
 	 * will be called (deallocation may happen after all tsd destructors).
@@ -103,15 +104,8 @@
 }
 
 tsd_t *
-tsd_fetch_slow(tsd_t *tsd, bool internal) {
-	if (internal) {
-		/* For internal background threads use only. */
-		assert(tsd->state == tsd_state_uninitialized);
-		tsd->state = tsd_state_reincarnated;
-		tsd_set(tsd);
-		tsd_data_init_nocleanup(tsd);
-		return tsd;
-	}
+tsd_fetch_slow(tsd_t *tsd, bool minimal) {
+	assert(!tsd_fast(tsd));
 
 	if (tsd->state == tsd_state_nominal_slow) {
 		/* On slow path but no work needed. */
@@ -119,11 +113,28 @@
 		    tsd_reentrancy_level_get(tsd) > 0 ||
 		    *tsd_arenas_tdata_bypassp_get(tsd));
 	} else if (tsd->state == tsd_state_uninitialized) {
-		tsd->state = tsd_state_nominal;
-		tsd_slow_update(tsd);
-		/* Trigger cleanup handler registration. */
-		tsd_set(tsd);
-		tsd_data_init(tsd);
+		if (!minimal) {
+			tsd->state = tsd_state_nominal;
+			tsd_slow_update(tsd);
+			/* Trigger cleanup handler registration. */
+			tsd_set(tsd);
+			tsd_data_init(tsd);
+		} else {
+			tsd->state = tsd_state_minimal_initialized;
+			tsd_set(tsd);
+			tsd_data_init_nocleanup(tsd);
+		}
+	} else if (tsd->state == tsd_state_minimal_initialized) {
+		if (!minimal) {
+			/* Switch to fully initialized. */
+			tsd->state = tsd_state_nominal;
+			assert(*tsd_reentrancy_levelp_get(tsd) >= 1);
+			(*tsd_reentrancy_levelp_get(tsd))--;
+			tsd_slow_update(tsd);
+			tsd_data_init(tsd);
+		} else {
+			assert_tsd_data_cleanup_done(tsd);
+		}
 	} else if (tsd->state == tsd_state_purgatory) {
 		tsd->state = tsd_state_reincarnated;
 		tsd_set(tsd);
@@ -197,6 +208,9 @@
 	case tsd_state_uninitialized:
 		/* Do nothing. */
 		break;
+	case tsd_state_minimal_initialized:
+	/* This implies the thread only did free() in its lifetime. */
+		/* Fall through. */
 	case tsd_state_reincarnated:
 		/*
 		 * Reincarnated means another destructor deallocated memory
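
The resulting tsd state transitions, as read from tsd_fetch_slow() above (a
sketch, not authoritative):

/*
 * uninitialized ------- tsd_fetch() ----------> nominal
 * uninitialized ------- tsd_fetch_min() ------> minimal_initialized
 * minimal_initialized - tsd_fetch() ----------> nominal
 * purgatory ----------- late tsd_fetch() -----> reincarnated
 */
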
diff --git a/test/integration/extent.c b/test/integration/extent.c
index 7262b80..1dcf217 100644
--- a/test/integration/extent.c
+++ b/test/integration/extent.c
@@ -39,10 +39,13 @@
 	assert_d_eq(mallctlnametomib("arena.0.purge", purge_mib, &purge_miblen),
 	    0, "Unexpected mallctlnametomib() failure");
 	purge_mib[1] = (size_t)arena_ind;
+	called_alloc = false;
+	try_alloc = true;
 	try_dalloc = false;
 	try_decommit = false;
 	p = mallocx(large0 * 2, flags);
 	assert_ptr_not_null(p, "Unexpected mallocx() error");
+	assert_true(called_alloc, "Expected alloc call");
 	called_dalloc = false;
 	called_decommit = false;
 	did_purge_lazy = false;
diff --git a/test/unit/base.c b/test/unit/base.c
index 5dc42f0..7fa24ac 100644
--- a/test/unit/base.c
+++ b/test/unit/base.c
@@ -27,11 +27,10 @@
 };
 
 TEST_BEGIN(test_base_hooks_default) {
-	tsdn_t *tsdn;
 	base_t *base;
 	size_t allocated0, allocated1, resident, mapped;
 
-	tsdn = tsdn_fetch();
+	tsdn_t *tsdn = tsd_tsdn(tsd_fetch());
 	base = base_new(tsdn, 0, (extent_hooks_t *)&extent_hooks_default);
 
 	if (config_stats) {
@@ -49,13 +48,12 @@
 		    "At least 42 bytes were allocated by base_alloc()");
 	}
 
-	base_delete(base);
+	base_delete(tsdn, base);
 }
 TEST_END
 
 TEST_BEGIN(test_base_hooks_null) {
 	extent_hooks_t hooks_orig;
-	tsdn_t *tsdn;
 	base_t *base;
 	size_t allocated0, allocated1, resident, mapped;
 
@@ -68,7 +66,7 @@
 	memcpy(&hooks_orig, &hooks, sizeof(extent_hooks_t));
 	memcpy(&hooks, &hooks_null, sizeof(extent_hooks_t));
 
-	tsdn = tsdn_fetch();
+	tsdn_t *tsdn = tsd_tsdn(tsd_fetch());
 	base = base_new(tsdn, 0, &hooks);
 	assert_ptr_not_null(base, "Unexpected base_new() failure");
 
@@ -87,7 +85,7 @@
 		    "At least 42 bytes were allocated by base_alloc()");
 	}
 
-	base_delete(base);
+	base_delete(tsdn, base);
 
 	memcpy(&hooks, &hooks_orig, sizeof(extent_hooks_t));
 }
@@ -95,7 +93,6 @@
 
 TEST_BEGIN(test_base_hooks_not_null) {
 	extent_hooks_t hooks_orig;
-	tsdn_t *tsdn;
 	base_t *base;
 	void *p, *q, *r, *r_exp;
 
@@ -108,7 +105,7 @@
 	memcpy(&hooks_orig, &hooks, sizeof(extent_hooks_t));
 	memcpy(&hooks, &hooks_not_null, sizeof(extent_hooks_t));
 
-	tsdn = tsdn_fetch();
+	tsdn_t *tsdn = tsd_tsdn(tsd_fetch());
 	did_alloc = false;
 	base = base_new(tsdn, 0, &hooks);
 	assert_ptr_not_null(base, "Unexpected base_new() failure");
@@ -200,7 +197,7 @@
 
 	called_dalloc = called_destroy = called_decommit = called_purge_lazy =
 	    called_purge_forced = false;
-	base_delete(base);
+	base_delete(tsdn, base);
 	assert_true(called_dalloc, "Expected dalloc call");
 	assert_true(!called_destroy, "Unexpected destroy call");
 	assert_true(called_decommit, "Expected decommit call");