Merge branch 'dev'
diff --git a/jemalloc/ChangeLog b/jemalloc/ChangeLog
index fc2f894..d042982 100644
--- a/jemalloc/ChangeLog
+++ b/jemalloc/ChangeLog
@@ -6,15 +6,21 @@
     http://www.canonware.com/cgi-bin/gitweb.cgi?p=jemalloc.git
     git://canonware.com/jemalloc.git
 
-* 2.1.1
+* 2.1.2 (March 2, 2011)
 
-  Bug Fixes:
+  Bug fixes:
+  - Fix "thread.{de,}allocatedp" mallctl for OS X.
+  - Add missing jemalloc.a to build system.
+
+* 2.1.1 (January 31, 2011)
+
+  Bug fixes:
   - Fix aligned huge reallocation (affected allocm()).
   - Fix the ALLOCM_LG_ALIGN macro definition.
   - Fix a heap dumping deadlock.
   - Fix a "thread.arena" mallctl bug.
 
-* 2.1.0
+* 2.1.0 (December 3, 2010)
 
   This version incorporates some optimizations that can't quite be considered
   bug fixes.
@@ -31,7 +37,7 @@
     --enable-debug --enable-dss configurations.
   - Fix a minor statistics bug for mallctl("swap.avail", ...).
 
-* 2.0.1
+* 2.0.1 (October 29, 2010)
 
   Bug fixes:
   - Fix a race condition in heap profiling that could cause undefined behavior
@@ -40,7 +46,7 @@
     code.
   - Fix a compilation error for non-C99 builds.
 
-* 2.0.0
+* 2.0.0 (October 24, 2010)
 
   This version focuses on the experimental *allocm() API, and on improved
   run-time configuration/introspection.  Nonetheless, numerous performance
@@ -92,7 +98,7 @@
   - Fix a heap profiling bug due to sometimes losing track of requested object
     size for sampled objects.
 
-* 1.0.3
+* 1.0.3 (August 12, 2010)
 
   Bug fixes:
   - Fix the libunwind-based implementation of stack backtracing (used for heap
@@ -101,7 +107,7 @@
     threads raced to initialize malloc, some of them could end up permanently
     blocked.
 
-* 1.0.2
+* 1.0.2 (May 11, 2010)
 
   Bug fixes:
   - Fix junk filling of large objects, which could cause memory corruption.
@@ -109,7 +115,7 @@
     memory limits could cause swap file configuration to fail.  Contributed by
     Jordan DeLong.
 
-* 1.0.1
+* 1.0.1 (April 14, 2010)
 
   Bug fixes:
   - Fix compilation when --enable-fill is specified.
@@ -118,7 +124,7 @@
   - Fix dirty page purging race conditions that could cause crashes.
   - Fix crash in tcache flushing code during thread destruction.
 
-* 1.0.0
+* 1.0.0 (April 11, 2010)
 
   This release focuses on speed and run-time introspection.  Numerous
   algorithmic improvements make this release substantially faster than its
@@ -154,7 +160,7 @@
   - Fix a chunk leak.  The leaked chunks were never touched, so this impacted
     virtual memory usage, but not physical memory usage.
 
-* linux_20080828a, linux_20080827a
+* linux_2008082[78]a (August 27/28, 2008)
 
   These snapshot releases are the simple result of incorporating Linux-specific
   support into the FreeBSD malloc sources.
diff --git a/jemalloc/Makefile.in b/jemalloc/Makefile.in
index ee674b3..6dfaf5b 100644
--- a/jemalloc/Makefile.in
+++ b/jemalloc/Makefile.in
@@ -20,7 +20,7 @@
 
 # Build parameters.
 CPPFLAGS := @CPPFLAGS@ -I@srcroot@include -I@objroot@include
-CFLAGS := @CFLAGS@ -fPIC -DPIC
+CFLAGS := @CFLAGS@
 ifeq (macho, @abi@)
 CFLAGS += -dynamic
 endif
@@ -55,6 +55,7 @@
 ifeq (macho, @abi@)
 CSRCS += @srcroot@src/zone.c
 endif
+STATIC_LIBS :=	@objroot@lib/libjemalloc@install_suffix@.a
 DSOS := @objroot@lib/libjemalloc@install_suffix@.$(SO).$(REV) \
 	@objroot@lib/libjemalloc@install_suffix@.$(SO) \
 	@objroot@lib/libjemalloc@install_suffix@_pic.a
@@ -72,8 +73,10 @@
 .PHONY: install_html install_man install_doc install
 .PHONY: tests check clean distclean relclean
 
+.SECONDARY : $(CTESTS:@srcroot@%.c=@objroot@%.o)
+
 # Default target.
-all: $(DSOS)
+all: $(DSOS) $(STATIC_LIBS)
 
 dist: doc
 
@@ -97,15 +100,24 @@
 	$(CC) $(CFLAGS) -c $(CPPFLAGS) -o $@ $<
 	@$(SHELL) -ec "$(CC) -MM $(CPPFLAGS) $< | sed \"s/\($(subst /,\/,$(notdir $(basename $@)))\)\.o\([ :]*\)/$(subst /,\/,$(strip $(dir $@)))\1.o \2/g\" > $(@:%.o=%.d)"
 
+@objroot@src/%.pic.o: @srcroot@src/%.c # PIC object build; the .pic.d written below must name the .pic.o target.
+	@mkdir -p $(@D)
+	$(CC) $(CFLAGS) -fPIC -DPIC -c $(CPPFLAGS) -o $@ $<
+	@$(SHELL) -ec "$(CC) -MM $(CPPFLAGS) $< | sed \"s/\($(subst /,\/,$(notdir $(basename $(basename $@))))\)\.o\([ :]*\)/$(subst /,\/,$(strip $(dir $@)))\1.pic.o \2/g\" > $(@:%.o=%.d)"
+
 %.$(SO) : %.$(SO).$(REV)
 	@mkdir -p $(@D)
 	ln -sf $(<F) $@
 
-@objroot@lib/libjemalloc@install_suffix@.$(SO).$(REV) : $(CSRCS:@srcroot@%.c=@objroot@%.o)
+@objroot@lib/libjemalloc@install_suffix@.$(SO).$(REV) : $(CSRCS:@srcroot@%.c=@objroot@%.pic.o)
 	@mkdir -p $(@D)
 	$(CC) -shared -Wl,-$(WL_SONAME),$(@F) $(RPATH_EXTRA:%=@RPATH@%) -o $@ $+ $(LDFLAGS) $(LIBS)
 
-@objroot@lib/libjemalloc@install_suffix@_pic.a : $(CSRCS:@srcroot@%.c=@objroot@%.o)
+@objroot@lib/libjemalloc@install_suffix@_pic.a : $(CSRCS:@srcroot@%.c=@objroot@%.pic.o)
+	@mkdir -p $(@D)
+	ar crus $@ $+
+
+@objroot@lib/libjemalloc@install_suffix@.a : $(CSRCS:@srcroot@%.c=@objroot@%.o)
 	@mkdir -p $(@D)
 	ar crus $@ $+
 
@@ -137,11 +149,12 @@
 	install -m 644 $$h $(INCLUDEDIR)/jemalloc; \
 done
 
-install_lib: $(DSOS)
+install_lib: $(DSOS) $(STATIC_LIBS)
 	install -d $(LIBDIR)
 	install -m 755 @objroot@lib/libjemalloc@install_suffix@.$(SO).$(REV) $(LIBDIR)
 	ln -sf libjemalloc@install_suffix@.$(SO).$(REV) $(LIBDIR)/libjemalloc@install_suffix@.$(SO)
 	install -m 755 @objroot@lib/libjemalloc@install_suffix@_pic.a $(LIBDIR)
+	install -m 755 @objroot@lib/libjemalloc@install_suffix@.a $(LIBDIR)
 
 install_html:
 	install -d $(DATADIR)/doc/jemalloc@install_suffix@
@@ -193,12 +206,14 @@
 
 clean:
 	rm -f $(CSRCS:@srcroot@%.c=@objroot@%.o)
+	rm -f $(CSRCS:@srcroot@%.c=@objroot@%.pic.o)
 	rm -f $(CSRCS:@srcroot@%.c=@objroot@%.d)
+	rm -f $(CSRCS:@srcroot@%.c=@objroot@%.pic.d)
 	rm -f $(CTESTS:@srcroot@%.c=@objroot@%)
 	rm -f $(CTESTS:@srcroot@%.c=@objroot@%.o)
 	rm -f $(CTESTS:@srcroot@%.c=@objroot@%.d)
 	rm -f $(CTESTS:@srcroot@%.c=@objroot@%.out)
-	rm -f $(DSOS)
+	rm -f $(DSOS) $(STATIC_LIBS)
 
 distclean: clean
 	rm -rf @objroot@autom4te.cache
diff --git a/jemalloc/include/jemalloc/internal/hash.h b/jemalloc/include/jemalloc/internal/hash.h
index d12cdb8..9073d83 100644
--- a/jemalloc/include/jemalloc/internal/hash.h
+++ b/jemalloc/include/jemalloc/internal/hash.h
@@ -62,7 +62,7 @@
 	h *= m;
 	h ^= h >> r;
 
-	return h;
+	return (h);
 }
 #endif
 
diff --git a/jemalloc/include/jemalloc/internal/jemalloc_internal.h.in b/jemalloc/include/jemalloc/internal/jemalloc_internal.h.in
index 0680b43..a27416c 100644
--- a/jemalloc/include/jemalloc/internal/jemalloc_internal.h.in
+++ b/jemalloc/include/jemalloc/internal/jemalloc_internal.h.in
@@ -148,11 +148,6 @@
 
 #define	SIZEOF_PTR		(1U << LG_SIZEOF_PTR)
 
-/* We can't use TLS in non-PIC programs, since TLS relies on loader magic. */
-#if (!defined(PIC) && !defined(NO_TLS))
-#  define NO_TLS
-#endif
-
 /*
  * Maximum size of L1 cache line.  This is used to avoid cache line aliasing.
  * In addition, this controls the spacing of cacheline-spaced size classes.
@@ -240,6 +235,13 @@
 #endif
 #include "jemalloc/internal/prof.h"
 
+#ifdef JEMALLOC_STATS
+typedef struct {	/* Counter pair kept per thread (TLS or TSD). */
+	uint64_t	allocated;	/* Accumulates ALLOCATED_ADD()'s a. */
+	uint64_t	deallocated;	/* Accumulates ALLOCATED_ADD()'s d. */
+} thread_allocated_t;
+#endif
+
 #undef JEMALLOC_H_STRUCTS
 /******************************************************************************/
 #define JEMALLOC_H_EXTERNS
@@ -295,45 +297,28 @@
 extern unsigned		narenas;
 
 #ifdef JEMALLOC_STATS
-typedef struct {
-	uint64_t	allocated;
-	uint64_t	deallocated;
-} thread_allocated_t;
 #  ifndef NO_TLS
 extern __thread thread_allocated_t	thread_allocated_tls;
-#    define ALLOCATED_GET() thread_allocated_tls.allocated
-#    define DEALLOCATED_GET() thread_allocated_tls.deallocated
+#    define ALLOCATED_GET() (thread_allocated_tls.allocated)
+#    define ALLOCATEDP_GET() (&thread_allocated_tls.allocated)
+#    define DEALLOCATED_GET() (thread_allocated_tls.deallocated)
+#    define DEALLOCATEDP_GET() (&thread_allocated_tls.deallocated)
 #    define ALLOCATED_ADD(a, d) do {					\
 	thread_allocated_tls.allocated += a;				\
 	thread_allocated_tls.deallocated += d;				\
 } while (0)
 #  else
 extern pthread_key_t	thread_allocated_tsd;
-#    define ALLOCATED_GET()						\
-	(uint64_t)((pthread_getspecific(thread_allocated_tsd) != NULL)	\
-	    ? ((thread_allocated_t *)					\
-	    pthread_getspecific(thread_allocated_tsd))->allocated : 0)
-#    define DEALLOCATED_GET()						\
-	(uint64_t)((pthread_getspecific(thread_allocated_tsd) != NULL)	\
-	    ? ((thread_allocated_t					\
-	    *)pthread_getspecific(thread_allocated_tsd))->deallocated :	\
-	    0)
+thread_allocated_t	*thread_allocated_get_hard(void);
+
+#    define ALLOCATED_GET() (thread_allocated_get()->allocated)
+#    define ALLOCATEDP_GET() (&thread_allocated_get()->allocated)
+#    define DEALLOCATED_GET() (thread_allocated_get()->deallocated)
+#    define DEALLOCATEDP_GET() (&thread_allocated_get()->deallocated)
 #    define ALLOCATED_ADD(a, d) do {					\
-	thread_allocated_t *thread_allocated = (thread_allocated_t *)	\
-	    pthread_getspecific(thread_allocated_tsd);			\
-	if (thread_allocated != NULL) {					\
-		thread_allocated->allocated += (a);			\
-		thread_allocated->deallocated += (d);			\
-	} else {							\
-		thread_allocated = (thread_allocated_t *)		\
-		    imalloc(sizeof(thread_allocated_t));		\
-		if (thread_allocated != NULL) {				\
-			pthread_setspecific(thread_allocated_tsd,	\
-			    thread_allocated);				\
-			thread_allocated->allocated = (a);		\
-			thread_allocated->deallocated = (d);		\
-		}							\
-	}								\
+	thread_allocated_t *thread_allocated = thread_allocated_get();	\
+	thread_allocated->allocated += (a);				\
+	thread_allocated->deallocated += (d);				\
 } while (0)
 #  endif
 #endif
@@ -384,6 +369,9 @@
 size_t	sa2u(size_t size, size_t alignment, size_t *run_size_p);
 void	malloc_write(const char *s);
 arena_t	*choose_arena(void);
+#  ifdef NO_TLS
+thread_allocated_t	*thread_allocated_get(void);
+#  endif
 #endif
 
 #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_))
@@ -414,10 +402,10 @@
 {
 
 	if (size <= small_maxclass)
-		return arenas[0]->bins[small_size2bin[size]].reg_size;
+		return (arenas[0]->bins[small_size2bin[size]].reg_size);
 	if (size <= arena_maxclass)
-		return PAGE_CEILING(size);
-	return CHUNK_CEILING(size);
+		return (PAGE_CEILING(size));
+	return (CHUNK_CEILING(size));
 }
 
 /*
@@ -544,6 +532,19 @@
 
 	return (ret);
 }
+
+#ifdef NO_TLS
+JEMALLOC_INLINE thread_allocated_t *
+thread_allocated_get(void)
+{
+	/* Fetch the counters stored in this thread's TSD slot, if any. */
+	thread_allocated_t *ret = pthread_getspecific(thread_allocated_tsd);
+
+	if (ret != NULL)
+		return (ret);
+	return (thread_allocated_get_hard());	/* Unset: slow path. */
+}
+#endif
 #endif
 
 #include "jemalloc/internal/rtree.h"
diff --git a/jemalloc/src/chunk_mmap.c b/jemalloc/src/chunk_mmap.c
index bc36755..164e86e 100644
--- a/jemalloc/src/chunk_mmap.c
+++ b/jemalloc/src/chunk_mmap.c
@@ -206,13 +206,15 @@
 void *
 chunk_alloc_mmap(size_t size)
 {
-	return chunk_alloc_mmap_internal(size, false);
+
+	return (chunk_alloc_mmap_internal(size, false));
 }
 
 void *
 chunk_alloc_mmap_noreserve(size_t size)
 {
-	return chunk_alloc_mmap_internal(size, true);
+
+	return (chunk_alloc_mmap_internal(size, true));
 }
 
 void
diff --git a/jemalloc/src/ctl.c b/jemalloc/src/ctl.c
index 0b8b06f..c37b4e7 100644
--- a/jemalloc/src/ctl.c
+++ b/jemalloc/src/ctl.c
@@ -1151,9 +1151,9 @@
 
 #ifdef JEMALLOC_STATS
 CTL_RO_NL_GEN(thread_allocated, ALLOCATED_GET(), uint64_t);
-CTL_RO_NL_GEN(thread_allocatedp, &ALLOCATED_GET(), uint64_t *);
+CTL_RO_NL_GEN(thread_allocatedp, ALLOCATEDP_GET(), uint64_t *);
 CTL_RO_NL_GEN(thread_deallocated, DEALLOCATED_GET(), uint64_t);
-CTL_RO_NL_GEN(thread_deallocatedp, &DEALLOCATED_GET(), uint64_t *);
+CTL_RO_NL_GEN(thread_deallocatedp, DEALLOCATEDP_GET(), uint64_t *);
 #endif
 
 /******************************************************************************/
diff --git a/jemalloc/src/jemalloc.c b/jemalloc/src/jemalloc.c
index f5434c7..61a36c7 100644
--- a/jemalloc/src/jemalloc.c
+++ b/jemalloc/src/jemalloc.c
@@ -213,6 +213,28 @@
 	JEMALLOC_P(malloc_stats_print)(NULL, NULL, NULL);
 }
 
+#if (defined(JEMALLOC_STATS) && defined(NO_TLS))
+thread_allocated_t *
+thread_allocated_get_hard(void)
+{
+	static thread_allocated_t static_thread_allocated = {0, 0};
+	thread_allocated_t *ret = imalloc(sizeof(thread_allocated_t));
+	if (ret != NULL) {	/* Zero and register as this thread's TSD. */
+		ret->allocated = 0;
+		ret->deallocated = 0;
+		pthread_setspecific(thread_allocated_tsd, ret);
+		return (ret);
+	}
+	/* Allocation failed: warn, then hand back shared static counters. */
+	malloc_write("<jemalloc>: Error allocating TSD;"
+	    " mallctl(\"thread.{de,}allocated[p]\", ...)"
+	    " will be inaccurate\n");
+	if (opt_abort)
+		abort();
+	return (&static_thread_allocated);
+}
+#endif
+
 /*
  * End miscellaneous support functions.
  */
diff --git a/jemalloc/test/allocated.c b/jemalloc/test/allocated.c
index 64a1735..b1e40e4 100644
--- a/jemalloc/test/allocated.c
+++ b/jemalloc/test/allocated.c
@@ -16,6 +16,7 @@
 	int err;
 	void *p;
 	uint64_t a0, a1, d0, d1;
+	uint64_t *ap0, *ap1, *dp0, *dp1;
 	size_t sz, usize;
 
 	sz = sizeof(a0);
@@ -31,6 +32,20 @@
 		    strerror(err));
 		exit(1);
 	}
+	sz = sizeof(ap0);
+	if ((err = JEMALLOC_P(mallctl)("thread.allocatedp", &ap0, &sz, NULL,
+	    0))) {
+		if (err == ENOENT) {
+#ifdef JEMALLOC_STATS
+			assert(false);
+#endif
+			goto RETURN;
+		}
+		fprintf(stderr, "%s(): Error in mallctl(): %s\n", __func__,
+		    strerror(err));
+		exit(1);
+	}
+	assert(*ap0 == a0);
 
 	sz = sizeof(d0);
 	if ((err = JEMALLOC_P(mallctl)("thread.deallocated", &d0, &sz, NULL,
@@ -45,6 +60,20 @@
 		    strerror(err));
 		exit(1);
 	}
+	sz = sizeof(dp0);
+	if ((err = JEMALLOC_P(mallctl)("thread.deallocatedp", &dp0, &sz, NULL,
+	    0))) {
+		if (err == ENOENT) {
+#ifdef JEMALLOC_STATS
+			assert(false);
+#endif
+			goto RETURN;
+		}
+		fprintf(stderr, "%s(): Error in mallctl(): %s\n", __func__,
+		    strerror(err));
+		exit(1);
+	}
+	assert(*dp0 == d0);
 
 	p = JEMALLOC_P(malloc)(1);
 	if (p == NULL) {
@@ -54,6 +83,10 @@
 
 	sz = sizeof(a1);
 	JEMALLOC_P(mallctl)("thread.allocated", &a1, &sz, NULL, 0);
+	sz = sizeof(ap1);
+	JEMALLOC_P(mallctl)("thread.allocatedp", &ap1, &sz, NULL, 0);
+	assert(*ap1 == a1);
+	assert(ap0 == ap1);
 
 	usize = JEMALLOC_P(malloc_usable_size)(p);
 	assert(a0 + usize <= a1);
@@ -62,6 +95,10 @@
 
 	sz = sizeof(d1);
 	JEMALLOC_P(mallctl)("thread.deallocated", &d1, &sz, NULL, 0);
+	sz = sizeof(dp1);
+	JEMALLOC_P(mallctl)("thread.deallocatedp", &dp1, &sz, NULL, 0);
+	assert(*dp1 == d1);
+	assert(dp0 == dp1);
 
 	assert(d0 + usize <= d1);