Merge branch 'dev'
diff --git a/ChangeLog b/ChangeLog
index e3b0a51..8ed42cb 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -4,6 +4,18 @@
 
     https://github.com/jemalloc/jemalloc
 
+* 4.0.4 (October 24, 2015)
+
+  This bugfix release fixes another xallocx() regression.  No other regressions
+  have come to light in over a month, so this is likely a good starting point
+  for people who prefer to wait for "dot one" releases with all the major issues
+  shaken out.
+
+  Bug fixes:
+  - Fix xallocx(..., MALLOCX_ZERO) to zero the last full trailing page of large
+    allocations that have been randomly assigned an offset of 0 when the
+    --enable-cache-oblivious configure option is enabled.
+
 * 4.0.3 (September 24, 2015)
 
   This bugfix release continues the trend of xallocx() and heap profiling fixes.
diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in
index 8fc774b..26a5e14 100644
--- a/doc/jemalloc.xml.in
+++ b/doc/jemalloc.xml.in
@@ -1418,8 +1418,8 @@
         can cause asynchronous string deallocation.  Furthermore, each
         invocation of this interface can only read or write; simultaneous
         read/write is not supported due to string lifetime limitations.  The
-        name string must nil-terminated and comprised only of characters in the
-        sets recognized
+        name string must be nil-terminated and comprised only of characters in
+        the sets recognized
         by <citerefentry><refentrytitle>isgraph</refentrytitle>
         <manvolnum>3</manvolnum></citerefentry> and
         <citerefentry><refentrytitle>isblank</refentrytitle>
diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in
index 8536a3e..654cd08 100644
--- a/include/jemalloc/internal/jemalloc_internal.h.in
+++ b/include/jemalloc/internal/jemalloc_internal.h.in
@@ -317,6 +317,10 @@
 #define	PAGE		((size_t)(1U << LG_PAGE))
 #define	PAGE_MASK	((size_t)(PAGE - 1))
 
+/* Return the page base address for the page containing address a. */
+#define	PAGE_ADDR2BASE(a)						\
+	((void *)((uintptr_t)(a) & ~PAGE_MASK))
+
 /* Return the smallest pagesize multiple that is >= s. */
 #define	PAGE_CEILING(s)							\
 	(((s) + PAGE_MASK) & ~PAGE_MASK)
diff --git a/src/arena.c b/src/arena.c
index 3081519..43733cc 100644
--- a/src/arena.c
+++ b/src/arena.c
@@ -2683,10 +2683,16 @@
 			/*
 			 * Zero the trailing bytes of the original allocation's
 			 * last page, since they are in an indeterminate state.
+			 * There will always be trailing bytes, because ptr's
+			 * offset from the beginning of the run is a multiple of
+			 * CACHELINE in [0 .. PAGE).
 			 */
-			assert(PAGE_CEILING(oldsize) == oldsize);
-			memset((void *)((uintptr_t)ptr + oldsize), 0,
-			    PAGE_CEILING((uintptr_t)ptr) - (uintptr_t)ptr);
+			void *zbase = (void *)((uintptr_t)ptr + oldsize);
+			void *zpast = PAGE_ADDR2BASE((void *)((uintptr_t)zbase +
+			    PAGE));
+			size_t nzero = (uintptr_t)zpast - (uintptr_t)zbase;
+			assert(nzero > 0);
+			memset(zbase, 0, nzero);
 		}
 
 		size = oldsize + splitsize;
diff --git a/test/integration/xallocx.c b/test/integration/xallocx.c
index 3736252..0045196 100644
--- a/test/integration/xallocx.c
+++ b/test/integration/xallocx.c
@@ -1,5 +1,24 @@
 #include "test/jemalloc_test.h"
 
+/*
+ * Use a separate arena for xallocx() extension/contraction tests so that
+ * internal allocation, e.g. by heap profiling, can't interpose allocations
+ * where xallocx() would ordinarily be able to extend.
+ */
+static unsigned
+arena_ind(void)
+{
+	static unsigned ind = 0;
+
+	if (ind == 0) {
+		size_t sz = sizeof(ind);
+		assert_d_eq(mallctl("arenas.extend", &ind, &sz, NULL, 0), 0,
+		    "Unexpected mallctl failure creating arena");
+	}
+
+	return (ind);
+}
+
 TEST_BEGIN(test_same_size)
 {
 	void *p;
@@ -218,6 +237,7 @@
 
 TEST_BEGIN(test_extra_large)
 {
+	int flags = MALLOCX_ARENA(arena_ind());
 	size_t smallmax, large0, large1, large2, huge0, hugemax;
 	void *p;
 
@@ -229,61 +249,62 @@
 	huge0 = get_huge_size(0);
 	hugemax = get_huge_size(get_nhuge()-1);
 
-	p = mallocx(large2, 0);
+	p = mallocx(large2, flags);
 	assert_ptr_not_null(p, "Unexpected mallocx() error");
 
-	assert_zu_eq(xallocx(p, large2, 0, 0), large2,
+	assert_zu_eq(xallocx(p, large2, 0, flags), large2,
 	    "Unexpected xallocx() behavior");
 	/* Test size decrease with zero extra. */
-	assert_zu_eq(xallocx(p, large0, 0, 0), large0,
+	assert_zu_eq(xallocx(p, large0, 0, flags), large0,
 	    "Unexpected xallocx() behavior");
-	assert_zu_eq(xallocx(p, smallmax, 0, 0), large0,
+	assert_zu_eq(xallocx(p, smallmax, 0, flags), large0,
 	    "Unexpected xallocx() behavior");
 
-	assert_zu_eq(xallocx(p, large2, 0, 0), large2,
+	assert_zu_eq(xallocx(p, large2, 0, flags), large2,
 	    "Unexpected xallocx() behavior");
 	/* Test size decrease with non-zero extra. */
-	assert_zu_eq(xallocx(p, large0, large2 - large0, 0), large2,
+	assert_zu_eq(xallocx(p, large0, large2 - large0, flags), large2,
 	    "Unexpected xallocx() behavior");
-	assert_zu_eq(xallocx(p, large1, large2 - large1, 0), large2,
+	assert_zu_eq(xallocx(p, large1, large2 - large1, flags), large2,
 	    "Unexpected xallocx() behavior");
-	assert_zu_eq(xallocx(p, large0, large1 - large0, 0), large1,
+	assert_zu_eq(xallocx(p, large0, large1 - large0, flags), large1,
 	    "Unexpected xallocx() behavior");
-	assert_zu_eq(xallocx(p, smallmax, large0 - smallmax, 0), large0,
+	assert_zu_eq(xallocx(p, smallmax, large0 - smallmax, flags), large0,
 	    "Unexpected xallocx() behavior");
 
-	assert_zu_eq(xallocx(p, large0, 0, 0), large0,
+	assert_zu_eq(xallocx(p, large0, 0, flags), large0,
 	    "Unexpected xallocx() behavior");
 	/* Test size increase with zero extra. */
-	assert_zu_eq(xallocx(p, large2, 0, 0), large2,
+	assert_zu_eq(xallocx(p, large2, 0, flags), large2,
 	    "Unexpected xallocx() behavior");
-	assert_zu_eq(xallocx(p, huge0, 0, 0), large2,
+	assert_zu_eq(xallocx(p, huge0, 0, flags), large2,
 	    "Unexpected xallocx() behavior");
 
-	assert_zu_eq(xallocx(p, large0, 0, 0), large0,
+	assert_zu_eq(xallocx(p, large0, 0, flags), large0,
 	    "Unexpected xallocx() behavior");
 	/* Test size increase with non-zero extra. */
-	assert_zu_lt(xallocx(p, large0, huge0 - large0, 0), huge0,
+	assert_zu_lt(xallocx(p, large0, huge0 - large0, flags), huge0,
 	    "Unexpected xallocx() behavior");
 
-	assert_zu_eq(xallocx(p, large0, 0, 0), large0,
+	assert_zu_eq(xallocx(p, large0, 0, flags), large0,
 	    "Unexpected xallocx() behavior");
 	/* Test size increase with non-zero extra. */
-	assert_zu_eq(xallocx(p, large0, large2 - large0, 0), large2,
+	assert_zu_eq(xallocx(p, large0, large2 - large0, flags), large2,
 	    "Unexpected xallocx() behavior");
 
-	assert_zu_eq(xallocx(p, large2, 0, 0), large2,
+	assert_zu_eq(xallocx(p, large2, 0, flags), large2,
 	    "Unexpected xallocx() behavior");
 	/* Test size+extra overflow. */
-	assert_zu_lt(xallocx(p, large2, hugemax - large2 + 1, 0), huge0,
+	assert_zu_lt(xallocx(p, large2, hugemax - large2 + 1, flags), huge0,
 	    "Unexpected xallocx() behavior");
 
-	dallocx(p, 0);
+	dallocx(p, flags);
 }
 TEST_END
 
 TEST_BEGIN(test_extra_huge)
 {
+	int flags = MALLOCX_ARENA(arena_ind());
 	size_t largemax, huge0, huge1, huge2, hugemax;
 	void *p;
 
@@ -294,56 +315,56 @@
 	huge2 = get_huge_size(2);
 	hugemax = get_huge_size(get_nhuge()-1);
 
-	p = mallocx(huge2, 0);
+	p = mallocx(huge2, flags);
 	assert_ptr_not_null(p, "Unexpected mallocx() error");
 
-	assert_zu_eq(xallocx(p, huge2, 0, 0), huge2,
+	assert_zu_eq(xallocx(p, huge2, 0, flags), huge2,
 	    "Unexpected xallocx() behavior");
 	/* Test size decrease with zero extra. */
-	assert_zu_ge(xallocx(p, huge0, 0, 0), huge0,
+	assert_zu_ge(xallocx(p, huge0, 0, flags), huge0,
 	    "Unexpected xallocx() behavior");
-	assert_zu_ge(xallocx(p, largemax, 0, 0), huge0,
+	assert_zu_ge(xallocx(p, largemax, 0, flags), huge0,
 	    "Unexpected xallocx() behavior");
 
-	assert_zu_eq(xallocx(p, huge2, 0, 0), huge2,
+	assert_zu_eq(xallocx(p, huge2, 0, flags), huge2,
 	    "Unexpected xallocx() behavior");
 	/* Test size decrease with non-zero extra. */
-	assert_zu_eq(xallocx(p, huge0, huge2 - huge0, 0), huge2,
+	assert_zu_eq(xallocx(p, huge0, huge2 - huge0, flags), huge2,
 	    "Unexpected xallocx() behavior");
-	assert_zu_eq(xallocx(p, huge1, huge2 - huge1, 0), huge2,
+	assert_zu_eq(xallocx(p, huge1, huge2 - huge1, flags), huge2,
 	    "Unexpected xallocx() behavior");
-	assert_zu_eq(xallocx(p, huge0, huge1 - huge0, 0), huge1,
+	assert_zu_eq(xallocx(p, huge0, huge1 - huge0, flags), huge1,
 	    "Unexpected xallocx() behavior");
-	assert_zu_ge(xallocx(p, largemax, huge0 - largemax, 0), huge0,
+	assert_zu_ge(xallocx(p, largemax, huge0 - largemax, flags), huge0,
 	    "Unexpected xallocx() behavior");
 
-	assert_zu_ge(xallocx(p, huge0, 0, 0), huge0,
+	assert_zu_ge(xallocx(p, huge0, 0, flags), huge0,
 	    "Unexpected xallocx() behavior");
 	/* Test size increase with zero extra. */
-	assert_zu_le(xallocx(p, huge2, 0, 0), huge2,
+	assert_zu_le(xallocx(p, huge2, 0, flags), huge2,
 	    "Unexpected xallocx() behavior");
-	assert_zu_le(xallocx(p, hugemax+1, 0, 0), huge2,
+	assert_zu_le(xallocx(p, hugemax+1, 0, flags), huge2,
 	    "Unexpected xallocx() behavior");
 
-	assert_zu_ge(xallocx(p, huge0, 0, 0), huge0,
+	assert_zu_ge(xallocx(p, huge0, 0, flags), huge0,
 	    "Unexpected xallocx() behavior");
 	/* Test size increase with non-zero extra. */
-	assert_zu_le(xallocx(p, huge0, SIZE_T_MAX - huge0, 0), hugemax,
+	assert_zu_le(xallocx(p, huge0, SIZE_T_MAX - huge0, flags), hugemax,
 	    "Unexpected xallocx() behavior");
 
-	assert_zu_ge(xallocx(p, huge0, 0, 0), huge0,
+	assert_zu_ge(xallocx(p, huge0, 0, flags), huge0,
 	    "Unexpected xallocx() behavior");
 	/* Test size increase with non-zero extra. */
-	assert_zu_le(xallocx(p, huge0, huge2 - huge0, 0), huge2,
+	assert_zu_le(xallocx(p, huge0, huge2 - huge0, flags), huge2,
 	    "Unexpected xallocx() behavior");
 
-	assert_zu_eq(xallocx(p, huge2, 0, 0), huge2,
+	assert_zu_eq(xallocx(p, huge2, 0, flags), huge2,
 	    "Unexpected xallocx() behavior");
 	/* Test size+extra overflow. */
-	assert_zu_le(xallocx(p, huge2, hugemax - huge2 + 1, 0), hugemax,
+	assert_zu_le(xallocx(p, huge2, hugemax - huge2 + 1, flags), hugemax,
 	    "Unexpected xallocx() behavior");
 
-	dallocx(p, 0);
+	dallocx(p, flags);
 }
 TEST_END
 
@@ -388,12 +409,13 @@
 static void
 test_zero(size_t szmin, size_t szmax)
 {
+	int flags = MALLOCX_ARENA(arena_ind()) | MALLOCX_ZERO;
 	size_t sz, nsz;
 	void *p;
 #define	FILL_BYTE 0x7aU
 
 	sz = szmax;
-	p = mallocx(sz, MALLOCX_ZERO);
+	p = mallocx(sz, flags);
 	assert_ptr_not_null(p, "Unexpected mallocx() error");
 	assert_false(validate_fill(p, 0x00, 0, sz), "Memory not filled: sz=%zu",
 	    sz);
@@ -408,14 +430,14 @@
 
 	/* Shrink in place so that we can expect growing in place to succeed. */
 	sz = szmin;
-	assert_zu_eq(xallocx(p, sz, 0, MALLOCX_ZERO), sz,
+	assert_zu_eq(xallocx(p, sz, 0, flags), sz,
 	    "Unexpected xallocx() error");
 	assert_false(validate_fill(p, FILL_BYTE, 0, sz),
 	    "Memory not filled: sz=%zu", sz);
 
 	for (sz = szmin; sz < szmax; sz = nsz) {
-		nsz = nallocx(sz+1, MALLOCX_ZERO);
-		assert_zu_eq(xallocx(p, sz+1, 0, MALLOCX_ZERO), nsz,
+		nsz = nallocx(sz+1, flags);
+		assert_zu_eq(xallocx(p, sz+1, 0, flags), nsz,
 		    "Unexpected xallocx() failure");
 		assert_false(validate_fill(p, FILL_BYTE, 0, sz),
 		    "Memory not filled: sz=%zu", sz);
@@ -426,7 +448,7 @@
 		    "Memory not filled: nsz=%zu", nsz);
 	}
 
-	dallocx(p, 0);
+	dallocx(p, flags);
 }
 
 TEST_BEGIN(test_zero_large)