Merge branch 'dev'
diff --git a/jemalloc/ChangeLog b/jemalloc/ChangeLog
index 6db63db..7b262c9 100644
--- a/jemalloc/ChangeLog
+++ b/jemalloc/ChangeLog
@@ -6,6 +6,13 @@
     http://www.canonware.com/cgi-bin/gitweb.cgi?p=jemalloc.git
     git://canonware.com/jemalloc.git
 
+* 2.2.1 (March 30, 2011)
+
+  Bug fixes:
+  - Implement atomic operations for x86/x64.  This fixes compilation failures
+    for widely used gcc versions that lack the needed __sync_* builtins.
+  - Fix an assertion in arena_purge().
+
 * 2.2.0 (March 22, 2011)
 
   This version incorporates several improvements to algorithms and data
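[Editor's note] The compilation failures noted in the 2.2.1 entry come from relying on gcc's __sync_* builtins, which older but still common gcc releases do not provide for 8-byte operands. A minimal standalone sketch of the dispatch pattern the header uses (hypothetical file and function names, not jemalloc's API): gcc predefines __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8 only when the 8-byte builtins are available, and the patch below adds the inline-asm fallback taken otherwise on x86/x64.

/* sketch.c: hypothetical standalone illustration; assumes an x86-64
 * target with GCC-style inline asm.  Not part of the patch. */
#include <stdint.h>

static inline uint64_t
add_and_fetch_u64(uint64_t *p, uint64_t x)
{
#ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8
	return (__sync_add_and_fetch(p, x)); /* Builtin path. */
#else
	uint64_t t = x;

	/* Fallback path, as added by the patch below. */
	asm volatile (
	    "lock; xaddq %0, %1;"
	    : "+r" (t), "=m" (*p)
	    : "m" (*p)
	    );
	return (t + x); /* old *p + x == new value. */
#endif
}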
diff --git a/jemalloc/include/jemalloc/internal/atomic.h b/jemalloc/include/jemalloc/internal/atomic.h
index 821c2ef..9a29862 100644
--- a/jemalloc/include/jemalloc/internal/atomic.h
+++ b/jemalloc/include/jemalloc/internal/atomic.h
@@ -40,6 +40,7 @@
 #endif
 
 #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ATOMIC_C_))
+/******************************************************************************/
 /* 64-bit operations. */
 #ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8
 JEMALLOC_INLINE uint64_t
@@ -69,12 +70,43 @@
 
 	return (OSAtomicAdd64(-((int64_t)x), (int64_t *)p));
 }
+#elif (defined(__amd64__) || defined(__x86_64__))
+JEMALLOC_INLINE uint64_t
+atomic_add_uint64(uint64_t *p, uint64_t x)
+{
+	uint64_t t = x;
+
+	/* xaddq exchanges and adds: t receives the old value of *p. */
+	asm volatile (
+	    "lock; xaddq %0, %1;"
+	    : "+r" (t), "=m" (*p) /* Outputs. */
+	    : "m" (*p) /* Inputs. */
+	    );
+
+	return (t + x);
+}
+
+JEMALLOC_INLINE uint64_t
+atomic_sub_uint64(uint64_t *p, uint64_t x)
+{
+	uint64_t t = (uint64_t)(-(int64_t)x);
+
+	/* After xaddq, t holds the old value of *p. */
+	asm volatile (
+	    "lock; xaddq %0, %1;"
+	    : "+r" (t), "=m" (*p) /* Outputs. */
+	    : "m" (*p) /* Inputs. */
+	    );
+
+	return (t - x);
+}
 #else
 #  if (LG_SIZEOF_PTR == 3)
 #    error "Missing implementation for 64-bit atomic operations"
 #  endif
 #endif
 
+/******************************************************************************/
 /* 32-bit operations. */
 #ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4
 JEMALLOC_INLINE uint32_t
@@ -104,6 +136,36 @@
 
 	return (OSAtomicAdd32(-((int32_t)x), (int32_t *)p));
 }
+#elif (defined(__i386__) || defined(__amd64__) || defined(__x86_64__))
+JEMALLOC_INLINE uint32_t
+atomic_add_uint32(uint32_t *p, uint32_t x)
+{
+	uint32_t t = x;
+
+	/* xaddl exchanges and adds: t receives the old value of *p. */
+	asm volatile (
+	    "lock; xaddl %0, %1;"
+	    : "+r" (t), "=m" (*p) /* Outputs. */
+	    : "m" (*p) /* Inputs. */
+	    );
+
+	return (t + x);
+}
+
+JEMALLOC_INLINE uint32_t
+atomic_sub_uint32(uint32_t *p, uint32_t x)
+{
+	uint32_t t = (uint32_t)(-(int32_t)x);
+
+	/* After xaddl, t holds the old value of *p. */
+	asm volatile (
+	    "lock; xaddl %0, %1;"
+	    : "+r" (t), "=m" (*p) /* Outputs. */
+	    : "m" (*p) /* Inputs. */
+	    );
+
+	return (t - x);
+}
 #else
 #  error "Missing implementation for 32-bit atomic operations"
 #endif
diff --git a/jemalloc/src/arena.c b/jemalloc/src/arena.c
index 1954da9..9aaf47f 100644
--- a/jemalloc/src/arena.c
+++ b/jemalloc/src/arena.c
@@ -868,9 +868,10 @@
 	}
 	assert(ndirty == arena->ndirty);
 #endif
-	assert(arena->ndirty > arena->npurgatory);
+	assert(arena->ndirty > arena->npurgatory || all);
 	assert(arena->ndirty > chunk_npages || all);
-	assert((arena->nactive >> opt_lg_dirty_mult) < arena->ndirty || all);
+	assert((arena->nactive >> opt_lg_dirty_mult) < (arena->ndirty -
+	    arena->npurgatory) || all);
 
 #ifdef JEMALLOC_STATS
 	arena->stats.npurge++;
@@ -882,8 +883,10 @@
 	 * multiple threads from racing to reduce ndirty below the threshold.
 	 */
 	npurgatory = arena->ndirty - arena->npurgatory;
-	if (all == false)
+	if (all == false) {
+		assert(npurgatory >= arena->nactive >> opt_lg_dirty_mult);
 		npurgatory -= arena->nactive >> opt_lg_dirty_mult;
+	}
 	arena->npurgatory += npurgatory;
 
 	while (npurgatory > 0) {
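[Editor's note] For context on the assertion changes: arena_purge() computes the number of dirty pages this call must take responsibility for as arena->ndirty - arena->npurgatory (dirty pages not already claimed by concurrent purgers), and, unless all is true, it keeps nactive >> opt_lg_dirty_mult pages dirty so the arena stays at its configured dirty/active ratio. The corrected assertions state exactly the preconditions that keep these subtractions from underflowing. A standalone sketch of the arithmetic, with hypothetical names rather than jemalloc's internals:

/* purge_target.c: hypothetical sketch of the purge accounting. */
#include <stddef.h>
#include <stdio.h>

static size_t
purge_target(size_t ndirty, size_t npurgatory, size_t nactive,
    size_t lg_dirty_mult, int all)
{
	size_t n = ndirty - npurgatory; /* Dirty pages not yet claimed. */

	if (!all)
		n -= nactive >> lg_dirty_mult; /* Retain up to the threshold. */
	return (n);
}

int
main(void)
{
	/* 1000 dirty pages, none claimed, 8192 active pages, 1/32 ratio:
	 * keep 8192/32 == 256 pages dirty, so purge 744. */
	printf("%zu\n", purge_target(1000, 0, 8192, 5, 0)); /* 744 */
	return (0);
}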