Improve codegen (#237)

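Most of this change threads a heap_size_class_t* through the small/medium allocation path (_rpmalloc_allocate_small, _rpmalloc_allocate_medium, _rpmalloc_allocate_from_heap_fallback, _rpmalloc_heap_extract_new_span, _rpmalloc_span_initialize_new) so the per-class slot heap->size_class[class_idx] is resolved once and passed along instead of being re-indexed at every access; the remaining hunks make the double linked list add store span->next unconditionally and tidy the MADV_FREE_REUSABLE retry loop. Below is a minimal sketch of the pointer-hoisting idea, using pared-down stand-in types and a hypothetical allocate_small before/after pair, not the real rpmalloc structures or functions:

    #include <stddef.h>

    /* Stand-in types for the sketch only; far smaller than the real
       rpmalloc structs, and the array size is arbitrary. */
    typedef struct heap_size_class_t { void* free_list; } heap_size_class_t;
    typedef struct heap_t { heap_size_class_t size_class[128]; } heap_t;

    /* Intrusive free list pop: each free block stores the next pointer
       in its first word, as in the allocator itself. */
    static void* free_list_pop(void** list) {
        void* block = *list;
        *list = *(void**)block;
        return block;
    }

    /* Before: every access re-indexes heap->size_class[class_idx]. */
    static void* allocate_small_before(heap_t* heap, unsigned class_idx) {
        if (heap->size_class[class_idx].free_list)
            return free_list_pop(&heap->size_class[class_idx].free_list);
        return NULL; /* fallback path elided in this sketch */
    }

    /* After: the slot address is computed once and reused (and, in the
       real patch, passed down into the fallback functions). */
    static void* allocate_small_after(heap_t* heap, unsigned class_idx) {
        heap_size_class_t* heap_size_class = heap->size_class + class_idx;
        if (heap_size_class->free_list)
            return free_list_pop(&heap_size_class->free_list);
        return NULL; /* fallback path elided in this sketch */
    }
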
diff --git a/rpmalloc/rpmalloc.c b/rpmalloc/rpmalloc.c
index c6ff02b..5186f61 100644
--- a/rpmalloc/rpmalloc.c
+++ b/rpmalloc/rpmalloc.c
@@ -925,8 +925,9 @@
 	} else {
 #if defined(MADV_FREE_REUSABLE)
 		int ret;
-		while ((ret = madvise(address, size, MADV_FREE_REUSABLE)) == -1 && errno == EAGAIN);
-		if (ret == -1 && errno != 0)
+		while ((ret = madvise(address, size, MADV_FREE_REUSABLE)) == -1 && (errno == EAGAIN))
+			errno = 0;
+		if ((ret == -1) && (errno != 0))
 #elif defined(MADV_FREE)
 		if (madvise(address, size, MADV_FREE))
 #endif
@@ -978,12 +979,9 @@
 //! Add a span to double linked list at the head
 static void
 _rpmalloc_span_double_link_list_add(span_t** head, span_t* span) {
-	if (*head) {
-		span->next = *head;
+	if (*head)
 		(*head)->prev = span;
-	} else {
-		span->next = 0;
-	}
+	span->next = *head;
 	*head = span;
 }
 
@@ -1005,9 +1003,8 @@
 		span_t* next_span = span->next;
 		span_t* prev_span = span->prev;
 		prev_span->next = next_span;
-		if (EXPECTED(next_span != 0)) {
+		if (EXPECTED(next_span != 0))
 			next_span->prev = prev_span;
-		}
 	}
 }
 
@@ -1241,7 +1238,7 @@
 
 //! Initialize an unused span (from cache or mapped) to be new active span, putting the initial free list in heap class free list
 static void*
-_rpmalloc_span_initialize_new(heap_t* heap, span_t* span, uint32_t class_idx) {
+_rpmalloc_span_initialize_new(heap_t* heap, heap_size_class_t* heap_size_class, span_t* span, uint32_t class_idx) {
 	rpmalloc_assert(span->span_count == 1, "Internal failure");
 	size_class_t* size_class = _memory_size_class + class_idx;
 	span->size_class = class_idx;
@@ -1255,11 +1252,11 @@
 
 	//Setup free list. Only initialize one system page worth of free blocks in list
 	void* block;
-	span->free_list_limit = free_list_partial_init(&heap->size_class[class_idx].free_list, &block, 
+	span->free_list_limit = free_list_partial_init(&heap_size_class->free_list, &block, 
 		span, pointer_offset(span, SPAN_HEADER_SIZE), size_class->block_count, size_class->block_size);
 	//Link span as partial if there remains blocks to be initialized as free list, or full if fully initialized
 	if (span->free_list_limit < span->block_count) {
-		_rpmalloc_span_double_link_list_add(&heap->size_class[class_idx].partial_span, span);
+		_rpmalloc_span_double_link_list_add(&heap_size_class->partial_span, span);
 		span->used_count = span->free_list_limit;
 	} else {
 #if RPMALLOC_FIRST_CLASS_HEAPS
@@ -1722,23 +1719,17 @@
 
 //! Get a span from one of the cache levels (thread cache, reserved, global cache) or fallback to mapping more memory
 static span_t*
-_rpmalloc_heap_extract_new_span(heap_t* heap, size_t span_count, uint32_t class_idx) {
+_rpmalloc_heap_extract_new_span(heap_t* heap, heap_size_class_t* heap_size_class, size_t span_count, uint32_t class_idx) {
 	span_t* span;
 #if ENABLE_THREAD_CACHE
-	if (class_idx < SIZE_CLASS_COUNT) {
-		if (heap->size_class[class_idx].cache) {
-			span = heap->size_class[class_idx].cache;
-			span_t* new_cache = 0;
-			if (heap->span_cache.count)
-				new_cache = heap->span_cache.span[--heap->span_cache.count];
-			heap->size_class[class_idx].cache = new_cache;
-			_rpmalloc_inc_span_statistics(heap, span_count, class_idx);
-			return span;
-		}
+	if (heap_size_class && heap_size_class->cache) {
+		span = heap_size_class->cache;
+		heap_size_class->cache = (heap->span_cache.count ? heap->span_cache.span[--heap->span_cache.count] : 0);
+		_rpmalloc_inc_span_statistics(heap, span_count, class_idx);
+		return span;
 	}
-#else
-	(void)sizeof(class_idx);
 #endif
+	(void)sizeof(class_idx);
 	// Allow 50% overhead to increase cache hits
 	size_t base_span_count = span_count;
 	size_t limit_span_count = (span_count > 2) ? (span_count + (span_count >> 1)) : span_count;
@@ -2037,21 +2028,21 @@
 
 //! Allocate a small/medium sized memory block from the given heap
 static void*
-_rpmalloc_allocate_from_heap_fallback(heap_t* heap, uint32_t class_idx) {
-	span_t* span = heap->size_class[class_idx].partial_span;
+_rpmalloc_allocate_from_heap_fallback(heap_t* heap, heap_size_class_t* heap_size_class, uint32_t class_idx) {
+	span_t* span = heap_size_class->partial_span;
 	if (EXPECTED(span != 0)) {
 		rpmalloc_assert(span->block_count == _memory_size_class[span->size_class].block_count, "Span block count corrupted");
 		rpmalloc_assert(!_rpmalloc_span_is_fully_utilized(span), "Internal failure");
 		void* block;
 		if (span->free_list) {
-			//Swap in free list if not empty
-			heap->size_class[class_idx].free_list = span->free_list;
+			//Span local free list is not empty, swap to size class free list
+			block = free_list_pop(&span->free_list);
+			heap_size_class->free_list = span->free_list;
 			span->free_list = 0;
-			block = free_list_pop(&heap->size_class[class_idx].free_list);
 		} else {
 			//If the span did not fully initialize free list, link up another page worth of blocks			
 			void* block_start = pointer_offset(span, SPAN_HEADER_SIZE + ((size_t)span->free_list_limit * span->block_size));
-			span->free_list_limit += free_list_partial_init(&heap->size_class[class_idx].free_list, &block,
+			span->free_list_limit += free_list_partial_init(&heap_size_class->free_list, &block,
 				(void*)((uintptr_t)block_start & ~(_memory_page_size - 1)), block_start,
 				span->block_count - span->free_list_limit, span->block_size);
 		}
@@ -2067,7 +2058,7 @@
 			return block;
 
 		//The span is fully utilized, unlink from partial list and add to fully utilized list
-		_rpmalloc_span_double_link_list_pop_head(&heap->size_class[class_idx].partial_span, span);
+		_rpmalloc_span_double_link_list_pop_head(&heap_size_class->partial_span, span);
 #if RPMALLOC_FIRST_CLASS_HEAPS
 		_rpmalloc_span_double_link_list_add(&heap->full_span[class_idx], span);
 #endif
@@ -2076,10 +2067,10 @@
 	}
 
 	//Find a span in one of the cache levels
-	span = _rpmalloc_heap_extract_new_span(heap, 1, class_idx);
+	span = _rpmalloc_heap_extract_new_span(heap, heap_size_class, 1, class_idx);
 	if (EXPECTED(span != 0)) {
 		//Mark span as owned by this heap and set base data, return first block
-		return _rpmalloc_span_initialize_new(heap, span, class_idx);
+		return _rpmalloc_span_initialize_new(heap, heap_size_class, span, class_idx);
 	}
 
 	return 0;
@@ -2091,10 +2082,11 @@
 	rpmalloc_assert(heap, "No thread heap");
 	//Small sizes have unique size classes
 	const uint32_t class_idx = (uint32_t)((size + (SMALL_GRANULARITY - 1)) >> SMALL_GRANULARITY_SHIFT);
+	heap_size_class_t* heap_size_class = heap->size_class + class_idx;
 	_rpmalloc_stat_inc_alloc(heap, class_idx);
-	if (EXPECTED(heap->size_class[class_idx].free_list != 0))
-		return free_list_pop(&heap->size_class[class_idx].free_list);
-	return _rpmalloc_allocate_from_heap_fallback(heap, class_idx);
+	if (EXPECTED(heap_size_class->free_list != 0))
+		return free_list_pop(&heap_size_class->free_list);
+	return _rpmalloc_allocate_from_heap_fallback(heap, heap_size_class, class_idx);
 }
 
 //! Allocate a medium sized memory block from the given heap
@@ -2104,10 +2096,11 @@
 	//Calculate the size class index and do a dependent lookup of the final class index (in case of merged classes)
 	const uint32_t base_idx = (uint32_t)(SMALL_CLASS_COUNT + ((size - (SMALL_SIZE_LIMIT + 1)) >> MEDIUM_GRANULARITY_SHIFT));
 	const uint32_t class_idx = _memory_size_class[base_idx].class_idx;
+	heap_size_class_t* heap_size_class = heap->size_class + class_idx;
 	_rpmalloc_stat_inc_alloc(heap, class_idx);
-	if (EXPECTED(heap->size_class[class_idx].free_list != 0))
-		return free_list_pop(&heap->size_class[class_idx].free_list);
-	return _rpmalloc_allocate_from_heap_fallback(heap, class_idx);
+	if (EXPECTED(heap_size_class->free_list != 0))
+		return free_list_pop(&heap_size_class->free_list);
+	return _rpmalloc_allocate_from_heap_fallback(heap, heap_size_class, class_idx);
 }
 
 //! Allocate a large sized memory block from the given heap
@@ -2123,7 +2116,7 @@
 		++span_count;
 
 	//Find a span in one of the cache levels
-	span_t* span = _rpmalloc_heap_extract_new_span(heap, span_count, SIZE_CLASS_LARGE);
+	span_t* span = _rpmalloc_heap_extract_new_span(heap, 0, span_count, SIZE_CLASS_LARGE);
 	if (!span)
 		return span;
 
@@ -2321,8 +2314,8 @@
 		_rpmalloc_span_double_link_list_add(&heap->size_class[span->size_class].partial_span, span);
 		--heap->full_span_count;
 	}
-	--span->used_count;
 	*((void**)block) = span->free_list;
+	--span->used_count;
 	span->free_list = block;
 	if (UNEXPECTED(span->used_count == span->list_size)) {
 		_rpmalloc_span_double_link_list_remove(&heap->size_class[span->size_class].partial_span, span);
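
For reference, the list-add simplification on its own, as a standalone sketch with a trimmed span type (only the link fields, not the real span_t layout): after the patch, span->next is assigned *head unconditionally, which covers both the empty-list case (where *head is 0) and the non-empty case, so only the back-link of the previous head remains conditional.

    #include <stddef.h>

    /* Trimmed stand-in: only the link fields of the real span_t. */
    typedef struct span_t { struct span_t* next; struct span_t* prev; } span_t;

    /* Post-patch shape of _rpmalloc_span_double_link_list_add: the
       unconditional span->next = *head replaces the old if/else. */
    static void
    span_double_link_list_add(span_t** head, span_t* span) {
        if (*head)
            (*head)->prev = span;
        span->next = *head;
        *head = span;
    }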