Array based caches (#177)
diff --git a/CHANGELOG b/CHANGELOG
index 2e1a037..c11f613 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,5 +1,7 @@
1.4.1
+Dual licensed: released to the public domain and also available under the MIT license
+
Allow up to 4GiB page sizes
Fix an issue where large page sizes in conjunction with many threads waste a lot of memory (previously
@@ -21,6 +23,11 @@
Refactor finalization to be compatible with global scope data causing dynamic allocations and frees, like
C++ objects with custom ctors/dtors.
+Refactor thread and global cache to be array based instead of list based for improved performance
+and cache size control.
+
+Add missing C++ operator overloads with ENABLE_OVERRIDE when using Microsoft C++ runtimes
+
1.4.0
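
The headline change above, moving the thread and global caches from linked span lists to fixed-size pointer arrays, replaces unbounded lists with an array plus a count: pushes and pops become index operations, and the maximum cache size is fixed at compile time. A minimal sketch of the idea, using illustrative names rather than the patch's actual types:

	#include <stddef.h>

	typedef struct { size_t count; void* slot[256]; } fixed_cache_t;

	/* Push fails when the array is full; that is the point where the patch spills a
	   batch of spans to the global cache (or unmaps them) instead of growing a list. */
	static int fixed_cache_push(fixed_cache_t* cache, void* item) {
		if (cache->count == 256)
			return 0;
		cache->slot[cache->count++] = item;
		return 1;
	}

	static void* fixed_cache_pop(fixed_cache_t* cache) {
		return cache->count ? cache->slot[--cache->count] : 0;
	}
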
diff --git a/rpmalloc/malloc.c b/rpmalloc/malloc.c
index b281e1f..2e1f02a 100644
--- a/rpmalloc/malloc.c
+++ b/rpmalloc/malloc.c
@@ -222,6 +222,11 @@
#endif
+static inline size_t
+_rpmalloc_page_size(void) {
+ return _memory_page_size;
+}
+
extern inline void* RPMALLOC_CDECL
reallocarray(void* ptr, size_t count, size_t size) {
size_t total;
@@ -248,9 +253,10 @@
extern inline void* RPMALLOC_CDECL
valloc(size_t size) {
get_thread_heap();
+ const size_t page_size = _rpmalloc_page_size();
if (!size)
- size = _memory_page_size;
- size_t total_size = size + _memory_page_size;
+ size = page_size;
+ size_t total_size = size + page_size;
#if ENABLE_VALIDATE_ARGS
if (total_size < size) {
errno = EINVAL;
@@ -258,8 +264,8 @@
}
#endif
void* buffer = rpmalloc(total_size);
- if ((uintptr_t)buffer & (_memory_page_size - 1))
- return (void*)(((uintptr_t)buffer & ~(_memory_page_size - 1)) + _memory_page_size);
+ if ((uintptr_t)buffer & (page_size - 1))
+ return (void*)(((uintptr_t)buffer & ~(page_size - 1)) + page_size);
return buffer;
}
@@ -267,8 +273,9 @@
pvalloc(size_t size) {
get_thread_heap();
size_t aligned_size = size;
- if (aligned_size % _memory_page_size)
- aligned_size = (1 + (aligned_size / _memory_page_size)) * _memory_page_size;
+ const size_t page_size = _rpmalloc_page_size();
+ if (aligned_size % page_size)
+ aligned_size = (1 + (aligned_size / page_size)) * page_size;
#if ENABLE_VALIDATE_ARGS
if (aligned_size < size) {
errno = EINVAL;
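
Both wrappers reduce to the same page arithmetic: pvalloc rounds the requested size up to a whole number of pages, while valloc over-allocates by one page and rounds the returned pointer up to a page boundary. A standalone sketch of equivalent arithmetic, assuming (as the code above does) that the page size is a power of two; the helper names are illustrative and not part of the patch:

	#include <stddef.h>
	#include <stdint.h>

	static size_t round_up_to_page(size_t size, size_t page_size) {
		/* e.g. size = 5000, page_size = 4096 -> 2 pages = 8192 bytes */
		return (size % page_size) ? (1 + (size / page_size)) * page_size : size;
	}

	static void* align_up_to_page(void* ptr, size_t page_size) {
		uintptr_t address = (uintptr_t)ptr;
		return (void*)((address + (page_size - 1)) & ~(uintptr_t)(page_size - 1));
	}
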
diff --git a/rpmalloc/rpmalloc.c b/rpmalloc/rpmalloc.c
index d011bcc..4b83a0e 100644
--- a/rpmalloc/rpmalloc.c
+++ b/rpmalloc/rpmalloc.c
@@ -1,4 +1,4 @@
-/* rpmalloc.c - Memory allocator - Public Domain - 2016 Mattias Jansson
+/* rpmalloc.c - Memory allocator - Public Domain - 2016-2020 Mattias Jansson
*
* This library provides a cross-platform lock free thread caching malloc implementation in C11.
* The latest source code is always available at
@@ -50,60 +50,43 @@
#define ENABLE_PRELOAD 0
#endif
#ifndef DISABLE_UNMAP
-//! Disable unmapping memory pages
+//! Disable unmapping memory pages (also enables unlimited cache)
#define DISABLE_UNMAP 0
#endif
+#ifndef ENABLE_UNLIMITED_CACHE
+//! Enable unlimited global cache (no unmapping until finalization)
+#define ENABLE_UNLIMITED_CACHE 0
+#endif
+#ifndef ENABLE_ADAPTIVE_THREAD_CACHE
+//! Enable adaptive thread cache size based on use heuristics
+#define ENABLE_ADAPTIVE_THREAD_CACHE 0
+#endif
#ifndef DEFAULT_SPAN_MAP_COUNT
//! Default number of spans to map in call to map more virtual memory (default values yield 4MiB here)
#define DEFAULT_SPAN_MAP_COUNT 64
#endif
-
-#if ENABLE_THREAD_CACHE
-#ifndef ENABLE_UNLIMITED_CACHE
-//! Unlimited thread and global cache
-#define ENABLE_UNLIMITED_CACHE 0
-#endif
-#ifndef ENABLE_UNLIMITED_THREAD_CACHE
-//! Unlimited cache disables any thread cache limitations
-#define ENABLE_UNLIMITED_THREAD_CACHE ENABLE_UNLIMITED_CACHE
-#endif
-#if !ENABLE_UNLIMITED_THREAD_CACHE
-#ifndef THREAD_CACHE_MULTIPLIER
-//! Multiplier for thread cache (cache limit will be span release count multiplied by this value)
-#define THREAD_CACHE_MULTIPLIER 16
-#endif
-#ifndef ENABLE_ADAPTIVE_THREAD_CACHE
-//! Enable adaptive size of per-thread cache (still bounded by THREAD_CACHE_MULTIPLIER hard limit)
-#define ENABLE_ADAPTIVE_THREAD_CACHE 0
-#endif
-#endif
-#endif
-
-#if ENABLE_GLOBAL_CACHE && ENABLE_THREAD_CACHE
-#if DISABLE_UNMAP
-#undef ENABLE_UNLIMITED_GLOBAL_CACHE
-#define ENABLE_UNLIMITED_GLOBAL_CACHE 1
-#endif
-#ifndef ENABLE_UNLIMITED_GLOBAL_CACHE
-//! Unlimited cache disables any global cache limitations
-#define ENABLE_UNLIMITED_GLOBAL_CACHE ENABLE_UNLIMITED_CACHE
-#endif
-#if !ENABLE_UNLIMITED_GLOBAL_CACHE
-//! Multiplier for global cache (cache limit will be span release count multiplied by this value)
-#define GLOBAL_CACHE_MULTIPLIER (THREAD_CACHE_MULTIPLIER * 6)
-#endif
-#else
-# undef ENABLE_GLOBAL_CACHE
-# define ENABLE_GLOBAL_CACHE 0
-#endif
-
-#if !ENABLE_THREAD_CACHE || ENABLE_UNLIMITED_THREAD_CACHE
-# undef ENABLE_ADAPTIVE_THREAD_CACHE
-# define ENABLE_ADAPTIVE_THREAD_CACHE 0
+#ifndef GLOBAL_CACHE_MULTIPLIER
+//! Multiplier for global cache
+#define GLOBAL_CACHE_MULTIPLIER 8
#endif
#if DISABLE_UNMAP && !ENABLE_GLOBAL_CACHE
-# error Must use global cache if unmap is disabled
+#error Must use global cache if unmap is disabled
+#endif
+
+#if DISABLE_UNMAP
+#undef ENABLE_UNLIMITED_CACHE
+#define ENABLE_UNLIMITED_CACHE 1
+#endif
+
+#if !ENABLE_GLOBAL_CACHE
+#undef ENABLE_UNLIMITED_CACHE
+#define ENABLE_UNLIMITED_CACHE 0
+#endif
+
+#if !ENABLE_THREAD_CACHE
+#undef ENABLE_ADAPTIVE_THREAD_CACHE
+#define ENABLE_ADAPTIVE_THREAD_CACHE 0
#endif
#if defined( _WIN32 ) || defined( __WIN32__ ) || defined( _WIN64 )
@@ -115,7 +98,7 @@
#endif
/// Platform and arch specifics
-#if defined(_MSC_VER) && !defined(__clang__)
+#if defined(_MSC_VER)
# ifndef FORCEINLINE
# define FORCEINLINE inline __forceinline
# endif
@@ -206,13 +189,15 @@
static FORCEINLINE int32_t atomic_load32(atomic32_t* src) { return *src; }
static FORCEINLINE void atomic_store32(atomic32_t* dst, int32_t val) { *dst = val; }
+static FORCEINLINE void atomic_store32_release(atomic32_t* dst, int32_t val) { *dst = val; }
static FORCEINLINE int32_t atomic_incr32(atomic32_t* val) { return (int32_t)InterlockedIncrement(val); }
static FORCEINLINE int32_t atomic_decr32(atomic32_t* val) { return (int32_t)InterlockedDecrement(val); }
+static FORCEINLINE int32_t atomic_add32(atomic32_t* val, int32_t add) { return (int32_t)InterlockedExchangeAdd(val, add) + add; }
+static FORCEINLINE int atomic_cas32_acquire(atomic32_t* dst, int32_t val, int32_t ref) { return (InterlockedCompareExchange(dst, val, ref) == ref) ? 1 : 0; }
#if ENABLE_STATISTICS || ENABLE_ADAPTIVE_THREAD_CACHE
static FORCEINLINE int64_t atomic_load64(atomic64_t* src) { return *src; }
static FORCEINLINE int64_t atomic_add64(atomic64_t* val, int64_t add) { return (int64_t)InterlockedExchangeAdd64(val, add) + add; }
#endif
-static FORCEINLINE int32_t atomic_add32(atomic32_t* val, int32_t add) { return (int32_t)InterlockedExchangeAdd(val, add) + add; }
static FORCEINLINE void* atomic_load_ptr(atomicptr_t* src) { return (void*)*src; }
static FORCEINLINE void atomic_store_ptr(atomicptr_t* dst, void* val) { *dst = val; }
static FORCEINLINE void atomic_store_ptr_release(atomicptr_t* dst, void* val) { *dst = val; }
@@ -232,13 +217,15 @@
static FORCEINLINE int32_t atomic_load32(atomic32_t* src) { return atomic_load_explicit(src, memory_order_relaxed); }
static FORCEINLINE void atomic_store32(atomic32_t* dst, int32_t val) { atomic_store_explicit(dst, val, memory_order_relaxed); }
+static FORCEINLINE void atomic_store32_release(atomic32_t* dst, int32_t val) { atomic_store_explicit(dst, val, memory_order_release); }
static FORCEINLINE int32_t atomic_incr32(atomic32_t* val) { return atomic_fetch_add_explicit(val, 1, memory_order_relaxed) + 1; }
static FORCEINLINE int32_t atomic_decr32(atomic32_t* val) { return atomic_fetch_add_explicit(val, -1, memory_order_relaxed) - 1; }
+static FORCEINLINE int32_t atomic_add32(atomic32_t* val, int32_t add) { return atomic_fetch_add_explicit(val, add, memory_order_relaxed) + add; }
+static FORCEINLINE int atomic_cas32_acquire(atomic32_t* dst, int32_t val, int32_t ref) { return atomic_compare_exchange_weak_explicit(dst, &ref, val, memory_order_acquire, memory_order_relaxed); }
#if ENABLE_STATISTICS || ENABLE_ADAPTIVE_THREAD_CACHE
static FORCEINLINE int64_t atomic_load64(atomic64_t* val) { return atomic_load_explicit(val, memory_order_relaxed); }
static FORCEINLINE int64_t atomic_add64(atomic64_t* val, int64_t add) { return atomic_fetch_add_explicit(val, add, memory_order_relaxed) + add; }
#endif
-static FORCEINLINE int32_t atomic_add32(atomic32_t* val, int32_t add) { return atomic_fetch_add_explicit(val, add, memory_order_relaxed) + add; }
static FORCEINLINE void* atomic_load_ptr(atomicptr_t* src) { return atomic_load_explicit(src, memory_order_relaxed); }
static FORCEINLINE void atomic_store_ptr(atomicptr_t* dst, void* val) { atomic_store_explicit(dst, val, memory_order_relaxed); }
static FORCEINLINE void atomic_store_ptr_release(atomicptr_t* dst, void* val) { atomic_store_explicit(dst, val, memory_order_release); }
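
The atomic_cas32_acquire/atomic_store32_release pair added to both atomic backends above is what the reworked global cache uses as a small spin lock around its span arrays (see the global cache hunks further down). In isolation the pattern looks like the following; this is a usage sketch, not a helper the patch itself adds:

	static void cache_lock(atomic32_t* lock) {
		while (!atomic_cas32_acquire(lock, 1, 0))
			; /* spin until the current holder publishes 0 with release semantics */
	}

	static void cache_unlock(atomic32_t* lock) {
		atomic_store32_release(lock, 0);
	}
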
@@ -314,6 +301,14 @@
#define HEAP_ORPHAN_ABA_SIZE 512
//! Size of a span header (must be a multiple of SMALL_GRANULARITY and a power of two)
#define SPAN_HEADER_SIZE 128
+//! Number of spans in thread cache
+#define MAX_THREAD_SPAN_CACHE 256
+//! Number of spans to transfer between thread and global cache
+#define THREAD_SPAN_CACHE_TRANSFER 64
+//! Number of spans in thread cache for large spans (must be greater than LARGE_CLASS_COUNT / 2)
+#define MAX_THREAD_SPAN_LARGE_CACHE 64
+//! Number of spans to transfer between thread and global cache for large spans
+#define THREAD_SPAN_LARGE_CACHE_TRANSFER 6
_Static_assert((SMALL_GRANULARITY & (SMALL_GRANULARITY - 1)) == 0, "Small granularity must be power of two");
_Static_assert((SPAN_HEADER_SIZE & (SPAN_HEADER_SIZE - 1)) == 0, "Span header size must be power of two");
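
To put the new constants in perspective, assuming the default 64KiB span size: a full single-span thread cache parks at most 16MiB of spans per thread and spills 4MiB per transfer to the global cache, while the pointer array itself costs only about 2KiB per heap on a 64-bit target. Spelled out (illustrative arithmetic, not code from the patch):

	/* MAX_THREAD_SPAN_CACHE      * span size = 256 * 64KiB = 16MiB held per thread (worst case) */
	/* THREAD_SPAN_CACHE_TRANSFER * span size =  64 * 64KiB =  4MiB moved per flush to global    */
	/* array overhead             = 256 * sizeof(span_t*)   =  2KiB on a 64-bit target           */
	/* large spans: MAX_THREAD_SPAN_LARGE_CACHE = 64 slots per class, transferred 6 at a time    */
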
@@ -456,30 +451,42 @@
};
_Static_assert(sizeof(span_t) <= SPAN_HEADER_SIZE, "span size mismatch");
+struct span_cache_t {
+ size_t count;
+ span_t* span[MAX_THREAD_SPAN_CACHE];
+};
+typedef struct span_cache_t span_cache_t;
+
+struct span_large_cache_t {
+ size_t count;
+ span_t* span[MAX_THREAD_SPAN_LARGE_CACHE];
+};
+typedef struct span_large_cache_t span_large_cache_t;
+
+struct heap_size_class_t {
+ //! Free list of active span
+ void* free_list;
+ //! Double linked list of partially used spans with free blocks.
+ // Previous span pointer in head points to tail span of list.
+ span_t* partial_span;
+ //! Early level cache of fully free spans
+ span_t* cache[2];
+};
+typedef struct heap_size_class_t heap_size_class_t;
+
// Control structure for a heap, either a thread heap or a first class heap if enabled
struct heap_t {
//! Owning thread ID
uintptr_t owner_thread;
- //! Free list of active span
- void* free_list[SIZE_CLASS_COUNT];
- //! Double linked list of partially used spans with free blocks for each size class.
- // Previous span pointer in head points to tail span of list.
- span_t* partial_span[SIZE_CLASS_COUNT];
+ //! Free lists for each size class
+ heap_size_class_t size_class[SIZE_CLASS_COUNT];
#if RPMALLOC_FIRST_CLASS_HEAPS
//! Double linked list of fully utilized spans with free blocks for each size class.
// Previous span pointer in head points to tail span of list.
span_t* full_span[SIZE_CLASS_COUNT];
#endif
-#if ENABLE_THREAD_CACHE
- //! List of free spans (single linked list)
- span_t* span_cache[LARGE_CLASS_COUNT];
-#endif
//! List of deferred free spans (single linked list)
atomicptr_t span_free_deferred;
-#if ENABLE_ADAPTIVE_THREAD_CACHE || ENABLE_STATISTICS
- //! Current and high water mark of spans used per span count
- span_use_t span_use[LARGE_CLASS_COUNT];
-#endif
#if RPMALLOC_FIRST_CLASS_HEAPS
//! Double linked list of large and huge spans allocated by this heap
span_t* large_huge_span;
@@ -506,6 +513,16 @@
heap_t* master_heap;
//! Child count
atomic32_t child_count;
+#if ENABLE_THREAD_CACHE
+ //! Arrays of fully freed spans, single span
+ span_cache_t span_cache;
+ //! Arrays of fully freed spans, large spans with > 1 span count
+ span_large_cache_t span_large_cache[LARGE_CLASS_COUNT - 1];
+#endif
+#if ENABLE_ADAPTIVE_THREAD_CACHE || ENABLE_STATISTICS
+ //! Current and high water mark of spans used per span count
+ span_use_t span_use[LARGE_CLASS_COUNT];
+#endif
#if ENABLE_STATISTICS
//! Number of bytes transitioned thread -> global
atomic64_t thread_to_global;
@@ -528,12 +545,16 @@
_Static_assert(sizeof(size_class_t) == 8, "Size class size mismatch");
struct global_cache_t {
- //! Cache list pointer
- atomicptr_t cache;
- //! Cache size
- atomic32_t size;
- //! ABA counter
- atomic32_t counter;
+ //! Cache lock
+ atomic32_t lock;
+ //! Cache count
+ size_t count;
+ //! Cached spans
+ span_t* span[GLOBAL_CACHE_MULTIPLIER * MAX_THREAD_SPAN_CACHE];
+#if ENABLE_UNLIMITED_CACHE
+ //! Unlimited cache overflow
+ span_t* overflow;
+#endif
};
////////////
@@ -542,6 +563,11 @@
///
//////
+//! Default span size (64KiB)
+#define _memory_default_span_size (64 * 1024)
+#define _memory_default_span_size_shift 16
+#define _memory_default_span_mask (~((uintptr_t)(_memory_default_span_size - 1)))
+
//! Initialized flag
static int _rpmalloc_initialized;
//! Configuration
@@ -560,10 +586,10 @@
//! Mask to get to start of a memory span
static uintptr_t _memory_span_mask;
#else
-//! Hardwired span size (64KiB)
-#define _memory_span_size (64 * 1024)
-#define _memory_span_size_shift 16
-#define _memory_span_mask (~((uintptr_t)(_memory_span_size - 1)))
+//! Hardwired span size
+#define _memory_span_size _memory_default_span_size
+#define _memory_span_size_shift _memory_default_span_size_shift
+#define _memory_span_mask _memory_default_span_mask
#endif
//! Number of spans to map in each map call
static size_t _memory_span_map_count;
@@ -827,76 +853,6 @@
///
//////
-#if ENABLE_THREAD_CACHE
-
-static void
-_rpmalloc_span_unmap(span_t* span);
-
-//! Unmap a single linked list of spans
-static void
-_rpmalloc_span_list_unmap_all(span_t* span) {
- size_t list_size = span->list_size;
- for (size_t ispan = 0; ispan < list_size; ++ispan) {
- span_t* next_span = span->next;
- _rpmalloc_span_unmap(span);
- span = next_span;
- }
- assert(!span);
-}
-
-//! Add span to head of single linked span list
-static size_t
-_rpmalloc_span_list_push(span_t** head, span_t* span) {
- span->next = *head;
- if (*head)
- span->list_size = (*head)->list_size + 1;
- else
- span->list_size = 1;
- *head = span;
- return span->list_size;
-}
-
-//! Remove span from head of single linked span list, returns the new list head
-static span_t*
-_rpmalloc_span_list_pop(span_t** head) {
- span_t* span = *head;
- span_t* next_span = 0;
- if (span->list_size > 1) {
- assert(span->next);
- next_span = span->next;
- assert(next_span);
- next_span->list_size = span->list_size - 1;
- }
- *head = next_span;
- return span;
-}
-
-//! Split a single linked span list
-static span_t*
-_rpmalloc_span_list_split(span_t* span, size_t limit) {
- span_t* next = 0;
- if (limit < 2)
- limit = 2;
- if (span->list_size > limit) {
- uint32_t list_size = 1;
- span_t* last = span;
- next = span->next;
- while (list_size < limit) {
- last = next;
- next = next->next;
- ++list_size;
- }
- last->next = 0;
- assert(next);
- next->list_size = span->list_size - list_size;
- span->list_size = list_size;
- span->prev = 0;
- }
- return next;
-}
-
-#endif
-
//! Add a span to double linked list at the head
static void
_rpmalloc_span_double_link_list_add(span_t** head, span_t* span) {
@@ -1075,10 +1031,19 @@
#if ENABLE_ADAPTIVE_THREAD_CACHE || ENABLE_STATISTICS
atomic_decr32(&heap->span_use[0].current);
#endif
- _rpmalloc_stat_inc(&heap->span_use[0].spans_to_cache);
- _rpmalloc_stat_inc(&heap->size_class_use[span->size_class].spans_to_cache);
_rpmalloc_stat_dec(&heap->size_class_use[span->size_class].spans_current);
- _rpmalloc_heap_cache_insert(heap, span);
+ if (!heap->finalize) {
+ _rpmalloc_stat_inc(&heap->span_use[0].spans_to_cache);
+ _rpmalloc_stat_inc(&heap->size_class_use[span->size_class].spans_to_cache);
+ if (heap->size_class[span->size_class].cache[0]) {
+ if (heap->size_class[span->size_class].cache[1])
+ _rpmalloc_heap_cache_insert(heap, heap->size_class[span->size_class].cache[1]);
+ heap->size_class[span->size_class].cache[1] = heap->size_class[span->size_class].cache[0];
+ }
+ heap->size_class[span->size_class].cache[0] = span;
+ } else {
+ _rpmalloc_span_unmap(span);
+ }
}
//! Initialize a (partial) free list up to next system memory page, while reserving the first block
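
The release path above also introduces a per-size-class two-slot cache (cache[0] and cache[1] in heap_size_class_t): a newly emptied span goes into cache[0], the previous occupant slides to cache[1], and whatever was in cache[1] falls through to the ordinary heap span cache, so the two most recently freed spans of a class stay immediately reusable by the owning thread. Condensed restatement of that rotation, with hsc standing in for heap->size_class[span->size_class] (an alias used here for brevity, not a variable from the patch):

	if (hsc->cache[0]) {
		if (hsc->cache[1])
			_rpmalloc_heap_cache_insert(heap, hsc->cache[1]); /* oldest span falls through */
		hsc->cache[1] = hsc->cache[0];
	}
	hsc->cache[0] = span; /* newest span takes the front slot */
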
@@ -1129,11 +1094,11 @@
//Setup free list. Only initialize one system page worth of free blocks in list
void* block;
- span->free_list_limit = free_list_partial_init(&heap->free_list[class_idx], &block,
+ span->free_list_limit = free_list_partial_init(&heap->size_class[class_idx].free_list, &block,
span, pointer_offset(span, SPAN_HEADER_SIZE), size_class->block_count, size_class->block_size);
//Link span as partial if there remains blocks to be initialized as free list, or full if fully initialized
if (span->free_list_limit < span->block_count) {
- _rpmalloc_span_double_link_list_add(&heap->partial_span[class_idx], span);
+ _rpmalloc_span_double_link_list_add(&heap->size_class[class_idx].partial_span, span);
span->used_count = span->free_list_limit;
} else {
#if RPMALLOC_FIRST_CLASS_HEAPS
@@ -1165,7 +1130,8 @@
static int
_rpmalloc_span_finalize(heap_t* heap, size_t iclass, span_t* span, span_t** list_head) {
- span_t* class_span = (span_t*)((uintptr_t)heap->free_list[iclass] & _memory_span_mask);
+ void* free_list = heap->size_class[iclass].free_list;
+ span_t* class_span = (span_t*)((uintptr_t)free_list & _memory_span_mask);
if (span == class_span) {
// Adopt the heap class free list back into the span free list
void* block = span->free_list;
@@ -1175,17 +1141,17 @@
block = *((void**)block);
}
uint32_t free_count = 0;
- block = heap->free_list[iclass];
+ block = free_list;
while (block) {
++free_count;
block = *((void**)block);
}
if (last_block) {
- *((void**)last_block) = heap->free_list[iclass];
+ *((void**)last_block) = free_list;
} else {
- span->free_list = heap->free_list[iclass];
+ span->free_list = free_list;
}
- heap->free_list[iclass] = 0;
+ heap->size_class[iclass].free_list = 0;
span->used_count -= free_count;
}
//If this assert triggers you have memory leaks
@@ -1211,89 +1177,83 @@
#if ENABLE_GLOBAL_CACHE
-//! Insert the given list of memory page spans in the global cache
-static void
-_rpmalloc_global_cache_insert(global_cache_t* cache, span_t* span, size_t cache_limit) {
- assert((span->list_size == 1) || (span->next != 0));
- int32_t list_size = (int32_t)span->list_size;
- //Unmap if cache has reached the limit. Does not need stronger synchronization, the worst
- //case is that the span list is unmapped when it could have been cached (no real dependency
- //between the two variables)
- if (atomic_add32(&cache->size, list_size) > (int32_t)cache_limit) {
-#if !ENABLE_UNLIMITED_GLOBAL_CACHE
- _rpmalloc_span_list_unmap_all(span);
- atomic_add32(&cache->size, -list_size);
- return;
-#endif
- }
- void* current_cache, *new_cache;
- do {
- current_cache = atomic_load_ptr(&cache->cache);
- span->prev = (span_t*)((uintptr_t)current_cache & _memory_span_mask);
- new_cache = (void*)((uintptr_t)span | ((uintptr_t)atomic_incr32(&cache->counter) & ~_memory_span_mask));
- } while (!atomic_cas_ptr(&cache->cache, new_cache, current_cache));
-}
-
-//! Extract a number of memory page spans from the global cache
-static span_t*
-_rpmalloc_global_cache_extract(global_cache_t* cache) {
- uintptr_t span_ptr;
- do {
- void* global_span = atomic_load_ptr(&cache->cache);
- span_ptr = (uintptr_t)global_span & _memory_span_mask;
- if (span_ptr) {
- span_t* span = (span_t*)span_ptr;
- //By accessing the span ptr before it is swapped out of list we assume that a contending thread
- //does not manage to traverse the span to being unmapped before we access it
- void* new_cache = (void*)((uintptr_t)span->prev | ((uintptr_t)atomic_incr32(&cache->counter) & ~_memory_span_mask));
- if (atomic_cas_ptr(&cache->cache, new_cache, global_span)) {
- atomic_add32(&cache->size, -(int32_t)span->list_size);
- return span;
- }
- }
- } while (span_ptr);
- return 0;
-}
-
//! Finalize a global cache, only valid from allocator finalization (not thread safe)
static void
_rpmalloc_global_cache_finalize(global_cache_t* cache) {
- void* current_cache = atomic_load_ptr(&cache->cache);
- span_t* span = (span_t*)((uintptr_t)current_cache & _memory_span_mask);
- while (span) {
- span_t* skip_span = (span_t*)((uintptr_t)span->prev & _memory_span_mask);
- atomic_add32(&cache->size, -(int32_t)span->list_size);
- _rpmalloc_span_list_unmap_all(span);
- span = skip_span;
+ for (size_t ispan = 0; ispan < cache->count; ++ispan)
+ _rpmalloc_span_unmap(cache->span[ispan]);
+ cache->count = 0;
+
+#if ENABLE_UNLIMITED_CACHE
+ while (cache->overflow) {
+ span_t* span = cache->overflow;
+ cache->overflow = span->next;
+ _rpmalloc_span_unmap(span);
}
- assert(!atomic_load32(&cache->size));
- atomic_store_ptr(&cache->cache, 0);
- atomic_store32(&cache->size, 0);
+#endif
+
+ atomic_store32_release(&cache->lock, 0);
}
-//! Insert the given list of memory page spans in the global cache
static void
-_rpmalloc_global_cache_insert_span_list(span_t* span) {
- size_t span_count = span->span_count;
-#if ENABLE_UNLIMITED_GLOBAL_CACHE
- _rpmalloc_global_cache_insert(&_memory_span_cache[span_count - 1], span, 0);
+_rpmalloc_global_cache_insert_spans(span_t** span, size_t span_count, size_t count) {
+ const size_t cache_limit = (span_count == 1) ?
+ GLOBAL_CACHE_MULTIPLIER * MAX_THREAD_SPAN_CACHE :
+ GLOBAL_CACHE_MULTIPLIER * (MAX_THREAD_SPAN_LARGE_CACHE - (span_count >> 1));
+
+ global_cache_t* cache = &_memory_span_cache[span_count - 1];
+
+ size_t insert_count = count;
+ while (!atomic_cas32_acquire(&cache->lock, 1, 0))
+ /* Spin */;
+
+ if ((cache->count + insert_count) > cache_limit)
+ insert_count = cache_limit - cache->count;
+
+ memcpy(cache->span + cache->count, span, sizeof(span_t*) * insert_count);
+ cache->count += insert_count;
+
+#if ENABLE_UNLIMITED_CACHE
+ while (insert_count < count) {
+ span_t* current_span = span[insert_count++];
+ current_span->next = cache->overflow;
+ cache->overflow = current_span;
+ }
+ atomic_store32_release(&cache->lock, 0);
#else
- const size_t cache_limit = (GLOBAL_CACHE_MULTIPLIER * ((span_count == 1) ? _memory_span_release_count : _memory_span_release_count_large));
- _rpmalloc_global_cache_insert(&_memory_span_cache[span_count - 1], span, cache_limit);
+ atomic_store32_release(&cache->lock, 0);
+ for (size_t ispan = insert_count; ispan < count; ++ispan)
+ _rpmalloc_span_unmap(span[ispan]);
#endif
}
-//! Extract a number of memory page spans from the global cache for large blocks
-static span_t*
-_rpmalloc_global_cache_extract_span_list(size_t span_count) {
- span_t* span = _rpmalloc_global_cache_extract(&_memory_span_cache[span_count - 1]);
- assert(!span || (span->span_count == span_count));
- return span;
+static size_t
+_rpmalloc_global_cache_extract_spans(span_t** span, size_t span_count, size_t count) {
+ global_cache_t* cache = &_memory_span_cache[span_count - 1];
+
+ size_t extract_count = count;
+ while (!atomic_cas32_acquire(&cache->lock, 1, 0))
+ /* Spin */;
+
+ if (extract_count > cache->count)
+ extract_count = cache->count;
+
+ memcpy(span, cache->span + (cache->count - extract_count), sizeof(span_t*) * extract_count);
+ cache->count -= extract_count;
+#if ENABLE_UNLIMITED_CACHE
+ while ((extract_count < count) && cache->overflow) {
+ span_t* current_span = cache->overflow;
+ span[extract_count++] = current_span;
+ cache->overflow = current_span->next;
+ }
+#endif
+ atomic_store32_release(&cache->lock, 0);
+
+ return extract_count;
}
#endif
-
////////////
///
/// Heap control
@@ -1383,10 +1343,14 @@
#if ENABLE_THREAD_CACHE
for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
- span_t* span = heap->span_cache[iclass];
- heap->span_cache[iclass] = 0;
- if (span)
- _rpmalloc_span_list_unmap_all(span);
+ span_cache_t* span_cache;
+ if (!iclass)
+ span_cache = &heap->span_cache;
+ else
+ span_cache = (span_cache_t*)(heap->span_large_cache + (iclass - 1));
+ for (size_t ispan = 0; ispan < span_cache->count; ++ispan)
+ _rpmalloc_span_unmap(span_cache->span[ispan]);
+ span_cache->count = 0;
}
#endif
@@ -1396,7 +1360,7 @@
}
for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) {
- if (heap->free_list[iclass] || heap->partial_span[iclass]) {
+ if (heap->size_class[iclass].free_list || heap->size_class[iclass].partial_span) {
--heap->finalize;
return;
}
@@ -1412,7 +1376,7 @@
list_heap->next_heap = heap->next_heap;
}
- _rpmalloc_heap_unmap( heap );
+ _rpmalloc_heap_unmap(heap);
}
//! Insert a single span into thread heap cache, releasing to global cache if overflow
@@ -1425,37 +1389,42 @@
}
#if ENABLE_THREAD_CACHE
size_t span_count = span->span_count;
- size_t idx = span_count - 1;
- _rpmalloc_stat_inc(&heap->span_use[idx].spans_to_cache);
-#if ENABLE_UNLIMITED_THREAD_CACHE
- _rpmalloc_span_list_push(&heap->span_cache[idx], span);
-#else
- const size_t release_count = (!idx ? _memory_span_release_count : _memory_span_release_count_large);
- size_t current_cache_size = _rpmalloc_span_list_push(&heap->span_cache[idx], span);
- if (current_cache_size <= release_count)
- return;
- const size_t hard_limit = release_count * THREAD_CACHE_MULTIPLIER;
- if (current_cache_size <= hard_limit) {
-#if ENABLE_ADAPTIVE_THREAD_CACHE
- //Require 25% of high water mark to remain in cache (and at least 1, if use is 0)
- const size_t high_mark = heap->span_use[idx].high;
- const size_t min_limit = (high_mark >> 2) + release_count + 1;
- if (current_cache_size < min_limit)
- return;
-#else
- return;
-#endif
- }
- heap->span_cache[idx] = _rpmalloc_span_list_split(span, release_count);
- assert(span->list_size == release_count);
+ _rpmalloc_stat_inc(&heap->span_use[span_count - 1].spans_to_cache);
+ if (span_count == 1) {
+ span_cache_t* span_cache = &heap->span_cache;
+ span_cache->span[span_cache->count++] = span;
+ if (span_cache->count == MAX_THREAD_SPAN_CACHE) {
+ const size_t remain_count = MAX_THREAD_SPAN_CACHE - THREAD_SPAN_CACHE_TRANSFER;
#if ENABLE_GLOBAL_CACHE
- _rpmalloc_stat_add64(&heap->thread_to_global, (size_t)span->list_size * span_count * _memory_span_size);
- _rpmalloc_stat_add(&heap->span_use[idx].spans_to_global, span->list_size);
- _rpmalloc_global_cache_insert_span_list(span);
+ _rpmalloc_stat_add64(&heap->thread_to_global, THREAD_SPAN_CACHE_TRANSFER * _memory_span_size);
+ _rpmalloc_stat_add(&heap->span_use[span_count - 1].spans_to_global, THREAD_SPAN_CACHE_TRANSFER);
+ _rpmalloc_global_cache_insert_spans(span_cache->span + remain_count, span_count, THREAD_SPAN_CACHE_TRANSFER);
#else
- _rpmalloc_span_list_unmap_all(span);
+ for (size_t ispan = 0; ispan < THREAD_SPAN_CACHE_TRANSFER; ++ispan)
+ _rpmalloc_span_unmap(span_cache->span[remain_count + ispan]);
#endif
+ span_cache->count = remain_count;
+ }
+ } else {
+ size_t cache_idx = span_count - 2;
+ span_large_cache_t* span_cache = heap->span_large_cache + cache_idx;
+ span_cache->span[span_cache->count++] = span;
+ const size_t cache_limit = (MAX_THREAD_SPAN_LARGE_CACHE - (span_count >> 1));
+ if (span_cache->count == cache_limit) {
+ const size_t transfer_limit = 2 + (cache_limit >> 2);
+ const size_t transfer_count = (THREAD_SPAN_LARGE_CACHE_TRANSFER <= transfer_limit ? THREAD_SPAN_LARGE_CACHE_TRANSFER : transfer_limit);
+ const size_t remain_count = cache_limit - transfer_count;
+#if ENABLE_GLOBAL_CACHE
+ _rpmalloc_stat_add64(&heap->thread_to_global, transfer_count * span_count * _memory_span_size);
+ _rpmalloc_stat_add(&heap->span_use[span_count - 1].spans_to_global, transfer_count);
+ _rpmalloc_global_cache_insert_spans(span_cache->span + remain_count, span_count, transfer_count);
+#else
+ for (size_t ispan = 0; ispan < transfer_count; ++ispan)
+ _rpmalloc_span_unmap(span_cache->span[remain_count + ispan]);
#endif
+ span_cache->count = remain_count;
+ }
+ }
#else
(void)sizeof(heap);
_rpmalloc_span_unmap(span);
@@ -1466,13 +1435,20 @@
static span_t*
_rpmalloc_heap_thread_cache_extract(heap_t* heap, size_t span_count) {
span_t* span = 0;
- size_t idx = span_count - 1;
- if (!idx)
+ if (span_count == 1) {
_rpmalloc_heap_cache_adopt_deferred(heap, &span);
+ if (span)
+ return span;
+ }
#if ENABLE_THREAD_CACHE
- if (!span && heap->span_cache[idx]) {
- _rpmalloc_stat_inc(&heap->span_use[idx].spans_from_cache);
- span = _rpmalloc_span_list_pop(&heap->span_cache[idx]);
+ span_cache_t* span_cache;
+ if (span_count == 1)
+ span_cache = &heap->span_cache;
+ else
+ span_cache = (span_cache_t*)(heap->span_large_cache + (span_count - 2));
+ if (span_cache->count) {
+ _rpmalloc_stat_inc(&heap->span_use[span_count - 1].spans_from_cache);
+ return span_cache->span[--span_cache->count];
}
#endif
return span;
@@ -1489,13 +1465,31 @@
static span_t*
_rpmalloc_heap_global_cache_extract(heap_t* heap, size_t span_count) {
#if ENABLE_GLOBAL_CACHE
- size_t idx = span_count - 1;
- heap->span_cache[idx] = _rpmalloc_global_cache_extract_span_list(span_count);
- if (heap->span_cache[idx]) {
- _rpmalloc_stat_add64(&heap->global_to_thread, (size_t)heap->span_cache[idx]->list_size * span_count * _memory_span_size);
- _rpmalloc_stat_add(&heap->span_use[idx].spans_from_global, heap->span_cache[idx]->list_size);
- return _rpmalloc_span_list_pop(&heap->span_cache[idx]);
+#if ENABLE_THREAD_CACHE
+ span_cache_t* span_cache;
+ size_t wanted_count;
+ if (span_count == 1) {
+ span_cache = &heap->span_cache;
+ wanted_count = THREAD_SPAN_CACHE_TRANSFER;
+ } else {
+ span_cache = (span_cache_t*)(heap->span_large_cache + (span_count - 2));
+ wanted_count = THREAD_SPAN_LARGE_CACHE_TRANSFER;
}
+ span_cache->count = _rpmalloc_global_cache_extract_spans(span_cache->span, span_count, wanted_count);
+ if (span_cache->count) {
+ _rpmalloc_stat_add64(&heap->global_to_thread, span_count * span_cache->count * _memory_span_size);
+ _rpmalloc_stat_add(&heap->span_use[span_count - 1].spans_from_global, span_cache->count);
+ return span_cache->span[--span_cache->count];
+ }
+#else
+ span_t* span = 0;
+ size_t count = _rpmalloc_global_cache_extract_spans(&span, span_count, 1);
+ if (count) {
+ _rpmalloc_stat_add64(&heap->global_to_thread, span_count * count * _memory_span_size);
+ _rpmalloc_stat_add(&heap->span_use[span_count - 1].spans_from_global, count);
+ return span;
+ }
+#endif
#endif
(void)sizeof(heap);
(void)sizeof(span_count);
@@ -1505,7 +1499,7 @@
//! Get a span from one of the cache levels (thread cache, reserved, global cache) or fallback to mapping more memory
static span_t*
_rpmalloc_heap_extract_new_span(heap_t* heap, size_t span_count, uint32_t class_idx) {
- (void)sizeof(class_idx);
+ span_t* span;
#if ENABLE_ADAPTIVE_THREAD_CACHE || ENABLE_STATISTICS
uint32_t idx = (uint32_t)span_count - 1;
uint32_t current_count = (uint32_t)atomic_incr32(&heap->span_use[idx].current);
@@ -1513,7 +1507,26 @@
atomic_store32(&heap->span_use[idx].high, (int32_t)current_count);
_rpmalloc_stat_add_peak(&heap->size_class_use[class_idx].spans_current, 1, heap->size_class_use[class_idx].spans_peak);
#endif
- span_t* span = _rpmalloc_heap_thread_cache_extract(heap, span_count);
+#if ENABLE_THREAD_CACHE
+ if (class_idx < SIZE_CLASS_COUNT) {
+ if (heap->size_class[class_idx].cache[0]) {
+ span = heap->size_class[class_idx].cache[0];
+ span_t* new_cache = 0;
+ if (heap->span_cache.count)
+ new_cache = heap->span_cache.span[--heap->span_cache.count];
+ if (heap->size_class[class_idx].cache[1]) {
+ heap->size_class[class_idx].cache[0] = heap->size_class[class_idx].cache[1];
+ heap->size_class[class_idx].cache[1] = new_cache;
+ } else {
+ heap->size_class[class_idx].cache[0] = new_cache;
+ }
+ return span;
+ }
+ }
+#else
+ (void)sizeof(class_idx);
+#endif
+ span = _rpmalloc_heap_thread_cache_extract(heap, span_count);
if (EXPECTED(span != 0)) {
_rpmalloc_stat_inc(&heap->size_class_use[class_idx].spans_from_cache);
return span;
@@ -1642,26 +1655,27 @@
_rpmalloc_heap_cache_adopt_deferred(heap, 0);
#if ENABLE_THREAD_CACHE
for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
- span_t* span = heap->span_cache[iclass];
- heap->span_cache[iclass] = 0;
- if (span && heap->finalize) {
- _rpmalloc_span_list_unmap_all(span);
+ span_cache_t* span_cache;
+ if (!iclass)
+ span_cache = &heap->span_cache;
+ else
+ span_cache = (span_cache_t*)(heap->span_large_cache + (iclass - 1));
+ if (!span_cache->count)
continue;
- }
+ if (heap->finalize) {
+ for (size_t ispan = 0; ispan < span_cache->count; ++ispan)
+ _rpmalloc_span_unmap(span_cache->span[ispan]);
+ } else {
#if ENABLE_GLOBAL_CACHE
- while (span) {
- assert(span->span_count == (iclass + 1));
- size_t release_count = (!iclass ? _memory_span_release_count : _memory_span_release_count_large);
- span_t* next = _rpmalloc_span_list_split(span, (uint32_t)release_count);
- _rpmalloc_stat_add64(&heap->thread_to_global, (size_t)span->list_size * span->span_count * _memory_span_size);
- _rpmalloc_stat_add(&heap->span_use[iclass].spans_to_global, span->list_size);
- _rpmalloc_global_cache_insert_span_list(span);
- span = next;
- }
+ _rpmalloc_stat_add64(&heap->thread_to_global, span_cache->count * (iclass + 1) * _memory_span_size);
+ _rpmalloc_stat_add(&heap->span_use[iclass].spans_to_global, span_cache->count);
+ _rpmalloc_global_cache_insert_spans(span_cache->span, iclass + 1, span_cache->count);
#else
- if (span)
- _rpmalloc_span_list_unmap_all(span);
+ for (size_t ispan = 0; ispan < span_cache->count; ++ispan)
+ _rpmalloc_span_unmap(span_cache->span[ispan]);
#endif
+ }
+ span_cache->count = 0;
}
#endif
@@ -1692,15 +1706,21 @@
_rpmalloc_heap_cache_adopt_deferred(heap, 0);
for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) {
- span_t* span = heap->partial_span[iclass];
+ if (heap->size_class[iclass].cache[0])
+ _rpmalloc_span_unmap(heap->size_class[iclass].cache[0]);
+ if (heap->size_class[iclass].cache[1])
+ _rpmalloc_span_unmap(heap->size_class[iclass].cache[1]);
+ heap->size_class[iclass].cache[0] = 0;
+ heap->size_class[iclass].cache[1] = 0;
+ span_t* span = heap->size_class[iclass].partial_span;
while (span) {
span_t* next = span->next;
- _rpmalloc_span_finalize(heap, iclass, span, &heap->partial_span[iclass]);
+ _rpmalloc_span_finalize(heap, iclass, span, &heap->size_class[iclass].partial_span);
span = next;
}
// If class still has a free list it must be a full span
- if (heap->free_list[iclass]) {
- span_t* class_span = (span_t*)((uintptr_t)heap->free_list[iclass] & _memory_span_mask);
+ if (heap->size_class[iclass].free_list) {
+ span_t* class_span = (span_t*)((uintptr_t)heap->size_class[iclass].free_list & _memory_span_mask);
span_t** list = 0;
#if RPMALLOC_FIRST_CLASS_HEAPS
list = &heap->full_span[iclass];
@@ -1709,17 +1729,21 @@
if (!_rpmalloc_span_finalize(heap, iclass, class_span, list)) {
if (list)
_rpmalloc_span_double_link_list_remove(list, class_span);
- _rpmalloc_span_double_link_list_add(&heap->partial_span[iclass], class_span);
+ _rpmalloc_span_double_link_list_add(&heap->size_class[iclass].partial_span, class_span);
}
}
}
#if ENABLE_THREAD_CACHE
for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
- if (heap->span_cache[iclass]) {
- _rpmalloc_span_list_unmap_all(heap->span_cache[iclass]);
- heap->span_cache[iclass] = 0;
- }
+ span_cache_t* span_cache;
+ if (!iclass)
+ span_cache = &heap->span_cache;
+ else
+ span_cache = (span_cache_t*)(heap->span_large_cache + (iclass - 1));
+ for (size_t ispan = 0; ispan < span_cache->count; ++ispan)
+ _rpmalloc_span_unmap(span_cache->span[ispan]);
+ span_cache->count = 0;
}
#endif
assert(!atomic_load_ptr(&heap->span_free_deferred));
@@ -1743,20 +1767,20 @@
//! Allocate a small/medium sized memory block from the given heap
static void*
_rpmalloc_allocate_from_heap_fallback(heap_t* heap, uint32_t class_idx) {
- span_t* span = heap->partial_span[class_idx];
+ span_t* span = heap->size_class[class_idx].partial_span;
if (EXPECTED(span != 0)) {
assert(span->block_count == _memory_size_class[span->size_class].block_count);
assert(!_rpmalloc_span_is_fully_utilized(span));
void* block;
if (span->free_list) {
//Swap in free list if not empty
- heap->free_list[class_idx] = span->free_list;
+ heap->size_class[class_idx].free_list = span->free_list;
span->free_list = 0;
- block = free_list_pop(&heap->free_list[class_idx]);
+ block = free_list_pop(&heap->size_class[class_idx].free_list);
} else {
//If the span did not fully initialize free list, link up another page worth of blocks
void* block_start = pointer_offset(span, SPAN_HEADER_SIZE + ((size_t)span->free_list_limit * span->block_size));
- span->free_list_limit += free_list_partial_init(&heap->free_list[class_idx], &block,
+ span->free_list_limit += free_list_partial_init(&heap->size_class[class_idx].free_list, &block,
(void*)((uintptr_t)block_start & ~(_memory_page_size - 1)), block_start,
span->block_count - span->free_list_limit, span->block_size);
}
@@ -1772,7 +1796,7 @@
return block;
//The span is fully utilized, unlink from partial list and add to fully utilized list
- _rpmalloc_span_double_link_list_pop_head(&heap->partial_span[class_idx], span);
+ _rpmalloc_span_double_link_list_pop_head(&heap->size_class[class_idx].partial_span, span);
#if RPMALLOC_FIRST_CLASS_HEAPS
_rpmalloc_span_double_link_list_add(&heap->full_span[class_idx], span);
#endif
@@ -1797,8 +1821,8 @@
//Small sizes have unique size classes
const uint32_t class_idx = (uint32_t)((size + (SMALL_GRANULARITY - 1)) >> SMALL_GRANULARITY_SHIFT);
_rpmalloc_stat_inc_alloc(heap, class_idx);
- if (EXPECTED(heap->free_list[class_idx] != 0))
- return free_list_pop(&heap->free_list[class_idx]);
+ if (EXPECTED(heap->size_class[class_idx].free_list != 0))
+ return free_list_pop(&heap->size_class[class_idx].free_list);
return _rpmalloc_allocate_from_heap_fallback(heap, class_idx);
}
@@ -1810,8 +1834,8 @@
const uint32_t base_idx = (uint32_t)(SMALL_CLASS_COUNT + ((size - (SMALL_SIZE_LIMIT + 1)) >> MEDIUM_GRANULARITY_SHIFT));
const uint32_t class_idx = _memory_size_class[base_idx].class_idx;
_rpmalloc_stat_inc_alloc(heap, class_idx);
- if (EXPECTED(heap->free_list[class_idx] != 0))
- return free_list_pop(&heap->free_list[class_idx]);
+ if (EXPECTED(heap->size_class[class_idx].free_list != 0))
+ return free_list_pop(&heap->size_class[class_idx].free_list);
return _rpmalloc_allocate_from_heap_fallback(heap, class_idx);
}
@@ -2019,14 +2043,14 @@
#if RPMALLOC_FIRST_CLASS_HEAPS
_rpmalloc_span_double_link_list_remove(&heap->full_span[span->size_class], span);
#endif
- _rpmalloc_span_double_link_list_add(&heap->partial_span[span->size_class], span);
+ _rpmalloc_span_double_link_list_add(&heap->size_class[span->size_class].partial_span, span);
--heap->full_span_count;
}
--span->used_count;
*((void**)block) = span->free_list;
span->free_list = block;
if (UNEXPECTED(span->used_count == span->list_size)) {
- _rpmalloc_span_double_link_list_remove(&heap->partial_span[span->size_class], span);
+ _rpmalloc_span_double_link_list_remove(&heap->size_class[span->size_class].partial_span, span);
_rpmalloc_span_release_to_cache(heap, span);
}
}
@@ -2465,18 +2489,22 @@
_memory_page_size = ((size_t)1 << _memory_page_size_shift);
#if RPMALLOC_CONFIGURABLE
- size_t span_size = _memory_config.span_size;
- if (!span_size)
- span_size = (64 * 1024);
- if (span_size > (256 * 1024))
- span_size = (256 * 1024);
- _memory_span_size = 4096;
- _memory_span_size_shift = 12;
- while (_memory_span_size < span_size) {
- _memory_span_size <<= 1;
- ++_memory_span_size_shift;
+ if (!_memory_config.span_size) {
+ _memory_span_size = _memory_default_span_size;
+ _memory_span_size_shift = _memory_default_span_size_shift;
+ _memory_span_mask = _memory_default_span_mask;
+ } else {
+ size_t span_size = _memory_config.span_size;
+ if (span_size > (256 * 1024))
+ span_size = (256 * 1024);
+ _memory_span_size = 4096;
+ _memory_span_size_shift = 12;
+ while (_memory_span_size < span_size) {
+ _memory_span_size <<= 1;
+ ++_memory_span_size_shift;
+ }
+ _memory_span_mask = ~(uintptr_t)(_memory_span_size - 1);
}
- _memory_span_mask = ~(uintptr_t)(_memory_span_size - 1);
#endif
_memory_span_map_count = ( _memory_config.span_map_count ? _memory_config.span_map_count : DEFAULT_SPAN_MAP_COUNT);
@@ -2746,7 +2774,7 @@
for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) {
size_class_t* size_class = _memory_size_class + iclass;
- span_t* span = heap->partial_span[iclass];
+ span_t* span = heap->size_class[iclass].partial_span;
while (span) {
size_t free_count = span->list_size;
size_t block_count = size_class->block_count;
@@ -2760,8 +2788,12 @@
#if ENABLE_THREAD_CACHE
for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
- if (heap->span_cache[iclass])
- stats->spancache = (size_t)heap->span_cache[iclass]->list_size * (iclass + 1) * _memory_span_size;
+ span_cache_t* span_cache;
+ if (!iclass)
+ span_cache = &heap->span_cache;
+ else
+ span_cache = (span_cache_t*)(heap->span_large_cache + (iclass - 1));
+ stats->spancache = span_cache->count * (iclass + 1) * _memory_span_size;
}
#endif
@@ -2812,9 +2844,8 @@
stats->huge_alloc_peak = (size_t)_huge_pages_peak * _memory_page_size;
#endif
#if ENABLE_GLOBAL_CACHE
- for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
- stats->cached += (size_t)atomic_load32(&_memory_span_cache[iclass].size) * (iclass + 1) * _memory_span_size;
- }
+ for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass)
+ stats->cached += _memory_span_cache[iclass].count * (iclass + 1) * _memory_span_size;
#endif
}
@@ -2851,7 +2882,7 @@
atomic_load32(&heap->span_use[iclass].high),
((size_t)atomic_load32(&heap->span_use[iclass].high) * (size_t)_memory_span_size * (iclass + 1)) / (size_t)(1024 * 1024),
#if ENABLE_THREAD_CACHE
- heap->span_cache[iclass] ? heap->span_cache[iclass]->list_size : 0,
+		(unsigned int)(!iclass ? heap->span_cache.count : heap->span_large_cache[iclass - 1].count),
((size_t)atomic_load32(&heap->span_use[iclass].spans_to_cache) * (iclass + 1) * _memory_span_size) / (size_t)(1024 * 1024),
((size_t)atomic_load32(&heap->span_use[iclass].spans_from_cache) * (iclass + 1) * _memory_span_size) / (size_t)(1024 * 1024),
#else
@@ -3032,13 +3063,13 @@
_rpmalloc_heap_cache_adopt_deferred(heap, 0);
for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) {
- span = heap->partial_span[iclass];
+ span = heap->size_class[iclass].partial_span;
while (span) {
next_span = span->next;
_rpmalloc_heap_cache_insert(heap, span);
span = next_span;
}
- heap->partial_span[iclass] = 0;
+ heap->size_class[iclass].partial_span = 0;
span = heap->full_span[iclass];
while (span) {
next_span = span->next;
@@ -3046,8 +3077,7 @@
span = next_span;
}
}
- memset(heap->free_list, 0, sizeof(heap->free_list));
- memset(heap->partial_span, 0, sizeof(heap->partial_span));
+ memset(heap->size_class, 0, sizeof(heap->size_class));
memset(heap->full_span, 0, sizeof(heap->full_span));
span = heap->large_huge_span;
@@ -3064,22 +3094,22 @@
#if ENABLE_THREAD_CACHE
for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
- span = heap->span_cache[iclass];
+ span_cache_t* span_cache;
+ if (!iclass)
+ span_cache = &heap->span_cache;
+ else
+ span_cache = (span_cache_t*)(heap->span_large_cache + (iclass - 1));
+ if (!span_cache->count)
+ continue;
#if ENABLE_GLOBAL_CACHE
- while (span) {
- assert(span->span_count == (iclass + 1));
- size_t release_count = (!iclass ? _memory_span_release_count : _memory_span_release_count_large);
- next_span = _rpmalloc_span_list_split(span, (uint32_t)release_count);
- _rpmalloc_stat_add64(&heap->thread_to_global, (size_t)span->list_size * span->span_count * _memory_span_size);
- _rpmalloc_stat_add(&heap->span_use[iclass].spans_to_global, span->list_size);
- _rpmalloc_global_cache_insert_span_list(span);
- span = next_span;
- }
+ _rpmalloc_stat_add64(&heap->thread_to_global, span_cache->count * (iclass + 1) * _memory_span_size);
+ _rpmalloc_stat_add(&heap->span_use[iclass].spans_to_global, span_cache->count);
+ _rpmalloc_global_cache_insert_spans(span_cache->span, iclass + 1, span_cache->count);
#else
- if (span)
- _rpmalloc_span_list_unmap_all(span);
+ for (size_t ispan = 0; ispan < span_cache->count; ++ispan)
+ _rpmalloc_span_unmap(span_cache->span[ispan]);
#endif
- heap->span_cache[iclass] = 0;
+ span_cache->count = 0;
}
#endif
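
Since the caches are now plain arrays with counts, their occupancy is what the updated statistics code above reports via the thread statistics spancache field and the global statistics cached field. A small usage sketch of observing this through the public API (types and functions from rpmalloc.h; detailed counters need a build with ENABLE_STATISTICS):

	#include "rpmalloc.h"
	#include <stdio.h>

	int main(void) {
		rpmalloc_initialize();
		void* block = rpmalloc(1024);
		rpfree(block); /* the fully freed span may now sit in a thread or global cache */

		rpmalloc_thread_statistics_t tstats;
		rpmalloc_thread_statistics(&tstats);
		printf("thread span cache bytes: %zu\n", tstats.spancache);

		rpmalloc_global_statistics_t gstats;
		rpmalloc_global_statistics(&gstats);
		printf("global cache bytes: %zu\n", gstats.cached);

		rpmalloc_finalize();
		return 0;
	}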