wip on stats: track current/peak mapped virtual memory

Add a stat_t current/max counter updated on every map and unmap, and
assert in rpmalloc_finalize that all mapped memory has been returned.
Replace the CAS acquire loops on the deferred free list with a single
acquire exchange, and make unmap mirror the map-time span padding
decision. Thread and global caches default to off in this WIP state.

diff --git a/rpmalloc/rpmalloc.c b/rpmalloc/rpmalloc.c
index 693927d..9dd799e 100644
--- a/rpmalloc/rpmalloc.c
+++ b/rpmalloc/rpmalloc.c
@@ -43,11 +43,11 @@
#endif
#ifndef ENABLE_THREAD_CACHE
//! Enable per-thread cache
-#define ENABLE_THREAD_CACHE 1
+#define ENABLE_THREAD_CACHE 0
#endif
#ifndef ENABLE_GLOBAL_CACHE
//! Enable global cache shared between all threads, requires thread cache
-#define ENABLE_GLOBAL_CACHE 1
+#define ENABLE_GLOBAL_CACHE 0
#endif
#if ENABLE_THREAD_CACHE
@@ -261,6 +261,18 @@
uint16_t block_count;
};
+#if ENABLE_STATISTICS
+
+//! Counter for current/max tracking
+struct stat_t {
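+	//! Current tracked value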
+ atomicsize_t current;
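+	//! Peak value recorded by stat_add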
+ atomicsize_t max;
+};
+
+typedef struct stat_t stat_t;
+
+#endif
+
///
/// Validation
///
@@ -321,13 +333,19 @@
static size_t atomicsize_incr(atomicsize_t* src) { return _InterlockedIncrement((volatile LONG*)src); }
#endif
static void atomicsize_store(atomicsize_t* dst, size_t val) { *dst = val; }
+#if ENABLE_STATISTICS
+static size_t atomicsize_load(atomicsize_t* src) { return *src; }
+static void atomicsize_store_release(atomicsize_t* dst, size_t val) { *dst = val; }
+static size_t atomicsize_add_acquire(atomicsize_t* dst, size_t val) { return (size_t)_InterlockedExchangeAdd64((volatile LONG64*)dst, (LONG64)val) + val; }
+static size_t atomicsize_sub(atomicsize_t* dst, size_t val) { return (size_t)_InterlockedExchangeAdd64((volatile LONG64*)dst, -(LONG64)val) - val; }
+#endif
static void* atomicptr_load(atomicptr_t* src) { return (void*)*src; }
static void atomicptr_store(atomicptr_t* dst, void* val) { *dst = val; }
static void atomicptr_store_release(atomicptr_t* dst, void* val) { *dst = val; }
static int atomicptr_cas(atomicptr_t* dst, void* val, void* ref) { return (_InterlockedCompareExchangePointer((void* volatile*)dst, val, ref) == ref) ? 1 : 0; }
-static int atomicptr_cas_acquire(atomicptr_t* dst, void* val, void* ref) { return atomicptr_cas(dst, val, ref); }
static void* atomicptr_exchange(atomicptr_t* dst, void* val) { return InterlockedExchangePointer((void* volatile*)dst, val); }
+static void* atomicptr_exchange_acquire(atomicptr_t* dst, void* val) { return InterlockedExchangePointer((void* volatile*)dst, val); }
#else
@@ -338,8 +356,8 @@
static void atomicptr_store(atomicptr_t* dst, void* val) { atomic_store_explicit(dst, val, memory_order_relaxed); }
static void atomicptr_store_release(atomicptr_t* dst, void* val) { atomic_store_explicit(dst, val, memory_order_release); }
static int atomicptr_cas(atomicptr_t* dst, void* val, void* ref) { return atomic_compare_exchange_weak_explicit(dst, &ref, val, memory_order_relaxed, memory_order_relaxed); }
-static int atomicptr_cas_acquire(atomicptr_t* dst, void* val, void* ref) { return atomic_compare_exchange_weak_explicit(dst, &ref, val, memory_order_acquire, memory_order_relaxed); }
static void* atomicptr_exchange(atomicptr_t* dst, void* val) { return atomic_exchange_explicit(dst, val, memory_order_relaxed); }
+static void* atomicptr_exchange_acquire(atomicptr_t* dst, void* val) { return atomic_exchange_explicit(dst, val, memory_order_acquire); }
+#if ENABLE_STATISTICS
+static size_t atomicsize_load(atomicsize_t* src) { return atomic_load_explicit(src, memory_order_relaxed); }
+static void atomicsize_store_release(atomicsize_t* dst, size_t val) { atomic_store_explicit(dst, val, memory_order_release); }
+static size_t atomicsize_add_acquire(atomicsize_t* dst, size_t val) { return atomic_fetch_add_explicit(dst, val, memory_order_acquire) + val; }
+static size_t atomicsize_sub(atomicsize_t* dst, size_t val) { return atomic_fetch_sub_explicit(dst, val, memory_order_relaxed) - val; }
+#endif
#endif
@@ -356,6 +374,23 @@
# define CHECK_NOT_NULL(x) 1
#endif
+#if ENABLE_STATISTICS
+
+static void
+stat_add(stat_t* stat, size_t size) {
+ size_t current = atomicsize_add_acquire(&stat->current, size);
+ size_t max = atomicsize_load(&stat->max);
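+	//Best-effort peak tracking: a racing thread can store a slightly stale maximum, which is acceptable for statistics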
+ if (current > max)
+ atomicsize_store_release(&stat->max, current);
+}
+
+static void
+stat_sub(stat_t* stat, size_t size) {
+ atomicsize_sub(&stat->current, size);
+}
+
+#else
+
+#define stat_add(stat, size) do {} while (0)
+#define stat_sub(stat, size) do {} while (0)
+
+#endif
+
///
/// Global data
///
@@ -392,6 +427,11 @@
static atomicsize_t global_cache_counter;
#endif
+#if ENABLE_STATISTICS
+//! Mapped virtual memory, current and peak
+static stat_t stat_mmap;
+#endif
+
///
/// Forward declarations
///
@@ -423,10 +463,11 @@
size_t granularity = (use_huge_pages && (os_mmap_granularity < os_huge_page_size)) ? os_huge_page_size : os_mmap_granularity;
//Either size is a heap (a single memory page), a chunk or a huge block - we only need to align chunks and huge blocks to span granularity, and only if larger than mmap granularity
size_t padding = ((size >= CHUNK_SIZE) && (SPAN_SIZE > granularity)) ? SPAN_SIZE : 0;
+ size_t total = size + padding;
rpmalloc_assert(size >= os_page_size);
#ifdef _WIN32
//Ok to MEM_COMMIT - according to MSDN, "actual physical pages are not allocated unless/until the virtual addresses are actually accessed"
- void* ptr = VirtualAlloc(0, size + padding, (use_huge_pages ? MEM_LARGE_PAGES : 0) | MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE);
+ void* ptr = VirtualAlloc(0, total, (use_huge_pages ? MEM_LARGE_PAGES : 0) | MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE);
if (!ptr) {
rpmalloc_assert_fail_return("Failed to map virtual memory block", 0);
}
@@ -436,11 +477,11 @@
int fd = (int)VM_MAKE_TAG(240U);
if (use_huge_pages)
fd |= VM_FLAGS_SUPERPAGE_SIZE_2MB;
- void* ptr = mmap(0, size + padding, PROT_READ | PROT_WRITE, flags, fd, 0);
+ void* ptr = mmap(0, total, PROT_READ | PROT_WRITE, flags, fd, 0);
# elif defined(MAP_HUGETLB)
- void* ptr = mmap(0, size + padding, PROT_READ | PROT_WRITE, (use_huge_pages ? MAP_HUGETLB : 0) | flags, -1, 0);
+ void* ptr = mmap(0, total, PROT_READ | PROT_WRITE, (use_huge_pages ? MAP_HUGETLB : 0) | flags, -1, 0);
# else
- void* ptr = mmap(0, size + padding, PROT_READ | PROT_WRITE, flags, -1, 0);
+ void* ptr = mmap(0, total, PROT_READ | PROT_WRITE, flags, -1, 0);
# endif
if ((ptr == MAP_FAILED) || !ptr) {
rpmalloc_assert_fail("Failed to map virtual memory block");
@@ -455,6 +496,7 @@
ptr = pointer_offset(ptr, final_padding);
*offset = final_padding >> 3;
}
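+	//Track the full mapped range, including any span alignment padding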
+ stat_add(&stat_mmap, total);
rpmalloc_assert((size < SPAN_MASK) || !((uintptr_t)ptr & ~SPAN_MASK));
return ptr;
}
@@ -465,8 +507,11 @@
rpmalloc_assert(release || (offset == 0));
rpmalloc_assert(!release || (release >= size));
rpmalloc_assert(size >= os_page_size);
+	//Recompute the granularity decision from mapping (huge pages are assumed aligned to a multiple of huge page size) to know if span padding was applied
+ int use_huge_pages = (os_huge_pages && (size >= os_huge_page_size));
+ size_t granularity = (use_huge_pages && (os_mmap_granularity < os_huge_page_size)) ? os_huge_page_size : os_mmap_granularity;
//Padding is always one span size
- if (release && (size >= CHUNK_SIZE))
+ if (release && (size >= CHUNK_SIZE) && (SPAN_SIZE > granularity))
release += SPAN_SIZE;
if (release && offset) {
offset <<= 3;
@@ -487,6 +532,7 @@
rpmalloc_assert_fail("Failed to madvise virtual memory block as free");
}
#endif
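+	//release is zero for partial frees that only return pages to the system, leaving the mapped total unchanged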
+ stat_sub(&stat_mmap, release);
}
///
@@ -528,13 +574,11 @@
rpmalloc_global_cache_clear(void) {
void* cache = atomicptr_exchange(&global_cache, 0);
uintptr_t chunkptr = (uintptr_t)cache & ~(ABA_SIZE - 1);
- if (chunkptr) {
- chunk_t* chunk = (chunk_t*)chunkptr;
- while (chunk) {
- chunk_t* next = chunk->next;
- rpmalloc_unmap(chunk, chunk->mapped_size, chunk->mapped_offset, chunk->mapped_size);
- chunk = next;
- }
+ chunk_t* chunk = (chunk_t*)chunkptr;
+ while (chunk) {
+ chunk_t* next = chunk->next;
+ rpmalloc_unmap(chunk, chunk->mapped_size, chunk->mapped_offset, chunk->mapped_size);
+ chunk = next;
}
}
@@ -859,8 +903,8 @@
rpmalloc_span_adopt_deferred_free(span_t* span) {
-	// We need acquire semantics on the CAS operation since we are interested in the list size
+	// We need acquire semantics on the exchange operation since we are interested in the list size
do {
- span->free = atomicptr_load(&span->free_defer);
- } while ((span->free == INVALID_POINTER) || !atomicptr_cas_acquire(&span->free_defer, INVALID_POINTER, span->free));
+ span->free = atomicptr_exchange_acquire(&span->free_defer, INVALID_POINTER);
+ } while (span->free == INVALID_POINTER);
span->used_count -= span->defer_size;
span->defer_size = 0;
atomicptr_store_release(&span->free_defer, 0);
@@ -900,9 +944,9 @@
// guarantee the list_size variable validity + release semantics on pointer store
void* free_list;
do {
- free_list = atomicptr_load(&span->free_defer);
- *((void**)block) = free_list;
- } while ((free_list == INVALID_POINTER) || !atomicptr_cas_acquire(&span->free_defer, INVALID_POINTER, free_list));
+ free_list = atomicptr_exchange_acquire(&span->free_defer, INVALID_POINTER);
+ } while (free_list == INVALID_POINTER);
+ *((void**)block) = free_list;
uint32_t free_count = ++span->defer_size;
atomicptr_store_release(&span->free_defer, block);
if (free_count == span->block_count) {
@@ -1730,7 +1774,12 @@
extern void
rpmalloc_finalize(void) {
rpmalloc_thread_finalize();
+#if ENABLE_GLOBAL_CACHE
rpmalloc_global_cache_clear();
+#endif
+#if ENABLE_STATISTICS
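+	//All mapped memory must have been unmapped by this point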
+ rpmalloc_assert(atomicsize_load(&stat_mmap.current) == 0);
+#endif
}
extern inline RPMALLOC_ALLOCATOR void*