Merge develop for 1.4.3 release
diff --git a/CHANGELOG b/CHANGELOG
index cfc9e73..58d505c 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,3 +1,15 @@
+1.4.3
+
+Fixed an issue where certain combinations of memory page size and span map counts could cause
+a deadlock in the mapping of new memory pages.
+
+Tweaked cache levels and avoided setting spans as reserved in a heap when the heap already
+has spans in the thread cache, to improve cache usage.
+
+Preferred flags that more actively evict physical pages in madvise calls when partially
+unmapping span ranges on POSIX systems.
+
+
1.4.2
Fixed an issue where calling _exit might hang the main thread cleanup in rpmalloc if another
@@ -204,7 +216,7 @@
Improve documentation and additional code comments
-Move benchmarks to separate repo, https://github.com/rampantpixels/rpmalloc-benchmark
+Move benchmarks to separate repo, https://github.com/mjansson/rpmalloc-benchmark
1.0
diff --git a/README.md b/README.md
index fcda26b..ec8a843 100644
--- a/README.md
+++ b/README.md
@@ -1,7 +1,7 @@
# rpmalloc - General Purpose Memory Allocator
This library provides a public domain cross platform lock free thread caching 16-byte aligned memory allocator implemented in C. The latest source code is always available at https://github.com/mjansson/rpmalloc
-Created by Mattias Jansson ([@maniccoder](https://twitter.com/maniccoder))
+Created by Mattias Jansson ([@maniccoder](https://twitter.com/maniccoder)) - Discord server for discussions at https://discord.gg/M8BwTQrt6c
Platforms currently supported:
diff --git a/build/ninja/msvc.py b/build/ninja/msvc.py
index 8288d94..9f9d13a 100644
--- a/build/ninja/msvc.py
+++ b/build/ninja/msvc.py
@@ -31,8 +31,8 @@
self.linkcmd = '$toolchain$link $libpaths $configlibpaths $linkflags $linkarchflags $linkconfigflags /DEBUG /NOLOGO /SUBSYSTEM:CONSOLE /DYNAMICBASE /NXCOMPAT /MANIFEST /MANIFESTUAC:\"level=\'asInvoker\' uiAccess=\'false\'\" /TLBID:1 /PDB:$pdbpath /OUT:$out $in $libs $archlibs $oslibs'
self.dllcmd = self.linkcmd + ' /DLL'
- self.cflags = ['/D', '"' + project.upper() + '_COMPILE=1"', '/D', '"_UNICODE"', '/D', '"UNICODE"', '/Zi', '/Oi', '/Oy-', '/GS-', '/Gy-', '/Qpar-', '/fp:fast', '/fp:except-', '/Zc:forScope', '/Zc:wchar_t', '/GR-', '/openmp-']
- self.cwarnflags = ['/W4', '/WX']
+ self.cflags = ['/D', '"' + project.upper() + '_COMPILE=1"', '/D', '"_UNICODE"', '/D', '"UNICODE"', '/std:c17', '/Zi', '/Oi', '/Oy-', '/GS-', '/Gy-', '/Qpar-', '/fp:fast', '/fp:except-', '/Zc:forScope', '/Zc:wchar_t', '/GR-', '/openmp-']
+ self.cwarnflags = ['/W4', '/WX', '/wd4201'] #Ignore nameless union/struct which is allowed in C11
self.cmoreflags = []
self.arflags = ['/ignore:4221'] #Ignore empty object file warning]
self.linkflags = ['/DEBUG']
@@ -138,10 +138,11 @@
tools_list.sort(key=StrictVersion)
self.toolchain = os.path.join(tools_basepath, tools_list[-1])
self.toolchain_version = major_version + ".0"
+ break
if self.toolchain == '':
toolchain = ''
- versions = ['16.0', '15.0', '14.0', '13.0', '12.0', '11.0', '10.0']
+ versions = ['17.0', '16.0', '15.0']
keys = [
'HKLM\\SOFTWARE\\Microsoft\\VisualStudio\\SxS\\VC7',
'HKCU\\SOFTWARE\\Microsoft\\VisualStudio\\SxS\\VC7',
@@ -161,17 +162,18 @@
except:
continue
if not toolchain == '':
- if version == '15.0' or version == '16.0':
- tools_basepath = os.path.join(toolchain, 'VC', 'Tools', 'MSVC')
- tools_list = [item for item in os.listdir(tools_basepath) if os.path.isdir(os.path.join(tools_basepath, item))]
- from distutils.version import StrictVersion
- tools_list.sort(key=StrictVersion)
- toolchain = os.path.join(tools_basepath, tools_list[-1])
+ tools_basepath = os.path.join(toolchain, 'VC', 'Tools', 'MSVC')
+ tools_list = [item for item in os.listdir(tools_basepath) if os.path.isdir(os.path.join(tools_basepath, item))]
+ from distutils.version import StrictVersion
+ tools_list.sort(key=StrictVersion)
+ toolchain = os.path.join(tools_basepath, tools_list[-1])
self.toolchain = toolchain
self.toolchain_version = version
break
if not self.toolchain == '':
break
+ if self.toolchain == '':
+ raise Exception("Unable to locate any installed Visual Studio toolchain")
self.includepaths += [os.path.join(self.toolchain, 'include')]
if self.sdkpath == '':
versions = ['v10.0', 'v8.1']
@@ -237,13 +239,10 @@
return []
def make_arch_toolchain_path(self, arch):
- if self.toolchain_version == '15.0' or self.toolchain_version == '16.0':
- if arch == 'x86-64':
- return os.path.join(self.toolchain, 'bin', 'HostX64', 'x64\\')
- elif arch == 'x86':
- return os.path.join(self.toolchain, 'bin', 'HostX64', 'x86\\')
if arch == 'x86-64':
- return os.path.join(self.toolchain, 'bin', 'amd64\\')
+ return os.path.join(self.toolchain, 'bin', 'HostX64', 'x64\\')
+ elif arch == 'x86':
+ return os.path.join(self.toolchain, 'bin', 'HostX64', 'x86\\')
return os.path.join(self.toolchain, 'bin\\')
def make_carchflags(self, arch, targettype):
@@ -321,20 +320,14 @@
libpaths += [os.path.join(libpath, self.libpath, config, arch) for libpath in extralibpaths]
if self.sdkpath != '':
if arch == 'x86':
- if self.toolchain_version == '15.0' or self.toolchain_version == '16.0':
- libpaths += [os.path.join(self.toolchain, 'lib', 'x86')]
- else:
- libpaths += [os.path.join(self.toolchain, 'lib')]
+ libpaths += [os.path.join(self.toolchain, 'lib', 'x86')]
if self.sdkversion == 'v8.1':
libpaths += [os.path.join( self.sdkpath, 'lib', 'winv6.3', 'um', 'x86')]
if self.sdkversion == 'v10.0':
libpaths += [os.path.join(self.sdkpath, 'lib', self.sdkversionpath, 'um', 'x86')]
libpaths += [os.path.join(self.sdkpath, 'lib', self.sdkversionpath, 'ucrt', 'x86')]
else:
- if self.toolchain_version == '15.0' or self.toolchain_version == '16.0':
- libpaths += [os.path.join( self.toolchain, 'lib', 'x64')]
- else:
- libpaths += [os.path.join( self.toolchain, 'lib', 'amd64')]
+ libpaths += [os.path.join( self.toolchain, 'lib', 'x64')]
if self.sdkversion == 'v8.1':
libpaths += [os.path.join( self.sdkpath, 'lib', 'winv6.3', 'um', 'x64')]
if self.sdkversion == 'v10.0':
diff --git a/build/ninja/version.py b/build/ninja/version.py
index 1bf086a..78be895 100644
--- a/build/ninja/version.py
+++ b/build/ninja/version.py
@@ -15,7 +15,7 @@
if sys.platform.startswith('win'):
gitcmd = 'git.exe'
try:
- git_version = subprocess.check_output( [ gitcmd, 'describe', '--long' ], stderr = subprocess.STDOUT ).strip()
+ git_version = subprocess.check_output( [ gitcmd, 'describe', '--tags', '--long' ], stderr = subprocess.STDOUT ).strip()
tokens = git_version.decode().split( '-' )
version_numbers = tokens[0].split( '.' )
except Exception:
diff --git a/build/ninja/vslocate.py b/build/ninja/vslocate.py
index 4ec7fcf..afa171a 100644
--- a/build/ninja/vslocate.py
+++ b/build/ninja/vslocate.py
@@ -88,8 +88,15 @@
ctypes.POINTER(ctypes.POINTER(ISetupConfiguration)),
ctypes.c_void_p)
+ installations = []
+ dll = None
+
dll_path = os.path.expandvars("$ProgramData\\Microsoft\\VisualStudio\\Setup\\x64\\Microsoft.VisualStudio.Setup.Configuration.Native.dll")
- dll = ctypes.WinDLL(dll_path)
+ try:
+ dll = ctypes.WinDLL(dll_path)
+ except OSError as e:
+ #print("Failed to load Visual Studio setup configuration DLL: " + str(e))
+ return installations
params_get_setup_configuration = (1, "configuration", 0), (1, "reserved", 0),
@@ -98,8 +105,6 @@
configuration = ctypes.POINTER(ISetupConfiguration)()
reserved = ctypes.c_void_p(0)
- installations = []
-
result = get_setup_configuration(ctypes.byref(configuration), reserved)
if result != 0:
#print("Failed to get setup configuration: " + str(result))
@@ -110,7 +115,7 @@
enum_setup_instances = ctypes.POINTER(IEnumSetupInstances)()
result = enum_instances(configuration, ctypes.byref(enum_setup_instances))
if result != 0:
- #print("Failed to enum setup instances: " + str(result))
+ #print("Failed to enum setup instances: " + str(result))
return installations
diff --git a/configure.py b/configure.py
index 514189e..dc1b78e 100755
--- a/configure.py
+++ b/configure.py
@@ -9,7 +9,7 @@
import generator
-generator = generator.Generator(project = 'rpmalloc', variables = [('bundleidentifier', 'com.rampantpixels.rpmalloc.$(binname)')])
+generator = generator.Generator(project = 'rpmalloc', variables = [('bundleidentifier', 'com.maniccoder.rpmalloc.$(binname)')])
rpmalloc_lib = generator.lib(module = 'rpmalloc', libname = 'rpmalloc', sources = ['rpmalloc.c'])
rpmalloc_test_lib = generator.lib(module = 'rpmalloc', libname = 'rpmalloc-test', sources = ['rpmalloc.c'], variables = {'defines': ['ENABLE_ASSERTS=1', 'ENABLE_STATISTICS=1', 'RPMALLOC_FIRST_CLASS_HEAPS=1', 'RPMALLOC_CONFIGURABLE=1']})
diff --git a/rpmalloc/rpmalloc.c b/rpmalloc/rpmalloc.c
index 5186f61..f061cb4 100644
--- a/rpmalloc/rpmalloc.c
+++ b/rpmalloc/rpmalloc.c
@@ -20,6 +20,9 @@
#if defined(__clang__)
#pragma clang diagnostic ignored "-Wunused-macros"
#pragma clang diagnostic ignored "-Wunused-function"
+#if __has_warning("-Wreserved-identifier")
+#pragma clang diagnostic ignored "-Wreserved-identifier"
+#endif
#elif defined(__GNUC__)
#pragma GCC diagnostic ignored "-Wunused-macros"
#pragma GCC diagnostic ignored "-Wunused-function"
@@ -107,6 +110,7 @@
/// Platform and arch specifics
#if defined(_MSC_VER) && !defined(__clang__)
+# pragma warning (disable: 5105)
# ifndef FORCEINLINE
# define FORCEINLINE inline __forceinline
# endif
@@ -320,11 +324,11 @@
//! Size of a span header (must be a multiple of SMALL_GRANULARITY and a power of two)
#define SPAN_HEADER_SIZE 128
//! Number of spans in thread cache
-#define MAX_THREAD_SPAN_CACHE 256
+#define MAX_THREAD_SPAN_CACHE 400
//! Number of spans to transfer between thread and global cache
#define THREAD_SPAN_CACHE_TRANSFER 64
//! Number of spans in thread cache for large spans (must be greater than LARGE_CLASS_COUNT / 2)
-#define MAX_THREAD_SPAN_LARGE_CACHE 64
+#define MAX_THREAD_SPAN_LARGE_CACHE 100
//! Number of spans to transfer between thread and global cache for large spans
#define THREAD_SPAN_LARGE_CACHE_TRANSFER 6
@@ -570,6 +574,12 @@
atomic32_t lock;
//! Cache count
uint32_t count;
+#if ENABLE_STATISTICS
+ //! Insert count
+ size_t insert_count;
+ //! Extract count
+ size_t extract_count;
+#endif
//! Cached spans
span_t* span[GLOBAL_CACHE_MULTIPLIER * MAX_THREAD_SPAN_CACHE];
//! Unlimited cache overflow
@@ -614,10 +624,8 @@
#endif
//! Number of spans to map in each map call
static size_t _memory_span_map_count;
-//! Number of spans to release from thread cache to global cache (single spans)
-static size_t _memory_span_release_count;
-//! Number of spans to release from thread cache to global cache (large multiple spans)
-static size_t _memory_span_release_count_large;
+//! Number of spans to keep reserved in each heap
+static size_t _memory_heap_reserve_count;
//! Global size classes
static size_class_t _memory_size_class[SIZE_CLASS_COUNT];
//! Run-time size limit of medium blocks
@@ -661,8 +669,6 @@
static atomic32_t _master_spans;
//! Number of unmapped dangling master spans
static atomic32_t _unmapped_master_spans;
-//! Number of currently unused spans
-static atomic32_t _reserved_spans;
//! Running counter of total number of mapped memory pages since start
static atomic32_t _mapped_total;
//! Running counter of total number of unmapped memory pages since start
@@ -858,7 +864,12 @@
//Ok to MEM_COMMIT - according to MSDN, "actual physical pages are not allocated unless/until the virtual addresses are actually accessed"
void* ptr = VirtualAlloc(0, size + padding, (_memory_huge_pages ? MEM_LARGE_PAGES : 0) | MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE);
if (!ptr) {
- rpmalloc_assert(ptr, "Failed to map virtual memory block");
+ if (_memory_config.map_fail_callback) {
+ if (_memory_config.map_fail_callback(size + padding))
+ return _rpmalloc_mmap_os(size, offset);
+ } else {
+ rpmalloc_assert(ptr, "Failed to map virtual memory block");
+ }
return 0;
}
#else
@@ -880,8 +891,12 @@
void* ptr = mmap(0, size + padding, PROT_READ | PROT_WRITE, flags, -1, 0);
# endif
if ((ptr == MAP_FAILED) || !ptr) {
- if (errno != ENOMEM)
+ if (_memory_config.map_fail_callback) {
+ if (_memory_config.map_fail_callback(size + padding))
+ return _rpmalloc_mmap_os(size, offset);
+ } else if (errno != ENOMEM) {
rpmalloc_assert((ptr != MAP_FAILED) && ptr, "Failed to map virtual memory block");
+ }
return 0;
}
#endif
@@ -927,12 +942,13 @@
int ret;
while ((ret = madvise(address, size, MADV_FREE_REUSABLE)) == -1 && (errno == EAGAIN))
errno = 0;
- if ((ret == -1) && (errno != 0))
-#elif defined(MADV_FREE)
- if (madvise(address, size, MADV_FREE))
-#endif
-#if defined(MADV_DONTNEED)
+ if ((ret == -1) && (errno != 0)) {
+#elif defined(MADV_DONTNEED)
if (madvise(address, size, MADV_DONTNEED)) {
+#elif defined(MADV_PAGEOUT)
+ if (madvise(address, size, MADV_PAGEOUT)) {
+#elif defined(MADV_FREE)
+ if (madvise(address, size, MADV_FREE)) {
#else
if (posix_madvise(address, size, POSIX_MADV_DONTNEED)) {
#endif
@@ -1084,7 +1100,6 @@
if (!span)
return 0;
_rpmalloc_span_initialize(span, aligned_span_count, span_count, align_offset);
- _rpmalloc_stat_add(&_reserved_spans, aligned_span_count);
_rpmalloc_stat_inc(&_master_spans);
if (span_count <= LARGE_CLASS_COUNT)
_rpmalloc_stat_inc(&heap->span_use[span_count - 1].spans_map_calls);
@@ -1095,19 +1110,17 @@
_rpmalloc_span_mark_as_subspan_unless_master(heap->span_reserve_master, heap->span_reserve, heap->spans_reserved);
_rpmalloc_heap_cache_insert(heap, heap->span_reserve);
}
- if (reserved_count > DEFAULT_SPAN_MAP_COUNT) {
- // If huge pages, make sure only one thread maps more memory to avoid bloat
- while (!atomic_cas32_acquire(&_memory_global_lock, 1, 0))
- _rpmalloc_spin();
- size_t remain_count = reserved_count - DEFAULT_SPAN_MAP_COUNT;
- reserved_count = DEFAULT_SPAN_MAP_COUNT;
+ if (reserved_count > _memory_heap_reserve_count) {
+		// If huge pages or eager span map count, the global reserve spin lock is held by the caller, _rpmalloc_span_map
+ rpmalloc_assert(atomic_load32(&_memory_global_lock) == 1, "Global spin lock not held as expected");
+ size_t remain_count = reserved_count - _memory_heap_reserve_count;
+ reserved_count = _memory_heap_reserve_count;
span_t* remain_span = (span_t*)pointer_offset(reserved_spans, reserved_count * _memory_span_size);
if (_memory_global_reserve) {
_rpmalloc_span_mark_as_subspan_unless_master(_memory_global_reserve_master, _memory_global_reserve, _memory_global_reserve_count);
_rpmalloc_span_unmap(_memory_global_reserve);
}
_rpmalloc_global_set_reserved_spans(span, remain_span, remain_count);
- atomic_store32_release(&_memory_global_lock, 0);
}
_rpmalloc_heap_set_reserved_spans(heap, span, reserved_spans, reserved_count);
}
@@ -1120,12 +1133,13 @@
if (span_count <= heap->spans_reserved)
return _rpmalloc_span_map_from_reserve(heap, span_count);
span_t* span = 0;
- if (_memory_page_size > _memory_span_size) {
+ int use_global_reserve = (_memory_page_size > _memory_span_size) || (_memory_span_map_count > _memory_heap_reserve_count);
+ if (use_global_reserve) {
// If huge pages, make sure only one thread maps more memory to avoid bloat
while (!atomic_cas32_acquire(&_memory_global_lock, 1, 0))
_rpmalloc_spin();
if (_memory_global_reserve_count >= span_count) {
- size_t reserve_count = (!heap->spans_reserved ? DEFAULT_SPAN_MAP_COUNT : span_count);
+ size_t reserve_count = (!heap->spans_reserved ? _memory_heap_reserve_count : span_count);
if (_memory_global_reserve_count < reserve_count)
reserve_count = _memory_global_reserve_count;
span = _rpmalloc_global_get_reserved_spans(reserve_count);
@@ -1141,7 +1155,7 @@
}
if (!span)
span = _rpmalloc_span_map_aligned_count(heap, span_count);
- if (_memory_page_size > _memory_span_size)
+ if (use_global_reserve)
atomic_store32_release(&_memory_global_lock, 0);
return span;
}
@@ -1161,10 +1175,8 @@
if (!is_master) {
//Directly unmap subspans (unless huge pages, in which case we defer and unmap entire page range with master)
rpmalloc_assert(span->align_offset == 0, "Span align offset corrupted");
- if (_memory_span_size >= _memory_page_size) {
+ if (_memory_span_size >= _memory_page_size)
_rpmalloc_unmap(span, span_count * _memory_span_size, 0, 0);
- _rpmalloc_stat_sub(&_reserved_spans, span_count);
- }
} else {
//Special double flag to denote an unmapped master
//It must be kept in memory since span header must be used
@@ -1178,7 +1190,6 @@
size_t unmap_count = master->span_count;
if (_memory_span_size < _memory_page_size)
unmap_count = master->total_spans;
- _rpmalloc_stat_sub(&_reserved_spans, unmap_count);
_rpmalloc_stat_sub(&_master_spans, 1);
_rpmalloc_stat_sub(&_unmapped_master_spans, 1);
_rpmalloc_unmap(master, unmap_count * _memory_span_size, master->align_offset, (size_t)master->total_spans * _memory_span_size);
@@ -1190,6 +1201,7 @@
_rpmalloc_span_release_to_cache(heap_t* heap, span_t* span) {
rpmalloc_assert(heap == span->heap, "Span heap pointer corrupted");
rpmalloc_assert(span->size_class < SIZE_CLASS_COUNT, "Invalid span size class");
+ rpmalloc_assert(span->span_count == 1, "Invalid span count");
#if ENABLE_ADAPTIVE_THREAD_CACHE || ENABLE_STATISTICS
atomic_decr32(&heap->span_use[0].current);
#endif
@@ -1366,6 +1378,9 @@
while (!atomic_cas32_acquire(&cache->lock, 1, 0))
_rpmalloc_spin();
+#if ENABLE_STATISTICS
+ cache->insert_count += count;
+#endif
if ((cache->count + insert_count) > cache_limit)
insert_count = cache_limit - cache->count;
@@ -1438,6 +1453,9 @@
while (!atomic_cas32_acquire(&cache->lock, 1, 0))
_rpmalloc_spin();
+#if ENABLE_STATISTICS
+ cache->extract_count += count;
+#endif
size_t want = count - extract_count;
if (want > cache->count)
want = cache->count;
@@ -1452,6 +1470,12 @@
cache->overflow = current_span->next;
}
+#if ENABLE_ASSERTS
+ for (size_t ispan = 0; ispan < extract_count; ++ispan) {
+ assert(span[ispan]->span_count == span_count);
+ }
+#endif
+
atomic_store32_release(&cache->lock, 0);
return extract_count;
@@ -1830,7 +1854,6 @@
return 0;
// Master span will contain the heaps
- _rpmalloc_stat_add(&_reserved_spans, span_count);
_rpmalloc_stat_inc(&_master_spans);
_rpmalloc_span_initialize(span, span_count, heap_span_count, align_offset);
}
@@ -1856,7 +1879,7 @@
if (span_count > heap_span_count) {
// Cap reserved spans
size_t remain_count = span_count - heap_span_count;
- size_t reserve_count = (remain_count > DEFAULT_SPAN_MAP_COUNT ? DEFAULT_SPAN_MAP_COUNT : remain_count);
+ size_t reserve_count = (remain_count > _memory_heap_reserve_count ? _memory_heap_reserve_count : remain_count);
span_t* remain_span = (span_t*)pointer_offset(span, heap_span_count * _memory_span_size);
_rpmalloc_heap_set_reserved_spans(heap, span, remain_span, reserve_count);
@@ -2392,7 +2415,7 @@
_rpmalloc_deallocate_defer_free_span(span->heap, span);
return;
}
- rpmalloc_assert(span->heap->full_span_count, "Heap spanc counter corrupted");
+ rpmalloc_assert(span->heap->full_span_count, "Heap span counter corrupted");
--span->heap->full_span_count;
#if RPMALLOC_FIRST_CLASS_HEAPS
_rpmalloc_span_double_link_list_remove(&span->heap->large_huge_span, span);
@@ -2404,7 +2427,12 @@
#endif
heap_t* heap = span->heap;
rpmalloc_assert(heap, "No thread heap");
- if ((span->span_count > 1) && !heap->finalize && !heap->spans_reserved) {
+#if ENABLE_THREAD_CACHE
+ const int set_as_reserved = ((span->span_count > 1) && (heap->span_cache.count == 0) && !heap->finalize && !heap->spans_reserved);
+#else
+ const int set_as_reserved = ((span->span_count > 1) && !heap->finalize && !heap->spans_reserved);
+#endif
+ if (set_as_reserved) {
heap->span_reserve = span;
heap->spans_reserved = span->span_count;
if (span->flags & SPAN_FLAG_MASTER) {
@@ -2651,23 +2679,26 @@
_memory_config.memory_unmap = _rpmalloc_unmap_os;
}
+#if PLATFORM_WINDOWS
+ SYSTEM_INFO system_info;
+ memset(&system_info, 0, sizeof(system_info));
+ GetSystemInfo(&system_info);
+ _memory_map_granularity = system_info.dwAllocationGranularity;
+#else
+ _memory_map_granularity = (size_t)sysconf(_SC_PAGESIZE);
+#endif
+
#if RPMALLOC_CONFIGURABLE
_memory_page_size = _memory_config.page_size;
#else
_memory_page_size = 0;
#endif
_memory_huge_pages = 0;
- _memory_map_granularity = _memory_page_size;
if (!_memory_page_size) {
#if PLATFORM_WINDOWS
- SYSTEM_INFO system_info;
- memset(&system_info, 0, sizeof(system_info));
- GetSystemInfo(&system_info);
_memory_page_size = system_info.dwPageSize;
- _memory_map_granularity = system_info.dwAllocationGranularity;
#else
- _memory_page_size = (size_t)sysconf(_SC_PAGESIZE);
- _memory_map_granularity = _memory_page_size;
+ _memory_page_size = _memory_map_granularity;
if (_memory_config.enable_huge_pages) {
#if defined(__linux__)
size_t huge_page_size = 0;
@@ -2722,18 +2753,18 @@
token_privileges.Privileges[0].Luid = luid;
token_privileges.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED;
if (AdjustTokenPrivileges(token, FALSE, &token_privileges, 0, 0, 0)) {
- DWORD err = GetLastError();
- if (err == ERROR_SUCCESS) {
+ if (GetLastError() == ERROR_SUCCESS)
_memory_huge_pages = 1;
- if (large_page_minimum > _memory_page_size)
- _memory_page_size = large_page_minimum;
- if (large_page_minimum > _memory_map_granularity)
- _memory_map_granularity = large_page_minimum;
- }
}
}
CloseHandle(token);
}
+ if (_memory_huge_pages) {
+ if (large_page_minimum > _memory_page_size)
+ _memory_page_size = large_page_minimum;
+ if (large_page_minimum > _memory_map_granularity)
+ _memory_map_granularity = large_page_minimum;
+ }
}
#endif
@@ -2780,15 +2811,13 @@
_memory_span_map_count = (_memory_page_size / _memory_span_size);
if ((_memory_page_size >= _memory_span_size) && ((_memory_span_map_count * _memory_span_size) % _memory_page_size))
_memory_span_map_count = (_memory_page_size / _memory_span_size);
+ _memory_heap_reserve_count = (_memory_span_map_count > DEFAULT_SPAN_MAP_COUNT) ? DEFAULT_SPAN_MAP_COUNT : _memory_span_map_count;
_memory_config.page_size = _memory_page_size;
_memory_config.span_size = _memory_span_size;
_memory_config.span_map_count = _memory_span_map_count;
_memory_config.enable_huge_pages = _memory_huge_pages;
- _memory_span_release_count = (_memory_span_map_count > 4 ? ((_memory_span_map_count < 64) ? _memory_span_map_count : 64) : 4);
- _memory_span_release_count_large = (_memory_span_release_count > 8 ? (_memory_span_release_count / 4) : 2);
-
#if (defined(__APPLE__) || defined(__HAIKU__)) && ENABLE_PRELOAD
if (pthread_key_create(&_memory_thread_heap, _rpmalloc_heap_release_raw_fc))
return -1;
@@ -2827,7 +2856,6 @@
atomic_store32(&_mapped_pages, 0);
_mapped_pages_peak = 0;
atomic_store32(&_master_spans, 0);
- atomic_store32(&_reserved_spans, 0);
atomic_store32(&_mapped_total, 0);
atomic_store32(&_unmapped_total, 0);
atomic_store32(&_mapped_pages_os, 0);
@@ -2883,7 +2911,6 @@
#if ENABLE_STATISTICS
//If you hit these asserts you probably have memory leaks (perhaps global scope data doing dynamic allocations) or double frees in your code
rpmalloc_assert(atomic_load32(&_mapped_pages) == 0, "Memory leak detected");
- rpmalloc_assert(atomic_load32(&_reserved_spans) == 0, "Memory leak detected");
rpmalloc_assert(atomic_load32(&_mapped_pages_os) == 0, "Memory leak detected");
#endif
@@ -3221,34 +3248,33 @@
fprintf(file, "HugeCurrentMiB HugePeakMiB\n");
fprintf(file, "%14zu %11zu\n", huge_current / (size_t)(1024 * 1024), huge_peak / (size_t)(1024 * 1024));
- size_t global_cache = 0;
+ fprintf(file, "GlobalCacheMiB\n");
for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
global_cache_t* cache = _memory_span_cache + iclass;
- global_cache += (size_t)cache->count * iclass * _memory_span_size;
+ size_t global_cache = (size_t)cache->count * iclass * _memory_span_size;
+ size_t global_overflow_cache = 0;
span_t* span = cache->overflow;
while (span) {
- global_cache += iclass * _memory_span_size;
+ global_overflow_cache += iclass * _memory_span_size;
span = span->next;
}
+ if (global_cache || global_overflow_cache || cache->insert_count || cache->extract_count)
+ fprintf(file, "%4zu: %8zuMiB (%8zuMiB overflow) %14zu insert %14zu extract\n", iclass + 1, global_cache / (size_t)(1024 * 1024), global_overflow_cache / (size_t)(1024 * 1024), cache->insert_count, cache->extract_count);
}
- fprintf(file, "GlobalCacheMiB\n");
- fprintf(file, "%14zu\n", global_cache / (size_t)(1024 * 1024));
size_t mapped = (size_t)atomic_load32(&_mapped_pages) * _memory_page_size;
size_t mapped_os = (size_t)atomic_load32(&_mapped_pages_os) * _memory_page_size;
size_t mapped_peak = (size_t)_mapped_pages_peak * _memory_page_size;
size_t mapped_total = (size_t)atomic_load32(&_mapped_total) * _memory_page_size;
size_t unmapped_total = (size_t)atomic_load32(&_unmapped_total) * _memory_page_size;
- size_t reserved_total = (size_t)atomic_load32(&_reserved_spans) * _memory_span_size;
- fprintf(file, "MappedMiB MappedOSMiB MappedPeakMiB MappedTotalMiB UnmappedTotalMiB ReservedTotalMiB\n");
- fprintf(file, "%9zu %11zu %13zu %14zu %16zu %16zu\n",
+ fprintf(file, "MappedMiB MappedOSMiB MappedPeakMiB MappedTotalMiB UnmappedTotalMiB\n");
+ fprintf(file, "%9zu %11zu %13zu %14zu %16zu\n",
mapped / (size_t)(1024 * 1024),
mapped_os / (size_t)(1024 * 1024),
mapped_peak / (size_t)(1024 * 1024),
mapped_total / (size_t)(1024 * 1024),
- unmapped_total / (size_t)(1024 * 1024),
- reserved_total / (size_t)(1024 * 1024));
+ unmapped_total / (size_t)(1024 * 1024));
fprintf(file, "\n");
#if 0
diff --git a/rpmalloc/rpmalloc.h b/rpmalloc/rpmalloc.h
index b1fa757..f3363f2 100644
--- a/rpmalloc/rpmalloc.h
+++ b/rpmalloc/rpmalloc.h
@@ -156,6 +156,12 @@
//! Called when an assert fails, if asserts are enabled. Will use the standard assert()
// if this is not set.
void (*error_callback)(const char* message);
+ //! Called when a call to map memory pages fails (out of memory). If this callback is
+ // not set or returns zero the library will return a null pointer in the allocation
+ // call. If this callback returns non-zero the map call will be retried. The argument
+ // passed is the number of bytes that was requested in the map call. Only used if
+ // the default system memory map function is used (memory_map callback is not set).
+ int (*map_fail_callback)(size_t size);
//! Size of memory pages. The page size MUST be a power of two. All memory mapping
// requests to memory_map will be made with size set to a multiple of the page size.
// Used if RPMALLOC_CONFIGURABLE is defined to 1, otherwise system page size is used.
diff --git a/test/main.c b/test/main.c
index 0a92ac7..a01e09c 100644
--- a/test/main.c
+++ b/test/main.c
@@ -2,6 +2,11 @@
#if defined(_WIN32) && !defined(_CRT_SECURE_NO_WARNINGS)
# define _CRT_SECURE_NO_WARNINGS
#endif
+#ifdef _MSC_VER
+# if !defined(__clang__)
+# pragma warning (disable: 5105)
+# endif
+#endif
#if defined(__clang__)
#pragma clang diagnostic ignored "-Wnonportable-system-include-path"
#endif
@@ -362,7 +367,7 @@
for (size_t iloop = 0; iloop < 8000; ++iloop) {
for (size_t iptr = 0; iptr < pointer_count; ++iptr) {
if (iloop)
- rpfree(rprealloc(pointers[iptr], rand() % 4096));
+ rpfree(rprealloc(pointers[iptr], (size_t)rand() % 4096));
pointers[iptr] = rpaligned_alloc(alignments[(iptr + iloop) % 5], iloop + iptr);
}
}
@@ -787,15 +792,13 @@
}
static int
-test_threaded(void) {
+test_thread_implementation(void) {
uintptr_t thread[32];
uintptr_t threadres[32];
unsigned int i;
size_t num_alloc_threads;
allocator_thread_arg_t arg;
- rpmalloc_initialize();
-
num_alloc_threads = _hardware_threads;
if (num_alloc_threads < 2)
num_alloc_threads = 2;
@@ -846,11 +849,23 @@
return -1;
}
- printf("Memory threaded tests passed\n");
-
return 0;
}
+static int
+test_threaded(void) {
+ rpmalloc_initialize();
+
+ int ret = test_thread_implementation();
+
+ rpmalloc_finalize();
+
+ if (ret == 0)
+ printf("Memory threaded tests passed\n");
+
+ return ret;
+}
+
static int
test_crossthread(void) {
uintptr_t thread[32];
@@ -917,10 +932,10 @@
for (unsigned int ithread = 0; ithread < num_alloc_threads; ++ithread)
rpfree(arg[ithread].pointers);
- rpmalloc_finalize();
-
printf("Memory cross thread free tests passed\n");
+ rpmalloc_finalize();
+
return 0;
}
@@ -1091,6 +1106,24 @@
return 0;
}
+static int
+test_large_pages(void) {
+ rpmalloc_config_t config = {0};
+ config.page_size = 16 * 1024 * 1024;
+ config.span_map_count = 16;
+
+ rpmalloc_initialize_config(&config);
+
+ int ret = test_thread_implementation();
+
+ rpmalloc_finalize();
+
+ if (ret == 0)
+ printf("Large page config test passed\n");
+
+ return ret;
+}
+
int
test_run(int argc, char** argv) {
(void)sizeof(argc);
@@ -1110,6 +1143,8 @@
return -1;
if (test_first_class_heaps())
return -1;
+ if (test_large_pages())
+ return -1;
if (test_error())
return -1;
printf("All tests passed\n");
diff --git a/test/thread.c b/test/thread.c
index ff4758b..9d047e9 100644
--- a/test/thread.c
+++ b/test/thread.c
@@ -3,6 +3,9 @@
#include <errno.h>
#ifdef _MSC_VER
+# if !defined(__clang__)
+# pragma warning (disable: 5105)
+# endif
# define ATTRIBUTE_NORETURN
#else
# define ATTRIBUTE_NORETURN __attribute__((noreturn))