Merge branch 'dev'
diff --git a/.gitignore b/.gitignore
index d9e49f8..b468186 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,10 +6,11 @@
/jemalloc/doc/jemalloc.3
/jemalloc/lib/
/jemalloc/Makefile
-/jemalloc/src/internal/jemalloc_internal\.h
-/jemalloc/src/internal/mtrgraph_defs\.h
-/jemalloc/src/internal/mtrplay_defs\.h
-/jemalloc/src/jemalloc\.h
-/jemalloc/src/jemalloc_defs\.h
+/jemalloc/include/jemalloc/internal/jemalloc_internal\.h
+/jemalloc/include/jemalloc/jemalloc\.h
+/jemalloc/include/jemalloc/jemalloc_defs\.h
+/jemalloc/test/jemalloc_test\.h
/jemalloc/src/*.[od]
+/jemalloc/test/*.[od]
+/jemalloc/test/*.out
/jemalloc/VERSION
diff --git a/jemalloc/COPYING b/jemalloc/COPYING
index 1baaf50..10ade12 100644
--- a/jemalloc/COPYING
+++ b/jemalloc/COPYING
@@ -3,6 +3,7 @@
--------------------------------------------------------------------------------
Copyright (C) 2002-2010 Jason Evans <jasone@canonware.com>.
All rights reserved.
+Copyright (C) 2007-2010 Mozilla Foundation. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
diff --git a/jemalloc/ChangeLog b/jemalloc/ChangeLog
new file mode 100644
index 0000000..290dea1
--- /dev/null
+++ b/jemalloc/ChangeLog
@@ -0,0 +1,130 @@
+Following are change highlights associated with official releases. Important
+bug fixes are all mentioned, but internal enhancements are omitted here for
+brevity (even though they are more fun to write about). Much more detail can be
+found in the git revision history:
+
+ http://www.canonware.com/cgi-bin/gitweb.cgi?p=jemalloc.git
+ git://canonware.com/jemalloc.git
+
+* 2.0.0
+
+ This version focuses on the experimental *allocm() API, and on improved
+ run-time configuration/introspection. Nonetheless, numerous performance
+ improvements are also included.
+
+ New features:
+ - Implement the experimental {,r,s,d}allocm() API, which provides a superset
+ of the functionality available via malloc(), calloc(), posix_memalign(),
+ realloc(), malloc_usable_size(), and free(). These functions can be used
+ to allocate/reallocate aligned zeroed memory, ask for optional extra
+ memory during reallocation, prevent object movement during reallocation,
+ etc.
+ - Replace JEMALLOC_OPTIONS/JEMALLOC_PROF_PREFIX with MALLOC_CONF, which is
+ more human-readable, and more flexible. For example:
+ JEMALLOC_OPTIONS=AJP
+ is now:
+ MALLOC_CONF=abort:true,fill:true,stats_print:true
+ - Port to Apple OS X. Sponsored by Mozilla.
+ - Make it possible for the application to control thread-->arena mappings
+ via the "thread.arena" mallctl.
+ - Add compile-time support for all TLS-related functionality via pthreads
+ TSD. This is mainly of interest for OS X, which does not support TLS, but
+ has a TSD implementation with similar performance.
+ - Override memalign() and valloc() if they are provided by the system.
+ - Add the "arenas.purge" mallctl, which can be used to synchronously purge
+ all dirty unused pages.
+ - Make cumulative heap profiling data optional, so that it is possible to
+ limit the amount of memory consumed by heap profiling data structures.
+ - Add per thread allocation counters that can be accessed via the
+ "thread.allocated" and "thread.deallocated" mallctls.
+
+ Incompatible changes:
+ - Remove JEMALLOC_OPTIONS and malloc_options (see MALLOC_CONF above).
+ - Increase default backtrace depth from 4 to 128 for heap profiling.
+ - Disable interval-based profile dumps by default.
+
+ Bug fixes:
+ - Remove bad assertions in fork handler functions. These assertions could
+ cause aborts for some combinations of configure settings.
+ - Fix strerror_r() usage to deal with non-standard semantics in GNU libc.
+ - Fix leak context reporting. This bug tended to cause the number of contexts
+ to be underreported (though the reported number of objects and bytes were
+ correct).
+ - Fix a realloc() bug for large in-place growing reallocation. This bug could
+ cause memory corruption, but it was hard to trigger.
+ - Fix an allocation bug for small allocations that could be triggered if
+ multiple threads raced to create a new run of backing pages.
+ - Enhance the heap profiler to trigger samples based on usable size, rather
+ than request size.
+ - Fix a heap profiling bug due to sometimes losing track of requested object
+ size for sampled objects.
+
+* 1.0.3
+
+ Bug fixes:
+ - Fix the libunwind-based implementation of stack backtracing (used for heap
+ profiling). This bug could cause zero-length backtraces to be reported.
+ - Add a missing mutex unlock in library initialization code. If multiple
+ threads raced to initialize malloc, some of them could end up permanently
+ blocked.
+
+* 1.0.2
+
+ Bug fixes:
+ - Fix junk filling of large objects, which could cause memory corruption.
+ - Add MAP_NORESERVE support for chunk mapping, because otherwise virtual
+ memory limits could cause swap file configuration to fail. Contributed by
+ Jordan DeLong.
+
+* 1.0.1
+
+ Bug fixes:
+ - Fix compilation when --enable-fill is specified.
+ - Fix threads-related profiling bugs that affected accuracy and caused memory
+ to be leaked during thread exit.
+ - Fix dirty page purging race conditions that could cause crashes.
+ - Fix crash in tcache flushing code during thread destruction.
+
+* 1.0.0
+
+ This release focuses on speed and run-time introspection. Numerous
+ algorithmic improvements make this release substantially faster than its
+ predecessors.
+
+ New features:
+ - Implement autoconf-based configuration system.
+ - Add mallctl*(), for the purposes of introspection and run-time
+ configuration.
+ - Make it possible for the application to manually flush a thread's cache, via
+ the "tcache.flush" mallctl.
+ - Base maximum dirty page count on proportion of active memory.
+  - Compute various additional run-time statistics, including per size class
+ statistics for large objects.
+ - Expose malloc_stats_print(), which can be called repeatedly by the
+ application.
+ - Simplify the malloc_message() signature to only take one string argument,
+ and incorporate an opaque data pointer argument for use by the application
+ in combination with malloc_stats_print().
+ - Add support for allocation backed by one or more swap files, and allow the
+ application to disable over-commit if swap files are in use.
+ - Implement allocation profiling and leak checking.
+
+ Removed features:
+ - Remove the dynamic arena rebalancing code, since thread-specific caching
+ reduces its utility.
+
+ Bug fixes:
+ - Modify chunk allocation to work when address space layout randomization
+ (ASLR) is in use.
+ - Fix thread cleanup bugs related to TLS destruction.
+ - Handle 0-size allocation requests in posix_memalign().
+ - Fix a chunk leak. The leaked chunks were never touched, so this impacted
+ virtual memory usage, but not physical memory usage.
+
+* linux_20080828a, linux_20080827a
+
+ These snapshot releases are the simple result of incorporating Linux-specific
+ support into the FreeBSD malloc sources.
+
+--------------------------------------------------------------------------------
+vim:filetype=text:textwidth=80
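
As a rough illustration of the experimental {,r,s,d}allocm() API and the new
mallctl names described in the 2.0.0 entry above, a caller might do something
like the following sketch. It assumes the installed <jemalloc/jemalloc.h>
declares these entry points unprefixed, and that the ALLOCM_* flag/result
macros and the uint64_t width of "thread.allocated" match the generated
header; check those details against the actual build rather than this sketch.

    #include <stdint.h>
    #include <stdio.h>
    #include <jemalloc/jemalloc.h>

    int
    main(void)
    {
        void *p;
        size_t usize;
        uint64_t allocated;               /* counter width assumed */
        size_t len = sizeof(allocated);

        /* Allocate 4096 zeroed, 64-byte-aligned bytes (flag names assumed). */
        if (allocm(&p, &usize, 4096, ALLOCM_ALIGN(64) | ALLOCM_ZERO)
            != ALLOCM_SUCCESS)
            return (1);

        /* Try to grow to 8192 bytes without letting the object move. */
        (void)rallocm(&p, &usize, 8192, 0, ALLOCM_NO_MOVE);

        /* Read this thread's cumulative allocation counter. */
        if (mallctl("thread.allocated", &allocated, &len, NULL, 0) == 0)
            printf("thread.allocated=%llu, usable size=%zu\n",
                (unsigned long long)allocated, usize);

        dallocm(p, 0);
        return (0);
    }
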
diff --git a/jemalloc/INSTALL b/jemalloc/INSTALL
index eec3b37..fafd788 100644
--- a/jemalloc/INSTALL
+++ b/jemalloc/INSTALL
@@ -27,26 +27,42 @@
it is linked to. This works only on ELF-based systems.
--with-jemalloc-prefix=<prefix>
- Prefix all public APIs with <prefix>, so that, for example, malloc()
- becomes <prefix>malloc(). This makes it possible to use jemalloc at the
- same time as the system allocator.
+ Prefix all public APIs with <prefix>. For example, if <prefix> is
+ "prefix_", the API changes like the following occur:
+
+ malloc() --> prefix_malloc()
+ malloc_conf --> prefix_malloc_conf
+ /etc/malloc.conf --> /etc/prefix_malloc.conf
+ MALLOC_CONF --> PREFIX_MALLOC_CONF
+
+ This makes it possible to use jemalloc at the same time as the system
+ allocator, or even to use multiple copies of jemalloc simultaneously.
+
+ By default, the prefix is "", except on OS X, where it is "je_". On OS X,
+ jemalloc overlays the default malloc zone, but makes no attempt to actually
+ replace the "malloc", "calloc", etc. symbols.
--with-install-suffix=<suffix>
Append <suffix> to the base name of all installed files, such that multiple
versions of jemalloc can coexist in the same installation directory. For
example, libjemalloc.so.0 becomes libjemalloc<suffix>.so.0.
+--enable-cc-silence
+  Enable code that silences irrelevant compiler warnings. This is helpful when
+ trying to tell serious warnings from those due to compiler limitations, but
+ it potentially incurs a performance penalty.
+
--enable-debug
Enable assertions and validation code. This incurs a substantial
performance hit, but is very useful during application development.
--enable-stats
- Enable statistics gathering functionality. Use the 'P' option to print
- detailed allocation statistics at exit.
+ Enable statistics gathering functionality. See the "opt.stats_print"
+ option documentation for usage details.
--enable-prof
- Enable heap profiling and leak detection functionality. Use the 'B', 'E',
- 'F', 'I', 'L', and 'U' options to control these features.
+ Enable heap profiling and leak detection functionality. See the "opt.prof"
+  option documentation for usage details.
--disable-prof-libgcc
Disable the use of libgcc's backtracing functionality. Ordinarily, libgcc's
@@ -72,8 +88,8 @@
--disable-tcache
Disable thread-specific caches for small objects. Objects are cached and
- released in bulk, thus reducing the total number of mutex operations. Use
- the 'H', 'G', and 'M' options to control thread-specific caching.
+ released in bulk, thus reducing the total number of mutex operations. See
+  the "opt.tcache" option documentation for usage details.
--enable-swap
Enable mmap()ed swap file support. When this feature is built in, it is
@@ -85,18 +101,18 @@
mmap(2).
--enable-fill
- Enable support for junk/zero filling of memory. Use the 'J' option to
- control junk filling, or the 'Z' option to control zero filling.
+ Enable support for junk/zero filling of memory. See the "opt.junk"/
+ "opt.zero" option documentation for usage details.
--enable-xmalloc
Enable support for optional immediate termination due to out-of-memory
errors, as is commonly implemented by "xmalloc" wrapper function for malloc.
- Use the 'X' option to control termination behavior.
+ See the "opt.xmalloc" option documentation for usage details.
--enable-sysv
Enable support for System V semantics, wherein malloc(0) returns NULL
- rather than a minimal allocation. Use the 'V' option to control System V
- compatibility.
+ rather than a minimal allocation. See the "opt.sysv" option documentation
+ for usage details.
--enable-dynamic-page-shift
Under most conditions, the system page size never changes (usually 4KiB or
@@ -213,3 +229,14 @@
cd obj
../configure --enable-autogen
make
+
+=== Documentation ==============================================================
+
+The manual page that the configure script generates can be manually formatted
+prior to installation via any of the following commands:
+
+ nroff -man -man-ext -t doc/jemalloc.3
+
+ groff -man -man-ext -t -Tps doc/jemalloc.3 | ps2pdf - doc/jemalloc.3.pdf
+
+ (cd doc; groff -man -man-ext -t -Thtml jemalloc.3 > jemalloc.3.html)
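
To make the --with-jemalloc-prefix mapping shown earlier in this file
concrete, a call site built against a copy configured with
--with-jemalloc-prefix=prefix_ would look roughly like the sketch below. The
declarations normally come from the installed header; they are written out
here only so the example stands alone, and the snippet itself is hypothetical
rather than part of the distribution.

    #include <stddef.h>     /* size_t */

    /* Renamed public entry points (normally declared by the installed header). */
    void *prefix_malloc(size_t size);
    void prefix_free(void *ptr);

    int
    main(void)
    {
        void *p = prefix_malloc(64);    /* the system malloc() is untouched */

        if (p == NULL)
            return (1);
        prefix_free(p);
        return (0);
    }
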
diff --git a/jemalloc/Makefile.in b/jemalloc/Makefile.in
index ac9b782..46eddf4 100644
--- a/jemalloc/Makefile.in
+++ b/jemalloc/Makefile.in
@@ -28,10 +28,17 @@
RPATH_EXTRA := @RPATH_EXTRA@
ifeq (macho, @abi@)
SO := dylib
+WL_SONAME := dylib_install_name
else
SO := so
+WL_SONAME := soname
endif
-REV := 0
+REV := 1
+ifeq (macho, @abi@)
+TEST_LIBRARY_PATH := DYLD_FALLBACK_LIBRARY_PATH=@objroot@lib
+else
+TEST_LIBRARY_PATH :=
+endif
# Lists of files.
BINS := @srcroot@bin/pprof
@@ -42,11 +49,18 @@
@srcroot@src/chunk_mmap.c @srcroot@src/chunk_swap.c @srcroot@src/ckh.c \
@srcroot@src/ctl.c @srcroot@src/extent.c @srcroot@src/hash.c \
@srcroot@src/huge.c @srcroot@src/mb.c @srcroot@src/mutex.c \
- @srcroot@src/prof.c @srcroot@src/stats.c @srcroot@src/tcache.c
-DSOS := @objroot@lib/libjemalloc@install_suffix@.so.$(REV) \
- @objroot@lib/libjemalloc@install_suffix@.so \
+ @srcroot@src/prof.c @srcroot@src/rtree.c \
+ @srcroot@src/stats.c @srcroot@src/tcache.c
+ifeq (macho, @abi@)
+CSRCS += @srcroot@src/zone.c
+endif
+DSOS := @objroot@lib/libjemalloc@install_suffix@.$(SO).$(REV) \
+ @objroot@lib/libjemalloc@install_suffix@.$(SO) \
@objroot@lib/libjemalloc@install_suffix@_pic.a
MAN3 := @objroot@doc/jemalloc@install_suffix@.3
+CTESTS := @srcroot@test/allocated.c @srcroot@test/allocm.c \
+ @srcroot@test/posix_memalign.c \
+ @srcroot@test/rallocm.c @srcroot@test/thread_arena.c
.PHONY: all dist install check clean distclean relclean
@@ -63,18 +77,32 @@
$(CC) $(CFLAGS) -c $(CPPFLAGS) -o $@ $<
@$(SHELL) -ec "$(CC) -MM $(CPPFLAGS) $< | sed \"s/\($(subst /,\/,$(notdir $(basename $@)))\)\.o\([ :]*\)/$(subst /,\/,$(strip $(dir $@)))\1.o \2/g\" > $(@:%.o=%.d)"
-%.so : %.so.$(REV)
+%.$(SO) : %.$(SO).$(REV)
@mkdir -p $(@D)
ln -sf $(<F) $@
-@objroot@lib/libjemalloc@install_suffix@.so.$(REV) : $(CSRCS:@srcroot@%.c=@objroot@%.o)
+@objroot@lib/libjemalloc@install_suffix@.$(SO).$(REV) : $(CSRCS:@srcroot@%.c=@objroot@%.o)
@mkdir -p $(@D)
- $(CC) -shared -Wl,-soname,$(@F) -o $@ $+ $(LDFLAGS) $(LIBS)
+ $(CC) -shared -Wl,-$(WL_SONAME),$(@F) $(RPATH_EXTRA:%=@RPATH@%) -o $@ $+ $(LDFLAGS) $(LIBS)
@objroot@lib/libjemalloc@install_suffix@_pic.a : $(CSRCS:@srcroot@%.c=@objroot@%.o)
@mkdir -p $(@D)
ar crus $@ $+
+@objroot@test/%.o: @srcroot@test/%.c
+ @mkdir -p $(@D)
+ $(CC) $(CFLAGS) -c $(CPPFLAGS) -I@objroot@test -o $@ $<
+ @$(SHELL) -ec "$(CC) -MM $(CPPFLAGS) -I@objroot@test $< | sed \"s/\($(subst /,\/,$(notdir $(basename $@)))\)\.o\([ :]*\)/$(subst /,\/,$(strip $(dir $@)))\1.o \2/g\" > $(@:%.o=%.d)"
+
+@objroot@test/%: @objroot@test/%.o \
+ @objroot@lib/libjemalloc@install_suffix@.$(SO)
+ @mkdir -p $(@D)
+ifneq (@RPATH@, )
+ $(CC) -o $@ $< @RPATH@@objroot@lib -L@objroot@lib -ljemalloc@install_suffix@
+else
+ $(CC) -o $@ $< -L@objroot@lib -ljemalloc@install_suffix@
+endif
+
install_bin:
install -d $(BINDIR)
@for b in $(BINS); do \
@@ -91,8 +119,8 @@
install_lib: $(DSOS)
install -d $(LIBDIR)
- install -m 755 @objroot@lib/libjemalloc@install_suffix@.so.$(REV) $(LIBDIR)
- ln -sf libjemalloc@install_suffix@.so.$(REV) $(LIBDIR)/libjemalloc@install_suffix@.so
+ install -m 755 @objroot@lib/libjemalloc@install_suffix@.$(SO).$(REV) $(LIBDIR)
+ ln -sf libjemalloc@install_suffix@.$(SO).$(REV) $(LIBDIR)/libjemalloc@install_suffix@.$(SO)
install -m 755 @objroot@lib/libjemalloc@install_suffix@_pic.a $(LIBDIR)
install_man:
@@ -104,19 +132,50 @@
install: install_bin install_include install_lib install_man
-check:
+tests: $(CTESTS:@srcroot@%.c=@objroot@%)
+
+check: tests
+ @mkdir -p @objroot@test
+ @$(SHELL) -c 'total=0; \
+ failures=0; \
+ echo "========================================="; \
+ for t in $(CTESTS:@srcroot@%.c=@objroot@%); do \
+ total=`expr $$total + 1`; \
+ /bin/echo -n "$${t} ... "; \
+ $(TEST_LIBRARY_PATH) $${t} @abs_srcroot@ @abs_objroot@ \
+ > @objroot@$${t}.out 2>&1; \
+ if test -e "@srcroot@$${t}.exp"; then \
+ diff -u @srcroot@$${t}.exp \
+ @objroot@$${t}.out >/dev/null 2>&1; \
+ fail=$$?; \
+ if test "$${fail}" -eq "1" ; then \
+ failures=`expr $${failures} + 1`; \
+ echo "*** FAIL ***"; \
+ else \
+ echo "pass"; \
+ fi; \
+ else \
+ echo "*** FAIL *** (.exp file is missing)"; \
+ failures=`expr $${failures} + 1`; \
+ fi; \
+ done; \
+ echo "========================================="; \
+ echo "Failures: $${failures}/$${total}"'
clean:
rm -f $(CSRCS:@srcroot@%.c=@objroot@%.o)
rm -f $(CSRCS:@srcroot@%.c=@objroot@%.d)
+ rm -f $(CTESTS:@srcroot@%.c=@objroot@%)
+ rm -f $(CTESTS:@srcroot@%.c=@objroot@%.o)
+ rm -f $(CTESTS:@srcroot@%.c=@objroot@%.d)
+ rm -f $(CTESTS:@srcroot@%.c=@objroot@%.out)
rm -f $(DSOS)
distclean: clean
rm -rf @objroot@autom4te.cache
rm -f @objroot@config.log
rm -f @objroot@config.status
- rm -f @objroot@cfghdrs.stamp
- rm -f @objroot@cfgoutputs.stamp
+ rm -f @objroot@config.stamp
rm -f @cfghdrs_out@
rm -f @cfgoutputs_out@
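
The new check target runs each test binary with the absolute source root and
object root as its two arguments, captures stdout and stderr into
test/<name>.out, and diffs that against test/<name>.exp. A hypothetical test
that would fit this harness could look like the following; the file name,
banner text, and matching .exp contents are illustrative, not copied from the
tests added here.

    /* test/example.c (hypothetical); built and run by `make check`. */
    #include <stdio.h>
    #include <stdlib.h>

    int
    main(int argc, char **argv)
    {
        void *p;

        /* argv[1] is @abs_srcroot@ and argv[2] is @abs_objroot@ per the
         * Makefile; this sketch needs neither. */
        (void)argc;
        (void)argv;

        fprintf(stderr, "Test begin\n");
        p = malloc(1);
        if (p == NULL)
            fprintf(stderr, "malloc() returned NULL\n");
        free(p);
        fprintf(stderr, "Test end\n");

        /* A matching test/example.exp would contain exactly the
         * "Test begin" and "Test end" lines. */
        return (0);
    }
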
diff --git a/jemalloc/README b/jemalloc/README
index 2ff36ef..4d7b552 100644
--- a/jemalloc/README
+++ b/jemalloc/README
@@ -1,9 +1,9 @@
jemalloc is a general-purpose scalable concurrent malloc(3) implementation.
This distribution is a stand-alone "portable" implementation that currently
-targets only Linux. jemalloc is included as the default allocator in the
-FreeBSD and NetBSD operating systems, and it is used by the Mozilla Firefox web
-browser on Microsoft Windows-related platforms. Depending on your needs, one
-of the other divergent versions may suit your needs better than this
+targets Linux and Apple OS X. jemalloc is included as the default allocator in
+the FreeBSD and NetBSD operating systems, and it is used by the Mozilla Firefox
+web browser on Microsoft Windows-related platforms. Depending on your needs,
+one of the other divergent versions may suit you better than this
distribution.
The COPYING file contains copyright and licensing information.
@@ -11,4 +11,6 @@
The INSTALL file contains information on how to configure, build, and install
jemalloc.
+The ChangeLog file contains a brief summary of changes for each release.
+
URL: http://www.canonware.com/jemalloc/
diff --git a/jemalloc/bin/pprof b/jemalloc/bin/pprof
index 57c0600..1655f07 100755
--- a/jemalloc/bin/pprof
+++ b/jemalloc/bin/pprof
@@ -92,9 +92,7 @@
my $KCACHEGRIND = "kcachegrind";
my $PS2PDF = "ps2pdf";
# These are used for dynamic profiles
-my $WGET = "wget";
-my $WGET_FLAGS = "--no-http-keep-alive"; # only supported by some wgets
-my $CURL = "curl";
+my $URL_FETCHER = "curl -s";
# These are the web pages that servers need to support for dynamic profiles
my $HEAP_PAGE = "/pprof/heap";
@@ -108,6 +106,12 @@
my $SYMBOL_PAGE = "/pprof/symbol"; # must support symbol lookup via POST
my $PROGRAM_NAME_PAGE = "/pprof/cmdline";
+# These are the web pages that can be named on the command line.
+# All the alternatives must begin with /.
+my $PROFILES = "($HEAP_PAGE|$PROFILE_PAGE|$PMUPROFILE_PAGE|" .
+ "$GROWTH_PAGE|$CONTENTION_PAGE|$WALL_PAGE|" .
+ "$FILTEREDPROFILE_PAGE)";
+
# default binary name
my $UNKNOWN_BINARY = "(unknown)";
@@ -176,12 +180,14 @@
--text Generate text report
--callgrind Generate callgrind format to stdout
--gv Generate Postscript and display
+ --web Generate SVG and display
--list=<regexp> Generate source listing of matching routines
--disasm=<regexp> Generate disassembly of matching routines
--symbols Print demangled symbol names found at given addresses
--dot Generate DOT file to stdout
--ps Generate Postcript to stdout
--pdf Generate PDF to stdout
+ --svg Generate SVG to stdout
--gif Generate GIF to stdout
--raw Generate symbolized pprof data (useful with remote fetch)
@@ -209,7 +215,7 @@
(i.e. direct leak generators) more visible
Miscellaneous:
- --tools=<prefix> Prefix for object tool pathnames
+ --tools=<prefix or binary:fullpath>[,...] \$PATH for object tool pathnames
--test Run unit tests
--help This message
--version Version information
@@ -224,6 +230,8 @@
Enters "interactive" mode
pprof --text /bin/ls ls.prof
Outputs one line per procedure
+pprof --web /bin/ls ls.prof
+ Displays annotated call-graph in web browser
pprof --gv /bin/ls ls.prof
Displays annotated call-graph via 'gv'
pprof --gv --focus=Mutex /bin/ls ls.prof
@@ -234,6 +242,9 @@
(Per-line) annotated source listing for getdir()
pprof --disasm=getdir /bin/ls ls.prof
(Per-PC) annotated disassembly for getdir()
+
+pprof http://localhost:1234/
+ Enters "interactive" mode
pprof --text localhost:1234
Outputs one line per procedure for localhost:1234
pprof --raw localhost:1234 > ./local.raw
@@ -293,10 +304,12 @@
$main::opt_disasm = "";
$main::opt_symbols = 0;
$main::opt_gv = 0;
+ $main::opt_web = 0;
$main::opt_dot = 0;
$main::opt_ps = 0;
$main::opt_pdf = 0;
$main::opt_gif = 0;
+ $main::opt_svg = 0;
$main::opt_raw = 0;
$main::opt_nodecount = 80;
@@ -331,6 +344,9 @@
# Are we using $SYMBOL_PAGE?
$main::use_symbol_page = 0;
+ # Files returned by TempName.
+ %main::tempnames = ();
+
# Type of profile we are dealing with
# Supported types:
# cpu
@@ -356,9 +372,11 @@
"disasm=s" => \$main::opt_disasm,
"symbols!" => \$main::opt_symbols,
"gv!" => \$main::opt_gv,
+ "web!" => \$main::opt_web,
"dot!" => \$main::opt_dot,
"ps!" => \$main::opt_ps,
"pdf!" => \$main::opt_pdf,
+ "svg!" => \$main::opt_svg,
"gif!" => \$main::opt_gif,
"raw!" => \$main::opt_raw,
"interactive!" => \$main::opt_interactive,
@@ -434,9 +452,11 @@
($main::opt_disasm eq '' ? 0 : 1) +
($main::opt_symbols == 0 ? 0 : 1) +
$main::opt_gv +
+ $main::opt_web +
$main::opt_dot +
$main::opt_ps +
$main::opt_pdf +
+ $main::opt_svg +
$main::opt_gif +
$main::opt_raw +
$main::opt_interactive +
@@ -511,20 +531,6 @@
ConfigureObjTools($main::prog)
}
- # Check what flags our commandline utilities support
- if (open(TFILE, "$WGET $WGET_FLAGS -V 2>&1 |")) {
- my @lines = <TFILE>;
- if (grep(/unrecognized/, @lines) > 0) {
- # grep found 'unrecognized' token from WGET, clear WGET flags
- $WGET_FLAGS = "";
- }
- close(TFILE);
- }
- # TODO(csilvers): check all the other binaries and objtools to see
- # if they are installed and what flags they support, and store that
- # in a data structure here, rather than scattering these tests about.
- # Then, ideally, rewrite code to use wget OR curl OR GET or ...
-
# Break the opt_list_prefix into the prefix_list array
@prefix_list = split (',', $main::opt_lib_prefix);
@@ -588,6 +594,10 @@
} elsif ($main::use_symbol_page) {
$symbols = FetchSymbols($pcs);
} else {
+ # TODO(csilvers): $libs uses the /proc/self/maps data from profile1,
+ # which may differ from the data from subsequent profiles, especially
+ # if they were run on different machines. Use appropriate libs for
+ # each pc somehow.
$symbols = ExtractSymbols($libs, $pcs);
}
@@ -635,9 +645,24 @@
} else {
if (PrintDot($main::prog, $symbols, $profile, $flat, $cumulative, $total)) {
if ($main::opt_gv) {
- RunGV(PsTempName($main::next_tmpfile), "");
+ RunGV(TempName($main::next_tmpfile, "ps"), "");
+ } elsif ($main::opt_web) {
+ my $tmp = TempName($main::next_tmpfile, "svg");
+ RunWeb($tmp);
+ # The command we run might hand the file name off
+ # to an already running browser instance and then exit.
+ # Normally, we'd remove $tmp on exit (right now),
+ # but fork a child to remove $tmp a little later, so that the
+ # browser has time to load it first.
+ delete $main::tempnames{$tmp};
+ if (fork() == 0) {
+ sleep 5;
+ unlink($tmp);
+ exit(0);
+ }
}
} else {
+ cleanup();
exit(1);
}
}
@@ -683,6 +708,34 @@
}
}
+sub RunWeb {
+ my $fname = shift;
+ print STDERR "Loading web page file:///$fname\n";
+
+ if (`uname` =~ /Darwin/) {
+ # OS X: open will use standard preference for SVG files.
+ system("/usr/bin/open", $fname);
+ return;
+ }
+
+ # Some kind of Unix; try generic symlinks, then specific browsers.
+ # (Stop once we find one.)
+ # Works best if the browser is already running.
+ my @alt = (
+ "/etc/alternatives/gnome-www-browser",
+ "/etc/alternatives/x-www-browser",
+ "google-chrome",
+ "firefox",
+ );
+ foreach my $b (@alt) {
+ if (system($b, $fname) == 0) {
+ return;
+ }
+ }
+
+ print STDERR "Could not load web browser.\n";
+}
+
sub RunKcachegrind {
my $fname = shift;
my $bg = shift; # "" or " &" if we should run in background
@@ -739,10 +792,10 @@
print STDERR "\n";
return 0;
}
- if (m/^ *quit/) {
+ if (m/^\s*quit/) {
return 0;
}
- if (m/^ *help/) {
+ if (m/^\s*help/) {
InteractiveHelpMessage();
return 1;
}
@@ -754,7 +807,7 @@
$main::opt_gv = 0;
$main::opt_cum = 0;
- if (m/^ *(text|top)(\d*) *(.*)/) {
+ if (m/^\s*(text|top)(\d*)\s*(.*)/) {
$main::opt_text = 1;
my $line_limit = ($2 ne "") ? int($2) : 10;
@@ -773,14 +826,14 @@
PrintText($symbols, $flat, $cumulative, $total, $line_limit);
return 1;
}
- if (m/^ *callgrind *([^ \n]*)/) {
+ if (m/^\s*callgrind\s*([^ \n]*)/) {
$main::opt_callgrind = 1;
# Get derived profiles
my $calls = ExtractCalls($symbols, $orig_profile);
my $filename = $1;
if ( $1 eq '' ) {
- $filename = CallgrindTempName($main::next_tmpfile);
+ $filename = TempName($main::next_tmpfile, "callgrind");
}
PrintCallgrind($calls, $filename);
if ( $1 eq '' ) {
@@ -790,7 +843,7 @@
return 1;
}
- if (m/^ *list *(.+)/) {
+ if (m/^\s*list\s*(.+)/) {
$main::opt_list = 1;
my $routine;
@@ -807,7 +860,7 @@
PrintListing($libs, $flat, $cumulative, $routine);
return 1;
}
- if (m/^ *disasm *(.+)/) {
+ if (m/^\s*disasm\s*(.+)/) {
$main::opt_disasm = 1;
my $routine;
@@ -825,12 +878,18 @@
PrintDisassembly($libs, $flat, $cumulative, $routine, $total);
return 1;
}
- if (m/^ *gv *(.*)/) {
- $main::opt_gv = 1;
+ if (m/^\s*(gv|web)\s*(.*)/) {
+ $main::opt_gv = 0;
+ $main::opt_web = 0;
+ if ($1 eq "gv") {
+ $main::opt_gv = 1;
+ } elsif ($1 eq "web") {
+ $main::opt_web = 1;
+ }
my $focus;
my $ignore;
- ($focus, $ignore) = ParseInteractiveArgs($1);
+ ($focus, $ignore) = ParseInteractiveArgs($2);
# Process current profile to account for various settings
my $profile = ProcessProfile($orig_profile, $symbols, $focus, $ignore);
@@ -841,11 +900,19 @@
my $cumulative = CumulativeProfile($reduced);
if (PrintDot($main::prog, $symbols, $profile, $flat, $cumulative, $total)) {
- RunGV(PsTempName($main::next_tmpfile), " &");
+ if ($main::opt_gv) {
+ RunGV(TempName($main::next_tmpfile, "ps"), " &");
+ } elsif ($main::opt_web) {
+ RunWeb(TempName($main::next_tmpfile, "svg"));
+ }
$main::next_tmpfile++;
}
return 1;
}
+ if (m/^\s*$/) {
+ return 1;
+ }
+ print STDERR "Unknown command: try 'help'.\n";
return 1;
}
@@ -894,6 +961,14 @@
the "focus" regular expression matches a routine name on the stack
trace.
+ web
+ web [focus] [-ignore1] [-ignore2]
+ Like GV, but displays profile in your web browser instead of using
+ Ghostview. Works best if your web browser is already running.
+ To change the browser that gets used:
+ On Linux, set the /etc/alternatives/gnome-www-browser symlink.
+ On OS X, change the Finder association for SVG files.
+
list [routine_regexp] [-ignore1] [-ignore2]
Show source listing of routines whose names match "routine_regexp"
@@ -950,14 +1025,12 @@
##### Output code #####
-sub PsTempName {
+sub TempName {
my $fnum = shift;
- return "$main::tmpfile_ps" . "." . "$fnum" . ".ps";
-}
-
-sub CallgrindTempName {
- my $fnum = shift;
- return "$main::tmpfile_ps" . "." . "$fnum" . ".callgrind";
+ my $ext = shift;
+ my $file = "$main::tmpfile_ps.$fnum.$ext";
+ $main::tempnames{$file} = 1;
+ return $file;
}
# Print profile data in packed binary format (64-bit) to standard out
@@ -1599,7 +1672,6 @@
}
if ($last < 0) {
print STDERR "No nodes to print\n";
- cleanup();
return 0;
}
@@ -1612,11 +1684,14 @@
# Open DOT output file
my $output;
if ($main::opt_gv) {
- $output = "| $DOT -Tps2 >" . PsTempName($main::next_tmpfile);
+ $output = "| $DOT -Tps2 >" . TempName($main::next_tmpfile, "ps");
} elsif ($main::opt_ps) {
$output = "| $DOT -Tps2";
} elsif ($main::opt_pdf) {
$output = "| $DOT -Tps2 | $PS2PDF - -";
+ } elsif ($main::opt_web || $main::opt_svg) {
+ # We need to post-process the SVG, so write to a temporary file always.
+ $output = "| $DOT -Tsvg >" . TempName($main::next_tmpfile, "svg");
} elsif ($main::opt_gif) {
$output = "| $DOT -Tgif";
} else {
@@ -1727,7 +1802,10 @@
my $fraction = abs($local_total ? (3 * ($n / $local_total)) : 0);
if ($fraction > 1) { $fraction = 1; }
my $w = $fraction * 2;
- #if ($w < 1) { $w = 1; }
+ if ($w < 1 && ($main::opt_web || $main::opt_svg)) {
+ # SVG output treats line widths < 1 poorly.
+ $w = 1;
+ }
# Dot sometimes segfaults if given edge weights that are too large, so
# we cap the weights at a large value
@@ -1751,11 +1829,312 @@
}
print DOT ("}\n");
-
close(DOT);
+
+ if ($main::opt_web || $main::opt_svg) {
+ # Rewrite SVG to be more usable inside web browser.
+ RewriteSvg(TempName($main::next_tmpfile, "svg"));
+ }
+
return 1;
}
+sub RewriteSvg {
+ my $svgfile = shift;
+
+ open(SVG, $svgfile) || die "open temp svg: $!";
+ my @svg = <SVG>;
+ close(SVG);
+ unlink $svgfile;
+ my $svg = join('', @svg);
+
+ # Dot's SVG output is
+ #
+ # <svg width="___" height="___"
+ # viewBox="___" xmlns=...>
+ # <g id="graph0" transform="...">
+ # ...
+ # </g>
+ # </svg>
+ #
+ # Change it to
+ #
+ # <svg width="100%" height="100%"
+ # xmlns=...>
+ # $svg_javascript
+ # <g id="viewport" transform="translate(0,0)">
+ # <g id="graph0" transform="...">
+ # ...
+ # </g>
+ # </g>
+ # </svg>
+
+ # Fix width, height; drop viewBox.
+ $svg =~ s/(?s)<svg width="[^"]+" height="[^"]+"(.*?)viewBox="[^"]+"/<svg width="100%" height="100%"$1/;
+
+ # Insert script, viewport <g> above first <g>
+ my $svg_javascript = SvgJavascript();
+ my $viewport = "<g id=\"viewport\" transform=\"translate(0,0)\">\n";
+ $svg =~ s/<g id="graph\d"/$svg_javascript$viewport$&/;
+
+ # Insert final </g> above </svg>.
+ $svg =~ s/(.*)(<\/svg>)/$1<\/g>$2/;
+ $svg =~ s/<g id="graph\d"(.*?)/<g id="viewport"$1/;
+
+ if ($main::opt_svg) {
+ # --svg: write to standard output.
+ print $svg;
+ } else {
+ # Write back to temporary file.
+ open(SVG, ">$svgfile") || die "open $svgfile: $!";
+ print SVG $svg;
+ close(SVG);
+ }
+}
+
+sub SvgJavascript {
+ return <<'EOF';
+<script type="text/ecmascript"><![CDATA[
+// SVGPan
+// http://www.cyberz.org/blog/2009/12/08/svgpan-a-javascript-svg-panzoomdrag-library/
+// Local modification: if(true || ...) below to force panning, never moving.
+
+/**
+ * SVGPan library 1.2
+ * ====================
+ *
+ * Given a unique existing element with id "viewport", including the
+ * library into any SVG adds the following capabilities:
+ *
+ * - Mouse panning
+ * - Mouse zooming (using the wheel)
+ *  - Object dragging
+ *
+ * Known issues:
+ *
+ *  - Zooming (while panning) on Safari still has some issues
+ *
+ * Releases:
+ *
+ * 1.2, Sat Mar 20 08:42:50 GMT 2010, Zeng Xiaohui
+ * Fixed a bug with browser mouse handler interaction
+ *
+ * 1.1, Wed Feb 3 17:39:33 GMT 2010, Zeng Xiaohui
+ * Updated the zoom code to support the mouse wheel on Safari/Chrome
+ *
+ * 1.0, Andrea Leofreddi
+ * First release
+ *
+ * This code is licensed under the following BSD license:
+ *
+ * Copyright 2009-2010 Andrea Leofreddi <a.leofreddi@itcharm.com>. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification, are
+ * permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this list of
+ * conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice, this list
+ * of conditions and the following disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Andrea Leofreddi ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL Andrea Leofreddi OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * The views and conclusions contained in the software and documentation are those of the
+ * authors and should not be interpreted as representing official policies, either expressed
+ * or implied, of Andrea Leofreddi.
+ */
+
+var root = document.documentElement;
+
+var state = 'none', stateTarget, stateOrigin, stateTf;
+
+setupHandlers(root);
+
+/**
+ * Register handlers
+ */
+function setupHandlers(root){
+ setAttributes(root, {
+ "onmouseup" : "add(evt)",
+ "onmousedown" : "handleMouseDown(evt)",
+ "onmousemove" : "handleMouseMove(evt)",
+ "onmouseup" : "handleMouseUp(evt)",
+ //"onmouseout" : "handleMouseUp(evt)", // Decomment this to stop the pan functionality when dragging out of the SVG element
+ });
+
+ if(navigator.userAgent.toLowerCase().indexOf('webkit') >= 0)
+ window.addEventListener('mousewheel', handleMouseWheel, false); // Chrome/Safari
+ else
+ window.addEventListener('DOMMouseScroll', handleMouseWheel, false); // Others
+
+ var g = svgDoc.getElementById("svg");
+ g.width = "100%";
+ g.height = "100%";
+}
+
+/**
+ * Instance an SVGPoint object with given event coordinates.
+ */
+function getEventPoint(evt) {
+ var p = root.createSVGPoint();
+
+ p.x = evt.clientX;
+ p.y = evt.clientY;
+
+ return p;
+}
+
+/**
+ * Sets the current transform matrix of an element.
+ */
+function setCTM(element, matrix) {
+ var s = "matrix(" + matrix.a + "," + matrix.b + "," + matrix.c + "," + matrix.d + "," + matrix.e + "," + matrix.f + ")";
+
+ element.setAttribute("transform", s);
+}
+
+/**
+ * Dumps a matrix to a string (useful for debug).
+ */
+function dumpMatrix(matrix) {
+ var s = "[ " + matrix.a + ", " + matrix.c + ", " + matrix.e + "\n " + matrix.b + ", " + matrix.d + ", " + matrix.f + "\n 0, 0, 1 ]";
+
+ return s;
+}
+
+/**
+ * Sets attributes of an element.
+ */
+function setAttributes(element, attributes){
+ for (i in attributes)
+ element.setAttributeNS(null, i, attributes[i]);
+}
+
+/**
+ * Handle mouse wheel event.
+ */
+function handleMouseWheel(evt) {
+ if(evt.preventDefault)
+ evt.preventDefault();
+
+ evt.returnValue = false;
+
+ var svgDoc = evt.target.ownerDocument;
+
+ var delta;
+
+ if(evt.wheelDelta)
+ delta = evt.wheelDelta / 3600; // Chrome/Safari
+ else
+ delta = evt.detail / -90; // Mozilla
+
+ var z = 1 + delta; // Zoom factor: 0.9/1.1
+
+ var g = svgDoc.getElementById("viewport");
+
+ var p = getEventPoint(evt);
+
+ p = p.matrixTransform(g.getCTM().inverse());
+
+ // Compute new scale matrix in current mouse position
+ var k = root.createSVGMatrix().translate(p.x, p.y).scale(z).translate(-p.x, -p.y);
+
+ setCTM(g, g.getCTM().multiply(k));
+
+ stateTf = stateTf.multiply(k.inverse());
+}
+
+/**
+ * Handle mouse move event.
+ */
+function handleMouseMove(evt) {
+ if(evt.preventDefault)
+ evt.preventDefault();
+
+ evt.returnValue = false;
+
+ var svgDoc = evt.target.ownerDocument;
+
+ var g = svgDoc.getElementById("viewport");
+
+ if(state == 'pan') {
+ // Pan mode
+ var p = getEventPoint(evt).matrixTransform(stateTf);
+
+ setCTM(g, stateTf.inverse().translate(p.x - stateOrigin.x, p.y - stateOrigin.y));
+ } else if(state == 'move') {
+ // Move mode
+ var p = getEventPoint(evt).matrixTransform(g.getCTM().inverse());
+
+ setCTM(stateTarget, root.createSVGMatrix().translate(p.x - stateOrigin.x, p.y - stateOrigin.y).multiply(g.getCTM().inverse()).multiply(stateTarget.getCTM()));
+
+ stateOrigin = p;
+ }
+}
+
+/**
+ * Handle click event.
+ */
+function handleMouseDown(evt) {
+ if(evt.preventDefault)
+ evt.preventDefault();
+
+ evt.returnValue = false;
+
+ var svgDoc = evt.target.ownerDocument;
+
+ var g = svgDoc.getElementById("viewport");
+
+ if(true || evt.target.tagName == "svg") {
+ // Pan mode
+ state = 'pan';
+
+ stateTf = g.getCTM().inverse();
+
+ stateOrigin = getEventPoint(evt).matrixTransform(stateTf);
+ } else {
+ // Move mode
+ state = 'move';
+
+ stateTarget = evt.target;
+
+ stateTf = g.getCTM().inverse();
+
+ stateOrigin = getEventPoint(evt).matrixTransform(stateTf);
+ }
+}
+
+/**
+ * Handle mouse button release event.
+ */
+function handleMouseUp(evt) {
+ if(evt.preventDefault)
+ evt.preventDefault();
+
+ evt.returnValue = false;
+
+ var svgDoc = evt.target.ownerDocument;
+
+ if(state == 'pan' || state == 'move') {
+ // Quit pan mode
+ state = '';
+ }
+}
+
+]]></script>
+EOF
+}
+
# Translate a stack of addresses into a stack of symbols
sub TranslateStack {
my $symbols = shift;
@@ -2310,28 +2689,11 @@
AddEntry($profile, (join "\n", @k), $count);
}
-sub IsSymbolizedProfileFile {
- my $file_name = shift;
-
- if (!(-e $file_name) || !(-r $file_name)) {
- return 0;
- }
-
- $SYMBOL_PAGE =~ m,[^/]+$,; # matches everything after the last slash
- my $symbol_marker = $&;
- # Check if the file contains a symbol-section marker.
- open(TFILE, "<$file_name");
- my @lines = <TFILE>;
- my $result = grep(/^--- *$symbol_marker/, @lines);
- close(TFILE);
- return $result > 0;
-}
-
##### Code to profile a server dynamically #####
sub CheckSymbolPage {
my $url = SymbolPageURL();
- open(SYMBOL, "$WGET $WGET_FLAGS -qO- '$url' |");
+ open(SYMBOL, "$URL_FETCHER '$url' |");
my $line = <SYMBOL>;
$line =~ s/\r//g; # turn windows-looking lines into unix-looking lines
close(SYMBOL);
@@ -2350,33 +2712,45 @@
sub IsProfileURL {
my $profile_name = shift;
- my ($host, $port, $path) = ParseProfileURL($profile_name);
- return defined($host) and defined($port) and defined($path);
+ if (-f $profile_name) {
+ printf STDERR "Using local file $profile_name.\n";
+ return 0;
+ }
+ return 1;
}
sub ParseProfileURL {
my $profile_name = shift;
- if (defined($profile_name) &&
- $profile_name =~ m,^(http://|)([^/:]+):(\d+)(|\@\d+)(|/|.*($PROFILE_PAGE|$PMUPROFILE_PAGE|$HEAP_PAGE|$GROWTH_PAGE|$CONTENTION_PAGE|$WALL_PAGE|$FILTEREDPROFILE_PAGE))$,o) {
- # $6 is $PROFILE_PAGE/$HEAP_PAGE/etc. $5 is *everything* after
- # the hostname, as long as that everything is the empty string,
- # a slash, or something ending in $PROFILE_PAGE/$HEAP_PAGE/etc.
- # So "$6 || $5" is $PROFILE_PAGE/etc if there, or else it's "/" or "".
- return ($2, $3, $6 || $5);
+
+ if (!defined($profile_name) || $profile_name eq "") {
+ return ();
}
- return ();
+
+ # Split profile URL - matches all non-empty strings, so no test.
+ $profile_name =~ m,^(https?://)?([^/]+)(.*?)(/|$PROFILES)?$,;
+
+ my $proto = $1 || "http://";
+ my $hostport = $2;
+ my $prefix = $3;
+ my $profile = $4 || "/";
+
+ my $host = $hostport;
+ $host =~ s/:.*//;
+
+ my $baseurl = "$proto$hostport$prefix";
+ return ($host, $baseurl, $profile);
}
# We fetch symbols from the first profile argument.
sub SymbolPageURL {
- my ($host, $port, $path) = ParseProfileURL($main::pfile_args[0]);
- return "http://$host:$port$SYMBOL_PAGE";
+ my ($host, $baseURL, $path) = ParseProfileURL($main::pfile_args[0]);
+ return "$baseURL$SYMBOL_PAGE";
}
sub FetchProgramName() {
- my ($host, $port, $path) = ParseProfileURL($main::pfile_args[0]);
- my $url = "http://$host:$port$PROGRAM_NAME_PAGE";
- my $command_line = "$WGET $WGET_FLAGS -qO- '$url'";
+ my ($host, $baseURL, $path) = ParseProfileURL($main::pfile_args[0]);
+ my $url = "$baseURL$PROGRAM_NAME_PAGE";
+ my $command_line = "$URL_FETCHER '$url'";
open(CMDLINE, "$command_line |") or error($command_line);
my $cmdline = <CMDLINE>;
$cmdline =~ s/\r//g; # turn windows-looking lines into unix-looking lines
@@ -2393,7 +2767,7 @@
# curl. Redirection happens on borg hosts.
sub ResolveRedirectionForCurl {
my $url = shift;
- my $command_line = "$CURL -s --head '$url'";
+ my $command_line = "$URL_FETCHER --head '$url'";
open(CMDLINE, "$command_line |") or error($command_line);
while (<CMDLINE>) {
s/\r//g; # turn windows-looking lines into unix-looking lines
@@ -2405,6 +2779,20 @@
return $url;
}
+# Add a timeout flag to URL_FETCHER
+sub AddFetchTimeout {
+ my $fetcher = shift;
+ my $timeout = shift;
+ if (defined($timeout)) {
+ if ($fetcher =~ m/\bcurl -s/) {
+ $fetcher .= sprintf(" --max-time %d", $timeout);
+ } elsif ($fetcher =~ m/\brpcget\b/) {
+ $fetcher .= sprintf(" --deadline=%d", $timeout);
+ }
+ }
+ return $fetcher;
+}
+
# Reads a symbol map from the file handle name given as $1, returning
# the resulting symbol map. Also processes variables relating to symbols.
# Currently, the only variable processed is 'binary=<value>' which updates
@@ -2460,10 +2848,14 @@
close(POSTFILE);
my $url = SymbolPageURL();
- # Here we use curl for sending data via POST since old
- # wget doesn't have --post-file option.
- $url = ResolveRedirectionForCurl($url);
- my $command_line = "$CURL -sd '\@$main::tmpfile_sym' '$url'";
+
+ my $command_line;
+ if ($URL_FETCHER =~ m/\bcurl -s/) {
+ $url = ResolveRedirectionForCurl($url);
+ $command_line = "$URL_FETCHER -d '\@$main::tmpfile_sym' '$url'";
+ } else {
+ $command_line = "$URL_FETCHER --post '$url' < '$main::tmpfile_sym'";
+ }
# We use c++filt in case $SYMBOL_PAGE gives us mangled symbols.
my $cppfilt = $obj_tool_map{"c++filt"};
open(SYMBOL, "$command_line | $cppfilt |") or error($command_line);
@@ -2508,10 +2900,10 @@
sub MakeProfileBaseName {
my ($binary_name, $profile_name) = @_;
- my ($host, $port, $path) = ParseProfileURL($profile_name);
+ my ($host, $baseURL, $path) = ParseProfileURL($profile_name);
my $binary_shortname = BaseName($binary_name);
- return sprintf("%s.%s.%s-port%s",
- $binary_shortname, $main::op_time, $host, $port);
+ return sprintf("%s.%s.%s",
+ $binary_shortname, $main::op_time, $host);
}
sub FetchDynamicProfile {
@@ -2523,7 +2915,7 @@
if (!IsProfileURL($profile_name)) {
return $profile_name;
} else {
- my ($host, $port, $path) = ParseProfileURL($profile_name);
+ my ($host, $baseURL, $path) = ParseProfileURL($profile_name);
if ($path eq "" || $path eq "/") {
# Missing type specifier defaults to cpu-profile
$path = $PROFILE_PAGE;
@@ -2531,35 +2923,26 @@
my $profile_file = MakeProfileBaseName($binary_name, $profile_name);
- my $url;
- my $wget_timeout;
- if (($path =~ m/$PROFILE_PAGE/) || ($path =~ m/$PMUPROFILE_PAGE/)) {
- if ($path =~ m/$PROFILE_PAGE/) {
- $url = sprintf("http://$host:$port$path?seconds=%d",
- $main::opt_seconds);
+ my $url = "$baseURL$path";
+ my $fetch_timeout = undef;
+ if ($path =~ m/$PROFILE_PAGE|$PMUPROFILE_PAGE/) {
+ if ($path =~ m/[?]/) {
+ $url .= "&";
} else {
- if ($profile_name =~ m/[?]/) {
- $profile_name .= "&"
- } else {
- $profile_name .= "?"
- }
- $url = sprintf("http://$profile_name" . "seconds=%d",
- $main::opt_seconds);
+ $url .= "?";
}
- $wget_timeout = sprintf("--timeout=%d",
- int($main::opt_seconds * 1.01 + 60));
+ $url .= sprintf("seconds=%d", $main::opt_seconds);
+ $fetch_timeout = $main::opt_seconds * 1.01 + 60;
} else {
# For non-CPU profiles, we add a type-extension to
# the target profile file name.
my $suffix = $path;
$suffix =~ s,/,.,g;
- $profile_file .= "$suffix";
- $url = "http://$host:$port$path";
- $wget_timeout = "";
+ $profile_file .= $suffix;
}
my $profile_dir = $ENV{"PPROF_TMPDIR"} || ($ENV{HOME} . "/pprof");
- if (!(-d $profile_dir)) {
+ if (! -d $profile_dir) {
mkdir($profile_dir)
|| die("Unable to create profile directory $profile_dir: $!\n");
}
@@ -2570,14 +2953,15 @@
return $real_profile;
}
- my $cmd = "$WGET $WGET_FLAGS $wget_timeout -q -O $tmp_profile '$url'";
- if (($path =~ m/$PROFILE_PAGE/) || ($path =~ m/$PMUPROFILE_PAGE/)){
+ my $fetcher = AddFetchTimeout($URL_FETCHER, $fetch_timeout);
+ my $cmd = "$fetcher '$url' > '$tmp_profile'";
+ if ($path =~ m/$PROFILE_PAGE|$PMUPROFILE_PAGE/){
print STDERR "Gathering CPU profile from $url for $main::opt_seconds seconds to\n ${real_profile}\n";
if ($encourage_patience) {
print STDERR "Be patient...\n";
}
} else {
- print STDERR "Fetching $path profile from $host:$port to\n ${real_profile}\n";
+ print STDERR "Fetching $path profile from $url to\n ${real_profile}\n";
}
(system($cmd) == 0) || error("Failed to get profile: $cmd: $!\n");
@@ -2624,6 +3008,7 @@
} else {
$position = 1 | ($position << 1);
TryCollectProfile($maxlevel, $level, $position);
+ cleanup();
exit(0);
}
}
@@ -2662,6 +3047,7 @@
stride => 512 * 1024, # must be a multiple of bitsize/8
slots => [],
unpack_code => "", # N for big-endian, V for little
+ perl_is_64bit => 1, # matters if profile is 64-bit
};
bless $self, $class;
# Let unittests adjust the stride
@@ -2685,17 +3071,15 @@
}
@$slots = unpack($self->{unpack_code} . "*", $str);
} else {
- # If we're a 64-bit profile, make sure we're a 64-bit-capable
+ # If we're a 64-bit profile, check if we're a 64-bit-capable
# perl. Otherwise, each slot will be represented as a float
# instead of an int64, losing precision and making all the
- # 64-bit addresses right. We *could* try to handle this with
- # software emulation of 64-bit ints, but that's added complexity
- # for no clear benefit (yet). We use 'Q' to test for 64-bit-ness;
- # perl docs say it's only available on 64-bit perl systems.
+ # 64-bit addresses wrong. We won't complain yet, but will
+ # later if we ever see a value that doesn't fit in 32 bits.
my $has_q = 0;
eval { $has_q = pack("Q", "1") ? 1 : 1; };
if (!$has_q) {
- ::error("$fname: need a 64-bit perl to process this 64-bit profile.\n");
+ $self->{perl_is_64bit} = 0;
}
read($self->{file}, $str, 8);
if (substr($str, 4, 4) eq chr(0)x4) {
@@ -2731,11 +3115,17 @@
# TODO(csilvers): if this is a 32-bit perl, the math below
# could end up in a too-large int, which perl will promote
# to a double, losing necessary precision. Deal with that.
- if ($self->{unpack_code} eq 'V') { # little-endian
- push(@b64_values, $b32_values[$i] + $b32_values[$i+1] * (2**32));
- } else {
- push(@b64_values, $b32_values[$i] * (2**32) + $b32_values[$i+1]);
- }
+ # Right now, we just die.
+ my ($lo, $hi) = ($b32_values[$i], $b32_values[$i+1]);
+ if ($self->{unpack_code} eq 'N') { # big-endian
+ ($lo, $hi) = ($hi, $lo);
+ }
+ my $value = $lo + $hi * (2**32);
+ if (!$self->{perl_is_64bit} && # check value is exactly represented
+ (($value % (2**32)) != $lo || int($value / (2**32)) != $hi)) {
+ ::error("Need a 64-bit perl to process this 64-bit profile.\n");
+ }
+ push(@b64_values, $value);
}
@$slots = @b64_values;
}
@@ -2764,6 +3154,44 @@
}
}
+# Return the next line from the profile file, assuming it's a text
+# line (which in this case means, doesn't start with a NUL byte). If
+# it's not a text line, return "". At EOF, return undef, like perl does.
+# Input file should be in binmode.
+sub ReadProfileLine {
+ local *PROFILE = shift;
+ my $firstchar = "";
+ my $line = "";
+ read(PROFILE, $firstchar, 1);
+ seek(PROFILE, -1, 1); # unread the firstchar
+ if ($firstchar eq "\0") {
+ return "";
+ }
+ $line = <PROFILE>;
+ if (defined($line)) {
+ $line =~ s/\r//g; # turn windows-looking lines into unix-looking lines
+ }
+ return $line;
+}
+
+sub IsSymbolizedProfileFile {
+ my $file_name = shift;
+ if (!(-e $file_name) || !(-r $file_name)) {
+ return 0;
+ }
+ # Check if the file contains a symbol-section marker.
+ open(TFILE, "<$file_name");
+ binmode TFILE;
+ my $firstline = ReadProfileLine(*TFILE);
+ close(TFILE);
+ if (!$firstline) {
+ return 0;
+ }
+ $SYMBOL_PAGE =~ m,[^/]+$,; # matches everything after the last slash
+ my $symbol_marker = $&;
+ return $firstline =~ /^--- *$symbol_marker/;
+}
+
# Parse profile generated by common/profiler.cc and return a reference
# to a map:
# $result->{version} Version number of profile file
@@ -2798,28 +3226,17 @@
# whole firstline, since it may be gigabytes(!) of data.
open(PROFILE, "<$fname") || error("$fname: $!\n");
binmode PROFILE; # New perls do UTF-8 processing
- my $firstchar = "";
- my $header = "";
- read(PROFILE, $firstchar, 1);
- seek(PROFILE, -1, 1); # unread the firstchar
- if ($firstchar ne "\0") {
- $header = <PROFILE>;
- $header =~ s/\r//g; # turn windows-looking lines into unix-looking lines
+ my $header = ReadProfileLine(*PROFILE);
+ if (!defined($header)) { # means "at EOF"
+ error("Profile is empty.\n");
}
my $symbols;
if ($header =~ m/^--- *$symbol_marker/o) {
- # read the symbol section of the symbolized profile file
+ # Read the symbol section of the symbolized profile file.
$symbols = ReadSymbols(*PROFILE{IO});
-
- # read the next line to get the header for the remaining profile
- $header = "";
- read(PROFILE, $firstchar, 1);
- seek(PROFILE, -1, 1); # unread the firstchar
- if ($firstchar ne "\0") {
- $header = <PROFILE>;
- $header =~ s/\r//g;
- }
+ # Read the next line to get the header for the remaining profile.
+ $header = ReadProfileLine(*PROFILE) || "";
}
my $result;
@@ -3114,18 +3531,18 @@
# The sampling frequency is the rate of a Poisson process.
# This means that the probability of sampling an allocation of
# size X with sampling rate Y is 1 - exp(-X/Y)
- my $ratio;
- my $scale_factor;
- if ($n1 != 0) {
- $ratio = (($s1*1.0)/$n1)/($sample_adjustment);
- $scale_factor = 1/(1 - exp(-$ratio));
- $n1 *= $scale_factor;
- $s1 *= $scale_factor;
- }
- $ratio = (($s2*1.0)/$n2)/($sample_adjustment);
- $scale_factor = 1/(1 - exp(-$ratio));
+ if ($n1 != 0) {
+ my $ratio = (($s1*1.0)/$n1)/($sample_adjustment);
+ my $scale_factor = 1/(1 - exp(-$ratio));
+ $n1 *= $scale_factor;
+ $s1 *= $scale_factor;
+ }
+ if ($n2 != 0) {
+ my $ratio = (($s2*1.0)/$n2)/($sample_adjustment);
+ my $scale_factor = 1/(1 - exp(-$ratio));
$n2 *= $scale_factor;
$s2 *= $scale_factor;
+ }
} else {
# Remote-heap version 1
my $ratio;
@@ -3676,35 +4093,34 @@
my $symbols = {};
- # Map each PC value to the containing library
+ # Map each PC value to the containing library. To make this faster,
+ # we sort libraries by their starting pc value (highest first), and
+ # advance through the libraries as we advance the pc. Sometimes the
+ # addresses of libraries may overlap with the addresses of the main
+ # binary, so to make sure the libraries 'win', we iterate over the
+ # libraries in reverse order (binary will have the lowest start addr).
my @pcs = (sort { $a cmp $b } keys(%{$pcset}));
- foreach my $lib (reverse sort {$a->[1] cmp $b->[1]} @{$libs}) {
+ foreach my $lib (sort {$b->[1] cmp $a->[1]} @{$libs}) {
my $libname = $lib->[0];
my $start = $lib->[1];
my $finish = $lib->[2];
my $offset = $lib->[3];
# Get list of pcs that belong in this library.
- my $pc = pop(@pcs);
- my @pcs2 = ();
my $contained = [];
- while (defined $pc && $pc gt $finish) {
- unshift(@pcs2, $pc);
- $pc = pop(@pcs);
+ my ($start_pc_index, $finish_pc_index);
+ for ($finish_pc_index = $#pcs + 1; $finish_pc_index > 0;
+ $finish_pc_index--) {
+ last if $pcs[$finish_pc_index - 1] le $finish;
}
- while (defined $pc && $pc ge $start) {
- push(@{$contained}, $pc);
- $pc = pop(@pcs);
+ for ($start_pc_index = $finish_pc_index; $start_pc_index > 0;
+ $start_pc_index--) {
+ last if $pcs[$start_pc_index - 1] lt $start;
}
- if (defined $pc) {
- push(@pcs, $pc);
- }
- @pcs = (@pcs, @pcs2);
+ @{$contained} = splice(@pcs, $start_pc_index,
+ $finish_pc_index - $start_pc_index);
# Map to symbols
MapToSymbols($libname, AddressSub($start, $offset), $contained, $symbols);
- if (scalar(@pcs) == 0) {
- last;
- }
}
return $symbols;
@@ -3732,8 +4148,7 @@
# If "addr2line" isn't installed on the system at all, just use
# nm to get what info we can (function names, but not line numbers).
- if ($main::opt_lines == 0 || system("$addr2line --help >/dev/null 2>&1")
- != 0) {
+ if (system("$addr2line --help >/dev/null 2>&1") != 0) {
MapSymbolsWithNM($image, $offset, $pclist, $symbols);
return;
}
@@ -3919,6 +4334,8 @@
if ($file_type =~ /Mach-O/) {
# OS X uses otool to examine Mach-O files, rather than objdump.
$obj_tool_map{"otool"} = "otool";
+ $obj_tool_map{"addr2line"} = "false"; # no addr2line
+ $obj_tool_map{"objdump"} = "false"; # no objdump
}
# Go fill in %obj_tool_map with the pathnames to use:
@@ -3935,18 +4352,27 @@
my $tool = shift;
my $path;
- if ($main::opt_tools ne "") {
- # Use a prefix specified by the --tools option...
- $path = $main::opt_tools . $tool;
- if (!-x $path) {
- error("No '$tool' found with prefix specified by --tools $main::opt_tools\n");
+ # --tools (or $PPROF_TOOLS) is a comma separated list, where each
+ # item is either a) a pathname prefix, or b) a map of the form
+ # <tool>:<path>. First we look for an entry of type (b) for our
+ # tool. If one is found, we use it. Otherwise, we consider all the
+ # pathname prefixes in turn, until one yields an existing file. If
+ # none does, we use a default path.
+ my $tools = $main::opt_tools || $ENV{"PPROF_TOOLS"} || "";
+ if ($tools =~ m/(,|^)\Q$tool\E:([^,]*)/) {
+ $path = $2;
+ # TODO(csilvers): sanity-check that $path exists? Hard if it's relative.
+ } elsif ($tools ne '') {
+ foreach my $prefix (split(',', $tools)) {
+ next if ($prefix =~ /:/); # ignore "tool:fullpath" entries in the list
+ if (-x $prefix . $tool) {
+ $path = $prefix . $tool;
+ last;
+ }
}
- } elsif (exists $ENV{"PPROF_TOOLS"} &&
- $ENV{"PPROF_TOOLS"} ne "") {
- #... or specified with the PPROF_TOOLS environment variable...
- $path = $ENV{"PPROF_TOOLS"} . $tool;
- if (!-x $path) {
- error("No '$tool' found with prefix specified by PPROF_TOOLS=$ENV{PPROF_TOOLS}\n");
+ if (!$path) {
+ error("No '$tool' found with prefix specified by " .
+ "--tools (or \$PPROF_TOOLS) '$tools'\n");
}
} else {
# ... otherwise use the version that exists in the same directory as
@@ -3965,9 +4391,8 @@
sub cleanup {
unlink($main::tmpfile_sym);
- for (my $i = 0; $i < $main::next_tmpfile; $i++) {
- unlink(PsTempName($i));
- }
+ unlink(keys %main::tempnames);
+
# We leave any collected profiles in $HOME/pprof in case the user wants
# to look at them later. We print a message informing them of this.
if ((scalar(@main::profile_files) > 0) &&
@@ -4010,7 +4435,7 @@
my $routine = "";
while (<NM>) {
s/\r//g; # turn windows-looking lines into unix-looking lines
- if (m/^([0-9a-f]+) (.) (..*)/) {
+ if (m/^\s*([0-9a-f]+) (.) (..*)/) {
my $start_val = $1;
my $type = $2;
my $this_routine = $3;
@@ -4072,7 +4497,6 @@
$symbol_table->{$routine} = [HexExtend($last_start),
HexExtend($last_start)];
}
-
return $symbol_table;
}
@@ -4120,7 +4544,11 @@
my @nm_commands = ("$nm -n $flatten_flag $demangle_flag" .
" $image 2>/dev/null $cppfilt_flag",
"$nm -D -n $flatten_flag $demangle_flag" .
- " $image 2>/dev/null $cppfilt_flag");
+ " $image 2>/dev/null $cppfilt_flag",
+ # 6nm is for Go binaries
+ "6nm $image 2>/dev/null | sort",
+ );
+
# If the executable is an MS Windows PDB-format executable, we'll
# have set up obj_tool_map("nm_pdb"). In this case, we actually
# want to use both unix nm and windows-specific nm_pdb, since
diff --git a/jemalloc/configure.ac b/jemalloc/configure.ac
index ce6e679..0ed1373 100644
--- a/jemalloc/configure.ac
+++ b/jemalloc/configure.ac
@@ -150,7 +150,7 @@
[attribute])
if test "x${attribute}" = "xyes" ; then
AC_DEFINE([JEMALLOC_HAVE_ATTR], [ ])
- if test "x$GCC" = "xyes" ; then
+  if test "x$GCC" = "xyes" -a "x${abi}" = "xelf"; then
JE_CFLAGS_APPEND([-fvisibility=internal])
fi
fi
@@ -166,17 +166,20 @@
*-*-darwin*)
CFLAGS="$CFLAGS -fno-common -no-cpp-precomp"
abi="macho"
+ AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE])
RPATH=""
;;
*-*-freebsd*)
CFLAGS="$CFLAGS"
abi="elf"
+ AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE])
RPATH="-Wl,-rpath,"
;;
*-*-linux*)
CFLAGS="$CFLAGS"
CPPFLAGS="$CPPFLAGS -D_GNU_SOURCE"
abi="elf"
+ AC_DEFINE([JEMALLOC_PURGE_MADVISE_DONTNEED])
RPATH="-Wl,-rpath,"
;;
*-*-netbsd*)
@@ -191,6 +194,7 @@
[CFLAGS="$CFLAGS"; abi="elf"],
[abi="aout"])
AC_MSG_RESULT([$abi])
+ AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE])
RPATH="-Wl,-rpath,"
;;
*-*-solaris2*)
@@ -245,12 +249,20 @@
AC_ARG_WITH([jemalloc_prefix],
[AS_HELP_STRING([--with-jemalloc-prefix=<prefix>], [Prefix to prepend to all public APIs])],
[JEMALLOC_PREFIX="$with_jemalloc_prefix"],
- [JEMALLOC_PREFIX=]
+ [if test "x$abi" != "xmacho" ; then
+ JEMALLOC_PREFIX=""
+else
+ JEMALLOC_PREFIX="je_"
+fi]
)
if test "x$JEMALLOC_PREFIX" != "x" ; then
- AC_DEFINE([JEMALLOC_PREFIX], [ ])
+ JEMALLOC_CPREFIX=`echo ${JEMALLOC_PREFIX} | tr "a-z" "A-Z"`
+ AC_DEFINE_UNQUOTED([JEMALLOC_PREFIX], ["$JEMALLOC_PREFIX"])
+ AC_DEFINE_UNQUOTED([JEMALLOC_CPREFIX], ["$JEMALLOC_CPREFIX"])
jemalloc_prefix="$JEMALLOC_PREFIX"
+ jemalloc_cprefix="$JEMALLOC_CPREFIX"
AC_SUBST([jemalloc_prefix])
+ AC_SUBST([jemalloc_cprefix])
AC_DEFINE_UNQUOTED([JEMALLOC_P(string_that_no_one_should_want_to_use_as_a_jemalloc_API_prefix)], [${JEMALLOC_PREFIX}##string_that_no_one_should_want_to_use_as_a_jemalloc_API_prefix])
fi
@@ -266,14 +278,17 @@
cfgoutputs_in="${srcroot}Makefile.in ${srcroot}doc/jemalloc.3.in"
cfgoutputs_in="${cfgoutputs_in} ${srcroot}include/jemalloc/jemalloc.h.in"
cfgoutputs_in="${cfgoutputs_in} ${srcroot}include/jemalloc/internal/jemalloc_internal.h.in"
+cfgoutputs_in="${cfgoutputs_in} ${srcroot}test/jemalloc_test.h.in"
cfgoutputs_out="Makefile doc/jemalloc${install_suffix}.3"
cfgoutputs_out="${cfgoutputs_out} include/jemalloc/jemalloc${install_suffix}.h"
cfgoutputs_out="${cfgoutputs_out} include/jemalloc/internal/jemalloc_internal.h"
+cfgoutputs_out="${cfgoutputs_out} test/jemalloc_test.h"
cfgoutputs_tup="Makefile doc/jemalloc${install_suffix}.3:doc/jemalloc.3.in"
cfgoutputs_tup="${cfgoutputs_tup} include/jemalloc/jemalloc${install_suffix}.h:include/jemalloc/jemalloc.h.in"
cfgoutputs_tup="${cfgoutputs_tup} include/jemalloc/internal/jemalloc_internal.h"
+cfgoutputs_tup="${cfgoutputs_tup} test/jemalloc_test.h:test/jemalloc_test.h.in"
cfghdrs_in="${srcroot}include/jemalloc/jemalloc_defs.h.in"
@@ -281,6 +296,23 @@
cfghdrs_tup="include/jemalloc/jemalloc_defs${install_suffix}.h:include/jemalloc/jemalloc_defs.h.in"
+dnl Do not silence irrelevant compiler warnings by default, since enabling this
+dnl option incurs a performance penalty.
+AC_ARG_ENABLE([cc-silence],
+ [AS_HELP_STRING([--enable-cc-silence],
+ [Silence irrelevant compiler warnings])],
+[if test "x$enable_cc_silence" = "xno" ; then
+ enable_cc_silence="0"
+else
+ enable_cc_silence="1"
+fi
+],
+[enable_cc_silence="0"]
+)
+if test "x$enable_cc_silence" = "x1" ; then
+ AC_DEFINE([JEMALLOC_CC_SILENCE])
+fi
+
dnl Do not compile with debugging by default.
AC_ARG_ENABLE([debug],
[AS_HELP_STRING([--enable-debug], [Build debugging code])],
@@ -294,8 +326,18 @@
)
if test "x$enable_debug" = "x1" ; then
AC_DEFINE([JEMALLOC_DEBUG], [ ])
+ AC_DEFINE([JEMALLOC_IVSALLOC], [ ])
fi
AC_SUBST([enable_debug])
+if test "x$enable_debug" = "x0" ; then
+ roff_debug=".\\\" "
+ roff_no_debug=""
+else
+ roff_debug=""
+ roff_no_debug=".\\\" "
+fi
+AC_SUBST([roff_debug])
+AC_SUBST([roff_no_debug])
dnl Only optimize if not debugging.
if test "x$enable_debug" = "x0" -a "x$no_CFLAGS" = "xyes" ; then
@@ -379,7 +421,47 @@
fi,
LUNWIND="-lunwind"
)
-dnl Finish prof-related definitions below, once TLS configuration is done.
+if test "x$enable_prof" = "x1" ; then
+ LIBS="$LIBS -lm"
+ AC_DEFINE([JEMALLOC_PROF], [ ])
+ if test "x$enable_prof_libunwind" = "x1" ; then
+ AC_CHECK_HEADERS([libunwind.h], , [enable_prof_libunwind="0"])
+ if test "x$LUNWIND" = "x-lunwind" ; then
+ AC_CHECK_LIB([unwind], [backtrace], [LIBS="$LIBS $LUNWIND"],
+ [enable_prof_libunwind="0"])
+ else
+ LIBS="$LIBS $LUNWIND"
+ fi
+ if test "x${enable_prof_libunwind}" = "x1" ; then
+ AC_DEFINE([JEMALLOC_PROF_LIBUNWIND], [ ])
+ fi
+ fi
+fi
+AC_SUBST([enable_prof])
+if test "x$enable_prof" = "x0" ; then
+ roff_prof=".\\\" "
+ roff_no_prof=""
+else
+ roff_prof=""
+ roff_no_prof=".\\\" "
+fi
+AC_SUBST([roff_prof])
+AC_SUBST([roff_no_prof])
+
+dnl If libunwind isn't enabled, try to use libgcc rather than gcc intrinsics
+dnl for backtracing.
+if test "x$enable_prof" = "x1" -a "x$enable_prof_libgcc" = "x1" ; then
+ if test "x$enable_prof_libunwind" = "x0" -a "x$GCC" = "xyes" ; then
+ enable_prof_libgcc="1"
+ AC_CHECK_HEADERS([unwind.h], , [enable_prof_libgcc="0"])
+ AC_CHECK_LIB([gcc], [_Unwind_Backtrace], [LIBS="$LIBS -lgcc"], [enable_prof_libgcc="0"])
+ if test "x${enable_prof_libgcc}" = "x1" ; then
+ AC_DEFINE([JEMALLOC_PROF_LIBGCC], [ ])
+ fi
+ else
+ enable_prof_libgcc="0"
+ fi
+fi
dnl Enable tiny allocations by default.
AC_ARG_ENABLE([tiny],
@@ -417,7 +499,19 @@
],
[enable_tcache="1"]
)
-dnl Finish tcache-related definitions below, once TLS configuration is done.
+if test "x$enable_tcache" = "x1" ; then
+ AC_DEFINE([JEMALLOC_TCACHE], [ ])
+fi
+AC_SUBST([enable_tcache])
+if test "x$enable_tcache" = "x0" ; then
+ roff_tcache=".\\\" "
+ roff_no_tcache=""
+else
+ roff_tcache=""
+ roff_no_tcache=".\\\" "
+fi
+AC_SUBST([roff_tcache])
+AC_SUBST([roff_no_tcache])
dnl Do not enable mmap()ped swap files by default.
AC_ARG_ENABLE([swap],
@@ -579,7 +673,7 @@
dnl Set VERSION if source directory has an embedded git repository.
if test -d "${srcroot}../.git" ; then
- git describe --long > ${srcroot}VERSION
+ git describe --long --abbrev=40 > ${srcroot}VERSION
fi
jemalloc_version=`cat ${srcroot}VERSION`
jemalloc_version_major=`echo ${jemalloc_version} | tr ".g-" " " | awk '{print [$]1}'`
@@ -647,69 +741,63 @@
AC_MSG_RESULT([no])
enable_tls="0")
fi
+AC_SUBST([enable_tls])
if test "x${enable_tls}" = "x0" ; then
AC_DEFINE_UNQUOTED([NO_TLS], [ ])
fi
-dnl Finish tcache-related definitions, now that TLS configuration is done.
-if test "x$enable_tls" = "x0" ; then
- enable_tcache="0"
-fi
-if test "x$enable_tcache" = "x1" ; then
- AC_DEFINE([JEMALLOC_TCACHE], [ ])
-fi
-AC_SUBST([enable_tcache])
-if test "x$enable_tcache" = "x0" ; then
- roff_tcache=".\\\" "
- roff_no_tcache=""
-else
- roff_tcache=""
- roff_no_tcache=".\\\" "
-fi
-AC_SUBST([roff_tcache])
-AC_SUBST([roff_no_tcache])
+dnl ============================================================================
+dnl Check for allocator-related functions that should be wrapped.
-dnl Finish prof-related definitions, now that TLS configuration is done.
-if test "x$enable_tls" = "x0" ; then
- enable_prof="0"
-fi
-if test "x$enable_prof" = "x1" ; then
- LIBS="$LIBS -lm"
- AC_DEFINE([JEMALLOC_PROF], [ ])
- if test "x$enable_prof_libunwind" = "x1" ; then
- AC_CHECK_HEADERS([libunwind.h], , [enable_prof_libunwind="0"])
- if test "x$LUNWIND" = "x-lunwind" ; then
- AC_CHECK_LIB([unwind], [backtrace], [LIBS="$LIBS $LUNWIND"],
- [enable_prof_libunwind="0"])
- else
- LIBS="$LIBS $LUNWIND"
- fi
- if test "x${enable_prof_libunwind}" = "x1" ; then
- AC_DEFINE([JEMALLOC_PROF_LIBUNWIND], [ ])
- fi
- fi
-fi
-AC_SUBST([enable_prof])
-if test "x$enable_prof" = "x0" ; then
- roff_prof=".\\\" "
- roff_no_prof=""
-else
- roff_prof=""
- roff_no_prof=".\\\" "
-fi
-AC_SUBST([roff_prof])
-AC_SUBST([roff_no_prof])
+AC_CHECK_FUNC([memalign],
+ [AC_DEFINE([JEMALLOC_OVERRIDE_MEMALIGN])])
+AC_CHECK_FUNC([valloc],
+ [AC_DEFINE([JEMALLOC_OVERRIDE_VALLOC])])
-dnl If libunwind isn't enabled, try to use libgcc rather than gcc intrinsics
-dnl for backtracing.
-if test "x$enable_prof" = "x1" -a "x$enable_prof_libunwind" = "x0" \
- -a "x$GCC" = "xyes" -a "x$enable_prof_libgcc" = "x1" ; then
- enable_prof_libgcc="1"
- AC_CHECK_HEADERS([unwind.h], , [enable_prof_libgcc="0"])
- AC_CHECK_LIB([gcc], [_Unwind_Backtrace], [LIBS="$LIBS -lgcc"], [enable_prof_libgcc="0"])
- if test "x${enable_prof_libgcc}" = "x1" ; then
- AC_DEFINE([JEMALLOC_PROF_LIBGCC], [ ])
- fi
+dnl ============================================================================
+dnl Darwin-related configuration.
+
+if test "x${abi}" = "xmacho" ; then
+ AC_DEFINE([JEMALLOC_IVSALLOC])
+ AC_DEFINE([JEMALLOC_ZONE])
+
+ dnl The szone version jumped from 3 to 6 between the OS X 10.5.x and 10.6
+ dnl releases. malloc_zone_t and malloc_introspection_t have new fields in
+ dnl 10.6, which is the only source-level indication of the change.
+ AC_MSG_CHECKING([malloc zone version])
+ AC_TRY_COMPILE([#include <stdlib.h>
+#include <malloc/malloc.h>], [
+ static malloc_zone_t zone;
+ static struct malloc_introspection_t zone_introspect;
+
+ zone.size = NULL;
+ zone.malloc = NULL;
+ zone.calloc = NULL;
+ zone.valloc = NULL;
+ zone.free = NULL;
+ zone.realloc = NULL;
+ zone.destroy = NULL;
+ zone.zone_name = "jemalloc_zone";
+ zone.batch_malloc = NULL;
+ zone.batch_free = NULL;
+ zone.introspect = &zone_introspect;
+ zone.version = 6;
+ zone.memalign = NULL;
+ zone.free_definite_size = NULL;
+
+ zone_introspect.enumerator = NULL;
+ zone_introspect.good_size = NULL;
+ zone_introspect.check = NULL;
+ zone_introspect.print = NULL;
+ zone_introspect.log = NULL;
+ zone_introspect.force_lock = NULL;
+ zone_introspect.force_unlock = NULL;
+ zone_introspect.statistics = NULL;
+ zone_introspect.zone_locked = NULL;
+], [AC_DEFINE_UNQUOTED([JEMALLOC_ZONE_VERSION], [6])
+ AC_MSG_RESULT([6])],
+ [AC_DEFINE_UNQUOTED([JEMALLOC_ZONE_VERSION], [3])
+ AC_MSG_RESULT([3])])
fi
dnl ============================================================================
@@ -755,9 +843,11 @@
AC_MSG_RESULT([JEMALLOC_PREFIX : ${JEMALLOC_PREFIX}])
AC_MSG_RESULT([install_suffix : ${install_suffix}])
AC_MSG_RESULT([autogen : ${enable_autogen}])
+AC_MSG_RESULT([cc-silence : ${enable_cc_silence}])
AC_MSG_RESULT([debug : ${enable_debug}])
AC_MSG_RESULT([stats : ${enable_stats}])
AC_MSG_RESULT([prof : ${enable_prof}])
+AC_MSG_RESULT([prof-libgcc : ${enable_prof_libgcc}])
AC_MSG_RESULT([prof-libunwind : ${enable_prof_libunwind}])
AC_MSG_RESULT([tiny : ${enable_tiny}])
AC_MSG_RESULT([tcache : ${enable_tcache}])
@@ -768,4 +858,5 @@
AC_MSG_RESULT([dss : ${enable_dss}])
AC_MSG_RESULT([dynamic_page_shift : ${enable_dynamic_page_shift}])
AC_MSG_RESULT([lazy_lock : ${enable_lazy_lock}])
+AC_MSG_RESULT([tls : ${enable_tls}])
AC_MSG_RESULT([===============================================================================])
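The platform cases above pair each OS with a purge strategy: the Darwin, FreeBSD, and NetBSD branches define JEMALLOC_PURGE_MADVISE_FREE, while the Linux branch defines JEMALLOC_PURGE_MADVISE_DONTNEED. A minimal sketch of the kind of dispatch these defines make possible (a hypothetical helper, not the library's actual purge routine):

#include <stddef.h>
#include <sys/mman.h>

/*
 * Hypothetical pages_purge() helper: MADV_FREE lets the kernel reclaim the
 * pages lazily if memory gets tight, while MADV_DONTNEED drops them
 * immediately; configure.ac selects whichever advice the platform supports.
 */
static void
pages_purge(void *addr, size_t length)
{
#if defined(JEMALLOC_PURGE_MADVISE_FREE)
	madvise(addr, length, MADV_FREE);
#elif defined(JEMALLOC_PURGE_MADVISE_DONTNEED)
	madvise(addr, length, MADV_DONTNEED);
#endif
}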
diff --git a/jemalloc/doc/jemalloc.3.in b/jemalloc/doc/jemalloc.3.in
index cf5cb5e..6286664 100644
--- a/jemalloc/doc/jemalloc.3.in
+++ b/jemalloc/doc/jemalloc.3.in
@@ -38,8 +38,8 @@
.\" @(#)malloc.3 8.1 (Berkeley) 6/4/93
.\" $FreeBSD: head/lib/libc/stdlib/malloc.3 182225 2008-08-27 02:00:53Z jasone $
.\"
-.Dd April 2, 2010
-.Dt JEMALLOC 3
+.Dd October 24, 2010
+.Dt jemalloc 3
.Os
.Sh NAME
.Nm @jemalloc_prefix@malloc ,
@@ -51,13 +51,24 @@
.Nm @jemalloc_prefix@malloc_stats_print ,
.Nm @jemalloc_prefix@mallctl ,
.Nm @jemalloc_prefix@mallctlnametomib ,
-.Nm @jemalloc_prefix@mallctlbymib
+.Nm @jemalloc_prefix@mallctlbymib ,
+.Nm @jemalloc_prefix@allocm ,
+.Nm @jemalloc_prefix@rallocm ,
+.Nm @jemalloc_prefix@sallocm ,
+.Nm @jemalloc_prefix@dallocm
.Nd general purpose memory allocation functions
.Sh LIBRARY
.Sy libjemalloc@install_suffix@
+.Pp
+This manual describes jemalloc @jemalloc_version@.
+More information can be found at the
+.UR http://\:www.canonware.com/\:jemalloc/
+jemalloc website
+.UE .
.Sh SYNOPSIS
.In stdlib.h
.In jemalloc/jemalloc@install_suffix@.h
+.Ss Standard API
.Ft void *
.Fn @jemalloc_prefix@malloc "size_t size"
.Ft void *
@@ -68,6 +79,7 @@
.Fn @jemalloc_prefix@realloc "void *ptr" "size_t size"
.Ft void
.Fn @jemalloc_prefix@free "void *ptr"
+.Ss Non-standard API
.Ft size_t
.Fn @jemalloc_prefix@malloc_usable_size "const void *ptr"
.Ft void
@@ -79,10 +91,20 @@
.Ft int
.Fn @jemalloc_prefix@mallctlbymib "const size_t *mib" "size_t miblen" "void *oldp" "size_t *oldlenp" "void *newp" "size_t newlen"
.Ft const char *
-.Va @jemalloc_prefix@malloc_options ;
+.Va @jemalloc_prefix@malloc_conf ;
.Ft void
.Fn \*(lp*@jemalloc_prefix@malloc_message\*(rp "void *cbopaque" "const char *s"
+.Ss Experimental API
+.Ft int
+.Fn @jemalloc_prefix@allocm "void **ptr" "size_t *rsize" "size_t size" "int flags"
+.Ft int
+.Fn @jemalloc_prefix@rallocm "void **ptr" "size_t *rsize" "size_t size" "size_t extra" "int flags"
+.Ft int
+.Fn @jemalloc_prefix@sallocm "const void *ptr" "size_t *rsize" "int flags"
+.Ft int
+.Fn @jemalloc_prefix@dallocm "void *ptr" "int flags"
.Sh DESCRIPTION
+.Ss Standard API
The
.Fn @jemalloc_prefix@malloc
function allocates
@@ -158,7 +180,7 @@
is
.Dv NULL ,
no action occurs.
-.Pp
+.Ss Non-standard API
The
.Fn @jemalloc_prefix@malloc_usable_size
function returns the usable size of the allocation pointed to by
@@ -289,255 +311,130 @@
/* Do something with bin_size... */
}
.Ed
-.Sh TUNING
-Once, when the first call is made to one of these memory allocation
-routines, various flags will be set or reset, which affects the
-workings of this allocator implementation.
+.Ss Experimental API
+The experimental API is subject to change or removal without regard for
+backward compatibility.
.Pp
The
-.Dq name
-of the file referenced by the symbolic link named
-.Pa /etc/jemalloc.conf ,
-the value of the environment variable
-.Ev JEMALLOC_OPTIONS ,
-and the string pointed to by the global variable
-.Va @jemalloc_prefix@malloc_options
-will be interpreted, in that order, from left to right as flags.
-.Pp
-Each flag is a single letter, optionally prefixed by a non-negative base 10
-integer repetition count.
-For example,
-.Dq 3N
-is equivalent to
-.Dq NNN .
-Some flags control parameter magnitudes, where uppercase increases the
-magnitude, and lowercase decreases the magnitude.
-Other flags control boolean parameters, where uppercase indicates that a
-behavior is set, or on, and lowercase means that a behavior is not set, or off.
-.Bl -tag -width indent
-.It A
-All warnings (except for the warning about unknown
-flags being set) become fatal.
-The process will call
-.Xr abort 3
-in these cases.
-@roff_prof@.It B
-@roff_prof@Double/halve the maximum backtrace depth when profiling memory
-@roff_prof@allocation activity.
-@roff_prof@The default is 4.
-.It C
-Double/halve the size of the maximum size class that is a multiple of the
-cacheline size (64).
-Above this size, subpage spacing (256 bytes) is used for size classes.
-The default value is 512 bytes.
-.It D
-Halve/double the per-arena minimum ratio of active to dirty pages.
-Some dirty unused pages may be allowed to accumulate, within the limit set by
-the ratio (or one chunk worth of dirty pages, whichever is greater), before
-informing the kernel about some of those pages via
-.Xr madvise 2 .
-This provides the kernel with sufficient information to recycle dirty pages if
-physical memory becomes scarce and the pages remain unused.
-The default minimum ratio is 32:1;
-.Ev JEMALLOC_OPTIONS=6D
-will disable dirty page purging.
-@roff_prof@.It E
-@roff_prof@Activate/deactivate profiling.
-@roff_prof@This is a secondary control mechanism that makes it possible to
-@roff_prof@start the application with profiling enabled (see the
-@roff_prof@.Dq F
-@roff_prof@option) but inactive, then toggle profiling at any time during
-@roff_prof@program execution with the
-@roff_prof@.Dq prof.active
-@roff_prof@mallctl.
-@roff_prof@This option is enabled by default.
-@roff_prof@.It F
-@roff_prof@Profile memory allocation activity, and use an
-@roff_prof@.Xr atexit 3
-@roff_prof@function to dump final memory usage to a file named according to
-@roff_prof@the pattern
-@roff_prof@.Pa <prefix>.<pid>.<seq>.f.heap ,
-@roff_prof@where
-@roff_prof@.Pa <prefix>
-@roff_prof@is controlled by the
-@roff_prof@JEMALLOC_PROF_PREFIX
-@roff_prof@environment variable.
-@roff_prof@See the
-@roff_prof@.Dq B
-@roff_prof@option for backtrace depth control.
-@roff_prof@See the
-@roff_prof@.Dq E
-@roff_prof@option for on-the-fly activation/deactivation.
-@roff_prof@See the
-@roff_prof@.Dq S
-@roff_prof@option for probabilistic sampling control.
-@roff_prof@See the
-@roff_prof@.Dq I
-@roff_prof@option for information on interval-triggered profile dumping, and the
-@roff_prof@.Dq U
-@roff_prof@option for information on high-water-triggered profile dumping.
-@roff_prof@Profile output is compatible with the included pprof Perl script,
-@roff_prof@which originates from the google-perftools package
-@roff_prof@(http://code.google.com/p/google-perftools/).
-@roff_tcache@.It G
-@roff_tcache@Double/halve the approximate interval (counted in terms of
-@roff_tcache@thread-specific cache allocation/deallocation events) between full
-@roff_tcache@thread-specific cache garbage collection sweeps.
-@roff_tcache@Garbage collection is actually performed incrementally, one size
-@roff_tcache@class at a time, in order to avoid large collection pauses.
-@roff_tcache@The default sweep interval is 8192;
-@roff_tcache@.Ev JEMALLOC_OPTIONS=14g
-@roff_tcache@will disable garbage collection.
-@roff_tcache@.It H
-@roff_tcache@Enable/disable thread-specific caching.
-@roff_tcache@When there are multiple threads, each thread uses a
-@roff_tcache@thread-specific cache for objects up to a certain size.
-@roff_tcache@Thread-specific caching allows many allocations to be satisfied
-@roff_tcache@without performing any thread synchronization, at the cost of
-@roff_tcache@increased memory use.
-@roff_tcache@See the
-@roff_tcache@.Dq G
-@roff_tcache@and
-@roff_tcache@.Dq M
-@roff_tcache@options for related tuning information.
-@roff_tcache@This option is enabled by default.
-@roff_prof@.It I
-@roff_prof@Double/halve the average interval between memory profile dumps, as
-@roff_prof@measured in bytes of allocation activity.
-@roff_prof@The actual interval between dumps may be sporadic because
-@roff_prof@decentralized allocation counters are used to avoid synchronization
-@roff_prof@bottlenecks.
-@roff_prof@Profiles are dumped to files named according to the pattern
-@roff_prof@.Pa <prefix>.<pid>.<seq>.i<iseq>.heap ,
-@roff_prof@where
-@roff_prof@.Pa <prefix>
-@roff_prof@is controlled by the
-@roff_prof@JEMALLOC_PROF_PREFIX
-@roff_prof@environment variable.
-@roff_prof@The default average interval is 1 GiB;
-@roff_prof@.Ev JEMALLOC_OPTIONS=31i
-@roff_prof@will disable interval-triggered profile dumping.
-@roff_fill@.It J
-@roff_fill@Each byte of new memory allocated by
-@roff_fill@.Fn @jemalloc_prefix@malloc
-@roff_fill@or
-@roff_fill@.Fn @jemalloc_prefix@realloc
-@roff_fill@will be initialized to 0xa5.
-@roff_fill@All memory returned by
-@roff_fill@.Fn @jemalloc_prefix@free
-@roff_fill@or
-@roff_fill@.Fn @jemalloc_prefix@realloc
-@roff_fill@will be initialized to 0x5a.
-@roff_fill@This is intended for debugging and will impact performance
-@roff_fill@negatively.
-.It K
-Double/halve the virtual memory chunk size.
-The default chunk size is 4 MiB.
-@roff_prof@.It L
-@roff_prof@Use an
-@roff_prof@.Xr atexit 3
-@roff_prof@function to report memory leaks.
-@roff_prof@See the
-@roff_prof@.Dq B
-@roff_prof@option for backtrace depth control.
-@roff_prof@See the
-@roff_prof@.Dq F option for information on analyzing heap profile output.
-@roff_prof@This option is disabled by default.
-@roff_tcache@.It M
-@roff_tcache@Double/halve the maximum size class to cache.
-@roff_tcache@At a minimum, all small size classes are cached, and at a maximum
-@roff_tcache@all large size classes are cached.
-@roff_tcache@The default maximum is 32 KiB.
-.It N
-Double/halve the number of arenas.
-The default number of arenas is four times the number of CPUs, or one if there
-is a single CPU.
-@roff_swap@.It O
-@roff_swap@Over-commit memory as a side effect of using anonymous
-@roff_swap@.Xr mmap 2
-@roff_swap@@roff_dss@ and
-@roff_swap@@roff_dss@.Xr sbrk 2
-@roff_swap@for virtual memory allocation.
-@roff_swap@In order for overcommit to be disabled, the
-@roff_swap@.Dq swap.fds
-@roff_swap@mallctl must have been successfully written to.
-@roff_swap@This option is enabled by default.
-.It P
-The
-.Fn malloc_stats_print
-function is called at program exit via an
-.Xr atexit 3
-function.
-@roff_stats@This has the potential to cause deadlock for a multi-threaded
-@roff_stats@process that exits while one or more threads are executing in the
-@roff_stats@memory allocation functions.
-@roff_stats@Therefore, this option should only be used with care; it is
-@roff_stats@primarily intended as a performance tuning aid during application
-@roff_stats@development.
-.It Q
-Double/halve the size of the maximum size class that is a multiple of the
-quantum (8 or 16 bytes, depending on architecture).
-Above this size, cacheline spacing is used for size classes.
-The default value is 128 bytes.
-@roff_prof@.It S
-@roff_prof@Double/halve the average interval between allocation samples, as
-@roff_prof@measured in bytes of allocation activity.
-@roff_prof@Increasing the sampling interval decreases profile fidelity, but
-@roff_prof@also decreases the computational overhead.
-@roff_prof@The default sample interval is one (i.e. all allocations are
-@roff_prof@sampled).
-@roff_prof@.It U
-@roff_prof@Trigger a memory profile dump every time the total virtual memory
-@roff_prof@exceeds the previous maximum.
-@roff_prof@Profiles are dumped to files named according to the pattern
-@roff_prof@.Pa <prefix>.<pid>.<seq>.u<useq>.heap ,
-@roff_prof@where
-@roff_prof@.Pa <prefix>
-@roff_prof@is controlled by the
-@roff_prof@JEMALLOC_PROF_PREFIX
-@roff_prof@environment variable.
-@roff_prof@This option is disabled by default.
-@roff_sysv@.It V
-@roff_sysv@Attempting to allocate zero bytes will return a
-@roff_sysv@.Dv NULL
-@roff_sysv@pointer instead of a valid pointer.
-@roff_sysv@(The default behavior is to make a minimal allocation and return a
-@roff_sysv@pointer to it.)
-@roff_sysv@This option is provided for System V compatibility.
-@roff_sysv@@roff_xmalloc@This option is incompatible with the
-@roff_sysv@@roff_xmalloc@.Dq X
-@roff_sysv@@roff_xmalloc@option.
-@roff_xmalloc@.It X
-@roff_xmalloc@Rather than return failure for any allocation function, display a
-@roff_xmalloc@diagnostic message on
-@roff_xmalloc@.Dv STDERR_FILENO
-@roff_xmalloc@and cause the program to drop core (using
-@roff_xmalloc@.Xr abort 3 ) .
-@roff_xmalloc@This option should be set at compile time by including the
-@roff_xmalloc@following in the source code:
-@roff_xmalloc@.Bd -literal -offset indent
-@roff_xmalloc@@jemalloc_prefix@malloc_options = "X";
-@roff_xmalloc@.Ed
-@roff_fill@.It Z
-@roff_fill@Each byte of new memory allocated by
-@roff_fill@.Fn @jemalloc_prefix@malloc
-@roff_fill@or
-@roff_fill@.Fn @jemalloc_prefix@realloc
-@roff_fill@will be initialized to 0.
-@roff_fill@Note that this initialization only happens once for each byte, so
-@roff_fill@.Fn @jemalloc_prefix@realloc
-@roff_fill@calls do not zero memory that was previously allocated.
-@roff_fill@This is intended for debugging and will impact performance
-@roff_fill@negatively.
+.Fn @jemalloc_prefix@allocm ,
+.Fn @jemalloc_prefix@rallocm ,
+.Fn @jemalloc_prefix@sallocm ,
+and
+.Fn @jemalloc_prefix@dallocm
+functions all have a
+.Fa flags
+argument that can be used to specify options.
+The functions only check the options that are contextually relevant.
+Use bitwise or (|) operations to specify one or more of the following:
+.Bl -tag -width ".Dv ALLOCM_LG_ALIGN(la)"
+.It ALLOCM_LG_ALIGN(la)
+Align the memory allocation to start at an address that is a multiple of
+(1 <<
+.Fa la ) .
+This macro does not validate that
+.Fa la
+is within the valid range.
+.It ALLOCM_ALIGN(a)
+Align the memory allocation to start at an address that is a multiple of
+.Fa a ,
+where
+.Fa a
+is a power of two.
+This macro does not validate that
+.Fa a
+is a power of 2.
+.It ALLOCM_ZERO
+Initialize newly allocated memory to contain zero bytes.
+In the growing reallocation case, the real size prior to reallocation defines
+the boundary between untouched bytes and those that are initialized to contain
+zero bytes.
+If this option is absent, newly allocated memory is uninitialized.
+.It ALLOCM_NO_MOVE
+For reallocation, fail rather than moving the object.
+This constraint can apply to both growth and shrinkage.
.El
.Pp
-@roff_fill@The
-@roff_fill@.Dq J
-@roff_fill@and
-@roff_fill@.Dq Z
-@roff_fill@options are intended for testing and debugging.
-@roff_fill@An application which changes its behavior when these options are used
-@roff_fill@is flawed.
+The
+.Fn @jemalloc_prefix@allocm
+function allocates at least
+.Fa size
+bytes of memory, sets
+.Fa *ptr
+to the base address of the allocation, and sets
+.Fa *rsize
+to the real size of the allocation if
+.Fa rsize
+is not
+.Dv NULL .
+.Pp
+The
+.Fn @jemalloc_prefix@rallocm
+function resizes the allocation at
+.Fa *ptr
+to be at least
+.Fa size
+bytes, sets
+.Fa *ptr
+to the base address of the allocation if it moved, and sets
+.Fa *rsize
+to the real size of the allocation if
+.Fa rsize
+is not
+.Dv NULL .
+If
+.Fa extra
+is non-zero, an attempt is made to resize the allocation to be at least
+.Fa ( size
++
+.Fa extra )
+bytes, though inability to allocate the extra byte(s) will not by itself result
+in failure.
+Behavior is undefined if
+.Fa ( size
++
+.Fa extra
+>
+.Dv SIZE_T_MAX ) .
+.Pp
+The
+.Fn @jemalloc_prefix@sallocm
+function sets
+.Fa *rsize
+to the real size of the allocation.
+.Pp
+The
+.Fn @jemalloc_prefix@dallocm
+function causes the memory referenced by
+.Fa ptr
+to be made available for future allocations.
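As a concrete illustration of the calls described above, the following hedged sketch allocates aligned zeroed memory, attempts an in-place resize, then queries and frees the allocation. It assumes a build configured with an empty function prefix and install suffix (the non-OS X default introduced by this change), so the symbols are spelled allocm(), rallocm(), sallocm(), and dallocm().

#include <stdlib.h>
#include <jemalloc/jemalloc.h>

int
main(void)
{
	void *p;
	size_t rsize;
	int r;

	/* At least 1000 bytes, 4 KiB-aligned (1 << 12), zero-filled. */
	if (allocm(&p, &rsize, 1000, ALLOCM_LG_ALIGN(12) | ALLOCM_ZERO) !=
	    ALLOCM_SUCCESS)
		return (1);

	/* Grow to at least 2000 bytes in place, asking for 500 extra. */
	r = rallocm(&p, &rsize, 2000, 500, ALLOCM_NO_MOVE);
	if (r == ALLOCM_ERR_NOT_MOVED) {
		/* Could not grow in place; retry with movement allowed. */
		r = rallocm(&p, &rsize, 2000, 0, 0);
	}
	if (r != ALLOCM_SUCCESS) {
		dallocm(p, 0);
		return (1);
	}

	sallocm(p, &rsize, 0);	/* rsize now holds the real allocation size. */
	dallocm(p, 0);
	return (0);
}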
+.Sh TUNING
+Once, when the first call is made to one of the memory allocation routines, the
+allocator initializes its internals based in part on various options that can
+be specified at compile- or run-time.
+.Pp
+The string pointed to by the global variable
+.Va @jemalloc_prefix@malloc_conf ,
+the
+.Dq name
+of the file referenced by the symbolic link named
+.Pa /etc/@jemalloc_prefix@malloc.conf ,
+and the value of the environment variable
+.Ev @jemalloc_cprefix@MALLOC_CONF ,
+will be interpreted, in that order, from left to right as options.
+.Pp
+An options string is a comma-separated list of option:value pairs.
+There is one key corresponding to each
+.Dq opt.*
+mallctl.
+For example,
+.Dq abort:true,narenas:1
+sets the
+.Dq opt.abort
+and
+.Dq opt.narenas
+options.
+Some options have boolean values (true/false), others have integer values (base
+8, 10, or 16, depending on prefix), and yet others have raw string values.
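Because each option key corresponds to a read-only opt.* mallctl, a program can verify which value the allocator actually parsed. A hedged sketch under the same unprefixed-build assumption as the previous example:

#include <stdbool.h>
#include <stdio.h>
#include <jemalloc/jemalloc.h>

/*
 * Compile-time default; the symlink and the environment variable are read
 * after this global, per the order given above.
 */
const char *malloc_conf = "abort:true,lg_chunk:24";

int
main(void)
{
	bool abort_opt;
	size_t sz = sizeof(abort_opt);

	if (mallctl("opt.abort", &abort_opt, &sz, NULL, 0) == 0)
		printf("opt.abort: %s\n", abort_opt ? "true" : "false");
	return (0);
}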
.Sh IMPLEMENTATION NOTES
@roff_dss@Traditionally, allocators have used
@roff_dss@.Xr sbrk 2
@@ -564,8 +461,8 @@
does not make much use of the allocation functions.
.Pp
@roff_tcache@In addition to multiple arenas, this allocator supports
-@roff_tcache@thread-specific caching for small objects, in order to make it
-@roff_tcache@possible to completely avoid synchronization for most small
+@roff_tcache@thread-specific caching for small and large objects, in order to
+@roff_tcache@make it possible to completely avoid synchronization for most
@roff_tcache@allocation requests.
@roff_tcache@Such caching allows very fast allocation in the common case, but it
@roff_tcache@increases memory usage and fragmentation, since a bounded number of
@@ -594,27 +491,27 @@
determine all metadata regarding small and large allocations in constant time.
.Pp
Small objects are managed in groups by page runs.
-Each run maintains a bitmap that tracks which regions are in use.
+Each run maintains a frontier and free list to track which regions are in use.
@roff_tiny@Allocation requests that are no more than half the quantum (8 or 16,
@roff_tiny@depending on architecture) are rounded up to the nearest power of
@roff_tiny@two.
Allocation requests that are
@roff_tiny@more than half the quantum, but
no more than the minimum cacheline-multiple size class (see the
-.Dq Q
+.Dq opt.lg_qspace_max
option) are rounded up to the nearest multiple of the
@roff_tiny@quantum.
@roff_no_tiny@quantum (8 or 16, depending on architecture).
Allocation requests that are more than the minimum cacheline-multiple size
class, but no more than the minimum subpage-multiple size class (see the
-.Dq C
+.Dq opt.lg_cspace_max
option) are rounded up to the nearest multiple of the cacheline size (64).
Allocation requests that are more than the minimum subpage-multiple size class,
but no more than the maximum subpage-multiple size class are rounded up to the
nearest multiple of the subpage size (256).
Allocation requests that are more than the maximum subpage-multiple size class,
but small enough to fit in an arena-managed chunk (see the
-.Dq K
+.Dq opt.lg_chunk
option), are rounded up to the nearest run size.
Allocation requests that are too large to fit in an arena-managed chunk are
rounded up to the nearest multiple of the chunk size.
@@ -623,10 +520,33 @@
multi-threaded applications.
If you need to assure that allocations do not suffer from cacheline sharing,
round your allocation requests up to the nearest multiple of the cacheline
-size.
+size, or specify cacheline alignment when allocating.
+.Pp
+Assuming 4 MiB chunks, 4 KiB pages, and a 16-byte quantum on a 64-bit system,
+the size classes in each category are as follows:
+.\"-----------------------------------------------------------------------------
+.TS
+allbox tab(;);
+LLL
+LLL
+^LL
+^LL
+^LL
+LsL
+LsL.
+Category;Subcategory;Size
+@roff_tiny@Small;Tiny;[8]
+@roff_no_tiny@Small;Tiny;[disabled]
+;Quantum-spaced;[16, 32, 48, ..., 128]
+;Cacheline-spaced;[192, 256, 320, ..., 512]
+;Sub-page-spaced;[768, 1024, 1280, ..., 3840]
+Large;[4 KiB, 8 KiB, 12 KiB, ..., 4072 KiB]
+Huge;[4 MiB, 8 MiB, 12 MiB, ...]
+.TE
+.\"-----------------------------------------------------------------------------
.Sh MALLCTL NAMESPACE
The following names are defined in the namespace accessible via the
-.Fn mallctl*
+.Fn @jemalloc_prefix@mallctl*
functions.
Value types are specified in parentheses, and their readable/writable statuses
are encoded as rw, r-, -w, or --.
@@ -638,6 +558,10 @@
@roff_stats@<i> equal to
@roff_stats@.Dq arenas.narenas
@roff_stats@can be used to access the summation of statistics from all arenas.
+.Pp
+Take special note of the
+.Dq epoch
+mallctl, which controls refreshing of cached dynamic statistics.
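For instance (a hedged sketch, again assuming an unprefixed build, plus an --enable-stats configuration so that the "thread.allocated" counter exists), a thread can bump the epoch and then read its own allocation counter:

#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <jemalloc/jemalloc.h>

int
main(void)
{
	uint64_t epoch = 1, allocated;
	size_t sz = sizeof(epoch);

	free(malloc(64));	/* generate a little allocation activity */

	/*
	 * Writing any value refreshes cached statistics; the previous epoch
	 * is returned through the same variable.
	 */
	mallctl("epoch", &epoch, &sz, &epoch, sz);

	sz = sizeof(allocated);
	if (mallctl("thread.allocated", &allocated, &sz, NULL, 0) == 0)
		printf("thread.allocated: %" PRIu64 "\n", allocated);
	return (0);
}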
.Bl -ohang
.\"-----------------------------------------------------------------------------
.It Sy "version (const char *) r-"
@@ -648,27 +572,12 @@
.It Sy "epoch (uint64_t) rw"
.Bd -ragged -offset indent -compact
If a value is passed in, refresh the data from which the
-.Fn mallctl*
+.Fn @jemalloc_prefix@mallctl*
functions report values, and increment the epoch.
Return the current epoch.
This is useful for detecting whether another thread caused a refresh.
.Ed
.\"-----------------------------------------------------------------------------
-@roff_tcache@.It Sy "tcache.flush (void) --"
-@roff_tcache@.Bd -ragged -offset indent -compact
-@roff_tcache@Flush calling thread's tcache.
-@roff_tcache@This interface releases all cached objects and internal data
-@roff_tcache@structures associated with the calling thread's thread-specific
-@roff_tcache@cache.
-@roff_tcache@Ordinarily, this interface need not be called, since automatic
-@roff_tcache@periodic incremental garbage collection occurs, and the thread
-@roff_tcache@cache is automatically discarded when a thread exits.
-@roff_tcache@However, garbage collection is triggered by allocation activity,
-@roff_tcache@so it is possible for a thread that stops allocating/deallocating
-@roff_tcache@to retain its cache indefinitely, in which case the developer may
-@roff_tcache@find manual flushing useful.
-.Ed
-.\"-----------------------------------------------------------------------------
.It Sy "config.debug (bool) r-"
.Bd -ragged -offset indent -compact
--enable-debug was specified during build configuration.
@@ -746,129 +655,386 @@
.\"-----------------------------------------------------------------------------
.It Sy "opt.abort (bool) r-"
.Bd -ragged -offset indent -compact
-See the
-.Dq A
-option.
+Abort-on-warning enabled/disabled.
+If true, most warnings are fatal.
+The process will call
+.Xr abort 3
+in these cases.
+This option is
+@roff_debug@enabled
+@roff_no_debug@disabled
+by default.
.Ed
.\"-----------------------------------------------------------------------------
-@roff_fill@.It Sy "opt.junk (bool) r-"
-@roff_fill@.Bd -ragged -offset indent -compact
-@roff_fill@See the
-@roff_fill@.Dq J
-@roff_fill@option.
-@roff_fill@.Ed
-.\"-----------------------------------------------------------------------------
-@roff_fill@.It Sy "opt.zero (bool) r-"
-@roff_fill@.Bd -ragged -offset indent -compact
-@roff_fill@See the
-@roff_fill@.Dq Z
-@roff_fill@option.
-@roff_fill@.Ed
-.\"-----------------------------------------------------------------------------
-@roff_xmalloc@.It Sy "opt.xmalloc (bool) r-"
-@roff_xmalloc@.Bd -ragged -offset indent -compact
-@roff_xmalloc@See the
-@roff_xmalloc@.Dq X
-@roff_xmalloc@option.
-@roff_xmalloc@.Ed
-.\"-----------------------------------------------------------------------------
-@roff_tcache@.It Sy "opt.tcache (bool) r-"
-@roff_tcache@.Bd -ragged -offset indent -compact
-@roff_tcache@See the
-@roff_tcache@.Dq H
-@roff_tcache@option.
-@roff_tcache@.Ed
-.\"-----------------------------------------------------------------------------
-@roff_tcache@.It Sy "opt.lg_tcache_gc_sweep (ssize_t) r-"
-@roff_tcache@.Bd -ragged -offset indent -compact
-@roff_tcache@See the
-@roff_tcache@.Dq G
-@roff_tcache@option.
-@roff_tcache@.Ed
-.\"-----------------------------------------------------------------------------
-.It Sy "opt.stats_print (bool) r-"
-.Bd -ragged -offset indent -compact
-See the
-.Dq P
-option.
-.Ed
-.\"-----------------------------------------------------------------------------
-@roff_prof@.It Sy "opt.prof (bool) r-"
-@roff_prof@.Bd -ragged -offset indent -compact
-@roff_prof@See the
-@roff_prof@.Dq F
-@roff_prof@option.
-@roff_prof@.Ed
-.\"-----------------------------------------------------------------------------
-@roff_prof@.It Sy "opt.lg_prof_bt_max (size_t) r-"
-@roff_prof@.Bd -ragged -offset indent -compact
-@roff_prof@See the
-@roff_prof@.Dq B
-@roff_prof@option.
-@roff_prof@.Ed
-.\"-----------------------------------------------------------------------------
-@roff_prof@.It Sy "opt.lg_prof_interval (ssize_t) r-"
-@roff_prof@.Bd -ragged -offset indent -compact
-@roff_prof@See the
-@roff_prof@.Dq I
-@roff_prof@option.
-@roff_prof@.Ed
-.\"-----------------------------------------------------------------------------
-@roff_prof@.It Sy "opt.prof_udump (bool) r-"
-@roff_prof@.Bd -ragged -offset indent -compact
-@roff_prof@See the
-@roff_prof@.Dq U
-@roff_prof@option.
-@roff_prof@.Ed
-.\"-----------------------------------------------------------------------------
-@roff_prof@.It Sy "opt.prof_leak (bool) r-"
-@roff_prof@.Bd -ragged -offset indent -compact
-@roff_prof@See the
-@roff_prof@.Dq L
-@roff_prof@option.
-@roff_prof@.Ed
-.\"-----------------------------------------------------------------------------
.It Sy "opt.lg_qspace_max (size_t) r-"
.Bd -ragged -offset indent -compact
-See the
-.Dq Q
-option.
+Size (log base 2) of the maximum size class that is a multiple of the quantum
+(8 or 16 bytes, depending on architecture).
+Above this size, cacheline spacing is used for size classes.
+The default value is 128 bytes (2^7).
.Ed
.\"-----------------------------------------------------------------------------
.It Sy "opt.lg_cspace_max (size_t) r-"
.Bd -ragged -offset indent -compact
-See the
-.Dq C
-option.
-.Ed
-.\"-----------------------------------------------------------------------------
-.It Sy "opt.lg_dirty_mult (ssize_t) r-"
-.Bd -ragged -offset indent -compact
-See the
-.Dq D
-option.
+Size (log base 2) of the maximum size class that is a multiple of the cacheline
+size (64).
+Above this size, subpage spacing (256 bytes) is used for size classes.
+The default value is 512 bytes (2^9).
.Ed
.\"-----------------------------------------------------------------------------
.It Sy "opt.lg_chunk (size_t) r-"
.Bd -ragged -offset indent -compact
-See the
-.Dq K
-option.
+Virtual memory chunk size (log base 2).
+The default chunk size is 4 MiB (2^22).
.Ed
.\"-----------------------------------------------------------------------------
+.It Sy "opt.narenas (size_t) r-"
+.Bd -ragged -offset indent -compact
+Maximum number of arenas to use.
+The default maximum number of arenas is four times the number of CPUs, or one
+if there is a single CPU.
+.Ed
+.\"-----------------------------------------------------------------------------
+.It Sy "opt.lg_dirty_mult (ssize_t) r-"
+.Bd -ragged -offset indent -compact
+Per-arena minimum ratio (log base 2) of active to dirty pages.
+Some dirty unused pages may be allowed to accumulate, within the limit set by
+the ratio (or one chunk worth of dirty pages, whichever is greater), before
+informing the kernel about some of those pages via
+.Xr madvise 2
+or a similar system call.
+This provides the kernel with sufficient information to recycle dirty pages if
+physical memory becomes scarce and the pages remain unused.
+The default minimum ratio is 32:1 (2^5:1); an option value of -1 will disable
+dirty page purging.
+.Ed
+.\"-----------------------------------------------------------------------------
+.It Sy "opt.stats_print (bool) r-"
+.Bd -ragged -offset indent -compact
+Enable/disable statistics printing at exit.
+If enabled, the
+.Fn @jemalloc_prefix@malloc_stats_print
+function is called at program exit via an
+.Xr atexit 3
+function.
+@roff_stats@This has the potential to cause deadlock for a multi-threaded
+@roff_stats@process that exits while one or more threads are executing in the
+@roff_stats@memory allocation functions.
+@roff_stats@Therefore, this option should only be used with care; it is
+@roff_stats@primarily intended as a performance tuning aid during application
+@roff_stats@development.
+This option is disabled by default.
+.Ed
+.\"-----------------------------------------------------------------------------
+@roff_fill@.It Sy "opt.junk (bool) r-"
+@roff_fill@.Bd -ragged -offset indent -compact
+@roff_fill@Junk filling enabled/disabled.
+@roff_fill@If enabled, each byte of uninitialized allocated memory will be
+@roff_fill@initialized to 0xa5.
+@roff_fill@All deallocated memory will be initialized to 0x5a.
+@roff_fill@This is intended for debugging and will impact performance
+@roff_fill@negatively.
+@roff_fill@This option is
+@roff_fill@@roff_debug@enabled
+@roff_fill@@roff_no_debug@disabled
+@roff_fill@by default.
+@roff_fill@.Ed
+.\"-----------------------------------------------------------------------------
+@roff_fill@.It Sy "opt.zero (bool) r-"
+@roff_fill@.Bd -ragged -offset indent -compact
+@roff_fill@Zero filling enabled/disabled.
+@roff_fill@If enabled, each byte of uninitialized allocated memory will be
+@roff_fill@initialized to 0.
+@roff_fill@Note that this initialization only happens once for each byte, so
+@roff_fill@.Fn @jemalloc_prefix@realloc
+@roff_fill@calls do not zero memory that was previously allocated.
+@roff_fill@This is intended for debugging and will impact performance
+@roff_fill@negatively.
+@roff_fill@This option is disabled by default.
+@roff_fill@.Ed
+.\"-----------------------------------------------------------------------------
+@roff_sysv@.It Sy "opt.sysv (bool) r-"
+@roff_sysv@.Bd -ragged -offset indent -compact
+@roff_sysv@If enabled, attempting to allocate zero bytes will return a
+@roff_sysv@.Dv NULL
+@roff_sysv@pointer instead of a valid pointer.
+@roff_sysv@(The default behavior is to make a minimal allocation and return a
+@roff_sysv@pointer to it.)
+@roff_sysv@This option is provided for System V compatibility.
+@roff_sysv@@roff_xmalloc@This option is incompatible with the
+@roff_sysv@@roff_xmalloc@.Dq opt.xmalloc
+@roff_sysv@@roff_xmalloc@option.
+@roff_sysv@This option is disabled by default.
+@roff_sysv@.Ed
+.\"-----------------------------------------------------------------------------
+@roff_xmalloc@.It Sy "opt.xmalloc (bool) r-"
+@roff_xmalloc@.Bd -ragged -offset indent -compact
+@roff_xmalloc@Abort-on-out-of-memory enabled/disabled.
+@roff_xmalloc@If enabled, rather than returning failure for any allocation
+@roff_xmalloc@function, display a diagnostic message on
+@roff_xmalloc@.Dv STDERR_FILENO
+@roff_xmalloc@and cause the program to drop core (using
+@roff_xmalloc@.Xr abort 3 ) .
+@roff_xmalloc@If an application is designed to depend on this behavior, set the
+@roff_xmalloc@option at compile time by including the following in the source
+@roff_xmalloc@code:
+@roff_xmalloc@.Bd -literal -offset indent
+@roff_xmalloc@@jemalloc_prefix@malloc_conf = "xmalloc:true";
+@roff_xmalloc@.Ed
+@roff_xmalloc@.Pp
+@roff_xmalloc@This option is disabled by default.
+@roff_xmalloc@.Ed
+.\"-----------------------------------------------------------------------------
+@roff_tcache@.It Sy "opt.tcache (bool) r-"
+@roff_tcache@.Bd -ragged -offset indent -compact
+@roff_tcache@Thread-specific caching enabled/disabled.
+@roff_tcache@When there are multiple threads, each thread uses a
+@roff_tcache@thread-specific cache for objects up to a certain size.
+@roff_tcache@Thread-specific caching allows many allocations to be satisfied
+@roff_tcache@without performing any thread synchronization, at the cost of
+@roff_tcache@increased memory use.
+@roff_tcache@See the
+@roff_tcache@.Dq opt.lg_tcache_gc_sweep
+@roff_tcache@and
+@roff_tcache@.Dq opt.tcache_max
+@roff_tcache@options for related tuning information.
+@roff_tcache@This option is enabled by default.
+@roff_tcache@.Ed
+.\"-----------------------------------------------------------------------------
+@roff_tcache@.It Sy "opt.lg_tcache_gc_sweep (ssize_t) r-"
+@roff_tcache@.Bd -ragged -offset indent -compact
+@roff_tcache@Approximate interval (log base 2) between full thread-specific
+@roff_tcache@cache garbage collection sweeps, counted in terms of
+@roff_tcache@thread-specific cache allocation/deallocation events.
+@roff_tcache@Garbage collection is actually performed incrementally, one size
+@roff_tcache@class at a time, in order to avoid large collection pauses.
+@roff_tcache@The default sweep interval is 8192 (2^13); setting this option to
+@roff_tcache@-1 will disable garbage collection.
+@roff_tcache@.Ed
+.\"-----------------------------------------------------------------------------
+@roff_tcache@.It Sy "opt.lg_tcache_max (size_t) r-"
+@roff_tcache@.Bd -ragged -offset indent -compact
+@roff_tcache@Maximum size class (log base 2) to cache in the thread-specific
+@roff_tcache@cache.
+@roff_tcache@At a minimum, all small size classes are cached, and at a maximum
+@roff_tcache@all large size classes are cached.
+@roff_tcache@The default maximum is 32 KiB (2^15).
+@roff_tcache@.Ed
+.\"-----------------------------------------------------------------------------
+@roff_prof@.It Sy "opt.prof (bool) r-"
+@roff_prof@.Bd -ragged -offset indent -compact
+@roff_prof@Memory profiling enabled/disabled.
+@roff_prof@If enabled, profile memory allocation activity, and use an
+@roff_prof@.Xr atexit 3
+@roff_prof@function to dump final memory usage to a file named according to
+@roff_prof@the pattern
+@roff_prof@.Pa <prefix>.<pid>.<seq>.f.heap ,
+@roff_prof@where
+@roff_prof@.Pa <prefix>
+@roff_prof@is controlled by the
+@roff_prof@.Dq opt.prof_prefix
+@roff_prof@option.
+@roff_prof@See the
+@roff_prof@.Dq opt.lg_prof_bt_max
+@roff_prof@option for backtrace depth control.
+@roff_prof@See the
+@roff_prof@.Dq opt.prof_active
+@roff_prof@option for on-the-fly activation/deactivation.
+@roff_prof@See the
+@roff_prof@.Dq opt.lg_prof_sample
+@roff_prof@option for probabilistic sampling control.
+@roff_prof@See the
+@roff_prof@.Dq opt.prof_accum
+@roff_prof@option for control of cumulative sample reporting.
+@roff_prof@See the
+@roff_prof@.Dq opt.lg_prof_tcmax
+@roff_prof@option for control of per thread backtrace caching.
+@roff_prof@See the
+@roff_prof@.Dq opt.lg_prof_interval
+@roff_prof@option for information on interval-triggered profile dumping, and the
+@roff_prof@.Dq opt.prof_gdump
+@roff_prof@option for information on high-water-triggered profile dumping.
+@roff_prof@Profile output is compatible with the included pprof Perl script,
+@roff_prof@which originates from the
+@roff_prof@.UR http://\:code.google.com/\:p/\:google-perftools/
+@roff_prof@google-perftools package
+@roff_prof@.UE .
+@roff_prof@.Ed
+.\"-----------------------------------------------------------------------------
+@roff_prof@.It Sy "opt.prof_prefix (const char *) r-"
+@roff_prof@.Bd -ragged -offset indent -compact
+@roff_prof@Filename prefix for profile dumps.
+@roff_prof@If the prefix is set to the empty string, no automatic dumps will
+@roff_prof@occur; this is primarily useful for disabling the automatic final
+@roff_prof@heap dump (which also disables leak reporting, if enabled).
+@roff_prof@The default prefix is
+@roff_prof@.Pa jeprof .
+@roff_prof@.Ed
+.\"-----------------------------------------------------------------------------
+@roff_prof@.It Sy "opt.lg_prof_bt_max (size_t) r-"
+@roff_prof@.Bd -ragged -offset indent -compact
+@roff_prof@Maximum backtrace depth (log base 2) when profiling memory
+@roff_prof@allocation activity.
+@roff_prof@The default is 128 (2^7).
+@roff_prof@.Ed
+.\"-----------------------------------------------------------------------------
+@roff_prof@.It Sy "opt.prof_active (bool) r-"
+@roff_prof@.Bd -ragged -offset indent -compact
+@roff_prof@Profiling activated/deactivated.
+@roff_prof@This is a secondary control mechanism that makes it possible to
+@roff_prof@start the application with profiling enabled (see the
+@roff_prof@.Dq opt.prof
+@roff_prof@option) but inactive, then toggle profiling at any time during
+@roff_prof@program execution with the
+@roff_prof@.Dq prof.active
+@roff_prof@mallctl.
+@roff_prof@This option is enabled by default.
+@roff_prof@.Ed
+.\"-----------------------------------------------------------------------------
+@roff_prof@.It Sy "opt.lg_prof_sample (ssize_t) r-"
+@roff_prof@.Bd -ragged -offset indent -compact
+@roff_prof@Average interval (log base 2) between allocation samples, as
+@roff_prof@measured in bytes of allocation activity.
+@roff_prof@Increasing the sampling interval decreases profile fidelity, but
+@roff_prof@also decreases the computational overhead.
+@roff_prof@The default sample interval is 1 (2^0) (i.e. all allocations are
+@roff_prof@sampled).
+@roff_prof@.Ed
+.\"-----------------------------------------------------------------------------
+@roff_prof@.It Sy "opt.prof_accum (bool) r-"
+@roff_prof@.Bd -ragged -offset indent -compact
+@roff_prof@Reporting of cumulative object/byte counts in profile dumps
+@roff_prof@enabled/disabled.
+@roff_prof@If this option is enabled, every unique backtrace must be stored for
+@roff_prof@the duration of execution.
+@roff_prof@Depending on the application, this can impose a large memory
+@roff_prof@overhead, and the cumulative counts are not always of interest.
+@roff_prof@See the
+@roff_prof@.Dq opt.lg_prof_tcmax
+@roff_prof@option for control of per thread backtrace caching, which has
+@roff_prof@important interactions.
+@roff_prof@This option is enabled by default.
+@roff_prof@.Ed
+.\"-----------------------------------------------------------------------------
+@roff_prof@.It Sy "opt.lg_prof_tcmax (ssize_t) r-"
+@roff_prof@.Bd -ragged -offset indent -compact
+@roff_prof@Maximum per thread backtrace cache (log base 2) used for heap
+@roff_prof@profiling.
+@roff_prof@A backtrace can only be discarded if the
+@roff_prof@.Dq opt.prof_accum
+@roff_prof@option is disabled, and no thread caches currently refer to the
+@roff_prof@backtrace.
+@roff_prof@Therefore, a backtrace cache limit should be imposed if the
+@roff_prof@intention is to limit how much memory is used by backtraces.
+@roff_prof@By default, no limit is imposed (encoded as -1).
+@roff_prof@.Ed
+.\"-----------------------------------------------------------------------------
+@roff_prof@.It Sy "opt.lg_prof_interval (ssize_t) r-"
+@roff_prof@.Bd -ragged -offset indent -compact
+@roff_prof@Average interval (log base 2) between memory profile dumps, as
+@roff_prof@measured in bytes of allocation activity.
+@roff_prof@The actual interval between dumps may be sporadic because
+@roff_prof@decentralized allocation counters are used to avoid synchronization
+@roff_prof@bottlenecks.
+@roff_prof@Profiles are dumped to files named according to the pattern
+@roff_prof@.Pa <prefix>.<pid>.<seq>.i<iseq>.heap ,
+@roff_prof@where
+@roff_prof@.Pa <prefix>
+@roff_prof@is controlled by the
+@roff_prof@.Dq opt.prof_prefix
+@roff_prof@option.
+@roff_prof@By default, interval-triggered profile dumping is disabled (encoded
+@roff_prof@as -1).
+@roff_prof@.Ed
+.\"-----------------------------------------------------------------------------
+@roff_prof@.It Sy "opt.prof_gdump (bool) r-"
+@roff_prof@.Bd -ragged -offset indent -compact
+@roff_prof@Trigger a memory profile dump every time the total virtual memory
+@roff_prof@exceeds the previous maximum.
+@roff_prof@Profiles are dumped to files named according to the pattern
+@roff_prof@.Pa <prefix>.<pid>.<seq>.u<useq>.heap ,
+@roff_prof@where
+@roff_prof@.Pa <prefix>
+@roff_prof@is controlled by the
+@roff_prof@.Dq opt.prof_prefix
+@roff_prof@option.
+@roff_prof@This option is disabled by default.
+@roff_prof@.Ed
+.\"-----------------------------------------------------------------------------
+@roff_prof@.It Sy "opt.prof_leak (bool) r-"
+@roff_prof@.Bd -ragged -offset indent -compact
+@roff_prof@Leak reporting enabled/disabled.
+@roff_prof@If enabled, use an
+@roff_prof@.Xr atexit 3
+@roff_prof@function to report memory leaks detected by allocation sampling.
+@roff_prof@See the
+@roff_prof@.Dq opt.lg_prof_bt_max
+@roff_prof@option for backtrace depth control.
+@roff_prof@See the
+@roff_prof@.Dq opt.prof
+@roff_prof@option for information on analyzing heap profile output.
+@roff_prof@This option is disabled by default.
+@roff_prof@.Ed
+.\"-----------------------------------------------------------------------------
.It Sy "opt.overcommit (bool) r-"
.Bd -ragged -offset indent -compact
-See the
-.Dq O
-option.
+@roff_swap@Over-commit enabled/disabled.
+@roff_swap@If enabled, over-commit memory as a side effect of using anonymous
+@roff_swap@.Xr mmap 2
+@roff_swap@@roff_dss@ and
+@roff_swap@@roff_dss@.Xr sbrk 2
+@roff_swap@for virtual memory allocation.
+@roff_swap@In order for overcommit to be disabled, the
+@roff_swap@.Dq swap.fds
+@roff_swap@mallctl must have been successfully written to.
+@roff_swap@This option is enabled by default.
.Ed
.\"-----------------------------------------------------------------------------
+@roff_tcache@.It Sy "tcache.flush (void) --"
+@roff_tcache@.Bd -ragged -offset indent -compact
+@roff_tcache@Flush calling thread's tcache.
+@roff_tcache@This interface releases all cached objects and internal data
+@roff_tcache@structures associated with the calling thread's thread-specific
+@roff_tcache@cache.
+@roff_tcache@Ordinarily, this interface need not be called, since automatic
+@roff_tcache@periodic incremental garbage collection occurs, and the thread
+@roff_tcache@cache is automatically discarded when a thread exits.
+@roff_tcache@However, garbage collection is triggered by allocation activity,
+@roff_tcache@so it is possible for a thread that stops allocating/deallocating
+@roff_tcache@to retain its cache indefinitely, in which case the developer may
+@roff_tcache@find manual flushing useful.
+.Ed
+.\"-----------------------------------------------------------------------------
+.It Sy "thread.arena (unsigned) rw"
+.Bd -ragged -offset indent -compact
+Get or set the arena associated with the calling thread.
+The arena index must be less than the maximum number of arenas (see the
+.Dq arenas.narenas
+mallctl).
+If the specified arena was not initialized beforehand (see the
+.Dq arenas.initialized
+mallctl), it will be automatically initialized as a side effect of calling this
+interface.
+.Ed
+.\"-----------------------------------------------------------------------------
+@roff_stats@.It Sy "thread.allocated (uint64_t) r-"
+@roff_stats@.Bd -ragged -offset indent -compact
+@roff_stats@Get the total number of bytes ever allocated by the calling thread.
+@roff_stats@This counter has the potential to wrap around; it is up to the
+@roff_stats@application to appropriately interpret the counter in such cases.
+@roff_stats@.Ed
+.\"-----------------------------------------------------------------------------
+@roff_stats@.It Sy "thread.deallocated (uint64_t) r-"
+@roff_stats@.Bd -ragged -offset indent -compact
+@roff_stats@Get the total number of bytes ever deallocated by the calling
+@roff_stats@thread.
+@roff_stats@This counter has the potential to wrap around; it is up to the
+@roff_stats@application to appropriately interpret the counter in such cases.
+@roff_stats@.Ed
+.\"-----------------------------------------------------------------------------
.It Sy "arenas.narenas (unsigned) r-"
.Bd -ragged -offset indent -compact
Maximum number of arenas.
-See the
-.Dq N
-option.
.Ed
.\"-----------------------------------------------------------------------------
.It Sy "arenas.initialized (bool *) r-"
@@ -1004,11 +1170,17 @@
Maximum size supported by this large size class.
.Ed
.\"-----------------------------------------------------------------------------
+.It Sy "arenas.purge (unsigned) -w"
+.Bd -ragged -offset indent -compact
+Purge unused dirty pages for the specified arena, or for all arenas if none is
+specified.
+.Ed
+.\"-----------------------------------------------------------------------------
@roff_prof@.It Sy "prof.active (bool) rw"
@roff_prof@.Bd -ragged -offset indent -compact
@roff_prof@Control whether sampling is currently active.
@roff_prof@See the
-@roff_prof@.Dq E
+@roff_prof@.Dq opt.prof_active
@roff_prof@option for additional information.
@roff_prof@.Ed
.\"-----------------------------------------------------------------------------
@@ -1020,8 +1192,8 @@
@roff_prof@where
@roff_prof@.Pa <prefix>
@roff_prof@is controlled by the
-@roff_prof@JEMALLOC_PROF_PREFIX
-@roff_prof@environment variable.
+@roff_prof@.Dq opt.prof_prefix
+@roff_prof@option.
@roff_prof@.Ed
.\"-----------------------------------------------------------------------------
@roff_prof@.It Sy "prof.interval (uint64_t) r-"
@@ -1029,7 +1201,7 @@
@roff_prof@Average number of bytes allocated between interval-based profile
@roff_prof@dumps.
@roff_prof@See the
-@roff_prof@.Dq I
+@roff_prof@.Dq opt.lg_prof_interval
@roff_prof@option for additional information.
@roff_prof@.Ed
.\"-----------------------------------------------------------------------------
@@ -1283,10 +1455,9 @@
.\"-----------------------------------------------------------------------------
.El
.Sh DEBUGGING MALLOC PROBLEMS
-The first thing to do is to set the
-.Dq A
-option.
-This option forces a coredump (if possible) at the first sign of trouble,
+Start by setting the
+.Dq opt.abort
+option, which forces a coredump (if possible) at the first sign of trouble,
rather than the normal policy of trying to continue if at all possible.
.Pp
It is probably also a good idea to recompile the program with suitable
@@ -1294,22 +1465,22 @@
.Pp
@roff_fill@If the program starts to give unusual results, coredump or generally
@roff_fill@behave differently without emitting any of the messages mentioned in
-@roff_fill@the next section, it is likely because it depends on the storage
-@roff_fill@being filled with zero bytes.
+@roff_fill@the next section, it is likely because the program depends on the
+@roff_fill@storage being filled with zero bytes.
@roff_fill@Try running it with the
-@roff_fill@.Dq Z
+@roff_fill@.Dq opt.zero
@roff_fill@option set;
@roff_fill@if that improves the situation, this diagnosis has been confirmed.
@roff_fill@If the program still misbehaves,
@roff_fill@the likely problem is accessing memory outside the allocated area.
@roff_fill@.Pp
@roff_fill@Alternatively, if the symptoms are not easy to reproduce, setting the
-@roff_fill@.Dq J
+@roff_fill@.Dq opt.junk
@roff_fill@option may help provoke the problem.
@roff_fill@.Pp
-Unfortunately this implementation does not provide much detail about
-the problems it detects; the performance impact for storing such information
-would be prohibitive.
+This implementation does not provide much detail about the problems it detects,
+because the performance impact for storing such information would be
+prohibitive.
There are a number of allocator implementations available on the Internet
which focus on detecting and pinpointing problems by trading performance for
extra sanity checks and detailed diagnostics.
@@ -1319,8 +1490,8 @@
.Dv STDERR_FILENO .
Errors will result in the process dumping core.
If the
-.Dq A
-option is set, all warnings are treated as errors.
+.Dq opt.abort
+option is set, most warnings are treated as errors.
.Pp
The
.Va @jemalloc_prefix@malloc_message
@@ -1342,6 +1513,7 @@
All messages are prefixed by
.Dq <jemalloc>: .
.Sh RETURN VALUES
+.Ss Standard API
The
.Fn @jemalloc_prefix@malloc
and
@@ -1390,7 +1562,7 @@
The
.Fn @jemalloc_prefix@free
function returns no value.
-.Pp
+.Ss Non-standard API
The
.Fn @jemalloc_prefix@malloc_usable_size
function returns the usable size of the allocation pointed to by
@@ -1429,37 +1601,69 @@
A memory allocation failure occurred.
.It Bq Er EFAULT
An interface with side effects failed in some way not directly related to
-.Fn mallctl*
+.Fn @jemalloc_prefix@mallctl*
read/write processing.
.El
+.Ss Experimental API
+The
+.Fn @jemalloc_prefix@allocm ,
+.Fn @jemalloc_prefix@rallocm ,
+.Fn @jemalloc_prefix@sallocm ,
+and
+.Fn @jemalloc_prefix@dallocm
+functions return
+.Dv ALLOCM_SUCCESS
+on success; otherwise they return an error value.
+The
+.Fn @jemalloc_prefix@allocm
+and
+.Fn @jemalloc_prefix@rallocm
+functions will fail if:
+.Bl -tag -width ".Bq Er ALLOCM_ERR_OOM"
+.It Bq Er ALLOCM_ERR_OOM
+Out of memory.
+Insufficient contiguous memory was available to service the allocation request.
+The
+.Fn @jemalloc_prefix@allocm
+function additionally sets
+.Fa *ptr
+to
+.Dv NULL ,
+whereas the
+.Fn @jemalloc_prefix@rallocm
+function leaves
+.Fa *ptr
+unmodified.
+.El
+.Pp
+The
+.Fn @jemalloc_prefix@rallocm
+function will also fail if:
+.Bl -tag -width ".Bq Er ALLOCM_ERR_NOT_MOVED"
+.It Bq Er ALLOCM_ERR_NOT_MOVED
+.Dv ALLOCM_NO_MOVE
+was specified, but the reallocation request could not be serviced without
+moving the object.
+.El
.Sh ENVIRONMENT
-The following environment variables affect the execution of the allocation
+The following environment variable affects the execution of the allocation
functions:
-@roff_prof@.Bl -tag -width ".Ev JEMALLOC_PROF_PREFIX"
-@roff_no_prof@.Bl -tag -width ".Ev JEMALLOC_OPTIONS"
-.It Ev JEMALLOC_OPTIONS
+.Bl -tag -width ".Ev @jemalloc_cprefix@MALLOC_CONF"
+.It Ev @jemalloc_cprefix@MALLOC_CONF
If the environment variable
-.Ev JEMALLOC_OPTIONS
-is set, the characters it contains will be interpreted as flags to the
-allocation functions.
-@roff_prof@.It Ev JEMALLOC_PROF_PREFIX
-@roff_prof@If the environment variable
-@roff_prof@.Ev JEMALLOC_PROF_PREFIX
-@roff_prof@is set, use it as the filename prefix for profile dumps; otherwise
-@roff_prof@use
-@roff_prof@.Pa jeprof
-@roff_prof@as the prefix.
+.Ev @jemalloc_cprefix@MALLOC_CONF
+is set, the characters it contains will be interpreted as options.
.El
.Sh EXAMPLES
To dump core whenever a problem occurs:
.Pp
.Bd -literal -offset indent
-ln -s 'A' /etc/jemalloc.conf
+ln -s 'abort:true' /etc/@jemalloc_prefix@malloc.conf
.Ed
.Pp
-To specify in the source a chunk size that is twice the default:
+To specify in the source a chunk size that is 16 MiB:
.Bd -literal -offset indent
-@jemalloc_prefix@malloc_options = "K";
+@jemalloc_prefix@malloc_conf = "lg_chunk:24";
.Ed
.Sh SEE ALSO
.Xr madvise 2 ,
diff --git a/jemalloc/include/jemalloc/internal/arena.h b/jemalloc/include/jemalloc/internal/arena.h
index c1955f1..9556c2c 100644
--- a/jemalloc/include/jemalloc/internal/arena.h
+++ b/jemalloc/include/jemalloc/internal/arena.h
@@ -121,17 +121,17 @@
*
* p : run page offset
* s : run size
- * c : size class (used only if prof_promote is true)
+ * c : (binind+1) for size class (used only if prof_promote is true)
* x : don't care
* - : 0
* + : 1
- * [DZLA] : bit set
- * [dzla] : bit unset
+ * [DULA] : bit set
+ * [dula] : bit unset
*
* Unallocated (clean):
- * ssssssss ssssssss ssss---- ----dz--
- * xxxxxxxx xxxxxxxx xxxx---- -----Zxx
- * ssssssss ssssssss ssss---- ----dZ--
+ * ssssssss ssssssss ssss---- ----du--
+ * xxxxxxxx xxxxxxxx xxxx---- -----Uxx
+ * ssssssss ssssssss ssss---- ----dU--
*
* Unallocated (dirty):
* ssssssss ssssssss ssss---- ----D---
@@ -144,7 +144,7 @@
* pppppppp pppppppp pppp---- ----d--a
*
* Large:
- * ssssssss ssssssss ssss++++ ++++D-la
+ * ssssssss ssssssss ssss---- ----D-la
* xxxxxxxx xxxxxxxx xxxx---- ----xxxx
* -------- -------- -------- ----D-la
*
@@ -152,7 +152,7 @@
* ssssssss ssssssss sssscccc ccccD-la
*
* Large (not sampled, size == PAGE_SIZE):
- * ssssssss ssssssss ssss++++ ++++D-la
+ * ssssssss ssssssss ssss---- ----D-la
*/
size_t bits;
#ifdef JEMALLOC_PROF
@@ -161,7 +161,7 @@
#endif
#define CHUNK_MAP_FLAGS_MASK ((size_t)0xfU)
#define CHUNK_MAP_DIRTY ((size_t)0x8U)
-#define CHUNK_MAP_ZEROED ((size_t)0x4U)
+#define CHUNK_MAP_UNZEROED ((size_t)0x4U)
#define CHUNK_MAP_LARGE ((size_t)0x2U)
#define CHUNK_MAP_ALLOCATED ((size_t)0x1U)
#define CHUNK_MAP_KEY CHUNK_MAP_ALLOCATED
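The low nibble of each map entry carries the four flags defined above. As an illustration (not part of the patch), a reader can classify a page the same way arena_dalloc() below does, by testing CHUNK_MAP_ALLOCATED and then CHUNK_MAP_LARGE; CHUNK_MAP_UNZEROED is not consulted in this sketch.

#include <stddef.h>
#include <stdio.h>

#define CHUNK_MAP_DIRTY      ((size_t)0x8U)
#define CHUNK_MAP_LARGE      ((size_t)0x2U)
#define CHUNK_MAP_ALLOCATED  ((size_t)0x1U)

/* Classify a page from its map bits (illustrative only). */
static const char *
classify(size_t mapbits)
{
    if ((mapbits & CHUNK_MAP_ALLOCATED) == 0) {
        return ((mapbits & CHUNK_MAP_DIRTY) ? "unallocated (dirty)"
            : "unallocated (clean)");
    }
    return ((mapbits & CHUNK_MAP_LARGE) ? "large" : "small");
}

int
main(void)
{
    printf("%s\n", classify((size_t)0x3U));  /* allocated + large */
    return (0);
}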
@@ -187,7 +187,12 @@
/* Number of dirty pages. */
size_t ndirty;
- /* Map of pages within chunk that keeps track of free/large/small. */
+ /*
+ * Map of pages within chunk that keeps track of free/large/small. The
+ * first map_bias entries are omitted, since the chunk header does not
+ * need to be tracked in the map. This omission saves a header page
+ * for common chunk sizes (e.g. 4 MiB).
+ */
arena_chunk_map_t map[1]; /* Dynamically sized. */
};
typedef rb_tree(arena_chunk_t) arena_chunk_tree_t;
@@ -416,8 +421,12 @@
extern size_t sspace_max;
#define small_maxclass sspace_max
-#define nlclasses (chunk_npages - arena_chunk_header_npages)
+#define nlclasses (chunk_npages - map_bias)
+void arena_purge_all(arena_t *arena);
+#ifdef JEMALLOC_PROF
+void arena_prof_accum(arena_t *arena, uint64_t accumbytes);
+#endif
#ifdef JEMALLOC_TCACHE
void arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin,
size_t binind
@@ -426,20 +435,15 @@
# endif
);
#endif
-#ifdef JEMALLOC_PROF
-void arena_prof_accum(arena_t *arena, uint64_t accumbytes);
-#endif
void *arena_malloc_small(arena_t *arena, size_t size, bool zero);
void *arena_malloc_large(arena_t *arena, size_t size, bool zero);
void *arena_malloc(size_t size, bool zero);
-void *arena_palloc(arena_t *arena, size_t alignment, size_t size,
- size_t alloc_size);
+void *arena_palloc(arena_t *arena, size_t size, size_t alloc_size,
+ size_t alignment, bool zero);
size_t arena_salloc(const void *ptr);
#ifdef JEMALLOC_PROF
void arena_prof_promoted(const void *ptr, size_t size);
size_t arena_salloc_demote(const void *ptr);
-prof_ctx_t *arena_prof_ctx_get(const void *ptr);
-void arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx);
#endif
void arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr,
arena_chunk_map_t *mapelm);
@@ -449,7 +453,10 @@
arena_stats_t *astats, malloc_bin_stats_t *bstats,
malloc_large_stats_t *lstats);
#endif
-void *arena_ralloc(void *ptr, size_t size, size_t oldsize);
+void *arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size,
+ size_t extra, bool zero);
+void *arena_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra,
+ size_t alignment, bool zero);
bool arena_new(arena_t *arena, unsigned ind);
bool arena_boot(void);
@@ -458,10 +465,149 @@
#ifdef JEMALLOC_H_INLINES
#ifndef JEMALLOC_ENABLE_INLINE
+unsigned arena_run_regind(arena_run_t *run, arena_bin_t *bin,
+ const void *ptr, size_t size);
+# ifdef JEMALLOC_PROF
+prof_ctx_t *arena_prof_ctx_get(const void *ptr);
+void arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx);
+# endif
void arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr);
#endif
#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ARENA_C_))
+JEMALLOC_INLINE unsigned
+arena_run_regind(arena_run_t *run, arena_bin_t *bin, const void *ptr,
+ size_t size)
+{
+ unsigned shift, diff, regind;
+
+ assert(run->magic == ARENA_RUN_MAGIC);
+
+ /*
+ * Avoid doing division with a variable divisor if possible. Using
+ * actual division here can reduce allocator throughput by over 20%!
+ */
+ diff = (unsigned)((uintptr_t)ptr - (uintptr_t)run - bin->reg0_offset);
+
+ /* Rescale (factor powers of 2 out of the numerator and denominator). */
+ shift = ffs(size) - 1;
+ diff >>= shift;
+ size >>= shift;
+
+ if (size == 1) {
+ /* The divisor was a power of 2. */
+ regind = diff;
+ } else {
+ /*
+ * To divide by a number D that is not a power of two we
+ * multiply by (2^21 / D) and then right shift by 21 positions.
+ *
+ * X / D
+ *
+ * becomes
+ *
+ * (X * size_invs[D - 3]) >> SIZE_INV_SHIFT
+ *
+ * We can omit the first three elements, because we never
+ * divide by 0, and 1 and 2 are both powers of two, which are
+ * handled above.
+ */
+#define SIZE_INV_SHIFT 21
+#define SIZE_INV(s) (((1U << SIZE_INV_SHIFT) / (s)) + 1)
+ static const unsigned size_invs[] = {
+ SIZE_INV(3),
+ SIZE_INV(4), SIZE_INV(5), SIZE_INV(6), SIZE_INV(7),
+ SIZE_INV(8), SIZE_INV(9), SIZE_INV(10), SIZE_INV(11),
+ SIZE_INV(12), SIZE_INV(13), SIZE_INV(14), SIZE_INV(15),
+ SIZE_INV(16), SIZE_INV(17), SIZE_INV(18), SIZE_INV(19),
+ SIZE_INV(20), SIZE_INV(21), SIZE_INV(22), SIZE_INV(23),
+ SIZE_INV(24), SIZE_INV(25), SIZE_INV(26), SIZE_INV(27),
+ SIZE_INV(28), SIZE_INV(29), SIZE_INV(30), SIZE_INV(31)
+ };
+
+ if (size <= ((sizeof(size_invs) / sizeof(unsigned)) + 2))
+ regind = (diff * size_invs[size - 3]) >> SIZE_INV_SHIFT;
+ else
+ regind = diff / size;
+#undef SIZE_INV
+#undef SIZE_INV_SHIFT
+ }
+ assert(diff == regind * size);
+ assert(regind < bin->nregs);
+
+ return (regind);
+}
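The SIZE_INV() trick substitutes a multiply and a 21-bit shift for division by the region size. The following standalone sketch verifies the identity for the divisors used in the table above, restricted to small multiples of the divisor (as in practice, where diff is an offset within one small run), so the 32-bit product cannot overflow.

#include <assert.h>
#include <stdio.h>

#define SIZE_INV_SHIFT  21
#define SIZE_INV(s)     (((1U << SIZE_INV_SHIFT) / (s)) + 1)

int
main(void)
{
    unsigned d, diff;

    for (d = 3; d <= 31; d++) {
        unsigned inv = SIZE_INV(d);

        /* Only exact multiples occur in practice (diff = regind * d). */
        for (diff = 0; diff < 2048; diff += d)
            assert(((diff * inv) >> SIZE_INV_SHIFT) == (diff / d));
    }
    printf("reciprocal division matches plain division for 3..31\n");
    return (0);
}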
+
+#ifdef JEMALLOC_PROF
+JEMALLOC_INLINE prof_ctx_t *
+arena_prof_ctx_get(const void *ptr)
+{
+ prof_ctx_t *ret;
+ arena_chunk_t *chunk;
+ size_t pageind, mapbits;
+
+ assert(ptr != NULL);
+ assert(CHUNK_ADDR2BASE(ptr) != ptr);
+
+ chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
+ pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT;
+ mapbits = chunk->map[pageind-map_bias].bits;
+ assert((mapbits & CHUNK_MAP_ALLOCATED) != 0);
+ if ((mapbits & CHUNK_MAP_LARGE) == 0) {
+ if (prof_promote)
+ ret = (prof_ctx_t *)(uintptr_t)1U;
+ else {
+ arena_run_t *run = (arena_run_t *)((uintptr_t)chunk +
+ (uintptr_t)((pageind - (mapbits >> PAGE_SHIFT)) <<
+ PAGE_SHIFT));
+ arena_bin_t *bin = run->bin;
+ unsigned regind;
+
+ assert(run->magic == ARENA_RUN_MAGIC);
+ regind = arena_run_regind(run, bin, ptr, bin->reg_size);
+ ret = *(prof_ctx_t **)((uintptr_t)run +
+ bin->ctx0_offset + (regind *
+ sizeof(prof_ctx_t *)));
+ }
+ } else
+ ret = chunk->map[pageind-map_bias].prof_ctx;
+
+ return (ret);
+}
+
+JEMALLOC_INLINE void
+arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx)
+{
+ arena_chunk_t *chunk;
+ size_t pageind, mapbits;
+
+ assert(ptr != NULL);
+ assert(CHUNK_ADDR2BASE(ptr) != ptr);
+
+ chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
+ pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT;
+ mapbits = chunk->map[pageind-map_bias].bits;
+ assert((mapbits & CHUNK_MAP_ALLOCATED) != 0);
+ if ((mapbits & CHUNK_MAP_LARGE) == 0) {
+ if (prof_promote == false) {
+ arena_run_t *run = (arena_run_t *)((uintptr_t)chunk +
+ (uintptr_t)((pageind - (mapbits >> PAGE_SHIFT)) <<
+ PAGE_SHIFT));
+ arena_bin_t *bin = run->bin;
+ unsigned regind;
+
+ assert(run->magic == ARENA_RUN_MAGIC);
+ regind = arena_run_regind(run, bin, ptr, bin->reg_size);
+
+ *((prof_ctx_t **)((uintptr_t)run + bin->ctx0_offset
+ + (regind * sizeof(prof_ctx_t *)))) = ctx;
+ } else
+ assert((uintptr_t)ctx == (uintptr_t)1U);
+ } else
+ chunk->map[pageind-map_bias].prof_ctx = ctx;
+}
+#endif
+
JEMALLOC_INLINE void
arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr)
{
@@ -474,8 +620,8 @@
assert(ptr != NULL);
assert(CHUNK_ADDR2BASE(ptr) != ptr);
- pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT);
- mapelm = &chunk->map[pageind];
+ pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT;
+ mapelm = &chunk->map[pageind-map_bias];
assert((mapelm->bits & CHUNK_MAP_ALLOCATED) != 0);
if ((mapelm->bits & CHUNK_MAP_LARGE) == 0) {
/* Small allocation. */
diff --git a/jemalloc/include/jemalloc/internal/chunk.h b/jemalloc/include/jemalloc/internal/chunk.h
index 1f6abf7..a60f0ad 100644
--- a/jemalloc/include/jemalloc/internal/chunk.h
+++ b/jemalloc/include/jemalloc/internal/chunk.h
@@ -39,13 +39,17 @@
extern chunk_stats_t stats_chunks;
#endif
+#ifdef JEMALLOC_IVSALLOC
+extern rtree_t *chunks_rtree;
+#endif
+
extern size_t chunksize;
extern size_t chunksize_mask; /* (chunksize - 1). */
extern size_t chunk_npages;
-extern size_t arena_chunk_header_npages;
+extern size_t map_bias; /* Number of arena chunk header pages. */
extern size_t arena_maxclass; /* Max size class for arenas. */
-void *chunk_alloc(size_t size, bool *zero);
+void *chunk_alloc(size_t size, bool base, bool *zero);
void chunk_dealloc(void *chunk, size_t size);
bool chunk_boot(void);
diff --git a/jemalloc/include/jemalloc/internal/chunk_mmap.h b/jemalloc/include/jemalloc/internal/chunk_mmap.h
index dc52448..07b50a4 100644
--- a/jemalloc/include/jemalloc/internal/chunk_mmap.h
+++ b/jemalloc/include/jemalloc/internal/chunk_mmap.h
@@ -13,6 +13,8 @@
void *chunk_alloc_mmap_noreserve(size_t size);
void chunk_dealloc_mmap(void *chunk, size_t size);
+bool chunk_mmap_boot(void);
+
#endif /* JEMALLOC_H_EXTERNS */
/******************************************************************************/
#ifdef JEMALLOC_H_INLINES
diff --git a/jemalloc/include/jemalloc/internal/ckh.h b/jemalloc/include/jemalloc/internal/ckh.h
index c39ea5c..d4e391b 100644
--- a/jemalloc/include/jemalloc/internal/ckh.h
+++ b/jemalloc/include/jemalloc/internal/ckh.h
@@ -45,7 +45,7 @@
#endif
/* Used for pseudo-random number generation. */
-#define CKH_A 12345
+#define CKH_A 1103515241
#define CKH_C 12347
uint32_t prn_state;
diff --git a/jemalloc/include/jemalloc/internal/ctl.h b/jemalloc/include/jemalloc/internal/ctl.h
index 7bbf21e..8776ad1 100644
--- a/jemalloc/include/jemalloc/internal/ctl.h
+++ b/jemalloc/include/jemalloc/internal/ctl.h
@@ -82,9 +82,9 @@
#define xmallctl(name, oldp, oldlenp, newp, newlen) do { \
if (JEMALLOC_P(mallctl)(name, oldp, oldlenp, newp, newlen) \
!= 0) { \
- malloc_write("<jemalloc>: Invalid xmallctl(\""); \
+ malloc_write("<jemalloc>: Failure in xmallctl(\""); \
malloc_write(name); \
- malloc_write("\", ...) call\n"); \
+ malloc_write("\", ...)\n"); \
abort(); \
} \
} while (0)
@@ -92,9 +92,9 @@
#define xmallctlnametomib(name, mibp, miblenp) do { \
if (JEMALLOC_P(mallctlnametomib)(name, mibp, miblenp) != 0) { \
malloc_write( \
- "<jemalloc>: Invalid xmallctlnametomib(\""); \
+ "<jemalloc>: Failure in xmallctlnametomib(\""); \
malloc_write(name); \
- malloc_write("\", ...) call\n"); \
+ malloc_write("\", ...)\n"); \
abort(); \
} \
} while (0)
@@ -103,7 +103,7 @@
if (JEMALLOC_P(mallctlbymib)(mib, miblen, oldp, oldlenp, newp, \
newlen) != 0) { \
malloc_write( \
- "<jemalloc>: Invalid xmallctlbymib() call\n"); \
+ "<jemalloc>: Failure in xmallctlbymib()\n"); \
abort(); \
} \
} while (0)
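These macros exist so that internal call sites can treat a mallctl*() failure as fatal instead of checking a return code at every site. The same pattern at the application level looks roughly like the sketch below, assuming an unprefixed build; "version" is assumed here to be a valid read-only string control.

#include <stdio.h>
#include <stdlib.h>
#include <jemalloc/jemalloc.h>

/* Abort on any mallctl() failure, mirroring the internal xmallctl() macro. */
static void
ctl_or_die(const char *name, void *oldp, size_t *oldlenp, void *newp,
    size_t newlen)
{
    if (mallctl(name, oldp, oldlenp, newp, newlen) != 0) {
        fprintf(stderr, "mallctl(\"%s\", ...) failed\n", name);
        abort();
    }
}

int
main(void)
{
    const char *v;
    size_t len = sizeof(v);

    ctl_or_die("version", &v, &len, NULL, 0);
    printf("jemalloc version: %s\n", v);
    return (0);
}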
diff --git a/jemalloc/include/jemalloc/internal/huge.h b/jemalloc/include/jemalloc/internal/huge.h
index 0c0582f..bf23127 100644
--- a/jemalloc/include/jemalloc/internal/huge.h
+++ b/jemalloc/include/jemalloc/internal/huge.h
@@ -20,8 +20,11 @@
extern malloc_mutex_t huge_mtx;
void *huge_malloc(size_t size, bool zero);
-void *huge_palloc(size_t alignment, size_t size);
-void *huge_ralloc(void *ptr, size_t size, size_t oldsize);
+void *huge_palloc(size_t size, size_t alignment, bool zero);
+void *huge_ralloc_no_move(void *ptr, size_t oldsize, size_t size,
+ size_t extra);
+void *huge_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra,
+ size_t alignment, bool zero);
void huge_dalloc(void *ptr);
size_t huge_salloc(const void *ptr);
#ifdef JEMALLOC_PROF
diff --git a/jemalloc/include/jemalloc/internal/jemalloc_internal.h.in b/jemalloc/include/jemalloc/internal/jemalloc_internal.h.in
index 2c3f32f..3d25300 100644
--- a/jemalloc/include/jemalloc/internal/jemalloc_internal.h.in
+++ b/jemalloc/include/jemalloc/internal/jemalloc_internal.h.in
@@ -17,16 +17,29 @@
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
+#include <stddef.h>
+#ifndef offsetof
+# define offsetof(type, member) ((size_t)&(((type *)NULL)->member))
+#endif
#include <inttypes.h>
#include <string.h>
#include <strings.h>
+#include <ctype.h>
#include <unistd.h>
#include <fcntl.h>
#include <pthread.h>
+#include <math.h>
#define JEMALLOC_MANGLE
#include "../jemalloc@install_suffix@.h"
+#ifdef JEMALLOC_ZONE
+#include <mach/mach_error.h>
+#include <mach/mach_init.h>
+#include <mach/vm_map.h>
+#include <malloc/malloc.h>
+#endif
+
#ifdef JEMALLOC_LAZY_LOCK
#include <dlfcn.h>
#endif
@@ -49,7 +62,7 @@
malloc_write("<jemalloc>: "); \
malloc_write(__FILE__); \
malloc_write(":"); \
- malloc_write(umax2s(__LINE__, 10, line_buf)); \
+ malloc_write(u2s(__LINE__, 10, line_buf)); \
malloc_write(": Failed assertion: "); \
malloc_write("\""); \
malloc_write(#e); \
@@ -77,6 +90,8 @@
/******************************************************************************/
#define JEMALLOC_H_TYPES
+#define ALLOCM_LG_ALIGN_MASK ((int)0x3f)
+
#define ZU(z) ((size_t)z)
#ifndef __DECONST
@@ -92,8 +107,8 @@
# define JEMALLOC_INLINE static inline
#endif
-/* Size of stack-allocated buffer passed to strerror_r(). */
-#define STRERROR_BUF 64
+/* Size of stack-allocated buffer passed to buferror(). */
+#define BUFERROR_BUF 64
/* Minimum alignment of allocations is 2^LG_QUANTUM bytes. */
#ifdef __i386__
@@ -159,6 +174,16 @@
#define STATIC_PAGE_SIZE ((size_t)(1U << STATIC_PAGE_SHIFT))
#define STATIC_PAGE_MASK ((size_t)(STATIC_PAGE_SIZE - 1))
+#ifdef PAGE_SHIFT
+# undef PAGE_SHIFT
+#endif
+#ifdef PAGE_SIZE
+# undef PAGE_SIZE
+#endif
+#ifdef PAGE_MASK
+# undef PAGE_MASK
+#endif
+
#ifdef DYNAMIC_PAGE_SHIFT
# define PAGE_SHIFT lg_pagesize
# define PAGE_SIZE pagesize
@@ -184,8 +209,12 @@
#include "jemalloc/internal/base.h"
#include "jemalloc/internal/chunk.h"
#include "jemalloc/internal/huge.h"
+#include "jemalloc/internal/rtree.h"
#include "jemalloc/internal/tcache.h"
#include "jemalloc/internal/hash.h"
+#ifdef JEMALLOC_ZONE
+#include "jemalloc/internal/zone.h"
+#endif
#include "jemalloc/internal/prof.h"
#undef JEMALLOC_H_TYPES
@@ -203,8 +232,12 @@
#include "jemalloc/internal/base.h"
#include "jemalloc/internal/chunk.h"
#include "jemalloc/internal/huge.h"
+#include "jemalloc/internal/rtree.h"
#include "jemalloc/internal/tcache.h"
#include "jemalloc/internal/hash.h"
+#ifdef JEMALLOC_ZONE
+#include "jemalloc/internal/zone.h"
+#endif
#include "jemalloc/internal/prof.h"
#undef JEMALLOC_H_STRUCTS
@@ -224,6 +257,7 @@
#ifdef JEMALLOC_FILL
extern bool opt_zero;
#endif
+extern size_t opt_narenas;
#ifdef DYNAMIC_PAGE_SHIFT
extern size_t pagesize;
@@ -240,8 +274,19 @@
* Map of pthread_self() --> arenas[???], used for selecting an arena to use
* for allocations.
*/
-extern __thread arena_t *arenas_map JEMALLOC_ATTR(tls_model("initial-exec"));
+extern __thread arena_t *arenas_tls JEMALLOC_ATTR(tls_model("initial-exec"));
+# define ARENA_GET() arenas_tls
+# define ARENA_SET(v) do { \
+ arenas_tls = (v); \
+} while (0)
+#else
+extern pthread_key_t arenas_tsd;
+# define ARENA_GET() ((arena_t *)pthread_getspecific(arenas_tsd))
+# define ARENA_SET(v) do { \
+ pthread_setspecific(arenas_tsd, (void *)(v)); \
+} while (0)
#endif

+
/*
* Arenas that are used to service external requests. Not all elements of the
* arenas array are necessarily used; arenas are created lazily as needed.
@@ -249,11 +294,56 @@
extern arena_t **arenas;
extern unsigned narenas;
-arena_t *arenas_extend(unsigned ind);
-#ifndef NO_TLS
-arena_t *choose_arena_hard(void);
+#ifdef JEMALLOC_STATS
+typedef struct {
+ uint64_t allocated;
+ uint64_t deallocated;
+} thread_allocated_t;
+# ifndef NO_TLS
+extern __thread thread_allocated_t thread_allocated_tls;
+# define ALLOCATED_GET() thread_allocated_tls.allocated
+# define DEALLOCATED_GET() thread_allocated_tls.deallocated
+# define ALLOCATED_ADD(a, d) do { \
+ thread_allocated_tls.allocated += a; \
+ thread_allocated_tls.deallocated += d; \
+} while (0)
+# else
+extern pthread_key_t thread_allocated_tsd;
+# define ALLOCATED_GET() \
+ (uint64_t)((pthread_getspecific(thread_allocated_tsd) != NULL) \
+ ? ((thread_allocated_t *) \
+ pthread_getspecific(thread_allocated_tsd))->allocated : 0)
+# define DEALLOCATED_GET() \
+ (uint64_t)((pthread_getspecific(thread_allocated_tsd) != NULL) \
+ ? ((thread_allocated_t \
+ *)pthread_getspecific(thread_allocated_tsd))->deallocated : \
+ 0)
+# define ALLOCATED_ADD(a, d) do { \
+ thread_allocated_t *thread_allocated = (thread_allocated_t *) \
+ pthread_getspecific(thread_allocated_tsd); \
+ if (thread_allocated != NULL) { \
+ thread_allocated->allocated += (a); \
+ thread_allocated->deallocated += (d); \
+ } else { \
+ thread_allocated = (thread_allocated_t *) \
+ imalloc(sizeof(thread_allocated_t)); \
+ if (thread_allocated != NULL) { \
+ pthread_setspecific(thread_allocated_tsd, \
+ thread_allocated); \
+ thread_allocated->allocated = (a); \
+ thread_allocated->deallocated = (d); \
+ } \
+ } \
+} while (0)
+# endif
#endif
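When TLS is unavailable, ALLOCATED_ADD() falls back to a pthread TSD slot that is populated lazily: the first time a thread records an allocation, the counter struct itself is allocated and registered via pthread_setspecific(), and the update is silently dropped if that allocation fails. A minimal standalone sketch of that lazy-registration pattern follows; the names are hypothetical and ordinary malloc() stands in for imalloc().

#include <pthread.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

typedef struct {
    uint64_t allocated;
    uint64_t deallocated;
} counters_t;

static pthread_key_t counters_tsd;

static void
counters_add(uint64_t a, uint64_t d)
{
    counters_t *c = pthread_getspecific(counters_tsd);

    if (c == NULL) {
        /* First use in this thread: allocate and register. */
        c = malloc(sizeof(*c));
        if (c == NULL)
            return;  /* Drop the update, as the macro does on OOM. */
        c->allocated = 0;
        c->deallocated = 0;
        pthread_setspecific(counters_tsd, c);
    }
    c->allocated += a;
    c->deallocated += d;
}

int
main(void)
{
    counters_t *c;

    pthread_key_create(&counters_tsd, free);
    counters_add(4096, 0);
    counters_add(0, 4096);
    c = pthread_getspecific(counters_tsd);
    printf("allocated=%llu deallocated=%llu\n",
        (unsigned long long)c->allocated,
        (unsigned long long)c->deallocated);
    return (0);
}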
+arena_t *arenas_extend(unsigned ind);
+arena_t *choose_arena_hard(void);
+int buferror(int errnum, char *buf, size_t buflen);
+void jemalloc_prefork(void);
+void jemalloc_postfork(void);
+
#include "jemalloc/internal/prn.h"
#include "jemalloc/internal/ckh.h"
#include "jemalloc/internal/stats.h"
@@ -265,8 +355,12 @@
#include "jemalloc/internal/base.h"
#include "jemalloc/internal/chunk.h"
#include "jemalloc/internal/huge.h"
+#include "jemalloc/internal/rtree.h"
#include "jemalloc/internal/tcache.h"
#include "jemalloc/internal/hash.h"
+#ifdef JEMALLOC_ZONE
+#include "jemalloc/internal/zone.h"
+#endif
#include "jemalloc/internal/prof.h"
#undef JEMALLOC_H_EXTERNS
@@ -285,11 +379,143 @@
#include "jemalloc/internal/huge.h"
#ifndef JEMALLOC_ENABLE_INLINE
+size_t pow2_ceil(size_t x);
+size_t s2u(size_t size);
+size_t sa2u(size_t size, size_t alignment, size_t *run_size_p);
void malloc_write(const char *s);
arena_t *choose_arena(void);
#endif
#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_))
+/* Compute the smallest power of 2 that is >= x. */
+JEMALLOC_INLINE size_t
+pow2_ceil(size_t x)
+{
+
+ x--;
+ x |= x >> 1;
+ x |= x >> 2;
+ x |= x >> 4;
+ x |= x >> 8;
+ x |= x >> 16;
+#if (LG_SIZEOF_PTR == 3)
+ x |= x >> 32;
+#endif
+ x++;
+ return (x);
+}
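pow2_ceil() works by smearing the highest set bit of x-1 into every lower position and then adding one, so exact powers of two are returned unchanged. A standalone check of the edge cases follows; the LG_SIZEOF_PTR test is replaced by a SIZE_MAX test so the sketch compiles on its own.

#include <assert.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

static size_t
pow2_ceil(size_t x)
{
    x--;
    x |= x >> 1;
    x |= x >> 2;
    x |= x >> 4;
    x |= x >> 8;
    x |= x >> 16;
#if (SIZE_MAX > 0xffffffffU)  /* 64-bit size_t. */
    x |= x >> 32;
#endif
    x++;
    return (x);
}

int
main(void)
{
    assert(pow2_ceil(1) == 1);
    assert(pow2_ceil(13) == 16);
    assert(pow2_ceil(4096) == 4096);  /* Powers of two are unchanged. */
    assert(pow2_ceil(4097) == 8192);
    printf("pow2_ceil() edge cases pass\n");
    return (0);
}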
+
+/*
+ * Compute usable size that would result from allocating an object with the
+ * specified size.
+ */
+JEMALLOC_INLINE size_t
+s2u(size_t size)
+{
+
+ if (size <= small_maxclass)
+ return arenas[0]->bins[small_size2bin[size]].reg_size;
+ if (size <= arena_maxclass)
+ return PAGE_CEILING(size);
+ return CHUNK_CEILING(size);
+}
+
+/*
+ * Compute usable size that would result from allocating an object with the
+ * specified size and alignment.
+ */
+JEMALLOC_INLINE size_t
+sa2u(size_t size, size_t alignment, size_t *run_size_p)
+{
+ size_t usize;
+
+ /*
+ * Round size up to the nearest multiple of alignment.
+ *
+ * This done, we can take advantage of the fact that for each small
+ * size class, every object is aligned at the smallest power of two
+ * that is non-zero in the base two representation of the size. For
+ * example:
+ *
+ * Size | Base 2 | Minimum alignment
+ * -----+----------+------------------
+ * 96 | 1100000 | 32
+ * 144 | 10100000 | 32
+ * 192 | 11000000 | 64
+ *
+ * Depending on runtime settings, it is possible that arena_malloc()
+ * will further round up to a power of two, but that never causes
+ * correctness issues.
+ */
+ usize = (size + (alignment - 1)) & (-alignment);
+ /*
+ * (usize < size) protects against the combination of maximal
+ * alignment and size greater than maximal alignment.
+ */
+ if (usize < size) {
+ /* size_t overflow. */
+ return (0);
+ }
+
+ if (usize <= arena_maxclass && alignment <= PAGE_SIZE) {
+ if (usize <= small_maxclass) {
+ return
+ (arenas[0]->bins[small_size2bin[usize]].reg_size);
+ }
+ return (PAGE_CEILING(usize));
+ } else {
+ size_t run_size;
+
+ /*
+ * We can't achieve subpage alignment, so round up alignment
+ * permanently; it makes later calculations simpler.
+ */
+ alignment = PAGE_CEILING(alignment);
+ usize = PAGE_CEILING(size);
+ /*
+ * (usize < size) protects against very large sizes within
+ * PAGE_SIZE of SIZE_T_MAX.
+ *
+ * (usize + alignment < usize) protects against the
+ * combination of maximal alignment and usize large enough
+ * to cause overflow. This is similar to the first overflow
+ * check above, but it needs to be repeated due to the new
+ * usize value, which may now be *equal* to maximal
+ * alignment, whereas before we only detected overflow if the
+ * original size was *greater* than maximal alignment.
+ */
+ if (usize < size || usize + alignment < usize) {
+ /* size_t overflow. */
+ return (0);
+ }
+
+ /*
+ * Calculate the size of the over-size run that arena_palloc()
+ * would need to allocate in order to guarantee the alignment.
+ */
+ if (usize >= alignment)
+ run_size = usize + alignment - PAGE_SIZE;
+ else {
+ /*
+ * It is possible that (alignment << 1) will cause
+ * overflow, but it doesn't matter because we also
+ * subtract PAGE_SIZE, which in the case of overflow
+ * leaves us with a very large run_size. That causes
+ * the first conditional below to fail, which means
+ * that the bogus run_size value never gets used for
+ * anything important.
+ */
+ run_size = (alignment << 1) - PAGE_SIZE;
+ }
+ if (run_size_p != NULL)
+ *run_size_p = run_size;
+
+ if (run_size <= arena_maxclass)
+ return (PAGE_CEILING(usize));
+ return (CHUNK_CEILING(usize));
+ }
+}
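The first step of sa2u(), (size + (alignment - 1)) & -alignment, rounds size up to a multiple of a power-of-two alignment and is what makes the table above work out; the (usize < size) test rejects the wrap-around case. A standalone check with the table's values follows; align_ceil() is an illustrative name, not a jemalloc function.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Round size up to a multiple of a power-of-two alignment; 0 on overflow. */
static size_t
align_ceil(size_t size, size_t alignment)
{
    size_t usize = (size + (alignment - 1)) & -alignment;

    return ((usize < size) ? 0 : usize);
}

int
main(void)
{
    assert(align_ceil(96, 32) == 96);   /* Already a multiple of 32. */
    assert(align_ceil(97, 32) == 128);
    assert(align_ceil(192, 64) == 192);
    /* Maximal alignment plus a larger size wraps around and is rejected. */
    assert(align_ceil(((size_t)1 << (sizeof(size_t) * 8 - 1)) + 1,
        (size_t)1 << (sizeof(size_t) * 8 - 1)) == 0);
    printf("alignment rounding checks pass\n");
    return (0);
}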
+
/*
* Wrapper around malloc_message() that avoids the need for
* JEMALLOC_P(malloc_message)(...) throughout the code.
@@ -310,78 +536,35 @@
{
arena_t *ret;
- /*
- * We can only use TLS if this is a PIC library, since for the static
- * library version, libc's malloc is used by TLS allocation, which
- * introduces a bootstrapping issue.
- */
-#ifndef NO_TLS
- ret = arenas_map;
+ ret = ARENA_GET();
if (ret == NULL) {
ret = choose_arena_hard();
assert(ret != NULL);
}
-#else
- if (isthreaded && narenas > 1) {
- unsigned long ind;
- /*
- * Hash pthread_self() to one of the arenas. There is a prime
- * number of arenas, so this has a reasonable chance of
- * working. Even so, the hashing can be easily thwarted by
- * inconvenient pthread_self() values. Without specific
- * knowledge of how pthread_self() calculates values, we can't
- * easily do much better than this.
- */
- ind = (unsigned long) pthread_self() % narenas;
-
- /*
- * Optimistially assume that arenas[ind] has been initialized.
- * At worst, we find out that some other thread has already
- * done so, after acquiring the lock in preparation. Note that
- * this lazy locking also has the effect of lazily forcing
- * cache coherency; without the lock acquisition, there's no
- * guarantee that modification of arenas[ind] by another thread
- * would be seen on this CPU for an arbitrary amount of time.
- *
- * In general, this approach to modifying a synchronized value
- * isn't a good idea, but in this case we only ever modify the
- * value once, so things work out well.
- */
- ret = arenas[ind];
- if (ret == NULL) {
- /*
- * Avoid races with another thread that may have already
- * initialized arenas[ind].
- */
- malloc_mutex_lock(&arenas_lock);
- if (arenas[ind] == NULL)
- ret = arenas_extend((unsigned)ind);
- else
- ret = arenas[ind];
- malloc_mutex_unlock(&arenas_lock);
- }
- } else
- ret = arenas[0];
-#endif
-
- assert(ret != NULL);
return (ret);
}
#endif
+#include "jemalloc/internal/rtree.h"
#include "jemalloc/internal/tcache.h"
#include "jemalloc/internal/arena.h"
#include "jemalloc/internal/hash.h"
-#include "jemalloc/internal/prof.h"
+#ifdef JEMALLOC_ZONE
+#include "jemalloc/internal/zone.h"
+#endif
#ifndef JEMALLOC_ENABLE_INLINE
void *imalloc(size_t size);
void *icalloc(size_t size);
-void *ipalloc(size_t alignment, size_t size);
+void *ipalloc(size_t size, size_t alignment, bool zero);
size_t isalloc(const void *ptr);
-void *iralloc(void *ptr, size_t size);
+# ifdef JEMALLOC_IVSALLOC
+size_t ivsalloc(const void *ptr);
+# endif
void idalloc(void *ptr);
+void *iralloc(void *ptr, size_t size, size_t extra, size_t alignment,
+ bool zero, bool no_move);
#endif
#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_))
@@ -408,95 +591,28 @@
}
JEMALLOC_INLINE void *
-ipalloc(size_t alignment, size_t size)
+ipalloc(size_t size, size_t alignment, bool zero)
{
void *ret;
- size_t ceil_size;
+ size_t usize;
+ size_t run_size
+# ifdef JEMALLOC_CC_SILENCE
+ = 0
+# endif
+ ;
- /*
- * Round size up to the nearest multiple of alignment.
- *
- * This done, we can take advantage of the fact that for each small
- * size class, every object is aligned at the smallest power of two
- * that is non-zero in the base two representation of the size. For
- * example:
- *
- * Size | Base 2 | Minimum alignment
- * -----+----------+------------------
- * 96 | 1100000 | 32
- * 144 | 10100000 | 32
- * 192 | 11000000 | 64
- *
- * Depending on runtime settings, it is possible that arena_malloc()
- * will further round up to a power of two, but that never causes
- * correctness issues.
- */
- ceil_size = (size + (alignment - 1)) & (-alignment);
- /*
- * (ceil_size < size) protects against the combination of maximal
- * alignment and size greater than maximal alignment.
- */
- if (ceil_size < size) {
- /* size_t overflow. */
+ usize = sa2u(size, alignment, &run_size);
+ if (usize == 0)
return (NULL);
- }
-
- if (ceil_size <= PAGE_SIZE || (alignment <= PAGE_SIZE
- && ceil_size <= arena_maxclass))
- ret = arena_malloc(ceil_size, false);
- else {
- size_t run_size;
-
- /*
- * We can't achieve subpage alignment, so round up alignment
- * permanently; it makes later calculations simpler.
- */
- alignment = PAGE_CEILING(alignment);
- ceil_size = PAGE_CEILING(size);
- /*
- * (ceil_size < size) protects against very large sizes within
- * PAGE_SIZE of SIZE_T_MAX.
- *
- * (ceil_size + alignment < ceil_size) protects against the
- * combination of maximal alignment and ceil_size large enough
- * to cause overflow. This is similar to the first overflow
- * check above, but it needs to be repeated due to the new
- * ceil_size value, which may now be *equal* to maximal
- * alignment, whereas before we only detected overflow if the
- * original size was *greater* than maximal alignment.
- */
- if (ceil_size < size || ceil_size + alignment < ceil_size) {
- /* size_t overflow. */
- return (NULL);
- }
-
- /*
- * Calculate the size of the over-size run that arena_palloc()
- * would need to allocate in order to guarantee the alignment.
- */
- if (ceil_size >= alignment)
- run_size = ceil_size + alignment - PAGE_SIZE;
- else {
- /*
- * It is possible that (alignment << 1) will cause
- * overflow, but it doesn't matter because we also
- * subtract PAGE_SIZE, which in the case of overflow
- * leaves us with a very large run_size. That causes
- * the first conditional below to fail, which means
- * that the bogus run_size value never gets used for
- * anything important.
- */
- run_size = (alignment << 1) - PAGE_SIZE;
- }
-
- if (run_size <= arena_maxclass) {
- ret = arena_palloc(choose_arena(), alignment, ceil_size,
- run_size);
- } else if (alignment <= chunksize)
- ret = huge_malloc(ceil_size, false);
- else
- ret = huge_palloc(alignment, ceil_size);
- }
+ if (usize <= arena_maxclass && alignment <= PAGE_SIZE)
+ ret = arena_malloc(usize, zero);
+ else if (run_size <= arena_maxclass) {
+ ret = arena_palloc(choose_arena(), usize, run_size, alignment,
+ zero);
+ } else if (alignment <= chunksize)
+ ret = huge_malloc(usize, zero);
+ else
+ ret = huge_palloc(usize, alignment, zero);
assert(((uintptr_t)ret & (alignment - 1)) == 0);
return (ret);
@@ -526,21 +642,18 @@
return (ret);
}
-JEMALLOC_INLINE void *
-iralloc(void *ptr, size_t size)
+#ifdef JEMALLOC_IVSALLOC
+JEMALLOC_INLINE size_t
+ivsalloc(const void *ptr)
{
- size_t oldsize;
- assert(ptr != NULL);
- assert(size != 0);
+ /* Return 0 if ptr is not within a chunk managed by jemalloc. */
+ if (rtree_get(chunks_rtree, (uintptr_t)CHUNK_ADDR2BASE(ptr)) == NULL)
+ return (0);
- oldsize = isalloc(ptr);
-
- if (size <= arena_maxclass)
- return (arena_ralloc(ptr, size, oldsize));
- else
- return (huge_ralloc(ptr, size, oldsize));
+ return (isalloc(ptr));
}
+#endif
JEMALLOC_INLINE void
idalloc(void *ptr)
@@ -555,7 +668,70 @@
else
huge_dalloc(ptr);
}
+
+JEMALLOC_INLINE void *
+iralloc(void *ptr, size_t size, size_t extra, size_t alignment, bool zero,
+ bool no_move)
+{
+ void *ret;
+ size_t oldsize;
+
+ assert(ptr != NULL);
+ assert(size != 0);
+
+ oldsize = isalloc(ptr);
+
+ if (alignment != 0 && ((uintptr_t)ptr & ((uintptr_t)alignment-1))
+ != 0) {
+ size_t copysize;
+
+ /*
+		 * Existing object alignment is inadequate; allocate new space

+ * and copy.
+ */
+ if (no_move)
+ return (NULL);
+ ret = ipalloc(size + extra, alignment, zero);
+ if (ret == NULL) {
+ if (extra == 0)
+ return (NULL);
+ /* Try again, without extra this time. */
+ ret = ipalloc(size, alignment, zero);
+ if (ret == NULL)
+ return (NULL);
+ }
+ /*
+ * Copy at most size bytes (not size+extra), since the caller
+ * has no expectation that the extra bytes will be reliably
+ * preserved.
+ */
+ copysize = (size < oldsize) ? size : oldsize;
+ memcpy(ret, ptr, copysize);
+ idalloc(ptr);
+ return (ret);
+ }
+
+ if (no_move) {
+ if (size <= arena_maxclass) {
+ return (arena_ralloc_no_move(ptr, oldsize, size,
+ extra, zero));
+ } else {
+ return (huge_ralloc_no_move(ptr, oldsize, size,
+ extra));
+ }
+ } else {
+ if (size + extra <= arena_maxclass) {
+ return (arena_ralloc(ptr, oldsize, size, extra,
+ alignment, zero));
+ } else {
+ return (huge_ralloc(ptr, oldsize, size, extra,
+ alignment, zero));
+ }
+ }
+}
#endif
+#include "jemalloc/internal/prof.h"
+
#undef JEMALLOC_H_INLINES
/******************************************************************************/
diff --git a/jemalloc/include/jemalloc/internal/mutex.h b/jemalloc/include/jemalloc/internal/mutex.h
index 108bfa8..dcca01e 100644
--- a/jemalloc/include/jemalloc/internal/mutex.h
+++ b/jemalloc/include/jemalloc/internal/mutex.h
@@ -3,6 +3,12 @@
typedef pthread_mutex_t malloc_mutex_t;
+#ifdef PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP
+# define MALLOC_MUTEX_INITIALIZER PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP
+#else
+# define MALLOC_MUTEX_INITIALIZER PTHREAD_MUTEX_INITIALIZER
+#endif
+
#endif /* JEMALLOC_H_TYPES */
/******************************************************************************/
#ifdef JEMALLOC_H_STRUCTS
@@ -18,6 +24,7 @@
#endif
bool malloc_mutex_init(malloc_mutex_t *mutex);
+void malloc_mutex_destroy(malloc_mutex_t *mutex);
#endif /* JEMALLOC_H_EXTERNS */
/******************************************************************************/
diff --git a/jemalloc/include/jemalloc/internal/prof.h b/jemalloc/include/jemalloc/internal/prof.h
index fb55fb9..7864000 100644
--- a/jemalloc/include/jemalloc/internal/prof.h
+++ b/jemalloc/include/jemalloc/internal/prof.h
@@ -6,20 +6,25 @@
typedef struct prof_cnt_s prof_cnt_t;
typedef struct prof_thr_cnt_s prof_thr_cnt_t;
typedef struct prof_ctx_s prof_ctx_t;
-typedef struct prof_s prof_t;
+typedef struct prof_tdata_s prof_tdata_t;
/* Option defaults. */
-#define LG_PROF_BT_MAX_DEFAULT 2
+#define PROF_PREFIX_DEFAULT "jeprof"
+#define LG_PROF_BT_MAX_DEFAULT 7
#define LG_PROF_SAMPLE_DEFAULT 0
-#define LG_PROF_INTERVAL_DEFAULT 30
+#define LG_PROF_INTERVAL_DEFAULT -1
+#define LG_PROF_TCMAX_DEFAULT -1
/*
* Hard limit on stack backtrace depth. Note that the version of
* prof_backtrace() that is based on __builtin_return_address() necessarily has
- * a hard-coded number of backtrace frame handlers, so increasing
- * LG_PROF_BT_MAX requires changing prof_backtrace().
+ * a hard-coded number of backtrace frame handlers.
*/
-#define LG_PROF_BT_MAX 7 /* >= LG_PROF_BT_MAX_DEFAULT */
+#if (defined(JEMALLOC_PROF_LIBGCC) || defined(JEMALLOC_PROF_LIBUNWIND))
+# define LG_PROF_BT_MAX ((ZU(1) << (LG_SIZEOF_PTR+3)) - 1)
+#else
+# define LG_PROF_BT_MAX 7 /* >= LG_PROF_BT_MAX_DEFAULT */
+#endif
#define PROF_BT_MAX (1U << LG_PROF_BT_MAX)
/* Initial hash table size. */
@@ -34,16 +39,16 @@
struct prof_bt_s {
/* Backtrace, stored as len program counters. */
- void **vec;
- unsigned len;
+ void **vec;
+ unsigned len;
};
#ifdef JEMALLOC_PROF_LIBGCC
/* Data structure passed to libgcc _Unwind_Backtrace() callback functions. */
typedef struct {
- prof_bt_t *bt;
- unsigned nignore;
- unsigned max;
+ prof_bt_t *bt;
+ unsigned nignore;
+ unsigned max;
} prof_unwind_data_t;
#endif
@@ -51,11 +56,11 @@
/*
* Profiling counters. An allocation/deallocation pair can operate on
* different prof_thr_cnt_t objects that are linked into the same
- * prof_ctx_t sets_ql, so it is possible for the cur* counters to go
+ * prof_ctx_t cnts_ql, so it is possible for the cur* counters to go
* negative. In principle it is possible for the *bytes counters to
- * overflow/underflow, but a general solution would require some form
- * of 128-bit counter solution; this implementation doesn't bother to
- * solve that problem.
+ * overflow/underflow, but a general solution would require something
+ * like 128-bit counters; this implementation doesn't bother to solve
+ * that problem.
*/
int64_t curobjs;
int64_t curbytes;
@@ -64,15 +69,18 @@
};
struct prof_thr_cnt_s {
- /* Linkage into prof_ctx_t's sets_ql. */
- ql_elm(prof_thr_cnt_t) link;
+ /* Linkage into prof_ctx_t's cnts_ql. */
+ ql_elm(prof_thr_cnt_t) cnts_link;
+
+ /* Linkage into thread's LRU. */
+ ql_elm(prof_thr_cnt_t) lru_link;
/*
* Associated context. If a thread frees an object that it did not
* allocate, it is possible that the context is not cached in the
* thread's hash table, in which case it must be able to look up the
* context, insert a new prof_thr_cnt_t into the thread's hash table,
- * and link it into the prof_ctx_t's sets_ql.
+ * and link it into the prof_ctx_t's cnts_ql.
*/
prof_ctx_t *ctx;
@@ -101,11 +109,11 @@
/* Associated backtrace. */
prof_bt_t *bt;
- /* Protects cnt_merged and sets_ql. */
+ /* Protects cnt_merged and cnts_ql. */
malloc_mutex_t lock;
- /* Temporary storage for aggregation during dump. */
- prof_cnt_t cnt_dump;
+ /* Temporary storage for summation during dump. */
+ prof_cnt_t cnt_summed;
/* When threads exit, they merge their stats into cnt_merged. */
prof_cnt_t cnt_merged;
@@ -117,6 +125,31 @@
ql_head(prof_thr_cnt_t) cnts_ql;
};
+struct prof_tdata_s {
+ /*
+ * Hash of (prof_bt_t *)-->(prof_thr_cnt_t *). Each thread keeps a
+ * cache of backtraces, with associated thread-specific prof_thr_cnt_t
+ * objects. Other threads may read the prof_thr_cnt_t contents, but no
+ * others will ever write them.
+ *
+ * Upon thread exit, the thread must merge all the prof_thr_cnt_t
+ * counter data into the associated prof_ctx_t objects, and unlink/free
+ * the prof_thr_cnt_t objects.
+ */
+ ckh_t bt2cnt;
+
+ /* LRU for contents of bt2cnt. */
+ ql_head(prof_thr_cnt_t) lru_ql;
+
+ /* Backtrace vector, used for calls to prof_backtrace(). */
+ void **vec;
+
+ /* Sampling state. */
+ uint64_t prn_state;
+ uint64_t threshold;
+ uint64_t accum;
+};
+
#endif /* JEMALLOC_H_STRUCTS */
/******************************************************************************/
#ifdef JEMALLOC_H_EXTERNS
@@ -129,11 +162,14 @@
* to notice state changes.
*/
extern bool opt_prof_active;
-extern size_t opt_lg_prof_bt_max; /* Maximum backtrace depth. */
-extern size_t opt_lg_prof_sample; /* Mean bytes between samples. */
+extern size_t opt_lg_prof_bt_max; /* Maximum backtrace depth. */
+extern size_t opt_lg_prof_sample; /* Mean bytes between samples. */
extern ssize_t opt_lg_prof_interval; /* lg(prof_interval). */
-extern bool opt_prof_udump; /* High-water memory dumping. */
-extern bool opt_prof_leak; /* Dump leak summary at exit. */
+extern bool opt_prof_gdump; /* High-water memory dumping. */
+extern bool opt_prof_leak; /* Dump leak summary at exit. */
+extern bool opt_prof_accum; /* Report cumulative bytes. */
+extern ssize_t	opt_lg_prof_tcmax;   /* lg(max per thread backtrace cache) */
+extern char opt_prof_prefix[PATH_MAX + 1];
/*
* Profile dump interval, measured in bytes allocated. Each arena triggers a
@@ -150,25 +186,362 @@
*/
extern bool prof_promote;
-bool prof_init(prof_t *prof, bool master);
-void prof_destroy(prof_t *prof);
+/* (1U << opt_lg_prof_bt_max). */
+extern unsigned prof_bt_max;
-prof_thr_cnt_t *prof_alloc_prep(size_t size);
-prof_ctx_t *prof_ctx_get(const void *ptr);
-void prof_malloc(const void *ptr, prof_thr_cnt_t *cnt);
-void prof_realloc(const void *ptr, prof_thr_cnt_t *cnt, const void *old_ptr,
- size_t old_size, prof_ctx_t *old_ctx);
-void prof_free(const void *ptr);
+/* Thread-specific backtrace cache, used to reduce bt2ctx contention. */
+#ifndef NO_TLS
+extern __thread prof_tdata_t *prof_tdata_tls
+ JEMALLOC_ATTR(tls_model("initial-exec"));
+# define PROF_TCACHE_GET() prof_tdata_tls
+# define PROF_TCACHE_SET(v) do { \
+ prof_tdata_tls = (v); \
+ pthread_setspecific(prof_tdata_tsd, (void *)(v)); \
+} while (0)
+#else
+# define PROF_TCACHE_GET() \
+ ((prof_tdata_t *)pthread_getspecific(prof_tdata_tsd))
+# define PROF_TCACHE_SET(v) do { \
+ pthread_setspecific(prof_tdata_tsd, (void *)(v)); \
+} while (0)
+#endif
+/*
+ * Same contents as prof_tdata_tls, but initialized such that the TSD destructor
+ * called when a thread exits, so that prof_tdata_tls contents can be merged,
+ * unlinked, and deallocated.
+ */
+extern pthread_key_t prof_tdata_tsd;
+
+void bt_init(prof_bt_t *bt, void **vec);
+void prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max);
+prof_thr_cnt_t *prof_lookup(prof_bt_t *bt);
void prof_idump(void);
bool prof_mdump(const char *filename);
-void prof_udump(void);
+void prof_gdump(void);
+prof_tdata_t *prof_tdata_init(void);
void prof_boot0(void);
-bool prof_boot1(void);
+void prof_boot1(void);
+bool prof_boot2(void);
#endif /* JEMALLOC_H_EXTERNS */
/******************************************************************************/
#ifdef JEMALLOC_H_INLINES
+#ifndef JEMALLOC_ENABLE_INLINE
+void prof_sample_threshold_update(prof_tdata_t *prof_tdata);
+prof_thr_cnt_t *prof_alloc_prep(size_t size);
+prof_ctx_t *prof_ctx_get(const void *ptr);
+void prof_ctx_set(const void *ptr, prof_ctx_t *ctx);
+bool prof_sample_accum_update(size_t size);
+void prof_malloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt);
+void prof_realloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt,
+ size_t old_size, prof_ctx_t *old_ctx);
+void prof_free(const void *ptr, size_t size);
+#endif
+
+#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_PROF_C_))
+JEMALLOC_INLINE void
+prof_sample_threshold_update(prof_tdata_t *prof_tdata)
+{
+ uint64_t r;
+ double u;
+
+ /*
+ * Compute prof_sample_threshold as a geometrically distributed random
+ * variable with mean (2^opt_lg_prof_sample).
+ */
+ prn64(r, 53, prof_tdata->prn_state,
+ (uint64_t)6364136223846793005LLU, (uint64_t)1442695040888963407LLU);
+ u = (double)r * (1.0/9007199254740992.0L);
+ prof_tdata->threshold = (uint64_t)(log(u) /
+ log(1.0 - (1.0 / (double)((uint64_t)1U << opt_lg_prof_sample))))
+ + (uint64_t)1U;
+}
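prof_sample_threshold_update() draws the byte distance to the next sample from a geometric distribution via the inverse CDF: with p = 2^-lg_prof_sample and u uniform in (0,1), threshold = floor(log(u)/log(1 - p)) + 1, which has mean 2^lg_prof_sample. The standalone simulation below checks that property; ordinary rand() stands in for the prn64() generator (link with -lm).

#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

int
main(void)
{
    const unsigned lg_sample = 10;              /* Mean ~2^10 bytes. */
    const double p = 1.0 / (double)(1U << lg_sample);
    const unsigned ndraws = 1000000;
    double sum = 0.0;
    unsigned i;

    srand(12345);
    for (i = 0; i < ndraws; i++) {
        /* Uniform in (0,1); never exactly 0, so log() is defined. */
        double u = ((double)rand() + 1.0) / ((double)RAND_MAX + 2.0);
        uint64_t threshold = (uint64_t)(log(u) / log(1.0 - p)) + 1;

        sum += (double)threshold;
    }
    printf("mean threshold = %.1f (expect about %u)\n", sum / ndraws,
        1U << lg_sample);
    return (0);
}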
+
+JEMALLOC_INLINE prof_thr_cnt_t *
+prof_alloc_prep(size_t size)
+{
+#ifdef JEMALLOC_ENABLE_INLINE
+ /* This function does not have its own stack frame, because it is inlined. */
+# define NIGNORE 1
+#else
+# define NIGNORE 2
+#endif
+ prof_thr_cnt_t *ret;
+ prof_tdata_t *prof_tdata;
+ prof_bt_t bt;
+
+ assert(size == s2u(size));
+
+ prof_tdata = PROF_TCACHE_GET();
+ if (prof_tdata == NULL) {
+ prof_tdata = prof_tdata_init();
+ if (prof_tdata == NULL)
+ return (NULL);
+ }
+
+ if (opt_prof_active == false) {
+ /* Sampling is currently inactive, so avoid sampling. */
+ ret = (prof_thr_cnt_t *)(uintptr_t)1U;
+ } else if (opt_lg_prof_sample == 0) {
+ /*
+ * Don't bother with sampling logic, since sampling interval is
+ * 1.
+ */
+ bt_init(&bt, prof_tdata->vec);
+ prof_backtrace(&bt, NIGNORE, prof_bt_max);
+ ret = prof_lookup(&bt);
+ } else {
+ if (prof_tdata->threshold == 0) {
+ /*
+ * Initialize. Seed the prng differently for each
+ * thread.
+ */
+ prof_tdata->prn_state = (uint64_t)(uintptr_t)&size;
+ prof_sample_threshold_update(prof_tdata);
+ }
+
+ /*
+ * Determine whether to capture a backtrace based on whether
+ * size is enough for prof_accum to reach
+ * prof_tdata->threshold. However, delay updating these
+ * variables until prof_{m,re}alloc(), because we don't know
+ * for sure that the allocation will succeed.
+ *
+ * Use subtraction rather than addition to avoid potential
+ * integer overflow.
+ */
+ if (size >= prof_tdata->threshold - prof_tdata->accum) {
+ bt_init(&bt, prof_tdata->vec);
+ prof_backtrace(&bt, NIGNORE, prof_bt_max);
+ ret = prof_lookup(&bt);
+ } else
+ ret = (prof_thr_cnt_t *)(uintptr_t)1U;
+ }
+
+ return (ret);
+#undef NIGNORE
+}
+
+JEMALLOC_INLINE prof_ctx_t *
+prof_ctx_get(const void *ptr)
+{
+ prof_ctx_t *ret;
+ arena_chunk_t *chunk;
+
+ assert(ptr != NULL);
+
+ chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
+ if (chunk != ptr) {
+ /* Region. */
+ assert(chunk->arena->magic == ARENA_MAGIC);
+
+ ret = arena_prof_ctx_get(ptr);
+ } else
+ ret = huge_prof_ctx_get(ptr);
+
+ return (ret);
+}
+
+JEMALLOC_INLINE void
+prof_ctx_set(const void *ptr, prof_ctx_t *ctx)
+{
+ arena_chunk_t *chunk;
+
+ assert(ptr != NULL);
+
+ chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
+ if (chunk != ptr) {
+ /* Region. */
+ assert(chunk->arena->magic == ARENA_MAGIC);
+
+ arena_prof_ctx_set(ptr, ctx);
+ } else
+ huge_prof_ctx_set(ptr, ctx);
+}
+
+JEMALLOC_INLINE bool
+prof_sample_accum_update(size_t size)
+{
+ prof_tdata_t *prof_tdata;
+
+ /* Sampling logic is unnecessary if the interval is 1. */
+ assert(opt_lg_prof_sample != 0);
+
+ prof_tdata = PROF_TCACHE_GET();
+ assert(prof_tdata != NULL);
+
+ /* Take care to avoid integer overflow. */
+ if (size >= prof_tdata->threshold - prof_tdata->accum) {
+ prof_tdata->accum -= (prof_tdata->threshold - size);
+ /* Compute new prof_sample_threshold. */
+ prof_sample_threshold_update(prof_tdata);
+ while (prof_tdata->accum >= prof_tdata->threshold) {
+ prof_tdata->accum -= prof_tdata->threshold;
+ prof_sample_threshold_update(prof_tdata);
+ }
+ return (false);
+ } else {
+ prof_tdata->accum += size;
+ return (true);
+ }
+}
+
+JEMALLOC_INLINE void
+prof_malloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt)
+{
+
+ assert(ptr != NULL);
+ assert(size == isalloc(ptr));
+
+ if (opt_lg_prof_sample != 0) {
+ if (prof_sample_accum_update(size)) {
+ /*
+ * Don't sample. For malloc()-like allocation, it is
+ * always possible to tell in advance how large an
+ * object's usable size will be, so there should never
+ * be a difference between the size passed to
+ * prof_alloc_prep() and prof_malloc().
+ */
+ assert((uintptr_t)cnt == (uintptr_t)1U);
+ }
+ }
+
+ if ((uintptr_t)cnt > (uintptr_t)1U) {
+ prof_ctx_set(ptr, cnt->ctx);
+
+ cnt->epoch++;
+ /*********/
+ mb_write();
+ /*********/
+ cnt->cnts.curobjs++;
+ cnt->cnts.curbytes += size;
+ if (opt_prof_accum) {
+ cnt->cnts.accumobjs++;
+ cnt->cnts.accumbytes += size;
+ }
+ /*********/
+ mb_write();
+ /*********/
+ cnt->epoch++;
+ /*********/
+ mb_write();
+ /*********/
+ } else
+ prof_ctx_set(ptr, (prof_ctx_t *)(uintptr_t)1U);
+}
+
+JEMALLOC_INLINE void
+prof_realloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt,
+ size_t old_size, prof_ctx_t *old_ctx)
+{
+ prof_thr_cnt_t *told_cnt;
+
+ assert(ptr != NULL || (uintptr_t)cnt <= (uintptr_t)1U);
+
+ if (ptr != NULL) {
+ assert(size == isalloc(ptr));
+ if (opt_lg_prof_sample != 0) {
+ if (prof_sample_accum_update(size)) {
+ /*
+ * Don't sample. The size passed to
+ * prof_alloc_prep() was larger than what
+ * actually got allocated, so a backtrace was
+ * captured for this allocation, even though
+ * its actual size was insufficient to cross
+ * the sample threshold.
+ */
+ cnt = (prof_thr_cnt_t *)(uintptr_t)1U;
+ }
+ }
+ }
+
+ if ((uintptr_t)old_ctx > (uintptr_t)1U) {
+ told_cnt = prof_lookup(old_ctx->bt);
+ if (told_cnt == NULL) {
+ /*
+ * It's too late to propagate OOM for this realloc(),
+ * so operate directly on old_cnt->ctx->cnt_merged.
+ */
+ malloc_mutex_lock(&old_ctx->lock);
+ old_ctx->cnt_merged.curobjs--;
+ old_ctx->cnt_merged.curbytes -= old_size;
+ malloc_mutex_unlock(&old_ctx->lock);
+ told_cnt = (prof_thr_cnt_t *)(uintptr_t)1U;
+ }
+ } else
+ told_cnt = (prof_thr_cnt_t *)(uintptr_t)1U;
+
+ if ((uintptr_t)told_cnt > (uintptr_t)1U)
+ told_cnt->epoch++;
+ if ((uintptr_t)cnt > (uintptr_t)1U) {
+ prof_ctx_set(ptr, cnt->ctx);
+ cnt->epoch++;
+ } else
+ prof_ctx_set(ptr, (prof_ctx_t *)(uintptr_t)1U);
+ /*********/
+ mb_write();
+ /*********/
+ if ((uintptr_t)told_cnt > (uintptr_t)1U) {
+ told_cnt->cnts.curobjs--;
+ told_cnt->cnts.curbytes -= old_size;
+ }
+ if ((uintptr_t)cnt > (uintptr_t)1U) {
+ cnt->cnts.curobjs++;
+ cnt->cnts.curbytes += size;
+ if (opt_prof_accum) {
+ cnt->cnts.accumobjs++;
+ cnt->cnts.accumbytes += size;
+ }
+ }
+ /*********/
+ mb_write();
+ /*********/
+ if ((uintptr_t)told_cnt > (uintptr_t)1U)
+ told_cnt->epoch++;
+ if ((uintptr_t)cnt > (uintptr_t)1U)
+ cnt->epoch++;
+ /*********/
+ mb_write(); /* Not strictly necessary. */
+}
+
+JEMALLOC_INLINE void
+prof_free(const void *ptr, size_t size)
+{
+ prof_ctx_t *ctx = prof_ctx_get(ptr);
+
+ if ((uintptr_t)ctx > (uintptr_t)1) {
+ assert(size == isalloc(ptr));
+ prof_thr_cnt_t *tcnt = prof_lookup(ctx->bt);
+
+ if (tcnt != NULL) {
+ tcnt->epoch++;
+ /*********/
+ mb_write();
+ /*********/
+ tcnt->cnts.curobjs--;
+ tcnt->cnts.curbytes -= size;
+ /*********/
+ mb_write();
+ /*********/
+ tcnt->epoch++;
+ /*********/
+ mb_write();
+ /*********/
+ } else {
+ /*
+ * OOM during free() cannot be propagated, so operate
+ * directly on cnt->ctx->cnt_merged.
+ */
+ malloc_mutex_lock(&ctx->lock);
+ ctx->cnt_merged.curobjs--;
+ ctx->cnt_merged.curbytes -= size;
+ malloc_mutex_unlock(&ctx->lock);
+ }
+ }
+}
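The epoch increments bracketing each counter update in prof_malloc(), prof_realloc(), and prof_free() form a writer-side seqlock-style protocol: the epoch is bumped (odd), the counters are modified, and the epoch is bumped again (even), with mb_write() ordering the stores. A reader that sees the same even epoch before and after copying the counters knows it got a consistent snapshot. The sketch below illustrates the matching reader loop; the real dump-time reader lives elsewhere in jemalloc, and memory barriers are elided here.

#include <stdio.h>

typedef struct {
    volatile unsigned epoch;
    volatile long curobjs;
    volatile long curbytes;
} snap_t;

static void
snap_write(snap_t *s, long dobjs, long dbytes)
{
    s->epoch++;                 /* Odd: update in progress. */
    s->curobjs += dobjs;
    s->curbytes += dbytes;
    s->epoch++;                 /* Even: update complete. */
}

static void
snap_read(const snap_t *s, long *objs, long *bytes)
{
    unsigned e0, e1;

    do {
        e0 = s->epoch;
        *objs = s->curobjs;
        *bytes = s->curbytes;
        e1 = s->epoch;
    } while (e0 != e1 || (e0 & 1) != 0);  /* Retry on a torn read. */
}

int
main(void)
{
    snap_t s = {0, 0, 0};
    long objs, bytes;

    snap_write(&s, 1, 4096);
    snap_read(&s, &objs, &bytes);
    printf("objs=%ld bytes=%ld\n", objs, bytes);
    return (0);
}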
+#endif
+
#endif /* JEMALLOC_H_INLINES */
/******************************************************************************/
#endif /* JEMALLOC_PROF */
diff --git a/jemalloc/include/jemalloc/internal/rtree.h b/jemalloc/include/jemalloc/internal/rtree.h
new file mode 100644
index 0000000..9d58eba
--- /dev/null
+++ b/jemalloc/include/jemalloc/internal/rtree.h
@@ -0,0 +1,161 @@
+/*
+ * This radix tree implementation is tailored to the singular purpose of
+ * tracking which chunks are currently owned by jemalloc. This functionality
+ * is mandatory for OS X, where jemalloc must be able to respond to object
+ * ownership queries.
+ *
+ *******************************************************************************
+ */
+#ifdef JEMALLOC_H_TYPES
+
+typedef struct rtree_s rtree_t;
+
+/*
+ * Size of each radix tree node (must be a power of 2). This impacts tree
+ * depth.
+ */
+#if (LG_SIZEOF_PTR == 2)
+# define RTREE_NODESIZE (1U << 14)
+#else
+# define RTREE_NODESIZE CACHELINE
+#endif
+
+#endif /* JEMALLOC_H_TYPES */
+/******************************************************************************/
+#ifdef JEMALLOC_H_STRUCTS
+
+struct rtree_s {
+ malloc_mutex_t mutex;
+ void **root;
+ unsigned height;
+ unsigned level2bits[1]; /* Dynamically sized. */
+};
+
+#endif /* JEMALLOC_H_STRUCTS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_EXTERNS
+
+rtree_t *rtree_new(unsigned bits);
+
+#endif /* JEMALLOC_H_EXTERNS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_INLINES
+
+#ifndef JEMALLOC_ENABLE_INLINE
+#ifndef JEMALLOC_DEBUG
+void *rtree_get_locked(rtree_t *rtree, uintptr_t key);
+#endif
+void *rtree_get(rtree_t *rtree, uintptr_t key);
+bool rtree_set(rtree_t *rtree, uintptr_t key, void *val);
+#endif
+
+#if (defined(JEMALLOC_ENABLE_INLINE) || defined(RTREE_C_))
+#define RTREE_GET_GENERATE(f) \
+/* The least significant bits of the key are ignored. */ \
+JEMALLOC_INLINE void * \
+f(rtree_t *rtree, uintptr_t key) \
+{ \
+ void *ret; \
+ uintptr_t subkey; \
+ unsigned i, lshift, height, bits; \
+ void **node, **child; \
+ \
+ RTREE_LOCK(&rtree->mutex); \
+ for (i = lshift = 0, height = rtree->height, node = rtree->root;\
+ i < height - 1; \
+ i++, lshift += bits, node = child) { \
+ bits = rtree->level2bits[i]; \
+ subkey = (key << lshift) >> ((ZU(1) << (LG_SIZEOF_PTR + \
+ 3)) - bits); \
+ child = (void**)node[subkey]; \
+ if (child == NULL) { \
+ RTREE_UNLOCK(&rtree->mutex); \
+ return (NULL); \
+ } \
+ } \
+ \
+ /* \
+ * node is a leaf, so it contains values rather than node \
+ * pointers. \
+ */ \
+ bits = rtree->level2bits[i]; \
+ subkey = (key << lshift) >> ((ZU(1) << (LG_SIZEOF_PTR+3)) - \
+ bits); \
+ ret = node[subkey]; \
+ RTREE_UNLOCK(&rtree->mutex); \
+ \
+ RTREE_GET_VALIDATE \
+ return (ret); \
+}
+
+#ifdef JEMALLOC_DEBUG
+# define RTREE_LOCK(l) malloc_mutex_lock(l)
+# define RTREE_UNLOCK(l) malloc_mutex_unlock(l)
+# define RTREE_GET_VALIDATE
+RTREE_GET_GENERATE(rtree_get_locked)
+# undef RTREE_LOCK
+# undef RTREE_UNLOCK
+# undef RTREE_GET_VALIDATE
+#endif
+
+#define RTREE_LOCK(l)
+#define RTREE_UNLOCK(l)
+#ifdef JEMALLOC_DEBUG
+ /*
+ * Suppose that it were possible for a jemalloc-allocated chunk to be
+ * munmap()ped, followed by a different allocator in another thread re-using
+ * overlapping virtual memory, all without invalidating the cached rtree
+ * value. The result would be a false positive (the rtree would claim that
+ * jemalloc owns memory that it had actually discarded). This scenario
+ * seems impossible, but the following assertion is a prudent sanity check.
+ */
+# define RTREE_GET_VALIDATE \
+ assert(rtree_get_locked(rtree, key) == ret);
+#else
+# define RTREE_GET_VALIDATE
+#endif
+RTREE_GET_GENERATE(rtree_get)
+#undef RTREE_LOCK
+#undef RTREE_UNLOCK
+#undef RTREE_GET_VALIDATE
+
+JEMALLOC_INLINE bool
+rtree_set(rtree_t *rtree, uintptr_t key, void *val)
+{
+ uintptr_t subkey;
+ unsigned i, lshift, height, bits;
+ void **node, **child;
+
+ malloc_mutex_lock(&rtree->mutex);
+ for (i = lshift = 0, height = rtree->height, node = rtree->root;
+ i < height - 1;
+ i++, lshift += bits, node = child) {
+ bits = rtree->level2bits[i];
+ subkey = (key << lshift) >> ((ZU(1) << (LG_SIZEOF_PTR+3)) -
+ bits);
+ child = (void**)node[subkey];
+ if (child == NULL) {
+ child = (void**)base_alloc(sizeof(void *) <<
+ rtree->level2bits[i+1]);
+ if (child == NULL) {
+ malloc_mutex_unlock(&rtree->mutex);
+ return (true);
+ }
+ memset(child, 0, sizeof(void *) <<
+ rtree->level2bits[i+1]);
+ node[subkey] = child;
+ }
+ }
+
+ /* node is a leaf, so it contains values rather than node pointers. */
+ bits = rtree->level2bits[i];
+ subkey = (key << lshift) >> ((ZU(1) << (LG_SIZEOF_PTR+3)) - bits);
+ node[subkey] = val;
+ malloc_mutex_unlock(&rtree->mutex);
+
+ return (false);
+}
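rtree_set() and rtree_get() give the allocator a yes/no answer to "is this address inside a chunk we own?", which is exactly what ivsalloc() needs on OS X. The fragment below is illustrative glue, not part of the patch: chunk_register() and chunk_owned() are sketches of the real call sites, and the tree is assumed to have been created with rtree_new(42), i.e. 64 address bits minus 22 bits of offset for 4 MiB chunks on a 64-bit system.

/* Illustrative only; depends on the internal rtree and chunk macros above. */
static rtree_t *chunks_rtree;           /* Created via rtree_new(42). */

static bool
chunk_register(void *chunk)
{
    /* Record ownership; returns true if rtree_set() hit OOM. */
    return (rtree_set(chunks_rtree, (uintptr_t)chunk, chunk));
}

static bool
chunk_owned(const void *ptr)
{
    /* Any pointer within a registered chunk maps to that chunk's key. */
    return (rtree_get(chunks_rtree, (uintptr_t)CHUNK_ADDR2BASE(ptr))
        != NULL);
}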
+#endif
+
+#endif /* JEMALLOC_H_INLINES */
+/******************************************************************************/
diff --git a/jemalloc/include/jemalloc/internal/stats.h b/jemalloc/include/jemalloc/internal/stats.h
index cbf035f..3fc2080 100644
--- a/jemalloc/include/jemalloc/internal/stats.h
+++ b/jemalloc/include/jemalloc/internal/stats.h
@@ -154,7 +154,7 @@
extern bool opt_stats_print;
-char *umax2s(uintmax_t x, unsigned base, char *s);
+char *u2s(uint64_t x, unsigned base, char *s);
#ifdef JEMALLOC_STATS
void malloc_cprintf(void (*write)(void *, const char *), void *cbopaque,
const char *format, ...) JEMALLOC_ATTR(format(printf, 3, 4));
diff --git a/jemalloc/include/jemalloc/internal/tcache.h b/jemalloc/include/jemalloc/internal/tcache.h
index a8be436..1ad91a9 100644
--- a/jemalloc/include/jemalloc/internal/tcache.h
+++ b/jemalloc/include/jemalloc/internal/tcache.h
@@ -17,7 +17,7 @@
/* Number of cache slots for large size classes. */
#define TCACHE_NSLOTS_LARGE 20
-/* (1U << opt_lg_tcache_maxclass) is used to compute tcache_maxclass. */
+/* (1U << opt_lg_tcache_max) is used to compute tcache_maxclass. */
#define LG_TCACHE_MAXCLASS_DEFAULT 15
/*
@@ -61,12 +61,25 @@
#ifdef JEMALLOC_H_EXTERNS
extern bool opt_tcache;
-extern ssize_t opt_lg_tcache_maxclass;
+extern ssize_t opt_lg_tcache_max;
extern ssize_t opt_lg_tcache_gc_sweep;
/* Map of thread-specific caches. */
+#ifndef NO_TLS
extern __thread tcache_t *tcache_tls
JEMALLOC_ATTR(tls_model("initial-exec"));
+# define TCACHE_GET() tcache_tls
+# define TCACHE_SET(v) do { \
+ tcache_tls = (tcache_t *)(v); \
+ pthread_setspecific(tcache_tsd, (void *)(v)); \
+} while (0)
+#else
+# define TCACHE_GET() ((tcache_t *)pthread_getspecific(tcache_tsd))
+# define TCACHE_SET(v) do { \
+ pthread_setspecific(tcache_tsd, (void *)(v)); \
+} while (0)
+#endif
+extern pthread_key_t tcache_tsd;
/*
* Number of tcache bins. There are nbins small-object bins, plus 0 or more
@@ -122,14 +135,23 @@
if ((isthreaded & opt_tcache) == false)
return (NULL);
- tcache = tcache_tls;
- if ((uintptr_t)tcache <= (uintptr_t)1) {
+ tcache = TCACHE_GET();
+ if ((uintptr_t)tcache <= (uintptr_t)2) {
if (tcache == NULL) {
tcache = tcache_create(choose_arena());
if (tcache == NULL)
return (NULL);
- } else
+ } else {
+ if (tcache == (void *)(uintptr_t)1) {
+ /*
+ * Make a note that an allocator function was
+ * called after the tcache_thread_cleanup() was
+ * called.
+ */
+ TCACHE_SET((uintptr_t)2);
+ }
return (NULL);
+ }
}
return (tcache);
@@ -258,9 +280,9 @@
} else {
#ifdef JEMALLOC_PROF
arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ret);
- size_t pageind = (unsigned)(((uintptr_t)ret - (uintptr_t)chunk)
- >> PAGE_SHIFT);
- chunk->map[pageind].bits |= CHUNK_MAP_CLASS_MASK;
+ size_t pageind = (((uintptr_t)ret - (uintptr_t)chunk) >>
+ PAGE_SHIFT);
+ chunk->map[pageind-map_bias].bits &= ~CHUNK_MAP_CLASS_MASK;
#endif
if (zero == false) {
#ifdef JEMALLOC_FILL
@@ -299,8 +321,8 @@
chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
arena = chunk->arena;
- pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT);
- mapelm = &chunk->map[pageind];
+ pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT;
+ mapelm = &chunk->map[pageind-map_bias];
run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind -
(mapelm->bits >> PAGE_SHIFT)) << PAGE_SHIFT));
assert(run->magic == ARENA_RUN_MAGIC);
@@ -339,7 +361,6 @@
arena_chunk_t *chunk;
size_t pageind, binind;
tcache_bin_t *tbin;
- arena_chunk_map_t *mapelm;
assert((size & PAGE_MASK) == 0);
assert(arena_salloc(ptr) > small_maxclass);
@@ -347,8 +368,7 @@
chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
arena = chunk->arena;
- pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT);
- mapelm = &chunk->map[pageind];
+ pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT;
binind = nbins + (size >> PAGE_SHIFT) - 1;
#ifdef JEMALLOC_FILL
diff --git a/jemalloc/include/jemalloc/internal/zone.h b/jemalloc/include/jemalloc/internal/zone.h
new file mode 100644
index 0000000..859b529
--- /dev/null
+++ b/jemalloc/include/jemalloc/internal/zone.h
@@ -0,0 +1,23 @@
+#ifndef JEMALLOC_ZONE
+# error "This source file is for zones on Darwin (OS X)."
+#endif
+/******************************************************************************/
+#ifdef JEMALLOC_H_TYPES
+
+#endif /* JEMALLOC_H_TYPES */
+/******************************************************************************/
+#ifdef JEMALLOC_H_STRUCTS
+
+#endif /* JEMALLOC_H_STRUCTS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_EXTERNS
+
+malloc_zone_t *create_zone(void);
+void szone2ozone(malloc_zone_t *zone);
+
+#endif /* JEMALLOC_H_EXTERNS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_INLINES
+
+#endif /* JEMALLOC_H_INLINES */
+/******************************************************************************/
diff --git a/jemalloc/include/jemalloc/jemalloc.h.in b/jemalloc/include/jemalloc/jemalloc.h.in
index 8ef8183..4dd3981 100644
--- a/jemalloc/include/jemalloc/jemalloc.h.in
+++ b/jemalloc/include/jemalloc/jemalloc.h.in
@@ -4,6 +4,9 @@
extern "C" {
#endif
+#include <limits.h>
+#include <strings.h>
+
#define JEMALLOC_VERSION "@jemalloc_version@"
#define JEMALLOC_VERSION_MAJOR @jemalloc_version_major@
#define JEMALLOC_VERSION_MINOR @jemalloc_version_minor@
@@ -16,7 +19,20 @@
# define JEMALLOC_P(s) s
#endif
-extern const char *JEMALLOC_P(malloc_options);
+#define ALLOCM_LG_ALIGN ((int)0x3f)
+#if LG_SIZEOF_PTR == 2
+#define ALLOCM_ALIGN(a) (ffs(a)-1)
+#else
+#define ALLOCM_ALIGN(a) ((a < (size_t)INT_MAX) ? ffs(a)-1 : ffs(a>>32)+31)
+#endif
+#define ALLOCM_ZERO ((int)0x40)
+#define ALLOCM_NO_MOVE ((int)0x80)
+
+#define ALLOCM_SUCCESS 0
+#define ALLOCM_ERR_OOM 1
+#define ALLOCM_ERR_NOT_MOVED 2
+
+extern const char *JEMALLOC_P(malloc_conf);
extern void (*JEMALLOC_P(malloc_message))(void *, const char *);
void *JEMALLOC_P(malloc)(size_t size) JEMALLOC_ATTR(malloc);
@@ -36,6 +52,14 @@
int JEMALLOC_P(mallctlbymib)(const size_t *mib, size_t miblen, void *oldp,
size_t *oldlenp, void *newp, size_t newlen);
+int JEMALLOC_P(allocm)(void **ptr, size_t *rsize, size_t size, int flags)
+ JEMALLOC_ATTR(nonnull(1));
+int JEMALLOC_P(rallocm)(void **ptr, size_t *rsize, size_t size,
+ size_t extra, int flags) JEMALLOC_ATTR(nonnull(1));
+int JEMALLOC_P(sallocm)(const void *ptr, size_t *rsize, int flags)
+ JEMALLOC_ATTR(nonnull(1));
+int JEMALLOC_P(dallocm)(void *ptr, int flags) JEMALLOC_ATTR(nonnull(1));
+
#ifdef __cplusplus
};
#endif
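For orientation, a minimal usage sketch of the experimental {,r,s,d}allocm() API declared above (not part of the patch). It assumes an unprefixed build, so JEMALLOC_P(allocm) is plain allocm, and that the installed header is reachable as <jemalloc/jemalloc.h>; ALLOCM_ALIGN(4096) encodes the alignment as its base-2 log (12) in the low flag bits.

	#include <stdio.h>
	#include <jemalloc/jemalloc.h>

	int
	main(void)
	{
		void *p;
		size_t rsize;

		/* At least 1000 bytes, 4096-byte aligned, zero-filled. */
		if (allocm(&p, &rsize, 1000, ALLOCM_ALIGN(4096) | ALLOCM_ZERO)
		    != ALLOCM_SUCCESS)
			return (1);
		printf("usable size: %zu\n", rsize);

		/* Try to double the allocation without moving it. */
		if (rallocm(&p, &rsize, rsize * 2, 0, ALLOCM_NO_MOVE)
		    == ALLOCM_ERR_NOT_MOVED) {
			/* Could not grow in place; p and its contents are unchanged. */
		}

		dallocm(p, 0);
		return (0);
	}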
diff --git a/jemalloc/include/jemalloc/jemalloc_defs.h.in b/jemalloc/include/jemalloc/jemalloc_defs.h.in
index 8b98d67..b8f3f36 100644
--- a/jemalloc/include/jemalloc/jemalloc_defs.h.in
+++ b/jemalloc/include/jemalloc/jemalloc_defs.h.in
@@ -13,6 +13,7 @@
* the API prefixing.
*/
#undef JEMALLOC_PREFIX
+#undef JEMALLOC_CPREFIX
#if (defined(JEMALLOC_PREFIX) && defined(JEMALLOC_MANGLE))
#undef JEMALLOC_P
#endif
@@ -31,6 +32,9 @@
# define JEMALLOC_ATTR(s)
#endif
+/* JEMALLOC_CC_SILENCE enables code that silences useless compiler warnings. */
+#undef JEMALLOC_CC_SILENCE
+
/*
* JEMALLOC_DEBUG enables assertions and other sanity checks, and disables
* inline functions.
@@ -92,6 +96,38 @@
/* TLS is used to map arenas and magazine caches to threads. */
#undef NO_TLS
+/*
+ * JEMALLOC_IVSALLOC enables ivsalloc(), which verifies that pointers reside
+ * within jemalloc-owned chunks before dereferencing them.
+ */
+#undef JEMALLOC_IVSALLOC
+
+/*
+ * Define overrides for non-standard allocator-related functions if they
+ * are present on the system.
+ */
+#undef JEMALLOC_OVERRIDE_MEMALIGN
+#undef JEMALLOC_OVERRIDE_VALLOC
+
+/*
+ * Darwin (OS X) uses zones to work around Mach-O symbol override shortcomings.
+ */
+#undef JEMALLOC_ZONE
+#undef JEMALLOC_ZONE_VERSION
+
+/*
+ * Methods for purging unused pages differ between operating systems.
+ *
+ * madvise(..., MADV_DONTNEED) : On Linux, this immediately discards pages,
+ * such that new pages will be demand-zeroed if
+ * the address region is later touched.
+ * madvise(..., MADV_FREE) : On FreeBSD and Darwin, this marks pages as being
+ * unused, such that they will be discarded rather
+ * than swapped out.
+ */
+#undef JEMALLOC_PURGE_MADVISE_DONTNEED
+#undef JEMALLOC_PURGE_MADVISE_FREE
+
/* sizeof(void *) == 2^LG_SIZEOF_PTR. */
#undef LG_SIZEOF_PTR
diff --git a/jemalloc/src/arena.c b/jemalloc/src/arena.c
index ee859fc..3d4f888 100644
--- a/jemalloc/src/arena.c
+++ b/jemalloc/src/arena.c
@@ -165,7 +165,7 @@
static void arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk);
static arena_run_t *arena_run_alloc(arena_t *arena, size_t size, bool large,
bool zero);
-static void arena_purge(arena_t *arena);
+static void arena_purge(arena_t *arena, bool all);
static void arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty);
static void arena_run_trim_head(arena_t *arena, arena_chunk_t *chunk,
arena_run_t *run, size_t oldsize, size_t newsize);
@@ -174,16 +174,18 @@
static arena_run_t *arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin);
static void *arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin);
static size_t arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size);
+static void arena_dissociate_bin_run(arena_chunk_t *chunk, arena_run_t *run,
+ arena_bin_t *bin);
static void arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk,
arena_run_t *run, arena_bin_t *bin);
+static void arena_bin_lower_run(arena_t *arena, arena_chunk_t *chunk,
+ arena_run_t *run, arena_bin_t *bin);
static void arena_ralloc_large_shrink(arena_t *arena, arena_chunk_t *chunk,
- void *ptr, size_t size, size_t oldsize);
+ void *ptr, size_t oldsize, size_t size);
static bool arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk,
- void *ptr, size_t size, size_t oldsize);
-static bool arena_ralloc_large(void *ptr, size_t size, size_t oldsize);
-#ifdef JEMALLOC_TINY
-static size_t pow2_ceil(size_t x);
-#endif
+ void *ptr, size_t oldsize, size_t size, size_t extra, bool zero);
+static bool arena_ralloc_large(void *ptr, size_t oldsize, size_t size,
+ size_t extra, bool zero);
static bool small_size2bin_init(void);
#ifdef JEMALLOC_DEBUG
static void small_size2bin_validate(void);
@@ -281,12 +283,33 @@
assert(((uintptr_t)ptr - ((uintptr_t)run +
(uintptr_t)run->bin->reg0_offset)) % (uintptr_t)run->bin->reg_size
== 0);
+ /*
+ * Freeing a pointer lower than region zero can cause an assertion
+ * failure.
+ */
+ assert((uintptr_t)ptr >= (uintptr_t)run +
+ (uintptr_t)run->bin->reg0_offset);
+ /*
+ * Freeing a pointer past the run's frontier can cause an assertion
+ * failure.
+ */
+ assert((uintptr_t)ptr < (uintptr_t)run->next);
*(void **)ptr = run->avail;
run->avail = ptr;
run->nfree++;
}
+#ifdef JEMALLOC_DEBUG
+static inline void
+arena_chunk_validate_zeroed(arena_chunk_t *chunk, size_t run_ind)
+{
+ size_t *p = (size_t *)((uintptr_t)chunk + (run_ind << PAGE_SHIFT));
+ for (size_t i = 0; i < PAGE_SIZE / sizeof(size_t); i++)
+ assert(p[i] == 0);
+}
+#endif
+
static void
arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large,
bool zero)
@@ -300,39 +323,40 @@
old_ndirty = chunk->ndirty;
run_ind = (unsigned)(((uintptr_t)run - (uintptr_t)chunk)
>> PAGE_SHIFT);
- flag_dirty = chunk->map[run_ind].bits & CHUNK_MAP_DIRTY;
+ flag_dirty = chunk->map[run_ind-map_bias].bits & CHUNK_MAP_DIRTY;
runs_avail = (flag_dirty != 0) ? &arena->runs_avail_dirty :
&arena->runs_avail_clean;
- total_pages = (chunk->map[run_ind].bits & ~PAGE_MASK) >>
+ total_pages = (chunk->map[run_ind-map_bias].bits & ~PAGE_MASK) >>
PAGE_SHIFT;
- assert((chunk->map[run_ind+total_pages-1].bits & CHUNK_MAP_DIRTY) ==
- flag_dirty);
+ assert((chunk->map[run_ind+total_pages-1-map_bias].bits &
+ CHUNK_MAP_DIRTY) == flag_dirty);
need_pages = (size >> PAGE_SHIFT);
assert(need_pages > 0);
assert(need_pages <= total_pages);
rem_pages = total_pages - need_pages;
- arena_avail_tree_remove(runs_avail, &chunk->map[run_ind]);
+ arena_avail_tree_remove(runs_avail, &chunk->map[run_ind-map_bias]);
arena->nactive += need_pages;
/* Keep track of trailing unused pages for later use. */
if (rem_pages > 0) {
if (flag_dirty != 0) {
- chunk->map[run_ind+need_pages].bits = (rem_pages <<
- PAGE_SHIFT) | CHUNK_MAP_DIRTY;
- chunk->map[run_ind+total_pages-1].bits = (rem_pages <<
- PAGE_SHIFT) | CHUNK_MAP_DIRTY;
+ chunk->map[run_ind+need_pages-map_bias].bits =
+ (rem_pages << PAGE_SHIFT) | CHUNK_MAP_DIRTY;
+ chunk->map[run_ind+total_pages-1-map_bias].bits =
+ (rem_pages << PAGE_SHIFT) | CHUNK_MAP_DIRTY;
} else {
- chunk->map[run_ind+need_pages].bits = (rem_pages <<
- PAGE_SHIFT) | (chunk->map[run_ind+need_pages].bits &
- CHUNK_MAP_ZEROED);
- chunk->map[run_ind+total_pages-1].bits = (rem_pages <<
- PAGE_SHIFT) |
- (chunk->map[run_ind+total_pages-1].bits &
- CHUNK_MAP_ZEROED);
+ chunk->map[run_ind+need_pages-map_bias].bits =
+ (rem_pages << PAGE_SHIFT) |
+ (chunk->map[run_ind+need_pages-map_bias].bits &
+ CHUNK_MAP_UNZEROED);
+ chunk->map[run_ind+total_pages-1-map_bias].bits =
+ (rem_pages << PAGE_SHIFT) |
+ (chunk->map[run_ind+total_pages-1-map_bias].bits &
+ CHUNK_MAP_UNZEROED);
}
arena_avail_tree_insert(runs_avail,
- &chunk->map[run_ind+need_pages]);
+ &chunk->map[run_ind+need_pages-map_bias]);
}
/* Update dirty page accounting. */
@@ -353,13 +377,19 @@
* zeroed (i.e. never before touched).
*/
for (i = 0; i < need_pages; i++) {
- if ((chunk->map[run_ind + i].bits &
- CHUNK_MAP_ZEROED) == 0) {
+ if ((chunk->map[run_ind+i-map_bias].bits
+ & CHUNK_MAP_UNZEROED) != 0) {
memset((void *)((uintptr_t)
- chunk + ((run_ind + i) <<
+ chunk + ((run_ind+i) <<
PAGE_SHIFT)), 0,
PAGE_SIZE);
}
+#ifdef JEMALLOC_DEBUG
+ else {
+ arena_chunk_validate_zeroed(
+ chunk, run_ind+i);
+ }
+#endif
}
} else {
/*
@@ -376,27 +406,54 @@
* Set the last element first, in case the run only contains one
* page (i.e. both statements set the same element).
*/
- chunk->map[run_ind+need_pages-1].bits = CHUNK_MAP_LARGE |
- CHUNK_MAP_ALLOCATED | flag_dirty;
- chunk->map[run_ind].bits = size | CHUNK_MAP_LARGE |
-#ifdef JEMALLOC_PROF
- CHUNK_MAP_CLASS_MASK |
-#endif
- CHUNK_MAP_ALLOCATED | flag_dirty;
+ chunk->map[run_ind+need_pages-1-map_bias].bits =
+ CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED | flag_dirty;
+ chunk->map[run_ind-map_bias].bits = size | flag_dirty |
+ CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED;
} else {
assert(zero == false);
/*
- * Propagate the dirty flag to the allocated small run, so that
- * arena_dalloc_bin_run() has the ability to conditionally trim
- * clean pages.
+ * Propagate the dirty and unzeroed flags to the allocated
+ * small run, so that arena_dalloc_bin_run() has the ability to
+ * conditionally trim clean pages.
*/
- chunk->map[run_ind].bits = CHUNK_MAP_ALLOCATED | flag_dirty;
+ chunk->map[run_ind-map_bias].bits =
+ (chunk->map[run_ind-map_bias].bits & CHUNK_MAP_UNZEROED) |
+ CHUNK_MAP_ALLOCATED | flag_dirty;
+#ifdef JEMALLOC_DEBUG
+ /*
+ * The first page will always be dirtied during small run
+ * initialization, so a validation failure here would not
+ * actually cause an observable failure.
+ */
+ if (flag_dirty == 0 &&
+ (chunk->map[run_ind-map_bias].bits & CHUNK_MAP_UNZEROED)
+ == 0)
+ arena_chunk_validate_zeroed(chunk, run_ind);
+#endif
for (i = 1; i < need_pages - 1; i++) {
- chunk->map[run_ind + i].bits = (i << PAGE_SHIFT)
- | CHUNK_MAP_ALLOCATED;
+ chunk->map[run_ind+i-map_bias].bits = (i << PAGE_SHIFT)
+ | (chunk->map[run_ind+i-map_bias].bits &
+ CHUNK_MAP_UNZEROED) | CHUNK_MAP_ALLOCATED;
+#ifdef JEMALLOC_DEBUG
+ if (flag_dirty == 0 &&
+ (chunk->map[run_ind+i-map_bias].bits &
+ CHUNK_MAP_UNZEROED) == 0)
+ arena_chunk_validate_zeroed(chunk, run_ind+i);
+#endif
}
- chunk->map[run_ind + need_pages - 1].bits = ((need_pages - 1) <<
- PAGE_SHIFT) | CHUNK_MAP_ALLOCATED | flag_dirty;
+ chunk->map[run_ind+need_pages-1-map_bias].bits = ((need_pages
+ - 1) << PAGE_SHIFT) |
+ (chunk->map[run_ind+need_pages-1-map_bias].bits &
+ CHUNK_MAP_UNZEROED) | CHUNK_MAP_ALLOCATED | flag_dirty;
+#ifdef JEMALLOC_DEBUG
+ if (flag_dirty == 0 &&
+ (chunk->map[run_ind+need_pages-1-map_bias].bits &
+ CHUNK_MAP_UNZEROED) == 0) {
+ arena_chunk_validate_zeroed(chunk,
+ run_ind+need_pages-1);
+ }
+#endif
}
}
@@ -413,20 +470,24 @@
arena->spare = NULL;
/* Insert the run into the appropriate runs_avail_* tree. */
- if ((chunk->map[arena_chunk_header_npages].bits &
- CHUNK_MAP_DIRTY) == 0)
+ if ((chunk->map[0].bits & CHUNK_MAP_DIRTY) == 0)
runs_avail = &arena->runs_avail_clean;
else
runs_avail = &arena->runs_avail_dirty;
- arena_avail_tree_insert(runs_avail,
- &chunk->map[arena_chunk_header_npages]);
+ assert((chunk->map[0].bits & ~PAGE_MASK) == arena_maxclass);
+ assert((chunk->map[chunk_npages-1-map_bias].bits & ~PAGE_MASK)
+ == arena_maxclass);
+ assert((chunk->map[0].bits & CHUNK_MAP_DIRTY) ==
+ (chunk->map[chunk_npages-1-map_bias].bits &
+ CHUNK_MAP_DIRTY));
+ arena_avail_tree_insert(runs_avail, &chunk->map[0]);
} else {
bool zero;
- size_t zeroed;
+ size_t unzeroed;
zero = false;
malloc_mutex_unlock(&arena->lock);
- chunk = (arena_chunk_t *)chunk_alloc(chunksize, &zero);
+ chunk = (arena_chunk_t *)chunk_alloc(chunksize, false, &zero);
malloc_mutex_lock(&arena->lock);
if (chunk == NULL)
return (NULL);
@@ -449,17 +510,28 @@
* Mark the pages as zeroed iff chunk_alloc() returned a zeroed
* chunk.
*/
- zeroed = zero ? CHUNK_MAP_ZEROED : 0;
- for (i = 0; i < arena_chunk_header_npages; i++)
- chunk->map[i].bits = 0;
- chunk->map[i].bits = arena_maxclass | zeroed;
- for (i++; i < chunk_npages-1; i++)
- chunk->map[i].bits = zeroed;
- chunk->map[chunk_npages-1].bits = arena_maxclass | zeroed;
+ unzeroed = zero ? 0 : CHUNK_MAP_UNZEROED;
+ chunk->map[0].bits = arena_maxclass | unzeroed;
+ /*
+ * The internal page map entries need to be initialized only if the
+ * chunk is not zeroed.
+ */
+ if (zero == false) {
+ for (i = map_bias+1; i < chunk_npages-1; i++)
+ chunk->map[i-map_bias].bits = unzeroed;
+ }
+#ifdef JEMALLOC_DEBUG
+ else {
+ for (i = map_bias+1; i < chunk_npages-1; i++)
+ assert(chunk->map[i-map_bias].bits == unzeroed);
+ }
+#endif
+ chunk->map[chunk_npages-1-map_bias].bits = arena_maxclass |
+ unzeroed;
/* Insert the run into the runs_avail_clean tree. */
arena_avail_tree_insert(&arena->runs_avail_clean,
- &chunk->map[arena_chunk_header_npages]);
+ &chunk->map[0]);
}
return (chunk);
@@ -474,13 +546,11 @@
* Remove run from the appropriate runs_avail_* tree, so that the arena
* does not use it.
*/
- if ((chunk->map[arena_chunk_header_npages].bits &
- CHUNK_MAP_DIRTY) == 0)
+ if ((chunk->map[0].bits & CHUNK_MAP_DIRTY) == 0)
runs_avail = &arena->runs_avail_clean;
else
runs_avail = &arena->runs_avail_dirty;
- arena_avail_tree_remove(runs_avail,
- &chunk->map[arena_chunk_header_npages]);
+ arena_avail_tree_remove(runs_avail, &chunk->map[0]);
if (arena->spare != NULL) {
arena_chunk_t *spare = arena->spare;
@@ -516,8 +586,9 @@
mapelm = arena_avail_tree_nsearch(&arena->runs_avail_dirty, &key);
if (mapelm != NULL) {
arena_chunk_t *run_chunk = CHUNK_ADDR2BASE(mapelm);
- size_t pageind = ((uintptr_t)mapelm - (uintptr_t)run_chunk->map)
- / sizeof(arena_chunk_map_t);
+ size_t pageind = (((uintptr_t)mapelm -
+ (uintptr_t)run_chunk->map) / sizeof(arena_chunk_map_t))
+ + map_bias;
run = (arena_run_t *)((uintptr_t)run_chunk + (pageind <<
PAGE_SHIFT));
@@ -527,8 +598,9 @@
mapelm = arena_avail_tree_nsearch(&arena->runs_avail_clean, &key);
if (mapelm != NULL) {
arena_chunk_t *run_chunk = CHUNK_ADDR2BASE(mapelm);
- size_t pageind = ((uintptr_t)mapelm - (uintptr_t)run_chunk->map)
- / sizeof(arena_chunk_map_t);
+ size_t pageind = (((uintptr_t)mapelm -
+ (uintptr_t)run_chunk->map) / sizeof(arena_chunk_map_t))
+ + map_bias;
run = (arena_run_t *)((uintptr_t)run_chunk + (pageind <<
PAGE_SHIFT));
@@ -541,8 +613,8 @@
*/
chunk = arena_chunk_alloc(arena);
if (chunk != NULL) {
- run = (arena_run_t *)((uintptr_t)chunk +
- (arena_chunk_header_npages << PAGE_SHIFT));
+ run = (arena_run_t *)((uintptr_t)chunk + (map_bias <<
+ PAGE_SHIFT));
arena_run_split(arena, run, size, large, zero);
return (run);
}
@@ -555,8 +627,9 @@
mapelm = arena_avail_tree_nsearch(&arena->runs_avail_dirty, &key);
if (mapelm != NULL) {
arena_chunk_t *run_chunk = CHUNK_ADDR2BASE(mapelm);
- size_t pageind = ((uintptr_t)mapelm - (uintptr_t)run_chunk->map)
- / sizeof(arena_chunk_map_t);
+ size_t pageind = (((uintptr_t)mapelm -
+ (uintptr_t)run_chunk->map) / sizeof(arena_chunk_map_t))
+ + map_bias;
run = (arena_run_t *)((uintptr_t)run_chunk + (pageind <<
PAGE_SHIFT));
@@ -566,8 +639,9 @@
mapelm = arena_avail_tree_nsearch(&arena->runs_avail_clean, &key);
if (mapelm != NULL) {
arena_chunk_t *run_chunk = CHUNK_ADDR2BASE(mapelm);
- size_t pageind = ((uintptr_t)mapelm - (uintptr_t)run_chunk->map)
- / sizeof(arena_chunk_map_t);
+ size_t pageind = (((uintptr_t)mapelm -
+ (uintptr_t)run_chunk->map) / sizeof(arena_chunk_map_t))
+ + map_bias;
run = (arena_run_t *)((uintptr_t)run_chunk + (pageind <<
PAGE_SHIFT));
@@ -587,7 +661,7 @@
(arena->ndirty - arena->npurgatory) > chunk_npages &&
(arena->nactive >> opt_lg_dirty_mult) < (arena->ndirty -
arena->npurgatory))
- arena_purge(arena);
+ arena_purge(arena, false);
}
static inline void
@@ -595,7 +669,7 @@
{
ql_head(arena_chunk_map_t) mapelms;
arena_chunk_map_t *mapelm;
- size_t pageind, flag_zeroed;
+ size_t pageind, flag_unzeroed;
#ifdef JEMALLOC_DEBUG
size_t ndirty;
#endif
@@ -605,11 +679,19 @@
ql_new(&mapelms);
- flag_zeroed =
-#ifdef JEMALLOC_SWAP
- swap_enabled ? 0 :
+ flag_unzeroed =
+#ifdef JEMALLOC_PURGE_MADVISE_DONTNEED
+ /*
+ * madvise(..., MADV_DONTNEED) results in zero-filled pages for anonymous
+ * mappings, but not for file-backed mappings.
+ */
+# ifdef JEMALLOC_SWAP
+ swap_enabled ? CHUNK_MAP_UNZEROED :
+# endif
+ 0;
+#else
+ CHUNK_MAP_UNZEROED;
#endif
- CHUNK_MAP_ZEROED;
/*
* If chunk is the spare, temporarily re-allocate it, 1) so that its
@@ -627,14 +709,13 @@
* run.
*/
if (chunk == arena->spare) {
- assert((chunk->map[arena_chunk_header_npages].bits &
- CHUNK_MAP_DIRTY) != 0);
+ assert((chunk->map[0].bits & CHUNK_MAP_DIRTY) != 0);
arena_chunk_alloc(arena);
}
/* Temporarily allocate all free dirty runs within chunk. */
- for (pageind = arena_chunk_header_npages; pageind < chunk_npages;) {
- mapelm = &chunk->map[pageind];
+ for (pageind = map_bias; pageind < chunk_npages;) {
+ mapelm = &chunk->map[pageind-map_bias];
if ((mapelm->bits & CHUNK_MAP_ALLOCATED) == 0) {
size_t npages;
@@ -646,25 +727,22 @@
arena_avail_tree_remove(
&arena->runs_avail_dirty, mapelm);
+ mapelm->bits = (npages << PAGE_SHIFT) |
+ flag_unzeroed | CHUNK_MAP_LARGE |
+ CHUNK_MAP_ALLOCATED;
/*
* Update internal elements in the page map, so
- * that CHUNK_MAP_ZEROED is properly set.
- * madvise(..., MADV_DONTNEED) results in
- * zero-filled pages for anonymous mappings,
- * but not for file-backed mappings.
+ * that CHUNK_MAP_UNZEROED is properly set.
*/
- mapelm->bits = (npages << PAGE_SHIFT) |
- CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED |
- flag_zeroed;
for (i = 1; i < npages - 1; i++) {
- chunk->map[pageind + i].bits =
- flag_zeroed;
+ chunk->map[pageind+i-map_bias].bits =
+ flag_unzeroed;
}
if (npages > 1) {
- chunk->map[pageind + npages - 1].bits =
- (npages << PAGE_SHIFT) |
- CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED |
- flag_zeroed;
+ chunk->map[
+ pageind+npages-1-map_bias].bits =
+ flag_unzeroed | CHUNK_MAP_LARGE |
+ CHUNK_MAP_ALLOCATED;
}
arena->nactive += npages;
@@ -706,8 +784,8 @@
nmadvise = 0;
#endif
ql_foreach(mapelm, &mapelms, u.ql_link) {
- size_t pageind = ((uintptr_t)mapelm - (uintptr_t)chunk->map) /
- sizeof(arena_chunk_map_t);
+ size_t pageind = (((uintptr_t)mapelm - (uintptr_t)chunk->map) /
+ sizeof(arena_chunk_map_t)) + map_bias;
size_t npages = mapelm->bits >> PAGE_SHIFT;
assert(pageind + npages <= chunk_npages);
@@ -715,8 +793,17 @@
assert(ndirty >= npages);
ndirty -= npages;
#endif
+
+#ifdef JEMALLOC_PURGE_MADVISE_DONTNEED
madvise((void *)((uintptr_t)chunk + (pageind << PAGE_SHIFT)),
(npages << PAGE_SHIFT), MADV_DONTNEED);
+#elif defined(JEMALLOC_PURGE_MADVISE_FREE)
+ madvise((void *)((uintptr_t)chunk + (pageind << PAGE_SHIFT)),
+ (npages << PAGE_SHIFT), MADV_FREE);
+#else
+# error "No method defined for purging unused dirty pages."
+#endif
+
#ifdef JEMALLOC_STATS
nmadvise++;
#endif
@@ -732,8 +819,8 @@
/* Deallocate runs. */
for (mapelm = ql_first(&mapelms); mapelm != NULL;
mapelm = ql_first(&mapelms)) {
- size_t pageind = ((uintptr_t)mapelm - (uintptr_t)chunk->map) /
- sizeof(arena_chunk_map_t);
+ size_t pageind = (((uintptr_t)mapelm - (uintptr_t)chunk->map) /
+ sizeof(arena_chunk_map_t)) + map_bias;
arena_run_t *run = (arena_run_t *)((uintptr_t)chunk +
(uintptr_t)(pageind << PAGE_SHIFT));
@@ -743,7 +830,7 @@
}
static void
-arena_purge(arena_t *arena)
+arena_purge(arena_t *arena, bool all)
{
arena_chunk_t *chunk;
size_t npurgatory;
@@ -757,8 +844,8 @@
assert(ndirty == arena->ndirty);
#endif
assert(arena->ndirty > arena->npurgatory);
- assert(arena->ndirty > chunk_npages);
- assert((arena->nactive >> opt_lg_dirty_mult) < arena->ndirty);
+ assert(arena->ndirty > chunk_npages || all);
+ assert((arena->nactive >> opt_lg_dirty_mult) < arena->ndirty || all);
#ifdef JEMALLOC_STATS
arena->stats.npurge++;
@@ -769,8 +856,9 @@
* purge, and add the result to arena->npurgatory. This will keep
* multiple threads from racing to reduce ndirty below the threshold.
*/
- npurgatory = (arena->ndirty - arena->npurgatory) - (arena->nactive >>
- opt_lg_dirty_mult);
+ npurgatory = arena->ndirty - arena->npurgatory;
+ if (all == false)
+ npurgatory -= arena->nactive >> opt_lg_dirty_mult;
arena->npurgatory += npurgatory;
while (npurgatory > 0) {
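A worked example of the computation above, with illustrative numbers only (opt_lg_dirty_mult = 5 is assumed here, i.e. at most one dirty page is tolerated per 32 active pages): with nactive = 8192, ndirty = 1000, and npurgatory previously 0, a threshold-driven purge requests

	npurgatory = (1000 - 0) - (8192 >> 5) = 1000 - 256 = 744

pages, whereas arena_purge(arena, true), as called from arena_purge_all() below, skips the subtraction and requests all 1000 dirty pages.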
@@ -826,6 +914,15 @@
}
}
+void
+arena_purge_all(arena_t *arena)
+{
+
+ malloc_mutex_lock(&arena->lock);
+ arena_purge(arena, true);
+ malloc_mutex_unlock(&arena->lock);
+}
+
static void
arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty)
{
@@ -836,11 +933,18 @@
chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
run_ind = (size_t)(((uintptr_t)run - (uintptr_t)chunk)
>> PAGE_SHIFT);
- assert(run_ind >= arena_chunk_header_npages);
+ assert(run_ind >= map_bias);
assert(run_ind < chunk_npages);
- if ((chunk->map[run_ind].bits & CHUNK_MAP_LARGE) != 0)
- size = chunk->map[run_ind].bits & ~PAGE_MASK;
- else
+ if ((chunk->map[run_ind-map_bias].bits & CHUNK_MAP_LARGE) != 0) {
+ size = chunk->map[run_ind-map_bias].bits & ~PAGE_MASK;
+ assert(size == PAGE_SIZE ||
+ (chunk->map[run_ind+(size>>PAGE_SHIFT)-1-map_bias].bits &
+ ~PAGE_MASK) == 0);
+ assert((chunk->map[run_ind+(size>>PAGE_SHIFT)-1-map_bias].bits &
+ CHUNK_MAP_LARGE) != 0);
+ assert((chunk->map[run_ind+(size>>PAGE_SHIFT)-1-map_bias].bits &
+ CHUNK_MAP_ALLOCATED) != 0);
+ } else
size = run->bin->run_size;
run_pages = (size >> PAGE_SHIFT);
arena->nactive -= run_pages;
@@ -849,7 +953,7 @@
* The run is dirty if the caller claims to have dirtied it, as well as
* if it was already dirty before being allocated.
*/
- if ((chunk->map[run_ind].bits & CHUNK_MAP_DIRTY) != 0)
+ if ((chunk->map[run_ind-map_bias].bits & CHUNK_MAP_DIRTY) != 0)
dirty = true;
flag_dirty = dirty ? CHUNK_MAP_DIRTY : 0;
runs_avail = dirty ? &arena->runs_avail_dirty :
@@ -857,72 +961,91 @@
/* Mark pages as unallocated in the chunk map. */
if (dirty) {
- chunk->map[run_ind].bits = size | flag_dirty;
- chunk->map[run_ind+run_pages-1].bits = size | flag_dirty;
+ chunk->map[run_ind-map_bias].bits = size | CHUNK_MAP_DIRTY;
+ chunk->map[run_ind+run_pages-1-map_bias].bits = size |
+ CHUNK_MAP_DIRTY;
chunk->ndirty += run_pages;
arena->ndirty += run_pages;
} else {
- chunk->map[run_ind].bits = size | (chunk->map[run_ind].bits &
- CHUNK_MAP_ZEROED);
- chunk->map[run_ind+run_pages-1].bits = size |
- (chunk->map[run_ind+run_pages-1].bits & CHUNK_MAP_ZEROED);
+ chunk->map[run_ind-map_bias].bits = size |
+ (chunk->map[run_ind-map_bias].bits & CHUNK_MAP_UNZEROED);
+ chunk->map[run_ind+run_pages-1-map_bias].bits = size |
+ (chunk->map[run_ind+run_pages-1-map_bias].bits &
+ CHUNK_MAP_UNZEROED);
}
/* Try to coalesce forward. */
if (run_ind + run_pages < chunk_npages &&
- (chunk->map[run_ind+run_pages].bits & CHUNK_MAP_ALLOCATED) == 0 &&
- (chunk->map[run_ind+run_pages].bits & CHUNK_MAP_DIRTY) ==
- flag_dirty) {
- size_t nrun_size = chunk->map[run_ind+run_pages].bits &
+ (chunk->map[run_ind+run_pages-map_bias].bits & CHUNK_MAP_ALLOCATED)
+ == 0 && (chunk->map[run_ind+run_pages-map_bias].bits &
+ CHUNK_MAP_DIRTY) == flag_dirty) {
+ size_t nrun_size = chunk->map[run_ind+run_pages-map_bias].bits &
~PAGE_MASK;
+ size_t nrun_pages = nrun_size >> PAGE_SHIFT;
/*
* Remove successor from runs_avail; the coalesced run is
* inserted later.
*/
+ assert((chunk->map[run_ind+run_pages+nrun_pages-1-map_bias].bits
+ & ~PAGE_MASK) == nrun_size);
+ assert((chunk->map[run_ind+run_pages+nrun_pages-1-map_bias].bits
+ & CHUNK_MAP_ALLOCATED) == 0);
+ assert((chunk->map[run_ind+run_pages+nrun_pages-1-map_bias].bits
+ & CHUNK_MAP_DIRTY) == flag_dirty);
arena_avail_tree_remove(runs_avail,
- &chunk->map[run_ind+run_pages]);
+ &chunk->map[run_ind+run_pages-map_bias]);
size += nrun_size;
- run_pages = size >> PAGE_SHIFT;
+ run_pages += nrun_pages;
- assert((chunk->map[run_ind+run_pages-1].bits & ~PAGE_MASK)
- == nrun_size);
- chunk->map[run_ind].bits = size | (chunk->map[run_ind].bits &
- CHUNK_MAP_FLAGS_MASK);
- chunk->map[run_ind+run_pages-1].bits = size |
- (chunk->map[run_ind+run_pages-1].bits &
+ chunk->map[run_ind-map_bias].bits = size |
+ (chunk->map[run_ind-map_bias].bits & CHUNK_MAP_FLAGS_MASK);
+ chunk->map[run_ind+run_pages-1-map_bias].bits = size |
+ (chunk->map[run_ind+run_pages-1-map_bias].bits &
CHUNK_MAP_FLAGS_MASK);
}
/* Try to coalesce backward. */
- if (run_ind > arena_chunk_header_npages && (chunk->map[run_ind-1].bits &
- CHUNK_MAP_ALLOCATED) == 0 && (chunk->map[run_ind-1].bits &
+ if (run_ind > map_bias && (chunk->map[run_ind-1-map_bias].bits &
+ CHUNK_MAP_ALLOCATED) == 0 && (chunk->map[run_ind-1-map_bias].bits &
CHUNK_MAP_DIRTY) == flag_dirty) {
- size_t prun_size = chunk->map[run_ind-1].bits & ~PAGE_MASK;
+ size_t prun_size = chunk->map[run_ind-1-map_bias].bits &
+ ~PAGE_MASK;
+ size_t prun_pages = prun_size >> PAGE_SHIFT;
- run_ind -= prun_size >> PAGE_SHIFT;
+ run_ind -= prun_pages;
/*
* Remove predecessor from runs_avail; the coalesced run is
* inserted later.
*/
- arena_avail_tree_remove(runs_avail, &chunk->map[run_ind]);
+ assert((chunk->map[run_ind-map_bias].bits & ~PAGE_MASK)
+ == prun_size);
+ assert((chunk->map[run_ind-map_bias].bits & CHUNK_MAP_ALLOCATED)
+ == 0);
+ assert((chunk->map[run_ind-map_bias].bits & CHUNK_MAP_DIRTY)
+ == flag_dirty);
+ arena_avail_tree_remove(runs_avail,
+ &chunk->map[run_ind-map_bias]);
size += prun_size;
- run_pages = size >> PAGE_SHIFT;
+ run_pages += prun_pages;
- assert((chunk->map[run_ind].bits & ~PAGE_MASK) == prun_size);
- chunk->map[run_ind].bits = size | (chunk->map[run_ind].bits &
- CHUNK_MAP_FLAGS_MASK);
- chunk->map[run_ind+run_pages-1].bits = size |
- (chunk->map[run_ind+run_pages-1].bits &
+ chunk->map[run_ind-map_bias].bits = size |
+ (chunk->map[run_ind-map_bias].bits & CHUNK_MAP_FLAGS_MASK);
+ chunk->map[run_ind+run_pages-1-map_bias].bits = size |
+ (chunk->map[run_ind+run_pages-1-map_bias].bits &
CHUNK_MAP_FLAGS_MASK);
}
/* Insert into runs_avail, now that coalescing is complete. */
- arena_avail_tree_insert(runs_avail, &chunk->map[run_ind]);
+ assert((chunk->map[run_ind-map_bias].bits & ~PAGE_MASK) ==
+ (chunk->map[run_ind+run_pages-1-map_bias].bits & ~PAGE_MASK));
+ assert((chunk->map[run_ind-map_bias].bits & CHUNK_MAP_DIRTY) ==
+ (chunk->map[run_ind+run_pages-1-map_bias].bits & CHUNK_MAP_DIRTY));
+ arena_avail_tree_insert(runs_avail, &chunk->map[run_ind-map_bias]);
if (dirty) {
/*
@@ -941,8 +1064,8 @@
* manipulation checks whether the first run is unallocated and extends
* to the end of the chunk.
*/
- if ((chunk->map[arena_chunk_header_npages].bits & (~PAGE_MASK |
- CHUNK_MAP_ALLOCATED)) == arena_maxclass)
+ if ((chunk->map[0].bits & (~PAGE_MASK | CHUNK_MAP_ALLOCATED)) ==
+ arena_maxclass)
arena_chunk_dealloc(arena, chunk);
/*
@@ -962,18 +1085,40 @@
{
size_t pageind = ((uintptr_t)run - (uintptr_t)chunk) >> PAGE_SHIFT;
size_t head_npages = (oldsize - newsize) >> PAGE_SHIFT;
- size_t flags = chunk->map[pageind].bits & CHUNK_MAP_FLAGS_MASK;
+ size_t flag_dirty = chunk->map[pageind-map_bias].bits & CHUNK_MAP_DIRTY;
assert(oldsize > newsize);
/*
* Update the chunk map so that arena_run_dalloc() can treat the
- * leading run as separately allocated.
+ * leading run as separately allocated. Set the last element of each
+ * run first, in case of single-page runs.
*/
- assert(chunk->map[pageind].bits & CHUNK_MAP_LARGE);
- assert(chunk->map[pageind].bits & CHUNK_MAP_ALLOCATED);
- chunk->map[pageind].bits = (oldsize - newsize) | flags;
- chunk->map[pageind+head_npages].bits = newsize | flags;
+ assert((chunk->map[pageind-map_bias].bits & CHUNK_MAP_LARGE) != 0);
+ assert((chunk->map[pageind-map_bias].bits & CHUNK_MAP_ALLOCATED) != 0);
+ chunk->map[pageind+head_npages-1-map_bias].bits = flag_dirty |
+ (chunk->map[pageind+head_npages-1-map_bias].bits &
+ CHUNK_MAP_UNZEROED) | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED;
+ chunk->map[pageind-map_bias].bits = (oldsize - newsize)
+ | flag_dirty | (chunk->map[pageind-map_bias].bits &
+ CHUNK_MAP_UNZEROED) | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED;
+
+#ifdef JEMALLOC_DEBUG
+ {
+ size_t tail_npages = newsize >> PAGE_SHIFT;
+ assert((chunk->map[pageind+head_npages+tail_npages-1-map_bias]
+ .bits & ~PAGE_MASK) == 0);
+ assert((chunk->map[pageind+head_npages+tail_npages-1-map_bias]
+ .bits & CHUNK_MAP_DIRTY) == flag_dirty);
+ assert((chunk->map[pageind+head_npages+tail_npages-1-map_bias]
+ .bits & CHUNK_MAP_LARGE) != 0);
+ assert((chunk->map[pageind+head_npages+tail_npages-1-map_bias]
+ .bits & CHUNK_MAP_ALLOCATED) != 0);
+ }
+#endif
+ chunk->map[pageind+head_npages-map_bias].bits = newsize | flag_dirty |
+ (chunk->map[pageind+head_npages-map_bias].bits &
+ CHUNK_MAP_FLAGS_MASK) | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED;
arena_run_dalloc(arena, run, false);
}
@@ -983,20 +1128,40 @@
size_t oldsize, size_t newsize, bool dirty)
{
size_t pageind = ((uintptr_t)run - (uintptr_t)chunk) >> PAGE_SHIFT;
- size_t npages = newsize >> PAGE_SHIFT;
- size_t flags = chunk->map[pageind].bits & CHUNK_MAP_FLAGS_MASK;
+ size_t head_npages = newsize >> PAGE_SHIFT;
+ size_t tail_npages = (oldsize - newsize) >> PAGE_SHIFT;
+ size_t flag_dirty = chunk->map[pageind-map_bias].bits &
+ CHUNK_MAP_DIRTY;
assert(oldsize > newsize);
/*
* Update the chunk map so that arena_run_dalloc() can treat the
- * trailing run as separately allocated.
+ * trailing run as separately allocated. Set the last element of each
+ * run first, in case of single-page runs.
*/
- assert(chunk->map[pageind].bits & CHUNK_MAP_LARGE);
- assert(chunk->map[pageind].bits & CHUNK_MAP_ALLOCATED);
- chunk->map[pageind].bits = newsize | flags;
- chunk->map[pageind+npages-1].bits = newsize | flags;
- chunk->map[pageind+npages].bits = (oldsize - newsize) | flags;
+ assert((chunk->map[pageind-map_bias].bits & CHUNK_MAP_LARGE) != 0);
+ assert((chunk->map[pageind-map_bias].bits & CHUNK_MAP_ALLOCATED) != 0);
+ chunk->map[pageind+head_npages-1-map_bias].bits = flag_dirty |
+ (chunk->map[pageind+head_npages-1-map_bias].bits &
+ CHUNK_MAP_UNZEROED) | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED;
+ chunk->map[pageind-map_bias].bits = newsize | flag_dirty |
+ (chunk->map[pageind-map_bias].bits & CHUNK_MAP_UNZEROED) |
+ CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED;
+
+ assert((chunk->map[pageind+head_npages+tail_npages-1-map_bias].bits &
+ ~PAGE_MASK) == 0);
+ assert((chunk->map[pageind+head_npages+tail_npages-1-map_bias].bits &
+ CHUNK_MAP_LARGE) != 0);
+ assert((chunk->map[pageind+head_npages+tail_npages-1-map_bias].bits &
+ CHUNK_MAP_ALLOCATED) != 0);
+ chunk->map[pageind+head_npages+tail_npages-1-map_bias].bits =
+ flag_dirty |
+ (chunk->map[pageind+head_npages+tail_npages-1-map_bias].bits &
+ CHUNK_MAP_UNZEROED) | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED;
+ chunk->map[pageind+head_npages-map_bias].bits = (oldsize - newsize) |
+ flag_dirty | (chunk->map[pageind+head_npages-map_bias].bits &
+ CHUNK_MAP_UNZEROED) | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED;
arena_run_dalloc(arena, (arena_run_t *)((uintptr_t)run + newsize),
dirty);
@@ -1018,8 +1183,8 @@
arena_run_tree_remove(&bin->runs, mapelm);
chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(mapelm);
- pageind = (((uintptr_t)mapelm - (uintptr_t)chunk->map) /
- sizeof(arena_chunk_map_t));
+ pageind = ((((uintptr_t)mapelm - (uintptr_t)chunk->map) /
+ sizeof(arena_chunk_map_t))) + map_bias;
run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind -
(mapelm->bits >> PAGE_SHIFT))
<< PAGE_SHIFT));
@@ -1039,7 +1204,7 @@
/* Initialize run internals. */
run->bin = bin;
run->avail = NULL;
- run->next = (void *)(((uintptr_t)run) +
+ run->next = (void *)((uintptr_t)run +
(uintptr_t)bin->reg0_offset);
run->nfree = bin->nregs;
#ifdef JEMALLOC_DEBUG
@@ -1061,7 +1226,7 @@
/*
* arena_run_alloc() failed, but another thread may have made
- * sufficient memory available while this one dopped bin->lock above,
+ * sufficient memory available while this one dropped bin->lock above,
* so search one more time.
*/
mapelm = arena_run_tree_first(&bin->runs);
@@ -1073,8 +1238,8 @@
arena_run_tree_remove(&bin->runs, mapelm);
chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(mapelm);
- pageind = (((uintptr_t)mapelm - (uintptr_t)chunk->map) /
- sizeof(arena_chunk_map_t));
+ pageind = ((((uintptr_t)mapelm - (uintptr_t)chunk->map) /
+ sizeof(arena_chunk_map_t))) + map_bias;
run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind -
(mapelm->bits >> PAGE_SHIFT))
<< PAGE_SHIFT));
@@ -1105,11 +1270,21 @@
assert(bin->runcur->nfree > 0);
ret = arena_run_reg_alloc(bin->runcur, bin);
if (run != NULL) {
- malloc_mutex_unlock(&bin->lock);
- malloc_mutex_lock(&arena->lock);
- arena_run_dalloc(arena, run, false);
- malloc_mutex_unlock(&arena->lock);
- malloc_mutex_lock(&bin->lock);
+ arena_chunk_t *chunk;
+
+ /*
+ * arena_run_alloc() may have allocated run, or it may
+ * have pulled it from the bin's run tree. Therefore
+ * it is unsafe to make any assumptions about how run
+ * has previously been used, and arena_bin_lower_run()
+ * must be called, as if a region were just deallocated
+ * from the run.
+ */
+ chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
+ if (run->nfree == bin->nregs)
+ arena_dalloc_bin_run(arena, chunk, run, bin);
+ else
+ arena_bin_lower_run(arena, chunk, run, bin);
}
return (ret);
}
@@ -1424,17 +1599,19 @@
/* Only handles large allocations that require more than page alignment. */
void *
-arena_palloc(arena_t *arena, size_t alignment, size_t size, size_t alloc_size)
+arena_palloc(arena_t *arena, size_t size, size_t alloc_size, size_t alignment,
+ bool zero)
{
void *ret;
size_t offset;
arena_chunk_t *chunk;
assert((size & PAGE_MASK) == 0);
- assert((alignment & PAGE_MASK) == 0);
+
+ alignment = PAGE_CEILING(alignment);
malloc_mutex_lock(&arena->lock);
- ret = (void *)arena_run_alloc(arena, alloc_size, true, false);
+ ret = (void *)arena_run_alloc(arena, alloc_size, true, zero);
if (ret == NULL) {
malloc_mutex_unlock(&arena->lock);
return (NULL);
@@ -1482,10 +1659,12 @@
malloc_mutex_unlock(&arena->lock);
#ifdef JEMALLOC_FILL
- if (opt_junk)
- memset(ret, 0xa5, size);
- else if (opt_zero)
- memset(ret, 0, size);
+ if (zero == false) {
+ if (opt_junk)
+ memset(ret, 0xa5, size);
+ else if (opt_zero)
+ memset(ret, 0, size);
+ }
#endif
return (ret);
}
@@ -1502,8 +1681,8 @@
assert(CHUNK_ADDR2BASE(ptr) != ptr);
chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
- pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT);
- mapbits = chunk->map[pageind].bits;
+ pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT;
+ mapbits = chunk->map[pageind-map_bias].bits;
assert((mapbits & CHUNK_MAP_ALLOCATED) != 0);
if ((mapbits & CHUNK_MAP_LARGE) == 0) {
arena_run_t *run = (arena_run_t *)((uintptr_t)chunk +
@@ -1535,11 +1714,11 @@
assert(isalloc(ptr) == PAGE_SIZE);
chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
- pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT);
+ pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT;
binind = small_size2bin[size];
assert(binind < nbins);
- chunk->map[pageind].bits = (chunk->map[pageind].bits &
- ~CHUNK_MAP_CLASS_MASK) | (binind << CHUNK_MAP_CLASS_SHIFT);
+ chunk->map[pageind-map_bias].bits = (chunk->map[pageind-map_bias].bits &
+ ~CHUNK_MAP_CLASS_MASK) | ((binind+1) << CHUNK_MAP_CLASS_SHIFT);
}
size_t
@@ -1553,8 +1732,8 @@
assert(CHUNK_ADDR2BASE(ptr) != ptr);
chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
- pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT);
- mapbits = chunk->map[pageind].bits;
+ pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT;
+ mapbits = chunk->map[pageind-map_bias].bits;
assert((mapbits & CHUNK_MAP_ALLOCATED) != 0);
if ((mapbits & CHUNK_MAP_LARGE) == 0) {
arena_run_t *run = (arena_run_t *)((uintptr_t)chunk +
@@ -1569,9 +1748,9 @@
assert(((uintptr_t)ptr & PAGE_MASK) == 0);
ret = mapbits & ~PAGE_MASK;
if (prof_promote && ret == PAGE_SIZE && (mapbits &
- CHUNK_MAP_CLASS_MASK) != CHUNK_MAP_CLASS_MASK) {
+ CHUNK_MAP_CLASS_MASK) != 0) {
size_t binind = ((mapbits & CHUNK_MAP_CLASS_MASK) >>
- CHUNK_MAP_CLASS_SHIFT);
+ CHUNK_MAP_CLASS_SHIFT) - 1;
assert(binind < nbins);
ret = chunk->arena->bins[binind].reg_size;
}
@@ -1580,144 +1759,12 @@
return (ret);
}
-
-static inline unsigned
-arena_run_regind(arena_run_t *run, arena_bin_t *bin, const void *ptr,
- size_t size)
-{
- unsigned shift, diff, regind;
-
- assert(run->magic == ARENA_RUN_MAGIC);
-
- /*
- * Avoid doing division with a variable divisor if possible. Using
- * actual division here can reduce allocator throughput by over 20%!
- */
- diff = (unsigned)((uintptr_t)ptr - (uintptr_t)run - bin->reg0_offset);
-
- /* Rescale (factor powers of 2 out of the numerator and denominator). */
- shift = ffs(size) - 1;
- diff >>= shift;
- size >>= shift;
-
- if (size == 1) {
- /* The divisor was a power of 2. */
- regind = diff;
- } else {
- /*
- * To divide by a number D that is not a power of two we
- * multiply by (2^21 / D) and then right shift by 21 positions.
- *
- * X / D
- *
- * becomes
- *
- * (X * size_invs[D - 3]) >> SIZE_INV_SHIFT
- *
- * We can omit the first three elements, because we never
- * divide by 0, and 1 and 2 are both powers of two, which are
- * handled above.
- */
-#define SIZE_INV_SHIFT 21
-#define SIZE_INV(s) (((1U << SIZE_INV_SHIFT) / (s)) + 1)
- static const unsigned size_invs[] = {
- SIZE_INV(3),
- SIZE_INV(4), SIZE_INV(5), SIZE_INV(6), SIZE_INV(7),
- SIZE_INV(8), SIZE_INV(9), SIZE_INV(10), SIZE_INV(11),
- SIZE_INV(12), SIZE_INV(13), SIZE_INV(14), SIZE_INV(15),
- SIZE_INV(16), SIZE_INV(17), SIZE_INV(18), SIZE_INV(19),
- SIZE_INV(20), SIZE_INV(21), SIZE_INV(22), SIZE_INV(23),
- SIZE_INV(24), SIZE_INV(25), SIZE_INV(26), SIZE_INV(27),
- SIZE_INV(28), SIZE_INV(29), SIZE_INV(30), SIZE_INV(31)
- };
-
- if (size <= ((sizeof(size_invs) / sizeof(unsigned)) + 2))
- regind = (diff * size_invs[size - 3]) >> SIZE_INV_SHIFT;
- else
- regind = diff / size;
-#undef SIZE_INV
-#undef SIZE_INV_SHIFT
- }
- assert(diff == regind * size);
- assert(regind < bin->nregs);
-
- return (regind);
-}
-
-prof_ctx_t *
-arena_prof_ctx_get(const void *ptr)
-{
- prof_ctx_t *ret;
- arena_chunk_t *chunk;
- size_t pageind, mapbits;
-
- assert(ptr != NULL);
- assert(CHUNK_ADDR2BASE(ptr) != ptr);
-
- chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
- pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT);
- mapbits = chunk->map[pageind].bits;
- assert((mapbits & CHUNK_MAP_ALLOCATED) != 0);
- if ((mapbits & CHUNK_MAP_LARGE) == 0) {
- if (prof_promote)
- ret = (prof_ctx_t *)(uintptr_t)1U;
- else {
- arena_run_t *run = (arena_run_t *)((uintptr_t)chunk +
- (uintptr_t)((pageind - (mapbits >> PAGE_SHIFT)) <<
- PAGE_SHIFT));
- arena_bin_t *bin = run->bin;
- unsigned regind;
-
- assert(run->magic == ARENA_RUN_MAGIC);
- regind = arena_run_regind(run, bin, ptr, bin->reg_size);
- ret = *(prof_ctx_t **)((uintptr_t)run +
- bin->ctx0_offset + (regind *
- sizeof(prof_ctx_t *)));
- }
- } else
- ret = chunk->map[pageind].prof_ctx;
-
- return (ret);
-}
-
-void
-arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx)
-{
- arena_chunk_t *chunk;
- size_t pageind, mapbits;
-
- assert(ptr != NULL);
- assert(CHUNK_ADDR2BASE(ptr) != ptr);
-
- chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
- pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT);
- mapbits = chunk->map[pageind].bits;
- assert((mapbits & CHUNK_MAP_ALLOCATED) != 0);
- if ((mapbits & CHUNK_MAP_LARGE) == 0) {
- if (prof_promote == false) {
- arena_run_t *run = (arena_run_t *)((uintptr_t)chunk +
- (uintptr_t)((pageind - (mapbits >> PAGE_SHIFT)) <<
- PAGE_SHIFT));
- arena_bin_t *bin = run->bin;
- unsigned regind;
-
- assert(run->magic == ARENA_RUN_MAGIC);
- regind = arena_run_regind(run, bin, ptr, bin->reg_size);
-
- *((prof_ctx_t **)((uintptr_t)run + bin->ctx0_offset
- + (regind * sizeof(prof_ctx_t *)))) = ctx;
- } else
- assert((uintptr_t)ctx == (uintptr_t)1U);
- } else
- chunk->map[pageind].prof_ctx = ctx;
-}
#endif
static void
-arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run,
+arena_dissociate_bin_run(arena_chunk_t *chunk, arena_run_t *run,
arena_bin_t *bin)
{
- size_t npages, run_ind, past;
/* Dissociate run from bin. */
if (run == bin->runcur)
@@ -1725,7 +1772,8 @@
else if (bin->nregs != 1) {
size_t run_pageind = (((uintptr_t)run - (uintptr_t)chunk)) >>
PAGE_SHIFT;
- arena_chunk_map_t *run_mapelm = &chunk->map[run_pageind];
+ arena_chunk_map_t *run_mapelm =
+ &chunk->map[run_pageind-map_bias];
/*
* This block's conditional is necessary because if the run
* only contains one region, then it never gets inserted into
@@ -1733,13 +1781,24 @@
*/
arena_run_tree_remove(&bin->runs, run_mapelm);
}
+}
+
+static void
+arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run,
+ arena_bin_t *bin)
+{
+ size_t npages, run_ind, past;
+
+ assert(run != bin->runcur);
+ assert(arena_run_tree_search(&bin->runs, &chunk->map[
+ (((uintptr_t)run-(uintptr_t)chunk)>>PAGE_SHIFT)-map_bias]) == NULL);
malloc_mutex_unlock(&bin->lock);
/******************************/
npages = bin->run_size >> PAGE_SHIFT;
run_ind = (size_t)(((uintptr_t)run - (uintptr_t)chunk) >> PAGE_SHIFT);
- past = (size_t)(((uintptr_t)run->next - (uintptr_t)1U -
- (uintptr_t)chunk) >> PAGE_SHIFT) + 1;
+ past = (size_t)((PAGE_CEILING((uintptr_t)run->next) - (uintptr_t)chunk)
+ >> PAGE_SHIFT);
malloc_mutex_lock(&arena->lock);
/*
@@ -1747,19 +1806,21 @@
* trim the clean pages before deallocating the dirty portion of the
* run.
*/
- if ((chunk->map[run_ind].bits & CHUNK_MAP_DIRTY) == 0 && past - run_ind
- < npages) {
+ if ((chunk->map[run_ind-map_bias].bits & CHUNK_MAP_DIRTY) == 0 && past
+ - run_ind < npages) {
/*
* Trim clean pages. Convert to large run beforehand. Set the
* last map element first, in case this is a one-page run.
*/
- chunk->map[run_ind+npages-1].bits = CHUNK_MAP_LARGE |
- (chunk->map[run_ind].bits & CHUNK_MAP_FLAGS_MASK);
- chunk->map[run_ind].bits = bin->run_size | CHUNK_MAP_LARGE |
- (chunk->map[run_ind].bits & CHUNK_MAP_FLAGS_MASK);
+ chunk->map[run_ind+npages-1-map_bias].bits = CHUNK_MAP_LARGE |
+ (chunk->map[run_ind+npages-1-map_bias].bits &
+ CHUNK_MAP_FLAGS_MASK);
+ chunk->map[run_ind-map_bias].bits = bin->run_size |
+ CHUNK_MAP_LARGE | (chunk->map[run_ind-map_bias].bits &
+ CHUNK_MAP_FLAGS_MASK);
arena_run_trim_tail(arena, chunk, run, (npages << PAGE_SHIFT),
- ((npages - (past - run_ind)) << PAGE_SHIFT), false);
- npages = past - run_ind;
+ ((past - run_ind) << PAGE_SHIFT), false);
+ /* npages = past - run_ind; */
}
#ifdef JEMALLOC_DEBUG
run->magic = 0;
@@ -1773,6 +1834,42 @@
#endif
}
+static void
+arena_bin_lower_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run,
+ arena_bin_t *bin)
+{
+
+ /*
+ * Make sure that bin->runcur always refers to the lowest non-full run,
+ * if one exists.
+ */
+ if (bin->runcur == NULL)
+ bin->runcur = run;
+ else if ((uintptr_t)run < (uintptr_t)bin->runcur) {
+ /* Switch runcur. */
+ if (bin->runcur->nfree > 0) {
+ arena_chunk_t *runcur_chunk =
+ CHUNK_ADDR2BASE(bin->runcur);
+ size_t runcur_pageind = (((uintptr_t)bin->runcur -
+ (uintptr_t)runcur_chunk)) >> PAGE_SHIFT;
+ arena_chunk_map_t *runcur_mapelm =
+ &runcur_chunk->map[runcur_pageind-map_bias];
+
+ /* Insert runcur. */
+ arena_run_tree_insert(&bin->runs, runcur_mapelm);
+ }
+ bin->runcur = run;
+ } else {
+ size_t run_pageind = (((uintptr_t)run -
+ (uintptr_t)chunk)) >> PAGE_SHIFT;
+ arena_chunk_map_t *run_mapelm =
+ &chunk->map[run_pageind-map_bias];
+
+ assert(arena_run_tree_search(&bin->runs, run_mapelm) == NULL);
+ arena_run_tree_insert(&bin->runs, run_mapelm);
+ }
+}
+
void
arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr,
arena_chunk_map_t *mapelm)
@@ -1784,7 +1881,7 @@
size_t size;
#endif
- pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT);
+ pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT;
run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind -
(mapelm->bits >> PAGE_SHIFT)) << PAGE_SHIFT));
assert(run->magic == ARENA_RUN_MAGIC);
@@ -1799,43 +1896,11 @@
#endif
arena_run_reg_dalloc(run, ptr);
-
- if (run->nfree == bin->nregs)
+ if (run->nfree == bin->nregs) {
+ arena_dissociate_bin_run(chunk, run, bin);
arena_dalloc_bin_run(arena, chunk, run, bin);
- else if (run->nfree == 1 && run != bin->runcur) {
- /*
- * Make sure that bin->runcur always refers to the lowest
- * non-full run, if one exists.
- */
- if (bin->runcur == NULL)
- bin->runcur = run;
- else if ((uintptr_t)run < (uintptr_t)bin->runcur) {
- /* Switch runcur. */
- if (bin->runcur->nfree > 0) {
- arena_chunk_t *runcur_chunk =
- CHUNK_ADDR2BASE(bin->runcur);
- size_t runcur_pageind =
- (((uintptr_t)bin->runcur -
- (uintptr_t)runcur_chunk)) >> PAGE_SHIFT;
- arena_chunk_map_t *runcur_mapelm =
- &runcur_chunk->map[runcur_pageind];
-
- /* Insert runcur. */
- arena_run_tree_insert(&bin->runs,
- runcur_mapelm);
- }
- bin->runcur = run;
- } else {
- size_t run_pageind = (((uintptr_t)run -
- (uintptr_t)chunk)) >> PAGE_SHIFT;
- arena_chunk_map_t *run_mapelm =
- &chunk->map[run_pageind];
-
- assert(arena_run_tree_search(&bin->runs, run_mapelm) ==
- NULL);
- arena_run_tree_insert(&bin->runs, run_mapelm);
- }
- }
+ } else if (run->nfree == 1 && run != bin->runcur)
+ arena_bin_lower_run(arena, chunk, run, bin);
#ifdef JEMALLOC_STATS
bin->stats.allocated -= size;
@@ -1908,7 +1973,7 @@
#if (defined(JEMALLOC_FILL) || defined(JEMALLOC_STATS))
size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >>
PAGE_SHIFT;
- size_t size = chunk->map[pageind].bits & ~PAGE_MASK;
+ size_t size = chunk->map[pageind-map_bias].bits & ~PAGE_MASK;
#endif
#ifdef JEMALLOC_FILL
@@ -1930,7 +1995,7 @@
static void
arena_ralloc_large_shrink(arena_t *arena, arena_chunk_t *chunk, void *ptr,
- size_t size, size_t oldsize)
+ size_t oldsize, size_t size)
{
assert(size < oldsize);
@@ -1965,50 +2030,71 @@
static bool
arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk, void *ptr,
- size_t size, size_t oldsize)
+ size_t oldsize, size_t size, size_t extra, bool zero)
{
size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT;
size_t npages = oldsize >> PAGE_SHIFT;
+ size_t followsize;
- assert(oldsize == (chunk->map[pageind].bits & ~PAGE_MASK));
+ assert(oldsize == (chunk->map[pageind-map_bias].bits & ~PAGE_MASK));
/* Try to extend the run. */
- assert(size > oldsize);
+ assert(size + extra > oldsize);
malloc_mutex_lock(&arena->lock);
- if (pageind + npages < chunk_npages && (chunk->map[pageind+npages].bits
- & CHUNK_MAP_ALLOCATED) == 0 && (chunk->map[pageind+npages].bits &
- ~PAGE_MASK) >= size - oldsize) {
+ if (pageind + npages < chunk_npages &&
+ (chunk->map[pageind+npages-map_bias].bits
+ & CHUNK_MAP_ALLOCATED) == 0 && (followsize =
+ chunk->map[pageind+npages-map_bias].bits & ~PAGE_MASK) >= size -
+ oldsize) {
/*
* The next run is available and sufficiently large. Split the
* following run, then merge the first part with the existing
* allocation.
*/
+ size_t flag_dirty;
+ size_t splitsize = (oldsize + followsize <= size + extra)
+ ? followsize : size + extra - oldsize;
arena_run_split(arena, (arena_run_t *)((uintptr_t)chunk +
- ((pageind+npages) << PAGE_SHIFT)), size - oldsize, true,
- false);
+ ((pageind+npages) << PAGE_SHIFT)), splitsize, true, zero);
- chunk->map[pageind].bits = size | CHUNK_MAP_LARGE |
- CHUNK_MAP_ALLOCATED;
- chunk->map[pageind+npages].bits = CHUNK_MAP_LARGE |
- CHUNK_MAP_ALLOCATED;
+ size = oldsize + splitsize;
+ npages = size >> PAGE_SHIFT;
+
+ /*
+ * Mark the extended run as dirty if either portion of the run
+ * was dirty before allocation. This is rather pedantic,
+ * because there's not actually any sequence of events that
+ * could cause the resulting run to be passed to
+ * arena_run_dalloc() with the dirty argument set to false
+ * (which is when dirty flag consistency would really matter).
+ */
+ flag_dirty = (chunk->map[pageind-map_bias].bits &
+ CHUNK_MAP_DIRTY) |
+ (chunk->map[pageind+npages-1-map_bias].bits &
+ CHUNK_MAP_DIRTY);
+ chunk->map[pageind-map_bias].bits = size | flag_dirty
+ | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED;
+ chunk->map[pageind+npages-1-map_bias].bits = flag_dirty |
+ CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED;
#ifdef JEMALLOC_STATS
- arena->stats.ndalloc_large++;
- arena->stats.allocated_large -= oldsize;
- arena->stats.lstats[(oldsize >> PAGE_SHIFT) - 1].ndalloc++;
- arena->stats.lstats[(oldsize >> PAGE_SHIFT) - 1].curruns--;
+ arena->stats.ndalloc_large++;
+ arena->stats.allocated_large -= oldsize;
+ arena->stats.lstats[(oldsize >> PAGE_SHIFT) - 1].ndalloc++;
+ arena->stats.lstats[(oldsize >> PAGE_SHIFT) - 1].curruns--;
- arena->stats.nmalloc_large++;
- arena->stats.nrequests_large++;
- arena->stats.allocated_large += size;
- arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nmalloc++;
- arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nrequests++;
- arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns++;
- if (arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns >
- arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns) {
- arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns =
- arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns;
- }
+ arena->stats.nmalloc_large++;
+ arena->stats.nrequests_large++;
+ arena->stats.allocated_large += size;
+ arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nmalloc++;
+ arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nrequests++;
+ arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns++;
+ if (arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns >
+ arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns) {
+ arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns =
+ arena->stats.lstats[(size >> PAGE_SHIFT) -
+ 1].curruns;
+ }
#endif
malloc_mutex_unlock(&arena->lock);
return (false);
@@ -2023,11 +2109,12 @@
* always fail if growing an object, and the following run is already in use.
*/
static bool
-arena_ralloc_large(void *ptr, size_t size, size_t oldsize)
+arena_ralloc_large(void *ptr, size_t oldsize, size_t size, size_t extra,
+ bool zero)
{
size_t psize;
- psize = PAGE_CEILING(size);
+ psize = PAGE_CEILING(size + extra);
if (psize == oldsize) {
/* Same size class. */
#ifdef JEMALLOC_FILL
@@ -2053,14 +2140,15 @@
oldsize - size);
}
#endif
- arena_ralloc_large_shrink(arena, chunk, ptr, psize,
- oldsize);
+ arena_ralloc_large_shrink(arena, chunk, ptr, oldsize,
+ psize);
return (false);
} else {
bool ret = arena_ralloc_large_grow(arena, chunk, ptr,
- psize, oldsize);
+ oldsize, PAGE_CEILING(size),
+ psize - PAGE_CEILING(size), zero);
#ifdef JEMALLOC_FILL
- if (ret == false && opt_zero) {
+ if (ret == false && zero == false && opt_zero) {
memset((void *)((uintptr_t)ptr + oldsize), 0,
size - oldsize);
}
@@ -2071,49 +2159,89 @@
}
void *
-arena_ralloc(void *ptr, size_t size, size_t oldsize)
+arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra,
+ bool zero)
{
- void *ret;
- size_t copysize;
- /* Try to avoid moving the allocation. */
+ /*
+ * Avoid moving the allocation if the size class can be left the same.
+ */
if (oldsize <= arena_maxclass) {
if (oldsize <= small_maxclass) {
- if (size <= small_maxclass && small_size2bin[size] ==
- small_size2bin[oldsize])
- goto IN_PLACE;
+ assert(choose_arena()->bins[small_size2bin[
+ oldsize]].reg_size == oldsize);
+ if ((size + extra <= small_maxclass &&
+ small_size2bin[size + extra] ==
+ small_size2bin[oldsize]) || (size <= oldsize &&
+ size + extra >= oldsize)) {
+#ifdef JEMALLOC_FILL
+ if (opt_junk && size < oldsize) {
+ memset((void *)((uintptr_t)ptr + size),
+ 0x5a, oldsize - size);
+ }
+#endif
+ return (ptr);
+ }
} else {
assert(size <= arena_maxclass);
- if (size > small_maxclass) {
- if (arena_ralloc_large(ptr, size, oldsize) ==
- false)
+ if (size + extra > small_maxclass) {
+ if (arena_ralloc_large(ptr, oldsize, size,
+ extra, zero) == false)
return (ptr);
}
}
}
- /*
- * If we get here, then size and oldsize are different enough that we
- * need to move the object. In that case, fall back to allocating new
- * space and copying.
- */
- ret = arena_malloc(size, false);
- if (ret == NULL)
- return (NULL);
+ /* Reallocation would require a move. */
+ return (NULL);
+}
- /* Junk/zero-filling were already done by arena_malloc(). */
+void *
+arena_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra,
+ size_t alignment, bool zero)
+{
+ void *ret;
+ size_t copysize;
+
+ /* Try to avoid moving the allocation. */
+ ret = arena_ralloc_no_move(ptr, oldsize, size, extra, zero);
+ if (ret != NULL)
+ return (ret);
+
+ /*
+ * size and oldsize are different enough that we need to move the
+ * object. In that case, fall back to allocating new space and
+ * copying.
+ */
+ if (alignment != 0)
+ ret = ipalloc(size + extra, alignment, zero);
+ else
+ ret = arena_malloc(size + extra, zero);
+
+ if (ret == NULL) {
+ if (extra == 0)
+ return (NULL);
+ /* Try again, this time without extra. */
+ if (alignment != 0)
+ ret = ipalloc(size, alignment, zero);
+ else
+ ret = arena_malloc(size, zero);
+
+ if (ret == NULL)
+ return (NULL);
+ }
+
+ /* Junk/zero-filling were already done by ipalloc()/arena_malloc(). */
+
+ /*
+ * Copy at most size bytes (not size+extra), since the caller has no
+ * expectation that the extra bytes will be reliably preserved.
+ */
copysize = (size < oldsize) ? size : oldsize;
memcpy(ret, ptr, copysize);
idalloc(ptr);
return (ret);
-IN_PLACE:
-#ifdef JEMALLOC_FILL
- if (opt_junk && size < oldsize)
- memset((void *)((uintptr_t)ptr + size), 0x5a, oldsize - size);
- else if (opt_zero && size > oldsize)
- memset((void *)((uintptr_t)ptr + oldsize), 0, size - oldsize);
-#endif
- return (ptr);
}
bool
@@ -2239,26 +2367,6 @@
return (false);
}
-#ifdef JEMALLOC_TINY
-/* Compute the smallest power of 2 that is >= x. */
-static size_t
-pow2_ceil(size_t x)
-{
-
- x--;
- x |= x >> 1;
- x |= x >> 2;
- x |= x >> 4;
- x |= x >> 8;
- x |= x >> 16;
-#if (SIZEOF_PTR == 8)
- x |= x >> 32;
-#endif
- x++;
- return (x);
-}
-#endif
-
#ifdef JEMALLOC_DEBUG
static void
small_size2bin_validate(void)
@@ -2381,6 +2489,7 @@
arena_boot(void)
{
size_t header_size;
+ unsigned i;
/* Set variables according to the value of opt_lg_[qc]space_max. */
qspace_max = (1U << opt_lg_qspace_max);
@@ -2420,7 +2529,7 @@
if (nbins > 255) {
char line_buf[UMAX2S_BUFSIZE];
malloc_write("<jemalloc>: Too many small size classes (");
- malloc_write(umax2s(nbins, 10, line_buf));
+ malloc_write(u2s(nbins, 10, line_buf));
malloc_write(" > max 255)\n");
abort();
}
@@ -2429,7 +2538,7 @@
if (nbins > 256) {
char line_buf[UMAX2S_BUFSIZE];
malloc_write("<jemalloc>: Too many small size classes (");
- malloc_write(umax2s(nbins, 10, line_buf));
+ malloc_write(u2s(nbins, 10, line_buf));
malloc_write(" > max 256)\n");
abort();
}
@@ -2439,13 +2548,26 @@
/*
* Compute the header size such that it is large enough to contain the
- * page map.
+ * page map. The page map is biased to omit entries for the header
+ * itself, so some iteration is necessary to compute the map bias.
+ *
+ * 1) Compute safe header_size and map_bias values that include enough
+ * space for an unbiased page map.
+ * 2) Refine map_bias based on (1) to omit the header pages in the page
+ * map. The resulting map_bias may be one too small.
+ * 3) Refine map_bias based on (2). The result will be >= the result
+ * from (2), and will always be correct.
*/
- header_size = sizeof(arena_chunk_t) +
- (sizeof(arena_chunk_map_t) * (chunk_npages - 1));
- arena_chunk_header_npages = (header_size >> PAGE_SHIFT) +
- ((header_size & PAGE_MASK) != 0);
- arena_maxclass = chunksize - (arena_chunk_header_npages << PAGE_SHIFT);
+ map_bias = 0;
+ for (i = 0; i < 3; i++) {
+ header_size = offsetof(arena_chunk_t, map)
+ + (sizeof(arena_chunk_map_t) * (chunk_npages-map_bias));
+ map_bias = (header_size >> PAGE_SHIFT) + ((header_size &
+ PAGE_MASK) != 0);
+ }
+ assert(map_bias > 0);
+
+ arena_maxclass = chunksize - (map_bias << PAGE_SHIFT);
return (false);
}
diff --git a/jemalloc/src/base.c b/jemalloc/src/base.c
index 605197e..cc85e84 100644
--- a/jemalloc/src/base.c
+++ b/jemalloc/src/base.c
@@ -32,7 +32,7 @@
assert(minsize != 0);
csize = CHUNK_CEILING(minsize);
zero = false;
- base_pages = chunk_alloc(csize, &zero);
+ base_pages = chunk_alloc(csize, true, &zero);
if (base_pages == NULL)
return (true);
base_next_addr = base_pages;
diff --git a/jemalloc/src/chunk.c b/jemalloc/src/chunk.c
index e6e3bcd..00bf50a 100644
--- a/jemalloc/src/chunk.c
+++ b/jemalloc/src/chunk.c
@@ -14,11 +14,15 @@
chunk_stats_t stats_chunks;
#endif
+#ifdef JEMALLOC_IVSALLOC
+rtree_t *chunks_rtree;
+#endif
+
/* Various chunk-related settings. */
size_t chunksize;
size_t chunksize_mask; /* (chunksize - 1). */
size_t chunk_npages;
-size_t arena_chunk_header_npages;
+size_t map_bias;
size_t arena_maxclass; /* Max size class for arenas. */
/******************************************************************************/
@@ -30,7 +34,7 @@
* advantage of them if they are returned.
*/
void *
-chunk_alloc(size_t size, bool *zero)
+chunk_alloc(size_t size, bool base, bool *zero)
{
void *ret;
@@ -63,10 +67,18 @@
/* All strategies for allocation failed. */
ret = NULL;
RETURN:
+#ifdef JEMALLOC_IVSALLOC
+ if (base == false && ret != NULL) {
+ if (rtree_set(chunks_rtree, (uintptr_t)ret, ret)) {
+ chunk_dealloc(ret, size);
+ return (NULL);
+ }
+ }
+#endif
#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
if (ret != NULL) {
# ifdef JEMALLOC_PROF
- bool udump;
+ bool gdump;
# endif
malloc_mutex_lock(&chunks_mtx);
# ifdef JEMALLOC_STATS
@@ -76,17 +88,17 @@
if (stats_chunks.curchunks > stats_chunks.highchunks) {
stats_chunks.highchunks = stats_chunks.curchunks;
# ifdef JEMALLOC_PROF
- udump = true;
+ gdump = true;
# endif
}
# ifdef JEMALLOC_PROF
else
- udump = false;
+ gdump = false;
# endif
malloc_mutex_unlock(&chunks_mtx);
# ifdef JEMALLOC_PROF
- if (opt_prof && opt_prof_udump && udump)
- prof_udump();
+ if (opt_prof && opt_prof_gdump && gdump)
+ prof_gdump();
# endif
}
#endif
@@ -104,6 +116,9 @@
assert(size != 0);
assert((size & chunksize_mask) == 0);
+#ifdef JEMALLOC_IVSALLOC
+ rtree_set(chunks_rtree, (uintptr_t)chunk, NULL);
+#endif
#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
malloc_mutex_lock(&chunks_mtx);
stats_chunks.curchunks -= (size / chunksize);
@@ -126,21 +141,27 @@
{
/* Set variables according to the value of opt_lg_chunk. */
- chunksize = (1LU << opt_lg_chunk);
+ chunksize = (ZU(1) << opt_lg_chunk);
assert(chunksize >= PAGE_SIZE);
chunksize_mask = chunksize - 1;
chunk_npages = (chunksize >> PAGE_SHIFT);
+#ifdef JEMALLOC_IVSALLOC
+ chunks_rtree = rtree_new((ZU(1) << (LG_SIZEOF_PTR+3)) - opt_lg_chunk);
+ if (chunks_rtree == NULL)
+ return (true);
+#endif
#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
if (malloc_mutex_init(&chunks_mtx))
return (true);
memset(&stats_chunks, 0, sizeof(chunk_stats_t));
#endif
-
#ifdef JEMALLOC_SWAP
if (chunk_swap_boot())
return (true);
#endif
+ if (chunk_mmap_boot())
+ return (true);
#ifdef JEMALLOC_DSS
if (chunk_dss_boot())
return (true);
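
The chunk.c changes above register every non-base chunk in chunks_rtree at
allocation time and clear the entry on deallocation; that registry is what
lets JEMALLOC_IVSALLOC builds reject pointers the allocator never issued. The
rtree API itself is not part of this hunk, so the sketch below substitutes a
flat table for the radix tree purely to illustrate the
register/deregister/lookup flow (all names and constants are illustrative):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define CHUNKSIZE  ((uintptr_t)1 << 20)  /* assumed 1 MiB chunks */
    #define MAX_CHUNKS 64

    static uintptr_t registry[MAX_CHUNKS];   /* flat stand-in for chunks_rtree */

    /* Analogous to the rtree_set() calls in chunk_alloc()/chunk_dealloc(). */
    static void
    chunk_track(void *chunk, bool add)
    {
        for (unsigned i = 0; i < MAX_CHUNKS; i++) {
            if (registry[i] == (add ? 0 : (uintptr_t)chunk)) {
                registry[i] = add ? (uintptr_t)chunk : 0;
                return;
            }
        }
    }

    /* The ivsalloc() idea: a pointer is only valid if the chunk containing
     * it is currently registered. */
    static bool
    ptr_is_tracked(const void *ptr)
    {
        uintptr_t base = (uintptr_t)ptr & ~(CHUNKSIZE - 1);

        for (unsigned i = 0; i < MAX_CHUNKS; i++) {
            if (registry[i] == base)
                return (true);
        }
        return (false);
    }

    int
    main(void)
    {
        void *chunk = (void *)(CHUNKSIZE * 3);  /* fake address, never dereferenced */

        chunk_track(chunk, true);
        printf("tracked: %d\n", ptr_is_tracked((char *)chunk + 4096));  /* 1 */
        chunk_track(chunk, false);
        printf("tracked: %d\n", ptr_is_tracked((char *)chunk + 4096));  /* 0 */
        return (0);
    }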
diff --git a/jemalloc/src/chunk_mmap.c b/jemalloc/src/chunk_mmap.c
index d9f9e86..bc36755 100644
--- a/jemalloc/src/chunk_mmap.c
+++ b/jemalloc/src/chunk_mmap.c
@@ -6,26 +6,30 @@
/*
* Used by chunk_alloc_mmap() to decide whether to attempt the fast path and
- * potentially avoid some system calls. We can get away without TLS here,
- * since the state of mmap_unaligned only affects performance, rather than
- * correct function.
+ * potentially avoid some system calls.
*/
-static
#ifndef NO_TLS
- __thread
+static __thread bool mmap_unaligned_tls
+ JEMALLOC_ATTR(tls_model("initial-exec"));
+#define MMAP_UNALIGNED_GET() mmap_unaligned_tls
+#define MMAP_UNALIGNED_SET(v) do { \
+ mmap_unaligned_tls = (v); \
+} while (0)
+#else
+static pthread_key_t mmap_unaligned_tsd;
+#define MMAP_UNALIGNED_GET() ((bool)pthread_getspecific(mmap_unaligned_tsd))
+#define MMAP_UNALIGNED_SET(v) do { \
+ pthread_setspecific(mmap_unaligned_tsd, (void *)(v)); \
+} while (0)
#endif
- bool mmap_unaligned
-#ifndef NO_TLS
- JEMALLOC_ATTR(tls_model("initial-exec"))
-#endif
- ;
/******************************************************************************/
/* Function prototypes for non-inline static functions. */
static void *pages_map(void *addr, size_t size, bool noreserve);
static void pages_unmap(void *addr, size_t size);
-static void *chunk_alloc_mmap_slow(size_t size, bool unaligned, bool noreserve);
+static void *chunk_alloc_mmap_slow(size_t size, bool unaligned,
+ bool noreserve);
static void *chunk_alloc_mmap_internal(size_t size, bool noreserve);
/******************************************************************************/
@@ -54,9 +58,9 @@
* We succeeded in mapping memory, but not in the right place.
*/
if (munmap(ret, size) == -1) {
- char buf[STRERROR_BUF];
+ char buf[BUFERROR_BUF];
- strerror_r(errno, buf, sizeof(buf));
+ buferror(errno, buf, sizeof(buf));
malloc_write("<jemalloc>: Error in munmap(): ");
malloc_write(buf);
malloc_write("\n");
@@ -76,9 +80,9 @@
{
if (munmap(addr, size) == -1) {
- char buf[STRERROR_BUF];
+ char buf[BUFERROR_BUF];
- strerror_r(errno, buf, sizeof(buf));
+ buferror(errno, buf, sizeof(buf));
malloc_write("<jemalloc>: Error in munmap(): ");
malloc_write(buf);
malloc_write("\n");
@@ -128,7 +132,7 @@
* method.
*/
if (unaligned == false)
- mmap_unaligned = false;
+ MMAP_UNALIGNED_SET(false);
return (ret);
}
@@ -166,7 +170,7 @@
* fast method next time.
*/
- if (mmap_unaligned == false) {
+ if (MMAP_UNALIGNED_GET() == false) {
size_t offset;
ret = pages_map(NULL, size, noreserve);
@@ -175,7 +179,7 @@
offset = CHUNK_ADDR2OFFSET(ret);
if (offset != 0) {
- mmap_unaligned = true;
+ MMAP_UNALIGNED_SET(true);
/* Try to extend chunk boundary. */
if (pages_map((void *)((uintptr_t)ret + size),
chunksize - offset, noreserve) == NULL) {
@@ -184,7 +188,8 @@
* the reliable-but-expensive method.
*/
pages_unmap(ret, size);
- ret = chunk_alloc_mmap_slow(size, true, noreserve);
+ ret = chunk_alloc_mmap_slow(size, true,
+ noreserve);
} else {
/* Clean up unneeded leading space. */
pages_unmap(ret, chunksize - offset);
@@ -216,3 +221,17 @@
pages_unmap(chunk, size);
}
+
+bool
+chunk_mmap_boot(void)
+{
+
+#ifdef NO_TLS
+ if (pthread_key_create(&mmap_unaligned_tsd, NULL) != 0) {
+ malloc_write("<jemalloc>: Error in pthread_key_create()\n");
+ return (true);
+ }
+#endif
+
+ return (false);
+}
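
chunk_mmap.c above hides mmap_unaligned behind MMAP_UNALIGNED_GET()/SET()
macros so the same call sites work whether the platform has __thread TLS or
only pthread TSD (the NO_TLS case, e.g. the OS X port), with
chunk_mmap_boot() creating the key in the fallback path. A minimal sketch of
the same pattern for a generic per-thread flag; the names are illustrative:

    #include <pthread.h>
    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Define NO_TLS when building for a platform without __thread. */
    #ifndef NO_TLS
    static __thread bool flag_tls;
    #define FLAG_GET()   flag_tls
    #define FLAG_SET(v)  do { flag_tls = (v); } while (0)
    #else
    static pthread_key_t flag_tsd;
    #define FLAG_GET()   ((bool)(uintptr_t)pthread_getspecific(flag_tsd))
    #define FLAG_SET(v)  do {                                           \
        pthread_setspecific(flag_tsd, (void *)(uintptr_t)(v));          \
    } while (0)
    #endif

    /* Mirrors chunk_mmap_boot(): only the TSD fallback needs setup. */
    static bool
    flag_boot(void)
    {
    #ifdef NO_TLS
        if (pthread_key_create(&flag_tsd, NULL) != 0)
            return (true);
    #endif
        return (false);
    }

    int
    main(void)
    {
        if (flag_boot())
            return (1);
        FLAG_SET(true);
        printf("flag=%d\n", FLAG_GET());
        return (0);
    }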
diff --git a/jemalloc/src/chunk_swap.c b/jemalloc/src/chunk_swap.c
index ed9e414..ee038ba 100644
--- a/jemalloc/src/chunk_swap.c
+++ b/jemalloc/src/chunk_swap.c
@@ -294,9 +294,10 @@
void *addr = mmap((void *)((uintptr_t)vaddr + voff), sizes[i],
PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, fds[i], 0);
if (addr == MAP_FAILED) {
- char buf[STRERROR_BUF];
+ char buf[BUFERROR_BUF];
- strerror_r(errno, buf, sizeof(buf));
+
+ buferror(errno, buf, sizeof(buf));
malloc_write(
"<jemalloc>: Error in mmap(..., MAP_FIXED, ...): ");
malloc_write(buf);
@@ -304,7 +305,7 @@
if (opt_abort)
abort();
if (munmap(vaddr, voff) == -1) {
- strerror_r(errno, buf, sizeof(buf));
+ buferror(errno, buf, sizeof(buf));
malloc_write("<jemalloc>: Error in munmap(): ");
malloc_write(buf);
malloc_write("\n");
diff --git a/jemalloc/src/ckh.c b/jemalloc/src/ckh.c
index a0c4162..682a8db 100644
--- a/jemalloc/src/ckh.c
+++ b/jemalloc/src/ckh.c
@@ -263,13 +263,12 @@
lg_curcells = ckh->lg_curbuckets + LG_CKH_BUCKET_CELLS;
while (true) {
lg_curcells++;
- tab = (ckhc_t *) ipalloc((ZU(1) << LG_CACHELINE),
- sizeof(ckhc_t) << lg_curcells);
+ tab = (ckhc_t *)ipalloc(sizeof(ckhc_t) << lg_curcells,
+ ZU(1) << LG_CACHELINE, true);
if (tab == NULL) {
ret = true;
goto RETURN;
}
- memset(tab, 0, sizeof(ckhc_t) << lg_curcells);
/* Swap in new table. */
ttab = ckh->tab;
ckh->tab = tab;
@@ -305,8 +304,8 @@
*/
lg_prevbuckets = ckh->lg_curbuckets;
lg_curcells = ckh->lg_curbuckets + LG_CKH_BUCKET_CELLS - 1;
- tab = (ckhc_t *)ipalloc((ZU(1) << LG_CACHELINE),
- sizeof(ckhc_t) << lg_curcells);
+ tab = (ckhc_t *)ipalloc(sizeof(ckhc_t) << lg_curcells,
+ ZU(1) << LG_CACHELINE, true);
if (tab == NULL) {
/*
* An OOM error isn't worth propagating, since it doesn't
@@ -314,7 +313,6 @@
*/
return;
}
- memset(tab, 0, sizeof(ckhc_t) << lg_curcells);
/* Swap in new table. */
ttab = ckh->tab;
ckh->tab = tab;
@@ -377,13 +375,12 @@
ckh->hash = hash;
ckh->keycomp = keycomp;
- ckh->tab = (ckhc_t *)ipalloc((ZU(1) << LG_CACHELINE),
- sizeof(ckhc_t) << lg_mincells);
+ ckh->tab = (ckhc_t *)ipalloc(sizeof(ckhc_t) << lg_mincells,
+ (ZU(1) << LG_CACHELINE), true);
if (ckh->tab == NULL) {
ret = true;
goto RETURN;
}
- memset(ckh->tab, 0, sizeof(ckhc_t) << lg_mincells);
#ifdef JEMALLOC_DEBUG
ckh->magic = CKH_MAGIG;
@@ -570,12 +567,21 @@
{
size_t ret1, ret2;
uint64_t h;
+ union {
+ const void *v;
+ uint64_t i;
+ } u;
assert(minbits <= 32 || (SIZEOF_PTR == 8 && minbits <= 64));
assert(hash1 != NULL);
assert(hash2 != NULL);
- h = hash(&key, sizeof(void *), 0xd983396e68886082LLU);
+ assert(sizeof(u.v) == sizeof(u.i));
+#if (LG_SIZEOF_PTR != LG_SIZEOF_INT)
+ u.i = 0;
+#endif
+ u.v = key;
+ h = hash(&u.i, sizeof(u.i), 0xd983396e68886082LLU);
if (minbits <= 32) {
/*
* Avoid doing multiple hashes, since a single hash provides
@@ -586,7 +592,7 @@
} else {
assert(SIZEOF_PTR == 8);
ret1 = h;
- ret2 = hash(&key, sizeof(void *), 0x5e2be9aff8709a5dLLU);
+ ret2 = hash(&u.i, sizeof(u.i), 0x5e2be9aff8709a5dLLU);
}
*hash1 = ret1;
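
The pointer-hash change in ckh.c above copies the key into a union of
{ const void *; uint64_t } and hashes the 64-bit member, so the hash input is
always eight fully defined bytes even when pointers are narrower than that
(the upper bytes are zero-filled first). A compact sketch of the idiom, with
a toy mixer standing in for jemalloc's hash():

    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Toy 64-bit mixer; only the (buffer, length, seed) calling convention
     * matters for the example. */
    static uint64_t
    mix64(const void *data, size_t len, uint64_t seed)
    {
        const unsigned char *p = data;
        uint64_t h = seed;

        for (size_t i = 0; i < len; i++)
            h = (h ^ p[i]) * 0x100000001b3ULL;  /* FNV-style step */
        return (h);
    }

    /* Hash a pointer value through a union so that exactly eight defined
     * bytes are hashed on both 32- and 64-bit targets. */
    static uint64_t
    hash_ptr(const void *key, uint64_t seed)
    {
        union {
            const void *v;
            uint64_t i;
        } u;

        u.i = 0;    /* Zero-fill before storing a possibly narrower pointer. */
        u.v = key;
        return (mix64(&u.i, sizeof(u.i), seed));
    }

    int
    main(void)
    {
        int x;

        printf("%016llx\n",
            (unsigned long long)hash_ptr(&x, 0xd983396e68886082ULL));
        return (0);
    }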
diff --git a/jemalloc/src/ctl.c b/jemalloc/src/ctl.c
index ffb732d..c83ee4f 100644
--- a/jemalloc/src/ctl.c
+++ b/jemalloc/src/ctl.c
@@ -41,6 +41,11 @@
#ifdef JEMALLOC_TCACHE
CTL_PROTO(tcache_flush)
#endif
+CTL_PROTO(thread_arena)
+#ifdef JEMALLOC_STATS
+CTL_PROTO(thread_allocated)
+CTL_PROTO(thread_deallocated)
+#endif
CTL_PROTO(config_debug)
CTL_PROTO(config_dss)
CTL_PROTO(config_dynamic_page_shift)
@@ -57,8 +62,15 @@
CTL_PROTO(config_tls)
CTL_PROTO(config_xmalloc)
CTL_PROTO(opt_abort)
+CTL_PROTO(opt_lg_qspace_max)
+CTL_PROTO(opt_lg_cspace_max)
+CTL_PROTO(opt_lg_chunk)
+CTL_PROTO(opt_narenas)
+CTL_PROTO(opt_lg_dirty_mult)
+CTL_PROTO(opt_stats_print)
#ifdef JEMALLOC_FILL
CTL_PROTO(opt_junk)
+CTL_PROTO(opt_zero)
#endif
#ifdef JEMALLOC_SYSV
CTL_PROTO(opt_sysv)
@@ -66,27 +78,22 @@
#ifdef JEMALLOC_XMALLOC
CTL_PROTO(opt_xmalloc)
#endif
-#ifdef JEMALLOC_ZERO
-CTL_PROTO(opt_zero)
-#endif
#ifdef JEMALLOC_TCACHE
CTL_PROTO(opt_tcache)
CTL_PROTO(opt_lg_tcache_gc_sweep)
#endif
#ifdef JEMALLOC_PROF
CTL_PROTO(opt_prof)
+CTL_PROTO(opt_prof_prefix)
CTL_PROTO(opt_prof_active)
CTL_PROTO(opt_lg_prof_bt_max)
CTL_PROTO(opt_lg_prof_sample)
CTL_PROTO(opt_lg_prof_interval)
-CTL_PROTO(opt_prof_udump)
+CTL_PROTO(opt_prof_gdump)
CTL_PROTO(opt_prof_leak)
+CTL_PROTO(opt_prof_accum)
+CTL_PROTO(opt_lg_prof_tcmax)
#endif
-CTL_PROTO(opt_stats_print)
-CTL_PROTO(opt_lg_qspace_max)
-CTL_PROTO(opt_lg_cspace_max)
-CTL_PROTO(opt_lg_dirty_mult)
-CTL_PROTO(opt_lg_chunk)
#ifdef JEMALLOC_SWAP
CTL_PROTO(opt_overcommit)
#endif
@@ -125,6 +132,7 @@
CTL_PROTO(arenas_nhbins)
#endif
CTL_PROTO(arenas_nlruns)
+CTL_PROTO(arenas_purge)
#ifdef JEMALLOC_PROF
CTL_PROTO(prof_active)
CTL_PROTO(prof_dump)
@@ -210,6 +218,15 @@
};
#endif
+static const ctl_node_t thread_node[] = {
+ {NAME("arena"), CTL(thread_arena)}
+#ifdef JEMALLOC_STATS
+ ,
+ {NAME("allocated"), CTL(thread_allocated)},
+ {NAME("deallocated"), CTL(thread_deallocated)}
+#endif
+};
+
static const ctl_node_t config_node[] = {
{NAME("debug"), CTL(config_debug)},
{NAME("dss"), CTL(config_dss)},
@@ -230,36 +247,43 @@
static const ctl_node_t opt_node[] = {
{NAME("abort"), CTL(opt_abort)},
+ {NAME("lg_qspace_max"), CTL(opt_lg_qspace_max)},
+ {NAME("lg_cspace_max"), CTL(opt_lg_cspace_max)},
+ {NAME("lg_chunk"), CTL(opt_lg_chunk)},
+ {NAME("narenas"), CTL(opt_narenas)},
+ {NAME("lg_dirty_mult"), CTL(opt_lg_dirty_mult)},
+ {NAME("stats_print"), CTL(opt_stats_print)}
#ifdef JEMALLOC_FILL
+ ,
{NAME("junk"), CTL(opt_junk)},
+ {NAME("zero"), CTL(opt_zero)}
#endif
#ifdef JEMALLOC_SYSV
- {NAME("sysv"), CTL(opt_sysv)},
+ ,
+ {NAME("sysv"), CTL(opt_sysv)}
#endif
#ifdef JEMALLOC_XMALLOC
- {NAME("xmalloc"), CTL(opt_xmalloc)},
-#endif
-#ifdef JEMALLOC_ZERO
- {NAME("zero"), CTL(opt_zero)},
+ ,
+ {NAME("xmalloc"), CTL(opt_xmalloc)}
#endif
#ifdef JEMALLOC_TCACHE
+ ,
{NAME("tcache"), CTL(opt_tcache)},
- {NAME("lg_tcache_gc_sweep"), CTL(opt_lg_tcache_gc_sweep)},
+ {NAME("lg_tcache_gc_sweep"), CTL(opt_lg_tcache_gc_sweep)}
#endif
#ifdef JEMALLOC_PROF
+ ,
{NAME("prof"), CTL(opt_prof)},
+ {NAME("prof_prefix"), CTL(opt_prof_prefix)},
{NAME("prof_active"), CTL(opt_prof_active)},
{NAME("lg_prof_bt_max"), CTL(opt_lg_prof_bt_max)},
{NAME("lg_prof_sample"), CTL(opt_lg_prof_sample)},
{NAME("lg_prof_interval"), CTL(opt_lg_prof_interval)},
- {NAME("prof_udump"), CTL(opt_prof_udump)},
+ {NAME("prof_gdump"), CTL(opt_prof_gdump)},
{NAME("prof_leak"), CTL(opt_prof_leak)},
+ {NAME("prof_accum"), CTL(opt_prof_accum)},
+ {NAME("lg_prof_tcmax"), CTL(opt_lg_prof_tcmax)}
#endif
- {NAME("stats_print"), CTL(opt_stats_print)},
- {NAME("lg_qspace_max"), CTL(opt_lg_qspace_max)},
- {NAME("lg_cspace_max"), CTL(opt_lg_cspace_max)},
- {NAME("lg_dirty_mult"), CTL(opt_lg_dirty_mult)},
- {NAME("lg_chunk"), CTL(opt_lg_chunk)}
#ifdef JEMALLOC_SWAP
,
{NAME("overcommit"), CTL(opt_overcommit)}
@@ -321,7 +345,8 @@
#endif
{NAME("bin"), CHILD(arenas_bin)},
{NAME("nlruns"), CTL(arenas_nlruns)},
- {NAME("lrun"), CHILD(arenas_lrun)}
+ {NAME("lrun"), CHILD(arenas_lrun)},
+ {NAME("purge"), CTL(arenas_purge)}
};
#ifdef JEMALLOC_PROF
@@ -448,6 +473,7 @@
#ifdef JEMALLOC_TCACHE
{NAME("tcache"), CHILD(tcache)},
#endif
+ {NAME("thread"), CHILD(thread)},
{NAME("config"), CHILD(config)},
{NAME("opt"), CHILD(opt)},
{NAME("arenas"), CHILD(arenas)},
@@ -1028,13 +1054,13 @@
VOID();
- tcache = tcache_tls;
+ tcache = TCACHE_GET();
if (tcache == NULL) {
ret = 0;
goto RETURN;
}
tcache_destroy(tcache);
- tcache_tls = NULL;
+ TCACHE_SET(NULL);
ret = 0;
RETURN:
@@ -1042,6 +1068,49 @@
}
#endif
+static int
+thread_arena_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
+ void *newp, size_t newlen)
+{
+ int ret;
+ unsigned newind, oldind;
+
+ newind = oldind = choose_arena()->ind;
+	WRITE(newind, unsigned);
+	READ(oldind, unsigned);
+ if (newind != oldind) {
+ arena_t *arena;
+
+ if (newind >= narenas) {
+ /* New arena index is out of range. */
+ ret = EFAULT;
+ goto RETURN;
+ }
+
+ /* Initialize arena if necessary. */
+ malloc_mutex_lock(&arenas_lock);
+ if ((arena = arenas[newind]) == NULL)
+ arena = arenas_extend(newind);
+ malloc_mutex_unlock(&arenas_lock);
+ if (arena == NULL) {
+ ret = EAGAIN;
+ goto RETURN;
+ }
+
+ /* Set new arena association. */
+ ARENA_SET(arena);
+ }
+
+ ret = 0;
+RETURN:
+ return (ret);
+}
+
+#ifdef JEMALLOC_STATS
+CTL_RO_GEN(thread_allocated, ALLOCATED_GET(), uint64_t);
+CTL_RO_GEN(thread_deallocated, DEALLOCATED_GET(), uint64_t);
+#endif
+
/******************************************************************************/
#ifdef JEMALLOC_DEBUG
@@ -1137,8 +1206,15 @@
/******************************************************************************/
CTL_RO_GEN(opt_abort, opt_abort, bool)
+CTL_RO_GEN(opt_lg_qspace_max, opt_lg_qspace_max, size_t)
+CTL_RO_GEN(opt_lg_cspace_max, opt_lg_cspace_max, size_t)
+CTL_RO_GEN(opt_lg_chunk, opt_lg_chunk, size_t)
+CTL_RO_GEN(opt_narenas, opt_narenas, size_t)
+CTL_RO_GEN(opt_lg_dirty_mult, opt_lg_dirty_mult, ssize_t)
+CTL_RO_GEN(opt_stats_print, opt_stats_print, bool)
#ifdef JEMALLOC_FILL
CTL_RO_GEN(opt_junk, opt_junk, bool)
+CTL_RO_GEN(opt_zero, opt_zero, bool)
#endif
#ifdef JEMALLOC_SYSV
CTL_RO_GEN(opt_sysv, opt_sysv, bool)
@@ -1146,27 +1222,22 @@
#ifdef JEMALLOC_XMALLOC
CTL_RO_GEN(opt_xmalloc, opt_xmalloc, bool)
#endif
-#ifdef JEMALLOC_ZERO
-CTL_RO_GEN(opt_zero, opt_zero, bool)
-#endif
#ifdef JEMALLOC_TCACHE
CTL_RO_GEN(opt_tcache, opt_tcache, bool)
CTL_RO_GEN(opt_lg_tcache_gc_sweep, opt_lg_tcache_gc_sweep, ssize_t)
#endif
#ifdef JEMALLOC_PROF
CTL_RO_GEN(opt_prof, opt_prof, bool)
+CTL_RO_GEN(opt_prof_prefix, opt_prof_prefix, const char *)
CTL_RO_GEN(opt_prof_active, opt_prof_active, bool)
CTL_RO_GEN(opt_lg_prof_bt_max, opt_lg_prof_bt_max, size_t)
CTL_RO_GEN(opt_lg_prof_sample, opt_lg_prof_sample, size_t)
CTL_RO_GEN(opt_lg_prof_interval, opt_lg_prof_interval, ssize_t)
-CTL_RO_GEN(opt_prof_udump, opt_prof_udump, bool)
+CTL_RO_GEN(opt_prof_gdump, opt_prof_gdump, bool)
CTL_RO_GEN(opt_prof_leak, opt_prof_leak, bool)
+CTL_RO_GEN(opt_prof_accum, opt_prof_accum, bool)
+CTL_RO_GEN(opt_lg_prof_tcmax, opt_lg_prof_tcmax, ssize_t)
#endif
-CTL_RO_GEN(opt_stats_print, opt_stats_print, bool)
-CTL_RO_GEN(opt_lg_qspace_max, opt_lg_qspace_max, size_t)
-CTL_RO_GEN(opt_lg_cspace_max, opt_lg_cspace_max, size_t)
-CTL_RO_GEN(opt_lg_dirty_mult, opt_lg_dirty_mult, ssize_t)
-CTL_RO_GEN(opt_lg_chunk, opt_lg_chunk, size_t)
#ifdef JEMALLOC_SWAP
CTL_RO_GEN(opt_overcommit, opt_overcommit, bool)
#endif
@@ -1249,6 +1320,44 @@
#endif
CTL_RO_GEN(arenas_nlruns, nlclasses, size_t)
+static int
+arenas_purge_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
+ void *newp, size_t newlen)
+{
+ int ret;
+ unsigned arena;
+
+ WRITEONLY();
+ arena = UINT_MAX;
+ WRITE(arena, unsigned);
+ if (newp != NULL && arena >= narenas) {
+ ret = EFAULT;
+ goto RETURN;
+ } else {
+ arena_t *tarenas[narenas];
+
+ malloc_mutex_lock(&arenas_lock);
+ memcpy(tarenas, arenas, sizeof(arena_t *) * narenas);
+ malloc_mutex_unlock(&arenas_lock);
+
+ if (arena == UINT_MAX) {
+ unsigned i;
+ for (i = 0; i < narenas; i++) {
+ if (tarenas[i] != NULL)
+ arena_purge_all(tarenas[i]);
+ }
+ } else {
+ assert(arena < narenas);
+ if (tarenas[arena] != NULL)
+ arena_purge_all(tarenas[arena]);
+ }
+ }
+
+ ret = 0;
+RETURN:
+ return (ret);
+}
+
/******************************************************************************/
#ifdef JEMALLOC_PROF
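
ctl.c above wires up the new "thread.arena", "thread.allocated"/
"thread.deallocated", and "arenas.purge" nodes. A sketch of how an
application might drive them through mallctl(); this assumes a build without
JEMALLOC_PREFIX (so the exported symbol is plain mallctl) and with
JEMALLOC_STATS enabled for the per-thread counters:

    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Normally declared by the installed jemalloc header. */
    int mallctl(const char *name, void *oldp, size_t *oldlenp, void *newp,
        size_t newlen);

    int
    main(void)
    {
        unsigned arena;
        uint64_t alloc, dealloc;
        size_t sz;

        /* Read the calling thread's current arena index. */
        sz = sizeof(arena);
        if (mallctl("thread.arena", &arena, &sz, NULL, 0) == 0)
            printf("thread.arena: %u\n", arena);

        /* Rebind this thread to arena 0 (write-only use of the same node). */
        arena = 0;
        if (mallctl("thread.arena", NULL, NULL, &arena, sizeof(arena)) != 0)
            fprintf(stderr, "thread.arena write failed\n");

        /* Per-thread allocation counters (JEMALLOC_STATS builds only). */
        sz = sizeof(alloc);
        mallctl("thread.allocated", &alloc, &sz, NULL, 0);
        sz = sizeof(dealloc);
        mallctl("thread.deallocated", &dealloc, &sz, NULL, 0);
        printf("allocated=%llu deallocated=%llu\n",
            (unsigned long long)alloc, (unsigned long long)dealloc);

        /* Purge unused dirty pages in every arena; pass an arena index via
         * newp to purge just one. */
        if (mallctl("arenas.purge", NULL, NULL, NULL, 0) != 0)
            fprintf(stderr, "arenas.purge failed\n");

        return (0);
    }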
diff --git a/jemalloc/src/huge.c b/jemalloc/src/huge.c
index 49962ea..a035197 100644
--- a/jemalloc/src/huge.c
+++ b/jemalloc/src/huge.c
@@ -37,7 +37,7 @@
if (node == NULL)
return (NULL);
- ret = chunk_alloc(csize, &zero);
+ ret = chunk_alloc(csize, false, &zero);
if (ret == NULL) {
base_node_dealloc(node);
return (NULL);
@@ -69,12 +69,11 @@
/* Only handles large allocations that require more than chunk alignment. */
void *
-huge_palloc(size_t alignment, size_t size)
+huge_palloc(size_t size, size_t alignment, bool zero)
{
void *ret;
size_t alloc_size, chunk_size, offset;
extent_node_t *node;
- bool zero;
/*
* This allocation requires alignment that is even larger than chunk
@@ -98,8 +97,7 @@
if (node == NULL)
return (NULL);
- zero = false;
- ret = chunk_alloc(alloc_size, &zero);
+ ret = chunk_alloc(alloc_size, false, &zero);
if (ret == NULL) {
base_node_dealloc(node);
return (NULL);
@@ -142,45 +140,80 @@
malloc_mutex_unlock(&huge_mtx);
#ifdef JEMALLOC_FILL
- if (opt_junk)
- memset(ret, 0xa5, chunk_size);
- else if (opt_zero)
- memset(ret, 0, chunk_size);
+ if (zero == false) {
+ if (opt_junk)
+ memset(ret, 0xa5, chunk_size);
+ else if (opt_zero)
+ memset(ret, 0, chunk_size);
+ }
#endif
return (ret);
}
void *
-huge_ralloc(void *ptr, size_t size, size_t oldsize)
+huge_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra)
{
- void *ret;
- size_t copysize;
- /* Avoid moving the allocation if the size class would not change. */
- if (oldsize > arena_maxclass &&
- CHUNK_CEILING(size) == CHUNK_CEILING(oldsize)) {
+ /*
+ * Avoid moving the allocation if the size class can be left the same.
+ */
+ if (oldsize > arena_maxclass
+ && CHUNK_CEILING(oldsize) >= CHUNK_CEILING(size)
+ && CHUNK_CEILING(oldsize) <= CHUNK_CEILING(size+extra)) {
+ assert(CHUNK_CEILING(oldsize) == oldsize);
#ifdef JEMALLOC_FILL
if (opt_junk && size < oldsize) {
- memset((void *)((uintptr_t)ptr + size), 0x5a, oldsize
- - size);
- } else if (opt_zero && size > oldsize) {
- memset((void *)((uintptr_t)ptr + oldsize), 0, size
- - oldsize);
+ memset((void *)((uintptr_t)ptr + size), 0x5a,
+ oldsize - size);
}
#endif
return (ptr);
}
- /*
- * If we get here, then size and oldsize are different enough that we
- * need to use a different size class. In that case, fall back to
- * allocating new space and copying.
- */
- ret = huge_malloc(size, false);
- if (ret == NULL)
- return (NULL);
+ /* Reallocation would require a move. */
+ return (NULL);
+}
+void *
+huge_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra,
+ size_t alignment, bool zero)
+{
+ void *ret;
+ size_t copysize;
+
+ /* Try to avoid moving the allocation. */
+ ret = huge_ralloc_no_move(ptr, oldsize, size, extra);
+ if (ret != NULL)
+ return (ret);
+
+ /*
+ * size and oldsize are different enough that we need to use a
+ * different size class. In that case, fall back to allocating new
+ * space and copying.
+ */
+ if (alignment != 0)
+ ret = huge_palloc(size + extra, alignment, zero);
+ else
+ ret = huge_malloc(size + extra, zero);
+
+ if (ret == NULL) {
+ if (extra == 0)
+ return (NULL);
+ /* Try again, this time without extra. */
+ if (alignment != 0)
+ ret = huge_palloc(size, alignment, zero);
+ else
+ ret = huge_malloc(size, zero);
+
+ if (ret == NULL)
+ return (NULL);
+ }
+
+ /*
+ * Copy at most size bytes (not size+extra), since the caller has no
+ * expectation that the extra bytes will be reliably preserved.
+ */
copysize = (size < oldsize) ? size : oldsize;
memcpy(ret, ptr, copysize);
idalloc(ptr);
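
huge_ralloc() above requests size+extra first and retries with just size if
that fails, because the extra bytes are an optional hint rather than a
guarantee (the same contract rallocm() exposes further down). The same
two-step fallback, sketched around plain malloc() with illustrative names; it
assumes size + extra does not overflow, as the jemalloc code also asserts:

    #include <stdio.h>
    #include <stdlib.h>

    /*
     * Allocate at least `size` bytes, opportunistically asking for
     * `size + extra`.  Returns NULL only if even the plain `size` request
     * fails; *usable reports what was actually obtained.
     */
    static void *
    alloc_with_extra(size_t size, size_t extra, size_t *usable)
    {
        void *p = malloc(size + extra);

        if (p != NULL) {
            *usable = size + extra;
            return (p);
        }
        if (extra == 0)
            return (NULL);
        /* Try again without the optional extra space. */
        p = malloc(size);
        if (p == NULL)
            return (NULL);
        *usable = size;
        return (p);
    }

    int
    main(void)
    {
        size_t usable;
        void *p = alloc_with_extra(1024, 512, &usable);

        if (p != NULL) {
            printf("usable=%zu\n", usable);
            free(p);
        }
        return (0);
    }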
diff --git a/jemalloc/src/jemalloc.c b/jemalloc/src/jemalloc.c
index b36590d..2aebc51 100644
--- a/jemalloc/src/jemalloc.c
+++ b/jemalloc/src/jemalloc.c
@@ -1,85 +1,3 @@
-/*-
- * This allocator implementation is designed to provide scalable performance
- * for multi-threaded programs on multi-processor systems. The following
- * features are included for this purpose:
- *
- * + Multiple arenas are used if there are multiple CPUs, which reduces lock
- * contention and cache sloshing.
- *
- * + Thread-specific caching is used if there are multiple threads, which
- * reduces the amount of locking.
- *
- * + Cache line sharing between arenas is avoided for internal data
- * structures.
- *
- * + Memory is managed in chunks and runs (chunks can be split into runs),
- * rather than as individual pages. This provides a constant-time
- * mechanism for associating allocations with particular arenas.
- *
- * Allocation requests are rounded up to the nearest size class, and no record
- * of the original request size is maintained. Allocations are broken into
- * categories according to size class. Assuming 1 MiB chunks, 4 KiB pages and
- * a 16 byte quantum on a 32-bit system, the size classes in each category are
- * as follows:
- *
- * |========================================|
- * | Category | Subcategory | Size |
- * |========================================|
- * | Small | Tiny | 2 |
- * | | | 4 |
- * | | | 8 |
- * | |------------------+----------|
- * | | Quantum-spaced | 16 |
- * | | | 32 |
- * | | | 48 |
- * | | | ... |
- * | | | 96 |
- * | | | 112 |
- * | | | 128 |
- * | |------------------+----------|
- * | | Cacheline-spaced | 192 |
- * | | | 256 |
- * | | | 320 |
- * | | | 384 |
- * | | | 448 |
- * | | | 512 |
- * | |------------------+----------|
- * | | Sub-page | 760 |
- * | | | 1024 |
- * | | | 1280 |
- * | | | ... |
- * | | | 3328 |
- * | | | 3584 |
- * | | | 3840 |
- * |========================================|
- * | Large | 4 KiB |
- * | | 8 KiB |
- * | | 12 KiB |
- * | | ... |
- * | | 1012 KiB |
- * | | 1016 KiB |
- * | | 1020 KiB |
- * |========================================|
- * | Huge | 1 MiB |
- * | | 2 MiB |
- * | | 3 MiB |
- * | | ... |
- * |========================================|
- *
- * Different mechanisms are used accoding to category:
- *
- * Small: Each size class is segregated into its own set of runs. Each run
- * maintains a bitmap of which regions are free/allocated.
- *
- * Large : Each allocation is backed by a dedicated run. Metadata are stored
- * in the associated arena chunk header maps.
- *
- * Huge : Each allocation is backed by a dedicated contiguous set of chunks.
- * Metadata are stored in a separate red-black tree.
- *
- *******************************************************************************
- */
-
#define JEMALLOC_C_
#include "jemalloc/internal/jemalloc_internal.h"
@@ -89,22 +7,30 @@
malloc_mutex_t arenas_lock;
arena_t **arenas;
unsigned narenas;
-#ifndef NO_TLS
static unsigned next_arena;
-#endif
#ifndef NO_TLS
-__thread arena_t *arenas_map JEMALLOC_ATTR(tls_model("initial-exec"));
+__thread arena_t *arenas_tls JEMALLOC_ATTR(tls_model("initial-exec"));
+#else
+pthread_key_t arenas_tsd;
+#endif
+
+#ifdef JEMALLOC_STATS
+# ifndef NO_TLS
+__thread thread_allocated_t thread_allocated_tls;
+# else
+pthread_key_t thread_allocated_tsd;
+# endif
#endif
/* Set to true once the allocator has been initialized. */
-static bool malloc_initialized = false;
+static bool malloc_initialized = false;
/* Used to let the initializing thread recursively allocate. */
-static pthread_t malloc_initializer = (unsigned long)0;
+static pthread_t malloc_initializer = (unsigned long)0;
/* Used to avoid initialization races. */
-static malloc_mutex_t init_lock = PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP;
+static malloc_mutex_t init_lock = MALLOC_MUTEX_INITIALIZER;
#ifdef DYNAMIC_PAGE_SHIFT
size_t pagesize;
@@ -115,8 +41,7 @@
unsigned ncpus;
/* Runtime configuration options. */
-const char *JEMALLOC_P(malloc_options)
- JEMALLOC_ATTR(visibility("default"));
+const char *JEMALLOC_P(malloc_conf) JEMALLOC_ATTR(visibility("default"));
#ifdef JEMALLOC_DEBUG
bool opt_abort = true;
# ifdef JEMALLOC_FILL
@@ -137,7 +62,7 @@
#ifdef JEMALLOC_FILL
bool opt_zero = false;
#endif
-static int opt_narenas_lshift = 0;
+size_t opt_narenas = 0;
/******************************************************************************/
/* Function prototypes for non-inline static functions. */
@@ -145,9 +70,15 @@
static void wrtmessage(void *cbopaque, const char *s);
static void stats_print_atexit(void);
static unsigned malloc_ncpus(void);
+#if (defined(JEMALLOC_STATS) && defined(NO_TLS))
+static void thread_allocated_cleanup(void *arg);
+#endif
+static bool malloc_conf_next(char const **opts_p, char const **k_p,
+ size_t *klen_p, char const **v_p, size_t *vlen_p);
+static void malloc_conf_error(const char *msg, const char *k, size_t klen,
+ const char *v, size_t vlen);
+static void malloc_conf_init(void);
static bool malloc_init_hard(void);
-static void jemalloc_prefork(void);
-static void jemalloc_postfork(void);
/******************************************************************************/
/* malloc_message() setup. */
@@ -160,8 +91,14 @@
void
wrtmessage(void *cbopaque, const char *s)
{
-
- write(STDERR_FILENO, s, strlen(s));
+#ifdef JEMALLOC_CC_SILENCE
+ int result =
+#endif
+ write(STDERR_FILENO, s, strlen(s));
+#ifdef JEMALLOC_CC_SILENCE
+ if (result < 0)
+ result = errno;
+#endif
}
void (*JEMALLOC_P(malloc_message))(void *, const char *s)
@@ -179,8 +116,8 @@
arena_t *ret;
/* Allocate enough space for trailing bins. */
- ret = (arena_t *)base_alloc(sizeof(arena_t)
- + (sizeof(arena_bin_t) * (nbins - 1)));
+ ret = (arena_t *)base_alloc(offsetof(arena_t, bins)
+ + (sizeof(arena_bin_t) * nbins));
if (ret != NULL && arena_new(ret, ind) == false) {
arenas[ind] = ret;
return (ret);
@@ -200,7 +137,6 @@
return (arenas[0]);
}
-#ifndef NO_TLS
/*
* Choose an arena based on a per-thread value (slow-path code only, called
* only by choose_arena()).
@@ -219,11 +155,29 @@
} else
ret = arenas[0];
- arenas_map = ret;
+ ARENA_SET(ret);
return (ret);
}
+
+/*
+ * glibc provides a non-standard strerror_r() when _GNU_SOURCE is defined, so
+ * provide a wrapper.
+ */
+int
+buferror(int errnum, char *buf, size_t buflen)
+{
+#ifdef _GNU_SOURCE
+	char *b = strerror_r(errnum, buf, buflen);
+ if (b != buf) {
+ strncpy(buf, b, buflen);
+ buf[buflen-1] = '\0';
+ }
+ return (0);
+#else
+	return (strerror_r(errnum, buf, buflen));
#endif
+}
static void
stats_print_atexit(void)
@@ -283,6 +237,17 @@
return (ret);
}
+#if (defined(JEMALLOC_STATS) && defined(NO_TLS))
+static void
+thread_allocated_cleanup(void *arg)
+{
+ uint64_t *allocated = (uint64_t *)arg;
+
+ if (allocated != NULL)
+ idalloc(allocated);
+}
+#endif
+
/*
* FreeBSD's pthreads implementation calls malloc(3), so the malloc
* implementation has to take pains to avoid infinite recursion during
@@ -299,12 +264,323 @@
}
static bool
-malloc_init_hard(void)
+malloc_conf_next(char const **opts_p, char const **k_p, size_t *klen_p,
+ char const **v_p, size_t *vlen_p)
+{
+ bool accept;
+ const char *opts = *opts_p;
+
+ *k_p = opts;
+
+ for (accept = false; accept == false;) {
+ switch (*opts) {
+ case 'A': case 'B': case 'C': case 'D': case 'E':
+ case 'F': case 'G': case 'H': case 'I': case 'J':
+ case 'K': case 'L': case 'M': case 'N': case 'O':
+ case 'P': case 'Q': case 'R': case 'S': case 'T':
+ case 'U': case 'V': case 'W': case 'X': case 'Y':
+ case 'Z':
+ case 'a': case 'b': case 'c': case 'd': case 'e':
+ case 'f': case 'g': case 'h': case 'i': case 'j':
+ case 'k': case 'l': case 'm': case 'n': case 'o':
+ case 'p': case 'q': case 'r': case 's': case 't':
+ case 'u': case 'v': case 'w': case 'x': case 'y':
+ case 'z':
+ case '0': case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9':
+ case '_':
+ opts++;
+ break;
+ case ':':
+ opts++;
+ *klen_p = (uintptr_t)opts - 1 - (uintptr_t)*k_p;
+ *v_p = opts;
+ accept = true;
+ break;
+ case '\0':
+ if (opts != *opts_p) {
+ malloc_write("<jemalloc>: Conf string "
+ "ends with key\n");
+ }
+ return (true);
+ default:
+ malloc_write("<jemalloc>: Malformed conf "
+ "string\n");
+ return (true);
+ }
+ }
+
+ for (accept = false; accept == false;) {
+ switch (*opts) {
+ case ',':
+ opts++;
+ /*
+ * Look ahead one character here, because the
+ * next time this function is called, it will
+ * assume that end of input has been cleanly
+ * reached if no input remains, but we have
+ * optimistically already consumed the comma if
+ * one exists.
+ */
+ if (*opts == '\0') {
+ malloc_write("<jemalloc>: Conf string "
+ "ends with comma\n");
+ }
+ *vlen_p = (uintptr_t)opts - 1 - (uintptr_t)*v_p;
+ accept = true;
+ break;
+ case '\0':
+ *vlen_p = (uintptr_t)opts - (uintptr_t)*v_p;
+ accept = true;
+ break;
+ default:
+ opts++;
+ break;
+ }
+ }
+
+ *opts_p = opts;
+ return (false);
+}
+
+static void
+malloc_conf_error(const char *msg, const char *k, size_t klen, const char *v,
+ size_t vlen)
+{
+ char buf[PATH_MAX + 1];
+
+ malloc_write("<jemalloc>: ");
+ malloc_write(msg);
+ malloc_write(": ");
+ memcpy(buf, k, klen);
+ memcpy(&buf[klen], ":", 1);
+ memcpy(&buf[klen+1], v, vlen);
+ buf[klen+1+vlen] = '\0';
+ malloc_write(buf);
+ malloc_write("\n");
+}
+
+static void
+malloc_conf_init(void)
{
unsigned i;
- int linklen;
char buf[PATH_MAX + 1];
- const char *opts;
+ const char *opts, *k, *v;
+ size_t klen, vlen;
+
+ for (i = 0; i < 3; i++) {
+ /* Get runtime configuration. */
+ switch (i) {
+ case 0:
+ if (JEMALLOC_P(malloc_conf) != NULL) {
+ /*
+ * Use options that were compiled into the
+ * program.
+ */
+ opts = JEMALLOC_P(malloc_conf);
+ } else {
+ /* No configuration specified. */
+ buf[0] = '\0';
+ opts = buf;
+ }
+ break;
+ case 1: {
+ int linklen;
+ const char *linkname =
+#ifdef JEMALLOC_PREFIX
+ "/etc/"JEMALLOC_PREFIX"malloc.conf"
+#else
+ "/etc/malloc.conf"
+#endif
+ ;
+
+ if ((linklen = readlink(linkname, buf,
+ sizeof(buf) - 1)) != -1) {
+ /*
+ * Use the contents of the "/etc/malloc.conf"
+ * symbolic link's name.
+ */
+ buf[linklen] = '\0';
+ opts = buf;
+ } else {
+ /* No configuration specified. */
+ buf[0] = '\0';
+ opts = buf;
+ }
+ break;
+ }
+ case 2: {
+ const char *envname =
+#ifdef JEMALLOC_PREFIX
+ JEMALLOC_CPREFIX"MALLOC_CONF"
+#else
+ "MALLOC_CONF"
+#endif
+ ;
+
+ if ((opts = getenv(envname)) != NULL) {
+ /*
+				 * Do nothing; opts is already initialized to
+				 * the value of the MALLOC_CONF environment
+				 * variable.
+ */
+ } else {
+ /* No configuration specified. */
+ buf[0] = '\0';
+ opts = buf;
+ }
+ break;
+ }
+ default:
+ /* NOTREACHED */
+ assert(false);
+ buf[0] = '\0';
+ opts = buf;
+ }
+
+ while (*opts != '\0' && malloc_conf_next(&opts, &k, &klen, &v,
+ &vlen) == false) {
+#define CONF_HANDLE_BOOL(n) \
+ if (sizeof(#n)-1 == klen && strncmp(#n, k, \
+ klen) == 0) { \
+ if (strncmp("true", v, vlen) == 0 && \
+ vlen == sizeof("true")-1) \
+ opt_##n = true; \
+ else if (strncmp("false", v, vlen) == \
+ 0 && vlen == sizeof("false")-1) \
+ opt_##n = false; \
+ else { \
+ malloc_conf_error( \
+ "Invalid conf value", \
+ k, klen, v, vlen); \
+ } \
+ continue; \
+ }
+#define CONF_HANDLE_SIZE_T(n, min, max) \
+ if (sizeof(#n)-1 == klen && strncmp(#n, k, \
+ klen) == 0) { \
+ unsigned long ul; \
+ char *end; \
+ \
+ errno = 0; \
+ ul = strtoul(v, &end, 0); \
+ if (errno != 0 || (uintptr_t)end - \
+ (uintptr_t)v != vlen) { \
+ malloc_conf_error( \
+ "Invalid conf value", \
+ k, klen, v, vlen); \
+ } else if (ul < min || ul > max) { \
+ malloc_conf_error( \
+ "Out-of-range conf value", \
+ k, klen, v, vlen); \
+ } else \
+ opt_##n = ul; \
+ continue; \
+ }
+#define CONF_HANDLE_SSIZE_T(n, min, max) \
+ if (sizeof(#n)-1 == klen && strncmp(#n, k, \
+ klen) == 0) { \
+ long l; \
+ char *end; \
+ \
+ errno = 0; \
+ l = strtol(v, &end, 0); \
+ if (errno != 0 || (uintptr_t)end - \
+ (uintptr_t)v != vlen) { \
+ malloc_conf_error( \
+ "Invalid conf value", \
+ k, klen, v, vlen); \
+ } else if (l < (ssize_t)min || l > \
+ (ssize_t)max) { \
+ malloc_conf_error( \
+ "Out-of-range conf value", \
+ k, klen, v, vlen); \
+ } else \
+ opt_##n = l; \
+ continue; \
+ }
+#define CONF_HANDLE_CHAR_P(n, d) \
+ if (sizeof(#n)-1 == klen && strncmp(#n, k, \
+ klen) == 0) { \
+ size_t cpylen = (vlen <= \
+ sizeof(opt_##n)-1) ? vlen : \
+ sizeof(opt_##n)-1; \
+ strncpy(opt_##n, v, cpylen); \
+ opt_##n[cpylen] = '\0'; \
+ continue; \
+ }
+
+ CONF_HANDLE_BOOL(abort)
+ CONF_HANDLE_SIZE_T(lg_qspace_max, LG_QUANTUM,
+ PAGE_SHIFT-1)
+ CONF_HANDLE_SIZE_T(lg_cspace_max, LG_QUANTUM,
+ PAGE_SHIFT-1)
+ /*
+			 * Chunks always require at least one header page,
+ * plus one data page.
+ */
+ CONF_HANDLE_SIZE_T(lg_chunk, PAGE_SHIFT+1,
+ (sizeof(size_t) << 3) - 1)
+ CONF_HANDLE_SIZE_T(narenas, 1, SIZE_T_MAX)
+ CONF_HANDLE_SSIZE_T(lg_dirty_mult, -1,
+ (sizeof(size_t) << 3) - 1)
+ CONF_HANDLE_BOOL(stats_print)
+#ifdef JEMALLOC_FILL
+ CONF_HANDLE_BOOL(junk)
+ CONF_HANDLE_BOOL(zero)
+#endif
+#ifdef JEMALLOC_SYSV
+ CONF_HANDLE_BOOL(sysv)
+#endif
+#ifdef JEMALLOC_XMALLOC
+ CONF_HANDLE_BOOL(xmalloc)
+#endif
+#ifdef JEMALLOC_TCACHE
+ CONF_HANDLE_BOOL(tcache)
+ CONF_HANDLE_SSIZE_T(lg_tcache_gc_sweep, -1,
+ (sizeof(size_t) << 3) - 1)
+ CONF_HANDLE_SSIZE_T(lg_tcache_max, -1,
+ (sizeof(size_t) << 3) - 1)
+#endif
+#ifdef JEMALLOC_PROF
+ CONF_HANDLE_BOOL(prof)
+ CONF_HANDLE_CHAR_P(prof_prefix, "jeprof")
+ CONF_HANDLE_SIZE_T(lg_prof_bt_max, 0, LG_PROF_BT_MAX)
+ CONF_HANDLE_BOOL(prof_active)
+ CONF_HANDLE_SSIZE_T(lg_prof_sample, 0,
+ (sizeof(uint64_t) << 3) - 1)
+ CONF_HANDLE_BOOL(prof_accum)
+ CONF_HANDLE_SSIZE_T(lg_prof_tcmax, -1,
+ (sizeof(size_t) << 3) - 1)
+ CONF_HANDLE_SSIZE_T(lg_prof_interval, -1,
+ (sizeof(uint64_t) << 3) - 1)
+ CONF_HANDLE_BOOL(prof_gdump)
+ CONF_HANDLE_BOOL(prof_leak)
+#endif
+#ifdef JEMALLOC_SWAP
+ CONF_HANDLE_BOOL(overcommit)
+#endif
+ malloc_conf_error("Invalid conf pair", k, klen, v,
+ vlen);
+#undef CONF_HANDLE_BOOL
+#undef CONF_HANDLE_SIZE_T
+#undef CONF_HANDLE_SSIZE_T
+#undef CONF_HANDLE_CHAR_P
+ }
+
+ /* Validate configuration of options that are inter-related. */
+ if (opt_lg_qspace_max+1 >= opt_lg_cspace_max) {
+ malloc_write("<jemalloc>: Invalid lg_[qc]space_max "
+ "relationship; restoring defaults\n");
+ opt_lg_qspace_max = LG_QSPACE_MAX_DEFAULT;
+ opt_lg_cspace_max = LG_CSPACE_MAX_DEFAULT;
+ }
+ }
+}
+
+static bool
+malloc_init_hard(void)
+{
arena_t *init_arenas[1];
malloc_mutex_lock(&init_lock);
@@ -347,287 +623,11 @@
}
#endif
- for (i = 0; i < 3; i++) {
- unsigned j;
-
- /* Get runtime configuration. */
- switch (i) {
- case 0:
- if ((linklen = readlink("/etc/jemalloc.conf", buf,
- sizeof(buf) - 1)) != -1) {
- /*
- * Use the contents of the "/etc/jemalloc.conf"
- * symbolic link's name.
- */
- buf[linklen] = '\0';
- opts = buf;
- } else {
- /* No configuration specified. */
- buf[0] = '\0';
- opts = buf;
- }
- break;
- case 1:
- if ((opts = getenv("JEMALLOC_OPTIONS")) != NULL) {
- /*
- * Do nothing; opts is already initialized to
- * the value of the JEMALLOC_OPTIONS
- * environment variable.
- */
- } else {
- /* No configuration specified. */
- buf[0] = '\0';
- opts = buf;
- }
- break;
- case 2:
- if (JEMALLOC_P(malloc_options) != NULL) {
- /*
- * Use options that were compiled into the
- * program.
- */
- opts = JEMALLOC_P(malloc_options);
- } else {
- /* No configuration specified. */
- buf[0] = '\0';
- opts = buf;
- }
- break;
- default:
- /* NOTREACHED */
- assert(false);
- buf[0] = '\0';
- opts = buf;
- }
-
- for (j = 0; opts[j] != '\0'; j++) {
- unsigned k, nreps;
- bool nseen;
-
- /* Parse repetition count, if any. */
- for (nreps = 0, nseen = false;; j++, nseen = true) {
- switch (opts[j]) {
- case '0': case '1': case '2': case '3':
- case '4': case '5': case '6': case '7':
- case '8': case '9':
- nreps *= 10;
- nreps += opts[j] - '0';
- break;
- default:
- goto MALLOC_OUT;
- }
- }
-MALLOC_OUT:
- if (nseen == false)
- nreps = 1;
-
- for (k = 0; k < nreps; k++) {
- switch (opts[j]) {
- case 'a':
- opt_abort = false;
- break;
- case 'A':
- opt_abort = true;
- break;
#ifdef JEMALLOC_PROF
- case 'b':
- if (opt_lg_prof_bt_max > 0)
- opt_lg_prof_bt_max--;
- break;
- case 'B':
- if (opt_lg_prof_bt_max < LG_PROF_BT_MAX)
- opt_lg_prof_bt_max++;
- break;
+ prof_boot0();
#endif
- case 'c':
- if (opt_lg_cspace_max - 1 >
- opt_lg_qspace_max &&
- opt_lg_cspace_max >
- LG_CACHELINE)
- opt_lg_cspace_max--;
- break;
- case 'C':
- if (opt_lg_cspace_max < PAGE_SHIFT
- - 1)
- opt_lg_cspace_max++;
- break;
- case 'd':
- if (opt_lg_dirty_mult + 1 <
- (sizeof(size_t) << 3))
- opt_lg_dirty_mult++;
- break;
- case 'D':
- if (opt_lg_dirty_mult >= 0)
- opt_lg_dirty_mult--;
- break;
-#ifdef JEMALLOC_PROF
- case 'e':
- opt_prof_active = false;
- break;
- case 'E':
- opt_prof_active = true;
- break;
- case 'f':
- opt_prof = false;
- break;
- case 'F':
- opt_prof = true;
- break;
-#endif
-#ifdef JEMALLOC_TCACHE
- case 'g':
- if (opt_lg_tcache_gc_sweep >= 0)
- opt_lg_tcache_gc_sweep--;
- break;
- case 'G':
- if (opt_lg_tcache_gc_sweep + 1 <
- (sizeof(size_t) << 3))
- opt_lg_tcache_gc_sweep++;
- break;
- case 'h':
- opt_tcache = false;
- break;
- case 'H':
- opt_tcache = true;
- break;
-#endif
-#ifdef JEMALLOC_PROF
- case 'i':
- if (opt_lg_prof_interval >= 0)
- opt_lg_prof_interval--;
- break;
- case 'I':
- if (opt_lg_prof_interval + 1 <
- (sizeof(uint64_t) << 3))
- opt_lg_prof_interval++;
- break;
-#endif
-#ifdef JEMALLOC_FILL
- case 'j':
- opt_junk = false;
- break;
- case 'J':
- opt_junk = true;
- break;
-#endif
- case 'k':
- /*
- * Chunks always require at least one
- * header page, plus one data page.
- */
- if ((1U << (opt_lg_chunk - 1)) >=
- (2U << PAGE_SHIFT))
- opt_lg_chunk--;
- break;
- case 'K':
- if (opt_lg_chunk + 1 <
- (sizeof(size_t) << 3))
- opt_lg_chunk++;
- break;
-#ifdef JEMALLOC_PROF
- case 'l':
- opt_prof_leak = false;
- break;
- case 'L':
- opt_prof_leak = true;
- break;
-#endif
-#ifdef JEMALLOC_TCACHE
- case 'm':
- if (opt_lg_tcache_maxclass >= 0)
- opt_lg_tcache_maxclass--;
- break;
- case 'M':
- if (opt_lg_tcache_maxclass + 1 <
- (sizeof(size_t) << 3))
- opt_lg_tcache_maxclass++;
- break;
-#endif
- case 'n':
- opt_narenas_lshift--;
- break;
- case 'N':
- opt_narenas_lshift++;
- break;
-#ifdef JEMALLOC_SWAP
- case 'o':
- opt_overcommit = false;
- break;
- case 'O':
- opt_overcommit = true;
- break;
-#endif
- case 'p':
- opt_stats_print = false;
- break;
- case 'P':
- opt_stats_print = true;
- break;
- case 'q':
- if (opt_lg_qspace_max > LG_QUANTUM)
- opt_lg_qspace_max--;
- break;
- case 'Q':
- if (opt_lg_qspace_max + 1 <
- opt_lg_cspace_max)
- opt_lg_qspace_max++;
- break;
-#ifdef JEMALLOC_PROF
- case 's':
- if (opt_lg_prof_sample > 0)
- opt_lg_prof_sample--;
- break;
- case 'S':
- if (opt_lg_prof_sample + 1 <
- (sizeof(uint64_t) << 3))
- opt_lg_prof_sample++;
- break;
- case 'u':
- opt_prof_udump = false;
- break;
- case 'U':
- opt_prof_udump = true;
- break;
-#endif
-#ifdef JEMALLOC_SYSV
- case 'v':
- opt_sysv = false;
- break;
- case 'V':
- opt_sysv = true;
- break;
-#endif
-#ifdef JEMALLOC_XMALLOC
- case 'x':
- opt_xmalloc = false;
- break;
- case 'X':
- opt_xmalloc = true;
- break;
-#endif
-#ifdef JEMALLOC_FILL
- case 'z':
- opt_zero = false;
- break;
- case 'Z':
- opt_zero = true;
- break;
-#endif
- default: {
- char cbuf[2];
- cbuf[0] = opts[j];
- cbuf[1] = '\0';
- malloc_write(
- "<jemalloc>: Unsupported character "
- "in malloc options: '");
- malloc_write(cbuf);
- malloc_write("'\n");
- }
- }
- }
- }
- }
+ malloc_conf_init();
/* Register fork handlers. */
if (pthread_atfork(jemalloc_prefork, jemalloc_postfork,
@@ -662,7 +662,7 @@
}
#ifdef JEMALLOC_PROF
- prof_boot0();
+ prof_boot1();
#endif
if (arena_boot()) {
@@ -679,6 +679,15 @@
return (true);
}
+#if (defined(JEMALLOC_STATS) && defined(NO_TLS))
+ /* Initialize allocation counters before any allocations can occur. */
+ if (pthread_key_create(&thread_allocated_tsd, thread_allocated_cleanup)
+ != 0) {
+ malloc_mutex_unlock(&init_lock);
+ return (true);
+ }
+#endif
+
/*
* Create enough scaffolding to allow recursive allocation in
* malloc_ncpus().
@@ -697,19 +706,17 @@
return (true);
}
-#ifndef NO_TLS
/*
* Assign the initial arena to the initial thread, in order to avoid
* spurious creation of an extra arena if the application switches to
* threaded mode.
*/
- arenas_map = arenas[0];
-#endif
+ ARENA_SET(arenas[0]);
malloc_mutex_init(&arenas_lock);
#ifdef JEMALLOC_PROF
- if (prof_boot1()) {
+ if (prof_boot2()) {
malloc_mutex_unlock(&init_lock);
return (true);
}
@@ -721,64 +728,40 @@
ncpus = malloc_ncpus();
malloc_mutex_lock(&init_lock);
- if (ncpus > 1) {
+ if (opt_narenas == 0) {
/*
* For SMP systems, create more than one arena per CPU by
* default.
*/
- opt_narenas_lshift += 2;
+ if (ncpus > 1)
+ opt_narenas = ncpus << 2;
+ else
+ opt_narenas = 1;
+ }
+ narenas = opt_narenas;
+ /*
+ * Make sure that the arenas array can be allocated. In practice, this
+ * limit is enough to allow the allocator to function, but the ctl
+ * machinery will fail to allocate memory at far lower limits.
+ */
+ if (narenas > chunksize / sizeof(arena_t *)) {
+ char buf[UMAX2S_BUFSIZE];
+
+ narenas = chunksize / sizeof(arena_t *);
+ malloc_write("<jemalloc>: Reducing narenas to limit (");
+ malloc_write(u2s(narenas, 10, buf));
+ malloc_write(")\n");
}
- /* Determine how many arenas to use. */
- narenas = ncpus;
- if (opt_narenas_lshift > 0) {
- if ((narenas << opt_narenas_lshift) > narenas)
- narenas <<= opt_narenas_lshift;
- /*
- * Make sure not to exceed the limits of what base_alloc() can
- * handle.
- */
- if (narenas * sizeof(arena_t *) > chunksize)
- narenas = chunksize / sizeof(arena_t *);
- } else if (opt_narenas_lshift < 0) {
- if ((narenas >> -opt_narenas_lshift) < narenas)
- narenas >>= -opt_narenas_lshift;
- /* Make sure there is at least one arena. */
- if (narenas == 0)
- narenas = 1;
- }
+ next_arena = (narenas > 0) ? 1 : 0;
#ifdef NO_TLS
- if (narenas > 1) {
- static const unsigned primes[] = {1, 3, 5, 7, 11, 13, 17, 19,
- 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83,
- 89, 97, 101, 103, 107, 109, 113, 127, 131, 137, 139, 149,
- 151, 157, 163, 167, 173, 179, 181, 191, 193, 197, 199, 211,
- 223, 227, 229, 233, 239, 241, 251, 257, 263};
- unsigned nprimes, parenas;
-
- /*
- * Pick a prime number of hash arenas that is more than narenas
- * so that direct hashing of pthread_self() pointers tends to
- * spread allocations evenly among the arenas.
- */
- assert((narenas & 1) == 0); /* narenas must be even. */
- nprimes = (sizeof(primes) >> LG_SIZEOF_INT);
- parenas = primes[nprimes - 1]; /* In case not enough primes. */
- for (i = 1; i < nprimes; i++) {
- if (primes[i] > narenas) {
- parenas = primes[i];
- break;
- }
- }
- narenas = parenas;
+ if (pthread_key_create(&arenas_tsd, NULL) != 0) {
+ malloc_mutex_unlock(&init_lock);
+ return (true);
}
#endif
-#ifndef NO_TLS
- next_arena = (narenas > 0) ? 1 : 0;
-#endif
-
/* Allocate and initialize arenas. */
arenas = (arena_t **)base_alloc(sizeof(arena_t *) * narenas);
if (arenas == NULL) {
@@ -793,11 +776,35 @@
/* Copy the pointer to the one arena that was already initialized. */
arenas[0] = init_arenas[0];
+#ifdef JEMALLOC_ZONE
+ /* Register the custom zone. */
+ malloc_zone_register(create_zone());
+
+ /*
+ * Convert the default szone to an "overlay zone" that is capable of
+ * deallocating szone-allocated objects, but allocating new objects
+ * from jemalloc.
+ */
+ szone2ozone(malloc_default_zone());
+#endif
+
malloc_initialized = true;
malloc_mutex_unlock(&init_lock);
return (false);
}
+
+#ifdef JEMALLOC_ZONE
+JEMALLOC_ATTR(constructor)
+void
+jemalloc_darwin_init(void)
+{
+
+ if (malloc_init_hard())
+ abort();
+}
+#endif
+
/*
* End initialization functions.
*/
@@ -812,8 +819,19 @@
JEMALLOC_P(malloc)(size_t size)
{
void *ret;
+#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS))
+ size_t usize
+# ifdef JEMALLOC_CC_SILENCE
+ = 0
+# endif
+ ;
+#endif
#ifdef JEMALLOC_PROF
- prof_thr_cnt_t *cnt;
+ prof_thr_cnt_t *cnt
+# ifdef JEMALLOC_CC_SILENCE
+ = NULL
+# endif
+ ;
#endif
if (malloc_init()) {
@@ -843,20 +861,26 @@
#ifdef JEMALLOC_PROF
if (opt_prof) {
- if ((cnt = prof_alloc_prep(size)) == NULL) {
+ usize = s2u(size);
+ if ((cnt = prof_alloc_prep(usize)) == NULL) {
ret = NULL;
goto OOM;
}
- if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && size <=
+ if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && usize <=
small_maxclass) {
ret = imalloc(small_maxclass+1);
if (ret != NULL)
- arena_prof_promoted(ret, size);
+ arena_prof_promoted(ret, usize);
} else
ret = imalloc(size);
} else
#endif
+ {
+#ifdef JEMALLOC_STATS
+ usize = s2u(size);
+#endif
ret = imalloc(size);
+ }
OOM:
if (ret == NULL) {
@@ -875,7 +899,13 @@
#endif
#ifdef JEMALLOC_PROF
if (opt_prof && ret != NULL)
- prof_malloc(ret, cnt);
+ prof_malloc(ret, usize, cnt);
+#endif
+#ifdef JEMALLOC_STATS
+ if (ret != NULL) {
+ assert(usize == isalloc(ret));
+ ALLOCATED_ADD(usize, 0);
+ }
#endif
return (ret);
}
@@ -887,8 +917,19 @@
{
int ret;
void *result;
+#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS))
+ size_t usize
+# ifdef JEMALLOC_CC_SILENCE
+ = 0
+# endif
+ ;
+#endif
#ifdef JEMALLOC_PROF
- prof_thr_cnt_t *cnt;
+ prof_thr_cnt_t *cnt
+# ifdef JEMALLOC_CC_SILENCE
+ = NULL
+# endif
+ ;
#endif
if (malloc_init())
@@ -934,24 +975,32 @@
#ifdef JEMALLOC_PROF
if (opt_prof) {
- if ((cnt = prof_alloc_prep(size)) == NULL) {
+ usize = sa2u(size, alignment, NULL);
+ if ((cnt = prof_alloc_prep(usize)) == NULL) {
result = NULL;
ret = EINVAL;
} else {
if (prof_promote && (uintptr_t)cnt !=
- (uintptr_t)1U && size <= small_maxclass) {
- result = ipalloc(alignment,
- small_maxclass+1);
+ (uintptr_t)1U && usize <= small_maxclass) {
+ result = ipalloc(small_maxclass+1,
+ alignment, false);
if (result != NULL) {
arena_prof_promoted(result,
- size);
+ usize);
}
- } else
- result = ipalloc(alignment, size);
+ } else {
+ result = ipalloc(size, alignment,
+ false);
+ }
}
} else
#endif
- result = ipalloc(alignment, size);
+ {
+#ifdef JEMALLOC_STATS
+ usize = sa2u(size, alignment, NULL);
+#endif
+ result = ipalloc(size, alignment, false);
+ }
}
if (result == NULL) {
@@ -970,9 +1019,15 @@
ret = 0;
RETURN:
+#ifdef JEMALLOC_STATS
+ if (result != NULL) {
+ assert(usize == isalloc(result));
+ ALLOCATED_ADD(usize, 0);
+ }
+#endif
#ifdef JEMALLOC_PROF
if (opt_prof && result != NULL)
- prof_malloc(result, cnt);
+ prof_malloc(result, usize, cnt);
#endif
return (ret);
}
@@ -984,8 +1039,19 @@
{
void *ret;
size_t num_size;
+#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS))
+ size_t usize
+# ifdef JEMALLOC_CC_SILENCE
+ = 0
+# endif
+ ;
+#endif
#ifdef JEMALLOC_PROF
- prof_thr_cnt_t *cnt;
+ prof_thr_cnt_t *cnt
+# ifdef JEMALLOC_CC_SILENCE
+ = NULL
+# endif
+ ;
#endif
if (malloc_init()) {
@@ -1020,20 +1086,26 @@
#ifdef JEMALLOC_PROF
if (opt_prof) {
- if ((cnt = prof_alloc_prep(num_size)) == NULL) {
+ usize = s2u(num_size);
+ if ((cnt = prof_alloc_prep(usize)) == NULL) {
ret = NULL;
goto RETURN;
}
- if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && num_size
+ if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && usize
<= small_maxclass) {
ret = icalloc(small_maxclass+1);
if (ret != NULL)
- arena_prof_promoted(ret, num_size);
+ arena_prof_promoted(ret, usize);
} else
ret = icalloc(num_size);
} else
#endif
+ {
+#ifdef JEMALLOC_STATS
+ usize = s2u(num_size);
+#endif
ret = icalloc(num_size);
+ }
RETURN:
if (ret == NULL) {
@@ -1049,7 +1121,13 @@
#ifdef JEMALLOC_PROF
if (opt_prof && ret != NULL)
- prof_malloc(ret, cnt);
+ prof_malloc(ret, usize, cnt);
+#endif
+#ifdef JEMALLOC_STATS
+ if (ret != NULL) {
+ assert(usize == isalloc(ret));
+ ALLOCATED_ADD(usize, 0);
+ }
#endif
return (ret);
}
@@ -1059,10 +1137,25 @@
JEMALLOC_P(realloc)(void *ptr, size_t size)
{
void *ret;
+#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS))
+ size_t usize
+# ifdef JEMALLOC_CC_SILENCE
+ = 0
+# endif
+ ;
+ size_t old_size = 0;
+#endif
#ifdef JEMALLOC_PROF
- size_t old_size;
- prof_thr_cnt_t *cnt;
- prof_ctx_t *old_ctx;
+ prof_thr_cnt_t *cnt
+# ifdef JEMALLOC_CC_SILENCE
+ = NULL
+# endif
+ ;
+ prof_ctx_t *old_ctx
+# ifdef JEMALLOC_CC_SILENCE
+ = NULL
+# endif
+ ;
#endif
if (size == 0) {
@@ -1073,9 +1166,11 @@
#ifdef JEMALLOC_SYSV
else {
if (ptr != NULL) {
+#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS))
+ old_size = isalloc(ptr);
+#endif
#ifdef JEMALLOC_PROF
if (opt_prof) {
- old_size = isalloc(ptr);
old_ctx = prof_ctx_get(ptr);
cnt = NULL;
}
@@ -1084,7 +1179,6 @@
}
#ifdef JEMALLOC_PROF
else if (opt_prof) {
- old_size = 0;
old_ctx = NULL;
cnt = NULL;
}
@@ -1099,24 +1193,33 @@
assert(malloc_initialized || malloc_initializer ==
pthread_self());
+#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS))
+ old_size = isalloc(ptr);
+#endif
#ifdef JEMALLOC_PROF
if (opt_prof) {
- old_size = isalloc(ptr);
+ usize = s2u(size);
old_ctx = prof_ctx_get(ptr);
- if ((cnt = prof_alloc_prep(size)) == NULL) {
+ if ((cnt = prof_alloc_prep(usize)) == NULL) {
ret = NULL;
goto OOM;
}
if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U &&
- size <= small_maxclass) {
- ret = iralloc(ptr, small_maxclass+1);
+ usize <= small_maxclass) {
+ ret = iralloc(ptr, small_maxclass+1, 0, 0,
+ false, false);
if (ret != NULL)
- arena_prof_promoted(ret, size);
+ arena_prof_promoted(ret, usize);
} else
- ret = iralloc(ptr, size);
+ ret = iralloc(ptr, size, 0, 0, false, false);
} else
#endif
- ret = iralloc(ptr, size);
+ {
+#ifdef JEMALLOC_STATS
+ usize = s2u(size);
+#endif
+ ret = iralloc(ptr, size, 0, 0, false, false);
+ }
#ifdef JEMALLOC_PROF
OOM:
@@ -1133,10 +1236,8 @@
}
} else {
#ifdef JEMALLOC_PROF
- if (opt_prof) {
- old_size = 0;
+ if (opt_prof)
old_ctx = NULL;
- }
#endif
if (malloc_init()) {
#ifdef JEMALLOC_PROF
@@ -1147,23 +1248,29 @@
} else {
#ifdef JEMALLOC_PROF
if (opt_prof) {
- if ((cnt = prof_alloc_prep(size)) == NULL)
+ usize = s2u(size);
+ if ((cnt = prof_alloc_prep(usize)) == NULL)
ret = NULL;
else {
if (prof_promote && (uintptr_t)cnt !=
- (uintptr_t)1U && size <=
+ (uintptr_t)1U && usize <=
small_maxclass) {
ret = imalloc(small_maxclass+1);
if (ret != NULL) {
arena_prof_promoted(ret,
- size);
+ usize);
}
} else
ret = imalloc(size);
}
} else
#endif
+ {
+#ifdef JEMALLOC_STATS
+ usize = s2u(size);
+#endif
ret = imalloc(size);
+ }
}
if (ret == NULL) {
@@ -1183,7 +1290,13 @@
#endif
#ifdef JEMALLOC_PROF
if (opt_prof)
- prof_realloc(ret, cnt, ptr, old_size, old_ctx);
+ prof_realloc(ret, usize, cnt, old_size, old_ctx);
+#endif
+#ifdef JEMALLOC_STATS
+ if (ret != NULL) {
+ assert(usize == isalloc(ret));
+ ALLOCATED_ADD(usize, old_size);
+ }
#endif
return (ret);
}
@@ -1194,12 +1307,26 @@
{
if (ptr != NULL) {
+#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS))
+ size_t usize;
+#endif
+
assert(malloc_initialized || malloc_initializer ==
pthread_self());
+#ifdef JEMALLOC_STATS
+ usize = isalloc(ptr);
+#endif
#ifdef JEMALLOC_PROF
- if (opt_prof)
- prof_free(ptr);
+ if (opt_prof) {
+# ifndef JEMALLOC_STATS
+ usize = isalloc(ptr);
+# endif
+ prof_free(ptr, usize);
+ }
+#endif
+#ifdef JEMALLOC_STATS
+ ALLOCATED_ADD(0, usize);
#endif
idalloc(ptr);
}
@@ -1210,6 +1337,57 @@
*/
/******************************************************************************/
/*
+ * Begin non-standard override functions.
+ *
+ * These overrides are omitted if the JEMALLOC_PREFIX is defined, since the
+ * entire point is to avoid accidental mixed allocator usage.
+ */
+#ifndef JEMALLOC_PREFIX
+
+#ifdef JEMALLOC_OVERRIDE_MEMALIGN
+JEMALLOC_ATTR(malloc)
+JEMALLOC_ATTR(visibility("default"))
+void *
+JEMALLOC_P(memalign)(size_t alignment, size_t size)
+{
+ void *ret;
+#ifdef JEMALLOC_CC_SILENCE
+ int result =
+#endif
+ JEMALLOC_P(posix_memalign)(&ret, alignment, size);
+#ifdef JEMALLOC_CC_SILENCE
+ if (result != 0)
+ return (NULL);
+#endif
+ return (ret);
+}
+#endif
+
+#ifdef JEMALLOC_OVERRIDE_VALLOC
+JEMALLOC_ATTR(malloc)
+JEMALLOC_ATTR(visibility("default"))
+void *
+JEMALLOC_P(valloc)(size_t size)
+{
+ void *ret;
+#ifdef JEMALLOC_CC_SILENCE
+ int result =
+#endif
+ JEMALLOC_P(posix_memalign)(&ret, PAGE_SIZE, size);
+#ifdef JEMALLOC_CC_SILENCE
+ if (result != 0)
+ return (NULL);
+#endif
+ return (ret);
+}
+#endif
+
+#endif /* JEMALLOC_PREFIX */
+/*
+ * End non-standard override functions.
+ */
+/******************************************************************************/
+/*
* Begin non-standard functions.
*/
@@ -1219,29 +1397,18 @@
{
size_t ret;
+ assert(malloc_initialized || malloc_initializer == pthread_self());
+
+#ifdef JEMALLOC_IVSALLOC
+ ret = ivsalloc(ptr);
+#else
assert(ptr != NULL);
ret = isalloc(ptr);
+#endif
return (ret);
}
-#ifdef JEMALLOC_SWAP
-JEMALLOC_ATTR(visibility("default"))
-int
-JEMALLOC_P(malloc_swap_enable)(const int *fds, unsigned nfds, int prezeroed)
-{
-
- /*
- * Make sure malloc is initialized, because we need page size, chunk
- * size, etc.
- */
- if (malloc_init())
- return (-1);
-
- return (chunk_swap_enable(fds, nfds, (prezeroed != 0)) ? -1 : 0);
-}
-#endif
-
JEMALLOC_ATTR(visibility("default"))
void
JEMALLOC_P(malloc_stats_print)(void (*write_cb)(void *, const char *),
@@ -1286,6 +1453,247 @@
return (ctl_bymib(mib, miblen, oldp, oldlenp, newp, newlen));
}
+JEMALLOC_INLINE void *
+iallocm(size_t size, size_t alignment, bool zero)
+{
+
+ if (alignment != 0)
+ return (ipalloc(size, alignment, zero));
+ else if (zero)
+ return (icalloc(size));
+ else
+ return (imalloc(size));
+}
+
+JEMALLOC_ATTR(nonnull(1))
+JEMALLOC_ATTR(visibility("default"))
+int
+JEMALLOC_P(allocm)(void **ptr, size_t *rsize, size_t size, int flags)
+{
+ void *p;
+ size_t usize;
+ size_t alignment = (ZU(1) << (flags & ALLOCM_LG_ALIGN_MASK)
+ & (SIZE_T_MAX-1));
+ bool zero = flags & ALLOCM_ZERO;
+#ifdef JEMALLOC_PROF
+ prof_thr_cnt_t *cnt;
+#endif
+
+ assert(ptr != NULL);
+ assert(size != 0);
+
+ if (malloc_init())
+ goto OOM;
+
+#ifdef JEMALLOC_PROF
+ if (opt_prof) {
+ usize = (alignment == 0) ? s2u(size) : sa2u(size, alignment,
+ NULL);
+ if ((cnt = prof_alloc_prep(usize)) == NULL)
+ goto OOM;
+ if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && usize <=
+ small_maxclass) {
+ p = iallocm(small_maxclass+1, alignment, zero);
+ if (p == NULL)
+ goto OOM;
+ arena_prof_promoted(p, usize);
+ } else {
+ p = iallocm(size, alignment, zero);
+ if (p == NULL)
+ goto OOM;
+ }
+
+ if (rsize != NULL)
+ *rsize = usize;
+ } else
+#endif
+ {
+ p = iallocm(size, alignment, zero);
+ if (p == NULL)
+ goto OOM;
+#ifndef JEMALLOC_STATS
+ if (rsize != NULL)
+#endif
+ {
+ usize = (alignment == 0) ? s2u(size) : sa2u(size,
+ alignment, NULL);
+#ifdef JEMALLOC_STATS
+ if (rsize != NULL)
+#endif
+ *rsize = usize;
+ }
+ }
+
+ *ptr = p;
+#ifdef JEMALLOC_STATS
+ assert(usize == isalloc(p));
+ ALLOCATED_ADD(usize, 0);
+#endif
+ return (ALLOCM_SUCCESS);
+OOM:
+#ifdef JEMALLOC_XMALLOC
+ if (opt_xmalloc) {
+ malloc_write("<jemalloc>: Error in allocm(): "
+ "out of memory\n");
+ abort();
+ }
+#endif
+ *ptr = NULL;
+ return (ALLOCM_ERR_OOM);
+}
+
+JEMALLOC_ATTR(nonnull(1))
+JEMALLOC_ATTR(visibility("default"))
+int
+JEMALLOC_P(rallocm)(void **ptr, size_t *rsize, size_t size, size_t extra,
+ int flags)
+{
+ void *p, *q;
+ size_t usize;
+#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS))
+ size_t old_size;
+#endif
+ size_t alignment = (ZU(1) << (flags & ALLOCM_LG_ALIGN_MASK)
+ & (SIZE_T_MAX-1));
+ bool zero = flags & ALLOCM_ZERO;
+ bool no_move = flags & ALLOCM_NO_MOVE;
+#ifdef JEMALLOC_PROF
+ prof_thr_cnt_t *cnt;
+ prof_ctx_t *old_ctx;
+#endif
+
+ assert(ptr != NULL);
+ assert(*ptr != NULL);
+ assert(size != 0);
+ assert(SIZE_T_MAX - size >= extra);
+ assert(malloc_initialized || malloc_initializer == pthread_self());
+
+ p = *ptr;
+#ifdef JEMALLOC_PROF
+ if (opt_prof) {
+ /*
+ * usize isn't knowable before iralloc() returns when extra is
+ * non-zero. Therefore, compute its maximum possible value and
+ * use that in prof_alloc_prep() to decide whether to capture a
+ * backtrace. prof_realloc() will use the actual usize to
+ * decide whether to sample.
+ */
+ size_t max_usize = (alignment == 0) ? s2u(size+extra) :
+ sa2u(size+extra, alignment, NULL);
+ old_size = isalloc(p);
+ old_ctx = prof_ctx_get(p);
+ if ((cnt = prof_alloc_prep(max_usize)) == NULL)
+ goto OOM;
+ if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && max_usize
+ <= small_maxclass) {
+ q = iralloc(p, small_maxclass+1, (small_maxclass+1 >=
+ size+extra) ? 0 : size+extra - (small_maxclass+1),
+ alignment, zero, no_move);
+ if (q == NULL)
+ goto ERR;
+ usize = isalloc(q);
+ arena_prof_promoted(q, usize);
+ } else {
+ q = iralloc(p, size, extra, alignment, zero, no_move);
+ if (q == NULL)
+ goto ERR;
+ usize = isalloc(q);
+ }
+ prof_realloc(q, usize, cnt, old_size, old_ctx);
+ } else
+#endif
+ {
+#ifdef JEMALLOC_STATS
+ old_size = isalloc(p);
+#endif
+ q = iralloc(p, size, extra, alignment, zero, no_move);
+ if (q == NULL)
+ goto ERR;
+#ifndef JEMALLOC_STATS
+ if (rsize != NULL)
+#endif
+ {
+ usize = isalloc(q);
+#ifdef JEMALLOC_STATS
+ if (rsize != NULL)
+#endif
+ *rsize = usize;
+ }
+ }
+
+ *ptr = q;
+#ifdef JEMALLOC_STATS
+ ALLOCATED_ADD(usize, old_size);
+#endif
+ return (ALLOCM_SUCCESS);
+ERR:
+ if (no_move)
+ return (ALLOCM_ERR_NOT_MOVED);
+#ifdef JEMALLOC_PROF
+OOM:
+#endif
+#ifdef JEMALLOC_XMALLOC
+ if (opt_xmalloc) {
+ malloc_write("<jemalloc>: Error in rallocm(): "
+ "out of memory\n");
+ abort();
+ }
+#endif
+ return (ALLOCM_ERR_OOM);
+}
+
+JEMALLOC_ATTR(nonnull(1))
+JEMALLOC_ATTR(visibility("default"))
+int
+JEMALLOC_P(sallocm)(const void *ptr, size_t *rsize, int flags)
+{
+ size_t sz;
+
+ assert(malloc_initialized || malloc_initializer == pthread_self());
+
+#ifdef JEMALLOC_IVSALLOC
+ sz = ivsalloc(ptr);
+#else
+ assert(ptr != NULL);
+ sz = isalloc(ptr);
+#endif
+ assert(rsize != NULL);
+ *rsize = sz;
+
+ return (ALLOCM_SUCCESS);
+}
+
+JEMALLOC_ATTR(nonnull(1))
+JEMALLOC_ATTR(visibility("default"))
+int
+JEMALLOC_P(dallocm)(void *ptr, int flags)
+{
+#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS))
+ size_t usize;
+#endif
+
+ assert(ptr != NULL);
+ assert(malloc_initialized || malloc_initializer == pthread_self());
+
+#ifdef JEMALLOC_STATS
+ usize = isalloc(ptr);
+#endif
+#ifdef JEMALLOC_PROF
+ if (opt_prof) {
+# ifndef JEMALLOC_STATS
+ usize = isalloc(ptr);
+# endif
+ prof_free(ptr, usize);
+ }
+#endif
+#ifdef JEMALLOC_STATS
+ ALLOCATED_ADD(0, usize);
+#endif
+ idalloc(ptr);
+
+ return (ALLOCM_SUCCESS);
+}
+
/*
* End non-standard functions.
*/
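
For orientation, the following caller-side sketch shows how the experimental {,r,s,d}allocm() entry points added above fit together. It assumes the public jemalloc header's ALLOCM_LG_ALIGN(la) flag helper and the JEMALLOC_MANGLE/JEMALLOC_P() name-mangling convention (neither is shown in this hunk); it is an illustrative sketch only, not part of the patch.

    #define JEMALLOC_MANGLE
    #include "jemalloc/jemalloc.h"        /* Assumed public header path. */

    static void
    allocm_example(void)
    {
        void *p;
        size_t usize;

        /* 64-byte-aligned (lg 6), zeroed allocation; usize gets the real size. */
        if (JEMALLOC_P(allocm)(&p, &usize, 4096,
            ALLOCM_LG_ALIGN(6) | ALLOCM_ZERO) != ALLOCM_SUCCESS)
            return;

        /* Ask to grow in place; ALLOCM_NO_MOVE forbids relocating p. */
        if (JEMALLOC_P(rallocm)(&p, &usize, 8192, 0, ALLOCM_NO_MOVE)
            == ALLOCM_ERR_NOT_MOVED) {
            /* p is unchanged; fall back to a moving reallocation if desired. */
        }

        JEMALLOC_P(sallocm)(p, &usize, 0);    /* Query usable size. */
        JEMALLOC_P(dallocm)(p, 0);            /* Deallocate. */
    }
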
@@ -1293,12 +1701,10 @@
/*
* The following functions are used by threading libraries for protection of
- * malloc during fork(). These functions are only called if the program is
- * running in threaded mode, so there is no need to check whether the program
- * is threaded here.
+ * malloc during fork().
*/
-static void
+void
jemalloc_prefork(void)
{
unsigned i;
@@ -1324,7 +1730,7 @@
#endif
}
-static void
+void
jemalloc_postfork(void)
{
unsigned i;
@@ -1349,3 +1755,5 @@
}
malloc_mutex_unlock(&arenas_lock);
}
+
+/******************************************************************************/
diff --git a/jemalloc/src/mutex.c b/jemalloc/src/mutex.c
index 3b6081a..3ecb18a 100644
--- a/jemalloc/src/mutex.c
+++ b/jemalloc/src/mutex.c
@@ -59,7 +59,11 @@
if (pthread_mutexattr_init(&attr) != 0)
return (true);
+#ifdef PTHREAD_MUTEX_ADAPTIVE_NP
pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ADAPTIVE_NP);
+#else
+ pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_DEFAULT);
+#endif
if (pthread_mutex_init(mutex, &attr) != 0) {
pthread_mutexattr_destroy(&attr);
return (true);
@@ -68,3 +72,13 @@
return (false);
}
+
+void
+malloc_mutex_destroy(malloc_mutex_t *mutex)
+{
+
+ if (pthread_mutex_destroy(mutex) != 0) {
+ malloc_write("<jemalloc>: Error in pthread_mutex_destroy()\n");
+ abort();
+ }
+}
diff --git a/jemalloc/src/prof.c b/jemalloc/src/prof.c
index 6d6910e..84ce1ba 100644
--- a/jemalloc/src/prof.c
+++ b/jemalloc/src/prof.c
@@ -12,8 +12,6 @@
#include <libunwind.h>
#endif
-#include <math.h>
-
/******************************************************************************/
/* Data. */
@@ -22,48 +20,30 @@
size_t opt_lg_prof_bt_max = LG_PROF_BT_MAX_DEFAULT;
size_t opt_lg_prof_sample = LG_PROF_SAMPLE_DEFAULT;
ssize_t opt_lg_prof_interval = LG_PROF_INTERVAL_DEFAULT;
-bool opt_prof_udump = false;
+bool opt_prof_gdump = false;
bool opt_prof_leak = false;
+bool opt_prof_accum = true;
+ssize_t opt_lg_prof_tcmax = LG_PROF_TCMAX_DEFAULT;
+char opt_prof_prefix[PATH_MAX + 1];
uint64_t prof_interval;
bool prof_promote;
+unsigned prof_bt_max;
+
+#ifndef NO_TLS
+__thread prof_tdata_t *prof_tdata_tls
+ JEMALLOC_ATTR(tls_model("initial-exec"));
+#endif
+pthread_key_t prof_tdata_tsd;
+
/*
* Global hash of (prof_bt_t *)-->(prof_ctx_t *). This is the master data
- * structure that knows about all backtraces ever captured.
+ * structure that knows about all backtraces currently captured.
*/
static ckh_t bt2ctx;
static malloc_mutex_t bt2ctx_mtx;
-/*
- * Thread-specific hash of (prof_bt_t *)-->(prof_thr_cnt_t *). Each thread
- * keeps a cache of backtraces, with associated thread-specific prof_thr_cnt_t
- * objects. Other threads may read the prof_thr_cnt_t contents, but no others
- * will ever write them.
- *
- * Upon thread exit, the thread must merge all the prof_thr_cnt_t counter data
- * into the associated prof_ctx_t objects, and unlink/free the prof_thr_cnt_t
- * objects.
- */
-static __thread ckh_t *bt2cnt_tls JEMALLOC_ATTR(tls_model("initial-exec"));
-
-/*
- * Same contents as b2cnt_tls, but initialized such that the TSD destructor is
- * called when a thread exits, so that bt2cnt_tls contents can be merged,
- * unlinked, and deallocated.
- */
-static pthread_key_t bt2cnt_tsd;
-
-/* (1U << opt_lg_prof_bt_max). */
-static unsigned prof_bt_max;
-
-static __thread uint64_t prof_sample_prn_state
- JEMALLOC_ATTR(tls_model("initial-exec"));
-static __thread uint64_t prof_sample_threshold
- JEMALLOC_ATTR(tls_model("initial-exec"));
-static __thread uint64_t prof_sample_accum
- JEMALLOC_ATTR(tls_model("initial-exec"));
-
static malloc_mutex_t prof_dump_seq_mtx;
static uint64_t prof_dump_seq;
static uint64_t prof_dump_iseq;
@@ -85,26 +65,25 @@
static malloc_mutex_t enq_mtx;
static bool enq;
static bool enq_idump;
-static bool enq_udump;
+static bool enq_gdump;
/******************************************************************************/
/* Function prototypes for non-inline static functions. */
static prof_bt_t *bt_dup(prof_bt_t *bt);
-static void bt_init(prof_bt_t *bt, void **vec);
+static void bt_destroy(prof_bt_t *bt);
#ifdef JEMALLOC_PROF_LIBGCC
static _Unwind_Reason_Code prof_unwind_init_callback(
struct _Unwind_Context *context, void *arg);
static _Unwind_Reason_Code prof_unwind_callback(
struct _Unwind_Context *context, void *arg);
#endif
-static void prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max);
-static prof_thr_cnt_t *prof_lookup(prof_bt_t *bt);
-static void prof_ctx_set(const void *ptr, prof_ctx_t *ctx);
static bool prof_flush(bool propagate_err);
static bool prof_write(const char *s, bool propagate_err);
-static void prof_ctx_merge(prof_ctx_t *ctx, prof_cnt_t *cnt_all,
+static void prof_ctx_sum(prof_ctx_t *ctx, prof_cnt_t *cnt_all,
size_t *leak_nctx);
+static void prof_ctx_destroy(prof_ctx_t *ctx);
+static void prof_ctx_merge(prof_ctx_t *ctx, prof_thr_cnt_t *cnt);
static bool prof_dump_ctx(prof_ctx_t *ctx, prof_bt_t *bt,
bool propagate_err);
static bool prof_dump_maps(bool propagate_err);
@@ -115,11 +94,11 @@
static void prof_bt_hash(const void *key, unsigned minbits, size_t *hash1,
size_t *hash2);
static bool prof_bt_keycomp(const void *k1, const void *k2);
-static void bt2cnt_thread_cleanup(void *arg);
+static void prof_tdata_cleanup(void *arg);
/******************************************************************************/
-static void
+void
bt_init(prof_bt_t *bt, void **vec)
{
@@ -127,6 +106,13 @@
bt->len = 0;
}
+static void
+bt_destroy(prof_bt_t *bt)
+{
+
+ idalloc(bt);
+}
+
static prof_bt_t *
bt_dup(prof_bt_t *bt)
{
@@ -165,7 +151,7 @@
static inline void
prof_leave(void)
{
- bool idump, udump;
+ bool idump, gdump;
malloc_mutex_unlock(&bt2ctx_mtx);
@@ -173,14 +159,14 @@
enq = false;
idump = enq_idump;
enq_idump = false;
- udump = enq_udump;
- enq_udump = false;
+ gdump = enq_gdump;
+ enq_gdump = false;
malloc_mutex_unlock(&enq_mtx);
if (idump)
prof_idump();
- if (udump)
- prof_udump();
+ if (gdump)
+ prof_gdump();
}
#ifdef JEMALLOC_PROF_LIBGCC
@@ -208,7 +194,7 @@
return (_URC_NO_REASON);
}
-static void
+void
prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max)
{
prof_unwind_data_t data = {bt, nignore, max};
@@ -216,7 +202,7 @@
_Unwind_Backtrace(prof_unwind_callback, &data);
}
#elif defined(JEMALLOC_PROF_LIBUNWIND)
-static void
+void
prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max)
{
unw_context_t uc;
@@ -251,41 +237,29 @@
}
}
#else
-static void
+void
prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max)
{
-#define NIGNORE 3
#define BT_FRAME(i) \
- if ((i) < NIGNORE + max) { \
+ if ((i) < nignore + max) { \
void *p; \
if (__builtin_frame_address(i) == 0) \
return; \
p = __builtin_return_address(i); \
if (p == NULL) \
return; \
- if (i >= NIGNORE) { \
- bt->vec[(i) - NIGNORE] = p; \
- bt->len = (i) - NIGNORE + 1; \
+ if (i >= nignore) { \
+ bt->vec[(i) - nignore] = p; \
+ bt->len = (i) - nignore + 1; \
} \
} else \
return;
assert(max <= (1U << opt_lg_prof_bt_max));
- /*
- * Ignore the first three frames, since they are:
- *
- * 0: prof_backtrace()
- * 1: prof_alloc_prep()
- * 2: malloc(), calloc(), etc.
- */
-#if 1
- assert(nignore + 1 == NIGNORE);
-#else
BT_FRAME(0)
BT_FRAME(1)
BT_FRAME(2)
-#endif
BT_FRAME(3)
BT_FRAME(4)
BT_FRAME(5)
@@ -432,345 +406,119 @@
}
#endif
-static prof_thr_cnt_t *
+prof_thr_cnt_t *
prof_lookup(prof_bt_t *bt)
{
- prof_thr_cnt_t *ret;
- ckh_t *bt2cnt = bt2cnt_tls;
+ union {
+ prof_thr_cnt_t *p;
+ void *v;
+ } ret;
+ prof_tdata_t *prof_tdata;
- if (bt2cnt == NULL) {
- /* Initialize an empty cache for this thread. */
- bt2cnt = (ckh_t *)imalloc(sizeof(ckh_t));
- if (bt2cnt == NULL)
+ prof_tdata = PROF_TCACHE_GET();
+ if (prof_tdata == NULL) {
+ prof_tdata = prof_tdata_init();
+ if (prof_tdata == NULL)
return (NULL);
- if (ckh_new(bt2cnt, PROF_CKH_MINITEMS, prof_bt_hash,
- prof_bt_keycomp)) {
- idalloc(bt2cnt);
- return (NULL);
- }
- bt2cnt_tls = bt2cnt;
- pthread_setspecific(bt2cnt_tsd, bt2cnt);
}
- if (ckh_search(bt2cnt, bt, NULL, (void **)&ret)) {
- prof_bt_t *btkey;
- prof_ctx_t *ctx;
+ if (ckh_search(&prof_tdata->bt2cnt, bt, NULL, &ret.v)) {
+ union {
+ prof_bt_t *p;
+ void *v;
+ } btkey;
+ union {
+ prof_ctx_t *p;
+ void *v;
+ } ctx;
/*
* This thread's cache lacks bt. Look for it in the global
* cache.
*/
prof_enter();
- if (ckh_search(&bt2ctx, bt, (void **)&btkey, (void **)&ctx)) {
-
+ if (ckh_search(&bt2ctx, bt, &btkey.v, &ctx.v)) {
/* bt has never been seen before. Insert it. */
- ctx = (prof_ctx_t *)imalloc(sizeof(prof_ctx_t));
- if (ctx == NULL) {
+ ctx.v = imalloc(sizeof(prof_ctx_t));
+ if (ctx.v == NULL) {
prof_leave();
return (NULL);
}
- btkey = bt_dup(bt);
- if (btkey == NULL) {
+ btkey.p = bt_dup(bt);
+ if (btkey.v == NULL) {
prof_leave();
- idalloc(ctx);
+ idalloc(ctx.v);
return (NULL);
}
- ctx->bt = btkey;
- if (malloc_mutex_init(&ctx->lock)) {
+ ctx.p->bt = btkey.p;
+ if (malloc_mutex_init(&ctx.p->lock)) {
prof_leave();
- idalloc(btkey);
- idalloc(ctx);
+ idalloc(btkey.v);
+ idalloc(ctx.v);
return (NULL);
}
- memset(&ctx->cnt_merged, 0, sizeof(prof_cnt_t));
- ql_new(&ctx->cnts_ql);
- if (ckh_insert(&bt2ctx, btkey, ctx)) {
+ memset(&ctx.p->cnt_merged, 0, sizeof(prof_cnt_t));
+ ql_new(&ctx.p->cnts_ql);
+ if (ckh_insert(&bt2ctx, btkey.v, ctx.v)) {
/* OOM. */
prof_leave();
- idalloc(btkey);
- idalloc(ctx);
+ malloc_mutex_destroy(&ctx.p->lock);
+ idalloc(btkey.v);
+ idalloc(ctx.v);
return (NULL);
}
}
+ /*
+ * Acquire ctx's lock before releasing bt2ctx_mtx, in order to
+ * avoid a race condition with prof_ctx_destroy().
+ */
+ malloc_mutex_lock(&ctx.p->lock);
prof_leave();
 /* Link a prof_thr_cnt_t into ctx for this thread. */
- ret = (prof_thr_cnt_t *)imalloc(sizeof(prof_thr_cnt_t));
- if (ret == NULL)
- return (NULL);
- ql_elm_new(ret, link);
- ret->ctx = ctx;
- ret->epoch = 0;
- memset(&ret->cnts, 0, sizeof(prof_cnt_t));
- if (ckh_insert(bt2cnt, btkey, ret)) {
- idalloc(ret);
- return (NULL);
- }
- malloc_mutex_lock(&ctx->lock);
- ql_tail_insert(&ctx->cnts_ql, ret, link);
- malloc_mutex_unlock(&ctx->lock);
- }
-
- return (ret);
-}
-
-static inline void
-prof_sample_threshold_update(void)
-{
- uint64_t r;
- double u;
-
- /*
- * Compute prof_sample_threshold as a geometrically distributed random
- * variable with mean (2^opt_lg_prof_sample).
- */
- prn64(r, 53, prof_sample_prn_state, (uint64_t)1125899906842625LLU,
- 1058392653243283975);
- u = (double)r * (1.0/9007199254740992.0L);
- prof_sample_threshold = (uint64_t)(log(u) /
- log(1.0 - (1.0 / (double)((uint64_t)1U << opt_lg_prof_sample))))
- + (uint64_t)1U;
-}
-
-prof_thr_cnt_t *
-prof_alloc_prep(size_t size)
-{
- prof_thr_cnt_t *ret;
- void *vec[prof_bt_max];
- prof_bt_t bt;
-
- if (opt_prof_active == false) {
- /* Sampling is currently inactive, so avoid sampling. */
- ret = (prof_thr_cnt_t *)(uintptr_t)1U;
- } else if (opt_lg_prof_sample == 0) {
- /*
- * Don't bother with sampling logic, since sampling interval is
- * 1.
- */
- bt_init(&bt, vec);
- prof_backtrace(&bt, 2, prof_bt_max);
- ret = prof_lookup(&bt);
- } else {
- if (prof_sample_threshold == 0) {
+ if (opt_lg_prof_tcmax >= 0 && ckh_count(&prof_tdata->bt2cnt)
+ == (ZU(1) << opt_lg_prof_tcmax)) {
+ assert(ckh_count(&prof_tdata->bt2cnt) > 0);
/*
- * Initialize. Seed the prng differently for each
- * thread.
+ * Flush the least recently used cnt in order to keep
+ * bt2cnt from becoming too large.
*/
- prof_sample_prn_state = (uint64_t)(uintptr_t)&size;
- prof_sample_threshold_update();
- }
-
- /*
- * Determine whether to capture a backtrace based on whether
- * size is enough for prof_accum to reach
- * prof_sample_threshold. However, delay updating these
- * variables until prof_{m,re}alloc(), because we don't know
- * for sure that the allocation will succeed.
- *
- * Use subtraction rather than addition to avoid potential
- * integer overflow.
- */
- if (size >= prof_sample_threshold - prof_sample_accum) {
- bt_init(&bt, vec);
- prof_backtrace(&bt, 2, prof_bt_max);
- ret = prof_lookup(&bt);
- } else
- ret = (prof_thr_cnt_t *)(uintptr_t)1U;
- }
-
- return (ret);
-}
-
-prof_ctx_t *
-prof_ctx_get(const void *ptr)
-{
- prof_ctx_t *ret;
- arena_chunk_t *chunk;
-
- assert(ptr != NULL);
-
- chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
- if (chunk != ptr) {
- /* Region. */
- assert(chunk->arena->magic == ARENA_MAGIC);
-
- ret = arena_prof_ctx_get(ptr);
- } else
- ret = huge_prof_ctx_get(ptr);
-
- return (ret);
-}
-
-static void
-prof_ctx_set(const void *ptr, prof_ctx_t *ctx)
-{
- arena_chunk_t *chunk;
-
- assert(ptr != NULL);
-
- chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
- if (chunk != ptr) {
- /* Region. */
- assert(chunk->arena->magic == ARENA_MAGIC);
-
- arena_prof_ctx_set(ptr, ctx);
- } else
- huge_prof_ctx_set(ptr, ctx);
-}
-
-static inline void
-prof_sample_accum_update(size_t size)
-{
-
- /* Sampling logic is unnecessary if the interval is 1. */
- assert(opt_lg_prof_sample != 0);
-
- /* Take care to avoid integer overflow. */
- if (size >= prof_sample_threshold - prof_sample_accum) {
- prof_sample_accum -= (prof_sample_threshold - size);
- /* Compute new prof_sample_threshold. */
- prof_sample_threshold_update();
- while (prof_sample_accum >= prof_sample_threshold) {
- prof_sample_accum -= prof_sample_threshold;
- prof_sample_threshold_update();
- }
- } else
- prof_sample_accum += size;
-}
-
-void
-prof_malloc(const void *ptr, prof_thr_cnt_t *cnt)
-{
- size_t size;
-
- assert(ptr != NULL);
-
- if (opt_lg_prof_sample != 0) {
- size = isalloc(ptr);
- prof_sample_accum_update(size);
- } else if ((uintptr_t)cnt > (uintptr_t)1U)
- size = isalloc(ptr);
-
- if ((uintptr_t)cnt > (uintptr_t)1U) {
- prof_ctx_set(ptr, cnt->ctx);
-
- cnt->epoch++;
- /*********/
- mb_write();
- /*********/
- cnt->cnts.curobjs++;
- cnt->cnts.curbytes += size;
- cnt->cnts.accumobjs++;
- cnt->cnts.accumbytes += size;
- /*********/
- mb_write();
- /*********/
- cnt->epoch++;
- /*********/
- mb_write();
- /*********/
- } else
- prof_ctx_set(ptr, (prof_ctx_t *)(uintptr_t)1U);
-}
-
-void
-prof_realloc(const void *ptr, prof_thr_cnt_t *cnt, const void *old_ptr,
- size_t old_size, prof_ctx_t *old_ctx)
-{
- size_t size;
- prof_thr_cnt_t *told_cnt;
-
- assert(ptr != NULL || (uintptr_t)cnt <= (uintptr_t)1U);
-
- if (ptr != NULL) {
- if (opt_lg_prof_sample != 0) {
- size = isalloc(ptr);
- prof_sample_accum_update(size);
- } else if ((uintptr_t)cnt > (uintptr_t)1U)
- size = isalloc(ptr);
- }
-
- if ((uintptr_t)old_ctx > (uintptr_t)1U) {
- told_cnt = prof_lookup(old_ctx->bt);
- if (told_cnt == NULL) {
- /*
- * It's too late to propagate OOM for this realloc(),
- * so operate directly on old_cnt->ctx->cnt_merged.
- */
- malloc_mutex_lock(&old_ctx->lock);
- old_ctx->cnt_merged.curobjs--;
- old_ctx->cnt_merged.curbytes -= old_size;
- malloc_mutex_unlock(&old_ctx->lock);
- told_cnt = (prof_thr_cnt_t *)(uintptr_t)1U;
- }
- } else
- told_cnt = (prof_thr_cnt_t *)(uintptr_t)1U;
-
- if ((uintptr_t)told_cnt > (uintptr_t)1U)
- told_cnt->epoch++;
- if ((uintptr_t)cnt > (uintptr_t)1U) {
- prof_ctx_set(ptr, cnt->ctx);
- cnt->epoch++;
- } else
- prof_ctx_set(ptr, (prof_ctx_t *)(uintptr_t)1U);
- /*********/
- mb_write();
- /*********/
- if ((uintptr_t)told_cnt > (uintptr_t)1U) {
- told_cnt->cnts.curobjs--;
- told_cnt->cnts.curbytes -= old_size;
- }
- if ((uintptr_t)cnt > (uintptr_t)1U) {
- cnt->cnts.curobjs++;
- cnt->cnts.curbytes += size;
- cnt->cnts.accumobjs++;
- cnt->cnts.accumbytes += size;
- }
- /*********/
- mb_write();
- /*********/
- if ((uintptr_t)told_cnt > (uintptr_t)1U)
- told_cnt->epoch++;
- if ((uintptr_t)cnt > (uintptr_t)1U)
- cnt->epoch++;
- /*********/
- mb_write(); /* Not strictly necessary. */
-}
-
-void
-prof_free(const void *ptr)
-{
- prof_ctx_t *ctx = prof_ctx_get(ptr);
-
- if ((uintptr_t)ctx > (uintptr_t)1) {
- size_t size = isalloc(ptr);
- prof_thr_cnt_t *tcnt = prof_lookup(ctx->bt);
-
- if (tcnt != NULL) {
- tcnt->epoch++;
- /*********/
- mb_write();
- /*********/
- tcnt->cnts.curobjs--;
- tcnt->cnts.curbytes -= size;
- /*********/
- mb_write();
- /*********/
- tcnt->epoch++;
- /*********/
- mb_write();
- /*********/
+ ret.p = ql_last(&prof_tdata->lru_ql, lru_link);
+ assert(ret.v != NULL);
+ ckh_remove(&prof_tdata->bt2cnt, ret.p->ctx->bt, NULL,
+ NULL);
+ ql_remove(&prof_tdata->lru_ql, ret.p, lru_link);
+ prof_ctx_merge(ret.p->ctx, ret.p);
+ /* ret can now be re-used. */
} else {
- /*
- * OOM during free() cannot be propagated, so operate
- * directly on cnt->ctx->cnt_merged.
- */
- malloc_mutex_lock(&ctx->lock);
- ctx->cnt_merged.curobjs--;
- ctx->cnt_merged.curbytes -= size;
- malloc_mutex_unlock(&ctx->lock);
+ assert(opt_lg_prof_tcmax < 0 ||
+ ckh_count(&prof_tdata->bt2cnt) < (ZU(1) <<
+ opt_lg_prof_tcmax));
+ /* Allocate and partially initialize a new cnt. */
+ ret.v = imalloc(sizeof(prof_thr_cnt_t));
+ if (ret.p == NULL)
+ return (NULL);
+ ql_elm_new(ret.p, cnts_link);
+ ql_elm_new(ret.p, lru_link);
}
+ /* Finish initializing ret. */
+ ret.p->ctx = ctx.p;
+ ret.p->epoch = 0;
+ memset(&ret.p->cnts, 0, sizeof(prof_cnt_t));
+ if (ckh_insert(&prof_tdata->bt2cnt, btkey.v, ret.v)) {
+ idalloc(ret.v);
+ return (NULL);
+ }
+ ql_head_insert(&prof_tdata->lru_ql, ret.p, lru_link);
+ ql_tail_insert(&ctx.p->cnts_ql, ret.p, cnts_link);
+ malloc_mutex_unlock(&ctx.p->lock);
+ } else {
+ /* Move ret to the front of the LRU. */
+ ql_remove(&prof_tdata->lru_ql, ret.p, lru_link);
+ ql_head_insert(&prof_tdata->lru_ql, ret.p, lru_link);
}
+
+ return (ret.p);
}
static bool
@@ -823,15 +571,15 @@
}
static void
-prof_ctx_merge(prof_ctx_t *ctx, prof_cnt_t *cnt_all, size_t *leak_nctx)
+prof_ctx_sum(prof_ctx_t *ctx, prof_cnt_t *cnt_all, size_t *leak_nctx)
{
prof_thr_cnt_t *thr_cnt;
prof_cnt_t tcnt;
malloc_mutex_lock(&ctx->lock);
- memcpy(&ctx->cnt_dump, &ctx->cnt_merged, sizeof(prof_cnt_t));
- ql_foreach(thr_cnt, &ctx->cnts_ql, link) {
+ memcpy(&ctx->cnt_summed, &ctx->cnt_merged, sizeof(prof_cnt_t));
+ ql_foreach(thr_cnt, &ctx->cnts_ql, cnts_link) {
volatile unsigned *epoch = &thr_cnt->epoch;
while (true) {
@@ -848,46 +596,108 @@
break;
}
- ctx->cnt_dump.curobjs += tcnt.curobjs;
- ctx->cnt_dump.curbytes += tcnt.curbytes;
- ctx->cnt_dump.accumobjs += tcnt.accumobjs;
- ctx->cnt_dump.accumbytes += tcnt.accumbytes;
-
- if (tcnt.curobjs != 0)
- (*leak_nctx)++;
+ ctx->cnt_summed.curobjs += tcnt.curobjs;
+ ctx->cnt_summed.curbytes += tcnt.curbytes;
+ if (opt_prof_accum) {
+ ctx->cnt_summed.accumobjs += tcnt.accumobjs;
+ ctx->cnt_summed.accumbytes += tcnt.accumbytes;
+ }
}
- /* Merge into cnt_all. */
- cnt_all->curobjs += ctx->cnt_dump.curobjs;
- cnt_all->curbytes += ctx->cnt_dump.curbytes;
- cnt_all->accumobjs += ctx->cnt_dump.accumobjs;
- cnt_all->accumbytes += ctx->cnt_dump.accumbytes;
+ if (ctx->cnt_summed.curobjs != 0)
+ (*leak_nctx)++;
+
+ /* Add to cnt_all. */
+ cnt_all->curobjs += ctx->cnt_summed.curobjs;
+ cnt_all->curbytes += ctx->cnt_summed.curbytes;
+ if (opt_prof_accum) {
+ cnt_all->accumobjs += ctx->cnt_summed.accumobjs;
+ cnt_all->accumbytes += ctx->cnt_summed.accumbytes;
+ }
malloc_mutex_unlock(&ctx->lock);
}
+static void
+prof_ctx_destroy(prof_ctx_t *ctx)
+{
+
+ /*
+ * Check that ctx is still unused by any thread cache before destroying
+ * it. prof_lookup() interlocks bt2ctx_mtx and ctx->lock in order to
+ * avoid a race condition with this function.
+ */
+ prof_enter();
+ malloc_mutex_lock(&ctx->lock);
+ if (ql_first(&ctx->cnts_ql) == NULL && ctx->cnt_merged.curobjs == 0) {
+ assert(ctx->cnt_merged.curbytes == 0);
+ assert(ctx->cnt_merged.accumobjs == 0);
+ assert(ctx->cnt_merged.accumbytes == 0);
+ /* Remove ctx from bt2ctx. */
+ ckh_remove(&bt2ctx, ctx->bt, NULL, NULL);
+ prof_leave();
+ /* Destroy ctx. */
+ malloc_mutex_unlock(&ctx->lock);
+ bt_destroy(ctx->bt);
+ malloc_mutex_destroy(&ctx->lock);
+ idalloc(ctx);
+ } else {
+ malloc_mutex_unlock(&ctx->lock);
+ prof_leave();
+ }
+}
+
+static void
+prof_ctx_merge(prof_ctx_t *ctx, prof_thr_cnt_t *cnt)
+{
+ bool destroy;
+
+ /* Merge cnt stats and detach from ctx. */
+ malloc_mutex_lock(&ctx->lock);
+ ctx->cnt_merged.curobjs += cnt->cnts.curobjs;
+ ctx->cnt_merged.curbytes += cnt->cnts.curbytes;
+ ctx->cnt_merged.accumobjs += cnt->cnts.accumobjs;
+ ctx->cnt_merged.accumbytes += cnt->cnts.accumbytes;
+ ql_remove(&ctx->cnts_ql, cnt, cnts_link);
+ if (opt_prof_accum == false && ql_first(&ctx->cnts_ql) == NULL &&
+ ctx->cnt_merged.curobjs == 0)
+ destroy = true;
+ else
+ destroy = false;
+ malloc_mutex_unlock(&ctx->lock);
+ if (destroy)
+ prof_ctx_destroy(ctx);
+}
+
static bool
prof_dump_ctx(prof_ctx_t *ctx, prof_bt_t *bt, bool propagate_err)
{
char buf[UMAX2S_BUFSIZE];
unsigned i;
- if (prof_write(umax2s(ctx->cnt_dump.curobjs, 10, buf), propagate_err)
+ if (opt_prof_accum == false && ctx->cnt_summed.curobjs == 0) {
+ assert(ctx->cnt_summed.curbytes == 0);
+ assert(ctx->cnt_summed.accumobjs == 0);
+ assert(ctx->cnt_summed.accumbytes == 0);
+ return (false);
+ }
+
+ if (prof_write(u2s(ctx->cnt_summed.curobjs, 10, buf), propagate_err)
|| prof_write(": ", propagate_err)
- || prof_write(umax2s(ctx->cnt_dump.curbytes, 10, buf),
+ || prof_write(u2s(ctx->cnt_summed.curbytes, 10, buf),
propagate_err)
|| prof_write(" [", propagate_err)
- || prof_write(umax2s(ctx->cnt_dump.accumobjs, 10, buf),
+ || prof_write(u2s(ctx->cnt_summed.accumobjs, 10, buf),
propagate_err)
|| prof_write(": ", propagate_err)
- || prof_write(umax2s(ctx->cnt_dump.accumbytes, 10, buf),
+ || prof_write(u2s(ctx->cnt_summed.accumbytes, 10, buf),
propagate_err)
|| prof_write("] @", propagate_err))
return (true);
for (i = 0; i < bt->len; i++) {
if (prof_write(" 0x", propagate_err)
- || prof_write(umax2s((uintptr_t)bt->vec[i], 16, buf),
+ || prof_write(u2s((uintptr_t)bt->vec[i], 16, buf),
propagate_err))
return (true);
}
@@ -916,7 +726,7 @@
memcpy(&mpath[i], s, slen);
i += slen;
- s = umax2s(getpid(), 10, buf);
+ s = u2s(getpid(), 10, buf);
slen = strlen(s);
memcpy(&mpath[i], s, slen);
i += slen;
@@ -958,8 +768,14 @@
{
prof_cnt_t cnt_all;
size_t tabind;
- prof_bt_t *bt;
- prof_ctx_t *ctx;
+ union {
+ prof_bt_t *p;
+ void *v;
+ } bt;
+ union {
+ prof_ctx_t *p;
+ void *v;
+ } ctx;
char buf[UMAX2S_BUFSIZE];
size_t leak_nctx;
@@ -979,20 +795,18 @@
/* Merge per thread profile stats, and sum them in cnt_all. */
memset(&cnt_all, 0, sizeof(prof_cnt_t));
leak_nctx = 0;
- for (tabind = 0; ckh_iter(&bt2ctx, &tabind, NULL, (void **)&ctx)
- == false;) {
- prof_ctx_merge(ctx, &cnt_all, &leak_nctx);
- }
+ for (tabind = 0; ckh_iter(&bt2ctx, &tabind, NULL, &ctx.v) == false;)
+ prof_ctx_sum(ctx.p, &cnt_all, &leak_nctx);
/* Dump profile header. */
if (prof_write("heap profile: ", propagate_err)
- || prof_write(umax2s(cnt_all.curobjs, 10, buf), propagate_err)
+ || prof_write(u2s(cnt_all.curobjs, 10, buf), propagate_err)
|| prof_write(": ", propagate_err)
- || prof_write(umax2s(cnt_all.curbytes, 10, buf), propagate_err)
+ || prof_write(u2s(cnt_all.curbytes, 10, buf), propagate_err)
|| prof_write(" [", propagate_err)
- || prof_write(umax2s(cnt_all.accumobjs, 10, buf), propagate_err)
+ || prof_write(u2s(cnt_all.accumobjs, 10, buf), propagate_err)
|| prof_write(": ", propagate_err)
- || prof_write(umax2s(cnt_all.accumbytes, 10, buf), propagate_err))
+ || prof_write(u2s(cnt_all.accumbytes, 10, buf), propagate_err))
goto ERROR;
if (opt_lg_prof_sample == 0) {
@@ -1000,16 +814,16 @@
goto ERROR;
} else {
if (prof_write("] @ heap_v2/", propagate_err)
- || prof_write(umax2s((uint64_t)1U << opt_lg_prof_sample, 10,
+ || prof_write(u2s((uint64_t)1U << opt_lg_prof_sample, 10,
buf), propagate_err)
|| prof_write("\n", propagate_err))
goto ERROR;
}
/* Dump per ctx profile stats. */
- for (tabind = 0; ckh_iter(&bt2ctx, &tabind, (void **)&bt, (void **)&ctx)
+ for (tabind = 0; ckh_iter(&bt2ctx, &tabind, &bt.v, &ctx.v)
== false;) {
- if (prof_dump_ctx(ctx, bt, propagate_err))
+ if (prof_dump_ctx(ctx.p, bt.p, propagate_err))
goto ERROR;
}
@@ -1024,12 +838,12 @@
if (leakcheck && cnt_all.curbytes != 0) {
malloc_write("<jemalloc>: Leak summary: ");
- malloc_write(umax2s(cnt_all.curbytes, 10, buf));
+ malloc_write(u2s(cnt_all.curbytes, 10, buf));
malloc_write((cnt_all.curbytes != 1) ? " bytes, " : " byte, ");
- malloc_write(umax2s(cnt_all.curobjs, 10, buf));
+ malloc_write(u2s(cnt_all.curobjs, 10, buf));
malloc_write((cnt_all.curobjs != 1) ? " objects, " :
" object, ");
- malloc_write(umax2s(leak_nctx, 10, buf));
+ malloc_write(u2s(leak_nctx, 10, buf));
malloc_write((leak_nctx != 1) ? " contexts\n" : " context\n");
malloc_write("<jemalloc>: Run pprof on \"");
malloc_write(filename);
@@ -1059,31 +873,11 @@
* Construct a filename of the form:
*
* <prefix>.<pid>.<seq>.v<vseq>.heap\0
- * or
- * jeprof.<pid>.<seq>.v<vseq>.heap\0
*/
i = 0;
- /*
- * Use JEMALLOC_PROF_PREFIX if it's set, and if it is short enough to
- * avoid overflowing DUMP_FILENAME_BUFSIZE. The result may exceed
- * PATH_MAX, but creat(2) will catch that problem.
- */
- if ((s = getenv("JEMALLOC_PROF_PREFIX")) != NULL
- && strlen(s) + (DUMP_FILENAME_BUFSIZE - PATH_MAX) <= PATH_MAX) {
- slen = strlen(s);
- memcpy(&filename[i], s, slen);
- i += slen;
-
- s = ".";
- } else
- s = "jeprof.";
- slen = strlen(s);
- memcpy(&filename[i], s, slen);
- i += slen;
-
- s = umax2s(getpid(), 10, buf);
+ s = opt_prof_prefix;
slen = strlen(s);
memcpy(&filename[i], s, slen);
i += slen;
@@ -1093,7 +887,17 @@
memcpy(&filename[i], s, slen);
i += slen;
- s = umax2s(prof_dump_seq, 10, buf);
+ s = u2s(getpid(), 10, buf);
+ slen = strlen(s);
+ memcpy(&filename[i], s, slen);
+ i += slen;
+
+ s = ".";
+ slen = strlen(s);
+ memcpy(&filename[i], s, slen);
+ i += slen;
+
+ s = u2s(prof_dump_seq, 10, buf);
prof_dump_seq++;
slen = strlen(s);
memcpy(&filename[i], s, slen);
@@ -1108,7 +912,7 @@
i++;
if (vseq != 0xffffffffffffffffLLU) {
- s = umax2s(vseq, 10, buf);
+ s = u2s(vseq, 10, buf);
slen = strlen(s);
memcpy(&filename[i], s, slen);
i += slen;
@@ -1130,10 +934,12 @@
if (prof_booted == false)
return;
- malloc_mutex_lock(&prof_dump_seq_mtx);
- prof_dump_filename(filename, 'f', 0xffffffffffffffffLLU);
- malloc_mutex_unlock(&prof_dump_seq_mtx);
- prof_dump(filename, opt_prof_leak, false);
+ if (opt_prof_prefix[0] != '\0') {
+ malloc_mutex_lock(&prof_dump_seq_mtx);
+ prof_dump_filename(filename, 'f', 0xffffffffffffffffLLU);
+ malloc_mutex_unlock(&prof_dump_seq_mtx);
+ prof_dump(filename, opt_prof_leak, false);
+ }
}
void
@@ -1151,11 +957,13 @@
}
malloc_mutex_unlock(&enq_mtx);
- malloc_mutex_lock(&prof_dump_seq_mtx);
- prof_dump_filename(filename, 'i', prof_dump_iseq);
- prof_dump_iseq++;
- malloc_mutex_unlock(&prof_dump_seq_mtx);
- prof_dump(filename, false, false);
+ if (opt_prof_prefix[0] != '\0') {
+ malloc_mutex_lock(&prof_dump_seq_mtx);
+ prof_dump_filename(filename, 'i', prof_dump_iseq);
+ prof_dump_iseq++;
+ malloc_mutex_unlock(&prof_dump_seq_mtx);
+ prof_dump(filename, false, false);
+ }
}
bool
@@ -1168,6 +976,8 @@
if (filename == NULL) {
/* No filename specified, so automatically generate one. */
+ if (opt_prof_prefix[0] == '\0')
+ return (true);
malloc_mutex_lock(&prof_dump_seq_mtx);
prof_dump_filename(filename_buf, 'm', prof_dump_mseq);
prof_dump_mseq++;
@@ -1178,7 +988,7 @@
}
void
-prof_udump(void)
+prof_gdump(void)
{
char filename[DUMP_FILENAME_BUFSIZE];
@@ -1186,17 +996,19 @@
return;
malloc_mutex_lock(&enq_mtx);
if (enq) {
- enq_udump = true;
+ enq_gdump = true;
malloc_mutex_unlock(&enq_mtx);
return;
}
malloc_mutex_unlock(&enq_mtx);
- malloc_mutex_lock(&prof_dump_seq_mtx);
- prof_dump_filename(filename, 'u', prof_dump_useq);
- prof_dump_useq++;
- malloc_mutex_unlock(&prof_dump_seq_mtx);
- prof_dump(filename, false, false);
+ if (opt_prof_prefix[0] != '\0') {
+ malloc_mutex_lock(&prof_dump_seq_mtx);
+ prof_dump_filename(filename, 'u', prof_dump_useq);
+ prof_dump_useq++;
+ malloc_mutex_unlock(&prof_dump_seq_mtx);
+ prof_dump(filename, false, false);
+ }
}
static void
@@ -1239,52 +1051,69 @@
return (memcmp(bt1->vec, bt2->vec, bt1->len * sizeof(void *)) == 0);
}
-static void
-bt2cnt_thread_cleanup(void *arg)
+prof_tdata_t *
+prof_tdata_init(void)
{
- ckh_t *bt2cnt;
+ prof_tdata_t *prof_tdata;
- bt2cnt = bt2cnt_tls;
- if (bt2cnt != NULL) {
- ql_head(prof_thr_cnt_t) cnts_ql;
- size_t tabind;
+ /* Initialize an empty cache for this thread. */
+ prof_tdata = (prof_tdata_t *)imalloc(sizeof(prof_tdata_t));
+ if (prof_tdata == NULL)
+ return (NULL);
+
+ if (ckh_new(&prof_tdata->bt2cnt, PROF_CKH_MINITEMS,
+ prof_bt_hash, prof_bt_keycomp)) {
+ idalloc(prof_tdata);
+ return (NULL);
+ }
+ ql_new(&prof_tdata->lru_ql);
+
+ prof_tdata->vec = imalloc(sizeof(void *) * prof_bt_max);
+ if (prof_tdata->vec == NULL) {
+
+ ckh_delete(&prof_tdata->bt2cnt);
+ idalloc(prof_tdata);
+ return (NULL);
+ }
+
+ prof_tdata->prn_state = 0;
+ prof_tdata->threshold = 0;
+ prof_tdata->accum = 0;
+
+ PROF_TCACHE_SET(prof_tdata);
+
+ return (prof_tdata);
+}
+
+static void
+prof_tdata_cleanup(void *arg)
+{
+ prof_tdata_t *prof_tdata;
+
+ prof_tdata = PROF_TCACHE_GET();
+ if (prof_tdata != NULL) {
prof_thr_cnt_t *cnt;
- /* Iteratively merge cnt's into the global stats. */
- ql_new(&cnts_ql);
- tabind = 0;
- while (ckh_iter(bt2cnt, &tabind, NULL, (void **)&cnt) ==
- false) {
- prof_ctx_t *ctx = cnt->ctx;
- /* Merge stats and detach from ctx. */
- malloc_mutex_lock(&ctx->lock);
- ctx->cnt_merged.curobjs += cnt->cnts.curobjs;
- ctx->cnt_merged.curbytes += cnt->cnts.curbytes;
- ctx->cnt_merged.accumobjs += cnt->cnts.accumobjs;
- ctx->cnt_merged.accumbytes += cnt->cnts.accumbytes;
- ql_remove(&ctx->cnts_ql, cnt, link);
- malloc_mutex_unlock(&ctx->lock);
-
- /*
- * Stash cnt for deletion after finishing with
- * ckh_iter().
- */
- ql_tail_insert(&cnts_ql, cnt, link);
- }
+ /*
+ * Delete the hash table. All of its contents can still be
+ * iterated over via the LRU.
+ */
+ ckh_delete(&prof_tdata->bt2cnt);
/*
- * Delete the hash table now that cnts_ql has a list of all
- * cnt's.
+ * Iteratively merge cnt's into the global stats and delete
+ * them.
*/
- ckh_delete(bt2cnt);
- idalloc(bt2cnt);
- bt2cnt_tls = NULL;
-
- /* Delete cnt's. */
- while ((cnt = ql_last(&cnts_ql, link)) != NULL) {
- ql_remove(&cnts_ql, cnt, link);
+ while ((cnt = ql_last(&prof_tdata->lru_ql, lru_link)) != NULL) {
+ prof_ctx_merge(cnt->ctx, cnt);
+ ql_remove(&prof_tdata->lru_ql, cnt, lru_link);
idalloc(cnt);
}
+
+ idalloc(prof_tdata->vec);
+
+ idalloc(prof_tdata);
+ PROF_TCACHE_SET(NULL);
}
}
@@ -1292,6 +1121,14 @@
prof_boot0(void)
{
+ memcpy(opt_prof_prefix, PROF_PREFIX_DEFAULT,
+ sizeof(PROF_PREFIX_DEFAULT));
+}
+
+void
+prof_boot1(void)
+{
+
/*
* opt_prof and prof_promote must be in their final state before any
* arenas are initialized, so this function must be executed early.
@@ -1303,7 +1140,7 @@
* automatically dumped.
*/
opt_prof = true;
- opt_prof_udump = false;
+ opt_prof_gdump = false;
prof_interval = 0;
} else if (opt_prof) {
if (opt_lg_prof_interval >= 0) {
@@ -1317,7 +1154,7 @@
}
bool
-prof_boot1(void)
+prof_boot2(void)
{
if (opt_prof) {
@@ -1326,7 +1163,7 @@
return (true);
if (malloc_mutex_init(&bt2ctx_mtx))
return (true);
- if (pthread_key_create(&bt2cnt_tsd, bt2cnt_thread_cleanup)
+ if (pthread_key_create(&prof_tdata_tsd, prof_tdata_cleanup)
!= 0) {
malloc_write(
"<jemalloc>: Error in pthread_key_create()\n");
@@ -1341,7 +1178,7 @@
return (true);
enq = false;
enq_idump = false;
- enq_udump = false;
+ enq_gdump = false;
if (atexit(prof_fdump) != 0) {
malloc_write("<jemalloc>: Error in atexit()\n");
diff --git a/jemalloc/src/rtree.c b/jemalloc/src/rtree.c
new file mode 100644
index 0000000..7753743
--- /dev/null
+++ b/jemalloc/src/rtree.c
@@ -0,0 +1,43 @@
+#define RTREE_C_
+#include "jemalloc/internal/jemalloc_internal.h"
+
+rtree_t *
+rtree_new(unsigned bits)
+{
+ rtree_t *ret;
+ unsigned bits_per_level, height, i;
+
+ bits_per_level = ffs(pow2_ceil((RTREE_NODESIZE / sizeof(void *)))) - 1;
+ height = bits / bits_per_level;
+ if (height * bits_per_level != bits)
+ height++;
+ assert(height * bits_per_level >= bits);
+
+ ret = (rtree_t*)base_alloc(offsetof(rtree_t, level2bits) +
+ (sizeof(unsigned) * height));
+ if (ret == NULL)
+ return (NULL);
+ memset(ret, 0, offsetof(rtree_t, level2bits) + (sizeof(unsigned) *
+ height));
+
+ malloc_mutex_init(&ret->mutex);
+ ret->height = height;
+ if (bits_per_level * height > bits)
+ ret->level2bits[0] = bits % bits_per_level;
+ else
+ ret->level2bits[0] = bits_per_level;
+ for (i = 1; i < height; i++)
+ ret->level2bits[i] = bits_per_level;
+
+ ret->root = (void**)base_alloc(sizeof(void *) << ret->level2bits[0]);
+ if (ret->root == NULL) {
+ /*
+ * We leak the rtree here, since there's no generic base
+ * deallocation.
+ */
+ return (NULL);
+ }
+ memset(ret->root, 0, sizeof(void *) << ret->level2bits[0]);
+
+ return (ret);
+}
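
As a worked example of the sizing logic in rtree_new() above (assuming, purely for illustration, that RTREE_NODESIZE / sizeof(void *) rounds up to 8 slots per node): bits_per_level = ffs(8) - 1 = 3. A key width of bits = 20 then gives height = 20 / 3 = 6, bumped to 7 because 6 * 3 < 20; since 7 * 3 > 20, the root level takes the remainder, level2bits[0] = 20 % 3 = 2, and every deeper level gets 3 bits, so the per-level bits sum to exactly 20. The root array is allocated with sizeof(void *) << 2 bytes, i.e. four child pointers.
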
diff --git a/jemalloc/src/stats.c b/jemalloc/src/stats.c
index 9dc7529..3dfe0d2 100644
--- a/jemalloc/src/stats.c
+++ b/jemalloc/src/stats.c
@@ -57,12 +57,12 @@
/*
* We don't want to depend on vsnprintf() for production builds, since that can
- * cause unnecessary bloat for static binaries. umax2s() provides minimal
- * integer printing functionality, so that malloc_printf() use can be limited to
+ * cause unnecessary bloat for static binaries. u2s() provides minimal integer
+ * printing functionality, so that malloc_printf() use can be limited to
* JEMALLOC_STATS code.
*/
char *
-umax2s(uintmax_t x, unsigned base, char *s)
+u2s(uint64_t x, unsigned base, char *s)
{
unsigned i;
@@ -72,8 +72,8 @@
case 10:
do {
i--;
- s[i] = "0123456789"[x % 10];
- x /= 10;
+ s[i] = "0123456789"[x % (uint64_t)10];
+ x /= (uint64_t)10;
} while (x > 0);
break;
case 16:
@@ -86,8 +86,9 @@
default:
do {
i--;
- s[i] = "0123456789abcdefghijklmnopqrstuvwxyz"[x % base];
- x /= base;
+ s[i] = "0123456789abcdefghijklmnopqrstuvwxyz"[x %
+ (uint64_t)base];
+ x /= (uint64_t)base;
} while (x > 0);
}
@@ -374,6 +375,7 @@
stats_print(void (*write_cb)(void *, const char *), void *cbopaque,
const char *opts)
{
+ int err;
uint64_t epoch;
size_t u64sz;
char s[UMAX2S_BUFSIZE];
@@ -383,10 +385,27 @@
bool bins = true;
bool large = true;
- /* Refresh stats, in case mallctl() was called by the application. */
+ /*
+ * Refresh stats, in case mallctl() was called by the application.
+ *
+ * Check for OOM here, since refreshing the ctl cache can trigger
+ * allocation. In practice, none of the subsequent mallctl()-related
+ * calls in this function will cause OOM if this one succeeds.
+ */
epoch = 1;
u64sz = sizeof(uint64_t);
- xmallctl("epoch", &epoch, &u64sz, &epoch, sizeof(uint64_t));
+ err = JEMALLOC_P(mallctl)("epoch", &epoch, &u64sz, &epoch,
+ sizeof(uint64_t));
+ if (err != 0) {
+ if (err == EAGAIN) {
+ malloc_write("<jemalloc>: Memory allocation failure in "
+ "mallctl(\"epoch\", ...)\n");
+ return;
+ }
+ malloc_write("<jemalloc>: Failure in mallctl(\"epoch\", "
+ "...)\n");
+ abort();
+ }
if (write_cb == NULL) {
/*
@@ -430,10 +449,12 @@
bool bv;
unsigned uv;
ssize_t ssv;
- size_t sv, bsz, ssz;
+ size_t sv, bsz, ssz, sssz, cpsz;
bsz = sizeof(bool);
ssz = sizeof(size_t);
+ sssz = sizeof(ssize_t);
+ cpsz = sizeof(const char *);
CTL_GET("version", &cpv, const char *);
write_cb(cbopaque, "Version: ");
@@ -444,113 +465,140 @@
write_cb(cbopaque, bv ? "enabled" : "disabled");
write_cb(cbopaque, "\n");
- write_cb(cbopaque, "Boolean JEMALLOC_OPTIONS: ");
- if ((err = JEMALLOC_P(mallctl)("opt.abort", &bv, &bsz, NULL, 0))
- == 0)
- write_cb(cbopaque, bv ? "A" : "a");
- if ((err = JEMALLOC_P(mallctl)("prof.active", &bv, &bsz,
- NULL, 0)) == 0)
- write_cb(cbopaque, bv ? "E" : "e");
- if ((err = JEMALLOC_P(mallctl)("opt.prof", &bv, &bsz, NULL, 0))
- == 0)
- write_cb(cbopaque, bv ? "F" : "f");
- if ((err = JEMALLOC_P(mallctl)("opt.tcache", &bv, &bsz, NULL,
- 0)) == 0)
- write_cb(cbopaque, bv ? "H" : "h");
- if ((err = JEMALLOC_P(mallctl)("opt.junk", &bv, &bsz, NULL, 0))
- == 0)
- write_cb(cbopaque, bv ? "J" : "j");
- if ((err = JEMALLOC_P(mallctl)("opt.prof_leak", &bv, &bsz, NULL,
- 0)) == 0)
- write_cb(cbopaque, bv ? "L" : "l");
- if ((err = JEMALLOC_P(mallctl)("opt.overcommit", &bv, &bsz,
- NULL, 0)) == 0)
- write_cb(cbopaque, bv ? "O" : "o");
- if ((err = JEMALLOC_P(mallctl)("opt.stats_print", &bv, &bsz,
- NULL, 0)) == 0)
- write_cb(cbopaque, bv ? "P" : "p");
- if ((err = JEMALLOC_P(mallctl)("opt.prof_udump", &bv, &bsz,
- NULL, 0)) == 0)
- write_cb(cbopaque, bv ? "U" : "u");
- if ((err = JEMALLOC_P(mallctl)("opt.sysv", &bv, &bsz, NULL, 0))
- == 0)
- write_cb(cbopaque, bv ? "V" : "v");
- if ((err = JEMALLOC_P(mallctl)("opt.xmalloc", &bv, &bsz, NULL,
- 0)) == 0)
- write_cb(cbopaque, bv ? "X" : "x");
- if ((err = JEMALLOC_P(mallctl)("opt.zero", &bv, &bsz, NULL, 0))
- == 0)
- write_cb(cbopaque, bv ? "Z" : "z");
- write_cb(cbopaque, "\n");
+#define OPT_WRITE_BOOL(n) \
+ if ((err = JEMALLOC_P(mallctl)("opt."#n, &bv, &bsz, \
+ NULL, 0)) == 0) { \
+ write_cb(cbopaque, " opt."#n": "); \
+ write_cb(cbopaque, bv ? "true" : "false"); \
+ write_cb(cbopaque, "\n"); \
+ }
+#define OPT_WRITE_SIZE_T(n) \
+ if ((err = JEMALLOC_P(mallctl)("opt."#n, &sv, &ssz, \
+ NULL, 0)) == 0) { \
+ write_cb(cbopaque, " opt."#n": "); \
+ write_cb(cbopaque, u2s(sv, 10, s)); \
+ write_cb(cbopaque, "\n"); \
+ }
+#define OPT_WRITE_SSIZE_T(n) \
+ if ((err = JEMALLOC_P(mallctl)("opt."#n, &ssv, &sssz, \
+ NULL, 0)) == 0) { \
+ if (ssv >= 0) { \
+ write_cb(cbopaque, " opt."#n": "); \
+ write_cb(cbopaque, u2s(ssv, 10, s)); \
+ } else { \
+ write_cb(cbopaque, " opt."#n": -"); \
+ write_cb(cbopaque, u2s(-ssv, 10, s)); \
+ } \
+ write_cb(cbopaque, "\n"); \
+ }
+#define OPT_WRITE_CHAR_P(n) \
+ if ((err = JEMALLOC_P(mallctl)("opt."#n, &cpv, &cpsz, \
+ NULL, 0)) == 0) { \
+ write_cb(cbopaque, " opt."#n": \""); \
+ write_cb(cbopaque, cpv); \
+ write_cb(cbopaque, "\"\n"); \
+ }
+
+ write_cb(cbopaque, "Run-time option settings:\n");
+ OPT_WRITE_BOOL(abort)
+ OPT_WRITE_SIZE_T(lg_qspace_max)
+ OPT_WRITE_SIZE_T(lg_cspace_max)
+ OPT_WRITE_SIZE_T(lg_chunk)
+ OPT_WRITE_SIZE_T(narenas)
+ OPT_WRITE_SSIZE_T(lg_dirty_mult)
+ OPT_WRITE_BOOL(stats_print)
+ OPT_WRITE_BOOL(junk)
+ OPT_WRITE_BOOL(zero)
+ OPT_WRITE_BOOL(sysv)
+ OPT_WRITE_BOOL(xmalloc)
+ OPT_WRITE_BOOL(tcache)
+ OPT_WRITE_SSIZE_T(lg_tcache_gc_sweep)
+ OPT_WRITE_SSIZE_T(lg_tcache_max)
+ OPT_WRITE_BOOL(prof)
+ OPT_WRITE_CHAR_P(prof_prefix)
+ OPT_WRITE_SIZE_T(lg_prof_bt_max)
+ OPT_WRITE_BOOL(prof_active)
+ OPT_WRITE_SSIZE_T(lg_prof_sample)
+ OPT_WRITE_BOOL(prof_accum)
+ OPT_WRITE_SSIZE_T(lg_prof_tcmax)
+ OPT_WRITE_SSIZE_T(lg_prof_interval)
+ OPT_WRITE_BOOL(prof_gdump)
+ OPT_WRITE_BOOL(prof_leak)
+ OPT_WRITE_BOOL(overcommit)
+
+#undef OPT_WRITE_BOOL
+#undef OPT_WRITE_SIZE_T
+#undef OPT_WRITE_SSIZE_T
+#undef OPT_WRITE_CHAR_P
write_cb(cbopaque, "CPUs: ");
- write_cb(cbopaque, umax2s(ncpus, 10, s));
+ write_cb(cbopaque, u2s(ncpus, 10, s));
write_cb(cbopaque, "\n");
CTL_GET("arenas.narenas", &uv, unsigned);
write_cb(cbopaque, "Max arenas: ");
- write_cb(cbopaque, umax2s(uv, 10, s));
+ write_cb(cbopaque, u2s(uv, 10, s));
write_cb(cbopaque, "\n");
write_cb(cbopaque, "Pointer size: ");
- write_cb(cbopaque, umax2s(sizeof(void *), 10, s));
+ write_cb(cbopaque, u2s(sizeof(void *), 10, s));
write_cb(cbopaque, "\n");
CTL_GET("arenas.quantum", &sv, size_t);
write_cb(cbopaque, "Quantum size: ");
- write_cb(cbopaque, umax2s(sv, 10, s));
+ write_cb(cbopaque, u2s(sv, 10, s));
write_cb(cbopaque, "\n");
CTL_GET("arenas.cacheline", &sv, size_t);
write_cb(cbopaque, "Cacheline size (assumed): ");
- write_cb(cbopaque, umax2s(sv, 10, s));
+ write_cb(cbopaque, u2s(sv, 10, s));
write_cb(cbopaque, "\n");
CTL_GET("arenas.subpage", &sv, size_t);
write_cb(cbopaque, "Subpage spacing: ");
- write_cb(cbopaque, umax2s(sv, 10, s));
+ write_cb(cbopaque, u2s(sv, 10, s));
write_cb(cbopaque, "\n");
if ((err = JEMALLOC_P(mallctl)("arenas.tspace_min", &sv, &ssz,
NULL, 0)) == 0) {
write_cb(cbopaque, "Tiny 2^n-spaced sizes: [");
- write_cb(cbopaque, umax2s(sv, 10, s));
+ write_cb(cbopaque, u2s(sv, 10, s));
write_cb(cbopaque, "..");
CTL_GET("arenas.tspace_max", &sv, size_t);
- write_cb(cbopaque, umax2s(sv, 10, s));
+ write_cb(cbopaque, u2s(sv, 10, s));
write_cb(cbopaque, "]\n");
}
CTL_GET("arenas.qspace_min", &sv, size_t);
write_cb(cbopaque, "Quantum-spaced sizes: [");
- write_cb(cbopaque, umax2s(sv, 10, s));
+ write_cb(cbopaque, u2s(sv, 10, s));
write_cb(cbopaque, "..");
CTL_GET("arenas.qspace_max", &sv, size_t);
- write_cb(cbopaque, umax2s(sv, 10, s));
+ write_cb(cbopaque, u2s(sv, 10, s));
write_cb(cbopaque, "]\n");
CTL_GET("arenas.cspace_min", &sv, size_t);
write_cb(cbopaque, "Cacheline-spaced sizes: [");
- write_cb(cbopaque, umax2s(sv, 10, s));
+ write_cb(cbopaque, u2s(sv, 10, s));
write_cb(cbopaque, "..");
CTL_GET("arenas.cspace_max", &sv, size_t);
- write_cb(cbopaque, umax2s(sv, 10, s));
+ write_cb(cbopaque, u2s(sv, 10, s));
write_cb(cbopaque, "]\n");
CTL_GET("arenas.sspace_min", &sv, size_t);
write_cb(cbopaque, "Subpage-spaced sizes: [");
- write_cb(cbopaque, umax2s(sv, 10, s));
+ write_cb(cbopaque, u2s(sv, 10, s));
write_cb(cbopaque, "..");
CTL_GET("arenas.sspace_max", &sv, size_t);
- write_cb(cbopaque, umax2s(sv, 10, s));
+ write_cb(cbopaque, u2s(sv, 10, s));
write_cb(cbopaque, "]\n");
CTL_GET("opt.lg_dirty_mult", &ssv, ssize_t);
if (ssv >= 0) {
write_cb(cbopaque,
"Min active:dirty page ratio per arena: ");
- write_cb(cbopaque, umax2s((1U << ssv), 10, s));
+ write_cb(cbopaque, u2s((1U << ssv), 10, s));
write_cb(cbopaque, ":1\n");
} else {
write_cb(cbopaque,
@@ -560,7 +608,7 @@
&ssz, NULL, 0)) == 0) {
write_cb(cbopaque,
"Maximum thread-cached size class: ");
- write_cb(cbopaque, umax2s(sv, 10, s));
+ write_cb(cbopaque, u2s(sv, 10, s));
write_cb(cbopaque, "\n");
}
if ((err = JEMALLOC_P(mallctl)("opt.lg_tcache_gc_sweep", &ssv,
@@ -570,39 +618,51 @@
CTL_GET("opt.tcache", &tcache_enabled, bool);
write_cb(cbopaque, "Thread cache GC sweep interval: ");
write_cb(cbopaque, tcache_enabled && ssv >= 0 ?
- umax2s(tcache_gc_sweep, 10, s) : "N/A");
+ u2s(tcache_gc_sweep, 10, s) : "N/A");
write_cb(cbopaque, "\n");
}
if ((err = JEMALLOC_P(mallctl)("opt.prof", &bv, &bsz, NULL, 0))
== 0 && bv) {
CTL_GET("opt.lg_prof_bt_max", &sv, size_t);
write_cb(cbopaque, "Maximum profile backtrace depth: ");
- write_cb(cbopaque, umax2s((1U << sv), 10, s));
+ write_cb(cbopaque, u2s((1U << sv), 10, s));
write_cb(cbopaque, "\n");
+ CTL_GET("opt.lg_prof_tcmax", &ssv, ssize_t);
+ write_cb(cbopaque,
+ "Maximum per thread backtrace cache: ");
+ if (ssv >= 0) {
+ write_cb(cbopaque, u2s((1U << ssv), 10, s));
+ write_cb(cbopaque, " (2^");
+ write_cb(cbopaque, u2s(ssv, 10, s));
+ write_cb(cbopaque, ")\n");
+ } else
+ write_cb(cbopaque, "N/A\n");
+
CTL_GET("opt.lg_prof_sample", &sv, size_t);
write_cb(cbopaque, "Average profile sample interval: ");
- write_cb(cbopaque, umax2s((1U << sv), 10, s));
+ write_cb(cbopaque, u2s((((uint64_t)1U) << sv), 10, s));
write_cb(cbopaque, " (2^");
- write_cb(cbopaque, umax2s(sv, 10, s));
+ write_cb(cbopaque, u2s(sv, 10, s));
write_cb(cbopaque, ")\n");
CTL_GET("opt.lg_prof_interval", &ssv, ssize_t);
write_cb(cbopaque, "Average profile dump interval: ");
if (ssv >= 0) {
- write_cb(cbopaque, umax2s((1U << ssv), 10, s));
+ write_cb(cbopaque, u2s((((uint64_t)1U) << ssv),
+ 10, s));
write_cb(cbopaque, " (2^");
- write_cb(cbopaque, umax2s(ssv, 10, s));
+ write_cb(cbopaque, u2s(ssv, 10, s));
write_cb(cbopaque, ")\n");
} else
write_cb(cbopaque, "N/A\n");
}
CTL_GET("arenas.chunksize", &sv, size_t);
write_cb(cbopaque, "Chunk size: ");
- write_cb(cbopaque, umax2s(sv, 10, s));
+ write_cb(cbopaque, u2s(sv, 10, s));
CTL_GET("opt.lg_chunk", &sv, size_t);
write_cb(cbopaque, " (2^");
- write_cb(cbopaque, umax2s(sv, 10, s));
+ write_cb(cbopaque, u2s(sv, 10, s));
write_cb(cbopaque, ")\n");
}
diff --git a/jemalloc/src/tcache.c b/jemalloc/src/tcache.c
index ace24ce..cbbe7a1 100644
--- a/jemalloc/src/tcache.c
+++ b/jemalloc/src/tcache.c
@@ -5,17 +5,19 @@
/* Data. */
bool opt_tcache = true;
-ssize_t opt_lg_tcache_maxclass = LG_TCACHE_MAXCLASS_DEFAULT;
+ssize_t opt_lg_tcache_max = LG_TCACHE_MAXCLASS_DEFAULT;
ssize_t opt_lg_tcache_gc_sweep = LG_TCACHE_GC_SWEEP_DEFAULT;
/* Map of thread-specific caches. */
+#ifndef NO_TLS
__thread tcache_t *tcache_tls JEMALLOC_ATTR(tls_model("initial-exec"));
+#endif
/*
* Same contents as tcache, but initialized such that the TSD destructor is
* called when a thread exits, so that the cache can be cleaned up.
*/
-static pthread_key_t tcache_tsd;
+pthread_key_t tcache_tsd;
size_t nhbins;
size_t tcache_maxclass;
@@ -93,10 +95,10 @@
flush = *(void **)ptr;
chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
if (chunk->arena == arena) {
- size_t pageind = (((uintptr_t)ptr -
- (uintptr_t)chunk) >> PAGE_SHIFT);
+ size_t pageind = ((uintptr_t)ptr -
+ (uintptr_t)chunk) >> PAGE_SHIFT;
arena_chunk_map_t *mapelm =
- &chunk->map[pageind];
+ &chunk->map[pageind-map_bias];
arena_dalloc_bin(arena, chunk, ptr, mapelm);
} else {
/*
@@ -202,12 +204,14 @@
size_t size;
unsigned i;
- size = sizeof(tcache_t) + (sizeof(tcache_bin_t) * (nhbins - 1));
+ size = offsetof(tcache_t, tbins) + (sizeof(tcache_bin_t) * nhbins);
/*
* Round up to the nearest multiple of the cacheline size, in order to
* avoid the possibility of false cacheline sharing.
*
- * That this works relies on the same logic as in ipalloc().
+ * That this works relies on the same logic as in ipalloc(), but we
+ * cannot directly call ipalloc() here due to tcache bootstrapping
+ * issues.
*/
size = (size + CACHELINE_MASK) & (-CACHELINE);
@@ -239,8 +243,7 @@
for (; i < nhbins; i++)
tcache->tbins[i].ncached_max = TCACHE_NSLOTS_LARGE;
- tcache_tls = tcache;
- pthread_setspecific(tcache_tsd, tcache);
+ TCACHE_SET(tcache);
return (tcache);
}
@@ -308,9 +311,9 @@
if (arena_salloc(tcache) <= small_maxclass) {
arena_chunk_t *chunk = CHUNK_ADDR2BASE(tcache);
arena_t *arena = chunk->arena;
- size_t pageind = (((uintptr_t)tcache - (uintptr_t)chunk) >>
- PAGE_SHIFT);
- arena_chunk_map_t *mapelm = &chunk->map[pageind];
+ size_t pageind = ((uintptr_t)tcache - (uintptr_t)chunk) >>
+ PAGE_SHIFT;
+ arena_chunk_map_t *mapelm = &chunk->map[pageind-map_bias];
arena_run_t *run = (arena_run_t *)((uintptr_t)chunk +
(uintptr_t)((pageind - (mapelm->bits >> PAGE_SHIFT)) <<
PAGE_SHIFT));
@@ -328,11 +331,24 @@
{
tcache_t *tcache = (tcache_t *)arg;
- assert(tcache == tcache_tls);
- if (tcache != NULL) {
+ if (tcache == (void *)(uintptr_t)1) {
+ /*
+ * The previous time this destructor was called, we set the key
+ * to 1 so that other destructors wouldn't cause re-creation of
+ * the tcache. This time, do nothing, so that the destructor
+ * will not be called again.
+ */
+ } else if (tcache == (void *)(uintptr_t)2) {
+ /*
+ * Another destructor called an allocator function after this
+ * destructor was called. Reset tcache to 1 in order to
+ * receive another callback.
+ */
+ TCACHE_SET((uintptr_t)1);
+ } else if (tcache != NULL) {
assert(tcache != (void *)(uintptr_t)1);
tcache_destroy(tcache);
- tcache_tls = (void *)(uintptr_t)1;
+ TCACHE_SET((uintptr_t)1);
}
}
@@ -368,16 +384,16 @@
if (opt_tcache) {
/*
- * If necessary, clamp opt_lg_tcache_maxclass, now that
+ * If necessary, clamp opt_lg_tcache_max, now that
* small_maxclass and arena_maxclass are known.
*/
- if (opt_lg_tcache_maxclass < 0 || (1U <<
- opt_lg_tcache_maxclass) < small_maxclass)
+ if (opt_lg_tcache_max < 0 || (1U <<
+ opt_lg_tcache_max) < small_maxclass)
tcache_maxclass = small_maxclass;
- else if ((1U << opt_lg_tcache_maxclass) > arena_maxclass)
+ else if ((1U << opt_lg_tcache_max) > arena_maxclass)
tcache_maxclass = arena_maxclass;
else
- tcache_maxclass = (1U << opt_lg_tcache_maxclass);
+ tcache_maxclass = (1U << opt_lg_tcache_max);
nhbins = nbins + (tcache_maxclass >> PAGE_SHIFT);
diff --git a/jemalloc/src/zone.c b/jemalloc/src/zone.c
new file mode 100644
index 0000000..2c1b231
--- /dev/null
+++ b/jemalloc/src/zone.c
@@ -0,0 +1,354 @@
+#include "jemalloc/internal/jemalloc_internal.h"
+#ifndef JEMALLOC_ZONE
+# error "This source file is for zones on Darwin (OS X)."
+#endif
+
+/******************************************************************************/
+/* Data. */
+
+static malloc_zone_t zone, szone;
+static struct malloc_introspection_t zone_introspect, ozone_introspect;
+
+/******************************************************************************/
+/* Function prototypes for non-inline static functions. */
+
+static size_t zone_size(malloc_zone_t *zone, void *ptr);
+static void *zone_malloc(malloc_zone_t *zone, size_t size);
+static void *zone_calloc(malloc_zone_t *zone, size_t num, size_t size);
+static void *zone_valloc(malloc_zone_t *zone, size_t size);
+static void zone_free(malloc_zone_t *zone, void *ptr);
+static void *zone_realloc(malloc_zone_t *zone, void *ptr, size_t size);
+#if (JEMALLOC_ZONE_VERSION >= 6)
+static void *zone_memalign(malloc_zone_t *zone, size_t alignment,
+ size_t size);
+static void zone_free_definite_size(malloc_zone_t *zone, void *ptr,
+ size_t size);
+#endif
+static void *zone_destroy(malloc_zone_t *zone);
+static size_t zone_good_size(malloc_zone_t *zone, size_t size);
+static void zone_force_lock(malloc_zone_t *zone);
+static void zone_force_unlock(malloc_zone_t *zone);
+static size_t ozone_size(malloc_zone_t *zone, void *ptr);
+static void ozone_free(malloc_zone_t *zone, void *ptr);
+static void *ozone_realloc(malloc_zone_t *zone, void *ptr, size_t size);
+static unsigned ozone_batch_malloc(malloc_zone_t *zone, size_t size,
+ void **results, unsigned num_requested);
+static void ozone_batch_free(malloc_zone_t *zone, void **to_be_freed,
+ unsigned num);
+#if (JEMALLOC_ZONE_VERSION >= 6)
+static void ozone_free_definite_size(malloc_zone_t *zone, void *ptr,
+ size_t size);
+#endif
+static void ozone_force_lock(malloc_zone_t *zone);
+static void ozone_force_unlock(malloc_zone_t *zone);
+
+/******************************************************************************/
+/*
+ * Functions.
+ */
+
+static size_t
+zone_size(malloc_zone_t *zone, void *ptr)
+{
+
+ /*
+ * There appear to be places within Darwin (such as setenv(3)) that
+ * cause calls to this function with pointers that *no* zone owns. If
+ * we knew that all pointers were owned by *some* zone, we could split
+ * our zone into two parts, and use one as the default allocator and
+ * the other as the default deallocator/reallocator. Since that will
+ * not work in practice, we must check all pointers to ensure that they
+ * reside within a mapped chunk before determining size.
+ */
+ return (ivsalloc(ptr));
+}
+
+static void *
+zone_malloc(malloc_zone_t *zone, size_t size)
+{
+
+ return (JEMALLOC_P(malloc)(size));
+}
+
+static void *
+zone_calloc(malloc_zone_t *zone, size_t num, size_t size)
+{
+
+ return (JEMALLOC_P(calloc)(num, size));
+}
+
+static void *
+zone_valloc(malloc_zone_t *zone, size_t size)
+{
+ void *ret = NULL; /* Assignment avoids useless compiler warning. */
+
+ JEMALLOC_P(posix_memalign)(&ret, PAGE_SIZE, size);
+
+ return (ret);
+}
+
+static void
+zone_free(malloc_zone_t *zone, void *ptr)
+{
+
+ JEMALLOC_P(free)(ptr);
+}
+
+static void *
+zone_realloc(malloc_zone_t *zone, void *ptr, size_t size)
+{
+
+ return (JEMALLOC_P(realloc)(ptr, size));
+}
+
+#if (JEMALLOC_ZONE_VERSION >= 6)
+static void *
+zone_memalign(malloc_zone_t *zone, size_t alignment, size_t size)
+{
+ void *ret = NULL; /* Assignment avoids useless compiler warning. */
+
+ JEMALLOC_P(posix_memalign)(&ret, alignment, size);
+
+ return (ret);
+}
+
+static void
+zone_free_definite_size(malloc_zone_t *zone, void *ptr, size_t size)
+{
+
+ assert(ivsalloc(ptr) == size);
+ JEMALLOC_P(free)(ptr);
+}
+#endif
+
+static void *
+zone_destroy(malloc_zone_t *zone)
+{
+
+ /* This function should never be called. */
+ assert(false);
+ return (NULL);
+}
+
+static size_t
+zone_good_size(malloc_zone_t *zone, size_t size)
+{
+ size_t ret;
+ void *p;
+
+ /*
+ * Actually create an object of the appropriate size, then find out
+ * how large it could have been without moving up to the next size
+ * class.
+ */
+ p = JEMALLOC_P(malloc)(size);
+ if (p != NULL) {
+ ret = isalloc(p);
+ JEMALLOC_P(free)(p);
+ } else
+ ret = size;
+
+ return (ret);
+}
+
+static void
+zone_force_lock(malloc_zone_t *zone)
+{
+
+ if (isthreaded)
+ jemalloc_prefork();
+}
+
+static void
+zone_force_unlock(malloc_zone_t *zone)
+{
+
+ if (isthreaded)
+ jemalloc_postfork();
+}
+
+malloc_zone_t *
+create_zone(void)
+{
+
+ zone.size = (void *)zone_size;
+ zone.malloc = (void *)zone_malloc;
+ zone.calloc = (void *)zone_calloc;
+ zone.valloc = (void *)zone_valloc;
+ zone.free = (void *)zone_free;
+ zone.realloc = (void *)zone_realloc;
+ zone.destroy = (void *)zone_destroy;
+ zone.zone_name = "jemalloc_zone";
+ zone.batch_malloc = NULL;
+ zone.batch_free = NULL;
+ zone.introspect = &zone_introspect;
+ zone.version = JEMALLOC_ZONE_VERSION;
+#if (JEMALLOC_ZONE_VERSION >= 6)
+ zone.memalign = zone_memalign;
+ zone.free_definite_size = zone_free_definite_size;
+#endif
+
+ zone_introspect.enumerator = NULL;
+ zone_introspect.good_size = (void *)zone_good_size;
+ zone_introspect.check = NULL;
+ zone_introspect.print = NULL;
+ zone_introspect.log = NULL;
+ zone_introspect.force_lock = (void *)zone_force_lock;
+ zone_introspect.force_unlock = (void *)zone_force_unlock;
+ zone_introspect.statistics = NULL;
+#if (JEMALLOC_ZONE_VERSION >= 6)
+ zone_introspect.zone_locked = NULL;
+#endif
+
+ return (&zone);
+}
+
+static size_t
+ozone_size(malloc_zone_t *zone, void *ptr)
+{
+ size_t ret;
+
+ ret = ivsalloc(ptr);
+ if (ret == 0)
+ ret = szone.size(zone, ptr);
+
+ return (ret);
+}
+
+static void
+ozone_free(malloc_zone_t *zone, void *ptr)
+{
+
+ if (ivsalloc(ptr) != 0)
+ JEMALLOC_P(free)(ptr);
+ else {
+ size_t size = szone.size(zone, ptr);
+ if (size != 0)
+ (szone.free)(zone, ptr);
+ }
+}
+
+static void *
+ozone_realloc(malloc_zone_t *zone, void *ptr, size_t size)
+{
+ size_t oldsize;
+
+ if (ptr == NULL)
+ return (JEMALLOC_P(malloc)(size));
+
+ oldsize = ivsalloc(ptr);
+ if (oldsize != 0)
+ return (JEMALLOC_P(realloc)(ptr, size));
+ else {
+ oldsize = szone.size(zone, ptr);
+ if (oldsize == 0)
+ return (JEMALLOC_P(malloc)(size));
+ else {
+ void *ret = JEMALLOC_P(malloc)(size);
+ if (ret != NULL) {
+ memcpy(ret, ptr, (oldsize < size) ? oldsize :
+ size);
+ (szone.free)(zone, ptr);
+ }
+ return (ret);
+ }
+ }
+}
+
+static unsigned
+ozone_batch_malloc(malloc_zone_t *zone, size_t size, void **results,
+ unsigned num_requested)
+{
+
+ /* Don't bother implementing this interface, since it isn't required. */
+ return (0);
+}
+
+static void
+ozone_batch_free(malloc_zone_t *zone, void **to_be_freed, unsigned num)
+{
+ unsigned i;
+
+ for (i = 0; i < num; i++)
+ ozone_free(zone, to_be_freed[i]);
+}
+
+#if (JEMALLOC_ZONE_VERSION >= 6)
+static void
+ozone_free_definite_size(malloc_zone_t *zone, void *ptr, size_t size)
+{
+
+ if (ivsalloc(ptr) != 0) {
+ assert(ivsalloc(ptr) == size);
+ JEMALLOC_P(free)(ptr);
+ } else {
+ assert(size == szone.size(zone, ptr));
+ szone.free_definite_size(zone, ptr, size);
+ }
+}
+#endif
+
+static void
+ozone_force_lock(malloc_zone_t *zone)
+{
+
+ /* jemalloc locking is taken care of by the normal jemalloc zone. */
+ szone.introspect->force_lock(zone);
+}
+
+static void
+ozone_force_unlock(malloc_zone_t *zone)
+{
+
+ /* jemalloc locking is taken care of by the normal jemalloc zone. */
+ szone.introspect->force_unlock(zone);
+}
+
+/*
+ * Overlay the default scalable zone (szone) such that existing allocations are
+ * drained, and further allocations come from jemalloc. This is necessary
+ * because Core Foundation directly accesses and uses the szone before the
+ * jemalloc library is even loaded.
+ */
+void
+szone2ozone(malloc_zone_t *zone)
+{
+
+ /*
+ * Stash a copy of the original szone so that we can call its
+ * functions as needed. Note that internally, the szone stores its
+ * bookkeeping data structures immediately following the malloc_zone_t
+ * header, so when calling szone functions, we need to pass a pointer
+ * to the original zone structure.
+ */
+ memcpy(&szone, zone, sizeof(malloc_zone_t));
+
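+	/*
+	 * New allocations (malloc, calloc, valloc, memalign) go directly to
+	 * jemalloc, while size, free, and realloc use the ownership-aware
+	 * ozone_* wrappers so that objects created by the original szone are
+	 * still handled correctly as they drain.
+	 */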
+ zone->size = (void *)ozone_size;
+ zone->malloc = (void *)zone_malloc;
+ zone->calloc = (void *)zone_calloc;
+ zone->valloc = (void *)zone_valloc;
+ zone->free = (void *)ozone_free;
+ zone->realloc = (void *)ozone_realloc;
+ zone->destroy = (void *)zone_destroy;
+ zone->zone_name = "jemalloc_ozone";
+ zone->batch_malloc = ozone_batch_malloc;
+ zone->batch_free = ozone_batch_free;
+ zone->introspect = &ozone_introspect;
+ zone->version = JEMALLOC_ZONE_VERSION;
+#if (JEMALLOC_ZONE_VERSION >= 6)
+ zone->memalign = zone_memalign;
+ zone->free_definite_size = ozone_free_definite_size;
+#endif
+
+ ozone_introspect.enumerator = NULL;
+ ozone_introspect.good_size = (void *)zone_good_size;
+ ozone_introspect.check = NULL;
+ ozone_introspect.print = NULL;
+ ozone_introspect.log = NULL;
+ ozone_introspect.force_lock = (void *)ozone_force_lock;
+ ozone_introspect.force_unlock = (void *)ozone_force_unlock;
+ ozone_introspect.statistics = NULL;
+#if (JEMALLOC_ZONE_VERSION >= 6)
+ ozone_introspect.zone_locked = NULL;
+#endif
+}
diff --git a/jemalloc/test/allocated.c b/jemalloc/test/allocated.c
new file mode 100644
index 0000000..64a1735
--- /dev/null
+++ b/jemalloc/test/allocated.c
@@ -0,0 +1,105 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <pthread.h>
+#include <assert.h>
+#include <errno.h>
+#include <string.h>
+
+#define JEMALLOC_MANGLE
+#include "jemalloc_test.h"
+
+void *
+thread_start(void *arg)
+{
+ int err;
+ void *p;
+ uint64_t a0, a1, d0, d1;
+ size_t sz, usize;
+
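+	/*
+	 * Read this thread's allocation counters before and after a small
+	 * malloc()/free() pair, and verify that each counter grows by at least
+	 * the usable size of the allocation.
+	 */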
+ sz = sizeof(a0);
+ if ((err = JEMALLOC_P(mallctl)("thread.allocated", &a0, &sz, NULL,
+ 0))) {
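+		/*
+		 * "thread.allocated" only exists when JEMALLOC_STATS is
+		 * enabled; if it is absent, skip the test rather than fail.
+		 */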
+ if (err == ENOENT) {
+#ifdef JEMALLOC_STATS
+ assert(false);
+#endif
+ goto RETURN;
+ }
+ fprintf(stderr, "%s(): Error in mallctl(): %s\n", __func__,
+ strerror(err));
+ exit(1);
+ }
+
+ sz = sizeof(d0);
+ if ((err = JEMALLOC_P(mallctl)("thread.deallocated", &d0, &sz, NULL,
+ 0))) {
+ if (err == ENOENT) {
+#ifdef JEMALLOC_STATS
+ assert(false);
+#endif
+ goto RETURN;
+ }
+ fprintf(stderr, "%s(): Error in mallctl(): %s\n", __func__,
+ strerror(err));
+ exit(1);
+ }
+
+ p = JEMALLOC_P(malloc)(1);
+ if (p == NULL) {
+ fprintf(stderr, "%s(): Error in malloc()\n", __func__);
+ exit(1);
+ }
+
+ sz = sizeof(a1);
+ JEMALLOC_P(mallctl)("thread.allocated", &a1, &sz, NULL, 0);
+
+ usize = JEMALLOC_P(malloc_usable_size)(p);
+ assert(a0 + usize <= a1);
+
+ JEMALLOC_P(free)(p);
+
+ sz = sizeof(d1);
+ JEMALLOC_P(mallctl)("thread.deallocated", &d1, &sz, NULL, 0);
+
+ assert(d0 + usize <= d1);
+
+RETURN:
+ return (NULL);
+}
+
+int
+main(void)
+{
+ int ret = 0;
+ pthread_t thread;
+
+ fprintf(stderr, "Test begin\n");
+
+ thread_start(NULL);
+
+ if (pthread_create(&thread, NULL, thread_start, NULL)
+ != 0) {
+ fprintf(stderr, "%s(): Error in pthread_create()\n", __func__);
+ ret = 1;
+ goto RETURN;
+ }
+ pthread_join(thread, (void *)&ret);
+
+ thread_start(NULL);
+
+ if (pthread_create(&thread, NULL, thread_start, NULL)
+ != 0) {
+ fprintf(stderr, "%s(): Error in pthread_create()\n", __func__);
+ ret = 1;
+ goto RETURN;
+ }
+ pthread_join(thread, (void *)&ret);
+
+ thread_start(NULL);
+
+RETURN:
+ fprintf(stderr, "Test end\n");
+ return (ret);
+}
diff --git a/jemalloc/test/allocated.exp b/jemalloc/test/allocated.exp
new file mode 100644
index 0000000..369a88d
--- /dev/null
+++ b/jemalloc/test/allocated.exp
@@ -0,0 +1,2 @@
+Test begin
+Test end
diff --git a/jemalloc/test/allocm.c b/jemalloc/test/allocm.c
new file mode 100644
index 0000000..59d0002
--- /dev/null
+++ b/jemalloc/test/allocm.c
@@ -0,0 +1,133 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+
+#define JEMALLOC_MANGLE
+#include "jemalloc_test.h"
+
+#define CHUNK 0x400000
+/* #define MAXALIGN ((size_t)0x80000000000LLU) */
+#define MAXALIGN ((size_t)0x2000000LLU)
+#define NITER 4
+
+int
+main(void)
+{
+ int r;
+ void *p;
+ size_t sz, alignment, total, tsz;
+ unsigned i;
+ void *ps[NITER];
+
+ fprintf(stderr, "Test begin\n");
+
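+	/*
+	 * allocm() reports the real (usable) size of the allocation via its
+	 * second argument; it must be at least the 42 bytes requested.
+	 */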
+ sz = 0;
+ r = JEMALLOC_P(allocm)(&p, &sz, 42, 0);
+ if (r != ALLOCM_SUCCESS) {
+ fprintf(stderr, "Unexpected allocm() error\n");
+ abort();
+ }
+ if (sz < 42)
+ fprintf(stderr, "Real size smaller than expected\n");
+ if (JEMALLOC_P(dallocm)(p, 0) != ALLOCM_SUCCESS)
+ fprintf(stderr, "Unexpected dallocm() error\n");
+
+ r = JEMALLOC_P(allocm)(&p, NULL, 42, 0);
+ if (r != ALLOCM_SUCCESS) {
+ fprintf(stderr, "Unexpected allocm() error\n");
+ abort();
+ }
+ if (JEMALLOC_P(dallocm)(p, 0) != ALLOCM_SUCCESS)
+ fprintf(stderr, "Unexpected dallocm() error\n");
+
+ r = JEMALLOC_P(allocm)(&p, NULL, 42, ALLOCM_ZERO);
+ if (r != ALLOCM_SUCCESS) {
+ fprintf(stderr, "Unexpected allocm() error\n");
+ abort();
+ }
+ if (JEMALLOC_P(dallocm)(p, 0) != ALLOCM_SUCCESS)
+ fprintf(stderr, "Unexpected dallocm() error\n");
+
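+	/*
+	 * The following requests cannot possibly be satisfied (the alignment
+	 * and/or aligned size exceed the address space), so allocm() is
+	 * expected to fail.
+	 */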
+#if LG_SIZEOF_PTR == 3
+ alignment = 0x8000000000000000LLU;
+ sz = 0x8000000000000000LLU;
+#else
+ alignment = 0x80000000LU;
+ sz = 0x80000000LU;
+#endif
+ r = JEMALLOC_P(allocm)(&p, NULL, sz, ALLOCM_ALIGN(alignment));
+ if (r == ALLOCM_SUCCESS) {
+ fprintf(stderr,
+ "Expected error for allocm(&p, %zu, 0x%x)\n",
+ sz, ALLOCM_ALIGN(alignment));
+ }
+
+#if LG_SIZEOF_PTR == 3
+ alignment = 0x4000000000000000LLU;
+ sz = 0x8400000000000001LLU;
+#else
+ alignment = 0x40000000LU;
+ sz = 0x84000001LU;
+#endif
+ r = JEMALLOC_P(allocm)(&p, NULL, sz, ALLOCM_ALIGN(alignment));
+ if (r == ALLOCM_SUCCESS) {
+ fprintf(stderr,
+ "Expected error for allocm(&p, %zu, 0x%x)\n",
+ sz, ALLOCM_ALIGN(alignment));
+ }
+
+ alignment = 0x10LLU;
+#if LG_SIZEOF_PTR == 3
+ sz = 0xfffffffffffffff0LLU;
+#else
+ sz = 0xfffffff0LU;
+#endif
+ r = JEMALLOC_P(allocm)(&p, NULL, sz, ALLOCM_ALIGN(alignment));
+ if (r == ALLOCM_SUCCESS) {
+ fprintf(stderr,
+ "Expected error for allocm(&p, %zu, 0x%x)\n",
+ sz, ALLOCM_ALIGN(alignment));
+ }
+
+ for (i = 0; i < NITER; i++)
+ ps[i] = NULL;
+
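+	/*
+	 * Sweep power-of-two alignments up to MAXALIGN, allocating zeroed,
+	 * aligned objects of increasing size and verifying the alignment of
+	 * each returned pointer.
+	 */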
+ for (alignment = 8;
+ alignment <= MAXALIGN;
+ alignment <<= 1) {
+ total = 0;
+ fprintf(stderr, "Alignment: %zu\n", alignment);
+ for (sz = 1;
+ sz < 3 * alignment && sz < (1U << 31);
+ sz += (alignment >> (LG_SIZEOF_PTR-1)) - 1) {
+ for (i = 0; i < NITER; i++) {
+ r = JEMALLOC_P(allocm)(&ps[i], NULL, sz,
+ ALLOCM_ALIGN(alignment) | ALLOCM_ZERO);
+ if (r != ALLOCM_SUCCESS) {
+ fprintf(stderr,
+ "Error for size %zu (0x%zx): %d\n",
+ sz, sz, r);
+ exit(1);
+ }
+				if ((uintptr_t)ps[i] & (alignment-1)) {
+					fprintf(stderr,
+					    "%p inadequately aligned for"
+					    " alignment: %zu\n", ps[i], alignment);
+ }
+ JEMALLOC_P(sallocm)(ps[i], &tsz, 0);
+ total += tsz;
+ if (total >= (MAXALIGN << 1))
+ break;
+ }
+ for (i = 0; i < NITER; i++) {
+ if (ps[i] != NULL) {
+ JEMALLOC_P(dallocm)(ps[i], 0);
+ ps[i] = NULL;
+ }
+ }
+ }
+ }
+
+ fprintf(stderr, "Test end\n");
+ return (0);
+}
diff --git a/jemalloc/test/allocm.exp b/jemalloc/test/allocm.exp
new file mode 100644
index 0000000..b5061c7
--- /dev/null
+++ b/jemalloc/test/allocm.exp
@@ -0,0 +1,25 @@
+Test begin
+Alignment: 8
+Alignment: 16
+Alignment: 32
+Alignment: 64
+Alignment: 128
+Alignment: 256
+Alignment: 512
+Alignment: 1024
+Alignment: 2048
+Alignment: 4096
+Alignment: 8192
+Alignment: 16384
+Alignment: 32768
+Alignment: 65536
+Alignment: 131072
+Alignment: 262144
+Alignment: 524288
+Alignment: 1048576
+Alignment: 2097152
+Alignment: 4194304
+Alignment: 8388608
+Alignment: 16777216
+Alignment: 33554432
+Test end
diff --git a/jemalloc/test/jemalloc_test.h.in b/jemalloc/test/jemalloc_test.h.in
new file mode 100644
index 0000000..0c48895
--- /dev/null
+++ b/jemalloc/test/jemalloc_test.h.in
@@ -0,0 +1,6 @@
+/*
+ * This header should be included by tests, rather than directly including
+ * jemalloc/jemalloc.h, because --with-install-suffix may cause the header to
+ * have a different name.
+ */
+#include "jemalloc/jemalloc@install_suffix@.h"
diff --git a/jemalloc/test/posix_memalign.c b/jemalloc/test/posix_memalign.c
new file mode 100644
index 0000000..3e306c0
--- /dev/null
+++ b/jemalloc/test/posix_memalign.c
@@ -0,0 +1,121 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <errno.h>
+#include <string.h>
+
+#define JEMALLOC_MANGLE
+#include "jemalloc_test.h"
+
+#define CHUNK 0x400000
+/* #define MAXALIGN ((size_t)0x80000000000LLU) */
+#define MAXALIGN ((size_t)0x2000000LLU)
+#define NITER 4
+
+int
+main(void)
+{
+ size_t alignment, size, total;
+ unsigned i;
+ int err;
+ void *p, *ps[NITER];
+
+ fprintf(stderr, "Test begin\n");
+
+ /* Test error conditions. */
+ for (alignment = 0; alignment < sizeof(void *); alignment++) {
+ err = JEMALLOC_P(posix_memalign)(&p, alignment, 1);
+ if (err != EINVAL) {
+ fprintf(stderr,
+ "Expected error for invalid alignment %zu\n",
+ alignment);
+ }
+ }
+
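+	/* Non-power-of-two alignments (power of two plus one) must be rejected. */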
+ for (alignment = sizeof(size_t); alignment < MAXALIGN;
+ alignment <<= 1) {
+ err = JEMALLOC_P(posix_memalign)(&p, alignment + 1, 1);
+ if (err == 0) {
+ fprintf(stderr,
+ "Expected error for invalid alignment %zu\n",
+ alignment + 1);
+ }
+ }
+
+#if LG_SIZEOF_PTR == 3
+ alignment = 0x8000000000000000LLU;
+ size = 0x8000000000000000LLU;
+#else
+ alignment = 0x80000000LU;
+ size = 0x80000000LU;
+#endif
+ err = JEMALLOC_P(posix_memalign)(&p, alignment, size);
+ if (err == 0) {
+ fprintf(stderr,
+ "Expected error for posix_memalign(&p, %zu, %zu)\n",
+ alignment, size);
+ }
+
+#if LG_SIZEOF_PTR == 3
+ alignment = 0x4000000000000000LLU;
+ size = 0x8400000000000001LLU;
+#else
+ alignment = 0x40000000LU;
+ size = 0x84000001LU;
+#endif
+ err = JEMALLOC_P(posix_memalign)(&p, alignment, size);
+ if (err == 0) {
+ fprintf(stderr,
+ "Expected error for posix_memalign(&p, %zu, %zu)\n",
+ alignment, size);
+ }
+
+ alignment = 0x10LLU;
+#if LG_SIZEOF_PTR == 3
+ size = 0xfffffffffffffff0LLU;
+#else
+ size = 0xfffffff0LU;
+#endif
+ err = JEMALLOC_P(posix_memalign)(&p, alignment, size);
+ if (err == 0) {
+ fprintf(stderr,
+ "Expected error for posix_memalign(&p, %zu, %zu)\n",
+ alignment, size);
+ }
+
+ for (i = 0; i < NITER; i++)
+ ps[i] = NULL;
+
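+	/*
+	 * Sweep power-of-two alignments up to MAXALIGN with increasing sizes,
+	 * using malloc_usable_size() to bound the total footprint of each
+	 * round.
+	 */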
+ for (alignment = 8;
+ alignment <= MAXALIGN;
+ alignment <<= 1) {
+ total = 0;
+ fprintf(stderr, "Alignment: %zu\n", alignment);
+ for (size = 1;
+ size < 3 * alignment && size < (1U << 31);
+ size += (alignment >> (LG_SIZEOF_PTR-1)) - 1) {
+ for (i = 0; i < NITER; i++) {
+ err = JEMALLOC_P(posix_memalign)(&ps[i],
+ alignment, size);
+ if (err) {
+ fprintf(stderr,
+ "Error for size %zu (0x%zx): %s\n",
+ size, size, strerror(err));
+ exit(1);
+ }
+ total += JEMALLOC_P(malloc_usable_size)(ps[i]);
+ if (total >= (MAXALIGN << 1))
+ break;
+ }
+ for (i = 0; i < NITER; i++) {
+ if (ps[i] != NULL) {
+ JEMALLOC_P(free)(ps[i]);
+ ps[i] = NULL;
+ }
+ }
+ }
+ }
+
+ fprintf(stderr, "Test end\n");
+ return (0);
+}
diff --git a/jemalloc/test/posix_memalign.exp b/jemalloc/test/posix_memalign.exp
new file mode 100644
index 0000000..b5061c7
--- /dev/null
+++ b/jemalloc/test/posix_memalign.exp
@@ -0,0 +1,25 @@
+Test begin
+Alignment: 8
+Alignment: 16
+Alignment: 32
+Alignment: 64
+Alignment: 128
+Alignment: 256
+Alignment: 512
+Alignment: 1024
+Alignment: 2048
+Alignment: 4096
+Alignment: 8192
+Alignment: 16384
+Alignment: 32768
+Alignment: 65536
+Alignment: 131072
+Alignment: 262144
+Alignment: 524288
+Alignment: 1048576
+Alignment: 2097152
+Alignment: 4194304
+Alignment: 8388608
+Alignment: 16777216
+Alignment: 33554432
+Test end
diff --git a/jemalloc/test/rallocm.c b/jemalloc/test/rallocm.c
new file mode 100644
index 0000000..a8cadeb
--- /dev/null
+++ b/jemalloc/test/rallocm.c
@@ -0,0 +1,117 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#define JEMALLOC_MANGLE
+#include "jemalloc_test.h"
+
+int
+main(void)
+{
+ void *p, *q;
+ size_t sz, tsz;
+ int r;
+
+ fprintf(stderr, "Test begin\n");
+
+ r = JEMALLOC_P(allocm)(&p, &sz, 42, 0);
+ if (r != ALLOCM_SUCCESS) {
+ fprintf(stderr, "Unexpected allocm() error\n");
+ abort();
+ }
+
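+	/*
+	 * With ALLOCM_NO_MOVE, rallocm() may only resize the allocation in
+	 * place; if the request cannot be satisfied without moving the object,
+	 * it returns ALLOCM_ERR_NOT_MOVED.
+	 */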
+ q = p;
+ r = JEMALLOC_P(rallocm)(&q, &tsz, sz, 0, ALLOCM_NO_MOVE);
+ if (r != ALLOCM_SUCCESS)
+ fprintf(stderr, "Unexpected rallocm() error\n");
+ if (q != p)
+ fprintf(stderr, "Unexpected object move\n");
+ if (tsz != sz) {
+ fprintf(stderr, "Unexpected size change: %zu --> %zu\n",
+ sz, tsz);
+ }
+
+ q = p;
+ r = JEMALLOC_P(rallocm)(&q, &tsz, sz, 5, ALLOCM_NO_MOVE);
+ if (r != ALLOCM_SUCCESS)
+ fprintf(stderr, "Unexpected rallocm() error\n");
+ if (q != p)
+ fprintf(stderr, "Unexpected object move\n");
+ if (tsz != sz) {
+ fprintf(stderr, "Unexpected size change: %zu --> %zu\n",
+ sz, tsz);
+ }
+
+ q = p;
+ r = JEMALLOC_P(rallocm)(&q, &tsz, sz + 5, 0, ALLOCM_NO_MOVE);
+ if (r != ALLOCM_ERR_NOT_MOVED)
+ fprintf(stderr, "Unexpected rallocm() result\n");
+ if (q != p)
+ fprintf(stderr, "Unexpected object move\n");
+ if (tsz != sz) {
+ fprintf(stderr, "Unexpected size change: %zu --> %zu\n",
+ sz, tsz);
+ }
+
+ q = p;
+ r = JEMALLOC_P(rallocm)(&q, &tsz, sz + 5, 0, 0);
+ if (r != ALLOCM_SUCCESS)
+ fprintf(stderr, "Unexpected rallocm() error\n");
+ if (q == p)
+ fprintf(stderr, "Expected object move\n");
+ if (tsz == sz) {
+ fprintf(stderr, "Expected size change: %zu --> %zu\n",
+ sz, tsz);
+ }
+ p = q;
+ sz = tsz;
+
+ r = JEMALLOC_P(rallocm)(&q, &tsz, 8192, 0, 0);
+ if (r != ALLOCM_SUCCESS)
+ fprintf(stderr, "Unexpected rallocm() error\n");
+ if (q == p)
+ fprintf(stderr, "Expected object move\n");
+ if (tsz == sz) {
+ fprintf(stderr, "Expected size change: %zu --> %zu\n",
+ sz, tsz);
+ }
+ p = q;
+ sz = tsz;
+
+ r = JEMALLOC_P(rallocm)(&q, &tsz, 16384, 0, 0);
+ if (r != ALLOCM_SUCCESS)
+ fprintf(stderr, "Unexpected rallocm() error\n");
+ if (tsz == sz) {
+ fprintf(stderr, "Expected size change: %zu --> %zu\n",
+ sz, tsz);
+ }
+ p = q;
+ sz = tsz;
+
+ r = JEMALLOC_P(rallocm)(&q, &tsz, 8192, 0, ALLOCM_NO_MOVE);
+ if (r != ALLOCM_SUCCESS)
+ fprintf(stderr, "Unexpected rallocm() error\n");
+ if (q != p)
+ fprintf(stderr, "Unexpected object move\n");
+ if (tsz == sz) {
+ fprintf(stderr, "Expected size change: %zu --> %zu\n",
+ sz, tsz);
+ }
+ sz = tsz;
+
+ r = JEMALLOC_P(rallocm)(&q, &tsz, 16384, 0, ALLOCM_NO_MOVE);
+ if (r != ALLOCM_SUCCESS)
+ fprintf(stderr, "Unexpected rallocm() error\n");
+ if (q != p)
+ fprintf(stderr, "Unexpected object move\n");
+ if (tsz == sz) {
+ fprintf(stderr, "Expected size change: %zu --> %zu\n",
+ sz, tsz);
+ }
+ sz = tsz;
+
+ JEMALLOC_P(dallocm)(p, 0);
+
+ fprintf(stderr, "Test end\n");
+ return (0);
+}
diff --git a/jemalloc/test/rallocm.exp b/jemalloc/test/rallocm.exp
new file mode 100644
index 0000000..369a88d
--- /dev/null
+++ b/jemalloc/test/rallocm.exp
@@ -0,0 +1,2 @@
+Test begin
+Test end
diff --git a/jemalloc/test/thread_arena.c b/jemalloc/test/thread_arena.c
new file mode 100644
index 0000000..bd884e1
--- /dev/null
+++ b/jemalloc/test/thread_arena.c
@@ -0,0 +1,82 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <pthread.h>
+#include <string.h>
+
+#define JEMALLOC_MANGLE
+#include "jemalloc_test.h"
+
+void *
+thread_start(void *arg)
+{
+ unsigned main_arena_ind = *(unsigned *)arg;
+ void *p;
+ unsigned arena_ind;
+ size_t size;
+ int err;
+
+ p = JEMALLOC_P(malloc)(1);
+ if (p == NULL) {
+ fprintf(stderr, "%s(): Error in malloc()\n", __func__);
+ return (void *)1;
+ }
+
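+	/*
+	 * Bind this thread to the main thread's arena by writing
+	 * main_arena_ind through the "thread.arena" mallctl; the previous
+	 * arena index is returned through arena_ind.
+	 */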
+ size = sizeof(arena_ind);
+ if ((err = JEMALLOC_P(mallctl)("thread.arena", &arena_ind, &size,
+ &main_arena_ind, sizeof(main_arena_ind)))) {
+ fprintf(stderr, "%s(): Error in mallctl(): %s\n", __func__,
+ strerror(err));
+ return (void *)1;
+ }
+
+ return (NULL);
+}
+
+int
+main(void)
+{
+ int ret = 0;
+ void *p;
+ unsigned arena_ind;
+ size_t size;
+ int err;
+ pthread_t thread;
+
+ fprintf(stderr, "Test begin\n");
+
+ p = JEMALLOC_P(malloc)(1);
+ if (p == NULL) {
+ fprintf(stderr, "%s(): Error in malloc()\n", __func__);
+ ret = 1;
+ goto RETURN;
+ }
+
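+	/* Query the arena the main thread is currently using. */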
+ size = sizeof(arena_ind);
+ if ((err = JEMALLOC_P(mallctl)("thread.arena", &arena_ind, &size, NULL,
+ 0))) {
+ fprintf(stderr, "%s(): Error in mallctl(): %s\n", __func__,
+ strerror(err));
+ ret = 1;
+ goto RETURN;
+ }
+
+ if (pthread_create(&thread, NULL, thread_start, (void *)&arena_ind)
+ != 0) {
+ fprintf(stderr, "%s(): Error in pthread_create()\n", __func__);
+ ret = 1;
+ goto RETURN;
+ }
+ pthread_join(thread, (void *)&ret);
+
+ if (pthread_create(&thread, NULL, thread_start, (void *)&arena_ind)
+ != 0) {
+ fprintf(stderr, "%s(): Error in pthread_create()\n", __func__);
+ ret = 1;
+ goto RETURN;
+ }
+ pthread_join(thread, (void *)&ret);
+
+RETURN:
+ fprintf(stderr, "Test end\n");
+ return (ret);
+}
diff --git a/jemalloc/test/thread_arena.exp b/jemalloc/test/thread_arena.exp
new file mode 100644
index 0000000..369a88d
--- /dev/null
+++ b/jemalloc/test/thread_arena.exp
@@ -0,0 +1,2 @@
+Test begin
+Test end