Merge branch 'master' into linux-qos-prioritty
diff --git a/INSTALL.md b/INSTALL.md
index b758f2b..9940c2c 100644
--- a/INSTALL.md
+++ b/INSTALL.md
@@ -87,11 +87,6 @@
 Specify the path to Apple's libplatform package, so that appropriate headers
 	can be found and used.
 
-`--with-apple-libclosure-source`
-
-Specify the path to Apple's Libclosure package, so that appropriate headers
-	can be found and used.
-
 `--with-apple-xnu-source`
 
 Specify the path to Apple's XNU package, so that appropriate headers can be
@@ -104,11 +99,6 @@
 The following options are likely to only be useful when building libdispatch on
 OS X as a replacement for /usr/lib/system/libdispatch.dylib:
 
-`--with-apple-objc4-source`
-
-Specify the path to Apple's objc4 package, so that appropriate headers can
-	be found and used.
-
 `--disable-libdispatch-init-constructor`
 
 Do not tag libdispatch's init routine as __constructor, in which case it must be run manually before libdispatch routines can be called. This is the default when building on OS X. For /usr/lib/system/libdispatch.dylib the init routine is called automatically during process start.
@@ -131,9 +121,7 @@
 		--enable-apple-tsd-optimizations \
 		--with-apple-libpthread-source=/path/to/10.11.0/libpthread-137.1.1 \
 		--with-apple-libplatform-source=/path/to/10.11.0/libplatform-73.1.1 \
-		--with-apple-libclosure-source=/path/to/10.11.0/libclosure-65 \
 		--with-apple-xnu-source=/path/to/10.11.0/xnu-3247.1.106 \
-		--with-apple-objc4-source=/path/to/10.11.0/objc4-680
 	make check
 
 ### Building and installing for FreeBSD
diff --git a/configure.ac b/configure.ac
index 3221112..8f38f08 100644
--- a/configure.ac
+++ b/configure.ac
@@ -125,13 +125,6 @@
   CPPFLAGS="$CPPFLAGS -isystem $apple_libplatform_source_include_path"
 ])
 
-AC_ARG_WITH([apple-libclosure-source],
-  [AS_HELP_STRING([--with-apple-libclosure-source],
-    [Specify path to Apple libclosure source])], [
-  apple_libclosure_source_path=${withval}
-  CPPFLAGS="$CPPFLAGS -isystem $apple_libclosure_source_path"
-])
-
 AC_ARG_WITH([apple-xnu-source],
   [AS_HELP_STRING([--with-apple-xnu-source],
     [Specify path to Apple XNU source])], [
@@ -143,12 +136,6 @@
   CPPFLAGS="$CPPFLAGS -idirafter $apple_xnu_source_libkern_path -isystem $apple_xnu_source_bsd_path -isystem $apple_xnu_source_libsyscall_path -isystem $apple_xnu_source_libproc_path "
 ])
 
-AC_ARG_WITH([apple-objc4-source],
-  [AS_HELP_STRING([--with-apple-objc4-source],
-    [Specify path to Apple objc4 source])], [
-  apple_objc4_source_runtime_path=${withval}/runtime
-])
-
 AC_CACHE_CHECK([for System.framework/PrivateHeaders], dispatch_cv_system_privateheaders,
   [AS_IF([test -d /System/Library/Frameworks/System.framework/PrivateHeaders],
     [dispatch_cv_system_privateheaders=yes], [dispatch_cv_system_privateheaders=no])]
@@ -387,24 +374,10 @@
   [have_foundation=true], [have_foundation=false]
 )
 AM_CONDITIONAL(HAVE_FOUNDATION, $have_foundation)
-# hack for objc4/runtime/objc-internal.h
-AS_IF([test -n "$apple_objc4_source_runtime_path"], [
-  saveCPPFLAGS="$CPPFLAGS"
-  CPPFLAGS="$CPPFLAGS -I."
-  ln -fsh "$apple_objc4_source_runtime_path" objc
-])
-AC_CHECK_HEADER([objc/objc-internal.h], [
+AC_CHECK_HEADER([objc/NSObject.h], [
   AC_DEFINE(HAVE_OBJC, 1, [Define if you have the Objective-C runtime])
-  have_objc=true], [have_objc=false],
-  [#include <objc/runtime.h>]
+  have_objc=true], [have_objc=false]
 )
-AS_IF([test -n "$apple_objc4_source_runtime_path"], [
-  rm -f objc
-  CPPFLAGS="$saveCPPFLAGS"
-  AC_CONFIG_COMMANDS([src/objc],
-    [ln -fsh "$apple_objc4_source_runtime_path" src/objc],
-    [apple_objc4_source_runtime_path="$apple_objc4_source_runtime_path"])
-])
 AM_CONDITIONAL(USE_OBJC, $have_objc)
 AC_LANG_POP([Objective C])
 
@@ -443,7 +416,10 @@
   [have_sem_init=true], [have_sem_init=false]
 )
 
-AC_CHECK_HEADER(linux/futex.h, [have_futex=true], [have_futex=false])
+AC_CHECK_HEADER([linux/futex.h], [
+  AC_DEFINE(HAVE_FUTEX, 1, [Define if linux/futex.h is present])
+  have_futex=true], [have_futex=false]
+)
 
 #
 # We support both Mach semaphores and POSIX semaphores; if the former are
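Not part of the patch: a minimal sketch of how a HAVE_FUTEX guard like the one the new AC_CHECK_HEADER/AC_DEFINE pair produces is typically consumed. It is illustrative only and is not libdispatch's actual lock code; the function names are made up.

```c
#include <stdint.h>

#if HAVE_FUTEX  /* defined to 1 by configure when linux/futex.h is present */
#include <linux/futex.h>
#include <sys/syscall.h>
#include <unistd.h>

/* If *uaddr still equals 'expected', sleep until woken (may wake spuriously). */
static void
example_futex_wait(uint32_t *uaddr, uint32_t expected)
{
	(void)syscall(SYS_futex, uaddr, FUTEX_WAIT_PRIVATE, expected,
			NULL, NULL, 0);
}

/* Wake a single waiter blocked on uaddr. */
static void
example_futex_wake(uint32_t *uaddr)
{
	(void)syscall(SYS_futex, uaddr, FUTEX_WAKE_PRIVATE, 1,
			NULL, NULL, 0);
}
#endif /* HAVE_FUTEX */
```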
diff --git a/dispatch/dispatch.h b/dispatch/dispatch.h
index 6f8b31b..2d45b83 100644
--- a/dispatch/dispatch.h
+++ b/dispatch/dispatch.h
@@ -23,6 +23,7 @@
 
 #ifdef __APPLE__
 #include <Availability.h>
+#include <os/availability.h>
 #include <TargetConditionals.h>
 #include <os/base.h>
 #elif defined(__linux__)
@@ -47,7 +48,7 @@
 #endif
 #endif
 
-#define DISPATCH_API_VERSION 20160831
+#define DISPATCH_API_VERSION 20170124
 
 #ifndef __DISPATCH_BUILDING_DISPATCH__
 
diff --git a/dispatch/queue.h b/dispatch/queue.h
index b1dd8e5..606bd30 100644
--- a/dispatch/queue.h
+++ b/dispatch/queue.h
@@ -206,15 +206,49 @@
 	void *_Nullable context,
 	dispatch_function_t work);
 
+
+#if !defined(__APPLE__) || TARGET_OS_WATCH || TARGET_OS_TV || \
+		(defined(__IPHONE_OS_VERSION_MIN_REQUIRED) && \
+		__IPHONE_OS_VERSION_MIN_REQUIRED >= __IPHONE_7_0) || \
+		(defined(__MAC_OS_X_VERSION_MIN_REQUIRED) && \
+		__MAC_OS_X_VERSION_MIN_REQUIRED >= __MAC_10_9)
+#define DISPATCH_APPLY_AUTO_AVAILABLE 1
+#else
+#define DISPATCH_APPLY_AUTO_AVAILABLE 0
+#endif
+
+/*!
+ * @constant DISPATCH_APPLY_AUTO
+ *
+ * @abstract
+ * Constant to pass to dispatch_apply() or dispatch_apply_f() to request that
+ * the system automatically use worker threads that match the configuration of
+ * the current thread most closely.
+ *
+ * @discussion
+ * When submitting a block for parallel invocation, passing this constant as the
+ * queue argument will automatically use the global concurrent queue that
+ * matches the Quality of Service of the caller most closely.
+ *
+ * No assumptions should be made about which global concurrent queue will
+ * actually be used.
+ *
+ * Using this constant deploys backward to macOS 10.9, iOS 7.0 and any tvOS or
+ * watchOS version.
+ */
+#if DISPATCH_APPLY_AUTO_AVAILABLE
+#define DISPATCH_APPLY_AUTO ((dispatch_queue_t _Nonnull)0)
+#endif
+
 /*!
  * @function dispatch_apply
  *
  * @abstract
- * Submits a block to a dispatch queue for multiple invocations.
+ * Submits a block to a dispatch queue for parallel invocation.
  *
  * @discussion
- * Submits a block to a dispatch queue for multiple invocations. This function
- * waits for the task block to complete before returning. If the target queue
+ * Submits a block to a dispatch queue for parallel invocation. This function
+ * waits for the task block to complete before returning. If the specified queue
  * is concurrent, the block may be invoked concurrently, and it must therefore
  * be reentrant safe.
  *
@@ -224,8 +258,9 @@
  * The number of iterations to perform.
  *
  * @param queue
- * The target dispatch queue to which the block is submitted.
- * The result of passing NULL in this parameter is undefined.
+ * The dispatch queue to which the block is submitted.
+ * The preferred value to pass is DISPATCH_APPLY_AUTO to automatically use
+ * a queue appropriate for the calling thread.
  *
  * @param block
  * The block to be invoked the specified number of iterations.
@@ -243,7 +278,7 @@
  * @function dispatch_apply_f
  *
  * @abstract
- * Submits a function to a dispatch queue for multiple invocations.
+ * Submits a function to a dispatch queue for parallel invocation.
  *
  * @discussion
  * See dispatch_apply() for details.
@@ -252,14 +287,15 @@
  * The number of iterations to perform.
  *
  * @param queue
- * The target dispatch queue to which the function is submitted.
- * The result of passing NULL in this parameter is undefined.
+ * The dispatch queue to which the function is submitted.
+ * The preferred value to pass is DISPATCH_APPLY_AUTO to automatically use
+ * a queue appropriate for the calling thread.
  *
  * @param context
  * The application-defined context parameter to pass to the function.
  *
  * @param work
- * The application-defined function to invoke on the target queue. The first
+ * The application-defined function to invoke on the specified queue. The first
  * parameter passed to this function is the context provided to
  * dispatch_apply_f(). The second parameter passed to this function is the
  * current index of iteration.
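Not part of the patch: a short usage sketch of the DISPATCH_APPLY_AUTO constant documented in the hunk above, covering both the block and function variants. The array, count, and helper names are illustrative.

```c
#include <dispatch/dispatch.h>
#include <stddef.h>

void
square_all(float *values, size_t count)
{
	/* Iterations may run concurrently on a global queue whose QoS
	 * matches the calling thread; the block must be reentrant safe. */
	dispatch_apply(count, DISPATCH_APPLY_AUTO, ^(size_t i) {
		values[i] *= values[i];
	});
}

static void
square_one(void *ctxt, size_t i)
{
	float *values = (float *)ctxt;
	values[i] *= values[i];
}

void
square_all_f(float *values, size_t count)
{
	/* Same behavior without blocks, via dispatch_apply_f(). */
	dispatch_apply_f(count, DISPATCH_APPLY_AUTO, values, square_one);
}
```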
diff --git a/libdispatch.xcodeproj/project.pbxproj b/libdispatch.xcodeproj/project.pbxproj
index ce73d95..361994f 100644
--- a/libdispatch.xcodeproj/project.pbxproj
+++ b/libdispatch.xcodeproj/project.pbxproj
@@ -331,6 +331,37 @@
 		E48EC97C1835BADD00EAC4F1 /* yield.h in Headers */ = {isa = PBXBuildFile; fileRef = E48EC97B1835BADD00EAC4F1 /* yield.h */; };
 		E48EC97D1835BADD00EAC4F1 /* yield.h in Headers */ = {isa = PBXBuildFile; fileRef = E48EC97B1835BADD00EAC4F1 /* yield.h */; };
 		E48EC97E1835BADD00EAC4F1 /* yield.h in Headers */ = {isa = PBXBuildFile; fileRef = E48EC97B1835BADD00EAC4F1 /* yield.h */; };
+		E49BB6D11E70748100868613 /* provider.d in Sources */ = {isa = PBXBuildFile; fileRef = E43570B8126E93380097AB9F /* provider.d */; };
+		E49BB6D21E70748100868613 /* protocol.defs in Sources */ = {isa = PBXBuildFile; fileRef = FC7BED950E8361E600161930 /* protocol.defs */; settings = {ATTRIBUTES = (Client, Server, ); }; };
+		E49BB6D31E70748100868613 /* venture.c in Sources */ = {isa = PBXBuildFile; fileRef = 6E9955CE1C3B218E0071D40C /* venture.c */; };
+		E49BB6D41E70748100868613 /* firehose.defs in Sources */ = {isa = PBXBuildFile; fileRef = 72DEAA9B1AE1B0BD00289540 /* firehose.defs */; };
+		E49BB6D51E70748100868613 /* firehose_buffer.c in Sources */ = {isa = PBXBuildFile; fileRef = 72DEAA971AE181D300289540 /* firehose_buffer.c */; };
+		E49BB6D61E70748100868613 /* event_kevent.c in Sources */ = {isa = PBXBuildFile; fileRef = 6E5ACCB01D3C4CFB007DA2B4 /* event_kevent.c */; };
+		E49BB6D71E70748100868613 /* resolver.c in Sources */ = {isa = PBXBuildFile; fileRef = E44EBE371251656400645D88 /* resolver.c */; };
+		E49BB6D81E70748100868613 /* mach.c in Sources */ = {isa = PBXBuildFile; fileRef = 6E4BACBC1D48A41500B562AE /* mach.c */; };
+		E49BB6D91E70748100868613 /* init.c in Sources */ = {isa = PBXBuildFile; fileRef = E44EBE3B1251659900645D88 /* init.c */; };
+		E49BB6DA1E70748100868613 /* queue.c in Sources */ = {isa = PBXBuildFile; fileRef = FC7BED8A0E8361E600161930 /* queue.c */; };
+		E49BB6DB1E70748100868613 /* semaphore.c in Sources */ = {isa = PBXBuildFile; fileRef = 721F5CCE0F15553500FF03A6 /* semaphore.c */; };
+		E49BB6DC1E70748100868613 /* lock.c in Sources */ = {isa = PBXBuildFile; fileRef = 6EF2CAAB1C8899D5001ABE83 /* lock.c */; };
+		E49BB6DD1E70748100868613 /* firehose_reply.defs in Sources */ = {isa = PBXBuildFile; fileRef = 72406A031AF95DF800DF4E2B /* firehose_reply.defs */; settings = {ATTRIBUTES = (Server, ); }; };
+		E49BB6DE1E70748100868613 /* once.c in Sources */ = {isa = PBXBuildFile; fileRef = 96DF70BD0F38FE3C0074BD99 /* once.c */; };
+		E49BB6DF1E70748100868613 /* apply.c in Sources */ = {isa = PBXBuildFile; fileRef = 9676A0E00F3E755D00713ADB /* apply.c */; };
+		E49BB6E01E70748100868613 /* object.c in Sources */ = {isa = PBXBuildFile; fileRef = 9661E56A0F3E7DDF00749F3E /* object.c */; };
+		E49BB6E11E70748100868613 /* benchmark.c in Sources */ = {isa = PBXBuildFile; fileRef = 965CD6340F3E806200D4E28D /* benchmark.c */; };
+		E49BB6E21E70748100868613 /* event_epoll.c in Sources */ = {isa = PBXBuildFile; fileRef = 6EA7937D1D456D1300929B1B /* event_epoll.c */; };
+		E49BB6E31E70748100868613 /* source.c in Sources */ = {isa = PBXBuildFile; fileRef = 96A8AA860F41E7A400CD570B /* source.c */; };
+		E49BB6E41E70748100868613 /* time.c in Sources */ = {isa = PBXBuildFile; fileRef = 96032E4A0F5CC8C700241C5F /* time.c */; };
+		E49BB6E51E70748100868613 /* data.c in Sources */ = {isa = PBXBuildFile; fileRef = 5AAB45BF10D30B79004407EA /* data.c */; };
+		E49BB6E61E70748100868613 /* io.c in Sources */ = {isa = PBXBuildFile; fileRef = 5A27262510F26F1900751FBC /* io.c */; };
+		E49BB6E71E70748100868613 /* block.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E43A724F1AF85BBC00BAA921 /* block.cpp */; };
+		E49BB6E81E70748100868613 /* event.c in Sources */ = {isa = PBXBuildFile; fileRef = 6E5ACCBD1D3C6719007DA2B4 /* event.c */; };
+		E49BB6E91E70748100868613 /* transform.c in Sources */ = {isa = PBXBuildFile; fileRef = C9C5F80D143C1771006DC718 /* transform.c */; };
+		E49BB6EA1E70748100868613 /* object.m in Sources */ = {isa = PBXBuildFile; fileRef = E4FC3263145F46C9002FBDDB /* object.m */; };
+		E49BB6EB1E70748100868613 /* allocator.c in Sources */ = {isa = PBXBuildFile; fileRef = 2BBF5A62154B64F5002B20F9 /* allocator.c */; };
+		E49BB6EC1E70748100868613 /* data.m in Sources */ = {isa = PBXBuildFile; fileRef = E420866F16027AE500EEE210 /* data.m */; };
+		E49BB6ED1E70748100868613 /* voucher.c in Sources */ = {isa = PBXBuildFile; fileRef = E44A8E6A1805C3E0009FFDB6 /* voucher.c */; };
+		E49BB7091E70A39700868613 /* venture.c in Sources */ = {isa = PBXBuildFile; fileRef = 6E9955CE1C3B218E0071D40C /* venture.c */; };
+		E49BB70A1E70A3B000868613 /* venture.c in Sources */ = {isa = PBXBuildFile; fileRef = 6E9955CE1C3B218E0071D40C /* venture.c */; };
 		E49F2423125D3C960057C971 /* resolver.c in Sources */ = {isa = PBXBuildFile; fileRef = E44EBE371251656400645D88 /* resolver.c */; };
 		E49F2424125D3C970057C971 /* resolver.c in Sources */ = {isa = PBXBuildFile; fileRef = E44EBE371251656400645D88 /* resolver.c */; };
 		E49F2499125D48D80057C971 /* resolver.c in Sources */ = {isa = PBXBuildFile; fileRef = E44EBE371251656400645D88 /* resolver.c */; };
@@ -559,6 +590,13 @@
 			remoteGlobalIDString = E4EC121612514715000DDBD1;
 			remoteInfo = "libdispatch mp resolved";
 		};
+		E49BB6F71E7074C100868613 /* PBXContainerItemProxy */ = {
+			isa = PBXContainerItemProxy;
+			containerPortal = 08FB7793FE84155DC02AAC07 /* Project object */;
+			proxyType = 1;
+			remoteGlobalIDString = E49BB6CE1E70748100868613;
+			remoteInfo = "libdispatch alt resolved";
+		};
 		E4B515DA164B317700E003AF /* PBXContainerItemProxy */ = {
 			isa = PBXContainerItemProxy;
 			containerPortal = 08FB7793FE84155DC02AAC07 /* Project object */;
@@ -620,7 +658,6 @@
 		6E326B161C239431002A6505 /* dispatch_timer_short.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = dispatch_timer_short.c; sourceTree = "<group>"; };
 		6E326B171C239431002A6505 /* dispatch_timer_timeout.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = dispatch_timer_timeout.c; sourceTree = "<group>"; };
 		6E326B441C239B61002A6505 /* dispatch_priority.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = dispatch_priority.c; sourceTree = "<group>"; };
-		6E4130C91B431697001A152D /* backward-compat.xcconfig */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.xcconfig; path = "backward-compat.xcconfig"; sourceTree = "<group>"; };
 		6E4BACBC1D48A41500B562AE /* mach.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = mach.c; sourceTree = "<group>"; };
 		6E4BACC91D48A89500B562AE /* mach_internal.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = mach_internal.h; sourceTree = "<group>"; };
 		6E4FC9D11C84123600520351 /* os_venture_basic.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = os_venture_basic.c; sourceTree = "<group>"; };
@@ -658,6 +695,7 @@
 		6EB4E4421BA8BD7800D7B9D2 /* libfirehose.xcconfig */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.xcconfig; path = libfirehose.xcconfig; sourceTree = "<group>"; };
 		6EB60D291BBB19640092FA94 /* firehose_inline_internal.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = firehose_inline_internal.h; sourceTree = "<group>"; };
 		6EC5ABE31D4436E4004F8674 /* dispatch_deadname.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = dispatch_deadname.c; sourceTree = "<group>"; };
+		6EC670C61E37E201004F10D6 /* dispatch_network_event_thread.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = dispatch_network_event_thread.c; sourceTree = "<group>"; };
 		6EC670C71E37E201004F10D6 /* perf_mach_async.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = perf_mach_async.c; sourceTree = "<group>"; };
 		6EC670C81E37E201004F10D6 /* perf_pipepingpong.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = perf_pipepingpong.c; sourceTree = "<group>"; };
 		6EDB888D1CB73BDC006776D6 /* dispatch_kevent_cancel_races.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = dispatch_kevent_cancel_races.c; sourceTree = "<group>"; };
@@ -701,6 +739,9 @@
 		96BC39BC0F3EBAB100C59689 /* queue_private.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = queue_private.h; sourceTree = "<group>"; };
 		96C9553A0F3EAEDD000D2CA4 /* once.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = once.h; sourceTree = "<group>"; };
 		96DF70BD0F38FE3C0074BD99 /* once.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; lineEnding = 0; path = once.c; sourceTree = "<group>"; xcLanguageSpecificationIdentifier = xcode.lang.c; };
+		B63B793F1E8F004F0060C1E1 /* dispatch_no_blocks.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; path = dispatch_no_blocks.c; sourceTree = "<group>"; };
+		B68330BC1EBCF6080003E71C /* dispatch_wl.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; path = dispatch_wl.c; sourceTree = "<group>"; };
+		B6AC73FD1EB10973009FB2F2 /* perf_thread_request.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; path = perf_thread_request.c; sourceTree = "<group>"; };
 		B6AE9A4A1D7F53B300AC007F /* dispatch_queue_create.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = dispatch_queue_create.c; sourceTree = "<group>"; };
 		B6AE9A561D7F53C100AC007F /* perf_async_bench.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = perf_async_bench.m; sourceTree = "<group>"; };
 		B6AE9A581D7F53CB00AC007F /* perf_bench.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = perf_bench.m; sourceTree = "<group>"; };
@@ -739,12 +780,13 @@
 		E44F9DA816543F79001DCD38 /* introspection_internal.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = introspection_internal.h; sourceTree = "<group>"; };
 		E454569214746F1B00106147 /* object_private.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = object_private.h; sourceTree = "<group>"; };
 		E463024F1761603C00E11F4C /* atomic_sfb.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = atomic_sfb.h; sourceTree = "<group>"; };
-		E46DBC5714EE10C80001F9F6 /* libdispatch.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = libdispatch.a; sourceTree = BUILT_PRODUCTS_DIR; };
+		E46DBC5714EE10C80001F9F6 /* libdispatch_up.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = libdispatch_up.a; sourceTree = BUILT_PRODUCTS_DIR; };
 		E46DBC5814EE11BC0001F9F6 /* libdispatch-up-static.xcconfig */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.xcconfig; path = "libdispatch-up-static.xcconfig"; sourceTree = "<group>"; };
 		E47D6BB5125F0F800070D91C /* resolved.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = resolved.h; sourceTree = "<group>"; };
 		E482F1CD12DBAB590030614D /* postprocess-headers.sh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.script.sh; path = "postprocess-headers.sh"; sourceTree = "<group>"; };
 		E48AF55916E70FD9004105FF /* io_private.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = io_private.h; path = private/io_private.h; sourceTree = SOURCE_ROOT; tabWidth = 8; };
 		E48EC97B1835BADD00EAC4F1 /* yield.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = yield.h; sourceTree = "<group>"; };
+		E49BB6F21E70748100868613 /* libdispatch_alt.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = libdispatch_alt.a; sourceTree = BUILT_PRODUCTS_DIR; };
 		E49F24DF125D57FA0057C971 /* libdispatch.dylib */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.dylib"; includeInIndex = 0; path = libdispatch.dylib; sourceTree = BUILT_PRODUCTS_DIR; };
 		E49F251D125D630A0057C971 /* install-manpages.sh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.script.sh; path = "install-manpages.sh"; sourceTree = "<group>"; };
 		E49F251E125D631D0057C971 /* mig-headers.sh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.script.sh; path = "mig-headers.sh"; sourceTree = "<group>"; };
@@ -913,15 +955,16 @@
 			isa = PBXGroup;
 			children = (
 				D2AAC046055464E500DB518D /* libdispatch.dylib */,
-				E4EC11C312514302000DDBD1 /* libdispatch_up.a */,
-				E4EC122D12514715000DDBD1 /* libdispatch_mp.a */,
-				E49F24DF125D57FA0057C971 /* libdispatch.dylib */,
-				E46DBC5714EE10C80001F9F6 /* libdispatch.a */,
 				E4B515D6164B2DA300E003AF /* libdispatch.dylib */,
-				6EB4E4091BA8BCAD00D7B9D2 /* libfirehose_server.a */,
-				6E040C631C499B1B00411A2E /* libfirehose_kernel.a */,
+				E49F24DF125D57FA0057C971 /* libdispatch.dylib */,
+				E4EC122D12514715000DDBD1 /* libdispatch_mp.a */,
+				E4EC11C312514302000DDBD1 /* libdispatch_up.a */,
+				E49BB6F21E70748100868613 /* libdispatch_alt.a */,
+				E46DBC5714EE10C80001F9F6 /* libdispatch_up.a */,
 				C01866BD1C5973210040FC07 /* libdispatch.a */,
 				C00B0E0A1C5AEBBE000330B3 /* libdispatch_dyld_stub.a */,
+				6E040C631C499B1B00411A2E /* libfirehose_kernel.a */,
+				6EB4E4091BA8BCAD00D7B9D2 /* libfirehose_server.a */,
 			);
 			name = Products;
 			sourceTree = "<group>";
@@ -1010,6 +1053,8 @@
 				6E326ABD1C22A577002A6505 /* dispatch_io_net.c */,
 				6E326ABE1C22A577002A6505 /* dispatch_io.c */,
 				6EDB888D1CB73BDC006776D6 /* dispatch_kevent_cancel_races.c */,
+				6EC670C61E37E201004F10D6 /* dispatch_network_event_thread.c */,
+				B63B793F1E8F004F0060C1E1 /* dispatch_no_blocks.c */,
 				C96CE17A1CEB851600F4B8E6 /* dispatch_objc.m */,
 				6E67D9131C17676D00FC98AC /* dispatch_overcommit.c */,
 				6E67D9151C1768B300FC98AC /* dispatch_pingpong.c */,
@@ -1039,12 +1084,14 @@
 				6E62B0531C55806200D2C7C0 /* dispatch_trysync.c */,
 				6E8E4EC91C1A670B0004F5CC /* dispatch_vm.c */,
 				6E326AB71C225FCA002A6505 /* dispatch_vnode.c */,
+				B68330BC1EBCF6080003E71C /* dispatch_wl.c */,
 				6E67D9171C17BA7200FC98AC /* nsoperation.m */,
 				6E4FC9D11C84123600520351 /* os_venture_basic.c */,
 				B6AE9A561D7F53C100AC007F /* perf_async_bench.m */,
 				B6AE9A581D7F53CB00AC007F /* perf_bench.m */,
 				6EC670C71E37E201004F10D6 /* perf_mach_async.c */,
 				6EC670C81E37E201004F10D6 /* perf_pipepingpong.c */,
+				B6AC73FD1EB10973009FB2F2 /* perf_thread_request.c */,
 				92F3FE921BEC686300025962 /* Makefile */,
 				6E8E4E6E1C1A35EE0004F5CC /* test_lib.c */,
 				6E8E4E6F1C1A35EE0004F5CC /* test_lib.h */,
@@ -1089,7 +1136,6 @@
 		E40041E4125E71150022B135 /* xcodeconfig */ = {
 			isa = PBXGroup;
 			children = (
-				6E4130C91B431697001A152D /* backward-compat.xcconfig */,
 				E43D93F11097917E004F6A62 /* libdispatch.xcconfig */,
 				E40041AA125D705F0022B135 /* libdispatch-resolver.xcconfig */,
 				E40041A9125D70590022B135 /* libdispatch-resolved.xcconfig */,
@@ -1587,8 +1633,9 @@
 			);
 			dependencies = (
 				6EF0B27E1BA8C5BF007FA4F6 /* PBXTargetDependency */,
-				E47D6ECB125FEB9D0070D91C /* PBXTargetDependency */,
 				E47D6ECD125FEBA10070D91C /* PBXTargetDependency */,
+				E47D6ECB125FEB9D0070D91C /* PBXTargetDependency */,
+				E49BB6F81E7074C100868613 /* PBXTargetDependency */,
 				E4B515DB164B317700E003AF /* PBXTargetDependency */,
 				C01866C21C597AEA0040FC07 /* PBXTargetDependency */,
 				E437F0D614F7441F00F0B997 /* PBXTargetDependency */,
@@ -1611,7 +1658,24 @@
 			);
 			name = "libdispatch up static";
 			productName = libdispatch;
-			productReference = E46DBC5714EE10C80001F9F6 /* libdispatch.a */;
+			productReference = E46DBC5714EE10C80001F9F6 /* libdispatch_up.a */;
+			productType = "com.apple.product-type.library.static";
+		};
+		E49BB6CE1E70748100868613 /* libdispatch alt resolved */ = {
+			isa = PBXNativeTarget;
+			buildConfigurationList = E49BB6EF1E70748100868613 /* Build configuration list for PBXNativeTarget "libdispatch alt resolved" */;
+			buildPhases = (
+				E49BB6CF1E70748100868613 /* Mig Headers */,
+				E49BB6D01E70748100868613 /* Sources */,
+				E49BB6EE1E70748100868613 /* Symlink normal variant */,
+			);
+			buildRules = (
+			);
+			dependencies = (
+			);
+			name = "libdispatch alt resolved";
+			productName = libdispatch;
+			productReference = E49BB6F21E70748100868613 /* libdispatch_alt.a */;
 			productType = "com.apple.product-type.library.static";
 		};
 		E49F24A9125D57FA0057C971 /* libdispatch no resolver */ = {
@@ -1692,7 +1756,7 @@
 			isa = PBXProject;
 			attributes = {
 				BuildIndependentTargetsInParallel = YES;
-				LastUpgradeCheck = 0820;
+				LastUpgradeCheck = 0900;
 				TargetAttributes = {
 					3F3C9326128E637B0042B1F7 = {
 						ProvisioningStyle = Manual;
@@ -1776,6 +1840,7 @@
 				E49F24A9125D57FA0057C971 /* libdispatch no resolver */,
 				E4EC121612514715000DDBD1 /* libdispatch mp resolved */,
 				E4EC118F12514302000DDBD1 /* libdispatch up resolved */,
+				E49BB6CE1E70748100868613 /* libdispatch alt resolved */,
 				E4B51595164B2DA300E003AF /* libdispatch introspection */,
 				E46DBC1A14EE10C80001F9F6 /* libdispatch up static */,
 				C01866A41C5973210040FC07 /* libdispatch mp static */,
@@ -1931,6 +1996,47 @@
 			shellScript = ". \"${SCRIPT_INPUT_FILE_0}\"";
 			showEnvVarsInLog = 0;
 		};
+		E49BB6CF1E70748100868613 /* Mig Headers */ = {
+			isa = PBXShellScriptBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+			);
+			inputPaths = (
+				"$(SRCROOT)/src/protocol.defs",
+				"$(SRCROOT)/src/firehose/firehose.defs",
+				"$(SRCROOT)/src/firehose/firehose_reply.defs",
+				"$(SRCROOT)/xcodescripts/mig-headers.sh",
+			);
+			name = "Mig Headers";
+			outputPaths = (
+				"$(DERIVED_FILE_DIR)/protocol.h",
+				"$(DERIVED_FILE_DIR)/protocolServer.h",
+				"$(DERIVED_FILE_DIR)/firehose.h",
+				"$(DERIVED_FILE_DIR)/firehoseServer.h",
+				"$(DERIVED_FILE_DIR)/firehose_reply.h",
+				"$(DERIVED_FILE_DIR)/firehose_replyServer.h",
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+			shellPath = "/bin/bash -e";
+			shellScript = ". \"${SCRIPT_INPUT_FILE_3}\"";
+			showEnvVarsInLog = 0;
+		};
+		E49BB6EE1E70748100868613 /* Symlink normal variant */ = {
+			isa = PBXShellScriptBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+			);
+			inputPaths = (
+			);
+			name = "Symlink normal variant";
+			outputPaths = (
+				"$(CONFIGURATION_BUILD_DIR)/$(PRODUCT_NAME)_normal.a",
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+			shellPath = "/bin/bash -e";
+			shellScript = "ln -fs \"${PRODUCT_NAME}.a\" \"${SCRIPT_OUTPUT_FILE_0}\"";
+			showEnvVarsInLog = 0;
+		};
 		E49F24D7125D57FA0057C971 /* Install Manpages */ = {
 			isa = PBXShellScriptBuildPhase;
 			buildActionMask = 8;
@@ -2096,10 +2202,10 @@
 			isa = PBXSourcesBuildPhase;
 			buildActionMask = 2147483647;
 			files = (
-				6EF0B27A1BA8C57D007FA4F6 /* firehose_server_object.m in Sources */,
 				6E90269C1BB9BD50004DC3AD /* firehose.defs in Sources */,
-				6E21F2E91BBB240E0000C6A5 /* firehose_server.c in Sources */,
 				6EF0B2781BA8C56E007FA4F6 /* firehose_reply.defs in Sources */,
+				6EF0B27A1BA8C57D007FA4F6 /* firehose_server_object.m in Sources */,
+				6E21F2E91BBB240E0000C6A5 /* firehose_server.c in Sources */,
 			);
 			runOnlyForDeploymentPostprocessing = 0;
 		};
@@ -2108,30 +2214,31 @@
 			buildActionMask = 2147483647;
 			files = (
 				C00B0DF21C5AEBBE000330B3 /* protocol.defs in Sources */,
-				C00B0DF31C5AEBBE000330B3 /* resolver.c in Sources */,
-				6E4BACFC1D49A04A00B562AE /* event_epoll.c in Sources */,
-				6EF2CAB31C8899ED001ABE83 /* lock.c in Sources */,
-				C00B0DF41C5AEBBE000330B3 /* init.c in Sources */,
-				C00B0DF51C5AEBBE000330B3 /* queue.c in Sources */,
-				C00B0DF61C5AEBBE000330B3 /* firehose_buffer.c in Sources */,
 				C00B0DF71C5AEBBE000330B3 /* firehose.defs in Sources */,
-				C00B0DF81C5AEBBE000330B3 /* block.cpp in Sources */,
-				C00B0DF91C5AEBBE000330B3 /* semaphore.c in Sources */,
-				6E4BACC81D48A42400B562AE /* mach.c in Sources */,
 				C00B0DFA1C5AEBBE000330B3 /* firehose_reply.defs in Sources */,
-				C00B0DFB1C5AEBBE000330B3 /* once.c in Sources */,
-				C00B0DFC1C5AEBBE000330B3 /* voucher.c in Sources */,
-				C00B0DFD1C5AEBBE000330B3 /* apply.c in Sources */,
+				C00B0DF31C5AEBBE000330B3 /* resolver.c in Sources */,
+				C00B0DF41C5AEBBE000330B3 /* init.c in Sources */,
 				C00B0DFE1C5AEBBE000330B3 /* object.c in Sources */,
-				C00B0DFF1C5AEBBE000330B3 /* benchmark.c in Sources */,
+				C00B0DF81C5AEBBE000330B3 /* block.cpp in Sources */,
+				6EF2CAB31C8899ED001ABE83 /* lock.c in Sources */,
+				C00B0DF91C5AEBBE000330B3 /* semaphore.c in Sources */,
+				C00B0DFB1C5AEBBE000330B3 /* once.c in Sources */,
+				C00B0DF51C5AEBBE000330B3 /* queue.c in Sources */,
+				C00B0DFD1C5AEBBE000330B3 /* apply.c in Sources */,
 				C00B0E001C5AEBBE000330B3 /* source.c in Sources */,
-				C00B0E011C5AEBBE000330B3 /* time.c in Sources */,
-				C00B0E021C5AEBBE000330B3 /* data.c in Sources */,
-				6EA962A61D48625500759D53 /* event_kevent.c in Sources */,
-				C00B0E031C5AEBBE000330B3 /* io.c in Sources */,
-				C00B0E041C5AEBBE000330B3 /* transform.c in Sources */,
+				6E4BACC81D48A42400B562AE /* mach.c in Sources */,
 				6EA9629E1D48622C00759D53 /* event.c in Sources */,
+				6EA962A61D48625500759D53 /* event_kevent.c in Sources */,
+				6E4BACFC1D49A04A00B562AE /* event_epoll.c in Sources */,
+				C00B0DFC1C5AEBBE000330B3 /* voucher.c in Sources */,
+				C00B0DF61C5AEBBE000330B3 /* firehose_buffer.c in Sources */,
+				C00B0E031C5AEBBE000330B3 /* io.c in Sources */,
+				C00B0E021C5AEBBE000330B3 /* data.c in Sources */,
+				C00B0E041C5AEBBE000330B3 /* transform.c in Sources */,
+				C00B0E011C5AEBBE000330B3 /* time.c in Sources */,
 				C00B0E051C5AEBBE000330B3 /* allocator.c in Sources */,
+				C00B0DFF1C5AEBBE000330B3 /* benchmark.c in Sources */,
+				E49BB70A1E70A3B000868613 /* venture.c in Sources */,
 			);
 			runOnlyForDeploymentPostprocessing = 0;
 		};
@@ -2140,30 +2247,31 @@
 			buildActionMask = 2147483647;
 			files = (
 				C01866A61C5973210040FC07 /* protocol.defs in Sources */,
-				C01866A71C5973210040FC07 /* resolver.c in Sources */,
-				6E4BACFB1D49A04A00B562AE /* event_epoll.c in Sources */,
-				6EF2CAB21C8899EC001ABE83 /* lock.c in Sources */,
-				C01866A81C5973210040FC07 /* init.c in Sources */,
-				C01866A91C5973210040FC07 /* queue.c in Sources */,
-				C01866AA1C5973210040FC07 /* firehose_buffer.c in Sources */,
 				C01866AB1C5973210040FC07 /* firehose.defs in Sources */,
-				C01866AC1C5973210040FC07 /* block.cpp in Sources */,
-				C01866AD1C5973210040FC07 /* semaphore.c in Sources */,
-				6E4BACC71D48A42300B562AE /* mach.c in Sources */,
 				C01866AE1C5973210040FC07 /* firehose_reply.defs in Sources */,
-				C01866AF1C5973210040FC07 /* once.c in Sources */,
-				C01866B01C5973210040FC07 /* voucher.c in Sources */,
-				C01866B11C5973210040FC07 /* apply.c in Sources */,
+				C01866A71C5973210040FC07 /* resolver.c in Sources */,
+				C01866A81C5973210040FC07 /* init.c in Sources */,
 				C01866B21C5973210040FC07 /* object.c in Sources */,
-				C01866B31C5973210040FC07 /* benchmark.c in Sources */,
+				C01866AC1C5973210040FC07 /* block.cpp in Sources */,
+				6EF2CAB21C8899EC001ABE83 /* lock.c in Sources */,
+				C01866AD1C5973210040FC07 /* semaphore.c in Sources */,
+				C01866AF1C5973210040FC07 /* once.c in Sources */,
+				C01866A91C5973210040FC07 /* queue.c in Sources */,
+				C01866B11C5973210040FC07 /* apply.c in Sources */,
 				C01866B41C5973210040FC07 /* source.c in Sources */,
-				C01866B51C5973210040FC07 /* time.c in Sources */,
-				C01866B61C5973210040FC07 /* data.c in Sources */,
-				6EA962A51D48625400759D53 /* event_kevent.c in Sources */,
-				C01866B71C5973210040FC07 /* io.c in Sources */,
-				C01866B81C5973210040FC07 /* transform.c in Sources */,
+				6E4BACC71D48A42300B562AE /* mach.c in Sources */,
 				6EA9629D1D48622B00759D53 /* event.c in Sources */,
+				6EA962A51D48625400759D53 /* event_kevent.c in Sources */,
+				6E4BACFB1D49A04A00B562AE /* event_epoll.c in Sources */,
+				C01866B01C5973210040FC07 /* voucher.c in Sources */,
+				C01866AA1C5973210040FC07 /* firehose_buffer.c in Sources */,
+				C01866B71C5973210040FC07 /* io.c in Sources */,
+				C01866B61C5973210040FC07 /* data.c in Sources */,
+				C01866B81C5973210040FC07 /* transform.c in Sources */,
+				C01866B51C5973210040FC07 /* time.c in Sources */,
 				C01866B91C5973210040FC07 /* allocator.c in Sources */,
+				C01866B31C5973210040FC07 /* benchmark.c in Sources */,
+				E49BB7091E70A39700868613 /* venture.c in Sources */,
 			);
 			runOnlyForDeploymentPostprocessing = 0;
 		};
@@ -2173,33 +2281,33 @@
 			files = (
 				E43570B9126E93380097AB9F /* provider.d in Sources */,
 				FC7BEDA40E8361E600161930 /* protocol.defs in Sources */,
-				6E9955CF1C3B218E0071D40C /* venture.c in Sources */,
 				6ED64B471BBD89AF00C35F4D /* firehose.defs in Sources */,
-				6ED64B441BBD898700C35F4D /* firehose_buffer.c in Sources */,
-				6EA9629F1D48625000759D53 /* event_kevent.c in Sources */,
-				E49F2499125D48D80057C971 /* resolver.c in Sources */,
-				6E4BACBD1D48A41500B562AE /* mach.c in Sources */,
-				E44EBE3E1251659900645D88 /* init.c in Sources */,
-				FC7BED990E8361E600161930 /* queue.c in Sources */,
-				721F5CCF0F15553500FF03A6 /* semaphore.c in Sources */,
-				6EF2CAAC1C8899D5001ABE83 /* lock.c in Sources */,
 				6ED64B491BBD89BC00C35F4D /* firehose_reply.defs in Sources */,
-				96DF70BE0F38FE3C0074BD99 /* once.c in Sources */,
-				9676A0E10F3E755D00713ADB /* apply.c in Sources */,
+				E49F2499125D48D80057C971 /* resolver.c in Sources */,
+				E44EBE3E1251659900645D88 /* init.c in Sources */,
 				9661E56B0F3E7DDF00749F3E /* object.c in Sources */,
-				965CD6350F3E806200D4E28D /* benchmark.c in Sources */,
-				6E4BACF51D49A04600B562AE /* event_epoll.c in Sources */,
-				96A8AA870F41E7A400CD570B /* source.c in Sources */,
-				96032E4B0F5CC8C700241C5F /* time.c in Sources */,
-				5AAB45C010D30B79004407EA /* data.c in Sources */,
-				5A27262610F26F1900751FBC /* io.c in Sources */,
-				E43A72501AF85BBC00BAA921 /* block.cpp in Sources */,
-				6EA962971D48622600759D53 /* event.c in Sources */,
-				C9C5F80E143C1771006DC718 /* transform.c in Sources */,
 				E4FC3264145F46C9002FBDDB /* object.m in Sources */,
-				2BBF5A63154B64F5002B20F9 /* allocator.c in Sources */,
-				E420867016027AE500EEE210 /* data.m in Sources */,
+				E43A72501AF85BBC00BAA921 /* block.cpp in Sources */,
+				6EF2CAAC1C8899D5001ABE83 /* lock.c in Sources */,
+				721F5CCF0F15553500FF03A6 /* semaphore.c in Sources */,
+				96DF70BE0F38FE3C0074BD99 /* once.c in Sources */,
+				FC7BED990E8361E600161930 /* queue.c in Sources */,
+				9676A0E10F3E755D00713ADB /* apply.c in Sources */,
+				96A8AA870F41E7A400CD570B /* source.c in Sources */,
+				6E4BACBD1D48A41500B562AE /* mach.c in Sources */,
+				6EA962971D48622600759D53 /* event.c in Sources */,
+				6EA9629F1D48625000759D53 /* event_kevent.c in Sources */,
+				6E4BACF51D49A04600B562AE /* event_epoll.c in Sources */,
 				E44A8E6B1805C3E0009FFDB6 /* voucher.c in Sources */,
+				6ED64B441BBD898700C35F4D /* firehose_buffer.c in Sources */,
+				5A27262610F26F1900751FBC /* io.c in Sources */,
+				5AAB45C010D30B79004407EA /* data.c in Sources */,
+				E420867016027AE500EEE210 /* data.m in Sources */,
+				C9C5F80E143C1771006DC718 /* transform.c in Sources */,
+				96032E4B0F5CC8C700241C5F /* time.c in Sources */,
+				2BBF5A63154B64F5002B20F9 /* allocator.c in Sources */,
+				965CD6350F3E806200D4E28D /* benchmark.c in Sources */,
+				6E9955CF1C3B218E0071D40C /* venture.c in Sources */,
 			);
 			runOnlyForDeploymentPostprocessing = 0;
 		};
@@ -2207,32 +2315,68 @@
 			isa = PBXSourcesBuildPhase;
 			buildActionMask = 2147483647;
 			files = (
-				6E4BACC61D48A42300B562AE /* mach.c in Sources */,
 				E46DBC4014EE10C80001F9F6 /* protocol.defs in Sources */,
-				E46DBC4114EE10C80001F9F6 /* resolver.c in Sources */,
-				6EF2CAB11C8899EC001ABE83 /* lock.c in Sources */,
-				E46DBC4214EE10C80001F9F6 /* init.c in Sources */,
-				E46DBC4314EE10C80001F9F6 /* queue.c in Sources */,
-				6EA962A41D48625300759D53 /* event_kevent.c in Sources */,
-				6EE664271BE2FD5C00ED7B1C /* firehose_buffer.c in Sources */,
-				6EA9629C1D48622A00759D53 /* event.c in Sources */,
 				6EBEC7E71BBDD30F009B1596 /* firehose.defs in Sources */,
-				E43A72881AF85BE900BAA921 /* block.cpp in Sources */,
-				E46DBC4414EE10C80001F9F6 /* semaphore.c in Sources */,
-				6E9956011C3B21980071D40C /* venture.c in Sources */,
 				6EBEC7EA1BBDD326009B1596 /* firehose_reply.defs in Sources */,
-				E46DBC4514EE10C80001F9F6 /* once.c in Sources */,
-				E44A8E701805C3E0009FFDB6 /* voucher.c in Sources */,
-				E46DBC4614EE10C80001F9F6 /* apply.c in Sources */,
+				E46DBC4114EE10C80001F9F6 /* resolver.c in Sources */,
+				E46DBC4214EE10C80001F9F6 /* init.c in Sources */,
 				E46DBC4714EE10C80001F9F6 /* object.c in Sources */,
-				E46DBC4814EE10C80001F9F6 /* benchmark.c in Sources */,
+				E43A72881AF85BE900BAA921 /* block.cpp in Sources */,
+				6EF2CAB11C8899EC001ABE83 /* lock.c in Sources */,
+				E46DBC4414EE10C80001F9F6 /* semaphore.c in Sources */,
+				E46DBC4514EE10C80001F9F6 /* once.c in Sources */,
+				E46DBC4314EE10C80001F9F6 /* queue.c in Sources */,
+				E46DBC4614EE10C80001F9F6 /* apply.c in Sources */,
 				E46DBC4914EE10C80001F9F6 /* source.c in Sources */,
-				E46DBC4A14EE10C80001F9F6 /* time.c in Sources */,
-				E46DBC4B14EE10C80001F9F6 /* data.c in Sources */,
-				E46DBC4C14EE10C80001F9F6 /* io.c in Sources */,
-				E46DBC4D14EE10C80001F9F6 /* transform.c in Sources */,
-				2BBF5A67154B64F5002B20F9 /* allocator.c in Sources */,
+				6E4BACC61D48A42300B562AE /* mach.c in Sources */,
+				6EA9629C1D48622A00759D53 /* event.c in Sources */,
+				6EA962A41D48625300759D53 /* event_kevent.c in Sources */,
 				6E4BACFA1D49A04900B562AE /* event_epoll.c in Sources */,
+				E44A8E701805C3E0009FFDB6 /* voucher.c in Sources */,
+				6EE664271BE2FD5C00ED7B1C /* firehose_buffer.c in Sources */,
+				E46DBC4C14EE10C80001F9F6 /* io.c in Sources */,
+				E46DBC4B14EE10C80001F9F6 /* data.c in Sources */,
+				E46DBC4D14EE10C80001F9F6 /* transform.c in Sources */,
+				E46DBC4A14EE10C80001F9F6 /* time.c in Sources */,
+				2BBF5A67154B64F5002B20F9 /* allocator.c in Sources */,
+				E46DBC4814EE10C80001F9F6 /* benchmark.c in Sources */,
+				6E9956011C3B21980071D40C /* venture.c in Sources */,
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+		E49BB6D01E70748100868613 /* Sources */ = {
+			isa = PBXSourcesBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+				E49BB6D11E70748100868613 /* provider.d in Sources */,
+				E49BB6D21E70748100868613 /* protocol.defs in Sources */,
+				E49BB6D41E70748100868613 /* firehose.defs in Sources */,
+				E49BB6DD1E70748100868613 /* firehose_reply.defs in Sources */,
+				E49BB6D71E70748100868613 /* resolver.c in Sources */,
+				E49BB6D91E70748100868613 /* init.c in Sources */,
+				E49BB6E01E70748100868613 /* object.c in Sources */,
+				E49BB6EA1E70748100868613 /* object.m in Sources */,
+				E49BB6E71E70748100868613 /* block.cpp in Sources */,
+				E49BB6DC1E70748100868613 /* lock.c in Sources */,
+				E49BB6DB1E70748100868613 /* semaphore.c in Sources */,
+				E49BB6DE1E70748100868613 /* once.c in Sources */,
+				E49BB6D81E70748100868613 /* mach.c in Sources */,
+				E49BB6DA1E70748100868613 /* queue.c in Sources */,
+				E49BB6DF1E70748100868613 /* apply.c in Sources */,
+				E49BB6E31E70748100868613 /* source.c in Sources */,
+				E49BB6E81E70748100868613 /* event.c in Sources */,
+				E49BB6D61E70748100868613 /* event_kevent.c in Sources */,
+				E49BB6E21E70748100868613 /* event_epoll.c in Sources */,
+				E49BB6ED1E70748100868613 /* voucher.c in Sources */,
+				E49BB6D51E70748100868613 /* firehose_buffer.c in Sources */,
+				E49BB6E61E70748100868613 /* io.c in Sources */,
+				E49BB6E51E70748100868613 /* data.c in Sources */,
+				E49BB6EC1E70748100868613 /* data.m in Sources */,
+				E49BB6E91E70748100868613 /* transform.c in Sources */,
+				E49BB6E41E70748100868613 /* time.c in Sources */,
+				E49BB6EB1E70748100868613 /* allocator.c in Sources */,
+				E49BB6E11E70748100868613 /* benchmark.c in Sources */,
+				E49BB6D31E70748100868613 /* venture.c in Sources */,
 			);
 			runOnlyForDeploymentPostprocessing = 0;
 		};
@@ -2242,33 +2386,33 @@
 			files = (
 				E43570BA126E93380097AB9F /* provider.d in Sources */,
 				E49F24C8125D57FA0057C971 /* protocol.defs in Sources */,
-				6E9956051C3B219B0071D40C /* venture.c in Sources */,
 				6ED64B461BBD89AF00C35F4D /* firehose.defs in Sources */,
-				6ED64B401BBD898300C35F4D /* firehose_buffer.c in Sources */,
-				6EA962A01D48625100759D53 /* event_kevent.c in Sources */,
-				E49F24C9125D57FA0057C971 /* resolver.c in Sources */,
-				6E4BACC21D48A42000B562AE /* mach.c in Sources */,
-				E49F24CA125D57FA0057C971 /* init.c in Sources */,
-				E49F24CB125D57FA0057C971 /* queue.c in Sources */,
-				E49F24CC125D57FA0057C971 /* semaphore.c in Sources */,
-				6EF2CAAD1C8899E9001ABE83 /* lock.c in Sources */,
 				6ED64B4A1BBD89BD00C35F4D /* firehose_reply.defs in Sources */,
-				E49F24CD125D57FA0057C971 /* once.c in Sources */,
-				E49F24CE125D57FA0057C971 /* apply.c in Sources */,
+				E49F24C9125D57FA0057C971 /* resolver.c in Sources */,
+				E49F24CA125D57FA0057C971 /* init.c in Sources */,
 				E49F24CF125D57FA0057C971 /* object.c in Sources */,
-				E49F24D0125D57FA0057C971 /* benchmark.c in Sources */,
-				6E4BACF61D49A04700B562AE /* event_epoll.c in Sources */,
-				E49F24D1125D57FA0057C971 /* source.c in Sources */,
-				E49F24D2125D57FA0057C971 /* time.c in Sources */,
-				E49F24D3125D57FA0057C971 /* data.c in Sources */,
-				E49F24D4125D57FA0057C971 /* io.c in Sources */,
-				E43A72841AF85BCB00BAA921 /* block.cpp in Sources */,
-				6EA962981D48622700759D53 /* event.c in Sources */,
-				C93D6165143E190E00EB9023 /* transform.c in Sources */,
 				E4FC3265145F46C9002FBDDB /* object.m in Sources */,
-				2BBF5A64154B64F5002B20F9 /* allocator.c in Sources */,
-				E420867116027AE500EEE210 /* data.m in Sources */,
+				E43A72841AF85BCB00BAA921 /* block.cpp in Sources */,
+				6EF2CAAD1C8899E9001ABE83 /* lock.c in Sources */,
+				E49F24CC125D57FA0057C971 /* semaphore.c in Sources */,
+				E49F24CD125D57FA0057C971 /* once.c in Sources */,
+				E49F24CB125D57FA0057C971 /* queue.c in Sources */,
+				E49F24CE125D57FA0057C971 /* apply.c in Sources */,
+				E49F24D1125D57FA0057C971 /* source.c in Sources */,
+				6E4BACC21D48A42000B562AE /* mach.c in Sources */,
+				6EA962981D48622700759D53 /* event.c in Sources */,
+				6EA962A01D48625100759D53 /* event_kevent.c in Sources */,
+				6E4BACF61D49A04700B562AE /* event_epoll.c in Sources */,
 				E44A8E6C1805C3E0009FFDB6 /* voucher.c in Sources */,
+				6ED64B401BBD898300C35F4D /* firehose_buffer.c in Sources */,
+				E49F24D4125D57FA0057C971 /* io.c in Sources */,
+				E49F24D3125D57FA0057C971 /* data.c in Sources */,
+				E420867116027AE500EEE210 /* data.m in Sources */,
+				C93D6165143E190E00EB9023 /* transform.c in Sources */,
+				E49F24D2125D57FA0057C971 /* time.c in Sources */,
+				2BBF5A64154B64F5002B20F9 /* allocator.c in Sources */,
+				E49F24D0125D57FA0057C971 /* benchmark.c in Sources */,
+				6E9956051C3B219B0071D40C /* venture.c in Sources */,
 			);
 			runOnlyForDeploymentPostprocessing = 0;
 		};
@@ -2277,34 +2421,34 @@
 			buildActionMask = 2147483647;
 			files = (
 				E4B515BD164B2DA300E003AF /* provider.d in Sources */,
-				6EA962A31D48625300759D53 /* event_kevent.c in Sources */,
 				E4B515BE164B2DA300E003AF /* protocol.defs in Sources */,
-				E4B515BF164B2DA300E003AF /* resolver.c in Sources */,
-				6ED64B4B1BBD89BE00C35F4D /* firehose_reply.defs in Sources */,
 				6ED64B481BBD89B100C35F4D /* firehose.defs in Sources */,
+				6ED64B4B1BBD89BE00C35F4D /* firehose_reply.defs in Sources */,
+				E4B515BF164B2DA300E003AF /* resolver.c in Sources */,
 				E4B515C0164B2DA300E003AF /* init.c in Sources */,
-				6EA9629B1D48622900759D53 /* event.c in Sources */,
-				E4B515C1164B2DA300E003AF /* queue.c in Sources */,
-				6E9956021C3B21990071D40C /* venture.c in Sources */,
+				E4B515C5164B2DA300E003AF /* object.c in Sources */,
+				E4B515CC164B2DA300E003AF /* object.m in Sources */,
+				E43A72871AF85BCD00BAA921 /* block.cpp in Sources */,
+				6EF2CAB01C8899EB001ABE83 /* lock.c in Sources */,
 				E4B515C2164B2DA300E003AF /* semaphore.c in Sources */,
 				E4B515C3164B2DA300E003AF /* once.c in Sources */,
-				E43A72871AF85BCD00BAA921 /* block.cpp in Sources */,
+				E4B515C1164B2DA300E003AF /* queue.c in Sources */,
 				E4B515C4164B2DA300E003AF /* apply.c in Sources */,
-				E4B515C5164B2DA300E003AF /* object.c in Sources */,
-				6ED64B431BBD898600C35F4D /* firehose_buffer.c in Sources */,
-				E4B515C6164B2DA300E003AF /* benchmark.c in Sources */,
 				E4B515C7164B2DA300E003AF /* source.c in Sources */,
-				E4B515C8164B2DA300E003AF /* time.c in Sources */,
 				6E4BACC51D48A42200B562AE /* mach.c in Sources */,
-				E4B515C9164B2DA300E003AF /* data.c in Sources */,
-				E4B515CA164B2DA300E003AF /* io.c in Sources */,
-				E44A8E6F1805C3E0009FFDB6 /* voucher.c in Sources */,
-				E4B515CB164B2DA300E003AF /* transform.c in Sources */,
-				6EF2CAB01C8899EB001ABE83 /* lock.c in Sources */,
-				E4B515CC164B2DA300E003AF /* object.m in Sources */,
-				E4B515CD164B2DA300E003AF /* allocator.c in Sources */,
+				6EA9629B1D48622900759D53 /* event.c in Sources */,
+				6EA962A31D48625300759D53 /* event_kevent.c in Sources */,
 				6E4BACF91D49A04800B562AE /* event_epoll.c in Sources */,
+				E44A8E6F1805C3E0009FFDB6 /* voucher.c in Sources */,
+				6ED64B431BBD898600C35F4D /* firehose_buffer.c in Sources */,
+				E4B515CA164B2DA300E003AF /* io.c in Sources */,
+				E4B515C9164B2DA300E003AF /* data.c in Sources */,
 				E4B515CE164B2DA300E003AF /* data.m in Sources */,
+				E4B515CB164B2DA300E003AF /* transform.c in Sources */,
+				E4B515C8164B2DA300E003AF /* time.c in Sources */,
+				E4B515CD164B2DA300E003AF /* allocator.c in Sources */,
+				E4B515C6164B2DA300E003AF /* benchmark.c in Sources */,
+				6E9956021C3B21990071D40C /* venture.c in Sources */,
 				E4B515DD164B32E000E003AF /* introspection.c in Sources */,
 			);
 			runOnlyForDeploymentPostprocessing = 0;
@@ -2315,33 +2459,33 @@
 			files = (
 				E417A38412A472C4004D659D /* provider.d in Sources */,
 				E44EBE5412517EBE00645D88 /* protocol.defs in Sources */,
-				6E9956031C3B219A0071D40C /* venture.c in Sources */,
 				6EBEC7E61BBDD30D009B1596 /* firehose.defs in Sources */,
-				6ED64B421BBD898500C35F4D /* firehose_buffer.c in Sources */,
-				6EA962A21D48625200759D53 /* event_kevent.c in Sources */,
-				E49F2424125D3C970057C971 /* resolver.c in Sources */,
-				6E4BACC41D48A42200B562AE /* mach.c in Sources */,
-				E44EBE5512517EBE00645D88 /* init.c in Sources */,
-				E4EC11AE12514302000DDBD1 /* queue.c in Sources */,
-				E4EC11AF12514302000DDBD1 /* semaphore.c in Sources */,
-				6EF2CAAF1C8899EB001ABE83 /* lock.c in Sources */,
 				6EBEC7E91BBDD325009B1596 /* firehose_reply.defs in Sources */,
-				E4EC11B012514302000DDBD1 /* once.c in Sources */,
-				E4EC11B112514302000DDBD1 /* apply.c in Sources */,
+				E49F2424125D3C970057C971 /* resolver.c in Sources */,
+				E44EBE5512517EBE00645D88 /* init.c in Sources */,
 				E4EC11B212514302000DDBD1 /* object.c in Sources */,
-				E4EC11B312514302000DDBD1 /* benchmark.c in Sources */,
-				6E4BACF81D49A04800B562AE /* event_epoll.c in Sources */,
-				E4EC11B412514302000DDBD1 /* source.c in Sources */,
-				E4EC11B512514302000DDBD1 /* time.c in Sources */,
-				E4EC11B712514302000DDBD1 /* data.c in Sources */,
-				E4EC11B812514302000DDBD1 /* io.c in Sources */,
-				E43A72861AF85BCC00BAA921 /* block.cpp in Sources */,
-				6EA9629A1D48622900759D53 /* event.c in Sources */,
-				C93D6166143E190F00EB9023 /* transform.c in Sources */,
 				E4FC3266145F46C9002FBDDB /* object.m in Sources */,
-				2BBF5A65154B64F5002B20F9 /* allocator.c in Sources */,
-				E420867316027AE500EEE210 /* data.m in Sources */,
+				E43A72861AF85BCC00BAA921 /* block.cpp in Sources */,
+				6EF2CAAF1C8899EB001ABE83 /* lock.c in Sources */,
+				E4EC11AF12514302000DDBD1 /* semaphore.c in Sources */,
+				E4EC11B012514302000DDBD1 /* once.c in Sources */,
+				E4EC11AE12514302000DDBD1 /* queue.c in Sources */,
+				E4EC11B112514302000DDBD1 /* apply.c in Sources */,
+				E4EC11B412514302000DDBD1 /* source.c in Sources */,
+				6E4BACC41D48A42200B562AE /* mach.c in Sources */,
+				6EA9629A1D48622900759D53 /* event.c in Sources */,
+				6EA962A21D48625200759D53 /* event_kevent.c in Sources */,
+				6E4BACF81D49A04800B562AE /* event_epoll.c in Sources */,
 				E44A8E6E1805C3E0009FFDB6 /* voucher.c in Sources */,
+				6ED64B421BBD898500C35F4D /* firehose_buffer.c in Sources */,
+				E4EC11B812514302000DDBD1 /* io.c in Sources */,
+				E4EC11B712514302000DDBD1 /* data.c in Sources */,
+				E420867316027AE500EEE210 /* data.m in Sources */,
+				C93D6166143E190F00EB9023 /* transform.c in Sources */,
+				E4EC11B512514302000DDBD1 /* time.c in Sources */,
+				2BBF5A65154B64F5002B20F9 /* allocator.c in Sources */,
+				E4EC11B312514302000DDBD1 /* benchmark.c in Sources */,
+				6E9956031C3B219A0071D40C /* venture.c in Sources */,
 			);
 			runOnlyForDeploymentPostprocessing = 0;
 		};
@@ -2351,33 +2495,33 @@
 			files = (
 				E417A38512A472C5004D659D /* provider.d in Sources */,
 				E44EBE5612517EBE00645D88 /* protocol.defs in Sources */,
-				6E9956041C3B219B0071D40C /* venture.c in Sources */,
 				6EBEC7E51BBDD30C009B1596 /* firehose.defs in Sources */,
-				6ED64B411BBD898400C35F4D /* firehose_buffer.c in Sources */,
-				6EA962A11D48625100759D53 /* event_kevent.c in Sources */,
-				E49F2423125D3C960057C971 /* resolver.c in Sources */,
-				6E4BACC31D48A42100B562AE /* mach.c in Sources */,
-				E44EBE5712517EBE00645D88 /* init.c in Sources */,
-				E4EC121A12514715000DDBD1 /* queue.c in Sources */,
-				E4EC121B12514715000DDBD1 /* semaphore.c in Sources */,
-				6EF2CAAE1C8899EA001ABE83 /* lock.c in Sources */,
 				6EBEC7E81BBDD324009B1596 /* firehose_reply.defs in Sources */,
-				E4EC121C12514715000DDBD1 /* once.c in Sources */,
-				E4EC121D12514715000DDBD1 /* apply.c in Sources */,
+				E49F2423125D3C960057C971 /* resolver.c in Sources */,
+				E44EBE5712517EBE00645D88 /* init.c in Sources */,
 				E4EC121E12514715000DDBD1 /* object.c in Sources */,
-				E4EC121F12514715000DDBD1 /* benchmark.c in Sources */,
-				6E4BACF71D49A04700B562AE /* event_epoll.c in Sources */,
-				E4EC122012514715000DDBD1 /* source.c in Sources */,
-				E4EC122112514715000DDBD1 /* time.c in Sources */,
-				E4EC122312514715000DDBD1 /* data.c in Sources */,
-				E4EC122412514715000DDBD1 /* io.c in Sources */,
-				E43A72851AF85BCC00BAA921 /* block.cpp in Sources */,
-				6EA962991D48622800759D53 /* event.c in Sources */,
-				C93D6167143E190F00EB9023 /* transform.c in Sources */,
 				E4FC3267145F46C9002FBDDB /* object.m in Sources */,
-				2BBF5A66154B64F5002B20F9 /* allocator.c in Sources */,
-				E420867216027AE500EEE210 /* data.m in Sources */,
+				E43A72851AF85BCC00BAA921 /* block.cpp in Sources */,
+				6EF2CAAE1C8899EA001ABE83 /* lock.c in Sources */,
+				E4EC121B12514715000DDBD1 /* semaphore.c in Sources */,
+				E4EC121C12514715000DDBD1 /* once.c in Sources */,
+				E4EC121A12514715000DDBD1 /* queue.c in Sources */,
+				E4EC121D12514715000DDBD1 /* apply.c in Sources */,
+				E4EC122012514715000DDBD1 /* source.c in Sources */,
+				6E4BACC31D48A42100B562AE /* mach.c in Sources */,
+				6EA962991D48622800759D53 /* event.c in Sources */,
+				6EA962A11D48625100759D53 /* event_kevent.c in Sources */,
+				6E4BACF71D49A04700B562AE /* event_epoll.c in Sources */,
 				E44A8E6D1805C3E0009FFDB6 /* voucher.c in Sources */,
+				6ED64B411BBD898400C35F4D /* firehose_buffer.c in Sources */,
+				E4EC122412514715000DDBD1 /* io.c in Sources */,
+				E4EC122312514715000DDBD1 /* data.c in Sources */,
+				E420867216027AE500EEE210 /* data.m in Sources */,
+				C93D6167143E190F00EB9023 /* transform.c in Sources */,
+				E4EC122112514715000DDBD1 /* time.c in Sources */,
+				2BBF5A66154B64F5002B20F9 /* allocator.c in Sources */,
+				E4EC121F12514715000DDBD1 /* benchmark.c in Sources */,
+				6E9956041C3B219B0071D40C /* venture.c in Sources */,
 			);
 			runOnlyForDeploymentPostprocessing = 0;
 		};
@@ -2439,6 +2583,11 @@
 			target = E4EC121612514715000DDBD1 /* libdispatch mp resolved */;
 			targetProxy = E47D6ECC125FEBA10070D91C /* PBXContainerItemProxy */;
 		};
+		E49BB6F81E7074C100868613 /* PBXTargetDependency */ = {
+			isa = PBXTargetDependency;
+			target = E49BB6CE1E70748100868613 /* libdispatch alt resolved */;
+			targetProxy = E49BB6F71E7074C100868613 /* PBXContainerItemProxy */;
+		};
 		E4B515DB164B317700E003AF /* PBXTargetDependency */ = {
 			isa = PBXTargetDependency;
 			target = E4B51595164B2DA300E003AF /* libdispatch introspection */;
@@ -2559,7 +2708,6 @@
 			isa = XCBuildConfiguration;
 			baseConfigurationReference = C00B0E121C5AEBF7000330B3 /* libdispatch-dyld-stub.xcconfig */;
 			buildSettings = {
-				PRODUCT_NAME = "$(PRODUCT_NAME)";
 			};
 			name = Release;
 		};
@@ -2567,7 +2715,6 @@
 			isa = XCBuildConfiguration;
 			baseConfigurationReference = C00B0E121C5AEBF7000330B3 /* libdispatch-dyld-stub.xcconfig */;
 			buildSettings = {
-				PRODUCT_NAME = "$(PRODUCT_NAME)";
 			};
 			name = Debug;
 		};
@@ -2575,7 +2722,6 @@
 			isa = XCBuildConfiguration;
 			baseConfigurationReference = C01866BE1C59735B0040FC07 /* libdispatch-mp-static.xcconfig */;
 			buildSettings = {
-				PRODUCT_NAME = "$(PRODUCT_NAME)";
 			};
 			name = Release;
 		};
@@ -2583,7 +2729,6 @@
 			isa = XCBuildConfiguration;
 			baseConfigurationReference = C01866BE1C59735B0040FC07 /* libdispatch-mp-static.xcconfig */;
 			buildSettings = {
-				PRODUCT_NAME = "$(PRODUCT_NAME)";
 			};
 			name = Debug;
 		};
@@ -2613,6 +2758,22 @@
 			};
 			name = Debug;
 		};
+		E49BB6F01E70748100868613 /* Release */ = {
+			isa = XCBuildConfiguration;
+			baseConfigurationReference = E40041A9125D70590022B135 /* libdispatch-resolved.xcconfig */;
+			buildSettings = {
+				DISPATCH_RESOLVED_VARIANT = alt;
+			};
+			name = Release;
+		};
+		E49BB6F11E70748100868613 /* Debug */ = {
+			isa = XCBuildConfiguration;
+			baseConfigurationReference = E40041A9125D70590022B135 /* libdispatch-resolved.xcconfig */;
+			buildSettings = {
+				DISPATCH_RESOLVED_VARIANT = alt;
+			};
+			name = Debug;
+		};
 		E49F24D9125D57FA0057C971 /* Release */ = {
 			isa = XCBuildConfiguration;
 			buildSettings = {
@@ -2626,6 +2787,7 @@
 		E49F24DA125D57FA0057C971 /* Debug */ = {
 			isa = XCBuildConfiguration;
 			buildSettings = {
+				ONLY_ACTIVE_ARCH = YES;
 				WARNING_CFLAGS = (
 					"-Weverything",
 					"$(inherited)",
@@ -2813,6 +2975,15 @@
 			defaultConfigurationIsVisible = 0;
 			defaultConfigurationName = Release;
 		};
+		E49BB6EF1E70748100868613 /* Build configuration list for PBXNativeTarget "libdispatch alt resolved" */ = {
+			isa = XCConfigurationList;
+			buildConfigurations = (
+				E49BB6F01E70748100868613 /* Release */,
+				E49BB6F11E70748100868613 /* Debug */,
+			);
+			defaultConfigurationIsVisible = 0;
+			defaultConfigurationName = Release;
+		};
 		E49F24D8125D57FA0057C971 /* Build configuration list for PBXNativeTarget "libdispatch no resolver" */ = {
 			isa = XCConfigurationList;
 			buildConfigurations = (
diff --git a/man/dispatch_queue_create.3 b/man/dispatch_queue_create.3
index f3c3051..833e564 100644
--- a/man/dispatch_queue_create.3
+++ b/man/dispatch_queue_create.3
@@ -72,7 +72,8 @@
 By convention, clients should pass a reverse DNS style label. For example:
 .Pp
 .Bd -literal -offset indent
-my_queue = dispatch_queue_create("com.example.subsystem.taskXYZ", NULL);
+my_queue = dispatch_queue_create("com.example.subsystem.taskXYZ",
+				 DISPATCH_QUEUE_SERIAL);
 .Ed
 .Pp
 The
diff --git a/os/firehose_buffer_private.h b/os/firehose_buffer_private.h
index 29b80c3..d131d6d 100644
--- a/os/firehose_buffer_private.h
+++ b/os/firehose_buffer_private.h
@@ -31,7 +31,7 @@
 #include <dispatch/dispatch.h>
 #endif
 
-#define OS_FIREHOSE_SPI_VERSION 20160318
+#define OS_FIREHOSE_SPI_VERSION 20170222
 
 /*!
  * @group Firehose SPI
diff --git a/os/firehose_server_private.h b/os/firehose_server_private.h
index 441bb52..fc352da 100644
--- a/os/firehose_server_private.h
+++ b/os/firehose_server_private.h
@@ -228,6 +228,23 @@
 firehose_client_set_context(firehose_client_t client, void *ctxt);
 
 /*!
+ * @function firehose_client_initiate_quarantine
+ *
+ * @abstract
+ * Starts the procedure to move the given client to the high volume quarantine
+ *
+ * @discussion
+ * When the client is in the high volume quarantine, its firehose chunks
+ * have the fcp_quarantined bit set to 1.
+ *
+ * @param client
+ * The specified client.
+ */
+OS_NOTHROW OS_NONNULL1
+void
+firehose_client_initiate_quarantine(firehose_client_t client);
+
+/*!
  * @function firehose_client_metadata_stream_peek
  *
  * @abstract
@@ -361,6 +378,36 @@
 dispatch_queue_t
 firehose_server_copy_queue(firehose_server_queue_t which);
 
+/*!
+ * @function firehose_server_quarantined_suspend
+ *
+ * @abstract
+ * Suspends processing of quarantined clients until
+ * firehose_server_quarantined_resume() is called for the same queue.
+ *
+ * @discussion
+ * Suspending processing of quarantined clients causes firehose_snapshot()
+ * to block until the processing is enabled again.
+ *
+ * However, if this is used to pace the processing, it is a good idea to disable
+ * this pacing until the snapshot has completed.
+ *
+ * Similarly, quarantine suspension must be off during shutdown.
+ */
+OS_NOTHROW
+void
+firehose_server_quarantined_suspend(firehose_server_queue_t q);
+
+/*!
+ * @function firehose_server_quarantined_resume
+ *
+ * @abstract
+ * Resumes processing of quarantined clients.
+ */
+OS_NOTHROW
+void
+firehose_server_quarantined_resume(firehose_server_queue_t q);
+
 #pragma mark - Firehose Snapshot
 
 /*!
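
For reference, a minimal usage sketch of the suspend/resume pairing described above; only the API names come from this header, the batching helper body is illustrative:

// Sketch: pace quarantined clients without blocking snapshots or shutdown.
static void
server_drain_quarantined_batch(firehose_server_queue_t q)
{
	firehose_server_quarantined_suspend(q);
	// ... drain a batch of non-quarantined work (illustrative) ...
	firehose_server_quarantined_resume(q);
	// The suspension must not be left in place across firehose_snapshot()
	// or shutdown, since the snapshot blocks until processing is re-enabled.
}
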
diff --git a/os/object_private.h b/os/object_private.h
index 2f8cdf4..215c3d1 100644
--- a/os/object_private.h
+++ b/os/object_private.h
@@ -36,7 +36,9 @@
 #define OS_OBJECT_NONNULL __attribute__((__nonnull__))
 #define OS_OBJECT_WARN_RESULT __attribute__((__warn_unused_result__))
 #define OS_OBJECT_MALLOC __attribute__((__malloc__))
+#ifndef OS_OBJECT_EXPORT
 #define OS_OBJECT_EXPORT extern __attribute__((visibility("default")))
+#endif
 #else
 /*! @parseOnly */
 #define OS_OBJECT_NOTHROW
@@ -46,8 +48,11 @@
 #define OS_OBJECT_WARN_RESULT
 /*! @parseOnly */
 #define OS_OBJECT_MALLOC
+#ifndef OS_OBJECT_EXPORT
+/*! @parseOnly */
 #define OS_OBJECT_EXPORT extern
 #endif
+#endif
 
 #if OS_OBJECT_USE_OBJC && __has_feature(objc_arc)
 #define _OS_OBJECT_OBJC_ARC 1
@@ -179,6 +184,18 @@
 void
 _os_object_release_internal(_os_object_t object);
 
+API_AVAILABLE(macos(10.12), ios(10.0), tvos(10.0), watchos(3.0))
+OS_OBJECT_EXPORT OS_OBJECT_NONNULL OS_OBJECT_NOTHROW
+OS_SWIFT_UNAVAILABLE("Unavailable in Swift")
+_os_object_t
+_os_object_retain_internal_n(_os_object_t object, uint16_t n);
+
+API_AVAILABLE(macos(10.12), ios(10.0), tvos(10.0), watchos(3.0))
+OS_OBJECT_EXPORT OS_OBJECT_NONNULL OS_OBJECT_NOTHROW
+OS_SWIFT_UNAVAILABLE("Unavailable in Swift")
+void
+_os_object_release_internal_n(_os_object_t object, uint16_t n);
+
 #endif // !_OS_OBJECT_OBJC_ARC
 
 __END_DECLS
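
The counted variants declared above let a caller take or drop several internal references in one operation; a brief sketch of the required pairing (the helper name is illustrative, and the count of 2 mirrors the block.cpp change later in this diff):

// Sketch: counted internal references must be balanced with the same n.
static void
hold_two_internal_refs(_os_object_t obj)
{
	_os_object_retain_internal_n(obj, 2);
	// ... publish two internal references (illustrative) ...
	_os_object_release_internal_n(obj, 2);
}
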
diff --git a/os/voucher_activity_private.h b/os/voucher_activity_private.h
index 28effc9..8ce0ef5 100644
--- a/os/voucher_activity_private.h
+++ b/os/voucher_activity_private.h
@@ -282,12 +282,13 @@
 		const void *privdata, size_t privlen);
 
 typedef const struct voucher_activity_hooks_s {
-#define VOUCHER_ACTIVITY_HOOKS_VERSION     4
+#define VOUCHER_ACTIVITY_HOOKS_VERSION     5
 	long vah_version;
 	mach_port_t (*vah_get_logd_port)(void);
 	dispatch_mach_handler_function_t vah_debug_channel_handler;
 	kern_return_t (*vah_get_reconnect_info)(mach_vm_address_t *, mach_vm_size_t *);
 	void (*vah_metadata_init)(void *metadata_buffer, size_t size);
+	void (*vah_quarantine_starts)(void);
 } *voucher_activity_hooks_t;
 
 /*!
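
A hedged sketch of a version-5 hooks structure supplying the new callback; the my_* names are hypothetical and the callbacks required by earlier versions are elided here:

// Sketch: hooks bumped to version 5 with the new quarantine notification.
static void
my_quarantine_starts(void)
{
	// assumption: called when this process is being moved into the
	// high volume quarantine
}

static const struct voucher_activity_hooks_s my_activity_hooks = {
	.vah_version = VOUCHER_ACTIVITY_HOOKS_VERSION, // 5
	// .vah_get_logd_port, .vah_metadata_init, ... (version 1-4 fields elided)
	.vah_quarantine_starts = my_quarantine_starts,
};
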
diff --git a/private/mach_private.h b/private/mach_private.h
index 6ca891d..bc53223 100644
--- a/private/mach_private.h
+++ b/private/mach_private.h
@@ -114,7 +114,9 @@
  * A SIGTERM signal has been received. This notification is delivered at most
  * once during the lifetime of the channel. This event is sent only for XPC
  * channels (i.e. channels that were created by calling
- * dispatch_mach_create_4libxpc()).
+ * dispatch_mach_create_4libxpc()) and only if the
+ * dmxh_enable_sigterm_notification function in the XPC hooks structure is not
+ * set, or returned true when called at channel activation time.
  *
  * @const DISPATCH_MACH_ASYNC_WAITER_DISCONNECTED
  * The channel has been disconnected by a call to dispatch_mach_reconnect() or
@@ -811,7 +813,7 @@
 
 API_AVAILABLE(macos(10.12), ios(10.0), tvos(10.0), watchos(3.0))
 typedef const struct dispatch_mach_xpc_hooks_s {
-#define DISPATCH_MACH_XPC_HOOKS_VERSION     2
+#define DISPATCH_MACH_XPC_HOOKS_VERSION     3
 	unsigned long version;
 
 	/* Fields available in version 1. */
@@ -827,8 +829,8 @@
 	 * throw an exception.
 	 */
 	bool (* _Nonnull dmxh_direct_message_handler)(void *_Nullable context,
-		dispatch_mach_reason_t reason, dispatch_mach_msg_t message,
-		mach_error_t error);
+			dispatch_mach_reason_t reason, dispatch_mach_msg_t message,
+			mach_error_t error);
 
 	/* Fields available in version 2. */
 
@@ -844,7 +846,7 @@
 	 * other code.
 	 */
 	dispatch_queue_t _Nullable (*_Nonnull dmxh_msg_context_reply_queue)(
-		void *_Nonnull msg_context);
+			void *_Nonnull msg_context);
 
 	/*
 	 * Called when a reply to a message sent by
@@ -861,6 +863,15 @@
 	 * details.
 	 */
 	dispatch_mach_async_reply_callback_t dmxh_async_reply_handler;
+
+	/* Fields available in version 3. */
+	/**
+	 * Called once when the Mach channel has been activated. If this function
+	 * returns true, a DISPATCH_MACH_SIGTERM_RECEIVED notification will be
+	 * delivered to the channel's event handler when a SIGTERM is received.
+	 */
+	bool (* _Nullable dmxh_enable_sigterm_notification)(
+			void *_Nullable context);
 } *dispatch_mach_xpc_hooks_t;
 
 #define DISPATCH_MACH_XPC_SUPPORTS_ASYNC_REPLIES(hooks) ((hooks)->version >= 2)
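
A hedged sketch of a version-3 hooks structure that opts into SIGTERM delivery; my_enable_sigterm is a hypothetical callback and the mandatory version-1/2 fields are elided:

// Sketch: return true so DISPATCH_MACH_SIGTERM_RECEIVED is delivered to the
// channel's event handler when a SIGTERM arrives.
static bool
my_enable_sigterm(void *context)
{
	(void)context;
	return true;
}

static const struct dispatch_mach_xpc_hooks_s my_xpc_hooks = {
	.version = DISPATCH_MACH_XPC_HOOKS_VERSION, // 3
	// .dmxh_direct_message_handler and the other version 1/2 fields must
	// also be set; they are elided in this sketch.
	.dmxh_enable_sigterm_notification = my_enable_sigterm,
};
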
diff --git a/private/private.h b/private/private.h
index 82da15e..cc9d578 100644
--- a/private/private.h
+++ b/private/private.h
@@ -66,7 +66,7 @@
 #endif /* !__DISPATCH_BUILDING_DISPATCH__ */
 
 // <rdar://problem/9627726> Check that public and private dispatch headers match
-#if DISPATCH_API_VERSION != 20160831 // Keep in sync with <dispatch/dispatch.h>
+#if DISPATCH_API_VERSION != 20170124 // Keep in sync with <dispatch/dispatch.h>
 #error "Dispatch header mismatch between /usr/include and /usr/local/include"
 #endif
 
@@ -214,6 +214,16 @@
 DISPATCH_EXPORT DISPATCH_WARN_RESULT DISPATCH_NOTHROW
 mach_port_t
 _dispatch_runloop_root_queue_get_port_4CF(dispatch_queue_t queue);
+
+#ifdef __BLOCKS__
+API_AVAILABLE(macos(10.12), ios(10.0), tvos(10.0), watchos(3.0))
+DISPATCH_EXPORT DISPATCH_MALLOC DISPATCH_RETURNS_RETAINED DISPATCH_WARN_RESULT
+DISPATCH_NOTHROW
+dispatch_queue_t
+_dispatch_network_root_queue_create_4NW(const char *_Nullable label,
+		const pthread_attr_t *_Nullable attrs,
+		dispatch_block_t _Nullable configure);
+#endif
 #endif
 
 API_AVAILABLE(macos(10.9), ios(7.0))
@@ -242,6 +252,11 @@
 
 #endif /* DISPATCH_COCOA_COMPAT */
 
+API_AVAILABLE(macos(10.12), ios(10.0), tvos(10.0), watchos(3.0))
+DISPATCH_EXPORT DISPATCH_NOTHROW
+void
+_dispatch_poll_for_events_4launchd(void);
+
 __END_DECLS
 
 DISPATCH_ASSUME_NONNULL_END
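
A minimal sketch of the new network root queue SPI; the label is illustrative, and both the pthread attributes and the configure block may be NULL per the declaration above:

#ifdef __BLOCKS__
// Sketch: create a dedicated root queue for networking work (4NW SPI).
// The result is declared DISPATCH_RETURNS_RETAINED, so the caller owns it.
static dispatch_queue_t
make_network_root_queue(void)
{
	return _dispatch_network_root_queue_create_4NW("com.example.network-root",
			NULL /* pthread attrs */, NULL /* configure block */);
}
#endif
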
diff --git a/private/queue_private.h b/private/queue_private.h
index 14d6477..98c7f5e 100644
--- a/private/queue_private.h
+++ b/private/queue_private.h
@@ -278,11 +278,12 @@
 
 /*!
  * @constant DISPATCH_APPLY_CURRENT_ROOT_QUEUE
- * @discussion Constant to pass to the dispatch_apply() and dispatch_apply_f()
- * functions to indicate that the root queue for the current thread should be
- * used (i.e. one of the global concurrent queues or a queue created with
- * dispatch_pthread_root_queue_create()). If there is no such queue, the
- * default priority global concurrent queue will be used.
+ *
+ * @discussion
+ * This constant is deprecated; please use DISPATCH_APPLY_AUTO instead.
+ *
+ * DISPATCH_APPLY_AUTO also selects the current pthread root queue if
+ * applicable.
  */
 #define DISPATCH_APPLY_CURRENT_ROOT_QUEUE ((dispatch_queue_t _Nonnull)0)
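
For comparison, the replacement constant in use; process_item is a hypothetical per-iteration function:

// Sketch: let libdispatch choose an appropriate (possibly pthread) root queue.
extern void process_item(size_t i); // hypothetical per-iteration work

static void
process_all_items(size_t count)
{
	dispatch_apply(count, DISPATCH_APPLY_AUTO, ^(size_t i) {
		process_item(i);
	});
}
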
 
diff --git a/private/source_private.h b/private/source_private.h
index f01287b..019f648 100644
--- a/private/source_private.h
+++ b/private/source_private.h
@@ -165,15 +165,6 @@
 	DISPATCH_SOCK_NOTIFY_ACK = 0x00004000,
 };
 
-/*!
- * @enum dispatch_source_nw_channel_flags_t
- *
- * @constant DISPATCH_NW_CHANNEL_FLOW_ADV_UPDATE
- * Received network channel flow advisory.
- */
-enum {
-	DISPATCH_NW_CHANNEL_FLOW_ADV_UPDATE = 0x00000001,
-};
 
 /*!
  * @enum dispatch_source_vfs_flags_t
@@ -341,11 +332,16 @@
  *
  * @constant DISPATCH_MEMORYPRESSURE_PROC_LIMIT_CRITICAL
  * The memory of the process has reached 100% of its high watermark limit.
+ *
+ * @constant DISPATCH_MEMORYPRESSURE_MSL_STATUS
+ * Mask for enabling/disabling malloc stack logging.
  */
 enum {
-	DISPATCH_MEMORYPRESSURE_PROC_LIMIT_WARN DISPATCH_ENUM_API_AVAILABLE(macos(10.12), ios(10.10), tvos(10.10), watchos(3.0)) = 0x10,
+	DISPATCH_MEMORYPRESSURE_PROC_LIMIT_WARN DISPATCH_ENUM_API_AVAILABLE(macos(10.12), ios(10.0), tvos(10.0), watchos(3.0)) = 0x10,
 
-	DISPATCH_MEMORYPRESSURE_PROC_LIMIT_CRITICAL DISPATCH_ENUM_API_AVAILABLE(macos(10.12), ios(10.10), tvos(10.10), watchos(3.0)) = 0x20,
+	DISPATCH_MEMORYPRESSURE_PROC_LIMIT_CRITICAL DISPATCH_ENUM_API_AVAILABLE(macos(10.12), ios(10.0), tvos(10.0), watchos(3.0)) = 0x20,
+
+	DISPATCH_MEMORYPRESSURE_MSL_STATUS DISPATCH_ENUM_API_AVAILABLE(macos(10.12), ios(10.0), tvos(10.0), watchos(3.0)) = 0xf0000000,
 };
 
 /*!
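
A hedged sketch of a memory pressure source that also observes the new malloc stack logging bits; the handler body is illustrative:

// Sketch: include DISPATCH_MEMORYPRESSURE_MSL_STATUS in the source mask.
static dispatch_source_t
make_memorypressure_source(dispatch_queue_t q)
{
	dispatch_source_t ds = dispatch_source_create(
			DISPATCH_SOURCE_TYPE_MEMORYPRESSURE, 0,
			DISPATCH_MEMORYPRESSURE_WARN | DISPATCH_MEMORYPRESSURE_CRITICAL |
			DISPATCH_MEMORYPRESSURE_MSL_STATUS, q);
	dispatch_source_set_event_handler(ds, ^{
		uintptr_t flags = dispatch_source_get_data(ds);
		if (flags & DISPATCH_MEMORYPRESSURE_MSL_STATUS) {
			// malloc stack logging state changed (illustrative handling)
		}
	});
	dispatch_activate(ds);
	return ds;
}
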
diff --git a/src/apply.c b/src/apply.c
index 40e6f32..9d64522 100644
--- a/src/apply.c
+++ b/src/apply.c
@@ -35,7 +35,7 @@
 	size_t idx, done = 0;
 
 	idx = os_atomic_inc_orig2o(da, da_index, acquire);
-	if (!fastpath(idx < iter)) goto out;
+	if (unlikely(idx >= iter)) goto out;
 
 	// da_dc is only safe to access once the 'index lock' has been acquired
 	dispatch_apply_function_t const func = (void *)da->da_dc->dc_func;
@@ -67,7 +67,7 @@
 			done++;
 			idx = os_atomic_inc_orig2o(da, da_index, relaxed);
 		});
-	} while (fastpath(idx < iter));
+	} while (likely(idx < iter));
 
 	if (invoke_flags & DISPATCH_APPLY_INVOKE_REDIRECT) {
 		_dispatch_reset_basepri(old_dbp);
@@ -124,7 +124,7 @@
 
 	while (dq && !qaf) {
 		qaf = _dispatch_queue_autorelease_frequency(dq);
-		dq = slowpath(dq->do_targetq);
+		dq = dq->do_targetq;
 	}
 	return qaf;
 }
@@ -198,13 +198,13 @@
 	do {
 		int32_t width = _dispatch_queue_try_reserve_apply_width(rq, da_width);
 
-		if (slowpath(da_width > width)) {
+		if (unlikely(da_width > width)) {
 			int32_t excess = da_width - width;
 			for (tq = dq; tq != rq; tq = tq->do_targetq) {
 				_dispatch_queue_relinquish_width(tq, excess);
 			}
 			da_width -= excess;
-			if (slowpath(!da_width)) {
+			if (unlikely(!da_width)) {
 				return _dispatch_apply_serial(da);
 			}
 			da->da_thr_cnt -= excess;
@@ -216,22 +216,41 @@
 			da->da_flags = _dispatch_queue_autorelease_frequency(dq);
 		}
 		rq = rq->do_targetq;
-	} while (slowpath(rq->do_targetq));
+	} while (unlikely(rq->do_targetq));
 	_dispatch_apply_f2(rq, da, _dispatch_apply_redirect_invoke);
 	do {
 		_dispatch_queue_relinquish_width(dq, da_width);
 		dq = dq->do_targetq;
-	} while (slowpath(dq->do_targetq));
+	} while (unlikely(dq->do_targetq));
 }
 
 #define DISPATCH_APPLY_MAX UINT16_MAX // must be < sqrt(SIZE_MAX)
 
+DISPATCH_ALWAYS_INLINE
+static inline dispatch_queue_t
+_dispatch_apply_root_queue(dispatch_queue_t dq)
+{
+	if (dq) {
+		while (unlikely(dq->do_targetq)) {
+			dq = dq->do_targetq;
+		}
+		// if the current root queue is a pthread root queue, select it
+		if (!_dispatch_priority_qos(dq->dq_priority)) {
+			return dq;
+		}
+	}
+
+	pthread_priority_t pp = _dispatch_get_priority();
+	dispatch_qos_t qos = _dispatch_qos_from_pp(pp);
+	return _dispatch_get_root_queue(qos ? qos : DISPATCH_QOS_DEFAULT, false);
+}
+
 DISPATCH_NOINLINE
 void
 dispatch_apply_f(size_t iterations, dispatch_queue_t dq, void *ctxt,
 		void (*func)(void *, size_t))
 {
-	if (slowpath(iterations == 0)) {
+	if (unlikely(iterations == 0)) {
 		return;
 	}
 	int32_t thr_cnt = (int32_t)dispatch_hw_config(active_cpus);
@@ -240,7 +259,7 @@
 	size_t nested = dtctxt ? dtctxt->dtc_apply_nesting : 0;
 	dispatch_queue_t old_dq = _dispatch_queue_get_current();
 
-	if (!slowpath(nested)) {
+	if (likely(!nested)) {
 		nested = iterations;
 	} else {
 		thr_cnt = nested < (size_t)thr_cnt ? thr_cnt / (int32_t)nested : 1;
@@ -250,12 +269,8 @@
 	if (iterations < (size_t)thr_cnt) {
 		thr_cnt = (int32_t)iterations;
 	}
-	if (slowpath(dq == DISPATCH_APPLY_CURRENT_ROOT_QUEUE)) {
-		dq = old_dq ? old_dq : _dispatch_get_root_queue(
-				DISPATCH_QOS_DEFAULT, false);
-		while (slowpath(dq->do_targetq)) {
-			dq = dq->do_targetq;
-		}
+	if (likely(dq == DISPATCH_APPLY_AUTO)) {
+		dq = _dispatch_apply_root_queue(old_dq);
 	}
 	struct dispatch_continuation_s dc = {
 		.dc_func = (void*)func,
@@ -276,11 +291,11 @@
 #endif
 	da->da_flags = 0;
 
-	if (slowpath(dq->dq_width == 1) || slowpath(thr_cnt <= 1)) {
+	if (unlikely(dq->dq_width == 1 || thr_cnt <= 1)) {
 		return dispatch_sync_f(dq, da, _dispatch_apply_serial);
 	}
-	if (slowpath(dq->do_targetq)) {
-		if (slowpath(dq == old_dq)) {
+	if (unlikely(dq->do_targetq)) {
+		if (unlikely(dq == old_dq)) {
 			return dispatch_sync_f(dq, da, _dispatch_apply_serial);
 		} else {
 			return dispatch_sync_f(dq, da, _dispatch_apply_redirect);
diff --git a/src/block.cpp b/src/block.cpp
index 3060a2a..2a6f007 100644
--- a/src/block.cpp
+++ b/src/block.cpp
@@ -32,6 +32,8 @@
 #include "internal.h"
 }
 
+// NOTE: this file must not contain any atomic operations
+
 #if DISPATCH_DEBUG && DISPATCH_BLOCK_PRIVATE_DATA_DEBUG
 #define _dispatch_block_private_data_debug(msg, ...) \
 		_dispatch_debug("block_private[%p]: " msg, (this), ##__VA_ARGS__)
@@ -83,7 +85,8 @@
 			((void (*)(dispatch_group_t))dispatch_release)(dbpd_group);
 		}
 		if (dbpd_queue) {
-			((void (*)(os_mpsc_queue_t))_os_object_release_internal)(dbpd_queue);
+			((void (*)(os_mpsc_queue_t, uint16_t))
+					_os_object_release_internal_n)(dbpd_queue, 2);
 		}
 		if (dbpd_block) Block_release(dbpd_block);
 		if (dbpd_voucher) voucher_release(dbpd_voucher);
diff --git a/src/data.c b/src/data.c
index adcfbb2..240309f 100644
--- a/src/data.c
+++ b/src/data.c
@@ -100,51 +100,22 @@
 #define _dispatch_data_release(x) dispatch_release(x)
 #endif
 
-const dispatch_block_t _dispatch_data_destructor_free = ^{
-	DISPATCH_INTERNAL_CRASH(0, "free destructor called");
-};
-
-const dispatch_block_t _dispatch_data_destructor_none = ^{
-	DISPATCH_INTERNAL_CRASH(0, "none destructor called");
-};
-
-#if !HAVE_MACH
-const dispatch_block_t _dispatch_data_destructor_munmap = ^{
-	DISPATCH_INTERNAL_CRASH(0, "munmap destructor called");
-};
-#else
-// _dispatch_data_destructor_munmap is a linker alias to the following
-const dispatch_block_t _dispatch_data_destructor_vm_deallocate = ^{
-	DISPATCH_INTERNAL_CRASH(0, "vmdeallocate destructor called");
-};
-#endif
-
-const dispatch_block_t _dispatch_data_destructor_inline = ^{
-	DISPATCH_INTERNAL_CRASH(0, "inline destructor called");
-};
-
-struct dispatch_data_s _dispatch_data_empty = {
-#if DISPATCH_DATA_IS_BRIDGED_TO_NSDATA
-	.do_vtable = DISPATCH_DATA_EMPTY_CLASS,
-#else
-	DISPATCH_GLOBAL_OBJECT_HEADER(data),
-	.do_next = DISPATCH_OBJECT_LISTLESS,
-#endif
-};
-
 DISPATCH_ALWAYS_INLINE
 static inline dispatch_data_t
 _dispatch_data_alloc(size_t n, size_t extra)
 {
 	dispatch_data_t data;
 	size_t size;
+	size_t base_size;
 
-	if (os_mul_and_add_overflow(n, sizeof(range_record),
-			sizeof(struct dispatch_data_s) + extra, &size)) {
+	if (os_add_overflow(sizeof(struct dispatch_data_s), extra, &base_size)) {
+		return DISPATCH_OUT_OF_MEMORY;
+	}
+	if (os_mul_and_add_overflow(n, sizeof(range_record), base_size, &size)) {
 		return DISPATCH_OUT_OF_MEMORY;
 	}
 
-	data = _dispatch_alloc(DISPATCH_DATA_CLASS, size);
+	data = _dispatch_object_alloc(DISPATCH_DATA_CLASS, size);
 	data->num_records = n;
 #if !DISPATCH_DATA_IS_BRIDGED_TO_NSDATA
 	data->do_targetq = dispatch_get_global_queue(
@@ -192,8 +163,8 @@
 }
 
 void
-dispatch_data_init(dispatch_data_t data, const void *buffer, size_t size,
-		dispatch_block_t destructor)
+_dispatch_data_init_with_bytes(dispatch_data_t data, const void *buffer,
+		size_t size, dispatch_block_t destructor)
 {
 	if (!buffer || !size) {
 		if (destructor) {
@@ -284,7 +255,7 @@
 }
 
 void
-_dispatch_data_dispose(dispatch_data_t dd)
+_dispatch_data_dispose(dispatch_data_t dd, DISPATCH_UNUSED bool *allow_free)
 {
 	if (_dispatch_data_leaf(dd)) {
 		_dispatch_data_destroy_buffer(dd->buf, dd->size, dd->do_targetq,
@@ -298,6 +269,18 @@
 	}
 }
 
+void
+_dispatch_data_set_target_queue(dispatch_data_t dd, dispatch_queue_t tq)
+{
+#if DISPATCH_DATA_IS_BRIDGED_TO_NSDATA
+	_dispatch_retain(tq);
+	tq = os_atomic_xchg2o(dd, do_targetq, tq, release);
+	if (tq) _dispatch_release(tq);
+#else
+	_dispatch_object_set_target_queue_inline(dd, tq);
+#endif
+}
+
 size_t
 _dispatch_data_debug(dispatch_data_t dd, char* buf, size_t bufsiz)
 {
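
The allocation size is now computed in two overflow-checked steps, so a large `extra` can no longer wrap the header addition before the per-record multiply-add. A standalone sketch of the same guard, assuming only the compiler's checked-arithmetic builtins:

#include <stdbool.h>
#include <stddef.h>

// Sketch: size = base + extra + n * record_size, failing cleanly on overflow.
static bool
checked_data_size(size_t base, size_t extra, size_t n, size_t record_size,
		size_t *out)
{
	size_t base_size, records;
	if (__builtin_add_overflow(base, extra, &base_size)) return false;
	if (__builtin_mul_overflow(n, record_size, &records)) return false;
	return !__builtin_add_overflow(records, base_size, out);
}
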
diff --git a/src/data.m b/src/data.m
index 9971f18..1d024ff 100644
--- a/src/data.m
+++ b/src/data.m
@@ -28,6 +28,8 @@
 
 #include <Foundation/NSString.h>
 
+// NOTE: this file must not contain any atomic operations
+
 @interface DISPATCH_CLASS(data) () <DISPATCH_CLASS(data)>
 @property (readonly,nonatomic) NSUInteger length;
 @property (readonly,nonatomic) const void *bytes NS_RETURNS_INNER_POINTER;
@@ -66,29 +68,26 @@
 	} else {
 		destructor = DISPATCH_DATA_DESTRUCTOR_NONE;
 	}
-	dispatch_data_init(self, bytes, length, destructor);
+	_dispatch_data_init_with_bytes(self, bytes, length, destructor);
 	return self;
 }
 
-#define _dispatch_data_objc_dispose(selector) \
-	struct dispatch_data_s *dd = (void*)self; \
-	_dispatch_data_dispose(self); \
-	dispatch_queue_t tq = dd->do_targetq; \
-	dispatch_function_t func = dd->finalizer; \
-	void *ctxt = dd->ctxt; \
-	[super selector]; \
-	if (func && ctxt) { \
-		if (!tq) { \
-			 tq = dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT,0);\
-		} \
-		dispatch_async_f(tq, ctxt, func); \
-	} \
-	if (tq) { \
-		_os_object_release_internal((_os_object_t)tq); \
-	}
-
 - (void)dealloc {
-	_dispatch_data_objc_dispose(dealloc);
+	struct dispatch_data_s *dd = (void*)self;
+	_dispatch_data_dispose(self, NULL);
+	dispatch_queue_t tq = dd->do_targetq;
+	dispatch_function_t func = dd->finalizer;
+	void *ctxt = dd->ctxt;
+	[super dealloc];
+	if (func && ctxt) {
+		if (!tq) {
+			 tq = dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT,0);
+		}
+		dispatch_async_f(tq, ctxt, func);
+	}
+	if (tq) {
+		_os_object_release_internal((_os_object_t)tq);
+	}
 }
 
 - (BOOL)_bytesAreVM {
@@ -113,10 +112,7 @@
 
 - (void)_setTargetQueue:(dispatch_queue_t)queue {
 	struct dispatch_data_s *dd = (void*)self;
-	_os_object_retain_internal((_os_object_t)queue);
-	dispatch_queue_t prev;
-	prev = os_atomic_xchg2o(dd, do_targetq, queue, release);
-	if (prev) _os_object_release_internal((_os_object_t)prev);
+	return _dispatch_data_set_target_queue(dd, queue);
 }
 
 - (NSString *)debugDescription {
diff --git a/src/data_internal.h b/src/data_internal.h
index bbef21e..19fc3d9 100644
--- a/src/data_internal.h
+++ b/src/data_internal.h
@@ -100,12 +100,13 @@
 	dispatch_transform_t encode;
 };
 
-void dispatch_data_init(dispatch_data_t data, const void *buffer, size_t size,
-		dispatch_block_t destructor);
-void _dispatch_data_dispose(dispatch_data_t data);
+void _dispatch_data_init_with_bytes(dispatch_data_t data, const void *buffer,
+		size_t size, dispatch_block_t destructor);
+void _dispatch_data_dispose(dispatch_data_t data, bool *allow_free);
+void _dispatch_data_set_target_queue(struct dispatch_data_s *dd,
+		dispatch_queue_t tq);
 size_t _dispatch_data_debug(dispatch_data_t data, char* buf, size_t bufsiz);
-const void*
-_dispatch_data_get_flattened_bytes(struct dispatch_data_s *dd);
+const void* _dispatch_data_get_flattened_bytes(struct dispatch_data_s *dd);
 
 #if !defined(__cplusplus)
 extern const dispatch_block_t _dispatch_data_destructor_inline;
diff --git a/src/event/event.c b/src/event/event.c
index 2a8a8c3..34abbf0 100644
--- a/src/event/event.c
+++ b/src/event/event.c
@@ -46,6 +46,7 @@
 		du = _dispatch_unote_linkage_get_unote(dul)._du;
 	}
 	du->du_type = dst;
+	du->du_can_be_wlh = dst->dst_per_trigger_qos;
 	du->du_ident = (uint32_t)handle;
 	du->du_filter = dst->dst_filter;
 	du->du_fflags = (typeof(du->du_fflags))mask;
@@ -108,8 +109,13 @@
 	}
 #endif
 	if (du._du->du_is_timer) {
-		if (du._dt->dt_pending_config) {
+		if (unlikely(du._dt->dt_heap_entry[DTH_TARGET_ID] != DTH_INVALID_ID ||
+				du._dt->dt_heap_entry[DTH_DEADLINE_ID] != DTH_INVALID_ID)) {
+			DISPATCH_INTERNAL_CRASH(0, "Disposing of timer still in its heap");
+		}
+		if (unlikely(du._dt->dt_pending_config)) {
 			free(du._dt->dt_pending_config);
+			du._dt->dt_pending_config = NULL;
 		}
 	} else if (!du._du->du_is_direct) {
 		ptr = _dispatch_unote_get_linkage(du);
@@ -280,6 +286,8 @@
 		du._dt->dt_timer.target = UINT64_MAX;
 		du._dt->dt_timer.deadline = UINT64_MAX;
 		du._dt->dt_timer.interval = UINT64_MAX;
+		du._dt->dt_heap_entry[DTH_TARGET_ID] = DTH_INVALID_ID;
+		du._dt->dt_heap_entry[DTH_DEADLINE_ID] = DTH_INVALID_ID;
 	}
 	return du;
 }
diff --git a/src/event/event_config.h b/src/event/event_config.h
index 7f7761c..2ac3c42 100644
--- a/src/event/event_config.h
+++ b/src/event/event_config.h
@@ -35,12 +35,17 @@
 
 #if DISPATCH_DEBUG
 #define DISPATCH_MGR_QUEUE_DEBUG 1
+#define DISPATCH_WLH_DEBUG 1
 #endif
 
 #ifndef DISPATCH_MGR_QUEUE_DEBUG
 #define DISPATCH_MGR_QUEUE_DEBUG 0
 #endif
 
+#ifndef DISPATCH_WLH_DEBUG
+#define DISPATCH_WLH_DEBUG 0
+#endif
+
 #ifndef DISPATCH_MACHPORT_DEBUG
 #define DISPATCH_MACHPORT_DEBUG 0
 #endif
@@ -100,31 +105,6 @@
 #	ifndef VQ_DESIRED_DISK
 #	undef HAVE_DECL_VQ_DESIRED_DISK
 #	endif // VQ_DESIRED_DISK
-
-#	ifndef NOTE_MEMORYSTATUS_LOW_SWAP
-#	define NOTE_MEMORYSTATUS_LOW_SWAP 0x8
-#	endif
-
-#	if !defined(NOTE_MEMORYSTATUS_PROC_LIMIT_WARN) || \
-		!DISPATCH_MIN_REQUIRED_OSX_AT_LEAST(101200)
-#	undef NOTE_MEMORYSTATUS_PROC_LIMIT_WARN
-#	define NOTE_MEMORYSTATUS_PROC_LIMIT_WARN 0
-#	endif // NOTE_MEMORYSTATUS_PROC_LIMIT_WARN
-
-#	if !defined(NOTE_MEMORYSTATUS_PROC_LIMIT_CRITICAL) || \
-		!DISPATCH_MIN_REQUIRED_OSX_AT_LEAST(101200)
-#	undef NOTE_MEMORYSTATUS_PROC_LIMIT_CRITICAL
-#	define NOTE_MEMORYSTATUS_PROC_LIMIT_CRITICAL 0
-#	endif // NOTE_MEMORYSTATUS_PROC_LIMIT_CRITICAL
-
-#	ifndef DISPATCH_KEVENT_TREAT_ENOENT_AS_EINPROGRESS
-#	if TARGET_OS_MAC && !DISPATCH_MIN_REQUIRED_OSX_AT_LEAST(101200)
-	// deferred delete can return bogus ENOENTs on older kernels
-#	define DISPATCH_KEVENT_TREAT_ENOENT_AS_EINPROGRESS 1
-#	else
-#	define DISPATCH_KEVENT_TREAT_ENOENT_AS_EINPROGRESS 0
-#	endif
-#	endif
 #else // DISPATCH_EVENT_BACKEND_KEVENT
 #	define EV_ADD					0x0001
 #	define EV_DELETE				0x0002
@@ -195,6 +175,14 @@
 #	define MACH_MSG_PRIORITY_UNSPECIFIED ((mach_msg_priority_t)0)
 #	endif // MACH_SEND_OVERRIDE
 
+#	ifndef MACH_SEND_SYNC_OVERRIDE
+#	define MACH_SEND_SYNC_OVERRIDE 0x00100000
+#	endif // MACH_SEND_SYNC_OVERRIDE
+
+#	ifndef MACH_RCV_SYNC_WAIT
+#	define MACH_RCV_SYNC_WAIT 0x00004000
+#	endif // MACH_RCV_SYNC_WAIT
+
 #	define DISPATCH_MACH_TRAILER_SIZE sizeof(dispatch_mach_trailer_t)
 #	define DISPATCH_MACH_RCV_TRAILER MACH_RCV_TRAILER_CTX
 #	define DISPATCH_MACH_RCV_OPTIONS ( \
diff --git a/src/event/event_epoll.c b/src/event/event_epoll.c
index 647552f..68140d5 100644
--- a/src/event/event_epoll.c
+++ b/src/event/event_epoll.c
@@ -211,8 +211,8 @@
 }
 
 bool
-_dispatch_unote_register(dispatch_unote_t du, dispatch_wlh_t wlh,
-		dispatch_priority_t pri)
+_dispatch_unote_register(dispatch_unote_t du,
+		DISPATCH_UNUSED dispatch_wlh_t wlh, dispatch_priority_t pri)
 {
 	struct dispatch_muxnote_bucket_s *dmb;
 	dispatch_muxnote_t dmn;
@@ -225,7 +225,7 @@
 	case DISPATCH_EVFILT_CUSTOM_ADD:
 	case DISPATCH_EVFILT_CUSTOM_OR:
 	case DISPATCH_EVFILT_CUSTOM_REPLACE:
-		du._du->du_wlh = wlh;
+		du._du->du_wlh = DISPATCH_WLH_ANON;
 		return true;
 	case EVFILT_WRITE:
 		events |= EPOLLOUT;
@@ -268,7 +268,8 @@
 			TAILQ_INSERT_TAIL(&dmn->dmn_readers_head, dul, du_link);
 		}
 		dul->du_muxnote = dmn;
-		du._du->du_wlh = DISPATCH_WLH_GLOBAL;
+		dispatch_assert(du._du->du_wlh == NULL);
+		du._du->du_wlh = DISPATCH_WLH_ANON;
 	}
 	return dmn != NULL;
 }
@@ -321,6 +322,7 @@
 			TAILQ_REMOVE(_dispatch_unote_muxnote_bucket(du), dmn, dmn_list);
 			_dispatch_muxnote_dispose(dmn);
 		}
+		dispatch_assert(du._du->du_wlh == DISPATCH_WLH_ANON);
 		du._du->du_wlh = NULL;
 	}
 	return true;
@@ -418,11 +420,6 @@
 {
 }
 
-void
-_dispatch_event_loop_init(void)
-{
-}
-
 static void
 _dispatch_epoll_init(void *context DISPATCH_UNUSED)
 {
@@ -459,7 +456,7 @@
 
 void
 _dispatch_event_loop_poke(dispatch_wlh_t wlh DISPATCH_UNUSED,
-		dispatch_priority_t pri DISPATCH_UNUSED, uint32_t flags DISPATCH_UNUSED)
+		uint64_t dq_state DISPATCH_UNUSED, uint32_t flags DISPATCH_UNUSED)
 {
 	dispatch_once_f(&epoll_init_pred, NULL, _dispatch_epoll_init);
 	dispatch_assume_zero(eventfd_write(_dispatch_eventfd, 1));
@@ -581,4 +578,40 @@
 	}
 }
 
+void
+_dispatch_event_loop_wake_owner(dispatch_sync_context_t dsc,
+		dispatch_wlh_t wlh, uint64_t old_state, uint64_t new_state)
+{
+	(void)dsc; (void)wlh; (void)old_state; (void)new_state;
+}
+
+void
+_dispatch_event_loop_wait_for_ownership(dispatch_sync_context_t dsc)
+{
+	if (dsc->dsc_release_storage) {
+		_dispatch_queue_release_storage(dsc->dc_data);
+	}
+}
+
+void
+_dispatch_event_loop_end_ownership(dispatch_wlh_t wlh, uint64_t old_state,
+		uint64_t new_state, uint32_t flags)
+{
+	(void)wlh; (void)old_state; (void)new_state; (void)flags;
+}
+
+#if DISPATCH_WLH_DEBUG
+void
+_dispatch_event_loop_assert_not_owned(dispatch_wlh_t wlh)
+{
+	(void)wlh;
+}
+#endif
+
+void
+_dispatch_event_loop_leave_immediate(dispatch_wlh_t wlh, uint64_t dq_state)
+{
+	(void)wlh; (void)dq_state;
+}
+
 #endif // DISPATCH_EVENT_BACKEND_EPOLL
diff --git a/src/event/event_internal.h b/src/event/event_internal.h
index c84b353..842c4ee 100644
--- a/src/event/event_internal.h
+++ b/src/event/event_internal.h
@@ -29,9 +29,10 @@
 
 #include "event_config.h"
 
+struct dispatch_sync_context_s;
 typedef struct dispatch_wlh_s *dispatch_wlh_t; // opaque handle
-#define DISPATCH_WLH_GLOBAL ((dispatch_wlh_t)(void*)(~0ul))
-#define DISPATCH_WLH_MANAGER ((dispatch_wlh_t)(void*)(~2ul))
+#define DISPATCH_WLH_ANON       ((dispatch_wlh_t)(void*)(~0ul))
+#define DISPATCH_WLH_MANAGER    ((dispatch_wlh_t)(void*)(~2ul))
 
 #define DISPATCH_UNOTE_DATA_ACTION_SIZE 2
 
@@ -40,15 +41,17 @@
 	uintptr_t du_owner_wref; /* "weak" back reference to the owner object */ \
 	dispatch_wlh_t du_wlh; \
 	uint32_t  du_ident; \
-	int16_t   du_filter; \
-	uint8_t   du_data_action : DISPATCH_UNOTE_DATA_ACTION_SIZE; \
-	uint8_t   du_is_direct : 1; \
-	uint8_t   du_is_timer : 1; \
-	uint8_t   du_memorypressure_override : 1; \
-	uint8_t   du_vmpressure_override : 1; \
-	uint8_t   dmr_async_reply : 1; \
-	uint8_t   dmrr_handler_is_block : 1; \
+	int8_t    du_filter; \
 	os_atomic(bool) dmsr_notification_armed; \
+	uint16_t  du_data_action : DISPATCH_UNOTE_DATA_ACTION_SIZE; \
+	uint16_t  du_is_direct : 1; \
+	uint16_t  du_is_timer : 1; \
+	uint16_t  du_memorypressure_override : 1; \
+	uint16_t  du_vmpressure_override : 1; \
+	uint16_t  du_can_be_wlh : 1; \
+	uint16_t  dmr_async_reply : 1; \
+	uint16_t  dmrr_handler_is_block : 1; \
+	uint16_t  du_unused : 7; \
 	uint32_t  du_fflags; \
 	dispatch_priority_t du_priority
 
@@ -93,6 +96,7 @@
 	uint64_t delay, leeway;
 } dispatch_timer_delay_s;
 
+#define DTH_INVALID_ID  (~0u)
 #define DTH_TARGET_ID   0u
 #define DTH_DEADLINE_ID 1u
 #define DTH_ID_COUNT    2u
@@ -223,11 +227,11 @@
 #define DU_UNREGISTER_ALREADY_DELETED  0x02
 #define DU_UNREGISTER_DISCONNECTED     0x04
 #define DU_UNREGISTER_REPLY_REMOVE     0x08
-#define DU_UNREGISTER_WAKEUP           0x10
 
 typedef struct dispatch_source_type_s {
 	const char *dst_kind;
-	int16_t    dst_filter;
+	int8_t     dst_filter;
+	uint8_t    dst_per_trigger_qos : 1;
 	uint16_t   dst_flags;
 	uint32_t   dst_fflags;
 	uint32_t   dst_mask;
@@ -256,14 +260,10 @@
 extern const dispatch_source_type_s _dispatch_source_type_after;
 
 #if HAVE_MACH
-extern const dispatch_source_type_s _dispatch_source_type_mach_recv_pset;
 extern const dispatch_source_type_s _dispatch_source_type_mach_recv_direct;
-extern const dispatch_source_type_s _dispatch_source_type_mach_recv_direct_pset;
 extern const dispatch_source_type_s _dispatch_mach_type_send;
 extern const dispatch_source_type_s _dispatch_mach_type_recv;
-extern const dispatch_source_type_s _dispatch_mach_type_recv_pset;
 extern const dispatch_source_type_s _dispatch_mach_type_reply;
-extern const dispatch_source_type_s _dispatch_mach_type_reply_pset;
 extern const dispatch_source_type_s _dispatch_xpc_type_sigterm;
 #endif
 
@@ -282,13 +282,17 @@
 #define DISPATCH_DEFERRED_ITEMS_EVENT_COUNT 16
 
 typedef struct dispatch_deferred_items_s {
-#define DISPATCH_PRIORITY_NOSTASH ((dispatch_priority_t)~0u)
-	dispatch_priority_t ddi_stashed_pri;
 	dispatch_queue_t ddi_stashed_rq;
-	dispatch_queue_t ddi_stashed_dq;
+	dispatch_object_t ddi_stashed_dou;
+	dispatch_qos_t ddi_stashed_qos;
 #if DISPATCH_EVENT_BACKEND_KEVENT
-	int ddi_nevents;
-	dispatch_kevent_s ddi_eventlist[DISPATCH_DEFERRED_ITEMS_EVENT_COUNT];
+	dispatch_kevent_t ddi_eventlist;
+	uint16_t ddi_nevents;
+	uint16_t ddi_maxevents;
+	bool     ddi_can_stash;
+	uint16_t ddi_wlh_needs_delete : 1;
+	uint16_t ddi_wlh_needs_update : 1;
+	uint16_t ddi_wlh_servicing : 1;
 #endif
 } dispatch_deferred_items_s, *dispatch_deferred_items_t;
 
@@ -333,32 +337,6 @@
 	_dispatch_thread_setspecific(dispatch_r2k_key, (void *)0);
 }
 
-DISPATCH_ALWAYS_INLINE DISPATCH_PURE
-static inline dispatch_wlh_t
-_dispatch_get_wlh(void)
-{
-	return _dispatch_thread_getspecific(dispatch_wlh_key);
-}
-
-DISPATCH_ALWAYS_INLINE
-static inline void
-_dispatch_set_wlh(dispatch_wlh_t wlh)
-{
-	dispatch_assert(_dispatch_get_wlh() == NULL);
-	dispatch_assert(wlh);
-	_dispatch_debug("wlh[%p]: set current ", wlh);
-	_dispatch_thread_setspecific(dispatch_wlh_key, (void *)wlh);
-}
-
-DISPATCH_ALWAYS_INLINE
-static inline void
-_dispatch_reset_wlh(void)
-{
-	_dispatch_debug("wlh[%p]: clear current ", _dispatch_get_wlh());
-	_dispatch_thread_setspecific(dispatch_wlh_key, NULL);
-	_dispatch_clear_return_to_kernel();
-}
-
 DISPATCH_ALWAYS_INLINE
 static inline bool
 _dispatch_unote_registered(dispatch_unote_t du)
@@ -367,6 +345,14 @@
 }
 
 DISPATCH_ALWAYS_INLINE
+static inline bool
+_dispatch_unote_wlh_changed(dispatch_unote_t du, dispatch_wlh_t expected_wlh)
+{
+	dispatch_wlh_t wlh = du._du->du_wlh;
+	return wlh && wlh != DISPATCH_WLH_ANON && wlh != expected_wlh;
+}
+
+DISPATCH_ALWAYS_INLINE
 static inline dispatch_unote_linkage_t
 _dispatch_unote_get_linkage(dispatch_unote_t du)
 {
@@ -433,14 +419,29 @@
 void _dispatch_unote_dispose(dispatch_unote_t du);
 
 void _dispatch_event_loop_atfork_child(void);
-void _dispatch_event_loop_init(void);
-void _dispatch_event_loop_poke(dispatch_wlh_t wlh, dispatch_priority_t pri,
+#define DISPATCH_EVENT_LOOP_CONSUME_2 DISPATCH_WAKEUP_CONSUME_2
+#define DISPATCH_EVENT_LOOP_OVERRIDE  0x80000000
+void _dispatch_event_loop_poke(dispatch_wlh_t wlh, uint64_t dq_state,
 		uint32_t flags);
-void _dispatch_event_loop_drain(uint32_t flags);
+void _dispatch_event_loop_wake_owner(struct dispatch_sync_context_s *dsc,
+		dispatch_wlh_t wlh, uint64_t old_state, uint64_t new_state);
+void _dispatch_event_loop_wait_for_ownership(
+		struct dispatch_sync_context_s *dsc);
+void _dispatch_event_loop_end_ownership(dispatch_wlh_t wlh,
+		uint64_t old_state, uint64_t new_state, uint32_t flags);
+#if DISPATCH_WLH_DEBUG
+void _dispatch_event_loop_assert_not_owned(dispatch_wlh_t wlh);
+#else
+#undef _dispatch_event_loop_assert_not_owned
+#define _dispatch_event_loop_assert_not_owned(wlh) ((void)wlh)
+#endif
+void _dispatch_event_loop_leave_immediate(dispatch_wlh_t wlh, uint64_t dq_state);
 #if DISPATCH_EVENT_BACKEND_KEVENT
-void _dispatch_event_loop_update(void);
+void _dispatch_event_loop_leave_deferred(dispatch_wlh_t wlh,
+		uint64_t dq_state);
 void _dispatch_event_loop_merge(dispatch_kevent_t events, int nevents);
 #endif
+void _dispatch_event_loop_drain(uint32_t flags);
 void _dispatch_event_loop_timer_arm(unsigned int tidx,
 		dispatch_timer_delay_s range, dispatch_clock_now_cache_t nows);
 void _dispatch_event_loop_timer_delete(unsigned int tidx);
diff --git a/src/event/event_kevent.c b/src/event/event_kevent.c
index 3275888..c15a397 100644
--- a/src/event/event_kevent.c
+++ b/src/event/event_kevent.c
@@ -30,6 +30,7 @@
 #endif
 
 #define DISPATCH_KEVENT_MUXED_MARKER  1ul
+#define DISPATCH_MACH_AUDIT_TOKEN_PID (5)
 
 typedef struct dispatch_muxnote_s {
 	TAILQ_ENTRY(dispatch_muxnote_s) dmn_list;
@@ -38,6 +39,7 @@
 	dispatch_kevent_s dmn_kev;
 } *dispatch_muxnote_t;
 
+static bool _dispatch_timers_force_max_leeway;
 static int _dispatch_kq = -1;
 static struct {
 	dispatch_once_t pred;
@@ -77,7 +79,6 @@
 };
 
 static void _dispatch_kevent_timer_drain(dispatch_kevent_t ke);
-static void _dispatch_kevent_poke_drain(dispatch_kevent_t ke);
 
 #pragma mark -
 #pragma mark kevent debug
@@ -220,7 +221,12 @@
 #define _dispatch_kevent_mgr_debug(verb, kev) _dispatch_kevent_debug(verb, kev)
 #else
 #define _dispatch_kevent_mgr_debug(verb, kev) ((void)verb, (void)kev)
-#endif
+#endif // DISPATCH_MGR_QUEUE_DEBUG
+#if DISPATCH_WLH_DEBUG
+#define _dispatch_kevent_wlh_debug(verb, kev) _dispatch_kevent_debug(verb, kev)
+#else
+#define _dispatch_kevent_wlh_debug(verb, kev)  ((void)verb, (void)kev)
+#endif // DISPATCH_WLH_DEBUG
 
 #if DISPATCH_MACHPORT_DEBUG
 #ifndef MACH_PORT_TYPE_SPREQUEST
@@ -305,9 +311,6 @@
 	return (mach_msg_size_t)ke->ext[1];
 }
 
-#if DISPATCH_EVFILT_MACHPORT_PORTSET_FALLBACK
-static void _dispatch_mach_kevent_portset_drain(dispatch_kevent_t ke);
-#endif
 static void _dispatch_kevent_mach_msg_drain(dispatch_kevent_t ke);
 static inline void _dispatch_mach_host_calendar_change_register(void);
 
@@ -342,20 +345,14 @@
 static void
 _dispatch_kevent_print_error(dispatch_kevent_t ke)
 {
-	dispatch_kevent_t kev = NULL;
-
+	_dispatch_debug("kevent[0x%llx]: handling error",
+			(unsigned long long)ke->udata);
 	if (ke->flags & EV_DELETE) {
 		if (ke->flags & EV_UDATA_SPECIFIC) {
 			if (ke->data == EINPROGRESS) {
 				// deferred EV_DELETE
 				return;
 			}
-#if DISPATCH_KEVENT_TREAT_ENOENT_AS_EINPROGRESS
-			if (ke->data == ENOENT) {
-				// deferred EV_DELETE
-				return;
-			}
-#endif
 		}
 		// for EV_DELETE if the update was deferred we may have reclaimed
 		// the udata already, and it is unsafe to dereference it now.
@@ -369,8 +366,7 @@
 
 #if HAVE_MACH
 	if (ke->filter == EVFILT_MACHPORT && ke->data == ENOTSUP &&
-			(ke->flags & EV_ADD) && _dispatch_evfilt_machport_direct_enabled &&
-			kev && (kev->fflags & MACH_RCV_MSG)) {
+			(ke->flags & EV_ADD) && (ke->fflags & MACH_RCV_MSG)) {
 		DISPATCH_INTERNAL_CRASH(ke->ident,
 				"Missing EVFILT_MACHPORT support for ports");
 	}
@@ -438,7 +434,7 @@
 {
 	if (ke->filter == EVFILT_USER) {
 		_dispatch_kevent_mgr_debug("received", ke);
-		return _dispatch_kevent_poke_drain(ke);
+		return;
 	}
 	_dispatch_kevent_debug("received", ke);
 	if (unlikely(ke->flags & EV_ERROR)) {
@@ -452,8 +448,6 @@
 			ke->data = 0;
 			_dispatch_kevent_debug("synthetic NOTE_EXIT", ke);
 		} else {
-			_dispatch_debug("kevent[0x%llx]: handling error",
-					(unsigned long long)ke->udata);
 			return _dispatch_kevent_print_error(ke);
 		}
 	}
@@ -463,11 +457,6 @@
 
 #if HAVE_MACH
 	if (ke->filter == EVFILT_MACHPORT) {
-#if DISPATCH_EVFILT_MACHPORT_PORTSET_FALLBACK
-		if (ke->udata == 0) {
-			return _dispatch_mach_kevent_portset_drain(ke);
-		}
-#endif
 		if (_dispatch_kevent_mach_msg_size(ke)) {
 			return _dispatch_kevent_mach_msg_drain(ke);
 		}
@@ -533,31 +522,30 @@
 #endif
 
 static void
-_dispatch_kq_init(void *context DISPATCH_UNUSED)
+_dispatch_kq_init(void *context)
 {
+	bool *kq_initialized = context;
+
 	_dispatch_fork_becomes_unsafe();
+	if (unlikely(getenv("LIBDISPATCH_TIMERS_FORCE_MAX_LEEWAY"))) {
+		_dispatch_timers_force_max_leeway = true;
+	}
+	*kq_initialized = true;
+
 #if DISPATCH_USE_KEVENT_WORKQUEUE
 	_dispatch_kevent_workqueue_init();
 	if (_dispatch_kevent_workqueue_enabled) {
 		int r;
 		int kqfd = _dispatch_kq;
-		const dispatch_kevent_s kev[] = {
-			[0] = {
-				.ident = 1,
-				.filter = EVFILT_USER,
-				.flags = EV_ADD|EV_CLEAR,
-				.qos = _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG,
-				.udata = (uintptr_t)DISPATCH_WLH_MANAGER,
-			},
-			[1] = {
-				.ident = 1,
-				.filter = EVFILT_USER,
-				.fflags = NOTE_TRIGGER,
-				.udata = (uintptr_t)DISPATCH_WLH_MANAGER,
-			},
+		const dispatch_kevent_s ke = {
+			.ident = 1,
+			.filter = EVFILT_USER,
+			.flags = EV_ADD|EV_CLEAR,
+			.qos = _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG,
+			.udata = (uintptr_t)DISPATCH_WLH_MANAGER,
 		};
 retry:
-		r = kevent_qos(kqfd, kev, 2, NULL, 0, NULL, NULL,
+		r = kevent_qos(kqfd, &ke, 1, NULL, 0, NULL, NULL,
 				KEVENT_FLAG_WORKQ|KEVENT_FLAG_IMMEDIATE);
 		if (unlikely(r == -1)) {
 			int err = errno;
@@ -579,88 +567,116 @@
 #endif // DISPATCH_USE_MGR_THREAD
 }
 
+#if DISPATCH_USE_MEMORYPRESSURE_SOURCE
+static void _dispatch_memorypressure_init(void);
+#else
+#define _dispatch_memorypressure_init() ((void)0)
+#endif
+
 DISPATCH_NOINLINE
 static int
-_dispatch_kq_update(dispatch_wlh_t wlh, dispatch_kevent_t ke, int n,
+_dispatch_kq_poll(dispatch_wlh_t wlh, dispatch_kevent_t ke, int n,
+		dispatch_kevent_t ke_out, int n_out, void *buf, size_t *avail,
 		uint32_t flags)
 {
 	static dispatch_once_t pred;
-	dispatch_once_f(&pred, NULL, _dispatch_kq_init);
+	bool kq_initialized = false;
+	int r = 0;
 
-	dispatch_kevent_s ke_out[DISPATCH_DEFERRED_ITEMS_EVENT_COUNT];
-	int i, out_n = countof(ke_out), r = 0;
-#if DISPATCH_USE_KEVENT_QOS
-	size_t size, *avail = NULL;
-	void *buf = NULL;
-#endif
-
-#if DISPATCH_DEBUG
-	dispatch_assert(wlh);
-	dispatch_assert((size_t)n <= countof(ke_out));
-	for (i = 0; i < n; i++) {
-		if (ke[i].filter != EVFILT_USER || DISPATCH_MGR_QUEUE_DEBUG) {
-			_dispatch_kevent_debug_n(NULL, ke + i, i, n);
-		}
+	dispatch_once_f(&pred, &kq_initialized, _dispatch_kq_init);
+	if (unlikely(kq_initialized)) {
+		// The calling thread was the one doing the initialization
+		//
+		// The event loop needs the memory pressure source and debug channel,
+		// however creating these will recursively call _dispatch_kq_poll(),
+		// so we can't quite initialize them under the dispatch once.
+		_dispatch_memorypressure_init();
+		_voucher_activity_debug_channel_init();
 	}
-#endif
 
-	wlh = DISPATCH_WLH_GLOBAL;
 
-	if (flags & KEVENT_FLAG_ERROR_EVENTS) {
 #if !DISPATCH_USE_KEVENT_QOS
+	if (flags & KEVENT_FLAG_ERROR_EVENTS) {
 		// emulate KEVENT_FLAG_ERROR_EVENTS
-		for (i = 0; i < n; i++) {
-			ke[i].flags |= EV_RECEIPT;
+		for (r = 0; r < n; r++) {
+			ke[r].flags |= EV_RECEIPT;
 		}
 		out_n = n;
-#endif
-	} else {
-#if DISPATCH_USE_KEVENT_QOS
-		size = DISPATCH_MACH_RECEIVE_MAX_INLINE_MESSAGE_SIZE +
-				DISPATCH_MACH_TRAILER_SIZE;
-		buf = alloca(size);
-		avail = &size;
-#endif
 	}
+#endif
 
 retry:
-	_dispatch_clear_return_to_kernel();
-	if (wlh == DISPATCH_WLH_GLOBAL) {
+	if (wlh == DISPATCH_WLH_ANON) {
 		int kqfd = _dispatch_kq;
 #if DISPATCH_USE_KEVENT_QOS
 		if (_dispatch_kevent_workqueue_enabled) {
 			flags |= KEVENT_FLAG_WORKQ;
 		}
-		r = kevent_qos(kqfd, ke, n, ke_out, out_n, buf, avail, flags);
+		r = kevent_qos(kqfd, ke, n, ke_out, n_out, buf, avail, flags);
 #else
 		const struct timespec timeout_immediately = {}, *timeout = NULL;
 		if (flags & KEVENT_FLAG_IMMEDIATE) timeout = &timeout_immediately;
-		r = kevent(kqfd, ke, n, ke_out, out_n, timeout);
+		r = kevent(kqfd, ke, n, ke_out, n_out, timeout);
 #endif
 	}
 	if (unlikely(r == -1)) {
 		int err = errno;
 		switch (err) {
+		case ENOMEM:
+			_dispatch_temporary_resource_shortage();
+			/* FALLTHROUGH */
 		case EINTR:
 			goto retry;
 		case EBADF:
 			DISPATCH_CLIENT_CRASH(err, "Do not close random Unix descriptors");
-			break;
 		default:
-			(void)dispatch_assume_zero(err);
-			break;
+			DISPATCH_CLIENT_CRASH(err, "Unexpected error from kevent");
 		}
-		return err;
 	}
+	return r;
+}
 
-	if (flags & KEVENT_FLAG_ERROR_EVENTS) {
-		for (i = 0, n = r, r = 0; i < n; i++) {
+DISPATCH_NOINLINE
+static int
+_dispatch_kq_drain(dispatch_wlh_t wlh, dispatch_kevent_t ke, int n,
+		uint32_t flags)
+{
+	dispatch_kevent_s ke_out[DISPATCH_DEFERRED_ITEMS_EVENT_COUNT];
+	bool poll_for_events = !(flags & KEVENT_FLAG_ERROR_EVENTS);
+	int i, n_out = countof(ke_out), r = 0;
+	size_t *avail = NULL;
+	void *buf = NULL;
+
+#if DISPATCH_USE_KEVENT_QOS
+	size_t size;
+	if (poll_for_events) {
+		size = DISPATCH_MACH_RECEIVE_MAX_INLINE_MESSAGE_SIZE +
+				DISPATCH_MACH_TRAILER_SIZE;
+		buf = alloca(size);
+		avail = &size;
+	}
+#endif
+
+#if DISPATCH_DEBUG
+	for (r = 0; r < n; r++) {
+		if (ke[r].filter != EVFILT_USER || DISPATCH_MGR_QUEUE_DEBUG) {
+			_dispatch_kevent_debug_n(NULL, ke + r, r, n);
+		}
+	}
+#endif
+
+	if (poll_for_events) _dispatch_clear_return_to_kernel();
+	n = _dispatch_kq_poll(wlh, ke, n, ke_out, n_out, buf, avail, flags);
+	if (n == 0) {
+		r = 0;
+	} else if (flags & KEVENT_FLAG_ERROR_EVENTS) {
+		for (i = 0, r = 0; i < n; i++) {
 			if ((ke_out[i].flags & EV_ERROR) && (r = (int)ke_out[i].data)) {
 				_dispatch_kevent_drain(&ke_out[i]);
 			}
 		}
 	} else {
-		for (i = 0, n = r, r = 0; i < n; i++) {
+		for (i = 0, r = 0; i < n; i++) {
 			_dispatch_kevent_drain(&ke_out[i]);
 		}
 	}
@@ -671,7 +687,7 @@
 static inline int
 _dispatch_kq_update_one(dispatch_wlh_t wlh, dispatch_kevent_t ke)
 {
-	return _dispatch_kq_update(wlh, ke, 1,
+	return _dispatch_kq_drain(wlh, ke, 1,
 			KEVENT_FLAG_IMMEDIATE | KEVENT_FLAG_ERROR_EVENTS);
 }
 
@@ -679,7 +695,7 @@
 static inline void
 _dispatch_kq_update_all(dispatch_wlh_t wlh, dispatch_kevent_t ke, int n)
 {
-	(void)_dispatch_kq_update(wlh, ke, n,
+	(void)_dispatch_kq_drain(wlh, ke, n,
 			KEVENT_FLAG_IMMEDIATE | KEVENT_FLAG_ERROR_EVENTS);
 }
 
@@ -731,8 +747,8 @@
 _dispatch_kq_deferred_reuse_slot(dispatch_wlh_t wlh,
 		dispatch_deferred_items_t ddi, int slot)
 {
-	if (wlh != DISPATCH_WLH_GLOBAL) _dispatch_set_return_to_kernel();
-	if (unlikely(slot == countof(ddi->ddi_eventlist))) {
+	if (wlh != DISPATCH_WLH_ANON) _dispatch_set_return_to_kernel();
+	if (unlikely(slot == ddi->ddi_maxevents)) {
 		int nevents = ddi->ddi_nevents;
 		ddi->ddi_nevents = 1;
 		_dispatch_kq_update_all(wlh, ddi->ddi_eventlist, nevents);
@@ -762,13 +778,13 @@
 {
 	dispatch_deferred_items_t ddi = _dispatch_deferred_items_get();
 
-	if (ddi && wlh == _dispatch_get_wlh()) {
+	if (ddi && ddi->ddi_maxevents && wlh == _dispatch_get_wlh()) {
 		int slot = _dispatch_kq_deferred_find_slot(ddi, ke->filter, ke->ident,
 				ke->udata);
 		dispatch_kevent_t dk = _dispatch_kq_deferred_reuse_slot(wlh, ddi, slot);
 		*dk = *ke;
-		if (ke->filter != EVFILT_USER || DISPATCH_MGR_QUEUE_DEBUG) {
-			_dispatch_kevent_debug("deferred", ke);
+		if (ke->filter != EVFILT_USER) {
+			_dispatch_kevent_mgr_debug("deferred", ke);
 		}
 	} else {
 		_dispatch_kq_update_one(wlh, ke);
@@ -800,7 +816,9 @@
 
 	if (action_flags & EV_ADD) {
 		// as soon as we register we may get an event delivery and it has to
-		// see this bit already set, else it will not unregister the kevent
+		// see du_wlh already set, else it will not unregister the kevent
+		dispatch_assert(du->du_wlh == NULL);
+		_dispatch_wlh_retain(wlh);
 		du->du_wlh = wlh;
 	}
 
@@ -834,6 +852,7 @@
 done:
 	if (action_flags & EV_ADD) {
 		if (unlikely(r)) {
+			_dispatch_wlh_release(du->du_wlh);
 			du->du_wlh = NULL;
 		}
 		return r == 0;
@@ -842,11 +861,8 @@
 	if (action_flags & EV_DELETE) {
 		if (r == EINPROGRESS) {
 			return false;
-#if DISPATCH_KEVENT_TREAT_ENOENT_AS_EINPROGRESS
-		} else if (r == ENOENT) {
-			return false;
-#endif
 		}
+		_dispatch_wlh_release(du->du_wlh);
 		du->du_wlh = NULL;
 	}
 
@@ -913,7 +929,7 @@
 {
 	struct dispatch_muxnote_bucket_s *dmb;
 	dmb = _dispatch_muxnote_bucket(name, filter);
-	return _dispatch_muxnote_find(dmb, DISPATCH_WLH_GLOBAL, name, filter);
+	return _dispatch_muxnote_find(dmb, DISPATCH_WLH_ANON, name, filter);
 }
 
 DISPATCH_NOINLINE
@@ -971,7 +987,7 @@
 			bool armed = DISPATCH_MACH_NOTIFICATION_ARMED(&dmn->dmn_kev);
 			os_atomic_store2o(du._dmsr, dmsr_notification_armed, armed,relaxed);
 		}
-		du._du->du_wlh = DISPATCH_WLH_GLOBAL;
+		du._du->du_wlh = DISPATCH_WLH_ANON;
 	}
 	return installed;
 }
@@ -986,11 +1002,11 @@
 	case DISPATCH_EVFILT_CUSTOM_ADD:
 	case DISPATCH_EVFILT_CUSTOM_OR:
 	case DISPATCH_EVFILT_CUSTOM_REPLACE:
-		du._du->du_wlh = wlh;
+		du._du->du_wlh = DISPATCH_WLH_ANON;
 		return true;
 	}
 	if (!du._du->du_is_direct) {
-		return _dispatch_unote_register_muxed(du, DISPATCH_WLH_GLOBAL);
+		return _dispatch_unote_register_muxed(du, DISPATCH_WLH_ANON);
 	}
 	return _dispatch_kq_unote_update(wlh, du, EV_ADD | EV_ENABLE);
 }
@@ -1024,6 +1040,7 @@
 	if (dmn->dmn_kev.filter == DISPATCH_EVFILT_MACH_NOTIFICATION) {
 		os_atomic_store2o(du._dmsr, dmsr_notification_armed, false, relaxed);
 	}
+	dispatch_assert(du._du->du_wlh == DISPATCH_WLH_ANON);
 	du._du->du_wlh = NULL;
 	TAILQ_REMOVE(&dmn->dmn_unotes_head, dul, du_link);
 	_TAILQ_TRASH_ENTRY(dul, du_link);
@@ -1090,14 +1107,7 @@
 }
 
 #pragma mark -
-#pragma mark dispatch_loop
-
-#if DISPATCH_USE_MEMORYPRESSURE_SOURCE
-static void _dispatch_memorypressure_init(void);
-#else
-#define _dispatch_memorypressure_init()
-#endif
-static bool _dispatch_timers_force_max_leeway;
+#pragma mark dispatch_event_loop
 
 void
 _dispatch_event_loop_atfork_child(void)
@@ -1108,77 +1118,118 @@
 #endif
 }
 
-DISPATCH_NOINLINE
-void
-_dispatch_event_loop_init(void)
-{
-	if (unlikely(getenv("LIBDISPATCH_TIMERS_FORCE_MAX_LEEWAY"))) {
-		_dispatch_timers_force_max_leeway = true;
-	}
-	_dispatch_memorypressure_init();
-	_voucher_activity_debug_channel_init();
-}
 
 DISPATCH_NOINLINE
 void
-_dispatch_event_loop_poke(dispatch_wlh_t wlh, dispatch_priority_t pri,
-		uint32_t flags)
+_dispatch_event_loop_poke(dispatch_wlh_t wlh, uint64_t dq_state, uint32_t flags)
 {
 	if (wlh == DISPATCH_WLH_MANAGER) {
-		dispatch_assert(!flags);
-		dispatch_kevent_s ke = {
+		dispatch_kevent_s ke = (dispatch_kevent_s){
 			.ident  = 1,
 			.filter = EVFILT_USER,
 			.fflags = NOTE_TRIGGER,
 			.udata = (uintptr_t)DISPATCH_WLH_MANAGER,
 		};
-		return _dispatch_kq_deferred_update(DISPATCH_WLH_GLOBAL, &ke);
-	} else if (wlh && wlh != DISPATCH_WLH_GLOBAL) {
-		dispatch_assert(flags);
-		dispatch_assert(pri);
+		return _dispatch_kq_deferred_update(DISPATCH_WLH_ANON, &ke);
+	} else if (wlh && wlh != DISPATCH_WLH_ANON) {
+		(void)dq_state; (void)flags;
 	}
 	DISPATCH_INTERNAL_CRASH(wlh, "Unsupported wlh configuration");
 }
 
 DISPATCH_NOINLINE
-static void
-_dispatch_kevent_poke_drain(dispatch_kevent_t ke)
-{
-	dispatch_assert(ke->filter == EVFILT_USER);
-	dispatch_wlh_t wlh = (dispatch_wlh_t)ke->udata;
-	dispatch_assert(wlh);
-}
-
-DISPATCH_NOINLINE
 void
 _dispatch_event_loop_drain(uint32_t flags)
 {
 	dispatch_wlh_t wlh = _dispatch_get_wlh();
 	dispatch_deferred_items_t ddi = _dispatch_deferred_items_get();
-	int n = ddi->ddi_nevents;
-	ddi->ddi_nevents = 0;
-	_dispatch_kq_update(wlh, ddi->ddi_eventlist, n, flags);
-}
+	int n;
 
-void
-_dispatch_event_loop_update(void)
-{
-	dispatch_wlh_t wlh = _dispatch_get_wlh();
-	dispatch_deferred_items_t ddi = _dispatch_deferred_items_get();
-	int n = ddi->ddi_nevents;
+again:
+	n = ddi->ddi_nevents;
 	ddi->ddi_nevents = 0;
-	_dispatch_kq_update_all(wlh, ddi->ddi_eventlist, n);
-	dispatch_assert(ddi->ddi_nevents == 0);
-}
+	_dispatch_kq_drain(wlh, ddi->ddi_eventlist, n, flags);
 
-void
-_dispatch_event_loop_merge(dispatch_kevent_t ke, int n)
-{
-	while (n-- > 0) {
-		_dispatch_kevent_drain(ke++);
+	if ((flags & KEVENT_FLAG_IMMEDIATE) &&
+			!(flags & KEVENT_FLAG_ERROR_EVENTS) &&
+			_dispatch_needs_to_return_to_kernel()) {
+		goto again;
 	}
 }
 
+void
+_dispatch_event_loop_merge(dispatch_kevent_t events, int nevents)
+{
+	dispatch_deferred_items_t ddi = _dispatch_deferred_items_get();
+	dispatch_kevent_s kev[nevents];
+
+	// now we can re-use the whole event list, but we need to save one slot
+	// for the event loop poke
+	memcpy(kev, events, sizeof(kev));
+	ddi->ddi_maxevents = DISPATCH_DEFERRED_ITEMS_EVENT_COUNT - 1;
+
+	for (int i = 0; i < nevents; i++) {
+		_dispatch_kevent_drain(&kev[i]);
+	}
+
+	dispatch_wlh_t wlh = _dispatch_get_wlh();
+	if (wlh == DISPATCH_WLH_ANON && ddi->ddi_stashed_dou._do) {
+		if (ddi->ddi_nevents) {
+			// We will drain the stashed item and not return to the kernel
+			// right away. As a consequence, do not delay these updates.
+			_dispatch_event_loop_drain(KEVENT_FLAG_IMMEDIATE |
+					KEVENT_FLAG_ERROR_EVENTS);
+		}
+		_dispatch_trace_continuation_push(ddi->ddi_stashed_rq,
+				ddi->ddi_stashed_dou);
+	}
+}
+
+void
+_dispatch_event_loop_leave_immediate(dispatch_wlh_t wlh, uint64_t dq_state)
+{
+	(void)wlh; (void)dq_state;
+}
+
+void
+_dispatch_event_loop_leave_deferred(dispatch_wlh_t wlh, uint64_t dq_state)
+{
+	(void)wlh; (void)dq_state;
+}
+
+void
+_dispatch_event_loop_wake_owner(dispatch_sync_context_t dsc,
+		dispatch_wlh_t wlh, uint64_t old_state, uint64_t new_state)
+{
+	(void)dsc; (void)wlh; (void)old_state; (void)new_state;
+}
+
+void
+_dispatch_event_loop_wait_for_ownership(dispatch_sync_context_t dsc)
+{
+	if (dsc->dsc_release_storage) {
+		_dispatch_queue_release_storage(dsc->dc_data);
+	}
+}
+
+void
+_dispatch_event_loop_end_ownership(dispatch_wlh_t wlh, uint64_t old_state,
+		uint64_t new_state, uint32_t flags)
+{
+	(void)wlh; (void)old_state; (void)new_state; (void)flags;
+}
+
+#if DISPATCH_WLH_DEBUG
+void
+_dispatch_event_loop_assert_not_owned(dispatch_wlh_t wlh)
+{
+	(void)wlh;
+}
+#endif // DISPATCH_WLH_DEBUG
+
+#pragma mark -
+#pragma mark dispatch_event_loop timers
+
 #define DISPATCH_KEVENT_TIMEOUT_IDENT_MASK (~0ull << 8)
 
 DISPATCH_NOINLINE
@@ -1219,7 +1270,7 @@
 #endif
 	};
 
-	_dispatch_kq_deferred_update(DISPATCH_WLH_GLOBAL, &ke);
+	_dispatch_kq_deferred_update(DISPATCH_WLH_ANON, &ke);
 }
 
 void
@@ -1251,6 +1302,7 @@
 	_dispatch_event_loop_timer_program(tidx, 0, 0, EV_DELETE);
 }
 
+#pragma mark -
 #pragma mark kevent specific sources
 
 static dispatch_unote_t
@@ -1364,12 +1416,16 @@
 		DISPATCH_MEMORYPRESSURE_WARN | \
 		DISPATCH_MEMORYPRESSURE_CRITICAL | \
 		DISPATCH_MEMORYPRESSURE_PROC_LIMIT_WARN | \
-		DISPATCH_MEMORYPRESSURE_PROC_LIMIT_CRITICAL)
+		DISPATCH_MEMORYPRESSURE_PROC_LIMIT_CRITICAL | \
+		DISPATCH_MEMORYPRESSURE_MSL_STATUS)
+
 #define DISPATCH_MEMORYPRESSURE_MALLOC_MASK ( \
 		DISPATCH_MEMORYPRESSURE_WARN | \
 		DISPATCH_MEMORYPRESSURE_CRITICAL | \
 		DISPATCH_MEMORYPRESSURE_PROC_LIMIT_WARN | \
-		DISPATCH_MEMORYPRESSURE_PROC_LIMIT_CRITICAL)
+		DISPATCH_MEMORYPRESSURE_PROC_LIMIT_CRITICAL | \
+		DISPATCH_MEMORYPRESSURE_MSL_STATUS)
+
 
 static void
 _dispatch_memorypressure_handler(void *context)
@@ -1409,8 +1465,7 @@
 {
 	dispatch_source_t ds = dispatch_source_create(
 			DISPATCH_SOURCE_TYPE_MEMORYPRESSURE, 0,
-			DISPATCH_MEMORYPRESSURE_SOURCE_MASK,
-			_dispatch_get_root_queue(DISPATCH_QOS_DEFAULT, true));
+			DISPATCH_MEMORYPRESSURE_SOURCE_MASK, &_dispatch_mgr_q);
 	dispatch_set_context(ds, ds);
 	dispatch_source_set_event_handler_f(ds, _dispatch_memorypressure_handler);
 	dispatch_activate(ds);
@@ -1460,7 +1515,8 @@
 	.dst_mask       = NOTE_MEMORYSTATUS_PRESSURE_NORMAL
 			|NOTE_MEMORYSTATUS_PRESSURE_WARN|NOTE_MEMORYSTATUS_PRESSURE_CRITICAL
 			|NOTE_MEMORYSTATUS_LOW_SWAP|NOTE_MEMORYSTATUS_PROC_LIMIT_WARN
-			|NOTE_MEMORYSTATUS_PROC_LIMIT_CRITICAL,
+			|NOTE_MEMORYSTATUS_PROC_LIMIT_CRITICAL
+			|NOTE_MEMORYSTATUS_MSL_STATUS,
 	.dst_size       = sizeof(struct dispatch_source_refs_s),
 
 #if TARGET_OS_SIMULATOR
@@ -1523,15 +1579,42 @@
 	}
 }
 
+static mach_msg_audit_trailer_t *
+_dispatch_mach_msg_get_audit_trailer(mach_msg_header_t *hdr)
+{
+	mach_msg_trailer_t *tlr = NULL;
+	mach_msg_audit_trailer_t *audit_tlr = NULL;
+	tlr = (mach_msg_trailer_t *)((unsigned char *)hdr +
+			round_msg(hdr->msgh_size));
+	// The trailer should always be of format zero.
+	if (tlr->msgh_trailer_type == MACH_MSG_TRAILER_FORMAT_0) {
+		if (tlr->msgh_trailer_size >= sizeof(mach_msg_audit_trailer_t)) {
+			audit_tlr = (mach_msg_audit_trailer_t *)tlr;
+		}
+	}
+	return audit_tlr;
+}
+
 DISPATCH_NOINLINE
 static void
 _dispatch_mach_notify_source_invoke(mach_msg_header_t *hdr)
 {
 	mig_reply_error_t reply;
+	mach_msg_audit_trailer_t *tlr = NULL;
 	dispatch_assert(sizeof(mig_reply_error_t) == sizeof(union
 		__ReplyUnion___dispatch_libdispatch_internal_protocol_subsystem));
 	dispatch_assert(sizeof(mig_reply_error_t) <
 			DISPATCH_MACH_RECEIVE_MAX_INLINE_MESSAGE_SIZE);
+	tlr = _dispatch_mach_msg_get_audit_trailer(hdr);
+	if (!tlr) {
+		DISPATCH_INTERNAL_CRASH(0, "message received without expected trailer");
+	}
+	if (tlr->msgh_audit.val[DISPATCH_MACH_AUDIT_TOKEN_PID] != 0) {
+		(void)dispatch_assume_zero(
+				tlr->msgh_audit.val[DISPATCH_MACH_AUDIT_TOKEN_PID]);
+		mach_msg_destroy(hdr);
+		return;
+	}
 	boolean_t success = libdispatch_internal_protocol_server(hdr, &reply.Head);
 	if (!success && reply.RetCode == MIG_BAD_ID &&
 			(hdr->msgh_id == HOST_CALENDAR_SET_REPLYID ||
@@ -1756,7 +1839,7 @@
 
 	_dispatch_debug_machport(name);
 	dmn = _dispatch_mach_muxnote_find(name, DISPATCH_EVFILT_MACH_NOTIFICATION);
-	if (!dispatch_assume(dmn)) {
+	if (!dmn) {
 		return;
 	}
 
@@ -1905,204 +1988,6 @@
 #pragma mark mach recv / reply
 #if HAVE_MACH
 
-#if DISPATCH_EVFILT_MACHPORT_PORTSET_FALLBACK
-static mach_port_t _dispatch_mach_portset,  _dispatch_mach_recv_portset;
-static dispatch_kevent_s _dispatch_mach_recv_kevent;
-
-static void
-_dispatch_mach_portset_init(void *context DISPATCH_UNUSED)
-{
-	kern_return_t kr = mach_port_allocate(mach_task_self(),
-			MACH_PORT_RIGHT_PORT_SET, &_dispatch_mach_portset);
-	DISPATCH_VERIFY_MIG(kr);
-	if (unlikely(kr)) {
-		DISPATCH_CLIENT_CRASH(kr,
-				"mach_port_allocate() failed: cannot create port set");
-	}
-
-	dispatch_kevent_s kev = {
-		.filter = EVFILT_MACHPORT,
-		.flags  = EV_ADD|EV_ENABLE,
-		.ident  = _dispatch_mach_portset,
-		.qos    = _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG,
-	};
-	_dispatch_kq_deferred_update(DISPATCH_WLH_GLOBAL, &kev);
-}
-
-static bool
-_dispatch_mach_portset_update(mach_port_t mp, mach_port_t mps)
-{
-	kern_return_t kr;
-
-	_dispatch_debug_machport(mp);
-	kr = mach_port_move_member(mach_task_self(), mp, mps);
-	if (unlikely(kr)) {
-		DISPATCH_VERIFY_MIG(kr);
-		switch (kr) {
-		case KERN_INVALID_RIGHT:
-			if (mps) {
-				_dispatch_bug_mach_client("_dispatch_kevent_machport_enable: "
-						"mach_port_move_member() failed ", kr);
-				break;
-			}
-			//fall through
-		case KERN_INVALID_NAME:
-#if DISPATCH_DEBUG
-			_dispatch_log("Corruption: Mach receive right 0x%x destroyed "
-					"prematurely", mp);
-#endif
-			break;
-		default:
-			(void)dispatch_assume_zero(kr);
-			break;
-		}
-	}
-	if (mps) {
-		return kr == KERN_SUCCESS;
-	}
-	return true;
-}
-
-static mach_port_t
-_dispatch_mach_get_portset(void)
-{
-	static dispatch_once_t pred;
-	dispatch_once_f(&pred, NULL, _dispatch_mach_portset_init);
-	return _dispatch_mach_portset;
-}
-
-static bool
-_dispatch_mach_recv_update_portset_mux(dispatch_muxnote_t dmn)
-{
-	mach_port_t mp = (mach_port_t)dmn->dmn_kev.ident;
-	mach_port_t mps = MACH_PORT_NULL;
-	if (!(dmn->dmn_kev.flags & EV_DELETE)) {
-		mps = _dispatch_mach_get_portset();
-	}
-	return _dispatch_mach_portset_update(mp, mps);
-}
-
-static void
-_dispatch_mach_recv_msg_buf_init(dispatch_kevent_t ke)
-{
-	mach_vm_size_t vm_size = mach_vm_round_page(
-			DISPATCH_MACH_RECEIVE_MAX_INLINE_MESSAGE_SIZE +
-			DISPATCH_MACH_TRAILER_SIZE);
-	mach_vm_address_t vm_addr = vm_page_size;
-	kern_return_t kr;
-
-	while (unlikely(kr = mach_vm_allocate(mach_task_self(), &vm_addr, vm_size,
-			VM_FLAGS_ANYWHERE))) {
-		if (kr != KERN_NO_SPACE) {
-			DISPATCH_CLIENT_CRASH(kr,
-					"Could not allocate mach msg receive buffer");
-		}
-		_dispatch_temporary_resource_shortage();
-		vm_addr = vm_page_size;
-	}
-	ke->ext[0] = (uintptr_t)vm_addr;
-	ke->ext[1] = vm_size;
-}
-
-static void
-_dispatch_mach_recv_portset_init(void *context DISPATCH_UNUSED)
-{
-	kern_return_t kr = mach_port_allocate(mach_task_self(),
-			MACH_PORT_RIGHT_PORT_SET, &_dispatch_mach_recv_portset);
-	DISPATCH_VERIFY_MIG(kr);
-	if (unlikely(kr)) {
-		DISPATCH_CLIENT_CRASH(kr,
-				"mach_port_allocate() failed: cannot create port set");
-	}
-
-	dispatch_assert(DISPATCH_MACH_TRAILER_SIZE ==
-			REQUESTED_TRAILER_SIZE_NATIVE(MACH_RCV_TRAILER_ELEMENTS(
-			DISPATCH_MACH_RCV_TRAILER)));
-
-	_dispatch_mach_recv_kevent = (dispatch_kevent_s){
-		.filter = EVFILT_MACHPORT,
-		.ident  = _dispatch_mach_recv_portset,
-		.flags  = EV_ADD|EV_ENABLE|EV_DISPATCH,
-		.fflags = DISPATCH_MACH_RCV_OPTIONS,
-		.qos    = _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG,
-	};
-	if (!_dispatch_kevent_workqueue_enabled) {
-		_dispatch_mach_recv_msg_buf_init(&_dispatch_mach_recv_kevent);
-	}
-	_dispatch_kq_deferred_update(DISPATCH_WLH_GLOBAL,
-			&_dispatch_mach_recv_kevent);
-}
-
-static mach_port_t
-_dispatch_mach_get_recv_portset(void)
-{
-	static dispatch_once_t pred;
-	dispatch_once_f(&pred, NULL, _dispatch_mach_recv_portset_init);
-	return _dispatch_mach_recv_portset;
-}
-
-static bool
-_dispatch_mach_recv_direct_update_portset_mux(dispatch_muxnote_t dmn)
-{
-	mach_port_t mp = (mach_port_t)dmn->dmn_kev.ident;
-	mach_port_t mps = MACH_PORT_NULL;
-	if (!(dmn->dmn_kev.flags & EV_DELETE)) {
-		mps = _dispatch_mach_get_recv_portset();
-	}
-	return _dispatch_mach_portset_update(mp, mps);
-}
-
-static dispatch_unote_t
-_dispatch_mach_kevent_mach_recv_direct_find(mach_port_t name)
-{
-	dispatch_muxnote_t dmn;
-	dispatch_unote_linkage_t dul;
-
-	dmn = _dispatch_mach_muxnote_find(name, EVFILT_MACHPORT);
-	TAILQ_FOREACH(dul, &dmn->dmn_unotes_head, du_link) {
-		dispatch_unote_t du = _dispatch_unote_linkage_get_unote(dul);
-		if (du._du->du_type->dst_fflags & MACH_RCV_MSG) {
-			return du;
-		}
-	}
-	return DISPATCH_UNOTE_NULL;
-}
-
-DISPATCH_NOINLINE
-static void
-_dispatch_mach_kevent_portset_merge(dispatch_kevent_t ke)
-{
-	mach_port_t name = (mach_port_name_t)ke->data;
-	dispatch_unote_linkage_t dul, dul_next;
-	dispatch_muxnote_t dmn;
-
-	_dispatch_debug_machport(name);
-	dmn = _dispatch_mach_muxnote_find(name, EVFILT_MACHPORT);
-	if (!dispatch_assume(dmn)) {
-		return;
-	}
-	_dispatch_mach_portset_update(name, MACH_PORT_NULL); // emulate EV_DISPATCH
-
-	TAILQ_FOREACH_SAFE(dul, &dmn->dmn_unotes_head, du_link, dul_next) {
-		dispatch_unote_t du = _dispatch_unote_linkage_get_unote(dul);
-		dux_merge_evt(du._du, EV_ENABLE | EV_DISPATCH,
-				DISPATCH_MACH_RECV_MESSAGE, 0, 0);
-	}
-}
-
-DISPATCH_NOINLINE
-static void
-_dispatch_mach_kevent_portset_drain(dispatch_kevent_t ke)
-{
-	if (ke->ident == _dispatch_mach_recv_portset) {
-		return _dispatch_kevent_mach_msg_drain(ke);
-	} else {
-		dispatch_assert(ke->ident == _dispatch_mach_portset);
-		return _dispatch_mach_kevent_portset_merge(ke);
-	}
-}
-#endif // DISPATCH_EVFILT_MACHPORT_PORTSET_FALLBACK
-
 static void
 _dispatch_kevent_mach_msg_recv(dispatch_unote_t du, uint32_t flags,
 		mach_msg_header_t *hdr)
@@ -2119,11 +2004,6 @@
 				"received message with MACH_PORT_NULL port");
 	} else {
 		_dispatch_debug_machport(name);
-#if DISPATCH_EVFILT_MACHPORT_PORTSET_FALLBACK
-		if (du._du == NULL) {
-			du = _dispatch_mach_kevent_mach_recv_direct_find(name);
-		}
-#endif
 		if (likely(du._du)) {
 			return dux_merge_msg(du._du, flags, hdr, siz);
 		}
@@ -2194,25 +2074,6 @@
 		_dispatch_bug_mach_client("_dispatch_kevent_mach_msg_drain: "
 				"message reception failed", kr);
 	}
-
-#if DISPATCH_EVFILT_MACHPORT_PORTSET_FALLBACK
-	if (!(flags & EV_UDATA_SPECIFIC)) {
-		_dispatch_kq_deferred_update(DISPATCH_WLH_GLOBAL,
-				&_dispatch_mach_recv_kevent);
-	}
-#endif
-}
-
-static dispatch_unote_t
-_dispatch_source_mach_recv_create(dispatch_source_type_t dst,
-		uintptr_t handle, unsigned long mask)
-{
-#if DISPATCH_EVFILT_MACHPORT_PORTSET_FALLBACK
-	if (!_dispatch_evfilt_machport_direct_enabled) {
-		dst = &_dispatch_source_type_mach_recv_pset;
-	}
-#endif
-	return _dispatch_unote_create_with_handle(dst, handle, mask);
 }
 
 const dispatch_source_type_s _dispatch_source_type_mach_recv = {
@@ -2222,25 +2083,12 @@
 	.dst_fflags     = 0,
 	.dst_size       = sizeof(struct dispatch_source_refs_s),
 
-	.dst_create     = _dispatch_source_mach_recv_create,
+	.dst_create     = _dispatch_unote_create_with_handle,
 	.dst_merge_evt  = _dispatch_source_merge_evt,
 	.dst_merge_msg  = NULL, // never receives messages directly
-};
 
-#if DISPATCH_EVFILT_MACHPORT_PORTSET_FALLBACK
-const dispatch_source_type_s _dispatch_source_type_mach_recv_pset = {
-	.dst_kind       = "mach_recv (portset)",
-	.dst_filter     = EVFILT_MACHPORT,
-	.dst_flags      = EV_DISPATCH,
-	.dst_fflags     = 0,
-	.dst_size       = sizeof(struct dispatch_source_refs_s),
-
-	.dst_create     = NULL, // never created directly
-	.dst_update_mux = _dispatch_mach_recv_update_portset_mux,
-	.dst_merge_evt  = _dispatch_source_merge_evt,
-	.dst_merge_msg  = NULL, // never receives messages directly
+	.dst_per_trigger_qos = true,
 };
-#endif
 
 static void
 _dispatch_source_mach_recv_direct_merge_msg(dispatch_unote_t du, uint32_t flags,
@@ -2266,18 +2114,6 @@
 	}
 }
 
-static dispatch_unote_t
-_dispatch_source_mach_recv_direct_create(dispatch_source_type_t dst,
-	uintptr_t handle, unsigned long mask)
-{
-#if DISPATCH_EVFILT_MACHPORT_PORTSET_FALLBACK
-	if (!_dispatch_evfilt_machport_direct_enabled) {
-		dst = &_dispatch_source_type_mach_recv_direct_pset;
-	}
-#endif
-	return _dispatch_unote_create_with_handle(dst, handle, mask);
-}
-
 static void
 _dispatch_mach_recv_direct_merge(dispatch_unote_t du,
 		uint32_t flags, uintptr_t data,
@@ -2298,40 +2134,13 @@
 	.dst_fflags     = DISPATCH_MACH_RCV_OPTIONS,
 	.dst_size       = sizeof(struct dispatch_source_refs_s),
 
-	.dst_create     = _dispatch_source_mach_recv_direct_create,
+	.dst_create     = _dispatch_unote_create_with_handle,
 	.dst_merge_evt  = _dispatch_mach_recv_direct_merge,
 	.dst_merge_msg  = _dispatch_source_mach_recv_direct_merge_msg,
+
+	.dst_per_trigger_qos = true,
 };
 
-#if DISPATCH_EVFILT_MACHPORT_PORTSET_FALLBACK
-const dispatch_source_type_s _dispatch_source_type_mach_recv_direct_pset = {
-	.dst_kind       = "direct mach_recv (portset)",
-	.dst_filter     = EVFILT_MACHPORT,
-	.dst_flags      = 0,
-	.dst_fflags     = DISPATCH_MACH_RCV_OPTIONS,
-	.dst_size       = sizeof(struct dispatch_source_refs_s),
-
-	.dst_create     = NULL, // never created directly
-	.dst_update_mux = _dispatch_mach_recv_direct_update_portset_mux,
-	.dst_merge_evt  = _dispatch_mach_recv_direct_merge,
-	.dst_merge_msg  = _dispatch_source_mach_recv_direct_merge_msg,
-};
-#endif
-
-static dispatch_unote_t
-_dispatch_mach_recv_create(dispatch_source_type_t dst,
-	uintptr_t handle, unsigned long mask)
-{
-	// mach channels pass MACH_PORT_NULL until connect
-#if DISPATCH_EVFILT_MACHPORT_PORTSET_FALLBACK
-	if (!_dispatch_evfilt_machport_direct_enabled) {
-		dst = &_dispatch_mach_type_recv_pset;
-	}
-#endif
-	// without handle because the mach code will set the ident later
-	return _dispatch_unote_create_without_handle(dst, handle, mask);
-}
-
 const dispatch_source_type_s _dispatch_mach_type_recv = {
 	.dst_kind       = "mach_recv (channel)",
 	.dst_filter     = EVFILT_MACHPORT,
@@ -2339,38 +2148,14 @@
 	.dst_fflags     = DISPATCH_MACH_RCV_OPTIONS,
 	.dst_size       = sizeof(struct dispatch_mach_recv_refs_s),
 
-	.dst_create     = _dispatch_mach_recv_create,
+	 // without handle because the mach code will set the ident after connect
+	.dst_create     = _dispatch_unote_create_without_handle,
 	.dst_merge_evt  = _dispatch_mach_recv_direct_merge,
 	.dst_merge_msg  = _dispatch_mach_merge_msg,
+
+	.dst_per_trigger_qos = true,
 };
 
-#if DISPATCH_EVFILT_MACHPORT_PORTSET_FALLBACK
-const dispatch_source_type_s _dispatch_mach_type_recv_pset = {
-	.dst_kind       = "mach_recv (channel, portset)",
-	.dst_filter     = EVFILT_MACHPORT,
-	.dst_flags      = 0,
-	.dst_fflags     = DISPATCH_MACH_RCV_OPTIONS,
-	.dst_size       = sizeof(struct dispatch_mach_recv_refs_s),
-
-	.dst_create     = NULL, // never created directly
-	.dst_update_mux = _dispatch_mach_recv_direct_update_portset_mux,
-	.dst_merge_evt  = _dispatch_mach_recv_direct_merge,
-	.dst_merge_msg  = _dispatch_mach_merge_msg,
-};
-#endif
-
-static dispatch_unote_t
-_dispatch_mach_reply_create(dispatch_source_type_t dst,
-	uintptr_t handle, unsigned long mask)
-{
-#if DISPATCH_EVFILT_MACHPORT_PORTSET_FALLBACK
-	if (!_dispatch_evfilt_machport_direct_enabled) {
-		dst = &_dispatch_mach_type_reply_pset;
-	}
-#endif
-	return _dispatch_unote_create_with_handle(dst, handle, mask);
-}
-
 DISPATCH_NORETURN
 static void
 _dispatch_mach_reply_merge_evt(dispatch_unote_t du,
@@ -2388,26 +2173,11 @@
 	.dst_fflags     = DISPATCH_MACH_RCV_OPTIONS,
 	.dst_size       = sizeof(struct dispatch_mach_reply_refs_s),
 
-	.dst_create     = _dispatch_mach_reply_create,
+	.dst_create     = _dispatch_unote_create_with_handle,
 	.dst_merge_evt  = _dispatch_mach_reply_merge_evt,
 	.dst_merge_msg  = _dispatch_mach_reply_merge_msg,
 };
 
-#if DISPATCH_EVFILT_MACHPORT_PORTSET_FALLBACK
-const dispatch_source_type_s _dispatch_mach_type_reply_pset = {
-	.dst_kind       = "mach reply (portset)",
-	.dst_filter     = EVFILT_MACHPORT,
-	.dst_flags      = EV_ONESHOT,
-	.dst_fflags     = DISPATCH_MACH_RCV_OPTIONS,
-	.dst_size       = sizeof(struct dispatch_mach_reply_refs_s),
-
-	.dst_create     = NULL, // never created directly
-	.dst_update_mux = _dispatch_mach_recv_direct_update_portset_mux,
-	.dst_merge_evt  = _dispatch_mach_reply_merge_evt,
-	.dst_merge_msg  = _dispatch_mach_reply_merge_msg,
-};
-#endif
-
 #pragma mark Mach channel SIGTERM notification (for XPC channels only)
 
 const dispatch_source_type_s _dispatch_xpc_type_sigterm = {
diff --git a/src/event/workqueue_internal.h b/src/event/workqueue_internal.h
index 02b6903..94dfe4e 100644
--- a/src/event/workqueue_internal.h
+++ b/src/event/workqueue_internal.h
@@ -27,6 +27,8 @@
 #ifndef __DISPATCH_WORKQUEUE_INTERNAL__
 #define __DISPATCH_WORKQUEUE_INTERNAL__
 
+#define WORKQ_ADDTHREADS_OPTION_OVERCOMMIT 0x1
+
 #define DISPATCH_WORKQ_MAX_PTHREAD_COUNT 255
 
 void _dispatch_workq_worker_register(dispatch_queue_t root_q, qos_class_t cls);
diff --git a/src/firehose/firehose.defs b/src/firehose/firehose.defs
index 7ed7958..e4fdf33 100644
--- a/src/firehose/firehose.defs
+++ b/src/firehose/firehose.defs
@@ -40,12 +40,13 @@
 );
 
 routine
-push(
+push_and_wait(
 RequestPort	comm_port		: mach_port_t;
 SReplyPort	reply_port		: mach_port_make_send_once_t;
 			qos_class		: qos_class_t;
 			for_io			: boolean_t;
-out			push_reply		: firehose_push_reply_t
+out			push_reply		: firehose_push_reply_t;
+out			quarantinedOut	: boolean_t
 );
 
 simpleroutine
diff --git a/src/firehose/firehose_buffer.c b/src/firehose/firehose_buffer.c
index 21692b9..3bb790c 100644
--- a/src/firehose/firehose_buffer.c
+++ b/src/firehose/firehose_buffer.c
@@ -69,6 +69,8 @@
 #define DLOCK_LOCK_DATA_CONTENTION 0
 static void _dispatch_gate_wait(dispatch_gate_t l, uint32_t flags);
 
+#define fcp_quarntined fcp_quarantined
+
 #include <kern/debug.h>
 #include <machine/cpu_number.h>
 #include <kern/thread.h>
@@ -450,23 +452,58 @@
 		}
 	}
 }
+
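+// tell libtrace (hook version 5 and up) that logd has quarantined this
+// process, and flush the streams so pending tracepoints are pushed promptly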
+OS_NOINLINE
+static void
+firehose_client_start_quarantine(firehose_buffer_t fb)
+{
+	if (_voucher_libtrace_hooks->vah_version < 5) return;
+	if (!_voucher_libtrace_hooks->vah_quarantine_starts) return;
+
+	_voucher_libtrace_hooks->vah_quarantine_starts();
+
+	fb->fb_header.fbh_quarantined = true;
+	firehose_buffer_stream_flush(fb, firehose_stream_special);
+	firehose_buffer_stream_flush(fb, firehose_stream_persist);
+	firehose_buffer_stream_flush(fb, firehose_stream_memory);
+}
 #endif // !KERNEL
 
 static void
 firehose_client_merge_updates(firehose_buffer_t fb, bool async_notif,
-		firehose_push_reply_t reply, firehose_bank_state_u *state_out)
+		firehose_push_reply_t reply, bool quarantined,
+		firehose_bank_state_u *state_out)
 {
+	firehose_buffer_header_t fbh = &fb->fb_header;
 	firehose_bank_state_u state;
 	firehose_ring_tail_u otail, ntail;
 	uint64_t old_flushed_pos, bank_updates;
 	uint16_t io_delta = 0;
 	uint16_t mem_delta = 0;
 
-	if (firehose_atomic_maxv2o(&fb->fb_header, fbh_bank.fbb_mem_flushed,
+	if (quarantined) {
+#ifndef KERNEL
+		// this isn't a dispatch_once so that the upcall to libtrace
+		// can actually log itself without blocking on the gate.
+		if (async_notif) {
+			if (os_atomic_xchg(&fbh->fbh_quarantined_state,
+					FBH_QUARANTINE_STARTED, relaxed) !=
+					FBH_QUARANTINE_STARTED) {
+				firehose_client_start_quarantine(fb);
+			}
+		} else if (os_atomic_load(&fbh->fbh_quarantined_state, relaxed) ==
+				FBH_QUARANTINE_NONE) {
+			os_atomic_cmpxchg(&fbh->fbh_quarantined_state, FBH_QUARANTINE_NONE,
+					FBH_QUARANTINE_PENDING, relaxed);
+		}
+#endif
+	}
+
+	if (firehose_atomic_maxv2o(fbh, fbh_bank.fbb_mem_flushed,
 			reply.fpr_mem_flushed_pos, &old_flushed_pos, relaxed)) {
 		mem_delta = (uint16_t)(reply.fpr_mem_flushed_pos - old_flushed_pos);
 	}
-	if (firehose_atomic_maxv2o(&fb->fb_header, fbh_bank.fbb_io_flushed,
+	if (firehose_atomic_maxv2o(fbh, fbh_bank.fbb_io_flushed,
 			reply.fpr_io_flushed_pos, &old_flushed_pos, relaxed)) {
 		io_delta = (uint16_t)(reply.fpr_io_flushed_pos - old_flushed_pos);
 	}
@@ -478,14 +515,14 @@
 
 	if (!mem_delta && !io_delta) {
 		if (state_out) {
-			state_out->fbs_atomic_state = os_atomic_load2o(&fb->fb_header,
+			state_out->fbs_atomic_state = os_atomic_load2o(fbh,
 					fbh_bank.fbb_state.fbs_atomic_state, relaxed);
 		}
 		return;
 	}
 
 	__firehose_critical_region_enter();
-	os_atomic_rmw_loop2o(&fb->fb_header, fbh_ring_tail.frp_atomic_tail,
+	os_atomic_rmw_loop2o(fbh, fbh_ring_tail.frp_atomic_tail,
 			otail.frp_atomic_tail, ntail.frp_atomic_tail, relaxed, {
 		ntail = otail;
 		// overflow handles the generation wraps
@@ -495,7 +532,7 @@
 
 	bank_updates = ((uint64_t)mem_delta << FIREHOSE_BANK_SHIFT(0)) |
 			((uint64_t)io_delta << FIREHOSE_BANK_SHIFT(1));
-	state.fbs_atomic_state = os_atomic_sub2o(&fb->fb_header,
+	state.fbs_atomic_state = os_atomic_sub2o(fbh,
 			fbh_bank.fbb_state.fbs_atomic_state, bank_updates, release);
 	__firehose_critical_region_leave();
 
@@ -503,29 +540,32 @@
 
 	if (async_notif) {
 		if (io_delta) {
-			os_atomic_inc2o(&fb->fb_header, fbh_bank.fbb_io_notifs, relaxed);
+			os_atomic_inc2o(fbh, fbh_bank.fbb_io_notifs, relaxed);
 		}
 		if (mem_delta) {
-			os_atomic_inc2o(&fb->fb_header, fbh_bank.fbb_mem_notifs, relaxed);
+			os_atomic_inc2o(fbh, fbh_bank.fbb_mem_notifs, relaxed);
 		}
 	}
 }
 
 #ifndef KERNEL
+OS_NOT_TAIL_CALLED OS_NOINLINE
 static void
-firehose_client_send_push(firehose_buffer_t fb, bool for_io,
+firehose_client_send_push_and_wait(firehose_buffer_t fb, bool for_io,
 		firehose_bank_state_u *state_out)
 {
 	mach_port_t sendp = fb->fb_header.fbh_sendp;
 	firehose_push_reply_t push_reply = { };
 	qos_class_t qos = qos_class_self();
+	boolean_t quarantined = false;
 	kern_return_t kr;
 
 	if (slowpath(sendp == MACH_PORT_DEAD)) {
 		return;
 	}
 	if (fastpath(sendp)) {
-		kr = firehose_send_push(sendp, qos, for_io, &push_reply);
+		kr = firehose_send_push_and_wait(sendp, qos, for_io,
+				&push_reply, &quarantined);
 		if (likely(kr == KERN_SUCCESS)) {
 			goto success;
 		}
@@ -537,7 +577,8 @@
 
 	sendp = firehose_client_reconnect(fb, sendp);
 	if (fastpath(MACH_PORT_VALID(sendp))) {
-		kr = firehose_send_push(sendp, qos, for_io, &push_reply);
+		kr = firehose_send_push_and_wait(sendp, qos, for_io,
+				&push_reply, &quarantined);
 		if (likely(kr == KERN_SUCCESS)) {
 			goto success;
 		}
@@ -573,12 +614,22 @@
 	// There is only a point for multithreaded clients if:
 	// - enough samples (total_flushes above some limits)
 	// - the ratio is really bad (a push per cycle is definitely a problem)
-	return firehose_client_merge_updates(fb, false, push_reply, state_out);
+	return firehose_client_merge_updates(fb, false, push_reply, quarantined,
+			state_out);
+}
+
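+// marker frame: OS_NOINLINE and OS_NOT_TAIL_CALLED keep this all-caps symbol
+// visible in backtraces of clients blocked because of quarantine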
+OS_NOT_TAIL_CALLED OS_NOINLINE
+static void
+__FIREHOSE_CLIENT_THROTTLED_DUE_TO_HEAVY_LOGGING__(firehose_buffer_t fb,
+		bool for_io, firehose_bank_state_u *state_out)
+{
+	firehose_client_send_push_and_wait(fb, for_io, state_out);
 }
 
 kern_return_t
 firehose_client_push_reply(mach_port_t req_port OS_UNUSED,
-	kern_return_t rtc, firehose_push_reply_t push_reply OS_UNUSED)
+	kern_return_t rtc, firehose_push_reply_t push_reply OS_UNUSED,
+	boolean_t quarantined OS_UNUSED)
 {
 	DISPATCH_INTERNAL_CRASH(rtc, "firehose_push_reply should never be sent "
 			"to the buffer receive port");
@@ -586,12 +637,12 @@
 
 kern_return_t
 firehose_client_push_notify_async(mach_port_t server_port OS_UNUSED,
-	firehose_push_reply_t push_reply)
+	firehose_push_reply_t push_reply, boolean_t quarantined)
 {
 	// see _dispatch_source_merge_mach_msg_direct
 	dispatch_queue_t dq = _dispatch_queue_get_current();
 	firehose_buffer_t fb = dispatch_get_context(dq);
-	firehose_client_merge_updates(fb, true, push_reply, NULL);
+	firehose_client_merge_updates(fb, true, push_reply, quarantined, NULL);
 	return KERN_SUCCESS;
 }
 
@@ -653,6 +704,7 @@
 		.fcp_qos = firehose_buffer_qos_bits_propagate(),
 		.fcp_stream = ask->stream,
 		.fcp_flag_io = ask->for_io,
+		.fcp_quarantined = ask->quarantined,
 	};
 
 	if (privptr) {
@@ -668,7 +720,8 @@
 {
 	firehose_stream_state_u state, new_state;
 	firehose_tracepoint_t ft;
-	firehose_buffer_stream_t fbs = &fb->fb_header.fbh_stream[ask->stream];
+	firehose_buffer_header_t fbh = &fb->fb_header;
+	firehose_buffer_stream_t fbs = &fbh->fbh_stream[ask->stream];
 	uint64_t stamp_and_len;
 
 	if (fastpath(ref)) {
@@ -685,7 +738,7 @@
 		ft->ft_thread = _pthread_threadid_self_np_direct();
 #endif
 		if (ask->stream == firehose_stream_metadata) {
-			os_atomic_or2o(fb, fb_header.fbh_bank.fbb_metadata_bitmap,
+			os_atomic_or2o(fbh, fbh_bank.fbb_metadata_bitmap,
 					1ULL << ref, relaxed);
 		}
 		// release barrier to make the chunk init visible
@@ -716,8 +769,11 @@
 		ft = NULL;
 	}
 
+	// pairs with the one in firehose_buffer_tracepoint_reserve()
+	__firehose_critical_region_leave();
+
 #ifndef KERNEL
-	if (unlikely(state.fss_gate.dgl_lock != _dispatch_tid_self())) {
+	if (unlikely(_dispatch_lock_is_locked_by_self(state.fss_gate.dgl_lock))) {
 		_dispatch_gate_broadcast_slow(&fbs->fbs_state.fss_gate,
 				state.fss_gate.dgl_lock);
 	}
@@ -725,10 +781,16 @@
 	if (unlikely(state.fss_current == FIREHOSE_STREAM_STATE_PRISTINE)) {
 		firehose_buffer_update_limits(fb);
 	}
+
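+	// a synchronous push reply only marks quarantine as pending (see
+	// firehose_client_merge_updates()); actually start it here, where the
+	// libtrace upcall is safe to log again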
+	if (unlikely(os_atomic_load2o(fbh, fbh_quarantined_state, relaxed) ==
+			FBH_QUARANTINE_PENDING)) {
+		if (os_atomic_cmpxchg2o(fbh, fbh_quarantined_state,
+				FBH_QUARANTINE_PENDING, FBH_QUARANTINE_STARTED, relaxed)) {
+			firehose_client_start_quarantine(fb);
+		}
+	}
 #endif // KERNEL
 
-	// pairs with the one in firehose_buffer_tracepoint_reserve()
-	__firehose_critical_region_leave();
 	return ft;
 }
 
@@ -967,7 +1029,12 @@
 		state.fbs_atomic_state =
 				os_atomic_load2o(fbb, fbb_state.fbs_atomic_state, relaxed);
 		while ((state.fbs_atomic_state - bank_inc) & bank_unavail_mask) {
-			firehose_client_send_push(fb, ask->for_io, &state);
+			if (ask->quarantined) {
+				__FIREHOSE_CLIENT_THROTTLED_DUE_TO_HEAVY_LOGGING__(fb,
+						ask->for_io, &state);
+			} else {
+				firehose_client_send_push_and_wait(fb, ask->for_io, &state);
+			}
 			if (slowpath(fb->fb_header.fbh_sendp == MACH_PORT_DEAD)) {
 				// logd was unloaded, give up
 				return NULL;
@@ -999,7 +1066,12 @@
 		if (fastpath(ref = firehose_buffer_ring_try_grow(fbb, fbs_max_ref))) {
 			break;
 		}
-		firehose_client_send_push(fb, ask->for_io, NULL);
+		if (ask->quarantined) {
+			__FIREHOSE_CLIENT_THROTTLED_DUE_TO_HEAVY_LOGGING__(fb,
+					ask->for_io, &state);
+		} else {
+			firehose_client_send_push_and_wait(fb, ask->for_io, NULL);
+		}
 		if (slowpath(fb->fb_header.fbh_sendp == MACH_PORT_DEAD)) {
 			// logd was unloaded, give up
 			break;
@@ -1108,7 +1180,7 @@
 {
 	firehose_buffer_t fb = kernel_firehose_buffer;
 	if (fastpath(fb)) {
-		firehose_client_merge_updates(fb, true, update, NULL);
+		firehose_client_merge_updates(fb, true, update, false, NULL);
 	}
 }
 #endif // KERNEL
diff --git a/src/firehose/firehose_buffer_internal.h b/src/firehose/firehose_buffer_internal.h
index 7679c8c..e41d9cb 100644
--- a/src/firehose/firehose_buffer_internal.h
+++ b/src/firehose/firehose_buffer_internal.h
@@ -171,6 +171,11 @@
 	dispatch_once_t					fbh_notifs_pred OS_ALIGNED(64);
 	dispatch_source_t				fbh_notifs_source;
 	dispatch_unfair_lock_s			fbh_logd_lock;
+#define FBH_QUARANTINE_NONE		0
+#define FBH_QUARANTINE_PENDING	1
+#define FBH_QUARANTINE_STARTED	2
+	uint8_t volatile				fbh_quarantined_state;
+	bool							fbh_quarantined;
 #endif
 	uint64_t						fbh_unused[0];
 } OS_ALIGNED(FIREHOSE_CHUNK_SIZE) *firehose_buffer_header_t;
@@ -187,6 +192,7 @@
 	firehose_stream_t stream;
 	bool	 is_bank_ok;
 	bool     for_io;
+	bool     quarantined;
 	uint64_t stamp;
 } *firehose_tracepoint_query_t;
 
diff --git a/src/firehose/firehose_inline_internal.h b/src/firehose/firehose_inline_internal.h
index abc5f9e..3939ee2 100644
--- a/src/firehose/firehose_inline_internal.h
+++ b/src/firehose/firehose_inline_internal.h
@@ -319,7 +319,7 @@
 #if KERNEL
 		new_state.fss_allocator = (uint32_t)cpu_number();
 #else
-		new_state.fss_allocator = _dispatch_tid_self();
+		new_state.fss_allocator = _dispatch_lock_value_for_self();
 #endif
 		success = os_atomic_cmpxchgv2o(fbs, fbs_state.fss_atomic_state,
 				old_state.fss_atomic_state, new_state.fss_atomic_state,
@@ -335,6 +335,9 @@
 		.privsize = privsize,
 		.stream = stream,
 		.for_io = (firehose_stream_uses_io_bank & (1UL << stream)) != 0,
+#ifndef KERNEL
+		.quarantined = fb->fb_header.fbh_quarantined,
+#endif
 		.stamp = stamp,
 	};
 	return firehose_buffer_tracepoint_reserve_slow(fb, &ask, privptr);
diff --git a/src/firehose/firehose_internal.h b/src/firehose/firehose_internal.h
index 29d1ad2..7040995 100644
--- a/src/firehose/firehose_internal.h
+++ b/src/firehose/firehose_internal.h
@@ -29,6 +29,8 @@
 #define __MigTypeCheck 1
 #endif
 
+#define fcp_quarntined fcp_quarantined
+
 #include <limits.h>
 #include <machine/endian.h>
 #include <mach/mach_types.h>
diff --git a/src/firehose/firehose_reply.defs b/src/firehose/firehose_reply.defs
index 124defa..c080545 100644
--- a/src/firehose/firehose_reply.defs
+++ b/src/firehose/firehose_reply.defs
@@ -33,11 +33,13 @@
 simpleroutine push_reply(
 RequestPort	req_port		: mach_port_move_send_once_t;
 in			rtc				: kern_return_t;
-in			push_reply		: firehose_push_reply_t
+in			push_reply		: firehose_push_reply_t;
+in			quarantined		: boolean_t
 );
 
 simpleroutine push_notify_async(
 RequestPort	comm_port		: mach_port_t;
 in			push_reply		: firehose_push_reply_t;
+in			quarantined		: boolean_t;
 WaitTime	timeout			: natural_t
 );
diff --git a/src/firehose/firehose_server.c b/src/firehose/firehose_server.c
index 52397d6..ba335db 100644
--- a/src/firehose/firehose_server.c
+++ b/src/firehose/firehose_server.c
@@ -31,6 +31,11 @@
 		% 8 == 0, "Make sure atomic fields are properly aligned");
 #endif
 
+typedef struct fs_client_queue_s {
+	struct firehose_client_s *volatile fs_client_head;
+	struct firehose_client_s *volatile fs_client_tail;
+} fs_client_queue_s, *fs_client_queue_t;
+
 static struct firehose_server_s {
 	mach_port_t			fs_bootstrap_port;
 	dispatch_mach_t		fs_mach_channel;
@@ -41,26 +46,161 @@
 	firehose_handler_t	fs_handler;
 
 	firehose_snapshot_t fs_snapshot;
-	bool                fs_io_snapshot_started;
-	bool                fs_mem_snapshot_started;
-
 	int					fs_kernel_fd;
 	firehose_client_t	fs_kernel_client;
 
 	TAILQ_HEAD(, firehose_client_s) fs_clients;
+	os_unfair_lock      fs_clients_lock;
+	fs_client_queue_s	fs_queues[4];
+	dispatch_source_t	fs_sources[4];
 } server_config = {
 	.fs_clients = TAILQ_HEAD_INITIALIZER(server_config.fs_clients),
+	.fs_clients_lock = OS_UNFAIR_LOCK_INIT,
 	.fs_kernel_fd = -1,
 };
 
-#pragma mark -
-#pragma mark firehose client state machine
+OS_ALWAYS_INLINE
+static inline void
+fs_clients_lock(void)
+{
+	os_unfair_lock_lock_with_options(&server_config.fs_clients_lock,
+			OS_UNFAIR_LOCK_DATA_SYNCHRONIZATION);
+}
+
+OS_ALWAYS_INLINE
+static inline void
+fs_clients_unlock(void)
+{
+	os_unfair_lock_unlock(&server_config.fs_clients_lock);
+}
 
 static void firehose_server_demux(firehose_client_t fc,
 		mach_msg_header_t *msg_hdr);
 static void firehose_client_cancel(firehose_client_t fc);
 static void firehose_client_snapshot_finish(firehose_client_t fc,
 		firehose_snapshot_t snapshot, bool for_io);
+static void firehose_client_handle_death(void *ctxt);
+
+#pragma mark -
+#pragma mark firehose client enqueueing
+
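+// each client can sit on one of four MPSC drain queues, indexed by
+// (quarantined * 2 + for_io); each queue is paired with a DATA_OR source,
+// created in firehose_server_init(), that runs firehose_client_drain()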
+OS_ALWAYS_INLINE
+static inline bool
+fs_idx_is_for_io(size_t idx)
+{
+	return idx & 1;
+}
+
+OS_ALWAYS_INLINE
+static inline bool
+fs_queue_is_for_io(fs_client_queue_t q)
+{
+	return (q - server_config.fs_queues) & 1;
+}
+
+OS_ALWAYS_INLINE
+static inline bool
+fs_queue_is_for_quarantined(fs_client_queue_t q)
+{
+	return (q - server_config.fs_queues) & 2;
+}
+
+OS_ALWAYS_INLINE
+static inline fs_client_queue_t
+fs_queue(bool quarantined, bool for_io)
+{
+	return &server_config.fs_queues[quarantined * 2 + for_io];
+}
+
+OS_ALWAYS_INLINE
+static inline dispatch_source_t
+fs_source(bool quarantined, bool for_io)
+{
+	return server_config.fs_sources[quarantined * 2 + for_io];
+}
+
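+// push fc (if any) on the MPSC queue for (quarantined, for_io); the matching
+// DATA_OR source only needs a poke when the queue was empty or when a
+// priority `pp` has to be propagated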
+OS_ALWAYS_INLINE
+static inline void
+firehose_client_push(firehose_client_t fc, pthread_priority_t pp,
+		bool quarantined, bool for_io)
+{
+	fs_client_queue_t queue = fs_queue(quarantined, for_io);
+	if (fc && os_mpsc_push_update_tail(queue, fs_client, fc, fc_next[for_io])) {
+		os_mpsc_push_update_head(queue, fs_client, fc);
+		_dispatch_source_merge_data(fs_source(quarantined, for_io), pp, 1);
+	} else if (pp) {
+		_dispatch_source_merge_data(fs_source(quarantined, for_io), pp, 1);
+	}
+}
+
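+// mark one stream of the client as enqueued for drain; returns false if that
+// stream is already canceled. The client is only pushed when the enqueued bit
+// wasn't already set, otherwise the source is merely poked with the priority.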
+OS_ALWAYS_INLINE
+static inline bool
+firehose_client_wakeup(firehose_client_t fc, pthread_priority_t pp,
+		bool for_io)
+{
+	uintptr_t canceled_bit = FC_STATE_CANCELED(for_io);
+	uintptr_t enqueued_bit = FC_STATE_ENQUEUED(for_io);
+	uintptr_t old_state, new_state;
+
+	os_atomic_rmw_loop(&fc->fc_state, old_state, new_state, relaxed, {
+		if (old_state & canceled_bit) {
+			os_atomic_rmw_loop_give_up(return false);
+		}
+		if (old_state & enqueued_bit) {
+			os_atomic_rmw_loop_give_up(break);
+		}
+		new_state = old_state | enqueued_bit;
+	});
+	firehose_client_push(old_state & enqueued_bit ? NULL : fc, pp,
+			fc->fc_quarantined, for_io);
+	return true;
+}
+
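+// start canceling one stream: set the canceling bit and make sure the client
+// gets dequeued once more so firehose_client_dequeue() can finish the cancel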
+OS_ALWAYS_INLINE
+static inline void
+firehose_client_start_cancel(firehose_client_t fc, bool for_io)
+{
+	uintptr_t canceling_bit = FC_STATE_CANCELING(for_io);
+	uintptr_t canceled_bit = FC_STATE_CANCELED(for_io);
+	uintptr_t enqueued_bit = FC_STATE_ENQUEUED(for_io);
+	uintptr_t old_state, new_state;
+
+	os_atomic_rmw_loop(&fc->fc_state, old_state, new_state, relaxed, {
+		if (old_state & (canceled_bit | canceling_bit)) {
+			os_atomic_rmw_loop_give_up(return);
+		}
+		new_state = old_state | enqueued_bit | canceling_bit;
+	});
+	firehose_client_push(old_state & enqueued_bit ? NULL : fc, 0,
+			fc->fc_quarantined, for_io);
+}
+
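+// called from the drain: clears the enqueued/canceling bits, promoting
+// canceling to canceled; once both streams are canceled, handle_death is
+// scheduled on the io drain queue. Returns whether the client still needs
+// to be drained.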
+OS_ALWAYS_INLINE
+static inline bool
+firehose_client_dequeue(firehose_client_t fc, bool for_io)
+{
+	uintptr_t canceling_bit = FC_STATE_CANCELING(for_io);
+	uintptr_t canceled_bit = FC_STATE_CANCELED(for_io);
+	uintptr_t enqueued_bit = FC_STATE_ENQUEUED(for_io);
+	uintptr_t old_state, new_state;
+
+	os_atomic_rmw_loop(&fc->fc_state, old_state, new_state, relaxed, {
+		new_state = old_state & ~(canceling_bit | enqueued_bit);
+		if (old_state & canceling_bit) {
+			new_state |= canceled_bit;
+		}
+	});
+
+	if (((old_state ^ new_state) & FC_STATE_CANCELED_MASK) &&
+			(new_state & FC_STATE_CANCELED_MASK) == FC_STATE_CANCELED_MASK) {
+		dispatch_async_f(server_config.fs_io_drain_queue, fc,
+				firehose_client_handle_death);
+	}
+	return !(new_state & canceled_bit);
+}
+
+#pragma mark -
+#pragma mark firehose client state machine
 
 static void
 firehose_client_notify(firehose_client_t fc, mach_port_t reply_port)
@@ -82,9 +222,11 @@
 		}
 	} else {
 		if (reply_port == fc->fc_sendp) {
-			kr = firehose_send_push_notify_async(reply_port, push_reply, 0);
+			kr = firehose_send_push_notify_async(reply_port, push_reply,
+					fc->fc_quarantined, 0);
 		} else {
-			kr = firehose_send_push_reply(reply_port, KERN_SUCCESS, push_reply);
+			kr = firehose_send_push_reply(reply_port, KERN_SUCCESS, push_reply,
+					fc->fc_quarantined);
 		}
 		if (kr != MACH_SEND_INVALID_DEST) {
 			DISPATCH_VERIFY_MIG(kr);
@@ -106,18 +248,6 @@
 	return head;
 }
 
-OS_ALWAYS_INLINE
-static inline void
-firehose_client_push_async_merge(firehose_client_t fc, pthread_priority_t pp,
-		bool for_io)
-{
-	if (for_io) {
-		_dispatch_source_merge_data(fc->fc_io_source, pp, 1);
-	} else {
-		_dispatch_source_merge_data(fc->fc_mem_source, pp, 1);
-	}
-}
-
 OS_NOINLINE OS_COLD
 static void
 firehose_client_mark_corrupted(firehose_client_t fc, mach_port_t reply_port)
@@ -131,7 +261,7 @@
 
 	if (reply_port) {
 		kern_return_t kr = firehose_send_push_reply(reply_port, 0,
-				FIREHOSE_PUSH_REPLY_CORRUPTED);
+				FIREHOSE_PUSH_REPLY_CORRUPTED, false);
 		DISPATCH_VERIFY_MIG(kr);
 		dispatch_assume_zero(kr);
 	}
@@ -156,7 +286,7 @@
 
 OS_NOINLINE
 static void
-firehose_client_drain(firehose_client_t fc, mach_port_t port, uint32_t flags)
+firehose_client_drain_one(firehose_client_t fc, mach_port_t port, uint32_t flags)
 {
 	firehose_buffer_t fb = fc->fc_buffer;
 	firehose_chunk_t fbc;
@@ -174,9 +304,7 @@
 		fbh_ring = fb->fb_header.fbh_io_ring;
 		sent_flushed = (uint16_t)fc->fc_io_sent_flushed_pos;
 		flushed = (uint16_t)fc->fc_io_flushed_pos;
-		if (fc->fc_needs_io_snapshot && server_config.fs_io_snapshot_started) {
-			snapshot = server_config.fs_snapshot;
-		}
+		if (fc->fc_needs_io_snapshot) snapshot = server_config.fs_snapshot;
 	} else {
 		evt = FIREHOSE_EVENT_MEM_BUFFER_RECEIVED;
 		_Static_assert(FIREHOSE_EVENT_MEM_BUFFER_RECEIVED ==
@@ -184,9 +312,7 @@
 		fbh_ring = fb->fb_header.fbh_mem_ring;
 		sent_flushed = (uint16_t)fc->fc_mem_sent_flushed_pos;
 		flushed = (uint16_t)fc->fc_mem_flushed_pos;
-		if (fc->fc_needs_mem_snapshot && server_config.fs_mem_snapshot_started) {
-			snapshot = server_config.fs_snapshot;
-		}
+		if (fc->fc_needs_mem_snapshot) snapshot = server_config.fs_snapshot;
 	}
 
 	if (slowpath(fc->fc_memory_corrupted)) {
@@ -273,12 +399,12 @@
 			// and there's more to drain, so optimistically schedule draining
 			// again; this is cheap since the queue is hot, and is fair for
 			// other clients
-			firehose_client_push_async_merge(fc, 0, for_io);
+			firehose_client_wakeup(fc, 0, for_io);
 		}
 		if (count && server_config.fs_kernel_client) {
 			// the kernel is special because it can drop messages, so if we're
 			// draining, poll the kernel each time while we're bound to a thread
-			firehose_client_drain(server_config.fs_kernel_client,
+			firehose_client_drain_one(server_config.fs_kernel_client,
 					MACH_PORT_NULL, flags | FIREHOSE_DRAIN_POLL);
 		}
 	}
@@ -293,20 +419,36 @@
 	// (needs_<for_io>_snapshot: false, memory_corrupted: true). we can safely
 	// silence the corresponding source of drain wake-ups.
 	if (fc->fc_pid) {
-		dispatch_source_cancel(for_io ? fc->fc_io_source : fc->fc_mem_source);
+		firehose_client_start_cancel(fc, for_io);
 	}
 }
 
 static void
-firehose_client_drain_io_async(void *ctx)
+firehose_client_drain(void *ctxt)
 {
-	firehose_client_drain(ctx, MACH_PORT_NULL, FIREHOSE_DRAIN_FOR_IO);
-}
+	fs_client_queue_t queue = ctxt;
+	bool for_io = fs_queue_is_for_io(queue);
+	bool quarantined = fs_queue_is_for_quarantined(queue);
+	firehose_client_t fc, fc_next;
+	size_t clients = 0;
 
-static void
-firehose_client_drain_mem_async(void *ctx)
-{
-	firehose_client_drain(ctx, MACH_PORT_NULL, 0);
+	while (queue->fs_client_tail) {
+		fc = os_mpsc_get_head(queue, fs_client);
+		do {
+			fc_next = os_mpsc_pop_head(queue, fs_client, fc, fc_next[for_io]);
+			if (firehose_client_dequeue(fc, for_io)) {
+				firehose_client_drain_one(fc, MACH_PORT_NULL,
+						for_io ? FIREHOSE_DRAIN_FOR_IO : 0);
+			}
+			// process quarantined clients 4 times more slowly than the others;
+			// re-asyncing every few clients also allows a quarantine
+			// suspension to be discovered faster
+			if (++clients == (quarantined ? 1 : 4)) {
+				dispatch_source_merge_data(fs_source(quarantined, for_io), 1);
+				return;
+			}
+		} while ((fc = fc_next));
+	}
 }
 
 OS_NOINLINE
@@ -335,7 +477,10 @@
 	}
 	server_config.fs_handler(fc, FIREHOSE_EVENT_CLIENT_DIED, NULL);
 
+	fs_clients_lock();
 	TAILQ_REMOVE(&server_config.fs_clients, fc, fc_entry);
+	fs_clients_unlock();
+
 	dispatch_release(fc->fc_mach_channel);
 	fc->fc_mach_channel = NULL;
 	fc->fc_entry.tqe_next = DISPATCH_OBJECT_LISTLESS;
@@ -413,7 +558,7 @@
 			continue;
 		}
 		server_config.fs_handler(fc, FIREHOSE_EVENT_IO_BUFFER_RECEIVED, fbc);
-		if (fc->fc_needs_io_snapshot && server_config.fs_io_snapshot_started) {
+		if (fc->fc_needs_io_snapshot) {
 			snapshot->handler(fc, FIREHOSE_SNAPSHOT_EVENT_IO_BUFFER, fbc);
 		}
 	}
@@ -431,7 +576,7 @@
 
 			mem_bitmap_copy &= ~(1ULL << ref);
 			server_config.fs_handler(fc, FIREHOSE_EVENT_MEM_BUFFER_RECEIVED, fbc);
-			if (fc->fc_needs_mem_snapshot && server_config.fs_mem_snapshot_started) {
+			if (fc->fc_needs_mem_snapshot) {
 				snapshot->handler(fc, FIREHOSE_SNAPSHOT_EVENT_MEM_BUFFER, fbc);
 			}
 		}
@@ -447,16 +592,11 @@
 {
 	mach_msg_header_t *msg_hdr = NULL;
 	firehose_client_t fc = ctx;
-	mach_port_t oldsendp = 0, oldrecvp = 0;
-
-	if (dmsg) {
-		msg_hdr = dispatch_mach_msg_get_msg(dmsg, NULL);
-		oldsendp = msg_hdr->msgh_remote_port;
-		oldrecvp = msg_hdr->msgh_local_port;
-	}
+	mach_port_t port;
 
 	switch (reason) {
 	case DISPATCH_MACH_MESSAGE_RECEIVED:
+		msg_hdr = dispatch_mach_msg_get_msg(dmsg, NULL);
 		if (msg_hdr->msgh_id == MACH_NOTIFY_NO_SENDERS) {
 			_dispatch_debug("FIREHOSE NO_SENDERS (unique_pid: 0x%llx)",
 					firehose_client_get_unique_pid(fc, NULL));
@@ -467,25 +607,33 @@
 		break;
 
 	case DISPATCH_MACH_DISCONNECTED:
-		if (oldsendp) {
-			if (slowpath(oldsendp != fc->fc_sendp)) {
-				DISPATCH_INTERNAL_CRASH(oldsendp,
-						"disconnect event about unknown send-right");
+		msg_hdr = dispatch_mach_msg_get_msg(dmsg, NULL);
+		port = msg_hdr->msgh_remote_port;
+		if (MACH_PORT_VALID(port)) {
+			if (port != fc->fc_sendp) {
+				DISPATCH_INTERNAL_CRASH(port, "Unknown send-right");
 			}
 			firehose_mach_port_send_release(fc->fc_sendp);
 			fc->fc_sendp = MACH_PORT_NULL;
 		}
-		if (oldrecvp) {
-			if (slowpath(oldrecvp != fc->fc_recvp)) {
-				DISPATCH_INTERNAL_CRASH(oldrecvp,
-						"disconnect event about unknown receive-right");
+		port = msg_hdr->msgh_local_port;
+		if (MACH_PORT_VALID(port)) {
+			if (port != fc->fc_recvp) {
+				DISPATCH_INTERNAL_CRASH(port, "Unknown recv-right");
 			}
 			firehose_mach_port_recv_dispose(fc->fc_recvp, fc);
 			fc->fc_recvp = MACH_PORT_NULL;
 		}
-		if (fc->fc_recvp == MACH_PORT_NULL && fc->fc_sendp == MACH_PORT_NULL) {
-			firehose_client_cancel(fc);
+		break;
+
+	case DISPATCH_MACH_CANCELED:
+		if (MACH_PORT_VALID(fc->fc_sendp)) {
+			DISPATCH_INTERNAL_CRASH(fc->fc_sendp, "send-right leak");
 		}
+		if (MACH_PORT_VALID(fc->fc_recvp)) {
+			DISPATCH_INTERNAL_CRASH(fc->fc_recvp, "recv-right leak");
+		}
+		firehose_client_cancel(fc);
 		break;
 	}
 }
@@ -499,10 +647,8 @@
 	// resumed in firehose_client_drain for both memory and I/O
 	dispatch_suspend(fc->fc_kernel_source);
 	dispatch_suspend(fc->fc_kernel_source);
-	dispatch_async_f(server_config.fs_mem_drain_queue,
-			fc, firehose_client_drain_mem_async);
-	dispatch_async_f(server_config.fs_io_drain_queue,
-			fc, firehose_client_drain_io_async);
+	firehose_client_wakeup(fc, 0, false);
+	firehose_client_wakeup(fc, 0, true);
 }
 #endif
 
@@ -511,36 +657,37 @@
 		const struct firehose_client_connected_info_s *fcci)
 {
 	dispatch_assert_queue(server_config.fs_io_drain_queue);
+
+	fs_clients_lock();
 	TAILQ_INSERT_TAIL(&server_config.fs_clients, fc, fc_entry);
+	fs_clients_unlock();
+
 	server_config.fs_handler(fc, FIREHOSE_EVENT_CLIENT_CONNECTED, (void *)fcci);
 	if (!fc->fc_pid) {
 		dispatch_activate(fc->fc_kernel_source);
 	} else {
 		dispatch_mach_connect(fc->fc_mach_channel,
 				fc->fc_recvp, fc->fc_sendp, NULL);
-		dispatch_activate(fc->fc_io_source);
-		dispatch_activate(fc->fc_mem_source);
 	}
 }
 
 static void
 firehose_client_cancel(firehose_client_t fc)
 {
-	dispatch_block_t block;
-
 	_dispatch_debug("client died (unique_pid: 0x%llx",
 			firehose_client_get_unique_pid(fc, NULL));
 
+	if (MACH_PORT_VALID(fc->fc_sendp)) {
+		firehose_mach_port_send_release(fc->fc_sendp);
+		fc->fc_sendp = MACH_PORT_NULL;
+	}
+	if (MACH_PORT_VALID(fc->fc_recvp)) {
+		firehose_mach_port_recv_dispose(fc->fc_recvp, fc);
+		fc->fc_recvp = MACH_PORT_NULL;
+	}
 	fc->fc_use_notifs = false;
-	dispatch_source_cancel(fc->fc_io_source);
-	dispatch_source_cancel(fc->fc_mem_source);
-
-	block = dispatch_block_create(DISPATCH_BLOCK_DETACHED, ^{
-		dispatch_async_f(server_config.fs_io_drain_queue, fc,
-				firehose_client_handle_death);
-	});
-	dispatch_async(server_config.fs_mem_drain_queue, block);
-	_Block_release(block);
+	firehose_client_start_cancel(fc, false);
+	firehose_client_start_cancel(fc, true);
 }
 
 static firehose_client_t
@@ -578,28 +725,10 @@
 	uint64_t unique_pid = fb->fb_header.fbh_uniquepid;
 	firehose_client_t fc = _firehose_client_create(fb);
 	dispatch_mach_t dm;
-	dispatch_source_t ds;
 
 	fc->fc_pid = token->pid ? token->pid : ~0;
 	fc->fc_euid = token->euid;
 	fc->fc_pidversion = token->execcnt;
-	ds = dispatch_source_create(DISPATCH_SOURCE_TYPE_DATA_OR, 0, 0,
-			server_config.fs_mem_drain_queue);
-	_os_object_retain_internal_inline(&fc->fc_as_os_object);
-	dispatch_set_context(ds, fc);
-	dispatch_set_finalizer_f(ds,
-			(dispatch_function_t)_os_object_release_internal);
-	dispatch_source_set_event_handler_f(ds, firehose_client_drain_mem_async);
-	fc->fc_mem_source = ds;
-
-	ds = dispatch_source_create(DISPATCH_SOURCE_TYPE_DATA_OR, 0, 0,
-			server_config.fs_io_drain_queue);
-	_os_object_retain_internal_inline(&fc->fc_as_os_object);
-	dispatch_set_context(ds, fc);
-	dispatch_set_finalizer_f(ds,
-			(dispatch_function_t)_os_object_release_internal);
-	dispatch_source_set_event_handler_f(ds, firehose_client_drain_io_async);
-	fc->fc_io_source = ds;
 
 	_dispatch_debug("FIREHOSE_REGISTER (unique_pid: 0x%llx)", unique_pid);
 	fc->fc_recvp = comm_recvp;
@@ -672,12 +801,6 @@
 {
 	_dispatch_debug("Cleaning up client info for unique_pid 0x%llx",
 			firehose_client_get_unique_pid(fc, NULL));
-
-	dispatch_release(fc->fc_io_source);
-	fc->fc_io_source = NULL;
-
-	dispatch_release(fc->fc_mem_source);
-	fc->fc_mem_source = NULL;
 }
 
 uint64_t
@@ -722,6 +845,12 @@
 	return os_atomic_xchg2o(fc, fc_ctxt, ctxt, relaxed);
 }
 
+void
+firehose_client_initiate_quarantine(firehose_client_t fc)
+{
+	fc->fc_quarantined = true;
+}
+
 #pragma mark -
 #pragma mark firehose server
 
@@ -750,22 +879,24 @@
 firehose_server_init(mach_port_t comm_port, firehose_handler_t handler)
 {
 	struct firehose_server_s *fs = &server_config;
-	dispatch_queue_attr_t attr;
+	dispatch_queue_attr_t attr = DISPATCH_QUEUE_SERIAL_WITH_AUTORELEASE_POOL;
+	dispatch_queue_attr_t attr_ui;
 	dispatch_mach_t dm;
+	dispatch_source_t ds;
 
 	// just reference the string so that it's captured
 	(void)os_atomic_load(&__libfirehose_serverVersionString[0], relaxed);
 
-	attr = dispatch_queue_attr_make_with_qos_class(DISPATCH_QUEUE_SERIAL,
+	attr_ui = dispatch_queue_attr_make_with_qos_class(attr,
 			QOS_CLASS_USER_INITIATED, 0);
 	fs->fs_ipc_queue = dispatch_queue_create_with_target(
-			"com.apple.firehose.ipc", attr, NULL);
+			"com.apple.firehose.ipc", attr_ui, NULL);
 	fs->fs_snapshot_gate_queue = dispatch_queue_create_with_target(
-			"com.apple.firehose.snapshot-gate", DISPATCH_QUEUE_SERIAL, NULL);
+			"com.apple.firehose.snapshot-gate", attr, NULL);
 	fs->fs_io_drain_queue = dispatch_queue_create_with_target(
-			"com.apple.firehose.drain-io", DISPATCH_QUEUE_SERIAL, NULL);
+			"com.apple.firehose.drain-io", attr, NULL);
 	fs->fs_mem_drain_queue = dispatch_queue_create_with_target(
-			"com.apple.firehose.drain-mem", DISPATCH_QUEUE_SERIAL, NULL);
+			"com.apple.firehose.drain-mem", attr, NULL);
 
 	dm = dispatch_mach_create_f("com.apple.firehose.listener",
 			fs->fs_ipc_queue, NULL, firehose_server_handle_mach_event);
@@ -773,6 +904,15 @@
 	fs->fs_mach_channel = dm;
 	fs->fs_handler = _Block_copy(handler);
 	firehose_kernel_client_create();
+
+	for (size_t i = 0; i < countof(fs->fs_sources); i++) {
+		ds = dispatch_source_create(DISPATCH_SOURCE_TYPE_DATA_OR, 0, 0,
+				fs_idx_is_for_io(i) ? server_config.fs_io_drain_queue :
+				server_config.fs_mem_drain_queue);
+		dispatch_set_context(ds, &fs->fs_queues[i]);
+		dispatch_source_set_event_handler_f(ds, firehose_client_drain);
+		fs->fs_sources[i] = ds;
+	}
 }
 
 void
@@ -816,24 +956,23 @@
 	}
 	dispatch_mach_connect(fs->fs_mach_channel, fs->fs_bootstrap_port,
 			MACH_PORT_NULL, NULL);
-}
-
-OS_NOINLINE
-static void
-_firehose_server_cancel(void *ctxt OS_UNUSED)
-{
-	firehose_client_t fc;
-	TAILQ_FOREACH(fc, &server_config.fs_clients, fc_entry) {
-		dispatch_mach_cancel(fc->fc_mach_channel);
+	for (size_t i = 0; i < countof(fs->fs_sources); i++) {
+		dispatch_activate(fs->fs_sources[i]);
 	}
 }
 
 void
 firehose_server_cancel(void)
 {
+	firehose_client_t fc;
+
 	dispatch_mach_cancel(server_config.fs_mach_channel);
-	dispatch_async_f(server_config.fs_io_drain_queue, NULL,
-			_firehose_server_cancel);
+
+	fs_clients_lock();
+	TAILQ_FOREACH(fc, &server_config.fs_clients, fc_entry) {
+		dispatch_mach_cancel(fc->fc_mach_channel);
+	}
+	fs_clients_unlock();
 }
 
 dispatch_queue_t
@@ -854,6 +993,37 @@
 	return dq;
 }
 
+void
+firehose_server_quarantined_suspend(firehose_server_queue_t which)
+{
+	switch (which) {
+	case FIREHOSE_SERVER_QUEUE_IO:
+		dispatch_suspend(fs_source(true, true));
+		break;
+	case FIREHOSE_SERVER_QUEUE_MEMORY:
+		dispatch_suspend(fs_source(true, false));
+		break;
+	default:
+		DISPATCH_INTERNAL_CRASH(which, "Invalid firehose server queue type");
+	}
+}
+
+void
+firehose_server_quarantined_resume(firehose_server_queue_t which)
+{
+	switch (which) {
+	case FIREHOSE_SERVER_QUEUE_IO:
+		dispatch_resume(fs_source(true, true));
+		break;
+	case FIREHOSE_SERVER_QUEUE_MEMORY:
+		dispatch_resume(fs_source(true, false));
+		break;
+	default:
+		DISPATCH_INTERNAL_CRASH(which, "Invalid firehose server queue type");
+	}
+}
+
+
 #pragma mark -
 #pragma mark firehose snapshot and peeking
 
@@ -966,73 +1136,35 @@
 }
 
 static void
-firehose_snapshot_start(void *ctxt)
+firehose_snapshot_tickle_clients(firehose_snapshot_t fs, bool for_io)
 {
-	firehose_snapshot_t snapshot = ctxt;
-	firehose_client_t fci;
+	firehose_client_t fc;
 	long n = 0;
 
-	// 0. we need to be on the IO queue so that client connection and/or death
-	//    cannot happen concurrently
-	dispatch_assert_queue(server_config.fs_io_drain_queue);
-	server_config.fs_snapshot = snapshot;
-
-	// 1. mark all the clients participating in the current snapshot
-	//    and enter the group for each bit set
-	TAILQ_FOREACH(fci, &server_config.fs_clients, fc_entry) {
-		if (!fci->fc_pid) {
+	fs_clients_lock();
+	TAILQ_FOREACH(fc, &server_config.fs_clients, fc_entry) {
+		if (slowpath(fc->fc_memory_corrupted)) {
+			continue;
+		}
+		if (!fc->fc_pid) {
 #if TARGET_OS_SIMULATOR
 			continue;
 #endif
-		}
-		if (slowpath(fci->fc_memory_corrupted)) {
+		} else if (!firehose_client_wakeup(fc, 0, for_io)) {
 			continue;
 		}
-		fci->fc_needs_io_snapshot = true;
-		fci->fc_needs_mem_snapshot = true;
-		n += 2;
+		n++;
+		if (for_io) {
+			fc->fc_needs_io_snapshot = true;
+		} else {
+			fc->fc_needs_mem_snapshot = true;
+		}
 	}
-	if (n) {
-		// cheating: equivalent to dispatch_group_enter() n times
-		// without the acquire barriers that we don't need
-		os_atomic_add2o(snapshot->fs_group, dg_value, n, relaxed);
-	}
+	fs_clients_unlock();
 
-	dispatch_async(server_config.fs_mem_drain_queue, ^{
-		// 2. start the fs_mem_snapshot, this is what triggers the snapshot
-		//    logic from _drain() or handle_death()
-		server_config.fs_mem_snapshot_started = true;
-		snapshot->handler(NULL, FIREHOSE_SNAPSHOT_EVENT_MEM_START, NULL);
-
-		dispatch_async(server_config.fs_io_drain_queue, ^{
-			firehose_client_t fcj;
-
-			// 3. start the fs_io_snapshot, this is what triggers the snapshot
-			//    logic from _drain() or handle_death()
-			//    29868879: must always happen after the memory snapshot started
-			server_config.fs_io_snapshot_started = true;
-			snapshot->handler(NULL, FIREHOSE_SNAPSHOT_EVENT_IO_START, NULL);
-
-			// match group_enter from firehose_snapshot() after MEM+IO_START
-			dispatch_group_leave(snapshot->fs_group);
-
-			// 3. tickle all the clients. the list of clients may have changed
-			//    since step 1, but worry not - new clients don't have
-			//    fc_needs_*_snapshot set so drain is harmless; clients that
-			//    were removed from the list have already left the group
-			//    (see firehose_client_finalize())
-			TAILQ_FOREACH(fcj, &server_config.fs_clients, fc_entry) {
-				if (!fcj->fc_pid) {
-#if !TARGET_OS_SIMULATOR
-					firehose_client_kernel_source_handle_event(fcj);
-#endif
-				} else {
-					dispatch_source_merge_data(fcj->fc_io_source, 1);
-					dispatch_source_merge_data(fcj->fc_mem_source, 1);
-				}
-			}
-		});
-	});
+	// cheating: equivalent to dispatch_group_enter() n times
+	// without the acquire barriers that we don't need
+	if (n) os_atomic_add2o(fs->fs_group, dg_value, n, relaxed);
 }
 
 static void
@@ -1042,8 +1174,6 @@
 
 	fs->handler(NULL, FIREHOSE_SNAPSHOT_EVENT_COMPLETE, NULL);
 	server_config.fs_snapshot = NULL;
-	server_config.fs_mem_snapshot_started = false;
-	server_config.fs_io_snapshot_started = false;
 
 	dispatch_release(fs->fs_group);
 	Block_release(fs->handler);
@@ -1056,10 +1186,37 @@
 static void
 firehose_snapshot_gate(void *ctxt)
 {
+	firehose_snapshot_t fs = ctxt;
+
 	// prevent other snapshots from running until done
+
 	dispatch_suspend(server_config.fs_snapshot_gate_queue);
-	dispatch_async_f(server_config.fs_io_drain_queue, ctxt,
-			firehose_snapshot_start);
+
+	server_config.fs_snapshot = fs;
+	dispatch_group_async(fs->fs_group, server_config.fs_mem_drain_queue, ^{
+		// start the fs_mem_snapshot, this is what triggers the snapshot
+		// logic from _drain() or handle_death()
+		fs->handler(NULL, FIREHOSE_SNAPSHOT_EVENT_MEM_START, NULL);
+		firehose_snapshot_tickle_clients(fs, false);
+
+		dispatch_group_async(fs->fs_group, server_config.fs_io_drain_queue, ^{
+			// start the fs_io_snapshot, this is what triggers the snapshot
+			// logic from _drain() or handle_death()
+			// 29868879: must always happen after the memory snapshot started
+			fs->handler(NULL, FIREHOSE_SNAPSHOT_EVENT_IO_START, NULL);
+			firehose_snapshot_tickle_clients(fs, true);
+
+#if !TARGET_OS_SIMULATOR
+			if (server_config.fs_kernel_client) {
+				firehose_client_kernel_source_handle_event(
+						server_config.fs_kernel_client);
+			}
+#endif
+		});
+	});
+
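+	// fires once both START blocks and every client tickled into fs_group
+	// have left the group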
+	dispatch_group_notify_f(fs->fs_group, server_config.fs_io_drain_queue,
+			fs, firehose_snapshot_finish);
 }
 
 void
@@ -1070,12 +1227,6 @@
 	snapshot->handler = Block_copy(handler);
 	snapshot->fs_group = dispatch_group_create();
 
-	// keep the group entered until IO_START and MEM_START have been sent
-	// See firehose_snapshot_start()
-	dispatch_group_enter(snapshot->fs_group);
-	dispatch_group_notify_f(snapshot->fs_group, server_config.fs_io_drain_queue,
-			snapshot, firehose_snapshot_finish);
-
 	dispatch_async_f(server_config.fs_snapshot_gate_queue, snapshot,
 			firehose_snapshot_gate);
 }
@@ -1166,15 +1317,16 @@
 		if (expects_notifs && !fc->fc_use_notifs) {
 			fc->fc_use_notifs = true;
 		}
-		firehose_client_push_async_merge(fc, pp, for_io);
+		firehose_client_wakeup(fc, pp, for_io);
 	}
 	return KERN_SUCCESS;
 }
 
 kern_return_t
-firehose_server_push(mach_port_t server_port OS_UNUSED,
+firehose_server_push_and_wait(mach_port_t server_port OS_UNUSED,
 		mach_port_t reply_port, qos_class_t qos, boolean_t for_io,
-		firehose_push_reply_t *push_reply OS_UNUSED)
+		firehose_push_reply_t *push_reply OS_UNUSED,
+		boolean_t *quarantinedOut OS_UNUSED)
 {
 	firehose_client_t fc = cur_client_info;
 	dispatch_block_flags_t flags = DISPATCH_BLOCK_ENFORCE_QOS_CLASS;
@@ -1196,7 +1348,7 @@
 	}
 
 	block = dispatch_block_create_with_qos_class(flags, qos, 0, ^{
-		firehose_client_drain(fc, reply_port,
+		firehose_client_drain_one(fc, reply_port,
 				for_io ? FIREHOSE_DRAIN_FOR_IO : 0);
 	});
 	dispatch_async(q, block);
diff --git a/src/firehose/firehose_server_internal.h b/src/firehose/firehose_server_internal.h
index d805167..13f52b8 100644
--- a/src/firehose/firehose_server_internal.h
+++ b/src/firehose/firehose_server_internal.h
@@ -36,6 +36,7 @@
 		struct _os_object_s fc_as_os_object;
 	};
 	TAILQ_ENTRY(firehose_client_s) fc_entry;
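+	// linkage on the server's MPSC drain queues, one slot per stream
+	// (fc_next[0]: mem, fc_next[1]: io)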
+	struct firehose_client_s *volatile fc_next[2];
 
 	firehose_buffer_t	fc_buffer;
 	uint64_t volatile	fc_mem_sent_flushed_pos;
@@ -43,14 +44,27 @@
 	uint64_t volatile	fc_io_sent_flushed_pos;
 	uint64_t volatile	fc_io_flushed_pos;
 
+#define FC_STATE_ENQUEUED(for_io)      (0x0001u << (for_io))
+#define FC_STATE_MEM_ENQUEUED           0x0001
+#define FC_STATE_IO_ENQUEUED            0x0002
+
+#define FC_STATE_CANCELING(for_io)     (0x0010u << (for_io))
+#define FC_STATE_MEM_CANCELING          0x0010
+#define FC_STATE_IO_CANCELING           0x0020
+
+#define FC_STATE_CANCELED(for_io)      (0x0100u << (for_io))
+#define FC_STATE_MEM_CANCELED           0x0100
+#define FC_STATE_IO_CANCELED            0x0200
+#define FC_STATE_CANCELED_MASK          0x0300
+
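+	// fc_state combines the bits above; within each nibble, bit 0 is the
+	// mem stream and bit 1 the io stream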
+	uintptr_t volatile	fc_state;
+
 	void *volatile		fc_ctxt;
 
 	union {
 		dispatch_mach_t	fc_mach_channel;
 		dispatch_source_t fc_kernel_source;
 	};
-	dispatch_source_t	fc_io_source;
-	dispatch_source_t	fc_mem_source;
 	mach_port_t			fc_recvp;
 	mach_port_t			fc_sendp;
 	os_unfair_lock      fc_lock;
@@ -61,6 +75,7 @@
 	bool				fc_memory_corrupted;
 	bool				fc_needs_io_snapshot;
 	bool				fc_needs_mem_snapshot;
+	bool				fc_quarantined;
 };
 
 void
diff --git a/src/init.c b/src/init.c
index 22a61e3..dea5e87 100644
--- a/src/init.c
+++ b/src/init.c
@@ -21,6 +21,8 @@
 // Contains exported global data and initialization & other routines that must
 // only exist once in the shared library even when resolvers are used.
 
+// NOTE: this file must not contain any atomic operations
+
 #include "internal.h"
 
 #if HAVE_MACH
@@ -146,10 +148,6 @@
 #if DISPATCH_USE_KEVENT_WORKQUEUE && DISPATCH_USE_MGR_THREAD
 int _dispatch_kevent_workqueue_enabled;
 #endif
-#if DISPATCH_USE_EVFILT_MACHPORT_DIRECT && \
-		DISPATCH_EVFILT_MACHPORT_PORTSET_FALLBACK
-int _dispatch_evfilt_machport_direct_enabled;
-#endif
 
 DISPATCH_HW_CONFIG();
 uint8_t _dispatch_unsafe_fork;
@@ -173,33 +171,6 @@
 	return _dispatch_child_of_unsafe_fork;
 }
 
-DISPATCH_NOINLINE
-void
-_dispatch_fork_becomes_unsafe_slow(void)
-{
-	uint8_t value = os_atomic_or(&_dispatch_unsafe_fork,
-			_DISPATCH_UNSAFE_FORK_MULTITHREADED, relaxed);
-	if (value & _DISPATCH_UNSAFE_FORK_PROHIBIT) {
-		DISPATCH_CLIENT_CRASH(0, "Transition to multithreaded is prohibited");
-	}
-}
-
-DISPATCH_NOINLINE
-void
-_dispatch_prohibit_transition_to_multithreaded(bool prohibit)
-{
-	if (prohibit) {
-		uint8_t value = os_atomic_or(&_dispatch_unsafe_fork,
-				_DISPATCH_UNSAFE_FORK_PROHIBIT, relaxed);
-		if (value & _DISPATCH_UNSAFE_FORK_MULTITHREADED) {
-			DISPATCH_CLIENT_CRASH(0, "The executable is already multithreaded");
-		}
-	} else {
-		os_atomic_and(&_dispatch_unsafe_fork,
-				(uint8_t)~_DISPATCH_UNSAFE_FORK_PROHIBIT, relaxed);
-	}
-}
-
 const struct dispatch_queue_offsets_s dispatch_queue_offsets = {
 	.dqo_version = 6,
 	.dqo_label = offsetof(struct dispatch_queue_s, dq_label),
@@ -238,10 +209,10 @@
 	.do_targetq = &_dispatch_root_queues[
 			DISPATCH_ROOT_QUEUE_IDX_DEFAULT_QOS_OVERCOMMIT],
 #endif
-	.dq_state = DISPATCH_QUEUE_STATE_INIT_VALUE(1),
+	.dq_state = DISPATCH_QUEUE_STATE_INIT_VALUE(1) |
+			DISPATCH_QUEUE_ROLE_BASE_ANON,
 	.dq_label = "com.apple.main-thread",
 	.dq_atomic_flags = DQF_THREAD_BOUND | DQF_CANNOT_TRYSYNC | DQF_WIDTH(1),
-	.dq_wlh = DISPATCH_WLH_GLOBAL, // TODO: main thread wlh
 	.dq_serialnum = 1,
 };
 
@@ -426,6 +397,7 @@
 	.do_debug = dispatch_queue_debug,
 );
 
+
 DISPATCH_VTABLE_SUBCLASS_INSTANCE(queue_main, queue,
 	.do_type = DISPATCH_QUEUE_SERIAL_TYPE,
 	.do_kind = "main-queue",
@@ -449,7 +421,7 @@
 DISPATCH_VTABLE_SUBCLASS_INSTANCE(queue_mgr, queue,
 	.do_type = DISPATCH_QUEUE_MGR_TYPE,
 	.do_kind = "mgr-queue",
-	.do_push = _dispatch_queue_push,
+	.do_push = _dispatch_mgr_queue_push,
 	.do_invoke = _dispatch_mgr_thread,
 	.do_wakeup = _dispatch_mgr_queue_wakeup,
 	.do_debug = dispatch_queue_debug,
@@ -514,6 +486,7 @@
 	.do_kind = "data",
 	.do_dispose = _dispatch_data_dispose,
 	.do_debug = _dispatch_data_debug,
+	.do_set_targetq = (void*)_dispatch_data_set_target_queue,
 );
 #endif
 
@@ -552,6 +525,41 @@
 }
 
 #pragma mark -
+#pragma mark dispatch_data globals
+
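+// these destructor blocks are only compared by pointer to select the disposal
+// path; they are never actually invoked, hence the crashing bodies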
+const dispatch_block_t _dispatch_data_destructor_free = ^{
+	DISPATCH_INTERNAL_CRASH(0, "free destructor called");
+};
+
+const dispatch_block_t _dispatch_data_destructor_none = ^{
+	DISPATCH_INTERNAL_CRASH(0, "none destructor called");
+};
+
+#if !HAVE_MACH
+const dispatch_block_t _dispatch_data_destructor_munmap = ^{
+	DISPATCH_INTERNAL_CRASH(0, "munmap destructor called");
+};
+#else
+// _dispatch_data_destructor_munmap is a linker alias to the following
+const dispatch_block_t _dispatch_data_destructor_vm_deallocate = ^{
+	DISPATCH_INTERNAL_CRASH(0, "vmdeallocate destructor called");
+};
+#endif
+
+const dispatch_block_t _dispatch_data_destructor_inline = ^{
+	DISPATCH_INTERNAL_CRASH(0, "inline destructor called");
+};
+
+struct dispatch_data_s _dispatch_data_empty = {
+#if DISPATCH_DATA_IS_BRIDGED_TO_NSDATA
+	.do_vtable = DISPATCH_DATA_EMPTY_CLASS,
+#else
+	DISPATCH_GLOBAL_OBJECT_HEADER(data),
+	.do_next = DISPATCH_OBJECT_LISTLESS,
+#endif
+};
+
+#pragma mark -
 #pragma mark dispatch_bug
 
 static char _dispatch_build[16];
@@ -1147,16 +1155,17 @@
 	}
 }
 
-void*
-_dispatch_last_resort_autorelease_pool_push(void)
+void
+_dispatch_last_resort_autorelease_pool_push(dispatch_invoke_context_t dic)
 {
-	return _dispatch_autorelease_pool_push();
+	dic->dic_autorelease_pool = _dispatch_autorelease_pool_push();
 }
 
 void
-_dispatch_last_resort_autorelease_pool_pop(void *pool)
+_dispatch_last_resort_autorelease_pool_pop(dispatch_invoke_context_t dic)
 {
-	_dispatch_autorelease_pool_pop(pool);
+	_dispatch_autorelease_pool_pop(dic->dic_autorelease_pool);
+	dic->dic_autorelease_pool = NULL;
 }
 
 #endif // DISPATCH_COCOA_COMPAT
@@ -1199,22 +1208,16 @@
 _dispatch_mach_notify_port_destroyed(mach_port_t notify DISPATCH_UNUSED,
 		mach_port_t name)
 {
-	kern_return_t kr;
-	// this function should never be called
-	(void)dispatch_assume_zero(name);
-	kr = mach_port_mod_refs(mach_task_self(), name, MACH_PORT_RIGHT_RECEIVE,-1);
-	DISPATCH_VERIFY_MIG(kr);
-	(void)dispatch_assume_zero(kr);
-	return KERN_SUCCESS;
+	DISPATCH_INTERNAL_CRASH(name, "unexpected receipt of port-destroyed");
+	return KERN_FAILURE;
 }
 
 kern_return_t
-_dispatch_mach_notify_no_senders(mach_port_t notify,
-		mach_port_mscount_t mscnt DISPATCH_UNUSED)
+_dispatch_mach_notify_no_senders(mach_port_t notify DISPATCH_UNUSED,
+		mach_port_mscount_t mscnt)
 {
-	// this function should never be called
-	(void)dispatch_assume_zero(notify);
-	return KERN_SUCCESS;
+	DISPATCH_INTERNAL_CRASH(mscnt, "unexpected receipt of no-more-senders");
+	return KERN_FAILURE;
 }
 
 kern_return_t
diff --git a/src/inline_internal.h b/src/inline_internal.h
index 53548ed..0ed9e51 100644
--- a/src/inline_internal.h
+++ b/src/inline_internal.h
@@ -101,6 +101,13 @@
 
 DISPATCH_ALWAYS_INLINE
 static inline bool
+_dispatch_object_is_queue(dispatch_object_t dou)
+{
+	return _dispatch_object_has_vtable(dou) && dx_vtable(dou._do)->do_push;
+}
+
+DISPATCH_ALWAYS_INLINE
+static inline bool
 _dispatch_object_is_continuation(dispatch_object_t dou)
 {
 	if (_dispatch_object_has_vtable(dou)) {
@@ -167,9 +174,9 @@
 
 DISPATCH_ALWAYS_INLINE
 static inline _os_object_t
-_os_object_retain_internal_inline(_os_object_t obj)
+_os_object_retain_internal_n_inline(_os_object_t obj, int n)
 {
-	int ref_cnt = _os_object_refcnt_inc(obj);
+	int ref_cnt = _os_object_refcnt_add(obj, n);
 	if (unlikely(ref_cnt <= 0)) {
 		_OS_OBJECT_CLIENT_CRASH("Resurrection of an object");
 	}
@@ -178,23 +185,20 @@
 
 DISPATCH_ALWAYS_INLINE
 static inline void
-_os_object_release_internal_inline_no_dispose(_os_object_t obj)
+_os_object_release_internal_n_no_dispose_inline(_os_object_t obj, int n)
 {
-	int ref_cnt = _os_object_refcnt_dec(obj);
+	int ref_cnt = _os_object_refcnt_sub(obj, n);
 	if (likely(ref_cnt >= 0)) {
 		return;
 	}
-	if (ref_cnt == 0) {
-		_OS_OBJECT_CLIENT_CRASH("Unexpected release of an object");
-	}
 	_OS_OBJECT_CLIENT_CRASH("Over-release of an object");
 }
 
 DISPATCH_ALWAYS_INLINE
 static inline void
-_os_object_release_internal_inline(_os_object_t obj)
+_os_object_release_internal_n_inline(_os_object_t obj, int n)
 {
-	int ref_cnt = _os_object_refcnt_dec(obj);
+	int ref_cnt = _os_object_refcnt_sub(obj, n);
 	if (likely(ref_cnt >= 0)) {
 		return;
 	}
@@ -216,14 +220,56 @@
 static inline void
 _dispatch_retain(dispatch_object_t dou)
 {
-	(void)_os_object_retain_internal_inline(dou._os_obj);
+	(void)_os_object_retain_internal_n_inline(dou._os_obj, 1);
+}
+
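+// The _2/_n variants update several internal references in a single atomic
+// operation: e.g. the queue push fast path below retains by 2 and the
+// matching wakeup consumes both references (DISPATCH_WAKEUP_CONSUME_2).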
+DISPATCH_ALWAYS_INLINE_NDEBUG
+static inline void
+_dispatch_retain_2(dispatch_object_t dou)
+{
+	(void)_os_object_retain_internal_n_inline(dou._os_obj, 2);
+}
+
+DISPATCH_ALWAYS_INLINE_NDEBUG
+static inline void
+_dispatch_retain_n(dispatch_object_t dou, int n)
+{
+	(void)_os_object_retain_internal_n_inline(dou._os_obj, n);
 }
 
 DISPATCH_ALWAYS_INLINE_NDEBUG
 static inline void
 _dispatch_release(dispatch_object_t dou)
 {
-	_os_object_release_internal_inline(dou._os_obj);
+	_os_object_release_internal_n_inline(dou._os_obj, 1);
+}
+
+DISPATCH_ALWAYS_INLINE_NDEBUG
+static inline void
+_dispatch_release_2(dispatch_object_t dou)
+{
+	_os_object_release_internal_n_inline(dou._os_obj, 2);
+}
+
+DISPATCH_ALWAYS_INLINE_NDEBUG
+static inline void
+_dispatch_release_n(dispatch_object_t dou, int n)
+{
+	_os_object_release_internal_n_inline(dou._os_obj, n);
+}
+
+DISPATCH_ALWAYS_INLINE_NDEBUG
+static inline void
+_dispatch_release_no_dispose(dispatch_object_t dou)
+{
+	_os_object_release_internal_n_no_dispose_inline(dou._os_obj, 1);
+}
+
+DISPATCH_ALWAYS_INLINE_NDEBUG
+static inline void
+_dispatch_release_2_no_dispose(dispatch_object_t dou)
+{
+	_os_object_release_internal_n_no_dispose_inline(dou._os_obj, 2);
 }
 
 DISPATCH_ALWAYS_INLINE_NDEBUG
@@ -233,6 +279,42 @@
 	_os_object_release_internal(dou._os_obj);
 }
 
+DISPATCH_ALWAYS_INLINE_NDEBUG
+static inline void
+_dispatch_release_2_tailcall(dispatch_object_t dou)
+{
+	_os_object_release_internal_n(dou._os_obj, 2);
+}
+
+DISPATCH_ALWAYS_INLINE
+static inline void
+_dispatch_queue_retain_storage(dispatch_queue_t dq)
+{
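+	// dq_sref_cnt is a storage-only refcount: it merely delays
+	// _dispatch_object_dealloc(), see _dispatch_queue_release_storage below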
+	int ref_cnt = os_atomic_inc2o(dq, dq_sref_cnt, relaxed);
+	if (unlikely(ref_cnt <= 0)) {
+		_OS_OBJECT_CLIENT_CRASH("Resurrection of an object");
+	}
+}
+
+DISPATCH_ALWAYS_INLINE
+static inline void
+_dispatch_queue_release_storage(dispatch_queue_t dq)
+{
+	// this refcount only delays the _dispatch_object_dealloc() and there's no
+	// need for visibility wrt the allocation, the internal refcount already
+	// gives us that, and the object becomes immutable after the last internal
+	// refcount release.
+	int ref_cnt = os_atomic_dec2o(dq, dq_sref_cnt, relaxed);
+	if (unlikely(ref_cnt >= 0)) {
+		return;
+	}
+	if (unlikely(ref_cnt < -1)) {
+		_OS_OBJECT_CLIENT_CRASH("Over-release of an object");
+	}
+	dq->dq_state = 0xdead000000000000;
+	_dispatch_object_dealloc(dq);
+}
+
 DISPATCH_ALWAYS_INLINE DISPATCH_NONNULL_ALL
 static inline void
 _dispatch_object_set_target_queue_inline(dispatch_object_t dou,
@@ -574,6 +656,113 @@
 	return _dispatch_queue_atomic_flags(dq) & DQF_LEGACY;
 }
 
+DISPATCH_ALWAYS_INLINE
+static inline void
+_dispatch_wlh_retain(dispatch_wlh_t wlh)
+{
+	if (wlh && wlh != DISPATCH_WLH_ANON) {
+		_dispatch_queue_retain_storage((dispatch_queue_t)wlh);
+	}
+}
+
+DISPATCH_ALWAYS_INLINE
+static inline void
+_dispatch_wlh_release(dispatch_wlh_t wlh)
+{
+	if (wlh && wlh != DISPATCH_WLH_ANON) {
+		_dispatch_queue_release_storage((dispatch_queue_t)wlh);
+	}
+}
+
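+// Low bit of the dispatch_wlh_key TSD value: set by
+// _dispatch_preserve_wlh_storage_reference() when the current thread holds a
+// storage reference on the wlh it adopted, masked off again by
+// _dispatch_get_wlh_reference().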
+#define DISPATCH_WLH_STORAGE_REF 1ul
+
+DISPATCH_ALWAYS_INLINE DISPATCH_PURE
+static inline dispatch_wlh_t
+_dispatch_get_wlh(void)
+{
+	return _dispatch_thread_getspecific(dispatch_wlh_key);
+}
+
+DISPATCH_ALWAYS_INLINE DISPATCH_PURE
+static inline dispatch_wlh_t
+_dispatch_get_wlh_reference(void)
+{
+	dispatch_wlh_t wlh = _dispatch_thread_getspecific(dispatch_wlh_key);
+	if (wlh != DISPATCH_WLH_ANON) {
+		wlh = (dispatch_wlh_t)((uintptr_t)wlh & ~DISPATCH_WLH_STORAGE_REF);
+	}
+	return wlh;
+}
+
+DISPATCH_ALWAYS_INLINE
+static inline bool
+_dispatch_adopt_wlh_anon_recurse(void)
+{
+	dispatch_wlh_t cur_wlh = _dispatch_get_wlh_reference();
+	if (cur_wlh == DISPATCH_WLH_ANON) return false;
+	_dispatch_debug("wlh[anon]: set current (releasing %p)", cur_wlh);
+	_dispatch_wlh_release(cur_wlh);
+	_dispatch_thread_setspecific(dispatch_wlh_key, (void *)DISPATCH_WLH_ANON);
+	return true;
+}
+
+DISPATCH_ALWAYS_INLINE
+static inline void
+_dispatch_adopt_wlh_anon(void)
+{
+	if (unlikely(!_dispatch_adopt_wlh_anon_recurse())) {
+		DISPATCH_INTERNAL_CRASH(0, "Lingering DISPATCH_WLH_ANON");
+	}
+}
+
+DISPATCH_ALWAYS_INLINE
+static inline void
+_dispatch_adopt_wlh(dispatch_wlh_t wlh)
+{
+	dispatch_wlh_t cur_wlh = _dispatch_get_wlh_reference();
+	_dispatch_debug("wlh[%p]: adopt current (releasing %p)", wlh, cur_wlh);
+	if (cur_wlh == DISPATCH_WLH_ANON) {
+		DISPATCH_INTERNAL_CRASH(0, "Lingering DISPATCH_WLH_ANON");
+	}
+	if (cur_wlh != wlh) {
+		dispatch_assert(wlh);
+		_dispatch_wlh_release(cur_wlh);
+		_dispatch_wlh_retain(wlh);
+	}
+	_dispatch_thread_setspecific(dispatch_wlh_key, (void *)wlh);
+}
+
+DISPATCH_ALWAYS_INLINE
+static inline void
+_dispatch_preserve_wlh_storage_reference(dispatch_wlh_t wlh)
+{
+	dispatch_assert(wlh != DISPATCH_WLH_ANON);
+	dispatch_assert(wlh == _dispatch_get_wlh());
+	_dispatch_thread_setspecific(dispatch_wlh_key,
+			(void *)((uintptr_t)wlh | DISPATCH_WLH_STORAGE_REF));
+}
+
+DISPATCH_ALWAYS_INLINE
+static inline void
+_dispatch_reset_wlh(void)
+{
+	dispatch_assert(_dispatch_get_wlh() == DISPATCH_WLH_ANON);
+	_dispatch_debug("wlh[anon]: clear current");
+	_dispatch_thread_setspecific(dispatch_wlh_key, NULL);
+	_dispatch_clear_return_to_kernel();
+}
+
+DISPATCH_ALWAYS_INLINE
+static inline bool
+_dispatch_wlh_should_poll_unote(dispatch_unote_t du)
+{
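+	// only worth polling when this thread still has to return to the kernel,
+	// and only for unotes registered with the wlh it is currently draining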
+	if (likely(_dispatch_needs_to_return_to_kernel())) {
+		dispatch_wlh_t wlh = _dispatch_get_wlh();
+		return wlh != DISPATCH_WLH_ANON && du._du->du_wlh == wlh;
+	}
+	return false;
+}
+
 #endif // DISPATCH_PURE_C
 #ifndef __cplusplus
 
@@ -678,16 +867,67 @@
 
 DISPATCH_ALWAYS_INLINE
 static inline bool
+_dq_state_is_base_wlh(uint64_t dq_state)
+{
+	return dq_state & DISPATCH_QUEUE_ROLE_BASE_WLH;
+}
+
+DISPATCH_ALWAYS_INLINE
+static inline bool
+_dq_state_is_base_anon(uint64_t dq_state)
+{
+	return dq_state & DISPATCH_QUEUE_ROLE_BASE_ANON;
+}
+
+DISPATCH_ALWAYS_INLINE
+static inline bool
+_dq_state_is_inner_queue(uint64_t dq_state)
+{
+	return (dq_state & DISPATCH_QUEUE_ROLE_MASK) == DISPATCH_QUEUE_ROLE_INNER;
+}
+
+DISPATCH_ALWAYS_INLINE
+static inline bool
 _dq_state_is_enqueued(uint64_t dq_state)
 {
+	return dq_state & (DISPATCH_QUEUE_ENQUEUED|DISPATCH_QUEUE_ENQUEUED_ON_MGR);
+}
+
+DISPATCH_ALWAYS_INLINE
+static inline bool
+_dq_state_is_enqueued_on_target(uint64_t dq_state)
+{
 	return dq_state & DISPATCH_QUEUE_ENQUEUED;
 }
 
 DISPATCH_ALWAYS_INLINE
 static inline bool
+_dq_state_is_enqueued_on_manager(uint64_t dq_state)
+{
+	return dq_state & DISPATCH_QUEUE_ENQUEUED_ON_MGR;
+}
+
+DISPATCH_ALWAYS_INLINE
+static inline bool
+_dq_state_in_sync_transfer(uint64_t dq_state)
+{
+	return dq_state & DISPATCH_QUEUE_SYNC_TRANSFER;
+}
+
+DISPATCH_ALWAYS_INLINE
+static inline bool
 _dq_state_received_override(uint64_t dq_state)
 {
-	return dq_state & DISPATCH_QUEUE_RECEIVED_OVERRIDE;
+	return _dq_state_is_base_anon(dq_state) &&
+			(dq_state & DISPATCH_QUEUE_RECEIVED_OVERRIDE);
+}
+
+DISPATCH_ALWAYS_INLINE
+static inline bool
+_dq_state_received_sync_wait(uint64_t dq_state)
+{
+	return _dq_state_is_base_wlh(dq_state) &&
+			(dq_state & DISPATCH_QUEUE_RECEIVED_SYNC_WAIT);
 }
 
 DISPATCH_ALWAYS_INLINE
@@ -712,13 +952,16 @@
 	uint64_t qos_bits = _dq_state_from_qos(qos);
 	if ((dq_state & DISPATCH_QUEUE_MAX_QOS_MASK) < qos_bits) {
 		dq_state &= ~DISPATCH_QUEUE_MAX_QOS_MASK;
-		dq_state |= qos_bits | DISPATCH_QUEUE_RECEIVED_OVERRIDE;
+		dq_state |= qos_bits;
+		if (unlikely(_dq_state_is_base_anon(dq_state))) {
+			dq_state |= DISPATCH_QUEUE_RECEIVED_OVERRIDE;
+		}
 	}
 	return dq_state;
 }
 
 DISPATCH_ALWAYS_INLINE
-static inline dispatch_lock_owner
+static inline dispatch_tid
 _dq_state_drain_owner(uint64_t dq_state)
 {
 	return _dispatch_lock_owner((dispatch_lock)dq_state);
@@ -728,33 +971,23 @@
 
 DISPATCH_ALWAYS_INLINE
 static inline bool
-_dq_state_drain_pended(uint64_t dq_state)
+_dq_state_drain_locked_by(uint64_t dq_state, dispatch_tid tid)
 {
-	return (dq_state & DISPATCH_QUEUE_DRAIN_PENDED);
+	return _dispatch_lock_is_locked_by((dispatch_lock)dq_state, tid);
 }
 
 DISPATCH_ALWAYS_INLINE
 static inline bool
-_dq_state_drain_locked_by(uint64_t dq_state, uint32_t owner)
+_dq_state_drain_locked_by_self(uint64_t dq_state)
 {
-	if (_dq_state_drain_pended(dq_state)) {
-		return false;
-	}
-	return _dq_state_drain_owner(dq_state) == owner;
+	return _dispatch_lock_is_locked_by_self((dispatch_lock)dq_state);
 }
 
 DISPATCH_ALWAYS_INLINE
 static inline bool
 _dq_state_drain_locked(uint64_t dq_state)
 {
-	return (dq_state & DISPATCH_QUEUE_DRAIN_OWNER_MASK) != 0;
-}
-
-DISPATCH_ALWAYS_INLINE
-static inline bool
-_dq_state_has_waiters(uint64_t dq_state)
-{
-	return _dispatch_lock_has_waiters((dispatch_lock)dq_state);
+	return _dispatch_lock_is_locked((dispatch_lock)dq_state);
 }
 
 DISPATCH_ALWAYS_INLINE
@@ -773,17 +1006,25 @@
 
 DISPATCH_ALWAYS_INLINE
 static inline bool
-_dq_state_should_wakeup(uint64_t dq_state)
+_dq_state_should_override(uint64_t dq_state)
 {
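+	// never override suspended queues or queues enqueued on the manager;
+	// always override queues enqueued on their target; otherwise only
+	// override non-wlh bases whose drain lock is currently held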
-	return _dq_state_is_runnable(dq_state) &&
-			!_dq_state_is_enqueued(dq_state) &&
-			!_dq_state_drain_locked(dq_state);
+	if (_dq_state_is_suspended(dq_state) ||
+			_dq_state_is_enqueued_on_manager(dq_state)) {
+		return false;
+	}
+	if (_dq_state_is_enqueued_on_target(dq_state)) {
+		return true;
+	}
+	if (_dq_state_is_base_wlh(dq_state)) {
+		return false;
+	}
+	return _dq_state_drain_locked(dq_state);
 }
 
+
 #endif // __cplusplus
 #pragma mark -
 #pragma mark dispatch_queue_t state machine
-#ifndef __cplusplus
 
 static inline pthread_priority_t _dispatch_get_priority(void);
 static inline dispatch_priority_t _dispatch_get_basepri(void);
@@ -791,43 +1032,29 @@
 static inline void _dispatch_set_basepri_override_qos(dispatch_qos_t qos);
 static inline void _dispatch_reset_basepri(dispatch_priority_t dbp);
 static inline dispatch_priority_t _dispatch_set_basepri(dispatch_priority_t dbp);
-
 static inline bool _dispatch_queue_need_override_retain(
 		dispatch_queue_class_t dqu, dispatch_qos_t qos);
 
-DISPATCH_ALWAYS_INLINE
-static inline void
-_dispatch_queue_xref_dispose(struct dispatch_queue_s *dq)
-{
-	uint64_t dq_state = os_atomic_load2o(dq, dq_state, relaxed);
-	if (unlikely(_dq_state_is_suspended(dq_state))) {
-		long state = (long)dq_state;
-		if (sizeof(long) < sizeof(uint64_t)) state = (long)(dq_state >> 32);
-		if (unlikely(_dq_state_is_inactive(dq_state))) {
-			// Arguments for and against this assert are within 6705399
-			DISPATCH_CLIENT_CRASH(state, "Release of an inactive object");
-		}
-		DISPATCH_CLIENT_CRASH(dq_state, "Release of a suspended object");
-	}
-	os_atomic_or2o(dq, dq_atomic_flags, DQF_RELEASED, relaxed);
-}
-
-#endif
 #if DISPATCH_PURE_C
 
 // Note to later developers: ensure that any initialization changes are
 // made for statically allocated queues (i.e. _dispatch_main_q).
 static inline void
 _dispatch_queue_init(dispatch_queue_t dq, dispatch_queue_flags_t dqf,
-		uint16_t width, bool inactive)
+		uint16_t width, uint64_t initial_state_bits)
 {
 	uint64_t dq_state = DISPATCH_QUEUE_STATE_INIT_VALUE(width);
 
-	if (inactive) {
-		dq_state += DISPATCH_QUEUE_INACTIVE + DISPATCH_QUEUE_NEEDS_ACTIVATION;
-		dq_state += DLOCK_OWNER_INVALID;
-		dq->do_ref_cnt++; // rdar://8181908 see _dispatch_queue_resume
+	dispatch_assert((initial_state_bits & ~(DISPATCH_QUEUE_ROLE_MASK |
+			DISPATCH_QUEUE_INACTIVE)) == 0);
+
+	if (initial_state_bits & DISPATCH_QUEUE_INACTIVE) {
+		dq_state |= DISPATCH_QUEUE_INACTIVE + DISPATCH_QUEUE_NEEDS_ACTIVATION;
+		dq_state |= DLOCK_OWNER_MASK;
+		dq->do_ref_cnt += 2; // rdar://8181908 see _dispatch_queue_resume
 	}
+
+	dq_state |= (initial_state_bits & DISPATCH_QUEUE_ROLE_MASK);
 	dq->do_next = (struct dispatch_queue_s *)DISPATCH_OBJECT_LISTLESS;
 	dqf |= DQF_WIDTH(width);
 	os_atomic_store2o(dq, dq_atomic_flags, dqf, relaxed);
@@ -869,8 +1096,13 @@
 	return true;
 }
 
-#define _dispatch_queue_should_override_self(dq_state, qos) \
-	unlikely(qos < _dq_state_max_qos(dq_state))
+DISPATCH_ALWAYS_INLINE
+static inline bool
+_dq_state_needs_lock_override(uint64_t dq_state, dispatch_qos_t qos)
+{
+	return _dq_state_is_base_anon(dq_state) &&
+			qos < _dq_state_max_qos(dq_state);
+}
 
 DISPATCH_ALWAYS_INLINE
 static inline dispatch_qos_t
@@ -884,79 +1116,139 @@
 	return qos;
 }
 
-/* Used by:
- * - _dispatch_queue_class_invoke (normal path)
- * - _dispatch_queue_override_invoke (stealer)
- *
- * Initial state must be { sc:0, ib:0, qf:0, dl:0 }
- * Final state forces { dl:self, qf:1, d: 0 }
- *    ib:1 is forced when the width acquired is equivalent to the barrier width
- */
 DISPATCH_ALWAYS_INLINE DISPATCH_WARN_RESULT
 static inline uint64_t
 _dispatch_queue_drain_try_lock(dispatch_queue_t dq,
-		dispatch_invoke_flags_t flags, uint64_t *dq_state)
+		dispatch_invoke_flags_t flags)
 {
 	uint64_t pending_barrier_width =
 			(dq->dq_width - 1) * DISPATCH_QUEUE_WIDTH_INTERVAL;
-	uint64_t xor_owner_and_set_full_width =
-			_dispatch_tid_self() | DISPATCH_QUEUE_WIDTH_FULL_BIT;
-	uint64_t clear_enqueued_bit, old_state, new_state;
+	uint64_t set_owner_and_set_full_width =
+			_dispatch_lock_value_for_self() | DISPATCH_QUEUE_WIDTH_FULL_BIT;
+	uint64_t lock_fail_mask, old_state, new_state, dequeue_mask;
+
+	// same as !_dq_state_is_runnable()
+	lock_fail_mask  = ~(DISPATCH_QUEUE_WIDTH_FULL_BIT - 1);
+	// same as _dq_state_drain_locked()
+	lock_fail_mask |= DISPATCH_QUEUE_DRAIN_OWNER_MASK;
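+	// stealers never dequeue; the manager drain clears ENQUEUED_ON_MGR;
+	// a regular drain clears ENQUEUED and must not take the lock while the
+	// queue is still enqueued on the manager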
 
 	if (flags & DISPATCH_INVOKE_STEALING) {
-		clear_enqueued_bit = 0;
+		lock_fail_mask |= DISPATCH_QUEUE_ENQUEUED_ON_MGR;
+		dequeue_mask = 0;
+	} else if (flags & DISPATCH_INVOKE_MANAGER_DRAIN) {
+		dequeue_mask = DISPATCH_QUEUE_ENQUEUED_ON_MGR;
 	} else {
-		clear_enqueued_bit = DISPATCH_QUEUE_ENQUEUED;
+		lock_fail_mask |= DISPATCH_QUEUE_ENQUEUED_ON_MGR;
+		dequeue_mask = DISPATCH_QUEUE_ENQUEUED;
 	}
+	dispatch_assert(!(flags & DISPATCH_INVOKE_WLH));
 
 	dispatch_qos_t oq_floor = _dispatch_get_basepri_override_qos_floor();
 retry:
 	os_atomic_rmw_loop2o(dq, dq_state, old_state, new_state, acquire, {
 		new_state = old_state;
-		new_state ^= clear_enqueued_bit;
-		if (likely(_dq_state_is_runnable(old_state) &&
-				!_dq_state_drain_locked(old_state))) {
-			if (_dispatch_queue_should_override_self(old_state, oq_floor)) {
+		if (likely(!(old_state & lock_fail_mask))) {
+			if (unlikely(_dq_state_needs_lock_override(old_state, oq_floor))) {
 				os_atomic_rmw_loop_give_up({
 					oq_floor = _dispatch_queue_override_self(old_state);
 					goto retry;
 				});
 			}
 			//
-			// Only keep the HAS_WAITER, MAX_QOS and ENQUEUED (if stealing) bits
+			// Only keep the HAS_WAITER, MAX_QOS and ENQUEUED bits
 			// In particular acquiring the drain lock clears the DIRTY and
-			// RECEIVED_OVERRIDE
+			// RECEIVED_OVERRIDE bits.
 			//
 			new_state &= DISPATCH_QUEUE_DRAIN_PRESERVED_BITS_MASK;
-			//
-			// For the NOWAITERS_BIT case, the thread identity
-			// has NOWAITERS_BIT set, and NOWAITERS_BIT was kept above,
-			// so the xor below flips the NOWAITERS_BIT to 0 as expected.
-			//
-			// For the non inverted WAITERS_BIT case, WAITERS_BIT is not set in
-			// the thread identity, and the xor leaves the bit alone.
-			//
-			new_state ^= xor_owner_and_set_full_width;
+			new_state |= set_owner_and_set_full_width;
 			if (_dq_state_has_pending_barrier(old_state) ||
 					old_state + pending_barrier_width <
 					DISPATCH_QUEUE_WIDTH_FULL_BIT) {
 				new_state |= DISPATCH_QUEUE_IN_BARRIER;
 			}
-		} else if (!clear_enqueued_bit) {
+		} else if (dequeue_mask) {
+			// dequeue_mask is in a register, xor yields better assembly
+			new_state ^= dequeue_mask;
+		} else {
 			os_atomic_rmw_loop_give_up(break);
 		}
 	});
 
-	if (dq_state) *dq_state = new_state;
-	if (likely(_dq_state_is_runnable(old_state) &&
-			!_dq_state_drain_locked(old_state))) {
-		new_state &= DISPATCH_QUEUE_IN_BARRIER | DISPATCH_QUEUE_WIDTH_FULL_BIT;
+	dispatch_assert((old_state & dequeue_mask) == dequeue_mask);
+	if (likely(!(old_state & lock_fail_mask))) {
+		new_state &= DISPATCH_QUEUE_IN_BARRIER | DISPATCH_QUEUE_WIDTH_FULL_BIT |
+				dequeue_mask;
 		old_state &= DISPATCH_QUEUE_WIDTH_MASK;
 		return new_state - old_state;
 	}
 	return 0;
 }
 
+DISPATCH_ALWAYS_INLINE DISPATCH_WARN_RESULT
+static inline bool
+_dispatch_queue_drain_try_lock_wlh(dispatch_queue_t dq, uint64_t *dq_state)
+{
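+	// unlike _dispatch_queue_drain_try_lock() this does not clear ENQUEUED:
+	// for a wlh-bound queue that bit is part of what the drain owns
+	// (see the DISPATCH_INVOKE_WLH case in _dispatch_queue_class_invoke)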
+	uint64_t old_state, new_state;
+	uint64_t lock_bits = _dispatch_lock_value_for_self() |
+			DISPATCH_QUEUE_WIDTH_FULL_BIT | DISPATCH_QUEUE_IN_BARRIER;
+
+	os_atomic_rmw_loop2o(dq, dq_state, old_state, new_state, acquire, {
+		new_state = old_state;
+		if (unlikely(_dq_state_is_suspended(old_state))) {
+			os_atomic_rmw_loop_give_up(break);
+		} else if (unlikely(_dq_state_drain_locked(old_state))) {
+			os_atomic_rmw_loop_give_up(break);
+		} else {
+			new_state &= DISPATCH_QUEUE_DRAIN_PRESERVED_BITS_MASK;
+			new_state |= lock_bits;
+		}
+	});
+	if (unlikely(!_dq_state_is_base_wlh(old_state) ||
+			!_dq_state_is_enqueued_on_target(old_state) ||
+			_dq_state_is_enqueued_on_manager(old_state))) {
+#if !__LP64__
+		old_state >>= 32;
+#endif
+		DISPATCH_INTERNAL_CRASH(old_state, "Invalid wlh state");
+	}
+
+	if (dq_state) *dq_state = new_state;
+	return !_dq_state_is_suspended(old_state) &&
+			!_dq_state_drain_locked(old_state);
+}
+
+DISPATCH_ALWAYS_INLINE
+static inline void
+_dispatch_queue_mgr_lock(dispatch_queue_t dq)
+{
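+	// the manager queue is drained by a single thread, so failing to take
+	// its drain lock can only be a sign of state corruption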
+	uint64_t old_state, new_state, set_owner_and_set_full_width =
+			_dispatch_lock_value_for_self() | DISPATCH_QUEUE_SERIAL_DRAIN_OWNED;
+
+	os_atomic_rmw_loop2o(dq, dq_state, old_state, new_state, acquire, {
+		new_state = old_state;
+		if (unlikely(!_dq_state_is_runnable(old_state) ||
+				_dq_state_drain_locked(old_state))) {
+			DISPATCH_INTERNAL_CRASH((uintptr_t)old_state,
+					"Locking the manager should not fail");
+		}
+		new_state &= DISPATCH_QUEUE_DRAIN_PRESERVED_BITS_MASK;
+		new_state |= set_owner_and_set_full_width;
+	});
+}
+
+DISPATCH_ALWAYS_INLINE
+static inline bool
+_dispatch_queue_mgr_unlock(dispatch_queue_t dq)
+{
+	uint64_t old_state, new_state;
+	os_atomic_rmw_loop2o(dq, dq_state, old_state, new_state, release, {
+		new_state = old_state - DISPATCH_QUEUE_SERIAL_DRAIN_OWNED;
+		new_state &= ~DISPATCH_QUEUE_DRAIN_UNLOCK_MASK;
+		new_state &= ~DISPATCH_QUEUE_MAX_QOS_MASK;
+	});
+	return _dq_state_is_dirty(old_state);
+}
+
 /* Used by _dispatch_barrier_{try,}sync
  *
  * Note, this fails if any of e:1 or dl!=0, but that allows this code to be a
@@ -972,11 +1264,18 @@
 static inline bool
 _dispatch_queue_try_acquire_barrier_sync(dispatch_queue_t dq, uint32_t tid)
 {
-	uint64_t value = DISPATCH_QUEUE_WIDTH_FULL_BIT | DISPATCH_QUEUE_IN_BARRIER;
-	value |= tid;
+	uint64_t init  = DISPATCH_QUEUE_STATE_INIT_VALUE(dq->dq_width);
+	uint64_t value = DISPATCH_QUEUE_WIDTH_FULL_BIT | DISPATCH_QUEUE_IN_BARRIER |
+			_dispatch_lock_value_from_tid(tid);
+	uint64_t old_state, new_state;
 
-	return os_atomic_cmpxchg2o(dq, dq_state,
-			DISPATCH_QUEUE_STATE_INIT_VALUE(dq->dq_width), value, acquire);
+	return os_atomic_rmw_loop2o(dq, dq_state, old_state, new_state, acquire, {
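+		// only acquire from a fully idle state; the role bits are invariant
+		// and are carried over into the locked state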
+		uint64_t role = old_state & DISPATCH_QUEUE_ROLE_MASK;
+		if (old_state != (init | role)) {
+			os_atomic_rmw_loop_give_up(break);
+		}
+		new_state = value | role;
+	});
 }
 
 /* Used by _dispatch_sync for root queues and some drain codepaths
@@ -1149,18 +1448,13 @@
 static inline bool
 _dispatch_queue_drain_try_unlock(dispatch_queue_t dq, uint64_t owned, bool done)
 {
-	uint64_t old_state = os_atomic_load2o(dq, dq_state, relaxed);
-	uint64_t new_state;
+	uint64_t old_state, new_state;
 
 	os_atomic_rmw_loop2o(dq, dq_state, old_state, new_state, release, {
-		new_state = old_state - owned;
-		if (unlikely(_dq_state_is_suspended(new_state))) {
-#ifdef DLOCK_NOWAITERS_BIT
-			new_state = new_state | DISPATCH_QUEUE_DRAIN_OWNER_MASK;
-#else
-			new_state = new_state | DLOCK_OWNER_INVALID;
-#endif
-			new_state |= DISPATCH_QUEUE_DIRTY;
+		new_state  = old_state - owned;
+		new_state &= ~DISPATCH_QUEUE_DRAIN_UNLOCK_MASK;
+		if (unlikely(_dq_state_is_suspended(old_state))) {
+			new_state |= DLOCK_OWNER_MASK;
 		} else if (unlikely(_dq_state_is_dirty(old_state))) {
 			os_atomic_rmw_loop_give_up({
 				// just renew the drain lock with an acquire barrier, to see
@@ -1171,11 +1465,8 @@
 				return false;
 			});
 		} else if (likely(done)) {
-			new_state &= ~DISPATCH_QUEUE_DRAIN_UNLOCK_MASK;
-			new_state &= ~DISPATCH_QUEUE_RECEIVED_OVERRIDE;
 			new_state &= ~DISPATCH_QUEUE_MAX_QOS_MASK;
 		} else {
-			new_state = DISPATCH_QUEUE_DRAIN_UNLOCK(new_state);
 			new_state |= DISPATCH_QUEUE_DIRTY;
 		}
 	});
@@ -1187,80 +1478,6 @@
 	return true;
 }
 
-/* Used to transfer the drain lock to a next thread, because it is known
- * and that the dirty-head check isn't needed.
- *
- * This releases `owned`, clears DIRTY, and handles overrides when seen.
- */
-DISPATCH_ALWAYS_INLINE
-static inline void
-_dispatch_queue_drain_transfer_lock(dispatch_queue_t dq,
-		uint64_t owned, dispatch_object_t dou)
-{
-	uint64_t old_state, new_state;
-	mach_port_t next_owner = 0;
-	if (dou._dc->dc_flags & DISPATCH_OBJ_BARRIER_BIT) {
-		next_owner = (mach_port_t)dou._dc->dc_data;
-	}
-
-#ifdef DLOCK_NOWAITERS_BIT
-	// The NOWAITERS_BIT state must not change through the transfer. It means
-	// that if next_owner is 0 the bit must be flipped in the rmw_loop below,
-	// and if next_owner is set, then the bit must be left unchanged.
-	//
-	// - when next_owner is 0, the xor below sets NOWAITERS_BIT in next_owner,
-	//   which causes the second xor to flip the bit as expected.
-	// - if next_owner is not 0, it has the NOWAITERS_BIT set, so we have to
-	//   clear it so that the second xor leaves the NOWAITERS_BIT alone.
-	next_owner ^= DLOCK_NOWAITERS_BIT;
-#endif
-	os_atomic_rmw_loop2o(dq, dq_state, old_state, new_state, release, {
-		new_state = old_state - owned;
-		// same as DISPATCH_QUEUE_DRAIN_UNLOCK
-		// but we want to be more efficient wrt the WAITERS_BIT
-		new_state &= ~DISPATCH_QUEUE_DRAIN_OWNER_MASK;
-		new_state &= ~DISPATCH_QUEUE_DRAIN_PENDED;
-		new_state &= ~DISPATCH_QUEUE_RECEIVED_OVERRIDE;
-		new_state &= ~DISPATCH_QUEUE_DIRTY;
-		new_state ^= next_owner;
-	});
-	if (_dq_state_received_override(old_state)) {
-		// Ensure that the root queue sees that this thread was overridden.
-		_dispatch_set_basepri_override_qos(_dq_state_max_qos(old_state));
-	}
-}
-
-/* Used to forcefully unlock the drain lock, bypassing the dirty bit check.
- * This usually is followed by a wakeup to re-evaluate the state machine
- * of the queue/source.
- *
- * This releases `owned`, clears DIRTY, and handles overrides when seen.
- */
-DISPATCH_ALWAYS_INLINE
-static inline uint64_t
-_dispatch_queue_drain_unlock(dispatch_queue_t dq, uint64_t owned)
-{
-	uint64_t old_state, new_state;
-
-	os_atomic_rmw_loop2o(dq, dq_state, old_state, new_state, release, {
-		new_state = old_state - owned;
-		// same as DISPATCH_QUEUE_DRAIN_UNLOCK
-		// but we want to be more efficient wrt the WAITERS_BIT
-#ifdef DLOCK_NOWAITERS_BIT
-		new_state ^= DLOCK_NOWAITERS_BIT;
-#endif
-		new_state &= ~DISPATCH_QUEUE_DRAIN_OWNER_MASK;
-		new_state &= ~DISPATCH_QUEUE_DRAIN_PENDED;
-		new_state &= ~DISPATCH_QUEUE_RECEIVED_OVERRIDE;
-	});
-
-	if (_dq_state_received_override(old_state)) {
-		// Ensure that the root queue sees that this thread was overridden.
-		_dispatch_set_basepri_override_qos(_dq_state_max_qos(old_state));
-	}
-	return old_state;
-}
-
 #pragma mark -
 #pragma mark os_mpsc_queue
 
@@ -1368,7 +1585,7 @@
 static inline bool
 _dispatch_queue_sidelock_trylock(dispatch_queue_t dq, dispatch_qos_t qos)
 {
-	dispatch_lock_owner owner;
+	dispatch_tid owner;
 	if (_dispatch_unfair_lock_trylock(&dq->dq_sidelock, &owner)) {
 		return true;
 	}
@@ -1499,11 +1716,11 @@
 	// queue when invoked by _dispatch_queue_drain. <rdar://problem/6932776>
 	bool overriding = _dispatch_queue_need_override_retain(dq, qos);
 	if (unlikely(_dispatch_queue_push_update_tail(dq, tail))) {
-		if (!overriding) _dispatch_retain(dq);
+		if (!overriding) _dispatch_retain_2(dq->_as_os_obj);
 		_dispatch_queue_push_update_head(dq, tail);
-		flags = DISPATCH_WAKEUP_CONSUME | DISPATCH_WAKEUP_FLUSH;
+		flags = DISPATCH_WAKEUP_CONSUME_2 | DISPATCH_WAKEUP_MAKE_DIRTY;
 	} else if (overriding) {
-		flags = DISPATCH_WAKEUP_CONSUME | DISPATCH_WAKEUP_OVERRIDING;
+		flags = DISPATCH_WAKEUP_CONSUME_2;
 	} else {
 		return;
 	}
@@ -1511,6 +1728,14 @@
 }
 
 DISPATCH_ALWAYS_INLINE
+static inline void
+_dispatch_queue_push_queue(dispatch_queue_t tq, dispatch_queue_t dq,
+		uint64_t dq_state)
+{
+	return dx_push(tq, dq, _dq_state_max_qos(dq_state));
+}
+
+DISPATCH_ALWAYS_INLINE
 static inline dispatch_priority_t
 _dispatch_root_queue_identity_assume(dispatch_queue_t assumed_rq)
 {
@@ -1521,30 +1746,6 @@
 	return old_dbp;
 }
 
-DISPATCH_ALWAYS_INLINE
-static inline bool
-_dispatch_root_queue_allows_wlh_for_queue(dispatch_queue_t rq,
-		dispatch_queue_class_t dqu)
-{
-	// This will discard:
-	// - queues already tagged with the global wlh
-	// - concurrent queues (width != 1)
-	// - non overcommit queues, which includes pthread root queues.
-	return dqu._dq->dq_wlh != DISPATCH_WLH_GLOBAL && dqu._dq->dq_width == 1 &&
-			(rq->dq_priority & DISPATCH_PRIORITY_FLAG_OVERCOMMIT);
-}
-
-DISPATCH_ALWAYS_INLINE
-static inline dispatch_wlh_t
-_dispatch_root_queue_wlh_for_queue(dispatch_queue_t rq,
-		dispatch_queue_class_t dqu)
-{
-	if (likely(_dispatch_root_queue_allows_wlh_for_queue(rq, dqu))) {
-		return (dispatch_wlh_t)dqu._dq;
-	}
-	return DISPATCH_WLH_GLOBAL;
-}
-
 typedef dispatch_queue_wakeup_target_t
 _dispatch_queue_class_invoke_handler_t(dispatch_object_t,
 		dispatch_invoke_context_t dic, dispatch_invoke_flags_t,
@@ -1554,13 +1755,13 @@
 static inline void
 _dispatch_queue_class_invoke(dispatch_object_t dou,
 		dispatch_invoke_context_t dic, dispatch_invoke_flags_t flags,
+		dispatch_invoke_flags_t const_restrict_flags,
 		_dispatch_queue_class_invoke_handler_t invoke)
 {
 	dispatch_queue_t dq = dou._dq;
 	dispatch_queue_wakeup_target_t tq = DISPATCH_QUEUE_WAKEUP_NONE;
-	uint64_t dq_state, to_unlock = 0;
 	bool owning = !(flags & DISPATCH_INVOKE_STEALING);
-	bool overriding = (flags & DISPATCH_INVOKE_OVERRIDING);
+	uint64_t owned = 0;
 
 	// When called from a plain _dispatch_queue_drain:
 	//   overriding = false
@@ -1569,43 +1770,42 @@
 	// When called from an override continuation:
 	//   overriding = true
 	//   owning depends on whether the override embedded the queue or steals
-	DISPATCH_COMPILER_CAN_ASSUME(owning || overriding);
 
-	if (likely(owning)) {
+	if (!(flags & (DISPATCH_INVOKE_STEALING | DISPATCH_INVOKE_WLH))) {
 		dq->do_next = DISPATCH_OBJECT_LISTLESS;
 	}
-	to_unlock = _dispatch_queue_drain_try_lock(dq, flags, &dq_state);
-	if (likely(to_unlock)) {
+	flags |= const_restrict_flags;
+	if (likely(flags & DISPATCH_INVOKE_WLH)) {
+		owned = DISPATCH_QUEUE_SERIAL_DRAIN_OWNED | DISPATCH_QUEUE_ENQUEUED;
+	} else {
+		owned = _dispatch_queue_drain_try_lock(dq, flags);
+	}
+	if (likely(owned)) {
 		dispatch_priority_t old_dbp;
 		if (!(flags & DISPATCH_INVOKE_MANAGER_DRAIN)) {
-			if (unlikely(overriding)) {
-				_dispatch_object_debug(dq, "stolen onto thread 0x%x, 0x%x",
-						_dispatch_tid_self(), _dispatch_get_basepri());
-			}
 			old_dbp = _dispatch_set_basepri(dq->dq_priority);
-			dispatch_wlh_t wlh = _dispatch_get_wlh();
-			if (unlikely(dq->dq_wlh != wlh)) {
-				if (unlikely(dq->dq_wlh)) {
-					_dispatch_ktrace3(DISPATCH_PERF_wlh_change, dq,
-							dq->dq_wlh, wlh);
-					if (!(_dispatch_queue_atomic_flags_set_orig(dq,
-							DQF_WLH_CHANGED) & DQF_WLH_CHANGED)) {
-						_dispatch_bug_deprecated("Changing target queue "
-								"hierarchy after object has started executing");
-					}
-				}
-				dq->dq_wlh = wlh;
-#if DISPATCH_ENFORCE_STATIC_WLH_HIERARCHY
-				_dispatch_queue_atomic_flags_clear(dq, DQF_LEGACY);
-#endif
-			}
 		} else {
 			old_dbp = 0;
 		}
 
 		flags = _dispatch_queue_merge_autorelease_frequency(dq, flags);
 attempt_running_slow_head:
-		tq = invoke(dq, dic, flags, &to_unlock);
+#if DISPATCH_COCOA_COMPAT
+		if ((flags & DISPATCH_INVOKE_WLH) &&
+				!(flags & DISPATCH_INVOKE_AUTORELEASE_ALWAYS)) {
+			_dispatch_last_resort_autorelease_pool_push(dic);
+		}
+#endif // DISPATCH_COCOA_COMPAT
+		tq = invoke(dq, dic, flags, &owned);
+#if DISPATCH_COCOA_COMPAT
+		if ((flags & DISPATCH_INVOKE_WLH) &&
+				!(flags & DISPATCH_INVOKE_AUTORELEASE_ALWAYS)) {
+			dispatch_thread_frame_s dtf;
+			_dispatch_thread_frame_push(&dtf, dq);
+			_dispatch_last_resort_autorelease_pool_pop(dic);
+			_dispatch_thread_frame_pop(&dtf);
+		}
+#endif // DISPATCH_COCOA_COMPAT
 		dispatch_assert(tq != DISPATCH_QUEUE_WAKEUP_TARGET);
 		if (unlikely(tq != DISPATCH_QUEUE_WAKEUP_NONE &&
 				tq != DISPATCH_QUEUE_WAKEUP_WAIT_FOR_EVENT)) {
@@ -1617,14 +1817,15 @@
 			// In both cases, we want to bypass the check for DIRTY.
 			// That may cause us to leave DIRTY in place but all drain lock
 			// acquirers clear it
-		} else if (!_dispatch_queue_drain_try_unlock(dq, to_unlock,
+		} else if (!_dispatch_queue_drain_try_unlock(dq, owned,
 				tq == DISPATCH_QUEUE_WAKEUP_NONE)) {
 			tq = _dispatch_queue_get_current();
 			if (dx_hastypeflag(tq, QUEUE_ROOT) || !owning) {
 				goto attempt_running_slow_head;
 			}
+			DISPATCH_COMPILER_CAN_ASSUME(tq != DISPATCH_QUEUE_WAKEUP_NONE);
 		} else {
-			to_unlock = 0;
+			owned = 0;
 			tq = NULL;
 		}
 		if (!(flags & DISPATCH_INVOKE_MANAGER_DRAIN)) {
@@ -1635,32 +1836,43 @@
 		_dispatch_introspection_queue_item_complete(dq);
 	}
 
-	if (tq && dic->dic_deferred) {
-		return _dispatch_queue_drain_deferred_invoke(dq, dic, flags, to_unlock);
-	}
-
 	if (tq) {
-		uint64_t old_state, new_state;
+		if (const_restrict_flags & DISPATCH_INVOKE_DISALLOW_SYNC_WAITERS) {
+			dispatch_assert(dic->dic_deferred == NULL);
+		} else if (dic->dic_deferred) {
+			return _dispatch_queue_drain_sync_waiter(dq, dic,
+					flags, owned);
+		}
 
+		uint64_t old_state, new_state, enqueued = DISPATCH_QUEUE_ENQUEUED;
+		if (tq == DISPATCH_QUEUE_WAKEUP_MGR) {
+			enqueued = DISPATCH_QUEUE_ENQUEUED_ON_MGR;
+		}
 		os_atomic_rmw_loop2o(dq, dq_state, old_state, new_state, release, {
-			new_state  = DISPATCH_QUEUE_DRAIN_UNLOCK(old_state - to_unlock);
+			new_state  = old_state - owned;
+			new_state &= ~DISPATCH_QUEUE_DRAIN_UNLOCK_MASK;
 			new_state |= DISPATCH_QUEUE_DIRTY;
-			if (_dq_state_should_wakeup(new_state)) {
+			if (_dq_state_is_suspended(new_state)) {
+				new_state |= DLOCK_OWNER_MASK;
+			} else if (_dq_state_is_runnable(new_state) &&
+					!_dq_state_is_enqueued(new_state)) {
 			// drain was not interrupted for suspension
 				// we will reenqueue right away, just put ENQUEUED back
-				new_state |= DISPATCH_QUEUE_ENQUEUED;
+				new_state |= enqueued;
 			}
 		});
+		old_state -= owned;
 		if (_dq_state_received_override(old_state)) {
 			// Ensure that the root queue sees that this thread was overridden.
-			_dispatch_set_basepri_override_qos(_dq_state_max_qos(old_state));
+			_dispatch_set_basepri_override_qos(_dq_state_max_qos(new_state));
 		}
-		if ((old_state ^ new_state) & DISPATCH_QUEUE_ENQUEUED) {
-			return dx_push(tq, dq, _dq_state_max_qos(old_state));
+		if ((old_state ^ new_state) & enqueued) {
+			dispatch_assert(_dq_state_is_enqueued(new_state));
+			return _dispatch_queue_push_queue(tq, dq, new_state);
 		}
 	}
 
-	return _dispatch_release_tailcall(dq);
+	_dispatch_release_2_tailcall(dq);
 }
 
 DISPATCH_ALWAYS_INLINE
@@ -1698,23 +1910,21 @@
 {
 	// Tag thread-bound queues with the owning thread
 	dispatch_assert(_dispatch_queue_is_thread_bound(dq));
-	mach_port_t old_owner, self = _dispatch_tid_self();
-	uint64_t dq_state = os_atomic_or_orig2o(dq, dq_state, self, relaxed);
-	if (unlikely(old_owner = _dq_state_drain_owner(dq_state))) {
-		DISPATCH_INTERNAL_CRASH(old_owner, "Queue bound twice");
-	}
+	uint64_t old_state, new_state;
+	os_atomic_rmw_loop2o(dq, dq_state, old_state, new_state, relaxed, {
+		new_state = old_state;
+		new_state &= ~DISPATCH_QUEUE_DRAIN_OWNER_MASK;
+		new_state |= _dispatch_lock_value_for_self();
+	});
 }
 
 DISPATCH_ALWAYS_INLINE
 static inline void
 _dispatch_queue_clear_bound_thread(dispatch_queue_t dq)
 {
-	uint64_t old_state, new_state;
-
 	dispatch_assert(_dispatch_queue_is_thread_bound(dq));
-	os_atomic_rmw_loop2o(dq, dq_state, old_state, new_state, relaxed, {
-		new_state = DISPATCH_QUEUE_DRAIN_UNLOCK(old_state);
-	});
+	_dispatch_queue_atomic_flags_clear(dq, DQF_THREAD_BOUND|DQF_CANNOT_TRYSYNC);
+	os_atomic_and2o(dq, dq_state, ~DISPATCH_QUEUE_DRAIN_OWNER_MASK, relaxed);
 }
 
 DISPATCH_ALWAYS_INLINE
@@ -1840,6 +2050,21 @@
 }
 
 DISPATCH_ALWAYS_INLINE
+static inline dispatch_priority_t
+_dispatch_set_basepri_wlh(dispatch_priority_t dbp)
+{
+#if HAVE_PTHREAD_WORKQUEUE_QOS
+	dispatch_assert(!_dispatch_get_basepri());
+	// _dispatch_set_basepri_override_qos(DISPATCH_QOS_SATURATED)
+	dbp |= DISPATCH_QOS_SATURATED << DISPATCH_PRIORITY_OVERRIDE_SHIFT;
+	_dispatch_thread_setspecific(dispatch_basepri_key, (void*)(uintptr_t)dbp);
+#else
+	(void)dbp;
+#endif
+	return 0;
+}
+
+DISPATCH_ALWAYS_INLINE
 static inline pthread_priority_t
 _dispatch_priority_adopt(pthread_priority_t pp, unsigned long flags)
 {
@@ -2036,7 +2261,7 @@
 		dispatch_qos_t qos)
 {
 	if (_dispatch_queue_need_override(dqu, qos)) {
-		_os_object_retain_internal_inline(dqu._oq->_as_os_obj);
+		_os_object_retain_internal_n_inline(dqu._oq->_as_os_obj, 2);
 		return true;
 	}
 	return false;
@@ -2053,37 +2278,37 @@
 	return MAX(qos, _dispatch_priority_qos(dqu._oq->oq_priority));
 }
 
+#define DISPATCH_PRIORITY_PROPAGATE_CURRENT 0x1
+#define DISPATCH_PRIORITY_PROPAGATE_FOR_SYNC_IPC 0x2
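+// _CURRENT reads the priority to propagate from the current thread;
+// _FOR_SYNC_IPC bypasses the user-initiated cap applied below.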
+
 DISPATCH_ALWAYS_INLINE
-static inline dispatch_qos_t
-_dispatch_queue_reset_max_qos(dispatch_queue_class_t dqu)
+static inline pthread_priority_t
+_dispatch_priority_compute_propagated(pthread_priority_t pp,
+		unsigned int flags)
 {
-	uint64_t old_state, new_state;
-	os_atomic_rmw_loop2o(dqu._dq, dq_state, old_state, new_state, relaxed, {
-		new_state = old_state;
-		new_state &= ~DISPATCH_QUEUE_MAX_QOS_MASK;
-		new_state &= ~DISPATCH_QUEUE_RECEIVED_OVERRIDE;
-		if (old_state == new_state) {
-			os_atomic_rmw_loop_give_up(return DISPATCH_QOS_UNSPECIFIED);
-		}
-	});
-	return _dq_state_max_qos(old_state);
+#if HAVE_PTHREAD_WORKQUEUE_QOS
+	if (flags & DISPATCH_PRIORITY_PROPAGATE_CURRENT) {
+		pp = _dispatch_get_priority();
+	}
+	pp &= ~_PTHREAD_PRIORITY_FLAGS_MASK;
+	if (!(flags & DISPATCH_PRIORITY_PROPAGATE_FOR_SYNC_IPC) &&
+			pp > _dispatch_qos_to_pp(DISPATCH_QOS_USER_INITIATED)) {
+		// Cap QOS for propagation at user-initiated <rdar://16681262&16998036>
+		return _dispatch_qos_to_pp(DISPATCH_QOS_USER_INITIATED);
+	}
+	return pp;
+#else
+	(void)pp; (void)flags;
+	return 0;
+#endif
 }
 
 DISPATCH_ALWAYS_INLINE
 static inline pthread_priority_t
 _dispatch_priority_propagate(void)
 {
-#if HAVE_PTHREAD_WORKQUEUE_QOS
-	pthread_priority_t pp = _dispatch_get_priority();
-	pp &= ~_PTHREAD_PRIORITY_FLAGS_MASK;
-	if (pp > _dispatch_qos_to_pp(DISPATCH_QOS_USER_INITIATED)) {
-		// Cap QOS for propagation at user-initiated <rdar://16681262&16998036>
-		return _dispatch_qos_to_pp(DISPATCH_QOS_USER_INITIATED);
-	}
-	return pp;
-#else
-	return 0;
-#endif
+	return _dispatch_priority_compute_propagated(0,
+			DISPATCH_PRIORITY_PROPAGATE_CURRENT);
 }
 
 // including maintenance
@@ -2100,66 +2325,6 @@
 }
 
 #pragma mark -
-#pragma mark dispatch_wlh_t
-
-static inline dispatch_wlh_t
-_dispatch_queue_class_compute_wlh(dispatch_queue_class_t dqu)
-{
-	// TODO: combine with _dispatch_source_compute_kevent_priority
-	dispatch_queue_t dq = dqu._dq;
-	dispatch_queue_t tq = dq->do_targetq;
-
-	while (unlikely(!dx_hastypeflag(tq, QUEUE_ROOT))) {
-		if (tq->dq_wlh) {
-			return tq->dq_wlh;
-		}
-		dispatch_assert(!_dispatch_queue_is_thread_bound(tq));
-		if (unlikely(DISPATCH_QUEUE_IS_SUSPENDED(tq))) {
-			// this queue may not be activated yet, so the queue graph may not
-			// have stabilized yet
-			return NULL;
-		}
-		if (unlikely(_dispatch_queue_is_legacy(tq))) {
-			if (!_dispatch_is_in_root_queues_array(tq->do_targetq)) {
-				// we're not allowed to dereference tq->do_targetq
-				return NULL;
-			}
-		}
-		dq = tq;
-		tq = dq->do_targetq;
-	}
-	dispatch_assert(tq->dq_wlh);
-	return _dispatch_root_queue_wlh_for_queue(tq, dq);
-}
-
-static inline void
-_dispatch_queue_class_record_wlh_hierarchy(dispatch_queue_class_t dqu,
-		dispatch_wlh_t wlh)
-{
-	dispatch_queue_t dq = dqu._dq;
-	dispatch_queue_t tq = dq->do_targetq;
-
-	dispatch_assert(wlh);
-	dispatch_assert(!dq->dq_wlh);
-	dq->dq_wlh = wlh;
-#if DISPATCH_ENFORCE_STATIC_WLH_HIERARCHY
-	_dispatch_queue_atomic_flags_clear(dq, DQF_LEGACY);
-#endif
-	while (unlikely(!dx_hastypeflag(tq, QUEUE_ROOT))) {
-		if (tq->dq_wlh) {
-			return;
-		}
-		tq->dq_wlh = wlh;
-#if DISPATCH_ENFORCE_STATIC_WLH_HIERARCHY
-		_dispatch_queue_atomic_flags_set_and_clear(tq, DQF_TARGETED,DQF_LEGACY);
-#else
-		_dispatch_queue_atomic_flags_set(tq, DQF_TARGETED);
-#endif
-		tq = tq->do_targetq;
-	}
-}
-
-#pragma mark -
 #pragma mark dispatch_block_t
 
 #ifdef __BLOCKS__
diff --git a/src/internal.h b/src/internal.h
index 688d5dd..0536db1 100644
--- a/src/internal.h
+++ b/src/internal.h
@@ -38,6 +38,7 @@
 
 #ifdef __APPLE__
 #include <Availability.h>
+#include <os/availability.h>
 #include <TargetConditionals.h>
 
 #ifndef TARGET_OS_MAC_DESKTOP
@@ -48,15 +49,15 @@
 #if TARGET_OS_MAC_DESKTOP
 #  define DISPATCH_MIN_REQUIRED_OSX_AT_LEAST(x) \
 		(__MAC_OS_X_VERSION_MIN_REQUIRED >= (x))
-#  if !DISPATCH_MIN_REQUIRED_OSX_AT_LEAST(101100)
-#    error "OS X hosts older than OS X 10.11 aren't supported anymore"
-#  endif // !DISPATCH_MIN_REQUIRED_OSX_AT_LEAST(101000)
+#  if !DISPATCH_MIN_REQUIRED_OSX_AT_LEAST(101200)
+#    error "OS X hosts older than OS X 10.12 aren't supported anymore"
+#  endif // !DISPATCH_MIN_REQUIRED_OSX_AT_LEAST(101200)
 #elif TARGET_OS_SIMULATOR
 #  define DISPATCH_MIN_REQUIRED_OSX_AT_LEAST(x) \
 		(IPHONE_SIMULATOR_HOST_MIN_VERSION_REQUIRED >= (x))
-#  if !DISPATCH_MIN_REQUIRED_OSX_AT_LEAST(101100)
-#    error "Simulator hosts older than OS X 10.11 aren't supported anymore"
-#  endif // !DISPATCH_MIN_REQUIRED_OSX_AT_LEAST(101000)
+#  if !DISPATCH_MIN_REQUIRED_OSX_AT_LEAST(101200)
+#    error "Simulator hosts older than OS X 10.12 aren't supported anymore"
+#  endif // !DISPATCH_MIN_REQUIRED_OSX_AT_LEAST(101200)
 #else
 #  define DISPATCH_MIN_REQUIRED_OSX_AT_LEAST(x) 1
 #  if __IPHONE_OS_VERSION_MIN_REQUIRED < 90000
@@ -188,6 +189,8 @@
 #define DISPATCH_USE_CLIENT_CALLOUT 1
 #endif
 
+#define DISPATCH_ALLOW_NON_LEAF_RETARGET 1
+
 /* The "_debug" library build */
 #ifndef DISPATCH_DEBUG
 #define DISPATCH_DEBUG 0
@@ -239,9 +242,6 @@
 #if HAVE_MALLOC_MALLOC_H
 #include <malloc/malloc.h>
 #endif
-#if __has_include(<malloc_private.h>)
-#include <malloc_private.h>
-#endif // __has_include(<malloc_private.h)
 
 #include <sys/stat.h>
 
@@ -260,7 +260,11 @@
 #endif
 
 #ifdef __BLOCKS__
+#if __has_include(<Block_private.h>)
 #include <Block_private.h>
+#else
+#include "BlocksRuntime/Block_private.h"
+#endif // __has_include(<Block_private.h>)
 #include <Block.h>
 #endif /* __BLOCKS__ */
 
@@ -450,14 +454,14 @@
  * For reporting bugs within libdispatch when using the "_debug" version of the
  * library.
  */
-#if __GNUC__
+#if __APPLE__
 #define dispatch_assert(e) do { \
 		if (__builtin_constant_p(e)) { \
 			dispatch_static_assert(e); \
 		} else { \
 			typeof(e) _e = fastpath(e); /* always eval 'e' */ \
-			if (DISPATCH_DEBUG && !_e) { \
-				_dispatch_abort(__LINE__, (long)_e); \
+			if (!_e) { \
+				__assert_rtn(__func__, __FILE__, __LINE__, #e); \
 			} \
 		} \
 	} while (0)
@@ -468,7 +472,7 @@
 #define dispatch_assert(e) _dispatch_assert((long)(e), __LINE__)
-#endif	/* __GNUC__ */
+#endif	/* __APPLE__ */
 
-#if __GNUC__
+#if __APPLE__
 /*
  * A lot of API return zero upon success and not-zero on fail. Let's capture
  * and log the non-zero value
@@ -478,8 +482,8 @@
 			dispatch_static_assert(e); \
 		} else { \
 			typeof(e) _e = slowpath(e); /* always eval 'e' */ \
-			if (DISPATCH_DEBUG && _e) { \
-				_dispatch_abort(__LINE__, (long)_e); \
+			if (_e) { \
+				__assert_rtn(__func__, __FILE__, __LINE__, #e); \
 			} \
 		} \
 	} while (0)
@@ -487,7 +491,7 @@
 static inline void _dispatch_assert_zero(long e, long line) {
 	if (DISPATCH_DEBUG && e) _dispatch_abort(line, e);
 }
-#define dispatch_assert_zero(e) _dispatch_assert((long)(e), __LINE__)
+#define dispatch_assert_zero(e) _dispatch_assert_zero((long)(e), __LINE__)
-#endif	/* __GNUC__ */
+#endif	/* __APPLE__ */
 
 /*
@@ -596,6 +600,7 @@
 const char *_dispatch_strdup_if_mutable(const char *str);
 void _dispatch_vtable_init(void);
 char *_dispatch_get_build(void);
+int _dispatch_sigmask(void);
 
 uint64_t _dispatch_timeout(dispatch_time_t when);
 uint64_t _dispatch_time_nanoseconds_since_epoch(dispatch_time_t when);
@@ -630,35 +635,16 @@
 
 // Older Mac OS X and iOS Simulator fallbacks
 
-#if HAVE_PTHREAD_WORKQUEUES || DISPATCH_USE_INTERNAL_WORKQUEUE
-#ifndef WORKQ_ADDTHREADS_OPTION_OVERCOMMIT
-#define WORKQ_ADDTHREADS_OPTION_OVERCOMMIT 0x00000001
-#endif
-#endif // HAVE_PTHREAD_WORKQUEUES || DISPATCH_USE_INTERNAL_WORKQUEUE
 #if HAVE__PTHREAD_WORKQUEUE_INIT && PTHREAD_WORKQUEUE_SPI_VERSION >= 20140213 \
 		&& !defined(HAVE_PTHREAD_WORKQUEUE_QOS)
 #define HAVE_PTHREAD_WORKQUEUE_QOS 1
 #endif
-#if HAVE__PTHREAD_WORKQUEUE_INIT && (PTHREAD_WORKQUEUE_SPI_VERSION >= 20150304 \
-		|| (PTHREAD_WORKQUEUE_SPI_VERSION == 20140730 && \
-			defined(WORKQ_FEATURE_KEVENT))) \
+#if HAVE__PTHREAD_WORKQUEUE_INIT && PTHREAD_WORKQUEUE_SPI_VERSION >= 20150304 \
 		&& !defined(HAVE_PTHREAD_WORKQUEUE_KEVENT)
-#if PTHREAD_WORKQUEUE_SPI_VERSION == 20140730
-// rdar://problem/20609877
-typedef pthread_worqueue_function_kevent_t pthread_workqueue_function_kevent_t;
-#endif
 #define HAVE_PTHREAD_WORKQUEUE_KEVENT 1
 #endif
 
 
-#ifndef PTHREAD_WORKQUEUE_RESETS_VOUCHER_AND_PRIORITY_ON_PARK
-#if HAVE_PTHREAD_WORKQUEUE_QOS && DISPATCH_MIN_REQUIRED_OSX_AT_LEAST(101200)
-#define PTHREAD_WORKQUEUE_RESETS_VOUCHER_AND_PRIORITY_ON_PARK 1
-#else
-#define PTHREAD_WORKQUEUE_RESETS_VOUCHER_AND_PRIORITY_ON_PARK 0
-#endif
-#endif // PTHREAD_WORKQUEUE_RESETS_VOUCHER_AND_PRIORITY_ON_PARK
-
 #ifndef HAVE_PTHREAD_WORKQUEUE_NARROWING
 #if !DISPATCH_MIN_REQUIRED_OSX_AT_LEAST(109900)
 #define HAVE_PTHREAD_WORKQUEUE_NARROWING 0
@@ -681,31 +667,29 @@
 #define DISPATCH_USE_MEMORYPRESSURE_SOURCE 1
 #endif
 #if DISPATCH_USE_MEMORYPRESSURE_SOURCE
+#if __has_include(<malloc_private.h>)
+#include <malloc_private.h>
+#else
+extern void malloc_memory_event_handler(unsigned long);
+#endif // __has_include(<malloc_private.h>)
 extern bool _dispatch_memory_warn;
 #endif
 
 #if HAVE_PTHREAD_WORKQUEUE_KEVENT && defined(KEVENT_FLAG_WORKQ) && \
-		DISPATCH_MIN_REQUIRED_OSX_AT_LEAST(101200) && \
 		!defined(DISPATCH_USE_KEVENT_WORKQUEUE)
 #define DISPATCH_USE_KEVENT_WORKQUEUE 1
 #endif
 
-
-#if (!DISPATCH_USE_KEVENT_WORKQUEUE || DISPATCH_DEBUG) && \
+#if (!DISPATCH_USE_KEVENT_WORKQUEUE || DISPATCH_DEBUG || DISPATCH_PROFILE) && \
 		!defined(DISPATCH_USE_MGR_THREAD)
 #define DISPATCH_USE_MGR_THREAD 1
 #endif
 
-#if DISPATCH_USE_KEVENT_WORKQUEUE && \
-		DISPATCH_MIN_REQUIRED_OSX_AT_LEAST(101200) && \
-		!defined(DISPATCH_USE_EVFILT_MACHPORT_DIRECT)
-#define DISPATCH_USE_EVFILT_MACHPORT_DIRECT 1
-#endif
 
-
-#if (!DISPATCH_USE_EVFILT_MACHPORT_DIRECT || DISPATCH_DEBUG) && \
-		!defined(DISPATCH_EVFILT_MACHPORT_PORTSET_FALLBACK)
-#define DISPATCH_EVFILT_MACHPORT_PORTSET_FALLBACK 1
+#if defined(MACH_SEND_SYNC_OVERRIDE) && defined(MACH_RCV_SYNC_WAIT) && \
+		DISPATCH_MIN_REQUIRED_OSX_AT_LEAST(109900) && \
+		!defined(DISPATCH_USE_MACH_SEND_SYNC_OVERRIDE)
+#define DISPATCH_USE_MACH_SEND_SYNC_OVERRIDE 1
 #endif
 
 #if defined(F_SETNOSIGPIPE) && defined(F_GETNOSIGPIPE)
@@ -761,7 +745,8 @@
 #else
 #define ARIADNE_ENTER_DISPATCH_MAIN_CODE 0
 #endif
-#if !defined(DISPATCH_USE_VOUCHER_KDEBUG_TRACE) && DISPATCH_INTROSPECTION
+#if !defined(DISPATCH_USE_VOUCHER_KDEBUG_TRACE) && \
+		(DISPATCH_INTROSPECTION || DISPATCH_PROFILE)
 #define DISPATCH_USE_VOUCHER_KDEBUG_TRACE 1
 #endif
 
@@ -777,7 +762,6 @@
 #define DISPATCH_PERF_delayed_registration DISPATCH_CODE(PERF, 4)
 #define DISPATCH_PERF_mutable_target DISPATCH_CODE(PERF, 5)
 #define DISPATCH_PERF_strict_bg_timer DISPATCH_CODE(PERF, 6)
-#define DISPATCH_PERF_wlh_change DISPATCH_CODE(PERF, 7)
 
 #define DISPATCH_MACH_MSG_hdr_move DISPATCH_CODE(MACH_MSG, 1)
 
@@ -837,32 +821,12 @@
 #endif
 #endif // VOUCHER_USE_MACH_VOUCHER
 
+#ifndef VOUCHER_USE_EMPTY_MACH_BASE_VOUCHER
 #if RDAR_24272659 // FIXME: <rdar://problem/24272659>
-#if !VOUCHER_USE_MACH_VOUCHER || !DISPATCH_MIN_REQUIRED_OSX_AT_LEAST(101200)
-#undef VOUCHER_USE_EMPTY_MACH_BASE_VOUCHER
-#define VOUCHER_USE_EMPTY_MACH_BASE_VOUCHER 0
-#elif !defined(VOUCHER_USE_EMPTY_MACH_BASE_VOUCHER)
 #define VOUCHER_USE_EMPTY_MACH_BASE_VOUCHER 1
-#endif
 #else // RDAR_24272659
-#undef VOUCHER_USE_EMPTY_MACH_BASE_VOUCHER
 #define VOUCHER_USE_EMPTY_MACH_BASE_VOUCHER 0
 #endif // RDAR_24272659
-
-#if !VOUCHER_USE_MACH_VOUCHER || !DISPATCH_MIN_REQUIRED_OSX_AT_LEAST(101200)
-#undef VOUCHER_USE_BANK_AUTOREDEEM
-#define VOUCHER_USE_BANK_AUTOREDEEM 0
-#elif !defined(VOUCHER_USE_BANK_AUTOREDEEM)
-#define VOUCHER_USE_BANK_AUTOREDEEM 1
-#endif
-
-#if !VOUCHER_USE_MACH_VOUCHER || \
-		!__has_include(<voucher/ipc_pthread_priority_types.h>) || \
-		!DISPATCH_MIN_REQUIRED_OSX_AT_LEAST(101200)
-#undef VOUCHER_USE_MACH_VOUCHER_PRIORITY
-#define VOUCHER_USE_MACH_VOUCHER_PRIORITY 0
-#elif !defined(VOUCHER_USE_MACH_VOUCHER_PRIORITY)
-#define VOUCHER_USE_MACH_VOUCHER_PRIORITY 1
 #endif
 
 #ifndef VOUCHER_USE_PERSONA
@@ -976,22 +940,6 @@
 #endif // DISPATCH_USE_KEVENT_WORKQUEUE
 
 
-#if DISPATCH_USE_EVFILT_MACHPORT_DIRECT
-#if !DISPATCH_USE_KEVENT_WORKQUEUE || !EV_UDATA_SPECIFIC
-#error Invalid build configuration
-#endif
-#if DISPATCH_EVFILT_MACHPORT_PORTSET_FALLBACK
-extern int _dispatch_evfilt_machport_direct_enabled;
-#else
-#define _dispatch_evfilt_machport_direct_enabled (1)
-#endif
-#else
-#define _dispatch_evfilt_machport_direct_enabled (0)
-#endif // DISPATCH_USE_EVFILT_MACHPORT_DIRECT
-
-
-int _dispatch_sigmask(void);
-
 /* #includes dependent on internal.h */
 #include "object_internal.h"
 #include "semaphore_internal.h"
diff --git a/src/introspection.c b/src/introspection.c
index cd6bcff..8692a8b 100644
--- a/src/introspection.c
+++ b/src/introspection.c
@@ -219,7 +219,7 @@
 	} else {
 		if (flags & DISPATCH_OBJ_SYNC_WAITER_BIT) {
 			dispatch_sync_context_t dsc = (dispatch_sync_context_t)dc;
-			waiter = pthread_from_mach_thread_np((mach_port_t)dc->dc_data);
+			waiter = pthread_from_mach_thread_np(dsc->dsc_waiter);
 			ctxt = dsc->dsc_ctxt;
 			func = dsc->dsc_func;
 		}
diff --git a/src/io.c b/src/io.c
index f538862..2904373 100644
--- a/src/io.c
+++ b/src/io.c
@@ -233,7 +233,7 @@
 static dispatch_io_t
 _dispatch_io_create(dispatch_io_type_t type)
 {
-	dispatch_io_t channel = _dispatch_alloc(DISPATCH_VTABLE(io),
+	dispatch_io_t channel = _dispatch_object_alloc(DISPATCH_VTABLE(io),
 			sizeof(struct dispatch_io_s));
 	channel->do_next = DISPATCH_OBJECT_LISTLESS;
 	channel->do_targetq = _dispatch_get_root_queue(DISPATCH_QOS_DEFAULT, true);
@@ -278,7 +278,7 @@
 }
 
 void
-_dispatch_io_dispose(dispatch_io_t channel)
+_dispatch_io_dispose(dispatch_io_t channel, DISPATCH_UNUSED bool *allow_free)
 {
 	_dispatch_object_debug(channel, "%s", __func__);
 	if (channel->fd_entry &&
@@ -682,6 +682,9 @@
 				_dispatch_channel_debug("stop cleanup", channel);
 				_dispatch_fd_entry_cleanup_operations(fd_entry, channel);
 				if (!(channel->atomic_flags & DIO_CLOSED)) {
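+					// clear the back-reference from the fd_entry's path data
+					// before the channel drops its reference on the fd_entry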
+					if (fd_entry->path_data) {
+						fd_entry->path_data->channel = NULL;
+					}
 					channel->fd_entry = NULL;
 					_dispatch_fd_entry_release(fd_entry);
 				}
@@ -732,9 +735,10 @@
 						relaxed);
 				dispatch_fd_entry_t fd_entry = channel->fd_entry;
 				if (fd_entry) {
-					if (!fd_entry->path_data) {
-						channel->fd_entry = NULL;
+					if (fd_entry->path_data) {
+						fd_entry->path_data->channel = NULL;
 					}
+					channel->fd_entry = NULL;
 					_dispatch_fd_entry_release(fd_entry);
 				}
 			}
@@ -1019,14 +1023,13 @@
 		});
 		return NULL;
 	}
-	dispatch_operation_t op = _dispatch_alloc(DISPATCH_VTABLE(operation),
+	dispatch_operation_t op = _dispatch_object_alloc(DISPATCH_VTABLE(operation),
 			sizeof(struct dispatch_operation_s));
 	_dispatch_channel_debug("operation create: %p", channel, op);
 	op->do_next = DISPATCH_OBJECT_LISTLESS;
 	op->do_xref_cnt = -1; // operation object is not exposed externally
-	op->op_q = dispatch_queue_create("com.apple.libdispatch-io.opq", NULL);
-	op->op_q->do_targetq = queue;
-	_dispatch_retain(queue);
+	op->op_q = dispatch_queue_create_with_target("com.apple.libdispatch-io.opq",
+			NULL, queue);
 	op->active = false;
 	op->direction = direction;
 	op->offset = offset + channel->f_ptr;
@@ -1047,7 +1050,8 @@
 }
 
 void
-_dispatch_operation_dispose(dispatch_operation_t op)
+_dispatch_operation_dispose(dispatch_operation_t op,
+		DISPATCH_UNUSED bool *allow_free)
 {
 	_dispatch_object_debug(op, "%s", __func__);
 	_dispatch_op_debug("dispose", op);
@@ -1305,12 +1309,10 @@
 {
 	dispatch_fd_entry_t fd_entry;
 	fd_entry = _dispatch_calloc(1ul, sizeof(struct dispatch_fd_entry_s));
-	fd_entry->close_queue = dispatch_queue_create(
-			"com.apple.libdispatch-io.closeq", NULL);
 	// Use target queue to ensure that no concurrent lookups are going on when
 	// the close queue is running
-	fd_entry->close_queue->do_targetq = q;
-	_dispatch_retain(q);
+	fd_entry->close_queue = dispatch_queue_create_with_target(
+			"com.apple.libdispatch-io.closeq", NULL, q);
 	// Suspend the cleanup queue until closing
 	_dispatch_fd_entry_retain(fd_entry);
 	return fd_entry;
@@ -1584,11 +1586,9 @@
 	for (direction = 0; direction < DOP_DIR_MAX; direction++) {
 		dispatch_stream_t stream;
 		stream = _dispatch_calloc(1ul, sizeof(struct dispatch_stream_s));
-		stream->dq = dispatch_queue_create("com.apple.libdispatch-io.streamq",
-				NULL);
+		stream->dq = dispatch_queue_create_with_target(
+				"com.apple.libdispatch-io.streamq", NULL, tq);
 		dispatch_set_context(stream->dq, stream);
-		_dispatch_retain(tq);
-		stream->dq->do_targetq = tq;
 		TAILQ_INIT(&stream->operations[DISPATCH_IO_RANDOM]);
 		TAILQ_INIT(&stream->operations[DISPATCH_IO_STREAM]);
 		fd_entry->streams[direction] = stream;
@@ -1633,7 +1633,7 @@
 	}
 	// Otherwise create a new entry
 	size_t pending_reqs_depth = dispatch_io_defaults.max_pending_io_reqs;
-	disk = _dispatch_alloc(DISPATCH_VTABLE(disk),
+	disk = _dispatch_object_alloc(DISPATCH_VTABLE(disk),
 			sizeof(struct dispatch_disk_s) +
 			(pending_reqs_depth * sizeof(dispatch_operation_t)));
 	disk->do_next = DISPATCH_OBJECT_LISTLESS;
@@ -1654,7 +1654,7 @@
 }
 
 void
-_dispatch_disk_dispose(dispatch_disk_t disk)
+_dispatch_disk_dispose(dispatch_disk_t disk, DISPATCH_UNUSED bool *allow_free)
 {
 	uintptr_t hash = DIO_HASH(disk->dev);
 	TAILQ_REMOVE(&_dispatch_io_devs[hash], disk, disk_list);
diff --git a/src/io_internal.h b/src/io_internal.h
index ad8259a..672727f 100644
--- a/src/io_internal.h
+++ b/src/io_internal.h
@@ -178,10 +178,11 @@
 
 void _dispatch_io_set_target_queue(dispatch_io_t channel, dispatch_queue_t dq);
 size_t _dispatch_io_debug(dispatch_io_t channel, char* buf, size_t bufsiz);
-void _dispatch_io_dispose(dispatch_io_t channel);
+void _dispatch_io_dispose(dispatch_io_t channel, bool *allow_free);
 size_t _dispatch_operation_debug(dispatch_operation_t op, char* buf,
 		size_t bufsiz);
-void _dispatch_operation_dispose(dispatch_operation_t operation);
-void _dispatch_disk_dispose(dispatch_disk_t disk);
+void _dispatch_operation_dispose(dispatch_operation_t operation,
+		bool *allow_free);
+void _dispatch_disk_dispose(dispatch_disk_t disk, bool *allow_free);
 
 #endif // __DISPATCH_IO_INTERNAL__
diff --git a/src/libdispatch.codes b/src/libdispatch.codes
index 64f82b5..0ecc333 100644
--- a/src/libdispatch.codes
+++ b/src/libdispatch.codes
@@ -12,7 +12,6 @@
 0x2e020010	DISPATCH_PERF_delayed_registration
 0x2e020014	DISPATCH_PERF_mutable_target
 0x2e020018	DISPATCH_PERF_strict_bg_timer
-0x2e02001c	DISPATCH_PERF_wlh_change
 
 0x2e030004	DISPATCH_MACH_MSG_hdr_move
 
diff --git a/src/mach.c b/src/mach.c
index cc20645..0f9e9a8 100644
--- a/src/mach.c
+++ b/src/mach.c
@@ -33,7 +33,7 @@
 
 DISPATCH_ENUM(dispatch_mach_send_invoke_flags, uint32_t,
 	DM_SEND_INVOKE_NONE            = 0x0,
-	DM_SEND_INVOKE_FLUSH           = 0x1,
+	DM_SEND_INVOKE_MAKE_DIRTY      = 0x1,
 	DM_SEND_INVOKE_NEEDS_BARRIER   = 0x2,
 	DM_SEND_INVOKE_CANCEL          = 0x4,
 	DM_SEND_INVOKE_CAN_RUN_BARRIER = 0x8,
@@ -43,8 +43,6 @@
 		((dispatch_mach_send_invoke_flags_t)DM_SEND_INVOKE_IMMEDIATE_SEND)
 
 static inline mach_msg_option_t _dispatch_mach_checkin_options(void);
-static inline pthread_priority_t _dispatch_mach_priority_propagate(
-		mach_msg_option_t options);
 static mach_port_t _dispatch_mach_msg_get_remote_port(dispatch_object_t dou);
 static mach_port_t _dispatch_mach_msg_get_reply_port(dispatch_object_t dou);
 static void _dispatch_mach_msg_disconnected(dispatch_mach_t dm,
@@ -123,6 +121,14 @@
 			"_dispatch_mach_default_async_reply_handler called");
 }
 
+// Default dmxh_enable_sigterm_notification callback that enables delivery of
+// SIGTERM notifications (for backwards compatibility).
+static bool
+_dispatch_mach_enable_sigterm(void *_Nullable context DISPATCH_UNUSED)
+{
+	return true;
+}
+
 // Callbacks from dispatch to XPC. The default is to not support any callbacks.
 static const struct dispatch_mach_xpc_hooks_s _dispatch_mach_xpc_hooks_default
 		= {
@@ -131,6 +137,7 @@
 	.dmxh_msg_context_reply_queue =
 			&_dispatch_mach_msg_context_no_async_reply_queue,
 	.dmxh_async_reply_handler = &_dispatch_mach_default_async_reply_handler,
+	.dmxh_enable_sigterm_notification = &_dispatch_mach_enable_sigterm,
 };
 
 static dispatch_mach_xpc_hooks_t _dispatch_mach_xpc_hooks
@@ -157,17 +164,17 @@
 	dispatch_mach_recv_refs_t dmrr;
 	dispatch_mach_send_refs_t dmsr;
 	dispatch_mach_t dm;
-	// ensure _dispatch_evfilt_machport_direct_enabled is initialized
-	_dispatch_root_queues_init();
-	dm = _dispatch_alloc(DISPATCH_VTABLE(mach),
+	dm = _dispatch_object_alloc(DISPATCH_VTABLE(mach),
 			sizeof(struct dispatch_mach_s));
-	_dispatch_queue_init(dm->_as_dq, DQF_LEGACY, 1, true);
+	_dispatch_queue_init(dm->_as_dq, DQF_LEGACY, 1,
+			DISPATCH_QUEUE_INACTIVE | DISPATCH_QUEUE_ROLE_INNER);
 
 	dm->dq_label = label;
 	dm->do_ref_cnt++; // the reference _dispatch_mach_cancel_invoke holds
 	dm->dm_is_xpc = is_xpc;
 
 	dmrr = dux_create(&_dispatch_mach_type_recv, 0, 0)._dmrr;
+	dispatch_assert(dmrr->du_is_direct);
 	dmrr->du_owner_wref = _dispatch_ptr2wref(dm);
 	dmrr->dmrr_handler_func = handler;
 	dmrr->dmrr_handler_ctxt = context;
@@ -179,13 +186,6 @@
 	dmsr->du_owner_wref = _dispatch_ptr2wref(dm);
 	dm->dm_send_refs = dmsr;
 
-	if (is_xpc) {
-		dispatch_xpc_term_refs_t _dxtr =
-				dux_create(&_dispatch_xpc_type_sigterm, SIGTERM, 0)._dxtr;
-		_dxtr->du_owner_wref = _dispatch_ptr2wref(dm);
-		dm->dm_xpc_term_refs = _dxtr;
-	}
-
 	if (slowpath(!q)) {
 		q = _dispatch_get_root_queue(DISPATCH_QOS_DEFAULT, true);
 	} else {
@@ -221,7 +221,7 @@
 }
 
 void
-_dispatch_mach_dispose(dispatch_mach_t dm)
+_dispatch_mach_dispose(dispatch_mach_t dm, bool *allow_free)
 {
 	_dispatch_object_debug(dm, "%s", __func__);
 	_dispatch_unote_dispose(dm->dm_recv_refs);
@@ -232,7 +232,7 @@
 		_dispatch_unote_dispose(dm->dm_xpc_term_refs);
 		dm->dm_xpc_term_refs = NULL;
 	}
-	_dispatch_queue_destroy(dm->_as_dq);
+	_dispatch_queue_destroy(dm->_as_dq, allow_free);
 }
 
 void
@@ -309,69 +309,66 @@
 	if (dmsgr) {
 		return _dispatch_mach_handle_or_push_received_msg(dm, dmsgr);
 	}
-	dispatch_assert(!(options & DU_UNREGISTER_WAKEUP));
 }
 
 DISPATCH_NOINLINE
-static void
+static bool
+_dispatch_mach_reply_list_remove(dispatch_mach_t dm,
+		dispatch_mach_reply_refs_t dmr)
+{
+	// dmsr_replies_lock must be held by the caller.
+	bool removed = false;
+	if (likely(_TAILQ_IS_ENQUEUED(dmr, dmr_list))) {
+		TAILQ_REMOVE(&dm->dm_send_refs->dmsr_replies, dmr, dmr_list);
+		_TAILQ_MARK_NOT_ENQUEUED(dmr, dmr_list);
+		removed = true;
+	}
+	return removed;
+}
+
+DISPATCH_NOINLINE
+static bool
 _dispatch_mach_reply_kevent_unregister(dispatch_mach_t dm,
 		dispatch_mach_reply_refs_t dmr, uint32_t options)
 {
+	dispatch_assert(!_TAILQ_IS_ENQUEUED(dmr, dmr_list));
+
+	bool disconnected = (options & DU_UNREGISTER_DISCONNECTED);
+	_dispatch_debug("machport[0x%08x]: unregistering for reply%s, ctxt %p",
+			(mach_port_t)dmr->du_ident, disconnected ? " (disconnected)" : "",
+			dmr->dmr_ctxt);
+	if (!_dispatch_unote_unregister(dmr, options)) {
+		_dispatch_debug("machport[0x%08x]: deferred delete kevent[%p]",
+						(mach_port_t)dmr->du_ident, dmr);
+		dispatch_assert(options == DU_UNREGISTER_DISCONNECTED);
+		return false;
+	}
+
 	dispatch_mach_msg_t dmsgr = NULL;
 	dispatch_queue_t drq = NULL;
-	bool replies_empty = false;
-	bool disconnected = (options & DU_UNREGISTER_DISCONNECTED);
-	if (options & DU_UNREGISTER_REPLY_REMOVE) {
-		_dispatch_unfair_lock_lock(&dm->dm_send_refs->dmsr_replies_lock);
-		if (unlikely(!_TAILQ_IS_ENQUEUED(dmr, dmr_list))) {
-			DISPATCH_INTERNAL_CRASH(0, "Could not find reply registration");
-		}
-		TAILQ_REMOVE(&dm->dm_send_refs->dmsr_replies, dmr, dmr_list);
-		_TAILQ_MARK_NOT_ENQUEUED(dmr, dmr_list);
-		replies_empty = TAILQ_EMPTY(&dm->dm_send_refs->dmsr_replies);
-		_dispatch_unfair_lock_unlock(&dm->dm_send_refs->dmsr_replies_lock);
-	}
 	if (disconnected) {
+		// The next call is guaranteed to always transfer or consume the voucher
+		// in the dmr, if there is one.
 		dmsgr = _dispatch_mach_msg_create_reply_disconnected(NULL, dmr,
 			dmr->dmr_async_reply ? DISPATCH_MACH_ASYNC_WAITER_DISCONNECTED
 			: DISPATCH_MACH_DISCONNECTED);
 		if (dmr->dmr_ctxt) {
 			drq = _dispatch_mach_msg_context_async_reply_queue(dmr->dmr_ctxt);
 		}
+		dispatch_assert(dmr->dmr_voucher == NULL);
 	} else if (dmr->dmr_voucher) {
 		_voucher_release(dmr->dmr_voucher);
 		dmr->dmr_voucher = NULL;
 	}
-	_dispatch_debug("machport[0x%08x]: unregistering for reply%s, ctxt %p",
-			(mach_port_t)dmr->du_ident, disconnected ? " (disconnected)" : "",
-			dmr->dmr_ctxt);
-	if (!_dispatch_unote_unregister(dmr, options)) {
-		_dispatch_debug("machport[0x%08x]: deferred delete kevent[%p]",
-				(mach_port_t)dmr->du_ident, dmr);
-		dispatch_assert(options == DU_UNREGISTER_DISCONNECTED);
-		// dmr must be put back so that the event delivery finds it, the
-		// replies lock is held by the caller.
-		TAILQ_INSERT_HEAD(&dm->dm_send_refs->dmsr_replies, dmr, dmr_list);
-		if (dmsgr) {
-			dmr->dmr_voucher = dmsgr->dmsg_voucher;
-			dmsgr->dmsg_voucher = NULL;
-			_dispatch_release(dmsgr);
-		}
-		return; // deferred unregistration
-	}
 	_dispatch_unote_dispose(dmr);
+
 	if (dmsgr) {
 		if (drq) {
-			return _dispatch_mach_push_async_reply_msg(dm, dmsgr, drq);
+			_dispatch_mach_push_async_reply_msg(dm, dmsgr, drq);
 		} else {
-			return _dispatch_mach_handle_or_push_received_msg(dm, dmsgr);
+			_dispatch_mach_handle_or_push_received_msg(dm, dmsgr);
 		}
 	}
-	if ((options & DU_UNREGISTER_WAKEUP) && replies_empty &&
-			(dm->dm_send_refs->dmsr_disconnect_cnt ||
-			(dm->dq_atomic_flags & DSF_CANCELED))) {
-		dx_wakeup(dm, 0, DISPATCH_WAKEUP_FLUSH);
-	}
+	return true;
 }
 
 DISPATCH_NOINLINE
@@ -412,10 +409,11 @@
 		dispatch_mach_msg_t dmsg)
 {
 	dispatch_mach_reply_refs_t dmr;
-	dispatch_priority_t mpri, pri, rpri;
-	dispatch_priority_t overcommit;
+	dispatch_priority_t mpri, pri, overcommit;
+	dispatch_wlh_t wlh;
 
 	dmr = dux_create(&_dispatch_mach_type_reply, reply_port, 0)._dmr;
+	dispatch_assert(dmr->du_is_direct);
 	dmr->du_owner_wref = _dispatch_ptr2wref(dm);
 	if (dmsg->dmsg_voucher) {
 		dmr->dmr_voucher = _voucher_retain(dmsg->dmsg_voucher);
@@ -430,18 +428,22 @@
 		drq = _dispatch_mach_msg_context_async_reply_queue(dmsg->do_ctxt);
 	}
 
-	dispatch_wlh_t wlh = dm->dq_wlh;
-	pri = (dm->dq_priority & DISPATCH_PRIORITY_REQUESTED_MASK);
-	overcommit = dm->dq_priority & DISPATCH_PRIORITY_FLAG_OVERCOMMIT;
-	if (drq) {
-		rpri = drq->dq_priority & DISPATCH_PRIORITY_REQUESTED_MASK;
-		if (rpri > pri) {
-			pri = rpri;
-			overcommit = drq->dq_priority & DISPATCH_PRIORITY_FLAG_OVERCOMMIT;
-		}
-		if (drq->dq_wlh) wlh = drq->dq_wlh;
+	if (!drq) {
+		pri = dm->dq_priority;
+		wlh = dm->dm_recv_refs->du_wlh;
+	} else if (dx_hastypeflag(drq, QUEUE_ROOT)) {
+		pri = drq->dq_priority;
+		wlh = DISPATCH_WLH_ANON;
+	} else if (drq == dm->do_targetq) {
+		pri = dm->dq_priority;
+		wlh = dm->dm_recv_refs->du_wlh;
+	} else if (!(pri = _dispatch_queue_compute_priority_and_wlh(drq, &wlh))) {
+		pri = drq->dq_priority;
+		wlh = DISPATCH_WLH_ANON;
 	}
-	if (pri && dmr->du_is_direct) {
+	if (pri & DISPATCH_PRIORITY_REQUESTED_MASK) {
+		overcommit = pri & DISPATCH_PRIORITY_FLAG_OVERCOMMIT;
+		pri &= DISPATCH_PRIORITY_REQUESTED_MASK;
 		mpri = _dispatch_priority_from_pp_strip_flags(dmsg->dmsg_priority);
 		if (pri < mpri) pri = mpri;
 		pri |= overcommit;
@@ -460,25 +462,54 @@
 	_dispatch_unfair_lock_unlock(&dm->dm_send_refs->dmsr_replies_lock);
 
 	if (!_dispatch_unote_register(dmr, wlh, pri)) {
+		_dispatch_unfair_lock_lock(&dm->dm_send_refs->dmsr_replies_lock);
+		_dispatch_mach_reply_list_remove(dm, dmr);
+		_dispatch_unfair_lock_unlock(&dm->dm_send_refs->dmsr_replies_lock);
 		_dispatch_mach_reply_kevent_unregister(dm, dmr,
-				DU_UNREGISTER_DISCONNECTED|DU_UNREGISTER_REPLY_REMOVE);
+				DU_UNREGISTER_DISCONNECTED);
 	}
 }
 
 #pragma mark -
 #pragma mark dispatch_mach_msg
 
+DISPATCH_ALWAYS_INLINE DISPATCH_CONST
+static inline bool
+_dispatch_use_mach_special_reply_port(void)
+{
+#if DISPATCH_USE_MACH_SEND_SYNC_OVERRIDE
+	return true;
+#else
+#define thread_get_special_reply_port() ({__builtin_trap(); MACH_PORT_NULL;})
+	return false;
+#endif
+}
+
 static mach_port_t
 _dispatch_get_thread_reply_port(void)
 {
-	mach_port_t reply_port, mrp = _dispatch_get_thread_mig_reply_port();
+	mach_port_t reply_port, mrp;
+	if (_dispatch_use_mach_special_reply_port()) {
+		mrp = _dispatch_get_thread_special_reply_port();
+	} else {
+		mrp = _dispatch_get_thread_mig_reply_port();
+	}
 	if (mrp) {
 		reply_port = mrp;
 		_dispatch_debug("machport[0x%08x]: borrowed thread sync reply port",
 				reply_port);
 	} else {
-		reply_port = mach_reply_port();
-		_dispatch_set_thread_mig_reply_port(reply_port);
+		if (_dispatch_use_mach_special_reply_port()) {
+			reply_port = thread_get_special_reply_port();
+			_dispatch_set_thread_special_reply_port(reply_port);
+		} else {
+			reply_port = mach_reply_port();
+			_dispatch_set_thread_mig_reply_port(reply_port);
+		}
+		if (unlikely(!MACH_PORT_VALID(reply_port))) {
+			DISPATCH_CLIENT_CRASH(_dispatch_use_mach_special_reply_port(),
+				"Unable to allocate reply port, possible port leak");
+		}
 		_dispatch_debug("machport[0x%08x]: allocated thread sync reply port",
 				reply_port);
 	}
@@ -489,7 +520,12 @@
 static void
 _dispatch_clear_thread_reply_port(mach_port_t reply_port)
 {
-	mach_port_t mrp = _dispatch_get_thread_mig_reply_port();
+	mach_port_t mrp;
+	if (_dispatch_use_mach_special_reply_port()) {
+		mrp = _dispatch_get_thread_special_reply_port();
+	} else {
+		mrp = _dispatch_get_thread_mig_reply_port();
+	}
 	if (reply_port != mrp) {
 		if (mrp) {
 			_dispatch_debug("machport[0x%08x]: did not clear thread sync reply "
@@ -497,7 +533,11 @@
 		}
 		return;
 	}
-	_dispatch_set_thread_mig_reply_port(MACH_PORT_NULL);
+	if (_dispatch_use_mach_special_reply_port()) {
+		_dispatch_set_thread_special_reply_port(MACH_PORT_NULL);
+	} else {
+		_dispatch_set_thread_mig_reply_port(MACH_PORT_NULL);
+	}
 	_dispatch_debug_machport(reply_port);
 	_dispatch_debug("machport[0x%08x]: cleared thread sync reply port",
 			reply_port);
@@ -507,7 +547,12 @@
 _dispatch_set_thread_reply_port(mach_port_t reply_port)
 {
 	_dispatch_debug_machport(reply_port);
-	mach_port_t mrp = _dispatch_get_thread_mig_reply_port();
+	mach_port_t mrp;
+	if (_dispatch_use_mach_special_reply_port()) {
+		mrp = _dispatch_get_thread_special_reply_port();
+	} else {
+		mrp = _dispatch_get_thread_mig_reply_port();
+	}
 	if (mrp) {
 		kern_return_t kr = mach_port_mod_refs(mach_task_self(), reply_port,
 				MACH_PORT_RIGHT_RECEIVE, -1);
@@ -516,7 +561,11 @@
 		_dispatch_debug("machport[0x%08x]: deallocated sync reply port "
 				"(found 0x%08x)", reply_port, mrp);
 	} else {
-		_dispatch_set_thread_mig_reply_port(reply_port);
+		if (_dispatch_use_mach_special_reply_port()) {
+			_dispatch_set_thread_special_reply_port(reply_port);
+		} else {
+			_dispatch_set_thread_mig_reply_port(reply_port);
+		}
 		_dispatch_debug("machport[0x%08x]: restored thread sync reply port",
 				reply_port);
 	}
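
The `_dispatch_use_mach_special_reply_port()` helper introduced above is a const, always-inline predicate over a compile-time feature flag, so every `if` that consults it folds down to a single path when the feature is compiled out. A minimal standalone sketch of that pattern, with a hypothetical flag and function names rather than the libdispatch internals:

```c
#include <stdbool.h>
#include <stdio.h>

/* Hypothetical compile-time feature flag standing in for
 * DISPATCH_USE_MACH_SEND_SYNC_OVERRIDE (illustration only). */
#ifndef USE_SPECIAL_REPLY_PORT
#define USE_SPECIAL_REPLY_PORT 0
#endif

/* Constant predicate: callers can branch on it freely and the compiler
 * folds the dead arm away at every call site. */
__attribute__((const, always_inline))
static inline bool
use_special_reply_port(void)
{
	return USE_SPECIAL_REPLY_PORT;
}

static const char *
reply_port_kind(void)
{
	if (use_special_reply_port()) {
		return "thread special reply port";
	}
	return "thread MIG reply port";
}

int
main(void)
{
	printf("borrowing the %s\n", reply_port_kind());
	return 0;
}
```
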
@@ -578,7 +627,8 @@
 		dmr->dmr_voucher = NULL; // transfer reference
 	} else {
 		voucher = voucher_create_with_mach_msg(hdr);
-		pp = _voucher_get_priority(voucher);
+		pp = _dispatch_priority_compute_propagated(
+				_voucher_get_priority(voucher), 0);
 	}
 
 	destructor = (flags & DISPATCH_EV_MSG_NEEDS_FREE) ?
@@ -609,7 +659,6 @@
 
 	dispatch_mach_recv_refs_t dmrr = du._dmrr;
 	dispatch_mach_t dm = _dispatch_wref2ptr(dmrr->du_owner_wref);
-	dispatch_wakeup_flags_t wflags = 0;
 	dispatch_queue_flags_t dqf;
 	dispatch_mach_msg_t dmsg;
 
@@ -620,31 +669,24 @@
 				"Unexpected EV_VANISHED (do not destroy random mach ports)");
 	}
 
-	if (dmrr->du_is_direct || (flags & (EV_DELETE | EV_ONESHOT))) {
-		// once we modify the queue atomic flags below, it will allow concurrent
-		// threads running _dispatch_mach_invoke2 to dispose of the source,
-		// so we can't safely borrow the reference we get from the muxnote udata
-		// anymore, and need our own
-		wflags = DISPATCH_WAKEUP_CONSUME;
-		_dispatch_retain(dm); // rdar://20382435
-	}
+	// once we modify the queue atomic flags below, it will allow concurrent
+	// threads running _dispatch_mach_invoke2 to dispose of the source,
+	// so we can't safely borrow the reference we get from the muxnote udata
+	// anymore, and need our own
+	dispatch_wakeup_flags_t wflags = DISPATCH_WAKEUP_CONSUME_2;
+	_dispatch_retain_2(dm); // rdar://20382435
 
 	if (unlikely((flags & EV_ONESHOT) && !(flags & EV_DELETE))) {
-		dqf = _dispatch_queue_atomic_flags(dm->_as_dq);
+		dqf = _dispatch_queue_atomic_flags_set_and_clear(dm->_as_dq,
+				DSF_DEFERRED_DELETE, DSF_ARMED);
 		_dispatch_debug("kevent-source[%p]: deferred delete oneshot kevent[%p]",
 				dm, dmrr);
-	} else if (unlikely(flags & EV_DELETE)) {
+	} else if (unlikely(flags & (EV_ONESHOT | EV_DELETE))) {
 		_dispatch_source_refs_unregister(dm->_as_ds,
 				DU_UNREGISTER_ALREADY_DELETED);
 		dqf = _dispatch_queue_atomic_flags(dm->_as_dq);
 		_dispatch_debug("kevent-source[%p]: deleted kevent[%p]", dm, dmrr);
-#if DISPATCH_EVFILT_MACHPORT_PORTSET_FALLBACK
-	} else if (unlikely(!dmrr->du_is_direct)) {
-		dqf = _dispatch_queue_atomic_flags(dm->_as_dq);
-		_dispatch_unote_resume(du);
-#endif
 	} else {
-		dispatch_assert(dmrr->du_is_direct);
 		dqf = _dispatch_queue_atomic_flags_clear(dm->_as_dq, DSF_ARMED);
 		_dispatch_debug("kevent-source[%p]: disarmed kevent[%p]", dm, dmrr);
 	}
@@ -660,14 +702,20 @@
 		if (flags & DISPATCH_EV_MSG_NEEDS_FREE) {
 			free(hdr);
 		}
-		return dx_wakeup(dm, 0, wflags | DISPATCH_WAKEUP_FLUSH);
+		return dx_wakeup(dm, 0, wflags | DISPATCH_WAKEUP_MAKE_DIRTY);
 	}
 
+	// Once the mach channel disarming is visible, cancellation will switch to
+	// immediate deletion.  If we're preempted here, then the whole cancellation
+	// sequence may be complete by the time we really enqueue the message.
+	//
+	// _dispatch_mach_msg_invoke_with_mach() is responsible for filtering it out
+	// to keep the promise that DISPATCH_MACH_DISCONNECTED is the last
+	// event sent.
+
 	dmsg = _dispatch_mach_msg_create_recv(hdr, siz, NULL, flags);
 	_dispatch_mach_handle_or_push_received_msg(dm, dmsg);
-	if (wflags & DISPATCH_WAKEUP_CONSUME) {
-		return _dispatch_release_tailcall(dm);
-	}
+	return _dispatch_release_2_tailcall(dm);
 }
 
 void
@@ -683,23 +731,11 @@
 	_dispatch_debug("machport[0x%08x]: received msg id 0x%x, reply on 0x%08x",
 			hdr->msgh_local_port, hdr->msgh_id, hdr->msgh_remote_port);
 
-	uint32_t options = DU_UNREGISTER_IMMEDIATE_DELETE;
-	options |= DU_UNREGISTER_REPLY_REMOVE;
-	options |= DU_UNREGISTER_WAKEUP;
-	if (canceled) {
-		_dispatch_debug("machport[0x%08x]: drop msg id 0x%x, reply on 0x%08x",
-				hdr->msgh_local_port, hdr->msgh_id, hdr->msgh_remote_port);
-		options |= DU_UNREGISTER_DISCONNECTED;
-		mach_msg_destroy(hdr);
-		if (flags & DISPATCH_EV_MSG_NEEDS_FREE) {
-			free(hdr);
-		}
-	} else {
+	if (!canceled) {
 		dmsg = _dispatch_mach_msg_create_recv(hdr, siz, dmr, flags);
 	}
-	_dispatch_mach_reply_kevent_unregister(dm, dmr, options);
 
-	if (!canceled) {
+	if (dmsg) {
 		dispatch_queue_t drq = NULL;
 		if (dmsg->do_ctxt) {
 			drq = _dispatch_mach_msg_context_async_reply_queue(dmsg->do_ctxt);
@@ -709,13 +745,45 @@
 		} else {
 			_dispatch_mach_handle_or_push_received_msg(dm, dmsg);
 		}
+	} else {
+		_dispatch_debug("machport[0x%08x]: drop msg id 0x%x, reply on 0x%08x",
+				hdr->msgh_local_port, hdr->msgh_id, hdr->msgh_remote_port);
+		mach_msg_destroy(hdr);
+		if (flags & DISPATCH_EV_MSG_NEEDS_FREE) {
+			free(hdr);
+		}
 	}
+
+	dispatch_wakeup_flags_t wflags = 0;
+	uint32_t options = DU_UNREGISTER_IMMEDIATE_DELETE;
+	if (canceled) {
+		options |= DU_UNREGISTER_DISCONNECTED;
+	}
+
+	_dispatch_unfair_lock_lock(&dm->dm_send_refs->dmsr_replies_lock);
+	bool removed = _dispatch_mach_reply_list_remove(dm, dmr);
+	dispatch_assert(removed);
+	if (TAILQ_EMPTY(&dm->dm_send_refs->dmsr_replies) &&
+			(dm->dm_send_refs->dmsr_disconnect_cnt ||
+			(dm->dq_atomic_flags & DSF_CANCELED))) {
+		// When the list is empty, _dispatch_mach_disconnect() may release the
+		// last reference count on the Mach channel. To avoid this, take our
+		// own reference before releasing the lock.
+		wflags = DISPATCH_WAKEUP_MAKE_DIRTY | DISPATCH_WAKEUP_CONSUME_2;
+		_dispatch_retain_2(dm);
+	}
+	_dispatch_unfair_lock_unlock(&dm->dm_send_refs->dmsr_replies_lock);
+
+	bool result = _dispatch_mach_reply_kevent_unregister(dm, dmr, options);
+	dispatch_assert(result);
+	if (wflags) dx_wakeup(dm, 0, wflags);
 }
 
 DISPATCH_ALWAYS_INLINE
 static inline dispatch_mach_msg_t
 _dispatch_mach_msg_reply_recv(dispatch_mach_t dm,
-		dispatch_mach_reply_refs_t dmr, mach_port_t reply_port)
+		dispatch_mach_reply_refs_t dmr, mach_port_t reply_port,
+		mach_port_t send)
 {
 	if (slowpath(!MACH_PORT_VALID(reply_port))) {
 		DISPATCH_CLIENT_CRASH(reply_port, "Invalid reply port");
@@ -726,6 +794,7 @@
 	mach_msg_size_t siz, msgsiz = 0;
 	mach_msg_return_t kr;
 	mach_msg_option_t options;
+	mach_port_t notify = MACH_PORT_NULL;
 	siz = mach_vm_round_page(DISPATCH_MACH_RECEIVE_MAX_INLINE_MESSAGE_SIZE +
 			DISPATCH_MACH_TRAILER_SIZE);
 	hdr = alloca(siz);
@@ -734,12 +803,17 @@
 		*(char*)p = 0; // ensure alloca buffer doesn't overlap with stack guard
 	}
 	options = DISPATCH_MACH_RCV_OPTIONS & (~MACH_RCV_VOUCHER);
+	if (MACH_PORT_VALID(send)) {
+		notify = send;
+		options |= MACH_RCV_SYNC_WAIT;
+	}
+
 retry:
 	_dispatch_debug_machport(reply_port);
 	_dispatch_debug("machport[0x%08x]: MACH_RCV_MSG %s", reply_port,
 			(options & MACH_RCV_TIMEOUT) ? "poll" : "wait");
 	kr = mach_msg(hdr, options, 0, siz, reply_port, MACH_MSG_TIMEOUT_NONE,
-			MACH_PORT_NULL);
+			notify);
 	hdr_copyout_addr = hdr;
 	_dispatch_debug_machport(reply_port);
 	_dispatch_debug("machport[0x%08x]: MACH_RCV_MSG (size %u, opts 0x%x) "
@@ -787,8 +861,9 @@
 			if (shrink) hdr = hdr2 = shrink;
 		}
 		break;
+	case MACH_RCV_INVALID_NOTIFY:
 	default:
-		dispatch_assume_zero(kr);
+		DISPATCH_INTERNAL_CRASH(kr, "Unexpected error from mach_msg_receive");
 		break;
 	}
 	_dispatch_mach_msg_reply_received(dm, dmr, hdr->msgh_local_port);
@@ -1024,15 +1099,20 @@
 			} else {
 				clear_voucher = _voucher_mach_msg_set(msg, voucher);
 			}
-			if (qos && _dispatch_evfilt_machport_direct_enabled) {
+			if (qos) {
 				opts |= MACH_SEND_OVERRIDE;
-				msg_priority = (mach_msg_priority_t)_dispatch_qos_to_pp(qos);
+				msg_priority = (mach_msg_priority_t)
+						_dispatch_priority_compute_propagated(
+						_dispatch_qos_to_pp(qos), 0);
 			}
 		}
 		_dispatch_debug_machport(msg->msgh_remote_port);
 		if (reply_port) _dispatch_debug_machport(reply_port);
 		if (msg_opts & DISPATCH_MACH_WAIT_FOR_REPLY) {
 			if (msg_opts & DISPATCH_MACH_OWNED_REPLY_PORT) {
+				if (_dispatch_use_mach_special_reply_port()) {
+					opts |= MACH_SEND_SYNC_OVERRIDE;
+				}
 				_dispatch_clear_thread_reply_port(reply_port);
 			}
 			_dispatch_mach_reply_waiter_register(dm, dmr, reply_port, dmsg,
@@ -1085,13 +1165,6 @@
 	if (!(msg_opts & DISPATCH_MACH_WAIT_FOR_REPLY) && !kr && reply_port &&
 			!(_dispatch_unote_registered(dmrr) &&
 			dmrr->du_ident == reply_port)) {
-		if (!dmrr->du_is_direct &&
-				_dispatch_queue_get_current() != &_dispatch_mgr_q) {
-			// reply receive kevent must be installed on the manager queue
-			dm->dm_needs_mgr = 1;
-			dmsg->dmsg_options = msg_opts | DISPATCH_MACH_REGISTER_FOR_REPLY;
-			goto out;
-		}
 		_dispatch_mach_reply_kevent_register(dm, reply_port, dmsg);
 	}
 	if (unlikely(!is_reply && dmsg == dsrr->dmsr_checkin &&
@@ -1131,6 +1204,9 @@
 #pragma mark -
 #pragma mark dispatch_mach_send_refs_t
 
+#define _dmsr_state_needs_lock_override(dq_state, qos) \
+		unlikely(qos < _dq_state_max_qos(dq_state))
+
 DISPATCH_ALWAYS_INLINE
 static inline dispatch_qos_t
 _dmsr_state_max_qos(uint64_t dmsr_state)
@@ -1170,11 +1246,8 @@
 #define _dispatch_mach_send_pop_head(dmsr, head) \
 		os_mpsc_pop_head(dmsr, dmsr, head, do_next)
 
-#define dm_push(dm, dc, qos) ({ \
-		dispatch_queue_t _dq = (dm)->_as_dq; \
-		dispatch_assert(dx_vtable(_dq)->do_push == _dispatch_queue_push); \
-		_dispatch_queue_push(_dq, dc, qos); \
-	})
+#define dm_push(dm, dc, qos) \
+		_dispatch_queue_push((dm)->_as_dq, dc, qos)
 
 DISPATCH_ALWAYS_INLINE
 static inline bool
@@ -1226,8 +1299,7 @@
 				dmsg = (dispatch_mach_msg_t)dc;
 				dmr = NULL;
 			} else {
-				if ((_dispatch_unote_registered(dmsr) ||
-						!dm->dm_recv_refs->du_is_direct) &&
+				if (_dispatch_unote_registered(dmsr) &&
 						(_dispatch_queue_get_current() != &_dispatch_mgr_q)) {
 					// send kevent must be uninstalled on the manager queue
 					needs_mgr = true;
@@ -1321,7 +1393,7 @@
 		} else {
 			qos = 0;
 		}
-		if (!disconnecting) dx_wakeup(dm, qos, DISPATCH_WAKEUP_FLUSH);
+		if (!disconnecting) dx_wakeup(dm, qos, DISPATCH_WAKEUP_MAKE_DIRTY);
 	}
 	return returning_send_result;
 }
@@ -1332,7 +1404,7 @@
 		dispatch_mach_send_invoke_flags_t send_flags)
 {
 	dispatch_mach_send_refs_t dmsr = dm->dm_send_refs;
-	dispatch_lock_owner tid_self = _dispatch_tid_self();
+	dispatch_lock owner_self = _dispatch_lock_value_for_self();
 	uint64_t old_state, new_state;
 
 	uint64_t canlock_mask = DISPATCH_MACH_STATE_UNLOCK_MASK;
@@ -1350,18 +1422,18 @@
 	os_atomic_rmw_loop2o(dmsr, dmsr_state, old_state, new_state, acquire, {
 		new_state = old_state;
 		if (unlikely((old_state & canlock_mask) != canlock_state)) {
-			if (!(send_flags & DM_SEND_INVOKE_FLUSH)) {
+			if (!(send_flags & DM_SEND_INVOKE_MAKE_DIRTY)) {
 				os_atomic_rmw_loop_give_up(break);
 			}
 			new_state |= DISPATCH_MACH_STATE_DIRTY;
 		} else {
-			if (_dispatch_queue_should_override_self(old_state, oq_floor)) {
+			if (_dmsr_state_needs_lock_override(old_state, oq_floor)) {
 				os_atomic_rmw_loop_give_up({
 					oq_floor = _dispatch_queue_override_self(old_state);
 					goto retry;
 				});
 			}
-			new_state |= tid_self;
+			new_state |= owner_self;
 			new_state &= ~DISPATCH_MACH_STATE_DIRTY;
 			new_state &= ~DISPATCH_MACH_STATE_RECEIVED_OVERRIDE;
 			new_state &= ~DISPATCH_MACH_STATE_PENDING_BARRIER;
@@ -1419,14 +1491,14 @@
 {
 	dispatch_mach_send_refs_t dmsr = dm->dm_send_refs;
 	uint64_t old_state, new_state, state_flags = 0;
-	dispatch_lock_owner owner;
+	dispatch_tid owner;
 	bool wakeup;
 
 	// <rdar://problem/25896179> when pushing a send barrier that destroys
 	// the last reference to this channel, and the send queue is already
 	// draining on another thread, the send barrier may run as soon as
 	// _dispatch_mach_send_push_inline() returns.
-	_dispatch_retain(dm);
+	_dispatch_retain_2(dm);
 
 	wakeup = _dispatch_mach_send_push_inline(dmsr, dc);
 	if (wakeup) {
@@ -1457,7 +1529,7 @@
 			_dispatch_wqthread_override_start_check_owner(owner, qos,
 					&dmsr->dmsr_state_lock.dul_lock);
 		}
-		return _dispatch_release_tailcall(dm);
+		return _dispatch_release_2_tailcall(dm);
 	}
 
 	dispatch_wakeup_flags_t wflags = 0;
@@ -1465,14 +1537,14 @@
 		_dispatch_mach_send_barrier_drain_push(dm, qos);
 	} else if (wakeup || dmsr->dmsr_disconnect_cnt ||
 			(dm->dq_atomic_flags & DSF_CANCELED)) {
-		wflags = DISPATCH_WAKEUP_FLUSH | DISPATCH_WAKEUP_CONSUME;
+		wflags = DISPATCH_WAKEUP_MAKE_DIRTY | DISPATCH_WAKEUP_CONSUME_2;
 	} else if (old_state & DISPATCH_MACH_STATE_PENDING_BARRIER) {
-		wflags = DISPATCH_WAKEUP_OVERRIDING | DISPATCH_WAKEUP_CONSUME;
+		wflags = DISPATCH_WAKEUP_CONSUME_2;
 	}
 	if (wflags) {
 		return dx_wakeup(dm, qos, wflags);
 	}
-	return _dispatch_release_tailcall(dm);
+	return _dispatch_release_2_tailcall(dm);
 }
 
 DISPATCH_NOINLINE
@@ -1482,9 +1554,9 @@
 		dispatch_mach_send_invoke_flags_t send_flags)
 {
 	dispatch_mach_send_refs_t dmsr = dm->dm_send_refs;
-	dispatch_lock_owner tid_self = _dispatch_tid_self();
+	dispatch_lock owner_self = _dispatch_lock_value_for_self();
 	uint64_t old_state, new_state, canlock_mask, state_flags = 0;
-	dispatch_lock_owner owner;
+	dispatch_tid owner;
 
 	bool wakeup = _dispatch_mach_send_push_inline(dmsr, dou);
 	if (wakeup) {
@@ -1497,7 +1569,7 @@
 			new_state = _dmsr_state_merge_override(old_state, qos);
 			new_state |= state_flags;
 		});
-		dx_wakeup(dm, qos, DISPATCH_WAKEUP_FLUSH);
+		dx_wakeup(dm, qos, DISPATCH_WAKEUP_MAKE_DIRTY);
 		return false;
 	}
 
@@ -1508,7 +1580,7 @@
 			new_state = _dmsr_state_merge_override(old_state, qos);
 			new_state |= state_flags;
 			if (likely((old_state & canlock_mask) == 0)) {
-				new_state |= tid_self;
+				new_state |= owner_self;
 				new_state &= ~DISPATCH_MACH_STATE_DIRTY;
 				new_state &= ~DISPATCH_MACH_STATE_RECEIVED_OVERRIDE;
 				new_state &= ~DISPATCH_MACH_STATE_PENDING_BARRIER;
@@ -1521,7 +1593,7 @@
 				os_atomic_rmw_loop_give_up(return false);
 			}
 			if (likely((old_state & canlock_mask) == 0)) {
-				new_state |= tid_self;
+				new_state |= owner_self;
 				new_state &= ~DISPATCH_MACH_STATE_DIRTY;
 				new_state &= ~DISPATCH_MACH_STATE_RECEIVED_OVERRIDE;
 				new_state &= ~DISPATCH_MACH_STATE_PENDING_BARRIER;
@@ -1539,7 +1611,7 @@
 	}
 
 	if (old_state & DISPATCH_MACH_STATE_PENDING_BARRIER) {
-		dx_wakeup(dm, qos, DISPATCH_WAKEUP_OVERRIDING);
+		dx_wakeup(dm, qos, 0);
 		return false;
 	}
 
@@ -1575,7 +1647,7 @@
 	DISPATCH_ASSERT_ON_MANAGER_QUEUE();
 	dm->dm_send_refs->du_ident = send;
 	dispatch_assume(_dispatch_unote_register(dm->dm_send_refs,
-			DISPATCH_WLH_MANAGER, 0));
+			DISPATCH_WLH_ANON, 0));
 }
 
 void
@@ -1589,7 +1661,7 @@
 
 	if (data & dmsr->du_fflags) {
 		_dispatch_mach_send_invoke(dm, DISPATCH_INVOKE_MANAGER_DRAIN,
-				DM_SEND_INVOKE_FLUSH);
+				DM_SEND_INVOKE_MAKE_DIRTY);
 	}
 }
 
@@ -1600,7 +1672,7 @@
 {
 	mach_error_t error;
 	dispatch_mach_reason_t reason = _dispatch_mach_msg_get_reason(dmsg, &error);
-	if (!dm->dm_is_xpc ||
+	if (reason == DISPATCH_MACH_MESSAGE_RECEIVED || !dm->dm_is_xpc ||
 			!_dispatch_mach_xpc_hooks->dmxh_direct_message_handler(
 			dm->dm_recv_refs->dmrr_handler_ctxt, reason, dmsg, error)) {
 		// Not XPC client or not a message that XPC can handle inline - push
@@ -1649,15 +1721,25 @@
 }
 
 DISPATCH_ALWAYS_INLINE
-static inline pthread_priority_t
-_dispatch_mach_priority_propagate(mach_msg_option_t options)
+static inline dispatch_qos_t
+_dispatch_mach_priority_propagate(mach_msg_option_t options,
+		pthread_priority_t *msg_pp)
 {
 #if DISPATCH_USE_NOIMPORTANCE_QOS
-	if (options & MACH_SEND_NOIMPORTANCE) return 0;
-#else
-	(void)options;
+	if (options & MACH_SEND_NOIMPORTANCE) {
+		*msg_pp = 0;
+		return 0;
+	}
 #endif
-	return _dispatch_priority_propagate();
+	unsigned int flags = DISPATCH_PRIORITY_PROPAGATE_CURRENT;
+	if ((options & DISPATCH_MACH_WAIT_FOR_REPLY) &&
+			(options & DISPATCH_MACH_OWNED_REPLY_PORT) &&
+			_dispatch_use_mach_special_reply_port()) {
+		flags |= DISPATCH_PRIORITY_PROPAGATE_FOR_SYNC_IPC;
+	}
+	*msg_pp = _dispatch_priority_compute_propagated(0, flags);
+	// TODO: remove QoS contribution of sync IPC messages to send queue
+	return _dispatch_qos_from_pp(*msg_pp);
 }
 
 DISPATCH_NOINLINE
@@ -1670,14 +1752,15 @@
 		DISPATCH_CLIENT_CRASH(dmsg->do_next, "Message already enqueued");
 	}
 	dispatch_retain(dmsg);
-	pthread_priority_t priority = _dispatch_mach_priority_propagate(options);
+	pthread_priority_t msg_pp;
+	dispatch_qos_t qos = _dispatch_mach_priority_propagate(options, &msg_pp);
 	options |= _dispatch_mach_send_options();
 	dmsg->dmsg_options = options;
 	mach_msg_header_t *msg = _dispatch_mach_msg_get_msg(dmsg);
 	dmsg->dmsg_reply = _dispatch_mach_msg_get_reply_port(dmsg);
 	bool is_reply = (MACH_MSGH_BITS_REMOTE(msg->msgh_bits) ==
 			MACH_MSG_TYPE_MOVE_SEND_ONCE);
-	dmsg->dmsg_priority = priority;
+	dmsg->dmsg_priority = msg_pp;
 	dmsg->dmsg_voucher = _voucher_copy();
 	_dispatch_voucher_debug("mach-msg[%p] set", dmsg->dmsg_voucher, dmsg);
 
@@ -1700,7 +1783,7 @@
 		dispatch_object_t dou = { ._dmsg = dmsg };
 		if (dc_wait) dou._dc = dc_wait;
 		returning_send_result = _dispatch_mach_send_push_and_trydrain(dm, dou,
-				_dispatch_qos_from_pp(priority), send_flags);
+				qos, send_flags);
 	}
 	if (returning_send_result) {
 		_dispatch_voucher_debug("mach-msg[%p] clear", dmsg->dmsg_voucher, dmsg);
@@ -1751,6 +1834,7 @@
 		dispatch_mach_msg_t dmsg, mach_msg_option_t options,
 		bool *returned_send_result)
 {
+	mach_port_t send = MACH_PORT_NULL;
 	mach_port_t reply_port = _dispatch_mach_msg_get_reply_port(dmsg);
 	if (!reply_port) {
 		// use per-thread mach reply port <rdar://24597802>
@@ -1761,6 +1845,7 @@
 		hdr->msgh_local_port = reply_port;
 		options |= DISPATCH_MACH_OWNED_REPLY_PORT;
 	}
+	options |= DISPATCH_MACH_WAIT_FOR_REPLY;
 
 	dispatch_mach_reply_refs_t dmr;
 #if DISPATCH_DEBUG
@@ -1781,8 +1866,13 @@
 	*returned_send_result = _dispatch_mach_send_msg(dm, dmsg, &dc_wait,options);
 	if (options & DISPATCH_MACH_OWNED_REPLY_PORT) {
 		_dispatch_clear_thread_reply_port(reply_port);
+		if (_dispatch_use_mach_special_reply_port()) {
+			// link special reply port to send right for remote receive right
+			// TODO: extend to pre-connect phase <rdar://problem/31823384>
+			send = dm->dm_send_refs->dmsr_send;
+		}
 	}
-	dmsg = _dispatch_mach_msg_reply_recv(dm, dmr, reply_port);
+	dmsg = _dispatch_mach_msg_reply_recv(dm, dmr, reply_port, send);
 #if DISPATCH_DEBUG
 	free(dmr);
 #endif
@@ -1798,7 +1888,6 @@
 	dispatch_mach_msg_t reply;
 	dispatch_assert_zero(options & DISPATCH_MACH_OPTIONS_MASK);
 	options &= ~DISPATCH_MACH_OPTIONS_MASK;
-	options |= DISPATCH_MACH_WAIT_FOR_REPLY;
 	reply = _dispatch_mach_send_and_wait_for_reply(dm, dmsg, options,
 			&returned_send_result);
 	dispatch_assert(!returned_send_result);
@@ -1819,7 +1908,6 @@
 	dispatch_mach_msg_t reply;
 	dispatch_assert_zero(options & DISPATCH_MACH_OPTIONS_MASK);
 	options &= ~DISPATCH_MACH_OPTIONS_MASK;
-	options |= DISPATCH_MACH_WAIT_FOR_REPLY;
 	options |= DISPATCH_MACH_RETURN_IMMEDIATE_SEND_RESULT;
 	reply = _dispatch_mach_send_and_wait_for_reply(dm, dmsg, options,
 			&returned_send_result);
@@ -1877,8 +1965,8 @@
 	}
 	if (MACH_PORT_VALID(dmsr->dmsr_send)) {
 		_dispatch_mach_msg_disconnected(dm, MACH_PORT_NULL, dmsr->dmsr_send);
+		dmsr->dmsr_send = MACH_PORT_NULL;
 	}
-	dmsr->dmsr_send = MACH_PORT_NULL;
 	if (dmsr->dmsr_checkin) {
 		_dispatch_mach_msg_not_sent(dm, dmsr->dmsr_checkin);
 		dmsr->dmsr_checkin = NULL;
@@ -1889,11 +1977,14 @@
 		TAILQ_REMOVE(&dm->dm_send_refs->dmsr_replies, dmr, dmr_list);
 		_TAILQ_MARK_NOT_ENQUEUED(dmr, dmr_list);
 		if (_dispatch_unote_registered(dmr)) {
-			_dispatch_mach_reply_kevent_unregister(dm, dmr,
-					DU_UNREGISTER_DISCONNECTED);
+			if (!_dispatch_mach_reply_kevent_unregister(dm, dmr,
+					DU_UNREGISTER_DISCONNECTED)) {
+				TAILQ_INSERT_HEAD(&dm->dm_send_refs->dmsr_replies, dmr,
+					dmr_list);
+			}
 		} else {
 			_dispatch_mach_reply_waiter_unregister(dm, dmr,
-					DU_UNREGISTER_DISCONNECTED);
+				DU_UNREGISTER_DISCONNECTED);
 		}
 	}
 	disconnected = TAILQ_EMPTY(&dm->dm_send_refs->dmsr_replies);
@@ -1917,8 +2008,18 @@
 	dispatch_mach_recv_refs_t dmrr = dm->dm_recv_refs;
 	mach_port_t local_port = (mach_port_t)dmrr->du_ident;
 	if (local_port) {
-		_dispatch_source_refs_unregister(dm->_as_ds, 0);
-		if ((dm->dq_atomic_flags & DSF_STATE_MASK) == DSF_DELETED) {
+		// handle the deferred delete case properly, similar to what
+		// _dispatch_source_invoke2() does
+		dispatch_queue_flags_t dqf = _dispatch_queue_atomic_flags(dm->_as_dq);
+		if ((dqf & DSF_DEFERRED_DELETE) && !(dqf & DSF_ARMED)) {
+			_dispatch_source_refs_unregister(dm->_as_ds,
+					DU_UNREGISTER_IMMEDIATE_DELETE);
+			dqf = _dispatch_queue_atomic_flags(dm->_as_dq);
+		} else if (!(dqf & DSF_DEFERRED_DELETE) && !(dqf & DSF_DELETED)) {
+			_dispatch_source_refs_unregister(dm->_as_ds, 0);
+			dqf = _dispatch_queue_atomic_flags(dm->_as_dq);
+		}
+		if ((dqf & DSF_STATE_MASK) == DSF_DELETED) {
 			_dispatch_mach_msg_disconnected(dm, local_port, MACH_PORT_NULL);
 			dmrr->du_ident = 0;
 		} else {
@@ -1928,6 +2029,10 @@
 		_dispatch_queue_atomic_flags_set_and_clear(dm->_as_dq, DSF_DELETED,
 				DSF_ARMED | DSF_DEFERRED_DELETE);
 	}
+
+	if (dm->dm_send_refs->dmsr_disconnect_cnt) {
+		uninstalled = false; // <rdar://problem/31233110>
+	}
 	if (uninstalled) dm->dm_uninstalled = uninstalled;
 }
 
@@ -2023,8 +2128,21 @@
 			if (slowpath(!dm->dm_connect_handler_called)) {
 				_dispatch_mach_connect_invoke(dm);
 			}
-			_dispatch_client_callout4(dmrr->dmrr_handler_ctxt, reason, dmsg,
-					err, dmrr->dmrr_handler_func);
+			if (reason == DISPATCH_MACH_MESSAGE_RECEIVED &&
+					(_dispatch_queue_atomic_flags(dm->_as_dq) & DSF_CANCELED)) {
+				// <rdar://problem/32184699> Do not deliver message received
+				// after cancellation: _dispatch_mach_merge_msg can be preempted
+				// for a long time between clearing DSF_ARMED but before
+				// enqueuing the message, allowing for cancellation to complete,
+				// and then the message event to be delivered.
+				//
+				// This makes XPC unhappy because some of these messages are
+				// port-destroyed notifications that can cause it to try to
+				// reconnect on a channel that is almost fully canceled
+			} else {
+				_dispatch_client_callout4(dmrr->dmrr_handler_ctxt, reason, dmsg,
+						err, dmrr->dmrr_handler_func);
+			}
 		}
 		_dispatch_perfmon_workitem_inc();
 	});
@@ -2174,22 +2292,37 @@
 }
 
 static void
-_dispatch_mach_install(dispatch_mach_t dm, dispatch_priority_t pri,
-		dispatch_wlh_t wlh)
+_dispatch_mach_install(dispatch_mach_t dm, dispatch_wlh_t wlh,
+		dispatch_priority_t pri)
 {
 	dispatch_mach_recv_refs_t dmrr = dm->dm_recv_refs;
 	uint32_t disconnect_cnt;
 
-	if (!dm->dq_wlh && wlh) {
-		_dispatch_queue_class_record_wlh_hierarchy(dm, wlh);
-	}
 	if (dmrr->du_ident) {
-		_dispatch_source_refs_register(dm->_as_ds, pri);
+		_dispatch_source_refs_register(dm->_as_ds, wlh, pri);
+		dispatch_assert(dmrr->du_is_direct);
 	}
-	if (dm->dm_xpc_term_refs) {
-		_dispatch_unote_register(dm->dm_xpc_term_refs, dm->dq_wlh, pri);
+
+	if (dm->dm_is_xpc) {
+		bool monitor_sigterm;
+		if (_dispatch_mach_xpc_hooks->version < 3) {
+			monitor_sigterm = true;
+		} else if (!_dispatch_mach_xpc_hooks->dmxh_enable_sigterm_notification){
+			monitor_sigterm = true;
+		} else {
+			monitor_sigterm =
+					_dispatch_mach_xpc_hooks->dmxh_enable_sigterm_notification(
+					dm->dm_recv_refs->dmrr_handler_ctxt);
+		}
+		if (monitor_sigterm) {
+			dispatch_xpc_term_refs_t _dxtr =
+					dux_create(&_dispatch_xpc_type_sigterm, SIGTERM, 0)._dxtr;
+			_dxtr->du_owner_wref = _dispatch_ptr2wref(dm);
+			dm->dm_xpc_term_refs = _dxtr;
+			_dispatch_unote_register(dm->dm_xpc_term_refs, wlh, pri);
+		}
 	}
-	if (dmrr->du_is_direct && !dm->dq_priority) {
+	if (!dm->dq_priority) {
 		// _dispatch_mach_reply_kevent_register assumes this has been done
 		// which is unlike regular sources or queues, the DEFAULTQUEUE flag
 		// is used so that the priority of the channel doesn't act as
@@ -2204,21 +2337,17 @@
 }
 
 void
-_dispatch_mach_finalize_activation(dispatch_mach_t dm)
+_dispatch_mach_finalize_activation(dispatch_mach_t dm, bool *allow_resume)
 {
-	dispatch_mach_recv_refs_t dmrr = dm->dm_recv_refs;
+	dispatch_priority_t pri;
+	dispatch_wlh_t wlh;
 
 	// call "super"
-	_dispatch_queue_finalize_activation(dm->_as_dq);
+	_dispatch_queue_finalize_activation(dm->_as_dq, allow_resume);
 
-	if (dmrr->du_is_direct && !dm->ds_is_installed) {
-		dispatch_source_t ds = dm->_as_ds;
-		dispatch_priority_t pri = _dispatch_source_compute_kevent_priority(ds);
-		if (pri) {
-			dispatch_wlh_t wlh = dm->dq_wlh;
-			if (!wlh) wlh = _dispatch_queue_class_compute_wlh(dm);
-			_dispatch_mach_install(dm, pri, wlh);
-		}
+	if (!dm->ds_is_installed) {
+		pri = _dispatch_queue_compute_priority_and_wlh(dm->_as_dq, &wlh);
+		if (pri) _dispatch_mach_install(dm, wlh, pri);
 	}
 }
 
@@ -2250,8 +2379,24 @@
 	dispatch_mach_t dm = dou._dm;
 	dispatch_queue_wakeup_target_t retq = NULL;
 	dispatch_queue_t dq = _dispatch_queue_get_current();
+	dispatch_mach_send_refs_t dmsr = dm->dm_send_refs;
+	dispatch_mach_recv_refs_t dmrr = dm->dm_recv_refs;
+	dispatch_queue_flags_t dqf = 0;
 
-	flags |= DISPATCH_INVOKE_DISALLOW_SYNC_WAITERS;
+	if (!(flags & DISPATCH_INVOKE_MANAGER_DRAIN) && dmrr &&
+			_dispatch_unote_wlh_changed(dmrr, _dispatch_get_wlh())) {
+		dqf = _dispatch_queue_atomic_flags_set_orig(dm->_as_dq,
+				DSF_WLH_CHANGED);
+		if (!(dqf & DSF_WLH_CHANGED)) {
+			if (dm->dm_is_xpc) {
+				_dispatch_bug_deprecated("Changing target queue "
+						"hierarchy after xpc connection was activated");
+			} else {
+				_dispatch_bug_deprecated("Changing target queue "
+						"hierarchy after mach channel was activated");
+			}
+		}
+	}
 
 	// This function performs all mach channel actions. Each action is
 	// responsible for verifying that it takes place on the appropriate queue.
@@ -2261,20 +2406,12 @@
 
 	// The order of tests here in invoke and in wakeup should be consistent.
 
-	dispatch_mach_send_refs_t dmsr = dm->dm_send_refs;
-	dispatch_mach_recv_refs_t dmrr = dm->dm_recv_refs;
-	dispatch_queue_t dkq = &_dispatch_mgr_q;
-
-	if (dmrr->du_is_direct) {
-		dkq = dm->do_targetq;
-	}
-
 	if (unlikely(!dm->ds_is_installed)) {
 		// The channel needs to be installed on the kevent queue.
-		if (dq != dkq) {
-			return dkq;
+		if (unlikely(flags & DISPATCH_INVOKE_MANAGER_DRAIN)) {
+			return dm->do_targetq;
 		}
-		_dispatch_mach_install(dm, _dispatch_get_basepri(),_dispatch_get_wlh());
+		_dispatch_mach_install(dm, _dispatch_get_wlh(),_dispatch_get_basepri());
 		_dispatch_perfmon_workitem_inc();
 	}
 
@@ -2287,13 +2424,12 @@
 		}
 	}
 
-	dispatch_queue_flags_t dqf = 0;
-	if (!retq && dmrr->du_is_direct) {
+	if (!retq && _dispatch_unote_registered(dmrr)) {
 		if (_dispatch_mach_tryarm(dm, &dqf)) {
 			_dispatch_unote_resume(dmrr);
 			if (dq == dm->do_targetq && !dq->do_targetq && !dmsr->dmsr_tail &&
 					(dq->dq_priority & DISPATCH_PRIORITY_FLAG_OVERCOMMIT) &&
-					dmrr->du_wlh != DISPATCH_WLH_GLOBAL) {
+					_dispatch_wlh_should_poll_unote(dmrr)) {
 				// try to redrive the drain from under the lock for channels
 				// targeting an overcommit root queue to avoid parking
 				// when the next message has already fired
@@ -2307,7 +2443,7 @@
 
 	if (dmsr->dmsr_tail) {
 		bool requires_mgr = dm->dm_needs_mgr || (dmsr->dmsr_disconnect_cnt &&
-				(_dispatch_unote_registered(dmsr) || !dmrr->du_is_direct));
+				_dispatch_unote_registered(dmsr));
 		if (!os_atomic_load2o(dmsr, dmsr_notification_armed, relaxed) ||
 				(dqf & DSF_CANCELED) || dmsr->dmsr_disconnect_cnt) {
 			// The channel has pending messages to send.
@@ -2357,7 +2493,8 @@
 _dispatch_mach_invoke(dispatch_mach_t dm,
 		dispatch_invoke_context_t dic, dispatch_invoke_flags_t flags)
 {
-	_dispatch_queue_class_invoke(dm, dic, flags, _dispatch_mach_invoke2);
+	_dispatch_queue_class_invoke(dm, dic, flags,
+			DISPATCH_INVOKE_DISALLOW_SYNC_WAITERS, _dispatch_mach_invoke2);
 }
 
 void
@@ -2368,18 +2505,12 @@
 	// The order of tests here in probe and in invoke should be consistent.
 
 	dispatch_mach_send_refs_t dmsr = dm->dm_send_refs;
-	dispatch_mach_recv_refs_t dmrr = dm->dm_recv_refs;
-	dispatch_queue_wakeup_target_t dkq = DISPATCH_QUEUE_WAKEUP_MGR;
 	dispatch_queue_wakeup_target_t tq = DISPATCH_QUEUE_WAKEUP_NONE;
 	dispatch_queue_flags_t dqf = _dispatch_queue_atomic_flags(dm->_as_dq);
 
-	if (dmrr->du_is_direct) {
-		dkq = DISPATCH_QUEUE_WAKEUP_TARGET;
-	}
-
 	if (!dm->ds_is_installed) {
 		// The channel needs to be installed on the kevent queue.
-		tq = dkq;
+		tq = DISPATCH_QUEUE_WAKEUP_TARGET;
 		goto done;
 	}
 
@@ -2396,7 +2527,7 @@
 
 	if (dmsr->dmsr_tail) {
 		bool requires_mgr = dm->dm_needs_mgr || (dmsr->dmsr_disconnect_cnt &&
-				(_dispatch_unote_registered(dmsr) || !dmrr->du_is_direct));
+				_dispatch_unote_registered(dmsr));
 		if (!os_atomic_load2o(dmsr, dmsr_notification_armed, relaxed) ||
 				(dqf & DSF_CANCELED) || dmsr->dmsr_disconnect_cnt) {
 			if (unlikely(requires_mgr)) {
@@ -2421,13 +2552,12 @@
 	}
 
 done:
-	if (tq) {
-		return _dispatch_queue_class_wakeup(dm->_as_dq, qos, flags, tq);
-	} else if (qos) {
-		return _dispatch_queue_class_override_drainer(dm->_as_dq, qos, flags);
-	} else if (flags & DISPATCH_WAKEUP_CONSUME) {
-		return _dispatch_release_tailcall(dm);
+	if ((tq == DISPATCH_QUEUE_WAKEUP_TARGET) &&
+			dm->do_targetq == &_dispatch_mgr_q) {
+		tq = DISPATCH_QUEUE_WAKEUP_MGR;
 	}
+
+	return _dispatch_queue_class_wakeup(dm->_as_dq, qos, flags, tq);
 }
 
 static void
@@ -2462,7 +2592,7 @@
 		_dispatch_barrier_async_detached_f(dm->_as_dq, dm,
 				_dispatch_mach_sigterm_invoke);
 	} else {
-		dx_wakeup(dm, _dispatch_qos_from_pp(pp), DISPATCH_WAKEUP_FLUSH);
+		dx_wakeup(dm, _dispatch_qos_from_pp(pp), DISPATCH_WAKEUP_MAKE_DIRTY);
 	}
 }
 
@@ -2477,9 +2607,15 @@
 			slowpath(destructor && !msg)) {
 		DISPATCH_CLIENT_CRASH(size, "Empty message");
 	}
-	dispatch_mach_msg_t dmsg = _dispatch_alloc(DISPATCH_VTABLE(mach_msg),
-			sizeof(struct dispatch_mach_msg_s) +
-			(destructor ? 0 : size - sizeof(dmsg->dmsg_msg)));
+
+	dispatch_mach_msg_t dmsg;
+	size_t msg_size = sizeof(struct dispatch_mach_msg_s);
+	if (!destructor && os_add_overflow(msg_size,
+			  (size - sizeof(dmsg->dmsg_msg)), &msg_size)) {
+		DISPATCH_CLIENT_CRASH(size, "Message size too large");
+	}
+
+	dmsg = _dispatch_object_alloc(DISPATCH_VTABLE(mach_msg), msg_size);
 	if (destructor) {
 		dmsg->dmsg_msg = msg;
 	} else if (msg) {
@@ -2496,7 +2632,8 @@
 }
 
 void
-_dispatch_mach_msg_dispose(dispatch_mach_msg_t dmsg)
+_dispatch_mach_msg_dispose(dispatch_mach_msg_t dmsg,
+		DISPATCH_UNUSED bool *allow_free)
 {
 	if (dmsg->dmsg_voucher) {
 		_voucher_release(dmsg->dmsg_voucher);
@@ -2539,8 +2676,7 @@
 	size_t offset = 0;
 	offset += dsnprintf(&buf[offset], bufsiz - offset, "%s[%p] = { ",
 			dx_kind(dmsg), dmsg);
-	offset += dsnprintf(&buf[offset], bufsiz - offset, "xrefcnt = 0x%x, "
-			"refcnt = 0x%x, ", dmsg->do_xref_cnt + 1, dmsg->do_ref_cnt + 1);
+	offset += _dispatch_object_debug_attr(dmsg, buf + offset, bufsiz - offset);
 	offset += dsnprintf(&buf[offset], bufsiz - offset, "opts/err = 0x%x, "
 			"msgh[%p] = { ", dmsg->dmsg_options, dmsg->dmsg_buf);
 	mach_msg_header_t *hdr = _dispatch_mach_msg_get_msg(dmsg);
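
In the `_dispatch_mach_install()` changes above, the SIGTERM unote is now created lazily and gated on the new, optional `dmxh_enable_sigterm_notification` hook, defaulting to enabled for hook tables that predate it. The following self-contained sketch illustrates that versioned-hooks fallback with hypothetical names (it is not the libdispatch API):

```c
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

/* Hypothetical stand-in for dispatch_mach_xpc_hooks_s (illustration only). */
struct xpc_hooks_s {
	unsigned int version;
	/* Optional callback, only meaningful for version >= 3; may be NULL. */
	bool (*enable_sigterm_notification)(void *context);
};

static bool
never_sigterm(void *context)
{
	(void)context;
	return false;
}

/* Same decision shape as the install path above: default to monitoring
 * SIGTERM unless a new-enough hooks table explicitly opts out. */
static bool
should_monitor_sigterm(const struct xpc_hooks_s *hooks, void *context)
{
	if (hooks == NULL || hooks->version < 3) return true;
	if (hooks->enable_sigterm_notification == NULL) return true;
	return hooks->enable_sigterm_notification(context);
}

int
main(void)
{
	struct xpc_hooks_s legacy = { .version = 2 };
	struct xpc_hooks_s opt_out = { .version = 3,
			.enable_sigterm_notification = never_sigterm };

	printf("legacy hooks monitor SIGTERM: %d\n",
			should_monitor_sigterm(&legacy, NULL));  /* 1 */
	printf("v3 opt-out monitors SIGTERM:  %d\n",
			should_monitor_sigterm(&opt_out, NULL)); /* 0 */
	return 0;
}
```
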
diff --git a/src/mach_internal.h b/src/mach_internal.h
index 8600a38..8c8edd8 100644
--- a/src/mach_internal.h
+++ b/src/mach_internal.h
@@ -99,8 +99,8 @@
 
 void _dispatch_mach_msg_async_reply_invoke(dispatch_continuation_t dc,
 		dispatch_invoke_context_t dic, dispatch_invoke_flags_t flags);
-void _dispatch_mach_dispose(dispatch_mach_t dm);
-void _dispatch_mach_finalize_activation(dispatch_mach_t dm);
+void _dispatch_mach_dispose(dispatch_mach_t dm, bool *allow_free);
+void _dispatch_mach_finalize_activation(dispatch_mach_t dm, bool *allow_resume);
 void _dispatch_mach_invoke(dispatch_mach_t dm, dispatch_invoke_context_t dic,
 		dispatch_invoke_flags_t flags);
 void _dispatch_mach_wakeup(dispatch_mach_t dm, dispatch_qos_t qos,
@@ -116,7 +116,7 @@
 void _dispatch_xpc_sigterm_merge(dispatch_unote_t du, uint32_t flags,
 		uintptr_t data, uintptr_t status, pthread_priority_t pp);
 
-void _dispatch_mach_msg_dispose(dispatch_mach_msg_t dmsg);
+void _dispatch_mach_msg_dispose(dispatch_mach_msg_t dmsg, bool *allow_free);
 void _dispatch_mach_msg_invoke(dispatch_mach_msg_t dmsg,
 		dispatch_invoke_context_t dic, dispatch_invoke_flags_t flags);
 size_t _dispatch_mach_msg_debug(dispatch_mach_msg_t dmsg, char* buf,
diff --git a/src/object.c b/src/object.c
index 1ca41bc..43f580b 100644
--- a/src/object.c
+++ b/src/object.c
@@ -37,14 +37,28 @@
 _os_object_t
 _os_object_retain_internal(_os_object_t obj)
 {
-	return _os_object_retain_internal_inline(obj);
+	return _os_object_retain_internal_n_inline(obj, 1);
+}
+
+DISPATCH_NOINLINE
+_os_object_t
+_os_object_retain_internal_n(_os_object_t obj, uint16_t n)
+{
+	return _os_object_retain_internal_n_inline(obj, n);
 }
 
 DISPATCH_NOINLINE
 void
 _os_object_release_internal(_os_object_t obj)
 {
-	return _os_object_release_internal_inline(obj);
+	return _os_object_release_internal_n_inline(obj, 1);
+}
+
+DISPATCH_NOINLINE
+void
+_os_object_release_internal_n(_os_object_t obj, uint16_t n)
+{
+	return _os_object_release_internal_n_inline(obj, n);
 }
 
 DISPATCH_NOINLINE
@@ -124,7 +138,7 @@
 #pragma mark dispatch_object_t
 
 void *
-_dispatch_alloc(const void *vtable, size_t size)
+_dispatch_object_alloc(const void *vtable, size_t size)
 {
 #if OS_OBJECT_HAVE_OBJC1
 	const struct dispatch_object_vtable_s *_vtable = vtable;
@@ -138,6 +152,27 @@
 }
 
 void
+_dispatch_object_finalize(dispatch_object_t dou)
+{
+#if USE_OBJC
+	objc_destructInstance((id)dou._do);
+#else
+	(void)dou;
+#endif
+}
+
+void
+_dispatch_object_dealloc(dispatch_object_t dou)
+{
+	// so that ddt doesn't pick up bad objects when malloc reuses this memory
+	dou._os_obj->os_obj_isa = NULL;
+#if OS_OBJECT_HAVE_OBJC1
+	dou._do->do_vtable = NULL;
+#endif
+	free(dou._os_obj);
+}
+
+void
 dispatch_retain(dispatch_object_t dou)
 {
 	DISPATCH_OBJECT_TFB(_dispatch_objc_retain, dou);
@@ -151,24 +186,6 @@
 	_os_object_release(dou._os_obj);
 }
 
-static void
-_dispatch_dealloc(dispatch_object_t dou)
-{
-	dispatch_queue_t tq = dou._do->do_targetq;
-	dispatch_function_t func = dou._do->do_finalizer;
-	void *ctxt = dou._do->do_ctxt;
-#if OS_OBJECT_HAVE_OBJC1
-	// so that ddt doesn't pick up bad objects when malloc reuses this memory
-	dou._do->do_vtable = NULL;
-#endif
-	_os_object_dealloc(dou._os_obj);
-
-	if (func && ctxt) {
-		dispatch_async_f(tq, ctxt, func);
-	}
-	_dispatch_release_tailcall(tq);
-}
-
 #if !USE_OBJC
 void
 _dispatch_xref_dispose(dispatch_object_t dou)
@@ -193,11 +210,26 @@
 void
 _dispatch_dispose(dispatch_object_t dou)
 {
+	dispatch_queue_t tq = dou._do->do_targetq;
+	dispatch_function_t func = dou._do->do_finalizer;
+	void *ctxt = dou._do->do_ctxt;
+	bool allow_free = true;
+
 	if (slowpath(dou._do->do_next != DISPATCH_OBJECT_LISTLESS)) {
 		DISPATCH_INTERNAL_CRASH(dou._do->do_next, "Release while enqueued");
 	}
-	dx_dispose(dou._do);
-	return _dispatch_dealloc(dou);
+
+	dx_dispose(dou._do, &allow_free);
+
+	// Past this point, the only thing left of the object is its memory
+	if (likely(allow_free)) {
+		_dispatch_object_finalize(dou);
+		_dispatch_object_dealloc(dou);
+	}
+	if (func && ctxt) {
+		dispatch_async_f(tq, ctxt, func);
+	}
+	if (tq) _dispatch_release_tailcall(tq);
 }
 
 void *
@@ -270,7 +302,9 @@
 dispatch_resume(dispatch_object_t dou)
 {
 	DISPATCH_OBJECT_TFB(_dispatch_objc_resume, dou);
-	if (dx_vtable(dou._do)->do_resume) {
+	// the do_suspend below is not a typo. Having a do_resume but no do_suspend
+	// allows for objects to support activate, but have no-ops suspend/resume
+	if (dx_vtable(dou._do)->do_suspend) {
 		dx_vtable(dou._do)->do_resume(dou._do, false);
 	}
 }
@@ -278,6 +312,6 @@
 size_t
 _dispatch_object_debug_attr(dispatch_object_t dou, char* buf, size_t bufsiz)
 {
-	return dsnprintf(buf, bufsiz, "xrefcnt = 0x%x, refcnt = 0x%x, ",
+	return dsnprintf(buf, bufsiz, "xref = %d, ref = %d, ",
 			dou._do->do_xref_cnt + 1, dou._do->do_ref_cnt + 1);
 }
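
The `_dispatch_dispose()` change above threads a `bool *allow_free` out-parameter through `dx_dispose()`, so a class-specific dispose hook can keep the memory alive (for example while a deferred kevent delete is still pending) while the finalizer is still sent to the former target queue. A minimal sketch of that out-parameter convention, using hypothetical names:

```c
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

typedef struct object_s {
	void (*do_dispose)(struct object_s *, bool *allow_free);
	int has_pending_event; /* e.g. a deferred kevent delete */
} object_s;

/* Class-specific dispose: veto the free if something still references
 * the memory in a non-coordinated way. */
static void
channel_dispose(object_s *obj, bool *allow_free)
{
	if (obj->has_pending_event) {
		*allow_free = false;
	}
}

/* Generic dispose: free immediately unless the subclass deferred it. */
static void
object_dispose(object_s *obj)
{
	bool allow_free = true;
	obj->do_dispose(obj, &allow_free);
	if (allow_free) {
		free(obj);
	} else {
		printf("free deferred until the pending event drains\n");
	}
}

int
main(void)
{
	object_s *obj = calloc(1, sizeof(*obj));
	obj->do_dispose = channel_dispose;
	obj->has_pending_event = 1;
	object_dispose(obj); /* defers: obj stays valid here */
	free(obj);           /* in the sketch, we free it ourselves afterwards */
	return 0;
}
```
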
diff --git a/src/object.m b/src/object.m
index 59cbc9d..cc97cc3 100644
--- a/src/object.m
+++ b/src/object.m
@@ -29,10 +29,21 @@
 #error Objective C GC isn't supported anymore
 #endif
 
+#if __has_include(<objc/objc-internal.h>)
 #include <objc/objc-internal.h>
+#else
+extern id _Nullable objc_retain(id _Nullable obj) __asm__("_objc_retain");
+extern void objc_release(id _Nullable obj) __asm__("_objc_release");
+extern void _objc_init(void);
+extern void _objc_atfork_prepare(void);
+extern void _objc_atfork_parent(void);
+extern void _objc_atfork_child(void);
+#endif // __has_include(<objc/objc-internal.h>)
 #include <objc/objc-exception.h>
 #include <Foundation/NSString.h>
 
+// NOTE: this file must not contain any atomic operations
+
 #pragma mark -
 #pragma mark _os_object_t
 
@@ -286,6 +297,11 @@
 	return [nsstring stringWithFormat:format, class_getName([self class]), buf];
 }
 
+- (void)dealloc DISPATCH_NORETURN {
+	DISPATCH_INTERNAL_CRASH(0, "Calling dealloc on a dispatch object");
+	[super dealloc]; // make clang happy
+}
+
 @end
 
 @implementation DISPATCH_CLASS(queue)
@@ -413,20 +429,20 @@
 
 #if DISPATCH_COCOA_COMPAT
 
-void *
-_dispatch_last_resort_autorelease_pool_push(void)
+void
+_dispatch_last_resort_autorelease_pool_push(dispatch_invoke_context_t dic)
 {
 	if (!slowpath(_os_object_debug_missing_pools)) {
-		return _dispatch_autorelease_pool_push();
+		dic->dic_autorelease_pool = _dispatch_autorelease_pool_push();
 	}
-	return NULL;
 }
 
 void
-_dispatch_last_resort_autorelease_pool_pop(void *context)
+_dispatch_last_resort_autorelease_pool_pop(dispatch_invoke_context_t dic)
 {
 	if (!slowpath(_os_object_debug_missing_pools)) {
-		return _dispatch_autorelease_pool_pop(context);
+		_dispatch_autorelease_pool_pop(dic->dic_autorelease_pool);
+		dic->dic_autorelease_pool = NULL;
 	}
 }
 
diff --git a/src/object_internal.h b/src/object_internal.h
index 61caebf..0060f27 100644
--- a/src/object_internal.h
+++ b/src/object_internal.h
@@ -188,14 +188,14 @@
 	DISPATCH_INVOKABLE_VTABLE_HEADER(x); \
 	void (*const do_wakeup)(struct x##_s *, \
 			dispatch_qos_t, dispatch_wakeup_flags_t); \
-	void (*const do_dispose)(struct x##_s *)
+	void (*const do_dispose)(struct x##_s *, bool *allow_free)
 
 #define DISPATCH_OBJECT_VTABLE_HEADER(x) \
 	DISPATCH_QUEUEABLE_VTABLE_HEADER(x); \
 	void (*const do_set_targetq)(struct x##_s *, dispatch_queue_t); \
 	void (*const do_suspend)(struct x##_s *); \
 	void (*const do_resume)(struct x##_s *, bool activate); \
-	void (*const do_finalize_activation)(struct x##_s *); \
+	void (*const do_finalize_activation)(struct x##_s *, bool *allow_resume); \
 	size_t (*const do_debug)(struct x##_s *, char *, size_t)
 
 #define dx_vtable(x) (&(x)->do_vtable->_os_obj_vtable)
@@ -205,7 +205,7 @@
 #define dx_hastypeflag(x, f) (dx_vtable(x)->do_type & _DISPATCH_##f##_TYPEFLAG)
 #define dx_kind(x) dx_vtable(x)->do_kind
 #define dx_debug(x, y, z) dx_vtable(x)->do_debug((x), (y), (z))
-#define dx_dispose(x) dx_vtable(x)->do_dispose(x)
+#define dx_dispose(x, y) dx_vtable(x)->do_dispose(x, y)
 #define dx_invoke(x, y, z) dx_vtable(x)->do_invoke(x, y, z)
 #define dx_push(x, y, z) dx_vtable(x)->do_push(x, y, z)
 #define dx_wakeup(x, y, z) dx_vtable(x)->do_wakeup(x, y, z)
@@ -230,32 +230,29 @@
 // we sign extend the 64-bit version so that a better instruction encoding is
 // generated on Intel
 #define DISPATCH_OBJECT_LISTLESS ((void *)0xffffffff89abcdef)
-#define DISPATCH_OBJECT_WLH_REQ  ((void *)0xffffffff7009cdef)
 #else
 #define DISPATCH_OBJECT_LISTLESS ((void *)0x89abcdef)
-#define DISPATCH_OBJECT_WLH_REQ  ((void *)0x7009cdef)
 #endif
 
 DISPATCH_ENUM(dispatch_wakeup_flags, uint32_t,
-	// The caller of dx_wakeup owns an internal refcount on the object being
-	// woken up
-	DISPATCH_WAKEUP_CONSUME                 = 0x00000001,
+	// The caller of dx_wakeup owns two internal refcounts on the object being
+	// woken up. Two are needed for WLH wakeups where two threads need
+	// the object to remain valid in a non-coordinated way
+	// - the thread doing the poke for the duration of the poke
+	// - drainers for the duration of their drain
+	DISPATCH_WAKEUP_CONSUME_2               = 0x00000001,
 
 	// Some change to the object needs to be published to drainers.
 	// If the drainer isn't the same thread, some scheme such as the dispatch
 	// queue DIRTY bit must be used and a release barrier likely has to be
 	// involved before dx_wakeup returns
-	DISPATCH_WAKEUP_FLUSH					= 0x00000002,
+	DISPATCH_WAKEUP_MAKE_DIRTY              = 0x00000002,
 
-	// The caller desires to apply an override on the object being woken up.
-	// When this flag is passed, the qos passed to dx_wakeup() should not be 0
-	DISPATCH_WAKEUP_OVERRIDING              = 0x00000004,
-
-	// This wakeup is caused by a handoff from a slow waiter.
-	DISPATCH_WAKEUP_WAITER_HANDOFF          = 0x00000008,
+	// This wakeup is made by a sync owner that still holds the drain lock
+	DISPATCH_WAKEUP_BARRIER_COMPLETE        = 0x00000004,
 
 	// This wakeup is caused by a dispatch_block_wait()
-	DISPATCH_WAKEUP_BLOCK_WAIT              = 0x00000010,
+	DISPATCH_WAKEUP_BLOCK_WAIT              = 0x00000008,
 );
 
 typedef struct dispatch_invoke_context_s {
@@ -263,11 +260,12 @@
 #if HAVE_PTHREAD_WORKQUEUE_NARROWING
 	uint64_t dic_next_narrow_check;
 #endif
+#if DISPATCH_COCOA_COMPAT
+	void *dic_autorelease_pool;
+#endif
 } dispatch_invoke_context_s, *dispatch_invoke_context_t;
 
 #if HAVE_PTHREAD_WORKQUEUE_NARROWING
-#define DISPATCH_NARROW_CHECK_INTERVAL \
-		_dispatch_time_nano2mach(50 * NSEC_PER_MSEC)
 #define DISPATCH_THREAD_IS_NARROWING 1
 
 #define dispatch_with_disabled_narrowing(dic, ...) ({ \
@@ -289,12 +287,11 @@
 	// This invoke is a stealer, meaning that it doesn't own the
 	// enqueue lock at drain lock time.
 	//
-	// @const DISPATCH_INVOKE_OVERRIDING
-	// This invoke is draining the hierarchy on another root queue and needs
-	// to fake the identity of the original one.
+	// @const DISPATCH_INVOKE_WLH
+	// This invoke is for a bottom WLH
 	//
 	DISPATCH_INVOKE_STEALING				= 0x00000001,
-	DISPATCH_INVOKE_OVERRIDING				= 0x00000002,
+	DISPATCH_INVOKE_WLH						= 0x00000002,
 
 	// Misc flags
 	//
@@ -361,29 +358,31 @@
 
 #define DISPATCH_CONTINUATION_TYPE(name)  \
 		(_DISPATCH_CONTINUATION_TYPE | DC_##name##_TYPE)
-	DISPATCH_DATA_TYPE				= 1 | _DISPATCH_NODE_TYPE,
-	DISPATCH_MACH_MSG_TYPE			= 2 | _DISPATCH_NODE_TYPE,
-	DISPATCH_QUEUE_ATTR_TYPE		= 3 | _DISPATCH_NODE_TYPE,
+	DISPATCH_DATA_TYPE					= 1 | _DISPATCH_NODE_TYPE,
+	DISPATCH_MACH_MSG_TYPE				= 2 | _DISPATCH_NODE_TYPE,
+	DISPATCH_QUEUE_ATTR_TYPE			= 3 | _DISPATCH_NODE_TYPE,
 
-	DISPATCH_IO_TYPE				= 0 | _DISPATCH_IO_TYPE,
-	DISPATCH_OPERATION_TYPE			= 0 | _DISPATCH_OPERATION_TYPE,
-	DISPATCH_DISK_TYPE				= 0 | _DISPATCH_DISK_TYPE,
+	DISPATCH_IO_TYPE					= 0 | _DISPATCH_IO_TYPE,
+	DISPATCH_OPERATION_TYPE				= 0 | _DISPATCH_OPERATION_TYPE,
+	DISPATCH_DISK_TYPE					= 0 | _DISPATCH_DISK_TYPE,
 
-	DISPATCH_QUEUE_LEGACY_TYPE		= 1 | _DISPATCH_QUEUE_TYPE,
-	DISPATCH_QUEUE_SERIAL_TYPE		= 2 | _DISPATCH_QUEUE_TYPE,
-	DISPATCH_QUEUE_CONCURRENT_TYPE	= 3 | _DISPATCH_QUEUE_TYPE,
-	DISPATCH_QUEUE_GLOBAL_ROOT_TYPE	= 4 | _DISPATCH_QUEUE_TYPE |
+	DISPATCH_QUEUE_LEGACY_TYPE			= 1 | _DISPATCH_QUEUE_TYPE,
+	DISPATCH_QUEUE_SERIAL_TYPE			= 2 | _DISPATCH_QUEUE_TYPE,
+	DISPATCH_QUEUE_CONCURRENT_TYPE		= 3 | _DISPATCH_QUEUE_TYPE,
+	DISPATCH_QUEUE_GLOBAL_ROOT_TYPE		= 4 | _DISPATCH_QUEUE_TYPE |
 			_DISPATCH_QUEUE_ROOT_TYPEFLAG,
-	DISPATCH_QUEUE_RUNLOOP_TYPE		= 5 | _DISPATCH_QUEUE_TYPE |
+	DISPATCH_QUEUE_NETWORK_EVENT_TYPE	= 5 | _DISPATCH_QUEUE_TYPE |
 			_DISPATCH_QUEUE_ROOT_TYPEFLAG,
-	DISPATCH_QUEUE_MGR_TYPE			= 6 | _DISPATCH_QUEUE_TYPE,
-	DISPATCH_QUEUE_SPECIFIC_TYPE	= 7 | _DISPATCH_QUEUE_TYPE,
+	DISPATCH_QUEUE_RUNLOOP_TYPE			= 6 | _DISPATCH_QUEUE_TYPE |
+			_DISPATCH_QUEUE_ROOT_TYPEFLAG,
+	DISPATCH_QUEUE_MGR_TYPE				= 7 | _DISPATCH_QUEUE_TYPE,
+	DISPATCH_QUEUE_SPECIFIC_TYPE		= 8 | _DISPATCH_QUEUE_TYPE,
 
-	DISPATCH_SEMAPHORE_TYPE			= 1 | _DISPATCH_SEMAPHORE_TYPE,
-	DISPATCH_GROUP_TYPE				= 2 | _DISPATCH_SEMAPHORE_TYPE,
+	DISPATCH_SEMAPHORE_TYPE				= 1 | _DISPATCH_SEMAPHORE_TYPE,
+	DISPATCH_GROUP_TYPE					= 2 | _DISPATCH_SEMAPHORE_TYPE,
 
-	DISPATCH_SOURCE_KEVENT_TYPE		= 1 | _DISPATCH_SOURCE_TYPE,
-	DISPATCH_MACH_CHANNEL_TYPE		= 2 | _DISPATCH_SOURCE_TYPE,
+	DISPATCH_SOURCE_KEVENT_TYPE			= 1 | _DISPATCH_SOURCE_TYPE,
+	DISPATCH_MACH_CHANNEL_TYPE			= 2 | _DISPATCH_SOURCE_TYPE,
 
 };
 
@@ -450,9 +449,9 @@
 	struct dispatch_object_s *volatile ns##_items_head; \
 	unsigned long ns##_serialnum; \
 	const char *ns##_label; \
-	dispatch_wlh_t ns##_wlh; \
 	struct dispatch_object_s *volatile ns##_items_tail; \
-	dispatch_priority_t ns##_priority
+	dispatch_priority_t ns##_priority; \
+	int volatile ns##_sref_cnt
 #else
 #define _OS_MPSC_QUEUE_FIELDS(ns, __state_field__) \
 	struct dispatch_object_s *volatile ns##_items_head; \
@@ -463,10 +462,9 @@
 	/* LP64 global queue cacheline boundary */ \
 	unsigned long ns##_serialnum; \
 	const char *ns##_label; \
-	dispatch_wlh_t ns##_wlh; \
 	struct dispatch_object_s *volatile ns##_items_tail; \
-	dispatch_priority_t ns##_priority
-	/* LP64: 32bit hole */
+	dispatch_priority_t ns##_priority; \
+	int volatile ns##_sref_cnt
 #endif
 
 OS_OBJECT_INTERNAL_CLASS_DECL(os_mpsc_queue, object,
@@ -484,7 +482,9 @@
 
 size_t _dispatch_object_debug_attr(dispatch_object_t dou, char* buf,
 		size_t bufsiz);
-void *_dispatch_alloc(const void *vtable, size_t size);
+void *_dispatch_object_alloc(const void *vtable, size_t size);
+void _dispatch_object_finalize(dispatch_object_t dou);
+void _dispatch_object_dealloc(dispatch_object_t dou);
 #if !USE_OBJC
 void _dispatch_xref_dispose(dispatch_object_t dou);
 #endif
@@ -492,17 +492,22 @@
 #if DISPATCH_COCOA_COMPAT
 #if USE_OBJC
 #include <objc/runtime.h>
+#if __has_include(<objc/objc-internal.h>)
 #include <objc/objc-internal.h>
+#else
+extern void *objc_autoreleasePoolPush(void);
+extern void objc_autoreleasePoolPop(void *context);
+#endif // __has_include(<objc/objc-internal.h>)
 #define _dispatch_autorelease_pool_push() \
-	objc_autoreleasePoolPush()
+		objc_autoreleasePoolPush()
 #define _dispatch_autorelease_pool_pop(context) \
-	objc_autoreleasePoolPop(context)
+		objc_autoreleasePoolPop(context)
 #else
 void *_dispatch_autorelease_pool_push(void);
 void _dispatch_autorelease_pool_pop(void *context);
 #endif
-void *_dispatch_last_resort_autorelease_pool_push(void);
-void _dispatch_last_resort_autorelease_pool_pop(void *context);
+void _dispatch_last_resort_autorelease_pool_push(dispatch_invoke_context_t dic);
+void _dispatch_last_resort_autorelease_pool_pop(dispatch_invoke_context_t dic);
 
 #define dispatch_invoke_with_autoreleasepool(flags, ...)  ({ \
 		void *pool = NULL; \
@@ -518,7 +523,6 @@
 	do { (void)flags; __VA_ARGS__; } while (0)
 #endif
 
-
 #if USE_OBJC
 OS_OBJECT_OBJC_CLASS_DECL(object);
 #endif
@@ -582,20 +586,20 @@
  *   a barrier to perform prior to tearing down an object when the refcount
  *   reached -1.
  */
-#define _os_atomic_refcnt_perform2o(o, f, op, m)   ({ \
+#define _os_atomic_refcnt_perform2o(o, f, op, n, m)   ({ \
 		typeof(o) _o = (o); \
 		int _ref_cnt = _o->f; \
 		if (fastpath(_ref_cnt != _OS_OBJECT_GLOBAL_REFCNT)) { \
-			_ref_cnt = os_atomic_##op##2o(_o, f, m); \
+			_ref_cnt = os_atomic_##op##2o(_o, f, n, m); \
 		} \
 		_ref_cnt; \
 	})
 
-#define _os_atomic_refcnt_inc2o(o, m) \
-		_os_atomic_refcnt_perform2o(o, m, inc, relaxed)
+#define _os_atomic_refcnt_add2o(o, m, n) \
+		_os_atomic_refcnt_perform2o(o, m, add, n, relaxed)
 
-#define _os_atomic_refcnt_dec2o(o, m) \
-		_os_atomic_refcnt_perform2o(o, m, dec, release)
+#define _os_atomic_refcnt_sub2o(o, m, n) \
+		_os_atomic_refcnt_perform2o(o, m, sub, n, release)
 
 #define _os_atomic_refcnt_dispose_barrier2o(o, m) \
 		(void)os_atomic_load2o(o, m, acquire)
@@ -618,19 +622,19 @@
  *
  */
 #define _os_object_xrefcnt_inc(o) \
-		_os_atomic_refcnt_inc2o(o, os_obj_xref_cnt)
+		_os_atomic_refcnt_add2o(o, os_obj_xref_cnt, 1)
 
 #define _os_object_xrefcnt_dec(o) \
-		_os_atomic_refcnt_dec2o(o, os_obj_xref_cnt)
+		_os_atomic_refcnt_sub2o(o, os_obj_xref_cnt, 1)
 
 #define _os_object_xrefcnt_dispose_barrier(o) \
 		_os_atomic_refcnt_dispose_barrier2o(o, os_obj_xref_cnt)
 
-#define _os_object_refcnt_inc(o) \
-		_os_atomic_refcnt_inc2o(o, os_obj_ref_cnt)
+#define _os_object_refcnt_add(o, n) \
+		_os_atomic_refcnt_add2o(o, os_obj_ref_cnt, n)
 
-#define _os_object_refcnt_dec(o) \
-		_os_atomic_refcnt_dec2o(o, os_obj_ref_cnt)
+#define _os_object_refcnt_sub(o, n) \
+		_os_atomic_refcnt_sub2o(o, os_obj_ref_cnt, n)
 
 #define _os_object_refcnt_dispose_barrier(o) \
 		_os_atomic_refcnt_dispose_barrier2o(o, os_obj_ref_cnt)
diff --git a/src/queue.c b/src/queue.c
index 1029185..4d506ef 100644
--- a/src/queue.c
+++ b/src/queue.c
@@ -53,12 +53,16 @@
 static void _dispatch_deferred_items_cleanup(void *ctxt);
 static void _dispatch_frame_cleanup(void *ctxt);
 static void _dispatch_context_cleanup(void *ctxt);
-static void _dispatch_non_barrier_complete(dispatch_queue_t dq);
+static void _dispatch_queue_barrier_complete(dispatch_queue_t dq,
+		dispatch_qos_t qos, dispatch_wakeup_flags_t flags);
+static void _dispatch_queue_non_barrier_complete(dispatch_queue_t dq);
 static void _dispatch_queue_push_sync_waiter(dispatch_queue_t dq,
-		dispatch_sync_context_t dsc);
+		dispatch_sync_context_t dsc, dispatch_qos_t qos);
 #if HAVE_PTHREAD_WORKQUEUE_QOS
-static void _dispatch_root_queue_push_queue_override(dispatch_queue_t rq,
-		dispatch_queue_class_t dqu, dispatch_qos_t qos);
+static void _dispatch_root_queue_push_override_stealer(dispatch_queue_t orig_rq,
+		dispatch_queue_t dq, dispatch_qos_t qos);
+static inline void _dispatch_queue_class_wakeup_with_override(dispatch_queue_t,
+		uint64_t dq_state, dispatch_wakeup_flags_t flags);
 #endif
 #if HAVE_PTHREAD_WORKQUEUES
 static void _dispatch_worker_thread4(void *context);
@@ -358,7 +362,6 @@
 		.do_ctxt = &_dispatch_root_queue_contexts[ \
 				_DISPATCH_ROOT_QUEUE_IDX(n, flags)], \
 		.dq_atomic_flags = DQF_WIDTH(DISPATCH_QUEUE_WIDTH_POOL), \
-		.dq_wlh = DISPATCH_WLH_GLOBAL, \
 		.dq_priority = _dispatch_priority_make(DISPATCH_QOS_##n, 0) | flags | \
 				DISPATCH_PRIORITY_FLAG_ROOTQUEUE | \
 				((flags & DISPATCH_PRIORITY_FLAG_DEFAULTQUEUE) ? 0 : \
@@ -453,13 +456,13 @@
 DISPATCH_CACHELINE_ALIGN
 struct dispatch_queue_s _dispatch_mgr_q = {
 	DISPATCH_GLOBAL_OBJECT_HEADER(queue_mgr),
-	.dq_state = DISPATCH_QUEUE_STATE_INIT_VALUE(1),
+	.dq_state = DISPATCH_QUEUE_STATE_INIT_VALUE(1) |
+			DISPATCH_QUEUE_ROLE_BASE_ANON,
 	.do_targetq = &_dispatch_mgr_root_queue,
 	.dq_label = "com.apple.libdispatch-manager",
 	.dq_atomic_flags = DQF_WIDTH(1),
 	.dq_priority = DISPATCH_PRIORITY_FLAG_MANAGER |
 			DISPATCH_PRIORITY_SATURATED_OVERRIDE,
-	.dq_wlh = DISPATCH_WLH_GLOBAL,
 	.dq_serialnum = 2,
 };
 
@@ -524,10 +527,7 @@
 				"dispatch_assert_queue()");
 	}
 	uint64_t dq_state = os_atomic_load2o(dq, dq_state, relaxed);
-	if (unlikely(_dq_state_drain_pended(dq_state))) {
-		goto fail;
-	}
-	if (likely(_dq_state_drain_owner(dq_state) == _dispatch_tid_self())) {
+	if (likely(_dq_state_drain_locked_by_self(dq_state))) {
 		return;
 	}
 	// we can look at the width: if it is changing while we read it,
@@ -541,7 +541,6 @@
 			return;
 		}
 	}
-fail:
 	_dispatch_assert_queue_fail(dq, true);
 }
 
@@ -554,10 +553,7 @@
 				"dispatch_assert_queue_not()");
 	}
 	uint64_t dq_state = os_atomic_load2o(dq, dq_state, relaxed);
-	if (_dq_state_drain_pended(dq_state)) {
-		return;
-	}
-	if (likely(_dq_state_drain_owner(dq_state) != _dispatch_tid_self())) {
+	if (likely(!_dq_state_drain_locked_by_self(dq_state))) {
 		// we can look at the width: if it is changing while we read it,
 		// it means that a barrier is running on `dq` concurrently, which
 		// proves that we're not on `dq`. Hence reading a stale '1' is ok.
@@ -622,7 +618,7 @@
 #endif
 #if DISPATCH_USE_KEVENT_WORKQUEUE
 	bool disable_kevent_wq = false;
-#if DISPATCH_DEBUG
+#if DISPATCH_DEBUG || DISPATCH_PROFILE
 	disable_kevent_wq = slowpath(getenv("LIBDISPATCH_DISABLE_KEVENT_WQ"));
 #endif
 #endif
@@ -638,9 +634,6 @@
 #if DISPATCH_USE_MGR_THREAD
 			_dispatch_kevent_workqueue_enabled = !r;
 #endif
-#if DISPATCH_EVFILT_MACHPORT_PORTSET_FALLBACK
-			_dispatch_evfilt_machport_direct_enabled = !r;
-#endif
 			result = !r;
 		} else
 #endif // DISPATCH_USE_KEVENT_WORKQUEUE
@@ -817,7 +810,6 @@
 	dispatch_assert(sizeof(struct dispatch_root_queue_context_s) %
 			DISPATCH_CACHELINE_SIZE == 0);
 
-
 #if HAVE_PTHREAD_WORKQUEUE_QOS
 	dispatch_qos_t qos = _dispatch_qos_from_qos_class(qos_class_main());
 	dispatch_priority_t pri = _dispatch_priority_make(qos, 0);
@@ -865,6 +857,7 @@
 			dispatch_atfork_parent, dispatch_atfork_child));
 #endif
 	_dispatch_hw_config_init();
+	_dispatch_time_init();
 	_dispatch_vtable_init();
 	_os_object_init();
 	_voucher_init();
@@ -932,13 +925,18 @@
 void
 _dispatch_queue_atfork_child(void)
 {
+	dispatch_queue_t main_q = &_dispatch_main_q;
 	void *crash = (void *)0x100;
 	size_t i;
 
+	if (_dispatch_queue_is_thread_bound(main_q)) {
+		_dispatch_queue_set_bound_thread(main_q);
+	}
+
 	if (!_dispatch_is_multithreaded_inline()) return;
 
-	_dispatch_main_q.dq_items_head = crash;
-	_dispatch_main_q.dq_items_tail = crash;
+	main_q->dq_items_head = crash;
+	main_q->dq_items_tail = crash;
 
 	_dispatch_mgr_q.dq_items_head = crash;
 	_dispatch_mgr_q.dq_items_tail = crash;
@@ -949,6 +947,33 @@
 	}
 }
 
+DISPATCH_NOINLINE
+void
+_dispatch_fork_becomes_unsafe_slow(void)
+{
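+	// Record that the process became multithreaded; crash if a client
+	// previously prohibited that transition.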
+	uint8_t value = os_atomic_or(&_dispatch_unsafe_fork,
+			_DISPATCH_UNSAFE_FORK_MULTITHREADED, relaxed);
+	if (value & _DISPATCH_UNSAFE_FORK_PROHIBIT) {
+		DISPATCH_CLIENT_CRASH(0, "Transition to multithreaded is prohibited");
+	}
+}
+
+DISPATCH_NOINLINE
+void
+_dispatch_prohibit_transition_to_multithreaded(bool prohibit)
+{
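+	// Set or clear the prohibition bit; setting it crashes immediately if
+	// the process has already gone multithreaded.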
+	if (prohibit) {
+		uint8_t value = os_atomic_or(&_dispatch_unsafe_fork,
+				_DISPATCH_UNSAFE_FORK_PROHIBIT, relaxed);
+		if (value & _DISPATCH_UNSAFE_FORK_MULTITHREADED) {
+			DISPATCH_CLIENT_CRASH(0, "The executable is already multithreaded");
+		}
+	} else {
+		os_atomic_and(&_dispatch_unsafe_fork,
+				(uint8_t)~_DISPATCH_UNSAFE_FORK_PROHIBIT, relaxed);
+	}
+}
+
 #pragma mark -
 #pragma mark dispatch_queue_attr_t
 
@@ -1105,13 +1130,107 @@
 	dq->dq_label = label;
 }
 
-// skip zero
-// 1 - main_q
-// 2 - mgr_q
-// 3 - mgr_root_q
-// 4,5,6,7,8,9,10,11,12,13,14,15 - global queues
-// we use 'xadd' on Intel, so the initial value == next assigned
-unsigned long volatile _dispatch_queue_serial_numbers = 16;
+static inline bool
+_dispatch_base_queue_is_wlh(dispatch_queue_t dq, dispatch_queue_t tq)
+{
+	(void)dq; (void)tq;
+	return false;
+}
+
+static void
+_dispatch_queue_inherit_wlh_from_target(dispatch_queue_t dq,
+		dispatch_queue_t tq)
+{
+	uint64_t old_state, new_state, role;
+
+	if (!dx_hastypeflag(tq, QUEUE_ROOT)) {
+		role = DISPATCH_QUEUE_ROLE_INNER;
+	} else if (_dispatch_base_queue_is_wlh(dq, tq)) {
+		role = DISPATCH_QUEUE_ROLE_BASE_WLH;
+	} else {
+		role = DISPATCH_QUEUE_ROLE_BASE_ANON;
+	}
+
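+	// Only rewrite the role bits of dq_state; give up early if the role is
+	// already the one computed above.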
+	os_atomic_rmw_loop2o(dq, dq_state, old_state, new_state, relaxed, {
+		new_state = old_state & ~DISPATCH_QUEUE_ROLE_MASK;
+		new_state |= role;
+		if (old_state == new_state) {
+			os_atomic_rmw_loop_give_up(break);
+		}
+	});
+
+	dispatch_wlh_t cur_wlh = _dispatch_get_wlh();
+	if (cur_wlh == (dispatch_wlh_t)dq && !_dq_state_is_base_wlh(new_state)) {
+		_dispatch_event_loop_leave_immediate(cur_wlh, new_state);
+	}
+	if (!dx_hastypeflag(tq, QUEUE_ROOT)) {
+#if DISPATCH_ALLOW_NON_LEAF_RETARGET
+		_dispatch_queue_atomic_flags_set(tq, DQF_TARGETED);
+#else
+		_dispatch_queue_atomic_flags_set_and_clear(tq, DQF_TARGETED, DQF_LEGACY);
+#endif
+	}
+}
+
+unsigned long volatile _dispatch_queue_serial_numbers =
+		DISPATCH_QUEUE_SERIAL_NUMBER_INIT;
+
+dispatch_priority_t
+_dispatch_queue_compute_priority_and_wlh(dispatch_queue_t dq,
+		dispatch_wlh_t *wlh_out)
+{
+	dispatch_priority_t p = dq->dq_priority & DISPATCH_PRIORITY_REQUESTED_MASK;
+	dispatch_queue_t tq = dq->do_targetq;
+	dispatch_priority_t tqp = tq->dq_priority &DISPATCH_PRIORITY_REQUESTED_MASK;
+	dispatch_wlh_t wlh = DISPATCH_WLH_ANON;
+
+	if (_dq_state_is_base_wlh(dq->dq_state)) {
+		wlh = (dispatch_wlh_t)dq;
+	}
+
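+	// Walk up the target queue hierarchy, accumulating the highest requested
+	// priority and tracking the wlh base encountered along the way.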
+	while (unlikely(!dx_hastypeflag(tq, QUEUE_ROOT))) {
+		if (unlikely(tq == &_dispatch_mgr_q)) {
+			if (wlh_out) *wlh_out = DISPATCH_WLH_ANON;
+			return DISPATCH_PRIORITY_FLAG_MANAGER;
+		}
+		if (unlikely(_dispatch_queue_is_thread_bound(tq))) {
+			// thread-bound hierarchies are weird; we need to install
+			// from the context of the thread this hierarchy is bound to
+			if (wlh_out) *wlh_out = NULL;
+			return 0;
+		}
+		if (unlikely(DISPATCH_QUEUE_IS_SUSPENDED(tq))) {
+			// this queue may not be activated yet, so the queue graph may not
+			// have stabilized yet
+			_dispatch_ktrace1(DISPATCH_PERF_delayed_registration, dq);
+			if (wlh_out) *wlh_out = NULL;
+			return 0;
+		}
+
+		if (_dq_state_is_base_wlh(tq->dq_state)) {
+			wlh = (dispatch_wlh_t)tq;
+		} else if (unlikely(_dispatch_queue_is_legacy(tq))) {
+			// we're not allowed to dereference tq->do_targetq
+			_dispatch_ktrace1(DISPATCH_PERF_delayed_registration, dq);
+			if (wlh_out) *wlh_out = NULL;
+			return 0;
+		}
+
+		if (!(tq->dq_priority & DISPATCH_PRIORITY_FLAG_INHERIT)) {
+			if (p < tqp) p = tqp;
+		}
+		tq = tq->do_targetq;
+		tqp = tq->dq_priority & DISPATCH_PRIORITY_REQUESTED_MASK;
+	}
+
+	if (unlikely(!tqp)) {
+		// pthread root queues opt out of QoS
+		if (wlh_out) *wlh_out = DISPATCH_WLH_ANON;
+		return DISPATCH_PRIORITY_FLAG_MANAGER;
+	}
+	if (wlh_out) *wlh_out = wlh;
+	return _dispatch_priority_inherit_from_root_queue(p, tq);
+}
 
 DISPATCH_NOINLINE
 static dispatch_queue_t
@@ -1230,13 +1349,13 @@
 		}
 	}
 
-	dispatch_queue_t dq = _dispatch_alloc(vtable,
+	dispatch_queue_t dq = _dispatch_object_alloc(vtable,
 			sizeof(struct dispatch_queue_s) - DISPATCH_QUEUE_CACHELINE_PAD);
 	_dispatch_queue_init(dq, dqf, dqa->dqa_concurrent ?
-			DISPATCH_QUEUE_WIDTH_MAX : 1, dqa->dqa_inactive);
+			DISPATCH_QUEUE_WIDTH_MAX : 1, DISPATCH_QUEUE_ROLE_INNER |
+			(dqa->dqa_inactive ? DISPATCH_QUEUE_INACTIVE : 0));
 
 	dq->dq_label = label;
-
 #if HAVE_PTHREAD_WORKQUEUE_QOS
 	dq->dq_priority = dqa->dqa_qos_and_relpri;
 	if (overcommit == _dispatch_queue_attr_overcommit_enabled) {
@@ -1248,17 +1367,10 @@
 		// legacy way of inheriting the QoS from the target
 		_dispatch_queue_priority_inherit_from_target(dq, tq);
 	}
-	if (!dqa->dqa_inactive && !dx_hastypeflag(tq, QUEUE_ROOT)) {
-		_dispatch_queue_atomic_flags_set(tq, DQF_TARGETED);
+	if (!dqa->dqa_inactive) {
+		_dispatch_queue_inherit_wlh_from_target(dq, tq);
 	}
 	dq->do_targetq = tq;
-	if (!_dispatch_queue_is_legacy(dq) && !dqa->dqa_inactive) {
-		if (dx_hastypeflag(tq, QUEUE_ROOT)) {
-			dq->dq_wlh = _dispatch_root_queue_wlh_for_queue(tq, dq);
-		} else {
-			dq->dq_wlh = tq->dq_wlh;
-		}
-	}
 	_dispatch_object_debug(dq, "%s", __func__);
 	return _dispatch_introspection_queue_create(dq);
 }
@@ -1286,7 +1398,7 @@
 }
 
 void
-_dispatch_queue_destroy(dispatch_queue_t dq)
+_dispatch_queue_destroy(dispatch_queue_t dq, bool *allow_free)
 {
 	uint64_t dq_state = os_atomic_load2o(dq, dq_state, relaxed);
 	uint64_t initial_state = DISPATCH_QUEUE_STATE_INIT_VALUE(dq->dq_width);
@@ -1294,21 +1406,13 @@
 	if (dx_type(dq) == DISPATCH_QUEUE_GLOBAL_ROOT_TYPE) {
 		initial_state = DISPATCH_ROOT_QUEUE_STATE_INIT_VALUE;
 	}
-	if (dx_metatype(dq) == _DISPATCH_SOURCE_TYPE) {
-		// dispatch_cancel_and_wait may apply overrides in a racy way with
-		// the source cancellation finishing. This race is expensive and not
-		// really worthwhile to resolve since the source becomes dead anyway.
-		//
-		// In a similar way using DISPATCH_QUEUE_WAKEUP_WAIT_FOR_EVENT causes
-		// DIRTY & MAX_QOS bits to stay with the channel or source sometimes
-		// never woken up before it dies, so we have to ignore them.
-		dq_state &= ~DISPATCH_QUEUE_MAX_QOS_MASK;
-		dq_state &= ~DISPATCH_QUEUE_DIRTY;
-		dq_state &= ~DISPATCH_QUEUE_RECEIVED_OVERRIDE;
-	}
+	dq_state &= ~DISPATCH_QUEUE_MAX_QOS_MASK;
+	dq_state &= ~DISPATCH_QUEUE_DIRTY;
+	dq_state &= ~DISPATCH_QUEUE_ROLE_MASK;
 	if (slowpath(dq_state != initial_state)) {
 		if (_dq_state_drain_locked(dq_state)) {
-			DISPATCH_CLIENT_CRASH(dq, "Release of a locked queue");
+			DISPATCH_CLIENT_CRASH((uintptr_t)dq_state,
+					"Release of a locked queue");
 		}
 #ifndef __LP64__
 		dq_state >>= 32;
@@ -1316,9 +1420,6 @@
 		DISPATCH_CLIENT_CRASH((uintptr_t)dq_state,
 				"Release of a queue with corrupt state");
 	}
-	if (slowpath(dq == _dispatch_queue_get_current())) {
-		DISPATCH_CLIENT_CRASH(dq, "Release of a queue by itself");
-	}
 	if (slowpath(dq->dq_items_tail)) {
 		DISPATCH_CLIENT_CRASH(dq->dq_items_tail,
 				"Release of a queue while items are enqueued");
@@ -1327,30 +1428,61 @@
 	// trash the queue so that use after free will crash
 	dq->dq_items_head = (void *)0x200;
 	dq->dq_items_tail = (void *)0x200;
-	// poison the state with something that is suspended and is easy to spot
-	dq->dq_state = 0xdead000000000000;
 
 	dispatch_queue_t dqsq = os_atomic_xchg2o(dq, dq_specific_q,
 			(void *)0x200, relaxed);
 	if (dqsq) {
 		_dispatch_release(dqsq);
 	}
-	if (dq->dq_wlh) {
-		dq->dq_wlh = NULL;
+
+	// fastpath for queues that never got their storage retained
+	if (likely(os_atomic_load2o(dq, dq_sref_cnt, relaxed) == 0)) {
+		// poison the state with something that is suspended and is easy to spot
+		dq->dq_state = 0xdead000000000000;
+		return;
 	}
+
+	// Take over freeing the memory from _dispatch_object_dealloc()
+	//
+	// As soon as we call _dispatch_queue_release_storage(), the caller of
+	// dx_dispose() can no longer finalize the object, so that responsibility
+	// becomes ours.
+	_dispatch_object_finalize(dq);
+	*allow_free = false;
+	dq->dq_label = "<released queue, pending free>";
+	dq->do_targetq = NULL;
+	dq->do_finalizer = NULL;
+	dq->do_ctxt = NULL;
+	return _dispatch_queue_release_storage(dq);
 }
 
 // 6618342 Contact the team that owns the Instrument DTrace probe before
 //         renaming this symbol
 void
-_dispatch_queue_dispose(dispatch_queue_t dq)
+_dispatch_queue_dispose(dispatch_queue_t dq, bool *allow_free)
 {
 	_dispatch_object_debug(dq, "%s", __func__);
 	_dispatch_introspection_queue_dispose(dq);
 	if (dq->dq_label && _dispatch_queue_label_needs_free(dq)) {
 		free((void*)dq->dq_label);
 	}
-	_dispatch_queue_destroy(dq);
+	_dispatch_queue_destroy(dq, allow_free);
+}
+
+void
+_dispatch_queue_xref_dispose(dispatch_queue_t dq)
+{
+	uint64_t dq_state = os_atomic_load2o(dq, dq_state, relaxed);
+	if (unlikely(_dq_state_is_suspended(dq_state))) {
+		long state = (long)dq_state;
+		if (sizeof(long) < sizeof(uint64_t)) state = (long)(dq_state >> 32);
+		if (unlikely(_dq_state_is_inactive(dq_state))) {
+			// Arguments for and against this assert are within 6705399
+			DISPATCH_CLIENT_CRASH(state, "Release of an inactive object");
+		}
+		DISPATCH_CLIENT_CRASH(dq_state, "Release of a suspended object");
+	}
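+	// Note that the last external reference to this queue is now gone.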
+	os_atomic_or2o(dq, dq_atomic_flags, DQF_RELEASED, relaxed);
 }
 
 DISPATCH_NOINLINE
@@ -1404,21 +1536,15 @@
 				return _dispatch_queue_suspend_slow(dq);
 			});
 		}
-#ifdef DLOCK_NOWAITERS_BIT
-		if (_dq_state_drain_locked(dq_state)) {
-			value |= DISPATCH_QUEUE_DRAIN_OWNER_MASK;
-		} else {
-			value ^= DLOCK_OWNER_INVALID;
+		if (!_dq_state_drain_locked(dq_state)) {
+			value |= DLOCK_OWNER_MASK;
 		}
-#else
-		value |= DLOCK_OWNER_INVALID;
-#endif
 	});
 
 	if (!_dq_state_is_suspended(dq_state)) {
 		// rdar://8181908 we need to extend the queue life for the duration
 		// of the call to wakeup at _dispatch_queue_resume() time.
-		_dispatch_retain(dq);
+		_dispatch_retain_2(dq);
 	}
 }
 
@@ -1463,12 +1589,15 @@
 static void
 _dispatch_queue_resume_finalize_activation(dispatch_queue_t dq)
 {
+	bool allow_resume = true;
 	// Step 2: run the activation finalizer
 	if (dx_vtable(dq)->do_finalize_activation) {
-		dx_vtable(dq)->do_finalize_activation(dq);
+		dx_vtable(dq)->do_finalize_activation(dq, &allow_resume);
 	}
 	// Step 3: consume the suspend count
-	return dx_vtable(dq)->do_resume(dq, false);
+	if (allow_resume) {
+		return dx_vtable(dq)->do_resume(dq, false);
+	}
 }
 
 void
@@ -1476,12 +1605,15 @@
 {
 	// covers all suspend and inactive bits, including side suspend bit
 	const uint64_t suspend_bits = DISPATCH_QUEUE_SUSPEND_BITS_MASK;
-	// covers all suspend and inactive bits and owner mask
-	const uint64_t suspend_owner_bits = DISPATCH_QUEUE_SUSPEND_BITS_MASK |
-			DISPATCH_QUEUE_DRAIN_OWNER_MASK;
+	uint64_t pending_barrier_width =
+			(dq->dq_width - 1) * DISPATCH_QUEUE_WIDTH_INTERVAL;
+	uint64_t set_owner_and_set_full_width_and_in_barrier =
+			_dispatch_lock_value_for_self() | DISPATCH_QUEUE_WIDTH_FULL_BIT |
+			DISPATCH_QUEUE_IN_BARRIER;
+
 	// backward compatibility: only dispatch sources can abuse
 	// dispatch_resume() to really mean dispatch_activate()
-	bool resume_can_activate = (dx_type(dq) == DISPATCH_SOURCE_KEVENT_TYPE);
+	bool is_source = (dx_metatype(dq) == _DISPATCH_SOURCE_TYPE);
 	uint64_t dq_state, value;
 
 	dispatch_assert(dq->do_ref_cnt != DISPATCH_OBJECT_GLOBAL_REFCNT);
@@ -1531,51 +1663,49 @@
 					+ DISPATCH_QUEUE_NEEDS_ACTIVATION) {
 				// { sc:1 i:0 na:1 } -> { sc:1 i:0 na:0 }
 				value = dq_state - DISPATCH_QUEUE_NEEDS_ACTIVATION;
-			} else if (resume_can_activate && (dq_state & suspend_bits) ==
+			} else if (is_source && (dq_state & suspend_bits) ==
 					DISPATCH_QUEUE_NEEDS_ACTIVATION + DISPATCH_QUEUE_INACTIVE) {
 				// { sc:0 i:1 na:1 } -> { sc:1 i:0 na:0 }
 				value = dq_state - DISPATCH_QUEUE_INACTIVE
 						- DISPATCH_QUEUE_NEEDS_ACTIVATION
 						+ DISPATCH_QUEUE_SUSPEND_INTERVAL;
-			} else if ((dq_state & suspend_owner_bits) == (suspend_owner_bits &
-					(DISPATCH_QUEUE_SUSPEND_INTERVAL + DLOCK_OWNER_INVALID))) {
-				value = dq_state;
-				value ^= DISPATCH_QUEUE_SUSPEND_INTERVAL + DLOCK_OWNER_INVALID;
-				uint64_t full_width = value;
-				if (_dq_state_has_pending_barrier(full_width)) {
-					full_width -= DISPATCH_QUEUE_PENDING_BARRIER;
-					full_width += DISPATCH_QUEUE_WIDTH_INTERVAL;
-					full_width += DISPATCH_QUEUE_IN_BARRIER;
-				} else {
-					full_width += dq->dq_width * DISPATCH_QUEUE_WIDTH_INTERVAL;
-					full_width += DISPATCH_QUEUE_IN_BARRIER;
-				}
-				if ((full_width & DISPATCH_QUEUE_WIDTH_MASK) ==
-						DISPATCH_QUEUE_WIDTH_FULL_BIT) {
-					value = full_width;
-					value &= ~DISPATCH_QUEUE_DIRTY;
-					value ^= _dispatch_tid_self();
-				} else {
-					value &= ~DISPATCH_QUEUE_MAX_QOS_MASK;
-					value &= ~DISPATCH_QUEUE_RECEIVED_OVERRIDE;
-				}
+			} else if (unlikely(os_sub_overflow(dq_state,
+					DISPATCH_QUEUE_SUSPEND_INTERVAL, &value))) {
+				// underflow means over-resume or a suspend count transfer
+				// to the side count is needed
+				os_atomic_rmw_loop_give_up({
+					if (!(dq_state & DISPATCH_QUEUE_HAS_SIDE_SUSPEND_CNT)) {
+						goto over_resume;
+					}
+					return _dispatch_queue_resume_slow(dq);
+				});
+		//
+		// below this, value = dq_state - DISPATCH_QUEUE_SUSPEND_INTERVAL
+		//
+			} else if (!_dq_state_is_runnable(value)) {
+				// Out of width or still suspended.
+				// For the former, force _dispatch_queue_non_barrier_complete
+				// to reconsider whether it has work to do
+				value |= DISPATCH_QUEUE_DIRTY;
+			} else if (!_dq_state_drain_locked_by(value, DLOCK_OWNER_MASK)) {
+				dispatch_assert(_dq_state_drain_locked(value));
+				// still locked by someone else, make drain_try_unlock() fail
+				// and reconsider whether it has work to do
+				value |= DISPATCH_QUEUE_DIRTY;
+			} else if (!is_source && (_dq_state_has_pending_barrier(value) ||
+					value + pending_barrier_width <
+					DISPATCH_QUEUE_WIDTH_FULL_BIT)) {
+				// if we can, acquire the full width drain lock
+				// and then perform a lock transfer
+				//
+				// However this is never useful for a source where there are no
+				// sync waiters, so never take the lock and do a plain wakeup
+				value &= DISPATCH_QUEUE_DRAIN_PRESERVED_BITS_MASK;
+				value |= set_owner_and_set_full_width_and_in_barrier;
 			} else {
-				value = DISPATCH_QUEUE_SUSPEND_INTERVAL;
-				if (unlikely(os_sub_overflow(dq_state, value, &value))) {
-					// underflow means over-resume or a suspend count transfer
-					// to the side count is needed
-					os_atomic_rmw_loop_give_up({
-						if (!(dq_state & DISPATCH_QUEUE_HAS_SIDE_SUSPEND_CNT)) {
-							goto over_resume;
-						}
-						return _dispatch_queue_resume_slow(dq);
-					});
-				}
-				if (unlikely(_dq_state_is_runnable(value))) {
-					// make drain_try_unlock() fail and reconsider whether
-					// it has work to do
-					value |= DISPATCH_QUEUE_DIRTY;
-				}
+				// clear overrides and force a wakeup
+				value &= ~DISPATCH_QUEUE_DRAIN_UNLOCK_MASK;
+				value &= ~DISPATCH_QUEUE_MAX_QOS_MASK;
 			}
 		});
 	}
@@ -1598,21 +1728,26 @@
 		return;
 	}
 
-	if ((dq_state ^ value) & DISPATCH_QUEUE_IN_BARRIER) {
-		_dispatch_try_lock_transfer_or_wakeup(dq);
-	} else if (_dq_state_should_wakeup(value)) {
+	if (_dq_state_is_dirty(dq_state)) {
 		// <rdar://problem/14637483>
 		// dependency ordering for dq state changes that were flushed
 		// and not acted upon
 		os_atomic_thread_fence(dependency);
-		dq = os_atomic_force_dependency_on(dq, value);
-		dispatch_qos_t qos = _dq_state_max_qos(dq_state);
-		// Balancing the retain() done in suspend() for rdar://8181908
-		return dx_wakeup(dq, qos, DISPATCH_WAKEUP_CONSUME);
+		dq = os_atomic_force_dependency_on(dq, dq_state);
 	}
-
-	// Balancing the retain() done in suspend() for rdar://8181908
-	return _dispatch_release_tailcall(dq);
+	// Balancing the retain_2 done in suspend() for rdar://8181908
+	dispatch_wakeup_flags_t flags = DISPATCH_WAKEUP_CONSUME_2;
+	if ((dq_state ^ value) & DISPATCH_QUEUE_IN_BARRIER) {
+		flags |= DISPATCH_WAKEUP_BARRIER_COMPLETE;
+	} else if (!_dq_state_is_runnable(value)) {
+		if (_dq_state_is_base_wlh(dq_state)) {
+			_dispatch_event_loop_assert_not_owned((dispatch_wlh_t)dq);
+		}
+		return _dispatch_release_2(dq);
+	}
+	dispatch_assert(!_dq_state_received_sync_wait(dq_state));
+	dispatch_assert(!_dq_state_in_sync_transfer(dq_state));
+	return dx_wakeup(dq, _dq_state_max_qos(dq_state), flags);
 
 over_resume:
 	if (unlikely(_dq_state_is_inactive(dq_state))) {
@@ -1673,6 +1808,7 @@
 	os_atomic_rmw_loop2o(dq, dq_atomic_flags, old_dqf, new_dqf, relaxed, {
 		new_dqf = (old_dqf & DQF_FLAGS_MASK) | DQF_WIDTH(tmp);
 	});
+	_dispatch_queue_inherit_wlh_from_target(dq, dq->do_targetq);
 	_dispatch_object_debug(dq, "%s", __func__);
 }
 
@@ -1707,16 +1843,18 @@
 	dispatch_queue_t otq = dq->do_targetq;
 
 	if (_dispatch_queue_atomic_flags(dq) & DQF_TARGETED) {
+#if DISPATCH_ALLOW_NON_LEAF_RETARGET
 		_dispatch_ktrace3(DISPATCH_PERF_non_leaf_retarget, dq, otq, tq);
 		_dispatch_bug_deprecated("Changing the target of a queue "
 				"already targeted by other dispatch objects");
+#else
+		DISPATCH_CLIENT_CRASH(0, "Cannot change the target of a queue "
+				"already targeted by other dispatch objects");
+#endif
 	}
 
 	_dispatch_queue_priority_inherit_from_target(dq, tq);
-	if (!dx_hastypeflag(tq, QUEUE_ROOT)) {
-		_dispatch_queue_atomic_flags_set(tq, DQF_TARGETED);
-	}
-
+	_dispatch_queue_inherit_wlh_from_target(dq, tq);
 #if HAVE_PTHREAD_WORKQUEUE_QOS
 	// see _dispatch_queue_class_wakeup()
 	_dispatch_queue_sidelock_lock(dq);
@@ -1748,22 +1886,33 @@
 		return dx_vtable(dq)->do_resume(dq, false);
 	}
 
+#if !DISPATCH_ALLOW_NON_LEAF_RETARGET
+	if (_dispatch_queue_atomic_flags(dq) & DQF_TARGETED) {
+		DISPATCH_CLIENT_CRASH(0, "Cannot change the target of a queue "
+				"already targeted by other dispatch objects");
+	}
+#endif
+
 	if (unlikely(!_dispatch_queue_is_legacy(dq))) {
+#if DISPATCH_ALLOW_NON_LEAF_RETARGET
 		if (_dispatch_queue_atomic_flags(dq) & DQF_TARGETED) {
-			DISPATCH_CLIENT_CRASH(dq, "Cannot change the target of a queue "
+			DISPATCH_CLIENT_CRASH(0, "Cannot change the target of a queue "
 					"already targeted by other dispatch objects");
 		}
-		DISPATCH_CLIENT_CRASH(dq, "Cannot change the target of this object "
+#endif
+		DISPATCH_CLIENT_CRASH(0, "Cannot change the target of this object "
 				"after it has been activated");
 	}
 
 	unsigned long type = dx_type(dq);
 	switch (type) {
 	case DISPATCH_QUEUE_LEGACY_TYPE:
+#if DISPATCH_ALLOW_NON_LEAF_RETARGET
 		if (_dispatch_queue_atomic_flags(dq) & DQF_TARGETED) {
 			_dispatch_bug_deprecated("Changing the target of a queue "
 					"already targeted by other dispatch objects");
 		}
+#endif
 		break;
 	case DISPATCH_SOURCE_KEVENT_TYPE:
 	case DISPATCH_MACH_CHANNEL_TYPE:
@@ -1801,7 +1950,6 @@
 	.do_ctxt = &_dispatch_mgr_root_queue_context,
 	.dq_label = "com.apple.root.libdispatch-manager",
 	.dq_atomic_flags = DQF_WIDTH(DISPATCH_QUEUE_WIDTH_POOL),
-	.dq_wlh = DISPATCH_WLH_GLOBAL,
 	.dq_priority = DISPATCH_PRIORITY_FLAG_MANAGER |
 			DISPATCH_PRIORITY_SATURATED_OVERRIDE,
 	.dq_serialnum = 3,
@@ -2025,7 +2173,7 @@
 
 	dqs = sizeof(struct dispatch_queue_s) - DISPATCH_QUEUE_CACHELINE_PAD;
 	dqs = roundup(dqs, _Alignof(struct dispatch_root_queue_context_s));
-	dq = _dispatch_alloc(DISPATCH_VTABLE(queue_root), dqs +
+	dq = _dispatch_object_alloc(DISPATCH_VTABLE(queue_root), dqs +
 			sizeof(struct dispatch_root_queue_context_s) +
 			sizeof(struct dispatch_pthread_root_queue_context_s));
 	qc = (void*)dq + dqs;
@@ -2040,13 +2188,11 @@
 		}
 	}
 
-	_dispatch_queue_init(dq, dqf, DISPATCH_QUEUE_WIDTH_POOL, false);
+	_dispatch_queue_init(dq, dqf, DISPATCH_QUEUE_WIDTH_POOL, 0);
 	dq->dq_label = label;
 	dq->dq_state = DISPATCH_ROOT_QUEUE_STATE_INIT_VALUE;
 	dq->do_ctxt = qc;
-	dq->do_targetq = NULL;
 	dq->dq_priority = DISPATCH_PRIORITY_SATURATED_OVERRIDE;
-	dq->dq_wlh = DISPATCH_WLH_GLOBAL;
 
 	pqc->dpq_thread_mediator.do_vtable = DISPATCH_VTABLE(semaphore);
 	qc->dgq_ctxt = pqc;
@@ -2115,7 +2261,7 @@
 #endif // DISPATCH_ENABLE_PTHREAD_ROOT_QUEUES
 
 void
-_dispatch_pthread_root_queue_dispose(dispatch_queue_t dq)
+_dispatch_pthread_root_queue_dispose(dispatch_queue_t dq, bool *allow_free)
 {
 	if (slowpath(dq->do_ref_cnt == DISPATCH_OBJECT_GLOBAL_REFCNT)) {
 		DISPATCH_INTERNAL_CRASH(dq, "Global root queue disposed");
@@ -2127,7 +2273,7 @@
 	dispatch_pthread_root_queue_context_t pqc = qc->dgq_ctxt;
 
 	pthread_attr_destroy(&pqc->dpq_thread_attr);
-	_dispatch_semaphore_dispose(&pqc->dpq_thread_mediator);
+	_dispatch_semaphore_dispose(&pqc->dpq_thread_mediator, NULL);
 	if (pqc->dpq_thread_configure) {
 		Block_release(pqc->dpq_thread_configure);
 	}
@@ -2136,7 +2282,7 @@
 	if (dq->dq_label && _dispatch_queue_label_needs_free(dq)) {
 		free((void*)dq->dq_label);
 	}
-	_dispatch_queue_destroy(dq);
+	_dispatch_queue_destroy(dq, allow_free);
 }
 
 #pragma mark -
@@ -2157,7 +2303,8 @@
 DISPATCH_DECL(dispatch_queue_specific);
 
 void
-_dispatch_queue_specific_queue_dispose(dispatch_queue_specific_queue_t dqsq)
+_dispatch_queue_specific_queue_dispose(dispatch_queue_specific_queue_t dqsq,
+		bool *allow_free)
 {
 	dispatch_queue_specific_t dqs, tmp;
 	dispatch_queue_t rq = _dispatch_get_root_queue(DISPATCH_QOS_DEFAULT, false);
@@ -2168,7 +2315,7 @@
 		}
 		free(dqs);
 	}
-	_dispatch_queue_destroy(dqsq->_as_dq);
+	_dispatch_queue_destroy(dqsq->_as_dq, allow_free);
 }
 
 static void
@@ -2176,12 +2323,13 @@
 {
 	dispatch_queue_specific_queue_t dqsq;
 
-	dqsq = _dispatch_alloc(DISPATCH_VTABLE(queue_specific_queue),
+	dqsq = _dispatch_object_alloc(DISPATCH_VTABLE(queue_specific_queue),
 			sizeof(struct dispatch_queue_specific_queue_s));
-	_dispatch_queue_init(dqsq->_as_dq, DQF_NONE,
-			DISPATCH_QUEUE_WIDTH_MAX, false);
+	_dispatch_queue_init(dqsq->_as_dq, DQF_NONE, DISPATCH_QUEUE_WIDTH_MAX,
+			DISPATCH_QUEUE_ROLE_BASE_ANON);
 	dqsq->do_xref_cnt = -1;
-	dqsq->do_targetq = _dispatch_get_root_queue(DISPATCH_QOS_USER_INITIATED, true);
+	dqsq->do_targetq = _dispatch_get_root_queue(
+			DISPATCH_QOS_USER_INITIATED, true);
 	dqsq->dq_label = "queue-specific";
 	TAILQ_INIT(&dqsq->dqsq_contexts);
 	if (slowpath(!os_atomic_cmpxchg2o(dq, dq_specific_q, NULL,
@@ -2309,7 +2457,7 @@
 		DISPATCH_CLIENT_CRASH(dq->dq_width, "Invalid queue type");
 	}
 	uint64_t dq_state = os_atomic_load2o(dq, dq_state, relaxed);
-	return _dq_state_drain_locked_by(dq_state, _dispatch_tid_self());
+	return _dq_state_drain_locked_by_self(dq_state);
 }
 #endif
 
@@ -2321,12 +2469,13 @@
 {
 	size_t offset = 0;
 	dispatch_queue_t target = dq->do_targetq;
+	const char *tlabel = target && target->dq_label ? target->dq_label : "";
 	uint64_t dq_state = os_atomic_load2o(dq, dq_state, relaxed);
 
-	offset += dsnprintf(&buf[offset], bufsiz - offset,
+	offset += dsnprintf(&buf[offset], bufsiz - offset, "sref = %d, "
 			"target = %s[%p], width = 0x%x, state = 0x%016llx",
-			target && target->dq_label ? target->dq_label : "", target,
-			dq->dq_width, (unsigned long long)dq_state);
+			dq->dq_sref_cnt + 1, tlabel, target, dq->dq_width,
+			(unsigned long long)dq_state);
 	if (_dq_state_is_suspended(dq_state)) {
 		offset += dsnprintf(&buf[offset], bufsiz - offset, ", suspended = %d",
 			_dq_state_suspend_cnt(dq_state));
@@ -2391,11 +2540,15 @@
 #endif
 
 #if DISPATCH_PERF_MON
+
+#define DISPATCH_PERF_MON_BUCKETS 8
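+// Drain passes are bucketed roughly by log2 of the number of items drained,
+// clamped to the last bucket; bucket 0 counts passes that drained nothing.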
+
 static struct {
 	uint64_t volatile time_total;
 	uint64_t volatile count_total;
 	uint64_t volatile thread_total;
-} _dispatch_stats[65];
+} _dispatch_stats[DISPATCH_PERF_MON_BUCKETS];
+DISPATCH_USED static size_t _dispatch_stat_buckets = DISPATCH_PERF_MON_BUCKETS;
 
 void
 _dispatch_queue_merge_stats(uint64_t start, bool trace, perfmon_thread_type type)
@@ -2403,15 +2556,14 @@
 	uint64_t delta = _dispatch_absolute_time() - start;
 	unsigned long count;
 	int bucket = 0;
-
 	count = (unsigned long)_dispatch_thread_getspecific(dispatch_bcounter_key);
 	_dispatch_thread_setspecific(dispatch_bcounter_key, NULL);
-
 	if (count == 0) {
 		bucket = 0;
 		if (trace) _dispatch_ktrace1(DISPATCH_PERF_MON_worker_useless, type);
 	} else {
-		bucket = (int)sizeof(count) * CHAR_BIT - __builtin_clzl(count);
+		bucket = MIN(DISPATCH_PERF_MON_BUCKETS - 1,
+					 (int)sizeof(count) * CHAR_BIT - __builtin_clzl(count));
 		os_atomic_add(&_dispatch_stats[bucket].count_total, count, relaxed);
 	}
 	os_atomic_add(&_dispatch_stats[bucket].time_total, delta, relaxed);
@@ -2449,8 +2601,9 @@
 			if (likely(old_pri & ~_PTHREAD_PRIORITY_FLAGS_MASK)) {
 				pflags |= _PTHREAD_SET_SELF_QOS_FLAG;
 			}
-			if (unlikely(DISPATCH_QUEUE_DRAIN_OWNER(&_dispatch_mgr_q) ==
-					_dispatch_tid_self())) {
+			uint64_t mgr_dq_state =
+					os_atomic_load2o(&_dispatch_mgr_q, dq_state, relaxed);
+			if (unlikely(_dq_state_drain_locked_by_self(mgr_dq_state))) {
 				DISPATCH_INTERNAL_CRASH(pp,
 						"Changing the QoS while on the manager queue");
 			}
@@ -2494,9 +2647,6 @@
 			kv = _voucher_swap_and_get_mach_voucher(ov, v);
 		}
 	}
-#if !PTHREAD_WORKQUEUE_RESETS_VOUCHER_AND_PRIORITY_ON_PARK
-	flags &= ~(dispatch_thread_set_self_t)DISPATCH_THREAD_PARK;
-#endif
 	if (!(flags & DISPATCH_THREAD_PARK)) {
 		_dispatch_set_priority_and_mach_voucher_slow(priority, kv);
 	}
@@ -2795,7 +2945,7 @@
 	oq = os_atomic_xchg2o(dbpd, dbpd_queue, NULL, relaxed);
 	if (oq) {
 		// balances dispatch_{,barrier_,}sync
-		_os_object_release_internal(oq->_as_os_obj);
+		_os_object_release_internal_n(oq->_as_os_obj, 2);
 	}
 }
 
@@ -2821,7 +2971,7 @@
 	oq = os_atomic_xchg2o(dbpd, dbpd_queue, NULL, relaxed);
 	if (oq) {
 		// balances dispatch_{,barrier_,group_}async
-		_os_object_release_internal_inline(oq->_as_os_obj);
+		_os_object_release_internal_n_inline(oq->_as_os_obj, 2);
 	}
 	if (release) {
 		Block_release(b);
@@ -2894,8 +3044,7 @@
 		// that times out, subsequent waits will not boost the qos of the
 		// still-running block.
 		dx_wakeup(boost_oq, _dispatch_qos_from_pp(pp),
-				DISPATCH_WAKEUP_BLOCK_WAIT | DISPATCH_WAKEUP_OVERRIDING |
-				DISPATCH_WAKEUP_CONSUME);
+				DISPATCH_WAKEUP_BLOCK_WAIT | DISPATCH_WAKEUP_CONSUME_2);
 	}
 
 	mach_port_t boost_th = dbpd->dbpd_thread;
@@ -2959,7 +3108,7 @@
 
 	// balanced in d_block_async_invoke_and_release or d_block_wait
 	if (os_atomic_cmpxchg2o(dbpd, dbpd_queue, NULL, oq, relaxed)) {
-		_os_object_retain_internal_inline(oq->_as_os_obj);
+		_os_object_retain_internal_n_inline(oq->_as_os_obj, 2);
 	}
 
 	if (dc_flags & DISPATCH_OBJ_CONSUME_BIT) {
@@ -3116,17 +3265,12 @@
 
 	rq = dq->do_targetq;
 	while (slowpath(rq->do_targetq) && rq != old_dq) {
-		_dispatch_non_barrier_complete(rq);
+		_dispatch_queue_non_barrier_complete(rq);
 		rq = rq->do_targetq;
 	}
 
-	_dispatch_non_barrier_complete(dq);
-
-	if (dic->dic_deferred) {
-		return _dispatch_queue_drain_deferred_invoke(dq, dic, flags, 0);
-	}
-
-	_dispatch_release_tailcall(dq);
+	_dispatch_queue_non_barrier_complete(dq);
+	_dispatch_release_tailcall(dq); // pairs with _dispatch_async_redirect_wrap
 }
 
 DISPATCH_ALWAYS_INLINE
@@ -3143,7 +3287,7 @@
 	dc->dc_other = dou._do;
 	dc->dc_voucher = DISPATCH_NO_VOUCHER;
 	dc->dc_priority = DISPATCH_NO_PRIORITY;
-	_dispatch_retain(dq);
+	_dispatch_retain(dq); // released in _dispatch_async_redirect_invoke
 	return dc;
 }
 
@@ -3294,36 +3438,11 @@
 #pragma mark -
 #pragma mark _dispatch_sync_invoke / _dispatch_sync_complete
 
-DISPATCH_ALWAYS_INLINE
-static inline void
-_dispatch_barrier_complete_inline(dispatch_queue_t dq)
-{
-	uint64_t owned = DISPATCH_QUEUE_IN_BARRIER +
-			dq->dq_width * DISPATCH_QUEUE_WIDTH_INTERVAL;
-
-	if (unlikely(dq->dq_items_tail)) {
-		return _dispatch_try_lock_transfer_or_wakeup(dq);
-	}
-
-	if (unlikely(!_dispatch_queue_drain_try_unlock(dq, owned, true))) {
-		// someone enqueued a slow item at the head
-		// looping may be its last chance
-		return _dispatch_try_lock_transfer_or_wakeup(dq);
-	}
-}
-
 DISPATCH_NOINLINE
 static void
-_dispatch_barrier_complete(dispatch_queue_t dq)
+_dispatch_queue_non_barrier_complete(dispatch_queue_t dq)
 {
-	_dispatch_barrier_complete_inline(dq);
-}
-
-DISPATCH_NOINLINE
-static void
-_dispatch_non_barrier_complete(dispatch_queue_t dq)
-{
-	uint64_t old_state, new_state;
+	uint64_t old_state, new_state, owner_self = _dispatch_lock_value_for_self();
 
 	// see _dispatch_queue_resume()
 	os_atomic_rmw_loop2o(dq, dq_state, old_state, new_state, relaxed, {
@@ -3346,7 +3465,7 @@
 					DISPATCH_QUEUE_WIDTH_FULL_BIT) {
 				new_state = full_width;
 				new_state &= ~DISPATCH_QUEUE_DIRTY;
-				new_state ^= _dispatch_tid_self();
+				new_state |= owner_self;
 			} else if (_dq_state_is_dirty(old_state)) {
 				new_state |= DISPATCH_QUEUE_ENQUEUED;
 			}
@@ -3354,11 +3473,19 @@
 	});
 
 	if ((old_state ^ new_state) & DISPATCH_QUEUE_IN_BARRIER) {
-		return _dispatch_try_lock_transfer_or_wakeup(dq);
+		if (_dq_state_is_dirty(old_state)) {
+			// <rdar://problem/14637483>
+			// dependency ordering for dq state changes that were flushed
+			// and not acted upon
+			os_atomic_thread_fence(dependency);
+			dq = os_atomic_force_dependency_on(dq, old_state);
+		}
+		return _dispatch_queue_barrier_complete(dq, 0, 0);
 	}
 
 	if ((old_state ^ new_state) & DISPATCH_QUEUE_ENQUEUED) {
-		_dispatch_retain(dq);
+		_dispatch_retain_2(dq);
+		dispatch_assert(!_dq_state_is_base_wlh(new_state));
 		return dx_push(dq->do_targetq, dq, _dq_state_max_qos(new_state));
 	}
 }
@@ -3393,9 +3520,9 @@
 	do {
 		if (dq == stop_dq) return;
 		if (barrier) {
-			_dispatch_barrier_complete(dq);
+			_dispatch_queue_barrier_complete(dq, 0, 0);
 		} else {
-			_dispatch_non_barrier_complete(dq);
+			_dispatch_queue_non_barrier_complete(dq);
 		}
 		dq = dq->do_targetq;
 		barrier = (dq->dq_width == 1);
@@ -3417,7 +3544,7 @@
 		dispatch_function_t func)
 {
 	_dispatch_sync_function_invoke_inline(dq, ctxt, func);
-	_dispatch_non_barrier_complete(dq);
+	_dispatch_queue_non_barrier_complete(dq);
 }
 
 DISPATCH_NOINLINE
@@ -3426,60 +3553,274 @@
 		dispatch_function_t func)
 {
 	_dispatch_sync_function_invoke_inline(dq, ctxt, func);
-	_dispatch_barrier_complete_inline(dq);
+	dx_wakeup(dq, 0, DISPATCH_WAKEUP_BARRIER_COMPLETE);
+}
+
+/*
+ * This is an optimized version of _dispatch_barrier_sync_invoke_and_complete
+ *
+ * For queues we can cheat and inline the unlock code, which is invalid
+ * for objects with a more complex state machine (sources or mach channels)
+ */
+DISPATCH_NOINLINE
+static void
+_dispatch_queue_barrier_sync_invoke_and_complete(dispatch_queue_t dq,
+		void *ctxt, dispatch_function_t func)
+{
+	_dispatch_sync_function_invoke_inline(dq, ctxt, func);
+	if (unlikely(dq->dq_items_tail || dq->dq_width > 1)) {
+		return _dispatch_queue_barrier_complete(dq, 0, 0);
+	}
+
+	// Presence of any of these bits requires more work that only
+	// _dispatch_queue_barrier_complete() handles properly
+	//
+	// Note: testing for RECEIVED_OVERRIDE or RECEIVED_SYNC_WAIT without
+	// checking the role is sloppy, but is a super fast check, and neither of
+	// these bits should be set if the lock was never contended/discovered.
+	const uint64_t fail_unlock_mask = DISPATCH_QUEUE_SUSPEND_BITS_MASK |
+			DISPATCH_QUEUE_ENQUEUED | DISPATCH_QUEUE_DIRTY |
+			DISPATCH_QUEUE_RECEIVED_OVERRIDE | DISPATCH_QUEUE_SYNC_TRANSFER |
+			DISPATCH_QUEUE_RECEIVED_SYNC_WAIT;
+	uint64_t old_state, new_state;
+
+	// similar to _dispatch_queue_drain_try_unlock
+	os_atomic_rmw_loop2o(dq, dq_state, old_state, new_state, release, {
+		new_state  = old_state - DISPATCH_QUEUE_SERIAL_DRAIN_OWNED;
+		new_state &= ~DISPATCH_QUEUE_DRAIN_UNLOCK_MASK;
+		new_state &= ~DISPATCH_QUEUE_MAX_QOS_MASK;
+		if (unlikely(old_state & fail_unlock_mask)) {
+			os_atomic_rmw_loop_give_up({
+				return _dispatch_queue_barrier_complete(dq, 0, 0);
+			});
+		}
+	});
+	if (_dq_state_is_base_wlh(old_state)) {
+		_dispatch_event_loop_assert_not_owned((dispatch_wlh_t)dq);
+	}
 }
 
 #pragma mark -
 #pragma mark _dispatch_sync_wait / _dispatch_sync_waiter_wake
 
+#define DISPATCH_SYNC_WAITER_NO_UNLOCK (~0ull)
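+// Sentinel for `owned` meaning the caller keeps the drain lock: the waiter
+// is redirected or woken up without modifying dq_state.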
+
 DISPATCH_NOINLINE
 static void
-_dispatch_sync_waiter_wake(OS_UNUSED dispatch_queue_t dq,
-		dispatch_sync_context_t dsc)
+_dispatch_sync_waiter_wake(dispatch_sync_context_t dsc,
+		dispatch_wlh_t wlh, uint64_t old_state, uint64_t new_state)
 {
-	if (dsc->dsc_override_qos > dsc->dsc_override_qos_floor) {
-		_dispatch_wqthread_override_start((mach_port_t)&dsc->dc_data,
-				dsc->dsc_override_qos);
+	dispatch_wlh_t waiter_wlh = dsc->dc_data;
+
+	if (_dq_state_in_sync_transfer(old_state) ||
+			_dq_state_in_sync_transfer(new_state) ||
+			(waiter_wlh != DISPATCH_WLH_ANON)) {
+		_dispatch_event_loop_wake_owner(dsc, wlh, old_state, new_state);
 	}
-	_dispatch_thread_event_signal(&dsc->dsc_event);
+	if (waiter_wlh == DISPATCH_WLH_ANON) {
+		if (dsc->dsc_override_qos > dsc->dsc_override_qos_floor) {
+			_dispatch_wqthread_override_start(dsc->dsc_waiter,
+					dsc->dsc_override_qos);
+		}
+		_dispatch_thread_event_signal(&dsc->dsc_event);
+	}
 	_dispatch_introspection_queue_item_complete(dsc->_as_dc);
 }
 
 DISPATCH_NOINLINE
 static void
-_dispatch_sync_waiter_redirect_or_wake(dispatch_queue_t dq,
+_dispatch_sync_waiter_redirect_or_wake(dispatch_queue_t dq, uint64_t owned,
 		dispatch_object_t dou)
 {
-	dispatch_sync_context_t dsc = (dispatch_sync_context_t )dou._dc;
-	uint32_t tid = (uint32_t)(uintptr_t)dsc->dc_data;
+	dispatch_sync_context_t dsc = (dispatch_sync_context_t)dou._dc;
+	uint64_t next_owner = 0, old_state, new_state;
+	dispatch_wlh_t wlh = NULL;
 
-	if (likely(dsc->dsc_override_qos)) {
-		uint64_t dq_state = os_atomic_load2o(dq, dq_state, relaxed);
-		if (dsc->dsc_override_qos < _dq_state_max_qos(dq_state)) {
-			dsc->dsc_override_qos = _dq_state_max_qos(dq_state);
-		}
-	}
 	_dispatch_trace_continuation_pop(dq, dsc->_as_dc);
 
-	while (unlikely(dq->do_targetq->do_targetq)) {
-		dq = dq->do_targetq;
-		if (likely(dq->dq_width == 1)) {
-			dsc->dc_flags = DISPATCH_OBJ_BARRIER_BIT |
-					DISPATCH_OBJ_SYNC_WAITER_BIT;
-			if (unlikely(!_dispatch_queue_try_acquire_barrier_sync(dq, tid))) {
-				_dispatch_introspection_queue_item_complete(dsc->_as_dc);
-				return _dispatch_queue_push_sync_waiter(dq, dsc);
+	if (owned == DISPATCH_SYNC_WAITER_NO_UNLOCK) {
+		dispatch_assert(!(dsc->dc_flags & DISPATCH_OBJ_BARRIER_BIT));
+		new_state = old_state = os_atomic_load2o(dq, dq_state, relaxed);
+	} else {
+		if (dsc->dc_flags & DISPATCH_OBJ_BARRIER_BIT) {
+			next_owner = _dispatch_lock_value_from_tid(dsc->dsc_waiter);
+		}
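+		// Release `owned` from dq_state and, for a barrier waiter, hand the
+		// drain lock directly to the waiter's thread.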
+		os_atomic_rmw_loop2o(dq, dq_state, old_state, new_state, release, {
+			new_state  = old_state - owned;
+			new_state &= ~DISPATCH_QUEUE_DRAIN_UNLOCK_MASK;
+			new_state &= ~DISPATCH_QUEUE_DIRTY;
+			new_state |= next_owner;
+			if (_dq_state_is_base_wlh(old_state)) {
+				new_state |= DISPATCH_QUEUE_SYNC_TRANSFER;
 			}
-		} else {
-			dsc->dc_flags = DISPATCH_OBJ_SYNC_WAITER_BIT;
-			if (unlikely(!_dispatch_queue_try_reserve_sync_width(dq))) {
-				_dispatch_introspection_queue_item_complete(dsc->_as_dc);
-				return _dispatch_queue_push_sync_waiter(dq, dsc);
-			}
+		});
+		if (_dq_state_is_base_wlh(old_state)) {
+			wlh = (dispatch_wlh_t)dq;
+		} else if (_dq_state_received_override(old_state)) {
+			// Ensure that the root queue sees that this thread was overridden.
+			_dispatch_set_basepri_override_qos(_dq_state_max_qos(old_state));
 		}
 	}
 
-	return _dispatch_sync_waiter_wake(dq, dsc);
+	if (dsc->dc_data == DISPATCH_WLH_ANON) {
+		if (dsc->dsc_override_qos < _dq_state_max_qos(old_state)) {
+			dsc->dsc_override_qos = _dq_state_max_qos(old_state);
+		}
+	}
+
+	if (unlikely(_dq_state_is_inner_queue(old_state))) {
+		dispatch_queue_t tq = dq->do_targetq;
+		if (likely(tq->dq_width == 1)) {
+			dsc->dc_flags = DISPATCH_OBJ_BARRIER_BIT |
+					DISPATCH_OBJ_SYNC_WAITER_BIT;
+		} else {
+			dsc->dc_flags = DISPATCH_OBJ_SYNC_WAITER_BIT;
+		}
+		_dispatch_introspection_queue_item_complete(dsc->_as_dc);
+		return _dispatch_queue_push_sync_waiter(tq, dsc, 0);
+	}
+
+	return _dispatch_sync_waiter_wake(dsc, wlh, old_state, new_state);
+}
+
+DISPATCH_NOINLINE
+static void
+_dispatch_queue_class_barrier_complete(dispatch_queue_t dq, dispatch_qos_t qos,
+		dispatch_wakeup_flags_t flags, dispatch_queue_wakeup_target_t target,
+		uint64_t owned)
+{
+	uint64_t old_state, new_state, enqueue;
+	dispatch_queue_t tq;
+
+	if (target == DISPATCH_QUEUE_WAKEUP_MGR) {
+		tq = &_dispatch_mgr_q;
+		enqueue = DISPATCH_QUEUE_ENQUEUED_ON_MGR;
+	} else if (target) {
+		tq = (target == DISPATCH_QUEUE_WAKEUP_TARGET) ? dq->do_targetq : target;
+		enqueue = DISPATCH_QUEUE_ENQUEUED;
+	} else {
+		tq = NULL;
+		enqueue = 0;
+	}
+
+	os_atomic_rmw_loop2o(dq, dq_state, old_state, new_state, release, {
+		new_state  = _dq_state_merge_qos(old_state - owned, qos);
+		new_state &= ~DISPATCH_QUEUE_DRAIN_UNLOCK_MASK;
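+		// a suspended queue keeps a fake drain owner so that it is neither
+		// drained nor enqueued while it remains suspended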
+		if (unlikely(_dq_state_is_suspended(old_state))) {
+			new_state |= DLOCK_OWNER_MASK;
+		} else if (enqueue) {
+			new_state |= enqueue;
+		} else if (unlikely(_dq_state_is_dirty(old_state))) {
+			os_atomic_rmw_loop_give_up({
+				// just renew the drain lock with an acquire barrier, to see
+				// what the enqueuer that set DIRTY has done.
+				// the xor generates better assembly as DISPATCH_QUEUE_DIRTY
+				// is already in a register
+				os_atomic_xor2o(dq, dq_state, DISPATCH_QUEUE_DIRTY, acquire);
+				flags |= DISPATCH_WAKEUP_BARRIER_COMPLETE;
+				return dx_wakeup(dq, qos, flags);
+			});
+		} else if (_dq_state_is_base_wlh(old_state)) {
+			new_state &= ~DISPATCH_QUEUE_MAX_QOS_MASK;
+			new_state &= ~DISPATCH_QUEUE_ENQUEUED;
+		} else {
+			new_state &= ~DISPATCH_QUEUE_MAX_QOS_MASK;
+		}
+	});
+	old_state -= owned;
+	dispatch_assert(_dq_state_drain_locked_by_self(old_state));
+	dispatch_assert(!_dq_state_is_enqueued_on_manager(old_state));
+
+	if (_dq_state_received_override(old_state)) {
+		// Ensure that the root queue sees that this thread was overridden.
+		_dispatch_set_basepri_override_qos(_dq_state_max_qos(old_state));
+	}
+
+	if (tq) {
+		if (likely((old_state ^ new_state) & enqueue)) {
+			dispatch_assert(_dq_state_is_enqueued(new_state));
+			dispatch_assert(flags & DISPATCH_WAKEUP_CONSUME_2);
+			return _dispatch_queue_push_queue(tq, dq, new_state);
+		}
+#if HAVE_PTHREAD_WORKQUEUE_QOS
+		// <rdar://problem/27694093> when doing sync to async handoff
+		// if the queue received an override we have to forcefully redrive
+		// the same override so that a new stealer is enqueued because
+		// the previous one may be gone already
+		if (_dq_state_should_override(new_state)) {
+			return _dispatch_queue_class_wakeup_with_override(dq, new_state,
+					flags);
+		}
+#endif
+	}
+	if (flags & DISPATCH_WAKEUP_CONSUME_2) {
+		return _dispatch_release_2_tailcall(dq);
+	}
+}
+
+DISPATCH_NOINLINE
+static void
+_dispatch_queue_barrier_complete(dispatch_queue_t dq, dispatch_qos_t qos,
+		dispatch_wakeup_flags_t flags)
+{
+	dispatch_continuation_t dc_tmp, dc_start = NULL, dc_end = NULL;
+	dispatch_queue_wakeup_target_t target = DISPATCH_QUEUE_WAKEUP_NONE;
+	struct dispatch_object_s *dc = NULL;
+	uint64_t owned = DISPATCH_QUEUE_IN_BARRIER +
+			dq->dq_width * DISPATCH_QUEUE_WIDTH_INTERVAL;
+	size_t count = 0;
+
+	dispatch_assert(dx_metatype(dq) == _DISPATCH_QUEUE_TYPE);
+
+	if (dq->dq_items_tail && !DISPATCH_QUEUE_IS_SUSPENDED(dq)) {
+		dc = _dispatch_queue_head(dq);
+		if (!_dispatch_object_is_sync_waiter(dc)) {
+			// not a slow item, needs to wake up
+		} else if (likely(dq->dq_width == 1) ||
+				_dispatch_object_is_barrier(dc)) {
+			// rdar://problem/8290662 "barrier/writer lock transfer"
+			dc_start = dc_end = (dispatch_continuation_t)dc;
+			owned = 0;
+			count = 1;
+			dc = _dispatch_queue_next(dq, dc);
+		} else {
+			// <rdar://problem/10164594> "reader lock transfer"
+			// we must not wake waiters immediately because our right to
+			// dequeue is granted by holding the full "barrier" width,
+			// which a signaled work item could relinquish out from under us
+			dc_start = (dispatch_continuation_t)dc;
+			do {
+				// no check on width here because concurrent queues
+				// do not respect width for blocked readers, the thread
+				// is already spent anyway
+				dc_end = (dispatch_continuation_t)dc;
+				owned -= DISPATCH_QUEUE_WIDTH_INTERVAL;
+				count++;
+				dc = _dispatch_queue_next(dq, dc);
+			} while (dc && _dispatch_object_is_sync_waiter_non_barrier(dc));
+		}
+
+		if (count) {
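+			// Only the first waiter performs the dq_state transition that
+			// releases `owned`; the rest are woken without unlocking again.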
+			do {
+				dc_tmp = dc_start;
+				dc_start = dc_start->do_next;
+				_dispatch_sync_waiter_redirect_or_wake(dq, owned, dc_tmp);
+				owned = DISPATCH_SYNC_WAITER_NO_UNLOCK;
+			} while (dc_tmp != dc_end);
+			if (flags & DISPATCH_WAKEUP_CONSUME_2) {
+				return _dispatch_release_2_tailcall(dq);
+			}
+			return;
+		}
+		if (!(flags & DISPATCH_WAKEUP_CONSUME_2)) {
+			_dispatch_retain_2(dq);
+			flags |= DISPATCH_WAKEUP_CONSUME_2;
+		}
+		target = DISPATCH_QUEUE_WAKEUP_TARGET;
+	}
+
+	return _dispatch_queue_class_barrier_complete(dq, qos, flags, target,owned);
 }
 
 #if DISPATCH_COCOA_COMPAT
@@ -3507,27 +3848,90 @@
 }
 #endif
 
+DISPATCH_ALWAYS_INLINE
+static inline uint64_t
+_dispatch_sync_wait_prepare(dispatch_queue_t dq)
+{
+	uint64_t old_state, new_state;
+
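+	// For a drain-locked wlh-base queue that isn't already in a sync
+	// transfer, record that a sync waiter arrived so the current owner can
+	// take it into account when it unlocks.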
+	os_atomic_rmw_loop2o(dq, dq_state, old_state, new_state, relaxed, {
+		if (_dq_state_is_suspended(old_state) ||
+				!_dq_state_is_base_wlh(old_state)) {
+			os_atomic_rmw_loop_give_up(return old_state);
+		}
+		if (!_dq_state_drain_locked(old_state) ||
+				_dq_state_in_sync_transfer(old_state)) {
+			os_atomic_rmw_loop_give_up(return old_state);
+		}
+		new_state = old_state | DISPATCH_QUEUE_RECEIVED_SYNC_WAIT;
+	});
+	return new_state;
+}
+
+static void
+_dispatch_sync_waiter_compute_wlh(dispatch_queue_t dq,
+		dispatch_sync_context_t dsc)
+{
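+	// Recurse up the target hierarchy to find the wlh this waiter should
+	// wait on; fall back to DISPATCH_WLH_ANON for suspended or anonymous
+	// bases.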
+	bool needs_locking = _dispatch_queue_is_legacy(dq);
+
+	if (needs_locking) {
+		dsc->dsc_release_storage = true;
+		_dispatch_queue_sidelock_lock(dq);
+	}
+
+	dispatch_queue_t tq = dq->do_targetq;
+	uint64_t dq_state = _dispatch_sync_wait_prepare(tq);
+
+	if (_dq_state_is_suspended(dq_state) ||
+			_dq_state_is_base_anon(dq_state)) {
+		dsc->dsc_release_storage = false;
+		dsc->dc_data = DISPATCH_WLH_ANON;
+	} else if (_dq_state_is_base_wlh(dq_state)) {
+		if (dsc->dsc_release_storage) {
+			_dispatch_queue_retain_storage(tq);
+		}
+		dsc->dc_data = (dispatch_wlh_t)tq;
+	} else {
+		_dispatch_sync_waiter_compute_wlh(tq, dsc);
+	}
+	if (needs_locking) _dispatch_queue_sidelock_unlock(dq);
+}
+
 DISPATCH_NOINLINE
 static void
 _dispatch_sync_wait(dispatch_queue_t top_dq, void *ctxt,
 		dispatch_function_t func, uintptr_t top_dc_flags,
 		dispatch_queue_t dq, uintptr_t dc_flags)
 {
-	uint32_t tid = _dispatch_tid_self();
-	dispatch_qos_t oq_floor = _dispatch_get_basepri_override_qos_floor();
 	pthread_priority_t pp = _dispatch_get_priority();
+	dispatch_tid tid = _dispatch_tid_self();
+	dispatch_qos_t qos;
+	uint64_t dq_state;
+
+	dq_state = _dispatch_sync_wait_prepare(dq);
+	if (unlikely(_dq_state_drain_locked_by(dq_state, tid))) {
+		DISPATCH_CLIENT_CRASH((uintptr_t)dq_state,
+				"dispatch_sync called on queue "
+				"already owned by current thread");
+	}
 
 	struct dispatch_sync_context_s dsc = {
 		.dc_flags    = dc_flags | DISPATCH_OBJ_SYNC_WAITER_BIT,
-		.dc_data     = (void *)(uintptr_t)tid,
 		.dc_other    = top_dq,
 		.dc_priority = pp | _PTHREAD_PRIORITY_ENFORCE_FLAG,
 		.dc_voucher  = DISPATCH_NO_VOUCHER,
 		.dsc_func    = func,
 		.dsc_ctxt    = ctxt,
-		.dsc_override_qos_floor = oq_floor,
-		.dsc_override_qos = oq_floor,
+		.dsc_waiter  = tid,
 	};
+	if (_dq_state_is_suspended(dq_state) ||
+			_dq_state_is_base_anon(dq_state)) {
+		dsc.dc_data = DISPATCH_WLH_ANON;
+	} else if (_dq_state_is_base_wlh(dq_state)) {
+		dsc.dc_data = (dispatch_wlh_t)dq;
+	} else {
+		_dispatch_sync_waiter_compute_wlh(dq, &dsc);
+	}
 #if DISPATCH_COCOA_COMPAT
 	// It's preferred to execute synchronous blocks on the current thread
 	// due to thread-local side effects, etc. However, blocks submitted
@@ -3545,22 +3949,26 @@
 	dsc.dc_ctxt = &dsc;
 #endif
 
-	uint64_t dq_state = os_atomic_load2o(dq, dq_state, relaxed);
-	if (unlikely(_dq_state_drain_locked_by(dq_state, tid))) {
-		DISPATCH_CLIENT_CRASH(dq, "dispatch_sync called on queue "
-				"already owned by current thread");
+	if (dsc.dc_data == DISPATCH_WLH_ANON) {
+		dsc.dsc_override_qos_floor = dsc.dsc_override_qos =
+				_dispatch_get_basepri_override_qos_floor();
+		qos = _dispatch_qos_from_pp(pp);
+		_dispatch_thread_event_init(&dsc.dsc_event);
+	} else {
+		qos = 0;
 	}
-
-	_dispatch_thread_event_init(&dsc.dsc_event);
-	_dispatch_queue_push_sync_waiter(dq, &dsc);
-	_dispatch_thread_event_wait(&dsc.dsc_event); // acquire
-	_dispatch_thread_event_destroy(&dsc.dsc_event);
-	if (dsc.dsc_override_qos > dsc.dsc_override_qos_floor) {
-		// If we received an override from _dispatch_sync_waiter_wake(),
-		// ensure that the root queue sees that this thread was overridden.
-		_dispatch_set_basepri_override_qos(dsc.dsc_override_qos);
+	_dispatch_queue_push_sync_waiter(dq, &dsc, qos);
+	if (dsc.dc_data == DISPATCH_WLH_ANON) {
+		_dispatch_thread_event_wait(&dsc.dsc_event); // acquire
+		_dispatch_thread_event_destroy(&dsc.dsc_event);
+		// If _dispatch_sync_waiter_wake() gave this thread an override,
+		// ensure that the root queue sees it.
+		if (dsc.dsc_override_qos > dsc.dsc_override_qos_floor) {
+			_dispatch_set_basepri_override_qos(dsc.dsc_override_qos);
+		}
+	} else {
+		_dispatch_event_loop_wait_for_ownership(&dsc);
 	}
-
 	_dispatch_introspection_sync_begin(top_dq);
 #if DISPATCH_COCOA_COMPAT
 	if (unlikely(dsc.dsc_func == NULL)) {
@@ -3592,7 +4000,7 @@
 _dispatch_sync_recurse(dispatch_queue_t dq, void *ctxt,
 		dispatch_function_t func, uintptr_t dc_flags)
 {
-	uint32_t tid = _dispatch_tid_self();
+	dispatch_tid tid = _dispatch_tid_self();
 	dispatch_queue_t tq = dq->do_targetq;
 
 	do {
@@ -3617,7 +4025,7 @@
 dispatch_barrier_sync_f(dispatch_queue_t dq, void *ctxt,
 		dispatch_function_t func)
 {
-	uint32_t tid = _dispatch_tid_self();
+	dispatch_tid tid = _dispatch_tid_self();
 
 	// The more correct thing to do would be to merge the qos of the thread
 	// that just acquired the barrier lock into the queue state.
@@ -3636,7 +4044,7 @@
 	if (unlikely(dq->do_targetq->do_targetq)) {
 		return _dispatch_sync_recurse(dq, ctxt, func, DISPATCH_OBJ_BARRIER_BIT);
 	}
-	_dispatch_barrier_sync_invoke_and_complete(dq, ctxt, func);
+	_dispatch_queue_barrier_sync_invoke_and_complete(dq, ctxt, func);
 }
 
 DISPATCH_NOINLINE
@@ -3678,7 +4086,7 @@
 	}
 	// balanced in d_block_sync_invoke or d_block_wait
 	if (os_atomic_cmpxchg2o(dbpd, dbpd_queue, NULL, dq->_as_oq, relaxed)) {
-		_dispatch_retain(dq);
+		_dispatch_retain_2(dq);
 	}
 	if (flags & DISPATCH_BLOCK_BARRIER) {
 		dispatch_barrier_sync_f(dq, work, _dispatch_block_sync_invoke);
@@ -3718,7 +4126,7 @@
 _dispatch_barrier_trysync_or_async_f(dispatch_queue_t dq, void *ctxt,
 		dispatch_function_t func)
 {
-	uint32_t tid = _dispatch_tid_self();
+	dispatch_tid tid = _dispatch_tid_self();
 	if (unlikely(!_dispatch_queue_try_acquire_barrier_sync(dq, tid))) {
 		return _dispatch_barrier_async_detached_f(dq, ctxt, func);
 	}
@@ -3730,7 +4138,7 @@
 _dispatch_trysync_recurse(dispatch_queue_t dq, void *ctxt,
 		dispatch_function_t f, uintptr_t dc_flags)
 {
-	uint32_t tid = _dispatch_tid_self();
+	dispatch_tid tid = _dispatch_tid_self();
 	dispatch_queue_t q, tq = dq->do_targetq;
 
 	for (;;) {
@@ -3765,7 +4173,7 @@
 _dispatch_barrier_trysync_f(dispatch_queue_t dq, void *ctxt,
 		dispatch_function_t f)
 {
-	uint32_t tid = _dispatch_tid_self();
+	dispatch_tid tid = _dispatch_tid_self();
 	if (unlikely(!dq->do_targetq)) {
 		DISPATCH_CLIENT_CRASH(dq, "_dispatch_trysync called on a root queue");
 	}
@@ -3807,16 +4215,13 @@
 {
 	dispatch_queue_wakeup_target_t target = DISPATCH_QUEUE_WAKEUP_NONE;
 
+	if (unlikely(flags & DISPATCH_WAKEUP_BARRIER_COMPLETE)) {
+		return _dispatch_queue_barrier_complete(dq, qos, flags);
+	}
 	if (_dispatch_queue_class_probe(dq)) {
 		target = DISPATCH_QUEUE_WAKEUP_TARGET;
 	}
-	if (target) {
-		return _dispatch_queue_class_wakeup(dq, qos, flags, target);
-	} else if (qos) {
-		return _dispatch_queue_class_override_drainer(dq, qos, flags);
-	} else if (flags & DISPATCH_WAKEUP_CONSUME) {
-		return _dispatch_release_tailcall(dq);
-	}
+	return _dispatch_queue_class_wakeup(dq, qos, flags, target);
 }
 
 #if DISPATCH_COCOA_COMPAT
@@ -3862,6 +4267,16 @@
 }
 #endif // DISPATCH_COCOA_COMPAT
 
+DISPATCH_ALWAYS_INLINE
+static inline dispatch_qos_t
+_dispatch_runloop_queue_reset_max_qos(dispatch_queue_class_t dqu)
+{
+	uint64_t old_state, clear_bits = DISPATCH_QUEUE_MAX_QOS_MASK |
+			DISPATCH_QUEUE_RECEIVED_OVERRIDE;
+	old_state = os_atomic_and_orig2o(dqu._dq, dq_state, ~clear_bits, relaxed);
+	return _dq_state_max_qos(old_state);
+}
+
 void
 _dispatch_runloop_queue_wakeup(dispatch_queue_t dq, dispatch_qos_t qos,
 		dispatch_wakeup_flags_t flags)
@@ -3872,14 +4287,14 @@
 		return _dispatch_queue_wakeup(dq, qos, flags);
 	}
 
-	if (flags & DISPATCH_WAKEUP_FLUSH) {
+	if (flags & DISPATCH_WAKEUP_MAKE_DIRTY) {
 		os_atomic_or2o(dq, dq_state, DISPATCH_QUEUE_DIRTY, release);
 	}
 	if (_dispatch_queue_class_probe(dq)) {
 		return _dispatch_runloop_queue_poke(dq, qos, flags);
 	}
 
-	qos = _dispatch_queue_reset_max_qos(dq);
+	qos = _dispatch_runloop_queue_reset_max_qos(dq);
 	if (qos) {
 		mach_port_t owner = DISPATCH_QUEUE_DRAIN_OWNER(dq);
 		if (_dispatch_queue_class_probe(dq)) {
@@ -3888,8 +4303,8 @@
 		_dispatch_thread_override_end(owner, dq);
 		return;
 	}
-	if (flags & DISPATCH_WAKEUP_CONSUME) {
-		return _dispatch_release_tailcall(dq);
+	if (flags & DISPATCH_WAKEUP_CONSUME_2) {
+		return _dispatch_release_2_tailcall(dq);
 	}
 #else
 	return _dispatch_queue_wakeup(dq, qos, flags);
@@ -3948,7 +4363,7 @@
 _dispatch_runloop_queue_poke(dispatch_queue_t dq, dispatch_qos_t qos,
 		dispatch_wakeup_flags_t flags)
 {
-	// it's not useful to handle WAKEUP_FLUSH because mach_msg() will have
+	// it's not useful to handle WAKEUP_MAKE_DIRTY because mach_msg() will have
 	// a release barrier and that when runloop queues stop being thread-bound
 	// they have a non optional wake-up to start being a "normal" queue
 	// either in _dispatch_runloop_queue_xref_dispose,
@@ -3978,8 +4393,8 @@
 	}
 no_change:
 	_dispatch_runloop_queue_class_poke(dq);
-	if (flags & DISPATCH_WAKEUP_CONSUME) {
-		return _dispatch_release_tailcall(dq);
+	if (flags & DISPATCH_WAKEUP_CONSUME_2) {
+		return _dispatch_release_2_tailcall(dq);
 	}
 }
 #endif
@@ -4074,7 +4489,7 @@
 	}
 #endif
 	do {
-		_dispatch_retain(dq);
+		_dispatch_retain(dq); // released in _dispatch_worker_thread
 		while ((r = pthread_create(pthr, attr, _dispatch_worker_thread, dq))) {
 			if (r != EAGAIN) {
 				(void)dispatch_assume_zero(r);
@@ -4128,15 +4543,44 @@
 static void
 _dispatch_return_to_kernel(void)
 {
-	if (unlikely(_dispatch_get_wlh() == DISPATCH_WLH_GLOBAL)) {
+	if (unlikely(_dispatch_get_wlh() == DISPATCH_WLH_ANON)) {
 		_dispatch_clear_return_to_kernel();
 	} else {
 		_dispatch_event_loop_drain(KEVENT_FLAG_IMMEDIATE);
 	}
 }
 
+void
+_dispatch_poll_for_events_4launchd(void)
+{
+#if DISPATCH_USE_KEVENT_WORKQUEUE
+	if (_dispatch_get_wlh()) {
+		dispatch_assert(_dispatch_deferred_items_get()->ddi_wlh_servicing);
+		_dispatch_event_loop_drain(KEVENT_FLAG_IMMEDIATE);
+	}
+#endif
+}
+
 #if HAVE_PTHREAD_WORKQUEUE_NARROWING
-static os_atomic(uint64_t) _dispatch_narrowing_deadlines[DISPATCH_QOS_MAX - 1];
+static os_atomic(uint64_t) _dispatch_narrowing_deadlines[DISPATCH_QOS_MAX];
+#if !DISPATCH_TIME_UNIT_USES_NANOSECONDS
+static uint64_t _dispatch_narrow_check_interval_cache;
+#endif
+
+DISPATCH_ALWAYS_INLINE
+static inline uint64_t
+_dispatch_narrow_check_interval(void)
+{
+#if DISPATCH_TIME_UNIT_USES_NANOSECONDS
+	return 50 * NSEC_PER_MSEC;
+#else
+	if (_dispatch_narrow_check_interval_cache == 0) {
+		_dispatch_narrow_check_interval_cache =
+				_dispatch_time_nano2mach(50 * NSEC_PER_MSEC);
+	}
+	return _dispatch_narrow_check_interval_cache;
+#endif
+}
 
 DISPATCH_ALWAYS_INLINE
 static inline void
@@ -4146,7 +4590,7 @@
 	if (_dispatch_priority_qos(pri) &&
 			!(pri & DISPATCH_PRIORITY_FLAG_OVERCOMMIT)) {
 		dic->dic_next_narrow_check = _dispatch_approximate_time() +
-				DISPATCH_NARROW_CHECK_INTERVAL;
+				_dispatch_narrow_check_interval();
 	}
 }
 
@@ -4157,9 +4601,13 @@
 {
 	if (dic->dic_next_narrow_check != DISPATCH_THREAD_IS_NARROWING) {
 		pthread_priority_t pp = _dispatch_get_priority();
-		size_t idx = _dispatch_qos_from_pp(pp) - 1;
+		dispatch_qos_t qos = _dispatch_qos_from_pp(pp);
+		if (unlikely(!qos || qos > countof(_dispatch_narrowing_deadlines))) {
+			DISPATCH_CLIENT_CRASH(pp, "Thread QoS corruption");
+		}
+		size_t idx = qos - 1; // no entry needed for DISPATCH_QOS_UNSPECIFIED
 		os_atomic(uint64_t) *deadline = &_dispatch_narrowing_deadlines[idx];
-		uint64_t oldval, newval = now + DISPATCH_NARROW_CHECK_INTERVAL;
+		uint64_t oldval, newval = now + _dispatch_narrow_check_interval();
 
 		dic->dic_next_narrow_check = newval;
 		os_atomic_rmw_loop(deadline, oldval, newval, relaxed, {
@@ -4239,6 +4687,8 @@
 		// but width can change while draining barrier work items, so we only
 		// convert to `dq->dq_width * WIDTH_INTERVAL` when we drop `IN_BARRIER`
 		owned = DISPATCH_QUEUE_IN_BARRIER;
+	} else {
+		owned &= DISPATCH_QUEUE_WIDTH_MASK;
 	}
 
 	dc = _dispatch_queue_head(dq);
@@ -4249,6 +4699,9 @@
 		if (unlikely(dic->dic_deferred)) {
 			goto out_with_deferred_compute_owned;
 		}
+		if (unlikely(_dispatch_needs_to_return_to_kernel())) {
+			_dispatch_return_to_kernel();
+		}
 		if (unlikely(!dc)) {
 			if (!dq->dq_items_tail) {
 				break;
@@ -4261,9 +4714,6 @@
 		if (unlikely(_dispatch_queue_drain_should_narrow(dic))) {
 			break;
 		}
-		if (unlikely(_dispatch_needs_to_return_to_kernel())) {
-			_dispatch_return_to_kernel();
-		}
 
 first_iteration:
 		dq_state = os_atomic_load(&dq->dq_state, relaxed);
@@ -4308,7 +4758,8 @@
 			next_dc = _dispatch_queue_next(dq, dc);
 			if (_dispatch_object_is_sync_waiter(dc)) {
 				owned -= DISPATCH_QUEUE_WIDTH_INTERVAL;
-				_dispatch_sync_waiter_redirect_or_wake(dq, dc);
+				_dispatch_sync_waiter_redirect_or_wake(dq,
+						DISPATCH_SYNC_WAITER_NO_UNLOCK, dc);
 				continue;
 			}
 
@@ -4329,12 +4780,13 @@
 	if (dc) {
 		owned = _dispatch_queue_adjust_owned(dq, owned, dc);
 	}
-	*owned_ptr = owned;
+	*owned_ptr &= DISPATCH_QUEUE_ENQUEUED | DISPATCH_QUEUE_ENQUEUED_ON_MGR;
+	*owned_ptr |= owned;
 	_dispatch_thread_frame_pop(&dtf);
 	return dc ? dq->do_targetq : NULL;
 
 out_with_no_width:
-	*owned_ptr = 0;
+	*owned_ptr &= DISPATCH_QUEUE_ENQUEUED | DISPATCH_QUEUE_ENQUEUED_ON_MGR;
 	_dispatch_thread_frame_pop(&dtf);
 	return DISPATCH_QUEUE_WAKEUP_WAIT_FOR_EVENT;
 
@@ -4351,7 +4803,8 @@
 		}
 	}
 out_with_deferred:
-	*owned_ptr = owned;
+	*owned_ptr &= DISPATCH_QUEUE_ENQUEUED | DISPATCH_QUEUE_ENQUEUED_ON_MGR;
+	*owned_ptr |= owned;
 	if (unlikely(flags & DISPATCH_INVOKE_DISALLOW_SYNC_WAITERS)) {
 		DISPATCH_INTERNAL_CRASH(dc,
 				"Deferred continuation on source, mach channel or mgr");
@@ -4429,9 +4882,10 @@
 		DISPATCH_CLIENT_CRASH(0, "_dispatch_main_queue_callback_4CF called"
 				" after dispatch_main()");
 	}
-	mach_port_t owner = DISPATCH_QUEUE_DRAIN_OWNER(dq);
-	if (slowpath(owner != _dispatch_tid_self())) {
-		DISPATCH_CLIENT_CRASH(owner, "_dispatch_main_queue_callback_4CF called"
+	uint64_t dq_state = os_atomic_load2o(dq, dq_state, relaxed);
+	if (unlikely(!_dq_state_drain_locked_by_self(dq_state))) {
+		DISPATCH_CLIENT_CRASH((uintptr_t)dq_state,
+				"_dispatch_main_queue_callback_4CF called"
 				" from the wrong thread");
 	}
 
@@ -4440,7 +4894,7 @@
 
 	// <rdar://problem/23256682> hide the frame chaining when CFRunLoop
 	// drains the main runloop, as this should not be observable that way
-	_dispatch_set_wlh(dq->dq_wlh);
+	_dispatch_adopt_wlh_anon();
 	_dispatch_thread_frame_push_and_rebase(&dtf, dq, NULL);
 
 	pthread_priority_t pp = _dispatch_get_priority();
@@ -4462,8 +4916,6 @@
 		_dispatch_continuation_pop_inline(dc, &dic, DISPATCH_INVOKE_NONE, dq);
 	} while ((dc = next_dc));
 
-	// runloop based queues use their port for the queue PUBLISH pattern
-	// so this raw call to dx_wakeup(0) is valid
 	dx_wakeup(dq, 0, 0);
 	_dispatch_voucher_debug("main queue restore", voucher);
 	_dispatch_reset_basepri(old_dbp);
@@ -4483,7 +4935,7 @@
 	}
 	_dispatch_perfmon_start_notrace();
 	dispatch_thread_frame_s dtf;
-	_dispatch_set_wlh(dq->dq_wlh);
+	bool should_reset_wlh = _dispatch_adopt_wlh_anon_recurse();
 	_dispatch_thread_frame_push(&dtf, dq);
 	pthread_priority_t pp = _dispatch_get_priority();
 	dispatch_priority_t pri = _dispatch_priority_from_pp(pp);
@@ -4498,8 +4950,6 @@
 	_dispatch_continuation_pop_inline(dc, &dic, DISPATCH_INVOKE_NONE, dq);
 
 	if (!next_dc) {
-		// runloop based queues use their port for the queue PUBLISH pattern
-		// so this raw call to dx_wakeup(0) is valid
 		dx_wakeup(dq, 0, 0);
 	}
 
@@ -4508,80 +4958,13 @@
 	_dispatch_reset_basepri_override();
 	_dispatch_reset_priority_and_voucher(pp, voucher);
 	_dispatch_thread_frame_pop(&dtf);
-	_dispatch_reset_wlh();
+	if (should_reset_wlh) _dispatch_reset_wlh();
 	_dispatch_force_cache_cleanup();
 	_dispatch_perfmon_end_notrace();
 	return next_dc;
 }
 #endif
 
-DISPATCH_NOINLINE
-void
-_dispatch_try_lock_transfer_or_wakeup(dispatch_queue_t dq)
-{
-	dispatch_continuation_t dc_tmp, dc_start = NULL, dc_end = NULL;
-	struct dispatch_object_s *dc = NULL;
-	uint64_t owned;
-	size_t count = 0;
-
-	owned  = DISPATCH_QUEUE_IN_BARRIER;
-	owned += dq->dq_width * DISPATCH_QUEUE_WIDTH_INTERVAL;
-attempt_running_slow_head:
-	if (dq->dq_items_tail && !DISPATCH_QUEUE_IS_SUSPENDED(dq)) {
-		dc = _dispatch_queue_head(dq);
-		if (!_dispatch_object_is_sync_waiter(dc)) {
-			// not a slow item, needs to wake up
-		} else if (likely(dq->dq_width == 1) ||
-				_dispatch_object_is_barrier(dc)) {
-			// rdar://problem/8290662 "barrier/writer lock transfer"
-			dc_start = dc_end = (dispatch_continuation_t)dc;
-			owned = 0;
-			count = 1;
-			dc = _dispatch_queue_next(dq, dc);
-		} else {
-			// <rdar://problem/10164594> "reader lock transfer"
-			// we must not wake waiters immediately because our right
-			// for dequeuing is granted through holding the full "barrier" width
-			// which a signaled work item could relinquish out from our feet
-			dc_start = (dispatch_continuation_t)dc;
-			do {
-				// no check on width here because concurrent queues
-				// do not respect width for blocked readers, the thread
-				// is already spent anyway
-				dc_end = (dispatch_continuation_t)dc;
-				owned -= DISPATCH_QUEUE_WIDTH_INTERVAL;
-				count++;
-				dc = _dispatch_queue_next(dq, dc);
-			} while (dc && _dispatch_object_is_sync_waiter_non_barrier(dc));
-		}
-
-		if (count) {
-			_dispatch_queue_drain_transfer_lock(dq, owned, dc_start);
-			do {
-				dc_tmp = dc_start;
-				dc_start = dc_start->do_next;
-				_dispatch_sync_waiter_redirect_or_wake(dq, dc_tmp);
-			} while (dc_tmp != dc_end);
-			return;
-		}
-	}
-
-	if (dc || dx_metatype(dq) != _DISPATCH_QUEUE_TYPE) {
-		// <rdar://problem/23336992> the following wakeup is needed for sources
-		// or mach channels: when ds_pending_data is set at the same time
-		// as a trysync_f happens, lock transfer code above doesn't know about
-		// ds_pending_data or the wakeup logic, but lock transfer is useless
-		// for sources and mach channels in the first place.
-		owned = _dispatch_queue_adjust_owned(dq, owned, dc);
-		_dispatch_queue_drain_unlock(dq, owned);
-		return dx_wakeup(dq, 0, DISPATCH_WAKEUP_WAITER_HANDOFF);
-	} else if (unlikely(!_dispatch_queue_drain_try_unlock(dq, owned, true))) {
-		// someone enqueued a slow item at the head
-		// looping may be its last chance
-		goto attempt_running_slow_head;
-	}
-}
-
 void
 _dispatch_mgr_queue_drain(void)
 {
@@ -4614,96 +4997,37 @@
 #pragma mark dispatch_queue_invoke
 
 void
-_dispatch_queue_drain_deferred_invoke(dispatch_queue_t dq,
+_dispatch_queue_drain_sync_waiter(dispatch_queue_t dq,
 		dispatch_invoke_context_t dic, dispatch_invoke_flags_t flags,
-		uint64_t to_unlock)
+		uint64_t owned)
 {
 	struct dispatch_object_s *dc = dic->dic_deferred;
-	if (_dispatch_object_is_sync_waiter(dc)) {
-		dispatch_assert(to_unlock == 0);
-		dic->dic_deferred = NULL;
-		_dispatch_queue_drain_transfer_lock(dq, to_unlock, dc);
-		_dispatch_sync_waiter_redirect_or_wake(dq, dc);
-		return _dispatch_release_tailcall(dq);
+	dispatch_assert(_dispatch_object_is_sync_waiter(dc));
+	dic->dic_deferred = NULL;
+	if (flags & DISPATCH_INVOKE_WLH) {
+		// Leave the enqueued bit in place; completion of the last sync waiter
+		// in the handoff chain is responsible for dequeuing
+		//
+		// We currently have a +2 to consume, but we need to keep a +1
+		// for the thread request
+		dispatch_assert(_dq_state_is_enqueued_on_target(owned));
+		dispatch_assert(!_dq_state_is_enqueued_on_manager(owned));
+		owned &= ~DISPATCH_QUEUE_ENQUEUED;
+		_dispatch_release_no_dispose(dq);
+	} else {
+		// The sync waiter must own a reference
+		_dispatch_release_2_no_dispose(dq);
 	}
-
-	bool should_defer_again = false, should_pend_queue = true;
-	uint64_t old_state, new_state;
-
-	if (_dispatch_get_current_queue()->do_targetq) {
-		should_defer_again = true;
-		should_pend_queue = false;
-	}
-
-	if (dq->dq_width > 1) {
-		should_pend_queue = false;
-	} else if (should_pend_queue) {
-		dispatch_assert(to_unlock ==
-				DISPATCH_QUEUE_WIDTH_INTERVAL + DISPATCH_QUEUE_IN_BARRIER);
-		os_atomic_rmw_loop2o(dq, dq_state, old_state, new_state, release,{
-			new_state = old_state;
-			if (_dq_state_has_waiters(old_state) ||
-					_dq_state_is_enqueued(old_state)) {
-				os_atomic_rmw_loop_give_up(break);
-			}
-			new_state += DISPATCH_QUEUE_DRAIN_PENDED;
-			new_state -= DISPATCH_QUEUE_IN_BARRIER;
-			new_state -= DISPATCH_QUEUE_WIDTH_INTERVAL;
-		});
-		should_pend_queue = (new_state & DISPATCH_QUEUE_DRAIN_PENDED);
-	}
-
-	if (!should_pend_queue) {
-		if (to_unlock & DISPATCH_QUEUE_IN_BARRIER) {
-			_dispatch_try_lock_transfer_or_wakeup(dq);
-			_dispatch_release(dq);
-		} else if (to_unlock) {
-			_dispatch_queue_drain_unlock(dq, to_unlock);
-			dx_wakeup(dq, 0, DISPATCH_WAKEUP_CONSUME);
-		} else {
-			_dispatch_release(dq);
-		}
-		dq = NULL;
-	}
-
-	if (!should_defer_again) {
-		dic->dic_deferred = NULL;
-		return dx_invoke(dc, dic, flags & _DISPATCH_INVOKE_PROPAGATE_MASK);
-	}
-
-	if (dq) {
-		uint32_t self = _dispatch_tid_self();
-		os_atomic_rmw_loop2o(dq, dq_state, old_state, new_state, release,{
-			new_state = old_state;
-			if (!_dq_state_drain_pended(old_state) ||
-					_dq_state_drain_owner(old_state) != self) {
-				os_atomic_rmw_loop_give_up({
-					// We may have been overridden, so inform the root queue
-					_dispatch_set_basepri_override_qos(
-							_dq_state_max_qos(old_state));
-					return _dispatch_release_tailcall(dq);
-				});
-			}
-			new_state = DISPATCH_QUEUE_DRAIN_UNLOCK(new_state);
-		});
-		if (_dq_state_received_override(old_state)) {
-			// Ensure that the root queue sees that this thread was overridden.
-			_dispatch_set_basepri_override_qos(_dq_state_max_qos(old_state));
-		}
-		return dx_invoke(dq, dic, flags | DISPATCH_INVOKE_STEALING);
-	}
+	return _dispatch_sync_waiter_redirect_or_wake(dq, owned, dc);
 }
 
 void
-_dispatch_queue_finalize_activation(dispatch_queue_t dq)
+_dispatch_queue_finalize_activation(dispatch_queue_t dq,
+		DISPATCH_UNUSED bool *allow_resume)
 {
 	dispatch_queue_t tq = dq->do_targetq;
 	_dispatch_queue_priority_inherit_from_target(dq, tq);
-	_dispatch_queue_atomic_flags_set(tq, DQF_TARGETED);
-	if (!dq->dq_wlh) {
-		dispatch_wlh_t wlh = _dispatch_queue_class_compute_wlh(dq);
-		if (wlh) _dispatch_queue_class_record_wlh_hierarchy(dq, wlh);
-	}
+	_dispatch_queue_inherit_wlh_from_target(dq, tq);
 }
 
 DISPATCH_ALWAYS_INLINE
@@ -4730,7 +5054,7 @@
 _dispatch_queue_invoke(dispatch_queue_t dq, dispatch_invoke_context_t dic,
 		dispatch_invoke_flags_t flags)
 {
-	_dispatch_queue_class_invoke(dq, dic, flags, dispatch_queue_invoke2);
+	_dispatch_queue_class_invoke(dq, dic, flags, 0, dispatch_queue_invoke2);
 }
 
 #pragma mark -
@@ -4749,7 +5073,6 @@
 
 	dou._do = dc->dc_data;
 	old_dp = _dispatch_root_queue_identity_assume(assumed_rq);
-	flags |= DISPATCH_INVOKE_OVERRIDING;
 	if (dc_type(dc) == DISPATCH_CONTINUATION_TYPE(OVERRIDE_STEALING)) {
 		flags |= DISPATCH_INVOKE_STEALING;
 	} else {
@@ -4768,19 +5091,6 @@
 	_dispatch_queue_set_current(old_rq);
 }
 
-#if DISPATCH_USE_KEVENT_WORKQUEUE
-DISPATCH_ALWAYS_INLINE
-static inline dispatch_qos_t
-_dispatch_qos_root_queue_push_wlh(dispatch_queue_t rq, dispatch_qos_t qos)
-{
-	// for root queues, the override is the guaranteed minimum override level
-	if (qos > _dispatch_priority_override_qos(rq->dq_priority)) {
-		return qos;
-	}
-	return _dispatch_priority_qos(rq->dq_priority);
-}
-#endif // DISPATCH_USE_KEVENT_WORKQUEUE
-
 DISPATCH_ALWAYS_INLINE
 static inline bool
 _dispatch_root_queue_push_needs_override(dispatch_queue_t rq,
@@ -4840,7 +5150,7 @@
 	dispatch_continuation_t dc = _dispatch_continuation_alloc();
 
 	dc->do_vtable = DC_VTABLE(OVERRIDE_STEALING);
-	_dispatch_retain(dq);
+	_dispatch_retain_2(dq);
 	dc->dc_func = NULL;
 	dc->dc_ctxt = dc;
 	dc->dc_other = orig_rq;
@@ -4851,27 +5161,19 @@
 }
 
 DISPATCH_NOINLINE
-void
-_dispatch_queue_class_wakeup_with_override(dispatch_queue_t dq,
-		dispatch_qos_t qos, dispatch_wakeup_flags_t flags, uint64_t dq_state)
+static void
+_dispatch_queue_class_wakeup_with_override_slow(dispatch_queue_t dq,
+		uint64_t dq_state, dispatch_wakeup_flags_t flags)
 {
-	mach_port_t owner = _dq_state_drain_owner(dq_state);
+	dispatch_qos_t oqos, qos = _dq_state_max_qos(dq_state);
 	dispatch_queue_t tq;
-	dispatch_qos_t oqos;
 	bool locked;
 
-	if (_dq_state_is_suspended(dq_state)) {
-		goto out;
-	}
-
-	if (owner) {
-		int rc = _dispatch_wqthread_override_start_check_owner(owner, qos,
+	if (_dq_state_is_base_anon(dq_state)) {
+		mach_port_t owner = _dq_state_drain_owner(dq_state);
+		if (owner) {
+			(void)_dispatch_wqthread_override_start_check_owner(owner, qos,
 				&dq->dq_state_lock);
-		// EPERM means the target of the override is not a work queue thread
-		// and could be a thread-bound queue such as the main queue.
-		// When that happens we must get to that queue and wake it up if we
-		// want the override to be appplied and take effect.
-		if (rc != EPERM) {
 			goto out;
 		}
 	}
@@ -4933,12 +5235,10 @@
 apply_again:
 	if (dx_type(tq) == DISPATCH_QUEUE_GLOBAL_ROOT_TYPE) {
 		if (_dispatch_root_queue_push_queue_override_needed(tq, qos)) {
-			_dispatch_root_queue_push_queue_override(tq, dq, qos);
+			_dispatch_root_queue_push_override_stealer(tq, dq, qos);
 		}
-	} else if (flags & DISPATCH_WAKEUP_WAITER_HANDOFF) {
-		dx_wakeup(tq, qos, flags);
 	} else if (_dispatch_queue_need_override(tq, qos)) {
-		dx_wakeup(tq, qos, DISPATCH_WAKEUP_OVERRIDING);
+		dx_wakeup(tq, qos, 0);
 	}
 	while (unlikely(locked && !_dispatch_queue_sidelock_tryunlock(dq))) {
 		// rdar://problem/24081326
@@ -4958,145 +5258,62 @@
 	}
 
 out:
-	if (flags & DISPATCH_WAKEUP_CONSUME) {
-		return _dispatch_release_tailcall(dq);
-	}
-}
-#endif // HAVE_PTHREAD_WORKQUEUE_QOS
-
-DISPATCH_NOINLINE
-void
-_dispatch_queue_class_override_drainer(dispatch_queue_t dq,
-		dispatch_qos_t qos, dispatch_wakeup_flags_t flags)
-{
-#if HAVE_PTHREAD_WORKQUEUE_QOS
-	uint64_t old_state, new_state;
-
-	//
-	// Someone is trying to override the last work item of the queue.
-	//
-	os_atomic_rmw_loop2o(dq, dq_state, old_state, new_state, relaxed, {
-		if (!_dq_state_drain_locked(old_state) &&
-				!_dq_state_is_dirty(old_state)) {
-			os_atomic_rmw_loop_give_up(goto done);
-		}
-		new_state = _dq_state_merge_qos(old_state, qos);
-		if (new_state == old_state) {
-			os_atomic_rmw_loop_give_up(goto done);
-		}
-	});
-	if (_dq_state_drain_locked(new_state)) {
-		return _dispatch_queue_class_wakeup_with_override(dq, qos,
-				flags, new_state);
-	}
-
-done:
-#else
-	(void)qos;
-#endif // HAVE_PTHREAD_WORKQUEUE_QOS
-	if (flags & DISPATCH_WAKEUP_CONSUME) {
-		return _dispatch_release_tailcall(dq);
+	if (flags & DISPATCH_WAKEUP_CONSUME_2) {
+		return _dispatch_release_2_tailcall(dq);
 	}
 }
 
-#if HAVE_PTHREAD_WORKQUEUE_QOS
-DISPATCH_NOINLINE
-static void
-_dispatch_root_queue_push_queue_override(dispatch_queue_t rq,
-		dispatch_queue_class_t dqu, dispatch_qos_t qos)
-{
-	// thread bound queues always have an owner set, so should never reach
-	// this codepath (see _dispatch_queue_class_wakeup_with_override).
-	dispatch_assert(!_dispatch_queue_is_thread_bound(dqu._dq));
-	_dispatch_root_queue_push_override_stealer(rq, dqu._dq, qos);
-}
-#endif // HAVE_PTHREAD_WORKQUEUE_QOS
 
-#if DISPATCH_USE_KEVENT_WORKQUEUE
 DISPATCH_ALWAYS_INLINE
 static inline void
-_dispatch_root_queue_push_queue(dispatch_queue_t rq, dispatch_queue_class_t dqu,
-		dispatch_qos_t qos)
+_dispatch_queue_class_wakeup_with_override(dispatch_queue_t dq,
+		uint64_t dq_state, dispatch_wakeup_flags_t flags)
 {
-	// thread bound queues aren't woken up on root queues
-	dispatch_assert(!_dispatch_queue_is_thread_bound(dqu._dq));
-	if (likely(_dispatch_root_queue_allows_wlh_for_queue(rq, dqu._dq))) {
-		dispatch_qos_t wlh_qos;
-		wlh_qos = _dispatch_qos_root_queue_push_wlh(rq, qos);
-	}
-#if HAVE_PTHREAD_WORKQUEUE_QOS
-	if (_dispatch_root_queue_push_needs_override(rq, qos)) {
-		return _dispatch_root_queue_push_override(rq, dqu._dq->_as_do, qos);
-	}
-#endif
-	_dispatch_root_queue_push_inline(rq, dqu._dq, dqu._dq, 1);
+	dispatch_assert(_dq_state_should_override(dq_state));
+
+	return _dispatch_queue_class_wakeup_with_override_slow(dq, dq_state, flags);
 }
-
-DISPATCH_NOINLINE
-static void
-_dispatch_root_queue_push_try_stash(dispatch_queue_t rq,
-		dispatch_queue_class_t dqu, dispatch_qos_t qos,
-		dispatch_deferred_items_t ddi)
-{
-	dispatch_wlh_t cur_wlh = _dispatch_get_wlh();
-	dispatch_wlh_t wlh = _dispatch_root_queue_wlh_for_queue(rq, dqu);
-	dispatch_queue_t old_dq = ddi->ddi_stashed_dq;
-	dispatch_priority_t rq_overcommit;
-	rq_overcommit = rq->dq_priority & DISPATCH_PRIORITY_FLAG_OVERCOMMIT;
-
-	if (cur_wlh != DISPATCH_WLH_GLOBAL) {
-		if (cur_wlh != (dispatch_wlh_t)dqu._dq) {
-			goto out;
-		}
-		dispatch_assert(old_dq == NULL);
-	}
-
-	if (likely(!old_dq || rq_overcommit)) {
-		dispatch_queue_t old_rq = ddi->ddi_stashed_rq;
-		dispatch_priority_t old_pri = ddi->ddi_stashed_pri;
-		ddi->ddi_stashed_rq = rq;
-		ddi->ddi_stashed_dq = dqu._dq;
-		ddi->ddi_stashed_pri = _dispatch_priority_make(qos, 0) | rq_overcommit;
-		_dispatch_debug("wlh[%p]: deferring item %p, rq %p, pri 0x%x",
-				cur_wlh, dqu._dq, rq, ddi->ddi_stashed_pri);
-		if (likely(!old_dq)) {
-			return;
-		}
-		// push the previously stashed item
-		qos = _dispatch_priority_qos(old_pri);
-		rq = old_rq;
-		dqu._dq = old_dq;
-	}
-
-out:
-	if (cur_wlh != DISPATCH_WLH_GLOBAL) {
-		_dispatch_debug("wlh[%p]: not deferring item %p with wlh %p, rq %p",
-				cur_wlh, dqu._dq, wlh, rq);
-	}
-	_dispatch_root_queue_push_queue(rq, dqu, qos);
-}
-#endif // DISPATCH_USE_KEVENT_WORKQUEUE
+#endif // HAVE_PTHREAD_WORKQUEUE_QOS
 
 DISPATCH_NOINLINE
 void
-_dispatch_root_queue_push(dispatch_queue_t dq, dispatch_object_t dou,
+_dispatch_root_queue_push(dispatch_queue_t rq, dispatch_object_t dou,
 		dispatch_qos_t qos)
 {
 #if DISPATCH_USE_KEVENT_WORKQUEUE
-	if (_dispatch_object_has_vtable(dou) && dx_vtable(dou._do)->do_push) {
-		dispatch_deferred_items_t ddi = _dispatch_deferred_items_get();
-		if (unlikely(ddi && ddi->ddi_stashed_pri != DISPATCH_PRIORITY_NOSTASH)){
-			return _dispatch_root_queue_push_try_stash(dq, dou._dq, qos, ddi);
+	dispatch_deferred_items_t ddi = _dispatch_deferred_items_get();
+	if (unlikely(ddi && ddi->ddi_can_stash)) {
+		dispatch_object_t old_dou = ddi->ddi_stashed_dou;
+		dispatch_priority_t rq_overcommit;
+		rq_overcommit = rq->dq_priority & DISPATCH_PRIORITY_FLAG_OVERCOMMIT;
+
+		if (likely(!old_dou._do || rq_overcommit)) {
+			dispatch_queue_t old_rq = ddi->ddi_stashed_rq;
+			dispatch_qos_t old_qos = ddi->ddi_stashed_qos;
+			ddi->ddi_stashed_rq = rq;
+			ddi->ddi_stashed_dou = dou;
+			ddi->ddi_stashed_qos = qos;
+			_dispatch_debug("deferring item %p, rq %p, qos %d",
+					dou._do, rq, qos);
+			if (rq_overcommit) {
+				ddi->ddi_can_stash = false;
+			}
+			if (likely(!old_dou._do)) {
+				return;
+			}
+			// push the previously stashed item
+			qos = old_qos;
+			rq = old_rq;
+			dou = old_dou;
 		}
-		return _dispatch_root_queue_push_queue(dq, dou._dq, qos);
 	}
 #endif
 #if HAVE_PTHREAD_WORKQUEUE_QOS
-	if (_dispatch_root_queue_push_needs_override(dq, qos)) {
-		return _dispatch_root_queue_push_override(dq, dou, qos);
+	if (_dispatch_root_queue_push_needs_override(rq, qos)) {
+		return _dispatch_root_queue_push_override(rq, dou, qos);
 	}
 #endif
-	_dispatch_root_queue_push_inline(dq, dou, dou, 1);
+	_dispatch_root_queue_push_inline(rq, dou, dou, 1);
 }
 
 void
@@ -5105,10 +5322,10 @@
 {
 	if (!(flags & DISPATCH_WAKEUP_BLOCK_WAIT)) {
 		DISPATCH_INTERNAL_CRASH(dq->dq_priority,
-				"Trying to wake up or override a root queue");
+				"Don't try to wake up or override a root queue");
 	}
-	if (flags & DISPATCH_WAKEUP_CONSUME) {
-		return _dispatch_release_tailcall(dq);
+	if (flags & DISPATCH_WAKEUP_CONSUME_2) {
+		return _dispatch_release_2_tailcall(dq);
 	}
 }
 
@@ -5122,150 +5339,179 @@
 
 DISPATCH_NOINLINE
 void
-_dispatch_queue_class_wakeup_enqueue(dispatch_queue_t dq, dispatch_qos_t qos,
-		dispatch_wakeup_flags_t flags, dispatch_queue_wakeup_target_t target)
-{
-	dispatch_queue_t tq;
-
-	if (!(flags & DISPATCH_WAKEUP_CONSUME)) {
-		_dispatch_retain(dq);
-	}
-	if (target == DISPATCH_QUEUE_WAKEUP_TARGET) {
-		// try_become_enqueuer has no acquire barrier, as the last block
-		// of a queue asyncing to that queue is not an uncommon pattern
-		// and in that case the acquire is completely useless
-		//
-		// so instead use depdendency ordering to read the targetq pointer.
-		os_atomic_thread_fence(dependency);
-		tq = os_atomic_load_with_dependency_on2o(dq, do_targetq, (long)qos);
-	} else {
-		tq = target;
-	}
-	return dx_push(tq, dq, qos);
-}
-
-DISPATCH_ALWAYS_INLINE
-static void
-_dispatch_queue_class_wakeup_finish(dispatch_queue_t dq, dispatch_qos_t qos,
-		dispatch_wakeup_flags_t flags, dispatch_queue_wakeup_target_t target,
-		uint64_t old_state, uint64_t new_state)
-{
-	dispatch_assert(target != DISPATCH_QUEUE_WAKEUP_NONE);
-	dispatch_assert(target != DISPATCH_QUEUE_WAKEUP_WAIT_FOR_EVENT);
-
-	if ((old_state ^ new_state) & DISPATCH_QUEUE_MAX_QOS_MASK) {
-		flags |= DISPATCH_WAKEUP_OVERRIDING;
-	} else {
-		flags &= ~(dispatch_wakeup_flags_t)DISPATCH_WAKEUP_OVERRIDING;
-		qos = _dq_state_max_qos(new_state);
-	}
-	if ((old_state ^ new_state) & DISPATCH_QUEUE_ENQUEUED) {
-		return _dispatch_queue_class_wakeup_enqueue(dq, qos, flags, target);
-	}
-
-#if HAVE_PTHREAD_WORKQUEUE_QOS
-	if ((flags & (DISPATCH_WAKEUP_OVERRIDING | DISPATCH_WAKEUP_WAITER_HANDOFF))
-			&& target != DISPATCH_QUEUE_WAKEUP_MGR) {
-		return _dispatch_queue_class_wakeup_with_override(dq, qos,
-				flags, new_state);
-	}
-#endif
-
-	if (flags & DISPATCH_WAKEUP_CONSUME) {
-		return _dispatch_release_tailcall(dq);
-	}
-}
-
-DISPATCH_NOINLINE
-void
 _dispatch_queue_class_wakeup(dispatch_queue_t dq, dispatch_qos_t qos,
 		dispatch_wakeup_flags_t flags, dispatch_queue_wakeup_target_t target)
 {
-	uint64_t old_state, new_state;
+	dispatch_assert(target != DISPATCH_QUEUE_WAKEUP_WAIT_FOR_EVENT);
 
-	qos = _dispatch_queue_override_qos(dq, qos);
-	os_atomic_rmw_loop2o(dq, dq_state, old_state, new_state, release, {
-		new_state = _dq_state_merge_qos(old_state, qos);
-		if (likely(_dq_state_should_wakeup(old_state))) {
-			new_state |= DISPATCH_QUEUE_ENQUEUED;
-		}
-		if (flags & DISPATCH_WAKEUP_FLUSH) {
-			new_state |= DISPATCH_QUEUE_DIRTY;
-		} else if (new_state == old_state) {
-			os_atomic_rmw_loop_give_up(break);
-		}
-	});
+	if (target && !(flags & DISPATCH_WAKEUP_CONSUME_2)) {
+		_dispatch_retain_2(dq);
+		flags |= DISPATCH_WAKEUP_CONSUME_2;
+	}
 
-	return _dispatch_queue_class_wakeup_finish(dq, qos, flags, target,
-			old_state, new_state);
+	if (unlikely(flags & DISPATCH_WAKEUP_BARRIER_COMPLETE)) {
+		//
+		// _dispatch_queue_class_barrier_complete() is about what both regular
+		// queues and sources need to evaluate, but the former can have sync
+		// handoffs to perform which _dispatch_queue_class_barrier_complete()
+		// doesn't handle, only _dispatch_queue_barrier_complete() does.
+		//
+		// _dispatch_queue_wakeup() is the one for plain queues that calls
+		// _dispatch_queue_barrier_complete(), and this path is only taken for
+		// non-queue types.
+		//
+		dispatch_assert(dx_metatype(dq) != _DISPATCH_QUEUE_TYPE);
+		return _dispatch_queue_class_barrier_complete(dq, qos, flags, target,
+				DISPATCH_QUEUE_SERIAL_DRAIN_OWNED);
+	}
+
+	if (target) {
+		uint64_t old_state, new_state, enqueue = DISPATCH_QUEUE_ENQUEUED;
+		if (target == DISPATCH_QUEUE_WAKEUP_MGR) {
+			enqueue = DISPATCH_QUEUE_ENQUEUED_ON_MGR;
+		}
+		qos = _dispatch_queue_override_qos(dq, qos);
+		os_atomic_rmw_loop2o(dq, dq_state, old_state, new_state, release, {
+			new_state = _dq_state_merge_qos(old_state, qos);
+			if (likely(!_dq_state_is_suspended(old_state) &&
+					!_dq_state_is_enqueued(old_state) &&
+					(!_dq_state_drain_locked(old_state) ||
+					(enqueue != DISPATCH_QUEUE_ENQUEUED_ON_MGR &&
+					_dq_state_is_base_wlh(old_state))))) {
+				new_state |= enqueue;
+			}
+			if (flags & DISPATCH_WAKEUP_MAKE_DIRTY) {
+				new_state |= DISPATCH_QUEUE_DIRTY;
+			} else if (new_state == old_state) {
+				os_atomic_rmw_loop_give_up(goto done);
+			}
+		});
+
+		if (likely((old_state ^ new_state) & enqueue)) {
+			dispatch_queue_t tq;
+			if (target == DISPATCH_QUEUE_WAKEUP_TARGET) {
+				// the rmw_loop above has no acquire barrier, as the last block
+				// of a queue asyncing to that queue is not an uncommon pattern
+				// and in that case the acquire would be completely useless
+				//
+				// so instead use dependency ordering to read
+				// the targetq pointer.
+				os_atomic_thread_fence(dependency);
+				tq = os_atomic_load_with_dependency_on2o(dq, do_targetq,
+						(long)new_state);
+			} else {
+				tq = target;
+			}
+			dispatch_assert(_dq_state_is_enqueued(new_state));
+			return _dispatch_queue_push_queue(tq, dq, new_state);
+		}
+#if HAVE_PTHREAD_WORKQUEUE_QOS
+		if (unlikely((old_state ^ new_state) & DISPATCH_QUEUE_MAX_QOS_MASK)) {
+			if (_dq_state_should_override(new_state)) {
+				return _dispatch_queue_class_wakeup_with_override(dq, new_state,
+						flags);
+			}
+		}
+	} else if (qos) {
+		//
+		// Someone is trying to override the last work item of the queue.
+		//
+		uint64_t old_state, new_state;
+		os_atomic_rmw_loop2o(dq, dq_state, old_state, new_state, relaxed, {
+			if (!_dq_state_drain_locked(old_state) ||
+					!_dq_state_is_enqueued(old_state)) {
+				os_atomic_rmw_loop_give_up(goto done);
+			}
+			new_state = _dq_state_merge_qos(old_state, qos);
+			if (new_state == old_state) {
+				os_atomic_rmw_loop_give_up(goto done);
+			}
+		});
+		if (_dq_state_should_override(new_state)) {
+			return _dispatch_queue_class_wakeup_with_override(dq, new_state,
+					flags);
+		}
+#endif // HAVE_PTHREAD_WORKQUEUE_QOS
+	}
+done:
+	if (likely(flags & DISPATCH_WAKEUP_CONSUME_2)) {
+		return _dispatch_release_2_tailcall(dq);
+	}
 }
 
 DISPATCH_NOINLINE
 static void
 _dispatch_queue_push_sync_waiter(dispatch_queue_t dq,
-		dispatch_sync_context_t dsc)
+		dispatch_sync_context_t dsc, dispatch_qos_t qos)
 {
-	uint64_t pending_barrier_width =
-			(dq->dq_width - 1) * DISPATCH_QUEUE_WIDTH_INTERVAL;
-	uint64_t xor_owner_and_set_full_width_and_in_barrier =
-			_dispatch_tid_self() | DISPATCH_QUEUE_WIDTH_FULL_BIT |
-			DISPATCH_QUEUE_IN_BARRIER;
-	dispatch_qos_t qos = _dispatch_continuation_override_qos(dq, dsc->_as_dc);
 	uint64_t old_state, new_state;
-	dispatch_wakeup_flags_t flags = 0;
+
+	if (unlikely(dx_type(dq) == DISPATCH_QUEUE_NETWORK_EVENT_TYPE)) {
+		DISPATCH_CLIENT_CRASH(0,
+				"dispatch_sync onto a network event queue");
+	}
 
 	_dispatch_trace_continuation_push(dq, dsc->_as_dc);
+
 	if (unlikely(_dispatch_queue_push_update_tail(dq, dsc->_as_do))) {
 		// for slow waiters, we borrow the reference of the caller
 		// so we don't need to protect the wakeup with a temporary retain
 		_dispatch_queue_push_update_head(dq, dsc->_as_do);
-		flags = DISPATCH_WAKEUP_FLUSH;
 		if (unlikely(_dispatch_queue_is_thread_bound(dq))) {
-			return dx_wakeup(dq, qos, flags);
+			return dx_wakeup(dq, qos, DISPATCH_WAKEUP_MAKE_DIRTY);
 		}
-	}
 
-	os_atomic_rmw_loop2o(dq, dq_state, old_state, new_state, release, {
-		new_state = _dq_state_merge_qos(old_state, qos);
-#ifdef DLOCK_NOWAITERS_BIT
-		new_state |= DLOCK_NOWAITERS_BIT;
-#else
-		new_state |= DLOCK_WAITERS_BIT;
-#endif
-		if (flags & DISPATCH_WAKEUP_FLUSH) {
+		uint64_t pending_barrier_width =
+				(dq->dq_width - 1) * DISPATCH_QUEUE_WIDTH_INTERVAL;
+		uint64_t set_owner_and_set_full_width_and_in_barrier =
+				_dispatch_lock_value_for_self() |
+				DISPATCH_QUEUE_WIDTH_FULL_BIT | DISPATCH_QUEUE_IN_BARRIER;
+		// similar to _dispatch_queue_drain_try_unlock()
+		os_atomic_rmw_loop2o(dq, dq_state, old_state, new_state, release, {
+			new_state  = _dq_state_merge_qos(old_state, qos);
 			new_state |= DISPATCH_QUEUE_DIRTY;
-		}
-		if (_dq_state_drain_pended(old_state)) {
-			// same as DISPATCH_QUEUE_DRAIN_UNLOCK
-			// but we want to be more efficient wrt the WAITERS_BIT
-			new_state &= ~DISPATCH_QUEUE_DRAIN_OWNER_MASK;
-			new_state &= ~DISPATCH_QUEUE_DRAIN_PENDED;
-		}
-		if (unlikely(_dq_state_drain_locked(new_state))) {
-#ifdef DLOCK_NOWAITERS_BIT
-			new_state &= ~(uint64_t)DLOCK_NOWAITERS_BIT;
-#endif
-		} else if (unlikely(!_dq_state_is_runnable(new_state) ||
-				!(flags & DISPATCH_WAKEUP_FLUSH))) {
-			// either not runnable, or was not for the first item (26700358)
-			// so we should not try to lock and handle overrides instead
-		} else if (_dq_state_has_pending_barrier(old_state) ||
-				new_state + pending_barrier_width <
-				DISPATCH_QUEUE_WIDTH_FULL_BIT) {
-			// see _dispatch_queue_drain_try_lock
-			new_state &= DISPATCH_QUEUE_DRAIN_PRESERVED_BITS_MASK;
-			new_state ^= xor_owner_and_set_full_width_and_in_barrier;
-		} else {
-			new_state |= DISPATCH_QUEUE_ENQUEUED;
-		}
-	});
+			if (unlikely(_dq_state_drain_locked(old_state) ||
+					!_dq_state_is_runnable(old_state))) {
+				// not runnable, so we should just handle overrides
+			} else if (_dq_state_is_base_wlh(old_state) &&
+					_dq_state_is_enqueued(old_state)) {
+				// 32123779 let the event thread redrive since it's out already
+			} else if (_dq_state_has_pending_barrier(old_state) ||
+					new_state + pending_barrier_width <
+					DISPATCH_QUEUE_WIDTH_FULL_BIT) {
+				// see _dispatch_queue_drain_try_lock
+				new_state &= DISPATCH_QUEUE_DRAIN_PRESERVED_BITS_MASK;
+				new_state |= set_owner_and_set_full_width_and_in_barrier;
+			}
+		});
 
-	if ((old_state ^ new_state) & DISPATCH_QUEUE_IN_BARRIER) {
-		return _dispatch_try_lock_transfer_or_wakeup(dq);
+		if (_dq_state_is_base_wlh(old_state) &&
+				(dsc->dsc_waiter == _dispatch_tid_self())) {
+			dsc->dsc_wlh_was_first = true;
+		}
+
+		if ((old_state ^ new_state) & DISPATCH_QUEUE_IN_BARRIER) {
+			return _dispatch_queue_barrier_complete(dq, qos, 0);
+		}
+#if HAVE_PTHREAD_WORKQUEUE_QOS
+		if (unlikely((old_state ^ new_state) & DISPATCH_QUEUE_MAX_QOS_MASK)) {
+			if (_dq_state_should_override(new_state)) {
+				return _dispatch_queue_class_wakeup_with_override(dq,
+						new_state, 0);
+			}
+		}
+	} else if (unlikely(qos)) {
+		os_atomic_rmw_loop2o(dq, dq_state, old_state, new_state, relaxed, {
+			new_state = _dq_state_merge_qos(old_state, qos);
+			if (old_state == new_state) {
+				os_atomic_rmw_loop_give_up(return);
+			}
+		});
+		if (_dq_state_should_override(new_state)) {
+			return _dispatch_queue_class_wakeup_with_override(dq, new_state, 0);
+		}
+#endif // HAVE_PTHREAD_WORKQUEUE_QOS
 	}
-
-	return _dispatch_queue_class_wakeup_finish(dq, qos, flags,
-			DISPATCH_QUEUE_WAKEUP_TARGET, old_state, new_state);
 }
 
 #pragma mark -
@@ -5381,37 +5627,88 @@
 	return head;
 }
 
+#if DISPATCH_USE_KEVENT_WORKQUEUE
 void
-_dispatch_root_queue_drain_deferred_item(dispatch_queue_t rq,
-		dispatch_queue_t dq DISPATCH_PERF_MON_ARGS_PROTO)
+_dispatch_root_queue_drain_deferred_wlh(dispatch_deferred_items_t ddi
+		DISPATCH_PERF_MON_ARGS_PROTO)
 {
-	// fake that we queued `dq` on `rq` for introspection purposes
-	_dispatch_trace_continuation_push(rq, dq);
-
+	dispatch_queue_t rq = ddi->ddi_stashed_rq;
+	dispatch_queue_t dq = ddi->ddi_stashed_dou._dq;
 	_dispatch_queue_set_current(rq);
-	dispatch_priority_t old_pri = _dispatch_set_basepri(rq->dq_priority);
-#if DISPATCH_COCOA_COMPAT
-	void *pool = _dispatch_last_resort_autorelease_pool_push();
-#endif // DISPATCH_COCOA_COMPAT
-
+	dispatch_priority_t old_pri = _dispatch_set_basepri_wlh(rq->dq_priority);
 	dispatch_invoke_context_s dic = { };
 	dispatch_invoke_flags_t flags = DISPATCH_INVOKE_WORKER_DRAIN |
-			DISPATCH_INVOKE_REDIRECTING_DRAIN;
+			DISPATCH_INVOKE_REDIRECTING_DRAIN | DISPATCH_INVOKE_WLH;
 	_dispatch_queue_drain_init_narrowing_check_deadline(&dic, rq->dq_priority);
-	_dispatch_continuation_pop_inline(dq, &dic, flags, rq);
+	uint64_t dq_state;
+
+	ddi->ddi_wlh_servicing = true;
+	if (unlikely(_dispatch_needs_to_return_to_kernel())) {
+		_dispatch_return_to_kernel();
+	}
+retry:
+	dispatch_assert(ddi->ddi_wlh_needs_delete);
+	_dispatch_trace_continuation_pop(rq, dq);
+
+	if (_dispatch_queue_drain_try_lock_wlh(dq, &dq_state)) {
+		dx_invoke(dq, &dic, flags);
+		if (!ddi->ddi_wlh_needs_delete) {
+			goto park;
+		}
+		dq_state = os_atomic_load2o(dq, dq_state, relaxed);
+		if (unlikely(_dq_state_is_enqueued_on_target(dq_state))) {
+			_dispatch_retain(dq);
+			_dispatch_trace_continuation_push(dq->do_targetq, dq);
+			goto retry;
+		}
+	} else {
+		_dispatch_release_no_dispose(dq);
+	}
+
+	_dispatch_event_loop_leave_deferred((dispatch_wlh_t)dq, dq_state);
+
+park:
 	// event thread that could steal
 	_dispatch_perfmon_end(perfmon_thread_event_steal);
-
-#if DISPATCH_COCOA_COMPAT
-	_dispatch_last_resort_autorelease_pool_pop(pool);
-#endif // DISPATCH_COCOA_COMPAT
 	_dispatch_reset_basepri(old_pri);
+	_dispatch_reset_basepri_override();
 	_dispatch_queue_set_current(NULL);
 
 	_dispatch_voucher_debug("root queue clear", NULL);
 	_dispatch_reset_voucher(NULL, DISPATCH_THREAD_PARK);
 }
 
+void
+_dispatch_root_queue_drain_deferred_item(dispatch_deferred_items_t ddi
+		DISPATCH_PERF_MON_ARGS_PROTO)
+{
+	dispatch_queue_t rq = ddi->ddi_stashed_rq;
+	_dispatch_queue_set_current(rq);
+	dispatch_priority_t old_pri = _dispatch_set_basepri(rq->dq_priority);
+
+	dispatch_invoke_context_s dic = { };
+	dispatch_invoke_flags_t flags = DISPATCH_INVOKE_WORKER_DRAIN |
+			DISPATCH_INVOKE_REDIRECTING_DRAIN;
+#if DISPATCH_COCOA_COMPAT
+	_dispatch_last_resort_autorelease_pool_push(&dic);
+#endif // DISPATCH_COCOA_COMPAT
+	_dispatch_queue_drain_init_narrowing_check_deadline(&dic, rq->dq_priority);
+	_dispatch_continuation_pop_inline(ddi->ddi_stashed_dou, &dic, flags, rq);
+
+	// event thread that could steal
+	_dispatch_perfmon_end(perfmon_thread_event_steal);
+#if DISPATCH_COCOA_COMPAT
+	_dispatch_last_resort_autorelease_pool_pop(&dic);
+#endif // DISPATCH_COCOA_COMPAT
+	_dispatch_reset_basepri(old_pri);
+	_dispatch_reset_basepri_override();
+	_dispatch_queue_set_current(NULL);
+
+	_dispatch_voucher_debug("root queue clear", NULL);
+	_dispatch_reset_voucher(NULL, DISPATCH_THREAD_PARK);
+}
+#endif
+
 DISPATCH_NOT_TAIL_CALLED // prevent tailcall (for Instrument DTrace probe)
 static void
 _dispatch_root_queue_drain(dispatch_queue_t dq, pthread_priority_t pp)
@@ -5426,14 +5723,14 @@
 	dispatch_priority_t pri = dq->dq_priority;
 	if (!pri) pri = _dispatch_priority_from_pp(pp);
 	dispatch_priority_t old_dbp = _dispatch_set_basepri(pri);
-	_dispatch_set_wlh(DISPATCH_WLH_GLOBAL);
-#if DISPATCH_COCOA_COMPAT
-	void *pool = _dispatch_last_resort_autorelease_pool_push();
-#endif // DISPATCH_COCOA_COMPAT
+	_dispatch_adopt_wlh_anon();
 
 	struct dispatch_object_s *item;
 	bool reset = false;
 	dispatch_invoke_context_s dic = { };
+#if DISPATCH_COCOA_COMPAT
+	_dispatch_last_resort_autorelease_pool_push(&dic);
+#endif // DISPATCH_COCOA_COMPAT
 	dispatch_invoke_flags_t flags = DISPATCH_INVOKE_WORKER_DRAIN |
 			DISPATCH_INVOKE_REDIRECTING_DRAIN;
 	_dispatch_queue_drain_init_narrowing_check_deadline(&dic, pri);
@@ -5455,10 +5752,11 @@
 	}
 
 #if DISPATCH_COCOA_COMPAT
-	_dispatch_last_resort_autorelease_pool_pop(pool);
+	_dispatch_last_resort_autorelease_pool_pop(&dic);
 #endif // DISPATCH_COCOA_COMPAT
 	_dispatch_reset_wlh();
 	_dispatch_reset_basepri(old_dbp);
+	_dispatch_reset_basepri_override();
 	_dispatch_queue_set_current(NULL);
 }
 
@@ -5560,13 +5858,25 @@
 #endif
 	(void)os_atomic_inc2o(qc, dgq_thread_pool_size, release);
 	_dispatch_global_queue_poke(dq, 1, 0);
-	_dispatch_release(dq);
-
+	_dispatch_release(dq); // retained in _dispatch_global_queue_poke_slow
 	return NULL;
 }
 #endif // DISPATCH_USE_PTHREAD_POOL
 
 #pragma mark -
+#pragma mark dispatch_network_root_queue
+#if TARGET_OS_MAC
+
+dispatch_queue_t
+_dispatch_network_root_queue_create_4NW(const char *label,
+		const pthread_attr_t *attrs, dispatch_block_t configure)
+{
+	unsigned long flags = dispatch_pthread_root_queue_flags_pool_size(1);
+	return dispatch_pthread_root_queue_create(label, flags, attrs, configure);
+}
+
+#endif // TARGET_OS_MAC
+#pragma mark -
 #pragma mark dispatch_runloop_queue
 
 static bool _dispatch_program_is_probably_callback_driven;
@@ -5583,11 +5893,11 @@
 		return DISPATCH_BAD_INPUT;
 	}
 	dqs = sizeof(struct dispatch_queue_s) - DISPATCH_QUEUE_CACHELINE_PAD;
-	dq = _dispatch_alloc(DISPATCH_VTABLE(queue_runloop), dqs);
-	_dispatch_queue_init(dq, DQF_THREAD_BOUND | DQF_CANNOT_TRYSYNC, 1, false);
+	dq = _dispatch_object_alloc(DISPATCH_VTABLE(queue_runloop), dqs);
+	_dispatch_queue_init(dq, DQF_THREAD_BOUND | DQF_CANNOT_TRYSYNC, 1,
+			DISPATCH_QUEUE_ROLE_BASE_ANON);
 	dq->do_targetq = _dispatch_get_root_queue(DISPATCH_QOS_DEFAULT, true);
 	dq->dq_label = label ? label : "runloop-queue"; // no-copy contract
-	dq->dq_wlh = DISPATCH_WLH_GLOBAL;
 	_dispatch_runloop_queue_handle_init(dq);
 	_dispatch_queue_set_bound_thread(dq);
 	_dispatch_object_debug(dq, "%s", __func__);
@@ -5599,19 +5909,19 @@
 {
 	_dispatch_object_debug(dq, "%s", __func__);
 
-	dispatch_qos_t qos = _dispatch_queue_reset_max_qos(dq);
+	dispatch_qos_t qos = _dispatch_runloop_queue_reset_max_qos(dq);
 	_dispatch_queue_clear_bound_thread(dq);
-	dx_wakeup(dq, qos, DISPATCH_WAKEUP_FLUSH);
+	dx_wakeup(dq, qos, DISPATCH_WAKEUP_MAKE_DIRTY);
 	if (qos) _dispatch_thread_override_end(DISPATCH_QUEUE_DRAIN_OWNER(dq), dq);
 }
 
 void
-_dispatch_runloop_queue_dispose(dispatch_queue_t dq)
+_dispatch_runloop_queue_dispose(dispatch_queue_t dq, bool *allow_free)
 {
 	_dispatch_object_debug(dq, "%s", __func__);
 	_dispatch_introspection_queue_dispose(dq);
 	_dispatch_runloop_queue_handle_dispose(dq);
-	_dispatch_queue_destroy(dq);
+	_dispatch_queue_destroy(dq, allow_free);
 }
 
 bool
@@ -5846,7 +6156,7 @@
 		new_state += DISPATCH_QUEUE_IN_BARRIER;
 	});
 	_dispatch_queue_atomic_flags_clear(dq, DQF_THREAD_BOUND|DQF_CANNOT_TRYSYNC);
-	_dispatch_try_lock_transfer_or_wakeup(dq);
+	_dispatch_queue_barrier_complete(dq, 0, 0);
 
 	// overload the "probably" variable to mean that dispatch_main() or
 	// similar non-POSIX API was called
@@ -5878,12 +6188,13 @@
 			"Premature thread exit while a dispatch queue is running");
 }
 
-DISPATCH_NORETURN
 static void
 _dispatch_wlh_cleanup(void *ctxt)
 {
 	// POSIX defines that destructors are only called if 'ctxt' is non-null
-	DISPATCH_INTERNAL_CRASH(ctxt, "Premature thread exit with active wlh");
+	dispatch_queue_t wlh;
+	wlh = (dispatch_queue_t)((uintptr_t)ctxt & ~DISPATCH_WLH_STORAGE_REF);
+	_dispatch_queue_release_storage(wlh);
 }
 
 DISPATCH_NORETURN
diff --git a/src/queue_internal.h b/src/queue_internal.h
index 91a3186..c1d0f6e 100644
--- a/src/queue_internal.h
+++ b/src/queue_internal.h
@@ -44,6 +44,9 @@
 #define DISPATCH_CACHELINE_ALIGN \
 		__attribute__((__aligned__(DISPATCH_CACHELINE_SIZE)))
 
+#define DISPATCH_CACHELINE_PAD_SIZE(type) \
+		(roundup(sizeof(type), DISPATCH_CACHELINE_SIZE) - sizeof(type))
+
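For illustration only (not part of the patch): assuming 64-byte cache lines, a hypothetical 72-byte header would get 56 bytes of padding from this macro, so the padded type ends exactly on a cache-line boundary.

struct example_hdr_s { char bytes[72]; }; // hypothetical 72-byte header
_Static_assert(DISPATCH_CACHELINE_SIZE != 64 ||
		DISPATCH_CACHELINE_PAD_SIZE(struct example_hdr_s) == 56,
		"roundup(72, 64) - 72 == 56 when cache lines are 64 bytes");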
 
 #pragma mark -
 #pragma mark dispatch_queue_t
@@ -60,7 +63,6 @@
 	DQF_CANNOT_TRYSYNC      = 0x00400000,
 	DQF_RELEASED            = 0x00800000, // xref_cnt == -1
 	DQF_LEGACY              = 0x01000000,
-	DQF_WLH_CHANGED         = 0x02000000, // queue wlh changed from initial value
 
 	// only applies to sources
 	//
@@ -99,6 +101,7 @@
 	//    `a` cannot do a cleared -> set transition anymore
 	//    (see _dispatch_source_try_set_armed).
 	//
+	DSF_WLH_CHANGED         = 0x04000000,
 	DSF_CANCEL_WAITER       = 0x08000000, // synchronous waiters for cancel
 	DSF_CANCELED            = 0x10000000, // cancellation has been requested
 	DSF_ARMED               = 0x20000000, // source is armed
@@ -115,10 +118,6 @@
 	struct os_mpsc_queue_s _as_oq[0]; \
 	DISPATCH_OBJECT_HEADER(x); \
 	_OS_MPSC_QUEUE_FIELDS(dq, dq_state); \
-	DISPATCH_UNION_LE(uint32_t volatile dq_atomic_flags, \
-		const uint16_t dq_width, \
-		const uint16_t __dq_opaque \
-	); \
 	uint32_t dq_side_suspend_cnt; \
 	dispatch_unfair_lock_s dq_sidelock; \
 	union { \
@@ -127,27 +126,26 @@
 		struct dispatch_timer_source_refs_s *ds_timer_refs; \
 		struct dispatch_mach_recv_refs_s *dm_recv_refs; \
 	}; \
+	DISPATCH_UNION_LE(uint32_t volatile dq_atomic_flags, \
+		const uint16_t dq_width, \
+		const uint16_t __dq_opaque \
+	); \
 	DISPATCH_INTROSPECTION_QUEUE_HEADER
+	/* LP64: 32bit hole */
 
 #define DISPATCH_QUEUE_HEADER(x) \
 	struct dispatch_queue_s _as_dq[0]; \
 	_DISPATCH_QUEUE_HEADER(x)
 
+struct _dispatch_unpadded_queue_s {
+	_DISPATCH_QUEUE_HEADER(dummy);
+};
+
+#define DISPATCH_QUEUE_CACHELINE_PAD \
+		DISPATCH_CACHELINE_PAD_SIZE(struct _dispatch_unpadded_queue_s)
+
 #define DISPATCH_QUEUE_CACHELINE_PADDING \
 		char _dq_pad[DISPATCH_QUEUE_CACHELINE_PAD]
-#ifdef __LP64__
-#define DISPATCH_QUEUE_CACHELINE_PAD (( \
-		(sizeof(uint32_t) - DISPATCH_INTROSPECTION_QUEUE_HEADER_SIZE) \
-		+ DISPATCH_CACHELINE_SIZE) % DISPATCH_CACHELINE_SIZE)
-#elif OS_OBJECT_HAVE_OBJC1
-#define DISPATCH_QUEUE_CACHELINE_PAD (( \
-		(11*sizeof(void*) - DISPATCH_INTROSPECTION_QUEUE_HEADER_SIZE) \
-		+ DISPATCH_CACHELINE_SIZE) % DISPATCH_CACHELINE_SIZE)
-#else
-#define DISPATCH_QUEUE_CACHELINE_PAD (( \
-		(12*sizeof(void*) - DISPATCH_INTROSPECTION_QUEUE_HEADER_SIZE) \
-		+ DISPATCH_CACHELINE_SIZE) % DISPATCH_CACHELINE_SIZE)
-#endif
 
 /*
  * dispatch queues `dq_state` demystified
@@ -157,27 +155,27 @@
  * Most Significant 32 bit Word
  * ----------------------------
  *
- * sc: suspend count (bits 63 - 57)
+ * sc: suspend count (bits 63 - 58)
 *    The suspend count unsurprisingly holds the suspend count of the queue.
 *    Only 6 bits are stored inline. Extra counts are transferred in a side
  *    suspend count and when that has happened, the ssc: bit is set.
  */
-#define DISPATCH_QUEUE_SUSPEND_INTERVAL		0x0200000000000000ull
-#define DISPATCH_QUEUE_SUSPEND_HALF			0x40u
+#define DISPATCH_QUEUE_SUSPEND_INTERVAL		0x0400000000000000ull
+#define DISPATCH_QUEUE_SUSPEND_HALF			0x20u
 /*
- * ssc: side suspend count (bit 56)
+ * ssc: side suspend count (bit 57)
  *    This bit means that the total suspend count didn't fit in the inline
  *    suspend count, and that there are additional suspend counts stored in the
  *    `dq_side_suspend_cnt` field.
  */
-#define DISPATCH_QUEUE_HAS_SIDE_SUSPEND_CNT	0x0100000000000000ull
+#define DISPATCH_QUEUE_HAS_SIDE_SUSPEND_CNT	0x0200000000000000ull
 /*
- * i: inactive bit (bit 55)
+ * i: inactive bit (bit 56)
  *    This bit means that the object is inactive (see dispatch_activate)
  */
-#define DISPATCH_QUEUE_INACTIVE				0x0080000000000000ull
+#define DISPATCH_QUEUE_INACTIVE				0x0100000000000000ull
 /*
- * na: needs activation (bit 54)
+ * na: needs activation (bit 55)
  *    This bit is set if the object is created inactive. It tells
  *    dispatch_queue_wakeup to perform various tasks at first wakeup.
  *
@@ -185,24 +183,24 @@
  *    the object from being woken up (because _dq_state_should_wakeup will say
  *    no), except in the dispatch_activate/dispatch_resume codepath.
  */
-#define DISPATCH_QUEUE_NEEDS_ACTIVATION		0x0040000000000000ull
+#define DISPATCH_QUEUE_NEEDS_ACTIVATION		0x0080000000000000ull
 /*
  * This mask covers the suspend count (sc), side suspend count bit (ssc),
  * inactive (i) and needs activation (na) bits
  */
-#define DISPATCH_QUEUE_SUSPEND_BITS_MASK	0xffc0000000000000ull
+#define DISPATCH_QUEUE_SUSPEND_BITS_MASK	0xff80000000000000ull
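For illustration only (not part of the patch): with the layout above, the inline suspend count can be read back by dividing the state by the suspend interval; the ssc/i/na bits all sit below bit 58 and truncate away. The helper name is hypothetical.

static inline uint32_t
example_dq_state_suspend_cnt(uint64_t dq_state)
{
	return (uint32_t)((dq_state & DISPATCH_QUEUE_SUSPEND_BITS_MASK) /
			DISPATCH_QUEUE_SUSPEND_INTERVAL);
}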
 /*
- * ib: in barrier (bit 53)
+ * ib: in barrier (bit 54)
  *    This bit is set when the queue is currently executing a barrier
  */
-#define DISPATCH_QUEUE_IN_BARRIER			0x0020000000000000ull
+#define DISPATCH_QUEUE_IN_BARRIER			0x0040000000000000ull
 /*
- * qf: queue full (bit 52)
+ * qf: queue full (bit 53)
 *    This bit is a subtle hack that allows checking, for any queue width,
 *    whether the full width of the queue is used or reserved (depending on the
 *    context). In other words, whether the queue has reached or overflowed its
 *    capacity.
  */
-#define DISPATCH_QUEUE_WIDTH_FULL_BIT		0x0010000000000000ull
+#define DISPATCH_QUEUE_WIDTH_FULL_BIT		0x0020000000000000ull
 #define DISPATCH_QUEUE_WIDTH_FULL			0x1000ull
 #define DISPATCH_QUEUE_WIDTH_POOL (DISPATCH_QUEUE_WIDTH_FULL - 1)
 #define DISPATCH_QUEUE_WIDTH_MAX  (DISPATCH_QUEUE_WIDTH_FULL - 2)
@@ -210,7 +208,7 @@
 		({ uint16_t _width = (width); \
 		_width > 1 && _width < DISPATCH_QUEUE_WIDTH_POOL; })
 /*
- * w:  width (bits 51 - 40)
+ * w:  width (bits 52 - 41)
  *    This encodes how many work items are in flight. Barriers hold `dq_width`
 *    of them while they run. This is encoded as a signed offset with respect
 *    to full use, where the negative values represent how many available slots
@@ -219,29 +217,19 @@
  *
  *    When this value is positive, then `wo` is always set to 1.
  */
-#define DISPATCH_QUEUE_WIDTH_INTERVAL		0x0000010000000000ull
-#define DISPATCH_QUEUE_WIDTH_MASK			0x001fff0000000000ull
-#define DISPATCH_QUEUE_WIDTH_SHIFT			40
+#define DISPATCH_QUEUE_WIDTH_INTERVAL		0x0000020000000000ull
+#define DISPATCH_QUEUE_WIDTH_MASK			0x003ffe0000000000ull
+#define DISPATCH_QUEUE_WIDTH_SHIFT			41
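For illustration only (not part of the patch): a sketch of extracting the raw width field with the new mask and shift; the helper name is hypothetical.

static inline uint16_t
example_dq_state_width_bits(uint64_t dq_state)
{
	return (uint16_t)((dq_state & DISPATCH_QUEUE_WIDTH_MASK) >>
			DISPATCH_QUEUE_WIDTH_SHIFT);
}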
 /*
- * pb: pending barrier (bit 39)
+ * pb: pending barrier (bit 40)
  *    Drainers set this bit when they couldn't run the next work item and it is
  *    a barrier. When this bit is set, `dq_width - 1` work item slots are
  *    reserved so that no wakeup happens until the last work item in flight
  *    completes.
  */
-#define DISPATCH_QUEUE_PENDING_BARRIER		0x0000008000000000ull
+#define DISPATCH_QUEUE_PENDING_BARRIER		0x0000010000000000ull
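For illustration only (not part of the patch): with a pending barrier, `dq_width - 1` width slots stay reserved, so a prospective locker checks that the reserved width still fits below the full bit, mirroring the checks done in queue.c; the helper name is hypothetical.

static inline bool
example_width_fits_despite_pending_barrier(uint64_t dq_state, uint16_t dq_width)
{
	uint64_t reserved = (dq_width - 1) * DISPATCH_QUEUE_WIDTH_INTERVAL;
	return dq_state + reserved < DISPATCH_QUEUE_WIDTH_FULL_BIT;
}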
 /*
- * p: pended bit (bit 38)
- *    Set when a drain lock has been pended. When this bit is set,
- *    the drain lock is taken and ENQUEUED is never set.
- *
- *    This bit marks a queue that needs further processing but was kept pended
- *    by an async drainer (not reenqueued) in the hope of being able to drain
- *    it further later.
- */
-#define DISPATCH_QUEUE_DRAIN_PENDED			0x0000004000000000ull
-/*
- * d: dirty bit (bit 37)
+ * d: dirty bit (bit 39)
  *    This bit is set when a queue transitions from empty to not empty.
  *    This bit is set before dq_items_head is set, with appropriate barriers.
  *    Any thread looking at a queue head is responsible for unblocking any
@@ -353,18 +341,40 @@
  *
  *    So on the async "acquire" side, there is no subtlety at all.
  */
-#define DISPATCH_QUEUE_DIRTY				0x0000002000000000ull
+#define DISPATCH_QUEUE_DIRTY				0x0000008000000000ull
 /*
- * e: enqueued bit (bit 36)
- *    Set when a queue is enqueued on its target queue
+ * md: enqueued/draining on manager (bit 38)
+ *    Set when enqueued and draining on the manager hierarchy.
+ *
+ *    Unlike the ENQUEUED bit, it is kept until the queue is unlocked from its
+ *    invoke call on the manager. This is used to prevent stealing, and
+ *    overrides to be applied down the target queue chain.
  */
-#define DISPATCH_QUEUE_ENQUEUED				0x0000001000000000ull
+#define DISPATCH_QUEUE_ENQUEUED_ON_MGR		0x0000004000000000ull
 /*
- * o: has override (bits 34)
+ * r: queue graph role (bits 37 - 36)
+ *    Queue role in the target queue graph
+ *
+ *    11: unused
+ *    10: WLH base
+ *    01: non wlh base
+ *    00: inner queue
+ */
+#define DISPATCH_QUEUE_ROLE_MASK			0x0000003000000000ull
+#define DISPATCH_QUEUE_ROLE_BASE_WLH		0x0000002000000000ull
+#define DISPATCH_QUEUE_ROLE_BASE_ANON		0x0000001000000000ull
+#define DISPATCH_QUEUE_ROLE_INNER			0x0000000000000000ull
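For illustration only (not part of the patch): a sketch of how a base-role test reads with the mask above (queue.c relies on _dq_state_is_base_wlh()/_dq_state_is_base_anon() for this kind of check); the name below is hypothetical.

static inline bool
example_dq_state_is_base_wlh(uint64_t dq_state)
{
	return (dq_state & DISPATCH_QUEUE_ROLE_MASK) == DISPATCH_QUEUE_ROLE_BASE_WLH;
}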
+/*
+ * o: has override (bit 35, if role is DISPATCH_QUEUE_ROLE_BASE_ANON)
  *    Set when a queue has received a QOS override and needs to reset it.
  *    This bit is only cleared when the final drain_try_unlock() succeeds.
+ *
+ * sw: has received sync wait (bit 35, if role DISPATCH_QUEUE_ROLE_BASE_WLH)
+ *    Set when a queue owner has been exposed to the kernel because of
+ *    dispatch_sync() contention.
  */
 #define DISPATCH_QUEUE_RECEIVED_OVERRIDE	0x0000000800000000ull
+#define DISPATCH_QUEUE_RECEIVED_SYNC_WAIT	0x0000000800000000ull
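For illustration only (not part of the patch): since both names alias the same bit, any reader has to gate its interpretation on the queue's base role; a sketch of the anon-base reading, with a hypothetical name.

static inline bool
example_dq_state_received_override(uint64_t dq_state)
{
	return (dq_state & DISPATCH_QUEUE_ROLE_MASK) == DISPATCH_QUEUE_ROLE_BASE_ANON
			&& (dq_state & DISPATCH_QUEUE_RECEIVED_OVERRIDE);
}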
 /*
  * max_qos: max qos (bits 34 - 32)
  *   This is the maximum qos that has been enqueued on the queue
@@ -376,27 +386,25 @@
 *    This is used by the normal drain to drain exclusively relative to other
  *    drain stealers (like the QoS Override codepath). It holds the identity
  *    (thread port) of the current drainer.
+ *
+ * st: sync transfer (bit 1 or 30)
+ *    Set when ownership of the queue is transferred to a dispatch_sync() waiter
+ *
+ * e: enqueued bit (bit 0 or 31)
+ *    Set when a queue is enqueued on its target queue
  */
-#define DISPATCH_QUEUE_DRAIN_UNLOCK_MASK	(DISPATCH_QUEUE_DRAIN_PENDED | ~0u)
-#ifdef DLOCK_NOWAITERS_BIT
-#define DISPATCH_QUEUE_DRAIN_OWNER_MASK \
-		((uint64_t)(DLOCK_OWNER_MASK | DLOCK_NOFAILED_TRYLOCK_BIT))
-#define DISPATCH_QUEUE_DRAIN_UNLOCK(v) \
-		(((v) & ~(DISPATCH_QUEUE_DIRTY | DISPATCH_QUEUE_DRAIN_PENDED \
-				| DISPATCH_QUEUE_DRAIN_OWNER_MASK)) ^ DLOCK_NOWAITERS_BIT)
+#define DISPATCH_QUEUE_DRAIN_OWNER_MASK		((uint64_t)DLOCK_OWNER_MASK)
+#define DISPATCH_QUEUE_SYNC_TRANSFER		((uint64_t)DLOCK_FAILED_TRYLOCK_BIT)
+#define DISPATCH_QUEUE_ENQUEUED				((uint64_t)DLOCK_WAITERS_BIT)
+
 #define DISPATCH_QUEUE_DRAIN_PRESERVED_BITS_MASK \
-		(DISPATCH_QUEUE_ENQUEUED | DISPATCH_QUEUE_MAX_QOS_MASK | \
-				DLOCK_NOWAITERS_BIT)
-#else
-#define DISPATCH_QUEUE_DRAIN_OWNER_MASK \
-		((uint64_t)(DLOCK_OWNER_MASK | DLOCK_FAILED_TRYLOCK_BIT))
-#define DISPATCH_QUEUE_DRAIN_UNLOCK(v) \
-		((v) & ~(DISPATCH_QUEUE_DIRTY | DISPATCH_QUEUE_DRAIN_PENDED | \
-				DISPATCH_QUEUE_DRAIN_OWNER_MASK))
-#define DISPATCH_QUEUE_DRAIN_PRESERVED_BITS_MASK \
-		(DISPATCH_QUEUE_ENQUEUED | DISPATCH_QUEUE_MAX_QOS_MASK | \
-				DLOCK_WAITERS_BIT)
-#endif
+		(DISPATCH_QUEUE_ENQUEUED_ON_MGR | DISPATCH_QUEUE_ENQUEUED | \
+		DISPATCH_QUEUE_ROLE_MASK | DISPATCH_QUEUE_MAX_QOS_MASK)
+
+#define DISPATCH_QUEUE_DRAIN_UNLOCK_MASK \
+		(DISPATCH_QUEUE_DRAIN_OWNER_MASK | DISPATCH_QUEUE_RECEIVED_OVERRIDE | \
+		DISPATCH_QUEUE_RECEIVED_SYNC_WAIT | DISPATCH_QUEUE_SYNC_TRANSFER)
+
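For illustration only (not part of the patch): conceptually, a final unlock drops the owner and the per-drain transient bits covered by this mask while the enqueued, role and max-QoS bits survive; the helper name is hypothetical and the real unlock paths do more than this.

static inline uint64_t
example_dq_state_clear_drain_bits(uint64_t dq_state)
{
	return dq_state & ~DISPATCH_QUEUE_DRAIN_UNLOCK_MASK;
}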
 /*
  *******************************************************************************
  *
@@ -418,8 +426,6 @@
  * that right. To do so, prior to taking any decision, they also try to own
  * the full "barrier" width on the given queue.
  *
- * see _dispatch_try_lock_transfer_or_wakeup
- *
  *******************************************************************************
  *
  * Enqueuing and wakeup rules
@@ -490,11 +496,16 @@
 		(DISPATCH_QUEUE_IN_BARRIER | DISPATCH_QUEUE_WIDTH_INTERVAL)
 
 DISPATCH_CLASS_DECL(queue);
+
 #if !defined(__cplusplus) || !DISPATCH_INTROSPECTION
 struct dispatch_queue_s {
 	_DISPATCH_QUEUE_HEADER(queue);
 	DISPATCH_QUEUE_CACHELINE_PADDING; // for static queues only
 } DISPATCH_ATOMIC64_ALIGN;
+
+#if __has_feature(c_static_assert) && !DISPATCH_INTROSPECTION
+_Static_assert(sizeof(struct dispatch_queue_s) <= 128, "dispatch queue size");
+#endif
 #endif // !defined(__cplusplus) || !DISPATCH_INTROSPECTION
 
 DISPATCH_INTERNAL_SUBCLASS_DECL(queue_serial, queue);
@@ -545,51 +556,51 @@
 #define DISPATCH_QUEUE_WAKEUP_MGR            (&_dispatch_mgr_q)
 #define DISPATCH_QUEUE_WAKEUP_WAIT_FOR_EVENT ((dispatch_queue_wakeup_target_t)-1)
 
-void _dispatch_queue_class_wakeup_with_override(dispatch_queue_t dq,
-		dispatch_qos_t qos, dispatch_wakeup_flags_t flags, uint64_t dq_state);
-void _dispatch_queue_class_override_drainer(dispatch_queue_t dqu,
-		dispatch_qos_t qos, dispatch_wakeup_flags_t flags);
-void _dispatch_queue_class_wakeup_enqueue(dispatch_queue_t dq,
-		dispatch_qos_t qos, dispatch_wakeup_flags_t flags,
-		dispatch_queue_wakeup_target_t target);
 void _dispatch_queue_class_wakeup(dispatch_queue_t dqu, dispatch_qos_t qos,
 		dispatch_wakeup_flags_t flags, dispatch_queue_wakeup_target_t target);
-
-void _dispatch_queue_destroy(dispatch_queue_t dq);
-void _dispatch_queue_dispose(dispatch_queue_t dq);
+dispatch_priority_t _dispatch_queue_compute_priority_and_wlh(
+		dispatch_queue_t dq, dispatch_wlh_t *wlh_out);
+void _dispatch_queue_destroy(dispatch_queue_t dq, bool *allow_free);
+void _dispatch_queue_dispose(dispatch_queue_t dq, bool *allow_free);
+void _dispatch_queue_xref_dispose(struct dispatch_queue_s *dq);
 void _dispatch_queue_set_target_queue(dispatch_queue_t dq, dispatch_queue_t tq);
 void _dispatch_queue_suspend(dispatch_queue_t dq);
 void _dispatch_queue_resume(dispatch_queue_t dq, bool activate);
-void _dispatch_queue_finalize_activation(dispatch_queue_t dq);
+void _dispatch_queue_finalize_activation(dispatch_queue_t dq,
+		bool *allow_resume);
 void _dispatch_queue_invoke(dispatch_queue_t dq,
 		dispatch_invoke_context_t dic, dispatch_invoke_flags_t flags);
 void _dispatch_global_queue_poke(dispatch_queue_t dq, int n, int floor);
 void _dispatch_queue_push(dispatch_queue_t dq, dispatch_object_t dou,
 		dispatch_qos_t qos);
-void _dispatch_try_lock_transfer_or_wakeup(dispatch_queue_t dq);
 void _dispatch_queue_wakeup(dispatch_queue_t dq, dispatch_qos_t qos,
 		dispatch_wakeup_flags_t flags);
 dispatch_queue_wakeup_target_t _dispatch_queue_serial_drain(dispatch_queue_t dq,
 		dispatch_invoke_context_t dic, dispatch_invoke_flags_t flags,
 		uint64_t *owned);
-void _dispatch_queue_drain_deferred_invoke(dispatch_queue_t dq,
+void _dispatch_queue_drain_sync_waiter(dispatch_queue_t dq,
 		dispatch_invoke_context_t dic, dispatch_invoke_flags_t flags,
-		uint64_t to_unlock);
-void _dispatch_queue_specific_queue_dispose(dispatch_queue_specific_queue_t
-		dqsq);
+		uint64_t owned);
+void _dispatch_queue_specific_queue_dispose(
+		dispatch_queue_specific_queue_t dqsq, bool *allow_free);
 void _dispatch_root_queue_wakeup(dispatch_queue_t dq, dispatch_qos_t qos,
 		dispatch_wakeup_flags_t flags);
 void _dispatch_root_queue_push(dispatch_queue_t dq, dispatch_object_t dou,
 		dispatch_qos_t qos);
-void _dispatch_root_queue_drain_deferred_item(dispatch_queue_t rq,
-		dispatch_queue_t dq DISPATCH_PERF_MON_ARGS_PROTO);
-void _dispatch_pthread_root_queue_dispose(dispatch_queue_t dq);
+#if DISPATCH_USE_KEVENT_WORKQUEUE
+void _dispatch_root_queue_drain_deferred_item(dispatch_deferred_items_t ddi
+		DISPATCH_PERF_MON_ARGS_PROTO);
+void _dispatch_root_queue_drain_deferred_wlh(dispatch_deferred_items_t ddi
+		DISPATCH_PERF_MON_ARGS_PROTO);
+#endif
+void _dispatch_pthread_root_queue_dispose(dispatch_queue_t dq,
+		bool *allow_free);
 void _dispatch_main_queue_wakeup(dispatch_queue_t dq, dispatch_qos_t qos,
 		dispatch_wakeup_flags_t flags);
 void _dispatch_runloop_queue_wakeup(dispatch_queue_t dq, dispatch_qos_t qos,
 		dispatch_wakeup_flags_t flags);
 void _dispatch_runloop_queue_xref_dispose(dispatch_queue_t dq);
-void _dispatch_runloop_queue_dispose(dispatch_queue_t dq);
+void _dispatch_runloop_queue_dispose(dispatch_queue_t dq, bool *allow_free);
 void _dispatch_mgr_queue_drain(void);
 #if DISPATCH_USE_MGR_THREAD && DISPATCH_ENABLE_PTHREAD_ROOT_QUEUES
 void _dispatch_mgr_priority_init(void);
@@ -640,6 +651,13 @@
 	_DISPATCH_ROOT_QUEUE_IDX_COUNT,
 };
 
+// skip zero
+// 1 - main_q
+// 2 - mgr_q
+// 3 - mgr_root_q
+// 4,5,6,7,8,9,10,11,12,13,14,15 - global queues
+// we use 'xadd' on Intel, so the initial value == next assigned
+#define DISPATCH_QUEUE_SERIAL_NUMBER_INIT 16
 extern unsigned long volatile _dispatch_queue_serial_numbers;
 extern struct dispatch_queue_s _dispatch_root_queues[];
 extern struct dispatch_queue_s _dispatch_mgr_q;
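As a point of reference, the serial-number scheme spelled out in the comment above boils down to an atomic fetch-and-add ("xadd") on a counter whose initializer is the first value that will be handed out. A minimal sketch of that allocation pattern, using toy names rather than libdispatch's own symbols:

#include <stdatomic.h>

#define TOY_QUEUE_SERIAL_NUMBER_INIT 16 // 1-15 are reserved for well-known queues

static _Atomic unsigned long toy_queue_serial_numbers =
		TOY_QUEUE_SERIAL_NUMBER_INIT;

static unsigned long
toy_queue_next_serial_number(void)
{
	// fetch-and-add returns the pre-increment value, so the initializer is
	// exactly the next serial number that will be assigned
	return atomic_fetch_add_explicit(&toy_queue_serial_numbers, 1,
			memory_order_relaxed);
}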
@@ -830,8 +848,11 @@
 	dispatch_thread_frame_s dsc_dtf;
 #endif
 	dispatch_thread_event_s dsc_event;
+	dispatch_tid dsc_waiter;
 	dispatch_qos_t dsc_override_qos_floor;
 	dispatch_qos_t dsc_override_qos;
+	bool dsc_wlh_was_first;
+	bool dsc_release_storage;
 } *dispatch_sync_context_t;
 
 typedef struct dispatch_continuation_vtable_s {
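Many of the dispose/destroy prototypes above gain a "bool *allow_free" out-parameter. As a rough illustration of what such an out-parameter buys (the caller keeps control of the final free, and a dispose hook can veto it), here is a self-contained toy, not libdispatch's actual implementation; all names are hypothetical:

#include <stdbool.h>
#include <stdlib.h>

typedef struct toy_object_s {
	bool storage_still_referenced; // e.g. storage handed off to another owner
	int  state;
} toy_object_s;

static void
toy_object_dispose(toy_object_s *obj, bool *allow_free)
{
	obj->state = 0;
	if (obj->storage_still_referenced) {
		// ask the caller not to free() yet; whoever still references the
		// storage becomes responsible for releasing it later
		*allow_free = false;
	}
}

static void
toy_object_release(toy_object_s *obj)
{
	bool allow_free = true;
	toy_object_dispose(obj, &allow_free);
	if (allow_free) {
		free(obj);
	}
}

int main(void)
{
	toy_object_s *obj = calloc(1, sizeof(*obj));
	if (!obj) return 1;
	toy_object_release(obj); // storage_still_referenced is false: freed here
	return 0;
}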
diff --git a/src/semaphore.c b/src/semaphore.c
index fa6d21a..3fe94c6 100644
--- a/src/semaphore.c
+++ b/src/semaphore.c
@@ -52,15 +52,16 @@
 		return DISPATCH_BAD_INPUT;
 	}
 
-	dsema = (dispatch_semaphore_t)_dispatch_alloc(DISPATCH_VTABLE(semaphore),
-			sizeof(struct dispatch_semaphore_s));
+	dsema = (dispatch_semaphore_t)_dispatch_object_alloc(
+			DISPATCH_VTABLE(semaphore), sizeof(struct dispatch_semaphore_s));
 	_dispatch_semaphore_class_init(value, dsema);
 	dsema->dsema_orig = value;
 	return dsema;
 }
 
 void
-_dispatch_semaphore_dispose(dispatch_object_t dou)
+_dispatch_semaphore_dispose(dispatch_object_t dou,
+		DISPATCH_UNUSED bool *allow_free)
 {
 	dispatch_semaphore_t dsema = dou._dsema;
 
@@ -162,7 +163,7 @@
 static inline dispatch_group_t
 _dispatch_group_create_with_count(long count)
 {
-	dispatch_group_t dg = (dispatch_group_t)_dispatch_alloc(
+	dispatch_group_t dg = (dispatch_group_t)_dispatch_object_alloc(
 			DISPATCH_VTABLE(group), sizeof(struct dispatch_group_s));
 	_dispatch_semaphore_class_init(count, dg);
 	if (count) {
@@ -216,6 +217,7 @@
 		_dispatch_sema4_create(&dg->dg_sema, _DSEMA4_POLICY_FIFO);
 		_dispatch_sema4_signal(&dg->dg_sema, rval);
 	}
+	uint16_t refs = needs_release ? 1 : 0; // <rdar://problem/22318411>
 	if (head) {
 		// async group notify blocks
 		do {
@@ -224,11 +226,9 @@
 			_dispatch_continuation_async(dsn_queue, head);
 			_dispatch_release(dsn_queue);
 		} while ((head = next));
-		_dispatch_release(dg);
+		refs++;
 	}
-	if (needs_release) {
-		_dispatch_release(dg); // <rdar://problem/22318411>
-	}
+	if (refs) _dispatch_release_n(dg, refs);
 	return 0;
 }
 
@@ -246,7 +246,7 @@
 }
 
 void
-_dispatch_group_dispose(dispatch_object_t dou)
+_dispatch_group_dispose(dispatch_object_t dou, DISPATCH_UNUSED bool *allow_free)
 {
 	dispatch_group_t dg = dou._dg;
 
diff --git a/src/semaphore_internal.h b/src/semaphore_internal.h
index 3a4ef6d..f9d0983 100644
--- a/src/semaphore_internal.h
+++ b/src/semaphore_internal.h
@@ -63,11 +63,11 @@
 } dispatch_semaphore_class_t DISPATCH_TRANSPARENT_UNION;
 
 dispatch_group_t _dispatch_group_create_and_enter(void);
-void _dispatch_group_dispose(dispatch_object_t dou);
+void _dispatch_group_dispose(dispatch_object_t dou, bool *allow_free);
 size_t _dispatch_group_debug(dispatch_object_t dou, char *buf,
 		size_t bufsiz);
 
-void _dispatch_semaphore_dispose(dispatch_object_t dou);
+void _dispatch_semaphore_dispose(dispatch_object_t dou, bool *allow_free);
 size_t _dispatch_semaphore_debug(dispatch_object_t dou, char *buf,
 		size_t bufsiz);
 
diff --git a/src/shims/lock.c b/src/shims/lock.c
index de90d60..617fa01 100644
--- a/src/shims/lock.c
+++ b/src/shims/lock.c
@@ -34,6 +34,7 @@
 _Static_assert(DLOCK_LOCK_DATA_CONTENTION == ULF_WAIT_WORKQ_DATA_CONTENTION,
 		"values should be the same");
 
+#if !HAVE_UL_UNFAIR_LOCK
 DISPATCH_ALWAYS_INLINE
 static inline void
 _dispatch_thread_switch(dispatch_lock value, dispatch_lock_options_t flags,
@@ -47,6 +48,7 @@
 	}
 	thread_switch(_dispatch_lock_owner(value), option, timeout);
 }
+#endif // HAVE_UL_UNFAIR_LOCK
 #endif
 
 #pragma mark - semaphores
@@ -315,12 +317,13 @@
 _dispatch_ulock_wait(uint32_t *uaddr, uint32_t val, uint32_t timeout,
 		uint32_t flags)
 {
-	dispatch_assert(!DISPATCH_LOCK_USE_SEMAPHORE_FALLBACK);
 	int rc;
 	_dlock_syscall_switch(err,
 		rc = __ulock_wait(UL_COMPARE_AND_WAIT | flags, uaddr, val, timeout),
 		case 0: return rc > 0 ? ENOTEMPTY : 0;
 		case ETIMEDOUT: case EFAULT: return err;
+		case EOWNERDEAD: DISPATCH_CLIENT_CRASH(*uaddr,
+				"corruption of lock owner");
 		default: DISPATCH_INTERNAL_CRASH(err, "ulock_wait() failed");
 	);
 }
@@ -328,7 +331,6 @@
 static void
 _dispatch_ulock_wake(uint32_t *uaddr, uint32_t flags)
 {
-	dispatch_assert(!DISPATCH_LOCK_USE_SEMAPHORE_FALLBACK);
 	_dlock_syscall_switch(err,
 		__ulock_wake(UL_COMPARE_AND_WAIT | flags, uaddr, 0),
 		case 0: case ENOENT: break;
@@ -344,17 +346,13 @@
 _dispatch_unfair_lock_wait(uint32_t *uaddr, uint32_t val, uint32_t timeout,
 		dispatch_lock_options_t flags)
 {
-	if (DISPATCH_LOCK_USE_SEMAPHORE_FALLBACK) {
-		// <rdar://problem/25075359>
-		timeout =  timeout < 1000 ? 1 : timeout / 1000;
-		_dispatch_thread_switch(val, flags, timeout);
-		return 0;
-	}
 	int rc;
 	_dlock_syscall_switch(err,
 		rc = __ulock_wait(UL_UNFAIR_LOCK | flags, uaddr, val, timeout),
 		case 0: return rc > 0 ? ENOTEMPTY : 0;
 		case ETIMEDOUT: case EFAULT: return err;
+		case EOWNERDEAD: DISPATCH_CLIENT_CRASH(*uaddr,
+				"corruption of lock owner");
 		default: DISPATCH_INTERNAL_CRASH(err, "ulock_wait() failed");
 	);
 }
@@ -362,10 +360,6 @@
 static void
 _dispatch_unfair_lock_wake(uint32_t *uaddr, uint32_t flags)
 {
-	if (DISPATCH_LOCK_USE_SEMAPHORE_FALLBACK) {
-		// <rdar://problem/25075359>
-		return;
-	}
 	_dlock_syscall_switch(err, __ulock_wake(UL_UNFAIR_LOCK | flags, uaddr, 0),
 		case 0: case ENOENT: break;
 		default: DISPATCH_INTERNAL_CRASH(err, "ulock_wake() failed");
@@ -472,13 +466,6 @@
 void
 _dispatch_thread_event_signal_slow(dispatch_thread_event_t dte)
 {
-#if DISPATCH_LOCK_USE_SEMAPHORE_FALLBACK
-	if (DISPATCH_LOCK_USE_SEMAPHORE_FALLBACK) {
-		kern_return_t kr = semaphore_signal(dte->dte_sema);
-		DISPATCH_SEMAPHORE_VERIFY_KR(kr);
-		return;
-	}
-#endif
 #if HAVE_UL_COMPARE_AND_WAIT
 	_dispatch_ulock_wake(&dte->dte_value, 0);
 #elif HAVE_FUTEX
@@ -491,16 +478,6 @@
 void
 _dispatch_thread_event_wait_slow(dispatch_thread_event_t dte)
 {
-#if DISPATCH_LOCK_USE_SEMAPHORE_FALLBACK
-	if (DISPATCH_LOCK_USE_SEMAPHORE_FALLBACK) {
-		kern_return_t kr;
-		do {
-			kr = semaphore_wait(dte->dte_sema);
-		} while (unlikely(kr == KERN_ABORTED));
-		DISPATCH_SEMAPHORE_VERIFY_KR(kr);
-		return;
-	}
-#endif
 #if HAVE_UL_COMPARE_AND_WAIT || HAVE_FUTEX
 	for (;;) {
 		uint32_t value = os_atomic_load(&dte->dte_value, acquire);
@@ -528,30 +505,30 @@
 _dispatch_unfair_lock_lock_slow(dispatch_unfair_lock_t dul,
 		dispatch_lock_options_t flags)
 {
-	dispatch_lock tid_self = _dispatch_tid_self(), next = tid_self;
-	dispatch_lock tid_old, tid_new;
+	dispatch_lock value_self = _dispatch_lock_value_for_self();
+	dispatch_lock old_value, new_value, next = value_self;
 	int rc;
 
 	for (;;) {
-		os_atomic_rmw_loop(&dul->dul_lock, tid_old, tid_new, acquire, {
-			if (likely(!_dispatch_lock_is_locked(tid_old))) {
-				tid_new = next;
+		os_atomic_rmw_loop(&dul->dul_lock, old_value, new_value, acquire, {
+			if (likely(!_dispatch_lock_is_locked(old_value))) {
+				new_value = next;
 			} else {
-				tid_new = tid_old & ~DLOCK_NOWAITERS_BIT;
-				if (tid_new == tid_old) os_atomic_rmw_loop_give_up(break);
+				new_value = old_value | DLOCK_WAITERS_BIT;
+				if (new_value == old_value) os_atomic_rmw_loop_give_up(break);
 			}
 		});
-		if (unlikely(_dispatch_lock_is_locked_by(tid_old, tid_self))) {
+		if (unlikely(_dispatch_lock_is_locked_by(old_value, value_self))) {
 			DISPATCH_CLIENT_CRASH(0, "trying to lock recursively");
 		}
-		if (tid_new == next) {
+		if (new_value == next) {
 			return;
 		}
-		rc = _dispatch_unfair_lock_wait(&dul->dul_lock, tid_new, 0, flags);
+		rc = _dispatch_unfair_lock_wait(&dul->dul_lock, new_value, 0, flags);
 		if (rc == ENOTEMPTY) {
-			next = tid_self & ~DLOCK_NOWAITERS_BIT;
+			next = value_self | DLOCK_WAITERS_BIT;
 		} else {
-			next = tid_self;
+			next = value_self;
 		}
 	}
 }
@@ -568,30 +545,28 @@
 _dispatch_unfair_lock_lock_slow(dispatch_unfair_lock_t dul,
 		dispatch_lock_options_t flags)
 {
-	dispatch_lock tid_cur, tid_self = _dispatch_tid_self();
+	dispatch_lock cur, value_self = _dispatch_lock_value_for_self();
 	uint32_t timeout = 1;
 
 	while (unlikely(!os_atomic_cmpxchgv(&dul->dul_lock,
-			DLOCK_OWNER_NULL, tid_self, &tid_cur, acquire))) {
-		if (unlikely(_dispatch_lock_is_locked_by(tid_cur, tid_self))) {
+			DLOCK_OWNER_NULL, value_self, &cur, acquire))) {
+		if (unlikely(_dispatch_lock_is_locked_by(cur, value_self))) {
 			DISPATCH_CLIENT_CRASH(0, "trying to lock recursively");
 		}
-		_dispatch_thread_switch(tid_cur, flags, timeout++);
+		_dispatch_thread_switch(cur, flags, timeout++);
 	}
 }
 #endif
 
 void
-_dispatch_unfair_lock_unlock_slow(dispatch_unfair_lock_t dul,
-		dispatch_lock tid_cur)
+_dispatch_unfair_lock_unlock_slow(dispatch_unfair_lock_t dul, dispatch_lock cur)
 {
-	dispatch_lock_owner tid_self = _dispatch_tid_self();
-	if (unlikely(!_dispatch_lock_is_locked_by(tid_cur, tid_self))) {
-		DISPATCH_CLIENT_CRASH(tid_cur, "lock not owned by current thread");
+	if (unlikely(!_dispatch_lock_is_locked_by_self(cur))) {
+		DISPATCH_CLIENT_CRASH(cur, "lock not owned by current thread");
 	}
 
 #if HAVE_UL_UNFAIR_LOCK
-	if (!(tid_cur & DLOCK_NOWAITERS_BIT)) {
+	if (_dispatch_lock_has_waiters(cur)) {
 		_dispatch_unfair_lock_wake(&dul->dul_lock, 0);
 	}
 #elif HAVE_FUTEX
@@ -608,41 +583,37 @@
 _dispatch_gate_wait_slow(dispatch_gate_t dgl, dispatch_lock value,
 		dispatch_lock_options_t flags)
 {
-	dispatch_lock tid_self = _dispatch_tid_self(), tid_old, tid_new;
+	dispatch_lock self = _dispatch_lock_value_for_self();
+	dispatch_lock old_value, new_value;
 	uint32_t timeout = 1;
 
 	for (;;) {
-		os_atomic_rmw_loop(&dgl->dgl_lock, tid_old, tid_new, acquire, {
-			if (likely(tid_old == value)) {
+		os_atomic_rmw_loop(&dgl->dgl_lock, old_value, new_value, acquire, {
+			if (likely(old_value == value)) {
 				os_atomic_rmw_loop_give_up_with_fence(acquire, return);
 			}
-#ifdef DLOCK_NOWAITERS_BIT
-			tid_new = tid_old & ~DLOCK_NOWAITERS_BIT;
-#else
-			tid_new = tid_old | DLOCK_WAITERS_BIT;
-#endif
-			if (tid_new == tid_old) os_atomic_rmw_loop_give_up(break);
+			new_value = old_value | DLOCK_WAITERS_BIT;
+			if (new_value == old_value) os_atomic_rmw_loop_give_up(break);
 		});
-		if (unlikely(_dispatch_lock_is_locked_by(tid_old, tid_self))) {
+		if (unlikely(_dispatch_lock_is_locked_by(old_value, self))) {
 			DISPATCH_CLIENT_CRASH(0, "trying to lock recursively");
 		}
 #if HAVE_UL_UNFAIR_LOCK
-		_dispatch_unfair_lock_wait(&dgl->dgl_lock, tid_new, 0, flags);
+		_dispatch_unfair_lock_wait(&dgl->dgl_lock, new_value, 0, flags);
 #elif HAVE_FUTEX
-		_dispatch_futex_wait(&dgl->dgl_lock, tid_new, NULL, FUTEX_PRIVATE_FLAG);
+		_dispatch_futex_wait(&dgl->dgl_lock, new_value, NULL, FUTEX_PRIVATE_FLAG);
 #else
-		_dispatch_thread_switch(tid_new, flags, timeout++);
+		_dispatch_thread_switch(new_value, flags, timeout++);
 #endif
 		(void)timeout;
 	}
 }
 
 void
-_dispatch_gate_broadcast_slow(dispatch_gate_t dgl, dispatch_lock tid_cur)
+_dispatch_gate_broadcast_slow(dispatch_gate_t dgl, dispatch_lock cur)
 {
-	dispatch_lock_owner tid_self = _dispatch_tid_self();
-	if (unlikely(!_dispatch_lock_is_locked_by(tid_cur, tid_self))) {
-		DISPATCH_CLIENT_CRASH(tid_cur, "lock not owned by current thread");
+	if (unlikely(!_dispatch_lock_is_locked_by_self(cur))) {
+		DISPATCH_CLIENT_CRASH(cur, "lock not owned by current thread");
 	}
 
 #if HAVE_UL_UNFAIR_LOCK
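The slow paths above all follow the same lock-word protocol: the word carries the owner's value in DLOCK_OWNER_MASK, waiters publish DLOCK_WAITERS_BIT before parking, and unlock only issues a wake syscall when that bit is set. A compressed sketch of that protocol in C11 atomics, with the kernel park (__ulock_wait/futex) replaced by a retry loop so it stays self-contained; all identifiers are toy names, not the library's:

#include <stdatomic.h>
#include <stdint.h>

#define TOY_WAITERS_BIT 0x00000001u
#define TOY_OWNER_MASK  0xfffffffcu

typedef struct { _Atomic uint32_t word; } toy_unfair_lock;

static void
toy_unfair_lock_lock(toy_unfair_lock *l, uint32_t value_self)
{
	uint32_t old_value, new_value;
	for (;;) {
		old_value = atomic_load_explicit(&l->word, memory_order_relaxed);
		if ((old_value & TOY_OWNER_MASK) == 0) {
			// unlocked: try to install ourselves, preserving any waiters bit
			new_value = value_self | (old_value & TOY_WAITERS_BIT);
		} else {
			// locked by someone else: publish the waiters bit before parking
			new_value = old_value | TOY_WAITERS_BIT;
		}
		if (!atomic_compare_exchange_weak_explicit(&l->word, &old_value,
				new_value, memory_order_acquire, memory_order_relaxed)) {
			continue;
		}
		if ((old_value & TOY_OWNER_MASK) == 0) {
			return; // lock acquired
		}
		// the real code parks in __ulock_wait()/futex() here; we just retry
	}
}

static void
toy_unfair_lock_unlock(toy_unfair_lock *l)
{
	uint32_t old_value = atomic_exchange_explicit(&l->word, 0,
			memory_order_release);
	if (old_value & TOY_WAITERS_BIT) {
		// only now is a wake syscall (__ulock_wake()/futex()) worth issuing
	}
}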
diff --git a/src/shims/lock.h b/src/shims/lock.h
index 4bbbb42..99c5563 100644
--- a/src/shims/lock.h
+++ b/src/shims/lock.h
@@ -30,64 +30,34 @@
 #pragma mark - platform macros
 
 DISPATCH_ENUM(dispatch_lock_options, uint32_t,
-		DLOCK_LOCK_NONE				= 0x00000000,
-		DLOCK_LOCK_DATA_CONTENTION  = 0x00010000,
+	DLOCK_LOCK_NONE				= 0x00000000,
+	DLOCK_LOCK_DATA_CONTENTION  = 0x00010000,
 );
 
 #if TARGET_OS_MAC
 
-typedef mach_port_t dispatch_lock_owner;
+typedef mach_port_t dispatch_tid;
 typedef uint32_t dispatch_lock;
 
-#define DLOCK_OWNER_NULL			((dispatch_lock_owner)MACH_PORT_NULL)
 #define DLOCK_OWNER_MASK			((dispatch_lock)0xfffffffc)
-#define DLOCK_OWNER_INVALID			((dispatch_lock)0xffffffff)
-#define DLOCK_NOWAITERS_BIT			((dispatch_lock)0x00000001)
-#define DLOCK_NOFAILED_TRYLOCK_BIT	((dispatch_lock)0x00000002)
-#define _dispatch_tid_self()		((dispatch_lock_owner)_dispatch_thread_port())
+#define DLOCK_WAITERS_BIT			((dispatch_lock)0x00000001)
+#define DLOCK_FAILED_TRYLOCK_BIT	((dispatch_lock)0x00000002)
+
+#define DLOCK_OWNER_NULL			((dispatch_tid)MACH_PORT_NULL)
+#define _dispatch_tid_self()		((dispatch_tid)_dispatch_thread_port())
 
 DISPATCH_ALWAYS_INLINE
-static inline bool
-_dispatch_lock_is_locked(dispatch_lock lock_value)
-{
-	return (lock_value & DLOCK_OWNER_MASK) != 0;
-}
-
-DISPATCH_ALWAYS_INLINE
-static inline dispatch_lock_owner
+static inline dispatch_tid
 _dispatch_lock_owner(dispatch_lock lock_value)
 {
-	lock_value &= DLOCK_OWNER_MASK;
-	if (lock_value) {
-		lock_value |= DLOCK_NOWAITERS_BIT | DLOCK_NOFAILED_TRYLOCK_BIT;
+	if (lock_value & DLOCK_OWNER_MASK) {
+		return lock_value | DLOCK_WAITERS_BIT | DLOCK_FAILED_TRYLOCK_BIT;
 	}
-	return lock_value;
-}
-
-DISPATCH_ALWAYS_INLINE
-static inline bool
-_dispatch_lock_is_locked_by(dispatch_lock lock_value, dispatch_lock_owner tid)
-{
-	// equivalent to _dispatch_lock_owner(lock_value) == tid
-	return ((lock_value ^ tid) & DLOCK_OWNER_MASK) == 0;
-}
-
-DISPATCH_ALWAYS_INLINE
-static inline bool
-_dispatch_lock_has_waiters(dispatch_lock lock_value)
-{
-	bool nowaiters_bit = (lock_value & DLOCK_NOWAITERS_BIT);
-	return _dispatch_lock_is_locked(lock_value) != nowaiters_bit;
-}
-
-DISPATCH_ALWAYS_INLINE
-static inline bool
-_dispatch_lock_has_failed_trylock(dispatch_lock lock_value)
-{
-	return !(lock_value & DLOCK_NOFAILED_TRYLOCK_BIT);
+	return DLOCK_OWNER_NULL;
 }
 
 #elif defined(__linux__)
+
 #include <linux/futex.h>
 #if !defined(__x86_64__) && !defined(__i386__) && !defined(__s390x__)
 #include <linux/membarrier.h>
@@ -95,36 +65,63 @@
 #include <unistd.h>
 #include <sys/syscall.h>   /* For SYS_xxx definitions */
 
+typedef pid_t dispatch_tid;
 typedef uint32_t dispatch_lock;
-typedef pid_t dispatch_lock_owner;
 
-#define DLOCK_OWNER_NULL			((dispatch_lock_owner)0)
 #define DLOCK_OWNER_MASK			((dispatch_lock)FUTEX_TID_MASK)
-#define DLOCK_OWNER_INVALID			((dispatch_lock)DLOCK_OWNER_MASK)
 #define DLOCK_WAITERS_BIT			((dispatch_lock)FUTEX_WAITERS)
 #define DLOCK_FAILED_TRYLOCK_BIT	((dispatch_lock)FUTEX_OWNER_DIED)
-#define _dispatch_tid_self() \
-		((dispatch_lock_owner)(_dispatch_get_tsd_base()->tid))
+
+#define DLOCK_OWNER_NULL			((dispatch_tid)0)
+#define _dispatch_tid_self()        ((dispatch_tid)(_dispatch_get_tsd_base()->tid))
+
+DISPATCH_ALWAYS_INLINE
+static inline dispatch_tid
+_dispatch_lock_owner(dispatch_lock lock_value)
+{
+	return lock_value & DLOCK_OWNER_MASK;
+}
+
+#else
+#  error define _dispatch_lock encoding scheme for your platform here
+#endif
+
+DISPATCH_ALWAYS_INLINE
+static inline dispatch_lock
+_dispatch_lock_value_from_tid(dispatch_tid tid)
+{
+	return tid & DLOCK_OWNER_MASK;
+}
+
+DISPATCH_ALWAYS_INLINE
+static inline dispatch_lock
+_dispatch_lock_value_for_self(void)
+{
+	return _dispatch_lock_value_from_tid(_dispatch_tid_self());
+}
 
 DISPATCH_ALWAYS_INLINE
 static inline bool
 _dispatch_lock_is_locked(dispatch_lock lock_value)
 {
+	// equivalent to _dispatch_lock_owner(lock_value) != DLOCK_OWNER_NULL
 	return (lock_value & DLOCK_OWNER_MASK) != 0;
 }
 
 DISPATCH_ALWAYS_INLINE
-static inline dispatch_lock_owner
-_dispatch_lock_owner(dispatch_lock lock_value)
+static inline bool
+_dispatch_lock_is_locked_by(dispatch_lock lock_value, dispatch_tid tid)
 {
-	return (lock_value & DLOCK_OWNER_MASK);
+	// equivalent to _dispatch_lock_owner(lock_value) == tid
+	return ((lock_value ^ tid) & DLOCK_OWNER_MASK) == 0;
 }
 
 DISPATCH_ALWAYS_INLINE
 static inline bool
-_dispatch_lock_is_locked_by(dispatch_lock lock_value, dispatch_lock_owner tid)
+_dispatch_lock_is_locked_by_self(dispatch_lock lock_value)
 {
-	return _dispatch_lock_owner(lock_value) == tid;
+	// equivalent to _dispatch_lock_owner(lock_value) == _dispatch_tid_self()
+	return ((lock_value ^ _dispatch_tid_self()) & DLOCK_OWNER_MASK) == 0;
 }
 
 DISPATCH_ALWAYS_INLINE
@@ -138,32 +135,18 @@
 static inline bool
 _dispatch_lock_has_failed_trylock(dispatch_lock lock_value)
 {
-	return !(lock_value & DLOCK_FAILED_TRYLOCK_BIT);
+	return (lock_value & DLOCK_FAILED_TRYLOCK_BIT);
 }
 
-#else
-#  error define _dispatch_lock encoding scheme for your platform here
-#endif
-
 #if __has_include(<sys/ulock.h>)
 #include <sys/ulock.h>
+#ifdef UL_COMPARE_AND_WAIT
+#define HAVE_UL_COMPARE_AND_WAIT 1
 #endif
-
-#ifndef HAVE_UL_COMPARE_AND_WAIT
-#if defined(UL_COMPARE_AND_WAIT) && DISPATCH_MIN_REQUIRED_OSX_AT_LEAST(101200)
-#  define HAVE_UL_COMPARE_AND_WAIT 1
-#else
-#  define HAVE_UL_COMPARE_AND_WAIT 0
+#ifdef UL_UNFAIR_LOCK
+#define HAVE_UL_UNFAIR_LOCK 1
 #endif
-#endif // HAVE_UL_COMPARE_AND_WAIT
-
-#ifndef HAVE_UL_UNFAIR_LOCK
-#if defined(UL_UNFAIR_LOCK) && DISPATCH_MIN_REQUIRED_OSX_AT_LEAST(101200)
-#  define HAVE_UL_UNFAIR_LOCK 1
-#else
-#  define HAVE_UL_UNFAIR_LOCK 0
 #endif
-#endif // HAVE_UL_UNFAIR_LOCK
 
 #ifndef HAVE_FUTEX
 #ifdef __linux__
@@ -175,14 +158,6 @@
 
 #pragma mark - semaphores
 
-#ifndef DISPATCH_LOCK_USE_SEMAPHORE_FALLBACK
-#if TARGET_OS_MAC
-#define DISPATCH_LOCK_USE_SEMAPHORE_FALLBACK (!HAVE_UL_COMPARE_AND_WAIT)
-#else
-#define DISPATCH_LOCK_USE_SEMAPHORE_FALLBACK 0
-#endif
-#endif
-
 #if USE_MACH_SEM
 
 typedef semaphore_t _dispatch_sema4_t;
@@ -270,12 +245,7 @@
  * This locking primitive has no notion of ownership
  */
 typedef struct dispatch_thread_event_s {
-#if DISPATCH_LOCK_USE_SEMAPHORE_FALLBACK
-	union {
-		_dispatch_sema4_t dte_sema;
-		uint32_t dte_value;
-	};
-#elif HAVE_UL_COMPARE_AND_WAIT || HAVE_FUTEX
+#if HAVE_UL_COMPARE_AND_WAIT || HAVE_FUTEX
 	// 1 means signalled but not waited on yet
 	// UINT32_MAX means waited on, but not signalled yet
 	// 0 is the initial and final state
@@ -293,13 +263,6 @@
 static inline void
 _dispatch_thread_event_init(dispatch_thread_event_t dte)
 {
-#if DISPATCH_LOCK_USE_SEMAPHORE_FALLBACK
-	if (DISPATCH_LOCK_USE_SEMAPHORE_FALLBACK) {
-		_dispatch_sema4_init(&dte->dte_sema, _DSEMA4_POLICY_FIFO);
-		_dispatch_sema4_create(&dte->dte_sema, _DSEMA4_POLICY_FIFO);
-		return;
-	}
-#endif
 #if HAVE_UL_COMPARE_AND_WAIT || HAVE_FUTEX
 	dte->dte_value = 0;
 #else
@@ -311,12 +274,6 @@
 static inline void
 _dispatch_thread_event_signal(dispatch_thread_event_t dte)
 {
-#if DISPATCH_LOCK_USE_SEMAPHORE_FALLBACK
-	if (DISPATCH_LOCK_USE_SEMAPHORE_FALLBACK) {
-		_dispatch_thread_event_signal_slow(dte);
-		return;
-	}
-#endif
 #if HAVE_UL_COMPARE_AND_WAIT || HAVE_FUTEX
 	if (os_atomic_inc_orig(&dte->dte_value, release) == 0) {
 		// 0 -> 1 transition doesn't need a signal
@@ -335,12 +292,6 @@
 static inline void
 _dispatch_thread_event_wait(dispatch_thread_event_t dte)
 {
-#if DISPATCH_LOCK_USE_SEMAPHORE_FALLBACK
-	if (DISPATCH_LOCK_USE_SEMAPHORE_FALLBACK) {
-		_dispatch_thread_event_wait_slow(dte);
-		return;
-	}
-#endif
 #if HAVE_UL_COMPARE_AND_WAIT || HAVE_FUTEX
 	if (os_atomic_dec(&dte->dte_value, acquire) == 0) {
 		// 1 -> 0 is always a valid transition, so we can return
@@ -357,12 +308,6 @@
 static inline void
 _dispatch_thread_event_destroy(dispatch_thread_event_t dte)
 {
-#if DISPATCH_LOCK_USE_SEMAPHORE_FALLBACK
-	if (DISPATCH_LOCK_USE_SEMAPHORE_FALLBACK) {
-		_dispatch_sema4_dispose(&dte->dte_sema, _DSEMA4_POLICY_FIFO);
-		return;
-	}
-#endif
 #if HAVE_UL_COMPARE_AND_WAIT || HAVE_FUTEX
 	// nothing to do
 	dispatch_assert(dte->dte_value == 0);
@@ -387,9 +332,9 @@
 static inline void
 _dispatch_unfair_lock_lock(dispatch_unfair_lock_t l)
 {
-	dispatch_lock tid_self = _dispatch_tid_self();
+	dispatch_lock value_self = _dispatch_lock_value_for_self();
 	if (likely(os_atomic_cmpxchg(&l->dul_lock,
-			DLOCK_OWNER_NULL, tid_self, acquire))) {
+			DLOCK_OWNER_NULL, value_self, acquire))) {
 		return;
 	}
 	return _dispatch_unfair_lock_lock_slow(l, DLOCK_LOCK_NONE);
@@ -397,54 +342,42 @@
 
 DISPATCH_ALWAYS_INLINE
 static inline bool
-_dispatch_unfair_lock_trylock(dispatch_unfair_lock_t l,
-		dispatch_lock_owner *owner)
+_dispatch_unfair_lock_trylock(dispatch_unfair_lock_t l, dispatch_tid *owner)
 {
-	dispatch_lock tid_old, tid_new, tid_self = _dispatch_tid_self();
+	dispatch_lock value_self = _dispatch_lock_value_for_self();
+	dispatch_lock old_value, new_value;
 
-	os_atomic_rmw_loop(&l->dul_lock, tid_old, tid_new, acquire, {
-		if (likely(!_dispatch_lock_is_locked(tid_old))) {
-			tid_new = tid_self;
+	os_atomic_rmw_loop(&l->dul_lock, old_value, new_value, acquire, {
+		if (likely(!_dispatch_lock_is_locked(old_value))) {
+			new_value = value_self;
 		} else {
-#ifdef DLOCK_NOFAILED_TRYLOCK_BIT
-			tid_new = tid_old & ~DLOCK_NOFAILED_TRYLOCK_BIT;
-#else
-			tid_new = tid_old | DLOCK_FAILED_TRYLOCK_BIT;
-#endif
+			new_value = old_value | DLOCK_FAILED_TRYLOCK_BIT;
 		}
 	});
-	if (owner) *owner = _dispatch_lock_owner(tid_new);
-	return !_dispatch_lock_is_locked(tid_old);
+	if (owner) *owner = _dispatch_lock_owner(new_value);
+	return !_dispatch_lock_is_locked(old_value);
 }
 
 DISPATCH_ALWAYS_INLINE
 static inline bool
 _dispatch_unfair_lock_tryunlock(dispatch_unfair_lock_t l)
 {
-	dispatch_lock tid_old, tid_new;
+	dispatch_lock old_value, new_value;
 
-	os_atomic_rmw_loop(&l->dul_lock, tid_old, tid_new, release, {
-#ifdef DLOCK_NOFAILED_TRYLOCK_BIT
-		if (likely(tid_old & DLOCK_NOFAILED_TRYLOCK_BIT)) {
-			tid_new = DLOCK_OWNER_NULL;
+	os_atomic_rmw_loop(&l->dul_lock, old_value, new_value, release, {
+		if (unlikely(old_value & DLOCK_FAILED_TRYLOCK_BIT)) {
+			new_value = old_value ^ DLOCK_FAILED_TRYLOCK_BIT;
 		} else {
-			tid_new = tid_old | DLOCK_NOFAILED_TRYLOCK_BIT;
+			new_value = DLOCK_OWNER_NULL;
 		}
-#else
-		if (likely(!(tid_old & DLOCK_FAILED_TRYLOCK_BIT))) {
-			tid_new = DLOCK_OWNER_NULL;
-		} else {
-			tid_new = tid_old & ~DLOCK_FAILED_TRYLOCK_BIT;
-		}
-#endif
 	});
-	if (unlikely(tid_new)) {
+	if (unlikely(new_value)) {
 		// unlock failed, renew the lock, which needs an acquire barrier
 		os_atomic_thread_fence(acquire);
 		return false;
 	}
-	if (unlikely(_dispatch_lock_has_waiters(tid_old))) {
-		_dispatch_unfair_lock_unlock_slow(l, tid_old);
+	if (unlikely(_dispatch_lock_has_waiters(old_value))) {
+		_dispatch_unfair_lock_unlock_slow(l, old_value);
 	}
 	return true;
 }
@@ -453,18 +386,18 @@
 static inline bool
 _dispatch_unfair_lock_unlock_had_failed_trylock(dispatch_unfair_lock_t l)
 {
-	dispatch_lock tid_cur, tid_self = _dispatch_tid_self();
+	dispatch_lock cur, value_self = _dispatch_lock_value_for_self();
 #if HAVE_FUTEX
 	if (likely(os_atomic_cmpxchgv(&l->dul_lock,
-			tid_self, DLOCK_OWNER_NULL, &tid_cur, release))) {
+			value_self, DLOCK_OWNER_NULL, &cur, release))) {
 		return false;
 	}
 #else
-	tid_cur = os_atomic_xchg(&l->dul_lock, DLOCK_OWNER_NULL, release);
-	if (likely(tid_cur == tid_self)) return false;
+	cur = os_atomic_xchg(&l->dul_lock, DLOCK_OWNER_NULL, release);
+	if (likely(cur == value_self)) return false;
 #endif
-	_dispatch_unfair_lock_unlock_slow(l, tid_cur);
-	return _dispatch_lock_has_failed_trylock(tid_cur);
+	_dispatch_unfair_lock_unlock_slow(l, cur);
+	return _dispatch_lock_has_failed_trylock(cur);
 }
 
 DISPATCH_ALWAYS_INLINE
@@ -507,9 +440,8 @@
 static inline bool
 _dispatch_gate_tryenter(dispatch_gate_t l)
 {
-	dispatch_lock tid_self = _dispatch_tid_self();
-	return likely(os_atomic_cmpxchg(&l->dgl_lock,
-			DLOCK_GATE_UNLOCKED, tid_self, acquire));
+	return os_atomic_cmpxchg(&l->dgl_lock, DLOCK_GATE_UNLOCKED,
+			_dispatch_lock_value_for_self(), acquire);
 }
 
 #define _dispatch_gate_wait(l, flags) \
@@ -519,19 +451,18 @@
 static inline void
 _dispatch_gate_broadcast(dispatch_gate_t l)
 {
-	dispatch_lock tid_cur, tid_self = _dispatch_tid_self();
-	tid_cur = os_atomic_xchg(&l->dgl_lock, DLOCK_GATE_UNLOCKED, release);
-	if (likely(tid_cur == tid_self)) return;
-	_dispatch_gate_broadcast_slow(l, tid_cur);
+	dispatch_lock cur, value_self = _dispatch_lock_value_for_self();
+	cur = os_atomic_xchg(&l->dgl_lock, DLOCK_GATE_UNLOCKED, release);
+	if (likely(cur == value_self)) return;
+	_dispatch_gate_broadcast_slow(l, cur);
 }
 
 DISPATCH_ALWAYS_INLINE
 static inline bool
 _dispatch_once_gate_tryenter(dispatch_once_gate_t l)
 {
-	dispatch_once_t tid_self = (dispatch_once_t)_dispatch_tid_self();
-	return likely(os_atomic_cmpxchg(&l->dgo_once,
-			DLOCK_ONCE_UNLOCKED, tid_self, acquire));
+	return os_atomic_cmpxchg(&l->dgo_once, DLOCK_ONCE_UNLOCKED,
+			(dispatch_once_t)_dispatch_lock_value_for_self(), acquire);
 }
 
 #define _dispatch_once_gate_wait(l) \
@@ -570,11 +501,10 @@
 static inline void
 _dispatch_once_gate_broadcast(dispatch_once_gate_t l)
 {
-	dispatch_once_t tid_cur, tid_self = (dispatch_once_t)_dispatch_tid_self();
-
-	tid_cur = _dispatch_once_xchg_done(&l->dgo_once);
-	if (likely(tid_cur == tid_self)) return;
-	_dispatch_gate_broadcast_slow(&l->dgo_gate, (dispatch_lock)tid_cur);
+	dispatch_lock value_self = _dispatch_lock_value_for_self();
+	dispatch_once_t cur = _dispatch_once_xchg_done(&l->dgo_once);
+	if (likely(cur == (dispatch_once_t)value_self)) return;
+	_dispatch_gate_broadcast_slow(&l->dgo_gate, (dispatch_lock)cur);
 }
 
 #endif // __DISPATCH_SHIMS_LOCK__
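The dispatch_thread_event counter described in the comments above (1 = signalled before anyone waited, UINT32_MAX = a waiter parked before the signal, 0 = initial and final state) drives a one-shot rendezvous in which only the side that loses the race pays for a syscall; signal increments with release ordering, wait decrements with acquire ordering. A self-contained sketch of that counter protocol with the park/wake calls stubbed out and toy names assumed:

#include <assert.h>
#include <stdatomic.h>
#include <stdint.h>

typedef struct { _Atomic uint32_t value; } toy_event;

static void toy_park(toy_event *e) { (void)e; /* __ulock_wait()/futex() */ }
static void toy_wake(toy_event *e) { (void)e; /* __ulock_wake()/futex() */ }

static void
toy_event_signal(toy_event *e)
{
	if (atomic_fetch_add_explicit(&e->value, 1, memory_order_release) == 0) {
		return; // 0 -> 1: nobody is parked yet, no syscall needed
	}
	// UINT32_MAX -> 0: a waiter already parked, wake it up
	toy_wake(e);
}

static void
toy_event_wait(toy_event *e)
{
	if (atomic_fetch_sub_explicit(&e->value, 1, memory_order_acquire) == 1) {
		return; // 1 -> 0: the signal already happened, return immediately
	}
	// 0 -> UINT32_MAX: we got here first, park until the signal arrives
	toy_park(e);
}

int main(void)
{
	toy_event e = { .value = 0 };
	toy_event_signal(&e); // 0 -> 1
	toy_event_wait(&e);   // 1 -> 0, no blocking required
	assert(atomic_load(&e.value) == 0);
	return 0;
}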
diff --git a/src/shims/perfmon.h b/src/shims/perfmon.h
index fe23a1d..be9327b 100644
--- a/src/shims/perfmon.h
+++ b/src/shims/perfmon.h
@@ -63,6 +63,7 @@
 #define DISPATCH_PERF_MON_ARGS_PROTO  , uint64_t perfmon_start
 #define DISPATCH_PERF_MON_ARGS        , perfmon_start
 #define DISPATCH_PERF_MON_VAR         uint64_t perfmon_start;
+#define DISPATCH_PERF_MON_VAR_INIT    uint64_t perfmon_start = 0;
 
 #define _dispatch_perfmon_start_impl(trace) ({ \
 		if (trace) _dispatch_ktrace0(DISPATCH_PERF_MON_worker_thread_start); \
@@ -84,6 +85,7 @@
 #define DISPATCH_PERF_MON_ARGS_PROTO
 #define DISPATCH_PERF_MON_ARGS
 #define DISPATCH_PERF_MON_VAR
+#define DISPATCH_PERF_MON_VAR_INIT
 #define _dispatch_perfmon_workitem_inc()
 #define _dispatch_perfmon_workitem_dec()
 #define _dispatch_perfmon_start_impl(trace)
diff --git a/src/shims/time.h b/src/shims/time.h
index 3010f08..0b8e926 100644
--- a/src/shims/time.h
+++ b/src/shims/time.h
@@ -46,7 +46,15 @@
 #define DISPATCH_CLOCK_COUNT  (DISPATCH_CLOCK_MACH + 1)
 } dispatch_clock_t;
 
+void _dispatch_time_init(void);
+
 #if defined(__i386__) || defined(__x86_64__) || !HAVE_MACH_ABSOLUTE_TIME
+#define DISPATCH_TIME_UNIT_USES_NANOSECONDS 1
+#else
+#define DISPATCH_TIME_UNIT_USES_NANOSECONDS 0
+#endif
+
+#if DISPATCH_TIME_UNIT_USES_NANOSECONDS
 // x86 currently implements mach time in nanoseconds
 // this is NOT likely to change
 DISPATCH_ALWAYS_INLINE
@@ -63,52 +71,21 @@
 	return nsec;
 }
 #else
-typedef struct _dispatch_host_time_data_s {
-	dispatch_once_t pred;
-	long double frac;
-	bool ratio_1_to_1;
-} _dispatch_host_time_data_s;
-extern _dispatch_host_time_data_s _dispatch_host_time_data;
-void _dispatch_get_host_time_init(void *context);
-
+#define DISPATCH_USE_HOST_TIME 1
+extern uint64_t (*_dispatch_host_time_mach2nano)(uint64_t machtime);
+extern uint64_t (*_dispatch_host_time_nano2mach)(uint64_t nsec);
 static inline uint64_t
 _dispatch_time_mach2nano(uint64_t machtime)
 {
-	_dispatch_host_time_data_s *const data = &_dispatch_host_time_data;
-	dispatch_once_f(&data->pred, NULL, _dispatch_get_host_time_init);
-
-	if (unlikely(!machtime || data->ratio_1_to_1)) {
-		return machtime;
-	}
-	if (machtime >= INT64_MAX) {
-		return INT64_MAX;
-	}
-	long double big_tmp = ((long double)machtime * data->frac) + .5L;
-	if (unlikely(big_tmp >= INT64_MAX)) {
-		return INT64_MAX;
-	}
-	return (uint64_t)big_tmp;
+	return _dispatch_host_time_mach2nano(machtime);
 }
 
 static inline uint64_t
 _dispatch_time_nano2mach(uint64_t nsec)
 {
-	_dispatch_host_time_data_s *const data = &_dispatch_host_time_data;
-	dispatch_once_f(&data->pred, NULL, _dispatch_get_host_time_init);
-
-	if (unlikely(!nsec || data->ratio_1_to_1)) {
-		return nsec;
-	}
-	if (nsec >= INT64_MAX) {
-		return INT64_MAX;
-	}
-	long double big_tmp = ((long double)nsec / data->frac) + .5L;
-	if (unlikely(big_tmp >= INT64_MAX)) {
-		return INT64_MAX;
-	}
-	return (uint64_t)big_tmp;
+	return _dispatch_host_time_nano2mach(nsec);
 }
-#endif
+#endif // DISPATCH_USE_HOST_TIME
 
 /* XXXRW: Some kind of overflow detection needed? */
 #define _dispatch_timespec_to_nano(ts) \
@@ -123,7 +100,7 @@
 	dispatch_static_assert(sizeof(NSEC_PER_SEC) == 8);
 	dispatch_static_assert(sizeof(USEC_PER_SEC) == 8);
 
-#if TARGET_OS_MAC && DISPATCH_MIN_REQUIRED_OSX_AT_LEAST(101200)
+#if TARGET_OS_MAC
 	return clock_gettime_nsec_np(CLOCK_REALTIME);
 #elif HAVE_DECL_CLOCK_REALTIME
 	struct timespec ts;
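The time.h hunks above replace the per-call dispatch_once plus long-double scaling with two function pointers that _dispatch_time_init() is expected to install once at startup (the initializer itself is outside this hunk). A plausible shape for that indirection, assuming a mach_timebase_info-style numer/denom pair; toy names only, not the library's code:

#include <stdint.h>

// pretend these came from mach_timebase_info() at startup
static uint64_t toy_timebase_numer = 125;
static uint64_t toy_timebase_denom = 3;

static uint64_t
toy_mach2nano_scaled(uint64_t machtime)
{
	// overflow handling omitted; the removed code above saturated at INT64_MAX
	return machtime * toy_timebase_numer / toy_timebase_denom;
}

static uint64_t
toy_mach2nano_identity(uint64_t machtime)
{
	return machtime; // 1:1 timebase, nothing to compute
}

// installed once, so the hot path is a single indirect call with no
// dispatch_once check and no long-double arithmetic
static uint64_t (*toy_host_time_mach2nano)(uint64_t) = toy_mach2nano_identity;

static void
toy_time_init(int ratio_is_1_to_1)
{
	toy_host_time_mach2nano = ratio_is_1_to_1 ? toy_mach2nano_identity
			: toy_mach2nano_scaled;
}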
diff --git a/src/shims/tsd.h b/src/shims/tsd.h
index f3d3cea..c119e4f 100644
--- a/src/shims/tsd.h
+++ b/src/shims/tsd.h
@@ -65,6 +65,9 @@
 #ifndef __TSD_RETURN_TO_KERNEL
 #define __TSD_RETURN_TO_KERNEL 5
 #endif
+#ifndef __TSD_MACH_SPECIAL_REPLY
+#define __TSD_MACH_SPECIAL_REPLY 8
+#endif
 
 static const unsigned long dispatch_priority_key	= __TSD_THREAD_QOS_CLASS;
 static const unsigned long dispatch_r2k_key			= __TSD_RETURN_TO_KERNEL;
@@ -310,6 +313,11 @@
 #define _dispatch_set_thread_mig_reply_port(p) ( \
 		_dispatch_thread_setspecific(_PTHREAD_TSD_SLOT_MIG_REPLY, \
 		(void*)(uintptr_t)(p)))
+#define _dispatch_get_thread_special_reply_port() ((mach_port_t)(uintptr_t) \
+		_dispatch_thread_getspecific(__TSD_MACH_SPECIAL_REPLY))
+#define _dispatch_set_thread_special_reply_port(p) ( \
+		_dispatch_thread_setspecific(__TSD_MACH_SPECIAL_REPLY, \
+		(void*)(uintptr_t)(p)))
 #endif
 
 DISPATCH_TSD_INLINE DISPATCH_CONST
diff --git a/src/source.c b/src/source.c
index 7c85c74..fd337a9 100644
--- a/src/source.c
+++ b/src/source.c
@@ -23,7 +23,9 @@
 static void _dispatch_source_handler_free(dispatch_source_t ds, long kind);
 static void _dispatch_source_set_interval(dispatch_source_t ds, uint64_t interval);
 
-static void _dispatch_timers_update(dispatch_unote_t du);
+#define DISPATCH_TIMERS_UNREGISTER 0x1
+#define DISPATCH_TIMERS_RETAIN_2 0x2
+static void _dispatch_timers_update(dispatch_unote_t du, uint32_t flags);
 static void _dispatch_timers_unregister(dispatch_timer_source_refs_t dt);
 
 static void _dispatch_source_timer_configure(dispatch_source_t ds);
@@ -40,18 +42,16 @@
 	dispatch_source_refs_t dr;
 	dispatch_source_t ds;
 
-	// ensure _dispatch_evfilt_machport_direct_enabled is initialized
-	_dispatch_root_queues_init();
-
 	dr = dux_create(dst, handle, mask)._dr;
 	if (unlikely(!dr)) {
 		return DISPATCH_BAD_INPUT;
 	}
 
-	ds = _dispatch_alloc(DISPATCH_VTABLE(source),
+	ds = _dispatch_object_alloc(DISPATCH_VTABLE(source),
 			sizeof(struct dispatch_source_s));
 	// Initialize as a queue first, then override some settings below.
-	_dispatch_queue_init(ds->_as_dq, DQF_LEGACY, 1, true);
+	_dispatch_queue_init(ds->_as_dq, DQF_LEGACY, 1,
+			DISPATCH_QUEUE_INACTIVE | DISPATCH_QUEUE_ROLE_INNER);
 	ds->dq_label = "source";
 	ds->do_ref_cnt++; // the reference the manager queue holds
 	ds->ds_refs = dr;
@@ -71,7 +71,7 @@
 }
 
 void
-_dispatch_source_dispose(dispatch_source_t ds)
+_dispatch_source_dispose(dispatch_source_t ds, bool *allow_free)
 {
 	_dispatch_object_debug(ds, "%s", __func__);
 	_dispatch_source_handler_free(ds, DS_REGISTN_HANDLER);
@@ -79,7 +79,7 @@
 	_dispatch_source_handler_free(ds, DS_CANCEL_HANDLER);
 	_dispatch_unote_dispose(ds->ds_refs);
 	ds->ds_refs = NULL;
-	_dispatch_queue_destroy(ds->_as_dq);
+	_dispatch_queue_destroy(ds->_as_dq, allow_free);
 }
 
 void
@@ -90,7 +90,7 @@
 		DISPATCH_CLIENT_CRASH(ds, "Release of a source that has not been "
 				"cancelled, but has a mandatory cancel handler");
 	}
-	dx_wakeup(ds, 0, DISPATCH_WAKEUP_FLUSH);
+	dx_wakeup(ds, 0, DISPATCH_WAKEUP_MAKE_DIRTY);
 }
 
 long
@@ -210,7 +210,7 @@
 		DISPATCH_CLIENT_CRASH(filter, "Invalid source type");
 	}
 
-	dx_wakeup(ds, _dispatch_qos_from_pp(pp), DISPATCH_WAKEUP_FLUSH);
+	dx_wakeup(ds, _dispatch_qos_from_pp(pp), DISPATCH_WAKEUP_MAKE_DIRTY);
 }
 
 void
@@ -534,6 +534,7 @@
 		// to tell the truth, it may not have happened yet
 		if (dqf & DSF_ARMED) {
 			_dispatch_timers_unregister(ds->ds_timer_refs);
+			_dispatch_release_2(ds);
 		}
 		dr->du_ident = DISPATCH_TIMER_IDENT_CANCELED;
 	} else {
@@ -555,7 +556,7 @@
 	}
 	ds->ds_is_installed = true;
 	_dispatch_debug("kevent-source[%p]: disarmed kevent[%p]", ds, dr);
-	_dispatch_release(ds); // the retain is done at creation time
+	_dispatch_release_tailcall(ds); // the retain is done at creation time
 }
 
 DISPATCH_ALWAYS_INLINE
@@ -579,7 +580,7 @@
 {
 	dispatch_source_refs_t dr = ds->ds_refs;
 	if (dr->du_is_timer) {
-		_dispatch_timers_update(dr);
+		_dispatch_timers_update(dr, 0);
 		return true;
 	}
 	if (unlikely(!_dispatch_source_tryarm(ds))) {
@@ -591,14 +592,17 @@
 }
 
 void
-_dispatch_source_refs_register(dispatch_source_t ds, dispatch_priority_t pri)
+_dispatch_source_refs_register(dispatch_source_t ds, dispatch_wlh_t wlh,
+		dispatch_priority_t pri)
 {
 	dispatch_source_refs_t dr = ds->ds_refs;
+	dispatch_priority_t kbp;
 
 	dispatch_assert(!ds->ds_is_installed);
 
 	if (dr->du_is_timer) {
-		dispatch_priority_t kbp = _dispatch_source_compute_kevent_priority(ds);
+		dispatch_queue_t dq = ds->_as_dq;
+		kbp = _dispatch_queue_compute_priority_and_wlh(dq, NULL);
 		// aggressively coalesce background/maintenance QoS timers
 		// <rdar://problem/12200216&27342536>
 		if (_dispatch_qos_is_background(_dispatch_priority_qos(kbp))) {
@@ -609,12 +613,12 @@
 				dr->du_ident = _dispatch_source_timer_idx(dr);
 			}
 		}
-		_dispatch_timers_update(dr);
+		_dispatch_timers_update(dr, 0);
 		return;
 	}
 
 	if (unlikely(!_dispatch_source_tryarm(ds) ||
-			!_dispatch_unote_register(dr, ds->dq_wlh, pri))) {
+			!_dispatch_unote_register(dr, wlh, pri))) {
 		_dispatch_queue_atomic_flags_set_and_clear(ds->_as_dq, DSF_DELETED,
 				DSF_ARMED | DSF_DEFERRED_DELETE);
 	} else {
@@ -634,65 +638,22 @@
 	}
 }
 
-dispatch_priority_t
-_dispatch_source_compute_kevent_priority(dispatch_source_t ds)
+DISPATCH_ALWAYS_INLINE
+static inline void
+_dispatch_source_install(dispatch_source_t ds, dispatch_wlh_t wlh,
+		dispatch_priority_t pri)
 {
-	dispatch_priority_t p = ds->dq_priority & DISPATCH_PRIORITY_REQUESTED_MASK;
-	dispatch_queue_t tq = ds->do_targetq;
-	dispatch_priority_t tqp = tq->dq_priority & DISPATCH_PRIORITY_REQUESTED_MASK;
-
-	while (unlikely(!dx_hastypeflag(tq, QUEUE_ROOT))) {
-		if (unlikely(tq == &_dispatch_mgr_q)) {
-			return DISPATCH_PRIORITY_FLAG_MANAGER;
-		}
-		if (unlikely(_dispatch_queue_is_thread_bound(tq))) {
-			// thread-bound hierarchies are weird, we need to install
-			// from the context of the thread this hierarchy is bound to
-			return 0;
-		}
-		if (unlikely(DISPATCH_QUEUE_IS_SUSPENDED(tq))) {
-			// this queue may not be activated yet, so the queue graph may not
-			// have stabilized yet
-			_dispatch_ktrace1(DISPATCH_PERF_delayed_registration, ds);
-			return 0;
-		}
-		if (unlikely(_dispatch_queue_is_legacy(tq))) {
-			if (!_dispatch_is_in_root_queues_array(tq->do_targetq)) {
-				// we're not allowed to dereference tq->do_targetq
-				_dispatch_ktrace1(DISPATCH_PERF_delayed_registration, ds);
-				return 0;
-			}
-		}
-		if (!(tq->dq_priority & DISPATCH_PRIORITY_FLAG_INHERIT)) {
-			if (p < tqp) p = tqp;
-		}
-		tq = tq->do_targetq;
-		tqp = tq->dq_priority & DISPATCH_PRIORITY_REQUESTED_MASK;
-	}
-
-	if (unlikely(!tqp)) {
-		// pthread root queues opt out of QoS
-		return 0;
-	}
-	return _dispatch_priority_inherit_from_root_queue(p, tq);
-}
-
-static void
-_dispatch_source_install(dispatch_source_t ds, dispatch_priority_t pri,
-		dispatch_wlh_t wlh)
-{
-	if (!ds->dq_wlh && wlh) {
-		_dispatch_queue_class_record_wlh_hierarchy(ds, wlh);
-	}
-	_dispatch_source_refs_register(ds, pri);
+	_dispatch_source_refs_register(ds, wlh, pri);
 	ds->ds_is_installed = true;
 }
 
 void
-_dispatch_source_finalize_activation(dispatch_source_t ds)
+_dispatch_source_finalize_activation(dispatch_source_t ds, bool *allow_resume)
 {
 	dispatch_continuation_t dc;
 	dispatch_source_refs_t dr = ds->ds_refs;
+	dispatch_priority_t pri;
+	dispatch_wlh_t wlh;
 
 	if (unlikely(dr->du_is_direct &&
 			(_dispatch_queue_atomic_flags(ds->_as_dq) & DSF_CANCELED))) {
@@ -712,15 +673,12 @@
 	}
 
 	// call "super"
-	_dispatch_queue_finalize_activation(ds->_as_dq);
+	_dispatch_queue_finalize_activation(ds->_as_dq, allow_resume);
 
 	if (dr->du_is_direct && !ds->ds_is_installed) {
-		dispatch_priority_t pri = _dispatch_source_compute_kevent_priority(ds);
-		if (pri) {
-			dispatch_wlh_t wlh = ds->dq_wlh;
-			if (!wlh) wlh = _dispatch_queue_class_compute_wlh(ds);
-			_dispatch_source_install(ds, pri, wlh);
-		}
+		dispatch_queue_t dq = ds->_as_dq;
+		pri = _dispatch_queue_compute_priority_and_wlh(dq, &wlh);
+		if (pri) _dispatch_source_install(ds, wlh, pri);
 	}
 }
 
@@ -732,8 +690,18 @@
 	dispatch_source_t ds = dou._ds;
 	dispatch_queue_wakeup_target_t retq = DISPATCH_QUEUE_WAKEUP_NONE;
 	dispatch_queue_t dq = _dispatch_queue_get_current();
+	dispatch_source_refs_t dr = ds->ds_refs;
+	dispatch_queue_flags_t dqf;
 
-	flags |= DISPATCH_INVOKE_DISALLOW_SYNC_WAITERS;
+	if (!(flags & DISPATCH_INVOKE_MANAGER_DRAIN) &&
+			_dispatch_unote_wlh_changed(dr, _dispatch_get_wlh())) {
+		dqf = _dispatch_queue_atomic_flags_set_orig(ds->_as_dq,
+				DSF_WLH_CHANGED);
+		if (!(dqf & DSF_WLH_CHANGED)) {
+			_dispatch_bug_deprecated("Changing target queue "
+					"hierarchy after source was activated");
+		}
+	}
 
 	if (_dispatch_queue_class_probe(ds)) {
 		// Intentionally always drain even when on the manager queue
@@ -751,9 +719,7 @@
 
 	// The order of tests here in invoke and in wakeup should be consistent.
 
-	dispatch_source_refs_t dr = ds->ds_refs;
 	dispatch_queue_t dkq = &_dispatch_mgr_q;
-	dispatch_queue_flags_t dqf;
 	bool prevent_starvation = false;
 
 	if (dr->du_is_direct) {
@@ -777,8 +743,8 @@
 		if (dq != dkq) {
 			return dkq;
 		}
-		_dispatch_source_install(ds, _dispatch_get_basepri(),
-				_dispatch_get_wlh());
+		_dispatch_source_install(ds, _dispatch_get_wlh(),
+				_dispatch_get_basepri());
 	}
 
 	if (unlikely(DISPATCH_QUEUE_IS_SUSPENDED(ds))) {
@@ -885,7 +851,7 @@
 			// from the source handler
 			return ds->do_targetq;
 		}
-		if (prevent_starvation && dr->du_wlh == DISPATCH_WLH_GLOBAL) {
+		if (prevent_starvation && dr->du_wlh == DISPATCH_WLH_ANON) {
 			// keep the old behavior to force re-enqueue to our target queue
 			// for the rearm.
 			//
@@ -897,7 +863,7 @@
 		if (unlikely(!_dispatch_source_refs_resume(ds))) {
 			goto unregister_event;
 		}
-		if (!prevent_starvation && dr->du_wlh != DISPATCH_WLH_GLOBAL) {
+		if (!prevent_starvation && _dispatch_wlh_should_poll_unote(dr)) {
 			// try to redrive the drain from under the lock for sources
 			// targeting an overcommit root queue to avoid parking
 			// when the next event has already fired
@@ -913,7 +879,8 @@
 _dispatch_source_invoke(dispatch_source_t ds, dispatch_invoke_context_t dic,
 		dispatch_invoke_flags_t flags)
 {
-	_dispatch_queue_class_invoke(ds, dic, flags, _dispatch_source_invoke2);
+	_dispatch_queue_class_invoke(ds, dic, flags,
+			DISPATCH_INVOKE_DISALLOW_SYNC_WAITERS, _dispatch_source_invoke2);
 }
 
 void
@@ -978,13 +945,12 @@
 		tq = DISPATCH_QUEUE_WAKEUP_TARGET;
 	}
 
-	if (tq) {
-		return _dispatch_queue_class_wakeup(ds->_as_dq, qos, flags, tq);
-	} else if (qos) {
-		return _dispatch_queue_class_override_drainer(ds->_as_dq, qos, flags);
-	} else if (flags & DISPATCH_WAKEUP_CONSUME) {
-		return _dispatch_release_tailcall(ds);
+	if ((tq == DISPATCH_QUEUE_WAKEUP_TARGET) &&
+			ds->do_targetq == &_dispatch_mgr_q) {
+		tq = DISPATCH_QUEUE_WAKEUP_MGR;
 	}
+
+	return _dispatch_queue_class_wakeup(ds->_as_dq, qos, flags, tq);
 }
 
 void
@@ -995,13 +961,13 @@
 	// could potentially invoke the source, do the cancellation,
 	// unregister the source, and deallocate it. We would
 	// need to therefore retain/release before setting the bit
-	_dispatch_retain(ds);
+	_dispatch_retain_2(ds);
 
 	dispatch_queue_t q = ds->_as_dq;
 	if (_dispatch_queue_atomic_flags_set_orig(q, DSF_CANCELED) & DSF_CANCELED) {
-		_dispatch_release_tailcall(ds);
+		_dispatch_release_2_tailcall(ds);
 	} else {
-		dx_wakeup(ds, 0, DISPATCH_WAKEUP_FLUSH | DISPATCH_WAKEUP_CONSUME);
+		dx_wakeup(ds, 0, DISPATCH_WAKEUP_MAKE_DIRTY | DISPATCH_WAKEUP_CONSUME_2);
 	}
 }
 
@@ -1036,13 +1002,12 @@
 		return;
 	}
 	if (dqf & DSF_CANCEL_WAITER) {
-		goto override;
+		goto wakeup;
 	}
 
 	// simplified version of _dispatch_queue_drain_try_lock
 	// that also sets the DIRTY bit on failure to lock
-	dispatch_lock_owner tid_self = _dispatch_tid_self();
-	uint64_t xor_owner_and_set_full_width = tid_self |
+	uint64_t set_owner_and_set_full_width = _dispatch_lock_value_for_self() |
 			DISPATCH_QUEUE_WIDTH_FULL_BIT | DISPATCH_QUEUE_IN_BARRIER;
 	uint64_t old_state, new_state;
 
@@ -1051,7 +1016,7 @@
 		if (likely(_dq_state_is_runnable(old_state) &&
 				!_dq_state_drain_locked(old_state))) {
 			new_state &= DISPATCH_QUEUE_DRAIN_PRESERVED_BITS_MASK;
-			new_state ^= xor_owner_and_set_full_width;
+			new_state |= set_owner_and_set_full_width;
 		} else if (old_dqf & DSF_CANCELED) {
 			os_atomic_rmw_loop_give_up(break);
 		} else {
@@ -1081,15 +1046,15 @@
 				_dispatch_source_cancel_callout(ds, NULL, DISPATCH_INVOKE_NONE);
 			}
 		}
-		_dispatch_try_lock_transfer_or_wakeup(ds->_as_dq);
-	} else if (unlikely(_dq_state_drain_locked_by(old_state, tid_self))) {
+		dx_wakeup(ds, 0, DISPATCH_WAKEUP_BARRIER_COMPLETE);
+	} else if (unlikely(_dq_state_drain_locked_by_self(old_state))) {
 		DISPATCH_CLIENT_CRASH(ds, "dispatch_source_cancel_and_wait "
 				"called from a source handler");
 	} else {
 		dispatch_qos_t qos;
-override:
+wakeup:
 		qos = _dispatch_qos_from_pp(_dispatch_get_priority());
-		dx_wakeup(ds, qos, DISPATCH_WAKEUP_OVERRIDING | DISPATCH_WAKEUP_FLUSH);
+		dx_wakeup(ds, qos, DISPATCH_WAKEUP_MAKE_DIRTY);
 		dispatch_activate(ds);
 	}
 
@@ -1121,8 +1086,8 @@
 		// threads running _dispatch_source_invoke2 to dispose of the source,
 		// so we can't safely borrow the reference we get from the muxnote udata
 		// anymore, and need our own
-		wflags = DISPATCH_WAKEUP_CONSUME;
-		_dispatch_retain(ds); // rdar://20382435
+		wflags = DISPATCH_WAKEUP_CONSUME_2;
+		_dispatch_retain_2(ds); // rdar://20382435
 	}
 
 	if ((flags & EV_UDATA_SPECIFIC) && (flags & EV_ONESHOT) &&
@@ -1188,7 +1153,7 @@
 
 done:
 	_dispatch_object_debug(ds, "%s", __func__);
-	dx_wakeup(ds, _dispatch_qos_from_pp(pp), wflags | DISPATCH_WAKEUP_FLUSH);
+	dx_wakeup(ds, _dispatch_qos_from_pp(pp), wflags | DISPATCH_WAKEUP_MAKE_DIRTY);
 }
 
 #pragma mark -
@@ -1267,7 +1232,7 @@
 		// Clear any pending data that might have accumulated on
 		// older timer params <rdar://problem/8574886>
 		os_atomic_store2o(ds, ds_pending_data, 0, relaxed);
-		_dispatch_timers_update(dt);
+		_dispatch_timers_update(dt, 0);
 	}
 }
 
@@ -1343,7 +1308,7 @@
 	_dispatch_source_timer_telemetry(ds, dtc->dtc_clock, &dtc->dtc_timer);
 	dtc = os_atomic_xchg2o(dt, dt_pending_config, dtc, release);
 	if (dtc) free(dtc);
-	dx_wakeup(ds, 0, DISPATCH_WAKEUP_FLUSH);
+	dx_wakeup(ds, 0, DISPATCH_WAKEUP_MAKE_DIRTY);
 }
 
 static void
@@ -1796,6 +1761,9 @@
 {
 	uint32_t idx = (dth->dth_count += DTH_ID_COUNT) - DTH_ID_COUNT;
 
+	dispatch_assert(dt->dt_heap_entry[DTH_TARGET_ID] == DTH_INVALID_ID);
+	dispatch_assert(dt->dt_heap_entry[DTH_DEADLINE_ID] == DTH_INVALID_ID);
+
 	if (idx == 0) {
 		dt->dt_heap_entry[DTH_TARGET_ID] = DTH_TARGET_ID;
 		dt->dt_heap_entry[DTH_DEADLINE_ID] = DTH_DEADLINE_ID;
@@ -1814,27 +1782,36 @@
 DISPATCH_NOINLINE
 static void
 _dispatch_timer_heap_remove(dispatch_timer_heap_t dth,
-		dispatch_timer_source_refs_t removed_dt)
+		dispatch_timer_source_refs_t dt)
 {
 	uint32_t idx = (dth->dth_count -= DTH_ID_COUNT);
 
+	dispatch_assert(dt->dt_heap_entry[DTH_TARGET_ID] != DTH_INVALID_ID);
+	dispatch_assert(dt->dt_heap_entry[DTH_DEADLINE_ID] != DTH_INVALID_ID);
+
 	if (idx == 0) {
+		dispatch_assert(dth->dth_min[DTH_TARGET_ID] == dt);
+		dispatch_assert(dth->dth_min[DTH_DEADLINE_ID] == dt);
 		dth->dth_min[DTH_TARGET_ID] = dth->dth_min[DTH_DEADLINE_ID] = NULL;
-		return;
+		goto clear_heap_entry;
 	}
 
 	for (uint32_t heap_id = 0; heap_id < DTH_ID_COUNT; heap_id++) {
-		dispatch_timer_source_refs_t *slot, dt;
+		dispatch_timer_source_refs_t *slot, last_dt;
 		slot = _dispatch_timer_heap_get_slot(dth, idx + heap_id);
-		dt = *slot; *slot = NULL;
-		if (dt != removed_dt) {
-			uint32_t removed_idx = removed_dt->dt_heap_entry[heap_id];
-			_dispatch_timer_heap_resift(dth, dt, removed_idx);
+		last_dt = *slot; *slot = NULL;
+		if (last_dt != dt) {
+			uint32_t removed_idx = dt->dt_heap_entry[heap_id];
+			_dispatch_timer_heap_resift(dth, last_dt, removed_idx);
 		}
 	}
 	if (unlikely(idx <= _dispatch_timer_heap_capacity(dth->dth_segments - 1))) {
 		_dispatch_timer_heap_shrink(dth);
 	}
+
+clear_heap_entry:
+	dt->dt_heap_entry[DTH_TARGET_ID] = DTH_INVALID_ID;
+	dt->dt_heap_entry[DTH_DEADLINE_ID] = DTH_INVALID_ID;
 }
 
 DISPATCH_ALWAYS_INLINE
@@ -1842,6 +1819,9 @@
 _dispatch_timer_heap_update(dispatch_timer_heap_t dth,
 		dispatch_timer_source_refs_t dt)
 {
+	dispatch_assert(dt->dt_heap_entry[DTH_TARGET_ID] != DTH_INVALID_ID);
+	dispatch_assert(dt->dt_heap_entry[DTH_DEADLINE_ID] != DTH_INVALID_ID);
+
 	_dispatch_timer_heap_resift(dth, dt, dt->dt_heap_entry[DTH_TARGET_ID]);
 	_dispatch_timer_heap_resift(dth, dt, dt->dt_heap_entry[DTH_DEADLINE_ID]);
 }
@@ -1886,6 +1866,7 @@
 	_dispatch_timer_heap_remove(heap, dt);
 	_dispatch_timers_reconfigure = true;
 	_dispatch_timers_processing_mask |= 1 << tidx;
+	dispatch_assert(dt->du_wlh == NULL || dt->du_wlh == DISPATCH_WLH_ANON);
 	dt->du_wlh = NULL;
 }
 
@@ -1902,7 +1883,8 @@
 	}
 	_dispatch_timers_reconfigure = true;
 	_dispatch_timers_processing_mask |= 1 << tidx;
-	dt->du_wlh = DISPATCH_WLH_GLOBAL;
+	dispatch_assert(dt->du_wlh == NULL || dt->du_wlh == DISPATCH_WLH_ANON);
+	dt->du_wlh = DISPATCH_WLH_ANON;
 }
 
 DISPATCH_ALWAYS_INLINE
@@ -1922,7 +1904,7 @@
 // Updates the ordered list of timers based on next fire date for changes to ds.
 // Should only be called from the context of _dispatch_mgr_q.
 static void
-_dispatch_timers_update(dispatch_unote_t du)
+_dispatch_timers_update(dispatch_unote_t du, uint32_t flags)
 {
 	dispatch_timer_source_refs_t dr = du._dt;
 	dispatch_source_t ds = _dispatch_source_from_refs(dr);
@@ -1932,26 +1914,47 @@
 	DISPATCH_ASSERT_ON_MANAGER_QUEUE();
 
 	if (unlikely(dr->du_ident == DISPATCH_TIMER_IDENT_CANCELED)) {
+		dispatch_assert((flags & DISPATCH_TIMERS_RETAIN_2) == 0);
 		return;
 	}
 
 	// Unregister timers that are unconfigured, disabled, suspended or have
 	// missed intervals. Rearm after dispatch_set_timer(), resume or source
 	// invoke will reenable them
-	will_register = dr->dt_timer.target < INT64_MAX &&
+	will_register = !(flags & DISPATCH_TIMERS_UNREGISTER) &&
+			dr->dt_timer.target < INT64_MAX &&
 			!os_atomic_load2o(ds, ds_pending_data, relaxed) &&
 			!DISPATCH_QUEUE_IS_SUSPENDED(ds) &&
 			!os_atomic_load2o(dr, dt_pending_config, relaxed);
-	if (!_dispatch_unote_registered(dr) && will_register) {
-		if (unlikely(!_dispatch_source_timer_tryarm(ds))) {
+	if (likely(!_dispatch_unote_registered(dr))) {
+		dispatch_assert((flags & DISPATCH_TIMERS_RETAIN_2) == 0);
+		if (unlikely(!will_register || !_dispatch_source_timer_tryarm(ds))) {
 			return;
 		}
 		verb = "armed";
-	} else if (unlikely(_dispatch_unote_registered(dr) && !will_register)) {
+	} else if (unlikely(!will_register)) {
 		disarm = true;
 		verb = "disarmed";
 	}
 
+	// The heap owns a +2 on dispatch sources it references
+	//
+	// _dispatch_timers_run2() also sometimes passes DISPATCH_TIMERS_RETAIN_2
+	// when it wants to take over this +2 at the same time we are unregistering
+	// the timer from the heap.
+	//
+	// Compute our refcount balance according to these rules: if the balance
+	// would become negative, we retain the source up front; if it is positive,
+	// we drop the extraneous refcounts after we're done touching the source.
+	int refs = will_register ? -2 : 0;
+	if (_dispatch_unote_registered(dr) && !(flags & DISPATCH_TIMERS_RETAIN_2)) {
+		refs += 2;
+	}
+	if (refs < 0) {
+		dispatch_assert(refs == -2);
+		_dispatch_retain_2(ds);
+	}
+
 	uint32_t tidx = _dispatch_source_timer_idx(dr);
 	if (unlikely(_dispatch_unote_registered(dr) &&
 			(!will_register || dr->du_ident != tidx))) {
@@ -1966,6 +1969,10 @@
 	}
 	_dispatch_debug("kevent-source[%p]: %s timer[%p]", ds, verb, dr);
 	_dispatch_object_debug(ds, "%s", __func__);
+	if (refs > 0) {
+		dispatch_assert(refs == 2);
+		_dispatch_release_2_tailcall(ds);
+	}
 }
 
 #define DISPATCH_TIMER_MISSED_MARKER  1ul
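The refcount-balance rule described in the comment above reduces to a small piece of arithmetic over three booleans, and writing it out makes the cases easier to audit. A toy version with assumed names:

#include <stdbool.h>

// refs < 0: retain the source before touching it
// refs > 0: drop the extra references once we're done
// refs == 0: the +2 simply changes hands (or nothing was registered)
static int
toy_timer_refs_balance(bool was_registered, bool will_register,
		bool caller_takes_over_plus_2)
{
	int refs = will_register ? -2 : 0;        // the heap must gain +2
	if (was_registered && !caller_takes_over_plus_2) {
		refs += 2;                            // the heap gives its +2 back
	}
	return refs;
}

For example, re-registering an already-registered timer yields -2 + 2 = 0, so no retain or release is needed, matching the hunks above.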
@@ -2058,21 +2065,19 @@
 			continue;
 		}
 
-		_dispatch_retain(ds);
 		data = os_atomic_load2o(ds, ds_pending_data, relaxed);
 		if (unlikely(data)) {
 			// the release barrier is required to make the changes
 			// to `ds_timer` visible to _dispatch_source_timer_data()
 			if (os_atomic_cmpxchg2o(ds, ds_pending_data, data,
 					data | DISPATCH_TIMER_MISSED_MARKER, release)) {
-				_dispatch_timers_update(dr);
-				_dispatch_release(ds);
+				_dispatch_timers_update(dr, DISPATCH_TIMERS_UNREGISTER);
 				continue;
 			}
 		}
 
 		data = _dispatch_source_timer_compute_missed(dr, now, 0);
-		_dispatch_timers_update(dr);
+		_dispatch_timers_update(dr, DISPATCH_TIMERS_RETAIN_2);
 		pending_data = data << 1;
 		if (!_dispatch_unote_registered(dr) && dr->dt_timer.target < INT64_MAX){
 			// if we unregistered because of suspension we have to fake we
@@ -2085,7 +2090,7 @@
 		_dispatch_trace_timer_fire(dr, data, data);
 		_dispatch_debug("kevent-source[%p]: fired timer[%p]", ds, dr);
 		_dispatch_object_debug(ds, "%s", __func__);
-		dx_wakeup(ds, 0, DISPATCH_WAKEUP_FLUSH | DISPATCH_WAKEUP_CONSUME);
+		dx_wakeup(ds, 0, DISPATCH_WAKEUP_MAKE_DIRTY | DISPATCH_WAKEUP_CONSUME_2);
 	}
 }
 
@@ -2254,55 +2259,46 @@
 #pragma mark dispatch_mgr
 
 void
-_dispatch_mgr_queue_wakeup(dispatch_queue_t dq,
-		dispatch_qos_t qos, dispatch_wakeup_flags_t flags)
+_dispatch_mgr_queue_push(dispatch_queue_t dq, dispatch_object_t dou,
+		DISPATCH_UNUSED dispatch_qos_t qos)
 {
-	if (flags & DISPATCH_WAKEUP_FLUSH) {
-		os_atomic_or2o(dq, dq_state, DISPATCH_QUEUE_DIRTY, release);
+	uint64_t dq_state;
+	_dispatch_trace_continuation_push(dq, dou._do);
+	if (unlikely(_dispatch_queue_push_update_tail(dq, dou._do))) {
+		_dispatch_queue_push_update_head(dq, dou._do);
+		dq_state = os_atomic_or2o(dq, dq_state, DISPATCH_QUEUE_DIRTY, release);
+		if (!_dq_state_drain_locked_by_self(dq_state)) {
+			_dispatch_event_loop_poke(DISPATCH_WLH_MANAGER, 0, 0);
+		}
 	}
+}
 
-	if (_dispatch_queue_get_current() == &_dispatch_mgr_q) {
-		return;
-	}
-
-	if (!_dispatch_queue_class_probe(&_dispatch_mgr_q)) {
-		return;
-	}
-
-	_dispatch_event_loop_poke(DISPATCH_WLH_MANAGER, qos, 0);
+DISPATCH_NORETURN
+void
+_dispatch_mgr_queue_wakeup(DISPATCH_UNUSED dispatch_queue_t dq,
+		DISPATCH_UNUSED dispatch_qos_t qos,
+		DISPATCH_UNUSED dispatch_wakeup_flags_t flags)
+{
+	DISPATCH_INTERNAL_CRASH(0, "Don't try to wake up or override the manager");
 }
 
 #if DISPATCH_USE_MGR_THREAD
-DISPATCH_NOINLINE
-static void
-_dispatch_mgr_init(void)
-{
-	uint64_t owned = DISPATCH_QUEUE_SERIAL_DRAIN_OWNED;
-	_dispatch_queue_set_current(&_dispatch_mgr_q);
-	if (_dispatch_queue_drain_try_lock(&_dispatch_mgr_q,
-			DISPATCH_INVOKE_STEALING, NULL) != owned) {
-		DISPATCH_INTERNAL_CRASH(0, "Locking the manager should not fail");
-	}
-	_dispatch_mgr_priority_init();
-	_dispatch_event_loop_init();
-}
-
 DISPATCH_NOINLINE DISPATCH_NORETURN
 static void
 _dispatch_mgr_invoke(void)
 {
-	dispatch_deferred_items_s ddi;
+#if DISPATCH_EVENT_BACKEND_KEVENT
+	dispatch_kevent_s evbuf[DISPATCH_DEFERRED_ITEMS_EVENT_COUNT];
+#endif
+	dispatch_deferred_items_s ddi = {
+#if DISPATCH_EVENT_BACKEND_KEVENT
+		.ddi_maxevents = DISPATCH_DEFERRED_ITEMS_EVENT_COUNT,
+		.ddi_eventlist = evbuf,
+#endif
+	};
 	bool poll;
 
-	ddi.ddi_stashed_pri = DISPATCH_PRIORITY_NOSTASH;
-	ddi.ddi_stashed_dq = NULL;
-	ddi.ddi_stashed_rq = NULL;
-#if DISPATCH_EVENT_BACKEND_KEVENT
-	ddi.ddi_nevents = 0;
-#endif
-	dispatch_assert(_dispatch_get_wlh() == DISPATCH_WLH_GLOBAL);
 	_dispatch_deferred_items_set(&ddi);
-
 	for (;;) {
 		_dispatch_mgr_queue_drain();
 		poll = _dispatch_mgr_timers();
@@ -2325,7 +2321,9 @@
 	}
 #endif
 #if DISPATCH_USE_MGR_THREAD
-	_dispatch_mgr_init();
+	_dispatch_queue_set_current(&_dispatch_mgr_q);
+	_dispatch_mgr_priority_init();
+	_dispatch_queue_mgr_lock(&_dispatch_mgr_q);
 	// never returns, so burn bridges behind us & clear stack 2k ahead
 	_dispatch_clear_stack(2048);
 	_dispatch_mgr_invoke();
@@ -2346,14 +2344,8 @@
 		dispatch_deferred_items_t ddi)
 {
 	dispatch_assert(wlh);
-	uint64_t owned = DISPATCH_QUEUE_SERIAL_DRAIN_OWNED;
 	dispatch_priority_t old_dbp;
 
-	ddi->ddi_stashed_pri = DISPATCH_PRIORITY_NOSTASH;
-	ddi->ddi_stashed_dq = NULL;
-	ddi->ddi_stashed_rq = NULL;
-	ddi->ddi_nevents = 0;
-
 	pthread_priority_t pp = _dispatch_get_priority();
 	if (!(pp & _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG)) {
 		// If this thread does not have the event manager flag set, don't setup
@@ -2363,7 +2355,7 @@
 		// Also add the NEEDS_UNBIND flag so that
 		// _dispatch_priority_compute_update knows it has to unbind
 		pp &= _PTHREAD_PRIORITY_OVERCOMMIT_FLAG | ~_PTHREAD_PRIORITY_FLAGS_MASK;
-		if (wlh == DISPATCH_WLH_GLOBAL) {
+		if (wlh == DISPATCH_WLH_ANON) {
 			pp |= _PTHREAD_PRIORITY_NEEDS_UNBIND_FLAG;
 		} else {
 			// pthread sets the flag when it is an event delivery thread
@@ -2372,9 +2364,10 @@
 		}
 		_dispatch_thread_setspecific(dispatch_priority_key,
 				(void *)(uintptr_t)pp);
-		ddi->ddi_stashed_pri = 0;
-		if (wlh != DISPATCH_WLH_GLOBAL) {
+		if (wlh != DISPATCH_WLH_ANON) {
 			_dispatch_debug("wlh[%p]: handling events", wlh);
+		} else {
+			ddi->ddi_can_stash = true;
 		}
 		return DISPATCH_KEVENT_WORKER_IS_NOT_MANAGER;
 	}
@@ -2404,15 +2397,7 @@
 	// ensure kevents registered from this thread are registered at manager QoS
 	old_dbp = _dispatch_set_basepri(DISPATCH_PRIORITY_FLAG_MANAGER);
 	_dispatch_queue_set_current(&_dispatch_mgr_q);
-	if (_dispatch_queue_drain_try_lock(&_dispatch_mgr_q,
-			DISPATCH_INVOKE_STEALING, NULL) != owned) {
-		DISPATCH_INTERNAL_CRASH(0, "Locking the manager should not fail");
-	}
-	static int event_thread_init;
-	if (!event_thread_init) {
-		event_thread_init = 1;
-		_dispatch_event_loop_init();
-	}
+	_dispatch_queue_mgr_lock(&_dispatch_mgr_q);
 	return old_dbp;
 }
 
@@ -2420,38 +2405,35 @@
 static inline bool
 _dispatch_wlh_worker_thread_reset(dispatch_priority_t old_dbp)
 {
-	dispatch_queue_t dq = &_dispatch_mgr_q;
-	uint64_t orig_dq_state = DISPATCH_QUEUE_SERIAL_DRAIN_OWNED;
-
-	orig_dq_state = _dispatch_queue_drain_unlock(dq, orig_dq_state);
+	bool needs_poll = _dispatch_queue_mgr_unlock(&_dispatch_mgr_q);
 	_dispatch_reset_basepri(old_dbp);
+	_dispatch_reset_basepri_override();
 	_dispatch_queue_set_current(NULL);
-	return _dq_state_is_dirty(orig_dq_state);
+	return needs_poll;
 }
 
 DISPATCH_ALWAYS_INLINE
 static void
-_dispatch_wlh_worker_thread(dispatch_wlh_t wlh, dispatch_kevent_t *events,
+_dispatch_wlh_worker_thread(dispatch_wlh_t wlh, dispatch_kevent_t events,
 		int *nevents)
 {
 	_dispatch_introspection_thread_add();
+	DISPATCH_PERF_MON_VAR_INIT
 
-	dispatch_kevent_t ke = *events;
-	DISPATCH_PERF_MON_VAR
-	int n = *nevents;
-	if (!dispatch_assume(n) || !dispatch_assume(*events)) return;
+	dispatch_deferred_items_s ddi = {
+		.ddi_eventlist = events,
+	};
+	dispatch_priority_t old_dbp;
 
-	dispatch_deferred_items_s ddi;
-	dispatch_priority_t old_dbp = _dispatch_wlh_worker_thread_init(wlh, &ddi);
+	old_dbp = _dispatch_wlh_worker_thread_init(wlh, &ddi);
 	if (old_dbp == DISPATCH_KEVENT_WORKER_IS_NOT_MANAGER) {
 		_dispatch_perfmon_start_impl(true);
 	} else {
-		dispatch_assert(wlh == DISPATCH_WLH_GLOBAL);
-		wlh = DISPATCH_WLH_GLOBAL;
+		dispatch_assert(wlh == DISPATCH_WLH_ANON);
+		wlh = DISPATCH_WLH_ANON;
 	}
-	_dispatch_set_wlh(wlh);
 	_dispatch_deferred_items_set(&ddi);
-	_dispatch_event_loop_merge(ke, n);
+	_dispatch_event_loop_merge(events, *nevents);
 
 	if (old_dbp != DISPATCH_KEVENT_WORKER_IS_NOT_MANAGER) {
 		_dispatch_mgr_queue_drain();
@@ -2460,34 +2442,27 @@
 			poll = true;
 		}
 		if (poll) _dispatch_event_loop_poke(DISPATCH_WLH_MANAGER, 0, 0);
-	} else if (ddi.ddi_stashed_dq) {
-		if (wlh == DISPATCH_WLH_GLOBAL) {
-			if (ddi.ddi_nevents) _dispatch_event_loop_update();
-			_dispatch_deferred_items_set(NULL);
-		} else {
-			ddi.ddi_stashed_pri = DISPATCH_PRIORITY_NOSTASH;
-		}
-
+	} else if (ddi.ddi_stashed_dou._do) {
 		_dispatch_debug("wlh[%p]: draining deferred item %p", wlh,
-				ddi.ddi_stashed_dq);
-		_dispatch_root_queue_drain_deferred_item(ddi.ddi_stashed_rq,
-				ddi.ddi_stashed_dq DISPATCH_PERF_MON_ARGS);
+				ddi.ddi_stashed_dou._do);
+		if (wlh == DISPATCH_WLH_ANON) {
+			dispatch_assert(ddi.ddi_nevents == 0);
+			_dispatch_deferred_items_set(NULL);
+			_dispatch_root_queue_drain_deferred_item(&ddi
+					DISPATCH_PERF_MON_ARGS);
+		} else {
+			_dispatch_root_queue_drain_deferred_wlh(&ddi
+					DISPATCH_PERF_MON_ARGS);
+		}
 	}
 
 	_dispatch_deferred_items_set(NULL);
-	_dispatch_reset_wlh();
-
-	if (ddi.ddi_nevents) {
-		_dispatch_debug("flushing %d deferred kevents", ddi.ddi_nevents);
-	}
-	*nevents = ddi.ddi_nevents;
-	dispatch_static_assert(__builtin_types_compatible_p(typeof(**events),
-			typeof(*ddi.ddi_eventlist)));
-	memcpy(*events, ddi.ddi_eventlist,
-		 (size_t)ddi.ddi_nevents * sizeof(*ddi.ddi_eventlist));
-	if (old_dbp == DISPATCH_KEVENT_WORKER_IS_NOT_MANAGER && !ddi.ddi_stashed_dq) {
+	if (old_dbp == DISPATCH_KEVENT_WORKER_IS_NOT_MANAGER &&
+			!ddi.ddi_stashed_dou._do) {
 		_dispatch_perfmon_end(perfmon_thread_event_no_steal);
 	}
+	_dispatch_debug("returning %d deferred kevents", ddi.ddi_nevents);
+	*nevents = ddi.ddi_nevents;
 }
 
 DISPATCH_NOINLINE
@@ -2498,7 +2473,10 @@
 		// events for worker thread request have already been delivered earlier
 		return;
 	}
-	return _dispatch_wlh_worker_thread(DISPATCH_WLH_GLOBAL, events, nevents);
+	if (!dispatch_assume(*nevents && *events)) return;
+	_dispatch_adopt_wlh_anon();
+	_dispatch_wlh_worker_thread(DISPATCH_WLH_ANON, *events, nevents);
+	_dispatch_reset_wlh();
 }
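
The worker-thread rework above replaces the old (ddi_stashed_rq, ddi_stashed_dq, ddi_stashed_pri) fields with a single ddi_stashed_dou deferred item, and only anonymous workqueue threads (DISPATCH_WLH_ANON) set ddi_can_stash so the drain may park one follow-up item and run it after event delivery instead of requesting another worker thread. The standalone C sketch below only illustrates that stash-one-item idea under assumed names (work_item_s, try_stash, drain_deferred); it is not libdispatch API.

/*
 * Illustrative sketch only -- not libdispatch API. It mimics the idea behind
 * ddi_can_stash/ddi_stashed_dou: while draining, a worker may park exactly
 * one follow-up item in thread-local state and run it after event handling,
 * rather than asking for another worker thread.
 */
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

typedef struct work_item_s {
	void (*fn)(void *);
	void *ctx;
} work_item_s;

static __thread struct {
	bool can_stash;          // set when this thread may defer one item
	work_item_s *stashed;    // at most one deferred item
} deferred;

static bool
try_stash(work_item_s *wi)
{
	if (deferred.can_stash && deferred.stashed == NULL) {
		deferred.stashed = wi;   // defer instead of spawning a new worker
		return true;
	}
	return false;
}

static void
drain_deferred(void)
{
	work_item_s *wi = deferred.stashed;
	deferred.stashed = NULL;
	if (wi) wi->fn(wi->ctx);     // run the parked item on the same thread
}

static void hello(void *ctx) { printf("deferred: %s\n", (const char *)ctx); }

int
main(void)
{
	deferred.can_stash = true;
	work_item_s wi = { hello, "ran after event handling" };
	if (!try_stash(&wi)) hello(wi.ctx);
	// ... event processing would happen here ...
	drain_deferred();
	return 0;
}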
 
 
diff --git a/src/source_internal.h b/src/source_internal.h
index 2082274..55b81e7 100644
--- a/src/source_internal.h
+++ b/src/source_internal.h
@@ -98,13 +98,13 @@
 
 #endif // __cplusplus
 
-dispatch_priority_t
-_dispatch_source_compute_kevent_priority(dispatch_source_t ds);
-void _dispatch_source_refs_register(dispatch_source_t ds, dispatch_priority_t bp);
+void _dispatch_source_refs_register(dispatch_source_t ds,
+		dispatch_wlh_t wlh, dispatch_priority_t bp);
 void _dispatch_source_refs_unregister(dispatch_source_t ds, uint32_t options);
 void _dispatch_source_xref_dispose(dispatch_source_t ds);
-void _dispatch_source_dispose(dispatch_source_t ds);
-void _dispatch_source_finalize_activation(dispatch_source_t ds);
+void _dispatch_source_dispose(dispatch_source_t ds, bool *allow_free);
+void _dispatch_source_finalize_activation(dispatch_source_t ds,
+		bool *allow_resume);
 void _dispatch_source_invoke(dispatch_source_t ds,
 		dispatch_invoke_context_t dic, dispatch_invoke_flags_t flags);
 void _dispatch_source_wakeup(dispatch_source_t ds, dispatch_qos_t qos,
@@ -117,6 +117,8 @@
 void _dispatch_source_merge_data(dispatch_source_t ds, pthread_priority_t pp,
 		unsigned long val);
 
+void _dispatch_mgr_queue_push(dispatch_queue_t dq, dispatch_object_t dou,
+		dispatch_qos_t qos);
 void _dispatch_mgr_queue_wakeup(dispatch_queue_t dq, dispatch_qos_t qos,
 		dispatch_wakeup_flags_t flags);
 void _dispatch_mgr_thread(dispatch_queue_t dq, dispatch_invoke_context_t dic,
diff --git a/src/swift/DispatchStubs.cc b/src/swift/DispatchStubs.cc
index 2c76b7b..de309c7 100644
--- a/src/swift/DispatchStubs.cc
+++ b/src/swift/DispatchStubs.cc
@@ -173,12 +173,6 @@
   dispatch_retain(obj);
 }
 
-// DISPATCH_RUNTIME_STDLIB_INTERFACE
-// extern "C" dispatch_queue_t
-// _swift_apply_current_root_queue() {
-// 	return DISPATCH_APPLY_CURRENT_ROOT_QUEUE;
-// }
-
 #define SOURCE(t)                                                              \
   SWIFT_CC(swift)                                                              \
   DISPATCH_RUNTIME_STDLIB_INTERFACE extern "C" dispatch_source_type_t  \
diff --git a/src/swift/Queue.swift b/src/swift/Queue.swift
index b7628c9..1808f9b 100644
--- a/src/swift/Queue.swift
+++ b/src/swift/Queue.swift
@@ -324,10 +324,10 @@
 		return nil
 	}
 
-	public func setSpecific<T>(key: DispatchSpecificKey<T>, value: T) {
-		let v = _DispatchSpecificValue(value: value)
+	public func setSpecific<T>(key: DispatchSpecificKey<T>, value: T?) {
 		let k = Unmanaged.passUnretained(key).toOpaque()
-		let p = Unmanaged.passRetained(v).toOpaque()
+		let v = value.flatMap { _DispatchSpecificValue(value: $0) }
+		let p = v.flatMap { Unmanaged.passRetained($0).toOpaque() }
 		dispatch_queue_set_specific(self.__wrapped, k, p, _destructDispatchSpecificValue)
 	}
 }
@@ -344,8 +344,5 @@
 @_silgen_name("_swift_dispatch_get_main_queue")
 internal func _swift_dispatch_get_main_queue() -> dispatch_queue_t
 
-@_silgen_name("_swift_dispatch_apply_current_root_queue")
-internal func _swift_dispatch_apply_current_root_queue() -> dispatch_queue_t
-
 @_silgen_name("_swift_dispatch_apply_current")
 internal func _swift_dispatch_apply_current(_ iterations: Int, _ block: @convention(block) (Int) -> Void)
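
The setSpecific change lets Swift callers pass nil to clear a key; at the C level this maps onto dispatch_queue_set_specific() with a NULL context, which drops the association so a later lookup returns NULL/nil. A minimal usage sketch of that underlying C API (queue label and key are arbitrary):

// Usage sketch of the C queue-specific API that the Swift
// setSpecific(key:value:) wrapper calls into. Passing a NULL context
// clears the association, which is what the new optional parameter
// exposes at the Swift level.
#include <dispatch/dispatch.h>
#include <stdio.h>

static int specific_key;   // its address serves as the key

int
main(void)
{
	dispatch_queue_t q = dispatch_queue_create("com.example.q", NULL);

	static char value[] = "queue-specific value";
	dispatch_queue_set_specific(q, &specific_key, value, NULL);

	dispatch_sync(q, ^{
		// on the queue, dispatch_get_specific() consults the current queue
		printf("on queue: %s\n", (char *)dispatch_get_specific(&specific_key));
	});

	// clear the association, the C equivalent of passing value: nil
	dispatch_queue_set_specific(q, &specific_key, NULL, NULL);
	printf("after clear: %p\n", dispatch_queue_get_specific(q, &specific_key));

	dispatch_release(q);
	return 0;
}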
diff --git a/src/time.c b/src/time.c
index 6db4880..5b0bab0 100644
--- a/src/time.c
+++ b/src/time.c
@@ -20,30 +20,74 @@
 
 #include "internal.h"
 
-#if !(defined(__i386__) || defined(__x86_64__) || !HAVE_MACH_ABSOLUTE_TIME) \
-		|| TARGET_OS_WIN32
-DISPATCH_CACHELINE_ALIGN _dispatch_host_time_data_s _dispatch_host_time_data = {
-	.ratio_1_to_1 = true,
-};
+#if DISPATCH_USE_HOST_TIME
+typedef struct _dispatch_host_time_data_s {
+	long double frac;
+	bool ratio_1_to_1;
+} _dispatch_host_time_data_s;
+
+DISPATCH_CACHELINE_ALIGN
+static _dispatch_host_time_data_s _dispatch_host_time_data;
+
+uint64_t (*_dispatch_host_time_mach2nano)(uint64_t machtime);
+uint64_t (*_dispatch_host_time_nano2mach)(uint64_t nsec);
+
+static uint64_t
+_dispatch_mach_host_time_mach2nano(uint64_t machtime)
+{
+	_dispatch_host_time_data_s *const data = &_dispatch_host_time_data;
+
+	if (unlikely(!machtime || data->ratio_1_to_1)) {
+		return machtime;
+	}
+	if (machtime >= INT64_MAX) {
+		return INT64_MAX;
+	}
+	long double big_tmp = ((long double)machtime * data->frac) + .5L;
+	if (unlikely(big_tmp >= INT64_MAX)) {
+		return INT64_MAX;
+	}
+	return (uint64_t)big_tmp;
+}
+
+static uint64_t
+_dispatch_mach_host_time_nano2mach(uint64_t nsec)
+{
+	_dispatch_host_time_data_s *const data = &_dispatch_host_time_data;
+
+	if (unlikely(!nsec || data->ratio_1_to_1)) {
+		return nsec;
+	}
+	if (nsec >= INT64_MAX) {
+		return INT64_MAX;
+	}
+	long double big_tmp = ((long double)nsec / data->frac) + .5L;
+	if (unlikely(big_tmp >= INT64_MAX)) {
+		return INT64_MAX;
+	}
+	return (uint64_t)big_tmp;
+}
+
+static void
+_dispatch_host_time_init(mach_timebase_info_data_t *tbi)
+{
+	_dispatch_host_time_data.frac = tbi->numer;
+	_dispatch_host_time_data.frac /= tbi->denom;
+	_dispatch_host_time_data.ratio_1_to_1 = (tbi->numer == tbi->denom);
+	_dispatch_host_time_mach2nano = _dispatch_mach_host_time_mach2nano;
+	_dispatch_host_time_nano2mach = _dispatch_mach_host_time_nano2mach;
+}
+#endif // DISPATCH_USE_HOST_TIME
 
 void
-_dispatch_get_host_time_init(void *context DISPATCH_UNUSED)
+_dispatch_time_init(void)
 {
-#if !TARGET_OS_WIN32
+#if DISPATCH_USE_HOST_TIME
 	mach_timebase_info_data_t tbi;
 	(void)dispatch_assume_zero(mach_timebase_info(&tbi));
-	_dispatch_host_time_data.frac = tbi.numer;
-	_dispatch_host_time_data.frac /= tbi.denom;
-	_dispatch_host_time_data.ratio_1_to_1 = (tbi.numer == tbi.denom);
-#else
-	LARGE_INTEGER freq;
-	dispatch_assume(QueryPerformanceFrequency(&freq));
-	_dispatch_host_time_data.frac = (long double)NSEC_PER_SEC /
-			(long double)freq.QuadPart;
-	_dispatch_host_time_data.ratio_1_to_1 = (freq.QuadPart == 1);
-#endif	/* TARGET_OS_WIN32 */
+	_dispatch_host_time_init(&tbi);
+#endif // DISPATCH_USE_HOST_TIME
 }
-#endif
 
 dispatch_time_t
 dispatch_time(dispatch_time_t inval, int64_t delta)
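
The rewritten time.c computes the mach timebase ratio once as a long double, short-circuits the common 1:1 case, and clamps to INT64_MAX on overflow before installing the converters through the _dispatch_host_time_mach2nano/_dispatch_host_time_nano2mach function pointers. A standalone, macOS-only sketch of the same ticks-to-nanoseconds conversion (helper names here are illustrative):

// Standalone sketch of the conversion performed by the new
// _dispatch_host_time_init()/_dispatch_mach_host_time_mach2nano() pair:
// cache numer/denom as a long double ratio once, special-case a 1:1
// timebase, and clamp to INT64_MAX on overflow.
#include <mach/mach_time.h>
#include <stdint.h>
#include <stdio.h>

static long double host_frac;
static int host_ratio_1_to_1;

static void
host_time_init(void)
{
	mach_timebase_info_data_t tbi;
	(void)mach_timebase_info(&tbi);
	host_frac = (long double)tbi.numer / tbi.denom;
	host_ratio_1_to_1 = (tbi.numer == tbi.denom);
}

static uint64_t
host_time_to_nanos(uint64_t machtime)
{
	if (!machtime || host_ratio_1_to_1) return machtime;
	if (machtime >= INT64_MAX) return INT64_MAX;
	long double big = (long double)machtime * host_frac + .5L;
	return big >= INT64_MAX ? INT64_MAX : (uint64_t)big;
}

int
main(void)
{
	host_time_init();
	uint64_t now = mach_absolute_time();
	printf("now = %llu ticks = %llu ns\n",
			(unsigned long long)now,
			(unsigned long long)host_time_to_nanos(now));
	return 0;
}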
diff --git a/src/voucher.c b/src/voucher.c
index 9f97b7a..5beadf0 100644
--- a/src/voucher.c
+++ b/src/voucher.c
@@ -359,18 +359,11 @@
 #define _voucher_mach_recipe_size(payload_size) \
 	(sizeof(mach_voucher_attr_recipe_data_t) + (payload_size))
 
-#if VOUCHER_USE_MACH_VOUCHER_PRIORITY
 #define _voucher_mach_recipe_alloca(v) ((mach_voucher_attr_recipe_t)alloca(\
 		_voucher_mach_recipe_size(0) + \
 		_voucher_mach_recipe_size(sizeof(ipc_pthread_priority_value_t)) + \
 		_voucher_mach_recipe_size(sizeof(_voucher_mach_udata_s)) + \
 		_voucher_extra_size(v)))
-#else
-#define _voucher_mach_recipe_alloca(v) ((mach_voucher_attr_recipe_t)alloca(\
-		_voucher_mach_recipe_size(0) + \
-		_voucher_mach_recipe_size(sizeof(_voucher_mach_udata_s)) + \
-		_voucher_extra_size(v)))
-#endif
 
 DISPATCH_ALWAYS_INLINE
 static inline mach_voucher_attr_recipe_size_t
@@ -391,7 +384,6 @@
 	};
 	size += _voucher_mach_recipe_size(0);
 
-#if VOUCHER_USE_MACH_VOUCHER_PRIORITY
 	if (pp) {
 		ipc_pthread_priority_value_t value = (ipc_pthread_priority_value_t)pp;
 		*mvar_buf++ = (mach_voucher_attr_recipe_data_t){
@@ -402,7 +394,6 @@
 		mvar_buf = _dispatch_memappend(mvar_buf, &value);
 		size += _voucher_mach_recipe_size(sizeof(value));
 	}
-#endif // VOUCHER_USE_MACH_VOUCHER_PRIORITY
 
 	if ((v && v->v_activity) || pp) {
 		_voucher_mach_udata_s *udata_buf;
@@ -517,29 +508,6 @@
 	mach_voucher_attr_recipe_size_t kvr_size = 0;
 	mach_voucher_attr_content_size_t udata_sz = 0;
 	_voucher_mach_udata_s *udata = NULL;
-#if !VOUCHER_USE_BANK_AUTOREDEEM
-	mach_voucher_t rkv;
-	const mach_voucher_attr_recipe_data_t redeem_recipe[] = {
-		[0] = {
-			.key = MACH_VOUCHER_ATTR_KEY_ALL,
-			.command = MACH_VOUCHER_ATTR_COPY,
-			.previous_voucher = kv,
-		},
-		[1] = {
-			.key = MACH_VOUCHER_ATTR_KEY_BANK,
-			.command = MACH_VOUCHER_ATTR_REDEEM,
-		},
-	};
-	kr = _voucher_create_mach_voucher(redeem_recipe, sizeof(redeem_recipe),
-			&rkv);
-	if (!dispatch_assume_zero(kr)) {
-		_voucher_dealloc_mach_voucher(kv);
-		_dispatch_kvoucher_debug("redeemed from 0x%08x", rkv, kv);
-		kv = rkv;
-	} else {
-		_dispatch_voucher_debug_machport(kv);
-	}
-#endif
 	voucher_t v = _voucher_find_and_retain(kv);
 	if (v) {
 		_dispatch_voucher_debug("kvoucher[0x%08x] found", v, kv);
@@ -594,15 +562,12 @@
 				.key = MACH_VOUCHER_ATTR_KEY_USER_DATA,
 				.command = MACH_VOUCHER_ATTR_REMOVE,
 			},
-#if VOUCHER_USE_MACH_VOUCHER_PRIORITY
 			[2] = {
 				.key = MACH_VOUCHER_ATTR_KEY_PTHPRIORITY,
 				.command = MACH_VOUCHER_ATTR_REMOVE,
 			},
-#endif
 		};
 		mach_voucher_attr_recipe_size_t size = sizeof(remove_userdata_recipe);
-
 		kr = _voucher_create_mach_voucher(remove_userdata_recipe, size, &nkv);
 		if (!dispatch_assume_zero(kr)) {
 			_dispatch_voucher_debug("kvoucher[0x%08x] udata removal "
@@ -803,7 +768,7 @@
 {
 	_dispatch_voucher_debug("xref_dispose", voucher);
 	_voucher_remove(voucher);
-	return _os_object_release_internal_inline((_os_object_t)voucher);
+	return _os_object_release_internal_n_inline((_os_object_t)voucher, 1);
 }
 
 void
@@ -869,6 +834,7 @@
 	if (dbgp) {
 		dm = dispatch_mach_create_f("com.apple.debug-channel",
 				DISPATCH_TARGET_QUEUE_DEFAULT, NULL, handler);
+		dm->dm_recv_refs->du_can_be_wlh = false; // 29906118
 		dispatch_mach_connect(dm, dbgp, MACH_PORT_NULL, NULL);
 		// will force the DISPATCH_MACH_CONNECTED event
 		dispatch_mach_send_barrier_f(dm, NULL,
@@ -1125,7 +1091,13 @@
 	info_size = proc_pidinfo(getpid(), PROC_PIDUNIQIDENTIFIERINFO, 1,
 			&p_uniqinfo, PROC_PIDUNIQIDENTIFIERINFO_SIZE);
 	if (slowpath(info_size != PROC_PIDUNIQIDENTIFIERINFO_SIZE)) {
-		DISPATCH_INTERNAL_CRASH(info_size, "Unable to get the unique pid");
+		if (info_size == 0) {
+			DISPATCH_INTERNAL_CRASH(errno,
+				"Unable to get the unique pid (error)");
+		} else {
+			DISPATCH_INTERNAL_CRASH(info_size,
+				"Unable to get the unique pid (size)");
+		}
 	}
 	_voucher_unique_pid = p_uniqinfo.p_uniqueid;
 
@@ -1457,7 +1429,7 @@
 	size_t offset = 0;
 	#define bufprintf(...) \
 			offset += dsnprintf(&buf[offset], bufsiz - offset, ##__VA_ARGS__)
-	bufprintf("voucher[%p] = { xrefcnt = 0x%x, refcnt = 0x%x", v,
+	bufprintf("voucher[%p] = { xref = %d, ref = %d", v,
 			v->os_obj_xref_cnt + 1, v->os_obj_ref_cnt + 1);
 
 	if (v->v_kvbase) {
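
The unique-pid change above distinguishes a proc_pidinfo() failure (return value 0, errno meaningful) from a short reply (unexpected size) when crashing. A standalone, macOS-only sketch of the same lookup and error split, using the public libproc interface:

// Sketch of the unique-pid lookup the voucher code performs, with the error
// split the patch introduces: a return of 0 is a real error (check errno),
// any other value that is not the expected struct size is a short reply.
#include <errno.h>
#include <libproc.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

int
main(void)
{
	struct proc_uniqidentifierinfo info;
	int size = proc_pidinfo(getpid(), PROC_PIDUNIQIDENTIFIERINFO, 1,
			&info, PROC_PIDUNIQIDENTIFIERINFO_SIZE);
	if (size == 0) {
		fprintf(stderr, "proc_pidinfo failed: %s\n", strerror(errno));
		return EXIT_FAILURE;
	}
	if (size != PROC_PIDUNIQIDENTIFIERINFO_SIZE) {
		fprintf(stderr, "short proc_pidinfo reply: %d bytes\n", size);
		return EXIT_FAILURE;
	}
	printf("unique pid: %llu\n", (unsigned long long)info.p_uniqueid);
	return 0;
}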
diff --git a/src/voucher_internal.h b/src/voucher_internal.h
index d16fc8a..a0ddd4d 100644
--- a/src/voucher_internal.h
+++ b/src/voucher_internal.h
@@ -123,9 +123,7 @@
 #define DISPATCH_VOUCHER_ACTIVITY_DEBUG 1
 #endif
 
-#if VOUCHER_USE_MACH_VOUCHER_PRIORITY
 #include <voucher/ipc_pthread_priority_types.h>
-#endif
 
 typedef uint32_t _voucher_magic_t;
 typedef uint32_t _voucher_priority_t;
@@ -264,6 +262,14 @@
 #define _dispatch_voucher_debug_machport(name) ((void)(name))
 #endif
 
+#ifndef DISPATCH_VOUCHER_OBJC_DEBUG
+#if DISPATCH_INTROSPECTION || DISPATCH_DEBUG
+#define DISPATCH_VOUCHER_OBJC_DEBUG 1
+#else
+#define DISPATCH_VOUCHER_OBJC_DEBUG 0
+#endif
+#endif // DISPATCH_VOUCHER_OBJC_DEBUG
+
 #if DISPATCH_PURE_C
 
 DISPATCH_ALWAYS_INLINE
diff --git a/xcodeconfig/libdispatch-dyld-stub.xcconfig b/xcodeconfig/libdispatch-dyld-stub.xcconfig
index aabda62..dd1814d 100644
--- a/xcodeconfig/libdispatch-dyld-stub.xcconfig
+++ b/xcodeconfig/libdispatch-dyld-stub.xcconfig
@@ -18,11 +18,11 @@
 // @APPLE_APACHE_LICENSE_HEADER_END@
 //
 
-OTHER_LDFLAGS =
-BUILD_VARIANTS = normal
-GCC_PREPROCESSOR_DEFINITIONS = $(inherited) DISPATCH_VARIANT_STATIC=1 DISPATCH_VARIANT_DYLD_STUB=1 USE_OBJC=0 DISPATCH_USE_DTRACE=0
 PRODUCT_NAME = libdispatch_dyld_stub
 INSTALL_PATH = /usr/local/lib/dyld_stub
-EXCLUDED_SOURCE_FILE_NAMES = *
-INCLUDED_SOURCE_FILE_NAMES = voucher.c // it's minimal with DISPATCH_VARIANT_DYLD_STUB
+BUILD_VARIANTS = normal
+GCC_PREPROCESSOR_DEFINITIONS = $(inherited) DISPATCH_VARIANT_DYLD_STUB=1 $(STATICLIB_PREPROCESSOR_DEFINITIONS)
+OTHER_LDFLAGS =
 VERSIONING_SYSTEM =
+EXCLUDED_SOURCE_FILE_NAMES = *
+INCLUDED_SOURCE_FILE_NAMES = voucher.c // minimal with DISPATCH_VARIANT_DYLD_STUB
diff --git a/xcodeconfig/libdispatch-mp-static.xcconfig b/xcodeconfig/libdispatch-mp-static.xcconfig
index 1f0eddc..af3715f 100644
--- a/xcodeconfig/libdispatch-mp-static.xcconfig
+++ b/xcodeconfig/libdispatch-mp-static.xcconfig
@@ -18,13 +18,12 @@
 // @APPLE_APACHE_LICENSE_HEADER_END@
 //
 
-OTHER_LDFLAGS =
-BUILD_VARIANTS = normal debug
-GCC_PREPROCESSOR_DEFINITIONS = $(inherited) DISPATCH_VARIANT_STATIC=1 USE_OBJC=0 DISPATCH_USE_DTRACE=0
-PRODUCT_NAME = libdispatch
-INSTALL_PATH = /usr/local/lib/system
-
 // skip simulator
 SUPPORTED_PLATFORMS = macosx iphoneos appletvos watchos
+PRODUCT_NAME = libdispatch
+INSTALL_PATH = /usr/local/lib/system
+BUILD_VARIANTS = normal debug
+GCC_PREPROCESSOR_DEFINITIONS = $(inherited) $(STATICLIB_PREPROCESSOR_DEFINITIONS)
+OTHER_LDFLAGS =
 SKIP_INSTALL[sdk=*simulator*] = YES
 EXCLUDED_SOURCE_FILE_NAMES[sdk=*simulator*] = *
diff --git a/xcodeconfig/libdispatch-resolved.xcconfig b/xcodeconfig/libdispatch-resolved.xcconfig
index a42add8..2f2e273 100644
--- a/xcodeconfig/libdispatch-resolved.xcconfig
+++ b/xcodeconfig/libdispatch-resolved.xcconfig
@@ -23,3 +23,4 @@
 OTHER_LDFLAGS =
 SKIP_INSTALL = YES
 VERSIONING_SYSTEM =
+EXCLUDED_SOURCE_FILE_NAMES = *
diff --git a/xcodeconfig/libdispatch-up-static.xcconfig b/xcodeconfig/libdispatch-up-static.xcconfig
index 0ece635..170c5b3 100644
--- a/xcodeconfig/libdispatch-up-static.xcconfig
+++ b/xcodeconfig/libdispatch-up-static.xcconfig
@@ -18,8 +18,11 @@
 // @APPLE_APACHE_LICENSE_HEADER_END@
 //
 
-OTHER_LDFLAGS =
+// skip simulator
+SUPPORTED_PLATFORMS = macosx iphoneos appletvos watchos
+PRODUCT_NAME = libdispatch_up
 BUILD_VARIANTS = normal
+GCC_PREPROCESSOR_DEFINITIONS = $(inherited) DISPATCH_HW_CONFIG_UP=1 $(STATICLIB_PREPROCESSOR_DEFINITIONS)
+OTHER_LDFLAGS =
 SKIP_INSTALL = YES
-EXCLUDED_SOURCE_FILE_NAMES = *
-GCC_PREPROCESSOR_DEFINITIONS = $(inherited) USE_OBJC=0 DISPATCH_USE_DTRACE=0
+EXCLUDED_SOURCE_FILE_NAMES[sdk=*simulator*] = *
diff --git a/xcodeconfig/libdispatch.order b/xcodeconfig/libdispatch.order
index 9642ca4..a25ecc9 100644
--- a/xcodeconfig/libdispatch.order
+++ b/xcodeconfig/libdispatch.order
@@ -71,18 +71,6 @@
 _OBJC_METACLASS_$_OS_dispatch_queue_serial
 _OBJC_METACLASS_$_OS_dispatch_queue_concurrent
 _OBJC_METACLASS_$_OS_dispatch_queue_root
-_OBJC_METACLASS_$_OS_dispatch_queue_main
-_OBJC_METACLASS_$_OS_dispatch_queue_runloop
-_OBJC_METACLASS_$_OS_dispatch_queue_mgr
-_OBJC_METACLASS_$_OS_dispatch_queue_specific_queue
-_OBJC_METACLASS_$_OS_dispatch_queue_attr
-_OBJC_METACLASS_$_OS_dispatch_source
-_OBJC_METACLASS_$_OS_dispatch_mach
-_OBJC_METACLASS_$_OS_dispatch_mach_msg
-_OBJC_METACLASS_$_OS_dispatch_io
-_OBJC_METACLASS_$_OS_dispatch_operation
-_OBJC_METACLASS_$_OS_dispatch_disk
-_OBJC_METACLASS_$_OS_object
 _OBJC_METACLASS_$_OS_voucher
 #_OBJC_METACLASS_$_OS_voucher_recipe
 _OBJC_METACLASS_$_OS_dispatch_data
diff --git a/xcodeconfig/libdispatch.xcconfig b/xcodeconfig/libdispatch.xcconfig
index a2ea6d9..643e1d3 100644
--- a/xcodeconfig/libdispatch.xcconfig
+++ b/xcodeconfig/libdispatch.xcconfig
@@ -71,16 +71,19 @@
 CLANG_WARN_OBJC_IMPLICIT_ATOMIC_PROPERTIES = YES
 CLANG_WARN_OBJC_MISSING_PROPERTY_SYNTHESIS = YES
 CLANG_WARN_SUSPICIOUS_IMPLICIT_CONVERSION = YES
+CLANG_WARN_SUSPICIOUS_MOVE = YES
+CLANG_WARN_UNREACHABLE_CODE = YES
 GCC_TREAT_WARNINGS_AS_ERRORS = YES
 GCC_OPTIMIZATION_LEVEL = s
-GCC_PREPROCESSOR_DEFINITIONS = __DARWIN_NON_CANCELABLE=1 $(DISPATCH_PREPROCESSOR_DEFINITIONS)
 GCC_NO_COMMON_BLOCKS = YES
+GCC_PREPROCESSOR_DEFINITIONS = __DARWIN_NON_CANCELABLE=1 $(DISPATCH_PREPROCESSOR_DEFINITIONS)
+STATICLIB_PREPROCESSOR_DEFINITIONS = DISPATCH_VARIANT_STATIC=1 USE_OBJC=0 DISPATCH_USE_DTRACE=0
 WARNING_CFLAGS = -Wall -Wextra -Warray-bounds-pointer-arithmetic -Watomic-properties -Wcomma -Wconditional-uninitialized -Wcovered-switch-default -Wdate-time -Wdeprecated -Wdouble-promotion -Wduplicate-enum -Wexpansion-to-defined -Wfloat-equal -Widiomatic-parentheses -Wignored-qualifiers -Wimplicit-fallthrough -Wnullable-to-nonnull-conversion -Wobjc-interface-ivars -Wover-aligned -Wpacked -Wpointer-arith -Wselector -Wstatic-in-inline -Wsuper-class-method-mismatch -Wswitch-enum -Wtautological-compare -Wunguarded-availability -Wunused -Wno-unknown-warning-option $(NO_WARNING_CFLAGS)
 NO_WARNING_CFLAGS = -Wno-pedantic -Wno-bad-function-cast -Wno-c++-compat -Wno-c++98-compat -Wno-c++98-compat-pedantic -Wno-cast-align -Wno-cast-qual -Wno-disabled-macro-expansion -Wno-documentation-unknown-command -Wno-format-nonliteral -Wno-missing-variable-declarations -Wno-old-style-cast -Wno-padded -Wno-reserved-id-macro -Wno-shift-sign-overflow -Wno-undef -Wno-unreachable-code-aggressive -Wno-unused-macros -Wno-used-but-marked-unused -Wno-vla
-OTHER_CFLAGS = -fverbose-asm -isystem $(SDKROOT)/System/Library/Frameworks/System.framework/PrivateHeaders
+OTHER_CFLAGS = -fverbose-asm -isystem $(SDKROOT)/System/Library/Frameworks/System.framework/PrivateHeaders $(PLATFORM_CFLAGS)
 OTHER_CFLAGS[arch=i386][sdk=macosx*] = $(OTHER_CFLAGS) -fno-unwind-tables -fno-asynchronous-unwind-tables -fno-exceptions
 OTHER_CFLAGS_normal = -momit-leaf-frame-pointer
-OTHER_CFLAGS_profile = $(OTHER_CFLAGS_normal) -DDISPATCH_PROFILE=1
+OTHER_CFLAGS_profile = $(OTHER_CFLAGS_normal) -DDISPATCH_PROFILE=1 -DDISPATCH_PERF_MON=1
 OTHER_CFLAGS_debug = -fstack-protector -fno-inline -O0 -DDISPATCH_DEBUG=1 -DOS_DEBUG=1
 GENERATE_PROFILING_CODE = NO
 DYLIB_CURRENT_VERSION = $(CURRENT_PROJECT_VERSION)
diff --git a/xcodeconfig/libfirehose.xcconfig b/xcodeconfig/libfirehose.xcconfig
index 07a8b9a..4c71199 100644
--- a/xcodeconfig/libfirehose.xcconfig
+++ b/xcodeconfig/libfirehose.xcconfig
@@ -18,18 +18,17 @@
 // @APPLE_APACHE_LICENSE_HEADER_END@
 //
 
-OTHER_MIGFLAGS = -novouchers
-OTHER_LDFLAGS =
 SUPPORTED_PLATFORMS = macosx iphoneos iphonesimulator appletvos appletvsimulator watchos watchsimulator
 PRODUCT_NAME = $(TARGET_NAME)
 INSTALL_PATH = /usr/local/lib/
+GCC_PREPROCESSOR_DEFINITIONS = $(inherited) FIREHOSE_SERVER=1 DISPATCH_USE_DTRACE=0
+OTHER_MIGFLAGS = -novouchers
+OTHER_LDFLAGS =
 PUBLIC_HEADERS_FOLDER_PATH = /usr/include/os
 PRIVATE_HEADERS_FOLDER_PATH = /usr/local/include/os
 STRIP_INSTALLED_PRODUCT = NO
 COPY_PHASE_STRIP = NO
 SEPARATE_STRIP = NO
-GCC_PREPROCESSOR_DEFINITIONS = $(inherited) FIREHOSE_SERVER=1 DISPATCH_USE_DTRACE=0
-
 VALID_ARCHS[sdk=macosx*] = $(NATIVE_ARCH_ACTUAL)
 
 COPY_HEADERS_RUN_UNIFDEF = YES
diff --git a/xcodeconfig/libfirehose_kernel.xcconfig b/xcodeconfig/libfirehose_kernel.xcconfig
index f6b2a99..c572f80 100644
--- a/xcodeconfig/libfirehose_kernel.xcconfig
+++ b/xcodeconfig/libfirehose_kernel.xcconfig
@@ -20,16 +20,14 @@
 
 #include "libfirehose.xcconfig"
 
-OTHER_CFLAGS = -mkernel -nostdinc -Wno-packed
-// LLVM_LTO = YES
+SUPPORTED_PLATFORMS = macosx iphoneos appletvos watchos
 PRODUCT_NAME = $(TARGET_NAME)
 INSTALL_PATH = /usr/local/lib/kernel/
-PRIVATE_HEADERS_FOLDER_PATH = /usr/local/include/kernel/os
-SUPPORTED_PLATFORMS = macosx iphoneos appletvos watchos
-
-HEADER_SEARCH_PATHS = $(PROJECT_DIR) $(SDKROOT)/System/Library/Frameworks/Kernel.framework/PrivateHeaders $(SDKROOT)/System/Library/Frameworks/Kernel.framework/Headers $(SDKROOT)/usr/local/include/os $(SDKROOT)/usr/local/include/firehose
-
 GCC_PREPROCESSOR_DEFINITIONS = $(inherited) KERNEL=1 DISPATCH_USE_DTRACE=0
+OTHER_CFLAGS = -mkernel -nostdinc -Wno-packed
+// LLVM_LTO = YES
+PRIVATE_HEADERS_FOLDER_PATH = /usr/local/include/kernel/os
+HEADER_SEARCH_PATHS = $(PROJECT_DIR) $(SDKROOT)/System/Library/Frameworks/Kernel.framework/PrivateHeaders $(SDKROOT)/System/Library/Frameworks/Kernel.framework/Headers $(SDKROOT)/usr/local/include/os $(SDKROOT)/usr/local/include/firehose
 
 COPY_HEADERS_RUN_UNIFDEF = YES
 COPY_HEADERS_UNIFDEF_FLAGS = -DKERNEL=1 -DOS_FIREHOSE_SPI=1 -DOS_VOUCHER_ACTIVITY_SPI_TYPES=1 -UOS_VOUCHER_ACTIVITY_SPI