Merge pull request #452 from drodriguez/fix-regex-windows

[cmake] Skip regex when CMAKE_STATIC_LIBRARY_PREFIX is empty.
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 3d52ee8..74b7849 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -77,6 +77,8 @@
   set(INSTALL_OS_HEADERS_DIR "include/os" CACHE PATH "Path where the headers will be installed")
 endif()
 
+option(DISPATCH_ENABLE_ASSERTS "enable debug assertions" FALSE)
+
 option(ENABLE_DTRACE "enable dtrace support" "")
 
 option(ENABLE_TESTING "build libdispatch tests" ON)
@@ -110,8 +112,7 @@
   set(HAVE_PTHREAD_WORKQUEUES 0)
 else()
   check_include_files(pthread/workqueue_private.h HAVE_PTHREAD_WORKQUEUE_PRIVATE_H)
-  check_include_files(pthread_workqueue.h HAVE_PTHREAD_WORKQUEUE_H)
-  if(HAVE_PTHREAD_WORKQUEUE_PRIVATE_H AND HAVE_PTHREAD_WORKQUEUE_H)
+  if(HAVE_PTHREAD_WORKQUEUE_PRIVATE_H)
     set(HAVE_PTHREAD_WORKQUEUES 1)
     set(DISPATCH_USE_INTERNAL_WORKQUEUE 0)
   else()
@@ -191,6 +192,8 @@
 check_function_exists(posix_fadvise HAVE_POSIX_FADVISE)
 check_function_exists(posix_spawnp HAVE_POSIX_SPAWNP)
 check_function_exists(pthread_key_init_np HAVE_PTHREAD_KEY_INIT_NP)
+check_function_exists(pthread_attr_setcpupercent_np HAVE_PTHREAD_ATTR_SETCPUPERCENT_NP)
+check_function_exists(pthread_yield_np HAVE_PTHREAD_YIELD_NP)
 check_function_exists(pthread_main_np HAVE_PTHREAD_MAIN_NP)
 check_function_exists(pthread_workqueue_setdispatch_np HAVE_PTHREAD_WORKQUEUE_SETDISPATCH_NP)
 check_function_exists(strlcpy HAVE_STRLCPY)
@@ -269,6 +272,7 @@
 check_symbol_exists(VQ_QUOTA "sys/mount.h" HAVE_DECL_VQ_QUOTA)
 check_symbol_exists(VQ_UPDATE "sys/mount.h" HAVE_DECL_VQ_UPDATE)
 check_symbol_exists(VQ_VERYLOWDISK "sys/mount.h" HAVE_DECL_VQ_VERYLOWDISK)
+check_symbol_exists(VQ_FREE_SPACE_CHANGE "sys/mount.h" HAVE_DECL_VQ_FREE_SPACE_CHANGE)
 check_symbol_exists(strlcpy "string.h" HAVE_STRLCPY)
 check_symbol_exists(program_invocation_name "errno.h" HAVE_DECL_PROGRAM_INVOCATION_SHORT_NAME)
 if (HAVE_DECL_PROGRAM_INVOCATION_SHORT_NAME)
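For illustration (not taken from this change): the probes added above are written into cmake/config.h.in as HAVE_* macros (see the hunks below). A minimal sketch of how such a generated macro is typically consumed by the sources; the guards shown are illustrative only:

    #include "config.h"

    #if HAVE_DECL_VQ_FREE_SPACE_CHANGE
    /* <sys/mount.h> declares VQ_FREE_SPACE_CHANGE, so the corresponding
     * filesystem event flag can be handled on this platform. */
    #endif

    #if HAVE_PTHREAD_YIELD_NP
    /* prefer pthread_yield_np() over a generic fallback when it exists */
    #endif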
diff --git a/PATCHES b/PATCHES
index c3d28b3..b448313 100644
--- a/PATCHES
+++ b/PATCHES
@@ -353,3 +353,84 @@
 [8947dcf] APPLIED rdar://33531111
 [5ad9208] APPLIED rdar://33531111
 [698d085] APPLIED rdar://33531111
+[ce1ce45] APPLIED rdar://35017478
+[291f34d] APPLIED rdar://35017478
+[666df60] APPLIED rdar://35017478
+[80dd736] APPLIED rdar://35017478
+[0fd5a69] APPLIED rdar://35017478
+[0e35ed9] APPLIED rdar://35017478
+[70ce56b] APPLIED rdar://35017478
+[40fc1f3] APPLIED rdar://35017478
+[9ec74ed] APPLIED rdar://35017478
+[7f330ed] APPLIED rdar://35017478
+[947b51c] APPLIED rdar://35017478
+[295f676] APPLIED rdar://35017478
+[48196a2] APPLIED rdar://35017478
+[a28fc2b] APPLIED rdar://35017478
+[791ce5d] APPLIED rdar://35017478
+[0d0a998] APPLIED rdar://35017478
+[29329b5] APPLIED rdar://35017478
+[141403a] APPLIED rdar://35017478
+[b7f1beb] APPLIED rdar://35017478
+[7ef9cde] APPLIED rdar://35017478
+[12c9ca8] APPLIED rdar://35017478
+[6d6dc2e] APPLIED rdar://40252515
+[4a9833d] APPLIED rdar://40252515
+[f88e382] APPLIED rdar://40252515
+[bfa9aa7] APPLIED rdar://40252515
+[44f3640] APPLIED rdar://40252515
+[3b06f54] APPLIED rdar://40252515
+[e245cbe] APPLIED rdar://40252515
+[2a539d6] APPLIED rdar://40252515
+[e52c174] APPLIED rdar://40252515
+[723bd98] APPLIED rdar://40252515
+[7e7a579] APPLIED rdar://40252515
+[244a5fe] APPLIED rdar://40252515
+[8b72f76] APPLIED rdar://40252515
+[f3531a2] APPLIED rdar://40252515
+[5cf8acb] APPLIED rdar://40252515
+[dc01e36] APPLIED rdar://40252515
+[2d6d1fd] APPLIED rdar://40252515
+[fdd671d] APPLIED rdar://40252515
+[698220e] APPLIED rdar://40252515
+[9c792ac] APPLIED rdar://40252515
+[b5ec5d8] APPLIED rdar://40252515
+[9295346] APPLIED rdar://40252515
+[bbf03ca] APPLIED rdar://40252515
+[8d3aa22] APPLIED rdar://40252515
+[f151b33] APPLIED rdar://40252515
+[f6e6917] APPLIED rdar://40252515
+[f83b5a4] APPLIED rdar://40252515
+[c4d6402] APPLIED rdar://40252515
+[1457de8] APPLIED rdar://40252515
+[c025baa] APPLIED rdar://40252515
+[a618b46] APPLIED rdar://40252515
+[e723a8e] APPLIED rdar://44568645
+[4ac77b7] APPLIED rdar://44568645
+[03696d7] APPLIED rdar://44568645
+[44f67b2] APPLIED rdar://44568645
+[b15ee59] APPLIED rdar://44568645
+[d29ed37] APPLIED rdar://44568645
+[65ebc0c] APPLIED rdar://44568645
+[93c64d8] APPLIED rdar://44568645
+[1271df6] APPLIED rdar://44568645
+[84ac6ac] APPLIED rdar://44568645
+[30d3c8c] APPLIED rdar://44568645
+[12ff819] APPLIED rdar://44568645
+[82342ee] APPLIED rdar://44568645
+[b13a51e] APPLIED rdar://44568645
+[6bf3065] APPLIED rdar://44568645
+[631821c] APPLIED rdar://44568645
+[e764f34] APPLIED rdar://44568645
+[ff1daf8] APPLIED rdar://44568645
+[b863538] APPLIED rdar://44568645
+[ba3933d] APPLIED rdar://44568645
+[9c48a80] APPLIED rdar://44568645
+[5f49e8b] APPLIED rdar://44568645
+[653a523] APPLIED rdar://44568645
+[ac5f4c4] APPLIED rdar://44568645
+[57139c6] APPLIED rdar://44568645
+[ba74b6a] APPLIED rdar://44568645
+[3975b58] APPLIED rdar://44568645
+[81dc900] APPLIED rdar://44568645
+[6162a1d] APPLIED rdar://44568645
diff --git a/cmake/config.h.in b/cmake/config.h.in
index 0133c4f..2896a20 100644
--- a/cmake/config.h.in
+++ b/cmake/config.h.in
@@ -81,6 +81,10 @@
    you don't. */
 #cmakedefine01 HAVE_DECL_VQ_VERYLOWDISK
 
+/* Define to 1 if you have the declaration of `VQ_FREE_SPACE_CHANGE', and to 0 if
+   you don't. */
+#cmakedefine01 HAVE_DECL_VQ_FREE_SPACE_CHANGE
+
 /* Define to 1 if you have the <dlfcn.h> header file. */
 #cmakedefine01 HAVE_DLFCN_H
 
@@ -141,12 +145,18 @@
 /* Define to 1 if you have the `pthread_key_init_np' function. */
 #cmakedefine HAVE_PTHREAD_KEY_INIT_NP
 
+/* Define to 1 if you have the `pthread_attr_setcpupercent_np' function. */
+#cmakedefine HAVE_PTHREAD_ATTR_SETCPUPERCENT_NP
+
 /* Define to 1 if you have the <pthread_machdep.h> header file. */
 #cmakedefine HAVE_PTHREAD_MACHDEP_H
 
 /* Define to 1 if you have the `pthread_main_np' function. */
 #cmakedefine01 HAVE_PTHREAD_MAIN_NP
 
+/* Define to 1 if you have the `pthread_yield_np' function. */
+#cmakedefine01 HAVE_PTHREAD_YIELD_NP
+
 /* Define to 1 if you have the <pthread_np.h> header file. */
 #cmakedefine01 HAVE_PTHREAD_NP_H
 
@@ -162,9 +172,6 @@
 /* Define to 1 if you have the <pthread/workqueue_private.h> header file. */
 #cmakedefine HAVE_PTHREAD_WORKQUEUE_PRIVATE_H
 
-/* Define to 1 if you have the `pthread_workqueue_setdispatch_np' function. */
-#cmakedefine HAVE_PTHREAD_WORKQUEUE_SETDISPATCH_NP
-
 /* Define to 1 if you have the <stdint.h> header file. */
 #cmakedefine01 HAVE_STDINT_H
 
diff --git a/config/config.h b/config/config.h
new file mode 100644
index 0000000..79fc5b2
--- /dev/null
+++ b/config/config.h
@@ -0,0 +1,271 @@
+/* config/config.h.  Generated from config.h.in by configure.  */
+/* config/config.h.in.  Generated from configure.ac by autoheader.  */
+
+/* Define to 1 if you have the declaration of `CLOCK_MONOTONIC', and to 0 if
+   you don't. */
+#define HAVE_DECL_CLOCK_MONOTONIC 0
+
+/* Define to 1 if you have the declaration of `CLOCK_REALTIME', and to 0 if
+   you don't. */
+#define HAVE_DECL_CLOCK_REALTIME 0
+
+/* Define to 1 if you have the declaration of `CLOCK_UPTIME', and to 0 if you
+   don't. */
+#define HAVE_DECL_CLOCK_UPTIME 0
+
+/* Define to 1 if you have the declaration of `CLOCK_UPTIME_FAST', and to 0
+   if you don't. */
+#define HAVE_DECL_CLOCK_UPTIME_FAST 0
+
+/* Define to 1 if you have the declaration of `FD_COPY', and to 0 if you
+   don't. */
+#define HAVE_DECL_FD_COPY 1
+
+/* Define to 1 if you have the declaration of `NOTE_LOWAT', and to 0 if you
+   don't. */
+#define HAVE_DECL_NOTE_LOWAT 1
+
+/* Define to 1 if you have the declaration of `NOTE_NONE', and to 0 if you
+   don't. */
+#define HAVE_DECL_NOTE_NONE 1
+
+/* Define to 1 if you have the declaration of `NOTE_REAP', and to 0 if you
+   don't. */
+#define HAVE_DECL_NOTE_REAP 1
+
+/* Define to 1 if you have the declaration of `NOTE_REVOKE', and to 0 if you
+   don't. */
+#define HAVE_DECL_NOTE_REVOKE 1
+
+/* Define to 1 if you have the declaration of `NOTE_SIGNAL', and to 0 if you
+   don't. */
+#define HAVE_DECL_NOTE_SIGNAL 1
+
+/* Define to 1 if you have the declaration of `POSIX_SPAWN_START_SUSPENDED',
+   and to 0 if you don't. */
+#define HAVE_DECL_POSIX_SPAWN_START_SUSPENDED 1
+
+/* Define to 1 if you have the declaration of `program_invocation_short_name',
+   and to 0 if you don't. */
+#define HAVE_DECL_PROGRAM_INVOCATION_SHORT_NAME 0
+
+/* Define to 1 if you have the declaration of `SIGEMT', and to 0 if you don't.
+   */
+#define HAVE_DECL_SIGEMT 1
+
+/* Define to 1 if you have the declaration of `VQ_UPDATE', and to 0 if you
+   don't. */
+#define HAVE_DECL_VQ_UPDATE 1
+
+/* Define to 1 if you have the declaration of `VQ_VERYLOWDISK', and to 0 if
+   you don't. */
+#define HAVE_DECL_VQ_VERYLOWDISK 1
+
+/* Define to 1 if you have the declaration of `VQ_QUOTA', and to 0 if
+   you don't. */
+#define HAVE_DECL_VQ_QUOTA 1
+
+/* Define to 1 if you have the declaration of `VQ_NEARLOWDISK', and to 0 if
+   you don't. */
+#define HAVE_DECL_VQ_NEARLOWDISK 1
+
+/* Define to 1 if you have the declaration of `VQ_DESIRED_DISK', and to 0 if
+   you don't. */
+#define HAVE_DECL_VQ_DESIRED_DISK 1
+
+/* Define to 1 if you have the declaration of `VQ_FREE_SPACE_CHANGE', and to 0 if
+   you don't. */
+#define HAVE_DECL_VQ_FREE_SPACE_CHANGE 1
+
+/* Define to 1 if you have the <dlfcn.h> header file. */
+#define HAVE_DLFCN_H 1
+
+/* Define to 1 if you have the <fcntl.h> header file. */
+#define HAVE_FCNTL_H 1
+
+/* Define to 1 if you have the `getprogname' function. */
+#define HAVE_GETPROGNAME 1
+
+/* Define to 1 if you have the <inttypes.h> header file. */
+#define HAVE_INTTYPES_H 1
+
+/* Define if Apple leaks program is present */
+#define HAVE_LEAKS 1
+
+/* Define to 1 if you have the <libkern/OSAtomic.h> header file. */
+#define HAVE_LIBKERN_OSATOMIC_H 1
+
+/* Define to 1 if you have the <libkern/OSCrossEndian.h> header file. */
+#define HAVE_LIBKERN_OSCROSSENDIAN_H 1
+
+/* Define to 1 if you have the <libproc_internal.h> header file. */
+#define HAVE_LIBPROC_INTERNAL_H 1
+
+/* Define if mach is present */
+#define HAVE_MACH 1
+
+/* Define to 1 if you have the `mach_absolute_time' function. */
+#define HAVE_MACH_ABSOLUTE_TIME 1
+
+/* Define to 1 if you have the `mach_approximate_time' function. */
+#define HAVE_MACH_APPROXIMATE_TIME 1
+
+/* Define to 1 if you have the `malloc_create_zone' function. */
+#define HAVE_MALLOC_CREATE_ZONE 1
+
+/* Define to 1 if you have the <malloc/malloc.h> header file. */
+#define HAVE_MALLOC_MALLOC_H 1
+
+/* Define to 1 if you have the <memory.h> header file. */
+#define HAVE_MEMORY_H 1
+
+/* Define if __builtin_trap marked noreturn */
+#define HAVE_NORETURN_BUILTIN_TRAP 1
+
+/* Define if you have the Objective-C runtime */
+#define HAVE_OBJC 1
+
+/* Define to 1 if you have the `pthread_key_init_np' function. */
+#define HAVE_PTHREAD_KEY_INIT_NP 1
+
+/* Define to 1 if you have the `pthread_attr_setcpupercent_np' function. */
+#define HAVE_PTHREAD_ATTR_SETCPUPERCENT_NP 1
+
+/* Define to 1 if you have the <pthread_machdep.h> header file. */
+#define HAVE_PTHREAD_MACHDEP_H 1
+
+/* Define to 1 if you have the `pthread_main_np' function. */
+#define HAVE_PTHREAD_MAIN_NP 1
+
+/* Define to 1 if you have the `pthread_yield_np' function. */
+#define HAVE_PTHREAD_YIELD_NP 1
+
+/* Define to 1 if you have the <pthread_np.h> header file. */
+/* #undef HAVE_PTHREAD_NP_H */
+
+/* Define to 1 if you have the <pthread/qos.h> header file. */
+#define HAVE_PTHREAD_QOS_H 1
+
+/* Define if pthread work queues are present */
+#define HAVE_PTHREAD_WORKQUEUES 1
+
+/* Define to 1 if you have the <pthread_workqueue.h> header file. */
+#define HAVE_PTHREAD_WORKQUEUE_H 1
+
+/* Define to 1 if you have the <pthread/workqueue_private.h> header file. */
+#define HAVE_PTHREAD_WORKQUEUE_PRIVATE_H 1
+
+/* Define to 1 if you have the <stdint.h> header file. */
+#define HAVE_STDINT_H 1
+
+/* Define to 1 if you have the <stdlib.h> header file. */
+#define HAVE_STDLIB_H 1
+
+/* Define to 1 if you have the <strings.h> header file. */
+#define HAVE_STRINGS_H 1
+
+/* Define to 1 if you have the <string.h> header file. */
+#define HAVE_STRING_H 1
+
+/* Define to 1 if you have the `sysconf' function. */
+#define HAVE_SYSCONF 1
+
+/* Define to 1 if you have the <sys/cdefs.h> header file. */
+#define HAVE_SYS_CDEFS_H 1
+
+/* Define to 1 if you have the <sys/guarded.h> header file. */
+#define HAVE_SYS_GUARDED_H 1
+
+/* Define to 1 if you have the <sys/stat.h> header file. */
+#define HAVE_SYS_STAT_H 1
+
+/* Define to 1 if you have the <sys/types.h> header file. */
+#define HAVE_SYS_TYPES_H 1
+
+/* Define to 1 if you have the <TargetConditionals.h> header file. */
+#define HAVE_TARGETCONDITIONALS_H 1
+
+/* Define to 1 if you have the <unistd.h> header file. */
+#define HAVE_UNISTD_H 1
+
+/* Define to 1 if you have the `_pthread_workqueue_init' function. */
+#define HAVE__PTHREAD_WORKQUEUE_INIT 1
+
+/* Define to the sub-directory where libtool stores uninstalled libraries. */
+#define LT_OBJDIR ".libs/"
+
+/* Name of package */
+#define PACKAGE "libdispatch"
+
+/* Define to the address where bug reports for this package should be sent. */
+#define PACKAGE_BUGREPORT "libdispatch@macosforge.org"
+
+/* Define to the full name of this package. */
+#define PACKAGE_NAME "libdispatch"
+
+/* Define to the full name and version of this package. */
+#define PACKAGE_STRING "libdispatch 1.3"
+
+/* Define to the one symbol short name of this package. */
+#define PACKAGE_TARNAME "libdispatch"
+
+/* Define to the home page for this package. */
+#define PACKAGE_URL "http://libdispatch.macosforge.org"
+
+/* Define to the version of this package. */
+#define PACKAGE_VERSION "1.3"
+
+/* Define to 1 if you have the ANSI C header files. */
+#define STDC_HEADERS 1
+
+/* Define to use non-portable pthread TSD optimizations for Mac OS X */
+#define USE_APPLE_TSD_OPTIMIZATIONS 1
+
+/* Define to tag libdispatch_init as a constructor */
+/* #undef USE_LIBDISPATCH_INIT_CONSTRUCTOR */
+
+/* Define to use Mach semaphores */
+#define USE_MACH_SEM 1
+
+/* Define to use POSIX semaphores */
+/* #undef USE_POSIX_SEM */
+
+/* Enable extensions on AIX 3, Interix.  */
+#ifndef _ALL_SOURCE
+# define _ALL_SOURCE 1
+#endif
+/* Enable GNU extensions on systems that have them.  */
+#ifndef _GNU_SOURCE
+# define _GNU_SOURCE 1
+#endif
+/* Enable threading extensions on Solaris.  */
+#ifndef _POSIX_PTHREAD_SEMANTICS
+# define _POSIX_PTHREAD_SEMANTICS 1
+#endif
+/* Enable extensions on HP NonStop.  */
+#ifndef _TANDEM_SOURCE
+# define _TANDEM_SOURCE 1
+#endif
+/* Enable general extensions on Solaris.  */
+#ifndef __EXTENSIONS__
+# define __EXTENSIONS__ 1
+#endif
+
+
+/* Version number of package */
+#define VERSION "1.3"
+
+/* Define to 1 if on MINIX. */
+/* #undef _MINIX */
+
+/* Define to 2 if the system does not provide POSIX.1 features except with
+   this defined. */
+/* #undef _POSIX_1_SOURCE */
+
+/* Define to 1 if you need to in order for `stat' and other things to work. */
+/* #undef _POSIX_SOURCE */
+
+/* Define if using Darwin $NOCANCEL */
+#define __DARWIN_NON_CANCELABLE 1
+
+#define HAVE_STRLCPY 1
diff --git a/dispatch/base.h b/dispatch/base.h
index 62579ec..0c8540a 100644
--- a/dispatch/base.h
+++ b/dispatch/base.h
@@ -203,6 +203,12 @@
 #define DISPATCH_NOESCAPE
 #endif
 
+#if __has_attribute(cold)
+#define DISPATCH_COLD __attribute__((__cold__))
+#else
+#define DISPATCH_COLD
+#endif
+
 #if __has_feature(assume_nonnull)
 #define DISPATCH_ASSUME_NONNULL_BEGIN _Pragma("clang assume_nonnull begin")
 #define DISPATCH_ASSUME_NONNULL_END   _Pragma("clang assume_nonnull end")
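For illustration (not taken from this change): DISPATCH_COLD wraps the compiler's cold attribute. A minimal sketch of the same pattern for a caller's own declarations, assuming Clang or GCC; my_log_error is a hypothetical function, not libdispatch API:

    #if __has_attribute(cold)
    #define MY_COLD __attribute__((__cold__))
    #else
    #define MY_COLD
    #endif

    /* Marking a rarely taken diagnostic path as cold lets the optimizer keep it
     * out of the hot instruction stream and deprioritize it for inlining. */
    MY_COLD void my_log_error(const char *msg);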
diff --git a/dispatch/block.h b/dispatch/block.h
index df817ff..e6bf4f8 100644
--- a/dispatch/block.h
+++ b/dispatch/block.h
@@ -48,13 +48,14 @@
  *
  * @const DISPATCH_BLOCK_DETACHED
  * Flag indicating that a dispatch block object should execute disassociated
- * from current execution context attributes such as QOS class, os_activity_t
- * and properties of the current IPC request (if any). If invoked directly, the
- * block object will remove these attributes from the calling thread for the
- * duration of the block body (before applying attributes assigned to the block
- * object, if any). If submitted to a queue, the block object will be executed
- * with the attributes of the queue (or any attributes specifically assigned to
- * the block object).
+ * from current execution context attributes such as os_activity_t
+ * and properties of the current IPC request (if any). With regard to QoS class,
+ * the behavior is the same as for DISPATCH_BLOCK_NO_QOS_CLASS. If invoked directly,
+ * the block object will remove the other attributes from the calling thread for
+ * the duration of the block body (before applying attributes assigned to the
+ * block object, if any). If submitted to a queue, the block object will be
+ * executed with the attributes of the queue (or any attributes specifically
+ * assigned to the block object).
  *
  * @const DISPATCH_BLOCK_ASSIGN_CURRENT
  * Flag indicating that a dispatch block object should be assigned the execution
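For illustration (not taken from this change): a minimal usage sketch of the flag documented above, assuming the existing dispatch_block_create() API and a blocks-capable toolchain; the handler body is a placeholder:

    #include <dispatch/dispatch.h>
    #include <Block.h>

    static void submit_detached(dispatch_queue_t q)
    {
        /* The detached block runs without the submitter's os_activity_t and IPC
         * properties; its QOS behaves as with DISPATCH_BLOCK_NO_QOS_CLASS. */
        dispatch_block_t work = dispatch_block_create(DISPATCH_BLOCK_DETACHED, ^{
            /* executes with the queue's attributes (or any assigned to the block) */
        });
        dispatch_async(q, work);
        Block_release(work); /* manual retain/release; omit under ARC */
    }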
diff --git a/dispatch/dispatch.h b/dispatch/dispatch.h
index 8945acc..0c7bdd4 100644
--- a/dispatch/dispatch.h
+++ b/dispatch/dispatch.h
@@ -54,10 +54,9 @@
 #endif
 #endif
 
-#define DISPATCH_API_VERSION 20170124
+#define DISPATCH_API_VERSION 20180109
 
 #ifndef __DISPATCH_BUILDING_DISPATCH__
-
 #ifndef __DISPATCH_INDIRECT__
 #define __DISPATCH_INDIRECT__
 #endif
@@ -76,7 +75,6 @@
 #include <dispatch/io.h>
 
 #undef __DISPATCH_INDIRECT__
-
 #endif /* !__DISPATCH_BUILDING_DISPATCH__ */
 
 #endif
diff --git a/dispatch/object.h b/dispatch/object.h
index a54b6a9..02815f3 100644
--- a/dispatch/object.h
+++ b/dispatch/object.h
@@ -52,13 +52,16 @@
 
 #if OS_OBJECT_SWIFT3
 #define DISPATCH_DECL(name) OS_OBJECT_DECL_SUBCLASS_SWIFT(name, dispatch_object)
+#define DISPATCH_DECL_SUBCLASS(name, base) OS_OBJECT_DECL_SUBCLASS_SWIFT(name, base)
 #else // OS_OBJECT_SWIFT3
 #define DISPATCH_DECL(name) OS_OBJECT_DECL_SUBCLASS(name, dispatch_object)
+#define DISPATCH_DECL_SUBCLASS(name, base) OS_OBJECT_DECL_SUBCLASS(name, base)
 
 DISPATCH_INLINE DISPATCH_ALWAYS_INLINE DISPATCH_NONNULL_ALL DISPATCH_NOTHROW
 void
-_dispatch_object_validate(dispatch_object_t object) {
-	void *isa = *(void* volatile*)(OS_OBJECT_BRIDGE void*)object;
+_dispatch_object_validate(dispatch_object_t object)
+{
+	void *isa = *(void *volatile*)(OS_OBJECT_BRIDGE void*)object;
 	(void)isa;
 }
 #endif // OS_OBJECT_SWIFT3
@@ -79,31 +82,29 @@
 } *dispatch_object_t;
 #define DISPATCH_DECL(name) \
 		typedef struct name##_s : public dispatch_object_s {} *name##_t
-#define DISPATCH_GLOBAL_OBJECT(type, object) (&(object))
+#define DISPATCH_DECL_SUBCLASS(name, base) \
+		typedef struct name##_s : public base##_s {} *name##_t
+#define DISPATCH_GLOBAL_OBJECT(type, object) (static_cast<type>(&(object)))
 #define DISPATCH_RETURNS_RETAINED
 #else /* Plain C */
+#ifndef __DISPATCH_BUILDING_DISPATCH__
 typedef union {
 	struct _os_object_s *_os_obj;
 	struct dispatch_object_s *_do;
-	struct dispatch_continuation_s *_dc;
 	struct dispatch_queue_s *_dq;
 	struct dispatch_queue_attr_s *_dqa;
 	struct dispatch_group_s *_dg;
 	struct dispatch_source_s *_ds;
 	struct dispatch_mach_s *_dm;
 	struct dispatch_mach_msg_s *_dmsg;
-	struct dispatch_source_attr_s *_dsa;
 	struct dispatch_semaphore_s *_dsema;
 	struct dispatch_data_s *_ddata;
 	struct dispatch_io_s *_dchannel;
-	struct dispatch_operation_s *_doperation;
-	struct dispatch_disk_s *_ddisk;
 } dispatch_object_t DISPATCH_TRANSPARENT_UNION;
-/*! @parseOnly */
+#endif // !__DISPATCH_BUILDING_DISPATCH__
 #define DISPATCH_DECL(name) typedef struct name##_s *name##_t
-/*! @parseOnly */
-#define DISPATCH_GLOBAL_OBJECT(t, x) (&(x))
-/*! @parseOnly */
+#define DISPATCH_DECL_SUBCLASS(name, base) typedef base##_t name##_t
+#define DISPATCH_GLOBAL_OBJECT(type, object) ((type)&(object))
 #define DISPATCH_RETURNS_RETAINED
 #endif
 
@@ -122,12 +123,9 @@
 #define DISPATCH_DATA_DECL(name) OS_OBJECT_DECL_SWIFT(name)
 #endif // DISPATCH_DATA_DECL
 #else
-/*! @parseOnly */
 #define DISPATCH_SOURCE_DECL(name) \
 		DISPATCH_DECL(name);
-/*! @parseOnly */
 #define DISPATCH_DATA_DECL(name) DISPATCH_DECL(name)
-/*! @parseOnly */
 #define DISPATCH_SOURCE_TYPE_DECL(name) \
 		DISPATCH_EXPORT const struct dispatch_source_type_s \
 		_dispatch_source_type_##name
@@ -534,13 +532,13 @@
  * The message to log above and beyond the introspection.
  */
 API_DEPRECATED("unsupported interface", macos(10.6,10.9), ios(4.0,6.0))
-DISPATCH_EXPORT DISPATCH_NONNULL2 DISPATCH_NOTHROW
+DISPATCH_EXPORT DISPATCH_NONNULL2 DISPATCH_NOTHROW DISPATCH_COLD
 __attribute__((__format__(printf,2,3)))
 void
 dispatch_debug(dispatch_object_t object, const char *message, ...);
 
 API_DEPRECATED("unsupported interface", macos(10.6,10.9), ios(4.0,6.0))
-DISPATCH_EXPORT DISPATCH_NONNULL2 DISPATCH_NOTHROW
+DISPATCH_EXPORT DISPATCH_NONNULL2 DISPATCH_NOTHROW DISPATCH_COLD
 __attribute__((__format__(printf,2,0)))
 void
 dispatch_debugv(dispatch_object_t object, const char *message, va_list ap);
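For illustration (not taken from this change): a sketch of how the plain-C DISPATCH_DECL_SUBCLASS expansion above plays out for the queue subtypes introduced in queue.h below; the expansion comments are illustrative:

    /* Plain C (neither Objective-C nor C++): subclasses collapse to typedefs of
     * the base type, so e.g. dispatch_queue_main_t converts implicitly to
     * dispatch_queue_t. */
    DISPATCH_DECL(dispatch_queue);
    /*   => typedef struct dispatch_queue_s *dispatch_queue_t; */
    DISPATCH_DECL_SUBCLASS(dispatch_queue_serial, dispatch_queue);
    /*   => typedef dispatch_queue_t dispatch_queue_serial_t; */
    DISPATCH_DECL_SUBCLASS(dispatch_queue_main, dispatch_queue_serial);
    /*   => typedef dispatch_queue_serial_t dispatch_queue_main_t; */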
diff --git a/dispatch/queue.h b/dispatch/queue.h
index 3bc8cd8..969dc88 100644
--- a/dispatch/queue.h
+++ b/dispatch/queue.h
@@ -53,25 +53,151 @@
  * @typedef dispatch_queue_t
  *
  * @abstract
- * Dispatch queues invoke blocks submitted to them serially in FIFO order. A
- * queue will only invoke one block at a time, but independent queues may each
- * invoke their blocks concurrently with respect to each other.
+ * Dispatch queues invoke workitems submitted to them.
  *
  * @discussion
- * Dispatch queues are lightweight objects to which blocks may be submitted.
- * The system manages a pool of threads which process dispatch queues and
- * invoke blocks submitted to them.
+ * Dispatch queues come in many flavors, the most common one being the dispatch
+ * serial queue (See dispatch_queue_serial_t).
+ *
+ * The system manages a pool of threads which process dispatch queues and invoke
+ * workitems submitted to them.
  *
  * Conceptually a dispatch queue may have its own thread of execution, and
  * interaction between queues is highly asynchronous.
  *
  * Dispatch queues are reference counted via calls to dispatch_retain() and
- * dispatch_release(). Pending blocks submitted to a queue also hold a
+ * dispatch_release(). Pending workitems submitted to a queue also hold a
  * reference to the queue until they have finished. Once all references to a
  * queue have been released, the queue will be deallocated by the system.
  */
 DISPATCH_DECL(dispatch_queue);
 
+/*!
+ * @typedef dispatch_queue_global_t
+ *
+ * @abstract
+ * Dispatch global concurrent queues are an abstraction around the system thread
+ * pool which invokes workitems that are submitted to dispatch queues.
+ *
+ * @discussion
+ * Dispatch global concurrent queues provide buckets of priorities on top of the
+ * thread pool the system manages. The system will decide how many threads
+ * to allocate to this pool depending on demand and system load. In particular,
+ * the system tries to maintain a good level of concurrency for this resource,
+ * and will create new threads when too many existing worker threads block in
+ * system calls.
+ *
+ * The global concurrent queues are a shared resource and as such it is the
+ * responsibility of every user of this resource to not submit an unbounded
+ * amount of work to this pool, especially work that may block, as this can
+ * cause the system to spawn very large numbers of threads (aka. thread
+ * explosion).
+ *
+ * Work items submitted to the global concurrent queues have no ordering
+ * guarantee with respect to the order of submission, and workitems submitted
+ * to these queues may be invoked concurrently.
+ *
+ * Dispatch global concurrent queues are well-known global objects that are
+ * returned by dispatch_get_global_queue(). These objects cannot be modified.
+ * Calls to dispatch_suspend(), dispatch_resume(), dispatch_set_context(), etc.,
+ * will have no effect when used with queues of this type.
+ */
+#if defined(__DISPATCH_BUILDING_DISPATCH__) && !defined(__OBJC__)
+typedef struct dispatch_queue_global_s *dispatch_queue_global_t;
+#else
+DISPATCH_DECL_SUBCLASS(dispatch_queue_global, dispatch_queue);
+#endif
+
+/*!
+ * @typedef dispatch_queue_serial_t
+ *
+ * @abstract
+ * Dispatch serial queues invoke workitems submitted to them serially in FIFO
+ * order.
+ *
+ * @discussion
+ * Dispatch serial queues are lightweight objects to which workitems may be
+ * submitted to be invoked in FIFO order. A serial queue will only invoke one
+ * workitem at a time, but independent serial queues may each invoke their work
+ * items concurrently with respect to each other.
+ *
+ * Serial queues can target each other (See dispatch_set_target_queue()). The
+ * serial queue at the bottom of a queue hierarchy provides an exclusion
+ * context: at most one workitem submitted to any of the queues in such
+ * a hierarchy will run at any given time.
+ *
+ * Such hierarchies provide a natural construct to organize an application
+ * subsystem around.
+ *
+ * Serial queues are created by passing a dispatch queue attribute derived from
+ * DISPATCH_QUEUE_SERIAL to dispatch_queue_create_with_target().
+ */
+#if defined(__DISPATCH_BUILDING_DISPATCH__) && !defined(__OBJC__)
+typedef struct dispatch_lane_s *dispatch_queue_serial_t;
+#else
+DISPATCH_DECL_SUBCLASS(dispatch_queue_serial, dispatch_queue);
+#endif
+
+/*!
+ * @typedef dispatch_queue_main_t
+ *
+ * @abstract
+ * The type of the default queue that is bound to the main thread.
+ *
+ * @discussion
+ * The main queue is a serial queue (See dispatch_queue_serial_t) which is bound
+ * to the main thread of an application.
+ *
+ * In order to invoke workitems submitted to the main queue, the application
+ * must call dispatch_main(), NSApplicationMain(), or use a CFRunLoop on the
+ * main thread.
+ *
+ * The main queue is a well known global object that is made automatically on
+ * behalf of the main thread during process initialization and is returned by
+ * dispatch_get_main_queue(). This object cannot be modified.  Calls to
+ * dispatch_suspend(), dispatch_resume(), dispatch_set_context(), etc., will
+ * have no effect when used on the main queue.
+ */
+#if defined(__DISPATCH_BUILDING_DISPATCH__) && !defined(__OBJC__)
+typedef struct dispatch_queue_static_s *dispatch_queue_main_t;
+#else
+DISPATCH_DECL_SUBCLASS(dispatch_queue_main, dispatch_queue_serial);
+#endif
+
+/*!
+ * @typedef dispatch_queue_concurrent_t
+ *
+ * @abstract
+ * Dispatch concurrent queues invoke workitems submitted to them concurrently,
+ * and admit a notion of barrier workitems.
+ *
+ * @discussion
+ * Dispatch concurrent queues are lightweight objects to which regular and
+ * barrier workitems may be submitted. Barrier workitems are invoked in
+ * exclusion of any other kind of workitem in FIFO order.
+ *
+ * Regular workitems can be invoked concurrently for the same concurrent queue,
+ * in any order. However, regular workitems will not be invoked before any
+ * barrier workitem submitted ahead of them has been invoked.
+ *
+ * In other words, if a serial queue is equivalent to a mutex in the Dispatch
+ * world, a concurrent queue is equivalent to a reader-writer lock, where
+ * regular items are readers and barriers are writers.
+ *
+ * Concurrent queues are created by passing a dispatch queue attribute derived
+ * from DISPATCH_QUEUE_CONCURRENT to dispatch_queue_create_with_target().
+ *
+ * Caveat:
+ * Dispatch concurrent queues at this time do not implement priority inversion
+ * avoidance when lower priority regular workitems (readers) are being invoked
+ * and are preventing a higher priority barrier (writer) from being invoked.
+ */
+#if defined(__DISPATCH_BUILDING_DISPATCH__) && !defined(__OBJC__)
+typedef struct dispatch_lane_s *dispatch_queue_concurrent_t;
+#else
+DISPATCH_DECL_SUBCLASS(dispatch_queue_concurrent, dispatch_queue);
+#endif
+
 __BEGIN_DECLS
 
 /*!
@@ -137,8 +263,7 @@
 DISPATCH_EXPORT DISPATCH_NONNULL1 DISPATCH_NONNULL3 DISPATCH_NOTHROW
 void
 dispatch_async_f(dispatch_queue_t queue,
-	void *_Nullable context,
-	dispatch_function_t work);
+		void *_Nullable context, dispatch_function_t work);
 
 /*!
  * @function dispatch_sync
@@ -147,8 +272,12 @@
  * Submits a block for synchronous execution on a dispatch queue.
  *
  * @discussion
- * Submits a block to a dispatch queue like dispatch_async(), however
- * dispatch_sync() will not return until the block has finished.
+ * Submits a workitem to a dispatch queue like dispatch_async(), however
+ * dispatch_sync() will not return until the workitem has finished.
+ *
+ * Work items submitted to a queue with dispatch_sync() do not observe certain
+ * queue attributes of that queue when invoked (such as autorelease frequency
+ * and QOS class).
  *
  * Calls to dispatch_sync() targeting the current queue will result
  * in dead-lock. Use of dispatch_sync() is also subject to the same
@@ -159,8 +288,10 @@
  * calls to this function are synchronous, the dispatch_sync() "borrows" the
  * reference of the caller.
  *
- * As an optimization, dispatch_sync() invokes the block on the current
- * thread when possible.
+ * As an optimization, dispatch_sync() invokes the workitem on the thread which
+ * submitted the workitem, except when the passed queue is the main queue or
+ * a queue targeting it (See dispatch_queue_main_t,
+ * dispatch_set_target_queue()).
  *
  * @param queue
  * The target dispatch queue to which the block is submitted.
@@ -203,18 +334,19 @@
 DISPATCH_EXPORT DISPATCH_NONNULL1 DISPATCH_NONNULL3 DISPATCH_NOTHROW
 void
 dispatch_sync_f(dispatch_queue_t queue,
-	void *_Nullable context,
-	dispatch_function_t work);
+		void *_Nullable context, dispatch_function_t work);
 
 
-#if !defined(__APPLE__) || TARGET_OS_WATCH || TARGET_OS_TV || \
+#if defined(__APPLE__) && \
 		(defined(__IPHONE_OS_VERSION_MIN_REQUIRED) && \
-		__IPHONE_OS_VERSION_MIN_REQUIRED >= __IPHONE_7_0) || \
+		__IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_7_0) || \
 		(defined(__MAC_OS_X_VERSION_MIN_REQUIRED) && \
-		__MAC_OS_X_VERSION_MIN_REQUIRED >= __MAC_10_9)
-#define DISPATCH_APPLY_AUTO_AVAILABLE 1
-#else
+		__MAC_OS_X_VERSION_MIN_REQUIRED < __MAC_10_9)
 #define DISPATCH_APPLY_AUTO_AVAILABLE 0
+#define DISPATCH_APPLY_QUEUE_ARG_NULLABILITY _Nonnull
+#else
+#define DISPATCH_APPLY_AUTO_AVAILABLE 1
+#define DISPATCH_APPLY_QUEUE_ARG_NULLABILITY _Nullable
 #endif
 
 /*!
@@ -270,7 +402,8 @@
 API_AVAILABLE(macos(10.6), ios(4.0))
 DISPATCH_EXPORT DISPATCH_NONNULL3 DISPATCH_NOTHROW
 void
-dispatch_apply(size_t iterations, dispatch_queue_t queue,
+dispatch_apply(size_t iterations,
+		dispatch_queue_t DISPATCH_APPLY_QUEUE_ARG_NULLABILITY queue,
 		DISPATCH_NOESCAPE void (^block)(size_t));
 #endif
 
@@ -304,9 +437,9 @@
 API_AVAILABLE(macos(10.6), ios(4.0))
 DISPATCH_EXPORT DISPATCH_NONNULL4 DISPATCH_NOTHROW
 void
-dispatch_apply_f(size_t iterations, dispatch_queue_t queue,
-	void *_Nullable context,
-	void (*work)(void *_Nullable, size_t));
+dispatch_apply_f(size_t iterations,
+		dispatch_queue_t DISPATCH_APPLY_QUEUE_ARG_NULLABILITY queue,
+		void *_Nullable context, void (*work)(void *_Nullable, size_t));
 
 /*!
  * @function dispatch_get_current_queue
@@ -343,7 +476,12 @@
 dispatch_get_current_queue(void);
 
 API_AVAILABLE(macos(10.6), ios(4.0))
-DISPATCH_EXPORT struct dispatch_queue_s _dispatch_main_q;
+DISPATCH_EXPORT
+#if defined(__DISPATCH_BUILDING_DISPATCH__) && !defined(__OBJC__)
+struct dispatch_queue_static_s _dispatch_main_q;
+#else
+struct dispatch_queue_s _dispatch_main_q;
+#endif
 
 /*!
  * @function dispatch_get_main_queue
@@ -356,15 +494,24 @@
  * call dispatch_main(), NSApplicationMain(), or use a CFRunLoop on the main
  * thread.
  *
+ * The main queue is meant to be used in application context to interact with
+ * the main thread and the main runloop.
+ *
+ * Because the main queue doesn't behave entirely like a regular serial queue,
+ * it may have unwanted side-effects when used in processes that are not UI apps
+ * (daemons). For such processes, the main queue should be avoided.
+ *
+ * @see dispatch_queue_main_t
+ *
  * @result
  * Returns the main queue. This queue is created automatically on behalf of
  * the main thread before main() is called.
  */
 DISPATCH_INLINE DISPATCH_ALWAYS_INLINE DISPATCH_CONST DISPATCH_NOTHROW
-dispatch_queue_t
+dispatch_queue_main_t
 dispatch_get_main_queue(void)
 {
-	return DISPATCH_GLOBAL_OBJECT(dispatch_queue_t, _dispatch_main_q);
+	return DISPATCH_GLOBAL_OBJECT(dispatch_queue_main_t, _dispatch_main_q);
 }
 
 /*!
@@ -420,9 +567,7 @@
  * class.
  *
  * @discussion
- * The well-known global concurrent queues may not be modified. Calls to
- * dispatch_suspend(), dispatch_resume(), dispatch_set_context(), etc., will
- * have no effect when used with queues returned by this function.
+ * See dispatch_queue_global_t.
  *
  * @param identifier
  * A quality of service class defined in qos_class_t or a priority defined in
@@ -453,7 +598,7 @@
  */
 API_AVAILABLE(macos(10.6), ios(4.0))
 DISPATCH_EXPORT DISPATCH_CONST DISPATCH_WARN_RESULT DISPATCH_NOTHROW
-dispatch_queue_t
+dispatch_queue_global_t
 dispatch_get_global_queue(intptr_t identifier, uintptr_t flags);
 
 /*!
@@ -467,7 +612,11 @@
 /*!
  * @const DISPATCH_QUEUE_SERIAL
  *
- * @discussion A dispatch queue that invokes blocks serially in FIFO order.
+ * @discussion
+ * An attribute that can be used to create a dispatch queue that invokes blocks
+ * serially in FIFO order.
+ *
+ * See dispatch_queue_serial_t.
  */
 #define DISPATCH_QUEUE_SERIAL NULL
 
@@ -475,8 +624,10 @@
  * @const DISPATCH_QUEUE_SERIAL_INACTIVE
  *
  * @discussion
- * A dispatch queue that invokes blocks serially in FIFO order, and that is
- * created initially inactive. See dispatch_queue_attr_make_initially_inactive().
+ * An attribute that can be used to create a dispatch queue that invokes blocks
+ * serially in FIFO order, and that is initially inactive.
+ *
+ * See dispatch_queue_attr_make_initially_inactive().
  */
 #define DISPATCH_QUEUE_SERIAL_INACTIVE \
 		dispatch_queue_attr_make_initially_inactive(DISPATCH_QUEUE_SERIAL)
@@ -484,8 +635,12 @@
 /*!
  * @const DISPATCH_QUEUE_CONCURRENT
  *
- * @discussion A dispatch queue that may invoke blocks concurrently and supports
- * barrier blocks submitted with the dispatch barrier API.
+ * @discussion
+ * An attribute that can be used to create a dispatch queue that may invoke
+ * blocks concurrently and supports barrier blocks submitted with the dispatch
+ * barrier API.
+ *
+ * See dispatch_queue_concurrent_t.
  */
 #define DISPATCH_QUEUE_CONCURRENT \
 		DISPATCH_GLOBAL_OBJECT(dispatch_queue_attr_t, \
@@ -498,9 +653,11 @@
  * @const DISPATCH_QUEUE_CONCURRENT_INACTIVE
  *
  * @discussion
- * A dispatch queue that may invoke blocks concurrently and supports barrier
- * blocks submitted with the dispatch barrier API, and that is created initially
- * inactive. See dispatch_queue_attr_make_initially_inactive().
+ * An attribute that can be used to create a dispatch queue that may invoke
+ * blocks concurrently and supports barrier blocks submitted with the dispatch
+ * barrier API, and that is initially inactive.
+ *
+ * See dispatch_queue_attr_make_initially_inactive().
  */
 #define DISPATCH_QUEUE_CONCURRENT_INACTIVE \
 		dispatch_queue_attr_make_initially_inactive(DISPATCH_QUEUE_CONCURRENT)
@@ -668,6 +825,10 @@
  *	queue = dispatch_queue_create("com.example.myqueue", attr);
  * </code>
  *
+ * The QOS class and relative priority set this way on a queue have no effect on
+ * blocks that are submitted synchronously to a queue (via dispatch_sync(),
+ * dispatch_barrier_sync()).
+ *
  * @param attr
  * A queue attribute value to be combined with the QOS class, or NULL.
  *
@@ -725,9 +886,9 @@
  * reader-writer schemes.
  *
  * When a dispatch queue is no longer needed, it should be released with
- * dispatch_release(). Note that any pending blocks submitted to a queue will
- * hold a reference to that queue. Therefore a queue will not be deallocated
- * until all pending blocks have finished.
+ * dispatch_release(). Note that any pending blocks submitted asynchronously to
+ * a queue will hold a reference to that queue. Therefore a queue will not be
+ * deallocated until all pending blocks have finished.
  *
  * When using a dispatch queue attribute @a attr specifying a QoS class (derived
  * from the result of dispatch_queue_attr_make_with_qos_class()), passing the
@@ -763,8 +924,8 @@
 DISPATCH_NOTHROW
 dispatch_queue_t
 dispatch_queue_create_with_target(const char *_Nullable label,
-	dispatch_queue_attr_t _Nullable attr, dispatch_queue_t _Nullable target)
-	DISPATCH_ALIAS_V2(dispatch_queue_create_with_target);
+		dispatch_queue_attr_t _Nullable attr, dispatch_queue_t _Nullable target)
+		DISPATCH_ALIAS_V2(dispatch_queue_create_with_target);
 
 /*!
  * @function dispatch_queue_create
@@ -783,9 +944,9 @@
  * reader-writer schemes.
  *
  * When a dispatch queue is no longer needed, it should be released with
- * dispatch_release(). Note that any pending blocks submitted to a queue will
- * hold a reference to that queue. Therefore a queue will not be deallocated
- * until all pending blocks have finished.
+ * dispatch_release(). Note that any pending blocks submitted asynchronously to
+ * a queue will hold a reference to that queue. Therefore a queue will not be
+ * deallocated until all pending blocks have finished.
  *
  * Passing the result of the dispatch_queue_attr_make_with_qos_class() function
  * to the attr parameter of this function allows a quality of service class and
@@ -993,9 +1154,8 @@
 API_AVAILABLE(macos(10.6), ios(4.0))
 DISPATCH_EXPORT DISPATCH_NONNULL2 DISPATCH_NONNULL3 DISPATCH_NOTHROW
 void
-dispatch_after(dispatch_time_t when,
-	dispatch_queue_t queue,
-	dispatch_block_t block);
+dispatch_after(dispatch_time_t when, dispatch_queue_t queue,
+		dispatch_block_t block);
 #endif
 
 /*!
@@ -1026,10 +1186,8 @@
 API_AVAILABLE(macos(10.6), ios(4.0))
 DISPATCH_EXPORT DISPATCH_NONNULL2 DISPATCH_NONNULL4 DISPATCH_NOTHROW
 void
-dispatch_after_f(dispatch_time_t when,
-	dispatch_queue_t queue,
-	void *_Nullable context,
-	dispatch_function_t work);
+dispatch_after_f(dispatch_time_t when, dispatch_queue_t queue,
+		void *_Nullable context, dispatch_function_t work);
 
 /*!
  * @functiongroup Dispatch Barrier API
@@ -1108,8 +1266,7 @@
 DISPATCH_EXPORT DISPATCH_NONNULL1 DISPATCH_NONNULL3 DISPATCH_NOTHROW
 void
 dispatch_barrier_async_f(dispatch_queue_t queue,
-	void *_Nullable context,
-	dispatch_function_t work);
+		void *_Nullable context, dispatch_function_t work);
 
 /*!
  * @function dispatch_barrier_sync
@@ -1168,8 +1325,7 @@
 DISPATCH_EXPORT DISPATCH_NONNULL1 DISPATCH_NONNULL3 DISPATCH_NOTHROW
 void
 dispatch_barrier_sync_f(dispatch_queue_t queue,
-	void *_Nullable context,
-	dispatch_function_t work);
+		void *_Nullable context, dispatch_function_t work);
 
 /*!
  * @functiongroup Dispatch queue-specific contexts
@@ -1211,7 +1367,7 @@
 DISPATCH_EXPORT DISPATCH_NONNULL1 DISPATCH_NOTHROW
 void
 dispatch_queue_set_specific(dispatch_queue_t queue, const void *key,
-	void *_Nullable context, dispatch_function_t _Nullable destructor);
+		void *_Nullable context, dispatch_function_t _Nullable destructor);
 
 /*!
  * @function dispatch_queue_get_specific
@@ -1321,7 +1477,7 @@
 DISPATCH_EXPORT DISPATCH_NONNULL1
 void
 dispatch_assert_queue(dispatch_queue_t queue)
-	DISPATCH_ALIAS_V2(dispatch_assert_queue);
+		DISPATCH_ALIAS_V2(dispatch_assert_queue);
 
 /*!
  * @function dispatch_assert_queue_barrier
@@ -1370,7 +1526,7 @@
 DISPATCH_EXPORT DISPATCH_NONNULL1
 void
 dispatch_assert_queue_not(dispatch_queue_t queue)
-	DISPATCH_ALIAS_V2(dispatch_assert_queue_not);
+		DISPATCH_ALIAS_V2(dispatch_assert_queue_not);
 
 #ifdef NDEBUG
 #define dispatch_assert_queue_debug(q) ((void)(0 && (q)))
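For illustration (not taken from this change): the queue.h changes above refine the return types of dispatch_get_main_queue() and dispatch_get_global_queue(). A minimal usage sketch against the patched header; the queue label and printf bodies are placeholders:

    #include <dispatch/dispatch.h>
    #include <stdio.h>

    int main(void)
    {
        /* Serial queue: workitems run one at a time, in FIFO order. */
        dispatch_queue_t q = dispatch_queue_create("com.example.myqueue",
                DISPATCH_QUEUE_SERIAL);
        dispatch_async(q, ^{ printf("on the serial queue\n"); });

        /* Global concurrent queue: shared pool, no ordering guarantee. */
        dispatch_queue_global_t gq = dispatch_get_global_queue(QOS_CLASS_DEFAULT, 0);
        dispatch_async(gq, ^{ printf("on a global queue\n"); });

        /* Main queue: bound to the main thread; drained by dispatch_main(),
         * NSApplicationMain(), or a main-thread CFRunLoop. */
        dispatch_queue_main_t mq = dispatch_get_main_queue();
        dispatch_async(mq, ^{ printf("on the main queue\n"); });

        dispatch_main(); /* never returns */
    }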
diff --git a/dispatch/source.h b/dispatch/source.h
index 05a67d9..597d23a 100644
--- a/dispatch/source.h
+++ b/dispatch/source.h
@@ -105,7 +105,7 @@
  * The mask is unused (pass zero for now).
  */
 #define DISPATCH_SOURCE_TYPE_DATA_REPLACE (&_dispatch_source_type_data_replace)
-API_AVAILABLE(macos(10.12), ios(10.0), tvos(10.0), watchos(3.0))
+API_AVAILABLE(macos(10.13), ios(11.0), tvos(11.0), watchos(4.0))
 DISPATCH_SOURCE_TYPE_DECL(data_replace);
 
 /*!
@@ -548,6 +548,7 @@
  *
  *  DISPATCH_SOURCE_TYPE_DATA_ADD:        n/a
  *  DISPATCH_SOURCE_TYPE_DATA_OR:         n/a
+ *  DISPATCH_SOURCE_TYPE_DATA_REPLACE:    n/a
  *  DISPATCH_SOURCE_TYPE_MACH_SEND:       mach port (mach_port_t)
  *  DISPATCH_SOURCE_TYPE_MACH_RECV:       mach port (mach_port_t)
  *  DISPATCH_SOURCE_TYPE_MEMORYPRESSURE   n/a
@@ -579,6 +580,7 @@
  *
  *  DISPATCH_SOURCE_TYPE_DATA_ADD:        n/a
  *  DISPATCH_SOURCE_TYPE_DATA_OR:         n/a
+ *  DISPATCH_SOURCE_TYPE_DATA_REPLACE:    n/a
  *  DISPATCH_SOURCE_TYPE_MACH_SEND:       dispatch_source_mach_send_flags_t
  *  DISPATCH_SOURCE_TYPE_MACH_RECV:       n/a
  *  DISPATCH_SOURCE_TYPE_MEMORYPRESSURE   dispatch_source_memorypressure_flags_t
@@ -615,6 +617,7 @@
  *
  *  DISPATCH_SOURCE_TYPE_DATA_ADD:        application defined data
  *  DISPATCH_SOURCE_TYPE_DATA_OR:         application defined data
+ *  DISPATCH_SOURCE_TYPE_DATA_REPLACE:    application defined data
  *  DISPATCH_SOURCE_TYPE_MACH_SEND:       dispatch_source_mach_send_flags_t
  *  DISPATCH_SOURCE_TYPE_MACH_RECV:       n/a
  *  DISPATCH_SOURCE_TYPE_MEMORYPRESSURE   dispatch_source_memorypressure_flags_t
@@ -637,9 +640,9 @@
  * @function dispatch_source_merge_data
  *
  * @abstract
- * Merges data into a dispatch source of type DISPATCH_SOURCE_TYPE_DATA_ADD or
- * DISPATCH_SOURCE_TYPE_DATA_OR and submits its event handler block to its
- * target queue.
+ * Merges data into a dispatch source of type DISPATCH_SOURCE_TYPE_DATA_ADD,
+ * DISPATCH_SOURCE_TYPE_DATA_OR or DISPATCH_SOURCE_TYPE_DATA_REPLACE,
+ * and submits its event handler block to its target queue.
  *
  * @param source
  * The result of passing NULL in this parameter is undefined.
@@ -684,8 +687,9 @@
  *
  * The 'start' argument also determines which clock will be used for the timer:
  * If 'start' is DISPATCH_TIME_NOW or was created with dispatch_time(3), the
- * timer is based on mach_absolute_time(). If 'start' was created with
- * dispatch_walltime(3), the timer is based on gettimeofday(3).
+ * timer is based on up time (which is obtained from mach_absolute_time() on
+ * Apple platforms). If 'start' was created with dispatch_walltime(3), the
+ * timer is based on gettimeofday(3).
  *
  * Calling this function has no effect if the timer source has already been
  * canceled.
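For illustration (not taken from this change): a minimal sketch of the DISPATCH_SOURCE_TYPE_DATA_REPLACE behaviour now cross-referenced above, using the existing data-source API; the handler body is a placeholder:

    #include <dispatch/dispatch.h>
    #include <stdio.h>

    static dispatch_source_t make_replace_source(dispatch_queue_t q)
    {
        dispatch_source_t ds = dispatch_source_create(
                DISPATCH_SOURCE_TYPE_DATA_REPLACE,
                0 /* handle: n/a */, 0 /* mask: unused */, q);
        dispatch_source_set_event_handler(ds, ^{
            /* Only the most recently merged value is observed; values coalesced
             * before the handler ran are replaced, not accumulated. */
            printf("latest value: %lu\n", dispatch_source_get_data(ds));
        });
        dispatch_activate(ds);
        return ds;
    }

    /* elsewhere: dispatch_source_merge_data(ds, 42); */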
diff --git a/dispatch/time.h b/dispatch/time.h
index ce99f27..02dd27f 100644
--- a/dispatch/time.h
+++ b/dispatch/time.h
@@ -66,6 +66,10 @@
  */
 typedef uint64_t dispatch_time_t;
 
+enum {
+	DISPATCH_WALLTIME_NOW DISPATCH_ENUM_API_AVAILABLE(macos(10.14), ios(12.0), tvos(12.0), watchos(5.0))	= ~1ull,
+};
+
 #define DISPATCH_TIME_NOW (0ull)
 #define DISPATCH_TIME_FOREVER (~0ull)
 
@@ -73,15 +77,19 @@
  * @function dispatch_time
  *
  * @abstract
- * Create dispatch_time_t relative to the default clock or modify an existing
- * dispatch_time_t.
+ * Create a dispatch_time_t relative to the current value of the default or
+ * wall time clock, or modify an existing dispatch_time_t.
  *
  * @discussion
- * On Mac OS X the default clock is based on mach_absolute_time().
+ * On Apple platforms, the default clock is based on mach_absolute_time().
  *
  * @param when
- * An optional dispatch_time_t to add nanoseconds to. If zero is passed, then
- * dispatch_time() will use the result of mach_absolute_time().
+ * An optional dispatch_time_t to add nanoseconds to. If DISPATCH_TIME_NOW is
+ * passed, then dispatch_time() will use the default clock (which is based on
+ * mach_absolute_time() on Apple platforms). If DISPATCH_WALLTIME_NOW is used,
+ * dispatch_time() will use the value returned by gettimeofday(3).
+ * dispatch_time(DISPATCH_WALLTIME_NOW, delta) is equivalent to
+ * dispatch_walltime(NULL, delta).
  *
  * @param delta
  * Nanoseconds to add.
@@ -106,6 +114,8 @@
  * @param when
  * A struct timespec to add time to. If NULL is passed, then
  * dispatch_walltime() will use the result of gettimeofday(3).
+ * dispatch_walltime(NULL, delta) returns the same value as
+ * dispatch_time(DISPATCH_WALLTIME_NOW, delta).
  *
  * @param delta
  * Nanoseconds to add.
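For illustration (not taken from this change): a short sketch of the DISPATCH_WALLTIME_NOW / dispatch_walltime() equivalence documented above; the 5-second delta, queue, and block are placeholders:

    #include <dispatch/dispatch.h>

    static void schedule_wall_clock(dispatch_queue_t q, dispatch_block_t work)
    {
        /* Both deadlines track the wall clock (gettimeofday(3)) rather than the
         * default mach_absolute_time()-based clock. */
        dispatch_time_t a = dispatch_time(DISPATCH_WALLTIME_NOW,
                (int64_t)(5 * NSEC_PER_SEC));
        dispatch_time_t b = dispatch_walltime(NULL, (int64_t)(5 * NSEC_PER_SEC));
        (void)b; /* per the documentation above, a and b denote the same deadline */
        dispatch_after(a, q, work);
    }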
diff --git a/libdispatch.xcodeproj/project.pbxproj b/libdispatch.xcodeproj/project.pbxproj
index e136647..5d58c56 100644
--- a/libdispatch.xcodeproj/project.pbxproj
+++ b/libdispatch.xcodeproj/project.pbxproj
@@ -40,6 +40,28 @@
 			name = libdispatch_kernel;
 			productName = libdispatch_kernel;
 		};
+		6E43553E215B5D9D00C13177 /* libdispatch_introspection */ = {
+			isa = PBXAggregateTarget;
+			buildConfigurationList = 6E435541215B5D9D00C13177 /* Build configuration list for PBXAggregateTarget "libdispatch_introspection" */;
+			buildPhases = (
+			);
+			dependencies = (
+				6EE5083B21701B9100833569 /* PBXTargetDependency */,
+			);
+			name = libdispatch_introspection;
+			productName = libdispatch_introspection;
+		};
+		6EA833C22162D6380045EFDC /* libdispatch_introspection_Sim */ = {
+			isa = PBXAggregateTarget;
+			buildConfigurationList = 6EA833C32162D6380045EFDC /* Build configuration list for PBXAggregateTarget "libdispatch_introspection_Sim" */;
+			buildPhases = (
+			);
+			dependencies = (
+				6EE5083D21701B9600833569 /* PBXTargetDependency */,
+			);
+			name = libdispatch_introspection_Sim;
+			productName = libdispatch_introspection_Sim;
+		};
 		92CBD7201BED924F006E0892 /* libdispatch_tests_legacy */ = {
 			isa = PBXAggregateTarget;
 			buildConfigurationList = 92CBD7231BED924F006E0892 /* Build configuration list for PBXAggregateTarget "libdispatch_tests_legacy" */;
@@ -51,12 +73,24 @@
 			name = libdispatch_tests_legacy;
 			productName = libdispatch_tests;
 		};
+		9BEBA56F20127D3300E6FD0D /* libdispatch_tools_Sim */ = {
+			isa = PBXAggregateTarget;
+			buildConfigurationList = 9BEBA57620127D3300E6FD0D /* Build configuration list for PBXAggregateTarget "libdispatch_tools_Sim" */;
+			buildPhases = (
+			);
+			dependencies = (
+				9BEBA57820127D4400E6FD0D /* PBXTargetDependency */,
+			);
+			name = libdispatch_tools_Sim;
+			productName = libdispatch_tools_Sim;
+		};
 		C927F35A10FD7F0600C5AB8B /* libdispatch_tools */ = {
 			isa = PBXAggregateTarget;
 			buildConfigurationList = C927F35E10FD7F0B00C5AB8B /* Build configuration list for PBXAggregateTarget "libdispatch_tools" */;
 			buildPhases = (
 			);
 			dependencies = (
+				9B2A11A32032494E0060E7D4 /* PBXTargetDependency */,
 				C927F36910FD7F1A00C5AB8B /* PBXTargetDependency */,
 			);
 			name = libdispatch_tools;
@@ -69,9 +103,7 @@
 		2BBF5A61154B64D8002B20F9 /* allocator_internal.h in Headers */ = {isa = PBXBuildFile; fileRef = 2BBF5A5F154B64D8002B20F9 /* allocator_internal.h */; };
 		2BBF5A63154B64F5002B20F9 /* allocator.c in Sources */ = {isa = PBXBuildFile; fileRef = 2BBF5A62154B64F5002B20F9 /* allocator.c */; };
 		2BBF5A64154B64F5002B20F9 /* allocator.c in Sources */ = {isa = PBXBuildFile; fileRef = 2BBF5A62154B64F5002B20F9 /* allocator.c */; };
-		2BBF5A65154B64F5002B20F9 /* allocator.c in Sources */ = {isa = PBXBuildFile; fileRef = 2BBF5A62154B64F5002B20F9 /* allocator.c */; };
 		2BBF5A66154B64F5002B20F9 /* allocator.c in Sources */ = {isa = PBXBuildFile; fileRef = 2BBF5A62154B64F5002B20F9 /* allocator.c */; };
-		2BBF5A67154B64F5002B20F9 /* allocator.c in Sources */ = {isa = PBXBuildFile; fileRef = 2BBF5A62154B64F5002B20F9 /* allocator.c */; };
 		2BE17C6418EA305E002CA4E8 /* layout_private.h in Headers */ = {isa = PBXBuildFile; fileRef = 2BE17C6318EA305E002CA4E8 /* layout_private.h */; settings = {ATTRIBUTES = (Private, ); }; };
 		2BE17C6518EA305E002CA4E8 /* layout_private.h in Headers */ = {isa = PBXBuildFile; fileRef = 2BE17C6318EA305E002CA4E8 /* layout_private.h */; settings = {ATTRIBUTES = (Private, ); }; };
 		5A0095A210F274B0000E2A31 /* io_internal.h in Headers */ = {isa = PBXBuildFile; fileRef = 5A0095A110F274B0000E2A31 /* io_internal.h */; };
@@ -88,36 +120,42 @@
 		6E4BACBD1D48A41500B562AE /* mach.c in Sources */ = {isa = PBXBuildFile; fileRef = 6E4BACBC1D48A41500B562AE /* mach.c */; };
 		6E4BACC21D48A42000B562AE /* mach.c in Sources */ = {isa = PBXBuildFile; fileRef = 6E4BACBC1D48A41500B562AE /* mach.c */; };
 		6E4BACC31D48A42100B562AE /* mach.c in Sources */ = {isa = PBXBuildFile; fileRef = 6E4BACBC1D48A41500B562AE /* mach.c */; };
-		6E4BACC41D48A42200B562AE /* mach.c in Sources */ = {isa = PBXBuildFile; fileRef = 6E4BACBC1D48A41500B562AE /* mach.c */; };
 		6E4BACC51D48A42200B562AE /* mach.c in Sources */ = {isa = PBXBuildFile; fileRef = 6E4BACBC1D48A41500B562AE /* mach.c */; };
-		6E4BACC61D48A42300B562AE /* mach.c in Sources */ = {isa = PBXBuildFile; fileRef = 6E4BACBC1D48A41500B562AE /* mach.c */; };
 		6E4BACC71D48A42300B562AE /* mach.c in Sources */ = {isa = PBXBuildFile; fileRef = 6E4BACBC1D48A41500B562AE /* mach.c */; };
 		6E4BACC81D48A42400B562AE /* mach.c in Sources */ = {isa = PBXBuildFile; fileRef = 6E4BACBC1D48A41500B562AE /* mach.c */; };
 		6E4BACCA1D48A89500B562AE /* mach_internal.h in Headers */ = {isa = PBXBuildFile; fileRef = 6E4BACC91D48A89500B562AE /* mach_internal.h */; };
 		6E4BACF51D49A04600B562AE /* event_epoll.c in Sources */ = {isa = PBXBuildFile; fileRef = 6EA7937D1D456D1300929B1B /* event_epoll.c */; };
 		6E4BACF61D49A04700B562AE /* event_epoll.c in Sources */ = {isa = PBXBuildFile; fileRef = 6EA7937D1D456D1300929B1B /* event_epoll.c */; };
 		6E4BACF71D49A04700B562AE /* event_epoll.c in Sources */ = {isa = PBXBuildFile; fileRef = 6EA7937D1D456D1300929B1B /* event_epoll.c */; };
-		6E4BACF81D49A04800B562AE /* event_epoll.c in Sources */ = {isa = PBXBuildFile; fileRef = 6EA7937D1D456D1300929B1B /* event_epoll.c */; };
 		6E4BACF91D49A04800B562AE /* event_epoll.c in Sources */ = {isa = PBXBuildFile; fileRef = 6EA7937D1D456D1300929B1B /* event_epoll.c */; };
-		6E4BACFA1D49A04900B562AE /* event_epoll.c in Sources */ = {isa = PBXBuildFile; fileRef = 6EA7937D1D456D1300929B1B /* event_epoll.c */; };
 		6E4BACFB1D49A04A00B562AE /* event_epoll.c in Sources */ = {isa = PBXBuildFile; fileRef = 6EA7937D1D456D1300929B1B /* event_epoll.c */; };
 		6E4BACFC1D49A04A00B562AE /* event_epoll.c in Sources */ = {isa = PBXBuildFile; fileRef = 6EA7937D1D456D1300929B1B /* event_epoll.c */; };
+		6E5662E11F8C2E3E00BC2474 /* workqueue_internal.h in Headers */ = {isa = PBXBuildFile; fileRef = 6E5662DC1F8C2E3E00BC2474 /* workqueue_internal.h */; };
+		6E5662E21F8C2E4F00BC2474 /* workqueue_internal.h in Headers */ = {isa = PBXBuildFile; fileRef = 6E5662DC1F8C2E3E00BC2474 /* workqueue_internal.h */; };
+		6E5662E31F8C2E5100BC2474 /* workqueue_internal.h in Headers */ = {isa = PBXBuildFile; fileRef = 6E5662DC1F8C2E3E00BC2474 /* workqueue_internal.h */; };
 		6E5ACCBA1D3C4D0B007DA2B4 /* event_internal.h in Headers */ = {isa = PBXBuildFile; fileRef = 6E5ACCB91D3C4D0B007DA2B4 /* event_internal.h */; };
 		6E5ACCBB1D3C4D0E007DA2B4 /* event_internal.h in Headers */ = {isa = PBXBuildFile; fileRef = 6E5ACCB91D3C4D0B007DA2B4 /* event_internal.h */; };
 		6E5ACCBC1D3C4D0F007DA2B4 /* event_internal.h in Headers */ = {isa = PBXBuildFile; fileRef = 6E5ACCB91D3C4D0B007DA2B4 /* event_internal.h */; };
+		6E7018211F4EB51B0077C1DC /* workloop_private.h in Headers */ = {isa = PBXBuildFile; fileRef = 6E70181C1F4EB51B0077C1DC /* workloop_private.h */; settings = {ATTRIBUTES = (Private, ); }; };
+		6E7018221F4EB5220077C1DC /* workloop_private.h in Headers */ = {isa = PBXBuildFile; fileRef = 6E70181C1F4EB51B0077C1DC /* workloop_private.h */; settings = {ATTRIBUTES = (Private, ); }; };
 		6E90269C1BB9BD50004DC3AD /* firehose.defs in Sources */ = {isa = PBXBuildFile; fileRef = 72DEAA9B1AE1B0BD00289540 /* firehose.defs */; settings = {ATTRIBUTES = (Server, ); }; };
 		6E9955581C3AF7710071D40C /* venture_private.h in Headers */ = {isa = PBXBuildFile; fileRef = 6E9955571C3AF7710071D40C /* venture_private.h */; };
 		6E99558A1C3AF7900071D40C /* venture_private.h in Headers */ = {isa = PBXBuildFile; fileRef = 6E9955571C3AF7710071D40C /* venture_private.h */; };
 		6E9955CF1C3B218E0071D40C /* venture.c in Sources */ = {isa = PBXBuildFile; fileRef = 6E9955CE1C3B218E0071D40C /* venture.c */; };
-		6E9956011C3B21980071D40C /* venture.c in Sources */ = {isa = PBXBuildFile; fileRef = 6E9955CE1C3B218E0071D40C /* venture.c */; };
 		6E9956021C3B21990071D40C /* venture.c in Sources */ = {isa = PBXBuildFile; fileRef = 6E9955CE1C3B218E0071D40C /* venture.c */; };
-		6E9956031C3B219A0071D40C /* venture.c in Sources */ = {isa = PBXBuildFile; fileRef = 6E9955CE1C3B218E0071D40C /* venture.c */; };
 		6E9956041C3B219B0071D40C /* venture.c in Sources */ = {isa = PBXBuildFile; fileRef = 6E9955CE1C3B218E0071D40C /* venture.c */; };
 		6E9956051C3B219B0071D40C /* venture.c in Sources */ = {isa = PBXBuildFile; fileRef = 6E9955CE1C3B218E0071D40C /* venture.c */; };
 		6E9956071C3B21AA0071D40C /* venture_internal.h in Headers */ = {isa = PBXBuildFile; fileRef = 6E9956061C3B21AA0071D40C /* venture_internal.h */; };
 		6E9956081C3B21B30071D40C /* venture_internal.h in Headers */ = {isa = PBXBuildFile; fileRef = 6E9956061C3B21AA0071D40C /* venture_internal.h */; };
 		6E9956091C3B21B40071D40C /* venture_internal.h in Headers */ = {isa = PBXBuildFile; fileRef = 6E9956061C3B21AA0071D40C /* venture_internal.h */; };
 		6E9B6B5F1BB4F3C8009E324D /* firehose_buffer_internal.h in Headers */ = {isa = PBXBuildFile; fileRef = 6E9B6B201BB4CC73009E324D /* firehose_buffer_internal.h */; };
+		6E9C6CA720F9848100EA81C0 /* yield.c in Sources */ = {isa = PBXBuildFile; fileRef = 6E9C6CA220F9848000EA81C0 /* yield.c */; };
+		6E9C6CA820F9848C00EA81C0 /* yield.c in Sources */ = {isa = PBXBuildFile; fileRef = 6E9C6CA220F9848000EA81C0 /* yield.c */; };
+		6E9C6CA920F9848D00EA81C0 /* yield.c in Sources */ = {isa = PBXBuildFile; fileRef = 6E9C6CA220F9848000EA81C0 /* yield.c */; };
+		6E9C6CAA20F9848D00EA81C0 /* yield.c in Sources */ = {isa = PBXBuildFile; fileRef = 6E9C6CA220F9848000EA81C0 /* yield.c */; };
+		6E9C6CAB20F9848E00EA81C0 /* yield.c in Sources */ = {isa = PBXBuildFile; fileRef = 6E9C6CA220F9848000EA81C0 /* yield.c */; };
+		6E9C6CAC20F9848E00EA81C0 /* yield.c in Sources */ = {isa = PBXBuildFile; fileRef = 6E9C6CA220F9848000EA81C0 /* yield.c */; };
+		6E9C6CAD20F9848F00EA81C0 /* yield.c in Sources */ = {isa = PBXBuildFile; fileRef = 6E9C6CA220F9848000EA81C0 /* yield.c */; };
 		6EA283D71CAB93920041B2E0 /* libdispatch.codes in Copy Trace Definitions */ = {isa = PBXBuildFile; fileRef = 6EA283D01CAB93270041B2E0 /* libdispatch.codes */; };
 		6EA793891D458A5800929B1B /* event_config.h in Headers */ = {isa = PBXBuildFile; fileRef = 6EA793881D458A5800929B1B /* event_config.h */; };
 		6EA7938E1D458A5C00929B1B /* event_config.h in Headers */ = {isa = PBXBuildFile; fileRef = 6EA793881D458A5800929B1B /* event_config.h */; };
@@ -125,29 +163,20 @@
 		6EA962971D48622600759D53 /* event.c in Sources */ = {isa = PBXBuildFile; fileRef = 6E5ACCBD1D3C6719007DA2B4 /* event.c */; };
 		6EA962981D48622700759D53 /* event.c in Sources */ = {isa = PBXBuildFile; fileRef = 6E5ACCBD1D3C6719007DA2B4 /* event.c */; };
 		6EA962991D48622800759D53 /* event.c in Sources */ = {isa = PBXBuildFile; fileRef = 6E5ACCBD1D3C6719007DA2B4 /* event.c */; };
-		6EA9629A1D48622900759D53 /* event.c in Sources */ = {isa = PBXBuildFile; fileRef = 6E5ACCBD1D3C6719007DA2B4 /* event.c */; };
 		6EA9629B1D48622900759D53 /* event.c in Sources */ = {isa = PBXBuildFile; fileRef = 6E5ACCBD1D3C6719007DA2B4 /* event.c */; };
-		6EA9629C1D48622A00759D53 /* event.c in Sources */ = {isa = PBXBuildFile; fileRef = 6E5ACCBD1D3C6719007DA2B4 /* event.c */; };
 		6EA9629D1D48622B00759D53 /* event.c in Sources */ = {isa = PBXBuildFile; fileRef = 6E5ACCBD1D3C6719007DA2B4 /* event.c */; };
 		6EA9629E1D48622C00759D53 /* event.c in Sources */ = {isa = PBXBuildFile; fileRef = 6E5ACCBD1D3C6719007DA2B4 /* event.c */; };
 		6EA9629F1D48625000759D53 /* event_kevent.c in Sources */ = {isa = PBXBuildFile; fileRef = 6E5ACCB01D3C4CFB007DA2B4 /* event_kevent.c */; };
 		6EA962A01D48625100759D53 /* event_kevent.c in Sources */ = {isa = PBXBuildFile; fileRef = 6E5ACCB01D3C4CFB007DA2B4 /* event_kevent.c */; };
 		6EA962A11D48625100759D53 /* event_kevent.c in Sources */ = {isa = PBXBuildFile; fileRef = 6E5ACCB01D3C4CFB007DA2B4 /* event_kevent.c */; };
-		6EA962A21D48625200759D53 /* event_kevent.c in Sources */ = {isa = PBXBuildFile; fileRef = 6E5ACCB01D3C4CFB007DA2B4 /* event_kevent.c */; };
 		6EA962A31D48625300759D53 /* event_kevent.c in Sources */ = {isa = PBXBuildFile; fileRef = 6E5ACCB01D3C4CFB007DA2B4 /* event_kevent.c */; };
-		6EA962A41D48625300759D53 /* event_kevent.c in Sources */ = {isa = PBXBuildFile; fileRef = 6E5ACCB01D3C4CFB007DA2B4 /* event_kevent.c */; };
 		6EA962A51D48625400759D53 /* event_kevent.c in Sources */ = {isa = PBXBuildFile; fileRef = 6E5ACCB01D3C4CFB007DA2B4 /* event_kevent.c */; };
 		6EA962A61D48625500759D53 /* event_kevent.c in Sources */ = {isa = PBXBuildFile; fileRef = 6E5ACCB01D3C4CFB007DA2B4 /* event_kevent.c */; };
 		6EB60D2C1BBB197B0092FA94 /* firehose_inline_internal.h in Headers */ = {isa = PBXBuildFile; fileRef = 6EB60D291BBB19640092FA94 /* firehose_inline_internal.h */; };
 		6EBEC7E51BBDD30C009B1596 /* firehose.defs in Sources */ = {isa = PBXBuildFile; fileRef = 72DEAA9B1AE1B0BD00289540 /* firehose.defs */; };
-		6EBEC7E61BBDD30D009B1596 /* firehose.defs in Sources */ = {isa = PBXBuildFile; fileRef = 72DEAA9B1AE1B0BD00289540 /* firehose.defs */; };
-		6EBEC7E71BBDD30F009B1596 /* firehose.defs in Sources */ = {isa = PBXBuildFile; fileRef = 72DEAA9B1AE1B0BD00289540 /* firehose.defs */; };
 		6EBEC7E81BBDD324009B1596 /* firehose_reply.defs in Sources */ = {isa = PBXBuildFile; fileRef = 72406A031AF95DF800DF4E2B /* firehose_reply.defs */; settings = {ATTRIBUTES = (Server, ); }; };
-		6EBEC7E91BBDD325009B1596 /* firehose_reply.defs in Sources */ = {isa = PBXBuildFile; fileRef = 72406A031AF95DF800DF4E2B /* firehose_reply.defs */; settings = {ATTRIBUTES = (Server, ); }; };
-		6EBEC7EA1BBDD326009B1596 /* firehose_reply.defs in Sources */ = {isa = PBXBuildFile; fileRef = 72406A031AF95DF800DF4E2B /* firehose_reply.defs */; settings = {ATTRIBUTES = (Server, ); }; };
 		6ED64B401BBD898300C35F4D /* firehose_buffer.c in Sources */ = {isa = PBXBuildFile; fileRef = 72DEAA971AE181D300289540 /* firehose_buffer.c */; };
 		6ED64B411BBD898400C35F4D /* firehose_buffer.c in Sources */ = {isa = PBXBuildFile; fileRef = 72DEAA971AE181D300289540 /* firehose_buffer.c */; };
-		6ED64B421BBD898500C35F4D /* firehose_buffer.c in Sources */ = {isa = PBXBuildFile; fileRef = 72DEAA971AE181D300289540 /* firehose_buffer.c */; };
 		6ED64B431BBD898600C35F4D /* firehose_buffer.c in Sources */ = {isa = PBXBuildFile; fileRef = 72DEAA971AE181D300289540 /* firehose_buffer.c */; };
 		6ED64B441BBD898700C35F4D /* firehose_buffer.c in Sources */ = {isa = PBXBuildFile; fileRef = 72DEAA971AE181D300289540 /* firehose_buffer.c */; };
 		6ED64B461BBD89AF00C35F4D /* firehose.defs in Sources */ = {isa = PBXBuildFile; fileRef = 72DEAA9B1AE1B0BD00289540 /* firehose.defs */; };
@@ -165,7 +194,6 @@
 		6ED64B581BBD8A3E00C35F4D /* firehose_inline_internal.h in Headers */ = {isa = PBXBuildFile; fileRef = 6EB60D291BBB19640092FA94 /* firehose_inline_internal.h */; };
 		6ED64B591BBD8A3F00C35F4D /* firehose_inline_internal.h in Headers */ = {isa = PBXBuildFile; fileRef = 6EB60D291BBB19640092FA94 /* firehose_inline_internal.h */; };
 		6EDF10B81BBB488A007F14BF /* firehose_buffer_private.h in Headers */ = {isa = PBXBuildFile; fileRef = 6EDF10831BBB487E007F14BF /* firehose_buffer_private.h */; settings = {ATTRIBUTES = (Private, ); }; };
-		6EE664271BE2FD5C00ED7B1C /* firehose_buffer.c in Sources */ = {isa = PBXBuildFile; fileRef = 72DEAA971AE181D300289540 /* firehose_buffer.c */; };
 		6EF0B26D1BA8C527007FA4F6 /* firehose_server_private.h in Headers */ = {isa = PBXBuildFile; fileRef = 72EA3FBA1AF41EA400BBA227 /* firehose_server_private.h */; settings = {ATTRIBUTES = (Private, ); }; };
 		6EF0B2711BA8C540007FA4F6 /* firehose_internal.h in Headers */ = {isa = PBXBuildFile; fileRef = 6EF0B26A1BA8C4AE007FA4F6 /* firehose_internal.h */; };
 		6EF0B2781BA8C56E007FA4F6 /* firehose_reply.defs in Sources */ = {isa = PBXBuildFile; fileRef = 72406A031AF95DF800DF4E2B /* firehose_reply.defs */; settings = {ATTRIBUTES = (Client, ); }; };
@@ -174,9 +202,7 @@
 		6EF2CAAC1C8899D5001ABE83 /* lock.c in Sources */ = {isa = PBXBuildFile; fileRef = 6EF2CAAB1C8899D5001ABE83 /* lock.c */; };
 		6EF2CAAD1C8899E9001ABE83 /* lock.c in Sources */ = {isa = PBXBuildFile; fileRef = 6EF2CAAB1C8899D5001ABE83 /* lock.c */; };
 		6EF2CAAE1C8899EA001ABE83 /* lock.c in Sources */ = {isa = PBXBuildFile; fileRef = 6EF2CAAB1C8899D5001ABE83 /* lock.c */; };
-		6EF2CAAF1C8899EB001ABE83 /* lock.c in Sources */ = {isa = PBXBuildFile; fileRef = 6EF2CAAB1C8899D5001ABE83 /* lock.c */; };
 		6EF2CAB01C8899EB001ABE83 /* lock.c in Sources */ = {isa = PBXBuildFile; fileRef = 6EF2CAAB1C8899D5001ABE83 /* lock.c */; };
-		6EF2CAB11C8899EC001ABE83 /* lock.c in Sources */ = {isa = PBXBuildFile; fileRef = 6EF2CAAB1C8899D5001ABE83 /* lock.c */; };
 		6EF2CAB21C8899EC001ABE83 /* lock.c in Sources */ = {isa = PBXBuildFile; fileRef = 6EF2CAAB1C8899D5001ABE83 /* lock.c */; };
 		6EF2CAB31C8899ED001ABE83 /* lock.c in Sources */ = {isa = PBXBuildFile; fileRef = 6EF2CAAB1C8899D5001ABE83 /* lock.c */; };
 		6EF2CAB41C889D65001ABE83 /* lock.h in Headers */ = {isa = PBXBuildFile; fileRef = 6EF2CAA41C88998A001ABE83 /* lock.h */; };
@@ -200,6 +226,8 @@
 		96BC39BD0F3EBAB100C59689 /* queue_private.h in Headers */ = {isa = PBXBuildFile; fileRef = 96BC39BC0F3EBAB100C59689 /* queue_private.h */; settings = {ATTRIBUTES = (Private, ); }; };
 		96C9553B0F3EAEDD000D2CA4 /* once.h in Headers */ = {isa = PBXBuildFile; fileRef = 96C9553A0F3EAEDD000D2CA4 /* once.h */; settings = {ATTRIBUTES = (Public, ); }; };
 		96DF70BE0F38FE3C0074BD99 /* once.c in Sources */ = {isa = PBXBuildFile; fileRef = 96DF70BD0F38FE3C0074BD99 /* once.c */; };
+		B683588F1FA77F5A00AA0D58 /* time_private.h in Headers */ = {isa = PBXBuildFile; fileRef = B683588A1FA77F4900AA0D58 /* time_private.h */; settings = {ATTRIBUTES = (Private, ); }; };
+		B68358901FA77F5B00AA0D58 /* time_private.h in Headers */ = {isa = PBXBuildFile; fileRef = B683588A1FA77F4900AA0D58 /* time_private.h */; settings = {ATTRIBUTES = (Private, ); }; };
 		C00B0DF21C5AEBBE000330B3 /* protocol.defs in Sources */ = {isa = PBXBuildFile; fileRef = FC7BED950E8361E600161930 /* protocol.defs */; settings = {ATTRIBUTES = (Client, Server, ); }; };
 		C00B0DF31C5AEBBE000330B3 /* resolver.c in Sources */ = {isa = PBXBuildFile; fileRef = E44EBE371251656400645D88 /* resolver.c */; };
 		C00B0DF41C5AEBBE000330B3 /* init.c in Sources */ = {isa = PBXBuildFile; fileRef = E44EBE3B1251659900645D88 /* init.c */; };
@@ -244,17 +272,14 @@
 		C90144661C73A9F6002638FC /* module.modulemap in Headers */ = {isa = PBXBuildFile; fileRef = C90144641C73A845002638FC /* module.modulemap */; settings = {ATTRIBUTES = (Private, ); }; };
 		C913AC0F143BD34800B78976 /* data_private.h in Headers */ = {isa = PBXBuildFile; fileRef = C913AC0E143BD34800B78976 /* data_private.h */; settings = {ATTRIBUTES = (Private, ); }; };
 		C93D6165143E190E00EB9023 /* transform.c in Sources */ = {isa = PBXBuildFile; fileRef = C9C5F80D143C1771006DC718 /* transform.c */; };
-		C93D6166143E190F00EB9023 /* transform.c in Sources */ = {isa = PBXBuildFile; fileRef = C9C5F80D143C1771006DC718 /* transform.c */; };
 		C93D6167143E190F00EB9023 /* transform.c in Sources */ = {isa = PBXBuildFile; fileRef = C9C5F80D143C1771006DC718 /* transform.c */; };
 		C9C5F80E143C1771006DC718 /* transform.c in Sources */ = {isa = PBXBuildFile; fileRef = C9C5F80D143C1771006DC718 /* transform.c */; };
 		E4128ED613BA9A1700ABB2CB /* hw_config.h in Headers */ = {isa = PBXBuildFile; fileRef = E4128ED513BA9A1700ABB2CB /* hw_config.h */; };
 		E4128ED713BA9A1700ABB2CB /* hw_config.h in Headers */ = {isa = PBXBuildFile; fileRef = E4128ED513BA9A1700ABB2CB /* hw_config.h */; };
-		E417A38412A472C4004D659D /* provider.d in Sources */ = {isa = PBXBuildFile; fileRef = E43570B8126E93380097AB9F /* provider.d */; };
 		E417A38512A472C5004D659D /* provider.d in Sources */ = {isa = PBXBuildFile; fileRef = E43570B8126E93380097AB9F /* provider.d */; };
 		E420867016027AE500EEE210 /* data.m in Sources */ = {isa = PBXBuildFile; fileRef = E420866F16027AE500EEE210 /* data.m */; };
 		E420867116027AE500EEE210 /* data.m in Sources */ = {isa = PBXBuildFile; fileRef = E420866F16027AE500EEE210 /* data.m */; };
 		E420867216027AE500EEE210 /* data.m in Sources */ = {isa = PBXBuildFile; fileRef = E420866F16027AE500EEE210 /* data.m */; };
-		E420867316027AE500EEE210 /* data.m in Sources */ = {isa = PBXBuildFile; fileRef = E420866F16027AE500EEE210 /* data.m */; };
 		E421E5F91716ADA10090DC9B /* introspection.h in Headers */ = {isa = PBXBuildFile; fileRef = E421E5F81716ADA10090DC9B /* introspection.h */; settings = {ATTRIBUTES = (Public, ); }; };
 		E422A0D512A557B5005E5BDB /* trace.h in Headers */ = {isa = PBXBuildFile; fileRef = E422A0D412A557B5005E5BDB /* trace.h */; };
 		E422A0D612A557B5005E5BDB /* trace.h in Headers */ = {isa = PBXBuildFile; fileRef = E422A0D412A557B5005E5BDB /* trace.h */; };
@@ -264,26 +289,20 @@
 		E43A72501AF85BBC00BAA921 /* block.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E43A724F1AF85BBC00BAA921 /* block.cpp */; };
 		E43A72841AF85BCB00BAA921 /* block.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E43A724F1AF85BBC00BAA921 /* block.cpp */; };
 		E43A72851AF85BCC00BAA921 /* block.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E43A724F1AF85BBC00BAA921 /* block.cpp */; };
-		E43A72861AF85BCC00BAA921 /* block.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E43A724F1AF85BBC00BAA921 /* block.cpp */; };
 		E43A72871AF85BCD00BAA921 /* block.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E43A724F1AF85BBC00BAA921 /* block.cpp */; };
-		E43A72881AF85BE900BAA921 /* block.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E43A724F1AF85BBC00BAA921 /* block.cpp */; };
 		E44757DA17F4572600B82CA1 /* inline_internal.h in Headers */ = {isa = PBXBuildFile; fileRef = E44757D917F4572600B82CA1 /* inline_internal.h */; };
 		E44757DB17F4573500B82CA1 /* inline_internal.h in Headers */ = {isa = PBXBuildFile; fileRef = E44757D917F4572600B82CA1 /* inline_internal.h */; };
 		E44757DC17F4573600B82CA1 /* inline_internal.h in Headers */ = {isa = PBXBuildFile; fileRef = E44757D917F4572600B82CA1 /* inline_internal.h */; };
 		E44A8E6B1805C3E0009FFDB6 /* voucher.c in Sources */ = {isa = PBXBuildFile; fileRef = E44A8E6A1805C3E0009FFDB6 /* voucher.c */; };
 		E44A8E6C1805C3E0009FFDB6 /* voucher.c in Sources */ = {isa = PBXBuildFile; fileRef = E44A8E6A1805C3E0009FFDB6 /* voucher.c */; };
 		E44A8E6D1805C3E0009FFDB6 /* voucher.c in Sources */ = {isa = PBXBuildFile; fileRef = E44A8E6A1805C3E0009FFDB6 /* voucher.c */; };
-		E44A8E6E1805C3E0009FFDB6 /* voucher.c in Sources */ = {isa = PBXBuildFile; fileRef = E44A8E6A1805C3E0009FFDB6 /* voucher.c */; };
 		E44A8E6F1805C3E0009FFDB6 /* voucher.c in Sources */ = {isa = PBXBuildFile; fileRef = E44A8E6A1805C3E0009FFDB6 /* voucher.c */; };
-		E44A8E701805C3E0009FFDB6 /* voucher.c in Sources */ = {isa = PBXBuildFile; fileRef = E44A8E6A1805C3E0009FFDB6 /* voucher.c */; };
 		E44A8E721805C473009FFDB6 /* voucher_private.h in Headers */ = {isa = PBXBuildFile; fileRef = E44A8E711805C473009FFDB6 /* voucher_private.h */; };
 		E44A8E731805C473009FFDB6 /* voucher_private.h in Headers */ = {isa = PBXBuildFile; fileRef = E44A8E711805C473009FFDB6 /* voucher_private.h */; };
 		E44A8E7518066276009FFDB6 /* voucher_internal.h in Headers */ = {isa = PBXBuildFile; fileRef = E44A8E7418066276009FFDB6 /* voucher_internal.h */; };
 		E44A8E7618066276009FFDB6 /* voucher_internal.h in Headers */ = {isa = PBXBuildFile; fileRef = E44A8E7418066276009FFDB6 /* voucher_internal.h */; };
 		E44A8E7718066276009FFDB6 /* voucher_internal.h in Headers */ = {isa = PBXBuildFile; fileRef = E44A8E7418066276009FFDB6 /* voucher_internal.h */; };
 		E44EBE3E1251659900645D88 /* init.c in Sources */ = {isa = PBXBuildFile; fileRef = E44EBE3B1251659900645D88 /* init.c */; };
-		E44EBE5412517EBE00645D88 /* protocol.defs in Sources */ = {isa = PBXBuildFile; fileRef = FC7BED950E8361E600161930 /* protocol.defs */; settings = {ATTRIBUTES = (Client, Server, ); }; };
-		E44EBE5512517EBE00645D88 /* init.c in Sources */ = {isa = PBXBuildFile; fileRef = E44EBE3B1251659900645D88 /* init.c */; };
 		E44EBE5612517EBE00645D88 /* protocol.defs in Sources */ = {isa = PBXBuildFile; fileRef = FC7BED950E8361E600161930 /* protocol.defs */; settings = {ATTRIBUTES = (Client, Server, ); }; };
 		E44EBE5712517EBE00645D88 /* init.c in Sources */ = {isa = PBXBuildFile; fileRef = E44EBE3B1251659900645D88 /* init.c */; };
 		E44F9DAB16543F94001DCD38 /* introspection_internal.h in Headers */ = {isa = PBXBuildFile; fileRef = E44F9DA816543F79001DCD38 /* introspection_internal.h */; };
@@ -312,20 +331,6 @@
 		E4630251176162D200E11F4C /* atomic_sfb.h in Headers */ = {isa = PBXBuildFile; fileRef = E463024F1761603C00E11F4C /* atomic_sfb.h */; };
 		E4630252176162D300E11F4C /* atomic_sfb.h in Headers */ = {isa = PBXBuildFile; fileRef = E463024F1761603C00E11F4C /* atomic_sfb.h */; };
 		E4630253176162D400E11F4C /* atomic_sfb.h in Headers */ = {isa = PBXBuildFile; fileRef = E463024F1761603C00E11F4C /* atomic_sfb.h */; };
-		E46DBC4014EE10C80001F9F6 /* protocol.defs in Sources */ = {isa = PBXBuildFile; fileRef = FC7BED950E8361E600161930 /* protocol.defs */; settings = {ATTRIBUTES = (Client, Server, ); }; };
-		E46DBC4114EE10C80001F9F6 /* resolver.c in Sources */ = {isa = PBXBuildFile; fileRef = E44EBE371251656400645D88 /* resolver.c */; };
-		E46DBC4214EE10C80001F9F6 /* init.c in Sources */ = {isa = PBXBuildFile; fileRef = E44EBE3B1251659900645D88 /* init.c */; };
-		E46DBC4314EE10C80001F9F6 /* queue.c in Sources */ = {isa = PBXBuildFile; fileRef = FC7BED8A0E8361E600161930 /* queue.c */; };
-		E46DBC4414EE10C80001F9F6 /* semaphore.c in Sources */ = {isa = PBXBuildFile; fileRef = 721F5CCE0F15553500FF03A6 /* semaphore.c */; };
-		E46DBC4514EE10C80001F9F6 /* once.c in Sources */ = {isa = PBXBuildFile; fileRef = 96DF70BD0F38FE3C0074BD99 /* once.c */; };
-		E46DBC4614EE10C80001F9F6 /* apply.c in Sources */ = {isa = PBXBuildFile; fileRef = 9676A0E00F3E755D00713ADB /* apply.c */; };
-		E46DBC4714EE10C80001F9F6 /* object.c in Sources */ = {isa = PBXBuildFile; fileRef = 9661E56A0F3E7DDF00749F3E /* object.c */; };
-		E46DBC4814EE10C80001F9F6 /* benchmark.c in Sources */ = {isa = PBXBuildFile; fileRef = 965CD6340F3E806200D4E28D /* benchmark.c */; };
-		E46DBC4914EE10C80001F9F6 /* source.c in Sources */ = {isa = PBXBuildFile; fileRef = 96A8AA860F41E7A400CD570B /* source.c */; };
-		E46DBC4A14EE10C80001F9F6 /* time.c in Sources */ = {isa = PBXBuildFile; fileRef = 96032E4A0F5CC8C700241C5F /* time.c */; };
-		E46DBC4B14EE10C80001F9F6 /* data.c in Sources */ = {isa = PBXBuildFile; fileRef = 5AAB45BF10D30B79004407EA /* data.c */; };
-		E46DBC4C14EE10C80001F9F6 /* io.c in Sources */ = {isa = PBXBuildFile; fileRef = 5A27262510F26F1900751FBC /* io.c */; };
-		E46DBC4D14EE10C80001F9F6 /* transform.c in Sources */ = {isa = PBXBuildFile; fileRef = C9C5F80D143C1771006DC718 /* transform.c */; };
 		E48AF55A16E70FD9004105FF /* io_private.h in Headers */ = {isa = PBXBuildFile; fileRef = E48AF55916E70FD9004105FF /* io_private.h */; settings = {ATTRIBUTES = (Private, ); }; };
 		E48AF55B16E72D44004105FF /* io_private.h in Headers */ = {isa = PBXBuildFile; fileRef = E48AF55916E70FD9004105FF /* io_private.h */; settings = {ATTRIBUTES = (Private, ); }; };
 		E48EC97C1835BADD00EAC4F1 /* yield.h in Headers */ = {isa = PBXBuildFile; fileRef = E48EC97B1835BADD00EAC4F1 /* yield.h */; };
@@ -363,7 +368,6 @@
 		E49BB7091E70A39700868613 /* venture.c in Sources */ = {isa = PBXBuildFile; fileRef = 6E9955CE1C3B218E0071D40C /* venture.c */; };
 		E49BB70A1E70A3B000868613 /* venture.c in Sources */ = {isa = PBXBuildFile; fileRef = 6E9955CE1C3B218E0071D40C /* venture.c */; };
 		E49F2423125D3C960057C971 /* resolver.c in Sources */ = {isa = PBXBuildFile; fileRef = E44EBE371251656400645D88 /* resolver.c */; };
-		E49F2424125D3C970057C971 /* resolver.c in Sources */ = {isa = PBXBuildFile; fileRef = E44EBE371251656400645D88 /* resolver.c */; };
 		E49F2499125D48D80057C971 /* resolver.c in Sources */ = {isa = PBXBuildFile; fileRef = E44EBE371251656400645D88 /* resolver.c */; };
 		E49F24AB125D57FA0057C971 /* dispatch.h in Headers */ = {isa = PBXBuildFile; fileRef = FC7BED960E8361E600161930 /* dispatch.h */; settings = {ATTRIBUTES = (Public, ); }; };
 		E49F24AC125D57FA0057C971 /* base.h in Headers */ = {isa = PBXBuildFile; fileRef = 72CC942F0ECCD8750031B751 /* base.h */; settings = {ATTRIBUTES = (Public, ); }; };
@@ -435,16 +439,6 @@
 		E4D76A9418E325D200B1F98B /* block.h in Headers */ = {isa = PBXBuildFile; fileRef = E4D76A9218E325D200B1F98B /* block.h */; settings = {ATTRIBUTES = (Public, ); }; };
 		E4EB4A2714C35ECE00AA0FA9 /* object.h in Headers */ = {isa = PBXBuildFile; fileRef = E4EB4A2614C35ECE00AA0FA9 /* object.h */; };
 		E4EB4A2814C35ECE00AA0FA9 /* object.h in Headers */ = {isa = PBXBuildFile; fileRef = E4EB4A2614C35ECE00AA0FA9 /* object.h */; };
-		E4EC11AE12514302000DDBD1 /* queue.c in Sources */ = {isa = PBXBuildFile; fileRef = FC7BED8A0E8361E600161930 /* queue.c */; };
-		E4EC11AF12514302000DDBD1 /* semaphore.c in Sources */ = {isa = PBXBuildFile; fileRef = 721F5CCE0F15553500FF03A6 /* semaphore.c */; };
-		E4EC11B012514302000DDBD1 /* once.c in Sources */ = {isa = PBXBuildFile; fileRef = 96DF70BD0F38FE3C0074BD99 /* once.c */; };
-		E4EC11B112514302000DDBD1 /* apply.c in Sources */ = {isa = PBXBuildFile; fileRef = 9676A0E00F3E755D00713ADB /* apply.c */; };
-		E4EC11B212514302000DDBD1 /* object.c in Sources */ = {isa = PBXBuildFile; fileRef = 9661E56A0F3E7DDF00749F3E /* object.c */; };
-		E4EC11B312514302000DDBD1 /* benchmark.c in Sources */ = {isa = PBXBuildFile; fileRef = 965CD6340F3E806200D4E28D /* benchmark.c */; };
-		E4EC11B412514302000DDBD1 /* source.c in Sources */ = {isa = PBXBuildFile; fileRef = 96A8AA860F41E7A400CD570B /* source.c */; };
-		E4EC11B512514302000DDBD1 /* time.c in Sources */ = {isa = PBXBuildFile; fileRef = 96032E4A0F5CC8C700241C5F /* time.c */; };
-		E4EC11B712514302000DDBD1 /* data.c in Sources */ = {isa = PBXBuildFile; fileRef = 5AAB45BF10D30B79004407EA /* data.c */; };
-		E4EC11B812514302000DDBD1 /* io.c in Sources */ = {isa = PBXBuildFile; fileRef = 5A27262510F26F1900751FBC /* io.c */; };
 		E4EC121A12514715000DDBD1 /* queue.c in Sources */ = {isa = PBXBuildFile; fileRef = FC7BED8A0E8361E600161930 /* queue.c */; };
 		E4EC121B12514715000DDBD1 /* semaphore.c in Sources */ = {isa = PBXBuildFile; fileRef = 721F5CCE0F15553500FF03A6 /* semaphore.c */; };
 		E4EC121C12514715000DDBD1 /* once.c in Sources */ = {isa = PBXBuildFile; fileRef = 96DF70BD0F38FE3C0074BD99 /* once.c */; };
@@ -459,8 +453,8 @@
 		E4ECBAA615253D17002C313C /* mach_private.h in Headers */ = {isa = PBXBuildFile; fileRef = E4ECBAA415253C25002C313C /* mach_private.h */; settings = {ATTRIBUTES = (Private, ); }; };
 		E4FC3264145F46C9002FBDDB /* object.m in Sources */ = {isa = PBXBuildFile; fileRef = E4FC3263145F46C9002FBDDB /* object.m */; };
 		E4FC3265145F46C9002FBDDB /* object.m in Sources */ = {isa = PBXBuildFile; fileRef = E4FC3263145F46C9002FBDDB /* object.m */; };
-		E4FC3266145F46C9002FBDDB /* object.m in Sources */ = {isa = PBXBuildFile; fileRef = E4FC3263145F46C9002FBDDB /* object.m */; };
 		E4FC3267145F46C9002FBDDB /* object.m in Sources */ = {isa = PBXBuildFile; fileRef = E4FC3263145F46C9002FBDDB /* object.m */; };
+		F7DC045B2060BBBE00C90737 /* target.h in Headers */ = {isa = PBXBuildFile; fileRef = F7DC045A2060BBBE00C90737 /* target.h */; };
 		FC0B34790FA2851C0080FFA0 /* source_internal.h in Headers */ = {isa = PBXBuildFile; fileRef = FC0B34780FA2851C0080FFA0 /* source_internal.h */; };
 		FC1832A6109923C7003403D5 /* perfmon.h in Headers */ = {isa = PBXBuildFile; fileRef = FC1832A2109923C7003403D5 /* perfmon.h */; };
 		FC1832A7109923C7003403D5 /* time.h in Headers */ = {isa = PBXBuildFile; fileRef = FC1832A3109923C7003403D5 /* time.h */; };
@@ -513,6 +507,20 @@
 			remoteGlobalIDString = 6E040C621C499B1B00411A2E;
 			remoteInfo = libfirehose_kernel;
 		};
+		6EE5083A21701B9100833569 /* PBXContainerItemProxy */ = {
+			isa = PBXContainerItemProxy;
+			containerPortal = 08FB7793FE84155DC02AAC07 /* Project object */;
+			proxyType = 1;
+			remoteGlobalIDString = E4B51595164B2DA300E003AF;
+			remoteInfo = "libdispatch introspection";
+		};
+		6EE5083C21701B9600833569 /* PBXContainerItemProxy */ = {
+			isa = PBXContainerItemProxy;
+			containerPortal = 08FB7793FE84155DC02AAC07 /* Project object */;
+			proxyType = 1;
+			remoteGlobalIDString = E4B51595164B2DA300E003AF;
+			remoteInfo = "libdispatch introspection";
+		};
 		6EF0B27D1BA8C5BF007FA4F6 /* PBXContainerItemProxy */ = {
 			isa = PBXContainerItemProxy;
 			containerPortal = 08FB7793FE84155DC02AAC07 /* Project object */;
@@ -534,6 +542,27 @@
 			remoteGlobalIDString = 92F3FECA1BEC69E500025962;
 			remoteInfo = darwintests;
 		};
+		9B2A11A22032494E0060E7D4 /* PBXContainerItemProxy */ = {
+			isa = PBXContainerItemProxy;
+			containerPortal = 9B6A42E01FE098430000D146 /* queue-tip.xcodeproj */;
+			proxyType = 1;
+			remoteGlobalIDString = 9BECABC71E944C0400ED341E;
+			remoteInfo = "queue-tip";
+		};
+		9B2A11A92032494E0060E7D4 /* PBXContainerItemProxy */ = {
+			isa = PBXContainerItemProxy;
+			containerPortal = 9B6A42E01FE098430000D146 /* queue-tip.xcodeproj */;
+			proxyType = 2;
+			remoteGlobalIDString = 9BECABC81E944C0400ED341E;
+			remoteInfo = "queue-tip";
+		};
+		9BEBA57720127D4400E6FD0D /* PBXContainerItemProxy */ = {
+			isa = PBXContainerItemProxy;
+			containerPortal = C927F35F10FD7F1000C5AB8B /* ddt.xcodeproj */;
+			proxyType = 1;
+			remoteGlobalIDString = FCFA5A9F10D1AE050074F59A;
+			remoteInfo = ddt;
+		};
 		C00B0E131C5AEED6000330B3 /* PBXContainerItemProxy */ = {
 			isa = PBXContainerItemProxy;
 			containerPortal = 08FB7793FE84155DC02AAC07 /* Project object */;
@@ -569,20 +598,6 @@
 			remoteGlobalIDString = D2AAC045055464E500DB518D;
 			remoteInfo = libdispatch;
 		};
-		E437F0D514F7441F00F0B997 /* PBXContainerItemProxy */ = {
-			isa = PBXContainerItemProxy;
-			containerPortal = 08FB7793FE84155DC02AAC07 /* Project object */;
-			proxyType = 1;
-			remoteGlobalIDString = E46DBC1A14EE10C80001F9F6;
-			remoteInfo = libdispatch_static;
-		};
-		E47D6ECA125FEB9D0070D91C /* PBXContainerItemProxy */ = {
-			isa = PBXContainerItemProxy;
-			containerPortal = 08FB7793FE84155DC02AAC07 /* Project object */;
-			proxyType = 1;
-			remoteGlobalIDString = E4EC118F12514302000DDBD1;
-			remoteInfo = "libdispatch up resolved";
-		};
 		E47D6ECC125FEBA10070D91C /* PBXContainerItemProxy */ = {
 			isa = PBXContainerItemProxy;
 			containerPortal = 08FB7793FE84155DC02AAC07 /* Project object */;
@@ -595,14 +610,7 @@
 			containerPortal = 08FB7793FE84155DC02AAC07 /* Project object */;
 			proxyType = 1;
 			remoteGlobalIDString = E49BB6CE1E70748100868613;
-			remoteInfo = "libdispatch alt resolved";
-		};
-		E4B515DA164B317700E003AF /* PBXContainerItemProxy */ = {
-			isa = PBXContainerItemProxy;
-			containerPortal = 08FB7793FE84155DC02AAC07 /* Project object */;
-			proxyType = 1;
-			remoteGlobalIDString = E4B51595164B2DA300E003AF;
-			remoteInfo = "libdispatch introspection";
+			remoteInfo = "libdispatch armv81 resolved";
 		};
 /* End PBXContainerItemProxy section */
 
@@ -636,6 +644,8 @@
 		6E1612691C79606E006FC9A9 /* dispatch_queue_label.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = dispatch_queue_label.c; sourceTree = "<group>"; };
 		6E21F2E41BBB23F00000C6A5 /* firehose_server_internal.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = firehose_server_internal.h; sourceTree = "<group>"; };
 		6E21F2E51BBB23F00000C6A5 /* firehose_server.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = firehose_server.c; sourceTree = "<group>"; };
+		6E2464E21F5E67E20031ADD9 /* check-order.sh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.script.sh; path = "check-order.sh"; sourceTree = "<group>"; };
+		6E29394C1FB9526E00FDAC90 /* libdispatch.plist */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.plist.xml; path = libdispatch.plist; sourceTree = "<group>"; };
 		6E326A8F1C2245C4002A6505 /* dispatch_transform.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = dispatch_transform.c; sourceTree = "<group>"; };
 		6E326AB11C224830002A6505 /* dispatch_cascade.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = dispatch_cascade.c; sourceTree = "<group>"; };
 		6E326AB31C224870002A6505 /* dispatch_qos.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = dispatch_qos.c; sourceTree = "<group>"; };
@@ -658,13 +668,16 @@
 		6E326B161C239431002A6505 /* dispatch_timer_short.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = dispatch_timer_short.c; sourceTree = "<group>"; };
 		6E326B171C239431002A6505 /* dispatch_timer_timeout.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = dispatch_timer_timeout.c; sourceTree = "<group>"; };
 		6E326B441C239B61002A6505 /* dispatch_priority.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = dispatch_priority.c; sourceTree = "<group>"; };
+		6E49BF2420E34B43002624FC /* libdispatch.clean */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = libdispatch.clean; sourceTree = "<group>"; };
+		6E49BF2920E34B44002624FC /* libdispatch.dirty */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = libdispatch.dirty; sourceTree = "<group>"; };
 		6E4BACBC1D48A41500B562AE /* mach.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = mach.c; sourceTree = "<group>"; };
 		6E4BACC91D48A89500B562AE /* mach_internal.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = mach_internal.h; sourceTree = "<group>"; };
 		6E4FC9D11C84123600520351 /* os_venture_basic.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = os_venture_basic.c; sourceTree = "<group>"; };
+		6E5662DC1F8C2E3E00BC2474 /* workqueue_internal.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = workqueue_internal.h; sourceTree = "<group>"; };
+		6E5662E41F8C2E5B00BC2474 /* workqueue.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = workqueue.c; sourceTree = "<group>"; };
 		6E5ACCB01D3C4CFB007DA2B4 /* event_kevent.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = event_kevent.c; sourceTree = "<group>"; };
 		6E5ACCB91D3C4D0B007DA2B4 /* event_internal.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = event_internal.h; sourceTree = "<group>"; };
 		6E5ACCBD1D3C6719007DA2B4 /* event.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = event.c; sourceTree = "<group>"; };
-		6E62B0531C55806200D2C7C0 /* dispatch_trysync.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = dispatch_trysync.c; sourceTree = "<group>"; };
 		6E67D8D31C16C20B00FC98AC /* dispatch_apply.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = dispatch_apply.c; sourceTree = "<group>"; };
 		6E67D8D91C16C94B00FC98AC /* dispatch_cf_main.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = dispatch_cf_main.c; sourceTree = "<group>"; };
 		6E67D90D1C16CCEB00FC98AC /* dispatch_debug.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = dispatch_debug.c; sourceTree = "<group>"; };
@@ -673,6 +686,7 @@
 		6E67D9131C17676D00FC98AC /* dispatch_overcommit.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = dispatch_overcommit.c; sourceTree = "<group>"; };
 		6E67D9151C1768B300FC98AC /* dispatch_pingpong.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = dispatch_pingpong.c; sourceTree = "<group>"; };
 		6E67D9171C17BA7200FC98AC /* nsoperation.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = nsoperation.m; sourceTree = "<group>"; };
+		6E70181C1F4EB51B0077C1DC /* workloop_private.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = workloop_private.h; sourceTree = "<group>"; };
 		6E8E4E6D1C1A35EE0004F5CC /* dispatch_select.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = dispatch_select.c; sourceTree = "<group>"; };
 		6E8E4E6E1C1A35EE0004F5CC /* test_lib.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = test_lib.c; sourceTree = "<group>"; };
 		6E8E4E6F1C1A35EE0004F5CC /* test_lib.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = test_lib.h; sourceTree = "<group>"; };
@@ -687,6 +701,7 @@
 		6E9955CE1C3B218E0071D40C /* venture.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = venture.c; sourceTree = "<group>"; };
 		6E9956061C3B21AA0071D40C /* venture_internal.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = venture_internal.h; sourceTree = "<group>"; };
 		6E9B6B201BB4CC73009E324D /* firehose_buffer_internal.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = firehose_buffer_internal.h; sourceTree = "<group>"; };
+		6E9C6CA220F9848000EA81C0 /* yield.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = yield.c; path = shims/yield.c; sourceTree = "<group>"; };
 		6EA283D01CAB93270041B2E0 /* libdispatch.codes */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = libdispatch.codes; sourceTree = "<group>"; };
 		6EA2CB841C005DEF0076794A /* dispatch_source.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = dispatch_source.c; sourceTree = "<group>"; };
 		6EA7937D1D456D1300929B1B /* event_epoll.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = event_epoll.c; sourceTree = "<group>"; };
@@ -739,8 +754,11 @@
 		96BC39BC0F3EBAB100C59689 /* queue_private.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = queue_private.h; sourceTree = "<group>"; };
 		96C9553A0F3EAEDD000D2CA4 /* once.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = once.h; sourceTree = "<group>"; };
 		96DF70BD0F38FE3C0074BD99 /* once.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; lineEnding = 0; path = once.c; sourceTree = "<group>"; xcLanguageSpecificationIdentifier = xcode.lang.c; };
+		9B6A42E01FE098430000D146 /* queue-tip.xcodeproj */ = {isa = PBXFileReference; lastKnownFileType = "wrapper.pb-project"; name = "queue-tip.xcodeproj"; path = "tools/queue-tip/queue-tip.xcodeproj"; sourceTree = "<group>"; };
 		B63B793F1E8F004F0060C1E1 /* dispatch_no_blocks.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; path = dispatch_no_blocks.c; sourceTree = "<group>"; };
 		B68330BC1EBCF6080003E71C /* dispatch_wl.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; path = dispatch_wl.c; sourceTree = "<group>"; };
+		B683588A1FA77F4900AA0D58 /* time_private.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = time_private.h; sourceTree = "<group>"; };
+		B68358911FA77FFD00AA0D58 /* dispatch_time.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = dispatch_time.c; sourceTree = "<group>"; };
 		B69878521F06F8790088F94F /* dispatch_signals.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; path = dispatch_signals.c; sourceTree = "<group>"; };
 		B6AC73FD1EB10973009FB2F2 /* perf_thread_request.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; path = perf_thread_request.c; sourceTree = "<group>"; };
 		B6AE9A4A1D7F53B300AC007F /* dispatch_queue_create.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = dispatch_queue_create.c; sourceTree = "<group>"; };
@@ -782,13 +800,11 @@
 		E44F9DA816543F79001DCD38 /* introspection_internal.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = introspection_internal.h; sourceTree = "<group>"; };
 		E454569214746F1B00106147 /* object_private.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = object_private.h; sourceTree = "<group>"; };
 		E463024F1761603C00E11F4C /* atomic_sfb.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = atomic_sfb.h; sourceTree = "<group>"; };
-		E46DBC5714EE10C80001F9F6 /* libdispatch_up.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = libdispatch_up.a; sourceTree = BUILT_PRODUCTS_DIR; };
-		E46DBC5814EE11BC0001F9F6 /* libdispatch-up-static.xcconfig */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.xcconfig; path = "libdispatch-up-static.xcconfig"; sourceTree = "<group>"; };
 		E47D6BB5125F0F800070D91C /* resolved.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = resolved.h; sourceTree = "<group>"; };
 		E482F1CD12DBAB590030614D /* postprocess-headers.sh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.script.sh; path = "postprocess-headers.sh"; sourceTree = "<group>"; };
 		E48AF55916E70FD9004105FF /* io_private.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = io_private.h; path = private/io_private.h; sourceTree = SOURCE_ROOT; tabWidth = 8; };
 		E48EC97B1835BADD00EAC4F1 /* yield.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = yield.h; sourceTree = "<group>"; };
-		E49BB6F21E70748100868613 /* libdispatch_alt.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = libdispatch_alt.a; sourceTree = BUILT_PRODUCTS_DIR; };
+		E49BB6F21E70748100868613 /* libdispatch_armv81.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = libdispatch_armv81.a; sourceTree = BUILT_PRODUCTS_DIR; };
 		E49F24DF125D57FA0057C971 /* libdispatch.dylib */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.dylib"; includeInIndex = 0; path = libdispatch.dylib; sourceTree = BUILT_PRODUCTS_DIR; };
 		E49F251D125D630A0057C971 /* install-manpages.sh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.script.sh; path = "install-manpages.sh"; sourceTree = "<group>"; };
 		E49F251E125D631D0057C971 /* mig-headers.sh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.script.sh; path = "mig-headers.sh"; sourceTree = "<group>"; };
@@ -806,9 +822,9 @@
 		E4D76A9218E325D200B1F98B /* block.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = block.h; sourceTree = "<group>"; };
 		E4EB4A2614C35ECE00AA0FA9 /* object.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = object.h; sourceTree = "<group>"; };
 		E4EB4A2A14C36F4E00AA0FA9 /* install-headers.sh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.script.sh; path = "install-headers.sh"; sourceTree = "<group>"; };
-		E4EC11C312514302000DDBD1 /* libdispatch_up.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = libdispatch_up.a; sourceTree = BUILT_PRODUCTS_DIR; };
 		E4EC122D12514715000DDBD1 /* libdispatch_mp.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = libdispatch_mp.a; sourceTree = BUILT_PRODUCTS_DIR; };
 		E4ECBAA415253C25002C313C /* mach_private.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = mach_private.h; sourceTree = "<group>"; };
+		E4FB8E90218CD7F8004B7A25 /* install-plists.sh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.script.sh; path = "install-plists.sh"; sourceTree = "<group>"; };
 		E4FC3263145F46C9002FBDDB /* object.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = object.m; sourceTree = "<group>"; };
 		EA53C60E1BFEA851000A02EA /* bsdtestharness.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; name = bsdtestharness.c; path = tests/bsdtestharness.c; sourceTree = "<group>"; };
 		EA53C60F1BFEA851000A02EA /* bsdtests.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; name = bsdtests.c; path = tests/bsdtests.c; sourceTree = "<group>"; };
@@ -856,6 +872,7 @@
 		EA53C6391BFEA851000A02EA /* dispatch_vm.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; name = dispatch_vm.c; path = tests/dispatch_vm.c; sourceTree = "<group>"; };
 		EA53C63A1BFEA851000A02EA /* dispatch_vnode.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; name = dispatch_vnode.c; path = tests/dispatch_vnode.c; sourceTree = "<group>"; };
 		EA53C63B1BFEA851000A02EA /* func.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; name = func.c; path = tests/func.c; sourceTree = "<group>"; };
+		F7DC045A2060BBBE00C90737 /* target.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = target.h; sourceTree = "<group>"; };
 		FC0B34780FA2851C0080FFA0 /* source_internal.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = source_internal.h; sourceTree = "<group>"; };
 		FC1832A2109923C7003403D5 /* perfmon.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = perfmon.h; sourceTree = "<group>"; };
 		FC1832A3109923C7003403D5 /* time.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = time.h; sourceTree = "<group>"; };
@@ -910,6 +927,7 @@
 				C6A0FF2B0290797F04C91782 /* Documentation */,
 				1AB674ADFE9D54B511CA2CBB /* Products */,
 				EA53C60D1BFE9605000A02EA /* Tests */,
+				9B6A42E01FE098430000D146 /* queue-tip.xcodeproj */,
 				C927F35F10FD7F1000C5AB8B /* ddt.xcodeproj */,
 				4552536E19B1384900B88766 /* libdispatchtest.xcodeproj */,
 			);
@@ -943,7 +961,9 @@
 				C9C5F80D143C1771006DC718 /* transform.c */,
 				6E9955CE1C3B218E0071D40C /* venture.c */,
 				E44A8E6A1805C3E0009FFDB6 /* voucher.c */,
+				6E9C6CA220F9848000EA81C0 /* yield.c */,
 				6EA283D01CAB93270041B2E0 /* libdispatch.codes */,
+				6E29394C1FB9526E00FDAC90 /* libdispatch.plist */,
 				FC7BED950E8361E600161930 /* protocol.defs */,
 				E43570B8126E93380097AB9F /* provider.d */,
 				6E5ACCAF1D3BF2A0007DA2B4 /* event */,
@@ -960,9 +980,7 @@
 				E4B515D6164B2DA300E003AF /* libdispatch.dylib */,
 				E49F24DF125D57FA0057C971 /* libdispatch.dylib */,
 				E4EC122D12514715000DDBD1 /* libdispatch_mp.a */,
-				E4EC11C312514302000DDBD1 /* libdispatch_up.a */,
-				E49BB6F21E70748100868613 /* libdispatch_alt.a */,
-				E46DBC5714EE10C80001F9F6 /* libdispatch_up.a */,
+				E49BB6F21E70748100868613 /* libdispatch_armv81.a */,
 				C01866BD1C5973210040FC07 /* libdispatch.a */,
 				C00B0E0A1C5AEBBE000330B3 /* libdispatch_dyld_stub.a */,
 				6E040C631C499B1B00411A2E /* libfirehose_kernel.a */,
@@ -987,6 +1005,7 @@
 			children = (
 				6EA793881D458A5800929B1B /* event_config.h */,
 				6E5ACCB91D3C4D0B007DA2B4 /* event_internal.h */,
+				6E5662DC1F8C2E3E00BC2474 /* workqueue_internal.h */,
 			);
 			path = event;
 			sourceTree = "<group>";
@@ -997,6 +1016,7 @@
 				6E5ACCBD1D3C6719007DA2B4 /* event.c */,
 				6E5ACCB01D3C4CFB007DA2B4 /* event_kevent.c */,
 				6EA7937D1D456D1300929B1B /* event_epoll.c */,
+				6E5662E41F8C2E5B00BC2474 /* workqueue.c */,
 			);
 			path = event;
 			sourceTree = "<group>";
@@ -1083,9 +1103,9 @@
 				6E326B151C239431002A6505 /* dispatch_timer_set_time.c */,
 				6E326B161C239431002A6505 /* dispatch_timer_short.c */,
 				6E326B171C239431002A6505 /* dispatch_timer_timeout.c */,
+				B68358911FA77FFD00AA0D58 /* dispatch_time.c */,
 				6E326AE61C2392E8002A6505 /* dispatch_timer.c */,
 				6E326A8F1C2245C4002A6505 /* dispatch_transform.c */,
-				6E62B0531C55806200D2C7C0 /* dispatch_trysync.c */,
 				6E8E4EC91C1A670B0004F5CC /* dispatch_vm.c */,
 				6E326AB71C225FCA002A6505 /* dispatch_vnode.c */,
 				B68330BC1EBCF6080003E71C /* dispatch_wl.c */,
@@ -1104,6 +1124,14 @@
 			path = tests;
 			sourceTree = "<group>";
 		};
+		9B6A42E11FE098430000D146 /* Products */ = {
+			isa = PBXGroup;
+			children = (
+				9B2A11AA2032494E0060E7D4 /* queue-tip */,
+			);
+			name = Products;
+			sourceTree = "<group>";
+		};
 		C6A0FF2B0290797F04C91782 /* Documentation */ = {
 			isa = PBXGroup;
 			children = (
@@ -1143,7 +1171,6 @@
 				E43D93F11097917E004F6A62 /* libdispatch.xcconfig */,
 				E40041AA125D705F0022B135 /* libdispatch-resolver.xcconfig */,
 				E40041A9125D70590022B135 /* libdispatch-resolved.xcconfig */,
-				E46DBC5814EE11BC0001F9F6 /* libdispatch-up-static.xcconfig */,
 				C01866BE1C59735B0040FC07 /* libdispatch-mp-static.xcconfig */,
 				C00B0E121C5AEBF7000330B3 /* libdispatch-dyld-stub.xcconfig */,
 				E4B515D9164B2E9B00E003AF /* libdispatch-introspection.xcconfig */,
@@ -1151,6 +1178,8 @@
 				6E040C721C499C3600411A2E /* libfirehose_kernel.xcconfig */,
 				E422DA3614D2A7E7003C6EE4 /* libdispatch.aliases */,
 				E448727914C6215D00BB45C2 /* libdispatch.order */,
+				6E49BF2420E34B43002624FC /* libdispatch.clean */,
+				6E49BF2920E34B44002624FC /* libdispatch.dirty */,
 				E421E5FD1716BEA70090DC9B /* libdispatch.interposable */,
 			);
 			path = xcodeconfig;
@@ -1180,9 +1209,11 @@
 		E49F259C125D664F0057C971 /* xcodescripts */ = {
 			isa = PBXGroup;
 			children = (
+				6E2464E21F5E67E20031ADD9 /* check-order.sh */,
 				E49F251D125D630A0057C971 /* install-manpages.sh */,
 				E4EB4A2A14C36F4E00AA0FA9 /* install-headers.sh */,
 				E421E5FB1716B8730090DC9B /* install-dtrace.sh */,
+				E4FB8E90218CD7F8004B7A25 /* install-plists.sh */,
 				E49F251E125D631D0057C971 /* mig-headers.sh */,
 				E482F1CD12DBAB590030614D /* postprocess-headers.sh */,
 				C01866BF1C5976C90040FC07 /* run-on-install.sh */,
@@ -1275,6 +1306,7 @@
 				6EF2CAA41C88998A001ABE83 /* lock.h */,
 				FC1832A2109923C7003403D5 /* perfmon.h */,
 				6EFBDA4A1D61A0D600282887 /* priority.h */,
+				F7DC045A2060BBBE00C90737 /* target.h */,
 				FC1832A3109923C7003403D5 /* time.h */,
 				FC1832A4109923C7003403D5 /* tsd.h */,
 				E48EC97B1835BADD00EAC4F1 /* yield.h */,
@@ -1311,8 +1343,10 @@
 				C913AC0E143BD34800B78976 /* data_private.h */,
 				E48AF55916E70FD9004105FF /* io_private.h */,
 				96BC39BC0F3EBAB100C59689 /* queue_private.h */,
+				6E70181C1F4EB51B0077C1DC /* workloop_private.h */,
 				FCEF047F0F5661960067401F /* source_private.h */,
 				E4ECBAA415253C25002C313C /* mach_private.h */,
+				B683588A1FA77F4900AA0D58 /* time_private.h */,
 				C90144641C73A845002638FC /* module.modulemap */,
 				961B99350F3E83980006BC96 /* benchmark.h */,
 				E4B515D7164B2DFB00E003AF /* introspection_private.h */,
@@ -1401,8 +1435,10 @@
 				96BC39BD0F3EBAB100C59689 /* queue_private.h in Headers */,
 				C90144661C73A9F6002638FC /* module.modulemap in Headers */,
 				FCEF04800F5661960067401F /* source_private.h in Headers */,
+				F7DC045B2060BBBE00C90737 /* target.h in Headers */,
 				961B99360F3E83980006BC96 /* benchmark.h in Headers */,
 				FC7BED9E0E8361E600161930 /* internal.h in Headers */,
+				6E7018211F4EB51B0077C1DC /* workloop_private.h in Headers */,
 				965ECC210F3EAB71004DDD89 /* object_internal.h in Headers */,
 				96929D960F3EA2170041FF5D /* queue_internal.h in Headers */,
 				FC0B34790FA2851C0080FFA0 /* source_internal.h in Headers */,
@@ -1429,6 +1465,8 @@
 				6ED64B571BBD8A3B00C35F4D /* firehose_inline_internal.h in Headers */,
 				E4128ED613BA9A1700ABB2CB /* hw_config.h in Headers */,
 				E454569314746F1B00106147 /* object_private.h in Headers */,
+				B683588F1FA77F5A00AA0D58 /* time_private.h in Headers */,
+				6E5662E11F8C2E3E00BC2474 /* workqueue_internal.h in Headers */,
 				E4EB4A2714C35ECE00AA0FA9 /* object.h in Headers */,
 				E48AF55A16E70FD9004105FF /* io_private.h in Headers */,
 				E4ECBAA515253C25002C313C /* mach_private.h in Headers */,
@@ -1444,6 +1482,7 @@
 				E49F24AB125D57FA0057C971 /* dispatch.h in Headers */,
 				E49F24AC125D57FA0057C971 /* base.h in Headers */,
 				6E5ACCBB1D3C4D0E007DA2B4 /* event_internal.h in Headers */,
+				6E7018221F4EB5220077C1DC /* workloop_private.h in Headers */,
 				E49F24AD125D57FA0057C971 /* object.h in Headers */,
 				E44757DC17F4573600B82CA1 /* inline_internal.h in Headers */,
 				E49F24AE125D57FA0057C971 /* queue.h in Headers */,
@@ -1454,6 +1493,7 @@
 				E49F24B1125D57FA0057C971 /* group.h in Headers */,
 				E49F24B2125D57FA0057C971 /* once.h in Headers */,
 				E49F24B3125D57FA0057C971 /* io.h in Headers */,
+				6E5662E21F8C2E4F00BC2474 /* workqueue_internal.h in Headers */,
 				E44A8E7618066276009FFDB6 /* voucher_internal.h in Headers */,
 				E4630252176162D300E11F4C /* atomic_sfb.h in Headers */,
 				E49F24B4125D57FA0057C971 /* data.h in Headers */,
@@ -1481,6 +1521,7 @@
 				6ED64B521BBD8A2100C35F4D /* firehose_buffer_internal.h in Headers */,
 				E48EC97D1835BADD00EAC4F1 /* yield.h in Headers */,
 				2BE17C6518EA305E002CA4E8 /* layout_private.h in Headers */,
+				B68358901FA77F5B00AA0D58 /* time_private.h in Headers */,
 				E49F24C6125D57FA0057C971 /* config.h in Headers */,
 				E422A0D612A557B5005E5BDB /* trace.h in Headers */,
 				6E9956091C3B21B40071D40C /* venture_internal.h in Headers */,
@@ -1505,6 +1546,7 @@
 				E4B515D8164B2DFB00E003AF /* introspection_private.h in Headers */,
 				E44F9DAF16544026001DCD38 /* internal.h in Headers */,
 				E421E5F91716ADA10090DC9B /* introspection.h in Headers */,
+				6E5662E31F8C2E5100BC2474 /* workqueue_internal.h in Headers */,
 				E44F9DB216544032001DCD38 /* object_internal.h in Headers */,
 				E44F9DB316544037001DCD38 /* queue_internal.h in Headers */,
 				6ED64B531BBD8A2300C35F4D /* firehose_buffer_internal.h in Headers */,
@@ -1632,17 +1674,16 @@
 				E4EB4A2B14C3720B00AA0FA9 /* Install Headers */,
 				E482F1C512DBAA110030614D /* Postprocess Headers */,
 				4CED8B9D0EEDF8B600AF99AB /* Install Manpages */,
+				6E2464DD1F5E67900031ADD9 /* Validate symbol ordering */,
+				E4FB8E8F218CD68A004B7A25 /* Install Plists */,
 			);
 			buildRules = (
 			);
 			dependencies = (
 				6EF0B27E1BA8C5BF007FA4F6 /* PBXTargetDependency */,
 				E47D6ECD125FEBA10070D91C /* PBXTargetDependency */,
-				E47D6ECB125FEB9D0070D91C /* PBXTargetDependency */,
 				E49BB6F81E7074C100868613 /* PBXTargetDependency */,
-				E4B515DB164B317700E003AF /* PBXTargetDependency */,
 				C01866C21C597AEA0040FC07 /* PBXTargetDependency */,
-				E437F0D614F7441F00F0B997 /* PBXTargetDependency */,
 				C00B0E141C5AEED6000330B3 /* PBXTargetDependency */,
 			);
 			name = libdispatch;
@@ -1650,24 +1691,9 @@
 			productReference = D2AAC046055464E500DB518D /* libdispatch.dylib */;
 			productType = "com.apple.product-type.library.dynamic";
 		};
-		E46DBC1A14EE10C80001F9F6 /* libdispatch up static */ = {
+		E49BB6CE1E70748100868613 /* libdispatch armv81 resolved */ = {
 			isa = PBXNativeTarget;
-			buildConfigurationList = E46DBC5414EE10C80001F9F6 /* Build configuration list for PBXNativeTarget "libdispatch up static" */;
-			buildPhases = (
-				E46DBC3E14EE10C80001F9F6 /* Sources */,
-			);
-			buildRules = (
-			);
-			dependencies = (
-			);
-			name = "libdispatch up static";
-			productName = libdispatch;
-			productReference = E46DBC5714EE10C80001F9F6 /* libdispatch_up.a */;
-			productType = "com.apple.product-type.library.static";
-		};
-		E49BB6CE1E70748100868613 /* libdispatch alt resolved */ = {
-			isa = PBXNativeTarget;
-			buildConfigurationList = E49BB6EF1E70748100868613 /* Build configuration list for PBXNativeTarget "libdispatch alt resolved" */;
+			buildConfigurationList = E49BB6EF1E70748100868613 /* Build configuration list for PBXNativeTarget "libdispatch armv81 resolved" */;
 			buildPhases = (
 				E49BB6CF1E70748100868613 /* Mig Headers */,
 				E49BB6D01E70748100868613 /* Sources */,
@@ -1677,9 +1703,9 @@
 			);
 			dependencies = (
 			);
-			name = "libdispatch alt resolved";
+			name = "libdispatch armv81 resolved";
 			productName = libdispatch;
-			productReference = E49BB6F21E70748100868613 /* libdispatch_alt.a */;
+			productReference = E49BB6F21E70748100868613 /* libdispatch_armv81.a */;
 			productType = "com.apple.product-type.library.static";
 		};
 		E49F24A9125D57FA0057C971 /* libdispatch no resolver */ = {
@@ -1719,23 +1745,6 @@
 			productReference = E4B515D6164B2DA300E003AF /* libdispatch.dylib */;
 			productType = "com.apple.product-type.library.dynamic";
 		};
-		E4EC118F12514302000DDBD1 /* libdispatch up resolved */ = {
-			isa = PBXNativeTarget;
-			buildConfigurationList = E4EC11BC12514302000DDBD1 /* Build configuration list for PBXNativeTarget "libdispatch up resolved" */;
-			buildPhases = (
-				E4EC12141251461A000DDBD1 /* Mig Headers */,
-				E4EC11AC12514302000DDBD1 /* Sources */,
-				E4EC121212514613000DDBD1 /* Symlink normal variant */,
-			);
-			buildRules = (
-			);
-			dependencies = (
-			);
-			name = "libdispatch up resolved";
-			productName = libdispatch;
-			productReference = E4EC11C312514302000DDBD1 /* libdispatch_up.a */;
-			productType = "com.apple.product-type.library.static";
-		};
 		E4EC121612514715000DDBD1 /* libdispatch mp resolved */ = {
 			isa = PBXNativeTarget;
 			buildConfigurationList = E4EC122612514715000DDBD1 /* Build configuration list for PBXNativeTarget "libdispatch mp resolved" */;
@@ -1760,7 +1769,8 @@
 			isa = PBXProject;
 			attributes = {
 				BuildIndependentTargetsInParallel = YES;
-				LastUpgradeCheck = 0900;
+				DefaultBuildSystemTypeForWorkspace = Latest;
+				LastUpgradeCheck = 1010;
 				TargetAttributes = {
 					3F3C9326128E637B0042B1F7 = {
 						ProvisioningStyle = Manual;
@@ -1787,6 +1797,10 @@
 						CreatedOnToolsVersion = 7.1;
 						ProvisioningStyle = Manual;
 					};
+					9BEBA56F20127D3300E6FD0D = {
+						CreatedOnToolsVersion = 9.3;
+						ProvisioningStyle = Automatic;
+					};
 					C00B0DF01C5AEBBE000330B3 = {
 						ProvisioningStyle = Manual;
 					};
@@ -1799,18 +1813,12 @@
 					D2AAC045055464E500DB518D = {
 						ProvisioningStyle = Manual;
 					};
-					E46DBC1A14EE10C80001F9F6 = {
-						ProvisioningStyle = Manual;
-					};
 					E49F24A9125D57FA0057C971 = {
 						ProvisioningStyle = Manual;
 					};
 					E4B51595164B2DA300E003AF = {
 						ProvisioningStyle = Manual;
 					};
-					E4EC118F12514302000DDBD1 = {
-						ProvisioningStyle = Manual;
-					};
 					E4EC121612514715000DDBD1 = {
 						ProvisioningStyle = Manual;
 					};
@@ -1837,21 +1845,26 @@
 					ProductGroup = 4552536F19B1384900B88766 /* Products */;
 					ProjectRef = 4552536E19B1384900B88766 /* libdispatchtest.xcodeproj */;
 				},
+				{
+					ProductGroup = 9B6A42E11FE098430000D146 /* Products */;
+					ProjectRef = 9B6A42E01FE098430000D146 /* queue-tip.xcodeproj */;
+				},
 			);
 			projectRoot = "";
 			targets = (
 				D2AAC045055464E500DB518D /* libdispatch */,
 				E49F24A9125D57FA0057C971 /* libdispatch no resolver */,
 				E4EC121612514715000DDBD1 /* libdispatch mp resolved */,
-				E4EC118F12514302000DDBD1 /* libdispatch up resolved */,
-				E49BB6CE1E70748100868613 /* libdispatch alt resolved */,
+				E49BB6CE1E70748100868613 /* libdispatch armv81 resolved */,
 				E4B51595164B2DA300E003AF /* libdispatch introspection */,
-				E46DBC1A14EE10C80001F9F6 /* libdispatch up static */,
 				C01866A41C5973210040FC07 /* libdispatch mp static */,
 				C00B0DF01C5AEBBE000330B3 /* libdispatch dyld stub */,
+				6E43553E215B5D9D00C13177 /* libdispatch_introspection */,
+				6EA833C22162D6380045EFDC /* libdispatch_introspection_Sim */,
 				3F3C9326128E637B0042B1F7 /* libdispatch_Sim */,
 				6E2ECAFD1C49C2FF00A30A32 /* libdispatch_kernel */,
 				C927F35A10FD7F0600C5AB8B /* libdispatch_tools */,
+				9BEBA56F20127D3300E6FD0D /* libdispatch_tools_Sim */,
 				4552540A19B1389700B88766 /* libdispatch_tests */,
 				92CBD7201BED924F006E0892 /* libdispatch_tests_legacy */,
 				92F3FECA1BEC69E500025962 /* darwintests */,
@@ -1890,6 +1903,13 @@
 			remoteRef = 4552540819B1384900B88766 /* PBXContainerItemProxy */;
 			sourceTree = BUILT_PRODUCTS_DIR;
 		};
+		9B2A11AA2032494E0060E7D4 /* queue-tip */ = {
+			isa = PBXReferenceProxy;
+			fileType = "compiled.mach-o.executable";
+			path = "queue-tip";
+			remoteRef = 9B2A11A92032494E0060E7D4 /* PBXContainerItemProxy */;
+			sourceTree = BUILT_PRODUCTS_DIR;
+		};
 		C927F36710FD7F1000C5AB8B /* ddt */ = {
 			isa = PBXReferenceProxy;
 			fileType = "compiled.mach-o.executable";
@@ -1916,6 +1936,24 @@
 			shellScript = ". \"${SCRIPT_INPUT_FILE_0}\"";
 			showEnvVarsInLog = 0;
 		};
+		6E2464DD1F5E67900031ADD9 /* Validate symbol ordering */ = {
+			isa = PBXShellScriptBuildPhase;
+			buildActionMask = 8;
+			files = (
+			);
+			inputPaths = (
+				"$(SRCROOT)/xcodeconfig/libdispatch.order",
+				"$(SRCROOT)/xcodeconfig/libdispatch.dirty",
+				"$(SRCROOT)/xcodeconfig/libdispatch.clean",
+			);
+			name = "Validate symbol ordering";
+			outputPaths = (
+			);
+			runOnlyForDeploymentPostprocessing = 1;
+			shellPath = "/bin/bash -e";
+			shellScript = ". \"${SRCROOT}/xcodescripts/check-order.sh\"\n";
+			showEnvVarsInLog = 0;
+		};
 		C00B0E061C5AEBBE000330B3 /* Symlink libdispatch.a -> libdispatch_dyld_target.a */ = {
 			isa = PBXShellScriptBuildPhase;
 			buildActionMask = 2147483647;
@@ -1981,7 +2019,7 @@
 			);
 			runOnlyForDeploymentPostprocessing = 0;
 			shellPath = "/bin/bash -e";
-			shellScript = ". \"${SCRIPT_INPUT_FILE_0}\"";
+			shellScript = ". \"${SCRIPT_INPUT_FILE_0}\"\n";
 			showEnvVarsInLog = 0;
 		};
 		E482F1C512DBAA110030614D /* Postprocess Headers */ = {
@@ -1997,7 +2035,7 @@
 			);
 			runOnlyForDeploymentPostprocessing = 1;
 			shellPath = "/bin/bash -e";
-			shellScript = ". \"${SCRIPT_INPUT_FILE_0}\"";
+			shellScript = ". \"${SCRIPT_INPUT_FILE_0}\"\n";
 			showEnvVarsInLog = 0;
 		};
 		E49BB6CF1E70748100868613 /* Mig Headers */ = {
@@ -2109,47 +2147,6 @@
 			shellScript = ". \"${SCRIPT_INPUT_FILE_0}\"";
 			showEnvVarsInLog = 0;
 		};
-		E4EC121212514613000DDBD1 /* Symlink normal variant */ = {
-			isa = PBXShellScriptBuildPhase;
-			buildActionMask = 2147483647;
-			files = (
-			);
-			inputPaths = (
-			);
-			name = "Symlink normal variant";
-			outputPaths = (
-				"$(CONFIGURATION_BUILD_DIR)/$(PRODUCT_NAME)_normal.a",
-			);
-			runOnlyForDeploymentPostprocessing = 0;
-			shellPath = "/bin/bash -e";
-			shellScript = "ln -fs \"${PRODUCT_NAME}.a\" \"${SCRIPT_OUTPUT_FILE_0}\"";
-			showEnvVarsInLog = 0;
-		};
-		E4EC12141251461A000DDBD1 /* Mig Headers */ = {
-			isa = PBXShellScriptBuildPhase;
-			buildActionMask = 2147483647;
-			files = (
-			);
-			inputPaths = (
-				"$(SRCROOT)/src/protocol.defs",
-				"$(SRCROOT)/src/firehose/firehose.defs",
-				"$(SRCROOT)/src/firehose/firehose_reply.defs",
-				"$(SRCROOT)/xcodescripts/mig-headers.sh",
-			);
-			name = "Mig Headers";
-			outputPaths = (
-				"$(DERIVED_FILE_DIR)/protocol.h",
-				"$(DERIVED_FILE_DIR)/protocolServer.h",
-				"$(DERIVED_FILE_DIR)/firehose.h",
-				"$(DERIVED_FILE_DIR)/firehoseServer.h",
-				"$(DERIVED_FILE_DIR)/firehose_reply.h",
-				"$(DERIVED_FILE_DIR)/firehose_replyServer.h",
-			);
-			runOnlyForDeploymentPostprocessing = 0;
-			shellPath = "/bin/bash -e";
-			shellScript = ". \"${SCRIPT_INPUT_FILE_3}\"";
-			showEnvVarsInLog = 0;
-		};
 		E4EC121712514715000DDBD1 /* Mig Headers */ = {
 			isa = PBXShellScriptBuildPhase;
 			buildActionMask = 2147483647;
@@ -2191,6 +2188,27 @@
 			shellScript = "ln -fs \"${PRODUCT_NAME}.a\" \"${SCRIPT_OUTPUT_FILE_0}\"";
 			showEnvVarsInLog = 0;
 		};
+		E4FB8E8F218CD68A004B7A25 /* Install Plists */ = {
+			isa = PBXShellScriptBuildPhase;
+			buildActionMask = 8;
+			files = (
+			);
+			inputFileListPaths = (
+			);
+			inputPaths = (
+				"$(SRCROOT)/xcodescripts/install-plists.sh",
+				"$(SRCROOT)/src/libdispatch.plist",
+			);
+			name = "Install Plists";
+			outputFileListPaths = (
+			);
+			outputPaths = (
+			);
+			runOnlyForDeploymentPostprocessing = 1;
+			shellPath = "/bin/bash -e";
+			shellScript = ". \"${SCRIPT_INPUT_FILE_0}\"\n";
+			showEnvVarsInLog = 0;
+		};
 /* End PBXShellScriptBuildPhase section */
 
 /* Begin PBXSourcesBuildPhase section */
@@ -2217,6 +2235,7 @@
 			isa = PBXSourcesBuildPhase;
 			buildActionMask = 2147483647;
 			files = (
+				6E9C6CAD20F9848F00EA81C0 /* yield.c in Sources */,
 				C00B0DF21C5AEBBE000330B3 /* protocol.defs in Sources */,
 				C00B0DF71C5AEBBE000330B3 /* firehose.defs in Sources */,
 				C00B0DFA1C5AEBBE000330B3 /* firehose_reply.defs in Sources */,
@@ -2250,6 +2269,7 @@
 			isa = PBXSourcesBuildPhase;
 			buildActionMask = 2147483647;
 			files = (
+				6E9C6CAC20F9848E00EA81C0 /* yield.c in Sources */,
 				C01866A61C5973210040FC07 /* protocol.defs in Sources */,
 				C01866AB1C5973210040FC07 /* firehose.defs in Sources */,
 				C01866AE1C5973210040FC07 /* firehose_reply.defs in Sources */,
@@ -2298,6 +2318,7 @@
 				FC7BED990E8361E600161930 /* queue.c in Sources */,
 				9676A0E10F3E755D00713ADB /* apply.c in Sources */,
 				96A8AA870F41E7A400CD570B /* source.c in Sources */,
+				6E9C6CA720F9848100EA81C0 /* yield.c in Sources */,
 				6E4BACBD1D48A41500B562AE /* mach.c in Sources */,
 				6EA962971D48622600759D53 /* event.c in Sources */,
 				6EA9629F1D48625000759D53 /* event_kevent.c in Sources */,
@@ -2315,39 +2336,6 @@
 			);
 			runOnlyForDeploymentPostprocessing = 0;
 		};
-		E46DBC3E14EE10C80001F9F6 /* Sources */ = {
-			isa = PBXSourcesBuildPhase;
-			buildActionMask = 2147483647;
-			files = (
-				E46DBC4014EE10C80001F9F6 /* protocol.defs in Sources */,
-				6EBEC7E71BBDD30F009B1596 /* firehose.defs in Sources */,
-				6EBEC7EA1BBDD326009B1596 /* firehose_reply.defs in Sources */,
-				E46DBC4114EE10C80001F9F6 /* resolver.c in Sources */,
-				E46DBC4214EE10C80001F9F6 /* init.c in Sources */,
-				E46DBC4714EE10C80001F9F6 /* object.c in Sources */,
-				E43A72881AF85BE900BAA921 /* block.cpp in Sources */,
-				6EF2CAB11C8899EC001ABE83 /* lock.c in Sources */,
-				E46DBC4414EE10C80001F9F6 /* semaphore.c in Sources */,
-				E46DBC4514EE10C80001F9F6 /* once.c in Sources */,
-				E46DBC4314EE10C80001F9F6 /* queue.c in Sources */,
-				E46DBC4614EE10C80001F9F6 /* apply.c in Sources */,
-				E46DBC4914EE10C80001F9F6 /* source.c in Sources */,
-				6E4BACC61D48A42300B562AE /* mach.c in Sources */,
-				6EA9629C1D48622A00759D53 /* event.c in Sources */,
-				6EA962A41D48625300759D53 /* event_kevent.c in Sources */,
-				6E4BACFA1D49A04900B562AE /* event_epoll.c in Sources */,
-				E44A8E701805C3E0009FFDB6 /* voucher.c in Sources */,
-				6EE664271BE2FD5C00ED7B1C /* firehose_buffer.c in Sources */,
-				E46DBC4C14EE10C80001F9F6 /* io.c in Sources */,
-				E46DBC4B14EE10C80001F9F6 /* data.c in Sources */,
-				E46DBC4D14EE10C80001F9F6 /* transform.c in Sources */,
-				E46DBC4A14EE10C80001F9F6 /* time.c in Sources */,
-				2BBF5A67154B64F5002B20F9 /* allocator.c in Sources */,
-				E46DBC4814EE10C80001F9F6 /* benchmark.c in Sources */,
-				6E9956011C3B21980071D40C /* venture.c in Sources */,
-			);
-			runOnlyForDeploymentPostprocessing = 0;
-		};
 		E49BB6D01E70748100868613 /* Sources */ = {
 			isa = PBXSourcesBuildPhase;
 			buildActionMask = 2147483647;
@@ -2367,6 +2355,7 @@
 				E49BB6D81E70748100868613 /* mach.c in Sources */,
 				E49BB6DA1E70748100868613 /* queue.c in Sources */,
 				E49BB6DF1E70748100868613 /* apply.c in Sources */,
+				6E9C6CAA20F9848D00EA81C0 /* yield.c in Sources */,
 				E49BB6E31E70748100868613 /* source.c in Sources */,
 				E49BB6E81E70748100868613 /* event.c in Sources */,
 				E49BB6D61E70748100868613 /* event_kevent.c in Sources */,
@@ -2403,6 +2392,7 @@
 				E49F24CB125D57FA0057C971 /* queue.c in Sources */,
 				E49F24CE125D57FA0057C971 /* apply.c in Sources */,
 				E49F24D1125D57FA0057C971 /* source.c in Sources */,
+				6E9C6CA820F9848C00EA81C0 /* yield.c in Sources */,
 				6E4BACC21D48A42000B562AE /* mach.c in Sources */,
 				6EA962981D48622700759D53 /* event.c in Sources */,
 				6EA962A01D48625100759D53 /* event_kevent.c in Sources */,
@@ -2432,6 +2422,7 @@
 				E4B515C0164B2DA300E003AF /* init.c in Sources */,
 				E4B515C5164B2DA300E003AF /* object.c in Sources */,
 				E4B515CC164B2DA300E003AF /* object.m in Sources */,
+				6E9C6CAB20F9848E00EA81C0 /* yield.c in Sources */,
 				E43A72871AF85BCD00BAA921 /* block.cpp in Sources */,
 				6EF2CAB01C8899EB001ABE83 /* lock.c in Sources */,
 				E4B515C2164B2DA300E003AF /* semaphore.c in Sources */,
@@ -2457,42 +2448,6 @@
 			);
 			runOnlyForDeploymentPostprocessing = 0;
 		};
-		E4EC11AC12514302000DDBD1 /* Sources */ = {
-			isa = PBXSourcesBuildPhase;
-			buildActionMask = 2147483647;
-			files = (
-				E417A38412A472C4004D659D /* provider.d in Sources */,
-				E44EBE5412517EBE00645D88 /* protocol.defs in Sources */,
-				6EBEC7E61BBDD30D009B1596 /* firehose.defs in Sources */,
-				6EBEC7E91BBDD325009B1596 /* firehose_reply.defs in Sources */,
-				E49F2424125D3C970057C971 /* resolver.c in Sources */,
-				E44EBE5512517EBE00645D88 /* init.c in Sources */,
-				E4EC11B212514302000DDBD1 /* object.c in Sources */,
-				E4FC3266145F46C9002FBDDB /* object.m in Sources */,
-				E43A72861AF85BCC00BAA921 /* block.cpp in Sources */,
-				6EF2CAAF1C8899EB001ABE83 /* lock.c in Sources */,
-				E4EC11AF12514302000DDBD1 /* semaphore.c in Sources */,
-				E4EC11B012514302000DDBD1 /* once.c in Sources */,
-				E4EC11AE12514302000DDBD1 /* queue.c in Sources */,
-				E4EC11B112514302000DDBD1 /* apply.c in Sources */,
-				E4EC11B412514302000DDBD1 /* source.c in Sources */,
-				6E4BACC41D48A42200B562AE /* mach.c in Sources */,
-				6EA9629A1D48622900759D53 /* event.c in Sources */,
-				6EA962A21D48625200759D53 /* event_kevent.c in Sources */,
-				6E4BACF81D49A04800B562AE /* event_epoll.c in Sources */,
-				E44A8E6E1805C3E0009FFDB6 /* voucher.c in Sources */,
-				6ED64B421BBD898500C35F4D /* firehose_buffer.c in Sources */,
-				E4EC11B812514302000DDBD1 /* io.c in Sources */,
-				E4EC11B712514302000DDBD1 /* data.c in Sources */,
-				E420867316027AE500EEE210 /* data.m in Sources */,
-				C93D6166143E190F00EB9023 /* transform.c in Sources */,
-				E4EC11B512514302000DDBD1 /* time.c in Sources */,
-				2BBF5A65154B64F5002B20F9 /* allocator.c in Sources */,
-				E4EC11B312514302000DDBD1 /* benchmark.c in Sources */,
-				6E9956031C3B219A0071D40C /* venture.c in Sources */,
-			);
-			runOnlyForDeploymentPostprocessing = 0;
-		};
 		E4EC121812514715000DDBD1 /* Sources */ = {
 			isa = PBXSourcesBuildPhase;
 			buildActionMask = 2147483647;
@@ -2512,6 +2467,7 @@
 				E4EC121A12514715000DDBD1 /* queue.c in Sources */,
 				E4EC121D12514715000DDBD1 /* apply.c in Sources */,
 				E4EC122012514715000DDBD1 /* source.c in Sources */,
+				6E9C6CA920F9848D00EA81C0 /* yield.c in Sources */,
 				6E4BACC31D48A42100B562AE /* mach.c in Sources */,
 				6EA962991D48622800759D53 /* event.c in Sources */,
 				6EA962A11D48625100759D53 /* event_kevent.c in Sources */,
@@ -2537,6 +2493,16 @@
 			target = 6E040C621C499B1B00411A2E /* libfirehose_kernel */;
 			targetProxy = 6E2ECB011C49C31200A30A32 /* PBXContainerItemProxy */;
 		};
+		6EE5083B21701B9100833569 /* PBXTargetDependency */ = {
+			isa = PBXTargetDependency;
+			target = E4B51595164B2DA300E003AF /* libdispatch introspection */;
+			targetProxy = 6EE5083A21701B9100833569 /* PBXContainerItemProxy */;
+		};
+		6EE5083D21701B9600833569 /* PBXTargetDependency */ = {
+			isa = PBXTargetDependency;
+			target = E4B51595164B2DA300E003AF /* libdispatch introspection */;
+			targetProxy = 6EE5083C21701B9600833569 /* PBXContainerItemProxy */;
+		};
 		6EF0B27E1BA8C5BF007FA4F6 /* PBXTargetDependency */ = {
 			isa = PBXTargetDependency;
 			target = 6EB4E4081BA8BCAD00D7B9D2 /* libfirehose_server */;
@@ -2552,6 +2518,16 @@
 			target = 92F3FECA1BEC69E500025962 /* darwintests */;
 			targetProxy = 92F3FECE1BEC6F1000025962 /* PBXContainerItemProxy */;
 		};
+		9B2A11A32032494E0060E7D4 /* PBXTargetDependency */ = {
+			isa = PBXTargetDependency;
+			name = "queue-tip";
+			targetProxy = 9B2A11A22032494E0060E7D4 /* PBXContainerItemProxy */;
+		};
+		9BEBA57820127D4400E6FD0D /* PBXTargetDependency */ = {
+			isa = PBXTargetDependency;
+			name = ddt;
+			targetProxy = 9BEBA57720127D4400E6FD0D /* PBXContainerItemProxy */;
+		};
 		C00B0E141C5AEED6000330B3 /* PBXTargetDependency */ = {
 			isa = PBXTargetDependency;
 			target = C00B0DF01C5AEBBE000330B3 /* libdispatch dyld stub */;
@@ -2572,16 +2548,6 @@
 			target = D2AAC045055464E500DB518D /* libdispatch */;
 			targetProxy = E4128E4913B94BCE00ABB2CB /* PBXContainerItemProxy */;
 		};
-		E437F0D614F7441F00F0B997 /* PBXTargetDependency */ = {
-			isa = PBXTargetDependency;
-			target = E46DBC1A14EE10C80001F9F6 /* libdispatch up static */;
-			targetProxy = E437F0D514F7441F00F0B997 /* PBXContainerItemProxy */;
-		};
-		E47D6ECB125FEB9D0070D91C /* PBXTargetDependency */ = {
-			isa = PBXTargetDependency;
-			target = E4EC118F12514302000DDBD1 /* libdispatch up resolved */;
-			targetProxy = E47D6ECA125FEB9D0070D91C /* PBXContainerItemProxy */;
-		};
 		E47D6ECD125FEBA10070D91C /* PBXTargetDependency */ = {
 			isa = PBXTargetDependency;
 			target = E4EC121612514715000DDBD1 /* libdispatch mp resolved */;
@@ -2589,14 +2555,9 @@
 		};
 		E49BB6F81E7074C100868613 /* PBXTargetDependency */ = {
 			isa = PBXTargetDependency;
-			target = E49BB6CE1E70748100868613 /* libdispatch alt resolved */;
+			target = E49BB6CE1E70748100868613 /* libdispatch armv81 resolved */;
 			targetProxy = E49BB6F71E7074C100868613 /* PBXContainerItemProxy */;
 		};
-		E4B515DB164B317700E003AF /* PBXTargetDependency */ = {
-			isa = PBXTargetDependency;
-			target = E4B51595164B2DA300E003AF /* libdispatch introspection */;
-			targetProxy = E4B515DA164B317700E003AF /* PBXContainerItemProxy */;
-		};
 /* End PBXTargetDependency section */
 
 /* Begin XCBuildConfiguration section */
@@ -2668,6 +2629,34 @@
 			};
 			name = Debug;
 		};
+		6E435542215B5D9D00C13177 /* Release */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				PRODUCT_NAME = "$(TARGET_NAME)";
+			};
+			name = Release;
+		};
+		6E435543215B5D9D00C13177 /* Debug */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				PRODUCT_NAME = "$(TARGET_NAME)";
+			};
+			name = Debug;
+		};
+		6EA833C42162D6380045EFDC /* Release */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				PRODUCT_NAME = "$(TARGET_NAME)";
+			};
+			name = Release;
+		};
+		6EA833C52162D6380045EFDC /* Debug */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				PRODUCT_NAME = "$(TARGET_NAME)";
+			};
+			name = Debug;
+		};
 		6EB4E40B1BA8BCAD00D7B9D2 /* Release */ = {
 			isa = XCBuildConfiguration;
 			baseConfigurationReference = 6EB4E4421BA8BD7800D7B9D2 /* libfirehose.xcconfig */;
@@ -2708,6 +2697,22 @@
 			};
 			name = Debug;
 		};
+		9BEBA57020127D3300E6FD0D /* Release */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				CODE_SIGN_STYLE = Automatic;
+				PRODUCT_NAME = "$(TARGET_NAME)";
+			};
+			name = Release;
+		};
+		9BEBA57120127D3300E6FD0D /* Debug */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				CODE_SIGN_STYLE = Automatic;
+				PRODUCT_NAME = "$(TARGET_NAME)";
+			};
+			name = Debug;
+		};
 		C00B0E081C5AEBBE000330B3 /* Release */ = {
 			isa = XCBuildConfiguration;
 			baseConfigurationReference = C00B0E121C5AEBF7000330B3 /* libdispatch-dyld-stub.xcconfig */;
@@ -2748,25 +2753,11 @@
 			};
 			name = Debug;
 		};
-		E46DBC5514EE10C80001F9F6 /* Release */ = {
-			isa = XCBuildConfiguration;
-			baseConfigurationReference = E46DBC5814EE11BC0001F9F6 /* libdispatch-up-static.xcconfig */;
-			buildSettings = {
-			};
-			name = Release;
-		};
-		E46DBC5614EE10C80001F9F6 /* Debug */ = {
-			isa = XCBuildConfiguration;
-			baseConfigurationReference = E46DBC5814EE11BC0001F9F6 /* libdispatch-up-static.xcconfig */;
-			buildSettings = {
-			};
-			name = Debug;
-		};
 		E49BB6F01E70748100868613 /* Release */ = {
 			isa = XCBuildConfiguration;
 			baseConfigurationReference = E40041A9125D70590022B135 /* libdispatch-resolved.xcconfig */;
 			buildSettings = {
-				DISPATCH_RESOLVED_VARIANT = alt;
+				DISPATCH_RESOLVED_VARIANT = armv81;
 			};
 			name = Release;
 		};
@@ -2774,7 +2765,7 @@
 			isa = XCBuildConfiguration;
 			baseConfigurationReference = E40041A9125D70590022B135 /* libdispatch-resolved.xcconfig */;
 			buildSettings = {
-				DISPATCH_RESOLVED_VARIANT = alt;
+				DISPATCH_RESOLVED_VARIANT = armv81;
 			};
 			name = Debug;
 		};
@@ -2827,22 +2818,6 @@
 			};
 			name = Debug;
 		};
-		E4EC11BD12514302000DDBD1 /* Release */ = {
-			isa = XCBuildConfiguration;
-			baseConfigurationReference = E40041A9125D70590022B135 /* libdispatch-resolved.xcconfig */;
-			buildSettings = {
-				DISPATCH_RESOLVED_VARIANT = up;
-			};
-			name = Release;
-		};
-		E4EC11BE12514302000DDBD1 /* Debug */ = {
-			isa = XCBuildConfiguration;
-			baseConfigurationReference = E40041A9125D70590022B135 /* libdispatch-resolved.xcconfig */;
-			buildSettings = {
-				DISPATCH_RESOLVED_VARIANT = up;
-			};
-			name = Debug;
-		};
 		E4EC122712514715000DDBD1 /* Release */ = {
 			isa = XCBuildConfiguration;
 			baseConfigurationReference = E40041A9125D70590022B135 /* libdispatch-resolved.xcconfig */;
@@ -2916,6 +2891,24 @@
 			defaultConfigurationIsVisible = 0;
 			defaultConfigurationName = Release;
 		};
+		6E435541215B5D9D00C13177 /* Build configuration list for PBXAggregateTarget "libdispatch_introspection" */ = {
+			isa = XCConfigurationList;
+			buildConfigurations = (
+				6E435542215B5D9D00C13177 /* Release */,
+				6E435543215B5D9D00C13177 /* Debug */,
+			);
+			defaultConfigurationIsVisible = 0;
+			defaultConfigurationName = Release;
+		};
+		6EA833C32162D6380045EFDC /* Build configuration list for PBXAggregateTarget "libdispatch_introspection_Sim" */ = {
+			isa = XCConfigurationList;
+			buildConfigurations = (
+				6EA833C42162D6380045EFDC /* Release */,
+				6EA833C52162D6380045EFDC /* Debug */,
+			);
+			defaultConfigurationIsVisible = 0;
+			defaultConfigurationName = Release;
+		};
 		6EB4E40A1BA8BCAD00D7B9D2 /* Build configuration list for PBXNativeTarget "libfirehose_server" */ = {
 			isa = XCConfigurationList;
 			buildConfigurations = (
@@ -2943,6 +2936,15 @@
 			defaultConfigurationIsVisible = 0;
 			defaultConfigurationName = Release;
 		};
+		9BEBA57620127D3300E6FD0D /* Build configuration list for PBXAggregateTarget "libdispatch_tools_Sim" */ = {
+			isa = XCConfigurationList;
+			buildConfigurations = (
+				9BEBA57020127D3300E6FD0D /* Release */,
+				9BEBA57120127D3300E6FD0D /* Debug */,
+			);
+			defaultConfigurationIsVisible = 0;
+			defaultConfigurationName = Release;
+		};
 		C00B0E071C5AEBBE000330B3 /* Build configuration list for PBXNativeTarget "libdispatch dyld stub" */ = {
 			isa = XCConfigurationList;
 			buildConfigurations = (
@@ -2970,16 +2972,7 @@
 			defaultConfigurationIsVisible = 0;
 			defaultConfigurationName = Release;
 		};
-		E46DBC5414EE10C80001F9F6 /* Build configuration list for PBXNativeTarget "libdispatch up static" */ = {
-			isa = XCConfigurationList;
-			buildConfigurations = (
-				E46DBC5514EE10C80001F9F6 /* Release */,
-				E46DBC5614EE10C80001F9F6 /* Debug */,
-			);
-			defaultConfigurationIsVisible = 0;
-			defaultConfigurationName = Release;
-		};
-		E49BB6EF1E70748100868613 /* Build configuration list for PBXNativeTarget "libdispatch alt resolved" */ = {
+		E49BB6EF1E70748100868613 /* Build configuration list for PBXNativeTarget "libdispatch armv81 resolved" */ = {
 			isa = XCConfigurationList;
 			buildConfigurations = (
 				E49BB6F01E70748100868613 /* Release */,
@@ -3006,15 +2999,6 @@
 			defaultConfigurationIsVisible = 0;
 			defaultConfigurationName = Release;
 		};
-		E4EC11BC12514302000DDBD1 /* Build configuration list for PBXNativeTarget "libdispatch up resolved" */ = {
-			isa = XCConfigurationList;
-			buildConfigurations = (
-				E4EC11BD12514302000DDBD1 /* Release */,
-				E4EC11BE12514302000DDBD1 /* Debug */,
-			);
-			defaultConfigurationIsVisible = 0;
-			defaultConfigurationName = Release;
-		};
 		E4EC122612514715000DDBD1 /* Build configuration list for PBXNativeTarget "libdispatch mp resolved" */ = {
 			isa = XCConfigurationList;
 			buildConfigurations = (
diff --git a/man/dispatch_after.3 b/man/dispatch_after.3
index 4c55214..db34af0 100644
--- a/man/dispatch_after.3
+++ b/man/dispatch_after.3
@@ -31,6 +31,12 @@
 .Fn dispatch_time
 or
 .Fn dispatch_walltime .
+Submission of the block may be delayed by the system in order to improve power consumption and system performance.
+The system applies a leeway (see
+.Xr dispatch_source_set_timer 3 )
+that is equal to one tenth of the interval between
+.Fa when
+and the time at which the function is called, with the leeway clamped to a minimum of one millisecond and a maximum of one minute.
 .Pp
 For a more detailed description about submitting blocks to queues, see
 .Xr dispatch_async 3 .
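To make the leeway rule above concrete, here is an illustrative sketch of the documented policy only (not libdispatch's internal implementation); the helper name is made up for the example.

    #include <dispatch/dispatch.h>
    #include <stdint.h>

    // Sketch of the dispatch_after(3) leeway policy described above:
    // one tenth of the delay, clamped to [1 millisecond, 1 minute].
    static uint64_t
    example_after_leeway(uint64_t delay_ns)
    {
        uint64_t leeway = delay_ns / 10;
        if (leeway < NSEC_PER_MSEC) leeway = NSEC_PER_MSEC;
        if (leeway > 60 * NSEC_PER_SEC) leeway = 60 * NSEC_PER_SEC;
        return leeway;
    }

Under this policy a dispatch_after() call five seconds out is subject to roughly half a second of leeway, while a delay of a few milliseconds still gets the one-millisecond floor.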
diff --git a/man/dispatch_io_read.3 b/man/dispatch_io_read.3
index 51c3b1c..26a11e8 100644
--- a/man/dispatch_io_read.3
+++ b/man/dispatch_io_read.3
@@ -20,7 +20,7 @@
 .Fo dispatch_io_write
 .Fa "dispatch_io_t channel"
 .Fa "off_t offset"
-.Fa "dispatch_data_t dispatch"
+.Fa "dispatch_data_t data"
 .Fa "dispatch_queue_t queue"
 .Fa "void (^handler)(bool done, dispatch_data_t data, int error)"
 .Fc
@@ -132,7 +132,7 @@
 not be run again. If an unrecoverable error occurs while performing the I/O
 operation, the handler block will be submitted with the
 .Va done
-flag set and the appriate POSIX error code in the
+flag set and the appropriate POSIX error code in the
 .Va error
 parameter. An invocation of a handler block with the
 .Va done
diff --git a/man/dispatch_semaphore_create.3 b/man/dispatch_semaphore_create.3
index da26365..c0aa451 100644
--- a/man/dispatch_semaphore_create.3
+++ b/man/dispatch_semaphore_create.3
@@ -36,7 +36,8 @@
 .Xr dispatch_time 3
 or
 .Xr dispatch_walltime 3
-functions.
+functions. If the timeout is reached without a signal being received, the semaphore
+is re-incremented before the function returns.
 .Pp
 The
 .Fn dispatch_semaphore_signal
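A short, self-contained illustration of the timeout behaviour documented above; the 100ms timeout is an arbitrary value chosen for the example.

    #include <dispatch/dispatch.h>
    #include <stdio.h>

    int main(void)
    {
        dispatch_semaphore_t sema = dispatch_semaphore_create(0);
        dispatch_time_t timeout =
                dispatch_time(DISPATCH_TIME_NOW, 100 * NSEC_PER_MSEC);

        if (dispatch_semaphore_wait(sema, timeout) != 0) {
            // Timed out: the wait already re-incremented the semaphore, so no
            // compensating dispatch_semaphore_signal() is needed here.
            printf("timed out\n");
        }
        return 0;
    }

Because the count is restored on timeout, a timed-out waiter does not leave the semaphore one lower than its creation value.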
diff --git a/man/dispatch_source_create.3 b/man/dispatch_source_create.3
index b4e9a7a..313b6e7 100644
--- a/man/dispatch_source_create.3
+++ b/man/dispatch_source_create.3
@@ -515,8 +515,9 @@
 .Vt DISPATCH_TIME_NOW
 or was created with
 .Xr dispatch_time 3 ,
-the timer is based on
-.Fn mach_absolute_time .
+the timer is based on up time (which is obtained from
+.Fn mach_absolute_time
+on Apple platforms).
 If
 .Fa start
 was created with
diff --git a/man/dispatch_time.3 b/man/dispatch_time.3
index 685898d..2536e0e 100644
--- a/man/dispatch_time.3
+++ b/man/dispatch_time.3
@@ -9,6 +9,7 @@
 .Sh SYNOPSIS
 .Fd #include <dispatch/dispatch.h>
 .Vt static const dispatch_time_t DISPATCH_TIME_NOW = 0ull ;
+.Vt static const dispatch_time_t DISPATCH_WALLTIME_NOW = ~1ull ;
 .Vt static const dispatch_time_t DISPATCH_TIME_FOREVER = ~0ull ;
 .Ft dispatch_time_t
 .Fo dispatch_time
@@ -29,7 +30,8 @@
 The
 .Fa dispatch_time_t
 type is a semi-opaque integer, with only the special values
-.Vt DISPATCH_TIME_NOW
+.Vt DISPATCH_TIME_NOW ,
+.Vt DISPATCH_WALLTIME_NOW
 and
 .Vt DISPATCH_TIME_FOREVER
 being externally defined. All other values are represented using an internal
@@ -43,13 +45,16 @@
 nanoseconds.
 If the
 .Fa base
-parameter maps internally to a wall clock, then the returned value is
-relative to the wall clock.
+parameter maps internally to a wall clock or is
+.Vt DISPATCH_WALLTIME_NOW ,
+then the returned value is relative to the wall clock.
 Otherwise, if
 .Fa base
 is
 .Vt DISPATCH_TIME_NOW ,
-then the current time of the default host clock is used.
+then the current time of the default host clock is used. On Apple platforms,
+the value of the default host clock is obtained from
+.Vt mach_absolute_time() .
 .Pp
 The
 .Fn dispatch_walltime
@@ -59,6 +64,9 @@
 parameter. If
 .Fa base
 is NULL, then the current time of the wall clock is used.
+.Vt dispatch_walltime(NULL, offset)
+is equivalent to
+.Vt dispatch_time(DISPATCH_WALLTIME_NOW, offset) .
 .Sh EDGE CONDITIONS
 The
 .Fn dispatch_time
@@ -81,11 +89,16 @@
 Underflow causes the smallest representable value to be
 returned for a given clock.
 .Sh EXAMPLES
-Create a milestone two seconds in the future:
+Create a milestone two seconds in the future, relative to the default clock:
 .Bd -literal -offset indent
 milestone = dispatch_time(DISPATCH_TIME_NOW, 2 * NSEC_PER_SEC);
 .Ed
 .Pp
+Create a milestone two seconds in the future, in wall clock time:
+.Bd -literal -offset indent
+milestone = dispatch_time(DISPATCH_WALLTIME_NOW, 2 * NSEC_PER_SEC);
+.Ed
+.Pp
 Create a milestone for use as an infinite timeout:
 .Bd -literal -offset indent
 milestone = DISPATCH_TIME_FOREVER;
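The two milestones from the EXAMPLES section, expanded into a complete translation unit with dispatch_after(); the queue choice and the block bodies are placeholders for illustration.

    #include <dispatch/dispatch.h>
    #include <stdio.h>

    int main(void)
    {
        dispatch_queue_t q = dispatch_get_global_queue(QOS_CLASS_DEFAULT, 0);

        // Two seconds from now on the default (up time) clock, which does not
        // advance while the machine is asleep.
        dispatch_time_t up_milestone =
                dispatch_time(DISPATCH_TIME_NOW, 2 * NSEC_PER_SEC);

        // Two seconds from now in wall clock time; equivalent to
        // dispatch_walltime(NULL, 2 * NSEC_PER_SEC).
        dispatch_time_t wall_milestone =
                dispatch_time(DISPATCH_WALLTIME_NOW, 2 * NSEC_PER_SEC);

        dispatch_after(up_milestone, q, ^{ printf("up time milestone\n"); });
        dispatch_after(wall_milestone, q, ^{ printf("wall clock milestone\n"); });

        dispatch_main();
    }

On a machine that stays awake the two milestones fire at essentially the same moment; they diverge if the system sleeps in between.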
diff --git a/os/firehose_buffer_private.h b/os/firehose_buffer_private.h
index d131d6d..a633bf4 100644
--- a/os/firehose_buffer_private.h
+++ b/os/firehose_buffer_private.h
@@ -31,7 +31,7 @@
 #include <dispatch/dispatch.h>
 #endif
 
-#define OS_FIREHOSE_SPI_VERSION 20170222
+#define OS_FIREHOSE_SPI_VERSION 20180226
 
 /*!
  * @group Firehose SPI
@@ -40,7 +40,8 @@
  */
 
 #define FIREHOSE_BUFFER_LIBTRACE_HEADER_SIZE	2048ul
-#define FIREHOSE_BUFFER_KERNEL_CHUNK_COUNT		16
+#define FIREHOSE_BUFFER_KERNEL_MIN_CHUNK_COUNT	16
+#define FIREHOSE_BUFFER_KERNEL_MAX_CHUNK_COUNT	64
 
 typedef struct firehose_buffer_range_s {
 	uint16_t fbr_offset; // offset from the start of the buffer
@@ -56,6 +57,14 @@
 extern void __firehose_critical_region_enter(void);
 extern void __firehose_critical_region_leave(void);
 extern void __firehose_allocate(vm_offset_t *addr, vm_size_t size);
+extern uint8_t __firehose_buffer_kernel_chunk_count;
+extern uint8_t __firehose_num_kernel_io_pages;
+
+#define FIREHOSE_BUFFER_KERNEL_DEFAULT_CHUNK_COUNT FIREHOSE_BUFFER_KERNEL_MIN_CHUNK_COUNT
+#define FIREHOSE_BUFFER_KERNEL_DEFAULT_IO_PAGES    8
+
+#define FIREHOSE_BUFFER_KERNEL_CHUNK_COUNT __firehose_buffer_kernel_chunk_count
+#define FIREHOSE_BUFFER_CHUNK_PREALLOCATED_COUNT (__firehose_buffer_kernel_chunk_count - 1) // the first chunk is the header
 
 // exported for the kernel
 firehose_tracepoint_t
@@ -72,6 +81,9 @@
 void
 __firehose_merge_updates(firehose_push_reply_t update);
 
+int
+__firehose_kernel_configuration_valid(uint8_t chunk_count, uint8_t io_pages);
+
 #else
 
 #define __firehose_critical_region_enter()
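The kernel-side buffer geometry is now a runtime value bracketed by the new MIN/MAX constants rather than a single compile-time count. A minimal sketch of clamping a requested chunk count into that range, assuming a kernel-side build where the DEFAULT constants above are defined; whether a given (chunk_count, io_pages) pair is actually acceptable remains a question for __firehose_kernel_configuration_valid().

    #include <stdint.h>
    #include <os/firehose_buffer_private.h>  // SPI header; availability depends on the build

    // Sketch only: keep a requested chunk count within the SPI's bounds,
    // falling back to the default when nothing was requested.
    static uint8_t
    example_clamp_chunk_count(uint8_t requested)
    {
        if (requested == 0)
            return FIREHOSE_BUFFER_KERNEL_DEFAULT_CHUNK_COUNT;   // 16
        if (requested < FIREHOSE_BUFFER_KERNEL_MIN_CHUNK_COUNT)
            return FIREHOSE_BUFFER_KERNEL_MIN_CHUNK_COUNT;       // 16
        if (requested > FIREHOSE_BUFFER_KERNEL_MAX_CHUNK_COUNT)
            return FIREHOSE_BUFFER_KERNEL_MAX_CHUNK_COUNT;       // 64
        return requested;
    }

Of the resulting chunks, the first one is the buffer header, which is what the FIREHOSE_BUFFER_CHUNK_PREALLOCATED_COUNT definition (chunk count minus one) reflects.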
diff --git a/os/firehose_server_private.h b/os/firehose_server_private.h
index fc352da..d2c379e 100644
--- a/os/firehose_server_private.h
+++ b/os/firehose_server_private.h
@@ -58,7 +58,8 @@
  * This is the first event delivered, and no event is delivered until
  * the handler of that event returns
  *
- * The `page` argument really is really a firehose_client_connected_info_t.
+ * The `page` argument is really a firehose_client_connected_info_t.  The
+ * `fc_pos` argument is not meaningful.
  *
  * @const FIREHOSE_EVENT_CLIENT_DIED
  * The specified client is gone and will not flush new buffers
@@ -68,21 +69,23 @@
  * FIREHOSE_EVENT_CLIENT_CORRUPTED event has been generated.
  *
  * @const FIREHOSE_EVENT_IO_BUFFER_RECEIVED
- * A new buffer needs to be pushed, `page` is set to that buffer.
+ * A new buffer needs to be pushed; `page` is set to that buffer, and `fc_pos`
+ * to its chunk position header.
  *
  * This event can be sent concurrently wrt FIREHOSE_EVENT_MEM_BUFFER_RECEIVED
  * events.
  *
  * @const FIREHOSE_EVENT_MEM_BUFFER_RECEIVED
- * A new buffer needs to be pushed, `page` is set to that buffer.
+ * A new buffer needs to be pushed; `page` is set to that buffer, and `fc_pos`
+ * to its chunk position header.
  *
  * This event can be sent concurrently wrt FIREHOSE_EVENT_IO_BUFFER_RECEIVED
  * events.
  *
  * @const FIREHOSE_EVENT_CLIENT_CORRUPTED
  * This event is received when a client is found being corrupted.
- * `page` is set to the buffer header page. When this event is received,
- * logs have likely been lost for this client.
+ * `page` is set to the buffer header page, and `fc_pos` is not meaningful. When
+ * this event is received, logs have likely been lost for this client.
  *
  * This buffer isn't really a proper firehose buffer page, but its content may
  * be useful for debugging purposes.
@@ -90,7 +93,8 @@
  * @const FIREHOSE_EVENT_CLIENT_FINALIZE
  * This event is received when a firehose client structure is about to be
  * destroyed. Only firehose_client_get_context() can ever be called with
- * the passed firehose client. The `page` argument is NULL for this event.
+ * the passed firehose client. The `page` argument is NULL for this event, and
+ * the `fc_pos` argument is not meaningful.
  *
  * The event is sent from the context that is dropping the last refcount
  * of the client.
@@ -201,6 +205,19 @@
 firehose_client_get_context(firehose_client_t client);
 
 /*!
+ * @function firehose_client_set_strings_cached
+ *
+ * @abstract
+ * Marks a given client as having strings cached already.
+ *
+ * @param client
+ * The specified client.
+ */
+OS_NOTHROW OS_NONNULL1
+void
+firehose_client_set_strings_cached(firehose_client_t client);
+
+/*!
  * @function firehose_client_set_context
  *
  * @abstract
@@ -289,7 +306,8 @@
  * Type of the handler block for firehose_server_init()
  */
 typedef void (^firehose_handler_t)(firehose_client_t client,
-		firehose_event_t event, firehose_chunk_t page);
+		firehose_event_t event, firehose_chunk_t page,
+		firehose_chunk_pos_u fc_pos);
 
 /*!
  * @function firehose_server_init
@@ -357,6 +375,21 @@
 firehose_server_cancel(void);
 
 /*!
+ * @function firehose_server_set_logging_prefs
+ *
+ * @abstract
+ * Publishes a new preferences buffer.
+ *
+ * @description
+ * The server will take ownership of this buffer and will
+ * call munmap() on the previous one that was stored.
+ */
+OS_NOTHROW
+void
+firehose_server_set_logging_prefs(void *pointer, size_t length,
+		os_block_t block);
+
+/*!
  * @typedef firehose_server_queue_t
  *
  * @abstract
@@ -428,7 +461,8 @@
  * Type of the handler block for firehose_snapshot
  */
 typedef void (^firehose_snapshot_handler_t)(firehose_client_t client,
-		firehose_snapshot_event_t event, firehose_chunk_t page);
+		firehose_snapshot_event_t event, firehose_chunk_t page,
+		firehose_chunk_pos_u fc_pos);
 
 /*!
  * @function firehose_snapshot
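Since the handler type now carries `fc_pos`, here is a hedged sketch of what a conforming handler block might look like; the per-event bodies are placeholders, not the real firehose server logic.

    #include <os/firehose_server_private.h>

    // Sketch only: fc_pos is meaningful solely for the *_BUFFER_RECEIVED
    // events; for every other event it should simply be ignored.
    static void
    example_prepare_handler(void)
    {
        firehose_handler_t handler = ^(firehose_client_t client,
                firehose_event_t event, firehose_chunk_t page,
                firehose_chunk_pos_u fc_pos) {
            switch (event) {
            case FIREHOSE_EVENT_IO_BUFFER_RECEIVED:
            case FIREHOSE_EVENT_MEM_BUFFER_RECEIVED:
                // `page` is the chunk to push and `fc_pos` its position header.
                break;
            default:
                // connected/died/corrupted/finalize: `fc_pos` carries nothing.
                break;
            }
            (void)client; (void)page; (void)fc_pos;
        };

        // Hand `handler` to firehose_server_init() (declared elsewhere in this
        // header) before resuming the server.
        (void)handler;
    }

A snapshot handler (firehose_snapshot_handler_t) follows the same pattern, with firehose_snapshot_event_t in place of firehose_event_t.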
diff --git a/os/object_private.h b/os/object_private.h
index 3b46322..a667f79 100644
--- a/os/object_private.h
+++ b/os/object_private.h
@@ -184,13 +184,13 @@
 void
 _os_object_release_internal(_os_object_t object);
 
-API_AVAILABLE(macos(10.12), ios(10.0), tvos(10.0), watchos(3.0))
+API_AVAILABLE(macos(10.13), ios(11.0), tvos(11.0), watchos(4.0))
 OS_OBJECT_EXPORT OS_OBJECT_NONNULL OS_OBJECT_NOTHROW
 OS_SWIFT_UNAVAILABLE("Unavailable in Swift")
 _os_object_t
 _os_object_retain_internal_n(_os_object_t object, uint16_t n);
 
-API_AVAILABLE(macos(10.12), ios(10.0), tvos(10.0), watchos(3.0))
+API_AVAILABLE(macos(10.13), ios(11.0), tvos(11.0), watchos(4.0))
 OS_OBJECT_EXPORT OS_OBJECT_NONNULL OS_OBJECT_NOTHROW
 OS_SWIFT_UNAVAILABLE("Unavailable in Swift")
 void
diff --git a/os/voucher_activity_private.h b/os/voucher_activity_private.h
index 3df9023..706ae75 100644
--- a/os/voucher_activity_private.h
+++ b/os/voucher_activity_private.h
@@ -154,7 +154,7 @@
  * @result
  * A new voucher with an activity identifier.
  */
-API_AVAILABLE(macos(10.12), ios(10.0), tvos(10.0), watchos(3.0))
+API_AVAILABLE(macos(10.12.4), ios(10.3), tvos(10.2), watchos(3.2))
 OS_VOUCHER_EXPORT OS_OBJECT_RETURNS_RETAINED OS_WARN_RESULT OS_NOTHROW
 voucher_t
 voucher_activity_create_with_data(firehose_tracepoint_id_t *trace_id,
@@ -162,7 +162,7 @@
 		const void *pubdata, size_t publen);
 
 API_DEPRECATED_WITH_REPLACEMENT("voucher_activity_create_with_data",
-		macos(10.12,10.12), ios(10.0,10.0), tvos(10.0,10.0), watchos(3.0,3.0))
+		macos(10.12,10.12.4), ios(10.0,10.3), tvos(10.0,10.2), watchos(3.0,3.2))
 OS_VOUCHER_EXPORT OS_OBJECT_RETURNS_RETAINED OS_WARN_RESULT OS_NOTHROW
 voucher_t
 voucher_activity_create_with_location(firehose_tracepoint_id_t *trace_id,
@@ -183,7 +183,7 @@
  * The bottom-most 8 bits of the flags will be used to generate the ID.
  * See firehose_activity_flags_t.
  */
-API_AVAILABLE(macos(10.12), ios(10.0), tvos(10.0), watchos(3.0))
+API_AVAILABLE(macos(10.13), ios(11.0), tvos(11.0), watchos(4.0))
 OS_VOUCHER_EXPORT OS_NOTHROW
 firehose_activity_id_t
 voucher_activity_id_allocate(firehose_activity_flags_t flags);
@@ -264,22 +264,21 @@
  * Length of data to read from the iovec after the public data for the private
  * data.
  */
-API_AVAILABLE(macos(10.12), ios(10.0), tvos(10.0), watchos(3.0))
+API_AVAILABLE(macos(10.12.4), ios(10.3), tvos(10.2), watchos(3.2))
 OS_VOUCHER_EXPORT OS_NOTHROW OS_NONNULL4
 firehose_tracepoint_id_t
 voucher_activity_trace_v(firehose_stream_t stream,
 		firehose_tracepoint_id_t trace_id, uint64_t timestamp,
 		const struct iovec *iov, size_t publen, size_t privlen);
 
+#define VOUCHER_ACTIVITY_TRACE_FLAG_UNRELIABLE 0x01
 
-API_DEPRECATED_WITH_REPLACEMENT("voucher_activity_trace_v",
-		macos(10.12,10.12), ios(10.0,10.0), tvos(10.0,10.0), watchos(3.0,3.0))
-OS_VOUCHER_EXPORT OS_NOTHROW OS_NONNULL4 OS_NONNULL6
+API_AVAILABLE(macos(10.14), ios(12.0), tvos(12.0), watchos(5.0))
+OS_VOUCHER_EXPORT OS_NOTHROW OS_NONNULL4
 firehose_tracepoint_id_t
-voucher_activity_trace_with_private_strings(firehose_stream_t stream,
+voucher_activity_trace_v_2(firehose_stream_t stream,
 		firehose_tracepoint_id_t trace_id, uint64_t timestamp,
-		const void *pubdata, size_t publen,
-		const void *privdata, size_t privlen);
+		const struct iovec *iov, size_t publen, size_t privlen, uint32_t flags);
 
 typedef const struct voucher_activity_hooks_s {
 #define VOUCHER_ACTIVITY_HOOKS_VERSION     5
@@ -320,10 +319,42 @@
  */
 API_AVAILABLE(macos(10.10), ios(8.0))
 OS_VOUCHER_EXPORT OS_WARN_RESULT OS_NOTHROW OS_NONNULL_ALL
-void*
+void *
 voucher_activity_get_metadata_buffer(size_t *length);
 
 /*!
+ * @function voucher_activity_get_logging_preferences
+ *
+ * @abstract
+ * Return address and length of vm_map()ed configuration data for the logging
+ * subsystem.
+ *
+ * @discussion
+ * The data must be deallocated with vm_deallocate().
+ *
+ * @param length
+ * Pointer to size_t variable, filled with length of preferences buffer.
+ *
+ * @result
+ * Address of preferences buffer, returns NULL on error.
+ */
+API_AVAILABLE(macos(10.14), ios(12.0), tvos(12.0), watchos(5.0), bridgeos(3.0))
+OS_VOUCHER_EXPORT OS_WARN_RESULT OS_NOTHROW OS_NONNULL_ALL
+void *
+voucher_activity_get_logging_preferences(size_t *length);
+
+/*!
+ * @function voucher_activity_should_send_strings
+ *
+ * @abstract
+ * Returns whether the client should send the strings or not.
+ */
+API_AVAILABLE(macos(10.14), ios(12.0), tvos(12.0), watchos(5.0), bridgeos(4.0))
+OS_VOUCHER_EXPORT OS_WARN_RESULT OS_NOTHROW
+bool
+voucher_activity_should_send_strings(void);
+
+/*!
  * @function voucher_get_activity_id_4dyld
  *
  * @abstract
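A minimal sketch of the ownership rule in the discussion above: the returned preferences buffer is vm_map()ed into the caller and must be handed back with vm_deallocate(). Only the SPI call itself is taken from this header; the decoding step is elided.

    #include <mach/mach.h>
    #include <os/voucher_activity_private.h>  // SPI header; availability depends on the SDK
    #include <stddef.h>

    static void
    example_consume_logging_prefs(void)
    {
        size_t len = 0;
        void *prefs = voucher_activity_get_logging_preferences(&len);
        if (prefs == NULL) {
            return;  // error, or no preferences published yet
        }

        // ... decode the preferences in [prefs, prefs + len) ...

        (void)vm_deallocate(mach_task_self(), (vm_address_t)prefs, len);
    }

On the daemon side, the corresponding publication point is firehose_server_set_logging_prefs() from os/firehose_server_private.h, which likewise takes ownership of the buffer it is handed.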
diff --git a/os/voucher_private.h b/os/voucher_private.h
index 3e28091..ad4e312 100644
--- a/os/voucher_private.h
+++ b/os/voucher_private.h
@@ -199,22 +199,9 @@
  * voucher adopted on the calling thread. If the block object is submitted to a
  * queue, this replaces the default behavior of associating the submitted block
  * instance with the voucher adopted at the time of submission.
- * This flag is ignored if a specific voucher object is assigned with the
- * dispatch_block_create_with_voucher* functions, and is equivalent to passing
- * the NULL voucher to these functions.
+ * This flag is ignored if used with the dispatch_block_create_with_voucher*()
+ * functions.
  *
- * @const DISPATCH_BLOCK_IF_LAST_RESET_QUEUE_QOS_OVERRIDE
- * Flag indicating that this dispatch block object should try to reset the
- * recorded maximum QoS of all currently enqueued items on a serial dispatch
- * queue at the base of a queue hierarchy.
- *
- * This is only works if the queue becomes empty by dequeuing the block in
- * question, and then allows that block to enqueue more work on this hierarchy
- * without perpetuating QoS overrides resulting from items previously executed
- * on the hierarchy.
- *
- * A dispatch block object created with this flag set cannot be used with
- * dispatch_block_wait() or dispatch_block_cancel().
  */
 #define DISPATCH_BLOCK_NO_VOUCHER (0x40ul)
 
@@ -238,9 +225,7 @@
  * on with dispatch_block_wait() or observed with dispatch_block_notify().
  *
  * The returned dispatch block will be executed with the specified voucher
- * adopted for the duration of the block body. If the NULL voucher is passed,
- * the block will be executed with the voucher adopted on the calling thread, or
- * with no voucher if the DISPATCH_BLOCK_DETACHED flag was also provided.
+ * adopted for the duration of the block body.
  *
  * If the returned dispatch block object is submitted to a dispatch queue, the
  * submitted block instance will be associated with the QOS class current at the
@@ -265,11 +250,11 @@
  * @param flags
  * Configuration flags for the block object.
  * Passing a value that is not a bitwise OR of flags from dispatch_block_flags_t
- * results in NULL being returned.
+ * results in NULL being returned. The DISPATCH_BLOCK_NO_VOUCHER flag is
+ * ignored.
  *
  * @param voucher
- * A voucher object or NULL. Passing NULL is equivalent to specifying the
- * DISPATCH_BLOCK_NO_VOUCHER flag.
+ * A voucher object or NULL.
  *
  * @param block
  * The block to create the dispatch block object from.
@@ -305,9 +290,7 @@
  * on with dispatch_block_wait() or observed with dispatch_block_notify().
  *
  * The returned dispatch block will be executed with the specified voucher
- * adopted for the duration of the block body. If the NULL voucher is passed,
- * the block will be executed with the voucher adopted on the calling thread, or
- * with no voucher if the DISPATCH_BLOCK_DETACHED flag was also provided.
+ * adopted for the duration of the block body.
  *
  * If invoked directly, the returned dispatch block object will be executed with
  * the assigned QOS class as long as that does not result in a lower QOS class
@@ -330,11 +313,11 @@
  * @param flags
  * Configuration flags for the block object.
  * Passing a value that is not a bitwise OR of flags from dispatch_block_flags_t
- * results in NULL being returned.
+ * results in NULL being returned. The DISPATCH_BLOCK_NO_VOUCHER and
+ * DISPATCH_BLOCK_NO_QOS flags are ignored.
  *
  * @param voucher
- * A voucher object or NULL. Passing NULL is equivalent to specifying the
- * DISPATCH_BLOCK_NO_VOUCHER flag.
+ * A voucher object or NULL.
  *
  * @param qos_class
  * A QOS class value:
@@ -381,7 +364,7 @@
  * Deprecated, do not use, will abort process if called.
  */
 API_DEPRECATED("removed SPI", \
-		macos(10.11,10.12), ios(9.0,10.0), watchos(2.0,3.0), tvos(9.0,10.0))
+		macos(10.11,10.13), ios(9.0,11.0), watchos(2.0,4.0), tvos(9.0,11.0))
 DISPATCH_EXPORT DISPATCH_MALLOC DISPATCH_RETURNS_RETAINED DISPATCH_WARN_RESULT
 DISPATCH_NOTHROW
 dispatch_queue_t
@@ -420,6 +403,55 @@
 voucher_create_with_mach_msg(mach_msg_header_t *msg);
 
 /*!
+ * @function voucher_kvoucher_debug
+ *
+ * @abstract
+ * Writes a human-readable representation of a voucher to a memory buffer.
+ *
+ * @discussion
+ * The formatted representation of the voucher is written starting at a given
+ * offset in the buffer. If the remaining space in the buffer is too small, the
+ * output is truncated. Nothing is written before buf[offset] or at or beyond
+ * buf[bufsiz].
+ *
+ * @param task
+ * The task port for the task that owns the voucher port.
+ *
+ * @param voucher
+ * The voucher port name.
+ *
+ * @param buf
+ * The buffer to which the formatted representation of the voucher should be
+ * written.
+ *
+ * @param bufsiz
+ * The size of the buffer.
+ *
+ * @param offset
+ * The offset of the first byte in the buffer to be used for output.
+ *
+ * @param prefix
+ * A string to be written at the start of each line of formatted output.
+ * Typically used to generate leading whitespace for indentation. Use NULL if
+ * no prefix is required.
+ *
+ * @param max_hex_data
+ * The maximum number of bytes of hex data to be formatted for voucher content
+ * that is not of type MACH_VOUCHER_ATTR_KEY_ATM, MACH_VOUCHER_ATTR_KEY_BANK
+ * or MACH_VOUCHER_ATTR_KEY_IMPORTANCE.
+ *
+ * @result
+ * The offset of the first byte in the buffer following the formatted voucher
+ * representation.
+ */
+API_AVAILABLE(macos(10.14), ios(12.0), tvos(12.0), watchos(5.0))
+OS_VOUCHER_EXPORT OS_WARN_RESULT OS_NOTHROW DISPATCH_COLD
+size_t
+voucher_kvoucher_debug(mach_port_t task, mach_port_name_t voucher, char *buf,
+		   size_t bufsiz, size_t offset, char * _Nullable prefix,
+		   size_t max_hex_data);
+
+/*!
  * @group Voucher Persona SPI
  * SPI intended for clients that need to interact with personas.
  */
@@ -430,26 +462,24 @@
  * @function voucher_get_current_persona
  *
  * @abstract
- * Retrieve the persona identifier of the 'originator' process for the current
- * voucher.
+ * Returns the persona identifier for the current thread.
  *
  * @discussion
- * Retrieve the persona identifier of the ’originator’ process possibly stored
- * in the PERSONA_TOKEN attribute of the currently adopted voucher.
+ * Retrieve the persona identifier from the currently adopted voucher.
  *
  * If the thread has not adopted a voucher, or the current voucher does not
- * contain a PERSONA_TOKEN attribute, this function returns the persona
- * identifier of the current process.
+ * contain persona information, this function returns the persona identifier
+ * of the current process.
  *
  * If the process is not running under a persona, then this returns
  * PERSONA_ID_NONE.
  *
  * @result
- * The persona identifier of the 'originator' process for the current voucher,
+ * The persona identifier for the current voucher,
  * or the persona identifier of the current process
  * or PERSONA_ID_NONE
  */
-API_AVAILABLE(ios(9.2))
+API_AVAILABLE(macos(10.14), ios(9.2))
 OS_VOUCHER_EXPORT OS_WARN_RESULT OS_NOTHROW
 uid_t
 voucher_get_current_persona(void);
@@ -472,7 +502,7 @@
  * 0 on success: currently adopted voucher has a PERSONA_TOKEN
  * -1 on failure: persona_info is untouched/uninitialized
  */
-API_AVAILABLE(ios(9.2))
+API_AVAILABLE(macos(10.14), ios(9.2))
 OS_VOUCHER_EXPORT OS_WARN_RESULT OS_NOTHROW OS_NONNULL1
 int
 voucher_get_current_persona_originator_info(
@@ -496,12 +526,108 @@
  * 0 on success: currently adopted voucher has a PERSONA_TOKEN
  * -1 on failure: persona_info is untouched/uninitialized
  */
-API_AVAILABLE(ios(9.2))
+API_AVAILABLE(macos(10.14), ios(9.2))
 OS_VOUCHER_EXPORT OS_WARN_RESULT OS_NOTHROW OS_NONNULL1
 int
 voucher_get_current_persona_proximate_info(
 	struct proc_persona_info *persona_info);
 
+/*!
+ * @function voucher_copy_with_persona_mach_voucher
+ *
+ * @abstract
+ * Creates a copy of the currently adopted voucher and replaces its
+ * persona information with the one passed in the specified mach voucher
+ *
+ * @discussion
+ * If the specified mach voucher is not one returned from
+ * mach_voucher_persona_for_originator() (called on behalf
+ * of the current process), this function will fail
+ *
+ * @param persona_mach_voucher
+ * mach voucher containing the new persona information
+ *
+ * @result
+ * On success, a copy of the current voucher with the new
+ * persona information
+ * On failure, VOUCHER_INVALID
+ */
+API_AVAILABLE(macos(10.14), ios(12))
+OS_VOUCHER_EXPORT OS_OBJECT_RETURNS_RETAINED OS_WARN_RESULT OS_NOTHROW
+voucher_t _Nullable
+voucher_copy_with_persona_mach_voucher(
+	mach_voucher_t persona_mach_voucher);
+
+/*!
+ * @function mach_voucher_persona_self
+ *
+ * @abstract
+ * Creates a mach voucher containing the persona information of the
+ * current process that can be sent as a mach port descriptor in a message
+ *
+ * @discussion
+ * The returned mach voucher has been pre-processed so that it can be sent
+ * in a message
+ *
+ * @param persona_mach_voucher
+ * If successful, a reference to the newly created mach voucher
+ *
+ * @result
+ * KERN_SUCCESS: a mach voucher ready to be sent in a message is
+ * successfully created
+ * KERN_RESOURCE_SHORTAGE: mach voucher creation failed due to
+ * lack of free space
+ */
+API_AVAILABLE(macos(10.14), ios(12))
+OS_VOUCHER_EXPORT OS_WARN_RESULT OS_NOTHROW OS_NONNULL1
+kern_return_t
+mach_voucher_persona_self(mach_voucher_t *persona_mach_voucher);
+
+/*!
+ * @function mach_voucher_persona_for_originator
+ *
+ * @abstract
+ * Creates a mach voucher on behalf of the originator process by copying
+ * the persona information from the specified mach voucher and then
+ * updating the persona identifier to the specified value
+ *
+ * @discussion
+ * Should be called by a privileged process on behalf of the originator process.
+ * The newly created mach voucher should be returned to the originator in a
+ * message. The originator's thread can adopt the new persona by passing
+ * this mach voucher to voucher_copy_with_persona_mach_voucher().
+ *
+ * @param persona_id
+ * The new persona identifier to be set in the mach voucher
+ *
+ * @param originator_persona_mach_voucher
+ * A mach voucher received from the originator, where it was created using
+ * mach_voucher_persona_self()
+ *
+ * @param originator_unique_pid
+ * Unique pid of the originator process
+ *
+ * @param persona_mach_voucher
+ * If successful, a reference to the newly created mach voucher
+ *
+ * @result
+ * KERN_SUCCESS: a mach voucher ready to be returned to the
+ * originator was successfully created
+ * KERN_NO_ACCESS: process does not have privilege to carry
+ * out this operation
+ * KERN_INVALID_ARGUMENT: specified persona identifier is invalid
+ * KERN_INVALID_CAPABILITY: originator_unique_pid does not
+ * match the specified voucher originator's unique pid
+ * KERN_RESOURCE_SHORTAGE: mach voucher creation failed due to
+ * lack of free space
+ */
+API_AVAILABLE(macos(10.14), ios(12))
+OS_VOUCHER_EXPORT OS_WARN_RESULT OS_NOTHROW OS_NONNULL4
+kern_return_t
+mach_voucher_persona_for_originator(uid_t persona_id,
+	mach_voucher_t originator_persona_mach_voucher,
+	uint64_t originator_unique_pid, mach_voucher_t *persona_mach_voucher);
+
 #endif // __has_include(<mach/mach.h>)
 
 __END_DECLS
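The three persona SPIs above make up a small protocol between an originator and a privileged intermediary. A hedged sketch of the originator's half only; the transport of the voucher port between the two processes (a MIG or XPC message) is assumed to exist and is elided.

    #include <mach/mach.h>
    #include <os/voucher_private.h>  // SPI header; availability depends on the SDK

    // Step 1 (originator): create a sendable persona mach voucher and ship it
    // to the privileged process, which calls mach_voucher_persona_for_originator()
    // on it and replies with the resulting voucher port.
    static mach_voucher_t
    example_make_persona_voucher(void)
    {
        mach_voucher_t mv = MACH_VOUCHER_NULL;
        if (mach_voucher_persona_self(&mv) != KERN_SUCCESS) {
            return MACH_VOUCHER_NULL;
        }
        return mv;
    }

    // Step 2 (originator): adopt the persona the privileged process granted.
    // Returns VOUCHER_INVALID if the reply voucher was not produced on our
    // behalf by mach_voucher_persona_for_originator().
    static voucher_t
    example_adopt_granted_persona(mach_voucher_t reply_voucher)
    {
        return voucher_copy_with_persona_mach_voucher(reply_voucher);
    }

MACH_VOUCHER_NULL and KERN_SUCCESS come from the Mach headers; the function names and the split into two steps are illustrative only.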
diff --git a/private/CMakeLists.txt b/private/CMakeLists.txt
index a2ee9bd..f77a92d 100644
--- a/private/CMakeLists.txt
+++ b/private/CMakeLists.txt
@@ -14,6 +14,8 @@
             private.h
             queue_private.h
             source_private.h
+            time_private.h
+            workloop_private.h
           DESTINATION
             "${INSTALL_DISPATCH_HEADERS_DIR}")
 endif()
diff --git a/private/data_private.h b/private/data_private.h
index a922157..5c5431a 100644
--- a/private/data_private.h
+++ b/private/data_private.h
@@ -288,7 +288,6 @@
  * A newly created dispatch data object, dispatch_data_empty if no has been
  * produced, or NULL if an error occurred.
  */
-
 API_AVAILABLE(macos(10.8), ios(6.0))
 DISPATCH_EXPORT DISPATCH_NONNULL_ALL DISPATCH_RETURNS_RETAINED
 DISPATCH_WARN_RESULT DISPATCH_NOTHROW
@@ -297,6 +296,28 @@
 	dispatch_data_format_type_t input_type,
 	dispatch_data_format_type_t output_type);
 
+/*!
+ * @function dispatch_data_get_flattened_bytes_4libxpc
+ *
+ * Similar to dispatch_data_create_map(), except that the resulting mapping is
+ * attached to (and owned by) the passed-in dispatch data object.
+ *
+ * The returned mapping, if not NULL, has the size returned by
+ * dispatch_data_get_size() for the specified object, and its lifetime is tied
+ * to the one of the dispatch data itself.
+ *
+ * @discussion
+ * This interface is reserved for XPC usage and is not considered stable ABI.
+ *
+ * @result
+ * A newly created linear mapping for this data object, may return NULL if
+ * making the dispatch data contiguous failed to allocate memory.
+ */
+API_AVAILABLE(macos(10.14), ios(12.0), tvos(12.0), watchos(5.0), bridgeos(4.0))
+const void *_Nullable
+dispatch_data_get_flattened_bytes_4libxpc(dispatch_data_t data);
+
 __END_DECLS
 
 DISPATCH_ASSUME_NONNULL_END
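The new accessor is reserved for XPC, but its lifetime contract is worth one concrete illustration. A sketch under the assumption that the caller already holds a reference on the data object; the consuming step is elided.

    #include <dispatch/dispatch.h>
    #include "data_private.h"  // this header; install location depends on the SDK

    // Sketch only: unlike dispatch_data_create_map(), there is no separate
    // mapping object to release. The pointer stays valid for as long as
    // `data` itself does, and no longer.
    static void
    example_flattened_bytes(dispatch_data_t data)
    {
        const void *bytes = dispatch_data_get_flattened_bytes_4libxpc(data);
        if (bytes == NULL) {
            return;  // making the data contiguous failed to allocate memory
        }
        size_t size = dispatch_data_get_size(data);

        // ... read [bytes, bytes + size) while `data` is still retained ...
        (void)bytes;
        (void)size;

        // Nothing to unmap or free here; releasing `data` ends the lifetime.
    }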
diff --git a/private/introspection_private.h b/private/introspection_private.h
index 972c688..137ea97 100644
--- a/private/introspection_private.h
+++ b/private/introspection_private.h
@@ -134,7 +134,6 @@
  * @field source_size
  * Size of dispatch_introspection_source_s structure.
  */
-
 API_AVAILABLE(macos(10.9), ios(7.0))
 DISPATCH_EXPORT const struct dispatch_introspection_versions_s {
 	unsigned long introspection_version;
@@ -389,7 +388,8 @@
 	unsigned long enqueued:1,
 			handler_is_block:1,
 			timer:1,
-			after:1;
+			after:1,
+			is_xpc:1;
 } dispatch_introspection_source_s;
 typedef dispatch_introspection_source_s *dispatch_introspection_source_t;
 
@@ -425,12 +425,12 @@
  * Types of items enqueued on a dispatch queue.
  */
 enum dispatch_introspection_queue_item_type {
-  dispatch_introspection_queue_item_type_none = 0x0,
-  dispatch_introspection_queue_item_type_block = 0x11,
-  dispatch_introspection_queue_item_type_function = 0x12,
-  dispatch_introspection_queue_item_type_object = 0x100,
-  dispatch_introspection_queue_item_type_queue = 0x101,
-  dispatch_introspection_queue_item_type_source = 0102,
+	dispatch_introspection_queue_item_type_none = 0x0,
+	dispatch_introspection_queue_item_type_block = 0x11,
+	dispatch_introspection_queue_item_type_function = 0x12,
+	dispatch_introspection_queue_item_type_object = 0x100,
+	dispatch_introspection_queue_item_type_queue = 0x101,
+	dispatch_introspection_queue_item_type_source = 0x42,
 };
 
 /*!
@@ -532,19 +532,92 @@
 		dispatch_continuation_t object);
 
 /*!
+ * @enum dispatch_introspection_runtime_event
+ *
+ * @abstract
+ * Types for major events the dispatch runtime goes through as sent by
+ * the runtime_event hook.
+ *
+ * @const dispatch_introspection_runtime_event_worker_event_delivery
+ * A worker thread was unparked to deliver some kernel events.
+ * There may be an unpark event if the thread will pick up a queue to drain.
+ * There is always a worker_park event when the thread is returned to the pool.
+ * `ptr` is the queue for which events are being delivered, or NULL (for generic
+ * events).
+ * `value` is the number of events delivered.
+ *
+ * @const dispatch_introspection_runtime_event_worker_unpark
+ * A worker thread just unparked (sent from the context of the thread).
+ * `ptr` is the queue for which the thread unparked.
+ * `value` is 0.
+ *
+ * @const dispatch_introspection_runtime_event_worker_request
+ * `ptr` is set to the queue on behalf of which the thread request is made.
+ * `value` is the number of threads requested.
+ *
+ * @const dispatch_introspection_runtime_event_worker_park
+ * A worker thread is about to park (sent from the context of the thread).
+ * `ptr` and `value` are 0.
+ *
+ * @const dispatch_introspection_runtime_event_sync_wait
+ * A caller of dispatch_sync or dispatch_async_and_wait hit contention.
+ * `ptr` is the queue that caused the initial contention.
+ * `value` is 0.
+ *
+ * @const dispatch_introspection_runtime_event_async_sync_handoff
+ * @const dispatch_introspection_runtime_event_sync_sync_handoff
+ * @const dispatch_introspection_runtime_event_sync_async_handoff
+ *
+ * A queue is being handed off from one thread to another due to, respectively:
+ * - async/sync contention
+ * - sync/sync contention
+ * - sync/async contention
+ *
+ * `ptr` is set to dispatch_queue_t which is handed off to the next thread.
+ * `value` is 0.
+ */
+#ifndef __DISPATCH_BUILDING_DISPATCH__
+enum dispatch_introspection_runtime_event {
+	dispatch_introspection_runtime_event_worker_event_delivery = 1,
+	dispatch_introspection_runtime_event_worker_unpark = 2,
+	dispatch_introspection_runtime_event_worker_request = 3,
+	dispatch_introspection_runtime_event_worker_park = 4,
+
+	dispatch_introspection_runtime_event_sync_wait = 10,
+	dispatch_introspection_runtime_event_async_sync_handoff = 11,
+	dispatch_introspection_runtime_event_sync_sync_handoff = 12,
+	dispatch_introspection_runtime_event_sync_async_handoff = 13,
+};
+#endif
+
+/*!
+ * @typedef dispatch_introspection_hook_runtime_event_t
+ *
+ * @abstract
+ * A function pointer called for various runtime events.
+ *
+ * @discussion
+ * The actual payloads are discussed in the documentation of the
+ * dispatch_introspection_runtime_event enum.
+ */
+typedef void (*dispatch_introspection_hook_runtime_event_t)(
+		enum dispatch_introspection_runtime_event event,
+		void *ptr, unsigned long long value);
+
+/*!
  * @typedef dispatch_introspection_hooks_s
  *
  * @abstract
  * A structure of function pointer hooks into libdispatch.
  */
-
 typedef struct dispatch_introspection_hooks_s {
 	dispatch_introspection_hook_queue_create_t queue_create;
 	dispatch_introspection_hook_queue_dispose_t queue_dispose;
 	dispatch_introspection_hook_queue_item_enqueue_t queue_item_enqueue;
 	dispatch_introspection_hook_queue_item_dequeue_t queue_item_dequeue;
 	dispatch_introspection_hook_queue_item_complete_t queue_item_complete;
-	void *_reserved[5];
+	dispatch_introspection_hook_runtime_event_t runtime_event;
+	void *_reserved[4];
 } dispatch_introspection_hooks_s;
 typedef dispatch_introspection_hooks_s *dispatch_introspection_hooks_t;
 
@@ -715,7 +788,6 @@
  * The structure is copied on input and filled with the previously installed
  * hooks on output.
  */
-
 API_AVAILABLE(macos(10.9), ios(7.0))
 DISPATCH_EXPORT void
 dispatch_introspection_hooks_install(dispatch_introspection_hooks_t hooks);
@@ -740,7 +812,6 @@
  * As a convenience, the 'enable' pointer may itself be NULL to indicate that
  * all hook callouts should be enabled.
  */
-
 extern void
 dispatch_introspection_hook_callouts_enable(
 		dispatch_introspection_hooks_t enable);
@@ -751,7 +822,6 @@
  * @abstract
  * Callout to queue creation hook that a debugger can break on.
  */
-
 extern void
 dispatch_introspection_hook_callout_queue_create(
 		dispatch_introspection_queue_t queue_info);
@@ -762,7 +832,6 @@
  * @abstract
  * Callout to queue destruction hook that a debugger can break on.
  */
-
 extern void
 dispatch_introspection_hook_callout_queue_dispose(
 		dispatch_introspection_queue_t queue_info);
@@ -773,7 +842,6 @@
  * @abstract
  * Callout to queue enqueue hook that a debugger can break on.
  */
-
 extern void
 dispatch_introspection_hook_callout_queue_item_enqueue(
 		dispatch_queue_t queue, dispatch_introspection_queue_item_t item);
@@ -784,7 +852,6 @@
  * @abstract
  * Callout to queue dequeue hook that a debugger can break on.
  */
-
 extern void
 dispatch_introspection_hook_callout_queue_item_dequeue(
 		dispatch_queue_t queue, dispatch_introspection_queue_item_t item);
@@ -795,7 +862,6 @@
  * @abstract
  * Callout to queue item complete hook that a debugger can break on.
  */
-
 extern void
 dispatch_introspection_hook_callout_queue_item_complete(
 		dispatch_continuation_t object);
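A hedged sketch of wiring the new runtime_event hook through the existing install entry point; the logging body is a stand-in and the include directive is an assumption about where this header lands. Note that these hooks are part of the introspection SPI and are typically only live when the introspection variant of libdispatch is loaded.

    #include <stdio.h>
    #include "introspection_private.h"  // this header; install location varies

    // Sketch only: print each runtime event described above.
    static void
    example_runtime_event(enum dispatch_introspection_runtime_event event,
            void *ptr, unsigned long long value)
    {
        fprintf(stderr, "dispatch runtime event %d ptr=%p value=%llu\n",
                (int)event, ptr, value);
    }

    static void
    example_install_hooks(void)
    {
        dispatch_introspection_hooks_s hooks = {
            .runtime_event = example_runtime_event,
        };
        // On return, `hooks` is filled with the previously installed hooks.
        dispatch_introspection_hooks_install(&hooks);
    }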
diff --git a/private/layout_private.h b/private/layout_private.h
index 81bcabd..0101fc0 100644
--- a/private/layout_private.h
+++ b/private/layout_private.h
@@ -52,7 +52,6 @@
 } dispatch_queue_offsets;
 
 #if DISPATCH_LAYOUT_SPI
-
 /*!
  * @group Data Structure Layout SPI
  * SPI intended for CoreSymbolication only
@@ -65,8 +64,36 @@
 	const uint16_t dti_queue_index;
 	const uint16_t dti_voucher_index;
 	const uint16_t dti_qos_class_index;
+	/* version 3 */
+	const uint16_t dti_continuation_cache_index;
 } dispatch_tsd_indexes;
 
+#if TARGET_OS_MAC
+
+#include <malloc/malloc.h>
+
+API_AVAILABLE(macos(10.14), ios(12.0), tvos(12.0), watchos(5.0))
+DISPATCH_EXPORT const struct dispatch_allocator_layout_s {
+	const uint16_t dal_version;
+	/* version 1 */
+	/* Pointer to the allocator metadata address, points to NULL if unused */
+	void **const dal_allocator_zone;
+	/* Magical "isa" for allocations that are on freelists */
+	void *const *const dal_deferred_free_isa;
+	/* Size of allocations made in the magazine */
+	const uint16_t dal_allocation_size;
+	/* fields used by the enumerator */
+	const uint16_t dal_magazine_size;
+	const uint16_t dal_first_allocation_offset;
+	const uint16_t dal_allocation_isa_offset;
+	/* Enumerates allocated continuations */
+	kern_return_t (*dal_enumerator)(task_t remote_task,
+			const struct dispatch_allocator_layout_s *remote_allocator_layout,
+			vm_address_t zone_address, memory_reader_t reader,
+			void (^recorder)(vm_address_t dc_address, void *dc_mem,
+					size_t size, bool *stop));
+} dispatch_allocator_layout;
+#endif // TARGET_OS_MAC
 #endif // DISPATCH_LAYOUT_SPI
 
 __END_DECLS
diff --git a/private/mach_private.h b/private/mach_private.h
index bc53223..e311aee 100644
--- a/private/mach_private.h
+++ b/private/mach_private.h
@@ -66,6 +66,9 @@
  *
  * @const DISPATCH_MACH_MESSAGE_RECEIVED
  * A message was received, it is passed in the message parameter.
+ * It is the responsibility of the client of this API to handle this and consume
+ * or dispose of the rights in the message (for example by calling
+ * mach_msg_destroy()).
  *
  * @const DISPATCH_MACH_MESSAGE_SENT
  * A message was sent, it is passed in the message parameter (so that associated
@@ -115,15 +118,15 @@
  * once during the lifetime of the channel. This event is sent only for XPC
  * channels (i.e. channels that were created by calling
  * dispatch_mach_create_4libxpc()) and only if the
- * dmxh_enable_sigterm_notification function in the XPC hooks structure is not
- * set or it returned true when it was called at channel activation time.
+ * dmxh_enable_sigterm_notification function in the XPC hooks structure returned
+ * true when it was called at channel activation time.
  *
  * @const DISPATCH_MACH_ASYNC_WAITER_DISCONNECTED
  * The channel has been disconnected by a call to dispatch_mach_reconnect() or
  * dispatch_mach_cancel(), an empty message is passed in the message parameter
  * (so that associated port rights can be disposed of). The message header will
  * contain a local port with the receive right previously allocated to receive
- * an asynchronous reply to a message previously sent to the channel. Used 
+ * an asynchronous reply to a message previously sent to the channel. Used
  * only if the channel is disconnected while waiting for a reply to a message
  * sent with dispatch_mach_send_with_result_and_async_reply_4libxpc().
  */
@@ -433,6 +436,82 @@
 dispatch_mach_cancel(dispatch_mach_t channel);
 
 /*!
+ * @function dispatch_mach_mig_demux
+ *
+ * @abstract
+ * Handles an incoming DISPATCH_MACH_MESSAGE_RECEIVED event through a series of
+ * MIG subsystem demultiplexers.
+ *
+ * @discussion
+ * This function can be used with a static array of MIG subsystems to try.
+ * If it returns true, then the dispatch mach message has been consumed as per
+ * usual MIG rules.
+ *
+ * If it returns false, then the mach message has not been touched, and
+ * consuming or disposing of the rights in the message is mandatory.
+ *
+ * It is hence possible to write a manual demuxer this way:
+ *
+ * <code>
+ * if (!dispatch_mach_mig_demux(context, subsystems, count, message)) {
+ *     mach_msg_header_t *hdr = dispatch_mach_msg_get_msg(message, NULL);
+ *     switch (hdr->msgh_id) {
+ *     case ...: // manual consumption of messages
+ *         ...
+ *         break;
+ *     default:
+ *         mach_msg_destroy(hdr); // no one claimed the message, destroy it
+ *     }
+ * }
+ * </code>
+ *
+ * @param context
+ * An optional context that the MIG routines can query with
+ * dispatch_mach_mig_demux_get_context() as MIG doesn't support contexts.
+ *
+ * @param subsystems
+ * An array of mig_subsystem structs for all the demuxers to try.
+ * These are exposed by MIG in the Server header of the generated interface.
+ *
+ * @param count
+ * The number of entries in the subsystems array.
+ *
+ * @param msg
+ * The dispatch mach message to process.
+ *
+ * @returns
+ * Whether or not the dispatch mach message has been consumed.
+ * If false is returned, then it is the responsibility of the caller to consume
+ * or dispose of the received message rights.
+ */
+API_AVAILABLE(macos(10.14), ios(12.0), tvos(12.0), watchos(5.0))
+DISPATCH_EXPORT DISPATCH_NONNULL2 DISPATCH_NONNULL4 DISPATCH_NOTHROW
+bool
+dispatch_mach_mig_demux(void *_Nullable context,
+		const struct mig_subsystem *_Nonnull const subsystems[_Nonnull],
+		size_t count, dispatch_mach_msg_t msg);
+
+/*!
+ * @function dispatch_mach_mig_demux_get_context
+ *
+ * @abstract
+ * Returns the context passed to dispatch_mach_mig_demux() from the context of
+ * a MIG routine implementation.
+ *
+ * @discussion
+ * Calling this function from any context other than a MIG routine invoked by
+ * dispatch_mach_mig_demux() is invalid and will cause your process to be
+ * terminated.
+ *
+ * @returns
+ * The context passed to the outer call to dispatch_mach_mig_demux().
+ */
+API_AVAILABLE(macos(10.14), ios(12.0), tvos(12.0), watchos(5.0))
+DISPATCH_EXPORT DISPATCH_NOTHROW
+void *_Nullable
+dispatch_mach_mig_demux_get_context(void);
+
+/*!
  * @function dispatch_mach_send
  * Asynchronously send a message encapsulated in a dispatch mach message object
  * to the specified mach channel.
@@ -811,8 +890,9 @@
 typedef void (*_Nonnull dispatch_mach_async_reply_callback_t)(void *context,
 		dispatch_mach_reason_t reason, dispatch_mach_msg_t message);
 
-API_AVAILABLE(macos(10.12), ios(10.0), tvos(10.0), watchos(3.0))
+API_AVAILABLE(macos(10.13), ios(11.0), tvos(11.0), watchos(4.0))
 typedef const struct dispatch_mach_xpc_hooks_s {
+#define DISPATCH_MACH_XPC_MIN_HOOKS_VERSION 3
 #define DISPATCH_MACH_XPC_HOOKS_VERSION     3
 	unsigned long version;
 
@@ -838,12 +918,12 @@
 	 * Gets the queue to which a reply to a message sent using
 	 * dispatch_mach_send_with_result_and_async_reply_4libxpc() should be
 	 * delivered. The msg_context argument is the value of the do_ctxt field
-	 * of the outgoing message, as returned by dispatch_get_context(). If this
-	 * function returns NULL, the reply will be delivered to the channel queue.
-	 * This function should not make any assumptions about the thread on which
-	 * it is called and, since it may be called more than once per message, it
-	 * should execute as quickly as possible and not attempt to synchronize with
-	 * other code.
+	 * of the outgoing message, as returned by dispatch_get_context().
+	 *
+	 * This function should return a consistent result until an event is
+	 * received for this message. This function must return NULL if
+	 * dispatch_mach_send_with_result_and_async_reply_4libxpc() wasn't used to
+	 * send the message, and non NULL otherwise.
 	 */
 	dispatch_queue_t _Nullable (*_Nonnull dmxh_msg_context_reply_queue)(
 			void *_Nonnull msg_context);
@@ -870,12 +950,10 @@
 	 * returns true, a DISPATCH_MACH_SIGTERM_RECEIVED notification will be
 	 * delivered to the channel's event handler when a SIGTERM is received.
 	 */
-	bool (* _Nullable dmxh_enable_sigterm_notification)(
+	bool (*_Nonnull dmxh_enable_sigterm_notification)(
 			void *_Nullable context);
 } *dispatch_mach_xpc_hooks_t;
 
-#define DISPATCH_MACH_XPC_SUPPORTS_ASYNC_REPLIES(hooks) ((hooks)->version >= 2)
-
 /*!
  * @function dispatch_mach_hooks_install_4libxpc
  *
@@ -893,7 +971,7 @@
  * @param hooks
  * A pointer to the channel hooks structure. This must remain valid once set.
  */
-API_AVAILABLE(macos(10.12), ios(10.0), tvos(10.0), watchos(3.0))
+API_AVAILABLE(macos(10.13), ios(11.0), tvos(11.0), watchos(4.0))
 DISPATCH_EXPORT DISPATCH_NONNULL_ALL DISPATCH_NOTHROW
 void
 dispatch_mach_hooks_install_4libxpc(dispatch_mach_xpc_hooks_t hooks);
@@ -907,7 +985,7 @@
  * for each message received and for each message that was successfully sent,
  * that failed to be sent, or was not sent; as well as when a barrier block
  * has completed, or when channel connection, reconnection or cancellation has
- * taken effect. However, the handler will not be called for messages that 
+ * taken effect. However, the handler will not be called for messages that
  * were passed to the XPC hooks dmxh_direct_message_handler function if that
  * function returned true.
  *
@@ -933,7 +1011,7 @@
  * @result
  * The newly created dispatch mach channel.
  */
-API_AVAILABLE(macos(10.12), ios(10.0), tvos(10.0), watchos(3.0))
+API_AVAILABLE(macos(10.13), ios(11.0), tvos(11.0), watchos(4.0))
 DISPATCH_EXPORT DISPATCH_MALLOC DISPATCH_RETURNS_RETAINED DISPATCH_WARN_RESULT
 DISPATCH_NONNULL4 DISPATCH_NOTHROW
 dispatch_mach_t
@@ -951,7 +1029,7 @@
  * dmxh_msg_context_reply_queue function in the dispatch_mach_xpc_hooks_s
  * structure, which is called with a single argument whose value is the
  * do_ctxt field of the message argument to this function. The reply message is
- * delivered to the dmxh_async_reply_handler hook function instead of being 
+ * delivered to the dmxh_async_reply_handler hook function instead of being
  * passed to the channel event handler.
  *
  * If the dmxh_msg_context_reply_queue function is not implemented or returns
@@ -1014,7 +1092,7 @@
  * Out parameter to return the error from the immediate send attempt.
  * If a deferred send is required, returns 0. Must not be NULL.
  */
-API_AVAILABLE(macos(10.12), ios(10.0), tvos(10.0), watchos(3.0))
+API_AVAILABLE(macos(10.13), ios(11.0), tvos(11.0), watchos(4.0))
 DISPATCH_EXPORT DISPATCH_NONNULL1 DISPATCH_NONNULL2 DISPATCH_NONNULL5
 DISPATCH_NONNULL6 DISPATCH_NOTHROW
 void
@@ -1023,6 +1101,51 @@
 		dispatch_mach_send_flags_t send_flags,
 		dispatch_mach_reason_t *send_result, mach_error_t *send_error);
 
+/*!
+ * @function dispatch_mach_handoff_reply_f
+ *
+ * @abstract
+ * Inform the runtime that a given sync IPC is being handed off to a new queue
+ * hierarchy.
+ *
+ * @discussion
+ * This function can only be called from the context of an IPC handler, or from
+ * a work item created by dispatch_mach_handoff_reply_f. Calling
+ * dispatch_mach_handoff_reply_f from a different context is undefined and will
+ * cause the process to be terminated.
+ *
+ * dispatch_mach_handoff_reply_f will only take effect when the work item that
+ * issued it returns.
+ *
+ * @param queue
+ * The queue the IPC reply will be handed off to. This queue must be an
+ * immutable queue hierarchy (with all nodes created with
+ * dispatch_queue_create_with_target() for example).
+ *
+ * @param port
+ * The send once right that will be replied to.
+ */
+API_AVAILABLE(macos(10.14), ios(12.0), tvos(12.0), watchos(5.0))
+DISPATCH_EXPORT DISPATCH_NONNULL1 DISPATCH_NONNULL4 DISPATCH_NOTHROW
+void
+dispatch_mach_handoff_reply_f(dispatch_queue_t queue, mach_port_t port,
+		void *_Nullable ctxt, dispatch_function_t func);
+
+/*!
+ * @function dispatch_mach_handoff_reply
+ *
+ * @abstract
+ * Inform the runtime that a given sync IPC is being handed off to a new queue
+ * hierarchy.
+ *
+ * @see dispatch_mach_handoff_reply_f
+ */
+API_AVAILABLE(macos(10.14), ios(12.0), tvos(12.0), watchos(5.0))
+DISPATCH_EXPORT DISPATCH_NONNULL1 DISPATCH_NONNULL3 DISPATCH_NOTHROW
+void
+dispatch_mach_handoff_reply(dispatch_queue_t queue, mach_port_t port,
+		dispatch_block_t block);
+
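[Editor's note: a minimal sketch of a handoff, assuming it runs inside an IPC
handler; target_queue, reply_port, and send_reply are placeholders. The target
queue is assumed to be an immutable hierarchy as required above.]

<code>
// reply_port is the send-once right extracted from the incoming request.
dispatch_mach_handoff_reply(target_queue, reply_port, ^{
    // Runs on target_queue's hierarchy once the issuing work item returns,
    // allowing the waiting sender's priority to be propagated there.
    send_reply(reply_port); // hypothetical helper that consumes the right
});
</code>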
 DISPATCH_ASSUME_NONNULL_END
 
 #endif // DISPATCH_MACH_SPI
diff --git a/private/private.h b/private/private.h
index 19ccccd..b87f5dc 100644
--- a/private/private.h
+++ b/private/private.h
@@ -62,6 +62,7 @@
 
 #include <dispatch/benchmark.h>
 #include <dispatch/queue_private.h>
+#include <dispatch/workloop_private.h>
 #include <dispatch/source_private.h>
 #if DISPATCH_MACH_SPI
 #include <dispatch/mach_private.h>
@@ -69,13 +70,13 @@
 #include <dispatch/data_private.h>
 #include <dispatch/io_private.h>
 #include <dispatch/layout_private.h>
+#include <dispatch/time_private.h>
 
 #undef __DISPATCH_INDIRECT__
-
 #endif /* !__DISPATCH_BUILDING_DISPATCH__ */
 
 // <rdar://problem/9627726> Check that public and private dispatch headers match
-#if DISPATCH_API_VERSION != 20170124 // Keep in sync with <dispatch/dispatch.h>
+#if DISPATCH_API_VERSION != 20180109 // Keep in sync with <dispatch/dispatch.h>
 #error "Dispatch header mismatch between /usr/include and /usr/local/include"
 #endif
 
@@ -176,13 +177,13 @@
 
 #if TARGET_OS_MAC
 #define DISPATCH_COCOA_COMPAT 1
-#elif defined(__linux__) || defined(__FreeBSD__)
+#elif defined(__linux__) || defined(__FreeBSD__) || defined(_WIN32)
 #define DISPATCH_COCOA_COMPAT 1
 #else
 #define DISPATCH_COCOA_COMPAT 0
 #endif
 
-#if DISPATCH_COCOA_COMPAT || defined(_WIN32)
+#if DISPATCH_COCOA_COMPAT
 
 #define DISPATCH_CF_SPI_VERSION 20160712
 
@@ -196,12 +197,10 @@
 #error "runloop support not implemented on this platform"
 #endif
 
-#if TARGET_OS_MAC
 API_AVAILABLE(macos(10.6), ios(4.0))
 DISPATCH_EXPORT DISPATCH_CONST DISPATCH_WARN_RESULT DISPATCH_NOTHROW
 dispatch_runloop_handle_t
 _dispatch_get_main_queue_port_4CF(void);
-#endif
 
 API_AVAILABLE(macos(10.12), ios(10.0), tvos(10.0), watchos(3.0))
 DISPATCH_EXPORT DISPATCH_NOTHROW
@@ -216,27 +215,20 @@
 API_AVAILABLE(macos(10.9), ios(7.0))
 DISPATCH_EXPORT DISPATCH_MALLOC DISPATCH_RETURNS_RETAINED DISPATCH_WARN_RESULT
 DISPATCH_NOTHROW
-dispatch_queue_t
+dispatch_queue_serial_t
 _dispatch_runloop_root_queue_create_4CF(const char *_Nullable label,
 		unsigned long flags);
 
-#if TARGET_OS_MAC || defined(_WIN32)
 API_AVAILABLE(macos(10.9), ios(7.0))
 DISPATCH_EXPORT DISPATCH_WARN_RESULT DISPATCH_NOTHROW
 dispatch_runloop_handle_t
 _dispatch_runloop_root_queue_get_port_4CF(dispatch_queue_t queue);
-#endif
 
 #if TARGET_OS_MAC
-#ifdef __BLOCKS__
-API_AVAILABLE(macos(10.12), ios(10.0), tvos(10.0), watchos(3.0))
-DISPATCH_EXPORT DISPATCH_MALLOC DISPATCH_RETURNS_RETAINED DISPATCH_WARN_RESULT
-DISPATCH_NOTHROW
-dispatch_queue_t
-_dispatch_network_root_queue_create_4NW(const char *_Nullable label,
-		const pthread_attr_t *_Nullable attrs,
-		dispatch_block_t _Nullable configure);
-#endif
+API_AVAILABLE(macos(10.13.2), ios(11.2), tvos(11.2), watchos(4.2))
+DISPATCH_EXPORT DISPATCH_WARN_RESULT DISPATCH_NOTHROW
+bool
+_dispatch_source_will_reenable_kevent_4NW(dispatch_source_t source);
 #endif
 
 API_AVAILABLE(macos(10.9), ios(7.0))
@@ -263,9 +255,9 @@
 DISPATCH_EXPORT
 void (*_Nullable _dispatch_end_NSAutoReleasePool)(void *);
 
-#endif /* DISPATCH_COCOA_COMPAT || defined(_WIN32) */
+#endif /* DISPATCH_COCOA_COMPAT */
 
-API_AVAILABLE(macos(10.12), ios(10.0), tvos(10.0), watchos(3.0))
+API_AVAILABLE(macos(10.13), ios(11.0), tvos(11.0), watchos(4.0))
 DISPATCH_EXPORT DISPATCH_NOTHROW
 void
 _dispatch_poll_for_events_4launchd(void);
diff --git a/private/queue_private.h b/private/queue_private.h
index b7ab515..60ae96e 100644
--- a/private/queue_private.h
+++ b/private/queue_private.h
@@ -47,6 +47,155 @@
 	DISPATCH_QUEUE_OVERCOMMIT = 0x2ull,
 };
 
+
+/*!
+ * @function dispatch_set_qos_class
+ *
+ * @abstract
+ * Sets the QOS class on a dispatch queue, source or mach channel.
+ *
+ * @discussion
+ * This is equivalent to using dispatch_queue_attr_make_with_qos_class()
+ * when creating a dispatch queue, but is available on additional dispatch
+ * object types.
+ *
+ * When configured in this manner, the specified QOS class will be used over
+ * the assigned QOS of workitems submitted asynchronously to this object,
+ * unless the workitem has been created with ENFORCE semantics
+ * (see DISPATCH_BLOCK_ENFORCE_QOS_CLASS).
+ *
+ * Calling this function will supersede any prior calls to
+ * dispatch_set_qos_class() or dispatch_set_qos_class_floor().
+ *
+ * @param object
+ * A dispatch queue, source or mach channel to configure.
+ * The object must be inactive, and can't be a workloop.
+ *
+ * Passing another object type or an object that has been activated is undefined
+ * and will cause the process to be terminated.
+ *
+ * @param qos_class
+ * A QOS class value:
+ *  - QOS_CLASS_USER_INTERACTIVE
+ *  - QOS_CLASS_USER_INITIATED
+ *  - QOS_CLASS_DEFAULT
+ *  - QOS_CLASS_UTILITY
+ *  - QOS_CLASS_BACKGROUND
+ * Passing any other value is undefined.
+ *
+ * @param relative_priority
+ * A relative priority within the QOS class. This value is a negative
+ * offset from the maximum supported scheduler priority for the given class.
+ * Passing a value greater than zero or less than QOS_MIN_RELATIVE_PRIORITY
+ * is undefined.
+ */
+API_AVAILABLE(macos(10.14), ios(12.0), tvos(12.0), watchos(5.0))
+DISPATCH_EXPORT DISPATCH_NOTHROW
+void
+dispatch_set_qos_class(dispatch_object_t object,
+		dispatch_qos_class_t qos_class, int relative_priority);
+
+/*!
+ * @function dispatch_set_qos_class_floor
+ *
+ * @abstract
+ * Sets the QOS class floor on a dispatch queue, source, workloop or mach
+ * channel.
+ *
+ * @discussion
+ * The QOS class of workitems submitted to this object asynchronously will be
+ * elevated to at least the specified QOS class floor.
+ * Unlike dispatch_set_qos_class(), the QOS of the workitem will be used if
+ * higher than the floor even when the workitem has been created without
+ * "ENFORCE" semantics.
+ *
+ * Setting the QOS class floor is equivalent to the QOS effects of configuring
+ * a target queue whose QOS class has been set with dispatch_set_qos_class().
+ *
+ * Calling this function will supersede any prior calls to
+ * dispatch_set_qos_class() or dispatch_set_qos_class_floor().
+ *
+ * @param object
+ * A dispatch queue, workloop, source or mach channel to configure.
+ * The object must be inactive.
+ *
+ * Passing another object type or an object that has been activated is undefined
+ * and will cause the process to be terminated.
+ *
+ * @param qos_class
+ * A QOS class value:
+ *  - QOS_CLASS_USER_INTERACTIVE
+ *  - QOS_CLASS_USER_INITIATED
+ *  - QOS_CLASS_DEFAULT
+ *  - QOS_CLASS_UTILITY
+ *  - QOS_CLASS_BACKGROUND
+ * Passing any other value is undefined.
+ *
+ * @param relative_priority
+ * A relative priority within the QOS class. This value is a negative
+ * offset from the maximum supported scheduler priority for the given class.
+ * Passing a value greater than zero or less than QOS_MIN_RELATIVE_PRIORITY
+ * is undefined.
+ */
+API_AVAILABLE(macos(10.14), ios(12.0), tvos(12.0), watchos(5.0))
+DISPATCH_EXPORT DISPATCH_NOTHROW
+void
+dispatch_set_qos_class_floor(dispatch_object_t object,
+		dispatch_qos_class_t qos_class, int relative_priority);
+
+/*!
+ * @function dispatch_set_qos_class_fallback
+ *
+ * @abstract
+ * Sets the fallback QOS class on a dispatch queue, source, workloop or mach
+ * channel.
+ *
+ * @discussion
+ * Workitems submitted asynchronously to this object that don't have an assigned
+ * QOS class will use the specified QOS class as a fallback. This interface
+ * doesn't support relative priority.
+ *
+ * Workitems without an assigned QOS are:
+ * - workitems submitted from the context of a thread opted-out of QOS,
+ * - workitems created with the DISPATCH_BLOCK_DETACHED or
+ *   DISPATCH_BLOCK_NO_QOS_CLASS flags,
+ * - XPC messages sent with xpc_connection_send_notification(),
+ * - XPC connection and dispatch source handlers.
+ *
+ * Calling both dispatch_set_qos_class_fallback() and dispatch_set_qos_class()
+ * on an object will only apply the effect of dispatch_set_qos_class().
+ *
+ * A QOS class fallback must always be at least as high as the current QOS
+ * floor for the dispatch queue hierarchy, else it is ignored.
+ *
+ * When no QOS fallback has been explicitly specified:
+ * - queues on hierarchies without a QOS class or QOS class floor have
+ *   a fallback of QOS_CLASS_DEFAULT,
+ * - queues on hierarchies with a QOS class or QOS class floor configured will
+ *   also use that QOS class as a fallback.
+ *
+ * @param object
+ * A dispatch queue, workloop, source or mach channel to configure.
+ * The object must be inactive.
+ *
+ * Passing another object type or an object that has been activated is undefined
+ * and will cause the process to be terminated.
+ *
+ * @param qos_class
+ * A QOS class value:
+ *  - QOS_CLASS_USER_INTERACTIVE
+ *  - QOS_CLASS_USER_INITIATED
+ *  - QOS_CLASS_DEFAULT
+ *  - QOS_CLASS_UTILITY
+ *  - QOS_CLASS_BACKGROUND
+ * Passing any other value is undefined.
+ */
+API_AVAILABLE(macos(10.14), ios(12.0), tvos(12.0), watchos(5.0))
+DISPATCH_EXPORT DISPATCH_NOTHROW
+void
+dispatch_set_qos_class_fallback(dispatch_object_t object,
+		dispatch_qos_class_t qos_class);
+
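[Editor's note: a short sketch of configuring an inactive source with a QOS
floor before activation; q, fd, and handle_readable are placeholders, and the
QOS value is illustrative only.]

<code>
dispatch_queue_t q = dispatch_queue_create("com.example.reader", NULL);
dispatch_source_t ds = dispatch_source_create(DISPATCH_SOURCE_TYPE_READ,
        fd, 0, q);
// The source is still inactive here, so the floor may be configured.
dispatch_set_qos_class_floor(ds, QOS_CLASS_UTILITY, 0);
dispatch_source_set_event_handler(ds, ^{ handle_readable(fd); });
dispatch_activate(ds);
</code>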
 #define DISPATCH_QUEUE_FLAGS_MASK (DISPATCH_QUEUE_OVERCOMMIT)
 
 // On FreeBSD pthread_attr_t is a typedef to a pointer type
@@ -70,7 +219,7 @@
  *
  * It is recommended to not specify a target queue at all when using this
  * attribute and to use dispatch_queue_attr_make_with_qos_class() to select the
- * appropriate QoS class instead.
+ * appropriate QOS class instead.
  *
  * Queues created with this attribute cannot change target after having been
  * activated. See dispatch_set_target_queue() and dispatch_activate().
@@ -133,10 +282,11 @@
  * @param label
  * The new label for the queue.
  */
-API_AVAILABLE(macos(10.12), ios(10.0), tvos(10.0), watchos(3.0))
+API_AVAILABLE(macos(10.13), ios(11.0), tvos(11.0), watchos(4.0))
 DISPATCH_EXPORT DISPATCH_NONNULL1 DISPATCH_NOTHROW
 void
-dispatch_queue_set_label_nocopy(dispatch_queue_t queue, const char *label);
+dispatch_queue_set_label_nocopy(dispatch_queue_t queue,
+		const char * _Nullable label);
 
 /*!
  * @function dispatch_queue_set_width
@@ -174,7 +324,7 @@
 void
 dispatch_queue_set_width(dispatch_queue_t dq, long width);
 
-#ifdef __BLOCKS__
+#if defined(__BLOCKS__) && defined(__APPLE__)
 /*!
  * @function dispatch_pthread_root_queue_create
  *
@@ -229,13 +379,13 @@
  * @result
  * The newly created dispatch pthread root queue.
  */
-API_AVAILABLE(macos(10.9), ios(6.0))
+API_AVAILABLE(macos(10.9), ios(6.0)) DISPATCH_LINUX_UNAVAILABLE()
 DISPATCH_EXPORT DISPATCH_MALLOC DISPATCH_RETURNS_RETAINED DISPATCH_WARN_RESULT
 DISPATCH_NOTHROW
-dispatch_queue_t
+dispatch_queue_global_t
 dispatch_pthread_root_queue_create(const char *_Nullable label,
-	unsigned long flags, const pthread_attr_t DISPATCH_QUEUE_NULLABLE_PTHREAD_ATTR_PTR *_Nullable attr,
-	dispatch_block_t _Nullable configure);
+		unsigned long flags, const pthread_attr_t DISPATCH_QUEUE_NULLABLE_PTHREAD_ATTR_PTR *_Nullable attr,
+		dispatch_block_t _Nullable configure);
 
 /*!
  * @function dispatch_pthread_root_queue_flags_pool_size
@@ -265,8 +415,6 @@
 			(unsigned long)pool_size);
 }
 
-#endif /* __BLOCKS__ */
-
 /*!
  * @function dispatch_pthread_root_queue_copy_current
  *
@@ -279,8 +427,9 @@
  * A new reference to a pthread root queue object or NULL.
  */
 API_AVAILABLE(macos(10.12), ios(10.0), tvos(10.0), watchos(3.0))
+DISPATCH_LINUX_UNAVAILABLE()
 DISPATCH_EXPORT DISPATCH_RETURNS_RETAINED DISPATCH_WARN_RESULT DISPATCH_NOTHROW
-dispatch_queue_t _Nullable
+dispatch_queue_global_t _Nullable
 dispatch_pthread_root_queue_copy_current(void);
 
 /*!
@@ -294,6 +443,8 @@
  */
 #define DISPATCH_APPLY_CURRENT_ROOT_QUEUE ((dispatch_queue_t _Nonnull)0)
 
+#endif /* defined(__BLOCKS__) && defined(__APPLE__) */
+
 /*!
  * @function dispatch_async_enforce_qos_class_f
  *
@@ -328,7 +479,7 @@
 DISPATCH_EXPORT DISPATCH_NONNULL1 DISPATCH_NONNULL3 DISPATCH_NOTHROW
 void
 dispatch_async_enforce_qos_class_f(dispatch_queue_t queue,
-	void *_Nullable context, dispatch_function_t work);
+		void *_Nullable context, dispatch_function_t work);
 
 #ifdef __ANDROID__
 /*!
diff --git a/private/source_private.h b/private/source_private.h
index ad22e6a..6396c11 100644
--- a/private/source_private.h
+++ b/private/source_private.h
@@ -56,6 +56,19 @@
  *
  * The handle is the interval value in milliseconds or frames.
  * The mask specifies which flags from dispatch_source_timer_flags_t to apply.
+ *
+ * Starting with macOS 10.14, iOS 12, dispatch_source_set_timer()
+ * can be used on such sources, and its arguments are used as follows:
+ * - start:
+ *   must be DISPATCH_TIME_NOW or DISPATCH_TIME_FOREVER.
+ *   DISPATCH_TIME_NOW will enable the timer, and align its phase, and
+ *   DISPATCH_TIME_FOREVER will disable the timer as usual.
+ * - interval:
+ *   its unit is in milliseconds by default, or frames if the source
+ *   was created with the DISPATCH_INTERVAL_UI_ANIMATION flag.
+ * - leeway:
+ *   per-thousands of the interval (valid values range from 0 to 1000).
+ *   If ~0ull is passed, the default leeway for the interval is used instead.
  */
 #define DISPATCH_SOURCE_TYPE_INTERVAL (&_dispatch_source_type_interval)
 API_AVAILABLE(macos(10.9), ios(7.0))
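[Editor's note: a sketch of the macOS 10.14 / iOS 12 behaviour described above,
assuming a plain (non-UI-animation) interval source; q and tick are
placeholders.]

<code>
// 500ms interval source, later retargeted to 250ms with a 5% (50/1000) leeway.
dispatch_source_t ds = dispatch_source_create(DISPATCH_SOURCE_TYPE_INTERVAL,
        500 /* ms */, 0, q);
dispatch_source_set_event_handler(ds, ^{ tick(); }); // hypothetical handler
dispatch_activate(ds);
// start: DISPATCH_TIME_NOW enables the timer and aligns its phase
// interval: milliseconds (DISPATCH_INTERVAL_UI_ANIMATION was not used)
// leeway: per-thousands of the interval
dispatch_source_set_timer(ds, DISPATCH_TIME_NOW, 250, 50);
</code>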
@@ -107,7 +120,7 @@
  * @discussion A dispatch source that monitors events on a network channel.
  */
 #define DISPATCH_SOURCE_TYPE_NW_CHANNEL (&_dispatch_source_type_nw_channel)
-API_AVAILABLE(macos(10.12), ios(10.0), tvos(10.0), watchos(3.0)) DISPATCH_LINUX_UNAVAILABLE()
+API_AVAILABLE(macos(10.13), ios(11.0), tvos(11.0), watchos(4.0)) DISPATCH_LINUX_UNAVAILABLE()
 DISPATCH_SOURCE_TYPE_DECL(nw_channel);
 
 __END_DECLS
@@ -223,6 +236,9 @@
  *
  * @constant DISPATCH_VFS_DESIREDDISK
  * Filesystem has exceeded the DESIREDDISK level
+ *
+ * @constant DISPATCH_VFS_FREE_SPACE_CHANGE
+ * Filesystem free space changed.
  */
 enum {
 	DISPATCH_VFS_NOTRESP = 0x0001,
@@ -238,9 +254,45 @@
 	DISPATCH_VFS_QUOTA = 0x1000,
 	DISPATCH_VFS_NEARLOWDISK = 0x2000,
 	DISPATCH_VFS_DESIREDDISK = 0x4000,
+	DISPATCH_VFS_FREE_SPACE_CHANGE = 0x8000,
 };
 
 /*!
+ * @enum dispatch_clockid_t
+ *
+ * @discussion
+ * These values can be used with DISPATCH_SOURCE_TYPE_TIMER as a "handle"
+ * to anchor the timer to a given clock which allows for various optimizations.
+ *
+ * Note that using an explicit clock will make the dispatch source "strict"
+ * like dispatch_source_set_mandatory_cancel_handler() does.
+ *
+ * @constant DISPATCH_CLOCKID_UPTIME
+ * A monotonic clock that doesn't tick while the machine is asleep.
+ * Equivalent to the CLOCK_UPTIME clock ID on BSD systems.
+ *
+ * @constant DISPATCH_CLOCKID_MONOTONIC
+ * A monotonic clock that ticks while the machine sleeps.
+ * Equivalent to POSIX CLOCK_MONOTONIC.
+ * (Note that on Linux, CLOCK_MONOTONIC isn't conformant and doesn't tick while
+ * sleeping, hence on Linux this is the same clock as CLOCK_BOOTTIME).
+ *
+ * @constant DISPATCH_CLOCKID_WALLTIME
+ * A clock equivalent to the wall clock time, as returned by gettimeofday().
+ * Equivalent to POSIX CLOCK_REALTIME.
+ *
+ * @constant DISPATCH_CLOCKID_REALTIME
+ * An alias for DISPATCH_CLOCKID_WALLTIME to match the POSIX clock of the
+ * same name.
+ */
+DISPATCH_ENUM(dispatch_clockid, uintptr_t,
+	DISPATCH_CLOCKID_UPTIME DISPATCH_ENUM_API_AVAILABLE(macos(10.14), ios(12.0), tvos(12.0), watchos(5.0)) = 1,
+	DISPATCH_CLOCKID_MONOTONIC DISPATCH_ENUM_API_AVAILABLE(macos(10.14), ios(12.0), tvos(12.0), watchos(5.0)) = 2,
+	DISPATCH_CLOCKID_WALLTIME DISPATCH_ENUM_API_AVAILABLE(macos(10.14), ios(12.0), tvos(12.0), watchos(5.0)) = 3,
+	DISPATCH_CLOCKID_REALTIME DISPATCH_ENUM_API_AVAILABLE(macos(10.14), ios(12.0), tvos(12.0), watchos(5.0)) = 3,
+);
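[Editor's note: a minimal sketch, assuming the clock ID is passed as the
"handle" argument of dispatch_source_create() as described above; q and
on_timer are placeholders.]

<code>
// Anchors the timer to the monotonic clock; this also makes the source strict.
dispatch_source_t ds = dispatch_source_create(DISPATCH_SOURCE_TYPE_TIMER,
        DISPATCH_CLOCKID_MONOTONIC, 0, q);
dispatch_source_set_timer(ds, DISPATCH_TIME_NOW, 30 * NSEC_PER_SEC,
        1 * NSEC_PER_SEC);
dispatch_source_set_event_handler(ds, ^{ on_timer(); }); // hypothetical
dispatch_activate(ds);
</code>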
+
+/*!
  * @enum dispatch_source_timer_flags_t
  *
  * @constant DISPATCH_TIMER_BACKGROUND
@@ -293,7 +345,7 @@
 enum {
 	DISPATCH_PROC_REAP DISPATCH_ENUM_API_DEPRECATED("unsupported flag",
 			macos(10.6,10.9), ios(4.0,7.0)) = 0x10000000,
-	DISPATCH_PROC_EXIT_STATUS DISPATCH_ENUM_API_AVAILABLE(macos(10.12), ios(10.0), tvos(10.0), watchos(2.0)) = 0x04000000,
+	DISPATCH_PROC_EXIT_STATUS DISPATCH_ENUM_API_AVAILABLE(macos(10.13), ios(11.0), tvos(11.0), watchos(2.0)) = 0x04000000,
 };
 
 /*!
@@ -356,8 +408,8 @@
 	DISPATCH_MEMORYPRESSURE_PROC_LIMIT_WARN DISPATCH_ENUM_API_AVAILABLE(macos(10.12), ios(10.0), tvos(10.0), watchos(3.0)) = 0x10,
 
 	DISPATCH_MEMORYPRESSURE_PROC_LIMIT_CRITICAL DISPATCH_ENUM_API_AVAILABLE(macos(10.12), ios(10.0), tvos(10.0), watchos(3.0)) = 0x20,
-	
-	DISPATCH_MEMORYPRESSURE_MSL_STATUS DISPATCH_ENUM_API_AVAILABLE(macos(10.12), ios(10.0), tvos(10.0), watchos(3.0)) = 0xf0000000,
+
+	DISPATCH_MEMORYPRESSURE_MSL_STATUS DISPATCH_ENUM_API_AVAILABLE(macos(10.13), ios(11.0), tvos(11.0), watchos(4.0)) = 0xf0000000,
 };
 
 /*!
@@ -425,7 +477,7 @@
  * The result of passing NULL in this parameter is undefined.
  */
 #ifdef __BLOCKS__
-API_AVAILABLE(macos(10.12), ios(10.0), tvos(10.0), watchos(3.0))
+API_AVAILABLE(macos(10.13), ios(11.0), tvos(11.0), watchos(4.0))
 DISPATCH_EXPORT DISPATCH_NONNULL_ALL DISPATCH_NOTHROW
 void
 dispatch_source_set_mandatory_cancel_handler(dispatch_source_t source,
@@ -452,7 +504,7 @@
  * context of the dispatch source at the time the handler call is made.
  * The result of passing NULL in this parameter is undefined.
  */
-API_AVAILABLE(macos(10.12), ios(10.0), tvos(10.0), watchos(3.0))
+API_AVAILABLE(macos(10.13), ios(11.0), tvos(11.0), watchos(4.0))
 DISPATCH_EXPORT DISPATCH_NONNULL_ALL DISPATCH_NOTHROW
 void
 dispatch_source_set_mandatory_cancel_handler_f(dispatch_source_t source,
@@ -583,7 +635,7 @@
  * the value of the size argument. If this is less than the value of the size
  * argument, the remaining space in data will have been populated with zeroes.
  */
-API_AVAILABLE(macos(10.12), ios(10.0), tvos(10.0), watchos(3.0))
+API_AVAILABLE(macos(10.13), ios(11.0), tvos(11.0), watchos(4.0))
 DISPATCH_EXPORT DISPATCH_NONNULL_ALL DISPATCH_WARN_RESULT DISPATCH_PURE
 DISPATCH_NOTHROW
 size_t
diff --git a/private/time_private.h b/private/time_private.h
new file mode 100644
index 0000000..ae341e6
--- /dev/null
+++ b/private/time_private.h
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2017 Apple Inc. All rights reserved.
+ *
+ * @APPLE_APACHE_LICENSE_HEADER_START@
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * @APPLE_APACHE_LICENSE_HEADER_END@
+ */
+
+/*
+ * IMPORTANT: This header file describes INTERNAL interfaces to libdispatch
+ * which are subject to change in future releases. Any applications relying on
+ * these interfaces WILL break.
+ */
+
+#ifndef __DISPATCH_TIME_PRIVATE__
+#define __DISPATCH_TIME_PRIVATE__
+
+#ifndef __DISPATCH_INDIRECT__
+#error "Please #include <dispatch/private.h> instead of this file directly."
+#include <dispatch/base.h> // for HeaderDoc
+#endif
+
+/*
+ * @constant DISPATCH_MONOTONICTIME_NOW
+ * A dispatch_time_t value that corresponds to the current value of the
+ * platform's monotonic clock. On Apple platforms, this clock is based on
+ * mach_continuous_time(). Use this value with the dispatch_time() function to
+ * derive a time value for a timer in monotonic time (i.e. a timer that
+ * continues to tick while the system is asleep). For example:
+ *
+ * dispatch_time_t t = dispatch_time(DISPATCH_MONOTONICTIME_NOW,5*NSEC_PER_SEC);
+ * dispatch_source_t ds = dispatch_source_create(DISPATCH_SOURCE_TYPE_TIMER,
+ *			0, 0, q);
+ * dispatch_source_set_event_handler(ds, ^{ ...  });
+ * dispatch_source_set_timer(ds, t, 10 * NSEC_PER_SEC, 0);
+ * dispatch_activate(ds);
+ */
+enum {
+	DISPATCH_MONOTONICTIME_NOW DISPATCH_ENUM_API_AVAILABLE(macos(10.14), ios(12.0), tvos(12.0), watchos(5.0)) = (1ull << 63)
+};
+
+#ifdef __APPLE__
+
+// Helper macros for uptime, monotonic time, and wall time.
+#define _dispatch_uptime_after_nsec(t) \
+		dispatch_time(DISPATCH_TIME_NOW, (t))
+#define _dispatch_uptime_after_usec(t) \
+		dispatch_time(DISPATCH_TIME_NOW, (t) * NSEC_PER_USEC)
+#define _dispatch_uptime_after_msec(t) \
+		dispatch_time(DISPATCH_TIME_NOW, (t) * NSEC_PER_MSEC)
+#define _dispatch_uptime_after_sec(t) \
+		dispatch_time(DISPATCH_TIME_NOW, (t) * NSEC_PER_SEC)
+
+#define _dispatch_monotonictime_after_nsec(t) \
+		dispatch_time(DISPATCH_MONOTONICTIME_NOW, (t))
+#define _dispatch_monotonictime_after_usec(t) \
+		dispatch_time(DISPATCH_MONOTONICTIME_NOW, (t) * NSEC_PER_USEC)
+#define _dispatch_monotonictime_after_msec(t) \
+		dispatch_time(DISPATCH_MONOTONICTIME_NOW, (t) * NSEC_PER_MSEC)
+#define _dispatch_monotonictime_after_sec(t) \
+		dispatch_time(DISPATCH_MONOTONICTIME_NOW, (t) * NSEC_PER_SEC)
+
+#define _dispatch_walltime_after_nsec(t) \
+		dispatch_time(DISPATCH_WALLTIME_NOW, (t))
+#define _dispatch_walltime_after_usec(t) \
+		dispatch_time(DISPATCH_WALLTIME_NOW, (t) * NSEC_PER_USEC)
+#define _dispatch_walltime_after_msec(t) \
+		dispatch_time(DISPATCH_WALLTIME_NOW, (t) * NSEC_PER_MSEC)
+#define _dispatch_walltime_after_sec(t) \
+		dispatch_time(DISPATCH_WALLTIME_NOW, (t) * NSEC_PER_SEC)
+
+#endif // __APPLE__
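[Editor's note: a sketch of using one of the helper macros above; the macros
simply wrap dispatch_time() with the matching clock anchor, so the result can
be used wherever a dispatch_time_t is expected. q and on_wakeup_check are
placeholders.]

<code>
dispatch_after(_dispatch_monotonictime_after_sec(5), q, ^{
    // Runs roughly 5 seconds from now, counting time spent asleep.
    on_wakeup_check(); // hypothetical handler
});
</code>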
+
+#endif
+
diff --git a/private/workloop_private.h b/private/workloop_private.h
new file mode 100644
index 0000000..73f4d7a
--- /dev/null
+++ b/private/workloop_private.h
@@ -0,0 +1,440 @@
+/*
+ * Copyright (c) 2017-2018 Apple Inc. All rights reserved.
+ *
+ * @APPLE_APACHE_LICENSE_HEADER_START@
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * @APPLE_APACHE_LICENSE_HEADER_END@
+ */
+
+/*
+ * IMPORTANT: This header file describes INTERNAL interfaces to libdispatch
+ * which are subject to change in future releases of Mac OS X. Any applications
+ * relying on these interfaces WILL break.
+ */
+
+#ifndef __DISPATCH_WORKLOOP_PRIVATE__
+#define __DISPATCH_WORKLOOP_PRIVATE__
+
+#ifndef __DISPATCH_INDIRECT__
+#error "Please #include <dispatch/private.h> instead of this file directly."
+#include <dispatch/base.h> // for HeaderDoc
+#endif
+
+/******************************************************************************\
+ *
+ * THIS FILE IS AN IN-PROGRESS INTERFACE THAT IS SUBJECT TO CHANGE
+ *
+\******************************************************************************/
+
+DISPATCH_ASSUME_NONNULL_BEGIN
+
+__BEGIN_DECLS
+
+/*!
+ * @typedef dispatch_workloop_t
+ *
+ * @abstract
+ * Dispatch workloops invoke workitems submitted to them in priority order.
+ *
+ * @discussion
+ * A dispatch workloop is a flavor of dispatch_queue_t that is a priority
+ * ordered queue (using the QOS class of the submitted workitems as the
+ * ordering).
+ *
+ * Between each workitem invocation, the workloop will evaluate whether higher
+ * priority workitems have since been submitted and execute these first.
+ *
+ * Serial queues targeting a workloop maintain FIFO execution of their
+ * workitems. However, the workloop may reorder workitems submitted to
+ * independent serial queues targeting it with respect to each other,
+ * based on their priorities.
+ *
+ * A dispatch workloop is a "subclass" of dispatch_queue_t which can be passed
+ * to all APIs accepting a dispatch queue, except for functions from the
+ * dispatch_sync() family. dispatch_async_and_wait() must be used for workloop
+ * objects. Functions from the dispatch_sync() family on queues targeting
+ * a workloop are still permitted but discouraged for performance reasons.
+ */
+#if defined(__DISPATCH_BUILDING_DISPATCH__) && !defined(__OBJC__)
+typedef struct dispatch_workloop_s *dispatch_workloop_t;
+#else
+DISPATCH_DECL_SUBCLASS(dispatch_workloop, dispatch_queue);
+#endif
+
+/*!
+ * @function dispatch_workloop_create
+ *
+ * @abstract
+ * Creates a new dispatch workloop to which workitems may be submitted.
+ *
+ * @param label
+ * A string label to attach to the workloop.
+ *
+ * @result
+ * The newly created dispatch workloop.
+ */
+API_AVAILABLE(macos(10.14), ios(12.0), tvos(12.0), watchos(5.0))
+DISPATCH_EXPORT DISPATCH_MALLOC DISPATCH_RETURNS_RETAINED DISPATCH_WARN_RESULT
+DISPATCH_NOTHROW
+dispatch_workloop_t
+dispatch_workloop_create(const char *_Nullable label);
+
+/*!
+ * @function dispatch_workloop_create_inactive
+ *
+ * @abstract
+ * Creates a new inactive dispatch workloop that can be setup and then
+ * activated.
+ *
+ * @discussion
+ * Creating an inactive workloop allows it to receive further configuration
+ * before it is activated and before workitems can be submitted to it.
+ *
+ * Submitting workitems to an inactive workloop is undefined and will cause the
+ * process to be terminated.
+ *
+ * @param label
+ * A string label to attach to the workloop.
+ *
+ * @result
+ * The newly created dispatch workloop.
+ */
+API_AVAILABLE(macos(10.14), ios(12.0), tvos(12.0), watchos(5.0))
+DISPATCH_EXPORT DISPATCH_MALLOC DISPATCH_RETURNS_RETAINED DISPATCH_WARN_RESULT
+DISPATCH_NOTHROW
+dispatch_workloop_t
+dispatch_workloop_create_inactive(const char *_Nullable label);
+
+/*!
+ * @function dispatch_workloop_set_autorelease_frequency
+ *
+ * @abstract
+ * Sets the autorelease frequency of the workloop.
+ *
+ * @discussion
+ * See dispatch_queue_attr_make_with_autorelease_frequency().
+ * The default policy for a workloop is
+ * DISPATCH_AUTORELEASE_FREQUENCY_WORK_ITEM.
+ *
+ * @param workloop
+ * The dispatch workloop to modify.
+ *
+ * This workloop must be inactive, passing an activated object is undefined
+ * and will cause the process to be terminated.
+ *
+ * @param frequency
+ * The requested autorelease frequency.
+ */
+API_AVAILABLE(macos(10.14), ios(12.0), tvos(12.0), watchos(5.0))
+DISPATCH_EXPORT DISPATCH_NONNULL_ALL DISPATCH_NOTHROW
+void
+dispatch_workloop_set_autorelease_frequency(dispatch_workloop_t workloop,
+		dispatch_autorelease_frequency_t frequency);
+
+DISPATCH_ENUM(dispatch_workloop_param_flags, uint64_t,
+	DISPATCH_WORKLOOP_NONE DISPATCH_ENUM_API_AVAILABLE(macos(10.14), ios(12.0), tvos(12.0), watchos(5.0)) = 0x0,
+	DISPATCH_WORKLOOP_FIXED_PRIORITY DISPATCH_ENUM_API_AVAILABLE(macos(10.14), ios(12.0), tvos(12.0), watchos(5.0)) = 0x1,
+);
+
+/*!
+ * @function dispatch_workloop_set_qos_class_floor
+ *
+ * @abstract
+ * Sets the QOS class floor of a workloop.
+ *
+ * @discussion
+ * See dispatch_set_qos_class_floor().
+ *
+ * This function is strictly equivalent to dispatch_set_qos_class_floor() but
+ * allows extra flags to be passed.
+ *
+ * Using both dispatch_workloop_set_scheduler_priority() and
+ * dispatch_set_qos_class_floor() or dispatch_workloop_set_qos_class_floor()
+ * is undefined and will cause the process to be terminated.
+ *
+ * @param workloop
+ * The dispatch workloop to modify.
+ *
+ * This workloop must be inactive, passing an activated object is undefined
+ * and will cause the process to be terminated.
+ */
+API_AVAILABLE(macos(10.14), ios(12.0), tvos(12.0), watchos(5.0))
+DISPATCH_EXPORT DISPATCH_NONNULL_ALL DISPATCH_NOTHROW
+void
+dispatch_workloop_set_qos_class_floor(dispatch_workloop_t workloop,
+		dispatch_qos_class_t qos, int relpri, dispatch_workloop_param_flags_t flags);
+
+/*!
+ * @function dispatch_workloop_set_scheduler_priority
+ *
+ * @abstract
+ * Sets the scheduler priority for a dispatch workloop.
+ *
+ * @discussion
+ * This sets the scheduler priority of the threads that the runtime will bring
+ * up to service this workloop.
+ *
+ * QOS propagation still functions on these workloops, but its effect on the
+ * priority of the thread brought up to service this workloop is ignored.
+ *
+ * Using both dispatch_workloop_set_scheduler_priority() and
+ * dispatch_set_qos_class_floor() or dispatch_workloop_set_qos_class_floor()
+ * is undefined and will cause the process to be terminated.
+ *
+ * @param workloop
+ * The dispatch workloop to modify.
+ *
+ * This workloop must be inactive, passing an activated object is undefined
+ * and will cause the process to be terminated.
+ */
+API_AVAILABLE(macos(10.14), ios(12.0), tvos(12.0), watchos(5.0))
+DISPATCH_EXPORT DISPATCH_NONNULL_ALL DISPATCH_NOTHROW
+void
+dispatch_workloop_set_scheduler_priority(dispatch_workloop_t workloop,
+		int priority, dispatch_workloop_param_flags_t flags);
+
+/*!
+ * @function dispatch_workloop_set_cpupercent
+ *
+ * @abstract
+ * Sets the cpu percent and refill attributes for a dispatch workloop.
+ *
+ * @discussion
+ * This should only be used if the workloop was also set up with the
+ * DISPATCH_WORKLOOP_FIXED_PRIORITY flag, as a safeguard against
+ * busy loops that could starve the rest of the system forever.
+ *
+ * If DISPATCH_WORKLOOP_FIXED_PRIORITY wasn't passed, using this function is
+ * undefined and will cause the process to be terminated.
+ *
+ * @param workloop
+ * The dispatch workloop to modify.
+ *
+ * This workloop must be inactive, passing an activated object is undefined
+ * and will cause the process to be terminated.
+ */
+API_AVAILABLE(macos(10.14), ios(12.0), tvos(12.0), watchos(5.0))
+DISPATCH_EXPORT DISPATCH_NONNULL_ALL DISPATCH_NOTHROW
+void
+dispatch_workloop_set_cpupercent(dispatch_workloop_t workloop, uint8_t percent,
+		uint32_t refillms);
+
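[Editor's note: a sketch tying the workloop setup calls above together; the
label, QOS class, and cpupercent values are illustrative only.]

<code>
dispatch_workloop_t wl = dispatch_workloop_create_inactive("com.example.io");
dispatch_workloop_set_qos_class_floor(wl, QOS_CLASS_USER_INITIATED, 0,
        DISPATCH_WORKLOOP_FIXED_PRIORITY);
// Only valid because DISPATCH_WORKLOOP_FIXED_PRIORITY was passed above.
dispatch_workloop_set_cpupercent(wl, 80, 100 /* refill, ms */);
dispatch_activate(wl);
</code>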
+/*!
+ * @function dispatch_workloop_is_current()
+ *
+ * @abstract
+ * Returns whether the current thread was brought up by the runtime to service
+ * this workloop.
+ *
+ * @discussion
+ * Note that when using <code>dispatch_async_and_wait(workloop, ^{ ... })</code>
+ * then <code>workloop</code> will be seen as the "current" one by the submitted
+ * workitem, but that is not the case when using dispatch_sync() on a queue
+ * targeting the workloop.
+ */
+API_AVAILABLE(macos(10.14), ios(12.0), tvos(12.0), watchos(5.0))
+DISPATCH_EXPORT DISPATCH_NONNULL_ALL DISPATCH_NOTHROW
+bool
+dispatch_workloop_is_current(dispatch_workloop_t workloop);
+
+/*!
+ * @function dispatch_workloop_copy_current()
+ *
+ * @abstract
+ * Returns a copy of the workloop that is being serviced by the calling thread,
+ * if any.
+ *
+ * @discussion
+ * If the thread is not a workqueue thread, or is not servicing a dispatch
+ * workloop, then NULL is returned.
+ *
+ * This returns a retained object that must be released with dispatch_release().
+ */
+API_AVAILABLE(macos(10.14), ios(12.0), tvos(12.0), watchos(5.0))
+DISPATCH_EXPORT DISPATCH_RETURNS_RETAINED DISPATCH_NOTHROW
+dispatch_workloop_t _Nullable
+dispatch_workloop_copy_current(void);
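[Editor's note: a minimal sketch of the copy/is_current pair; no names beyond
the declarations above are assumed.]

<code>
dispatch_workloop_t cur = dispatch_workloop_copy_current();
if (cur != NULL) {
    bool serviced = dispatch_workloop_is_current(cur); // true by construction
    (void)serviced;
    dispatch_release(cur); // balance the retained return value
}
</code>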
+
+// Equivalent to dispatch_workloop_set_qos_class_floor(workloop, qos, 0, flags)
+API_DEPRECATED_WITH_REPLACEMENT("dispatch_workloop_set_qos_class_floor",
+		macos(10.14,10.14), ios(12.0,12.0), tvos(12.0,12.0), watchos(5.0,5.0))
+DISPATCH_EXPORT DISPATCH_NONNULL_ALL DISPATCH_NOTHROW
+void
+dispatch_workloop_set_qos_class(dispatch_workloop_t workloop,
+		dispatch_qos_class_t qos, dispatch_workloop_param_flags_t flags);
+
+API_AVAILABLE(macos(10.14), ios(12.0), tvos(12.0), watchos(5.0))
+DISPATCH_EXPORT DISPATCH_NOTHROW
+bool
+_dispatch_workloop_should_yield_4NW(void);
+
+/*!
+ * @function dispatch_async_and_wait
+ *
+ * @abstract
+ * Submits a block for synchronous execution on a dispatch queue.
+ *
+ * @discussion
+ * Submits a workitem to a dispatch queue like dispatch_async(), however
+ * dispatch_async_and_wait() will not return until the workitem has finished.
+ *
+ * Like functions of the dispatch_sync family, dispatch_async_and_wait() is
+ * subject to dead-lock (See dispatch_sync() for details).
+ *
+ * However, dispatch_async_and_wait() differs from functions of the
+ * dispatch_sync family in two fundamental ways: how it respects queue
+ * attributes and how it chooses the execution context invoking the workitem.
+ *
+ * <b>Differences with dispatch_sync()</b>
+ *
+ * Work items submitted to a queue with dispatch_async_and_wait() observe all
+ * queue attributes of that queue when invoked (including autorelease frequency
+ * or QOS class).
+ *
+ * When the runtime has brought up a thread to invoke the asynchronous workitems
+ * already submitted to the specified queue, that servicing thread will also be
+ * used to execute synchronous work submitted to the queue with
+ * dispatch_async_and_wait().
+ *
+ * However, if the runtime has not brought up a thread to service the specified
+ * queue (because it has no workitems enqueued, or only synchronous workitems),
+ * then dispatch_async_and_wait() will invoke the workitem on the calling thread,
+ * similar to the behaviour of functions in the dispatch_sync family.
+ *
+ * As an exception, if the queue the work is submitted to doesn't target
+ * a global concurrent queue (for example because it targets the main queue),
+ * then the workitem will never be invoked by the thread calling
+ * dispatch_async_and_wait().
+ *
+ * In other words, dispatch_async_and_wait() is similar to submitting
+ * a dispatch_block_create()d workitem to a queue and then waiting on it, as
+ * shown in the code example below. However, dispatch_async_and_wait() is
+ * significantly more efficient when a new thread is not required to execute
+ * the workitem (as it will use the stack of the submitting thread instead of
+ * requiring heap allocations).
+ *
+ * <code>
+ *     dispatch_block_t b = dispatch_block_create(0, block);
+ *     dispatch_async(queue, b);
+ *     dispatch_block_wait(b, DISPATCH_TIME_FOREVER);
+ *     Block_release(b);
+ * </code>
+ *
+ * @param queue
+ * The target dispatch queue to which the block is submitted.
+ * The result of passing NULL in this parameter is undefined.
+ *
+ * @param block
+ * The block to be invoked on the target dispatch queue.
+ * The result of passing NULL in this parameter is undefined.
+ */
+#ifdef __BLOCKS__
+API_AVAILABLE(macos(10.14), ios(12.0), tvos(12.0), watchos(5.0))
+DISPATCH_EXPORT DISPATCH_NONNULL_ALL DISPATCH_NOTHROW
+void
+dispatch_async_and_wait(dispatch_queue_t queue,
+		DISPATCH_NOESCAPE dispatch_block_t block);
+#endif
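[Editor's note: a short usage sketch; queue, compute_value, and consume are
placeholders.]

<code>
__block int result = 0;
dispatch_async_and_wait(queue, ^{
    // Observes the queue's attributes (QOS, autorelease frequency) and only
    // returns once the block has finished executing.
    result = compute_value(); // hypothetical work
});
consume(result); // hypothetical
</code>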
+
+/*!
+ * @function dispatch_async_and_wait_f
+ *
+ * @abstract
+ * Submits a function for synchronous execution on a dispatch queue.
+ *
+ * @discussion
+ * See dispatch_async_and_wait() for details.
+ *
+ * @param queue
+ * The target dispatch queue to which the function is submitted.
+ * The result of passing NULL in this parameter is undefined.
+ *
+ * @param context
+ * The application-defined context parameter to pass to the function.
+ *
+ * @param work
+ * The application-defined function to invoke on the target queue. The first
+ * parameter passed to this function is the context provided to
+ * dispatch_async_and_wait_f().
+ * The result of passing NULL in this parameter is undefined.
+ */
+API_AVAILABLE(macos(10.14), ios(12.0), tvos(12.0), watchos(5.0))
+DISPATCH_EXPORT DISPATCH_NONNULL1 DISPATCH_NONNULL3 DISPATCH_NOTHROW
+void
+dispatch_async_and_wait_f(dispatch_queue_t queue,
+		void *_Nullable context, dispatch_function_t work);
+
+/*!
+ * @function dispatch_barrier_async_and_wait
+ *
+ * @abstract
+ * Submits a block for synchronous execution on a dispatch queue.
+ *
+ * @discussion
+ * Submits a block to a dispatch queue like dispatch_async_and_wait(), but marks
+ * that block as a barrier (relevant only on DISPATCH_QUEUE_CONCURRENT
+ * queues).
+ *
+ * @param queue
+ * The target dispatch queue to which the block is submitted.
+ * The result of passing NULL in this parameter is undefined.
+ *
+ * @param block
+ * The application-defined block to invoke on the target queue.
+ * The result of passing NULL in this parameter is undefined.
+ */
+#ifdef __BLOCKS__
+API_AVAILABLE(macos(10.14), ios(12.0), tvos(12.0), watchos(5.0))
+DISPATCH_EXPORT DISPATCH_NONNULL_ALL DISPATCH_NOTHROW
+void
+dispatch_barrier_async_and_wait(dispatch_queue_t queue,
+		DISPATCH_NOESCAPE dispatch_block_t block);
+#endif
+
+/*!
+ * @function dispatch_barrier_async_and_wait_f
+ *
+ * @abstract
+ * Submits a function for synchronous execution on a dispatch queue.
+ *
+ * @discussion
+ * Submits a function to a dispatch queue like dispatch_async_and_wait_f(), but
+ * marks that function as a barrier (relevant only on DISPATCH_QUEUE_CONCURRENT
+ * queues).
+ *
+ * @param queue
+ * The target dispatch queue to which the function is submitted.
+ * The result of passing NULL in this parameter is undefined.
+ *
+ * @param context
+ * The application-defined context parameter to pass to the function.
+ *
+ * @param work
+ * The application-defined function to invoke on the target queue. The first
+ * parameter passed to this function is the context provided to
+ * dispatch_barrier_async_and_wait_f().
+ * The result of passing NULL in this parameter is undefined.
+ */
+API_AVAILABLE(macos(10.14), ios(12.0), tvos(12.0), watchos(5.0))
+DISPATCH_EXPORT DISPATCH_NONNULL1 DISPATCH_NONNULL3 DISPATCH_NOTHROW
+void
+dispatch_barrier_async_and_wait_f(dispatch_queue_t queue,
+		void *_Nullable context, dispatch_function_t work);
+
+__END_DECLS
+
+DISPATCH_ASSUME_NONNULL_END
+
+#endif
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 4da1b3f..8d4ea63 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -53,6 +53,7 @@
               shims/perfmon.h
               shims/time.h
               shims/tsd.h
+              shims/yield.c
               shims/yield.h)
 
 set_target_properties(dispatch
diff --git a/src/allocator.c b/src/allocator.c
index e6ea772..6628409 100644
--- a/src/allocator.c
+++ b/src/allocator.c
@@ -41,7 +41,7 @@
 //
 // If something goes wrong here, the symptom would be a NULL dereference
 // in alloc_continuation_from_heap or _magazine when derefing the magazine ptr.
-static dispatch_heap_t _dispatch_main_heap;
+DISPATCH_GLOBAL(dispatch_heap_t _dispatch_main_heap);
 
 DISPATCH_ALWAYS_INLINE
 static void
@@ -112,11 +112,11 @@
 	unsigned int bindex = mindex % BITMAPS_PER_SUPERMAP;
 	unsigned int sindex = mindex / BITMAPS_PER_SUPERMAP;
 	dispatch_assert(&m->maps[sindex][bindex] == bitmap);
-	if (fastpath(continuation_out)) {
+	if (likely(continuation_out)) {
 		*continuation_out = continuation_address(m, sindex, bindex, index);
 	}
-	if (fastpath(supermap_out)) *supermap_out = supermap_address(m, sindex);
-	if (fastpath(bitmap_index_out)) *bitmap_index_out = bindex;
+	if (likely(supermap_out)) *supermap_out = supermap_address(m, sindex);
+	if (likely(bitmap_index_out)) *bitmap_index_out = bindex;
 }
 
 DISPATCH_ALWAYS_INLINE_NDEBUG DISPATCH_CONST
@@ -144,14 +144,14 @@
 	padded_continuation *p = (padded_continuation *)c;
 	struct dispatch_magazine_s *m = magazine_for_continuation(c);
 #if PACK_FIRST_PAGE_WITH_CONTINUATIONS
-	if (fastpath(continuation_is_in_first_page(c))) {
+	if (likely(continuation_is_in_first_page(c))) {
 		cindex = (unsigned int)(p - m->fp_conts);
 		index = cindex % CONTINUATIONS_PER_BITMAP;
 		mindex = cindex / CONTINUATIONS_PER_BITMAP;
-		if (fastpath(supermap_out)) *supermap_out = NULL;
-		if (fastpath(bitmap_index_out)) *bitmap_index_out = mindex;
-		if (fastpath(bitmap_out)) *bitmap_out = &m->fp_maps[mindex];
-		if (fastpath(index_out)) *index_out = index;
+		if (likely(supermap_out)) *supermap_out = NULL;
+		if (likely(bitmap_index_out)) *bitmap_index_out = mindex;
+		if (likely(bitmap_out)) *bitmap_out = &m->fp_maps[mindex];
+		if (likely(index_out)) *index_out = index;
 		return;
 	}
 #endif // PACK_FIRST_PAGE_WITH_CONTINUATIONS
@@ -159,10 +159,10 @@
 	sindex = cindex / (BITMAPS_PER_SUPERMAP * CONTINUATIONS_PER_BITMAP);
 	mindex = (cindex / CONTINUATIONS_PER_BITMAP) % BITMAPS_PER_SUPERMAP;
 	index = cindex % CONTINUATIONS_PER_BITMAP;
-	if (fastpath(supermap_out)) *supermap_out = &m->supermaps[sindex];
-	if (fastpath(bitmap_index_out)) *bitmap_index_out = mindex;
-	if (fastpath(bitmap_out)) *bitmap_out = &m->maps[sindex][mindex];
-	if (fastpath(index_out)) *index_out = index;
+	if (likely(supermap_out)) *supermap_out = &m->supermaps[sindex];
+	if (likely(bitmap_index_out)) *bitmap_index_out = mindex;
+	if (likely(bitmap_out)) *bitmap_out = &m->maps[sindex][mindex];
+	if (likely(index_out)) *index_out = index;
 }
 
 // Base address of page, or NULL if this page shouldn't be madvise()d
@@ -170,17 +170,17 @@
 static void *
 madvisable_page_base_for_continuation(dispatch_continuation_t c)
 {
-	if (fastpath(continuation_is_in_first_page(c))) {
+	if (likely(continuation_is_in_first_page(c))) {
 		return NULL;
 	}
 	void *page_base = (void *)((uintptr_t)c &
 			~(uintptr_t)DISPATCH_ALLOCATOR_PAGE_MASK);
 #if DISPATCH_DEBUG
 	struct dispatch_magazine_s *m = magazine_for_continuation(c);
-	if (slowpath(page_base < (void *)&m->conts)) {
+	if (unlikely(page_base < (void *)&m->conts)) {
 		DISPATCH_INTERNAL_CRASH(page_base, "madvisable continuation too low");
 	}
-	if (slowpath(page_base > (void *)&m->conts[SUPERMAPS_PER_MAGAZINE-1]
+	if (unlikely(page_base > (void *)&m->conts[SUPERMAPS_PER_MAGAZINE-1]
 			[BITMAPS_PER_SUPERMAP-1][CONTINUATIONS_PER_BITMAP-1])) {
 		DISPATCH_INTERNAL_CRASH(page_base, "madvisable continuation too high");
 	}
@@ -254,7 +254,7 @@
 	bitmap_t b;
 
 	if (exclusively == CLEAR_EXCLUSIVELY) {
-		if (slowpath((*bitmap & mask) == 0)) {
+		if (unlikely((*bitmap & mask) == 0)) {
 			DISPATCH_CLIENT_CRASH(*bitmap,
 					"Corruption: failed to clear bit exclusively");
 		}
@@ -299,12 +299,12 @@
 	// TODO: unroll if this is hot?
 	for (i = 0; i < FULL_BITMAPS_IN_FIRST_PAGE; i++) {
 		index = bitmap_set_first_unset_bit(&magazine->fp_maps[i]);
-		if (fastpath(index != NO_BITS_WERE_UNSET)) goto found;
+		if (likely(index != NO_BITS_WERE_UNSET)) goto found;
 	}
 	if (REMAINDERED_CONTINUATIONS_IN_FIRST_PAGE) {
 		index = bitmap_set_first_unset_bit_upto_index(&magazine->fp_maps[i],
 				REMAINDERED_CONTINUATIONS_IN_FIRST_PAGE - 1);
-		if (fastpath(index != NO_BITS_WERE_UNSET)) goto found;
+		if (likely(index != NO_BITS_WERE_UNSET)) goto found;
 	}
 	return NULL;
 
@@ -348,7 +348,7 @@
 	mach_vm_size_t vm_size = MAGAZINES_PER_HEAP * BYTES_PER_MAGAZINE;
 	mach_vm_offset_t vm_mask = ~MAGAZINE_MASK;
 	mach_vm_address_t vm_addr = vm_page_size;
-	while (slowpath(kr = mach_vm_map(mach_task_self(), &vm_addr, vm_size,
+	while (unlikely(kr = mach_vm_map(mach_task_self(), &vm_addr, vm_size,
 			vm_mask, VM_FLAGS_ANYWHERE | VM_MAKE_TAG(VM_MEMORY_LIBDISPATCH),
 			MEMORY_OBJECT_NULL, 0, FALSE, VM_PROT_DEFAULT, VM_PROT_ALL,
 			VM_INHERIT_DEFAULT))) {
@@ -443,7 +443,7 @@
 #if PACK_FIRST_PAGE_WITH_CONTINUATIONS
 	// First try the continuations in the first page for this CPU
 	cont = alloc_continuation_from_first_page(&(heap[cpu_number]));
-	if (fastpath(cont)) {
+	if (likely(cont)) {
 		return cont;
 	}
 #endif
@@ -460,11 +460,11 @@
 	dispatch_continuation_t cont;
 
 	for (;;) {
-		if (!fastpath(*heap)) {
+		if (unlikely(!*heap)) {
 			_dispatch_alloc_try_create_heap(heap);
 		}
 		cont = _dispatch_alloc_continuation_from_heap(*heap);
-		if (fastpath(cont)) {
+		if (likely(cont)) {
 			return cont;
 		}
 		// If we have tuned our parameters right, 99.999% of apps should
@@ -489,16 +489,16 @@
 {
 	dispatch_continuation_t cont;
 
-	if (fastpath(_dispatch_main_heap)) {
+	if (likely(_dispatch_main_heap)) {
 		// Start looking in the same page where we found a continuation
 		// last time.
 		bitmap_t *last = last_found_page();
-		if (fastpath(last)) {
+		if (likely(last)) {
 			unsigned int i;
 			for (i = 0; i < BITMAPS_PER_PAGE; i++) {
 				bitmap_t *cur = last + i;
 				unsigned int index = bitmap_set_first_unset_bit(cur);
-				if (fastpath(index != NO_BITS_WERE_UNSET)) {
+				if (likely(index != NO_BITS_WERE_UNSET)) {
 					bitmap_t *supermap;
 					unsigned int bindex;
 					get_cont_and_indices_for_bitmap_and_index(cur,
@@ -511,7 +511,7 @@
 		}
 
 		cont = _dispatch_alloc_continuation_from_heap(_dispatch_main_heap);
-		if (fastpath(cont)) {
+		if (likely(cont)) {
 			return cont;
 		}
 	}
@@ -579,14 +579,15 @@
 	bitmap_t *b, *s;
 	unsigned int b_idx, idx;
 
+	c->dc_flags = 0;
 	get_maps_and_indices_for_continuation(c, &s, &b_idx, &b, &idx);
 	bool bitmap_now_empty = bitmap_clear_bit(b, idx, CLEAR_EXCLUSIVELY);
-	if (slowpath(s)) {
+	if (unlikely(s)) {
 		(void)bitmap_clear_bit(s, b_idx, CLEAR_NONEXCLUSIVELY);
 	}
 	// We only try to madvise(2) pages outside of the first page.
 	// (Allocations in the first page do not have a supermap entry.)
-	if (slowpath(bitmap_now_empty) && slowpath(s)) {
+	if (unlikely(bitmap_now_empty && s)) {
 		return _dispatch_alloc_maybe_madvise_page(c);
 	}
 }
@@ -594,60 +595,90 @@
 #pragma mark -
 #pragma mark dispatch_alloc_init
 
-#if DISPATCH_DEBUG
+#if DISPATCH_CONTINUATION_MALLOC || DISPATCH_DEBUG
 static void
 _dispatch_alloc_init(void)
 {
 	// Double-check our math. These are all compile time checks and don't
 	// generate code.
 
-	dispatch_assert(sizeof(bitmap_t) == BYTES_PER_BITMAP);
-	dispatch_assert(sizeof(bitmap_t) == BYTES_PER_SUPERMAP);
-	dispatch_assert(sizeof(struct dispatch_magazine_header_s) ==
+	dispatch_static_assert(sizeof(bitmap_t) == BYTES_PER_BITMAP);
+	dispatch_static_assert(sizeof(bitmap_t) == BYTES_PER_SUPERMAP);
+	dispatch_static_assert(sizeof(struct dispatch_magazine_header_s) ==
 			SIZEOF_HEADER);
 
-	dispatch_assert(sizeof(struct dispatch_continuation_s) <=
+	dispatch_static_assert(sizeof(struct dispatch_continuation_s) <=
 			DISPATCH_CONTINUATION_SIZE);
 
 	// Magazines should be the right size, so they pack neatly into an array of
 	// heaps.
-	dispatch_assert(sizeof(struct dispatch_magazine_s) == BYTES_PER_MAGAZINE);
+	dispatch_static_assert(sizeof(struct dispatch_magazine_s) ==
+			BYTES_PER_MAGAZINE);
 
 	// The header and maps sizes should match what we computed.
-	dispatch_assert(SIZEOF_HEADER ==
+	dispatch_static_assert(SIZEOF_HEADER ==
 			sizeof(((struct dispatch_magazine_s *)0x0)->header));
-	dispatch_assert(SIZEOF_MAPS ==
+	dispatch_static_assert(SIZEOF_MAPS ==
 			sizeof(((struct dispatch_magazine_s *)0x0)->maps));
 
 	// The main array of continuations should start at the second page,
 	// self-aligned.
-	dispatch_assert(offsetof(struct dispatch_magazine_s, conts) %
+	dispatch_static_assert(offsetof(struct dispatch_magazine_s, conts) %
 			(CONTINUATIONS_PER_BITMAP * DISPATCH_CONTINUATION_SIZE) == 0);
-	dispatch_assert(offsetof(struct dispatch_magazine_s, conts) ==
+	dispatch_static_assert(offsetof(struct dispatch_magazine_s, conts) ==
 			DISPATCH_ALLOCATOR_PAGE_SIZE);
 
 #if PACK_FIRST_PAGE_WITH_CONTINUATIONS
 	// The continuations in the first page should actually fit within the first
 	// page.
-	dispatch_assert(offsetof(struct dispatch_magazine_s, fp_conts) <
+	dispatch_static_assert(offsetof(struct dispatch_magazine_s, fp_conts) <
 			DISPATCH_ALLOCATOR_PAGE_SIZE);
-	dispatch_assert(offsetof(struct dispatch_magazine_s, fp_conts) %
+	dispatch_static_assert(offsetof(struct dispatch_magazine_s, fp_conts) %
 			DISPATCH_CONTINUATION_SIZE == 0);
-	dispatch_assert(offsetof(struct dispatch_magazine_s, fp_conts) +
+	dispatch_static_assert(offsetof(struct dispatch_magazine_s, fp_conts) +
 			sizeof(((struct dispatch_magazine_s *)0x0)->fp_conts) ==
-					DISPATCH_ALLOCATOR_PAGE_SIZE);
+			DISPATCH_ALLOCATOR_PAGE_SIZE);
 #endif // PACK_FIRST_PAGE_WITH_CONTINUATIONS
 	// Make sure our alignment will be correct: that is, that we are correctly
 	// aligning to both.
-	dispatch_assert(ROUND_UP_TO_BITMAP_ALIGNMENT(ROUND_UP_TO_BITMAP_ALIGNMENT_AND_CONTINUATION_SIZE(1)) ==
+	dispatch_static_assert(ROUND_UP_TO_BITMAP_ALIGNMENT(ROUND_UP_TO_BITMAP_ALIGNMENT_AND_CONTINUATION_SIZE(1)) ==
 			ROUND_UP_TO_BITMAP_ALIGNMENT_AND_CONTINUATION_SIZE(1));
-	dispatch_assert(ROUND_UP_TO_CONTINUATION_SIZE(ROUND_UP_TO_BITMAP_ALIGNMENT_AND_CONTINUATION_SIZE(1)) ==
+	dispatch_static_assert(ROUND_UP_TO_CONTINUATION_SIZE(ROUND_UP_TO_BITMAP_ALIGNMENT_AND_CONTINUATION_SIZE(1)) ==
 			ROUND_UP_TO_BITMAP_ALIGNMENT_AND_CONTINUATION_SIZE(1));
 }
-#elif (DISPATCH_ALLOCATOR && DISPATCH_CONTINUATION_MALLOC) \
-		|| (DISPATCH_CONTINUATION_MALLOC && DISPATCH_USE_MALLOCZONE)
-static inline void _dispatch_alloc_init(void) {}
-#endif
+#endif // DISPATCH_CONTINUATION_MALLOC || DISPATCH_DEBUG
+
+kern_return_t
+_dispatch_allocator_enumerate(task_t remote_task,
+		const struct dispatch_allocator_layout_s *remote_dal,
+		vm_address_t zone_address, memory_reader_t reader,
+		void (^recorder)(vm_address_t, void *, size_t, bool *stop))
+{
+	const size_t heap_size = remote_dal->dal_magazine_size;
+	const size_t dc_size = remote_dal->dal_allocation_size;
+	const size_t dc_flags_offset = remote_dal->dal_allocation_isa_offset;
+	bool stop = false;
+	void *heap;
+
+	while (zone_address) {
+		// FIXME: improve this by not faulting everything and driving it through
+		//        the bitmap.
+		kern_return_t kr = reader(remote_task, zone_address, heap_size, &heap);
+		size_t offs = remote_dal->dal_first_allocation_offset;
+		if (kr) return kr;
+		while (offs < heap_size) {
+			void *isa = *(void **)(heap + offs + dc_flags_offset);
+			if (isa && isa != remote_dal->dal_deferred_free_isa) {
+				recorder(zone_address + offs, heap + offs, dc_size, &stop);
+				if (stop) return KERN_SUCCESS;
+			}
+			offs += dc_size;
+		}
+		zone_address = (vm_address_t)((dispatch_heap_t)heap)->header.dh_next;
+	}
+
+	return KERN_SUCCESS;
+}
 
 #endif // DISPATCH_ALLOCATOR
 
@@ -677,8 +708,8 @@
 _dispatch_malloc_continuation_alloc(void)
 {
 	dispatch_continuation_t dc;
-	while (!(dc = fastpath(calloc(1,
-			ROUND_UP_TO_CACHELINE_SIZE(sizeof(*dc)))))) {
+	size_t alloc_size = ROUND_UP_TO_CACHELINE_SIZE(sizeof(*dc));
+	while (unlikely(!(dc = calloc(1, alloc_size)))) {
 		_dispatch_temporary_resource_shortage();
 	}
 	return dc;
@@ -696,12 +727,10 @@
 
 #if DISPATCH_ALLOCATOR
 #if DISPATCH_CONTINUATION_MALLOC
-#if DISPATCH_USE_NANOZONE
-extern boolean_t malloc_engaged_nano(void);
-#else
+#if !DISPATCH_USE_NANOZONE
 #define malloc_engaged_nano() false
-#endif // DISPATCH_USE_NANOZONE
-static int _dispatch_use_dispatch_alloc;
+#endif // !DISPATCH_USE_NANOZONE
+DISPATCH_STATIC_GLOBAL(bool _dispatch_use_dispatch_alloc);
 #else
 #define _dispatch_use_dispatch_alloc 1
 #endif // DISPATCH_CONTINUATION_MALLOC
@@ -709,6 +738,9 @@
 
 #if (DISPATCH_ALLOCATOR && (DISPATCH_CONTINUATION_MALLOC || DISPATCH_DEBUG)) \
 		|| (DISPATCH_CONTINUATION_MALLOC && DISPATCH_USE_MALLOCZONE)
+
+DISPATCH_STATIC_GLOBAL(dispatch_once_t _dispatch_continuation_alloc_init_pred);
+
 static void
 _dispatch_continuation_alloc_init(void *ctxt DISPATCH_UNUSED)
 {
@@ -729,11 +761,11 @@
 #endif // DISPATCH_ALLOCATOR
 }
 
-static void
-_dispatch_continuation_alloc_once()
+static inline void
+_dispatch_continuation_alloc_once(void)
 {
-	static dispatch_once_t pred;
-	dispatch_once_f(&pred, NULL, _dispatch_continuation_alloc_init);
+	dispatch_once_f(&_dispatch_continuation_alloc_init_pred,
+			NULL, _dispatch_continuation_alloc_init);
 }
 #else
 static inline void _dispatch_continuation_alloc_once(void) {}
diff --git a/src/allocator_internal.h b/src/allocator_internal.h
index abe4a1d..9409048 100644
--- a/src/allocator_internal.h
+++ b/src/allocator_internal.h
@@ -28,7 +28,7 @@
 #define __DISPATCH_ALLOCATOR_INTERNAL__
 
 #ifndef DISPATCH_ALLOCATOR
-#if TARGET_OS_MAC && (defined(__LP64__) || TARGET_OS_EMBEDDED)
+#if TARGET_OS_MAC && (defined(__LP64__) || TARGET_OS_IPHONE)
 #define DISPATCH_ALLOCATOR 1
 #endif
 #endif
@@ -72,7 +72,7 @@
 #define MAGAZINES_PER_HEAP (NUM_CPU)
 
 // Do you care about compaction or performance?
-#if TARGET_OS_EMBEDDED
+#if TARGET_OS_IPHONE
 #define PACK_FIRST_PAGE_WITH_CONTINUATIONS 1
 #else
 #define PACK_FIRST_PAGE_WITH_CONTINUATIONS 0
@@ -88,7 +88,7 @@
 #define DISPATCH_ALLOCATOR_PAGE_MASK PAGE_MAX_MASK
 
 
-#if TARGET_OS_EMBEDDED
+#if TARGET_OS_IPHONE
 #define PAGES_PER_MAGAZINE 64
 #else
 #define PAGES_PER_MAGAZINE 512
@@ -97,7 +97,7 @@
 // Use the largest type your platform is comfortable doing atomic ops with.
 // TODO: rdar://11477843
 typedef unsigned long bitmap_t;
-#if defined(__LP64__)
+#if DISPATCH_SIZEOF_PTR == 8
 #define BYTES_PER_BITMAP 8
 #else
 #define BYTES_PER_BITMAP 4
@@ -147,7 +147,7 @@
 
 #define PADDING_TO_CONTINUATION_SIZE(x) (ROUND_UP_TO_CONTINUATION_SIZE(x) - (x))
 
-#if defined(__LP64__)
+#if DISPATCH_SIZEOF_PTR == 8
 #define SIZEOF_HEADER 16
 #else
 #define SIZEOF_HEADER 8
@@ -281,6 +281,16 @@
 #define DISPATCH_ALLOCATOR_SCRIBBLE ((uintptr_t)0xAFAFAFAFAFAFAFAF)
 #endif
 
+
+kern_return_t _dispatch_allocator_enumerate(task_t remote_task,
+			const struct dispatch_allocator_layout_s *remote_allocator_layout,
+			vm_address_t zone_address, memory_reader_t reader,
+			void (^recorder)(vm_address_t, void *, size_t , bool *stop));
+
 #endif // DISPATCH_ALLOCATOR
 
+#if DISPATCH_ALLOCATOR
+extern dispatch_heap_t _dispatch_main_heap;
+#endif
+
 #endif // __DISPATCH_ALLOCATOR_INTERNAL__
diff --git a/src/apply.c b/src/apply.c
index c682824..9c7d60f 100644
--- a/src/apply.c
+++ b/src/apply.c
@@ -28,9 +28,8 @@
 
 DISPATCH_ALWAYS_INLINE
 static inline void
-_dispatch_apply_invoke2(void *ctxt, long invoke_flags)
+_dispatch_apply_invoke2(dispatch_apply_t da, long invoke_flags)
 {
-	dispatch_apply_t da = (dispatch_apply_t)ctxt;
 	size_t const iter = da->da_iterations;
 	size_t idx, done = 0;
 
@@ -40,7 +39,6 @@
 	// da_dc is only safe to access once the 'index lock' has been acquired
 	dispatch_apply_function_t const func = (void *)da->da_dc->dc_func;
 	void *const da_ctxt = da->da_dc->dc_ctxt;
-	dispatch_queue_t dq = da->da_dc->dc_data;
 
 	_dispatch_perfmon_workitem_dec(); // this unit executes many items
 
@@ -54,6 +52,7 @@
 	dispatch_thread_frame_s dtf;
 	dispatch_priority_t old_dbp = 0;
 	if (invoke_flags & DISPATCH_APPLY_INVOKE_REDIRECT) {
+		dispatch_queue_t dq = da->da_dc->dc_data;
 		_dispatch_thread_frame_push(&dtf, dq);
 		old_dbp = _dispatch_set_basepri(dq->dq_priority);
 	}
@@ -156,11 +155,12 @@
 
 DISPATCH_ALWAYS_INLINE
 static inline void
-_dispatch_apply_f2(dispatch_queue_t dq, dispatch_apply_t da,
+_dispatch_apply_f(dispatch_queue_global_t dq, dispatch_apply_t da,
 		dispatch_function_t func)
 {
 	int32_t i = 0;
 	dispatch_continuation_t head = NULL, tail = NULL;
+	pthread_priority_t pp = _dispatch_get_priority();
 
 	// The current thread does not need a continuation
 	int32_t continuation_cnt = da->da_thr_cnt - 1;
@@ -169,9 +169,11 @@
 
 	for (i = 0; i < continuation_cnt; i++) {
 		dispatch_continuation_t next = _dispatch_continuation_alloc();
-		uintptr_t dc_flags = DISPATCH_OBJ_CONSUME_BIT;
+		uintptr_t dc_flags = DC_FLAG_CONSUME;
 
-		_dispatch_continuation_init_f(next, dq, da, func, 0, 0, dc_flags);
+		_dispatch_continuation_init_f(next, dq, da, func,
+				DISPATCH_BLOCK_HAS_PRIORITY, dc_flags);
+		next->dc_priority = pp | _PTHREAD_PRIORITY_ENFORCE_FLAG;
 		next->do_next = head;
 		head = next;
 
@@ -182,28 +184,65 @@
 
 	_dispatch_thread_event_init(&da->da_event);
 	// FIXME: dq may not be the right queue for the priority of `head`
+	_dispatch_trace_item_push_list(dq, head, tail);
 	_dispatch_root_queue_push_inline(dq, head, tail, continuation_cnt);
 	// Call the first element directly
 	_dispatch_apply_invoke_and_wait(da);
 }
 
+DISPATCH_ALWAYS_INLINE DISPATCH_WARN_RESULT
+static inline int32_t
+_dispatch_queue_try_reserve_apply_width(dispatch_queue_t dq, int32_t da_width)
+{
+	uint64_t old_state, new_state;
+	int32_t width;
+
+	if (unlikely(dq->dq_width == 1)) {
+		return 0;
+	}
+
+	os_atomic_rmw_loop2o(dq, dq_state, old_state, new_state, relaxed, {
+		width = (int32_t)_dq_state_available_width(old_state);
+		if (unlikely(!width)) {
+			os_atomic_rmw_loop_give_up(return 0);
+		}
+		if (width > da_width) {
+			width = da_width;
+		}
+		new_state = old_state + (uint64_t)width * DISPATCH_QUEUE_WIDTH_INTERVAL;
+	});
+	return width;
+}
+
+DISPATCH_ALWAYS_INLINE
+static inline void
+_dispatch_queue_relinquish_width(dispatch_queue_t top_dq,
+		dispatch_queue_t stop_dq, int32_t da_width)
+{
+	uint64_t delta = (uint64_t)da_width * DISPATCH_QUEUE_WIDTH_INTERVAL;
+	dispatch_queue_t dq = top_dq;
+
+	while (dq != stop_dq) {
+		os_atomic_sub2o(dq, dq_state, delta, relaxed);
+		dq = dq->do_targetq;
+	}
+}
+
 DISPATCH_NOINLINE
 static void
 _dispatch_apply_redirect(void *ctxt)
 {
 	dispatch_apply_t da = (dispatch_apply_t)ctxt;
 	int32_t da_width = da->da_thr_cnt - 1;
-	dispatch_queue_t dq = da->da_dc->dc_data, rq = dq, tq;
+	dispatch_queue_t top_dq = da->da_dc->dc_data, dq = top_dq;
 
 	do {
-		int32_t width = _dispatch_queue_try_reserve_apply_width(rq, da_width);
+		int32_t width = _dispatch_queue_try_reserve_apply_width(dq, da_width);
 
 		if (unlikely(da_width > width)) {
 			int32_t excess = da_width - width;
-			for (tq = dq; tq != rq; tq = tq->do_targetq) {
-				_dispatch_queue_relinquish_width(tq, excess);
-			}
-			da_width -= excess;
+			_dispatch_queue_relinquish_width(top_dq, dq, excess);
+			da_width = width;
 			if (unlikely(!da_width)) {
 				return _dispatch_apply_serial(da);
 			}
@@ -215,19 +254,17 @@
 			// this continuation.
 			da->da_flags = _dispatch_queue_autorelease_frequency(dq);
 		}
-		rq = rq->do_targetq;
-	} while (unlikely(rq->do_targetq));
-	_dispatch_apply_f2(rq, da, _dispatch_apply_redirect_invoke);
-	do {
-		_dispatch_queue_relinquish_width(dq, da_width);
 		dq = dq->do_targetq;
 	} while (unlikely(dq->do_targetq));
+
+	_dispatch_apply_f(upcast(dq)._dgq, da, _dispatch_apply_redirect_invoke);
+	_dispatch_queue_relinquish_width(top_dq, dq, da_width);
 }
 
 #define DISPATCH_APPLY_MAX UINT16_MAX // must be < sqrt(SIZE_MAX)
 
 DISPATCH_ALWAYS_INLINE
-static inline dispatch_queue_t
+static inline dispatch_queue_global_t
 _dispatch_apply_root_queue(dispatch_queue_t dq)
 {
 	if (dq) {
@@ -235,8 +272,8 @@
 			dq = dq->do_targetq;
 		}
 		// if the current root queue is a pthread root queue, select it
-		if (!_dispatch_priority_qos(dq->dq_priority)) {
-			return dq;
+		if (!_dispatch_is_in_root_queues_array(dq)) {
+			return upcast(dq)._dgq;
 		}
 	}
 
@@ -247,7 +284,7 @@
 
 DISPATCH_NOINLINE
 void
-dispatch_apply_f(size_t iterations, dispatch_queue_t dq, void *ctxt,
+dispatch_apply_f(size_t iterations, dispatch_queue_t _dq, void *ctxt,
 		void (*func)(void *, size_t))
 {
 	if (unlikely(iterations == 0)) {
@@ -257,11 +294,15 @@
 			_dispatch_thread_context_find(_dispatch_apply_key);
 	size_t nested = dtctxt ? dtctxt->dtc_apply_nesting : 0;
 	dispatch_queue_t old_dq = _dispatch_queue_get_current();
+	dispatch_queue_t dq;
 
-	if (likely(dq == DISPATCH_APPLY_AUTO)) {
-		dq = _dispatch_apply_root_queue(old_dq);
+	if (likely(_dq == DISPATCH_APPLY_AUTO)) {
+		dq = _dispatch_apply_root_queue(old_dq)->_as_dq;
+	} else {
+		dq = _dq; // silence clang Nullability complaints
 	}
-	dispatch_qos_t qos = _dispatch_priority_qos(dq->dq_priority);
+	dispatch_qos_t qos = _dispatch_priority_qos(dq->dq_priority) ?:
+			_dispatch_priority_fallback_qos(dq->dq_priority);
 	if (unlikely(dq->do_targetq)) {
 		// if the queue passed-in is not a root queue, use the current QoS
 		// since the caller participates in the work anyway
@@ -294,6 +335,7 @@
 #if DISPATCH_INTROSPECTION
 	da->da_dc = _dispatch_continuation_alloc();
 	*da->da_dc = dc;
+	da->da_dc->dc_flags = DC_FLAG_ALLOCATED;
 #else
 	da->da_dc = &dc;
 #endif
@@ -312,7 +354,7 @@
 
 	dispatch_thread_frame_s dtf;
 	_dispatch_thread_frame_push(&dtf, dq);
-	_dispatch_apply_f2(dq, da, _dispatch_apply_invoke);
+	_dispatch_apply_f(upcast(dq)._dgq, da, _dispatch_apply_invoke);
 	_dispatch_thread_frame_pop(&dtf);
 }
 
@@ -324,39 +366,3 @@
 			(dispatch_apply_function_t)_dispatch_Block_invoke(work));
 }
 #endif
-
-#if 0
-#ifdef __BLOCKS__
-void
-dispatch_stride(size_t offset, size_t stride, size_t iterations,
-		dispatch_queue_t dq, void (^work)(size_t))
-{
-	dispatch_stride_f(offset, stride, iterations, dq, work,
-			(dispatch_apply_function_t)_dispatch_Block_invoke(work));
-}
-#endif
-
-DISPATCH_NOINLINE
-void
-dispatch_stride_f(size_t offset, size_t stride, size_t iterations,
-		dispatch_queue_t dq, void *ctxt, void (*func)(void *, size_t))
-{
-	if (stride == 0) {
-		stride = 1;
-	}
-	dispatch_apply(iterations / stride, queue, ^(size_t idx) {
-		size_t i = idx * stride + offset;
-		size_t stop = i + stride;
-		do {
-			func(ctxt, i++);
-		} while (i < stop);
-	});
-
-	dispatch_sync(queue, ^{
-		size_t i;
-		for (i = iterations - (iterations % stride); i < iterations; i++) {
-			func(ctxt, i + offset);
-		}
-	});
-}
-#endif
diff --git a/src/benchmark.c b/src/benchmark.c
index 49a4faa..15e9f55 100644
--- a/src/benchmark.c
+++ b/src/benchmark.c
@@ -41,7 +41,7 @@
 	register size_t cnt = bdata->count;
 	size_t i = 0;
 	uint64_t start, delta;
-#if defined(__LP64__)
+#if DISPATCH_SIZEOF_PTR == 8 && !defined(_WIN32)
 	__uint128_t lcost;
 #else
 	long double lcost;
@@ -53,12 +53,12 @@
 	dispatch_assert_zero(kr);
 #endif
 
-	start = _dispatch_absolute_time();
+	start = _dispatch_uptime();
 	do {
 		i++;
 		f(c);
 	} while (i < cnt);
-	delta = _dispatch_absolute_time() - start;
+	delta = _dispatch_uptime() - start;
 
 	lcost = delta;
 #if HAVE_MACH_ABSOLUTE_TIME
@@ -93,7 +93,7 @@
 	};
 	static dispatch_once_t pred;
 	uint64_t ns, start, delta;
-#if defined(__LP64__)
+#if DISPATCH_SIZEOF_PTR == 8 && !defined(_WIN32)
 	__uint128_t conversion, big_denom;
 #else
 	long double conversion, big_denom;
@@ -102,16 +102,16 @@
 
 	dispatch_once_f(&pred, &bdata, _dispatch_benchmark_init);
 
-	if (slowpath(count == 0)) {
+	if (unlikely(count == 0)) {
 		return 0;
 	}
 
-	start = _dispatch_absolute_time();
+	start = _dispatch_uptime();
 	do {
 		i++;
 		func(ctxt);
 	} while (i < count);
-	delta = _dispatch_absolute_time() - start;
+	delta = _dispatch_uptime() - start;
 
 	conversion = delta;
 #if HAVE_MACH_ABSOLUTE_TIME
diff --git a/src/block.cpp b/src/block.cpp
index a46b551..55f83c2 100644
--- a/src/block.cpp
+++ b/src/block.cpp
@@ -28,9 +28,7 @@
 #error Must build without C++ exceptions
 #endif
 
-extern "C" {
 #include "internal.h"
-}
 
 // NOTE: this file must not contain any atomic operations
 
@@ -68,9 +66,12 @@
 			dbpd_block(), dbpd_group(), dbpd_queue(), dbpd_thread()
 	{
 		// copy constructor, create copy with retained references
-		if (dbpd_voucher) voucher_retain(dbpd_voucher);
+		if (dbpd_voucher && dbpd_voucher != DISPATCH_NO_VOUCHER) {
+			voucher_retain(dbpd_voucher);
+		}
 		if (o.dbpd_block) {
-			dbpd_block = reinterpret_cast<dispatch_block_t>(_dispatch_Block_copy(o.dbpd_block));
+			dbpd_block = reinterpret_cast<dispatch_block_t>(
+					_dispatch_Block_copy(o.dbpd_block));
 		}
 		_dispatch_block_private_data_debug("copy from %p, block: %p from %p",
 				&o, dbpd_block, o.dbpd_block);
@@ -81,17 +82,24 @@
 	{
 		_dispatch_block_private_data_debug("destroy%s, block: %p",
 				dbpd_magic ? "" : " (stack)", dbpd_block);
+
+#if DISPATCH_INTROSPECTION
+		void *db = (char *) this - sizeof(struct Block_layout);
+		_dispatch_ktrace1(DISPATCH_QOS_TRACE_private_block_dispose, db);
+#endif /* DISPATCH_INTROSPECTION */
+
 		if (dbpd_magic != DISPATCH_BLOCK_PRIVATE_DATA_MAGIC) return;
 		if (dbpd_group) {
 			if (!dbpd_performed) dispatch_group_leave(dbpd_group);
-			((void (*)(dispatch_group_t))dispatch_release)(dbpd_group);
+			_os_object_release(dbpd_group->_as_os_obj);
 		}
 		if (dbpd_queue) {
-			((void (*)(os_mpsc_queue_t, uint16_t))
-					_os_object_release_internal_n)(dbpd_queue, 2);
+			_os_object_release_internal_n(dbpd_queue->_as_os_obj, 2);
 		}
 		if (dbpd_block) Block_release(dbpd_block);
-		if (dbpd_voucher) voucher_release(dbpd_voucher);
+		if (dbpd_voucher && dbpd_voucher != DISPATCH_NO_VOUCHER) {
+			voucher_release(dbpd_voucher);
+		}
 	}
 };
 
@@ -112,7 +120,7 @@
 // we try to reference it directly, but the linker still sees it.
 extern void DISPATCH_BLOCK_SPECIAL_INVOKE(void *)
 		__asm__(OS_STRINGIFY(__USER_LABEL_PREFIX__) "___dispatch_block_create_block_invoke");
-void (*_dispatch_block_special_invoke)(void*) = DISPATCH_BLOCK_SPECIAL_INVOKE;
+void (*const _dispatch_block_special_invoke)(void*) = DISPATCH_BLOCK_SPECIAL_INVOKE;
 }
 
 #endif // __BLOCKS__
diff --git a/src/data.c b/src/data.c
index 3efab2f..0a3cb1a 100644
--- a/src/data.c
+++ b/src/data.c
@@ -51,7 +51,7 @@
  *
  *   Such objects are created when used as an NSData and -bytes is called and
  *   where the dispatch data object is an unflattened composite object.
- *   The underlying implementation is _dispatch_data_get_flattened_bytes
+ *   The underlying implementation is dispatch_data_get_flattened_bytes_4libxpc.
  *
  * TRIVIAL SUBRANGES (num_records == 1, buf == nil, destructor == nil)
  *
@@ -118,8 +118,7 @@
 	data = _dispatch_object_alloc(DISPATCH_DATA_CLASS, size);
 	data->num_records = n;
 #if !DISPATCH_DATA_IS_BRIDGED_TO_NSDATA
-	data->do_targetq = dispatch_get_global_queue(
-			DISPATCH_QUEUE_PRIORITY_DEFAULT, 0);
+	data->do_targetq = _dispatch_get_default_queue(false);
 	data->do_next = DISPATCH_OBJECT_LISTLESS;
 #endif
 	return data;
@@ -143,8 +142,7 @@
 #endif
 	} else {
 		if (!queue) {
-			queue = dispatch_get_global_queue(
-					DISPATCH_QUEUE_PRIORITY_DEFAULT, 0);
+			queue = _dispatch_get_default_queue(false);
 		}
 		dispatch_async_f(queue, destructor, _dispatch_call_block_and_release);
 	}
@@ -200,7 +198,7 @@
 		// The default destructor was provided, indicating the data should be
 		// copied.
 		data_buf = malloc(size);
-		if (slowpath(!data_buf)) {
+		if (unlikely(!data_buf)) {
 			return DISPATCH_OUT_OF_MEMORY;
 		}
 		buffer = memcpy(data_buf, buffer, size);
@@ -242,7 +240,7 @@
 	dispatch_data_t data = dispatch_data_empty;
 	void *buffer = NULL;
 
-	if (slowpath(!size)) {
+	if (unlikely(!size)) {
 		goto out;
 	}
 	data = _dispatch_data_alloc(0, size);
@@ -271,17 +269,16 @@
 	}
 }
 
+#if DISPATCH_DATA_IS_BRIDGED_TO_NSDATA
 void
 _dispatch_data_set_target_queue(dispatch_data_t dd, dispatch_queue_t tq)
 {
-#if DISPATCH_DATA_IS_BRIDGED_TO_NSDATA
-	_dispatch_retain(tq);
-	tq = os_atomic_xchg2o(dd, do_targetq, tq, release);
-	if (tq) _dispatch_release(tq);
-#else
+	if (tq == DISPATCH_TARGET_QUEUE_DEFAULT) {
+		tq = _dispatch_get_default_queue(false);
+	}
 	_dispatch_object_set_target_queue_inline(dd, tq);
-#endif
 }
+#endif // DISPATCH_DATA_IS_BRIDGED_TO_NSDATA
 
 size_t
 _dispatch_data_debug(dispatch_data_t dd, char* buf, size_t bufsiz)
@@ -405,7 +402,7 @@
 	}
 
 	// Crashing here indicates memory corruption of passed in data object
-	if (slowpath(i >= dd_num_records)) {
+	if (unlikely(i >= dd_num_records)) {
 		DISPATCH_INTERNAL_CRASH(i,
 				"dispatch_data_create_subrange out of bounds");
 	}
@@ -435,7 +432,7 @@
 			last_length -= record_length;
 
 			// Crashing here indicates memory corruption of passed in data object
-			if (slowpath(i + count >= dd_num_records)) {
+			if (unlikely(i + count >= dd_num_records)) {
 				DISPATCH_INTERNAL_CRASH(i + count,
 						"dispatch_data_create_subrange out of bounds");
 			}
@@ -502,7 +499,7 @@
 	}
 
 	buffer = _dispatch_data_flatten(dd);
-	if (fastpath(buffer)) {
+	if (likely(buffer)) {
 		data = dispatch_data_create(buffer, size, NULL,
 				DISPATCH_DATA_DESTRUCTOR_FREE);
 	} else {
@@ -520,12 +517,12 @@
 }
 
 const void *
-_dispatch_data_get_flattened_bytes(dispatch_data_t dd)
+dispatch_data_get_flattened_bytes_4libxpc(dispatch_data_t dd)
 {
 	const void *buffer;
 	size_t offset = 0;
 
-	if (slowpath(!dd->size)) {
+	if (unlikely(!dd->size)) {
 		return NULL;
 	}
 
@@ -535,9 +532,9 @@
 	}
 
 	void *flatbuf = _dispatch_data_flatten(dd);
-	if (fastpath(flatbuf)) {
+	if (likely(flatbuf)) {
 		// we need a release so that readers see the content of the buffer
-		if (slowpath(!os_atomic_cmpxchgv2o(dd, buf, NULL, flatbuf,
+		if (unlikely(!os_atomic_cmpxchgv2o(dd, buf, NULL, flatbuf,
 				&buffer, release))) {
 			free(flatbuf);
 		} else {
diff --git a/src/data.m b/src/data.m
index 1d024ff..2a95d28 100644
--- a/src/data.m
+++ b/src/data.m
@@ -122,7 +122,7 @@
 	_dispatch_data_debug(self, buf, sizeof(buf));
 	NSString *format = [nsstring stringWithUTF8String:"<%s: %s>"];
 	if (!format) return nil;
-	return [nsstring stringWithFormat:format, class_getName([self class]), buf];
+	return [nsstring stringWithFormat:format, object_getClassName(self), buf];
 }
 
 - (NSUInteger)length {
@@ -131,8 +131,7 @@
 }
 
 - (const void *)bytes {
-	struct dispatch_data_s *dd = (void*)self;
-	return _dispatch_data_get_flattened_bytes(dd);
+	return dispatch_data_get_flattened_bytes_4libxpc(self);
 }
 
 - (BOOL)_isCompact {
diff --git a/src/data_internal.h b/src/data_internal.h
index 19fc3d9..1589a79 100644
--- a/src/data_internal.h
+++ b/src/data_internal.h
@@ -51,7 +51,7 @@
 #define DISPATCH_DATA_CLASS DISPATCH_VTABLE(data)
 #define DISPATCH_DATA_EMPTY_CLASS DISPATCH_VTABLE(data_empty)
 #else
-DISPATCH_CLASS_DECL(data);
+DISPATCH_CLASS_DECL(data, OBJECT);
 #define DISPATCH_DATA_CLASS DISPATCH_VTABLE(data)
 #endif // DISPATCH_DATA_IS_BRIDGED_TO_NSDATA
 
@@ -103,10 +103,12 @@
 void _dispatch_data_init_with_bytes(dispatch_data_t data, const void *buffer,
 		size_t size, dispatch_block_t destructor);
 void _dispatch_data_dispose(dispatch_data_t data, bool *allow_free);
+#if DISPATCH_DATA_IS_BRIDGED_TO_NSDATA
 void _dispatch_data_set_target_queue(struct dispatch_data_s *dd,
 		dispatch_queue_t tq);
+#endif
+DISPATCH_COLD
 size_t _dispatch_data_debug(dispatch_data_t data, char* buf, size_t bufsiz);
-const void* _dispatch_data_get_flattened_bytes(struct dispatch_data_s *dd);
 
 #if !defined(__cplusplus)
 extern const dispatch_block_t _dispatch_data_destructor_inline;
@@ -127,13 +129,13 @@
 	const void *buffer = NULL;
 
 	dispatch_assert(dd->size);
-	if (slowpath(!_dispatch_data_leaf(dd)) &&
+	if (unlikely(!_dispatch_data_leaf(dd)) &&
 			_dispatch_data_num_records(dd) == 1) {
 		offset += dd->records[0].from;
 		dd = (struct dispatch_data_s *)dd->records[0].data_object;
 	}
 
-	if (fastpath(_dispatch_data_leaf(dd))) {
+	if (likely(_dispatch_data_leaf(dd))) {
 		buffer = dd->buf + offset;
 	} else {
 		buffer = os_atomic_load((void **)&dd->buf, relaxed);
diff --git a/src/event/event.c b/src/event/event.c
index 49ce750..c59d3a7 100644
--- a/src/event/event.c
+++ b/src/event/event.c
@@ -20,6 +20,13 @@
 
 #include "internal.h"
 
+#pragma mark unote generic functions
+
+static void _dispatch_timer_unote_register(dispatch_timer_source_refs_t dt,
+		dispatch_wlh_t wlh, dispatch_priority_t pri);
+static void _dispatch_timer_unote_resume(dispatch_timer_source_refs_t dt);
+static void _dispatch_timer_unote_unregister(dispatch_timer_source_refs_t dt);
+
 DISPATCH_NOINLINE
 static dispatch_unote_t
 _dispatch_unote_create(dispatch_source_type_t dst,
@@ -32,14 +39,11 @@
 		return DISPATCH_UNOTE_NULL;
 	}
 
-	if (dst->dst_filter != DISPATCH_EVFILT_TIMER) {
-		if (dst->dst_mask && !mask) {
-			return DISPATCH_UNOTE_NULL;
-		}
+	if (dst->dst_mask && !mask) {
+		return DISPATCH_UNOTE_NULL;
 	}
 
-	if ((dst->dst_flags & EV_UDATA_SPECIFIC) ||
-			(dst->dst_filter == DISPATCH_EVFILT_TIMER)) {
+	if (dst->dst_flags & EV_UDATA_SPECIFIC) {
 		du = _dispatch_calloc(1u, dst->dst_size);
 	} else {
 		dul = _dispatch_calloc(1u, sizeof(*dul) + dst->dst_size);
@@ -53,7 +57,6 @@
 	if (dst->dst_flags & EV_UDATA_SPECIFIC) {
 		du->du_is_direct = true;
 	}
-	du->du_data_action = DISPATCH_UNOTE_ACTION_DATA_OR;
 	return (dispatch_unote_t){ ._du = du };
 }
 
@@ -78,13 +81,7 @@
 		return DISPATCH_UNOTE_NULL;
 	}
 #endif
-	dispatch_unote_t du = _dispatch_unote_create(dst, handle, mask);
-	if (du._du) {
-		int16_t filter = dst->dst_filter;
-		du._du->du_data_action = (filter == EVFILT_READ||filter == EVFILT_WRITE)
-			? DISPATCH_UNOTE_ACTION_DATA_SET : DISPATCH_UNOTE_ACTION_DATA_OR;
-	}
-	return du;
+	return _dispatch_unote_create(dst, handle, mask);
 }
 
 DISPATCH_NOINLINE
@@ -123,6 +120,88 @@
 	free(ptr);
 }
 
+bool
+_dispatch_unote_register(dispatch_unote_t du, dispatch_wlh_t wlh,
+		dispatch_priority_t pri)
+{
+	dispatch_assert(du._du->du_is_timer || !_dispatch_unote_registered(du));
+	dispatch_priority_t masked_pri;
+
+	masked_pri = pri & (DISPATCH_PRIORITY_FLAG_MANAGER |
+			DISPATCH_PRIORITY_FLAG_FALLBACK |
+			DISPATCH_PRIORITY_FLAG_FLOOR |
+			DISPATCH_PRIORITY_FALLBACK_QOS_MASK |
+			DISPATCH_PRIORITY_REQUESTED_MASK);
+
+	dispatch_assert(wlh == DISPATCH_WLH_ANON || masked_pri);
+	if (masked_pri == _dispatch_priority_make_fallback(DISPATCH_QOS_DEFAULT)) {
+		_dispatch_ktrace1(DISPATCH_PERF_source_registration_without_qos,
+				_dispatch_wref2ptr(du._du->du_owner_wref));
+	}
+
+	du._du->du_priority = pri;
+
+	switch (du._du->du_filter) {
+	case DISPATCH_EVFILT_CUSTOM_ADD:
+	case DISPATCH_EVFILT_CUSTOM_OR:
+	case DISPATCH_EVFILT_CUSTOM_REPLACE:
+		_dispatch_unote_state_set(du, DISPATCH_WLH_ANON, DU_STATE_ARMED);
+		return true;
+	}
+	if (du._du->du_is_timer) {
+		_dispatch_timer_unote_register(du._dt, wlh, pri);
+		return true;
+	}
+#if DISPATCH_HAVE_DIRECT_KNOTES
+	if (du._du->du_is_direct) {
+		return _dispatch_unote_register_direct(du, wlh);
+	}
+#endif
+	return _dispatch_unote_register_muxed(du);
+}
+
+void
+_dispatch_unote_resume(dispatch_unote_t du)
+{
+	dispatch_assert(du._du->du_is_timer || _dispatch_unote_needs_rearm(du));
+	if (du._du->du_is_timer) {
+		_dispatch_timer_unote_resume(du._dt);
+#if DISPATCH_HAVE_DIRECT_KNOTES
+	} else if (du._du->du_is_direct) {
+		_dispatch_unote_resume_direct(du);
+#endif
+	} else {
+		_dispatch_unote_resume_muxed(du);
+	}
+}
+
+bool
+_dispatch_unote_unregister(dispatch_unote_t du, uint32_t flags)
+{
+	if (!_dispatch_unote_registered(du)) {
+		return true;
+	}
+	switch (du._du->du_filter) {
+	case DISPATCH_EVFILT_CUSTOM_ADD:
+	case DISPATCH_EVFILT_CUSTOM_OR:
+	case DISPATCH_EVFILT_CUSTOM_REPLACE:
+		_dispatch_unote_state_set(du, DU_STATE_UNREGISTERED);
+		return true;
+	}
+	if (du._du->du_is_timer) {
+		_dispatch_timer_unote_unregister(du._dt);
+		return true;
+	}
+#if DISPATCH_HAVE_DIRECT_KNOTES
+	if (du._du->du_is_direct) {
+		return _dispatch_unote_unregister_direct(du, flags);
+	}
+#endif
+
+	dispatch_assert(flags & DUU_DELETE_ACK);
+	return _dispatch_unote_unregister_muxed(du);
+}
+
 #pragma mark data or / add
 
 static dispatch_unote_t
@@ -146,7 +225,9 @@
 	.dst_kind       = "data-add",
 	.dst_filter     = DISPATCH_EVFILT_CUSTOM_ADD,
 	.dst_flags      = EV_UDATA_SPECIFIC|EV_CLEAR,
+	.dst_action     = DISPATCH_UNOTE_ACTION_PASS_DATA,
 	.dst_size       = sizeof(struct dispatch_source_refs_s),
+	.dst_strict     = false,
 
 	.dst_create     = _dispatch_source_data_create,
 	.dst_merge_evt  = NULL,
@@ -156,7 +237,9 @@
 	.dst_kind       = "data-or",
 	.dst_filter     = DISPATCH_EVFILT_CUSTOM_OR,
 	.dst_flags      = EV_UDATA_SPECIFIC|EV_CLEAR,
+	.dst_action     = DISPATCH_UNOTE_ACTION_PASS_DATA,
 	.dst_size       = sizeof(struct dispatch_source_refs_s),
+	.dst_strict     = false,
 
 	.dst_create     = _dispatch_source_data_create,
 	.dst_merge_evt  = NULL,
@@ -166,7 +249,9 @@
 	.dst_kind       = "data-replace",
 	.dst_filter     = DISPATCH_EVFILT_CUSTOM_REPLACE,
 	.dst_flags      = EV_UDATA_SPECIFIC|EV_CLEAR,
+	.dst_action     = DISPATCH_UNOTE_ACTION_PASS_DATA,
 	.dst_size       = sizeof(struct dispatch_source_refs_s),
+	.dst_strict     = false,
 
 	.dst_create     = _dispatch_source_data_create,
 	.dst_merge_evt  = NULL,
@@ -184,7 +269,9 @@
 #endif
 	.dst_data       = 1,
 #endif // DISPATCH_EVENT_BACKEND_KEVENT
+	.dst_action     = DISPATCH_UNOTE_ACTION_SOURCE_SET_DATA,
 	.dst_size       = sizeof(struct dispatch_source_refs_s),
+	.dst_strict     = false,
 
 	.dst_create     = _dispatch_unote_create_with_fd,
 	.dst_merge_evt  = _dispatch_source_merge_evt,
@@ -200,7 +287,9 @@
 #endif
 	.dst_data       = 1,
 #endif // DISPATCH_EVENT_BACKEND_KEVENT
+	.dst_action     = DISPATCH_UNOTE_ACTION_SOURCE_SET_DATA,
 	.dst_size       = sizeof(struct dispatch_source_refs_s),
+	.dst_strict     = false,
 
 	.dst_create     = _dispatch_unote_create_with_fd,
 	.dst_merge_evt  = _dispatch_source_merge_evt,
@@ -215,113 +304,943 @@
 	if (handle >= NSIG) {
 		return DISPATCH_UNOTE_NULL;
 	}
-	dispatch_unote_t du = _dispatch_unote_create_with_handle(dst, handle, mask);
-	if (du._du) {
-		du._du->du_data_action = DISPATCH_UNOTE_ACTION_DATA_ADD;
-	}
-	return du;
+	return _dispatch_unote_create_with_handle(dst, handle, mask);
 }
 
 const dispatch_source_type_s _dispatch_source_type_signal = {
 	.dst_kind       = "signal",
 	.dst_filter     = EVFILT_SIGNAL,
 	.dst_flags      = DISPATCH_EV_DIRECT|EV_CLEAR,
+	.dst_action     = DISPATCH_UNOTE_ACTION_SOURCE_ADD_DATA,
 	.dst_size       = sizeof(struct dispatch_source_refs_s),
+	.dst_strict     = false,
 
 	.dst_create     = _dispatch_source_signal_create,
 	.dst_merge_evt  = _dispatch_source_merge_evt,
 };
 
-#pragma mark timers
+#pragma mark -
+#pragma mark timer globals
 
-bool _dispatch_timers_reconfigure, _dispatch_timers_expired;
-uint32_t _dispatch_timers_processing_mask;
+DISPATCH_GLOBAL(struct dispatch_timer_heap_s
+_dispatch_timers_heap[DISPATCH_TIMER_COUNT]);
+
 #if DISPATCH_USE_DTRACE
-uint32_t _dispatch_timers_will_wake;
+DISPATCH_STATIC_GLOBAL(dispatch_timer_source_refs_t
+_dispatch_trace_next_timer[DISPATCH_TIMER_QOS_COUNT]);
+#define _dispatch_trace_next_timer_set(x, q) \
+		_dispatch_trace_next_timer[(q)] = (x)
+#define _dispatch_trace_next_timer_program(d, q) \
+		_dispatch_trace_timer_program(_dispatch_trace_next_timer[(q)], (d))
+#else
+#define _dispatch_trace_next_timer_set(x, q)
+#define _dispatch_trace_next_timer_program(d, q)
 #endif
-#define DISPATCH_TIMER_HEAP_INITIALIZER(tidx) \
-	[tidx] = { \
-		.dth_target = UINT64_MAX, \
-		.dth_deadline = UINT64_MAX, \
-	}
-#define DISPATCH_TIMER_HEAP_INIT(kind, qos) \
-		DISPATCH_TIMER_HEAP_INITIALIZER(DISPATCH_TIMER_INDEX( \
-		DISPATCH_CLOCK_##kind, DISPATCH_TIMER_QOS_##qos))
 
-struct dispatch_timer_heap_s _dispatch_timers_heap[] =  {
-	DISPATCH_TIMER_HEAP_INIT(WALL, NORMAL),
-	DISPATCH_TIMER_HEAP_INIT(MACH, NORMAL),
+#pragma mark timer heap
+/*
+ * The dispatch_timer_heap_t structure is a double min-heap of timers,
+ * interleaving the by-target min-heap in the even slots, and the by-deadline
+ * in the odd ones.
+ *
+ * The min element of these is held inline in the dispatch_timer_heap_t
+ * structure, and further entries are held in segments.
+ *
+ * dth_segments is the number of allocated segments.
+ *
+ * Segment 0 has a size of `DISPATCH_HEAP_INIT_SEGMENT_CAPACITY` pointers
+ * Segment k has a size of (DISPATCH_HEAP_INIT_SEGMENT_CAPACITY << (k - 1))
+ *
+ * Segment n (dth_segments - 1) is the last segment and points its final n
+ * entries to previous segments. Its address is held in the `dth_heap` field.
+ *
+ * segment n   [ regular timer pointers | n-1 | k | 0 ]
+ *                                         |    |   |
+ * segment n-1 <---------------------------'    |   |
+ * segment k   <--------------------------------'   |
+ * segment 0   <------------------------------------'
+ */
+#define DISPATCH_HEAP_INIT_SEGMENT_CAPACITY 8u
+
+/*
+ * There are two min-heaps stored interleaved in a single array,
+ * even indices are for the by-target min-heap, and odd indices for
+ * the by-deadline one.
+ */
+#define DTH_HEAP_ID_MASK (DTH_ID_COUNT - 1)
+#define DTH_HEAP_ID(idx) ((idx) & DTH_HEAP_ID_MASK)
+#define DTH_IDX_FOR_HEAP_ID(idx, heap_id) \
+		(((idx) & ~DTH_HEAP_ID_MASK) | (heap_id))
+
+DISPATCH_ALWAYS_INLINE
+static inline uint32_t
+_dispatch_timer_heap_capacity(uint32_t segments)
+{
+	if (segments == 0) return 2;
+	uint32_t seg_no = segments - 1;
+	// for C = DISPATCH_HEAP_INIT_SEGMENT_CAPACITY,
+	// 2 + C + SUM(C << (i-1), i = 1..seg_no) - seg_no
+	return 2 + (DISPATCH_HEAP_INIT_SEGMENT_CAPACITY << seg_no) - seg_no;
+}
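As an aside (illustration only, not part of the patch): a self-contained sketch of the growth schedule this capacity formula produces, assuming the initial segment capacity stays at 8; `SEGMENT_CAPACITY` and `heap_capacity` are local stand-ins for the identifiers above.

#include <stdio.h>

#define SEGMENT_CAPACITY 8u // mirrors DISPATCH_HEAP_INIT_SEGMENT_CAPACITY

// Same arithmetic as _dispatch_timer_heap_capacity() above: 2 inline slots,
// plus the pointers held by the allocated segments, minus the back-links the
// last segment spends on reaching the earlier segments.
static unsigned int
heap_capacity(unsigned int segments)
{
	if (segments == 0) return 2;
	unsigned int seg_no = segments - 1;
	return 2 + (SEGMENT_CAPACITY << seg_no) - seg_no;
}

int
main(void)
{
	// Prints: 2 10 17 32 63 — each added segment roughly doubles capacity.
	for (unsigned int s = 0; s <= 4; s++) printf("%u ", heap_capacity(s));
	printf("\n");
	return 0;
}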
+
+static void
+_dispatch_timer_heap_grow(dispatch_timer_heap_t dth)
+{
+	uint32_t seg_capacity = DISPATCH_HEAP_INIT_SEGMENT_CAPACITY;
+	uint32_t seg_no = dth->dth_segments++;
+	void **heap, **heap_prev = dth->dth_heap;
+
+	if (seg_no > 0) {
+		seg_capacity <<= (seg_no - 1);
+	}
+	heap = _dispatch_calloc(seg_capacity, sizeof(void *));
+	if (seg_no > 1) {
+		uint32_t prev_seg_no = seg_no - 1;
+		uint32_t prev_seg_capacity = seg_capacity >> 1;
+		memcpy(&heap[seg_capacity - prev_seg_no],
+				&heap_prev[prev_seg_capacity - prev_seg_no],
+				prev_seg_no * sizeof(void *));
+	}
+	if (seg_no > 0) {
+		heap[seg_capacity - seg_no] = heap_prev;
+	}
+	dth->dth_heap = heap;
+}
+
+static void
+_dispatch_timer_heap_shrink(dispatch_timer_heap_t dth)
+{
+	uint32_t seg_capacity = DISPATCH_HEAP_INIT_SEGMENT_CAPACITY;
+	uint32_t seg_no = --dth->dth_segments;
+	void **heap = dth->dth_heap, **heap_prev = NULL;
+
+	if (seg_no > 0) {
+		seg_capacity <<= (seg_no - 1);
+		heap_prev = heap[seg_capacity - seg_no];
+	}
+	if (seg_no > 1) {
+		uint32_t prev_seg_no = seg_no - 1;
+		uint32_t prev_seg_capacity = seg_capacity >> 1;
+		memcpy(&heap_prev[prev_seg_capacity - prev_seg_no],
+				&heap[seg_capacity - prev_seg_no],
+				prev_seg_no * sizeof(void *));
+	}
+	dth->dth_heap = heap_prev;
+	free(heap);
+}
+
+DISPATCH_ALWAYS_INLINE
+static inline dispatch_timer_source_refs_t *
+_dispatch_timer_heap_get_slot(dispatch_timer_heap_t dth, uint32_t idx)
+{
+	uint32_t seg_no, segments = dth->dth_segments;
+	void **segment;
+
+	if (idx < DTH_ID_COUNT) {
+		return &dth->dth_min[idx];
+	}
+	idx -= DTH_ID_COUNT;
+
+	// Derive the segment number from the index. Naming
+	// DISPATCH_HEAP_INIT_SEGMENT_CAPACITY `C`, the segments index ranges are:
+	// 0: 0 .. (C - 1)
+	// 1: C .. 2 * C - 1
+	// k: 2^(k-1) * C .. 2^k * C - 1
+	// so `k` can be derived from the first bit set in `idx`
+	seg_no = (uint32_t)(__builtin_clz(DISPATCH_HEAP_INIT_SEGMENT_CAPACITY - 1) -
+			__builtin_clz(idx | (DISPATCH_HEAP_INIT_SEGMENT_CAPACITY - 1)));
+	if (seg_no + 1 == segments) {
+		segment = dth->dth_heap;
+	} else {
+		uint32_t seg_capacity = DISPATCH_HEAP_INIT_SEGMENT_CAPACITY;
+		seg_capacity <<= (segments - 2);
+		segment = dth->dth_heap[seg_capacity - seg_no - 1];
+	}
+	if (seg_no) {
+		idx -= DISPATCH_HEAP_INIT_SEGMENT_CAPACITY << (seg_no - 1);
+	}
+	return (dispatch_timer_source_refs_t *)(segment + idx);
+}
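As an aside (illustration only, not part of the patch): the index-to-segment mapping described in the comment above, reduced to a standalone snippet for an initial segment capacity of 8; `segment_for` is a hypothetical name.

#include <stdint.h>
#include <stdio.h>

#define SEGMENT_CAPACITY 8u // mirrors DISPATCH_HEAP_INIT_SEGMENT_CAPACITY

// Same bit trick as _dispatch_timer_heap_get_slot(): once the two inline min
// slots have been subtracted, the segment number follows from the position of
// the highest set bit of `idx`, clamped so that indices below the first
// segment's capacity all map to segment 0.
static uint32_t
segment_for(uint32_t idx)
{
	return (uint32_t)(__builtin_clz(SEGMENT_CAPACITY - 1) -
			__builtin_clz(idx | (SEGMENT_CAPACITY - 1)));
}

int
main(void)
{
	// idx 0..7 -> segment 0, 8..15 -> segment 1, 16..31 -> segment 2, ...
	printf("%u %u %u %u\n", segment_for(0), segment_for(7),
			segment_for(8), segment_for(31)); // prints: 0 0 1 2
	return 0;
}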
+
+DISPATCH_ALWAYS_INLINE
+static inline void
+_dispatch_timer_heap_set(dispatch_timer_heap_t dth,
+		dispatch_timer_source_refs_t *slot,
+		dispatch_timer_source_refs_t dt, uint32_t idx)
+{
+	if (idx < DTH_ID_COUNT) {
+		dth->dth_needs_program = true;
+	}
+	*slot = dt;
+	dt->dt_heap_entry[DTH_HEAP_ID(idx)] = idx;
+}
+
+DISPATCH_ALWAYS_INLINE
+static inline uint32_t
+_dispatch_timer_heap_parent(uint32_t idx)
+{
+	uint32_t heap_id = DTH_HEAP_ID(idx);
+	idx = (idx - DTH_ID_COUNT) / 2; // go to the parent
+	return DTH_IDX_FOR_HEAP_ID(idx, heap_id);
+}
+
+DISPATCH_ALWAYS_INLINE
+static inline uint32_t
+_dispatch_timer_heap_left_child(uint32_t idx)
+{
+	uint32_t heap_id = DTH_HEAP_ID(idx);
+	// 2 * (idx - heap_id) + DTH_ID_COUNT + heap_id
+	return 2 * idx + DTH_ID_COUNT - heap_id;
+}
+
+#if DISPATCH_HAVE_TIMER_COALESCING
+DISPATCH_ALWAYS_INLINE
+static inline uint32_t
+_dispatch_timer_heap_walk_skip(uint32_t idx, uint32_t count)
+{
+	uint32_t heap_id = DTH_HEAP_ID(idx);
+
+	idx -= heap_id;
+	if (unlikely(idx + DTH_ID_COUNT == count)) {
+		// reaching `count` doesn't mean we're done, but there is a weird
+		// corner case if the last item of the heap is a left child:
+		//
+		//     /\
+		//    /  \
+		//   /  __\
+		//  /__/
+		//     ^
+		//
+		// The formula below would return the sibling of `idx` which is
+		// out of bounds. Fortunately, the correct answer is the same
+		// as for idx's parent
+		idx = _dispatch_timer_heap_parent(idx);
+	}
+
+	//
+	// When considering the index in a non-interleaved, 1-based array
+	// representation of a heap, hence looking at (idx / DTH_ID_COUNT + 1)
+	// for a given idx in our dual-heaps, that index is in one of two forms:
+	//
+	//     (a) 1xxxx011111    or    (b) 111111111
+	//         d    i    0              d       0
+	//
+	// The first bit set is the row of the binary tree node (0-based).
+	// The following digits from most to least significant represent the path
+	// to that node, where `0` is a left turn and `1` a right turn.
+	//
+	// For example 0b0101 (5) is a node on row 2 accessed going left then right:
+	//
+	// row 0          1
+	//              /   .
+	// row 1      2       3
+	//           . \     . .
+	// row 2    4   5   6   7
+	//         : : : : : : : :
+	//
+	// Skipping a sub-tree in walk order means going to the sibling of the last
+	// node reached after we turned left. If the node was of the form (a),
+	// this node is 1xxxx1, which for the above example is 0b0011 (3).
+	// If the node was of the form (b) then we never took a left, meaning
+	// we reached the last element in traversal order.
+	//
+
+	//
+	// we want to find
+	// - the least significant bit set to 0 in (idx / DTH_ID_COUNT + 1)
+	// - which is offset by log_2(DTH_ID_COUNT) from the position of the least
+	//   significant 0 in (idx + DTH_ID_COUNT + DTH_ID_COUNT - 1)
+	//   since idx is a multiple of DTH_ID_COUNT and DTH_ID_COUNT a power of 2.
+	// - which in turn is the same as the position of the least significant 1 in
+	//   ~(idx + DTH_ID_COUNT + DTH_ID_COUNT - 1)
+	//
+	dispatch_static_assert(powerof2(DTH_ID_COUNT));
+	idx += DTH_ID_COUNT + DTH_ID_COUNT - 1;
+	idx >>= __builtin_ctz(~idx);
+
+	//
+	// `idx` is now either:
+	// - 0 if it was the (b) case above, in which case the walk is done
+	// - 1xxxx0 as the position in a 0 based array representation of a non
+	//   interleaved heap, so we just have to compute the interleaved index.
+	//
+	return likely(idx) ? DTH_ID_COUNT * idx + heap_id : UINT32_MAX;
+}
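As an aside (illustration only, not part of the patch): the bit manipulation above, isolated into a standalone sketch for a heap with two entries per timer and enough elements that the left-child corner case does not trigger; `ID_COUNT` and `walk_skip` are local stand-ins.

#include <stdint.h>
#include <stdio.h>

#define ID_COUNT 2u // mirrors DTH_ID_COUNT: target + deadline slots

// Same computation as the tail of _dispatch_timer_heap_walk_skip().
static uint32_t
walk_skip(uint32_t idx)
{
	uint32_t heap_id = idx & (ID_COUNT - 1);
	idx -= heap_id;
	idx += ID_COUNT + ID_COUNT - 1;
	idx >>= __builtin_ctz(~idx);
	return idx ? ID_COUNT * idx + heap_id : UINT32_MAX;
}

int
main(void)
{
	// 1-based tree position p lives at interleaved index 2 * (p - 1).
	// Skipping the subtree of p = 5 (left, then right) lands on p = 3:
	printf("%u\n", walk_skip(8));  // prints 4, i.e. p = 3
	// p = 7 never took a left turn, so the walk is over:
	printf("%u\n", walk_skip(12)); // prints 4294967295 (UINT32_MAX)
	return 0;
}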
+
+DISPATCH_ALWAYS_INLINE
+static inline uint32_t
+_dispatch_timer_heap_walk_next(uint32_t idx, uint32_t count)
+{
+	//
+	// Goes to the next element in heap walk order, which is the prefix ordered
+	// walk of the tree.
+	//
+	// From a given node, the next item to return is the left child if it
+	// exists, else the first right sibling we find by walking our parent chain,
+	// which is exactly what _dispatch_timer_heap_walk_skip() returns.
+	//
+	uint32_t lchild = _dispatch_timer_heap_left_child(idx);
+	if (lchild < count) {
+		return lchild;
+	}
+	return _dispatch_timer_heap_walk_skip(idx, count);
+}
+
+static uint64_t
+_dispatch_timer_heap_max_target_before(dispatch_timer_heap_t dth, uint64_t limit)
+{
+	dispatch_timer_source_refs_t dri;
+	uint32_t idx = _dispatch_timer_heap_left_child(DTH_TARGET_ID);
+	uint32_t count = dth->dth_count;
+	uint64_t tmp, target = dth->dth_min[DTH_TARGET_ID]->dt_timer.target;
+
+	while (idx < count) {
+		dri = *_dispatch_timer_heap_get_slot(dth, idx);
+		tmp = dri->dt_timer.target;
+		if (tmp > limit) {
+			// skip subtree since none of the targets below can be before limit
+			idx = _dispatch_timer_heap_walk_skip(idx, count);
+		} else {
+			target = tmp;
+			idx = _dispatch_timer_heap_walk_next(idx, count);
+		}
+	}
+	return target;
+}
+#endif // DISPATCH_HAVE_TIMER_COALESCING
+
+static void
+_dispatch_timer_heap_resift(dispatch_timer_heap_t dth,
+		dispatch_timer_source_refs_t dt, uint32_t idx)
+{
+	dispatch_static_assert(offsetof(struct dispatch_timer_source_s, target) ==
+			offsetof(struct dispatch_timer_source_s, heap_key[DTH_TARGET_ID]));
+	dispatch_static_assert(offsetof(struct dispatch_timer_source_s, deadline) ==
+			offsetof(struct dispatch_timer_source_s, heap_key[DTH_DEADLINE_ID]));
+#define dth_cmp(hid, dt1, op, dt2) \
+		(((dt1)->dt_timer.heap_key)[hid] op ((dt2)->dt_timer.heap_key)[hid])
+
+	dispatch_timer_source_refs_t *pslot, pdt;
+	dispatch_timer_source_refs_t *cslot, cdt;
+	dispatch_timer_source_refs_t *rslot, rdt;
+	uint32_t cidx, dth_count = dth->dth_count;
+	dispatch_timer_source_refs_t *slot;
+	int heap_id = DTH_HEAP_ID(idx);
+	bool sifted_up = false;
+
+	// try to sift up
+
+	slot = _dispatch_timer_heap_get_slot(dth, idx);
+	while (idx >= DTH_ID_COUNT) {
+		uint32_t pidx = _dispatch_timer_heap_parent(idx);
+		pslot = _dispatch_timer_heap_get_slot(dth, pidx);
+		pdt = *pslot;
+		if (dth_cmp(heap_id, pdt, <=, dt)) {
+			break;
+		}
+		_dispatch_timer_heap_set(dth, slot, pdt, idx);
+		slot = pslot;
+		idx = pidx;
+		sifted_up = true;
+	}
+	if (sifted_up) {
+		goto done;
+	}
+
+	// try to sift down
+
+	while ((cidx = _dispatch_timer_heap_left_child(idx)) < dth_count) {
+		uint32_t ridx = cidx + DTH_ID_COUNT;
+		cslot = _dispatch_timer_heap_get_slot(dth, cidx);
+		cdt = *cslot;
+		if (ridx < dth_count) {
+			rslot = _dispatch_timer_heap_get_slot(dth, ridx);
+			rdt = *rslot;
+			if (dth_cmp(heap_id, cdt, >, rdt)) {
+				cidx = ridx;
+				cdt = rdt;
+				cslot = rslot;
+			}
+		}
+		if (dth_cmp(heap_id, dt, <=, cdt)) {
+			break;
+		}
+		_dispatch_timer_heap_set(dth, slot, cdt, idx);
+		slot = cslot;
+		idx = cidx;
+	}
+
+done:
+	_dispatch_timer_heap_set(dth, slot, dt, idx);
+#undef dth_cmp
+}
+
+DISPATCH_ALWAYS_INLINE
+static void
+_dispatch_timer_heap_insert(dispatch_timer_heap_t dth,
+		dispatch_timer_source_refs_t dt)
+{
+	uint32_t idx = (dth->dth_count += DTH_ID_COUNT) - DTH_ID_COUNT;
+
+	DISPATCH_TIMER_ASSERT(dt->dt_heap_entry[DTH_TARGET_ID], ==,
+			DTH_INVALID_ID, "target idx");
+	DISPATCH_TIMER_ASSERT(dt->dt_heap_entry[DTH_DEADLINE_ID], ==,
+			DTH_INVALID_ID, "deadline idx");
+
+	dispatch_qos_t qos = MAX(_dispatch_priority_qos(dt->du_priority),
+			_dispatch_priority_fallback_qos(dt->du_priority));
+	if (dth->dth_max_qos < qos) {
+		dth->dth_max_qos = (uint8_t)qos;
+		dth->dth_needs_program = true;
+	}
+
+	if (idx == 0) {
+		dth->dth_needs_program = true;
+		dt->dt_heap_entry[DTH_TARGET_ID] = DTH_TARGET_ID;
+		dt->dt_heap_entry[DTH_DEADLINE_ID] = DTH_DEADLINE_ID;
+		dth->dth_min[DTH_TARGET_ID] = dth->dth_min[DTH_DEADLINE_ID] = dt;
+		return;
+	}
+
+	if (unlikely(idx + DTH_ID_COUNT >
+			_dispatch_timer_heap_capacity(dth->dth_segments))) {
+		_dispatch_timer_heap_grow(dth);
+	}
+	_dispatch_timer_heap_resift(dth, dt, idx + DTH_TARGET_ID);
+	_dispatch_timer_heap_resift(dth, dt, idx + DTH_DEADLINE_ID);
+}
+
+static void
+_dispatch_timer_heap_remove(dispatch_timer_heap_t dth,
+		dispatch_timer_source_refs_t dt)
+{
+	uint32_t idx = (dth->dth_count -= DTH_ID_COUNT);
+
+	DISPATCH_TIMER_ASSERT(dt->dt_heap_entry[DTH_TARGET_ID], !=,
+			DTH_INVALID_ID, "target idx");
+	DISPATCH_TIMER_ASSERT(dt->dt_heap_entry[DTH_DEADLINE_ID], !=,
+			DTH_INVALID_ID, "deadline idx");
+
+	if (idx == 0) {
+		DISPATCH_TIMER_ASSERT(dth->dth_min[DTH_TARGET_ID], ==, dt,
+				"target slot");
+		DISPATCH_TIMER_ASSERT(dth->dth_min[DTH_DEADLINE_ID], ==, dt,
+				"deadline slot");
+		dth->dth_needs_program = true;
+		dth->dth_min[DTH_TARGET_ID] = dth->dth_min[DTH_DEADLINE_ID] = NULL;
+		goto clear_heap_entry;
+	}
+
+	for (uint32_t heap_id = 0; heap_id < DTH_ID_COUNT; heap_id++) {
+		dispatch_timer_source_refs_t *slot, last_dt;
+		slot = _dispatch_timer_heap_get_slot(dth, idx + heap_id);
+		last_dt = *slot; *slot = NULL;
+		if (last_dt != dt) {
+			uint32_t removed_idx = dt->dt_heap_entry[heap_id];
+			_dispatch_timer_heap_resift(dth, last_dt, removed_idx);
+		}
+	}
+	if (unlikely(idx <= _dispatch_timer_heap_capacity(dth->dth_segments - 1))) {
+		_dispatch_timer_heap_shrink(dth);
+	}
+
+clear_heap_entry:
+	dt->dt_heap_entry[DTH_TARGET_ID] = DTH_INVALID_ID;
+	dt->dt_heap_entry[DTH_DEADLINE_ID] = DTH_INVALID_ID;
+}
+
+DISPATCH_ALWAYS_INLINE
+static inline void
+_dispatch_timer_heap_update(dispatch_timer_heap_t dth,
+		dispatch_timer_source_refs_t dt)
+{
+	DISPATCH_TIMER_ASSERT(dt->dt_heap_entry[DTH_TARGET_ID], !=,
+			DTH_INVALID_ID, "target idx");
+	DISPATCH_TIMER_ASSERT(dt->dt_heap_entry[DTH_DEADLINE_ID], !=,
+			DTH_INVALID_ID, "deadline idx");
+
+	_dispatch_timer_heap_resift(dth, dt, dt->dt_heap_entry[DTH_TARGET_ID]);
+	_dispatch_timer_heap_resift(dth, dt, dt->dt_heap_entry[DTH_DEADLINE_ID]);
+}
+
+#pragma mark timer unote
+
+#define _dispatch_timer_du_debug(what, du) \
+		_dispatch_debug("kevent-source[%p]: %s kevent[%p] { ident = 0x%x }", \
+				_dispatch_wref2ptr((du)->du_owner_wref), what, \
+				(du), (du)->du_ident)
+
+DISPATCH_ALWAYS_INLINE
+static inline unsigned int
+_dispatch_timer_unote_idx(dispatch_timer_source_refs_t dt)
+{
+	dispatch_clock_t clock = _dispatch_timer_flags_to_clock(dt->du_timer_flags);
+	uint32_t qos = 0;
+
 #if DISPATCH_HAVE_TIMER_QOS
-	DISPATCH_TIMER_HEAP_INIT(WALL, CRITICAL),
-	DISPATCH_TIMER_HEAP_INIT(MACH, CRITICAL),
-	DISPATCH_TIMER_HEAP_INIT(WALL, BACKGROUND),
-	DISPATCH_TIMER_HEAP_INIT(MACH, BACKGROUND),
+	dispatch_assert(DISPATCH_TIMER_STRICT == DISPATCH_TIMER_QOS_CRITICAL);
+	dispatch_assert(DISPATCH_TIMER_BACKGROUND == DISPATCH_TIMER_QOS_BACKGROUND);
+	qos = dt->du_timer_flags & (DISPATCH_TIMER_STRICT|DISPATCH_TIMER_BACKGROUND);
+	// flags are normalized so this should never happen
+	dispatch_assert(qos < DISPATCH_TIMER_QOS_COUNT);
 #endif
-};
+
+	return DISPATCH_TIMER_INDEX(clock, qos);
+}
+
+static void
+_dispatch_timer_unote_disarm(dispatch_timer_source_refs_t dt,
+		dispatch_timer_heap_t dth)
+{
+	uint32_t tidx = dt->du_ident;
+
+	dispatch_assert(_dispatch_unote_armed(dt));
+	_dispatch_timer_heap_remove(&dth[tidx], dt);
+	_dispatch_timers_heap_dirty(dth, tidx);
+	_dispatch_unote_state_clear_bit(dt, DU_STATE_ARMED);
+	_dispatch_timer_du_debug("disarmed", dt);
+}
+
+static void
+_dispatch_timer_unote_arm(dispatch_timer_source_refs_t dt,
+		dispatch_timer_heap_t dth, uint32_t tidx)
+{
+	if (_dispatch_unote_armed(dt)) {
+		DISPATCH_TIMER_ASSERT(dt->du_ident, ==, tidx, "tidx");
+		_dispatch_timer_heap_update(&dth[tidx], dt);
+		_dispatch_timer_du_debug("updated", dt);
+	} else {
+		dt->du_ident = tidx;
+		_dispatch_timer_heap_insert(&dth[tidx], dt);
+		_dispatch_unote_state_set_bit(dt, DU_STATE_ARMED);
+		_dispatch_timer_du_debug("armed", dt);
+	}
+	_dispatch_timers_heap_dirty(dth, tidx);
+}
+
+#define DISPATCH_TIMER_UNOTE_TRACE_SUSPENSION 0x1
+
+DISPATCH_ALWAYS_INLINE
+static inline bool
+_dispatch_timer_unote_needs_rearm(dispatch_timer_source_refs_t dr, int flags)
+{
+	dispatch_source_t ds = _dispatch_source_from_refs(dr);
+	if (unlikely(DISPATCH_QUEUE_IS_SUSPENDED(ds))) {
+		if (flags & DISPATCH_TIMER_UNOTE_TRACE_SUSPENSION) {
+			_dispatch_ktrace1(DISPATCH_PERF_suspended_timer_fire, ds);
+		}
+		return false;
+	}
+	return dr->du_ident != DISPATCH_TIMER_IDENT_CANCELED &&
+			dr->dt_timer.target < INT64_MAX;
+}
+
+DISPATCH_NOINLINE
+static void
+_dispatch_timer_unote_register(dispatch_timer_source_refs_t dt,
+		dispatch_wlh_t wlh, dispatch_priority_t pri)
+{
+	// aggressively coalesce background/maintenance QoS timers
+	// <rdar://problem/12200216&27342536>
+	if (_dispatch_qos_is_background(_dispatch_priority_qos(pri))) {
+		if (dt->du_timer_flags & DISPATCH_TIMER_STRICT) {
+			_dispatch_ktrace1(DISPATCH_PERF_strict_bg_timer,
+					_dispatch_source_from_refs(dt));
+		} else {
+			dt->du_timer_flags |= DISPATCH_TIMER_BACKGROUND;
+			dt->du_ident = _dispatch_timer_unote_idx(dt);
+		}
+	}
+	// _dispatch_source_activate() can pre-set a wlh for timers directly
+	// attached to their workloops.
+	if (_dispatch_unote_wlh(dt) != wlh) {
+		dispatch_assert(_dispatch_unote_wlh(dt) == NULL);
+		_dispatch_unote_state_set(dt, DISPATCH_WLH_ANON, 0);
+	}
+	if (os_atomic_load2o(dt, dt_pending_config, relaxed)) {
+		_dispatch_timer_unote_configure(dt);
+	}
+}
+
+void
+_dispatch_timer_unote_configure(dispatch_timer_source_refs_t dt)
+{
+	dispatch_timer_config_t dtc;
+
+	dtc = os_atomic_xchg2o(dt, dt_pending_config, NULL, dependency);
+	if (dtc->dtc_clock != _dispatch_timer_flags_to_clock(dt->du_timer_flags)) {
+		dt->du_timer_flags &= ~_DISPATCH_TIMER_CLOCK_MASK;
+		dt->du_timer_flags |= _dispatch_timer_flags_from_clock(dtc->dtc_clock);
+	}
+	dt->dt_timer = dtc->dtc_timer;
+	free(dtc);
+	// Clear any pending data that might have accumulated on
+	// older timer params <rdar://problem/8574886>
+	os_atomic_store2o(dt, ds_pending_data, 0, relaxed);
+
+	if (_dispatch_unote_armed(dt)) {
+		return _dispatch_timer_unote_resume(dt);
+	}
+}
+
+static inline dispatch_timer_heap_t
+_dispatch_timer_unote_heap(dispatch_timer_source_refs_t dt)
+{
+	dispatch_wlh_t wlh = _dispatch_unote_wlh(dt);
+	if (wlh == DISPATCH_WLH_ANON) {
+		return _dispatch_timers_heap;
+	}
+	return ((dispatch_workloop_t)wlh)->dwl_timer_heap;
+}
+
+DISPATCH_NOINLINE
+static void
+_dispatch_timer_unote_resume(dispatch_timer_source_refs_t dt)
+{
+	// ... and now reflect any impact the reconfiguration has on the heap.
+	// The heap also owns a +2 on dispatch sources it references, so maintain
+	// this invariant as we tweak the registration.
+
+	bool will_arm = _dispatch_timer_unote_needs_rearm(dt, 0);
+	bool was_armed = _dispatch_unote_armed(dt);
+	uint32_t tidx = _dispatch_timer_unote_idx(dt);
+	dispatch_timer_heap_t dth = _dispatch_timer_unote_heap(dt);
+
+	if (unlikely(was_armed && (!will_arm || dt->du_ident != tidx))) {
+		_dispatch_timer_unote_disarm(dt, dth);
+	}
+	if (will_arm) {
+		if (!was_armed) _dispatch_retain_unote_owner(dt);
+		_dispatch_timer_unote_arm(dt, dth, tidx);
+	} else if (was_armed) {
+		_dispatch_release_unote_owner_tailcall(dt);
+	}
+}
+
+DISPATCH_NOINLINE
+static void
+_dispatch_timer_unote_unregister(dispatch_timer_source_refs_t dt)
+{
+	dispatch_timer_heap_t dth = _dispatch_timer_unote_heap(dt);
+	if (_dispatch_unote_armed(dt)) {
+		_dispatch_timer_unote_disarm(dt, dth);
+		_dispatch_release_2_no_dispose(_dispatch_source_from_refs(dt));
+	}
+	_dispatch_wlh_release(_dispatch_unote_wlh(dt));
+	_dispatch_unote_state_set(dt, DU_STATE_UNREGISTERED);
+	dt->du_ident = DISPATCH_TIMER_IDENT_CANCELED;
+}
 
 static dispatch_unote_t
 _dispatch_source_timer_create(dispatch_source_type_t dst,
 		uintptr_t handle, uintptr_t mask)
 {
-	uint32_t fflags = dst->dst_fflags;
-	dispatch_unote_t du;
+	dispatch_timer_source_refs_t dt;
 
 	// normalize flags
 	if (mask & DISPATCH_TIMER_STRICT) {
 		mask &= ~(uintptr_t)DISPATCH_TIMER_BACKGROUND;
 	}
+	if (mask & ~dst->dst_mask) {
+		return DISPATCH_UNOTE_NULL;
+	}
 
-	if (fflags & DISPATCH_TIMER_INTERVAL) {
+	if (dst->dst_timer_flags & DISPATCH_TIMER_INTERVAL) {
 		if (!handle) return DISPATCH_UNOTE_NULL;
-		du = _dispatch_unote_create_without_handle(dst, 0, mask);
-	} else {
-		du = _dispatch_unote_create_without_handle(dst, handle, mask);
+	} else if (dst->dst_filter == DISPATCH_EVFILT_TIMER_WITH_CLOCK) {
+		if (handle) return DISPATCH_UNOTE_NULL;
+	} else switch (handle) {
+	case 0:
+		break;
+	case DISPATCH_CLOCKID_UPTIME:
+		dst = &_dispatch_source_type_timer_with_clock;
+		mask |= DISPATCH_TIMER_CLOCK_UPTIME;
+		break;
+	case DISPATCH_CLOCKID_MONOTONIC:
+		dst = &_dispatch_source_type_timer_with_clock;
+		mask |= DISPATCH_TIMER_CLOCK_MONOTONIC;
+		break;
+	case DISPATCH_CLOCKID_WALLTIME:
+		dst = &_dispatch_source_type_timer_with_clock;
+		mask |= DISPATCH_TIMER_CLOCK_WALL;
+		break;
+	default:
+		return DISPATCH_UNOTE_NULL;
 	}
 
-	if (du._dt) {
-		du._dt->du_is_timer = true;
-		du._dt->du_data_action = DISPATCH_UNOTE_ACTION_DATA_ADD;
-		du._dt->du_fflags |= fflags;
-		du._dt->du_ident = _dispatch_source_timer_idx(du);
-		du._dt->dt_timer.target = UINT64_MAX;
-		du._dt->dt_timer.deadline = UINT64_MAX;
-		du._dt->dt_timer.interval = UINT64_MAX;
-		du._dt->dt_heap_entry[DTH_TARGET_ID] = DTH_INVALID_ID;
-		du._dt->dt_heap_entry[DTH_DEADLINE_ID] = DTH_INVALID_ID;
-	}
-	return du;
+	dt = _dispatch_calloc(1u, dst->dst_size);
+	dt->du_type = dst;
+	dt->du_filter = dst->dst_filter;
+	dt->du_is_timer = true;
+	dt->du_timer_flags |= (uint8_t)(mask | dst->dst_timer_flags);
+	dt->du_ident = _dispatch_timer_unote_idx(dt);
+	dt->dt_timer.target = UINT64_MAX;
+	dt->dt_timer.deadline = UINT64_MAX;
+	dt->dt_timer.interval = UINT64_MAX;
+	dt->dt_heap_entry[DTH_TARGET_ID] = DTH_INVALID_ID;
+	dt->dt_heap_entry[DTH_DEADLINE_ID] = DTH_INVALID_ID;
+	return (dispatch_unote_t){ ._dt = dt };
 }
 
 const dispatch_source_type_s _dispatch_source_type_timer = {
-	.dst_kind       = "timer",
-	.dst_filter     = DISPATCH_EVFILT_TIMER,
-	.dst_flags      = EV_DISPATCH,
-	.dst_mask       = DISPATCH_TIMER_STRICT|DISPATCH_TIMER_BACKGROUND,
-	.dst_fflags     = 0,
-	.dst_size       = sizeof(struct dispatch_timer_source_refs_s),
+	.dst_kind           = "timer",
+	.dst_filter         = DISPATCH_EVFILT_TIMER,
+	.dst_flags          = EV_DISPATCH,
+	.dst_mask           = DISPATCH_TIMER_STRICT|DISPATCH_TIMER_BACKGROUND,
+	.dst_timer_flags    = 0,
+	.dst_action         = DISPATCH_UNOTE_ACTION_SOURCE_TIMER,
+	.dst_size           = sizeof(struct dispatch_timer_source_refs_s),
+	.dst_strict         = false,
 
-	.dst_create     = _dispatch_source_timer_create,
+	.dst_create         = _dispatch_source_timer_create,
+	.dst_merge_evt      = _dispatch_source_merge_evt,
+};
+
+const dispatch_source_type_s _dispatch_source_type_timer_with_clock = {
+	.dst_kind           = "timer (fixed-clock)",
+	.dst_filter         = DISPATCH_EVFILT_TIMER_WITH_CLOCK,
+	.dst_flags          = EV_DISPATCH,
+	.dst_mask           = DISPATCH_TIMER_STRICT|DISPATCH_TIMER_BACKGROUND,
+	.dst_timer_flags    = 0,
+	.dst_action         = DISPATCH_UNOTE_ACTION_SOURCE_TIMER,
+	.dst_size           = sizeof(struct dispatch_timer_source_refs_s),
+
+	.dst_create         = _dispatch_source_timer_create,
+	.dst_merge_evt      = _dispatch_source_merge_evt,
 };
 
 const dispatch_source_type_s _dispatch_source_type_after = {
-	.dst_kind       = "timer (after)",
-	.dst_filter     = DISPATCH_EVFILT_TIMER,
-	.dst_flags      = EV_DISPATCH,
-	.dst_mask       = 0,
-	.dst_fflags     = DISPATCH_TIMER_AFTER,
-	.dst_size       = sizeof(struct dispatch_timer_source_refs_s),
+	.dst_kind           = "timer (after)",
+	.dst_filter         = DISPATCH_EVFILT_TIMER_WITH_CLOCK,
+	.dst_flags          = EV_DISPATCH,
+	.dst_mask           = 0,
+	.dst_timer_flags    = DISPATCH_TIMER_AFTER,
+	.dst_action         = DISPATCH_UNOTE_ACTION_SOURCE_TIMER,
+	.dst_size           = sizeof(struct dispatch_timer_source_refs_s),
 
-	.dst_create     = _dispatch_source_timer_create,
+	.dst_create         = _dispatch_source_timer_create,
+	.dst_merge_evt      = _dispatch_source_merge_evt,
 };
 
 const dispatch_source_type_s _dispatch_source_type_interval = {
-	.dst_kind       = "timer (interval)",
-	.dst_filter     = DISPATCH_EVFILT_TIMER,
-	.dst_flags      = EV_DISPATCH,
-	.dst_mask       = DISPATCH_TIMER_STRICT|DISPATCH_TIMER_BACKGROUND
-			|DISPATCH_INTERVAL_UI_ANIMATION,
-	.dst_fflags     = DISPATCH_TIMER_INTERVAL|DISPATCH_TIMER_CLOCK_MACH,
-	.dst_size       = sizeof(struct dispatch_timer_source_refs_s),
+	.dst_kind           = "timer (interval)",
+	.dst_filter         = DISPATCH_EVFILT_TIMER_WITH_CLOCK,
+	.dst_flags          = EV_DISPATCH,
+	.dst_mask           = DISPATCH_TIMER_STRICT|DISPATCH_TIMER_BACKGROUND|
+			DISPATCH_INTERVAL_UI_ANIMATION,
+	.dst_timer_flags    = DISPATCH_TIMER_INTERVAL|DISPATCH_TIMER_CLOCK_UPTIME,
+	.dst_action         = DISPATCH_UNOTE_ACTION_SOURCE_TIMER,
+	.dst_size           = sizeof(struct dispatch_timer_source_refs_s),
 
-	.dst_create     = _dispatch_source_timer_create,
+	.dst_create         = _dispatch_source_timer_create,
+	.dst_merge_evt      = _dispatch_source_merge_evt,
 };
+
+#pragma mark timer draining
+
+static void
+_dispatch_timers_run(dispatch_timer_heap_t dth, uint32_t tidx,
+		dispatch_clock_now_cache_t nows)
+{
+	dispatch_timer_source_refs_t dr;
+	uint64_t pending, now;
+
+	while ((dr = dth[tidx].dth_min[DTH_TARGET_ID])) {
+		DISPATCH_TIMER_ASSERT(dr->du_ident, ==, tidx, "tidx");
+		DISPATCH_TIMER_ASSERT(dr->dt_timer.target, !=, 0, "missing target");
+
+		now = _dispatch_time_now_cached(DISPATCH_TIMER_CLOCK(tidx), nows);
+		if (dr->dt_timer.target > now) {
+			// Done running timers for now.
+			break;
+		}
+
+		if (dr->du_timer_flags & DISPATCH_TIMER_AFTER) {
+			_dispatch_timer_unote_disarm(dr, dth); // +2 is consumed by _merge_evt()
+			_dispatch_wlh_release(_dispatch_unote_wlh(dr));
+			_dispatch_unote_state_set(dr, DU_STATE_UNREGISTERED);
+			os_atomic_store2o(dr, ds_pending_data, 2, relaxed);
+			_dispatch_trace_timer_fire(dr, 1, 1);
+			dux_merge_evt(dr, EV_ONESHOT, 0, 0);
+			continue;
+		}
+
+		if (os_atomic_load2o(dr, dt_pending_config, relaxed)) {
+			_dispatch_timer_unote_configure(dr);
+			continue;
+		}
+
+		// We want to try to keep repeating timers in the heap if their handler
+		// is keeping up to avoid useless hops through the manager thread.
+		//
+		// However, if we can observe a non-consumed ds_pending_data, we have to
+		// remove the timer from the heap until the handler keeps up (disarm).
+		// Such an operation is a one-way street, as _dispatch_source_invoke2()
+		// can decide to dispose of a timer without going back to the manager if
+		// it can observe that it is disarmed.
+		//
+		// To solve this race, we use the MISSED marker in ds_pending_data
+		// with a release barrier to make the changes accumulated on `ds_timer`
+		// visible to _dispatch_source_timer_data(). Doing this also transfers
+		// the responsibility to call _dispatch_timer_unote_compute_missed()
+		// to _dispatch_source_invoke2() without the manager involvement.
+		//
+		// Suspension also causes the timer to be removed from the heap. We need
+		// to make sure _dispatch_source_timer_data() will recompute the proper
+		// number of fired events when the source is resumed, and also use the
+		// MISSED marker for this similar purpose.
+		if (unlikely(os_atomic_load2o(dr, ds_pending_data, relaxed))) {
+			_dispatch_timer_unote_disarm(dr, dth);
+			pending = os_atomic_or_orig2o(dr, ds_pending_data,
+					DISPATCH_TIMER_DISARMED_MARKER, relaxed);
+		} else {
+			pending = _dispatch_timer_unote_compute_missed(dr, now, 0) << 1;
+			if (_dispatch_timer_unote_needs_rearm(dr,
+					DISPATCH_TIMER_UNOTE_TRACE_SUSPENSION)) {
+				// _dispatch_source_merge_evt() consumes a +2 which we transfer
+				// from the heap ownership when we disarm the timer. If it stays
+				// armed, we need to take new retain counts
+				_dispatch_retain_unote_owner(dr);
+				_dispatch_timer_unote_arm(dr, dth, tidx);
+				os_atomic_store2o(dr, ds_pending_data, pending, relaxed);
+			} else {
+				_dispatch_timer_unote_disarm(dr, dth);
+				pending |= DISPATCH_TIMER_DISARMED_MARKER;
+				os_atomic_store2o(dr, ds_pending_data, pending, release);
+			}
+		}
+		_dispatch_trace_timer_fire(dr, pending >> 1, pending >> 1);
+		dux_merge_evt(dr, EV_ONESHOT, 0, 0);
+	}
+}
+
+#if DISPATCH_HAVE_TIMER_COALESCING
+#define DISPATCH_KEVENT_COALESCING_WINDOW_INIT(qos, ms) \
+		[DISPATCH_TIMER_QOS_##qos] = 2ull * (ms) * NSEC_PER_MSEC
+
+static const uint64_t _dispatch_kevent_coalescing_window[] = {
+	DISPATCH_KEVENT_COALESCING_WINDOW_INIT(NORMAL, 75),
+#if DISPATCH_HAVE_TIMER_QOS
+	DISPATCH_KEVENT_COALESCING_WINDOW_INIT(CRITICAL, 1),
+	DISPATCH_KEVENT_COALESCING_WINDOW_INIT(BACKGROUND, 100),
+#endif
+};
+#endif // DISPATCH_HAVE_TIMER_COALESCING
+
+DISPATCH_ALWAYS_INLINE
+static inline dispatch_timer_delay_s
+_dispatch_timers_get_delay(dispatch_timer_heap_t dth, uint32_t tidx,
+		uint32_t qos, dispatch_clock_now_cache_t nows)
+{
+	uint64_t target, deadline;
+	dispatch_timer_delay_s rc;
+
+	if (!dth[tidx].dth_min[DTH_TARGET_ID]) {
+		rc.delay = rc.leeway = INT64_MAX;
+		return rc;
+	}
+
+	target = dth[tidx].dth_min[DTH_TARGET_ID]->dt_timer.target;
+	deadline = dth[tidx].dth_min[DTH_DEADLINE_ID]->dt_timer.deadline;
+	dispatch_assert(target <= deadline && target < INT64_MAX);
+
+	uint64_t now = _dispatch_time_now_cached(DISPATCH_TIMER_CLOCK(tidx), nows);
+	if (target <= now) {
+		rc.delay = rc.leeway = 0;
+		return rc;
+	}
+
+	if (qos < DISPATCH_TIMER_QOS_COUNT && dth[tidx].dth_count > 2) {
+#if DISPATCH_HAVE_TIMER_COALESCING
+		// Timer pre-coalescing <rdar://problem/13222034>
+		// When we have several timers with this target/deadline bracket:
+		//
+		//      Target        window  Deadline
+		//        V           <-------V
+		// t1:    [...........|.................]
+		// t2:         [......|.......]
+		// t3:             [..|..........]
+		// t4:                | [.............]
+		//                 ^
+		//          Optimal Target
+		//
+		// Coalescing works better if the Target is delayed to "Optimal", by
+		// picking the latest target that isn't too close to the deadline.
+		uint64_t window = _dispatch_kevent_coalescing_window[qos];
+		if (target + window < deadline) {
+			uint64_t latest = deadline - window;
+			target = _dispatch_timer_heap_max_target_before(&dth[tidx], latest);
+		}
+#endif
+	}
+
+	rc.delay = MIN(target - now, INT64_MAX);
+	rc.leeway = MIN(deadline - target, INT64_MAX);
+	return rc;
+}
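The pre-coalescing comment above boils down to: delay the target to the latest pending target that still leaves a full coalescing window before the earliest deadline. A self-contained sketch, with a plain array standing in for the timer heap (illustrative only, not the real heap API):

	#include <stddef.h>
	#include <stdint.h>

	// Returns the "Optimal Target" from the diagram above: the latest target
	// that is still no later than (deadline - window).
	static uint64_t
	coalesced_target(const uint64_t *targets, size_t n,
			uint64_t min_target, uint64_t deadline, uint64_t window)
	{
		uint64_t best = min_target;
		if (min_target + window < deadline) {
			uint64_t latest = deadline - window;
			for (size_t i = 0; i < n; i++) {
				if (targets[i] <= latest && targets[i] > best) best = targets[i];
			}
		}
		return best;
	}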
+
+static void
+_dispatch_timers_program(dispatch_timer_heap_t dth, uint32_t tidx,
+		dispatch_clock_now_cache_t nows)
+{
+	uint32_t qos = DISPATCH_TIMER_QOS(tidx);
+	dispatch_timer_delay_s range;
+
+	range = _dispatch_timers_get_delay(dth, tidx, qos, nows);
+	if (range.delay == 0) {
+		_dispatch_timers_heap_dirty(dth, tidx);
+	}
+	if (range.delay == 0 || range.delay >= INT64_MAX) {
+		_dispatch_trace_next_timer_set(NULL, qos);
+		if (dth[tidx].dth_armed) {
+			_dispatch_event_loop_timer_delete(dth, tidx);
+		}
+		dth[tidx].dth_armed = false;
+		dth[tidx].dth_needs_program = false;
+	} else {
+		_dispatch_trace_next_timer_set(dth[tidx].dth_min[DTH_TARGET_ID], qos);
+		_dispatch_trace_next_timer_program(range.delay, qos);
+		_dispatch_event_loop_timer_arm(dth, tidx, range, nows);
+		dth[tidx].dth_armed = true;
+		dth[tidx].dth_needs_program = false;
+	}
+}
+
+void
+_dispatch_event_loop_drain_timers(dispatch_timer_heap_t dth, uint32_t count)
+{
+	dispatch_clock_now_cache_s nows = { };
+	uint32_t tidx;
+
+	do {
+		for (tidx = 0; tidx < count; tidx++) {
+			_dispatch_timers_run(dth, tidx, &nows);
+		}
+
+#if DISPATCH_USE_DTRACE
+		uint32_t mask = dth[0].dth_dirty_bits & DTH_DIRTY_QOS_MASK;
+		while (mask && DISPATCH_TIMER_WAKE_ENABLED()) {
+			int qos = __builtin_ctz(mask);
+			mask -= 1 << qos;
+			_dispatch_trace_timer_wake(_dispatch_trace_next_timer[qos]);
+		}
+#endif // DISPATCH_USE_DTRACE
+
+		dth[0].dth_dirty_bits = 0;
+
+		for (tidx = 0; tidx < count; tidx++) {
+			if (dth[tidx].dth_needs_program) {
+				_dispatch_timers_program(dth, tidx, &nows);
+			}
+		}
+
+		/*
+		 * Note: dth_dirty_bits being set again can happen if we notice
+		 * a new configuration during _dispatch_timers_run() that causes
+		 * the timer to change clocks for a bucket we already drained.
+		 *
+		 * This is however extremely unlikely, and because we drain relatively
+		 * to a constant cached "now", this will converge quickly.
+		 */
+	} while (unlikely(dth[0].dth_dirty_bits));
+}
diff --git a/src/event/event_config.h b/src/event/event_config.h
index c0c38b0..4f4b6e5 100644
--- a/src/event/event_config.h
+++ b/src/event/event_config.h
@@ -80,6 +80,12 @@
 #endif
 
 #if DISPATCH_EVENT_BACKEND_KEVENT
+#	if defined(EV_UDATA_SPECIFIC) && EV_UDATA_SPECIFIC
+#		define DISPATCH_HAVE_DIRECT_KNOTES 1
+#	else
+#		define DISPATCH_HAVE_DIRECT_KNOTES 0
+#	endif
+
 #	if defined(EV_SET_QOS)
 #		define DISPATCH_USE_KEVENT_QOS 1
 #	else
@@ -137,6 +143,10 @@
 #	undef HAVE_DECL_VQ_DESIRED_DISK
 #	endif // VQ_DESIRED_DISK
 
+#	ifndef VQ_FREE_SPACE_CHANGE
+#	undef HAVE_DECL_VQ_FREE_SPACE_CHANGE
+#	endif // VQ_FREE_SPACE_CHANGE
+
 #	if !defined(EVFILT_NW_CHANNEL) && defined(__APPLE__)
 #	define EVFILT_NW_CHANNEL       (-16)
 #	define NOTE_FLOW_ADV_UPDATE    	0x1
@@ -158,6 +168,7 @@
 
 #	define DISPATCH_HAVE_TIMER_QOS 0
 #	define DISPATCH_HAVE_TIMER_COALESCING 0
+#	define DISPATCH_HAVE_DIRECT_KNOTES 0
 #endif // !DISPATCH_EVENT_BACKEND_KEVENT
 
 // These flags are used by dispatch generic code and
@@ -179,11 +190,11 @@
 #define DISPATCH_EV_MSG_NEEDS_FREE	0x10000 // mach message needs to be freed()
 
 #define DISPATCH_EVFILT_TIMER				(-EVFILT_SYSCOUNT - 1)
-#define DISPATCH_EVFILT_CUSTOM_ADD			(-EVFILT_SYSCOUNT - 2)
-#define DISPATCH_EVFILT_CUSTOM_OR			(-EVFILT_SYSCOUNT - 3)
-#define DISPATCH_EVFILT_CUSTOM_REPLACE		(-EVFILT_SYSCOUNT - 4)
-#define DISPATCH_EVFILT_MACH_NOTIFICATION	(-EVFILT_SYSCOUNT - 5)
-#define DISPATCH_EVFILT_SYSCOUNT			( EVFILT_SYSCOUNT + 5)
+#define DISPATCH_EVFILT_TIMER_WITH_CLOCK	(-EVFILT_SYSCOUNT - 2)
+#define DISPATCH_EVFILT_CUSTOM_ADD			(-EVFILT_SYSCOUNT - 3)
+#define DISPATCH_EVFILT_CUSTOM_OR			(-EVFILT_SYSCOUNT - 4)
+#define DISPATCH_EVFILT_CUSTOM_REPLACE		(-EVFILT_SYSCOUNT - 5)
+#define DISPATCH_EVFILT_MACH_NOTIFICATION	(-EVFILT_SYSCOUNT - 6)
 
 #if HAVE_MACH
 #	if !EV_UDATA_SPECIFIC
diff --git a/src/event/event_epoll.c b/src/event/event_epoll.c
index 0425cb2..7c746c0 100644
--- a/src/event/event_epoll.c
+++ b/src/event/event_epoll.c
@@ -38,15 +38,16 @@
 #define DISPATCH_EPOLL_MAX_EVENT_COUNT 16
 
 enum {
-	DISPATCH_EPOLL_EVENTFD    = 0x0001,
-	DISPATCH_EPOLL_CLOCK_WALL = 0x0002,
-	DISPATCH_EPOLL_CLOCK_MACH = 0x0003,
+	DISPATCH_EPOLL_EVENTFD         = 0x0001,
+	DISPATCH_EPOLL_CLOCK_WALL      = 0x0002,
+	DISPATCH_EPOLL_CLOCK_UPTIME    = 0x0003,
+	DISPATCH_EPOLL_CLOCK_MONOTONIC = 0x0004,
 };
 
 typedef struct dispatch_muxnote_s {
-	TAILQ_ENTRY(dispatch_muxnote_s) dmn_list;
-	TAILQ_HEAD(, dispatch_unote_linkage_s) dmn_readers_head;
-	TAILQ_HEAD(, dispatch_unote_linkage_s) dmn_writers_head;
+	LIST_ENTRY(dispatch_muxnote_s) dmn_list;
+	LIST_HEAD(, dispatch_unote_linkage_s) dmn_readers_head;
+	LIST_HEAD(, dispatch_unote_linkage_s) dmn_writers_head;
 	int       dmn_fd;
 	uint32_t  dmn_ident;
 	uint32_t  dmn_events;
@@ -68,8 +69,7 @@
 static dispatch_once_t epoll_init_pred;
 static void _dispatch_epoll_init(void *);
 
-DISPATCH_CACHELINE_ALIGN
-static TAILQ_HEAD(dispatch_muxnote_bucket_s, dispatch_muxnote_s)
+static LIST_HEAD(dispatch_muxnote_bucket_s, dispatch_muxnote_s)
 _dispatch_sources[DSL_HASH_SIZE];
 
 #define DISPATCH_EPOLL_TIMEOUT_INITIALIZER(clock) \
@@ -79,7 +79,8 @@
 	}
 static struct dispatch_epoll_timeout_s _dispatch_epoll_timeout[] = {
 	DISPATCH_EPOLL_TIMEOUT_INITIALIZER(WALL),
-	DISPATCH_EPOLL_TIMEOUT_INITIALIZER(MACH),
+	DISPATCH_EPOLL_TIMEOUT_INITIALIZER(UPTIME),
+	DISPATCH_EPOLL_TIMEOUT_INITIALIZER(MONOTONIC),
 };
 
 #pragma mark dispatch_muxnote_t
@@ -107,7 +108,7 @@
 {
 	dispatch_muxnote_t dmn;
 	if (filter == EVFILT_WRITE) filter = EVFILT_READ;
-	TAILQ_FOREACH(dmn, dmb, dmn_list) {
+	LIST_FOREACH(dmn, dmb, dmn_list) {
 		if (dmn->dmn_ident == ident && dmn->dmn_filter == filter) {
 			break;
 		}
@@ -201,8 +202,8 @@
 	}
 
 	dmn = _dispatch_calloc(1, sizeof(struct dispatch_muxnote_s));
-	TAILQ_INIT(&dmn->dmn_readers_head);
-	TAILQ_INIT(&dmn->dmn_writers_head);
+	LIST_INIT(&dmn->dmn_readers_head);
+	LIST_INIT(&dmn->dmn_writers_head);
 	dmn->dmn_fd = fd;
 	dmn->dmn_ident = du._du->du_ident;
 	dmn->dmn_filter = filter;
@@ -244,7 +245,7 @@
 		break;
 	}
 
-	if (du._du->du_type->dst_flags & EV_DISPATCH) {
+	if (dux_type(du._du)->dst_flags & EV_DISPATCH) {
 		events |= EPOLLONESHOT;
 	}
 
@@ -252,15 +253,13 @@
 }
 
 bool
-_dispatch_unote_register(dispatch_unote_t du,
-		DISPATCH_UNUSED dispatch_wlh_t wlh, dispatch_priority_t pri)
+_dispatch_unote_register_muxed(dispatch_unote_t du)
 {
 	struct dispatch_muxnote_bucket_s *dmb;
 	dispatch_muxnote_t dmn;
-	uint32_t events = _dispatch_unote_required_events(du);
+	uint32_t events;
 
-	dispatch_assert(!_dispatch_unote_registered(du));
-	du._du->du_priority = pri;
+	events = _dispatch_unote_required_events(du);
 
 	dmb = _dispatch_unote_muxnote_bucket(du);
 	dmn = _dispatch_unote_muxnote_find(dmb, du);
@@ -281,7 +280,7 @@
 				_dispatch_muxnote_dispose(dmn);
 				dmn = NULL;
 			} else {
-				TAILQ_INSERT_TAIL(dmb, dmn, dmn_list);
+				LIST_INSERT_HEAD(dmb, dmn, dmn_list);
 			}
 		}
 	}
@@ -289,19 +288,18 @@
 	if (dmn) {
 		dispatch_unote_linkage_t dul = _dispatch_unote_get_linkage(du);
 		if (events & EPOLLOUT) {
-			TAILQ_INSERT_TAIL(&dmn->dmn_writers_head, dul, du_link);
+			LIST_INSERT_HEAD(&dmn->dmn_writers_head, dul, du_link);
 		} else {
-			TAILQ_INSERT_TAIL(&dmn->dmn_readers_head, dul, du_link);
+			LIST_INSERT_HEAD(&dmn->dmn_readers_head, dul, du_link);
 		}
 		dul->du_muxnote = dmn;
-		dispatch_assert(du._du->du_wlh == NULL);
-		du._du->du_wlh = DISPATCH_WLH_ANON;
+		_dispatch_unote_state_set(du, DISPATCH_WLH_ANON, DU_STATE_ARMED);
 	}
 	return dmn != NULL;
 }
 
 void
-_dispatch_unote_resume(dispatch_unote_t du)
+_dispatch_unote_resume_muxed(dispatch_unote_t du)
 {
 	dispatch_muxnote_t dmn = _dispatch_unote_get_linkage(du)->du_muxnote;
 	dispatch_assert(_dispatch_unote_registered(du));
@@ -315,57 +313,43 @@
 }
 
 bool
-_dispatch_unote_unregister(dispatch_unote_t du, DISPATCH_UNUSED uint32_t flags)
+_dispatch_unote_unregister_muxed(dispatch_unote_t du)
 {
-	switch (du._du->du_filter) {
-	case DISPATCH_EVFILT_CUSTOM_ADD:
-	case DISPATCH_EVFILT_CUSTOM_OR:
-	case DISPATCH_EVFILT_CUSTOM_REPLACE:
-		du._du->du_wlh = NULL;
-		return true;
+	dispatch_unote_linkage_t dul = _dispatch_unote_get_linkage(du);
+	dispatch_muxnote_t dmn = dul->du_muxnote;
+	uint32_t events = dmn->dmn_events;
+
+	LIST_REMOVE(dul, du_link);
+	_LIST_TRASH_ENTRY(dul, du_link);
+	dul->du_muxnote = NULL;
+
+	if (LIST_EMPTY(&dmn->dmn_readers_head)) {
+		events &= (uint32_t)~EPOLLIN;
+		if (dmn->dmn_disarmed_events & EPOLLIN) {
+			dmn->dmn_disarmed_events &= (uint16_t)~EPOLLIN;
+			dmn->dmn_events &= (uint32_t)~EPOLLIN;
+		}
 	}
-	if (_dispatch_unote_registered(du)) {
-		dispatch_unote_linkage_t dul = _dispatch_unote_get_linkage(du);
-		dispatch_muxnote_t dmn = dul->du_muxnote;
-		uint32_t events = dmn->dmn_events;
-
-		if (du._du->du_filter == EVFILT_WRITE) {
-			TAILQ_REMOVE(&dmn->dmn_writers_head, dul, du_link);
-		} else {
-			TAILQ_REMOVE(&dmn->dmn_readers_head, dul, du_link);
+	if (LIST_EMPTY(&dmn->dmn_writers_head)) {
+		events &= (uint32_t)~EPOLLOUT;
+		if (dmn->dmn_disarmed_events & EPOLLOUT) {
+			dmn->dmn_disarmed_events &= (uint16_t)~EPOLLOUT;
+			dmn->dmn_events &= (uint32_t)~EPOLLOUT;
 		}
-		_TAILQ_TRASH_ENTRY(dul, du_link);
-		dul->du_muxnote = NULL;
-
-		if (TAILQ_EMPTY(&dmn->dmn_readers_head)) {
-			events &= (uint32_t)~EPOLLIN;
-			if (dmn->dmn_disarmed_events & EPOLLIN) {
-				dmn->dmn_disarmed_events &= (uint16_t)~EPOLLIN;
-				dmn->dmn_events &= (uint32_t)~EPOLLIN;
-			}
-		}
-		if (TAILQ_EMPTY(&dmn->dmn_writers_head)) {
-			events &= (uint32_t)~EPOLLOUT;
-			if (dmn->dmn_disarmed_events & EPOLLOUT) {
-				dmn->dmn_disarmed_events &= (uint16_t)~EPOLLOUT;
-				dmn->dmn_events &= (uint32_t)~EPOLLOUT;
-			}
-		}
-
-		if (events & (EPOLLIN | EPOLLOUT)) {
-			if (events != _dispatch_muxnote_armed_events(dmn)) {
-				dmn->dmn_events = events;
-				events = _dispatch_muxnote_armed_events(dmn);
-				_dispatch_epoll_update(dmn, events, EPOLL_CTL_MOD);
-			}
-		} else {
-			epoll_ctl(_dispatch_epfd, EPOLL_CTL_DEL, dmn->dmn_fd, NULL);
-			TAILQ_REMOVE(_dispatch_unote_muxnote_bucket(du), dmn, dmn_list);
-			_dispatch_muxnote_dispose(dmn);
-		}
-		dispatch_assert(du._du->du_wlh == DISPATCH_WLH_ANON);
-		du._du->du_wlh = NULL;
 	}
+
+	if (events & (EPOLLIN | EPOLLOUT)) {
+		if (events != _dispatch_muxnote_armed_events(dmn)) {
+			dmn->dmn_events = events;
+			events = _dispatch_muxnote_armed_events(dmn);
+			_dispatch_epoll_update(dmn, events, EPOLL_CTL_MOD);
+		}
+	} else {
+		epoll_ctl(_dispatch_epfd, EPOLL_CTL_DEL, dmn->dmn_fd, NULL);
+		LIST_REMOVE(dmn, dmn_list);
+		_dispatch_muxnote_dispose(dmn);
+	}
+	_dispatch_unote_state_set(du, DU_STATE_UNREGISTERED);
 	return true;
 }
 
@@ -374,13 +358,14 @@
 static void
 _dispatch_event_merge_timer(dispatch_clock_t clock)
 {
-	_dispatch_timers_expired = true;
-	_dispatch_timers_processing_mask |= 1 << DISPATCH_TIMER_INDEX(clock, 0);
-#if DISPATCH_USE_DTRACE
-	_dispatch_timers_will_wake |= 1 << 0;
-#endif
+	dispatch_timer_heap_t dth = _dispatch_timers_heap;
+	uint32_t tidx = DISPATCH_TIMER_INDEX(clock, 0);
+
 	_dispatch_epoll_timeout[clock].det_armed = false;
-	_dispatch_timers_heap[clock].dth_flags &= ~DTH_ARMED;
+
+	_dispatch_timers_heap_dirty(dth, tidx);
+	dth[tidx].dth_needs_program = true;
+	dth[tidx].dth_armed = false;
 }
 
 static void
@@ -404,9 +389,12 @@
 		clockid_t clockid;
 		int fd;
 		switch (DISPATCH_TIMER_CLOCK(tidx)) {
-		case DISPATCH_CLOCK_MACH:
+		case DISPATCH_CLOCK_UPTIME:
 			clockid = CLOCK_MONOTONIC;
 			break;
+		case DISPATCH_CLOCK_MONOTONIC:
+			clockid = CLOCK_BOOTTIME;
+			break;
 		case DISPATCH_CLOCK_WALL:
 			clockid = CLOCK_REALTIME;
 			break;
@@ -440,19 +428,19 @@
 }
 
 void
-_dispatch_event_loop_timer_arm(uint32_t tidx, dispatch_timer_delay_s range,
+_dispatch_event_loop_timer_arm(dispatch_timer_heap_t dth DISPATCH_UNUSED,
+		uint32_t tidx, dispatch_timer_delay_s range,
 		dispatch_clock_now_cache_t nows)
 {
-	uint64_t target = range.delay;
-	target += _dispatch_time_now_cached(DISPATCH_TIMER_CLOCK(tidx), nows);
-	_dispatch_timers_heap[tidx].dth_flags |= DTH_ARMED;
+	dispatch_clock_t clock = DISPATCH_TIMER_CLOCK(tidx);
+	uint64_t target = range.delay + _dispatch_time_now_cached(clock, nows);
 	_dispatch_timeout_program(tidx, target, range.leeway);
 }
 
 void
-_dispatch_event_loop_timer_delete(uint32_t tidx)
+_dispatch_event_loop_timer_delete(dispatch_timer_heap_t dth DISPATCH_UNUSED,
+		uint32_t tidx)
 {
-	_dispatch_timers_heap[tidx].dth_flags &= ~DTH_ARMED;
 	_dispatch_timeout_program(tidx, UINT64_MAX, UINT64_MAX);
 }
 
@@ -468,11 +456,6 @@
 {
 	_dispatch_fork_becomes_unsafe();
 
-	unsigned int i;
-	for (i = 0; i < DSL_HASH_SIZE; i++) {
-		TAILQ_INIT(&_dispatch_sources[i]);
-	}
-
 	_dispatch_epfd = epoll_create1(EPOLL_CLOEXEC);
 	if (_dispatch_epfd < 0) {
 		DISPATCH_INTERNAL_CRASH(errno, "epoll_create1() failed");
@@ -493,6 +476,7 @@
 	}
 
 #if DISPATCH_USE_MGR_THREAD
+	_dispatch_trace_item_push(_dispatch_mgr_q.do_targetq, &_dispatch_mgr_q);
 	dx_push(_dispatch_mgr_q.do_targetq, &_dispatch_mgr_q, 0);
 #endif
 }
@@ -522,9 +506,13 @@
 	// will kick in, the thread with the wrong mask will be fixed up, and the
 	// signal delivered to us again properly.
 	if ((rc = read(dmn->dmn_fd, &si, sizeof(si))) == sizeof(si)) {
-		TAILQ_FOREACH_SAFE(dul, &dmn->dmn_readers_head, du_link, dul_next) {
+		LIST_FOREACH_SAFE(dul, &dmn->dmn_readers_head, du_link, dul_next) {
 			dispatch_unote_t du = _dispatch_unote_linkage_get_unote(dul);
-			dux_merge_evt(du._du, EV_ADD|EV_ENABLE|EV_CLEAR, 1, 0, 0);
+			// consumed by dux_merge_evt()
+			_dispatch_retain_unote_owner(du);
+			dispatch_assert(!dux_needs_rearm(du._du));
+			os_atomic_store2o(du._dr, ds_pending_data, 1, relaxed);
+			dux_merge_evt(du._du, EV_ADD|EV_ENABLE|EV_CLEAR, 1, 0);
 		}
 	} else {
 		dispatch_assume(rc == -1 && errno == EAGAIN);
@@ -571,17 +559,27 @@
 
 	if (events & EPOLLIN) {
 		data = _dispatch_get_buffer_size(dmn, false);
-		TAILQ_FOREACH_SAFE(dul, &dmn->dmn_readers_head, du_link, dul_next) {
+		LIST_FOREACH_SAFE(dul, &dmn->dmn_readers_head, du_link, dul_next) {
 			dispatch_unote_t du = _dispatch_unote_linkage_get_unote(dul);
-			dux_merge_evt(du._du, EV_ADD|EV_ENABLE|EV_DISPATCH, ~data, 0, 0);
+			// consumed by dux_merge_evt()
+			_dispatch_retain_unote_owner(du);
+			dispatch_assert(dux_needs_rearm(du._du));
+			_dispatch_unote_state_clear_bit(du, DU_STATE_ARMED);
+			os_atomic_store2o(du._dr, ds_pending_data, ~data, relaxed);
+			dux_merge_evt(du._du, EV_ADD|EV_ENABLE|EV_DISPATCH, data, 0);
 		}
 	}
 
 	if (events & EPOLLOUT) {
 		data = _dispatch_get_buffer_size(dmn, true);
-		TAILQ_FOREACH_SAFE(dul, &dmn->dmn_writers_head, du_link, dul_next) {
+		LIST_FOREACH_SAFE(dul, &dmn->dmn_writers_head, du_link, dul_next) {
 			dispatch_unote_t du = _dispatch_unote_linkage_get_unote(dul);
-			dux_merge_evt(du._du, EV_ADD|EV_ENABLE|EV_DISPATCH, ~data, 0, 0);
+			// consumed by dux_merge_evt()
+			_dispatch_retain_unote_owner(du);
+			dispatch_assert(dux_needs_rearm(du._du));
+			_dispatch_unote_state_clear_bit(du, DU_STATE_ARMED);
+			os_atomic_store2o(du._dr, ds_pending_data, ~data, relaxed);
+			dux_merge_evt(du._du, EV_ADD|EV_ENABLE|EV_DISPATCH, data, 0);
 		}
 	}
 
@@ -631,8 +629,12 @@
 			_dispatch_event_merge_timer(DISPATCH_CLOCK_WALL);
 			break;
 
-		case DISPATCH_EPOLL_CLOCK_MACH:
-			_dispatch_event_merge_timer(DISPATCH_CLOCK_MACH);
+		case DISPATCH_EPOLL_CLOCK_UPTIME:
+			_dispatch_event_merge_timer(DISPATCH_CLOCK_UPTIME);
+			break;
+
+		case DISPATCH_EPOLL_CLOCK_MONOTONIC:
+			_dispatch_event_merge_timer(DISPATCH_CLOCK_MONOTONIC);
 			break;
 
 		default:
@@ -651,6 +653,12 @@
 }
 
 void
+_dispatch_event_loop_cancel_waiter(dispatch_sync_context_t dsc)
+{
+	(void)dsc;
+}
+
+void
 _dispatch_event_loop_wake_owner(dispatch_sync_context_t dsc,
 		dispatch_wlh_t wlh, uint64_t old_state, uint64_t new_state)
 {
@@ -681,9 +689,9 @@
 #endif
 
 void
-_dispatch_event_loop_leave_immediate(dispatch_wlh_t wlh, uint64_t dq_state)
+_dispatch_event_loop_leave_immediate(uint64_t dq_state)
 {
-	(void)wlh; (void)dq_state;
+	(void)dq_state;
 }
 
 #endif // DISPATCH_EVENT_BACKEND_EPOLL
diff --git a/src/event/event_internal.h b/src/event/event_internal.h
index 76bce45..24b5412 100644
--- a/src/event/event_internal.h
+++ b/src/event/event_internal.h
@@ -29,29 +29,120 @@
 
 #include "event_config.h"
 
+/*
+ * The unote state has 3 pieces of information and reflects the state
+ * of the unote registration and mirrors the state of the knote if any.
+ *
+ * This state is peculiar in the sense that it can be read concurrently, but
+ * is never written to concurrently. This is achieved by serializing through
+ * kevent calls from the appropriate synchronization context (referred to as
+ * `dkq` for dispatch kevent queue in the dispatch source code).
+ *
+ * DU_STATE_ARMED
+ *
+ *   This bit represents the fact that the registration is active and may
+ *   receive events at any given time. This bit can only be set if the WLH bits
+ *   are set and the DU_STATE_NEEDS_DELETE bit is not.
+ *
+ * DU_STATE_NEEDS_DELETE
+ *
+ *   The kernel has indicated that it wants the next event for this unote to be
+ *   an unregistration. This bit can only be set if the DU_STATE_ARMED bit is
+ *   not set.
+ *
+ *   DU_STATE_NEEDS_DELETE may be the only bit set in the unote state
+ *
+ * DU_STATE_WLH_MASK
+ *
+ *   The most significant bits of du_state represent which event loop this unote
+ *   is registered with; a storage reference on that event loop is taken
+ *   with _dispatch_wlh_retain().
+ *
+ * Registration
+ *
+ *   A unote registration attempt is made with _dispatch_unote_register().
+ *   On success, it sets the WLH bits and the DU_STATE_ARMED bit; on failure
+ *   the state is 0.
+ *
+ *   _dispatch_unote_register() must be called from the appropriate
+ *   synchronization context depending on the unote type.
+ *
+ * Event delivery
+ *
+ *   When an event is delivered for a unote type that requires explicit
+ *   re-arming (EV_DISPATCH or EV_ONESHOT), the DU_STATE_ARMED bit is cleared.
+ *   If the event is marked as EV_ONESHOT, then the DU_STATE_NEEDS_DELETE bit
+ *   is also set, initiating the "deferred delete" state machine.
+ *
+ *   For other unote types, the state isn't touched, unless the event is
+ *   EV_ONESHOT, in which case it causes an automatic unregistration.
+ *
+ * Unregistration
+ *
+ *   The unote owner can attempt unregistering the unote with
+ *   _dispatch_unote_unregister() from the proper synchronization context
+ *   at any given time. When successful, the state will be set to 0 and the
+ *   unote is no longer active. Unregistration is always successful for events
+ *   that don't require explicit re-arming.
+ *
+ *   When this unregistration fails, the unote owner must wait for the
+ *   next event delivery for this unote.
+ */
+typedef uintptr_t dispatch_unote_state_t;
+#define DU_STATE_ARMED            ((dispatch_unote_state_t)0x1ul)
+#define DU_STATE_NEEDS_DELETE     ((dispatch_unote_state_t)0x2ul)
+#define DU_STATE_WLH_MASK         ((dispatch_unote_state_t)~0x3ul)
+#define DU_STATE_UNREGISTERED     ((dispatch_unote_state_t)0)
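As a sketch of the layout these masks imply: du_state is a wlh pointer with its two low bits borrowed for flags, which assumes dispatch_wlh_t values are at least 4-byte aligned. The illustrative helpers below mirror the _du_state_* accessors added later in this patch:

	#include <stdint.h>

	// Illustrative packing of a du_state word (ARMED and NEEDS_DELETE are
	// mutually exclusive per the rules above).
	static inline uintptr_t
	du_state_make(void *wlh, int armed, int needs_delete)
	{
		uintptr_t s = (uintptr_t)wlh;  // low two bits assumed clear
		if (armed)        s |= 0x1ul;  // DU_STATE_ARMED
		if (needs_delete) s |= 0x2ul;  // DU_STATE_NEEDS_DELETE
		return s;
	}

	static inline void *
	du_state_wlh(uintptr_t s)
	{
		return (void *)(s & ~(uintptr_t)0x3);  // DU_STATE_WLH_MASK
	}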
+
 struct dispatch_sync_context_s;
 typedef struct dispatch_wlh_s *dispatch_wlh_t; // opaque handle
-#define DISPATCH_WLH_ANON       ((dispatch_wlh_t)(void*)(~0ul))
-#define DISPATCH_WLH_MANAGER    ((dispatch_wlh_t)(void*)(~2ul))
+#define DISPATCH_WLH_ANON       ((dispatch_wlh_t)(void*)(~0x3ul))
+#define DISPATCH_WLH_MANAGER    ((dispatch_wlh_t)(void*)(~0x7ul))
 
-#define DISPATCH_UNOTE_DATA_ACTION_SIZE 2
+DISPATCH_ENUM(dispatch_unote_timer_flags, uint8_t,
+	/* DISPATCH_TIMER_STRICT 0x1 */
+	/* DISPATCH_TIMER_BACKGROUND = 0x2, */
+	DISPATCH_TIMER_CLOCK_UPTIME = DISPATCH_CLOCK_UPTIME << 2,
+	DISPATCH_TIMER_CLOCK_MONOTONIC = DISPATCH_CLOCK_MONOTONIC << 2,
+	DISPATCH_TIMER_CLOCK_WALL = DISPATCH_CLOCK_WALL << 2,
+#define _DISPATCH_TIMER_CLOCK_MASK (0x3 << 2)
+	DISPATCH_TIMER_INTERVAL = 0x10,
+	/* DISPATCH_INTERVAL_UI_ANIMATION = 0x20 */ // See source_private.h
+	DISPATCH_TIMER_AFTER = 0x40,
+);
+
+static inline dispatch_clock_t
+_dispatch_timer_flags_to_clock(dispatch_unote_timer_flags_t flags)
+{
+	return (dispatch_clock_t)((flags & _DISPATCH_TIMER_CLOCK_MASK) >> 2);
+}
+
+static inline dispatch_unote_timer_flags_t
+_dispatch_timer_flags_from_clock(dispatch_clock_t clock)
+{
+	return (dispatch_unote_timer_flags_t)(clock << 2);
+}
 
 #define DISPATCH_UNOTE_CLASS_HEADER() \
 	dispatch_source_type_t du_type; \
 	uintptr_t du_owner_wref; /* "weak" back reference to the owner object */ \
-	dispatch_wlh_t du_wlh; \
+	os_atomic(dispatch_unote_state_t) du_state; \
 	uint32_t  du_ident; \
 	int8_t    du_filter; \
-	os_atomic(bool) dmsr_notification_armed; \
-	uint16_t  du_data_action : DISPATCH_UNOTE_DATA_ACTION_SIZE; \
-	uint16_t  du_is_direct : 1; \
-	uint16_t  du_is_timer : 1; \
-	uint16_t  du_memorypressure_override : 1; \
-	uint16_t  du_vmpressure_override : 1; \
-	uint16_t  du_can_be_wlh : 1; \
-	uint16_t  dmr_async_reply : 1; \
-	uint16_t  dmrr_handler_is_block : 1; \
-	uint16_t  du_unused : 7; \
+	uint8_t   du_is_direct : 1; \
+	uint8_t   du_is_timer : 1; \
+	uint8_t   du_has_extended_status : 1; \
+	uint8_t   du_memorypressure_override : 1; \
+	uint8_t   du_vmpressure_override : 1; \
+	uint8_t   du_can_be_wlh : 1; \
+	uint8_t   dmrr_handler_is_block : 1; \
+	uint8_t   du_unused_flag : 1; \
+	union { \
+		uint8_t   du_timer_flags; \
+		os_atomic(bool) dmsr_notification_armed; \
+		bool dmr_reply_port_owned; \
+	}; \
+	uint8_t   du_unused; \
 	uint32_t  du_fflags; \
 	dispatch_priority_t du_priority
 
@@ -60,22 +151,10 @@
 #define _dispatch_source_from_refs(dr) \
 		((dispatch_source_t)_dispatch_wref2ptr((dr)->du_owner_wref))
 
-DISPATCH_ENUM(dispatch_unote_action, uint8_t,
-    DISPATCH_UNOTE_ACTION_DATA_OR = 0,
-    DISPATCH_UNOTE_ACTION_DATA_OR_STATUS_SET,
-    DISPATCH_UNOTE_ACTION_DATA_SET,
-    DISPATCH_UNOTE_ACTION_DATA_ADD,
-	DISPATCH_UNOTE_ACTION_LAST = DISPATCH_UNOTE_ACTION_DATA_ADD
-);
-_Static_assert(DISPATCH_UNOTE_ACTION_LAST <
-		(1 << DISPATCH_UNOTE_DATA_ACTION_SIZE),
-		"DISPATCH_UNOTE_ACTION_LAST too large for du_data_action field");
-
 typedef struct dispatch_unote_class_s {
 	DISPATCH_UNOTE_CLASS_HEADER();
 } *dispatch_unote_class_t;
 
-
 enum {
 	DS_EVENT_HANDLER = 0,
 	DS_CANCEL_HANDLER,
@@ -84,7 +163,23 @@
 
 #define DISPATCH_SOURCE_REFS_HEADER() \
 	DISPATCH_UNOTE_CLASS_HEADER(); \
-	struct dispatch_continuation_s *volatile ds_handler[3]
+	struct dispatch_continuation_s *volatile ds_handler[3]; \
+	uint64_t ds_data DISPATCH_ATOMIC64_ALIGN; \
+	uint64_t ds_pending_data DISPATCH_ATOMIC64_ALIGN
+
+
+// Extracts source data from the ds_data field
+#define DISPATCH_SOURCE_GET_DATA(d) ((d) & 0xFFFFFFFF)
+
+// Extracts status from the ds_data field
+#define DISPATCH_SOURCE_GET_STATUS(d) ((d) >> 32)
+
+// Combine data and status for the ds_data field
+#define DISPATCH_SOURCE_COMBINE_DATA_AND_STATUS(data, status) \
+		((((uint64_t)(status)) << 32) | (data))
+
+#define DISPATCH_TIMER_DISARMED_MARKER  1ul
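A quick round trip through the packing macros above (arbitrary values, for illustration only):

	uint64_t d = DISPATCH_SOURCE_COMBINE_DATA_AND_STATUS(0x1234u, 0x5u);
	// DISPATCH_SOURCE_GET_DATA(d)   == 0x1234 (low 32 bits)
	// DISPATCH_SOURCE_GET_STATUS(d) == 0x5    (high 32 bits)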
+
 
 // Source state which may contain references to the source object
 // Separately allocated so that 'leaks' can see sources <rdar://problem/9050566>
@@ -125,11 +220,14 @@
 } *dispatch_timer_source_refs_t;
 
 typedef struct dispatch_timer_heap_s {
-	uint64_t dth_target, dth_deadline;
 	uint32_t dth_count;
-	uint16_t dth_segments;
-#define DTH_ARMED  1u
-	uint16_t dth_flags;
+	uint8_t dth_segments;
+	uint8_t dth_max_qos;
+#define DTH_DIRTY_GLOBAL   0x80
+#define DTH_DIRTY_QOS_MASK ((1u << DISPATCH_TIMER_QOS_COUNT) - 1)
+	uint8_t dth_dirty_bits; // Only used in the first heap
+	uint8_t dth_armed : 1;
+	uint8_t dth_needs_program : 1;
 	dispatch_timer_source_refs_t dth_min[DTH_ID_COUNT];
 	void **dth_heap;
 } *dispatch_timer_heap_t;
@@ -154,15 +252,21 @@
 
 struct dispatch_mach_reply_refs_s {
 	DISPATCH_UNOTE_CLASS_HEADER();
-	dispatch_priority_t dmr_priority;
+	pthread_priority_t dmr_priority : 32;
 	void *dmr_ctxt;
 	voucher_t dmr_voucher;
-	TAILQ_ENTRY(dispatch_mach_reply_refs_s) dmr_list;
-	mach_port_t dmr_waiter_tid;
+	LIST_ENTRY(dispatch_mach_reply_refs_s) dmr_list;
 };
 typedef struct dispatch_mach_reply_refs_s *dispatch_mach_reply_refs_t;
 
-#define _DISPATCH_MACH_STATE_UNUSED_MASK        0xffffffa000000000ull
+struct dispatch_mach_reply_wait_refs_s {
+	struct dispatch_mach_reply_refs_s dwr_refs;
+	mach_port_t dwr_waiter_tid;
+};
+typedef struct dispatch_mach_reply_wait_refs_s *dispatch_mach_reply_wait_refs_t;
+
+#define _DISPATCH_MACH_STATE_UNUSED_MASK        0xffffff8000000000ull
+#define DISPATCH_MACH_STATE_ENQUEUED            0x0000008000000000ull
 #define DISPATCH_MACH_STATE_DIRTY               0x0000002000000000ull
 #define DISPATCH_MACH_STATE_PENDING_BARRIER     0x0000001000000000ull
 #define DISPATCH_MACH_STATE_RECEIVED_OVERRIDE   0x0000000800000000ull
@@ -172,29 +276,31 @@
 
 struct dispatch_mach_send_refs_s {
 	DISPATCH_UNOTE_CLASS_HEADER();
-	dispatch_mach_msg_t dmsr_checkin;
-	TAILQ_HEAD(, dispatch_mach_reply_refs_s) dmsr_replies;
 	dispatch_unfair_lock_s dmsr_replies_lock;
-#define DISPATCH_MACH_DISCONNECT_MAGIC_BASE (0x80000000)
-#define DISPATCH_MACH_NEVER_INSTALLED (DISPATCH_MACH_DISCONNECT_MAGIC_BASE + 0)
-#define DISPATCH_MACH_NEVER_CONNECTED (DISPATCH_MACH_DISCONNECT_MAGIC_BASE + 1)
-	uint32_t volatile dmsr_disconnect_cnt;
+	dispatch_mach_msg_t dmsr_checkin;
+	LIST_HEAD(, dispatch_mach_reply_refs_s) dmsr_replies;
+#define DISPATCH_MACH_NEVER_CONNECTED      0x80000000
 	DISPATCH_UNION_LE(uint64_t volatile dmsr_state,
 		dispatch_unfair_lock_s dmsr_state_lock,
 		uint32_t dmsr_state_bits
 	) DISPATCH_ATOMIC64_ALIGN;
 	struct dispatch_object_s *volatile dmsr_tail;
 	struct dispatch_object_s *volatile dmsr_head;
+	uint32_t volatile dmsr_disconnect_cnt;
 	mach_port_t dmsr_send, dmsr_checkin_port;
 };
 typedef struct dispatch_mach_send_refs_s *dispatch_mach_send_refs_t;
 
+bool _dispatch_mach_notification_armed(dispatch_mach_send_refs_t dmsr);
 void _dispatch_mach_notification_set_armed(dispatch_mach_send_refs_t dmsr);
 
 struct dispatch_xpc_term_refs_s {
 	DISPATCH_UNOTE_CLASS_HEADER();
 };
 typedef struct dispatch_xpc_term_refs_s *dispatch_xpc_term_refs_t;
+void _dispatch_sync_ipc_handoff_begin(dispatch_wlh_t wlh, mach_port_t port,
+		uint64_t _Atomic *addr);
+void _dispatch_sync_ipc_handoff_end(dispatch_wlh_t wlh, mach_port_t port);
 #endif // HAVE_MACH
 
 typedef union dispatch_unote_u {
@@ -211,7 +317,7 @@
 
 #define DISPATCH_UNOTE_NULL ((dispatch_unote_t){ ._du = NULL })
 
-#if TARGET_OS_EMBEDDED
+#if TARGET_OS_IPHONE
 #define DSL_HASH_SIZE  64u // must be a power of two
 #else
 #define DSL_HASH_SIZE 256u // must be a power of two
@@ -219,26 +325,33 @@
 #define DSL_HASH(x) ((x) & (DSL_HASH_SIZE - 1))
 
 typedef struct dispatch_unote_linkage_s {
-	TAILQ_ENTRY(dispatch_unote_linkage_s) du_link;
+	LIST_ENTRY(dispatch_unote_linkage_s) du_link;
 	struct dispatch_muxnote_s *du_muxnote;
 } DISPATCH_ATOMIC64_ALIGN *dispatch_unote_linkage_t;
 
-#define DU_UNREGISTER_IMMEDIATE_DELETE 0x01
-#define DU_UNREGISTER_ALREADY_DELETED  0x02
-#define DU_UNREGISTER_DISCONNECTED     0x04
-#define DU_UNREGISTER_REPLY_REMOVE     0x08
+DISPATCH_ENUM(dispatch_unote_action, uint8_t,
+	DISPATCH_UNOTE_ACTION_PASS_DATA,        // pass ke->data
+	DISPATCH_UNOTE_ACTION_PASS_FFLAGS,      // pass ke->fflags
+	DISPATCH_UNOTE_ACTION_SOURCE_OR_FFLAGS, // ds_pending_data |= ke->fflags
+	DISPATCH_UNOTE_ACTION_SOURCE_SET_DATA,  // ds_pending_data = ~ke->data
+	DISPATCH_UNOTE_ACTION_SOURCE_ADD_DATA,  // ds_pending_data += ke->data
+	DISPATCH_UNOTE_ACTION_SOURCE_TIMER,     // timer
+);
 
 typedef struct dispatch_source_type_s {
 	const char *dst_kind;
 	int8_t     dst_filter;
+	dispatch_unote_action_t dst_action;
 	uint8_t    dst_per_trigger_qos : 1;
+	uint8_t    dst_strict : 1;
+	uint8_t    dst_timer_flags;
 	uint16_t   dst_flags;
+#if DISPATCH_EVENT_BACKEND_KEVENT
+	uint16_t   dst_data;
+#endif
 	uint32_t   dst_fflags;
 	uint32_t   dst_mask;
 	uint32_t   dst_size;
-#if DISPATCH_EVENT_BACKEND_KEVENT
-	uint32_t   dst_data;
-#endif
 
 	dispatch_unote_t (*dst_create)(dispatch_source_type_t dst,
 			uintptr_t handle, uintptr_t mask);
@@ -246,26 +359,31 @@
 	bool (*dst_update_mux)(struct dispatch_muxnote_s *dmn);
 #endif
 	void (*dst_merge_evt)(dispatch_unote_t du, uint32_t flags, uintptr_t data,
-			uintptr_t status, pthread_priority_t pp);
+			pthread_priority_t pp);
 #if HAVE_MACH
 	void (*dst_merge_msg)(dispatch_unote_t du, uint32_t flags,
-			mach_msg_header_t *msg, mach_msg_size_t sz);
+			mach_msg_header_t *msg, mach_msg_size_t sz,
+			pthread_priority_t msg_pp, pthread_priority_t override_pp);
 #endif
 } dispatch_source_type_s;
 
 #define dux_create(dst, handle, mask)	(dst)->dst_create(dst, handle, mask)
-#define dux_merge_evt(du, ...)	(du)->du_type->dst_merge_evt(du, __VA_ARGS__)
-#define dux_merge_msg(du, ...)	(du)->du_type->dst_merge_msg(du, __VA_ARGS__)
+#define dux_type(du)           (du)->du_type
+#define dux_needs_rearm(du)    (dux_type(du)->dst_flags & (EV_ONESHOT | EV_DISPATCH))
+#define dux_merge_evt(du, ...) dux_type(du)->dst_merge_evt(du, __VA_ARGS__)
+#define dux_merge_msg(du, ...) dux_type(du)->dst_merge_msg(du, __VA_ARGS__)
 
 extern const dispatch_source_type_s _dispatch_source_type_after;
 
 #if HAVE_MACH
-extern const dispatch_source_type_s _dispatch_source_type_mach_recv_direct;
+extern const dispatch_source_type_s _dispatch_mach_type_notification;
 extern const dispatch_source_type_s _dispatch_mach_type_send;
 extern const dispatch_source_type_s _dispatch_mach_type_recv;
 extern const dispatch_source_type_s _dispatch_mach_type_reply;
 extern const dispatch_source_type_s _dispatch_xpc_type_sigterm;
+#define DISPATCH_MACH_TYPE_WAITER ((const dispatch_source_type_s *)-2)
 #endif
+extern const dispatch_source_type_s _dispatch_source_type_timer_with_clock;
 
 #pragma mark -
 #pragma mark deferred items
@@ -282,9 +400,10 @@
 #define DISPATCH_DEFERRED_ITEMS_EVENT_COUNT 16
 
 typedef struct dispatch_deferred_items_s {
-	dispatch_queue_t ddi_stashed_rq;
+	dispatch_queue_global_t ddi_stashed_rq;
 	dispatch_object_t ddi_stashed_dou;
 	dispatch_qos_t ddi_stashed_qos;
+	dispatch_wlh_t ddi_wlh;
 #if DISPATCH_EVENT_BACKEND_KEVENT
 	dispatch_kevent_t ddi_eventlist;
 	uint16_t ddi_nevents;
@@ -338,17 +457,92 @@
 }
 
 DISPATCH_ALWAYS_INLINE
-static inline bool
-_dispatch_unote_registered(dispatch_unote_t du)
+static inline dispatch_wlh_t
+_du_state_wlh(dispatch_unote_state_t du_state)
 {
-	return du._du->du_wlh != NULL;
+	return (dispatch_wlh_t)(du_state & DU_STATE_WLH_MASK);
+}
+
+DISPATCH_ALWAYS_INLINE
+static inline bool
+_du_state_registered(dispatch_unote_state_t du_state)
+{
+	return du_state != DU_STATE_UNREGISTERED;
+}
+
+DISPATCH_ALWAYS_INLINE
+static inline bool
+_du_state_armed(dispatch_unote_state_t du_state)
+{
+	return du_state & DU_STATE_ARMED;
+}
+
+DISPATCH_ALWAYS_INLINE
+static inline bool
+_du_state_needs_delete(dispatch_unote_state_t du_state)
+{
+	return du_state & DU_STATE_NEEDS_DELETE;
+}
+
+DISPATCH_ALWAYS_INLINE
+static inline bool
+_du_state_needs_rearm(dispatch_unote_state_t du_state)
+{
+	return _du_state_registered(du_state) && !_du_state_armed(du_state) &&
+			!_du_state_needs_delete(du_state);
+}
+
+DISPATCH_ALWAYS_INLINE
+static inline dispatch_unote_state_t
+_dispatch_unote_state(dispatch_unote_t du)
+{
+	return os_atomic_load(&du._du->du_state, relaxed);
+}
+#define _dispatch_unote_wlh(du) \
+		_du_state_wlh(_dispatch_unote_state(du))
+#define _dispatch_unote_registered(du) \
+		_du_state_registered(_dispatch_unote_state(du))
+#define _dispatch_unote_armed(du) \
+		_du_state_armed(_dispatch_unote_state(du))
+#define _dispatch_unote_needs_delete(du) \
+		_du_state_needs_delete(_dispatch_unote_state(du))
+#define _dispatch_unote_needs_rearm(du) \
+		_du_state_needs_rearm(_dispatch_unote_state(du))
+
+DISPATCH_ALWAYS_INLINE DISPATCH_OVERLOADABLE
+static inline void
+_dispatch_unote_state_set(dispatch_unote_t du, dispatch_unote_state_t value)
+{
+	os_atomic_store(&du._du->du_state, value, relaxed);
+}
+
+DISPATCH_ALWAYS_INLINE DISPATCH_OVERLOADABLE
+static inline void
+_dispatch_unote_state_set(dispatch_unote_t du, dispatch_wlh_t wlh,
+		dispatch_unote_state_t bits)
+{
+	_dispatch_unote_state_set(du, (dispatch_unote_state_t)wlh | bits);
+}
+
+DISPATCH_ALWAYS_INLINE
+static inline void
+_dispatch_unote_state_set_bit(dispatch_unote_t du, dispatch_unote_state_t bit)
+{
+	_dispatch_unote_state_set(du, _dispatch_unote_state(du) | bit);
+}
+
+DISPATCH_ALWAYS_INLINE
+static inline void
+_dispatch_unote_state_clear_bit(dispatch_unote_t du, dispatch_unote_state_t bit)
+{
+	_dispatch_unote_state_set(du, _dispatch_unote_state(du) & ~bit);
 }
 
 DISPATCH_ALWAYS_INLINE
 static inline bool
 _dispatch_unote_wlh_changed(dispatch_unote_t du, dispatch_wlh_t expected_wlh)
 {
-	dispatch_wlh_t wlh = du._du->du_wlh;
+	dispatch_wlh_t wlh = _dispatch_unote_wlh(du);
 	return wlh && wlh != DISPATCH_WLH_ANON && wlh != expected_wlh;
 }
 
@@ -362,13 +556,6 @@
 }
 
 DISPATCH_ALWAYS_INLINE
-static inline bool
-_dispatch_unote_needs_rearm(dispatch_unote_t du)
-{
-	return du._du->du_type->dst_flags & (EV_ONESHOT | EV_DISPATCH);
-}
-
-DISPATCH_ALWAYS_INLINE
 static inline dispatch_unote_t
 _dispatch_unote_linkage_get_unote(dispatch_unote_linkage_t dul)
 {
@@ -377,6 +564,27 @@
 
 #endif // DISPATCH_PURE_C
 
+DISPATCH_ALWAYS_INLINE
+static inline unsigned long
+_dispatch_timer_unote_compute_missed(dispatch_timer_source_refs_t dt,
+		uint64_t now, unsigned long prev)
+{
+	uint64_t missed = (now - dt->dt_timer.target) / dt->dt_timer.interval;
+	if (++missed + prev > LONG_MAX) {
+		missed = LONG_MAX - prev;
+	}
+	if (dt->dt_timer.interval < INT64_MAX) {
+		uint64_t push_by = missed * dt->dt_timer.interval;
+		dt->dt_timer.target += push_by;
+		dt->dt_timer.deadline += push_by;
+	} else {
+		dt->dt_timer.target = UINT64_MAX;
+		dt->dt_timer.deadline = UINT64_MAX;
+	}
+	prev += missed;
+	return prev;
+}
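A worked example for the helper above: with dt_timer.target = 10, interval = 5, now = 27 and prev = 0, the division yields (27 - 10) / 5 = 3 and the pre-increment counts the fire that is currently due, so 4 missed events are reported; target and deadline are both pushed forward by 4 * 5 = 20, leaving the next target at 30, which is past now. When the interval is INT64_MAX or larger, target and deadline are instead parked at UINT64_MAX.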
+
 #pragma mark -
 #pragma mark prototypes
 
@@ -390,20 +598,19 @@
 #define DISPATCH_TIMER_QOS_COUNT        1u
 #endif
 
-#define DISPATCH_TIMER_QOS(tidx)   (((uintptr_t)(tidx) >> 1) & 3u)
-#define DISPATCH_TIMER_CLOCK(tidx) (dispatch_clock_t)((tidx) & 1u)
+#define DISPATCH_TIMER_QOS(tidx)   ((uint32_t)(tidx) % DISPATCH_TIMER_QOS_COUNT)
+#define DISPATCH_TIMER_CLOCK(tidx) (dispatch_clock_t)((tidx) / DISPATCH_TIMER_QOS_COUNT)
 
-#define DISPATCH_TIMER_INDEX(clock, qos) ((qos) << 1 | (clock))
+#define DISPATCH_TIMER_INDEX(clock, qos) (((clock) * DISPATCH_TIMER_QOS_COUNT) + (qos))
 #define DISPATCH_TIMER_COUNT \
-		DISPATCH_TIMER_INDEX(0, DISPATCH_TIMER_QOS_COUNT)
+		DISPATCH_TIMER_INDEX(DISPATCH_CLOCK_COUNT, 0)
+// Workloops do not support optimizing WALL timers
+#define DISPATCH_TIMER_WLH_COUNT \
+		DISPATCH_TIMER_INDEX(DISPATCH_CLOCK_WALL, 0)
+
 #define DISPATCH_TIMER_IDENT_CANCELED    (~0u)
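For example, assuming the three QoS buckets (NORMAL, CRITICAL, BACKGROUND) are available and DISPATCH_CLOCK_COUNT covers the three clocks used in this patch (WALL, UPTIME, MONOTONIC), DISPATCH_TIMER_INDEX(clock, qos) = clock * 3 + qos, so tidx 4 decomposes back into clock 1 and QoS 1 via the division and modulo above, and DISPATCH_TIMER_COUNT works out to 3 * 3 = 9 heaps.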
 
 extern struct dispatch_timer_heap_s _dispatch_timers_heap[DISPATCH_TIMER_COUNT];
-extern bool _dispatch_timers_reconfigure, _dispatch_timers_expired;
-extern uint32_t _dispatch_timers_processing_mask;
-#if DISPATCH_USE_DTRACE
-extern uint32_t _dispatch_timers_will_wake;
-#endif
 
 dispatch_unote_t _dispatch_unote_create_with_handle(dispatch_source_type_t dst,
 		uintptr_t handle, uintptr_t mask);
@@ -411,12 +618,44 @@
 		uintptr_t handle, uintptr_t mask);
 dispatch_unote_t _dispatch_unote_create_without_handle(
 		dispatch_source_type_t dst, uintptr_t handle, uintptr_t mask);
+void _dispatch_unote_dispose(dispatch_unote_t du);
 
+/*
+ * @const DUU_DELETE_ACK
+ * Unregistration can acknowledge the "needs-delete" state of a unote.
+ * There must be some sort of synchronization between callers passing this flag
+ * for a given unote.
+ *
+ * @const DUU_PROBE
+ * This flag is passed for the first unregistration attempt of a unote.
+ * When passed, it allows the unregistration to speculatively try to do the
+ * unregistration syscalls and maybe get lucky. If the flag isn't passed,
+ * unregistration will preflight the attempt, and will not perform any syscall
+ * if it cannot guarantee their success.
+ *
+ * @const DUU_MUST_SUCCEED
+ * The caller expects the unregistration to always succeed.
+ * _dispatch_unote_unregister will either crash or return true.
+ */
+#define DUU_DELETE_ACK   0x1
+#define DUU_PROBE        0x2
+#define DUU_MUST_SUCCEED 0x4
+bool _dispatch_unote_unregister(dispatch_unote_t du, uint32_t flags);
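To illustrate the flag semantics documented above (a hypothetical call sequence, not a call site from this patch): the first, speculative attempt passes DUU_PROBE; if it fails, the owner waits for the next event delivery and then acknowledges the deferred delete, typically combining DUU_DELETE_ACK with DUU_MUST_SUCCEED when failure cannot be tolerated:

	// Sketch only: illustrative unregistration flow using the flags above.
	if (!_dispatch_unote_unregister(du, DUU_PROBE)) {
		// The kernel still owns the knote; wait for the next event delivery,
		// then acknowledge the needs-delete state from the proper context.
		_dispatch_unote_unregister(du, DUU_DELETE_ACK | DUU_MUST_SUCCEED);
	}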
 bool _dispatch_unote_register(dispatch_unote_t du, dispatch_wlh_t wlh,
 		dispatch_priority_t pri);
 void _dispatch_unote_resume(dispatch_unote_t du);
-bool _dispatch_unote_unregister(dispatch_unote_t du, uint32_t flags);
-void _dispatch_unote_dispose(dispatch_unote_t du);
+
+bool _dispatch_unote_unregister_muxed(dispatch_unote_t du);
+bool _dispatch_unote_register_muxed(dispatch_unote_t du);
+void _dispatch_unote_resume_muxed(dispatch_unote_t du);
+
+#if DISPATCH_HAVE_DIRECT_KNOTES
+bool _dispatch_unote_unregister_direct(dispatch_unote_t du, uint32_t flags);
+bool _dispatch_unote_register_direct(dispatch_unote_t du, dispatch_wlh_t wlh);
+void _dispatch_unote_resume_direct(dispatch_unote_t du);
+#endif
+
+void _dispatch_timer_unote_configure(dispatch_timer_source_refs_t dt);
 
 #if !DISPATCH_EVENT_BACKEND_WINDOWS
 void _dispatch_event_loop_atfork_child(void);
@@ -425,6 +664,7 @@
 #define DISPATCH_EVENT_LOOP_OVERRIDE  0x80000000
 void _dispatch_event_loop_poke(dispatch_wlh_t wlh, uint64_t dq_state,
 		uint32_t flags);
+void _dispatch_event_loop_cancel_waiter(struct dispatch_sync_context_s *dsc);
 void _dispatch_event_loop_wake_owner(struct dispatch_sync_context_s *dsc,
 		dispatch_wlh_t wlh, uint64_t old_state, uint64_t new_state);
 void _dispatch_event_loop_wait_for_ownership(
@@ -437,15 +677,36 @@
 #undef _dispatch_event_loop_assert_not_owned
 #define _dispatch_event_loop_assert_not_owned(wlh) ((void)wlh)
 #endif
-void _dispatch_event_loop_leave_immediate(dispatch_wlh_t wlh, uint64_t dq_state);
+void _dispatch_event_loop_leave_immediate(uint64_t dq_state);
 #if DISPATCH_EVENT_BACKEND_KEVENT
-void _dispatch_event_loop_leave_deferred(dispatch_wlh_t wlh,
+void _dispatch_event_loop_leave_deferred(dispatch_deferred_items_t ddi,
 		uint64_t dq_state);
 void _dispatch_event_loop_merge(dispatch_kevent_t events, int nevents);
 #endif
 void _dispatch_event_loop_drain(uint32_t flags);
-void _dispatch_event_loop_timer_arm(unsigned int tidx,
+
+void _dispatch_event_loop_timer_arm(dispatch_timer_heap_t dth, uint32_t tidx,
 		dispatch_timer_delay_s range, dispatch_clock_now_cache_t nows);
-void _dispatch_event_loop_timer_delete(unsigned int tidx);
+void _dispatch_event_loop_timer_delete(dispatch_timer_heap_t dth, uint32_t tidx);
+
+void _dispatch_event_loop_drain_timers(dispatch_timer_heap_t dth, uint32_t count);
+
+DISPATCH_ALWAYS_INLINE
+static inline void
+_dispatch_timers_heap_dirty(dispatch_timer_heap_t dth, uint32_t tidx)
+{
+	// Note: the dirty bits are only maintained in the first heap for any tidx
+	dth[0].dth_dirty_bits |= (1 << DISPATCH_TIMER_QOS(tidx)) | DTH_DIRTY_GLOBAL;
+}
+
+DISPATCH_ALWAYS_INLINE
+static inline void
+_dispatch_event_loop_drain_anon_timers(void)
+{
+	if (_dispatch_timers_heap[0].dth_dirty_bits) {
+		_dispatch_event_loop_drain_timers(_dispatch_timers_heap,
+				DISPATCH_TIMER_COUNT);
+	}
+}
 
 #endif /* __DISPATCH_EVENT_EVENT_INTERNAL__ */
diff --git a/src/event/event_kevent.c b/src/event/event_kevent.c
index 29c20e1..16b69b3 100644
--- a/src/event/event_kevent.c
+++ b/src/event/event_kevent.c
@@ -35,57 +35,49 @@
 #define dispatch_kevent_udata_t  __typeof__(((dispatch_kevent_t)NULL)->udata)
 
 typedef struct dispatch_muxnote_s {
-	TAILQ_ENTRY(dispatch_muxnote_s) dmn_list;
-	TAILQ_HEAD(, dispatch_unote_linkage_s) dmn_unotes_head;
-	dispatch_wlh_t dmn_wlh;
-	dispatch_kevent_s dmn_kev;
+	LIST_ENTRY(dispatch_muxnote_s) dmn_list;
+	LIST_HEAD(, dispatch_unote_linkage_s) dmn_unotes_head;
+	dispatch_kevent_s dmn_kev DISPATCH_ATOMIC64_ALIGN;
 } *dispatch_muxnote_t;
 
-static bool _dispatch_timers_force_max_leeway;
-static int _dispatch_kq = -1;
-static struct {
-	dispatch_once_t pred;
-	dispatch_unfair_lock_s lock;
-} _dispatch_muxnotes;
-#if !DISPATCH_USE_KEVENT_WORKQUEUE
-#define _dispatch_muxnotes_lock() \
-		_dispatch_unfair_lock_lock(&_dispatch_muxnotes.lock)
-#define _dispatch_muxnotes_unlock() \
-		_dispatch_unfair_lock_unlock(&_dispatch_muxnotes.lock)
-#else
-#define _dispatch_muxnotes_lock()
-#define _dispatch_muxnotes_unlock()
-#endif // !DISPATCH_USE_KEVENT_WORKQUEUE
+LIST_HEAD(dispatch_muxnote_bucket_s, dispatch_muxnote_s);
 
-DISPATCH_CACHELINE_ALIGN
-static TAILQ_HEAD(dispatch_muxnote_bucket_s, dispatch_muxnote_s)
-_dispatch_sources[DSL_HASH_SIZE];
+DISPATCH_STATIC_GLOBAL(bool _dispatch_timers_force_max_leeway);
+DISPATCH_STATIC_GLOBAL(dispatch_once_t _dispatch_kq_poll_pred);
+DISPATCH_STATIC_GLOBAL(struct dispatch_muxnote_bucket_s _dispatch_sources[DSL_HASH_SIZE]);
 
 #if defined(__APPLE__)
-#define DISPATCH_NOTE_CLOCK_WALL NOTE_MACH_CONTINUOUS_TIME
-#define DISPATCH_NOTE_CLOCK_MACH 0
+#define DISPATCH_NOTE_CLOCK_WALL      NOTE_NSECONDS | NOTE_MACH_CONTINUOUS_TIME
+#define DISPATCH_NOTE_CLOCK_MONOTONIC NOTE_MACHTIME | NOTE_MACH_CONTINUOUS_TIME
+#define DISPATCH_NOTE_CLOCK_UPTIME    NOTE_MACHTIME
 #else
-#define DISPATCH_NOTE_CLOCK_WALL 0
-#define DISPATCH_NOTE_CLOCK_MACH 0
+#define DISPATCH_NOTE_CLOCK_WALL      0
+#define DISPATCH_NOTE_CLOCK_MONOTONIC 0
+#define DISPATCH_NOTE_CLOCK_UPTIME    0
 #endif
 
 static const uint32_t _dispatch_timer_index_to_fflags[] = {
 #define DISPATCH_TIMER_FFLAGS_INIT(kind, qos, note) \
 	[DISPATCH_TIMER_INDEX(DISPATCH_CLOCK_##kind, DISPATCH_TIMER_QOS_##qos)] = \
-			DISPATCH_NOTE_CLOCK_##kind | NOTE_ABSOLUTE | \
-			NOTE_NSECONDS | NOTE_LEEWAY | (note)
+			DISPATCH_NOTE_CLOCK_##kind | NOTE_ABSOLUTE | NOTE_LEEWAY | (note)
 	DISPATCH_TIMER_FFLAGS_INIT(WALL, NORMAL, 0),
-	DISPATCH_TIMER_FFLAGS_INIT(MACH, NORMAL, 0),
+	DISPATCH_TIMER_FFLAGS_INIT(UPTIME, NORMAL, 0),
+	DISPATCH_TIMER_FFLAGS_INIT(MONOTONIC, NORMAL, 0),
 #if DISPATCH_HAVE_TIMER_QOS
 	DISPATCH_TIMER_FFLAGS_INIT(WALL, CRITICAL, NOTE_CRITICAL),
-	DISPATCH_TIMER_FFLAGS_INIT(MACH, CRITICAL, NOTE_CRITICAL),
+	DISPATCH_TIMER_FFLAGS_INIT(UPTIME, CRITICAL, NOTE_CRITICAL),
+	DISPATCH_TIMER_FFLAGS_INIT(MONOTONIC, CRITICAL, NOTE_CRITICAL),
 	DISPATCH_TIMER_FFLAGS_INIT(WALL, BACKGROUND, NOTE_BACKGROUND),
-	DISPATCH_TIMER_FFLAGS_INIT(MACH, BACKGROUND, NOTE_BACKGROUND),
+	DISPATCH_TIMER_FFLAGS_INIT(UPTIME, BACKGROUND, NOTE_BACKGROUND),
+	DISPATCH_TIMER_FFLAGS_INIT(MONOTONIC, BACKGROUND, NOTE_BACKGROUND),
 #endif
 #undef DISPATCH_TIMER_FFLAGS_INIT
 };
 
-static void _dispatch_kevent_timer_drain(dispatch_kevent_t ke);
+static inline void _dispatch_kevent_timer_drain(dispatch_kevent_t ke);
+#if DISPATCH_USE_KEVENT_WORKLOOP
+static void _dispatch_kevent_workloop_poke_drain(dispatch_kevent_t ke);
+#endif
 
 #pragma mark -
 #pragma mark kevent debug
@@ -117,9 +109,13 @@
 #ifdef EVFILT_MEMORYSTATUS
 	_evfilt2(EVFILT_MEMORYSTATUS);
 #endif
+#if DISPATCH_USE_KEVENT_WORKLOOP
+	_evfilt2(EVFILT_WORKLOOP);
+#endif // DISPATCH_USE_KEVENT_WORKLOOP
 #endif // DISPATCH_EVENT_BACKEND_KEVENT
 
 	_evfilt2(DISPATCH_EVFILT_TIMER);
+	_evfilt2(DISPATCH_EVFILT_TIMER_WITH_CLOCK);
 	_evfilt2(DISPATCH_EVFILT_CUSTOM_ADD);
 	_evfilt2(DISPATCH_EVFILT_CUSTOM_OR);
 	_evfilt2(DISPATCH_EVFILT_CUSTOM_REPLACE);
@@ -238,6 +234,12 @@
 #define _dispatch_kevent_wlh_debug(verb, kev)  ((void)verb, (void)kev)
 #endif // DISPATCH_WLH_DEBUG
 
+#define _dispatch_du_debug(what, du) \
+		_dispatch_debug("kevent-source[%p]: %s kevent[%p] " \
+				"{ filter = %s, ident = 0x%x }", \
+				_dispatch_wref2ptr((du)->du_owner_wref), what, \
+				(du), _evfiltstr((du)->du_filter), (du)->du_ident)
+
 #if DISPATCH_MACHPORT_DEBUG
 #ifndef MACH_PORT_TYPE_SPREQUEST
 #define MACH_PORT_TYPE_SPREQUEST 0x40000000
@@ -304,8 +306,8 @@
 
 #if HAVE_MACH
 
-static dispatch_once_t _dispatch_mach_host_port_pred;
-static mach_port_t _dispatch_mach_host_port;
+DISPATCH_STATIC_GLOBAL(dispatch_once_t _dispatch_mach_host_port_pred);
+DISPATCH_STATIC_GLOBAL(mach_port_t _dispatch_mach_host_port);
 
 static inline void*
 _dispatch_kevent_mach_msg_buf(dispatch_kevent_t ke)
@@ -332,7 +334,7 @@
 // - data is used to monitor the actual state of the
 //   mach_port_request_notification()
 // - ext[0] is a boolean that tracks whether the notification is armed or not
-#define DISPATCH_MACH_NOTIFICATION_ARMED(dk) ((dk)->ext[0])
+#define DISPATCH_MACH_NOTIFICATION_ARMED(dmn) ((dmn)->dmn_kev.ext[0])
 #endif
 
 DISPATCH_ALWAYS_INLINE
@@ -362,6 +364,7 @@
 static void
 _dispatch_kevent_print_error(dispatch_kevent_t ke)
 {
+	dispatch_unote_class_t du = NULL;
 	_dispatch_debug("kevent[0x%llx]: handling error",
 			(unsigned long long)ke->udata);
 	if (ke->flags & EV_DELETE) {
@@ -376,61 +379,137 @@
 	} else if (_dispatch_kevent_unote_is_muxed(ke)) {
 		ke->flags |= _dispatch_kevent_get_muxnote(ke)->dmn_kev.flags;
 	} else if (ke->udata) {
-		if (!_dispatch_unote_registered(_dispatch_kevent_get_unote(ke))) {
+		du = (dispatch_unote_class_t)(uintptr_t)ke->udata;
+		if (!_dispatch_unote_registered(du)) {
 			ke->flags |= EV_ADD;
 		}
 	}
 
-#if HAVE_MACH
-	if (ke->filter == EVFILT_MACHPORT && ke->data == ENOTSUP &&
-			(ke->flags & EV_ADD) && (ke->fflags & MACH_RCV_MSG)) {
-		DISPATCH_INTERNAL_CRASH(ke->ident,
-				"Missing EVFILT_MACHPORT support for ports");
-	}
-#endif
-
-	if (ke->data) {
+	switch (ke->data) {
+	case 0:
+		return;
+	case ERANGE: /* A broken QoS was passed to kevent_id() */
+		DISPATCH_INTERNAL_CRASH(ke->qos, "Invalid kevent priority");
+	default:
 		// log the unexpected error
 		_dispatch_bug_kevent_client("kevent", _evfiltstr(ke->filter),
 				!ke->udata ? NULL :
 				ke->flags & EV_DELETE ? "delete" :
 				ke->flags & EV_ADD ? "add" :
 				ke->flags & EV_ENABLE ? "enable" : "monitor",
-				(int)ke->data);
+				(int)ke->data, ke->ident, ke->udata, du);
 	}
 }
 
+DISPATCH_ALWAYS_INLINE
+static inline void
+_dispatch_kevent_merge_ev_flags(dispatch_unote_t du, uint32_t flags)
+{
+	if (unlikely(!(flags & EV_UDATA_SPECIFIC) && (flags & EV_ONESHOT))) {
+		_dispatch_unote_unregister(du, DUU_DELETE_ACK | DUU_MUST_SUCCEED);
+		return;
+	}
+
+	if (flags & EV_DELETE) {
+		// When a speculative deletion is requested by libdispatch,
+		// and the kernel is about to deliver an event, it can acknowledge
+		// our wish by delivering the event as a (EV_DELETE | EV_ONESHOT)
+		// event and dropping the knote at once.
+		_dispatch_unote_state_set(du, DU_STATE_NEEDS_DELETE);
+	} else if (flags & (EV_ONESHOT | EV_VANISHED)) {
+		// EV_VANISHED events if re-enabled will produce another EV_VANISHED
+		// event. To avoid an infinite loop of such events, mark the unote
+		// as needing deletion so that _dispatch_unote_needs_rearm()
+		// eventually returns false.
+		//
+		// mach channels crash on EV_VANISHED, and dispatch sources stay
+		// in limbo until canceled (explicitly or not).
+		dispatch_unote_state_t du_state = _dispatch_unote_state(du);
+		du_state |= DU_STATE_NEEDS_DELETE;
+		du_state &= ~DU_STATE_ARMED;
+		_dispatch_unote_state_set(du, du_state);
+	} else if (likely(flags & EV_DISPATCH)) {
+		_dispatch_unote_state_clear_bit(du, DU_STATE_ARMED);
+	} else {
+		return;
+	}
+
+	_dispatch_du_debug((flags & EV_VANISHED) ? "vanished" :
+			(flags & EV_DELETE) ? "deleted oneshot" :
+			(flags & EV_ONESHOT) ? "oneshot" : "disarmed", du._du);
+}
+
 DISPATCH_NOINLINE
 static void
 _dispatch_kevent_merge(dispatch_unote_t du, dispatch_kevent_t ke)
 {
-	uintptr_t data;
-	uintptr_t status = 0;
+	dispatch_unote_action_t action = dux_type(du._du)->dst_action;
 	pthread_priority_t pp = 0;
-#if DISPATCH_USE_KEVENT_QOS
-	pp = ((pthread_priority_t)ke->qos) & ~_PTHREAD_PRIORITY_FLAGS_MASK;
+	uintptr_t data;
+
+	// once we modify the queue atomic flags below, it will allow concurrent
+	// threads running _dispatch_source_invoke2 to dispose of the source,
+	// so we can't safely borrow the reference we get from the muxnote udata
+	// anymore, and need our own <rdar://20382435>
+	_dispatch_retain_unote_owner(du);
+
+	switch (action) {
+	case DISPATCH_UNOTE_ACTION_PASS_DATA:
+		data = (uintptr_t)ke->data;
+		break;
+
+	case DISPATCH_UNOTE_ACTION_PASS_FFLAGS:
+		data = (uintptr_t)ke->fflags;
+#if HAVE_MACH
+		if (du._du->du_filter == EVFILT_MACHPORT) {
+			data = DISPATCH_MACH_RECV_MESSAGE;
+		}
 #endif
-	dispatch_unote_action_t action = du._du->du_data_action;
-	if (action == DISPATCH_UNOTE_ACTION_DATA_SET) {
+		break;
+
+	case DISPATCH_UNOTE_ACTION_SOURCE_SET_DATA:
 		// ke->data is signed and "negative available data" makes no sense
 		// zero bytes happens when EV_EOF is set
 		dispatch_assert(ke->data >= 0l);
-		data = ~(unsigned long)ke->data;
-#if HAVE_MACH
-	} else if (du._du->du_filter == EVFILT_MACHPORT) {
-		data = DISPATCH_MACH_RECV_MESSAGE;
-#endif
-	} else if (action == DISPATCH_UNOTE_ACTION_DATA_ADD) {
 		data = (unsigned long)ke->data;
-	} else if (action == DISPATCH_UNOTE_ACTION_DATA_OR) {
+		os_atomic_store2o(du._dr, ds_pending_data, ~data, relaxed);
+		break;
+
+	case DISPATCH_UNOTE_ACTION_SOURCE_ADD_DATA:
+		data = (unsigned long)ke->data;
+		if (data) os_atomic_add2o(du._dr, ds_pending_data, data, relaxed);
+		break;
+
+	case DISPATCH_UNOTE_ACTION_SOURCE_OR_FFLAGS:
 		data = ke->fflags & du._du->du_fflags;
-	} else if (action == DISPATCH_UNOTE_ACTION_DATA_OR_STATUS_SET) {
-		data = ke->fflags & du._du->du_fflags;
-		status = (unsigned long)ke->data;
-	} else {
+		if (du._dr->du_has_extended_status) {
+			uint64_t odata, ndata, value;
+			uint32_t status = (uint32_t)ke->data;
+
+			// We combine the data and status into a single 64-bit value.
+			value = DISPATCH_SOURCE_COMBINE_DATA_AND_STATUS(data, status);
+			os_atomic_rmw_loop2o(du._dr, ds_pending_data, odata, ndata, relaxed, {
+				ndata = DISPATCH_SOURCE_GET_DATA(odata) | value;
+			});
+#if HAVE_MACH
+		} else if (du._du->du_filter == EVFILT_MACHPORT) {
+			data = DISPATCH_MACH_RECV_MESSAGE;
+			os_atomic_store2o(du._dr, ds_pending_data, data, relaxed);
+#endif
+		} else {
+			if (data) os_atomic_or2o(du._dr, ds_pending_data, data, relaxed);
+		}
+		break;
+
+	default:
 		DISPATCH_INTERNAL_CRASH(action, "Corrupt unote action");
 	}
-	return dux_merge_evt(du._du, ke->flags, data, status, pp);
+
+	_dispatch_kevent_merge_ev_flags(du, ke->flags);
+#if DISPATCH_USE_KEVENT_QOS
+	pp = ((pthread_priority_t)ke->qos) & ~_PTHREAD_PRIORITY_FLAGS_MASK;
+#endif
+	return dux_merge_evt(du._du, ke->flags, data, pp);
 }
 
 DISPATCH_NOINLINE
@@ -440,7 +519,11 @@
 	dispatch_muxnote_t dmn = _dispatch_kevent_get_muxnote(ke);
 	dispatch_unote_linkage_t dul, dul_next;
 
-	TAILQ_FOREACH_SAFE(dul, &dmn->dmn_unotes_head, du_link, dul_next) {
+	if (ke->flags & (EV_ONESHOT | EV_DELETE)) {
+		// tell _dispatch_unote_unregister_muxed() the kernel half is gone
+		dmn->dmn_kev.flags |= EV_DELETE;
+	}
+	LIST_FOREACH_SAFE(dul, &dmn->dmn_unotes_head, du_link, dul_next) {
 		_dispatch_kevent_merge(_dispatch_unote_linkage_get_unote(dul), ke);
 	}
 }
@@ -453,16 +536,20 @@
 		_dispatch_kevent_mgr_debug("received", ke);
 		return;
 	}
+#if DISPATCH_USE_KEVENT_WORKLOOP
+	if (ke->filter == EVFILT_WORKLOOP) {
+		return _dispatch_kevent_workloop_poke_drain(ke);
+	}
+#endif // DISPATCH_USE_KEVENT_WORKLOOP
 	_dispatch_kevent_debug("received", ke);
 	if (unlikely(ke->flags & EV_ERROR)) {
 		if (ke->filter == EVFILT_PROC && ke->data == ESRCH) {
-			// EVFILT_PROC may fail with ESRCH when the process exists but is a zombie
-			// <rdar://problem/5067725>. As a workaround, we simulate an exit event for
-			// any EVFILT_PROC with an invalid pid <rdar://problem/6626350>.
-			ke->flags &= ~(EV_ERROR | EV_ADD | EV_ENABLE | EV_UDATA_SPECIFIC);
-			ke->flags |= EV_ONESHOT;
+			// <rdar://problem/5067725&6626350> EVFILT_PROC may fail with ESRCH
+			// when the process exists but is a zombie. As a workaround, we
+			// simulate an exit event for any EVFILT_PROC with an invalid pid.
+			ke->flags  = EV_UDATA_SPECIFIC | EV_ONESHOT | EV_DELETE;
 			ke->fflags = NOTE_EXIT;
-			ke->data = 0;
+			ke->data   = 0;
 			_dispatch_kevent_debug("synthetic NOTE_EXIT", ke);
 		} else {
 			return _dispatch_kevent_print_error(ke);
@@ -473,10 +560,8 @@
 	}
 
 #if HAVE_MACH
-	if (ke->filter == EVFILT_MACHPORT) {
-		if (_dispatch_kevent_mach_msg_size(ke)) {
-			return _dispatch_kevent_mach_msg_drain(ke);
-		}
+	if (ke->filter == EVFILT_MACHPORT && _dispatch_kevent_mach_msg_size(ke)) {
+		return _dispatch_kevent_mach_msg_drain(ke);
 	}
 #endif
 
@@ -490,8 +575,8 @@
 
 #if DISPATCH_USE_MGR_THREAD
 DISPATCH_NOINLINE
-static int
-_dispatch_kq_create(const void *guard_ptr)
+static void
+_dispatch_kq_create(intptr_t *fd_ptr)
 {
 	static const dispatch_kevent_s kev = {
 		.ident = 1,
@@ -503,7 +588,7 @@
 
 	_dispatch_fork_becomes_unsafe();
 #if DISPATCH_USE_GUARDED_FD
-	guardid_t guard = (uintptr_t)guard_ptr;
+	guardid_t guard = (uintptr_t)fd_ptr;
 	kqfd = guarded_kqueue_np(&guard, GUARD_CLOSE | GUARD_DUP);
 #else
-	(void)guard_ptr;
+	(void)fd_ptr;
@@ -534,10 +619,16 @@
 #else
 	dispatch_assume_zero(kevent(kqfd, &kev, 1, NULL, 0, NULL));
 #endif
-	return kqfd;
+	*fd_ptr = kqfd;
 }
 #endif
 
+static inline int
+_dispatch_kq_fd(void)
+{
+	return (int)(intptr_t)_dispatch_mgr_q.do_ctxt;
+}
+
 static void
 _dispatch_kq_init(void *context)
 {
@@ -553,7 +644,7 @@
 	_dispatch_kevent_workqueue_init();
 	if (_dispatch_kevent_workqueue_enabled) {
 		int r;
-		int kqfd = _dispatch_kq;
+		int kqfd = _dispatch_kq_fd();
 		const dispatch_kevent_s ke = {
 			.ident = 1,
 			.filter = EVFILT_USER,
@@ -579,7 +670,8 @@
 	}
 #endif // DISPATCH_USE_KEVENT_WORKQUEUE
 #if DISPATCH_USE_MGR_THREAD
-	_dispatch_kq = _dispatch_kq_create(&_dispatch_mgr_q);
+	_dispatch_kq_create((intptr_t *)&_dispatch_mgr_q.do_ctxt);
+	_dispatch_trace_item_push(_dispatch_mgr_q.do_targetq, &_dispatch_mgr_q);
 	dx_push(_dispatch_mgr_q.do_targetq, &_dispatch_mgr_q, 0);
 #endif // DISPATCH_USE_MGR_THREAD
 }
@@ -596,11 +688,10 @@
 		dispatch_kevent_t ke_out, int n_out, void *buf, size_t *avail,
 		uint32_t flags)
 {
-	static dispatch_once_t pred;
 	bool kq_initialized = false;
 	int r = 0;
 
-	dispatch_once_f(&pred, &kq_initialized, _dispatch_kq_init);
+	dispatch_once_f(&_dispatch_kq_poll_pred, &kq_initialized, _dispatch_kq_init);
 	if (unlikely(kq_initialized)) {
 		// The calling thread was the one doing the initialization
 		//
@@ -611,7 +702,6 @@
 		_voucher_activity_debug_channel_init();
 	}
 
-
 #if !DISPATCH_USE_KEVENT_QOS
 	if (flags & KEVENT_FLAG_ERROR_EVENTS) {
 		// emulate KEVENT_FLAG_ERROR_EVENTS
@@ -623,8 +713,10 @@
 #endif
 
 retry:
-	if (wlh == DISPATCH_WLH_ANON) {
-		int kqfd = _dispatch_kq;
+	if (unlikely(wlh == NULL)) {
+		DISPATCH_INTERNAL_CRASH(wlh, "Invalid wlh");
+	} else if (wlh == DISPATCH_WLH_ANON) {
+		int kqfd = _dispatch_kq_fd();
 #if DISPATCH_USE_KEVENT_QOS
 		if (_dispatch_kevent_workqueue_enabled) {
 			flags |= KEVENT_FLAG_WORKQ;
@@ -637,6 +729,14 @@
 		if (flags & KEVENT_FLAG_IMMEDIATE) timeout = &timeout_immediately;
 		r = kevent(kqfd, ke, n, ke_out, n_out, timeout);
 #endif
+#if DISPATCH_USE_KEVENT_WORKLOOP
+	} else {
+		flags |= KEVENT_FLAG_WORKLOOP;
+		if (!(flags & KEVENT_FLAG_ERROR_EVENTS)) {
+			flags |= KEVENT_FLAG_DYNAMIC_KQ_MUST_EXIST;
+		}
+		r = kevent_id((uintptr_t)wlh, ke, n, ke_out, n_out, buf, avail, flags);
+#endif // DISPATCH_USE_KEVENT_WORKLOOP
 	}
 	if (unlikely(r == -1)) {
 		int err = errno;
@@ -648,6 +748,14 @@
 			goto retry;
 		case EBADF:
 			DISPATCH_CLIENT_CRASH(err, "Do not close random Unix descriptors");
+#if DISPATCH_USE_KEVENT_WORKLOOP
+		case ENOENT:
+			if ((flags & KEVENT_FLAG_ERROR_EVENTS) &&
+					(flags & KEVENT_FLAG_DYNAMIC_KQ_MUST_EXIST)) {
+				return 0;
+			}
+			/* FALLTHROUGH */
+#endif // DISPATCH_USE_KEVENT_WORKLOOP
 		default:
 			DISPATCH_CLIENT_CRASH(err, "Unexpected error from kevent");
 		}
@@ -696,6 +804,14 @@
 			}
 		}
 	} else {
+#if DISPATCH_USE_KEVENT_WORKLOOP
+		if (ke_out[0].flags & EV_ERROR) {
+			// When kevent returns errors it doesn't process the kqueue
+			// and doesn't rearm the return-to-kernel notification
+			// and doesn't rearm the return-to-kernel notification.
+			_dispatch_set_return_to_kernel();
+		}
+#endif // DISPATCH_USE_KEVENT_WORKLOOP
 		for (i = 0, r = 0; i < n; i++) {
 			_dispatch_kevent_drain(&ke_out[i]);
 		}
@@ -725,13 +841,13 @@
 		uint16_t action)
 {
 	dispatch_unote_class_t du = _du._du;
-	dispatch_source_type_t dst = du->du_type;
+	dispatch_source_type_t dst = dux_type(du);
 	uint16_t flags = dst->dst_flags | action;
 
 	if ((flags & EV_VANISHED) && !(flags & EV_ADD)) {
 		flags &= ~EV_VANISHED;
 	}
-	pthread_priority_t pp = _dispatch_priority_to_pp(du->du_priority);
+
 	*dk = (dispatch_kevent_s){
 		.ident  = du->du_ident,
 		.filter = dst->dst_filter,
@@ -740,7 +856,7 @@
 		.fflags = du->du_fflags | dst->dst_fflags,
 		.data   = (__typeof__(dk->data))dst->dst_data,
 #if DISPATCH_USE_KEVENT_QOS
-		.qos    = (__typeof__(dk->qos))pp,
+		.qos    = (__typeof__(dk->qos))_dispatch_priority_to_pp_prefer_fallback(
+				du->du_priority),
 #endif
 	};
-	(void)pp; // if DISPATCH_USE_KEVENT_QOS == 0
@@ -799,7 +916,7 @@
 {
 	dispatch_deferred_items_t ddi = _dispatch_deferred_items_get();
 
-	if (ddi && ddi->ddi_maxevents && wlh == _dispatch_get_wlh()) {
+	if (ddi && ddi->ddi_wlh == wlh && ddi->ddi_maxevents) {
 		int slot = _dispatch_kq_deferred_find_slot(ddi, ke->filter, ke->ident,
 				ke->udata);
 		dispatch_kevent_t dk = _dispatch_kq_deferred_reuse_slot(wlh, ddi, slot);
@@ -817,7 +934,7 @@
 _dispatch_kq_immediate_update(dispatch_wlh_t wlh, dispatch_kevent_t ke)
 {
 	dispatch_deferred_items_t ddi = _dispatch_deferred_items_get();
-	if (ddi && wlh == _dispatch_get_wlh()) {
+	if (ddi && ddi->ddi_wlh == wlh) {
 		int slot = _dispatch_kq_deferred_find_slot(ddi, ke->filter, ke->ident,
 				ke->udata);
 		_dispatch_kq_deferred_discard_slot(ddi, slot);
@@ -825,6 +942,49 @@
 	return _dispatch_kq_update_one(wlh, ke);
 }
 
+#if HAVE_MACH
+void
+_dispatch_sync_ipc_handoff_begin(dispatch_wlh_t wlh, mach_port_t port,
+		uint64_t _Atomic *addr)
+{
+#ifdef NOTE_WL_SYNC_IPC
+	dispatch_kevent_s ke = {
+		.ident  = port,
+		.filter = EVFILT_WORKLOOP,
+		.flags  = EV_ADD | EV_DISABLE,
+		.fflags = NOTE_WL_SYNC_IPC | NOTE_WL_IGNORE_ESTALE,
+		.udata  = (uintptr_t)wlh,
+		.ext[EV_EXTIDX_WL_ADDR]  = (uintptr_t)addr,
+		.ext[EV_EXTIDX_WL_MASK]  = ~(uintptr_t)0,
+		.ext[EV_EXTIDX_WL_VALUE] = (uintptr_t)wlh,
+	};
+	int rc = _dispatch_kq_immediate_update(wlh, &ke);
+	if (unlikely(rc)) {
+		DISPATCH_INTERNAL_CRASH(rc, "Unexpected error from kevent");
+	}
+#else
+	(void)wlh; (void)port; (void)addr;
+#endif
+}
+
+void
+_dispatch_sync_ipc_handoff_end(dispatch_wlh_t wlh, mach_port_t port)
+{
+#ifdef NOTE_WL_SYNC_IPC
+	dispatch_kevent_s ke = {
+		.ident  = port,
+		.filter = EVFILT_WORKLOOP,
+		.flags  = EV_ADD | EV_DELETE | EV_ENABLE,
+		.fflags = NOTE_WL_SYNC_IPC,
+		.udata  = (uintptr_t)wlh,
+	};
+	_dispatch_kq_deferred_update(wlh, &ke);
+#else
+	(void)wlh; (void)port;
+#endif // NOTE_WL_SYNC_IPC
+}
+#endif
+
 DISPATCH_NOINLINE
 static bool
 _dispatch_kq_unote_update(dispatch_wlh_t wlh, dispatch_unote_t _du,
@@ -837,13 +997,12 @@
 
 	if (action_flags & EV_ADD) {
 		// as soon as we register we may get an event delivery and it has to
-		// see du_wlh already set, else it will not unregister the kevent
-		dispatch_assert(du->du_wlh == NULL);
+		// see du_state already set, else it will not unregister the kevent
 		_dispatch_wlh_retain(wlh);
-		du->du_wlh = wlh;
+		_dispatch_unote_state_set(du, wlh, DU_STATE_ARMED);
 	}
 
-	if (ddi && wlh == _dispatch_get_wlh()) {
+	if (ddi && ddi->ddi_wlh == wlh) {
 		int slot = _dispatch_kq_deferred_find_slot(ddi,
 				du->du_filter, du->du_ident, (dispatch_kevent_udata_t)du);
 		if (slot < ddi->ddi_nevents) {
@@ -873,18 +1032,24 @@
 done:
 	if (action_flags & EV_ADD) {
 		if (unlikely(r)) {
-			_dispatch_wlh_release(du->du_wlh);
-			du->du_wlh = NULL;
+			_dispatch_wlh_release(wlh);
+			_dispatch_unote_state_set(du, DU_STATE_UNREGISTERED);
+		} else {
+			_dispatch_du_debug("installed", du);
 		}
 		return r == 0;
 	}
 
 	if (action_flags & EV_DELETE) {
 		if (r == EINPROGRESS) {
+			_dispatch_du_debug("deferred delete", du);
 			return false;
 		}
-		_dispatch_wlh_release(du->du_wlh);
-		du->du_wlh = NULL;
+		_dispatch_wlh_release(wlh);
+		_dispatch_unote_state_set(du, DU_STATE_UNREGISTERED);
+		_dispatch_du_debug("deleted", du);
+	} else if (action_flags & EV_ENABLE) {
+		_dispatch_du_debug("rearmed", du);
 	}
 
 	dispatch_assume_zero(r);
@@ -893,15 +1058,6 @@
 
 #pragma mark dispatch_muxnote_t
 
-static void
-_dispatch_muxnotes_init(void *ctxt DISPATCH_UNUSED)
-{
-	uint32_t i;
-	for (i = 0; i < DSL_HASH_SIZE; i++) {
-		TAILQ_INIT(&_dispatch_sources[i]);
-	}
-}
-
 DISPATCH_ALWAYS_INLINE
 static inline struct dispatch_muxnote_bucket_s *
 _dispatch_muxnote_bucket(uint64_t ident, int16_t filter)
@@ -919,7 +1075,6 @@
 		break;
 	}
 
-	dispatch_once_f(&_dispatch_muxnotes.pred, NULL, _dispatch_muxnotes_init);
 	return &_dispatch_sources[DSL_HASH((uintptr_t)ident)];
 }
 #define _dispatch_unote_muxnote_bucket(du) \
@@ -928,21 +1083,16 @@
 DISPATCH_ALWAYS_INLINE
 static inline dispatch_muxnote_t
 _dispatch_muxnote_find(struct dispatch_muxnote_bucket_s *dmb,
-		dispatch_wlh_t wlh, uint64_t ident, int16_t filter)
+		uint64_t ident, int16_t filter)
 {
 	dispatch_muxnote_t dmn;
-	_dispatch_muxnotes_lock();
-	TAILQ_FOREACH(dmn, dmb, dmn_list) {
-		if (dmn->dmn_wlh == wlh && dmn->dmn_kev.ident == ident &&
-				dmn->dmn_kev.filter == filter) {
+	LIST_FOREACH(dmn, dmb, dmn_list) {
+		if (dmn->dmn_kev.ident == ident && dmn->dmn_kev.filter == filter) {
 			break;
 		}
 	}
-	_dispatch_muxnotes_unlock();
 	return dmn;
 }
-#define _dispatch_unote_muxnote_find(dmb, du, wlh) \
-		_dispatch_muxnote_find(dmb, wlh, du._du->du_ident, du._du->du_filter)
 
 #if HAVE_MACH
 DISPATCH_ALWAYS_INLINE
@@ -951,52 +1101,47 @@
 {
 	struct dispatch_muxnote_bucket_s *dmb;
 	dmb = _dispatch_muxnote_bucket(name, filter);
-	return _dispatch_muxnote_find(dmb, DISPATCH_WLH_ANON, name, filter);
+	return _dispatch_muxnote_find(dmb, name, filter);
 }
 #endif
 
-DISPATCH_NOINLINE
-static bool
-_dispatch_unote_register_muxed(dispatch_unote_t du, dispatch_wlh_t wlh)
+bool
+_dispatch_unote_register_muxed(dispatch_unote_t du)
 {
 	struct dispatch_muxnote_bucket_s *dmb = _dispatch_unote_muxnote_bucket(du);
 	dispatch_muxnote_t dmn;
 	bool installed = true;
 
-	dmn = _dispatch_unote_muxnote_find(dmb, du, wlh);
+	dmn = _dispatch_muxnote_find(dmb, du._du->du_ident, du._du->du_filter);
 	if (dmn) {
 		uint32_t flags = du._du->du_fflags & ~dmn->dmn_kev.fflags;
 		if (flags) {
 			dmn->dmn_kev.fflags |= flags;
-			if (unlikely(du._du->du_type->dst_update_mux)) {
-				installed = du._du->du_type->dst_update_mux(dmn);
+			if (unlikely(dux_type(du._du)->dst_update_mux)) {
+				installed = dux_type(du._du)->dst_update_mux(dmn);
 			} else {
-				installed = !_dispatch_kq_immediate_update(dmn->dmn_wlh,
+				installed = !_dispatch_kq_immediate_update(DISPATCH_WLH_ANON,
 						&dmn->dmn_kev);
 			}
 			if (!installed) dmn->dmn_kev.fflags &= ~flags;
 		}
 	} else {
 		dmn = _dispatch_calloc(1, sizeof(struct dispatch_muxnote_s));
-		TAILQ_INIT(&dmn->dmn_unotes_head);
 		_dispatch_kq_unote_set_kevent(du, &dmn->dmn_kev, EV_ADD | EV_ENABLE);
 #if DISPATCH_USE_KEVENT_QOS
 		dmn->dmn_kev.qos = _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG;
 #endif
 		dmn->dmn_kev.udata = (dispatch_kevent_udata_t)((uintptr_t)dmn |
 				DISPATCH_KEVENT_MUXED_MARKER);
-		dmn->dmn_wlh = wlh;
-		if (unlikely(du._du->du_type->dst_update_mux)) {
-			installed = du._du->du_type->dst_update_mux(dmn);
+		if (unlikely(dux_type(du._du)->dst_update_mux)) {
+			installed = dux_type(du._du)->dst_update_mux(dmn);
 		} else {
-			installed = !_dispatch_kq_immediate_update(dmn->dmn_wlh,
+			installed = !_dispatch_kq_immediate_update(DISPATCH_WLH_ANON,
 					&dmn->dmn_kev);
 		}
 		if (installed) {
 			dmn->dmn_kev.flags &= ~(EV_ADD | EV_VANISHED);
-			_dispatch_muxnotes_lock();
-			TAILQ_INSERT_TAIL(dmb, dmn, dmn_list);
-			_dispatch_muxnotes_unlock();
+			LIST_INSERT_HEAD(dmb, dmn, dmn_list);
 		} else {
 			free(dmn);
 		}
@@ -1004,60 +1149,36 @@
 
 	if (installed) {
 		dispatch_unote_linkage_t dul = _dispatch_unote_get_linkage(du);
-		TAILQ_INSERT_TAIL(&dmn->dmn_unotes_head, dul, du_link);
-		dul->du_muxnote = dmn;
-
+		LIST_INSERT_HEAD(&dmn->dmn_unotes_head, dul, du_link);
 #if HAVE_MACH
 		if (du._du->du_filter == DISPATCH_EVFILT_MACH_NOTIFICATION) {
-			bool armed = DISPATCH_MACH_NOTIFICATION_ARMED(&dmn->dmn_kev);
-			os_atomic_store2o(du._dmsr, dmsr_notification_armed, armed,relaxed);
+			os_atomic_store2o(du._dmsr, dmsr_notification_armed,
+					DISPATCH_MACH_NOTIFICATION_ARMED(dmn), relaxed);
 		}
-		du._du->du_wlh = DISPATCH_WLH_ANON;
 #endif
+		dul->du_muxnote = dmn;
+		_dispatch_unote_state_set(du, DISPATCH_WLH_ANON, DU_STATE_ARMED);
+		_dispatch_du_debug("installed", du._du);
 	}
 	return installed;
 }
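
The muxnote storage above moves from the TAILQ_* macros to the lighter LIST_* family from <sys/queue.h>: the head keeps only a first pointer, and removal stays O(1) thanks to the back-link embedded in each entry. A small self-contained sketch of the idiom, with made-up element names:

    #include <stdio.h>
    #include <stdlib.h>
    #include <sys/queue.h>

    struct node {
        int value;
        LIST_ENTRY(node) link;              /* embedded linkage, like dmn_list/du_link */
    };

    LIST_HEAD(node_head, node);             /* like the muxnote bucket heads */

    int
    main(void)
    {
        struct node_head head = LIST_HEAD_INITIALIZER(head);
        struct node *n;

        for (int i = 0; i < 3; i++) {
            n = calloc(1, sizeof(*n));      /* error handling omitted in this sketch */
            n->value = i;
            LIST_INSERT_HEAD(&head, n, link);
        }
        LIST_FOREACH(n, &head, link) {      /* like the lookup loop in _dispatch_muxnote_find */
            printf("%d\n", n->value);
        }
        while (!LIST_EMPTY(&head)) {        /* like the unregister/dispose path */
            n = LIST_FIRST(&head);
            LIST_REMOVE(n, link);
            free(n);
        }
        return 0;
    }
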
 
-bool
-_dispatch_unote_register(dispatch_unote_t du, dispatch_wlh_t wlh,
-		dispatch_priority_t pri)
-{
-	dispatch_assert(!_dispatch_unote_registered(du));
-	du._du->du_priority = pri;
-	switch (du._du->du_filter) {
-	case DISPATCH_EVFILT_CUSTOM_ADD:
-	case DISPATCH_EVFILT_CUSTOM_OR:
-	case DISPATCH_EVFILT_CUSTOM_REPLACE:
-		du._du->du_wlh = DISPATCH_WLH_ANON;
-		return true;
-	}
-	if (!du._du->du_is_direct) {
-		return _dispatch_unote_register_muxed(du, DISPATCH_WLH_ANON);
-	}
-	return _dispatch_kq_unote_update(wlh, du, EV_ADD | EV_ENABLE);
-}
-
 void
-_dispatch_unote_resume(dispatch_unote_t du)
+_dispatch_unote_resume_muxed(dispatch_unote_t du)
 {
-	dispatch_assert(_dispatch_unote_registered(du));
-
-	if (du._du->du_is_direct) {
-		dispatch_wlh_t wlh = du._du->du_wlh;
-		_dispatch_kq_unote_update(wlh, du, EV_ENABLE);
-	} else if (unlikely(du._du->du_type->dst_update_mux)) {
+	_dispatch_unote_state_set_bit(du, DU_STATE_ARMED);
+	if (unlikely(dux_type(du._du)->dst_update_mux)) {
 		dispatch_unote_linkage_t dul = _dispatch_unote_get_linkage(du);
-		du._du->du_type->dst_update_mux(dul->du_muxnote);
+		dux_type(du._du)->dst_update_mux(dul->du_muxnote);
 	} else {
 		dispatch_unote_linkage_t dul = _dispatch_unote_get_linkage(du);
 		dispatch_muxnote_t dmn = dul->du_muxnote;
-		_dispatch_kq_deferred_update(dmn->dmn_wlh, &dmn->dmn_kev);
+		_dispatch_kq_deferred_update(DISPATCH_WLH_ANON, &dmn->dmn_kev);
 	}
 }
 
-DISPATCH_NOINLINE
-static bool
-_dispatch_unote_unregister_muxed(dispatch_unote_t du, uint32_t flags)
+bool
+_dispatch_unote_unregister_muxed(dispatch_unote_t du)
 {
 	dispatch_unote_linkage_t dul = _dispatch_unote_get_linkage(du);
 	dispatch_muxnote_t dmn = dul->du_muxnote;
@@ -1068,19 +1189,18 @@
 		os_atomic_store2o(du._dmsr, dmsr_notification_armed, false, relaxed);
 	}
 #endif
-
-	dispatch_assert(du._du->du_wlh == DISPATCH_WLH_ANON);
-	du._du->du_wlh = NULL;
-	TAILQ_REMOVE(&dmn->dmn_unotes_head, dul, du_link);
-	_TAILQ_TRASH_ENTRY(dul, du_link);
+	_dispatch_unote_state_set(du, DU_STATE_UNREGISTERED);
+	LIST_REMOVE(dul, du_link);
+	_LIST_TRASH_ENTRY(dul, du_link);
 	dul->du_muxnote = NULL;
 
-	if (TAILQ_EMPTY(&dmn->dmn_unotes_head)) {
+	if (LIST_EMPTY(&dmn->dmn_unotes_head)) {
+		dispose = true;
+		update = !(dmn->dmn_kev.flags & EV_DELETE);
 		dmn->dmn_kev.flags |= EV_DELETE;
-		update = dispose = true;
 	} else {
-		uint32_t fflags = du._du->du_type->dst_fflags;
-		TAILQ_FOREACH(dul, &dmn->dmn_unotes_head, du_link) {
+		uint32_t fflags = dux_type(du._du)->dst_fflags;
+		LIST_FOREACH(dul, &dmn->dmn_unotes_head, du_link) {
 			du = _dispatch_unote_linkage_get_unote(dul);
 			fflags |= du._du->du_fflags;
 		}
@@ -1089,55 +1209,114 @@
 			update = true;
 		}
 	}
-	if (update && !(flags & DU_UNREGISTER_ALREADY_DELETED)) {
-		if (unlikely(du._du->du_type->dst_update_mux)) {
-			dispatch_assume(du._du->du_type->dst_update_mux(dmn));
+	if (update) {
+		if (unlikely(dux_type(du._du)->dst_update_mux)) {
+			dispatch_assume(dux_type(du._du)->dst_update_mux(dmn));
 		} else {
-			_dispatch_kq_deferred_update(dmn->dmn_wlh, &dmn->dmn_kev);
+			_dispatch_kq_deferred_update(DISPATCH_WLH_ANON, &dmn->dmn_kev);
 		}
 	}
 	if (dispose) {
-		struct dispatch_muxnote_bucket_s *dmb;
-		dmb = _dispatch_muxnote_bucket(dmn->dmn_kev.ident, dmn->dmn_kev.filter);
-		_dispatch_muxnotes_lock();
-		TAILQ_REMOVE(dmb, dmn, dmn_list);
-		_dispatch_muxnotes_unlock();
+		LIST_REMOVE(dmn, dmn_list);
 		free(dmn);
 	}
+	_dispatch_du_debug("deleted", du._du);
 	return true;
 }
 
+#if DISPATCH_HAVE_DIRECT_KNOTES
+bool
+_dispatch_unote_register_direct(dispatch_unote_t du, dispatch_wlh_t wlh)
+{
+	return _dispatch_kq_unote_update(wlh, du, EV_ADD | EV_ENABLE);
+}
+
+void
+_dispatch_unote_resume_direct(dispatch_unote_t du)
+{
+	_dispatch_unote_state_set_bit(du, DU_STATE_ARMED);
+	_dispatch_kq_unote_update(_dispatch_unote_wlh(du), du, EV_ENABLE);
+}
+
 bool
-_dispatch_unote_unregister(dispatch_unote_t du, uint32_t flags)
+_dispatch_unote_unregister_direct(dispatch_unote_t du, uint32_t flags)
 {
-	switch (du._du->du_filter) {
-	case DISPATCH_EVFILT_CUSTOM_ADD:
-	case DISPATCH_EVFILT_CUSTOM_OR:
-	case DISPATCH_EVFILT_CUSTOM_REPLACE:
-		du._du->du_wlh = NULL;
-		return true;
+	dispatch_unote_state_t du_state = _dispatch_unote_state(du);
+	dispatch_wlh_t du_wlh = _du_state_wlh(du_state);
+	dispatch_deferred_items_t ddi = _dispatch_deferred_items_get();
+	uint16_t action = EV_DELETE;
+	if (likely(du_wlh != DISPATCH_WLH_ANON && ddi && ddi->ddi_wlh == du_wlh)) {
+#if DISPATCH_USE_KEVENT_WORKLOOP
+		// Workloops are special: event delivery and servicing a workloop
+		// cannot race because the kernel can reason about these.
+		// Unregistering from a workloop is always safe and should always
+		// succeed immediately.
+#endif
+		action |= EV_ENABLE;
+		flags |= DUU_DELETE_ACK | DUU_MUST_SUCCEED;
 	}
-	dispatch_wlh_t wlh = du._du->du_wlh;
-	if (wlh) {
-		if (!du._du->du_is_direct) {
-			return _dispatch_unote_unregister_muxed(du, flags);
+
+	if (!_du_state_needs_delete(du_state) || (flags & DUU_DELETE_ACK)) {
+		if (du_state == DU_STATE_NEEDS_DELETE) {
+			// There is no knote to unregister anymore, just do it.
+			_dispatch_unote_state_set(du, DU_STATE_UNREGISTERED);
+			_dispatch_du_debug("acknowledged deleted oneshot", du._du);
+			return true;
 		}
-		uint16_t action_flags;
-		if (flags & DU_UNREGISTER_ALREADY_DELETED) {
-			action_flags = 0;
-		} else if (flags & DU_UNREGISTER_IMMEDIATE_DELETE) {
-			action_flags = EV_DELETE | EV_ENABLE;
-		} else {
-			action_flags = EV_DELETE;
+		if (!_du_state_armed(du_state)) {
+			action |= EV_ENABLE;
+			flags |= DUU_MUST_SUCCEED;
 		}
-		return _dispatch_kq_unote_update(wlh, du, action_flags);
+		if ((action & EV_ENABLE) || (flags & DUU_PROBE)) {
+			if (_dispatch_kq_unote_update(du_wlh, du, action)) {
+				return true;
+			}
+		}
 	}
-	return true;
+	if (flags & DUU_MUST_SUCCEED) {
+		DISPATCH_INTERNAL_CRASH(0, "Unregistration failed");
+	}
+	return false;
 }
+#endif // DISPATCH_HAVE_DIRECT_KNOTES
 
 #pragma mark -
 #pragma mark dispatch_event_loop
 
+enum {
+	DISPATCH_WORKLOOP_ASYNC,
+	DISPATCH_WORKLOOP_ASYNC_FROM_SYNC,
+	DISPATCH_WORKLOOP_ASYNC_DISCOVER_SYNC,
+	DISPATCH_WORKLOOP_ASYNC_QOS_UPDATE,
+	DISPATCH_WORKLOOP_ASYNC_LEAVE,
+	DISPATCH_WORKLOOP_ASYNC_LEAVE_FROM_SYNC,
+	DISPATCH_WORKLOOP_ASYNC_LEAVE_FROM_TRANSFER,
+	DISPATCH_WORKLOOP_ASYNC_FORCE_END_OWNERSHIP,
+	DISPATCH_WORKLOOP_RETARGET,
+
+	DISPATCH_WORKLOOP_SYNC_WAIT,
+	DISPATCH_WORKLOOP_SYNC_WAKE,
+	DISPATCH_WORKLOOP_SYNC_FAKE,
+	DISPATCH_WORKLOOP_SYNC_END,
+};
+
+static char const * const _dispatch_workloop_actions[] = {
+	[DISPATCH_WORKLOOP_ASYNC]                       = "async",
+	[DISPATCH_WORKLOOP_ASYNC_FROM_SYNC]             = "async (from sync)",
+	[DISPATCH_WORKLOOP_ASYNC_DISCOVER_SYNC]         = "discover sync",
+	[DISPATCH_WORKLOOP_ASYNC_QOS_UPDATE]            = "qos update",
+	[DISPATCH_WORKLOOP_ASYNC_LEAVE]                 = "leave",
+	[DISPATCH_WORKLOOP_ASYNC_LEAVE_FROM_SYNC]       = "leave (from sync)",
+	[DISPATCH_WORKLOOP_ASYNC_LEAVE_FROM_TRANSFER]   = "leave (from transfer)",
+	[DISPATCH_WORKLOOP_ASYNC_FORCE_END_OWNERSHIP]   = "leave (forced)",
+	[DISPATCH_WORKLOOP_RETARGET]                    = "retarget",
+
+	[DISPATCH_WORKLOOP_SYNC_WAIT]                   = "sync-wait",
+	[DISPATCH_WORKLOOP_SYNC_FAKE]                   = "sync-fake",
+	[DISPATCH_WORKLOOP_SYNC_WAKE]                   = "sync-wake",
+	[DISPATCH_WORKLOOP_SYNC_END]                    = "sync-end",
+};
+
 void
 _dispatch_event_loop_atfork_child(void)
 {
@@ -1147,6 +1326,554 @@
 #endif
 }
 
+#if DISPATCH_USE_KEVENT_WORKLOOP
+#if DISPATCH_WLH_DEBUG
+/*
+ * Debug information for current thread & workloop:
+ *
+ * fflags:
+ * - NOTE_WL_THREAD_REQUEST is set if there is a thread request knote
+ * - NOTE_WL_SYNC_WAIT is set if there is at least one waiter
+ *
+ * ext[0]: 64bit thread ID of the owner if any
+ * ext[1]: 64bit thread ID of the servicer if any
+ * ext[2]: number of workloops owned by the caller thread
+ *
+ * If this interface is supported by the kernel, the returned error is EBUSY;
+ * if not, it is EINVAL.
+ */
+static bool
+_dispatch_kevent_workloop_get_info(dispatch_wlh_t wlh, dispatch_kevent_t ke)
+{
+	uint32_t kev_flags = KEVENT_FLAG_IMMEDIATE | KEVENT_FLAG_ERROR_EVENTS |
+			KEVENT_FLAG_DYNAMIC_KQ_MUST_EXIST;
+	*ke = (dispatch_kevent_s){
+		.filter = EVFILT_WORKLOOP,
+		.flags  = EV_ADD | EV_ENABLE,
+	};
+	if (_dispatch_kq_poll(wlh, ke, 1, ke, 1, NULL, NULL, kev_flags)) {
+		dispatch_assert(ke->flags & EV_ERROR);
+		return ke->data == EBUSY;
+	}
+	*ke = (dispatch_kevent_s){
+		.flags = EV_ERROR,
+		.data  = ENOENT,
+	};
+	return true;
+}
+#endif
+
+DISPATCH_ALWAYS_INLINE
+static inline pthread_priority_t
+_dispatch_kevent_workloop_priority(dispatch_queue_t dq, int which,
+		dispatch_qos_t qos)
+{
+	dispatch_priority_t rq_pri = dq->do_targetq->dq_priority;
+	if (qos < _dispatch_priority_qos(rq_pri)) {
+		qos = _dispatch_priority_qos(rq_pri);
+	}
+	if (qos == DISPATCH_QOS_UNSPECIFIED) {
+#if 0 // we need to understand why this is happening first...
+		if (which != DISPATCH_WORKLOOP_ASYNC_FROM_SYNC) {
+			DISPATCH_INTERNAL_CRASH(which, "Should have had a QoS");
+		}
+#else
+		(void)which;
+#endif
+		//
+		// <rdar://32326125> When an enqueue happens right when a barrier ends,
+		// the barrier that ends may notice the next item before the enqueuer
+		// has had the time to set the max QoS on the queue.
+		//
+		// It is inconvenient to drop this thread request, and this case is rare
+		// enough that we instead ask for MAINTENANCE to avoid the kernel
+		// failing with ERANGE.
+		//
+		qos = DISPATCH_QOS_MAINTENANCE;
+	}
+	pthread_priority_t pp = _dispatch_qos_to_pp(qos);
+	return pp | (rq_pri & DISPATCH_PRIORITY_FLAG_OVERCOMMIT);
+}
+
+DISPATCH_ALWAYS_INLINE_NDEBUG
+static void
+_dispatch_kq_fill_workloop_event(dispatch_kevent_t ke, int which,
+		dispatch_wlh_t wlh, uint64_t dq_state)
+{
+	dispatch_queue_t dq = (dispatch_queue_t)wlh;
+	dispatch_qos_t qos = _dq_state_max_qos(dq_state);
+	pthread_priority_t pp = 0;
+	uint32_t fflags = 0;
+	uint64_t mask = 0;
+	uint16_t action = 0;
+
+	switch (which) {
+	case DISPATCH_WORKLOOP_ASYNC_FROM_SYNC:
+		fflags |= NOTE_WL_END_OWNERSHIP;
+		/* FALLTHROUGH */
+	case DISPATCH_WORKLOOP_ASYNC:
+	case DISPATCH_WORKLOOP_ASYNC_DISCOVER_SYNC:
+	case DISPATCH_WORKLOOP_ASYNC_QOS_UPDATE:
+		dispatch_assert(_dq_state_is_base_wlh(dq_state));
+		dispatch_assert(_dq_state_is_enqueued_on_target(dq_state));
+		action = EV_ADD | EV_ENABLE;
+		mask |= DISPATCH_QUEUE_ROLE_MASK;
+		mask |= DISPATCH_QUEUE_ENQUEUED;
+		mask |= DISPATCH_QUEUE_MAX_QOS_MASK;
+		if (which == DISPATCH_WORKLOOP_ASYNC_DISCOVER_SYNC) {
+			dispatch_assert(!_dq_state_in_sync_transfer(dq_state));
+			dispatch_assert(_dq_state_drain_locked(dq_state));
+			mask |= DISPATCH_QUEUE_SYNC_TRANSFER;
+			fflags |= NOTE_WL_DISCOVER_OWNER;
+		} else {
+			fflags |= NOTE_WL_IGNORE_ESTALE;
+		}
+		fflags |= NOTE_WL_UPDATE_QOS;
+		pp = _dispatch_kevent_workloop_priority(dq, which, qos);
+		break;
+
+	case DISPATCH_WORKLOOP_ASYNC_LEAVE_FROM_SYNC:
+		fflags |= NOTE_WL_END_OWNERSHIP;
+		/* FALLTHROUGH */
+	case DISPATCH_WORKLOOP_ASYNC_LEAVE_FROM_TRANSFER:
+		fflags |= NOTE_WL_IGNORE_ESTALE;
+		/* FALLTHROUGH */
+	case DISPATCH_WORKLOOP_ASYNC_LEAVE:
+		dispatch_assert(!_dq_state_is_enqueued_on_target(dq_state));
+		action = EV_ADD | EV_DELETE | EV_ENABLE;
+		mask |= DISPATCH_QUEUE_ENQUEUED;
+		break;
+
+	case DISPATCH_WORKLOOP_ASYNC_FORCE_END_OWNERSHIP:
+		// 0 is never a valid queue state, so the knote attach will fail due to
+		// the debounce. However, NOTE_WL_END_OWNERSHIP is always observed even
+		// when ESTALE is returned, which is the side effect we're after here.
+		fflags |= NOTE_WL_END_OWNERSHIP;
+		fflags |= NOTE_WL_IGNORE_ESTALE;
+		action = EV_ADD | EV_ENABLE;
+		mask = ~0ull;
+		dq_state = 0;
+		pp = _dispatch_kevent_workloop_priority(dq, which, qos);
+		break;
+
+	case DISPATCH_WORKLOOP_RETARGET:
+		action = EV_ADD | EV_DELETE | EV_ENABLE;
+		fflags |= NOTE_WL_END_OWNERSHIP;
+		break;
+
+	default:
+		DISPATCH_INTERNAL_CRASH(which, "Invalid transition");
+	}
+
+	*ke = (dispatch_kevent_s){
+		.ident  = (uintptr_t)wlh,
+		.filter = EVFILT_WORKLOOP,
+		.flags  = action,
+		.fflags = fflags | NOTE_WL_THREAD_REQUEST,
+		.qos    = (__typeof__(ke->qos))pp,
+		.udata  = (uintptr_t)wlh,
+
+		.ext[EV_EXTIDX_WL_ADDR]  = (uintptr_t)&dq->dq_state,
+		.ext[EV_EXTIDX_WL_MASK]  = mask,
+		.ext[EV_EXTIDX_WL_VALUE] = dq_state,
+	};
+	_dispatch_kevent_wlh_debug(_dispatch_workloop_actions[which], ke);
+}
+
+DISPATCH_ALWAYS_INLINE
+static inline void
+_dispatch_kq_fill_ddi_workloop_event(dispatch_deferred_items_t ddi,
+		int which, dispatch_wlh_t wlh, uint64_t dq_state)
+{
+	int slot = _dispatch_kq_deferred_find_slot(ddi, EVFILT_WORKLOOP,
+			(uint64_t)wlh, (uint64_t)wlh);
+	if (slot == ddi->ddi_nevents) {
+		dispatch_assert(slot < DISPATCH_DEFERRED_ITEMS_EVENT_COUNT);
+		ddi->ddi_nevents++;
+	}
+	_dispatch_kq_fill_workloop_event(&ddi->ddi_eventlist[slot],
+			which, wlh, dq_state);
+}
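
_dispatch_kq_fill_ddi_workloop_event() above treats the deferred-items list as a tiny coalescing buffer: it looks up an existing slot keyed by (filter, ident, udata), overwrites it when present, and appends otherwise. A rough standalone sketch of that find-or-append pattern (the struct and field names are simplified placeholders):

    #include <assert.h>
    #include <stdint.h>

    #define EVENT_COUNT 4

    struct ev {
        int16_t  filter;
        uint64_t ident;
        uint64_t udata;
        uint32_t fflags;
    };

    struct deferred {
        int       nevents;
        struct ev eventlist[EVENT_COUNT];
    };

    /* Return the slot matching (filter, ident, udata), or nevents if absent. */
    static int
    find_slot(struct deferred *ddi, int16_t filter, uint64_t ident, uint64_t udata)
    {
        int i;
        for (i = 0; i < ddi->nevents; i++) {
            if (ddi->eventlist[i].filter == filter &&
                    ddi->eventlist[i].ident == ident &&
                    ddi->eventlist[i].udata == udata) {
                break;
            }
        }
        return i;
    }

    /* Coalesce: overwrite an existing slot, or append a new one. */
    static void
    deferred_update(struct deferred *ddi, const struct ev *ke)
    {
        int slot = find_slot(ddi, ke->filter, ke->ident, ke->udata);
        if (slot == ddi->nevents) {
            assert(slot < EVENT_COUNT);     /* mirrors the dispatch_assert above */
            ddi->nevents++;
        }
        ddi->eventlist[slot] = *ke;
    }

    int
    main(void)
    {
        struct deferred ddi = { 0 };
        struct ev a = { .filter = -1, .ident = 7, .udata = 7, .fflags = 1 };

        deferred_update(&ddi, &a);
        a.fflags = 2;
        deferred_update(&ddi, &a);          /* coalesces into the same slot */
        assert(ddi.nevents == 1 && ddi.eventlist[0].fflags == 2);
        return 0;
    }
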
+
+DISPATCH_ALWAYS_INLINE_NDEBUG
+static void
+_dispatch_kq_fill_workloop_sync_event(dispatch_kevent_t ke, int which,
+		dispatch_wlh_t wlh, uint64_t dq_state, dispatch_tid tid)
+{
+	dispatch_queue_t dq = (dispatch_queue_t)wlh;
+	pthread_priority_t pp = 0;
+	uint32_t fflags = 0;
+	uint64_t mask = 0;
+	uint16_t action = 0;
+
+	switch (which) {
+	case DISPATCH_WORKLOOP_SYNC_WAIT:
+		action = EV_ADD | EV_DISABLE;
+		fflags = NOTE_WL_SYNC_WAIT;
+		pp     = _dispatch_get_priority();
+		if (_dispatch_qos_from_pp(pp) == 0) {
+			pp = _dispatch_qos_to_pp(DISPATCH_QOS_DEFAULT);
+		}
+		if (_dq_state_received_sync_wait(dq_state)) {
+			fflags |= NOTE_WL_DISCOVER_OWNER;
+			mask = DISPATCH_QUEUE_ROLE_MASK | DISPATCH_QUEUE_RECEIVED_SYNC_WAIT;
+		}
+		break;
+
+	case DISPATCH_WORKLOOP_SYNC_FAKE:
+		action = EV_ADD | EV_DISABLE;
+		fflags = NOTE_WL_SYNC_WAKE;
+		break;
+
+	case DISPATCH_WORKLOOP_SYNC_WAKE:
+		dispatch_assert(_dq_state_drain_locked_by(dq_state, tid));
+		action = EV_ADD | EV_DISABLE;
+		fflags = NOTE_WL_SYNC_WAKE | NOTE_WL_DISCOVER_OWNER;
+		break;
+
+	case DISPATCH_WORKLOOP_SYNC_END:
+		action = EV_DELETE | EV_ENABLE;
+		fflags = NOTE_WL_SYNC_WAKE | NOTE_WL_END_OWNERSHIP;
+		break;
+
+	default:
+		DISPATCH_INTERNAL_CRASH(which, "Invalid transition");
+	}
+
+	*ke = (dispatch_kevent_s){
+		.ident  = tid,
+		.filter = EVFILT_WORKLOOP,
+		.flags  = action,
+		.fflags = fflags,
+		.udata  = (uintptr_t)wlh,
+		.qos    = (__typeof__(ke->qos))pp,
+
+		.ext[EV_EXTIDX_WL_MASK] = mask,
+		.ext[EV_EXTIDX_WL_VALUE] = dq_state,
+	};
+	if (fflags & NOTE_WL_DISCOVER_OWNER) {
+		ke->ext[EV_EXTIDX_WL_ADDR] = (uintptr_t)&dq->dq_state;
+	}
+	_dispatch_kevent_wlh_debug(_dispatch_workloop_actions[which], ke);
+}
+
+#define DISPATCH_KEVENT_WORKLOOP_ALLOW_ENOENT 1
+#define DISPATCH_KEVENT_WORKLOOP_ALLOW_ESTALE 2
+#define DISPATCH_KEVENT_WORKLOOP_ALLOW_EINTR  4
+
+DISPATCH_ALWAYS_INLINE
+static inline int
+_dispatch_kevent_workloop_drain_error(dispatch_kevent_t ke, long flags)
+{
+	int err = (int)ke->data;
+
+	_dispatch_kevent_wlh_debug("received error", ke);
+	dispatch_assert(ke->flags & EV_ERROR);
+	//
+	// Clear the error so that we can use the same struct to redrive as is
+	// but leave a breadcrumb about the error in xflags for debugging
+	//
+	ke->flags &= ~EV_ERROR;
+	ke->xflags = (uint32_t)err;
+	ke->data = 0;
+
+	switch (err) {
+	case EINTR:
+		if ((flags & DISPATCH_KEVENT_WORKLOOP_ALLOW_EINTR) &&
+				(ke->fflags & NOTE_WL_SYNC_WAIT)) {
+			return EINTR;
+		}
+		break;
+	case ENOENT:
+		if ((flags & DISPATCH_KEVENT_WORKLOOP_ALLOW_ENOENT) &&
+				(ke->flags & EV_DELETE) && (ke->fflags & NOTE_WL_SYNC_WAKE) &&
+				(ke->fflags & NOTE_WL_END_OWNERSHIP)) {
+			//
+			// When breaking out a waiter because of a retarget, that waiter may
+			// not have made his wait syscall yet, and we can't really prepost
+			// an EV_DELETE, so we have to redrive on ENOENT in this case
+			//
+			return ENOENT;
+		}
+		break;
+	case ESTALE:
+		if ((flags & DISPATCH_KEVENT_WORKLOOP_ALLOW_ESTALE) &&
+				!(ke->fflags & NOTE_WL_IGNORE_ESTALE) &&
+				ke->ext[EV_EXTIDX_WL_ADDR] && ke->ext[EV_EXTIDX_WL_MASK]) {
+			return ESTALE;
+		}
+		break;
+	case ERANGE:
+		DISPATCH_INTERNAL_CRASH((uintptr_t)ke->qos, "Broken priority");
+	case EOWNERDEAD:
+		DISPATCH_CLIENT_CRASH((uintptr_t)ke->ext[EV_EXTIDX_WL_VALUE],
+				"Invalid workloop owner, possible memory corruption");
+	default:
+		break;
+	}
+	DISPATCH_INTERNAL_CRASH(err, "Unexpected error from kevent");
+}
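
The error-triage helper above follows one fixed shape: assert the error bit, clear it so the same struct can be redriven, stash the errno as a breadcrumb, return only the errors the caller explicitly allowed, and crash on everything else. A condensed standalone sketch of that shape (the struct, flag, and function names below are made up, not the EVFILT_WORKLOOP ABI):

    #include <assert.h>
    #include <errno.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    #define ALLOW_ENOENT  0x1
    #define ALLOW_ESTALE  0x2
    #define FAKE_EV_ERROR 0x4000

    struct fake_kev {
        uint16_t flags;                     /* stand-in for ke->flags */
        uint32_t xflags;                    /* breadcrumb slot, like ke->xflags */
        int64_t  data;                      /* errno delivered by the kernel */
    };

    /* Returns an allowed errno to the caller, aborts on anything else. */
    static int
    drain_error(struct fake_kev *ke, long allowed)
    {
        int err = (int)ke->data;

        assert(ke->flags & FAKE_EV_ERROR);
        ke->flags &= ~FAKE_EV_ERROR;        /* struct can be reused for a redrive */
        ke->xflags = (uint32_t)err;         /* keep a breadcrumb for debugging */
        ke->data = 0;

        switch (err) {
        case ENOENT:
            if (allowed & ALLOW_ENOENT) return ENOENT;
            break;
        case ESTALE:
            if (allowed & ALLOW_ESTALE) return ESTALE;
            break;
        default:
            break;
        }
        fprintf(stderr, "unexpected kevent error %d\n", err);
        abort();
    }

    int
    main(void)
    {
        struct fake_kev ke = { .flags = FAKE_EV_ERROR, .data = ESTALE };

        assert(drain_error(&ke, ALLOW_ESTALE) == ESTALE);
        assert(ke.flags == 0 && ke.xflags == (uint32_t)ESTALE);
        return 0;
    }
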
+
+DISPATCH_ALWAYS_INLINE
+static void
+_dispatch_kevent_workloop_stash(dispatch_wlh_t wlh, dispatch_kevent_t ke,
+		dispatch_deferred_items_t ddi)
+{
+	dispatch_queue_t dq = (dispatch_queue_t)wlh;
+	dispatch_assert(!ddi->ddi_stashed_dou._dq);
+	ddi->ddi_wlh_needs_delete = true;
+	_dispatch_retain(dq);
+	ddi->ddi_stashed_rq = upcast(dq->do_targetq)._dgq;
+	ddi->ddi_stashed_dou._dq = dq;
+	ddi->ddi_stashed_qos = _dispatch_qos_from_pp((pthread_priority_t)ke->qos);
+}
+
+DISPATCH_ALWAYS_INLINE
+static inline int
+_dispatch_event_loop_get_action_for_state(uint64_t dq_state)
+{
+	dispatch_assert(_dq_state_is_base_wlh(dq_state));
+
+	if (!_dq_state_is_enqueued_on_target(dq_state)) {
+		return DISPATCH_WORKLOOP_ASYNC_LEAVE;
+	}
+	if (!_dq_state_drain_locked(dq_state)) {
+		return DISPATCH_WORKLOOP_ASYNC;
+	}
+	if (!_dq_state_in_sync_transfer(dq_state)) {
+		return DISPATCH_WORKLOOP_ASYNC_DISCOVER_SYNC;
+	}
+	return DISPATCH_WORKLOOP_ASYNC_QOS_UPDATE;
+}
+
+DISPATCH_NOINLINE
+static void
+_dispatch_kevent_workloop_poke_drain(dispatch_kevent_t ke)
+{
+	dispatch_deferred_items_t ddi = _dispatch_deferred_items_get();
+	dispatch_wlh_t wlh = (dispatch_wlh_t)ke->udata;
+
+	dispatch_assert(ke->fflags & NOTE_WL_THREAD_REQUEST);
+	if (ke->flags & EV_ERROR) {
+		uint64_t dq_state = ke->ext[EV_EXTIDX_WL_VALUE];
+
+		_dispatch_kevent_workloop_drain_error(ke,
+				DISPATCH_KEVENT_WORKLOOP_ALLOW_ESTALE);
+
+		if (!_dq_state_is_base_wlh(dq_state)) {
+			dispatch_assert((ke->flags & EV_DELETE) == 0);
+			//
+			// A late async request bounced because the queue is no longer
+			// a workloop. There is a DISPATCH_WORKLOOP_RETARGET transition that
+			// will take care of deleting the thread request
+			//
+			return _dispatch_kevent_wlh_debug("ignoring", ke);
+		}
+
+		//
+		// We're draining a failed _dispatch_event_loop_leave_deferred()
+		// so repeat its logic.
+		//
+		int action = _dispatch_event_loop_get_action_for_state(dq_state);
+		if (action == DISPATCH_WORKLOOP_ASYNC) {
+			_dispatch_kevent_wlh_debug("retry drain", ke);
+			return _dispatch_kevent_workloop_stash(wlh, ke, ddi);
+		} else {
+			_dispatch_kq_fill_workloop_event(ke, action, wlh, dq_state);
+			return _dispatch_kq_deferred_update(wlh, ke);
+		}
+	} else if (ddi->ddi_wlh_needs_delete) {
+		//
+		// we knew about this thread request because we learned about it
+		// in _dispatch_kevent_workloop_poke_self() while merging another event.
+		// It has already been accounted for, so just swallow it.
+		//
+		return _dispatch_kevent_wlh_debug("ignoring", ke);
+	} else {
+		//
+		// This is a new thread request, it is carrying a +1 reference.
+		//
+		_dispatch_kevent_wlh_debug("got drain", ke);
+		return _dispatch_kevent_workloop_stash(wlh, ke, ddi);
+	}
+}
+
+static void
+_dispatch_kevent_workloop_poke(dispatch_wlh_t wlh, uint64_t dq_state,
+		uint32_t flags)
+{
+	uint32_t kev_flags = KEVENT_FLAG_IMMEDIATE | KEVENT_FLAG_ERROR_EVENTS;
+	dispatch_kevent_s ke;
+	int action;
+
+	dispatch_assert(_dq_state_is_enqueued_on_target(dq_state));
+	dispatch_assert(!_dq_state_is_enqueued_on_manager(dq_state));
+	action = _dispatch_event_loop_get_action_for_state(dq_state);
+override:
+	_dispatch_kq_fill_workloop_event(&ke, action, wlh, dq_state);
+
+	if (_dispatch_kq_poll(wlh, &ke, 1, &ke, 1, NULL, NULL, kev_flags)) {
+		_dispatch_kevent_workloop_drain_error(&ke,
+				DISPATCH_KEVENT_WORKLOOP_ALLOW_ESTALE);
+		dispatch_assert(action == DISPATCH_WORKLOOP_ASYNC_DISCOVER_SYNC);
+		dq_state = ke.ext[EV_EXTIDX_WL_VALUE];
+		//
+		// There are 4 things that can cause an ESTALE for DISCOVER_SYNC:
+		// - the queue role changed, we don't want to redrive
+		// - the queue is no longer enqueued, we don't want to redrive
+		// - the max QoS changed, whoever changed it is doing the same
+		//   transition, so we don't need to redrive
+		// - the DISPATCH_QUEUE_SYNC_TRANSFER bit got set
+		//
+		// The interesting case is the last one, and will only happen in the
+		// following chain of events:
+		// 1. uncontended dispatch_sync()
+		// 2. contended dispatch_sync()
+		// 3. contended dispatch_async()
+		//
+		// And this code is running because of (3). It is possible that (1)
+		// hands off to (2) while this call is being made, causing
+		// DISPATCH_QUEUE_SYNC_TRANSFER to be set, in which case we don't need
+		// to tell the kernel about the owner anymore. However, the async in
+		// that case will have set a QoS on the queue (dispatch_sync()s don't,
+		// but dispatch_async()s always do), and we need to redrive to tell
+		// the kernel about that QoS.
+		//
+		if (_dq_state_is_base_wlh(dq_state) &&
+				_dq_state_is_enqueued_on_target(dq_state) &&
+				_dq_state_in_sync_transfer(dq_state)) {
+			action = DISPATCH_WORKLOOP_ASYNC;
+			goto override;
+		}
+	}
+
+	if (!(flags & DISPATCH_EVENT_LOOP_OVERRIDE)) {
+		// Consume the reference that kept the workloop valid
+		// for the duration of the syscall.
+		return _dispatch_release_tailcall((dispatch_queue_t)wlh);
+	}
+	if (flags & DISPATCH_EVENT_LOOP_CONSUME_2) {
+		return _dispatch_release_2_tailcall((dispatch_queue_t)wlh);
+	}
+}
+
+DISPATCH_NOINLINE
+static void
+_dispatch_kevent_workloop_override_self(dispatch_deferred_items_t ddi,
+		uint64_t dq_state, uint32_t flags)
+{
+	dispatch_wlh_t wlh = ddi->ddi_wlh;
+	uint32_t kev_flags = KEVENT_FLAG_IMMEDIATE | KEVENT_FLAG_ERROR_EVENTS;
+	dispatch_kevent_s ke;
+	//
+	// The workloop received work from itself that caused an override
+	// after the drain lock has been taken, just comply and move on.
+	//
+	dispatch_assert(ddi->ddi_wlh_needs_delete);
+	ddi->ddi_wlh_needs_update = false;
+
+	_dispatch_kq_fill_workloop_event(&ke, DISPATCH_WORKLOOP_ASYNC,
+			wlh, dq_state);
+	if (_dispatch_kq_poll(wlh, &ke, 1, &ke, 1, NULL, NULL, kev_flags)) {
+		_dispatch_kevent_workloop_drain_error(&ke, 0);
+		__builtin_unreachable();
+	}
+	if (flags & DISPATCH_EVENT_LOOP_CONSUME_2) {
+		return _dispatch_release_2_no_dispose((dispatch_queue_t)wlh);
+	}
+}
+
+static void
+_dispatch_kevent_workloop_poke_self(dispatch_deferred_items_t ddi,
+		uint64_t dq_state, uint32_t flags)
+{
+	dispatch_queue_t dq = (dispatch_queue_t)ddi->ddi_wlh;
+
+	if (ddi->ddi_wlh_servicing) {
+		dispatch_assert(ddi->ddi_wlh_needs_delete);
+		if (flags & DISPATCH_EVENT_LOOP_OVERRIDE) {
+			return _dispatch_kevent_workloop_override_self(ddi, dq_state,flags);
+		}
+		//
+		// dx_invoke() wants to re-enqueue itself, e.g. because the thread pool
+		// needs narrowing, or the queue is suspended, or any other reason that
+		// interrupts the drain.
+		//
+		// This is called with a +2 on the queue, a +1 goes to the thread
+		// request, the other we dispose of.
+		//
+		dispatch_assert(!_dq_state_drain_locked(dq_state));
+		dispatch_assert(_dq_state_is_enqueued_on_target(dq_state));
+		dispatch_assert(flags & DISPATCH_EVENT_LOOP_CONSUME_2);
+		_dispatch_release_no_dispose(dq);
+		return _dispatch_event_loop_leave_deferred(ddi, dq_state);
+	}
+
+	//
+	// This codepath is only used during the initial phase of merging
+	// incoming kernel events in _dispatch_workloop_worker_thread, before
+	// trying to take the drain lock in order to drain the workloop.
+	//
+	// Once we have taken the drain lock, wakeups will not reach this codepath
+	// because ddi->ddi_wlh_servicing will be set.
+	//
+
+	if (ddi->ddi_wlh_needs_delete) {
+		//
+		// We know there is a thread request already (stolen or real).
+		// However, an event is causing the workloop to be overridden.
+		// The kernel already has applied the override, so we can
+		// safely swallow this event, which carries no refcount.
+		//
+		dispatch_assert(flags & DISPATCH_EVENT_LOOP_OVERRIDE);
+		dispatch_assert(ddi->ddi_stashed_dou._dq);
+		if (flags & DISPATCH_EVENT_LOOP_CONSUME_2) {
+			return _dispatch_release_2_no_dispose(dq);
+		}
+		return;
+	}
+
+	if (flags & DISPATCH_EVENT_LOOP_OVERRIDE) {
+		//
+		// An event delivery is causing an override, but we didn't know
+		// about a thread request yet. However, since we're receiving an
+		// override, it means the initial thread request either exists in the
+		// kernel or is about to be made.
+		//
+		// If it is about to be made, it is possible that it will bounce with
+		// ESTALE, and will not be retried. It means we can't be sure there
+		// really is or even will be a knote in the kernel for it.
+		//
+		// We still want to take over the +1 this thread request carries whether
+		// it made it (or will make it) to the kernel, and turn it into a +2
+		// below.
+		//
+		// Overrides we receive in this way are coalesced and acknowledged
+		// only when we have to do a kevent() call for other reasons. The kernel
+		// will continue to apply the overrides in question until we acknowledge
+		// them, so there's no rush.
+		//
+		ddi->ddi_wlh_needs_update = true;
+		if (flags & DISPATCH_EVENT_LOOP_CONSUME_2) {
+			_dispatch_release_no_dispose(dq);
+		} else {
+			_dispatch_retain(dq);
+		}
+	} else {
+		//
+		// Merging events causes a thread request to be issued, which means
+		// the queue is empty in userland and the kernel event is the first
+		// thing enqueued. Consume the caller's +2.
+		//
+		dispatch_assert(flags & DISPATCH_EVENT_LOOP_CONSUME_2);
+	}
+	dispatch_assert(!ddi->ddi_stashed_dou._dq);
+	ddi->ddi_wlh_needs_delete = true;
+	ddi->ddi_stashed_rq = upcast(dq->do_targetq)._dgq;
+	ddi->ddi_stashed_dou._dq = dq;
+	ddi->ddi_stashed_qos = _dq_state_max_qos(dq_state);
+}
+#endif // DISPATCH_USE_KEVENT_WORKLOOP
 
 DISPATCH_NOINLINE
 void
@@ -1161,7 +1888,23 @@
 		};
 		return _dispatch_kq_deferred_update(DISPATCH_WLH_ANON, &ke);
 	} else if (wlh && wlh != DISPATCH_WLH_ANON) {
+#if DISPATCH_USE_KEVENT_WORKLOOP
+		dispatch_queue_t dq = (dispatch_queue_t)wlh;
+		dispatch_assert(_dq_state_is_base_wlh(dq_state));
+		if (unlikely(_dq_state_is_enqueued_on_manager(dq_state))) {
+			dispatch_assert(!(flags & DISPATCH_EVENT_LOOP_OVERRIDE));
+			dispatch_assert(flags & DISPATCH_EVENT_LOOP_CONSUME_2);
+			_dispatch_trace_item_push(&_dispatch_mgr_q, dq);
+			return dx_push(_dispatch_mgr_q._as_dq, dq, 0);
+		}
+		dispatch_deferred_items_t ddi = _dispatch_deferred_items_get();
+		if (ddi && ddi->ddi_wlh == wlh) {
+			return _dispatch_kevent_workloop_poke_self(ddi, dq_state, flags);
+		}
+		return _dispatch_kevent_workloop_poke(wlh, dq_state, flags);
+#else
 		(void)dq_state; (void)flags;
+#endif // DISPATCH_USE_KEVENT_WORKLOOP
 	}
 	DISPATCH_INTERNAL_CRASH(wlh, "Unsupported wlh configuration");
 }
@@ -1170,15 +1913,38 @@
 void
 _dispatch_event_loop_drain(uint32_t flags)
 {
-	dispatch_wlh_t wlh = _dispatch_get_wlh();
 	dispatch_deferred_items_t ddi = _dispatch_deferred_items_get();
+	dispatch_wlh_t wlh = ddi->ddi_wlh;
 	int n;
 
 again:
+#if DISPATCH_USE_KEVENT_WORKLOOP
+	if (ddi->ddi_wlh_needs_update) {
+		// see _dispatch_event_loop_drain() comments about the lazy handling
+		// of DISPATCH_EVENT_LOOP_OVERRIDE
+		dispatch_queue_t dq = (dispatch_queue_t)wlh;
+		uint64_t dq_state = os_atomic_load2o(dq, dq_state, relaxed);
+
+		dispatch_assert(ddi->ddi_wlh_needs_delete);
+		ddi->ddi_wlh_needs_update = false;
+		_dispatch_kq_fill_ddi_workloop_event(ddi,
+				DISPATCH_WORKLOOP_ASYNC_QOS_UPDATE, wlh, dq_state);
+	}
+#endif // DISPATCH_USE_KEVENT_WORKLOOP
 	n = ddi->ddi_nevents;
 	ddi->ddi_nevents = 0;
 	_dispatch_kq_drain(wlh, ddi->ddi_eventlist, n, flags);
 
+#if DISPATCH_USE_KEVENT_WORKLOOP
+	dispatch_workloop_t dwl = _dispatch_wlh_to_workloop(wlh);
+	if (dwl) {
+		dispatch_timer_heap_t dth = dwl->dwl_timer_heap;
+		if (dth && dth[0].dth_dirty_bits) {
+			_dispatch_event_loop_drain_timers(dth, DISPATCH_TIMER_WLH_COUNT);
+		}
+	}
+#endif // DISPATCH_USE_KEVENT_WORKLOOP
+
 	if ((flags & KEVENT_FLAG_IMMEDIATE) &&
 			!(flags & KEVENT_FLAG_ERROR_EVENTS) &&
 			_dispatch_needs_to_return_to_kernel()) {
@@ -1190,52 +1956,261 @@
 _dispatch_event_loop_merge(dispatch_kevent_t events, int nevents)
 {
 	dispatch_deferred_items_t ddi = _dispatch_deferred_items_get();
+	dispatch_wlh_t wlh = ddi->ddi_wlh;
 	dispatch_kevent_s kev[nevents];
 
 	// now we can re-use the whole event list, but we need to save one slot
 	// for the event loop poke
 	memcpy(kev, events, sizeof(kev));
-	ddi->ddi_maxevents = DISPATCH_DEFERRED_ITEMS_EVENT_COUNT - 1;
+	ddi->ddi_maxevents = DISPATCH_DEFERRED_ITEMS_EVENT_COUNT - 2;
 
 	for (int i = 0; i < nevents; i++) {
 		_dispatch_kevent_drain(&kev[i]);
 	}
 
-	dispatch_wlh_t wlh = _dispatch_get_wlh();
-	if (wlh == DISPATCH_WLH_ANON && ddi->ddi_stashed_dou._do) {
-		if (ddi->ddi_nevents) {
+	if (wlh == DISPATCH_WLH_ANON) {
+		if (ddi->ddi_stashed_dou._do && ddi->ddi_nevents) {
 			// We will drain the stashed item and not return to the kernel
 			// right away. As a consequence, do not delay these updates.
 			_dispatch_event_loop_drain(KEVENT_FLAG_IMMEDIATE |
 					KEVENT_FLAG_ERROR_EVENTS);
 		}
-		_dispatch_trace_continuation_push(ddi->ddi_stashed_rq,
-				ddi->ddi_stashed_dou);
+#if DISPATCH_USE_KEVENT_WORKLOOP
+	} else if (dx_metatype((dispatch_queue_t)wlh) == _DISPATCH_WORKLOOP_TYPE) {
+		dispatch_timer_heap_t dth = ((dispatch_workloop_t)wlh)->dwl_timer_heap;
+		if (dth && dth[0].dth_dirty_bits) {
+			_dispatch_event_loop_drain_timers(dth, DISPATCH_TIMER_WLH_COUNT);
+		}
+#endif // DISPATCH_USE_KEVENT_WORKLOOP
 	}
 }
 
 void
-_dispatch_event_loop_leave_immediate(dispatch_wlh_t wlh, uint64_t dq_state)
+_dispatch_event_loop_leave_immediate(uint64_t dq_state)
 {
-	(void)wlh; (void)dq_state;
+#if DISPATCH_USE_KEVENT_WORKLOOP
+	dispatch_deferred_items_t ddi = _dispatch_deferred_items_get();
+	dispatch_wlh_t wlh = ddi->ddi_wlh;
+	uint32_t kev_flags = KEVENT_FLAG_IMMEDIATE | KEVENT_FLAG_ERROR_EVENTS |
+			KEVENT_FLAG_DYNAMIC_KQ_MUST_EXIST;
+	dispatch_kevent_s ke;
+	dispatch_assert(!_dq_state_is_base_wlh(dq_state));
+
+	//
+	// A workloop is being retargeted, so we need to synchronously destroy
+	// the thread request as delivering it later would confuse the workloop
+	// thread into trying to drain this queue as a bottom one.
+	//
+	// Doing it synchronously prevents races where the queue is retargeted
+	// again and becomes a workloop again.
+	//
+	dispatch_assert(ddi->ddi_wlh_needs_delete);
+	ddi->ddi_wlh_needs_delete = false;
+	ddi->ddi_wlh_needs_update = false;
+	_dispatch_kq_fill_workloop_event(&ke,
+			DISPATCH_WORKLOOP_RETARGET, wlh, dq_state);
+	if (_dispatch_kq_poll(wlh, &ke, 1, &ke, 1, NULL, NULL, kev_flags)) {
+		_dispatch_kevent_workloop_drain_error(&ke, 0);
+		__builtin_unreachable();
+	}
+#else
+	(void)dq_state;
+#endif // DISPATCH_USE_KEVENT_WORKLOOP
 }
 
 void
-_dispatch_event_loop_leave_deferred(dispatch_wlh_t wlh, uint64_t dq_state)
+_dispatch_event_loop_leave_deferred(dispatch_deferred_items_t ddi,
+		uint64_t dq_state)
 {
-	(void)wlh; (void)dq_state;
+#if DISPATCH_USE_KEVENT_WORKLOOP
+	int action = _dispatch_event_loop_get_action_for_state(dq_state);
+	dispatch_assert(ddi->ddi_wlh_needs_delete);
+	ddi->ddi_wlh_needs_delete = false;
+	ddi->ddi_wlh_needs_update = false;
+	_dispatch_kq_fill_ddi_workloop_event(ddi, action, ddi->ddi_wlh, dq_state);
+#else
+	(void)ddi; (void)dq_state;
+#endif // DISPATCH_USE_KEVENT_WORKLOOP
+}
+
+void
+_dispatch_event_loop_cancel_waiter(dispatch_sync_context_t dsc)
+{
+#if DISPATCH_USE_KEVENT_WORKLOOP
+	dispatch_wlh_t wlh = dsc->dc_data;
+	uint32_t kev_flags = KEVENT_FLAG_IMMEDIATE | KEVENT_FLAG_ERROR_EVENTS;
+	dispatch_kevent_s ke;
+
+	_dispatch_kq_fill_workloop_sync_event(&ke, DISPATCH_WORKLOOP_SYNC_END,
+			wlh, 0, dsc->dsc_waiter);
+	if (_dispatch_kq_poll(wlh, &ke, 1, &ke, 1, NULL, NULL, kev_flags)) {
+		_dispatch_kevent_workloop_drain_error(&ke, dsc->dsc_waiter_needs_cancel ?
+				0 : DISPATCH_KEVENT_WORKLOOP_ALLOW_ENOENT);
+		//
+		// Our deletion attempt is opportunistic as in most cases we will find
+		// the matching knote and break the waiter out.
+		//
+		// However, if the waiter hasn't had a chance to make the syscall
+		// to wait yet, we get ENOENT. In this case, pre-post the WAKE,
+		// and transfer the responsibility to delete the knote to the waiter.
+		//
+		dsc->dsc_waiter_needs_cancel = true;
+		_dispatch_kq_fill_workloop_sync_event(&ke,
+				DISPATCH_WORKLOOP_SYNC_FAKE, wlh, 0, dsc->dsc_waiter);
+		if (_dispatch_kq_poll(wlh, &ke, 1, &ke, 1, NULL, NULL, kev_flags)) {
+			_dispatch_kevent_workloop_drain_error(&ke, 0);
+			__builtin_unreachable();
+		}
+	}
+#else
+	(void)dsc;
+#endif // DISPATCH_USE_KEVENT_WORKLOOP
 }
 
 void
 _dispatch_event_loop_wake_owner(dispatch_sync_context_t dsc,
 		dispatch_wlh_t wlh, uint64_t old_state, uint64_t new_state)
 {
+#if DISPATCH_USE_KEVENT_WORKLOOP
+	dispatch_deferred_items_t ddi = _dispatch_deferred_items_get();
+	dispatch_wlh_t waiter_wlh = dsc->dc_data;
+	uint32_t kev_flags = KEVENT_FLAG_IMMEDIATE | KEVENT_FLAG_ERROR_EVENTS;
+	dispatch_kevent_s ke[3];
+	int action, n = 0;
+
+	dispatch_assert(_dq_state_drain_locked_by(new_state, dsc->dsc_waiter));
+
+	if (wlh != DISPATCH_WLH_ANON && ddi && ddi->ddi_wlh == wlh) {
+		dispatch_assert(ddi->ddi_wlh_needs_delete);
+		ddi->ddi_wlh_needs_delete = false;
+		ddi->ddi_wlh_needs_update = false;
+
+		if (wlh == waiter_wlh) { // async -> sync handoff
+			dispatch_assert(_dq_state_is_enqueued_on_target(old_state));
+			dispatch_assert(!_dq_state_in_sync_transfer(old_state));
+			dispatch_assert(_dq_state_in_sync_transfer(new_state));
+
+			if (_dq_state_is_enqueued_on_target(new_state)) {
+				action = DISPATCH_WORKLOOP_ASYNC_QOS_UPDATE;
+			} else {
+				action = DISPATCH_WORKLOOP_ASYNC_LEAVE_FROM_TRANSFER;
+			}
+			_dispatch_kq_fill_ddi_workloop_event(ddi, action, wlh, new_state);
+
+			int slot = _dispatch_kq_deferred_find_slot(ddi, EVFILT_WORKLOOP,
+					(uint64_t)wlh, dsc->dsc_waiter);
+			if (slot == ddi->ddi_nevents) {
+				dispatch_assert(slot < DISPATCH_DEFERRED_ITEMS_EVENT_COUNT);
+				ddi->ddi_nevents++;
+			}
+			_dispatch_kq_fill_workloop_sync_event(&ddi->ddi_eventlist[slot],
+					DISPATCH_WORKLOOP_SYNC_WAKE, wlh, new_state, dsc->dsc_waiter);
+			return;
+		}
+	}
+
+	if ((old_state ^ new_state) & DISPATCH_QUEUE_ENQUEUED) {
+		dispatch_assert(_dq_state_is_enqueued_on_target(old_state));
+		dispatch_assert(_dq_state_in_sync_transfer(new_state));
+		// During the handoff, the waiter noticed there was no work *after*
+		// that last work item, so we want to kill the thread request while
+		// there's an owner around to avoid races between knote_process()
+		// knote_drop() in the kernel.
+		_dispatch_kq_fill_workloop_event(&ke[n++],
+				DISPATCH_WORKLOOP_ASYNC_LEAVE_FROM_TRANSFER, wlh, new_state);
+	}
+	if (_dq_state_in_sync_transfer(new_state)) {
+		// Even when waiter_wlh != wlh we can post a SYNC_WAKE pretending we
+		// got woken up; that knote can be deleted later with a SYNC_END.
+		// This allows rectifying incorrect ownership sooner, and also happens
+		// on resume if the first item is a sync waiter.
+		_dispatch_kq_fill_workloop_sync_event(&ke[n++],
+				DISPATCH_WORKLOOP_SYNC_WAKE, wlh, new_state, dsc->dsc_waiter);
+	}
+	if (_dq_state_in_sync_transfer(old_state)) {
+		dispatch_tid tid = _dispatch_tid_self();
+		_dispatch_kq_fill_workloop_sync_event(&ke[n++],
+				DISPATCH_WORKLOOP_SYNC_END, wlh, new_state, tid);
+	}
+	//
+	// Past this call it is not safe to look at `wlh` anymore as the callers
+	// sometimes borrow the refcount of the waiter which we will wake up.
+	//
+	if (_dispatch_kq_poll(wlh, ke, n, ke, n, NULL, NULL, kev_flags)) {
+		_dispatch_kevent_workloop_drain_error(&ke[0], 0);
+		__builtin_unreachable();
+	}
+
+	if (unlikely(waiter_wlh != DISPATCH_WLH_ANON && waiter_wlh != wlh)) {
+		_dispatch_bug_deprecated("Changing target queue hierarchy "
+				"with a dispatch_sync in flight");
+		_dispatch_event_loop_cancel_waiter(dsc);
+	}
+#else
 	(void)dsc; (void)wlh; (void)old_state; (void)new_state;
+#endif // DISPATCH_USE_KEVENT_WORKLOOP
 }
 
 void
 _dispatch_event_loop_wait_for_ownership(dispatch_sync_context_t dsc)
 {
+#if DISPATCH_USE_KEVENT_WORKLOOP
+	dispatch_wlh_t wlh = dsc->dc_data;
+	dispatch_kevent_s ke[2];
+	uint32_t kev_flags = KEVENT_FLAG_IMMEDIATE | KEVENT_FLAG_ERROR_EVENTS;
+	uint64_t dq_state;
+	int i, n = 0;
+
+	dq_state = os_atomic_load2o((dispatch_queue_t)wlh, dq_state, relaxed);
+	if (dsc->dsc_wlh_was_first && !_dq_state_drain_locked(dq_state) &&
+			_dq_state_is_enqueued_on_target(dq_state)) {
+		//
+		// <rdar://problem/32123779>
+		//
+		// When an enqueuer is racing with the servicer draining the item that
+		// is being enqueued and going away, it is possible for the enqueuer to
+		// mark an empty queue as enqueued and make a thread request for it.
+		//
+		// If a thread is then selected to deliver this event but doesn't make
+		// it to userland to take the drain lock, any sync waiter will
+		// nevertheless have to wait for that servicer to consume the thread
+		// request; trying to delete it will do no good. This is why
+		// _dispatch_push_sync_waiter() for workloops will not try to "save
+		// itself" if the enqueued bit is set.
+		//
+		// However, we don't know whether this thread request exists: it may
+		// have bounced, or still be in the process of being added by a much
+		// lower priority thread, so we need to drive it once to avoid priority
+		// inversions.
+		//
+		_dispatch_kq_fill_workloop_event(&ke[n++], DISPATCH_WORKLOOP_ASYNC,
+				wlh, dq_state);
+	}
+
+again:
+	_dispatch_kq_fill_workloop_sync_event(&ke[n++], DISPATCH_WORKLOOP_SYNC_WAIT,
+			wlh, dq_state, dsc->dsc_waiter);
+	n = _dispatch_kq_poll(wlh, ke, n, ke, n, NULL, NULL, kev_flags);
+	for (i = 0; i < n; i++) {
+		long flags = 0;
+		if (ke[i].fflags & NOTE_WL_SYNC_WAIT) {
+			flags = DISPATCH_KEVENT_WORKLOOP_ALLOW_EINTR |
+					DISPATCH_KEVENT_WORKLOOP_ALLOW_ESTALE;
+		}
+		_dispatch_kevent_workloop_drain_error(&ke[i], flags);
+	}
+	if (n) {
+		dispatch_assert(n == 1 && (ke[0].fflags & NOTE_WL_SYNC_WAIT));
+		_dispatch_kevent_wlh_debug("restarting", &ke[0]);
+		dq_state = ke[0].ext[EV_EXTIDX_WL_VALUE];
+		n = 0;
+		goto again;
+	}
+#endif
+	if (dsc->dsc_waiter_needs_cancel) {
+		_dispatch_event_loop_cancel_waiter(dsc);
+		dsc->dsc_waiter_needs_cancel = false;
+	}
 	if (dsc->dsc_release_storage) {
 		_dispatch_queue_release_storage(dsc->dc_data);
 	}
@@ -1245,14 +2220,94 @@
 _dispatch_event_loop_end_ownership(dispatch_wlh_t wlh, uint64_t old_state,
 		uint64_t new_state, uint32_t flags)
 {
+#if DISPATCH_USE_KEVENT_WORKLOOP
+	uint32_t kev_flags = KEVENT_FLAG_IMMEDIATE | KEVENT_FLAG_ERROR_EVENTS;
+	dispatch_kevent_s ke[2];
+	bool needs_forceful_end_ownership = false;
+	int n = 0;
+
+	dispatch_assert(_dq_state_is_base_wlh(new_state));
+	if (_dq_state_is_enqueued_on_target(new_state)) {
+		_dispatch_kq_fill_workloop_event(&ke[n++],
+				DISPATCH_WORKLOOP_ASYNC_FROM_SYNC, wlh, new_state);
+	} else if (_dq_state_is_enqueued_on_target(old_state)) {
+		//
+		// <rdar://problem/41389180> Because the thread request knote may not
+		// have made it, DISPATCH_WORKLOOP_ASYNC_LEAVE_FROM_SYNC may silently
+		// turn into a no-op.
+		//
+		// However, the kernel may know about our ownership anyway, so we need
+		// to make sure it is forcefully ended.
+		//
+		needs_forceful_end_ownership = true;
+		dispatch_assert(_dq_state_is_suspended(new_state));
+		_dispatch_kq_fill_workloop_event(&ke[n++],
+				DISPATCH_WORKLOOP_ASYNC_LEAVE_FROM_SYNC, wlh, new_state);
+	} else if (_dq_state_received_sync_wait(old_state)) {
+		//
+		// This case happens when the current workloop got waited on by some
+		// thread calling _dispatch_event_loop_wait_for_ownership.
+		//
+		// When the workloop became IDLE, it didn't find the sync waiter
+		// continuation, didn't have a thread request to cancel either, and so
+		// we need the kernel to forget about the current thread ownership
+		// of the workloop.
+		//
+		// To forget this ownership, we create a fake WAKE knote that cannot
+		// coalesce with any meaningful one, just so that we can EV_DELETE it
+		// with the NOTE_WL_END_OWNERSHIP.
+		//
+		// This is a gross hack, but this will really only ever happen for
+		// cases where a sync waiter started to wait on a workloop, but his part
+		// of the graph got mutated and retargeted onto a different workloop.
+		// In doing so, that sync waiter has snitched to the kernel about
+		// ownership, and the workloop he's bogusly waiting on will go through
+		// this codepath.
+		//
+		needs_forceful_end_ownership = true;
+	}
+
+	if (_dq_state_in_sync_transfer(old_state)) {
+		dispatch_tid tid = _dispatch_tid_self();
+		_dispatch_kq_fill_workloop_sync_event(&ke[n++],
+				DISPATCH_WORKLOOP_SYNC_END, wlh, new_state, tid);
+	} else if (needs_forceful_end_ownership) {
+		kev_flags |= KEVENT_FLAG_DYNAMIC_KQ_MUST_EXIST;
+		_dispatch_kq_fill_workloop_event(&ke[n++],
+				DISPATCH_WORKLOOP_ASYNC_FORCE_END_OWNERSHIP, wlh, new_state);
+	}
+
+	if (_dispatch_kq_poll(wlh, ke, n, ke, n, NULL, NULL, kev_flags)) {
+		_dispatch_kevent_workloop_drain_error(&ke[0], 0);
+		__builtin_unreachable();
+	}
+
+	_dispatch_event_loop_assert_not_owned(wlh);
+
+	int extra_refs = (flags & DISPATCH_EVENT_LOOP_CONSUME_2) ? 2 : 0;
+	if (_dq_state_is_enqueued_on_target(old_state)) extra_refs++;
+	if (_dq_state_is_enqueued_on_target(new_state)) extra_refs--;
+	dispatch_assert(extra_refs >= 0);
+	if (extra_refs > 0) _dispatch_release_n((dispatch_queue_t)wlh, extra_refs);
+#else
 	(void)wlh; (void)old_state; (void)new_state; (void)flags;
+#endif // DISPATCH_USE_KEVENT_WORKLOOP
 }
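
The release bookkeeping at the end of _dispatch_event_loop_end_ownership() reduces to small integer arithmetic: start from the +2 the caller may ask to be consumed, add one if the old state still held the enqueued reference, and subtract one if the new state re-enqueued (that reference is handed over to the new thread request). A tiny sketch of the computation under those assumptions:

    #include <assert.h>
    #include <stdbool.h>

    /* Illustrative only: how many references must be dropped at the end. */
    static int
    extra_refs_to_release(bool consume_2, bool was_enqueued, bool still_enqueued)
    {
        int extra_refs = consume_2 ? 2 : 0;
        if (was_enqueued) extra_refs++;     /* old enqueued reference is now ours */
        if (still_enqueued) extra_refs--;   /* handed to the new thread request */
        return extra_refs;
    }

    int
    main(void)
    {
        assert(extra_refs_to_release(true,  true,  true)  == 2);
        assert(extra_refs_to_release(true,  true,  false) == 3);
        assert(extra_refs_to_release(false, false, false) == 0);
        return 0;
    }
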
 
 #if DISPATCH_WLH_DEBUG
 void
 _dispatch_event_loop_assert_not_owned(dispatch_wlh_t wlh)
 {
+#if DISPATCH_USE_KEVENT_WORKLOOP
+	if (wlh != DISPATCH_WLH_ANON) {
+		dispatch_kevent_s ke;
+		if (_dispatch_kevent_workloop_get_info(wlh, &ke)) {
+			dispatch_assert(ke.ext[0] != _pthread_threadid_self_np_direct());
+		}
+	}
+#else
 	(void)wlh;
+#endif // DISPATCH_USE_KEVENT_WORKLOOP
 }
 #endif // DISPATCH_WLH_DEBUG
 
@@ -1263,73 +2318,75 @@
 
 DISPATCH_NOINLINE
 static void
-_dispatch_kevent_timer_drain(dispatch_kevent_t ke)
-{
-	dispatch_assert(ke->data > 0);
-	dispatch_assert((ke->ident & DISPATCH_KEVENT_TIMEOUT_IDENT_MASK) ==
-			DISPATCH_KEVENT_TIMEOUT_IDENT_MASK);
-	uint32_t tidx = ke->ident & ~DISPATCH_KEVENT_TIMEOUT_IDENT_MASK;
-
-	dispatch_assert(tidx < DISPATCH_TIMER_COUNT);
-	_dispatch_timers_expired = true;
-	_dispatch_timers_processing_mask |= 1 << tidx;
-	_dispatch_timers_heap[tidx].dth_flags &= ~DTH_ARMED;
-#if DISPATCH_USE_DTRACE
-	_dispatch_timers_will_wake |= 1 << DISPATCH_TIMER_QOS(tidx);
-#endif
-}
-
-DISPATCH_NOINLINE
-static void
-_dispatch_event_loop_timer_program(uint32_t tidx,
+_dispatch_event_loop_timer_program(dispatch_timer_heap_t dth, uint32_t tidx,
 		uint64_t target, uint64_t leeway, uint16_t action)
 {
+	dispatch_wlh_t wlh = _dispatch_get_wlh();
+#if DISPATCH_USE_KEVENT_QOS
+	pthread_priority_t pp = _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG;
+	if (wlh != DISPATCH_WLH_ANON) {
+		pp = _dispatch_qos_to_pp(dth[tidx].dth_max_qos);
+	}
+#endif
 	dispatch_kevent_s ke = {
 		.ident = DISPATCH_KEVENT_TIMEOUT_IDENT_MASK | tidx,
 		.filter = EVFILT_TIMER,
 		.flags = action | EV_ONESHOT,
 		.fflags = _dispatch_timer_index_to_fflags[tidx],
 		.data = (int64_t)target,
-		.udata = (dispatch_kevent_udata_t)&_dispatch_timers_heap[tidx],
+		.udata = (dispatch_kevent_udata_t)dth,
 #if DISPATCH_HAVE_TIMER_COALESCING
 		.ext[1] = leeway,
 #endif
 #if DISPATCH_USE_KEVENT_QOS
-		.qos = _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG,
+		.qos = (__typeof__(ke.qos))pp,
 #endif
 	};
-	(void)leeway; // if DISPATCH_HAVE_TIMER_COALESCING == 0
+	(void)leeway; // if !DISPATCH_HAVE_TIMER_COALESCING
 
-	_dispatch_kq_deferred_update(DISPATCH_WLH_ANON, &ke);
+	_dispatch_kq_deferred_update(wlh, &ke);
 }
 
 void
-_dispatch_event_loop_timer_arm(uint32_t tidx, dispatch_timer_delay_s range,
-		dispatch_clock_now_cache_t nows)
+_dispatch_event_loop_timer_arm(dispatch_timer_heap_t dth, uint32_t tidx,
+		dispatch_timer_delay_s range, dispatch_clock_now_cache_t nows)
 {
+	dispatch_clock_t clock = DISPATCH_TIMER_CLOCK(tidx);
+	uint64_t target = range.delay + _dispatch_time_now_cached(clock, nows);
 	if (unlikely(_dispatch_timers_force_max_leeway)) {
-		range.delay += range.leeway;
+		target += range.leeway;
 		range.leeway = 0;
 	}
+
+	_dispatch_event_loop_timer_program(dth, tidx, target, range.leeway,
+			EV_ADD | EV_ENABLE);
 #if HAVE_MACH
-	if (DISPATCH_TIMER_CLOCK(tidx) == DISPATCH_CLOCK_WALL) {
+	if (clock == DISPATCH_CLOCK_WALL) {
 		_dispatch_mach_host_calendar_change_register();
 	}
 #endif
-
-	// <rdar://problem/13186331> EVFILT_TIMER NOTE_ABSOLUTE always expects
-	// a WALL deadline
-	uint64_t now = _dispatch_time_now_cached(DISPATCH_CLOCK_WALL, nows);
-	_dispatch_timers_heap[tidx].dth_flags |= DTH_ARMED;
-	_dispatch_event_loop_timer_program(tidx, now + range.delay, range.leeway,
-			EV_ADD | EV_ENABLE);
 }
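
_dispatch_event_loop_timer_arm() now derives the absolute deadline from the timer's own clock (target = now(clock) + delay) and, when maximum leeway is forced, folds the leeway into the deadline and hands the kernel zero slack. A small sketch of that arithmetic with placeholder types:

    #include <assert.h>
    #include <stdbool.h>
    #include <stdint.h>

    struct timer_delay {
        uint64_t delay;                     /* relative delay from "now" */
        uint64_t leeway;                    /* allowed coalescing window */
    };

    /* Compute the (deadline, leeway) pair handed to the kernel.
     * force_max_leeway mirrors _dispatch_timers_force_max_leeway: the whole
     * leeway is pushed into the deadline and the kernel gets no slack. */
    static void
    arm_params(uint64_t now, struct timer_delay range, bool force_max_leeway,
            uint64_t *target, uint64_t *leeway)
    {
        *target = now + range.delay;
        if (force_max_leeway) {
            *target += range.leeway;
            range.leeway = 0;
        }
        *leeway = range.leeway;
    }

    int
    main(void)
    {
        uint64_t target, leeway;
        struct timer_delay r = { .delay = 1000, .leeway = 250 };

        arm_params(5000, r, false, &target, &leeway);
        assert(target == 6000 && leeway == 250);

        arm_params(5000, r, true, &target, &leeway);
        assert(target == 6250 && leeway == 0);
        return 0;
    }
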
 
 void
-_dispatch_event_loop_timer_delete(uint32_t tidx)
+_dispatch_event_loop_timer_delete(dispatch_timer_heap_t dth, uint32_t tidx)
 {
-	_dispatch_timers_heap[tidx].dth_flags &= ~DTH_ARMED;
-	_dispatch_event_loop_timer_program(tidx, 0, 0, EV_DELETE);
+	_dispatch_event_loop_timer_program(dth, tidx, 0, 0, EV_DELETE);
+}
+
+DISPATCH_ALWAYS_INLINE
+static inline void
+_dispatch_kevent_timer_drain(dispatch_kevent_t ke)
+{
+	dispatch_timer_heap_t dth = (dispatch_timer_heap_t)ke->udata;
+	uint32_t tidx = ke->ident & ~DISPATCH_KEVENT_TIMEOUT_IDENT_MASK;
+
+	dispatch_assert(ke->data > 0);
+	dispatch_assert(ke->ident == (tidx | DISPATCH_KEVENT_TIMEOUT_IDENT_MASK));
+	dispatch_assert(tidx < DISPATCH_TIMER_COUNT);
+
+	_dispatch_timers_heap_dirty(dth, tidx);
+	dth[tidx].dth_needs_program = true;
+	dth[tidx].dth_armed = false;
 }
 
 #pragma mark -
@@ -1341,7 +2398,7 @@
 {
 	dispatch_unote_t du = _dispatch_unote_create_with_handle(dst, handle, mask);
 	if (du._du && (mask & DISPATCH_PROC_EXIT_STATUS)) {
-		du._du->du_data_action = DISPATCH_UNOTE_ACTION_DATA_OR_STATUS_SET;
+		du._du->du_has_extended_status = true;
 	}
 	return du;
 }
@@ -1359,7 +2416,9 @@
 			|NOTE_REAP
 #endif
 			,
+	.dst_action     = DISPATCH_UNOTE_ACTION_SOURCE_OR_FFLAGS,
 	.dst_size       = sizeof(struct dispatch_source_refs_s),
+	.dst_strict     = false,
 
 	.dst_create     = _dispatch_source_proc_create,
 	.dst_merge_evt  = _dispatch_source_merge_evt,
@@ -1378,7 +2437,9 @@
 			|NOTE_NONE
 #endif
 			,
+	.dst_action     = DISPATCH_UNOTE_ACTION_SOURCE_OR_FFLAGS,
 	.dst_size       = sizeof(struct dispatch_source_refs_s),
+	.dst_strict     = false,
 
 	.dst_create     = _dispatch_unote_create_with_fd,
 	.dst_merge_evt  = _dispatch_source_merge_evt,
@@ -1405,8 +2466,13 @@
 #if HAVE_DECL_VQ_DESIRED_DISK
 			|VQ_DESIRED_DISK
 #endif
+#if HAVE_DECL_VQ_FREE_SPACE_CHANGE
+			|VQ_FREE_SPACE_CHANGE
+#endif
 			,
+	.dst_action     = DISPATCH_UNOTE_ACTION_SOURCE_OR_FFLAGS,
 	.dst_size       = sizeof(struct dispatch_source_refs_s),
+	.dst_strict     = false,
 
 	.dst_create     = _dispatch_unote_create_without_handle,
 	.dst_merge_evt  = _dispatch_source_merge_evt,
@@ -1430,7 +2496,9 @@
 			|NOTE_NOTIFY_ACK
 #endif
 		,
+	.dst_action     = DISPATCH_UNOTE_ACTION_SOURCE_OR_FFLAGS,
 	.dst_size       = sizeof(struct dispatch_source_refs_s),
+	.dst_strict     = false,
 
 	.dst_create     = _dispatch_unote_create_with_fd,
 	.dst_merge_evt  = _dispatch_source_merge_evt,
@@ -1443,7 +2511,10 @@
 	.dst_filter     = EVFILT_NW_CHANNEL,
 	.dst_flags      = DISPATCH_EV_DIRECT|EV_CLEAR|EV_VANISHED,
 	.dst_mask       = NOTE_FLOW_ADV_UPDATE,
+	.dst_action     = DISPATCH_UNOTE_ACTION_SOURCE_OR_FFLAGS,
 	.dst_size       = sizeof(struct dispatch_source_refs_s),
+	.dst_strict     = false,
+
 	.dst_create     = _dispatch_unote_create_with_fd,
 	.dst_merge_evt  = _dispatch_source_merge_evt,
 };
@@ -1506,7 +2577,7 @@
 {
 	dispatch_source_t ds = dispatch_source_create(
 			DISPATCH_SOURCE_TYPE_MEMORYPRESSURE, 0,
-			DISPATCH_MEMORYPRESSURE_SOURCE_MASK, &_dispatch_mgr_q);
+			DISPATCH_MEMORYPRESSURE_SOURCE_MASK, _dispatch_mgr_q._as_dq);
 	dispatch_set_context(ds, ds);
 	dispatch_source_set_event_handler_f(ds, _dispatch_memorypressure_handler);
 	dispatch_activate(ds);
@@ -1558,7 +2629,9 @@
 			|NOTE_MEMORYSTATUS_LOW_SWAP|NOTE_MEMORYSTATUS_PROC_LIMIT_WARN
 			|NOTE_MEMORYSTATUS_PROC_LIMIT_CRITICAL
 			|NOTE_MEMORYSTATUS_MSL_STATUS,
+	.dst_action     = DISPATCH_UNOTE_ACTION_SOURCE_OR_FFLAGS,
 	.dst_size       = sizeof(struct dispatch_source_refs_s),
+	.dst_strict     = false,
 
 #if TARGET_OS_SIMULATOR
 	.dst_create     = _dispatch_source_memorypressure_create,
@@ -1587,7 +2660,9 @@
 	.dst_filter     = EVFILT_MEMORYSTATUS,
 	.dst_flags      = EV_UDATA_SPECIFIC|EV_DISPATCH,
 	.dst_mask       = NOTE_VM_PRESSURE,
+	.dst_action     = DISPATCH_UNOTE_ACTION_SOURCE_OR_FFLAGS,
 	.dst_size       = sizeof(struct dispatch_source_refs_s),
+	.dst_strict     = false,
 
 	.dst_create     = _dispatch_source_vm_create,
 	// redirected to _dispatch_source_type_memorypressure
@@ -1604,19 +2679,21 @@
 
 static void _dispatch_mach_host_notify_update(void *context);
 
-static mach_port_t _dispatch_mach_notify_port;
-static dispatch_source_t _dispatch_mach_notify_source;
+DISPATCH_STATIC_GLOBAL(dispatch_once_t _dispatch_mach_notify_port_pred);
+DISPATCH_STATIC_GLOBAL(dispatch_once_t _dispatch_mach_calendar_pred);
+DISPATCH_STATIC_GLOBAL(mach_port_t _dispatch_mach_notify_port);
 
 static void
 _dispatch_timers_calendar_change(void)
 {
-	uint32_t qos;
+	dispatch_timer_heap_t dth = _dispatch_timers_heap;
+	uint32_t qos, tidx;
 
 	// calendar change may have gone past the wallclock deadline
-	_dispatch_timers_expired = true;
 	for (qos = 0; qos < DISPATCH_TIMER_QOS_COUNT; qos++) {
-		_dispatch_timers_processing_mask |=
-				1 << DISPATCH_TIMER_INDEX(DISPATCH_CLOCK_WALL, qos);
+		tidx = DISPATCH_TIMER_INDEX(DISPATCH_CLOCK_WALL, qos);
+		_dispatch_timers_heap_dirty(dth, tidx);
+		dth[tidx].dth_needs_program = true;
 	}
 }
 
@@ -1638,7 +2715,10 @@
 
 DISPATCH_NOINLINE
 static void
-_dispatch_mach_notify_source_invoke(mach_msg_header_t *hdr)
+_dispatch_mach_notification_merge_msg(dispatch_unote_t du, uint32_t flags,
+		mach_msg_header_t *hdr, mach_msg_size_t msgsz DISPATCH_UNUSED,
+		pthread_priority_t msg_pp DISPATCH_UNUSED,
+		pthread_priority_t ovr_pp DISPATCH_UNUSED)
 {
 	mig_reply_error_t reply;
 	mach_msg_audit_trailer_t *tlr = NULL;
@@ -1650,16 +2730,17 @@
 	if (!tlr) {
 		DISPATCH_INTERNAL_CRASH(0, "message received without expected trailer");
 	}
-	if (hdr->msgh_id <= MACH_NOTIFY_LAST
-			&& dispatch_assume_zero(tlr->msgh_audit.val[
+	if (hdr->msgh_id <= MACH_NOTIFY_LAST &&
+			dispatch_assume_zero(tlr->msgh_audit.val[
 			DISPATCH_MACH_AUDIT_TOKEN_PID])) {
 		mach_msg_destroy(hdr);
-		return;
+		goto out;
 	}
+
 	boolean_t success = libdispatch_internal_protocol_server(hdr, &reply.Head);
 	if (!success && reply.RetCode == MIG_BAD_ID &&
 			(hdr->msgh_id == HOST_CALENDAR_SET_REPLYID ||
-			 hdr->msgh_id == HOST_CALENDAR_CHANGED_REPLYID)) {
+			hdr->msgh_id == HOST_CALENDAR_CHANGED_REPLYID)) {
 		_dispatch_debug("calendar-change notification");
 		_dispatch_timers_calendar_change();
 		_dispatch_mach_host_notify_update(NULL);
@@ -1672,39 +2753,38 @@
 	if (!success || (reply.RetCode && reply.RetCode != MIG_NO_REPLY)) {
 		mach_msg_destroy(hdr);
 	}
+
+out:
+	if (flags & DISPATCH_EV_MSG_NEEDS_FREE) {
+		free(hdr);
+	}
+	return _dispatch_unote_resume(du);
 }
 
 DISPATCH_NOINLINE
 static void
 _dispatch_mach_notify_port_init(void *context DISPATCH_UNUSED)
 {
-	kern_return_t kr;
-#if HAVE_MACH_PORT_CONSTRUCT
 	mach_port_options_t opts = { .flags = MPO_CONTEXT_AS_GUARD | MPO_STRICT };
-#if DISPATCH_SIZEOF_PTR == 8
-	const mach_port_context_t guard = 0xfeed09071f1ca7edull;
-#else
-	const mach_port_context_t guard = 0xff1ca7edull;
-#endif
+	mach_port_context_t guard = (uintptr_t)&_dispatch_mach_notify_port;
+	kern_return_t kr;
+
 	kr = mach_port_construct(mach_task_self(), &opts, guard,
 			&_dispatch_mach_notify_port);
-#else
-	kr = mach_port_allocate(mach_task_self(), MACH_PORT_RIGHT_RECEIVE,
-			&_dispatch_mach_notify_port);
-#endif
-	DISPATCH_VERIFY_MIG(kr);
 	if (unlikely(kr)) {
 		DISPATCH_CLIENT_CRASH(kr,
 				"mach_port_construct() failed: cannot create receive right");
 	}
 
-	static const struct dispatch_continuation_s dc = {
-		.dc_func = (void*)_dispatch_mach_notify_source_invoke,
-	};
-	_dispatch_mach_notify_source = _dispatch_source_create_mach_msg_direct_recv(
-			_dispatch_mach_notify_port, &dc);
-	dispatch_assert(_dispatch_mach_notify_source);
-	dispatch_activate(_dispatch_mach_notify_source);
+	dispatch_unote_t du = dux_create(&_dispatch_mach_type_notification,
+			_dispatch_mach_notify_port, 0);
+
+	// make sure _dispatch_kevent_mach_msg_recv can call
+	// _dispatch_retain_unote_owner
+	du._du->du_owner_wref = _dispatch_ptr2wref(&_dispatch_mgr_q);
+
+	dispatch_assume(_dispatch_unote_register(du, DISPATCH_WLH_ANON,
+			DISPATCH_PRIORITY_FLAG_MANAGER));
 }
 
 static void
@@ -1740,26 +2820,19 @@
 static inline mach_port_t
 _dispatch_get_mach_notify_port(void)
 {
-	static dispatch_once_t pred;
-	dispatch_once_f(&pred, NULL, _dispatch_mach_notify_port_init);
+	dispatch_once_f(&_dispatch_mach_notify_port_pred, NULL,
+			_dispatch_mach_notify_port_init);
 	return _dispatch_mach_notify_port;
 }
 
 static void
 _dispatch_mach_host_notify_update(void *context DISPATCH_UNUSED)
 {
-	static int notify_type = HOST_NOTIFY_CALENDAR_SET;
 	kern_return_t kr;
 	_dispatch_debug("registering for calendar-change notification");
-retry:
+
 	kr = host_request_notification(_dispatch_get_mach_host_port(),
-			notify_type, _dispatch_get_mach_notify_port());
-	// Fallback when missing support for newer _SET variant, which fires strictly more often
-	if (kr == KERN_INVALID_ARGUMENT &&
-			notify_type != HOST_NOTIFY_CALENDAR_CHANGE) {
-		notify_type = HOST_NOTIFY_CALENDAR_CHANGE;
-		goto retry;
-	}
+			HOST_NOTIFY_CALENDAR_SET, _dispatch_get_mach_notify_port());
 	DISPATCH_VERIFY_MIG(kr);
 	(void)dispatch_assume_zero(kr);
 }
@@ -1768,8 +2841,8 @@
 static inline void
 _dispatch_mach_host_calendar_change_register(void)
 {
-	static dispatch_once_t pred;
-	dispatch_once_f(&pred, NULL, _dispatch_mach_host_notify_update);
+	dispatch_once_f(&_dispatch_mach_calendar_pred, NULL,
+			_dispatch_mach_host_notify_update);
 }
 
 static kern_return_t
@@ -1877,6 +2950,7 @@
 {
 	dispatch_unote_linkage_t dul, dul_next;
 	dispatch_muxnote_t dmn;
+	uint32_t flags = EV_ENABLE;
 
 	_dispatch_debug_machport(name);
 	dmn = _dispatch_mach_muxnote_find(name, DISPATCH_EVFILT_MACH_NOTIFICATION);
@@ -1885,22 +2959,30 @@
 	}
 
 	dmn->dmn_kev.data &= ~_DISPATCH_MACH_SP_FLAGS;
-	if (!final) {
-		// Re-register for notification before delivery
-		final = !_dispatch_kevent_mach_notify_resume(dmn, data, 0);
+	if (final || !_dispatch_kevent_mach_notify_resume(dmn, data, 0)) {
+		flags = EV_ONESHOT;
+		dmn->dmn_kev.flags |= EV_DELETE;
 	}
+	os_atomic_store(&DISPATCH_MACH_NOTIFICATION_ARMED(dmn), 0, relaxed);
 
-	uint32_t flags = final ? EV_ONESHOT : EV_ENABLE;
-	DISPATCH_MACH_NOTIFICATION_ARMED(&dmn->dmn_kev) = 0;
-	TAILQ_FOREACH_SAFE(dul, &dmn->dmn_unotes_head, du_link, dul_next) {
-		dispatch_unote_t du = _dispatch_unote_linkage_get_unote(dul);
-		os_atomic_store2o(du._dmsr, dmsr_notification_armed, false, relaxed);
-		dux_merge_evt(du._du, flags, (data & du._du->du_fflags), 0, 0);
-		if (!dul_next || DISPATCH_MACH_NOTIFICATION_ARMED(&dmn->dmn_kev)) {
-			// current merge is last in list (dmn might have been freed)
-			// or it re-armed the notification
+	LIST_FOREACH_SAFE(dul, &dmn->dmn_unotes_head, du_link, dul_next) {
+		if (os_atomic_load(&DISPATCH_MACH_NOTIFICATION_ARMED(dmn), relaxed)) {
+			dispatch_assert(!final);
 			break;
 		}
+		dispatch_unote_t du = _dispatch_unote_linkage_get_unote(dul);
+		uint32_t fflags = (data & du._du->du_fflags);
+		os_atomic_store2o(du._dmsr, dmsr_notification_armed, 0, relaxed);
+		if (final || fflags) {
+			// consumed by dux_merge_evt()
+			_dispatch_retain_unote_owner(du);
+			if (final) _dispatch_unote_unregister_muxed(du);
+			if (fflags && dux_type(du._du)->dst_action ==
+					DISPATCH_UNOTE_ACTION_SOURCE_OR_FFLAGS) {
+				os_atomic_or2o(du._dr, ds_pending_data, fflags, relaxed);
+			}
+			dux_merge_evt(du._du, flags, fflags, 0);
+		}
 	}
 }
 
@@ -1950,24 +3032,22 @@
 {
 	dispatch_muxnote_t dmn = _dispatch_unote_get_linkage(dmsr)->du_muxnote;
 	dispatch_unote_linkage_t dul;
-	dispatch_unote_t du;
-
-	if (!_dispatch_unote_registered(dmsr)) {
-		return;
-	}
-
+	if (dmn) {
 #if HAVE_MACH
-	DISPATCH_MACH_NOTIFICATION_ARMED(&dmn->dmn_kev) = true;
-	TAILQ_FOREACH(dul, &dmn->dmn_unotes_head, du_link) {
-		du = _dispatch_unote_linkage_get_unote(dul);
-		os_atomic_store2o(du._dmsr, dmsr_notification_armed, true, relaxed);
-	}
+		os_atomic_store(&DISPATCH_MACH_NOTIFICATION_ARMED(dmn), 1, relaxed);
+		LIST_FOREACH(dul, &dmn->dmn_unotes_head, du_link) {
+			dispatch_unote_t du = _dispatch_unote_linkage_get_unote(dul);
+			os_atomic_store2o(du._dmsr, dmsr_notification_armed, 1, relaxed);
+		}
+		_dispatch_debug("machport[0x%08x]: send-possible notification armed",
+				(mach_port_name_t)dmn->dmn_kev.ident);
 #endif
+	}
 }
 
 static dispatch_unote_t
 _dispatch_source_mach_send_create(dispatch_source_type_t dst,
-	uintptr_t handle, unsigned long mask)
+		uintptr_t handle, unsigned long mask)
 {
 	if (!mask) {
 		// Preserve legacy behavior that (mask == 0) => DISPATCH_MACH_SEND_DEAD
@@ -1994,7 +3074,9 @@
 	.dst_filter     = DISPATCH_EVFILT_MACH_NOTIFICATION,
 	.dst_flags      = EV_CLEAR,
 	.dst_mask       = DISPATCH_MACH_SEND_DEAD|DISPATCH_MACH_SEND_POSSIBLE,
+	.dst_action     = DISPATCH_UNOTE_ACTION_SOURCE_OR_FFLAGS,
 	.dst_size       = sizeof(struct dispatch_source_refs_s),
+	.dst_strict     = false,
 
 	.dst_create     = _dispatch_source_mach_send_create,
 	.dst_update_mux = _dispatch_mach_send_update,
@@ -2010,7 +3092,7 @@
 			_dispatch_unote_create_without_handle(dst, handle, mask);
 	if (du._dmsr) {
 		du._dmsr->dmsr_disconnect_cnt = DISPATCH_MACH_NEVER_CONNECTED;
-		TAILQ_INIT(&du._dmsr->dmsr_replies);
+		LIST_INIT(&du._dmsr->dmsr_replies);
 	}
 	return du;
 }
@@ -2020,11 +3102,13 @@
 	.dst_filter     = DISPATCH_EVFILT_MACH_NOTIFICATION,
 	.dst_flags      = EV_CLEAR,
 	.dst_mask       = DISPATCH_MACH_SEND_DEAD|DISPATCH_MACH_SEND_POSSIBLE,
+	.dst_action     = DISPATCH_UNOTE_ACTION_PASS_FFLAGS,
 	.dst_size       = sizeof(struct dispatch_mach_send_refs_s),
+	.dst_strict     = false,
 
 	.dst_create     = _dispatch_mach_send_create,
 	.dst_update_mux = _dispatch_mach_send_update,
-	.dst_merge_evt  = _dispatch_mach_merge_notification,
+	.dst_merge_evt  = _dispatch_mach_notification_merge_evt,
 };
 
 #endif // HAVE_MACH
@@ -2033,31 +3117,25 @@
 
 static void
 _dispatch_kevent_mach_msg_recv(dispatch_unote_t du, uint32_t flags,
-		mach_msg_header_t *hdr)
+		mach_msg_header_t *hdr, pthread_priority_t msg_pp,
+		pthread_priority_t ovr_pp)
 {
-	mach_msg_size_t siz = hdr->msgh_size + DISPATCH_MACH_TRAILER_SIZE;
 	mach_port_t name = hdr->msgh_local_port;
+	mach_msg_size_t siz;
 
-	if (!dispatch_assume(hdr->msgh_size <= UINT_MAX -
-			DISPATCH_MACH_TRAILER_SIZE)) {
-		_dispatch_bug_client("_dispatch_kevent_mach_msg_recv: "
-				"received overlarge message");
-	} else if (!dispatch_assume(name)) {
-		_dispatch_bug_client("_dispatch_kevent_mach_msg_recv: "
-				"received message with MACH_PORT_NULL port");
-	} else {
-		_dispatch_debug_machport(name);
-		if (likely(du._du)) {
-			return dux_merge_msg(du._du, flags, hdr, siz);
-		}
-		_dispatch_bug_client("_dispatch_kevent_mach_msg_recv: "
-				"received message with no listeners");
+	if (os_add_overflow(hdr->msgh_size, DISPATCH_MACH_TRAILER_SIZE, &siz)) {
+		DISPATCH_CLIENT_CRASH(hdr->msgh_size, "Overlarge message received");
+	}
+	if (os_unlikely(name == MACH_PORT_NULL)) {
+		DISPATCH_CLIENT_CRASH(hdr->msgh_id, "Received message with "
+				"MACH_PORT_NULL msgh_local_port");
 	}
 
-	mach_msg_destroy(hdr);
-	if (flags & DISPATCH_EV_MSG_NEEDS_FREE) {
-		free(hdr);
-	}
+	_dispatch_debug_machport(name);
+	// consumed by dux_merge_msg()
+	_dispatch_retain_unote_owner(du);
+	_dispatch_kevent_merge_ev_flags(du, flags);
+	return dux_merge_msg(du._du, flags, hdr, siz, msg_pp, ovr_pp);
 }
 
 DISPATCH_NOINLINE
@@ -2065,53 +3143,50 @@
 _dispatch_kevent_mach_msg_drain(dispatch_kevent_t ke)
 {
 	mach_msg_header_t *hdr = _dispatch_kevent_mach_msg_buf(ke);
+	dispatch_unote_t du = _dispatch_kevent_get_unote(ke);
+	pthread_priority_t msg_pp = (pthread_priority_t)(ke->ext[2] >> 32);
+	pthread_priority_t ovr_pp = (pthread_priority_t)ke->qos;
+	uint32_t flags = ke->flags;
 	mach_msg_size_t siz;
 	mach_msg_return_t kr = (mach_msg_return_t)ke->fflags;
-	uint32_t flags = ke->flags;
-	dispatch_unote_t du = _dispatch_kevent_get_unote(ke);
 
 	if (unlikely(!hdr)) {
 		DISPATCH_INTERNAL_CRASH(kr, "EVFILT_MACHPORT with no message");
 	}
 	if (likely(!kr)) {
-		_dispatch_kevent_mach_msg_recv(du, flags, hdr);
+		return _dispatch_kevent_mach_msg_recv(du, flags, hdr, msg_pp, ovr_pp);
+	}
+	if (kr != MACH_RCV_TOO_LARGE) {
 		goto out;
-	} else if (kr != MACH_RCV_TOO_LARGE) {
-		goto out;
-	} else if (!ke->data) {
+	}
+
+	if (!ke->data) {
 		DISPATCH_INTERNAL_CRASH(0, "MACH_RCV_LARGE_IDENTITY with no identity");
 	}
 	if (unlikely(ke->ext[1] > (UINT_MAX - DISPATCH_MACH_TRAILER_SIZE))) {
 		DISPATCH_INTERNAL_CRASH(ke->ext[1],
 				"EVFILT_MACHPORT with overlarge message");
 	}
-	siz = _dispatch_kevent_mach_msg_size(ke) + DISPATCH_MACH_TRAILER_SIZE;
-	hdr = malloc(siz);
-	if (dispatch_assume(hdr)) {
-		flags |= DISPATCH_EV_MSG_NEEDS_FREE;
-	} else {
-		// Kernel will discard message too large to fit
-		hdr = NULL;
-		siz = 0;
-	}
-	mach_port_t name = (mach_port_name_t)ke->data;
 	const mach_msg_option_t options = ((DISPATCH_MACH_RCV_OPTIONS |
 			MACH_RCV_TIMEOUT) & ~MACH_RCV_LARGE);
-	kr = mach_msg(hdr, options, 0, siz, name, MACH_MSG_TIMEOUT_NONE,
-			MACH_PORT_NULL);
+	siz = _dispatch_kevent_mach_msg_size(ke) + DISPATCH_MACH_TRAILER_SIZE;
+	hdr = malloc(siz); // mach_msg will return TOO_LARGE if hdr/siz is NULL/0
+	kr = mach_msg(hdr, options, 0, dispatch_assume(hdr) ? siz : 0,
+			(mach_port_name_t)ke->data, MACH_MSG_TIMEOUT_NONE, MACH_PORT_NULL);
 	if (likely(!kr)) {
-		_dispatch_kevent_mach_msg_recv(du, flags, hdr);
-		goto out;
-	} else if (kr == MACH_RCV_TOO_LARGE) {
+		flags |= DISPATCH_EV_MSG_NEEDS_FREE;
+		return _dispatch_kevent_mach_msg_recv(du, flags, hdr, msg_pp, ovr_pp);
+	}
+
+	if (kr == MACH_RCV_TOO_LARGE) {
 		_dispatch_log("BUG in libdispatch client: "
 				"_dispatch_kevent_mach_msg_drain: dropped message too "
 				"large to fit in memory: id = 0x%x, size = %u",
 				hdr->msgh_id, _dispatch_kevent_mach_msg_size(ke));
 		kr = MACH_MSG_SUCCESS;
 	}
-	if (flags & DISPATCH_EV_MSG_NEEDS_FREE) {
-		free(hdr);
-	}
+	free(hdr);
+
 out:
 	if (unlikely(kr)) {
 		_dispatch_bug_mach_client("_dispatch_kevent_mach_msg_drain: "
@@ -2124,7 +3199,9 @@
 	.dst_filter     = EVFILT_MACHPORT,
 	.dst_flags      = EV_UDATA_SPECIFIC|EV_DISPATCH|EV_VANISHED,
 	.dst_fflags     = 0,
+	.dst_action     = DISPATCH_UNOTE_ACTION_SOURCE_OR_FFLAGS,
 	.dst_size       = sizeof(struct dispatch_source_refs_s),
+	.dst_strict     = false,
 
 	.dst_create     = _dispatch_unote_create_with_handle,
 	.dst_merge_evt  = _dispatch_source_merge_evt,
@@ -2134,66 +3211,51 @@
 };
 
 static void
-_dispatch_source_mach_recv_direct_merge_msg(dispatch_unote_t du, uint32_t flags,
-		mach_msg_header_t *msg, mach_msg_size_t msgsz DISPATCH_UNUSED)
+_dispatch_mach_notification_event(dispatch_unote_t du, uint32_t flags DISPATCH_UNUSED,
+		uintptr_t data DISPATCH_UNUSED, pthread_priority_t pp DISPATCH_UNUSED)
 {
-	dispatch_continuation_t dc = du._dr->ds_handler[DS_EVENT_HANDLER];
-	dispatch_source_t ds = _dispatch_source_from_refs(du._dr);
-	dispatch_queue_t cq = _dispatch_queue_get_current();
-
-	// see firehose_client_push_notify_async
-	_dispatch_queue_set_current(ds->_as_dq);
-	dc->dc_func(msg);
-	_dispatch_queue_set_current(cq);
-	if (flags & DISPATCH_EV_MSG_NEEDS_FREE) {
-		free(msg);
-	}
-	if ((ds->dq_atomic_flags & DSF_CANCELED) ||
-			(flags & (EV_ONESHOT | EV_DELETE))) {
-		return _dispatch_source_merge_evt(du, flags, 0, 0, 0);
-	}
-	if (_dispatch_unote_needs_rearm(du)) {
-		return _dispatch_unote_resume(du);
-	}
+	DISPATCH_CLIENT_CRASH(du._du->du_ident, "Unexpected non message event");
 }
 
+const dispatch_source_type_s _dispatch_mach_type_notification = {
+	.dst_kind       = "mach_notification",
+	.dst_filter     = EVFILT_MACHPORT,
+	.dst_flags      = EV_UDATA_SPECIFIC|EV_DISPATCH|EV_VANISHED,
+	.dst_fflags     = DISPATCH_MACH_RCV_OPTIONS,
+	.dst_action     = DISPATCH_UNOTE_ACTION_PASS_FFLAGS,
+	.dst_size       = sizeof(struct dispatch_unote_class_s),
+	.dst_strict     = false,
+
+	.dst_create     = _dispatch_unote_create_with_handle,
+	.dst_merge_evt  = _dispatch_mach_notification_event,
+	.dst_merge_msg  = _dispatch_mach_notification_merge_msg,
+
+	.dst_per_trigger_qos = true,
+};
+
 static void
-_dispatch_mach_recv_direct_merge(dispatch_unote_t du,
-		uint32_t flags, uintptr_t data,
-		uintptr_t status DISPATCH_UNUSED,
-		pthread_priority_t pp)
+_dispatch_mach_recv_direct_merge_evt(dispatch_unote_t du, uint32_t flags,
+		uintptr_t data, pthread_priority_t pp)
 {
 	if (flags & EV_VANISHED) {
 		DISPATCH_CLIENT_CRASH(du._du->du_ident,
 				"Unexpected EV_VANISHED (do not destroy random mach ports)");
 	}
-	return _dispatch_source_merge_evt(du, flags, data, 0, pp);
+	return _dispatch_source_merge_evt(du, flags, data, pp);
 }
 
-const dispatch_source_type_s _dispatch_source_type_mach_recv_direct = {
-	.dst_kind       = "direct mach_recv",
-	.dst_filter     = EVFILT_MACHPORT,
-	.dst_flags      = EV_UDATA_SPECIFIC|EV_DISPATCH|EV_VANISHED,
-	.dst_fflags     = DISPATCH_MACH_RCV_OPTIONS,
-	.dst_size       = sizeof(struct dispatch_source_refs_s),
-
-	.dst_create     = _dispatch_unote_create_with_handle,
-	.dst_merge_evt  = _dispatch_mach_recv_direct_merge,
-	.dst_merge_msg  = _dispatch_source_mach_recv_direct_merge_msg,
-
-	.dst_per_trigger_qos = true,
-};
-
 const dispatch_source_type_s _dispatch_mach_type_recv = {
 	.dst_kind       = "mach_recv (channel)",
 	.dst_filter     = EVFILT_MACHPORT,
 	.dst_flags      = EV_UDATA_SPECIFIC|EV_DISPATCH|EV_VANISHED,
 	.dst_fflags     = DISPATCH_MACH_RCV_OPTIONS,
+	.dst_action     = DISPATCH_UNOTE_ACTION_PASS_FFLAGS,
 	.dst_size       = sizeof(struct dispatch_mach_recv_refs_s),
+	.dst_strict     = false,
 
-	 // without handle because the mach code will set the ident after connect
+	// without handle because the mach code will set the ident after connect
 	.dst_create     = _dispatch_unote_create_without_handle,
-	.dst_merge_evt  = _dispatch_mach_recv_direct_merge,
+	.dst_merge_evt  = _dispatch_mach_recv_direct_merge_evt,
 	.dst_merge_msg  = _dispatch_mach_merge_msg,
 
 	.dst_per_trigger_qos = true,
@@ -2203,7 +3265,6 @@
 static void
 _dispatch_mach_reply_merge_evt(dispatch_unote_t du,
 		uint32_t flags DISPATCH_UNUSED, uintptr_t data DISPATCH_UNUSED,
-		uintptr_t status DISPATCH_UNUSED,
 		pthread_priority_t pp DISPATCH_UNUSED)
 {
 	DISPATCH_INTERNAL_CRASH(du._du->du_ident, "Unexpected event");
@@ -2214,7 +3275,9 @@
 	.dst_filter     = EVFILT_MACHPORT,
 	.dst_flags      = EV_UDATA_SPECIFIC|EV_DISPATCH|EV_ONESHOT|EV_VANISHED,
 	.dst_fflags     = DISPATCH_MACH_RCV_OPTIONS,
+	.dst_action     = DISPATCH_UNOTE_ACTION_PASS_FFLAGS,
 	.dst_size       = sizeof(struct dispatch_mach_reply_refs_s),
+	.dst_strict     = false,
 
 	.dst_create     = _dispatch_unote_create_with_handle,
 	.dst_merge_evt  = _dispatch_mach_reply_merge_evt,
@@ -2228,10 +3291,12 @@
 	.dst_filter     = EVFILT_SIGNAL,
 	.dst_flags      = DISPATCH_EV_DIRECT|EV_CLEAR|EV_ONESHOT,
 	.dst_fflags     = 0,
+	.dst_action     = DISPATCH_UNOTE_ACTION_PASS_DATA,
 	.dst_size       = sizeof(struct dispatch_xpc_term_refs_s),
+	.dst_strict     = false,
 
 	.dst_create     = _dispatch_unote_create_with_handle,
-	.dst_merge_evt  = _dispatch_xpc_sigterm_merge,
+	.dst_merge_evt  = _dispatch_xpc_sigterm_merge_evt,
 };
 
 #endif // HAVE_MACH
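
The source-type tables above now carry an explicit dst_action describing how a delivered kevent reaches the source: DISPATCH_UNOTE_ACTION_SOURCE_OR_FFLAGS ORs the filter flags into the source's pending data (as _dispatch_mach_notify_merge does via os_atomic_or2o above), while the PASS_DATA / PASS_FFLAGS variants plausibly hand the raw kevent fields straight through. A minimal standalone sketch of that dispatch-on-action idea, using hypothetical example_-prefixed names rather than the real libdispatch types:

    /* Illustrative only: a toy model of action-driven event merging. */
    #include <stdint.h>
    #include <stdio.h>

    typedef enum {
        EXAMPLE_ACTION_SOURCE_OR_FFLAGS, /* accumulate filter flags with OR */
        EXAMPLE_ACTION_PASS_DATA,        /* forward the kevent data field */
        EXAMPLE_ACTION_PASS_FFLAGS,      /* forward the filter flags */
    } example_action_t;

    typedef struct {
        example_action_t action;
        uintptr_t pending;               /* what the handler will observe */
    } example_source_t;

    static void
    example_merge(example_source_t *s, uintptr_t data, uint32_t fflags)
    {
        switch (s->action) {
        case EXAMPLE_ACTION_SOURCE_OR_FFLAGS: s->pending |= fflags; break;
        case EXAMPLE_ACTION_PASS_DATA:        s->pending  = data;   break;
        case EXAMPLE_ACTION_PASS_FFLAGS:      s->pending  = fflags; break;
        }
    }

    int main(void)
    {
        example_source_t vnode = { .action = EXAMPLE_ACTION_SOURCE_OR_FFLAGS };
        example_merge(&vnode, 0, 0x1);   /* first event: NOTE_DELETE-like bit */
        example_merge(&vnode, 0, 0x4);   /* second event: NOTE_WRITE-like bit */
        printf("pending fflags: 0x%lx\n", (unsigned long)vnode.pending);
        return 0;
    }
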
diff --git a/src/event/event_windows.c b/src/event/event_windows.c
index 2fe9680..1e3fae7 100644
--- a/src/event/event_windows.c
+++ b/src/event/event_windows.c
@@ -21,26 +21,31 @@
 #include "internal.h"
 #if DISPATCH_EVENT_BACKEND_WINDOWS
 
+static HANDLE hPort = NULL;
+enum _dispatch_windows_port {
+	DISPATCH_PORT_POKE = 0,
+	DISPATCH_PORT_TIMER_CLOCK_WALL,
+	DISPATCH_PORT_TIMER_CLOCK_UPTIME,
+	DISPATCH_PORT_TIMER_CLOCK_MONOTONIC,
+};
+
 #pragma mark dispatch_unote_t
 
 bool
-_dispatch_unote_register(dispatch_unote_t du DISPATCH_UNUSED,
-		dispatch_wlh_t wlh DISPATCH_UNUSED,
-		dispatch_priority_t pri DISPATCH_UNUSED)
+_dispatch_unote_register_muxed(dispatch_unote_t du DISPATCH_UNUSED)
 {
 	WIN_PORT_ERROR();
 	return false;
 }
 
 void
-_dispatch_unote_resume(dispatch_unote_t du DISPATCH_UNUSED)
+_dispatch_unote_resume_muxed(dispatch_unote_t du DISPATCH_UNUSED)
 {
 	WIN_PORT_ERROR();
 }
 
 bool
-_dispatch_unote_unregister(dispatch_unote_t du DISPATCH_UNUSED,
-		uint32_t flags DISPATCH_UNUSED)
+_dispatch_unote_unregister_muxed(dispatch_unote_t du DISPATCH_UNUSED)
 {
 	WIN_PORT_ERROR();
 	return false;
@@ -48,32 +53,191 @@
 
 #pragma mark timers
 
-void
-_dispatch_event_loop_timer_arm(uint32_t tidx DISPATCH_UNUSED,
-		dispatch_timer_delay_s range DISPATCH_UNUSED,
-		dispatch_clock_now_cache_t nows DISPATCH_UNUSED)
+typedef struct _dispatch_windows_timeout_s {
+	PTP_TIMER pTimer;
+	enum _dispatch_windows_port ullIdent;
+	bool bArmed;
+} *dispatch_windows_timeout_t;
+
+#define DISPATCH_WINDOWS_TIMEOUT_INITIALIZER(clock)                             \
+	[DISPATCH_CLOCK_##clock] = {                                            \
+		.pTimer = NULL,                                                 \
+		.ullIdent = DISPATCH_PORT_TIMER_CLOCK_##clock,                  \
+		.bArmed = FALSE,                                                \
+	}
+
+static struct _dispatch_windows_timeout_s _dispatch_windows_timeout[] = {
+	DISPATCH_WINDOWS_TIMEOUT_INITIALIZER(WALL),
+	DISPATCH_WINDOWS_TIMEOUT_INITIALIZER(UPTIME),
+	DISPATCH_WINDOWS_TIMEOUT_INITIALIZER(MONOTONIC),
+};
+
+static void
+_dispatch_event_merge_timer(dispatch_clock_t clock)
 {
-	WIN_PORT_ERROR();
+	uint32_t tidx = DISPATCH_TIMER_INDEX(clock, 0);
+
+	_dispatch_windows_timeout[clock].bArmed = FALSE;
+
+	_dispatch_timers_heap_dirty(_dispatch_timers_heap, tidx);
+	_dispatch_timers_heap[tidx].dth_needs_program = true;
+	_dispatch_timers_heap[tidx].dth_armed = false;
+}
+
+static void CALLBACK
+_dispatch_timer_callback(PTP_CALLBACK_INSTANCE Instance, PVOID Context,
+	PTP_TIMER Timer)
+{
+	BOOL bSuccess;
+
+	bSuccess = PostQueuedCompletionStatus(hPort, 0, (ULONG_PTR)Context,
+		NULL);
+	if (bSuccess == FALSE) {
+		DISPATCH_INTERNAL_CRASH(GetLastError(),
+			"PostQueuedCompletionStatus");
+	}
 }
 
 void
-_dispatch_event_loop_timer_delete(uint32_t tidx DISPATCH_UNUSED)
+_dispatch_event_loop_timer_arm(dispatch_timer_heap_t dth DISPATCH_UNUSED,
+		uint32_t tidx, dispatch_timer_delay_s range,
+		dispatch_clock_now_cache_t nows)
 {
-	WIN_PORT_ERROR();
+	dispatch_windows_timeout_t timer;
+	FILETIME ftDueTime;
+	LARGE_INTEGER liTime;
+
+	switch (DISPATCH_TIMER_CLOCK(tidx)) {
+	case DISPATCH_CLOCK_WALL:
+		timer = &_dispatch_windows_timeout[DISPATCH_CLOCK_WALL];
+		liTime.QuadPart = range.delay +
+			_dispatch_time_now_cached(DISPATCH_TIMER_CLOCK(tidx), nows);
+		break;
+
+	case DISPATCH_CLOCK_UPTIME:
+	case DISPATCH_CLOCK_MONOTONIC:
+		timer = &_dispatch_windows_timeout[DISPATCH_TIMER_CLOCK(tidx)];
+		liTime.QuadPart = -((range.delay + 99) / 100);
+		break;
+	}
+
+	if (timer->pTimer == NULL) {
+		timer->pTimer = CreateThreadpoolTimer(_dispatch_timer_callback,
+			(LPVOID)timer->ullIdent, NULL);
+		if (timer->pTimer == NULL) {
+			DISPATCH_INTERNAL_CRASH(GetLastError(),
+				"CreateThreadpoolTimer");
+		}
+	}
+
+	ftDueTime.dwHighDateTime = liTime.HighPart;
+	ftDueTime.dwLowDateTime = liTime.LowPart;
+
+	SetThreadpoolTimer(timer->pTimer, &ftDueTime, /*msPeriod=*/0,
+		/*msWindowLength=*/0);
+	timer->bArmed = TRUE;
+}
+
+void
+_dispatch_event_loop_timer_delete(dispatch_timer_heap_t dth DISPATCH_UNUSED,
+		uint32_t tidx)
+{
+	dispatch_windows_timeout_t timer;
+
+	switch (DISPATCH_TIMER_CLOCK(tidx)) {
+	case DISPATCH_CLOCK_WALL:
+		timer = &_dispatch_windows_timeout[DISPATCH_CLOCK_WALL];
+		break;
+
+	case DISPATCH_CLOCK_UPTIME:
+	case DISPATCH_CLOCK_MONOTONIC:
+		timer = &_dispatch_windows_timeout[DISPATCH_TIMER_CLOCK(tidx)];
+		break;
+	}
+
+	SetThreadpoolTimer(timer->pTimer, NULL, /*msPeriod=*/0,
+		/*msWindowLength=*/0);
+	timer->bArmed = FALSE;
 }
 
 #pragma mark dispatch_loop
 
+static void
+_dispatch_windows_port_init(void *context DISPATCH_UNUSED)
+{
+	hPort = CreateIoCompletionPort(INVALID_HANDLE_VALUE, NULL, 0, 1);
+	if (hPort == NULL) {
+		DISPATCH_INTERNAL_CRASH(GetLastError(),
+			"CreateIoCompletionPort");
+	}
+
+#if DISPATCH_USE_MGR_THREAD
+	_dispatch_trace_item_push(_dispatch_mgr_q.do_targetq, &_dispatch_mgr_q);
+	dx_push(_dispatch_mgr_q.do_targetq, &_dispatch_mgr_q, 0);
+#endif
+}
+
 void
 _dispatch_event_loop_poke(dispatch_wlh_t wlh DISPATCH_UNUSED,
 		uint64_t dq_state DISPATCH_UNUSED, uint32_t flags DISPATCH_UNUSED)
 {
-	WIN_PORT_ERROR();
+	static dispatch_once_t _dispatch_windows_port_init_pred;
+	BOOL bSuccess;
+
+	dispatch_once_f(&_dispatch_windows_port_init_pred, NULL,
+		_dispatch_windows_port_init);
+	bSuccess = PostQueuedCompletionStatus(hPort, 0, DISPATCH_PORT_POKE,
+		NULL);
+	(void)dispatch_assume(bSuccess);
 }
 
 DISPATCH_NOINLINE
 void
-_dispatch_event_loop_drain(uint32_t flags DISPATCH_UNUSED)
+_dispatch_event_loop_drain(uint32_t flags)
+{
+	DWORD dwNumberOfBytesTransferred;
+	ULONG_PTR ulCompletionKey;
+	LPOVERLAPPED pOV;
+	BOOL bSuccess;
+
+	pOV = (LPOVERLAPPED)&pOV;
+	bSuccess = GetQueuedCompletionStatus(hPort, &dwNumberOfBytesTransferred,
+		&ulCompletionKey, &pOV,
+		(flags & KEVENT_FLAG_IMMEDIATE) ? 0 : INFINITE);
+	while (bSuccess) {
+		switch (ulCompletionKey) {
+		case DISPATCH_PORT_POKE:
+			break;
+
+		case DISPATCH_PORT_TIMER_CLOCK_WALL:
+			_dispatch_event_merge_timer(DISPATCH_CLOCK_WALL);
+			break;
+
+		case DISPATCH_PORT_TIMER_CLOCK_UPTIME:
+			_dispatch_event_merge_timer(DISPATCH_CLOCK_UPTIME);
+			break;
+
+		case DISPATCH_PORT_TIMER_CLOCK_MONOTONIC:
+			_dispatch_event_merge_timer(DISPATCH_CLOCK_MONOTONIC);
+			break;
+
+		default:
+			DISPATCH_INTERNAL_CRASH(ulCompletionKey,
+				"unsupported completion key");
+		}
+
+		bSuccess = GetQueuedCompletionStatus(hPort,
+			&dwNumberOfBytesTransferred, &ulCompletionKey, &pOV, 0);
+	}
+
+	if (bSuccess == FALSE && pOV != NULL) {
+		DISPATCH_INTERNAL_CRASH(GetLastError(),
+			"GetQueuedCompletionStatus");
+	}
+}
+
+void
+_dispatch_event_loop_cancel_waiter(dispatch_sync_context_t dsc DISPATCH_UNUSED)
 {
 	WIN_PORT_ERROR();
 }
@@ -109,9 +273,9 @@
 #endif
 
 void
-_dispatch_event_loop_leave_immediate(dispatch_wlh_t wlh, uint64_t dq_state)
+_dispatch_event_loop_leave_immediate(uint64_t dq_state)
 {
-	(void)wlh; (void)dq_state;
+	(void)dq_state;
 }
 
 #endif // DISPATCH_EVENT_BACKEND_WINDOWS
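
The new Windows backend above wakes its event loop by posting completion packets keyed by their origin (an explicit poke, or one of the per-clock threadpool timers) and drains them with GetQueuedCompletionStatus, blocking on the first call and then polling with a zero timeout until the port is empty. A compilable sketch of just that wake/drain pattern (Win32 only; the EXAMPLE_KEY_* values are made up for illustration):

    #include <windows.h>
    #include <stdio.h>

    enum { EXAMPLE_KEY_POKE = 0, EXAMPLE_KEY_TIMER = 1 };

    int main(void)
    {
        HANDLE port = CreateIoCompletionPort(INVALID_HANDLE_VALUE, NULL, 0, 1);
        if (port == NULL) return 1;

        /* a producer (timer callback, poke, ...) posts a packet keyed by its source */
        PostQueuedCompletionStatus(port, 0, EXAMPLE_KEY_TIMER, NULL);

        /* the drain loop blocks once, then polls with a zero timeout until empty */
        DWORD bytes;
        ULONG_PTR key;
        LPOVERLAPPED ov = NULL;
        BOOL ok = GetQueuedCompletionStatus(port, &bytes, &key, &ov, INFINITE);
        while (ok) {
            switch (key) {
            case EXAMPLE_KEY_POKE:  puts("poked");       break;
            case EXAMPLE_KEY_TIMER: puts("timer fired"); break;
            default:                puts("unknown key"); break;
            }
            ok = GetQueuedCompletionStatus(port, &bytes, &key, &ov, 0);
        }
        CloseHandle(port);
        return 0;
    }
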
diff --git a/src/event/workqueue.c b/src/event/workqueue.c
index 19a2476..28f1675 100644
--- a/src/event/workqueue.c
+++ b/src/event/workqueue.c
@@ -48,7 +48,7 @@
  */
 typedef struct dispatch_workq_monitor_s {
 	/* The dispatch_queue we are monitoring */
-	dispatch_queue_t dq;
+	dispatch_queue_global_t dq;
 
 	/* The observed number of runnable worker threads */
 	int32_t num_runnable;
@@ -67,7 +67,7 @@
 } dispatch_workq_monitor_s, *dispatch_workq_monitor_t;
 
 #if HAVE_DISPATCH_WORKQ_MONITORING
-static dispatch_workq_monitor_s _dispatch_workq_monitors[DISPATCH_QOS_MAX];
+static dispatch_workq_monitor_s _dispatch_workq_monitors[DISPATCH_QOS_NBUCKETS];
 #endif
 
 #pragma mark Implementation of the monitoring subsystem.
@@ -79,13 +79,15 @@
 static dispatch_once_t _dispatch_workq_init_once_pred;
 
 void
-_dispatch_workq_worker_register(dispatch_queue_t root_q, qos_class_t cls)
+_dispatch_workq_worker_register(dispatch_queue_global_t root_q)
 {
 	dispatch_once_f(&_dispatch_workq_init_once_pred, NULL, &_dispatch_workq_init_once);
 
 #if HAVE_DISPATCH_WORKQ_MONITORING
-	dispatch_qos_t qos = _dispatch_qos_from_qos_class(cls);
-	dispatch_workq_monitor_t mon = &_dispatch_workq_monitors[qos-1];
+	dispatch_qos_t qos = _dispatch_priority_qos(root_q->dq_priority);
+	if (qos == 0) qos = DISPATCH_QOS_DEFAULT;
+	int bucket = DISPATCH_QOS_BUCKET(qos);
+	dispatch_workq_monitor_t mon = &_dispatch_workq_monitors[bucket];
 	dispatch_assert(mon->dq == root_q);
 	dispatch_tid tid = _dispatch_tid_self();
 	_dispatch_unfair_lock_lock(&mon->registered_tid_lock);
@@ -95,16 +97,17 @@
 	_dispatch_unfair_lock_unlock(&mon->registered_tid_lock);
 #else
 	(void)root_q;
-	(void)cls;
 #endif // HAVE_DISPATCH_WORKQ_MONITORING
 }
 
 void
-_dispatch_workq_worker_unregister(dispatch_queue_t root_q, qos_class_t cls)
+_dispatch_workq_worker_unregister(dispatch_queue_global_t root_q)
 {
 #if HAVE_DISPATCH_WORKQ_MONITORING
-	dispatch_qos_t qos = _dispatch_qos_from_qos_class(cls);
-	dispatch_workq_monitor_t mon = &_dispatch_workq_monitors[qos-1];
+	dispatch_qos_t qos = _dispatch_priority_qos(root_q->dq_priority);
+	if (qos == 0) qos = DISPATCH_QOS_DEFAULT;
+	int bucket = DISPATCH_QOS_BUCKET(qos);
+	dispatch_workq_monitor_t mon = &_dispatch_workq_monitors[bucket];
 	dispatch_assert(mon->dq == root_q);
 	dispatch_tid tid = _dispatch_tid_self();
 	_dispatch_unfair_lock_lock(&mon->registered_tid_lock);
@@ -120,7 +123,6 @@
 	_dispatch_unfair_lock_unlock(&mon->registered_tid_lock);
 #else
 	(void)root_q;
-	(void)cls;
 #endif // HAVE_DISPATCH_WORKQ_MONITORING
 }
 
@@ -182,14 +184,18 @@
 #error must define _dispatch_workq_count_runnable_workers
 #endif
 
+#define foreach_qos_bucket_reverse(name) \
+		for (name = DISPATCH_QOS_BUCKET(DISPATCH_QOS_MAX); \
+				name >= DISPATCH_QOS_BUCKET(DISPATCH_QOS_MAINTENANCE); name--)
+
 static void
 _dispatch_workq_monitor_pools(void *context DISPATCH_UNUSED)
 {
 	int global_soft_max = WORKQ_OVERSUBSCRIBE_FACTOR * (int)dispatch_hw_config(active_cpus);
-	int global_runnable = 0;
-	for (dispatch_qos_t i = DISPATCH_QOS_MAX; i > DISPATCH_QOS_UNSPECIFIED; i--) {
-		dispatch_workq_monitor_t mon = &_dispatch_workq_monitors[i-1];
-		dispatch_queue_t dq = mon->dq;
+	int global_runnable = 0, i;
+	foreach_qos_bucket_reverse(i) {
+		dispatch_workq_monitor_t mon = &_dispatch_workq_monitors[i];
+		dispatch_queue_global_t dq = mon->dq;
 
 		if (!_dispatch_queue_class_probe(dq)) {
 			_dispatch_debug("workq: %s is empty.", dq->dq_label);
@@ -210,7 +216,7 @@
 			int32_t floor = mon->target_runnable - WORKQ_MAX_TRACKED_TIDS;
 			_dispatch_debug("workq: %s has no runnable workers; poking with floor %d",
 					dq->dq_label, floor);
-			_dispatch_global_queue_poke(dq, 1, floor);
+			_dispatch_root_queue_poke(dq, 1, floor);
 			global_runnable += 1; // account for poke in global estimate
 		} else if (mon->num_runnable < mon->target_runnable &&
 				   global_runnable < global_soft_max) {
@@ -223,7 +229,7 @@
 			floor = MAX(floor, floor2);
 			_dispatch_debug("workq: %s under utilization target; poking with floor %d",
 					dq->dq_label, floor);
-			_dispatch_global_queue_poke(dq, 1, floor);
+			_dispatch_root_queue_poke(dq, 1, floor);
 			global_runnable += 1; // account for poke in global estimate
 		}
 	}
@@ -234,10 +240,10 @@
 _dispatch_workq_init_once(void *context DISPATCH_UNUSED)
 {
 #if HAVE_DISPATCH_WORKQ_MONITORING
-	int target_runnable = (int)dispatch_hw_config(active_cpus);
-	for (dispatch_qos_t i = DISPATCH_QOS_MAX; i > DISPATCH_QOS_UNSPECIFIED; i--) {
-		dispatch_workq_monitor_t mon = &_dispatch_workq_monitors[i-1];
-		mon->dq = _dispatch_get_root_queue(i, false);
+	int i, target_runnable = (int)dispatch_hw_config(active_cpus);
+	foreach_qos_bucket_reverse(i) {
+		dispatch_workq_monitor_t mon = &_dispatch_workq_monitors[i];
+		mon->dq = _dispatch_get_root_queue(DISPATCH_QOS_FOR_BUCKET(i), false);
 		void *buf = _dispatch_calloc(WORKQ_MAX_TRACKED_TIDS, sizeof(dispatch_tid));
 		mon->registered_tids = buf;
 		mon->target_runnable = target_runnable;
@@ -245,7 +251,7 @@
 
 	// Create monitoring timer that will periodically run on dispatch_mgr_q
 	dispatch_source_t ds = dispatch_source_create(DISPATCH_SOURCE_TYPE_TIMER,
-			0, 0, &_dispatch_mgr_q);
+			0, 0, _dispatch_mgr_q._as_dq);
 	dispatch_source_set_timer(ds, dispatch_time(DISPATCH_TIME_NOW, 0),
 			NSEC_PER_SEC, 0);
 	dispatch_source_set_event_handler_f(ds, _dispatch_workq_monitor_pools);
diff --git a/src/event/workqueue_internal.h b/src/event/workqueue_internal.h
index 94dfe4e..b6ca6df 100644
--- a/src/event/workqueue_internal.h
+++ b/src/event/workqueue_internal.h
@@ -27,12 +27,8 @@
 #ifndef __DISPATCH_WORKQUEUE_INTERNAL__
 #define __DISPATCH_WORKQUEUE_INTERNAL__
 
-#define WORKQ_ADDTHREADS_OPTION_OVERCOMMIT 0x1
-
-#define DISPATCH_WORKQ_MAX_PTHREAD_COUNT 255
-
-void _dispatch_workq_worker_register(dispatch_queue_t root_q, qos_class_t cls);
-void _dispatch_workq_worker_unregister(dispatch_queue_t root_q, qos_class_t cls);
+void _dispatch_workq_worker_register(dispatch_queue_global_t root_q);
+void _dispatch_workq_worker_unregister(dispatch_queue_global_t root_q);
 
 #if defined(__linux__)
 #define HAVE_DISPATCH_WORKQ_MONITORING 1
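
workqueue.c above stops indexing the monitor array by raw QoS value and instead sizes it with DISPATCH_QOS_NBUCKETS, maps each root queue's QoS through DISPATCH_QOS_BUCKET(), and walks the buckets from the highest QoS downward via foreach_qos_bucket_reverse(). A small self-contained sketch of that bucket scheme, using made-up EXAMPLE_QOS_* constants in place of libdispatch's private values:

    #include <stdio.h>

    #define EXAMPLE_QOS_MAINTENANCE 1
    #define EXAMPLE_QOS_MAX         6
    #define EXAMPLE_QOS_BUCKET(q)   ((q) - EXAMPLE_QOS_MAINTENANCE)
    #define EXAMPLE_QOS_NBUCKETS    (EXAMPLE_QOS_BUCKET(EXAMPLE_QOS_MAX) + 1)

    static int example_runnable[EXAMPLE_QOS_NBUCKETS];

    int main(void)
    {
        /* a worker registers against the bucket for its root queue's QoS */
        int qos = 4;                                 /* e.g. a "default" QoS */
        example_runnable[EXAMPLE_QOS_BUCKET(qos)]++;

        /* the monitor pass walks buckets from the highest QoS down */
        for (int i = EXAMPLE_QOS_BUCKET(EXAMPLE_QOS_MAX);
                i >= EXAMPLE_QOS_BUCKET(EXAMPLE_QOS_MAINTENANCE); i--) {
            printf("bucket %d: %d runnable\n", i, example_runnable[i]);
        }
        return 0;
    }
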
diff --git a/src/firehose/firehose.defs b/src/firehose/firehose.defs
index e4fdf33..83d46ef 100644
--- a/src/firehose/firehose.defs
+++ b/src/firehose/firehose.defs
@@ -23,36 +23,44 @@
 
 #include "firehose_types.defs"
 
-subsystem firehose	11600;
-serverprefix		firehose_server_;
-userprefix			firehose_send_;
+subsystem firehose  11600;
+serverprefix        firehose_server_;
+userprefix          firehose_send_;
 
-simpleroutine
-register(
-			server_port		: mach_port_t;
-			mem_port		: mach_port_move_send_t;
-			mem_size		: mach_vm_size_t;
-			comm_recvp		: mach_port_move_receive_t;
-			comm_sendp		: mach_port_make_send_t;
-			extra_info_port : mach_port_move_send_t;
-			extra_info_size : mach_vm_size_t;
-			ServerAuditToken atoken : audit_token_t
+UseSpecialReplyPort 1;
+
+simpleroutine register(
+            server_port     : mach_port_t;
+            mem_port        : mach_port_move_send_t;
+            mem_size        : mach_vm_size_t;
+            comm_mem_recvp  : mach_port_move_receive_t;
+            comm_io_recvp   : mach_port_move_receive_t;
+            comm_sendp      : mach_port_make_send_t;
+            extra_info_port : mach_port_move_send_t;
+            extra_info_size : mach_vm_size_t;
+            ServerAuditToken atoken : audit_token_t
 );
 
-routine
-push_and_wait(
-RequestPort	comm_port		: mach_port_t;
-SReplyPort	reply_port		: mach_port_make_send_once_t;
-			qos_class		: qos_class_t;
-			for_io			: boolean_t;
-out			push_reply		: firehose_push_reply_t;
-out			quarantinedOut	: boolean_t
+routine push_and_wait(
+RequestPort comm_port       : mach_port_t;
+SReplyPort  reply_port      : mach_port_make_send_once_t;
+out         push_reply      : firehose_push_reply_t;
+out         quarantinedOut  : boolean_t
 );
 
-simpleroutine
-push_async(
-			comm_port		: mach_port_t;
-			qos_class		: qos_class_t;
-			for_io			: boolean_t;
-			expects_notify	: boolean_t
+simpleroutine push_async(
+RequestPort comm_port       : mach_port_t;
+in          qos_class       : qos_class_t;
+WaitTime    timeout         : natural_t
+);
+
+routine get_logging_prefs(
+RequestPort server_port     : mach_port_t;
+out         mem_port        : mach_port_t;
+out         mem_size        : mach_vm_size_t
+);
+
+routine should_send_strings(
+RequestPort server_port     : mach_port_t;
+out         strings_needed  : boolean_t
 );
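
The firehose_buffer.c changes below move per-chunk timestamp handling into firehose_buffer_chunk_init(): each tracepoint stores its stamp as a delta from the chunk's fc_timestamp, packed together with the payload length into ft_stamp_and_length (length in the top 16 bits, delta in the low 48, with firehose_buffer_chunk_stamp_delta_fits() checking that the delta fits). A tiny sketch of that packing under those assumed widths, with illustrative names:

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    static uint64_t
    example_pack(uint64_t chunk_timestamp, uint64_t stamp, uint16_t pubsize)
    {
        uint64_t delta = stamp - chunk_timestamp;
        assert(!(delta >> 48));              /* the delta must fit in 48 bits */
        return delta | ((uint64_t)pubsize << 48);
    }

    int main(void)
    {
        uint64_t chunk_ts = 1000000;         /* chunk base timestamp */
        uint64_t packed = example_pack(chunk_ts, 1000420, 64);
        printf("delta=%llu len=%llu\n",
                (unsigned long long)(packed & ((1ULL << 48) - 1)),
                (unsigned long long)(packed >> 48));
        return 0;
    }
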
diff --git a/src/firehose/firehose_buffer.c b/src/firehose/firehose_buffer.c
index 36a5b24..4631755 100644
--- a/src/firehose/firehose_buffer.c
+++ b/src/firehose/firehose_buffer.c
@@ -26,12 +26,6 @@
 #define __OS_EXPOSE_INTERNALS_INDIRECT__ 1
 
 #define DISPATCH_PURE_C 1
-#define _safe_cast_to_long(x) \
-		({ _Static_assert(sizeof(__typeof__(x)) <= sizeof(long), \
-				"__builtin_expect doesn't support types wider than long"); \
-				(long)(x); })
-#define fastpath(x) ((__typeof__(x))__builtin_expect(_safe_cast_to_long(x), ~0l))
-#define slowpath(x) ((__typeof__(x))__builtin_expect(_safe_cast_to_long(x), 0l))
 #define os_likely(x) __builtin_expect(!!(x), 1)
 #define os_unlikely(x) __builtin_expect(!!(x), 0)
 #define likely(x)   __builtin_expect(!!(x), 1)
@@ -67,7 +61,7 @@
 	dispatch_lock dgl_lock;
 } dispatch_gate_s, *dispatch_gate_t;
 #define DLOCK_LOCK_DATA_CONTENTION 0
-static void _dispatch_gate_wait(dispatch_gate_t l, uint32_t flags);
+static void _dispatch_firehose_gate_wait(dispatch_gate_t l, uint32_t flags);
 
 #define fcp_quarntined fcp_quarantined
 
@@ -124,6 +118,11 @@
 
 #ifdef KERNEL
 static firehose_buffer_t kernel_firehose_buffer = NULL;
+
+_Static_assert(FIREHOSE_BUFFER_KERNEL_MAX_CHUNK_COUNT == FIREHOSE_BUFFER_CHUNK_COUNT,
+		"FIREHOSE_BUFFER_KERNEL_MAX_CHUNK_COUNT must match FIREHOSE_BUFFER_CHUNK_COUNT");
+_Static_assert(FIREHOSE_BUFFER_KERNEL_DEFAULT_IO_PAGES <= FIREHOSE_BUFFER_KERNEL_DEFAULT_CHUNK_COUNT * 3 / 4,
+		"FIREHOSE_BUFFER_KERNEL_DEFAULT_IO_PAGES cannot exceed 3/4 of FIREHOSE_BUFFER_KERNEL_DEFAULT_CHUNK_COUNT");
 #endif
 
 #pragma mark -
@@ -131,31 +130,37 @@
 #ifndef KERNEL
 
 static mach_port_t
-firehose_client_reconnect(firehose_buffer_t fb, mach_port_t oldsendp)
+firehose_client_reconnect(firehose_buffer_t fb, mach_port_t oldsendp,
+		firehose_buffer_pushport_t pushport)
 {
-	mach_port_t sendp = MACH_PORT_NULL;
+	mach_port_t cursendp = MACH_PORT_NULL;
 	mach_port_t mem_port = MACH_PORT_NULL, extra_info_port = MACH_PORT_NULL;
 	mach_vm_size_t extra_info_size = 0;
 	kern_return_t kr;
+	bool reconnecting = (oldsendp != MACH_PORT_NULL);
 
 	dispatch_assert(fb->fb_header.fbh_logd_port);
 	dispatch_assert(fb->fb_header.fbh_recvp);
 	dispatch_assert(fb->fb_header.fbh_uniquepid != 0);
 
 	_dispatch_unfair_lock_lock(&fb->fb_header.fbh_logd_lock);
-	sendp = fb->fb_header.fbh_sendp;
-	if (sendp != oldsendp || sendp == MACH_PORT_DEAD) {
+	cursendp = fb->fb_header.fbh_sendp[pushport];
+	if (cursendp != oldsendp || cursendp == MACH_PORT_DEAD) {
 		// someone beat us to reconnecting or logd was unloaded, just go away
 		goto unlock;
 	}
 
-	if (oldsendp) {
-		// same trick as _xpc_pipe_dispose: keeping a send right
-		// maintains the name, so that we can destroy the receive right
-		// in case we still have it.
-		(void)firehose_mach_port_recv_dispose(oldsendp, fb);
-		firehose_mach_port_send_release(oldsendp);
-		fb->fb_header.fbh_sendp = MACH_PORT_NULL;
+	if (reconnecting) {
+		for (int i = 0; i < FIREHOSE_BUFFER_NPUSHPORTS; i++) {
+			mach_port_t spi = fb->fb_header.fbh_sendp[i];
+			dispatch_assert(spi);
+			// same trick as _xpc_pipe_dispose: keeping a send right maintains
+			// the name, so that we can destroy the receive right in case we
+			// still have it.
+			(void)firehose_mach_port_recv_dispose(spi, fb);
+			firehose_mach_port_send_release(spi);
+			fb->fb_header.fbh_sendp[i] = MACH_PORT_NULL;
+		}
 	}
 
 	/* Create a memory port for the buffer VM region */
@@ -174,11 +179,7 @@
 		DISPATCH_CLIENT_CRASH(kr, "Unable to make memory port");
 	}
 
-	/* Create a communication port to the logging daemon */
-	uint32_t opts = MPO_CONTEXT_AS_GUARD | MPO_TEMPOWNER | MPO_INSERT_SEND_RIGHT;
-	sendp = firehose_mach_port_allocate(opts, fb);
-
-	if (oldsendp && _voucher_libtrace_hooks->vah_get_reconnect_info) {
+	if (reconnecting && _voucher_libtrace_hooks->vah_get_reconnect_info) {
 		kr = _voucher_libtrace_hooks->vah_get_reconnect_info(&addr, &size);
 		if (likely(kr == KERN_SUCCESS) && addr && size) {
 			extra_info_size = size;
@@ -194,25 +195,39 @@
 		}
 	}
 
+	/* Create memory and IO communication ports to the logging daemon */
+	uint32_t opts = MPO_CONTEXT_AS_GUARD | MPO_TEMPOWNER | MPO_INSERT_SEND_RIGHT;
+	mach_port_t sendp[FIREHOSE_BUFFER_NPUSHPORTS];
+	for (int i = 0; i < FIREHOSE_BUFFER_NPUSHPORTS; i++) {
+		sendp[i] = firehose_mach_port_allocate(opts, 1, fb);
+	}
+	cursendp = sendp[pushport];
+
 	/* Call the firehose_register() MIG routine */
 	kr = firehose_send_register(fb->fb_header.fbh_logd_port, mem_port,
-			sizeof(union firehose_buffer_u), sendp, fb->fb_header.fbh_recvp,
+			sizeof(union firehose_buffer_u),
+			sendp[FIREHOSE_BUFFER_PUSHPORT_MEM],
+			sendp[FIREHOSE_BUFFER_PUSHPORT_IO], fb->fb_header.fbh_recvp,
 			extra_info_port, extra_info_size);
 	if (likely(kr == KERN_SUCCESS)) {
-		fb->fb_header.fbh_sendp = sendp;
+		for (int i = 0; i < FIREHOSE_BUFFER_NPUSHPORTS; i++) {
+			fb->fb_header.fbh_sendp[i] = sendp[i];
+		}
 	} else if (unlikely(kr == MACH_SEND_INVALID_DEST)) {
 		// MACH_SEND_INVALID_DEST here means that logd's bootstrap port
 		// turned into a dead name, which in turn means that logd has been
 		// unloaded. The only option here is to give up permanently.
-		//
-		// same trick as _xpc_pipe_dispose: keeping a send right
-		// maintains the name, so that we can destroy the receive right
-		// in case we still have it.
-		(void)firehose_mach_port_recv_dispose(sendp, fb);
-		firehose_mach_port_send_release(sendp);
+		for (int i = 0; i < FIREHOSE_BUFFER_NPUSHPORTS; i++) {
+			// same trick as _xpc_pipe_dispose: keeping a send right maintains
+			// the name, so that we can destroy the receive right in case we
+			// still have it.
+			(void)firehose_mach_port_recv_dispose(sendp[i], fb);
+			firehose_mach_port_send_release(sendp[i]);
+			fb->fb_header.fbh_sendp[i] = MACH_PORT_DEAD;
+		}
+		cursendp = MACH_PORT_DEAD;
 		firehose_mach_port_send_release(mem_port);
 		if (extra_info_port) firehose_mach_port_send_release(extra_info_port);
-		sendp = fb->fb_header.fbh_sendp = MACH_PORT_DEAD;
 	} else {
 		// the client probably has some form of memory corruption
 		// and/or a port leak
@@ -221,7 +236,7 @@
 
 unlock:
 	_dispatch_unfair_lock_unlock(&fb->fb_header.fbh_logd_lock);
-	return sendp;
+	return cursendp;
 }
 
 static void
@@ -266,14 +281,14 @@
 	}
 	total = MAX(total, FIREHOSE_BUFFER_CHUNK_PREALLOCATED_COUNT);
 	if (!(fbb_flags & FIREHOSE_BUFFER_BANK_FLAG_LOW_MEMORY)) {
-		total = MAX(total, TARGET_OS_EMBEDDED ? 8 : 12);
+		total = MAX(total, TARGET_OS_IPHONE ? 8 : 12);
 	}
 
-	new.fbs_max_ref  = total;
-	new.fbs_mem_bank = FIREHOSE_BANK_UNAVAIL_BIT - (total - 1);
-	new.fbs_io_bank  = FIREHOSE_BANK_UNAVAIL_BIT -
-			MAX(3 * total / 8, 2 * io_streams);
-	new.fbs_unused   = 0;
+	new = (firehose_bank_state_u) {
+		.fbs_max_ref = (firehose_chunk_ref_t)(total + 1),
+		.fbs_mem_bank = total - 1,
+		.fbs_io_bank  = MAX(3 * total / 8, 2 * io_streams),
+	};
 
 	old = fbb->fbb_limits;
 	fbb->fbb_limits = new;
@@ -299,7 +314,7 @@
 	vm_addr = vm_page_size;
 	const size_t madvise_bytes = FIREHOSE_BUFFER_MADVISE_CHUNK_COUNT *
 			FIREHOSE_CHUNK_SIZE;
-	if (slowpath(madvise_bytes % PAGE_SIZE)) {
+	if (unlikely(madvise_bytes % PAGE_SIZE)) {
 		DISPATCH_INTERNAL_CRASH(madvise_bytes,
 				"Invalid values for MADVISE_CHUNK_COUNT / CHUNK_SIZE");
 	}
@@ -308,7 +323,7 @@
 			VM_FLAGS_ANYWHERE | VM_FLAGS_PURGABLE |
 			VM_MAKE_TAG(VM_MEMORY_GENEALOGY), MEMORY_OBJECT_NULL, 0, FALSE,
 			VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_NONE);
-	if (slowpath(kr)) {
+	if (unlikely(kr)) {
 		if (kr != KERN_NO_SPACE) dispatch_assume_zero(kr);
 		firehose_mach_port_send_release(logd_port);
 		return NULL;
@@ -331,7 +346,8 @@
 	fbh->fbh_logd_port = logd_port;
 	fbh->fbh_pid = getpid();
 	fbh->fbh_uniquepid = unique_pid;
-	fbh->fbh_recvp = firehose_mach_port_allocate(opts, fb);
+	fbh->fbh_recvp = firehose_mach_port_allocate(opts, MACH_PORT_QLIMIT_BASIC,
+			fb);
 #endif // !KERNEL
 	fbh->fbh_spi_version = OS_FIREHOSE_SPI_VERSION;
 	fbh->fbh_bank.fbb_flags = bank_flags;
@@ -345,13 +361,13 @@
 	}
 	firehose_buffer_update_limits_unlocked(fb);
 #else
-	uint16_t total = FIREHOSE_BUFFER_CHUNK_PREALLOCATED_COUNT + 1;
-	const uint16_t num_kernel_io_pages = 8;
+	uint16_t total = FIREHOSE_BUFFER_CHUNK_PREALLOCATED_COUNT;
+	const uint16_t num_kernel_io_pages = __firehose_num_kernel_io_pages;
 	uint16_t io_pages = num_kernel_io_pages;
 	fbh->fbh_bank.fbb_state = (firehose_bank_state_u){
-		.fbs_max_ref = total,
-		.fbs_io_bank = FIREHOSE_BANK_UNAVAIL_BIT - io_pages,
-		.fbs_mem_bank = FIREHOSE_BANK_UNAVAIL_BIT - (total - io_pages - 1),
+		.fbs_max_ref = (firehose_chunk_ref_t)(total + 1),
+		.fbs_io_bank = io_pages,
+		.fbs_mem_bank = total - io_pages,
 	};
 	fbh->fbh_bank.fbb_limits = fbh->fbh_bank.fbb_state;
 #endif // KERNEL
@@ -376,7 +392,7 @@
 	// install the early boot page as the current one for persist
 	fbh->fbh_stream[firehose_stream_persist].fbs_state.fss_current =
 			FIREHOSE_BUFFER_CHUNK_PREALLOCATED_COUNT;
-	fbh->fbh_bank.fbb_state.fbs_io_bank += 1;
+	fbh->fbh_bank.fbb_state.fbs_io_bank -= 1;
 #endif
 
 	fbh->fbh_ring_tail = (firehose_ring_tail_u){
@@ -386,52 +402,54 @@
 }
 
 #ifndef KERNEL
-static void
-firehose_notify_source_invoke(mach_msg_header_t *hdr)
+static char const * const _firehose_key = "firehose";
+
+static bool
+firehose_drain_notifications_once(firehose_buffer_t fb)
 {
-	const size_t reply_size =
+	mach_msg_options_t opts = MACH_RCV_MSG | MACH_RCV_TIMEOUT |
+			MACH_RCV_TRAILER_ELEMENTS(MACH_RCV_TRAILER_CTX) | MACH_RCV_LARGE |
+			MACH_RCV_TRAILER_TYPE(MACH_MSG_TRAILER_FORMAT_0);
+
+	const size_t maxsize =
+			sizeof(union __RequestUnion__firehose_client_firehoseReply_subsystem);
+	const size_t maxreplysize =
 			sizeof(union __ReplyUnion__firehose_client_firehoseReply_subsystem);
+	mach_msg_size_t rcv_size = maxsize + MAX_TRAILER_SIZE;
+	mig_reply_error_t *msg = alloca(rcv_size);
+	kern_return_t kr;
 
-	firehose_mig_server(firehoseReply_server, reply_size, hdr);
-}
+	kr = mach_msg(&msg->Head, opts, 0, rcv_size, fb->fb_header.fbh_recvp, 0, 0);
 
-static void
-firehose_client_register_for_notifications(firehose_buffer_t fb)
-{
-	static const struct dispatch_continuation_s dc = {
-		.dc_func = (void *)firehose_notify_source_invoke,
-	};
-	firehose_buffer_header_t fbh = &fb->fb_header;
-
-	dispatch_once(&fbh->fbh_notifs_pred, ^{
-		dispatch_source_t ds = _dispatch_source_create_mach_msg_direct_recv(
-				fbh->fbh_recvp, &dc);
-		dispatch_set_context(ds, fb);
-		dispatch_activate(ds);
-		fbh->fbh_notifs_source = ds;
-	});
+	if (kr == KERN_SUCCESS) {
+		dispatch_thread_context_s firehose_ctxt = {
+			.dtc_key = _firehose_key,
+			.dtc_fb = fb,
+		};
+		_dispatch_thread_context_push(&firehose_ctxt);
+		firehose_mig_server(firehoseReply_server, maxreplysize, &msg->Head);
+		_dispatch_thread_context_pop(&firehose_ctxt);
+	} else if (kr != MACH_RCV_TIMED_OUT) {
+		DISPATCH_CLIENT_CRASH(kr, "firehose_drain_notifications_once() failed");
+	}
+	return kr == KERN_SUCCESS;
 }
 
 static void
 firehose_client_send_push_async(firehose_buffer_t fb, qos_class_t qos,
 		bool for_io)
 {
-	bool ask_for_notifs = fb->fb_header.fbh_notifs_source != NULL;
-	mach_port_t sendp = fb->fb_header.fbh_sendp;
+	firehose_buffer_pushport_t pushport = for_io;
+	mach_port_t sendp = fb->fb_header.fbh_sendp[pushport];
 	kern_return_t kr = KERN_FAILURE;
 
-	if (!ask_for_notifs && _dispatch_is_multithreaded_inline()) {
-		firehose_client_register_for_notifications(fb);
-		ask_for_notifs = true;
-	}
-
-	if (slowpath(sendp == MACH_PORT_DEAD)) {
+	if (unlikely(sendp == MACH_PORT_DEAD)) {
 		return;
 	}
 
-	if (fastpath(sendp)) {
-		kr = firehose_send_push_async(sendp, qos, for_io, ask_for_notifs);
-		if (likely(kr == KERN_SUCCESS)) {
+	if (likely(sendp)) {
+		kr = firehose_send_push_async(sendp, qos, 0);
+		if (likely(kr == KERN_SUCCESS || kr == MACH_SEND_TIMED_OUT)) {
 			return;
 		}
 		if (kr != MACH_SEND_INVALID_DEST) {
@@ -440,10 +458,10 @@
 		}
 	}
 
-	sendp = firehose_client_reconnect(fb, sendp);
-	if (fastpath(MACH_PORT_VALID(sendp))) {
-		kr = firehose_send_push_async(sendp, qos, for_io, ask_for_notifs);
-		if (likely(kr == KERN_SUCCESS)) {
+	sendp = firehose_client_reconnect(fb, sendp, pushport);
+	if (likely(MACH_PORT_VALID(sendp))) {
+		kr = firehose_send_push_async(sendp, qos, 0);
+		if (likely(kr == KERN_SUCCESS || kr == MACH_SEND_TIMED_OUT)) {
 			return;
 		}
 		if (kr != MACH_SEND_INVALID_DEST) {
@@ -485,13 +503,7 @@
 #ifndef KERNEL
 		// this isn't a dispatch_once so that the upcall to libtrace
 		// can actually log itself without blocking on the gate.
-		if (async_notif) {
-			if (os_atomic_xchg(&fbh->fbh_quarantined_state,
-					FBH_QUARANTINE_STARTED, relaxed) !=
-					FBH_QUARANTINE_STARTED) {
-				firehose_client_start_quarantine(fb);
-			}
-		} else if (os_atomic_load(&fbh->fbh_quarantined_state, relaxed) ==
+		if (os_atomic_load(&fbh->fbh_quarantined_state, relaxed) ==
 				FBH_QUARANTINE_NONE) {
 			os_atomic_cmpxchg(&fbh->fbh_quarantined_state, FBH_QUARANTINE_NONE,
 					FBH_QUARANTINE_PENDING, relaxed);
@@ -532,7 +544,7 @@
 
 	bank_updates = ((uint64_t)mem_delta << FIREHOSE_BANK_SHIFT(0)) |
 			((uint64_t)io_delta << FIREHOSE_BANK_SHIFT(1));
-	state.fbs_atomic_state = os_atomic_sub2o(fbh,
+	state.fbs_atomic_state = os_atomic_add2o(fbh,
 			fbh_bank.fbb_state.fbs_atomic_state, bank_updates, release);
 	__firehose_critical_region_leave();
 
@@ -549,23 +561,99 @@
 }
 
 #ifndef KERNEL
+void *
+firehose_buffer_get_logging_prefs(firehose_buffer_t fb, size_t *length)
+{
+	mach_port_t sendp = fb->fb_header.fbh_logd_port;
+	mach_port_t mem_port = MACH_PORT_NULL;
+	mach_vm_size_t size = 0;
+	mach_vm_address_t addr = 0;
+	kern_return_t kr;
+
+	if (unlikely(!MACH_PORT_VALID(sendp))) {
+		*length = 0;
+		return NULL;
+	}
+
+	kr = firehose_send_get_logging_prefs(sendp, &mem_port, &size);
+	if (unlikely(kr != KERN_SUCCESS)) {
+		if (kr != MACH_SEND_INVALID_DEST) {
+			DISPATCH_VERIFY_MIG(kr);
+			dispatch_assume_zero(kr);
+		}
+		*length = 0;
+		return NULL;
+	}
+
+	/* Map the memory handle into the server address space */
+	kr = mach_vm_map(mach_task_self(), &addr, size, 0,
+			VM_FLAGS_ANYWHERE, mem_port, 0, FALSE,
+			VM_PROT_READ, VM_PROT_READ, VM_INHERIT_NONE);
+	DISPATCH_VERIFY_MIG(kr);
+	if (dispatch_assume_zero(kr)) {
+		addr = 0;
+		size = 0;
+	}
+	kr = mach_port_deallocate(mach_task_self(), mem_port);
+	DISPATCH_VERIFY_MIG(kr);
+	dispatch_assume_zero(kr);
+
+	*length = (size_t)size;
+	return (void *)addr;
+}
+
+bool
+firehose_buffer_should_send_strings(firehose_buffer_t fb)
+{
+	mach_port_t sendp = fb->fb_header.fbh_sendp[FIREHOSE_BUFFER_PUSHPORT_MEM];
+	kern_return_t kr;
+	boolean_t result = false;
+
+	if (unlikely(sendp == MACH_PORT_DEAD)) {
+		return false;
+	}
+
+	if (likely(sendp)) {
+		kr = firehose_send_should_send_strings(sendp, &result);
+		if (likely(kr == KERN_SUCCESS)) {
+			return result;
+		}
+		if (kr != MACH_SEND_INVALID_DEST) {
+			DISPATCH_VERIFY_MIG(kr);
+			dispatch_assume_zero(kr);
+		}
+	}
+
+	sendp = firehose_client_reconnect(fb, sendp, FIREHOSE_BUFFER_PUSHPORT_MEM);
+	if (likely(MACH_PORT_VALID(sendp))) {
+		kr = firehose_send_should_send_strings(sendp, &result);
+		if (likely(kr == KERN_SUCCESS)) {
+			return result;
+		}
+		if (kr != MACH_SEND_INVALID_DEST) {
+			DISPATCH_VERIFY_MIG(kr);
+			dispatch_assume_zero(kr);
+		}
+	}
+	return false;
+}
+
 OS_NOT_TAIL_CALLED OS_NOINLINE
 static void
 firehose_client_send_push_and_wait(firehose_buffer_t fb, bool for_io,
 		firehose_bank_state_u *state_out)
 {
-	mach_port_t sendp = fb->fb_header.fbh_sendp;
+	firehose_buffer_pushport_t pushport = for_io;
+	mach_port_t sendp = fb->fb_header.fbh_sendp[pushport];
 	firehose_push_reply_t push_reply = { };
-	qos_class_t qos = qos_class_self();
 	boolean_t quarantined = false;
 	kern_return_t kr;
 
-	if (slowpath(sendp == MACH_PORT_DEAD)) {
+	if (unlikely(sendp == MACH_PORT_DEAD)) {
 		return;
 	}
-	if (fastpath(sendp)) {
-		kr = firehose_send_push_and_wait(sendp, qos, for_io,
-				&push_reply, &quarantined);
+	if (likely(sendp)) {
+		kr = firehose_send_push_and_wait(sendp, &push_reply, &quarantined);
 		if (likely(kr == KERN_SUCCESS)) {
 			goto success;
 		}
@@ -575,10 +663,9 @@
 		}
 	}
 
-	sendp = firehose_client_reconnect(fb, sendp);
-	if (fastpath(MACH_PORT_VALID(sendp))) {
-		kr = firehose_send_push_and_wait(sendp, qos, for_io,
-				&push_reply, &quarantined);
+	sendp = firehose_client_reconnect(fb, sendp, pushport);
+	if (likely(MACH_PORT_VALID(sendp))) {
+		kr = firehose_send_push_and_wait(sendp, &push_reply, &quarantined);
 		if (likely(kr == KERN_SUCCESS)) {
 			goto success;
 		}
@@ -639,9 +726,9 @@
 firehose_client_push_notify_async(mach_port_t server_port OS_UNUSED,
 	firehose_push_reply_t push_reply, boolean_t quarantined)
 {
-	// see _dispatch_source_merge_mach_msg_direct
-	dispatch_queue_t dq = _dispatch_queue_get_current();
-	firehose_buffer_t fb = dispatch_get_context(dq);
+	dispatch_thread_context_t ctxt =
+			_dispatch_thread_context_find(_firehose_key);
+	firehose_buffer_t fb = ctxt->dtc_fb;
 	firehose_client_merge_updates(fb, true, push_reply, quarantined, NULL);
 	return KERN_SUCCESS;
 }
@@ -662,46 +749,93 @@
 #endif // !KERNEL
 
 OS_ALWAYS_INLINE
+static inline uint64_t
+firehose_buffer_chunk_apply_stamp_slop(uint64_t stamp)
+{
+	// <rdar://problem/23562733> boot starts mach absolute time at
+	// 0, and wrapping around to values above UINT64_MAX -
+	// FIREHOSE_STAMP_SLOP breaks firehose_buffer_stream_flush()
+	// assumptions
+	return stamp > FIREHOSE_STAMP_SLOP ? stamp - FIREHOSE_STAMP_SLOP : 0;
+}
+
+OS_ALWAYS_INLINE
+static inline bool
+firehose_buffer_chunk_stamp_delta_fits(firehose_chunk_t fc, uint64_t stamp)
+{
+	return !((stamp - fc->fc_timestamp) >> 48);
+}
+
+OS_ALWAYS_INLINE
 static inline firehose_tracepoint_t
 firehose_buffer_chunk_init(firehose_chunk_t fc,
-		firehose_tracepoint_query_t ask, uint8_t **privptr)
+		firehose_tracepoint_query_t ask, uint8_t **privptr, uint64_t thread,
+		firehose_tracepoint_t *lft, uint64_t loss_start)
 {
+	firehose_tracepoint_t ft;
+	uint64_t stamp_and_len;
+
 	const uint16_t ft_size = offsetof(struct firehose_tracepoint_s, ft_data);
 
 	uint16_t pub_offs = offsetof(struct firehose_chunk_s, fc_data);
 	uint16_t priv_offs = FIREHOSE_CHUNK_SIZE;
 
+	if (unlikely(lft)) {
+		const uint16_t flp_size = sizeof(struct firehose_loss_payload_s);
+		uint64_t stamp, minstamp;
+		uint16_t flp_pub_offs;
+
+		// first, try to make both timestamps fit
+		minstamp = MIN(ask->stamp, loss_start);
+		fc->fc_timestamp =
+				firehose_buffer_chunk_apply_stamp_slop(minstamp);
+
+		// if they can't both fit, use the timestamp of the actual tracepoint:
+		//  a) this should _really_ never happen
+		//  b) if it does, a determined reader can tell that it did by comparing
+		//     the loss event start_stamp payload field with the main stamp
+		if (!firehose_buffer_chunk_stamp_delta_fits(fc, ask->stamp)) {
+			// if ask->stamp didn't fit on the first try it must be greater than
+			// loss_start by > 2^48, so it must also be greater than
+			// FIREHOSE_STAMP_SLOP - so no need to worry about underflow here
+			fc->fc_timestamp = ask->stamp - FIREHOSE_STAMP_SLOP;
+		}
+
+		*lft = (firehose_tracepoint_t)fc->fc_data;
+
+		stamp = firehose_buffer_chunk_stamp_delta_fits(fc, loss_start) ?
+				loss_start : ask->stamp;
+
+		stamp_and_len = stamp - fc->fc_timestamp;
+		stamp_and_len |= (uint64_t)flp_size << 48;
+		os_atomic_store2o(*lft, ft_stamp_and_length, stamp_and_len, relaxed);
+
+		(*lft)->ft_thread = thread; // not really meaningful
+
+		flp_pub_offs = roundup(ft_size + flp_size, 8);
+		pub_offs += flp_pub_offs;
+		ft = (firehose_tracepoint_t)(fc->fc_data + flp_pub_offs);
+	} else {
+		fc->fc_timestamp =
+				firehose_buffer_chunk_apply_stamp_slop(ask->stamp);
+		ft = (firehose_tracepoint_t)fc->fc_data;
+	}
+
 	pub_offs += roundup(ft_size + ask->pubsize, 8);
 	priv_offs -= ask->privsize;
 
-	if (fc->fc_pos.fcp_atomic_pos) {
-		// Needed for process death handling (recycle-reuse):
-		// No atomic fences required, we merely want to make sure the observers
-		// will see memory effects in program (asm) order.
-		// 1. the payload part of the chunk is cleared completely
-		// 2. the chunk is marked as reused
-		// This ensures that if we don't see a reference to a chunk in the ring
-		// and it is dirty, when crawling the chunk, we don't see remnants of
-		// other tracepoints
-		//
-		// We only do that when the fc_pos is non zero, because zero means
-		// we just faulted the chunk, and the kernel already bzero-ed it.
-		bzero(fc->fc_data, sizeof(fc->fc_data));
-	}
-	dispatch_compiler_barrier();
-	// <rdar://problem/23562733> boot starts mach absolute time at 0, and
-	// wrapping around to values above UINT64_MAX - FIREHOSE_STAMP_SLOP
-	// breaks firehose_buffer_stream_flush() assumptions
-	if (ask->stamp > FIREHOSE_STAMP_SLOP) {
-		fc->fc_timestamp = ask->stamp - FIREHOSE_STAMP_SLOP;
-	} else {
-		fc->fc_timestamp = 0;
-	}
+	// Needed for process death handling (tracepoint-begin):
+	// write the length before making the chunk visible
+	stamp_and_len = ask->stamp - fc->fc_timestamp;
+	stamp_and_len |= (uint64_t)ask->pubsize << 48;
+	os_atomic_store2o(ft, ft_stamp_and_length, stamp_and_len, relaxed);
+
+	ft->ft_thread = thread;
+
 	fc->fc_pos = (firehose_chunk_pos_u){
 		.fcp_next_entry_offs = pub_offs,
 		.fcp_private_offs = priv_offs,
 		.fcp_refcnt = 1,
-		.fcp_qos = firehose_buffer_qos_bits_propagate(),
 		.fcp_stream = ask->stream,
 		.fcp_flag_io = ask->for_io,
 		.fcp_quarantined = ask->quarantined,
@@ -710,70 +844,158 @@
 	if (privptr) {
 		*privptr = fc->fc_start + priv_offs;
 	}
-	return (firehose_tracepoint_t)fc->fc_data;
+	return ft;
 }
 
 OS_NOINLINE
 static firehose_tracepoint_t
 firehose_buffer_stream_chunk_install(firehose_buffer_t fb,
-		firehose_tracepoint_query_t ask, uint8_t **privptr, uint16_t ref)
+		firehose_tracepoint_query_t ask, uint8_t **privptr,
+		firehose_chunk_ref_t ref)
 {
 	firehose_stream_state_u state, new_state;
-	firehose_tracepoint_t ft;
+	firehose_tracepoint_t ft = NULL, lft;
 	firehose_buffer_header_t fbh = &fb->fb_header;
 	firehose_buffer_stream_t fbs = &fbh->fbh_stream[ask->stream];
-	uint64_t stamp_and_len;
 
-	if (fastpath(ref)) {
+	if (likely(ref)) {
+		uint64_t thread;
+		bool installed = false;
 		firehose_chunk_t fc = firehose_buffer_ref_to_chunk(fb, ref);
-		ft = firehose_buffer_chunk_init(fc, ask, privptr);
-		// Needed for process death handling (tracepoint-begin):
-		// write the length before making the chunk visible
-		stamp_and_len  = ask->stamp - fc->fc_timestamp;
-		stamp_and_len |= (uint64_t)ask->pubsize << 48;
-		os_atomic_store2o(ft, ft_stamp_and_length, stamp_and_len, relaxed);
-#ifdef KERNEL
-		ft->ft_thread = thread_tid(current_thread());
-#else
-		ft->ft_thread = _pthread_threadid_self_np_direct();
-#endif
-		if (ask->stream == firehose_stream_metadata) {
-			os_atomic_or2o(fbh, fbh_bank.fbb_metadata_bitmap,
-					1ULL << ref, relaxed);
+
+		if (fc->fc_pos.fcp_atomic_pos) {
+			// Needed for process death handling (recycle-reuse):
+			// No atomic fences required, we merely want to make sure the
+			// observers will see memory effects in program (asm) order.
+			// 1. the payload part of the chunk is cleared completely
+			// 2. the chunk is marked as reused
+			// This ensures that if we don't see a reference to a chunk in the
+			// ring and it is dirty, when crawling the chunk, we don't see
+			// remnants of other tracepoints.
+			//
+			// We only do that when the fc_pos is non zero, because zero means
+			// we just faulted the chunk, and the kernel already bzero-ed it.
+			bzero(fc->fc_data, sizeof(fc->fc_data));
 		}
-		// release barrier to make the chunk init visible
-		os_atomic_rmw_loop2o(fbs, fbs_state.fss_atomic_state,
-				state.fss_atomic_state, new_state.fss_atomic_state, release, {
-			// We use a generation counter to prevent a theoretical ABA problem:
-			// a thread could try to acquire a tracepoint in a chunk, fail to
-			// do so mark it as to be pushed, enqueue it, and then be preempted
-			//
-			// It sleeps for a long time, and then tries to acquire the
-			// allocator bit and uninstalling the chunk. Succeeds in doing so,
-			// but because the chunk actually happened to have cycled all the
-			// way back to being installed. That thread would effectively hide
-			// that unflushed chunk and leak it.
-			//
-			// Having a generation counter prevents the uninstallation of the
-			// chunk to spuriously succeed when it was a re-incarnation of it.
-			new_state = (firehose_stream_state_u){
-				.fss_current = ref,
-				.fss_generation = state.fss_generation + 1,
+		dispatch_compiler_barrier();
+
+		if (ask->stream == firehose_stream_metadata) {
+			os_atomic_or2o(fbh, fbh_bank.fbb_metadata_bitmap, 1ULL << ref,
+					relaxed);
+		}
+
+#if KERNEL
+		thread = thread_tid(current_thread());
+#else
+		thread = _pthread_threadid_self_np_direct();
+#endif
+
+		// If no tracepoints were lost at the tail end of this generation, the
+		// chunk timestamp is the stamp of the first tracepoint and the first
+		// tracepoint belongs at the beginning of the chunk.  If, however, we
+		// need to record a loss event, the timestamp has to be the minimum of
+		// the loss stamp and the stamp of the first tracepoint, and the loss
+		// event needs to be placed at the beginning of the chunk in addition to
+		// the first actual tracepoint.
+		state.fss_atomic_state =
+				os_atomic_load2o(fbs, fbs_state.fss_atomic_state, relaxed);
+
+		if (likely(!state.fss_loss)) {
+			ft = firehose_buffer_chunk_init(fc, ask, privptr, thread, NULL, 0);
+
+			// release to publish the chunk init
+			installed = os_atomic_rmw_loop2o(fbs, fbs_state.fss_atomic_state,
+					state.fss_atomic_state, new_state.fss_atomic_state, release, {
+				if (state.fss_loss) {
+					os_atomic_rmw_loop_give_up(break);
+				}
+				// clear the gate, waiter bits and loss count
+				new_state = (firehose_stream_state_u){
+					.fss_current = ref,
+					.fss_generation = state.fss_generation + 1,
+				};
+			});
+		}
+
+		if (unlikely(!installed)) {
+			uint64_t loss_start, loss_end;
+
+			// ensure we can see the start stamp
+			(void)os_atomic_load2o(fbs, fbs_state.fss_atomic_state, acquire);
+			loss_start = fbs->fbs_loss_start;
+			fbs->fbs_loss_start = 0; // reset under fss_gate
+			loss_end = mach_continuous_time();
+
+			ft = firehose_buffer_chunk_init(fc, ask, privptr, thread, &lft,
+					loss_start);
+			os_atomic_rmw_loop2o(fbs, fbs_state.fss_atomic_state,
+					state.fss_atomic_state, new_state.fss_atomic_state, release, {
+				// no giving up this time!
+				new_state = (firehose_stream_state_u){
+					.fss_current = ref,
+					.fss_generation = state.fss_generation + 1,
+				};
+			});
+
+			struct firehose_loss_payload_s flp = {
+				.start_stamp = loss_start,
+				.end_stamp = loss_end,
+				.count = state.fss_loss,
 			};
-		});
+			memcpy(lft->ft_data, &flp, sizeof(flp));
+
+			firehose_tracepoint_id_u ftid = { .ftid = {
+				._namespace = firehose_tracepoint_namespace_loss,
+				// no meaningful value for _type
+				// nor for _flags
+				._code = ask->stream,
+			} };
+
+			// publish the contents of the loss tracepoint
+			os_atomic_store2o(lft, ft_id.ftid_atomic_value, ftid.ftid_value,
+					release);
+		}
 	} else {
-		// the allocator gave up just clear the allocator + waiter bits
-		firehose_stream_state_u mask = { .fss_allocator = ~0u, };
-		state.fss_atomic_state = os_atomic_and_orig2o(fbs,
-				fbs_state.fss_atomic_state, ~mask.fss_atomic_state, relaxed);
-		ft = NULL;
+		// the allocator gave up - just clear the allocator and waiter bits and
+		// increment the loss count
+		state.fss_atomic_state =
+				os_atomic_load2o(fbs, fbs_state.fss_atomic_state, relaxed);
+		if (!state.fss_timestamped) {
+			fbs->fbs_loss_start = mach_continuous_time();
+
+			// release to publish the timestamp
+			os_atomic_rmw_loop2o(fbs, fbs_state.fss_atomic_state,
+					state.fss_atomic_state, new_state.fss_atomic_state,
+					release, {
+				new_state = (firehose_stream_state_u){
+					.fss_loss =
+							MIN(state.fss_loss + 1, FIREHOSE_LOSS_COUNT_MAX),
+					.fss_timestamped = true,
+					.fss_generation = state.fss_generation,
+				};
+			});
+		} else {
+			os_atomic_rmw_loop2o(fbs, fbs_state.fss_atomic_state,
+					state.fss_atomic_state, new_state.fss_atomic_state,
+					relaxed, {
+				new_state = (firehose_stream_state_u){
+					.fss_loss =
+							MIN(state.fss_loss + 1, FIREHOSE_LOSS_COUNT_MAX),
+					.fss_timestamped = true,
+					.fss_generation = state.fss_generation,
+				};
+			});
+		}
 	}
 
 	// pairs with the one in firehose_buffer_tracepoint_reserve()
 	__firehose_critical_region_leave();
 
 #ifndef KERNEL
-	if (unlikely(_dispatch_lock_is_locked_by_self(state.fss_gate.dgl_lock))) {
+	_dispatch_trace_firehose_chunk_install(((uint64_t *)ask)[0],
+			((uint64_t *)ask)[1], state.fss_atomic_state,
+			new_state.fss_atomic_state);
+	if (unlikely(state.fss_allocator & FIREHOSE_GATE_WAITERS_MASK)) {
 		_dispatch_gate_broadcast_slow(&fbs->fbs_state.fss_gate,
 				state.fss_gate.dgl_lock);
 	}
@@ -789,17 +1011,17 @@
 			firehose_client_start_quarantine(fb);
 		}
 	}
-#endif // KERNEL
+#endif // !KERNEL
 
 	return ft;
 }
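
The memory-ordering choices in firehose_buffer_stream_chunk_install() (release when installing the chunk ref, release store of the loss tracepoint id, acquire load before reading fbs_loss_start) all follow the same publish/consume discipline. A standalone C11 sketch of that pattern, for reference only and using plain <stdatomic.h> rather than the os_atomic_* wrappers:

#include <stdatomic.h>
#include <stdint.h>

// Sketch: the writer fills in the payload with plain stores, then publishes
// with a release store; a reader that observes the flag with an acquire
// load is guaranteed to also observe the payload writes.
struct sketch_slot {
	uint64_t payload;
	_Atomic uint32_t published;
};

static void
sketch_publish(struct sketch_slot *s, uint64_t value)
{
	s->payload = value;                     // plain store
	atomic_store_explicit(&s->published, 1, memory_order_release);
}

static int
sketch_consume(struct sketch_slot *s, uint64_t *out)
{
	if (atomic_load_explicit(&s->published, memory_order_acquire)) {
		*out = s->payload;              // ordered after the acquire load
		return 1;
	}
	return 0;
}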
 
 #ifndef KERNEL
 OS_ALWAYS_INLINE
-static inline uint16_t
+static inline firehose_chunk_ref_t
 firehose_buffer_ring_try_grow(firehose_buffer_bank_t fbb, uint16_t limit)
 {
-	uint16_t ref = 0;
+	firehose_chunk_ref_t ref = 0;
 	uint64_t bitmap;
 
 	_dispatch_unfair_lock_lock(&fbb->fbb_lock);
@@ -813,8 +1035,8 @@
 }
 
 OS_ALWAYS_INLINE
-static inline uint16_t
-firehose_buffer_ring_shrink(firehose_buffer_t fb, uint16_t ref)
+static inline firehose_chunk_ref_t
+firehose_buffer_ring_shrink(firehose_buffer_t fb, firehose_chunk_ref_t ref)
 {
 	const size_t madv_size =
 			FIREHOSE_CHUNK_SIZE * FIREHOSE_BUFFER_MADVISE_CHUNK_COUNT;
@@ -830,7 +1052,7 @@
 	}
 
 	bitmap = (fb->fb_header.fbh_bank.fbb_bitmap &= ~(1UL << ref));
-	ref &= ~madv_mask;
+	ref &= ~(FIREHOSE_BUFFER_MADVISE_CHUNK_COUNT - 1);
 	if ((bitmap & (madv_mask << ref)) == 0) {
 		// if MADVISE_WIDTH consecutive chunks are free, madvise them free
 		madvise(firehose_buffer_ref_to_chunk(fb, ref), madv_size, MADV_FREE);
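
The shrink path only returns memory to the kernel in groups of FIREHOSE_BUFFER_MADVISE_CHUNK_COUNT chunks. A small reference sketch of the bit arithmetic (not part of the patch; it assumes, as the hunk above implies, that madv_mask has the low FIREHOSE_BUFFER_MADVISE_CHUNK_COUNT bits set and that the group size is a power of two):

#include <stdbool.h>
#include <stdint.h>

#define SKETCH_MADVISE_CHUNK_COUNT 4u // stands in for the real constant

// Sketch: a freed chunk ref is rounded down to the start of its madvise
// group; the group may be handed back with MADV_FREE only when none of its
// chunks are still marked allocated in the bank bitmap.
static bool
sketch_group_is_reclaimable(uint64_t bank_bitmap, uint32_t ref)
{
	uint64_t madv_mask = (1ULL << SKETCH_MADVISE_CHUNK_COUNT) - 1; // 0b1111
	uint32_t group_start = ref & ~(SKETCH_MADVISE_CHUNK_COUNT - 1); // 6 -> 4
	return (bank_bitmap & (madv_mask << group_start)) == 0;
}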
@@ -844,7 +1066,7 @@
 
 OS_NOINLINE
 void
-firehose_buffer_ring_enqueue(firehose_buffer_t fb, uint16_t ref)
+firehose_buffer_ring_enqueue(firehose_buffer_t fb, firehose_chunk_ref_t ref)
 {
 	firehose_chunk_t fc = firehose_buffer_ref_to_chunk(fb, ref);
 	uint16_t volatile *fbh_ring;
@@ -916,9 +1138,9 @@
 		// a thread being preempted here for GEN_MASK worth of ring rotations,
 		// it could lead to the cmpxchg succeed, and have a bogus enqueue
 		// (confused enqueuer)
-		if (fastpath(os_atomic_cmpxchgv(&fbh_ring[idx], gen, gen | ref, &dummy,
+		if (likely(os_atomic_cmpxchgv(&fbh_ring[idx], gen, gen | ref, &dummy,
 				relaxed))) {
-			if (fastpath(os_atomic_cmpxchgv(fbh_ring_head, head, head + 1,
+			if (likely(os_atomic_cmpxchgv(fbh_ring_head, head, head + 1,
 					&head, release))) {
 				__firehose_critical_region_leave();
 				break;
@@ -949,27 +1171,31 @@
 void
 firehose_buffer_force_connect(firehose_buffer_t fb)
 {
-	mach_port_t sendp = fb->fb_header.fbh_sendp;
-	if (sendp == MACH_PORT_NULL) firehose_client_reconnect(fb, MACH_PORT_NULL);
+	mach_port_t sendp = fb->fb_header.fbh_sendp[FIREHOSE_BUFFER_PUSHPORT_MEM];
+	if (sendp == MACH_PORT_NULL) {
+		firehose_client_reconnect(fb, MACH_PORT_NULL,
+				FIREHOSE_BUFFER_PUSHPORT_MEM);
+	}
 }
 #endif
 
 OS_ALWAYS_INLINE
-static inline uint16_t
+static inline firehose_chunk_ref_t
 firehose_buffer_ring_try_recycle(firehose_buffer_t fb)
 {
 	firehose_ring_tail_u pos, old;
 	uint16_t volatile *fbh_ring;
-	uint16_t gen, ref, entry, tail;
+	uint16_t gen, entry, tail;
+	firehose_chunk_ref_t ref;
 	firehose_chunk_t fc;
 	bool for_io;
 
 	os_atomic_rmw_loop2o(&fb->fb_header, fbh_ring_tail.frp_atomic_tail,
 			old.frp_atomic_tail, pos.frp_atomic_tail, relaxed, {
 		pos = old;
-		if (fastpath(old.frp_mem_tail != old.frp_mem_flushed)) {
+		if (likely(old.frp_mem_tail != old.frp_mem_flushed)) {
 			pos.frp_mem_tail++;
-		} else if (fastpath(old.frp_io_tail != old.frp_io_flushed)) {
+		} else if (likely(old.frp_io_tail != old.frp_io_flushed)) {
 			pos.frp_io_tail++;
 		} else {
 			os_atomic_rmw_loop_give_up(return 0);
@@ -1016,46 +1242,42 @@
 OS_NOINLINE
 static firehose_tracepoint_t
 firehose_buffer_tracepoint_reserve_wait_for_chunks_from_logd(firehose_buffer_t fb,
-		firehose_tracepoint_query_t ask, uint8_t **privptr, uint16_t ref)
+		firehose_tracepoint_query_t ask, uint8_t **privptr)
 {
-	const uint64_t bank_unavail_mask = FIREHOSE_BANK_UNAVAIL_MASK(ask->for_io);
-	const uint64_t bank_inc = FIREHOSE_BANK_INC(ask->for_io);
+	bool for_io = ask->for_io;
+	firehose_buffer_pushport_t pushport = for_io;
 	firehose_buffer_bank_t const fbb = &fb->fb_header.fbh_bank;
 	firehose_bank_state_u state;
-	uint16_t fbs_max_ref;
+	firehose_chunk_ref_t ref, fbs_max_ref;
+
+	for (int i = MACH_PORT_QLIMIT_BASIC;
+			i-- && firehose_drain_notifications_once(fb); );
 
 	// first wait for our bank to have space, if needed
-	if (!fastpath(ask->is_bank_ok)) {
+	if (unlikely(!ask->is_bank_ok)) {
 		state.fbs_atomic_state =
 				os_atomic_load2o(fbb, fbb_state.fbs_atomic_state, relaxed);
-		while ((state.fbs_atomic_state - bank_inc) & bank_unavail_mask) {
+		while (!firehose_buffer_bank_try_reserve_slot(fb, for_io, &state)) {
 			if (ask->quarantined) {
-				__FIREHOSE_CLIENT_THROTTLED_DUE_TO_HEAVY_LOGGING__(fb,
-						ask->for_io, &state);
+				__FIREHOSE_CLIENT_THROTTLED_DUE_TO_HEAVY_LOGGING__(fb, for_io,
+						&state);
 			} else {
-				firehose_client_send_push_and_wait(fb, ask->for_io, &state);
+				firehose_client_send_push_and_wait(fb, for_io, &state);
 			}
-			if (slowpath(fb->fb_header.fbh_sendp == MACH_PORT_DEAD)) {
+			if (unlikely(fb->fb_header.fbh_sendp[pushport] == MACH_PORT_DEAD)) {
 				// logd was unloaded, give up
 				return NULL;
 			}
 		}
-		ask->is_bank_ok = true;
 		fbs_max_ref = state.fbs_max_ref;
 	} else {
 		fbs_max_ref = fbb->fbb_state.fbs_max_ref;
 	}
 
-	// second, if we were passed a chunk, we may need to shrink
-	if (slowpath(ref)) {
-		goto try_shrink;
-	}
-
 	// third, wait for a chunk to come up, and if not, wait on the daemon
 	for (;;) {
-		if (fastpath(ref = firehose_buffer_ring_try_recycle(fb))) {
-		try_shrink:
-			if (slowpath(ref >= fbs_max_ref)) {
+		if (likely(ref = firehose_buffer_ring_try_recycle(fb))) {
+			if (unlikely(ref >= fbs_max_ref)) {
 				ref = firehose_buffer_ring_shrink(fb, ref);
 				if (!ref) {
 					continue;
@@ -1063,16 +1285,16 @@
 			}
 			break;
 		}
-		if (fastpath(ref = firehose_buffer_ring_try_grow(fbb, fbs_max_ref))) {
+		if (likely(ref = firehose_buffer_ring_try_grow(fbb, fbs_max_ref))) {
 			break;
 		}
 		if (ask->quarantined) {
-			__FIREHOSE_CLIENT_THROTTLED_DUE_TO_HEAVY_LOGGING__(fb,
-					ask->for_io, &state);
+			__FIREHOSE_CLIENT_THROTTLED_DUE_TO_HEAVY_LOGGING__(fb, for_io,
+					NULL);
 		} else {
-			firehose_client_send_push_and_wait(fb, ask->for_io, NULL);
+			firehose_client_send_push_and_wait(fb, for_io, NULL);
 		}
-		if (slowpath(fb->fb_header.fbh_sendp == MACH_PORT_DEAD)) {
+		if (unlikely(fb->fb_header.fbh_sendp[pushport] == MACH_PORT_DEAD)) {
 			// logd was unloaded, give up
 			break;
 		}
@@ -1088,7 +1310,7 @@
 }
 OS_NOINLINE
 static void
-_dispatch_gate_wait(dispatch_gate_t l, uint32_t flags)
+_dispatch_firehose_gate_wait(dispatch_gate_t l, uint32_t flags)
 {
 	(void)flags;
 	_dispatch_wait_until(_dispatch_gate_lock_load_seq_cst(l) == 0);
@@ -1102,42 +1324,60 @@
 	const unsigned for_io = ask->for_io;
 	const firehose_buffer_bank_t fbb = &fb->fb_header.fbh_bank;
 	firehose_bank_state_u state;
-	uint16_t ref = 0;
+	bool reserved;
+	firehose_chunk_ref_t ref = 0;
 
-	uint64_t unavail_mask = FIREHOSE_BANK_UNAVAIL_MASK(for_io);
 #ifndef KERNEL
-	state.fbs_atomic_state = os_atomic_add_orig2o(fbb,
-			fbb_state.fbs_atomic_state, FIREHOSE_BANK_INC(for_io), acquire);
-	if (fastpath(!(state.fbs_atomic_state & unavail_mask))) {
-		ask->is_bank_ok = true;
-		if (fastpath(ref = firehose_buffer_ring_try_recycle(fb))) {
-			if (fastpath(ref < state.fbs_max_ref)) {
-				return firehose_buffer_stream_chunk_install(fb, ask,
-						privptr, ref);
+	// before we try to allocate anything, look at whether logd has already
+	// sent something back to us
+	firehose_drain_notifications_once(fb);
+#endif // KERNEL
+
+	state.fbs_atomic_state =
+			os_atomic_load2o(fbb, fbb_state.fbs_atomic_state, relaxed);
+	reserved = firehose_buffer_bank_try_reserve_slot(fb, for_io, &state);
+
+#ifndef KERNEL
+	if (likely(reserved)) {
+		while (!ref) {
+			ref = firehose_buffer_ring_try_recycle(fb);
+			if (unlikely(!ref)) {
+				break;
+			}
+
+			if (unlikely(ref >= state.fbs_max_ref)) {
+				ref = firehose_buffer_ring_shrink(fb, ref);
 			}
 		}
-	}
-	return firehose_buffer_tracepoint_reserve_wait_for_chunks_from_logd(fb, ask,
-			privptr, ref);
-#else
-	firehose_bank_state_u value;
-	ask->is_bank_ok = os_atomic_rmw_loop2o(fbb, fbb_state.fbs_atomic_state,
-			state.fbs_atomic_state, value.fbs_atomic_state, acquire, {
-		value = state;
-		if (slowpath((value.fbs_atomic_state & unavail_mask) != 0)) {
-			os_atomic_rmw_loop_give_up(break);
+
+		if (unlikely(!ref)) {
+			ref = firehose_buffer_ring_try_grow(fbb, state.fbs_max_ref);
 		}
-		value.fbs_atomic_state += FIREHOSE_BANK_INC(for_io);
-	});
-	if (ask->is_bank_ok) {
+	}
+
+	if (likely(ref || !ask->reliable)) {
+		if (!ref && reserved) {
+			firehose_buffer_bank_relinquish_slot(fb, for_io);
+		}
+
+		// installing `0` unlocks the allocator
+		return firehose_buffer_stream_chunk_install(fb, ask, privptr, ref);
+	} else {
+		firehose_buffer_stream_signal_waiting_for_logd(fb, ask->stream);
+
+		ask->is_bank_ok = reserved;
+		return firehose_buffer_tracepoint_reserve_wait_for_chunks_from_logd(fb,
+				ask, privptr);
+	}
+#else
+	if (likely(reserved)) {
 		ref = firehose_buffer_ring_try_recycle(fb);
-		if (slowpath(ref == 0)) {
-			// the kernel has no overlap between I/O and memory chunks,
-			// having an available bank slot means we should be able to recycle
+		if (unlikely(ref == 0)) {
+			// the kernel has no overlap between I/O and memory chunks, so
+			// having an available bank slot means we must be able to recycle
 			DISPATCH_INTERNAL_CRASH(0, "Unable to recycle a chunk");
 		}
 	}
-	// rdar://25137005 installing `0` unlocks the allocator
 	return firehose_buffer_stream_chunk_install(fb, ask, privptr, ref);
 #endif // KERNEL
 }
@@ -1148,11 +1388,11 @@
 		uint16_t pubsize, uint16_t privsize, uint8_t **privptr)
 {
 	firehose_buffer_t fb = kernel_firehose_buffer;
-	if (!fastpath(fb)) {
+	if (unlikely(!fb)) {
 		return NULL;
 	}
 	return firehose_buffer_tracepoint_reserve(fb, stamp, stream, pubsize,
-			privsize, privptr);
+			privsize, privptr, false);
 }
 
 firehose_buffer_t
@@ -1179,10 +1419,19 @@
 __firehose_merge_updates(firehose_push_reply_t update)
 {
 	firehose_buffer_t fb = kernel_firehose_buffer;
-	if (fastpath(fb)) {
+	if (likely(fb)) {
 		firehose_client_merge_updates(fb, true, update, false, NULL);
 	}
 }
+
+int
+__firehose_kernel_configuration_valid(uint8_t chunk_count, uint8_t io_pages)
+{
+	return (((chunk_count % 4) == 0) &&
+			(chunk_count >= FIREHOSE_BUFFER_KERNEL_MIN_CHUNK_COUNT) &&
+			(chunk_count <= FIREHOSE_BUFFER_KERNEL_MAX_CHUNK_COUNT) &&
+			(io_pages <= (chunk_count * 3 / 4)));
+}
 #endif // KERNEL
 
 #endif // OS_FIREHOSE_SPI
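
For completeness, a usage sketch of the new kernel configuration check. The values of FIREHOSE_BUFFER_KERNEL_MIN_CHUNK_COUNT and FIREHOSE_BUFFER_KERNEL_MAX_CHUNK_COUNT are not visible in this diff, and the caller below is hypothetical; the only constraints relied on are the ones in the predicate added above (chunk_count a multiple of 4, within [MIN, MAX], io_pages at most three quarters of chunk_count):

#include <stdint.h>

int __firehose_kernel_configuration_valid(uint8_t chunk_count, uint8_t io_pages);

// Hypothetical caller: gate an externally supplied configuration and fall
// back to defaults rather than honoring a bad one.
static void
sketch_apply_kernel_config(uint8_t chunk_count, uint8_t io_pages)
{
	if (!__firehose_kernel_configuration_valid(chunk_count, io_pages)) {
		chunk_count = 64; // hypothetical default, a multiple of 4
		io_pages = 16;    // 16 <= 64 * 3 / 4
	}
	// ... continue with the (possibly corrected) chunk_count / io_pages ...
}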
diff --git a/src/firehose/firehose_buffer_internal.h b/src/firehose/firehose_buffer_internal.h
index e41d9cb..0b91c49 100644
--- a/src/firehose/firehose_buffer_internal.h
+++ b/src/firehose/firehose_buffer_internal.h
@@ -31,28 +31,42 @@
 
 // firehose buffer is CHUNK_COUNT * CHUNK_SIZE big == 256k
 #define FIREHOSE_BUFFER_CHUNK_COUNT					64ul
-#ifdef KERNEL
-#define FIREHOSE_BUFFER_CHUNK_PREALLOCATED_COUNT	15
-#else
+#ifndef KERNEL
 #define FIREHOSE_BUFFER_CHUNK_PREALLOCATED_COUNT	4
 #define FIREHOSE_BUFFER_MADVISE_CHUNK_COUNT			4
 #endif
 
+#define FIREHOSE_RING_POS_GEN_INC		((uint16_t)(FIREHOSE_BUFFER_CHUNK_COUNT))
+#define FIREHOSE_RING_POS_IDX_MASK		((uint16_t)(FIREHOSE_RING_POS_GEN_INC - 1))
+#define FIREHOSE_RING_POS_GEN_MASK		((uint16_t)~FIREHOSE_RING_POS_IDX_MASK)
+
+#if __has_feature(c_static_assert)
+_Static_assert(FIREHOSE_RING_POS_IDX_MASK < 0xff,
+		"firehose chunk ref fits in its type with space for PRISTINE");
+#endif
+
+typedef uint8_t firehose_chunk_ref_t;
+
 static const unsigned long firehose_stream_uses_io_bank =
 	(1UL << firehose_stream_persist) |
-	(1UL << firehose_stream_special);
+	(1UL << firehose_stream_special) |
+	(1UL << firehose_stream_signpost);
 
 typedef union {
 #define FIREHOSE_BANK_SHIFT(bank)			(16 * (bank))
 #define FIREHOSE_BANK_INC(bank)				(1ULL << FIREHOSE_BANK_SHIFT(bank))
-#define FIREHOSE_BANK_UNAVAIL_BIT			((uint16_t)0x8000)
-#define FIREHOSE_BANK_UNAVAIL_MASK(bank)	(FIREHOSE_BANK_INC(bank) << 15)
 	uint64_t fbs_atomic_state;
 	struct {
-		uint16_t fbs_mem_bank;
-		uint16_t fbs_io_bank;
-		uint16_t fbs_max_ref;
-		uint16_t fbs_unused;
+		union {
+			struct {
+				uint16_t fbs_mem_bank;
+				uint16_t fbs_io_bank;
+			};
+			uint16_t fbs_banks[2];
+		};
+		firehose_chunk_ref_t fbs_max_ref;
+		uint8_t fbs_unused1;
+		uint16_t fbs_unused2;
 	};
 } firehose_bank_state_u;
 
@@ -89,15 +103,34 @@
 	uint64_t fss_atomic_state;
 	dispatch_gate_s fss_gate;
 	struct {
+#define FIREHOSE_GATE_RELIABLE_WAITERS_BIT 		0x00000001UL
+#define FIREHOSE_GATE_UNRELIABLE_WAITERS_BIT 	0x00000002UL
+#define FIREHOSE_GATE_WAITERS_MASK 				0x00000003UL
 		uint32_t fss_allocator;
-#define FIREHOSE_STREAM_STATE_PRISTINE		0xffff
-		uint16_t fss_current;
+#define FIREHOSE_STREAM_STATE_PRISTINE		0xff
+		firehose_chunk_ref_t fss_current;
+		uint8_t fss_loss : FIREHOSE_LOSS_COUNT_WIDTH;
+		uint8_t fss_timestamped : 1;
+		uint8_t fss_waiting_for_logd : 1;
+
+		/*
+		 * We use a generation counter to prevent a theoretical ABA problem: a
+		 * thread could try to acquire a tracepoint in a chunk, fail to do so,
+		 * mark it as to be pushed, enqueue it, and then be preempted.  It
+		 * sleeps for a long time, and then tries to acquire the allocator bit
+		 * and uninstall the chunk. It succeeds in doing so, but only because
+		 * the chunk happens to have cycled all the way back to being
+		 * installed; that thread would effectively hide the still unflushed
+		 * chunk and leak it. The generation counter prevents the
+		 * uninstallation from spuriously succeeding against such a
+		 * re-incarnation of the chunk.
+		 */
 		uint16_t fss_generation;
 	};
 } firehose_stream_state_u;
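
The ABA scenario described in the comment above is easier to see with a toy model. The sketch below is not the firehose code; it is a generic C11 illustration of why pairing the installed ref with a generation counter makes a stale uninstall CAS fail even when the very same ref value has been installed again in the meantime:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

// Toy state word: low 8 bits hold the installed ref, upper bits hold a
// generation that is bumped on every install.
static _Atomic uint32_t sketch_state;

static void
sketch_install(uint8_t ref)
{
	uint32_t old = atomic_load_explicit(&sketch_state, memory_order_relaxed);
	uint32_t gen;
	do {
		gen = (old >> 8) + 1;
	} while (!atomic_compare_exchange_weak_explicit(&sketch_state, &old,
			(gen << 8) | ref, memory_order_release, memory_order_relaxed));
}

// A stale uninstaller reuses the state word it observed long ago. Even if
// the same ref happens to be installed again, the generation no longer
// matches, so the CAS fails instead of hiding a live chunk.
static bool
sketch_try_uninstall(uint32_t observed_state)
{
	uint32_t expected = observed_state;
	return atomic_compare_exchange_strong_explicit(&sketch_state, &expected,
			expected & ~0xffu /* clear the ref, keep the generation */,
			memory_order_acquire, memory_order_relaxed);
}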
 
 typedef struct firehose_buffer_stream_s {
 	firehose_stream_state_u fbs_state;
+	uint64_t fbs_loss_start; // protected by fss_gate
 } OS_ALIGNED(128) *firehose_buffer_stream_t;
 
 typedef union {
@@ -110,9 +143,11 @@
 	};
 } firehose_ring_tail_u;
 
-#define FIREHOSE_RING_POS_GEN_INC		((uint16_t)(FIREHOSE_BUFFER_CHUNK_COUNT))
-#define FIREHOSE_RING_POS_IDX_MASK		((uint16_t)(FIREHOSE_RING_POS_GEN_INC - 1))
-#define FIREHOSE_RING_POS_GEN_MASK		((uint16_t)~FIREHOSE_RING_POS_IDX_MASK)
+OS_ENUM(firehose_buffer_pushport, uint8_t,
+	FIREHOSE_BUFFER_PUSHPORT_MEM,
+	FIREHOSE_BUFFER_PUSHPORT_IO,
+	FIREHOSE_BUFFER_NPUSHPORTS,
+);
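
With fbh_sendp now an array indexed by this enum, callers derive the push port directly from the for_io flag (see `firehose_buffer_pushport_t pushport = for_io` in firehose_buffer.c above). A trivial reference sketch of that mapping; the helper name is invented:

#include <stdbool.h>
#include <mach/mach.h>

// Sketch: for_io == false selects the memory push port, for_io == true the
// I/O push port.
static inline mach_port_t
sketch_pushport(const mach_port_t sendp[FIREHOSE_BUFFER_NPUSHPORTS], bool for_io)
{
	return sendp[for_io ? FIREHOSE_BUFFER_PUSHPORT_IO
			: FIREHOSE_BUFFER_PUSHPORT_MEM];
}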
 
 /*
  * Rings are circular buffers with CHUNK_COUNT entries, with 3 important markers
@@ -163,13 +198,11 @@
 	uint64_t						fbh_uniquepid;
 	pid_t							fbh_pid;
 	mach_port_t						fbh_logd_port;
-	mach_port_t volatile			fbh_sendp;
+	mach_port_t volatile			fbh_sendp[FIREHOSE_BUFFER_NPUSHPORTS];
 	mach_port_t						fbh_recvp;
 
 	// past that point fields may be aligned differently between 32 and 64bits
 #ifndef KERNEL
-	dispatch_once_t					fbh_notifs_pred OS_ALIGNED(64);
-	dispatch_source_t				fbh_notifs_source;
 	dispatch_unfair_lock_s			fbh_logd_lock;
 #define FBH_QUARANTINE_NONE		0
 #define FBH_QUARANTINE_PENDING	1
@@ -187,13 +220,14 @@
 
 // used to let the compiler pack these values in 1 or 2 registers
 typedef struct firehose_tracepoint_query_s {
+	uint64_t stamp;
 	uint16_t pubsize;
 	uint16_t privsize;
 	firehose_stream_t stream;
 	bool	 is_bank_ok;
-	bool     for_io;
-	bool     quarantined;
-	uint64_t stamp;
+	bool	 for_io : 1;
+	bool	 quarantined : 1;
+	bool	 reliable : 1;
 } *firehose_tracepoint_query_t;
 
 #ifndef FIREHOSE_SERVER
@@ -206,11 +240,17 @@
 firehose_buffer_tracepoint_reserve_slow(firehose_buffer_t fb,
 		firehose_tracepoint_query_t ask, uint8_t **privptr);
 
+void *
+firehose_buffer_get_logging_prefs(firehose_buffer_t fb, size_t *size);
+
+bool
+firehose_buffer_should_send_strings(firehose_buffer_t fb);
+
 void
 firehose_buffer_update_limits(firehose_buffer_t fb);
 
 void
-firehose_buffer_ring_enqueue(firehose_buffer_t fb, uint16_t ref);
+firehose_buffer_ring_enqueue(firehose_buffer_t fb, firehose_chunk_ref_t ref);
 
 void
 firehose_buffer_force_connect(firehose_buffer_t fb);
diff --git a/src/firehose/firehose_inline_internal.h b/src/firehose/firehose_inline_internal.h
index 51f8c68..a2c80c2 100644
--- a/src/firehose/firehose_inline_internal.h
+++ b/src/firehose/firehose_inline_internal.h
@@ -21,25 +21,30 @@
 #ifndef __FIREHOSE_INLINE_INTERNAL__
 #define __FIREHOSE_INLINE_INTERNAL__
 
+#ifndef _os_atomic_basetypeof
+#define _os_atomic_basetypeof(p) \
+		__typeof__(atomic_load_explicit(_os_atomic_c11_atomic(p), memory_order_relaxed))
+#endif
+
 #define firehose_atomic_maxv2o(p, f, v, o, m) \
 		os_atomic_rmw_loop2o(p, f, *(o), (v), m, { \
 			if (*(o) >= (v)) os_atomic_rmw_loop_give_up(break); \
 		})
 
 #define firehose_atomic_max2o(p, f, v, m)   ({ \
-		__typeof__((p)->f) _old; \
+		_os_atomic_basetypeof(&(p)->f) _old; \
 		firehose_atomic_maxv2o(p, f, v, &_old, m); \
 	})
 
 #ifndef KERNEL
 // caller must test for non zero first
 OS_ALWAYS_INLINE
-static inline uint16_t
+static inline firehose_chunk_ref_t
 firehose_bitmap_first_set(uint64_t bitmap)
 {
 	dispatch_assert(bitmap != 0);
 	// this builtin returns 0 if bitmap is 0, or (first bit set + 1)
-	return (uint16_t)__builtin_ffsll((long long)bitmap) - 1;
+	return (firehose_chunk_ref_t)__builtin_ffsll((long long)bitmap) - 1;
 }
 #endif
 
@@ -49,11 +54,13 @@
 
 OS_ALWAYS_INLINE
 static inline mach_port_t
-firehose_mach_port_allocate(uint32_t flags, void *ctx)
+firehose_mach_port_allocate(uint32_t flags, mach_port_msgcount_t qlimit,
+		void *ctx)
 {
 	mach_port_t port = MACH_PORT_NULL;
 	mach_port_options_t opts = {
-		.flags = flags,
+		.flags = flags | MPO_QLIMIT,
+		.mpl = { .mpl_qlimit = qlimit },
 	};
 	kern_return_t kr = mach_port_construct(mach_task_self(), &opts,
 			(mach_port_context_t)ctx, &port);
@@ -107,7 +114,8 @@
 		expects_reply = true;
 	}
 
-	if (!fastpath(demux(hdr, &msg_reply->Head))) {
+	msg_reply->Head = (mach_msg_header_t){ };
+	if (unlikely(!demux(hdr, &msg_reply->Head))) {
 		rc = MIG_BAD_ID;
 	} else if (msg_reply->Head.msgh_bits & MACH_MSGH_BITS_COMPLEX) {
 		rc = KERN_SUCCESS;
@@ -117,14 +125,14 @@
 		rc = msg_reply->RetCode;
 	}
 
-	if (slowpath(rc == KERN_SUCCESS && expects_reply)) {
+	if (unlikely(rc == KERN_SUCCESS && expects_reply)) {
 		// if crashing here, some handler returned KERN_SUCCESS
 		// hoping for firehose_mig_server to perform the mach_msg()
 		// call to reply, and it doesn't know how to do that
 		DISPATCH_INTERNAL_CRASH(msg_reply->Head.msgh_id,
 				"firehose_mig_server doesn't handle replies");
 	}
-	if (slowpath(rc != KERN_SUCCESS && rc != MIG_NO_REPLY)) {
+	if (unlikely(rc != KERN_SUCCESS && rc != MIG_NO_REPLY)) {
 		// destroy the request - but not the reply port
 		hdr->msgh_remote_port = 0;
 		mach_msg_destroy(hdr);
@@ -144,15 +152,15 @@
 }
 
 OS_ALWAYS_INLINE
-static inline uint16_t
+static inline firehose_chunk_ref_t
 firehose_buffer_chunk_to_ref(firehose_buffer_t fb, firehose_chunk_t fbc)
 {
-	return (uint16_t)(fbc - fb->fb_chunks);
+	return (firehose_chunk_ref_t)(fbc - fb->fb_chunks);
 }
 
 OS_ALWAYS_INLINE
 static inline firehose_chunk_t
-firehose_buffer_ref_to_chunk(firehose_buffer_t fb, uint16_t ref)
+firehose_buffer_ref_to_chunk(firehose_buffer_t fb, firehose_chunk_ref_t ref)
 {
 	return fb->fb_chunks + ref;
 }
@@ -161,20 +169,6 @@
 #if DISPATCH_PURE_C
 
 OS_ALWAYS_INLINE
-static inline uint8_t
-firehose_buffer_qos_bits_propagate(void)
-{
-#ifndef KERNEL
-	pthread_priority_t pp = _dispatch_priority_propagate();
-
-	pp &= _PTHREAD_PRIORITY_QOS_CLASS_MASK;
-	return (uint8_t)(pp >> _PTHREAD_PRIORITY_QOS_CLASS_SHIFT);
-#else
-	return 0;
-#endif
-}
-
-OS_ALWAYS_INLINE
 static inline void
 firehose_buffer_stream_flush(firehose_buffer_t fb, firehose_stream_t stream)
 {
@@ -182,7 +176,7 @@
 	firehose_stream_state_u old_state, new_state;
 	firehose_chunk_t fc;
 	uint64_t stamp = UINT64_MAX; // will cause the reservation to fail
-	uint16_t ref;
+	firehose_chunk_ref_t ref;
 	long result;
 
 	old_state.fss_atomic_state =
@@ -198,7 +192,7 @@
 
 	fc = firehose_buffer_ref_to_chunk(fb, old_state.fss_current);
 	result = firehose_chunk_tracepoint_try_reserve(fc, stamp, stream,
-			firehose_buffer_qos_bits_propagate(), 1, 0, NULL);
+			0, 1, 0, NULL);
 	if (likely(result < 0)) {
 		firehose_buffer_ring_enqueue(fb, old_state.fss_current);
 	}
@@ -247,6 +241,10 @@
  * @param privptr
  * The pointer to the private buffer, can be NULL
  *
+ * @param reliable
+ * Whether we should wait for logd or drop the tracepoint in the event that no
+ * chunk is available.
+ *
  * @result
  * The pointer to the tracepoint.
  */
@@ -254,17 +252,15 @@
 static inline firehose_tracepoint_t
 firehose_buffer_tracepoint_reserve(firehose_buffer_t fb, uint64_t stamp,
 		firehose_stream_t stream, uint16_t pubsize,
-		uint16_t privsize, uint8_t **privptr)
+		uint16_t privsize, uint8_t **privptr, bool reliable)
 {
 	firehose_buffer_stream_t fbs = &fb->fb_header.fbh_stream[stream];
 	firehose_stream_state_u old_state, new_state;
 	firehose_chunk_t fc;
-#if KERNEL
-	bool failable = false;
-#endif
+	bool waited = false;
 	bool success;
 	long result;
-	uint16_t ref;
+	firehose_chunk_ref_t ref;
 
 	// cannot use os_atomic_rmw_loop2o, _page_try_reserve does a store
 	old_state.fss_atomic_state =
@@ -276,11 +272,10 @@
 		if (likely(ref && ref != FIREHOSE_STREAM_STATE_PRISTINE)) {
 			fc = firehose_buffer_ref_to_chunk(fb, ref);
 			result = firehose_chunk_tracepoint_try_reserve(fc, stamp, stream,
-					firehose_buffer_qos_bits_propagate(),
-					pubsize, privsize, privptr);
+					0, pubsize, privsize, privptr);
 			if (likely(result > 0)) {
 				uint64_t thread;
-#ifdef KERNEL
+#if KERNEL
 				thread = thread_tid(current_thread());
 #else
 				thread = _pthread_threadid_self_np_direct();
@@ -293,28 +288,70 @@
 			}
 			new_state.fss_current = 0;
 		}
-#if KERNEL
-		if (failable) {
-			return NULL;
-		}
+
+		if (!reliable && ((waited && old_state.fss_timestamped)
+#ifndef KERNEL
+				|| old_state.fss_waiting_for_logd
 #endif
+			)) {
+			new_state.fss_loss =
+					MIN(old_state.fss_loss + 1, FIREHOSE_LOSS_COUNT_MAX);
+
+			success = os_atomic_cmpxchgv2o(fbs, fbs_state.fss_atomic_state,
+					old_state.fss_atomic_state, new_state.fss_atomic_state,
+					&old_state.fss_atomic_state, relaxed);
+			if (success) {
+#ifndef KERNEL
+				_dispatch_trace_firehose_reserver_gave_up(stream, ref, waited,
+						old_state.fss_atomic_state, new_state.fss_atomic_state);
+#endif
+				return NULL;
+			} else {
+				continue;
+			}
+		}
 
 		if (unlikely(old_state.fss_allocator)) {
-			_dispatch_gate_wait(&fbs->fbs_state.fss_gate,
+#if KERNEL
+			_dispatch_firehose_gate_wait(&fbs->fbs_state.fss_gate,
 					DLOCK_LOCK_DATA_CONTENTION);
+			waited = true;
+
 			old_state.fss_atomic_state =
 					os_atomic_load2o(fbs, fbs_state.fss_atomic_state, relaxed);
-#if KERNEL
-			failable = true;
+#else
+			if (likely(reliable)) {
+				new_state.fss_allocator |= FIREHOSE_GATE_RELIABLE_WAITERS_BIT;
+			} else {
+				new_state.fss_allocator |= FIREHOSE_GATE_UNRELIABLE_WAITERS_BIT;
+			}
+
+			bool already_equal = (new_state.fss_atomic_state ==
+					old_state.fss_atomic_state);
+			success = already_equal || os_atomic_cmpxchgv2o(fbs,
+					fbs_state.fss_atomic_state, old_state.fss_atomic_state,
+					new_state.fss_atomic_state, &old_state.fss_atomic_state,
+					relaxed);
+			if (success) {
+				_dispatch_trace_firehose_reserver_wait(stream, ref, waited,
+						old_state.fss_atomic_state, new_state.fss_atomic_state,
+						reliable);
+				_dispatch_firehose_gate_wait(&fbs->fbs_state.fss_gate,
+						new_state.fss_allocator,
+						DLOCK_LOCK_DATA_CONTENTION);
+				waited = true;
+
+				old_state.fss_atomic_state = os_atomic_load2o(fbs,
+						fbs_state.fss_atomic_state, relaxed);
+			}
 #endif
 			continue;
 		}
 
-		// if the thread doing the allocation is a low priority one
-		// we may starve high priority ones.
-		// so disable preemption before we become an allocator
-		// the reenabling of the preemption is in
-		// firehose_buffer_stream_chunk_install
+		// if the thread doing the allocation is of low priority we may starve
+		// threads of higher priority, so disable pre-emption before becoming
+		// the allocator (it is re-enabled in
+		// firehose_buffer_stream_chunk_install())
 		__firehose_critical_region_enter();
 #if KERNEL
 		new_state.fss_allocator = (uint32_t)cpu_number();
@@ -331,6 +368,7 @@
 	}
 
 	struct firehose_tracepoint_query_s ask = {
+		.stamp = stamp,
 		.pubsize = pubsize,
 		.privsize = privsize,
 		.stream = stream,
@@ -338,8 +376,15 @@
 #ifndef KERNEL
 		.quarantined = fb->fb_header.fbh_quarantined,
 #endif
-		.stamp = stamp,
+		.reliable = reliable,
 	};
+
+#ifndef KERNEL
+	_dispatch_trace_firehose_allocator(((uint64_t *)&ask)[0],
+			((uint64_t *)&ask)[1], old_state.fss_atomic_state,
+			new_state.fss_atomic_state);
+#endif
+
 	return firehose_buffer_tracepoint_reserve_slow(fb, &ask, privptr);
 }
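
The control flow above is dense; as a reading aid only, the point at which an unreliable reserver gives up and records a loss can be summarized by the predicate below (the helper name is invented, and the waiting_for_logd term only exists in the !KERNEL build, per the #ifndef above):

#include <stdbool.h>

// Sketch: reliable reservers never drop; an unreliable reserver drops once
// it has already waited and a loss timestamp exists, or (userspace only) as
// soon as the stream is known to be waiting on logd.
static inline bool
sketch_reserver_should_drop(bool reliable, bool waited, bool timestamped,
		bool waiting_for_logd)
{
	if (reliable) {
		return false;
	}
	return (waited && timestamped) || waiting_for_logd;
}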
 
@@ -379,9 +424,83 @@
 	}
 }
 
+OS_ALWAYS_INLINE
+static inline bool
+firehose_buffer_bank_try_reserve_slot(firehose_buffer_t fb, bool for_io,
+		firehose_bank_state_u *state_in_out)
+{
+	bool success;
+	firehose_buffer_bank_t fbb = &fb->fb_header.fbh_bank;
+
+	firehose_bank_state_u old_state = *state_in_out, new_state;
+	do {
+		if (unlikely(!old_state.fbs_banks[for_io])) {
+			return false;
+		}
+		new_state = old_state;
+		new_state.fbs_banks[for_io]--;
+
+		success = os_atomic_cmpxchgvw(&fbb->fbb_state.fbs_atomic_state,
+				old_state.fbs_atomic_state, new_state.fbs_atomic_state,
+				&old_state.fbs_atomic_state, acquire);
+	} while (unlikely(!success));
+
+	*state_in_out = new_state;
+	return true;
+}
+
 #ifndef KERNEL
 OS_ALWAYS_INLINE
 static inline void
+firehose_buffer_stream_signal_waiting_for_logd(firehose_buffer_t fb,
+		firehose_stream_t stream)
+{
+	firehose_stream_state_u state, new_state;
+	firehose_buffer_stream_t fbs = &fb->fb_header.fbh_stream[stream];
+
+	state.fss_atomic_state =
+			os_atomic_load2o(fbs, fbs_state.fss_atomic_state, relaxed);
+	if (!state.fss_timestamped) {
+		fbs->fbs_loss_start = mach_continuous_time();
+
+		// release to publish the timestamp
+		os_atomic_rmw_loop2o(fbs, fbs_state.fss_atomic_state,
+				state.fss_atomic_state, new_state.fss_atomic_state,
+				release, {
+			new_state = (firehose_stream_state_u){
+				.fss_allocator = (state.fss_allocator &
+						~FIREHOSE_GATE_UNRELIABLE_WAITERS_BIT),
+				.fss_loss = state.fss_loss,
+				.fss_timestamped = true,
+				.fss_waiting_for_logd = true,
+				.fss_generation = state.fss_generation,
+			};
+		});
+	} else {
+		os_atomic_rmw_loop2o(fbs, fbs_state.fss_atomic_state,
+				state.fss_atomic_state, new_state.fss_atomic_state,
+				relaxed, {
+			new_state = (firehose_stream_state_u){
+				.fss_allocator = (state.fss_allocator &
+						~FIREHOSE_GATE_UNRELIABLE_WAITERS_BIT),
+				.fss_loss = state.fss_loss,
+				.fss_timestamped = true,
+				.fss_waiting_for_logd = true,
+				.fss_generation = state.fss_generation,
+			};
+		});
+	}
+
+	_dispatch_trace_firehose_wait_for_logd(stream, fbs->fbs_loss_start,
+			state.fss_atomic_state, new_state.fss_atomic_state);
+	if (unlikely(state.fss_allocator & FIREHOSE_GATE_UNRELIABLE_WAITERS_BIT)) {
+		_dispatch_gate_broadcast_slow(&fbs->fbs_state.fss_gate,
+				state.fss_gate.dgl_lock);
+	}
+}
+
+OS_ALWAYS_INLINE
+static inline void
 firehose_buffer_clear_bank_flags(firehose_buffer_t fb, unsigned long bits)
 {
 	firehose_buffer_bank_t fbb = &fb->fb_header.fbh_bank;
@@ -405,6 +524,15 @@
 		firehose_buffer_update_limits(fb);
 	}
 }
+
+OS_ALWAYS_INLINE
+static inline void
+firehose_buffer_bank_relinquish_slot(firehose_buffer_t fb, bool for_io)
+{
+	firehose_buffer_bank_t fbb = &fb->fb_header.fbh_bank;
+	os_atomic_add2o(fbb, fbb_state.fbs_atomic_state, FIREHOSE_BANK_INC(for_io),
+			relaxed);
+}
 #endif // !KERNEL
 
 #endif // !defined(FIREHOSE_SERVER)
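
The two bank helpers added above are meant to be used as a pair: reserve a slot before hunting for a chunk, and give the slot back if the hunt fails so the accounting stays balanced (this is what firehose_buffer_tracepoint_reserve_slow() does with `if (!ref && reserved)`). A simplified usage sketch, compilable only against these internal headers and with the function name invented:

// Sketch: reserve, hunt, and relinquish on failure.
static firehose_chunk_ref_t
sketch_acquire_chunk(firehose_buffer_t fb, bool for_io)
{
	firehose_buffer_bank_t fbb = &fb->fb_header.fbh_bank;
	firehose_bank_state_u state;
	firehose_chunk_ref_t ref;

	state.fbs_atomic_state =
			os_atomic_load2o(fbb, fbb_state.fbs_atomic_state, relaxed);
	if (!firehose_buffer_bank_try_reserve_slot(fb, for_io, &state)) {
		return 0; // no slots: the caller decides whether to wait or drop
	}
	ref = firehose_buffer_ring_try_recycle(fb);
	if (!ref) {
		ref = firehose_buffer_ring_try_grow(fbb, state.fbs_max_ref);
	}
	if (!ref) {
		firehose_buffer_bank_relinquish_slot(fb, for_io); // undo the reserve
	}
	return ref;
}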
diff --git a/src/firehose/firehose_reply.defs b/src/firehose/firehose_reply.defs
index c080545..caef7b4 100644
--- a/src/firehose/firehose_reply.defs
+++ b/src/firehose/firehose_reply.defs
@@ -31,15 +31,19 @@
 skip; // firehose_register
 
 simpleroutine push_reply(
-RequestPort	req_port		: mach_port_move_send_once_t;
-in			rtc				: kern_return_t;
-in			push_reply		: firehose_push_reply_t;
-in			quarantined		: boolean_t
+RequestPort req_port        : mach_port_move_send_once_t;
+in          ReturnCode      : kern_return_t;
+in          push_reply      : firehose_push_reply_t;
+in          quarantined     : boolean_t
 );
 
 simpleroutine push_notify_async(
-RequestPort	comm_port		: mach_port_t;
-in			push_reply		: firehose_push_reply_t;
-in			quarantined		: boolean_t;
-WaitTime	timeout			: natural_t
+RequestPort comm_port       : mach_port_t;
+in          push_reply      : firehose_push_reply_t;
+in          quarantined     : boolean_t;
+WaitTime    timeout         : natural_t
 );
+
+skip; // get_logging_prefs_reply
+
+skip; // should_send_strings
diff --git a/src/firehose/firehose_server.c b/src/firehose/firehose_server.c
index ba335db..a674c8f 100644
--- a/src/firehose/firehose_server.c
+++ b/src/firehose/firehose_server.c
@@ -39,15 +39,18 @@
 static struct firehose_server_s {
 	mach_port_t			fs_bootstrap_port;
 	dispatch_mach_t		fs_mach_channel;
-	dispatch_queue_t	fs_ipc_queue;
 	dispatch_queue_t	fs_snapshot_gate_queue;
 	dispatch_queue_t	fs_io_drain_queue;
 	dispatch_queue_t	fs_mem_drain_queue;
 	firehose_handler_t	fs_handler;
 
 	firehose_snapshot_t fs_snapshot;
-	int					fs_kernel_fd;
 	firehose_client_t	fs_kernel_client;
+	int					fs_kernel_fd;
+
+	mach_port_t         fs_prefs_cache_entry;
+	size_t              fs_prefs_cache_size;
+	void               *fs_prefs_cache;
 
 	TAILQ_HEAD(, firehose_client_s) fs_clients;
 	os_unfair_lock      fs_clients_lock;
@@ -74,13 +77,15 @@
 	os_unfair_lock_unlock(&server_config.fs_clients_lock);
 }
 
-static void firehose_server_demux(firehose_client_t fc,
-		mach_msg_header_t *msg_hdr);
 static void firehose_client_cancel(firehose_client_t fc);
 static void firehose_client_snapshot_finish(firehose_client_t fc,
 		firehose_snapshot_t snapshot, bool for_io);
 static void firehose_client_handle_death(void *ctxt);
 
+static const struct mig_subsystem *const firehose_subsystems[] = {
+	(mig_subsystem_t)&firehose_server_firehose_subsystem,
+};
+
 #pragma mark -
 #pragma mark firehose client enqueueing
 
@@ -121,26 +126,22 @@
 
 OS_ALWAYS_INLINE
 static inline void
-firehose_client_push(firehose_client_t fc, pthread_priority_t pp,
-		bool quarantined, bool for_io)
+firehose_client_push(firehose_client_t fc, bool quarantined, bool for_io)
 {
 	fs_client_queue_t queue = fs_queue(quarantined, for_io);
-	if (fc && os_mpsc_push_update_tail(queue, fs_client, fc, fc_next[for_io])) {
-		os_mpsc_push_update_head(queue, fs_client, fc);
-		_dispatch_source_merge_data(fs_source(quarantined, for_io), pp, 1);
-	} else if (pp) {
-		_dispatch_source_merge_data(fs_source(quarantined, for_io), pp, 1);
+	if (fc && os_mpsc_push_item(os_mpsc(queue, fs_client),
+			fc, fc_next[for_io])) {
+		dispatch_source_merge_data(fs_source(quarantined, for_io), 1);
 	}
 }
 
 OS_ALWAYS_INLINE
 static inline bool
-firehose_client_wakeup(firehose_client_t fc, pthread_priority_t pp,
-		bool for_io)
+firehose_client_wakeup(firehose_client_t fc, bool for_io)
 {
-	uintptr_t canceled_bit = FC_STATE_CANCELED(for_io);
-	uintptr_t enqueued_bit = FC_STATE_ENQUEUED(for_io);
-	uintptr_t old_state, new_state;
+	uint16_t canceled_bit = FC_STATE_CANCELED(for_io);
+	uint16_t enqueued_bit = FC_STATE_ENQUEUED(for_io);
+	uint16_t old_state, new_state;
 
 	os_atomic_rmw_loop(&fc->fc_state, old_state, new_state, relaxed, {
 		if (old_state & canceled_bit) {
@@ -151,7 +152,7 @@
 		}
 		new_state = old_state | enqueued_bit;
 	});
-	firehose_client_push(old_state & enqueued_bit ? NULL : fc, pp,
+	firehose_client_push(old_state & enqueued_bit ? NULL : fc,
 			fc->fc_quarantined, for_io);
 	return true;
 }
@@ -160,10 +161,10 @@
 static inline void
 firehose_client_start_cancel(firehose_client_t fc, bool for_io)
 {
-	uintptr_t canceling_bit = FC_STATE_CANCELING(for_io);
-	uintptr_t canceled_bit = FC_STATE_CANCELED(for_io);
-	uintptr_t enqueued_bit = FC_STATE_ENQUEUED(for_io);
-	uintptr_t old_state, new_state;
+	uint16_t canceling_bit = FC_STATE_CANCELING(for_io);
+	uint16_t canceled_bit = FC_STATE_CANCELED(for_io);
+	uint16_t enqueued_bit = FC_STATE_ENQUEUED(for_io);
+	uint16_t old_state, new_state;
 
 	os_atomic_rmw_loop(&fc->fc_state, old_state, new_state, relaxed, {
 		if (old_state & (canceled_bit | canceling_bit)) {
@@ -171,7 +172,7 @@
 		}
 		new_state = old_state | enqueued_bit | canceling_bit;
 	});
-	firehose_client_push(old_state & enqueued_bit ? NULL : fc, 0,
+	firehose_client_push(old_state & enqueued_bit ? NULL : fc,
 			fc->fc_quarantined, for_io);
 }
 
@@ -179,10 +180,10 @@
 static inline bool
 firehose_client_dequeue(firehose_client_t fc, bool for_io)
 {
-	uintptr_t canceling_bit = FC_STATE_CANCELING(for_io);
-	uintptr_t canceled_bit = FC_STATE_CANCELED(for_io);
-	uintptr_t enqueued_bit = FC_STATE_ENQUEUED(for_io);
-	uintptr_t old_state, new_state;
+	uint16_t canceling_bit = FC_STATE_CANCELING(for_io);
+	uint16_t canceled_bit = FC_STATE_CANCELED(for_io);
+	uint16_t enqueued_bit = FC_STATE_ENQUEUED(for_io);
+	uint16_t old_state, new_state;
 
 	os_atomic_rmw_loop(&fc->fc_state, old_state, new_state, relaxed, {
 		new_state = old_state & ~(canceling_bit | enqueued_bit);
@@ -254,7 +255,6 @@
 {
 	// this client is really confused, do *not* answer to asyncs anymore
 	fc->fc_memory_corrupted = true;
-	fc->fc_use_notifs = false;
 
 	// XXX: do not cancel the data sources or a corrupted client could
 	// prevent snapshots from being taken if unlucky with ordering
@@ -292,7 +292,8 @@
 	firehose_chunk_t fbc;
 	firehose_event_t evt;
 	uint16_t volatile *fbh_ring;
-	uint16_t flushed, ref, count = 0;
+	uint16_t flushed, count = 0;
+	firehose_chunk_ref_t ref;
 	uint16_t client_head, client_flushed, sent_flushed;
 	firehose_snapshot_t snapshot = NULL;
 	bool for_io = (flags & FIREHOSE_DRAIN_FOR_IO);
@@ -315,7 +316,7 @@
 		if (fc->fc_needs_mem_snapshot) snapshot = server_config.fs_snapshot;
 	}
 
-	if (slowpath(fc->fc_memory_corrupted)) {
+	if (unlikely(fc->fc_memory_corrupted)) {
 		goto corrupt;
 	}
 
@@ -335,7 +336,7 @@
 		// see firehose_buffer_ring_enqueue
 		do {
 			ref = (flushed + count) & FIREHOSE_RING_POS_IDX_MASK;
-			ref = os_atomic_load(&fbh_ring[ref], relaxed);
+			ref = (firehose_chunk_ref_t)os_atomic_load(&fbh_ring[ref], relaxed);
 			ref &= FIREHOSE_RING_POS_IDX_MASK;
 		} while (!fc->fc_pid && !ref);
 		count++;
@@ -345,20 +346,21 @@
 		}
 
 		fbc = firehose_buffer_ref_to_chunk(fb, ref);
-		if (fbc->fc_pos.fcp_stream == firehose_stream_metadata) {
+		firehose_chunk_pos_u fc_pos = fbc->fc_pos;
+		if (fc_pos.fcp_stream == firehose_stream_metadata) {
 			// serialize with firehose_client_metadata_stream_peek
 			os_unfair_lock_lock(&fc->fc_lock);
 		}
-		server_config.fs_handler(fc, evt, fbc);
-		if (slowpath(snapshot)) {
-			snapshot->handler(fc, evt, fbc);
+		server_config.fs_handler(fc, evt, fbc, fc_pos);
+		if (unlikely(snapshot)) {
+			snapshot->handler(fc, evt, fbc, fc_pos);
 		}
-		if (fbc->fc_pos.fcp_stream == firehose_stream_metadata) {
+		if (fc_pos.fcp_stream == firehose_stream_metadata) {
 			os_unfair_lock_unlock(&fc->fc_lock);
 		}
 		// clients not using notifications (single threaded) always drain fully
 		// because they use all their limit, always
-	} while (!fc->fc_use_notifs || count < DRAIN_BATCH_SIZE || snapshot);
+	} while (count < DRAIN_BATCH_SIZE || snapshot);
 
 	if (count) {
 		// we don't load the full fbh_ring_tail because that is a 64bit quantity
@@ -376,12 +378,12 @@
 		if (!fc->fc_pid) {
 			// will fire firehose_client_notify() because port is MACH_PORT_DEAD
 			port = fc->fc_sendp;
-		} else if (!port && client_flushed == sent_flushed && fc->fc_use_notifs) {
+		} else if (!port && client_flushed == sent_flushed) {
 			port = fc->fc_sendp;
 		}
 	}
 
-	if (slowpath(snapshot)) {
+	if (unlikely(snapshot)) {
 		firehose_client_snapshot_finish(fc, snapshot, for_io);
 		firehose_client_snapshot_mark_done(fc, snapshot, for_io);
 	}
@@ -394,12 +396,12 @@
 			dispatch_resume(fc->fc_kernel_source);
 		}
 	} else {
-		if (fc->fc_use_notifs && count >= DRAIN_BATCH_SIZE) {
+		if (count >= DRAIN_BATCH_SIZE) {
 			// if we hit the drain batch size, the client probably logs a lot
 			// and there's more to drain, so optimistically schedule draining
 			// again this is cheap since the queue is hot, and is fair for other
 			// clients
-			firehose_client_wakeup(fc, 0, for_io);
+			firehose_client_wakeup(fc, for_io);
 		}
 		if (count && server_config.fs_kernel_client) {
 			// the kernel is special because it can drop messages, so if we're
@@ -433,9 +435,10 @@
 	size_t clients = 0;
 
 	while (queue->fs_client_tail) {
-		fc = os_mpsc_get_head(queue, fs_client);
+		fc = os_mpsc_get_head(os_mpsc(queue, fs_client));
 		do {
-			fc_next = os_mpsc_pop_head(queue, fs_client, fc, fc_next[for_io]);
+			fc_next = os_mpsc_pop_head(os_mpsc(queue, fs_client),
+					fc, fc_next[for_io]);
 			if (firehose_client_dequeue(fc, for_io)) {
 				firehose_client_drain_one(fc, MACH_PORT_NULL,
 						for_io ? FIREHOSE_DRAIN_FOR_IO : 0);
@@ -473,16 +476,19 @@
 	}
 	if (fc->fc_memory_corrupted) {
 		server_config.fs_handler(fc, FIREHOSE_EVENT_CLIENT_CORRUPTED,
-				&fb->fb_chunks[0]);
+				&fb->fb_chunks[0], (firehose_chunk_pos_u){ .fcp_pos = 0 });
 	}
-	server_config.fs_handler(fc, FIREHOSE_EVENT_CLIENT_DIED, NULL);
+	server_config.fs_handler(fc, FIREHOSE_EVENT_CLIENT_DIED, NULL,
+			(firehose_chunk_pos_u){ .fcp_pos = 0 });
 
 	fs_clients_lock();
 	TAILQ_REMOVE(&server_config.fs_clients, fc, fc_entry);
 	fs_clients_unlock();
 
-	dispatch_release(fc->fc_mach_channel);
-	fc->fc_mach_channel = NULL;
+	for (int i = 0; i < FIREHOSE_BUFFER_NPUSHPORTS; i++) {
+		dispatch_release(fc->fc_mach_channel[i]);
+		fc->fc_mach_channel[i] = NULL;
+	}
 	fc->fc_entry.tqe_next = DISPATCH_OBJECT_LISTLESS;
 	fc->fc_entry.tqe_prev = DISPATCH_OBJECT_LISTLESS;
 	_os_object_release(&fc->fc_as_os_object);
@@ -528,7 +534,8 @@
 		// remove the pages that we flushed already from the bitmap
 		for (; tail != flushed; tail++) {
 			uint16_t ring_pos = tail & FIREHOSE_RING_POS_IDX_MASK;
-			uint16_t ref = fbh_ring[ring_pos] & FIREHOSE_RING_POS_IDX_MASK;
+			firehose_chunk_ref_t ref =
+					fbh_ring[ring_pos] & FIREHOSE_RING_POS_IDX_MASK;
 
 			bitmap &= ~(1ULL << ref);
 		}
@@ -538,9 +545,10 @@
 
 	// Then look at all the allocated pages not seen in the ring
 	while (bitmap) {
-		uint16_t ref = firehose_bitmap_first_set(bitmap);
+		firehose_chunk_ref_t ref = firehose_bitmap_first_set(bitmap);
 		firehose_chunk_t fbc = firehose_buffer_ref_to_chunk(fb, ref);
-		uint16_t fbc_length = fbc->fc_pos.fcp_next_entry_offs;
+		firehose_chunk_pos_u fc_pos = fbc->fc_pos;
+		uint16_t fbc_length = fc_pos.fcp_next_entry_offs;
 
 		bitmap &= ~(1ULL << ref);
 		if (fbc->fc_start + fbc_length <= fbc->fc_data) {
@@ -553,13 +561,15 @@
 			// so also just ditch it
 			continue;
 		}
-		if (!fbc->fc_pos.fcp_flag_io) {
+		if (!fc_pos.fcp_flag_io) {
 			mem_bitmap |= 1ULL << ref;
 			continue;
 		}
-		server_config.fs_handler(fc, FIREHOSE_EVENT_IO_BUFFER_RECEIVED, fbc);
+		server_config.fs_handler(fc, FIREHOSE_EVENT_IO_BUFFER_RECEIVED, fbc,
+				fc_pos);
 		if (fc->fc_needs_io_snapshot) {
-			snapshot->handler(fc, FIREHOSE_SNAPSHOT_EVENT_IO_BUFFER, fbc);
+			snapshot->handler(fc, FIREHOSE_SNAPSHOT_EVENT_IO_BUFFER, fbc,
+					fc_pos);
 		}
 	}
 
@@ -571,13 +581,16 @@
 		uint64_t mem_bitmap_copy = mem_bitmap;
 
 		while (mem_bitmap_copy) {
-			uint16_t ref = firehose_bitmap_first_set(mem_bitmap_copy);
+			firehose_chunk_ref_t ref = firehose_bitmap_first_set(mem_bitmap_copy);
 			firehose_chunk_t fbc = firehose_buffer_ref_to_chunk(fb, ref);
+			firehose_chunk_pos_u fc_pos = fbc->fc_pos;
 
 			mem_bitmap_copy &= ~(1ULL << ref);
-			server_config.fs_handler(fc, FIREHOSE_EVENT_MEM_BUFFER_RECEIVED, fbc);
+			server_config.fs_handler(fc, FIREHOSE_EVENT_MEM_BUFFER_RECEIVED,
+					fbc, fc_pos);
 			if (fc->fc_needs_mem_snapshot) {
-				snapshot->handler(fc, FIREHOSE_SNAPSHOT_EVENT_MEM_BUFFER, fbc);
+				snapshot->handler(fc, FIREHOSE_SNAPSHOT_EVENT_MEM_BUFFER,
+						fbc, fc_pos);
 			}
 		}
 
@@ -596,44 +609,56 @@
 
 	switch (reason) {
 	case DISPATCH_MACH_MESSAGE_RECEIVED:
+		if (dispatch_mach_mig_demux(fc, firehose_subsystems,
+				countof(firehose_subsystems), dmsg)) {
+			break;
+		}
+
 		msg_hdr = dispatch_mach_msg_get_msg(dmsg, NULL);
 		if (msg_hdr->msgh_id == MACH_NOTIFY_NO_SENDERS) {
 			_dispatch_debug("FIREHOSE NO_SENDERS (unique_pid: 0x%llx)",
 					firehose_client_get_unique_pid(fc, NULL));
-			dispatch_mach_cancel(fc->fc_mach_channel);
-		} else {
-			firehose_server_demux(fc, msg_hdr);
+			for (int i = 0; i < FIREHOSE_BUFFER_NPUSHPORTS; i++) {
+				dispatch_mach_cancel(fc->fc_mach_channel[i]);
+			}
 		}
+		mach_msg_destroy(msg_hdr);
 		break;
 
 	case DISPATCH_MACH_DISCONNECTED:
 		msg_hdr = dispatch_mach_msg_get_msg(dmsg, NULL);
-		port = msg_hdr->msgh_remote_port;
-		if (MACH_PORT_VALID(port)) {
-			if (port != fc->fc_sendp) {
-				DISPATCH_INTERNAL_CRASH(port, "Unknown send-right");
-			}
-			firehose_mach_port_send_release(fc->fc_sendp);
-			fc->fc_sendp = MACH_PORT_NULL;
-		}
 		port = msg_hdr->msgh_local_port;
 		if (MACH_PORT_VALID(port)) {
-			if (port != fc->fc_recvp) {
+			int i;
+			for (i = 0; i < FIREHOSE_BUFFER_NPUSHPORTS; i++) {
+				if (fc->fc_recvp[i] == port) {
+					break;
+				}
+			}
+			if (i == FIREHOSE_BUFFER_NPUSHPORTS) {
 				DISPATCH_INTERNAL_CRASH(port, "Unknown recv-right");
 			}
-			firehose_mach_port_recv_dispose(fc->fc_recvp, fc);
-			fc->fc_recvp = MACH_PORT_NULL;
+			firehose_mach_port_recv_dispose(fc->fc_recvp[i], &fc->fc_recvp[i]);
+			fc->fc_recvp[i] = MACH_PORT_NULL;
 		}
 		break;
 
 	case DISPATCH_MACH_CANCELED:
-		if (MACH_PORT_VALID(fc->fc_sendp)) {
-			DISPATCH_INTERNAL_CRASH(fc->fc_sendp, "send-right leak");
+		if (!_os_atomic_refcnt_sub2o(fc, fc_mach_channel_refcnt, 1)) {
+			_os_atomic_refcnt_dispose_barrier2o(fc, fc_mach_channel_refcnt);
+
+			firehose_mach_port_send_release(fc->fc_sendp);
+			fc->fc_sendp = MACH_PORT_NULL;
+			for (int i = 0; i < FIREHOSE_BUFFER_NPUSHPORTS; i++) {
+				if (MACH_PORT_VALID(fc->fc_recvp[i])) {
+					DISPATCH_INTERNAL_CRASH(fc->fc_recvp[i], "recv-right leak");
+				}
+			}
+
+			firehose_client_cancel(fc);
 		}
-		if (MACH_PORT_VALID(fc->fc_recvp)) {
-			DISPATCH_INTERNAL_CRASH(fc->fc_recvp, "recv-right leak");
-		}
-		firehose_client_cancel(fc);
+		break;
+	default:
 		break;
 	}
 }
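
Because each client now owns one mach channel per push port, the DISPATCH_MACH_CANCELED case above only tears the client down when the last of those channels cancels. The _os_atomic_refcnt_* helpers it uses are libdispatch-internal; a generic C11 sketch of the same last-one-out pattern, with invented names:

#include <stdatomic.h>
#include <stdbool.h>

// Sketch: N cancellation handlers each drop one reference; whichever
// handler takes the count to zero performs the one-time teardown.
static bool
sketch_release_and_maybe_finalize(_Atomic int *refcnt)
{
	// fetch_sub returns the previous value, so 1 means we were the last
	if (atomic_fetch_sub_explicit(refcnt, 1, memory_order_release) == 1) {
		// pair with the releases above so teardown sees all prior effects
		atomic_thread_fence(memory_order_acquire);
		return true;
	}
	return false;
}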
@@ -647,8 +672,8 @@
 	// resumed in firehose_client_drain for both memory and I/O
 	dispatch_suspend(fc->fc_kernel_source);
 	dispatch_suspend(fc->fc_kernel_source);
-	firehose_client_wakeup(fc, 0, false);
-	firehose_client_wakeup(fc, 0, true);
+	firehose_client_wakeup(fc, false);
+	firehose_client_wakeup(fc, true);
 }
 #endif
 
@@ -656,18 +681,21 @@
 firehose_client_resume(firehose_client_t fc,
 		const struct firehose_client_connected_info_s *fcci)
 {
-	dispatch_assert_queue(server_config.fs_io_drain_queue);
+	dispatch_assert_queue(server_config.fs_mem_drain_queue);
 
 	fs_clients_lock();
 	TAILQ_INSERT_TAIL(&server_config.fs_clients, fc, fc_entry);
 	fs_clients_unlock();
 
-	server_config.fs_handler(fc, FIREHOSE_EVENT_CLIENT_CONNECTED, (void *)fcci);
+	server_config.fs_handler(fc, FIREHOSE_EVENT_CLIENT_CONNECTED, (void *)fcci,
+			(firehose_chunk_pos_u){ .fcp_pos = 0 });
 	if (!fc->fc_pid) {
 		dispatch_activate(fc->fc_kernel_source);
 	} else {
-		dispatch_mach_connect(fc->fc_mach_channel,
-				fc->fc_recvp, fc->fc_sendp, NULL);
+		for (int i = 0; i < FIREHOSE_BUFFER_NPUSHPORTS; i++) {
+			dispatch_mach_connect(fc->fc_mach_channel[i],
+					fc->fc_recvp[i], MACH_PORT_NULL, NULL);
+		}
 	}
 }
 
@@ -677,15 +705,10 @@
 	_dispatch_debug("client died (unique_pid: 0x%llx",
 			firehose_client_get_unique_pid(fc, NULL));
 
-	if (MACH_PORT_VALID(fc->fc_sendp)) {
-		firehose_mach_port_send_release(fc->fc_sendp);
-		fc->fc_sendp = MACH_PORT_NULL;
+	dispatch_assert(fc->fc_sendp == MACH_PORT_NULL);
+	for (int i = 0; i < FIREHOSE_BUFFER_NPUSHPORTS; i++) {
+		dispatch_assert(fc->fc_recvp[i] == MACH_PORT_NULL);
 	}
-	if (MACH_PORT_VALID(fc->fc_recvp)) {
-		firehose_mach_port_recv_dispose(fc->fc_recvp, fc);
-		fc->fc_recvp = MACH_PORT_NULL;
-	}
-	fc->fc_use_notifs = false;
 	firehose_client_start_cancel(fc, false);
 	firehose_client_start_cancel(fc, true);
 }
@@ -720,7 +743,8 @@
 
 static firehose_client_t
 firehose_client_create(firehose_buffer_t fb, firehose_token_t token,
-		mach_port_t comm_recvp, mach_port_t comm_sendp)
+		mach_port_t comm_mem_recvp, mach_port_t comm_io_recvp,
+		mach_port_t comm_sendp)
 {
 	uint64_t unique_pid = fb->fb_header.fbh_uniquepid;
 	firehose_client_t fc = _firehose_client_create(fb);
@@ -731,13 +755,21 @@
 	fc->fc_pidversion = token->execcnt;
 
 	_dispatch_debug("FIREHOSE_REGISTER (unique_pid: 0x%llx)", unique_pid);
-	fc->fc_recvp = comm_recvp;
+	mach_port_t recvp[] = { comm_mem_recvp, comm_io_recvp };
+	dispatch_queue_t fsqs[] = {
+		server_config.fs_mem_drain_queue,
+		server_config.fs_io_drain_queue
+	};
+	fc->fc_mach_channel_refcnt = FIREHOSE_BUFFER_NPUSHPORTS;
+	for (int i = 0; i < FIREHOSE_BUFFER_NPUSHPORTS; i++) {
+		fc->fc_recvp[i] = recvp[i];
+		firehose_mach_port_guard(fc->fc_recvp[i], true, &fc->fc_recvp[i]);
+		dm = dispatch_mach_create_f("com.apple.firehose.peer", fsqs[i], fc,
+				firehose_client_handle_mach_event);
+		fc->fc_mach_channel[i] = dm;
+	}
+
 	fc->fc_sendp = comm_sendp;
-	firehose_mach_port_guard(comm_recvp, true, fc);
-	dm = dispatch_mach_create_f("com.apple.firehose.peer",
-			server_config.fs_ipc_queue,
-			fc, firehose_client_handle_mach_event);
-	fc->fc_mach_channel = dm;
 	return fc;
 }
 
@@ -767,19 +799,19 @@
 		}
 		DISPATCH_INTERNAL_CRASH(errno, "Unable to map kernel buffer");
 	}
-	if (fb_map.fbmi_size !=
-			FIREHOSE_BUFFER_KERNEL_CHUNK_COUNT * FIREHOSE_CHUNK_SIZE) {
+	if ((fb_map.fbmi_size < FIREHOSE_BUFFER_KERNEL_MIN_CHUNK_COUNT * FIREHOSE_CHUNK_SIZE) ||
+		(fb_map.fbmi_size > FIREHOSE_BUFFER_KERNEL_MAX_CHUNK_COUNT * FIREHOSE_CHUNK_SIZE)) {
 		DISPATCH_INTERNAL_CRASH(fb_map.fbmi_size, "Unexpected kernel buffer size");
 	}
 
 	fc = _firehose_client_create((firehose_buffer_t)(uintptr_t)fb_map.fbmi_addr);
 	ds = dispatch_source_create(DISPATCH_SOURCE_TYPE_READ, (uintptr_t)fd, 0,
-			fs->fs_ipc_queue);
+			fs->fs_mem_drain_queue);
+	dispatch_set_qos_class_floor(ds, QOS_CLASS_USER_INITIATED, 0);
 	dispatch_set_context(ds, fc);
 	dispatch_source_set_event_handler_f(ds,
 			firehose_client_kernel_source_handle_event);
 	fc->fc_kernel_source = ds;
-	fc->fc_use_notifs = true;
 	fc->fc_sendp = MACH_PORT_DEAD; // causes drain() to call notify
 
 	fs->fs_kernel_fd = fd;
@@ -793,7 +825,8 @@
 	vm_deallocate(mach_task_self(), (vm_address_t)fc->fc_buffer,
 			sizeof(*fc->fc_buffer));
 	fc->fc_buffer = NULL;
-	server_config.fs_handler(fc, FIREHOSE_EVENT_CLIENT_FINALIZE, NULL);
+	server_config.fs_handler(fc, FIREHOSE_EVENT_CLIENT_FINALIZE, NULL,
+			(firehose_chunk_pos_u){ .fcp_pos = 0 });
 }
 
 void
@@ -839,6 +872,12 @@
 	return os_atomic_load2o(fc, fc_ctxt, relaxed);
 }
 
+void
+firehose_client_set_strings_cached(firehose_client_t fc)
+{
+	fc->fc_strings_cached = true;
+}
+
 void *
 firehose_client_set_context(firehose_client_t fc, void *ctxt)
 {
@@ -854,24 +893,16 @@
 #pragma mark -
 #pragma mark firehose server
 
-/*
- * The current_message context stores the client info for the current message
- * being handled. The only reason this works is because currently the message
- * processing is serial. If that changes, this would not work.
- */
-static firehose_client_t cur_client_info;
-
 static void
-firehose_server_handle_mach_event(void *ctx OS_UNUSED,
+firehose_server_handle_mach_event(void *ctx,
 		dispatch_mach_reason_t reason, dispatch_mach_msg_t dmsg,
 		mach_error_t error OS_UNUSED)
 {
-	mach_msg_header_t *msg_hdr = NULL;
-
 	if (reason == DISPATCH_MACH_MESSAGE_RECEIVED) {
-		msg_hdr = dispatch_mach_msg_get_msg(dmsg, NULL);
-		/* TODO: Assert this should be a register message */
-		firehose_server_demux(NULL, msg_hdr);
+		if (!dispatch_mach_mig_demux(ctx, firehose_subsystems,
+				countof(firehose_subsystems), dmsg)) {
+			mach_msg_destroy(dispatch_mach_msg_get_msg(dmsg, NULL));
+		}
 	}
 }
 
@@ -880,26 +911,32 @@
 {
 	struct firehose_server_s *fs = &server_config;
 	dispatch_queue_attr_t attr = DISPATCH_QUEUE_SERIAL_WITH_AUTORELEASE_POOL;
-	dispatch_queue_attr_t attr_ui;
+	dispatch_queue_attr_t attr_inactive, attr_utility_inactive;
 	dispatch_mach_t dm;
 	dispatch_source_t ds;
 
 	// just reference the string so that it's captured
 	(void)os_atomic_load(&__libfirehose_serverVersionString[0], relaxed);
 
-	attr_ui = dispatch_queue_attr_make_with_qos_class(attr,
-			QOS_CLASS_USER_INITIATED, 0);
-	fs->fs_ipc_queue = dispatch_queue_create_with_target(
-			"com.apple.firehose.ipc", attr_ui, NULL);
 	fs->fs_snapshot_gate_queue = dispatch_queue_create_with_target(
 			"com.apple.firehose.snapshot-gate", attr, NULL);
+
+	attr_inactive = dispatch_queue_attr_make_initially_inactive(attr);
+	attr_utility_inactive = dispatch_queue_attr_make_with_qos_class(
+			attr_inactive, QOS_CLASS_UTILITY, 0);
+
 	fs->fs_io_drain_queue = dispatch_queue_create_with_target(
-			"com.apple.firehose.drain-io", attr, NULL);
+			"com.apple.firehose.drain-io", attr_utility_inactive, NULL);
+	dispatch_set_qos_class_fallback(fs->fs_io_drain_queue, QOS_CLASS_UTILITY);
+	dispatch_activate(fs->fs_io_drain_queue);
+
 	fs->fs_mem_drain_queue = dispatch_queue_create_with_target(
-			"com.apple.firehose.drain-mem", attr, NULL);
+			"com.apple.firehose.drain-mem", attr_inactive, NULL);
+	dispatch_set_qos_class_fallback(fs->fs_mem_drain_queue, QOS_CLASS_UTILITY);
+	dispatch_activate(fs->fs_mem_drain_queue);
 
 	dm = dispatch_mach_create_f("com.apple.firehose.listener",
-			fs->fs_ipc_queue, NULL, firehose_server_handle_mach_event);
+			fs->fs_mem_drain_queue, NULL, firehose_server_handle_mach_event);
 	fs->fs_bootstrap_port = comm_port;
 	fs->fs_mach_channel = dm;
 	fs->fs_handler = _Block_copy(handler);
@@ -947,7 +984,7 @@
 	struct firehose_server_s *fs = &server_config;
 
 	if (fs->fs_kernel_client) {
-		dispatch_async(fs->fs_io_drain_queue, ^{
+		dispatch_async(fs->fs_mem_drain_queue, ^{
 			struct firehose_client_connected_info_s fcci = {
 				.fcci_version = FIREHOSE_CLIENT_CONNECTED_INFO_VERSION,
 			};
@@ -970,11 +1007,46 @@
 
 	fs_clients_lock();
 	TAILQ_FOREACH(fc, &server_config.fs_clients, fc_entry) {
-		dispatch_mach_cancel(fc->fc_mach_channel);
+		if (fc->fc_pid) {
+			for (int i = 0; i < FIREHOSE_BUFFER_NPUSHPORTS; i++) {
+				dispatch_mach_cancel(fc->fc_mach_channel[i]);
+			}
+		}
 	}
 	fs_clients_unlock();
 }
 
+void
+firehose_server_set_logging_prefs(void *pointer, size_t length, os_block_t block)
+{
+	dispatch_async(server_config.fs_mem_drain_queue, ^{
+		kern_return_t kr;
+		memory_object_size_t size = (memory_object_size_t)length;
+		if (server_config.fs_prefs_cache_entry) {
+			kr = mach_port_deallocate(mach_task_self(),
+					server_config.fs_prefs_cache_entry);
+			DISPATCH_VERIFY_MIG(kr);
+			dispatch_assume_zero(kr);
+		}
+		if (server_config.fs_prefs_cache) {
+			munmap(server_config.fs_prefs_cache,
+					server_config.fs_prefs_cache_size);
+		}
+
+		server_config.fs_prefs_cache = pointer;
+		server_config.fs_prefs_cache_size = length;
+		server_config.fs_prefs_cache_entry = MACH_PORT_NULL;
+		if (pointer) {
+			kr = mach_make_memory_entry_64(mach_task_self(), &size,
+					(mach_vm_address_t)pointer, VM_PROT_READ | MAP_MEM_VM_SHARE,
+					&server_config.fs_prefs_cache_entry, MACH_PORT_NULL);
+			DISPATCH_VERIFY_MIG(kr);
+			dispatch_assume_zero(kr);
+		}
+		if (block) block();
+	});
+}
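
firehose_server_set_logging_prefs() above publishes the preferences buffer as a read-only named memory entry (fs_prefs_cache_entry); firehose_server_get_logging_prefs() further down hands that entry and its size back to callers. A minimal sketch of the client side, assuming the caller only wants a read-only view once it holds the returned send right; the helper name and error handling are illustrative, not part of this patch:

	#include <mach/mach.h>
	#include <mach/mach_vm.h>

	// Hypothetical client-side helper: map the logging-prefs entry read-only.
	// `entry` and `size` would come from a firehose_server_get_logging_prefs()
	// reply; returns NULL when no cache is published or the mapping fails.
	static const void *
	map_logging_prefs(mach_port_t entry, mach_vm_size_t size)
	{
		mach_vm_address_t addr = 0;
		kern_return_t kr;

		if (!MACH_PORT_VALID(entry) || size == 0) return NULL;
		kr = mach_vm_map(mach_task_self(), &addr, size, 0, VM_FLAGS_ANYWHERE,
				entry, 0, FALSE, VM_PROT_READ, VM_PROT_READ, VM_INHERIT_NONE);
		return (kr == KERN_SUCCESS) ? (const void *)(uintptr_t)addr : NULL;
	}
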
+
 dispatch_queue_t
 firehose_server_copy_queue(firehose_server_queue_t which)
 {
@@ -1040,7 +1112,7 @@
 		uint64_t bitmap = fbh->fbh_bank.fbb_metadata_bitmap;
 
 		while (bitmap) {
-			uint16_t ref = firehose_bitmap_first_set(bitmap);
+			firehose_chunk_ref_t ref = firehose_bitmap_first_set(bitmap);
 			firehose_chunk_t fbc = firehose_buffer_ref_to_chunk(fb, ref);
 			uint16_t fbc_length = fbc->fc_pos.fcp_next_entry_offs;
 
@@ -1100,7 +1172,7 @@
 	// remove the pages that we flushed already from the bitmap
 	for (; tail != flushed; tail++) {
 		uint16_t idx = tail & FIREHOSE_RING_POS_IDX_MASK;
-		uint16_t ref = fbh_ring[idx] & FIREHOSE_RING_POS_IDX_MASK;
+		firehose_chunk_ref_t ref = fbh_ring[idx] & FIREHOSE_RING_POS_IDX_MASK;
 
 		bitmap &= ~(1ULL << ref);
 	}
@@ -1113,9 +1185,10 @@
 
 	// Then look at all the allocated pages not seen in the ring
 	while (bitmap) {
-		uint16_t ref = firehose_bitmap_first_set(bitmap);
+		firehose_chunk_ref_t ref = firehose_bitmap_first_set(bitmap);
 		firehose_chunk_t fbc = firehose_buffer_ref_to_chunk(fb, ref);
-		uint16_t fbc_length = fbc->fc_pos.fcp_next_entry_offs;
+		firehose_chunk_pos_u fc_pos = fbc->fc_pos;
+		uint16_t fbc_length = fc_pos.fcp_next_entry_offs;
 
 		bitmap &= ~(1ULL << ref);
 		if (fbc->fc_start + fbc_length <= fbc->fc_data) {
@@ -1128,10 +1201,10 @@
 			// so also just ditch it
 			continue;
 		}
-		if (fbc->fc_pos.fcp_flag_io != for_io) {
+		if (fc_pos.fcp_flag_io != for_io) {
 			continue;
 		}
-		snapshot->handler(fc, evt, fbc);
+		snapshot->handler(fc, evt, fbc, fc_pos);
 	}
 }
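
The loops above peel set bits off the metadata bitmap one reference at a time. firehose_bitmap_first_set() itself is not shown in this diff; a stand-in based on the count-trailing-zeros builtin illustrates the pattern, assuming chunk refs are plain bit indices:

	// Illustrative stand-in only: index of the lowest set bit.
	// Callers must guarantee bitmap != 0, as the while (bitmap) loops do.
	static inline firehose_chunk_ref_t
	bitmap_first_set(uint64_t bitmap)
	{
		return (firehose_chunk_ref_t)__builtin_ctzll(bitmap);
	}
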
 
@@ -1139,18 +1212,18 @@
 firehose_snapshot_tickle_clients(firehose_snapshot_t fs, bool for_io)
 {
 	firehose_client_t fc;
-	long n = 0;
+	uint32_t n = 0;
 
 	fs_clients_lock();
 	TAILQ_FOREACH(fc, &server_config.fs_clients, fc_entry) {
-		if (slowpath(fc->fc_memory_corrupted)) {
+		if (unlikely(fc->fc_memory_corrupted)) {
 			continue;
 		}
 		if (!fc->fc_pid) {
 #if TARGET_OS_SIMULATOR
 			continue;
 #endif
-		} else if (!firehose_client_wakeup(fc, 0, for_io)) {
+		} else if (!firehose_client_wakeup(fc, for_io)) {
 			continue;
 		}
 		n++;
@@ -1164,7 +1237,10 @@
 
 	// cheating: equivalent to dispatch_group_enter() n times
 	// without the acquire barriers that we don't need
-	if (n) os_atomic_add2o(fs->fs_group, dg_value, n, relaxed);
+	if (n) {
+		os_atomic_sub2o(fs->fs_group, dg_bits,
+				n * DISPATCH_GROUP_VALUE_INTERVAL, relaxed);
+	}
 }
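
As the comment notes, the relaxed subtraction on dg_bits is a batched dispatch_group_enter(). A sketch of the equivalent formulation using only the public API, assuming n clients were woken above:

	// Slower but semantically identical: enter the group once per client
	// whose drain is now expected to post a matching leave.
	for (uint32_t i = 0; i < n; i++) {
		dispatch_group_enter(fs->fs_group);
	}
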
 
 static void
@@ -1172,7 +1248,8 @@
 {
 	firehose_snapshot_t fs = ctxt;
 
-	fs->handler(NULL, FIREHOSE_SNAPSHOT_EVENT_COMPLETE, NULL);
+	fs->handler(NULL, FIREHOSE_SNAPSHOT_EVENT_COMPLETE, NULL,
+			(firehose_chunk_pos_u){ .fcp_pos = 0 });
 	server_config.fs_snapshot = NULL;
 
 	dispatch_release(fs->fs_group);
@@ -1196,14 +1273,16 @@
 	dispatch_group_async(fs->fs_group, server_config.fs_mem_drain_queue, ^{
 		// start the fs_mem_snapshot, this is what triggers the snapshot
 		// logic from _drain() or handle_death()
-		fs->handler(NULL, FIREHOSE_SNAPSHOT_EVENT_MEM_START, NULL);
+		fs->handler(NULL, FIREHOSE_SNAPSHOT_EVENT_MEM_START, NULL,
+				(firehose_chunk_pos_u){ .fcp_pos = 0 });
 		firehose_snapshot_tickle_clients(fs, false);
 
 		dispatch_group_async(fs->fs_group, server_config.fs_io_drain_queue, ^{
 			// start the fs_io_snapshot, this is what triggers the snapshot
 			// logic from _drain() or handle_death()
 			// 29868879: must always happen after the memory snapshot started
-			fs->handler(NULL, FIREHOSE_SNAPSHOT_EVENT_IO_START, NULL);
+			fs->handler(NULL, FIREHOSE_SNAPSHOT_EVENT_IO_START, NULL,
+					(firehose_chunk_pos_u){ .fcp_pos = 0 });
 			firehose_snapshot_tickle_clients(fs, true);
 
 #if !TARGET_OS_SIMULATOR
@@ -1237,7 +1316,8 @@
 kern_return_t
 firehose_server_register(mach_port_t server_port OS_UNUSED,
 		mach_port_t mem_port, mach_vm_size_t mem_size,
-		mach_port_t comm_recvp, mach_port_t comm_sendp,
+		mach_port_t comm_mem_recvp, mach_port_t comm_io_recvp,
+		mach_port_t comm_sendp,
 		mach_port_t extra_info_port, mach_vm_size_t extra_info_size,
 		audit_token_t atoken)
 {
@@ -1248,17 +1328,22 @@
 		.fcci_version = FIREHOSE_CLIENT_CONNECTED_INFO_VERSION,
 	};
 
+	fc = dispatch_mach_mig_demux_get_context();
+	if (fc != NULL) {
+		return KERN_FAILURE;
+	}
+
 	if (mem_size != sizeof(union firehose_buffer_u)) {
 		return KERN_INVALID_VALUE;
 	}
 
 	/*
-	 * Request a MACH_NOTIFY_NO_SENDERS notification for recvp. That should
-	 * indicate the client going away.
+	 * Request a MACH_NOTIFY_NO_SENDERS notification for the mem_recvp. That
+	 * should indicate the client going away.
 	 */
 	mach_port_t previous = MACH_PORT_NULL;
-	kr = mach_port_request_notification(mach_task_self(), comm_recvp,
-			MACH_NOTIFY_NO_SENDERS, 0, comm_recvp,
+	kr = mach_port_request_notification(mach_task_self(), comm_mem_recvp,
+			MACH_NOTIFY_NO_SENDERS, 0, comm_mem_recvp,
 			MACH_MSG_TYPE_MAKE_SEND_ONCE, &previous);
 	DISPATCH_VERIFY_MIG(kr);
 	if (dispatch_assume_zero(kr)) {
@@ -1275,93 +1360,109 @@
 		return KERN_NO_SPACE;
 	}
 
-	if (extra_info_port && extra_info_size) {
-		mach_vm_address_t addr = 0;
-		kr = mach_vm_map(mach_task_self(), &addr, extra_info_size, 0,
-				VM_FLAGS_ANYWHERE, extra_info_port, 0, FALSE,
-				VM_PROT_READ, VM_PROT_READ, VM_INHERIT_NONE);
-		if (dispatch_assume_zero(kr)) {
-			mach_vm_deallocate(mach_task_self(), base_addr, mem_size);
-			return KERN_NO_SPACE;
+	if (extra_info_port) {
+		if (extra_info_size) {
+			mach_vm_address_t addr = 0;
+			kr = mach_vm_map(mach_task_self(), &addr, extra_info_size, 0,
+					VM_FLAGS_ANYWHERE, extra_info_port, 0, TRUE,
+					VM_PROT_READ, VM_PROT_READ, VM_INHERIT_NONE);
+			if (dispatch_assume_zero(kr)) {
+				mach_vm_deallocate(mach_task_self(), base_addr, mem_size);
+				return KERN_NO_SPACE;
+			}
+			fcci.fcci_data = (void *)(uintptr_t)addr;
+			fcci.fcci_size = (size_t)extra_info_size;
 		}
-		fcci.fcci_data = (void *)(uintptr_t)addr;
-		fcci.fcci_size = (size_t)extra_info_size;
+		firehose_mach_port_send_release(extra_info_port);
 	}
 
-	fc = firehose_client_create((firehose_buffer_t)base_addr,
-			(firehose_token_t)&atoken, comm_recvp, comm_sendp);
-	dispatch_async(server_config.fs_io_drain_queue, ^{
-		firehose_client_resume(fc, &fcci);
-		if (fcci.fcci_size) {
-			vm_deallocate(mach_task_self(), (vm_address_t)fcci.fcci_data,
-					fcci.fcci_size);
-		}
-	});
-
-	if (extra_info_port) firehose_mach_port_send_release(extra_info_port);
 	firehose_mach_port_send_release(mem_port);
+
+	fc = firehose_client_create((firehose_buffer_t)base_addr,
+			(firehose_token_t)&atoken, comm_mem_recvp, comm_io_recvp,
+			comm_sendp);
+	firehose_client_resume(fc, &fcci);
+
+	if (fcci.fcci_size) {
+		vm_deallocate(mach_task_self(), (vm_address_t)fcci.fcci_data,
+				fcci.fcci_size);
+	}
+
 	return KERN_SUCCESS;
 }
 
 kern_return_t
-firehose_server_push_async(mach_port_t server_port OS_UNUSED,
-		qos_class_t qos, boolean_t for_io, boolean_t expects_notifs)
+firehose_server_push_async(mach_port_t server_port,
+		qos_class_t qos DISPATCH_UNUSED)
 {
-	firehose_client_t fc = cur_client_info;
-	pthread_priority_t pp = _pthread_qos_class_encode(qos, 0,
-			_PTHREAD_PRIORITY_ENFORCE_FLAG);
+	firehose_client_t fc = dispatch_mach_mig_demux_get_context();
+
+	if (fc == NULL) {
+		return KERN_FAILURE;
+	}
+
+	bool for_io = (server_port == fc->fc_recvp[FIREHOSE_BUFFER_PUSHPORT_IO]);
 
 	_dispatch_debug("FIREHOSE_PUSH_ASYNC (unique_pid %llx)",
 			firehose_client_get_unique_pid(fc, NULL));
-	if (!slowpath(fc->fc_memory_corrupted)) {
-		if (expects_notifs && !fc->fc_use_notifs) {
-			fc->fc_use_notifs = true;
-		}
-		firehose_client_wakeup(fc, pp, for_io);
+	if (likely(!fc->fc_memory_corrupted)) {
+		firehose_client_wakeup(fc, for_io);
 	}
 	return KERN_SUCCESS;
 }
 
 kern_return_t
-firehose_server_push_and_wait(mach_port_t server_port OS_UNUSED,
-		mach_port_t reply_port, qos_class_t qos, boolean_t for_io,
-		firehose_push_reply_t *push_reply OS_UNUSED,
+firehose_server_push_and_wait(mach_port_t server_port,
+		mach_port_t reply_port, firehose_push_reply_t *push_reply OS_UNUSED,
 		boolean_t *quarantinedOut OS_UNUSED)
 {
-	firehose_client_t fc = cur_client_info;
-	dispatch_block_flags_t flags = DISPATCH_BLOCK_ENFORCE_QOS_CLASS;
-	dispatch_block_t block;
-	dispatch_queue_t q;
+	firehose_client_t fc = dispatch_mach_mig_demux_get_context();
+
+	if (fc == NULL) {
+		return KERN_FAILURE;
+	}
+
+	bool for_io = (server_port == fc->fc_recvp[FIREHOSE_BUFFER_PUSHPORT_IO]);
 
 	_dispatch_debug("FIREHOSE_PUSH (unique_pid %llx)",
 			firehose_client_get_unique_pid(fc, NULL));
 
-	if (slowpath(fc->fc_memory_corrupted)) {
+	if (unlikely(fc->fc_memory_corrupted)) {
 		firehose_client_mark_corrupted(fc, reply_port);
 		return MIG_NO_REPLY;
 	}
 
+	dispatch_queue_t q;
 	if (for_io) {
 		q = server_config.fs_io_drain_queue;
 	} else {
 		q = server_config.fs_mem_drain_queue;
 	}
+	dispatch_assert_queue(q);
 
-	block = dispatch_block_create_with_qos_class(flags, qos, 0, ^{
-		firehose_client_drain_one(fc, reply_port,
-				for_io ? FIREHOSE_DRAIN_FOR_IO : 0);
-	});
-	dispatch_async(q, block);
-	_Block_release(block);
+	firehose_client_drain_one(fc, reply_port,
+			for_io ? FIREHOSE_DRAIN_FOR_IO : 0);
+
 	return MIG_NO_REPLY;
 }
 
-static void
-firehose_server_demux(firehose_client_t fc, mach_msg_header_t *msg_hdr)
+kern_return_t
+firehose_server_get_logging_prefs(mach_port_t server_port OS_UNUSED,
+		mach_port_t *mem_port, mach_vm_size_t *prefs_size)
 {
-	const size_t reply_size =
-			sizeof(union __ReplyUnion__firehose_server_firehose_subsystem);
+	*mem_port = server_config.fs_prefs_cache_entry;
+	*prefs_size = (mach_vm_size_t)server_config.fs_prefs_cache_size;
+	return KERN_SUCCESS;
+}
 
-	cur_client_info = fc;
-	firehose_mig_server(firehose_server, reply_size, msg_hdr);
+kern_return_t
+firehose_server_should_send_strings(mach_port_t server_port OS_UNUSED,
+		boolean_t *needs_strings)
+{
+	firehose_client_t fc = dispatch_mach_mig_demux_get_context();
+	if (fc) {
+		*needs_strings = !fc->fc_strings_cached;
+		return KERN_SUCCESS;
+	}
+	return KERN_FAILURE;
 }
diff --git a/src/firehose/firehose_server_internal.h b/src/firehose/firehose_server_internal.h
index 13f52b8..571cc2a 100644
--- a/src/firehose/firehose_server_internal.h
+++ b/src/firehose/firehose_server_internal.h
@@ -21,7 +21,7 @@
 #ifndef __FIREHOSE_SERVER_INTERNAL__
 #define __FIREHOSE_SERVER_INTERNAL__
 
-OS_OBJECT_CLASS_DECL(firehose_client, object);
+OS_OBJECT_CLASS_DECL(firehose_client);
 #define FIREHOSE_CLIENT_CLASS OS_OBJECT_VTABLE(firehose_client)
 
 typedef struct firehose_snapshot_s *firehose_snapshot_t;
@@ -44,39 +44,41 @@
 	uint64_t volatile	fc_io_sent_flushed_pos;
 	uint64_t volatile	fc_io_flushed_pos;
 
-#define FC_STATE_ENQUEUED(for_io)      (0x0001u << (for_io))
+#define FC_STATE_ENQUEUED(for_io)      (uint16_t)(0x0001u << (for_io))
 #define FC_STATE_MEM_ENQUEUED           0x0001
 #define FC_STATE_IO_ENQUEUED            0x0002
 
-#define FC_STATE_CANCELING(for_io)     (0x0010u << (for_io))
+#define FC_STATE_CANCELING(for_io)     (uint16_t)(0x0010u << (for_io))
 #define FC_STATE_MEM_CANCELING          0x0010
 #define FC_STATE_IO_CANCELING           0x0020
 
-#define FC_STATE_CANCELED(for_io)      (0x0100u << (for_io))
+#define FC_STATE_CANCELED(for_io)      (uint16_t)(0x0100u << (for_io))
 #define FC_STATE_MEM_CANCELED           0x0100
 #define FC_STATE_IO_CANCELED            0x0200
 #define FC_STATE_CANCELED_MASK          0x0300
 
-	uintptr_t volatile	fc_state;
-
 	void *volatile		fc_ctxt;
 
 	union {
-		dispatch_mach_t	fc_mach_channel;
+		dispatch_mach_t	fc_mach_channel[FIREHOSE_BUFFER_NPUSHPORTS];
 		dispatch_source_t fc_kernel_source;
 	};
-	mach_port_t			fc_recvp;
+	mach_port_t			fc_recvp[FIREHOSE_BUFFER_NPUSHPORTS];
 	mach_port_t			fc_sendp;
 	os_unfair_lock      fc_lock;
 	pid_t				fc_pid;
 	int					fc_pidversion;
 	uid_t				fc_euid;
-	bool				fc_use_notifs;
-	bool				fc_memory_corrupted;
-	bool				fc_needs_io_snapshot;
-	bool				fc_needs_mem_snapshot;
-	bool				fc_quarantined;
-};
+	os_atomic(uint16_t)	fc_state;
+	os_atomic(uint8_t)	fc_mach_channel_refcnt;
+	// These bits are mutated from different locking domains, and so cannot be
+	// safely consolidated into a bit-field.
+	bool volatile		fc_strings_cached;
+	bool volatile		fc_memory_corrupted;
+	bool volatile		fc_needs_io_snapshot;
+	bool volatile		fc_needs_mem_snapshot;
+	bool volatile		fc_quarantined;
+} DISPATCH_ATOMIC64_ALIGN;
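
The FC_STATE_* generator macros earlier in this struct keep the memory-buffer flag in the low bit of each nibble and the I/O flag one bit above it, which is why the casted shift forms expand to the named constants. A couple of compile-time checks make the relationship explicit:

	// for_io == false selects the memory-buffer bit, for_io == true the I/O bit.
	_Static_assert(FC_STATE_ENQUEUED(false) == FC_STATE_MEM_ENQUEUED, "");
	_Static_assert(FC_STATE_ENQUEUED(true) == FC_STATE_IO_ENQUEUED, "");
	_Static_assert(FC_STATE_CANCELING(true) == FC_STATE_IO_CANCELING, "");
	_Static_assert(FC_STATE_CANCELED(true) == FC_STATE_IO_CANCELED, "");
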
 
 void
 _firehose_client_xref_dispose(struct firehose_client_s *fc);
diff --git a/src/init.c b/src/init.c
index e2131ca..5b62992 100644
--- a/src/init.c
+++ b/src/init.c
@@ -32,6 +32,7 @@
 #pragma mark -
 #pragma mark dispatch_init
 
+
 #if USE_LIBDISPATCH_INIT_CONSTRUCTOR
 DISPATCH_NOTHROW __attribute__((constructor))
 void
@@ -149,14 +150,15 @@
 dispatch_mach_t _voucher_activity_debug_channel;
 #endif
 #if HAVE_PTHREAD_WORKQUEUE_QOS && DISPATCH_DEBUG
-int _dispatch_set_qos_class_enabled;
+bool _dispatch_set_qos_class_enabled;
 #endif
 #if DISPATCH_USE_KEVENT_WORKQUEUE && DISPATCH_USE_MGR_THREAD
-int _dispatch_kevent_workqueue_enabled;
+bool _dispatch_kevent_workqueue_enabled = 1;
 #endif
 
 DISPATCH_HW_CONFIG();
 uint8_t _dispatch_unsafe_fork;
+uint8_t _dispatch_mode;
 bool _dispatch_child_of_unsafe_fork;
 #if DISPATCH_USE_MEMORYPRESSURE_SOURCE
 bool _dispatch_memory_warn;
@@ -197,133 +199,213 @@
 	.dqo_priority_size = 0,
 };
 
+#if TARGET_OS_MAC
+const struct dispatch_allocator_layout_s dispatch_allocator_layout = {
+	.dal_version = 1,
+#if DISPATCH_ALLOCATOR
+	.dal_allocator_zone = &_dispatch_main_heap,
+	.dal_deferred_free_isa = &_dispatch_main_heap,
+	.dal_allocation_size = DISPATCH_CONTINUATION_SIZE,
+	.dal_magazine_size = BYTES_PER_MAGAZINE,
+#if PACK_FIRST_PAGE_WITH_CONTINUATIONS
+	.dal_first_allocation_offset =
+			offsetof(struct dispatch_magazine_s, fp_conts),
+#else
+	.dal_first_allocation_offset =
+			offsetof(struct dispatch_magazine_s, conts),
+#endif
+	.dal_allocation_isa_offset =
+			offsetof(struct dispatch_continuation_s, dc_flags),
+	.dal_enumerator = &_dispatch_allocator_enumerate,
+#endif // DISPATCH_ALLOCATOR
+};
+#endif
+
 #if DISPATCH_USE_DIRECT_TSD
 const struct dispatch_tsd_indexes_s dispatch_tsd_indexes = {
-	.dti_version = 2,
+	.dti_version = 3,
 	.dti_queue_index = dispatch_queue_key,
 	.dti_voucher_index = dispatch_voucher_key,
 	.dti_qos_class_index = dispatch_priority_key,
+	.dti_continuation_cache_index = dispatch_cache_key,
 };
 #endif // DISPATCH_USE_DIRECT_TSD
 
 // 6618342 Contact the team that owns the Instrument DTrace probe before
 //         renaming this symbol
-DISPATCH_CACHELINE_ALIGN
-struct dispatch_queue_s _dispatch_main_q = {
+struct dispatch_queue_static_s _dispatch_main_q = {
 	DISPATCH_GLOBAL_OBJECT_HEADER(queue_main),
 #if !DISPATCH_USE_RESOLVERS
-	.do_targetq = &_dispatch_root_queues[
-			DISPATCH_ROOT_QUEUE_IDX_DEFAULT_QOS_OVERCOMMIT],
+	.do_targetq = _dispatch_get_default_queue(true),
 #endif
 	.dq_state = DISPATCH_QUEUE_STATE_INIT_VALUE(1) |
 			DISPATCH_QUEUE_ROLE_BASE_ANON,
 	.dq_label = "com.apple.main-thread",
-	.dq_atomic_flags = DQF_THREAD_BOUND | DQF_CANNOT_TRYSYNC | DQF_WIDTH(1),
+	.dq_atomic_flags = DQF_THREAD_BOUND | DQF_WIDTH(1),
 	.dq_serialnum = 1,
 };
 
+#if DISPATCH_USE_MGR_THREAD && DISPATCH_USE_PTHREAD_ROOT_QUEUES
+static struct dispatch_pthread_root_queue_context_s
+_dispatch_mgr_root_queue_pthread_context;
+
+struct dispatch_queue_global_s _dispatch_mgr_root_queue = {
+	DISPATCH_GLOBAL_OBJECT_HEADER(queue_global),
+	.dq_state = DISPATCH_ROOT_QUEUE_STATE_INIT_VALUE,
+	.do_ctxt = &_dispatch_mgr_root_queue_pthread_context,
+	.dq_label = "com.apple.root.libdispatch-manager",
+	.dq_atomic_flags = DQF_WIDTH(DISPATCH_QUEUE_WIDTH_POOL),
+	.dq_priority = DISPATCH_PRIORITY_FLAG_MANAGER |
+			DISPATCH_PRIORITY_SATURATED_OVERRIDE,
+	.dq_serialnum = 3,
+	.dgq_thread_pool_size = 1,
+};
+#else
+#define _dispatch_mgr_root_queue _dispatch_root_queues[\
+		DISPATCH_ROOT_QUEUE_IDX_USER_INTERACTIVE_QOS_OVERCOMMIT]
+#endif
+
+// 6618342 Contact the team that owns the Instrument DTrace probe before
+//         renaming this symbol
+struct dispatch_queue_static_s _dispatch_mgr_q = {
+	DISPATCH_GLOBAL_OBJECT_HEADER(queue_mgr),
+	.dq_state = DISPATCH_QUEUE_STATE_INIT_VALUE(1) |
+			DISPATCH_QUEUE_ROLE_BASE_ANON,
+	.do_ctxt = (void *)-1,
+	.do_targetq = _dispatch_mgr_root_queue._as_dq,
+	.dq_label = "com.apple.libdispatch-manager",
+	.dq_atomic_flags = DQF_WIDTH(1),
+	.dq_priority = DISPATCH_PRIORITY_FLAG_MANAGER |
+			DISPATCH_PRIORITY_SATURATED_OVERRIDE,
+	.dq_serialnum = 2,
+};
+
+#if DISPATCH_USE_INTERNAL_WORKQUEUE
+static struct dispatch_pthread_root_queue_context_s
+		_dispatch_pthread_root_queue_contexts[DISPATCH_ROOT_QUEUE_COUNT];
+#define _dispatch_root_queue_ctxt(n) &_dispatch_pthread_root_queue_contexts[n]
+#else
+#define _dispatch_root_queue_ctxt(n) NULL
+#endif // DISPATCH_USE_INTERNAL_WORKQUEUE
+
+// 6618342 Contact the team that owns the Instrument DTrace probe before
+//         renaming this symbol
+struct dispatch_queue_global_s _dispatch_root_queues[] = {
+#define _DISPATCH_ROOT_QUEUE_IDX(n, flags) \
+		((flags & DISPATCH_PRIORITY_FLAG_OVERCOMMIT) ? \
+		DISPATCH_ROOT_QUEUE_IDX_##n##_QOS_OVERCOMMIT : \
+		DISPATCH_ROOT_QUEUE_IDX_##n##_QOS)
+#define _DISPATCH_ROOT_QUEUE_ENTRY(n, flags, ...) \
+	[_DISPATCH_ROOT_QUEUE_IDX(n, flags)] = { \
+		DISPATCH_GLOBAL_OBJECT_HEADER(queue_global), \
+		.dq_state = DISPATCH_ROOT_QUEUE_STATE_INIT_VALUE, \
+		.do_ctxt = _dispatch_root_queue_ctxt(_DISPATCH_ROOT_QUEUE_IDX(n, flags)), \
+		.dq_atomic_flags = DQF_WIDTH(DISPATCH_QUEUE_WIDTH_POOL), \
+		.dq_priority = flags | ((flags & DISPATCH_PRIORITY_FLAG_FALLBACK) ? \
+				_dispatch_priority_make_fallback(DISPATCH_QOS_##n) : \
+				_dispatch_priority_make(DISPATCH_QOS_##n, 0)), \
+		__VA_ARGS__ \
+	}
+	_DISPATCH_ROOT_QUEUE_ENTRY(MAINTENANCE, 0,
+		.dq_label = "com.apple.root.maintenance-qos",
+		.dq_serialnum = 4,
+	),
+	_DISPATCH_ROOT_QUEUE_ENTRY(MAINTENANCE, DISPATCH_PRIORITY_FLAG_OVERCOMMIT,
+		.dq_label = "com.apple.root.maintenance-qos.overcommit",
+		.dq_serialnum = 5,
+	),
+	_DISPATCH_ROOT_QUEUE_ENTRY(BACKGROUND, 0,
+		.dq_label = "com.apple.root.background-qos",
+		.dq_serialnum = 6,
+	),
+	_DISPATCH_ROOT_QUEUE_ENTRY(BACKGROUND, DISPATCH_PRIORITY_FLAG_OVERCOMMIT,
+		.dq_label = "com.apple.root.background-qos.overcommit",
+		.dq_serialnum = 7,
+	),
+	_DISPATCH_ROOT_QUEUE_ENTRY(UTILITY, 0,
+		.dq_label = "com.apple.root.utility-qos",
+		.dq_serialnum = 8,
+	),
+	_DISPATCH_ROOT_QUEUE_ENTRY(UTILITY, DISPATCH_PRIORITY_FLAG_OVERCOMMIT,
+		.dq_label = "com.apple.root.utility-qos.overcommit",
+		.dq_serialnum = 9,
+	),
+	_DISPATCH_ROOT_QUEUE_ENTRY(DEFAULT, DISPATCH_PRIORITY_FLAG_FALLBACK,
+		.dq_label = "com.apple.root.default-qos",
+		.dq_serialnum = 10,
+	),
+	_DISPATCH_ROOT_QUEUE_ENTRY(DEFAULT,
+			DISPATCH_PRIORITY_FLAG_FALLBACK | DISPATCH_PRIORITY_FLAG_OVERCOMMIT,
+		.dq_label = "com.apple.root.default-qos.overcommit",
+		.dq_serialnum = 11,
+	),
+	_DISPATCH_ROOT_QUEUE_ENTRY(USER_INITIATED, 0,
+		.dq_label = "com.apple.root.user-initiated-qos",
+		.dq_serialnum = 12,
+	),
+	_DISPATCH_ROOT_QUEUE_ENTRY(USER_INITIATED, DISPATCH_PRIORITY_FLAG_OVERCOMMIT,
+		.dq_label = "com.apple.root.user-initiated-qos.overcommit",
+		.dq_serialnum = 13,
+	),
+	_DISPATCH_ROOT_QUEUE_ENTRY(USER_INTERACTIVE, 0,
+		.dq_label = "com.apple.root.user-interactive-qos",
+		.dq_serialnum = 14,
+	),
+	_DISPATCH_ROOT_QUEUE_ENTRY(USER_INTERACTIVE, DISPATCH_PRIORITY_FLAG_OVERCOMMIT,
+		.dq_label = "com.apple.root.user-interactive-qos.overcommit",
+		.dq_serialnum = 15,
+	),
+};
+
+unsigned long volatile _dispatch_queue_serial_numbers =
+		DISPATCH_QUEUE_SERIAL_NUMBER_INIT;
+
+
+dispatch_queue_global_t
+dispatch_get_global_queue(intptr_t priority, uintptr_t flags)
+{
+	dispatch_assert(countof(_dispatch_root_queues) ==
+			DISPATCH_ROOT_QUEUE_COUNT);
+
+	if (flags & ~(unsigned long)DISPATCH_QUEUE_OVERCOMMIT) {
+		return DISPATCH_BAD_INPUT;
+	}
+	dispatch_qos_t qos = _dispatch_qos_from_queue_priority(priority);
+#if !HAVE_PTHREAD_WORKQUEUE_QOS
+	if (qos == QOS_CLASS_MAINTENANCE) {
+		qos = DISPATCH_QOS_BACKGROUND;
+	} else if (qos == QOS_CLASS_USER_INTERACTIVE) {
+		qos = DISPATCH_QOS_USER_INITIATED;
+	}
+#endif
+	if (qos == DISPATCH_QOS_UNSPECIFIED) {
+		return DISPATCH_BAD_INPUT;
+	}
+	return _dispatch_get_root_queue(qos, flags & DISPATCH_QUEUE_OVERCOMMIT);
+}
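
A short usage sketch of dispatch_get_global_queue() as defined above; the QoS class and block body are illustrative:

	// The first argument accepts either a QOS_CLASS_* value or a legacy
	// DISPATCH_QUEUE_PRIORITY_* value; the second accepts 0 or
	// DISPATCH_QUEUE_OVERCOMMIT. Anything else yields DISPATCH_BAD_INPUT.
	dispatch_queue_t q = dispatch_get_global_queue(QOS_CLASS_UTILITY, 0);
	dispatch_async(q, ^{
		// runs at utility QoS on the corresponding root queue
	});
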
+
+dispatch_queue_t
+dispatch_get_current_queue(void)
+{
+	return _dispatch_queue_get_current_or_default();
+}
+
 #pragma mark -
 #pragma mark dispatch_queue_attr_t
 
-#define DISPATCH_QUEUE_ATTR_INIT(qos, prio, overcommit, freq, concurrent, \
-			inactive) \
-	{ \
-		DISPATCH_GLOBAL_OBJECT_HEADER(queue_attr), \
-		.dqa_qos_and_relpri = (_dispatch_priority_make(qos, prio) & \
-				DISPATCH_PRIORITY_REQUESTED_MASK), \
-		.dqa_overcommit = _dispatch_queue_attr_overcommit_##overcommit, \
-		.dqa_autorelease_frequency = DISPATCH_AUTORELEASE_FREQUENCY_##freq, \
-		.dqa_concurrent = (concurrent), \
-		.dqa_inactive = (inactive), \
-	}
-
-#define DISPATCH_QUEUE_ATTR_ACTIVE_INIT(qos, prio, overcommit, freq, \
-			concurrent) \
-	{ \
-		[DQA_INDEX_ACTIVE] = DISPATCH_QUEUE_ATTR_INIT( \
-				qos, prio, overcommit, freq, concurrent, false), \
-		[DQA_INDEX_INACTIVE] = DISPATCH_QUEUE_ATTR_INIT( \
-				qos, prio, overcommit, freq, concurrent, true), \
-	}
-
-#define DISPATCH_QUEUE_ATTR_OVERCOMMIT_INIT(qos, prio, overcommit) \
-	{ \
-		[DQA_INDEX_AUTORELEASE_FREQUENCY_INHERIT][DQA_INDEX_CONCURRENT] = \
-				DISPATCH_QUEUE_ATTR_ACTIVE_INIT( \
-						qos, prio, overcommit, INHERIT, 1), \
-		[DQA_INDEX_AUTORELEASE_FREQUENCY_INHERIT][DQA_INDEX_SERIAL] = \
-				DISPATCH_QUEUE_ATTR_ACTIVE_INIT( \
-						qos, prio, overcommit, INHERIT, 0), \
-		[DQA_INDEX_AUTORELEASE_FREQUENCY_WORK_ITEM][DQA_INDEX_CONCURRENT] = \
-				DISPATCH_QUEUE_ATTR_ACTIVE_INIT( \
-						qos, prio, overcommit, WORK_ITEM, 1), \
-		[DQA_INDEX_AUTORELEASE_FREQUENCY_WORK_ITEM][DQA_INDEX_SERIAL] = \
-				DISPATCH_QUEUE_ATTR_ACTIVE_INIT( \
-						qos, prio, overcommit, WORK_ITEM, 0), \
-		[DQA_INDEX_AUTORELEASE_FREQUENCY_NEVER][DQA_INDEX_CONCURRENT] = \
-				DISPATCH_QUEUE_ATTR_ACTIVE_INIT( \
-						qos, prio, overcommit, NEVER, 1), \
-		[DQA_INDEX_AUTORELEASE_FREQUENCY_NEVER][DQA_INDEX_SERIAL] = \
-				DISPATCH_QUEUE_ATTR_ACTIVE_INIT(\
-						qos, prio, overcommit, NEVER, 0), \
-	}
-
-#define DISPATCH_QUEUE_ATTR_PRIO_INITIALIZER(qos, prio) \
-	[prio] = { \
-		[DQA_INDEX_UNSPECIFIED_OVERCOMMIT] = \
-				DISPATCH_QUEUE_ATTR_OVERCOMMIT_INIT(qos, -(prio), unspecified),\
-		[DQA_INDEX_NON_OVERCOMMIT] = \
-				DISPATCH_QUEUE_ATTR_OVERCOMMIT_INIT(qos, -(prio), disabled), \
-		[DQA_INDEX_OVERCOMMIT] = \
-				DISPATCH_QUEUE_ATTR_OVERCOMMIT_INIT(qos, -(prio), enabled), \
-	}
-
-#define DISPATCH_QUEUE_ATTR_PRIO_INIT(qos) \
-	{ \
-		DISPATCH_QUEUE_ATTR_PRIO_INITIALIZER(qos, 0), \
-		DISPATCH_QUEUE_ATTR_PRIO_INITIALIZER(qos, 1), \
-		DISPATCH_QUEUE_ATTR_PRIO_INITIALIZER(qos, 2), \
-		DISPATCH_QUEUE_ATTR_PRIO_INITIALIZER(qos, 3), \
-		DISPATCH_QUEUE_ATTR_PRIO_INITIALIZER(qos, 4), \
-		DISPATCH_QUEUE_ATTR_PRIO_INITIALIZER(qos, 5), \
-		DISPATCH_QUEUE_ATTR_PRIO_INITIALIZER(qos, 6), \
-		DISPATCH_QUEUE_ATTR_PRIO_INITIALIZER(qos, 7), \
-		DISPATCH_QUEUE_ATTR_PRIO_INITIALIZER(qos, 8), \
-		DISPATCH_QUEUE_ATTR_PRIO_INITIALIZER(qos, 9), \
-		DISPATCH_QUEUE_ATTR_PRIO_INITIALIZER(qos, 10), \
-		DISPATCH_QUEUE_ATTR_PRIO_INITIALIZER(qos, 11), \
-		DISPATCH_QUEUE_ATTR_PRIO_INITIALIZER(qos, 12), \
-		DISPATCH_QUEUE_ATTR_PRIO_INITIALIZER(qos, 13), \
-		DISPATCH_QUEUE_ATTR_PRIO_INITIALIZER(qos, 14), \
-		DISPATCH_QUEUE_ATTR_PRIO_INITIALIZER(qos, 15), \
-	}
-
-#define DISPATCH_QUEUE_ATTR_QOS_INITIALIZER(qos) \
-	[DQA_INDEX_QOS_CLASS_##qos] = \
-			DISPATCH_QUEUE_ATTR_PRIO_INIT(DISPATCH_QOS_##qos)
-
 // DISPATCH_QUEUE_CONCURRENT resp. _dispatch_queue_attr_concurrent is aliased
-// to array member [0][0][0][0][0][0] and their properties must match!
-const struct dispatch_queue_attr_s _dispatch_queue_attrs[]
-		[DISPATCH_QUEUE_ATTR_PRIO_COUNT]
-		[DISPATCH_QUEUE_ATTR_OVERCOMMIT_COUNT]
-		[DISPATCH_QUEUE_ATTR_AUTORELEASE_FREQUENCY_COUNT]
-		[DISPATCH_QUEUE_ATTR_CONCURRENCY_COUNT]
-		[DISPATCH_QUEUE_ATTR_INACTIVE_COUNT] = {
-	DISPATCH_QUEUE_ATTR_QOS_INITIALIZER(UNSPECIFIED),
-	DISPATCH_QUEUE_ATTR_QOS_INITIALIZER(MAINTENANCE),
-	DISPATCH_QUEUE_ATTR_QOS_INITIALIZER(BACKGROUND),
-	DISPATCH_QUEUE_ATTR_QOS_INITIALIZER(UTILITY),
-	DISPATCH_QUEUE_ATTR_QOS_INITIALIZER(DEFAULT),
-	DISPATCH_QUEUE_ATTR_QOS_INITIALIZER(USER_INITIATED),
-	DISPATCH_QUEUE_ATTR_QOS_INITIALIZER(USER_INTERACTIVE),
+// to array member [0] and their properties must match!
+const struct dispatch_queue_attr_s _dispatch_queue_attrs[] = {
+	[0 ... DISPATCH_QUEUE_ATTR_COUNT - 1] = {
+		DISPATCH_GLOBAL_OBJECT_HEADER(queue_attr),
+	},
 };
 
 #if DISPATCH_VARIANT_STATIC
 // <rdar://problem/16778703>
-struct dispatch_queue_attr_s _dispatch_queue_attr_concurrent =
-	DISPATCH_QUEUE_ATTR_INIT(QOS_CLASS_UNSPECIFIED, 0,
-			unspecified, INHERIT, 1, false);
+struct dispatch_queue_attr_s _dispatch_queue_attr_concurrent = {
+	DISPATCH_GLOBAL_OBJECT_HEADER(queue_attr),
+};
 #endif // DISPATCH_VARIANT_STATIC
 
 // _dispatch_queue_attr_concurrent is aliased using libdispatch.aliases
@@ -334,190 +416,366 @@
 	__attribute__((__alias__("_dispatch_queue_attrs")));
 #endif
 
+dispatch_queue_attr_info_t
+_dispatch_queue_attr_to_info(dispatch_queue_attr_t dqa)
+{
+	dispatch_queue_attr_info_t dqai = { };
+
+	if (!dqa) return dqai;
+
+#if DISPATCH_VARIANT_STATIC
+	if (dqa == &_dispatch_queue_attr_concurrent) {
+		dqai.dqai_concurrent = true;
+		return dqai;
+	}
+#endif
+
+	if (dqa < _dispatch_queue_attrs ||
+			dqa >= &_dispatch_queue_attrs[DISPATCH_QUEUE_ATTR_COUNT]) {
+#ifndef __APPLE__
+		if (memcmp(dqa, &_dispatch_queue_attrs[0],
+				sizeof(struct dispatch_queue_attr_s)) == 0) {
+			dqa = (dispatch_queue_attr_t)&_dispatch_queue_attrs[0];
+		} else
+#endif // __APPLE__
+		DISPATCH_CLIENT_CRASH(dqa->do_vtable, "Invalid queue attribute");
+	}
+
+	size_t idx = (size_t)(dqa - _dispatch_queue_attrs);
+
+	dqai.dqai_inactive = (idx % DISPATCH_QUEUE_ATTR_INACTIVE_COUNT);
+	idx /= DISPATCH_QUEUE_ATTR_INACTIVE_COUNT;
+
+	dqai.dqai_concurrent = !(idx % DISPATCH_QUEUE_ATTR_CONCURRENCY_COUNT);
+	idx /= DISPATCH_QUEUE_ATTR_CONCURRENCY_COUNT;
+
+	dqai.dqai_relpri = -(int)(idx % DISPATCH_QUEUE_ATTR_PRIO_COUNT);
+	idx /= DISPATCH_QUEUE_ATTR_PRIO_COUNT;
+
+	dqai.dqai_qos = idx % DISPATCH_QUEUE_ATTR_QOS_COUNT;
+	idx /= DISPATCH_QUEUE_ATTR_QOS_COUNT;
+
+	dqai.dqai_autorelease_frequency =
+			idx % DISPATCH_QUEUE_ATTR_AUTORELEASE_FREQUENCY_COUNT;
+	idx /= DISPATCH_QUEUE_ATTR_AUTORELEASE_FREQUENCY_COUNT;
+
+	dqai.dqai_overcommit = idx % DISPATCH_QUEUE_ATTR_OVERCOMMIT_COUNT;
+	idx /= DISPATCH_QUEUE_ATTR_OVERCOMMIT_COUNT;
+
+	return dqai;
+}
+
+static dispatch_queue_attr_t
+_dispatch_queue_attr_from_info(dispatch_queue_attr_info_t dqai)
+{
+	size_t idx = 0;
+
+	idx *= DISPATCH_QUEUE_ATTR_OVERCOMMIT_COUNT;
+	idx += dqai.dqai_overcommit;
+
+	idx *= DISPATCH_QUEUE_ATTR_AUTORELEASE_FREQUENCY_COUNT;
+	idx += dqai.dqai_autorelease_frequency;
+
+	idx *= DISPATCH_QUEUE_ATTR_QOS_COUNT;
+	idx += dqai.dqai_qos;
+
+	idx *= DISPATCH_QUEUE_ATTR_PRIO_COUNT;
+	idx += (size_t)(-dqai.dqai_relpri);
+
+	idx *= DISPATCH_QUEUE_ATTR_CONCURRENCY_COUNT;
+	idx += !dqai.dqai_concurrent;
+
+	idx *= DISPATCH_QUEUE_ATTR_INACTIVE_COUNT;
+	idx += dqai.dqai_inactive;
+
+	return (dispatch_queue_attr_t)&_dispatch_queue_attrs[idx];
+}
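
The two helpers above are inverses over the canonical attribute array: _dispatch_queue_attr_to_info() peels the mixed-radix index apart field by field and _dispatch_queue_attr_from_info() rebuilds it in the opposite order. For any pointer into _dispatch_queue_attrs the round trip is exact, which a debug check could state as:

	// Holds for attributes inside the canonical array (not for NULL or the
	// DISPATCH_VARIANT_STATIC alias, which take the early returns above).
	dispatch_queue_attr_info_t dqai = _dispatch_queue_attr_to_info(dqa);
	dispatch_assert(_dispatch_queue_attr_from_info(dqai) == dqa);
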
+
+dispatch_queue_attr_t
+dispatch_queue_attr_make_with_qos_class(dispatch_queue_attr_t dqa,
+		dispatch_qos_class_t qos_class, int relpri)
+{
+	if (!_dispatch_qos_class_valid(qos_class, relpri)) {
+		return (dispatch_queue_attr_t)dqa;
+	}
+	dispatch_queue_attr_info_t dqai = _dispatch_queue_attr_to_info(dqa);
+	dqai.dqai_qos = _dispatch_qos_from_qos_class(qos_class);
+	dqai.dqai_relpri = relpri;
+	return _dispatch_queue_attr_from_info(dqai);
+}
+
+dispatch_queue_attr_t
+dispatch_queue_attr_make_initially_inactive(dispatch_queue_attr_t dqa)
+{
+	dispatch_queue_attr_info_t dqai = _dispatch_queue_attr_to_info(dqa);
+	dqai.dqai_inactive = true;
+	return _dispatch_queue_attr_from_info(dqai);
+}
+
+dispatch_queue_attr_t
+dispatch_queue_attr_make_with_overcommit(dispatch_queue_attr_t dqa,
+		bool overcommit)
+{
+	dispatch_queue_attr_info_t dqai = _dispatch_queue_attr_to_info(dqa);
+	if (overcommit) {
+		dqai.dqai_overcommit = _dispatch_queue_attr_overcommit_enabled;
+	} else {
+		dqai.dqai_overcommit = _dispatch_queue_attr_overcommit_disabled;
+	}
+	return _dispatch_queue_attr_from_info(dqai);
+}
+
+dispatch_queue_attr_t
+dispatch_queue_attr_make_with_autorelease_frequency(dispatch_queue_attr_t dqa,
+		dispatch_autorelease_frequency_t frequency)
+{
+	switch (frequency) {
+	case DISPATCH_AUTORELEASE_FREQUENCY_INHERIT:
+	case DISPATCH_AUTORELEASE_FREQUENCY_WORK_ITEM:
+	case DISPATCH_AUTORELEASE_FREQUENCY_NEVER:
+		break;
+	default:
+		return (dispatch_queue_attr_t)dqa;
+	}
+	dispatch_queue_attr_info_t dqai = _dispatch_queue_attr_to_info(dqa);
+	dqai.dqai_autorelease_frequency = (uint16_t)frequency;
+	return _dispatch_queue_attr_from_info(dqai);
+}
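
Because each maker above only rewrites one field of the decoded info before re-encoding it, the makers compose in any order. A small sketch mirroring the way the firehose server earlier in this patch builds its drain queues; the label is illustrative:

	// Serial, utility QoS, initially inactive: configure first, then activate.
	dispatch_queue_attr_t attr = DISPATCH_QUEUE_SERIAL_WITH_AUTORELEASE_POOL;
	attr = dispatch_queue_attr_make_initially_inactive(attr);
	attr = dispatch_queue_attr_make_with_qos_class(attr, QOS_CLASS_UTILITY, 0);

	dispatch_queue_t dq = dispatch_queue_create("com.example.drain", attr);
	dispatch_activate(dq);
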
+
 #pragma mark -
 #pragma mark dispatch_vtables
 
+DISPATCH_NOINLINE
+static void
+_dispatch_object_no_dispose(dispatch_object_t dou,
+		DISPATCH_UNUSED bool *allow_free)
+{
+	DISPATCH_INTERNAL_CRASH(dx_type(dou._do), "do_dispose called");
+}
+
+DISPATCH_NOINLINE
+static size_t
+_dispatch_object_missing_debug(DISPATCH_UNUSED dispatch_object_t dou,
+		char *buf, size_t bufsiz)
+{
+	return strlcpy(buf, "missing do_debug vtable slot: ", bufsiz);
+}
+
+DISPATCH_NOINLINE
+static void
+_dispatch_object_no_invoke(dispatch_object_t dou,
+		DISPATCH_UNUSED dispatch_invoke_context_t dic,
+		DISPATCH_UNUSED dispatch_invoke_flags_t flags)
+{
+	DISPATCH_INTERNAL_CRASH(dx_type(dou._do), "do_invoke called");
+}
+
+/*
+ * Dispatch object cluster
+ */
+
 DISPATCH_VTABLE_INSTANCE(semaphore,
-	.do_type = DISPATCH_SEMAPHORE_TYPE,
-	.do_kind = "semaphore",
-	.do_dispose = _dispatch_semaphore_dispose,
-	.do_debug = _dispatch_semaphore_debug,
+	.do_type        = DISPATCH_SEMAPHORE_TYPE,
+	.do_dispose     = _dispatch_semaphore_dispose,
+	.do_debug       = _dispatch_semaphore_debug,
+	.do_invoke      = _dispatch_object_no_invoke,
 );
 
 DISPATCH_VTABLE_INSTANCE(group,
-	.do_type = DISPATCH_GROUP_TYPE,
-	.do_kind = "group",
-	.do_dispose = _dispatch_group_dispose,
-	.do_debug = _dispatch_group_debug,
+	.do_type        = DISPATCH_GROUP_TYPE,
+	.do_dispose     = _dispatch_group_dispose,
+	.do_debug       = _dispatch_group_debug,
+	.do_invoke      = _dispatch_object_no_invoke,
 );
 
-DISPATCH_VTABLE_INSTANCE(queue,
-	.do_type = DISPATCH_QUEUE_LEGACY_TYPE,
-	.do_kind = "queue",
-	.do_dispose = _dispatch_queue_dispose,
-	.do_suspend = _dispatch_queue_suspend,
-	.do_resume = _dispatch_queue_resume,
-	.do_push = _dispatch_queue_push,
-	.do_invoke = _dispatch_queue_invoke,
-	.do_wakeup = _dispatch_queue_wakeup,
-	.do_debug = dispatch_queue_debug,
-	.do_set_targetq = _dispatch_queue_set_target_queue,
+#if !DISPATCH_DATA_IS_BRIDGED_TO_NSDATA
+DISPATCH_VTABLE_INSTANCE(data,
+	.do_type        = DISPATCH_DATA_TYPE,
+	.do_dispose     = _dispatch_data_dispose,
+	.do_debug       = _dispatch_data_debug,
+	.do_invoke      = _dispatch_object_no_invoke,
 );
-
-DISPATCH_VTABLE_SUBCLASS_INSTANCE(queue_serial, queue,
-	.do_type = DISPATCH_QUEUE_SERIAL_TYPE,
-	.do_kind = "serial-queue",
-	.do_dispose = _dispatch_queue_dispose,
-	.do_suspend = _dispatch_queue_suspend,
-	.do_resume = _dispatch_queue_resume,
-	.do_finalize_activation = _dispatch_queue_finalize_activation,
-	.do_push = _dispatch_queue_push,
-	.do_invoke = _dispatch_queue_invoke,
-	.do_wakeup = _dispatch_queue_wakeup,
-	.do_debug = dispatch_queue_debug,
-	.do_set_targetq = _dispatch_queue_set_target_queue,
-);
-
-DISPATCH_VTABLE_SUBCLASS_INSTANCE(queue_concurrent, queue,
-	.do_type = DISPATCH_QUEUE_CONCURRENT_TYPE,
-	.do_kind = "concurrent-queue",
-	.do_dispose = _dispatch_queue_dispose,
-	.do_suspend = _dispatch_queue_suspend,
-	.do_resume = _dispatch_queue_resume,
-	.do_finalize_activation = _dispatch_queue_finalize_activation,
-	.do_push = _dispatch_queue_push,
-	.do_invoke = _dispatch_queue_invoke,
-	.do_wakeup = _dispatch_queue_wakeup,
-	.do_debug = dispatch_queue_debug,
-	.do_set_targetq = _dispatch_queue_set_target_queue,
-);
-
-
-DISPATCH_VTABLE_SUBCLASS_INSTANCE(queue_root, queue,
-	.do_type = DISPATCH_QUEUE_GLOBAL_ROOT_TYPE,
-	.do_kind = "global-queue",
-	.do_dispose = _dispatch_pthread_root_queue_dispose,
-	.do_push = _dispatch_root_queue_push,
-	.do_invoke = NULL,
-	.do_wakeup = _dispatch_root_queue_wakeup,
-	.do_debug = dispatch_queue_debug,
-);
-
-
-DISPATCH_VTABLE_SUBCLASS_INSTANCE(queue_main, queue,
-	.do_type = DISPATCH_QUEUE_SERIAL_TYPE,
-	.do_kind = "main-queue",
-	.do_dispose = _dispatch_queue_dispose,
-	.do_push = _dispatch_queue_push,
-	.do_invoke = _dispatch_queue_invoke,
-	.do_wakeup = _dispatch_main_queue_wakeup,
-	.do_debug = dispatch_queue_debug,
-);
-
-DISPATCH_VTABLE_SUBCLASS_INSTANCE(queue_runloop, queue,
-	.do_type = DISPATCH_QUEUE_RUNLOOP_TYPE,
-	.do_kind = "runloop-queue",
-	.do_dispose = _dispatch_runloop_queue_dispose,
-	.do_push = _dispatch_queue_push,
-	.do_invoke = _dispatch_queue_invoke,
-	.do_wakeup = _dispatch_runloop_queue_wakeup,
-	.do_debug = dispatch_queue_debug,
-);
-
-DISPATCH_VTABLE_SUBCLASS_INSTANCE(queue_mgr, queue,
-	.do_type = DISPATCH_QUEUE_MGR_TYPE,
-	.do_kind = "mgr-queue",
-	.do_push = _dispatch_mgr_queue_push,
-	.do_invoke = _dispatch_mgr_thread,
-	.do_wakeup = _dispatch_mgr_queue_wakeup,
-	.do_debug = dispatch_queue_debug,
-);
-
-DISPATCH_VTABLE_INSTANCE(queue_specific_queue,
-	.do_type = DISPATCH_QUEUE_SPECIFIC_TYPE,
-	.do_kind = "queue-context",
-	.do_dispose = _dispatch_queue_specific_queue_dispose,
-	.do_push = (void *)_dispatch_queue_push,
-	.do_invoke = (void *)_dispatch_queue_invoke,
-	.do_wakeup = (void *)_dispatch_queue_wakeup,
-	.do_debug = (void *)dispatch_queue_debug,
-);
+#endif
 
 DISPATCH_VTABLE_INSTANCE(queue_attr,
-	.do_type = DISPATCH_QUEUE_ATTR_TYPE,
-	.do_kind = "queue-attr",
+	.do_type        = DISPATCH_QUEUE_ATTR_TYPE,
+	.do_dispose     = _dispatch_object_no_dispose,
+	.do_debug       = _dispatch_object_missing_debug,
+	.do_invoke      = _dispatch_object_no_invoke,
 );
 
+#if HAVE_MACH
+DISPATCH_VTABLE_INSTANCE(mach_msg,
+	.do_type        = DISPATCH_MACH_MSG_TYPE,
+	.do_dispose     = _dispatch_mach_msg_dispose,
+	.do_debug       = _dispatch_mach_msg_debug,
+	.do_invoke      = _dispatch_mach_msg_invoke,
+);
+#endif // HAVE_MACH
+
+DISPATCH_VTABLE_INSTANCE(io,
+	.do_type        = DISPATCH_IO_TYPE,
+	.do_dispose     = _dispatch_io_dispose,
+	.do_debug       = _dispatch_io_debug,
+	.do_invoke      = _dispatch_object_no_invoke,
+);
+
+DISPATCH_VTABLE_INSTANCE(operation,
+	.do_type        = DISPATCH_OPERATION_TYPE,
+	.do_dispose     = _dispatch_operation_dispose,
+	.do_debug       = _dispatch_operation_debug,
+	.do_invoke      = _dispatch_object_no_invoke,
+);
+
+DISPATCH_VTABLE_INSTANCE(disk,
+	.do_type        = DISPATCH_DISK_TYPE,
+	.do_dispose     = _dispatch_disk_dispose,
+	.do_debug       = _dispatch_object_missing_debug,
+	.do_invoke      = _dispatch_object_no_invoke,
+);
+
+/*
+ * Dispatch queue cluster
+ */
+
+DISPATCH_NOINLINE
+static void
+_dispatch_queue_no_activate(dispatch_queue_class_t dqu,
+		DISPATCH_UNUSED bool *allow_resume)
+{
+	DISPATCH_INTERNAL_CRASH(dx_type(dqu._dq), "dq_activate called");
+}
+
+DISPATCH_VTABLE_INSTANCE(queue,
+	// This is the base class for queues, no objects of this type are made
+	.do_type        = _DISPATCH_QUEUE_CLUSTER,
+	.do_dispose     = _dispatch_object_no_dispose,
+	.do_debug       = _dispatch_queue_debug,
+	.do_invoke      = _dispatch_object_no_invoke,
+
+	.dq_activate    = _dispatch_queue_no_activate,
+);
+
+DISPATCH_VTABLE_INSTANCE(workloop,
+	.do_type        = DISPATCH_WORKLOOP_TYPE,
+	.do_dispose     = _dispatch_workloop_dispose,
+	.do_debug       = _dispatch_queue_debug,
+	.do_invoke      = _dispatch_workloop_invoke,
+
+	.dq_activate    = _dispatch_queue_no_activate,
+	.dq_wakeup      = _dispatch_workloop_wakeup,
+	.dq_push        = _dispatch_workloop_push,
+);
+
+DISPATCH_VTABLE_SUBCLASS_INSTANCE(queue_serial, lane,
+	.do_type        = DISPATCH_QUEUE_SERIAL_TYPE,
+	.do_dispose     = _dispatch_lane_dispose,
+	.do_debug       = _dispatch_queue_debug,
+	.do_invoke      = _dispatch_lane_invoke,
+
+	.dq_activate    = _dispatch_lane_activate,
+	.dq_wakeup      = _dispatch_lane_wakeup,
+	.dq_push        = _dispatch_lane_push,
+);
+
+DISPATCH_VTABLE_SUBCLASS_INSTANCE(queue_concurrent, lane,
+	.do_type        = DISPATCH_QUEUE_CONCURRENT_TYPE,
+	.do_dispose     = _dispatch_lane_dispose,
+	.do_debug       = _dispatch_queue_debug,
+	.do_invoke      = _dispatch_lane_invoke,
+
+	.dq_activate    = _dispatch_lane_activate,
+	.dq_wakeup      = _dispatch_lane_wakeup,
+	.dq_push        = _dispatch_lane_concurrent_push,
+);
+
+DISPATCH_VTABLE_SUBCLASS_INSTANCE(queue_global, lane,
+	.do_type        = DISPATCH_QUEUE_GLOBAL_ROOT_TYPE,
+	.do_dispose     = _dispatch_object_no_dispose,
+	.do_debug       = _dispatch_queue_debug,
+	.do_invoke      = _dispatch_object_no_invoke,
+
+	.dq_activate    = _dispatch_queue_no_activate,
+	.dq_wakeup      = _dispatch_root_queue_wakeup,
+	.dq_push        = _dispatch_root_queue_push,
+);
+
+#if DISPATCH_USE_PTHREAD_ROOT_QUEUES
+DISPATCH_VTABLE_SUBCLASS_INSTANCE(queue_pthread_root, lane,
+	.do_type        = DISPATCH_QUEUE_PTHREAD_ROOT_TYPE,
+	.do_dispose     = _dispatch_pthread_root_queue_dispose,
+	.do_debug       = _dispatch_queue_debug,
+	.do_invoke      = _dispatch_object_no_invoke,
+
+	.dq_activate    = _dispatch_queue_no_activate,
+	.dq_wakeup      = _dispatch_root_queue_wakeup,
+	.dq_push        = _dispatch_root_queue_push,
+);
+#endif // DISPATCH_USE_PTHREAD_ROOT_QUEUES
+
+DISPATCH_VTABLE_SUBCLASS_INSTANCE(queue_mgr, lane,
+	.do_type        = DISPATCH_QUEUE_MGR_TYPE,
+	.do_dispose     = _dispatch_object_no_dispose,
+	.do_debug       = _dispatch_queue_debug,
+#if DISPATCH_USE_MGR_THREAD
+	.do_invoke      = _dispatch_mgr_thread,
+#else
+	.do_invoke      = _dispatch_object_no_invoke,
+#endif
+
+	.dq_activate    = _dispatch_queue_no_activate,
+	.dq_wakeup      = _dispatch_mgr_queue_wakeup,
+	.dq_push        = _dispatch_mgr_queue_push,
+);
+
+DISPATCH_VTABLE_SUBCLASS_INSTANCE(queue_main, lane,
+	.do_type        = DISPATCH_QUEUE_MAIN_TYPE,
+	.do_dispose     = _dispatch_lane_dispose,
+	.do_debug       = _dispatch_queue_debug,
+	.do_invoke      = _dispatch_lane_invoke,
+
+	.dq_activate    = _dispatch_queue_no_activate,
+	.dq_wakeup      = _dispatch_main_queue_wakeup,
+	.dq_push        = _dispatch_main_queue_push,
+);
+
+#if DISPATCH_COCOA_COMPAT
+DISPATCH_VTABLE_SUBCLASS_INSTANCE(queue_runloop, lane,
+	.do_type        = DISPATCH_QUEUE_RUNLOOP_TYPE,
+	.do_dispose     = _dispatch_runloop_queue_dispose,
+	.do_debug       = _dispatch_queue_debug,
+	.do_invoke      = _dispatch_lane_invoke,
+
+	.dq_activate    = _dispatch_queue_no_activate,
+	.dq_wakeup      = _dispatch_runloop_queue_wakeup,
+	.dq_push        = _dispatch_lane_push,
+);
+#endif
+
 DISPATCH_VTABLE_INSTANCE(source,
-	.do_type = DISPATCH_SOURCE_KEVENT_TYPE,
-	.do_kind = "kevent-source",
-	.do_dispose = _dispatch_source_dispose,
-	.do_suspend = (void *)_dispatch_queue_suspend,
-	.do_resume = (void *)_dispatch_queue_resume,
-	.do_finalize_activation = _dispatch_source_finalize_activation,
-	.do_push = (void *)_dispatch_queue_push,
-	.do_invoke = _dispatch_source_invoke,
-	.do_wakeup = _dispatch_source_wakeup,
-	.do_debug = _dispatch_source_debug,
-	.do_set_targetq = (void *)_dispatch_queue_set_target_queue,
+	.do_type        = DISPATCH_SOURCE_KEVENT_TYPE,
+	.do_dispose     = _dispatch_source_dispose,
+	.do_debug       = _dispatch_source_debug,
+	.do_invoke      = _dispatch_source_invoke,
+
+	.dq_activate    = _dispatch_source_activate,
+	.dq_wakeup      = _dispatch_source_wakeup,
+	.dq_push        = _dispatch_lane_push,
 );
 
 #if HAVE_MACH
 DISPATCH_VTABLE_INSTANCE(mach,
-	.do_type = DISPATCH_MACH_CHANNEL_TYPE,
-	.do_kind = "mach-channel",
-	.do_dispose = _dispatch_mach_dispose,
-	.do_suspend = (void *)_dispatch_queue_suspend,
-	.do_resume = (void *)_dispatch_queue_resume,
-	.do_finalize_activation = _dispatch_mach_finalize_activation,
-	.do_push = (void *)_dispatch_queue_push,
-	.do_invoke = _dispatch_mach_invoke,
-	.do_wakeup = _dispatch_mach_wakeup,
-	.do_debug = _dispatch_mach_debug,
-	.do_set_targetq = (void *)_dispatch_queue_set_target_queue,
-);
+	.do_type        = DISPATCH_MACH_CHANNEL_TYPE,
+	.do_dispose     = _dispatch_mach_dispose,
+	.do_debug       = _dispatch_mach_debug,
+	.do_invoke      = _dispatch_mach_invoke,
 
-DISPATCH_VTABLE_INSTANCE(mach_msg,
-	.do_type = DISPATCH_MACH_MSG_TYPE,
-	.do_kind = "mach-msg",
-	.do_dispose = _dispatch_mach_msg_dispose,
-	.do_invoke = _dispatch_mach_msg_invoke,
-	.do_debug = _dispatch_mach_msg_debug,
+	.dq_activate    = _dispatch_mach_activate,
+	.dq_wakeup      = _dispatch_mach_wakeup,
+	.dq_push        = _dispatch_lane_push,
 );
 #endif // HAVE_MACH
 
-#if !DISPATCH_DATA_IS_BRIDGED_TO_NSDATA
-DISPATCH_VTABLE_INSTANCE(data,
-	.do_type = DISPATCH_DATA_TYPE,
-	.do_kind = "data",
-	.do_dispose = _dispatch_data_dispose,
-	.do_debug = _dispatch_data_debug,
-	.do_set_targetq = (void*)_dispatch_data_set_target_queue,
-);
-#endif
-
-DISPATCH_VTABLE_INSTANCE(io,
-	.do_type = DISPATCH_IO_TYPE,
-	.do_kind = "channel",
-	.do_dispose = _dispatch_io_dispose,
-	.do_debug = _dispatch_io_debug,
-	.do_set_targetq = _dispatch_io_set_target_queue,
-);
-
-DISPATCH_VTABLE_INSTANCE(operation,
-	.do_type = DISPATCH_OPERATION_TYPE,
-	.do_kind = "operation",
-	.do_dispose = _dispatch_operation_dispose,
-	.do_debug = _dispatch_operation_debug,
-);
-
-DISPATCH_VTABLE_INSTANCE(disk,
-	.do_type = DISPATCH_DISK_TYPE,
-	.do_kind = "disk",
-	.do_dispose = _dispatch_disk_dispose,
-);
-
-
 void
 _dispatch_vtable_init(void)
 {
@@ -578,13 +836,13 @@
 	size_t bufsz = sizeof(_dispatch_build);
 
 	sysctl(mib, 2, _dispatch_build, &bufsz, NULL, 0);
-#if TARGET_IPHONE_SIMULATOR
+#if TARGET_OS_SIMULATOR
 	char *sim_version = getenv("SIMULATOR_RUNTIME_BUILD_VERSION");
 	if (sim_version) {
 		(void)strlcat(_dispatch_build, " ", sizeof(_dispatch_build));
 		(void)strlcat(_dispatch_build, sim_version, sizeof(_dispatch_build));
 	}
-#endif // TARGET_IPHONE_SIMULATOR
+#endif // TARGET_OS_SIMULATOR
 
 #else
 	/*
@@ -596,6 +854,22 @@
 
 static dispatch_once_t _dispatch_build_pred;
 
+bool
+_dispatch_parse_bool(const char *v)
+{
+	return strcasecmp(v, "YES") == 0 || strcasecmp(v, "Y") == 0 ||
+			strcasecmp(v, "TRUE") == 0 || atoi(v);
+}
+
+DISPATCH_NOINLINE
+bool
+_dispatch_getenv_bool(const char *env, bool default_v)
+{
+	const char *v = getenv(env);
+
+	return v ? _dispatch_parse_bool(v) : default_v;
+}
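
A sketch of how these helpers are meant to be used during initialization. The variable name echoes the LIBDISPATCH_STRICT reason strings used further down; the actual parsing site is not part of this hunk:

	// Illustrative only: latch strict mode from the environment.
	if (_dispatch_getenv_bool("LIBDISPATCH_STRICT", false)) {
		_dispatch_mode |= DISPATCH_MODE_STRICT;
	}
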
+
 char*
 _dispatch_get_build(void)
 {
@@ -603,58 +877,163 @@
 	return _dispatch_build;
 }
 
-#define _dispatch_bug_log(msg, ...) do { \
-	static void *last_seen; \
-	void *ra = __builtin_return_address(0); \
-	if (last_seen != ra) { \
-		last_seen = ra; \
-		_dispatch_log(msg, ##__VA_ARGS__); \
-	} \
-} while(0)
+#define _dispatch_bug_log_is_repeated() ({ \
+		static void *last_seen; \
+		void *previous = last_seen; \
+		last_seen = __builtin_return_address(0); \
+		last_seen == previous; \
+	})
+
+#if HAVE_OS_FAULT_WITH_PAYLOAD
+__attribute__((__format__(__printf__,2,3)))
+static void
+_dispatch_fault(const char *reason, const char *fmt, ...)
+{
+	char buf[1024];
+	va_list ap;
+
+	va_start(ap, fmt);
+	vsnprintf(buf, sizeof(buf), fmt, ap);
+	va_end(ap);
+
+	if (_dispatch_mode & DISPATCH_MODE_STRICT) {
+#if TARGET_OS_IPHONE && !TARGET_OS_SIMULATOR
+	} else if (!(_dispatch_mode & DISPATCH_MODE_NO_FAULTS)) {
+		os_fault_with_payload(OS_REASON_LIBSYSTEM,
+				OS_REASON_LIBSYSTEM_CODE_FAULT,
+				buf, (uint32_t)strlen(buf) + 1, reason, 0);
+#else
+		(void)reason;
+#endif
+	}
+}
+#else
+#define _dispatch_fault(reason, fmt, ...)
+#endif // HAVE_OS_FAULT_WITH_PAYLOAD
+
+#define _dispatch_log_fault(reason, fmt, ...)  ({ \
+		if (!_dispatch_bug_log_is_repeated()) { \
+			_dispatch_log(fmt, ##__VA_ARGS__); \
+			_dispatch_fault(reason, fmt, ##__VA_ARGS__); \
+			if (_dispatch_mode & DISPATCH_MODE_STRICT) { \
+				DISPATCH_CLIENT_CRASH(0, reason); \
+			} \
+		} \
+	})
 
 void
 _dispatch_bug(size_t line, long val)
 {
 	dispatch_once_f(&_dispatch_build_pred, NULL, _dispatch_build_init);
-	_dispatch_bug_log("BUG in libdispatch: %s - %lu - 0x%lx",
-			_dispatch_build, (unsigned long)line, val);
-}
 
-void
-_dispatch_bug_client(const char* msg)
-{
-	_dispatch_bug_log("BUG in libdispatch client: %s", msg);
+	if (_dispatch_bug_log_is_repeated()) return;
+
+	_dispatch_log("BUG in libdispatch: %s - %lu - 0x%lx",
+			_dispatch_build, (unsigned long)line, val);
 }
 
 #if HAVE_MACH
 void
-_dispatch_bug_mach_client(const char* msg, mach_msg_return_t kr)
+_dispatch_bug_mach_client(const char *msg, mach_msg_return_t kr)
 {
-	_dispatch_bug_log("BUG in libdispatch client: %s %s - 0x%x", msg,
+	_dispatch_log_fault("LIBDISPATCH_STRICT: _dispatch_bug_mach_client",
+			"BUG in libdispatch client: %s %s - 0x%x", msg,
 			mach_error_string(kr), kr);
 }
 #endif
 
-void
-_dispatch_bug_kevent_client(const char* msg, const char* filter,
-		const char *operation, int err)
+void *
+_dispatch_continuation_get_function_symbol(dispatch_continuation_t dc)
 {
+	if (dc->dc_flags & DC_FLAG_BLOCK_WITH_PRIVATE_DATA) {
+		dispatch_block_private_data_t dpbd = _dispatch_block_get_data(dc->dc_ctxt);
+		return _dispatch_Block_invoke(dpbd->dbpd_block);
+	}
+	if (dc->dc_flags & DC_FLAG_BLOCK) {
+		return _dispatch_Block_invoke(dc->dc_ctxt);
+	}
+	return dc->dc_func;
+}
+
+void
+_dispatch_bug_kevent_client(const char *msg, const char *filter,
+		const char *operation, int err, uint64_t ident, uint64_t udata,
+		dispatch_unote_t du)
+{
+	dispatch_continuation_t dc;
+	dispatch_object_t dou;
+	void *func = NULL;
+
+	if (du._du) {
+		dou._do = _dispatch_wref2ptr(du._du->du_owner_wref);
+		switch (dx_type(dou._do)) {
+		case DISPATCH_SOURCE_KEVENT_TYPE:
+			dc = du._dr->ds_handler[DS_EVENT_HANDLER];
+			if (dc) func = _dispatch_continuation_get_function_symbol(dc);
+			break;
+#if HAVE_MACH
+		case DISPATCH_MACH_CHANNEL_TYPE:
+			func = du._dmrr->dmrr_handler_func;
+			break;
+#endif // HAVE_MACH
+		}
+		filter = dux_type(du._du)->dst_kind;
+	}
+
 	if (operation && err) {
-		_dispatch_bug_log("BUG in libdispatch client: %s[%s] %s: \"%s\" - 0x%x",
-				msg, filter, operation, strerror(err), err);
+		_dispatch_log_fault("LIBDISPATCH_STRICT: _dispatch_bug_kevent_client",
+				"BUG in libdispatch client: %s %s: \"%s\" - 0x%x "
+				"{ 0x%"PRIx64"[%s], ident: %"PRId64" / 0x%"PRIx64", handler: %p }",
+				msg, operation, strerror(err), err,
+				udata, filter, ident, ident, func);
 	} else if (operation) {
-		_dispatch_bug_log("BUG in libdispatch client: %s[%s] %s",
-				msg, filter, operation);
+		_dispatch_log_fault("LIBDISPATCH_STRICT: _dispatch_bug_kevent_client",
+				"BUG in libdispatch client: %s %s "
+				"{ 0x%"PRIx64"[%s], ident: %"PRId64" / 0x%"PRIx64", handler: %p }",
+				msg, operation, udata, filter, ident, ident, func);
 	} else {
-		_dispatch_bug_log("BUG in libdispatch: %s[%s]: \"%s\" - 0x%x",
-				msg, filter, strerror(err), err);
+		_dispatch_log_fault("LIBDISPATCH_STRICT: _dispatch_bug_kevent_client",
+				"BUG in libdispatch: %s: \"%s\" - 0x%x "
+				"{ 0x%"PRIx64"[%s], ident: %"PRId64" / 0x%"PRIx64", handler: %p }",
+				msg, strerror(err), err, udata, filter, ident, ident, func);
 	}
 }
 
 void
+_dispatch_bug_kevent_vanished(dispatch_unote_t du)
+{
+	dispatch_continuation_t dc;
+	dispatch_object_t dou;
+	void *func = NULL;
+
+	dou._do = _dispatch_wref2ptr(du._du->du_owner_wref);
+	switch (dx_type(dou._do)) {
+	case DISPATCH_SOURCE_KEVENT_TYPE:
+		dc = du._dr->ds_handler[DS_EVENT_HANDLER];
+		if (dc) func = _dispatch_continuation_get_function_symbol(dc);
+		break;
+ #if HAVE_MACH
+	case DISPATCH_MACH_CHANNEL_TYPE:
+		func = du._dmrr->dmrr_handler_func;
+		break;
+#endif // MACH
+	}
+	_dispatch_log_fault("LIBDISPATCH_STRICT: _dispatch_bug_kevent_vanished",
+			"BUG in libdispatch client: %s, monitored resource vanished before "
+			"the source cancel handler was invoked "
+			"{ %p[%s], ident: %d / 0x%x, handler: %p }",
+			dux_type(du._du)->dst_kind, dou._dq,
+			dou._dq->dq_label ? dou._dq->dq_label : "<unknown>",
+			du._du->du_ident, du._du->du_ident, func);
+}
+
+DISPATCH_NOINLINE DISPATCH_WEAK
+void
 _dispatch_bug_deprecated(const char *msg)
 {
-	_dispatch_bug_log("DEPRECATED USE in libdispatch client: %s", msg);
+	_dispatch_log_fault("LIBDISPATCH_STRICT: _dispatch_bug_deprecated",
+			"DEPRECATED USE in libdispatch client: %s; "
+			"set a breakpoint on _dispatch_bug_deprecated to debug", msg);
 }
 
 void
@@ -730,7 +1109,7 @@
 			gettimeofday(&tv, NULL);
 #endif
 #if DISPATCH_DEBUG
-			dispatch_log_basetime = _dispatch_absolute_time();
+			dispatch_log_basetime = _dispatch_uptime();
 #endif
 #if defined(_WIN32)
 			char szProgramName[MAX_PATH + 1] = {0};
@@ -768,7 +1147,7 @@
 #else
 	r = write(dispatch_logfile, buf, len);
 #endif
-	if (slowpath(r == -1) && errno == EINTR) {
+	if (unlikely(r == -1) && errno == EINTR) {
 		goto retry;
 	}
 }
@@ -783,7 +1162,7 @@
 
 #if DISPATCH_DEBUG
 	offset += dsnprintf(&buf[offset], bufsiz - offset, "%llu\t",
-			(unsigned long long)_dispatch_absolute_time() - dispatch_log_basetime);
+			(unsigned long long)_dispatch_uptime() - dispatch_log_basetime);
 #endif
 	r = vsnprintf(&buf[offset], bufsiz - offset, msg, ap);
 	if (r < 0) return;
@@ -805,7 +1184,7 @@
 _dispatch_vsyslog(const char *msg, va_list ap)
 {
 	char *str;
-    vasprintf(&str, msg, ap);
+	vasprintf(&str, msg, ap);
 	if (str) {
 		_dispatch_syslog(str);
 		free(str);
@@ -860,10 +1239,10 @@
 _dispatch_logv(const char *msg, size_t len, va_list *ap_ptr)
 {
 	dispatch_once_f(&_dispatch_logv_pred, NULL, _dispatch_logv_init);
-	if (slowpath(dispatch_log_disabled)) {
+	if (unlikely(dispatch_log_disabled)) {
 		return;
 	}
-	if (slowpath(dispatch_logfile != -1)) {
+	if (unlikely(dispatch_logfile != -1)) {
 		if (!ap_ptr) {
 			return _dispatch_log_file((char*)msg, len);
 		}
@@ -895,10 +1274,7 @@
 _dispatch_object_debug2(dispatch_object_t dou, char* buf, size_t bufsiz)
 {
 	DISPATCH_OBJECT_TFB(_dispatch_objc_debug, dou, buf, bufsiz);
-	if (dx_vtable(dou._do)->do_debug) {
-		return dx_debug(dou._do, buf, bufsiz);
-	}
-	return strlcpy(buf, "NULL vtable slot: ", bufsiz);
+	return dx_debug(dou._do, buf, bufsiz);
 }
 
 DISPATCH_NOINLINE
@@ -910,7 +1286,7 @@
 	int r;
 #if DISPATCH_DEBUG && !DISPATCH_USE_OS_DEBUG_LOG
 	offset += dsnprintf(&buf[offset], bufsiz - offset, "%llu\t\t%p\t",
-			(unsigned long long)_dispatch_absolute_time() - dispatch_log_basetime,
+			(unsigned long long)_dispatch_uptime() - dispatch_log_basetime,
 			(void *)_dispatch_thread_self());
 #endif
 	if (dou._do) {
@@ -980,7 +1356,7 @@
 _dispatch_calloc(size_t num_items, size_t size)
 {
 	void *buf;
-	while (!fastpath(buf = calloc(num_items, size))) {
+	while (unlikely(!(buf = calloc(num_items, size)))) {
 		_dispatch_temporary_resource_shortage();
 	}
 	return buf;
@@ -995,7 +1371,7 @@
 {
 #if HAVE_DYLD_IS_MEMORY_IMMUTABLE
 	size_t size = strlen(str) + 1;
-	if (slowpath(!_dyld_is_memory_immutable(str, size))) {
+	if (unlikely(!_dyld_is_memory_immutable(str, size))) {
 		char *clone = (char *) malloc(size);
 		if (dispatch_assume(clone)) {
 			memcpy(clone, str, size);
@@ -1018,8 +1394,8 @@
 {
 	dispatch_block_t rval;
 
-	if (fastpath(db)) {
-		while (!fastpath(rval = Block_copy(db))) {
+	if (likely(db)) {
+		while (unlikely(!(rval = Block_copy(db)))) {
 			_dispatch_temporary_resource_shortage();
 		}
 		return rval;
@@ -1058,7 +1434,7 @@
 {
 	_dispatch_get_tsd_base();
 	void *u = _dispatch_get_unwind_tsd();
-	if (fastpath(!u)) return f(ctxt);
+	if (likely(!u)) return f(ctxt);
 	_dispatch_set_unwind_tsd(NULL);
 	f(ctxt);
 	_dispatch_free_unwind_tsd();
@@ -1072,7 +1448,7 @@
 {
 	_dispatch_get_tsd_base();
 	void *u = _dispatch_get_unwind_tsd();
-	if (fastpath(!u)) return f(ctxt, i);
+	if (likely(!u)) return f(ctxt, i);
 	_dispatch_set_unwind_tsd(NULL);
 	f(ctxt, i);
 	_dispatch_free_unwind_tsd();
@@ -1089,7 +1465,7 @@
 {
 	_dispatch_get_tsd_base();
 	void *u = _dispatch_get_unwind_tsd();
-	if (fastpath(!u)) return f(ctxt, reason, dmsg);
+	if (likely(!u)) return f(ctxt, reason, dmsg);
 	_dispatch_set_unwind_tsd(NULL);
 	f(ctxt, reason, dmsg);
 	_dispatch_free_unwind_tsd();
@@ -1104,7 +1480,7 @@
 {
 	_dispatch_get_tsd_base();
 	void *u = _dispatch_get_unwind_tsd();
-	if (fastpath(!u)) return f(ctxt, reason, dmsg, error);
+	if (likely(!u)) return f(ctxt, reason, dmsg, error);
 	_dispatch_set_unwind_tsd(NULL);
 	f(ctxt, reason, dmsg, error);
 	_dispatch_free_unwind_tsd();
@@ -1132,7 +1508,7 @@
 {
 	_os_object_t obj;
 	dispatch_assert(size >= sizeof(struct _os_object_s));
-	while (!fastpath(obj = calloc(1u, size))) {
+	while (unlikely(!(obj = calloc(1u, size)))) {
 		_dispatch_temporary_resource_shortage();
 	}
 	obj->os_obj_isa = cls;
@@ -1157,7 +1533,7 @@
 _os_object_xref_dispose(_os_object_t obj)
 {
 	_os_object_xrefcnt_dispose_barrier(obj);
-	if (fastpath(obj->os_obj_isa->_os_obj_xref_dispose)) {
+	if (likely(obj->os_obj_isa->_os_obj_xref_dispose)) {
 		return obj->os_obj_isa->_os_obj_xref_dispose(obj);
 	}
 	return _os_object_release_internal(obj);
@@ -1167,7 +1543,7 @@
 _os_object_dispose(_os_object_t obj)
 {
 	_os_object_refcnt_dispose_barrier(obj);
-	if (fastpath(obj->os_obj_isa->_os_obj_dispose)) {
+	if (likely(obj->os_obj_isa->_os_obj_dispose)) {
 		return obj->os_obj_isa->_os_obj_dispose(obj);
 	}
 	return _os_object_dealloc(obj);
@@ -1176,7 +1552,7 @@
 void*
 os_retain(void *obj)
 {
-	if (fastpath(obj)) {
+	if (likely(obj)) {
 		return _os_object_retain(obj);
 	}
 	return obj;
@@ -1186,7 +1562,7 @@
 void
 os_release(void *obj)
 {
-	if (fastpath(obj)) {
+	if (likely(obj)) {
 		return _os_object_release(obj);
 	}
 }
@@ -1310,3 +1686,60 @@
 }
 
 #endif // HAVE_MACH
+#pragma mark -
+#pragma mark dispatch to XPC callbacks
+#if HAVE_MACH
+
+// Default dmxh_direct_message_handler callback that does not handle
+// messages inline.
+static bool
+_dispatch_mach_xpc_no_handle_message(
+		void *_Nullable context DISPATCH_UNUSED,
+		dispatch_mach_reason_t reason DISPATCH_UNUSED,
+		dispatch_mach_msg_t message DISPATCH_UNUSED,
+		mach_error_t error DISPATCH_UNUSED)
+{
+	return false;
+}
+
+// Default dmxh_msg_context_reply_queue callback that returns a NULL queue.
+static dispatch_queue_t
+_dispatch_mach_msg_context_no_async_reply_queue(
+		void *_Nonnull msg_context DISPATCH_UNUSED)
+{
+	return NULL;
+}
+
+// Default dmxh_async_reply_handler callback that crashes when called.
+DISPATCH_NORETURN
+static void
+_dispatch_mach_default_async_reply_handler(void *context DISPATCH_UNUSED,
+		dispatch_mach_reason_t reason DISPATCH_UNUSED,
+		dispatch_mach_msg_t message DISPATCH_UNUSED)
+{
+	DISPATCH_CLIENT_CRASH(_dispatch_mach_xpc_hooks,
+			"_dispatch_mach_default_async_reply_handler called");
+}
+
+// Default dmxh_enable_sigterm_notification callback that enables delivery of
+// SIGTERM notifications (for backwards compatibility).
+static bool
+_dispatch_mach_enable_sigterm(void *_Nullable context DISPATCH_UNUSED)
+{
+	return true;
+}
+
+// Callbacks from dispatch to XPC. The default is to not support any callbacks.
+const struct dispatch_mach_xpc_hooks_s _dispatch_mach_xpc_hooks_default = {
+	.version = DISPATCH_MACH_XPC_HOOKS_VERSION,
+	.dmxh_direct_message_handler = &_dispatch_mach_xpc_no_handle_message,
+	.dmxh_msg_context_reply_queue =
+			&_dispatch_mach_msg_context_no_async_reply_queue,
+	.dmxh_async_reply_handler = &_dispatch_mach_default_async_reply_handler,
+	.dmxh_enable_sigterm_notification = &_dispatch_mach_enable_sigterm,
+};
+
+dispatch_mach_xpc_hooks_t _dispatch_mach_xpc_hooks =
+		&_dispatch_mach_xpc_hooks_default;
+
+#endif // HAVE_MACH
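
The new block above installs a table of do-nothing defaults for the dispatch-to-XPC callbacks, so the library behaves sensibly until XPC registers real hooks. A freestanding sketch of the same versioned-defaults pattern (names below are illustrative, not the libdispatch types):

#include <stdbool.h>
#include <stddef.h>

typedef struct example_hooks_s {
	unsigned long version;                 /* bumped when slots are added */
	bool (*handle_message)(void *context); /* default: decline to handle  */
} example_hooks_s;

static bool
example_no_handle_message(void *context)
{
	(void)context;
	return false;
}

static const example_hooks_s example_hooks_default = {
	.version = 1,
	.handle_message = example_no_handle_message,
};

/* Consumers read through this pointer; a client may later swap in its own
 * table, provided every slot for its advertised version is populated. */
static const example_hooks_s *example_hooks = &example_hooks_default;
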
diff --git a/src/inline_internal.h b/src/inline_internal.h
index e857abe..67ecfc9 100644
--- a/src/inline_internal.h
+++ b/src/inline_internal.h
@@ -90,20 +90,44 @@
 #if DISPATCH_PURE_C
 
 DISPATCH_ALWAYS_INLINE
+static inline const char *
+_dispatch_object_class_name(dispatch_object_t dou)
+{
+#if USE_OBJC
+	return object_getClassName((id)dou._do) + strlen("OS_dispatch_");
+#else
+	return dx_vtable(dou._do)->do_kind;
+#endif
+}
+
+DISPATCH_ALWAYS_INLINE
+static inline bool
+_dispatch_object_is_global(dispatch_object_t dou)
+{
+	return dou._do->do_ref_cnt == DISPATCH_OBJECT_GLOBAL_REFCNT;
+}
+
+DISPATCH_ALWAYS_INLINE
+static inline bool
+_dispatch_object_is_root_or_base_queue(dispatch_object_t dou)
+{
+	return dx_hastypeflag(dou._do, QUEUE_ROOT) ||
+			dx_hastypeflag(dou._do, QUEUE_BASE);
+}
+
+DISPATCH_ALWAYS_INLINE
 static inline bool
 _dispatch_object_has_vtable(dispatch_object_t dou)
 {
-	uintptr_t dc_flags = dou._dc->dc_flags;
-
 	// vtables are pointers far away from the low page in memory
-	return dc_flags > 0xffful;
+	return dou._dc->dc_flags > 0xffful;
 }
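
As the comment notes, the same word in a continuation holds either a small dc_flags value or a vtable pointer, and the two are told apart purely by magnitude because no vtable can live in the unmapped low pages. A minimal standalone illustration of that trick (threshold taken from the code above):

#include <stdbool.h>
#include <stdint.h>

static inline bool
word_is_vtable_pointer(uintptr_t word)
{
	/* flag words stay below 0x1000; real pointers are far above the
	 * never-mapped first page(s) of the address space */
	return word > 0xffful;
}
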
 
 DISPATCH_ALWAYS_INLINE
 static inline bool
 _dispatch_object_is_queue(dispatch_object_t dou)
 {
-	return _dispatch_object_has_vtable(dou) && dx_vtable(dou._do)->do_push;
+	return _dispatch_object_has_vtable(dou) && dx_vtable(dou._dq)->dq_push;
 }
 
 DISPATCH_ALWAYS_INLINE
@@ -138,16 +162,23 @@
 	dispatch_queue_flags_t dq_flags;
 
 	if (!_dispatch_object_has_vtable(dou)) {
-		return (dou._dc->dc_flags & DISPATCH_OBJ_BARRIER_BIT);
+		return (dou._dc->dc_flags & DC_FLAG_BARRIER);
 	}
-	switch (dx_metatype(dou._do)) {
-	case _DISPATCH_QUEUE_TYPE:
-	case _DISPATCH_SOURCE_TYPE:
-		dq_flags = os_atomic_load2o(dou._dq, dq_atomic_flags, relaxed);
-		return dq_flags & DQF_BARRIER_BIT;
-	default:
+	if (dx_cluster(dou._do) != _DISPATCH_QUEUE_CLUSTER) {
 		return false;
 	}
+	dq_flags = os_atomic_load2o(dou._dq, dq_atomic_flags, relaxed);
+	return dq_flags & DQF_BARRIER_BIT;
+}
+
+DISPATCH_ALWAYS_INLINE
+static inline bool
+_dispatch_object_is_waiter(dispatch_object_t dou)
+{
+	if (_dispatch_object_has_vtable(dou)) {
+		return false;
+	}
+	return (dou._dc->dc_flags & (DC_FLAG_SYNC_WAITER | DC_FLAG_ASYNC_AND_WAIT));
 }
 
 DISPATCH_ALWAYS_INLINE
@@ -157,7 +188,7 @@
 	if (_dispatch_object_has_vtable(dou)) {
 		return false;
 	}
-	return (dou._dc->dc_flags & DISPATCH_OBJ_SYNC_WAITER_BIT);
+	return (dou._dc->dc_flags & DC_FLAG_SYNC_WAITER);
 }
 
 DISPATCH_ALWAYS_INLINE
@@ -167,17 +198,16 @@
 	if (_dispatch_object_has_vtable(dou)) {
 		return false;
 	}
-	return ((dou._dc->dc_flags &
-				(DISPATCH_OBJ_BARRIER_BIT | DISPATCH_OBJ_SYNC_WAITER_BIT)) ==
-				(DISPATCH_OBJ_SYNC_WAITER_BIT));
+	return ((dou._dc->dc_flags & (DC_FLAG_BARRIER | DC_FLAG_SYNC_WAITER)) ==
+				(DC_FLAG_SYNC_WAITER));
 }
 
 DISPATCH_ALWAYS_INLINE
 static inline _os_object_t
 _os_object_retain_internal_n_inline(_os_object_t obj, int n)
 {
-	int ref_cnt = _os_object_refcnt_add(obj, n);
-	if (unlikely(ref_cnt <= 0)) {
+	int ref_cnt = _os_object_refcnt_add_orig(obj, n);
+	if (unlikely(ref_cnt < 0)) {
 		_OS_OBJECT_CLIENT_CRASH("Resurrection of an object");
 	}
 	return obj;
@@ -239,6 +269,29 @@
 
 DISPATCH_ALWAYS_INLINE_NDEBUG
 static inline void
+_dispatch_retain_n_unsafe(dispatch_object_t dou, int n)
+{
+	// _dispatch_retain_*_unsafe assumes:
+	// - the object is not global
+	// - there's no refcount management bug
+	//
+	// This is meant to be used only when called between the update_tail and
+	// update_prev os_mpsc methods, so that the assembly of that critical window
+	// is as terse as possible (this window is a possible dequeuer starvation).
+	//
+	// Other code should use the safe variants at all times.
+	os_atomic_add2o(dou._os_obj, os_obj_ref_cnt, n, relaxed);
+}
+
+DISPATCH_ALWAYS_INLINE_NDEBUG
+static inline void
+_dispatch_retain_2_unsafe(dispatch_object_t dou)
+{
+	_dispatch_retain_n_unsafe(dou, 2);
+}
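
A self-contained rendering of what the unsafe retain boils down to, using C11 atomics instead of the os_atomic macros (illustrative only): a plain relaxed fetch-add with no resurrection check, sound only while the caller guarantees the object is neither global nor able to reach a zero refcount concurrently.

#include <stdatomic.h>

struct refcounted { _Atomic int ref_cnt; };

static inline void
retain_n_unsafe(struct refcounted *obj, int n)
{
	/* no validation: callers promise the count is already > 0 and stays so */
	atomic_fetch_add_explicit(&obj->ref_cnt, n, memory_order_relaxed);
}
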
+
+DISPATCH_ALWAYS_INLINE_NDEBUG
+static inline void
 _dispatch_release(dispatch_object_t dou)
 {
 	_os_object_release_internal_n_inline(dou._os_obj, 1);
@@ -288,9 +341,23 @@
 
 DISPATCH_ALWAYS_INLINE
 static inline void
-_dispatch_queue_retain_storage(dispatch_queue_t dq)
+_dispatch_retain_unote_owner(dispatch_unote_t du)
 {
-	int ref_cnt = os_atomic_inc2o(dq, dq_sref_cnt, relaxed);
+	_dispatch_retain_2(_dispatch_wref2ptr(du._du->du_owner_wref));
+}
+
+DISPATCH_ALWAYS_INLINE
+static inline void
+_dispatch_release_unote_owner_tailcall(dispatch_unote_t du)
+{
+	_dispatch_release_2_tailcall(_dispatch_wref2ptr(du._du->du_owner_wref));
+}
+
+DISPATCH_ALWAYS_INLINE
+static inline void
+_dispatch_queue_retain_storage(dispatch_queue_class_t dqu)
+{
+	int ref_cnt = os_atomic_inc2o(dqu._dq, dq_sref_cnt, relaxed);
 	if (unlikely(ref_cnt <= 0)) {
 		_OS_OBJECT_CLIENT_CRASH("Resurrection of an object");
 	}
@@ -298,21 +365,21 @@
 
 DISPATCH_ALWAYS_INLINE
 static inline void
-_dispatch_queue_release_storage(dispatch_queue_t dq)
+_dispatch_queue_release_storage(dispatch_queue_class_t dqu)
 {
 	// this refcount only delays the _dispatch_object_dealloc() and there's no
 	// need for visibility wrt to the allocation, the internal refcount already
 	// gives us that, and the object becomes immutable after the last internal
 	// refcount release.
-	int ref_cnt = os_atomic_dec2o(dq, dq_sref_cnt, relaxed);
+	int ref_cnt = os_atomic_dec2o(dqu._dq, dq_sref_cnt, relaxed);
 	if (unlikely(ref_cnt >= 0)) {
 		return;
 	}
 	if (unlikely(ref_cnt < -1)) {
 		_OS_OBJECT_CLIENT_CRASH("Over-release of an object");
 	}
-	dq->dq_state = 0xdead000000000000;
-	_dispatch_object_dealloc(dq);
+	dqu._dq->dq_state = 0xdead000000000000;
+	_dispatch_object_dealloc(dqu._dq);
 }
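
The storage refcount above only postpones _dispatch_object_dealloc(); it relies on the internal refcount for visibility, so relaxed ordering is enough. A freestanding sketch of the same dispose-on-underflow scheme (plain malloc/free stand in for the dispatch allocator, and the count is assumed to start at 0 for a single owner):

#include <stdatomic.h>
#include <stdlib.h>

struct storage { _Atomic int sref_cnt; };

static void
storage_release(struct storage *s)
{
	int ref = atomic_fetch_sub_explicit(&s->sref_cnt, 1,
			memory_order_relaxed) - 1;        /* new value after the drop */
	if (ref >= 0) return;                     /* other holders remain */
	if (ref < -1) abort();                    /* over-release bug */
	free(s);                                  /* -1 means the last drop: dispose */
}
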
 
 DISPATCH_ALWAYS_INLINE DISPATCH_NONNULL_ALL
@@ -323,7 +390,6 @@
 	_dispatch_retain(tq);
 	tq = os_atomic_xchg2o(dou._do, do_targetq, tq, release);
 	if (tq) _dispatch_release(tq);
-	_dispatch_object_debug(dou._do, "%s", __func__);
 }
 
 #endif // DISPATCH_PURE_C
@@ -386,8 +452,8 @@
 	if (dtf) {
 		dispatch_queue_t tq = dq->do_targetq;
 		if (tq) {
-			// redirections, dispatch_sync and dispatch_trysync_f may skip
-			// frames, so we need to simulate seeing the missing links
+			// redirections or dispatch_sync may skip frames,
+			// so we need to simulate seeing the missing links
 			it->dtfi_queue = tq;
 			if (dq == dtf->dtf_queue) {
 				it->dtfi_frame = dtf->dtf_prev;
@@ -429,25 +495,26 @@
 _dispatch_thread_frame_save_state(dispatch_thread_frame_t dtf)
 {
 	_dispatch_thread_getspecific_packed_pair(
-			dispatch_queue_key, dispatch_frame_key, (void **)&dtf->dtf_queue);
+			dispatch_queue_key, dispatch_frame_key, dtf->dtf_pair);
 }
 
 DISPATCH_ALWAYS_INLINE
 static inline void
-_dispatch_thread_frame_push(dispatch_thread_frame_t dtf, dispatch_queue_t dq)
+_dispatch_thread_frame_push(dispatch_thread_frame_t dtf,
+		dispatch_queue_class_t dqu)
 {
 	_dispatch_thread_frame_save_state(dtf);
-	_dispatch_thread_setspecific_pair(dispatch_queue_key, dq,
+	_dispatch_thread_setspecific_pair(dispatch_queue_key, dqu._dq,
 			dispatch_frame_key, dtf);
 }
 
 DISPATCH_ALWAYS_INLINE
 static inline void
 _dispatch_thread_frame_push_and_rebase(dispatch_thread_frame_t dtf,
-		dispatch_queue_t dq, dispatch_thread_frame_t new_base)
+		dispatch_queue_class_t dqu, dispatch_thread_frame_t new_base)
 {
 	_dispatch_thread_frame_save_state(dtf);
-	_dispatch_thread_setspecific_pair(dispatch_queue_key, dq,
+	_dispatch_thread_setspecific_pair(dispatch_queue_key, dqu._dq,
 			dispatch_frame_key, new_base);
 }
 
@@ -456,7 +523,7 @@
 _dispatch_thread_frame_pop(dispatch_thread_frame_t dtf)
 {
 	_dispatch_thread_setspecific_packed_pair(
-			dispatch_queue_key, dispatch_frame_key, (void **)&dtf->dtf_queue);
+			dispatch_queue_key, dispatch_frame_key, dtf->dtf_pair);
 }
 
 DISPATCH_ALWAYS_INLINE
@@ -464,8 +531,8 @@
 _dispatch_thread_frame_stash(dispatch_thread_frame_t dtf)
 {
 	_dispatch_thread_getspecific_pair(
-			dispatch_queue_key, (void **)&dtf->dtf_queue,
-			dispatch_frame_key, (void **)&dtf->dtf_prev);
+			dispatch_queue_key, &dtf->dtf_pair[0],
+			dispatch_frame_key, &dtf->dtf_pair[1]);
 	_dispatch_thread_frame_pop(dtf->dtf_prev);
 	return dtf->dtf_queue;
 }
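
The frame save/restore paths now go through a two-slot dtf_pair array so the queue and frame TSD keys can be read and written back together. A rough pthread-based sketch of that layout (hypothetical names; the real code uses libdispatch's packed-pair TSD helpers):

#include <pthread.h>

/* assume queue_key and frame_key were created with pthread_key_create() */
static pthread_key_t queue_key, frame_key;

typedef struct thread_frame_s {
	void *pair[2];                /* [0] = saved queue, [1] = saved frame */
} thread_frame_s;

static void
thread_frame_save(thread_frame_s *tf)
{
	tf->pair[0] = pthread_getspecific(queue_key);
	tf->pair[1] = pthread_getspecific(frame_key);
}

static void
thread_frame_restore(const thread_frame_s *tf)
{
	(void)pthread_setspecific(queue_key, tf->pair[0]);
	(void)pthread_setspecific(frame_key, tf->pair[1]);
}
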
@@ -547,100 +614,95 @@
 
 DISPATCH_ALWAYS_INLINE
 static inline dispatch_queue_flags_t
-_dispatch_queue_atomic_flags(dispatch_queue_t dq)
+_dispatch_queue_atomic_flags(dispatch_queue_class_t dqu)
 {
-	return os_atomic_load2o(dq, dq_atomic_flags, relaxed);
+	return os_atomic_load2o(dqu._dq, dq_atomic_flags, relaxed);
 }
 
 DISPATCH_ALWAYS_INLINE
 static inline dispatch_queue_flags_t
-_dispatch_queue_atomic_flags_set(dispatch_queue_t dq,
+_dispatch_queue_atomic_flags_set(dispatch_queue_class_t dqu,
 		dispatch_queue_flags_t bits)
 {
-	return os_atomic_or2o(dq, dq_atomic_flags, bits, relaxed);
+	return os_atomic_or2o(dqu._dq, dq_atomic_flags, bits, relaxed);
 }
 
 DISPATCH_ALWAYS_INLINE
 static inline dispatch_queue_flags_t
-_dispatch_queue_atomic_flags_set_and_clear_orig(dispatch_queue_t dq,
+_dispatch_queue_atomic_flags_set_and_clear_orig(dispatch_queue_class_t dqu,
 		dispatch_queue_flags_t add_bits, dispatch_queue_flags_t clr_bits)
 {
 	dispatch_queue_flags_t oflags, nflags;
-	os_atomic_rmw_loop2o(dq, dq_atomic_flags, oflags, nflags, relaxed, {
+	os_atomic_rmw_loop2o(dqu._dq, dq_atomic_flags, oflags, nflags, relaxed, {
 		nflags = (oflags | add_bits) & ~clr_bits;
+		if (nflags == oflags) os_atomic_rmw_loop_give_up(return oflags);
 	});
 	return oflags;
 }
 
 DISPATCH_ALWAYS_INLINE
 static inline dispatch_queue_flags_t
-_dispatch_queue_atomic_flags_set_and_clear(dispatch_queue_t dq,
+_dispatch_queue_atomic_flags_set_and_clear(dispatch_queue_class_t dqu,
 		dispatch_queue_flags_t add_bits, dispatch_queue_flags_t clr_bits)
 {
 	dispatch_queue_flags_t oflags, nflags;
-	os_atomic_rmw_loop2o(dq, dq_atomic_flags, oflags, nflags, relaxed, {
+	os_atomic_rmw_loop2o(dqu._dq, dq_atomic_flags, oflags, nflags, relaxed, {
 		nflags = (oflags | add_bits) & ~clr_bits;
+		if (nflags == oflags) os_atomic_rmw_loop_give_up(return oflags);
 	});
 	return nflags;
 }
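
Both flag-update loops gain an early give-up when the new flag word would equal the old one, skipping the store entirely. The same idea in portable C11 form (a sketch, not the os_atomic_rmw_loop machinery):

#include <stdatomic.h>
#include <stdint.h>

static uint32_t
flags_set_and_clear(_Atomic uint32_t *flags, uint32_t add_bits,
		uint32_t clr_bits)
{
	uint32_t oflags = atomic_load_explicit(flags, memory_order_relaxed);
	uint32_t nflags;
	do {
		nflags = (oflags | add_bits) & ~clr_bits;
		if (nflags == oflags) {
			return oflags;        /* nothing would change: skip the CAS */
		}
	} while (!atomic_compare_exchange_weak_explicit(flags, &oflags, nflags,
			memory_order_relaxed, memory_order_relaxed));
	return nflags;
}
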
 
 DISPATCH_ALWAYS_INLINE
 static inline dispatch_queue_flags_t
-_dispatch_queue_atomic_flags_set_orig(dispatch_queue_t dq,
+_dispatch_queue_atomic_flags_set_orig(dispatch_queue_class_t dqu,
 		dispatch_queue_flags_t bits)
 {
-	return os_atomic_or_orig2o(dq, dq_atomic_flags, bits, relaxed);
+	return os_atomic_or_orig2o(dqu._dq, dq_atomic_flags, bits, relaxed);
 }
 
 DISPATCH_ALWAYS_INLINE
 static inline dispatch_queue_flags_t
-_dispatch_queue_atomic_flags_clear(dispatch_queue_t dq,
+_dispatch_queue_atomic_flags_clear(dispatch_queue_class_t dqu,
 		dispatch_queue_flags_t bits)
 {
-	return os_atomic_and2o(dq, dq_atomic_flags, ~bits, relaxed);
+	return os_atomic_and2o(dqu._dq, dq_atomic_flags, ~bits, relaxed);
 }
 
 DISPATCH_ALWAYS_INLINE
 static inline bool
-_dispatch_queue_is_thread_bound(dispatch_queue_t dq)
+_dispatch_queue_is_thread_bound(dispatch_queue_class_t dqu)
 {
-	return _dispatch_queue_atomic_flags(dq) & DQF_THREAD_BOUND;
+	return _dispatch_queue_atomic_flags(dqu) & DQF_THREAD_BOUND;
 }
 
 DISPATCH_ALWAYS_INLINE
 static inline bool
-_dispatch_queue_cannot_trysync(dispatch_queue_t dq)
+_dispatch_queue_label_needs_free(dispatch_queue_class_t dqu)
 {
-	return _dispatch_queue_atomic_flags(dq) & DQF_CANNOT_TRYSYNC;
-}
-
-DISPATCH_ALWAYS_INLINE
-static inline bool
-_dispatch_queue_label_needs_free(dispatch_queue_t dq)
-{
-	return _dispatch_queue_atomic_flags(dq) & DQF_LABEL_NEEDS_FREE;
+	return _dispatch_queue_atomic_flags(dqu) & DQF_LABEL_NEEDS_FREE;
 }
 
 DISPATCH_ALWAYS_INLINE
 static inline dispatch_invoke_flags_t
-_dispatch_queue_autorelease_frequency(dispatch_queue_t dq)
+_dispatch_queue_autorelease_frequency(dispatch_queue_class_t dqu)
 {
 	const unsigned long factor =
 			DISPATCH_INVOKE_AUTORELEASE_ALWAYS / DQF_AUTORELEASE_ALWAYS;
-	dispatch_static_assert(factor > 0);
+	dispatch_assert(factor > 0);
 
-	dispatch_queue_flags_t qaf = _dispatch_queue_atomic_flags(dq);
+	dispatch_queue_flags_t qaf = _dispatch_queue_atomic_flags(dqu);
 
-	qaf &= _DQF_AUTORELEASE_MASK;
+	qaf &= (dispatch_queue_flags_t)_DQF_AUTORELEASE_MASK;
 	return (dispatch_invoke_flags_t)qaf * factor;
 }
 
 DISPATCH_ALWAYS_INLINE
 static inline dispatch_invoke_flags_t
-_dispatch_queue_merge_autorelease_frequency(dispatch_queue_t dq,
+_dispatch_queue_merge_autorelease_frequency(dispatch_queue_class_t dqu,
 		dispatch_invoke_flags_t flags)
 {
-	dispatch_invoke_flags_t qaf = _dispatch_queue_autorelease_frequency(dq);
+	dispatch_invoke_flags_t qaf = _dispatch_queue_autorelease_frequency(dqu);
 
 	if (qaf) {
 		flags &= ~_DISPATCH_INVOKE_AUTORELEASE_MASK;
@@ -651,9 +713,9 @@
 
 DISPATCH_ALWAYS_INLINE
 static inline bool
-_dispatch_queue_is_legacy(dispatch_queue_t dq)
+_dispatch_queue_is_mutable(dispatch_queue_class_t dqu)
 {
-	return _dispatch_queue_atomic_flags(dq) & DQF_LEGACY;
+	return _dispatch_queue_atomic_flags(dqu) & DQF_MUTABLE;
 }
 
 DISPATCH_ALWAYS_INLINE
@@ -683,15 +745,37 @@
 	return _dispatch_thread_getspecific(dispatch_wlh_key);
 }
 
+DISPATCH_ALWAYS_INLINE
+static inline dispatch_workloop_t
+_dispatch_wlh_to_workloop(dispatch_wlh_t wlh)
+{
+	if (wlh == DISPATCH_WLH_ANON) {
+		return NULL;
+	}
+	if (dx_metatype((dispatch_workloop_t)wlh) == _DISPATCH_WORKLOOP_TYPE) {
+		return (dispatch_workloop_t)wlh;
+	}
+	return NULL;
+}
+
+DISPATCH_ALWAYS_INLINE DISPATCH_PURE
+static inline dispatch_wlh_t
+_dispatch_get_event_wlh(void)
+{
+	dispatch_deferred_items_t ddi = _dispatch_deferred_items_get();
+	if (ddi) {
+		DISPATCH_COMPILER_CAN_ASSUME(ddi->ddi_wlh != DISPATCH_WLH_ANON);
+		return ddi->ddi_wlh;
+	}
+	return DISPATCH_WLH_ANON;
+}
+
 DISPATCH_ALWAYS_INLINE DISPATCH_PURE
 static inline dispatch_wlh_t
 _dispatch_get_wlh_reference(void)
 {
-	dispatch_wlh_t wlh = _dispatch_thread_getspecific(dispatch_wlh_key);
-	if (wlh != DISPATCH_WLH_ANON) {
-		wlh = (dispatch_wlh_t)((uintptr_t)wlh & ~DISPATCH_WLH_STORAGE_REF);
-	}
-	return wlh;
+	dispatch_wlh_t wlh = _dispatch_get_wlh();
+	return (dispatch_wlh_t)((uintptr_t)wlh & ~DISPATCH_WLH_STORAGE_REF);
 }
 
 DISPATCH_ALWAYS_INLINE
@@ -756,11 +840,10 @@
 static inline bool
 _dispatch_wlh_should_poll_unote(dispatch_unote_t du)
 {
-	if (likely(_dispatch_needs_to_return_to_kernel())) {
-		dispatch_wlh_t wlh = _dispatch_get_wlh();
-		return wlh != DISPATCH_WLH_ANON && du._du->du_wlh == wlh;
-	}
-	return false;
+	dispatch_deferred_items_t ddi = _dispatch_deferred_items_get();
+	return _dispatch_needs_to_return_to_kernel() && ddi &&
+			ddi->ddi_wlh != DISPATCH_WLH_ANON &&
+			_dispatch_unote_wlh(du) == ddi->ddi_wlh;
 }
 
 #endif // DISPATCH_PURE_C
@@ -1032,50 +1115,55 @@
 static inline void _dispatch_set_basepri_override_qos(dispatch_qos_t qos);
 static inline void _dispatch_reset_basepri(dispatch_priority_t dbp);
 static inline dispatch_priority_t _dispatch_set_basepri(dispatch_priority_t dbp);
-static inline bool _dispatch_queue_need_override_retain(
-		dispatch_queue_class_t dqu, dispatch_qos_t qos);
 
 #if DISPATCH_PURE_C
 
 // Note to later developers: ensure that any initialization changes are
 // made for statically allocated queues (i.e. _dispatch_main_q).
-static inline void
-_dispatch_queue_init(dispatch_queue_t dq, dispatch_queue_flags_t dqf,
+static inline dispatch_queue_class_t
+_dispatch_queue_init(dispatch_queue_class_t dqu, dispatch_queue_flags_t dqf,
 		uint16_t width, uint64_t initial_state_bits)
 {
 	uint64_t dq_state = DISPATCH_QUEUE_STATE_INIT_VALUE(width);
+	dispatch_queue_t dq = dqu._dq;
 
 	dispatch_assert((initial_state_bits & ~(DISPATCH_QUEUE_ROLE_MASK |
 			DISPATCH_QUEUE_INACTIVE)) == 0);
 
 	if (initial_state_bits & DISPATCH_QUEUE_INACTIVE) {
 		dq_state |= DISPATCH_QUEUE_INACTIVE + DISPATCH_QUEUE_NEEDS_ACTIVATION;
-		dq_state |= DLOCK_OWNER_MASK;
-		dq->do_ref_cnt += 2; // rdar://8181908 see _dispatch_queue_resume
+		dq->do_ref_cnt += 2; // rdar://8181908 see _dispatch_lane_resume
+		if (dx_metatype(dq) == _DISPATCH_SOURCE_TYPE) {
+			dq->do_ref_cnt++; // released when DSF_DELETED is set
+		}
 	}
 
 	dq_state |= (initial_state_bits & DISPATCH_QUEUE_ROLE_MASK);
-	dq->do_next = (struct dispatch_queue_s *)DISPATCH_OBJECT_LISTLESS;
+	dq->do_next = DISPATCH_OBJECT_LISTLESS;
 	dqf |= DQF_WIDTH(width);
 	os_atomic_store2o(dq, dq_atomic_flags, dqf, relaxed);
 	dq->dq_state = dq_state;
 	dq->dq_serialnum =
 			os_atomic_inc_orig(&_dispatch_queue_serial_numbers, relaxed);
+	return dqu;
 }
+#define _dispatch_queue_alloc(name, dqf, w, initial_state_bits) \
+		_dispatch_queue_init(_dispatch_object_alloc(DISPATCH_VTABLE(name),\
+				sizeof(struct dispatch_##name##_s)), dqf, w, initial_state_bits)
 
 /* Used by:
- * - _dispatch_queue_set_target_queue
+ * - _dispatch_lane_set_target_queue
  * - changing dispatch source handlers
  *
  * Tries to prevent concurrent wakeup of an inactive queue by suspending it.
  */
 DISPATCH_ALWAYS_INLINE DISPATCH_WARN_RESULT
 static inline bool
-_dispatch_queue_try_inactive_suspend(dispatch_queue_t dq)
+_dispatch_lane_try_inactive_suspend(dispatch_lane_class_t dqu)
 {
 	uint64_t old_state, new_state;
 
-	(void)os_atomic_rmw_loop2o(dq, dq_state, old_state, new_state, relaxed, {
+	(void)os_atomic_rmw_loop2o(dqu._dl, dq_state, old_state, new_state, relaxed, {
 		if (unlikely(!_dq_state_is_inactive(old_state))) {
 			os_atomic_rmw_loop_give_up(return false);
 		}
@@ -1089,7 +1177,7 @@
 		//
 		// We don't want to handle the side suspend count in a codepath that
 		// needs to be fast.
-		DISPATCH_CLIENT_CRASH(dq, "Too many calls to dispatch_suspend() "
+		DISPATCH_CLIENT_CRASH(0, "Too many calls to dispatch_suspend() "
 				"prior to calling dispatch_set_target_queue() "
 				"or dispatch_set_*_handler()");
 	}
@@ -1195,7 +1283,7 @@
 	os_atomic_rmw_loop2o(dq, dq_state, old_state, new_state, acquire, {
 		new_state = old_state;
 		if (unlikely(_dq_state_is_suspended(old_state))) {
-			os_atomic_rmw_loop_give_up(break);
+			new_state &= ~DISPATCH_QUEUE_ENQUEUED;
 		} else if (unlikely(_dq_state_drain_locked(old_state))) {
 			os_atomic_rmw_loop_give_up(break);
 		} else {
@@ -1206,7 +1294,7 @@
 	if (unlikely(!_dq_state_is_base_wlh(old_state) ||
 			!_dq_state_is_enqueued_on_target(old_state) ||
 			_dq_state_is_enqueued_on_manager(old_state))) {
-#if !__LP64__
+#if DISPATCH_SIZEOF_PTR == 4
 		old_state >>= 32;
 #endif
 		DISPATCH_INTERNAL_CRASH(old_state, "Invalid wlh state");
@@ -1217,38 +1305,6 @@
 			!_dq_state_drain_locked(old_state);
 }
 
-DISPATCH_ALWAYS_INLINE
-static inline void
-_dispatch_queue_mgr_lock(dispatch_queue_t dq)
-{
-	uint64_t old_state, new_state, set_owner_and_set_full_width =
-			_dispatch_lock_value_for_self() | DISPATCH_QUEUE_SERIAL_DRAIN_OWNED;
-
-	os_atomic_rmw_loop2o(dq, dq_state, old_state, new_state, acquire, {
-		new_state = old_state;
-		if (unlikely(!_dq_state_is_runnable(old_state) ||
-				_dq_state_drain_locked(old_state))) {
-			DISPATCH_INTERNAL_CRASH((uintptr_t)old_state,
-					"Locking the manager should not fail");
-		}
-		new_state &= DISPATCH_QUEUE_DRAIN_PRESERVED_BITS_MASK;
-		new_state |= set_owner_and_set_full_width;
-	});
-}
-
-DISPATCH_ALWAYS_INLINE
-static inline bool
-_dispatch_queue_mgr_unlock(dispatch_queue_t dq)
-{
-	uint64_t old_state, new_state;
-	os_atomic_rmw_loop2o(dq, dq_state, old_state, new_state, release, {
-		new_state = old_state - DISPATCH_QUEUE_SERIAL_DRAIN_OWNED;
-		new_state &= ~DISPATCH_QUEUE_DRAIN_UNLOCK_MASK;
-		new_state &= ~DISPATCH_QUEUE_MAX_QOS_MASK;
-	});
-	return _dq_state_is_dirty(old_state);
-}
-
 /* Used by _dispatch_barrier_{try,}sync
  *
  * Note, this fails if any of e:1 or dl!=0, but that allows this code to be a
@@ -1262,11 +1318,13 @@
  */
 DISPATCH_ALWAYS_INLINE DISPATCH_WARN_RESULT
 static inline bool
-_dispatch_queue_try_acquire_barrier_sync(dispatch_queue_t dq, uint32_t tid)
+_dispatch_queue_try_acquire_barrier_sync_and_suspend(dispatch_lane_t dq,
+		uint32_t tid, uint64_t suspend_count)
 {
 	uint64_t init  = DISPATCH_QUEUE_STATE_INIT_VALUE(dq->dq_width);
 	uint64_t value = DISPATCH_QUEUE_WIDTH_FULL_BIT | DISPATCH_QUEUE_IN_BARRIER |
-			_dispatch_lock_value_from_tid(tid);
+			_dispatch_lock_value_from_tid(tid) |
+			(suspend_count * DISPATCH_QUEUE_SUSPEND_INTERVAL);
 	uint64_t old_state, new_state;
 
 	return os_atomic_rmw_loop2o(dq, dq_state, old_state, new_state, acquire, {
@@ -1278,6 +1336,13 @@
 	});
 }
 
+DISPATCH_ALWAYS_INLINE DISPATCH_WARN_RESULT
+static inline bool
+_dispatch_queue_try_acquire_barrier_sync(dispatch_queue_class_t dq, uint32_t tid)
+{
+	return _dispatch_queue_try_acquire_barrier_sync_and_suspend(dq._dl, tid, 0);
+}
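
The suspend-aware variant folds the caller's suspend count into the value it tries to install, and the plain wrapper simply passes a count of zero. Stripped of the role bits and width bookkeeping, the acquisition reduces to one conditional CAS against the idle state (a reduced sketch with hypothetical bit names, not the full state machine):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

static bool
try_acquire_barrier(_Atomic uint64_t *dq_state, uint64_t idle_state,
		uint64_t owner_and_barrier_bits)
{
	uint64_t expected = idle_state;
	/* succeeds only if the queue is exactly in its idle/init state */
	return atomic_compare_exchange_strong_explicit(dq_state, &expected,
			idle_state | owner_and_barrier_bits,
			memory_order_acquire, memory_order_relaxed);
}
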
+
 /* Used by _dispatch_sync for root queues and some drain codepaths
  *
  * Root queues have no strict ordering and dispatch_sync() always goes through.
@@ -1288,10 +1353,9 @@
  */
 DISPATCH_ALWAYS_INLINE
 static inline void
-_dispatch_queue_reserve_sync_width(dispatch_queue_t dq)
+_dispatch_queue_reserve_sync_width(dispatch_lane_t dq)
 {
-	(void)os_atomic_add2o(dq, dq_state,
-			DISPATCH_QUEUE_WIDTH_INTERVAL, relaxed);
+	os_atomic_add2o(dq, dq_state, DISPATCH_QUEUE_WIDTH_INTERVAL, relaxed);
 }
 
 /* Used by _dispatch_sync on non-serial queues
@@ -1301,7 +1365,7 @@
  */
 DISPATCH_ALWAYS_INLINE DISPATCH_WARN_RESULT
 static inline bool
-_dispatch_queue_try_reserve_sync_width(dispatch_queue_t dq)
+_dispatch_queue_try_reserve_sync_width(dispatch_lane_t dq)
 {
 	uint64_t old_state, new_state;
 
@@ -1323,43 +1387,6 @@
 	});
 }
 
-/* Used by _dispatch_apply_redirect
- *
- * Try to acquire at most da_width and returns what could be acquired,
- * possibly 0
- */
-DISPATCH_ALWAYS_INLINE DISPATCH_WARN_RESULT
-static inline int32_t
-_dispatch_queue_try_reserve_apply_width(dispatch_queue_t dq, int32_t da_width)
-{
-	uint64_t old_state, new_state;
-	int32_t width;
-
-	(void)os_atomic_rmw_loop2o(dq, dq_state, old_state, new_state, relaxed, {
-		width = (int32_t)_dq_state_available_width(old_state);
-		if (unlikely(!width)) {
-			os_atomic_rmw_loop_give_up(return 0);
-		}
-		if (width > da_width) {
-			width = da_width;
-		}
-		new_state = old_state + (uint64_t)width * DISPATCH_QUEUE_WIDTH_INTERVAL;
-	});
-	return width;
-}
-
-/* Used by _dispatch_apply_redirect
- *
- * Release width acquired by _dispatch_queue_try_acquire_width
- */
-DISPATCH_ALWAYS_INLINE
-static inline void
-_dispatch_queue_relinquish_width(dispatch_queue_t dq, int32_t da_width)
-{
-	(void)os_atomic_sub2o(dq, dq_state,
-			(uint64_t)da_width * DISPATCH_QUEUE_WIDTH_INTERVAL, relaxed);
-}
-
 /* Used by target-queue recursing code
  *
  * Initial state must be { sc:0, ib:0, qf:0, pb:0, d:0 }
@@ -1367,7 +1394,7 @@
  */
 DISPATCH_ALWAYS_INLINE DISPATCH_WARN_RESULT
 static inline bool
-_dispatch_queue_try_acquire_async(dispatch_queue_t dq)
+_dispatch_queue_try_acquire_async(dispatch_lane_t dq)
 {
 	uint64_t old_state, new_state;
 
@@ -1393,7 +1420,7 @@
  */
 DISPATCH_ALWAYS_INLINE DISPATCH_WARN_RESULT
 static inline bool
-_dispatch_queue_try_upgrade_full_width(dispatch_queue_t dq, uint64_t owned)
+_dispatch_queue_try_upgrade_full_width(dispatch_lane_t dq, uint64_t owned)
 {
 	uint64_t old_state, new_state;
 	uint64_t pending_barrier_width = DISPATCH_QUEUE_PENDING_BARRIER +
@@ -1421,15 +1448,16 @@
  */
 DISPATCH_ALWAYS_INLINE
 static inline uint64_t
-_dispatch_queue_adjust_owned(dispatch_queue_t dq, uint64_t owned,
+_dispatch_queue_adjust_owned(dispatch_queue_class_t dq, uint64_t owned,
 		struct dispatch_object_s *next_dc)
 {
+	uint16_t dq_width = dq._dq->dq_width;
 	uint64_t reservation;
 
-	if (unlikely(dq->dq_width > 1)) {
+	if (unlikely(dq_width > 1)) {
 		if (next_dc && _dispatch_object_is_barrier(next_dc)) {
 			reservation  = DISPATCH_QUEUE_PENDING_BARRIER;
-			reservation += (dq->dq_width - 1) * DISPATCH_QUEUE_WIDTH_INTERVAL;
+			reservation += (dq_width - 1) * DISPATCH_QUEUE_WIDTH_INTERVAL;
 			owned -= reservation;
 		}
 	}
@@ -1454,7 +1482,7 @@
 		new_state  = old_state - owned;
 		new_state &= ~DISPATCH_QUEUE_DRAIN_UNLOCK_MASK;
 		if (unlikely(_dq_state_is_suspended(old_state))) {
-			new_state |= DLOCK_OWNER_MASK;
+			// nothing to do
 		} else if (unlikely(_dq_state_is_dirty(old_state))) {
 			os_atomic_rmw_loop_give_up({
 				// just renew the drain lock with an acquire barrier, to see
@@ -1481,101 +1509,123 @@
 #pragma mark -
 #pragma mark os_mpsc_queue
 
-// type_t * {volatile,const,_Atomic,...} -> type_t *
-// type_t[] -> type_t *
-#define os_unqualified_pointer_type(expr) \
-		__typeof__(__typeof__(*(expr)) *)
+#define _os_mpsc_head(q, _ns, ...)   &(q)->_ns##_head ##__VA_ARGS__
+#define _os_mpsc_tail(q, _ns, ...)   &(q)->_ns##_tail ##__VA_ARGS__
 
-#define os_mpsc_node_type(q, _ns)  \
-		os_unqualified_pointer_type((q)->_ns##_head)
+#define os_mpsc(q, _ns, ...)   (q, _ns, __VA_ARGS__)
+
+#define os_mpsc_node_type(Q) _os_atomic_basetypeof(_os_mpsc_head Q)
 
 //
 // Multi Producer calls, can be used safely concurrently
 //
 
 // Returns true when the queue was empty and the head must be set
-#define os_mpsc_push_update_tail_list(q, _ns, head, tail, _o_next)  ({ \
-		os_mpsc_node_type(q, _ns) _head = (head), _tail = (tail), _prev; \
-		_tail->_o_next = NULL; \
-		_prev = os_atomic_xchg2o((q), _ns##_tail, _tail, release); \
+#define os_mpsc_push_update_tail(Q, tail, _o_next)  ({ \
+		os_mpsc_node_type(Q) _tl = (tail); \
+		os_atomic_store2o(_tl, _o_next, NULL, relaxed); \
+		os_atomic_xchg(_os_mpsc_tail Q, _tl, release); \
+	})
+
+#define os_mpsc_push_was_empty(prev) ((prev) == NULL)
+
+#define os_mpsc_push_update_prev(Q, prev, head, _o_next)  ({ \
+		os_mpsc_node_type(Q) _prev = (prev); \
 		if (likely(_prev)) { \
-			os_atomic_store2o(_prev, _o_next, _head, relaxed); \
+			(void)os_atomic_store2o(_prev, _o_next, (head), relaxed); \
+		} else { \
+			(void)os_atomic_store(_os_mpsc_head Q, (head), relaxed); \
 		} \
-		(_prev == NULL); \
+	})
+
+#define os_mpsc_push_list(Q, head, tail, _o_next)  ({ \
+		os_mpsc_node_type(Q) _token; \
+		_token = os_mpsc_push_update_tail(Q, tail, _o_next); \
+		os_mpsc_push_update_prev(Q, _token, head, _o_next); \
+		os_mpsc_push_was_empty(_token); \
 	})
 
 // Returns true when the queue was empty and the head must be set
-#define os_mpsc_push_update_tail(q, _ns, o, _o_next)  ({ \
-		os_mpsc_node_type(q, _ns) _o = (o); \
-		os_mpsc_push_update_tail_list(q, _ns, _o, _o, _o_next); \
-	})
-
-#define os_mpsc_push_update_head(q, _ns, o)  ({ \
-		os_atomic_store2o((q), _ns##_head, o, relaxed); \
+#define os_mpsc_push_item(Q, tail, _o_next)  ({ \
+		os_mpsc_node_type(Q) _tail = (tail); \
+		os_mpsc_push_list(Q, _tail, _tail, _o_next); \
 	})
 
 //
 // Single Consumer calls, can NOT be used safely concurrently
 //
 
-#define os_mpsc_get_head(q, _ns) \
-		_dispatch_wait_until(os_atomic_load2o(q, _ns##_head, dependency))
+#define os_mpsc_looks_empty(Q) \
+		(os_atomic_load(_os_mpsc_tail Q, relaxed) == NULL)
 
-#define os_mpsc_get_next(_n, _o_next) \
-		_dispatch_wait_until(os_atomic_load2o(_n, _o_next, dependency))
+#define os_mpsc_get_head(Q)  ({ \
+		__typeof__(_os_mpsc_head Q) __n = _os_mpsc_head Q; \
+		os_mpsc_node_type(Q) _node; \
+		_node = os_atomic_load(__n, dependency); \
+		if (unlikely(_node == NULL)) { \
+			_node = _dispatch_wait_for_enqueuer((void **)__n); \
+		} \
+		_node; \
+	})
 
-#define os_mpsc_pop_head(q, _ns, head, _o_next)  ({ \
-		__typeof__(q) _q = (q); \
-		os_mpsc_node_type(_q, _ns) _head = (head), _n; \
+#define os_mpsc_get_next(_n, _o_next)  ({ \
+		__typeof__(_n) __n = (_n); \
+		_os_atomic_basetypeof(&__n->_o_next) _node; \
+		_node = os_atomic_load(&__n->_o_next, dependency); \
+		if (unlikely(_node == NULL)) { \
+			_node = _dispatch_wait_for_enqueuer((void **)&__n->_o_next); \
+		} \
+		_node; \
+	})
+
+#define os_mpsc_pop_head(Q, head, _o_next)  ({ \
+		os_mpsc_node_type(Q) _head = (head), _n; \
 		_n = os_atomic_load2o(_head, _o_next, dependency); \
-		os_atomic_store2o(_q, _ns##_head, _n, relaxed); \
+		os_atomic_store(_os_mpsc_head Q, _n, relaxed); \
 		/* 22708742: set tail to NULL with release, so that NULL write */ \
 		/* to head above doesn't clobber head from concurrent enqueuer */ \
 		if (unlikely(!_n && \
-				!os_atomic_cmpxchg2o(_q, _ns##_tail, _head, NULL, release))) { \
+				!os_atomic_cmpxchg(_os_mpsc_tail Q, _head, NULL, release))) { \
 			_n = os_mpsc_get_next(_head, _o_next); \
-			os_atomic_store2o(_q, _ns##_head, _n, relaxed); \
+			os_atomic_store(_os_mpsc_head Q, _n, relaxed); \
 		} \
 		_n; \
 	})
 
-#define os_mpsc_undo_pop_head(q, _ns, head, next, _o_next)  ({ \
-		__typeof__(q) _q = (q); \
-		os_mpsc_node_type(_q, _ns) _head = (head), _n = (next); \
+#define os_mpsc_undo_pop_list(Q, head, tail, next, _o_next)  ({ \
+		os_mpsc_node_type(Q) _hd = (head), _tl = (tail), _n = (next); \
+		os_atomic_store2o(_tl, _o_next, _n, relaxed); \
 		if (unlikely(!_n && \
-				!os_atomic_cmpxchg2o(_q, _ns##_tail, NULL, _head, relaxed))) { \
-			_n = os_mpsc_get_head(q, _ns); \
-			os_atomic_store2o(_head, _o_next, _n, relaxed); \
+				!os_atomic_cmpxchg(_os_mpsc_tail Q, NULL, _tl, release))) { \
+			_n = os_mpsc_get_head(Q); \
+			os_atomic_store2o(_tl, _o_next, _n, relaxed); \
 		} \
-		os_atomic_store2o(_q, _ns##_head, _head, relaxed); \
+		os_atomic_store(_os_mpsc_head Q, _hd, relaxed); \
 	})
 
-#define os_mpsc_capture_snapshot(q, _ns, tail)  ({ \
-		__typeof__(q) _q = (q); \
-		os_mpsc_node_type(_q, _ns) _head = os_mpsc_get_head(q, _ns); \
-		os_atomic_store2o(_q, _ns##_head, NULL, relaxed); \
+#define os_mpsc_undo_pop_head(Q, head, next, _o_next) ({ \
+		os_mpsc_node_type(Q) _head = (head); \
+		os_mpsc_undo_pop_list(Q, _head, _head, next, _o_next); \
+	})
+
+#define os_mpsc_capture_snapshot(Q, tail)  ({ \
+		os_mpsc_node_type(Q) _head = os_mpsc_get_head(Q); \
+		os_atomic_store(_os_mpsc_head Q, NULL, relaxed); \
 		/* 22708742: set tail to NULL with release, so that NULL write */ \
 		/* to head above doesn't clobber head from concurrent enqueuer */ \
-		*(tail) = os_atomic_xchg2o(_q, _ns##_tail, NULL, release); \
+		*(tail) = os_atomic_xchg(_os_mpsc_tail Q, NULL, release); \
 		_head; \
 	})
 
 #define os_mpsc_pop_snapshot_head(head, tail, _o_next) ({ \
-		os_unqualified_pointer_type(head) _head = (head), _n = NULL; \
-		if (_head != (tail)) { \
-			_n = os_mpsc_get_next(_head, _o_next); \
-		}; \
-		_n; })
+		__typeof__(head) _head = (head), _tail = (tail), _n = NULL; \
+		if (_head != _tail) _n = os_mpsc_get_next(_head, _o_next); \
+		_n; \
+	})
 
-#define os_mpsc_prepend(q, _ns, head, tail, _o_next)  ({ \
-		__typeof__(q) _q = (q); \
-		os_mpsc_node_type(_q, _ns) _head = (head), _tail = (tail), _n; \
-		os_atomic_store2o(_tail, _o_next, NULL, relaxed); \
-		if (unlikely(!os_atomic_cmpxchg2o(_q, _ns##_tail, NULL, _tail, release))) { \
-			_n = os_mpsc_get_head(q, _ns); \
-			os_atomic_store2o(_tail, _o_next, _n, relaxed); \
-		} \
-		os_atomic_store2o(_q, _ns##_head, _head, relaxed); \
+#define os_mpsc_prepend(Q, head, tail, _o_next)  ({ \
+		os_mpsc_node_type(Q) _n = os_atomic_load(_os_mpsc_head Q, relaxed); \
+		os_mpsc_undo_pop_list(Q, head, tail, _n, _o_next); \
 	})
 
 #pragma mark -
@@ -1583,7 +1633,7 @@
 
 DISPATCH_ALWAYS_INLINE
 static inline bool
-_dispatch_queue_sidelock_trylock(dispatch_queue_t dq, dispatch_qos_t qos)
+_dispatch_queue_sidelock_trylock(dispatch_lane_t dq, dispatch_qos_t qos)
 {
 	dispatch_tid owner;
 	if (_dispatch_unfair_lock_trylock(&dq->dq_sidelock, &owner)) {
@@ -1596,14 +1646,14 @@
 
 DISPATCH_ALWAYS_INLINE
 static inline void
-_dispatch_queue_sidelock_lock(dispatch_queue_t dq)
+_dispatch_queue_sidelock_lock(dispatch_lane_t dq)
 {
 	return _dispatch_unfair_lock_lock(&dq->dq_sidelock);
 }
 
 DISPATCH_ALWAYS_INLINE
 static inline bool
-_dispatch_queue_sidelock_tryunlock(dispatch_queue_t dq)
+_dispatch_queue_sidelock_tryunlock(dispatch_lane_t dq)
 {
 	if (_dispatch_unfair_lock_tryunlock(&dq->dq_sidelock)) {
 		return true;
@@ -1617,7 +1667,7 @@
 
 DISPATCH_ALWAYS_INLINE
 static inline void
-_dispatch_queue_sidelock_unlock(dispatch_queue_t dq)
+_dispatch_queue_sidelock_unlock(dispatch_lane_t dq)
 {
 	if (_dispatch_unfair_lock_unlock_had_failed_trylock(&dq->dq_sidelock)) {
 		// Ensure that the root queue sees that this thread was overridden.
@@ -1638,106 +1688,73 @@
 }
 
 DISPATCH_ALWAYS_INLINE
-static inline void
-_dispatch_queue_set_current(dispatch_queue_t dq)
+static inline dispatch_queue_t
+_dispatch_queue_get_current_or_default(void)
 {
-	_dispatch_thread_setspecific(dispatch_queue_key, dq);
+	int idx = DISPATCH_ROOT_QUEUE_IDX_DEFAULT_QOS_OVERCOMMIT;
+	return _dispatch_queue_get_current() ?: _dispatch_root_queues[idx]._as_dq;
+}
+
+DISPATCH_ALWAYS_INLINE
+static inline void
+_dispatch_queue_set_current(dispatch_queue_class_t dqu)
+{
+	_dispatch_thread_setspecific(dispatch_queue_key, dqu._dq);
 }
 
 DISPATCH_ALWAYS_INLINE
 static inline struct dispatch_object_s*
-_dispatch_queue_head(dispatch_queue_t dq)
+_dispatch_queue_get_head(dispatch_lane_class_t dq)
 {
-	return os_mpsc_get_head(dq, dq_items);
+	return os_mpsc_get_head(os_mpsc(dq._dl, dq_items));
 }
 
 DISPATCH_ALWAYS_INLINE
 static inline struct dispatch_object_s*
-_dispatch_queue_next(dispatch_queue_t dq, struct dispatch_object_s *dc)
+_dispatch_queue_pop_head(dispatch_lane_class_t dq, struct dispatch_object_s *dc)
 {
-	return os_mpsc_pop_head(dq, dq_items, dc, do_next);
+	return os_mpsc_pop_head(os_mpsc(dq._dl, dq_items), dc, do_next);
 }
 
 DISPATCH_ALWAYS_INLINE
 static inline bool
-_dispatch_queue_push_update_tail(dispatch_queue_t dq,
-		struct dispatch_object_s *tail)
+_dispatch_queue_push_item(dispatch_lane_class_t dqu, dispatch_object_t dou)
 {
-	// if we crash here with a value less than 0x1000, then we are
-	// at a known bug in client code. for example, see
-	// _dispatch_queue_dispose or _dispatch_atfork_child
-	return os_mpsc_push_update_tail(dq, dq_items, tail, do_next);
-}
-
-DISPATCH_ALWAYS_INLINE
-static inline bool
-_dispatch_queue_push_update_tail_list(dispatch_queue_t dq,
-		struct dispatch_object_s *head, struct dispatch_object_s *tail)
-{
-	// if we crash here with a value less than 0x1000, then we are
-	// at a known bug in client code. for example, see
-	// _dispatch_queue_dispose or _dispatch_atfork_child
-	return os_mpsc_push_update_tail_list(dq, dq_items, head, tail, do_next);
+	return os_mpsc_push_item(os_mpsc(dqu._dl, dq_items), dou._do, do_next);
 }
 
 DISPATCH_ALWAYS_INLINE
 static inline void
-_dispatch_queue_push_update_head(dispatch_queue_t dq,
-		struct dispatch_object_s *head)
+_dispatch_root_queue_push_inline(dispatch_queue_global_t dq,
+		dispatch_object_t _head, dispatch_object_t _tail, int n)
 {
-	os_mpsc_push_update_head(dq, dq_items, head);
-}
-
-DISPATCH_ALWAYS_INLINE
-static inline void
-_dispatch_root_queue_push_inline(dispatch_queue_t dq, dispatch_object_t _head,
-		dispatch_object_t _tail, int n)
-{
-	struct dispatch_object_s *head = _head._do, *tail = _tail._do;
-	if (unlikely(_dispatch_queue_push_update_tail_list(dq, head, tail))) {
-		_dispatch_queue_push_update_head(dq, head);
-		return _dispatch_global_queue_poke(dq, n, 0);
+	struct dispatch_object_s *hd = _head._do, *tl = _tail._do;
+	if (unlikely(os_mpsc_push_list(os_mpsc(dq, dq_items), hd, tl, do_next))) {
+		return _dispatch_root_queue_poke(dq, n, 0);
 	}
 }
 
-DISPATCH_ALWAYS_INLINE
-static inline void
-_dispatch_queue_push_inline(dispatch_queue_t dq, dispatch_object_t _tail,
-		dispatch_qos_t qos)
-{
-	struct dispatch_object_s *tail = _tail._do;
-	dispatch_wakeup_flags_t flags = 0;
-	// If we are going to call dx_wakeup(), the queue must be retained before
-	// the item we're pushing can be dequeued, which means:
-	// - before we exchange the tail if we may have to override
-	// - before we set the head if we made the queue non empty.
-	// Otherwise, if preempted between one of these and the call to dx_wakeup()
-	// the blocks submitted to the queue may release the last reference to the
-	// queue when invoked by _dispatch_queue_drain. <rdar://problem/6932776>
-	bool overriding = _dispatch_queue_need_override_retain(dq, qos);
-	if (unlikely(_dispatch_queue_push_update_tail(dq, tail))) {
-		if (!overriding) _dispatch_retain_2(dq->_as_os_obj);
-		_dispatch_queue_push_update_head(dq, tail);
-		flags = DISPATCH_WAKEUP_CONSUME_2 | DISPATCH_WAKEUP_MAKE_DIRTY;
-	} else if (overriding) {
-		flags = DISPATCH_WAKEUP_CONSUME_2;
-	} else {
-		return;
-	}
-	return dx_wakeup(dq, qos, flags);
-}
+#include "trace.h"
 
 DISPATCH_ALWAYS_INLINE
 static inline void
-_dispatch_queue_push_queue(dispatch_queue_t tq, dispatch_queue_t dq,
+_dispatch_queue_push_queue(dispatch_queue_t tq, dispatch_queue_class_t dq,
 		uint64_t dq_state)
 {
+#if DISPATCH_USE_KEVENT_WORKLOOP
+	if (likely(_dq_state_is_base_wlh(dq_state))) {
+		_dispatch_trace_runtime_event(worker_request, dq._dq, 1);
+		return _dispatch_event_loop_poke((dispatch_wlh_t)dq._dq, dq_state,
+				DISPATCH_EVENT_LOOP_CONSUME_2);
+	}
+#endif // DISPATCH_USE_KEVENT_WORKLOOP
+	_dispatch_trace_item_push(tq, dq);
 	return dx_push(tq, dq, _dq_state_max_qos(dq_state));
 }
 
 DISPATCH_ALWAYS_INLINE
 static inline dispatch_priority_t
-_dispatch_root_queue_identity_assume(dispatch_queue_t assumed_rq)
+_dispatch_root_queue_identity_assume(dispatch_queue_global_t assumed_rq)
 {
 	dispatch_priority_t old_dbp = _dispatch_get_basepri();
 	dispatch_assert(dx_hastypeflag(assumed_rq, QUEUE_ROOT));
@@ -1747,18 +1764,18 @@
 }
 
 typedef dispatch_queue_wakeup_target_t
-_dispatch_queue_class_invoke_handler_t(dispatch_object_t,
+_dispatch_queue_class_invoke_handler_t(dispatch_queue_class_t,
 		dispatch_invoke_context_t dic, dispatch_invoke_flags_t,
 		uint64_t *owned);
 
 DISPATCH_ALWAYS_INLINE
 static inline void
-_dispatch_queue_class_invoke(dispatch_object_t dou,
+_dispatch_queue_class_invoke(dispatch_queue_class_t dqu,
 		dispatch_invoke_context_t dic, dispatch_invoke_flags_t flags,
 		dispatch_invoke_flags_t const_restrict_flags,
 		_dispatch_queue_class_invoke_handler_t invoke)
 {
-	dispatch_queue_t dq = dou._dq;
+	dispatch_queue_t dq = dqu._dq;
 	dispatch_queue_wakeup_target_t tq = DISPATCH_QUEUE_WAKEUP_NONE;
 	bool owning = !(flags & DISPATCH_INVOKE_STEALING);
 	uint64_t owned = 0;
@@ -1773,6 +1790,7 @@
 
 	if (!(flags & (DISPATCH_INVOKE_STEALING | DISPATCH_INVOKE_WLH))) {
 		dq->do_next = DISPATCH_OBJECT_LISTLESS;
+		_dispatch_trace_item_pop(_dispatch_queue_get_current(), dq);
 	}
 	flags |= const_restrict_flags;
 	if (likely(flags & DISPATCH_INVOKE_WLH)) {
@@ -1787,6 +1805,11 @@
 		} else {
 			old_dbp = 0;
 		}
+		if (flags & DISPATCH_INVOKE_WORKLOOP_DRAIN) {
+			if (unlikely(_dispatch_queue_atomic_flags(dqu) & DQF_MUTABLE)) {
+				_dispatch_queue_atomic_flags_clear(dqu, DQF_MUTABLE);
+			}
+		}
 
 		flags = _dispatch_queue_merge_autorelease_frequency(dq, flags);
 attempt_running_slow_head:
@@ -1833,85 +1856,57 @@
 		}
 	}
 	if (likely(owning)) {
-		_dispatch_introspection_queue_item_complete(dq);
+		_dispatch_trace_item_complete(dq);
 	}
 
 	if (tq) {
-		if (const_restrict_flags & DISPATCH_INVOKE_DISALLOW_SYNC_WAITERS) {
-			dispatch_assert(dic->dic_deferred == NULL);
-		} else if (dic->dic_deferred) {
-			return _dispatch_queue_drain_sync_waiter(dq, dic,
-					flags, owned);
-		}
-
-		uint64_t old_state, new_state, enqueued = DISPATCH_QUEUE_ENQUEUED;
-		if (tq == DISPATCH_QUEUE_WAKEUP_MGR) {
-			enqueued = DISPATCH_QUEUE_ENQUEUED_ON_MGR;
-		}
-		os_atomic_rmw_loop2o(dq, dq_state, old_state, new_state, release, {
-			new_state  = old_state - owned;
-			new_state &= ~DISPATCH_QUEUE_DRAIN_UNLOCK_MASK;
-			new_state |= DISPATCH_QUEUE_DIRTY;
-			if (_dq_state_is_suspended(new_state)) {
-				new_state |= DLOCK_OWNER_MASK;
-			} else if (_dq_state_is_runnable(new_state) &&
-					!_dq_state_is_enqueued(new_state)) {
-				// drain was not interupted for suspension
-				// we will reenqueue right away, just put ENQUEUED back
-				new_state |= enqueued;
-			}
-		});
-		old_state -= owned;
-		if (_dq_state_received_override(old_state)) {
-			// Ensure that the root queue sees that this thread was overridden.
-			_dispatch_set_basepri_override_qos(_dq_state_max_qos(new_state));
-		}
-		if ((old_state ^ new_state) & enqueued) {
-			dispatch_assert(_dq_state_is_enqueued(new_state));
-			return _dispatch_queue_push_queue(tq, dq, new_state);
-		}
+		return _dispatch_queue_invoke_finish(dq, dic, tq, owned);
 	}
 
-	_dispatch_release_2_tailcall(dq);
+	return _dispatch_release_2_tailcall(dq);
 }
 
 DISPATCH_ALWAYS_INLINE
 static inline bool
-_dispatch_queue_class_probe(dispatch_queue_class_t dqu)
+_dispatch_queue_class_probe(dispatch_lane_class_t dqu)
 {
 	struct dispatch_object_s *tail;
 	// seq_cst wrt atomic store to dq_state <rdar://problem/14637483>
 	// seq_cst wrt atomic store to dq_flags <rdar://problem/22623242>
-	tail = os_atomic_load2o(dqu._oq, oq_items_tail, ordered);
+	tail = os_atomic_load2o(dqu._dl, dq_items_tail, ordered);
 	return unlikely(tail != NULL);
 }
 
 DISPATCH_ALWAYS_INLINE DISPATCH_CONST
 static inline bool
-_dispatch_is_in_root_queues_array(dispatch_queue_t dq)
+_dispatch_is_in_root_queues_array(dispatch_queue_class_t dqu)
 {
-	return (dq >= _dispatch_root_queues) &&
-			(dq < _dispatch_root_queues + _DISPATCH_ROOT_QUEUE_IDX_COUNT);
+	return (dqu._dgq >= _dispatch_root_queues) &&
+			(dqu._dgq < _dispatch_root_queues + _DISPATCH_ROOT_QUEUE_IDX_COUNT);
 }
 
 DISPATCH_ALWAYS_INLINE DISPATCH_CONST
-static inline dispatch_queue_t
+static inline dispatch_queue_global_t
 _dispatch_get_root_queue(dispatch_qos_t qos, bool overcommit)
 {
-	if (unlikely(qos == DISPATCH_QOS_UNSPECIFIED || qos > DISPATCH_QOS_MAX)) {
+	if (unlikely(qos < DISPATCH_QOS_MIN || qos > DISPATCH_QOS_MAX)) {
 		DISPATCH_CLIENT_CRASH(qos, "Corrupted priority");
 	}
 	return &_dispatch_root_queues[2 * (qos - 1) + overcommit];
 }
 
+#define _dispatch_get_default_queue(overcommit) \
+		_dispatch_root_queues[DISPATCH_ROOT_QUEUE_IDX_DEFAULT_QOS + \
+				!!(overcommit)]._as_dq
+
 DISPATCH_ALWAYS_INLINE
 static inline void
-_dispatch_queue_set_bound_thread(dispatch_queue_t dq)
+_dispatch_queue_set_bound_thread(dispatch_queue_class_t dqu)
 {
 	// Tag thread-bound queues with the owning thread
-	dispatch_assert(_dispatch_queue_is_thread_bound(dq));
+	dispatch_assert(_dispatch_queue_is_thread_bound(dqu));
 	uint64_t old_state, new_state;
-	os_atomic_rmw_loop2o(dq, dq_state, old_state, new_state, relaxed, {
+	os_atomic_rmw_loop2o(dqu._dq, dq_state, old_state, new_state, relaxed, {
 		new_state = old_state;
 		new_state &= ~DISPATCH_QUEUE_DRAIN_OWNER_MASK;
 		new_state |= _dispatch_lock_value_for_self();
@@ -1920,11 +1915,11 @@
 
 DISPATCH_ALWAYS_INLINE
 static inline void
-_dispatch_queue_clear_bound_thread(dispatch_queue_t dq)
+_dispatch_queue_clear_bound_thread(dispatch_queue_class_t dqu)
 {
-	dispatch_assert(_dispatch_queue_is_thread_bound(dq));
-	_dispatch_queue_atomic_flags_clear(dq, DQF_THREAD_BOUND|DQF_CANNOT_TRYSYNC);
-	os_atomic_and2o(dq, dq_state, ~DISPATCH_QUEUE_DRAIN_OWNER_MASK, relaxed);
+	dispatch_assert(_dispatch_queue_is_thread_bound(dqu));
+	os_atomic_and2o(dqu._dq, dq_state,
+			~DISPATCH_QUEUE_DRAIN_OWNER_MASK, relaxed);
 }
 
 DISPATCH_ALWAYS_INLINE
@@ -1982,8 +1977,7 @@
 	dispatch_priority_t dbp = _dispatch_get_basepri();
 	dispatch_qos_t qos = _dispatch_priority_qos(dbp);
 	dispatch_qos_t oqos = _dispatch_priority_override_qos(dbp);
-	dispatch_qos_t qos_floor = MAX(qos, oqos);
-	return qos_floor ? qos_floor : DISPATCH_QOS_SATURATED;
+	return MAX(qos, oqos);
 }
 
 DISPATCH_ALWAYS_INLINE
@@ -2019,49 +2013,79 @@
 
 DISPATCH_ALWAYS_INLINE
 static inline dispatch_priority_t
-_dispatch_set_basepri(dispatch_priority_t dbp)
+_dispatch_set_basepri(dispatch_priority_t dq_dbp)
 {
 #if HAVE_PTHREAD_WORKQUEUE_QOS
-	const dispatch_priority_t preserved_mask =
-			DISPATCH_PRIORITY_OVERRIDE_MASK | DISPATCH_PRIORITY_FLAG_OVERCOMMIT;
 	dispatch_priority_t old_dbp = _dispatch_get_basepri();
-	if (old_dbp) {
-		dispatch_priority_t flags, defaultqueue, basepri;
-		flags = (dbp & DISPATCH_PRIORITY_FLAG_DEFAULTQUEUE);
-		defaultqueue = (old_dbp & DISPATCH_PRIORITY_FLAG_DEFAULTQUEUE);
-		basepri = old_dbp & DISPATCH_PRIORITY_REQUESTED_MASK;
-		dbp &= DISPATCH_PRIORITY_REQUESTED_MASK;
-		if (!dbp) {
-			flags = DISPATCH_PRIORITY_FLAG_INHERIT | defaultqueue;
-			dbp = basepri;
-		} else if (dbp < basepri && !defaultqueue) { // rdar://16349734
-			dbp = basepri;
+	dispatch_priority_t dbp = old_dbp;
+
+	if (unlikely(!old_dbp)) {
+		dbp = dq_dbp & ~DISPATCH_PRIORITY_OVERRIDE_MASK;
+	} else if (dq_dbp & DISPATCH_PRIORITY_REQUESTED_MASK) {
+		dbp &= (DISPATCH_PRIORITY_OVERRIDE_MASK |
+				DISPATCH_PRIORITY_FLAG_OVERCOMMIT);
+		dbp |= MAX(old_dbp & DISPATCH_PRIORITY_REQUESTED_MASK,
+				dq_dbp & DISPATCH_PRIORITY_REQUESTED_MASK);
+		if (_dispatch_priority_fallback_qos(dq_dbp) >
+				_dispatch_priority_qos(dbp)) {
+			dq_dbp &= (DISPATCH_PRIORITY_FALLBACK_QOS_MASK |
+					DISPATCH_PRIORITY_FLAG_FALLBACK |
+					DISPATCH_PRIORITY_FLAG_FLOOR);
+		} else {
+			dq_dbp &= DISPATCH_PRIORITY_FLAG_FLOOR;
 		}
-		dbp |= flags | (old_dbp & preserved_mask);
+		dbp |= dq_dbp;
 	} else {
-		dbp &= ~DISPATCH_PRIORITY_OVERRIDE_MASK;
+		if (dbp & DISPATCH_PRIORITY_REQUESTED_MASK) {
+			dbp |= DISPATCH_PRIORITY_FLAG_FLOOR;
+		}
+		if (_dispatch_priority_fallback_qos(dq_dbp) >
+				_dispatch_priority_qos(dbp)) {
+			dbp &= ~DISPATCH_PRIORITY_FALLBACK_QOS_MASK;
+			dbp |= (dq_dbp & (DISPATCH_PRIORITY_FALLBACK_QOS_MASK |
+					DISPATCH_PRIORITY_FLAG_FALLBACK));
+		}
 	}
 	_dispatch_thread_setspecific(dispatch_basepri_key, (void*)(uintptr_t)dbp);
 	return old_dbp;
 #else
-	(void)dbp;
+	(void)dq_dbp;
 	return 0;
 #endif
 }
 
 DISPATCH_ALWAYS_INLINE
-static inline dispatch_priority_t
-_dispatch_set_basepri_wlh(dispatch_priority_t dbp)
+static inline void
+_dispatch_init_basepri(dispatch_priority_t dbp)
 {
 #if HAVE_PTHREAD_WORKQUEUE_QOS
 	dispatch_assert(!_dispatch_get_basepri());
-	// _dispatch_set_basepri_override_qos(DISPATCH_QOS_SATURATED)
-	dbp |= DISPATCH_QOS_SATURATED << DISPATCH_PRIORITY_OVERRIDE_SHIFT;
 	_dispatch_thread_setspecific(dispatch_basepri_key, (void*)(uintptr_t)dbp);
 #else
 	(void)dbp;
 #endif
-	return 0;
+}
+
+DISPATCH_ALWAYS_INLINE
+static inline void
+_dispatch_init_basepri_wlh(dispatch_priority_t dbp)
+{
+#if HAVE_PTHREAD_WORKQUEUE_QOS
+	dispatch_assert(!_dispatch_get_basepri());
+	dbp |= _dispatch_priority_make_override(DISPATCH_QOS_SATURATED);
+	_dispatch_thread_setspecific(dispatch_basepri_key, (void*)(uintptr_t)dbp);
+#else
+	(void)dbp;
+#endif
+}
+
+DISPATCH_ALWAYS_INLINE
+static inline void
+_dispatch_clear_basepri(void)
+{
+#if HAVE_PTHREAD_WORKQUEUE_QOS
+	_dispatch_thread_setspecific(dispatch_basepri_key, (void*)(uintptr_t)0);
+#endif
 }
 
 DISPATCH_ALWAYS_INLINE
@@ -2069,21 +2093,21 @@
 _dispatch_priority_adopt(pthread_priority_t pp, unsigned long flags)
 {
 #if HAVE_PTHREAD_WORKQUEUE_QOS
-	dispatch_priority_t inherited, defaultqueue, dbp = _dispatch_get_basepri();
+	dispatch_priority_t dbp = _dispatch_get_basepri();
 	pthread_priority_t basepp = _dispatch_priority_to_pp_strip_flags(dbp);
+	pthread_priority_t minbasepp = basepp &
+			~(pthread_priority_t)_PTHREAD_PRIORITY_PRIORITY_MASK;
 	bool enforce = (flags & DISPATCH_PRIORITY_ENFORCE) ||
 			(pp & _PTHREAD_PRIORITY_ENFORCE_FLAG);
-	inherited = (dbp & DISPATCH_PRIORITY_FLAG_INHERIT);
-	defaultqueue = (dbp & DISPATCH_PRIORITY_FLAG_DEFAULTQUEUE);
 	pp &= ~_PTHREAD_PRIORITY_FLAGS_MASK;
 
-	if (!pp) {
+	if (unlikely(!pp)) {
+		dispatch_qos_t fallback = _dispatch_priority_fallback_qos(dbp);
+		return fallback ? _dispatch_qos_to_pp(fallback) : basepp;
+	} else if (pp < minbasepp) {
 		return basepp;
-	} else if (defaultqueue) { // rdar://16349734
-		return pp;
-	} else if (pp < basepp) {
-		return basepp;
-	} else if (enforce || inherited) {
+	} else if (enforce || (dbp & (DISPATCH_PRIORITY_FLAG_FLOOR |
+			DISPATCH_PRIORITY_FLAG_FALLBACK))) {
 		return pp;
 	} else {
 		return basepp;
@@ -2095,66 +2119,6 @@
 }
 
 DISPATCH_ALWAYS_INLINE
-static inline void
-_dispatch_queue_priority_inherit_from_target(dispatch_queue_t dq,
-		dispatch_queue_t tq)
-{
-#if HAVE_PTHREAD_WORKQUEUE_QOS
-	const dispatch_priority_t rootqueue_flag = DISPATCH_PRIORITY_FLAG_ROOTQUEUE;
-	const dispatch_priority_t inherited_flag = DISPATCH_PRIORITY_FLAG_INHERIT;
-	const dispatch_priority_t defaultqueue_flag =
-            DISPATCH_PRIORITY_FLAG_DEFAULTQUEUE;
-	dispatch_priority_t pri = dq->dq_priority, tpri = tq->dq_priority;
-
-	if ((!_dispatch_priority_qos(pri) || (pri & inherited_flag)) &&
-			(tpri & rootqueue_flag)) {
-		if (_dispatch_priority_override_qos(pri) == DISPATCH_QOS_SATURATED) {
-			pri &= DISPATCH_PRIORITY_OVERRIDE_MASK;
-		} else {
-			pri = 0;
-		}
-		if (tpri & defaultqueue_flag) {
-			// <rdar://problem/32921639> base queues need to know they target
-			// the default root queue so that _dispatch_queue_override_qos()
-			// in _dispatch_queue_class_wakeup() can fallback to QOS_DEFAULT
-			// if no other priority was provided.
-			pri |= defaultqueue_flag;
-		} else {
-			pri |= (tpri & ~rootqueue_flag) | inherited_flag;
-		}
-		dq->dq_priority = pri;
-	} else if (pri & defaultqueue_flag) {
-		// the DEFAULTQUEUE flag is only set on queues due to the code above,
-		// and must never be kept if we don't target a global root queue.
-		dq->dq_priority = (pri & ~defaultqueue_flag);
-	}
-#else
-	(void)dq; (void)tq;
-#endif
-}
-
-DISPATCH_ALWAYS_INLINE
-static inline dispatch_priority_t
-_dispatch_priority_inherit_from_root_queue(dispatch_priority_t pri,
-		dispatch_queue_t rq)
-{
-#if HAVE_PTHREAD_WORKQUEUE_QOS
-	dispatch_priority_t p = pri & DISPATCH_PRIORITY_REQUESTED_MASK;
-	dispatch_priority_t rqp = rq->dq_priority & DISPATCH_PRIORITY_REQUESTED_MASK;
-	dispatch_priority_t defaultqueue =
-			rq->dq_priority & DISPATCH_PRIORITY_FLAG_DEFAULTQUEUE;
-
-	if (!p || (!defaultqueue && p < rqp)) {
-		p = rqp | defaultqueue;
-	}
-	return p | (rq->dq_priority & DISPATCH_PRIORITY_FLAG_OVERCOMMIT);
-#else
-	(void)rq; (void)pri;
-	return 0;
-#endif
-}
-
-DISPATCH_ALWAYS_INLINE
 static inline pthread_priority_t
 _dispatch_get_priority(void)
 {
@@ -2257,41 +2221,43 @@
 }
 
 DISPATCH_ALWAYS_INLINE
-static inline bool
-_dispatch_queue_need_override(dispatch_queue_class_t dqu, dispatch_qos_t qos)
+static inline dispatch_qos_t
+_dispatch_queue_push_qos(dispatch_queue_class_t dq, dispatch_qos_t qos)
 {
-	uint64_t dq_state = os_atomic_load2o(dqu._dq, dq_state, relaxed);
-	// dq_priority "override qos" contains the priority at which the queue
-	// is already running for thread-bound queues.
-	// For non thread-bound queues, the qos of the queue may not be observed
-	// when the first work item is dispatched synchronously.
-	return _dq_state_max_qos(dq_state) < qos &&
-			_dispatch_priority_override_qos(dqu._dq->dq_priority) < qos;
-}
-
-DISPATCH_ALWAYS_INLINE
-static inline bool
-_dispatch_queue_need_override_retain(dispatch_queue_class_t dqu,
-		dispatch_qos_t qos)
-{
-	if (_dispatch_queue_need_override(dqu, qos)) {
-		_os_object_retain_internal_n_inline(dqu._oq->_as_os_obj, 2);
-		return true;
+	if (qos > _dispatch_priority_qos(dq._dl->dq_priority)) {
+		return qos;
 	}
-	return false;
+	return DISPATCH_QOS_UNSPECIFIED;
 }
 
 DISPATCH_ALWAYS_INLINE
 static inline dispatch_qos_t
-_dispatch_queue_override_qos(dispatch_queue_class_t dqu, dispatch_qos_t qos)
+_dispatch_queue_wakeup_qos(dispatch_queue_class_t dq, dispatch_qos_t qos)
 {
-	if (dqu._oq->oq_priority & DISPATCH_PRIORITY_FLAG_DEFAULTQUEUE) {
-		// queues targeting the default root queue use any asynchronous
-		// workitem priority available and fallback to QOS_DEFAULT otherwise.
-		return qos ? qos : DISPATCH_QOS_DEFAULT;
-	}
+	if (!qos) qos = _dispatch_priority_fallback_qos(dq._dl->dq_priority);
 	// for asynchronous workitems, queue priority is the floor for overrides
-	return MAX(qos, _dispatch_priority_qos(dqu._oq->oq_priority));
+	return MAX(qos, _dispatch_priority_qos(dq._dl->dq_priority));
+}
+
+DISPATCH_ALWAYS_INLINE
+static inline dispatch_qos_t
+_dispatch_queue_max_qos(dispatch_queue_class_t dq)
+{
+	// Note: the non atomic load allows to avoid CAS on 32bit architectures
+	//       which doesn't give us much as the bits we want are in a single byte
+	//       and can't quite be read non atomically. Given that this function is
+	//       called in various critical codepaths (such as _dispatch_lane_push()
+	//       between the tail exchange and updating the `prev` pointer), we care
+	//       deeply about avoiding this.
+	return _dq_state_max_qos((uint64_t)dq._dl->dq_state_bits << 32);
+}
+
+DISPATCH_ALWAYS_INLINE
+static inline bool
+_dispatch_queue_need_override(dispatch_queue_class_t dq, dispatch_qos_t qos)
+{
+	dispatch_qos_t max_qos = _dispatch_queue_max_qos(dq);
+	return max_qos == DISPATCH_QOS_UNSPECIFIED || max_qos < qos;
 }
 
 #define DISPATCH_PRIORITY_PROPAGATE_CURRENT 0x1
@@ -2349,23 +2315,21 @@
 static inline bool
 _dispatch_block_has_private_data(const dispatch_block_t block)
 {
-	extern void (*_dispatch_block_special_invoke)(void*);
 	return (_dispatch_Block_invoke(block) == _dispatch_block_special_invoke);
 }
 
 DISPATCH_ALWAYS_INLINE DISPATCH_WARN_RESULT
 static inline pthread_priority_t
 _dispatch_block_invoke_should_set_priority(dispatch_block_flags_t flags,
-        pthread_priority_t new_pri)
+		pthread_priority_t new_pri)
 {
 	pthread_priority_t old_pri, p = 0;  // 0 means do not change priority.
 	if ((flags & DISPATCH_BLOCK_HAS_PRIORITY)
 			&& ((flags & DISPATCH_BLOCK_ENFORCE_QOS_CLASS) ||
 			!(flags & DISPATCH_BLOCK_INHERIT_QOS_CLASS))) {
-		old_pri = _dispatch_get_priority();
 		new_pri &= ~_PTHREAD_PRIORITY_FLAGS_MASK;
-		p = old_pri & ~_PTHREAD_PRIORITY_FLAGS_MASK;
-		if (!p || p >= new_pri) p = 0;
+		old_pri = _dispatch_get_priority() & ~_PTHREAD_PRIORITY_FLAGS_MASK;
+		if (old_pri && old_pri < new_pri) p = old_pri;
 	}
 	return p;
 }
@@ -2448,6 +2412,11 @@
 	}
 	dc->do_next = prev_dc;
 	dc->dc_cache_cnt = cnt;
+#if DISPATCH_ALLOCATOR
+	// This magical value helps memory tools to recognize continuations on
+	// the various free lists that are really free.
+	dc->dc_flags = (uintptr_t)(void *)&_dispatch_main_heap;
+#endif
 	_dispatch_thread_setspecific(dispatch_cache_key, dc);
 	return NULL;
 }
@@ -2462,8 +2431,6 @@
 	}
 }
 
-#include "trace.h"
-
 DISPATCH_ALWAYS_INLINE
 static inline void
 _dispatch_continuation_with_group_invoke(dispatch_continuation_t dc)
@@ -2472,7 +2439,7 @@
 	unsigned long type = dx_type(dou);
 	if (type == DISPATCH_GROUP_TYPE) {
 		_dispatch_client_callout(dc->dc_ctxt, dc->dc_func);
-		_dispatch_introspection_queue_item_complete(dou);
+		_dispatch_trace_item_complete(dc);
 		dispatch_group_leave((dispatch_group_t)dou);
 	} else {
 		DISPATCH_INTERNAL_CRASH(dx_type(dou), "Unexpected object type");
@@ -2481,8 +2448,8 @@
 
 DISPATCH_ALWAYS_INLINE
 static inline void
-_dispatch_continuation_invoke_inline(dispatch_object_t dou, voucher_t ov,
-		dispatch_invoke_flags_t flags)
+_dispatch_continuation_invoke_inline(dispatch_object_t dou,
+		dispatch_invoke_flags_t flags, dispatch_queue_class_t dqu)
 {
 	dispatch_continuation_t dc = dou._dc, dc1;
 	dispatch_invoke_with_autoreleasepool(flags, {
@@ -2493,17 +2460,20 @@
 		// The ccache version is per-thread.
 		// Therefore, the object has not been reused yet.
 		// This generates better assembly.
-		_dispatch_continuation_voucher_adopt(dc, ov, dc_flags);
-		if (dc_flags & DISPATCH_OBJ_CONSUME_BIT) {
+		_dispatch_continuation_voucher_adopt(dc, dc_flags);
+		if (!(dc_flags & DC_FLAG_NO_INTROSPECTION)) {
+			_dispatch_trace_item_pop(dqu, dou);
+		}
+		if (dc_flags & DC_FLAG_CONSUME) {
 			dc1 = _dispatch_continuation_free_cacheonly(dc);
 		} else {
 			dc1 = NULL;
 		}
-		if (unlikely(dc_flags & DISPATCH_OBJ_GROUP_BIT)) {
+		if (unlikely(dc_flags & DC_FLAG_GROUP_ASYNC)) {
 			_dispatch_continuation_with_group_invoke(dc);
 		} else {
 			_dispatch_client_callout(dc->dc_ctxt, dc->dc_func);
-			_dispatch_introspection_queue_item_complete(dou);
+			_dispatch_trace_item_complete(dc);
 		}
 		if (unlikely(dc1)) {
 			_dispatch_continuation_free_to_cache_limit(dc1);
@@ -2516,147 +2486,131 @@
 static inline void
 _dispatch_continuation_pop_inline(dispatch_object_t dou,
 		dispatch_invoke_context_t dic, dispatch_invoke_flags_t flags,
-		dispatch_queue_t dq)
+		dispatch_queue_class_t dqu)
 {
 	dispatch_pthread_root_queue_observer_hooks_t observer_hooks =
 			_dispatch_get_pthread_root_queue_observer_hooks();
-	if (observer_hooks) observer_hooks->queue_will_execute(dq);
-	_dispatch_trace_continuation_pop(dq, dou);
+	if (observer_hooks) observer_hooks->queue_will_execute(dqu._dq);
 	flags &= _DISPATCH_INVOKE_PROPAGATE_MASK;
 	if (_dispatch_object_has_vtable(dou)) {
-		dx_invoke(dou._do, dic, flags);
+		dx_invoke(dou._dq, dic, flags);
 	} else {
-		_dispatch_continuation_invoke_inline(dou, DISPATCH_NO_VOUCHER, flags);
+		_dispatch_continuation_invoke_inline(dou, flags, dqu);
 	}
-	if (observer_hooks) observer_hooks->queue_did_execute(dq);
+	if (observer_hooks) observer_hooks->queue_did_execute(dqu._dq);
 }
 
 // used to forward the do_invoke of a continuation with a vtable to its real
 // implementation.
-#define _dispatch_continuation_pop_forwarded(dc, ov, dc_flags, ...) \
+#define _dispatch_continuation_pop_forwarded(dc, dc_flags, dq, ...) \
 	({ \
 		dispatch_continuation_t _dc = (dc), _dc1; \
 		uintptr_t _dc_flags = (dc_flags); \
-		_dispatch_continuation_voucher_adopt(_dc, ov, _dc_flags); \
-		if (_dc_flags & DISPATCH_OBJ_CONSUME_BIT) { \
+		_dispatch_continuation_voucher_adopt(_dc, _dc_flags); \
+		if (!(_dc_flags & DC_FLAG_NO_INTROSPECTION)) { \
+			_dispatch_trace_item_pop(dq, dc); \
+		} \
+		if (_dc_flags & DC_FLAG_CONSUME) { \
 			_dc1 = _dispatch_continuation_free_cacheonly(_dc); \
 		} else { \
 			_dc1 = NULL; \
 		} \
 		__VA_ARGS__; \
-		_dispatch_introspection_queue_item_complete(_dc); \
+		if (!(_dc_flags & DC_FLAG_NO_INTROSPECTION)) { \
+			_dispatch_trace_item_complete(_dc); \
+		} \
 		if (unlikely(_dc1)) { \
 			_dispatch_continuation_free_to_cache_limit(_dc1); \
 		} \
 	})
 
 DISPATCH_ALWAYS_INLINE
-static inline void
+static inline dispatch_qos_t
 _dispatch_continuation_priority_set(dispatch_continuation_t dc,
+		dispatch_queue_class_t dqu,
 		pthread_priority_t pp, dispatch_block_flags_t flags)
 {
+	dispatch_qos_t qos = DISPATCH_QOS_UNSPECIFIED;
 #if HAVE_PTHREAD_WORKQUEUE_QOS
-	if (likely(!(flags & DISPATCH_BLOCK_HAS_PRIORITY))) {
-		pp = _dispatch_priority_propagate();
-	}
-	if (flags & DISPATCH_BLOCK_ENFORCE_QOS_CLASS) {
-		pp |= _PTHREAD_PRIORITY_ENFORCE_FLAG;
+	dispatch_queue_t dq = dqu._dq;
+
+	if (likely(pp)) {
+		bool enforce = (flags & DISPATCH_BLOCK_ENFORCE_QOS_CLASS);
+		bool is_floor = (dq->dq_priority & DISPATCH_PRIORITY_FLAG_FLOOR);
+		bool dq_has_qos = (dq->dq_priority & DISPATCH_PRIORITY_REQUESTED_MASK);
+		if (enforce) {
+			pp |= _PTHREAD_PRIORITY_ENFORCE_FLAG;
+			qos = _dispatch_qos_from_pp_unsafe(pp);
+		} else if (!is_floor && dq_has_qos) {
+			pp = 0;
+		} else {
+			qos = _dispatch_qos_from_pp_unsafe(pp);
+		}
 	}
 	dc->dc_priority = pp;
 #else
-	(void)dc; (void)pp; (void)flags;
+	(void)dc; (void)dqu; (void)pp; (void)flags;
 #endif
+	return qos;
 }
 
 DISPATCH_ALWAYS_INLINE
 static inline dispatch_qos_t
-_dispatch_continuation_override_qos(dispatch_queue_t dq,
-		dispatch_continuation_t dc)
-{
-#if HAVE_PTHREAD_WORKQUEUE_QOS
-	dispatch_qos_t dc_qos = _dispatch_qos_from_pp(dc->dc_priority);
-	bool enforce = dc->dc_priority & _PTHREAD_PRIORITY_ENFORCE_FLAG;
-	dispatch_qos_t dq_qos = _dispatch_priority_qos(dq->dq_priority);
-	bool defaultqueue = dq->dq_priority & DISPATCH_PRIORITY_FLAG_DEFAULTQUEUE;
-
-	dispatch_assert(dc->dc_priority != DISPATCH_NO_PRIORITY);
-	if (dc_qos && (enforce || !dq_qos || defaultqueue)) {
-		return dc_qos;
-	}
-	return dq_qos;
-#else
-	(void)dq; (void)dc;
-	return 0;
-#endif
-}
-
-DISPATCH_ALWAYS_INLINE
-static inline void
 _dispatch_continuation_init_f(dispatch_continuation_t dc,
-		dispatch_queue_class_t dqu, void *ctxt, dispatch_function_t func,
-		pthread_priority_t pp, dispatch_block_flags_t flags, uintptr_t dc_flags)
+		dispatch_queue_class_t dqu, void *ctxt, dispatch_function_t f,
+		dispatch_block_flags_t flags, uintptr_t dc_flags)
 {
-	dc->dc_flags = dc_flags;
-	dc->dc_func = func;
+	pthread_priority_t pp = 0;
+	dc->dc_flags = dc_flags | DC_FLAG_ALLOCATED;
+	dc->dc_func = f;
 	dc->dc_ctxt = ctxt;
-	_dispatch_continuation_voucher_set(dc, dqu, flags);
-	_dispatch_continuation_priority_set(dc, pp, flags);
+	// in this context DISPATCH_BLOCK_HAS_PRIORITY means that the priority
+	// should not be propagated, only taken from the handler if it has one
+	if (!(flags & DISPATCH_BLOCK_HAS_PRIORITY)) {
+		pp = _dispatch_priority_propagate();
+	}
+	_dispatch_continuation_voucher_set(dc, flags);
+	return _dispatch_continuation_priority_set(dc, dqu, pp, flags);
 }
 
 DISPATCH_ALWAYS_INLINE
-static inline void
+static inline dispatch_qos_t
 _dispatch_continuation_init(dispatch_continuation_t dc,
 		dispatch_queue_class_t dqu, dispatch_block_t work,
-		pthread_priority_t pp, dispatch_block_flags_t flags, uintptr_t dc_flags)
+		dispatch_block_flags_t flags, uintptr_t dc_flags)
 {
-	dc->dc_flags = dc_flags | DISPATCH_OBJ_BLOCK_BIT;
-	dc->dc_ctxt = _dispatch_Block_copy(work);
-	_dispatch_continuation_priority_set(dc, pp, flags);
+	void *ctxt = _dispatch_Block_copy(work);
 
+	dc_flags |= DC_FLAG_BLOCK | DC_FLAG_ALLOCATED;
 	if (unlikely(_dispatch_block_has_private_data(work))) {
-		// always sets dc_func & dc_voucher
-		// may update dc_priority & do_vtable
+		dc->dc_flags = dc_flags;
+		dc->dc_ctxt = ctxt;
+		// will initialize all fields but requires dc_flags & dc_ctxt to be set
 		return _dispatch_continuation_init_slow(dc, dqu, flags);
 	}
 
-	if (dc_flags & DISPATCH_OBJ_CONSUME_BIT) {
-		dc->dc_func = _dispatch_call_block_and_release;
-	} else {
-		dc->dc_func = _dispatch_Block_invoke(work);
+	dispatch_function_t func = _dispatch_Block_invoke(work);
+	if (dc_flags & DC_FLAG_CONSUME) {
+		func = _dispatch_call_block_and_release;
 	}
-	_dispatch_continuation_voucher_set(dc, dqu, flags);
+	return _dispatch_continuation_init_f(dc, dqu, ctxt, func, flags, dc_flags);
 }
 
-#if HAVE_MACH
-#pragma mark dispatch_mach_reply_refs_t
-
-// assumes low bit of mach port names is always set
-#define DISPATCH_MACH_REPLY_PORT_UNOWNED 0x1u
-
 DISPATCH_ALWAYS_INLINE
 static inline void
-_dispatch_mach_reply_mark_reply_port_owned(dispatch_mach_reply_refs_t dmr)
+_dispatch_continuation_async(dispatch_queue_class_t dqu,
+		dispatch_continuation_t dc, dispatch_qos_t qos, uintptr_t dc_flags)
 {
-	dmr->du_ident &= ~DISPATCH_MACH_REPLY_PORT_UNOWNED;
+#if DISPATCH_INTROSPECTION
+	if (!(dc_flags & DC_FLAG_NO_INTROSPECTION)) {
+		_dispatch_trace_item_push(dqu, dc);
+	}
+#else
+	(void)dc_flags;
+#endif
+	return dx_push(dqu._dq, dc, qos);
 }
 
-DISPATCH_ALWAYS_INLINE
-static inline bool
-_dispatch_mach_reply_is_reply_port_owned(dispatch_mach_reply_refs_t dmr)
-{
-	mach_port_t reply_port = (mach_port_t)dmr->du_ident;
-	return reply_port ? !(reply_port & DISPATCH_MACH_REPLY_PORT_UNOWNED) :false;
-}
-
-DISPATCH_ALWAYS_INLINE
-static inline mach_port_t
-_dispatch_mach_reply_get_reply_port(mach_port_t reply_port)
-{
-	return reply_port ? (reply_port | DISPATCH_MACH_REPLY_PORT_UNOWNED) : 0;
-}
-
-#endif // HAVE_MACH
-
 #endif // DISPATCH_PURE_C
 
 #endif /* __DISPATCH_INLINE_INTERNAL__ */
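
A minimal standalone sketch of the pop/invoke/complete pattern that the reworked
continuation code above follows with the renamed DC_FLAG_* bits: introspection
tracing is skipped when a NO_INTROSPECTION-style flag is set, and the
continuation is only recycled when a CONSUME-style flag was set. Every
identifier below is a hypothetical stand-in, not libdispatch API.

#include <stdint.h>
#include <stdio.h>

#define K_FLAG_CONSUME           0x1u  /* stand-in for DC_FLAG_CONSUME */
#define K_FLAG_NO_INTROSPECTION  0x2u  /* stand-in for DC_FLAG_NO_INTROSPECTION */

typedef struct sketch_continuation_s {
	uintptr_t flags;
	void (*func)(void *);
	void *ctxt;
} *sketch_continuation_t;

static void
sketch_invoke(sketch_continuation_t dc)
{
	if (!(dc->flags & K_FLAG_NO_INTROSPECTION)) {
		printf("trace: item pop %p\n", (void *)dc);      /* cf. _dispatch_trace_item_pop */
	}
	dc->func(dc->ctxt);                                  /* client callout */
	if (!(dc->flags & K_FLAG_NO_INTROSPECTION)) {
		printf("trace: item complete %p\n", (void *)dc); /* cf. _dispatch_trace_item_complete */
	}
	if (dc->flags & K_FLAG_CONSUME) {
		/* the real code hands dc back to a per-thread continuation cache here */
	}
}
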
diff --git a/src/internal.h b/src/internal.h
index 43aeb9f..92142fb 100644
--- a/src/internal.h
+++ b/src/internal.h
@@ -40,34 +40,7 @@
 #include <Availability.h>
 #include <os/availability.h>
 #include <TargetConditionals.h>
-
-#ifndef TARGET_OS_MAC_DESKTOP
-#define TARGET_OS_MAC_DESKTOP  (TARGET_OS_MAC && \
-		!TARGET_OS_SIMULATOR && !TARGET_OS_IPHONE && !TARGET_OS_EMBEDDED)
-#endif
-
-#if TARGET_OS_MAC_DESKTOP
-#  define DISPATCH_MIN_REQUIRED_OSX_AT_LEAST(x) \
-		(__MAC_OS_X_VERSION_MIN_REQUIRED >= (x))
-#  if !DISPATCH_MIN_REQUIRED_OSX_AT_LEAST(101200)
-#    error "OS X hosts older than OS X 10.12 aren't supported anymore"
-#  endif // !DISPATCH_MIN_REQUIRED_OSX_AT_LEAST(101200)
-#elif TARGET_OS_SIMULATOR
-#  define DISPATCH_MIN_REQUIRED_OSX_AT_LEAST(x) \
-		(IPHONE_SIMULATOR_HOST_MIN_VERSION_REQUIRED >= (x))
-#  if !DISPATCH_MIN_REQUIRED_OSX_AT_LEAST(101200)
-#    error "Simulator hosts older than OS X 10.12 aren't supported anymore"
-#  endif // !DISPATCH_MIN_REQUIRED_OSX_AT_LEAST(101200)
-#else
-#  define DISPATCH_MIN_REQUIRED_OSX_AT_LEAST(x) 1
-#  if __IPHONE_OS_VERSION_MIN_REQUIRED < 90000
-#    error "iOS hosts older than iOS 9.0 aren't supported anymore"
-#  endif
-#endif
-
-#else // !__APPLE__
-#define DISPATCH_MIN_REQUIRED_OSX_AT_LEAST(x) 0
-#endif // !__APPLE__
+#endif // __APPLE__
 
 
 #if !defined(DISPATCH_MACH_SPI) && TARGET_OS_MAC
@@ -138,6 +111,91 @@
 #endif
 #endif
 
+#ifndef DISPATCH_STATIC_GLOBAL
+#define DISPATCH_STATIC_GLOBAL(declaration) static declaration
+#endif
+#ifndef DISPATCH_GLOBAL
+#define DISPATCH_GLOBAL(declaration) declaration
+#endif
+#ifndef DISPATCH_GLOBAL_INIT
+#define DISPATCH_GLOBAL_INIT(declaration, ...) 	declaration = __VA_ARGS__
+#endif
+
+#if defined(__OBJC__) || defined(__cplusplus)
+#define DISPATCH_PURE_C 0
+#else
+#define DISPATCH_PURE_C 1
+#endif
+
+#ifdef __OBJC__
+@protocol OS_dispatch_queue;
+#endif
+
+// Lane cluster class: type for all the queues that have a single head/tail pair
+typedef union {
+	struct dispatch_lane_s *_dl;
+	struct dispatch_queue_static_s *_dsq;
+	struct dispatch_queue_global_s *_dgq;
+	struct dispatch_queue_pthread_root_s *_dpq;
+	struct dispatch_source_s *_ds;
+	struct dispatch_mach_s *_dm;
+#ifdef __OBJC__
+	id<OS_dispatch_queue> _objc_dq; // unsafe cast for the sake of object.m
+#endif
+} dispatch_lane_class_t DISPATCH_TRANSPARENT_UNION;
+
+// Dispatch queue cluster class: type for any dispatch_queue_t
+typedef union {
+	struct dispatch_queue_s *_dq;
+	struct dispatch_workloop_s *_dwl;
+	struct dispatch_lane_s *_dl;
+	struct dispatch_queue_static_s *_dsq;
+	struct dispatch_queue_global_s *_dgq;
+	struct dispatch_queue_pthread_root_s *_dpq;
+	struct dispatch_source_s *_ds;
+	struct dispatch_mach_s *_dm;
+	dispatch_lane_class_t _dlu;
+#ifdef __OBJC__
+	id<OS_dispatch_queue> _objc_dq;
+#endif
+} dispatch_queue_class_t DISPATCH_TRANSPARENT_UNION;
+
+#ifndef __OBJC__
+typedef union {
+	struct _os_object_s *_os_obj;
+	struct dispatch_object_s *_do;
+	struct dispatch_queue_s *_dq;
+	struct dispatch_queue_attr_s *_dqa;
+	struct dispatch_group_s *_dg;
+	struct dispatch_source_s *_ds;
+	struct dispatch_mach_s *_dm;
+	struct dispatch_mach_msg_s *_dmsg;
+	struct dispatch_semaphore_s *_dsema;
+	struct dispatch_data_s *_ddata;
+	struct dispatch_io_s *_dchannel;
+
+	struct dispatch_continuation_s *_dc;
+	struct dispatch_sync_context_s *_dsc;
+	struct dispatch_operation_s *_doperation;
+	struct dispatch_disk_s *_ddisk;
+	struct dispatch_workloop_s *_dwl;
+	struct dispatch_lane_s *_dl;
+	struct dispatch_queue_static_s *_dsq;
+	struct dispatch_queue_global_s *_dgq;
+	struct dispatch_queue_pthread_root_s *_dpq;
+	dispatch_queue_class_t _dqu;
+	dispatch_lane_class_t _dlu;
+	uintptr_t _do_value;
+} dispatch_object_t DISPATCH_TRANSPARENT_UNION;
+
+DISPATCH_ALWAYS_INLINE
+static inline dispatch_object_t
+upcast(dispatch_object_t dou)
+{
+	return dou;
+}
+#endif // __OBJC__
+
 #include <os/object.h>
 #include <dispatch/time.h>
 #include <dispatch/object.h>
@@ -150,18 +208,13 @@
 #include <dispatch/data.h>
 #include <dispatch/io.h>
 
-#if defined(__OBJC__) || defined(__cplusplus)
-#define DISPATCH_PURE_C 0
-#else
-#define DISPATCH_PURE_C 1
-#endif
-
 /* private.h must be included last to avoid picking up installed headers. */
 #if !defined(_WIN32)
 #include <pthread.h>
 #endif
 #include "os/object_private.h"
 #include "queue_private.h"
+#include "workloop_private.h"
 #include "source_private.h"
 #include "mach_private.h"
 #include "data_private.h"
@@ -172,46 +225,6 @@
 #include "benchmark.h"
 #include "private.h"
 
-/* SPI for Libsystem-internal use */
-DISPATCH_EXPORT DISPATCH_NOTHROW void libdispatch_init(void);
-#if !defined(_WIN32)
-DISPATCH_EXPORT DISPATCH_NOTHROW void dispatch_atfork_prepare(void);
-DISPATCH_EXPORT DISPATCH_NOTHROW void dispatch_atfork_parent(void);
-DISPATCH_EXPORT DISPATCH_NOTHROW void dispatch_atfork_child(void);
-#endif
-
-/* More #includes at EOF (dependent on the contents of internal.h) ... */
-
-// Abort on uncaught exceptions thrown from client callouts rdar://8577499
-#if !defined(DISPATCH_USE_CLIENT_CALLOUT)
-#define DISPATCH_USE_CLIENT_CALLOUT 1
-#endif
-
-#define DISPATCH_ALLOW_NON_LEAF_RETARGET 1
-
-/* The "_debug" library build */
-#ifndef DISPATCH_DEBUG
-#define DISPATCH_DEBUG 0
-#endif
-
-#ifndef DISPATCH_PROFILE
-#define DISPATCH_PROFILE 0
-#endif
-
-#if (!TARGET_OS_EMBEDDED || DISPATCH_DEBUG || DISPATCH_PROFILE) && \
-		!defined(DISPATCH_USE_DTRACE)
-#define DISPATCH_USE_DTRACE 1
-#endif
-
-#if DISPATCH_USE_DTRACE && (DISPATCH_INTROSPECTION || DISPATCH_DEBUG || \
-		DISPATCH_PROFILE) && !defined(DISPATCH_USE_DTRACE_INTROSPECTION)
-#define DISPATCH_USE_DTRACE_INTROSPECTION 1
-#endif
-
-#ifndef DISPATCH_DEBUG_QOS
-#define DISPATCH_DEBUG_QOS DISPATCH_DEBUG
-#endif
-
 #if HAVE_LIBKERN_OSCROSSENDIAN_H
 #include <libkern/OSCrossEndian.h>
 #endif
@@ -236,7 +249,17 @@
 #include <mach/notify.h>
 #include <mach/mach_vm.h>
 #include <mach/vm_map.h>
+#if __has_include(<mach/mach_sync_ipc.h>)
+#include <mach/mach_sync_ipc.h>
+#endif
 #endif /* HAVE_MACH */
+#if __has_include(<os/reason_private.h>)
+#define HAVE_OS_FAULT_WITH_PAYLOAD 1
+#include <os/reason_private.h>
+#include <os/variant_private.h>
+#else
+#define HAVE_OS_FAULT_WITH_PAYLOAD 0
+#endif
 #if HAVE_MALLOC_MALLOC_H
 #include <malloc/malloc.h>
 #endif
@@ -246,12 +269,12 @@
 #if defined(_WIN32)
 #include <time.h>
 #else
-#include <sys/queue.h>
 #include <sys/mount.h>
 #ifdef __ANDROID__
 #include <linux/sysctl.h>
 #else
 #include <sys/sysctl.h>
+#include <sys/queue.h>
 #endif /* __ANDROID__ */
 #include <sys/socket.h>
 #include <sys/time.h>
@@ -283,9 +306,6 @@
 #include <stdbool.h>
 #include <stdint.h>
 #include <stdio.h>
-#if defined(_WIN32)
-#define _CRT_RAND_S
-#endif
 #include <stdlib.h>
 #include <string.h>
 #if defined(__unix__) || (defined(__APPLE__) && defined(__MACH__))
@@ -297,14 +317,56 @@
 #endif
 #include <inttypes.h>
 
+/* More #includes at EOF (dependent on the contents of internal.h) ... */
+
+__BEGIN_DECLS
+
+/* SPI for Libsystem-internal use */
+DISPATCH_EXPORT DISPATCH_NOTHROW void libdispatch_init(void);
+#if !defined(_WIN32)
+DISPATCH_EXPORT DISPATCH_NOTHROW void dispatch_atfork_prepare(void);
+DISPATCH_EXPORT DISPATCH_NOTHROW void dispatch_atfork_parent(void);
+DISPATCH_EXPORT DISPATCH_NOTHROW void dispatch_atfork_child(void);
+#endif
+
+// Abort on uncaught exceptions thrown from client callouts rdar://8577499
+#if !defined(DISPATCH_USE_CLIENT_CALLOUT)
+#define DISPATCH_USE_CLIENT_CALLOUT 1
+#endif
+
+#define DISPATCH_ALLOW_NON_LEAF_RETARGET 1
+
+/* The "_debug" library build */
+#ifndef DISPATCH_DEBUG
+#define DISPATCH_DEBUG 0
+#endif
+
+#ifndef DISPATCH_PROFILE
+#define DISPATCH_PROFILE 0
+#endif
+
+#if (TARGET_OS_OSX || DISPATCH_DEBUG || DISPATCH_PROFILE) && \
+		!defined(DISPATCH_USE_DTRACE)
+#define DISPATCH_USE_DTRACE 1
+#endif
+
+#if DISPATCH_USE_DTRACE && (DISPATCH_INTROSPECTION || DISPATCH_DEBUG || \
+		DISPATCH_PROFILE) && !defined(DISPATCH_USE_DTRACE_INTROSPECTION)
+#define DISPATCH_USE_DTRACE_INTROSPECTION 1
+#endif
+
+#ifndef DISPATCH_DEBUG_QOS
+#define DISPATCH_DEBUG_QOS DISPATCH_DEBUG
+#endif
+
 #if defined(__GNUC__) || defined(__clang__)
 #define DISPATCH_NOINLINE __attribute__((__noinline__))
 #define DISPATCH_USED __attribute__((__used__))
 #define DISPATCH_UNUSED __attribute__((__unused__))
 #define DISPATCH_WEAK __attribute__((__weak__))
 #define DISPATCH_OVERLOADABLE __attribute__((__overloadable__))
-#define DISPATCH_PACKED __attribute__((__packed__))
 #if DISPATCH_DEBUG
+#define DISPATCH_PACKED __attribute__((__packed__))
 #define DISPATCH_ALWAYS_INLINE_NDEBUG
 #else
 #define DISPATCH_ALWAYS_INLINE_NDEBUG __attribute__((__always_inline__))
@@ -359,33 +421,36 @@
 #define USEC_PER_SEC 1000000ull
 #define NSEC_PER_USEC 1000ull
 
-/* I wish we had __builtin_expect_range() */
 #if __GNUC__
-#define _safe_cast_to_long(x) \
-		({ _Static_assert(sizeof(__typeof__(x)) <= sizeof(long), \
-				"__builtin_expect doesn't support types wider than long"); \
-				(long)(x); })
-#define fastpath(x) ((__typeof__(x))__builtin_expect(_safe_cast_to_long(x), ~0l))
-#define slowpath(x) ((__typeof__(x))__builtin_expect(_safe_cast_to_long(x), 0l))
 #define likely(x) __builtin_expect(!!(x), 1)
 #define unlikely(x) __builtin_expect(!!(x), 0)
 #else
-#define fastpath(x) (x)
-#define slowpath(x) (x)
 #define likely(x) (!!(x))
 #define unlikely(x) (!!(x))
 #endif // __GNUC__
 
+#define _LIST_IS_ENQUEUED(elm, field) \
+		((elm)->field.le_prev != NULL)
+#define _LIST_MARK_NOT_ENQUEUED(elm, field) \
+		((void)((elm)->field.le_prev = NULL))
 #define _TAILQ_IS_ENQUEUED(elm, field) \
 		((elm)->field.tqe_prev != NULL)
 #define _TAILQ_MARK_NOT_ENQUEUED(elm, field) \
-		do { (elm)->field.tqe_prev = NULL; } while (0)
+		((void)((elm)->field.tqe_prev = NULL))
 
+#if DISPATCH_DEBUG
 // sys/queue.h debugging
-#ifndef TRASHIT
+#undef TRASHIT
 #define TRASHIT(x) do {(x) = (void *)-1;} while (0)
+#else // DISPATCH_DEBUG
+#ifndef TRASHIT
+#define TRASHIT(x)
 #endif
-
+#endif // DISPATCH_DEBUG
+#define _LIST_TRASH_ENTRY(elm, field) do { \
+			TRASHIT((elm)->field.le_next); \
+			TRASHIT((elm)->field.le_prev); \
+		} while (0)
 #define _TAILQ_TRASH_ENTRY(elm, field) do { \
 			TRASHIT((elm)->field.tqe_next); \
 			TRASHIT((elm)->field.tqe_prev); \
@@ -395,23 +460,32 @@
 			TRASHIT((head)->tqh_last); \
 		} while (0)
 
-DISPATCH_EXPORT DISPATCH_NOINLINE
+#define DISPATCH_MODE_STRICT    (1U << 0)
+#define DISPATCH_MODE_NO_FAULTS (1U << 1)
+extern uint8_t _dispatch_mode;
+
+DISPATCH_EXPORT DISPATCH_NOINLINE DISPATCH_COLD
 void _dispatch_bug(size_t line, long val);
 
-DISPATCH_NOINLINE
-void _dispatch_bug_client(const char* msg);
 #if HAVE_MACH
-DISPATCH_NOINLINE
+DISPATCH_NOINLINE DISPATCH_COLD
 void _dispatch_bug_mach_client(const char *msg, mach_msg_return_t kr);
 #endif // HAVE_MACH
-DISPATCH_NOINLINE
-void _dispatch_bug_kevent_client(const char* msg, const char* filter,
-		const char *operation, int err);
 
-DISPATCH_NOINLINE
+struct dispatch_unote_class_s;
+
+DISPATCH_NOINLINE DISPATCH_COLD
+void _dispatch_bug_kevent_client(const char *msg, const char *filter,
+		const char *operation, int err, uint64_t ident, uint64_t udata,
+		struct dispatch_unote_class_s *du);
+
+DISPATCH_NOINLINE DISPATCH_COLD
+void _dispatch_bug_kevent_vanished(struct dispatch_unote_class_s *du);
+
+DISPATCH_NOINLINE DISPATCH_COLD
 void _dispatch_bug_deprecated(const char *msg);
 
-DISPATCH_NOINLINE DISPATCH_NORETURN
+DISPATCH_NOINLINE DISPATCH_NORETURN DISPATCH_COLD
 void _dispatch_abort(size_t line, long val);
 
 #if !defined(DISPATCH_USE_OS_DEBUG_LOG) && DISPATCH_DEBUG
@@ -432,10 +506,21 @@
 #include <syslog.h>
 #endif
 
+#define DISPATCH_BAD_INPUT		((void *_Nonnull)0)
+#define DISPATCH_OUT_OF_MEMORY	((void *_Nonnull)0)
+
+#if __has_attribute(diagnose_if)
+#define DISPATCH_STATIC_ASSERT_IF(e) \
+		__attribute__((diagnose_if(e, "Assertion failed", "error")))
+#else
+#define DISPATCH_STATIC_ASSERT_IF(e)
+#endif // __has_attribute(diagnose_if)
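
A minimal sketch of how the diagnose_if attribute behind
DISPATCH_STATIC_ASSERT_IF behaves (hypothetical names, not part of this patch):
when clang can fold the guarded expression to a constant at a call site, the
call is rejected at compile time; otherwise the attribute has no effect and any
runtime check applies instead.

#if __has_attribute(diagnose_if)
#define MY_ERROR_IF(e) __attribute__((diagnose_if(e, "assertion failed", "error")))
#else
#define MY_ERROR_IF(e)
#endif

static inline void
my_assert_sketch(long e) MY_ERROR_IF(!e)
{
	(void)e; /* the runtime half of the check is elided in this sketch */
}

/* my_assert_sketch(0);       constant false: clang reports an error at the call site */
/* my_assert_sketch(n > 0);   not a compile-time constant: compiles normally          */
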
+
 #if DISPATCH_USE_OS_DEBUG_LOG
 #define _dispatch_log(msg, ...) os_debug_log("libdispatch", msg, ## __VA_ARGS__)
 #else
-DISPATCH_EXPORT DISPATCH_NOINLINE __attribute__((__format__(__printf__,1,2)))
+DISPATCH_EXPORT DISPATCH_NOINLINE DISPATCH_COLD
+__attribute__((__format__(__printf__,1,2)))
 void _dispatch_log(const char *msg, ...);
 #endif // DISPATCH_USE_OS_DEBUG_LOG
 
@@ -443,64 +528,41 @@
 		({ size_t _siz = siz; int _r = snprintf(buf, _siz, __VA_ARGS__); \
 		 _r < 0 ? 0u : ((size_t)_r > _siz ? _siz : (size_t)_r); })
 
-#if __GNUC__
-#define dispatch_static_assert(e) ({ \
-		char __compile_time_assert__[(bool)(e) ? 1 : -1] DISPATCH_UNUSED; \
-	})
+#if __has_feature(c_static_assert) || __STDC_VERSION__ >= 201112L
+#define _dispatch_static_assert(e, s, ...) _Static_assert(e, s)
 #else
-#define dispatch_static_assert(e)
+#define _dispatch_static_assert(e, s, ...)
 #endif
+#define dispatch_static_assert(e, ...) \
+		_dispatch_static_assert(e, ##__VA_ARGS__, #e)
 
-#define DISPATCH_BAD_INPUT		((void *_Nonnull)0)
-#define DISPATCH_OUT_OF_MEMORY	((void *_Nonnull)0)
+#define dispatch_assert_aliases(t1, t2, f) \
+		dispatch_static_assert(offsetof(struct t1,f) == offsetof(struct t2,f), \
+				#t1 "::" #f " and " #t2 "::" #f " should alias")
 
 /*
  * For reporting bugs within libdispatch when using the "_debug" version of the
  * library.
  */
-#if __APPLE__
-#define dispatch_assert(e) do { \
-		if (__builtin_constant_p(e)) { \
-			dispatch_static_assert(e); \
-		} else { \
-			__typeof__(e) _e = (e); /* always eval 'e' */ \
-			if (unlikely(DISPATCH_DEBUG && !_e)) { \
-				_dispatch_abort(__LINE__, (long)_e); \
-			} \
-		} \
-	} while (0)
-#else
+DISPATCH_ALWAYS_INLINE
 static inline void
-_dispatch_assert(long e, size_t line)
+_dispatch_assert(long e, size_t line) DISPATCH_STATIC_ASSERT_IF(!e)
 {
-	if (DISPATCH_DEBUG && !e) _dispatch_abort(line, e);
+	if (unlikely(DISPATCH_DEBUG && !e)) _dispatch_abort(line, e);
 }
 #define dispatch_assert(e) _dispatch_assert((long)(e), __LINE__)
-#endif	/* __GNUC__ */
 
-#if __APPLE__
 /*
  * A lot of APIs return zero upon success and non-zero on failure. Let's capture
  * and log the non-zero value
  */
-#define dispatch_assert_zero(e) do { \
-		if (__builtin_constant_p(e)) { \
-			dispatch_static_assert(e); \
-		} else { \
-			__typeof__(e) _e = (e); /* always eval 'e' */ \
-			if (unlikely(DISPATCH_DEBUG && _e)) { \
-				_dispatch_abort(__LINE__, (long)_e); \
-			} \
-		} \
-	} while (0)
-#else
+DISPATCH_ALWAYS_INLINE
 static inline void
-_dispatch_assert_zero(long e, size_t line)
+_dispatch_assert_zero(long e, size_t line) DISPATCH_STATIC_ASSERT_IF(e)
 {
-	if (DISPATCH_DEBUG && e) _dispatch_abort(line, e);
+	if (unlikely(DISPATCH_DEBUG && e)) _dispatch_abort(line, e);
 }
 #define dispatch_assert_zero(e) _dispatch_assert_zero((long)(e), __LINE__)
-#endif	/* __GNUC__ */
 
 /*
  * For reporting bugs or impedance mismatches between libdispatch and external
@@ -508,76 +570,27 @@
  *
  * In particular, we wrap all system-calls with assume() macros.
  */
-#if __GNUC__
-#define dispatch_assume(e) ({ \
-		__typeof__(e) _e = (e); /* always eval 'e' */ \
-		if (unlikely(!_e)) { \
-			if (__builtin_constant_p(e)) { \
-				dispatch_static_assert(e); \
-			} \
-			_dispatch_bug(__LINE__, (long)_e); \
-		} \
-		_e; \
-	})
-#else
-static inline long
-_dispatch_assume(long e, unsigned long line)
+DISPATCH_ALWAYS_INLINE
+static inline void
+_dispatch_assume(long e, size_t line) DISPATCH_STATIC_ASSERT_IF(!e)
 {
-	if (!e) _dispatch_bug(line, e);
-	return e;
+	if (unlikely(!e)) _dispatch_bug(line, e);
 }
-#define dispatch_assume(e) _dispatch_assume((long)(e), __LINE__)
-#endif	/* __GNUC__ */
+#define dispatch_assume(e) \
+		({ __typeof__(e) _e = (e); _dispatch_assume((long)_e, __LINE__); _e; })
 
 /*
  * A lot of APIs return zero upon success and non-zero on failure. Let's capture
  * and log the non-zero value
  */
-#if __GNUC__
-#define dispatch_assume_zero(e) ({ \
-		__typeof__(e) _e = (e); /* always eval 'e' */ \
-		if (unlikely(_e)) { \
-			if (__builtin_constant_p(e)) { \
-				dispatch_static_assert(e); \
-			} \
-			_dispatch_bug(__LINE__, (long)_e); \
-		} \
-		_e; \
-	})
-#else
-static inline long
-_dispatch_assume_zero(long e, unsigned long line)
+DISPATCH_ALWAYS_INLINE
+static inline void
+_dispatch_assume_zero(long e, size_t line) DISPATCH_STATIC_ASSERT_IF(e)
 {
-	if (e) _dispatch_bug(line, e);
-	return e;
+	if (unlikely(e)) _dispatch_bug(line, e);
 }
-#define dispatch_assume_zero(e) _dispatch_assume_zero((long)(e), __LINE__)
-#endif	/* __GNUC__ */
-
-/*
- * For reporting bugs in clients when using the "_debug" version of the library.
- */
-#if __GNUC__
-#define dispatch_debug_assert(e, msg, args...) do { \
-		if (__builtin_constant_p(e)) { \
-			dispatch_static_assert(e); \
-		} else { \
-			__typeof__(e) _e = (e); /* always eval 'e' */ \
-			if (unlikely(DISPATCH_DEBUG && !_e)) { \
-				_dispatch_log("%s() 0x%lx: " msg, __func__, (long)_e, ##args); \
-				abort(); \
-			} \
-		} \
-	} while (0)
-#else
-#define dispatch_debug_assert(e, msg, args...) do { \
-	__typeof__(e) _e = (e); /* always eval 'e' */ \
-	if (unlikely(DISPATCH_DEBUG && !_e)) { \
-		_dispatch_log("%s() 0x%lx: " msg, __FUNCTION__, _e, ##args); \
-		abort(); \
-	} \
-} while (0)
-#endif	/* __GNUC__ */
+#define dispatch_assume_zero(e) \
+		({ __typeof__(e) _e = (e); _dispatch_assume_zero((long)_e, __LINE__); _e; })
 
 /* Make sure the debug statements don't get too stale */
 #define _dispatch_debug(x, args...) do { \
@@ -600,6 +613,7 @@
 #ifdef __BLOCKS__
 #define _dispatch_Block_invoke(bb) \
 		((dispatch_function_t)((struct Block_layout *)bb)->invoke)
+
 void *_dispatch_Block_copy(void *block);
 #if __GNUC__
 #define _dispatch_Block_copy(x) ((__typeof__(x))_dispatch_Block_copy(x))
@@ -607,6 +621,8 @@
 void _dispatch_call_block_and_release(void *block);
 #endif /* __BLOCKS__ */
 
+bool _dispatch_parse_bool(const char *v);
+bool _dispatch_getenv_bool(const char *env, bool default_v);
 void _dispatch_temporary_resource_shortage(void);
 void *_dispatch_calloc(size_t num_items, size_t size);
 const char *_dispatch_strdup_if_mutable(const char *str);
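
A sketch of the environment-boolean helper whose declaration is added above; it
mirrors the implementation that this patch removes from introspection.c
(further down), returning the default when the variable is unset. The function
name here is a stand-in.

#include <stdbool.h>
#include <stdlib.h>
#include <strings.h>

static bool
getenv_bool_sketch(const char *env, bool default_v)
{
	const char *v = getenv(env);

	if (v) {
		return strcasecmp(v, "YES") == 0 || strcasecmp(v, "Y") == 0 ||
				strcasecmp(v, "TRUE") == 0 || atoi(v);
	}
	return default_v;
}

/* e.g. getenv_bool_sketch("LIBDISPATCH_DEBUG_QUEUE_INVERSIONS", false); */
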
@@ -649,23 +665,87 @@
 
 // Older Mac OS X and iOS Simulator fallbacks
 
-#if HAVE__PTHREAD_WORKQUEUE_INIT && PTHREAD_WORKQUEUE_SPI_VERSION >= 20140213 \
-		&& !defined(HAVE_PTHREAD_WORKQUEUE_QOS)
+#ifndef HAVE_PTHREAD_WORKQUEUE_QOS
+#if !DISPATCH_USE_INTERNAL_WORKQUEUE && HAVE__PTHREAD_WORKQUEUE_INIT && \
+		PTHREAD_WORKQUEUE_SPI_VERSION >= 20140213
 #define HAVE_PTHREAD_WORKQUEUE_QOS 1
-#endif
-#if HAVE__PTHREAD_WORKQUEUE_INIT && PTHREAD_WORKQUEUE_SPI_VERSION >= 20150304 \
-		&& !defined(HAVE_PTHREAD_WORKQUEUE_KEVENT)
-#define HAVE_PTHREAD_WORKQUEUE_KEVENT 1
-#endif
-
-
-#ifndef HAVE_PTHREAD_WORKQUEUE_NARROWING
-#if !DISPATCH_MIN_REQUIRED_OSX_AT_LEAST(109900)
-#define HAVE_PTHREAD_WORKQUEUE_NARROWING 0
 #else
-#define HAVE_PTHREAD_WORKQUEUE_NARROWING 1
+#define HAVE_PTHREAD_WORKQUEUE_QOS 0
 #endif
+#endif // !defined(HAVE_PTHREAD_WORKQUEUE_QOS)
+
+#ifndef HAVE_PTHREAD_WORKQUEUE_KEVENT
+#if !DISPATCH_USE_INTERNAL_WORKQUEUE && HAVE__PTHREAD_WORKQUEUE_INIT && \
+		defined(KEVENT_FLAG_WORKQ) && PTHREAD_WORKQUEUE_SPI_VERSION >= 20150304
+#define HAVE_PTHREAD_WORKQUEUE_KEVENT 1
+#else
+#define HAVE_PTHREAD_WORKQUEUE_KEVENT 0
 #endif
+#endif // !defined(HAVE_PTHREAD_WORKQUEUE_KEVENT)
+
+#ifndef HAVE_PTHREAD_WORKQUEUE_WORKLOOP
+#if HAVE_PTHREAD_WORKQUEUE_KEVENT && defined(WORKQ_FEATURE_WORKLOOP) && \
+		defined(KEVENT_FLAG_WORKLOOP) && \
+		DISPATCH_MIN_REQUIRED_OSX_AT_LEAST(101300)
+#define HAVE_PTHREAD_WORKQUEUE_WORKLOOP 1
+#else
+#define HAVE_PTHREAD_WORKQUEUE_WORKLOOP 0
+#endif
+#endif // !defined(HAVE_PTHREAD_WORKQUEUE_WORKLOOP)
+
+#ifndef DISPATCH_USE_WORKQUEUE_NARROWING
+#if HAVE_PTHREAD_WORKQUEUES && DISPATCH_MIN_REQUIRED_OSX_AT_LEAST(101300)
+#define DISPATCH_USE_WORKQUEUE_NARROWING 1
+#else
+#define DISPATCH_USE_WORKQUEUE_NARROWING 0
+#endif
+#endif // !defined(DISPATCH_USE_WORKQUEUE_NARROWING)
+
+#ifndef DISPATCH_USE_PTHREAD_ROOT_QUEUES
+#if defined(__BLOCKS__) && defined(__APPLE__)
+#define DISPATCH_USE_PTHREAD_ROOT_QUEUES 1 // <rdar://problem/10719357>
+#else
+#define DISPATCH_USE_PTHREAD_ROOT_QUEUES 0
+#endif
+#endif // !defined(DISPATCH_USE_PTHREAD_ROOT_QUEUES)
+
+#ifndef DISPATCH_USE_PTHREAD_POOL
+#if DISPATCH_USE_PTHREAD_ROOT_QUEUES || DISPATCH_USE_INTERNAL_WORKQUEUE
+#define DISPATCH_USE_PTHREAD_POOL 1
+#else
+#define DISPATCH_USE_PTHREAD_POOL 0
+#endif
+#endif // !defined(DISPATCH_USE_PTHREAD_POOL)
+
+#ifndef DISPATCH_USE_KEVENT_WORKQUEUE
+#if HAVE_PTHREAD_WORKQUEUE_KEVENT
+#define DISPATCH_USE_KEVENT_WORKQUEUE 1
+#else
+#define DISPATCH_USE_KEVENT_WORKQUEUE 0
+#endif
+#endif // !defined(DISPATCH_USE_KEVENT_WORKQUEUE)
+
+#if DISPATCH_USE_KEVENT_WORKQUEUE
+#if !HAVE_PTHREAD_WORKQUEUE_QOS || !EV_UDATA_SPECIFIC
+#error Invalid build configuration
+#endif
+#endif // DISPATCH_USE_KEVENT_WORKQUEUE
+
+#ifndef DISPATCH_USE_MGR_THREAD
+#if !DISPATCH_USE_KEVENT_WORKQUEUE || DISPATCH_DEBUG || DISPATCH_PROFILE
+#define DISPATCH_USE_MGR_THREAD 1
+#else
+#define DISPATCH_USE_MGR_THREAD 0
+#endif
+#endif // !defined(DISPATCH_USE_MGR_THREAD)
+
+#ifndef DISPATCH_USE_KEVENT_WORKLOOP
+#if HAVE_PTHREAD_WORKQUEUE_WORKLOOP
+#define DISPATCH_USE_KEVENT_WORKLOOP 1
+#else
+#define DISPATCH_USE_KEVENT_WORKLOOP 0
+#endif
+#endif // !defined(DISPATCH_USE_KEVENT_WORKLOOP)
 
 #ifdef EVFILT_MEMORYSTATUS
 #ifndef DISPATCH_USE_MEMORYSTATUS
@@ -680,28 +760,19 @@
 #if !defined(DISPATCH_USE_MEMORYPRESSURE_SOURCE) && DISPATCH_USE_MEMORYSTATUS
 #define DISPATCH_USE_MEMORYPRESSURE_SOURCE 1
 #endif
-#if DISPATCH_USE_MEMORYPRESSURE_SOURCE
+
 #if __has_include(<malloc_private.h>)
 #include <malloc_private.h>
-#else
+#else // __has_include(<malloc_private.h)
 extern void malloc_memory_event_handler(unsigned long);
+extern int malloc_engaged_nano(void);
 #endif // __has_include(<malloc_private.h)
+#if DISPATCH_USE_MEMORYPRESSURE_SOURCE
 extern bool _dispatch_memory_warn;
 #endif
 
-#if HAVE_PTHREAD_WORKQUEUE_KEVENT && defined(KEVENT_FLAG_WORKQ) && \
-		!defined(DISPATCH_USE_KEVENT_WORKQUEUE)
-#define DISPATCH_USE_KEVENT_WORKQUEUE 1
-#endif
-
-#if (!DISPATCH_USE_KEVENT_WORKQUEUE || DISPATCH_DEBUG || DISPATCH_PROFILE) && \
-		!defined(DISPATCH_USE_MGR_THREAD)
-#define DISPATCH_USE_MGR_THREAD 1
-#endif
-
-
 #if defined(MACH_SEND_SYNC_OVERRIDE) && defined(MACH_RCV_SYNC_WAIT) && \
-		DISPATCH_MIN_REQUIRED_OSX_AT_LEAST(109900) && \
+		DISPATCH_MIN_REQUIRED_OSX_AT_LEAST(101300) && \
 		!defined(DISPATCH_USE_MACH_SEND_SYNC_OVERRIDE)
 #define DISPATCH_USE_MACH_SEND_SYNC_OVERRIDE 1
 #endif
@@ -783,6 +854,8 @@
 #define DISPATCH_TRACE_SUBCLASS_PERF 2
 #define DISPATCH_TRACE_SUBCLASS_MACH_MSG 3
 #define DISPATCH_TRACE_SUBCLASS_PERF_MON 4
+#define DISPATCH_TRACE_SUBCLASS_QOS_TRACE 5
+#define DISPATCH_TRACE_SUBCLASS_FIREHOSE_TRACE 6
 
 #define DISPATCH_PERF_non_leaf_retarget DISPATCH_CODE(PERF, 1)
 #define DISPATCH_PERF_post_activate_retarget DISPATCH_CODE(PERF, 2)
@@ -790,6 +863,9 @@
 #define DISPATCH_PERF_delayed_registration DISPATCH_CODE(PERF, 4)
 #define DISPATCH_PERF_mutable_target DISPATCH_CODE(PERF, 5)
 #define DISPATCH_PERF_strict_bg_timer DISPATCH_CODE(PERF, 6)
+#define DISPATCH_PERF_suspended_timer_fire DISPATCH_CODE(PERF, 7)
+#define DISPATCH_PERF_handlerless_source_fire DISPATCH_CODE(PERF, 8)
+#define DISPATCH_PERF_source_registration_without_qos DISPATCH_CODE(PERF, 9)
 
 #define DISPATCH_MACH_MSG_hdr_move DISPATCH_CODE(MACH_MSG, 1)
 
@@ -797,6 +873,32 @@
 #define DISPATCH_PERF_MON_worker_thread_end DISPATCH_CODE_END(PERF_MON, 1)
 #define DISPATCH_PERF_MON_worker_useless DISPATCH_CODE(PERF_MON, 2)
 
+#define DISPATCH_QOS_TRACE_queue_creation_start  DISPATCH_CODE_START(QOS_TRACE, 1)
+#define DISPATCH_QOS_TRACE_queue_creation_end  DISPATCH_CODE_END(QOS_TRACE, 1)
+#define DISPATCH_QOS_TRACE_queue_dispose DISPATCH_CODE(QOS_TRACE, 2)
+
+#define DISPATCH_QOS_TRACE_private_block_creation DISPATCH_CODE(QOS_TRACE, 3)
+#define DISPATCH_QOS_TRACE_private_block_dispose DISPATCH_CODE(QOS_TRACE, 4)
+
+#define DISPATCH_QOS_TRACE_continuation_push_eb DISPATCH_CODE(QOS_TRACE, 5)
+#define DISPATCH_QOS_TRACE_continuation_push_ab DISPATCH_CODE(QOS_TRACE, 6)
+#define DISPATCH_QOS_TRACE_continuation_push_f DISPATCH_CODE(QOS_TRACE, 7)
+#define DISPATCH_QOS_TRACE_source_push DISPATCH_CODE(QOS_TRACE, 8)
+
+#define DISPATCH_QOS_TRACE_continuation_pop DISPATCH_CODE(QOS_TRACE, 9)
+#define DISPATCH_QOS_TRACE_source_pop DISPATCH_CODE(QOS_TRACE, 10)
+
+#define DISPATCH_QOS_TRACE_queue_item_complete DISPATCH_CODE(QOS_TRACE, 11)
+
+#define DISPATCH_QOS_TRACE_src_callout DISPATCH_CODE(QOS_TRACE, 12)
+#define DISPATCH_QOS_TRACE_src_dispose DISPATCH_CODE(QOS_TRACE, 13)
+
+#define DISPATCH_FIREHOSE_TRACE_reserver_gave_up DISPATCH_CODE(FIREHOSE_TRACE, 1)
+#define DISPATCH_FIREHOSE_TRACE_reserver_wait DISPATCH_CODE(FIREHOSE_TRACE, 2)
+#define DISPATCH_FIREHOSE_TRACE_allocator DISPATCH_CODE(FIREHOSE_TRACE, 3)
+#define DISPATCH_FIREHOSE_TRACE_wait_for_logd DISPATCH_CODE(FIREHOSE_TRACE, 4)
+#define DISPATCH_FIREHOSE_TRACE_chunk_install DISPATCH_CODE(FIREHOSE_TRACE, 5)
+
 DISPATCH_ALWAYS_INLINE
 static inline void
 _dispatch_ktrace_impl(uint32_t code, uint64_t a, uint64_t b,
@@ -827,6 +929,8 @@
 #define _dispatch_ktrace1(code, a)          _dispatch_ktrace(code, a, 0, 0, 0)
 #define _dispatch_ktrace0(code)             _dispatch_ktrace(code, 0, 0, 0, 0)
 
+#define BITPACK_UINT32_PAIR(a, b) (((uint64_t) (a) << 32) | (uint64_t) (b))
+
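
BITPACK_UINT32_PAIR above packs two 32-bit values into one 64-bit ktrace
argument, as the QOS trace points in introspection.c below do with the
(serial number, flags) and (priority, priority) pairs. A small sketch of the
round trip, with hypothetical helper names:

#include <assert.h>
#include <stdint.h>

static uint64_t
pack_pair(uint32_t a, uint32_t b)
{
	return ((uint64_t)a << 32) | (uint64_t)b;
}

static void
pack_pair_demo(void)
{
	uint64_t v = pack_pair(0x1234u, 0x5678u);
	assert((uint32_t)(v >> 32) == 0x1234u);          /* high word: first value  */
	assert((uint32_t)(v & 0xffffffffu) == 0x5678u);  /* low word: second value  */
}
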
 #ifndef MACH_MSGH_BITS_VOUCHER_MASK
 #define MACH_MSGH_BITS_VOUCHER_MASK	0x001f0000
 #define	MACH_MSGH_BITS_SET_PORTS(remote, local, voucher)	\
@@ -859,7 +963,7 @@
 
 #ifndef VOUCHER_USE_PERSONA
 #if VOUCHER_USE_MACH_VOUCHER && defined(BANK_PERSONA_TOKEN) && \
-		TARGET_OS_IOS && !TARGET_OS_SIMULATOR
+		!TARGET_OS_SIMULATOR
 #define VOUCHER_USE_PERSONA 1
 #else
 #define VOUCHER_USE_PERSONA 0
@@ -886,11 +990,11 @@
 #define _dispatch_set_crash_log_message(msg) \
 		_dispatch_set_crash_log_message_dynamic((msg))
 #define _dispatch_set_crash_log_message_dynamic(msg) _RPTF0(_CRT_ASSERT, (msg))
-#else
+#else  // _WIN32
 #define _dispatch_set_crash_log_cause_and_message(ac, msg) ((void)(ac))
 #define _dispatch_set_crash_log_message(msg)
 #define _dispatch_set_crash_log_message_dynamic(msg)
-#endif
+#endif // _WIN32
 
 #if HAVE_MACH
 // MIG_REPLY_MISMATCH means either:
@@ -972,17 +1076,14 @@
 
 #if HAVE_PTHREAD_WORKQUEUE_QOS
 #if DISPATCH_DEBUG
-extern int _dispatch_set_qos_class_enabled;
+extern bool _dispatch_set_qos_class_enabled;
 #else
 #define _dispatch_set_qos_class_enabled (1)
 #endif
 #endif // HAVE_PTHREAD_WORKQUEUE_QOS
 #if DISPATCH_USE_KEVENT_WORKQUEUE
-#if !HAVE_PTHREAD_WORKQUEUE_QOS || !EV_UDATA_SPECIFIC
-#error Invalid build configuration
-#endif
 #if DISPATCH_USE_MGR_THREAD
-extern int _dispatch_kevent_workqueue_enabled;
+extern bool _dispatch_kevent_workqueue_enabled;
 #else
 #define _dispatch_kevent_workqueue_enabled (1)
 #endif
@@ -990,6 +1091,11 @@
 #define _dispatch_kevent_workqueue_enabled (0)
 #endif // DISPATCH_USE_KEVENT_WORKQUEUE
 
+#if DISPATCH_USE_KEVENT_WORKLOOP
+#if !DISPATCH_USE_KEVENT_WORKQUEUE || !DISPATCH_USE_KEVENT_QOS
+#error Invalid build configuration
+#endif
+#endif
 
 /* #includes dependent on internal.h */
 #include "object_internal.h"
@@ -1004,4 +1110,6 @@
 #include "inline_internal.h"
 #include "firehose/firehose_internal.h"
 
+__END_DECLS
+
 #endif /* __DISPATCH_INTERNAL__ */
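
A sketch of the assume() pattern that internal.h now uses in the hunks above: a
statement expression evaluates the argument exactly once, reports a falsy value
through a cold helper, and still yields the value so the macro can wrap system
calls inline. bug_report() and my_assume() are hypothetical stand-ins for
_dispatch_bug() and dispatch_assume().

#include <stdio.h>

static void
bug_report(size_t line, long val)
{
	fprintf(stderr, "unexpected value %ld at line %zu\n", val, line);
}

static inline void
assume_sketch(long e, size_t line)
{
	if (!e) bug_report(line, e);
}

#define my_assume(e) \
		({ __typeof__(e) _e = (e); assume_sketch((long)_e, __LINE__); _e; })

/* void *p = my_assume(malloc(64));  logs when malloc returns NULL, still yields p */
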
diff --git a/src/introspection.c b/src/introspection.c
index 1bb095d..f38f9e3 100644
--- a/src/introspection.c
+++ b/src/introspection.c
@@ -29,54 +29,37 @@
 #include "introspection_private.h"
 
 typedef struct dispatch_introspection_thread_s {
+#if !OS_OBJECT_HAVE_OBJC1
 	void *dit_isa;
-	TAILQ_ENTRY(dispatch_introspection_thread_s) dit_list;
+#endif
+	LIST_ENTRY(dispatch_introspection_thread_s) dit_list;
 	pthread_t thread;
+#if OS_OBJECT_HAVE_OBJC1
+	void *dit_isa;
+#endif
 	dispatch_queue_t *queue;
 } dispatch_introspection_thread_s;
+dispatch_static_assert(offsetof(struct dispatch_continuation_s, dc_flags) ==
+		offsetof(struct dispatch_introspection_thread_s, dit_isa),
+		"These fields must alias so that leaks instruments work");
 typedef struct dispatch_introspection_thread_s *dispatch_introspection_thread_t;
 
 struct dispatch_introspection_state_s _dispatch_introspection = {
-	.threads = TAILQ_HEAD_INITIALIZER(_dispatch_introspection.threads),
-	.queues = TAILQ_HEAD_INITIALIZER(_dispatch_introspection.queues),
+	.threads = LIST_HEAD_INITIALIZER(_dispatch_introspection.threads),
+	.queues = LIST_HEAD_INITIALIZER(_dispatch_introspection.queues),
 };
 
 static void _dispatch_introspection_thread_remove(void *ctxt);
 
-static void _dispatch_introspection_queue_order_dispose(dispatch_queue_t dq);
+static void _dispatch_introspection_queue_order_dispose(
+		dispatch_queue_introspection_context_t dqic);
 
 #pragma mark -
 #pragma mark dispatch_introspection_init
 
-DISPATCH_NOINLINE
-static bool
-_dispatch_getenv_bool(const char *env, bool default_v)
-{
-	const char *v = getenv(env);
-
-	if (v) {
-		return strcasecmp(v, "YES") == 0 || strcasecmp(v, "Y") == 0 ||
-				strcasecmp(v, "TRUE") == 0 || atoi(v);
-	}
-	return default_v;
-}
-
 void
 _dispatch_introspection_init(void)
 {
-	TAILQ_INSERT_TAIL(&_dispatch_introspection.queues,
-			&_dispatch_main_q, diq_list);
-	TAILQ_INSERT_TAIL(&_dispatch_introspection.queues,
-			&_dispatch_mgr_q, diq_list);
-#if DISPATCH_ENABLE_PTHREAD_ROOT_QUEUES
-	TAILQ_INSERT_TAIL(&_dispatch_introspection.queues,
-			_dispatch_mgr_q.do_targetq, diq_list);
-#endif
-	for (size_t i = 0; i < DISPATCH_ROOT_QUEUE_COUNT; i++) {
-		TAILQ_INSERT_TAIL(&_dispatch_introspection.queues,
-				&_dispatch_root_queues[i], diq_list);
-	}
-
 	_dispatch_introspection.debug_queue_inversions =
 			_dispatch_getenv_bool("LIBDISPATCH_DEBUG_QUEUE_INVERSIONS", false);
 
@@ -93,6 +76,15 @@
 	_dispatch_thread_key_create(&dispatch_introspection_key,
 			_dispatch_introspection_thread_remove);
 	_dispatch_introspection_thread_add(); // add main thread
+
+	for (size_t i = 0; i < DISPATCH_ROOT_QUEUE_COUNT; i++) {
+		_dispatch_trace_queue_create(&_dispatch_root_queues[i]);
+	}
+#if DISPATCH_USE_MGR_THREAD && DISPATCH_USE_PTHREAD_ROOT_QUEUES
+	_dispatch_trace_queue_create(_dispatch_mgr_q.do_targetq);
+#endif
+	_dispatch_trace_queue_create(&_dispatch_main_q);
+	_dispatch_trace_queue_create(&_dispatch_mgr_q);
 }
 
 const struct dispatch_introspection_versions_s
@@ -133,7 +125,7 @@
 			(void*)thread + _dispatch_introspection.thread_queue_offset;
 	_dispatch_thread_setspecific(dispatch_introspection_key, dit);
 	_dispatch_unfair_lock_lock(&_dispatch_introspection.threads_lock);
-	TAILQ_INSERT_TAIL(&_dispatch_introspection.threads, dit, dit_list);
+	LIST_INSERT_HEAD(&_dispatch_introspection.threads, dit, dit_list);
 	_dispatch_unfair_lock_unlock(&_dispatch_introspection.threads_lock);
 }
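
The thread and queue registries in this file move from the TAILQ macros to the
LIST macros from <sys/queue.h>; unlike TAILQ_REMOVE, LIST_REMOVE needs no list
head, which is what lets the removal paths in the following hunks drop it. A
small standalone sketch of that API:

#include <stdio.h>
#include <sys/queue.h>

struct node {
	int value;
	LIST_ENTRY(node) link;                /* embeds the link pointers */
};

static LIST_HEAD(node_list, node) head = LIST_HEAD_INITIALIZER(head);

static void
list_demo(void)
{
	struct node a = { .value = 1 }, b = { .value = 2 };
	LIST_INSERT_HEAD(&head, &a, link);
	LIST_INSERT_HEAD(&head, &b, link);

	struct node *n;
	LIST_FOREACH(n, &head, link) {
		printf("%d\n", n->value);
	}

	LIST_REMOVE(&a, link);                /* note: no head argument needed */
}
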
 
@@ -142,7 +134,7 @@
 {
 	dispatch_introspection_thread_t dit = ctxt;
 	_dispatch_unfair_lock_lock(&_dispatch_introspection.threads_lock);
-	TAILQ_REMOVE(&_dispatch_introspection.threads, dit, dit_list);
+	LIST_REMOVE(dit, dit_list);
 	_dispatch_unfair_lock_unlock(&_dispatch_introspection.threads_lock);
 	_dispatch_continuation_free((void*)dit);
 	_dispatch_thread_setspecific(dispatch_introspection_key, NULL);
@@ -151,16 +143,16 @@
 #pragma mark -
 #pragma mark dispatch_introspection_info
 
-DISPATCH_USED inline
-dispatch_introspection_queue_s
-dispatch_introspection_queue_get_info(dispatch_queue_t dq)
+DISPATCH_ALWAYS_INLINE
+static inline dispatch_introspection_queue_s
+_dispatch_introspection_lane_get_info(dispatch_lane_class_t dqu)
 {
-	bool global = (dq->do_xref_cnt == DISPATCH_OBJECT_GLOBAL_REFCNT) ||
-			(dq->do_ref_cnt == DISPATCH_OBJECT_GLOBAL_REFCNT);
+	dispatch_lane_t dq = dqu._dl;
+	bool global = _dispatch_object_is_global(dq);
 	uint64_t dq_state = os_atomic_load2o(dq, dq_state, relaxed);
 
 	dispatch_introspection_queue_s diq = {
-		.queue = dq,
+		.queue = dq->_as_dq,
 		.target_queue = dq->do_targetq,
 		.label = dq->dq_label,
 		.serialnum = dq->dq_serialnum,
@@ -171,11 +163,43 @@
 		.draining = (dq->dq_items_head == (void*)~0ul) ||
 				(!dq->dq_items_head && dq->dq_items_tail),
 		.global = global,
-		.main = (dq == &_dispatch_main_q),
+		.main = dx_type(dq) == DISPATCH_QUEUE_MAIN_TYPE,
 	};
 	return diq;
 }
 
+DISPATCH_ALWAYS_INLINE
+static inline dispatch_introspection_queue_s
+_dispatch_introspection_workloop_get_info(dispatch_workloop_t dwl)
+{
+	uint64_t dq_state = os_atomic_load2o(dwl, dq_state, relaxed);
+
+	dispatch_introspection_queue_s diq = {
+		.queue = dwl->_as_dq,
+		.target_queue = dwl->do_targetq,
+		.label = dwl->dq_label,
+		.serialnum = dwl->dq_serialnum,
+		.width = 1,
+		.suspend_count = 0,
+		.enqueued = _dq_state_is_enqueued(dq_state),
+		.barrier = _dq_state_is_in_barrier(dq_state),
+		.draining = 0,
+		.global = 0,
+		.main = 0,
+	};
+	return diq;
+}
+
+DISPATCH_USED inline
+dispatch_introspection_queue_s
+dispatch_introspection_queue_get_info(dispatch_queue_t dq)
+{
+	if (dx_metatype(dq) == _DISPATCH_WORKLOOP_TYPE) {
+		return _dispatch_introspection_workloop_get_info(upcast(dq)._dwl);
+	}
+	return _dispatch_introspection_lane_get_info(upcast(dq)._dl);
+}
+
 static inline void
 _dispatch_introspection_continuation_get_info(dispatch_queue_t dq,
 		dispatch_continuation_t dc, dispatch_introspection_queue_item_t diqi)
@@ -190,8 +214,9 @@
 		flags = 0;
 		switch (dc_type(dc)) {
 #if HAVE_PTHREAD_WORKQUEUE_QOS
-		case DC_OVERRIDE_STEALING_TYPE:
-		case DC_OVERRIDE_OWNING_TYPE:
+		case DISPATCH_CONTINUATION_TYPE(WORKLOOP_STEALING):
+		case DISPATCH_CONTINUATION_TYPE(OVERRIDE_STEALING):
+		case DISPATCH_CONTINUATION_TYPE(OVERRIDE_OWNING):
 			dc = dc->dc_data;
 			if (!_dispatch_object_is_continuation(dc)) {
 				// these really wrap queues so we should hide the continuation type
@@ -202,40 +227,45 @@
 			}
 			return _dispatch_introspection_continuation_get_info(dq, dc, diqi);
 #endif
-		case DC_ASYNC_REDIRECT_TYPE:
+		case DISPATCH_CONTINUATION_TYPE(ASYNC_REDIRECT):
 			DISPATCH_INTERNAL_CRASH(0, "Handled by the caller");
-		case DC_MACH_ASYNC_REPLY_TYPE:
+		case DISPATCH_CONTINUATION_TYPE(MACH_ASYNC_REPLY):
 			break;
-		case DC_MACH_SEND_BARRRIER_DRAIN_TYPE:
+		case DISPATCH_CONTINUATION_TYPE(MACH_SEND_BARRRIER_DRAIN):
 			break;
-		case DC_MACH_SEND_BARRIER_TYPE:
-		case DC_MACH_RECV_BARRIER_TYPE:
+		case DISPATCH_CONTINUATION_TYPE(MACH_SEND_BARRIER):
+		case DISPATCH_CONTINUATION_TYPE(MACH_RECV_BARRIER):
 			flags = (uintptr_t)dc->dc_data;
 			dq = dq->do_targetq;
 			break;
+		case DISPATCH_CONTINUATION_TYPE(MACH_IPC_HANDOFF):
+			flags = (uintptr_t)dc->dc_data;
+			break;
 		default:
 			DISPATCH_INTERNAL_CRASH(dc->do_vtable, "Unknown dc vtable type");
 		}
-	} else {
-		if (flags & DISPATCH_OBJ_SYNC_WAITER_BIT) {
-			dispatch_sync_context_t dsc = (dispatch_sync_context_t)dc;
-			waiter = pthread_from_mach_thread_np(dsc->dsc_waiter);
-			ctxt = dsc->dsc_ctxt;
-			func = dsc->dsc_func;
-		}
-		if (func == _dispatch_apply_invoke ||
-				func == _dispatch_apply_redirect_invoke) {
-			dispatch_apply_t da = ctxt;
-			if (da->da_todo) {
-				dc = da->da_dc;
-				dq = dc->dc_data;
-				ctxt = dc->dc_ctxt;
-				func = dc->dc_func;
-				apply = true;
-			}
+	} else if (flags & (DC_FLAG_SYNC_WAITER | DC_FLAG_ASYNC_AND_WAIT)) {
+		dispatch_sync_context_t dsc = (dispatch_sync_context_t)dc;
+		waiter = pthread_from_mach_thread_np(dsc->dsc_waiter);
+		ctxt = dsc->dsc_ctxt;
+		func = dsc->dsc_func;
+	} else if (func == _dispatch_apply_invoke ||
+			func == _dispatch_apply_redirect_invoke) {
+		dispatch_apply_t da = ctxt;
+		if (da->da_todo) {
+			dc = da->da_dc;
+			dq = dc->dc_data;
+			ctxt = dc->dc_ctxt;
+			func = dc->dc_func;
+			apply = true;
 		}
 	}
-	if (flags & DISPATCH_OBJ_BLOCK_BIT) {
+
+	if (flags & DC_FLAG_BLOCK_WITH_PRIVATE_DATA) {
+		dispatch_block_private_data_t dbpd = _dispatch_block_get_data(ctxt);
+		diqi->type = dispatch_introspection_queue_item_type_block;
+		func = _dispatch_Block_invoke(dbpd->dbpd_block);
+	} else if (flags & DC_FLAG_BLOCK) {
 		diqi->type = dispatch_introspection_queue_item_type_block;
 		func = _dispatch_Block_invoke(ctxt);
 	} else {
@@ -247,11 +277,11 @@
 		.context = ctxt,
 		.function = func,
 		.waiter = waiter,
-		.barrier = (flags & DISPATCH_OBJ_BARRIER_BIT) || dq->dq_width == 1,
-		.sync = flags & DISPATCH_OBJ_SYNC_WAITER_BIT,
+		.barrier = (flags & DC_FLAG_BARRIER) || dq->dq_width == 1,
+		.sync = (bool)(flags & (DC_FLAG_SYNC_WAITER | DC_FLAG_ASYNC_AND_WAIT)),
 		.apply = apply,
 	};
-	if (flags & DISPATCH_OBJ_GROUP_BIT) {
+	if (flags & DC_FLAG_GROUP_ASYNC) {
 		dispatch_group_t group = dc->dc_data;
 		if (dx_type(group) == DISPATCH_GROUP_TYPE) {
 			diqi->function.group = group;
@@ -267,7 +297,7 @@
 		.object = dou._dc,
 		.target_queue = dou._do->do_targetq,
 		.type = (void*)dou._do->do_vtable,
-		.kind = dx_kind(dou._do),
+		.kind = _dispatch_object_class_name(dou._do),
 	};
 	return dio;
 }
@@ -284,7 +314,7 @@
 	if (dc) {
 		ctxt = dc->dc_ctxt;
 		handler = dc->dc_func;
-		hdlr_is_block = (dc->dc_flags & DISPATCH_OBJ_BLOCK_BIT);
+		hdlr_is_block = (dc->dc_flags & DC_FLAG_BLOCK);
 	}
 
 	uint64_t dq_state = os_atomic_load2o(ds, dq_state, relaxed);
@@ -297,7 +327,7 @@
 		.enqueued = _dq_state_is_enqueued(dq_state),
 		.handler_is_block = hdlr_is_block,
 		.timer = dr->du_is_timer,
-		.after = dr->du_is_timer && (dr->du_fflags & DISPATCH_TIMER_AFTER),
+		.after = dr->du_is_timer && (dr->du_timer_flags & DISPATCH_TIMER_AFTER),
 		.type = (unsigned long)dr->du_filter,
 		.handle = (unsigned long)dr->du_ident,
 	};
@@ -305,6 +335,27 @@
 }
 
 static inline
+dispatch_introspection_source_s
+_dispatch_introspection_mach_get_info(dispatch_mach_t dm)
+{
+	dispatch_mach_recv_refs_t dmrr = dm->dm_recv_refs;
+	uint64_t dq_state = os_atomic_load2o(dm, dq_state, relaxed);
+
+	dispatch_introspection_source_s dis = {
+		.source = upcast(dm)._ds,
+		.target_queue = dm->do_targetq,
+		.context = dmrr->dmrr_handler_ctxt,
+		.handler = (void *)dmrr->dmrr_handler_func,
+		.suspend_count = _dq_state_suspend_cnt(dq_state) + dm->dq_side_suspend_cnt,
+		.enqueued = _dq_state_is_enqueued(dq_state),
+		.handler_is_block = dmrr->dmrr_handler_is_block,
+		.type = (unsigned long)dmrr->du_filter,
+		.handle = (unsigned long)dmrr->du_ident,
+		.is_xpc = dm->dm_is_xpc,
+	};
+	return dis;
+}
+static inline
 dispatch_introspection_queue_thread_s
 _dispatch_introspection_thread_get_info(dispatch_introspection_thread_t dit)
 {
@@ -331,21 +382,25 @@
 	if (_dispatch_object_has_vtable(dou._do)) {
 		unsigned long type = dx_type(dou._do);
 		unsigned long metatype = type & _DISPATCH_META_TYPE_MASK;
-		if (type == DC_ASYNC_REDIRECT_TYPE) {
+		if (type == DISPATCH_CONTINUATION_TYPE(ASYNC_REDIRECT)) {
 			dq = dc->dc_data;
 			dc = dc->dc_other;
 			goto again;
 		}
 		if (metatype == _DISPATCH_CONTINUATION_TYPE) {
 			_dispatch_introspection_continuation_get_info(dq, dc, &diqi);
-		} else if (metatype == _DISPATCH_QUEUE_TYPE &&
-				type != DISPATCH_QUEUE_SPECIFIC_TYPE) {
+		} else if (metatype == _DISPATCH_LANE_TYPE) {
 			diqi.type = dispatch_introspection_queue_item_type_queue;
-			diqi.queue = dispatch_introspection_queue_get_info(dou._dq);
-		} else if (metatype == _DISPATCH_SOURCE_TYPE &&
-				type != DISPATCH_MACH_CHANNEL_TYPE) {
+			diqi.queue = _dispatch_introspection_lane_get_info(dou._dl);
+		} else if (metatype == _DISPATCH_WORKLOOP_TYPE) {
+			diqi.type = dispatch_introspection_queue_item_type_queue;
+			diqi.queue = _dispatch_introspection_workloop_get_info(dou._dwl);
+		} else if (type == DISPATCH_SOURCE_KEVENT_TYPE) {
 			diqi.type = dispatch_introspection_queue_item_type_source;
 			diqi.source = _dispatch_introspection_source_get_info(dou._ds);
+		} else if (type == DISPATCH_MACH_CHANNEL_TYPE) {
+			diqi.type = dispatch_introspection_queue_item_type_source;
+			diqi.source = _dispatch_introspection_mach_get_info(dou._dm);
 		} else {
 			diqi.type = dispatch_introspection_queue_item_type_object;
 			diqi.object = _dispatch_introspection_object_get_info(dou._do);
@@ -364,17 +419,22 @@
 dispatch_introspection_get_queues(dispatch_queue_t start, size_t count,
 		dispatch_introspection_queue_t queues)
 {
-	dispatch_queue_t next;
-	next = start ? start : TAILQ_FIRST(&_dispatch_introspection.queues);
+	dispatch_queue_introspection_context_t next;
+
+	if (start) {
+		next = start->do_finalizer;
+	} else {
+		next = LIST_FIRST(&_dispatch_introspection.queues);
+	}
 	while (count--) {
 		if (!next) {
 			queues->queue = NULL;
-			break;
+			return NULL;
 		}
-		*queues++ = dispatch_introspection_queue_get_info(next);
-		next = TAILQ_NEXT(next, diq_list);
+		*queues++ = dispatch_introspection_queue_get_info(next->dqic_queue._dq);
+		next = LIST_NEXT(next, dqic_list);
 	}
-	return next;
+	return next->dqic_queue._dq;
 }
 
 DISPATCH_USED
@@ -383,24 +443,26 @@
 		size_t count, dispatch_introspection_queue_thread_t threads)
 {
 	dispatch_introspection_thread_t next = start ? (void*)start :
-			TAILQ_FIRST(&_dispatch_introspection.threads);
+			LIST_FIRST(&_dispatch_introspection.threads);
 	while (count--) {
 		if (!next) {
 			threads->object = NULL;
 			break;
 		}
 		*threads++ = _dispatch_introspection_thread_get_info(next);
-		next = TAILQ_NEXT(next, dit_list);
+		next = LIST_NEXT(next, dit_list);
 	}
 	return (void*)next;
 }
 
 DISPATCH_USED
 dispatch_continuation_t
-dispatch_introspection_queue_get_items(dispatch_queue_t dq,
+dispatch_introspection_queue_get_items(dispatch_queue_t _dq,
 		dispatch_continuation_t start, size_t count,
 		dispatch_introspection_queue_item_t items)
 {
+	if (dx_metatype(_dq) != _DISPATCH_LANE_TYPE) return NULL;
+	dispatch_lane_t dq = upcast(_dq)._dl;
 	dispatch_continuation_t next = start ? start :
 			dq->dq_items_head == (void*)~0ul ? NULL : (void*)dq->dq_items_head;
 	while (count--) {
@@ -408,13 +470,39 @@
 			items->type = dispatch_introspection_queue_item_type_none;
 			break;
 		}
-		*items++ = dispatch_introspection_queue_item_get_info(dq, next);
+		*items++ = dispatch_introspection_queue_item_get_info(_dq, next);
 		next = next->do_next;
 	}
 	return next;
 }
 
 #pragma mark -
+#pragma mark tracing & introspection helpers
+
+struct dispatch_object_s *
+_dispatch_introspection_queue_fake_sync_push_pop(dispatch_queue_t dq,
+		void *ctxt, dispatch_function_t func, uintptr_t dc_flags)
+{
+	// fake just what introspection really needs here: flags, func, ctxt, queue,
+	// dc_priority, and of course waiter
+	struct dispatch_sync_context_s dsc = {
+		.dc_priority = _dispatch_get_priority(),
+		.dc_flags    = DC_FLAG_SYNC_WAITER | dc_flags,
+		.dc_other    = dq,
+		.dsc_func    = func,
+		.dsc_ctxt    = ctxt,
+		.dsc_waiter  = _dispatch_tid_self(),
+	};
+
+	_dispatch_trace_item_push(dq, &dsc);
+	_dispatch_trace_item_pop(dq, &dsc);
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wreturn-stack-address"
+	return (struct dispatch_object_s *)(uintptr_t)&dsc;
+#pragma clang diagnostic pop
+}
+
+#pragma mark -
 #pragma mark dispatch_introspection_hooks
 
 #define DISPATCH_INTROSPECTION_NO_HOOK ((void*)~0ul)
@@ -428,6 +516,7 @@
 	.queue_item_enqueue = DISPATCH_INTROSPECTION_NO_HOOK,
 	.queue_item_dequeue = DISPATCH_INTROSPECTION_NO_HOOK,
 	.queue_item_complete = DISPATCH_INTROSPECTION_NO_HOOK,
+	.runtime_event = DISPATCH_INTROSPECTION_NO_HOOK,
 };
 
 #define DISPATCH_INTROSPECTION_HOOKS_COUNT (( \
@@ -436,12 +525,12 @@
 		sizeof(dispatch_function_t))
 
 #define DISPATCH_INTROSPECTION_HOOK_ENABLED(h) \
-		(slowpath(_dispatch_introspection_hooks.h))
+		unlikely(_dispatch_introspection_hooks.h)
 
 #define DISPATCH_INTROSPECTION_HOOK_CALLOUT(h, ...) ({ \
 		__typeof__(_dispatch_introspection_hooks.h) _h; \
 		_h = _dispatch_introspection_hooks.h; \
-		if (slowpath((void*)(_h) != DISPATCH_INTROSPECTION_NO_HOOK)) { \
+		if (unlikely((void*)(_h) != DISPATCH_INTROSPECTION_NO_HOOK)) { \
 			_h(__VA_ARGS__); \
 		} })
 
@@ -515,20 +604,64 @@
 	dispatch_introspection_hook_callout_queue_create(&diq);
 }
 
-dispatch_queue_t
+dispatch_function_t
+_dispatch_object_finalizer(dispatch_object_t dou)
+{
+	dispatch_queue_introspection_context_t dqic;
+	switch (dx_metatype(dou._do)) {
+	case _DISPATCH_LANE_TYPE:
+	case _DISPATCH_WORKLOOP_TYPE:
+		dqic = dou._dq->do_finalizer;
+		return dqic->dqic_finalizer;
+	default:
+		return dou._do->do_finalizer;
+	}
+}
+
+void
+_dispatch_object_set_finalizer(dispatch_object_t dou,
+		dispatch_function_t finalizer)
+{
+	dispatch_queue_introspection_context_t dqic;
+	switch (dx_metatype(dou._do)) {
+	case _DISPATCH_LANE_TYPE:
+	case _DISPATCH_WORKLOOP_TYPE:
+		dqic = dou._dq->do_finalizer;
+		dqic->dqic_finalizer = finalizer;
+		break;
+	default:
+		dou._do->do_finalizer = finalizer;
+		break;
+	}
+}
+
+dispatch_queue_class_t
 _dispatch_introspection_queue_create(dispatch_queue_t dq)
 {
-	TAILQ_INIT(&dq->diq_order_top_head);
-	TAILQ_INIT(&dq->diq_order_bottom_head);
+	dispatch_queue_introspection_context_t dqic;
+	size_t sz = sizeof(struct dispatch_queue_introspection_context_s);
+
+	if (!_dispatch_introspection.debug_queue_inversions) {
+		sz = offsetof(struct dispatch_queue_introspection_context_s,
+				__dqic_no_queue_inversion);
+	}
+	dqic = _dispatch_calloc(1, sz);
+	dqic->dqic_queue._dq = dq;
+	if (_dispatch_introspection.debug_queue_inversions) {
+		LIST_INIT(&dqic->dqic_order_top_head);
+		LIST_INIT(&dqic->dqic_order_bottom_head);
+	}
+	dq->do_finalizer = dqic;
+
 	_dispatch_unfair_lock_lock(&_dispatch_introspection.queues_lock);
-	TAILQ_INSERT_TAIL(&_dispatch_introspection.queues, dq, diq_list);
+	LIST_INSERT_HEAD(&_dispatch_introspection.queues, dqic, dqic_list);
 	_dispatch_unfair_lock_unlock(&_dispatch_introspection.queues_lock);
 
 	DISPATCH_INTROSPECTION_INTERPOSABLE_HOOK_CALLOUT(queue_create, dq);
 	if (DISPATCH_INTROSPECTION_HOOK_ENABLED(queue_create)) {
 		_dispatch_introspection_queue_create_hook(dq);
 	}
-	return dq;
+	return upcast(dq)._dqu;
 }
 
 DISPATCH_NOINLINE
@@ -551,15 +684,22 @@
 void
 _dispatch_introspection_queue_dispose(dispatch_queue_t dq)
 {
+	dispatch_queue_introspection_context_t dqic = dq->do_finalizer;
+
 	DISPATCH_INTROSPECTION_INTERPOSABLE_HOOK_CALLOUT(queue_destroy, dq);
 	if (DISPATCH_INTROSPECTION_HOOK_ENABLED(queue_dispose)) {
 		_dispatch_introspection_queue_dispose_hook(dq);
 	}
 
 	_dispatch_unfair_lock_lock(&_dispatch_introspection.queues_lock);
-	TAILQ_REMOVE(&_dispatch_introspection.queues, dq, diq_list);
-	_dispatch_introspection_queue_order_dispose(dq);
+	LIST_REMOVE(dqic, dqic_list);
+	if (_dispatch_introspection.debug_queue_inversions) {
+		_dispatch_introspection_queue_order_dispose(dqic);
+	}
 	_dispatch_unfair_lock_unlock(&_dispatch_introspection.queues_lock);
+
+	dq->do_finalizer = dqic->dqic_finalizer; // restore the real finalizer
+	free(dqic);
 }
 
 DISPATCH_NOINLINE
@@ -591,6 +731,85 @@
 	}
 }
 
+void
+_dispatch_trace_item_push_internal(dispatch_queue_t dq,
+		dispatch_object_t dou)
+{
+	if (dx_metatype(dq) != _DISPATCH_LANE_TYPE) {
+		return;
+	}
+
+	dispatch_continuation_t dc = dou._dc;
+
+	/* Only track user continuations */
+	if (_dispatch_object_is_continuation(dou) &&
+		_dispatch_object_has_vtable(dou) && dc_type(dc) > 0){
+		return;
+	}
+
+	struct dispatch_introspection_queue_item_s idc;
+	idc = dispatch_introspection_queue_item_get_info(dq, dc);
+
+	switch (idc.type) {
+	case dispatch_introspection_queue_item_type_none:
+		break;
+	case dispatch_introspection_queue_item_type_block:
+	{
+		uintptr_t dc_flags = 0;
+		dc_flags |= (idc.block.barrier ? DC_BARRIER : 0);
+		dc_flags |= (idc.block.sync ? DC_SYNC : 0);
+		dc_flags |= (idc.block.apply ? DC_APPLY : 0);
+
+		if (dc->dc_flags & DC_FLAG_BLOCK_WITH_PRIVATE_DATA) {
+			_dispatch_ktrace4(DISPATCH_QOS_TRACE_continuation_push_eb,
+					dou._do_value,
+					(uintptr_t)idc.block.block, /* Heap allocated block ptr */
+					BITPACK_UINT32_PAIR(dq->dq_serialnum, dc_flags),
+					BITPACK_UINT32_PAIR(_dispatch_get_priority(),
+							dc->dc_priority));
+		} else {
+			_dispatch_ktrace4(DISPATCH_QOS_TRACE_continuation_push_ab,
+					dou._do_value,
+					(uintptr_t)idc.block.block_invoke, /* Function pointer */
+					BITPACK_UINT32_PAIR(dq->dq_serialnum, dc_flags),
+					BITPACK_UINT32_PAIR(_dispatch_get_priority(),
+							dc->dc_priority));
+		}
+
+		break;
+	}
+	case dispatch_introspection_queue_item_type_function:
+	{
+		uintptr_t dc_flags = 0;
+		dc_flags |= (idc.function.barrier ? DC_BARRIER : 0);
+		dc_flags |= (idc.function.sync ? DC_SYNC : 0);
+		dc_flags |= (idc.function.apply ? DC_APPLY : 0);
+
+		_dispatch_ktrace4(DISPATCH_QOS_TRACE_continuation_push_f,
+				dou._do_value,
+				(uintptr_t)idc.function.function, /* Function pointer */
+				BITPACK_UINT32_PAIR(dq->dq_serialnum, dc_flags),
+				BITPACK_UINT32_PAIR(_dispatch_get_priority(), dc->dc_priority));
+		break;
+	}
+	case dispatch_introspection_queue_item_type_object:
+		/* Generic dispatch object - we don't know how to handle this yet */
+		break;
+	case dispatch_introspection_queue_item_type_queue:
+		/* Dispatch queue - we don't know how to handle this yet */
+		break;
+	case dispatch_introspection_queue_item_type_source:
+		/* Dispatch sources */
+		_dispatch_ktrace4(DISPATCH_QOS_TRACE_source_push,
+				dou._do_value,
+				idc.source.type,
+				(uintptr_t)idc.source.handler,
+				dq->dq_serialnum);
+		break;
+	}
+}
+
+
 DISPATCH_NOINLINE
 void
 dispatch_introspection_hook_callout_queue_item_dequeue(dispatch_queue_t queue,
@@ -620,6 +839,47 @@
 	}
 }
 
+void
+_dispatch_trace_item_pop_internal(dispatch_queue_t dq,
+		dispatch_object_t dou)
+{
+	if (dx_metatype(dq) != _DISPATCH_LANE_TYPE) {
+		return;
+	}
+
+	dispatch_continuation_t dc = dou._dc;
+
+	/* Only track user continuations */
+	if (_dispatch_object_is_continuation(dou) &&
+		_dispatch_object_has_vtable(dou) && dc_type(dc) > 0){
+		return;
+	}
+
+	struct dispatch_introspection_queue_item_s idc;
+	idc = dispatch_introspection_queue_item_get_info(dq, dc);
+
+	switch (idc.type) {
+	case dispatch_introspection_queue_item_type_none:
+		break;
+	case dispatch_introspection_queue_item_type_block:
+	case dispatch_introspection_queue_item_type_function:
+		_dispatch_ktrace3(DISPATCH_QOS_TRACE_continuation_pop,
+				dou._do_value, _dispatch_get_priority(), dq->dq_serialnum);
+		break;
+	case dispatch_introspection_queue_item_type_object:
+		/* Generic dispatch object - we don't know how to handle this yet */
+		break;
+	case dispatch_introspection_queue_item_type_queue:
+		/* Dispatch queue - we don't know how to handle this yet */
+		break;
+	case dispatch_introspection_queue_item_type_source:
+		/* Dispatch sources */
+		_dispatch_ktrace2(DISPATCH_QOS_TRACE_source_pop,
+				dou._do_value, dq->dq_serialnum);
+		break;
+	}
+}
+
 DISPATCH_NOINLINE
 void
 dispatch_introspection_hook_callout_queue_item_complete(
@@ -653,6 +913,20 @@
 }
 
 void
+_dispatch_trace_source_callout_entry_internal(dispatch_source_t ds, long kind,
+		dispatch_queue_t dq, dispatch_continuation_t dc)
+{
+	if (dx_metatype(dq) != _DISPATCH_LANE_TYPE) {
+		return;
+	}
+
+	_dispatch_ktrace3(DISPATCH_QOS_TRACE_src_callout,
+					(uintptr_t)ds, (uintptr_t)dc, kind);
+
+	_dispatch_trace_item_push_internal(dq, (dispatch_object_t) dc);
+}
+
+void
 _dispatch_introspection_callout_return(void *ctxt, dispatch_function_t f)
 {
 	dispatch_queue_t dq = _dispatch_queue_get_current();
@@ -660,13 +934,23 @@
 			queue_callout_end, dq, ctxt, f);
 }
 
+void
+_dispatch_introspection_runtime_event(
+		enum dispatch_introspection_runtime_event event,
+		void *ptr, uint64_t value)
+{
+	if (DISPATCH_INTROSPECTION_HOOK_ENABLED(runtime_event)) {
+		DISPATCH_INTROSPECTION_HOOK_CALLOUT(runtime_event, event, ptr, value);
+	}
+}
+
 #pragma mark -
 #pragma mark dispatch introspection deadlock detection
 
 typedef struct dispatch_queue_order_entry_s *dispatch_queue_order_entry_t;
 struct dispatch_queue_order_entry_s {
-	TAILQ_ENTRY(dispatch_queue_order_entry_s) dqoe_order_top_list;
-	TAILQ_ENTRY(dispatch_queue_order_entry_s) dqoe_order_bottom_list;
+	LIST_ENTRY(dispatch_queue_order_entry_s) dqoe_order_top_list;
+	LIST_ENTRY(dispatch_queue_order_entry_s) dqoe_order_bottom_list;
 	const char *dqoe_top_label;
 	const char *dqoe_bottom_label;
 	dispatch_queue_t dqoe_top_tq;
@@ -676,39 +960,43 @@
 };
 
 static void
-_dispatch_introspection_queue_order_dispose(dispatch_queue_t dq)
+_dispatch_introspection_queue_order_dispose(
+		dispatch_queue_introspection_context_t dqic)
 {
+	dispatch_queue_introspection_context_t o_dqic;
 	dispatch_queue_order_entry_t e, te;
 	dispatch_queue_t otherq;
-	TAILQ_HEAD(, dispatch_queue_order_entry_s) head;
+	LIST_HEAD(, dispatch_queue_order_entry_s) head;
 
 	// this whole thing happens with _dispatch_introspection.queues_lock locked
 
-	_dispatch_unfair_lock_lock(&dq->diq_order_top_head_lock);
-	head.tqh_first = dq->diq_order_top_head.tqh_first;
-	head.tqh_last = dq->diq_order_top_head.tqh_last;
-	TAILQ_INIT(&dq->diq_order_top_head);
-	_dispatch_unfair_lock_unlock(&dq->diq_order_top_head_lock);
+	_dispatch_unfair_lock_lock(&dqic->dqic_order_top_head_lock);
+	LIST_INIT(&head);
+	LIST_SWAP(&head, &dqic->dqic_order_top_head,
+			dispatch_queue_order_entry_s, dqoe_order_top_list);
+	_dispatch_unfair_lock_unlock(&dqic->dqic_order_top_head_lock);
 
-	TAILQ_FOREACH_SAFE(e, &head, dqoe_order_top_list, te) {
+	LIST_FOREACH_SAFE(e, &head, dqoe_order_top_list, te) {
 		otherq = e->dqoe_bottom_tq;
-		_dispatch_unfair_lock_lock(&otherq->diq_order_bottom_head_lock);
-		TAILQ_REMOVE(&otherq->diq_order_bottom_head, e, dqoe_order_bottom_list);
-		_dispatch_unfair_lock_unlock(&otherq->diq_order_bottom_head_lock);
+		o_dqic = otherq->do_finalizer;
+		_dispatch_unfair_lock_lock(&o_dqic->dqic_order_bottom_head_lock);
+		LIST_REMOVE(e, dqoe_order_bottom_list);
+		_dispatch_unfair_lock_unlock(&o_dqic->dqic_order_bottom_head_lock);
 		free(e);
 	}
 
-	_dispatch_unfair_lock_lock(&dq->diq_order_bottom_head_lock);
-	head.tqh_first = dq->diq_order_bottom_head.tqh_first;
-	head.tqh_last = dq->diq_order_bottom_head.tqh_last;
-	TAILQ_INIT(&dq->diq_order_bottom_head);
-	_dispatch_unfair_lock_unlock(&dq->diq_order_bottom_head_lock);
+	_dispatch_unfair_lock_lock(&dqic->dqic_order_bottom_head_lock);
+	LIST_INIT(&head);
+	LIST_SWAP(&head, &dqic->dqic_order_bottom_head,
+			dispatch_queue_order_entry_s, dqoe_order_bottom_list);
+	_dispatch_unfair_lock_unlock(&dqic->dqic_order_bottom_head_lock);
 
-	TAILQ_FOREACH_SAFE(e, &head, dqoe_order_bottom_list, te) {
+	LIST_FOREACH_SAFE(e, &head, dqoe_order_bottom_list, te) {
 		otherq = e->dqoe_top_tq;
-		_dispatch_unfair_lock_lock(&otherq->diq_order_top_head_lock);
-		TAILQ_REMOVE(&otherq->diq_order_top_head, e, dqoe_order_top_list);
-		_dispatch_unfair_lock_unlock(&otherq->diq_order_top_head_lock);
+		o_dqic = otherq->do_finalizer;
+		_dispatch_unfair_lock_lock(&o_dqic->dqic_order_top_head_lock);
+		LIST_REMOVE(e, dqoe_order_top_list);
+		_dispatch_unfair_lock_unlock(&o_dqic->dqic_order_top_head_lock);
 		free(e);
 	}
 }
@@ -777,23 +1065,24 @@
 		dispatch_queue_t bottom_q, dispatch_queue_t bottom_tq)
 {
 	struct dispatch_order_frame_s dof = { .dof_prev = dof_prev };
+	dispatch_queue_introspection_context_t btqic = bottom_tq->do_finalizer;
 
 	// has anyone above bottom_tq ever sync()ed onto top_tq ?
-	_dispatch_unfair_lock_lock(&bottom_tq->diq_order_top_head_lock);
-	TAILQ_FOREACH(dof.dof_e, &bottom_tq->diq_order_top_head, dqoe_order_top_list) {
-		if (slowpath(dof.dof_e->dqoe_bottom_tq == top_tq)) {
+	_dispatch_unfair_lock_lock(&btqic->dqic_order_top_head_lock);
+	LIST_FOREACH(dof.dof_e, &btqic->dqic_order_top_head, dqoe_order_top_list) {
+		if (unlikely(dof.dof_e->dqoe_bottom_tq == top_tq)) {
 			_dispatch_introspection_lock_inversion_fail(&dof, top_q, bottom_q);
 		}
 		_dispatch_introspection_order_check(&dof, top_q, top_tq,
 				bottom_q, dof.dof_e->dqoe_bottom_tq);
 	}
-	_dispatch_unfair_lock_unlock(&bottom_tq->diq_order_top_head_lock);
+	_dispatch_unfair_lock_unlock(&btqic->dqic_order_top_head_lock);
 }
 
 void
-_dispatch_introspection_order_record(dispatch_queue_t top_q,
-		dispatch_queue_t bottom_q)
+_dispatch_introspection_order_record(dispatch_queue_t top_q)
 {
+	dispatch_queue_t bottom_q = _dispatch_queue_get_current();
 	dispatch_queue_order_entry_t e, it;
 	const int pcs_skip = 1, pcs_n_max = 128;
 	void *pcs[pcs_n_max];
@@ -805,17 +1094,19 @@
 
 	dispatch_queue_t top_tq = _dispatch_queue_bottom_target_queue(top_q);
 	dispatch_queue_t bottom_tq = _dispatch_queue_bottom_target_queue(bottom_q);
+	dispatch_queue_introspection_context_t ttqic = top_tq->do_finalizer;
+	dispatch_queue_introspection_context_t btqic = bottom_tq->do_finalizer;
 
-	_dispatch_unfair_lock_lock(&top_tq->diq_order_top_head_lock);
-	TAILQ_FOREACH(it, &top_tq->diq_order_top_head, dqoe_order_top_list) {
+	_dispatch_unfair_lock_lock(&ttqic->dqic_order_top_head_lock);
+	LIST_FOREACH(it, &ttqic->dqic_order_top_head, dqoe_order_top_list) {
 		if (it->dqoe_bottom_tq == bottom_tq) {
 			// that dispatch_sync() is known and validated
 			// move on
-			_dispatch_unfair_lock_unlock(&top_tq->diq_order_top_head_lock);
+			_dispatch_unfair_lock_unlock(&ttqic->dqic_order_top_head_lock);
 			return;
 		}
 	}
-	_dispatch_unfair_lock_unlock(&top_tq->diq_order_top_head_lock);
+	_dispatch_unfair_lock_unlock(&ttqic->dqic_order_top_head_lock);
 
 	_dispatch_introspection_order_check(NULL, top_q, top_tq, bottom_q, bottom_tq);
 	pcs_n = MAX(backtrace(pcs, pcs_n_max) - pcs_skip, 0);
@@ -852,22 +1143,22 @@
 		e->dqoe_bottom_label = bottom_q->dq_label ?: "";
 	}
 
-	_dispatch_unfair_lock_lock(&top_tq->diq_order_top_head_lock);
-	TAILQ_FOREACH(it, &top_tq->diq_order_top_head, dqoe_order_top_list) {
-		if (slowpath(it->dqoe_bottom_tq == bottom_tq)) {
+	_dispatch_unfair_lock_lock(&ttqic->dqic_order_top_head_lock);
+	LIST_FOREACH(it, &ttqic->dqic_order_top_head, dqoe_order_top_list) {
+		if (unlikely(it->dqoe_bottom_tq == bottom_tq)) {
 			// someone else validated it at the same time
 			// go away quickly
-			_dispatch_unfair_lock_unlock(&top_tq->diq_order_top_head_lock);
+			_dispatch_unfair_lock_unlock(&ttqic->dqic_order_top_head_lock);
 			free(e);
 			return;
 		}
 	}
-	TAILQ_INSERT_HEAD(&top_tq->diq_order_top_head, e, dqoe_order_top_list);
-	_dispatch_unfair_lock_unlock(&top_tq->diq_order_top_head_lock);
+	LIST_INSERT_HEAD(&ttqic->dqic_order_top_head, e, dqoe_order_top_list);
+	_dispatch_unfair_lock_unlock(&ttqic->dqic_order_top_head_lock);
 
-	_dispatch_unfair_lock_lock(&bottom_tq->diq_order_bottom_head_lock);
-	TAILQ_INSERT_HEAD(&bottom_tq->diq_order_bottom_head, e, dqoe_order_bottom_list);
-	_dispatch_unfair_lock_unlock(&bottom_tq->diq_order_bottom_head_lock);
+	_dispatch_unfair_lock_lock(&btqic->dqic_order_bottom_head_lock);
+	LIST_INSERT_HEAD(&btqic->dqic_order_bottom_head, e, dqoe_order_bottom_list);
+	_dispatch_unfair_lock_unlock(&btqic->dqic_order_bottom_head_lock);
 }
 
 void
@@ -891,8 +1182,9 @@
 		[2] = "a recipient",
 		[3] = "both an initiator and a recipient"
 	};
-	bool as_top = !TAILQ_EMPTY(&dq->diq_order_top_head);
-	bool as_bottom = !TAILQ_EMPTY(&dq->diq_order_top_head);
+	dispatch_queue_introspection_context_t dqic = dq->do_finalizer;
+	bool as_top = !LIST_EMPTY(&dqic->dqic_order_top_head);
+	bool as_bottom = !LIST_EMPTY(&dqic->dqic_order_bottom_head);
 
 	if (as_top || as_bottom) {
 		_dispatch_log(
@@ -903,7 +1195,7 @@
 				"a dispatch_sync", dq, dq->dq_label ?: "",
 				reasons[(int)as_top + 2 * (int)as_bottom]);
 		_dispatch_unfair_lock_lock(&_dispatch_introspection.queues_lock);
-		_dispatch_introspection_queue_order_dispose(dq);
+		_dispatch_introspection_queue_order_dispose(dq->do_finalizer);
 		_dispatch_unfair_lock_unlock(&_dispatch_introspection.queues_lock);
 	}
 }
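
The deadlock-detection hunks above record, for every dispatch_sync, an edge from the target hierarchy of the queue being synced onto ("top") to that of the calling queue ("bottom"), and flag any pre-existing reverse edge as a lock inversion. A minimal sketch of that bookkeeping with the <sys/queue.h> LIST macros — hypothetical types and names, not the library's API — looks roughly like this:

    #include <sys/queue.h>
    #include <stdbool.h>
    #include <stdio.h>
    #include <stdlib.h>

    /* Illustrative model only: one edge per observed dispatch_sync ordering. */
    struct order_edge {
    	LIST_ENTRY(order_edge) by_top;      /* linked into the top queue's list */
    	LIST_ENTRY(order_edge) by_bottom;   /* linked into the bottom queue's list */
    	struct fake_queue *top;
    	struct fake_queue *bottom;
    };

    struct fake_queue {
    	const char *label;
    	/* zero-initialized LIST heads are valid empty lists */
    	LIST_HEAD(, order_edge) order_top_head;     /* edges where this queue is "top" */
    	LIST_HEAD(, order_edge) order_bottom_head;  /* edges where this queue is "bottom" */
    };

    /* Record "bottom synced onto top"; return true if the reverse edge already
     * exists, i.e. the two queues have been synced onto in both directions. */
    static bool
    record_sync_order(struct fake_queue *top, struct fake_queue *bottom)
    {
    	struct order_edge *e;

    	LIST_FOREACH(e, &bottom->order_top_head, by_top) {
    		if (e->bottom == top) {
    			fprintf(stderr, "lock inversion between %s and %s\n",
    					top->label, bottom->label);
    			return true;
    		}
    	}
    	e = calloc(1, sizeof(*e));
    	if (!e) abort();                    /* allocation check kept minimal */
    	e->top = top;
    	e->bottom = bottom;
    	LIST_INSERT_HEAD(&top->order_top_head, e, by_top);
    	LIST_INSERT_HEAD(&bottom->order_bottom_head, e, by_bottom);
    	return false;
    }

Insertion at the head plus removal by entry pointer is what makes the LIST form attractive here: LIST_REMOVE needs no head, which is exactly how the dispose path above walks and frees both per-queue lists.
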
diff --git a/src/introspection_internal.h b/src/introspection_internal.h
index e2fa6d1..d4459da 100644
--- a/src/introspection_internal.h
+++ b/src/introspection_internal.h
@@ -27,20 +27,42 @@
 #ifndef __DISPATCH_INTROSPECTION_INTERNAL__
 #define __DISPATCH_INTROSPECTION_INTERNAL__
 
+/* keep in sync with introspection_private.h */
+enum dispatch_introspection_runtime_event {
+	dispatch_introspection_runtime_event_worker_event_delivery = 1,
+	dispatch_introspection_runtime_event_worker_unpark = 2,
+	dispatch_introspection_runtime_event_worker_request = 3,
+	dispatch_introspection_runtime_event_worker_park = 4,
+
+	dispatch_introspection_runtime_event_sync_wait = 10,
+	dispatch_introspection_runtime_event_async_sync_handoff = 11,
+	dispatch_introspection_runtime_event_sync_sync_handoff = 12,
+	dispatch_introspection_runtime_event_sync_async_handoff = 13,
+};
+
 #if DISPATCH_INTROSPECTION
 
-#define DISPATCH_INTROSPECTION_QUEUE_HEADER \
-		TAILQ_ENTRY(dispatch_queue_s) diq_list; \
-		dispatch_unfair_lock_s diq_order_top_head_lock; \
-		dispatch_unfair_lock_s diq_order_bottom_head_lock; \
-		TAILQ_HEAD(, dispatch_queue_order_entry_s) diq_order_top_head; \
-		TAILQ_HEAD(, dispatch_queue_order_entry_s) diq_order_bottom_head
-#define DISPATCH_INTROSPECTION_QUEUE_HEADER_SIZE \
-		sizeof(struct { DISPATCH_INTROSPECTION_QUEUE_HEADER; })
+#define DC_BARRIER 0x1
+#define DC_SYNC 0x2
+#define DC_APPLY 0x4
+
+typedef struct dispatch_queue_introspection_context_s {
+	dispatch_queue_class_t dqic_queue;
+	dispatch_function_t dqic_finalizer;
+	LIST_ENTRY(dispatch_queue_introspection_context_s) dqic_list;
+
+	char __dqic_no_queue_inversion[0];
+
+	// used for queue inversion debugging only
+	dispatch_unfair_lock_s dqic_order_top_head_lock;
+	dispatch_unfair_lock_s dqic_order_bottom_head_lock;
+	LIST_HEAD(, dispatch_queue_order_entry_s) dqic_order_top_head;
+	LIST_HEAD(, dispatch_queue_order_entry_s) dqic_order_bottom_head;
+} *dispatch_queue_introspection_context_t;
 
 struct dispatch_introspection_state_s {
-	TAILQ_HEAD(, dispatch_introspection_thread_s) threads;
-	TAILQ_HEAD(, dispatch_queue_s) queues;
+	LIST_HEAD(, dispatch_introspection_thread_s) threads;
+	LIST_HEAD(, dispatch_queue_introspection_context_s) queues;
 	dispatch_unfair_lock_s threads_lock;
 	dispatch_unfair_lock_s queues_lock;
 
@@ -54,89 +76,121 @@
 
 void _dispatch_introspection_init(void);
 void _dispatch_introspection_thread_add(void);
-dispatch_queue_t _dispatch_introspection_queue_create(dispatch_queue_t dq);
-void _dispatch_introspection_queue_dispose(dispatch_queue_t dq);
-void _dispatch_introspection_queue_item_enqueue(dispatch_queue_t dq,
+dispatch_function_t _dispatch_object_finalizer(dispatch_object_t dou);
+void _dispatch_object_set_finalizer(dispatch_object_t dou,
+		dispatch_function_t finalizer);
+dispatch_queue_class_t _dispatch_introspection_queue_create(
+		dispatch_queue_class_t dqu);
+void _dispatch_introspection_queue_dispose(dispatch_queue_class_t dqu);
+void _dispatch_introspection_queue_item_enqueue(dispatch_queue_class_t dqu,
 		dispatch_object_t dou);
-void _dispatch_introspection_queue_item_dequeue(dispatch_queue_t dq,
+void _dispatch_introspection_queue_item_dequeue(dispatch_queue_class_t dqu,
 		dispatch_object_t dou);
 void _dispatch_introspection_queue_item_complete(dispatch_object_t dou);
 void _dispatch_introspection_callout_entry(void *ctxt, dispatch_function_t f);
 void _dispatch_introspection_callout_return(void *ctxt, dispatch_function_t f);
+struct dispatch_object_s *_dispatch_introspection_queue_fake_sync_push_pop(
+		dispatch_queue_t dq, void *ctxt, dispatch_function_t func,
+		uintptr_t dc_flags);
+void _dispatch_introspection_runtime_event(
+		enum dispatch_introspection_runtime_event event,
+		void *ptr, uint64_t value);
 
 #if DISPATCH_PURE_C
 
-static dispatch_queue_t _dispatch_queue_get_current(void);
-
 DISPATCH_ALWAYS_INLINE
 static inline void
-_dispatch_introspection_queue_push_list(dispatch_queue_t dq,
+_dispatch_introspection_queue_push_list(dispatch_queue_class_t dqu,
 		dispatch_object_t head, dispatch_object_t tail) {
 	struct dispatch_object_s *dou = head._do;
 	do {
-		_dispatch_introspection_queue_item_enqueue(dq, dou);
+		_dispatch_introspection_queue_item_enqueue(dqu, dou);
 	} while (dou != tail._do && (dou = dou->do_next));
 };
 
 DISPATCH_ALWAYS_INLINE
 static inline void
-_dispatch_introspection_queue_push(dispatch_queue_t dq, dispatch_object_t dou) {
-	_dispatch_introspection_queue_item_enqueue(dq, dou);
-};
+_dispatch_introspection_queue_push(dispatch_queue_class_t dqu,
+		dispatch_object_t dou)
+{
+	_dispatch_introspection_queue_item_enqueue(dqu, dou);
+}
 
 DISPATCH_ALWAYS_INLINE
 static inline void
-_dispatch_introspection_queue_pop(dispatch_queue_t dq, dispatch_object_t dou) {
-	_dispatch_introspection_queue_item_dequeue(dq, dou);
-};
+_dispatch_introspection_queue_pop(dispatch_queue_class_t dqu,
+		dispatch_object_t dou)
+{
+	_dispatch_introspection_queue_item_dequeue(dqu, dou);
+}
 
 void
-_dispatch_introspection_order_record(dispatch_queue_t top_q,
-		dispatch_queue_t bottom_q);
+_dispatch_introspection_order_record(dispatch_queue_t top_q);
 
 void
 _dispatch_introspection_target_queue_changed(dispatch_queue_t dq);
 
 DISPATCH_ALWAYS_INLINE
 static inline void
-_dispatch_introspection_sync_begin(dispatch_queue_t dq)
+_dispatch_introspection_sync_begin(dispatch_queue_class_t dq)
 {
 	if (!_dispatch_introspection.debug_queue_inversions) return;
-	_dispatch_introspection_order_record(dq, _dispatch_queue_get_current());
+	_dispatch_introspection_order_record(dq._dq);
 }
 
 #endif // DISPATCH_PURE_C
 
 #else // DISPATCH_INTROSPECTION
 
-#define DISPATCH_INTROSPECTION_QUEUE_HEADER
-#define DISPATCH_INTROSPECTION_QUEUE_HEADER_SIZE 0
-
 #define _dispatch_introspection_init()
 #define _dispatch_introspection_thread_add()
 
 DISPATCH_ALWAYS_INLINE
-static inline dispatch_queue_t
-_dispatch_introspection_queue_create(dispatch_queue_t dq) { return dq; }
+static inline dispatch_queue_class_t
+_dispatch_introspection_queue_create(dispatch_queue_class_t dqu)
+{
+	return dqu;
+}
+
+#if DISPATCH_PURE_C
+
+DISPATCH_ALWAYS_INLINE
+static inline dispatch_function_t
+_dispatch_object_finalizer(dispatch_object_t dou)
+{
+	return dou._do->do_finalizer;
+}
 
 DISPATCH_ALWAYS_INLINE
 static inline void
-_dispatch_introspection_queue_dispose(dispatch_queue_t dq) { (void)dq; }
+_dispatch_object_set_finalizer(dispatch_object_t dou,
+		dispatch_function_t finalizer)
+{
+	dou._do->do_finalizer = finalizer;
+}
+
+#endif // DISPATCH_PURE_C
 
 DISPATCH_ALWAYS_INLINE
 static inline void
-_dispatch_introspection_queue_push_list(dispatch_queue_t dq DISPATCH_UNUSED,
+_dispatch_introspection_queue_dispose(
+		dispatch_queue_class_t dqu DISPATCH_UNUSED) {}
+
+DISPATCH_ALWAYS_INLINE
+static inline void
+_dispatch_introspection_queue_push_list(
+		dispatch_queue_class_t dqu DISPATCH_UNUSED,
 		dispatch_object_t head DISPATCH_UNUSED,
 		dispatch_object_t tail DISPATCH_UNUSED) {}
 
 DISPATCH_ALWAYS_INLINE
 static inline void
-_dispatch_introspection_queue_push(dispatch_queue_t dq DISPATCH_UNUSED,
+_dispatch_introspection_queue_push(dispatch_queue_class_t dqu DISPATCH_UNUSED,
 		dispatch_object_t dou DISPATCH_UNUSED) {}
 
 DISPATCH_ALWAYS_INLINE
 static inline void
-_dispatch_introspection_queue_pop(dispatch_queue_t dq DISPATCH_UNUSED,
+_dispatch_introspection_queue_pop(dispatch_queue_class_t dqu DISPATCH_UNUSED,
 		dispatch_object_t dou DISPATCH_UNUSED) {}
 
 DISPATCH_ALWAYS_INLINE
@@ -161,7 +215,21 @@
 
 DISPATCH_ALWAYS_INLINE
 static inline void
-_dispatch_introspection_sync_begin(dispatch_queue_t dq DISPATCH_UNUSED) {}
+_dispatch_introspection_sync_begin(
+		dispatch_queue_class_t dq DISPATCH_UNUSED) {}
+
+DISPATCH_ALWAYS_INLINE
+static inline struct dispatch_object_s *
+_dispatch_introspection_queue_fake_sync_push_pop(
+		dispatch_queue_t dq DISPATCH_UNUSED,
+		void *ctxt DISPATCH_UNUSED, dispatch_function_t func DISPATCH_UNUSED,
+		uintptr_t dc_flags DISPATCH_UNUSED) { return NULL; }
+
+DISPATCH_ALWAYS_INLINE
+static inline void
+_dispatch_introspection_runtime_event(
+		enum dispatch_introspection_runtime_event event DISPATCH_UNUSED,
+		void *ptr DISPATCH_UNUSED, uint64_t value DISPATCH_UNUSED) {}
 
 #endif // DISPATCH_INTROSPECTION
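
With this change the introspection build no longer widens every queue with DISPATCH_INTROSPECTION_QUEUE_HEADER; it hangs a heap-allocated dispatch_queue_introspection_context_s off the queue's finalizer slot instead (see the `o_dqic = otherq->do_finalizer` loads and the `dq->do_finalizer = dqic->dqic_finalizer` restore in the dispose path above). A hedged, self-contained sketch of that slot-borrowing pattern, with hypothetical names:

    #include <stdlib.h>

    /* Hypothetical object with one spare pointer-sized slot (the finalizer). */
    struct obj {
    	void *finalizer_slot;
    };

    /* Side context parked in that slot; remembers the displaced finalizer. */
    struct obj_introspection_ctx {
    	void *saved_finalizer;
    	/* ... per-object introspection bookkeeping would go here ... */
    };

    static void
    introspection_install(struct obj *o)
    {
    	struct obj_introspection_ctx *ctx = calloc(1, sizeof(*ctx));
    	if (!ctx) abort();
    	ctx->saved_finalizer = o->finalizer_slot;  /* stash the user's finalizer */
    	o->finalizer_slot = ctx;                   /* park the context in the slot */
    }

    static void
    introspection_dispose(struct obj *o)
    {
    	struct obj_introspection_ctx *ctx = o->finalizer_slot;
    	o->finalizer_slot = ctx->saved_finalizer;  /* restore before real teardown */
    	free(ctx);
    }

The cost of the trick is that any read of the real finalizer in the introspection build has to go through the context, which is why the dispose path restores dqic_finalizer before freeing it.
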
 
diff --git a/src/io.c b/src/io.c
index 23a07bf..42f1424 100644
--- a/src/io.c
+++ b/src/io.c
@@ -20,6 +20,11 @@
 
 #include "internal.h"
 
+#if defined(__FreeBSD__)
+#include <fcntl.h>
+#define F_RDADVISE F_RDAHEAD
+#endif
+
 #ifndef DISPATCH_IO_DEBUG
 #define DISPATCH_IO_DEBUG DISPATCH_DEBUG
 #endif
@@ -131,7 +136,7 @@
 #if DISPATCH_IO_DEBUG
 #if !DISPATCH_DEBUG
 #define _dispatch_io_log(x, ...) do { \
-			_dispatch_log("%llu\t%p\t" x, _dispatch_absolute_time(), \
+			_dispatch_log("%llu\t%p\t" x, _dispatch_uptime(), \
 			(void *)_dispatch_thread_self(), ##__VA_ARGS__); \
 		} while (0)
 #ifdef _dispatch_object_debug
@@ -147,7 +152,7 @@
 #endif // DISPATCH_IO_DEBUG
 
 #define _dispatch_fd_debug(msg, fd, ...) \
-		_dispatch_io_log("fd[0x%x]: " msg, fd, ##__VA_ARGS__)
+		_dispatch_io_log("fd[0x%" PRIx64 "]: " msg, fd, ##__VA_ARGS__)
 #define _dispatch_op_debug(msg, op, ...) \
 		_dispatch_io_log("op[%p]: " msg, op, ##__VA_ARGS__)
 #define _dispatch_channel_debug(msg, channel, ...) \
@@ -160,39 +165,28 @@
 #pragma mark -
 #pragma mark dispatch_io_hashtables
 
-// Global hashtable of dev_t -> disk_s mappings
-DISPATCH_CACHELINE_ALIGN
-static TAILQ_HEAD(, dispatch_disk_s) _dispatch_io_devs[DIO_HASH_SIZE];
-// Global hashtable of fd -> fd_entry_s mappings
-DISPATCH_CACHELINE_ALIGN
-static TAILQ_HEAD(, dispatch_fd_entry_s) _dispatch_io_fds[DIO_HASH_SIZE];
+LIST_HEAD(dispatch_disk_head_s, dispatch_disk_s);
+LIST_HEAD(dispatch_fd_entry_head_s, dispatch_fd_entry_s);
 
-static dispatch_once_t  _dispatch_io_devs_lockq_pred;
-static dispatch_queue_t _dispatch_io_devs_lockq;
-static dispatch_queue_t _dispatch_io_fds_lockq;
+// Global hashtable of dev_t -> disk_s mappings
+DISPATCH_STATIC_GLOBAL(struct dispatch_disk_head_s _dispatch_io_devs[DIO_HASH_SIZE]);
+DISPATCH_STATIC_GLOBAL(dispatch_queue_t _dispatch_io_devs_lockq);
+
+// Global hashtable of fd -> fd_entry_s mappings
+DISPATCH_STATIC_GLOBAL(struct dispatch_fd_entry_head_s _dispatch_io_fds[DIO_HASH_SIZE]);
+DISPATCH_STATIC_GLOBAL(dispatch_queue_t _dispatch_io_fds_lockq);
+
+DISPATCH_STATIC_GLOBAL(dispatch_once_t _dispatch_io_init_pred);
 
 static char const * const _dispatch_io_key = "io";
 
 static void
-_dispatch_io_fds_lockq_init(void *context DISPATCH_UNUSED)
+_dispatch_io_queues_init(void *context DISPATCH_UNUSED)
 {
 	_dispatch_io_fds_lockq = dispatch_queue_create(
 			"com.apple.libdispatch-io.fd_lockq", NULL);
-	unsigned int i;
-	for (i = 0; i < DIO_HASH_SIZE; i++) {
-		TAILQ_INIT(&_dispatch_io_fds[i]);
-	}
-}
-
-static void
-_dispatch_io_devs_lockq_init(void *context DISPATCH_UNUSED)
-{
 	_dispatch_io_devs_lockq = dispatch_queue_create(
 			"com.apple.libdispatch-io.dev_lockq", NULL);
-	unsigned int i;
-	for (i = 0; i < DIO_HASH_SIZE; i++) {
-		TAILQ_INIT(&_dispatch_io_devs[i]);
-	}
 }
 
 #pragma mark -
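
The two per-table initializers above collapse into a single _dispatch_io_queues_init guarded by one _dispatch_io_init_pred, so whichever path runs first creates both lock queues exactly once. A small, self-contained illustration of the same dispatch_once_f pattern (placeholder labels and names, not the library's):

    #include <dispatch/dispatch.h>

    static dispatch_once_t example_io_init_pred;
    static dispatch_queue_t example_io_fd_lockq;
    static dispatch_queue_t example_io_dev_lockq;

    static void
    example_io_queues_init(void *context)
    {
    	(void)context;
    	example_io_fd_lockq = dispatch_queue_create("example.io.fd_lockq", NULL);
    	example_io_dev_lockq = dispatch_queue_create("example.io.dev_lockq", NULL);
    }

    static dispatch_queue_t
    example_io_get_fd_lockq(void)
    {
    	/* Whichever accessor runs first creates both queues exactly once. */
    	dispatch_once_f(&example_io_init_pred, NULL, example_io_queues_init);
    	return example_io_fd_lockq;
    }
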
@@ -205,14 +199,16 @@
 	DISPATCH_IOCNTL_MAX_PENDING_IO_REQS,
 };
 
-static struct dispatch_io_defaults_s {
+extern struct dispatch_io_defaults_s {
 	size_t chunk_size, low_water_chunks, max_pending_io_reqs;
 	bool initial_delivery;
-} dispatch_io_defaults = {
+} dispatch_io_defaults;
+
+DISPATCH_GLOBAL_INIT(struct dispatch_io_defaults_s dispatch_io_defaults, {
 	.chunk_size = DIO_MAX_CHUNK_SIZE,
 	.low_water_chunks = DIO_DEFAULT_LOW_WATER_CHUNKS,
 	.max_pending_io_reqs = DIO_MAX_PENDING_IO_REQS,
-};
+});
 
 #define _dispatch_iocntl_set_default(p, v) do { \
 		dispatch_io_defaults.p = (__typeof__(dispatch_io_defaults.p))(v); \
@@ -230,6 +226,7 @@
 		break;
 	case DISPATCH_IOCNTL_INITIAL_DELIVERY:
 		_dispatch_iocntl_set_default(initial_delivery, value);
+		break;
 	case DISPATCH_IOCNTL_MAX_PENDING_IO_REQS:
 		_dispatch_iocntl_set_default(max_pending_io_reqs, value);
 		break;
@@ -245,7 +242,7 @@
 	dispatch_io_t channel = _dispatch_object_alloc(DISPATCH_VTABLE(io),
 			sizeof(struct dispatch_io_s));
 	channel->do_next = DISPATCH_OBJECT_LISTLESS;
-	channel->do_targetq = _dispatch_get_root_queue(DISPATCH_QOS_DEFAULT, true);
+	channel->do_targetq = _dispatch_get_default_queue(true);
 	channel->params.type = type;
 	channel->params.high = SIZE_MAX;
 	channel->params.low = dispatch_io_defaults.low_water_chunks *
@@ -481,8 +478,8 @@
 			return;
 		}
 		dispatch_suspend(channel->queue);
-		dispatch_once_f(&_dispatch_io_devs_lockq_pred, NULL,
-				_dispatch_io_devs_lockq_init);
+		dispatch_once_f(&_dispatch_io_init_pred, NULL,
+				_dispatch_io_queues_init);
 		dispatch_async(_dispatch_io_devs_lockq, ^{
 			dispatch_fd_entry_t fd_entry = _dispatch_fd_entry_create_with_path(
 					path_data, st.st_dev, st.st_mode);
@@ -733,7 +730,7 @@
 							channel);
 					dispatch_fd_entry_t fdi;
 					uintptr_t hash = DIO_HASH(channel->fd);
-					TAILQ_FOREACH(fdi, &_dispatch_io_fds[hash], fd_list) {
+					LIST_FOREACH(fdi, &_dispatch_io_fds[hash], fd_list) {
 						if (fdi->fd == channel->fd) {
 							_dispatch_fd_entry_cleanup_operations(fdi, channel);
 							break;
@@ -928,7 +925,7 @@
 		dispatch_operation_t op =
 			_dispatch_operation_create(DOP_DIR_READ, channel, 0,
 					length, dispatch_data_empty,
-					_dispatch_get_root_queue(DISPATCH_QOS_DEFAULT, false),
+					_dispatch_get_default_queue(false),
 					^(bool done, dispatch_data_t data, int error) {
 				if (data) {
 					data = dispatch_data_create_concat(deliver_data, data);
@@ -999,7 +996,7 @@
 		dispatch_operation_t op =
 			_dispatch_operation_create(DOP_DIR_WRITE, channel, 0,
 					dispatch_data_get_size(data), data,
-					_dispatch_get_root_queue(DISPATCH_QOS_DEFAULT, false),
+					_dispatch_get_default_queue(false),
 					^(bool done, dispatch_data_t d, int error) {
 				if (done) {
 					if (d) {
@@ -1042,6 +1039,7 @@
 	if (err || !length) {
 		_dispatch_io_data_retain(data);
 		_dispatch_retain(queue);
+		_dispatch_retain(channel);
 		dispatch_async(channel->barrier_queue, ^{
 			dispatch_async(queue, ^{
 				dispatch_data_t d = data;
@@ -1053,6 +1051,7 @@
 				_dispatch_channel_debug("IO handler invoke: err %d", channel,
 						err);
 				handler(true, d, err);
+				_dispatch_release(channel);
 				_dispatch_io_data_release(data);
 			});
 			_dispatch_release(queue);
@@ -1077,7 +1076,7 @@
 	// Take a snapshot of the priority of the channel queue. The actual I/O
 	// for this operation will be performed at this priority
 	dispatch_queue_t targetq = op->channel->do_targetq;
-	while (fastpath(targetq->do_targetq)) {
+	while (targetq->do_targetq) {
 		targetq = targetq->do_targetq;
 	}
 	op->do_targetq = targetq;
@@ -1338,14 +1337,13 @@
 _dispatch_fd_entry_init_async(dispatch_fd_t fd,
 		dispatch_fd_entry_init_callback_t completion_callback)
 {
-	static dispatch_once_t _dispatch_io_fds_lockq_pred;
-	dispatch_once_f(&_dispatch_io_fds_lockq_pred, NULL,
-			_dispatch_io_fds_lockq_init);
+	dispatch_once_f(&_dispatch_io_init_pred, NULL,
+			_dispatch_io_queues_init);
 	dispatch_async(_dispatch_io_fds_lockq, ^{
 		dispatch_fd_entry_t fd_entry = NULL;
 		// Check to see if there is an existing entry for the given fd
 		uintptr_t hash = DIO_HASH(fd);
-		TAILQ_FOREACH(fd_entry, &_dispatch_io_fds[hash], fd_list) {
+		LIST_FOREACH(fd_entry, &_dispatch_io_fds[hash], fd_list) {
 			if (fd_entry->fd == fd) {
 				// Retain the fd_entry to ensure it cannot go away until the
 				// stat() has completed
@@ -1387,9 +1385,9 @@
 	// On fds lock queue
 	dispatch_fd_entry_t fd_entry = _dispatch_fd_entry_create(
 			_dispatch_io_fds_lockq);
-	_dispatch_fd_entry_debug("create: fd %d", fd_entry, fd);
+	_dispatch_fd_entry_debug("create: fd %" PRId64, fd_entry, fd);
 	fd_entry->fd = fd;
-	TAILQ_INSERT_TAIL(&_dispatch_io_fds[hash], fd_entry, fd_list);
+	LIST_INSERT_HEAD(&_dispatch_io_fds[hash], fd_entry, fd_list);
 	fd_entry->barrier_queue = dispatch_queue_create(
 			"com.apple.libdispatch-io.barrierq", NULL);
 	fd_entry->barrier_group = dispatch_group_create();
@@ -1401,11 +1399,11 @@
 			int result = ioctlsocket((SOCKET)fd, (long)FIONBIO, &value);
 			(void)dispatch_assume_zero(result);
 			_dispatch_stream_init(fd_entry,
-				_dispatch_get_root_queue(DISPATCH_QOS_DEFAULT, false));
+				_dispatch_get_default_queue(false));
 		} else {
 			dispatch_suspend(fd_entry->barrier_queue);
-			dispatch_once_f(&_dispatch_io_devs_lockq_pred, NULL,
-					_dispatch_io_devs_lockq_init);
+			dispatch_once_f(&_dispatch_io_init_pred, NULL,
+					_dispatch_io_queues_init);
 			dispatch_async(_dispatch_io_devs_lockq, ^{
 				_dispatch_disk_init(fd_entry, 0);
 				dispatch_resume(fd_entry->barrier_queue);
@@ -1457,8 +1455,8 @@
 			// We have to get the disk on the global dev queue. The
 			// barrier queue cannot continue until that is complete
 			dispatch_suspend(fd_entry->barrier_queue);
-			dispatch_once_f(&_dispatch_io_devs_lockq_pred, NULL,
-					_dispatch_io_devs_lockq_init);
+			dispatch_once_f(&_dispatch_io_init_pred, NULL,
+					_dispatch_io_queues_init);
 			dispatch_async(_dispatch_io_devs_lockq, ^{
 				_dispatch_disk_init(fd_entry, dev);
 				dispatch_resume(fd_entry->barrier_queue);
@@ -1475,7 +1473,7 @@
 			}
 
 			_dispatch_stream_init(fd_entry,
-					_dispatch_get_root_queue(DISPATCH_QOS_DEFAULT, false));
+					_dispatch_get_default_queue(false));
 		}
 		fd_entry->orig_flags = orig_flags;
 		fd_entry->orig_nosigpipe = orig_nosigpipe;
@@ -1498,7 +1496,7 @@
 			});
 		}
 		// Remove this entry from the global fd list
-		TAILQ_REMOVE(&_dispatch_io_fds[hash], fd_entry, fd_list);
+		LIST_REMOVE(fd_entry, fd_list);
 	});
 	// If there was a source associated with this stream, disposing of the
 	// source cancels it and suspends the close queue. Freeing the fd_entry
@@ -1550,7 +1548,7 @@
 #endif
 	} else {
 			_dispatch_stream_init(fd_entry,
-					_dispatch_get_root_queue(DISPATCH_QOS_DEFAULT, false));
+					_dispatch_get_default_queue(false));
 	}
 	fd_entry->fd = -1;
 	fd_entry->orig_flags = -1;
@@ -1713,7 +1711,7 @@
 	dispatch_disk_t disk;
 	// Check to see if there is an existing entry for the given device
 	uintptr_t hash = DIO_HASH(dev);
-	TAILQ_FOREACH(disk, &_dispatch_io_devs[hash], disk_list) {
+	LIST_FOREACH(disk, &_dispatch_io_devs[hash], disk_list) {
 		if (disk->dev == dev) {
 			_dispatch_retain(disk);
 			goto out;
@@ -1727,7 +1725,7 @@
 	disk->do_next = DISPATCH_OBJECT_LISTLESS;
 	disk->do_xref_cnt = -1;
 	disk->advise_list_depth = pending_reqs_depth;
-	disk->do_targetq = _dispatch_get_root_queue(DISPATCH_QOS_DEFAULT, false);
+	disk->do_targetq = _dispatch_get_default_queue(false);
 	disk->dev = dev;
 	TAILQ_INIT(&disk->operations);
 	disk->cur_rq = TAILQ_FIRST(&disk->operations);
@@ -1735,7 +1733,7 @@
 	snprintf(label, sizeof(label), "com.apple.libdispatch-io.deviceq.%d",
 			(int)dev);
 	disk->pick_queue = dispatch_queue_create(label, NULL);
-	TAILQ_INSERT_TAIL(&_dispatch_io_devs[hash], disk, disk_list);
+	LIST_INSERT_HEAD(&_dispatch_io_devs[hash], disk, disk_list);
 out:
 	fd_entry->disk = disk;
 	TAILQ_INIT(&fd_entry->stream_ops);
@@ -1744,11 +1742,10 @@
 void
 _dispatch_disk_dispose(dispatch_disk_t disk, DISPATCH_UNUSED bool *allow_free)
 {
-	uintptr_t hash = DIO_HASH(disk->dev);
-	TAILQ_REMOVE(&_dispatch_io_devs[hash], disk, disk_list);
+	LIST_REMOVE(disk, disk_list);
 	dispatch_assert(TAILQ_EMPTY(&disk->operations));
 	size_t i;
-	for (i=0; i<disk->advise_list_depth; ++i) {
+	for (i = 0; i < disk->advise_list_depth; ++i) {
 		dispatch_assert(!disk->advise_list[i]);
 	}
 	dispatch_release(disk->pick_queue);
@@ -2180,7 +2177,7 @@
 	op = disk->advise_list[disk->req_idx];
 	int result = _dispatch_operation_perform(op);
 	disk->advise_list[disk->req_idx] = NULL;
-	disk->req_idx = (++disk->req_idx)%disk->advise_list_depth;
+	disk->req_idx = (disk->req_idx + 1) % disk->advise_list_depth;
 	_dispatch_op_debug("async perform completion: disk %p", op, disk);
 	dispatch_async(disk->pick_queue, ^{
 		_dispatch_op_debug("perform completion", op);
@@ -2275,8 +2272,8 @@
 	}
 #else
 #error "_dispatch_operation_advise not implemented on this platform"
-#endif
-#endif
+#endif // defined(F_RDADVISE)
+#endif // defined(_WIN32)
 }
 
 static int
@@ -2564,6 +2561,7 @@
 #pragma mark -
 #pragma mark dispatch_io_debug
 
+DISPATCH_COLD
 static size_t
 _dispatch_io_debug_attr(dispatch_io_t channel, char* buf, size_t bufsiz)
 {
@@ -2587,7 +2585,7 @@
 {
 	size_t offset = 0;
 	offset += dsnprintf(&buf[offset], bufsiz - offset, "%s[%p] = { ",
-			dx_kind(channel), channel);
+			_dispatch_object_class_name(channel), channel);
 	offset += _dispatch_object_debug_attr(channel, &buf[offset],
 			bufsiz - offset);
 	offset += _dispatch_io_debug_attr(channel, &buf[offset], bufsiz - offset);
@@ -2595,6 +2593,7 @@
 	return offset;
 }
 
+DISPATCH_COLD
 static size_t
 _dispatch_operation_debug_attr(dispatch_operation_t op, char* buf,
 		size_t bufsiz)
@@ -2622,7 +2621,7 @@
 {
 	size_t offset = 0;
 	offset += dsnprintf(&buf[offset], bufsiz - offset, "%s[%p] = { ",
-			dx_kind(op), op);
+			_dispatch_object_class_name(op), op);
 	offset += _dispatch_object_debug_attr(op, &buf[offset], bufsiz - offset);
 	offset += _dispatch_operation_debug_attr(op, &buf[offset], bufsiz - offset);
 	offset += dsnprintf(&buf[offset], bufsiz - offset, "}");
diff --git a/src/io_internal.h b/src/io_internal.h
index d70e075..c076cfc 100644
--- a/src/io_internal.h
+++ b/src/io_internal.h
@@ -34,7 +34,7 @@
 
 #define _DISPATCH_IO_LABEL_SIZE 16
 
-#if TARGET_OS_EMBEDDED // rdar://problem/9032036
+#if TARGET_OS_IPHONE // rdar://problem/9032036
 #define DIO_MAX_CHUNK_SIZE				(512u * 1024)
 #define DIO_HASH_SIZE					64u  // must be a power of two
 #else
@@ -66,8 +66,8 @@
 #define DIO_CLOSED		1u // channel has been closed
 #define DIO_STOPPED		2u // channel has been stopped (implies closed)
 
-DISPATCH_INTERNAL_CLASS_DECL(operation);
-DISPATCH_INTERNAL_CLASS_DECL(disk);
+DISPATCH_INTERNAL_CLASS_DECL(operation, OBJECT);
+DISPATCH_INTERNAL_CLASS_DECL(disk, OBJECT);
 
 struct dispatch_stream_s {
 	dispatch_queue_t dq;
@@ -105,7 +105,7 @@
 	size_t advise_idx;
 	dev_t dev;
 	bool io_active;
-	TAILQ_ENTRY(dispatch_disk_s) disk_list;
+	LIST_ENTRY(dispatch_disk_s) disk_list;
 	size_t advise_list_depth;
 	dispatch_operation_t advise_list[];
 };
@@ -127,7 +127,7 @@
 	dispatch_group_t barrier_group;
 	dispatch_io_t convenience_channel;
 	TAILQ_HEAD(, dispatch_operation_s) stream_ops;
-	TAILQ_ENTRY(dispatch_fd_entry_s) fd_list;
+	LIST_ENTRY(dispatch_fd_entry_s) fd_list;
 };
 
 typedef struct dispatch_fd_entry_s *dispatch_fd_entry_t;
@@ -167,7 +167,7 @@
 	TAILQ_ENTRY(dispatch_operation_s) stream_list;
 };
 
-DISPATCH_CLASS_DECL(io);
+DISPATCH_CLASS_DECL(io, OBJECT);
 struct dispatch_io_s {
 	DISPATCH_OBJECT_HEADER(io);
 	dispatch_queue_t queue, barrier_queue;
@@ -185,8 +185,10 @@
 };
 
 void _dispatch_io_set_target_queue(dispatch_io_t channel, dispatch_queue_t dq);
+DISPATCH_COLD
 size_t _dispatch_io_debug(dispatch_io_t channel, char* buf, size_t bufsiz);
 void _dispatch_io_dispose(dispatch_io_t channel, bool *allow_free);
+DISPATCH_COLD
 size_t _dispatch_operation_debug(dispatch_operation_t op, char* buf,
 		size_t bufsiz);
 void _dispatch_operation_dispose(dispatch_operation_t operation,
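
Both I/O hash tables keep the DIO_HASH bucketing but switch their buckets from TAILQ to LIST heads: entries are pushed at the bucket head and later unlinked with LIST_REMOVE, which does not need to know which bucket the entry lives in. A minimal sketch of that bucket shape, assuming nothing beyond <sys/queue.h> (names are illustrative only):

    #include <sys/queue.h>
    #include <stdint.h>
    #include <stdlib.h>

    #define NBUCKETS 64u                          /* power of two, like DIO_HASH_SIZE */
    #define BUCKET_OF(x) ((uintptr_t)(x) & (NBUCKETS - 1))

    struct fd_node {
    	int fd;
    	LIST_ENTRY(fd_node) hash_link;
    };

    /* Zero-initialized LIST heads are valid empty buckets. */
    static LIST_HEAD(, fd_node) fd_buckets[NBUCKETS];

    static struct fd_node *
    fd_node_lookup_or_create(int fd)
    {
    	struct fd_node *n;

    	LIST_FOREACH(n, &fd_buckets[BUCKET_OF(fd)], hash_link) {
    		if (n->fd == fd) return n;            /* reuse the existing entry */
    	}
    	n = calloc(1, sizeof(*n));
    	if (!n) abort();
    	n->fd = fd;
    	LIST_INSERT_HEAD(&fd_buckets[BUCKET_OF(fd)], n, hash_link);
    	return n;
    }

    static void
    fd_node_remove(struct fd_node *n)
    {
    	LIST_REMOVE(n, hash_link);                /* no bucket head needed */
    	free(n);
    }
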
diff --git a/src/libdispatch.codes b/src/libdispatch.codes
index 0ecc333..855c2ef 100644
--- a/src/libdispatch.codes
+++ b/src/libdispatch.codes
@@ -7,13 +7,36 @@
 0x2e010018	DISPATCH_VOUCHER_activity_adopt
 
 0x2e020004	DISPATCH_PERF_non_leaf_retarget
-0x2e020008	DISPATCH_PERF_post_activate_mutation
+0x2e020008	DISPATCH_PERF_post_activate_retarget
 0x2e02000c	DISPATCH_PERF_post_activate_mutation
 0x2e020010	DISPATCH_PERF_delayed_registration
 0x2e020014	DISPATCH_PERF_mutable_target
 0x2e020018	DISPATCH_PERF_strict_bg_timer
+0x2e02001c	DISPATCH_PERF_suspended_timer_fire
+0x2e020020	DISPATCH_PERF_handlerless_source_fire
+0x2e020024	DISPATCH_PERF_source_registration_without_qos
 
 0x2e030004	DISPATCH_MACH_MSG_hdr_move
 
 0x2e040004	DISPATCH_PERF_MON_worker_thread
 0x2e040008	DISPATCH_PERF_MON_worker_useless
+
+0x2e050004	DISPATCH_QOS_TRACE_queue_creation
+0x2e050008	DISPATCH_QOS_TRACE_queue_dispose
+0x2e05000c	DISPATCH_QOS_TRACE_block_creation
+0x2e050010	DISPATCH_QOS_TRACE_block_dispose
+0x2e050014	DISPATCH_QOS_TRACE_cont_push_eb
+0x2e050018	DISPATCH_QOS_TRACE_cont_push_ab
+0x2e05001c	DISPATCH_QOS_TRACE_cont_push_f
+0x2e050020	DISPATCH_QOS_TRACE_source_push
+0x2e050024	DISPATCH_QOS_TRACE_cont_pop
+0x2e050028	DISPATCH_QOS_TRACE_source_pop
+0x2e05002c	DISPATCH_QOS_TRACE_queue_item_done
+0x2e050030	DISPATCH_QOS_TRACE_source_callout
+0x2e050034	DISPATCH_QOS_TRACE_source_dispose
+
+0x2e060004	DISPATCH_FIREHOSE_TRACE_reserver_gave_up
+0x2e060008	DISPATCH_FIREHOSE_TRACE_reserver_wait
+0x2e06000c	DISPATCH_FIREHOSE_TRACE_allocator
+0x2e060010	DISPATCH_FIREHOSE_TRACE_wait_for_logd
+0x2e060014	DISPATCH_FIREHOSE_TRACE_chunk_install
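
The DISPATCH_QOS_TRACE_* codepoints added above are emitted by the four-argument ktrace calls in the introspection hunks, which pack two 32-bit fields (queue serial number plus flags, or a pair of priorities) into one 64-bit trace argument via BITPACK_UINT32_PAIR. Assuming that macro is the usual shift-and-or (an assumption, not confirmed by this diff), packing and decoding would look like:

    #include <stdint.h>
    #include <stdio.h>

    /* Assumed layout: first field in the high 32 bits, second in the low 32 bits. */
    #define PACK_U32_PAIR(hi, lo) \
    		((((uint64_t)(uint32_t)(hi)) << 32) | (uint64_t)(uint32_t)(lo))

    int
    main(void)
    {
    	uint32_t serialnum = 42, dc_flags = 0x3;  /* e.g. DC_BARRIER | DC_SYNC */
    	uint64_t arg = PACK_U32_PAIR(serialnum, dc_flags);

    	/* A trace decoder would split the argument back into its two halves. */
    	printf("serial=%u flags=0x%x\n",
    			(uint32_t)(arg >> 32), (uint32_t)(arg & 0xffffffffu));
    	return 0;
    }

A kdebug tracepoint carries only four arguments, so pairing narrow fields keeps the serial number, flags, and both priorities inside a single event.
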
diff --git a/src/mach.c b/src/mach.c
index 699492d..726368b 100644
--- a/src/mach.c
+++ b/src/mach.c
@@ -24,20 +24,19 @@
 #define DISPATCH_MACH_RETURN_IMMEDIATE_SEND_RESULT 0x1
 #define DISPATCH_MACH_REGISTER_FOR_REPLY 0x2
 #define DISPATCH_MACH_WAIT_FOR_REPLY 0x4
-#define DISPATCH_MACH_OWNED_REPLY_PORT 0x8
-#define DISPATCH_MACH_ASYNC_REPLY 0x10
 #define DISPATCH_MACH_OPTIONS_MASK 0xffff
 
 #define DM_SEND_STATUS_SUCCESS 0x1
 #define DM_SEND_STATUS_RETURNING_IMMEDIATE_SEND_RESULT 0x2
 
+#define DM_CHECKIN_CANCELED ((dispatch_mach_msg_t)~0ul)
+
 DISPATCH_ENUM(dispatch_mach_send_invoke_flags, uint32_t,
 	DM_SEND_INVOKE_NONE            = 0x0,
 	DM_SEND_INVOKE_MAKE_DIRTY      = 0x1,
 	DM_SEND_INVOKE_NEEDS_BARRIER   = 0x2,
-	DM_SEND_INVOKE_CANCEL          = 0x4,
-	DM_SEND_INVOKE_CAN_RUN_BARRIER = 0x8,
-	DM_SEND_INVOKE_IMMEDIATE_SEND  = 0x10,
+	DM_SEND_INVOKE_CAN_RUN_BARRIER = 0x4,
+	DM_SEND_INVOKE_IMMEDIATE_SEND  = 0x8,
 );
 #define DM_SEND_INVOKE_IMMEDIATE_SEND_MASK \
 		((dispatch_mach_send_invoke_flags_t)DM_SEND_INVOKE_IMMEDIATE_SEND)
@@ -48,7 +47,7 @@
 static void _dispatch_mach_msg_disconnected(dispatch_mach_t dm,
 		mach_port_t local_port, mach_port_t remote_port);
 static inline void _dispatch_mach_msg_reply_received(dispatch_mach_t dm,
-		dispatch_mach_reply_refs_t dmr, mach_port_t local_port);
+		dispatch_mach_reply_wait_refs_t dwr, mach_port_t local_port);
 static dispatch_mach_msg_t _dispatch_mach_msg_create_reply_disconnected(
 		dispatch_object_t dou, dispatch_mach_reply_refs_t dmr,
 		dispatch_mach_reason_t reason);
@@ -58,11 +57,10 @@
 		dispatch_mach_msg_t dmsg);
 static void _dispatch_mach_send_push(dispatch_mach_t dm, dispatch_object_t dou,
 		dispatch_qos_t qos);
-static void _dispatch_mach_cancel(dispatch_mach_t dm);
 static void _dispatch_mach_push_send_barrier_drain(dispatch_mach_t dm,
 		dispatch_qos_t qos);
 static void _dispatch_mach_handle_or_push_received_msg(dispatch_mach_t dm,
-		dispatch_mach_msg_t dmsg);
+		dispatch_mach_msg_t dmsg, pthread_priority_t pp);
 static void _dispatch_mach_push_async_reply_msg(dispatch_mach_t dm,
 		dispatch_mach_msg_t dmsg, dispatch_queue_t drq);
 static dispatch_queue_t _dispatch_mach_msg_context_async_reply_queue(
@@ -76,79 +74,16 @@
 // For tests only.
 DISPATCH_EXPORT void _dispatch_mach_hooks_install_default(void);
 
-dispatch_source_t
-_dispatch_source_create_mach_msg_direct_recv(mach_port_t recvp,
-		const struct dispatch_continuation_s *dc)
-{
-	dispatch_source_t ds;
-	ds = dispatch_source_create(&_dispatch_source_type_mach_recv_direct,
-			recvp, 0, &_dispatch_mgr_q);
-	os_atomic_store(&ds->ds_refs->ds_handler[DS_EVENT_HANDLER],
-			(dispatch_continuation_t)dc, relaxed);
-	return ds;
-}
-
 #pragma mark -
 #pragma mark dispatch to XPC callbacks
 
-static dispatch_mach_xpc_hooks_t _dispatch_mach_xpc_hooks;
-
-// Default dmxh_direct_message_handler callback that does not handle
-// messages inline.
-static bool
-_dispatch_mach_xpc_no_handle_message(
-		void *_Nullable context DISPATCH_UNUSED,
-		dispatch_mach_reason_t reason DISPATCH_UNUSED,
-		dispatch_mach_msg_t message DISPATCH_UNUSED,
-		mach_error_t error DISPATCH_UNUSED)
-{
-	return false;
-}
-
-// Default dmxh_msg_context_reply_queue callback that returns a NULL queue.
-static dispatch_queue_t
-_dispatch_mach_msg_context_no_async_reply_queue(
-		void *_Nonnull msg_context DISPATCH_UNUSED)
-{
-	return NULL;
-}
-
-// Default dmxh_async_reply_handler callback that crashes when called.
-DISPATCH_NORETURN
-static void
-_dispatch_mach_default_async_reply_handler(void *context DISPATCH_UNUSED,
-		dispatch_mach_reason_t reason DISPATCH_UNUSED,
-		dispatch_mach_msg_t message DISPATCH_UNUSED)
-{
-	DISPATCH_CLIENT_CRASH(_dispatch_mach_xpc_hooks,
-			"_dispatch_mach_default_async_reply_handler called");
-}
-
-// Default dmxh_enable_sigterm_notification callback that enables delivery of
-// SIGTERM notifications (for backwards compatibility).
-static bool
-_dispatch_mach_enable_sigterm(void *_Nullable context DISPATCH_UNUSED)
-{
-	return true;
-}
-
-// Callbacks from dispatch to XPC. The default is to not support any callbacks.
-static const struct dispatch_mach_xpc_hooks_s _dispatch_mach_xpc_hooks_default
-		= {
-	.version = DISPATCH_MACH_XPC_HOOKS_VERSION,
-	.dmxh_direct_message_handler = &_dispatch_mach_xpc_no_handle_message,
-	.dmxh_msg_context_reply_queue =
-			&_dispatch_mach_msg_context_no_async_reply_queue,
-	.dmxh_async_reply_handler = &_dispatch_mach_default_async_reply_handler,
-	.dmxh_enable_sigterm_notification = &_dispatch_mach_enable_sigterm,
-};
-
-static dispatch_mach_xpc_hooks_t _dispatch_mach_xpc_hooks
-		= &_dispatch_mach_xpc_hooks_default;
-
 void
 dispatch_mach_hooks_install_4libxpc(dispatch_mach_xpc_hooks_t hooks)
 {
+	if (hooks->version < DISPATCH_MACH_XPC_MIN_HOOKS_VERSION) {
+		DISPATCH_CLIENT_CRASH(hooks,
+				"trying to install hooks with unsupported version");
+	}
 	if (!os_atomic_cmpxchg(&_dispatch_mach_xpc_hooks,
 			&_dispatch_mach_xpc_hooks_default, hooks, relaxed)) {
 		DISPATCH_CLIENT_CRASH(_dispatch_mach_xpc_hooks,
@@ -174,13 +109,10 @@
 	dispatch_mach_recv_refs_t dmrr;
 	dispatch_mach_send_refs_t dmsr;
 	dispatch_mach_t dm;
-	dm = _dispatch_object_alloc(DISPATCH_VTABLE(mach),
-			sizeof(struct dispatch_mach_s));
-	_dispatch_queue_init(dm->_as_dq, DQF_LEGACY, 1,
-			DISPATCH_QUEUE_INACTIVE | DISPATCH_QUEUE_ROLE_INNER);
 
+	dm = _dispatch_queue_alloc(mach, DQF_MUTABLE, 1,
+			DISPATCH_QUEUE_INACTIVE | DISPATCH_QUEUE_ROLE_INNER)._dm;
 	dm->dq_label = label;
-	dm->do_ref_cnt++; // the reference _dispatch_mach_cancel_invoke holds
 	dm->dm_is_xpc = is_xpc;
 
 	dmrr = dux_create(&_dispatch_mach_type_recv, 0, 0)._dmrr;
@@ -196,8 +128,8 @@
 	dmsr->du_owner_wref = _dispatch_ptr2wref(dm);
 	dm->dm_send_refs = dmsr;
 
-	if (slowpath(!q)) {
-		q = _dispatch_get_root_queue(DISPATCH_QOS_DEFAULT, true);
+	if (unlikely(!q)) {
+		q = _dispatch_get_default_queue(true);
 	} else {
 		_dispatch_retain(q);
 	}
@@ -242,7 +174,7 @@
 		_dispatch_unote_dispose(dm->dm_xpc_term_refs);
 		dm->dm_xpc_term_refs = NULL;
 	}
-	_dispatch_queue_destroy(dm->_as_dq, allow_free);
+	_dispatch_lane_class_dispose(dm, allow_free);
 }
 
 void
@@ -250,11 +182,9 @@
 		mach_port_t send, dispatch_mach_msg_t checkin)
 {
 	dispatch_mach_send_refs_t dmsr = dm->dm_send_refs;
-	uint32_t disconnect_cnt;
 
 	if (MACH_PORT_VALID(receive)) {
 		dm->dm_recv_refs->du_ident = receive;
-		_dispatch_retain(dm); // the reference the manager queue holds
 	}
 	dmsr->dmsr_send = send;
 	if (MACH_PORT_VALID(send)) {
@@ -266,151 +196,153 @@
 		}
 		dmsr->dmsr_checkin = checkin;
 	}
-	dispatch_assert(DISPATCH_MACH_NEVER_CONNECTED - 1 ==
-			DISPATCH_MACH_NEVER_INSTALLED);
-	disconnect_cnt = os_atomic_dec2o(dmsr, dmsr_disconnect_cnt, release);
-	if (unlikely(disconnect_cnt != DISPATCH_MACH_NEVER_INSTALLED)) {
+
+	uint32_t disconnect_cnt = os_atomic_and_orig2o(dmsr, dmsr_disconnect_cnt,
+			~DISPATCH_MACH_NEVER_CONNECTED, relaxed);
+	if (unlikely(!(disconnect_cnt & DISPATCH_MACH_NEVER_CONNECTED))) {
 		DISPATCH_CLIENT_CRASH(disconnect_cnt, "Channel already connected");
 	}
 	_dispatch_object_debug(dm, "%s", __func__);
 	return dispatch_activate(dm);
 }
 
+static inline void
+_dispatch_mach_reply_list_insert(dispatch_mach_send_refs_t dmsr,
+		dispatch_mach_reply_refs_t dmr)
+{
+	_dispatch_unfair_lock_lock(&dmsr->dmsr_replies_lock);
+	dispatch_assert(!_LIST_IS_ENQUEUED(dmr, dmr_list));
+	LIST_INSERT_HEAD(&dmsr->dmsr_replies, dmr, dmr_list);
+	_dispatch_unfair_lock_unlock(&dmsr->dmsr_replies_lock);
+}
+
+static inline void
+_dispatch_mach_reply_list_remove_locked(dispatch_mach_reply_refs_t dmr)
+{
+	dispatch_assert(_LIST_IS_ENQUEUED(dmr, dmr_list));
+	LIST_REMOVE(dmr, dmr_list);
+	_LIST_MARK_NOT_ENQUEUED(dmr, dmr_list);
+}
+
 static inline bool
-_dispatch_mach_reply_tryremove(dispatch_mach_t dm,
+_dispatch_mach_reply_list_tryremove(dispatch_mach_send_refs_t dmsr,
 		dispatch_mach_reply_refs_t dmr)
 {
 	bool removed;
-	_dispatch_unfair_lock_lock(&dm->dm_send_refs->dmsr_replies_lock);
-	if ((removed = _TAILQ_IS_ENQUEUED(dmr, dmr_list))) {
-		TAILQ_REMOVE(&dm->dm_send_refs->dmsr_replies, dmr, dmr_list);
-		_TAILQ_MARK_NOT_ENQUEUED(dmr, dmr_list);
+	_dispatch_unfair_lock_lock(&dmsr->dmsr_replies_lock);
+	if ((removed = _LIST_IS_ENQUEUED(dmr, dmr_list))) {
+		_dispatch_mach_reply_list_remove_locked(dmr);
 	}
-	_dispatch_unfair_lock_unlock(&dm->dm_send_refs->dmsr_replies_lock);
+	_dispatch_unfair_lock_unlock(&dmsr->dmsr_replies_lock);
 	return removed;
 }
 
+#define DMRU_DELETE_ACK     DUU_DELETE_ACK
+#define DMRU_PROBE          DUU_PROBE
+#define DMRU_MUST_SUCCEED   DUU_MUST_SUCCEED
+#define DMRU_DUU_MASK       0x0f
+#define DMRU_DISCONNECTED   0x10
+#define DMRU_REMOVE         0x20
+#define DMRU_ASYNC_MERGE    0x40
+#define DMRU_CANCEL         0x80
+
 DISPATCH_NOINLINE
 static void
-_dispatch_mach_reply_waiter_unregister(dispatch_mach_t dm,
+_dispatch_mach_reply_unregister(dispatch_mach_t dm,
 		dispatch_mach_reply_refs_t dmr, uint32_t options)
 {
-	dispatch_mach_msg_t dmsgr = NULL;
-	bool disconnected = (options & DU_UNREGISTER_DISCONNECTED);
-	if (options & DU_UNREGISTER_REPLY_REMOVE) {
-		_dispatch_unfair_lock_lock(&dm->dm_send_refs->dmsr_replies_lock);
-		if (unlikely(!_TAILQ_IS_ENQUEUED(dmr, dmr_list))) {
-			DISPATCH_INTERNAL_CRASH(0, "Could not find reply registration");
-		}
-		TAILQ_REMOVE(&dm->dm_send_refs->dmsr_replies, dmr, dmr_list);
-		_TAILQ_MARK_NOT_ENQUEUED(dmr, dmr_list);
-		_dispatch_unfair_lock_unlock(&dm->dm_send_refs->dmsr_replies_lock);
-	}
-	if (disconnected) {
-		dmsgr = _dispatch_mach_msg_create_reply_disconnected(NULL, dmr,
-				DISPATCH_MACH_DISCONNECTED);
-	} else if (dmr->dmr_voucher) {
-		_voucher_release(dmr->dmr_voucher);
-		dmr->dmr_voucher = NULL;
-	}
-	_dispatch_debug("machport[0x%08x]: unregistering for sync reply%s, ctxt %p",
-			_dispatch_mach_reply_get_reply_port((mach_port_t)dmr->du_ident),
+	// - async waiters have a dmr of type &_dispatch_mach_type_reply
+	//   heap-allocated in _dispatch_mach_reply_kevent_register().
+	//
+	// - sync waiters have a dmr of type DISPATCH_MACH_TYPE_WAITER,
+	//   stack-allocated in _dispatch_mach_send_and_wait_for_reply().
+	bool sync_waiter = (dux_type(dmr) == DISPATCH_MACH_TYPE_WAITER);
+	dispatch_mach_send_refs_t dmsr = dm->dm_send_refs;
+	bool disconnected = (options & DMRU_DISCONNECTED);
+	bool wakeup = false;
+
+	_dispatch_debug("machport[0x%08x]: unregistering for%s reply%s, ctxt %p",
+			(mach_port_t)dmr->du_ident, sync_waiter ? " sync" : "",
+			(options & DMRU_CANCEL) ? " (canceled)" :
 			disconnected ? " (disconnected)" : "", dmr->dmr_ctxt);
-	if (dmsgr) {
-		return _dispatch_mach_handle_or_push_received_msg(dm, dmsgr);
+
+	if (options & DMRU_REMOVE) {
+		_dispatch_unfair_lock_lock(&dmsr->dmsr_replies_lock);
+		_dispatch_mach_reply_list_remove_locked(dmr);
+		if (LIST_EMPTY(&dmsr->dmsr_replies) && dmsr->dmsr_disconnect_cnt) {
+			wakeup = true;
+		}
+		_dispatch_unfair_lock_unlock(&dmsr->dmsr_replies_lock);
 	}
-}
 
-DISPATCH_NOINLINE
-static bool
-_dispatch_mach_reply_list_remove(dispatch_mach_t dm,
-		dispatch_mach_reply_refs_t dmr) {
-	// dmsr_replies_lock must be held by the caller.
-	bool removed = false;
-	if (likely(_TAILQ_IS_ENQUEUED(dmr, dmr_list))) {
-		TAILQ_REMOVE(&dm->dm_send_refs->dmsr_replies, dmr, dmr_list);
-		_TAILQ_MARK_NOT_ENQUEUED(dmr, dmr_list);
-		removed = true;
-	}
-	return removed;
-}
-
-DISPATCH_NOINLINE
-static bool
-_dispatch_mach_reply_kevent_unregister(dispatch_mach_t dm,
-		dispatch_mach_reply_refs_t dmr, uint32_t options)
-{
-	dispatch_assert(!_TAILQ_IS_ENQUEUED(dmr, dmr_list));
-
-	bool disconnected = (options & DU_UNREGISTER_DISCONNECTED);
-	_dispatch_debug("machport[0x%08x]: unregistering for reply%s, ctxt %p",
-			(mach_port_t)dmr->du_ident, disconnected ? " (disconnected)" : "",
-			dmr->dmr_ctxt);
-	if (!_dispatch_unote_unregister(dmr, options)) {
-		_dispatch_debug("machport[0x%08x]: deferred delete kevent[%p]",
-						(mach_port_t)dmr->du_ident, dmr);
-		dispatch_assert(options == DU_UNREGISTER_DISCONNECTED);
-		return false;
+	if (_dispatch_unote_registered(dmr) &&
+			!_dispatch_unote_unregister(dmr, options & DMRU_DUU_MASK)) {
+		dispatch_assert(!sync_waiter); // sync waiters never use kevent
+		if (options & DMRU_CANCEL) {
+			// when canceling, failed unregistrations are put back in the list
+			// the caller has the lock held
+			LIST_INSERT_HEAD(&dmsr->dmsr_replies, dmr, dmr_list);
+		}
+		return;
 	}
 
 	dispatch_mach_msg_t dmsgr = NULL;
 	dispatch_queue_t drq = NULL;
 	if (disconnected) {
-		// The next call is guaranteed to always transfer or consume the voucher
-		// in the dmr, if there is one.
-		dmsgr = _dispatch_mach_msg_create_reply_disconnected(NULL, dmr,
-			dmr->dmr_async_reply ? DISPATCH_MACH_ASYNC_WAITER_DISCONNECTED
-			: DISPATCH_MACH_DISCONNECTED);
-		if (dmr->dmr_ctxt) {
+		if (dm->dm_is_xpc && dmr->dmr_ctxt) {
 			drq = _dispatch_mach_msg_context_async_reply_queue(dmr->dmr_ctxt);
 		}
+		dmsgr = _dispatch_mach_msg_create_reply_disconnected(NULL, dmr,
+				drq ? DISPATCH_MACH_ASYNC_WAITER_DISCONNECTED
+				: DISPATCH_MACH_DISCONNECTED);
+		// _dispatch_mach_msg_create_reply_disconnected() consumes the voucher
 		dispatch_assert(dmr->dmr_voucher == NULL);
 	} else if (dmr->dmr_voucher) {
 		_voucher_release(dmr->dmr_voucher);
 		dmr->dmr_voucher = NULL;
 	}
-	_dispatch_unote_dispose(dmr);
+	if (!sync_waiter) {
+		_dispatch_unote_dispose(dmr);
+	}
 
 	if (dmsgr) {
 		if (drq) {
 			_dispatch_mach_push_async_reply_msg(dm, dmsgr, drq);
 		} else {
-			_dispatch_mach_handle_or_push_received_msg(dm, dmsgr);
+			_dispatch_mach_handle_or_push_received_msg(dm, dmsgr, 0);
 		}
 	}
-	return true;
+	if (options & DMRU_ASYNC_MERGE) {
+		if (wakeup) {
+			return dx_wakeup(dm, 0,
+					DISPATCH_WAKEUP_CONSUME_2 | DISPATCH_WAKEUP_MAKE_DIRTY);
+		}
+		return _dispatch_release_2_tailcall(dm);
+	}
 }
 
 DISPATCH_NOINLINE
 static void
 _dispatch_mach_reply_waiter_register(dispatch_mach_t dm,
-		dispatch_mach_reply_refs_t dmr, mach_port_t reply_port,
-		dispatch_mach_msg_t dmsg, mach_msg_option_t msg_opts)
+		dispatch_mach_reply_wait_refs_t dwr, mach_port_t reply_port,
+		dispatch_mach_msg_t dmsg)
 {
+	dispatch_mach_reply_refs_t dmr = &dwr->dwr_refs;
 	dmr->du_owner_wref = _dispatch_ptr2wref(dm);
-	dmr->du_wlh = NULL;
 	dmr->du_filter = EVFILT_MACHPORT;
 	dmr->du_ident = reply_port;
-	if (msg_opts & DISPATCH_MACH_OWNED_REPLY_PORT) {
-		_dispatch_mach_reply_mark_reply_port_owned(dmr);
-	} else {
+	if (!dmr->dmr_reply_port_owned) {
 		if (dmsg->dmsg_voucher) {
 			dmr->dmr_voucher = _voucher_retain(dmsg->dmsg_voucher);
 		}
-		dmr->dmr_priority = _dispatch_priority_from_pp(dmsg->dmsg_priority);
+		dmr->dmr_priority = dmsg->dmsg_priority;
 		// make reply context visible to leaks rdar://11777199
 		dmr->dmr_ctxt = dmsg->do_ctxt;
 	}
 
 	_dispatch_debug("machport[0x%08x]: registering for sync reply, ctxt %p",
 			reply_port, dmsg->do_ctxt);
-	_dispatch_unfair_lock_lock(&dm->dm_send_refs->dmsr_replies_lock);
-	if (unlikely(_TAILQ_IS_ENQUEUED(dmr, dmr_list))) {
-		DISPATCH_INTERNAL_CRASH(dmr->dmr_list.tqe_prev,
-				"Reply already registered");
-	}
-	TAILQ_INSERT_TAIL(&dm->dm_send_refs->dmsr_replies, dmr, dmr_list);
-	_dispatch_unfair_lock_unlock(&dm->dm_send_refs->dmsr_replies_lock);
+	_dispatch_mach_reply_list_insert(dm->dm_send_refs, dmr);
 }
 
 DISPATCH_NOINLINE
@@ -420,6 +352,7 @@
 {
 	dispatch_mach_reply_refs_t dmr;
 	dispatch_priority_t mpri, pri, overcommit;
+	dispatch_qos_t fallback;
 	dispatch_wlh_t wlh;
 
 	dmr = dux_create(&_dispatch_mach_type_reply, reply_port, 0)._dmr;
@@ -428,58 +361,51 @@
 	if (dmsg->dmsg_voucher) {
 		dmr->dmr_voucher = _voucher_retain(dmsg->dmsg_voucher);
 	}
-	dmr->dmr_priority = _dispatch_priority_from_pp(dmsg->dmsg_priority);
+	dmr->dmr_priority = dmsg->dmsg_priority;
 	// make reply context visible to leaks rdar://11777199
 	dmr->dmr_ctxt = dmsg->do_ctxt;
 
 	dispatch_queue_t drq = NULL;
-	if (dmsg->dmsg_options & DISPATCH_MACH_ASYNC_REPLY) {
-		dmr->dmr_async_reply = true;
+	if (dm->dm_is_xpc && dmsg->do_ctxt) {
 		drq = _dispatch_mach_msg_context_async_reply_queue(dmsg->do_ctxt);
 	}
-
-	if (!drq) {
+	if (unlikely(!drq && _dispatch_unote_wlh(dm->dm_recv_refs))) {
+		wlh = _dispatch_unote_wlh(dm->dm_recv_refs);
 		pri = dm->dq_priority;
-		wlh = dm->dm_recv_refs->du_wlh;
-	} else if (dx_type(drq) == DISPATCH_QUEUE_NETWORK_EVENT_TYPE) {
-		pri = DISPATCH_PRIORITY_FLAG_MANAGER;
-		wlh = (dispatch_wlh_t)drq;
 	} else if (dx_hastypeflag(drq, QUEUE_ROOT)) {
-		pri = drq->dq_priority;
 		wlh = DISPATCH_WLH_ANON;
-	} else if (drq == dm->do_targetq) {
-		pri = dm->dq_priority;
-		wlh = dm->dm_recv_refs->du_wlh;
+		if (_dispatch_is_in_root_queues_array(drq)) {
+			pri = drq->dq_priority;
+		} else {
+			pri = DISPATCH_PRIORITY_FLAG_MANAGER;
+		}
 	} else if (!(pri = _dispatch_queue_compute_priority_and_wlh(drq, &wlh))) {
-		pri = drq->dq_priority;
 		wlh = DISPATCH_WLH_ANON;
+		pri = drq->dq_priority;
 	}
+	mpri = _dispatch_priority_from_pp_strip_flags(dmsg->dmsg_priority);
+	overcommit = pri & DISPATCH_PRIORITY_FLAG_OVERCOMMIT;
+	fallback = _dispatch_priority_fallback_qos(pri);
 	if (pri & DISPATCH_PRIORITY_REQUESTED_MASK) {
-		overcommit = pri & DISPATCH_PRIORITY_FLAG_OVERCOMMIT;
 		pri &= DISPATCH_PRIORITY_REQUESTED_MASK;
-		mpri = _dispatch_priority_from_pp_strip_flags(dmsg->dmsg_priority);
 		if (pri < mpri) pri = mpri;
 		pri |= overcommit;
+	} else if (fallback && mpri) {
+		pri = mpri | overcommit;
+	} else if (fallback && !mpri) {
+		pri = _dispatch_priority_make(fallback, 0) | overcommit;
 	} else {
 		pri = DISPATCH_PRIORITY_FLAG_MANAGER;
+		wlh = DISPATCH_WLH_ANON;
 	}
 
 	_dispatch_debug("machport[0x%08x]: registering for reply, ctxt %p",
 			reply_port, dmsg->do_ctxt);
-	_dispatch_unfair_lock_lock(&dm->dm_send_refs->dmsr_replies_lock);
-	if (unlikely(_TAILQ_IS_ENQUEUED(dmr, dmr_list))) {
-		DISPATCH_INTERNAL_CRASH(dmr->dmr_list.tqe_prev,
-				"Reply already registered");
-	}
-	TAILQ_INSERT_TAIL(&dm->dm_send_refs->dmsr_replies, dmr, dmr_list);
-	_dispatch_unfair_lock_unlock(&dm->dm_send_refs->dmsr_replies_lock);
+	_dispatch_mach_reply_list_insert(dm->dm_send_refs, dmr);
 
 	if (!_dispatch_unote_register(dmr, wlh, pri)) {
-		_dispatch_unfair_lock_lock(&dm->dm_send_refs->dmsr_replies_lock);
-		_dispatch_mach_reply_list_remove(dm, dmr);
-		_dispatch_unfair_lock_unlock(&dm->dm_send_refs->dmsr_replies_lock);
-		_dispatch_mach_reply_kevent_unregister(dm, dmr,
-				DU_UNREGISTER_DISCONNECTED);
+		uint32_t options = DMRU_MUST_SUCCEED | DMRU_REMOVE | DMRU_DISCONNECTED;
+		_dispatch_mach_reply_unregister(dm, dmr, options);
 	}
 }
 
@@ -498,6 +424,22 @@
 #endif
 }
 
+static void
+_dispatch_destruct_reply_port(mach_port_t reply_port,
+		enum thread_destruct_special_reply_port_rights rights)
+{
+	kern_return_t kr = KERN_SUCCESS;
+
+	if (_dispatch_use_mach_special_reply_port()) {
+		kr = thread_destruct_special_reply_port(reply_port, rights);
+	} else if (rights == THREAD_SPECIAL_REPLY_PORT_ALL ||
+			rights == THREAD_SPECIAL_REPLY_PORT_RECEIVE_ONLY) {
+		kr = mach_port_destruct(mach_task_self(), reply_port, 0, 0);
+	}
+	DISPATCH_VERIFY_MIG(kr);
+	dispatch_assume_zero(kr);
+}
+
 static mach_port_t
 _dispatch_get_thread_reply_port(void)
 {
@@ -567,10 +509,8 @@
 		mrp = _dispatch_get_thread_mig_reply_port();
 	}
 	if (mrp) {
-		kern_return_t kr = mach_port_mod_refs(mach_task_self(), reply_port,
-				MACH_PORT_RIGHT_RECEIVE, -1);
-		DISPATCH_VERIFY_MIG(kr);
-		dispatch_assume_zero(kr);
+		_dispatch_destruct_reply_port(reply_port,
+				THREAD_SPECIAL_REPLY_PORT_ALL);
 		_dispatch_debug("machport[0x%08x]: deallocated sync reply port "
 				"(found 0x%08x)", reply_port, mrp);
 	} else {
@@ -626,22 +566,20 @@
 
 static inline dispatch_mach_msg_t
 _dispatch_mach_msg_create_recv(mach_msg_header_t *hdr, mach_msg_size_t siz,
-		dispatch_mach_reply_refs_t dmr, uint32_t flags)
+		dispatch_mach_reply_refs_t dmr, uint32_t flags, pthread_priority_t pp)
 {
 	dispatch_mach_msg_destructor_t destructor;
 	dispatch_mach_msg_t dmsg;
 	voucher_t voucher;
-	pthread_priority_t pp;
 
 	if (dmr) {
 		_voucher_mach_msg_clear(hdr, false); // deallocate reply message voucher
-		pp = _dispatch_priority_to_pp(dmr->dmr_priority);
+		pp = dmr->dmr_priority;
 		voucher = dmr->dmr_voucher;
 		dmr->dmr_voucher = NULL; // transfer reference
 	} else {
 		voucher = voucher_create_with_mach_msg(hdr);
-		pp = _dispatch_priority_compute_propagated(
-				_voucher_get_priority(voucher), 0);
+		pp = _dispatch_priority_compute_propagated(pp, 0);
 	}
 
 	destructor = (flags & DISPATCH_EV_MSG_NEEDS_FREE) ?
@@ -663,81 +601,56 @@
 
 void
 _dispatch_mach_merge_msg(dispatch_unote_t du, uint32_t flags,
-		mach_msg_header_t *hdr, mach_msg_size_t siz)
+		mach_msg_header_t *hdr, mach_msg_size_t siz,
+		pthread_priority_t msg_pp, pthread_priority_t ovr_pp)
+
 {
-	// this function is very similar with what _dispatch_source_merge_evt does
-	// but can't reuse it as handling the message must be protected by the
-	// internal refcount between the first half and the trailer of what
-	// _dispatch_source_merge_evt does.
-
-	dispatch_mach_recv_refs_t dmrr = du._dmrr;
-	dispatch_mach_t dm = _dispatch_wref2ptr(dmrr->du_owner_wref);
-	dispatch_queue_flags_t dqf;
-	dispatch_mach_msg_t dmsg;
-
-	dispatch_assert(_dispatch_unote_needs_rearm(du));
-
 	if (flags & EV_VANISHED) {
 		DISPATCH_CLIENT_CRASH(du._du->du_ident,
 				"Unexpected EV_VANISHED (do not destroy random mach ports)");
 	}
 
-	// once we modify the queue atomic flags below, it will allow concurrent
-	// threads running _dispatch_mach_invoke2 to dispose of the source,
-	// so we can't safely borrow the reference we get from the muxnote udata
-	// anymore, and need our own
-	dispatch_wakeup_flags_t wflags = DISPATCH_WAKEUP_CONSUME_2;
-	_dispatch_retain_2(dm); // rdar://20382435
-
-	if (unlikely((flags & EV_ONESHOT) && !(flags & EV_DELETE))) {
-		dqf = _dispatch_queue_atomic_flags_set_and_clear(dm->_as_dq,
-				DSF_DEFERRED_DELETE, DSF_ARMED);
-		_dispatch_debug("kevent-source[%p]: deferred delete oneshot kevent[%p]",
-				dm, dmrr);
-	} else if (unlikely(flags & (EV_ONESHOT | EV_DELETE))) {
-		_dispatch_source_refs_unregister(dm->_as_ds,
-				DU_UNREGISTER_ALREADY_DELETED);
-		dqf = _dispatch_queue_atomic_flags(dm->_as_dq);
-		_dispatch_debug("kevent-source[%p]: deleted kevent[%p]", dm, dmrr);
-	} else {
-		dqf = _dispatch_queue_atomic_flags_clear(dm->_as_dq, DSF_ARMED);
-		_dispatch_debug("kevent-source[%p]: disarmed kevent[%p]", dm, dmrr);
-	}
-
 	_dispatch_debug_machport(hdr->msgh_remote_port);
 	_dispatch_debug("machport[0x%08x]: received msg id 0x%x, reply on 0x%08x",
 			hdr->msgh_local_port, hdr->msgh_id, hdr->msgh_remote_port);
 
-	if (dqf & DSF_CANCELED) {
+	dispatch_mach_t dm = _dispatch_wref2ptr(du._dmrr->du_owner_wref);
+	if (unlikely(_dispatch_queue_atomic_flags(dm) & DSF_CANCELED)) {
 		_dispatch_debug("machport[0x%08x]: drop msg id 0x%x, reply on 0x%08x",
 				hdr->msgh_local_port, hdr->msgh_id, hdr->msgh_remote_port);
 		mach_msg_destroy(hdr);
 		if (flags & DISPATCH_EV_MSG_NEEDS_FREE) {
 			free(hdr);
 		}
-		return dx_wakeup(dm, 0, wflags | DISPATCH_WAKEUP_MAKE_DIRTY);
+	} else {
+		// Once the mach channel disarming is visible, cancellation will switch
+		// to destroying messages immediately.  If we're preempted here, then
+		// the whole cancellation sequence may be complete by the time we
+		// really enqueue the message.
+		//
+		// _dispatch_mach_msg_invoke_with_mach() is responsible for filtering it
+		// out to keep the promise that DISPATCH_MACH_DISCONNECTED is the last
+		// event sent.
+		dispatch_mach_msg_t dmsg;
+		dmsg = _dispatch_mach_msg_create_recv(hdr, siz, NULL, flags, msg_pp);
+		_dispatch_mach_handle_or_push_received_msg(dm, dmsg, ovr_pp);
 	}
 
-	// Once the mach channel disarming is visible, cancellation will switch to
-	// immediate deletion.  If we're preempted here, then the whole cancellation
-	// sequence may be complete by the time we really enqueue the message.
-	//
-	// _dispatch_mach_msg_invoke_with_mach() is responsible for filtering it out
-	// to keep the promise that DISPATCH_MACH_DISCONNECTED is the last
-	// event sent.
-
-	dmsg = _dispatch_mach_msg_create_recv(hdr, siz, NULL, flags);
-	_dispatch_mach_handle_or_push_received_msg(dm, dmsg);
+	if (unlikely(_dispatch_unote_needs_delete(du))) {
+		return dx_wakeup(dm, 0, DISPATCH_WAKEUP_EVENT |
+				DISPATCH_WAKEUP_CONSUME_2 | DISPATCH_WAKEUP_MAKE_DIRTY);
+	}
 	return _dispatch_release_2_tailcall(dm);
 }
 
 void
 _dispatch_mach_reply_merge_msg(dispatch_unote_t du, uint32_t flags,
-		mach_msg_header_t *hdr, mach_msg_size_t siz)
+		mach_msg_header_t *hdr, mach_msg_size_t siz,
+		pthread_priority_t msg_pp, pthread_priority_t ovr_pp)
 {
 	dispatch_mach_reply_refs_t dmr = du._dmr;
 	dispatch_mach_t dm = _dispatch_wref2ptr(dmr->du_owner_wref);
-	bool canceled = (_dispatch_queue_atomic_flags(dm->_as_dq) & DSF_CANCELED);
+	bool canceled = (_dispatch_queue_atomic_flags(dm) & DSF_CANCELED);
 	dispatch_mach_msg_t dmsg = NULL;
 
 	_dispatch_debug_machport(hdr->msgh_remote_port);
@@ -745,18 +658,18 @@
 			hdr->msgh_local_port, hdr->msgh_id, hdr->msgh_remote_port);
 
 	if (!canceled) {
-		dmsg = _dispatch_mach_msg_create_recv(hdr, siz, dmr, flags);
+		dmsg = _dispatch_mach_msg_create_recv(hdr, siz, dmr, flags, msg_pp);
 	}
 
 	if (dmsg) {
 		dispatch_queue_t drq = NULL;
-		if (dmsg->do_ctxt) {
+		if (dm->dm_is_xpc && dmsg->do_ctxt) {
 			drq = _dispatch_mach_msg_context_async_reply_queue(dmsg->do_ctxt);
 		}
 		if (drq) {
 			_dispatch_mach_push_async_reply_msg(dm, dmsg, drq);
 		} else {
-			_dispatch_mach_handle_or_push_received_msg(dm, dmsg);
+			_dispatch_mach_handle_or_push_received_msg(dm, dmsg, ovr_pp);
 		}
 	} else {
 		_dispatch_debug("machport[0x%08x]: drop msg id 0x%x, reply on 0x%08x",
@@ -767,41 +680,41 @@
 		}
 	}
 
-	dispatch_wakeup_flags_t wflags = 0;
-	uint32_t options = DU_UNREGISTER_IMMEDIATE_DELETE;
-	if (canceled) {
-		options |= DU_UNREGISTER_DISCONNECTED;
-	}
+	uint32_t options = DMRU_ASYNC_MERGE | DMRU_REMOVE;
+	options |= DMRU_MUST_SUCCEED | DMRU_DELETE_ACK;
+	if (canceled) options |= DMRU_DISCONNECTED;
+	dispatch_assert(_dispatch_unote_needs_delete(dmr));
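+	// the reply unote delivered its final (deferred-delete) event, so the
+	// unregister below acknowledges the deletion and removes the reply from
+	// the list in a single call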
+	_dispatch_mach_reply_unregister(dm, dmr, options); // consumes the +2
+}
 
-	_dispatch_unfair_lock_lock(&dm->dm_send_refs->dmsr_replies_lock);
-	bool removed = _dispatch_mach_reply_list_remove(dm, dmr);
-	dispatch_assert(removed);
-	if (TAILQ_EMPTY(&dm->dm_send_refs->dmsr_replies) &&
-			(dm->dm_send_refs->dmsr_disconnect_cnt ||
-			(dm->dq_atomic_flags & DSF_CANCELED))) {
-		// When the list is empty, _dispatch_mach_disconnect() may release the
-		// last reference count on the Mach channel. To avoid this, take our
-		// own reference before releasing the lock.
-		wflags = DISPATCH_WAKEUP_MAKE_DIRTY | DISPATCH_WAKEUP_CONSUME_2;
-		_dispatch_retain_2(dm);
+DISPATCH_ALWAYS_INLINE
+static void
+_dispatch_mach_stack_probe(void *addr, size_t size)
+{
+#if TARGET_OS_MAC && DISPATCH_MIN_REQUIRED_OSX_AT_LEAST(101400) && \
+		(defined(__x86_64__) || defined(__arm64__))
+	// <rdar://problem/40708879> there should be a __has_feature() macro test
+	// for this; for now we approximate it. On these targets the compiler
+	// generates calls to ____chkstk_darwin on our behalf, so no manual
+	// probing is needed.
+	(void)addr; (void)size;
+#else
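+	// Large alloca() buffers can jump entirely past the stack guard page;
+	// touching every page makes sure the guard faults before the buffer is
+	// used (on the targets above, the compiler-emitted ____chkstk_darwin
+	// calls already do this for us).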
+	for (mach_vm_address_t p = mach_vm_trunc_page(addr + vm_page_size);
+			p < (mach_vm_address_t)addr + size; p += vm_page_size) {
+		*(char*)p = 0; // ensure alloca buffer doesn't overlap with stack guard
 	}
-	_dispatch_unfair_lock_unlock(&dm->dm_send_refs->dmsr_replies_lock);
-
-	bool result = _dispatch_mach_reply_kevent_unregister(dm, dmr, options);
-	dispatch_assert(result);
-	if (wflags) dx_wakeup(dm, 0, wflags);
+#endif
 }
 
 DISPATCH_ALWAYS_INLINE
 static inline dispatch_mach_msg_t
 _dispatch_mach_msg_reply_recv(dispatch_mach_t dm,
-		dispatch_mach_reply_refs_t dmr, mach_port_t reply_port,
+		dispatch_mach_reply_wait_refs_t dwr, mach_port_t reply_port,
 		mach_port_t send)
 {
-	if (slowpath(!MACH_PORT_VALID(reply_port))) {
+	if (unlikely(!MACH_PORT_VALID(reply_port))) {
 		DISPATCH_CLIENT_CRASH(reply_port, "Invalid reply port");
 	}
-	void *ctxt = dmr->dmr_ctxt;
+	void *ctxt = dwr->dwr_refs.dmr_ctxt;
 	mach_msg_header_t *hdr, *hdr2 = NULL;
 	void *hdr_copyout_addr;
 	mach_msg_size_t siz, msgsiz = 0;
@@ -811,10 +724,7 @@
 	siz = mach_vm_round_page(DISPATCH_MACH_RECEIVE_MAX_INLINE_MESSAGE_SIZE +
 			DISPATCH_MACH_TRAILER_SIZE);
 	hdr = alloca(siz);
-	for (mach_vm_address_t p = mach_vm_trunc_page(hdr + vm_page_size);
-			p < (mach_vm_address_t)hdr + siz; p += vm_page_size) {
-		*(char*)p = 0; // ensure alloca buffer doesn't overlap with stack guard
-	}
+	_dispatch_mach_stack_probe(hdr, siz);
 	options = DISPATCH_MACH_RCV_OPTIONS & (~MACH_RCV_VOUCHER);
 	if (MACH_PORT_VALID(send)) {
 		notify = send;
@@ -834,8 +744,7 @@
 			mach_error_string(kr), kr);
 	switch (kr) {
 	case MACH_RCV_TOO_LARGE:
-		if (!fastpath(hdr->msgh_size <= UINT_MAX -
-				DISPATCH_MACH_TRAILER_SIZE)) {
+		if (unlikely(hdr->msgh_size > UINT_MAX - DISPATCH_MACH_TRAILER_SIZE)) {
 			DISPATCH_CLIENT_CRASH(hdr->msgh_size, "Overlarge message");
 		}
 		if (options & MACH_RCV_LARGE) {
@@ -860,6 +769,10 @@
 		// channel was disconnected/canceled and reply port destroyed
 		_dispatch_debug("machport[0x%08x]: sync reply port destroyed, ctxt %p: "
 				"%s - 0x%x", reply_port, ctxt, mach_error_string(kr), kr);
+		if (dwr->dwr_refs.dmr_reply_port_owned) {
+			_dispatch_destruct_reply_port(reply_port,
+					THREAD_SPECIAL_REPLY_PORT_SEND_ONLY);
+		}
 		goto out;
 	case MACH_MSG_SUCCESS:
 		if (hdr->msgh_remote_port) {
@@ -879,9 +792,9 @@
 		DISPATCH_INTERNAL_CRASH(kr, "Unexpected error from mach_msg_receive");
 		break;
 	}
-	_dispatch_mach_msg_reply_received(dm, dmr, hdr->msgh_local_port);
+	_dispatch_mach_msg_reply_received(dm, dwr, hdr->msgh_local_port);
 	hdr->msgh_local_port = MACH_PORT_NULL;
-	if (slowpath((dm->dq_atomic_flags & DSF_CANCELED) || kr)) {
+	if (unlikely((dm->dq_atomic_flags & DSF_CANCELED) || kr)) {
 		if (!kr) mach_msg_destroy(hdr);
 		goto out;
 	}
@@ -904,26 +817,38 @@
 
 static inline void
 _dispatch_mach_msg_reply_received(dispatch_mach_t dm,
-		dispatch_mach_reply_refs_t dmr, mach_port_t local_port)
+		dispatch_mach_reply_wait_refs_t dwr, mach_port_t local_port)
 {
-	bool removed = _dispatch_mach_reply_tryremove(dm, dmr);
+	dispatch_mach_reply_refs_t dmr = &dwr->dwr_refs;
+	bool removed = _dispatch_mach_reply_list_tryremove(dm->dm_send_refs, dmr);
+	mach_port_t reply_port = (mach_port_t)dmr->du_ident;
+
+	if (removed) {
+		_dispatch_debug("machport[0x%08x]: unregistered for sync reply, ctxt %p",
+				reply_port, dmr->dmr_ctxt);
+	}
+
+	if (dmr->dmr_reply_port_owned) {
+		if (local_port != reply_port &&
+				(removed || MACH_PORT_VALID(local_port))) {
+			DISPATCH_CLIENT_CRASH(local_port,
+					"Reply received on unexpected port");
+		}
+		if (removed) {
+			_dispatch_set_thread_reply_port(reply_port);
+		} else {
+			_dispatch_destruct_reply_port(reply_port,
+					THREAD_SPECIAL_REPLY_PORT_SEND_ONLY);
+		}
+		return;
+	}
+
 	if (!MACH_PORT_VALID(local_port) || !removed) {
 		// port moved/destroyed during receive, or reply waiter was never
 		// registered or already removed (disconnected)
 		return;
 	}
-	mach_port_t reply_port = _dispatch_mach_reply_get_reply_port(
-			(mach_port_t)dmr->du_ident);
-	_dispatch_debug("machport[0x%08x]: unregistered for sync reply, ctxt %p",
-			reply_port, dmr->dmr_ctxt);
-	if (_dispatch_mach_reply_is_reply_port_owned(dmr)) {
-		_dispatch_set_thread_reply_port(reply_port);
-		if (local_port != reply_port) {
-			DISPATCH_CLIENT_CRASH(local_port,
-					"Reply received on unexpected port");
-		}
-		return;
-	}
+
 	mach_msg_header_t *hdr;
 	dispatch_mach_msg_t dmsg;
 	dmsg = dispatch_mach_msg_create(NULL, sizeof(mach_msg_header_t),
@@ -931,10 +856,10 @@
 	hdr->msgh_local_port = local_port;
 	dmsg->dmsg_voucher = dmr->dmr_voucher;
 	dmr->dmr_voucher = NULL;  // transfer reference
-	dmsg->dmsg_priority = _dispatch_priority_to_pp(dmr->dmr_priority);
+	dmsg->dmsg_priority = dmr->dmr_priority;
 	dmsg->do_ctxt = dmr->dmr_ctxt;
 	_dispatch_mach_msg_set_reason(dmsg, 0, DISPATCH_MACH_REPLY_RECEIVED);
-	return _dispatch_mach_handle_or_push_received_msg(dm, dmsg);
+	return _dispatch_mach_handle_or_push_received_msg(dm, dmsg, 0);
 }
 
 static inline void
@@ -950,7 +875,7 @@
 	_dispatch_mach_msg_set_reason(dmsg, 0, DISPATCH_MACH_DISCONNECTED);
 	_dispatch_debug("machport[0x%08x]: %s right disconnected", local_port ?
 			local_port : remote_port, local_port ? "receive" : "send");
-	return _dispatch_mach_handle_or_push_received_msg(dm, dmsg);
+	return _dispatch_mach_handle_or_push_received_msg(dm, dmsg, 0);
 }
 
 static inline dispatch_mach_msg_t
@@ -958,52 +883,44 @@
 		dispatch_mach_reply_refs_t dmr, dispatch_mach_reason_t reason)
 {
 	dispatch_mach_msg_t dmsg = dou._dmsg, dmsgr;
-	mach_port_t reply_port = dmsg ? dmsg->dmsg_reply :
-			_dispatch_mach_reply_get_reply_port((mach_port_t)dmr->du_ident);
-	voucher_t v;
+	mach_port_t reply_port = dmsg ? dmsg->dmsg_reply :
+			(mach_port_t)dmr->du_ident;
 
 	if (!reply_port) {
-		if (!dmsg) {
-			v = dmr->dmr_voucher;
-			dmr->dmr_voucher = NULL; // transfer reference
-			if (v) _voucher_release(v);
+		if (!dmsg && dmr->dmr_voucher) {
+			_voucher_release(dmr->dmr_voucher);
+			dmr->dmr_voucher = NULL;
 		}
 		return NULL;
 	}
 
-	if (dmsg) {
-		v = dmsg->dmsg_voucher;
-		if (v) _voucher_retain(v);
-	} else {
-		v = dmr->dmr_voucher;
-		dmr->dmr_voucher = NULL; // transfer reference
-	}
-
-	if ((dmsg && (dmsg->dmsg_options & DISPATCH_MACH_WAIT_FOR_REPLY) &&
-			(dmsg->dmsg_options & DISPATCH_MACH_OWNED_REPLY_PORT)) ||
-			(dmr && !_dispatch_unote_registered(dmr) &&
-			_dispatch_mach_reply_is_reply_port_owned(dmr))) {
-		if (v) _voucher_release(v);
+	if (dmr && !_dispatch_unote_registered(dmr) && dmr->dmr_reply_port_owned) {
+		if (dmr->dmr_voucher) {
+			_voucher_release(dmr->dmr_voucher);
+			dmr->dmr_voucher = NULL;
+		}
 		// deallocate owned reply port to break _dispatch_mach_msg_reply_recv
-		// out of waiting in mach_msg(MACH_RCV_MSG)
-		kern_return_t kr = mach_port_mod_refs(mach_task_self(), reply_port,
-				MACH_PORT_RIGHT_RECEIVE, -1);
-		DISPATCH_VERIFY_MIG(kr);
-		dispatch_assume_zero(kr);
+		// out of waiting in mach_msg(MACH_RCV_MSG).
+		//
+		// after this call, dmr can become invalid
+		_dispatch_destruct_reply_port(reply_port,
+				THREAD_SPECIAL_REPLY_PORT_RECEIVE_ONLY);
 		return NULL;
 	}
 
 	mach_msg_header_t *hdr;
 	dmsgr = dispatch_mach_msg_create(NULL, sizeof(mach_msg_header_t),
 			DISPATCH_MACH_MSG_DESTRUCTOR_DEFAULT, &hdr);
-	dmsgr->dmsg_voucher = v;
 	hdr->msgh_local_port = reply_port;
 	if (dmsg) {
 		dmsgr->dmsg_priority = dmsg->dmsg_priority;
 		dmsgr->do_ctxt = dmsg->do_ctxt;
+		dmsgr->dmsg_voucher = dmsg->dmsg_voucher;
+		if (dmsgr->dmsg_voucher) _voucher_retain(dmsgr->dmsg_voucher);
 	} else {
-		dmsgr->dmsg_priority = _dispatch_priority_to_pp(dmr->dmr_priority);
+		dmsgr->dmsg_priority = dmr->dmr_priority;
 		dmsgr->do_ctxt = dmr->dmr_ctxt;
+		dmsgr->dmsg_voucher = dmr->dmr_voucher;
+		dmr->dmr_voucher = NULL; // transfer reference
 	}
 	_dispatch_mach_msg_set_reason(dmsgr, 0, reason);
 	_dispatch_debug("machport[0x%08x]: reply disconnected, ctxt %p",
@@ -1013,7 +930,8 @@
 
 DISPATCH_NOINLINE
 static void
-_dispatch_mach_msg_not_sent(dispatch_mach_t dm, dispatch_object_t dou)
+_dispatch_mach_msg_not_sent(dispatch_mach_t dm, dispatch_object_t dou,
+		dispatch_mach_reply_wait_refs_t dwr)
 {
 	dispatch_mach_msg_t dmsg = dou._dmsg, dmsgr;
 	dispatch_queue_t drq = NULL;
@@ -1025,20 +943,20 @@
 			msg_opts, msg->msgh_voucher_port, dmsg->dmsg_reply);
 	unsigned long reason = (msg_opts & DISPATCH_MACH_REGISTER_FOR_REPLY) ?
 			0 : DISPATCH_MACH_MESSAGE_NOT_SENT;
-	dmsgr = _dispatch_mach_msg_create_reply_disconnected(dmsg, NULL,
-			msg_opts & DISPATCH_MACH_ASYNC_REPLY
-			? DISPATCH_MACH_ASYNC_WAITER_DISCONNECTED
-			: DISPATCH_MACH_DISCONNECTED);
-	if (dmsg->do_ctxt) {
+	if (dm->dm_is_xpc && dmsg->do_ctxt) {
 		drq = _dispatch_mach_msg_context_async_reply_queue(dmsg->do_ctxt);
 	}
+	dmsgr = _dispatch_mach_msg_create_reply_disconnected(dmsg,
+			dwr ? &dwr->dwr_refs : NULL,
+			drq ? DISPATCH_MACH_ASYNC_WAITER_DISCONNECTED
+			: DISPATCH_MACH_DISCONNECTED);
 	_dispatch_mach_msg_set_reason(dmsg, 0, reason);
-	_dispatch_mach_handle_or_push_received_msg(dm, dmsg);
+	_dispatch_mach_handle_or_push_received_msg(dm, dmsg, 0);
 	if (dmsgr) {
 		if (drq) {
 			_dispatch_mach_push_async_reply_msg(dm, dmsgr, drq);
 		} else {
-			_dispatch_mach_handle_or_push_received_msg(dm, dmsgr);
+			_dispatch_mach_handle_or_push_received_msg(dm, dmsgr, 0);
 		}
 	}
 }
@@ -1046,7 +964,7 @@
 DISPATCH_NOINLINE
 static uint32_t
 _dispatch_mach_msg_send(dispatch_mach_t dm, dispatch_object_t dou,
-		dispatch_mach_reply_refs_t dmr, dispatch_qos_t qos,
+		dispatch_mach_reply_wait_refs_t dwr, dispatch_qos_t qos,
 		dispatch_mach_send_invoke_flags_t send_flags)
 {
 	dispatch_mach_send_refs_t dsrr = dm->dm_send_refs;
@@ -1065,9 +983,9 @@
 		if (unlikely(dsrr->dmsr_checkin && dmsg != dsrr->dmsr_checkin)) {
 			// send initial checkin message
 			if (unlikely(_dispatch_unote_registered(dsrr) &&
-					_dispatch_queue_get_current() != &_dispatch_mgr_q)) {
+					_dispatch_queue_get_current() != _dispatch_mgr_q._as_dq)) {
 				// send kevent must be uninstalled on the manager queue
-				dm->dm_needs_mgr = 1;
+				dm->dm_needs_mgr = true;
 				goto out;
 			}
 			if (unlikely(!_dispatch_mach_msg_send(dm,
@@ -1086,18 +1004,16 @@
 			if (dmsg != dsrr->dmsr_checkin) {
 				msg->msgh_remote_port = dsrr->dmsr_send;
 			}
-			if (_dispatch_queue_get_current() == &_dispatch_mgr_q) {
+			if (_dispatch_queue_get_current() == _dispatch_mgr_q._as_dq) {
 				if (unlikely(!_dispatch_unote_registered(dsrr))) {
 					_dispatch_mach_notification_kevent_register(dm,
 							msg->msgh_remote_port);
+					dispatch_assert(_dispatch_unote_registered(dsrr));
 				}
-				if (likely(_dispatch_unote_registered(dsrr))) {
-					if (os_atomic_load2o(dsrr, dmsr_notification_armed,
-							relaxed)) {
-						goto out;
-					}
-					opts |= MACH_SEND_NOTIFY;
+				if (dsrr->dmsr_notification_armed) {
+					goto out;
 				}
+				opts |= MACH_SEND_NOTIFY;
 			}
 			opts |= MACH_SEND_TIMEOUT;
 			if (dmsg->dmsg_priority != _voucher_get_priority(voucher)) {
@@ -1122,14 +1038,13 @@
 		_dispatch_debug_machport(msg->msgh_remote_port);
 		if (reply_port) _dispatch_debug_machport(reply_port);
 		if (msg_opts & DISPATCH_MACH_WAIT_FOR_REPLY) {
-			if (msg_opts & DISPATCH_MACH_OWNED_REPLY_PORT) {
+			if (dwr->dwr_refs.dmr_reply_port_owned) {
 				if (_dispatch_use_mach_special_reply_port()) {
 					opts |= MACH_SEND_SYNC_OVERRIDE;
 				}
 				_dispatch_clear_thread_reply_port(reply_port);
 			}
-			_dispatch_mach_reply_waiter_register(dm, dmr, reply_port, dmsg,
-					msg_opts);
+			_dispatch_mach_reply_waiter_register(dm, dwr, reply_port, dmsg);
 		}
 		kr = mach_msg(msg, opts, msg->msgh_size, 0, MACH_PORT_NULL, 0,
 				msg_priority);
@@ -1139,8 +1054,9 @@
 				opts, msg_opts, msg->msgh_voucher_port, reply_port,
 				mach_error_string(kr), kr);
 		if (unlikely(kr && (msg_opts & DISPATCH_MACH_WAIT_FOR_REPLY))) {
-			_dispatch_mach_reply_waiter_unregister(dm, dmr,
-					DU_UNREGISTER_REPLY_REMOVE);
+			uint32_t options = DMRU_MUST_SUCCEED | DMRU_REMOVE;
+			dispatch_assert(dwr);
+			_dispatch_mach_reply_unregister(dm, &dwr->dwr_refs, options);
 		}
 		if (clear_voucher) {
 			if (kr == MACH_SEND_INVALID_VOUCHER && msg->msgh_voucher_port) {
@@ -1153,12 +1069,10 @@
 	}
 	if (kr == MACH_SEND_TIMED_OUT && (opts & MACH_SEND_TIMEOUT)) {
 		if (opts & MACH_SEND_NOTIFY) {
-			_dispatch_debug("machport[0x%08x]: send-possible notification "
-					"armed", (mach_port_t)dsrr->du_ident);
 			_dispatch_mach_notification_set_armed(dsrr);
 		} else {
 			// send kevent must be installed on the manager queue
-			dm->dm_needs_mgr = 1;
+			dm->dm_needs_mgr = true;
 		}
 		if (ipc_kvoucher) {
 			_dispatch_kvoucher_debug("reuse on re-send", ipc_kvoucher);
@@ -1184,15 +1098,15 @@
 			_dispatch_unote_registered(dsrr))) {
 		_dispatch_mach_notification_kevent_unregister(dm);
 	}
-	if (slowpath(kr)) {
+	if (unlikely(kr)) {
 		// Send failed, so reply was never registered <rdar://problem/14309159>
-		dmsgr = _dispatch_mach_msg_create_reply_disconnected(dmsg, NULL,
-				msg_opts & DISPATCH_MACH_ASYNC_REPLY
-				? DISPATCH_MACH_ASYNC_WAITER_DISCONNECTED
-				: DISPATCH_MACH_DISCONNECTED);
-		if (dmsg->do_ctxt) {
+		if (dm->dm_is_xpc && dmsg->do_ctxt) {
 			drq = _dispatch_mach_msg_context_async_reply_queue(dmsg->do_ctxt);
 		}
+		dmsgr = _dispatch_mach_msg_create_reply_disconnected(dmsg,
+				dwr ? &dwr->dwr_refs : NULL,
+				drq ? DISPATCH_MACH_ASYNC_WAITER_DISCONNECTED
+				: DISPATCH_MACH_DISCONNECTED);
 	}
 	_dispatch_mach_msg_set_reason(dmsg, kr, 0);
 	if ((send_flags & DM_SEND_INVOKE_IMMEDIATE_SEND) &&
@@ -1200,13 +1114,13 @@
 		// Return sent message synchronously <rdar://problem/25947334>
 		send_status |= DM_SEND_STATUS_RETURNING_IMMEDIATE_SEND_RESULT;
 	} else {
-		_dispatch_mach_handle_or_push_received_msg(dm, dmsg);
+		_dispatch_mach_handle_or_push_received_msg(dm, dmsg, 0);
 	}
 	if (dmsgr) {
 		if (drq) {
 			_dispatch_mach_push_async_reply_msg(dm, dmsgr, drq);
 		} else {
-			_dispatch_mach_handle_or_push_received_msg(dm, dmsgr);
+			_dispatch_mach_handle_or_push_received_msg(dm, dmsgr, 0);
 		}
 	}
 	send_status |= DM_SEND_STATUS_SUCCESS;
@@ -1249,30 +1163,18 @@
 }
 
 #define _dispatch_mach_send_push_update_tail(dmsr, tail) \
-		os_mpsc_push_update_tail(dmsr, dmsr, tail, do_next)
-#define _dispatch_mach_send_push_update_head(dmsr, head) \
-		os_mpsc_push_update_head(dmsr, dmsr, head)
+		os_mpsc_push_update_tail(os_mpsc(dmsr, dmsr), tail, do_next)
+#define _dispatch_mach_send_push_update_prev(dmsr, prev, head) \
+		os_mpsc_push_update_prev(os_mpsc(dmsr, dmsr), prev, head, do_next)
 #define _dispatch_mach_send_get_head(dmsr) \
-		os_mpsc_get_head(dmsr, dmsr)
-#define _dispatch_mach_send_unpop_head(dmsr, dc, dc_next) \
-		os_mpsc_undo_pop_head(dmsr, dmsr, dc, dc_next, do_next)
+		os_mpsc_get_head(os_mpsc(dmsr, dmsr))
+#define _dispatch_mach_send_undo_pop_head(dmsr, dc, dc_next) \
+		os_mpsc_undo_pop_head(os_mpsc(dmsr, dmsr), dc, dc_next, do_next)
 #define _dispatch_mach_send_pop_head(dmsr, head) \
-		os_mpsc_pop_head(dmsr, dmsr, head, do_next)
+		os_mpsc_pop_head(os_mpsc(dmsr, dmsr), head, do_next)
 
 #define dm_push(dm, dc, qos) \
-		_dispatch_queue_push((dm)->_as_dq, dc, qos)
-
-DISPATCH_ALWAYS_INLINE
-static inline bool
-_dispatch_mach_send_push_inline(dispatch_mach_send_refs_t dmsr,
-		dispatch_object_t dou)
-{
-	if (_dispatch_mach_send_push_update_tail(dmsr, dou._do)) {
-		_dispatch_mach_send_push_update_head(dmsr, dou._do);
-		return true;
-	}
-	return false;
-}
+		_dispatch_lane_push(dm, dc, qos)
 
 DISPATCH_NOINLINE
 static bool
@@ -1280,16 +1182,16 @@
 		dispatch_mach_send_invoke_flags_t send_flags)
 {
 	dispatch_mach_send_refs_t dmsr = dm->dm_send_refs;
-	dispatch_mach_reply_refs_t dmr;
+	dispatch_mach_reply_wait_refs_t dwr;
 	dispatch_mach_msg_t dmsg;
 	struct dispatch_object_s *dc = NULL, *next_dc = NULL;
 	dispatch_qos_t qos = _dmsr_state_max_qos(dmsr->dmsr_state);
 	uint64_t old_state, new_state;
 	uint32_t send_status;
-	bool needs_mgr, disconnecting, returning_send_result = false;
+	bool returning_send_result = false;
+	dispatch_wakeup_flags_t wflags = 0;
 
 again:
-	needs_mgr = false; disconnecting = false;
 	while (dmsr->dmsr_tail) {
 		dc = _dispatch_mach_send_get_head(dmsr);
 		do {
@@ -1302,24 +1204,24 @@
 				if (!(send_flags & DM_SEND_INVOKE_CAN_RUN_BARRIER)) {
 					goto partial_drain;
 				}
-				_dispatch_continuation_pop(dc, NULL, flags, dm->_as_dq);
+				_dispatch_continuation_pop(dc, NULL, flags, dm);
 				continue;
 			}
 			if (_dispatch_object_is_sync_waiter(dc)) {
 				dmsg = ((dispatch_continuation_t)dc)->dc_data;
-				dmr = ((dispatch_continuation_t)dc)->dc_other;
+				dwr = ((dispatch_continuation_t)dc)->dc_other;
 			} else if (_dispatch_object_has_vtable(dc)) {
 				dmsg = (dispatch_mach_msg_t)dc;
-				dmr = NULL;
+				dwr = NULL;
 			} else {
 				if (_dispatch_unote_registered(dmsr) &&
-						(_dispatch_queue_get_current() != &_dispatch_mgr_q)) {
+						(_dispatch_queue_get_current() != _dispatch_mgr_q._as_dq)) {
 					// send kevent must be uninstalled on the manager queue
-					needs_mgr = true;
+					dm->dm_needs_mgr = true;
+					wflags |= DISPATCH_WAKEUP_MAKE_DIRTY;
 					goto partial_drain;
 				}
 				if (unlikely(!_dispatch_mach_reconnect_invoke(dm, dc))) {
-					disconnecting = true;
 					goto partial_drain;
 				}
 				_dispatch_perfmon_workitem_inc();
@@ -1328,12 +1230,13 @@
 			_dispatch_voucher_ktrace_dmsg_pop(dmsg);
 			if (unlikely(dmsr->dmsr_disconnect_cnt ||
 					(dm->dq_atomic_flags & DSF_CANCELED))) {
-				_dispatch_mach_msg_not_sent(dm, dmsg);
+				_dispatch_mach_msg_not_sent(dm, dmsg, dwr);
 				_dispatch_perfmon_workitem_inc();
 				continue;
 			}
-			send_status = _dispatch_mach_msg_send(dm, dmsg, dmr, qos, sf);
+			send_status = _dispatch_mach_msg_send(dm, dmsg, dwr, qos, sf);
 			if (unlikely(!send_status)) {
+				if (dm->dm_needs_mgr) wflags |= DISPATCH_WAKEUP_MAKE_DIRTY;
 				goto partial_drain;
 			}
 			if (send_status & DM_SEND_STATUS_RETURNING_IMMEDIATE_SEND_RESULT) {
@@ -1358,7 +1261,7 @@
 
 partial_drain:
 	// if this is not a complete drain, we must undo some things
-	_dispatch_mach_send_unpop_head(dmsr, dc, next_dc);
+	_dispatch_mach_send_undo_pop_head(dmsr, dc, next_dc);
 
 	if (_dispatch_object_has_type(dc,
 			DISPATCH_CONTINUATION_TYPE(MACH_SEND_BARRIER))) {
@@ -1390,23 +1293,38 @@
 		_dispatch_set_basepri_override_qos(_dmsr_state_max_qos(old_state));
 	}
 
+	qos = _dmsr_state_max_qos(new_state);
 	if (unlikely(new_state & DISPATCH_MACH_STATE_UNLOCK_MASK)) {
-		qos = _dmsr_state_max_qos(new_state);
 		os_atomic_thread_fence(dependency);
 		dmsr = os_atomic_force_dependency_on(dmsr, new_state);
 		goto again;
 	}
 
 	if (new_state & DISPATCH_MACH_STATE_PENDING_BARRIER) {
-		qos = _dmsr_state_max_qos(new_state);
+		// we don't need to wake up the mach channel with DISPATCH_WAKEUP_EVENT
+		// because a push on the receive queue always causes a wakeup even
+		// when DSF_NEEDS_EVENT is set.
 		_dispatch_mach_push_send_barrier_drain(dm, qos);
-	} else {
-		if (needs_mgr || dm->dm_needs_mgr) {
-			qos = _dmsr_state_max_qos(new_state);
+		return returning_send_result;
+	}
+
+	if (new_state == 0 && dm->dm_disconnected && !dm->dm_cancel_handler_called){
+		// cancellation waits for the send queue to be empty, so when we know
+		// cancellation is pending and we have just emptied the queue, force
+		// an EVENT wakeup.
+		wflags |= DISPATCH_WAKEUP_EVENT | DISPATCH_WAKEUP_MAKE_DIRTY;
+	}
+	if ((old_state ^ new_state) & DISPATCH_MACH_STATE_ENQUEUED) {
+		if (wflags) {
+			wflags |= DISPATCH_WAKEUP_CONSUME_2;
 		} else {
-			qos = 0;
+			// <rdar://problem/26734097> Note that after this release
+			// the mach channel may be gone.
+			_dispatch_release_2(dm);
 		}
-		if (!disconnecting) dx_wakeup(dm, qos, DISPATCH_WAKEUP_MAKE_DIRTY);
+	}
+	if (wflags) {
+		dx_wakeup(dm, dm->dm_needs_mgr ? qos : 0, wflags);
 	}
 	return returning_send_result;
 }
@@ -1456,9 +1374,6 @@
 	if (unlikely((old_state & canlock_mask) != canlock_state)) {
 		return;
 	}
-	if (send_flags & DM_SEND_INVOKE_CANCEL) {
-		_dispatch_mach_cancel(dm);
-	}
 	_dispatch_mach_send_drain(dm, flags, send_flags);
 }
 
@@ -1468,15 +1383,15 @@
 		DISPATCH_UNUSED dispatch_invoke_context_t dic,
 		dispatch_invoke_flags_t flags)
 {
-	dispatch_mach_t dm = (dispatch_mach_t)_dispatch_queue_get_current();
-	uintptr_t dc_flags = DISPATCH_OBJ_CONSUME_BIT;
+	dispatch_mach_t dm = upcast(_dispatch_queue_get_current())._dm;
+	uintptr_t dc_flags = DC_FLAG_CONSUME | DC_FLAG_NO_INTROSPECTION;
 	dispatch_thread_frame_s dtf;
 
 	DISPATCH_COMPILER_CAN_ASSUME(dc->dc_priority == DISPATCH_NO_PRIORITY);
 	DISPATCH_COMPILER_CAN_ASSUME(dc->dc_voucher == DISPATCH_NO_VOUCHER);
 	// hide the mach channel (see _dispatch_mach_barrier_invoke comment)
 	_dispatch_thread_frame_stash(&dtf);
-	_dispatch_continuation_pop_forwarded(dc, DISPATCH_NO_VOUCHER, dc_flags,{
+	_dispatch_continuation_pop_forwarded(dc, dc_flags, dm, {
 		_dispatch_mach_send_invoke(dm, flags,
 				DM_SEND_INVOKE_NEEDS_BARRIER | DM_SEND_INVOKE_CAN_RUN_BARRIER);
 	});
@@ -1499,33 +1414,42 @@
 
 DISPATCH_NOINLINE
 static void
-_dispatch_mach_send_push(dispatch_mach_t dm, dispatch_continuation_t dc,
+_dispatch_mach_send_push(dispatch_mach_t dm, dispatch_object_t dou,
 		dispatch_qos_t qos)
 {
 	dispatch_mach_send_refs_t dmsr = dm->dm_send_refs;
 	uint64_t old_state, new_state, state_flags = 0;
+	struct dispatch_object_s *prev;
+	dispatch_wakeup_flags_t wflags = 0;
+	bool is_send_barrier = (dou._dc->do_vtable == DC_VTABLE(MACH_SEND_BARRIER));
 	dispatch_tid owner;
-	bool wakeup;
 
-	// <rdar://problem/25896179> when pushing a send barrier that destroys
-	// the last reference to this channel, and the send queue is already
-	// draining on another thread, the send barrier may run as soon as
-	// _dispatch_mach_send_push_inline() returns.
-	_dispatch_retain_2(dm);
+	// <rdar://problem/25896179&26266265> the send queue needs to retain
+	// the mach channel if not empty, for the whole duration of this call
+	//
+	// When we may add the ENQUEUED bit, we need to reserve 2 more references
+	// that we will transfer to _dispatch_mach_send_drain().
+	prev = _dispatch_mach_send_push_update_tail(dmsr, dou._do);
+	_dispatch_retain_n_unsafe(dm, os_mpsc_push_was_empty(prev) ? 4 : 2);
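+	// (4 = +2 for the duration of this call, +2 to hand off with the
+	// ENQUEUED bit below; 2 = only the +2 for the duration of this call)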
+	_dispatch_mach_send_push_update_prev(dmsr, prev, dou._do);
 
-	wakeup = _dispatch_mach_send_push_inline(dmsr, dc);
-	if (wakeup) {
-		state_flags = DISPATCH_MACH_STATE_DIRTY;
-		if (dc->do_vtable == DC_VTABLE(MACH_SEND_BARRIER)) {
+	if (unlikely(os_mpsc_push_was_empty(prev))) {
+		state_flags = DISPATCH_MACH_STATE_DIRTY | DISPATCH_MACH_STATE_ENQUEUED;
+		wflags |= DISPATCH_WAKEUP_MAKE_DIRTY;
+		if (is_send_barrier) {
 			state_flags |= DISPATCH_MACH_STATE_PENDING_BARRIER;
 		}
-	}
 
-	if (state_flags) {
 		os_atomic_rmw_loop2o(dmsr, dmsr_state, old_state, new_state, release, {
 			new_state = _dmsr_state_merge_override(old_state, qos);
 			new_state |= state_flags;
 		});
+		if ((old_state ^ new_state) & DISPATCH_MACH_STATE_ENQUEUED) {
+			// +2 transferred to the ENQUEUED state, _dispatch_mach_send_drain
+			// will consume it when clearing the bit.
+		} else {
+			_dispatch_release_2_no_dispose(dm);
+		}
 	} else {
 		os_atomic_rmw_loop2o(dmsr, dmsr_state, old_state, new_state, relaxed, {
 			new_state = _dmsr_state_merge_override(old_state, qos);
@@ -1535,6 +1459,7 @@
 		});
 	}
 
 	qos = _dmsr_state_max_qos(new_state);
 	owner = _dispatch_lock_owner((dispatch_lock)old_state);
 	if (owner) {
@@ -1542,21 +1467,15 @@
 			_dispatch_wqthread_override_start_check_owner(owner, qos,
 					&dmsr->dmsr_state_lock.dul_lock);
 		}
-		return _dispatch_release_2_tailcall(dm);
+	} else if (state_flags & DISPATCH_MACH_STATE_PENDING_BARRIER) {
+		_dispatch_mach_push_send_barrier_drain(dm, qos);
+	} else if (wflags || dmsr->dmsr_disconnect_cnt ||
+			(dm->dq_atomic_flags & DSF_CANCELED)) {
+		return dx_wakeup(dm, qos, wflags | DISPATCH_WAKEUP_CONSUME_2);
+	} else if (old_state & DISPATCH_MACH_STATE_PENDING_BARRIER) {
+		return dx_wakeup(dm, qos, DISPATCH_WAKEUP_CONSUME_2);
 	}
 
-	dispatch_wakeup_flags_t wflags = 0;
-	if (state_flags & DISPATCH_MACH_STATE_PENDING_BARRIER) {
-		_dispatch_mach_push_send_barrier_drain(dm, qos);
-	} else if (wakeup || dmsr->dmsr_disconnect_cnt ||
-			(dm->dq_atomic_flags & DSF_CANCELED)) {
-		wflags = DISPATCH_WAKEUP_MAKE_DIRTY | DISPATCH_WAKEUP_CONSUME_2;
-	} else if (old_state & DISPATCH_MACH_STATE_PENDING_BARRIER) {
-		wflags = DISPATCH_WAKEUP_CONSUME_2;
-	}
-	if (wflags) {
-		return dx_wakeup(dm, qos, wflags);
-	}
 	return _dispatch_release_2_tailcall(dm);
 }
 
@@ -1569,12 +1488,19 @@
 	dispatch_mach_send_refs_t dmsr = dm->dm_send_refs;
 	dispatch_lock owner_self = _dispatch_lock_value_for_self();
 	uint64_t old_state, new_state, canlock_mask, state_flags = 0;
+	dispatch_wakeup_flags_t wflags = 0;
 	dispatch_tid owner;
+	struct dispatch_object_s *prev;
 
-	bool wakeup = _dispatch_mach_send_push_inline(dmsr, dou);
-	if (wakeup) {
-		state_flags = DISPATCH_MACH_STATE_DIRTY;
+	prev = _dispatch_mach_send_push_update_tail(dmsr, dou._do);
+	if (os_mpsc_push_was_empty(prev)) {
+		// <rdar://problem/25896179&26266265> the send queue needs to retain
+		// the mach channel if not empty.
+		_dispatch_retain_2(dm);
+		state_flags = DISPATCH_MACH_STATE_DIRTY | DISPATCH_MACH_STATE_ENQUEUED;
+		wflags = DISPATCH_WAKEUP_CONSUME_2 | DISPATCH_WAKEUP_MAKE_DIRTY;
 	}
+	_dispatch_mach_send_push_update_prev(dmsr, prev, dou._do);
 
 	if (unlikely(dmsr->dmsr_disconnect_cnt ||
 			(dm->dq_atomic_flags & DSF_CANCELED))) {
@@ -1582,7 +1508,10 @@
 			new_state = _dmsr_state_merge_override(old_state, qos);
 			new_state |= state_flags;
 		});
-		dx_wakeup(dm, qos, DISPATCH_WAKEUP_MAKE_DIRTY);
+		if ((old_state ^ new_state) & DISPATCH_MACH_STATE_ENQUEUED) {
+			wflags &= ~(dispatch_wakeup_flags_t)DISPATCH_WAKEUP_CONSUME_2;
+		}
+		dx_wakeup(dm, qos, wflags);
 		return false;
 	}
 
@@ -1599,6 +1528,9 @@
 				new_state &= ~DISPATCH_MACH_STATE_PENDING_BARRIER;
 			}
 		});
+		if ((old_state ^ new_state) & DISPATCH_MACH_STATE_ENQUEUED) {
+			wflags &= ~(dispatch_wakeup_flags_t)DISPATCH_WAKEUP_CONSUME_2;
+		}
 	} else {
 		os_atomic_rmw_loop2o(dmsr, dmsr_state, old_state, new_state, acquire, {
 			new_state = _dmsr_state_merge_override(old_state, qos);
@@ -1620,11 +1552,12 @@
 			_dispatch_wqthread_override_start_check_owner(owner, qos,
 					&dmsr->dmsr_state_lock.dul_lock);
 		}
+		if (wflags & DISPATCH_WAKEUP_CONSUME_2) _dispatch_release_2(dm);
 		return false;
 	}
 
 	if (old_state & DISPATCH_MACH_STATE_PENDING_BARRIER) {
-		dx_wakeup(dm, qos, 0);
+		dx_wakeup(dm, qos, wflags);
 		return false;
 	}
 
@@ -1632,10 +1565,11 @@
 	// been dequeued by another thread that raced us to the send queue lock.
 	// A plain load of the head and comparison against our object pointer is
 	// sufficient.
-	if (unlikely(!(wakeup && dou._do == dmsr->dmsr_head))) {
+	if (unlikely(!(wflags && dou._do == dmsr->dmsr_head))) {
 		// Don't request immediate send result for messages we don't own
 		send_flags &= ~DM_SEND_INVOKE_IMMEDIATE_SEND_MASK;
 	}
+	if (wflags & DISPATCH_WAKEUP_CONSUME_2) _dispatch_release_2_no_dispose(dm);
 	return _dispatch_mach_send_drain(dm, DISPATCH_INVOKE_NONE, send_flags);
 }
 
@@ -1646,10 +1580,9 @@
 static inline void
 _dispatch_mach_notification_kevent_unregister(dispatch_mach_t dm)
 {
+	uint32_t duu_options = DUU_DELETE_ACK | DUU_MUST_SUCCEED;
 	DISPATCH_ASSERT_ON_MANAGER_QUEUE();
-	if (_dispatch_unote_registered(dm->dm_send_refs)) {
-		dispatch_assume(_dispatch_unote_unregister(dm->dm_send_refs, 0));
-	}
+	_dispatch_unote_unregister(dm->dm_send_refs, duu_options);
 	dm->dm_send_refs->du_ident = 0;
 }
 
@@ -1660,13 +1593,12 @@
 	DISPATCH_ASSERT_ON_MANAGER_QUEUE();
 	dm->dm_send_refs->du_ident = send;
 	dispatch_assume(_dispatch_unote_register(dm->dm_send_refs,
-			DISPATCH_WLH_ANON, 0));
+			DISPATCH_WLH_ANON, DISPATCH_PRIORITY_FLAG_MANAGER));
 }
 
 void
-_dispatch_mach_merge_notification(dispatch_unote_t du,
+_dispatch_mach_notification_merge_evt(dispatch_unote_t du,
 		uint32_t flags DISPATCH_UNUSED, uintptr_t data,
-		uintptr_t status DISPATCH_UNUSED,
 		pthread_priority_t pp DISPATCH_UNUSED)
 {
 	dispatch_mach_send_refs_t dmsr = du._dmsr;
@@ -1676,21 +1608,27 @@
 		_dispatch_mach_send_invoke(dm, DISPATCH_INVOKE_MANAGER_DRAIN,
 				DM_SEND_INVOKE_MAKE_DIRTY);
 	}
+	_dispatch_release_2_tailcall(dm);
 }
 
 DISPATCH_NOINLINE
 static void
 _dispatch_mach_handle_or_push_received_msg(dispatch_mach_t dm,
-		dispatch_mach_msg_t dmsg)
+		dispatch_mach_msg_t dmsg, pthread_priority_t pp)
 {
 	mach_error_t error;
 	dispatch_mach_reason_t reason = _dispatch_mach_msg_get_reason(dmsg, &error);
+	dispatch_qos_t qos;
+
 	if (reason == DISPATCH_MACH_MESSAGE_RECEIVED || !dm->dm_is_xpc ||
 			!_dispatch_mach_xpc_hooks->dmxh_direct_message_handler(
 			dm->dm_recv_refs->dmrr_handler_ctxt, reason, dmsg, error)) {
 		// Not XPC client or not a message that XPC can handle inline - push
 		// it onto the channel queue.
-		dm_push(dm, dmsg, _dispatch_qos_from_pp(dmsg->dmsg_priority));
+		_dispatch_trace_item_push(dm, dmsg);
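+		// use the priority that came with the event if any, and fall back to
+		// the channel's own priority when none was provided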
+		qos = _dispatch_qos_from_pp(pp);
+		if (!qos) qos = _dispatch_priority_qos(dm->dq_priority);
+		dm_push(dm, dmsg, qos);
 	} else {
 		// XPC handled the message inline. Do the cleanup that would otherwise
 		// have happened in _dispatch_mach_msg_invoke(), leaving out steps that
@@ -1703,12 +1641,13 @@
 DISPATCH_ALWAYS_INLINE
 static void
 _dispatch_mach_push_async_reply_msg(dispatch_mach_t dm,
-		dispatch_mach_msg_t dmsg, dispatch_queue_t drq) {
+		dispatch_mach_msg_t dmsg, dispatch_queue_t drq)
+{
 	// Push the message onto the given queue. This function is only used for
 	// replies to messages sent by
 	// dispatch_mach_send_with_result_and_async_reply_4libxpc().
 	dispatch_continuation_t dc = _dispatch_mach_msg_async_reply_wrap(dmsg, dm);
-	_dispatch_trace_continuation_push(drq, dc);
+	_dispatch_trace_item_push(drq, dc);
 	dx_push(drq, dc, _dispatch_qos_from_pp(dmsg->dmsg_priority));
 }
 
@@ -1726,6 +1665,7 @@
 }
 
 
 static inline mach_msg_option_t
 _dispatch_mach_send_options(void)
 {
@@ -1734,26 +1674,36 @@
 }
 
 DISPATCH_ALWAYS_INLINE
-static inline dispatch_qos_t
-_dispatch_mach_priority_propagate(mach_msg_option_t options,
-		pthread_priority_t *msg_pp)
+static inline mach_msg_option_t
+_dispatch_mach_send_msg_prepare(dispatch_mach_t dm,
+		dispatch_mach_msg_t dmsg, mach_msg_option_t options)
 {
-#if DISPATCH_USE_NOIMPORTANCE_QOS
-	if (options & MACH_SEND_NOIMPORTANCE) {
-		*msg_pp = 0;
-		return 0;
+#if DISPATCH_DEBUG
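+	// an XPC message carrying a reply port that is not awaited synchronously
+	// must provide an async reply queue in its context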
+	if (dm->dm_is_xpc && (options & DISPATCH_MACH_WAIT_FOR_REPLY) == 0 &&
+			_dispatch_mach_msg_get_reply_port(dmsg)) {
+		dispatch_assert(
+				_dispatch_mach_msg_context_async_reply_queue(dmsg->do_ctxt));
 	}
+#else
+	(void)dm;
 #endif
-	unsigned int flags = DISPATCH_PRIORITY_PROPAGATE_CURRENT;
-	if ((options & DISPATCH_MACH_WAIT_FOR_REPLY) &&
-			(options & DISPATCH_MACH_OWNED_REPLY_PORT) &&
-			_dispatch_use_mach_special_reply_port()) {
-		flags |= DISPATCH_PRIORITY_PROPAGATE_FOR_SYNC_IPC;
+	if (DISPATCH_USE_NOIMPORTANCE_QOS && (options & MACH_SEND_NOIMPORTANCE)) {
+		dmsg->dmsg_priority = 0;
+	} else {
+		unsigned int flags = DISPATCH_PRIORITY_PROPAGATE_CURRENT;
+		if ((options & DISPATCH_MACH_WAIT_FOR_REPLY) &&
+				_dispatch_use_mach_special_reply_port()) {
+			// TODO: remove QoS contribution of sync IPC messages to send queue
+			// rdar://31848737
+			flags |= DISPATCH_PRIORITY_PROPAGATE_FOR_SYNC_IPC;
+		}
+		dmsg->dmsg_priority = _dispatch_priority_compute_propagated(0, flags);
 	}
-	*msg_pp = _dispatch_priority_compute_propagated(0, flags);
-	// TODO: remove QoS contribution of sync IPC messages to send queue
-	// rdar://31848737
-	return _dispatch_qos_from_pp(*msg_pp);
+	dmsg->dmsg_voucher = _voucher_copy();
+	_dispatch_voucher_debug("mach-msg[%p] set", dmsg->dmsg_voucher, dmsg);
+	options |= _dispatch_mach_send_options();
+	dmsg->dmsg_options = options;
+	return options;
 }
 
 DISPATCH_NOINLINE
@@ -1762,21 +1712,16 @@
 		dispatch_continuation_t dc_wait, mach_msg_option_t options)
 {
 	dispatch_mach_send_refs_t dmsr = dm->dm_send_refs;
-	if (slowpath(dmsg->do_next != DISPATCH_OBJECT_LISTLESS)) {
+	if (unlikely(dmsg->do_next != DISPATCH_OBJECT_LISTLESS)) {
 		DISPATCH_CLIENT_CRASH(dmsg->do_next, "Message already enqueued");
 	}
+	options = _dispatch_mach_send_msg_prepare(dm, dmsg, options);
 	dispatch_retain(dmsg);
-	pthread_priority_t msg_pp;
-	dispatch_qos_t qos = _dispatch_mach_priority_propagate(options, &msg_pp);
-	options |= _dispatch_mach_send_options();
-	dmsg->dmsg_options = options;
+	dispatch_qos_t qos = _dispatch_qos_from_pp(dmsg->dmsg_priority);
 	mach_msg_header_t *msg = _dispatch_mach_msg_get_msg(dmsg);
 	dmsg->dmsg_reply = _dispatch_mach_msg_get_reply_port(dmsg);
 	bool is_reply = (MACH_MSGH_BITS_REMOTE(msg->msgh_bits) ==
 			MACH_MSG_TYPE_MOVE_SEND_ONCE);
-	dmsg->dmsg_priority = msg_pp;
-	dmsg->dmsg_voucher = _voucher_copy();
-	_dispatch_voucher_debug("mach-msg[%p] set", dmsg->dmsg_voucher, dmsg);
 
 	uint32_t send_status;
 	bool returning_send_result = false;
@@ -1816,7 +1761,7 @@
 {
 	dispatch_assert_zero(options & DISPATCH_MACH_OPTIONS_MASK);
 	options &= ~DISPATCH_MACH_OPTIONS_MASK;
-	bool returned_send_result = _dispatch_mach_send_msg(dm, dmsg, NULL,options);
+	bool returned_send_result = _dispatch_mach_send_msg(dm, dmsg, NULL, options);
 	dispatch_assert(!returned_send_result);
 }
 
@@ -1848,37 +1793,41 @@
 		dispatch_mach_msg_t dmsg, mach_msg_option_t options,
 		bool *returned_send_result)
 {
+	struct dispatch_mach_reply_wait_refs_s dwr_buf = {
+		.dwr_refs = {
+			.du_type = DISPATCH_MACH_TYPE_WAITER,
+			.dmr_ctxt = dmsg->do_ctxt,
+		},
+		.dwr_waiter_tid = _dispatch_tid_self(),
+	};
+	dispatch_mach_reply_wait_refs_t dwr = &dwr_buf;
 	mach_port_t send = MACH_PORT_NULL;
 	mach_port_t reply_port = _dispatch_mach_msg_get_reply_port(dmsg);
-	if (!reply_port) {
+
+	if (likely(!reply_port)) {
 		// use per-thread mach reply port <rdar://24597802>
 		reply_port = _dispatch_get_thread_reply_port();
 		mach_msg_header_t *hdr = _dispatch_mach_msg_get_msg(dmsg);
 		dispatch_assert(MACH_MSGH_BITS_LOCAL(hdr->msgh_bits) ==
 				MACH_MSG_TYPE_MAKE_SEND_ONCE);
 		hdr->msgh_local_port = reply_port;
-		options |= DISPATCH_MACH_OWNED_REPLY_PORT;
+		dwr->dwr_refs.dmr_reply_port_owned = true;
 	}
 	options |= DISPATCH_MACH_WAIT_FOR_REPLY;
 
-	dispatch_mach_reply_refs_t dmr;
 #if DISPATCH_DEBUG
-	dmr = _dispatch_calloc(1, sizeof(*dmr));
-#else
-	struct dispatch_mach_reply_refs_s dmr_buf = { };
-	dmr = &dmr_buf;
+	dwr = _dispatch_calloc(1, sizeof(*dwr));
+	*dwr = dwr_buf;
 #endif
 	struct dispatch_continuation_s dc_wait = {
-		.dc_flags = DISPATCH_OBJ_SYNC_WAITER_BIT,
+		.dc_flags = DC_FLAG_SYNC_WAITER,
 		.dc_data = dmsg,
-		.dc_other = dmr,
+		.dc_other = &dwr->dwr_refs,
 		.dc_priority = DISPATCH_NO_PRIORITY,
 		.dc_voucher = DISPATCH_NO_VOUCHER,
 	};
-	dmr->dmr_ctxt = dmsg->do_ctxt;
-	dmr->dmr_waiter_tid = _dispatch_tid_self();
 	*returned_send_result = _dispatch_mach_send_msg(dm, dmsg, &dc_wait,options);
-	if (options & DISPATCH_MACH_OWNED_REPLY_PORT) {
+	if (dwr->dwr_refs.dmr_reply_port_owned) {
 		_dispatch_clear_thread_reply_port(reply_port);
 		if (_dispatch_use_mach_special_reply_port()) {
 			// link special reply port to send right for remote receive right
@@ -1886,9 +1835,9 @@
 			send = dm->dm_send_refs->dmsr_send;
 		}
 	}
-	dmsg = _dispatch_mach_msg_reply_recv(dm, dmr, reply_port, send);
+	dmsg = _dispatch_mach_msg_reply_recv(dm, dwr, reply_port, send);
 #if DISPATCH_DEBUG
-	free(dmr);
+	free(dwr);
 #endif
 	return dmsg;
 }
@@ -1957,7 +1906,6 @@
 	if (!reply_port) {
 		DISPATCH_CLIENT_CRASH(0, "Reply port needed for async send with reply");
 	}
-	options |= DISPATCH_MACH_ASYNC_REPLY;
 	bool returned_send_result = _dispatch_mach_send_msg(dm, dmsg, NULL,options);
 	unsigned long reason = DISPATCH_MACH_NEEDS_DEFERRED_SEND;
 	mach_error_t err = 0;
@@ -1970,10 +1918,61 @@
 
 DISPATCH_NOINLINE
 static bool
-_dispatch_mach_disconnect(dispatch_mach_t dm)
+_dispatch_mach_cancel(dispatch_mach_t dm)
 {
+	bool uninstalled = dm->dm_disconnected;
+	if (dm->dm_send_refs->dmsr_disconnect_cnt) {
+		uninstalled = false; // <rdar://problem/31233110>
+	}
+
+	_dispatch_object_debug(dm, "%s", __func__);
+
+	uint32_t duu_options = DMRU_DELETE_ACK;
+	if (!(_dispatch_queue_atomic_flags(dm) & DSF_NEEDS_EVENT)) {
+		duu_options |= DMRU_PROBE;
+	}
+
+	dispatch_xpc_term_refs_t dxtr = dm->dm_xpc_term_refs;
+	if (dxtr && !_dispatch_unote_unregister(dxtr, duu_options)) {
+		uninstalled = false;
+	}
+
+	dispatch_mach_recv_refs_t dmrr = dm->dm_recv_refs;
+	mach_port_t local_port = (mach_port_t)dmrr->du_ident;
+	if (local_port) {
+		// handle the deferred delete case properly, similar to what
+		// _dispatch_source_invoke2() does
+		if (_dispatch_unote_unregister(dmrr, duu_options)) {
+			_dispatch_mach_msg_disconnected(dm, local_port, MACH_PORT_NULL);
+			dmrr->du_ident = 0;
+		} else {
+			uninstalled = false;
+		}
+	}
+
+	if (uninstalled) {
+		dispatch_queue_flags_t dqf;
+		dqf = _dispatch_queue_atomic_flags_set_and_clear_orig(dm,
+				DSF_DELETED, DSF_NEEDS_EVENT);
+		if (unlikely(dqf & (DSF_DELETED | DSF_CANCEL_WAITER))) {
+			DISPATCH_CLIENT_CRASH(dqf, "Corrupt channel state");
+		}
+		_dispatch_release_no_dispose(dm); // see _dispatch_queue_alloc()
+	} else {
+		_dispatch_queue_atomic_flags_set(dm, DSF_NEEDS_EVENT);
+	}
+	return uninstalled;
+}
+
+DISPATCH_NOINLINE
+static bool
+_dispatch_mach_reconnect_invoke(dispatch_mach_t dm, dispatch_object_t dou)
+{
+	_dispatch_object_debug(dm, "%s", __func__);
+
+	// 1. handle the send-possible notification and checkin message
+
 	dispatch_mach_send_refs_t dmsr = dm->dm_send_refs;
-	bool disconnected;
 	if (_dispatch_unote_registered(dmsr)) {
 		_dispatch_mach_notification_kevent_unregister(dm);
 	}
@@ -1982,89 +1981,62 @@
 		dmsr->dmsr_send = MACH_PORT_NULL;
 	}
 	if (dmsr->dmsr_checkin) {
-		_dispatch_mach_msg_not_sent(dm, dmsr->dmsr_checkin);
+		_dispatch_mach_msg_not_sent(dm, dmsr->dmsr_checkin, NULL);
 		dmsr->dmsr_checkin = NULL;
 	}
-	_dispatch_unfair_lock_lock(&dm->dm_send_refs->dmsr_replies_lock);
+	dm->dm_needs_mgr = 0;
+
+	// 2. cancel all pending replies and break out synchronous waiters
+
 	dispatch_mach_reply_refs_t dmr, tmp;
-	TAILQ_FOREACH_SAFE(dmr, &dm->dm_send_refs->dmsr_replies, dmr_list, tmp) {
-		TAILQ_REMOVE(&dm->dm_send_refs->dmsr_replies, dmr, dmr_list);
-		_TAILQ_MARK_NOT_ENQUEUED(dmr, dmr_list);
-		if (_dispatch_unote_registered(dmr)) {
-			if (!_dispatch_mach_reply_kevent_unregister(dm, dmr,
-					DU_UNREGISTER_DISCONNECTED)) {
-				TAILQ_INSERT_HEAD(&dm->dm_send_refs->dmsr_replies, dmr,
-					dmr_list);
+	LIST_HEAD(, dispatch_mach_reply_refs_s) replies =
+			LIST_HEAD_INITIALIZER(replies);
+	bool disconnected;
+
+	// _dispatch_mach_reply_merge_msg is the one passing DMRU_DELETE_ACK
+	uint32_t dmru_options = DMRU_CANCEL | DMRU_DISCONNECTED;
+	if (!(_dispatch_queue_atomic_flags(dm) & DSF_NEEDS_EVENT)) {
+		dmru_options |= DMRU_PROBE;
+	}
+
+	_dispatch_unfair_lock_lock(&dmsr->dmsr_replies_lock);
+	LIST_SWAP(&replies, &dmsr->dmsr_replies,
+			dispatch_mach_reply_refs_s, dmr_list);
+	LIST_FOREACH_SAFE(dmr, &replies, dmr_list, tmp) {
+		_LIST_MARK_NOT_ENQUEUED(dmr, dmr_list);
+		_dispatch_mach_reply_unregister(dm, dmr, dmru_options);
+	}
+	// any unote whose unregistration fails is put back on the reply list
+	disconnected = LIST_EMPTY(&dmsr->dmsr_replies);
+	_dispatch_unfair_lock_unlock(&dmsr->dmsr_replies_lock);
+
+	// 3. if no reply is left pending deferred deletion, finish reconnecting
+
+	if (disconnected) {
+		mach_port_t dmsr_send = (mach_port_t)dou._dc->dc_other;
+		dispatch_mach_msg_t dmsr_checkin = dou._dc->dc_data;
+
+		_dispatch_continuation_free(dou._dc);
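+		// DM_CHECKIN_CANCELED is a sentinel: instead of installing a new
+		// checkin message, it marks the channel as permanently disconnected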
+		if (dmsr_checkin == DM_CHECKIN_CANCELED) {
+			dm->dm_disconnected = true;
+			dmsr_checkin = NULL;
+		}
+		if (dm->dm_disconnected) {
+			if (MACH_PORT_VALID(dmsr_send)) {
+				_dispatch_mach_msg_disconnected(dm, MACH_PORT_NULL, dmsr_send);
+			}
+			if (dmsr_checkin) {
+				_dispatch_mach_msg_not_sent(dm, dmsr_checkin, NULL);
 			}
 		} else {
-			_dispatch_mach_reply_waiter_unregister(dm, dmr,
-				DU_UNREGISTER_DISCONNECTED);
+			dmsr->dmsr_send = dmsr_send;
+			dmsr->dmsr_checkin = dmsr_checkin;
 		}
+		(void)os_atomic_dec2o(dmsr, dmsr_disconnect_cnt, relaxed);
 	}
-	disconnected = TAILQ_EMPTY(&dm->dm_send_refs->dmsr_replies);
-	_dispatch_unfair_lock_unlock(&dm->dm_send_refs->dmsr_replies_lock);
 	return disconnected;
 }
 
-static void
-_dispatch_mach_cancel(dispatch_mach_t dm)
-{
-	_dispatch_object_debug(dm, "%s", __func__);
-	if (!_dispatch_mach_disconnect(dm)) return;
-
-	bool uninstalled = true;
-	dispatch_assert(!dm->dm_uninstalled);
-
-	if (dm->dm_xpc_term_refs) {
-		uninstalled = _dispatch_unote_unregister(dm->dm_xpc_term_refs, 0);
-	}
-
-	dispatch_mach_recv_refs_t dmrr = dm->dm_recv_refs;
-	mach_port_t local_port = (mach_port_t)dmrr->du_ident;
-	if (local_port) {
-		// handle the deferred delete case properly, similar to what
-		// _dispatch_source_invoke2() does
-		dispatch_queue_flags_t dqf = _dispatch_queue_atomic_flags(dm->_as_dq);
-		if ((dqf & DSF_DEFERRED_DELETE) && !(dqf & DSF_ARMED)) {
-			_dispatch_source_refs_unregister(dm->_as_ds,
-					DU_UNREGISTER_IMMEDIATE_DELETE);
-			dqf = _dispatch_queue_atomic_flags(dm->_as_dq);
-		} else if (!(dqf & DSF_DEFERRED_DELETE) && !(dqf & DSF_DELETED)) {
-			_dispatch_source_refs_unregister(dm->_as_ds, 0);
-			dqf = _dispatch_queue_atomic_flags(dm->_as_dq);
-		}
-		if ((dqf & DSF_STATE_MASK) == DSF_DELETED) {
-			_dispatch_mach_msg_disconnected(dm, local_port, MACH_PORT_NULL);
-			dmrr->du_ident = 0;
-		} else {
-			uninstalled = false;
-		}
-	} else {
-		_dispatch_queue_atomic_flags_set_and_clear(dm->_as_dq, DSF_DELETED,
-				DSF_ARMED | DSF_DEFERRED_DELETE);
-	}
-
-	if (dm->dm_send_refs->dmsr_disconnect_cnt) {
-		uninstalled = false; // <rdar://problem/31233110>
-	}
-	if (uninstalled) dm->dm_uninstalled = uninstalled;
-}
-
-DISPATCH_NOINLINE
-static bool
-_dispatch_mach_reconnect_invoke(dispatch_mach_t dm, dispatch_object_t dou)
-{
-	if (!_dispatch_mach_disconnect(dm)) return false;
-	dispatch_mach_send_refs_t dmsr = dm->dm_send_refs;
-	dmsr->dmsr_checkin = dou._dc->dc_data;
-	dmsr->dmsr_send = (mach_port_t)dou._dc->dc_other;
-	_dispatch_continuation_free(dou._dc);
-	(void)os_atomic_dec2o(dmsr, dmsr_disconnect_cnt, relaxed);
-	_dispatch_object_debug(dm, "%s", __func__);
-	_dispatch_release(dm); // <rdar://problem/26266265>
-	return true;
-}
-
 DISPATCH_NOINLINE
 void
 dispatch_mach_reconnect(dispatch_mach_t dm, mach_port_t send,
@@ -2078,11 +2050,11 @@
 		dmsg->dmsg_options = _dispatch_mach_checkin_options();
 		dmsr->dmsr_checkin_port = _dispatch_mach_msg_get_remote_port(dmsg);
 	} else {
-		checkin = NULL;
+		if (checkin != DM_CHECKIN_CANCELED) checkin = NULL;
 		dmsr->dmsr_checkin_port = MACH_PORT_NULL;
 	}
 	dispatch_continuation_t dc = _dispatch_continuation_alloc();
-	dc->dc_flags = DISPATCH_OBJ_CONSUME_BIT;
+	dc->dc_flags = DC_FLAG_CONSUME | DC_FLAG_ALLOCATED;
 	// actually called manually in _dispatch_mach_send_drain
 	dc->dc_func = (void*)_dispatch_mach_reconnect_invoke;
 	dc->dc_ctxt = dc;
@@ -2090,7 +2062,6 @@
 	dc->dc_other = (void*)(uintptr_t)send;
 	dc->dc_voucher = DISPATCH_NO_VOUCHER;
 	dc->dc_priority = DISPATCH_NO_PRIORITY;
-	_dispatch_retain(dm); // <rdar://problem/26266265>
 	return _dispatch_mach_send_push(dm, dc, 0);
 }
 
@@ -2099,7 +2070,7 @@
 dispatch_mach_get_checkin_port(dispatch_mach_t dm)
 {
 	dispatch_mach_send_refs_t dmsr = dm->dm_send_refs;
-	if (slowpath(dm->dq_atomic_flags & DSF_CANCELED)) {
+	if (unlikely(dm->dq_atomic_flags & DSF_CANCELED)) {
 		return MACH_PORT_DEAD;
 	}
 	return dmsr->dmsr_checkin_port;
@@ -2116,6 +2087,194 @@
 	_dispatch_perfmon_workitem_inc();
 }
 
+typedef struct dispatch_ipc_handoff_s {
+	struct dispatch_continuation_s dih_dc;
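+	// dih_wlh: 0 when the hierarchy is unsuitable for handoff, the workloop
+	// address while the handoff is pending, or its bitwise complement once
+	// _dispatch_mach_ipc_handoff_async has already done its part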
+	uint64_t _Atomic dih_wlh;
+	int32_t dih_refcnt;
+} dispatch_ipc_handoff_s, *dispatch_ipc_handoff_t;
+
+typedef struct _dispatch_ipc_handoff_context_s {
+	dispatch_thread_context_s dihc_dtc;
+	dispatch_queue_t dihc_dq;
+	dispatch_qos_t dihc_qos;
+} _dispatch_ipc_handoff_context_s, *_dispatch_ipc_handoff_ctxt_t;
+
+static char const * const
+_dispatch_mach_msg_context_key = "mach_msg";
+
+static _dispatch_ipc_handoff_ctxt_t
+_dispatch_mach_handoff_context(mach_port_t port)
+{
+	dispatch_thread_context_t dtc;
+	_dispatch_ipc_handoff_ctxt_t dihc = NULL;
+	dispatch_ipc_handoff_t dih;
+
+	dtc = _dispatch_thread_context_find(_dispatch_mach_msg_context_key);
+	if (dtc && dtc->dtc_dmsg) {
+		/*
+		 * We need one refcount per async() performed,
+		 * and one for the whole chain.
+		 */
+		dihc = (_dispatch_ipc_handoff_ctxt_t)dtc;
+		if (dx_type(dtc->dtc_dmsg) == DISPATCH_MACH_MSG_TYPE) {
+			dtc->dtc_dih = _dispatch_calloc(1, sizeof(dispatch_ipc_handoff_s));
+			dih = dtc->dtc_dih;
+			os_atomic_store(&dih->dih_refcnt, 1, relaxed);
+		} else {
+			dih = dtc->dtc_dih;
+			os_atomic_inc(&dih->dih_refcnt, relaxed);
+		}
+		if (dih->dih_dc.dc_other) {
+			DISPATCH_CLIENT_CRASH(0, "Calling dispatch_mach_handoff_reply "
+					"multiple times from the same context");
+		}
+	} else {
+		DISPATCH_CLIENT_CRASH(0, "Trying to handoff IPC from non IPC context");
+	}
+
+	dih->dih_dc.dc_other = (void *)(uintptr_t)port;
+	return dihc;
+}
+
+static void
+_dispatch_ipc_handoff_release(dispatch_ipc_handoff_t dih)
+{
+	if (os_atomic_dec_orig(&dih->dih_refcnt, relaxed) == 0) {
+		free(dih);
+	}
+}
+
+static void
+_dispatch_mach_handoff_set_wlh(dispatch_ipc_handoff_t dih, dispatch_queue_t dq)
+{
+	while (likely(dq->do_targetq)) {
+		if (unlikely(_dispatch_queue_is_mutable(dq))) {
+			DISPATCH_CLIENT_CRASH(0,
+					"Trying to handoff IPC onto mutable hierarchy");
+		}
+		if (_dq_state_is_base_wlh(dq->dq_state)) {
+			os_atomic_store(&dih->dih_wlh, (uint64_t)dq, relaxed);
+			return;
+		}
+		dq = dq->do_targetq;
+	}
+
+	/* unsupported hierarchy */
+	os_atomic_store(&dih->dih_wlh, 0, relaxed);
+}
+
+void
+dispatch_mach_handoff_reply_f(dispatch_queue_t dq,
+		mach_port_t port, void *ctxt, dispatch_function_t func)
+{
+	_dispatch_ipc_handoff_ctxt_t dihc = _dispatch_mach_handoff_context(port);
+	dispatch_ipc_handoff_t dih = dihc->dihc_dtc.dtc_dih;
+	dispatch_continuation_t dc = &dih->dih_dc;
+
+	_dispatch_mach_handoff_set_wlh(dih, dq);
+	_dispatch_retain(dq);
+	dihc->dihc_dq = dq;
+	dihc->dihc_qos = _dispatch_continuation_init_f(dc, dq, ctxt, func, 0, 0);
+	dc->dc_data = (void *)dc->dc_flags;
+	dc->do_vtable = DC_VTABLE(MACH_IPC_HANDOFF);
+}
+
+void
+dispatch_mach_handoff_reply(dispatch_queue_t dq,
+		mach_port_t port, dispatch_block_t block)
+{
+	_dispatch_ipc_handoff_ctxt_t dihc = _dispatch_mach_handoff_context(port);
+	dispatch_ipc_handoff_t dih = dihc->dihc_dtc.dtc_dih;
+	dispatch_continuation_t dc = &dih->dih_dc;
+
+	_dispatch_retain(dq);
+	dihc->dihc_dq = dq;
+	dihc->dihc_qos = _dispatch_continuation_init(dc, dq, block, 0, 0);
+	dc->dc_data = (void *)dc->dc_flags;
+	dc->do_vtable = DC_VTABLE(MACH_IPC_HANDOFF);
+}
+
+static void
+_dispatch_mach_ipc_handoff_async(_dispatch_ipc_handoff_ctxt_t dihc)
+{
+	dispatch_ipc_handoff_t dih = dihc->dihc_dtc.dtc_dih;
+	dispatch_continuation_t dc = &dih->dih_dc;
+	mach_port_t port = (mach_port_t)(uintptr_t)dc->dc_other;
+	uint64_t wlh = os_atomic_load(&dih->dih_wlh, relaxed);
+
+	_dispatch_continuation_async(dihc->dihc_dq, dc, dihc->dihc_qos,
+			(uintptr_t)dc->dc_data);
+
+	if (wlh) {
+		_dispatch_sync_ipc_handoff_begin((dispatch_wlh_t)wlh, port,
+				&dih->dih_wlh);
+		os_atomic_cmpxchg(&dih->dih_wlh, wlh, ~wlh, relaxed);
+	}
+
+	_dispatch_ipc_handoff_release(dih);
+	_dispatch_release_tailcall(dihc->dihc_dq);
+}
+
+void
+_dispatch_mach_ipc_handoff_invoke(dispatch_continuation_t dc,
+		dispatch_invoke_context_t dic DISPATCH_UNUSED,
+		dispatch_invoke_flags_t flags)
+{
+	dispatch_ipc_handoff_t dih = (dispatch_ipc_handoff_t)dc;
+	_dispatch_ipc_handoff_context_s dihc = { .dihc_dtc = {
+		.dtc_key = _dispatch_mach_msg_context_key,
+		.dtc_dih = dih,
+	} };
+
+	dispatch_queue_t cq = _dispatch_queue_get_current();
+	uintptr_t dc_flags = (uintptr_t)dc->dc_data;
+	mach_port_t port = (mach_port_t)(uintptr_t)dc->dc_other;
+	uint64_t wlh = os_atomic_xchg(&dih->dih_wlh, 0, relaxed);
+
+	if (wlh == 0) {
+		/* not supported */
+	} else if (wlh & 1) {
+		/* _dispatch_mach_ipc_handoff_async finished its work */
+		wlh = ~wlh;
+	} else {
+		/*
+		 * Because this code may race with _dispatch_mach_ipc_handoff_async,
+		 * make sure that we have the push.
+		 *
+		 * Then mark the handoff as done, as the client callout below
+		 * may consume the send-once right, after which
+		 * _dispatch_mach_ipc_handoff_async could be looking at an invalid
+		 * port.
+		 */
+		_dispatch_sync_ipc_handoff_begin((dispatch_wlh_t)wlh, port,
+				&dih->dih_wlh);
+	}
+
+	dc->do_next = DISPATCH_OBJECT_LISTLESS;
+	dc->dc_other = NULL;
+
+	_dispatch_thread_context_push(&dihc.dihc_dtc);
+
+	_dispatch_continuation_pop_forwarded(dc, dc_flags, cq, {
+		dispatch_invoke_with_autoreleasepool(flags, {
+			_dispatch_client_callout(dc->dc_ctxt, dc->dc_func);
+			_dispatch_trace_item_complete(dc);
+		});
+	});
+
+	_dispatch_thread_context_pop(&dihc.dihc_dtc);
+
+	if (dihc.dihc_dq) {
+		/* a new handoff was started */
+		_dispatch_mach_ipc_handoff_async(&dihc);
+	} else {
+		/* this was the last handoff in the chain, consume the last ref */
+		_dispatch_ipc_handoff_release(dih);
+	}
+
+	if (wlh) {
+		_dispatch_sync_ipc_handoff_end((dispatch_wlh_t)wlh, port);
+	}
+}
+
 DISPATCH_ALWAYS_INLINE
 static void
 _dispatch_mach_msg_invoke_with_mach(dispatch_mach_msg_t dmsg,
@@ -2126,6 +2285,13 @@
 	unsigned long reason = _dispatch_mach_msg_get_reason(dmsg, &err);
 	dispatch_thread_set_self_t adopt_flags = DISPATCH_PRIORITY_ENFORCE|
 			DISPATCH_VOUCHER_CONSUME|DISPATCH_VOUCHER_REPLACE;
+	_dispatch_ipc_handoff_context_s dihc = { .dihc_dtc = {
+		.dtc_key = _dispatch_mach_msg_context_key,
+		.dtc_dmsg = dmsg,
+	} };
+
+	_dispatch_thread_context_push(&dihc.dihc_dtc);
+	_dispatch_trace_item_pop(dm, dmsg);
 
 	dmrr = dm->dm_recv_refs;
 	dmsg->do_next = DISPATCH_OBJECT_LISTLESS;
@@ -2139,20 +2305,24 @@
 			_dispatch_client_callout3(dmrr->dmrr_handler_ctxt, reason, dmsg,
 					_dispatch_mach_xpc_hooks->dmxh_async_reply_handler);
 		} else {
-			if (slowpath(!dm->dm_connect_handler_called)) {
+			if (unlikely(!dm->dm_connect_handler_called)) {
 				_dispatch_mach_connect_invoke(dm);
 			}
 			if (reason == DISPATCH_MACH_MESSAGE_RECEIVED &&
-					(_dispatch_queue_atomic_flags(dm->_as_dq) & DSF_CANCELED)) {
+					(_dispatch_queue_atomic_flags(dm) & DSF_CANCELED)) {
 				// <rdar://problem/32184699> Do not deliver message received
 				// after cancellation: _dispatch_mach_merge_msg can be preempted
-				// for a long time between clearing DSF_ARMED but before
+				// for a long time right after disarming the unote but before
 				// enqueuing the message, allowing for cancellation to complete,
 				// and then the message event to be delivered.
 				//
 				// This makes XPC unhappy because some of these messages are
 				// port-destroyed notifications that can cause it to try to
 				// reconnect on a channel that is almost fully canceled
+				mach_msg_header_t *hdr = _dispatch_mach_msg_get_msg(dmsg);
+				_dispatch_debug("machport[0x%08x]: drop msg id 0x%x, reply on 0x%08x",
+						hdr->msgh_local_port, hdr->msgh_id, hdr->msgh_remote_port);
+				mach_msg_destroy(hdr);
 			} else {
 				_dispatch_client_callout4(dmrr->dmrr_handler_ctxt, reason, dmsg,
 						err, dmrr->dmrr_handler_func);
@@ -2160,8 +2330,13 @@
 		}
 		_dispatch_perfmon_workitem_inc();
 	});
-	_dispatch_introspection_queue_item_complete(dmsg);
+	_dispatch_trace_item_complete(dmsg);
 	dispatch_release(dmsg);
+	_dispatch_thread_context_pop(&dihc.dihc_dtc);
+
+	if (dihc.dihc_dq) {
+		_dispatch_mach_ipc_handoff_async(&dihc);
+	}
 }
 
 DISPATCH_NOINLINE
@@ -2173,7 +2348,7 @@
 	dispatch_thread_frame_s dtf;
 
 	// hide mach channel
-	dispatch_mach_t dm = (dispatch_mach_t)_dispatch_thread_frame_stash(&dtf);
+	dispatch_mach_t dm = upcast(_dispatch_thread_frame_stash(&dtf))._dm;
 	_dispatch_mach_msg_invoke_with_mach(dmsg, flags, dm);
 	_dispatch_thread_frame_unstash(&dtf);
 }
@@ -2197,12 +2372,15 @@
 		_dispatch_thread_frame_stash(&dtf);
 	}
 	dmrr = dm->dm_recv_refs;
-	DISPATCH_COMPILER_CAN_ASSUME(dc_flags & DISPATCH_OBJ_CONSUME_BIT);
-	_dispatch_continuation_pop_forwarded(dc, DISPATCH_NO_VOUCHER, dc_flags, {
+	DISPATCH_COMPILER_CAN_ASSUME(dc_flags & DC_FLAG_CONSUME);
+	if (unlikely(!dm->dm_connect_handler_called)) {
 		dispatch_invoke_with_autoreleasepool(flags, {
-			if (slowpath(!dm->dm_connect_handler_called)) {
-				_dispatch_mach_connect_invoke(dm);
-			}
+			// do not coalesce with the block below due to continuation reuse
+			_dispatch_mach_connect_invoke(dm);
+		});
+	}
+	_dispatch_continuation_pop_forwarded(dc, dc_flags, dm, {
+		dispatch_invoke_with_autoreleasepool(flags, {
 			_dispatch_client_callout(dc->dc_ctxt, dc->dc_func);
 			_dispatch_client_callout4(dmrr->dmrr_handler_ctxt,
 					DISPATCH_MACH_BARRIER_COMPLETED, NULL, 0,
@@ -2230,13 +2408,13 @@
 		dispatch_function_t func)
 {
 	dispatch_continuation_t dc = _dispatch_continuation_alloc();
-	uintptr_t dc_flags = DISPATCH_OBJ_CONSUME_BIT | DISPATCH_OBJ_MACH_BARRIER;
+	uintptr_t dc_flags = DC_FLAG_CONSUME;
 	dispatch_qos_t qos;
 
-	_dispatch_continuation_init_f(dc, dm, context, func, 0, 0, dc_flags);
+	_dispatch_continuation_init_f(dc, dm, context, func, 0, dc_flags);
 	_dispatch_mach_barrier_set_vtable(dc, dm, DC_VTABLE(MACH_SEND_BARRIER));
-	_dispatch_trace_continuation_push(dm->_as_dq, dc);
-	qos = _dispatch_continuation_override_qos(dm->_as_dq, dc);
+	_dispatch_trace_item_push(dm, dc);
+	qos = _dispatch_qos_from_pp(dc->dc_priority);
 	return _dispatch_mach_send_push(dm, dc, qos);
 }
 
@@ -2245,13 +2423,13 @@
 dispatch_mach_send_barrier(dispatch_mach_t dm, dispatch_block_t barrier)
 {
 	dispatch_continuation_t dc = _dispatch_continuation_alloc();
-	uintptr_t dc_flags = DISPATCH_OBJ_CONSUME_BIT | DISPATCH_OBJ_MACH_BARRIER;
+	uintptr_t dc_flags = DC_FLAG_CONSUME;
 	dispatch_qos_t qos;
 
-	_dispatch_continuation_init(dc, dm, barrier, 0, 0, dc_flags);
+	_dispatch_continuation_init(dc, dm, barrier, 0, dc_flags);
 	_dispatch_mach_barrier_set_vtable(dc, dm, DC_VTABLE(MACH_SEND_BARRIER));
-	_dispatch_trace_continuation_push(dm->_as_dq, dc);
-	qos = _dispatch_continuation_override_qos(dm->_as_dq, dc);
+	_dispatch_trace_item_push(dm, dc);
+	qos = _dispatch_qos_from_pp(dc->dc_priority);
 	return _dispatch_mach_send_push(dm, dc, qos);
 }
 
@@ -2261,11 +2439,12 @@
 		dispatch_function_t func)
 {
 	dispatch_continuation_t dc = _dispatch_continuation_alloc();
-	uintptr_t dc_flags = DISPATCH_OBJ_CONSUME_BIT | DISPATCH_OBJ_MACH_BARRIER;
+	uintptr_t dc_flags = DC_FLAG_CONSUME;
+	dispatch_qos_t qos;
 
-	_dispatch_continuation_init_f(dc, dm, context, func, 0, 0, dc_flags);
+	qos = _dispatch_continuation_init_f(dc, dm, context, func, 0, dc_flags);
 	_dispatch_mach_barrier_set_vtable(dc, dm, DC_VTABLE(MACH_RECV_BARRIER));
-	return _dispatch_continuation_async(dm->_as_dq, dc);
+	return _dispatch_continuation_async(dm, dc, qos, dc_flags);
 }
 
 DISPATCH_NOINLINE
@@ -2273,11 +2452,12 @@
 dispatch_mach_receive_barrier(dispatch_mach_t dm, dispatch_block_t barrier)
 {
 	dispatch_continuation_t dc = _dispatch_continuation_alloc();
-	uintptr_t dc_flags = DISPATCH_OBJ_CONSUME_BIT | DISPATCH_OBJ_MACH_BARRIER;
+	uintptr_t dc_flags = DC_FLAG_CONSUME;
+	dispatch_qos_t qos;
 
-	_dispatch_continuation_init(dc, dm, barrier, 0, 0, dc_flags);
+	qos = _dispatch_continuation_init(dc, dm, barrier, 0, dc_flags);
 	_dispatch_mach_barrier_set_vtable(dc, dm, DC_VTABLE(MACH_RECV_BARRIER));
-	return _dispatch_continuation_async(dm->_as_dq, dc);
+	return _dispatch_continuation_async(dm, dc, qos, dc_flags);
 }
 
 DISPATCH_NOINLINE
@@ -2287,7 +2467,7 @@
 	dispatch_mach_recv_refs_t dmrr = dm->dm_recv_refs;
 
 	dispatch_invoke_with_autoreleasepool(flags, {
-		if (slowpath(!dm->dm_connect_handler_called)) {
+		if (unlikely(!dm->dm_connect_handler_called)) {
 			_dispatch_mach_connect_invoke(dm);
 		}
 		_dispatch_client_callout4(dmrr->dmrr_handler_ctxt,
@@ -2295,121 +2475,122 @@
 		_dispatch_perfmon_workitem_inc();
 	});
 	dm->dm_cancel_handler_called = 1;
-	_dispatch_release(dm); // the retain is done at creation time
 }
 
 DISPATCH_NOINLINE
 void
 dispatch_mach_cancel(dispatch_mach_t dm)
 {
-	dispatch_source_cancel(dm->_as_ds);
+	dispatch_queue_flags_t dqf;
+
+	_dispatch_object_debug(dm, "%s", __func__);
+	// <rdar://problem/34849210> similar race to dispatch_source_cancel
+	// Once we set the DSF_CANCELED bit, anyone can notice and finish the
+	// unregistration, causing a use-after-free in dispatch_mach_reconnect() below.
+	_dispatch_retain(dm);
+	dqf = _dispatch_queue_atomic_flags_set_orig(dm, DSF_CANCELED);
+	if (!(dqf & DSF_CANCELED)) {
+		dispatch_mach_reconnect(dm, MACH_PORT_NULL, DM_CHECKIN_CANCELED);
+	}
+	_dispatch_release_tailcall(dm);
 }
 
 static void
 _dispatch_mach_install(dispatch_mach_t dm, dispatch_wlh_t wlh,
 		dispatch_priority_t pri)
 {
+	bool cancelled = (_dispatch_queue_atomic_flags(dm) & DSF_CANCELED);
 	dispatch_mach_recv_refs_t dmrr = dm->dm_recv_refs;
-	uint32_t disconnect_cnt;
 
-	if (dmrr->du_ident) {
-		_dispatch_source_refs_register(dm->_as_ds, wlh, pri);
+	dispatch_assert(!dm->ds_is_installed);
+	dm->ds_is_installed = true;
+
+	if (!cancelled && dmrr->du_ident) {
+		(void)_dispatch_unote_register(dmrr, wlh, pri);
 		dispatch_assert(dmrr->du_is_direct);
 	}
 
-	if (dm->dm_is_xpc) {
-		bool monitor_sigterm;
-		if (_dispatch_mach_xpc_hooks->version < 3) {
-			monitor_sigterm = true;
-		} else if (!_dispatch_mach_xpc_hooks->dmxh_enable_sigterm_notification){
-			monitor_sigterm = true;
-		} else {
-			monitor_sigterm =
-					_dispatch_mach_xpc_hooks->dmxh_enable_sigterm_notification(
-					dm->dm_recv_refs->dmrr_handler_ctxt);
-		}
-		if (monitor_sigterm) {
-			dispatch_xpc_term_refs_t _dxtr =
-					dux_create(&_dispatch_xpc_type_sigterm, SIGTERM, 0)._dxtr;
-			_dxtr->du_owner_wref = _dispatch_ptr2wref(dm);
-			dm->dm_xpc_term_refs = _dxtr;
-			_dispatch_unote_register(dm->dm_xpc_term_refs, wlh, pri);
-		}
+	if (!cancelled && dm->dm_is_xpc &&
+			_dispatch_mach_xpc_hooks->dmxh_enable_sigterm_notification(
+			dmrr->dmrr_handler_ctxt)) {
+		dispatch_xpc_term_refs_t _dxtr =
+				dux_create(&_dispatch_xpc_type_sigterm, SIGTERM, 0)._dxtr;
+		_dxtr->du_owner_wref = _dispatch_ptr2wref(dm);
+		dm->dm_xpc_term_refs = _dxtr;
+		_dispatch_unote_register(dm->dm_xpc_term_refs, wlh, pri);
 	}
 	if (!dm->dq_priority) {
 		// _dispatch_mach_reply_kevent_register assumes this has been done
-		// which is unlike regular sources or queues, the DEFAULTQUEUE flag
+		// which is unlike regular sources or queues, the FALLBACK flag
 		// is used so that the priority of the channel doesn't act as
 		// a QoS floor for incoming messages (26761457)
 		dm->dq_priority = pri;
 	}
-	dm->ds_is_installed = true;
-	if (unlikely(!os_atomic_cmpxchgv2o(dm->dm_send_refs, dmsr_disconnect_cnt,
-			DISPATCH_MACH_NEVER_INSTALLED, 0, &disconnect_cnt, release))) {
-		DISPATCH_INTERNAL_CRASH(disconnect_cnt, "Channel already installed");
+
+	uint32_t disconnect_cnt = os_atomic_load2o(dm->dm_send_refs,
+			dmsr_disconnect_cnt, relaxed);
+	if (unlikely(disconnect_cnt & DISPATCH_MACH_NEVER_CONNECTED)) {
+		DISPATCH_CLIENT_CRASH(disconnect_cnt, "Channel never connected");
 	}
 }
 
 void
-_dispatch_mach_finalize_activation(dispatch_mach_t dm, bool *allow_resume)
+_dispatch_mach_activate(dispatch_mach_t dm, bool *allow_resume)
 {
 	dispatch_priority_t pri;
 	dispatch_wlh_t wlh;
 
 	// call "super"
-	_dispatch_queue_finalize_activation(dm->_as_dq, allow_resume);
+	_dispatch_lane_activate(dm, allow_resume);
 
 	if (!dm->ds_is_installed) {
-		pri = _dispatch_queue_compute_priority_and_wlh(dm->_as_dq, &wlh);
+		pri = _dispatch_queue_compute_priority_and_wlh(dm, &wlh);
 		if (pri) _dispatch_mach_install(dm, wlh, pri);
 	}
 }
 
-DISPATCH_ALWAYS_INLINE
-static inline bool
-_dispatch_mach_tryarm(dispatch_mach_t dm, dispatch_queue_flags_t *out_dqf)
+DISPATCH_NOINLINE
+static void
+_dispatch_mach_handle_wlh_change(dispatch_mach_t dm)
 {
-	dispatch_queue_flags_t oqf, nqf;
-	bool rc = os_atomic_rmw_loop2o(dm, dq_atomic_flags, oqf, nqf, relaxed, {
-		nqf = oqf;
-		if (nqf & (DSF_ARMED | DSF_CANCELED | DSF_DEFERRED_DELETE |
-				DSF_DELETED)) {
-			// the test is inside the loop because it's convenient but the
-			// result should not change for the duration of the rmw_loop
-			os_atomic_rmw_loop_give_up(break);
+	dispatch_queue_flags_t dqf;
+
+	dqf = _dispatch_queue_atomic_flags_set_orig(dm, DSF_WLH_CHANGED);
+	if (!(dqf & DQF_MUTABLE)) {
+		if (dm->dm_is_xpc) {
+			DISPATCH_CLIENT_CRASH(0, "Changing target queue "
+					"hierarchy after xpc connection was activated");
+		} else {
+			DISPATCH_CLIENT_CRASH(0, "Changing target queue "
+					"hierarchy after mach channel was connected");
 		}
-		nqf |= DSF_ARMED;
-	});
-	if (out_dqf) *out_dqf = nqf;
-	return rc;
+	}
+	if (!(dqf & DSF_WLH_CHANGED)) {
+		if (dm->dm_is_xpc) {
+			_dispatch_bug_deprecated("Changing target queue "
+					"hierarchy after xpc connection was activated");
+		} else {
+			_dispatch_bug_deprecated("Changing target queue "
+					"hierarchy after mach channel was connected");
+		}
+	}
 }
 
 DISPATCH_ALWAYS_INLINE
 static inline dispatch_queue_wakeup_target_t
-_dispatch_mach_invoke2(dispatch_object_t dou,
+_dispatch_mach_invoke2(dispatch_mach_t dm,
 		dispatch_invoke_context_t dic, dispatch_invoke_flags_t flags,
 		uint64_t *owned)
 {
-	dispatch_mach_t dm = dou._dm;
 	dispatch_queue_wakeup_target_t retq = NULL;
 	dispatch_queue_t dq = _dispatch_queue_get_current();
 	dispatch_mach_send_refs_t dmsr = dm->dm_send_refs;
 	dispatch_mach_recv_refs_t dmrr = dm->dm_recv_refs;
-	dispatch_queue_flags_t dqf = 0;
+	dispatch_queue_flags_t dqf;
 
-	if (!(flags & DISPATCH_INVOKE_MANAGER_DRAIN) && dmrr &&
-			_dispatch_unote_wlh_changed(dmrr, _dispatch_get_wlh())) {
-		dqf = _dispatch_queue_atomic_flags_set_orig(dm->_as_dq,
-				DSF_WLH_CHANGED);
-		if (!(dqf & DSF_WLH_CHANGED)) {
-			if (dm->dm_is_xpc) {
-				_dispatch_bug_deprecated("Changing target queue "
-						"hierarchy after xpc connection was activated");
-			} else {
-				_dispatch_bug_deprecated("Changing target queue "
-						"hierarchy after mach channel was activated");
-			}
-		}
+	if (unlikely(!(flags & DISPATCH_INVOKE_MANAGER_DRAIN) && dmrr &&
+			_dispatch_unote_wlh_changed(dmrr, _dispatch_get_event_wlh()))) {
+		_dispatch_mach_handle_wlh_change(dm);
 	}
 
 	// This function performs all mach channel actions. Each action is
@@ -2425,76 +2606,81 @@
 		if (unlikely(flags & DISPATCH_INVOKE_MANAGER_DRAIN)) {
 			return dm->do_targetq;
 		}
-		_dispatch_mach_install(dm, _dispatch_get_wlh(),_dispatch_get_basepri());
+		dispatch_priority_t pri = DISPATCH_PRIORITY_FLAG_MANAGER;
+		if (likely(flags & DISPATCH_INVOKE_WORKER_DRAIN)) {
+			pri = _dispatch_get_basepri();
+		}
+		_dispatch_mach_install(dm, _dispatch_get_event_wlh(), pri);
 		_dispatch_perfmon_workitem_inc();
 	}
 
 	if (_dispatch_queue_class_probe(dm)) {
 		if (dq == dm->do_targetq) {
 drain:
-			retq = _dispatch_queue_serial_drain(dm->_as_dq, dic, flags, owned);
+			retq = _dispatch_lane_serial_drain(dm, dic, flags, owned);
 		} else {
 			retq = dm->do_targetq;
 		}
 	}
 
-	if (!retq && _dispatch_unote_registered(dmrr)) {
-		if (_dispatch_mach_tryarm(dm, &dqf)) {
-			_dispatch_unote_resume(dmrr);
-			if (dq == dm->do_targetq && !dq->do_targetq && !dmsr->dmsr_tail &&
-					(dq->dq_priority & DISPATCH_PRIORITY_FLAG_OVERCOMMIT) &&
-					_dispatch_wlh_should_poll_unote(dmrr)) {
-				// try to redrive the drain from under the lock for channels
-				// targeting an overcommit root queue to avoid parking
-				// when the next message has already fired
-				_dispatch_event_loop_drain(KEVENT_FLAG_IMMEDIATE);
-				if (dm->dq_items_tail) goto drain;
-			}
+	dqf = _dispatch_queue_atomic_flags(dm);
+	if (!retq && !(dqf & DSF_CANCELED) && _dispatch_unote_needs_rearm(dmrr)) {
+		_dispatch_unote_resume(dmrr);
+		if (dq == dm->do_targetq && !dq->do_targetq && !dmsr->dmsr_tail &&
+				(dq->dq_priority & DISPATCH_PRIORITY_FLAG_OVERCOMMIT) &&
+				_dispatch_wlh_should_poll_unote(dmrr)) {
+			// try to redrive the drain from under the lock for channels
+			// targeting an overcommit root queue to avoid parking
+			// when the next message has already fired
+			_dispatch_event_loop_drain(KEVENT_FLAG_IMMEDIATE);
+			if (dm->dq_items_tail) goto drain;
 		}
-	} else {
-		dqf = _dispatch_queue_atomic_flags(dm->_as_dq);
+		dqf = _dispatch_queue_atomic_flags(dm);
 	}
 
 	if (dmsr->dmsr_tail) {
-		bool requires_mgr = dm->dm_needs_mgr || (dmsr->dmsr_disconnect_cnt &&
-				_dispatch_unote_registered(dmsr));
-		if (!os_atomic_load2o(dmsr, dmsr_notification_armed, relaxed) ||
-				(dqf & DSF_CANCELED) || dmsr->dmsr_disconnect_cnt) {
+		if (!dmsr->dmsr_notification_armed || dmsr->dmsr_disconnect_cnt) {
+			bool requires_mgr = dmsr->dmsr_disconnect_cnt ?
+					_dispatch_unote_registered(dmsr) : dm->dm_needs_mgr;
 			// The channel has pending messages to send.
-			if (unlikely(requires_mgr && dq != &_dispatch_mgr_q)) {
-				return retq ? retq : &_dispatch_mgr_q;
+			if (unlikely(requires_mgr && dq != _dispatch_mgr_q._as_dq)) {
+				return retq ? retq : _dispatch_mgr_q._as_dq;
 			}
 			dispatch_mach_send_invoke_flags_t send_flags = DM_SEND_INVOKE_NONE;
-			if (dq != &_dispatch_mgr_q) {
+			if (dq != _dispatch_mgr_q._as_dq) {
 				send_flags |= DM_SEND_INVOKE_CAN_RUN_BARRIER;
 			}
 			_dispatch_mach_send_invoke(dm, flags, send_flags);
+			if (!retq && dm->dq_items_tail) {
+				retq = dm->do_targetq;
+			}
 		}
-		if (!retq) retq = DISPATCH_QUEUE_WAKEUP_WAIT_FOR_EVENT;
-	} else if (!retq && (dqf & DSF_CANCELED)) {
+		if (!retq && dmsr->dmsr_tail) {
+			retq = DISPATCH_QUEUE_WAKEUP_WAIT_FOR_EVENT;
+		}
+	}
+
+	if (dqf & DSF_CANCELED) {
 		// The channel has been cancelled and needs to be uninstalled from the
-		// manager queue. After uninstallation, the cancellation handler needs
-		// to be delivered to the target queue.
-		if (!dm->dm_uninstalled) {
-			if ((dqf & DSF_STATE_MASK) == (DSF_ARMED | DSF_DEFERRED_DELETE)) {
-				// waiting for the delivery of a deferred delete event
-				return retq ? retq : DISPATCH_QUEUE_WAKEUP_WAIT_FOR_EVENT;
-			}
-			if (dq != &_dispatch_mgr_q) {
-				return retq ? retq : &_dispatch_mgr_q;
-			}
-			_dispatch_mach_send_invoke(dm, flags, DM_SEND_INVOKE_CANCEL);
-			if (unlikely(!dm->dm_uninstalled)) {
-				// waiting for the delivery of a deferred delete event
-				// or deletion didn't happen because send_invoke couldn't
-				// acquire the send lock
-				return retq ? retq : DISPATCH_QUEUE_WAKEUP_WAIT_FOR_EVENT;
-			}
+		// manager queue.
+		if (!(dqf & DSF_DELETED) && !_dispatch_mach_cancel(dm)) {
+			// waiting for the delivery of a deferred delete event
+			return retq ? retq : DISPATCH_QUEUE_WAKEUP_WAIT_FOR_EVENT;
 		}
+
+		// After uninstallation, the cancellation handler needs to be delivered
+		// to the target queue, but not before all messages have been drained
+		// from the receive queue.
 		if (!dm->dm_cancel_handler_called) {
 			if (dq != dm->do_targetq) {
 				return retq ? retq : dm->do_targetq;
 			}
+			if (DISPATCH_QUEUE_IS_SUSPENDED(dm)) {
+				return dm->do_targetq;
+			}
+			if (_dispatch_queue_class_probe(dm)) {
+				goto drain;
+			}
 			_dispatch_mach_cancel_invoke(dm, flags);
 		}
 	}
@@ -2520,7 +2706,7 @@
 
 	dispatch_mach_send_refs_t dmsr = dm->dm_send_refs;
 	dispatch_queue_wakeup_target_t tq = DISPATCH_QUEUE_WAKEUP_NONE;
-	dispatch_queue_flags_t dqf = _dispatch_queue_atomic_flags(dm->_as_dq);
+	dispatch_queue_flags_t dqf = _dispatch_queue_atomic_flags(dm);
 
 	if (!dm->ds_is_installed) {
 		// The channel needs to be installed on the kevent queue.
@@ -2533,52 +2719,47 @@
 		goto done;
 	}
 
-	if (_dispatch_lock_is_locked(dmsr->dmsr_state_lock.dul_lock)) {
-		// Sending and uninstallation below require the send lock, the channel
-		// will be woken up when the lock is dropped <rdar://15132939&15203957>
-		goto done;
-	}
-
 	if (dmsr->dmsr_tail) {
-		bool requires_mgr = dm->dm_needs_mgr || (dmsr->dmsr_disconnect_cnt &&
-				_dispatch_unote_registered(dmsr));
-		if (!os_atomic_load2o(dmsr, dmsr_notification_armed, relaxed) ||
-				(dqf & DSF_CANCELED) || dmsr->dmsr_disconnect_cnt) {
+		if (_dispatch_lock_is_locked(dmsr->dmsr_state_lock.dul_lock)) {
+			// Sending requires the send lock; the channel will be woken up
+			// when the lock is dropped <rdar://15132939&15203957>
+			goto done;
+		}
+
+		if (!dmsr->dmsr_notification_armed || dmsr->dmsr_disconnect_cnt) {
+			bool requires_mgr = dmsr->dmsr_disconnect_cnt ?
+					_dispatch_unote_registered(dmsr) : dm->dm_needs_mgr;
 			if (unlikely(requires_mgr)) {
 				tq = DISPATCH_QUEUE_WAKEUP_MGR;
 			} else {
 				tq = DISPATCH_QUEUE_WAKEUP_TARGET;
 			}
 		}
-	} else if (dqf & DSF_CANCELED) {
-		if (!dm->dm_uninstalled) {
-			if ((dqf & DSF_STATE_MASK) == (DSF_ARMED | DSF_DEFERRED_DELETE)) {
-				// waiting for the delivery of a deferred delete event
-			} else {
-				// The channel needs to be uninstalled from the manager queue
-				tq = DISPATCH_QUEUE_WAKEUP_MGR;
-			}
-		} else if (!dm->dm_cancel_handler_called) {
-			// the cancellation handler needs to be delivered to the target
-			// queue.
-			tq = DISPATCH_QUEUE_WAKEUP_TARGET;
-		}
+	} else if ((dqf & DSF_CANCELED) && (dqf & DSF_NEEDS_EVENT) &&
+			!(flags & DISPATCH_WAKEUP_EVENT)) {
+		// waiting for the delivery of a deferred delete event
+	} else if ((dqf & DSF_CANCELED) && !dm->dm_cancel_handler_called) {
+		// The channel needs to be cancelled and the cancellation handler
+		// needs to be delivered to the target queue.
+		tq = DISPATCH_QUEUE_WAKEUP_TARGET;
 	}
 
 done:
 	if ((tq == DISPATCH_QUEUE_WAKEUP_TARGET) &&
-			dm->do_targetq == &_dispatch_mgr_q) {
+			dm->do_targetq == _dispatch_mgr_q._as_dq) {
 		tq = DISPATCH_QUEUE_WAKEUP_MGR;
 	}
 
-	return _dispatch_queue_class_wakeup(dm->_as_dq, qos, flags, tq);
+	return _dispatch_queue_wakeup(dm, qos, flags, tq);
 }
 
 static void
 _dispatch_mach_sigterm_invoke(void *ctx)
 {
 	dispatch_mach_t dm = ctx;
-	if (!(dm->dq_atomic_flags & DSF_CANCELED)) {
+	uint32_t duu_options = DUU_DELETE_ACK | DUU_MUST_SUCCEED;
+	_dispatch_unote_unregister(dm->dm_xpc_term_refs, duu_options);
+	if (!(_dispatch_queue_atomic_flags(dm) & DSF_CANCELED)) {
 		dispatch_mach_recv_refs_t dmrr = dm->dm_recv_refs;
 		_dispatch_client_callout4(dmrr->dmrr_handler_ctxt,
 				DISPATCH_MACH_SIGTERM_RECEIVED, NULL, 0,
@@ -2587,27 +2768,15 @@
 }
 
 void
-_dispatch_xpc_sigterm_merge(dispatch_unote_t du,
+_dispatch_xpc_sigterm_merge_evt(dispatch_unote_t du,
 		uint32_t flags DISPATCH_UNUSED, uintptr_t data DISPATCH_UNUSED,
-		uintptr_t status DISPATCH_UNUSED, pthread_priority_t pp)
+		pthread_priority_t pp)
 {
 	dispatch_mach_t dm = _dispatch_wref2ptr(du._du->du_owner_wref);
-	uint32_t options = 0;
-	if ((flags & EV_UDATA_SPECIFIC) && (flags & EV_ONESHOT) &&
-			!(flags & EV_DELETE)) {
-		options = DU_UNREGISTER_IMMEDIATE_DELETE;
-	} else {
-		dispatch_assert((flags & EV_ONESHOT) && (flags & EV_DELETE));
-		options = DU_UNREGISTER_ALREADY_DELETED;
-	}
-	_dispatch_unote_unregister(du, options);
 
-	if (!(dm->dq_atomic_flags & DSF_CANCELED)) {
-		_dispatch_barrier_async_detached_f(dm->_as_dq, dm,
-				_dispatch_mach_sigterm_invoke);
-	} else {
-		dx_wakeup(dm, _dispatch_qos_from_pp(pp), DISPATCH_WAKEUP_MAKE_DIRTY);
-	}
+	_dispatch_barrier_async_detached_f(dm, dm, _dispatch_mach_sigterm_invoke);
+	dx_wakeup(dm, _dispatch_qos_from_pp(pp), DISPATCH_WAKEUP_EVENT |
+			DISPATCH_WAKEUP_CONSUME_2 | DISPATCH_WAKEUP_MAKE_DIRTY);
 }
 
 #pragma mark -
@@ -2617,8 +2786,7 @@
 dispatch_mach_msg_create(mach_msg_header_t *msg, size_t size,
 		dispatch_mach_msg_destructor_t destructor, mach_msg_header_t **msg_ptr)
 {
-	if (slowpath(size < sizeof(mach_msg_header_t)) ||
-			slowpath(destructor && !msg)) {
+	if (unlikely(size < sizeof(mach_msg_header_t) || (destructor && !msg))) {
 		DISPATCH_CLIENT_CRASH(size, "Empty message");
 	}
 
@@ -2636,7 +2804,7 @@
 		memcpy(dmsg->dmsg_buf, msg, size);
 	}
 	dmsg->do_next = DISPATCH_OBJECT_LISTLESS;
-	dmsg->do_targetq = _dispatch_get_root_queue(DISPATCH_QOS_DEFAULT, false);
+	dmsg->do_targetq = _dispatch_get_default_queue(false);
 	dmsg->dmsg_destructor = destructor;
 	dmsg->dmsg_size = size;
 	if (msg_ptr) {
@@ -2689,7 +2857,7 @@
 {
 	size_t offset = 0;
 	offset += dsnprintf(&buf[offset], bufsiz - offset, "%s[%p] = { ",
-			dx_kind(dmsg), dmsg);
+			_dispatch_object_class_name(dmsg), dmsg);
 	offset += _dispatch_object_debug_attr(dmsg, buf + offset, bufsiz - offset);
 	offset += dsnprintf(&buf[offset], bufsiz - offset, "opts/err = 0x%x, "
 			"msgh[%p] = { ", dmsg->dmsg_options, dmsg->dmsg_buf);
@@ -2732,11 +2900,7 @@
 static dispatch_queue_t
 _dispatch_mach_msg_context_async_reply_queue(void *msg_context)
 {
-	if (DISPATCH_MACH_XPC_SUPPORTS_ASYNC_REPLIES(_dispatch_mach_xpc_hooks)) {
-		return _dispatch_mach_xpc_hooks->dmxh_msg_context_reply_queue(
-				msg_context);
-	}
-	return NULL;
+	return _dispatch_mach_xpc_hooks->dmxh_msg_context_reply_queue(msg_context);
 }
 
 static dispatch_continuation_t
@@ -2794,17 +2958,11 @@
 
 	bufRequest = alloca(rcv_size);
 	bufRequest->RetCode = 0;
-	for (mach_vm_address_t p = mach_vm_trunc_page(bufRequest + vm_page_size);
-			p < (mach_vm_address_t)bufRequest + rcv_size; p += vm_page_size) {
-		*(char*)p = 0; // ensure alloca buffer doesn't overlap with stack guard
-	}
+	_dispatch_mach_stack_probe(bufRequest, rcv_size);
 
 	bufReply = alloca(rcv_size);
 	bufReply->Head.msgh_size = 0;
-	for (mach_vm_address_t p = mach_vm_trunc_page(bufReply + vm_page_size);
-			p < (mach_vm_address_t)bufReply + rcv_size; p += vm_page_size) {
-		*(char*)p = 0; // ensure alloca buffer doesn't overlap with stack guard
-	}
+	_dispatch_mach_stack_probe(bufReply, rcv_size);
 
 #if DISPATCH_DEBUG
 	options |= MACH_RCV_LARGE; // rdar://problem/8422992
@@ -2825,7 +2983,7 @@
 
 		tmp_options = options;
 
-		if (slowpath(kr)) {
+		if (unlikely(kr)) {
 			switch (kr) {
 			case MACH_SEND_INVALID_DEST:
 			case MACH_SEND_TIMED_OUT:
@@ -2862,6 +3020,9 @@
 							"requested size %zd: id = 0x%x, size = %d",
 							maxmsgsz, bufReply->Head.msgh_id,
 							bufReply->Head.msgh_size);
+					if (bufReply->Head.msgh_bits & MACH_MSGH_BITS_COMPLEX) {
+						mach_msg_destroy(&bufReply->Head);
+					}
 				}
 				if (large_buf) {
 					free(large_buf);
@@ -2898,12 +3059,13 @@
 #pragma clang diagnostic ignored "-Wdeprecated-declarations"
 		int r = proc_importance_assertion_begin_with_msg(&bufRequest->Head,
 				NULL, &assertion_token);
-		if (r && slowpath(r != EIO)) {
+		if (r && r != EIO) {
 			(void)dispatch_assume_zero(r);
 		}
 #pragma clang diagnostic pop
 #endif
 		_voucher_replace(voucher_create_with_mach_msg(&bufRequest->Head));
+		bufReply->Head = (mach_msg_header_t){ };
 		demux_success = callback(&bufRequest->Head, &bufReply->Head);
 
 		if (!demux_success) {
@@ -2913,7 +3075,7 @@
 		} else if (!(bufReply->Head.msgh_bits & MACH_MSGH_BITS_COMPLEX)) {
 			// if MACH_MSGH_BITS_COMPLEX is _not_ set, then bufReply->RetCode
 			// is present
-			if (slowpath(bufReply->RetCode)) {
+			if (unlikely(bufReply->RetCode)) {
 				if (bufReply->RetCode == MIG_NO_REPLY) {
 					continue;
 				}
@@ -2945,8 +3107,114 @@
 }
 
 #pragma mark -
+#pragma mark dispatch_mach_mig_demux
+
+static char const * const
+_dispatch_mach_mig_demux_context_key = "mach_mig_demux";
+
+static const mig_routine_descriptor *
+_dispatch_mach_mig_resolve(mach_msg_id_t msgid,
+		const struct mig_subsystem *const subsystems[], size_t count)
+{
+	const mig_routine_descriptor *desc;
+
+	for (size_t i = 0; i < count; i++) {
+		if (subsystems[i]->start <= msgid && msgid < subsystems[i]->end) {
+			desc = &subsystems[i]->routine[msgid - subsystems[i]->start];
+			return desc->stub_routine ? desc : NULL;
+		}
+	}
+	return NULL;
+}
+
+bool
+dispatch_mach_mig_demux(void *context,
+		const struct mig_subsystem *const subsystems[], size_t count,
+		dispatch_mach_msg_t dmsg)
+{
+	dispatch_thread_context_s dmmd_ctx = {
+		.dtc_key = _dispatch_mach_mig_demux_context_key,
+		.dtc_mig_demux_ctx = context,
+	};
+	mach_msg_header_t *hdr = dispatch_mach_msg_get_msg(dmsg, NULL);
+	mach_msg_id_t msgid = hdr->msgh_id;
+	const mig_routine_descriptor *desc;
+	mig_reply_error_t *bufReply;
+	mach_msg_size_t reply_size;
+	kern_return_t kr;
+
+	desc = _dispatch_mach_mig_resolve(msgid, subsystems, count);
+	if (!desc) return false;
+
+	_dispatch_thread_context_push(&dmmd_ctx);
+
+	reply_size = desc->max_reply_msg + MAX_TRAILER_SIZE;
+	bufReply = alloca(reply_size);
+	_dispatch_mach_stack_probe(bufReply, reply_size);
+	bufReply->Head = (mach_msg_header_t){
+		.msgh_bits = MACH_MSGH_BITS(MACH_MSGH_BITS_REMOTE(hdr->msgh_bits), 0),
+		.msgh_remote_port = hdr->msgh_remote_port,
+		.msgh_size = sizeof(mig_reply_error_t),
+		.msgh_id = msgid + 100,
+	};
+
+	desc->stub_routine(hdr, &bufReply->Head);
+
+	// if MACH_MSGH_BITS_COMPLEX is _not_ set, then bufReply->RetCode is present
+	if (unlikely(!(bufReply->Head.msgh_bits & MACH_MSGH_BITS_COMPLEX) &&
+			bufReply->RetCode)) {
+		// destroy the request - but not the reply port
+		hdr->msgh_remote_port = 0;
+		if (bufReply->RetCode != MIG_NO_REPLY &&
+				(hdr->msgh_bits & MACH_MSGH_BITS_COMPLEX)) {
+			mach_msg_destroy(hdr);
+		}
+	}
+
+	if (bufReply->Head.msgh_remote_port) {
+		mach_msg_option_t options = MACH_SEND_MSG;
+		if (MACH_MSGH_BITS_REMOTE(bufReply->Head.msgh_bits) !=
+				MACH_MSG_TYPE_MOVE_SEND_ONCE) {
+			options |= MACH_SEND_TIMEOUT;
+		}
+		kr = mach_msg(&bufReply->Head, options, bufReply->Head.msgh_size,
+				0, MACH_PORT_NULL, 0, MACH_PORT_NULL);
+		switch (kr) {
+		case KERN_SUCCESS:
+			break;
+		case MACH_SEND_INVALID_DEST:
+		case MACH_SEND_TIMED_OUT:
+			if (bufReply->Head.msgh_bits & MACH_MSGH_BITS_COMPLEX) {
+				mach_msg_destroy(&bufReply->Head);
+			}
+			break;
+		default:
+			DISPATCH_VERIFY_MIG(kr);
+			DISPATCH_CLIENT_CRASH(kr,
+					"dispatch_mach_mig_demux: mach_msg(MACH_SEND_MSG) failed");
+		}
+	}
+
+	_dispatch_thread_context_pop(&dmmd_ctx);
+	return true;
+}
+
+void *
+dispatch_mach_mig_demux_get_context(void)
+{
+	dispatch_thread_context_t dtc;
+	dtc = _dispatch_thread_context_find(_dispatch_mach_mig_demux_context_key);
+	if (unlikely(dtc == NULL)) {
+		DISPATCH_CLIENT_CRASH(0, "dispatch_mach_mig_demux_get_context "
+				"not called from dispatch_mach_mig_demux context");
+	}
+	return dtc->dtc_mig_demux_ctx;
+}
+
+#pragma mark -
 #pragma mark dispatch_mach_debug
 
+DISPATCH_COLD
 static size_t
 _dispatch_mach_debug_attr(dispatch_mach_t dm, char *buf, size_t bufsiz)
 {
@@ -2972,7 +3240,7 @@
 	size_t offset = 0;
 	offset += dsnprintf(&buf[offset], bufsiz - offset, "%s[%p] = { ",
 			dm->dq_label && !dm->dm_cancel_handler_called ? dm->dq_label :
-			dx_kind(dm), dm);
+			_dispatch_object_class_name(dm), dm);
 	offset += _dispatch_object_debug_attr(dm, &buf[offset], bufsiz - offset);
 	offset += _dispatch_mach_debug_attr(dm, &buf[offset], bufsiz - offset);
 	offset += dsnprintf(&buf[offset], bufsiz - offset, "}");
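
For readers unfamiliar with the demux entry points added to mach.c above, here is a minimal, hypothetical sketch of how a channel's receive handler could drive dispatch_mach_mig_demux() and how a MIG routine could recover its context. The handler name, the example_subsystem symbol, and the header assumptions are illustrative only; just the dispatch_mach_mig_demux* calls come from this patch.

/* Illustrative only -- not part of this patch. Assumes the private
 * <dispatch/mach_private.h> interface and a MIG-generated subsystem that is
 * declared here, for simplicity, as a plain struct mig_subsystem. */
extern const struct mig_subsystem example_subsystem;

static void
example_channel_handler(void *ctxt, dispatch_mach_reason_t reason,
		dispatch_mach_msg_t dmsg, mach_error_t error)
{
	(void)error;
	if (reason != DISPATCH_MACH_MESSAGE_RECEIVED) return;

	const struct mig_subsystem *const subsystems[] = { &example_subsystem };
	if (!dispatch_mach_mig_demux(ctxt, subsystems, 1, dmsg)) {
		// no routine in the subsystem matched: one way to dispose of the
		// unhandled request, since the demux only consumes messages it handled
		mach_msg_destroy(dispatch_mach_msg_get_msg(dmsg, NULL));
	}
	// Inside a MIG server routine reached through the demux, the context
	// passed above can be recovered with dispatch_mach_mig_demux_get_context().
}
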
diff --git a/src/mach_internal.h b/src/mach_internal.h
index 8c8edd8..90a5984 100644
--- a/src/mach_internal.h
+++ b/src/mach_internal.h
@@ -55,16 +55,15 @@
 	DISPATCH_MACH_RECV_MESSAGE = 0x2,
 };
 
+DISPATCH_CLASS_DECL(mach, QUEUE);
+DISPATCH_CLASS_DECL(mach_msg, OBJECT);
 
-DISPATCH_CLASS_DECL(mach);
-DISPATCH_CLASS_DECL(mach_msg);
-
-#ifndef __cplusplus
 struct dispatch_mach_s {
-	DISPATCH_SOURCE_HEADER(mach);
+	DISPATCH_SOURCE_CLASS_HEADER(mach);
 	dispatch_mach_send_refs_t dm_send_refs;
 	dispatch_xpc_term_refs_t dm_xpc_term_refs;
 } DISPATCH_ATOMIC64_ALIGN;
+dispatch_assert_valid_lane_type(dispatch_mach_s);
 
 struct dispatch_mach_msg_s {
 	DISPATCH_OBJECT_HEADER(mach_msg);
@@ -91,34 +90,37 @@
 		dm->dm_recv_refs->dmrr_handler_ctxt = (void *)0xbadfeed;
 	}
 }
-#endif // __cplusplus
 
-dispatch_source_t
-_dispatch_source_create_mach_msg_direct_recv(mach_port_t recvp,
-		const struct dispatch_continuation_s *dc);
+extern dispatch_mach_xpc_hooks_t _dispatch_mach_xpc_hooks;
+extern const struct dispatch_mach_xpc_hooks_s _dispatch_mach_xpc_hooks_default;
 
+void _dispatch_mach_ipc_handoff_invoke(dispatch_continuation_t dc,
+		dispatch_invoke_context_t dic, dispatch_invoke_flags_t flags);
 void _dispatch_mach_msg_async_reply_invoke(dispatch_continuation_t dc,
 		dispatch_invoke_context_t dic, dispatch_invoke_flags_t flags);
 void _dispatch_mach_dispose(dispatch_mach_t dm, bool *allow_free);
-void _dispatch_mach_finalize_activation(dispatch_mach_t dm, bool *allow_resume);
+void _dispatch_mach_activate(dispatch_mach_t dm, bool *allow_resume);
 void _dispatch_mach_invoke(dispatch_mach_t dm, dispatch_invoke_context_t dic,
 		dispatch_invoke_flags_t flags);
 void _dispatch_mach_wakeup(dispatch_mach_t dm, dispatch_qos_t qos,
 		dispatch_wakeup_flags_t flags);
+DISPATCH_COLD
 size_t _dispatch_mach_debug(dispatch_mach_t dm, char* buf, size_t bufsiz);
-void _dispatch_mach_merge_notification(dispatch_unote_t du,
-		uint32_t flags, uintptr_t data, uintptr_t status,
-		pthread_priority_t pp);
+void _dispatch_mach_notification_merge_evt(dispatch_unote_t du,
+		uint32_t flags, uintptr_t data, pthread_priority_t pp);
 void _dispatch_mach_merge_msg(dispatch_unote_t du, uint32_t flags,
-		mach_msg_header_t *msg, mach_msg_size_t msgsz);
+		mach_msg_header_t *msg, mach_msg_size_t msgsz,
+		pthread_priority_t msg_pp, pthread_priority_t ovr_pp);
 void _dispatch_mach_reply_merge_msg(dispatch_unote_t du, uint32_t flags,
-		mach_msg_header_t *msg, mach_msg_size_t msgsz);
-void _dispatch_xpc_sigterm_merge(dispatch_unote_t du, uint32_t flags,
-		uintptr_t data, uintptr_t status, pthread_priority_t pp);
+		mach_msg_header_t *msg, mach_msg_size_t msgsz,
+		pthread_priority_t msg_pp, pthread_priority_t ovr_pp);
+void _dispatch_xpc_sigterm_merge_evt(dispatch_unote_t du, uint32_t flags,
+		uintptr_t data, pthread_priority_t pp);
 
 void _dispatch_mach_msg_dispose(dispatch_mach_msg_t dmsg, bool *allow_free);
 void _dispatch_mach_msg_invoke(dispatch_mach_msg_t dmsg,
 		dispatch_invoke_context_t dic, dispatch_invoke_flags_t flags);
+DISPATCH_COLD
 size_t _dispatch_mach_msg_debug(dispatch_mach_msg_t dmsg, char* buf,
 		size_t bufsiz);
 
diff --git a/src/object.c b/src/object.c
index 86d1005..261e199 100644
--- a/src/object.c
+++ b/src/object.c
@@ -27,7 +27,7 @@
 _os_object_retain_count(_os_object_t obj)
 {
 	int xref_cnt = obj->os_obj_xref_cnt;
-	if (slowpath(xref_cnt == _OS_OBJECT_GLOBAL_REFCNT)) {
+	if (unlikely(xref_cnt == _OS_OBJECT_GLOBAL_REFCNT)) {
 		return ULONG_MAX; // global object
 	}
 	return (unsigned long)(xref_cnt + 1);
@@ -65,8 +65,8 @@
 _os_object_t
 _os_object_retain(_os_object_t obj)
 {
-	int xref_cnt = _os_object_xrefcnt_inc(obj);
-	if (slowpath(xref_cnt <= 0)) {
+	int xref_cnt = _os_object_xrefcnt_inc_orig(obj);
+	if (unlikely(xref_cnt < 0)) {
 		_OS_OBJECT_CLIENT_CRASH("Resurrection of an object");
 	}
 	return obj;
@@ -76,11 +76,11 @@
 _os_object_t
 _os_object_retain_with_resurrect(_os_object_t obj)
 {
-	int xref_cnt = _os_object_xrefcnt_inc(obj);
-	if (slowpath(xref_cnt < 0)) {
+	int xref_cnt = _os_object_xrefcnt_inc_orig(obj) + 1;
+	if (unlikely(xref_cnt < 0)) {
 		_OS_OBJECT_CLIENT_CRASH("Resurrection of an over-released object");
 	}
-	if (slowpath(xref_cnt == 0)) {
+	if (unlikely(xref_cnt == 0)) {
 		_os_object_retain_internal(obj);
 	}
 	return obj;
@@ -91,10 +91,10 @@
 _os_object_release(_os_object_t obj)
 {
 	int xref_cnt = _os_object_xrefcnt_dec(obj);
-	if (fastpath(xref_cnt >= 0)) {
+	if (likely(xref_cnt >= 0)) {
 		return;
 	}
-	if (slowpath(xref_cnt < -1)) {
+	if (unlikely(xref_cnt < -1)) {
 		_OS_OBJECT_CLIENT_CRASH("Over-release of an object");
 	}
 	return _os_object_xref_dispose(obj);
@@ -105,13 +105,13 @@
 {
 	int xref_cnt, nxref_cnt;
 	os_atomic_rmw_loop2o(obj, os_obj_xref_cnt, xref_cnt, nxref_cnt, relaxed, {
-		if (slowpath(xref_cnt == _OS_OBJECT_GLOBAL_REFCNT)) {
+		if (unlikely(xref_cnt == _OS_OBJECT_GLOBAL_REFCNT)) {
 			os_atomic_rmw_loop_give_up(return true); // global object
 		}
-		if (slowpath(xref_cnt == -1)) {
+		if (unlikely(xref_cnt == -1)) {
 			os_atomic_rmw_loop_give_up(return false);
 		}
-		if (slowpath(xref_cnt < -1)) {
+		if (unlikely(xref_cnt < -1)) {
 			os_atomic_rmw_loop_give_up(goto overrelease);
 		}
 		nxref_cnt = xref_cnt + 1;
@@ -125,10 +125,10 @@
 _os_object_allows_weak_reference(_os_object_t obj)
 {
 	int xref_cnt = obj->os_obj_xref_cnt;
-	if (slowpath(xref_cnt == -1)) {
+	if (unlikely(xref_cnt == -1)) {
 		return false;
 	}
-	if (slowpath(xref_cnt < -1)) {
+	if (unlikely(xref_cnt < -1)) {
 		_OS_OBJECT_CLIENT_CRASH("Over-release of an object");
 	}
 	return true;
@@ -190,18 +190,21 @@
 void
 _dispatch_xref_dispose(dispatch_object_t dou)
 {
-	unsigned long metatype = dx_metatype(dou._do);
-	if (metatype == _DISPATCH_QUEUE_TYPE || metatype == _DISPATCH_SOURCE_TYPE) {
+	if (dx_cluster(dou._do) == _DISPATCH_QUEUE_CLUSTER) {
 		_dispatch_queue_xref_dispose(dou._dq);
 	}
-	if (dx_type(dou._do) == DISPATCH_SOURCE_KEVENT_TYPE) {
+	switch (dx_type(dou._do)) {
+	case DISPATCH_SOURCE_KEVENT_TYPE:
 		_dispatch_source_xref_dispose(dou._ds);
+		break;
 #if HAVE_MACH
-	} else if (dx_type(dou._do) == DISPATCH_MACH_CHANNEL_TYPE) {
+	case DISPATCH_MACH_CHANNEL_TYPE:
 		_dispatch_mach_xref_dispose(dou._dm);
+		break;
 #endif
-	} else if (dx_type(dou._do) == DISPATCH_QUEUE_RUNLOOP_TYPE) {
-		_dispatch_runloop_queue_xref_dispose(dou._dq);
+	case DISPATCH_QUEUE_RUNLOOP_TYPE:
+		_dispatch_runloop_queue_xref_dispose(dou._dl);
+		break;
 	}
 	return _dispatch_release_tailcall(dou._os_obj);
 }
@@ -211,14 +214,20 @@
 _dispatch_dispose(dispatch_object_t dou)
 {
 	dispatch_queue_t tq = dou._do->do_targetq;
-	dispatch_function_t func = dou._do->do_finalizer;
+	dispatch_function_t func = _dispatch_object_finalizer(dou);
 	void *ctxt = dou._do->do_ctxt;
 	bool allow_free = true;
 
-	if (slowpath(dou._do->do_next != DISPATCH_OBJECT_LISTLESS)) {
+	if (unlikely(dou._do->do_next != DISPATCH_OBJECT_LISTLESS)) {
 		DISPATCH_INTERNAL_CRASH(dou._do->do_next, "Release while enqueued");
 	}
 
+	if (unlikely(tq && tq->dq_serialnum == DISPATCH_QUEUE_SERIAL_NUMBER_WLF)) {
+		// the workloop fallback global queue is never serviced, so redirect
+		// the finalizer onto a global queue
+		tq = _dispatch_get_root_queue(DISPATCH_QOS_DEFAULT, false)->_as_dq;
+	}
+
 	dx_dispose(dou._do, &allow_free);
 
 	// Past this point, the only thing left of the object is its memory
@@ -236,9 +245,7 @@
 dispatch_get_context(dispatch_object_t dou)
 {
 	DISPATCH_OBJECT_TFB(_dispatch_objc_get_context, dou);
-	if (unlikely(dou._do->do_ref_cnt == DISPATCH_OBJECT_GLOBAL_REFCNT ||
-			dx_hastypeflag(dou._do, QUEUE_ROOT) ||
-			dx_hastypeflag(dou._do, QUEUE_BASE))) {
+	if (unlikely(dx_hastypeflag(dou._do, NO_CONTEXT))) {
 		return NULL;
 	}
 	return dou._do->do_ctxt;
@@ -248,9 +255,7 @@
 dispatch_set_context(dispatch_object_t dou, void *context)
 {
 	DISPATCH_OBJECT_TFB(_dispatch_objc_set_context, dou, context);
-	if (unlikely(dou._do->do_ref_cnt == DISPATCH_OBJECT_GLOBAL_REFCNT ||
-			dx_hastypeflag(dou._do, QUEUE_ROOT) ||
-			dx_hastypeflag(dou._do, QUEUE_BASE))) {
+	if (unlikely(dx_hastypeflag(dou._do, NO_CONTEXT))) {
 		return;
 	}
 	dou._do->do_ctxt = context;
@@ -260,36 +265,45 @@
 dispatch_set_finalizer_f(dispatch_object_t dou, dispatch_function_t finalizer)
 {
 	DISPATCH_OBJECT_TFB(_dispatch_objc_set_finalizer_f, dou, finalizer);
-	if (unlikely(dou._do->do_ref_cnt == DISPATCH_OBJECT_GLOBAL_REFCNT ||
-			dx_hastypeflag(dou._do, QUEUE_ROOT) ||
-			dx_hastypeflag(dou._do, QUEUE_BASE))) {
+	if (unlikely(dx_hastypeflag(dou._do, NO_CONTEXT))) {
 		return;
 	}
-	dou._do->do_finalizer = finalizer;
+	_dispatch_object_set_finalizer(dou, finalizer);
 }
 
 void
 dispatch_set_target_queue(dispatch_object_t dou, dispatch_queue_t tq)
 {
 	DISPATCH_OBJECT_TFB(_dispatch_objc_set_target_queue, dou, tq);
-	if (dx_vtable(dou._do)->do_set_targetq) {
-		dx_vtable(dou._do)->do_set_targetq(dou._do, tq);
-	} else if (likely(dou._do->do_ref_cnt != DISPATCH_OBJECT_GLOBAL_REFCNT &&
-			!dx_hastypeflag(dou._do, QUEUE_ROOT) &&
-			!dx_hastypeflag(dou._do, QUEUE_BASE))) {
-		if (slowpath(!tq)) {
-			tq = _dispatch_get_root_queue(DISPATCH_QOS_DEFAULT, false);
-		}
-		_dispatch_object_set_target_queue_inline(dou._do, tq);
+	if (unlikely(_dispatch_object_is_global(dou) ||
+			_dispatch_object_is_root_or_base_queue(dou))) {
+		return;
 	}
+	if (dx_cluster(dou._do) == _DISPATCH_QUEUE_CLUSTER) {
+		return _dispatch_lane_set_target_queue(dou._dl, tq);
+	}
+	if (dx_type(dou._do) == DISPATCH_IO_TYPE) {
+		// <rdar://problem/34417216> FIXME: dispatch IO should be a "source"
+		return _dispatch_io_set_target_queue(dou._dchannel, tq);
+	}
+	if (tq == DISPATCH_TARGET_QUEUE_DEFAULT) {
+		tq = _dispatch_get_default_queue(false);
+	}
+	_dispatch_object_set_target_queue_inline(dou._do, tq);
 }
 
 void
 dispatch_activate(dispatch_object_t dou)
 {
 	DISPATCH_OBJECT_TFB(_dispatch_objc_activate, dou);
-	if (dx_vtable(dou._do)->do_resume) {
-		dx_vtable(dou._do)->do_resume(dou._do, true);
+	if (unlikely(_dispatch_object_is_global(dou))) {
+		return;
+	}
+	if (dx_metatype(dou._do) == _DISPATCH_WORKLOOP_TYPE) {
+		return _dispatch_workloop_activate(dou._dwl);
+	}
+	if (dx_cluster(dou._do) == _DISPATCH_QUEUE_CLUSTER) {
+		return _dispatch_lane_resume(dou._dl, true);
 	}
 }
 
@@ -297,8 +311,12 @@
 dispatch_suspend(dispatch_object_t dou)
 {
 	DISPATCH_OBJECT_TFB(_dispatch_objc_suspend, dou);
-	if (dx_vtable(dou._do)->do_suspend) {
-		dx_vtable(dou._do)->do_suspend(dou._do);
+	if (unlikely(_dispatch_object_is_global(dou) ||
+			_dispatch_object_is_root_or_base_queue(dou))) {
+		return;
+	}
+	if (dx_cluster(dou._do) == _DISPATCH_QUEUE_CLUSTER) {
+		return _dispatch_lane_suspend(dou._dl);
 	}
 }
 
@@ -306,10 +324,12 @@
 dispatch_resume(dispatch_object_t dou)
 {
 	DISPATCH_OBJECT_TFB(_dispatch_objc_resume, dou);
-	// the do_suspend below is not a typo. Having a do_resume but no do_suspend
-	// allows for objects to support activate, but have no-ops suspend/resume
-	if (dx_vtable(dou._do)->do_suspend) {
-		dx_vtable(dou._do)->do_resume(dou._do, false);
+	if (unlikely(_dispatch_object_is_global(dou) ||
+			_dispatch_object_is_root_or_base_queue(dou))) {
+		return;
+	}
+	if (dx_cluster(dou._do) == _DISPATCH_QUEUE_CLUSTER) {
+		_dispatch_lane_resume(dou._dl, false);
 	}
 }
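
As a side note on the rewritten entry points above (dispatch_set_target_queue, dispatch_activate, dispatch_suspend, dispatch_resume), here is a minimal caller-side sketch of the path they now take for a plain lane; the queue label and QoS below are illustrative only.

#include <dispatch/dispatch.h>

int main(void) {
	dispatch_queue_attr_t attr =
			dispatch_queue_attr_make_initially_inactive(DISPATCH_QUEUE_SERIAL);
	dispatch_queue_t q = dispatch_queue_create("com.example.inactive", attr);

	// still allowed while inactive: routed to _dispatch_lane_set_target_queue()
	dispatch_set_target_queue(q,
			dispatch_get_global_queue(QOS_CLASS_UTILITY, 0));
	dispatch_async(q, ^{ /* runs only once the queue is activated */ });
	// routed to _dispatch_lane_resume(dl, true) for queue-cluster objects
	dispatch_activate(q);

	dispatch_main();
}
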
 
diff --git a/src/object.m b/src/object.m
index efee829..925fccc 100644
--- a/src/object.m
+++ b/src/object.m
@@ -52,7 +52,7 @@
 {
 	id obj;
 	size -= sizeof(((struct _os_object_s *)NULL)->os_obj_isa);
-	while (!fastpath(obj = class_createInstance(cls, size))) {
+	while (unlikely(!(obj = class_createInstance(cls, size)))) {
 		_dispatch_temporary_resource_shortage();
 	}
 	return obj;
@@ -82,7 +82,11 @@
 	_Block_use_RR2(&callbacks);
 #if DISPATCH_COCOA_COMPAT
 	const char *v = getenv("OBJC_DEBUG_MISSING_POOLS");
-	_os_object_debug_missing_pools = v && !strcmp(v, "YES");
+	if (v) _os_object_debug_missing_pools = _dispatch_parse_bool(v);
+	v = getenv("DISPATCH_DEBUG_MISSING_POOLS");
+	if (v) _os_object_debug_missing_pools = _dispatch_parse_bool(v);
+	v = getenv("LIBDISPATCH_DEBUG_MISSING_POOLS");
+	if (v) _os_object_debug_missing_pools = _dispatch_parse_bool(v);
 #endif
 }
 
@@ -173,6 +177,9 @@
 	return _os_object_retain_count(self);
 }
 
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wdeprecated-implementations"
+
 -(BOOL)retainWeakReference {
 	return _os_object_retain_weak(self);
 }
@@ -181,6 +188,8 @@
 	return _os_object_allows_weak_reference(self);
 }
 
+#pragma clang diagnostic pop
+
 - (void)_xref_dispose {
 	return _os_object_release_internal(self);
 }
@@ -290,11 +299,11 @@
 	if (dx_vtable(obj)->do_debug) {
 		dx_debug(obj, buf, sizeof(buf));
 	} else {
-		strlcpy(buf, dx_kind(obj), sizeof(buf));
+		strlcpy(buf, object_getClassName(self), sizeof(buf));
 	}
 	NSString *format = [nsstring stringWithUTF8String:"<%s: %s>"];
 	if (!format) return nil;
-	return [nsstring stringWithFormat:format, class_getName([self class]), buf];
+	return [nsstring stringWithFormat:format, object_getClassName(self), buf];
 }
 
 - (void)dealloc DISPATCH_NORETURN {
@@ -313,7 +322,7 @@
 	if (!nsstring) return nil;
 	NSString *format = [nsstring stringWithUTF8String:"<%s: %s>"];
 	if (!format) return nil;
-	return [nsstring stringWithFormat:format, class_getName([self class]),
+	return [nsstring stringWithFormat:format, object_getClassName(self),
 			dispatch_queue_get_label(self), self];
 }
 
@@ -354,7 +363,7 @@
 
 - (void)_xref_dispose {
 	_dispatch_queue_xref_dispose((struct dispatch_queue_s *)self);
-	_dispatch_runloop_queue_xref_dispose(self);
+	_dispatch_runloop_queue_xref_dispose((dispatch_lane_t)self);
 	[super _xref_dispose];
 }
 
@@ -371,12 +380,15 @@
 #endif
 DISPATCH_CLASS_IMPL(semaphore)
 DISPATCH_CLASS_IMPL(group)
+DISPATCH_CLASS_IMPL(workloop)
 DISPATCH_CLASS_IMPL(queue_serial)
 DISPATCH_CLASS_IMPL(queue_concurrent)
 DISPATCH_CLASS_IMPL(queue_main)
-DISPATCH_CLASS_IMPL(queue_root)
+DISPATCH_CLASS_IMPL(queue_global)
+#if DISPATCH_USE_PTHREAD_ROOT_QUEUES
+DISPATCH_CLASS_IMPL(queue_pthread_root)
+#endif
 DISPATCH_CLASS_IMPL(queue_mgr)
-DISPATCH_CLASS_IMPL(queue_specific_queue)
 DISPATCH_CLASS_IMPL(queue_attr)
 DISPATCH_CLASS_IMPL(mach_msg)
 DISPATCH_CLASS_IMPL(io)
@@ -410,7 +422,7 @@
 	_voucher_debug(self, buf, sizeof(buf));
 	NSString *format = [nsstring stringWithUTF8String:"<%s: %s>"];
 	if (!format) return nil;
-	return [nsstring stringWithFormat:format, class_getName([self class]), buf];
+	return [nsstring stringWithFormat:format, object_getClassName(self), buf];
 }
 
 @end
@@ -440,7 +452,7 @@
 void
 _dispatch_last_resort_autorelease_pool_push(dispatch_invoke_context_t dic)
 {
-	if (!slowpath(_os_object_debug_missing_pools)) {
+	if (likely(!_os_object_debug_missing_pools)) {
 		dic->dic_autorelease_pool = _dispatch_autorelease_pool_push();
 	}
 }
@@ -448,7 +460,7 @@
 void
 _dispatch_last_resort_autorelease_pool_pop(dispatch_invoke_context_t dic)
 {
-	if (!slowpath(_os_object_debug_missing_pools)) {
+	if (likely(!_os_object_debug_missing_pools)) {
 		_dispatch_autorelease_pool_pop(dic->dic_autorelease_pool);
 		dic->dic_autorelease_pool = NULL;
 	}
diff --git a/src/object_internal.h b/src/object_internal.h
index 0156503..6985dec 100644
--- a/src/object_internal.h
+++ b/src/object_internal.h
@@ -54,7 +54,7 @@
 #endif
 
 // define a new proper class
-#define OS_OBJECT_CLASS_DECL(name, super, ...) \
+#define OS_OBJECT_CLASS_DECL(name, ...) \
 		struct name##_s; \
 		struct name##_extra_vtable_s { \
 			__VA_ARGS__; \
@@ -71,61 +71,60 @@
 #define OS_OBJECT_INTERNAL_CLASS_DECL(name, super, ...) \
 		OS_OBJECT_OBJC_RUNTIME_VISIBLE \
 		OS_OBJECT_DECL_IMPL_CLASS(name, OS_OBJECT_CLASS(super)); \
-		OS_OBJECT_CLASS_DECL(name, super, ## __VA_ARGS__)
+		OS_OBJECT_CLASS_DECL(name, ## __VA_ARGS__)
 #elif OS_OBJECT_USE_OBJC
 #define OS_OBJECT_INTERNAL_CLASS_DECL(name, super, ...) \
 		OS_OBJECT_DECL(name); \
-		OS_OBJECT_CLASS_DECL(name, super, ## __VA_ARGS__)
+		OS_OBJECT_CLASS_DECL(name, ## __VA_ARGS__)
 #else
 #define OS_OBJECT_INTERNAL_CLASS_DECL(name, super, ...) \
 		typedef struct name##_s *name##_t; \
-		OS_OBJECT_CLASS_DECL(name, super, ## __VA_ARGS__)
+		OS_OBJECT_CLASS_DECL(name, ## __VA_ARGS__)
 #endif
 
-#define DISPATCH_CLASS_DECL_BARE(name) \
-		OS_OBJECT_CLASS_DECL(dispatch_##name, dispatch_object, \
-				DISPATCH_OBJECT_VTABLE_HEADER(dispatch_##name))
+#define DISPATCH_CLASS_DECL_BARE(name, cluster) \
+		OS_OBJECT_CLASS_DECL(dispatch_##name, \
+		DISPATCH_##cluster##_VTABLE_HEADER(dispatch_##name))
 
-#define DISPATCH_CLASS_DECL(name) \
+#define DISPATCH_CLASS_DECL(name, cluster) \
 		_OS_OBJECT_DECL_PROTOCOL(dispatch_##name, dispatch_object) \
 		_OS_OBJECT_CLASS_IMPLEMENTS_PROTOCOL(dispatch_##name, dispatch_##name) \
-		DISPATCH_CLASS_DECL_BARE(name)
+		DISPATCH_CLASS_DECL_BARE(name, cluster)
 
-#define DISPATCH_INTERNAL_CLASS_DECL(name) \
+#define DISPATCH_SUBCLASS_DECL(name, super, ctype) \
+		_OS_OBJECT_DECL_PROTOCOL(dispatch_##name, dispatch_##super); \
+		_OS_OBJECT_CLASS_IMPLEMENTS_PROTOCOL(dispatch_##name, dispatch_##name) \
+		OS_OBJECT_SUBCLASS_DECL(dispatch_##name, dispatch_##ctype)
+
+#define DISPATCH_INTERNAL_CLASS_DECL(name, cluster) \
 		DISPATCH_DECL(dispatch_##name); \
-		DISPATCH_CLASS_DECL(name)
+		DISPATCH_CLASS_DECL(name, cluster)
 
 // define a new subclass used in a cluster
-#define OS_OBJECT_SUBCLASS_DECL(name, super) \
-		_OS_OBJECT_DECL_SUBCLASS_INTERFACE(name, super) \
+#define OS_OBJECT_SUBCLASS_DECL(name, ctype) \
 		struct name##_s; \
-		OS_OBJECT_EXTRA_VTABLE_DECL(name, super) \
-		extern const struct super##_vtable_s OS_OBJECT_CLASS_SYMBOL(name) \
+		OS_OBJECT_EXTRA_VTABLE_DECL(name, ctype) \
+		extern const struct ctype##_vtable_s OS_OBJECT_CLASS_SYMBOL(name) \
 				__asm__(OS_OBJC_CLASS_RAW_SYMBOL_NAME(OS_OBJECT_CLASS(name)))
 
-#define DISPATCH_SUBCLASS_DECL(name, super) \
-		OS_OBJECT_SUBCLASS_DECL(dispatch_##name, super)
-
 #if OS_OBJECT_SWIFT3
 // define a new internal subclass used in a class cluster
-#define OS_OBJECT_INTERNAL_SUBCLASS_DECL(name, super) \
+#define OS_OBJECT_INTERNAL_SUBCLASS_DECL(name, super, ctype) \
 		_OS_OBJECT_DECL_PROTOCOL(name, super); \
-		OS_OBJECT_SUBCLASS_DECL(name, super)
-
-#define DISPATCH_INTERNAL_SUBCLASS_DECL(name, super) \
-		_OS_OBJECT_DECL_PROTOCOL(dispatch_##name, dispatch_##super) \
-		DISPATCH_SUBCLASS_DECL(name, dispatch_##super)
+		_OS_OBJECT_DECL_SUBCLASS_INTERFACE(name, super) \
+		OS_OBJECT_SUBCLASS_DECL(name, ctype)
 #else
 // define a new internal subclass used in a class cluster
-#define OS_OBJECT_INTERNAL_SUBCLASS_DECL(name, super) \
-		OS_OBJECT_DECL_SUBCLASS(name, super); \
-		OS_OBJECT_SUBCLASS_DECL(name, super)
-
-#define DISPATCH_INTERNAL_SUBCLASS_DECL(name, super) \
-		OS_OBJECT_DECL_SUBCLASS(dispatch_##name, dispatch_##super); \
-		DISPATCH_SUBCLASS_DECL(name, dispatch_##super)
+#define OS_OBJECT_INTERNAL_SUBCLASS_DECL(name, super, ctype) \
+		OS_OBJECT_DECL_SUBCLASS(name, ctype); \
+		_OS_OBJECT_DECL_SUBCLASS_INTERFACE(name, super) \
+		OS_OBJECT_SUBCLASS_DECL(name, ctype)
 #endif
 
+#define DISPATCH_INTERNAL_SUBCLASS_DECL(name, super, ctype) \
+		OS_OBJECT_INTERNAL_SUBCLASS_DECL(dispatch_##name, dispatch_##super, \
+				dispatch_##ctype)
+
 // vtable symbols
 #define OS_OBJECT_VTABLE(name)		(&OS_OBJECT_CLASS_SYMBOL(name))
 #define DISPATCH_OBJC_CLASS(name)	(&DISPATCH_CLASS_SYMBOL(name))
@@ -135,39 +134,35 @@
 // ObjC classes and dispatch vtables are co-located via linker order and alias
 // files rdar://10640168
 #if OS_OBJECT_HAVE_OBJC2
-#define OS_OBJECT_VTABLE_SUBCLASS_INSTANCE(name, super, xdispose, dispose, ...) \
+#define OS_OBJECT_VTABLE_SUBCLASS_INSTANCE(name, ctype, xdispose, dispose, ...) \
 		__attribute__((section("__DATA,__objc_data"), used)) \
-		const struct super##_extra_vtable_s \
+		const struct ctype##_extra_vtable_s \
 		OS_OBJECT_EXTRA_VTABLE_SYMBOL(name) = { __VA_ARGS__ }
-#define OS_OBJECT_EXTRA_VTABLE_DECL(name, super)
+#define OS_OBJECT_EXTRA_VTABLE_DECL(name, ctype)
 #define DISPATCH_VTABLE(name) DISPATCH_OBJC_CLASS(name)
 #else
-#define OS_OBJECT_VTABLE_SUBCLASS_INSTANCE(name, super, xdispose, dispose, ...) \
-		const struct super##_vtable_s \
+#define OS_OBJECT_VTABLE_SUBCLASS_INSTANCE(name, ctype, xdispose, dispose, ...) \
+		const struct ctype##_vtable_s \
 		OS_OBJECT_EXTRA_VTABLE_SYMBOL(name) = { \
 			._os_obj_objc_isa = &OS_OBJECT_CLASS_SYMBOL(name), \
 			._os_obj_vtable = { __VA_ARGS__ }, \
 		}
-#define OS_OBJECT_EXTRA_VTABLE_DECL(name, super) \
-		extern const struct super##_vtable_s \
+#define OS_OBJECT_EXTRA_VTABLE_DECL(name, ctype) \
+		extern const struct ctype##_vtable_s \
 				OS_OBJECT_EXTRA_VTABLE_SYMBOL(name);
 #define DISPATCH_VTABLE(name) &OS_OBJECT_EXTRA_VTABLE_SYMBOL(dispatch_##name)
-#endif
+#endif // OS_OBJECT_HAVE_OBJC2
 #else
-#define OS_OBJECT_VTABLE_SUBCLASS_INSTANCE(name, super, xdispose, dispose, ...) \
-		const struct super##_vtable_s OS_OBJECT_CLASS_SYMBOL(name) = { \
+#define OS_OBJECT_VTABLE_SUBCLASS_INSTANCE(name, ctype, xdispose, dispose, ...) \
+		const struct ctype##_vtable_s OS_OBJECT_CLASS_SYMBOL(name) = { \
 			._os_obj_xref_dispose = xdispose, \
 			._os_obj_dispose = dispose, \
 			._os_obj_vtable = { __VA_ARGS__ }, \
 		}
-#define OS_OBJECT_EXTRA_VTABLE_DECL(name, super)
+#define OS_OBJECT_EXTRA_VTABLE_DECL(name, ctype)
 #define DISPATCH_VTABLE(name) DISPATCH_OBJC_CLASS(name)
 #endif // USE_OBJC
 
-#define DISPATCH_VTABLE_SUBCLASS_INSTANCE(name, super, ...) \
-		OS_OBJECT_VTABLE_SUBCLASS_INSTANCE(dispatch_##name, dispatch_##super, \
-				_dispatch_xref_dispose, _dispatch_dispose, __VA_ARGS__)
-
 // vtables for proper classes
 #define OS_OBJECT_VTABLE_INSTANCE(name, xdispose, dispose, ...) \
 		OS_OBJECT_VTABLE_SUBCLASS_INSTANCE(name, name, \
@@ -176,39 +171,50 @@
 #define DISPATCH_VTABLE_INSTANCE(name, ...) \
 		DISPATCH_VTABLE_SUBCLASS_INSTANCE(name, name, __VA_ARGS__)
 
-#define DISPATCH_INVOKABLE_VTABLE_HEADER(x) \
-	unsigned long const do_type; \
-	const char *const do_kind; \
-	void (*const do_invoke)(struct x##_s *, dispatch_invoke_context_t, \
-			dispatch_invoke_flags_t); \
-	void (*const do_push)(struct x##_s *, dispatch_object_t, \
-			dispatch_qos_t)
-
-#define DISPATCH_QUEUEABLE_VTABLE_HEADER(x) \
-	DISPATCH_INVOKABLE_VTABLE_HEADER(x); \
-	void (*const do_wakeup)(struct x##_s *, \
-			dispatch_qos_t, dispatch_wakeup_flags_t); \
-	void (*const do_dispose)(struct x##_s *, bool *allow_free)
+#if USE_OBJC
+#define DISPATCH_VTABLE_SUBCLASS_INSTANCE(name, ctype, ...) \
+		OS_OBJECT_VTABLE_SUBCLASS_INSTANCE(dispatch_##name, dispatch_##ctype, \
+				_dispatch_xref_dispose, _dispatch_dispose, __VA_ARGS__)
 
 #define DISPATCH_OBJECT_VTABLE_HEADER(x) \
-	DISPATCH_QUEUEABLE_VTABLE_HEADER(x); \
-	void (*const do_set_targetq)(struct x##_s *, dispatch_queue_t); \
-	void (*const do_suspend)(struct x##_s *); \
-	void (*const do_resume)(struct x##_s *, bool activate); \
-	void (*const do_finalize_activation)(struct x##_s *, bool *allow_resume); \
-	size_t (*const do_debug)(struct x##_s *, char *, size_t)
+	unsigned long const do_type; \
+	void (*const do_dispose)(struct x##_s *, bool *allow_free); \
+	size_t (*const do_debug)(struct x##_s *, char *, size_t); \
+	void (*const do_invoke)(struct x##_s *, dispatch_invoke_context_t, \
+			dispatch_invoke_flags_t)
+#else
+#define DISPATCH_VTABLE_SUBCLASS_INSTANCE(name, ctype, ...) \
+		OS_OBJECT_VTABLE_SUBCLASS_INSTANCE(dispatch_##name, dispatch_##ctype, \
+				_dispatch_xref_dispose, _dispatch_dispose, \
+				.do_kind = #name, __VA_ARGS__)
+
+#define DISPATCH_OBJECT_VTABLE_HEADER(x) \
+	unsigned long const do_type; \
+	const char *const do_kind; \
+	void (*const do_dispose)(struct x##_s *, bool *allow_free); \
+	size_t (*const do_debug)(struct x##_s *, char *, size_t); \
+	void (*const do_invoke)(struct x##_s *, dispatch_invoke_context_t, \
+			dispatch_invoke_flags_t)
+#endif
+
+#define DISPATCH_QUEUE_VTABLE_HEADER(x); \
+	DISPATCH_OBJECT_VTABLE_HEADER(x); \
+	void (*const dq_activate)(dispatch_queue_class_t, bool *allow_resume); \
+	void (*const dq_wakeup)(dispatch_queue_class_t, dispatch_qos_t, \
+			dispatch_wakeup_flags_t); \
+	void (*const dq_push)(dispatch_queue_class_t, dispatch_object_t, \
+			dispatch_qos_t)
 
 #define dx_vtable(x) (&(x)->do_vtable->_os_obj_vtable)
 #define dx_type(x) dx_vtable(x)->do_type
-#define dx_subtype(x) (dx_vtable(x)->do_type & _DISPATCH_SUB_TYPE_MASK)
 #define dx_metatype(x) (dx_vtable(x)->do_type & _DISPATCH_META_TYPE_MASK)
+#define dx_cluster(x) (dx_vtable(x)->do_type & _DISPATCH_TYPE_CLUSTER_MASK)
 #define dx_hastypeflag(x, f) (dx_vtable(x)->do_type & _DISPATCH_##f##_TYPEFLAG)
-#define dx_kind(x) dx_vtable(x)->do_kind
 #define dx_debug(x, y, z) dx_vtable(x)->do_debug((x), (y), (z))
 #define dx_dispose(x, y) dx_vtable(x)->do_dispose(x, y)
 #define dx_invoke(x, y, z) dx_vtable(x)->do_invoke(x, y, z)
-#define dx_push(x, y, z) dx_vtable(x)->do_push(x, y, z)
-#define dx_wakeup(x, y, z) dx_vtable(x)->do_wakeup(x, y, z)
+#define dx_push(x, y, z) dx_vtable(x)->dq_push(x, y, z)
+#define dx_wakeup(x, y, z) dx_vtable(x)->dq_wakeup(x, y, z)
 
 #define DISPATCH_OBJECT_GLOBAL_REFCNT		_OS_OBJECT_GLOBAL_REFCNT
 
@@ -253,19 +259,23 @@
 
 	// This wakeup is caused by a dispatch_block_wait()
 	DISPATCH_WAKEUP_BLOCK_WAIT              = 0x00000008,
+
+	// This wakeup may cause the source to leave its DSF_NEEDS_EVENT state
+	DISPATCH_WAKEUP_EVENT                   = 0x00000010,
 );
 
 typedef struct dispatch_invoke_context_s {
-	struct dispatch_object_s *dic_deferred;
-#if HAVE_PTHREAD_WORKQUEUE_NARROWING
+#if DISPATCH_USE_WORKQUEUE_NARROWING
 	uint64_t dic_next_narrow_check;
 #endif
+	struct dispatch_object_s *dic_barrier_waiter;
+	dispatch_qos_t dic_barrier_waiter_bucket;
 #if DISPATCH_COCOA_COMPAT
 	void *dic_autorelease_pool;
 #endif
 } dispatch_invoke_context_s, *dispatch_invoke_context_t;
 
-#if HAVE_PTHREAD_WORKQUEUE_NARROWING
+#if DISPATCH_USE_WORKQUEUE_NARROWING
 #define DISPATCH_THREAD_IS_NARROWING 1
 
 #define dispatch_with_disabled_narrowing(dic, ...) ({ \
@@ -322,10 +332,19 @@
 	// @const DISPATCH_INVOKE_MANAGER_DRAIN
 	// We're draining from a manager context
 	//
+	// @const DISPATCH_INVOKE_THREAD_BOUND
+	// We're draining from the context of a thread-bound queue (main thread)
+	//
+	// @const DISPATCH_INVOKE_WORKLOOP_DRAIN
+	// The queue at the bottom of this drain is a workloop that supports
+	// reordering.
+	//
 	DISPATCH_INVOKE_WORKER_DRAIN			= 0x00010000,
 	DISPATCH_INVOKE_REDIRECTING_DRAIN		= 0x00020000,
 	DISPATCH_INVOKE_MANAGER_DRAIN			= 0x00040000,
-#define _DISPATCH_INVOKE_DRAIN_MODE_MASK	  0x000f0000u
+	DISPATCH_INVOKE_THREAD_BOUND			= 0x00080000,
+	DISPATCH_INVOKE_WORKLOOP_DRAIN			= 0x00100000,
+#define _DISPATCH_INVOKE_DRAIN_MODE_MASK	  0x00ff0000u
 
 	// Autoreleasing modes
 	//
@@ -335,57 +354,72 @@
 	// @const DISPATCH_INVOKE_AUTORELEASE_NEVER
 	// Never use autoreleasepools around callouts
 	//
-	DISPATCH_INVOKE_AUTORELEASE_ALWAYS		= 0x00100000,
-	DISPATCH_INVOKE_AUTORELEASE_NEVER		= 0x00200000,
-#define _DISPATCH_INVOKE_AUTORELEASE_MASK	  0x00300000u
+	DISPATCH_INVOKE_AUTORELEASE_ALWAYS		= 0x01000000,
+	DISPATCH_INVOKE_AUTORELEASE_NEVER		= 0x02000000,
+#define _DISPATCH_INVOKE_AUTORELEASE_MASK	  0x03000000u
 );
 
 DISPATCH_ENUM(dispatch_object_flags, unsigned long,
-	_DISPATCH_META_TYPE_MASK		= 0xffff0000, // mask for object meta-types
-	_DISPATCH_TYPEFLAGS_MASK		= 0x0000ff00, // mask for object typeflags
-	_DISPATCH_SUB_TYPE_MASK			= 0x000000ff, // mask for object sub-types
+	_DISPATCH_META_TYPE_MASK		= 0x000000ff, // mask for object meta-types
+	_DISPATCH_TYPE_CLUSTER_MASK		= 0x000000f0, // mask for the cluster type
+	_DISPATCH_SUB_TYPE_MASK			= 0x0000ff00, // mask for object sub-types
+	_DISPATCH_TYPEFLAGS_MASK		= 0x00ff0000, // mask for object typeflags
 
-	_DISPATCH_CONTINUATION_TYPE		=    0x00000, // meta-type for continuations
-	_DISPATCH_QUEUE_TYPE			=    0x10000, // meta-type for queues
-	_DISPATCH_SOURCE_TYPE			=    0x20000, // meta-type for sources
-	_DISPATCH_SEMAPHORE_TYPE		=    0x30000, // meta-type for semaphores
-	_DISPATCH_NODE_TYPE				=    0x40000, // meta-type for data node
-	_DISPATCH_IO_TYPE				=    0x50000, // meta-type for io channels
-	_DISPATCH_OPERATION_TYPE		=    0x60000, // meta-type for io operations
-	_DISPATCH_DISK_TYPE				=    0x70000, // meta-type for io disks
+	_DISPATCH_OBJECT_CLUSTER        = 0x00000000, // dispatch object cluster
+	_DISPATCH_CONTINUATION_TYPE		= 0x00000000, // meta-type for continuations
+	_DISPATCH_SEMAPHORE_TYPE		= 0x00000001, // meta-type for semaphores
+	_DISPATCH_NODE_TYPE				= 0x00000002, // meta-type for data node
+	_DISPATCH_IO_TYPE				= 0x00000003, // meta-type for io channels
+	_DISPATCH_OPERATION_TYPE		= 0x00000004, // meta-type for io operations
+	_DISPATCH_DISK_TYPE				= 0x00000005, // meta-type for io disks
 
-	_DISPATCH_QUEUE_ROOT_TYPEFLAG	=     0x0100, // bit set for any root queues
-	_DISPATCH_QUEUE_BASE_TYPEFLAG	=     0x0200, // base of a hierarchy
-	                                              // targets a root queue
+	_DISPATCH_QUEUE_CLUSTER         = 0x00000010, // dispatch queue cluster
+	_DISPATCH_LANE_TYPE				= 0x00000011, // meta-type for lanes
+	_DISPATCH_WORKLOOP_TYPE			= 0x00000012, // meta-type for workloops
+	_DISPATCH_SOURCE_TYPE			= 0x00000013, // meta-type for sources
 
-#define DISPATCH_CONTINUATION_TYPE(name)  \
-		(_DISPATCH_CONTINUATION_TYPE | DC_##name##_TYPE)
-	DISPATCH_DATA_TYPE					= 1 | _DISPATCH_NODE_TYPE,
-	DISPATCH_MACH_MSG_TYPE				= 2 | _DISPATCH_NODE_TYPE,
-	DISPATCH_QUEUE_ATTR_TYPE			= 3 | _DISPATCH_NODE_TYPE,
+	// QUEUE_ROOT is set on root queues (queues with a NULL do_targetq)
+	// QUEUE_BASE is set on hierarchy bases, these always target a root queue
+	// NO_CONTEXT is set on types not supporting dispatch_{get,set}_context
+	_DISPATCH_QUEUE_ROOT_TYPEFLAG	= 0x00010000,
+	_DISPATCH_QUEUE_BASE_TYPEFLAG	= 0x00020000,
+	_DISPATCH_NO_CONTEXT_TYPEFLAG	= 0x00040000,
 
-	DISPATCH_IO_TYPE					= 0 | _DISPATCH_IO_TYPE,
-	DISPATCH_OPERATION_TYPE				= 0 | _DISPATCH_OPERATION_TYPE,
-	DISPATCH_DISK_TYPE					= 0 | _DISPATCH_DISK_TYPE,
+#define DISPATCH_OBJECT_SUBTYPE(ty, base) (_DISPATCH_##base##_TYPE | (ty) << 8)
+#define DISPATCH_CONTINUATION_TYPE(name) \
+		DISPATCH_OBJECT_SUBTYPE(DC_##name##_TYPE, CONTINUATION)
 
-	DISPATCH_QUEUE_LEGACY_TYPE			= 1 | _DISPATCH_QUEUE_TYPE,
-	DISPATCH_QUEUE_SERIAL_TYPE			= 2 | _DISPATCH_QUEUE_TYPE,
-	DISPATCH_QUEUE_CONCURRENT_TYPE		= 3 | _DISPATCH_QUEUE_TYPE,
-	DISPATCH_QUEUE_GLOBAL_ROOT_TYPE		= 4 | _DISPATCH_QUEUE_TYPE |
-			_DISPATCH_QUEUE_ROOT_TYPEFLAG,
-	DISPATCH_QUEUE_NETWORK_EVENT_TYPE	= 5 | _DISPATCH_QUEUE_TYPE |
+	DISPATCH_SEMAPHORE_TYPE				= DISPATCH_OBJECT_SUBTYPE(1, SEMAPHORE),
+	DISPATCH_GROUP_TYPE					= DISPATCH_OBJECT_SUBTYPE(2, SEMAPHORE),
+
+	DISPATCH_DATA_TYPE					= DISPATCH_OBJECT_SUBTYPE(1, NODE),
+	DISPATCH_MACH_MSG_TYPE				= DISPATCH_OBJECT_SUBTYPE(2, NODE),
+	DISPATCH_QUEUE_ATTR_TYPE			= DISPATCH_OBJECT_SUBTYPE(3, NODE),
+
+	DISPATCH_IO_TYPE					= DISPATCH_OBJECT_SUBTYPE(0, IO),
+	DISPATCH_OPERATION_TYPE				= DISPATCH_OBJECT_SUBTYPE(0, OPERATION),
+	DISPATCH_DISK_TYPE					= DISPATCH_OBJECT_SUBTYPE(0, DISK),
+
+	DISPATCH_QUEUE_SERIAL_TYPE			= DISPATCH_OBJECT_SUBTYPE(1, LANE),
+	DISPATCH_QUEUE_CONCURRENT_TYPE		= DISPATCH_OBJECT_SUBTYPE(2, LANE),
+	DISPATCH_QUEUE_GLOBAL_ROOT_TYPE		= DISPATCH_OBJECT_SUBTYPE(3, LANE) |
+			_DISPATCH_QUEUE_ROOT_TYPEFLAG | _DISPATCH_NO_CONTEXT_TYPEFLAG,
+	DISPATCH_QUEUE_PTHREAD_ROOT_TYPE	= DISPATCH_OBJECT_SUBTYPE(4, LANE) |
+			_DISPATCH_QUEUE_ROOT_TYPEFLAG | _DISPATCH_NO_CONTEXT_TYPEFLAG,
+	DISPATCH_QUEUE_MGR_TYPE				= DISPATCH_OBJECT_SUBTYPE(5, LANE) |
+			_DISPATCH_QUEUE_BASE_TYPEFLAG | _DISPATCH_NO_CONTEXT_TYPEFLAG,
+	DISPATCH_QUEUE_MAIN_TYPE			= DISPATCH_OBJECT_SUBTYPE(6, LANE) |
+			_DISPATCH_QUEUE_BASE_TYPEFLAG | _DISPATCH_NO_CONTEXT_TYPEFLAG,
+	DISPATCH_QUEUE_RUNLOOP_TYPE			= DISPATCH_OBJECT_SUBTYPE(7, LANE) |
+			_DISPATCH_QUEUE_BASE_TYPEFLAG | _DISPATCH_NO_CONTEXT_TYPEFLAG,
+	DISPATCH_QUEUE_NETWORK_EVENT_TYPE	= DISPATCH_OBJECT_SUBTYPE(8, LANE) |
 			_DISPATCH_QUEUE_BASE_TYPEFLAG,
-	DISPATCH_QUEUE_RUNLOOP_TYPE			= 6 | _DISPATCH_QUEUE_TYPE |
-			_DISPATCH_QUEUE_BASE_TYPEFLAG,
-	DISPATCH_QUEUE_MGR_TYPE				= 7 | _DISPATCH_QUEUE_TYPE |
-			_DISPATCH_QUEUE_BASE_TYPEFLAG,
-	DISPATCH_QUEUE_SPECIFIC_TYPE		= 8 | _DISPATCH_QUEUE_TYPE,
 
-	DISPATCH_SEMAPHORE_TYPE				= 1 | _DISPATCH_SEMAPHORE_TYPE,
-	DISPATCH_GROUP_TYPE					= 2 | _DISPATCH_SEMAPHORE_TYPE,
+	DISPATCH_WORKLOOP_TYPE				= DISPATCH_OBJECT_SUBTYPE(0, WORKLOOP) |
+			_DISPATCH_QUEUE_BASE_TYPEFLAG,
 
-	DISPATCH_SOURCE_KEVENT_TYPE			= 1 | _DISPATCH_SOURCE_TYPE,
-	DISPATCH_MACH_CHANNEL_TYPE			= 2 | _DISPATCH_SOURCE_TYPE,
+	DISPATCH_SOURCE_KEVENT_TYPE			= DISPATCH_OBJECT_SUBTYPE(1, SOURCE),
+	DISPATCH_MACH_CHANNEL_TYPE			= DISPATCH_OBJECT_SUBTYPE(2, SOURCE),
 );
 
 typedef struct _os_object_vtable_s {
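
For reference, the reworked dispatch_object_flags layout above packs the meta-type (with its cluster nibble) into the low byte, the sub-type into the second byte, and the typeflags into the third; DISPATCH_OBJECT_SUBTYPE() composes the first two. The standalone sketch below mirrors those constants; the metatype_of()/subtype_of() helpers are simplified local stand-ins for the library's dx_metatype()/dx_hastypeflag() accessors seen later in this patch, and none of this is part of the change itself.

/*
 * Illustrative decomposition of the new dispatch_object_flags encoding.
 * Constant values are copied from the hunk above; the helpers are local toys.
 */
#include <assert.h>

#define _DISPATCH_META_TYPE_MASK	0x000000ffu
#define _DISPATCH_TYPE_CLUSTER_MASK	0x000000f0u
#define _DISPATCH_SUB_TYPE_MASK		0x0000ff00u
#define _DISPATCH_TYPEFLAGS_MASK	0x00ff0000u

#define _DISPATCH_QUEUE_CLUSTER		0x00000010u
#define _DISPATCH_LANE_TYPE		0x00000011u
#define _DISPATCH_QUEUE_ROOT_TYPEFLAG	0x00010000u
#define _DISPATCH_NO_CONTEXT_TYPEFLAG	0x00040000u

#define DISPATCH_OBJECT_SUBTYPE(ty, base) (_DISPATCH_##base##_TYPE | (ty) << 8)

#define metatype_of(f)	((f) & _DISPATCH_META_TYPE_MASK)
#define subtype_of(f)	(((f) & _DISPATCH_SUB_TYPE_MASK) >> 8)
#define typeflags_of(f)	((f) & _DISPATCH_TYPEFLAGS_MASK)

int main(void)
{
	// same composition as DISPATCH_QUEUE_GLOBAL_ROOT_TYPE in the hunk above
	unsigned global_root = DISPATCH_OBJECT_SUBTYPE(3, LANE) |
			_DISPATCH_QUEUE_ROOT_TYPEFLAG | _DISPATCH_NO_CONTEXT_TYPEFLAG;

	assert(metatype_of(global_root) == _DISPATCH_LANE_TYPE);
	assert((metatype_of(global_root) & _DISPATCH_TYPE_CLUSTER_MASK) ==
			_DISPATCH_QUEUE_CLUSTER);
	assert(subtype_of(global_root) == 3);
	assert(typeflags_of(global_root) ==
			(_DISPATCH_QUEUE_ROOT_TYPEFLAG | _DISPATCH_NO_CONTEXT_TYPEFLAG));
	return 0;
}
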
@@ -434,54 +468,13 @@
 	}
 
 _OS_OBJECT_DECL_PROTOCOL(dispatch_object, object);
-
-OS_OBJECT_CLASS_DECL(dispatch_object, object,
-		DISPATCH_OBJECT_VTABLE_HEADER(dispatch_object));
+DISPATCH_CLASS_DECL_BARE(object, OBJECT);
 
 struct dispatch_object_s {
 	_DISPATCH_OBJECT_HEADER(object);
 };
 
-#if OS_OBJECT_HAVE_OBJC1
-#define _OS_MPSC_QUEUE_FIELDS(ns, __state_field__) \
-	DISPATCH_UNION_LE(uint64_t volatile __state_field__, \
-			dispatch_lock __state_field__##_lock, \
-			uint32_t __state_field__##_bits \
-	) DISPATCH_ATOMIC64_ALIGN; \
-	struct dispatch_object_s *volatile ns##_items_head; \
-	unsigned long ns##_serialnum; \
-	const char *ns##_label; \
-	struct dispatch_object_s *volatile ns##_items_tail; \
-	dispatch_priority_t ns##_priority; \
-	int volatile ns##_sref_cnt
-#else
-#define _OS_MPSC_QUEUE_FIELDS(ns, __state_field__) \
-	struct dispatch_object_s *volatile ns##_items_head; \
-	DISPATCH_UNION_LE(uint64_t volatile __state_field__, \
-			dispatch_lock __state_field__##_lock, \
-			uint32_t __state_field__##_bits \
-	) DISPATCH_ATOMIC64_ALIGN; \
-	/* LP64 global queue cacheline boundary */ \
-	unsigned long ns##_serialnum; \
-	const char *ns##_label; \
-	struct dispatch_object_s *volatile ns##_items_tail; \
-	dispatch_priority_t ns##_priority; \
-	int volatile ns##_sref_cnt
-#endif
-
-OS_OBJECT_INTERNAL_CLASS_DECL(os_mpsc_queue, object,
-		DISPATCH_QUEUEABLE_VTABLE_HEADER(os_mpsc_queue));
-
-struct os_mpsc_queue_s {
-	struct _os_object_s _as_os_obj[0];
-	OS_OBJECT_STRUCT_HEADER(os_mpsc_queue);
-	struct dispatch_object_s *volatile oq_next;
-	void *oq_opaque1; // do_targetq
-	void *oq_opaque2; // do_ctxt
-	void *oq_opaque3; // do_finalizer
-	_OS_MPSC_QUEUE_FIELDS(oq, __oq_state_do_not_use);
-};
-
+DISPATCH_COLD
 size_t _dispatch_object_debug_attr(dispatch_object_t dou, char* buf,
 		size_t bufsiz);
 void *_dispatch_object_alloc(const void *vtable, size_t size);
@@ -535,10 +528,10 @@
 // This is required by the dispatch_data_t/NSData bridging, which is not
 // supported on the old runtime.
 #define DISPATCH_OBJECT_TFB(f, o, ...) \
-	if (slowpath((uintptr_t)((o)._os_obj->os_obj_isa) & 1) || \
-			slowpath((Class)((o)._os_obj->os_obj_isa) < \
-					(Class)OS_OBJECT_VTABLE(dispatch_object)) || \
-			slowpath((Class)((o)._os_obj->os_obj_isa) >= \
+	if (unlikely(((uintptr_t)((o)._os_obj->os_obj_isa) & 1) || \
+			(Class)((o)._os_obj->os_obj_isa) < \
+					(Class)OS_OBJECT_VTABLE(dispatch_object) || \
+			(Class)((o)._os_obj->os_obj_isa) >= \
 					(Class)OS_OBJECT_VTABLE(object))) { \
 		return f((o), ##__VA_ARGS__); \
 	}
@@ -555,6 +548,7 @@
 void _dispatch_objc_suspend(dispatch_object_t dou);
 void _dispatch_objc_resume(dispatch_object_t dou);
 void _dispatch_objc_activate(dispatch_object_t dou);
+DISPATCH_COLD
 size_t _dispatch_objc_debug(dispatch_object_t dou, char* buf, size_t bufsiz);
 
 #if __OBJC2__
@@ -591,14 +585,14 @@
 #define _os_atomic_refcnt_perform2o(o, f, op, n, m)   ({ \
 		__typeof__(o) _o = (o); \
 		int _ref_cnt = _o->f; \
-		if (fastpath(_ref_cnt != _OS_OBJECT_GLOBAL_REFCNT)) { \
+		if (likely(_ref_cnt != _OS_OBJECT_GLOBAL_REFCNT)) { \
 			_ref_cnt = os_atomic_##op##2o(_o, f, n, m); \
 		} \
 		_ref_cnt; \
 	})
 
-#define _os_atomic_refcnt_add2o(o, m, n) \
-		_os_atomic_refcnt_perform2o(o, m, add, n, relaxed)
+#define _os_atomic_refcnt_add_orig2o(o, m, n) \
+		_os_atomic_refcnt_perform2o(o, m, add_orig, n, relaxed)
 
 #define _os_atomic_refcnt_sub2o(o, m, n) \
 		_os_atomic_refcnt_perform2o(o, m, sub, n, release)
@@ -610,9 +604,9 @@
 /*
  * Higher level _os_object_{x,}refcnt_* actions
  *
- * _os_atomic_{x,}refcnt_inc(o):
+ * _os_atomic_{x,}refcnt_inc_orig(o):
  *   increment the external (resp. internal) refcount and
- *   returns the new refcount value
+ *   returns the old refcount value
  *
  * _os_atomic_{x,}refcnt_dec(o):
  *   decrement the external (resp. internal) refcount and
@@ -623,8 +617,8 @@
  *   (resp. internal) refcount
  *
  */
-#define _os_object_xrefcnt_inc(o) \
-		_os_atomic_refcnt_add2o(o, os_obj_xref_cnt, 1)
+#define _os_object_xrefcnt_inc_orig(o) \
+		_os_atomic_refcnt_add_orig2o(o, os_obj_xref_cnt, 1)
 
 #define _os_object_xrefcnt_dec(o) \
 		_os_atomic_refcnt_sub2o(o, os_obj_xref_cnt, 1)
@@ -632,8 +626,8 @@
 #define _os_object_xrefcnt_dispose_barrier(o) \
 		_os_atomic_refcnt_dispose_barrier2o(o, os_obj_xref_cnt)
 
-#define _os_object_refcnt_add(o, n) \
-		_os_atomic_refcnt_add2o(o, os_obj_ref_cnt, n)
+#define _os_object_refcnt_add_orig(o, n) \
+		_os_atomic_refcnt_add_orig2o(o, os_obj_ref_cnt, n)
 
 #define _os_object_refcnt_sub(o, n) \
 		_os_atomic_refcnt_sub2o(o, os_obj_ref_cnt, n)
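
As the updated comment block notes, the macros renamed above now surface the refcount value from before the increment (hence the _orig suffix), via the os_atomic_add_orig2o() expansion. A minimal C11 analogue of that add-with-original-value pattern follows; the names are illustrative only and are not libdispatch API.

/*
 * Toy version of the pattern behind _os_atomic_refcnt_add_orig2o().
 */
#include <assert.h>
#include <stdatomic.h>

typedef struct {
	atomic_int ref_cnt;
} toy_object_s;

static int
toy_refcnt_add_orig(toy_object_s *o, int n)
{
	// atomic_fetch_add() returns the pre-increment value
	return atomic_fetch_add_explicit(&o->ref_cnt, n, memory_order_relaxed);
}

int main(void)
{
	toy_object_s o = { .ref_cnt = 1 };
	int orig = toy_refcnt_add_orig(&o, 1);

	assert(orig == 1);                      // value before the increment
	assert(atomic_load(&o.ref_cnt) == 2);   // value after the increment
	return 0;
}
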
diff --git a/src/once.c b/src/once.c
index c01538c..86a74ff 100644
--- a/src/once.c
+++ b/src/once.c
@@ -24,14 +24,6 @@
 #undef dispatch_once_f
 
 
-typedef struct _dispatch_once_waiter_s {
-	volatile struct _dispatch_once_waiter_s *volatile dow_next;
-	dispatch_thread_event_s dow_event;
-	mach_port_t dow_thread;
-} *_dispatch_once_waiter_t;
-
-#define DISPATCH_ONCE_DONE ((_dispatch_once_waiter_t)~0l)
-
 #ifdef __BLOCKS__
 void
 dispatch_once(dispatch_once_t *val, dispatch_block_t block)
@@ -46,70 +38,34 @@
 #define DISPATCH_ONCE_SLOW_INLINE DISPATCH_NOINLINE
 #endif // DISPATCH_ONCE_INLINE_FASTPATH
 
-DISPATCH_ONCE_SLOW_INLINE
+DISPATCH_NOINLINE
 static void
-dispatch_once_f_slow(dispatch_once_t *val, void *ctxt, dispatch_function_t func)
+_dispatch_once_callout(dispatch_once_gate_t l, void *ctxt,
+		dispatch_function_t func)
 {
-#if DISPATCH_GATE_USE_FOR_DISPATCH_ONCE
-	dispatch_once_gate_t l = (dispatch_once_gate_t)val;
-
-	if (_dispatch_once_gate_tryenter(l)) {
-		_dispatch_client_callout(ctxt, func);
-		_dispatch_once_gate_broadcast(l);
-	} else {
-		_dispatch_once_gate_wait(l);
-	}
-#else
-	_dispatch_once_waiter_t volatile *vval = (_dispatch_once_waiter_t*)val;
-	struct _dispatch_once_waiter_s dow = { };
-	_dispatch_once_waiter_t tail = &dow, next, tmp;
-	dispatch_thread_event_t event;
-
-	if (os_atomic_cmpxchg(vval, NULL, tail, acquire)) {
-		dow.dow_thread = _dispatch_tid_self();
-		_dispatch_client_callout(ctxt, func);
-
-		next = (_dispatch_once_waiter_t)_dispatch_once_xchg_done(val);
-		while (next != tail) {
-			tmp = (_dispatch_once_waiter_t)_dispatch_wait_until(next->dow_next);
-			event = &next->dow_event;
-			next = tmp;
-			_dispatch_thread_event_signal(event);
-		}
-	} else {
-		_dispatch_thread_event_init(&dow.dow_event);
-		next = *vval;
-		for (;;) {
-			if (next == DISPATCH_ONCE_DONE) {
-				break;
-			}
-			if (os_atomic_cmpxchgv(vval, next, tail, &next, release)) {
-				dow.dow_thread = next->dow_thread;
-				dow.dow_next = next;
-				if (dow.dow_thread) {
-					pthread_priority_t pp = _dispatch_get_priority();
-					_dispatch_thread_override_start(dow.dow_thread, pp, val);
-				}
-				_dispatch_thread_event_wait(&dow.dow_event);
-				if (dow.dow_thread) {
-					_dispatch_thread_override_end(dow.dow_thread, val);
-				}
-				break;
-			}
-		}
-		_dispatch_thread_event_destroy(&dow.dow_event);
-	}
-#endif
+	_dispatch_client_callout(ctxt, func);
+	_dispatch_once_gate_broadcast(l);
 }
 
 DISPATCH_NOINLINE
 void
 dispatch_once_f(dispatch_once_t *val, void *ctxt, dispatch_function_t func)
 {
-#if !DISPATCH_ONCE_INLINE_FASTPATH
-	if (likely(os_atomic_load(val, acquire) == DLOCK_ONCE_DONE)) {
+	dispatch_once_gate_t l = (dispatch_once_gate_t)val;
+
+#if !DISPATCH_ONCE_INLINE_FASTPATH || DISPATCH_ONCE_USE_QUIESCENT_COUNTER
+	uintptr_t v = os_atomic_load(&l->dgo_once, acquire);
+	if (likely(v == DLOCK_ONCE_DONE)) {
 		return;
 	}
-#endif // !DISPATCH_ONCE_INLINE_FASTPATH
-	return dispatch_once_f_slow(val, ctxt, func);
+#if DISPATCH_ONCE_USE_QUIESCENT_COUNTER
+	if (likely(DISPATCH_ONCE_IS_GEN(v))) {
+		return _dispatch_once_mark_done_if_quiesced(l, v);
+	}
+#endif
+#endif
+	if (_dispatch_once_gate_tryenter(l)) {
+		return _dispatch_once_callout(l, ctxt, func);
+	}
+	return _dispatch_once_wait(l);
 }
diff --git a/src/queue.c b/src/queue.c
index adb1e1c..44cb655 100644
--- a/src/queue.c
+++ b/src/queue.c
@@ -23,530 +23,22 @@
 #include "protocol.h" // _dispatch_send_wakeup_runloop_thread
 #endif
 
-#if HAVE_PTHREAD_WORKQUEUES || DISPATCH_USE_INTERNAL_WORKQUEUE
-#define DISPATCH_USE_WORKQUEUES 1
-#endif
-#if (!HAVE_PTHREAD_WORKQUEUES || DISPATCH_DEBUG) && \
-		!defined(DISPATCH_ENABLE_THREAD_POOL)
-#define DISPATCH_ENABLE_THREAD_POOL 1
-#endif
-#if DISPATCH_ENABLE_PTHREAD_ROOT_QUEUES || DISPATCH_ENABLE_THREAD_POOL
-#define DISPATCH_USE_PTHREAD_POOL 1
-#endif
-#if HAVE_PTHREAD_WORKQUEUES && (!HAVE_PTHREAD_WORKQUEUE_QOS || \
-		DISPATCH_DEBUG) && !HAVE_PTHREAD_WORKQUEUE_SETDISPATCH_NP && \
-		!defined(DISPATCH_USE_LEGACY_WORKQUEUE_FALLBACK)
-#define DISPATCH_USE_LEGACY_WORKQUEUE_FALLBACK 1
-#endif
-#if HAVE_PTHREAD_WORKQUEUE_SETDISPATCH_NP && (DISPATCH_DEBUG || \
-		(!DISPATCH_USE_KEVENT_WORKQUEUE && !HAVE_PTHREAD_WORKQUEUE_QOS)) && \
-		!defined(DISPATCH_USE_PTHREAD_WORKQUEUE_SETDISPATCH_NP)
-#define DISPATCH_USE_PTHREAD_WORKQUEUE_SETDISPATCH_NP 1
-#endif
-#if DISPATCH_USE_PTHREAD_WORKQUEUE_SETDISPATCH_NP || \
-		DISPATCH_USE_LEGACY_WORKQUEUE_FALLBACK || \
-		DISPATCH_USE_INTERNAL_WORKQUEUE
-#if !DISPATCH_USE_INTERNAL_WORKQUEUE
-#define DISPATCH_USE_WORKQ_PRIORITY 1
-#endif
-#define DISPATCH_USE_WORKQ_OPTIONS 1
-#endif
-
-#if DISPATCH_USE_WORKQUEUES && DISPATCH_USE_PTHREAD_POOL && \
-		!DISPATCH_USE_LEGACY_WORKQUEUE_FALLBACK
-#define pthread_workqueue_t void*
-#endif
-
-static void _dispatch_sig_thread(void *ctxt);
-static void DISPATCH_TSD_DTOR_CC _dispatch_cache_cleanup(void *value);
-static void _dispatch_async_f2(dispatch_queue_t dq, dispatch_continuation_t dc);
-static void DISPATCH_TSD_DTOR_CC _dispatch_queue_cleanup(void *ctxt);
-static void DISPATCH_TSD_DTOR_CC _dispatch_wlh_cleanup(void *ctxt);
-static void DISPATCH_TSD_DTOR_CC _dispatch_deferred_items_cleanup(void *ctxt);
-static void DISPATCH_TSD_DTOR_CC _dispatch_frame_cleanup(void *ctxt);
-static void DISPATCH_TSD_DTOR_CC _dispatch_context_cleanup(void *ctxt);
-static void _dispatch_queue_barrier_complete(dispatch_queue_t dq,
+static inline void _dispatch_root_queues_init(void);
+static void _dispatch_lane_barrier_complete(dispatch_lane_class_t dqu,
 		dispatch_qos_t qos, dispatch_wakeup_flags_t flags);
-static void _dispatch_queue_non_barrier_complete(dispatch_queue_t dq);
-static void _dispatch_queue_push_sync_waiter(dispatch_queue_t dq,
-		dispatch_sync_context_t dsc, dispatch_qos_t qos);
+static void _dispatch_lane_non_barrier_complete(dispatch_lane_t dq,
+		dispatch_wakeup_flags_t flags);
 #if HAVE_PTHREAD_WORKQUEUE_QOS
-static void _dispatch_root_queue_push_override_stealer(dispatch_queue_t orig_rq,
-		dispatch_queue_t dq, dispatch_qos_t qos);
-static inline void _dispatch_queue_class_wakeup_with_override(dispatch_queue_t,
-		uint64_t dq_state, dispatch_wakeup_flags_t flags);
+static inline void _dispatch_queue_wakeup_with_override(
+		dispatch_queue_class_t dq, uint64_t dq_state,
+		dispatch_wakeup_flags_t flags);
 #endif
-#if HAVE_PTHREAD_WORKQUEUES
-static void _dispatch_worker_thread4(void *context);
-#if HAVE_PTHREAD_WORKQUEUE_QOS
-static void _dispatch_worker_thread3(pthread_priority_t priority);
-#endif
-#if DISPATCH_USE_PTHREAD_WORKQUEUE_SETDISPATCH_NP
-static void _dispatch_worker_thread2(int priority, int options, void *context);
-#endif
-#endif
-#if DISPATCH_USE_PTHREAD_POOL
-static void *_dispatch_worker_thread(void *context);
-#if defined(_WIN32)
-static unsigned WINAPI
-_dispatch_worker_thread_thunk(LPVOID lpParameter);
-#endif
-#endif
-
-#if DISPATCH_COCOA_COMPAT || defined(_WIN32)
-static dispatch_once_t _dispatch_main_q_handle_pred;
-#endif
-#if DISPATCH_COCOA_COMPAT
-static void _dispatch_runloop_queue_poke(dispatch_queue_t dq,
-		dispatch_qos_t qos, dispatch_wakeup_flags_t flags);
-#endif
-#if DISPATCH_COCOA_COMPAT || defined(_WIN32)
-static void _dispatch_runloop_queue_handle_init(void *ctxt);
-static void _dispatch_runloop_queue_handle_dispose(dispatch_queue_t dq);
-#endif
+static void _dispatch_workloop_drain_barrier_waiter(dispatch_workloop_t dwl,
+		struct dispatch_object_s *dc, dispatch_qos_t qos,
+		dispatch_wakeup_flags_t flags, uint64_t owned);
 
 #pragma mark -
-#pragma mark dispatch_root_queue
-
-struct dispatch_pthread_root_queue_context_s {
-#if !defined(_WIN32)
-	pthread_attr_t dpq_thread_attr;
-#endif
-	dispatch_block_t dpq_thread_configure;
-	struct dispatch_semaphore_s dpq_thread_mediator;
-	dispatch_pthread_root_queue_observer_hooks_s dpq_observer_hooks;
-};
-typedef struct dispatch_pthread_root_queue_context_s *
-		dispatch_pthread_root_queue_context_t;
-
-#if DISPATCH_ENABLE_THREAD_POOL
-static struct dispatch_pthread_root_queue_context_s
-		_dispatch_pthread_root_queue_contexts[] = {
-	[DISPATCH_ROOT_QUEUE_IDX_MAINTENANCE_QOS] = {
-		.dpq_thread_mediator = {
-			DISPATCH_GLOBAL_OBJECT_HEADER(semaphore),
-	}},
-	[DISPATCH_ROOT_QUEUE_IDX_MAINTENANCE_QOS_OVERCOMMIT] = {
-		.dpq_thread_mediator = {
-			DISPATCH_GLOBAL_OBJECT_HEADER(semaphore),
-	}},
-	[DISPATCH_ROOT_QUEUE_IDX_BACKGROUND_QOS] = {
-		.dpq_thread_mediator = {
-			DISPATCH_GLOBAL_OBJECT_HEADER(semaphore),
-	}},
-	[DISPATCH_ROOT_QUEUE_IDX_BACKGROUND_QOS_OVERCOMMIT] = {
-		.dpq_thread_mediator = {
-			DISPATCH_GLOBAL_OBJECT_HEADER(semaphore),
-	}},
-	[DISPATCH_ROOT_QUEUE_IDX_UTILITY_QOS] = {
-		.dpq_thread_mediator = {
-			DISPATCH_GLOBAL_OBJECT_HEADER(semaphore),
-	}},
-	[DISPATCH_ROOT_QUEUE_IDX_UTILITY_QOS_OVERCOMMIT] = {
-		.dpq_thread_mediator = {
-			DISPATCH_GLOBAL_OBJECT_HEADER(semaphore),
-	}},
-	[DISPATCH_ROOT_QUEUE_IDX_DEFAULT_QOS] = {
-		.dpq_thread_mediator = {
-			DISPATCH_GLOBAL_OBJECT_HEADER(semaphore),
-	}},
-	[DISPATCH_ROOT_QUEUE_IDX_DEFAULT_QOS_OVERCOMMIT] = {
-		.dpq_thread_mediator = {
-			DISPATCH_GLOBAL_OBJECT_HEADER(semaphore),
-	}},
-	[DISPATCH_ROOT_QUEUE_IDX_USER_INITIATED_QOS] = {
-		.dpq_thread_mediator = {
-			DISPATCH_GLOBAL_OBJECT_HEADER(semaphore),
-	}},
-	[DISPATCH_ROOT_QUEUE_IDX_USER_INITIATED_QOS_OVERCOMMIT] = {
-		.dpq_thread_mediator = {
-			DISPATCH_GLOBAL_OBJECT_HEADER(semaphore),
-	}},
-	[DISPATCH_ROOT_QUEUE_IDX_USER_INTERACTIVE_QOS] = {
-		.dpq_thread_mediator = {
-			DISPATCH_GLOBAL_OBJECT_HEADER(semaphore),
-	}},
-	[DISPATCH_ROOT_QUEUE_IDX_USER_INTERACTIVE_QOS_OVERCOMMIT] = {
-		.dpq_thread_mediator = {
-			DISPATCH_GLOBAL_OBJECT_HEADER(semaphore),
-	}},
-};
-#endif
-
-#ifndef DISPATCH_WORKQ_MAX_PTHREAD_COUNT
-#define DISPATCH_WORKQ_MAX_PTHREAD_COUNT 255
-#endif
-
-struct dispatch_root_queue_context_s {
-	union {
-		struct {
-			int volatile dgq_pending;
-#if DISPATCH_USE_WORKQUEUES
-			qos_class_t dgq_qos;
-#if DISPATCH_USE_WORKQ_PRIORITY
-			int dgq_wq_priority;
-#endif
-#if DISPATCH_USE_WORKQ_OPTIONS
-			int dgq_wq_options;
-#endif
-#if DISPATCH_USE_LEGACY_WORKQUEUE_FALLBACK || DISPATCH_USE_PTHREAD_POOL
-			pthread_workqueue_t dgq_kworkqueue;
-#endif
-#endif // DISPATCH_USE_WORKQUEUES
-#if DISPATCH_USE_PTHREAD_POOL
-			void *dgq_ctxt;
-			int32_t volatile dgq_thread_pool_size;
-#endif
-		};
-		char _dgq_pad[DISPATCH_CACHELINE_SIZE];
-	};
-};
-typedef struct dispatch_root_queue_context_s *dispatch_root_queue_context_t;
-
-#define WORKQ_PRIO_INVALID (-1)
-#ifndef WORKQ_BG_PRIOQUEUE_CONDITIONAL
-#define WORKQ_BG_PRIOQUEUE_CONDITIONAL WORKQ_PRIO_INVALID
-#endif
-#ifndef WORKQ_HIGH_PRIOQUEUE_CONDITIONAL
-#define WORKQ_HIGH_PRIOQUEUE_CONDITIONAL WORKQ_PRIO_INVALID
-#endif
-
-DISPATCH_CACHELINE_ALIGN
-static struct dispatch_root_queue_context_s _dispatch_root_queue_contexts[] = {
-	[DISPATCH_ROOT_QUEUE_IDX_MAINTENANCE_QOS] = {{{
-#if DISPATCH_USE_WORKQUEUES
-		.dgq_qos = QOS_CLASS_MAINTENANCE,
-#if DISPATCH_USE_WORKQ_PRIORITY
-		.dgq_wq_priority = WORKQ_BG_PRIOQUEUE,
-#endif
-#if DISPATCH_USE_WORKQ_OPTIONS
-		.dgq_wq_options = 0,
-#endif
-#endif
-#if DISPATCH_ENABLE_THREAD_POOL
-		.dgq_ctxt = &_dispatch_pthread_root_queue_contexts[
-				DISPATCH_ROOT_QUEUE_IDX_MAINTENANCE_QOS],
-#endif
-	}}},
-	[DISPATCH_ROOT_QUEUE_IDX_MAINTENANCE_QOS_OVERCOMMIT] = {{{
-#if DISPATCH_USE_WORKQUEUES
-		.dgq_qos = QOS_CLASS_MAINTENANCE,
-#if DISPATCH_USE_WORKQ_PRIORITY
-		.dgq_wq_priority = WORKQ_BG_PRIOQUEUE,
-#endif
-#if DISPATCH_USE_WORKQ_OPTIONS
-		.dgq_wq_options = WORKQ_ADDTHREADS_OPTION_OVERCOMMIT,
-#endif
-#endif
-#if DISPATCH_ENABLE_THREAD_POOL
-		.dgq_ctxt = &_dispatch_pthread_root_queue_contexts[
-				DISPATCH_ROOT_QUEUE_IDX_MAINTENANCE_QOS_OVERCOMMIT],
-#endif
-	}}},
-	[DISPATCH_ROOT_QUEUE_IDX_BACKGROUND_QOS] = {{{
-#if DISPATCH_USE_WORKQUEUES
-		.dgq_qos = QOS_CLASS_BACKGROUND,
-#if DISPATCH_USE_WORKQ_PRIORITY
-		.dgq_wq_priority = WORKQ_BG_PRIOQUEUE_CONDITIONAL,
-#endif
-#if DISPATCH_USE_WORKQ_OPTIONS
-		.dgq_wq_options = 0,
-#endif
-#endif
-#if DISPATCH_ENABLE_THREAD_POOL
-		.dgq_ctxt = &_dispatch_pthread_root_queue_contexts[
-				DISPATCH_ROOT_QUEUE_IDX_BACKGROUND_QOS],
-#endif
-	}}},
-	[DISPATCH_ROOT_QUEUE_IDX_BACKGROUND_QOS_OVERCOMMIT] = {{{
-#if DISPATCH_USE_WORKQUEUES
-		.dgq_qos = QOS_CLASS_BACKGROUND,
-#if DISPATCH_USE_WORKQ_PRIORITY
-		.dgq_wq_priority = WORKQ_BG_PRIOQUEUE_CONDITIONAL,
-#endif
-#if DISPATCH_USE_WORKQ_OPTIONS
-		.dgq_wq_options = WORKQ_ADDTHREADS_OPTION_OVERCOMMIT,
-#endif
-#endif
-#if DISPATCH_ENABLE_THREAD_POOL
-		.dgq_ctxt = &_dispatch_pthread_root_queue_contexts[
-				DISPATCH_ROOT_QUEUE_IDX_BACKGROUND_QOS_OVERCOMMIT],
-#endif
-	}}},
-	[DISPATCH_ROOT_QUEUE_IDX_UTILITY_QOS] = {{{
-#if DISPATCH_USE_WORKQUEUES
-		.dgq_qos = QOS_CLASS_UTILITY,
-#if DISPATCH_USE_WORKQ_PRIORITY
-		.dgq_wq_priority = WORKQ_LOW_PRIOQUEUE,
-#endif
-#if DISPATCH_USE_WORKQ_OPTIONS
-		.dgq_wq_options = 0,
-#endif
-#endif
-#if DISPATCH_ENABLE_THREAD_POOL
-		.dgq_ctxt = &_dispatch_pthread_root_queue_contexts[
-				DISPATCH_ROOT_QUEUE_IDX_UTILITY_QOS],
-#endif
-	}}},
-	[DISPATCH_ROOT_QUEUE_IDX_UTILITY_QOS_OVERCOMMIT] = {{{
-#if DISPATCH_USE_WORKQUEUES
-		.dgq_qos = QOS_CLASS_UTILITY,
-#if DISPATCH_USE_WORKQ_PRIORITY
-		.dgq_wq_priority = WORKQ_LOW_PRIOQUEUE,
-#endif
-#if DISPATCH_USE_WORKQ_OPTIONS
-		.dgq_wq_options = WORKQ_ADDTHREADS_OPTION_OVERCOMMIT,
-#endif
-#endif
-#if DISPATCH_ENABLE_THREAD_POOL
-		.dgq_ctxt = &_dispatch_pthread_root_queue_contexts[
-				DISPATCH_ROOT_QUEUE_IDX_UTILITY_QOS_OVERCOMMIT],
-#endif
-	}}},
-	[DISPATCH_ROOT_QUEUE_IDX_DEFAULT_QOS] = {{{
-#if DISPATCH_USE_WORKQUEUES
-		.dgq_qos = QOS_CLASS_DEFAULT,
-#if DISPATCH_USE_WORKQ_PRIORITY
-		.dgq_wq_priority = WORKQ_DEFAULT_PRIOQUEUE,
-#endif
-#if DISPATCH_USE_WORKQ_OPTIONS
-		.dgq_wq_options = 0,
-#endif
-#endif
-#if DISPATCH_ENABLE_THREAD_POOL
-		.dgq_ctxt = &_dispatch_pthread_root_queue_contexts[
-				DISPATCH_ROOT_QUEUE_IDX_DEFAULT_QOS],
-#endif
-	}}},
-	[DISPATCH_ROOT_QUEUE_IDX_DEFAULT_QOS_OVERCOMMIT] = {{{
-#if DISPATCH_USE_WORKQUEUES
-		.dgq_qos = QOS_CLASS_DEFAULT,
-#if DISPATCH_USE_WORKQ_PRIORITY
-		.dgq_wq_priority = WORKQ_DEFAULT_PRIOQUEUE,
-#endif
-#if DISPATCH_USE_WORKQ_OPTIONS
-		.dgq_wq_options = WORKQ_ADDTHREADS_OPTION_OVERCOMMIT,
-#endif
-#endif
-#if DISPATCH_ENABLE_THREAD_POOL
-		.dgq_ctxt = &_dispatch_pthread_root_queue_contexts[
-				DISPATCH_ROOT_QUEUE_IDX_DEFAULT_QOS_OVERCOMMIT],
-#endif
-	}}},
-	[DISPATCH_ROOT_QUEUE_IDX_USER_INITIATED_QOS] = {{{
-#if DISPATCH_USE_WORKQUEUES
-		.dgq_qos = QOS_CLASS_USER_INITIATED,
-#if DISPATCH_USE_WORKQ_PRIORITY
-		.dgq_wq_priority = WORKQ_HIGH_PRIOQUEUE,
-#endif
-#if DISPATCH_USE_WORKQ_OPTIONS
-		.dgq_wq_options = 0,
-#endif
-#endif
-#if DISPATCH_ENABLE_THREAD_POOL
-		.dgq_ctxt = &_dispatch_pthread_root_queue_contexts[
-				DISPATCH_ROOT_QUEUE_IDX_USER_INITIATED_QOS],
-#endif
-	}}},
-	[DISPATCH_ROOT_QUEUE_IDX_USER_INITIATED_QOS_OVERCOMMIT] = {{{
-#if DISPATCH_USE_WORKQUEUES
-		.dgq_qos = QOS_CLASS_USER_INITIATED,
-#if DISPATCH_USE_WORKQ_PRIORITY
-		.dgq_wq_priority = WORKQ_HIGH_PRIOQUEUE,
-#endif
-#if DISPATCH_USE_WORKQ_OPTIONS
-		.dgq_wq_options = WORKQ_ADDTHREADS_OPTION_OVERCOMMIT,
-#endif
-#endif
-#if DISPATCH_ENABLE_THREAD_POOL
-		.dgq_ctxt = &_dispatch_pthread_root_queue_contexts[
-				DISPATCH_ROOT_QUEUE_IDX_USER_INITIATED_QOS_OVERCOMMIT],
-#endif
-	}}},
-	[DISPATCH_ROOT_QUEUE_IDX_USER_INTERACTIVE_QOS] = {{{
-#if DISPATCH_USE_WORKQUEUES
-		.dgq_qos = QOS_CLASS_USER_INTERACTIVE,
-#if DISPATCH_USE_WORKQ_PRIORITY
-		.dgq_wq_priority = WORKQ_HIGH_PRIOQUEUE_CONDITIONAL,
-#endif
-#if DISPATCH_USE_WORKQ_OPTIONS
-		.dgq_wq_options = 0,
-#endif
-#endif
-#if DISPATCH_ENABLE_THREAD_POOL
-		.dgq_ctxt = &_dispatch_pthread_root_queue_contexts[
-				DISPATCH_ROOT_QUEUE_IDX_USER_INTERACTIVE_QOS],
-#endif
-	}}},
-	[DISPATCH_ROOT_QUEUE_IDX_USER_INTERACTIVE_QOS_OVERCOMMIT] = {{{
-#if DISPATCH_USE_WORKQUEUES
-		.dgq_qos = QOS_CLASS_USER_INTERACTIVE,
-#if DISPATCH_USE_WORKQ_PRIORITY
-		.dgq_wq_priority = WORKQ_HIGH_PRIOQUEUE_CONDITIONAL,
-#endif
-#if DISPATCH_USE_WORKQ_OPTIONS
-		.dgq_wq_options = WORKQ_ADDTHREADS_OPTION_OVERCOMMIT,
-#endif
-#endif
-#if DISPATCH_ENABLE_THREAD_POOL
-		.dgq_ctxt = &_dispatch_pthread_root_queue_contexts[
-				DISPATCH_ROOT_QUEUE_IDX_USER_INTERACTIVE_QOS_OVERCOMMIT],
-#endif
-	}}},
-};
-
-// 6618342 Contact the team that owns the Instrument DTrace probe before
-//         renaming this symbol
-DISPATCH_CACHELINE_ALIGN
-struct dispatch_queue_s _dispatch_root_queues[] = {
-#define _DISPATCH_ROOT_QUEUE_IDX(n, flags) \
-	((flags & DISPATCH_PRIORITY_FLAG_OVERCOMMIT) ? \
-		DISPATCH_ROOT_QUEUE_IDX_##n##_QOS_OVERCOMMIT : \
-		DISPATCH_ROOT_QUEUE_IDX_##n##_QOS)
-#define _DISPATCH_ROOT_QUEUE_ENTRY(n, flags, ...) \
-	[_DISPATCH_ROOT_QUEUE_IDX(n, flags)] = { \
-		DISPATCH_GLOBAL_OBJECT_HEADER(queue_root), \
-		.dq_state = DISPATCH_ROOT_QUEUE_STATE_INIT_VALUE, \
-		.do_ctxt = &_dispatch_root_queue_contexts[ \
-				_DISPATCH_ROOT_QUEUE_IDX(n, flags)], \
-		.dq_atomic_flags = DQF_WIDTH(DISPATCH_QUEUE_WIDTH_POOL), \
-		.dq_priority = _dispatch_priority_make(DISPATCH_QOS_##n, 0) | flags | \
-				DISPATCH_PRIORITY_FLAG_ROOTQUEUE | \
-				((flags & DISPATCH_PRIORITY_FLAG_DEFAULTQUEUE) ? 0 : \
-				DISPATCH_QOS_##n << DISPATCH_PRIORITY_OVERRIDE_SHIFT), \
-		__VA_ARGS__ \
-	}
-	_DISPATCH_ROOT_QUEUE_ENTRY(MAINTENANCE, 0,
-		.dq_label = "com.apple.root.maintenance-qos",
-		.dq_serialnum = 4,
-	),
-	_DISPATCH_ROOT_QUEUE_ENTRY(MAINTENANCE, DISPATCH_PRIORITY_FLAG_OVERCOMMIT,
-		.dq_label = "com.apple.root.maintenance-qos.overcommit",
-		.dq_serialnum = 5,
-	),
-	_DISPATCH_ROOT_QUEUE_ENTRY(BACKGROUND, 0,
-		.dq_label = "com.apple.root.background-qos",
-		.dq_serialnum = 6,
-	),
-	_DISPATCH_ROOT_QUEUE_ENTRY(BACKGROUND, DISPATCH_PRIORITY_FLAG_OVERCOMMIT,
-		.dq_label = "com.apple.root.background-qos.overcommit",
-		.dq_serialnum = 7,
-	),
-	_DISPATCH_ROOT_QUEUE_ENTRY(UTILITY, 0,
-		.dq_label = "com.apple.root.utility-qos",
-		.dq_serialnum = 8,
-	),
-	_DISPATCH_ROOT_QUEUE_ENTRY(UTILITY, DISPATCH_PRIORITY_FLAG_OVERCOMMIT,
-		.dq_label = "com.apple.root.utility-qos.overcommit",
-		.dq_serialnum = 9,
-	),
-	_DISPATCH_ROOT_QUEUE_ENTRY(DEFAULT, DISPATCH_PRIORITY_FLAG_DEFAULTQUEUE,
-		.dq_label = "com.apple.root.default-qos",
-		.dq_serialnum = 10,
-	),
-	_DISPATCH_ROOT_QUEUE_ENTRY(DEFAULT,
-			DISPATCH_PRIORITY_FLAG_DEFAULTQUEUE | DISPATCH_PRIORITY_FLAG_OVERCOMMIT,
-		.dq_label = "com.apple.root.default-qos.overcommit",
-		.dq_serialnum = 11,
-	),
-	_DISPATCH_ROOT_QUEUE_ENTRY(USER_INITIATED, 0,
-		.dq_label = "com.apple.root.user-initiated-qos",
-		.dq_serialnum = 12,
-	),
-	_DISPATCH_ROOT_QUEUE_ENTRY(USER_INITIATED, DISPATCH_PRIORITY_FLAG_OVERCOMMIT,
-		.dq_label = "com.apple.root.user-initiated-qos.overcommit",
-		.dq_serialnum = 13,
-	),
-	_DISPATCH_ROOT_QUEUE_ENTRY(USER_INTERACTIVE, 0,
-		.dq_label = "com.apple.root.user-interactive-qos",
-		.dq_serialnum = 14,
-	),
-	_DISPATCH_ROOT_QUEUE_ENTRY(USER_INTERACTIVE, DISPATCH_PRIORITY_FLAG_OVERCOMMIT,
-		.dq_label = "com.apple.root.user-interactive-qos.overcommit",
-		.dq_serialnum = 15,
-	),
-};
-
-#if DISPATCH_USE_PTHREAD_WORKQUEUE_SETDISPATCH_NP
-static const dispatch_queue_t _dispatch_wq2root_queues[][2] = {
-	[WORKQ_BG_PRIOQUEUE][0] = &_dispatch_root_queues[
-			DISPATCH_ROOT_QUEUE_IDX_BACKGROUND_QOS],
-	[WORKQ_BG_PRIOQUEUE][WORKQ_ADDTHREADS_OPTION_OVERCOMMIT] =
-			&_dispatch_root_queues[
-			DISPATCH_ROOT_QUEUE_IDX_BACKGROUND_QOS_OVERCOMMIT],
-	[WORKQ_LOW_PRIOQUEUE][0] = &_dispatch_root_queues[
-			DISPATCH_ROOT_QUEUE_IDX_UTILITY_QOS],
-	[WORKQ_LOW_PRIOQUEUE][WORKQ_ADDTHREADS_OPTION_OVERCOMMIT] =
-			&_dispatch_root_queues[
-			DISPATCH_ROOT_QUEUE_IDX_UTILITY_QOS_OVERCOMMIT],
-	[WORKQ_DEFAULT_PRIOQUEUE][0] = &_dispatch_root_queues[
-			DISPATCH_ROOT_QUEUE_IDX_DEFAULT_QOS],
-	[WORKQ_DEFAULT_PRIOQUEUE][WORKQ_ADDTHREADS_OPTION_OVERCOMMIT] =
-			&_dispatch_root_queues[
-			DISPATCH_ROOT_QUEUE_IDX_DEFAULT_QOS_OVERCOMMIT],
-	[WORKQ_HIGH_PRIOQUEUE][0] = &_dispatch_root_queues[
-			DISPATCH_ROOT_QUEUE_IDX_USER_INITIATED_QOS],
-	[WORKQ_HIGH_PRIOQUEUE][WORKQ_ADDTHREADS_OPTION_OVERCOMMIT] =
-			&_dispatch_root_queues[
-			DISPATCH_ROOT_QUEUE_IDX_USER_INITIATED_QOS_OVERCOMMIT],
-};
-#endif // DISPATCH_USE_PTHREAD_WORKQUEUE_SETDISPATCH_NP
-
-#if DISPATCH_USE_MGR_THREAD && DISPATCH_ENABLE_PTHREAD_ROOT_QUEUES
-static struct dispatch_queue_s _dispatch_mgr_root_queue;
-#else
-#define _dispatch_mgr_root_queue _dispatch_root_queues[\
-		DISPATCH_ROOT_QUEUE_IDX_USER_INTERACTIVE_QOS_OVERCOMMIT]
-#endif
-
-// 6618342 Contact the team that owns the Instrument DTrace probe before
-//         renaming this symbol
-DISPATCH_CACHELINE_ALIGN
-struct dispatch_queue_s _dispatch_mgr_q = {
-	DISPATCH_GLOBAL_OBJECT_HEADER(queue_mgr),
-	.dq_state = DISPATCH_QUEUE_STATE_INIT_VALUE(1) |
-			DISPATCH_QUEUE_ROLE_BASE_ANON,
-	.do_targetq = &_dispatch_mgr_root_queue,
-	.dq_label = "com.apple.libdispatch-manager",
-	.dq_atomic_flags = DQF_WIDTH(1),
-	.dq_priority = DISPATCH_PRIORITY_FLAG_MANAGER |
-			DISPATCH_PRIORITY_SATURATED_OVERRIDE,
-	.dq_serialnum = 2,
-};
-
-dispatch_queue_t
-dispatch_get_global_queue(intptr_t priority, uintptr_t flags)
-{
-	if (flags & ~(uintptr_t)DISPATCH_QUEUE_OVERCOMMIT) {
-		return DISPATCH_BAD_INPUT;
-	}
-	dispatch_qos_t qos = _dispatch_qos_from_queue_priority(priority);
-#if !HAVE_PTHREAD_WORKQUEUE_QOS
-	if (qos == QOS_CLASS_MAINTENANCE) {
-		qos = DISPATCH_QOS_BACKGROUND;
-	} else if (qos == QOS_CLASS_USER_INTERACTIVE) {
-		qos = DISPATCH_QOS_USER_INITIATED;
-	}
-#endif
-	if (qos == DISPATCH_QOS_UNSPECIFIED) {
-		return DISPATCH_BAD_INPUT;
-	}
-	return _dispatch_get_root_queue(qos, flags & DISPATCH_QUEUE_OVERCOMMIT);
-}
-
-DISPATCH_ALWAYS_INLINE
-static inline dispatch_queue_t
-_dispatch_get_current_queue(void)
-{
-	return _dispatch_queue_get_current() ?:
-			_dispatch_get_root_queue(DISPATCH_QOS_DEFAULT, true);
-}
-
-dispatch_queue_t
-dispatch_get_current_queue(void)
-{
-	return _dispatch_get_current_queue();
-}
+#pragma mark dispatch_assert_queue
 
 DISPATCH_NOINLINE DISPATCH_NORETURN
 static void
@@ -570,7 +62,8 @@
 dispatch_assert_queue(dispatch_queue_t dq)
 {
 	unsigned long metatype = dx_metatype(dq);
-	if (unlikely(metatype != _DISPATCH_QUEUE_TYPE)) {
+	if (unlikely(metatype != _DISPATCH_LANE_TYPE &&
+			metatype != _DISPATCH_WORKLOOP_TYPE)) {
 		DISPATCH_CLIENT_CRASH(metatype, "invalid queue passed to "
 				"dispatch_assert_queue()");
 	}
@@ -578,16 +71,8 @@
 	if (likely(_dq_state_drain_locked_by_self(dq_state))) {
 		return;
 	}
-	// we can look at the width: if it is changing while we read it,
-	// it means that a barrier is running on `dq` concurrently, which
-	// proves that we're not on `dq`. Hence reading a stale '1' is ok.
-	//
-	// However if we can have thread bound queues, these mess with lock
-	// ownership and we always have to take the slowpath
-	if (likely(DISPATCH_COCOA_COMPAT || dq->dq_width > 1)) {
-		if (likely(_dispatch_thread_frame_find_queue(dq))) {
-			return;
-		}
+	if (likely(_dispatch_thread_frame_find_queue(dq))) {
+		return;
 	}
 	_dispatch_assert_queue_fail(dq, true);
 }
@@ -596,26 +81,18 @@
 dispatch_assert_queue_not(dispatch_queue_t dq)
 {
 	unsigned long metatype = dx_metatype(dq);
-	if (unlikely(metatype != _DISPATCH_QUEUE_TYPE)) {
+	if (unlikely(metatype != _DISPATCH_LANE_TYPE &&
+			metatype != _DISPATCH_WORKLOOP_TYPE)) {
 		DISPATCH_CLIENT_CRASH(metatype, "invalid queue passed to "
 				"dispatch_assert_queue_not()");
 	}
 	uint64_t dq_state = os_atomic_load2o(dq, dq_state, relaxed);
-	if (likely(!_dq_state_drain_locked_by_self(dq_state))) {
-		// we can look at the width: if it is changing while we read it,
-		// it means that a barrier is running on `dq` concurrently, which
-		// proves that we're not on `dq`. Hence reading a stale '1' is ok.
-		//
-		// However if we can have thread bound queues, these mess with lock
-		// ownership and we always have to take the slowpath
-		if (likely(!DISPATCH_COCOA_COMPAT && dq->dq_width == 1)) {
-			return;
-		}
-		if (likely(!_dispatch_thread_frame_find_queue(dq))) {
-			return;
-		}
+	if (unlikely(_dq_state_drain_locked_by_self(dq_state))) {
+		_dispatch_assert_queue_fail(dq, false);
 	}
-	_dispatch_assert_queue_fail(dq, false);
+	if (unlikely(_dispatch_thread_frame_find_queue(dq))) {
+		_dispatch_assert_queue_fail(dq, false);
+	}
 }
 
 void
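
dispatch_assert_queue()/dispatch_assert_queue_not() above now accept both lanes and workloops and always consult the thread frame instead of special-casing the queue width. For context, a small usage example of this public API (plain C with blocks, no ARC); it is not part of the patch.

/*
 * dispatch_assert_queue() traps unless the caller is executing on behalf of
 * the given queue; dispatch_assert_queue_not() is the inverse.
 */
#include <dispatch/dispatch.h>
#include <stdio.h>

int main(void)
{
	dispatch_queue_t q = dispatch_queue_create("com.example.worker", NULL);

	dispatch_assert_queue_not(q);      // main() is not draining q: passes

	dispatch_sync(q, ^{
		dispatch_assert_queue(q);      // the block runs on behalf of q: passes
		puts("running on the worker queue");
	});

	dispatch_release(q);
	return 0;
}
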
@@ -637,2139 +114,6 @@
 	_dispatch_assert_queue_barrier_fail(dq);
 }
 
-#if DISPATCH_DEBUG && DISPATCH_ROOT_QUEUE_DEBUG
-#define _dispatch_root_queue_debug(...) _dispatch_debug(__VA_ARGS__)
-#define _dispatch_debug_root_queue(...) dispatch_debug_queue(__VA_ARGS__)
-#else
-#define _dispatch_root_queue_debug(...)
-#define _dispatch_debug_root_queue(...)
-#endif
-
-#pragma mark -
-#pragma mark dispatch_init
-
-static inline bool
-_dispatch_root_queues_init_workq(int *wq_supported)
-{
-	int r; (void)r;
-	bool result = false;
-	*wq_supported = 0;
-#if DISPATCH_USE_WORKQUEUES
-	bool disable_wq = false; (void)disable_wq;
-#if DISPATCH_ENABLE_THREAD_POOL && DISPATCH_DEBUG
-	disable_wq = slowpath(getenv("LIBDISPATCH_DISABLE_KWQ"));
-#endif
-#if DISPATCH_USE_KEVENT_WORKQUEUE || HAVE_PTHREAD_WORKQUEUE_QOS
-	bool disable_qos = false;
-#if DISPATCH_DEBUG
-	disable_qos = slowpath(getenv("LIBDISPATCH_DISABLE_QOS"));
-#endif
-#if DISPATCH_USE_KEVENT_WORKQUEUE
-	bool disable_kevent_wq = false;
-#if DISPATCH_DEBUG || DISPATCH_PROFILE
-	disable_kevent_wq = slowpath(getenv("LIBDISPATCH_DISABLE_KEVENT_WQ"));
-#endif
-#endif
-
-	if (!disable_wq && !disable_qos) {
-		*wq_supported = _pthread_workqueue_supported();
-#if DISPATCH_USE_KEVENT_WORKQUEUE
-		if (!disable_kevent_wq && (*wq_supported & WORKQ_FEATURE_KEVENT)) {
-			r = _pthread_workqueue_init_with_kevent(_dispatch_worker_thread3,
-					(pthread_workqueue_function_kevent_t)
-					_dispatch_kevent_worker_thread,
-					offsetof(struct dispatch_queue_s, dq_serialnum), 0);
-#if DISPATCH_USE_MGR_THREAD
-			_dispatch_kevent_workqueue_enabled = !r;
-#endif
-			result = !r;
-		} else
-#endif // DISPATCH_USE_KEVENT_WORKQUEUE
-		if (*wq_supported & WORKQ_FEATURE_FINEPRIO) {
-#if DISPATCH_USE_MGR_THREAD
-			r = _pthread_workqueue_init(_dispatch_worker_thread3,
-					offsetof(struct dispatch_queue_s, dq_serialnum), 0);
-			result = !r;
-#endif
-		}
-		if (!(*wq_supported & WORKQ_FEATURE_MAINTENANCE)) {
-			DISPATCH_INTERNAL_CRASH(*wq_supported,
-					"QoS Maintenance support required");
-		}
-	}
-#endif // DISPATCH_USE_KEVENT_WORKQUEUE || HAVE_PTHREAD_WORKQUEUE_QOS
-#if DISPATCH_USE_PTHREAD_WORKQUEUE_SETDISPATCH_NP
-	if (!result && !disable_wq) {
-		pthread_workqueue_setdispatchoffset_np(
-				offsetof(struct dispatch_queue_s, dq_serialnum));
-		r = pthread_workqueue_setdispatch_np(_dispatch_worker_thread2);
-#if !DISPATCH_USE_LEGACY_WORKQUEUE_FALLBACK
-		(void)dispatch_assume_zero(r);
-#endif
-		result = !r;
-	}
-#endif // DISPATCH_USE_PTHREAD_WORKQUEUE_SETDISPATCH_NP
-#if DISPATCH_USE_LEGACY_WORKQUEUE_FALLBACK || DISPATCH_USE_PTHREAD_POOL
-	if (!result) {
-#if DISPATCH_USE_LEGACY_WORKQUEUE_FALLBACK
-		pthread_workqueue_attr_t pwq_attr;
-		if (!disable_wq) {
-			r = pthread_workqueue_attr_init_np(&pwq_attr);
-			(void)dispatch_assume_zero(r);
-		}
-#endif
-		size_t i;
-		for (i = 0; i < DISPATCH_ROOT_QUEUE_COUNT; i++) {
-			pthread_workqueue_t pwq = NULL;
-			dispatch_root_queue_context_t qc;
-			qc = &_dispatch_root_queue_contexts[i];
-#if DISPATCH_USE_LEGACY_WORKQUEUE_FALLBACK
-			if (!disable_wq && qc->dgq_wq_priority != WORKQ_PRIO_INVALID) {
-				r = pthread_workqueue_attr_setqueuepriority_np(&pwq_attr,
-						qc->dgq_wq_priority);
-				(void)dispatch_assume_zero(r);
-				r = pthread_workqueue_attr_setovercommit_np(&pwq_attr,
-						qc->dgq_wq_options &
-						WORKQ_ADDTHREADS_OPTION_OVERCOMMIT);
-				(void)dispatch_assume_zero(r);
-				r = pthread_workqueue_create_np(&pwq, &pwq_attr);
-				(void)dispatch_assume_zero(r);
-				result = result || dispatch_assume(pwq);
-			}
-#endif // DISPATCH_USE_LEGACY_WORKQUEUE_FALLBACK
-			if (pwq) {
-				qc->dgq_kworkqueue = pwq;
-			} else {
-				qc->dgq_kworkqueue = (void*)(~0ul);
-				// because the fastpath of _dispatch_global_queue_poke didn't
-				// know yet that we're using the internal pool implementation
-				// we have to undo its setting of dgq_pending
-				qc->dgq_pending = 0;
-			}
-		}
-#if DISPATCH_USE_LEGACY_WORKQUEUE_FALLBACK
-		if (!disable_wq) {
-			r = pthread_workqueue_attr_destroy_np(&pwq_attr);
-			(void)dispatch_assume_zero(r);
-		}
-#endif
-	}
-#endif // DISPATCH_USE_LEGACY_WORKQUEUE_FALLBACK || DISPATCH_ENABLE_THREAD_POOL
-#endif // DISPATCH_USE_WORKQUEUES
-	return result;
-}
-
-#if DISPATCH_USE_PTHREAD_POOL
-static inline void
-_dispatch_root_queue_init_pthread_pool(dispatch_root_queue_context_t qc,
-		int32_t pool_size, bool overcommit)
-{
-	dispatch_pthread_root_queue_context_t pqc = qc->dgq_ctxt;
-	int32_t thread_pool_size = overcommit ? DISPATCH_WORKQ_MAX_PTHREAD_COUNT :
-			(int32_t)dispatch_hw_config(active_cpus);
-	if (slowpath(pool_size) && pool_size < thread_pool_size) {
-		thread_pool_size = pool_size;
-	}
-	qc->dgq_thread_pool_size = thread_pool_size;
-#if DISPATCH_USE_WORKQUEUES
-	if (qc->dgq_qos) {
-#if !defined(_WIN32)
-		(void)dispatch_assume_zero(pthread_attr_init(&pqc->dpq_thread_attr));
-		(void)dispatch_assume_zero(pthread_attr_setdetachstate(
-				&pqc->dpq_thread_attr, PTHREAD_CREATE_DETACHED));
-#endif
-#if HAVE_PTHREAD_WORKQUEUE_QOS
-		(void)dispatch_assume_zero(pthread_attr_set_qos_class_np(
-				&pqc->dpq_thread_attr, qc->dgq_qos, 0));
-#endif
-	}
-#endif // HAVE_PTHREAD_WORKQUEUES
-	_dispatch_sema4_t *sema = &pqc->dpq_thread_mediator.dsema_sema;
-	_dispatch_sema4_init(sema, _DSEMA4_POLICY_LIFO);
-	_dispatch_sema4_create(sema, _DSEMA4_POLICY_LIFO);
-}
-#endif // DISPATCH_USE_PTHREAD_POOL
-
-static void
-_dispatch_root_queues_init_once(void *context DISPATCH_UNUSED)
-{
-	int wq_supported;
-	_dispatch_fork_becomes_unsafe();
-	if (!_dispatch_root_queues_init_workq(&wq_supported)) {
-#if DISPATCH_ENABLE_THREAD_POOL
-		size_t i;
-		for (i = 0; i < DISPATCH_ROOT_QUEUE_COUNT; i++) {
-			bool overcommit = true;
-#if TARGET_OS_EMBEDDED || (DISPATCH_USE_INTERNAL_WORKQUEUE && HAVE_DISPATCH_WORKQ_MONITORING)
-			// some software hangs if the non-overcommitting queues do not
-			// overcommit when threads block. Someday, this behavior should
-			// apply to all platforms
-			if (!(i & 1)) {
-				overcommit = false;
-			}
-#endif
-			_dispatch_root_queue_init_pthread_pool(
-					&_dispatch_root_queue_contexts[i], 0, overcommit);
-		}
-#else
-		DISPATCH_INTERNAL_CRASH((errno << 16) | wq_supported,
-				"Root queue initialization failed");
-#endif // DISPATCH_ENABLE_THREAD_POOL
-	}
-}
-
-void
-_dispatch_root_queues_init(void)
-{
-	static dispatch_once_t _dispatch_root_queues_pred;
-	dispatch_once_f(&_dispatch_root_queues_pred, NULL,
-			_dispatch_root_queues_init_once);
-}
-
-DISPATCH_EXPORT DISPATCH_NOTHROW
-void
-libdispatch_init(void)
-{
-	dispatch_assert(DISPATCH_ROOT_QUEUE_COUNT == 2 * DISPATCH_QOS_MAX);
-
-	dispatch_assert(DISPATCH_QUEUE_PRIORITY_LOW ==
-			-DISPATCH_QUEUE_PRIORITY_HIGH);
-	dispatch_assert(countof(_dispatch_root_queues) ==
-			DISPATCH_ROOT_QUEUE_COUNT);
-	dispatch_assert(countof(_dispatch_root_queue_contexts) ==
-			DISPATCH_ROOT_QUEUE_COUNT);
-#if DISPATCH_USE_PTHREAD_WORKQUEUE_SETDISPATCH_NP
-	dispatch_assert(sizeof(_dispatch_wq2root_queues) /
-			sizeof(_dispatch_wq2root_queues[0][0]) ==
-			WORKQ_NUM_PRIOQUEUE * 2);
-#endif
-#if DISPATCH_ENABLE_THREAD_POOL
-	dispatch_assert(countof(_dispatch_pthread_root_queue_contexts) ==
-			DISPATCH_ROOT_QUEUE_COUNT);
-#endif
-
-	dispatch_assert(offsetof(struct dispatch_continuation_s, do_next) ==
-			offsetof(struct dispatch_object_s, do_next));
-	dispatch_assert(offsetof(struct dispatch_continuation_s, do_vtable) ==
-			offsetof(struct dispatch_object_s, do_vtable));
-	dispatch_assert(sizeof(struct dispatch_apply_s) <=
-			DISPATCH_CONTINUATION_SIZE);
-	dispatch_assert(sizeof(struct dispatch_queue_s) % DISPATCH_CACHELINE_SIZE
-			== 0);
-	dispatch_assert(offsetof(struct dispatch_queue_s, dq_state) % _Alignof(uint64_t) == 0);
-	dispatch_assert(sizeof(struct dispatch_root_queue_context_s) %
-			DISPATCH_CACHELINE_SIZE == 0);
-
-#if HAVE_PTHREAD_WORKQUEUE_QOS
-	dispatch_qos_t qos = _dispatch_qos_from_qos_class(qos_class_main());
-	dispatch_priority_t pri = _dispatch_priority_make(qos, 0);
-	_dispatch_main_q.dq_priority = _dispatch_priority_with_override_qos(pri, qos);
-#if DISPATCH_DEBUG
-	if (!slowpath(getenv("LIBDISPATCH_DISABLE_SET_QOS"))) {
-		_dispatch_set_qos_class_enabled = 1;
-	}
-#endif
-#endif
-
-#if DISPATCH_USE_THREAD_LOCAL_STORAGE
-	_dispatch_thread_key_create(&__dispatch_tsd_key, _libdispatch_tsd_cleanup);
-#else
-	_dispatch_thread_key_create(&dispatch_priority_key, NULL);
-	_dispatch_thread_key_create(&dispatch_r2k_key, NULL);
-	_dispatch_thread_key_create(&dispatch_queue_key, _dispatch_queue_cleanup);
-	_dispatch_thread_key_create(&dispatch_frame_key, _dispatch_frame_cleanup);
-	_dispatch_thread_key_create(&dispatch_cache_key, _dispatch_cache_cleanup);
-	_dispatch_thread_key_create(&dispatch_context_key, _dispatch_context_cleanup);
-	_dispatch_thread_key_create(&dispatch_pthread_root_queue_observer_hooks_key,
-			NULL);
-	_dispatch_thread_key_create(&dispatch_basepri_key, NULL);
-#if DISPATCH_INTROSPECTION
-	_dispatch_thread_key_create(&dispatch_introspection_key , NULL);
-#elif DISPATCH_PERF_MON
-	_dispatch_thread_key_create(&dispatch_bcounter_key, NULL);
-#endif
-	_dispatch_thread_key_create(&dispatch_wlh_key, _dispatch_wlh_cleanup);
-	_dispatch_thread_key_create(&dispatch_voucher_key, _voucher_thread_cleanup);
-	_dispatch_thread_key_create(&dispatch_deferred_items_key,
-			_dispatch_deferred_items_cleanup);
-#endif
-
-#if DISPATCH_USE_RESOLVERS // rdar://problem/8541707
-	_dispatch_main_q.do_targetq = &_dispatch_root_queues[
-			DISPATCH_ROOT_QUEUE_IDX_DEFAULT_QOS_OVERCOMMIT];
-#endif
-
-	_dispatch_queue_set_current(&_dispatch_main_q);
-	_dispatch_queue_set_bound_thread(&_dispatch_main_q);
-
-#if DISPATCH_USE_PTHREAD_ATFORK
-	(void)dispatch_assume_zero(pthread_atfork(dispatch_atfork_prepare,
-			dispatch_atfork_parent, dispatch_atfork_child));
-#endif
-	_dispatch_hw_config_init();
-	_dispatch_time_init();
-	_dispatch_vtable_init();
-	_os_object_init();
-	_voucher_init();
-	_dispatch_introspection_init();
-}
-
-#if DISPATCH_USE_THREAD_LOCAL_STORAGE
-#if defined(__unix__) || (defined(__APPLE__) && defined(__MACH__))
-#include <unistd.h>
-#endif
-#if !defined(_WIN32)
-#include <sys/syscall.h>
-#endif
-
-#ifndef __ANDROID__
-#ifdef SYS_gettid
-DISPATCH_ALWAYS_INLINE
-static inline pid_t
-gettid(void)
-{
-	return (pid_t)syscall(SYS_gettid);
-}
-#elif defined(__FreeBSD__)
-DISPATCH_ALWAYS_INLINE
-static inline pid_t
-gettid(void)
-{
-	return (pid_t)pthread_getthreadid_np();
-}
-#elif defined(_WIN32)
-DISPATCH_ALWAYS_INLINE
-static inline DWORD
-gettid(void)
-{
-	return GetCurrentThreadId();
-}
-#else
-#error "SYS_gettid unavailable on this system"
-#endif /* SYS_gettid */
-#endif /* ! __ANDROID__ */
-
-#define _tsd_call_cleanup(k, f)  do { \
-		if ((f) && tsd->k) ((void(*)(void*))(f))(tsd->k); \
-	} while (0)
-
-#ifdef __ANDROID__
-static void (*_dispatch_thread_detach_callback)(void);
-
-void
-_dispatch_install_thread_detach_callback(void (*cb)(void))
-{
-    if (os_atomic_xchg(&_dispatch_thread_detach_callback, cb, relaxed)) {
-        DISPATCH_CLIENT_CRASH(0, "Installing a thread detach callback twice");
-    }
-}
-#endif
-
-#if defined(_WIN32)
-static bool
-_dispatch_process_is_exiting(void)
-{
-	// The goal here is to detect if the current thread is executing cleanup
-	// code (e.g. FLS destructors) as a result of calling ExitProcess(). Windows
-	// doesn't provide an official method of getting this information, so we
-	// take advantage of how ExitProcess() works internally. The first thing
-	// that it does (according to MSDN) is terminate every other thread in the
-	// process. Logically, it should not be possible to create more threads
-	// after this point, and Windows indeed enforces this. Try to create a
-	// lightweight suspended thread, and if access is denied, assume that this
-	// is because the process is exiting.
-	//
-	// We aren't worried about any race conditions here during process exit.
-	// Cleanup code is only run on the thread that already called ExitProcess(),
-	// and every other thread will have been forcibly terminated by the time
-	// that happens. Additionally, while CreateThread() could conceivably fail
-	// due to resource exhaustion, the process would already be in a bad state
-	// if that happens. This is only intended to prevent unwanted cleanup code
-	// from running, so the worst case is that a thread doesn't clean up after
-	// itself when the process is about to die anyway.
-	const size_t stack_size = 1;  // As small as possible
-	HANDLE thread = CreateThread(NULL, stack_size, NULL, NULL,
-			CREATE_SUSPENDED | STACK_SIZE_PARAM_IS_A_RESERVATION, NULL);
-	if (thread) {
-		// Although Microsoft recommends against using TerminateThread, it's
-		// safe to use it here because we know that the thread is suspended and
-		// it has not executed any code due to a NULL lpStartAddress. There was
-		// a bug in Windows Server 2003 and Windows XP where the initial stack
-		// would not be freed, but libdispatch does not support them anyway.
-		TerminateThread(thread, 0);
-		CloseHandle(thread);
-		return false;
-	}
-	return GetLastError() == ERROR_ACCESS_DENIED;
-}
-#endif
-
-void DISPATCH_TSD_DTOR_CC
-_libdispatch_tsd_cleanup(void *ctx)
-{
-#if defined(_WIN32)
-	// On Windows, exiting a process will still call FLS destructors for the
-	// thread that called ExitProcess(). pthreads-based platforms don't call key
-	// destructors on exit, so be consistent.
-	if (_dispatch_process_is_exiting()) {
-		return;
-	}
-#endif
-
-	struct dispatch_tsd *tsd = (struct dispatch_tsd*) ctx;
-
-	_tsd_call_cleanup(dispatch_priority_key, NULL);
-	_tsd_call_cleanup(dispatch_r2k_key, NULL);
-
-	_tsd_call_cleanup(dispatch_queue_key, _dispatch_queue_cleanup);
-	_tsd_call_cleanup(dispatch_frame_key, _dispatch_frame_cleanup);
-	_tsd_call_cleanup(dispatch_cache_key, _dispatch_cache_cleanup);
-	_tsd_call_cleanup(dispatch_context_key, _dispatch_context_cleanup);
-	_tsd_call_cleanup(dispatch_pthread_root_queue_observer_hooks_key,
-			NULL);
-	_tsd_call_cleanup(dispatch_basepri_key, NULL);
-#if DISPATCH_INTROSPECTION
-	_tsd_call_cleanup(dispatch_introspection_key, NULL);
-#elif DISPATCH_PERF_MON
-	_tsd_call_cleanup(dispatch_bcounter_key, NULL);
-#endif
-	_tsd_call_cleanup(dispatch_wlh_key, _dispatch_wlh_cleanup);
-	_tsd_call_cleanup(dispatch_voucher_key, _voucher_thread_cleanup);
-	_tsd_call_cleanup(dispatch_deferred_items_key,
-			_dispatch_deferred_items_cleanup);
-#ifdef __ANDROID__
-	if (_dispatch_thread_detach_callback) {
-		_dispatch_thread_detach_callback();
-	}
-#endif
-	tsd->tid = 0;
-}
-
-DISPATCH_NOINLINE
-void
-libdispatch_tsd_init(void)
-{
-#if defined(_WIN32)
-	FlsSetValue(__dispatch_tsd_key, &__dispatch_tsd);
-#else
-	pthread_setspecific(__dispatch_tsd_key, &__dispatch_tsd);
-#endif /* defined(_WIN32) */
-	__dispatch_tsd.tid = gettid();
-}
-#endif
-
-DISPATCH_NOTHROW
-void
-_dispatch_queue_atfork_child(void)
-{
-	dispatch_queue_t main_q = &_dispatch_main_q;
-	void *crash = (void *)0x100;
-	size_t i;
-
-	if (_dispatch_queue_is_thread_bound(main_q)) {
-		_dispatch_queue_set_bound_thread(main_q);
-	}
-
-	if (!_dispatch_is_multithreaded_inline()) return;
-
-	main_q->dq_items_head = crash;
-	main_q->dq_items_tail = crash;
-
-	_dispatch_mgr_q.dq_items_head = crash;
-	_dispatch_mgr_q.dq_items_tail = crash;
-
-	for (i = 0; i < DISPATCH_ROOT_QUEUE_COUNT; i++) {
-		_dispatch_root_queues[i].dq_items_head = crash;
-		_dispatch_root_queues[i].dq_items_tail = crash;
-	}
-}
-
-DISPATCH_NOINLINE
-void
-_dispatch_fork_becomes_unsafe_slow(void)
-{
-	uint8_t value = os_atomic_or(&_dispatch_unsafe_fork,
-			_DISPATCH_UNSAFE_FORK_MULTITHREADED, relaxed);
-	if (value & _DISPATCH_UNSAFE_FORK_PROHIBIT) {
-		DISPATCH_CLIENT_CRASH(0, "Transition to multithreaded is prohibited");
-	}
-}
-
-DISPATCH_NOINLINE
-void
-_dispatch_prohibit_transition_to_multithreaded(bool prohibit)
-{
-	if (prohibit) {
-		uint8_t value = os_atomic_or(&_dispatch_unsafe_fork,
-				_DISPATCH_UNSAFE_FORK_PROHIBIT, relaxed);
-		if (value & _DISPATCH_UNSAFE_FORK_MULTITHREADED) {
-			DISPATCH_CLIENT_CRASH(0, "The executable is already multithreaded");
-		}
-	} else {
-		os_atomic_and(&_dispatch_unsafe_fork,
-				(uint8_t)~_DISPATCH_UNSAFE_FORK_PROHIBIT, relaxed);
-	}
-}
-
-#pragma mark -
-#pragma mark dispatch_queue_attr_t
-
-DISPATCH_ALWAYS_INLINE
-static inline bool
-_dispatch_qos_class_valid(dispatch_qos_class_t qos_class, int relative_priority)
-{
-	qos_class_t qos = (qos_class_t)qos_class;
-	switch (qos) {
-	case QOS_CLASS_MAINTENANCE:
-	case QOS_CLASS_BACKGROUND:
-	case QOS_CLASS_UTILITY:
-	case QOS_CLASS_DEFAULT:
-	case QOS_CLASS_USER_INITIATED:
-	case QOS_CLASS_USER_INTERACTIVE:
-	case QOS_CLASS_UNSPECIFIED:
-		break;
-	}
-	if (relative_priority > 0 || relative_priority < QOS_MIN_RELATIVE_PRIORITY){
-		return false;
-	}
-	return true;
-}
-
-#define DISPATCH_QUEUE_ATTR_OVERCOMMIT2IDX(overcommit) \
-		((overcommit) == _dispatch_queue_attr_overcommit_disabled ? \
-		DQA_INDEX_NON_OVERCOMMIT : \
-		((overcommit) == _dispatch_queue_attr_overcommit_enabled ? \
-		DQA_INDEX_OVERCOMMIT : DQA_INDEX_UNSPECIFIED_OVERCOMMIT))
-
-#define DISPATCH_QUEUE_ATTR_CONCURRENT2IDX(concurrent) \
-		((concurrent) ? DQA_INDEX_CONCURRENT : DQA_INDEX_SERIAL)
-
-#define DISPATCH_QUEUE_ATTR_INACTIVE2IDX(inactive) \
-		((inactive) ? DQA_INDEX_INACTIVE : DQA_INDEX_ACTIVE)
-
-#define DISPATCH_QUEUE_ATTR_AUTORELEASE_FREQUENCY2IDX(frequency) \
-		(frequency)
-
-#define DISPATCH_QUEUE_ATTR_PRIO2IDX(prio) (-(prio))
-
-#define DISPATCH_QUEUE_ATTR_QOS2IDX(qos) (qos)
-
-static inline dispatch_queue_attr_t
-_dispatch_get_queue_attr(dispatch_qos_t qos, int prio,
-		_dispatch_queue_attr_overcommit_t overcommit,
-		dispatch_autorelease_frequency_t frequency,
-		bool concurrent, bool inactive)
-{
-	return (dispatch_queue_attr_t)&_dispatch_queue_attrs
-			[DISPATCH_QUEUE_ATTR_QOS2IDX(qos)]
-			[DISPATCH_QUEUE_ATTR_PRIO2IDX(prio)]
-			[DISPATCH_QUEUE_ATTR_OVERCOMMIT2IDX(overcommit)]
-			[DISPATCH_QUEUE_ATTR_AUTORELEASE_FREQUENCY2IDX(frequency)]
-			[DISPATCH_QUEUE_ATTR_CONCURRENT2IDX(concurrent)]
-			[DISPATCH_QUEUE_ATTR_INACTIVE2IDX(inactive)];
-}
-
-dispatch_queue_attr_t
-_dispatch_get_default_queue_attr(void)
-{
-	return _dispatch_get_queue_attr(DISPATCH_QOS_UNSPECIFIED, 0,
-				_dispatch_queue_attr_overcommit_unspecified,
-				DISPATCH_AUTORELEASE_FREQUENCY_INHERIT, false, false);
-}
-
-dispatch_queue_attr_t
-dispatch_queue_attr_make_with_qos_class(dispatch_queue_attr_t dqa,
-		dispatch_qos_class_t qos_class, int relpri)
-{
-	if (!_dispatch_qos_class_valid(qos_class, relpri)) {
-		return DISPATCH_BAD_INPUT;
-	}
-	if (!slowpath(dqa)) {
-		dqa = _dispatch_get_default_queue_attr();
-	} else if (dqa->do_vtable != DISPATCH_VTABLE(queue_attr)) {
-		DISPATCH_CLIENT_CRASH(dqa->do_vtable, "Invalid queue attribute");
-	}
-	return _dispatch_get_queue_attr(_dispatch_qos_from_qos_class(qos_class),
-			relpri, dqa->dqa_overcommit, dqa->dqa_autorelease_frequency,
-			dqa->dqa_concurrent, dqa->dqa_inactive);
-}
-
-dispatch_queue_attr_t
-dispatch_queue_attr_make_initially_inactive(dispatch_queue_attr_t dqa)
-{
-	if (!slowpath(dqa)) {
-		dqa = _dispatch_get_default_queue_attr();
-	} else if (dqa->do_vtable != DISPATCH_VTABLE(queue_attr)) {
-		DISPATCH_CLIENT_CRASH(dqa->do_vtable, "Invalid queue attribute");
-	}
-	dispatch_priority_t pri = dqa->dqa_qos_and_relpri;
-	return _dispatch_get_queue_attr(_dispatch_priority_qos(pri),
-			_dispatch_priority_relpri(pri), dqa->dqa_overcommit,
-			dqa->dqa_autorelease_frequency, dqa->dqa_concurrent, true);
-}
-
-dispatch_queue_attr_t
-dispatch_queue_attr_make_with_overcommit(dispatch_queue_attr_t dqa,
-		bool overcommit)
-{
-	if (!slowpath(dqa)) {
-		dqa = _dispatch_get_default_queue_attr();
-	} else if (dqa->do_vtable != DISPATCH_VTABLE(queue_attr)) {
-		DISPATCH_CLIENT_CRASH(dqa->do_vtable, "Invalid queue attribute");
-	}
-	dispatch_priority_t pri = dqa->dqa_qos_and_relpri;
-	return _dispatch_get_queue_attr(_dispatch_priority_qos(pri),
-			_dispatch_priority_relpri(pri), overcommit ?
-			_dispatch_queue_attr_overcommit_enabled :
-			_dispatch_queue_attr_overcommit_disabled,
-			dqa->dqa_autorelease_frequency, dqa->dqa_concurrent,
-			dqa->dqa_inactive);
-}
-
-dispatch_queue_attr_t
-dispatch_queue_attr_make_with_autorelease_frequency(dispatch_queue_attr_t dqa,
-		dispatch_autorelease_frequency_t frequency)
-{
-	switch (frequency) {
-	case DISPATCH_AUTORELEASE_FREQUENCY_INHERIT:
-	case DISPATCH_AUTORELEASE_FREQUENCY_WORK_ITEM:
-	case DISPATCH_AUTORELEASE_FREQUENCY_NEVER:
-		break;
-	}
-	if (!slowpath(dqa)) {
-		dqa = _dispatch_get_default_queue_attr();
-	} else if (dqa->do_vtable != DISPATCH_VTABLE(queue_attr)) {
-		DISPATCH_CLIENT_CRASH(dqa->do_vtable, "Invalid queue attribute");
-	}
-	dispatch_priority_t pri = dqa->dqa_qos_and_relpri;
-	return _dispatch_get_queue_attr(_dispatch_priority_qos(pri),
-			_dispatch_priority_relpri(pri), dqa->dqa_overcommit,
-			frequency, dqa->dqa_concurrent, dqa->dqa_inactive);
-}
-
-#pragma mark -
-#pragma mark dispatch_queue_t
-
-void
-dispatch_queue_set_label_nocopy(dispatch_queue_t dq, const char *label)
-{
-	if (dq->do_ref_cnt == DISPATCH_OBJECT_GLOBAL_REFCNT) {
-		return;
-	}
-	dispatch_queue_flags_t dqf = _dispatch_queue_atomic_flags(dq);
-	if (unlikely(dqf & DQF_LABEL_NEEDS_FREE)) {
-		DISPATCH_CLIENT_CRASH(dq, "Cannot change label for this queue");
-	}
-	dq->dq_label = label;
-}
-
-static inline bool
-_dispatch_base_queue_is_wlh(dispatch_queue_t dq, dispatch_queue_t tq)
-{
-	(void)dq; (void)tq;
-	return false;
-}
-
-static void
-_dispatch_queue_inherit_wlh_from_target(dispatch_queue_t dq,
-		dispatch_queue_t tq)
-{
-	uint64_t old_state, new_state, role;
-
-	if (!dx_hastypeflag(tq, QUEUE_ROOT)) {
-		role = DISPATCH_QUEUE_ROLE_INNER;
-	} else if (_dispatch_base_queue_is_wlh(dq, tq)) {
-		role = DISPATCH_QUEUE_ROLE_BASE_WLH;
-	} else {
-		role = DISPATCH_QUEUE_ROLE_BASE_ANON;
-	}
-
-	os_atomic_rmw_loop2o(dq, dq_state, old_state, new_state, relaxed, {
-		new_state = old_state & ~DISPATCH_QUEUE_ROLE_MASK;
-		new_state |= role;
-		if (old_state == new_state) {
-			os_atomic_rmw_loop_give_up(break);
-		}
-	});
-
-	dispatch_wlh_t cur_wlh = _dispatch_get_wlh();
-	if (cur_wlh == (dispatch_wlh_t)dq && !_dq_state_is_base_wlh(new_state)) {
-		_dispatch_event_loop_leave_immediate(cur_wlh, new_state);
-	}
-	if (!dx_hastypeflag(tq, QUEUE_ROOT)) {
-#if DISPATCH_ALLOW_NON_LEAF_RETARGET
-		_dispatch_queue_atomic_flags_set(tq, DQF_TARGETED);
-#else
-		_dispatch_queue_atomic_flags_set_and_clear(tq, DQF_TARGETED, DQF_LEGACY);
-#endif
-	}
-}
-
-unsigned long volatile _dispatch_queue_serial_numbers =
-		DISPATCH_QUEUE_SERIAL_NUMBER_INIT;
-
-dispatch_priority_t
-_dispatch_queue_compute_priority_and_wlh(dispatch_queue_t dq,
-		dispatch_wlh_t *wlh_out)
-{
-	dispatch_priority_t p = dq->dq_priority & DISPATCH_PRIORITY_REQUESTED_MASK;
-	dispatch_queue_t tq = dq->do_targetq;
-	dispatch_priority_t tqp = tq->dq_priority &DISPATCH_PRIORITY_REQUESTED_MASK;
-	dispatch_wlh_t wlh = DISPATCH_WLH_ANON;
-
-	if (_dq_state_is_base_wlh(dq->dq_state)) {
-		wlh = (dispatch_wlh_t)dq;
-	}
-
-	while (unlikely(!dx_hastypeflag(tq, QUEUE_ROOT))) {
-		if (unlikely(tq == &_dispatch_mgr_q)) {
-			if (wlh_out) *wlh_out = DISPATCH_WLH_ANON;
-			return DISPATCH_PRIORITY_FLAG_MANAGER;
-		}
-		if (unlikely(_dispatch_queue_is_thread_bound(tq))) {
-			// thread-bound hierarchies are weird, we need to install
-			// from the context of the thread this hierarchy is bound to
-			if (wlh_out) *wlh_out = NULL;
-			return 0;
-		}
-		if (unlikely(DISPATCH_QUEUE_IS_SUSPENDED(tq))) {
-			// this queue may not be activated yet, so the queue graph may not
-			// have stabilized yet
-			_dispatch_ktrace1(DISPATCH_PERF_delayed_registration, dq);
-			if (wlh_out) *wlh_out = NULL;
-			return 0;
-		}
-
-		if (_dq_state_is_base_wlh(tq->dq_state)) {
-			wlh = (dispatch_wlh_t)tq;
-		} else if (unlikely(_dispatch_queue_is_legacy(tq))) {
-			// we're not allowed to dereference tq->do_targetq
-			_dispatch_ktrace1(DISPATCH_PERF_delayed_registration, dq);
-			if (wlh_out) *wlh_out = NULL;
-			return 0;
-		}
-
-		if (!(tq->dq_priority & DISPATCH_PRIORITY_FLAG_INHERIT)) {
-			if (p < tqp) p = tqp;
-		}
-		tq = tq->do_targetq;
-		tqp = tq->dq_priority & DISPATCH_PRIORITY_REQUESTED_MASK;
-	}
-
-	if (unlikely(!tqp)) {
-		// pthread root queues opt out of QoS
-		if (wlh_out) *wlh_out = DISPATCH_WLH_ANON;
-		return DISPATCH_PRIORITY_FLAG_MANAGER;
-	}
-	if (wlh_out) *wlh_out = wlh;
-	return _dispatch_priority_inherit_from_root_queue(p, tq);
-}
-
-DISPATCH_NOINLINE
-static dispatch_queue_t
-_dispatch_queue_create_with_target(const char *label, dispatch_queue_attr_t dqa,
-		dispatch_queue_t tq, bool legacy)
-{
-	if (!slowpath(dqa)) {
-		dqa = _dispatch_get_default_queue_attr();
-	} else if (dqa->do_vtable != DISPATCH_VTABLE(queue_attr)) {
-		DISPATCH_CLIENT_CRASH(dqa->do_vtable, "Invalid queue attribute");
-	}
-
-	//
-	// Step 1: Normalize arguments (qos, overcommit, tq)
-	//
-
-	dispatch_qos_t qos = _dispatch_priority_qos(dqa->dqa_qos_and_relpri);
-#if !HAVE_PTHREAD_WORKQUEUE_QOS
-	if (qos == DISPATCH_QOS_USER_INTERACTIVE) {
-		qos = DISPATCH_QOS_USER_INITIATED;
-	}
-	if (qos == DISPATCH_QOS_MAINTENANCE) {
-		qos = DISPATCH_QOS_BACKGROUND;
-	}
-#endif // !HAVE_PTHREAD_WORKQUEUE_QOS
-
-	_dispatch_queue_attr_overcommit_t overcommit = dqa->dqa_overcommit;
-	if (overcommit != _dispatch_queue_attr_overcommit_unspecified && tq) {
-		if (tq->do_targetq) {
-			DISPATCH_CLIENT_CRASH(tq, "Cannot specify both overcommit and "
-					"a non-global target queue");
-		}
-	}
-
-	if (tq && !tq->do_targetq &&
-			tq->do_ref_cnt == DISPATCH_OBJECT_GLOBAL_REFCNT) {
-		// Handle discrepancies between attr and target queue; attributes win
-		if (overcommit == _dispatch_queue_attr_overcommit_unspecified) {
-			if (tq->dq_priority & DISPATCH_PRIORITY_FLAG_OVERCOMMIT) {
-				overcommit = _dispatch_queue_attr_overcommit_enabled;
-			} else {
-				overcommit = _dispatch_queue_attr_overcommit_disabled;
-			}
-		}
-		if (qos == DISPATCH_QOS_UNSPECIFIED) {
-			dispatch_qos_t tq_qos = _dispatch_priority_qos(tq->dq_priority);
-			tq = _dispatch_get_root_queue(tq_qos,
-					overcommit == _dispatch_queue_attr_overcommit_enabled);
-		} else {
-			tq = NULL;
-		}
-	} else if (tq && !tq->do_targetq) {
-		// target is a pthread or runloop root queue, setting QoS or overcommit
-		// is disallowed
-		if (overcommit != _dispatch_queue_attr_overcommit_unspecified) {
-			DISPATCH_CLIENT_CRASH(tq, "Cannot specify an overcommit attribute "
-					"and use this kind of target queue");
-		}
-		if (qos != DISPATCH_QOS_UNSPECIFIED) {
-			DISPATCH_CLIENT_CRASH(tq, "Cannot specify a QoS attribute "
-					"and use this kind of target queue");
-		}
-	} else {
-		if (overcommit == _dispatch_queue_attr_overcommit_unspecified) {
-			 // Serial queues default to overcommit!
-			overcommit = dqa->dqa_concurrent ?
-					_dispatch_queue_attr_overcommit_disabled :
-					_dispatch_queue_attr_overcommit_enabled;
-		}
-	}
-	if (!tq) {
-		tq = _dispatch_get_root_queue(
-				qos == DISPATCH_QOS_UNSPECIFIED ? DISPATCH_QOS_DEFAULT : qos,
-				overcommit == _dispatch_queue_attr_overcommit_enabled);
-		if (slowpath(!tq)) {
-			DISPATCH_CLIENT_CRASH(qos, "Invalid queue attribute");
-		}
-	}
-
-	//
-	// Step 2: Initialize the queue
-	//
-
-	if (legacy) {
-		// if any of these attributes is specified, use non-legacy classes
-		if (dqa->dqa_inactive || dqa->dqa_autorelease_frequency) {
-			legacy = false;
-		}
-	}
-
-	const void *vtable;
-	dispatch_queue_flags_t dqf = 0;
-	if (legacy) {
-		vtable = DISPATCH_VTABLE(queue);
-	} else if (dqa->dqa_concurrent) {
-		vtable = DISPATCH_VTABLE(queue_concurrent);
-	} else {
-		vtable = DISPATCH_VTABLE(queue_serial);
-	}
-	switch (dqa->dqa_autorelease_frequency) {
-	case DISPATCH_AUTORELEASE_FREQUENCY_NEVER:
-		dqf |= DQF_AUTORELEASE_NEVER;
-		break;
-	case DISPATCH_AUTORELEASE_FREQUENCY_WORK_ITEM:
-		dqf |= DQF_AUTORELEASE_ALWAYS;
-		break;
-	}
-	if (legacy) {
-		dqf |= DQF_LEGACY;
-	}
-	if (label) {
-		const char *tmp = _dispatch_strdup_if_mutable(label);
-		if (tmp != label) {
-			dqf |= DQF_LABEL_NEEDS_FREE;
-			label = tmp;
-		}
-	}
-
-	dispatch_queue_t dq = _dispatch_object_alloc(vtable,
-			sizeof(struct dispatch_queue_s) - DISPATCH_QUEUE_CACHELINE_PAD);
-	_dispatch_queue_init(dq, dqf, dqa->dqa_concurrent ?
-			DISPATCH_QUEUE_WIDTH_MAX : 1, DISPATCH_QUEUE_ROLE_INNER |
-			(dqa->dqa_inactive ? DISPATCH_QUEUE_INACTIVE : 0));
-
-	dq->dq_label = label;
-#if HAVE_PTHREAD_WORKQUEUE_QOS
-	dq->dq_priority = dqa->dqa_qos_and_relpri;
-	if (overcommit == _dispatch_queue_attr_overcommit_enabled) {
-		dq->dq_priority |= DISPATCH_PRIORITY_FLAG_OVERCOMMIT;
-	}
-#endif
-	_dispatch_retain(tq);
-	if (qos == QOS_CLASS_UNSPECIFIED) {
-		// legacy way of inheriting the QoS from the target
-		_dispatch_queue_priority_inherit_from_target(dq, tq);
-	}
-	if (!dqa->dqa_inactive) {
-		_dispatch_queue_inherit_wlh_from_target(dq, tq);
-	}
-	dq->do_targetq = tq;
-	_dispatch_object_debug(dq, "%s", __func__);
-	return _dispatch_introspection_queue_create(dq);
-}
-
-dispatch_queue_t
-dispatch_queue_create_with_target(const char *label, dispatch_queue_attr_t dqa,
-		dispatch_queue_t tq)
-{
-	return _dispatch_queue_create_with_target(label, dqa, tq, false);
-}
-
-dispatch_queue_t
-dispatch_queue_create(const char *label, dispatch_queue_attr_t attr)
-{
-	return _dispatch_queue_create_with_target(label, attr,
-			DISPATCH_TARGET_QUEUE_DEFAULT, true);
-}
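
(Illustrative only, not part of this patch: a minimal sketch of how the public
entry points above are reached; queue labels and QoS values are arbitrary.)

	#include <dispatch/dispatch.h>

	static void
	create_queues_example(void)
	{
		// Plain serial queue; per the normalization above it defaults to an
		// overcommit root queue as its target.
		dispatch_queue_t q1 = dispatch_queue_create("com.example.serial",
				DISPATCH_QUEUE_SERIAL);

		// Serial queue explicitly targeted at a utility-QoS global queue,
		// exercising the target-queue handling in Step 1 above.
		dispatch_queue_t q2 = dispatch_queue_create_with_target(
				"com.example.worker", DISPATCH_QUEUE_SERIAL,
				dispatch_get_global_queue(QOS_CLASS_UTILITY, 0));

		dispatch_release(q1);
		dispatch_release(q2);
	}
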
-
-dispatch_queue_t
-dispatch_queue_create_with_accounting_override_voucher(const char *label,
-		dispatch_queue_attr_t attr, voucher_t voucher)
-{
-	(void)label; (void)attr; (void)voucher;
-	DISPATCH_CLIENT_CRASH(0, "Unsupported interface");
-}
-
-void
-_dispatch_queue_destroy(dispatch_queue_t dq, bool *allow_free)
-{
-	uint64_t dq_state = os_atomic_load2o(dq, dq_state, relaxed);
-	uint64_t initial_state = DISPATCH_QUEUE_STATE_INIT_VALUE(dq->dq_width);
-
-	if (dx_hastypeflag(dq, QUEUE_ROOT)) {
-		initial_state = DISPATCH_ROOT_QUEUE_STATE_INIT_VALUE;
-	}
-	dq_state &= ~DISPATCH_QUEUE_MAX_QOS_MASK;
-	dq_state &= ~DISPATCH_QUEUE_DIRTY;
-	dq_state &= ~DISPATCH_QUEUE_ROLE_MASK;
-	if (slowpath(dq_state != initial_state)) {
-		if (_dq_state_drain_locked(dq_state)) {
-			DISPATCH_CLIENT_CRASH((uintptr_t)dq_state,
-					"Release of a locked queue");
-		}
-#ifndef __LP64__
-		dq_state >>= 32;
-#endif
-		DISPATCH_CLIENT_CRASH((uintptr_t)dq_state,
-				"Release of a queue with corrupt state");
-	}
-	if (slowpath(dq->dq_items_tail)) {
-		DISPATCH_CLIENT_CRASH(dq->dq_items_tail,
-				"Release of a queue while items are enqueued");
-	}
-
-	// trash the queue so that use after free will crash
-	dq->dq_items_head = (void *)0x200;
-	dq->dq_items_tail = (void *)0x200;
-
-	dispatch_queue_t dqsq = os_atomic_xchg2o(dq, dq_specific_q,
-			(void *)0x200, relaxed);
-	if (dqsq) {
-		_dispatch_release(dqsq);
-	}
-
-	// fastpath for queues that never got their storage retained
-	if (likely(os_atomic_load2o(dq, dq_sref_cnt, relaxed) == 0)) {
-		// poison the state with something that is suspended and is easy to spot
-		dq->dq_state = 0xdead000000000000;
-		return;
-	}
-
-	// Take over freeing the memory from _dispatch_object_dealloc()
-	//
-	// As soon as we call _dispatch_queue_release_storage(), we forfeit
-	// the possibility for the caller of dx_dispose() to finalize the object,
-	// so that responsibility is ours.
-	_dispatch_object_finalize(dq);
-	*allow_free = false;
-	dq->dq_label = "<released queue, pending free>";
-	dq->do_targetq = NULL;
-	dq->do_finalizer = NULL;
-	dq->do_ctxt = NULL;
-	return _dispatch_queue_release_storage(dq);
-}
-
-// 6618342 Contact the team that owns the Instrument DTrace probe before
-//         renaming this symbol
-void
-_dispatch_queue_dispose(dispatch_queue_t dq, bool *allow_free)
-{
-	_dispatch_object_debug(dq, "%s", __func__);
-	_dispatch_introspection_queue_dispose(dq);
-	if (dq->dq_label && _dispatch_queue_label_needs_free(dq)) {
-		free((void*)dq->dq_label);
-	}
-	_dispatch_queue_destroy(dq, allow_free);
-}
-
-void
-_dispatch_queue_xref_dispose(dispatch_queue_t dq)
-{
-	uint64_t dq_state = os_atomic_load2o(dq, dq_state, relaxed);
-	if (unlikely(_dq_state_is_suspended(dq_state))) {
-		long state = (long)dq_state;
-		if (sizeof(long) < sizeof(uint64_t)) state = (long)(dq_state >> 32);
-		if (unlikely(_dq_state_is_inactive(dq_state))) {
-			// Arguments for and against this assert are within 6705399
-			DISPATCH_CLIENT_CRASH(state, "Release of an inactive object");
-		}
-		DISPATCH_CLIENT_CRASH(dq_state, "Release of a suspended object");
-	}
-	os_atomic_or2o(dq, dq_atomic_flags, DQF_RELEASED, relaxed);
-}
-
-DISPATCH_NOINLINE
-static void
-_dispatch_queue_suspend_slow(dispatch_queue_t dq)
-{
-	uint64_t dq_state, value, delta;
-
-	_dispatch_queue_sidelock_lock(dq);
-
-	// what we want to transfer (remove from dq_state)
-	delta  = DISPATCH_QUEUE_SUSPEND_HALF * DISPATCH_QUEUE_SUSPEND_INTERVAL;
-	// but this is a suspend so add a suspend count at the same time
-	delta -= DISPATCH_QUEUE_SUSPEND_INTERVAL;
-	if (dq->dq_side_suspend_cnt == 0) {
-		// we subtract delta from dq_state, and we want to set this bit
-		delta -= DISPATCH_QUEUE_HAS_SIDE_SUSPEND_CNT;
-	}
-
-	os_atomic_rmw_loop2o(dq, dq_state, dq_state, value, relaxed, {
-		// unsigned underflow of the subtraction can happen because other
-		// threads could have touched this value while we were trying to acquire
-		// the lock, or because another thread raced us to do the same operation
-		// and got to the lock first.
-		if (unlikely(os_sub_overflow(dq_state, delta, &value))) {
-			os_atomic_rmw_loop_give_up(goto retry);
-		}
-	});
-	if (unlikely(os_add_overflow(dq->dq_side_suspend_cnt,
-			DISPATCH_QUEUE_SUSPEND_HALF, &dq->dq_side_suspend_cnt))) {
-		DISPATCH_CLIENT_CRASH(0, "Too many nested calls to dispatch_suspend()");
-	}
-	return _dispatch_queue_sidelock_unlock(dq);
-
-retry:
-	_dispatch_queue_sidelock_unlock(dq);
-	return dx_vtable(dq)->do_suspend(dq);
-}
-
-void
-_dispatch_queue_suspend(dispatch_queue_t dq)
-{
-	dispatch_assert(dq->do_ref_cnt != DISPATCH_OBJECT_GLOBAL_REFCNT);
-
-	uint64_t dq_state, value;
-
-	os_atomic_rmw_loop2o(dq, dq_state, dq_state, value, relaxed, {
-		value = DISPATCH_QUEUE_SUSPEND_INTERVAL;
-		if (unlikely(os_add_overflow(dq_state, value, &value))) {
-			os_atomic_rmw_loop_give_up({
-				return _dispatch_queue_suspend_slow(dq);
-			});
-		}
-		if (!_dq_state_drain_locked(dq_state)) {
-			value |= DLOCK_OWNER_MASK;
-		}
-	});
-
-	if (!_dq_state_is_suspended(dq_state)) {
-		// rdar://8181908 we need to extend the queue life for the duration
-		// of the call to wakeup at _dispatch_queue_resume() time.
-		_dispatch_retain_2(dq);
-	}
-}
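
(Sketch of the client-visible contract implemented above, not part of the
patch: each dispatch_suspend() must be balanced by exactly one
dispatch_resume(); deeply nested suspends spill into dq_side_suspend_cnt via
the slow path.)

	#include <dispatch/dispatch.h>

	static void
	suspend_resume_example(dispatch_queue_t q)
	{
		dispatch_suspend(q);	// takes one suspend count in dq_state
		dispatch_async(q, ^{
			// held back until the matching resume below
		});
		dispatch_resume(q);	// drops the count and wakes the queue if needed
	}
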
-
-DISPATCH_NOINLINE
-static void
-_dispatch_queue_resume_slow(dispatch_queue_t dq)
-{
-	uint64_t dq_state, value, delta;
-
-	_dispatch_queue_sidelock_lock(dq);
-
-	// what we want to transfer
-	delta  = DISPATCH_QUEUE_SUSPEND_HALF * DISPATCH_QUEUE_SUSPEND_INTERVAL;
-	// but this is a resume so consume a suspend count at the same time
-	delta -= DISPATCH_QUEUE_SUSPEND_INTERVAL;
-	switch (dq->dq_side_suspend_cnt) {
-	case 0:
-		goto retry;
-	case DISPATCH_QUEUE_SUSPEND_HALF:
-		// we will transition the side count to 0, so we want to clear this bit
-		delta -= DISPATCH_QUEUE_HAS_SIDE_SUSPEND_CNT;
-		break;
-	}
-	os_atomic_rmw_loop2o(dq, dq_state, dq_state, value, relaxed, {
-		// unsigned overflow of the addition can happen because other
-		// threads could have touched this value while we were trying to acquire
-		// the lock, or because another thread raced us to do the same operation
-		// and got to the lock first.
-		if (unlikely(os_add_overflow(dq_state, delta, &value))) {
-			os_atomic_rmw_loop_give_up(goto retry);
-		}
-	});
-	dq->dq_side_suspend_cnt -= DISPATCH_QUEUE_SUSPEND_HALF;
-	return _dispatch_queue_sidelock_unlock(dq);
-
-retry:
-	_dispatch_queue_sidelock_unlock(dq);
-	return dx_vtable(dq)->do_resume(dq, false);
-}
-
-DISPATCH_NOINLINE
-static void
-_dispatch_queue_resume_finalize_activation(dispatch_queue_t dq)
-{
-	bool allow_resume = true;
-	// Step 2: run the activation finalizer
-	if (dx_vtable(dq)->do_finalize_activation) {
-		dx_vtable(dq)->do_finalize_activation(dq, &allow_resume);
-	}
-	// Step 3: consume the suspend count
-	if (allow_resume) {
-		return dx_vtable(dq)->do_resume(dq, false);
-	}
-}
-
-void
-_dispatch_queue_resume(dispatch_queue_t dq, bool activate)
-{
-	// covers all suspend and inactive bits, including side suspend bit
-	const uint64_t suspend_bits = DISPATCH_QUEUE_SUSPEND_BITS_MASK;
-	uint64_t pending_barrier_width =
-			(dq->dq_width - 1) * DISPATCH_QUEUE_WIDTH_INTERVAL;
-	uint64_t set_owner_and_set_full_width_and_in_barrier =
-			_dispatch_lock_value_for_self() | DISPATCH_QUEUE_WIDTH_FULL_BIT |
-			DISPATCH_QUEUE_IN_BARRIER;
-
-	// backward compatibility: only dispatch sources can abuse
-	// dispatch_resume() to really mean dispatch_activate()
-	bool is_source = (dx_metatype(dq) == _DISPATCH_SOURCE_TYPE);
-	uint64_t dq_state, value;
-
-	dispatch_assert(dq->do_ref_cnt != DISPATCH_OBJECT_GLOBAL_REFCNT);
-
-	// Activation is a bit tricky as it needs to finalize before the wakeup.
-	//
-	// If after doing its updates to the suspend count and/or inactive bit,
-	// the last suspension related bit that would remain is the
-	// NEEDS_ACTIVATION one, then this function:
-	//
-	// 1. moves the state to { sc:1 i:0 na:0 } (converts the needs-activate into
-	//    a suspend count)
-	// 2. runs the activation finalizer
-	// 3. consumes the suspend count set in (1), and finishes the resume flow
-	//
-	// Concurrently, some property setters such as setting dispatch source
-	// handlers or _dispatch_queue_set_target_queue try to do in-place changes
-	// before activation. These protect their action by taking a suspend count.
-	// Step (1) above cannot happen if such a setter has locked the object.
-	if (activate) {
-		// relaxed atomic because this doesn't publish anything, this is only
-		// about picking the thread that gets to finalize the activation
-		os_atomic_rmw_loop2o(dq, dq_state, dq_state, value, relaxed, {
-			if ((dq_state & suspend_bits) ==
-					DISPATCH_QUEUE_NEEDS_ACTIVATION + DISPATCH_QUEUE_INACTIVE) {
-				// { sc:0 i:1 na:1 } -> { sc:1 i:0 na:0 }
-				value = dq_state - DISPATCH_QUEUE_INACTIVE
-						- DISPATCH_QUEUE_NEEDS_ACTIVATION
-						+ DISPATCH_QUEUE_SUSPEND_INTERVAL;
-			} else if (_dq_state_is_inactive(dq_state)) {
-				// { sc:>0 i:1 na:1 } -> { i:0 na:1 }
-				// simple activation because sc is not 0
-				// resume will deal with na:1 later
-				value = dq_state - DISPATCH_QUEUE_INACTIVE;
-			} else {
-				// object already active, this is a no-op, just exit
-				os_atomic_rmw_loop_give_up(return);
-			}
-		});
-	} else {
-		// release barrier needed to publish the effect of
-		// - dispatch_set_target_queue()
-		// - dispatch_set_*_handler()
-		// - do_finalize_activation()
-		os_atomic_rmw_loop2o(dq, dq_state, dq_state, value, release, {
-			if ((dq_state & suspend_bits) == DISPATCH_QUEUE_SUSPEND_INTERVAL
-					+ DISPATCH_QUEUE_NEEDS_ACTIVATION) {
-				// { sc:1 i:0 na:1 } -> { sc:1 i:0 na:0 }
-				value = dq_state - DISPATCH_QUEUE_NEEDS_ACTIVATION;
-			} else if (is_source && (dq_state & suspend_bits) ==
-					DISPATCH_QUEUE_NEEDS_ACTIVATION + DISPATCH_QUEUE_INACTIVE) {
-				// { sc:0 i:1 na:1 } -> { sc:1 i:0 na:0 }
-				value = dq_state - DISPATCH_QUEUE_INACTIVE
-						- DISPATCH_QUEUE_NEEDS_ACTIVATION
-						+ DISPATCH_QUEUE_SUSPEND_INTERVAL;
-			} else if (unlikely(os_sub_overflow(dq_state,
-					DISPATCH_QUEUE_SUSPEND_INTERVAL, &value))) {
-				// underflow means over-resume or a suspend count transfer
-				// to the side count is needed
-				os_atomic_rmw_loop_give_up({
-					if (!(dq_state & DISPATCH_QUEUE_HAS_SIDE_SUSPEND_CNT)) {
-						goto over_resume;
-					}
-					return _dispatch_queue_resume_slow(dq);
-				});
-		//
-		// below this, value = dq_state - DISPATCH_QUEUE_SUSPEND_INTERVAL
-		//
-			} else if (!_dq_state_is_runnable(value)) {
-				// Out of width or still suspended.
-				// For the former, force _dispatch_queue_non_barrier_complete
-				// to reconsider whether it has work to do
-				value |= DISPATCH_QUEUE_DIRTY;
-			} else if (!_dq_state_drain_locked_by(value, DLOCK_OWNER_MASK)) {
-				dispatch_assert(_dq_state_drain_locked(value));
-				// still locked by someone else, make drain_try_unlock() fail
-				// and reconsider whether it has work to do
-				value |= DISPATCH_QUEUE_DIRTY;
-			} else if (!is_source && (_dq_state_has_pending_barrier(value) ||
-					value + pending_barrier_width <
-					DISPATCH_QUEUE_WIDTH_FULL_BIT)) {
-				// if we can, acquire the full width drain lock
-				// and then perform a lock transfer
-				//
-				// However, this is never useful for a source where there are no
-				// sync waiters, so never take the lock and do a plain wakeup
-				value &= DISPATCH_QUEUE_DRAIN_PRESERVED_BITS_MASK;
-				value |= set_owner_and_set_full_width_and_in_barrier;
-			} else {
-				// clear overrides and force a wakeup
-				value &= ~DISPATCH_QUEUE_DRAIN_UNLOCK_MASK;
-				value &= ~DISPATCH_QUEUE_MAX_QOS_MASK;
-			}
-		});
-	}
-
-	if ((dq_state ^ value) & DISPATCH_QUEUE_NEEDS_ACTIVATION) {
-		// we cleared the NEEDS_ACTIVATION bit and we have a valid suspend count
-		return _dispatch_queue_resume_finalize_activation(dq);
-	}
-
-	if (activate) {
-		// if we're still in an activate codepath here we should have
-		// { sc:>0 na:1 }, if not we've got a corrupt state
-		if (unlikely(!_dq_state_is_suspended(value))) {
-			DISPATCH_CLIENT_CRASH(dq, "Invalid suspension state");
-		}
-		return;
-	}
-
-	if (_dq_state_is_suspended(value)) {
-		return;
-	}
-
-	if (_dq_state_is_dirty(dq_state)) {
-		// <rdar://problem/14637483>
-		// dependency ordering for dq state changes that were flushed
-		// and not acted upon
-		os_atomic_thread_fence(dependency);
-		dq = os_atomic_force_dependency_on(dq, dq_state);
-	}
-	// Balancing the retain_2 done in suspend() for rdar://8181908
-	dispatch_wakeup_flags_t flags = DISPATCH_WAKEUP_CONSUME_2;
-	if ((dq_state ^ value) & DISPATCH_QUEUE_IN_BARRIER) {
-		flags |= DISPATCH_WAKEUP_BARRIER_COMPLETE;
-	} else if (!_dq_state_is_runnable(value)) {
-		if (_dq_state_is_base_wlh(dq_state)) {
-			_dispatch_event_loop_assert_not_owned((dispatch_wlh_t)dq);
-		}
-		return _dispatch_release_2(dq);
-	}
-	dispatch_assert(!_dq_state_received_sync_wait(dq_state));
-	dispatch_assert(!_dq_state_in_sync_transfer(dq_state));
-	return dx_wakeup(dq, _dq_state_max_qos(dq_state), flags);
-
-over_resume:
-	if (unlikely(_dq_state_is_inactive(dq_state))) {
-		DISPATCH_CLIENT_CRASH(dq, "Over-resume of an inactive object");
-	}
-	DISPATCH_CLIENT_CRASH(dq, "Over-resume of an object");
-}
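
(For orientation, not part of the patch: the activate == true path above is
what dispatch_activate() uses for initially-inactive objects, while
activate == false backs dispatch_resume(); names below are arbitrary.)

	#include <dispatch/dispatch.h>

	static void
	inactive_queue_example(void)
	{
		dispatch_queue_attr_t attr =
				dispatch_queue_attr_make_initially_inactive(DISPATCH_QUEUE_SERIAL);
		dispatch_queue_t q = dispatch_queue_create("com.example.inactive", attr);

		// Still inactive: configuration such as retargeting is allowed here.
		dispatch_set_target_queue(q,
				dispatch_get_global_queue(QOS_CLASS_BACKGROUND, 0));

		// Clears the inactive/needs-activation bits and performs the first
		// wakeup through the activation flow described above.
		dispatch_activate(q);
		dispatch_release(q);
	}
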
-
-const char *
-dispatch_queue_get_label(dispatch_queue_t dq)
-{
-	if (slowpath(dq == DISPATCH_CURRENT_QUEUE_LABEL)) {
-		dq = _dispatch_get_current_queue();
-	}
-	return dq->dq_label ? dq->dq_label : "";
-}
-
-qos_class_t
-dispatch_queue_get_qos_class(dispatch_queue_t dq, int *relpri_ptr)
-{
-	dispatch_qos_class_t qos = _dispatch_priority_qos(dq->dq_priority);
-	if (relpri_ptr) {
-		*relpri_ptr = qos ? _dispatch_priority_relpri(dq->dq_priority) : 0;
-	}
-	return _dispatch_qos_to_qos_class(qos);
-}
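
(Both getters above are public API; a minimal sketch with an arbitrary label
and QoS, assuming the queue was given an explicit QoS attribute.)

	#include <dispatch/dispatch.h>

	static void
	queue_introspection_example(void)
	{
		dispatch_queue_attr_t attr = dispatch_queue_attr_make_with_qos_class(
				DISPATCH_QUEUE_SERIAL, QOS_CLASS_UTILITY, -1);
		dispatch_queue_t q = dispatch_queue_create("com.example.io", attr);

		int relpri = 0;
		qos_class_t qc = dispatch_queue_get_qos_class(q, &relpri);
		// qc == QOS_CLASS_UTILITY, relpri == -1

		const char *label = dispatch_queue_get_label(q); // "com.example.io"
		const char *here =
				dispatch_queue_get_label(DISPATCH_CURRENT_QUEUE_LABEL);
		(void)qc; (void)label; (void)here;
		dispatch_release(q);
	}
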
-
-static void
-_dispatch_queue_set_width2(void *ctxt)
-{
-	int w = (int)(intptr_t)ctxt; // intentional truncation
-	uint32_t tmp;
-	dispatch_queue_t dq = _dispatch_queue_get_current();
-
-	if (w >= 0) {
-		tmp = w ? (unsigned int)w : 1;
-	} else {
-		dispatch_qos_t qos = _dispatch_qos_from_pp(_dispatch_get_priority());
-		switch (w) {
-		case DISPATCH_QUEUE_WIDTH_MAX_PHYSICAL_CPUS:
-			tmp = _dispatch_qos_max_parallelism(qos,
-					DISPATCH_MAX_PARALLELISM_PHYSICAL);
-			break;
-		case DISPATCH_QUEUE_WIDTH_ACTIVE_CPUS:
-			tmp = _dispatch_qos_max_parallelism(qos,
-					DISPATCH_MAX_PARALLELISM_ACTIVE);
-			break;
-		case DISPATCH_QUEUE_WIDTH_MAX_LOGICAL_CPUS:
-		default:
-			tmp = _dispatch_qos_max_parallelism(qos, 0);
-			break;
-		}
-	}
-	if (tmp > DISPATCH_QUEUE_WIDTH_MAX) {
-		tmp = DISPATCH_QUEUE_WIDTH_MAX;
-	}
-
-	dispatch_queue_flags_t old_dqf, new_dqf;
-	os_atomic_rmw_loop2o(dq, dq_atomic_flags, old_dqf, new_dqf, relaxed, {
-		new_dqf = (old_dqf & DQF_FLAGS_MASK) | DQF_WIDTH(tmp);
-	});
-	_dispatch_queue_inherit_wlh_from_target(dq, dq->do_targetq);
-	_dispatch_object_debug(dq, "%s", __func__);
-}
-
-void
-dispatch_queue_set_width(dispatch_queue_t dq, long width)
-{
-	if (unlikely(dq->do_ref_cnt == DISPATCH_OBJECT_GLOBAL_REFCNT ||
-			dx_hastypeflag(dq, QUEUE_ROOT) ||
-			dx_hastypeflag(dq, QUEUE_BASE))) {
-		return;
-	}
-
-	unsigned long type = dx_type(dq);
-	switch (type) {
-	case DISPATCH_QUEUE_LEGACY_TYPE:
-	case DISPATCH_QUEUE_CONCURRENT_TYPE:
-		break;
-	case DISPATCH_QUEUE_SERIAL_TYPE:
-		DISPATCH_CLIENT_CRASH(type, "Cannot set width of a serial queue");
-	default:
-		DISPATCH_CLIENT_CRASH(type, "Unexpected dispatch object type");
-	}
-
-	if (likely((int)width >= 0)) {
-		_dispatch_barrier_trysync_or_async_f(dq, (void*)(intptr_t)width,
-				_dispatch_queue_set_width2);
-	} else {
-		// The negative width constants need to execute on the queue to
-		// query the queue QoS
-		_dispatch_barrier_async_detached_f(dq, (void*)(intptr_t)width,
-				_dispatch_queue_set_width2);
-	}
-}
-
-static void
-_dispatch_queue_legacy_set_target_queue(void *ctxt)
-{
-	dispatch_queue_t dq = _dispatch_queue_get_current();
-	dispatch_queue_t tq = ctxt;
-	dispatch_queue_t otq = dq->do_targetq;
-
-	if (_dispatch_queue_atomic_flags(dq) & DQF_TARGETED) {
-#if DISPATCH_ALLOW_NON_LEAF_RETARGET
-		_dispatch_ktrace3(DISPATCH_PERF_non_leaf_retarget, dq, otq, tq);
-		_dispatch_bug_deprecated("Changing the target of a queue "
-				"already targeted by other dispatch objects");
-#else
-		DISPATCH_CLIENT_CRASH(0, "Cannot change the target of a queue "
-				"already targeted by other dispatch objects");
-#endif
-	}
-
-	_dispatch_queue_priority_inherit_from_target(dq, tq);
-	_dispatch_queue_inherit_wlh_from_target(dq, tq);
-#if HAVE_PTHREAD_WORKQUEUE_QOS
-	// see _dispatch_queue_class_wakeup()
-	_dispatch_queue_sidelock_lock(dq);
-#endif
-	dq->do_targetq = tq;
-#if HAVE_PTHREAD_WORKQUEUE_QOS
-	// see _dispatch_queue_class_wakeup()
-	_dispatch_queue_sidelock_unlock(dq);
-#endif
-
-	_dispatch_object_debug(dq, "%s", __func__);
-	_dispatch_introspection_target_queue_changed(dq);
-	_dispatch_release_tailcall(otq);
-}
-
-void
-_dispatch_queue_set_target_queue(dispatch_queue_t dq, dispatch_queue_t tq)
-{
-	dispatch_assert(dq->do_ref_cnt != DISPATCH_OBJECT_GLOBAL_REFCNT &&
-			dq->do_targetq);
-
-	if (unlikely(!tq)) {
-		bool is_concurrent_q = (dq->dq_width > 1);
-		tq = _dispatch_get_root_queue(DISPATCH_QOS_DEFAULT, !is_concurrent_q);
-	}
-
-	if (_dispatch_queue_try_inactive_suspend(dq)) {
-		_dispatch_object_set_target_queue_inline(dq, tq);
-		return dx_vtable(dq)->do_resume(dq, false);
-	}
-
-#if !DISPATCH_ALLOW_NON_LEAF_RETARGET
-	if (_dispatch_queue_atomic_flags(dq) & DQF_TARGETED) {
-		DISPATCH_CLIENT_CRASH(0, "Cannot change the target of a queue "
-				"already targeted by other dispatch objects");
-	}
-#endif
-
-	if (unlikely(!_dispatch_queue_is_legacy(dq))) {
-#if DISPATCH_ALLOW_NON_LEAF_RETARGET
-		if (_dispatch_queue_atomic_flags(dq) & DQF_TARGETED) {
-			DISPATCH_CLIENT_CRASH(0, "Cannot change the target of a queue "
-					"already targeted by other dispatch objects");
-		}
-#endif
-		DISPATCH_CLIENT_CRASH(0, "Cannot change the target of this object "
-				"after it has been activated");
-	}
-
-	unsigned long type = dx_type(dq);
-	switch (type) {
-	case DISPATCH_QUEUE_LEGACY_TYPE:
-#if DISPATCH_ALLOW_NON_LEAF_RETARGET
-		if (_dispatch_queue_atomic_flags(dq) & DQF_TARGETED) {
-			_dispatch_bug_deprecated("Changing the target of a queue "
-					"already targeted by other dispatch objects");
-		}
-#endif
-		break;
-	case DISPATCH_SOURCE_KEVENT_TYPE:
-	case DISPATCH_MACH_CHANNEL_TYPE:
-		_dispatch_ktrace1(DISPATCH_PERF_post_activate_retarget, dq);
-		_dispatch_bug_deprecated("Changing the target of a source "
-				"after it has been activated");
-		break;
-	default:
-		DISPATCH_CLIENT_CRASH(type, "Unexpected dispatch object type");
-	}
-
-	_dispatch_retain(tq);
-	return _dispatch_barrier_trysync_or_async_f(dq, tq,
-			_dispatch_queue_legacy_set_target_queue);
-}
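
(Sketch only, not part of the patch: retargeting an already-active queue
created with dispatch_queue_create() goes through the asynchronous
_dispatch_queue_legacy_set_target_queue() barrier above, and is deprecated
once other dispatch objects target the queue.)

	#include <dispatch/dispatch.h>

	static void
	legacy_retarget_example(void)
	{
		dispatch_queue_t q = dispatch_queue_create("com.example.legacy", NULL);
		// Already active: handled by the legacy barrier path rather than the
		// inactive fast path.
		dispatch_set_target_queue(q,
				dispatch_get_global_queue(QOS_CLASS_UTILITY, 0));
		dispatch_release(q);
	}
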
-
-#pragma mark -
-#pragma mark dispatch_mgr_queue
-
-#if DISPATCH_USE_MGR_THREAD && DISPATCH_ENABLE_PTHREAD_ROOT_QUEUES
-static struct dispatch_pthread_root_queue_context_s
-		_dispatch_mgr_root_queue_pthread_context;
-static struct dispatch_root_queue_context_s
-		_dispatch_mgr_root_queue_context = {{{
-#if DISPATCH_USE_WORKQUEUES
-	.dgq_kworkqueue = (void*)(~0ul),
-#endif
-	.dgq_ctxt = &_dispatch_mgr_root_queue_pthread_context,
-	.dgq_thread_pool_size = 1,
-}}};
-
-static struct dispatch_queue_s _dispatch_mgr_root_queue = {
-	DISPATCH_GLOBAL_OBJECT_HEADER(queue_root),
-	.dq_state = DISPATCH_ROOT_QUEUE_STATE_INIT_VALUE,
-	.do_ctxt = &_dispatch_mgr_root_queue_context,
-	.dq_label = "com.apple.root.libdispatch-manager",
-	.dq_atomic_flags = DQF_WIDTH(DISPATCH_QUEUE_WIDTH_POOL),
-	.dq_priority = DISPATCH_PRIORITY_FLAG_MANAGER |
-			DISPATCH_PRIORITY_SATURATED_OVERRIDE,
-	.dq_serialnum = 3,
-};
-#endif // DISPATCH_USE_MGR_THREAD && DISPATCH_ENABLE_PTHREAD_ROOT_QUEUES
-
-#if DISPATCH_ENABLE_PTHREAD_ROOT_QUEUES || DISPATCH_USE_KEVENT_WORKQUEUE
-static struct {
-	volatile int prio;
-	volatile qos_class_t qos;
-	int default_prio;
-	int policy;
-#if defined(_WIN32)
-	HANDLE hThread;
-#else
-	pthread_t tid;
-#endif
-} _dispatch_mgr_sched;
-
-static dispatch_once_t _dispatch_mgr_sched_pred;
-
-#if HAVE_PTHREAD_WORKQUEUE_QOS
-// TODO: switch to "event-reflector thread" property <rdar://problem/18126138>
-// Must be kept in sync with list of qos classes in sys/qos.h
-static const int _dispatch_mgr_sched_qos2prio[] = {
-	[QOS_CLASS_MAINTENANCE] = 4,
-	[QOS_CLASS_BACKGROUND] = 4,
-	[QOS_CLASS_UTILITY] = 20,
-	[QOS_CLASS_DEFAULT] = 31,
-	[QOS_CLASS_USER_INITIATED] = 37,
-	[QOS_CLASS_USER_INTERACTIVE] = 47,
-};
-#endif // HAVE_PTHREAD_WORKQUEUE_QOS
-
-#if defined(_WIN32)
-static void
-_dispatch_mgr_sched_init(void *ctx DISPATCH_UNUSED)
-{
-	_dispatch_mgr_sched.policy = 0;
-	_dispatch_mgr_sched.default_prio = THREAD_PRIORITY_NORMAL;
-	_dispatch_mgr_sched.prio = _dispatch_mgr_sched.default_prio;
-}
-#else
-static void
-_dispatch_mgr_sched_init(void *ctxt DISPATCH_UNUSED)
-{
-	struct sched_param param;
-#if DISPATCH_USE_MGR_THREAD && DISPATCH_ENABLE_PTHREAD_ROOT_QUEUES
-	pthread_attr_t *attr;
-	attr = &_dispatch_mgr_root_queue_pthread_context.dpq_thread_attr;
-#else
-	pthread_attr_t a, *attr = &a;
-#endif
-	(void)dispatch_assume_zero(pthread_attr_init(attr));
-	(void)dispatch_assume_zero(pthread_attr_getschedpolicy(attr,
-			&_dispatch_mgr_sched.policy));
-	(void)dispatch_assume_zero(pthread_attr_getschedparam(attr, &param));
-#if HAVE_PTHREAD_WORKQUEUE_QOS
-	qos_class_t qos = qos_class_main();
-	if (qos == QOS_CLASS_DEFAULT) {
-		qos = QOS_CLASS_USER_INITIATED; // rdar://problem/17279292
-	}
-	if (qos) {
-		_dispatch_mgr_sched.qos = qos;
-		param.sched_priority = _dispatch_mgr_sched_qos2prio[qos];
-	}
-#endif
-	_dispatch_mgr_sched.default_prio = param.sched_priority;
-	_dispatch_mgr_sched.prio = _dispatch_mgr_sched.default_prio;
-}
-#endif /* defined(_WIN32) */
-#endif // DISPATCH_ENABLE_PTHREAD_ROOT_QUEUES || DISPATCH_USE_KEVENT_WORKQUEUE
-
-#if DISPATCH_USE_MGR_THREAD && DISPATCH_ENABLE_PTHREAD_ROOT_QUEUES
-#if defined(_WIN32)
-DISPATCH_NOINLINE
-static PHANDLE
-_dispatch_mgr_root_queue_init(void)
-{
-	dispatch_once_f(&_dispatch_mgr_sched_pred, NULL, _dispatch_mgr_sched_init);
-	return &_dispatch_mgr_sched.hThread;
-}
-#else
-DISPATCH_NOINLINE
-static pthread_t *
-_dispatch_mgr_root_queue_init(void)
-{
-	dispatch_once_f(&_dispatch_mgr_sched_pred, NULL, _dispatch_mgr_sched_init);
-	struct sched_param param;
-	pthread_attr_t *attr;
-	attr = &_dispatch_mgr_root_queue_pthread_context.dpq_thread_attr;
-	(void)dispatch_assume_zero(pthread_attr_setdetachstate(attr,
-			PTHREAD_CREATE_DETACHED));
-#if !DISPATCH_DEBUG
-	(void)dispatch_assume_zero(pthread_attr_setstacksize(attr, 64 * 1024));
-#endif
-#if HAVE_PTHREAD_WORKQUEUE_QOS
-	qos_class_t qos = _dispatch_mgr_sched.qos;
-	if (qos) {
-		if (_dispatch_set_qos_class_enabled) {
-			(void)dispatch_assume_zero(pthread_attr_set_qos_class_np(attr,
-					qos, 0));
-		}
-	}
-#endif
-	param.sched_priority = _dispatch_mgr_sched.prio;
-	if (param.sched_priority > _dispatch_mgr_sched.default_prio) {
-		(void)dispatch_assume_zero(pthread_attr_setschedparam(attr, &param));
-	}
-	return &_dispatch_mgr_sched.tid;
-}
-#endif
-
-static inline void
-_dispatch_mgr_priority_apply(void)
-{
-#if defined(_WIN32)
-	int nPriority = _dispatch_mgr_sched.prio;
-	do {
-		if (nPriority > _dispatch_mgr_sched.default_prio) {
-			// TODO(compnerd) set thread scheduling policy
-			dispatch_assume_zero(SetThreadPriority(_dispatch_mgr_sched.hThread, nPriority));
-			nPriority = GetThreadPriority(_dispatch_mgr_sched.hThread);
-		}
-	} while (_dispatch_mgr_sched.prio > nPriority);
-#else
-	struct sched_param param;
-	do {
-		param.sched_priority = _dispatch_mgr_sched.prio;
-		if (param.sched_priority > _dispatch_mgr_sched.default_prio) {
-			(void)dispatch_assume_zero(pthread_setschedparam(
-					_dispatch_mgr_sched.tid, _dispatch_mgr_sched.policy,
-					&param));
-		}
-	} while (_dispatch_mgr_sched.prio > param.sched_priority);
-#endif
-}
-
-DISPATCH_NOINLINE
-void
-_dispatch_mgr_priority_init(void)
-{
-#if defined(_WIN32)
-	int nPriority = GetThreadPriority(_dispatch_mgr_sched.hThread);
-	if (slowpath(_dispatch_mgr_sched.prio > nPriority)) {
-		return _dispatch_mgr_priority_apply();
-	}
-#else
-	struct sched_param param;
-	pthread_attr_t *attr;
-	attr = &_dispatch_mgr_root_queue_pthread_context.dpq_thread_attr;
-	(void)dispatch_assume_zero(pthread_attr_getschedparam(attr, &param));
-#if HAVE_PTHREAD_WORKQUEUE_QOS
-	qos_class_t qos = 0;
-	(void)pthread_attr_get_qos_class_np(attr, &qos, NULL);
-	if (_dispatch_mgr_sched.qos > qos && _dispatch_set_qos_class_enabled) {
-		(void)pthread_set_qos_class_self_np(_dispatch_mgr_sched.qos, 0);
-		int p = _dispatch_mgr_sched_qos2prio[_dispatch_mgr_sched.qos];
-		if (p > param.sched_priority) {
-			param.sched_priority = p;
-		}
-	}
-#endif
-	if (slowpath(_dispatch_mgr_sched.prio > param.sched_priority)) {
-		return _dispatch_mgr_priority_apply();
-	}
-#endif
-}
-#endif // DISPATCH_USE_MGR_THREAD && DISPATCH_ENABLE_PTHREAD_ROOT_QUEUES
-
-#if !defined(_WIN32)
-#if DISPATCH_ENABLE_PTHREAD_ROOT_QUEUES
-DISPATCH_NOINLINE
-static void
-_dispatch_mgr_priority_raise(const pthread_attr_t *attr)
-{
-	dispatch_once_f(&_dispatch_mgr_sched_pred, NULL, _dispatch_mgr_sched_init);
-	struct sched_param param;
-	(void)dispatch_assume_zero(pthread_attr_getschedparam(attr, &param));
-#if HAVE_PTHREAD_WORKQUEUE_QOS
-	qos_class_t q, qos = 0;
-	(void)pthread_attr_get_qos_class_np((pthread_attr_t *)attr, &qos, NULL);
-	if (qos) {
-		param.sched_priority = _dispatch_mgr_sched_qos2prio[qos];
-		os_atomic_rmw_loop2o(&_dispatch_mgr_sched, qos, q, qos, relaxed, {
-			if (q >= qos) os_atomic_rmw_loop_give_up(break);
-		});
-	}
-#endif
-	int p, prio = param.sched_priority;
-	os_atomic_rmw_loop2o(&_dispatch_mgr_sched, prio, p, prio, relaxed, {
-		if (p >= prio) os_atomic_rmw_loop_give_up(return);
-	});
-#if DISPATCH_USE_KEVENT_WORKQUEUE
-	_dispatch_root_queues_init();
-	if (_dispatch_kevent_workqueue_enabled) {
-		pthread_priority_t pp = 0;
-		if (prio > _dispatch_mgr_sched.default_prio) {
-			// The values of _PTHREAD_PRIORITY_SCHED_PRI_FLAG and
-			// _PTHREAD_PRIORITY_ROOTQUEUE_FLAG overlap, but that is not
-			// problematic in this case, since the second one is only ever
-			// used on dq_priority fields.
-			// We never pass the _PTHREAD_PRIORITY_ROOTQUEUE_FLAG to a syscall,
-			// it is meaningful to libdispatch only.
-			pp = (pthread_priority_t)prio | _PTHREAD_PRIORITY_SCHED_PRI_FLAG;
-		} else if (qos) {
-			pp = _pthread_qos_class_encode(qos, 0, 0);
-		}
-		if (pp) {
-			int r = _pthread_workqueue_set_event_manager_priority(pp);
-			(void)dispatch_assume_zero(r);
-		}
-		return;
-	}
-#endif
-#if DISPATCH_USE_MGR_THREAD
-	if (_dispatch_mgr_sched.tid) {
-		return _dispatch_mgr_priority_apply();
-	}
-#endif
-}
-#endif // DISPATCH_ENABLE_PTHREAD_ROOT_QUEUES
-#endif
-
-#if DISPATCH_USE_KEVENT_WORKQUEUE
-void
-_dispatch_kevent_workqueue_init(void)
-{
-	// Initialize kevent workqueue support
-	_dispatch_root_queues_init();
-	if (!_dispatch_kevent_workqueue_enabled) return;
-	dispatch_once_f(&_dispatch_mgr_sched_pred, NULL, _dispatch_mgr_sched_init);
-	qos_class_t qos = _dispatch_mgr_sched.qos;
-	int prio = _dispatch_mgr_sched.prio;
-	pthread_priority_t pp = 0;
-	if (qos) {
-		pp = _pthread_qos_class_encode(qos, 0, 0);
-	}
-	if (prio > _dispatch_mgr_sched.default_prio) {
-		pp = (pthread_priority_t)prio | _PTHREAD_PRIORITY_SCHED_PRI_FLAG;
-	}
-	if (pp) {
-		int r = _pthread_workqueue_set_event_manager_priority(pp);
-		(void)dispatch_assume_zero(r);
-	}
-}
-#endif // DISPATCH_USE_KEVENT_WORKQUEUE
-
-#pragma mark -
-#pragma mark dispatch_pthread_root_queue
-
-#if DISPATCH_ENABLE_PTHREAD_ROOT_QUEUES
-static dispatch_queue_t
-_dispatch_pthread_root_queue_create(const char *label, unsigned long flags,
-		const pthread_attr_t *attr, dispatch_block_t configure,
-		dispatch_pthread_root_queue_observer_hooks_t observer_hooks)
-{
-	dispatch_queue_t dq;
-	dispatch_root_queue_context_t qc;
-	dispatch_pthread_root_queue_context_t pqc;
-	dispatch_queue_flags_t dqf = 0;
-	size_t dqs;
-	int32_t pool_size = flags & _DISPATCH_PTHREAD_ROOT_QUEUE_FLAG_POOL_SIZE ?
-			(int8_t)(flags & ~_DISPATCH_PTHREAD_ROOT_QUEUE_FLAG_POOL_SIZE) : 0;
-
-	dqs = sizeof(struct dispatch_queue_s) - DISPATCH_QUEUE_CACHELINE_PAD;
-	dqs = roundup(dqs, _Alignof(struct dispatch_root_queue_context_s));
-	dq = _dispatch_object_alloc(DISPATCH_VTABLE(queue_root), dqs +
-			sizeof(struct dispatch_root_queue_context_s) +
-			sizeof(struct dispatch_pthread_root_queue_context_s));
-	qc = (void*)dq + dqs;
-	dispatch_assert((uintptr_t)qc % _Alignof(__typeof__(*qc)) == 0);
-	pqc = (void*)qc + sizeof(struct dispatch_root_queue_context_s);
-	dispatch_assert((uintptr_t)pqc % _Alignof(__typeof__(*pqc)) == 0);
-	if (label) {
-		const char *tmp = _dispatch_strdup_if_mutable(label);
-		if (tmp != label) {
-			dqf |= DQF_LABEL_NEEDS_FREE;
-			label = tmp;
-		}
-	}
-
-	_dispatch_queue_init(dq, dqf, DISPATCH_QUEUE_WIDTH_POOL, 0);
-	dq->dq_label = label;
-	dq->dq_state = DISPATCH_ROOT_QUEUE_STATE_INIT_VALUE;
-	dq->do_ctxt = qc;
-	dq->dq_priority = DISPATCH_PRIORITY_SATURATED_OVERRIDE;
-
-	pqc->dpq_thread_mediator.do_vtable = DISPATCH_VTABLE(semaphore);
-	qc->dgq_ctxt = pqc;
-#if DISPATCH_USE_WORKQUEUES
-	qc->dgq_kworkqueue = (void*)(~0ul);
-#endif
-	_dispatch_root_queue_init_pthread_pool(qc, pool_size, true);
-
-#if defined(_WIN32)
-	dispatch_assert(attr == NULL);
-#else
-	if (attr) {
-		memcpy(&pqc->dpq_thread_attr, attr, sizeof(pthread_attr_t));
-		_dispatch_mgr_priority_raise(&pqc->dpq_thread_attr);
-	} else {
-		(void)dispatch_assume_zero(pthread_attr_init(&pqc->dpq_thread_attr));
-	}
-	(void)dispatch_assume_zero(pthread_attr_setdetachstate(
-			&pqc->dpq_thread_attr, PTHREAD_CREATE_DETACHED));
-#endif
-	if (configure) {
-		pqc->dpq_thread_configure = _dispatch_Block_copy(configure);
-	}
-	if (observer_hooks) {
-		pqc->dpq_observer_hooks = *observer_hooks;
-	}
-	_dispatch_object_debug(dq, "%s", __func__);
-	return _dispatch_introspection_queue_create(dq);
-}
-
-dispatch_queue_t
-dispatch_pthread_root_queue_create(const char *label, unsigned long flags,
-		const pthread_attr_t *attr, dispatch_block_t configure)
-{
-#if defined(_WIN32)
-	dispatch_assert(attr == NULL);
-#endif
-	return _dispatch_pthread_root_queue_create(label, flags, attr, configure,
-			NULL);
-}
-
-#if DISPATCH_IOHID_SPI
-dispatch_queue_t
-_dispatch_pthread_root_queue_create_with_observer_hooks_4IOHID(const char *label,
-		unsigned long flags, const pthread_attr_t *attr,
-		dispatch_pthread_root_queue_observer_hooks_t observer_hooks,
-		dispatch_block_t configure)
-{
-	if (!observer_hooks->queue_will_execute ||
-			!observer_hooks->queue_did_execute) {
-		DISPATCH_CLIENT_CRASH(0, "Invalid pthread root queue observer hooks");
-	}
-	return _dispatch_pthread_root_queue_create(label, flags, attr, configure,
-			observer_hooks);
-}
-#endif
-
-dispatch_queue_t
-dispatch_pthread_root_queue_copy_current(void)
-{
-	dispatch_queue_t dq = _dispatch_queue_get_current();
-	if (!dq) return NULL;
-	while (unlikely(dq->do_targetq)) {
-		dq = dq->do_targetq;
-	}
-	if (dx_type(dq) != DISPATCH_QUEUE_GLOBAL_ROOT_TYPE ||
-			dq->do_xref_cnt == DISPATCH_OBJECT_GLOBAL_REFCNT) {
-		return NULL;
-	}
-	return (dispatch_queue_t)_os_object_retain_with_resurrect(dq->_as_os_obj);
-}
-
-#endif // DISPATCH_ENABLE_PTHREAD_ROOT_QUEUES
-
-void
-_dispatch_pthread_root_queue_dispose(dispatch_queue_t dq, bool *allow_free)
-{
-	if (slowpath(dq->do_ref_cnt == DISPATCH_OBJECT_GLOBAL_REFCNT)) {
-		DISPATCH_INTERNAL_CRASH(dq, "Global root queue disposed");
-	}
-	_dispatch_object_debug(dq, "%s", __func__);
-	_dispatch_introspection_queue_dispose(dq);
-#if DISPATCH_USE_PTHREAD_POOL
-	dispatch_root_queue_context_t qc = dq->do_ctxt;
-	dispatch_pthread_root_queue_context_t pqc = qc->dgq_ctxt;
-
-#if !defined(_WIN32)
-	pthread_attr_destroy(&pqc->dpq_thread_attr);
-#endif
-	_dispatch_semaphore_dispose(&pqc->dpq_thread_mediator, NULL);
-	if (pqc->dpq_thread_configure) {
-		Block_release(pqc->dpq_thread_configure);
-	}
-	dq->do_targetq = _dispatch_get_root_queue(DISPATCH_QOS_DEFAULT, false);
-#endif
-	if (dq->dq_label && _dispatch_queue_label_needs_free(dq)) {
-		free((void*)dq->dq_label);
-	}
-	_dispatch_queue_destroy(dq, allow_free);
-}
-
-#pragma mark -
-#pragma mark dispatch_queue_specific
-
-struct dispatch_queue_specific_queue_s {
-	DISPATCH_QUEUE_HEADER(queue_specific_queue);
-	TAILQ_HEAD(dispatch_queue_specific_head_s,
-			dispatch_queue_specific_s) dqsq_contexts;
-} DISPATCH_ATOMIC64_ALIGN;
-
-struct dispatch_queue_specific_s {
-	const void *dqs_key;
-	void *dqs_ctxt;
-	dispatch_function_t dqs_destructor;
-	TAILQ_ENTRY(dispatch_queue_specific_s) dqs_list;
-};
-DISPATCH_DECL(dispatch_queue_specific);
-
-void
-_dispatch_queue_specific_queue_dispose(dispatch_queue_specific_queue_t dqsq,
-		bool *allow_free)
-{
-	dispatch_queue_specific_t dqs, tmp;
-	dispatch_queue_t rq = _dispatch_get_root_queue(DISPATCH_QOS_DEFAULT, false);
-
-	TAILQ_FOREACH_SAFE(dqs, &dqsq->dqsq_contexts, dqs_list, tmp) {
-		if (dqs->dqs_destructor) {
-			dispatch_async_f(rq, dqs->dqs_ctxt, dqs->dqs_destructor);
-		}
-		free(dqs);
-	}
-	_dispatch_queue_destroy(dqsq->_as_dq, allow_free);
-}
-
-static void
-_dispatch_queue_init_specific(dispatch_queue_t dq)
-{
-	dispatch_queue_specific_queue_t dqsq;
-
-	dqsq = _dispatch_object_alloc(DISPATCH_VTABLE(queue_specific_queue),
-			sizeof(struct dispatch_queue_specific_queue_s));
-	_dispatch_queue_init(dqsq->_as_dq, DQF_NONE, DISPATCH_QUEUE_WIDTH_MAX,
-			DISPATCH_QUEUE_ROLE_BASE_ANON);
-	dqsq->do_xref_cnt = -1;
-	dqsq->do_targetq = _dispatch_get_root_queue(
-			DISPATCH_QOS_USER_INITIATED, true);
-	dqsq->dq_label = "queue-specific";
-	TAILQ_INIT(&dqsq->dqsq_contexts);
-	if (slowpath(!os_atomic_cmpxchg2o(dq, dq_specific_q, NULL,
-			dqsq->_as_dq, release))) {
-		_dispatch_release(dqsq->_as_dq);
-	}
-}
-
-static void
-_dispatch_queue_set_specific(void *ctxt)
-{
-	dispatch_queue_specific_t dqs, dqsn = ctxt;
-	dispatch_queue_specific_queue_t dqsq =
-			(dispatch_queue_specific_queue_t)_dispatch_queue_get_current();
-
-	TAILQ_FOREACH(dqs, &dqsq->dqsq_contexts, dqs_list) {
-		if (dqs->dqs_key == dqsn->dqs_key) {
-			// Destroy previous context for existing key
-			if (dqs->dqs_destructor) {
-				dispatch_async_f(_dispatch_get_root_queue(
-						DISPATCH_QOS_DEFAULT, false), dqs->dqs_ctxt,
-						dqs->dqs_destructor);
-			}
-			if (dqsn->dqs_ctxt) {
-				// Copy new context for existing key
-				dqs->dqs_ctxt = dqsn->dqs_ctxt;
-				dqs->dqs_destructor = dqsn->dqs_destructor;
-			} else {
-				// Remove context storage for existing key
-				TAILQ_REMOVE(&dqsq->dqsq_contexts, dqs, dqs_list);
-				free(dqs);
-			}
-			return free(dqsn);
-		}
-	}
-	// Insert context storage for new key
-	TAILQ_INSERT_TAIL(&dqsq->dqsq_contexts, dqsn, dqs_list);
-}
-
-DISPATCH_NOINLINE
-void
-dispatch_queue_set_specific(dispatch_queue_t dq, const void *key,
-	void *ctxt, dispatch_function_t destructor)
-{
-	if (slowpath(!key)) {
-		return;
-	}
-	dispatch_queue_specific_t dqs;
-
-	dqs = _dispatch_calloc(1, sizeof(struct dispatch_queue_specific_s));
-	dqs->dqs_key = key;
-	dqs->dqs_ctxt = ctxt;
-	dqs->dqs_destructor = destructor;
-	if (slowpath(!dq->dq_specific_q)) {
-		_dispatch_queue_init_specific(dq);
-	}
-	_dispatch_barrier_trysync_or_async_f(dq->dq_specific_q, dqs,
-			_dispatch_queue_set_specific);
-}
-
-static void
-_dispatch_queue_get_specific(void *ctxt)
-{
-	void **ctxtp = ctxt;
-	void *key = *ctxtp;
-	dispatch_queue_specific_queue_t dqsq =
-			(dispatch_queue_specific_queue_t)_dispatch_queue_get_current();
-	dispatch_queue_specific_t dqs;
-
-	TAILQ_FOREACH(dqs, &dqsq->dqsq_contexts, dqs_list) {
-		if (dqs->dqs_key == key) {
-			*ctxtp = dqs->dqs_ctxt;
-			return;
-		}
-	}
-	*ctxtp = NULL;
-}
-
-DISPATCH_ALWAYS_INLINE
-static inline void *
-_dispatch_queue_get_specific_inline(dispatch_queue_t dq, const void *key)
-{
-	void *ctxt = NULL;
-	if (fastpath(dx_metatype(dq) == _DISPATCH_QUEUE_TYPE && dq->dq_specific_q)){
-		ctxt = (void *)key;
-		dispatch_sync_f(dq->dq_specific_q, &ctxt, _dispatch_queue_get_specific);
-	}
-	return ctxt;
-}
-
-DISPATCH_NOINLINE
-void *
-dispatch_queue_get_specific(dispatch_queue_t dq, const void *key)
-{
-	if (slowpath(!key)) {
-		return NULL;
-	}
-	return _dispatch_queue_get_specific_inline(dq, key);
-}
-
-DISPATCH_NOINLINE
-void *
-dispatch_get_specific(const void *key)
-{
-	if (slowpath(!key)) {
-		return NULL;
-	}
-	void *ctxt = NULL;
-	dispatch_queue_t dq = _dispatch_queue_get_current();
-
-	while (slowpath(dq)) {
-		ctxt = _dispatch_queue_get_specific_inline(dq, key);
-		if (ctxt) break;
-		dq = dq->do_targetq;
-	}
-	return ctxt;
-}
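
(A minimal usage sketch of the queue-specific machinery above, not part of the
patch; keys are compared by pointer identity, so any unique address works.)

	#include <dispatch/dispatch.h>

	static int queue_key;	// only the address matters, the value is unused

	static void
	queue_specific_example(void)
	{
		dispatch_queue_t q = dispatch_queue_create("com.example.tagged", NULL);
		dispatch_queue_set_specific(q, &queue_key, (void *)"payload", NULL);

		dispatch_async(q, ^{
			// dispatch_get_specific() checks the current queue and then its
			// target-queue chain, so this also resolves from queues targeting q.
			const char *payload = dispatch_get_specific(&queue_key);
			(void)payload;
		});

		// From outside the queue, query a particular queue directly.
		void *ctxt = dispatch_queue_get_specific(q, &queue_key);
		(void)ctxt;
		dispatch_release(q);
	}
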
-
-#if DISPATCH_IOHID_SPI
-bool
-_dispatch_queue_is_exclusively_owned_by_current_thread_4IOHID(
-		dispatch_queue_t dq) // rdar://problem/18033810
-{
-	if (dq->dq_width != 1) {
-		DISPATCH_CLIENT_CRASH(dq->dq_width, "Invalid queue type");
-	}
-	uint64_t dq_state = os_atomic_load2o(dq, dq_state, relaxed);
-	return _dq_state_drain_locked_by_self(dq_state);
-}
-#endif
-
-#pragma mark -
-#pragma mark dispatch_queue_debug
-
-size_t
-_dispatch_queue_debug_attr(dispatch_queue_t dq, char* buf, size_t bufsiz)
-{
-	size_t offset = 0;
-	dispatch_queue_t target = dq->do_targetq;
-	const char *tlabel = target && target->dq_label ? target->dq_label : "";
-	uint64_t dq_state = os_atomic_load2o(dq, dq_state, relaxed);
-
-	offset += dsnprintf(&buf[offset], bufsiz - offset, "sref = %d, "
-			"target = %s[%p], width = 0x%x, state = 0x%016llx",
-			dq->dq_sref_cnt + 1, tlabel, target, dq->dq_width,
-			(unsigned long long)dq_state);
-	if (_dq_state_is_suspended(dq_state)) {
-		offset += dsnprintf(&buf[offset], bufsiz - offset, ", suspended = %d",
-			_dq_state_suspend_cnt(dq_state));
-	}
-	if (_dq_state_is_inactive(dq_state)) {
-		offset += dsnprintf(&buf[offset], bufsiz - offset, ", inactive");
-	} else if (_dq_state_needs_activation(dq_state)) {
-		offset += dsnprintf(&buf[offset], bufsiz - offset, ", needs-activation");
-	}
-	if (_dq_state_is_enqueued(dq_state)) {
-		offset += dsnprintf(&buf[offset], bufsiz - offset, ", enqueued");
-	}
-	if (_dq_state_is_dirty(dq_state)) {
-		offset += dsnprintf(&buf[offset], bufsiz - offset, ", dirty");
-	}
-	dispatch_qos_t qos = _dq_state_max_qos(dq_state);
-	if (qos) {
-		offset += dsnprintf(&buf[offset], bufsiz - offset, ", max qos %d", qos);
-	}
-	mach_port_t owner = _dq_state_drain_owner(dq_state);
-	if (!_dispatch_queue_is_thread_bound(dq) && owner) {
-		offset += dsnprintf(&buf[offset], bufsiz - offset, ", draining on 0x%x",
-				owner);
-	}
-	if (_dq_state_is_in_barrier(dq_state)) {
-		offset += dsnprintf(&buf[offset], bufsiz - offset, ", in-barrier");
-	} else {
-		offset += dsnprintf(&buf[offset], bufsiz - offset, ", in-flight = %d",
-				_dq_state_used_width(dq_state, dq->dq_width));
-	}
-	if (_dq_state_has_pending_barrier(dq_state)) {
-		offset += dsnprintf(&buf[offset], bufsiz - offset, ", pending-barrier");
-	}
-	if (_dispatch_queue_is_thread_bound(dq)) {
-		offset += dsnprintf(&buf[offset], bufsiz - offset, ", thread = 0x%x ",
-				owner);
-	}
-	return offset;
-}
-
-size_t
-dispatch_queue_debug(dispatch_queue_t dq, char* buf, size_t bufsiz)
-{
-	size_t offset = 0;
-	offset += dsnprintf(&buf[offset], bufsiz - offset, "%s[%p] = { ",
-			dq->dq_label ? dq->dq_label : dx_kind(dq), dq);
-	offset += _dispatch_object_debug_attr(dq, &buf[offset], bufsiz - offset);
-	offset += _dispatch_queue_debug_attr(dq, &buf[offset], bufsiz - offset);
-	offset += dsnprintf(&buf[offset], bufsiz - offset, "}");
-	return offset;
-}
-
-#if DISPATCH_DEBUG
-void
-dispatch_debug_queue(dispatch_queue_t dq, const char* str) {
-	if (fastpath(dq)) {
-		_dispatch_object_debug(dq, "%s", str);
-	} else {
-		_dispatch_log("queue[NULL]: %s", str);
-	}
-}
-#endif
-
-#if DISPATCH_PERF_MON
-
-#define DISPATCH_PERF_MON_BUCKETS 8
-
-static struct {
-	uint64_t volatile time_total;
-	uint64_t volatile count_total;
-	uint64_t volatile thread_total;
-} _dispatch_stats[DISPATCH_PERF_MON_BUCKETS];
-DISPATCH_USED static size_t _dispatch_stat_buckets = DISPATCH_PERF_MON_BUCKETS;
-
-void
-_dispatch_queue_merge_stats(uint64_t start, bool trace, perfmon_thread_type type)
-{
-	uint64_t delta = _dispatch_absolute_time() - start;
-	unsigned long count;
-	int bucket = 0;
-	count = (unsigned long)_dispatch_thread_getspecific(dispatch_bcounter_key);
-	_dispatch_thread_setspecific(dispatch_bcounter_key, NULL);
-	if (count == 0) {
-		bucket = 0;
-		if (trace) _dispatch_ktrace1(DISPATCH_PERF_MON_worker_useless, type);
-	} else {
-		bucket = MIN(DISPATCH_PERF_MON_BUCKETS - 1,
-					 (int)sizeof(count) * CHAR_BIT - __builtin_clzl(count));
-		os_atomic_add(&_dispatch_stats[bucket].count_total, count, relaxed);
-	}
-	os_atomic_add(&_dispatch_stats[bucket].time_total, delta, relaxed);
-	os_atomic_inc(&_dispatch_stats[bucket].thread_total, relaxed);
-	if (trace) {
-		_dispatch_ktrace3(DISPATCH_PERF_MON_worker_thread_end, count, delta, type);
-	}
-}
-
-#endif
-
 #pragma mark -
 #pragma mark _dispatch_set_priority_and_mach_voucher
 #if HAVE_PTHREAD_WORKQUEUE_QOS
@@ -2855,45 +199,42 @@
 #pragma mark -
 #pragma mark dispatch_continuation_t
 
+static void _dispatch_async_redirect_invoke(dispatch_continuation_t dc,
+		dispatch_invoke_context_t dic, dispatch_invoke_flags_t flags);
+#if HAVE_PTHREAD_WORKQUEUE_QOS
+static void _dispatch_queue_override_invoke(dispatch_continuation_t dc,
+		dispatch_invoke_context_t dic, dispatch_invoke_flags_t flags);
+static void _dispatch_workloop_stealer_invoke(dispatch_continuation_t dc,
+		dispatch_invoke_context_t dic, dispatch_invoke_flags_t flags);
+#endif // HAVE_PTHREAD_WORKQUEUE_QOS
+
 const struct dispatch_continuation_vtable_s _dispatch_continuation_vtables[] = {
 	DC_VTABLE_ENTRY(ASYNC_REDIRECT,
-		.do_kind = "dc-redirect",
 		.do_invoke = _dispatch_async_redirect_invoke),
 #if HAVE_MACH
 	DC_VTABLE_ENTRY(MACH_SEND_BARRRIER_DRAIN,
-		.do_kind = "dc-mach-send-drain",
 		.do_invoke = _dispatch_mach_send_barrier_drain_invoke),
 	DC_VTABLE_ENTRY(MACH_SEND_BARRIER,
-		.do_kind = "dc-mach-send-barrier",
 		.do_invoke = _dispatch_mach_barrier_invoke),
 	DC_VTABLE_ENTRY(MACH_RECV_BARRIER,
-		.do_kind = "dc-mach-recv-barrier",
 		.do_invoke = _dispatch_mach_barrier_invoke),
 	DC_VTABLE_ENTRY(MACH_ASYNC_REPLY,
-		.do_kind = "dc-mach-async-reply",
 		.do_invoke = _dispatch_mach_msg_async_reply_invoke),
 #endif
 #if HAVE_PTHREAD_WORKQUEUE_QOS
+	DC_VTABLE_ENTRY(WORKLOOP_STEALING,
+		.do_invoke = _dispatch_workloop_stealer_invoke),
 	DC_VTABLE_ENTRY(OVERRIDE_STEALING,
-		.do_kind = "dc-override-stealing",
 		.do_invoke = _dispatch_queue_override_invoke),
 	DC_VTABLE_ENTRY(OVERRIDE_OWNING,
-		.do_kind = "dc-override-owning",
 		.do_invoke = _dispatch_queue_override_invoke),
 #endif
+#if HAVE_MACH
+	DC_VTABLE_ENTRY(MACH_IPC_HANDOFF,
+		.do_invoke = _dispatch_mach_ipc_handoff_invoke),
+#endif
 };
 
-static void
-_dispatch_force_cache_cleanup(void)
-{
-	dispatch_continuation_t dc;
-	dc = _dispatch_thread_getspecific(dispatch_cache_key);
-	if (dc) {
-		_dispatch_thread_setspecific(dispatch_cache_key, NULL);
-		_dispatch_cache_cleanup(dc);
-	}
-}
-
 DISPATCH_NOINLINE
 static void DISPATCH_TSD_DTOR_CC
 _dispatch_cache_cleanup(void *value)
@@ -2906,6 +247,17 @@
 	}
 }
 
+static void
+_dispatch_force_cache_cleanup(void)
+{
+	dispatch_continuation_t dc;
+	dc = _dispatch_thread_getspecific(dispatch_cache_key);
+	if (dc) {
+		_dispatch_thread_setspecific(dispatch_cache_key, NULL);
+		_dispatch_cache_cleanup(dc);
+	}
+}
+
 #if DISPATCH_USE_MEMORYPRESSURE_SOURCE
 DISPATCH_NOINLINE
 void
@@ -2928,29 +280,11 @@
 #endif
 
 DISPATCH_NOINLINE
-static void
-_dispatch_continuation_push(dispatch_queue_t dq, dispatch_continuation_t dc)
-{
-	dx_push(dq, dc, _dispatch_continuation_override_qos(dq, dc));
-}
-
-DISPATCH_ALWAYS_INLINE
-static inline void
-_dispatch_continuation_async2(dispatch_queue_t dq, dispatch_continuation_t dc,
-		bool barrier)
-{
-	if (fastpath(barrier || !DISPATCH_QUEUE_USES_REDIRECTION(dq->dq_width))) {
-		return _dispatch_continuation_push(dq, dc);
-	}
-	return _dispatch_async_f2(dq, dc);
-}
-
-DISPATCH_NOINLINE
 void
-_dispatch_continuation_async(dispatch_queue_t dq, dispatch_continuation_t dc)
+_dispatch_continuation_pop(dispatch_object_t dou, dispatch_invoke_context_t dic,
+		dispatch_invoke_flags_t flags, dispatch_queue_class_t dqu)
 {
-	_dispatch_continuation_async2(dq, dc,
-			dc->dc_flags & DISPATCH_OBJ_BARRIER_BIT);
+	_dispatch_continuation_pop_inline(dou, dic, flags, dqu._dq);
 }
 
 #pragma mark -
@@ -2969,12 +303,12 @@
 static inline dispatch_block_flags_t
 _dispatch_block_normalize_flags(dispatch_block_flags_t flags)
 {
-	if (flags & (DISPATCH_BLOCK_NO_VOUCHER|DISPATCH_BLOCK_DETACHED)) {
-		flags |= DISPATCH_BLOCK_HAS_VOUCHER;
-	}
 	if (flags & (DISPATCH_BLOCK_NO_QOS_CLASS|DISPATCH_BLOCK_DETACHED)) {
 		flags |= DISPATCH_BLOCK_HAS_PRIORITY;
 	}
+	if (flags & DISPATCH_BLOCK_ENFORCE_QOS_CLASS) {
+		flags &= ~(dispatch_block_flags_t)DISPATCH_BLOCK_INHERIT_QOS_CLASS;
+	}
 	return flags;
 }
 
@@ -2982,14 +316,25 @@
 _dispatch_block_create_with_voucher_and_priority(dispatch_block_flags_t flags,
 		voucher_t voucher, pthread_priority_t pri, dispatch_block_t block)
 {
+	dispatch_block_flags_t unmodified_flags = flags;
+	pthread_priority_t unmodified_pri = pri;
+
 	flags = _dispatch_block_normalize_flags(flags);
 	bool assign = (flags & DISPATCH_BLOCK_ASSIGN_CURRENT);
 
-	if (assign && !(flags & DISPATCH_BLOCK_HAS_VOUCHER)) {
+	if (!(flags & DISPATCH_BLOCK_HAS_VOUCHER)) {
+		if (flags & DISPATCH_BLOCK_DETACHED) {
+			voucher = VOUCHER_NULL;
+			flags |= DISPATCH_BLOCK_HAS_VOUCHER;
+		} else if (flags & DISPATCH_BLOCK_NO_VOUCHER) {
+			voucher = DISPATCH_NO_VOUCHER;
+			flags |= DISPATCH_BLOCK_HAS_VOUCHER;
+		} else if (assign) {
 #if OS_VOUCHER_ACTIVITY_SPI
-		voucher = VOUCHER_CURRENT;
+			voucher = VOUCHER_CURRENT;
 #endif
-		flags |= DISPATCH_BLOCK_HAS_VOUCHER;
+			flags |= DISPATCH_BLOCK_HAS_VOUCHER;
+		}
 	}
 #if OS_VOUCHER_ACTIVITY_SPI
 	if (voucher == VOUCHER_CURRENT) {
@@ -3001,9 +346,16 @@
 		flags |= DISPATCH_BLOCK_HAS_PRIORITY;
 	}
 	dispatch_block_t db = _dispatch_block_create(flags, voucher, pri, block);
+
 #if DISPATCH_DEBUG
 	dispatch_assert(_dispatch_block_get_data(db));
 #endif
+
+	_dispatch_trace_block_create_with_voucher_and_priority(db,
+			_dispatch_Block_invoke(block), unmodified_flags,
+			((unmodified_flags & DISPATCH_BLOCK_HAS_PRIORITY) ? unmodified_pri :
+					(unsigned long)UINT32_MAX),
+			_dispatch_get_priority(), pri);
 	return db;
 }
 
@@ -3039,6 +391,7 @@
 {
 	if (!_dispatch_block_flags_valid(flags)) return DISPATCH_BAD_INPUT;
 	flags |= DISPATCH_BLOCK_HAS_VOUCHER;
+	flags &= ~DISPATCH_BLOCK_NO_VOUCHER;
 	return _dispatch_block_create_with_voucher_and_priority(flags, voucher, 0,
 			block);
 }
@@ -3053,6 +406,7 @@
 		return DISPATCH_BAD_INPUT;
 	}
 	flags |= (DISPATCH_BLOCK_HAS_VOUCHER|DISPATCH_BLOCK_HAS_PRIORITY);
+	flags &= ~(DISPATCH_BLOCK_NO_VOUCHER|DISPATCH_BLOCK_NO_QOS_CLASS);
 	pthread_priority_t pri = 0;
 #if HAVE_PTHREAD_WORKQUEUE_QOS
 	pri = _pthread_qos_class_encode(qos_class, relative_priority, 0);
@@ -3069,20 +423,25 @@
 				"dispatch_block_perform()");
 	}
 	flags = _dispatch_block_normalize_flags(flags);
+
+	voucher_t voucher = DISPATCH_NO_VOUCHER;
+	if (flags & DISPATCH_BLOCK_DETACHED) {
+		voucher = VOUCHER_NULL;
+		flags |= DISPATCH_BLOCK_HAS_VOUCHER;
+	}
+
 	struct dispatch_block_private_data_s dbpds =
-			DISPATCH_BLOCK_PRIVATE_DATA_PERFORM_INITIALIZER(flags, block);
+		DISPATCH_BLOCK_PRIVATE_DATA_PERFORM_INITIALIZER(flags, block, voucher);
 	return _dispatch_block_invoke_direct(&dbpds);
 }
 
-#define _dbpd_group(dbpd) ((dbpd)->dbpd_group)
-
 void
 _dispatch_block_invoke_direct(const struct dispatch_block_private_data_s *dbcpd)
 {
 	dispatch_block_private_data_t dbpd = (dispatch_block_private_data_t)dbcpd;
 	dispatch_block_flags_t flags = dbpd->dbpd_flags;
 	unsigned int atomic_flags = dbpd->dbpd_atomic_flags;
-	if (slowpath(atomic_flags & DBF_WAITED)) {
+	if (unlikely(atomic_flags & DBF_WAITED)) {
 		DISPATCH_CLIENT_CRASH(atomic_flags, "A block object may not be both "
 				"run more than once and waited for");
 	}
@@ -3105,7 +464,7 @@
 out:
 	if ((atomic_flags & DBF_PERFORM) == 0) {
 		if (os_atomic_inc2o(dbpd, dbpd_performed, relaxed) == 1) {
-			dispatch_group_leave(_dbpd_group(dbpd));
+			dispatch_group_leave(dbpd->dbpd_group);
 		}
 	}
 }
@@ -3132,63 +491,19 @@
 out:
 	if ((atomic_flags & DBF_PERFORM) == 0) {
 		if (os_atomic_inc2o(dbpd, dbpd_performed, relaxed) == 1) {
-			dispatch_group_leave(_dbpd_group(dbpd));
+			dispatch_group_leave(dbpd->dbpd_group);
 		}
 	}
 
-	os_mpsc_queue_t oq;
-	oq = os_atomic_xchg2o(dbpd, dbpd_queue, NULL, relaxed);
-	if (oq) {
+	dispatch_queue_t boost_dq;
+	boost_dq = os_atomic_xchg2o(dbpd, dbpd_queue, NULL, relaxed);
+	if (boost_dq) {
 		// balances dispatch_{,barrier_,}sync
-		_os_object_release_internal_n(oq->_as_os_obj, 2);
+		_dispatch_release_2(boost_dq);
 	}
 }
 
-#if DISPATCH_USE_KEVENT_WORKQUEUE
-static void
-_dispatch_block_async_invoke_reset_max_qos(dispatch_queue_t dq,
-		dispatch_qos_t qos)
-{
-	uint64_t old_state, new_state, qos_bits = _dq_state_from_qos(qos);
-
-	// Only dispatch queues can reach this point (as opposed to sources or more
-	// complex objects) which allows us to handle the DIRTY bit protocol by only
-	// complex objects), which allows us to handle the DIRTY bit protocol by only
-	dispatch_assert(dx_metatype(dq) == _DISPATCH_QUEUE_TYPE);
-
-again:
-	os_atomic_rmw_loop2o(dq, dq_state, old_state, new_state, relaxed, {
-		dispatch_assert(_dq_state_is_base_wlh(old_state));
-		if ((old_state & DISPATCH_QUEUE_MAX_QOS_MASK) <= qos_bits) {
-			// Nothing to do if the QoS isn't going down
-			os_atomic_rmw_loop_give_up(return);
-		}
-		if (_dq_state_is_dirty(old_state)) {
-			os_atomic_rmw_loop_give_up({
-				// just renew the drain lock with an acquire barrier, to see
-				// what the enqueuer that set DIRTY has done.
-				// the xor generates better assembly as DISPATCH_QUEUE_DIRTY
-				// is already in a register
-				os_atomic_xor2o(dq, dq_state, DISPATCH_QUEUE_DIRTY, acquire);
-				if (!dq->dq_items_tail) {
-					goto again;
-				}
-				return;
-			});
-		}
-
-		new_state  = old_state;
-		new_state &= ~DISPATCH_QUEUE_MAX_QOS_MASK;
-		new_state |= qos_bits;
-	});
-
-	_dispatch_deferred_items_get()->ddi_wlh_needs_update = true;
-	_dispatch_event_loop_drain(KEVENT_FLAG_IMMEDIATE);
-}
-#endif // DISPATCH_USE_KEVENT_WORKQUEUE
-
 #define DISPATCH_BLOCK_ASYNC_INVOKE_RELEASE           0x1
-#define DISPATCH_BLOCK_ASYNC_INVOKE_NO_OVERRIDE_RESET 0x2
 
 DISPATCH_NOINLINE
 static void
@@ -3196,36 +511,25 @@
 {
 	dispatch_block_private_data_t dbpd = _dispatch_block_get_data(b);
 	unsigned int atomic_flags = dbpd->dbpd_atomic_flags;
-	if (slowpath(atomic_flags & DBF_WAITED)) {
+	if (unlikely(atomic_flags & DBF_WAITED)) {
 		DISPATCH_CLIENT_CRASH(atomic_flags, "A block object may not be both "
 				"run more than once and waited for");
 	}
 
-#if DISPATCH_USE_KEVENT_WORKQUEUE
-	if (unlikely((dbpd->dbpd_flags &
-			DISPATCH_BLOCK_IF_LAST_RESET_QUEUE_QOS_OVERRIDE) &&
-			!(invoke_flags & DISPATCH_BLOCK_ASYNC_INVOKE_NO_OVERRIDE_RESET))) {
-		dispatch_queue_t dq = _dispatch_get_current_queue();
-		dispatch_qos_t qos = _dispatch_qos_from_pp(_dispatch_get_priority());
-		if ((dispatch_wlh_t)dq == _dispatch_get_wlh() && !dq->dq_items_tail) {
-			_dispatch_block_async_invoke_reset_max_qos(dq, qos);
-		}
-	}
-#endif // DISPATCH_USE_KEVENT_WORKQUEUE
-
-	if (!slowpath(atomic_flags & DBF_CANCELED)) {
+	if (likely(!(atomic_flags & DBF_CANCELED))) {
 		dbpd->dbpd_block();
 	}
 	if ((atomic_flags & DBF_PERFORM) == 0) {
 		if (os_atomic_inc2o(dbpd, dbpd_performed, relaxed) == 1) {
-			dispatch_group_leave(_dbpd_group(dbpd));
+			dispatch_group_leave(dbpd->dbpd_group);
 		}
 	}
 
-	os_mpsc_queue_t oq = os_atomic_xchg2o(dbpd, dbpd_queue, NULL, relaxed);
-	if (oq) {
+	dispatch_queue_t boost_dq;
+	boost_dq = os_atomic_xchg2o(dbpd, dbpd_queue, NULL, relaxed);
+	if (boost_dq) {
 		// balances dispatch_{,barrier_,group_}async
-		_os_object_release_internal_n_inline(oq->_as_os_obj, 2);
+		_dispatch_release_2(boost_dq);
 	}
 
 	if (invoke_flags & DISPATCH_BLOCK_ASYNC_INVOKE_RELEASE) {
@@ -3245,27 +549,12 @@
 	_dispatch_block_async_invoke2(block, DISPATCH_BLOCK_ASYNC_INVOKE_RELEASE);
 }
 
-static void
-_dispatch_block_async_invoke_and_release_mach_barrier(void *block)
-{
-	_dispatch_block_async_invoke2(block, DISPATCH_BLOCK_ASYNC_INVOKE_RELEASE |
-			DISPATCH_BLOCK_ASYNC_INVOKE_NO_OVERRIDE_RESET);
-}
-
-DISPATCH_ALWAYS_INLINE
-static inline bool
-_dispatch_block_supports_wait_and_cancel(dispatch_block_private_data_t dbpd)
-{
-	return dbpd && !(dbpd->dbpd_flags &
-			DISPATCH_BLOCK_IF_LAST_RESET_QUEUE_QOS_OVERRIDE);
-}
-
 void
 dispatch_block_cancel(dispatch_block_t db)
 {
 	dispatch_block_private_data_t dbpd = _dispatch_block_get_data(db);
-	if (unlikely(!_dispatch_block_supports_wait_and_cancel(dbpd))) {
-		DISPATCH_CLIENT_CRASH(db, "Invalid block object passed to "
+	if (unlikely(!dbpd)) {
+		DISPATCH_CLIENT_CRASH(0, "Invalid block object passed to "
 				"dispatch_block_cancel()");
 	}
 	(void)os_atomic_or2o(dbpd, dbpd_atomic_flags, DBF_CANCELED, relaxed);
@@ -3275,8 +564,8 @@
 dispatch_block_testcancel(dispatch_block_t db)
 {
 	dispatch_block_private_data_t dbpd = _dispatch_block_get_data(db);
-	if (unlikely(!_dispatch_block_supports_wait_and_cancel(dbpd))) {
-		DISPATCH_CLIENT_CRASH(db, "Invalid block object passed to "
+	if (unlikely(!dbpd)) {
+		DISPATCH_CLIENT_CRASH(0, "Invalid block object passed to "
 				"dispatch_block_testcancel()");
 	}
 	return (bool)(dbpd->dbpd_atomic_flags & DBF_CANCELED);
@@ -3286,14 +575,14 @@
 dispatch_block_wait(dispatch_block_t db, dispatch_time_t timeout)
 {
 	dispatch_block_private_data_t dbpd = _dispatch_block_get_data(db);
-	if (unlikely(!_dispatch_block_supports_wait_and_cancel(dbpd))) {
-		DISPATCH_CLIENT_CRASH(db, "Invalid block object passed to "
+	if (unlikely(!dbpd)) {
+		DISPATCH_CLIENT_CRASH(0, "Invalid block object passed to "
 				"dispatch_block_wait()");
 	}
 
 	unsigned int flags = os_atomic_or_orig2o(dbpd, dbpd_atomic_flags,
 			DBF_WAITING, relaxed);
-	if (slowpath(flags & (DBF_WAITED | DBF_WAITING))) {
+	if (unlikely(flags & (DBF_WAITED | DBF_WAITING))) {
 		DISPATCH_CLIENT_CRASH(flags, "A block object may not be waited for "
 				"more than once");
 	}
@@ -3304,16 +593,16 @@
 
 	pthread_priority_t pp = _dispatch_get_priority();
 
-	os_mpsc_queue_t boost_oq;
-	boost_oq = os_atomic_xchg2o(dbpd, dbpd_queue, NULL, relaxed);
-	if (boost_oq) {
+	dispatch_queue_t boost_dq;
+	boost_dq = os_atomic_xchg2o(dbpd, dbpd_queue, NULL, relaxed);
+	if (boost_dq) {
 		// release balances dispatch_{,barrier_,group_}async.
 		// Can't put the queue back in the timeout case: the block might
 		// finish after we fell out of group_wait and see our NULL, so
 		// neither of us would ever release. Side effect: After a _wait
 		// that times out, subsequent waits will not boost the qos of the
 		// still-running block.
-		dx_wakeup(boost_oq, _dispatch_qos_from_pp(pp),
+		dx_wakeup(boost_dq, _dispatch_qos_from_pp(pp),
 				DISPATCH_WAKEUP_BLOCK_WAIT | DISPATCH_WAKEUP_CONSUME_2);
 	}
 
@@ -3323,12 +612,12 @@
 	}
 
 	int performed = os_atomic_load2o(dbpd, dbpd_performed, relaxed);
-	if (slowpath(performed > 1 || (boost_th && boost_oq))) {
+	if (unlikely(performed > 1 || (boost_th && boost_dq))) {
 		DISPATCH_CLIENT_CRASH(performed, "A block object may not be both "
 				"run more than once and waited for");
 	}
 
-	intptr_t ret = dispatch_group_wait(_dbpd_group(dbpd), timeout);
+	long ret = dispatch_group_wait(dbpd->dbpd_group, timeout);
 
 	if (boost_th) {
 		_dispatch_thread_override_end(boost_th, dbpd);
@@ -3336,11 +625,9 @@
 
 	if (ret) {
 		// timed out: reverse our changes
-		(void)os_atomic_and2o(dbpd, dbpd_atomic_flags,
-				~DBF_WAITING, relaxed);
+		os_atomic_and2o(dbpd, dbpd_atomic_flags, ~DBF_WAITING, relaxed);
 	} else {
-		(void)os_atomic_or2o(dbpd, dbpd_atomic_flags,
-				DBF_WAITED, relaxed);
+		os_atomic_or2o(dbpd, dbpd_atomic_flags, DBF_WAITED, relaxed);
 		// don't need to re-test here: the second call would see
 		// the first call's WAITING
 	}
@@ -3358,33 +645,30 @@
 				"dispatch_block_notify()");
 	}
 	int performed = os_atomic_load2o(dbpd, dbpd_performed, relaxed);
-	if (slowpath(performed > 1)) {
+	if (unlikely(performed > 1)) {
 		DISPATCH_CLIENT_CRASH(performed, "A block object may not be both "
 				"run more than once and observed");
 	}
 
-	return dispatch_group_notify(_dbpd_group(dbpd), queue, notification_block);
+	return dispatch_group_notify(dbpd->dbpd_group, queue, notification_block);
 }
 
 DISPATCH_NOINLINE
-void
+dispatch_qos_t
 _dispatch_continuation_init_slow(dispatch_continuation_t dc,
-		dispatch_queue_class_t dqu, dispatch_block_flags_t flags)
+		dispatch_queue_t dq, dispatch_block_flags_t flags)
 {
 	dispatch_block_private_data_t dbpd = _dispatch_block_get_data(dc->dc_ctxt);
 	dispatch_block_flags_t block_flags = dbpd->dbpd_flags;
 	uintptr_t dc_flags = dc->dc_flags;
-	os_mpsc_queue_t oq = dqu._oq;
+	pthread_priority_t pp = 0;
 
 	// balanced in d_block_async_invoke_and_release or d_block_wait
-	if (os_atomic_cmpxchg2o(dbpd, dbpd_queue, NULL, oq, relaxed)) {
-		_os_object_retain_internal_n_inline(oq->_as_os_obj, 2);
+	if (os_atomic_cmpxchg2o(dbpd, dbpd_queue, NULL, dq, relaxed)) {
+		_dispatch_retain_2(dq);
 	}
 
-	if (dc_flags & DISPATCH_OBJ_MACH_BARRIER) {
-		dispatch_assert(dc_flags & DISPATCH_OBJ_CONSUME_BIT);
-		dc->dc_func = _dispatch_block_async_invoke_and_release_mach_barrier;
-	} else if (dc_flags & DISPATCH_OBJ_CONSUME_BIT) {
+	if (dc_flags & DC_FLAG_CONSUME) {
 		dc->dc_func = _dispatch_block_async_invoke_and_release;
 	} else {
 		dc->dc_func = _dispatch_block_async_invoke;
@@ -3392,24 +676,30 @@
 
 	flags |= block_flags;
 	if (block_flags & DISPATCH_BLOCK_HAS_PRIORITY) {
-		_dispatch_continuation_priority_set(dc, dbpd->dbpd_priority, flags);
+		pp = dbpd->dbpd_priority & ~_PTHREAD_PRIORITY_FLAGS_MASK;
+	} else if (flags & DISPATCH_BLOCK_HAS_PRIORITY) {
+		// _dispatch_source_handler_alloc is calling us and doesn't want us
+		// to propagate priorities
+		pp = 0;
 	} else {
-		_dispatch_continuation_priority_set(dc, dc->dc_priority, flags);
+		pp = _dispatch_priority_propagate();
 	}
+	_dispatch_continuation_priority_set(dc, dq, pp, flags);
 	if (block_flags & DISPATCH_BLOCK_BARRIER) {
-		dc_flags |= DISPATCH_OBJ_BARRIER_BIT;
+		dc_flags |= DC_FLAG_BARRIER;
 	}
 	if (block_flags & DISPATCH_BLOCK_HAS_VOUCHER) {
 		voucher_t v = dbpd->dbpd_voucher;
-		dc->dc_voucher = v ? _voucher_retain(v) : NULL;
-		dc_flags |= DISPATCH_OBJ_ENFORCE_VOUCHER;
+		dc->dc_voucher = (v && v != DISPATCH_NO_VOUCHER) ? _voucher_retain(v)
+				: v;
 		_dispatch_voucher_debug("continuation[%p] set", dc->dc_voucher, dc);
 		_dispatch_voucher_ktrace_dc_push(dc);
 	} else {
-		_dispatch_continuation_voucher_set(dc, oq, flags);
+		_dispatch_continuation_voucher_set(dc, flags);
 	}
-	dc_flags |= DISPATCH_OBJ_BLOCK_PRIVATE_DATA_BIT;
+	dc_flags |= DC_FLAG_BLOCK_WITH_PRIVATE_DATA;
 	dc->dc_flags = dc_flags;
+	return _dispatch_qos_from_pp(dc->dc_priority);
 }
 
 #endif // __BLOCKS__
@@ -3419,29 +709,14 @@
 DISPATCH_NOINLINE
 static void
 _dispatch_async_f_slow(dispatch_queue_t dq, void *ctxt,
-		dispatch_function_t func, pthread_priority_t pp,
-		dispatch_block_flags_t flags, uintptr_t dc_flags)
+		dispatch_function_t func, dispatch_block_flags_t flags,
+		uintptr_t dc_flags)
 {
 	dispatch_continuation_t dc = _dispatch_continuation_alloc_from_heap();
-	_dispatch_continuation_init_f(dc, dq, ctxt, func, pp, flags, dc_flags);
-	_dispatch_continuation_async(dq, dc);
-}
+	dispatch_qos_t qos;
 
-DISPATCH_ALWAYS_INLINE
-static inline void
-_dispatch_barrier_async_f2(dispatch_queue_t dq, void *ctxt,
-		dispatch_function_t func, pthread_priority_t pp,
-		dispatch_block_flags_t flags)
-{
-	dispatch_continuation_t dc = _dispatch_continuation_alloc_cacheonly();
-	uintptr_t dc_flags = DISPATCH_OBJ_CONSUME_BIT | DISPATCH_OBJ_BARRIER_BIT;
-
-	if (!fastpath(dc)) {
-		return _dispatch_async_f_slow(dq, ctxt, func, pp, flags, dc_flags);
-	}
-
-	_dispatch_continuation_init_f(dc, dq, ctxt, func, pp, flags, dc_flags);
-	_dispatch_continuation_push(dq, dc);
+	qos = _dispatch_continuation_init_f(dc, dq, ctxt, func, flags, dc_flags);
+	_dispatch_continuation_async(dq, dc, qos, dc->dc_flags);
 }
 
 DISPATCH_NOINLINE
@@ -3449,21 +724,31 @@
 dispatch_barrier_async_f(dispatch_queue_t dq, void *ctxt,
 		dispatch_function_t func)
 {
-	_dispatch_barrier_async_f2(dq, ctxt, func, 0, 0);
+	dispatch_continuation_t dc = _dispatch_continuation_alloc_cacheonly();
+	uintptr_t dc_flags = DC_FLAG_CONSUME | DC_FLAG_BARRIER;
+	dispatch_qos_t qos;
+
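+	// cache miss: no continuation available in the thread-local cache, take
+	// the slow path which allocates the continuation from the heap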
+	if (unlikely(!dc)) {
+		return _dispatch_async_f_slow(dq, ctxt, func, 0, dc_flags);
+	}
+
+	qos = _dispatch_continuation_init_f(dc, dq, ctxt, func, 0, dc_flags);
+	_dispatch_continuation_async(dq, dc, qos, dc_flags);
 }
 
 DISPATCH_NOINLINE
 void
-_dispatch_barrier_async_detached_f(dispatch_queue_t dq, void *ctxt,
+_dispatch_barrier_async_detached_f(dispatch_queue_class_t dq, void *ctxt,
 		dispatch_function_t func)
 {
 	dispatch_continuation_t dc = _dispatch_continuation_alloc();
-	dc->dc_flags = DISPATCH_OBJ_CONSUME_BIT | DISPATCH_OBJ_BARRIER_BIT;
+	dc->dc_flags = DC_FLAG_CONSUME | DC_FLAG_BARRIER | DC_FLAG_ALLOCATED;
 	dc->dc_func = func;
 	dc->dc_ctxt = ctxt;
 	dc->dc_voucher = DISPATCH_NO_VOUCHER;
 	dc->dc_priority = DISPATCH_NO_PRIORITY;
-	dx_push(dq, dc, 0);
+	_dispatch_trace_item_push(dq, dc);
+	dx_push(dq._dq, dc, 0);
 }
 
 #ifdef __BLOCKS__
@@ -3471,10 +756,11 @@
 dispatch_barrier_async(dispatch_queue_t dq, dispatch_block_t work)
 {
 	dispatch_continuation_t dc = _dispatch_continuation_alloc();
-	uintptr_t dc_flags = DISPATCH_OBJ_CONSUME_BIT | DISPATCH_OBJ_BARRIER_BIT;
+	uintptr_t dc_flags = DC_FLAG_CONSUME | DC_FLAG_BARRIER;
+	dispatch_qos_t qos;
 
-	_dispatch_continuation_init(dc, dq, work, 0, 0, dc_flags);
-	_dispatch_continuation_push(dq, dc);
+	qos = _dispatch_continuation_init(dc, dq, work, 0, dc_flags);
+	_dispatch_continuation_async(dq, dc, qos, dc_flags);
 }
 #endif
 
@@ -3490,15 +776,16 @@
 	dispatch_invoke_flags_t ctxt_flags = (dispatch_invoke_flags_t)dc->dc_ctxt;
 	// if we went through _dispatch_root_queue_push_override,
 	// the "right" root queue was stuffed into dc_func
-	dispatch_queue_t assumed_rq = (dispatch_queue_t)dc->dc_func;
-	dispatch_queue_t dq = dc->dc_data, rq, old_dq;
+	dispatch_queue_global_t assumed_rq = (dispatch_queue_global_t)dc->dc_func;
+	dispatch_lane_t dq = dc->dc_data;
+	dispatch_queue_t rq, old_dq;
 	dispatch_priority_t old_dbp;
 
 	if (ctxt_flags) {
 		flags &= ~_DISPATCH_INVOKE_AUTORELEASE_MASK;
 		flags |= ctxt_flags;
 	}
-	old_dq = _dispatch_get_current_queue();
+	old_dq = _dispatch_queue_get_current();
 	if (assumed_rq) {
 		old_dbp = _dispatch_root_queue_identity_assume(assumed_rq);
 		_dispatch_set_basepri(dq->dq_priority);
@@ -3506,9 +793,9 @@
 		old_dbp = _dispatch_set_basepri(dq->dq_priority);
 	}
 
+	uintptr_t dc_flags = DC_FLAG_CONSUME | DC_FLAG_NO_INTROSPECTION;
 	_dispatch_thread_frame_push(&dtf, dq);
-	_dispatch_continuation_pop_forwarded(dc, DISPATCH_NO_VOUCHER,
-			DISPATCH_OBJ_CONSUME_BIT, {
+	_dispatch_continuation_pop_forwarded(dc, dc_flags, NULL, {
 		_dispatch_continuation_pop(other_dc, dic, flags, dq);
 	});
 	_dispatch_thread_frame_pop(&dtf);
@@ -3516,18 +803,18 @@
 	_dispatch_reset_basepri(old_dbp);
 
 	rq = dq->do_targetq;
-	while (slowpath(rq->do_targetq) && rq != old_dq) {
-		_dispatch_queue_non_barrier_complete(rq);
+	while (unlikely(rq->do_targetq && rq != old_dq)) {
+		_dispatch_lane_non_barrier_complete(upcast(rq)._dl, 0);
 		rq = rq->do_targetq;
 	}
 
-	_dispatch_queue_non_barrier_complete(dq);
-	_dispatch_release_tailcall(dq); // pairs with _dispatch_async_redirect_wrap
+	// pairs with _dispatch_async_redirect_wrap
+	_dispatch_lane_non_barrier_complete(dq, DISPATCH_WAKEUP_CONSUME_2);
 }
 
 DISPATCH_ALWAYS_INLINE
 static inline dispatch_continuation_t
-_dispatch_async_redirect_wrap(dispatch_queue_t dq, dispatch_object_t dou)
+_dispatch_async_redirect_wrap(dispatch_lane_t dq, dispatch_object_t dou)
 {
 	dispatch_continuation_t dc = _dispatch_continuation_alloc();
 
@@ -3539,94 +826,52 @@
 	dc->dc_other = dou._do;
 	dc->dc_voucher = DISPATCH_NO_VOUCHER;
 	dc->dc_priority = DISPATCH_NO_PRIORITY;
-	_dispatch_retain(dq); // released in _dispatch_async_redirect_invoke
+	_dispatch_retain_2(dq); // released in _dispatch_async_redirect_invoke
 	return dc;
 }
 
 DISPATCH_NOINLINE
 static void
-_dispatch_async_f_redirect(dispatch_queue_t dq,
+_dispatch_continuation_redirect_push(dispatch_lane_t dl,
 		dispatch_object_t dou, dispatch_qos_t qos)
 {
-	if (!slowpath(_dispatch_object_is_redirection(dou))) {
-		dou._dc = _dispatch_async_redirect_wrap(dq, dou);
-	}
-	dq = dq->do_targetq;
-
-	// Find the queue to redirect to
-	while (slowpath(DISPATCH_QUEUE_USES_REDIRECTION(dq->dq_width))) {
-		if (!fastpath(_dispatch_queue_try_acquire_async(dq))) {
-			break;
-		}
-		if (!dou._dc->dc_ctxt) {
-			// find first queue in descending target queue order that has
-			// an autorelease frequency set, and use that as the frequency for
-			// this continuation.
-			dou._dc->dc_ctxt = (void *)
-					(uintptr_t)_dispatch_queue_autorelease_frequency(dq);
-		}
-		dq = dq->do_targetq;
+	if (likely(!_dispatch_object_is_redirection(dou))) {
+		dou._dc = _dispatch_async_redirect_wrap(dl, dou);
+	} else if (!dou._dc->dc_ctxt) {
+		// find first queue in descending target queue order that has
+		// an autorelease frequency set, and use that as the frequency for
+		// this continuation.
+		dou._dc->dc_ctxt = (void *)
+				(uintptr_t)_dispatch_queue_autorelease_frequency(dl);
 	}
 
+	dispatch_queue_t dq = dl->do_targetq;
+	if (!qos) qos = _dispatch_priority_qos(dq->dq_priority);
 	dx_push(dq, dou, qos);
 }
 
 DISPATCH_ALWAYS_INLINE
 static inline void
-_dispatch_continuation_redirect(dispatch_queue_t dq,
-		struct dispatch_object_s *dc)
-{
-	_dispatch_trace_continuation_pop(dq, dc);
-	// This is a re-redirect, overrides have already been applied
-	// by _dispatch_async_f2.
-	// However we want to end up on the root queue matching `dc` qos, so pick up
-	// the current override of `dq` which includes dc's overrde (and maybe more)
-	uint64_t dq_state = os_atomic_load2o(dq, dq_state, relaxed);
-	_dispatch_async_f_redirect(dq, dc, _dq_state_max_qos(dq_state));
-	_dispatch_introspection_queue_item_complete(dc);
-}
-
-DISPATCH_NOINLINE
-static void
-_dispatch_async_f2(dispatch_queue_t dq, dispatch_continuation_t dc)
-{
-	// <rdar://problem/24738102&24743140> reserving non barrier width
-	// doesn't fail if only the ENQUEUED bit is set (unlike its barrier width
-	// equivalent), so we have to check that this thread hasn't enqueued
-	// anything ahead of this call or we can break ordering
-	if (slowpath(dq->dq_items_tail)) {
-		return _dispatch_continuation_push(dq, dc);
-	}
-
-	if (slowpath(!_dispatch_queue_try_acquire_async(dq))) {
-		return _dispatch_continuation_push(dq, dc);
-	}
-
-	return _dispatch_async_f_redirect(dq, dc,
-			_dispatch_continuation_override_qos(dq, dc));
-}
-
-DISPATCH_ALWAYS_INLINE
-static inline void
 _dispatch_async_f(dispatch_queue_t dq, void *ctxt, dispatch_function_t func,
-		pthread_priority_t pp, dispatch_block_flags_t flags)
+		dispatch_block_flags_t flags)
 {
 	dispatch_continuation_t dc = _dispatch_continuation_alloc_cacheonly();
-	uintptr_t dc_flags = DISPATCH_OBJ_CONSUME_BIT;
+	uintptr_t dc_flags = DC_FLAG_CONSUME;
+	dispatch_qos_t qos;
 
-	if (!fastpath(dc)) {
-		return _dispatch_async_f_slow(dq, ctxt, func, pp, flags, dc_flags);
+	if (unlikely(!dc)) {
+		return _dispatch_async_f_slow(dq, ctxt, func, flags, dc_flags);
 	}
 
-	_dispatch_continuation_init_f(dc, dq, ctxt, func, pp, flags, dc_flags);
-	_dispatch_continuation_async2(dq, dc, false);
+	qos = _dispatch_continuation_init_f(dc, dq, ctxt, func, flags, dc_flags);
+	_dispatch_continuation_async(dq, dc, qos, dc->dc_flags);
 }
 
 DISPATCH_NOINLINE
 void
 dispatch_async_f(dispatch_queue_t dq, void *ctxt, dispatch_function_t func)
 {
-	_dispatch_async_f(dq, ctxt, func, 0, 0);
+	_dispatch_async_f(dq, ctxt, func, 0);
 }
 
 DISPATCH_NOINLINE
@@ -3634,7 +879,7 @@
 dispatch_async_enforce_qos_class_f(dispatch_queue_t dq, void *ctxt,
 		dispatch_function_t func)
 {
-	_dispatch_async_f(dq, ctxt, func, 0, DISPATCH_BLOCK_ENFORCE_QOS_CLASS);
+	_dispatch_async_f(dq, ctxt, func, DISPATCH_BLOCK_ENFORCE_QOS_CLASS);
 }
 
 #ifdef __BLOCKS__
@@ -3642,87 +887,51 @@
 dispatch_async(dispatch_queue_t dq, dispatch_block_t work)
 {
 	dispatch_continuation_t dc = _dispatch_continuation_alloc();
-	uintptr_t dc_flags = DISPATCH_OBJ_CONSUME_BIT;
+	uintptr_t dc_flags = DC_FLAG_CONSUME;
+	dispatch_qos_t qos;
 
-	_dispatch_continuation_init(dc, dq, work, 0, 0, dc_flags);
-	_dispatch_continuation_async(dq, dc);
-}
-#endif
-
-#pragma mark -
-#pragma mark dispatch_group_async
-
-DISPATCH_ALWAYS_INLINE
-static inline void
-_dispatch_continuation_group_async(dispatch_group_t dg, dispatch_queue_t dq,
-		dispatch_continuation_t dc)
-{
-	dispatch_group_enter(dg);
-	dc->dc_data = dg;
-	_dispatch_continuation_async(dq, dc);
-}
-
-DISPATCH_NOINLINE
-void
-dispatch_group_async_f(dispatch_group_t dg, dispatch_queue_t dq, void *ctxt,
-		dispatch_function_t func)
-{
-	dispatch_continuation_t dc = _dispatch_continuation_alloc();
-	uintptr_t dc_flags = DISPATCH_OBJ_CONSUME_BIT | DISPATCH_OBJ_GROUP_BIT;
-
-	_dispatch_continuation_init_f(dc, dq, ctxt, func, 0, 0, dc_flags);
-	_dispatch_continuation_group_async(dg, dq, dc);
-}
-
-#ifdef __BLOCKS__
-void
-dispatch_group_async(dispatch_group_t dg, dispatch_queue_t dq,
-		dispatch_block_t db)
-{
-	dispatch_continuation_t dc = _dispatch_continuation_alloc();
-	uintptr_t dc_flags = DISPATCH_OBJ_CONSUME_BIT | DISPATCH_OBJ_GROUP_BIT;
-
-	_dispatch_continuation_init(dc, dq, db, 0, 0, dc_flags);
-	_dispatch_continuation_group_async(dg, dq, dc);
+	qos = _dispatch_continuation_init(dc, dq, work, 0, dc_flags);
+	_dispatch_continuation_async(dq, dc, qos, dc->dc_flags);
 }
 #endif
 
 #pragma mark -
 #pragma mark _dispatch_sync_invoke / _dispatch_sync_complete
 
-DISPATCH_NOINLINE
-static void
-_dispatch_queue_non_barrier_complete(dispatch_queue_t dq)
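+// Computes the state transition for a non-barrier completion: if taking the
+// remaining width (plus any pending barrier) would fill the queue, this
+// completion is the last one out and grabs the drain lock in barrier mode;
+// otherwise, a dirty queue gets the ENQUEUED bit so pending work is redriven.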
+DISPATCH_ALWAYS_INLINE
+static uint64_t
+_dispatch_lane_non_barrier_complete_try_lock(dispatch_lane_t dq,
+		uint64_t old_state, uint64_t new_state, uint64_t owner_self)
 {
-	uint64_t old_state, new_state, owner_self = _dispatch_lock_value_for_self();
+	uint64_t full_width = new_state;
+	if (_dq_state_has_pending_barrier(new_state)) {
+		full_width -= DISPATCH_QUEUE_PENDING_BARRIER;
+		full_width += DISPATCH_QUEUE_WIDTH_INTERVAL;
+		full_width += DISPATCH_QUEUE_IN_BARRIER;
+	} else {
+		full_width += dq->dq_width * DISPATCH_QUEUE_WIDTH_INTERVAL;
+		full_width += DISPATCH_QUEUE_IN_BARRIER;
+	}
+	if ((full_width & DISPATCH_QUEUE_WIDTH_MASK) ==
+			DISPATCH_QUEUE_WIDTH_FULL_BIT) {
+		new_state = full_width;
+		new_state &= ~DISPATCH_QUEUE_DIRTY;
+		new_state |= owner_self;
+	} else if (_dq_state_is_dirty(old_state)) {
+		new_state |= DISPATCH_QUEUE_ENQUEUED;
+	}
+	return new_state;
+}
 
-	// see _dispatch_queue_resume()
-	os_atomic_rmw_loop2o(dq, dq_state, old_state, new_state, relaxed, {
-		new_state = old_state - DISPATCH_QUEUE_WIDTH_INTERVAL;
-		if (unlikely(_dq_state_drain_locked(old_state))) {
-			// make drain_try_unlock() fail and reconsider whether there's
-			// enough width now for a new item
-			new_state |= DISPATCH_QUEUE_DIRTY;
-		} else if (likely(_dq_state_is_runnable(new_state))) {
-			uint64_t full_width = new_state;
-			if (_dq_state_has_pending_barrier(old_state)) {
-				full_width -= DISPATCH_QUEUE_PENDING_BARRIER;
-				full_width += DISPATCH_QUEUE_WIDTH_INTERVAL;
-				full_width += DISPATCH_QUEUE_IN_BARRIER;
-			} else {
-				full_width += dq->dq_width * DISPATCH_QUEUE_WIDTH_INTERVAL;
-				full_width += DISPATCH_QUEUE_IN_BARRIER;
-			}
-			if ((full_width & DISPATCH_QUEUE_WIDTH_MASK) ==
-					DISPATCH_QUEUE_WIDTH_FULL_BIT) {
-				new_state = full_width;
-				new_state &= ~DISPATCH_QUEUE_DIRTY;
-				new_state |= owner_self;
-			} else if (_dq_state_is_dirty(old_state)) {
-				new_state |= DISPATCH_QUEUE_ENQUEUED;
-			}
-		}
-	});
+DISPATCH_ALWAYS_INLINE
+static void
+_dispatch_lane_non_barrier_complete_finish(dispatch_lane_t dq,
+		dispatch_wakeup_flags_t flags, uint64_t old_state, uint64_t new_state)
+{
+	if (_dq_state_received_override(old_state)) {
+		// Ensure that the root queue sees that this thread was overridden.
+		_dispatch_set_basepri_override_qos(_dq_state_max_qos(old_state));
+	}
 
 	if ((old_state ^ new_state) & DISPATCH_QUEUE_IN_BARRIER) {
 		if (_dq_state_is_dirty(old_state)) {
@@ -3732,20 +941,49 @@
 			os_atomic_thread_fence(dependency);
 			dq = os_atomic_force_dependency_on(dq, old_state);
 		}
-		return _dispatch_queue_barrier_complete(dq, 0, 0);
+		return _dispatch_lane_barrier_complete(dq, 0, flags);
 	}
 
 	if ((old_state ^ new_state) & DISPATCH_QUEUE_ENQUEUED) {
-		_dispatch_retain_2(dq);
+		if (!(flags & DISPATCH_WAKEUP_CONSUME_2)) {
+			_dispatch_retain_2(dq);
+		}
 		dispatch_assert(!_dq_state_is_base_wlh(new_state));
+		_dispatch_trace_item_push(dq->do_targetq, dq);
 		return dx_push(dq->do_targetq, dq, _dq_state_max_qos(new_state));
 	}
+
+	if (flags & DISPATCH_WAKEUP_CONSUME_2) {
+		_dispatch_release_2_tailcall(dq);
+	}
 }
 
+DISPATCH_NOINLINE
+static void
+_dispatch_lane_non_barrier_complete(dispatch_lane_t dq,
+		dispatch_wakeup_flags_t flags)
+{
+	uint64_t old_state, new_state, owner_self = _dispatch_lock_value_for_self();
+
+	// see _dispatch_lane_resume()
+	os_atomic_rmw_loop2o(dq, dq_state, old_state, new_state, relaxed, {
+		new_state = old_state - DISPATCH_QUEUE_WIDTH_INTERVAL;
+		if (unlikely(_dq_state_drain_locked(old_state))) {
+			// make drain_try_unlock() fail and reconsider whether there's
+			// enough width now for a new item
+			new_state |= DISPATCH_QUEUE_DIRTY;
+		} else if (likely(_dq_state_is_runnable(new_state))) {
+			new_state = _dispatch_lane_non_barrier_complete_try_lock(dq,
+					old_state, new_state, owner_self);
+		}
+	});
+
+	_dispatch_lane_non_barrier_complete_finish(dq, flags, old_state, new_state);
+}
 
 DISPATCH_ALWAYS_INLINE
 static inline void
-_dispatch_sync_function_invoke_inline(dispatch_queue_t dq, void *ctxt,
+_dispatch_sync_function_invoke_inline(dispatch_queue_class_t dq, void *ctxt,
 		dispatch_function_t func)
 {
 	dispatch_thread_frame_s dtf;
@@ -3757,7 +995,7 @@
 
 DISPATCH_NOINLINE
 static void
-_dispatch_sync_function_invoke(dispatch_queue_t dq, void *ctxt,
+_dispatch_sync_function_invoke(dispatch_queue_class_t dq, void *ctxt,
 		dispatch_function_t func)
 {
 	_dispatch_sync_function_invoke_inline(dq, ctxt, func);
@@ -3768,13 +1006,13 @@
 _dispatch_sync_complete_recurse(dispatch_queue_t dq, dispatch_queue_t stop_dq,
 		uintptr_t dc_flags)
 {
-	bool barrier = (dc_flags & DISPATCH_OBJ_BARRIER_BIT);
+	bool barrier = (dc_flags & DC_FLAG_BARRIER);
 	do {
 		if (dq == stop_dq) return;
 		if (barrier) {
-			_dispatch_queue_barrier_complete(dq, 0, 0);
+			dx_wakeup(dq, 0, DISPATCH_WAKEUP_BARRIER_COMPLETE);
 		} else {
-			_dispatch_queue_non_barrier_complete(dq);
+			_dispatch_lane_non_barrier_complete(upcast(dq)._dl, 0);
 		}
 		dq = dq->do_targetq;
 		barrier = (dq->dq_width == 1);
@@ -3783,49 +1021,42 @@
 
 DISPATCH_NOINLINE
 static void
-_dispatch_sync_invoke_and_complete_recurse(dispatch_queue_t dq, void *ctxt,
-		dispatch_function_t func, uintptr_t dc_flags)
+_dispatch_sync_invoke_and_complete_recurse(dispatch_queue_class_t dq,
+		void *ctxt, dispatch_function_t func, uintptr_t dc_flags
+		DISPATCH_TRACE_ARG(void *dc))
 {
 	_dispatch_sync_function_invoke_inline(dq, ctxt, func);
-	_dispatch_sync_complete_recurse(dq, NULL, dc_flags);
+	_dispatch_trace_item_complete(dc);
+	_dispatch_sync_complete_recurse(dq._dq, NULL, dc_flags);
 }
 
 DISPATCH_NOINLINE
 static void
-_dispatch_sync_invoke_and_complete(dispatch_queue_t dq, void *ctxt,
-		dispatch_function_t func)
+_dispatch_sync_invoke_and_complete(dispatch_lane_t dq, void *ctxt,
+		dispatch_function_t func DISPATCH_TRACE_ARG(void *dc))
 {
 	_dispatch_sync_function_invoke_inline(dq, ctxt, func);
-	_dispatch_queue_non_barrier_complete(dq);
-}
-
-DISPATCH_NOINLINE
-static void
-_dispatch_barrier_sync_invoke_and_complete(dispatch_queue_t dq, void *ctxt,
-		dispatch_function_t func)
-{
-	_dispatch_sync_function_invoke_inline(dq, ctxt, func);
-	dx_wakeup(dq, 0, DISPATCH_WAKEUP_BARRIER_COMPLETE);
+	_dispatch_trace_item_complete(dc);
+	_dispatch_lane_non_barrier_complete(dq, 0);
 }
 
 /*
- * This is an optimized version of _dispatch_barrier_sync_invoke_and_complete
- *
  * For queues we can cheat and inline the unlock code, which is invalid
  * for objects with a more complex state machine (sources or mach channels)
  */
 DISPATCH_NOINLINE
 static void
-_dispatch_queue_barrier_sync_invoke_and_complete(dispatch_queue_t dq,
-		void *ctxt, dispatch_function_t func)
+_dispatch_lane_barrier_sync_invoke_and_complete(dispatch_lane_t dq,
+		void *ctxt, dispatch_function_t func DISPATCH_TRACE_ARG(void *dc))
 {
 	_dispatch_sync_function_invoke_inline(dq, ctxt, func);
+	_dispatch_trace_item_complete(dc);
 	if (unlikely(dq->dq_items_tail || dq->dq_width > 1)) {
-		return _dispatch_queue_barrier_complete(dq, 0, 0);
+		return _dispatch_lane_barrier_complete(dq, 0, 0);
 	}
 
 	// Presence of any of these bits requires more work that only
-	// _dispatch_queue_barrier_complete() handles properly
+	// _dispatch_*_barrier_complete() handles properly
 	//
 	// Note: testing for RECEIVED_OVERRIDE or RECEIVED_SYNC_WAIT without
 	// checking the role is sloppy, but is a super fast check, and neither of
@@ -3843,7 +1074,7 @@
 		new_state &= ~DISPATCH_QUEUE_MAX_QOS_MASK;
 		if (unlikely(old_state & fail_unlock_mask)) {
 			os_atomic_rmw_loop_give_up({
-				return _dispatch_queue_barrier_complete(dq, 0, 0);
+				return _dispatch_lane_barrier_complete(dq, 0, 0);
 			});
 		}
 	});
@@ -3855,89 +1086,237 @@
 #pragma mark -
 #pragma mark _dispatch_sync_wait / _dispatch_sync_waiter_wake
 
-#define DISPATCH_SYNC_WAITER_NO_UNLOCK (~0ull)
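+// Wake a waiter parked on an anonymous wait (a thread event rather than a
+// workloop): apply any QoS override collected while it waited, then signal
+// its thread event.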
+DISPATCH_NOINLINE
+static void
+_dispatch_waiter_wake_wlh_anon(dispatch_sync_context_t dsc)
+{
+	if (dsc->dsc_override_qos > dsc->dsc_override_qos_floor) {
+		_dispatch_wqthread_override_start(dsc->dsc_waiter,
+				dsc->dsc_override_qos);
+	}
+	_dispatch_thread_event_signal(&dsc->dsc_event);
+}
 
 DISPATCH_NOINLINE
 static void
-_dispatch_sync_waiter_wake(dispatch_sync_context_t dsc,
-		dispatch_wlh_t wlh, uint64_t old_state, uint64_t new_state)
+_dispatch_waiter_wake(dispatch_sync_context_t dsc, dispatch_wlh_t wlh,
+		uint64_t old_state, uint64_t new_state)
 {
 	dispatch_wlh_t waiter_wlh = dsc->dc_data;
 
+#if DISPATCH_USE_KEVENT_WORKLOOP
+	//
+	// We need to interact with a workloop in any of the following 3 cases:
+	// 1. the current owner of the lock has a SYNC_WAIT knote to destroy
+	// 2. the next owner of the lock is a workloop; we need to make sure it has
+	//    a SYNC_WAIT knote to destroy when it later releases the lock
+	// 3. the waiter is waiting on a workloop (which may be different from `wlh`
+	//    if the hierarchy was mutated after the next owner started waiting)
+	//
+	// However, note that even when (2) is true, the next owner may be waiting
+	// without pushing (waiter_wlh == DISPATCH_WLH_ANON), in which case the next
+	// owner is really woken up when the thread event is signaled.
+	//
+#endif
 	if (_dq_state_in_sync_transfer(old_state) ||
 			_dq_state_in_sync_transfer(new_state) ||
 			(waiter_wlh != DISPATCH_WLH_ANON)) {
 		_dispatch_event_loop_wake_owner(dsc, wlh, old_state, new_state);
 	}
-	if (waiter_wlh == DISPATCH_WLH_ANON) {
-		if (dsc->dsc_override_qos > dsc->dsc_override_qos_floor) {
-			_dispatch_wqthread_override_start(dsc->dsc_waiter,
-					dsc->dsc_override_qos);
-		}
-		_dispatch_thread_event_signal(&dsc->dsc_event);
+	if (unlikely(waiter_wlh == DISPATCH_WLH_ANON)) {
+		_dispatch_waiter_wake_wlh_anon(dsc);
 	}
-	_dispatch_introspection_queue_item_complete(dsc->_as_dc);
+}
+
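+// As an async_and_wait waiter is redirected along the target queue hierarchy,
+// fold each queue's requested priority into the waiter's priority and latch
+// the first autorelease policy encountered.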
+DISPATCH_ALWAYS_INLINE
+static inline void
+_dispatch_async_waiter_update(dispatch_sync_context_t dsc,
+		dispatch_queue_class_t dqu)
+{
+	dispatch_queue_t dq = dqu._dq;
+	dispatch_priority_t p = dq->dq_priority & DISPATCH_PRIORITY_REQUESTED_MASK;
+	if (p) {
+		pthread_priority_t pp = _dispatch_priority_to_pp_strip_flags(p);
+		if (pp > (dsc->dc_priority & ~_PTHREAD_PRIORITY_FLAGS_MASK)) {
+			dsc->dc_priority = pp | _PTHREAD_PRIORITY_ENFORCE_FLAG;
+		}
+	}
+
+	if (dsc->dsc_autorelease == 0) {
+		dispatch_queue_flags_t dqf = _dispatch_queue_atomic_flags(dqu);
+		dqf &= (dispatch_queue_flags_t)_DQF_AUTORELEASE_MASK;
+		dsc->dsc_autorelease = (uint8_t)(dqf / DQF_AUTORELEASE_ALWAYS);
+	}
 }
 
 DISPATCH_NOINLINE
 static void
-_dispatch_sync_waiter_redirect_or_wake(dispatch_queue_t dq, uint64_t owned,
+_dispatch_non_barrier_waiter_redirect_or_wake(dispatch_lane_t dq,
 		dispatch_object_t dou)
 {
 	dispatch_sync_context_t dsc = (dispatch_sync_context_t)dou._dc;
-	uint64_t next_owner = 0, old_state, new_state;
-	dispatch_wlh_t wlh = NULL;
+	uint64_t old_state;
 
-	_dispatch_trace_continuation_pop(dq, dsc->_as_dc);
+	dispatch_assert(!(dsc->dc_flags & DC_FLAG_BARRIER));
 
-	if (owned == DISPATCH_SYNC_WAITER_NO_UNLOCK) {
-		dispatch_assert(!(dsc->dc_flags & DISPATCH_OBJ_BARRIER_BIT));
-		new_state = old_state = os_atomic_load2o(dq, dq_state, relaxed);
-	} else {
-		if (dsc->dc_flags & DISPATCH_OBJ_BARRIER_BIT) {
-			next_owner = _dispatch_lock_value_from_tid(dsc->dsc_waiter);
-		}
-		os_atomic_rmw_loop2o(dq, dq_state, old_state, new_state, release, {
-			new_state  = old_state - owned;
-			new_state &= ~DISPATCH_QUEUE_DRAIN_UNLOCK_MASK;
-			new_state &= ~DISPATCH_QUEUE_DIRTY;
-			new_state |= next_owner;
-			if (_dq_state_is_base_wlh(old_state)) {
-				new_state |= DISPATCH_QUEUE_SYNC_TRANSFER;
-			}
-		});
-		if (_dq_state_is_base_wlh(old_state)) {
-			wlh = (dispatch_wlh_t)dq;
-		} else if (_dq_state_received_override(old_state)) {
-			// Ensure that the root queue sees that this thread was overridden.
-			_dispatch_set_basepri_override_qos(_dq_state_max_qos(old_state));
-		}
+again:
+	old_state = os_atomic_load2o(dq, dq_state, relaxed);
+
+	if (dsc->dsc_override_qos < _dq_state_max_qos(old_state)) {
+		dsc->dsc_override_qos = (uint8_t)_dq_state_max_qos(old_state);
 	}
 
-	if (dsc->dc_data == DISPATCH_WLH_ANON) {
-		if (dsc->dsc_override_qos < _dq_state_max_qos(old_state)) {
-			dsc->dsc_override_qos = _dq_state_max_qos(old_state);
-		}
+	if (dsc->dc_flags & DC_FLAG_ASYNC_AND_WAIT) {
+		_dispatch_async_waiter_update(dsc, dq);
 	}
 
 	if (unlikely(_dq_state_is_inner_queue(old_state))) {
 		dispatch_queue_t tq = dq->do_targetq;
 		if (likely(tq->dq_width == 1)) {
-			dsc->dc_flags = DISPATCH_OBJ_BARRIER_BIT |
-					DISPATCH_OBJ_SYNC_WAITER_BIT;
+			dsc->dc_flags |= DC_FLAG_BARRIER;
 		} else {
-			dsc->dc_flags = DISPATCH_OBJ_SYNC_WAITER_BIT;
+			dsc->dc_flags &= ~DC_FLAG_BARRIER;
+			if (_dispatch_queue_try_reserve_sync_width(upcast(tq)._dl)) {
+				dq = upcast(tq)._dl;
+				goto again;
+			}
 		}
-		_dispatch_introspection_queue_item_complete(dsc->_as_dc);
-		return _dispatch_queue_push_sync_waiter(tq, dsc, 0);
+		return dx_push(tq, dsc, 0);
 	}
 
-	return _dispatch_sync_waiter_wake(dsc, wlh, old_state, new_state);
+	if (dsc->dc_flags & DC_FLAG_ASYNC_AND_WAIT) {
+		// _dispatch_barrier_async_and_wait_f_slow() expects dc_other to be the
+		// bottom queue of the graph
+		dsc->dc_other = dq;
+	}
+	return _dispatch_waiter_wake_wlh_anon(dsc);
 }
 
 DISPATCH_NOINLINE
 static void
-_dispatch_queue_class_barrier_complete(dispatch_queue_t dq, dispatch_qos_t qos,
+_dispatch_barrier_waiter_redirect_or_wake(dispatch_queue_class_t dqu,
+		dispatch_object_t dc, dispatch_wakeup_flags_t flags,
+		uint64_t old_state, uint64_t new_state)
+{
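+	// Either redirect the just-popped barrier waiter to an inner target queue,
+	// or wake its thread, fixing up QoS overrides and the workloop thread
+	// request references along the way.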
+	dispatch_sync_context_t dsc = (dispatch_sync_context_t)dc._dc;
+	dispatch_queue_t dq = dqu._dq;
+	dispatch_wlh_t wlh = DISPATCH_WLH_ANON;
+
+	if (dsc->dc_data == DISPATCH_WLH_ANON) {
+		if (dsc->dsc_override_qos < _dq_state_max_qos(old_state)) {
+			dsc->dsc_override_qos = (uint8_t)_dq_state_max_qos(old_state);
+		}
+	}
+
+	if (_dq_state_is_base_wlh(old_state)) {
+		wlh = (dispatch_wlh_t)dq;
+	} else if (_dq_state_received_override(old_state)) {
+		// Ensure that the root queue sees that this thread was overridden.
+		_dispatch_set_basepri_override_qos(_dq_state_max_qos(old_state));
+	}
+
+	if (flags & DISPATCH_WAKEUP_CONSUME_2) {
+		if (_dq_state_is_base_wlh(old_state) &&
+				_dq_state_is_enqueued_on_target(new_state)) {
+			// If the thread request still exists, we need to leave it a +1
+			_dispatch_release_no_dispose(dq);
+		} else {
+			_dispatch_release_2_no_dispose(dq);
+		}
+	} else if (_dq_state_is_base_wlh(old_state) &&
+			_dq_state_is_enqueued_on_target(old_state) &&
+			!_dq_state_is_enqueued_on_target(new_state)) {
+		// If we cleared the enqueued bit, we're about to destroy the workloop
+		// thread request, and we need to consume its +1.
+		_dispatch_release_no_dispose(dq);
+	}
+
+	//
+	// Past this point we are borrowing the reference of the sync waiter
+	//
+	if (unlikely(_dq_state_is_inner_queue(old_state))) {
+		dispatch_queue_t tq = dq->do_targetq;
+		if (dsc->dc_flags & DC_FLAG_ASYNC_AND_WAIT) {
+			_dispatch_async_waiter_update(dsc, dq);
+		}
+		if (likely(tq->dq_width == 1)) {
+			dsc->dc_flags |= DC_FLAG_BARRIER;
+		} else {
+			dispatch_lane_t dl = upcast(tq)._dl;
+			dsc->dc_flags &= ~DC_FLAG_BARRIER;
+			if (_dispatch_queue_try_reserve_sync_width(dl)) {
+				return _dispatch_non_barrier_waiter_redirect_or_wake(dl, dc);
+			}
+		}
+		// passing the QoS of `dq` helps pushing on low priority waiters with
+		// legacy workloops.
+#if DISPATCH_INTROSPECTION
+		dsc->dsc_from_async = false;
+#endif
+		return dx_push(tq, dsc, _dq_state_max_qos(old_state));
+	}
+
+	if (dsc->dc_flags & DC_FLAG_ASYNC_AND_WAIT) {
+		// _dispatch_async_and_wait_f_slow() expects dc_other to be the
+		// bottom queue of the graph
+		dsc->dc_other = dq;
+	}
+#if DISPATCH_INTROSPECTION
+	if (dsc->dsc_from_async) {
+		_dispatch_trace_runtime_event(async_sync_handoff, dq, 0);
+	} else {
+		_dispatch_trace_runtime_event(sync_sync_handoff, dq, 0);
+	}
+#endif // DISPATCH_INTROSPECTION
+	return _dispatch_waiter_wake(dsc, wlh, old_state, new_state);
+}
+
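+// Pop the barrier waiter at the head of the lane and transfer the drain lock
+// to the waiting thread; if the queue became DIRTY in the meantime, re-read
+// the head before deciding whether to drop the enqueued bit.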
+DISPATCH_NOINLINE
+static void
+_dispatch_lane_drain_barrier_waiter(dispatch_lane_t dq,
+		struct dispatch_object_s *dc, dispatch_wakeup_flags_t flags,
+		uint64_t enqueued_bits)
+{
+	dispatch_sync_context_t dsc = (dispatch_sync_context_t)dc;
+	struct dispatch_object_s *next_dc;
+	uint64_t next_owner = 0, old_state, new_state;
+
+	next_owner = _dispatch_lock_value_from_tid(dsc->dsc_waiter);
+	next_dc = _dispatch_queue_pop_head(dq, dc);
+
+transfer_lock_again:
+	os_atomic_rmw_loop2o(dq, dq_state, old_state, new_state, release, {
+		new_state  = old_state;
+		new_state &= ~DISPATCH_QUEUE_DRAIN_UNLOCK_MASK;
+		new_state &= ~DISPATCH_QUEUE_DIRTY;
+		new_state |= next_owner;
+
+		if (_dq_state_is_base_wlh(old_state)) {
+			new_state |= DISPATCH_QUEUE_SYNC_TRANSFER;
+			if (next_dc) {
+				// we know there's a next item, keep the enqueued bit if any
+			} else if (unlikely(_dq_state_is_dirty(old_state))) {
+				os_atomic_rmw_loop_give_up({
+					os_atomic_xor2o(dq, dq_state, DISPATCH_QUEUE_DIRTY, acquire);
+					next_dc = os_atomic_load2o(dq, dq_items_head, relaxed);
+					goto transfer_lock_again;
+				});
+			} else {
+				new_state &= ~DISPATCH_QUEUE_MAX_QOS_MASK;
+				new_state &= ~DISPATCH_QUEUE_ENQUEUED;
+			}
+		} else {
+			new_state -= enqueued_bits;
+		}
+	});
+
+	return _dispatch_barrier_waiter_redirect_or_wake(dq, dc, flags,
+			old_state, new_state);
+}
+
+DISPATCH_NOINLINE
+static void
+_dispatch_lane_class_barrier_complete(dispatch_lane_t dq, dispatch_qos_t qos,
 		dispatch_wakeup_flags_t flags, dispatch_queue_wakeup_target_t target,
 		uint64_t owned)
 {
@@ -3945,7 +1324,7 @@
 	dispatch_queue_t tq;
 
 	if (target == DISPATCH_QUEUE_WAKEUP_MGR) {
-		tq = &_dispatch_mgr_q;
+		tq = _dispatch_mgr_q._as_dq;
 		enqueue = DISPATCH_QUEUE_ENQUEUED_ON_MGR;
 	} else if (target) {
 		tq = (target == DISPATCH_QUEUE_WAKEUP_TARGET) ? dq->do_targetq : target;
@@ -3959,9 +1338,13 @@
 		new_state  = _dq_state_merge_qos(old_state - owned, qos);
 		new_state &= ~DISPATCH_QUEUE_DRAIN_UNLOCK_MASK;
 		if (unlikely(_dq_state_is_suspended(old_state))) {
-			new_state |= DLOCK_OWNER_MASK;
+			if (likely(_dq_state_is_base_wlh(old_state))) {
+				new_state &= ~DISPATCH_QUEUE_ENQUEUED;
+			}
 		} else if (enqueue) {
-			new_state |= enqueue;
+			if (!_dq_state_is_enqueued(old_state)) {
+				new_state |= enqueue;
+			}
 		} else if (unlikely(_dq_state_is_dirty(old_state))) {
 			os_atomic_rmw_loop_give_up({
 				// just renew the drain lock with an acquire barrier, to see
@@ -3972,9 +1355,6 @@
 				flags |= DISPATCH_WAKEUP_BARRIER_COMPLETE;
 				return dx_wakeup(dq, qos, flags);
 			});
-		} else if (_dq_state_is_base_wlh(old_state)) {
-			new_state &= ~DISPATCH_QUEUE_MAX_QOS_MASK;
-			new_state &= ~DISPATCH_QUEUE_ENQUEUED;
 		} else {
 			new_state &= ~DISPATCH_QUEUE_MAX_QOS_MASK;
 		}
@@ -3983,6 +1363,36 @@
 	dispatch_assert(_dq_state_drain_locked_by_self(old_state));
 	dispatch_assert(!_dq_state_is_enqueued_on_manager(old_state));
 
+	if (_dq_state_is_enqueued(new_state)) {
+		_dispatch_trace_runtime_event(sync_async_handoff, dq, 0);
+	}
+
+#if DISPATCH_USE_KEVENT_WORKLOOP
+	if (_dq_state_is_base_wlh(old_state)) {
+		// - Only non-"du_is_direct" sources & mach channels can be enqueued
+		//   on the manager.
+		//
+		// - Only dispatch_source_cancel_and_wait() and
+		//   dispatch_source_set_*_handler() use the barrier complete codepath,
+		//   none of which are used by mach channels.
+		//
+		// Hence no source-ish object can both be a workloop and need to use the
+		// manager at the same time.
+		dispatch_assert(!_dq_state_is_enqueued_on_manager(new_state));
+		if (_dq_state_is_enqueued_on_target(old_state) ||
+				_dq_state_is_enqueued_on_target(new_state) ||
+				_dq_state_received_sync_wait(old_state) ||
+				_dq_state_in_sync_transfer(old_state)) {
+			return _dispatch_event_loop_end_ownership((dispatch_wlh_t)dq,
+					old_state, new_state, flags);
+		}
+		_dispatch_event_loop_assert_not_owned((dispatch_wlh_t)dq);
+		if (flags & DISPATCH_WAKEUP_CONSUME_2) {
+			return _dispatch_release_2_tailcall(dq);
+		}
+		return;
+	}
+#endif
 
 	if (_dq_state_received_override(old_state)) {
 		// Ensure that the root queue sees that this thread was overridden.
@@ -4001,8 +1411,7 @@
 		// the same override so that a new stealer is enqueued because
 		// the previous one may be gone already
 		if (_dq_state_should_override(new_state)) {
-			return _dispatch_queue_class_wakeup_with_override(dq, new_state,
-					flags);
+			return _dispatch_queue_wakeup_with_override(dq, new_state, flags);
 		}
 #endif
 	}
@@ -4013,58 +1422,88 @@
 
 DISPATCH_NOINLINE
 static void
-_dispatch_queue_barrier_complete(dispatch_queue_t dq, dispatch_qos_t qos,
+_dispatch_lane_drain_non_barriers(dispatch_lane_t dq,
+		struct dispatch_object_s *dc, dispatch_wakeup_flags_t flags)
+{
+	size_t owned_width = dq->dq_width;
+	struct dispatch_object_s *next_dc;
+
+	// see _dispatch_lane_drain, go in non barrier mode, and drain items
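+	// (waiters are redirected or woken directly, async items are pushed to
+	// the target queue, until the width is exhausted or a barrier is reached)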
+
+	os_atomic_and2o(dq, dq_state, ~DISPATCH_QUEUE_IN_BARRIER, release);
+
+	do {
+		if (likely(owned_width)) {
+			owned_width--;
+		} else if (_dispatch_object_is_waiter(dc)) {
+			// sync "readers" don't observe the limit
+			_dispatch_queue_reserve_sync_width(dq);
+		} else if (!_dispatch_queue_try_acquire_async(dq)) {
+			// no width left
+			break;
+		}
+		next_dc = _dispatch_queue_pop_head(dq, dc);
+		if (_dispatch_object_is_waiter(dc)) {
+			_dispatch_non_barrier_waiter_redirect_or_wake(dq, dc);
+		} else {
+			_dispatch_continuation_redirect_push(dq, dc,
+					_dispatch_queue_max_qos(dq));
+		}
+drain_again:
+		dc = next_dc;
+	} while (dc && !_dispatch_object_is_barrier(dc));
+
+	uint64_t old_state, new_state, owner_self = _dispatch_lock_value_for_self();
+	uint64_t owned = owned_width * DISPATCH_QUEUE_WIDTH_INTERVAL;
+
+	if (dc) {
+		owned = _dispatch_queue_adjust_owned(dq, owned, dc);
+	}
+
+	os_atomic_rmw_loop2o(dq, dq_state, old_state, new_state, relaxed, {
+		new_state  = old_state - owned;
+		new_state &= ~DISPATCH_QUEUE_DRAIN_UNLOCK_MASK;
+		new_state &= ~DISPATCH_QUEUE_DIRTY;
+
+		// similar to _dispatch_lane_non_barrier_complete():
+		// if by the time we get here all redirected non barrier syncs are
+		// done and returned their width to the queue, we may be the last
+		// chance for the next item to run/be re-driven.
+		if (unlikely(dc)) {
+			new_state |= DISPATCH_QUEUE_DIRTY;
+			new_state = _dispatch_lane_non_barrier_complete_try_lock(dq,
+					old_state, new_state, owner_self);
+		} else if (unlikely(_dq_state_is_dirty(old_state))) {
+			os_atomic_rmw_loop_give_up({
+				os_atomic_xor2o(dq, dq_state, DISPATCH_QUEUE_DIRTY, acquire);
+				next_dc = os_atomic_load2o(dq, dq_items_head, relaxed);
+				goto drain_again;
+			});
+		}
+	});
+
+	old_state -= owned;
+	_dispatch_lane_non_barrier_complete_finish(dq, flags, old_state, new_state);
+}
+
+DISPATCH_NOINLINE
+static void
+_dispatch_lane_barrier_complete(dispatch_lane_class_t dqu, dispatch_qos_t qos,
 		dispatch_wakeup_flags_t flags)
 {
-	dispatch_continuation_t dc_tmp, dc_start = NULL, dc_end = NULL;
 	dispatch_queue_wakeup_target_t target = DISPATCH_QUEUE_WAKEUP_NONE;
-	struct dispatch_object_s *dc = NULL;
-	uint64_t owned = DISPATCH_QUEUE_IN_BARRIER +
-			dq->dq_width * DISPATCH_QUEUE_WIDTH_INTERVAL;
-	size_t count = 0;
-
-	dispatch_assert(dx_metatype(dq) == _DISPATCH_QUEUE_TYPE);
+	dispatch_lane_t dq = dqu._dl;
 
 	if (dq->dq_items_tail && !DISPATCH_QUEUE_IS_SUSPENDED(dq)) {
-		dc = _dispatch_queue_head(dq);
-		if (!_dispatch_object_is_sync_waiter(dc)) {
-			// not a slow item, needs to wake up
-		} else if (likely(dq->dq_width == 1) ||
-				_dispatch_object_is_barrier(dc)) {
-			// rdar://problem/8290662 "barrier/writer lock transfer"
-			dc_start = dc_end = (dispatch_continuation_t)dc;
-			owned = 0;
-			count = 1;
-			dc = _dispatch_queue_next(dq, dc);
-		} else {
-			// <rdar://problem/10164594> "reader lock transfer"
-			// we must not wake waiters immediately because our right
-			// for dequeuing is granted through holding the full "barrier" width
-			// which a signaled work item could relinquish out from our feet
-			dc_start = (dispatch_continuation_t)dc;
-			do {
-				// no check on width here because concurrent queues
-				// do not respect width for blocked readers, the thread
-				// is already spent anyway
-				dc_end = (dispatch_continuation_t)dc;
-				owned -= DISPATCH_QUEUE_WIDTH_INTERVAL;
-				count++;
-				dc = _dispatch_queue_next(dq, dc);
-			} while (dc && _dispatch_object_is_sync_waiter_non_barrier(dc));
+		struct dispatch_object_s *dc = _dispatch_queue_get_head(dq);
+		if (likely(dq->dq_width == 1 || _dispatch_object_is_barrier(dc))) {
+			if (_dispatch_object_is_waiter(dc)) {
+				return _dispatch_lane_drain_barrier_waiter(dq, dc, flags, 0);
+			}
+		} else if (dq->dq_width > 1 && !_dispatch_object_is_barrier(dc)) {
+			return _dispatch_lane_drain_non_barriers(dq, dc, flags);
 		}
 
-		if (count) {
-			do {
-				dc_tmp = dc_start;
-				dc_start = dc_start->do_next;
-				_dispatch_sync_waiter_redirect_or_wake(dq, owned, dc_tmp);
-				owned = DISPATCH_SYNC_WAITER_NO_UNLOCK;
-			} while (dc_tmp != dc_end);
-			if (flags & DISPATCH_WAKEUP_CONSUME_2) {
-				return _dispatch_release_2_tailcall(dq);
-			}
-			return;
-		}
 		if (!(flags & DISPATCH_WAKEUP_CONSUME_2)) {
 			_dispatch_retain_2(dq);
 			flags |= DISPATCH_WAKEUP_CONSUME_2;
@@ -4072,37 +1511,46 @@
 		target = DISPATCH_QUEUE_WAKEUP_TARGET;
 	}
 
-	return _dispatch_queue_class_barrier_complete(dq, qos, flags, target,owned);
+	uint64_t owned = DISPATCH_QUEUE_IN_BARRIER +
+			dq->dq_width * DISPATCH_QUEUE_WIDTH_INTERVAL;
+	return _dispatch_lane_class_barrier_complete(dq, qos, flags, target, owned);
 }
 
-#if DISPATCH_COCOA_COMPAT
 static void
-_dispatch_sync_thread_bound_invoke(void *ctxt)
+_dispatch_async_and_wait_invoke(void *ctxt)
 {
 	dispatch_sync_context_t dsc = ctxt;
-	dispatch_queue_t cq = _dispatch_queue_get_current();
-	dispatch_queue_t orig_dq = dsc->dc_other;
-	dispatch_thread_frame_s dtf;
-	dispatch_assert(_dispatch_queue_is_thread_bound(cq));
+	dispatch_queue_t top_dq = dsc->dc_other;
+	dispatch_invoke_flags_t iflags;
 
 	// the block runs on the thread the queue is bound to and not
-	// on the calling thread, but we mean to see the calling thread
+	// on the calling thread, but we want to see the calling thread
 	// dispatch thread frames, so we fake the link, and then undo it
-	_dispatch_thread_frame_push_and_rebase(&dtf, orig_dq, &dsc->dsc_dtf);
-	_dispatch_client_callout(dsc->dsc_ctxt, dsc->dsc_func);
-	_dispatch_thread_frame_pop(&dtf);
+	iflags = dsc->dsc_autorelease * DISPATCH_INVOKE_AUTORELEASE_ALWAYS;
+	dispatch_invoke_with_autoreleasepool(iflags, {
+		dispatch_thread_frame_s dtf;
+		_dispatch_introspection_sync_begin(top_dq);
+		_dispatch_thread_frame_push_and_rebase(&dtf, top_dq, &dsc->dsc_dtf);
+		_dispatch_client_callout(dsc->dsc_ctxt, dsc->dsc_func);
+		_dispatch_thread_frame_pop(&dtf);
+	});
 
-	// communicate back to _dispatch_sync_wait who the thread bound queue
-	// was so that we skip it during _dispatch_sync_complete_recurse
-	dsc->dc_other = cq;
+	// communicate back to _dispatch_async_and_wait_f_slow and
+	// _dispatch_sync_f_slow on which queue the work item was invoked
+	// so that the *_complete_recurse() call stops unlocking when it reaches it
+	dsc->dc_other = _dispatch_queue_get_current();
 	dsc->dsc_func = NULL;
-	_dispatch_thread_event_signal(&dsc->dsc_event); // release
+
+	if (dsc->dc_data == DISPATCH_WLH_ANON) {
+		_dispatch_thread_event_signal(&dsc->dsc_event); // release
+	} else {
+		_dispatch_event_loop_cancel_waiter(dsc);
+	}
 }
-#endif
 
 DISPATCH_ALWAYS_INLINE
 static inline uint64_t
-_dispatch_sync_wait_prepare(dispatch_queue_t dq)
+_dispatch_wait_prepare(dispatch_queue_t dq)
 {
 	uint64_t old_state, new_state;
 
@@ -4121,10 +1569,9 @@
 }
 
 static void
-_dispatch_sync_waiter_compute_wlh(dispatch_queue_t dq,
-		dispatch_sync_context_t dsc)
+_dispatch_wait_compute_wlh(dispatch_lane_t dq, dispatch_sync_context_t dsc)
 {
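+	// Recurse through the target queue hierarchy to pick the wlh the waiter
+	// parks on: DISPATCH_WLH_ANON when the base is suspended or anonymous,
+	// otherwise the base workloop queue itself.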
-	bool needs_locking = _dispatch_queue_is_legacy(dq);
+	bool needs_locking = _dispatch_queue_is_mutable(dq);
 
 	if (needs_locking) {
 		dsc->dsc_release_storage = true;
@@ -4132,116 +1579,119 @@
 	}
 
 	dispatch_queue_t tq = dq->do_targetq;
-	uint64_t dq_state = _dispatch_sync_wait_prepare(tq);
+	uint64_t tq_state = _dispatch_wait_prepare(tq);
 
-	if (_dq_state_is_suspended(dq_state) ||
-			_dq_state_is_base_anon(dq_state)) {
+	if (_dq_state_is_suspended(tq_state) ||
+			_dq_state_is_base_anon(tq_state)) {
 		dsc->dsc_release_storage = false;
 		dsc->dc_data = DISPATCH_WLH_ANON;
-	} else if (_dq_state_is_base_wlh(dq_state)) {
-		if (dsc->dsc_release_storage) {
+	} else if (_dq_state_is_base_wlh(tq_state)) {
+		if (dx_metatype(tq) == _DISPATCH_WORKLOOP_TYPE) {
+			dsc->dsc_wlh_is_workloop = true;
+			dsc->dsc_release_storage = false;
+		} else if (dsc->dsc_release_storage) {
 			_dispatch_queue_retain_storage(tq);
 		}
 		dsc->dc_data = (dispatch_wlh_t)tq;
 	} else {
-		_dispatch_sync_waiter_compute_wlh(tq, dsc);
+		_dispatch_wait_compute_wlh(upcast(tq)._dl, dsc);
 	}
-	if (needs_locking) _dispatch_queue_sidelock_unlock(dq);
+	if (needs_locking) {
+		if (dsc->dsc_wlh_is_workloop) {
+			_dispatch_queue_atomic_flags_clear(dq, DQF_MUTABLE);
+		}
+		_dispatch_queue_sidelock_unlock(dq);
+	}
 }
 
 DISPATCH_NOINLINE
 static void
-_dispatch_sync_wait(dispatch_queue_t top_dq, void *ctxt,
-		dispatch_function_t func, uintptr_t top_dc_flags,
-		dispatch_queue_t dq, uintptr_t dc_flags)
+__DISPATCH_WAIT_FOR_QUEUE__(dispatch_sync_context_t dsc, dispatch_queue_t dq)
 {
-	pthread_priority_t pp = _dispatch_get_priority();
-	dispatch_tid tid = _dispatch_tid_self();
-	dispatch_qos_t qos;
-	uint64_t dq_state;
-
-	dq_state = _dispatch_sync_wait_prepare(dq);
-	if (unlikely(_dq_state_drain_locked_by(dq_state, tid))) {
+	uint64_t dq_state = _dispatch_wait_prepare(dq);
+	if (unlikely(_dq_state_drain_locked_by(dq_state, dsc->dsc_waiter))) {
 		DISPATCH_CLIENT_CRASH((uintptr_t)dq_state,
 				"dispatch_sync called on queue "
 				"already owned by current thread");
 	}
 
-	struct dispatch_sync_context_s dsc = {
-		.dc_flags    = dc_flags | DISPATCH_OBJ_SYNC_WAITER_BIT,
-		.dc_other    = top_dq,
-		.dc_priority = pp | _PTHREAD_PRIORITY_ENFORCE_FLAG,
-		.dc_voucher  = DISPATCH_NO_VOUCHER,
-		.dsc_func    = func,
-		.dsc_ctxt    = ctxt,
-		.dsc_waiter  = tid,
-	};
+	// Blocks submitted to the main thread MUST run on the main thread, and
+	// dispatch_async_and_wait also executes on the remote context rather than
+	// the current thread.
+	//
+	// For both these cases we need to save the frame linkage for the sake of
+	// _dispatch_async_and_wait_invoke
+	_dispatch_thread_frame_save_state(&dsc->dsc_dtf);
+
 	if (_dq_state_is_suspended(dq_state) ||
 			_dq_state_is_base_anon(dq_state)) {
-		dsc.dc_data = DISPATCH_WLH_ANON;
+		dsc->dc_data = DISPATCH_WLH_ANON;
 	} else if (_dq_state_is_base_wlh(dq_state)) {
-		dsc.dc_data = (dispatch_wlh_t)dq;
+		dsc->dc_data = (dispatch_wlh_t)dq;
 	} else {
-		_dispatch_sync_waiter_compute_wlh(dq, &dsc);
+		_dispatch_wait_compute_wlh(upcast(dq)._dl, dsc);
 	}
-#if DISPATCH_COCOA_COMPAT
-	// It's preferred to execute synchronous blocks on the current thread
-	// due to thread-local side effects, etc. However, blocks submitted
-	// to the main thread MUST be run on the main thread
-	//
-	// Since we don't know whether that will happen, save the frame linkage
-	// for the sake of _dispatch_sync_thread_bound_invoke
-	_dispatch_thread_frame_save_state(&dsc.dsc_dtf);
 
-	// Since the continuation doesn't have the CONSUME bit, the voucher will be
-	// retained on adoption on the thread bound queue if it happens so we can
-	// borrow this thread's reference
-	dsc.dc_voucher = _voucher_get();
-	dsc.dc_func = _dispatch_sync_thread_bound_invoke;
-	dsc.dc_ctxt = &dsc;
-#endif
-
-	if (dsc.dc_data == DISPATCH_WLH_ANON) {
-		dsc.dsc_override_qos_floor = dsc.dsc_override_qos =
-				_dispatch_get_basepri_override_qos_floor();
-		qos = _dispatch_qos_from_pp(pp);
-		_dispatch_thread_event_init(&dsc.dsc_event);
+	if (dsc->dc_data == DISPATCH_WLH_ANON) {
+		dsc->dsc_override_qos_floor = dsc->dsc_override_qos =
+				(uint8_t)_dispatch_get_basepri_override_qos_floor();
+		_dispatch_thread_event_init(&dsc->dsc_event);
+	}
+	dx_push(dq, dsc, _dispatch_qos_from_pp(dsc->dc_priority));
+	_dispatch_trace_runtime_event(sync_wait, dq, 0);
+	if (dsc->dc_data == DISPATCH_WLH_ANON) {
+		_dispatch_thread_event_wait(&dsc->dsc_event); // acquire
 	} else {
-		qos = 0;
+		_dispatch_event_loop_wait_for_ownership(dsc);
 	}
-	_dispatch_queue_push_sync_waiter(dq, &dsc, qos);
-	if (dsc.dc_data == DISPATCH_WLH_ANON) {
-		_dispatch_thread_event_wait(&dsc.dsc_event); // acquire
-		_dispatch_thread_event_destroy(&dsc.dsc_event);
+	if (dsc->dc_data == DISPATCH_WLH_ANON) {
+		_dispatch_thread_event_destroy(&dsc->dsc_event);
 		// If _dispatch_sync_waiter_wake() gave this thread an override,
 		// ensure that the root queue sees it.
-		if (dsc.dsc_override_qos > dsc.dsc_override_qos_floor) {
-			_dispatch_set_basepri_override_qos(dsc.dsc_override_qos);
+		if (dsc->dsc_override_qos > dsc->dsc_override_qos_floor) {
+			_dispatch_set_basepri_override_qos(dsc->dsc_override_qos);
 		}
-	} else {
-		_dispatch_event_loop_wait_for_ownership(&dsc);
 	}
-	_dispatch_introspection_sync_begin(top_dq);
-#if DISPATCH_COCOA_COMPAT
-	if (unlikely(dsc.dsc_func == NULL)) {
-		// Queue bound to a non-dispatch thread, the continuation already ran
-		// so just unlock all the things, except for the thread bound queue
-		dispatch_queue_t bound_dq = dsc.dc_other;
-		return _dispatch_sync_complete_recurse(top_dq, bound_dq, top_dc_flags);
-	}
-#endif
-	_dispatch_sync_invoke_and_complete_recurse(top_dq, ctxt, func,top_dc_flags);
 }
 
+#pragma mark -
+#pragma mark _dispatch_barrier_trysync_or_async_f
+
 DISPATCH_NOINLINE
 static void
-_dispatch_sync_f_slow(dispatch_queue_t dq, void *ctxt,
-		dispatch_function_t func, uintptr_t dc_flags)
+_dispatch_barrier_trysync_or_async_f_complete(dispatch_lane_t dq,
+		void *ctxt, dispatch_function_t func, uint32_t flags)
 {
-	if (unlikely(!dq->do_targetq)) {
-		return _dispatch_sync_function_invoke(dq, ctxt, func);
+	dispatch_wakeup_flags_t wflags = DISPATCH_WAKEUP_BARRIER_COMPLETE;
+
+	_dispatch_sync_function_invoke_inline(dq, ctxt, func);
+	if (flags & DISPATCH_BARRIER_TRYSYNC_SUSPEND) {
+		uint64_t dq_state = os_atomic_sub2o(dq, dq_state,
+				DISPATCH_QUEUE_SUSPEND_INTERVAL, relaxed);
+		if (!_dq_state_is_suspended(dq_state)) {
+			wflags |= DISPATCH_WAKEUP_CONSUME_2;
+		}
 	}
-	_dispatch_sync_wait(dq, ctxt, func, dc_flags, dq, dc_flags);
+	dx_wakeup(dq, 0, wflags);
+}
+
+// Use for mutation of queue-/source-internal state only;
+// ignores the target queue hierarchy!
+DISPATCH_NOINLINE
+void
+_dispatch_barrier_trysync_or_async_f(dispatch_lane_t dq, void *ctxt,
+		dispatch_function_t func, uint32_t flags)
+{
+	dispatch_tid tid = _dispatch_tid_self();
+	uint64_t suspend_count = (flags & DISPATCH_BARRIER_TRYSYNC_SUSPEND) ? 1 : 0;
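+	// Try to acquire the barrier lock synchronously (optionally leaving the
+	// queue suspended); if the queue cannot be locked right away, fall back
+	// to a detached async barrier.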
+	if (unlikely(!_dispatch_queue_try_acquire_barrier_sync_and_suspend(dq, tid,
+			suspend_count))) {
+		return _dispatch_barrier_async_detached_f(dq, ctxt, func);
+	}
+	if (flags & DISPATCH_BARRIER_TRYSYNC_SUSPEND) {
+		_dispatch_retain_2(dq); // see _dispatch_lane_suspend
+	}
+	_dispatch_barrier_trysync_or_async_f_complete(dq, ctxt, func, flags);
 }
 
 #pragma mark -
@@ -4249,7 +1699,46 @@
 
 DISPATCH_NOINLINE
 static void
-_dispatch_sync_recurse(dispatch_queue_t dq, void *ctxt,
+_dispatch_sync_f_slow(dispatch_queue_class_t top_dqu, void *ctxt,
+		dispatch_function_t func, uintptr_t top_dc_flags,
+		dispatch_queue_class_t dqu, uintptr_t dc_flags)
+{
+	dispatch_queue_t top_dq = top_dqu._dq;
+	dispatch_queue_t dq = dqu._dq;
+	if (unlikely(!dq->do_targetq)) {
+		return _dispatch_sync_function_invoke(dq, ctxt, func);
+	}
+
+	pthread_priority_t pp = _dispatch_get_priority();
+	struct dispatch_sync_context_s dsc = {
+		.dc_flags    = DC_FLAG_SYNC_WAITER | dc_flags,
+		.dc_func     = _dispatch_async_and_wait_invoke,
+		.dc_ctxt     = &dsc,
+		.dc_other    = top_dq,
+		.dc_priority = pp | _PTHREAD_PRIORITY_ENFORCE_FLAG,
+		.dc_voucher  = _voucher_get(),
+		.dsc_func    = func,
+		.dsc_ctxt    = ctxt,
+		.dsc_waiter  = _dispatch_tid_self(),
+	};
+
+	_dispatch_trace_item_push(top_dq, &dsc);
+	__DISPATCH_WAIT_FOR_QUEUE__(&dsc, dq);
+
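+	// dsc_func was cleared by _dispatch_async_and_wait_invoke: the work item
+	// already ran in the remote context, so only unlock the queues above the
+	// one it ran on.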
+	if (dsc.dsc_func == NULL) {
+		dispatch_queue_t stop_dq = dsc.dc_other;
+		return _dispatch_sync_complete_recurse(top_dq, stop_dq, top_dc_flags);
+	}
+
+	_dispatch_introspection_sync_begin(top_dq);
+	_dispatch_trace_item_pop(top_dq, &dsc);
+	_dispatch_sync_invoke_and_complete_recurse(top_dq, ctxt, func,top_dc_flags
+			DISPATCH_TRACE_ARG(&dsc));
+}
+
+DISPATCH_NOINLINE
+static void
+_dispatch_sync_recurse(dispatch_lane_t dq, void *ctxt,
 		dispatch_function_t func, uintptr_t dc_flags)
 {
 	dispatch_tid tid = _dispatch_tid_self();
@@ -4258,18 +1747,66 @@
 	do {
 		if (likely(tq->dq_width == 1)) {
 			if (unlikely(!_dispatch_queue_try_acquire_barrier_sync(tq, tid))) {
-				return _dispatch_sync_wait(dq, ctxt, func, dc_flags, tq,
-						DISPATCH_OBJ_BARRIER_BIT);
+				return _dispatch_sync_f_slow(dq, ctxt, func, dc_flags, tq,
+						DC_FLAG_BARRIER);
 			}
 		} else {
-			if (unlikely(!_dispatch_queue_try_reserve_sync_width(tq))) {
-				return _dispatch_sync_wait(dq, ctxt, func, dc_flags, tq, 0);
+			dispatch_queue_concurrent_t dl = upcast(tq)._dl;
+			if (unlikely(!_dispatch_queue_try_reserve_sync_width(dl))) {
+				return _dispatch_sync_f_slow(dq, ctxt, func, dc_flags, tq, 0);
 			}
 		}
 		tq = tq->do_targetq;
 	} while (unlikely(tq->do_targetq));
 
-	return _dispatch_sync_invoke_and_complete_recurse(dq, ctxt, func, dc_flags);
+	_dispatch_introspection_sync_begin(dq);
+	_dispatch_sync_invoke_and_complete_recurse(dq, ctxt, func, dc_flags
+			DISPATCH_TRACE_ARG(_dispatch_trace_item_sync_push_pop(
+					dq, ctxt, func, dc_flags)));
+}
+
+DISPATCH_ALWAYS_INLINE
+static inline void
+_dispatch_barrier_sync_f_inline(dispatch_queue_t dq, void *ctxt,
+		dispatch_function_t func, uintptr_t dc_flags)
+{
+	dispatch_tid tid = _dispatch_tid_self();
+
+	if (unlikely(dx_metatype(dq) != _DISPATCH_LANE_TYPE)) {
+		DISPATCH_CLIENT_CRASH(0, "Queue type doesn't support dispatch_sync");
+	}
+
+	dispatch_lane_t dl = upcast(dq)._dl;
+	// The more correct thing to do would be to merge the qos of the thread
+	// that just acquired the barrier lock into the queue state.
+	//
+	// However this is too expensive for the fast path, so skip doing it.
+	// The chosen tradeoff is that if an enqueue on a lower priority thread
+	// contends with this fast path, this thread may receive a useless override.
+	//
+	// Global concurrent queues and queues bound to non-dispatch threads
+	// always fall into the slow case, see DISPATCH_ROOT_QUEUE_STATE_INIT_VALUE
+	if (unlikely(!_dispatch_queue_try_acquire_barrier_sync(dl, tid))) {
+		return _dispatch_sync_f_slow(dl, ctxt, func, DC_FLAG_BARRIER, dl,
+				DC_FLAG_BARRIER | dc_flags);
+	}
+
+	if (unlikely(dl->do_targetq->do_targetq)) {
+		return _dispatch_sync_recurse(dl, ctxt, func,
+				DC_FLAG_BARRIER | dc_flags);
+	}
+	_dispatch_introspection_sync_begin(dl);
+	_dispatch_lane_barrier_sync_invoke_and_complete(dl, ctxt, func
+			DISPATCH_TRACE_ARG(_dispatch_trace_item_sync_push_pop(
+					dq, ctxt, func, dc_flags | DC_FLAG_BARRIER)));
+}
+
+DISPATCH_NOINLINE
+static void
+_dispatch_barrier_sync_f(dispatch_queue_t dq, void *ctxt,
+		dispatch_function_t func, uintptr_t dc_flags)
+{
+	_dispatch_barrier_sync_f_inline(dq, ctxt, func, dc_flags);
 }
 
 DISPATCH_NOINLINE
@@ -4277,59 +1814,68 @@
 dispatch_barrier_sync_f(dispatch_queue_t dq, void *ctxt,
 		dispatch_function_t func)
 {
-	dispatch_tid tid = _dispatch_tid_self();
+	_dispatch_barrier_sync_f_inline(dq, ctxt, func, 0);
+}
 
-	// The more correct thing to do would be to merge the qos of the thread
-	// that just acquired the barrier lock into the queue state.
-	//
-	// However this is too expensive for the fastpath, so skip doing it.
-	// The chosen tradeoff is that if an enqueue on a lower priority thread
-	// contends with this fastpath, this thread may receive a useless override.
-	//
+DISPATCH_ALWAYS_INLINE
+static inline void
+_dispatch_sync_f_inline(dispatch_queue_t dq, void *ctxt,
+		dispatch_function_t func, uintptr_t dc_flags)
+{
+	if (likely(dq->dq_width == 1)) {
+		return _dispatch_barrier_sync_f(dq, ctxt, func, dc_flags);
+	}
+
+	if (unlikely(dx_metatype(dq) != _DISPATCH_LANE_TYPE)) {
+		DISPATCH_CLIENT_CRASH(0, "Queue type doesn't support dispatch_sync");
+	}
+
+	dispatch_lane_t dl = upcast(dq)._dl;
 	// Global concurrent queues and queues bound to non-dispatch threads
 	// always fall into the slow case, see DISPATCH_ROOT_QUEUE_STATE_INIT_VALUE
-	if (unlikely(!_dispatch_queue_try_acquire_barrier_sync(dq, tid))) {
-		return _dispatch_sync_f_slow(dq, ctxt, func, DISPATCH_OBJ_BARRIER_BIT);
+	if (unlikely(!_dispatch_queue_try_reserve_sync_width(dl))) {
+		return _dispatch_sync_f_slow(dl, ctxt, func, 0, dl, dc_flags);
 	}
 
-	_dispatch_introspection_sync_begin(dq);
 	if (unlikely(dq->do_targetq->do_targetq)) {
-		return _dispatch_sync_recurse(dq, ctxt, func, DISPATCH_OBJ_BARRIER_BIT);
+		return _dispatch_sync_recurse(dl, ctxt, func, dc_flags);
 	}
-	_dispatch_queue_barrier_sync_invoke_and_complete(dq, ctxt, func);
+	_dispatch_introspection_sync_begin(dl);
+	_dispatch_sync_invoke_and_complete(dl, ctxt, func DISPATCH_TRACE_ARG(
+			_dispatch_trace_item_sync_push_pop(dq, ctxt, func, dc_flags)));
+}
+
+DISPATCH_NOINLINE
+static void
+_dispatch_sync_f(dispatch_queue_t dq, void *ctxt, dispatch_function_t func,
+		uintptr_t dc_flags)
+{
+	_dispatch_sync_f_inline(dq, ctxt, func, dc_flags);
 }
 
 DISPATCH_NOINLINE
 void
 dispatch_sync_f(dispatch_queue_t dq, void *ctxt, dispatch_function_t func)
 {
-	if (likely(dq->dq_width == 1)) {
-		return dispatch_barrier_sync_f(dq, ctxt, func);
-	}
-
-	// Global concurrent queues and queues bound to non-dispatch threads
-	// always fall into the slow case, see DISPATCH_ROOT_QUEUE_STATE_INIT_VALUE
-	if (unlikely(!_dispatch_queue_try_reserve_sync_width(dq))) {
-		return _dispatch_sync_f_slow(dq, ctxt, func, 0);
-	}
-
-	_dispatch_introspection_sync_begin(dq);
-	if (unlikely(dq->do_targetq->do_targetq)) {
-		return _dispatch_sync_recurse(dq, ctxt, func, 0);
-	}
-	_dispatch_sync_invoke_and_complete(dq, ctxt, func);
+	_dispatch_sync_f_inline(dq, ctxt, func, 0);
 }
 
 #ifdef __BLOCKS__
 DISPATCH_NOINLINE
 static void
-_dispatch_sync_block_with_private_data(dispatch_queue_t dq,
-		dispatch_block_t work, dispatch_block_flags_t flags)
+_dispatch_sync_block_with_privdata(dispatch_queue_t dq, dispatch_block_t work,
+		uintptr_t dc_flags)
 {
 	dispatch_block_private_data_t dbpd = _dispatch_block_get_data(work);
 	pthread_priority_t op = 0, p = 0;
+	dispatch_block_flags_t flags = dbpd->dbpd_flags;
 
-	flags |= dbpd->dbpd_flags;
+	if (flags & DISPATCH_BLOCK_BARRIER) {
+		dc_flags |= DC_FLAG_BLOCK_WITH_PRIVATE_DATA | DC_FLAG_BARRIER;
+	} else {
+		dc_flags |= DC_FLAG_BLOCK_WITH_PRIVATE_DATA;
+	}
+
 	op = _dispatch_block_invoke_should_set_priority(flags, dbpd->dbpd_priority);
 	if (op) {
 		p = dbpd->dbpd_priority;
@@ -4341,13 +1887,14 @@
 	ov = _dispatch_set_priority_and_voucher(p, v, 0);
 
 	// balanced in d_block_sync_invoke or d_block_wait
-	if (os_atomic_cmpxchg2o(dbpd, dbpd_queue, NULL, dq->_as_oq, relaxed)) {
+	if (os_atomic_cmpxchg2o(dbpd, dbpd_queue, NULL, dq, relaxed)) {
 		_dispatch_retain_2(dq);
 	}
-	if (flags & DISPATCH_BLOCK_BARRIER) {
-		dispatch_barrier_sync_f(dq, work, _dispatch_block_sync_invoke);
+	if (dc_flags & DC_FLAG_BARRIER) {
+		_dispatch_barrier_sync_f(dq, work, _dispatch_block_sync_invoke,
+				dc_flags);
 	} else {
-		dispatch_sync_f(dq, work, _dispatch_block_sync_invoke);
+		_dispatch_sync_f(dq, work, _dispatch_block_sync_invoke, dc_flags);
 	}
 	_dispatch_reset_priority_and_voucher(op, ov);
 }
@@ -4355,488 +1902,1547 @@
 void
 dispatch_barrier_sync(dispatch_queue_t dq, dispatch_block_t work)
 {
+	uintptr_t dc_flags = DC_FLAG_BARRIER | DC_FLAG_BLOCK;
 	if (unlikely(_dispatch_block_has_private_data(work))) {
-		dispatch_block_flags_t flags = DISPATCH_BLOCK_BARRIER;
-		return _dispatch_sync_block_with_private_data(dq, work, flags);
+		return _dispatch_sync_block_with_privdata(dq, work, dc_flags);
 	}
-	dispatch_barrier_sync_f(dq, work, _dispatch_Block_invoke(work));
+	_dispatch_barrier_sync_f(dq, work, _dispatch_Block_invoke(work), dc_flags);
 }
 
+DISPATCH_NOINLINE
 void
 dispatch_sync(dispatch_queue_t dq, dispatch_block_t work)
 {
+	uintptr_t dc_flags = DC_FLAG_BLOCK;
 	if (unlikely(_dispatch_block_has_private_data(work))) {
-		return _dispatch_sync_block_with_private_data(dq, work, 0);
+		return _dispatch_sync_block_with_privdata(dq, work, dc_flags);
 	}
-	dispatch_sync_f(dq, work, _dispatch_Block_invoke(work));
+	_dispatch_sync_f(dq, work, _dispatch_Block_invoke(work), dc_flags);
 }
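+// Example (illustrative sketch): on a serial queue, dispatch_sync() takes the
+// barrier path above and, absent contention, invokes the block on the calling
+// thread.
+//
+//   dispatch_queue_t q = dispatch_queue_create("com.example.serial", NULL);
+//   __block int value = 0;
+//   dispatch_sync(q, ^{ value = 42; });  // returns once the block has run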
 #endif // __BLOCKS__
 
 #pragma mark -
-#pragma mark dispatch_trysync
-
-// Use for mutation of queue-/source-internal state only
-// ignores target queue hierarchy!
-DISPATCH_NOINLINE
-void
-_dispatch_barrier_trysync_or_async_f(dispatch_queue_t dq, void *ctxt,
-		dispatch_function_t func)
-{
-	dispatch_tid tid = _dispatch_tid_self();
-	if (unlikely(!_dispatch_queue_try_acquire_barrier_sync(dq, tid))) {
-		return _dispatch_barrier_async_detached_f(dq, ctxt, func);
-	}
-	_dispatch_barrier_sync_invoke_and_complete(dq, ctxt, func);
-}
-
-DISPATCH_NOINLINE
-static long
-_dispatch_trysync_recurse(dispatch_queue_t dq, void *ctxt,
-		dispatch_function_t f, uintptr_t dc_flags)
-{
-	dispatch_tid tid = _dispatch_tid_self();
-	dispatch_queue_t q, tq = dq->do_targetq;
-
-	for (;;) {
-		if (likely(tq->do_targetq == NULL)) {
-			_dispatch_sync_invoke_and_complete_recurse(dq, ctxt, f, dc_flags);
-			return true;
-		}
-		if (unlikely(_dispatch_queue_cannot_trysync(tq))) {
-			for (q = dq; q != tq; q = q->do_targetq) {
-				_dispatch_queue_atomic_flags_set(q, DQF_CANNOT_TRYSYNC);
-			}
-			break;
-		}
-		if (likely(tq->dq_width == 1)) {
-			if (unlikely(!_dispatch_queue_try_acquire_barrier_sync(tq, tid))) {
-				break;
-			}
-		} else {
-			if (unlikely(!_dispatch_queue_try_reserve_sync_width(tq))) {
-				break;
-			}
-		}
-		tq = tq->do_targetq;
-	}
-
-	_dispatch_sync_complete_recurse(dq, tq, dc_flags);
-	return false;
-}
-
-DISPATCH_NOINLINE
-long
-_dispatch_barrier_trysync_f(dispatch_queue_t dq, void *ctxt,
-		dispatch_function_t f)
-{
-	dispatch_tid tid = _dispatch_tid_self();
-	if (unlikely(!dq->do_targetq)) {
-		DISPATCH_CLIENT_CRASH(dq, "_dispatch_trsync called on a root queue");
-	}
-	if (unlikely(_dispatch_queue_cannot_trysync(dq))) {
-		return false;
-	}
-	if (unlikely(!_dispatch_queue_try_acquire_barrier_sync(dq, tid))) {
-		return false;
-	}
-	return _dispatch_trysync_recurse(dq, ctxt, f, DISPATCH_OBJ_BARRIER_BIT);
-}
-
-DISPATCH_NOINLINE
-long
-_dispatch_trysync_f(dispatch_queue_t dq, void *ctxt, dispatch_function_t f)
-{
-	if (likely(dq->dq_width == 1)) {
-		return _dispatch_barrier_trysync_f(dq, ctxt, f);
-	}
-	if (unlikely(!dq->do_targetq)) {
-		DISPATCH_CLIENT_CRASH(dq, "_dispatch_trsync called on a root queue");
-	}
-	if (unlikely(_dispatch_queue_cannot_trysync(dq))) {
-		return false;
-	}
-	if (unlikely(!_dispatch_queue_try_reserve_sync_width(dq))) {
-		return false;
-	}
-	return _dispatch_trysync_recurse(dq, ctxt, f, 0);
-}
-
-#pragma mark -
-#pragma mark dispatch_queue_wakeup
-
-DISPATCH_NOINLINE
-void
-_dispatch_queue_wakeup(dispatch_queue_t dq, dispatch_qos_t qos,
-		dispatch_wakeup_flags_t flags)
-{
-	dispatch_queue_wakeup_target_t target = DISPATCH_QUEUE_WAKEUP_NONE;
-
-	if (unlikely(flags & DISPATCH_WAKEUP_BARRIER_COMPLETE)) {
-		return _dispatch_queue_barrier_complete(dq, qos, flags);
-	}
-	if (_dispatch_queue_class_probe(dq)) {
-		target = DISPATCH_QUEUE_WAKEUP_TARGET;
-	}
-	return _dispatch_queue_class_wakeup(dq, qos, flags, target);
-}
-
-#if DISPATCH_COCOA_COMPAT || defined(_WIN32)
-DISPATCH_ALWAYS_INLINE
-static inline bool
-_dispatch_runloop_handle_is_valid(dispatch_runloop_handle_t handle)
-{
-#if TARGET_OS_MAC
-	return MACH_PORT_VALID(handle);
-#elif defined(__linux__)
-	return handle >= 0;
-#elif defined(_WIN32)
-	return handle != INVALID_HANDLE_VALUE;
-#else
-#error "runloop support not implemented on this platform"
-#endif
-}
+#pragma mark dispatch_async_and_wait
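+// _dispatch_fake_wlh() temporarily points the thread's wlh TSD at the bottom
+// queue the work item runs on (when that queue is a workloop or a base wlh,
+// DISPATCH_WLH_ANON otherwise), so that code consulting _dispatch_get_wlh()
+// during the callout observes that queue; _dispatch_restore_wlh() puts the
+// previous value back once the invoke completes.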
 
 DISPATCH_ALWAYS_INLINE
-static inline dispatch_runloop_handle_t
-_dispatch_runloop_queue_get_handle(dispatch_queue_t dq)
+static inline dispatch_wlh_t
+_dispatch_fake_wlh(dispatch_queue_t dq)
 {
-#if TARGET_OS_MAC
-	return ((dispatch_runloop_handle_t)(uintptr_t)dq->do_ctxt);
-#elif defined(__linux__)
-	// decode: 0 is a valid fd, so offset by 1 to distinguish from NULL
-	return ((dispatch_runloop_handle_t)(uintptr_t)dq->do_ctxt) - 1;
-#elif defined(_WIN32)
-	return ((dispatch_runloop_handle_t)(uintptr_t)dq->do_ctxt);
-#else
-#error "runloop support not implemented on this platform"
-#endif
+	dispatch_wlh_t new_wlh = DISPATCH_WLH_ANON;
+	if (likely(dx_metatype(dq) == _DISPATCH_WORKLOOP_TYPE) ||
+			_dq_state_is_base_wlh(os_atomic_load2o(dq, dq_state, relaxed))) {
+		new_wlh = (dispatch_wlh_t)dq;
+	}
+	dispatch_wlh_t old_wlh = _dispatch_get_wlh();
+	_dispatch_thread_setspecific(dispatch_wlh_key, new_wlh);
+	return old_wlh;
 }
 
 DISPATCH_ALWAYS_INLINE
 static inline void
-_dispatch_runloop_queue_set_handle(dispatch_queue_t dq, dispatch_runloop_handle_t handle)
+_dispatch_restore_wlh(dispatch_wlh_t wlh)
 {
-#if TARGET_OS_MAC
-	dq->do_ctxt = (void *)(uintptr_t)handle;
-#elif defined(__linux__)
-	// encode: 0 is a valid fd, so offset by 1 to distinguish from NULL
-	dq->do_ctxt = (void *)(uintptr_t)(handle + 1);
-#elif defined(_WIN32)
-	dq->do_ctxt = (void *)(uintptr_t)handle;
-#else
-#error "runloop support not implemented on this platform"
-#endif
-}
-
-DISPATCH_ALWAYS_INLINE
-static inline dispatch_qos_t
-_dispatch_runloop_queue_reset_max_qos(dispatch_queue_class_t dqu)
-{
-	uint64_t old_state, clear_bits = DISPATCH_QUEUE_MAX_QOS_MASK |
-			DISPATCH_QUEUE_RECEIVED_OVERRIDE;
-	old_state = os_atomic_and_orig2o(dqu._dq, dq_state, ~clear_bits, relaxed);
-	return _dq_state_max_qos(old_state);
-}
-#endif
-
-void
-_dispatch_runloop_queue_wakeup(dispatch_queue_t dq, dispatch_qos_t qos,
-		dispatch_wakeup_flags_t flags)
-{
-#if DISPATCH_COCOA_COMPAT
-	if (slowpath(_dispatch_queue_atomic_flags(dq) & DQF_RELEASED)) {
-		// <rdar://problem/14026816>
-		return _dispatch_queue_wakeup(dq, qos, flags);
-	}
-
-	if (flags & DISPATCH_WAKEUP_MAKE_DIRTY) {
-		os_atomic_or2o(dq, dq_state, DISPATCH_QUEUE_DIRTY, release);
-	}
-	if (_dispatch_queue_class_probe(dq)) {
-		return _dispatch_runloop_queue_poke(dq, qos, flags);
-	}
-
-	qos = _dispatch_runloop_queue_reset_max_qos(dq);
-	if (qos) {
-		mach_port_t owner = DISPATCH_QUEUE_DRAIN_OWNER(dq);
-		if (_dispatch_queue_class_probe(dq)) {
-			_dispatch_runloop_queue_poke(dq, qos, flags);
-		}
-		_dispatch_thread_override_end(owner, dq);
-		return;
-	}
-	if (flags & DISPATCH_WAKEUP_CONSUME_2) {
-		return _dispatch_release_2_tailcall(dq);
-	}
-#else
-	return _dispatch_queue_wakeup(dq, qos, flags);
-#endif
-}
-
-void
-_dispatch_main_queue_wakeup(dispatch_queue_t dq, dispatch_qos_t qos,
-		dispatch_wakeup_flags_t flags)
-{
-#if DISPATCH_COCOA_COMPAT
-	if (_dispatch_queue_is_thread_bound(dq)) {
-		return _dispatch_runloop_queue_wakeup(dq, qos, flags);
-	}
-#endif
-	return _dispatch_queue_wakeup(dq, qos, flags);
-}
-
-#pragma mark -
-#pragma mark dispatch root queues poke
-
-#if DISPATCH_COCOA_COMPAT
-static inline void
-_dispatch_runloop_queue_class_poke(dispatch_queue_t dq)
-{
-	dispatch_runloop_handle_t handle = _dispatch_runloop_queue_get_handle(dq);
-	if (!_dispatch_runloop_handle_is_valid(handle)) {
-		return;
-	}
-
-#if HAVE_MACH
-	mach_port_t mp = handle;
-	kern_return_t kr = _dispatch_send_wakeup_runloop_thread(mp, 0);
-	switch (kr) {
-	case MACH_SEND_TIMEOUT:
-	case MACH_SEND_TIMED_OUT:
-	case MACH_SEND_INVALID_DEST:
-		break;
-	default:
-		(void)dispatch_assume_zero(kr);
-		break;
-	}
-#elif defined(__linux__)
-	int result;
-	do {
-		result = eventfd_write(handle, 1);
-	} while (result == -1 && errno == EINTR);
-	(void)dispatch_assume_zero(result);
-#else
-#error "runloop support not implemented on this platform"
-#endif
+	_dispatch_thread_setspecific(dispatch_wlh_key, wlh);
 }
 
 DISPATCH_NOINLINE
 static void
-_dispatch_runloop_queue_poke(dispatch_queue_t dq, dispatch_qos_t qos,
-		dispatch_wakeup_flags_t flags)
+_dispatch_async_and_wait_invoke_and_complete_recurse(dispatch_queue_t dq,
+		dispatch_sync_context_t dsc, dispatch_queue_t bottom_q,
+		uintptr_t top_dc_flags)
 {
-	// it's not useful to handle WAKEUP_MAKE_DIRTY because mach_msg() will have
-	// a release barrier and that when runloop queues stop being thread-bound
-	// they have a non optional wake-up to start being a "normal" queue
-	// either in _dispatch_runloop_queue_xref_dispose,
-	// or in _dispatch_queue_cleanup2() for the main thread.
-	uint64_t old_state, new_state;
+	dispatch_invoke_flags_t iflags;
+	dispatch_wlh_t old_wlh = _dispatch_fake_wlh(bottom_q);
 
-	if (dq == &_dispatch_main_q) {
-		dispatch_once_f(&_dispatch_main_q_handle_pred, dq,
-				_dispatch_runloop_queue_handle_init);
+	iflags = dsc->dsc_autorelease * DISPATCH_INVOKE_AUTORELEASE_ALWAYS;
+	dispatch_invoke_with_autoreleasepool(iflags, {
+		dispatch_block_flags_t bflags = DISPATCH_BLOCK_HAS_PRIORITY;
+		dispatch_thread_frame_s dtf;
+		pthread_priority_t op = 0, p = dsc->dc_priority;
+		voucher_t ov, v = dsc->dc_voucher;
+
+		_dispatch_introspection_sync_begin(dq);
+		_dispatch_thread_frame_push(&dtf, dq);
+		op = _dispatch_block_invoke_should_set_priority(bflags, p);
+		ov = _dispatch_set_priority_and_voucher(op ? p : 0, v, 0);
+		_dispatch_trace_item_pop(dq, dsc);
+		_dispatch_client_callout(dsc->dsc_ctxt, dsc->dsc_func);
+		_dispatch_perfmon_workitem_inc();
+		_dispatch_reset_priority_and_voucher(op, ov);
+		_dispatch_thread_frame_pop(&dtf);
+	});
+
+	_dispatch_trace_item_complete(dsc);
+
+	_dispatch_restore_wlh(old_wlh);
+	_dispatch_sync_complete_recurse(dq, NULL, top_dc_flags);
+}
+
+DISPATCH_NOINLINE
+static void
+_dispatch_async_and_wait_f_slow(dispatch_queue_t dq, uintptr_t top_dc_flags,
+		dispatch_sync_context_t dsc, dispatch_queue_t tq)
+{
+	__DISPATCH_WAIT_FOR_QUEUE__(dsc, tq);
+
+	if (unlikely(dsc->dsc_func == NULL)) {
+		// see _dispatch_async_and_wait_invoke
+		dispatch_queue_t stop_dq = dsc->dc_other;
+		return _dispatch_sync_complete_recurse(dq, stop_dq, top_dc_flags);
+	}
+
+	// see _dispatch_*_redirect_or_wake
+	dispatch_queue_t bottom_q = dsc->dc_other;
+	return _dispatch_async_and_wait_invoke_and_complete_recurse(dq, dsc,
+			bottom_q, top_dc_flags);
+}
+
+DISPATCH_ALWAYS_INLINE
+static inline bool
+_dispatch_async_and_wait_should_always_async(dispatch_queue_class_t dqu,
+		uint64_t dq_state)
+{
+	// If the queue is anchored at a pthread root queue for which we can't
+	// mirror attributes, then we need to take the async path.
+	return !_dq_state_is_inner_queue(dq_state) &&
+			!_dispatch_is_in_root_queues_array(dqu._dq->do_targetq);
+}
+
+DISPATCH_ALWAYS_INLINE
+static inline bool
+_dispatch_async_and_wait_recurse_one(dispatch_queue_t dq, dispatch_tid tid,
+		uintptr_t dc_flags)
+{
+	uint64_t dq_state = os_atomic_load2o(dq, dq_state, relaxed);
+	if (unlikely(_dispatch_async_and_wait_should_always_async(dq, dq_state))) {
+		return false;
+	}
+	if (likely(dc_flags & DC_FLAG_BARRIER)) {
+		return _dispatch_queue_try_acquire_barrier_sync(dq, tid);
+	}
+	return _dispatch_queue_try_reserve_sync_width(upcast(dq)._dl);
+}
+
+DISPATCH_NOINLINE
+static void
+_dispatch_async_and_wait_recurse(dispatch_queue_t top_dq,
+		dispatch_sync_context_t dsc, dispatch_tid tid, uintptr_t top_flags)
+{
+	dispatch_queue_t dq = top_dq;
+	uintptr_t dc_flags = top_flags;
+
+	_dispatch_trace_item_push(top_dq, dsc);
+
+	for (;;) {
+		if (unlikely(!_dispatch_async_and_wait_recurse_one(dq, tid, dc_flags))){
+			return _dispatch_async_and_wait_f_slow(top_dq, top_flags, dsc, dq);
+		}
+
+		_dispatch_async_waiter_update(dsc, dq);
+		if (likely(!dq->do_targetq->do_targetq)) break;
+		dq = dq->do_targetq;
+		if (likely(dq->dq_width == 1)) {
+			dc_flags |= DC_FLAG_BARRIER;
+		} else {
+			dc_flags &= ~DC_FLAG_BARRIER;
+		}
+		dsc->dc_flags = dc_flags;
+	}
+
+	_dispatch_async_and_wait_invoke_and_complete_recurse(top_dq, dsc, dq,
+			top_flags);
+}
+
+DISPATCH_NOINLINE
+static void
+_dispatch_async_and_wait_f(dispatch_queue_t dq,
+		void *ctxt, dispatch_function_t func, uintptr_t dc_flags)
+{
+	pthread_priority_t pp = _dispatch_get_priority();
+	dispatch_tid tid = _dispatch_tid_self();
+	struct dispatch_sync_context_s dsc = {
+		.dc_flags    = dc_flags,
+		.dc_func     = _dispatch_async_and_wait_invoke,
+		.dc_ctxt     = &dsc,
+		.dc_other    = dq,
+		.dc_priority = pp | _PTHREAD_PRIORITY_ENFORCE_FLAG,
+		.dc_voucher  = _voucher_get(),
+		.dsc_func    = func,
+		.dsc_ctxt    = ctxt,
+		.dsc_waiter  = tid,
+	};
+
+	return _dispatch_async_and_wait_recurse(dq, &dsc, tid, dc_flags);
+}
+
+DISPATCH_NOINLINE
+void
+dispatch_async_and_wait_f(dispatch_queue_t dq, void *ctxt,
+		dispatch_function_t func)
+{
+	if (unlikely(!dq->do_targetq)) {
+		return _dispatch_sync_function_invoke(dq, ctxt, func);
+	}
+
+	uintptr_t dc_flags = DC_FLAG_ASYNC_AND_WAIT;
+	if (likely(dq->dq_width == 1)) dc_flags |= DC_FLAG_BARRIER;
+	return _dispatch_async_and_wait_f(dq, ctxt, func, dc_flags);
+}
+
+DISPATCH_NOINLINE
+void
+dispatch_barrier_async_and_wait_f(dispatch_queue_t dq, void *ctxt,
+		dispatch_function_t func)
+{
+	if (unlikely(!dq->do_targetq)) {
+		return _dispatch_sync_function_invoke(dq, ctxt, func);
+	}
+
+	uintptr_t dc_flags = DC_FLAG_ASYNC_AND_WAIT | DC_FLAG_BARRIER;
+	return _dispatch_async_and_wait_f(dq, ctxt, func, dc_flags);
+}
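+// Note: dispatch_async_and_wait_f() behaves like submitting the work item
+// with dispatch_async_f() and then waiting for it. Unlike dispatch_sync_f(),
+// the item observes the queue's attributes (e.g. autorelease frequency), and
+// it may be invoked by a worker thread already draining the queue rather than
+// by the caller (see _dispatch_async_and_wait_f_slow above).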
+
+#ifdef __BLOCKS__
+DISPATCH_NOINLINE
+static void
+_dispatch_async_and_wait_block_with_privdata(dispatch_queue_t dq,
+		dispatch_block_t work, uintptr_t dc_flags)
+{
+	dispatch_block_private_data_t dbpd = _dispatch_block_get_data(work);
+	dispatch_block_flags_t flags = dbpd->dbpd_flags;
+	pthread_priority_t pp;
+	voucher_t v;
+
+	if (dbpd->dbpd_flags & DISPATCH_BLOCK_BARRIER) {
+		dc_flags |= DC_FLAG_BLOCK_WITH_PRIVATE_DATA | DC_FLAG_BARRIER;
+	} else {
+		dc_flags |= DC_FLAG_BLOCK_WITH_PRIVATE_DATA;
+	}
+
+	if (_dispatch_block_invoke_should_set_priority(flags, dbpd->dbpd_priority)){
+		pp = dbpd->dbpd_priority;
+	} else {
+		pp = _dispatch_get_priority();
+	}
+	if (dbpd->dbpd_flags & DISPATCH_BLOCK_HAS_VOUCHER) {
+		v = dbpd->dbpd_voucher;
+	} else {
+		v = _voucher_get();
+	}
+
+	// balanced in d_block_sync_invoke or d_block_wait
+	if (os_atomic_cmpxchg2o(dbpd, dbpd_queue, NULL, dq, relaxed)) {
+		_dispatch_retain_2(dq);
+	}
+
+	dispatch_tid tid = _dispatch_tid_self();
+	struct dispatch_sync_context_s dsc = {
+		.dc_flags    = dc_flags,
+		.dc_func     = _dispatch_async_and_wait_invoke,
+		.dc_ctxt     = &dsc,
+		.dc_other    = dq,
+		.dc_priority = pp | _PTHREAD_PRIORITY_ENFORCE_FLAG,
+		.dc_voucher  = v,
+		.dsc_func    = _dispatch_block_sync_invoke,
+		.dsc_ctxt    = work,
+		.dsc_waiter  = tid,
+	};
+
+	return _dispatch_async_and_wait_recurse(dq, &dsc, tid, dc_flags);
+}
+
+void
+dispatch_barrier_async_and_wait(dispatch_queue_t dq, dispatch_block_t work)
+{
+	if (unlikely(!dq->do_targetq)) {
+		return dispatch_barrier_sync(dq, work);
+	}
+
+	uintptr_t dc_flags = DC_FLAG_ASYNC_AND_WAIT | DC_FLAG_BLOCK|DC_FLAG_BARRIER;
+	if (unlikely(_dispatch_block_has_private_data(work))) {
+		return _dispatch_async_and_wait_block_with_privdata(dq, work, dc_flags);
+	}
+
+	dispatch_function_t func = _dispatch_Block_invoke(work);
+	return _dispatch_async_and_wait_f(dq, work, func, dc_flags);
+}
+
+void
+dispatch_async_and_wait(dispatch_queue_t dq, dispatch_block_t work)
+{
+	if (unlikely(!dq->do_targetq)) {
+		return dispatch_sync(dq, work);
+	}
+
+	uintptr_t dc_flags = DC_FLAG_ASYNC_AND_WAIT | DC_FLAG_BLOCK;
+	if (likely(dq->dq_width == 1)) dc_flags |= DC_FLAG_BARRIER;
+	if (unlikely(_dispatch_block_has_private_data(work))) {
+		return _dispatch_async_and_wait_block_with_privdata(dq, work, dc_flags);
+	}
+
+	dispatch_function_t func = _dispatch_Block_invoke(work);
+	return _dispatch_async_and_wait_f(dq, work, func, dc_flags);
+}
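+// Example (illustrative sketch; load_config() is a hypothetical helper):
+//
+//   dispatch_queue_t q = dispatch_queue_create("com.example.worker", NULL);
+//   __block bool ok = false;
+//   dispatch_async_and_wait(q, ^{ ok = load_config(); });
+//   // returns once the block has run, possibly on a worker thread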
+#endif // __BLOCKS__
+
+#pragma mark -
+#pragma mark dispatch_queue_specific
+
+static void
+_dispatch_queue_specific_head_dispose_slow(void *ctxt)
+{
+	dispatch_queue_specific_head_t dqsh = ctxt;
+	dispatch_queue_specific_t dqs, tmp;
+
+	TAILQ_FOREACH_SAFE(dqs, &dqsh->dqsh_entries, dqs_entry, tmp) {
+		dispatch_assert(dqs->dqs_destructor);
+		_dispatch_client_callout(dqs->dqs_ctxt, dqs->dqs_destructor);
+		free(dqs);
+	}
+	free(dqsh);
+}
+
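+// Entries that still have a destructor are not destroyed in place: the head
+// is handed to the default root queue so the client-supplied destructor runs
+// from a detached barrier callout (_dispatch_queue_specific_head_dispose_slow
+// above); entries without a destructor are simply freed here.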
+static void
+_dispatch_queue_specific_head_dispose(dispatch_queue_specific_head_t dqsh)
+{
+	dispatch_queue_t rq = _dispatch_get_default_queue(false);
+	dispatch_queue_specific_t dqs, tmp;
+	TAILQ_HEAD(, dispatch_queue_specific_s) entries =
+			TAILQ_HEAD_INITIALIZER(entries);
+
+	TAILQ_CONCAT(&entries, &dqsh->dqsh_entries, dqs_entry);
+	TAILQ_FOREACH_SAFE(dqs, &entries, dqs_entry, tmp) {
+		if (dqs->dqs_destructor) {
+			TAILQ_INSERT_TAIL(&dqsh->dqsh_entries, dqs, dqs_entry);
+		} else {
+			free(dqs);
+		}
+	}
+
+	if (TAILQ_EMPTY(&dqsh->dqsh_entries)) {
+		free(dqsh);
+	} else {
+		_dispatch_barrier_async_detached_f(rq, dqsh,
+				_dispatch_queue_specific_head_dispose_slow);
+	}
+}
+
+DISPATCH_NOINLINE
+static void
+_dispatch_queue_init_specific(dispatch_queue_t dq)
+{
+	dispatch_queue_specific_head_t dqsh;
+
+	dqsh = _dispatch_calloc(1, sizeof(struct dispatch_queue_specific_head_s));
+	TAILQ_INIT(&dqsh->dqsh_entries);
+	if (unlikely(!os_atomic_cmpxchg2o(dq, dq_specific_head,
+			NULL, dqsh, release))) {
+		_dispatch_queue_specific_head_dispose(dqsh);
+	}
+}
+
+DISPATCH_ALWAYS_INLINE
+static inline dispatch_queue_specific_t
+_dispatch_queue_specific_find(dispatch_queue_specific_head_t dqsh,
+		const void *key)
+{
+	dispatch_queue_specific_t dqs;
+
+	TAILQ_FOREACH(dqs, &dqsh->dqsh_entries, dqs_entry) {
+		if (dqs->dqs_key == key) {
+			return dqs;
+		}
+	}
+	return NULL;
+}
+
+DISPATCH_ALWAYS_INLINE
+static inline bool
+_dispatch_queue_admits_specific(dispatch_queue_t dq)
+{
+	if (dx_metatype(dq) == _DISPATCH_LANE_TYPE) {
+		return (dx_type(dq) == DISPATCH_QUEUE_MAIN_TYPE ||
+			!dx_hastypeflag(dq, QUEUE_BASE));
+	}
+	return dx_metatype(dq) == _DISPATCH_WORKLOOP_TYPE;
+}
+
+DISPATCH_NOINLINE
+void
+dispatch_queue_set_specific(dispatch_queue_t dq, const void *key,
+	void *ctxt, dispatch_function_t destructor)
+{
+	if (unlikely(!key)) {
+		return;
+	}
+	dispatch_queue_t rq = _dispatch_get_default_queue(false);
+	dispatch_queue_specific_head_t dqsh = dq->dq_specific_head;
+	dispatch_queue_specific_t dqs;
+
+	if (unlikely(!_dispatch_queue_admits_specific(dq))) {
+		DISPATCH_CLIENT_CRASH(0,
+				"Queue doesn't support dispatch_queue_set_specific");
+	}
+
+	if (ctxt && !dqsh) {
+		_dispatch_queue_init_specific(dq);
+		dqsh = dq->dq_specific_head;
+	} else if (!dqsh) {
+		return;
+	}
+
+	_dispatch_unfair_lock_lock(&dqsh->dqsh_lock);
+	dqs = _dispatch_queue_specific_find(dqsh, key);
+	if (dqs) {
+		if (dqs->dqs_destructor) {
+			_dispatch_barrier_async_detached_f(rq, dqs->dqs_ctxt,
+					dqs->dqs_destructor);
+		}
+		if (ctxt) {
+			dqs->dqs_ctxt = ctxt;
+			dqs->dqs_destructor = destructor;
+		} else {
+			TAILQ_REMOVE(&dqsh->dqsh_entries, dqs, dqs_entry);
+			free(dqs);
+		}
+	} else if (ctxt) {
+		dqs = _dispatch_calloc(1, sizeof(struct dispatch_queue_specific_s));
+		dqs->dqs_key = key;
+		dqs->dqs_ctxt = ctxt;
+		dqs->dqs_destructor = destructor;
+		TAILQ_INSERT_TAIL(&dqsh->dqsh_entries, dqs, dqs_entry);
+	}
+
+	_dispatch_unfair_lock_unlock(&dqsh->dqsh_lock);
+}
+
+DISPATCH_ALWAYS_INLINE
+static inline void *
+_dispatch_queue_get_specific_inline(dispatch_queue_t dq, const void *key)
+{
+	dispatch_queue_specific_head_t dqsh = dq->dq_specific_head;
+	dispatch_queue_specific_t dqs;
+	void *ctxt = NULL;
+
+	if (likely(_dispatch_queue_admits_specific(dq) && dqsh)) {
+		_dispatch_unfair_lock_lock(&dqsh->dqsh_lock);
+		dqs = _dispatch_queue_specific_find(dqsh, key);
+		if (dqs) ctxt = dqs->dqs_ctxt;
+		_dispatch_unfair_lock_unlock(&dqsh->dqsh_lock);
+	}
+	return ctxt;
+}
+
+DISPATCH_NOINLINE
+void *
+dispatch_queue_get_specific(dispatch_queue_t dq, const void *key)
+{
+	if (unlikely(!key)) {
+		return NULL;
+	}
+	return _dispatch_queue_get_specific_inline(dq, key);
+}
+
+DISPATCH_NOINLINE
+void *
+dispatch_get_specific(const void *key)
+{
+	dispatch_queue_t dq = _dispatch_queue_get_current();
+	void *ctxt = NULL;
+
+	if (likely(key && dq)) {
+		do {
+			ctxt = _dispatch_queue_get_specific_inline(dq, key);
+			dq = dq->do_targetq;
+		} while (unlikely(ctxt == NULL && dq));
+	}
+	return ctxt;
+}
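+// Example (illustrative sketch; the key, context and destructor are
+// hypothetical):
+//
+//   static char kAssocKey;
+//   dispatch_queue_set_specific(q, &kAssocKey, ctxt, destructor_fn);
+//   // from a block running on q, or on any queue targeting q:
+//   void *v = dispatch_get_specific(&kAssocKey);
+//
+// Passing a NULL context removes the key; a previously stored context is
+// handed to its destructor asynchronously on a default-priority root queue.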
+
+#pragma mark -
+#pragma mark dispatch_queue_t / dispatch_lane_t
+
+void
+dispatch_queue_set_label_nocopy(dispatch_queue_t dq, const char *label)
+{
+	if (unlikely(_dispatch_object_is_global(dq))) {
+		return;
+	}
+	dispatch_queue_flags_t dqf = _dispatch_queue_atomic_flags(dq);
+	if (unlikely(dqf & DQF_LABEL_NEEDS_FREE)) {
+		DISPATCH_CLIENT_CRASH(dq, "Cannot change label for this queue");
+	}
+	dq->dq_label = label;
+}
+
+static inline bool
+_dispatch_base_lane_is_wlh(dispatch_lane_t dq, dispatch_queue_t tq)
+{
+#if DISPATCH_USE_KEVENT_WORKLOOP
+	if (unlikely(!_dispatch_kevent_workqueue_enabled)) {
+		return false;
+	}
+	if (dx_type(dq) == DISPATCH_QUEUE_NETWORK_EVENT_TYPE) {
+		return true;
+	}
+	if (dx_metatype(dq) == _DISPATCH_SOURCE_TYPE) {
+		// Sources don't support sync waiters, so sources that never change
+		// QoS don't benefit from any of the workloop features (which have
+		// overhead); just use the workqueue kqueue for these.
+		if (likely(!upcast(dq)._ds->ds_refs->du_can_be_wlh)) {
+			return false;
+		}
+		dispatch_assert(upcast(dq)._ds->ds_refs->du_is_direct);
+	}
+	return dq->dq_width == 1 && _dispatch_is_in_root_queues_array(tq);
+#else
+	(void)dq; (void)tq;
+	return false;
+#endif // DISPATCH_USE_KEVENT_WORKLOOP
+}
+
+static void
+_dispatch_lane_inherit_wlh_from_target(dispatch_lane_t dq, dispatch_queue_t tq)
+{
+	uint64_t old_state, new_state, role;
+
+	if (!dx_hastypeflag(tq, QUEUE_ROOT)) {
+		role = DISPATCH_QUEUE_ROLE_INNER;
+	} else if (_dispatch_base_lane_is_wlh(dq, tq)) {
+		role = DISPATCH_QUEUE_ROLE_BASE_WLH;
+	} else {
+		role = DISPATCH_QUEUE_ROLE_BASE_ANON;
 	}
 
 	os_atomic_rmw_loop2o(dq, dq_state, old_state, new_state, relaxed, {
-		new_state = _dq_state_merge_qos(old_state, qos);
+		new_state = old_state & ~DISPATCH_QUEUE_ROLE_MASK;
+		new_state |= role;
 		if (old_state == new_state) {
-			os_atomic_rmw_loop_give_up(goto no_change);
+			os_atomic_rmw_loop_give_up(break);
 		}
 	});
 
-	dispatch_qos_t dq_qos = _dispatch_priority_qos(dq->dq_priority);
-	if (qos > dq_qos) {
-		mach_port_t owner = _dq_state_drain_owner(new_state);
-		pthread_priority_t pp = _dispatch_qos_to_pp(qos);
-		_dispatch_thread_override_start(owner, pp, dq);
-		if (_dq_state_max_qos(old_state) > dq_qos) {
-			_dispatch_thread_override_end(owner, dq);
+	if (_dq_state_is_base_wlh(old_state) && !_dq_state_is_base_wlh(new_state)) {
+		dispatch_deferred_items_t ddi = _dispatch_deferred_items_get();
+		if (ddi && ddi->ddi_wlh == (dispatch_wlh_t)dq) {
+			_dispatch_event_loop_leave_immediate(new_state);
 		}
 	}
-no_change:
-	_dispatch_runloop_queue_class_poke(dq);
-	if (flags & DISPATCH_WAKEUP_CONSUME_2) {
-		return _dispatch_release_2_tailcall(dq);
+	if (!dx_hastypeflag(tq, QUEUE_ROOT)) {
+		dispatch_queue_flags_t clear = 0, set = DQF_TARGETED;
+		if (dx_metatype(tq) == _DISPATCH_WORKLOOP_TYPE) {
+			clear |= DQF_MUTABLE;
+#if !DISPATCH_ALLOW_NON_LEAF_RETARGET
+		} else {
+			clear |= DQF_MUTABLE;
+#endif
+		}
+		if (clear) {
+			_dispatch_queue_atomic_flags_set_and_clear(tq, set, clear);
+		} else {
+			_dispatch_queue_atomic_flags_set(tq, set);
+		}
 	}
 }
+
+dispatch_priority_t
+_dispatch_queue_compute_priority_and_wlh(dispatch_queue_t dq,
+		dispatch_wlh_t *wlh_out)
+{
+	dispatch_priority_t dpri = dq->dq_priority;
+	dispatch_priority_t p = dpri & DISPATCH_PRIORITY_REQUESTED_MASK;
+	dispatch_qos_t fallback = _dispatch_priority_fallback_qos(dpri);
+	dispatch_queue_t tq = dq->do_targetq;
+	dispatch_wlh_t wlh = DISPATCH_WLH_ANON;
+
+	if (_dq_state_is_base_wlh(dq->dq_state)) {
+		wlh = (dispatch_wlh_t)dq;
+	}
+
+	while (unlikely(!dx_hastypeflag(tq, QUEUE_ROOT))) {
+		if (unlikely(tq == _dispatch_mgr_q._as_dq)) {
+			if (wlh_out) *wlh_out = DISPATCH_WLH_ANON;
+			return DISPATCH_PRIORITY_FLAG_MANAGER;
+		}
+		if (unlikely(_dispatch_queue_is_thread_bound(tq))) {
+			if (wlh_out) *wlh_out = DISPATCH_WLH_ANON;
+			return tq->dq_priority;
+		}
+		if (unlikely(DISPATCH_QUEUE_IS_SUSPENDED(tq))) {
+			// this queue may not be activated yet, so the queue graph may not
+			// have stabilized yet
+			_dispatch_ktrace2(DISPATCH_PERF_delayed_registration, dq,
+					  dx_metatype(dq) == _DISPATCH_SOURCE_TYPE ? dq : NULL);
+			if (wlh_out) *wlh_out = NULL;
+			return 0;
+		}
+
+		if (_dq_state_is_base_wlh(tq->dq_state)) {
+			wlh = (dispatch_wlh_t)tq;
+			if (dx_metatype(tq) == _DISPATCH_WORKLOOP_TYPE) {
+				_dispatch_queue_atomic_flags_clear(dq, DQF_MUTABLE);
+			}
+		} else if (unlikely(_dispatch_queue_is_mutable(tq))) {
+			// we're not allowed to dereference tq->do_targetq
+			_dispatch_ktrace2(DISPATCH_PERF_delayed_registration, dq,
+					  dx_metatype(dq) == _DISPATCH_SOURCE_TYPE ? dq : NULL);
+			if (wlh_out) *wlh_out = NULL;
+			return 0;
+		}
+
+		dispatch_priority_t tqp = tq->dq_priority;
+
+		tq = tq->do_targetq;
+		if (tqp & DISPATCH_PRIORITY_FLAG_INHERITED) {
+			// if the priority is inherited, it means we got it from our target
+			// which has fallback and various magical flags that the code below
+			// will handle, so do not bother here.
+			break;
+		}
+
+		if (!fallback) fallback = _dispatch_priority_fallback_qos(tqp);
+		tqp &= DISPATCH_PRIORITY_REQUESTED_MASK;
+		if (p < tqp) p = tqp;
+	}
+
+	if (likely(_dispatch_is_in_root_queues_array(tq) ||
+			tq->dq_serialnum == DISPATCH_QUEUE_SERIAL_NUMBER_WLF)) {
+		dispatch_priority_t rqp = tq->dq_priority;
+
+		if (!fallback) fallback = _dispatch_priority_fallback_qos(rqp);
+		rqp &= DISPATCH_PRIORITY_REQUESTED_MASK;
+		if (p < rqp) p = rqp;
+
+		p |= (tq->dq_priority & DISPATCH_PRIORITY_FLAG_OVERCOMMIT);
+		if ((dpri & DISPATCH_PRIORITY_FLAG_FLOOR) ||
+				!(dpri & DISPATCH_PRIORITY_REQUESTED_MASK)) {
+			p |= (dpri & DISPATCH_PRIORITY_FLAG_FLOOR);
+			if (fallback > _dispatch_priority_qos(p)) {
+				p |= _dispatch_priority_make_fallback(fallback);
+			}
+		}
+		if (wlh_out) *wlh_out = wlh;
+		return p;
+	}
+
+	// pthread root queues opt out of QoS
+	if (wlh_out) *wlh_out = DISPATCH_WLH_ANON;
+	return DISPATCH_PRIORITY_FLAG_MANAGER;
+}
+
+DISPATCH_ALWAYS_INLINE
+static void
+_dispatch_queue_setter_assert_inactive(dispatch_queue_class_t dq)
+{
+	uint64_t dq_state = os_atomic_load2o(dq._dq, dq_state, relaxed);
+	if (likely(dq_state & DISPATCH_QUEUE_INACTIVE)) return;
+#if DISPATCH_SIZEOF_PTR == 4
+	dq_state >>= 32;
 #endif
+	DISPATCH_CLIENT_CRASH((uintptr_t)dq_state,
+			"dispatch queue/source property setter called after activation");
+}
+
+DISPATCH_ALWAYS_INLINE
+static void
+_dispatch_workloop_attributes_alloc_if_needed(dispatch_workloop_t dwl)
+{
+	if (unlikely(!dwl->dwl_attr)) {
+		dwl->dwl_attr = _dispatch_calloc(1, sizeof(dispatch_workloop_attr_s));
+	}
+}
+
+void
+dispatch_set_qos_class_floor(dispatch_object_t dou,
+		dispatch_qos_class_t cls, int relpri)
+{
+	if (dx_cluster(dou._do) != _DISPATCH_QUEUE_CLUSTER) {
+		DISPATCH_CLIENT_CRASH(0,
+				"dispatch_set_qos_class_floor called on invalid object type");
+	}
+	if (dx_metatype(dou._do) == _DISPATCH_WORKLOOP_TYPE) {
+		return dispatch_workloop_set_qos_class_floor(dou._dwl, cls, relpri, 0);
+	}
+
+	dispatch_qos_t qos = _dispatch_qos_from_qos_class(cls);
+	dispatch_priority_t pri = _dispatch_priority_make(qos, relpri);
+	dispatch_priority_t old_pri = dou._dq->dq_priority;
+
+	if (pri) pri |= DISPATCH_PRIORITY_FLAG_FLOOR;
+	old_pri &= ~DISPATCH_PRIORITY_REQUESTED_MASK;
+	old_pri &= ~DISPATCH_PRIORITY_FLAG_FLOOR;
+	dou._dq->dq_priority = pri | old_pri;
+
+	_dispatch_queue_setter_assert_inactive(dou._dq);
+}
+
+void
+dispatch_set_qos_class(dispatch_object_t dou, dispatch_qos_class_t cls,
+		int relpri)
+{
+	if (dx_cluster(dou._do) != _DISPATCH_QUEUE_CLUSTER ||
+			dx_metatype(dou._do) == _DISPATCH_WORKLOOP_TYPE) {
+		DISPATCH_CLIENT_CRASH(0,
+				"dispatch_set_qos_class called on invalid object type");
+	}
+
+	dispatch_qos_t qos = _dispatch_qos_from_qos_class(cls);
+	dispatch_priority_t pri = _dispatch_priority_make(qos, relpri);
+	dispatch_priority_t old_pri = dou._dq->dq_priority;
+
+	old_pri &= ~DISPATCH_PRIORITY_REQUESTED_MASK;
+	old_pri &= ~DISPATCH_PRIORITY_FLAG_FLOOR;
+	dou._dq->dq_priority = pri | old_pri;
+
+	_dispatch_queue_setter_assert_inactive(dou._dq);
+}
+
+void
+dispatch_set_qos_class_fallback(dispatch_object_t dou, dispatch_qos_class_t cls)
+{
+	if (dx_cluster(dou._do) != _DISPATCH_QUEUE_CLUSTER) {
+		DISPATCH_CLIENT_CRASH(0,
+				"dispatch_set_qos_class_fallback called on invalid object type");
+	}
+
+	dispatch_qos_t qos = _dispatch_qos_from_qos_class(cls);
+	dispatch_priority_t pri = _dispatch_priority_make_fallback(qos);
+	dispatch_priority_t old_pri = dou._dq->dq_priority;
+
+	old_pri &= ~DISPATCH_PRIORITY_FALLBACK_QOS_MASK;
+	old_pri &= ~DISPATCH_PRIORITY_FLAG_FALLBACK;
+	dou._dq->dq_priority = pri | old_pri;
+
+	_dispatch_queue_setter_assert_inactive(dou._dq);
+}
+
+static dispatch_queue_t
+_dispatch_queue_priority_inherit_from_target(dispatch_lane_class_t dq,
+		dispatch_queue_t tq)
+{
+	const dispatch_priority_t inherited = DISPATCH_PRIORITY_FLAG_INHERITED;
+	dispatch_priority_t pri = dq._dl->dq_priority;
+
+	// This priority has been selected by the client; leave it alone.
+	// However, when the client picked a QoS, we should adjust the target queue
+	// if it is a root queue to best match the ask
+	if (_dispatch_queue_priority_manually_selected(pri)) {
+		if (_dispatch_is_in_root_queues_array(tq)) {
+			dispatch_qos_t qos = _dispatch_priority_qos(pri);
+			if (!qos) qos = DISPATCH_QOS_DEFAULT;
+			tq = _dispatch_get_root_queue(qos,
+					pri & DISPATCH_PRIORITY_FLAG_OVERCOMMIT)->_as_dq;
+		}
+		return tq;
+	}
+
+	if (_dispatch_is_in_root_queues_array(tq)) {
+		// <rdar://problem/32921639> base queues need to know they target
+		// the default root queue so that _dispatch_queue_wakeup_qos()
+		// in _dispatch_queue_wakeup() can fall back to QOS_DEFAULT
+		// if no other priority was provided.
+		pri = tq->dq_priority | inherited;
+	} else if (pri & inherited) {
+		// if the FALLBACK flag is set on queues due to the code above
+		// we need to clear it if the queue is retargeted within a hierarchy
+		// and is no longer a base queue.
+		pri &= ~DISPATCH_PRIORITY_FALLBACK_QOS_MASK;
+		pri &= ~DISPATCH_PRIORITY_FLAG_FALLBACK;
+	}
+
+	dq._dl->dq_priority = pri;
+	return tq;
+}
+
+
+DISPATCH_NOINLINE
+static dispatch_queue_t
+_dispatch_lane_create_with_target(const char *label, dispatch_queue_attr_t dqa,
+		dispatch_queue_t tq, bool legacy)
+{
+	dispatch_queue_attr_info_t dqai = _dispatch_queue_attr_to_info(dqa);
+
+	//
+	// Step 1: Normalize arguments (qos, overcommit, tq)
+	//
+
+	dispatch_qos_t qos = dqai.dqai_qos;
+#if !HAVE_PTHREAD_WORKQUEUE_QOS
+	if (qos == DISPATCH_QOS_USER_INTERACTIVE) {
+		dqai.dqai_qos = qos = DISPATCH_QOS_USER_INITIATED;
+	}
+	if (qos == DISPATCH_QOS_MAINTENANCE) {
+		dqai.dqai_qos = qos = DISPATCH_QOS_BACKGROUND;
+	}
+#endif // !HAVE_PTHREAD_WORKQUEUE_QOS
+
+	_dispatch_queue_attr_overcommit_t overcommit = dqai.dqai_overcommit;
+	if (overcommit != _dispatch_queue_attr_overcommit_unspecified && tq) {
+		if (tq->do_targetq) {
+			DISPATCH_CLIENT_CRASH(tq, "Cannot specify both overcommit and "
+					"a non-global target queue");
+		}
+	}
+
+	if (tq && dx_type(tq) == DISPATCH_QUEUE_GLOBAL_ROOT_TYPE) {
+		// Handle discrepancies between attr and target queue; attributes win
+		if (overcommit == _dispatch_queue_attr_overcommit_unspecified) {
+			if (tq->dq_priority & DISPATCH_PRIORITY_FLAG_OVERCOMMIT) {
+				overcommit = _dispatch_queue_attr_overcommit_enabled;
+			} else {
+				overcommit = _dispatch_queue_attr_overcommit_disabled;
+			}
+		}
+		if (qos == DISPATCH_QOS_UNSPECIFIED) {
+			qos = _dispatch_priority_qos(tq->dq_priority);
+		}
+		tq = NULL;
+	} else if (tq && !tq->do_targetq) {
+		// target is a pthread or runloop root queue, setting QoS or overcommit
+		// is disallowed
+		if (overcommit != _dispatch_queue_attr_overcommit_unspecified) {
+			DISPATCH_CLIENT_CRASH(tq, "Cannot specify an overcommit attribute "
+					"and use this kind of target queue");
+		}
+	} else {
+		if (overcommit == _dispatch_queue_attr_overcommit_unspecified) {
+			// Serial queues default to overcommit!
+			overcommit = dqai.dqai_concurrent ?
+					_dispatch_queue_attr_overcommit_disabled :
+					_dispatch_queue_attr_overcommit_enabled;
+		}
+	}
+	if (!tq) {
+		tq = _dispatch_get_root_queue(
+				qos == DISPATCH_QOS_UNSPECIFIED ? DISPATCH_QOS_DEFAULT : qos,
+				overcommit == _dispatch_queue_attr_overcommit_enabled)->_as_dq;
+		if (unlikely(!tq)) {
+			DISPATCH_CLIENT_CRASH(qos, "Invalid queue attribute");
+		}
+	}
+
+	//
+	// Step 2: Initialize the queue
+	//
+
+	if (legacy) {
+		// if any of these attributes is specified, use non-legacy classes
+		if (dqai.dqai_inactive || dqai.dqai_autorelease_frequency) {
+			legacy = false;
+		}
+	}
+
+	const void *vtable;
+	dispatch_queue_flags_t dqf = legacy ? DQF_MUTABLE : 0;
+	if (dqai.dqai_concurrent) {
+		vtable = DISPATCH_VTABLE(queue_concurrent);
+	} else {
+		vtable = DISPATCH_VTABLE(queue_serial);
+	}
+	switch (dqai.dqai_autorelease_frequency) {
+	case DISPATCH_AUTORELEASE_FREQUENCY_NEVER:
+		dqf |= DQF_AUTORELEASE_NEVER;
+		break;
+	case DISPATCH_AUTORELEASE_FREQUENCY_WORK_ITEM:
+		dqf |= DQF_AUTORELEASE_ALWAYS;
+		break;
+	}
+	if (label) {
+		const char *tmp = _dispatch_strdup_if_mutable(label);
+		if (tmp != label) {
+			dqf |= DQF_LABEL_NEEDS_FREE;
+			label = tmp;
+		}
+	}
+
+	dispatch_lane_t dq = _dispatch_object_alloc(vtable,
+			sizeof(struct dispatch_lane_s));
+	_dispatch_queue_init(dq, dqf, dqai.dqai_concurrent ?
+			DISPATCH_QUEUE_WIDTH_MAX : 1, DISPATCH_QUEUE_ROLE_INNER |
+			(dqai.dqai_inactive ? DISPATCH_QUEUE_INACTIVE : 0));
+
+	dq->dq_label = label;
+	dq->dq_priority = _dispatch_priority_make((dispatch_qos_t)dqai.dqai_qos,
+			dqai.dqai_relpri);
+	if (overcommit == _dispatch_queue_attr_overcommit_enabled) {
+		dq->dq_priority |= DISPATCH_PRIORITY_FLAG_OVERCOMMIT;
+	}
+	if (!dqai.dqai_inactive) {
+		_dispatch_queue_priority_inherit_from_target(dq, tq);
+		_dispatch_lane_inherit_wlh_from_target(dq, tq);
+	}
+	_dispatch_retain(tq);
+	dq->do_targetq = tq;
+	_dispatch_object_debug(dq, "%s", __func__);
+	return _dispatch_trace_queue_create(dq)._dq;
+}
+
+dispatch_queue_t
+dispatch_queue_create_with_target(const char *label, dispatch_queue_attr_t dqa,
+		dispatch_queue_t tq)
+{
+	return _dispatch_lane_create_with_target(label, dqa, tq, false);
+}
+
+dispatch_queue_t
+dispatch_queue_create(const char *label, dispatch_queue_attr_t attr)
+{
+	return _dispatch_lane_create_with_target(label, attr,
+			DISPATCH_TARGET_QUEUE_DEFAULT, true);
+}
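+// Example (illustrative sketch): creating an initially inactive concurrent
+// queue that targets a global root queue, then activating it.
+//
+//   dispatch_queue_attr_t attr = dispatch_queue_attr_make_initially_inactive(
+//           DISPATCH_QUEUE_CONCURRENT);
+//   dispatch_queue_t q = dispatch_queue_create_with_target("com.example.io",
+//           attr, dispatch_get_global_queue(QOS_CLASS_UTILITY, 0));
+//   dispatch_activate(q);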
+
+dispatch_queue_t
+dispatch_queue_create_with_accounting_override_voucher(const char *label,
+		dispatch_queue_attr_t attr, voucher_t voucher)
+{
+	(void)label; (void)attr; (void)voucher;
+	DISPATCH_CLIENT_CRASH(0, "Unsupported interface");
+}
 
 DISPATCH_NOINLINE
 static void
-_dispatch_global_queue_poke_slow(dispatch_queue_t dq, int n, int floor)
+_dispatch_queue_dispose(dispatch_queue_class_t dqu, bool *allow_free)
 {
-	dispatch_root_queue_context_t qc = dq->do_ctxt;
-	int remaining = n;
-	int r = ENOSYS;
+	dispatch_queue_specific_head_t dqsh;
+	dispatch_queue_t dq = dqu._dq;
 
-	_dispatch_root_queues_init();
-	_dispatch_debug_root_queue(dq, __func__);
-#if DISPATCH_USE_WORKQUEUES
-#if DISPATCH_USE_PTHREAD_POOL
-	if (qc->dgq_kworkqueue != (void*)(~0ul))
-#endif
-	{
-		_dispatch_root_queue_debug("requesting new worker thread for global "
-				"queue: %p", dq);
-#if DISPATCH_USE_LEGACY_WORKQUEUE_FALLBACK
-		if (qc->dgq_kworkqueue) {
-			pthread_workitem_handle_t wh;
-			unsigned int gen_cnt;
-			do {
-				r = pthread_workqueue_additem_np(qc->dgq_kworkqueue,
-						_dispatch_worker_thread4, dq, &wh, &gen_cnt);
-				(void)dispatch_assume_zero(r);
-			} while (--remaining);
-			return;
-		}
-#endif // DISPATCH_USE_LEGACY_WORKQUEUE_FALLBACK
-#if HAVE_PTHREAD_WORKQUEUE_QOS
-		r = _pthread_workqueue_addthreads(remaining,
-				_dispatch_priority_to_pp(dq->dq_priority));
-#elif DISPATCH_USE_PTHREAD_WORKQUEUE_SETDISPATCH_NP
-		r = pthread_workqueue_addthreads_np(qc->dgq_wq_priority,
-				qc->dgq_wq_options, remaining);
-#endif
-		(void)dispatch_assume_zero(r);
+	if (dq->dq_label && _dispatch_queue_label_needs_free(dq)) {
+		free((void*)dq->dq_label);
+	}
+	dqsh = os_atomic_xchg2o(dq, dq_specific_head, (void *)0x200, relaxed);
+	if (dqsh) _dispatch_queue_specific_head_dispose(dqsh);
+
+	// fast path for queues that never got their storage retained
+	if (likely(os_atomic_load2o(dq, dq_sref_cnt, relaxed) == 0)) {
+		// poison the state with something that is suspended and is easy to spot
+		dq->dq_state = 0xdead000000000000;
 		return;
 	}
-#endif // DISPATCH_USE_WORKQUEUES
-#if DISPATCH_USE_PTHREAD_POOL
-	dispatch_pthread_root_queue_context_t pqc = qc->dgq_ctxt;
-	if (fastpath(pqc->dpq_thread_mediator.do_vtable)) {
-		while (dispatch_semaphore_signal(&pqc->dpq_thread_mediator)) {
-			_dispatch_root_queue_debug("signaled sleeping worker for "
-					"global queue: %p", dq);
-			if (!--remaining) {
-				return;
-			}
+
+	// Take over freeing the memory from _dispatch_object_dealloc()
+	//
+	// As soon as we call _dispatch_queue_release_storage(), we forfeit
+	// the possibility for the caller of dx_dispose() to finalize the object
+	// so that responsibility is ours.
+	_dispatch_object_finalize(dq);
+	*allow_free = false;
+	dq->dq_label = "<released queue, pending free>";
+	dq->do_targetq = NULL;
+	dq->do_finalizer = NULL;
+	dq->do_ctxt = NULL;
+	return _dispatch_queue_release_storage(dq);
+}
+
+void
+_dispatch_lane_class_dispose(dispatch_lane_class_t dqu, bool *allow_free)
+{
+	dispatch_lane_t dq = dqu._dl;
+	uint64_t dq_state = os_atomic_load2o(dq, dq_state, relaxed);
+	uint64_t initial_state = DISPATCH_QUEUE_STATE_INIT_VALUE(dq->dq_width);
+
+	if (dx_hastypeflag(dq, QUEUE_ROOT)) {
+		initial_state = DISPATCH_ROOT_QUEUE_STATE_INIT_VALUE;
+	}
+	dq_state &= ~DISPATCH_QUEUE_MAX_QOS_MASK;
+	dq_state &= ~DISPATCH_QUEUE_DIRTY;
+	dq_state &= ~DISPATCH_QUEUE_ROLE_MASK;
+	if (unlikely(dq_state != initial_state)) {
+		if (_dq_state_drain_locked(dq_state)) {
+			DISPATCH_CLIENT_CRASH((uintptr_t)dq_state,
+					"Release of a locked queue");
 		}
+#if DISPATCH_SIZEOF_PTR == 4
+		dq_state >>= 32;
+#endif
+		DISPATCH_CLIENT_CRASH((uintptr_t)dq_state,
+				"Release of a queue with corrupt state");
 	}
 
-	bool overcommit = dq->dq_priority & DISPATCH_PRIORITY_FLAG_OVERCOMMIT;
-	if (overcommit) {
-		os_atomic_add2o(qc, dgq_pending, remaining, relaxed);
-	} else {
-		if (!os_atomic_cmpxchg2o(qc, dgq_pending, 0, remaining, relaxed)) {
-			_dispatch_root_queue_debug("worker thread request still pending for "
-					"global queue: %p", dq);
-			return;
-		}
+	if (unlikely(dq->dq_items_tail)) {
+		DISPATCH_CLIENT_CRASH(dq->dq_items_tail,
+				"Release of a queue while items are enqueued");
 	}
+	dq->dq_items_head = (void *)0x200;
+	dq->dq_items_tail = (void *)0x200;
 
-	int32_t can_request, t_count;
-	// seq_cst with atomic store to tail <rdar://problem/16932833>
-	t_count = os_atomic_load2o(qc, dgq_thread_pool_size, ordered);
-	do {
-		can_request = t_count < floor ? 0 : t_count - floor;
-		if (remaining > can_request) {
-			_dispatch_root_queue_debug("pthread pool reducing request from %d to %d",
-					remaining, can_request);
-			os_atomic_sub2o(qc, dgq_pending, remaining - can_request, relaxed);
-			remaining = can_request;
-		}
-		if (remaining == 0) {
-			_dispatch_root_queue_debug("pthread pool is full for root queue: "
-					"%p", dq);
-			return;
-		}
-	} while (!os_atomic_cmpxchgvw2o(qc, dgq_thread_pool_size, t_count,
-			t_count - remaining, &t_count, acquire));
+	_dispatch_queue_dispose(dqu, allow_free);
+}
 
-#if defined(_WIN32)
-#if DISPATCH_USE_MGR_THREAD && DISPATCH_ENABLE_PTHREAD_ROOT_QUEUES
-	if (slowpath(dq == &_dispatch_mgr_root_queue)) {
-		_dispatch_mgr_root_queue_init();
+void
+_dispatch_lane_dispose(dispatch_lane_t dq, bool *allow_free)
+{
+	_dispatch_object_debug(dq, "%s", __func__);
+	_dispatch_trace_queue_dispose(dq);
+	_dispatch_lane_class_dispose(dq, allow_free);
+}
+
+void
+_dispatch_queue_xref_dispose(dispatch_queue_t dq)
+{
+	uint64_t dq_state = os_atomic_load2o(dq, dq_state, relaxed);
+	if (unlikely(_dq_state_is_suspended(dq_state))) {
+		long state = (long)dq_state;
+		if (sizeof(long) < sizeof(uint64_t)) state = (long)(dq_state >> 32);
+		if (unlikely(_dq_state_is_inactive(dq_state))) {
+			// Arguments for and against this assert are within 6705399
+			DISPATCH_CLIENT_CRASH(state, "Release of an inactive object");
+		}
+		DISPATCH_CLIENT_CRASH(dq_state, "Release of a suspended object");
 	}
-#endif
-	do {
-		_dispatch_retain(dq); // released in _dispatch_worker_thread
-#if DISPATCH_DEBUG
-		unsigned dwStackSize = 0;
-#else
-		unsigned dwStackSize = 64 * 1024;
-#endif
-		uintptr_t hThread = 0;
-		while (!(hThread = _beginthreadex(NULL, dwStackSize, _dispatch_worker_thread_thunk, dq, STACK_SIZE_PARAM_IS_A_RESERVATION, NULL))) {
-			if (errno != EAGAIN) {
-				(void)dispatch_assume(hThread);
-			}
-			_dispatch_temporary_resource_shortage();
-		}
-		if (_dispatch_mgr_sched.prio > _dispatch_mgr_sched.default_prio) {
-			(void)dispatch_assume_zero(SetThreadPriority((HANDLE)hThread, _dispatch_mgr_sched.prio) == TRUE);
-		}
-		CloseHandle((HANDLE)hThread);
-	} while (--remaining);
-#else
-	pthread_attr_t *attr = &pqc->dpq_thread_attr;
-	pthread_t tid, *pthr = &tid;
-#if DISPATCH_USE_MGR_THREAD && DISPATCH_ENABLE_PTHREAD_ROOT_QUEUES
-	if (slowpath(dq == &_dispatch_mgr_root_queue)) {
-		pthr = _dispatch_mgr_root_queue_init();
-	}
-#endif
-	do {
-		_dispatch_retain(dq); // released in _dispatch_worker_thread
-		while ((r = pthread_create(pthr, attr, _dispatch_worker_thread, dq))) {
-			if (r != EAGAIN) {
-				(void)dispatch_assume_zero(r);
-			}
-			_dispatch_temporary_resource_shortage();
-		}
-	} while (--remaining);
-#endif
-#endif // DISPATCH_USE_PTHREAD_POOL
+	os_atomic_or2o(dq, dq_atomic_flags, DQF_RELEASED, relaxed);
 }
 
 DISPATCH_NOINLINE
-void
-_dispatch_global_queue_poke(dispatch_queue_t dq, int n, int floor)
+static void
+_dispatch_lane_suspend_slow(dispatch_lane_t dq)
 {
-	if (!_dispatch_queue_class_probe(dq)) {
+	uint64_t old_state, new_state, delta;
+
+	_dispatch_queue_sidelock_lock(dq);
+
+	// what we want to transfer (remove from dq_state)
+	delta  = DISPATCH_QUEUE_SUSPEND_HALF * DISPATCH_QUEUE_SUSPEND_INTERVAL;
+	// but this is a suspend so add a suspend count at the same time
+	delta -= DISPATCH_QUEUE_SUSPEND_INTERVAL;
+	if (dq->dq_side_suspend_cnt == 0) {
+		// we subtract delta from dq_state, and we want to set this bit
+		delta -= DISPATCH_QUEUE_HAS_SIDE_SUSPEND_CNT;
+	}
+
+	os_atomic_rmw_loop2o(dq, dq_state, old_state, new_state, relaxed, {
+		// unsigned underflow of the subtraction can happen because other
+		// threads could have touched this value while we were trying to acquire
+		// the lock, or because another thread raced us to do the same operation
+		// and got to the lock first.
+		if (unlikely(os_sub_overflow(old_state, delta, &new_state))) {
+			os_atomic_rmw_loop_give_up(goto retry);
+		}
+	});
+	if (unlikely(os_add_overflow(dq->dq_side_suspend_cnt,
+			DISPATCH_QUEUE_SUSPEND_HALF, &dq->dq_side_suspend_cnt))) {
+		DISPATCH_CLIENT_CRASH(0, "Too many nested calls to dispatch_suspend()");
+	}
+	return _dispatch_queue_sidelock_unlock(dq);
+
+retry:
+	_dispatch_queue_sidelock_unlock(dq);
+	return _dispatch_lane_suspend(dq);
+}
+
+void
+_dispatch_lane_suspend(dispatch_lane_t dq)
+{
+	uint64_t old_state, new_state;
+
+	os_atomic_rmw_loop2o(dq, dq_state, old_state, new_state, relaxed, {
+		new_state = DISPATCH_QUEUE_SUSPEND_INTERVAL;
+		if (unlikely(os_add_overflow(old_state, new_state, &new_state))) {
+			os_atomic_rmw_loop_give_up({
+				return _dispatch_lane_suspend_slow(dq);
+			});
+		}
+	});
+
+	if (!_dq_state_is_suspended(old_state)) {
+		// rdar://8181908 we need to extend the queue life for the duration
+		// of the call to wakeup at _dispatch_lane_resume() time.
+		_dispatch_retain_2(dq);
+	}
+}
+
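+// _dispatch_lane_suspend_slow() above parks half of the dq_state suspend
+// count (counted in units of DISPATCH_QUEUE_SUSPEND_INTERVAL) into
+// dq_side_suspend_cnt under the side lock; _dispatch_lane_resume_slow() below
+// transfers one such half back (consuming one resume) and clears
+// DISPATCH_QUEUE_HAS_SIDE_SUSPEND_CNT once the side count drops to zero.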
+DISPATCH_NOINLINE
+static void
+_dispatch_lane_resume_slow(dispatch_lane_t dq)
+{
+	uint64_t old_state, new_state, delta;
+
+	_dispatch_queue_sidelock_lock(dq);
+
+	// what we want to transfer
+	delta  = DISPATCH_QUEUE_SUSPEND_HALF * DISPATCH_QUEUE_SUSPEND_INTERVAL;
+	// but this is a resume so consume a suspend count at the same time
+	delta -= DISPATCH_QUEUE_SUSPEND_INTERVAL;
+	switch (dq->dq_side_suspend_cnt) {
+	case 0:
+		goto retry;
+	case DISPATCH_QUEUE_SUSPEND_HALF:
+		// we will transition the side count to 0, so we want to clear this bit
+		delta -= DISPATCH_QUEUE_HAS_SIDE_SUSPEND_CNT;
+		break;
+	}
+	os_atomic_rmw_loop2o(dq, dq_state, old_state, new_state, relaxed, {
+		// unsigned overflow of the addition can happen because other
+		// threads could have touched this value while we were trying to acquire
+		// the lock, or because another thread raced us to do the same operation
+		// and got to the lock first.
+		if (unlikely(os_add_overflow(old_state, delta, &new_state))) {
+			os_atomic_rmw_loop_give_up(goto retry);
+		}
+	});
+	dq->dq_side_suspend_cnt -= DISPATCH_QUEUE_SUSPEND_HALF;
+	return _dispatch_queue_sidelock_unlock(dq);
+
+retry:
+	_dispatch_queue_sidelock_unlock(dq);
+	return _dispatch_lane_resume(dq, false);
+}
+
+DISPATCH_NOINLINE
+static void
+_dispatch_lane_resume_activate(dispatch_lane_t dq)
+{
+	bool allow_resume = true;
+	// Step 2: run the activation finalizer
+	if (dx_vtable(dq)->dq_activate) {
+		dx_vtable(dq)->dq_activate(dq, &allow_resume);
+	}
+	// Step 3: consume the suspend count
+	if (allow_resume) {
+		return _dispatch_lane_resume(dq, false);
+	}
+}
+
+void
+_dispatch_lane_resume(dispatch_lane_t dq, bool activate)
+{
+	// covers all suspend and inactive bits, including side suspend bit
+	const uint64_t suspend_bits = DISPATCH_QUEUE_SUSPEND_BITS_MASK;
+	uint64_t pending_barrier_width =
+			(dq->dq_width - 1) * DISPATCH_QUEUE_WIDTH_INTERVAL;
+	uint64_t set_owner_and_set_full_width_and_in_barrier =
+			_dispatch_lock_value_for_self() | DISPATCH_QUEUE_WIDTH_FULL_BIT |
+			DISPATCH_QUEUE_IN_BARRIER;
+
+	// backward compatibility: only dispatch sources can abuse
+	// dispatch_resume() to really mean dispatch_activate()
+	bool is_source = (dx_metatype(dq) == _DISPATCH_SOURCE_TYPE);
+	uint64_t old_state, new_state;
+
+	// Activation is a bit tricky as it needs to finalize before the wakeup.
+	//
+	// If after doing its updates to the suspend count and/or inactive bit,
+	// the last suspension related bit that would remain is the
+	// NEEDS_ACTIVATION one, then this function:
+	//
+	// 1. moves the state to { sc:1 i:0 na:0 } (converts the needs-activate into
+	//    a suspend count)
+	// 2. runs the activation finalizer
+	// 3. consumes the suspend count set in (1), and finishes the resume flow
+	//
+	// Concurrently, some property setters such as setting dispatch source
+	// handlers or _dispatch_lane_set_target_queue try to do in-place changes
+	// before activation. These protect their action by taking a suspend count.
+	// Step (1) above cannot happen if such a setter has locked the object.
+	if (activate) {
+		// relaxed atomic because this doesn't publish anything, this is only
+		// about picking the thread that gets to finalize the activation
+		os_atomic_rmw_loop2o(dq, dq_state, old_state, new_state, relaxed, {
+			if ((old_state & suspend_bits) ==
+					DISPATCH_QUEUE_NEEDS_ACTIVATION + DISPATCH_QUEUE_INACTIVE) {
+				// { sc:0 i:1 na:1 } -> { sc:1 i:0 na:0 }
+				new_state = old_state - DISPATCH_QUEUE_INACTIVE
+						- DISPATCH_QUEUE_NEEDS_ACTIVATION
+						+ DISPATCH_QUEUE_SUSPEND_INTERVAL;
+			} else if (_dq_state_is_inactive(old_state)) {
+				// { sc:>0 i:1 na:1 } -> { i:0 na:1 }
+				// simple activation because sc is not 0
+				// resume will deal with na:1 later
+				new_state = old_state - DISPATCH_QUEUE_INACTIVE;
+			} else {
+				// object already active, this is a no-op, just exit
+				os_atomic_rmw_loop_give_up(return);
+			}
+		});
+	} else {
+		// release barrier needed to publish the effect of
+		// - dispatch_set_target_queue()
+		// - dispatch_set_*_handler()
+		// - dq_activate()
+		os_atomic_rmw_loop2o(dq, dq_state, old_state, new_state, release, {
+			if ((old_state & suspend_bits) == DISPATCH_QUEUE_SUSPEND_INTERVAL
+					+ DISPATCH_QUEUE_NEEDS_ACTIVATION) {
+				// { sc:1 i:0 na:1 } -> { sc:1 i:0 na:0 }
+				new_state = old_state - DISPATCH_QUEUE_NEEDS_ACTIVATION;
+			} else if (is_source && (old_state & suspend_bits) ==
+					DISPATCH_QUEUE_NEEDS_ACTIVATION + DISPATCH_QUEUE_INACTIVE) {
+				// { sc:0 i:1 na:1 } -> { sc:1 i:0 na:0 }
+				new_state = old_state - DISPATCH_QUEUE_INACTIVE
+						- DISPATCH_QUEUE_NEEDS_ACTIVATION
+						+ DISPATCH_QUEUE_SUSPEND_INTERVAL;
+			} else if (unlikely(os_sub_overflow(old_state,
+					DISPATCH_QUEUE_SUSPEND_INTERVAL, &new_state))) {
+				// underflow means over-resume or a suspend count transfer
+				// to the side count is needed
+				os_atomic_rmw_loop_give_up({
+					if (!(old_state & DISPATCH_QUEUE_HAS_SIDE_SUSPEND_CNT)) {
+						goto over_resume;
+					}
+					return _dispatch_lane_resume_slow(dq);
+				});
+		//
+		// below this, new_state = old_state - DISPATCH_QUEUE_SUSPEND_INTERVAL
+		//
+			} else if (!_dq_state_is_runnable(new_state)) {
+				// Out of width or still suspended.
+				// For the former, force _dispatch_lane_non_barrier_complete
+				// to reconsider whether it has work to do
+				new_state |= DISPATCH_QUEUE_DIRTY;
+			} else if (_dq_state_drain_locked(new_state)) {
+				// still locked by someone else, make drain_try_unlock() fail
+				// and reconsider whether it has work to do
+				new_state |= DISPATCH_QUEUE_DIRTY;
+			} else if (!is_source && (_dq_state_has_pending_barrier(new_state) ||
+					new_state + pending_barrier_width <
+					DISPATCH_QUEUE_WIDTH_FULL_BIT)) {
+				// if we can, acquire the full width drain lock
+				// and then perform a lock transfer
+				//
+				// However this is never useful for a source where there are no
+				// sync waiters, so never take the lock and do a plain wakeup
+				new_state &= DISPATCH_QUEUE_DRAIN_PRESERVED_BITS_MASK;
+				new_state |= set_owner_and_set_full_width_and_in_barrier;
+			} else {
+				// clear overrides and force a wakeup
+				new_state &= ~DISPATCH_QUEUE_DRAIN_UNLOCK_MASK;
+				new_state &= ~DISPATCH_QUEUE_MAX_QOS_MASK;
+			}
+		});
+	}
+
+	if ((old_state ^ new_state) & DISPATCH_QUEUE_NEEDS_ACTIVATION) {
+		// we cleared the NEEDS_ACTIVATION bit and we have a valid suspend count
+		return _dispatch_lane_resume_activate(dq);
+	}
+
+	if (activate) {
+		// if we're still in an activate codepath here we should have
+		// { sc:>0 na:1 }, if not we've got a corrupt state
+		if (unlikely(!_dq_state_is_suspended(new_state))) {
+			DISPATCH_CLIENT_CRASH(dq, "Invalid suspension state");
+		}
 		return;
 	}
-#if DISPATCH_USE_WORKQUEUES
-	dispatch_root_queue_context_t qc = dq->do_ctxt;
-	if (
-#if DISPATCH_USE_PTHREAD_POOL
-			(qc->dgq_kworkqueue != (void*)(~0ul)) &&
+
+	if (_dq_state_is_suspended(new_state)) {
+		return;
+	}
+
+	if (_dq_state_is_dirty(old_state)) {
+		// <rdar://problem/14637483>
+		// dependency ordering for dq state changes that were flushed
+		// and not acted upon
+		os_atomic_thread_fence(dependency);
+		dq = os_atomic_force_dependency_on(dq, old_state);
+	}
+	// Balancing the retain_2 done in suspend() for rdar://8181908
+	dispatch_wakeup_flags_t flags = DISPATCH_WAKEUP_CONSUME_2;
+	if ((old_state ^ new_state) & DISPATCH_QUEUE_IN_BARRIER) {
+		flags |= DISPATCH_WAKEUP_BARRIER_COMPLETE;
+	} else if (!_dq_state_is_runnable(new_state)) {
+		if (_dq_state_is_base_wlh(old_state)) {
+			_dispatch_event_loop_assert_not_owned((dispatch_wlh_t)dq);
+		}
+		return _dispatch_release_2(dq);
+	}
+	dispatch_assert(!_dq_state_received_sync_wait(old_state));
+	dispatch_assert(!_dq_state_in_sync_transfer(old_state));
+	return dx_wakeup(dq, _dq_state_max_qos(old_state), flags);
+
+over_resume:
+	if (unlikely(_dq_state_is_inactive(old_state))) {
+		DISPATCH_CLIENT_CRASH(dq, "Over-resume of an inactive object");
+	}
+	DISPATCH_CLIENT_CRASH(dq, "Over-resume of an object");
+}
+
+const char *
+dispatch_queue_get_label(dispatch_queue_t dq)
+{
+	if (unlikely(dq == DISPATCH_CURRENT_QUEUE_LABEL)) {
+		dq = _dispatch_queue_get_current_or_default();
+	}
+	return dq->dq_label ? dq->dq_label : "";
+}
+
+qos_class_t
+dispatch_queue_get_qos_class(dispatch_queue_t dq, int *relpri_ptr)
+{
+	dispatch_priority_t pri = dq->dq_priority;
+	dispatch_qos_t qos = _dispatch_priority_qos(pri);
+	if (relpri_ptr) {
+		*relpri_ptr = qos ? _dispatch_priority_relpri(dq->dq_priority) : 0;
+	}
+	return _dispatch_qos_to_qos_class(qos);
+}
+
+static void
+_dispatch_lane_set_width(void *ctxt)
+{
+	int w = (int)(intptr_t)ctxt; // intentional truncation
+	uint32_t tmp;
+	dispatch_lane_t dq = upcast(_dispatch_queue_get_current())._dl;
+
+	if (w >= 0) {
+		tmp = w ? (unsigned int)w : 1;
+	} else {
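+		// The negative width constants (DISPATCH_QUEUE_WIDTH_*_CPUS) resolve
+		// to the parallelism available at the queue's current QoS.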
+		dispatch_qos_t qos = _dispatch_qos_from_pp(_dispatch_get_priority());
+		switch (w) {
+		case DISPATCH_QUEUE_WIDTH_MAX_PHYSICAL_CPUS:
+			tmp = _dispatch_qos_max_parallelism(qos,
+					DISPATCH_MAX_PARALLELISM_PHYSICAL);
+			break;
+		case DISPATCH_QUEUE_WIDTH_ACTIVE_CPUS:
+			tmp = _dispatch_qos_max_parallelism(qos,
+					DISPATCH_MAX_PARALLELISM_ACTIVE);
+			break;
+		case DISPATCH_QUEUE_WIDTH_MAX_LOGICAL_CPUS:
+		default:
+			tmp = _dispatch_qos_max_parallelism(qos, 0);
+			break;
+		}
+	}
+	if (tmp > DISPATCH_QUEUE_WIDTH_MAX) {
+		tmp = DISPATCH_QUEUE_WIDTH_MAX;
+	}
+
+	dispatch_queue_flags_t old_dqf, new_dqf;
+	os_atomic_rmw_loop2o(dq, dq_atomic_flags, old_dqf, new_dqf, relaxed, {
+		new_dqf = (old_dqf & DQF_FLAGS_MASK) | DQF_WIDTH(tmp);
+	});
+	_dispatch_lane_inherit_wlh_from_target(dq, dq->do_targetq);
+	_dispatch_object_debug(dq, "%s", __func__);
+}
+
+void
+dispatch_queue_set_width(dispatch_queue_t dq, long width)
+{
+	unsigned long type = dx_type(dq);
+	if (unlikely(dx_metatype(dq) != _DISPATCH_LANE_TYPE)) {
+		DISPATCH_CLIENT_CRASH(type, "Unexpected dispatch object type");
+	} else if (unlikely(type != DISPATCH_QUEUE_CONCURRENT_TYPE)) {
+		DISPATCH_CLIENT_CRASH(type, "Cannot set width of a serial queue");
+	}
+
+	if (likely((int)width >= 0)) {
+		dispatch_lane_t dl = upcast(dq)._dl;
+		_dispatch_barrier_trysync_or_async_f(dl, (void*)(intptr_t)width,
+				_dispatch_lane_set_width, DISPATCH_BARRIER_TRYSYNC_SUSPEND);
+	} else {
+		// The negative width constants need to execute on the queue to
+		// query the queue QoS
+		_dispatch_barrier_async_detached_f(dq, (void*)(intptr_t)width,
+				_dispatch_lane_set_width);
+	}
+}
+
+static void
+_dispatch_lane_legacy_set_target_queue(void *ctxt)
+{
+	dispatch_lane_t dq = upcast(_dispatch_queue_get_current())._dl;
+	dispatch_queue_t tq = ctxt;
+	dispatch_queue_t otq = dq->do_targetq;
+
+	if (_dispatch_queue_atomic_flags(dq) & DQF_TARGETED) {
+#if DISPATCH_ALLOW_NON_LEAF_RETARGET
+		_dispatch_ktrace3(DISPATCH_PERF_non_leaf_retarget, dq, otq, tq);
+		_dispatch_bug_deprecated("Changing the target of a queue "
+				"already targeted by other dispatch objects");
+#else
+		DISPATCH_CLIENT_CRASH(0, "Cannot change the target of a queue "
+				"already targeted by other dispatch objects");
 #endif
-			!os_atomic_cmpxchg2o(qc, dgq_pending, 0, n, relaxed)) {
-		_dispatch_root_queue_debug("worker thread request still pending for "
-				"global queue: %p", dq);
-		return;
 	}
-#endif // DISPATCH_USE_WORKQUEUES
-	return _dispatch_global_queue_poke_slow(dq, n, floor);
+
+	tq = _dispatch_queue_priority_inherit_from_target(dq, tq);
+	_dispatch_lane_inherit_wlh_from_target(dq, tq);
+#if HAVE_PTHREAD_WORKQUEUE_QOS
+	// see _dispatch_queue_wakeup()
+	_dispatch_queue_sidelock_lock(dq);
+#endif
+	dq->do_targetq = tq;
+#if HAVE_PTHREAD_WORKQUEUE_QOS
+	// see _dispatch_queue_wakeup()
+	_dispatch_queue_sidelock_unlock(dq);
+#endif
+
+	_dispatch_object_debug(dq, "%s", __func__);
+	_dispatch_introspection_target_queue_changed(dq->_as_dq);
+	_dispatch_release_tailcall(otq);
+}
+
+void
+_dispatch_lane_set_target_queue(dispatch_lane_t dq, dispatch_queue_t tq)
+{
+	if (tq == DISPATCH_TARGET_QUEUE_DEFAULT) {
+		bool overcommit = (dq->dq_width == 1);
+		tq = _dispatch_get_default_queue(overcommit);
+	}
+
+	if (_dispatch_lane_try_inactive_suspend(dq)) {
+		_dispatch_object_set_target_queue_inline(dq, tq);
+		return _dispatch_lane_resume(dq, false);
+	}
+
+#if !DISPATCH_ALLOW_NON_LEAF_RETARGET
+	if (_dispatch_queue_atomic_flags(dq) & DQF_TARGETED) {
+		DISPATCH_CLIENT_CRASH(0, "Cannot change the target of a queue "
+				"already targeted by other dispatch objects");
+	}
+#endif
+
+	if (unlikely(!_dispatch_queue_is_mutable(dq))) {
+#if DISPATCH_ALLOW_NON_LEAF_RETARGET
+		if (_dispatch_queue_atomic_flags(dq) & DQF_TARGETED) {
+			DISPATCH_CLIENT_CRASH(0, "Cannot change the target of a queue "
+					"already targeted by other dispatch objects");
+		}
+#endif
+		DISPATCH_CLIENT_CRASH(0, "Cannot change the target of this object "
+				"after it has been activated");
+	}
+
+	unsigned long metatype = dx_metatype(dq);
+	switch (metatype) {
+	case _DISPATCH_LANE_TYPE:
+#if DISPATCH_ALLOW_NON_LEAF_RETARGET
+		if (_dispatch_queue_atomic_flags(dq) & DQF_TARGETED) {
+			_dispatch_bug_deprecated("Changing the target of a queue "
+					"already targeted by other dispatch objects");
+		}
+#endif
+		break;
+	case _DISPATCH_SOURCE_TYPE:
+		_dispatch_ktrace1(DISPATCH_PERF_post_activate_retarget, dq);
+		_dispatch_bug_deprecated("Changing the target of a source "
+				"after it has been activated");
+		break;
+	default:
+		DISPATCH_CLIENT_CRASH(metatype, "Unexpected dispatch object type");
+	}
+
+	_dispatch_retain(tq);
+	return _dispatch_barrier_trysync_or_async_f(dq, tq,
+			_dispatch_lane_legacy_set_target_queue,
+			DISPATCH_BARRIER_TRYSYNC_SUSPEND);
 }
 
 #pragma mark -
-#pragma mark dispatch_queue_drain
+#pragma mark _dispatch_queue_debug
 
-void
-_dispatch_continuation_pop(dispatch_object_t dou, dispatch_invoke_context_t dic,
-		dispatch_invoke_flags_t flags, dispatch_queue_t dq)
+size_t
+_dispatch_queue_debug_attr(dispatch_queue_t dq, char* buf, size_t bufsiz)
 {
-	_dispatch_continuation_pop_inline(dou, dic, flags, dq);
+	size_t offset = 0;
+	dispatch_queue_t target = dq->do_targetq;
+	const char *tlabel = target && target->dq_label ? target->dq_label : "";
+	uint64_t dq_state = os_atomic_load2o(dq, dq_state, relaxed);
+
+	offset += dsnprintf(&buf[offset], bufsiz - offset, "sref = %d, "
+			"target = %s[%p], width = 0x%x, state = 0x%016llx",
+			dq->dq_sref_cnt + 1, tlabel, target, dq->dq_width,
+			(unsigned long long)dq_state);
+	if (_dq_state_is_suspended(dq_state)) {
+		offset += dsnprintf(&buf[offset], bufsiz - offset, ", suspended = %d",
+			_dq_state_suspend_cnt(dq_state));
+	}
+	if (_dq_state_is_inactive(dq_state)) {
+		offset += dsnprintf(&buf[offset], bufsiz - offset, ", inactive");
+	} else if (_dq_state_needs_activation(dq_state)) {
+		offset += dsnprintf(&buf[offset], bufsiz - offset, ", needs-activation");
+	}
+	if (_dq_state_is_enqueued(dq_state)) {
+		offset += dsnprintf(&buf[offset], bufsiz - offset, ", enqueued");
+	}
+	if (_dq_state_is_dirty(dq_state)) {
+		offset += dsnprintf(&buf[offset], bufsiz - offset, ", dirty");
+	}
+	dispatch_qos_t qos = _dq_state_max_qos(dq_state);
+	if (qos) {
+		offset += dsnprintf(&buf[offset], bufsiz - offset, ", max qos %d", qos);
+	}
+	mach_port_t owner = _dq_state_drain_owner(dq_state);
+	if (!_dispatch_queue_is_thread_bound(dq) && owner) {
+		offset += dsnprintf(&buf[offset], bufsiz - offset, ", draining on 0x%x",
+				owner);
+	}
+	if (_dq_state_is_in_barrier(dq_state)) {
+		offset += dsnprintf(&buf[offset], bufsiz - offset, ", in-barrier");
+	} else {
+		offset += dsnprintf(&buf[offset], bufsiz - offset, ", in-flight = %d",
+				_dq_state_used_width(dq_state, dq->dq_width));
+	}
+	if (_dq_state_has_pending_barrier(dq_state)) {
+		offset += dsnprintf(&buf[offset], bufsiz - offset, ", pending-barrier");
+	}
+	if (_dispatch_queue_is_thread_bound(dq)) {
+		offset += dsnprintf(&buf[offset], bufsiz - offset, ", thread = 0x%x ",
+				owner);
+	}
+	return offset;
 }
 
-void
-_dispatch_continuation_invoke(dispatch_object_t dou, voucher_t ov,
-		dispatch_invoke_flags_t flags)
+size_t
+_dispatch_queue_debug(dispatch_queue_t dq, char* buf, size_t bufsiz)
 {
-	_dispatch_continuation_invoke_inline(dou, ov, flags);
+	size_t offset = 0;
+	offset += dsnprintf(&buf[offset], bufsiz - offset, "%s[%p] = { ",
+			dq->dq_label ? dq->dq_label : _dispatch_object_class_name(dq), dq);
+	offset += _dispatch_object_debug_attr(dq, &buf[offset], bufsiz - offset);
+	offset += _dispatch_queue_debug_attr(dq, &buf[offset], bufsiz - offset);
+	offset += dsnprintf(&buf[offset], bufsiz - offset, "}");
+	return offset;
 }
 
+#if DISPATCH_PERF_MON
+
+#define DISPATCH_PERF_MON_BUCKETS 8
+
+static struct {
+	uint64_t volatile time_total;
+	uint64_t volatile count_total;
+	uint64_t volatile thread_total;
+} _dispatch_stats[DISPATCH_PERF_MON_BUCKETS];
+DISPATCH_USED static size_t _dispatch_stat_buckets = DISPATCH_PERF_MON_BUCKETS;
+
+void
+_dispatch_queue_merge_stats(uint64_t start, bool trace, perfmon_thread_type type)
+{
+	uint64_t delta = _dispatch_uptime() - start;
+	unsigned long count;
+	int bucket = 0;
+	count = (unsigned long)_dispatch_thread_getspecific(dispatch_bcounter_key);
+	_dispatch_thread_setspecific(dispatch_bcounter_key, NULL);
+	if (count == 0) {
+		bucket = 0;
+		if (trace) _dispatch_ktrace1(DISPATCH_PERF_MON_worker_useless, type);
+	} else {
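+		// Bucket by the bit width of `count` (roughly log2 of the number of
+		// work items drained), capped at the last bucket.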
+		bucket = MIN(DISPATCH_PERF_MON_BUCKETS - 1,
+				(int)sizeof(count) * CHAR_BIT - __builtin_clzl(count));
+		os_atomic_add(&_dispatch_stats[bucket].count_total, count, relaxed);
+	}
+	os_atomic_add(&_dispatch_stats[bucket].time_total, delta, relaxed);
+	os_atomic_inc(&_dispatch_stats[bucket].thread_total, relaxed);
+	if (trace) {
+		_dispatch_ktrace3(DISPATCH_PERF_MON_worker_thread_end, count, delta, type);
+	}
+}
+
+#endif
+
+#pragma mark -
+#pragma mark dispatch queue/lane drain & invoke
+
 DISPATCH_NOINLINE
 static void
 _dispatch_return_to_kernel(void)
 {
 #if DISPATCH_USE_KEVENT_WORKQUEUE
-	if (unlikely(_dispatch_get_wlh() == DISPATCH_WLH_ANON)) {
-		_dispatch_clear_return_to_kernel();
-	} else {
+	dispatch_deferred_items_t ddi = _dispatch_deferred_items_get();
+	if (likely(ddi && ddi->ddi_wlh != DISPATCH_WLH_ANON)) {
+		dispatch_assert(ddi->ddi_wlh_servicing);
 		_dispatch_event_loop_drain(KEVENT_FLAG_IMMEDIATE);
+	} else {
+		_dispatch_clear_return_to_kernel();
 	}
 #endif
 }
@@ -4844,18 +3450,14 @@
 void
 _dispatch_poll_for_events_4launchd(void)
 {
-#if DISPATCH_USE_KEVENT_WORKQUEUE
-	if (_dispatch_get_wlh()) {
-		dispatch_assert(_dispatch_deferred_items_get()->ddi_wlh_servicing);
-		_dispatch_event_loop_drain(KEVENT_FLAG_IMMEDIATE);
-	}
-#endif
+	_dispatch_return_to_kernel();
 }
 
-#if HAVE_PTHREAD_WORKQUEUE_NARROWING
-static os_atomic(uint64_t) _dispatch_narrowing_deadlines[DISPATCH_QOS_MAX];
+#if DISPATCH_USE_WORKQUEUE_NARROWING
+DISPATCH_STATIC_GLOBAL(os_atomic(uint64_t)
+_dispatch_narrowing_deadlines[DISPATCH_QOS_NBUCKETS]);
 #if !DISPATCH_TIME_UNIT_USES_NANOSECONDS
-static uint64_t _dispatch_narrow_check_interval_cache;
+DISPATCH_STATIC_GLOBAL(uint64_t _dispatch_narrow_check_interval_cache);
 #endif
 
 DISPATCH_ALWAYS_INLINE
@@ -4878,8 +3480,7 @@
 _dispatch_queue_drain_init_narrowing_check_deadline(dispatch_invoke_context_t dic,
 		dispatch_priority_t pri)
 {
-	if (_dispatch_priority_qos(pri) &&
-			!(pri & DISPATCH_PRIORITY_FLAG_OVERCOMMIT)) {
+	if (!(pri & DISPATCH_PRIORITY_FLAG_OVERCOMMIT)) {
 		dic->dic_next_narrow_check = _dispatch_approximate_time() +
 				_dispatch_narrow_check_interval();
 	}
@@ -4893,10 +3494,10 @@
 	if (dic->dic_next_narrow_check != DISPATCH_THREAD_IS_NARROWING) {
 		pthread_priority_t pp = _dispatch_get_priority();
 		dispatch_qos_t qos = _dispatch_qos_from_pp(pp);
-		if (unlikely(!qos || qos > countof(_dispatch_narrowing_deadlines))) {
+		if (unlikely(qos < DISPATCH_QOS_MIN || qos > DISPATCH_QOS_MAX)) {
 			DISPATCH_CLIENT_CRASH(pp, "Thread QoS corruption");
 		}
-		size_t idx = qos - 1; // no entry needed for DISPATCH_QOS_UNSPECIFIED
+		size_t idx = DISPATCH_QOS_BUCKET(qos);
 		os_atomic(uint64_t) *deadline = &_dispatch_narrowing_deadlines[idx];
 		uint64_t oldval, newval = now + _dispatch_narrow_check_interval();
 
@@ -4962,7 +3563,7 @@
  */
 DISPATCH_ALWAYS_INLINE
 static dispatch_queue_wakeup_target_t
-_dispatch_queue_drain(dispatch_queue_t dq, dispatch_invoke_context_t dic,
+_dispatch_lane_drain(dispatch_lane_t dq, dispatch_invoke_context_t dic,
 		dispatch_invoke_flags_t flags, uint64_t *owned_ptr, bool serial_drain)
 {
 	dispatch_queue_t orig_tq = dq->do_targetq;
@@ -4982,22 +3583,20 @@
 		owned &= DISPATCH_QUEUE_WIDTH_MASK;
 	}
 
-	dc = _dispatch_queue_head(dq);
+	dc = _dispatch_queue_get_head(dq);
 	goto first_iteration;
 
 	for (;;) {
+		dispatch_assert(dic->dic_barrier_waiter == NULL);
 		dc = next_dc;
-		if (unlikely(dic->dic_deferred)) {
-			goto out_with_deferred_compute_owned;
-		}
-		if (unlikely(_dispatch_needs_to_return_to_kernel())) {
-			_dispatch_return_to_kernel();
-		}
 		if (unlikely(!dc)) {
 			if (!dq->dq_items_tail) {
 				break;
 			}
-			dc = _dispatch_queue_head(dq);
+			dc = _dispatch_queue_get_head(dq);
+		}
+		if (unlikely(_dispatch_needs_to_return_to_kernel())) {
+			_dispatch_return_to_kernel();
 		}
 		if (unlikely(serial_drain != (dq->dq_width == 1))) {
 			break;
@@ -5005,6 +3604,12 @@
 		if (unlikely(_dispatch_queue_drain_should_narrow(dic))) {
 			break;
 		}
+		if (likely(flags & DISPATCH_INVOKE_WORKLOOP_DRAIN)) {
+			dispatch_workloop_t dwl = (dispatch_workloop_t)_dispatch_get_wlh();
+			if (unlikely(_dispatch_queue_max_qos(dwl) > dwl->dwl_drained_qos)) {
+				break;
+			}
+		}
 
 first_iteration:
 		dq_state = os_atomic_load(&dq->dq_state, relaxed);
@@ -5022,12 +3627,12 @@
 				}
 				owned = DISPATCH_QUEUE_IN_BARRIER;
 			}
-			next_dc = _dispatch_queue_next(dq, dc);
-			if (_dispatch_object_is_sync_waiter(dc)) {
-				owned = 0;
-				dic->dic_deferred = dc;
-				goto out_with_deferred;
+			if (_dispatch_object_is_sync_waiter(dc) &&
+					!(flags & DISPATCH_INVOKE_THREAD_BOUND)) {
+				dic->dic_barrier_waiter = dc;
+				goto out_with_barrier_waiter;
 			}
+			next_dc = _dispatch_queue_pop_head(dq, dc);
 		} else {
 			if (owned == DISPATCH_QUEUE_IN_BARRIER) {
 				// we just ran barrier work items, we have to make their
@@ -5037,7 +3642,7 @@
 				os_atomic_xor2o(dq, dq_state, owned, release);
 				owned = dq->dq_width * DISPATCH_QUEUE_WIDTH_INTERVAL;
 			} else if (unlikely(owned == 0)) {
-				if (_dispatch_object_is_sync_waiter(dc)) {
+				if (_dispatch_object_is_waiter(dc)) {
 					// sync "readers" don't observe the limit
 					_dispatch_queue_reserve_sync_width(dq);
 				} else if (!_dispatch_queue_try_acquire_async(dq)) {
@@ -5046,17 +3651,22 @@
 				owned = DISPATCH_QUEUE_WIDTH_INTERVAL;
 			}
 
-			next_dc = _dispatch_queue_next(dq, dc);
-			if (_dispatch_object_is_sync_waiter(dc)) {
+			next_dc = _dispatch_queue_pop_head(dq, dc);
+			if (_dispatch_object_is_waiter(dc)) {
 				owned -= DISPATCH_QUEUE_WIDTH_INTERVAL;
-				_dispatch_sync_waiter_redirect_or_wake(dq,
-						DISPATCH_SYNC_WAITER_NO_UNLOCK, dc);
+				_dispatch_non_barrier_waiter_redirect_or_wake(dq, dc);
 				continue;
 			}
 
 			if (flags & DISPATCH_INVOKE_REDIRECTING_DRAIN) {
 				owned -= DISPATCH_QUEUE_WIDTH_INTERVAL;
-				_dispatch_continuation_redirect(dq, dc);
+				// This is a re-redirect, overrides have already been applied by
+				// _dispatch_continuation_async*
+				// However we want to end up on the root queue matching `dc`
+				// qos, so pick up the current override of `dq` which includes
+				// dc's override (and maybe more)
+				_dispatch_continuation_redirect_push(dq, dc,
+						_dispatch_queue_max_qos(dq));
 				continue;
 			}
 		}
@@ -5081,23 +3691,9 @@
 	_dispatch_thread_frame_pop(&dtf);
 	return DISPATCH_QUEUE_WAKEUP_WAIT_FOR_EVENT;
 
-out_with_deferred_compute_owned:
-	if (serial_drain) {
-		owned = DISPATCH_QUEUE_IN_BARRIER + DISPATCH_QUEUE_WIDTH_INTERVAL;
-	} else {
-		if (owned == DISPATCH_QUEUE_IN_BARRIER) {
-			// if we're IN_BARRIER we really own the full width too
-			owned += dq->dq_width * DISPATCH_QUEUE_WIDTH_INTERVAL;
-		}
-		if (dc) {
-			owned = _dispatch_queue_adjust_owned(dq, owned, dc);
-		}
-	}
-out_with_deferred:
-	*owned_ptr &= DISPATCH_QUEUE_ENQUEUED | DISPATCH_QUEUE_ENQUEUED_ON_MGR;
-	*owned_ptr |= owned;
+out_with_barrier_waiter:
 	if (unlikely(flags & DISPATCH_INVOKE_DISALLOW_SYNC_WAITERS)) {
-		DISPATCH_INTERNAL_CRASH(dc,
+		DISPATCH_INTERNAL_CRASH(0,
 				"Deferred continuation on source, mach channel or mgr");
 	}
 	_dispatch_thread_frame_pop(&dtf);
@@ -5106,276 +3702,932 @@
 
 DISPATCH_NOINLINE
 static dispatch_queue_wakeup_target_t
-_dispatch_queue_concurrent_drain(dispatch_queue_t dq,
+_dispatch_lane_concurrent_drain(dispatch_lane_class_t dqu,
 		dispatch_invoke_context_t dic, dispatch_invoke_flags_t flags,
 		uint64_t *owned)
 {
-	return _dispatch_queue_drain(dq, dic, flags, owned, false);
+	return _dispatch_lane_drain(dqu._dl, dic, flags, owned, false);
 }
 
 DISPATCH_NOINLINE
 dispatch_queue_wakeup_target_t
-_dispatch_queue_serial_drain(dispatch_queue_t dq, dispatch_invoke_context_t dic,
-		dispatch_invoke_flags_t flags, uint64_t *owned)
+_dispatch_lane_serial_drain(dispatch_lane_class_t dqu,
+		dispatch_invoke_context_t dic, dispatch_invoke_flags_t flags,
+		uint64_t *owned)
 {
 	flags &= ~(dispatch_invoke_flags_t)DISPATCH_INVOKE_REDIRECTING_DRAIN;
-	return _dispatch_queue_drain(dq, dic, flags, owned, true);
+	return _dispatch_lane_drain(dqu._dl, dic, flags, owned, true);
 }
 
-#if DISPATCH_COCOA_COMPAT || defined(_WIN32)
-DISPATCH_NOINLINE
-static void
-_dispatch_main_queue_update_priority_from_thread(void)
-{
-	dispatch_queue_t dq = &_dispatch_main_q;
-	uint64_t dq_state = os_atomic_load2o(dq, dq_state, relaxed);
-	mach_port_t owner = _dq_state_drain_owner(dq_state);
-
-	dispatch_priority_t main_pri =
-			_dispatch_priority_from_pp_strip_flags(_dispatch_get_priority());
-	dispatch_qos_t main_qos = _dispatch_priority_qos(main_pri);
-	dispatch_qos_t max_qos = _dq_state_max_qos(dq_state);
-	dispatch_qos_t old_qos = _dispatch_priority_qos(dq->dq_priority);
-
-	// the main thread QoS was adjusted by someone else, learn the new QoS
-	// and reinitialize _dispatch_main_q.dq_priority
-	dq->dq_priority = _dispatch_priority_with_override_qos(main_pri, main_qos);
-
-	if (old_qos < max_qos && main_qos == DISPATCH_QOS_UNSPECIFIED) {
-		// main thread is opted out of QoS and we had an override
-		return _dispatch_thread_override_end(owner, dq);
-	}
-
-	if (old_qos < max_qos && max_qos <= main_qos) {
-		// main QoS was raised, and we had an override which is now useless
-		return _dispatch_thread_override_end(owner, dq);
-	}
-
-	if (main_qos < max_qos && max_qos <= old_qos) {
-		// main thread QoS was lowered, and we actually need an override
-		pthread_priority_t pp = _dispatch_qos_to_pp(max_qos);
-		return _dispatch_thread_override_start(owner, pp, dq);
-	}
-}
-
-static void
-_dispatch_main_queue_drain(void)
-{
-	dispatch_queue_t dq = &_dispatch_main_q;
-	dispatch_thread_frame_s dtf;
-
-	if (!dq->dq_items_tail) {
-		return;
-	}
-
-	_dispatch_perfmon_start_notrace();
-	if (!fastpath(_dispatch_queue_is_thread_bound(dq))) {
-		DISPATCH_CLIENT_CRASH(0, "_dispatch_main_queue_callback_4CF called"
-				" after dispatch_main()");
-	}
-	uint64_t dq_state = os_atomic_load2o(dq, dq_state, relaxed);
-	if (unlikely(!_dq_state_drain_locked_by_self(dq_state))) {
-		DISPATCH_CLIENT_CRASH((uintptr_t)dq_state,
-				"_dispatch_main_queue_callback_4CF called"
-				" from the wrong thread");
-	}
-
-	dispatch_once_f(&_dispatch_main_q_handle_pred, dq,
-			_dispatch_runloop_queue_handle_init);
-
-	// <rdar://problem/23256682> hide the frame chaining when CFRunLoop
-	// drains the main runloop, as this should not be observable that way
-	_dispatch_adopt_wlh_anon();
-	_dispatch_thread_frame_push_and_rebase(&dtf, dq, NULL);
-
-	pthread_priority_t pp = _dispatch_get_priority();
-	dispatch_priority_t pri = _dispatch_priority_from_pp(pp);
-	dispatch_qos_t qos = _dispatch_priority_qos(pri);
-	voucher_t voucher = _voucher_copy();
-
-	if (unlikely(qos != _dispatch_priority_qos(dq->dq_priority))) {
-		_dispatch_main_queue_update_priority_from_thread();
-	}
-	dispatch_priority_t old_dbp = _dispatch_set_basepri(pri);
-	_dispatch_set_basepri_override_qos(DISPATCH_QOS_SATURATED);
-
-	dispatch_invoke_context_s dic = { };
-	struct dispatch_object_s *dc, *next_dc, *tail;
-	dc = os_mpsc_capture_snapshot(dq, dq_items, &tail);
-	do {
-		next_dc = os_mpsc_pop_snapshot_head(dc, tail, do_next);
-		_dispatch_continuation_pop_inline(dc, &dic, DISPATCH_INVOKE_NONE, dq);
-	} while ((dc = next_dc));
-
-	dx_wakeup(dq, 0, 0);
-	_dispatch_voucher_debug("main queue restore", voucher);
-	_dispatch_reset_basepri(old_dbp);
-	_dispatch_reset_basepri_override();
-	_dispatch_reset_priority_and_voucher(pp, voucher);
-	_dispatch_thread_frame_pop(&dtf);
-	_dispatch_reset_wlh();
-	_dispatch_force_cache_cleanup();
-	_dispatch_perfmon_end_notrace();
-}
-
-static bool
-_dispatch_runloop_queue_drain_one(dispatch_queue_t dq)
-{
-	if (!dq->dq_items_tail) {
-		return false;
-	}
-	_dispatch_perfmon_start_notrace();
-	dispatch_thread_frame_s dtf;
-	bool should_reset_wlh = _dispatch_adopt_wlh_anon_recurse();
-	_dispatch_thread_frame_push(&dtf, dq);
-	pthread_priority_t pp = _dispatch_get_priority();
-	dispatch_priority_t pri = _dispatch_priority_from_pp(pp);
-	voucher_t voucher = _voucher_copy();
-	dispatch_priority_t old_dbp = _dispatch_set_basepri(pri);
-	_dispatch_set_basepri_override_qos(DISPATCH_QOS_SATURATED);
-
-	dispatch_invoke_context_s dic = { };
-	struct dispatch_object_s *dc, *next_dc;
-	dc = _dispatch_queue_head(dq);
-	next_dc = _dispatch_queue_next(dq, dc);
-	_dispatch_continuation_pop_inline(dc, &dic, DISPATCH_INVOKE_NONE, dq);
-
-	if (!next_dc) {
-		dx_wakeup(dq, 0, 0);
-	}
-
-	_dispatch_voucher_debug("runloop queue restore", voucher);
-	_dispatch_reset_basepri(old_dbp);
-	_dispatch_reset_basepri_override();
-	_dispatch_reset_priority_and_voucher(pp, voucher);
-	_dispatch_thread_frame_pop(&dtf);
-	if (should_reset_wlh) _dispatch_reset_wlh();
-	_dispatch_force_cache_cleanup();
-	_dispatch_perfmon_end_notrace();
-	return next_dc;
-}
-#endif
-
 void
-_dispatch_mgr_queue_drain(void)
+_dispatch_queue_invoke_finish(dispatch_queue_t dq,
+		dispatch_invoke_context_t dic, dispatch_queue_t tq, uint64_t owned)
 {
-	const dispatch_invoke_flags_t flags = DISPATCH_INVOKE_MANAGER_DRAIN;
-	dispatch_invoke_context_s dic = { };
-	dispatch_queue_t dq = &_dispatch_mgr_q;
-	uint64_t owned = DISPATCH_QUEUE_SERIAL_DRAIN_OWNED;
-
-	if (dq->dq_items_tail) {
-		_dispatch_perfmon_start();
-		_dispatch_set_basepri_override_qos(DISPATCH_QOS_SATURATED);
-		if (slowpath(_dispatch_queue_serial_drain(dq, &dic, flags, &owned))) {
-			DISPATCH_INTERNAL_CRASH(0, "Interrupted drain on manager queue");
+	struct dispatch_object_s *dc = dic->dic_barrier_waiter;
+	dispatch_qos_t qos = dic->dic_barrier_waiter_bucket;
+	if (dc) {
+		dic->dic_barrier_waiter = NULL;
+		dic->dic_barrier_waiter_bucket = DISPATCH_QOS_UNSPECIFIED;
+		owned &= DISPATCH_QUEUE_ENQUEUED | DISPATCH_QUEUE_ENQUEUED_ON_MGR;
+#if DISPATCH_INTROSPECTION
+		dispatch_sync_context_t dsc = (dispatch_sync_context_t)dc;
+		dsc->dsc_from_async = true;
+#endif
+		if (qos) {
+			return _dispatch_workloop_drain_barrier_waiter(upcast(dq)._dwl,
+					dc, qos, DISPATCH_WAKEUP_CONSUME_2, owned);
 		}
-		_dispatch_voucher_debug("mgr queue clear", NULL);
-		_voucher_clear();
-		_dispatch_reset_basepri_override();
-		_dispatch_perfmon_end(perfmon_thread_manager);
+		return _dispatch_lane_drain_barrier_waiter(upcast(dq)._dl, dc,
+				DISPATCH_WAKEUP_CONSUME_2, owned);
 	}
 
-#if DISPATCH_USE_KEVENT_WORKQUEUE
-	if (!_dispatch_kevent_workqueue_enabled)
-#endif
-	{
-		_dispatch_force_cache_cleanup();
+	uint64_t old_state, new_state, enqueued = DISPATCH_QUEUE_ENQUEUED;
+	if (tq == DISPATCH_QUEUE_WAKEUP_MGR) {
+		enqueued = DISPATCH_QUEUE_ENQUEUED_ON_MGR;
 	}
-}
-
-#pragma mark -
-#pragma mark dispatch_queue_invoke
-
-void
-_dispatch_queue_drain_sync_waiter(dispatch_queue_t dq,
-		dispatch_invoke_context_t dic, dispatch_invoke_flags_t flags,
-		uint64_t owned)
-{
-	struct dispatch_object_s *dc = dic->dic_deferred;
-	dispatch_assert(_dispatch_object_is_sync_waiter(dc));
-	dic->dic_deferred = NULL;
-	if (flags & DISPATCH_INVOKE_WLH) {
-		// Leave the enqueued bit in place, completion of the last sync waiter
-		// in the handoff chain is responsible for dequeuing
-		//
-		// We currently have a +2 to consume, but we need to keep a +1
-		// for the thread request
-		dispatch_assert(_dq_state_is_enqueued_on_target(owned));
-		dispatch_assert(!_dq_state_is_enqueued_on_manager(owned));
-		owned &= ~DISPATCH_QUEUE_ENQUEUED;
-		_dispatch_release_no_dispose(dq);
-	} else {
-		// The sync waiter must own a reference
-		_dispatch_release_2_no_dispose(dq);
+	os_atomic_rmw_loop2o(dq, dq_state, old_state, new_state, release, {
+		new_state  = old_state - owned;
+		new_state &= ~DISPATCH_QUEUE_DRAIN_UNLOCK_MASK;
+		new_state |= DISPATCH_QUEUE_DIRTY;
+		if (_dq_state_is_runnable(new_state) &&
+				!_dq_state_is_enqueued(new_state)) {
+			// drain was not interrupted for suspension
+			// we will reenqueue right away, just put ENQUEUED back
+			new_state |= enqueued;
+		}
+	});
+	old_state -= owned;
+	if (_dq_state_received_override(old_state)) {
+		// Ensure that the root queue sees that this thread was overridden.
+		_dispatch_set_basepri_override_qos(_dq_state_max_qos(new_state));
 	}
-	return _dispatch_sync_waiter_redirect_or_wake(dq, owned, dc);
+	if ((old_state ^ new_state) & enqueued) {
+		dispatch_assert(_dq_state_is_enqueued(new_state));
+		return _dispatch_queue_push_queue(tq, dq, new_state);
+	}
+	return _dispatch_release_2_tailcall(dq);
 }
 
 void
-_dispatch_queue_finalize_activation(dispatch_queue_t dq,
+_dispatch_lane_activate(dispatch_lane_class_t dq,
 		DISPATCH_UNUSED bool *allow_resume)
 {
-	dispatch_queue_t tq = dq->do_targetq;
-	_dispatch_queue_priority_inherit_from_target(dq, tq);
-	_dispatch_queue_inherit_wlh_from_target(dq, tq);
+	dispatch_queue_t tq = dq._dl->do_targetq;
+	dispatch_priority_t pri = dq._dl->dq_priority;
+
+	// Normalize priority: keep the fallback only when higher than the floor
+	if (_dispatch_priority_fallback_qos(pri) <= _dispatch_priority_qos(pri) ||
+			(_dispatch_priority_qos(pri) &&
+			!(pri & DISPATCH_PRIORITY_FLAG_FLOOR))) {
+		pri &= ~DISPATCH_PRIORITY_FALLBACK_QOS_MASK;
+		pri &= ~DISPATCH_PRIORITY_FLAG_FALLBACK;
+		dq._dl->dq_priority = pri;
+	}
+	tq = _dispatch_queue_priority_inherit_from_target(dq, tq);
+	_dispatch_lane_inherit_wlh_from_target(dq._dl, tq);
 }
 
 DISPATCH_ALWAYS_INLINE
 static inline dispatch_queue_wakeup_target_t
-dispatch_queue_invoke2(dispatch_queue_t dq, dispatch_invoke_context_t dic,
+_dispatch_lane_invoke2(dispatch_lane_t dq, dispatch_invoke_context_t dic,
 		dispatch_invoke_flags_t flags, uint64_t *owned)
 {
 	dispatch_queue_t otq = dq->do_targetq;
 	dispatch_queue_t cq = _dispatch_queue_get_current();
 
-	if (slowpath(cq != otq)) {
+	if (unlikely(cq != otq)) {
 		return otq;
 	}
 	if (dq->dq_width == 1) {
-		return _dispatch_queue_serial_drain(dq, dic, flags, owned);
+		return _dispatch_lane_serial_drain(dq, dic, flags, owned);
 	}
-	return _dispatch_queue_concurrent_drain(dq, dic, flags, owned);
+	return _dispatch_lane_concurrent_drain(dq, dic, flags, owned);
 }
 
-// 6618342 Contact the team that owns the Instrument DTrace probe before
-//         renaming this symbol
 DISPATCH_NOINLINE
 void
-_dispatch_queue_invoke(dispatch_queue_t dq, dispatch_invoke_context_t dic,
+_dispatch_lane_invoke(dispatch_lane_t dq, dispatch_invoke_context_t dic,
 		dispatch_invoke_flags_t flags)
 {
-	_dispatch_queue_class_invoke(dq, dic, flags, 0, dispatch_queue_invoke2);
+	_dispatch_queue_class_invoke(dq, dic, flags, 0, _dispatch_lane_invoke2);
 }
 
 #pragma mark -
-#pragma mark dispatch_queue_class_wakeup
+#pragma mark dispatch_workloop_t
+
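+// A workloop keeps one MPSC queue per QoS bucket (dwl_heads/dwl_tails);
+// the macros below select the queue matching a given QoS.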
+#define _dispatch_wl(dwl, qos) os_mpsc(dwl, dwl, s[DISPATCH_QOS_BUCKET(qos)])
+#define _dispatch_workloop_looks_empty(dwl, qos) \
+		os_mpsc_looks_empty(_dispatch_wl(dwl, qos))
+#define _dispatch_workloop_get_head(dwl, qos) \
+		os_mpsc_get_head(_dispatch_wl(dwl, qos))
+#define _dispatch_workloop_pop_head(dwl, qos, dc) \
+		os_mpsc_pop_head(_dispatch_wl(dwl, qos), dc, do_next)
+#define _dispatch_workloop_push_update_tail(dwl, qos, dou) \
+		os_mpsc_push_update_tail(_dispatch_wl(dwl, qos), dou, do_next)
+#define _dispatch_workloop_push_update_prev(dwl, qos, prev, dou) \
+		os_mpsc_push_update_prev(_dispatch_wl(dwl, qos), prev, dou, do_next)
+
+dispatch_workloop_t
+dispatch_workloop_copy_current(void)
+{
+	dispatch_workloop_t dwl = _dispatch_wlh_to_workloop(_dispatch_get_wlh());
+	if (likely(dwl)) {
+		_os_object_retain_with_resurrect(dwl->_as_os_obj);
+		return dwl;
+	}
+	return NULL;
+}
+
+bool
+dispatch_workloop_is_current(dispatch_workloop_t dwl)
+{
+	return _dispatch_get_wlh() == (dispatch_wlh_t)dwl;
+}
+
+DISPATCH_ALWAYS_INLINE
+static inline uint64_t
+_dispatch_workloop_role_bits(void)
+{
+#if DISPATCH_USE_KEVENT_WORKLOOP
+	if (likely(_dispatch_kevent_workqueue_enabled)) {
+		return DISPATCH_QUEUE_ROLE_BASE_WLH;
+	}
+#endif
+	return DISPATCH_QUEUE_ROLE_BASE_ANON;
+}
+
+bool
+_dispatch_workloop_should_yield_4NW(void)
+{
+	dispatch_workloop_t dwl = _dispatch_wlh_to_workloop(_dispatch_get_wlh());
+	if (likely(dwl)) {
+		return _dispatch_queue_max_qos(dwl) > dwl->dwl_drained_qos;
+	}
+	return false;
+}
+
+DISPATCH_NOINLINE
+static dispatch_workloop_t
+_dispatch_workloop_create(const char *label, uint64_t dq_state)
+{
+	dispatch_queue_flags_t dqf = DQF_AUTORELEASE_ALWAYS;
+	dispatch_workloop_t dwl;
+
+	if (label) {
+		const char *tmp = _dispatch_strdup_if_mutable(label);
+		if (tmp != label) {
+			dqf |= DQF_LABEL_NEEDS_FREE;
+			label = tmp;
+		}
+	}
+
+	dq_state |= _dispatch_workloop_role_bits();
+
+	dwl = _dispatch_queue_alloc(workloop, dqf, 1, dq_state)._dwl;
+	dwl->dq_label = label;
+	dwl->do_targetq = _dispatch_get_default_queue(true);
+	if (!(dq_state & DISPATCH_QUEUE_INACTIVE)) {
+		dwl->dq_priority = DISPATCH_PRIORITY_FLAG_OVERCOMMIT |
+				_dispatch_priority_make_fallback(DISPATCH_QOS_DEFAULT);
+	}
+	_dispatch_object_debug(dwl, "%s", __func__);
+	return _dispatch_introspection_queue_create(dwl)._dwl;
+}
+
+dispatch_workloop_t
+dispatch_workloop_create(const char *label)
+{
+	return _dispatch_workloop_create(label, 0);
+}
+
+dispatch_workloop_t
+dispatch_workloop_create_inactive(const char *label)
+{
+	return _dispatch_workloop_create(label, DISPATCH_QUEUE_INACTIVE);
+}
+
+void
+dispatch_workloop_set_autorelease_frequency(dispatch_workloop_t dwl,
+		dispatch_autorelease_frequency_t frequency)
+{
+	if (frequency == DISPATCH_AUTORELEASE_FREQUENCY_WORK_ITEM) {
+		_dispatch_queue_atomic_flags_set_and_clear(dwl,
+				DQF_AUTORELEASE_ALWAYS, DQF_AUTORELEASE_NEVER);
+	} else {
+		_dispatch_queue_atomic_flags_set_and_clear(dwl,
+				DQF_AUTORELEASE_NEVER, DQF_AUTORELEASE_ALWAYS);
+	}
+	_dispatch_queue_setter_assert_inactive(dwl);
+}
+
+DISPATCH_ALWAYS_INLINE
+static void
+_dispatch_workloop_attributes_dispose(dispatch_workloop_t dwl)
+{
+	if (dwl->dwl_attr) {
+		free(dwl->dwl_attr);
+	}
+}
+
+#if TARGET_OS_MAC
+DISPATCH_ALWAYS_INLINE
+static bool
+_dispatch_workloop_has_kernel_attributes(dispatch_workloop_t dwl)
+{
+	return dwl->dwl_attr && (dwl->dwl_attr->dwla_flags &
+			(DISPATCH_WORKLOOP_ATTR_HAS_SCHED |
+			 DISPATCH_WORKLOOP_ATTR_HAS_POLICY |
+			 DISPATCH_WORKLOOP_ATTR_HAS_CPUPERCENT));
+}
+
+void
+dispatch_workloop_set_scheduler_priority(dispatch_workloop_t dwl, int priority,
+		uint64_t flags)
+{
+	_dispatch_queue_setter_assert_inactive(dwl);
+	_dispatch_workloop_attributes_alloc_if_needed(dwl);
+
+	if (priority) {
+		dwl->dwl_attr->dwla_sched.sched_priority = priority;
+		dwl->dwl_attr->dwla_flags |= DISPATCH_WORKLOOP_ATTR_HAS_SCHED;
+	} else {
+		dwl->dwl_attr->dwla_sched.sched_priority = 0;
+		dwl->dwl_attr->dwla_flags &= ~DISPATCH_WORKLOOP_ATTR_HAS_SCHED;
+	}
+
+	if (flags & DISPATCH_WORKLOOP_FIXED_PRIORITY) {
+		dwl->dwl_attr->dwla_policy = POLICY_RR;
+		dwl->dwl_attr->dwla_flags |= DISPATCH_WORKLOOP_ATTR_HAS_POLICY;
+	} else {
+		dwl->dwl_attr->dwla_flags &= ~DISPATCH_WORKLOOP_ATTR_HAS_POLICY;
+	}
+}
+#endif // TARGET_OS_MAC
+
+void
+dispatch_workloop_set_qos_class_floor(dispatch_workloop_t dwl,
+		qos_class_t cls, int relpri, uint64_t flags)
+{
+	_dispatch_queue_setter_assert_inactive(dwl);
+	_dispatch_workloop_attributes_alloc_if_needed(dwl);
+
+	dispatch_qos_t qos = _dispatch_qos_from_qos_class(cls);
+
+	if (qos) {
+		dwl->dwl_attr->dwla_pri = _dispatch_priority_make(qos, relpri);
+		dwl->dwl_attr->dwla_flags |= DISPATCH_WORKLOOP_ATTR_HAS_QOS_CLASS;
+	} else {
+		dwl->dwl_attr->dwla_pri = 0;
+		dwl->dwl_attr->dwla_flags &= ~DISPATCH_WORKLOOP_ATTR_HAS_QOS_CLASS;
+	}
+
+#if TARGET_OS_MAC
+	if (flags & DISPATCH_WORKLOOP_FIXED_PRIORITY) {
+		dwl->dwl_attr->dwla_policy = POLICY_RR;
+		dwl->dwl_attr->dwla_flags |= DISPATCH_WORKLOOP_ATTR_HAS_POLICY;
+	} else {
+		dwl->dwl_attr->dwla_flags &= ~DISPATCH_WORKLOOP_ATTR_HAS_POLICY;
+	}
+#else // TARGET_OS_MAC
+	(void)flags;
+#endif // TARGET_OS_MAC
+}
+
+void
+dispatch_workloop_set_qos_class(dispatch_workloop_t dwl,
+		qos_class_t cls, uint64_t flags)
+{
+	dispatch_workloop_set_qos_class_floor(dwl, cls, 0, flags);
+}
+
+void
+dispatch_workloop_set_cpupercent(dispatch_workloop_t dwl, uint8_t percent,
+		uint32_t refillms)
+{
+	_dispatch_queue_setter_assert_inactive(dwl);
+	_dispatch_workloop_attributes_alloc_if_needed(dwl);
+
+	if ((dwl->dwl_attr->dwla_flags & (DISPATCH_WORKLOOP_ATTR_HAS_SCHED |
+			DISPATCH_WORKLOOP_ATTR_HAS_QOS_CLASS)) == 0) {
+		DISPATCH_CLIENT_CRASH(0, "workloop qos class or priority must be "
+				"set before cpupercent");
+	}
+
+	dwl->dwl_attr->dwla_cpupercent.percent = percent;
+	dwl->dwl_attr->dwla_cpupercent.refillms = refillms;
+	dwl->dwl_attr->dwla_flags |= DISPATCH_WORKLOOP_ATTR_HAS_CPUPERCENT;
+}
+
+#if TARGET_OS_MAC
+static void
+_dispatch_workloop_activate_simulator_fallback(dispatch_workloop_t dwl,
+		pthread_attr_t *attr)
+{
+	uint64_t old_state, new_state;
+	dispatch_queue_global_t dprq;
+
+	dprq = dispatch_pthread_root_queue_create(
+			"com.apple.libdispatch.workloop_fallback", 0, attr, NULL);
+
+	dwl->do_targetq = dprq->_as_dq;
+	_dispatch_retain(dprq);
+	dispatch_release(dprq);
+
+	os_atomic_rmw_loop2o(dwl, dq_state, old_state, new_state, relaxed, {
+		new_state = old_state & ~DISPATCH_QUEUE_ROLE_MASK;
+		new_state |= DISPATCH_QUEUE_ROLE_BASE_ANON;
+	});
+}
+
+static const struct dispatch_queue_global_s _dispatch_custom_workloop_root_queue = {
+	DISPATCH_GLOBAL_OBJECT_HEADER(queue_global),
+	.dq_state = DISPATCH_ROOT_QUEUE_STATE_INIT_VALUE,
+	.do_ctxt = NULL,
+	.dq_label = "com.apple.root.workloop-custom",
+	.dq_atomic_flags = DQF_WIDTH(DISPATCH_QUEUE_WIDTH_POOL),
+	.dq_priority = DISPATCH_PRIORITY_FLAG_MANAGER |
+			DISPATCH_PRIORITY_SATURATED_OVERRIDE,
+	.dq_serialnum = DISPATCH_QUEUE_SERIAL_NUMBER_WLF,
+	.dgq_thread_pool_size = 1,
+};
+#endif // TARGET_OS_MAC
+
+static void
+_dispatch_workloop_activate_attributes(dispatch_workloop_t dwl)
+{
+#if defined(_POSIX_THREADS)
+	dispatch_workloop_attr_t dwla = dwl->dwl_attr;
+	pthread_attr_t attr;
+
+	pthread_attr_init(&attr);
+	if (dwla->dwla_flags & DISPATCH_WORKLOOP_ATTR_HAS_QOS_CLASS) {
+		dwl->dq_priority |= dwla->dwla_pri | DISPATCH_PRIORITY_FLAG_FLOOR;
+	}
+#if TARGET_OS_MAC
+	if (dwla->dwla_flags & DISPATCH_WORKLOOP_ATTR_HAS_SCHED) {
+		pthread_attr_setschedparam(&attr, &dwla->dwla_sched);
+		// _dispatch_async_and_wait_should_always_async detects when a queue
+		// targets a root queue that is not part of the root queues array in
+		// order to force async_and_wait to async. We want this path to always
+		// be taken on workloops that have a scheduler priority set.
+		dwl->do_targetq =
+				(dispatch_queue_t)_dispatch_custom_workloop_root_queue._as_dq;
+	}
+	if (dwla->dwla_flags & DISPATCH_WORKLOOP_ATTR_HAS_POLICY) {
+		pthread_attr_setschedpolicy(&attr, dwla->dwla_policy);
+	}
+#endif // TARGET_OS_MAC
+#if HAVE_PTHREAD_ATTR_SETCPUPERCENT_NP
+	if (dwla->dwla_flags & DISPATCH_WORKLOOP_ATTR_HAS_CPUPERCENT) {
+		pthread_attr_setcpupercent_np(&attr, dwla->dwla_cpupercent.percent,
+				(unsigned long)dwla->dwla_cpupercent.refillms);
+	}
+#endif // HAVE_PTHREAD_ATTR_SETCPUPERCENT_NP
+#if TARGET_OS_MAC
+	if (_dispatch_workloop_has_kernel_attributes(dwl)) {
+		int rv = _pthread_workloop_create((uint64_t)dwl, 0, &attr);
+		switch (rv) {
+		case 0:
+			dwla->dwla_flags |= DISPATCH_WORKLOOP_ATTR_NEEDS_DESTROY;
+			break;
+		case ENOTSUP:
+			/* simulator fallback */
+			_dispatch_workloop_activate_simulator_fallback(dwl, &attr);
+			break;
+		default:
+			dispatch_assert_zero(rv);
+		}
+	}
+#endif // TARGET_OS_MAC
+	pthread_attr_destroy(&attr);
+#endif // defined(_POSIX_THREADS)
+}
+
+void
+_dispatch_workloop_dispose(dispatch_workloop_t dwl, bool *allow_free)
+{
+	uint64_t dq_state = os_atomic_load2o(dwl, dq_state, relaxed);
+	uint64_t initial_state = DISPATCH_QUEUE_STATE_INIT_VALUE(1);
+
+	initial_state |= _dispatch_workloop_role_bits();
+
+	if (unlikely(dq_state != initial_state)) {
+		if (_dq_state_drain_locked(dq_state)) {
+			DISPATCH_CLIENT_CRASH((uintptr_t)dq_state,
+					"Release of a locked workloop");
+		}
+#if DISPATCH_SIZEOF_PTR == 4
+		dq_state >>= 32;
+#endif
+		DISPATCH_CLIENT_CRASH((uintptr_t)dq_state,
+				"Release of a workloop with corrupt state");
+	}
+
+	_dispatch_object_debug(dwl, "%s", __func__);
+	_dispatch_introspection_queue_dispose(dwl);
+
+	for (size_t i = 0; i < countof(dwl->dwl_tails); i++) {
+		if (unlikely(dwl->dwl_tails[i])) {
+			DISPATCH_CLIENT_CRASH(dwl->dwl_tails[i],
+					"Release of a workloop while items are enqueued");
+		}
+		// trash the queue so that use after free will crash
+		dwl->dwl_tails[i] = (void *)0x200;
+		dwl->dwl_heads[i] = (void *)0x200;
+	}
+
+	if (dwl->dwl_timer_heap) {
+		for (size_t i = 0; i < DISPATCH_TIMER_WLH_COUNT; i++) {
+			dispatch_assert(dwl->dwl_timer_heap[i].dth_count == 0);
+		}
+		free(dwl->dwl_timer_heap);
+		dwl->dwl_timer_heap = NULL;
+	}
+
+#if TARGET_OS_MAC
+	if (dwl->dwl_attr && (dwl->dwl_attr->dwla_flags &
+			DISPATCH_WORKLOOP_ATTR_NEEDS_DESTROY)) {
+		(void)dispatch_assume_zero(_pthread_workloop_destroy((uint64_t)dwl));
+	}
+#endif // TARGET_OS_MAC
+	_dispatch_workloop_attributes_dispose(dwl);
+	_dispatch_queue_dispose(dwl, allow_free);
+}
+
+void
+_dispatch_workloop_activate(dispatch_workloop_t dwl)
+{
+	uint64_t dq_state = os_atomic_and_orig2o(dwl, dq_state,
+			~DISPATCH_QUEUE_INACTIVE, relaxed);
+
+	if (likely(dq_state & DISPATCH_QUEUE_INACTIVE)) {
+		if (dwl->dwl_attr) {
+			// Activation of a workloop with attributes forces us to create
+			// the workloop up front and register the attributes with the
+			// kernel.
+			_dispatch_workloop_activate_attributes(dwl);
+		}
+		if (!dwl->dq_priority) {
+			dwl->dq_priority =
+					_dispatch_priority_make_fallback(DISPATCH_QOS_DEFAULT);
+		}
+		dwl->dq_priority |= DISPATCH_PRIORITY_FLAG_OVERCOMMIT;
+		os_atomic_and2o(dwl, dq_state, ~DISPATCH_QUEUE_NEEDS_ACTIVATION,
+				relaxed);
+		_dispatch_workloop_wakeup(dwl, 0, DISPATCH_WAKEUP_CONSUME_2);
+		return;
+	}
+}
+
+DISPATCH_ALWAYS_INLINE
+static inline bool
+_dispatch_workloop_try_lower_max_qos(dispatch_workloop_t dwl,
+		dispatch_qos_t qos)
+{
+	uint64_t old_state, new_state, qos_bits = _dq_state_from_qos(qos);
+
+	os_atomic_rmw_loop2o(dwl, dq_state, old_state, new_state, relaxed, {
+		if ((old_state & DISPATCH_QUEUE_MAX_QOS_MASK) <= qos_bits) {
+			os_atomic_rmw_loop_give_up(return true);
+		}
+
+		if (unlikely(_dq_state_is_dirty(old_state))) {
+			os_atomic_rmw_loop_give_up({
+				os_atomic_xor2o(dwl, dq_state, DISPATCH_QUEUE_DIRTY, acquire);
+				return false;
+			});
+		}
+
+		new_state  = old_state;
+		new_state &= ~DISPATCH_QUEUE_MAX_QOS_MASK;
+		new_state |= qos_bits;
+	});
+
+#if DISPATCH_USE_KEVENT_WORKQUEUE
+	dispatch_deferred_items_t ddi = _dispatch_deferred_items_get();
+	if (likely(ddi)) {
+		ddi->ddi_wlh_needs_update = true;
+		_dispatch_return_to_kernel();
+	}
+#endif // DISPATCH_USE_KEVENT_WORKQUEUE
+	return true;
+}
+
+DISPATCH_ALWAYS_INLINE
+static inline dispatch_queue_wakeup_target_t
+_dispatch_workloop_invoke2(dispatch_workloop_t dwl,
+		dispatch_invoke_context_t dic, dispatch_invoke_flags_t flags,
+		uint64_t *owned)
+{
+	dispatch_thread_frame_s dtf;
+	struct dispatch_object_s *dc = NULL, *next_dc;
+
+	_dispatch_thread_frame_push(&dtf, dwl);
+
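+	// Repeatedly drain the highest non-empty QoS bucket; stop draining a
+	// bucket early when a higher-QoS item arrives or when a sync waiter
+	// must take over the drain lock.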
+	for (;;) {
+		dispatch_qos_t qos;
+		for (qos = DISPATCH_QOS_MAX; qos >= DISPATCH_QOS_MIN; qos--) {
+			if (!_dispatch_workloop_looks_empty(dwl, qos)) break;
+		}
+		if (qos < DISPATCH_QOS_MIN) {
+			break;
+		}
+		if (unlikely(!_dispatch_workloop_try_lower_max_qos(dwl, qos))) {
+			continue;
+		}
+		dwl->dwl_drained_qos = (uint8_t)qos;
+
+		dc = _dispatch_workloop_get_head(dwl, qos);
+		do {
+			if (_dispatch_object_is_sync_waiter(dc)) {
+				dic->dic_barrier_waiter_bucket = qos;
+				dic->dic_barrier_waiter = dc;
+				dwl->dwl_drained_qos = DISPATCH_QOS_UNSPECIFIED;
+				goto out_with_barrier_waiter;
+			}
+			next_dc = _dispatch_workloop_pop_head(dwl, qos, dc);
+			if (unlikely(_dispatch_needs_to_return_to_kernel())) {
+				_dispatch_return_to_kernel();
+			}
+
+			_dispatch_continuation_pop_inline(dc, dic, flags, dwl);
+			qos = dwl->dwl_drained_qos;
+		} while ((dc = next_dc) && (_dispatch_queue_max_qos(dwl) <= qos));
+	}
+
+	*owned = (*owned & DISPATCH_QUEUE_ENQUEUED) +
+			DISPATCH_QUEUE_IN_BARRIER + DISPATCH_QUEUE_WIDTH_INTERVAL;
+	_dispatch_thread_frame_pop(&dtf);
+	return NULL;
+
+out_with_barrier_waiter:
+	_dispatch_thread_frame_pop(&dtf);
+	return dwl->do_targetq;
+}
+
+void
+_dispatch_workloop_invoke(dispatch_workloop_t dwl,
+		dispatch_invoke_context_t dic, dispatch_invoke_flags_t flags)
+{
+	flags &= ~(dispatch_invoke_flags_t)DISPATCH_INVOKE_REDIRECTING_DRAIN;
+	flags |= DISPATCH_INVOKE_WORKLOOP_DRAIN;
+	_dispatch_queue_class_invoke(dwl, dic, flags, 0,
+			_dispatch_workloop_invoke2);
+}
+
+DISPATCH_ALWAYS_INLINE
+static bool
+_dispatch_workloop_probe(dispatch_workloop_t dwl)
+{
+	dispatch_qos_t qos;
+	for (qos = DISPATCH_QOS_MAX; qos >= DISPATCH_QOS_MIN; qos--) {
+		if (!_dispatch_workloop_looks_empty(dwl, qos)) return true;
+	}
+	return false;
+}
+
+DISPATCH_NOINLINE
+static void
+_dispatch_workloop_drain_barrier_waiter(dispatch_workloop_t dwl,
+		struct dispatch_object_s *dc, dispatch_qos_t qos,
+		dispatch_wakeup_flags_t flags, uint64_t enqueued_bits)
+{
+	dispatch_sync_context_t dsc = (dispatch_sync_context_t)dc;
+	uint64_t next_owner = 0, old_state, new_state;
+	bool has_more_work;
+
+	next_owner = _dispatch_lock_value_from_tid(dsc->dsc_waiter);
+	has_more_work = (_dispatch_workloop_pop_head(dwl, qos, dc) != NULL);
+
+transfer_lock_again:
+	if (!has_more_work) {
+		has_more_work = _dispatch_workloop_probe(dwl);
+	}
+
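+	// Transfer the drain lock directly to the waiter's thread; for a wlh
+	// base, drop the enqueued bit only when no further work remains.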
+	os_atomic_rmw_loop2o(dwl, dq_state, old_state, new_state, release, {
+		new_state  = old_state;
+		new_state &= ~DISPATCH_QUEUE_DRAIN_UNLOCK_MASK;
+		new_state &= ~DISPATCH_QUEUE_DIRTY;
+		new_state |= next_owner;
+
+		if (likely(_dq_state_is_base_wlh(old_state))) {
+			new_state |= DISPATCH_QUEUE_SYNC_TRANSFER;
+			if (has_more_work) {
+				// we know there's a next item, keep the enqueued bit if any
+			} else if (unlikely(_dq_state_is_dirty(old_state))) {
+				os_atomic_rmw_loop_give_up({
+					os_atomic_xor2o(dwl, dq_state, DISPATCH_QUEUE_DIRTY, acquire);
+					goto transfer_lock_again;
+				});
+			} else {
+				new_state &= ~DISPATCH_QUEUE_MAX_QOS_MASK;
+				new_state &= ~DISPATCH_QUEUE_ENQUEUED;
+			}
+		} else {
+			new_state -= enqueued_bits;
+		}
+	});
+
+	return _dispatch_barrier_waiter_redirect_or_wake(dwl, dc, flags,
+			old_state, new_state);
+}
+
+static void
+_dispatch_workloop_barrier_complete(dispatch_workloop_t dwl, dispatch_qos_t qos,
+		dispatch_wakeup_flags_t flags)
+{
+	dispatch_queue_wakeup_target_t target = DISPATCH_QUEUE_WAKEUP_NONE;
+	dispatch_qos_t wl_qos;
+
+again:
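+	// Scan all QoS buckets: a waiter at the head of a bucket gets the drain
+	// lock handed off, any other pending work forces a re-enqueue instead.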
+	for (wl_qos = DISPATCH_QOS_MAX; wl_qos >= DISPATCH_QOS_MIN; wl_qos--) {
+		struct dispatch_object_s *dc;
+
+		if (_dispatch_workloop_looks_empty(dwl, wl_qos)) continue;
+		dc = _dispatch_workloop_get_head(dwl, wl_qos);
+
+		if (_dispatch_object_is_waiter(dc)) {
+			return _dispatch_workloop_drain_barrier_waiter(dwl, dc, wl_qos,
+					flags, 0);
+		}
+
+		// We have work to do, so we need to wake up
+		target = DISPATCH_QUEUE_WAKEUP_TARGET;
+	}
+
+	if (unlikely(target && !(flags & DISPATCH_WAKEUP_CONSUME_2))) {
+		_dispatch_retain_2(dwl);
+		flags |= DISPATCH_WAKEUP_CONSUME_2;
+	}
+
+	uint64_t old_state, new_state;
+
+	os_atomic_rmw_loop2o(dwl, dq_state, old_state, new_state, release, {
+		new_state  = _dq_state_merge_qos(old_state, qos);
+		new_state -= DISPATCH_QUEUE_IN_BARRIER;
+		new_state -= DISPATCH_QUEUE_WIDTH_INTERVAL;
+		new_state &= ~DISPATCH_QUEUE_DRAIN_UNLOCK_MASK;
+		if (target) {
+			new_state |= DISPATCH_QUEUE_ENQUEUED;
+		} else if (unlikely(_dq_state_is_dirty(old_state))) {
+			os_atomic_rmw_loop_give_up({
+				// just renew the drain lock with an acquire barrier, to see
+				// what the enqueuer that set DIRTY has done.
+				// the xor generates better assembly as DISPATCH_QUEUE_DIRTY
+				// is already in a register
+				os_atomic_xor2o(dwl, dq_state, DISPATCH_QUEUE_DIRTY, acquire);
+				goto again;
+			});
+		} else if (likely(_dq_state_is_base_wlh(old_state))) {
+			new_state &= ~DISPATCH_QUEUE_MAX_QOS_MASK;
+			new_state &= ~DISPATCH_QUEUE_ENQUEUED;
+		} else {
+			new_state &= ~DISPATCH_QUEUE_MAX_QOS_MASK;
+		}
+	});
+	dispatch_assert(_dq_state_drain_locked_by_self(old_state));
+	dispatch_assert(!_dq_state_is_enqueued_on_manager(old_state));
+
+	if (_dq_state_is_enqueued(new_state)) {
+		_dispatch_trace_runtime_event(sync_async_handoff, dwl, 0);
+	}
+
+#if DISPATCH_USE_KEVENT_WORKLOOP
+	if (_dq_state_is_base_wlh(old_state)) {
+		// - Only non-"du_is_direct" sources & mach channels can be enqueued
+		//   on the manager.
+		//
+		// - Only dispatch_source_cancel_and_wait() and
+		//   dispatch_source_set_*_handler() use the barrier complete codepath,
+		//   none of which are used by mach channels.
+		//
+		// Hence no source-ish object can both be a workloop and need to use the
+		// manager at the same time.
+		dispatch_assert(!_dq_state_is_enqueued_on_manager(new_state));
+		if (_dq_state_is_enqueued_on_target(old_state) ||
+				_dq_state_is_enqueued_on_target(new_state) ||
+				_dq_state_received_sync_wait(old_state) ||
+				_dq_state_in_sync_transfer(old_state)) {
+			return _dispatch_event_loop_end_ownership((dispatch_wlh_t)dwl,
+					old_state, new_state, flags);
+		}
+		_dispatch_event_loop_assert_not_owned((dispatch_wlh_t)dwl);
+		goto done;
+	}
+#endif
+
+	if (_dq_state_received_override(old_state)) {
+		// Ensure that the root queue sees that this thread was overridden.
+		_dispatch_set_basepri_override_qos(_dq_state_max_qos(old_state));
+	}
+
+	if (target) {
+		if (likely((old_state ^ new_state) & DISPATCH_QUEUE_ENQUEUED)) {
+			dispatch_assert(_dq_state_is_enqueued(new_state));
+			dispatch_assert(flags & DISPATCH_WAKEUP_CONSUME_2);
+			return _dispatch_queue_push_queue(dwl->do_targetq, dwl, new_state);
+		}
+#if HAVE_PTHREAD_WORKQUEUE_QOS
+		// <rdar://problem/27694093> when doing sync to async handoff
+		// if the queue received an override we have to forecefully redrive
+		// the same override so that a new stealer is enqueued because
+		// the previous one may be gone already
+		if (_dq_state_should_override(new_state)) {
+			return _dispatch_queue_wakeup_with_override(dwl, new_state, flags);
+		}
+#endif
+	}
+
+#if DISPATCH_USE_KEVENT_WORKLOOP
+done:
+#endif
+	if (flags & DISPATCH_WAKEUP_CONSUME_2) {
+		return _dispatch_release_2_tailcall(dwl);
+	}
+}
 
 #if HAVE_PTHREAD_WORKQUEUE_QOS
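+// Stealer continuations invoke a queue from its target hierarchy at an
+// elevated QoS so that a pending override gets applied (see
+// _dispatch_queue_wakeup_with_override_slow).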
+static void
+_dispatch_workloop_stealer_invoke(dispatch_continuation_t dc,
+		dispatch_invoke_context_t dic, dispatch_invoke_flags_t flags)
+{
+	uintptr_t dc_flags = DC_FLAG_CONSUME | DC_FLAG_NO_INTROSPECTION;
+	_dispatch_continuation_pop_forwarded(dc, dc_flags, NULL, {
+		dispatch_queue_t dq = dc->dc_data;
+		dx_invoke(dq, dic, flags | DISPATCH_INVOKE_STEALING);
+	});
+}
+
+DISPATCH_NOINLINE
+static void
+_dispatch_workloop_push_stealer(dispatch_workloop_t dwl, dispatch_queue_t dq,
+		dispatch_qos_t qos)
+{
+	dispatch_continuation_t dc = _dispatch_continuation_alloc();
+
+	dc->do_vtable = DC_VTABLE(WORKLOOP_STEALING);
+	_dispatch_retain_2(dq);
+	dc->dc_func = NULL;
+	dc->dc_ctxt = dc;
+	dc->dc_other = NULL;
+	dc->dc_data = dq;
+	dc->dc_priority = DISPATCH_NO_PRIORITY;
+	dc->dc_voucher = DISPATCH_NO_VOUCHER;
+	_dispatch_workloop_push(dwl, dc, qos);
+}
+#endif // HAVE_PTHREAD_WORKQUEUE_QOS
+
 void
+_dispatch_workloop_wakeup(dispatch_workloop_t dwl, dispatch_qos_t qos,
+		dispatch_wakeup_flags_t flags)
+{
+	if (unlikely(flags & DISPATCH_WAKEUP_BARRIER_COMPLETE)) {
+		return _dispatch_workloop_barrier_complete(dwl, qos, flags);
+	}
+
+	if (unlikely(!(flags & DISPATCH_WAKEUP_CONSUME_2))) {
+		DISPATCH_INTERNAL_CRASH(flags, "Invalid way to wake up a workloop");
+	}
+
+	if (unlikely(flags & DISPATCH_WAKEUP_BLOCK_WAIT)) {
+		goto done;
+	}
+
+	uint64_t old_state, new_state;
+
+	os_atomic_rmw_loop2o(dwl, dq_state, old_state, new_state, release, {
+		new_state = _dq_state_merge_qos(old_state, qos);
+		if (_dq_state_max_qos(new_state)) {
+			new_state |= DISPATCH_QUEUE_ENQUEUED;
+		}
+		if (flags & DISPATCH_WAKEUP_MAKE_DIRTY) {
+			new_state |= DISPATCH_QUEUE_DIRTY;
+		} else if (new_state == old_state) {
+			os_atomic_rmw_loop_give_up(goto done);
+		}
+	});
+
+	if (unlikely(_dq_state_is_suspended(old_state))) {
+#if DISPATCH_SIZEOF_PTR == 4
+		old_state >>= 32;
+#endif
+		DISPATCH_CLIENT_CRASH(old_state, "Waking up an inactive workloop");
+	}
+	if (likely((old_state ^ new_state) & DISPATCH_QUEUE_ENQUEUED)) {
+		return _dispatch_queue_push_queue(dwl->do_targetq, dwl, new_state);
+	}
+#if HAVE_PTHREAD_WORKQUEUE_QOS
+	if (likely((old_state ^ new_state) & DISPATCH_QUEUE_MAX_QOS_MASK)) {
+		return _dispatch_queue_wakeup_with_override(dwl, new_state, flags);
+	}
+#endif // HAVE_PTHREAD_WORKQUEUE_QOS
+done:
+	return _dispatch_release_2_tailcall(dwl);
+}
+
+DISPATCH_NOINLINE
+static void
+_dispatch_workloop_push_waiter(dispatch_workloop_t dwl,
+		dispatch_sync_context_t dsc, dispatch_qos_t qos)
+{
+	struct dispatch_object_s *prev, *dc = (struct dispatch_object_s *)dsc;
+
+	dispatch_priority_t p = _dispatch_priority_from_pp(dsc->dc_priority);
+	if (qos < _dispatch_priority_qos(p)) {
+		qos = _dispatch_priority_qos(p);
+	}
+	if (qos == DISPATCH_QOS_UNSPECIFIED) {
+		qos = DISPATCH_QOS_DEFAULT;
+	}
+
+	prev = _dispatch_workloop_push_update_tail(dwl, qos, dc);
+	_dispatch_workloop_push_update_prev(dwl, qos, prev, dc);
+	if (likely(!os_mpsc_push_was_empty(prev))) return;
+
+	uint64_t set_owner_and_set_full_width_and_in_barrier =
+			_dispatch_lock_value_for_self() |
+			DISPATCH_QUEUE_WIDTH_FULL_BIT | DISPATCH_QUEUE_IN_BARRIER;
+	uint64_t old_state, new_state;
+
+	os_atomic_rmw_loop2o(dwl, dq_state, old_state, new_state, release, {
+		new_state  = _dq_state_merge_qos(old_state, qos);
+		new_state |= DISPATCH_QUEUE_DIRTY;
+		if (unlikely(_dq_state_drain_locked(old_state))) {
+			// not runnable, so we should just handle overrides
+		} else if (_dq_state_is_enqueued(old_state)) {
+			// 32123779 let the event thread redrive since it's out already
+		} else {
+			// see _dispatch_queue_drain_try_lock
+			new_state &= DISPATCH_QUEUE_DRAIN_PRESERVED_BITS_MASK;
+			new_state |= set_owner_and_set_full_width_and_in_barrier;
+		}
+	});
+
+	dsc->dsc_wlh_was_first = (dsc->dsc_waiter == _dispatch_tid_self());
+
+	if ((old_state ^ new_state) & DISPATCH_QUEUE_IN_BARRIER) {
+		return _dispatch_workloop_barrier_complete(dwl, qos, 0);
+	}
+#if HAVE_PTHREAD_WORKQUEUE_QOS
+	if (unlikely((old_state ^ new_state) & DISPATCH_QUEUE_MAX_QOS_MASK)) {
+		if (_dq_state_should_override(new_state)) {
+			return _dispatch_queue_wakeup_with_override(dwl, new_state, 0);
+		}
+	}
+#endif // HAVE_PTHREAD_WORKQUEUE_QOS
+}
+
+void
+_dispatch_workloop_push(dispatch_workloop_t dwl, dispatch_object_t dou,
+		dispatch_qos_t qos)
+{
+	struct dispatch_object_s *prev;
+
+	if (unlikely(_dispatch_object_is_waiter(dou))) {
+		return _dispatch_workloop_push_waiter(dwl, dou._dsc, qos);
+	}
+
+	if (qos < _dispatch_priority_qos(dwl->dq_priority)) {
+		qos = _dispatch_priority_qos(dwl->dq_priority);
+	}
+	if (qos == DISPATCH_QOS_UNSPECIFIED) {
+		qos = _dispatch_priority_fallback_qos(dwl->dq_priority);
+	}
+	prev = _dispatch_workloop_push_update_tail(dwl, qos, dou._do);
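+	// If the bucket was empty before this push, take a +2 now; the wakeup
+	// below consumes it.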
+	if (unlikely(os_mpsc_push_was_empty(prev))) {
+		_dispatch_retain_2_unsafe(dwl);
+	}
+	_dispatch_workloop_push_update_prev(dwl, qos, prev, dou._do);
+	if (unlikely(os_mpsc_push_was_empty(prev))) {
+		return _dispatch_workloop_wakeup(dwl, qos, DISPATCH_WAKEUP_CONSUME_2 |
+				DISPATCH_WAKEUP_MAKE_DIRTY);
+	}
+}
+
+#pragma mark -
+#pragma mark dispatch queue/lane push & wakeup
+
+#if HAVE_PTHREAD_WORKQUEUE_QOS
+static void
 _dispatch_queue_override_invoke(dispatch_continuation_t dc,
 		dispatch_invoke_context_t dic, dispatch_invoke_flags_t flags)
 {
 	dispatch_queue_t old_rq = _dispatch_queue_get_current();
-	dispatch_queue_t assumed_rq = dc->dc_other;
+	dispatch_queue_global_t assumed_rq = dc->dc_other;
 	dispatch_priority_t old_dp;
-	voucher_t ov = DISPATCH_NO_VOUCHER;
 	dispatch_object_t dou;
+	uintptr_t dc_flags = DC_FLAG_CONSUME;
 
 	dou._do = dc->dc_data;
 	old_dp = _dispatch_root_queue_identity_assume(assumed_rq);
 	if (dc_type(dc) == DISPATCH_CONTINUATION_TYPE(OVERRIDE_STEALING)) {
 		flags |= DISPATCH_INVOKE_STEALING;
-	} else {
-		// balance the fake continuation push in
-		// _dispatch_root_queue_push_override
-		_dispatch_trace_continuation_pop(assumed_rq, dou._do);
+		dc_flags |= DC_FLAG_NO_INTROSPECTION;
 	}
-	_dispatch_continuation_pop_forwarded(dc, ov, DISPATCH_OBJ_CONSUME_BIT, {
+	_dispatch_continuation_pop_forwarded(dc, dc_flags, assumed_rq, {
 		if (_dispatch_object_has_vtable(dou._do)) {
-			dx_invoke(dou._do, dic, flags);
+			dx_invoke(dou._dq, dic, flags);
 		} else {
-			_dispatch_continuation_invoke_inline(dou, ov, flags);
+			_dispatch_continuation_invoke_inline(dou, flags, assumed_rq);
 		}
 	});
 	_dispatch_reset_basepri(old_dp);
@@ -5384,33 +4636,25 @@
 
 DISPATCH_ALWAYS_INLINE
 static inline bool
-_dispatch_root_queue_push_needs_override(dispatch_queue_t rq,
+_dispatch_root_queue_push_needs_override(dispatch_queue_global_t rq,
 		dispatch_qos_t qos)
 {
+	dispatch_qos_t fallback = _dispatch_priority_fallback_qos(rq->dq_priority);
+	if (fallback) {
+		return qos && qos != fallback;
+	}
+
 	dispatch_qos_t rqos = _dispatch_priority_qos(rq->dq_priority);
-	bool defaultqueue = rq->dq_priority & DISPATCH_PRIORITY_FLAG_DEFAULTQUEUE;
-
-	if (unlikely(!rqos)) return false;
-
-	return defaultqueue ? qos && qos != rqos : qos > rqos;
-}
-
-DISPATCH_ALWAYS_INLINE
-static inline bool
-_dispatch_root_queue_push_queue_override_needed(dispatch_queue_t rq,
-		dispatch_qos_t qos)
-{
-	// for root queues, the override is the guaranteed minimum override level
-	return qos > _dispatch_priority_override_qos(rq->dq_priority);
+	return rqos && qos > rqos;
 }
 
 DISPATCH_NOINLINE
 static void
-_dispatch_root_queue_push_override(dispatch_queue_t orig_rq,
+_dispatch_root_queue_push_override(dispatch_queue_global_t orig_rq,
 		dispatch_object_t dou, dispatch_qos_t qos)
 {
 	bool overcommit = orig_rq->dq_priority & DISPATCH_PRIORITY_FLAG_OVERCOMMIT;
-	dispatch_queue_t rq = _dispatch_get_root_queue(qos, overcommit);
+	dispatch_queue_global_t rq = _dispatch_get_root_queue(qos, overcommit);
 	dispatch_continuation_t dc = dou._dc;
 
 	if (_dispatch_object_is_redirection(dc)) {
@@ -5420,8 +4664,6 @@
 	} else {
 		dc = _dispatch_continuation_alloc();
 		dc->do_vtable = DC_VTABLE(OVERRIDE_OWNING);
-		// fake that we queued `dou` on `orig_rq` for introspection purposes
-		_dispatch_trace_continuation_push(orig_rq, dou);
 		dc->dc_ctxt = dc;
 		dc->dc_other = orig_rq;
 		dc->dc_data = dou._do;
@@ -5433,11 +4675,11 @@
 
 DISPATCH_NOINLINE
 static void
-_dispatch_root_queue_push_override_stealer(dispatch_queue_t orig_rq,
+_dispatch_root_queue_push_override_stealer(dispatch_queue_global_t orig_rq,
 		dispatch_queue_t dq, dispatch_qos_t qos)
 {
 	bool overcommit = orig_rq->dq_priority & DISPATCH_PRIORITY_FLAG_OVERCOMMIT;
-	dispatch_queue_t rq = _dispatch_get_root_queue(qos, overcommit);
+	dispatch_queue_global_t rq = _dispatch_get_root_queue(qos, overcommit);
 	dispatch_continuation_t dc = _dispatch_continuation_alloc();
 
 	dc->do_vtable = DC_VTABLE(OVERRIDE_STEALING);
@@ -5453,32 +4695,33 @@
 
 DISPATCH_NOINLINE
 static void
-_dispatch_queue_class_wakeup_with_override_slow(dispatch_queue_t dq,
+_dispatch_queue_wakeup_with_override_slow(dispatch_queue_t dq,
 		uint64_t dq_state, dispatch_wakeup_flags_t flags)
 {
 	dispatch_qos_t oqos, qos = _dq_state_max_qos(dq_state);
-	dispatch_queue_t tq;
+	dispatch_queue_t tq = dq->do_targetq;
+	mach_port_t owner;
 	bool locked;
 
 	if (_dq_state_is_base_anon(dq_state)) {
-		mach_port_t owner = _dq_state_drain_owner(dq_state);
-		if (owner) {
+		if (!_dispatch_is_in_root_queues_array(tq)) {
+			// <rdar://problem/40320044> Do not try to override pthread root
+			// queues; it isn't supported and can cause things to run
+			// on the wrong hierarchy if we enqueue a stealer by accident
+			goto out;
+		} else if ((owner = _dq_state_drain_owner(dq_state))) {
 			(void)_dispatch_wqthread_override_start_check_owner(owner, qos,
-				&dq->dq_state_lock);
+					&dq->dq_state_lock);
 			goto out;
 		}
-	}
 
-	tq = dq->do_targetq;
-
-	if (likely(!_dispatch_queue_is_legacy(dq))) {
-		locked = false;
-	} else if (_dispatch_is_in_root_queues_array(tq)) {
 		// avoid locking when we recognize the target queue as a global root
 		// queue it is gross, but is a very common case. The locking isn't
 		// needed because these target queues cannot go away.
 		locked = false;
-	} else if (_dispatch_queue_sidelock_trylock(dq, qos)) {
+	} else if (likely(!_dispatch_queue_is_mutable(dq))) {
+		locked = false;
+	} else if (_dispatch_queue_sidelock_trylock(upcast(dq)._dl, qos)) {
 		// <rdar://problem/17735825> to traverse the tq chain safely we must
 		// lock it to ensure it cannot change
 		locked = true;
@@ -5514,7 +4757,7 @@
 		//      This drainer must have seen the effects of (2) and that guy has
 		//      applied our override. Our job is done.
 		//
-		// - Another instance of _dispatch_queue_class_wakeup_with_override(),
+		// - Another instance of _dispatch_queue_wakeup_with_override_slow(),
 		//   which is fine because trylock leaves a hint that we failed our
 		//   trylock, causing the tryunlock below to fail and reassess whether
 		//   a better override needs to be applied.
@@ -5525,20 +4768,26 @@
 
 apply_again:
 	if (dx_hastypeflag(tq, QUEUE_ROOT)) {
-		if (_dispatch_root_queue_push_queue_override_needed(tq, qos)) {
-			_dispatch_root_queue_push_override_stealer(tq, dq, qos);
+		dispatch_queue_global_t rq = upcast(tq)._dgq;
+		if (qos > _dispatch_priority_qos(rq->dq_priority)) {
+			_dispatch_root_queue_push_override_stealer(rq, dq, qos);
 		}
+	} else if (dx_metatype(tq) == _DISPATCH_WORKLOOP_TYPE) {
+		_dispatch_workloop_push_stealer(upcast(tq)._dwl, dq, qos);
 	} else if (_dispatch_queue_need_override(tq, qos)) {
 		dx_wakeup(tq, qos, 0);
 	}
-	while (unlikely(locked && !_dispatch_queue_sidelock_tryunlock(dq))) {
+	if (likely(!locked)) {
+		goto out;
+	}
+	while (unlikely(!_dispatch_queue_sidelock_tryunlock(upcast(dq)._dl))) {
 		// rdar://problem/24081326
 		//
-		// Another instance of _dispatch_queue_class_wakeup_with_override()
-		// tried to acquire the side lock while we were running, and could have
+		// Another instance of _dispatch_queue_wakeup_with_override() tried
+		// to acquire the side lock while we were running, and could have
 		// had a better override than ours to apply.
 		//
-		oqos = _dq_state_max_qos(os_atomic_load2o(dq, dq_state, relaxed));
+		oqos = _dispatch_queue_max_qos(dq);
 		if (oqos > qos) {
 			qos = oqos;
 			// The other instance had a better priority than ours, override
@@ -5554,87 +4803,30 @@
 	}
 }
 
-
 DISPATCH_ALWAYS_INLINE
 static inline void
-_dispatch_queue_class_wakeup_with_override(dispatch_queue_t dq,
+_dispatch_queue_wakeup_with_override(dispatch_queue_class_t dq,
 		uint64_t dq_state, dispatch_wakeup_flags_t flags)
 {
 	dispatch_assert(_dq_state_should_override(dq_state));
 
-	return _dispatch_queue_class_wakeup_with_override_slow(dq, dq_state, flags);
+#if DISPATCH_USE_KEVENT_WORKLOOP
+	if (likely(_dq_state_is_base_wlh(dq_state))) {
+		_dispatch_trace_runtime_event(worker_request, dq._dq, 1);
+		return _dispatch_event_loop_poke((dispatch_wlh_t)dq._dq, dq_state,
+				flags | DISPATCH_EVENT_LOOP_OVERRIDE);
+	}
+#endif // DISPATCH_USE_KEVENT_WORKLOOP
+	return _dispatch_queue_wakeup_with_override_slow(dq._dq, dq_state, flags);
 }
 #endif // HAVE_PTHREAD_WORKQUEUE_QOS
 
 DISPATCH_NOINLINE
 void
-_dispatch_root_queue_push(dispatch_queue_t rq, dispatch_object_t dou,
-		dispatch_qos_t qos)
-{
-#if DISPATCH_USE_KEVENT_WORKQUEUE
-	dispatch_deferred_items_t ddi = _dispatch_deferred_items_get();
-	if (unlikely(ddi && ddi->ddi_can_stash)) {
-		dispatch_object_t old_dou = ddi->ddi_stashed_dou;
-		dispatch_priority_t rq_overcommit;
-		rq_overcommit = rq->dq_priority & DISPATCH_PRIORITY_FLAG_OVERCOMMIT;
-
-		if (likely(!old_dou._do || rq_overcommit)) {
-			dispatch_queue_t old_rq = ddi->ddi_stashed_rq;
-			dispatch_qos_t old_qos = ddi->ddi_stashed_qos;
-			ddi->ddi_stashed_rq = rq;
-			ddi->ddi_stashed_dou = dou;
-			ddi->ddi_stashed_qos = qos;
-			_dispatch_debug("deferring item %p, rq %p, qos %d",
-					dou._do, rq, qos);
-			if (rq_overcommit) {
-				ddi->ddi_can_stash = false;
-			}
-			if (likely(!old_dou._do)) {
-				return;
-			}
-			// push the previously stashed item
-			qos = old_qos;
-			rq = old_rq;
-			dou = old_dou;
-		}
-	}
-#endif
-#if HAVE_PTHREAD_WORKQUEUE_QOS
-	if (_dispatch_root_queue_push_needs_override(rq, qos)) {
-		return _dispatch_root_queue_push_override(rq, dou, qos);
-	}
-#else
-	(void)qos;
-#endif
-	_dispatch_root_queue_push_inline(rq, dou, dou, 1);
-}
-
-void
-_dispatch_root_queue_wakeup(dispatch_queue_t dq,
-		DISPATCH_UNUSED dispatch_qos_t qos, dispatch_wakeup_flags_t flags)
-{
-	if (!(flags & DISPATCH_WAKEUP_BLOCK_WAIT)) {
-		DISPATCH_INTERNAL_CRASH(dq->dq_priority,
-				"Don't try to wake up or override a root queue");
-	}
-	if (flags & DISPATCH_WAKEUP_CONSUME_2) {
-		return _dispatch_release_2_tailcall(dq);
-	}
-}
-
-DISPATCH_NOINLINE
-void
-_dispatch_queue_push(dispatch_queue_t dq, dispatch_object_t dou,
-		dispatch_qos_t qos)
-{
-	_dispatch_queue_push_inline(dq, dou, qos);
-}
-
-DISPATCH_NOINLINE
-void
-_dispatch_queue_class_wakeup(dispatch_queue_t dq, dispatch_qos_t qos,
+_dispatch_queue_wakeup(dispatch_queue_class_t dqu, dispatch_qos_t qos,
 		dispatch_wakeup_flags_t flags, dispatch_queue_wakeup_target_t target)
 {
+	dispatch_queue_t dq = dqu._dq;
 	dispatch_assert(target != DISPATCH_QUEUE_WAKEUP_WAIT_FOR_EVENT);
 
 	if (target && !(flags & DISPATCH_WAKEUP_CONSUME_2)) {
@@ -5644,18 +4836,19 @@
 
 	if (unlikely(flags & DISPATCH_WAKEUP_BARRIER_COMPLETE)) {
 		//
-		// _dispatch_queue_class_barrier_complete() is about what both regular
+		// _dispatch_lane_class_barrier_complete() is about what both regular
 		// queues and sources need to evaluate, but the former can have sync
-		// handoffs to perform which _dispatch_queue_class_barrier_complete()
-		// doesn't handle, only _dispatch_queue_barrier_complete() does.
+		// handoffs to perform which _dispatch_lane_class_barrier_complete()
+		// doesn't handle, only _dispatch_lane_barrier_complete() does.
 		//
-		// _dispatch_queue_wakeup() is the one for plain queues that calls
-		// _dispatch_queue_barrier_complete(), and this is only taken for non
+		// _dispatch_lane_wakeup() is the one for plain queues that calls
+		// _dispatch_lane_barrier_complete(), and this is only taken for non
 		// queue types.
 		//
-		dispatch_assert(dx_metatype(dq) != _DISPATCH_QUEUE_TYPE);
-		return _dispatch_queue_class_barrier_complete(dq, qos, flags, target,
-				DISPATCH_QUEUE_SERIAL_DRAIN_OWNED);
+		dispatch_assert(dx_metatype(dq) == _DISPATCH_SOURCE_TYPE);
+		qos = _dispatch_queue_wakeup_qos(dq, qos);
+		return _dispatch_lane_class_barrier_complete(upcast(dq)._dl, qos,
+				flags, target, DISPATCH_QUEUE_SERIAL_DRAIN_OWNED);
 	}
 
 	if (target) {
@@ -5663,7 +4856,7 @@
 		if (target == DISPATCH_QUEUE_WAKEUP_MGR) {
 			enqueue = DISPATCH_QUEUE_ENQUEUED_ON_MGR;
 		}
-		qos = _dispatch_queue_override_qos(dq, qos);
+		qos = _dispatch_queue_wakeup_qos(dq, qos);
 		os_atomic_rmw_loop2o(dq, dq_state, old_state, new_state, release, {
 			new_state = _dq_state_merge_qos(old_state, qos);
 			if (likely(!_dq_state_is_suspended(old_state) &&
@@ -5701,7 +4894,7 @@
 #if HAVE_PTHREAD_WORKQUEUE_QOS
 		if (unlikely((old_state ^ new_state) & DISPATCH_QUEUE_MAX_QOS_MASK)) {
 			if (_dq_state_should_override(new_state)) {
-				return _dispatch_queue_class_wakeup_with_override(dq, new_state,
+				return _dispatch_queue_wakeup_with_override(dq, new_state,
 						flags);
 			}
 		}
@@ -5721,8 +4914,7 @@
 			}
 		});
 		if (_dq_state_should_override(new_state)) {
-			return _dispatch_queue_class_wakeup_with_override(dq, new_state,
-					flags);
+			return _dispatch_queue_wakeup_with_override(dq, new_state, flags);
 		}
 #endif // HAVE_PTHREAD_WORKQUEUE_QOS
 	}
@@ -5733,24 +4925,50 @@
 }
 
 DISPATCH_NOINLINE
+void
+_dispatch_lane_wakeup(dispatch_lane_class_t dqu, dispatch_qos_t qos,
+		dispatch_wakeup_flags_t flags)
+{
+	dispatch_queue_wakeup_target_t target = DISPATCH_QUEUE_WAKEUP_NONE;
+
+	if (unlikely(flags & DISPATCH_WAKEUP_BARRIER_COMPLETE)) {
+		return _dispatch_lane_barrier_complete(dqu, qos, flags);
+	}
+	if (_dispatch_queue_class_probe(dqu)) {
+		target = DISPATCH_QUEUE_WAKEUP_TARGET;
+	}
+	return _dispatch_queue_wakeup(dqu, qos, flags, target);
+}
+
+DISPATCH_ALWAYS_INLINE
+static inline bool
+_dispatch_lane_push_waiter_should_wakeup(dispatch_lane_t dq,
+		dispatch_sync_context_t dsc)
+{
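+	// A thread-bound queue (such as the main queue) can only be drained by
+	// the thread it is bound to, so a pushed waiter must wake that thread
+	// instead of trying to take over the drain itself.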
+	if (_dispatch_queue_is_thread_bound(dq)) {
+		return true;
+	}
+	if (dsc->dc_flags & DC_FLAG_ASYNC_AND_WAIT) {
+		uint64_t dq_state = os_atomic_load2o(dq, dq_state, relaxed);
+		return _dispatch_async_and_wait_should_always_async(dq, dq_state);
+	}
+	return false;
+}
+
+DISPATCH_NOINLINE
 static void
-_dispatch_queue_push_sync_waiter(dispatch_queue_t dq,
-		dispatch_sync_context_t dsc, dispatch_qos_t qos)
+_dispatch_lane_push_waiter(dispatch_lane_t dq, dispatch_sync_context_t dsc,
+		dispatch_qos_t qos)
 {
 	uint64_t old_state, new_state;
 
-	if (unlikely(dx_type(dq) == DISPATCH_QUEUE_NETWORK_EVENT_TYPE)) {
-		DISPATCH_CLIENT_CRASH(0,
-				"dispatch_sync onto a network event queue");
+	if (dsc->dc_data != DISPATCH_WLH_ANON) {
+		// The kernel will handle all the overrides / priorities on our behalf.
+		qos = 0;
 	}
 
-	_dispatch_trace_continuation_push(dq, dsc->_as_dc);
-
-	if (unlikely(_dispatch_queue_push_update_tail(dq, dsc->_as_do))) {
-		// for slow waiters, we borrow the reference of the caller
-		// so we don't need to protect the wakeup with a temporary retain
-		_dispatch_queue_push_update_head(dq, dsc->_as_do);
-		if (unlikely(_dispatch_queue_is_thread_bound(dq))) {
+	if (unlikely(_dispatch_queue_push_item(dq, dsc))) {
+		if (unlikely(_dispatch_lane_push_waiter_should_wakeup(dq, dsc))) {
 			return dx_wakeup(dq, qos, DISPATCH_WAKEUP_MAKE_DIRTY);
 		}
 
@@ -5759,7 +4977,6 @@
 		uint64_t set_owner_and_set_full_width_and_in_barrier =
 				_dispatch_lock_value_for_self() |
 				DISPATCH_QUEUE_WIDTH_FULL_BIT | DISPATCH_QUEUE_IN_BARRIER;
-		// similar to _dispatch_queue_drain_try_unlock()
 		os_atomic_rmw_loop2o(dq, dq_state, old_state, new_state, release, {
 			new_state  = _dq_state_merge_qos(old_state, qos);
 			new_state |= DISPATCH_QUEUE_DIRTY;
@@ -5778,19 +4995,17 @@
 			}
 		});
 
-		if (_dq_state_is_base_wlh(old_state) &&
-				(dsc->dsc_waiter == _dispatch_tid_self())) {
-			dsc->dsc_wlh_was_first = true;
+		if (_dq_state_is_base_wlh(old_state)) {
+			dsc->dsc_wlh_was_first = (dsc->dsc_waiter == _dispatch_tid_self());
 		}
 
 		if ((old_state ^ new_state) & DISPATCH_QUEUE_IN_BARRIER) {
-			return _dispatch_queue_barrier_complete(dq, qos, 0);
+			return _dispatch_lane_barrier_complete(dq, qos, 0);
 		}
 #if HAVE_PTHREAD_WORKQUEUE_QOS
 		if (unlikely((old_state ^ new_state) & DISPATCH_QUEUE_MAX_QOS_MASK)) {
 			if (_dq_state_should_override(new_state)) {
-				return _dispatch_queue_class_wakeup_with_override(dq,
-						new_state, 0);
+				return _dispatch_queue_wakeup_with_override(dq, new_state, 0);
 			}
 		}
 	} else if (unlikely(qos)) {
@@ -5801,39 +5016,861 @@
 			}
 		});
 		if (_dq_state_should_override(new_state)) {
-			return _dispatch_queue_class_wakeup_with_override(dq, new_state, 0);
+			return _dispatch_queue_wakeup_with_override(dq, new_state, 0);
 		}
 #endif // HAVE_PTHREAD_WORKQUEUE_QOS
 	}
 }
 
+DISPATCH_NOINLINE
+void
+_dispatch_lane_push(dispatch_lane_t dq, dispatch_object_t dou,
+		dispatch_qos_t qos)
+{
+	dispatch_wakeup_flags_t flags = 0;
+	struct dispatch_object_s *prev;
+
+	if (unlikely(_dispatch_object_is_waiter(dou))) {
+		return _dispatch_lane_push_waiter(dq, dou._dsc, qos);
+	}
+
+	dispatch_assert(!_dispatch_object_is_global(dq));
+	qos = _dispatch_queue_push_qos(dq, qos);
+
+	// If we are going to call dx_wakeup(), the queue must be retained before
+	// the item we're pushing can be dequeued, which means:
+	// - before we exchange the tail if we have to override
+	// - before we set the head if we made the queue non empty.
+	// Otherwise, if preempted between one of these and the call to dx_wakeup(),
+	// the blocks submitted to the queue may release the last reference to the
+	// queue when invoked by _dispatch_lane_drain. <rdar://problem/6932776>
+
+	prev = os_mpsc_push_update_tail(os_mpsc(dq, dq_items), dou._do, do_next);
+	if (unlikely(os_mpsc_push_was_empty(prev))) {
+		_dispatch_retain_2_unsafe(dq);
+		flags = DISPATCH_WAKEUP_CONSUME_2 | DISPATCH_WAKEUP_MAKE_DIRTY;
+	} else if (unlikely(_dispatch_queue_need_override(dq, qos))) {
+		// There's a race here, _dispatch_queue_need_override may read a stale
+		// dq_state value.
+		//
+		// If it's a stale load from the same drain streak, given that
+		// the max qos is monotonic, too old a read can only cause an
+		// unnecessary attempt at overriding, which is harmless.
+		//
+		// We'll assume here that a stale load from a previous drain streak
+		// never happens in practice.
+		_dispatch_retain_2_unsafe(dq);
+		flags = DISPATCH_WAKEUP_CONSUME_2;
+	}
+	os_mpsc_push_update_prev(os_mpsc(dq, dq_items), prev, dou._do, do_next);
+	if (flags) {
+		return dx_wakeup(dq, qos, flags);
+	}
+}
+
+DISPATCH_NOINLINE
+void
+_dispatch_lane_concurrent_push(dispatch_lane_t dq, dispatch_object_t dou,
+		dispatch_qos_t qos)
+{
+	// <rdar://problem/24738102&24743140> reserving non barrier width
+	// doesn't fail if only the ENQUEUED bit is set (unlike its barrier
+	// width equivalent), so we have to check that this thread hasn't
+	// enqueued anything ahead of this call or we can break ordering
+	if (dq->dq_items_tail == NULL &&
+			!_dispatch_object_is_waiter(dou) &&
+			!_dispatch_object_is_barrier(dou) &&
+			_dispatch_queue_try_acquire_async(dq)) {
+		return _dispatch_continuation_redirect_push(dq, dou, qos);
+	}
+
+	_dispatch_lane_push(dq, dou, qos);
+}
+
 #pragma mark -
-#pragma mark dispatch_root_queue_drain
+#pragma mark dispatch_mgr_queue
+
+#if DISPATCH_USE_PTHREAD_ROOT_QUEUES || DISPATCH_USE_KEVENT_WORKQUEUE
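+// Scheduling parameters for the manager thread: the current and default
+// scheduler priority, the requested QoS, the scheduling policy, and the
+// thread handle once the thread has been created.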
+struct _dispatch_mgr_sched_s {
+	volatile int prio;
+	volatile qos_class_t qos;
+	int default_prio;
+	int policy;
+#if defined(_WIN32)
+	HANDLE hThread;
+#else
+	pthread_t tid;
+#endif
+};
+
+DISPATCH_STATIC_GLOBAL(struct _dispatch_mgr_sched_s _dispatch_mgr_sched);
+DISPATCH_STATIC_GLOBAL(dispatch_once_t _dispatch_mgr_sched_pred);
+
+#if HAVE_PTHREAD_WORKQUEUE_QOS
+// TODO: switch to "event-reflector thread" property <rdar://problem/18126138>
+// Must be kept in sync with list of qos classes in sys/qos.h
+static int
+_dispatch_mgr_sched_qos2prio(qos_class_t qos)
+{
+	switch (qos) {
+	case QOS_CLASS_MAINTENANCE: return 4;
+	case QOS_CLASS_BACKGROUND: return 4;
+	case QOS_CLASS_UTILITY: return 20;
+	case QOS_CLASS_DEFAULT: return 31;
+	case QOS_CLASS_USER_INITIATED: return 37;
+	case QOS_CLASS_USER_INTERACTIVE: return 47;
+	}
+	return 0;
+}
+#endif // HAVE_PTHREAD_WORKQUEUE_QOS
+
+static void
+_dispatch_mgr_sched_init(void *ctxt DISPATCH_UNUSED)
+{
+#if !defined(_WIN32)
+	struct sched_param param;
+#if DISPATCH_USE_MGR_THREAD && DISPATCH_USE_PTHREAD_ROOT_QUEUES
+	dispatch_pthread_root_queue_context_t pqc = _dispatch_mgr_root_queue.do_ctxt;
+	pthread_attr_t *attr = &pqc->dpq_thread_attr;
+#else
+	pthread_attr_t a, *attr = &a;
+#endif
+	(void)dispatch_assume_zero(pthread_attr_init(attr));
+	(void)dispatch_assume_zero(pthread_attr_getschedpolicy(attr,
+			&_dispatch_mgr_sched.policy));
+	(void)dispatch_assume_zero(pthread_attr_getschedparam(attr, &param));
+#if HAVE_PTHREAD_WORKQUEUE_QOS
+	qos_class_t qos = qos_class_main();
+	if (qos == QOS_CLASS_DEFAULT) {
+		qos = QOS_CLASS_USER_INITIATED; // rdar://problem/17279292
+	}
+	if (qos) {
+		_dispatch_mgr_sched.qos = qos;
+		param.sched_priority = _dispatch_mgr_sched_qos2prio(qos);
+	}
+#endif
+	_dispatch_mgr_sched.default_prio = param.sched_priority;
+#else // defined(_WIN32)
+	_dispatch_mgr_sched.policy = 0;
+	_dispatch_mgr_sched.default_prio = THREAD_PRIORITY_NORMAL;
+#endif // defined(_WIN32)
+	_dispatch_mgr_sched.prio = _dispatch_mgr_sched.default_prio;
+}
+#endif // DISPATCH_USE_PTHREAD_ROOT_QUEUES || DISPATCH_USE_KEVENT_WORKQUEUE
+
+#if DISPATCH_USE_PTHREAD_ROOT_QUEUES
+#if DISPATCH_USE_MGR_THREAD
+#if !defined(_WIN32)
+DISPATCH_NOINLINE
+static pthread_t *
+_dispatch_mgr_root_queue_init(void)
+{
+	dispatch_once_f(&_dispatch_mgr_sched_pred, NULL, _dispatch_mgr_sched_init);
+	dispatch_pthread_root_queue_context_t pqc = _dispatch_mgr_root_queue.do_ctxt;
+	pthread_attr_t *attr = &pqc->dpq_thread_attr;
+	struct sched_param param;
+	(void)dispatch_assume_zero(pthread_attr_setdetachstate(attr,
+			PTHREAD_CREATE_DETACHED));
+#if !DISPATCH_DEBUG
+	(void)dispatch_assume_zero(pthread_attr_setstacksize(attr, 64 * 1024));
+#endif
+#if HAVE_PTHREAD_WORKQUEUE_QOS
+	qos_class_t qos = _dispatch_mgr_sched.qos;
+	if (qos) {
+		if (_dispatch_set_qos_class_enabled) {
+			(void)dispatch_assume_zero(pthread_attr_set_qos_class_np(attr,
+					qos, 0));
+		}
+	}
+#endif
+	param.sched_priority = _dispatch_mgr_sched.prio;
+	if (param.sched_priority > _dispatch_mgr_sched.default_prio) {
+		(void)dispatch_assume_zero(pthread_attr_setschedparam(attr, &param));
+	}
+	return &_dispatch_mgr_sched.tid;
+}
+#else // defined(_WIN32)
+DISPATCH_NOINLINE
+static PHANDLE
+_dispatch_mgr_root_queue_init(void)
+{
+	dispatch_once_f(&_dispatch_mgr_sched_pred, NULL, _dispatch_mgr_sched_init);
+	return &_dispatch_mgr_sched.hThread;
+}
+#endif // defined(_WIN32)
+
+static inline void
+_dispatch_mgr_priority_apply(void)
+{
+#if !defined(_WIN32)
+	struct sched_param param;
+	do {
+		param.sched_priority = _dispatch_mgr_sched.prio;
+		if (param.sched_priority > _dispatch_mgr_sched.default_prio) {
+			(void)dispatch_assume_zero(pthread_setschedparam(
+					_dispatch_mgr_sched.tid, _dispatch_mgr_sched.policy,
+					&param));
+		}
+	} while (_dispatch_mgr_sched.prio > param.sched_priority);
+#else // defined(_WIN32)
+	int nPriority = _dispatch_mgr_sched.prio;
+	do {
+		if (nPriority > _dispatch_mgr_sched.default_prio) {
+			// TODO(compnerd) set thread scheduling policy
+			dispatch_assume_zero(SetThreadPriority(_dispatch_mgr_sched.hThread, nPriority));
+			nPriority = GetThreadPriority(_dispatch_mgr_sched.hThread);
+		}
+	} while (_dispatch_mgr_sched.prio > nPriority);
+#endif // defined(_WIN32)
+}
+
+DISPATCH_NOINLINE
+static void
+_dispatch_mgr_priority_init(void)
+{
+#if !defined(_WIN32)
+	dispatch_pthread_root_queue_context_t pqc = _dispatch_mgr_root_queue.do_ctxt;
+	pthread_attr_t *attr = &pqc->dpq_thread_attr;
+	struct sched_param param;
+	(void)dispatch_assume_zero(pthread_attr_getschedparam(attr, &param));
+#if HAVE_PTHREAD_WORKQUEUE_QOS
+	qos_class_t qos = 0;
+	(void)pthread_attr_get_qos_class_np(attr, &qos, NULL);
+	if (_dispatch_mgr_sched.qos > qos && _dispatch_set_qos_class_enabled) {
+		(void)pthread_set_qos_class_self_np(_dispatch_mgr_sched.qos, 0);
+		int p = _dispatch_mgr_sched_qos2prio(_dispatch_mgr_sched.qos);
+		if (p > param.sched_priority) {
+			param.sched_priority = p;
+		}
+	}
+#endif
+	if (unlikely(_dispatch_mgr_sched.prio > param.sched_priority)) {
+		return _dispatch_mgr_priority_apply();
+	}
+#else // defined(_WIN32)
+	int nPriority = GetThreadPriority(_dispatch_mgr_sched.hThread);
+	if (slowpath(_dispatch_mgr_sched.prio > nPriority)) {
+		return _dispatch_mgr_priority_apply();
+	}
+#endif // defined(_WIN32)
+}
+#endif // DISPATCH_USE_MGR_THREAD
+
+#if !defined(_WIN32)
+DISPATCH_NOINLINE
+static void
+_dispatch_mgr_priority_raise(const pthread_attr_t *attr)
+{
+	dispatch_once_f(&_dispatch_mgr_sched_pred, NULL, _dispatch_mgr_sched_init);
+	struct sched_param param;
+	(void)dispatch_assume_zero(pthread_attr_getschedparam(attr, &param));
+#if HAVE_PTHREAD_WORKQUEUE_QOS
+	qos_class_t q, qos = 0;
+	(void)pthread_attr_get_qos_class_np((pthread_attr_t *)attr, &qos, NULL);
+	if (qos) {
+		param.sched_priority = _dispatch_mgr_sched_qos2prio(qos);
+		os_atomic_rmw_loop2o(&_dispatch_mgr_sched, qos, q, qos, relaxed, {
+			if (q >= qos) os_atomic_rmw_loop_give_up(break);
+		});
+	}
+#endif
+	int p, prio = param.sched_priority;
+	os_atomic_rmw_loop2o(&_dispatch_mgr_sched, prio, p, prio, relaxed, {
+		if (p >= prio) os_atomic_rmw_loop_give_up(return);
+	});
+#if DISPATCH_USE_KEVENT_WORKQUEUE
+	_dispatch_root_queues_init();
+	if (_dispatch_kevent_workqueue_enabled) {
+		pthread_priority_t pp = 0;
+		if (prio > _dispatch_mgr_sched.default_prio) {
+			// The values of _PTHREAD_PRIORITY_SCHED_PRI_FLAG and
+			// _PTHREAD_PRIORITY_ROOTQUEUE_FLAG overlap, but that is not
+			// problematic in this case, since the second one is only ever
+			// used on dq_priority fields.
+			// We never pass the _PTHREAD_PRIORITY_ROOTQUEUE_FLAG to a syscall;
+			// it is meaningful to libdispatch only.
+			pp = (pthread_priority_t)prio | _PTHREAD_PRIORITY_SCHED_PRI_FLAG;
+		} else if (qos) {
+			pp = _pthread_qos_class_encode(qos, 0, 0);
+		}
+		if (pp) {
+			int r = _pthread_workqueue_set_event_manager_priority(pp);
+			(void)dispatch_assume_zero(r);
+		}
+		return;
+	}
+#endif
+#if DISPATCH_USE_MGR_THREAD
+	if (_dispatch_mgr_sched.tid) {
+		return _dispatch_mgr_priority_apply();
+	}
+#endif
+}
+#endif // !defined(_WIN32)
+#endif // DISPATCH_USE_PTHREAD_ROOT_QUEUES
+
+DISPATCH_ALWAYS_INLINE
+static inline void
+_dispatch_queue_mgr_lock(struct dispatch_queue_static_s *dq)
+{
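+	// Take the manager queue's drain lock for the current thread. The queue
+	// is expected to be runnable and unlocked at this point, so any other
+	// state is a fatal internal error.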
+	uint64_t old_state, new_state, set_owner_and_set_full_width =
+			_dispatch_lock_value_for_self() | DISPATCH_QUEUE_SERIAL_DRAIN_OWNED;
+
+	os_atomic_rmw_loop2o(dq, dq_state, old_state, new_state, acquire, {
+		new_state = old_state;
+		if (unlikely(!_dq_state_is_runnable(old_state) ||
+				_dq_state_drain_locked(old_state))) {
+			DISPATCH_INTERNAL_CRASH((uintptr_t)old_state,
+					"Locking the manager should not fail");
+		}
+		new_state &= DISPATCH_QUEUE_DRAIN_PRESERVED_BITS_MASK;
+		new_state |= set_owner_and_set_full_width;
+	});
+}
+
+#if DISPATCH_USE_KEVENT_WORKQUEUE
+DISPATCH_ALWAYS_INLINE
+static inline bool
+_dispatch_queue_mgr_unlock(struct dispatch_queue_static_s *dq)
+{
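+	// Drop the drain lock taken in _dispatch_queue_mgr_lock(). Returns true
+	// when the queue became dirty while it was being drained, in which case
+	// the caller must poke the event loop again.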
+	uint64_t old_state, new_state;
+	os_atomic_rmw_loop2o(dq, dq_state, old_state, new_state, release, {
+		new_state = old_state - DISPATCH_QUEUE_SERIAL_DRAIN_OWNED;
+		new_state &= ~DISPATCH_QUEUE_DRAIN_UNLOCK_MASK;
+		new_state &= ~DISPATCH_QUEUE_MAX_QOS_MASK;
+	});
+	return _dq_state_is_dirty(old_state);
+}
+#endif // DISPATCH_USE_KEVENT_WORKQUEUE
+
+static void
+_dispatch_mgr_queue_drain(void)
+{
+	const dispatch_invoke_flags_t flags = DISPATCH_INVOKE_MANAGER_DRAIN;
+	dispatch_invoke_context_s dic = { };
+	struct dispatch_queue_static_s *dq = &_dispatch_mgr_q;
+	uint64_t owned = DISPATCH_QUEUE_SERIAL_DRAIN_OWNED;
+
+	if (dq->dq_items_tail) {
+		_dispatch_perfmon_start();
+		_dispatch_set_basepri_override_qos(DISPATCH_QOS_SATURATED);
+		if (unlikely(_dispatch_lane_serial_drain(dq, &dic, flags, &owned))) {
+			DISPATCH_INTERNAL_CRASH(0, "Interrupted drain on manager queue");
+		}
+		_dispatch_voucher_debug("mgr queue clear", NULL);
+		_voucher_clear();
+		_dispatch_reset_basepri_override();
+		_dispatch_perfmon_end(perfmon_thread_manager);
+	}
+
+#if DISPATCH_USE_KEVENT_WORKQUEUE
+	if (!_dispatch_kevent_workqueue_enabled)
+#endif
+	{
+		_dispatch_force_cache_cleanup();
+	}
+}
+
+void
+_dispatch_mgr_queue_push(dispatch_lane_t dq, dispatch_object_t dou,
+		DISPATCH_UNUSED dispatch_qos_t qos)
+{
+	uint64_t dq_state;
+
+	if (unlikely(_dispatch_object_is_waiter(dou))) {
+		DISPATCH_CLIENT_CRASH(0, "Waiter pushed onto manager");
+	}
+
+	if (unlikely(_dispatch_queue_push_item(dq, dou))) {
+		dq_state = os_atomic_or2o(dq, dq_state, DISPATCH_QUEUE_DIRTY, release);
+		if (!_dq_state_drain_locked_by_self(dq_state)) {
+			_dispatch_trace_runtime_event(worker_request, &_dispatch_mgr_q, 1);
+			_dispatch_event_loop_poke(DISPATCH_WLH_MANAGER, 0, 0);
+		}
+	}
+}
+
+DISPATCH_NORETURN
+void
+_dispatch_mgr_queue_wakeup(DISPATCH_UNUSED dispatch_lane_t dq,
+		DISPATCH_UNUSED dispatch_qos_t qos,
+		DISPATCH_UNUSED dispatch_wakeup_flags_t flags)
+{
+	DISPATCH_INTERNAL_CRASH(0, "Don't try to wake up or override the manager");
+}
+
+#if DISPATCH_USE_MGR_THREAD
+DISPATCH_NOINLINE DISPATCH_NORETURN
+static void
+_dispatch_mgr_invoke(void)
+{
+#if DISPATCH_EVENT_BACKEND_KEVENT
+	dispatch_kevent_s evbuf[DISPATCH_DEFERRED_ITEMS_EVENT_COUNT];
+#endif
+	dispatch_deferred_items_s ddi = {
+		.ddi_wlh = DISPATCH_WLH_ANON,
+#if DISPATCH_EVENT_BACKEND_KEVENT
+		.ddi_maxevents = DISPATCH_DEFERRED_ITEMS_EVENT_COUNT,
+		.ddi_eventlist = evbuf,
+#endif
+	};
+
+	_dispatch_deferred_items_set(&ddi);
+	for (;;) {
+		bool poll = false;
+		_dispatch_mgr_queue_drain();
+		_dispatch_event_loop_drain_anon_timers();
+		poll = _dispatch_queue_class_probe(&_dispatch_mgr_q);
+		_dispatch_event_loop_drain(poll ? KEVENT_FLAG_IMMEDIATE : 0);
+	}
+}
+
+DISPATCH_NORETURN
+void
+_dispatch_mgr_thread(dispatch_lane_t dq DISPATCH_UNUSED,
+		dispatch_invoke_context_t dic DISPATCH_UNUSED,
+		dispatch_invoke_flags_t flags DISPATCH_UNUSED)
+{
+#if DISPATCH_USE_KEVENT_WORKQUEUE
+	if (_dispatch_kevent_workqueue_enabled) {
+		DISPATCH_INTERNAL_CRASH(0, "Manager queue invoked with "
+				"kevent workqueue enabled");
+	}
+#endif
+	_dispatch_queue_set_current(&_dispatch_mgr_q);
+#if DISPATCH_USE_PTHREAD_ROOT_QUEUES
+	_dispatch_mgr_priority_init();
+#endif
+	_dispatch_queue_mgr_lock(&_dispatch_mgr_q);
+	// never returns, so burn bridges behind us & clear stack 2k ahead
+	_dispatch_clear_stack(2048);
+	_dispatch_mgr_invoke();
+}
+#endif // DISPATCH_USE_MGR_THREAD
+
+#if DISPATCH_USE_KEVENT_WORKQUEUE
+
+dispatch_static_assert(WORKQ_KEVENT_EVENT_BUFFER_LEN >=
+		DISPATCH_DEFERRED_ITEMS_EVENT_COUNT,
+		"our list should not be longer than the kernel's");
+
+static void _dispatch_root_queue_drain_deferred_item(
+		dispatch_deferred_items_t ddi DISPATCH_PERF_MON_ARGS_PROTO);
+static void _dispatch_root_queue_drain_deferred_wlh(
+		dispatch_deferred_items_t ddi DISPATCH_PERF_MON_ARGS_PROTO);
+
+void
+_dispatch_kevent_workqueue_init(void)
+{
+	// Initialize kevent workqueue support
+	_dispatch_root_queues_init();
+	if (!_dispatch_kevent_workqueue_enabled) return;
+	dispatch_once_f(&_dispatch_mgr_sched_pred, NULL, _dispatch_mgr_sched_init);
+	qos_class_t qos = _dispatch_mgr_sched.qos;
+	int prio = _dispatch_mgr_sched.prio;
+	pthread_priority_t pp = 0;
+	if (qos) {
+		pp = _pthread_qos_class_encode(qos, 0, 0);
+	}
+	if (prio > _dispatch_mgr_sched.default_prio) {
+		pp = (pthread_priority_t)prio | _PTHREAD_PRIORITY_SCHED_PRI_FLAG;
+	}
+	if (pp) {
+		int r = _pthread_workqueue_set_event_manager_priority(pp);
+		(void)dispatch_assume_zero(r);
+	}
+}
+
+DISPATCH_ALWAYS_INLINE
+static inline bool
+_dispatch_wlh_worker_thread_init(dispatch_deferred_items_t ddi)
+{
+	dispatch_assert(ddi->ddi_wlh);
+
+	pthread_priority_t pp = _dispatch_get_priority();
+	if (!(pp & _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG)) {
+		// If this thread does not have the event manager flag set, don't set up
+		// as the dispatch manager and let the caller know to only process
+		// the delivered events.
+		//
+		// Also add the NEEDS_UNBIND flag so that
+		// _dispatch_priority_compute_update knows it has to unbind
+		pp &= _PTHREAD_PRIORITY_OVERCOMMIT_FLAG | ~_PTHREAD_PRIORITY_FLAGS_MASK;
+		if (ddi->ddi_wlh == DISPATCH_WLH_ANON) {
+			pp |= _PTHREAD_PRIORITY_NEEDS_UNBIND_FLAG;
+		} else {
+			// pthread sets the flag when it is an event delivery thread
+			// so we need to explicitly clear it
+			pp &= ~(pthread_priority_t)_PTHREAD_PRIORITY_NEEDS_UNBIND_FLAG;
+		}
+		_dispatch_thread_setspecific(dispatch_priority_key,
+				(void *)(uintptr_t)pp);
+		if (ddi->ddi_wlh != DISPATCH_WLH_ANON) {
+			_dispatch_debug("wlh[%p]: handling events", ddi->ddi_wlh);
+		} else {
+			ddi->ddi_can_stash = true;
+		}
+		return false;
+	}
+
+	if ((pp & _PTHREAD_PRIORITY_SCHED_PRI_FLAG) ||
+			!(pp & ~_PTHREAD_PRIORITY_FLAGS_MASK)) {
+		// When the pthread kext is delivering kevents to us, and pthread
+		// root queues are in use, then the pthread priority TSD is set
+		// to a sched pri with the _PTHREAD_PRIORITY_SCHED_PRI_FLAG bit set.
+		//
+		// Given that this isn't a valid QoS we need to fixup the TSD,
+		// and the best option is to clear the qos/priority bits which tells
+		// us to not do any QoS related calls on this thread.
+		//
+		// However, in that case the manager thread is opted out of QoS,
+		// as far as pthread is concerned, and can't be turned into
+		// something else, so we can't stash.
+		pp &= (pthread_priority_t)_PTHREAD_PRIORITY_FLAGS_MASK;
+	}
+	// Managers always park without mutating to a regular worker thread, and
+	// hence never need to unbind from userland, and when draining a manager,
+	// the NEEDS_UNBIND flag would cause the mutation to happen.
+	// So we need to strip this flag
+	pp &= ~(pthread_priority_t)_PTHREAD_PRIORITY_NEEDS_UNBIND_FLAG;
+	_dispatch_thread_setspecific(dispatch_priority_key, (void *)(uintptr_t)pp);
+
+	// ensure kevents registered from this thread are registered at manager QoS
+	_dispatch_init_basepri_wlh(DISPATCH_PRIORITY_FLAG_MANAGER);
+	_dispatch_queue_set_current(&_dispatch_mgr_q);
+	_dispatch_queue_mgr_lock(&_dispatch_mgr_q);
+	return true;
+}
+
+DISPATCH_ALWAYS_INLINE
+static inline void
+_dispatch_wlh_worker_thread_reset(void)
+{
+	bool needs_poll = _dispatch_queue_mgr_unlock(&_dispatch_mgr_q);
+	_dispatch_clear_basepri();
+	_dispatch_queue_set_current(NULL);
+	if (needs_poll) {
+		_dispatch_trace_runtime_event(worker_request, &_dispatch_mgr_q, 1);
+		_dispatch_event_loop_poke(DISPATCH_WLH_MANAGER, 0, 0);
+	}
+}
+
+DISPATCH_ALWAYS_INLINE
+static void
+_dispatch_wlh_worker_thread(dispatch_wlh_t wlh, dispatch_kevent_t events,
+		int *nevents)
+{
+	_dispatch_introspection_thread_add();
+
+	DISPATCH_PERF_MON_VAR_INIT
+
+	dispatch_deferred_items_s ddi = {
+		.ddi_wlh = wlh,
+		.ddi_eventlist = events,
+	};
+	bool is_manager;
+
+	is_manager = _dispatch_wlh_worker_thread_init(&ddi);
+	if (!is_manager) {
+		_dispatch_trace_runtime_event(worker_event_delivery,
+				wlh == DISPATCH_WLH_ANON ? NULL : wlh, (uint64_t)*nevents);
+		_dispatch_perfmon_start_impl(true);
+	} else {
+		_dispatch_trace_runtime_event(worker_event_delivery,
+				&_dispatch_mgr_q, (uint64_t)*nevents);
+		ddi.ddi_wlh = DISPATCH_WLH_ANON;
+	}
+	_dispatch_deferred_items_set(&ddi);
+	_dispatch_event_loop_merge(events, *nevents);
+
+	if (is_manager) {
+		_dispatch_trace_runtime_event(worker_unpark, &_dispatch_mgr_q, 0);
+		_dispatch_mgr_queue_drain();
+		_dispatch_event_loop_drain_anon_timers();
+		_dispatch_wlh_worker_thread_reset();
+	} else if (ddi.ddi_stashed_dou._do) {
+		_dispatch_debug("wlh[%p]: draining deferred item %p", ddi.ddi_wlh,
+				ddi.ddi_stashed_dou._do);
+		if (ddi.ddi_wlh == DISPATCH_WLH_ANON) {
+			dispatch_assert(ddi.ddi_nevents == 0);
+			_dispatch_deferred_items_set(NULL);
+			_dispatch_trace_runtime_event(worker_unpark, ddi.ddi_stashed_rq, 0);
+			_dispatch_root_queue_drain_deferred_item(&ddi
+					DISPATCH_PERF_MON_ARGS);
+		} else {
+			_dispatch_trace_runtime_event(worker_unpark, wlh, 0);
+			_dispatch_root_queue_drain_deferred_wlh(&ddi
+					DISPATCH_PERF_MON_ARGS);
+		}
+	}
+
+	_dispatch_deferred_items_set(NULL);
+	if (!is_manager && !ddi.ddi_stashed_dou._do) {
+		_dispatch_perfmon_end(perfmon_thread_event_no_steal);
+	}
+	_dispatch_debug("returning %d deferred kevents", ddi.ddi_nevents);
+	_dispatch_clear_return_to_kernel();
+	*nevents = ddi.ddi_nevents;
+
+	_dispatch_trace_runtime_event(worker_park, NULL, 0);
+}
+
+DISPATCH_NOINLINE
+static void
+_dispatch_kevent_worker_thread(dispatch_kevent_t *events, int *nevents)
+{
+	if (!events || !nevents) {
+		// events for worker thread request have already been delivered earlier
+		return;
+	}
+	if (!dispatch_assume(*nevents && *events)) return;
+	_dispatch_adopt_wlh_anon();
+	_dispatch_wlh_worker_thread(DISPATCH_WLH_ANON, *events, nevents);
+	_dispatch_reset_wlh();
+}
+
+#if DISPATCH_USE_KEVENT_WORKLOOP
+DISPATCH_NOINLINE
+static void
+_dispatch_workloop_worker_thread(uint64_t *workloop_id,
+		dispatch_kevent_t *events, int *nevents)
+{
+	if (!workloop_id || !dispatch_assume(*workloop_id != 0)) {
+		return _dispatch_kevent_worker_thread(events, nevents);
+	}
+	if (!events || !nevents) {
+		// events for worker thread request have already been delivered earlier
+		return;
+	}
+	if (!dispatch_assume(*nevents && *events)) return;
+	dispatch_wlh_t wlh = (dispatch_wlh_t)*workloop_id;
+	_dispatch_adopt_wlh(wlh);
+	_dispatch_wlh_worker_thread(wlh, *events, nevents);
+	_dispatch_preserve_wlh_storage_reference(wlh);
+}
+#endif // DISPATCH_USE_KEVENT_WORKLOOP
+#endif // DISPATCH_USE_KEVENT_WORKQUEUE
+#pragma mark -
+#pragma mark dispatch_root_queue
+
+#if DISPATCH_USE_PTHREAD_POOL
+static void *_dispatch_worker_thread(void *context);
+#if defined(_WIN32)
+static unsigned WINAPI _dispatch_worker_thread_thunk(LPVOID lpParameter);
+#endif
+#endif // DISPATCH_USE_PTHREAD_POOL
+
+#if DISPATCH_DEBUG && DISPATCH_ROOT_QUEUE_DEBUG
+#define _dispatch_root_queue_debug(...) _dispatch_debug(__VA_ARGS__)
+static void
+_dispatch_debug_root_queue(dispatch_queue_class_t dqu, const char *str)
+{
+	if (likely(dqu._dq)) {
+		_dispatch_object_debug(dqu._dq, "%s", str);
+	} else {
+		_dispatch_log("queue[NULL]: %s", str);
+	}
+}
+#else
+#define _dispatch_root_queue_debug(...)
+#define _dispatch_debug_root_queue(...)
+#endif // DISPATCH_DEBUG && DISPATCH_ROOT_QUEUE_DEBUG
+
+DISPATCH_NOINLINE
+static void
+_dispatch_root_queue_poke_slow(dispatch_queue_global_t dq, int n, int floor)
+{
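+	// `floor` is the number of pthread-pool slots to keep in reserve: thread
+	// creation requests are clamped so that at least that many slots remain
+	// free. It is ignored on the kernel workqueue path.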
+	int remaining = n;
+#if !defined(_WIN32)
+	int r = ENOSYS;
+#endif
+
+	_dispatch_root_queues_init();
+	_dispatch_debug_root_queue(dq, __func__);
+	_dispatch_trace_runtime_event(worker_request, dq, (uint64_t)n);
+
+#if !DISPATCH_USE_INTERNAL_WORKQUEUE
+#if DISPATCH_USE_PTHREAD_ROOT_QUEUES
+	if (dx_type(dq) == DISPATCH_QUEUE_GLOBAL_ROOT_TYPE)
+#endif
+	{
+		_dispatch_root_queue_debug("requesting new worker thread for global "
+				"queue: %p", dq);
+		r = _pthread_workqueue_addthreads(remaining,
+				_dispatch_priority_to_pp_prefer_fallback(dq->dq_priority));
+		(void)dispatch_assume_zero(r);
+		return;
+	}
+#endif // !DISPATCH_USE_INTERNAL_WORKQUEUE
+#if DISPATCH_USE_PTHREAD_POOL
+	dispatch_pthread_root_queue_context_t pqc = dq->do_ctxt;
+	if (likely(pqc->dpq_thread_mediator.do_vtable)) {
+		while (dispatch_semaphore_signal(&pqc->dpq_thread_mediator)) {
+			_dispatch_root_queue_debug("signaled sleeping worker for "
+					"global queue: %p", dq);
+			if (!--remaining) {
+				return;
+			}
+		}
+	}
+
+	bool overcommit = dq->dq_priority & DISPATCH_PRIORITY_FLAG_OVERCOMMIT;
+	if (overcommit) {
+		os_atomic_add2o(dq, dgq_pending, remaining, relaxed);
+	} else {
+		if (!os_atomic_cmpxchg2o(dq, dgq_pending, 0, remaining, relaxed)) {
+			_dispatch_root_queue_debug("worker thread request still pending for "
+					"global queue: %p", dq);
+			return;
+		}
+	}
+
+	int can_request, t_count;
+	// seq_cst with atomic store to tail <rdar://problem/16932833>
+	t_count = os_atomic_load2o(dq, dgq_thread_pool_size, ordered);
+	do {
+		can_request = t_count < floor ? 0 : t_count - floor;
+		if (remaining > can_request) {
+			_dispatch_root_queue_debug("pthread pool reducing request from %d to %d",
+					remaining, can_request);
+			os_atomic_sub2o(dq, dgq_pending, remaining - can_request, relaxed);
+			remaining = can_request;
+		}
+		if (remaining == 0) {
+			_dispatch_root_queue_debug("pthread pool is full for root queue: "
+					"%p", dq);
+			return;
+		}
+	} while (!os_atomic_cmpxchgvw2o(dq, dgq_thread_pool_size, t_count,
+			t_count - remaining, &t_count, acquire));
+
+#if !defined(_WIN32)
+	pthread_attr_t *attr = &pqc->dpq_thread_attr;
+	pthread_t tid, *pthr = &tid;
+#if DISPATCH_USE_MGR_THREAD && DISPATCH_USE_PTHREAD_ROOT_QUEUES
+	if (unlikely(dq == &_dispatch_mgr_root_queue)) {
+		pthr = _dispatch_mgr_root_queue_init();
+	}
+#endif
+	do {
+		_dispatch_retain(dq); // released in _dispatch_worker_thread
+		while ((r = pthread_create(pthr, attr, _dispatch_worker_thread, dq))) {
+			if (r != EAGAIN) {
+				(void)dispatch_assume_zero(r);
+			}
+			_dispatch_temporary_resource_shortage();
+		}
+	} while (--remaining);
+#else // defined(_WIN32)
+#if DISPATCH_USE_MGR_THREAD && DISPATCH_USE_PTHREAD_ROOT_QUEUES
+	if (unlikely(dq == &_dispatch_mgr_root_queue)) {
+		_dispatch_mgr_root_queue_init();
+	}
+#endif
+	do {
+		_dispatch_retain(dq); // released in _dispatch_worker_thread
+#if DISPATCH_DEBUG
+		unsigned dwStackSize = 0;
+#else
+		unsigned dwStackSize = 64 * 1024;
+#endif
+		uintptr_t hThread = 0;
+		while (!(hThread = _beginthreadex(NULL, dwStackSize, _dispatch_worker_thread_thunk, dq, STACK_SIZE_PARAM_IS_A_RESERVATION, NULL))) {
+			if (errno != EAGAIN) {
+				(void)dispatch_assume(hThread);
+			}
+			_dispatch_temporary_resource_shortage();
+		}
+#if DISPATCH_USE_PTHREAD_ROOT_QUEUES
+		if (_dispatch_mgr_sched.prio > _dispatch_mgr_sched.default_prio) {
+			(void)dispatch_assume_zero(SetThreadPriority((HANDLE)hThread, _dispatch_mgr_sched.prio) == TRUE);
+		}
+#endif
+		CloseHandle((HANDLE)hThread);
+	} while (--remaining);
+#endif // defined(_WIN32)
+#else
+	(void)floor;
+#endif // DISPATCH_USE_PTHREAD_POOL
+}
+
+DISPATCH_NOINLINE
+void
+_dispatch_root_queue_poke(dispatch_queue_global_t dq, int n, int floor)
+{
+	if (!_dispatch_queue_class_probe(dq)) {
+		return;
+	}
+#if !DISPATCH_USE_INTERNAL_WORKQUEUE
+#if DISPATCH_USE_PTHREAD_POOL
+	if (likely(dx_type(dq) == DISPATCH_QUEUE_GLOBAL_ROOT_TYPE))
+#endif
+	{
+		if (unlikely(!os_atomic_cmpxchg2o(dq, dgq_pending, 0, n, relaxed))) {
+			_dispatch_root_queue_debug("worker thread request still pending "
+					"for global queue: %p", dq);
+			return;
+		}
+	}
+#endif // !DISPATCH_USE_INTERNAL_WORKQUEUE
+	return _dispatch_root_queue_poke_slow(dq, n, floor);
+}
+
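+// Sentinel installed in dq_items_head while a drainer owns the dequeue; it
+// acts both as a lock and as a signal to contending threads to back off.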
+#define DISPATCH_ROOT_QUEUE_MEDIATOR ((struct dispatch_object_s *)~0ul)
+
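+// Outcome of a contended wait on a root queue: keep waiting, retry the
+// dequeue because work is ready, or abort because the queue drained empty.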
+enum {
+	DISPATCH_ROOT_QUEUE_DRAIN_WAIT,
+	DISPATCH_ROOT_QUEUE_DRAIN_READY,
+	DISPATCH_ROOT_QUEUE_DRAIN_ABORT,
+};
+
+static int
+_dispatch_root_queue_mediator_is_gone(dispatch_queue_global_t dq)
+{
+	return os_atomic_load2o(dq, dq_items_head, relaxed) !=
+			DISPATCH_ROOT_QUEUE_MEDIATOR;
+}
+
+static int
+_dispatch_root_queue_head_tail_quiesced(dispatch_queue_global_t dq)
+{
+	// Wait for queue head and tail to be both non-empty or both empty
+	struct dispatch_object_s *head, *tail;
+	head = os_atomic_load2o(dq, dq_items_head, relaxed);
+	tail = os_atomic_load2o(dq, dq_items_tail, relaxed);
+	if ((head == NULL) == (tail == NULL)) {
+		if (tail == NULL) { // <rdar://problem/15917893>
+			return DISPATCH_ROOT_QUEUE_DRAIN_ABORT;
+		}
+		return DISPATCH_ROOT_QUEUE_DRAIN_READY;
+	}
+	return DISPATCH_ROOT_QUEUE_DRAIN_WAIT;
+}
 
 DISPATCH_NOINLINE
 static bool
-_dispatch_root_queue_drain_one_slow(dispatch_queue_t dq)
+__DISPATCH_ROOT_QUEUE_CONTENDED_WAIT__(dispatch_queue_global_t dq,
+		int (*predicate)(dispatch_queue_global_t dq))
 {
-	dispatch_root_queue_context_t qc = dq->do_ctxt;
-	struct dispatch_object_s *const mediator = (void *)~0ul;
-	bool pending = false, available = true;
 	unsigned int sleep_time = DISPATCH_CONTENTION_USLEEP_START;
+	int status = DISPATCH_ROOT_QUEUE_DRAIN_READY;
+	bool pending = false;
 
 	do {
 		// Spin for a short while in case the contention is temporary -- e.g.
 		// when starting up after dispatch_apply, or when executing a few
 		// short continuations in a row.
-		if (_dispatch_contention_wait_until(dq->dq_items_head != mediator)) {
+		if (_dispatch_contention_wait_until(status = predicate(dq))) {
 			goto out;
 		}
 		// Since we have serious contention, we need to back off.
 		if (!pending) {
 			// Mark this queue as pending to avoid requests for further threads
-			(void)os_atomic_inc2o(qc, dgq_pending, relaxed);
+			(void)os_atomic_inc2o(dq, dgq_pending, relaxed);
 			pending = true;
 		}
 		_dispatch_contention_usleep(sleep_time);
-		if (fastpath(dq->dq_items_head != mediator)) goto out;
+		if (likely(status = predicate(dq))) goto out;
 		sleep_time *= 2;
 	} while (sleep_time < DISPATCH_CONTENTION_USLEEP_MAX);
 
@@ -5842,57 +5879,49 @@
 	// Create a new pending thread and then exit this thread.
 	// The kernel will grant a new thread when the load subsides.
 	_dispatch_debug("contention on global queue: %p", dq);
-	available = false;
 out:
 	if (pending) {
-		(void)os_atomic_dec2o(qc, dgq_pending, relaxed);
+		(void)os_atomic_dec2o(dq, dgq_pending, relaxed);
 	}
-	if (!available) {
-		_dispatch_global_queue_poke(dq, 1, 0);
+	if (status == DISPATCH_ROOT_QUEUE_DRAIN_WAIT) {
+		_dispatch_root_queue_poke(dq, 1, 0);
 	}
-	return available;
-}
-
-DISPATCH_ALWAYS_INLINE
-static inline bool
-_dispatch_root_queue_drain_one2(dispatch_queue_t dq)
-{
-	// Wait for queue head and tail to be both non-empty or both empty
-	bool available; // <rdar://problem/15917893>
-	_dispatch_wait_until((dq->dq_items_head != NULL) ==
-			(available = (dq->dq_items_tail != NULL)));
-	return available;
+	return status == DISPATCH_ROOT_QUEUE_DRAIN_READY;
 }
 
 DISPATCH_ALWAYS_INLINE_NDEBUG
 static inline struct dispatch_object_s *
-_dispatch_root_queue_drain_one(dispatch_queue_t dq)
+_dispatch_root_queue_drain_one(dispatch_queue_global_t dq)
 {
-	struct dispatch_object_s *head, *next, *const mediator = (void *)~0ul;
+	struct dispatch_object_s *head, *next;
 
 start:
-	// The mediator value acts both as a "lock" and a signal
-	head = os_atomic_xchg2o(dq, dq_items_head, mediator, relaxed);
+	// The MEDIATOR value acts both as a "lock" and a signal
+	head = os_atomic_xchg2o(dq, dq_items_head,
+			DISPATCH_ROOT_QUEUE_MEDIATOR, relaxed);
 
-	if (slowpath(head == NULL)) {
+	if (unlikely(head == NULL)) {
 		// The first xchg on the tail will tell the enqueueing thread that it
 		// is safe to blindly write out to the head pointer. A cmpxchg honors
 		// the algorithm.
-		if (slowpath(!os_atomic_cmpxchg2o(dq, dq_items_head, mediator,
-				NULL, relaxed))) {
+		if (unlikely(!os_atomic_cmpxchg2o(dq, dq_items_head,
+				DISPATCH_ROOT_QUEUE_MEDIATOR, NULL, relaxed))) {
 			goto start;
 		}
-		if (slowpath(dq->dq_items_tail) && // <rdar://problem/14416349>
-				_dispatch_root_queue_drain_one2(dq)) {
-			goto start;
+		if (unlikely(dq->dq_items_tail)) { // <rdar://problem/14416349>
+			if (__DISPATCH_ROOT_QUEUE_CONTENDED_WAIT__(dq,
+					_dispatch_root_queue_head_tail_quiesced)) {
+				goto start;
+			}
 		}
 		_dispatch_root_queue_debug("no work on global queue: %p", dq);
 		return NULL;
 	}
 
-	if (slowpath(head == mediator)) {
+	if (unlikely(head == DISPATCH_ROOT_QUEUE_MEDIATOR)) {
 		// This thread lost the race for ownership of the queue.
-		if (fastpath(_dispatch_root_queue_drain_one_slow(dq))) {
+		if (likely(__DISPATCH_ROOT_QUEUE_CONTENDED_WAIT__(dq,
+				_dispatch_root_queue_mediator_is_gone))) {
 			goto start;
 		}
 		return NULL;
@@ -5900,9 +5929,9 @@
 
 	// Restore the head pointer to a sane value before returning.
 	// If 'next' is NULL, then this item _might_ be the last item.
-	next = fastpath(head->do_next);
+	next = head->do_next;
 
-	if (slowpath(!next)) {
+	if (unlikely(!next)) {
 		os_atomic_store2o(dq, dq_items_head, NULL, relaxed);
 		// 22708742: set tail to NULL with release, so that NULL write to head
 		//           above doesn't clobber head from concurrent enqueuer
@@ -5915,72 +5944,128 @@
 	}
 
 	os_atomic_store2o(dq, dq_items_head, next, relaxed);
-	_dispatch_global_queue_poke(dq, 1, 0);
+	_dispatch_root_queue_poke(dq, 1, 0);
 out:
 	return head;
 }
 
 #if DISPATCH_USE_KEVENT_WORKQUEUE
-void
+static void
 _dispatch_root_queue_drain_deferred_wlh(dispatch_deferred_items_t ddi
 		DISPATCH_PERF_MON_ARGS_PROTO)
 {
-	dispatch_queue_t rq = ddi->ddi_stashed_rq;
+	dispatch_queue_global_t rq = ddi->ddi_stashed_rq;
 	dispatch_queue_t dq = ddi->ddi_stashed_dou._dq;
 	_dispatch_queue_set_current(rq);
-	dispatch_priority_t old_pri = _dispatch_set_basepri_wlh(rq->dq_priority);
+
 	dispatch_invoke_context_s dic = { };
 	dispatch_invoke_flags_t flags = DISPATCH_INVOKE_WORKER_DRAIN |
 			DISPATCH_INVOKE_REDIRECTING_DRAIN | DISPATCH_INVOKE_WLH;
 	_dispatch_queue_drain_init_narrowing_check_deadline(&dic, rq->dq_priority);
 	uint64_t dq_state;
 
+	_dispatch_init_basepri_wlh(rq->dq_priority);
 	ddi->ddi_wlh_servicing = true;
-	if (unlikely(_dispatch_needs_to_return_to_kernel())) {
-		_dispatch_return_to_kernel();
-	}
 retry:
 	dispatch_assert(ddi->ddi_wlh_needs_delete);
-	_dispatch_trace_continuation_pop(rq, dq);
+	_dispatch_trace_item_pop(rq, dq);
 
 	if (_dispatch_queue_drain_try_lock_wlh(dq, &dq_state)) {
 		dx_invoke(dq, &dic, flags);
+#if DISPATCH_USE_KEVENT_WORKLOOP
+		//
+		// dx_invoke() will always return `dq` unlocked or locked by another
+		// thread, and either have consumed the +2 or transferred it to the
+		// other thread.
+		//
+#endif
 		if (!ddi->ddi_wlh_needs_delete) {
+#if DISPATCH_USE_KEVENT_WORKLOOP
+			//
+			// The fate of the workloop thread request has already been dealt
+			// with, which can happen for 4 reasons, for which we just want
+			// to go park and skip trying to unregister the thread request:
+			// - the workloop target has been changed
+			// - the workloop has been re-enqueued because of narrowing
+			// - the workloop has been re-enqueued on the manager queue
+			// - the workloop ownership has been handed off to a sync owner
+			//
+#endif
 			goto park;
 		}
+#if DISPATCH_USE_KEVENT_WORKLOOP
+		//
+		// The workloop has been drained to completion or suspended.
+		// dx_invoke() has cleared the enqueued bit before it returned.
+		//
+		// Since a dispatch_set_target_queue() could occur between the unlock
+		// and our reload of `dq_state` (rdar://32671286) we need to re-assess
+		// the workloop-ness of the queue. If it's not a workloop anymore,
+		// _dispatch_event_loop_leave_immediate() will have handled the kevent
+		// deletion already.
+		//
+		// Then, we check one last time that the queue is still not enqueued,
+		// in which case we attempt to quiesce it.
+		//
+		// If we find it enqueued again, it means someone else has been
+		// enqueuing concurrently and has made a thread request that coalesced
+		// with ours, but since dx_invoke() cleared the enqueued bit,
+		// the other thread didn't realize that and added a +1 ref count.
+		// Take over that +1, and add our own to make the +2 this loop expects,
+		// and drain again.
+		//
+#endif // DISPATCH_USE_KEVENT_WORKLOOP
 		dq_state = os_atomic_load2o(dq, dq_state, relaxed);
 		if (unlikely(!_dq_state_is_base_wlh(dq_state))) { // rdar://32671286
 			goto park;
 		}
 		if (unlikely(_dq_state_is_enqueued_on_target(dq_state))) {
 			_dispatch_retain(dq);
-			_dispatch_trace_continuation_push(dq->do_targetq, dq);
+			_dispatch_trace_item_push(dq->do_targetq, dq);
 			goto retry;
 		}
 	} else {
-		_dispatch_release_no_dispose(dq);
+#if DISPATCH_USE_KEVENT_WORKLOOP
+		//
+		// The workloop enters this function with a +2 refcount, however we
+		// couldn't acquire the lock due to suspension or discovering that
+		// the workloop was locked by a sync owner.
+		//
+		// We need to give up, and _dispatch_event_loop_leave_deferred()
+		// will do a DISPATCH_WORKLOOP_ASYNC_DISCOVER_SYNC transition to
+		// tell the kernel to stop driving this thread request. We leave
+		// a +1 with the thread request, and consume the extra +1 we have.
+		//
+#endif
+		if (_dq_state_is_suspended(dq_state)) {
+			dispatch_assert(!_dq_state_is_enqueued(dq_state));
+			_dispatch_release_2_no_dispose(dq);
+		} else {
+			dispatch_assert(_dq_state_is_enqueued(dq_state));
+			dispatch_assert(_dq_state_drain_locked(dq_state));
+			_dispatch_release_no_dispose(dq);
+		}
 	}
 
-	_dispatch_event_loop_leave_deferred((dispatch_wlh_t)dq, dq_state);
+	_dispatch_event_loop_leave_deferred(ddi, dq_state);
 
 park:
 	// event thread that could steal
 	_dispatch_perfmon_end(perfmon_thread_event_steal);
-	_dispatch_reset_basepri(old_pri);
-	_dispatch_reset_basepri_override();
+	_dispatch_clear_basepri();
 	_dispatch_queue_set_current(NULL);
 
 	_dispatch_voucher_debug("root queue clear", NULL);
 	_dispatch_reset_voucher(NULL, DISPATCH_THREAD_PARK);
 }
 
-void
+static void
 _dispatch_root_queue_drain_deferred_item(dispatch_deferred_items_t ddi
 		DISPATCH_PERF_MON_ARGS_PROTO)
 {
-	dispatch_queue_t rq = ddi->ddi_stashed_rq;
+	dispatch_queue_global_t rq = ddi->ddi_stashed_rq;
 	_dispatch_queue_set_current(rq);
-	dispatch_priority_t old_pri = _dispatch_set_basepri(rq->dq_priority);
+	_dispatch_trace_runtime_event(worker_unpark, NULL, 0);
 
 	dispatch_invoke_context_s dic = { };
 	dispatch_invoke_flags_t flags = DISPATCH_INVOKE_WORKER_DRAIN |
@@ -5989,6 +6074,8 @@
 	_dispatch_last_resort_autorelease_pool_push(&dic);
 #endif // DISPATCH_COCOA_COMPAT
 	_dispatch_queue_drain_init_narrowing_check_deadline(&dic, rq->dq_priority);
+	_dispatch_init_basepri(rq->dq_priority);
+
 	_dispatch_continuation_pop_inline(ddi->ddi_stashed_dou, &dic, flags, rq);
 
 	// event thread that could steal
@@ -5996,8 +6083,7 @@
 #if DISPATCH_COCOA_COMPAT
 	_dispatch_last_resort_autorelease_pool_pop(&dic);
 #endif // DISPATCH_COCOA_COMPAT
-	_dispatch_reset_basepri(old_pri);
-	_dispatch_reset_basepri_override();
+	_dispatch_clear_basepri();
 	_dispatch_queue_set_current(NULL);
 
 	_dispatch_voucher_debug("root queue clear", NULL);
@@ -6007,18 +6093,17 @@
 
 DISPATCH_NOT_TAIL_CALLED // prevent tailcall (for Instrument DTrace probe)
 static void
-_dispatch_root_queue_drain(dispatch_queue_t dq, pthread_priority_t pp)
+_dispatch_root_queue_drain(dispatch_queue_global_t dq,
+		dispatch_priority_t pri, dispatch_invoke_flags_t flags)
 {
 #if DISPATCH_DEBUG
 	dispatch_queue_t cq;
-	if (slowpath(cq = _dispatch_queue_get_current())) {
+	if (unlikely(cq = _dispatch_queue_get_current())) {
 		DISPATCH_INTERNAL_CRASH(cq, "Premature thread recycling");
 	}
 #endif
 	_dispatch_queue_set_current(dq);
-	dispatch_priority_t pri = dq->dq_priority;
-	if (!pri) pri = _dispatch_priority_from_pp(pp);
-	dispatch_priority_t old_dbp = _dispatch_set_basepri(pri);
+	_dispatch_init_basepri(pri);
 	_dispatch_adopt_wlh_anon();
 
 	struct dispatch_object_s *item;
@@ -6027,11 +6112,9 @@
 #if DISPATCH_COCOA_COMPAT
 	_dispatch_last_resort_autorelease_pool_push(&dic);
 #endif // DISPATCH_COCOA_COMPAT
-	dispatch_invoke_flags_t flags = DISPATCH_INVOKE_WORKER_DRAIN |
-			DISPATCH_INVOKE_REDIRECTING_DRAIN;
 	_dispatch_queue_drain_init_narrowing_check_deadline(&dic, pri);
 	_dispatch_perfmon_start();
-	while ((item = fastpath(_dispatch_root_queue_drain_one(dq)))) {
+	while (likely(item = _dispatch_root_queue_drain_one(dq))) {
 		if (reset) _dispatch_wqthread_override_reset();
 		_dispatch_continuation_pop_inline(item, &dic, flags, dq);
 		reset = _dispatch_reset_basepri_override();
@@ -6041,7 +6124,7 @@
 	}
 
 	// overcommit or not. worker thread
-	if (pri & _PTHREAD_PRIORITY_OVERCOMMIT_FLAG) {
+	if (pri & DISPATCH_PRIORITY_FLAG_OVERCOMMIT) {
 		_dispatch_perfmon_end(perfmon_thread_worker_oc);
 	} else {
 		_dispatch_perfmon_end(perfmon_thread_worker_non_oc);
@@ -6051,78 +6134,76 @@
 	_dispatch_last_resort_autorelease_pool_pop(&dic);
 #endif // DISPATCH_COCOA_COMPAT
 	_dispatch_reset_wlh();
-	_dispatch_reset_basepri(old_dbp);
-	_dispatch_reset_basepri_override();
+	_dispatch_clear_basepri();
 	_dispatch_queue_set_current(NULL);
 }
 
-#pragma mark -
-#pragma mark dispatch_worker_thread
-
-#if HAVE_PTHREAD_WORKQUEUES
+#if !DISPATCH_USE_INTERNAL_WORKQUEUE
 static void
-_dispatch_worker_thread4(void *context)
-{
-	dispatch_queue_t dq = context;
-	dispatch_root_queue_context_t qc = dq->do_ctxt;
-
-	_dispatch_introspection_thread_add();
-	int pending = os_atomic_dec2o(qc, dgq_pending, relaxed);
-	dispatch_assert(pending >= 0);
-	_dispatch_root_queue_drain(dq, _dispatch_get_priority());
-	_dispatch_voucher_debug("root queue clear", NULL);
-	_dispatch_reset_voucher(NULL, DISPATCH_THREAD_PARK);
-}
-
-#if HAVE_PTHREAD_WORKQUEUE_QOS
-static void
-_dispatch_worker_thread3(pthread_priority_t pp)
+_dispatch_worker_thread2(pthread_priority_t pp)
 {
 	bool overcommit = pp & _PTHREAD_PRIORITY_OVERCOMMIT_FLAG;
-	dispatch_queue_t dq;
+	dispatch_queue_global_t dq;
+
 	pp &= _PTHREAD_PRIORITY_OVERCOMMIT_FLAG | ~_PTHREAD_PRIORITY_FLAGS_MASK;
 	_dispatch_thread_setspecific(dispatch_priority_key, (void *)(uintptr_t)pp);
 	dq = _dispatch_get_root_queue(_dispatch_qos_from_pp(pp), overcommit);
-	return _dispatch_worker_thread4(dq);
-}
-#endif // HAVE_PTHREAD_WORKQUEUE_QOS
 
-#if DISPATCH_USE_PTHREAD_WORKQUEUE_SETDISPATCH_NP
-// 6618342 Contact the team that owns the Instrument DTrace probe before
-//         renaming this symbol
-static void
-_dispatch_worker_thread2(int priority, int options,
-		void *context DISPATCH_UNUSED)
-{
-	dispatch_assert(priority >= 0 && priority < WORKQ_NUM_PRIOQUEUE);
-	dispatch_assert(!(options & ~WORKQ_ADDTHREADS_OPTION_OVERCOMMIT));
-	dispatch_queue_t dq = _dispatch_wq2root_queues[priority][options];
+	_dispatch_introspection_thread_add();
+	_dispatch_trace_runtime_event(worker_unpark, dq, 0);
 
-	return _dispatch_worker_thread4(dq);
+	int pending = os_atomic_dec2o(dq, dgq_pending, relaxed);
+	dispatch_assert(pending >= 0);
+	_dispatch_root_queue_drain(dq, dq->dq_priority,
+			DISPATCH_INVOKE_WORKER_DRAIN | DISPATCH_INVOKE_REDIRECTING_DRAIN);
+	_dispatch_voucher_debug("root queue clear", NULL);
+	_dispatch_reset_voucher(NULL, DISPATCH_THREAD_PARK);
+	_dispatch_trace_runtime_event(worker_park, NULL, 0);
 }
-#endif // DISPATCH_USE_PTHREAD_WORKQUEUE_SETDISPATCH_NP
-#endif // HAVE_PTHREAD_WORKQUEUES
+#endif // !DISPATCH_USE_INTERNAL_WORKQUEUE
 
 #if DISPATCH_USE_PTHREAD_POOL
+static inline void
+_dispatch_root_queue_init_pthread_pool(dispatch_queue_global_t dq,
+		int pool_size, dispatch_priority_t pri)
+{
+	dispatch_pthread_root_queue_context_t pqc = dq->do_ctxt;
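+	// Overcommit pools may grow up to the compile-time maximum; other pools
+	// are capped at the number of active CPUs. An explicit pool_size lowers
+	// either cap further.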
+	int thread_pool_size = DISPATCH_WORKQ_MAX_PTHREAD_COUNT;
+	if (!(pri & DISPATCH_PRIORITY_FLAG_OVERCOMMIT)) {
+		thread_pool_size = (int32_t)dispatch_hw_config(active_cpus);
+	}
+	if (pool_size && pool_size < thread_pool_size) thread_pool_size = pool_size;
+	dq->dgq_thread_pool_size = thread_pool_size;
+	qos_class_t cls = _dispatch_qos_to_qos_class(_dispatch_priority_qos(pri) ?:
+			_dispatch_priority_fallback_qos(pri));
+	if (cls) {
+#if !defined(_WIN32)
+		pthread_attr_t *attr = &pqc->dpq_thread_attr;
+		int r = pthread_attr_init(attr);
+		dispatch_assume_zero(r);
+		r = pthread_attr_setdetachstate(attr, PTHREAD_CREATE_DETACHED);
+		dispatch_assume_zero(r);
+#endif // !defined(_WIN32)
+#if HAVE_PTHREAD_WORKQUEUE_QOS
+		r = pthread_attr_set_qos_class_np(attr, cls, 0);
+		dispatch_assume_zero(r);
+#endif // HAVE_PTHREAD_WORKQUEUE_QOS
+	}
+	_dispatch_sema4_t *sema = &pqc->dpq_thread_mediator.dsema_sema;
+	pqc->dpq_thread_mediator.do_vtable = DISPATCH_VTABLE(semaphore);
+	_dispatch_sema4_init(sema, _DSEMA4_POLICY_LIFO);
+	_dispatch_sema4_create(sema, _DSEMA4_POLICY_LIFO);
+}
+
 // 6618342 Contact the team that owns the Instrument DTrace probe before
 //         renaming this symbol
-#if defined(_WIN32)
-static unsigned WINAPI
-_dispatch_worker_thread_thunk(LPVOID lpParameter)
-{
-  _dispatch_worker_thread(lpParameter);
-  return 0;
-}
-#endif
-
 static void *
 _dispatch_worker_thread(void *context)
 {
-	dispatch_queue_t dq = context;
-	dispatch_root_queue_context_t qc = dq->do_ctxt;
-	dispatch_pthread_root_queue_context_t pqc = qc->dgq_ctxt;
+	dispatch_queue_global_t dq = context;
+	dispatch_pthread_root_queue_context_t pqc = dq->do_ctxt;
 
-	int pending = os_atomic_dec2o(qc, dgq_pending, relaxed);
+	int pending = os_atomic_dec2o(dq, dgq_pending, relaxed);
 	if (unlikely(pending < 0)) {
 		DISPATCH_INTERNAL_CRASH(pending, "Pending thread request underflow");
 	}
@@ -6135,162 +6216,329 @@
 		pqc->dpq_thread_configure();
 	}
 
-	// workaround tweaks the kernel workqueue does for us
 #if !defined(_WIN32)
+	// work around tweaks the kernel workqueue does for us
 	_dispatch_sigmask();
 #endif
 	_dispatch_introspection_thread_add();
 
-#if DISPATCH_USE_INTERNAL_WORKQUEUE
-	bool overcommit = (qc->dgq_wq_options & WORKQ_ADDTHREADS_OPTION_OVERCOMMIT);
-	bool manager = (dq == &_dispatch_mgr_root_queue);
-	bool monitored = !(overcommit || manager);
-	if (monitored) {
-		_dispatch_workq_worker_register(dq, qc->dgq_qos);
+	const int64_t timeout = 5ull * NSEC_PER_SEC;
+	pthread_priority_t pp = _dispatch_get_priority();
+	dispatch_priority_t pri = dq->dq_priority;
+
+	// If the queue is not:
+	// - the manager,
+	// - one with a fallback set,
+	// - one with a requested QoS or QoS floor,
+	// then infer the basepri from the current priority.
+	if ((pri & (DISPATCH_PRIORITY_FLAG_MANAGER |
+			DISPATCH_PRIORITY_FLAG_FALLBACK |
+			DISPATCH_PRIORITY_FLAG_FLOOR |
+			DISPATCH_PRIORITY_REQUESTED_MASK)) == 0) {
+		pri &= DISPATCH_PRIORITY_FLAG_OVERCOMMIT;
+		if (pp & _PTHREAD_PRIORITY_QOS_CLASS_MASK) {
+			pri |= _dispatch_priority_from_pp(pp);
+		} else {
+			pri |= _dispatch_priority_make_override(DISPATCH_QOS_SATURATED);
+		}
 	}
+
+#if DISPATCH_USE_INTERNAL_WORKQUEUE
+	bool monitored = ((pri & (DISPATCH_PRIORITY_FLAG_OVERCOMMIT |
+			DISPATCH_PRIORITY_FLAG_MANAGER)) == 0);
+	if (monitored) _dispatch_workq_worker_register(dq);
 #endif
 
-	const int64_t timeout = 5ull * NSEC_PER_SEC;
-	pthread_priority_t old_pri = _dispatch_get_priority();
 	do {
-		_dispatch_root_queue_drain(dq, old_pri);
-		_dispatch_reset_priority_and_voucher(old_pri, NULL);
+		_dispatch_trace_runtime_event(worker_unpark, dq, 0);
+		_dispatch_root_queue_drain(dq, pri, DISPATCH_INVOKE_REDIRECTING_DRAIN);
+		_dispatch_reset_priority_and_voucher(pp, NULL);
+		_dispatch_trace_runtime_event(worker_park, NULL, 0);
 	} while (dispatch_semaphore_wait(&pqc->dpq_thread_mediator,
 			dispatch_time(0, timeout)) == 0);
 
 #if DISPATCH_USE_INTERNAL_WORKQUEUE
-	if (monitored) {
-		_dispatch_workq_worker_unregister(dq, qc->dgq_qos);
-	}
+	if (monitored) _dispatch_workq_worker_unregister(dq);
 #endif
-	(void)os_atomic_inc2o(qc, dgq_thread_pool_size, release);
-	_dispatch_global_queue_poke(dq, 1, 0);
-	_dispatch_release(dq); // retained in _dispatch_global_queue_poke_slow
+	(void)os_atomic_inc2o(dq, dgq_thread_pool_size, release);
+	_dispatch_root_queue_poke(dq, 1, 0);
+	_dispatch_release(dq); // retained in _dispatch_root_queue_poke_slow
 	return NULL;
 }
+#if defined(_WIN32)
+static unsigned WINAPI
+_dispatch_worker_thread_thunk(LPVOID lpParameter)
+{
+  _dispatch_worker_thread(lpParameter);
+  return 0;
+}
+#endif // defined(_WIN32)
 #endif // DISPATCH_USE_PTHREAD_POOL
 
-#pragma mark -
-#pragma mark dispatch_network_root_queue
-#if TARGET_OS_MAC
-
-dispatch_queue_t
-_dispatch_network_root_queue_create_4NW(const char *label,
-		const pthread_attr_t *attrs, dispatch_block_t configure)
+DISPATCH_NOINLINE
+void
+_dispatch_root_queue_wakeup(dispatch_queue_global_t dq,
+		DISPATCH_UNUSED dispatch_qos_t qos, dispatch_wakeup_flags_t flags)
 {
-	unsigned long flags = dispatch_pthread_root_queue_flags_pool_size(1);
-	return dispatch_pthread_root_queue_create(label, flags, attrs, configure);
-}
-
-#endif // TARGET_OS_MAC
-#pragma mark -
-#pragma mark dispatch_runloop_queue
-
-static bool _dispatch_program_is_probably_callback_driven;
-
-#if DISPATCH_COCOA_COMPAT || defined(_WIN32)
-
-dispatch_queue_t
-_dispatch_runloop_root_queue_create_4CF(const char *label, unsigned long flags)
-{
-	dispatch_queue_t dq;
-	size_t dqs;
-
-	if (slowpath(flags)) {
-		return DISPATCH_BAD_INPUT;
+	if (!(flags & DISPATCH_WAKEUP_BLOCK_WAIT)) {
+		DISPATCH_INTERNAL_CRASH(dq->dq_priority,
+				"Don't try to wake up or override a root queue");
 	}
-	dqs = sizeof(struct dispatch_queue_s) - DISPATCH_QUEUE_CACHELINE_PAD;
-	dq = _dispatch_object_alloc(DISPATCH_VTABLE(queue_runloop), dqs);
-	_dispatch_queue_init(dq, DQF_THREAD_BOUND | DQF_CANNOT_TRYSYNC, 1,
-			DISPATCH_QUEUE_ROLE_BASE_ANON);
-	dq->do_targetq = _dispatch_get_root_queue(DISPATCH_QOS_DEFAULT, true);
-	dq->dq_label = label ? label : "runloop-queue"; // no-copy contract
-	_dispatch_runloop_queue_handle_init(dq);
-	_dispatch_queue_set_bound_thread(dq);
-	_dispatch_object_debug(dq, "%s", __func__);
-	return _dispatch_introspection_queue_create(dq);
+	if (flags & DISPATCH_WAKEUP_CONSUME_2) {
+		return _dispatch_release_2_tailcall(dq);
+	}
 }
 
+DISPATCH_NOINLINE
 void
-_dispatch_runloop_queue_xref_dispose(dispatch_queue_t dq)
+_dispatch_root_queue_push(dispatch_queue_global_t rq, dispatch_object_t dou,
+		dispatch_qos_t qos)
 {
-	_dispatch_object_debug(dq, "%s", __func__);
+#if DISPATCH_USE_KEVENT_WORKQUEUE
+	dispatch_deferred_items_t ddi = _dispatch_deferred_items_get();
+	if (unlikely(ddi && ddi->ddi_can_stash)) {
+		dispatch_object_t old_dou = ddi->ddi_stashed_dou;
+		dispatch_priority_t rq_overcommit;
+		rq_overcommit = rq->dq_priority & DISPATCH_PRIORITY_FLAG_OVERCOMMIT;
 
-	dispatch_qos_t qos = _dispatch_runloop_queue_reset_max_qos(dq);
-	_dispatch_queue_clear_bound_thread(dq);
-	dx_wakeup(dq, qos, DISPATCH_WAKEUP_MAKE_DIRTY);
-	if (qos) _dispatch_thread_override_end(DISPATCH_QUEUE_DRAIN_OWNER(dq), dq);
+		if (likely(!old_dou._do || rq_overcommit)) {
+			dispatch_queue_global_t old_rq = ddi->ddi_stashed_rq;
+			dispatch_qos_t old_qos = ddi->ddi_stashed_qos;
+			ddi->ddi_stashed_rq = rq;
+			ddi->ddi_stashed_dou = dou;
+			ddi->ddi_stashed_qos = qos;
+			_dispatch_debug("deferring item %p, rq %p, qos %d",
+					dou._do, rq, qos);
+			if (rq_overcommit) {
+				ddi->ddi_can_stash = false;
+			}
+			if (likely(!old_dou._do)) {
+				return;
+			}
+			// push the previously stashed item
+			qos = old_qos;
+			rq = old_rq;
+			dou = old_dou;
+		}
+	}
+#endif
+#if HAVE_PTHREAD_WORKQUEUE_QOS
+	if (_dispatch_root_queue_push_needs_override(rq, qos)) {
+		return _dispatch_root_queue_push_override(rq, dou, qos);
+	}
+#else
+	(void)qos;
+#endif
+	_dispatch_root_queue_push_inline(rq, dou, dou, 1);
 }
 
-void
-_dispatch_runloop_queue_dispose(dispatch_queue_t dq, bool *allow_free)
+#pragma mark -
+#pragma mark dispatch_pthread_root_queue
+#if DISPATCH_USE_PTHREAD_ROOT_QUEUES
+
+static dispatch_queue_global_t
+_dispatch_pthread_root_queue_create(const char *label, unsigned long flags,
+		const pthread_attr_t *attr, dispatch_block_t configure,
+		dispatch_pthread_root_queue_observer_hooks_t observer_hooks)
 {
-	_dispatch_object_debug(dq, "%s", __func__);
-	_dispatch_introspection_queue_dispose(dq);
-	_dispatch_runloop_queue_handle_dispose(dq);
-	_dispatch_queue_destroy(dq, allow_free);
+	dispatch_queue_pthread_root_t dpq;
+	dispatch_queue_flags_t dqf = 0;
+	int32_t pool_size = flags & _DISPATCH_PTHREAD_ROOT_QUEUE_FLAG_POOL_SIZE ?
+			(int8_t)(flags & ~_DISPATCH_PTHREAD_ROOT_QUEUE_FLAG_POOL_SIZE) : 0;
+
+	if (label) {
+		const char *tmp = _dispatch_strdup_if_mutable(label);
+		if (tmp != label) {
+			dqf |= DQF_LABEL_NEEDS_FREE;
+			label = tmp;
+		}
+	}
+
+	dpq = _dispatch_queue_alloc(queue_pthread_root, dqf,
+			DISPATCH_QUEUE_WIDTH_POOL, 0)._dpq;
+	dpq->dq_label = label;
+	dpq->dq_state = DISPATCH_ROOT_QUEUE_STATE_INIT_VALUE;
+	dpq->dq_priority = DISPATCH_PRIORITY_FLAG_OVERCOMMIT;
+	dpq->do_ctxt = &dpq->dpq_ctxt;
+
+	dispatch_pthread_root_queue_context_t pqc = &dpq->dpq_ctxt;
+	_dispatch_root_queue_init_pthread_pool(dpq->_as_dgq, pool_size,
+			DISPATCH_PRIORITY_FLAG_OVERCOMMIT);
+
+#if !defined(_WIN32)
+	if (attr) {
+		memcpy(&pqc->dpq_thread_attr, attr, sizeof(pthread_attr_t));
+		_dispatch_mgr_priority_raise(&pqc->dpq_thread_attr);
+	} else {
+		(void)dispatch_assume_zero(pthread_attr_init(&pqc->dpq_thread_attr));
+	}
+	(void)dispatch_assume_zero(pthread_attr_setdetachstate(
+			&pqc->dpq_thread_attr, PTHREAD_CREATE_DETACHED));
+#else // defined(_WIN32)
+	dispatch_assert(attr == NULL);
+#endif // defined(_WIN32)
+	if (configure) {
+		pqc->dpq_thread_configure = _dispatch_Block_copy(configure);
+	}
+	if (observer_hooks) {
+		pqc->dpq_observer_hooks = *observer_hooks;
+	}
+	_dispatch_object_debug(dpq, "%s", __func__);
+	return _dispatch_trace_queue_create(dpq)._dgq;
+}
+
+dispatch_queue_global_t
+dispatch_pthread_root_queue_create(const char *label, unsigned long flags,
+		const pthread_attr_t *attr, dispatch_block_t configure)
+{
+	return _dispatch_pthread_root_queue_create(label, flags, attr, configure,
+			NULL);
+}
+
+#if DISPATCH_IOHID_SPI
+dispatch_queue_global_t
+_dispatch_pthread_root_queue_create_with_observer_hooks_4IOHID(const char *label,
+		unsigned long flags, const pthread_attr_t *attr,
+		dispatch_pthread_root_queue_observer_hooks_t observer_hooks,
+		dispatch_block_t configure)
+{
+	if (!observer_hooks->queue_will_execute ||
+			!observer_hooks->queue_did_execute) {
+		DISPATCH_CLIENT_CRASH(0, "Invalid pthread root queue observer hooks");
+	}
+	return _dispatch_pthread_root_queue_create(label, flags, attr, configure,
+			observer_hooks);
 }
 
 bool
-_dispatch_runloop_root_queue_perform_4CF(dispatch_queue_t dq)
+_dispatch_queue_is_exclusively_owned_by_current_thread_4IOHID(
+		dispatch_queue_t dq) // rdar://problem/18033810
 {
-	if (slowpath(dq->do_vtable != DISPATCH_VTABLE(queue_runloop))) {
-		DISPATCH_CLIENT_CRASH(dq->do_vtable, "Not a runloop queue");
+	if (dq->dq_width != 1) {
+		DISPATCH_CLIENT_CRASH(dq->dq_width, "Invalid queue type");
 	}
-	dispatch_retain(dq);
-	bool r = _dispatch_runloop_queue_drain_one(dq);
-	dispatch_release(dq);
-	return r;
+	uint64_t dq_state = os_atomic_load2o(dq, dq_state, relaxed);
+	return _dq_state_drain_locked_by_self(dq_state);
+}
+#endif
+
+dispatch_queue_global_t
+dispatch_pthread_root_queue_copy_current(void)
+{
+	dispatch_queue_t dq = _dispatch_queue_get_current();
+	if (!dq) return NULL;
+	while (unlikely(dq->do_targetq)) {
+		dq = dq->do_targetq;
+	}
+	if (dx_type(dq) != DISPATCH_QUEUE_PTHREAD_ROOT_TYPE) {
+		return NULL;
+	}
+	_os_object_retain_with_resurrect(dq->_as_os_obj);
+	return upcast(dq)._dgq;
 }
 
 void
-_dispatch_runloop_root_queue_wakeup_4CF(dispatch_queue_t dq)
+_dispatch_pthread_root_queue_dispose(dispatch_queue_global_t dq,
+		bool *allow_free)
 {
-	if (slowpath(dq->do_vtable != DISPATCH_VTABLE(queue_runloop))) {
-		DISPATCH_CLIENT_CRASH(dq->do_vtable, "Not a runloop queue");
+	dispatch_pthread_root_queue_context_t pqc = dq->do_ctxt;
+
+	_dispatch_object_debug(dq, "%s", __func__);
+	_dispatch_trace_queue_dispose(dq);
+
+#if !defined(_WIN32)
+	pthread_attr_destroy(&pqc->dpq_thread_attr);
+#endif
+	_dispatch_semaphore_dispose(&pqc->dpq_thread_mediator, NULL);
+	if (pqc->dpq_thread_configure) {
+		Block_release(pqc->dpq_thread_configure);
 	}
-	_dispatch_runloop_queue_wakeup(dq, 0, false);
+	dq->do_targetq = _dispatch_get_default_queue(false);
+	_dispatch_lane_class_dispose(dq, allow_free);
 }
 
-#if TARGET_OS_MAC || defined(_WIN32)
-dispatch_runloop_handle_t
-_dispatch_runloop_root_queue_get_port_4CF(dispatch_queue_t dq)
+#endif // DISPATCH_USE_PTHREAD_ROOT_QUEUES
+#pragma mark -
+#pragma mark dispatch_runloop_queue
+
+DISPATCH_STATIC_GLOBAL(bool _dispatch_program_is_probably_callback_driven);
+
+#if DISPATCH_COCOA_COMPAT
+DISPATCH_STATIC_GLOBAL(dispatch_once_t _dispatch_main_q_handle_pred);
+
+DISPATCH_ALWAYS_INLINE
+static inline bool
+_dispatch_runloop_handle_is_valid(dispatch_runloop_handle_t handle)
 {
-	if (slowpath(dq->do_vtable != DISPATCH_VTABLE(queue_runloop))) {
-		DISPATCH_CLIENT_CRASH(dq->do_vtable, "Not a runloop queue");
-	}
-	return _dispatch_runloop_queue_get_handle(dq);
-}
+#if TARGET_OS_MAC
+	return MACH_PORT_VALID(handle);
+#elif defined(__linux__)
+	return handle >= 0;
+#elif defined(_WIN32)
+	return handle != NULL;
+#else
+#error "runloop support not implemented on this platform"
 #endif
+}
+
+DISPATCH_ALWAYS_INLINE
+static inline dispatch_runloop_handle_t
+_dispatch_runloop_queue_get_handle(dispatch_lane_t dq)
+{
+#if TARGET_OS_MAC
+	return ((dispatch_runloop_handle_t)(uintptr_t)dq->do_ctxt);
+#elif defined(__linux__)
+	// decode: 0 is a valid fd, so offset by 1 to distinguish from NULL
+	return ((dispatch_runloop_handle_t)(uintptr_t)dq->do_ctxt) - 1;
+#elif defined(_WIN32)
+	return ((dispatch_runloop_handle_t)(uintptr_t)dq->do_ctxt);
+#else
+#error "runloop support not implemented on this platform"
+#endif
+}
+
+DISPATCH_ALWAYS_INLINE
+static inline void
+_dispatch_runloop_queue_set_handle(dispatch_lane_t dq,
+		dispatch_runloop_handle_t handle)
+{
+#if TARGET_OS_MAC
+	dq->do_ctxt = (void *)(uintptr_t)handle;
+#elif defined(__linux__)
+	// encode: 0 is a valid fd, so offset by 1 to distinguish from NULL
+	dq->do_ctxt = (void *)(uintptr_t)(handle + 1);
+#elif defined(_WIN32)
+	dq->do_ctxt = (void *)(uintptr_t)handle;
+#else
+#error "runloop support not implemented on this platform"
+#endif
+}
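
The encode/decode pair above stashes the runloop handle in the queue's do_ctxt
pointer; on Linux an eventfd of 0 is a valid descriptor, so the value is biased
by 1 to keep a NULL do_ctxt meaning "no handle yet". A minimal sketch of the
same bias trick with hypothetical helpers (encode_fd/decode_fd are not
libdispatch functions):

#include <assert.h>
#include <stdint.h>

/* Store an fd (where 0 is valid) in a pointer slot whose NULL value must keep
 * meaning "not set": bias by 1 on the way in, undo it on the way out. */
static inline void *encode_fd(int fd) { return (void *)(uintptr_t)(fd + 1); }
static inline int decode_fd(void *p)  { return (int)(uintptr_t)p - 1; }

int main(void)
{
	assert(encode_fd(0) != NULL);          /* fd 0 no longer collides with NULL */
	assert(decode_fd(encode_fd(0)) == 0);
	assert(decode_fd(encode_fd(7)) == 7);
	return 0;
}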
 
 static void
 _dispatch_runloop_queue_handle_init(void *ctxt)
 {
-	dispatch_queue_t dq = (dispatch_queue_t)ctxt;
+	dispatch_lane_t dq = (dispatch_lane_t)ctxt;
 	dispatch_runloop_handle_t handle;
 
 	_dispatch_fork_becomes_unsafe();
 
 #if TARGET_OS_MAC
-	mach_port_t mp;
+	mach_port_options_t opts = {
+		.flags = MPO_CONTEXT_AS_GUARD | MPO_STRICT | MPO_INSERT_SEND_RIGHT,
+	};
+	mach_port_context_t guard = (uintptr_t)dq;
 	kern_return_t kr;
-	kr = mach_port_allocate(mach_task_self(), MACH_PORT_RIGHT_RECEIVE, &mp);
-	DISPATCH_VERIFY_MIG(kr);
-	(void)dispatch_assume_zero(kr);
-	kr = mach_port_insert_right(mach_task_self(), mp, mp,
-			MACH_MSG_TYPE_MAKE_SEND);
-	DISPATCH_VERIFY_MIG(kr);
-	(void)dispatch_assume_zero(kr);
-	if (dq != &_dispatch_main_q) {
-		struct mach_port_limits limits = {
-			.mpl_qlimit = 1,
-		};
-		kr = mach_port_set_attributes(mach_task_self(), mp,
-				MACH_PORT_LIMITS_INFO, (mach_port_info_t)&limits,
-				sizeof(limits));
-		DISPATCH_VERIFY_MIG(kr);
-		(void)dispatch_assume_zero(kr);
+	mach_port_t mp;
+
+	if (dx_type(dq) == DISPATCH_QUEUE_MAIN_TYPE) {
+		opts.flags |= MPO_QLIMIT;
+		opts.mpl.mpl_qlimit = 1;
 	}
+
+	kr = mach_port_construct(mach_task_self(), &opts, guard, &mp);
+	DISPATCH_VERIFY_MIG(kr);
+	(void)dispatch_assume_zero(kr);
+
 	handle = mp;
 #elif defined(__linux__)
 	int fd = eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK);
@@ -6316,7 +6564,13 @@
 	}
 	handle = fd;
 #elif defined(_WIN32)
-	handle = INVALID_HANDLE_VALUE;
+	HANDLE hEvent;
+	hEvent = CreateEventW(NULL, /*bManualReset=*/TRUE,
+		/*bInitialState=*/FALSE, NULL);
+	if (hEvent == NULL) {
+		DISPATCH_INTERNAL_CRASH(GetLastError(), "CreateEventW");
+	}
+	handle = hEvent;
 #else
 #error "runloop support not implemented on this platform"
 #endif
@@ -6326,7 +6580,7 @@
 }
 
 static void
-_dispatch_runloop_queue_handle_dispose(dispatch_queue_t dq)
+_dispatch_runloop_queue_handle_dispose(dispatch_lane_t dq)
 {
 	dispatch_runloop_handle_t handle = _dispatch_runloop_queue_get_handle(dq);
 	if (!_dispatch_runloop_handle_is_valid(handle)) {
@@ -6334,67 +6588,440 @@
 	}
 	dq->do_ctxt = NULL;
 #if TARGET_OS_MAC
-	mach_port_t mp = handle;
-	kern_return_t kr = mach_port_deallocate(mach_task_self(), mp);
-	DISPATCH_VERIFY_MIG(kr);
-	(void)dispatch_assume_zero(kr);
-	kr = mach_port_mod_refs(mach_task_self(), mp, MACH_PORT_RIGHT_RECEIVE, -1);
+	mach_port_t mp = (mach_port_t)handle;
+	mach_port_context_t guard = (uintptr_t)dq;
+	kern_return_t kr;
+	kr = mach_port_destruct(mach_task_self(), mp, -1, guard);
 	DISPATCH_VERIFY_MIG(kr);
 	(void)dispatch_assume_zero(kr);
 #elif defined(__linux__)
 	int rc = close(handle);
 	(void)dispatch_assume_zero(rc);
 #elif defined(_WIN32)
-	CloseHandle(handle);
+	BOOL bSuccess;
+	bSuccess = CloseHandle(handle);
+	(void)dispatch_assume(bSuccess);
 #else
 #error "runloop support not implemented on this platform"
 #endif
 }
 
+static inline void
+_dispatch_runloop_queue_class_poke(dispatch_lane_t dq)
+{
+	dispatch_runloop_handle_t handle = _dispatch_runloop_queue_get_handle(dq);
+	if (!_dispatch_runloop_handle_is_valid(handle)) {
+		return;
+	}
+
+	_dispatch_trace_runtime_event(worker_request, dq, 1);
+#if HAVE_MACH
+	mach_port_t mp = handle;
+	kern_return_t kr = _dispatch_send_wakeup_runloop_thread(mp, 0);
+	switch (kr) {
+	case MACH_SEND_TIMEOUT:
+	case MACH_SEND_TIMED_OUT:
+	case MACH_SEND_INVALID_DEST:
+		break;
+	default:
+		(void)dispatch_assume_zero(kr);
+		break;
+	}
+#elif defined(__linux__)
+	int result;
+	do {
+		result = eventfd_write(handle, 1);
+	} while (result == -1 && errno == EINTR);
+	(void)dispatch_assume_zero(result);
+#elif defined(_WIN32)
+	BOOL bSuccess;
+	bSuccess = SetEvent(handle);
+	(void)dispatch_assume(bSuccess);
+#else
+#error "runloop support not implemented on this platform"
+#endif
+}
+
+DISPATCH_NOINLINE
+static void
+_dispatch_runloop_queue_poke(dispatch_lane_t dq, dispatch_qos_t qos,
+		dispatch_wakeup_flags_t flags)
+{
+	// it's not useful to handle WAKEUP_MAKE_DIRTY because mach_msg() will have
+	// a release barrier, and because when runloop queues stop being
+	// thread-bound they get a non-optional wake-up to start being a "normal"
+	// queue, either in _dispatch_runloop_queue_xref_dispose,
+	// or in _dispatch_queue_cleanup2() for the main thread.
+	uint64_t old_state, new_state;
+
+	if (dx_type(dq) == DISPATCH_QUEUE_MAIN_TYPE) {
+		dispatch_once_f(&_dispatch_main_q_handle_pred, dq,
+				_dispatch_runloop_queue_handle_init);
+	}
+
+	os_atomic_rmw_loop2o(dq, dq_state, old_state, new_state, relaxed, {
+		new_state = _dq_state_merge_qos(old_state, qos);
+		if (old_state == new_state) {
+			os_atomic_rmw_loop_give_up(goto no_change);
+		}
+	});
+
+	dispatch_qos_t dq_qos = _dispatch_priority_qos(dq->dq_priority);
+	if (qos > dq_qos) {
+		mach_port_t owner = _dq_state_drain_owner(new_state);
+		pthread_priority_t pp = _dispatch_qos_to_pp(qos);
+		_dispatch_thread_override_start(owner, pp, dq);
+		if (_dq_state_max_qos(old_state) > dq_qos) {
+			_dispatch_thread_override_end(owner, dq);
+		}
+	}
+no_change:
+	_dispatch_runloop_queue_class_poke(dq);
+	if (flags & DISPATCH_WAKEUP_CONSUME_2) {
+		return _dispatch_release_2_tailcall(dq);
+	}
+}
+
+DISPATCH_ALWAYS_INLINE
+static inline dispatch_qos_t
+_dispatch_runloop_queue_reset_max_qos(dispatch_lane_t dq)
+{
+	uint64_t old_state, clear_bits = DISPATCH_QUEUE_MAX_QOS_MASK |
+			DISPATCH_QUEUE_RECEIVED_OVERRIDE;
+	old_state = os_atomic_and_orig2o(dq, dq_state, ~clear_bits, relaxed);
+	return _dq_state_max_qos(old_state);
+}
+
+void
+_dispatch_runloop_queue_wakeup(dispatch_lane_t dq, dispatch_qos_t qos,
+		dispatch_wakeup_flags_t flags)
+{
+	if (unlikely(_dispatch_queue_atomic_flags(dq) & DQF_RELEASED)) {
+		// <rdar://problem/14026816>
+		return _dispatch_lane_wakeup(dq, qos, flags);
+	}
+
+	if (flags & DISPATCH_WAKEUP_MAKE_DIRTY) {
+		os_atomic_or2o(dq, dq_state, DISPATCH_QUEUE_DIRTY, release);
+	}
+	if (_dispatch_queue_class_probe(dq)) {
+		return _dispatch_runloop_queue_poke(dq, qos, flags);
+	}
+
+	qos = _dispatch_runloop_queue_reset_max_qos(dq);
+	if (qos) {
+		mach_port_t owner = DISPATCH_QUEUE_DRAIN_OWNER(dq);
+		if (_dispatch_queue_class_probe(dq)) {
+			_dispatch_runloop_queue_poke(dq, qos, flags);
+		}
+		_dispatch_thread_override_end(owner, dq);
+		return;
+	}
+	if (flags & DISPATCH_WAKEUP_CONSUME_2) {
+		return _dispatch_release_2_tailcall(dq);
+	}
+}
+
+DISPATCH_NOINLINE
+static void
+_dispatch_main_queue_update_priority_from_thread(void)
+{
+	dispatch_queue_main_t dq = &_dispatch_main_q;
+	uint64_t dq_state = os_atomic_load2o(dq, dq_state, relaxed);
+	mach_port_t owner = _dq_state_drain_owner(dq_state);
+
+	dispatch_priority_t main_pri =
+			_dispatch_priority_from_pp_strip_flags(_dispatch_get_priority());
+	dispatch_qos_t main_qos = _dispatch_priority_qos(main_pri);
+	dispatch_qos_t max_qos = _dq_state_max_qos(dq_state);
+	dispatch_qos_t old_qos = _dispatch_priority_qos(dq->dq_priority);
+
+	// the main thread QoS was adjusted by someone else, learn the new QoS
+	// and reinitialize _dispatch_main_q.dq_priority
+	dq->dq_priority = main_pri;
+
+	if (old_qos < max_qos && main_qos == DISPATCH_QOS_UNSPECIFIED) {
+		// main thread is opted out of QoS and we had an override
+		return _dispatch_thread_override_end(owner, dq);
+	}
+
+	if (old_qos < max_qos && max_qos <= main_qos) {
+		// main QoS was raised, and we had an override which is now useless
+		return _dispatch_thread_override_end(owner, dq);
+	}
+
+	if (main_qos < max_qos && max_qos <= old_qos) {
+		// main thread QoS was lowered, and we actually need an override
+		pthread_priority_t pp = _dispatch_qos_to_pp(max_qos);
+		return _dispatch_thread_override_start(owner, pp, dq);
+	}
+}
+
+static void
+_dispatch_main_queue_drain(dispatch_queue_main_t dq)
+{
+	dispatch_thread_frame_s dtf;
+
+	if (!dq->dq_items_tail) {
+		return;
+	}
+
+	_dispatch_perfmon_start_notrace();
+	if (unlikely(!_dispatch_queue_is_thread_bound(dq))) {
+		DISPATCH_CLIENT_CRASH(0, "_dispatch_main_queue_callback_4CF called"
+				" after dispatch_main()");
+	}
+	uint64_t dq_state = os_atomic_load2o(dq, dq_state, relaxed);
+	if (unlikely(!_dq_state_drain_locked_by_self(dq_state))) {
+		DISPATCH_CLIENT_CRASH((uintptr_t)dq_state,
+				"_dispatch_main_queue_callback_4CF called"
+				" from the wrong thread");
+	}
+
+	dispatch_once_f(&_dispatch_main_q_handle_pred, dq,
+			_dispatch_runloop_queue_handle_init);
+
+	// <rdar://problem/23256682> hide the frame chaining when CFRunLoop
+	// drains the main runloop, as this should not be observable that way
+	_dispatch_adopt_wlh_anon();
+	_dispatch_thread_frame_push_and_rebase(&dtf, dq, NULL);
+
+	pthread_priority_t pp = _dispatch_get_priority();
+	dispatch_priority_t pri = _dispatch_priority_from_pp(pp);
+	dispatch_qos_t qos = _dispatch_priority_qos(pri);
+	voucher_t voucher = _voucher_copy();
+
+	if (unlikely(qos != _dispatch_priority_qos(dq->dq_priority))) {
+		_dispatch_main_queue_update_priority_from_thread();
+	}
+	dispatch_priority_t old_dbp = _dispatch_set_basepri(pri);
+	_dispatch_set_basepri_override_qos(DISPATCH_QOS_SATURATED);
+
+	dispatch_invoke_context_s dic = { };
+	struct dispatch_object_s *dc, *next_dc, *tail;
+	dc = os_mpsc_capture_snapshot(os_mpsc(dq, dq_items), &tail);
+	do {
+		next_dc = os_mpsc_pop_snapshot_head(dc, tail, do_next);
+		_dispatch_continuation_pop_inline(dc, &dic,
+				DISPATCH_INVOKE_THREAD_BOUND, dq);
+	} while ((dc = next_dc));
+
+	dx_wakeup(dq->_as_dq, 0, 0);
+	_dispatch_voucher_debug("main queue restore", voucher);
+	_dispatch_reset_basepri(old_dbp);
+	_dispatch_reset_basepri_override();
+	_dispatch_reset_priority_and_voucher(pp, voucher);
+	_dispatch_thread_frame_pop(&dtf);
+	_dispatch_reset_wlh();
+	_dispatch_force_cache_cleanup();
+	_dispatch_perfmon_end_notrace();
+}
+
+static bool
+_dispatch_runloop_queue_drain_one(dispatch_lane_t dq)
+{
+	if (!dq->dq_items_tail) {
+		return false;
+	}
+	_dispatch_perfmon_start_notrace();
+	dispatch_thread_frame_s dtf;
+	bool should_reset_wlh = _dispatch_adopt_wlh_anon_recurse();
+	_dispatch_thread_frame_push(&dtf, dq);
+	pthread_priority_t pp = _dispatch_get_priority();
+	dispatch_priority_t pri = _dispatch_priority_from_pp(pp);
+	voucher_t voucher = _voucher_copy();
+	dispatch_priority_t old_dbp = _dispatch_set_basepri(pri);
+	_dispatch_set_basepri_override_qos(DISPATCH_QOS_SATURATED);
+
+	dispatch_invoke_context_s dic = { };
+	struct dispatch_object_s *dc, *next_dc;
+	dc = _dispatch_queue_get_head(dq);
+	next_dc = _dispatch_queue_pop_head(dq, dc);
+	_dispatch_continuation_pop_inline(dc, &dic,
+			DISPATCH_INVOKE_THREAD_BOUND, dq);
+
+	if (!next_dc) {
+		dx_wakeup(dq, 0, 0);
+	}
+
+	_dispatch_voucher_debug("runloop queue restore", voucher);
+	_dispatch_reset_basepri(old_dbp);
+	_dispatch_reset_basepri_override();
+	_dispatch_reset_priority_and_voucher(pp, voucher);
+	_dispatch_thread_frame_pop(&dtf);
+	if (should_reset_wlh) _dispatch_reset_wlh();
+	_dispatch_force_cache_cleanup();
+	_dispatch_perfmon_end_notrace();
+	return next_dc;
+}
+
+dispatch_queue_serial_t
+_dispatch_runloop_root_queue_create_4CF(const char *label, unsigned long flags)
+{
+	pthread_priority_t pp = _dispatch_get_priority();
+	dispatch_lane_t dq;
+
+	if (unlikely(flags)) {
+		return DISPATCH_BAD_INPUT;
+	}
+	dq = _dispatch_object_alloc(DISPATCH_VTABLE(queue_runloop),
+			sizeof(struct dispatch_lane_s));
+	_dispatch_queue_init(dq, DQF_THREAD_BOUND, 1,
+			DISPATCH_QUEUE_ROLE_BASE_ANON);
+	dq->do_targetq = _dispatch_get_default_queue(true);
+	dq->dq_label = label ? label : "runloop-queue"; // no-copy contract
+	if (pp & _PTHREAD_PRIORITY_QOS_CLASS_MASK) {
+		dq->dq_priority = _dispatch_priority_from_pp_strip_flags(pp);
+	}
+	_dispatch_runloop_queue_handle_init(dq);
+	_dispatch_queue_set_bound_thread(dq);
+	_dispatch_object_debug(dq, "%s", __func__);
+	return _dispatch_trace_queue_create(dq)._dl;
+}
+
+void
+_dispatch_runloop_queue_xref_dispose(dispatch_lane_t dq)
+{
+	_dispatch_object_debug(dq, "%s", __func__);
+
+	dispatch_qos_t qos = _dispatch_runloop_queue_reset_max_qos(dq);
+	_dispatch_queue_clear_bound_thread(dq);
+	dx_wakeup(dq, qos, DISPATCH_WAKEUP_MAKE_DIRTY);
+	if (qos) _dispatch_thread_override_end(DISPATCH_QUEUE_DRAIN_OWNER(dq), dq);
+}
+
+void
+_dispatch_runloop_queue_dispose(dispatch_lane_t dq, bool *allow_free)
+{
+	_dispatch_object_debug(dq, "%s", __func__);
+	_dispatch_trace_queue_dispose(dq);
+	_dispatch_runloop_queue_handle_dispose(dq);
+	_dispatch_lane_class_dispose(dq, allow_free);
+}
+
+bool
+_dispatch_runloop_root_queue_perform_4CF(dispatch_queue_t dq)
+{
+	if (unlikely(dx_type(dq) != DISPATCH_QUEUE_RUNLOOP_TYPE)) {
+		DISPATCH_CLIENT_CRASH(dx_type(dq), "Not a runloop queue");
+	}
+	dispatch_retain(dq);
+	bool r = _dispatch_runloop_queue_drain_one(upcast(dq)._dl);
+	dispatch_release(dq);
+	return r;
+}
+
+void
+_dispatch_runloop_root_queue_wakeup_4CF(dispatch_queue_t dq)
+{
+	if (unlikely(dx_type(dq) != DISPATCH_QUEUE_RUNLOOP_TYPE)) {
+		DISPATCH_CLIENT_CRASH(dx_type(dq), "Not a runloop queue");
+	}
+	_dispatch_runloop_queue_wakeup(upcast(dq)._dl, 0, false);
+}
+
+#if TARGET_OS_MAC || defined(_WIN32)
+dispatch_runloop_handle_t
+_dispatch_runloop_root_queue_get_port_4CF(dispatch_queue_t dq)
+{
+	if (unlikely(dx_type(dq) != DISPATCH_QUEUE_RUNLOOP_TYPE)) {
+		DISPATCH_CLIENT_CRASH(dx_type(dq), "Not a runloop queue");
+	}
+	return _dispatch_runloop_queue_get_handle(upcast(dq)._dl);
+}
+#endif
+
+#endif // DISPATCH_COCOA_COMPAT
 #pragma mark -
 #pragma mark dispatch_main_queue
+#if DISPATCH_COCOA_COMPAT
 
 dispatch_runloop_handle_t
 _dispatch_get_main_queue_handle_4CF(void)
 {
-	dispatch_queue_t dq = &_dispatch_main_q;
+	dispatch_queue_main_t dq = &_dispatch_main_q;
 	dispatch_once_f(&_dispatch_main_q_handle_pred, dq,
 			_dispatch_runloop_queue_handle_init);
-	return _dispatch_runloop_queue_get_handle(dq);
+	return _dispatch_runloop_queue_get_handle(dq->_as_dl);
 }
 
-#if TARGET_OS_MAC
 dispatch_runloop_handle_t
 _dispatch_get_main_queue_port_4CF(void)
 {
 	return _dispatch_get_main_queue_handle_4CF();
 }
-#endif
-
-static bool main_q_is_draining;
-
-// 6618342 Contact the team that owns the Instrument DTrace probe before
-//         renaming this symbol
-DISPATCH_NOINLINE
-static void
-_dispatch_queue_set_mainq_drain_state(bool arg)
-{
-	main_q_is_draining = arg;
-}
 
 void
 _dispatch_main_queue_callback_4CF(
 		void *ignored DISPATCH_UNUSED)
 {
-	if (main_q_is_draining) {
+	// the main queue cannot be suspended and no one looks at this bit,
+	// so abuse it to avoid dirtying more memory
+
+	if (_dispatch_main_q.dq_side_suspend_cnt) {
 		return;
 	}
-	_dispatch_queue_set_mainq_drain_state(true);
-	_dispatch_main_queue_drain();
-	_dispatch_queue_set_mainq_drain_state(false);
+	_dispatch_main_q.dq_side_suspend_cnt = true;
+	_dispatch_main_queue_drain(&_dispatch_main_q);
+	_dispatch_main_q.dq_side_suspend_cnt = false;
 }
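
The callback above repurposes dq_side_suspend_cnt as a re-entrancy guard: the
main queue can never be suspended, so the field is otherwise unused and no
extra memory gets dirtied. A minimal sketch of the same guard pattern, assuming
a single bound thread as in the main-queue case (illustrative names, not
libdispatch API):

#include <stdio.h>

/* Reuse an otherwise-idle counter as a recursion guard. No atomics are needed
 * because, like the main-queue callback, this only ever runs on one thread. */
static unsigned int draining;

static void drain_items(void) { printf("draining\n"); }

static void
drain_once(void)
{
	if (draining) return;  /* re-entered from within the drain: bail out */
	draining = 1;
	drain_items();
	draining = 0;
}

int main(void) { drain_once(); return 0; }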
 
+#endif // DISPATCH_COCOA_COMPAT
+
+DISPATCH_NOINLINE
+void
+_dispatch_main_queue_push(dispatch_queue_main_t dq, dispatch_object_t dou,
+		dispatch_qos_t qos)
+{
+	// Same as _dispatch_lane_push() but without the refcounting due to being
+	// a global object
+	if (_dispatch_queue_push_item(dq, dou)) {
+		return dx_wakeup(dq, qos, DISPATCH_WAKEUP_MAKE_DIRTY);
+	}
+
+	qos = _dispatch_queue_push_qos(dq, qos);
+	if (_dispatch_queue_need_override(dq, qos)) {
+		return dx_wakeup(dq, qos, 0);
+	}
+}
+
+void
+_dispatch_main_queue_wakeup(dispatch_queue_main_t dq, dispatch_qos_t qos,
+		dispatch_wakeup_flags_t flags)
+{
+#if DISPATCH_COCOA_COMPAT
+	if (_dispatch_queue_is_thread_bound(dq)) {
+		return _dispatch_runloop_queue_wakeup(dq->_as_dl, qos, flags);
+	}
 #endif
+	return _dispatch_lane_wakeup(dq, qos, flags);
+}
+
+#if !defined(_WIN32)
+DISPATCH_NOINLINE DISPATCH_NORETURN
+static void
+_dispatch_sigsuspend(void)
+{
+	static const sigset_t mask;
+
+	for (;;) {
+		sigsuspend(&mask);
+	}
+}
+#endif // !defined(_WIN32)
+
+DISPATCH_NORETURN
+static void
+_dispatch_sig_thread(void *ctxt DISPATCH_UNUSED)
+{
+	// never returns, so burn bridges behind us
+	_dispatch_clear_stack(0);
+#if defined(_WIN32)
+	Sleep(INFINITE);
+#else
+	_dispatch_sigsuspend();
+#endif
+}
 
 void
 dispatch_main(void)
@@ -6418,11 +7045,11 @@
 		pthread_setspecific(dispatch_main_key, &dispatch_main_key);
 		_dispatch_sigmask();
 #endif
-#if defined(_WIN32)
-		_endthreadex(0);
-#else
+#if !defined(_WIN32)
 		pthread_exit(NULL);
-#endif
+#else
+		_endthreadex(0);
+#endif // defined(_WIN32)
 		DISPATCH_INTERNAL_CRASH(errno, "pthread_exit() returned");
 #if HAVE_PTHREAD_MAIN_NP
 	}
@@ -6430,37 +7057,11 @@
 #endif
 }
 
-#if !defined(_WIN32)
-DISPATCH_NOINLINE DISPATCH_NORETURN
-static void
-_dispatch_sigsuspend(void)
-{
-	static const sigset_t mask;
-
-	for (;;) {
-		sigsuspend(&mask);
-	}
-}
-#endif
-
-DISPATCH_NORETURN
-static void
-_dispatch_sig_thread(void *ctxt DISPATCH_UNUSED)
-{
-	// never returns, so burn bridges behind us
-	_dispatch_clear_stack(0);
-#if defined(_WIN32)
-	for (;;) SuspendThread(GetCurrentThread());
-#else
-	_dispatch_sigsuspend();
-#endif
-}
-
 DISPATCH_NOINLINE
 static void
 _dispatch_queue_cleanup2(void)
 {
-	dispatch_queue_t dq = &_dispatch_main_q;
+	dispatch_queue_main_t dq = &_dispatch_main_q;
 	uint64_t old_state, new_state;
 
 	// Turning the main queue from a runloop queue into an ordinary serial queue
@@ -6478,8 +7079,8 @@
 		new_state += DISPATCH_QUEUE_WIDTH_INTERVAL;
 		new_state += DISPATCH_QUEUE_IN_BARRIER;
 	});
-	_dispatch_queue_atomic_flags_clear(dq, DQF_THREAD_BOUND|DQF_CANNOT_TRYSYNC);
-	_dispatch_queue_barrier_complete(dq, 0, 0);
+	_dispatch_queue_atomic_flags_clear(dq, DQF_THREAD_BOUND);
+	_dispatch_lane_barrier_complete(dq, 0, 0);
 
 	// overload the "probably" variable to mean that dispatch_main() or
 	// similar non-POSIX API was called
@@ -6487,8 +7088,8 @@
 	// See dispatch_main for call to _dispatch_sig_thread on linux.
 #ifndef __linux__
 	if (_dispatch_program_is_probably_callback_driven) {
-		_dispatch_barrier_async_detached_f(_dispatch_get_root_queue(
-				DISPATCH_QOS_DEFAULT, true), NULL, _dispatch_sig_thread);
+		_dispatch_barrier_async_detached_f(_dispatch_get_default_queue(true),
+				NULL, _dispatch_sig_thread);
 		sleep(1); // workaround 6778970
 	}
 #endif
@@ -6496,7 +7097,7 @@
 #if DISPATCH_COCOA_COMPAT
 	dispatch_once_f(&_dispatch_main_q_handle_pred, dq,
 			_dispatch_runloop_queue_handle_init);
-	_dispatch_runloop_queue_handle_dispose(dq);
+	_dispatch_runloop_queue_handle_dispose(dq->_as_dl);
 #endif
 }
 
@@ -6546,3 +7147,341 @@
 	DISPATCH_INTERNAL_CRASH(ctxt,
 			"Premature thread exit while a dispatch context is set");
 }
+#pragma mark -
+#pragma mark dispatch_init
+
+static void
+_dispatch_root_queues_init_once(void *context DISPATCH_UNUSED)
+{
+	_dispatch_fork_becomes_unsafe();
+#if DISPATCH_USE_INTERNAL_WORKQUEUE
+	size_t i;
+	for (i = 0; i < DISPATCH_ROOT_QUEUE_COUNT; i++) {
+		_dispatch_root_queue_init_pthread_pool(&_dispatch_root_queues[i], 0,
+				_dispatch_root_queues[i].dq_priority);
+	}
+#else
+	int wq_supported = _pthread_workqueue_supported();
+	int r = ENOTSUP;
+
+	if (!(wq_supported & WORKQ_FEATURE_MAINTENANCE)) {
+		DISPATCH_INTERNAL_CRASH(wq_supported,
+				"QoS Maintenance support required");
+	}
+
+	if (unlikely(!_dispatch_kevent_workqueue_enabled)) {
+		r = _pthread_workqueue_init(_dispatch_worker_thread2,
+				offsetof(struct dispatch_queue_s, dq_serialnum), 0);
+#if DISPATCH_USE_KEVENT_WORKLOOP
+	} else if (wq_supported & WORKQ_FEATURE_WORKLOOP) {
+		r = _pthread_workqueue_init_with_workloop(_dispatch_worker_thread2,
+				(pthread_workqueue_function_kevent_t)
+				_dispatch_kevent_worker_thread,
+				(pthread_workqueue_function_workloop_t)
+				_dispatch_workloop_worker_thread,
+				offsetof(struct dispatch_queue_s, dq_serialnum), 0);
+#endif // DISPATCH_USE_KEVENT_WORKLOOP
+#if DISPATCH_USE_KEVENT_WORKQUEUE
+	} else if (wq_supported & WORKQ_FEATURE_KEVENT) {
+		r = _pthread_workqueue_init_with_kevent(_dispatch_worker_thread2,
+				(pthread_workqueue_function_kevent_t)
+				_dispatch_kevent_worker_thread,
+				offsetof(struct dispatch_queue_s, dq_serialnum), 0);
+#endif
+	} else {
+		DISPATCH_INTERNAL_CRASH(wq_supported, "Missing Kevent WORKQ support");
+	}
+
+	if (r != 0) {
+		DISPATCH_INTERNAL_CRASH((r << 16) | wq_supported,
+				"Root queue initialization failed");
+	}
+#endif // DISPATCH_USE_INTERNAL_WORKQUEUE
+}
+
+DISPATCH_STATIC_GLOBAL(dispatch_once_t _dispatch_root_queues_pred);
+DISPATCH_ALWAYS_INLINE
+static inline void
+_dispatch_root_queues_init(void)
+{
+	dispatch_once_f(&_dispatch_root_queues_pred, NULL,
+			_dispatch_root_queues_init_once);
+}
+
+DISPATCH_EXPORT DISPATCH_NOTHROW
+void
+libdispatch_init(void)
+{
+	dispatch_assert(sizeof(struct dispatch_apply_s) <=
+			DISPATCH_CONTINUATION_SIZE);
+
+	if (_dispatch_getenv_bool("LIBDISPATCH_STRICT", false)) {
+		_dispatch_mode |= DISPATCH_MODE_STRICT;
+	}
+#if HAVE_OS_FAULT_WITH_PAYLOAD && TARGET_OS_IPHONE && !TARGET_OS_SIMULATOR
+	if (_dispatch_getenv_bool("LIBDISPATCH_NO_FAULTS", false)) {
+		_dispatch_mode |= DISPATCH_MODE_NO_FAULTS;
+	} else if (getpid() == 1 ||
+			!os_variant_has_internal_diagnostics("com.apple.libdispatch")) {
+		_dispatch_mode |= DISPATCH_MODE_NO_FAULTS;
+	}
+#endif // HAVE_OS_FAULT_WITH_PAYLOAD && TARGET_OS_IPHONE && !TARGET_OS_SIMULATOR
+
+
+#if DISPATCH_DEBUG || DISPATCH_PROFILE
+#if DISPATCH_USE_KEVENT_WORKQUEUE
+	if (getenv("LIBDISPATCH_DISABLE_KEVENT_WQ")) {
+		_dispatch_kevent_workqueue_enabled = false;
+	}
+#endif
+#endif
+
+#if HAVE_PTHREAD_WORKQUEUE_QOS
+	dispatch_qos_t qos = _dispatch_qos_from_qos_class(qos_class_main());
+	_dispatch_main_q.dq_priority = _dispatch_priority_make(qos, 0);
+#if DISPATCH_DEBUG
+	if (!getenv("LIBDISPATCH_DISABLE_SET_QOS")) {
+		_dispatch_set_qos_class_enabled = 1;
+	}
+#endif
+#endif
+
+#if DISPATCH_USE_THREAD_LOCAL_STORAGE
+	_dispatch_thread_key_create(&__dispatch_tsd_key, _libdispatch_tsd_cleanup);
+#else
+	_dispatch_thread_key_create(&dispatch_priority_key, NULL);
+	_dispatch_thread_key_create(&dispatch_r2k_key, NULL);
+	_dispatch_thread_key_create(&dispatch_queue_key, _dispatch_queue_cleanup);
+	_dispatch_thread_key_create(&dispatch_frame_key, _dispatch_frame_cleanup);
+	_dispatch_thread_key_create(&dispatch_cache_key, _dispatch_cache_cleanup);
+	_dispatch_thread_key_create(&dispatch_context_key, _dispatch_context_cleanup);
+	_dispatch_thread_key_create(&dispatch_pthread_root_queue_observer_hooks_key,
+			NULL);
+	_dispatch_thread_key_create(&dispatch_basepri_key, NULL);
+#if DISPATCH_INTROSPECTION
+	_dispatch_thread_key_create(&dispatch_introspection_key , NULL);
+#elif DISPATCH_PERF_MON
+	_dispatch_thread_key_create(&dispatch_bcounter_key, NULL);
+#endif
+	_dispatch_thread_key_create(&dispatch_wlh_key, _dispatch_wlh_cleanup);
+	_dispatch_thread_key_create(&dispatch_voucher_key, _voucher_thread_cleanup);
+	_dispatch_thread_key_create(&dispatch_deferred_items_key,
+			_dispatch_deferred_items_cleanup);
+#endif
+
+#if DISPATCH_USE_RESOLVERS // rdar://problem/8541707
+	_dispatch_main_q.do_targetq = _dispatch_get_default_queue(true);
+#endif
+
+	_dispatch_queue_set_current(&_dispatch_main_q);
+	_dispatch_queue_set_bound_thread(&_dispatch_main_q);
+
+#if DISPATCH_USE_PTHREAD_ATFORK
+	(void)dispatch_assume_zero(pthread_atfork(dispatch_atfork_prepare,
+			dispatch_atfork_parent, dispatch_atfork_child));
+#endif
+	_dispatch_hw_config_init();
+	_dispatch_time_init();
+	_dispatch_vtable_init();
+	_os_object_init();
+	_voucher_init();
+	_dispatch_introspection_init();
+}
+
+#if DISPATCH_USE_THREAD_LOCAL_STORAGE
+#if defined(__unix__) || (defined(__APPLE__) && defined(__MACH__))
+#include <unistd.h>
+#endif
+#if !defined(_WIN32)
+#include <sys/syscall.h>
+#endif
+
+#ifndef __ANDROID__
+#ifdef SYS_gettid
+DISPATCH_ALWAYS_INLINE
+static inline pid_t
+_gettid(void)
+{
+	return (pid_t)syscall(SYS_gettid);
+}
+#elif defined(__FreeBSD__)
+DISPATCH_ALWAYS_INLINE
+static inline pid_t
+_gettid(void)
+{
+	return (pid_t)pthread_getthreadid_np();
+}
+#elif defined(_WIN32)
+DISPATCH_ALWAYS_INLINE
+static inline DWORD
+_gettid(void)
+{
+	return GetCurrentThreadId();
+}
+#else
+#error "SYS_gettid unavailable on this system"
+#endif /* SYS_gettid */
+#endif /* ! __ANDROID__ */
+
+#define _tsd_call_cleanup(k, f)  do { \
+		if ((f) && tsd->k) ((void(*)(void*))(f))(tsd->k); \
+	} while (0)
+
+#ifdef __ANDROID__
+static void (*_dispatch_thread_detach_callback)(void);
+
+void
+_dispatch_install_thread_detach_callback(void (*cb)(void))
+{
+	if (os_atomic_xchg(&_dispatch_thread_detach_callback, cb, relaxed)) {
+		DISPATCH_CLIENT_CRASH(0, "Installing a thread detach callback twice");
+	}
+}
+#endif
+
+#if defined(_WIN32)
+static bool
+_dispatch_process_is_exiting(void)
+{
+   // The goal here is to detect if the current thread is executing cleanup
+   // code (e.g. FLS destructors) as a result of calling ExitProcess(). Windows
+   // doesn't provide an official method of getting this information, so we
+   // take advantage of how ExitProcess() works internally. The first thing
+   // that it does (according to MSDN) is terminate every other thread in the
+   // process. Logically, it should not be possible to create more threads
+   // after this point, and Windows indeed enforces this. Try to create a
+   // lightweight suspended thread, and if access is denied, assume that this
+   // is because the process is exiting.
+   //
+   // We aren't worried about any race conditions here during process exit.
+   // Cleanup code is only run on the thread that already called ExitProcess(),
+   // and every other thread will have been forcibly terminated by the time
+   // that happens. Additionally, while CreateThread() could conceivably fail
+   // due to resource exhaustion, the process would already be in a bad state
+   // if that happens. This is only intended to prevent unwanted cleanup code
+   // from running, so the worst case is that a thread doesn't clean up after
+   // itself when the process is about to die anyway.
+   const size_t stack_size = 1;  // As small as possible
+   HANDLE thread = CreateThread(NULL, stack_size, NULL, NULL,
+           CREATE_SUSPENDED | STACK_SIZE_PARAM_IS_A_RESERVATION, NULL);
+   if (thread) {
+       // Although Microsoft recommends against using TerminateThread, it's
+       // safe to use it here because we know that the thread is suspended and
+       // it has not executed any code due to a NULL lpStartAddress. There was
+       // a bug in Windows Server 2003 and Windows XP where the initial stack
+       // would not be freed, but libdispatch does not support them anyway.
+       TerminateThread(thread, 0);
+       CloseHandle(thread);
+       return false;
+   }
+   return GetLastError() == ERROR_ACCESS_DENIED;
+}
+#endif // defined(_WIN32)
+
+
+void DISPATCH_TSD_DTOR_CC
+_libdispatch_tsd_cleanup(void *ctx)
+{
+#if defined(_WIN32)
+   // On Windows, exiting a process will still call FLS destructors for the
+   // thread that called ExitProcess(). pthreads-based platforms don't call key
+   // destructors on exit, so be consistent.
+   if (_dispatch_process_is_exiting()) {
+       return;
+   }
+#endif // defined(_WIN32)
+
+	struct dispatch_tsd *tsd = (struct dispatch_tsd*) ctx;
+
+	_tsd_call_cleanup(dispatch_priority_key, NULL);
+	_tsd_call_cleanup(dispatch_r2k_key, NULL);
+
+	_tsd_call_cleanup(dispatch_queue_key, _dispatch_queue_cleanup);
+	_tsd_call_cleanup(dispatch_frame_key, _dispatch_frame_cleanup);
+	_tsd_call_cleanup(dispatch_cache_key, _dispatch_cache_cleanup);
+	_tsd_call_cleanup(dispatch_context_key, _dispatch_context_cleanup);
+	_tsd_call_cleanup(dispatch_pthread_root_queue_observer_hooks_key,
+			NULL);
+	_tsd_call_cleanup(dispatch_basepri_key, NULL);
+#if DISPATCH_INTROSPECTION
+	_tsd_call_cleanup(dispatch_introspection_key, NULL);
+#elif DISPATCH_PERF_MON
+	_tsd_call_cleanup(dispatch_bcounter_key, NULL);
+#endif
+	_tsd_call_cleanup(dispatch_wlh_key, _dispatch_wlh_cleanup);
+	_tsd_call_cleanup(dispatch_voucher_key, _voucher_thread_cleanup);
+	_tsd_call_cleanup(dispatch_deferred_items_key,
+			_dispatch_deferred_items_cleanup);
+#ifdef __ANDROID__
+	if (_dispatch_thread_detach_callback) {
+		_dispatch_thread_detach_callback();
+	}
+#endif
+	tsd->tid = 0;
+}
+
+DISPATCH_NOINLINE
+void
+libdispatch_tsd_init(void)
+{
+#if !defined(_WIN32)
+	pthread_setspecific(__dispatch_tsd_key, &__dispatch_tsd);
+#else
+	FlsSetValue(__dispatch_tsd_key, &__dispatch_tsd);
+#endif // defined(_WIN32)
+	__dispatch_tsd.tid = _gettid();
+}
+#endif
+
+DISPATCH_NOTHROW
+void
+_dispatch_queue_atfork_child(void)
+{
+	dispatch_queue_main_t main_q = &_dispatch_main_q;
+	void *crash = (void *)0x100;
+	size_t i;
+
+	if (_dispatch_queue_is_thread_bound(main_q)) {
+		_dispatch_queue_set_bound_thread(main_q);
+	}
+
+	if (!_dispatch_is_multithreaded_inline()) return;
+
+	main_q->dq_items_head = crash;
+	main_q->dq_items_tail = crash;
+
+	_dispatch_mgr_q.dq_items_head = crash;
+	_dispatch_mgr_q.dq_items_tail = crash;
+
+	for (i = 0; i < DISPATCH_ROOT_QUEUE_COUNT; i++) {
+		_dispatch_root_queues[i].dq_items_head = crash;
+		_dispatch_root_queues[i].dq_items_tail = crash;
+	}
+}
+
+DISPATCH_NOINLINE
+void
+_dispatch_fork_becomes_unsafe_slow(void)
+{
+	uint8_t value = os_atomic_or(&_dispatch_unsafe_fork,
+			_DISPATCH_UNSAFE_FORK_MULTITHREADED, relaxed);
+	if (value & _DISPATCH_UNSAFE_FORK_PROHIBIT) {
+		DISPATCH_CLIENT_CRASH(0, "Transition to multithreaded is prohibited");
+	}
+}
+
+DISPATCH_NOINLINE
+void
+_dispatch_prohibit_transition_to_multithreaded(bool prohibit)
+{
+	if (prohibit) {
+		uint8_t value = os_atomic_or(&_dispatch_unsafe_fork,
+				_DISPATCH_UNSAFE_FORK_PROHIBIT, relaxed);
+		if (value & _DISPATCH_UNSAFE_FORK_MULTITHREADED) {
+			DISPATCH_CLIENT_CRASH(0, "The executable is already multithreaded");
+		}
+	} else {
+		os_atomic_and(&_dispatch_unsafe_fork,
+				(uint8_t)~_DISPATCH_UNSAFE_FORK_PROHIBIT, relaxed);
+	}
+}
diff --git a/src/queue_internal.h b/src/queue_internal.h
index 1a590e2..ce235f4 100644
--- a/src/queue_internal.h
+++ b/src/queue_internal.h
@@ -32,24 +32,8 @@
 #include <dispatch/base.h> // for HeaderDoc
 #endif
 
-#if defined(__BLOCKS__) && !defined(DISPATCH_ENABLE_PTHREAD_ROOT_QUEUES)
-#define DISPATCH_ENABLE_PTHREAD_ROOT_QUEUES 1 // <rdar://problem/10719357>
-#endif
-
-/* x86 & cortex-a8 have a 64 byte cacheline */
-#define DISPATCH_CACHELINE_SIZE 64u
-#define ROUND_UP_TO_CACHELINE_SIZE(x) \
-		(((x) + (DISPATCH_CACHELINE_SIZE - 1u)) & \
-		~(DISPATCH_CACHELINE_SIZE - 1u))
-#define DISPATCH_CACHELINE_ALIGN \
-		__attribute__((__aligned__(DISPATCH_CACHELINE_SIZE)))
-
-#define DISPATCH_CACHELINE_PAD_SIZE(type) \
-		(roundup(sizeof(type), DISPATCH_CACHELINE_SIZE) - sizeof(type))
-
-
 #pragma mark -
-#pragma mark dispatch_queue_t
+#pragma mark dispatch_queue_flags, dq_state
 
 DISPATCH_ENUM(dispatch_queue_flags, uint32_t,
 	DQF_NONE                = 0x00000000,
@@ -59,94 +43,61 @@
 	DQF_THREAD_BOUND        = 0x00040000, // queue is bound to a thread
 	DQF_BARRIER_BIT         = 0x00080000, // queue is a barrier on its target
 	DQF_TARGETED            = 0x00100000, // queue is targeted by another object
-	DQF_LABEL_NEEDS_FREE    = 0x00200000, // queue label was strduped; need to free it
-	DQF_CANNOT_TRYSYNC      = 0x00400000,
+	DQF_LABEL_NEEDS_FREE    = 0x00200000, // queue label was strdup()ed
+	DQF_MUTABLE             = 0x00400000,
 	DQF_RELEASED            = 0x00800000, // xref_cnt == -1
-	DQF_LEGACY              = 0x01000000,
 
-	// only applies to sources
 	//
-	// Assuming DSF_ARMED (a), DSF_DEFERRED_DELETE (p), DSF_DELETED (d):
+	// Only applies to sources
 	//
-	// ---
-	// a--
-	//    source states for regular operations
-	//    (delivering event / waiting for event)
+	// @const DSF_STRICT
+	// Semantics of the source are strict (implies DQF_MUTABLE being unset):
+	// - handlers can't be changed past activation
+	// - EV_VANISHED causes a hard failure
+	// - source can't change WLH
 	//
-	// ap-
-	//    Either armed for deferred deletion delivery, waiting for an EV_DELETE,
-	//    and the next state will be -pd (EV_DELETE delivered),
-	//    Or, a cancellation raced with an event delivery and failed
-	//    (EINPROGRESS), and when the event delivery happens, the next state
-	//    will be -p-.
+	// @const DSF_WLH_CHANGED
+	// The wlh for the source changed (due to retarget past activation).
+	// Only used for debugging and diagnostics purposes.
 	//
-	// -pd
-	//    Received EV_DELETE (from ap-), needs to unregister ds_refs, the muxnote
-	//    is gone from the kernel. Next state will be --d.
+	// @const DSF_CANCELED
+	// Explicit cancelation has been requested.
 	//
-	// -p-
-	//    Received an EV_ONESHOT event (from a--), or the delivery of an event
-	//    causing the cancellation to fail with EINPROGRESS was delivered
-	//    (from ap-). The muxnote still lives, next state will be --d.
+	// @const DSF_CANCEL_WAITER
+	// At least one caller of dispatch_source_cancel_and_wait() is waiting on
+	// the cancelation to finish. DSF_CANCELED must be set if this bit is set.
 	//
-	// --d
-	//    Final state of the source, the muxnote is gone from the kernel and
-	//    ds_refs is unregistered. The source can safely be released.
+	// @const DSF_NEEDS_EVENT
+	// The source has started to delete its unotes due to cancelation, but
+	// couldn't finish its unregistration and is waiting for some asynchronous
+	// events to fire to be able to.
 	//
-	// a-d (INVALID)
-	// apd (INVALID)
-	//    Setting DSF_DELETED should also always atomically clear DSF_ARMED. If
-	//    the muxnote is gone from the kernel, it makes no sense whatsoever to
-	//    have it armed. And generally speaking, once `d` or `p` has been set,
-	//    `a` cannot do a cleared -> set transition anymore
-	//    (see _dispatch_source_try_set_armed).
+	// This flag prevents spurious wakeups when the source state machine
+	// requires specific events to make progress. Events that are likely
+	// to unblock a source state machine pass DISPATCH_WAKEUP_EVENT
+	// which neuters the effect of DSF_NEEDS_EVENT.
 	//
-	DSF_WLH_CHANGED         = 0x04000000,
-	DSF_CANCEL_WAITER       = 0x08000000, // synchronous waiters for cancel
-	DSF_CANCELED            = 0x10000000, // cancellation has been requested
-	DSF_ARMED               = 0x20000000, // source is armed
-	DSF_DEFERRED_DELETE     = 0x40000000, // source is pending delete
-	DSF_DELETED             = 0x80000000, // source muxnote is deleted
-#define DSF_STATE_MASK (DSF_ARMED | DSF_DEFERRED_DELETE | DSF_DELETED)
+	// @const DSF_DELETED
+	// The source can now only be used as a queue and is not allowed to register
+	// any new unote anymore. All the previously registered unotes are inactive
+	// and their knote is gone. However, these previously registered unotes may
+	// still be in the process of delivering their last event.
+	//
+	// Sources always hold an internal refcount while they use eventing
+	// subsystems; it is consumed when this bit is set.
+	//
+	DSF_STRICT              = 0x04000000,
+	DSF_WLH_CHANGED         = 0x08000000,
+	DSF_CANCELED            = 0x10000000,
+	DSF_CANCEL_WAITER       = 0x20000000,
+	DSF_NEEDS_EVENT         = 0x40000000,
+	DSF_DELETED             = 0x80000000,
 
 #define DQF_FLAGS_MASK        ((dispatch_queue_flags_t)0xffff0000)
 #define DQF_WIDTH_MASK        ((dispatch_queue_flags_t)0x0000ffff)
 #define DQF_WIDTH(n)          ((dispatch_queue_flags_t)(uint16_t)(n))
 );
 
-#define _DISPATCH_QUEUE_HEADER(x) \
-	struct os_mpsc_queue_s _as_oq[0]; \
-	DISPATCH_OBJECT_HEADER(x); \
-	_OS_MPSC_QUEUE_FIELDS(dq, dq_state); \
-	uint32_t dq_side_suspend_cnt; \
-	dispatch_unfair_lock_s dq_sidelock; \
-	union { \
-		dispatch_queue_t dq_specific_q; \
-		struct dispatch_source_refs_s *ds_refs; \
-		struct dispatch_timer_source_refs_s *ds_timer_refs; \
-		struct dispatch_mach_recv_refs_s *dm_recv_refs; \
-	}; \
-	DISPATCH_UNION_LE(uint32_t volatile dq_atomic_flags, \
-		const uint16_t dq_width, \
-		const uint16_t __dq_opaque \
-	); \
-	DISPATCH_INTROSPECTION_QUEUE_HEADER
-	/* LP64: 32bit hole */
-
-#define DISPATCH_QUEUE_HEADER(x) \
-	struct dispatch_queue_s _as_dq[0]; \
-	_DISPATCH_QUEUE_HEADER(x)
-
-struct _dispatch_unpadded_queue_s {
-	_DISPATCH_QUEUE_HEADER(dummy);
-};
-
-#define DISPATCH_QUEUE_CACHELINE_PAD \
-		DISPATCH_CACHELINE_PAD_SIZE(struct _dispatch_unpadded_queue_s)
-
-#define DISPATCH_QUEUE_CACHELINE_PADDING \
-		char _dq_pad[DISPATCH_QUEUE_CACHELINE_PAD]
-
 /*
  * dispatch queues `dq_state` demystified
  *
@@ -240,12 +191,12 @@
  *
  *    When done, any "Drainer", in particular for dispatch_*_sync() handoff
  *    paths, exits in 3 steps, and the point of the DIRTY bit is to make
- *    the Drainers take the slowpath at step 2 to take into account enqueuers
+ *    the Drainers take the slow path at step 2 to take into account enqueuers
  *    that could have made the queue non idle concurrently.
  *
  *    <code>
  *        // drainer-exit step 1
- *        if (slowpath(dq->dq_items_tail)) { // speculative test
+ *        if (unlikely(dq->dq_items_tail)) { // speculative test
  *            return handle_non_empty_queue_or_wakeup(dq);
  *        }
  *        // drainer-exit step 2
@@ -487,7 +438,7 @@
 		((DISPATCH_QUEUE_WIDTH_FULL - (width)) << DISPATCH_QUEUE_WIDTH_SHIFT)
 
 /* Magic dq_state values for global queues: they have QUEUE_FULL and IN_BARRIER
- * set to force the slowpath in both dispatch_barrier_sync() and dispatch_sync()
+ * set to force the slow path in dispatch_barrier_sync() and dispatch_sync()
  */
 #define DISPATCH_ROOT_QUEUE_STATE_INIT_VALUE \
 		(DISPATCH_QUEUE_WIDTH_FULL_BIT | DISPATCH_QUEUE_IN_BARRIER)
@@ -495,43 +446,299 @@
 #define DISPATCH_QUEUE_SERIAL_DRAIN_OWNED \
 		(DISPATCH_QUEUE_IN_BARRIER | DISPATCH_QUEUE_WIDTH_INTERVAL)
 
-DISPATCH_CLASS_DECL(queue);
+#pragma mark -
+#pragma mark dispatch_queue_t
 
-#if !defined(__cplusplus) || !DISPATCH_INTROSPECTION
+typedef struct dispatch_queue_specific_s {
+	const void *dqs_key;
+	void *dqs_ctxt;
+	dispatch_function_t dqs_destructor;
+	TAILQ_ENTRY(dispatch_queue_specific_s) dqs_entry;
+} *dispatch_queue_specific_t;
+
+typedef struct dispatch_queue_specific_head_s {
+	dispatch_unfair_lock_s dqsh_lock;
+	TAILQ_HEAD(, dispatch_queue_specific_s) dqsh_entries;
+} *dispatch_queue_specific_head_t;
+
+#define DISPATCH_WORKLOOP_ATTR_HAS_SCHED 0x1u
+#define DISPATCH_WORKLOOP_ATTR_HAS_POLICY 0x2u
+#define DISPATCH_WORKLOOP_ATTR_HAS_CPUPERCENT 0x4u
+#define DISPATCH_WORKLOOP_ATTR_HAS_QOS_CLASS 0x8u
+#define DISPATCH_WORKLOOP_ATTR_NEEDS_DESTROY 0x10u
+typedef struct dispatch_workloop_attr_s *dispatch_workloop_attr_t;
+typedef struct dispatch_workloop_attr_s {
+	uint32_t dwla_flags;
+	dispatch_priority_t dwla_pri;
+#if TARGET_OS_MAC
+	struct sched_param dwla_sched;
+#endif // TARGET_OS_MAC
+	int dwla_policy;
+	struct {
+		uint8_t percent;
+		uint32_t refillms;
+	} dwla_cpupercent;
+} dispatch_workloop_attr_s;
+
+/*
+ * Dispatch Queue cluster related types
+ *
+ * The dispatch queue cluster uses aliasing structs, and loosely follows the
+ * external types exposed in <dispatch/queue.h>
+ *
+ * The API types pretend to have this hierarchy:
+ *
+ * dispatch_queue_t
+ *  +--> dispatch_workloop_t
+ *  +--> dispatch_queue_serial_t --> dispatch_queue_main_t
+ *  +--> dispatch_queue_concurrent_t
+ *  '--> dispatch_queue_global_t
+ *
+ *
+ * However, in the library itself, there are more types and a finer grained
+ * hierarchy when it comes to the struct members.
+ *
+ * dispatch_queue_class_t / struct dispatch_queue_s
+ *  +--> struct dispatch_workloop_s
+ *  '--> dispatch_lane_class_t
+ *        +--> struct dispatch_lane_s
+ *        |     +--> struct dispatch_source_s
+ *        |     '--> struct dispatch_mach_s
+ *        +--> struct dispatch_queue_static_s
+ *        '--> struct dispatch_queue_global_s
+ *              +--> struct dispatch_queue_pthread_root_s
+ *
+ *
+ * dispatch_queue_class_t && struct dispatch_queue_s
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ * The queue class type is a transparent union of all queue types, which allows
+ * cutting down the explicit downcasts to `dispatch_queue_t` when calling
+ * a function working on any dispatch_queue_t type.
+ *
+ * The concrete struct layout is struct dispatch_queue_s
+ * it provides:
+ * - dispatch object fields
+ * - dq_state
+ * - dq_serialnum
+ * - dq_label
+ * - dq_atomic_flags
+ * - dq_sref_cnt
+ * - an auxiliary pointer used by sub-classes (dq_specific_head, ds_refs, ...)
+ * - dq_priority (XXX: we should push it down to lanes)
+ *
+ * It also provides storage for one opaque pointer-sized field.
+ *
+ * dispatch_lane_class_t
+ * ~~~~~~~~~~~~~~~~~~~~~
+ *
+ * The lane class type is a transparent union of all "lane" types, which have
+ * a single head/tail pair.
+ *
+ * There's no proper concrete struct layout associated, `struct dispatch_lane_s`
+ * is used most of the time instead. The lane class adds:
+ * - dq_items_head
+ * - dq_items_tail (allocated in the hole the queue class carves out)
+ *
+ *
+ * struct dispatch_lane_s and variants
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ * This is the concrete type used for:
+ * - API serial/concurrent/runloop queues
+ * - sources and mach channels
+ * - the main and manager queues, as struct dispatch_queue_static_s which is
+ *   a cacheline aligned variant of struct dispatch_lane_s.
+ *
+ * It also provides:
+ * - dq_sidelock, used for suspension & target queue handling,
+ * - dq_side_suspend_cnt.
+ *
+ * Sources (struct dispatch_source_s) and mach channels (struct dispatch_mach_s)
+ * use the last 32bit word for flags private to their use.
+ *
+ * struct dispatch_queue_global_s is used for all dispatch root queues:
+ * - global concurrent queues
+ * - pthread root queues
+ * - the network event thread
+ *
+ * These pretend to derive from dispatch_lane_s but use dq_sidelock and
+ * dq_side_suspend_cnt differently, which is possible because root queues
+ * cannot be targeted or suspended and hence have no use for these.
+ */
+
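The hierarchy described above is implemented with zero-length array members
such as `struct dispatch_queue_s _as_dq[0]` placed at the start of each
"subclass", so a more derived struct can be viewed as its base without an
explicit cast at every call site. A reduced sketch of that layout trick with
hypothetical names (not the real libdispatch definitions; zero-length arrays
are a GNU/Clang extension, as used throughout this header):

#include <stdio.h>

struct base_s {
	long state;
	const char *label;
};

struct derived_s {
	struct base_s _as_base[0];  /* aliases the leading base fields */
	long state;                 /* must mirror struct base_s layout */
	const char *label;
	void *extra;                /* derived-only field */
};

static void print_label(struct base_s *b) { printf("%s\n", b->label); }

int main(void)
{
	struct derived_s d = { .state = 1, .label = "derived", .extra = NULL };
	print_label(d._as_base);    /* decays to struct base_s *, no cast needed */
	return 0;
}
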
+#if OS_OBJECT_HAVE_OBJC1
+#define _DISPATCH_QUEUE_CLASS_HEADER(x, __pointer_sized_field__) \
+	DISPATCH_OBJECT_HEADER(x); \
+	DISPATCH_UNION_LE(uint64_t volatile dq_state, \
+			dispatch_lock dq_state_lock, \
+			uint32_t dq_state_bits \
+	); \
+	__pointer_sized_field__
+#else
+#define _DISPATCH_QUEUE_CLASS_HEADER(x, __pointer_sized_field__) \
+	DISPATCH_OBJECT_HEADER(x); \
+	__pointer_sized_field__; \
+	DISPATCH_UNION_LE(uint64_t volatile dq_state, \
+			dispatch_lock dq_state_lock, \
+			uint32_t dq_state_bits \
+	)
+#endif
+
+#define DISPATCH_QUEUE_CLASS_HEADER(x, __pointer_sized_field__) \
+	_DISPATCH_QUEUE_CLASS_HEADER(x, __pointer_sized_field__); \
+	/* LP64 global queue cacheline boundary */ \
+	unsigned long dq_serialnum; \
+	const char *dq_label; \
+	DISPATCH_UNION_LE(uint32_t volatile dq_atomic_flags, \
+		const uint16_t dq_width, \
+		const uint16_t __dq_opaque2 \
+	); \
+	dispatch_priority_t dq_priority; \
+	union { \
+		struct dispatch_queue_specific_head_s *dq_specific_head; \
+		struct dispatch_source_refs_s *ds_refs; \
+		struct dispatch_timer_source_refs_s *ds_timer_refs; \
+		struct dispatch_mach_recv_refs_s *dm_recv_refs; \
+	}; \
+	int volatile dq_sref_cnt
+
 struct dispatch_queue_s {
-	_DISPATCH_QUEUE_HEADER(queue);
-	DISPATCH_QUEUE_CACHELINE_PADDING; // for static queues only
+	DISPATCH_QUEUE_CLASS_HEADER(queue, void *__dq_opaque1);
+	/* 32bit hole on LP64 */
 } DISPATCH_ATOMIC64_ALIGN;
 
-#if __has_feature(c_static_assert) && !DISPATCH_INTROSPECTION
-_Static_assert(sizeof(struct dispatch_queue_s) <= 128, "dispatch queue size");
+struct dispatch_workloop_s {
+	struct dispatch_queue_s _as_dq[0];
+	DISPATCH_QUEUE_CLASS_HEADER(workloop, dispatch_timer_heap_t dwl_timer_heap);
+	uint8_t dwl_drained_qos;
+	/* 24 bits hole */
+	struct dispatch_object_s *dwl_heads[DISPATCH_QOS_NBUCKETS];
+	struct dispatch_object_s *dwl_tails[DISPATCH_QOS_NBUCKETS];
+	dispatch_workloop_attr_t dwl_attr;
+} DISPATCH_ATOMIC64_ALIGN;
+
+#define DISPATCH_LANE_CLASS_HEADER(x) \
+	struct dispatch_queue_s _as_dq[0]; \
+	DISPATCH_QUEUE_CLASS_HEADER(x, \
+			struct dispatch_object_s *volatile dq_items_tail); \
+	dispatch_unfair_lock_s dq_sidelock; \
+	struct dispatch_object_s *volatile dq_items_head; \
+	uint32_t dq_side_suspend_cnt
+
+typedef struct dispatch_lane_s {
+	DISPATCH_LANE_CLASS_HEADER(lane);
+	/* 32bit hole on LP64 */
+} DISPATCH_ATOMIC64_ALIGN *dispatch_lane_t;
+
+// Cache aligned type for static queues (main queue, manager)
+struct dispatch_queue_static_s {
+	struct dispatch_lane_s _as_dl[0]; \
+	DISPATCH_LANE_CLASS_HEADER(lane);
+} DISPATCH_CACHELINE_ALIGN;
+
+#define DISPATCH_QUEUE_ROOT_CLASS_HEADER(x) \
+	struct dispatch_queue_s _as_dq[0]; \
+	DISPATCH_QUEUE_CLASS_HEADER(x, \
+			struct dispatch_object_s *volatile dq_items_tail); \
+	int volatile dgq_thread_pool_size; \
+	struct dispatch_object_s *volatile dq_items_head; \
+	int volatile dgq_pending
+
+struct dispatch_queue_global_s {
+	DISPATCH_QUEUE_ROOT_CLASS_HEADER(lane);
+} DISPATCH_CACHELINE_ALIGN;
+
+
+typedef struct dispatch_pthread_root_queue_observer_hooks_s {
+	void (*queue_will_execute)(dispatch_queue_t queue);
+	void (*queue_did_execute)(dispatch_queue_t queue);
+} dispatch_pthread_root_queue_observer_hooks_s;
+typedef dispatch_pthread_root_queue_observer_hooks_s
+		*dispatch_pthread_root_queue_observer_hooks_t;
+
+#ifdef __APPLE__
+#define DISPATCH_IOHID_SPI 1
+
+DISPATCH_EXPORT DISPATCH_MALLOC DISPATCH_RETURNS_RETAINED DISPATCH_WARN_RESULT
+DISPATCH_NOTHROW DISPATCH_NONNULL4
+dispatch_queue_global_t
+_dispatch_pthread_root_queue_create_with_observer_hooks_4IOHID(
+	const char *label, unsigned long flags, const pthread_attr_t *attr,
+	dispatch_pthread_root_queue_observer_hooks_t observer_hooks,
+	dispatch_block_t configure);
+
+DISPATCH_EXPORT DISPATCH_PURE DISPATCH_WARN_RESULT DISPATCH_NOTHROW
+bool
+_dispatch_queue_is_exclusively_owned_by_current_thread_4IOHID(
+		dispatch_queue_t queue);
+
+#endif // __APPLE__
+
+#if DISPATCH_USE_PTHREAD_POOL
+typedef struct dispatch_pthread_root_queue_context_s {
+#if !defined(_WIN32)
+	pthread_attr_t dpq_thread_attr;
 #endif
-#endif // !defined(__cplusplus) || !DISPATCH_INTROSPECTION
+	dispatch_block_t dpq_thread_configure;
+	struct dispatch_semaphore_s dpq_thread_mediator;
+	dispatch_pthread_root_queue_observer_hooks_s dpq_observer_hooks;
+} *dispatch_pthread_root_queue_context_t;
+#endif // DISPATCH_USE_PTHREAD_POOL
 
-DISPATCH_INTERNAL_SUBCLASS_DECL(queue_serial, queue);
-DISPATCH_INTERNAL_SUBCLASS_DECL(queue_concurrent, queue);
-DISPATCH_INTERNAL_SUBCLASS_DECL(queue_main, queue);
-DISPATCH_INTERNAL_SUBCLASS_DECL(queue_root, queue);
-DISPATCH_INTERNAL_SUBCLASS_DECL(queue_runloop, queue);
-DISPATCH_INTERNAL_SUBCLASS_DECL(queue_mgr, queue);
+#if DISPATCH_USE_PTHREAD_ROOT_QUEUES
+typedef struct dispatch_queue_pthread_root_s {
+	struct dispatch_queue_global_s _as_dgq[0];
+	DISPATCH_QUEUE_ROOT_CLASS_HEADER(lane);
+	struct dispatch_pthread_root_queue_context_s dpq_ctxt;
+} *dispatch_queue_pthread_root_t;
+#endif // DISPATCH_USE_PTHREAD_ROOT_QUEUES
 
-OS_OBJECT_INTERNAL_CLASS_DECL(dispatch_queue_specific_queue, dispatch_queue,
-		DISPATCH_OBJECT_VTABLE_HEADER(dispatch_queue_specific_queue));
+dispatch_static_assert(sizeof(struct dispatch_queue_s) <= 128);
+dispatch_static_assert(sizeof(struct dispatch_lane_s) <= 128);
+dispatch_static_assert(sizeof(struct dispatch_queue_global_s) <= 128);
+dispatch_static_assert(offsetof(struct dispatch_queue_s, dq_state) %
+		sizeof(uint64_t) == 0, "dq_state must be 8-byte aligned");
 
-typedef union {
-	struct os_mpsc_queue_s *_oq;
-	struct dispatch_queue_s *_dq;
-	struct dispatch_source_s *_ds;
-	struct dispatch_mach_s *_dm;
-	struct dispatch_queue_specific_queue_s *_dqsq;
-#if USE_OBJC
-	os_mpsc_queue_t _ojbc_oq;
-	dispatch_queue_t _objc_dq;
-	dispatch_source_t _objc_ds;
-	dispatch_mach_t _objc_dm;
-	dispatch_queue_specific_queue_t _objc_dqsq;
+#define dispatch_assert_valid_queue_type(type) \
+		dispatch_static_assert(sizeof(struct dispatch_queue_s) <= \
+				sizeof(struct type), #type " smaller than dispatch_queue_s"); \
+		dispatch_static_assert(_Alignof(struct type) >= sizeof(uint64_t), \
+				#type " is not 8-byte aligned"); \
+		dispatch_assert_aliases(dispatch_queue_s, type, dq_state); \
+		dispatch_assert_aliases(dispatch_queue_s, type, dq_serialnum); \
+		dispatch_assert_aliases(dispatch_queue_s, type, dq_label); \
+		dispatch_assert_aliases(dispatch_queue_s, type, dq_atomic_flags); \
+		dispatch_assert_aliases(dispatch_queue_s, type, dq_sref_cnt); \
+		dispatch_assert_aliases(dispatch_queue_s, type, dq_specific_head); \
+		dispatch_assert_aliases(dispatch_queue_s, type, dq_priority)
+
+#define dispatch_assert_valid_lane_type(type) \
+		dispatch_assert_valid_queue_type(type); \
+		dispatch_assert_aliases(dispatch_lane_s, type, dq_items_head); \
+		dispatch_assert_aliases(dispatch_lane_s, type, dq_items_tail)
+
+dispatch_assert_valid_queue_type(dispatch_lane_s);
+dispatch_assert_valid_lane_type(dispatch_queue_static_s);
+dispatch_assert_valid_lane_type(dispatch_queue_global_s);
+#if DISPATCH_USE_PTHREAD_ROOT_QUEUES
+dispatch_assert_valid_lane_type(dispatch_queue_pthread_root_s);
 #endif
-} dispatch_queue_class_t DISPATCH_TRANSPARENT_UNION;
+
+DISPATCH_CLASS_DECL(queue, QUEUE);
+DISPATCH_CLASS_DECL_BARE(lane, QUEUE);
+DISPATCH_CLASS_DECL(workloop, QUEUE);
+DISPATCH_SUBCLASS_DECL(queue_serial, queue, lane);
+DISPATCH_SUBCLASS_DECL(queue_main, queue_serial, lane);
+DISPATCH_SUBCLASS_DECL(queue_concurrent, queue, lane);
+DISPATCH_SUBCLASS_DECL(queue_global, queue, lane);
+#if DISPATCH_USE_PTHREAD_ROOT_QUEUES
+DISPATCH_INTERNAL_SUBCLASS_DECL(queue_pthread_root, queue, lane);
+#endif
+DISPATCH_INTERNAL_SUBCLASS_DECL(queue_runloop, queue_serial, lane);
+DISPATCH_INTERNAL_SUBCLASS_DECL(queue_mgr, queue_serial, lane);
+
+struct firehose_client_s;
 
 typedef struct dispatch_thread_context_s *dispatch_thread_context_t;
 typedef struct dispatch_thread_context_s {
@@ -540,98 +747,113 @@
 	union {
 		size_t dtc_apply_nesting;
 		dispatch_io_t dtc_io_in_barrier;
+		union firehose_buffer_u *dtc_fb;
+		void *dtc_mig_demux_ctx;
+		dispatch_mach_msg_t dtc_dmsg;
+		struct dispatch_ipc_handoff_s *dtc_dih;
 	};
 } dispatch_thread_context_s;
 
-typedef struct dispatch_thread_frame_s *dispatch_thread_frame_t;
-typedef struct dispatch_thread_frame_s {
-	// must be in the same order as our TSD keys!
-	dispatch_queue_t dtf_queue;
-	dispatch_thread_frame_t dtf_prev;
+typedef union dispatch_thread_frame_s *dispatch_thread_frame_t;
+typedef union dispatch_thread_frame_s {
+	struct {
+		// must be in the same order as our TSD keys!
+		dispatch_queue_t dtf_queue;
+		dispatch_thread_frame_t dtf_prev;
+	};
+	void *dtf_pair[2];
 } dispatch_thread_frame_s;
 
 typedef dispatch_queue_t dispatch_queue_wakeup_target_t;
 #define DISPATCH_QUEUE_WAKEUP_NONE           ((dispatch_queue_wakeup_target_t)0)
 #define DISPATCH_QUEUE_WAKEUP_TARGET         ((dispatch_queue_wakeup_target_t)1)
-#define DISPATCH_QUEUE_WAKEUP_MGR            (&_dispatch_mgr_q)
+#define DISPATCH_QUEUE_WAKEUP_MGR            (_dispatch_mgr_q._as_dq)
 #define DISPATCH_QUEUE_WAKEUP_WAIT_FOR_EVENT ((dispatch_queue_wakeup_target_t)-1)
 
-void _dispatch_queue_class_wakeup(dispatch_queue_t dqu, dispatch_qos_t qos,
+void _dispatch_queue_xref_dispose(dispatch_queue_class_t dq);
+void _dispatch_queue_wakeup(dispatch_queue_class_t dqu, dispatch_qos_t qos,
 		dispatch_wakeup_flags_t flags, dispatch_queue_wakeup_target_t target);
+void _dispatch_queue_invoke_finish(dispatch_queue_t dq,
+		dispatch_invoke_context_t dic, dispatch_queue_t tq, uint64_t owned);
+
 dispatch_priority_t _dispatch_queue_compute_priority_and_wlh(
-		dispatch_queue_t dq, dispatch_wlh_t *wlh_out);
-void _dispatch_queue_destroy(dispatch_queue_t dq, bool *allow_free);
-void _dispatch_queue_dispose(dispatch_queue_t dq, bool *allow_free);
-void _dispatch_queue_xref_dispose(struct dispatch_queue_s *dq);
-void _dispatch_queue_set_target_queue(dispatch_queue_t dq, dispatch_queue_t tq);
-void _dispatch_queue_suspend(dispatch_queue_t dq);
-void _dispatch_queue_resume(dispatch_queue_t dq, bool activate);
-void _dispatch_queue_finalize_activation(dispatch_queue_t dq,
-		bool *allow_resume);
-void _dispatch_queue_invoke(dispatch_queue_t dq,
+		dispatch_queue_class_t dq, dispatch_wlh_t *wlh_out);
+
+void _dispatch_lane_set_target_queue(dispatch_lane_t dq, dispatch_queue_t tq);
+void _dispatch_lane_class_dispose(dispatch_queue_class_t dq, bool *allow_free);
+void _dispatch_lane_dispose(dispatch_lane_class_t dq, bool *allow_free);
+void _dispatch_lane_suspend(dispatch_lane_class_t dq);
+void _dispatch_lane_resume(dispatch_lane_class_t dq, bool activate);
+void _dispatch_lane_activate(dispatch_lane_class_t dq, bool *allow_resume);
+void _dispatch_lane_invoke(dispatch_lane_class_t dq,
 		dispatch_invoke_context_t dic, dispatch_invoke_flags_t flags);
-void _dispatch_global_queue_poke(dispatch_queue_t dq, int n, int floor);
-void _dispatch_queue_push(dispatch_queue_t dq, dispatch_object_t dou,
+void _dispatch_lane_push(dispatch_lane_class_t dq, dispatch_object_t dou,
 		dispatch_qos_t qos);
-void _dispatch_queue_wakeup(dispatch_queue_t dq, dispatch_qos_t qos,
+void _dispatch_lane_concurrent_push(dispatch_lane_class_t dq,
+		dispatch_object_t dou, dispatch_qos_t qos);
+void _dispatch_lane_wakeup(dispatch_lane_class_t dq, dispatch_qos_t qos,
 		dispatch_wakeup_flags_t flags);
-dispatch_queue_wakeup_target_t _dispatch_queue_serial_drain(dispatch_queue_t dq,
-		dispatch_invoke_context_t dic, dispatch_invoke_flags_t flags,
-		uint64_t *owned);
-void _dispatch_queue_drain_sync_waiter(dispatch_queue_t dq,
-		dispatch_invoke_context_t dic, dispatch_invoke_flags_t flags,
-		uint64_t owned);
-void _dispatch_queue_specific_queue_dispose(
-		dispatch_queue_specific_queue_t dqsq, bool *allow_free);
-void _dispatch_root_queue_wakeup(dispatch_queue_t dq, dispatch_qos_t qos,
-		dispatch_wakeup_flags_t flags);
-void _dispatch_root_queue_push(dispatch_queue_t dq, dispatch_object_t dou,
+dispatch_queue_wakeup_target_t _dispatch_lane_serial_drain(
+		dispatch_lane_class_t dq, dispatch_invoke_context_t dic,
+		dispatch_invoke_flags_t flags, uint64_t *owned);
+
+void _dispatch_workloop_dispose(dispatch_workloop_t dwl, bool *allow_free);
+void _dispatch_workloop_activate(dispatch_workloop_t dwl);
+void _dispatch_workloop_invoke(dispatch_workloop_t dwl,
+		dispatch_invoke_context_t dic, dispatch_invoke_flags_t flags);
+void _dispatch_workloop_push(dispatch_workloop_t dwl, dispatch_object_t dou,
 		dispatch_qos_t qos);
-#if DISPATCH_USE_KEVENT_WORKQUEUE
-void _dispatch_root_queue_drain_deferred_item(dispatch_deferred_items_t ddi
-		DISPATCH_PERF_MON_ARGS_PROTO);
-void _dispatch_root_queue_drain_deferred_wlh(dispatch_deferred_items_t ddi
-		DISPATCH_PERF_MON_ARGS_PROTO);
-#endif
-void _dispatch_pthread_root_queue_dispose(dispatch_queue_t dq,
-		bool *allow_free);
-void _dispatch_main_queue_wakeup(dispatch_queue_t dq, dispatch_qos_t qos,
+void _dispatch_workloop_wakeup(dispatch_workloop_t dwl, dispatch_qos_t qos,
 		dispatch_wakeup_flags_t flags);
-void _dispatch_runloop_queue_wakeup(dispatch_queue_t dq, dispatch_qos_t qos,
+
+void _dispatch_root_queue_poke(dispatch_queue_global_t dq, int n, int floor);
+void _dispatch_root_queue_wakeup(dispatch_queue_global_t dq, dispatch_qos_t qos,
 		dispatch_wakeup_flags_t flags);
-void _dispatch_runloop_queue_xref_dispose(dispatch_queue_t dq);
-void _dispatch_runloop_queue_dispose(dispatch_queue_t dq, bool *allow_free);
-void _dispatch_mgr_queue_drain(void);
-#if DISPATCH_USE_MGR_THREAD && DISPATCH_ENABLE_PTHREAD_ROOT_QUEUES
-void _dispatch_mgr_priority_init(void);
-#else
-static inline void _dispatch_mgr_priority_init(void) {}
-#endif
+void _dispatch_root_queue_push(dispatch_queue_global_t dq,
+		dispatch_object_t dou, dispatch_qos_t qos);
 #if DISPATCH_USE_KEVENT_WORKQUEUE
 void _dispatch_kevent_workqueue_init(void);
-#else
-static inline void _dispatch_kevent_workqueue_init(void) {}
 #endif
+#if DISPATCH_USE_PTHREAD_ROOT_QUEUES
+void _dispatch_pthread_root_queue_dispose(dispatch_lane_class_t dq,
+		bool *allow_free);
+#endif // DISPATCH_USE_PTHREAD_ROOT_QUEUES
+void _dispatch_main_queue_push(dispatch_queue_main_t dq, dispatch_object_t dou,
+		dispatch_qos_t qos);
+void _dispatch_main_queue_wakeup(dispatch_queue_main_t dq, dispatch_qos_t qos,
+		dispatch_wakeup_flags_t flags);
+#if DISPATCH_COCOA_COMPAT
+void _dispatch_runloop_queue_wakeup(dispatch_lane_t dq,
+		dispatch_qos_t qos, dispatch_wakeup_flags_t flags);
+void _dispatch_runloop_queue_xref_dispose(dispatch_lane_t dq);
+void _dispatch_runloop_queue_dispose(dispatch_lane_t dq, bool *allow_free);
+#endif // DISPATCH_COCOA_COMPAT
+void _dispatch_mgr_queue_push(dispatch_lane_t dq, dispatch_object_t dou,
+		dispatch_qos_t qos);
+void _dispatch_mgr_queue_wakeup(dispatch_lane_t dq, dispatch_qos_t qos,
+		dispatch_wakeup_flags_t flags);
+#if DISPATCH_USE_MGR_THREAD
+void _dispatch_mgr_thread(dispatch_lane_t dq, dispatch_invoke_context_t dic,
+		dispatch_invoke_flags_t flags);
+#endif
+
 void _dispatch_apply_invoke(void *ctxt);
 void _dispatch_apply_redirect_invoke(void *ctxt);
-void _dispatch_barrier_async_detached_f(dispatch_queue_t dq, void *ctxt,
+void _dispatch_barrier_async_detached_f(dispatch_queue_class_t dq, void *ctxt,
 		dispatch_function_t func);
-void _dispatch_barrier_trysync_or_async_f(dispatch_queue_t dq, void *ctxt,
-		dispatch_function_t func);
+#define DISPATCH_BARRIER_TRYSYNC_SUSPEND 0x1
+void _dispatch_barrier_trysync_or_async_f(dispatch_lane_class_t dq, void *ctxt,
+		dispatch_function_t func, uint32_t flags);
 void _dispatch_queue_atfork_child(void);
 
-#if DISPATCH_DEBUG
-void dispatch_debug_queue(dispatch_queue_t dq, const char* str);
-#else
-static inline void dispatch_debug_queue(dispatch_queue_t dq DISPATCH_UNUSED,
-		const char* str DISPATCH_UNUSED) {}
-#endif
+DISPATCH_COLD
+size_t _dispatch_queue_debug(dispatch_queue_class_t dq,
+		char *buf, size_t bufsiz);
+DISPATCH_COLD
+size_t _dispatch_queue_debug_attr(dispatch_queue_t dq,
+		char *buf, size_t bufsiz);
 
-size_t dispatch_queue_debug(dispatch_queue_t dq, char* buf, size_t bufsiz);
-size_t _dispatch_queue_debug_attr(dispatch_queue_t dq, char* buf,
-		size_t bufsiz);
-
-#define DISPATCH_ROOT_QUEUE_COUNT (DISPATCH_QOS_MAX * 2)
+#define DISPATCH_ROOT_QUEUE_COUNT (DISPATCH_QOS_NBUCKETS * 2)
 
 // must be in lowest to highest qos order (as encoded in dispatch_qos_t)
 // overcommit qos index values need bit 1 set
@@ -656,16 +878,24 @@
 // 2 - mgr_q
 // 3 - mgr_root_q
 // 4,5,6,7,8,9,10,11,12,13,14,15 - global queues
+// 17 - workloop_fallback_q
 // we use 'xadd' on Intel, so the initial value == next assigned
-#define DISPATCH_QUEUE_SERIAL_NUMBER_INIT 16
+#define DISPATCH_QUEUE_SERIAL_NUMBER_INIT 17
 extern unsigned long volatile _dispatch_queue_serial_numbers;
-extern struct dispatch_queue_s _dispatch_root_queues[];
-extern struct dispatch_queue_s _dispatch_mgr_q;
-void _dispatch_root_queues_init(void);
+
+// mark the workloop fallback queue to avoid finalizing objects on the base
+// queue of custom outside-of-qos workloops
+#define DISPATCH_QUEUE_SERIAL_NUMBER_WLF 16
+
+extern struct dispatch_queue_static_s _dispatch_mgr_q; // serial 2
+#if DISPATCH_USE_MGR_THREAD && DISPATCH_USE_PTHREAD_ROOT_QUEUES
+extern struct dispatch_queue_global_s _dispatch_mgr_root_queue; // serial 3
+#endif
+extern struct dispatch_queue_global_s _dispatch_root_queues[]; // serials 4 - 15
 
 #if DISPATCH_DEBUG
 #define DISPATCH_ASSERT_ON_MANAGER_QUEUE() \
-       dispatch_assert_queue(&_dispatch_mgr_q)
+		dispatch_assert_queue(_dispatch_mgr_q._as_dq)
 #else
 #define DISPATCH_ASSERT_ON_MANAGER_QUEUE()
 #endif
@@ -673,75 +903,50 @@
 #pragma mark -
 #pragma mark dispatch_queue_attr_t
 
+DISPATCH_CLASS_DECL(queue_attr, OBJECT);
+struct dispatch_queue_attr_s {
+	OS_OBJECT_STRUCT_HEADER(dispatch_queue_attr);
+};
+
+typedef struct dispatch_queue_attr_info_s {
+	dispatch_qos_t dqai_qos : 8;
+	int      dqai_relpri : 8;
+	uint16_t dqai_overcommit:2;
+	uint16_t dqai_autorelease_frequency:2;
+	uint16_t dqai_concurrent:1;
+	uint16_t dqai_inactive:1;
+} dispatch_queue_attr_info_t;
+
 typedef enum {
 	_dispatch_queue_attr_overcommit_unspecified = 0,
 	_dispatch_queue_attr_overcommit_enabled,
 	_dispatch_queue_attr_overcommit_disabled,
 } _dispatch_queue_attr_overcommit_t;
 
-DISPATCH_CLASS_DECL(queue_attr);
-struct dispatch_queue_attr_s {
-	OS_OBJECT_STRUCT_HEADER(dispatch_queue_attr);
-	dispatch_priority_requested_t dqa_qos_and_relpri;
-	uint16_t dqa_overcommit:2;
-	uint16_t dqa_autorelease_frequency:2;
-	uint16_t dqa_concurrent:1;
-	uint16_t dqa_inactive:1;
-};
-
-enum {
-	DQA_INDEX_UNSPECIFIED_OVERCOMMIT = 0,
-	DQA_INDEX_NON_OVERCOMMIT,
-	DQA_INDEX_OVERCOMMIT,
-};
-
 #define DISPATCH_QUEUE_ATTR_OVERCOMMIT_COUNT 3
 
-enum {
-	DQA_INDEX_AUTORELEASE_FREQUENCY_INHERIT =
-			DISPATCH_AUTORELEASE_FREQUENCY_INHERIT,
-	DQA_INDEX_AUTORELEASE_FREQUENCY_WORK_ITEM =
-			DISPATCH_AUTORELEASE_FREQUENCY_WORK_ITEM,
-	DQA_INDEX_AUTORELEASE_FREQUENCY_NEVER =
-			DISPATCH_AUTORELEASE_FREQUENCY_NEVER,
-};
-
 #define DISPATCH_QUEUE_ATTR_AUTORELEASE_FREQUENCY_COUNT 3
 
-enum {
-	DQA_INDEX_CONCURRENT = 0,
-	DQA_INDEX_SERIAL,
-};
-
-#define DISPATCH_QUEUE_ATTR_CONCURRENCY_COUNT 2
-
-enum {
-	DQA_INDEX_ACTIVE = 0,
-	DQA_INDEX_INACTIVE,
-};
-
-#define DISPATCH_QUEUE_ATTR_INACTIVE_COUNT 2
-
-typedef enum {
-	DQA_INDEX_QOS_CLASS_UNSPECIFIED = 0,
-	DQA_INDEX_QOS_CLASS_MAINTENANCE,
-	DQA_INDEX_QOS_CLASS_BACKGROUND,
-	DQA_INDEX_QOS_CLASS_UTILITY,
-	DQA_INDEX_QOS_CLASS_DEFAULT,
-	DQA_INDEX_QOS_CLASS_USER_INITIATED,
-	DQA_INDEX_QOS_CLASS_USER_INTERACTIVE,
-} _dispatch_queue_attr_index_qos_class_t;
+#define DISPATCH_QUEUE_ATTR_QOS_COUNT (DISPATCH_QOS_MAX + 1)
 
 #define DISPATCH_QUEUE_ATTR_PRIO_COUNT (1 - QOS_MIN_RELATIVE_PRIORITY)
 
-extern const struct dispatch_queue_attr_s _dispatch_queue_attrs[]
-		[DISPATCH_QUEUE_ATTR_PRIO_COUNT]
-		[DISPATCH_QUEUE_ATTR_OVERCOMMIT_COUNT]
-		[DISPATCH_QUEUE_ATTR_AUTORELEASE_FREQUENCY_COUNT]
-		[DISPATCH_QUEUE_ATTR_CONCURRENCY_COUNT]
-		[DISPATCH_QUEUE_ATTR_INACTIVE_COUNT];
+#define DISPATCH_QUEUE_ATTR_CONCURRENCY_COUNT 2
 
-dispatch_queue_attr_t _dispatch_get_default_queue_attr(void);
+#define DISPATCH_QUEUE_ATTR_INACTIVE_COUNT 2
+
+#define DISPATCH_QUEUE_ATTR_COUNT  ( \
+		DISPATCH_QUEUE_ATTR_OVERCOMMIT_COUNT * \
+		DISPATCH_QUEUE_ATTR_AUTORELEASE_FREQUENCY_COUNT * \
+		DISPATCH_QUEUE_ATTR_QOS_COUNT * \
+		DISPATCH_QUEUE_ATTR_PRIO_COUNT * \
+		DISPATCH_QUEUE_ATTR_CONCURRENCY_COUNT * \
+		DISPATCH_QUEUE_ATTR_INACTIVE_COUNT )
+
+extern const struct dispatch_queue_attr_s
+_dispatch_queue_attrs[DISPATCH_QUEUE_ATTR_COUNT];
+
+dispatch_queue_attr_info_t _dispatch_queue_attr_to_info(dispatch_queue_attr_t);
 
 #pragma mark -
 #pragma mark dispatch_continuation_t
@@ -749,7 +954,7 @@
 // If dc_flags is less than 0x1000, then the object is a continuation.
 // Otherwise, the object has a private layout and memory management rules. The
 // layout until after 'do_next' must align with normal objects.
-#if __LP64__
+#if DISPATCH_SIZEOF_PTR == 8
 #define DISPATCH_CONTINUATION_HEADER(x) \
 	union { \
 		const void *do_vtable; \
@@ -816,54 +1021,62 @@
 		~(DISPATCH_CONTINUATION_SIZE - 1u))
 
 // continuation is a dispatch_sync or dispatch_barrier_sync
-#define DISPATCH_OBJ_SYNC_WAITER_BIT		0x001ul
+#define DC_FLAG_SYNC_WAITER				0x001ul
 // continuation acts as a barrier
-#define DISPATCH_OBJ_BARRIER_BIT			0x002ul
+#define DC_FLAG_BARRIER					0x002ul
 // continuation resources are freed on run
 // this is set on async or for non event_handler source handlers
-#define DISPATCH_OBJ_CONSUME_BIT			0x004ul
+#define DC_FLAG_CONSUME					0x004ul
 // continuation has a group in dc_data
-#define DISPATCH_OBJ_GROUP_BIT				0x008ul
+#define DC_FLAG_GROUP_ASYNC				0x008ul
 // continuation function is a block (copied in dc_ctxt)
-#define DISPATCH_OBJ_BLOCK_BIT				0x010ul
+#define DC_FLAG_BLOCK					0x010ul
 // continuation function is a block with private data, implies BLOCK_BIT
-#define DISPATCH_OBJ_BLOCK_PRIVATE_DATA_BIT	0x020ul
+#define DC_FLAG_BLOCK_WITH_PRIVATE_DATA	0x020ul
 // source handler requires fetching context from source
-#define DISPATCH_OBJ_CTXT_FETCH_BIT			0x040ul
-// use the voucher from the continuation even if the queue has voucher set
-#define DISPATCH_OBJ_ENFORCE_VOUCHER		0x080ul
-// never set on continuations, used by mach.c only
-#define DISPATCH_OBJ_MACH_BARRIER		0x1000000ul
+#define DC_FLAG_FETCH_CONTEXT			0x040ul
+// continuation is a dispatch_async_and_wait
+#define DC_FLAG_ASYNC_AND_WAIT			0x080ul
+// bit used to make sure dc_flags is never 0 for allocated continuations
+#define DC_FLAG_ALLOCATED				0x100ul
+// continuation is an internal implementation detail that should not be
+// introspected
+#define DC_FLAG_NO_INTROSPECTION		0x200ul
 
 typedef struct dispatch_continuation_s {
-	struct dispatch_object_s _as_do[0];
 	DISPATCH_CONTINUATION_HEADER(continuation);
 } *dispatch_continuation_t;
 
+dispatch_assert_aliases(dispatch_continuation_s, dispatch_object_s, do_next);
+dispatch_assert_aliases(dispatch_continuation_s, dispatch_object_s, do_vtable);
+
 typedef struct dispatch_sync_context_s {
-	struct dispatch_object_s _as_do[0];
 	struct dispatch_continuation_s _as_dc[0];
 	DISPATCH_CONTINUATION_HEADER(continuation);
 	dispatch_function_t dsc_func;
 	void *dsc_ctxt;
-#if DISPATCH_COCOA_COMPAT
 	dispatch_thread_frame_s dsc_dtf;
-#endif
 	dispatch_thread_event_s dsc_event;
 	dispatch_tid dsc_waiter;
-	dispatch_qos_t dsc_override_qos_floor;
-	dispatch_qos_t dsc_override_qos;
-	bool dsc_wlh_was_first;
-	bool dsc_release_storage;
+	uint8_t dsc_override_qos_floor;
+	uint8_t dsc_override_qos;
+	uint16_t dsc_autorelease : 2;
+	uint16_t dsc_wlh_was_first : 1;
+	uint16_t dsc_wlh_is_workloop : 1;
+	uint16_t dsc_waiter_needs_cancel : 1;
+	uint16_t dsc_release_storage : 1;
+#if DISPATCH_INTROSPECTION
+	uint16_t dsc_from_async : 1;
+#endif
 } *dispatch_sync_context_t;
 
 typedef struct dispatch_continuation_vtable_s {
 	_OS_OBJECT_CLASS_HEADER();
-	DISPATCH_INVOKABLE_VTABLE_HEADER(dispatch_continuation);
+	DISPATCH_OBJECT_VTABLE_HEADER(dispatch_continuation);
 } const *dispatch_continuation_vtable_t;
 
 #ifndef DISPATCH_CONTINUATION_CACHE_LIMIT
-#if TARGET_OS_EMBEDDED
+#if TARGET_OS_IPHONE && !TARGET_OS_SIMULATOR
 #define DISPATCH_CONTINUATION_CACHE_LIMIT 112 // one 256k heap for 64 threads
 #define DISPATCH_CONTINUATION_CACHE_LIMIT_MEMORYPRESSURE_PRESSURE_WARN 16
 #else
@@ -874,13 +1087,9 @@
 
 dispatch_continuation_t _dispatch_continuation_alloc_from_heap(void);
 void _dispatch_continuation_free_to_heap(dispatch_continuation_t c);
-void _dispatch_continuation_async(dispatch_queue_t dq,
-	dispatch_continuation_t dc);
 void _dispatch_continuation_pop(dispatch_object_t dou,
 		dispatch_invoke_context_t dic, dispatch_invoke_flags_t flags,
-		dispatch_queue_t dq);
-void _dispatch_continuation_invoke(dispatch_object_t dou,
-		voucher_t override_voucher, dispatch_invoke_flags_t flags);
+		dispatch_queue_class_t dqu);
 
 #if DISPATCH_USE_MEMORYPRESSURE_SOURCE
 extern int _dispatch_continuation_cache_limit;
@@ -902,9 +1111,13 @@
 	DC_MACH_RECV_BARRIER_TYPE,
 	DC_MACH_ASYNC_REPLY_TYPE,
 #if HAVE_PTHREAD_WORKQUEUE_QOS
+	DC_WORKLOOP_STEALING_TYPE,
 	DC_OVERRIDE_STEALING_TYPE,
 	DC_OVERRIDE_OWNING_TYPE,
 #endif
+#if HAVE_MACH
+	DC_MACH_IPC_HANDOFF_TYPE,
+#endif
 	_DC_MAX_TYPE,
 };
 
@@ -912,29 +1125,12 @@
 static inline unsigned long
 dc_type(dispatch_continuation_t dc)
 {
-	return dx_type(dc->_as_do);
-}
-
-DISPATCH_ALWAYS_INLINE
-static inline unsigned long
-dc_subtype(dispatch_continuation_t dc)
-{
-	return dx_subtype(dc->_as_do);
+	return dx_type((struct dispatch_object_s *)dc);
 }
 
 extern const struct dispatch_continuation_vtable_s
 		_dispatch_continuation_vtables[_DC_MAX_TYPE];
 
-void
-_dispatch_async_redirect_invoke(dispatch_continuation_t dc,
-		dispatch_invoke_context_t dic, dispatch_invoke_flags_t flags);
-
-#if HAVE_PTHREAD_WORKQUEUE_QOS
-void
-_dispatch_queue_override_invoke(dispatch_continuation_t dc,
-		dispatch_invoke_context_t dic, dispatch_invoke_flags_t flags);
-#endif
-
 #define DC_VTABLE(name)  (&_dispatch_continuation_vtables[DC_##name##_TYPE])
 
 #define DC_VTABLE_ENTRY(name, ...)  \
@@ -963,13 +1159,22 @@
 #pragma mark dispatch_apply_t
 
 struct dispatch_apply_s {
-	size_t volatile da_index, da_todo;
-	size_t da_iterations, da_nested;
+#if !OS_OBJECT_HAVE_OBJC1
 	dispatch_continuation_t da_dc;
+#endif
+	size_t volatile da_index, da_todo;
+	size_t da_iterations;
+#if OS_OBJECT_HAVE_OBJC1
+	dispatch_continuation_t da_dc;
+#endif
+	size_t da_nested;
 	dispatch_thread_event_s da_event;
 	dispatch_invoke_flags_t da_flags;
 	int32_t da_thr_cnt;
 };
+dispatch_static_assert(offsetof(struct dispatch_continuation_s, dc_flags) ==
+		offsetof(struct dispatch_apply_s, da_dc),
+		"These fields must alias so that leaks instruments work");
 typedef struct dispatch_apply_s *dispatch_apply_t;
 
 #pragma mark -
@@ -990,7 +1195,7 @@
 	voucher_t dbpd_voucher; \
 	dispatch_block_t dbpd_block; \
 	dispatch_group_t dbpd_group; \
-	os_mpsc_queue_t volatile dbpd_queue; \
+	dispatch_queue_t dbpd_queue; \
 	mach_port_t dbpd_thread;
 
 #if !defined(__cplusplus)
@@ -1009,55 +1214,26 @@
 #define DISPATCH_BLOCK_PRIVATE_DATA_MAGIC 0xD159B10C // 0xDISPatch_BLOCk
 
 // struct for synchronous perform: no group_leave at end of invoke
-#define DISPATCH_BLOCK_PRIVATE_DATA_PERFORM_INITIALIZER(flags, block) \
+#define DISPATCH_BLOCK_PRIVATE_DATA_PERFORM_INITIALIZER(flags, block, voucher) \
 		{ \
 			.dbpd_magic = DISPATCH_BLOCK_PRIVATE_DATA_MAGIC, \
 			.dbpd_flags = (flags), \
 			.dbpd_atomic_flags = DBF_PERFORM, \
 			.dbpd_block = (block), \
+			.dbpd_voucher = (voucher), \
 		}
 
+extern void (*const _dispatch_block_special_invoke)(void*);
+
 dispatch_block_t _dispatch_block_create(dispatch_block_flags_t flags,
 		voucher_t voucher, pthread_priority_t priority, dispatch_block_t block);
 void _dispatch_block_invoke_direct(const struct dispatch_block_private_data_s *dbcpd);
 void _dispatch_block_sync_invoke(void *block);
 
-void _dispatch_continuation_init_slow(dispatch_continuation_t dc,
+void *_dispatch_continuation_get_function_symbol(dispatch_continuation_t dc);
+dispatch_qos_t _dispatch_continuation_init_slow(dispatch_continuation_t dc,
 		dispatch_queue_class_t dqu, dispatch_block_flags_t flags);
 
-long _dispatch_barrier_trysync_f(dispatch_queue_t dq, void *ctxt,
-		dispatch_function_t func);
-
-/* exported for tests in dispatch_trysync.c */
-DISPATCH_EXPORT DISPATCH_NOTHROW
-long _dispatch_trysync_f(dispatch_queue_t dq, void *ctxt,
-		dispatch_function_t f);
-
 #endif /* __BLOCKS__ */
 
-typedef struct dispatch_pthread_root_queue_observer_hooks_s {
-	void (*queue_will_execute)(dispatch_queue_t queue);
-	void (*queue_did_execute)(dispatch_queue_t queue);
-} dispatch_pthread_root_queue_observer_hooks_s;
-typedef dispatch_pthread_root_queue_observer_hooks_s
-		*dispatch_pthread_root_queue_observer_hooks_t;
-
-#ifdef __APPLE__
-#define DISPATCH_IOHID_SPI 1
-
-DISPATCH_EXPORT DISPATCH_MALLOC DISPATCH_RETURNS_RETAINED DISPATCH_WARN_RESULT
-DISPATCH_NOTHROW DISPATCH_NONNULL4
-dispatch_queue_t
-_dispatch_pthread_root_queue_create_with_observer_hooks_4IOHID(
-	const char *label, unsigned long flags, const pthread_attr_t *attr,
-	dispatch_pthread_root_queue_observer_hooks_t observer_hooks,
-	dispatch_block_t configure);
-
-DISPATCH_EXPORT DISPATCH_PURE DISPATCH_WARN_RESULT DISPATCH_NOTHROW
-bool
-_dispatch_queue_is_exclusively_owned_by_current_thread_4IOHID(
-		dispatch_queue_t queue);
-
-#endif // __APPLE__
-
 #endif
diff --git a/src/semaphore.c b/src/semaphore.c
index 5fea942..bc96051 100644
--- a/src/semaphore.c
+++ b/src/semaphore.c
@@ -24,20 +24,6 @@
 intptr_t _dispatch_semaphore_signal_slow(dispatch_semaphore_t dsema);
 
 #pragma mark -
-#pragma mark dispatch_semaphore_class_t
-
-static void
-_dispatch_semaphore_class_init(intptr_t value, dispatch_semaphore_class_t dsemau)
-{
-	struct dispatch_semaphore_header_s *dsema = dsemau._dsema_hdr;
-
-	dsema->do_next = DISPATCH_OBJECT_LISTLESS;
-	dsema->do_targetq = _dispatch_get_root_queue(DISPATCH_QOS_DEFAULT, false);
-	dsema->dsema_value = value;
-	_dispatch_sema4_init(&dsema->dsema_sema, _DSEMA4_POLICY_FIFO);
-}
-
-#pragma mark -
 #pragma mark dispatch_semaphore_t
 
 dispatch_semaphore_t
@@ -52,9 +38,12 @@
 		return DISPATCH_BAD_INPUT;
 	}
 
-	dsema = (dispatch_semaphore_t)_dispatch_object_alloc(
-			DISPATCH_VTABLE(semaphore), sizeof(struct dispatch_semaphore_s));
-	_dispatch_semaphore_class_init(value, dsema);
+	dsema = _dispatch_object_alloc(DISPATCH_VTABLE(semaphore),
+			sizeof(struct dispatch_semaphore_s));
+	dsema->do_next = DISPATCH_OBJECT_LISTLESS;
+	dsema->do_targetq = _dispatch_get_default_queue(false);
+	dsema->dsema_value = value;
+	_dispatch_sema4_init(&dsema->dsema_sema, _DSEMA4_POLICY_FIFO);
 	dsema->dsema_orig = value;
 	return dsema;
 }
@@ -80,14 +69,14 @@
 
 	size_t offset = 0;
 	offset += dsnprintf(&buf[offset], bufsiz - offset, "%s[%p] = { ",
-			dx_kind(dsema), dsema);
+			_dispatch_object_class_name(dsema), dsema);
 	offset += _dispatch_object_debug_attr(dsema, &buf[offset], bufsiz - offset);
 #if USE_MACH_SEM
 	offset += dsnprintf(&buf[offset], bufsiz - offset, "port = 0x%u, ",
 			dsema->dsema_sema);
 #endif
 	offset += dsnprintf(&buf[offset], bufsiz - offset,
-			"value = %" PRId64 ", orig = %" PRId64 " }", dsema->dsema_value, dsema->dsema_orig);
+			"value = %" PRIdPTR ", orig = %" PRIdPTR " }", dsema->dsema_value, dsema->dsema_orig);
 	return offset;
 }
 
@@ -104,10 +93,10 @@
 dispatch_semaphore_signal(dispatch_semaphore_t dsema)
 {
 	long value = os_atomic_inc2o(dsema, dsema_value, release);
-	if (fastpath(value > 0)) {
+	if (likely(value > 0)) {
 		return 0;
 	}
-	if (slowpath(value == LONG_MIN)) {
+	if (unlikely(value == LONG_MIN)) {
 		DISPATCH_CLIENT_CRASH(value,
 				"Unbalanced call to dispatch_semaphore_signal()");
 	}
@@ -150,7 +139,7 @@
 dispatch_semaphore_wait(dispatch_semaphore_t dsema, dispatch_time_t timeout)
 {
 	long value = os_atomic_dec2o(dsema, dsema_value, acquire);
-	if (fastpath(value >= 0)) {
+	if (likely(value >= 0)) {
 		return 0;
 	}
 	return _dispatch_semaphore_wait_slow(dsema, timeout);
@@ -161,13 +150,16 @@
 
 DISPATCH_ALWAYS_INLINE
 static inline dispatch_group_t
-_dispatch_group_create_with_count(long count)
+_dispatch_group_create_with_count(uint32_t n)
 {
-	dispatch_group_t dg = (dispatch_group_t)_dispatch_object_alloc(
-			DISPATCH_VTABLE(group), sizeof(struct dispatch_group_s));
-	_dispatch_semaphore_class_init(count, dg);
-	if (count) {
-		os_atomic_store2o(dg, do_ref_cnt, 1, relaxed); // <rdar://problem/22318411>
+	dispatch_group_t dg = _dispatch_object_alloc(DISPATCH_VTABLE(group),
+			sizeof(struct dispatch_group_s));
+	dg->do_next = DISPATCH_OBJECT_LISTLESS;
+	dg->do_targetq = _dispatch_get_default_queue(false);
+	if (n) {
+		os_atomic_store2o(dg, dg_bits,
+				(uint32_t)-n * DISPATCH_GROUP_VALUE_INTERVAL, relaxed);
+		os_atomic_store2o(dg, do_ref_cnt, 1, relaxed); // <rdar://22318411>
 	}
 	return dg;
 }
@@ -185,156 +177,149 @@
 }
 
 void
-dispatch_group_enter(dispatch_group_t dg)
-{
-	long value = os_atomic_inc_orig2o(dg, dg_value, acquire);
-	if (slowpath((unsigned long)value >= (unsigned long)LONG_MAX)) {
-		DISPATCH_CLIENT_CRASH(value,
-				"Too many nested calls to dispatch_group_enter()");
-	}
-	if (value == 0) {
-		_dispatch_retain(dg); // <rdar://problem/22318411>
-	}
-}
-
-DISPATCH_NOINLINE
-static intptr_t
-_dispatch_group_wake(dispatch_group_t dg, bool needs_release)
-{
-	dispatch_continuation_t next, head, tail = NULL;
-	long rval;
-
-	// cannot use os_mpsc_capture_snapshot() because we can have concurrent
-	// _dispatch_group_wake() calls
-	head = os_atomic_xchg2o(dg, dg_notify_head, NULL, relaxed);
-	if (head) {
-		// snapshot before anything is notified/woken <rdar://problem/8554546>
-		tail = os_atomic_xchg2o(dg, dg_notify_tail, NULL, release);
-	}
-	rval = (long)os_atomic_xchg2o(dg, dg_waiters, 0, relaxed);
-	if (rval) {
-		// wake group waiters
-		_dispatch_sema4_create(&dg->dg_sema, _DSEMA4_POLICY_FIFO);
-		_dispatch_sema4_signal(&dg->dg_sema, rval);
-	}
-	uint16_t refs = needs_release ? 1 : 0; // <rdar://problem/22318411>
-	if (head) {
-		// async group notify blocks
-		do {
-			next = os_mpsc_pop_snapshot_head(head, tail, do_next);
-			dispatch_queue_t dsn_queue = (dispatch_queue_t)head->dc_data;
-			_dispatch_continuation_async(dsn_queue, head);
-			_dispatch_release(dsn_queue);
-		} while ((head = next));
-		refs++;
-	}
-	if (refs) _dispatch_release_n(dg, refs);
-	return 0;
-}
-
-void
-dispatch_group_leave(dispatch_group_t dg)
-{
-	long value = os_atomic_dec2o(dg, dg_value, release);
-	if (slowpath(value == 0)) {
-		return (void)_dispatch_group_wake(dg, true);
-	}
-	if (slowpath(value < 0)) {
-		DISPATCH_CLIENT_CRASH(value,
-				"Unbalanced call to dispatch_group_leave()");
-	}
-}
-
-void
 _dispatch_group_dispose(dispatch_object_t dou, DISPATCH_UNUSED bool *allow_free)
 {
-	dispatch_group_t dg = dou._dg;
+	uint64_t dg_state = os_atomic_load2o(dou._dg, dg_state, relaxed);
 
-	if (dg->dg_value) {
-		DISPATCH_CLIENT_CRASH(dg->dg_value,
+	if (unlikely((uint32_t)dg_state)) {
+		DISPATCH_CLIENT_CRASH((uintptr_t)dg_state,
 				"Group object deallocated while in use");
 	}
-
-	_dispatch_sema4_dispose(&dg->dg_sema, _DSEMA4_POLICY_FIFO);
 }
 
 size_t
 _dispatch_group_debug(dispatch_object_t dou, char *buf, size_t bufsiz)
 {
 	dispatch_group_t dg = dou._dg;
+	uint64_t dg_state = os_atomic_load2o(dg, dg_state, relaxed);
 
 	size_t offset = 0;
 	offset += dsnprintf(&buf[offset], bufsiz - offset, "%s[%p] = { ",
-			dx_kind(dg), dg);
+			_dispatch_object_class_name(dg), dg);
 	offset += _dispatch_object_debug_attr(dg, &buf[offset], bufsiz - offset);
-#if USE_MACH_SEM
-	offset += dsnprintf(&buf[offset], bufsiz - offset, "port = 0x%u, ",
-			dg->dg_sema);
-#endif
 	offset += dsnprintf(&buf[offset], bufsiz - offset,
-			"count = %" PRId64 ", waiters = %d }", dg->dg_value, dg->dg_waiters);
+			"count = %u, gen = %d, waiters = %d, notifs = %d }",
+			_dg_state_value(dg_state), _dg_state_gen(dg_state),
+			(bool)(dg_state & DISPATCH_GROUP_HAS_WAITERS),
+			(bool)(dg_state & DISPATCH_GROUP_HAS_NOTIFS));
 	return offset;
 }
 
 DISPATCH_NOINLINE
 static intptr_t
-_dispatch_group_wait_slow(dispatch_group_t dg, dispatch_time_t timeout)
+_dispatch_group_wait_slow(dispatch_group_t dg, uint32_t gen,
+		dispatch_time_t timeout)
 {
-	long value;
-	int orig_waiters;
-
-	// check before we cause another signal to be sent by incrementing
-	// dg->dg_waiters
-	value = os_atomic_load2o(dg, dg_value, ordered); // 19296565
-	if (value == 0) {
-		return _dispatch_group_wake(dg, false);
-	}
-
-	(void)os_atomic_inc2o(dg, dg_waiters, relaxed);
-	// check the values again in case we need to wake any threads
-	value = os_atomic_load2o(dg, dg_value, ordered); // 19296565
-	if (value == 0) {
-		_dispatch_group_wake(dg, false);
-		// Fall through to consume the extra signal, forcing timeout to avoid
-		// useless setups as it won't block
-		timeout = DISPATCH_TIME_FOREVER;
-	}
-
-	_dispatch_sema4_create(&dg->dg_sema, _DSEMA4_POLICY_FIFO);
-	switch (timeout) {
-	default:
-		if (!_dispatch_sema4_timedwait(&dg->dg_sema, timeout)) {
-			break;
+	for (;;) {
+		int rc = _dispatch_wait_on_address(&dg->dg_gen, gen, timeout, 0);
+		if (likely(gen != os_atomic_load2o(dg, dg_gen, acquire))) {
+			return 0;
 		}
-		// Fall through and try to undo the earlier change to
-		// dg->dg_waiters
-	case DISPATCH_TIME_NOW:
-		orig_waiters = dg->dg_waiters;
-		while (orig_waiters) {
-			if (os_atomic_cmpxchgvw2o(dg, dg_waiters, orig_waiters,
-					orig_waiters - 1, &orig_waiters, relaxed)) {
-				return _DSEMA4_TIMEOUT();
-			}
+		if (rc == ETIMEDOUT) {
+			return _DSEMA4_TIMEOUT();
 		}
-		// Another thread is running _dispatch_group_wake()
-		// Fall through and drain the wakeup.
-	case DISPATCH_TIME_FOREVER:
-		_dispatch_sema4_wait(&dg->dg_sema);
-		break;
 	}
-	return 0;
 }
 
 intptr_t
 dispatch_group_wait(dispatch_group_t dg, dispatch_time_t timeout)
 {
-	if (dg->dg_value == 0) {
-		return 0;
+	uint64_t old_state, new_state;
+
+	os_atomic_rmw_loop2o(dg, dg_state, old_state, new_state, relaxed, {
+		if ((old_state & DISPATCH_GROUP_VALUE_MASK) == 0) {
+			os_atomic_rmw_loop_give_up_with_fence(acquire, return 0);
+		}
+		if (unlikely(timeout == 0)) {
+			os_atomic_rmw_loop_give_up(return _DSEMA4_TIMEOUT());
+		}
+		new_state = old_state | DISPATCH_GROUP_HAS_WAITERS;
+		if (unlikely(old_state & DISPATCH_GROUP_HAS_WAITERS)) {
+			os_atomic_rmw_loop_give_up(break);
+		}
+	});
+
+	return _dispatch_group_wait_slow(dg, _dg_state_gen(new_state), timeout);
+}
+
+DISPATCH_NOINLINE
+static void
+_dispatch_group_wake(dispatch_group_t dg, uint64_t dg_state, bool needs_release)
+{
+	uint16_t refs = needs_release ? 1 : 0; // <rdar://problem/22318411>
+
+	if (dg_state & DISPATCH_GROUP_HAS_NOTIFS) {
+		dispatch_continuation_t dc, next_dc, tail;
+
+		// Snapshot before anything is notified/woken <rdar://problem/8554546>
+		dc = os_mpsc_capture_snapshot(os_mpsc(dg, dg_notify), &tail);
+		do {
+			dispatch_queue_t dsn_queue = (dispatch_queue_t)dc->dc_data;
+			next_dc = os_mpsc_pop_snapshot_head(dc, tail, do_next);
+			_dispatch_continuation_async(dsn_queue, dc,
+					_dispatch_qos_from_pp(dc->dc_priority), dc->dc_flags);
+			_dispatch_release(dsn_queue);
+		} while ((dc = next_dc));
+
+		refs++;
 	}
-	if (timeout == 0) {
-		return _DSEMA4_TIMEOUT();
+
+	if (dg_state & DISPATCH_GROUP_HAS_WAITERS) {
+		_dispatch_wake_by_address(&dg->dg_gen);
 	}
-	return _dispatch_group_wait_slow(dg, timeout);
+
+	if (refs) _dispatch_release_n(dg, refs);
+}
+
+void
+dispatch_group_leave(dispatch_group_t dg)
+{
+	// The value is incremented on a 64-bit wide atomic so that the carry for
+	// the -1 -> 0 transition increments the generation atomically.
+	uint64_t new_state, old_state = os_atomic_add_orig2o(dg, dg_state,
+			DISPATCH_GROUP_VALUE_INTERVAL, release);
+	uint32_t old_value = (uint32_t)(old_state & DISPATCH_GROUP_VALUE_MASK);
+
+	if (unlikely(old_value == DISPATCH_GROUP_VALUE_1)) {
+		old_state += DISPATCH_GROUP_VALUE_INTERVAL;
+		do {
+			new_state = old_state;
+			if ((old_state & DISPATCH_GROUP_VALUE_MASK) == 0) {
+				new_state &= ~DISPATCH_GROUP_HAS_WAITERS;
+				new_state &= ~DISPATCH_GROUP_HAS_NOTIFS;
+			} else {
+				// If the group was entered again since the atomic_add above,
+				// we can't clear the waiters bit anymore as we don't know
+				// which generation the waiters are waiting on
+				new_state &= ~DISPATCH_GROUP_HAS_NOTIFS;
+			}
+			if (old_state == new_state) break;
+		} while (unlikely(!os_atomic_cmpxchgv2o(dg, dg_state,
+				old_state, new_state, &old_state, relaxed)));
+		return _dispatch_group_wake(dg, old_state, true);
+	}
+
+	if (unlikely(old_value == 0)) {
+		DISPATCH_CLIENT_CRASH((uintptr_t)old_value,
+				"Unbalanced call to dispatch_group_leave()");
+	}
+}
+
+void
+dispatch_group_enter(dispatch_group_t dg)
+{
+	// The value is decremented on a 32-bit wide atomic so that the carry
+	// for the 0 -> -1 transition is not propagated to the upper 32 bits.
+	uint32_t old_bits = os_atomic_sub_orig2o(dg, dg_bits,
+			DISPATCH_GROUP_VALUE_INTERVAL, acquire);
+	uint32_t old_value = old_bits & DISPATCH_GROUP_VALUE_MASK;
+	if (unlikely(old_value == 0)) {
+		_dispatch_retain(dg); // <rdar://problem/22318411>
+	}
+	if (unlikely(old_value == DISPATCH_GROUP_VALUE_MAX)) {
+		DISPATCH_CLIENT_CRASH(old_bits,
+				"Too many nested calls to dispatch_group_enter()");
+	}
 }
 
 DISPATCH_ALWAYS_INLINE
@@ -342,16 +327,24 @@
 _dispatch_group_notify(dispatch_group_t dg, dispatch_queue_t dq,
 		dispatch_continuation_t dsn)
 {
+	uint64_t old_state, new_state;
+	dispatch_continuation_t prev;
+
 	dsn->dc_data = dq;
-	dsn->do_next = NULL;
 	_dispatch_retain(dq);
-	if (os_mpsc_push_update_tail(dg, dg_notify, dsn, do_next)) {
-		_dispatch_retain(dg);
-		os_atomic_store2o(dg, dg_notify_head, dsn, ordered);
-		// seq_cst with atomic store to notify_head <rdar://problem/11750916>
-		if (os_atomic_load2o(dg, dg_value, ordered) == 0) {
-			_dispatch_group_wake(dg, false);
-		}
+
+	prev = os_mpsc_push_update_tail(os_mpsc(dg, dg_notify), dsn, do_next);
+	if (os_mpsc_push_was_empty(prev)) _dispatch_retain(dg);
+	os_mpsc_push_update_prev(os_mpsc(dg, dg_notify), prev, dsn, do_next);
+	if (os_mpsc_push_was_empty(prev)) {
+		os_atomic_rmw_loop2o(dg, dg_state, old_state, new_state, release, {
+			new_state = old_state | DISPATCH_GROUP_HAS_NOTIFS;
+			if ((uint32_t)old_state == 0) {
+				os_atomic_rmw_loop_give_up({
+					return _dispatch_group_wake(dg, new_state, false);
+				});
+			}
+		});
 	}
 }
 
@@ -361,8 +354,7 @@
 		dispatch_function_t func)
 {
 	dispatch_continuation_t dsn = _dispatch_continuation_alloc();
-	_dispatch_continuation_init_f(dsn, dq, ctxt, func, 0, 0,
-			DISPATCH_OBJ_CONSUME_BIT);
+	_dispatch_continuation_init_f(dsn, dq, ctxt, func, 0, DC_FLAG_CONSUME);
 	_dispatch_group_notify(dg, dq, dsn);
 }
 
@@ -372,7 +364,44 @@
 		dispatch_block_t db)
 {
 	dispatch_continuation_t dsn = _dispatch_continuation_alloc();
-	_dispatch_continuation_init(dsn, dq, db, 0, 0, DISPATCH_OBJ_CONSUME_BIT);
+	_dispatch_continuation_init(dsn, dq, db, 0, DC_FLAG_CONSUME);
 	_dispatch_group_notify(dg, dq, dsn);
 }
 #endif
+
+DISPATCH_ALWAYS_INLINE
+static inline void
+_dispatch_continuation_group_async(dispatch_group_t dg, dispatch_queue_t dq,
+		dispatch_continuation_t dc, dispatch_qos_t qos)
+{
+	dispatch_group_enter(dg);
+	dc->dc_data = dg;
+	_dispatch_continuation_async(dq, dc, qos, dc->dc_flags);
+}
+
+DISPATCH_NOINLINE
+void
+dispatch_group_async_f(dispatch_group_t dg, dispatch_queue_t dq, void *ctxt,
+		dispatch_function_t func)
+{
+	dispatch_continuation_t dc = _dispatch_continuation_alloc();
+	uintptr_t dc_flags = DC_FLAG_CONSUME | DC_FLAG_GROUP_ASYNC;
+	dispatch_qos_t qos;
+
+	qos = _dispatch_continuation_init_f(dc, dq, ctxt, func, 0, dc_flags);
+	_dispatch_continuation_group_async(dg, dq, dc, qos);
+}
+
+#ifdef __BLOCKS__
+void
+dispatch_group_async(dispatch_group_t dg, dispatch_queue_t dq,
+		dispatch_block_t db)
+{
+	dispatch_continuation_t dc = _dispatch_continuation_alloc();
+	uintptr_t dc_flags = DC_FLAG_CONSUME | DC_FLAG_GROUP_ASYNC;
+	dispatch_qos_t qos;
+
+	qos = _dispatch_continuation_init(dc, dq, db, 0, dc_flags);
+	_dispatch_continuation_group_async(dg, dq, dc, qos);
+}
+#endif
diff --git a/src/semaphore_internal.h b/src/semaphore_internal.h
index 227d475..b9b6c7b 100644
--- a/src/semaphore_internal.h
+++ b/src/semaphore_internal.h
@@ -29,45 +29,80 @@
 
 struct dispatch_queue_s;
 
-#define DISPATCH_SEMAPHORE_HEADER(cls, ns) \
-	DISPATCH_OBJECT_HEADER(cls); \
-	intptr_t volatile ns##_value; \
-	_dispatch_sema4_t ns##_sema
-
-struct dispatch_semaphore_header_s {
-	DISPATCH_SEMAPHORE_HEADER(semaphore, dsema);
-};
-
-DISPATCH_CLASS_DECL(semaphore);
+DISPATCH_CLASS_DECL(semaphore, OBJECT);
 struct dispatch_semaphore_s {
-	DISPATCH_SEMAPHORE_HEADER(semaphore, dsema);
+	DISPATCH_OBJECT_HEADER(semaphore);
+	intptr_t volatile dsema_value;
 	intptr_t dsema_orig;
+	_dispatch_sema4_t dsema_sema;
 };
 
-DISPATCH_CLASS_DECL(group);
+/*
+ * Dispatch Group State:
+ *
+ * Generation (32 - 63):
+ *   32-bit counter that is incremented each time the group value reaches
+ *   0 after a dispatch_group_leave. This 32bit word is used to block waiters
+ *   (threads in dispatch_group_wait) in _dispatch_wait_on_address() until the
+ *   generation changes.
+ *
+ * Value (2 - 31):
+ *   30 bit value counter of the number of times the group was entered.
+ *   dispatch_group_enter counts downward on 32bits, and dispatch_group_leave
+ *   upward on 64bits, which causes the generation to bump each time the value
+ *   reaches 0 again due to carry propagation.
+ *
+ * Has Notifs (1):
+ *   This bit is set when the list of notifications on the group becomes
+ *   non-empty. It is also used as a lock: the thread that successfully
+ *   clears this bit is responsible for firing the notifications.
+ *
+ * Has Waiters (0):
+ *   This bit is set when there are waiters (threads in dispatch_group_wait)
+ *   that need to be woken up the next time the value reaches 0. Waiters take
+ *   a snapshot of the generation before waiting and will wait for the
+ *   generation to change before they return.
+ */
+#define DISPATCH_GROUP_GEN_MASK         0xffffffff00000000ULL
+#define DISPATCH_GROUP_VALUE_MASK       0x00000000fffffffcULL
+#define DISPATCH_GROUP_VALUE_INTERVAL   0x0000000000000004ULL
+#define DISPATCH_GROUP_VALUE_1          DISPATCH_GROUP_VALUE_MASK
+#define DISPATCH_GROUP_VALUE_MAX        DISPATCH_GROUP_VALUE_INTERVAL
+#define DISPATCH_GROUP_HAS_NOTIFS       0x0000000000000002ULL
+#define DISPATCH_GROUP_HAS_WAITERS      0x0000000000000001ULL
+DISPATCH_CLASS_DECL(group, OBJECT);
 struct dispatch_group_s {
-	DISPATCH_SEMAPHORE_HEADER(group, dg);
-	int volatile dg_waiters;
+	DISPATCH_OBJECT_HEADER(group);
+	DISPATCH_UNION_LE(uint64_t volatile dg_state,
+			uint32_t dg_bits,
+			uint32_t dg_gen
+	) DISPATCH_ATOMIC64_ALIGN;
 	struct dispatch_continuation_s *volatile dg_notify_head;
 	struct dispatch_continuation_s *volatile dg_notify_tail;
 };
 
-typedef union {
-	struct dispatch_semaphore_header_s *_dsema_hdr;
-	struct dispatch_semaphore_s *_dsema;
-	struct dispatch_group_s *_dg;
-#if USE_OBJC
-	dispatch_semaphore_t _objc_dsema;
-	dispatch_group_t _objc_dg;
-#endif
-} dispatch_semaphore_class_t DISPATCH_TRANSPARENT_UNION;
+DISPATCH_ALWAYS_INLINE
+static inline uint32_t
+_dg_state_value(uint64_t dg_state)
+{
+	return (uint32_t)(-((uint32_t)dg_state & DISPATCH_GROUP_VALUE_MASK)) >> 2;
+}
+
+DISPATCH_ALWAYS_INLINE
+static inline uint32_t
+_dg_state_gen(uint64_t dg_state)
+{
+	return (uint32_t)(dg_state >> 32);
+}
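The enter/leave arithmetic described in the state layout comment above can be checked with a small standalone simulation (hypothetical demo code, not libdispatch): enter subtracts the interval on the low 32 bits only, while leave adds it on the whole 64-bit word, so the carry out of the value field bumps the generation when the count returns to zero.

    #include <stdint.h>
    #include <stdio.h>

    #define VALUE_MASK     0x00000000fffffffcULL /* mirrors DISPATCH_GROUP_VALUE_MASK */
    #define VALUE_INTERVAL 0x0000000000000004ULL

    static uint32_t state_value(uint64_t s) /* same decoding as _dg_state_value() */
    {
    	return (uint32_t)(-((uint32_t)s & VALUE_MASK)) >> 2;
    }

    static uint32_t state_gen(uint64_t s) { return (uint32_t)(s >> 32); }

    int main(void)
    {
    	uint64_t dg_state = 0;

    	/* dispatch_group_enter: 32-bit subtract, no carry into the generation */
    	uint32_t lo = (uint32_t)dg_state - (uint32_t)VALUE_INTERVAL;
    	dg_state = (dg_state & ~0xffffffffULL) | lo;
    	printf("after enter: value=%u gen=%u\n",
    			state_value(dg_state), state_gen(dg_state)); /* value=1 gen=0 */

    	/* dispatch_group_leave: 64-bit add, the -1 -> 0 carry bumps the generation */
    	dg_state += VALUE_INTERVAL;
    	printf("after leave: value=%u gen=%u\n",
    			state_value(dg_state), state_gen(dg_state)); /* value=0 gen=1 */
    	return 0;
    }

Waiters snapshot the generation before blocking, so this carry-driven bump is what lets _dispatch_group_wait_slow() detect that the group emptied at least once since the snapshot.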
 
 dispatch_group_t _dispatch_group_create_and_enter(void);
 void _dispatch_group_dispose(dispatch_object_t dou, bool *allow_free);
+DISPATCH_COLD
 size_t _dispatch_group_debug(dispatch_object_t dou, char *buf,
 		size_t bufsiz);
 
 void _dispatch_semaphore_dispose(dispatch_object_t dou, bool *allow_free);
+DISPATCH_COLD
 size_t _dispatch_semaphore_debug(dispatch_object_t dou, char *buf,
 		size_t bufsiz);
 
diff --git a/src/shims.h b/src/shims.h
index 85f4026..22aa486 100644
--- a/src/shims.h
+++ b/src/shims.h
@@ -29,38 +29,38 @@
 
 #if !defined(_WIN32)
 #include <pthread.h>
-#endif
-#if defined(_WIN32)
+#else // defined(_WIN32)
 #include "shims/generic_win_stubs.h"
+#endif // defined(_WIN32)
+
+#if defined(_WIN32) || defined(__ANDROID__)
 #include "shims/generic_sys_queue.h"
 #endif
 
 #ifdef __ANDROID__
 #include "shims/android_stubs.h"
-#endif
+#endif // __ANDROID__
 
 #if !HAVE_MACH
 #include "shims/mach.h"
 #endif
-
-#include "shims/hw_config.h"
-#include "shims/priority.h"
-
-#if HAVE_PTHREAD_WORKQUEUES
-#if __has_include(<pthread/workqueue_private.h>)
-#include <pthread/workqueue_private.h>
-#else
-#include <pthread_workqueue.h>
-#endif
-#ifndef WORKQ_FEATURE_MAINTENANCE
-#define WORKQ_FEATURE_MAINTENANCE 0x10
-#endif
-#endif // HAVE_PTHREAD_WORKQUEUES
+#include "shims/target.h"
 
 #if DISPATCH_USE_INTERNAL_WORKQUEUE
 #include "event/workqueue_internal.h"
+#elif HAVE_PTHREAD_WORKQUEUES
+#include <pthread/workqueue_private.h>
+#else
+#error Unsupported configuration
 #endif
 
+#ifndef DISPATCH_WORKQ_MAX_PTHREAD_COUNT
+#define DISPATCH_WORKQ_MAX_PTHREAD_COUNT 255
+#endif
+
+#include "shims/hw_config.h"
+#include "shims/priority.h"
+
 #if HAVE_PTHREAD_NP_H
 #include <pthread_np.h>
 #endif
@@ -157,7 +157,7 @@
 #if HAVE_PTHREAD_QOS_H && __has_include(<pthread/qos_private.h>) && \
 		defined(PTHREAD_MAX_PARALLELISM_PHYSICAL) && \
 		DISPATCH_HAVE_HW_CONFIG_COMMPAGE && \
-		DISPATCH_MIN_REQUIRED_OSX_AT_LEAST(109900)
+		DISPATCH_MIN_REQUIRED_OSX_AT_LEAST(101300)
 #define DISPATCH_USE_PTHREAD_QOS_MAX_PARALLELISM 1
 #define DISPATCH_MAX_PARALLELISM_PHYSICAL PTHREAD_MAX_PARALLELISM_PHYSICAL
 #else
diff --git a/src/shims/atomic.h b/src/shims/atomic.h
index 1cb0940..0bb27d3 100644
--- a/src/shims/atomic.h
+++ b/src/shims/atomic.h
@@ -71,7 +71,7 @@
 #define _os_atomic_c11_op(p, v, m, o, op) \
 		({ _os_atomic_basetypeof(p) _v = (v), _r = \
 		atomic_fetch_##o##_explicit(_os_atomic_c11_atomic(p), _v, \
-		memory_order_##m); (__typeof__(*(p)))(_r op _v); })
+		memory_order_##m); (__typeof__(_r))(_r op _v); })
 #define _os_atomic_c11_op_orig(p, v, m, o, op) \
 		atomic_fetch_##o##_explicit(_os_atomic_c11_atomic(p), v, \
 		memory_order_##m)
@@ -161,7 +161,7 @@
 		do { \
 			__VA_ARGS__; \
 			_result = os_atomic_cmpxchgvw(_p, ov, nv, &ov, m); \
-		} while (os_unlikely(!_result)); \
+		} while (unlikely(!_result)); \
 		_result; \
 	})
 #define os_atomic_rmw_loop2o(p, f, ov, nv, m, ...) \
diff --git a/src/shims/atomic_sfb.h b/src/shims/atomic_sfb.h
index b8e3260..a87def0 100644
--- a/src/shims/atomic_sfb.h
+++ b/src/shims/atomic_sfb.h
@@ -90,11 +90,11 @@
 	os_atomic_rmw_loop(p, b, b_masked, relaxed, {
 		// ffs returns 1 + index, or 0 if none set
 		index = (unsigned int)__builtin_ffsl((long)~b);
-		if (slowpath(index == 0)) {
+		if (unlikely(index == 0)) {
 			os_atomic_rmw_loop_give_up(return UINT_MAX);
 		}
 		index--;
-		if (slowpath(index > max_index)) {
+		if (unlikely(index > max_index)) {
 			os_atomic_rmw_loop_give_up(return UINT_MAX);
 		}
 		b_masked = b | (1UL << index);
diff --git a/src/shims/generic_sys_queue.h b/src/shims/generic_sys_queue.h
index 1d9a18d..fd4ac1d 100644
--- a/src/shims/generic_sys_queue.h
+++ b/src/shims/generic_sys_queue.h
@@ -89,4 +89,57 @@
 		} \
 	} while(0)
 
+#define TAILQ_HEAD_INITIALIZER(head) \
+	{ NULL, (head).tq_first }
+
+#define TAILQ_CONCAT(head1, head2, field) do { \
+		if (!TAILQ_EMPTY(head2)) { \
+			(head1)->tq_last = (head2)->tq_first; \
+			(head1)->tq_first->field.te_prev = (head1)->tq_last; \
+			(head1)->tq_last = (head2)->tq_last; \
+			TAILQ_INIT((head2)); \
+		} \
+	} while (0)
+
+#define LIST_HEAD(name, type) struct name { \
+		struct type *lh_first; \
+	}
+
+#define LIST_ENTRY(type) struct { \
+		struct type *le_next; \
+		struct type *le_prev; \
+	}
+
+#define	LIST_EMPTY(head) ((head)->lh_first == NULL)
+
+#define LIST_FIRST(head) ((head)->lh_first)
+
+#define LIST_FOREACH(var, head, field) \
+	for ((var) = LIST_FIRST((head)); \
+		(var); \
+		(var) = LIST_NEXT((var), field))
+
+#define	LIST_FOREACH_SAFE(var, head, field, tvar) \
+	for ((var) = LIST_FIRST((head)); \
+		(var) && ((tvar) = LIST_NEXT((var), field), 1); \
+		(var) = (tvar))
+
+#define	LIST_INIT(head) do { \
+	LIST_FIRST((head)) = NULL; \
+} while (0)
+
+#define LIST_NEXT(elm, field) ((elm)->field.le_next)
+
+#define LIST_REMOVE(elm, field) do { \
+		if (LIST_NEXT((elm), field) != NULL) \
+			LIST_NEXT((elm), field)->field.le_prev = (elm)->field.le_prev; \
+	} while (0)
+
+#define LIST_INSERT_HEAD(head, elm, field) do { \
+		if ((LIST_NEXT((elm), field) = LIST_FIRST((head))) != NULL) \
+			LIST_FIRST((head))->field.le_prev = LIST_NEXT((elm), field); \
+		LIST_FIRST((head)) = (elm); \
+		(elm)->field.le_prev = LIST_FIRST((head)); \
+	} while (0)
+
 #endif // __DISPATCH_SHIMS_SYS_QUEUE__
diff --git a/src/shims/generic_win_stubs.c b/src/shims/generic_win_stubs.c
new file mode 100644
index 0000000..67b6f51
--- /dev/null
+++ b/src/shims/generic_win_stubs.c
@@ -0,0 +1,24 @@
+#include "internal.h"
+
+/*
+ * This file contains stubbed out functions we are using during
+ * the initial Windows port.  When the port is complete, this file
+ * should be empty (and thus removed).
+ */
+
+void
+_dispatch_runloop_queue_dispose(dispatch_queue_t dq DISPATCH_UNUSED,
+		bool *allow_free DISPATCH_UNUSED)
+{
+	WIN_PORT_ERROR();
+}
+
+void
+_dispatch_runloop_queue_xref_dispose(dispatch_queue_t dq DISPATCH_UNUSED)
+{
+	WIN_PORT_ERROR();
+}
+
+/*
+ * Stubbed out static data
+ */
diff --git a/src/shims/generic_win_stubs.h b/src/shims/generic_win_stubs.h
index c983cdc..1ce41f7 100644
--- a/src/shims/generic_win_stubs.h
+++ b/src/shims/generic_win_stubs.h
@@ -34,4 +34,6 @@
 #define WIN_PORT_ERROR() \
 		_RPTF1(_CRT_ASSERT, "WIN_PORT_ERROR in %s", __FUNCTION__)
 
+#define strcasecmp _stricmp
+
 #endif
diff --git a/src/shims/hw_config.h b/src/shims/hw_config.h
index e788727..89b7f8f 100644
--- a/src/shims/hw_config.h
+++ b/src/shims/hw_config.h
@@ -43,6 +43,13 @@
 #error "could not determine pointer size as a constant int"
 #endif // __SIZEOF_POINTER__
 
+#define DISPATCH_CACHELINE_SIZE 64u
+#define ROUND_UP_TO_CACHELINE_SIZE(x) \
+		(((x) + (DISPATCH_CACHELINE_SIZE - 1u)) & \
+		~(DISPATCH_CACHELINE_SIZE - 1u))
+#define DISPATCH_CACHELINE_ALIGN \
+		__attribute__((__aligned__(DISPATCH_CACHELINE_SIZE)))
+
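As a worked instance of the rounding formula just defined (assuming the macros above are in scope), adding DISPATCH_CACHELINE_SIZE - 1 and masking with its complement rounds any size up to the next 64-byte boundary:

    _Static_assert(ROUND_UP_TO_CACHELINE_SIZE(100) == 128, "100 rounds up to 128");
    _Static_assert(ROUND_UP_TO_CACHELINE_SIZE(64) == 64, "multiples are unchanged");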
 typedef enum {
 	_dispatch_hw_config_logical_cpus,
 	_dispatch_hw_config_physical_cpus,
diff --git a/src/shims/lock.c b/src/shims/lock.c
index e1b94dc..1f3a38b 100644
--- a/src/shims/lock.c
+++ b/src/shims/lock.c
@@ -20,19 +20,9 @@
 
 #include "internal.h"
 
-#define _dlock_syscall_switch(err, syscall, ...) \
-	for (;;) { \
-		int err; \
-		switch ((err = ((syscall) < 0 ? errno : 0))) { \
-		case EINTR: continue; \
-		__VA_ARGS__ \
-		} \
-		break; \
-	}
-
 #if TARGET_OS_MAC
-_Static_assert(DLOCK_LOCK_DATA_CONTENTION == ULF_WAIT_WORKQ_DATA_CONTENTION,
-		"values should be the same");
+dispatch_static_assert(DLOCK_LOCK_DATA_CONTENTION ==
+		ULF_WAIT_WORKQ_DATA_CONTENTION);
 
 #if !HAVE_UL_UNFAIR_LOCK
 DISPATCH_ALWAYS_INLINE
@@ -161,8 +151,8 @@
 		uint64_t nsec = _dispatch_timeout(timeout);
 		_timeout.tv_sec = (__typeof__(_timeout.tv_sec))(nsec / NSEC_PER_SEC);
 		_timeout.tv_nsec = (__typeof__(_timeout.tv_nsec))(nsec % NSEC_PER_SEC);
-		kr = slowpath(semaphore_timedwait(*sema, _timeout));
-	} while (kr == KERN_ABORTED);
+		kr = semaphore_timedwait(*sema, _timeout);
+	} while (unlikely(kr == KERN_ABORTED));
 
 	if (kr == KERN_OPERATION_TIMED_OUT) {
 		return true;
@@ -220,8 +210,8 @@
 		uint64_t nsec = _dispatch_time_nanoseconds_since_epoch(timeout);
 		_timeout.tv_sec = (__typeof__(_timeout.tv_sec))(nsec / NSEC_PER_SEC);
 		_timeout.tv_nsec = (__typeof__(_timeout.tv_nsec))(nsec % NSEC_PER_SEC);
-		ret = slowpath(sem_timedwait(sema, &_timeout));
-	} while (ret == -1 && errno == EINTR);
+		ret = sem_timedwait(sema, &_timeout);
+	} while (unlikely(ret == -1 && errno == EINTR));
 
 	if (ret == -1 && errno == ETIMEDOUT) {
 		return true;
@@ -270,8 +260,7 @@
 	if (ms) timeEndPeriod(ms);
 }
 
-void
-_dispatch_sema4_create_slow(_dispatch_sema4_t *s4, int policy DISPATCH_UNUSED)
+void _dispatch_sema4_init(_dispatch_sema4_t *sema, int policy DISPATCH_UNUSED)
 {
 	HANDLE tmp;
 
@@ -281,7 +270,7 @@
 		_dispatch_temporary_resource_shortage();
 	}
 
-	if (!os_atomic_cmpxchg(s4, 0, tmp, relaxed)) {
+	if (!os_atomic_cmpxchg(sema, 0, tmp, relaxed)) {
 		CloseHandle(tmp);
 	}
 }
@@ -318,7 +307,7 @@
 	nsec = _dispatch_timeout(timeout);
 	msec = (DWORD)(nsec / (uint64_t)1000000);
 	resolution = _push_timer_resolution(msec);
-	wait_result = WaitForSingleObject(sema, msec);
+	wait_result = WaitForSingleObject(*sema, msec);
 	_pop_timer_resolution(resolution);
 	return wait_result == WAIT_TIMEOUT;
 }
@@ -327,62 +316,79 @@
 #endif
 
 #pragma mark - ulock wrappers
+#if HAVE_UL_COMPARE_AND_WAIT || HAVE_UL_UNFAIR_LOCK
+
+// returns 0, ETIMEDOUT, ENOTEMPTY, EFAULT, EINTR
+static int
+_dlock_wait(uint32_t *uaddr, uint32_t val, uint32_t timeout, uint32_t flags)
+{
+	for (;;) {
+		int rc = __ulock_wait(flags | ULF_NO_ERRNO, uaddr, val, timeout);
+		if (rc > 0) {
+			return ENOTEMPTY;
+		}
+		switch (-rc) {
+		case 0:
+			return 0;
+		case EINTR:
+			/*
+			 * if we have a timeout, we need to return for the caller to
+			 * recompute the new deadline, else just go back to wait.
+			 */
+			if (timeout == 0) {
+				continue;
+			}
+			/* FALLTHROUGH */
+		case ETIMEDOUT:
+		case EFAULT:
+			return -rc;
+		default:
+			DISPATCH_INTERNAL_CRASH(-rc, "ulock_wait() failed");
+		}
+	}
+}
+
+static void
+_dlock_wake(uint32_t *uaddr, uint32_t flags)
+{
+	int rc = __ulock_wake(flags | ULF_NO_ERRNO, uaddr, 0);
+	if (rc == 0 || rc == -ENOENT) return;
+	DISPATCH_INTERNAL_CRASH(-rc, "ulock_wake() failed");
+}
+
+#endif // HAVE_UL_COMPARE_AND_WAIT || HAVE_UL_UNFAIR_LOCK
 #if HAVE_UL_COMPARE_AND_WAIT
 
 static int
 _dispatch_ulock_wait(uint32_t *uaddr, uint32_t val, uint32_t timeout,
 		uint32_t flags)
 {
-	int rc;
-	_dlock_syscall_switch(err,
-		rc = __ulock_wait(UL_COMPARE_AND_WAIT | flags, uaddr, val, timeout),
-		case 0: return rc > 0 ? ENOTEMPTY : 0;
-		case ETIMEDOUT: case EFAULT: return err;
-		case EOWNERDEAD: DISPATCH_CLIENT_CRASH(*uaddr,
-				"corruption of lock owner");
-		default: DISPATCH_INTERNAL_CRASH(err, "ulock_wait() failed");
-	);
+	return _dlock_wait(uaddr, val, timeout, flags | UL_COMPARE_AND_WAIT);
 }
 
 static void
 _dispatch_ulock_wake(uint32_t *uaddr, uint32_t flags)
 {
-	_dlock_syscall_switch(err,
-		__ulock_wake(UL_COMPARE_AND_WAIT | flags, uaddr, 0),
-		case 0: case ENOENT: break;
-		default: DISPATCH_INTERNAL_CRASH(err, "ulock_wake() failed");
-	);
+	return _dlock_wake(uaddr, flags | UL_COMPARE_AND_WAIT);
 }
 
-#endif
+#endif // HAVE_UL_COMPARE_AND_WAIT
 #if HAVE_UL_UNFAIR_LOCK
 
-// returns 0, ETIMEDOUT, ENOTEMPTY, EFAULT
 static int
 _dispatch_unfair_lock_wait(uint32_t *uaddr, uint32_t val, uint32_t timeout,
 		dispatch_lock_options_t flags)
 {
-	int rc;
-	_dlock_syscall_switch(err,
-		rc = __ulock_wait(UL_UNFAIR_LOCK | flags, uaddr, val, timeout),
-		case 0: return rc > 0 ? ENOTEMPTY : 0;
-		case ETIMEDOUT: case EFAULT: return err;
-		case EOWNERDEAD: DISPATCH_CLIENT_CRASH(*uaddr,
-				"corruption of lock owner");
-		default: DISPATCH_INTERNAL_CRASH(err, "ulock_wait() failed");
-	);
+	return _dlock_wait(uaddr, val, timeout, flags | UL_UNFAIR_LOCK);
 }
 
 static void
 _dispatch_unfair_lock_wake(uint32_t *uaddr, uint32_t flags)
 {
-	_dlock_syscall_switch(err, __ulock_wake(UL_UNFAIR_LOCK | flags, uaddr, 0),
-		case 0: case ENOENT: break;
-		default: DISPATCH_INTERNAL_CRASH(err, "ulock_wake() failed");
-	);
+	return _dlock_wake(uaddr, flags | UL_UNFAIR_LOCK);
 }
 
-#endif
+#endif // HAVE_UL_UNFAIR_LOCK
 #pragma mark - futex wrappers
 #if HAVE_FUTEX
 #include <sys/time.h>
@@ -401,70 +407,111 @@
 	return (int)syscall(SYS_futex, uaddr, op | opflags, val, timeout, uaddr2, val3);
 }
 
+// returns 0, ETIMEDOUT, EFAULT, EINTR, EWOULDBLOCK
+DISPATCH_ALWAYS_INLINE
+static inline int
+_futex_blocking_op(uint32_t *uaddr, int futex_op, uint32_t val,
+		const struct timespec *timeout, int flags)
+{
+	for (;;) {
+		int rc = _dispatch_futex(uaddr, futex_op, val, timeout, NULL, 0, flags);
+		if (!rc) {
+			return 0;
+		}
+		switch (errno) {
+		case EINTR:
+			/*
+			 * if we have a timeout, we need to return for the caller to
+			 * recompute the new deadline, else just go back to wait.
+			 */
+			if (timeout == 0) {
+				continue;
+			}
+			/* FALLTHROUGH */
+		case ETIMEDOUT:
+		case EFAULT:
+		case EWOULDBLOCK:
+			return errno;
+		default:
+			DISPATCH_INTERNAL_CRASH(errno, "_futex_op() failed");
+		}
+	}
+}
+
 static int
 _dispatch_futex_wait(uint32_t *uaddr, uint32_t val,
 		const struct timespec *timeout, int opflags)
 {
-	_dlock_syscall_switch(err,
-		_dispatch_futex(uaddr, FUTEX_WAIT, val, timeout, NULL, 0, opflags),
-		case 0: case EWOULDBLOCK: case ETIMEDOUT: return err;
-		default: DISPATCH_CLIENT_CRASH(err, "futex_wait() failed");
-	);
+	return _futex_blocking_op(uaddr, FUTEX_WAIT, val, timeout, opflags);
 }
 
 static void
 _dispatch_futex_wake(uint32_t *uaddr, int wake, int opflags)
 {
-	int rc;
-	_dlock_syscall_switch(err,
-		rc = _dispatch_futex(uaddr, FUTEX_WAKE, (uint32_t)wake, NULL, NULL, 0, opflags),
-		case 0: return;
-		default: DISPATCH_CLIENT_CRASH(err, "futex_wake() failed");
-	);
+	int rc = _dispatch_futex(uaddr, FUTEX_WAKE, (uint32_t)wake, NULL, NULL, 0,
+			opflags);
+	if (rc >= 0 || errno == ENOENT) return;
+	DISPATCH_INTERNAL_CRASH(errno, "futex_wake() failed");
 }
 
 static void
 _dispatch_futex_lock_pi(uint32_t *uaddr, struct timespec *timeout, int detect,
 	      int opflags)
 {
-	_dlock_syscall_switch(err,
-		_dispatch_futex(uaddr, FUTEX_LOCK_PI, (uint32_t)detect, timeout,
-				NULL, 0, opflags),
-		case 0: return;
-		default: DISPATCH_CLIENT_CRASH(errno, "futex_lock_pi() failed");
-	);
+	int err = _futex_blocking_op(uaddr, FUTEX_LOCK_PI, (uint32_t)detect,
+			timeout, opflags);
+	if (err == 0) return;
+	DISPATCH_CLIENT_CRASH(err, "futex_lock_pi() failed");
 }
 
 static void
 _dispatch_futex_unlock_pi(uint32_t *uaddr, int opflags)
 {
-	_dlock_syscall_switch(err,
-		_dispatch_futex(uaddr, FUTEX_UNLOCK_PI, 0, NULL, NULL, 0, opflags),
-		case 0: return;
-		default: DISPATCH_CLIENT_CRASH(errno, "futex_unlock_pi() failed");
-	);
+	int rc = _dispatch_futex(uaddr, FUTEX_UNLOCK_PI, 0, NULL, NULL, 0, opflags);
+	if (rc == 0) return;
+	DISPATCH_CLIENT_CRASH(errno, "futex_unlock_pi() failed");
 }
 
 #endif
 #pragma mark - wait for address
 
-void
-_dispatch_wait_on_address(uint32_t volatile *address, uint32_t value,
-		dispatch_lock_options_t flags)
+int
+_dispatch_wait_on_address(uint32_t volatile *_address, uint32_t value,
+		dispatch_time_t timeout, dispatch_lock_options_t flags)
 {
-#if HAVE_UL_COMPARE_AND_WAIT
-	_dispatch_ulock_wait((uint32_t *)address, value, 0, flags);
-#elif HAVE_FUTEX
-	_dispatch_futex_wait((uint32_t *)address, value, NULL, FUTEX_PRIVATE_FLAG);
-#elif defined(_WIN32)
-	WaitOnAddress(address, (PVOID)(uintptr_t)value, sizeof(value), INFINITE);
-#else
-	mach_msg_timeout_t timeout = 1;
-	while (os_atomic_load(address, relaxed) == value) {
-		thread_switch(MACH_PORT_NULL, SWITCH_OPTION_WAIT, timeout++);
+	uint32_t *address = (uint32_t *)_address;
+	uint64_t nsecs = _dispatch_timeout(timeout);
+	if (nsecs == 0) {
+		return ETIMEDOUT;
 	}
-#endif
+#if HAVE_UL_COMPARE_AND_WAIT
+	uint64_t usecs = 0;
+	int rc;
+	if (nsecs == DISPATCH_TIME_FOREVER) {
+		return _dispatch_ulock_wait(address, value, 0, flags);
+	}
+	do {
+		usecs = howmany(nsecs, NSEC_PER_USEC);
+		if (usecs > UINT32_MAX) usecs = UINT32_MAX;
+		rc = _dispatch_ulock_wait(address, value, (uint32_t)usecs, flags);
+	} while (usecs == UINT32_MAX && rc == ETIMEDOUT &&
+			(nsecs = _dispatch_timeout(timeout)) != 0);
+	return rc;
+#elif HAVE_FUTEX
 	(void)flags;
+	if (nsecs != DISPATCH_TIME_FOREVER) {
+		struct timespec ts = {
+			.tv_sec = (__typeof__(ts.tv_sec))(nsecs / NSEC_PER_SEC),
+			.tv_nsec = (__typeof__(ts.tv_nsec))(nsecs % NSEC_PER_SEC),
+		};
+		return _dispatch_futex_wait(address, value, &ts, FUTEX_PRIVATE_FLAG);
+	}
+	return _dispatch_futex_wait(address, value, NULL, FUTEX_PRIVATE_FLAG);
+#elif defined(_WIN32)
+	return WaitOnAddress(address, &value, sizeof(value), INFINITE) == TRUE;
+#else
+#error _dispatch_wait_on_address unimplemented for this platform
+#endif
 }
 
 void
@@ -507,7 +554,7 @@
 		}
 #if HAVE_UL_COMPARE_AND_WAIT
 		int rc = _dispatch_ulock_wait(&dte->dte_value, UINT32_MAX, 0, 0);
-		dispatch_assert(rc == 0 || rc == EFAULT);
+		dispatch_assert(rc == 0 || rc == EFAULT || rc == EINTR);
 #elif HAVE_FUTEX
 		_dispatch_futex_wait(&dte->dte_value, UINT32_MAX,
 				NULL, FUTEX_PRIVATE_FLAG);
@@ -600,33 +647,44 @@
 #pragma mark - gate lock
 
 void
-_dispatch_gate_wait_slow(dispatch_gate_t dgl, dispatch_lock value,
-		dispatch_lock_options_t flags)
+_dispatch_once_wait(dispatch_once_gate_t dgo)
 {
 	dispatch_lock self = _dispatch_lock_value_for_self();
-	dispatch_lock old_value, new_value;
+	uintptr_t old_v, new_v;
+#if HAVE_UL_UNFAIR_LOCK || HAVE_FUTEX
+	dispatch_lock *lock = &dgo->dgo_gate.dgl_lock;
+#endif
 	uint32_t timeout = 1;
 
 	for (;;) {
-		os_atomic_rmw_loop(&dgl->dgl_lock, old_value, new_value, acquire, {
-			if (likely(old_value == value)) {
-				os_atomic_rmw_loop_give_up_with_fence(acquire, return);
+		os_atomic_rmw_loop(&dgo->dgo_once, old_v, new_v, relaxed, {
+			if (likely(old_v == DLOCK_ONCE_DONE)) {
+				os_atomic_rmw_loop_give_up(return);
 			}
-			new_value = old_value | DLOCK_WAITERS_BIT;
-			if (new_value == old_value) os_atomic_rmw_loop_give_up(break);
+#if DISPATCH_ONCE_USE_QUIESCENT_COUNTER
+			if (DISPATCH_ONCE_IS_GEN(old_v)) {
+				os_atomic_rmw_loop_give_up({
+					os_atomic_thread_fence(acquire);
+					return _dispatch_once_mark_done_if_quiesced(dgo, old_v);
+				});
+			}
+#endif
+			new_v = old_v | (uintptr_t)DLOCK_WAITERS_BIT;
+			if (new_v == old_v) os_atomic_rmw_loop_give_up(break);
 		});
-		if (unlikely(_dispatch_lock_is_locked_by(old_value, self))) {
+		if (unlikely(_dispatch_lock_is_locked_by((dispatch_lock)old_v, self))) {
 			DISPATCH_CLIENT_CRASH(0, "trying to lock recursively");
 		}
 #if HAVE_UL_UNFAIR_LOCK
-		_dispatch_unfair_lock_wait(&dgl->dgl_lock, new_value, 0, flags);
+		_dispatch_unfair_lock_wait(lock, (dispatch_lock)new_v, 0,
+				DLOCK_LOCK_NONE);
 #elif HAVE_FUTEX
-		_dispatch_futex_wait(&dgl->dgl_lock, new_value, NULL, FUTEX_PRIVATE_FLAG);
+		_dispatch_futex_wait(lock, (dispatch_lock)new_v, NULL,
+				FUTEX_PRIVATE_FLAG);
 #else
-		_dispatch_thread_switch(new_value, flags, timeout++);
+		_dispatch_thread_switch(new_v, 0, timeout++);
 #endif
 		(void)timeout;
-		(void)flags;
 	}
 }
 
@@ -645,3 +703,14 @@
 	(void)dgl;
 #endif
 }
+
+#if TARGET_OS_MAC
+
+void
+_dispatch_firehose_gate_wait(dispatch_gate_t dgl, uint32_t owner,
+		uint32_t flags)
+{
+	_dispatch_unfair_lock_wait(&dgl->dgl_lock, owner, 0, flags);
+}
+
+#endif
diff --git a/src/shims/lock.h b/src/shims/lock.h
index 4a9bd78..ca450d5 100644
--- a/src/shims/lock.h
+++ b/src/shims/lock.h
@@ -174,6 +174,14 @@
 #endif
 #endif // HAVE_FUTEX
 
+#if defined(__x86_64__) || defined(__i386__) || defined(__s390x__)
+#define DISPATCH_ONCE_USE_QUIESCENT_COUNTER 0
+#elif __APPLE__
+#define DISPATCH_ONCE_USE_QUIESCENT_COUNTER 1
+#else
+#define DISPATCH_ONCE_USE_QUIESCENT_COUNTER 0
+#endif
+
 #pragma mark - semaphores
 
 #if USE_MACH_SEM
@@ -205,9 +213,9 @@
 #define _DSEMA4_POLICY_LIFO 0
 #define _DSEMA4_TIMEOUT() ((errno) = ETIMEDOUT, -1)
 
-#define _dispatch_sema4_init(sema, policy) (void)(*(sema) = 0)
-#define _dispatch_sema4_is_created(sema)   (*(sema) != 0)
-void _dispatch_sema4_create_slow(_dispatch_sema4_t *sema, int policy);
+void _dispatch_sema4_init(_dispatch_sema4_t *sema, int policy);
+#define _dispatch_sema4_is_created(sema)   ((void)sema, 1)
+#define _dispatch_sema4_create_slow(sema, policy) ((void)sema, (void)policy)
 
 #else
 #error "port has to implement _dispatch_sema4_t"
@@ -239,8 +247,8 @@
 #pragma mark - compare and wait
 
 DISPATCH_NOT_TAIL_CALLED
-void _dispatch_wait_on_address(uint32_t volatile *address, uint32_t value,
-		dispatch_lock_options_t flags);
+int _dispatch_wait_on_address(uint32_t volatile *address, uint32_t value,
+		dispatch_time_t timeout, dispatch_lock_options_t flags);
 void _dispatch_wake_by_address(uint32_t volatile *address);
 
 #pragma mark - thread event
@@ -313,7 +321,7 @@
 #if HAVE_UL_COMPARE_AND_WAIT || HAVE_FUTEX
 	if (os_atomic_dec(&dte->dte_value, acquire) == 0) {
 		// 1 -> 0 is always a valid transition, so we can return
-		// for any other value, go to the slowpath which checks it's not corrupt
+		// for any other value, take the slow path which checks it's not corrupt
 		return;
 	}
 #else
@@ -355,7 +363,7 @@
 			DLOCK_OWNER_NULL, value_self, acquire))) {
 		return;
 	}
-	return _dispatch_unfair_lock_lock_slow(l, DLOCK_LOCK_NONE);
+	return _dispatch_unfair_lock_lock_slow(l, DLOCK_LOCK_DATA_CONTENTION);
 }
 
 DISPATCH_ALWAYS_INLINE
@@ -427,16 +435,10 @@
 
 #pragma mark - gate lock
 
-#if HAVE_UL_UNFAIR_LOCK || HAVE_FUTEX
-#define DISPATCH_GATE_USE_FOR_DISPATCH_ONCE 1
-#else
-#define DISPATCH_GATE_USE_FOR_DISPATCH_ONCE 0
-#endif
-
 #define DLOCK_GATE_UNLOCKED	((dispatch_lock)0)
 
-#define DLOCK_ONCE_UNLOCKED	((dispatch_once_t)0)
-#define DLOCK_ONCE_DONE		(~(dispatch_once_t)0)
+#define DLOCK_ONCE_UNLOCKED	((uintptr_t)0)
+#define DLOCK_ONCE_DONE		(~(uintptr_t)0)
 
 typedef struct dispatch_gate_s {
 	dispatch_lock dgl_lock;
@@ -445,13 +447,210 @@
 typedef struct dispatch_once_gate_s {
 	union {
 		dispatch_gate_s dgo_gate;
-		dispatch_once_t dgo_once;
+		uintptr_t dgo_once;
 	};
 } dispatch_once_gate_s, *dispatch_once_gate_t;
 
-DISPATCH_NOT_TAIL_CALLED
-void _dispatch_gate_wait_slow(dispatch_gate_t l, dispatch_lock value,
-		uint32_t flags);
+#if DISPATCH_ONCE_USE_QUIESCENT_COUNTER
+#define DISPATCH_ONCE_MAKE_GEN(gen)  (((gen) << 2) + DLOCK_FAILED_TRYLOCK_BIT)
+#define DISPATCH_ONCE_IS_GEN(gen)    (((gen) & 3) == DLOCK_FAILED_TRYLOCK_BIT)
+
+/*
+ * the _COMM_PAGE_CPU_QUIESCENT_COUNTER value is incremented every time
+ * all CPUs have performed a context switch.
+ *
+ * A counter update algorithm is:
+ *
+ *     // atomic_or acq_rel is marked as ======== below
+ *     if (atomic_or(&mask, cpu_bit, acq_rel) == full_mask) {
+ *
+ *         gen = atomic_load(&generation, relaxed);
+ *         atomic_store(&generation, gen + 1, relaxed);
+ *
+ *         // atomic_store release is marked as -------- below
+ *         atomic_store(&mask, 0, release);
+ *     }
+ *
+ * This ensures that the boxes delimited by the acq_rel/release barriers can
+ * only observe two possible values of the counter, as marked below.
+ *
+ * Lemma 1
+ * ~~~~~~~
+ *
+ * Between two acq_rel barriers, a thread can only observe two possible values
+ * of the generation counter G maintained by the kernel.
+ *
+ * The figure below adds the happens-before relationships and assertions:
+ *
+ * |     Thread A     |     Thread B     |     Thread C     |
+ * |                  |                  |                  |
+ * |==================|                  |                  |
+ * |      G = N       |                  |                  |
+ * |------------------|--------.         |                  |
+ * |                  |        |         |                  |
+ * |                  |        v         |                  |
+ * |                  |==================|                  |
+ * |                  |  assert(G >= N)  |                  |
+ * |                  |                  |                  |
+ * |                  |                  |                  |
+ * |                  |                  |                  |
+ * |                  |  assert(G < N+2) |                  |
+ * |                  |==================|--------.         |
+ * |                  |                  |        |         |
+ * |                  |                  |        v         |
+ * |                  |                  |==================|
+ * |                  |                  |      G = N + 2   |
+ * |                  |                  |------------------|
+ * |                  |                  |                  |
+ *
+ *
+ * This allows us to name the area delimited by two consecutive acq_rel
+ * barriers { N, N+1 } after the two possible values of G they can observe,
+ * which we'll use from now on.
+ *
+ *
+ * Lemma 2
+ * ~~~~~~~
+ *
+ * Any operation that a thread does while observing G in { N-2, N-1 } will be
+ * visible to a thread that can observe G in { N, N + 1 }.
+ *
+ * Any operation that a thread does while observing G in { N, N + 1 } cannot
+ * possibly be visible to a thread observing G in { N-2, N-1 }.
+ *
+ * This is a corollary of Lemma 1: the only possibility is for the update
+ * of G to N to have happened between two acq_rel barriers of the considered
+ * threads.
+ *
+ * The figure below illustrates this, instantiated with N = 2:
+ *
+ * |     Thread A     |     Thread B     |     Thread C     |
+ * |                  |                  |                  |
+ * |   G ∈ { 0, 1 }   |                  |                  |
+ * |                  |                  |                  |
+ * |                  |                  |                  |
+ * |   store(X, 1)    |                  |                  |
+ * |   assert(!Z)     |                  |                  |
+ * |                  |                  |                  |
+ * |==================|--------.         |                  |
+ * |   G ∈ { 1, 2 }   |        |         |                  |
+ * |                  |        v         |                  |
+ * |                  |==================|--------.         |
+ * |                  |      G = 2       |        |         |
+ * |                  |------------------|        |         |
+ * |                  |                  |        |         |
+ * |                  |                  |        v         |
+ * |                  |                  |==================|
+ * |                  |                  |   G ∈ { 2, 3 }   |
+ * |                  |                  |                  |
+ * |                  |                  |                  |
+ * |                  |                  |   store(Z, 1)    |
+ * |                  |                  |   assert(X)      |
+ * |                  |                  |                  |
+ * |                  |                  |                  |
+ *
+ *
+ * Theorem
+ * ~~~~~~~
+ *
+ * The optimal number of increments to observe for the dispatch once algorithm
+ * to be safe is 4.
+ *
+ * Proof (correctness):
+ *
+ *  Consider a dispatch once initializer thread in its { N, N+1 } "zone".
+ *
+ *  Per Lemma 2, any observer thread in its { N+2, N+3 } zone will see the
+ *  effect of the dispatch once initialization.
+ *
+ *  Per Lemma 2, when the DONE transition happens in a thread zone { N+3, N+4 },
+ *  then threads can observe this transition in their { N+2, N+3 } zone at the
+ *  earliest.
+ *
+ *  Hence for an initializer bracket of { N, N+1 }, the first safe bracket for
+ *  the DONE transition is { N+3, N+4 }.
+ *
+ *
+ * Proof (optimal):
+ *
+ *  The following ordering is possible if waiting only for three periods:
+ *
+ * |     Thread A     |     Thread B     |     Thread C     |
+ * |                  |                  |                  |
+ * |                  |                  |                  |
+ * |                  |                  |==================|
+ * |                  |                  |   G ∈ { 1, 2 }   |
+ * |                  |                  |                  |
+ * |                  |                  |                  |
+ * |                  |                  |  R(once == -1) <-+--.
+ * |                  |                  |                  |  |
+ * |           -------+------------------+---------.        |  |
+ * |                  |                  |         |        |  |
+ * |  W(global, 42)   |                  |         |        |  |
+ * |  WRel(once, G:0) |                  |         |        |  |
+ * |                  |                  |         |        |  |
+ * |                  |                  |         v        |  |
+ * |                  |                  |   R(global == 0) |  |
+ * |                  |                  |                  |  |
+ * |                  |                  |                  |  |
+ * |==================|                  |                  |  |
+ * |   G ∈ { 1, 2 }   |                  |                  |  |
+ * |                  |==================|                  |  |
+ * |                  |      G = 2       |                  |  |
+ * |                  |------------------|                  |  |
+ * |                  |                  |                  |  |
+ * |==================|                  |                  |  |
+ * |   G ∈ { 2, 3 }   |                  |                  |  |
+ * |                  |                  |                  |  |
+ * |                  |                  |                  |  |
+ * |   W(once, -1) ---+------------------+------------------+--'
+ * |                  |                  |                  |
+ * |                  |                  |==================|
+ * |                  |                  |   G ∈ { 2, 3 }   |
+ * |                  |                  |                  |
+ *
+ */
+#define DISPATCH_ONCE_GEN_SAFE_DELTA  (4 << 2)
+
+DISPATCH_ALWAYS_INLINE
+static inline uintptr_t
+_dispatch_once_generation(void)
+{
+	uintptr_t value;
+	value = *(volatile uintptr_t *)_COMM_PAGE_CPU_QUIESCENT_COUNTER;
+	return (uintptr_t)DISPATCH_ONCE_MAKE_GEN(value);
+}
+
+DISPATCH_ALWAYS_INLINE
+static inline uintptr_t
+_dispatch_once_mark_quiescing(dispatch_once_gate_t dgo)
+{
+	return os_atomic_xchg(&dgo->dgo_once, _dispatch_once_generation(), release);
+}
+
+DISPATCH_ALWAYS_INLINE
+static inline void
+_dispatch_once_mark_done_if_quiesced(dispatch_once_gate_t dgo, uintptr_t gen)
+{
+	if (_dispatch_once_generation() - gen >= DISPATCH_ONCE_GEN_SAFE_DELTA) {
+		/*
+		 * See the explanation above: when the quiescing counter approach is
+		 * taken, this store only needs to be relaxed, as it is merely a
+		 * witness that the required barriers have happened.
+		 */
+		os_atomic_store(&dgo->dgo_once, DLOCK_ONCE_DONE, relaxed);
+	}
+}
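+
+/*
+ * Worked example of the check above (counter values chosen for illustration):
+ * if the initializer published gen = DISPATCH_ONCE_MAKE_GEN(10) while the
+ * commpage counter read 10, a waiter that later reads the counter at 14
+ * computes DISPATCH_ONCE_MAKE_GEN(14) - gen == (4 << 2) ==
+ * DISPATCH_ONCE_GEN_SAFE_DELTA and may mark the once as DLOCK_ONCE_DONE; at a
+ * counter of 13 the delta is only (3 << 2), so waiters keep taking the slow
+ * path.
+ */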
+#else
+DISPATCH_ALWAYS_INLINE
+static inline uintptr_t
+_dispatch_once_mark_done(dispatch_once_gate_t dgo)
+{
+	return os_atomic_xchg(&dgo->dgo_once, DLOCK_ONCE_DONE, release);
+}
+#endif // DISPATCH_ONCE_USE_QUIESCENT_COUNTER
+
+void _dispatch_once_wait(dispatch_once_gate_t l);
 void _dispatch_gate_broadcast_slow(dispatch_gate_t l, dispatch_lock tid_cur);
 
 DISPATCH_ALWAYS_INLINE
@@ -462,9 +661,6 @@
 			_dispatch_lock_value_for_self(), acquire);
 }
 
-#define _dispatch_gate_wait(l, flags) \
-	_dispatch_gate_wait_slow(l, DLOCK_GATE_UNLOCKED, flags)
-
 DISPATCH_ALWAYS_INLINE
 static inline void
 _dispatch_gate_broadcast(dispatch_gate_t l)
@@ -480,18 +676,7 @@
 _dispatch_once_gate_tryenter(dispatch_once_gate_t l)
 {
 	return os_atomic_cmpxchg(&l->dgo_once, DLOCK_ONCE_UNLOCKED,
-			(dispatch_once_t)_dispatch_lock_value_for_self(), acquire);
-}
-
-#define _dispatch_once_gate_wait(l) \
-	_dispatch_gate_wait_slow(&(l)->dgo_gate, (dispatch_lock)DLOCK_ONCE_DONE, \
-			DLOCK_LOCK_NONE)
-
-DISPATCH_ALWAYS_INLINE
-static inline dispatch_once_t
-_dispatch_once_xchg_done(dispatch_once_t *pred)
-{
-	return os_atomic_xchg(pred, DLOCK_ONCE_DONE, release);
+			(uintptr_t)_dispatch_lock_value_for_self(), relaxed);
 }
 
 DISPATCH_ALWAYS_INLINE
@@ -499,9 +684,22 @@
 _dispatch_once_gate_broadcast(dispatch_once_gate_t l)
 {
 	dispatch_lock value_self = _dispatch_lock_value_for_self();
-	dispatch_once_t cur = _dispatch_once_xchg_done(&l->dgo_once);
-	if (likely(cur == (dispatch_once_t)value_self)) return;
-	_dispatch_gate_broadcast_slow(&l->dgo_gate, (dispatch_lock)cur);
+	uintptr_t v;
+#if DISPATCH_ONCE_USE_QUIESCENT_COUNTER
+	v = _dispatch_once_mark_quiescing(l);
+#else
+	v = _dispatch_once_mark_done(l);
+#endif
+	if (likely((dispatch_lock)v == value_self)) return;
+	_dispatch_gate_broadcast_slow(&l->dgo_gate, (dispatch_lock)v);
 }
 
+#if TARGET_OS_MAC
+
+DISPATCH_NOT_TAIL_CALLED
+void _dispatch_firehose_gate_wait(dispatch_gate_t l, uint32_t owner,
+		uint32_t flags);
+
+#endif // TARGET_OS_MAC
+
 #endif // __DISPATCH_SHIMS_LOCK__
diff --git a/src/shims/perfmon.h b/src/shims/perfmon.h
index be9327b..af6183f 100644
--- a/src/shims/perfmon.h
+++ b/src/shims/perfmon.h
@@ -67,7 +67,7 @@
 
 #define _dispatch_perfmon_start_impl(trace) ({ \
 		if (trace) _dispatch_ktrace0(DISPATCH_PERF_MON_worker_thread_start); \
-		perfmon_start = _dispatch_absolute_time(); \
+		perfmon_start = _dispatch_uptime(); \
 	})
 #define _dispatch_perfmon_start() \
 		DISPATCH_PERF_MON_VAR _dispatch_perfmon_start_impl(true)
diff --git a/src/shims/priority.h b/src/shims/priority.h
index 0202d3c..3a79c5e 100644
--- a/src/shims/priority.h
+++ b/src/shims/priority.h
@@ -36,8 +36,8 @@
 #ifndef _PTHREAD_PRIORITY_SCHED_PRI_FLAG
 #define _PTHREAD_PRIORITY_SCHED_PRI_FLAG 0x20000000
 #endif
-#ifndef _PTHREAD_PRIORITY_DEFAULTQUEUE_FLAG
-#define _PTHREAD_PRIORITY_DEFAULTQUEUE_FLAG 0x04000000
+#ifndef _PTHREAD_PRIORITY_FALLBACK_FLAG
+#define _PTHREAD_PRIORITY_FALLBACK_FLAG 0x04000000
 #endif
 #ifndef _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG
 #define _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG 0x02000000
@@ -63,50 +63,78 @@
 #define _PTHREAD_PRIORITY_PRIORITY_MASK 0x000000ff
 #define _PTHREAD_PRIORITY_OVERCOMMIT_FLAG 0x80000000
 #define _PTHREAD_PRIORITY_SCHED_PRI_FLAG 0x20000000
-#define _PTHREAD_PRIORITY_DEFAULTQUEUE_FLAG 0x04000000
+#define _PTHREAD_PRIORITY_FALLBACK_FLAG 0x04000000
 #define _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG 0x02000000
 #define _PTHREAD_PRIORITY_NEEDS_UNBIND_FLAG 0x01000000
 #define _PTHREAD_PRIORITY_ENFORCE_FLAG  0x10000000
 
 #endif // HAVE_PTHREAD_QOS_H
 
+#if !defined(POLICY_RR) && defined(SCHED_RR)
+#define POLICY_RR SCHED_RR
+#endif // !defined(POLICY_RR) && defined(SCHED_RR)
+
 typedef uint32_t dispatch_qos_t;
 typedef uint32_t dispatch_priority_t;
-typedef uint32_t dispatch_priority_t;
-typedef uint16_t dispatch_priority_requested_t;
 
-#define DISPATCH_QOS_UNSPECIFIED            ((dispatch_qos_t)0)
-#define DISPATCH_QOS_MAINTENANCE            ((dispatch_qos_t)1)
-#define DISPATCH_QOS_BACKGROUND             ((dispatch_qos_t)2)
-#define DISPATCH_QOS_UTILITY                ((dispatch_qos_t)3)
-#define DISPATCH_QOS_DEFAULT                ((dispatch_qos_t)4)
-#define DISPATCH_QOS_USER_INITIATED         ((dispatch_qos_t)5)
-#define DISPATCH_QOS_USER_INTERACTIVE       ((dispatch_qos_t)6)
-#define DISPATCH_QOS_MAX                    DISPATCH_QOS_USER_INTERACTIVE
-#define DISPATCH_QOS_SATURATED              ((dispatch_qos_t)15)
+#define DISPATCH_QOS_UNSPECIFIED        ((dispatch_qos_t)0)
+#define DISPATCH_QOS_MAINTENANCE        ((dispatch_qos_t)1)
+#define DISPATCH_QOS_BACKGROUND         ((dispatch_qos_t)2)
+#define DISPATCH_QOS_UTILITY            ((dispatch_qos_t)3)
+#define DISPATCH_QOS_DEFAULT            ((dispatch_qos_t)4)
+#define DISPATCH_QOS_USER_INITIATED     ((dispatch_qos_t)5)
+#define DISPATCH_QOS_USER_INTERACTIVE   ((dispatch_qos_t)6)
+#define DISPATCH_QOS_MIN                DISPATCH_QOS_MAINTENANCE
+#define DISPATCH_QOS_MAX                DISPATCH_QOS_USER_INTERACTIVE
+#define DISPATCH_QOS_SATURATED          ((dispatch_qos_t)15)
+
+#define DISPATCH_QOS_NBUCKETS           (DISPATCH_QOS_MAX - DISPATCH_QOS_MIN + 1)
+#define DISPATCH_QOS_BUCKET(qos)        ((int)((qos) - DISPATCH_QOS_MIN))
+#define DISPATCH_QOS_FOR_BUCKET(bucket) ((dispatch_qos_t)((uint32_t)bucket + DISPATCH_QOS_MIN))
 
 #define DISPATCH_PRIORITY_RELPRI_MASK        ((dispatch_priority_t)0x000000ff)
 #define DISPATCH_PRIORITY_RELPRI_SHIFT       0
-#define DISPATCH_PRIORITY_QOS_MASK           ((dispatch_priority_t)0x0000ff00)
+#define DISPATCH_PRIORITY_QOS_MASK           ((dispatch_priority_t)0x00000f00)
 #define DISPATCH_PRIORITY_QOS_SHIFT          8
-#define DISPATCH_PRIORITY_REQUESTED_MASK     ((dispatch_priority_t)0x0000ffff)
-#define DISPATCH_PRIORITY_OVERRIDE_MASK      ((dispatch_priority_t)0x00ff0000)
+#define DISPATCH_PRIORITY_REQUESTED_MASK     ((dispatch_priority_t)0x00000fff)
+#define DISPATCH_PRIORITY_FALLBACK_QOS_MASK  ((dispatch_priority_t)0x0000f000)
+#define DISPATCH_PRIORITY_FALLBACK_QOS_SHIFT 12
+#define DISPATCH_PRIORITY_OVERRIDE_MASK      ((dispatch_priority_t)0x000f0000)
 #define DISPATCH_PRIORITY_OVERRIDE_SHIFT     16
 #define DISPATCH_PRIORITY_FLAGS_MASK         ((dispatch_priority_t)0xff000000)
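+
+// Summary of the resulting dispatch_priority_t layout (derived from the masks
+// above): bits 0-7 relpri, 8-11 qos, 12-15 fallback qos, 16-19 override qos,
+// 20-23 unused, 24-31 flags.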
 
-#define DISPATCH_PRIORITY_SATURATED_OVERRIDE ((dispatch_priority_t)0x000f0000)
+#define DISPATCH_PRIORITY_SATURATED_OVERRIDE DISPATCH_PRIORITY_OVERRIDE_MASK
 
 #define DISPATCH_PRIORITY_FLAG_OVERCOMMIT    ((dispatch_priority_t)0x80000000) // _PTHREAD_PRIORITY_OVERCOMMIT_FLAG
-#define DISPATCH_PRIORITY_FLAG_DEFAULTQUEUE  ((dispatch_priority_t)0x04000000) // _PTHREAD_PRIORITY_DEFAULTQUEUE_FLAG
+#define DISPATCH_PRIORITY_FLAG_FALLBACK      ((dispatch_priority_t)0x04000000) // _PTHREAD_PRIORITY_FALLBACK_FLAG
 #define DISPATCH_PRIORITY_FLAG_MANAGER       ((dispatch_priority_t)0x02000000) // _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG
 #define DISPATCH_PRIORITY_PTHREAD_PRIORITY_FLAGS_MASK \
-		(DISPATCH_PRIORITY_FLAG_OVERCOMMIT | DISPATCH_PRIORITY_FLAG_DEFAULTQUEUE | \
+		(DISPATCH_PRIORITY_FLAG_OVERCOMMIT | DISPATCH_PRIORITY_FLAG_FALLBACK | \
 		DISPATCH_PRIORITY_FLAG_MANAGER)
 
 // not passed to pthread
-#define DISPATCH_PRIORITY_FLAG_INHERIT       ((dispatch_priority_t)0x40000000) // _PTHREAD_PRIORITY_INHERIT_FLAG
+#define DISPATCH_PRIORITY_FLAG_FLOOR         ((dispatch_priority_t)0x40000000) // _PTHREAD_PRIORITY_INHERIT_FLAG
 #define DISPATCH_PRIORITY_FLAG_ENFORCE       ((dispatch_priority_t)0x10000000) // _PTHREAD_PRIORITY_ENFORCE_FLAG
-#define DISPATCH_PRIORITY_FLAG_ROOTQUEUE     ((dispatch_priority_t)0x20000000) // _PTHREAD_PRIORITY_ROOTQUEUE_FLAG
+#define DISPATCH_PRIORITY_FLAG_INHERITED     ((dispatch_priority_t)0x20000000)
+
+DISPATCH_ALWAYS_INLINE
+static inline bool
+_dispatch_qos_class_valid(qos_class_t cls, int relpri)
+{
+	switch ((unsigned int)cls) {
+	case QOS_CLASS_MAINTENANCE:
+	case QOS_CLASS_BACKGROUND:
+	case QOS_CLASS_UTILITY:
+	case QOS_CLASS_DEFAULT:
+	case QOS_CLASS_USER_INITIATED:
+	case QOS_CLASS_USER_INTERACTIVE:
+	case QOS_CLASS_UNSPECIFIED:
+		break;
+	default:
+		return false;
+	}
+	return QOS_MIN_RELATIVE_PRIORITY <= relpri && relpri <= 0;
+}
 
 #pragma mark dispatch_qos
 
@@ -164,6 +192,16 @@
 }
 
 DISPATCH_ALWAYS_INLINE
+static inline dispatch_qos_t
+_dispatch_qos_from_pp_unsafe(pthread_priority_t pp)
+{
+	// this assumes we know there is a QOS and pp has been masked off properly
+	pp >>= _PTHREAD_PRIORITY_QOS_CLASS_SHIFT;
+	DISPATCH_COMPILER_CAN_ASSUME(pp);
+	return (dispatch_qos_t)__builtin_ffs((int)pp);
+}
+
+DISPATCH_ALWAYS_INLINE
 static inline pthread_priority_t
 _dispatch_qos_to_pp(dispatch_qos_t qos)
 {
@@ -186,15 +224,16 @@
 	(qos ? ((((qos) << DISPATCH_PRIORITY_QOS_SHIFT) & DISPATCH_PRIORITY_QOS_MASK) | \
 	 ((dispatch_priority_t)(relpri - 1) & DISPATCH_PRIORITY_RELPRI_MASK)) : 0)
 
-DISPATCH_ALWAYS_INLINE
-static inline dispatch_priority_t
-_dispatch_priority_with_override_qos(dispatch_priority_t pri,
-		dispatch_qos_t oqos)
-{
-	pri &= ~DISPATCH_PRIORITY_OVERRIDE_MASK;
-	pri |= oqos << DISPATCH_PRIORITY_OVERRIDE_SHIFT;
-	return pri;
-}
+#define _dispatch_priority_make_override(qos) \
+	(((qos) << DISPATCH_PRIORITY_OVERRIDE_SHIFT) & \
+	 DISPATCH_PRIORITY_OVERRIDE_MASK)
+
+#define _dispatch_priority_make_floor(qos) \
+	(qos ? (_dispatch_priority_make(qos) | DISPATCH_PRIORITY_FLAG_FLOOR) : 0)
+
+#define _dispatch_priority_make_fallback(qos) \
+	(qos ? ((((qos) << DISPATCH_PRIORITY_FALLBACK_QOS_SHIFT) & \
+	 DISPATCH_PRIORITY_FALLBACK_QOS_MASK) | DISPATCH_PRIORITY_FLAG_FALLBACK) : 0)
 
 DISPATCH_ALWAYS_INLINE
 static inline int
@@ -216,6 +255,14 @@
 
 DISPATCH_ALWAYS_INLINE
 static inline dispatch_qos_t
+_dispatch_priority_fallback_qos(dispatch_priority_t dbp)
+{
+	dbp &= DISPATCH_PRIORITY_FALLBACK_QOS_MASK;
+	return dbp >> DISPATCH_PRIORITY_FALLBACK_QOS_SHIFT;
+}
+
+DISPATCH_ALWAYS_INLINE
+static inline dispatch_qos_t
 _dispatch_priority_override_qos(dispatch_priority_t dbp)
 {
 	dbp &= DISPATCH_PRIORITY_OVERRIDE_MASK;
@@ -223,6 +270,16 @@
 }
 
 DISPATCH_ALWAYS_INLINE
+static inline bool
+_dispatch_queue_priority_manually_selected(dispatch_priority_t pri)
+{
+	return !(pri & DISPATCH_PRIORITY_FLAG_INHERITED) &&
+			(pri & (DISPATCH_PRIORITY_FLAG_FALLBACK |
+			DISPATCH_PRIORITY_FLAG_FLOOR |
+			DISPATCH_PRIORITY_REQUESTED_MASK));
+}
+
+DISPATCH_ALWAYS_INLINE
 static inline dispatch_priority_t
 _dispatch_priority_from_pp_impl(pthread_priority_t pp, bool keep_flags)
 {
@@ -244,26 +301,40 @@
 #define _dispatch_priority_from_pp_strip_flags(pp) \
 		_dispatch_priority_from_pp_impl(pp, false)
 
+#define DISPATCH_PRIORITY_TO_PP_STRIP_FLAGS     0x1
+#define DISPATCH_PRIORITY_TO_PP_PREFER_FALLBACK 0x2
+
 DISPATCH_ALWAYS_INLINE
 static inline pthread_priority_t
-_dispatch_priority_to_pp_impl(dispatch_priority_t dbp, bool keep_flags)
+_dispatch_priority_to_pp_strip_flags(dispatch_priority_t dbp)
 {
-	pthread_priority_t pp;
-	if (keep_flags) {
-		pp = dbp & (DISPATCH_PRIORITY_PTHREAD_PRIORITY_FLAGS_MASK |
-				DISPATCH_PRIORITY_RELPRI_MASK);
-	} else {
-		pp = dbp & DISPATCH_PRIORITY_RELPRI_MASK;
-	}
+	pthread_priority_t pp = dbp & DISPATCH_PRIORITY_RELPRI_MASK;
 	dispatch_qos_t qos = _dispatch_priority_qos(dbp);
 	if (qos) {
 		pp |= (1ul << ((qos - 1) + _PTHREAD_PRIORITY_QOS_CLASS_SHIFT));
 	}
 	return pp;
 }
-#define _dispatch_priority_to_pp(pp) \
-		_dispatch_priority_to_pp_impl(pp, true)
-#define _dispatch_priority_to_pp_strip_flags(pp) \
-		_dispatch_priority_to_pp_impl(pp, false)
+
+DISPATCH_ALWAYS_INLINE
+static inline pthread_priority_t
+_dispatch_priority_to_pp_prefer_fallback(dispatch_priority_t dbp)
+{
+	pthread_priority_t pp;
+	dispatch_qos_t qos;
+
+	if (dbp & DISPATCH_PRIORITY_FLAG_FALLBACK) {
+		pp = dbp & DISPATCH_PRIORITY_PTHREAD_PRIORITY_FLAGS_MASK;
+		pp |= _PTHREAD_PRIORITY_PRIORITY_MASK;
+		qos = _dispatch_priority_fallback_qos(dbp);
+	} else {
+		pp = dbp & (DISPATCH_PRIORITY_PTHREAD_PRIORITY_FLAGS_MASK |
+				DISPATCH_PRIORITY_RELPRI_MASK);
+		qos = _dispatch_priority_qos(dbp);
+		if (unlikely(!qos)) return pp;
+	}
+
+	return pp | (1ul << ((qos - 1) + _PTHREAD_PRIORITY_QOS_CLASS_SHIFT));
+}
 
 #endif // __DISPATCH_SHIMS_PRIORITY__
diff --git a/src/shims/target.h b/src/shims/target.h
new file mode 100644
index 0000000..8e996aa
--- /dev/null
+++ b/src/shims/target.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2018 Apple Inc. All rights reserved.
+ *
+ * @APPLE_APACHE_LICENSE_HEADER_START@
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * @APPLE_APACHE_LICENSE_HEADER_END@
+ */
+
+/*
+ * IMPORTANT: This header file describes INTERNAL interfaces to libdispatch
+ * which are subject to change in future releases of Mac OS X. Any applications
+ * relying on these interfaces WILL break.
+ */
+
+// These are the portable dispatch version requirement macros, isolated from
+// the rest of the C internal headers so that they are suitable for inclusion
+// in MIG defs, asm, etc.
+
+#ifndef __DISPATCH_SHIMS_TARGET__
+#define __DISPATCH_SHIMS_TARGET__
+
+#ifdef __APPLE__
+#include <Availability.h>
+#include <TargetConditionals.h>
+
+#if TARGET_OS_OSX
+#  define DISPATCH_MIN_REQUIRED_OSX_AT_LEAST(x) \
+		(__MAC_OS_X_VERSION_MIN_REQUIRED >= (x))
+#  if !DISPATCH_MIN_REQUIRED_OSX_AT_LEAST(101200)
+#    error "OS X hosts older than OS X 10.12 aren't supported anymore"
+#  endif // !DISPATCH_MIN_REQUIRED_OSX_AT_LEAST(101200)
+#elif TARGET_OS_SIMULATOR
+#  define DISPATCH_MIN_REQUIRED_OSX_AT_LEAST(x) \
+		(IPHONE_SIMULATOR_HOST_MIN_VERSION_REQUIRED >= (x))
+#  if !DISPATCH_MIN_REQUIRED_OSX_AT_LEAST(101200)
+#    error "Simulator hosts older than OS X 10.12 aren't supported anymore"
+#  endif // !DISPATCH_MIN_REQUIRED_OSX_AT_LEAST(101200)
+#else
+#  define DISPATCH_MIN_REQUIRED_OSX_AT_LEAST(x) 1
+#  if __IPHONE_OS_VERSION_MIN_REQUIRED < 90000
+#    error "iOS hosts older than iOS 9.0 aren't supported anymore"
+#  endif
+#endif
+
+#else // !__APPLE__
+#define DISPATCH_MIN_REQUIRED_OSX_AT_LEAST(x) 0
+#endif // !__APPLE__
+
+#endif // __DISPATCH_SHIMS_TARGET__
diff --git a/src/shims/time.h b/src/shims/time.h
index 2e27605..8fae5a2 100644
--- a/src/shims/time.h
+++ b/src/shims/time.h
@@ -41,9 +41,10 @@
 #endif
 
 typedef enum {
+	DISPATCH_CLOCK_UPTIME,
+	DISPATCH_CLOCK_MONOTONIC,
 	DISPATCH_CLOCK_WALL,
-	DISPATCH_CLOCK_MACH,
-#define DISPATCH_CLOCK_COUNT  (DISPATCH_CLOCK_MACH + 1)
+#define DISPATCH_CLOCK_COUNT  (DISPATCH_CLOCK_WALL + 1)
 } dispatch_clock_t;
 
 void _dispatch_time_init(void);
@@ -107,13 +108,14 @@
 	dispatch_assume_zero(clock_gettime(CLOCK_REALTIME, &ts));
 	return _dispatch_timespec_to_nano(ts);
 #elif defined(_WIN32)
+	static const uint64_t kNTToUNIXBiasAdjustment = 11644473600 * NSEC_PER_SEC;
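+	// 11644473600 is the number of seconds between the NT epoch (1601-01-01)
+	// and the UNIX epoch (1970-01-01).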
 	// FILETIME is 100-nanosecond intervals since January 1, 1601 (UTC).
 	FILETIME ft;
 	ULARGE_INTEGER li;
-	GetSystemTimeAsFileTime(&ft);
+	GetSystemTimePreciseAsFileTime(&ft);
 	li.LowPart = ft.dwLowDateTime;
 	li.HighPart = ft.dwHighDateTime;
-	return li.QuadPart * 100ull;
+	return li.QuadPart * 100ull - kNTToUNIXBiasAdjustment;
 #else
 	struct timeval tv;
 	dispatch_assert_zero(gettimeofday(&tv, NULL));
@@ -135,15 +137,37 @@
  */
 
 static inline uint64_t
-_dispatch_absolute_time(void)
+_dispatch_uptime(void)
 {
 #if HAVE_MACH_ABSOLUTE_TIME
 	return mach_absolute_time();
+#elif HAVE_DECL_CLOCK_MONOTONIC && defined(__linux__)
+	struct timespec ts;
+	dispatch_assume_zero(clock_gettime(CLOCK_MONOTONIC, &ts));
+	return _dispatch_timespec_to_nano(ts);
 #elif HAVE_DECL_CLOCK_UPTIME && !defined(__linux__)
 	struct timespec ts;
 	dispatch_assume_zero(clock_gettime(CLOCK_UPTIME, &ts));
 	return _dispatch_timespec_to_nano(ts);
-#elif HAVE_DECL_CLOCK_MONOTONIC && defined(__linux__)
+#elif defined(_WIN32)
+	ULONGLONG ullUnbiasedTime;
+	QueryUnbiasedInterruptTime(&ullUnbiasedTime);
+	return ullUnbiasedTime * 100;
+#else
+#error platform needs to implement _dispatch_uptime()
+#endif
+}
+
+static inline uint64_t
+_dispatch_monotonic_time(void)
+{
+#if HAVE_MACH_ABSOLUTE_TIME
+	return mach_continuous_time();
+#elif defined(__linux__)
+	struct timespec ts;
+	dispatch_assume_zero(clock_gettime(CLOCK_BOOTTIME, &ts));
+	return _dispatch_timespec_to_nano(ts);
+#elif HAVE_DECL_CLOCK_MONOTONIC
 	struct timespec ts;
 	dispatch_assume_zero(clock_gettime(CLOCK_MONOTONIC, &ts));
 	return _dispatch_timespec_to_nano(ts);
@@ -154,7 +178,7 @@
 
 	return ullTime * 100ull;
 #else
-#error platform needs to implement _dispatch_absolute_time()
+#error platform needs to implement _dispatch_monotonic_time()
 #endif
 }
 
@@ -164,16 +188,16 @@
 {
 #if HAVE_MACH_APPROXIMATE_TIME
 	return mach_approximate_time();
-#elif HAVE_DECL_CLOCK_UPTIME_FAST && !defined(__linux__)
-	struct timespec ts;
-	dispatch_assume_zero(clock_gettime(CLOCK_UPTIME_FAST, &ts));
-	return _dispatch_timespec_to_nano(ts);
 #elif HAVE_DECL_CLOCK_MONOTONIC_COARSE && defined(__linux__)
 	struct timespec ts;
 	dispatch_assume_zero(clock_gettime(CLOCK_MONOTONIC_COARSE, &ts));
 	return _dispatch_timespec_to_nano(ts);
+#elif HAVE_DECL_CLOCK_UPTIME_FAST && !defined(__linux__)
+	struct timespec ts;
+	dispatch_assume_zero(clock_gettime(CLOCK_UPTIME_FAST, &ts));
+	return _dispatch_timespec_to_nano(ts);
 #else
-	return _dispatch_absolute_time();
+	return _dispatch_uptime();
 #endif
 }
 
@@ -182,8 +206,10 @@
 _dispatch_time_now(dispatch_clock_t clock)
 {
 	switch (clock) {
-	case DISPATCH_CLOCK_MACH:
-		return _dispatch_absolute_time();
+	case DISPATCH_CLOCK_UPTIME:
+		return _dispatch_uptime();
+	case DISPATCH_CLOCK_MONOTONIC:
+		return _dispatch_monotonic_time();
 	case DISPATCH_CLOCK_WALL:
 		return _dispatch_get_nanoseconds();
 	}
@@ -202,7 +228,75 @@
 	if (likely(cache->nows[clock])) {
 		return cache->nows[clock];
 	}
-	return cache->nows[clock] = _dispatch_time_now(clock);
+#if TARGET_OS_MAC
+	struct timespec ts;
+	mach_get_times(&cache->nows[DISPATCH_CLOCK_UPTIME],
+			&cache->nows[DISPATCH_CLOCK_MONOTONIC], &ts);
+	cache->nows[DISPATCH_CLOCK_WALL] = _dispatch_timespec_to_nano(ts);
+#else
+	cache->nows[clock] = _dispatch_time_now(clock);
+#endif
+	return cache->nows[clock];
 }
 
+// Encoding of dispatch_time_t:
+// 1. Wall time has the top two bits set; negate to get the actual value.
+// 2. Up time (mach absolute time) has the top two bits clear and is the
+// actual value.
+// 3. Continuous time has bit 63 set and bit 62 clear. Clear bit 63 to get the
+// actual value.
+// 4. "Forever" and "now" are encoded as ~0ULL and 0ULL respectively.
+//
+// The consequence of all this is that we can't have an actual time value that
+// is >= 0x4000000000000000. Larger values always get silently converted to
+// DISPATCH_TIME_FOREVER because the APIs that return time values have no way to
+// indicate a range error.
+#define DISPATCH_UP_OR_MONOTONIC_TIME_MASK	(1ULL << 63)
+#define DISPATCH_WALLTIME_MASK	(1ULL << 62)
+#define DISPATCH_TIME_MAX_VALUE (DISPATCH_WALLTIME_MASK - 1)
+
+DISPATCH_ALWAYS_INLINE
+static inline void
+_dispatch_time_to_clock_and_value(dispatch_time_t time,
+		dispatch_clock_t *clock, uint64_t *value)
+{
+	uint64_t actual_value;
+	if ((int64_t)time < 0) {
+		// Wall time or mach continuous time
+		if (time & DISPATCH_WALLTIME_MASK) {
+			// Wall time (value 11 in bits 63, 62)
+			*clock = DISPATCH_CLOCK_WALL;
+			actual_value = time == DISPATCH_WALLTIME_NOW ?
+					_dispatch_get_nanoseconds() : (uint64_t)-time;
+		} else {
+			// Continuous time (value 10 in bits 63, 62).
+			*clock = DISPATCH_CLOCK_MONOTONIC;
+			actual_value = time & ~DISPATCH_UP_OR_MONOTONIC_TIME_MASK;
+		}
+	} else {
+		*clock = DISPATCH_CLOCK_UPTIME;
+		actual_value = time;
+	}
+
+	// Range-check the value before returning.
+	*value = actual_value > DISPATCH_TIME_MAX_VALUE ? DISPATCH_TIME_FOREVER
+			: actual_value;
+}
+
+DISPATCH_ALWAYS_INLINE
+static inline dispatch_time_t
+_dispatch_clock_and_value_to_time(dispatch_clock_t clock, uint64_t value)
+{
+	if (value >= DISPATCH_TIME_MAX_VALUE) {
+		return DISPATCH_TIME_FOREVER;
+	}
+	switch (clock) {
+	case DISPATCH_CLOCK_WALL:
+		return -value;
+	case DISPATCH_CLOCK_UPTIME:
+		return value;
+	case DISPATCH_CLOCK_MONOTONIC:
+		return value | DISPATCH_UP_OR_MONOTONIC_TIME_MASK;
+	}
+	__builtin_unreachable();
+}
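+
+// Round-trip example of the encoding above (values chosen for illustration):
+// encoding 5 seconds on DISPATCH_CLOCK_MONOTONIC yields
+//     5000000000ull | DISPATCH_UP_OR_MONOTONIC_TIME_MASK
+// i.e. bit 63 set and bit 62 clear; decoding sees a negative int64_t with
+// DISPATCH_WALLTIME_MASK clear, picks DISPATCH_CLOCK_MONOTONIC, and clears
+// bit 63 to recover 5000000000. A wall-clock value v (when at most
+// DISPATCH_TIME_MAX_VALUE) encodes as -v, setting both top bits, and decodes
+// back through negation.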
 #endif // __DISPATCH_SHIMS_TIME__
diff --git a/src/shims/tsd.h b/src/shims/tsd.h
index 9f94eae..446c4d7 100644
--- a/src/shims/tsd.h
+++ b/src/shims/tsd.h
@@ -58,6 +58,12 @@
 #endif // _os_tsd_get_base
 #endif
 
+#if defined(_WIN32)
+#define DISPATCH_TSD_DTOR_CC __stdcall
+#else
+#define DISPATCH_TSD_DTOR_CC
+#endif
+
 #if DISPATCH_USE_DIRECT_TSD
 #ifndef __TSD_THREAD_QOS_CLASS
 #define __TSD_THREAD_QOS_CLASS 4
@@ -101,18 +107,13 @@
 #elif DISPATCH_USE_THREAD_LOCAL_STORAGE
 
 #if defined(_WIN32)
-#define DISPATCH_TSD_DTOR_CC __stdcall
-#else
-#define DISPATCH_TSD_DTOR_CC
-#endif
-
-#if defined(_WIN32)
 
 DISPATCH_TSD_INLINE
 static inline void
 _dispatch_thread_key_create(DWORD *k, void (DISPATCH_TSD_DTOR_CC *d)(void *))
 {
-	dispatch_assert_zero((*k = FlsAlloc(d)));
+	*k = FlsAlloc(d);
+	dispatch_assert(*k != FLS_OUT_OF_INDEXES);
 }
 
 extern DWORD __dispatch_tsd_key;
@@ -226,7 +227,7 @@
 	if (_pthread_has_direct_tsd()) {
 		(void)_pthread_setspecific_direct(k, v);
 	} else {
-#if TARGET_IPHONE_SIMULATOR
+#if TARGET_OS_SIMULATOR
 		(void)_pthread_setspecific_static(k, v); // rdar://26058142
 #else
 		__builtin_trap(); // unreachable
diff --git a/src/shims/yield.c b/src/shims/yield.c
new file mode 100644
index 0000000..43f0017
--- /dev/null
+++ b/src/shims/yield.c
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2018 Apple Inc. All rights reserved.
+ *
+ * @APPLE_APACHE_LICENSE_HEADER_START@
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * @APPLE_APACHE_LICENSE_HEADER_END@
+ */
+
+#include "internal.h"
+
+DISPATCH_NOINLINE
+static void *
+__DISPATCH_WAIT_FOR_ENQUEUER__(void **ptr)
+{
+	int spins = 0;
+	void *value;
+	while ((value = os_atomic_load(ptr, relaxed)) == NULL) {
+		_dispatch_preemption_yield(++spins);
+	}
+	return value;
+}
+
+void *
+_dispatch_wait_for_enqueuer(void **ptr)
+{
+#if !DISPATCH_HW_CONFIG_UP
+#if defined(__arm__) || defined(__arm64__)
+	int spins = DISPATCH_WAIT_SPINS_WFE;
+	void *value;
+	while (unlikely(spins-- > 0)) {
+		if (likely(value = __builtin_arm_ldrex(ptr))) {
+			__builtin_arm_clrex();
+			return value;
+		}
+		__builtin_arm_wfe();
+	}
+#else
+	int spins = DISPATCH_WAIT_SPINS;
+	void *value;
+	while (unlikely(spins-- > 0)) {
+		if (likely(value = os_atomic_load(ptr, relaxed))) {
+			return value;
+		}
+		dispatch_hardware_pause();
+	}
+#endif
+#endif // DISPATCH_HW_CONFIG_UP
+	return __DISPATCH_WAIT_FOR_ENQUEUER__(ptr);
+}
diff --git a/src/shims/yield.h b/src/shims/yield.h
index 2373e50..53eb800 100644
--- a/src/shims/yield.h
+++ b/src/shims/yield.h
@@ -30,6 +30,24 @@
 #pragma mark -
 #pragma mark _dispatch_wait_until
 
+// _dispatch_wait_until() is used for cases when we're waiting on a thread to
+// finish a critical section that is a few instructions long and cannot fail
+// (IOW has a guarantee of making forward progress).
+//
+// Using _dispatch_wait_until() has two implications:
+// - there's a single waiter for the specified condition,
+// - the thing it is waiting on has a strong guarantee of forward progress
+//   toward resolving the condition.
+//
+// For these reasons, we spin briefly for the likely case when the other thread
+// is on core and we just caught it in the inconsistency window. If the
+// condition we're waiting for doesn't resolve quickly, then we yield because
+// it's very likely the other thread that can unblock us is preempted, and we
+// need to wait for it to be scheduled again.
+//
+// Its typical use case is the enqueuer/dequeuer starvation issue in the
+// dispatch enqueue algorithm, where there is typically a 1-10 instruction gap
+// between the exchange at the tail and setting the head/prev pointer.
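+//
+// A sketch of that gap (names are assumed, not the actual queue code):
+//
+//     // enqueuer
+//     prev = os_atomic_xchg(&q->tail, item, release);
+//     /* 1-10 instruction window in which prev->next is still NULL */
+//     os_atomic_store(&prev->next, item, relaxed);
+//
+//     // dequeuer that raced into that window
+//     next = _dispatch_wait_until(os_atomic_load(&prev->next, relaxed));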
 #if DISPATCH_HW_CONFIG_UP
 #define _dispatch_wait_until(c) ({ \
 		__typeof__(c) _c; \
@@ -40,9 +58,11 @@
 			_dispatch_preemption_yield(_spins); \
 		} \
 		_c; })
-#elif TARGET_OS_EMBEDDED
-// <rdar://problem/15440575>
-#ifndef DISPATCH_WAIT_SPINS
+#else
+#ifndef DISPATCH_WAIT_SPINS_WFE
+#define DISPATCH_WAIT_SPINS_WFE 10
+#endif
+#ifndef DISPATCH_WAIT_SPINS // <rdar://problem/15440575>
 #define DISPATCH_WAIT_SPINS 1024
 #endif
 #define _dispatch_wait_until(c) ({ \
@@ -50,23 +70,18 @@
 		int _spins = -(DISPATCH_WAIT_SPINS); \
 		for (;;) { \
 			if (likely(_c = (c))) break; \
-			if (slowpath(_spins++ >= 0)) { \
+			if (unlikely(_spins++ >= 0)) { \
 				_dispatch_preemption_yield(_spins); \
 			} else { \
 				dispatch_hardware_pause(); \
 			} \
 		} \
 		_c; })
-#else
-#define _dispatch_wait_until(c) ({ \
-		__typeof__(c) _c; \
-		for (;;) { \
-			if (likely(_c = (c))) break; \
-			dispatch_hardware_pause(); \
-		} \
-		_c; })
 #endif
 
+DISPATCH_NOT_TAIL_CALLED DISPATCH_EXPORT
+void *_dispatch_wait_for_enqueuer(void **ptr);
+
 #pragma mark -
 #pragma mark _dispatch_contention_wait_until
 
@@ -79,22 +94,25 @@
 #ifndef DISPATCH_CONTENTION_SPINS_MIN
 #define DISPATCH_CONTENTION_SPINS_MIN (32 - 1)
 #endif
-#if TARGET_OS_EMBEDDED
+#if TARGET_OS_IPHONE && !TARGET_OS_SIMULATOR
 #define _dispatch_contention_spins() \
 		((DISPATCH_CONTENTION_SPINS_MIN) + ((DISPATCH_CONTENTION_SPINS_MAX) - \
 		(DISPATCH_CONTENTION_SPINS_MIN)) / 2)
 #elif defined(_WIN32)
-#define _dispatch_contention_spins() ({                                        \
-		unsigned int _value;                                           \
-		rand_s(&_value);                                               \
-		(_value & DISPATCH_CONTENTION_SPINS_MAX) | DISPATCH_CONTENTION_SPINS_MIN; })
+// Use randomness to prevent threads from resonating at the same frequency and
+// permanently contending. Windows doesn't provide rand_r(), so use a simple
+// LCG. (msvcrt has rand_s(), but cryptographic randomness is overkill here.)
+#define _dispatch_contention_spins() ({ \
+		static os_atomic(unsigned int) _seed = 1; \
+		unsigned int _next = os_atomic_load(&_seed, relaxed); \
+		os_atomic_store(&_seed, _next * 1103515245 + 12345, relaxed); \
+		((_next >> 24) & (DISPATCH_CONTENTION_SPINS_MAX)) | \
+				(DISPATCH_CONTENTION_SPINS_MIN); })
 #else
 // Use randomness to prevent threads from resonating at the same
-// frequency and permanently contending. All threads sharing the same
-// seed value is safe with the FreeBSD rand_r implementation.
+// frequency and permanently contending.
 #define _dispatch_contention_spins() ({ \
-		static unsigned int _seed; \
-		((unsigned int)rand_r(&_seed) & (DISPATCH_CONTENTION_SPINS_MAX)) | \
+		((unsigned int)rand() & (DISPATCH_CONTENTION_SPINS_MAX)) | \
 				(DISPATCH_CONTENTION_SPINS_MIN); })
 #endif
 #define _dispatch_contention_wait_until(c) ({ \
@@ -102,7 +120,7 @@
 		unsigned int _spins = _dispatch_contention_spins(); \
 		while (_spins--) { \
 			dispatch_hardware_pause(); \
-			if ((_out = fastpath(c))) break; \
+			if (likely(_out = (c))) break; \
 		}; _out; })
 #endif
 
@@ -132,17 +150,27 @@
 		DISPATCH_YIELD_THREAD_SWITCH_OPTION, (mach_msg_timeout_t)(n))
 #define _dispatch_preemption_yield_to(th, n) thread_switch(th, \
 		DISPATCH_YIELD_THREAD_SWITCH_OPTION, (mach_msg_timeout_t)(n))
+#elif HAVE_PTHREAD_YIELD_NP
+#define _dispatch_preemption_yield(n) { (void)n; pthread_yield_np(); }
+#define _dispatch_preemption_yield_to(th, n) { (void)n; pthread_yield_np(); }
+#elif defined(_WIN32)
+#define _dispatch_preemption_yield(n) { (void)n; Sleep(0); }
+#define _dispatch_preemption_yield_to(th, n) { (void)n; Sleep(0); }
 #else
-#define _dispatch_preemption_yield(n) pthread_yield_np()
-#define _dispatch_preemption_yield_to(th, n) pthread_yield_np()
+#define _dispatch_preemption_yield(n) { (void)n; sched_yield(); }
+#define _dispatch_preemption_yield_to(th, n) { (void)n; sched_yield(); }
 #endif // HAVE_MACH
 
 #pragma mark -
 #pragma mark _dispatch_contention_usleep
 
 #ifndef DISPATCH_CONTENTION_USLEEP_START
+#if defined(_WIN32)
+#define DISPATCH_CONTENTION_USLEEP_START 1000   // Must be >= 1ms for Sleep()
+#else
 #define DISPATCH_CONTENTION_USLEEP_START 500
 #endif
+#endif
 #ifndef DISPATCH_CONTENTION_USLEEP_MAX
 #define DISPATCH_CONTENTION_USLEEP_MAX 100000
 #endif
@@ -157,20 +185,7 @@
 #endif
 #else
 #if defined(_WIN32)
-DISPATCH_INLINE void
-_dispatch_contention_usleep(uint64_t useconds) {
-	static BOOL bQPFExecuted = FALSE;
-	static LARGE_INTEGER liFreq;
-	LARGE_INTEGER liStart, liNow;
-
-	if (!bQPFExecuted)
-		bQPFExecuted = QueryPerformanceFrequency(&liFreq);
-
-	QueryPerformanceCounter(&liStart);
-	do {
-		QueryPerformanceCounter(&liNow);
-	} while ((liNow.QuadPart - liStart.QuadPart) / (float)liFreq.QuadPart * 1000 * 1000 < useconds);
-}
+#define _dispatch_contention_usleep(u) Sleep((u) / 1000)
 #else
 #define _dispatch_contention_usleep(u) usleep((u))
 #endif
diff --git a/src/source.c b/src/source.c
index ff3ec70..1010da1 100644
--- a/src/source.c
+++ b/src/source.c
@@ -20,21 +20,24 @@
 
 #include "internal.h"
 
-static void _dispatch_source_handler_free(dispatch_source_t ds, long kind);
-static void _dispatch_source_set_interval(dispatch_source_t ds, uint64_t interval);
-
-#define DISPATCH_TIMERS_UNREGISTER 0x1
-#define DISPATCH_TIMERS_RETAIN_2 0x2
-static void _dispatch_timers_update(dispatch_unote_t du, uint32_t flags);
-static void _dispatch_timers_unregister(dispatch_timer_source_refs_t dt);
-
-static void _dispatch_source_timer_configure(dispatch_source_t ds);
-static inline unsigned long _dispatch_source_timer_data(
-		dispatch_source_t ds, dispatch_unote_t du);
+static void _dispatch_source_handler_free(dispatch_source_refs_t ds, long kind);
 
 #pragma mark -
 #pragma mark dispatch_source_t
 
+DISPATCH_ALWAYS_INLINE
+static inline dispatch_continuation_t
+_dispatch_source_get_handler(dispatch_source_refs_t dr, long kind)
+{
+	return os_atomic_load(&dr->ds_handler[kind], relaxed);
+}
+#define _dispatch_source_get_event_handler(dr) \
+		_dispatch_source_get_handler(dr, DS_EVENT_HANDLER)
+#define _dispatch_source_get_cancel_handler(dr) \
+		_dispatch_source_get_handler(dr, DS_CANCEL_HANDLER)
+#define _dispatch_source_get_registration_handler(dr) \
+		_dispatch_source_get_handler(dr, DS_REGISTN_HANDLER)
+
 dispatch_source_t
 dispatch_source_create(dispatch_source_type_t dst, uintptr_t handle,
 		uintptr_t mask, dispatch_queue_t dq)
@@ -47,24 +50,21 @@
 		return DISPATCH_BAD_INPUT;
 	}
 
-	ds = _dispatch_object_alloc(DISPATCH_VTABLE(source),
-			sizeof(struct dispatch_source_s));
-	// Initialize as a queue first, then override some settings below.
-	_dispatch_queue_init(ds->_as_dq, DQF_LEGACY, 1,
-			DISPATCH_QUEUE_INACTIVE | DISPATCH_QUEUE_ROLE_INNER);
+	ds = _dispatch_queue_alloc(source,
+			dux_type(dr)->dst_strict ? DSF_STRICT : DQF_MUTABLE, 1,
+			DISPATCH_QUEUE_INACTIVE | DISPATCH_QUEUE_ROLE_INNER)._ds;
 	ds->dq_label = "source";
-	ds->do_ref_cnt++; // the reference the manager queue holds
 	ds->ds_refs = dr;
 	dr->du_owner_wref = _dispatch_ptr2wref(ds);
 
-	if (slowpath(!dq)) {
-		dq = _dispatch_get_root_queue(DISPATCH_QOS_DEFAULT, true);
+	if (unlikely(!dq)) {
+		dq = _dispatch_get_default_queue(true);
 	} else {
 		_dispatch_retain((dispatch_queue_t _Nonnull)dq);
 	}
 	ds->do_targetq = dq;
-	if (dr->du_is_timer && (dr->du_fflags & DISPATCH_TIMER_INTERVAL)) {
-		_dispatch_source_set_interval(ds, handle);
+	if (dr->du_is_timer && (dr->du_timer_flags & DISPATCH_TIMER_INTERVAL)) {
+		dispatch_source_set_timer(ds, DISPATCH_TIME_NOW, handle, UINT64_MAX);
 	}
 	_dispatch_object_debug(ds, "%s", __func__);
 	return ds;
@@ -74,19 +74,22 @@
 _dispatch_source_dispose(dispatch_source_t ds, bool *allow_free)
 {
 	_dispatch_object_debug(ds, "%s", __func__);
-	_dispatch_source_handler_free(ds, DS_REGISTN_HANDLER);
-	_dispatch_source_handler_free(ds, DS_EVENT_HANDLER);
-	_dispatch_source_handler_free(ds, DS_CANCEL_HANDLER);
+
+	_dispatch_trace_source_dispose(ds);
+	_dispatch_source_handler_free(ds->ds_refs, DS_REGISTN_HANDLER);
+	_dispatch_source_handler_free(ds->ds_refs, DS_EVENT_HANDLER);
+	_dispatch_source_handler_free(ds->ds_refs, DS_CANCEL_HANDLER);
 	_dispatch_unote_dispose(ds->ds_refs);
 	ds->ds_refs = NULL;
-	_dispatch_queue_destroy(ds->_as_dq, allow_free);
+	_dispatch_lane_class_dispose(ds, allow_free);
 }
 
 void
 _dispatch_source_xref_dispose(dispatch_source_t ds)
 {
-	dispatch_queue_flags_t dqf = _dispatch_queue_atomic_flags(ds->_as_dq);
-	if (unlikely(!(dqf & (DQF_LEGACY|DSF_CANCELED)))) {
+	dispatch_queue_flags_t dqf = _dispatch_queue_atomic_flags(ds);
+	if (unlikely((dqf & DSF_STRICT) && !(dqf & DSF_CANCELED) &&
+			_dispatch_source_get_cancel_handler(ds->ds_refs))) {
 		DISPATCH_CLIENT_CRASH(ds, "Release of a source that has not been "
 				"cancelled, but has a mandatory cancel handler");
 	}
@@ -110,12 +113,15 @@
 	if (dr->du_vmpressure_override) {
 		return NOTE_VM_PRESSURE;
 	}
-#if TARGET_IPHONE_SIMULATOR
+#if TARGET_OS_SIMULATOR
 	if (dr->du_memorypressure_override) {
 		return NOTE_MEMORYSTATUS_PRESSURE_WARN;
 	}
 #endif
 #endif // DISPATCH_USE_MEMORYSTATUS
+	if (dr->du_is_timer) {
+		return dr->du_timer_flags;
+	}
 	return dr->du_fflags;
 }
 
@@ -123,45 +129,51 @@
 dispatch_source_get_handle(dispatch_source_t ds)
 {
 	dispatch_source_refs_t dr = ds->ds_refs;
-#if TARGET_IPHONE_SIMULATOR
+#if TARGET_OS_SIMULATOR
 	if (dr->du_memorypressure_override) {
 		return 0;
 	}
 #endif
+	if (dr->du_filter == DISPATCH_EVFILT_TIMER_WITH_CLOCK) {
+		switch (_dispatch_timer_flags_to_clock(dr->du_timer_flags)) {
+		case DISPATCH_CLOCK_UPTIME: return DISPATCH_CLOCKID_UPTIME;
+		case DISPATCH_CLOCK_MONOTONIC: return DISPATCH_CLOCKID_MONOTONIC;
+		case DISPATCH_CLOCK_WALL: return DISPATCH_CLOCKID_WALLTIME;
+		}
+	}
 	return dr->du_ident;
 }
 
 uintptr_t
 dispatch_source_get_data(dispatch_source_t ds)
 {
-#if DISPATCH_USE_MEMORYSTATUS
 	dispatch_source_refs_t dr = ds->ds_refs;
+#if DISPATCH_USE_MEMORYSTATUS
 	if (dr->du_vmpressure_override) {
 		return NOTE_VM_PRESSURE;
 	}
-#if TARGET_IPHONE_SIMULATOR
+#if TARGET_OS_SIMULATOR
 	if (dr->du_memorypressure_override) {
 		return NOTE_MEMORYSTATUS_PRESSURE_WARN;
 	}
 #endif
 #endif // DISPATCH_USE_MEMORYSTATUS
-	uint64_t value = os_atomic_load2o(ds, ds_data, relaxed);
-	return (uintptr_t)(
-		ds->ds_refs->du_data_action == DISPATCH_UNOTE_ACTION_DATA_OR_STATUS_SET
-		? DISPATCH_SOURCE_GET_DATA(value) : value);
+	uint64_t value = os_atomic_load2o(dr, ds_data, relaxed);
+	return (unsigned long)(dr->du_has_extended_status ?
+			DISPATCH_SOURCE_GET_DATA(value) : value);
 }
 
 size_t
 dispatch_source_get_extended_data(dispatch_source_t ds,
 		dispatch_source_extended_data_t edata, size_t size)
 {
+	dispatch_source_refs_t dr = ds->ds_refs;
 	size_t target_size = MIN(size,
 		sizeof(struct dispatch_source_extended_data_s));
 	if (size > 0) {
 		unsigned long data, status = 0;
-		if (ds->ds_refs->du_data_action
-				== DISPATCH_UNOTE_ACTION_DATA_OR_STATUS_SET) {
-			uint64_t combined = os_atomic_load(&ds->ds_data, relaxed);
+		if (dr->du_has_extended_status) {
+			uint64_t combined = os_atomic_load(&dr->ds_data, relaxed);
 			data = DISPATCH_SOURCE_GET_DATA(combined);
 			status = DISPATCH_SOURCE_GET_STATUS(combined);
 		} else {
@@ -184,39 +196,31 @@
 	return target_size;
 }
 
-DISPATCH_NOINLINE
-void
-_dispatch_source_merge_data(dispatch_source_t ds, pthread_priority_t pp,
-		uintptr_t val)
-{
-	dispatch_queue_flags_t dqf = _dispatch_queue_atomic_flags(ds->_as_dq);
-	int filter = ds->ds_refs->du_filter;
-
-	if (unlikely(dqf & (DSF_CANCELED | DSF_DELETED))) {
-		return;
-	}
-
-	switch (filter) {
-	case DISPATCH_EVFILT_CUSTOM_ADD:
-		os_atomic_add2o(ds, ds_pending_data, val, relaxed);
-		break;
-	case DISPATCH_EVFILT_CUSTOM_OR:
-		os_atomic_or2o(ds, ds_pending_data, val, relaxed);
-		break;
-	case DISPATCH_EVFILT_CUSTOM_REPLACE:
-		os_atomic_store2o(ds, ds_pending_data, val, relaxed);
-		break;
-	default:
-		DISPATCH_CLIENT_CRASH(filter, "Invalid source type");
-	}
-
-	dx_wakeup(ds, _dispatch_qos_from_pp(pp), DISPATCH_WAKEUP_MAKE_DIRTY);
-}
-
 void
 dispatch_source_merge_data(dispatch_source_t ds, uintptr_t val)
 {
-	_dispatch_source_merge_data(ds, 0, val);
+	dispatch_queue_flags_t dqf = _dispatch_queue_atomic_flags(ds);
+	dispatch_source_refs_t dr = ds->ds_refs;
+
+	if (unlikely(dqf & (DSF_CANCELED | DQF_RELEASED))) {
+		return;
+	}
+
+	switch (dr->du_filter) {
+	case DISPATCH_EVFILT_CUSTOM_ADD:
+		os_atomic_add2o(dr, ds_pending_data, val, relaxed);
+		break;
+	case DISPATCH_EVFILT_CUSTOM_OR:
+		os_atomic_or2o(dr, ds_pending_data, val, relaxed);
+		break;
+	case DISPATCH_EVFILT_CUSTOM_REPLACE:
+		os_atomic_store2o(dr, ds_pending_data, val, relaxed);
+		break;
+	default:
+		DISPATCH_CLIENT_CRASH(dr->du_filter, "Invalid source type");
+	}
+
+	dx_wakeup(ds, 0, DISPATCH_WAKEUP_MAKE_DIRTY);
 }
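For reference, a minimal usage sketch of the public API this switch serves, assuming a pre-existing queue `q` (hypothetical) and the standard DATA_ADD source type; not part of the patch itself:

// Coalescing counter: several merges may collapse into one handler callout.
dispatch_source_t src = dispatch_source_create(DISPATCH_SOURCE_TYPE_DATA_ADD, 0, 0, q);
dispatch_source_set_event_handler(src, ^{
	// Sum of all values merged since the last callout (ADD semantics).
	uintptr_t pending = dispatch_source_get_data(src);
	(void)pending;
});
dispatch_resume(src);
dispatch_source_merge_data(src, 1); // routed through the filter switch above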
 
 #pragma mark -
@@ -224,21 +228,8 @@
 
 DISPATCH_ALWAYS_INLINE
 static inline dispatch_continuation_t
-_dispatch_source_get_handler(dispatch_source_refs_t dr, long kind)
-{
-	return os_atomic_load(&dr->ds_handler[kind], relaxed);
-}
-#define _dispatch_source_get_event_handler(dr) \
-	_dispatch_source_get_handler(dr, DS_EVENT_HANDLER)
-#define _dispatch_source_get_cancel_handler(dr) \
-	_dispatch_source_get_handler(dr, DS_CANCEL_HANDLER)
-#define _dispatch_source_get_registration_handler(dr) \
-	_dispatch_source_get_handler(dr, DS_REGISTN_HANDLER)
-
-DISPATCH_ALWAYS_INLINE
-static inline dispatch_continuation_t
-_dispatch_source_handler_alloc(dispatch_source_t ds, void *func, long kind,
-		bool block)
+_dispatch_source_handler_alloc(dispatch_source_t ds, void *func, uintptr_t kind,
+		bool is_block)
 {
 	// sources don't propagate priority by default
 	const dispatch_block_flags_t flags =
@@ -248,20 +239,19 @@
 		uintptr_t dc_flags = 0;
 
 		if (kind != DS_EVENT_HANDLER) {
-			dc_flags |= DISPATCH_OBJ_CONSUME_BIT;
+			dc_flags |= DC_FLAG_CONSUME;
 		}
-		if (block) {
+		if (is_block) {
 #ifdef __BLOCKS__
-			_dispatch_continuation_init(dc, ds, func, 0, flags, dc_flags);
+			_dispatch_continuation_init(dc, ds, func, flags, dc_flags);
 #endif /* __BLOCKS__ */
 		} else {
-			dc_flags |= DISPATCH_OBJ_CTXT_FETCH_BIT;
-			_dispatch_continuation_init_f(dc, ds, ds->do_ctxt, func,
-					0, flags, dc_flags);
+			dc_flags |= DC_FLAG_FETCH_CONTEXT;
+			_dispatch_continuation_init_f(dc, ds, ds->do_ctxt, func, flags,
+					dc_flags);
 		}
-		_dispatch_trace_continuation_push(ds->_as_dq, dc);
 	} else {
-		dc->dc_flags = 0;
+		dc->dc_flags = DC_FLAG_ALLOCATED;
 		dc->dc_func = NULL;
 	}
 	return dc;
@@ -272,7 +262,7 @@
 _dispatch_source_handler_dispose(dispatch_continuation_t dc)
 {
 #ifdef __BLOCKS__
-	if (dc->dc_flags & DISPATCH_OBJ_BLOCK_BIT) {
+	if (dc->dc_flags & DC_FLAG_BLOCK) {
 		Block_release(dc->dc_ctxt);
 	}
 #endif /* __BLOCKS__ */
@@ -285,16 +275,16 @@
 
 DISPATCH_ALWAYS_INLINE
 static inline dispatch_continuation_t
-_dispatch_source_handler_take(dispatch_source_t ds, long kind)
+_dispatch_source_handler_take(dispatch_source_refs_t dr, long kind)
 {
-	return os_atomic_xchg(&ds->ds_refs->ds_handler[kind], NULL, relaxed);
+	return os_atomic_xchg(&dr->ds_handler[kind], NULL, relaxed);
 }
 
 DISPATCH_ALWAYS_INLINE
 static inline void
-_dispatch_source_handler_free(dispatch_source_t ds, long kind)
+_dispatch_source_handler_free(dispatch_source_refs_t dr, long kind)
 {
-	dispatch_continuation_t dc = _dispatch_source_handler_take(ds, kind);
+	dispatch_continuation_t dc = _dispatch_source_handler_take(dr, kind);
 	if (dc) _dispatch_source_handler_dispose(dc);
 }
 
@@ -306,7 +296,7 @@
 	if (!dc->dc_func) {
 		_dispatch_continuation_free(dc);
 		dc = NULL;
-	} else if (dc->dc_flags & DISPATCH_OBJ_CTXT_FETCH_BIT) {
+	} else if (dc->dc_flags & DC_FLAG_FETCH_CONTEXT) {
 		dc->dc_ctxt = ds->do_ctxt;
 	}
 	dc = os_atomic_xchg(&ds->ds_refs->ds_handler[kind], dc, release);
@@ -317,37 +307,48 @@
 static void
 _dispatch_source_set_handler_slow(void *context)
 {
-	dispatch_source_t ds = (dispatch_source_t)_dispatch_queue_get_current();
+	dispatch_source_t ds = upcast(_dispatch_queue_get_current())._ds;
 	dispatch_assert(dx_type(ds) == DISPATCH_SOURCE_KEVENT_TYPE);
 
 	dispatch_continuation_t dc = context;
-	void *kind = dc->dc_data;
+	uintptr_t kind = (uintptr_t)dc->dc_data;
 	dc->dc_data = NULL;
-	_dispatch_source_handler_replace(ds, (uintptr_t)kind, dc);
+	_dispatch_source_handler_replace(ds, kind, dc);
 }
 
 DISPATCH_NOINLINE
 static void
-_dispatch_source_set_handler(dispatch_source_t ds, uintptr_t kind,
-		dispatch_continuation_t dc)
+_dispatch_source_set_handler(dispatch_source_t ds, void *func,
+		uintptr_t kind, bool is_block)
 {
-	dispatch_assert(dx_type(ds) == DISPATCH_SOURCE_KEVENT_TYPE);
-	if (_dispatch_queue_try_inactive_suspend(ds->_as_dq)) {
+	dispatch_continuation_t dc;
+
+	dc = _dispatch_source_handler_alloc(ds, func, kind, is_block);
+
+	if (_dispatch_lane_try_inactive_suspend(ds)) {
 		_dispatch_source_handler_replace(ds, kind, dc);
-		return dx_vtable(ds)->do_resume(ds, false);
+		return _dispatch_lane_resume(ds, false);
 	}
-	if (unlikely(!_dispatch_queue_is_legacy(ds->_as_dq))) {
+
+	dispatch_queue_flags_t dqf = _dispatch_queue_atomic_flags(ds);
+	if (unlikely(dqf & DSF_STRICT)) {
 		DISPATCH_CLIENT_CRASH(kind, "Cannot change a handler of this source "
 				"after it has been activated");
 	}
-	_dispatch_ktrace1(DISPATCH_PERF_post_activate_mutation, ds);
-	if (kind == DS_REGISTN_HANDLER) {
-		_dispatch_bug_deprecated("Setting registration handler after "
-				"the source has been activated");
+	// Ignore handler mutations past cancellation; they're harmless
+	if ((dqf & DSF_CANCELED) == 0) {
+		_dispatch_ktrace1(DISPATCH_PERF_post_activate_mutation, ds);
+		if (kind == DS_REGISTN_HANDLER) {
+			_dispatch_bug_deprecated("Setting registration handler after "
+					"the source has been activated");
+		} else if (func == NULL) {
+			_dispatch_bug_deprecated("Clearing handler after "
+					"the source has been activated");
+		}
 	}
 	dc->dc_data = (void *)kind;
-	_dispatch_barrier_trysync_or_async_f(ds->_as_dq, dc,
-			_dispatch_source_set_handler_slow);
+	_dispatch_barrier_trysync_or_async_f(ds, dc,
+			_dispatch_source_set_handler_slow, 0);
 }
 
 #ifdef __BLOCKS__
@@ -355,9 +356,7 @@
 dispatch_source_set_event_handler(dispatch_source_t ds,
 		dispatch_block_t handler)
 {
-	dispatch_continuation_t dc;
-	dc = _dispatch_source_handler_alloc(ds, handler, DS_EVENT_HANDLER, true);
-	_dispatch_source_set_handler(ds, DS_EVENT_HANDLER, dc);
+	_dispatch_source_set_handler(ds, handler, DS_EVENT_HANDLER, true);
 }
 #endif /* __BLOCKS__ */
 
@@ -365,69 +364,39 @@
 dispatch_source_set_event_handler_f(dispatch_source_t ds,
 		dispatch_function_t handler)
 {
-	dispatch_continuation_t dc;
-	dc = _dispatch_source_handler_alloc(ds, handler, DS_EVENT_HANDLER, false);
-	_dispatch_source_set_handler(ds, DS_EVENT_HANDLER, dc);
+	_dispatch_source_set_handler(ds, handler, DS_EVENT_HANDLER, false);
 }
 
 #ifdef __BLOCKS__
-DISPATCH_NOINLINE
-static void
-_dispatch_source_set_cancel_handler(dispatch_source_t ds,
-		dispatch_block_t handler)
-{
-	dispatch_continuation_t dc;
-	dc = _dispatch_source_handler_alloc(ds, handler, DS_CANCEL_HANDLER, true);
-	_dispatch_source_set_handler(ds, DS_CANCEL_HANDLER, dc);
-}
-
 void
 dispatch_source_set_cancel_handler(dispatch_source_t ds,
 		dispatch_block_t handler)
 {
-	if (unlikely(!_dispatch_queue_is_legacy(ds->_as_dq))) {
-		DISPATCH_CLIENT_CRASH(0, "Cannot set a non mandatory handler on "
-				"this source");
-	}
-	return _dispatch_source_set_cancel_handler(ds, handler);
+	_dispatch_source_set_handler(ds, handler, DS_CANCEL_HANDLER, true);
 }
 
 void
 dispatch_source_set_mandatory_cancel_handler(dispatch_source_t ds,
 		dispatch_block_t handler)
 {
-	_dispatch_queue_atomic_flags_clear(ds->_as_dq, DQF_LEGACY);
-	return _dispatch_source_set_cancel_handler(ds, handler);
+	_dispatch_queue_atomic_flags_set_and_clear(ds, DSF_STRICT, DQF_MUTABLE);
+	dispatch_source_set_cancel_handler(ds, handler);
 }
 #endif /* __BLOCKS__ */
 
-DISPATCH_NOINLINE
-static void
-_dispatch_source_set_cancel_handler_f(dispatch_source_t ds,
-		dispatch_function_t handler)
-{
-	dispatch_continuation_t dc;
-	dc = _dispatch_source_handler_alloc(ds, handler, DS_CANCEL_HANDLER, false);
-	_dispatch_source_set_handler(ds, DS_CANCEL_HANDLER, dc);
-}
-
 void
 dispatch_source_set_cancel_handler_f(dispatch_source_t ds,
 		dispatch_function_t handler)
 {
-	if (unlikely(!_dispatch_queue_is_legacy(ds->_as_dq))) {
-		DISPATCH_CLIENT_CRASH(0, "Cannot set a non mandatory handler on "
-				"this source");
-	}
-	return _dispatch_source_set_cancel_handler_f(ds, handler);
+	_dispatch_source_set_handler(ds, handler, DS_CANCEL_HANDLER, false);
 }
 
 void
 dispatch_source_set_mandatory_cancel_handler_f(dispatch_source_t ds,
 		dispatch_function_t handler)
 {
-	_dispatch_queue_atomic_flags_clear(ds->_as_dq, DQF_LEGACY);
-	return _dispatch_source_set_cancel_handler_f(ds, handler);
+	_dispatch_queue_atomic_flags_set_and_clear(ds, DSF_STRICT, DQF_MUTABLE);
+	dispatch_source_set_cancel_handler_f(ds, handler);
 }
 
 #ifdef __BLOCKS__
@@ -435,9 +404,7 @@
 dispatch_source_set_registration_handler(dispatch_source_t ds,
 		dispatch_block_t handler)
 {
-	dispatch_continuation_t dc;
-	dc = _dispatch_source_handler_alloc(ds, handler, DS_REGISTN_HANDLER, true);
-	_dispatch_source_set_handler(ds, DS_REGISTN_HANDLER, dc);
+	_dispatch_source_set_handler(ds, handler, DS_REGISTN_HANDLER, true);
 }
 #endif /* __BLOCKS__ */
 
@@ -445,28 +412,42 @@
 dispatch_source_set_registration_handler_f(dispatch_source_t ds,
 	dispatch_function_t handler)
 {
-	dispatch_continuation_t dc;
-	dc = _dispatch_source_handler_alloc(ds, handler, DS_REGISTN_HANDLER, false);
-	_dispatch_source_set_handler(ds, DS_REGISTN_HANDLER, dc);
+	_dispatch_source_set_handler(ds, handler, DS_REGISTN_HANDLER, false);
 }
 
 #pragma mark -
 #pragma mark dispatch_source_invoke
 
+#if TARGET_OS_MAC
+bool
+_dispatch_source_will_reenable_kevent_4NW(dispatch_source_t ds)
+{
+	uint64_t dq_state = os_atomic_load2o(ds, dq_state, relaxed);
+
+	if (unlikely(!_dq_state_drain_locked_by_self(dq_state))) {
+		DISPATCH_CLIENT_CRASH(0, "_dispatch_source_will_reenable_kevent_4NW "
+				"not called from within the event handler");
+	}
+	return _dispatch_unote_needs_rearm(ds->ds_refs);
+}
+#endif // TARGET_OS_MAC
+
 static void
 _dispatch_source_registration_callout(dispatch_source_t ds, dispatch_queue_t cq,
 		dispatch_invoke_flags_t flags)
 {
 	dispatch_continuation_t dc;
 
-	dc = _dispatch_source_handler_take(ds, DS_REGISTN_HANDLER);
+	dc = _dispatch_source_handler_take(ds->ds_refs, DS_REGISTN_HANDLER);
 	if (ds->dq_atomic_flags & (DSF_CANCELED | DQF_RELEASED)) {
 		// no registration callout if source is canceled rdar://problem/8955246
 		return _dispatch_source_handler_dispose(dc);
 	}
-	if (dc->dc_flags & DISPATCH_OBJ_CTXT_FETCH_BIT) {
+	if (dc->dc_flags & DC_FLAG_FETCH_CONTEXT) {
 		dc->dc_ctxt = ds->do_ctxt;
 	}
+
+	_dispatch_trace_source_callout_entry(ds, DS_REGISTN_HANDLER, cq, dc);
 	_dispatch_continuation_pop(dc, NULL, flags, cq);
 }
 
@@ -474,50 +455,134 @@
 _dispatch_source_cancel_callout(dispatch_source_t ds, dispatch_queue_t cq,
 		dispatch_invoke_flags_t flags)
 {
+	dispatch_source_refs_t dr = ds->ds_refs;
 	dispatch_continuation_t dc;
 
-	dc = _dispatch_source_handler_take(ds, DS_CANCEL_HANDLER);
-	ds->ds_pending_data = 0;
-	ds->ds_data = 0;
-	_dispatch_source_handler_free(ds, DS_EVENT_HANDLER);
-	_dispatch_source_handler_free(ds, DS_REGISTN_HANDLER);
+	dc = _dispatch_source_handler_take(dr, DS_CANCEL_HANDLER);
+	dr->ds_pending_data = 0;
+	dr->ds_data = 0;
+	_dispatch_source_handler_free(dr, DS_EVENT_HANDLER);
+	_dispatch_source_handler_free(dr, DS_REGISTN_HANDLER);
 	if (!dc) {
 		return;
 	}
 	if (!(ds->dq_atomic_flags & DSF_CANCELED)) {
 		return _dispatch_source_handler_dispose(dc);
 	}
-	if (dc->dc_flags & DISPATCH_OBJ_CTXT_FETCH_BIT) {
+	if (dc->dc_flags & DC_FLAG_FETCH_CONTEXT) {
 		dc->dc_ctxt = ds->do_ctxt;
 	}
+	_dispatch_trace_source_callout_entry(ds, DS_CANCEL_HANDLER, cq, dc);
 	_dispatch_continuation_pop(dc, NULL, flags, cq);
 }
 
+DISPATCH_ALWAYS_INLINE
+static inline bool
+_dispatch_source_refs_needs_configuration(dispatch_unote_t du)
+{
+	return du._du->du_is_timer &&
+			os_atomic_load2o(du._dt, dt_pending_config, relaxed);
+}
+
+DISPATCH_ALWAYS_INLINE
+static inline bool
+_dispatch_source_refs_needs_rearm(dispatch_unote_t du)
+{
+	if (!du._du->du_is_timer) {
+		return _dispatch_unote_needs_rearm(du);
+	}
+	if (os_atomic_load2o(du._dt, dt_pending_config, relaxed)) {
+		return true;
+	}
+	if (_dispatch_unote_needs_rearm(du)) {
+		return du._dt->dt_timer.target < INT64_MAX;
+	}
+	return false;
+}
+
+DISPATCH_ALWAYS_INLINE
+static inline unsigned long
+_dispatch_source_timer_data(dispatch_timer_source_refs_t dr, uint64_t prev)
+{
+	unsigned long data = (unsigned long)prev >> 1;
+
+	// The timer may be in _dispatch_source_invoke2() already for other
+	// reasons such as running the registration handler when ds_pending_data
+	// is changed by _dispatch_timers_run2() without holding the drain lock.
+	//
+	// We hence need dependency ordering to pair with the release barrier
+	// done by _dispatch_timers_run2() when setting the DISARMED_MARKER bit.
+	os_atomic_thread_fence(dependency);
+	dr = os_atomic_force_dependency_on(dr, data);
+
+	if (dr->dt_timer.target < INT64_MAX) {
+		uint64_t now = _dispatch_time_now(DISPATCH_TIMER_CLOCK(dr->du_ident));
+		if (now >= dr->dt_timer.target) {
+			data = _dispatch_timer_unote_compute_missed(dr, now, data);
+		}
+	}
+
+	return data;
+}
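A rough stand-alone analogue of the publish/consume pairing the comment above describes, using plain C11 acquire/release where libdispatch relies on the cheaper dependency ordering; the names are illustrative only:

#include <stdatomic.h>
#include <stdint.h>

static _Atomic(uint64_t) pending;   // stand-in for ds_pending_data
static uint64_t timer_state;        // stand-in for the timer fields read later

static void publisher(void) {
	timer_state = 42;                                           // written first
	atomic_store_explicit(&pending, 1, memory_order_release);  // publish marker
}

static uint64_t consumer(void) {
	if (atomic_load_explicit(&pending, memory_order_acquire)) { // pairs with release
		return timer_state;                                     // now safe to read
	}
	return 0;
}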
+
 static void
 _dispatch_source_latch_and_call(dispatch_source_t ds, dispatch_queue_t cq,
 		dispatch_invoke_flags_t flags)
 {
 	dispatch_source_refs_t dr = ds->ds_refs;
 	dispatch_continuation_t dc = _dispatch_source_get_handler(dr, DS_EVENT_HANDLER);
-	uint64_t prev;
+	uint64_t prev = os_atomic_xchg2o(dr, ds_pending_data, 0, relaxed);
 
-	if (dr->du_is_timer && !(dr->du_fflags & DISPATCH_TIMER_AFTER)) {
-		prev = _dispatch_source_timer_data(ds, dr);
-	} else {
-		prev = os_atomic_xchg2o(ds, ds_pending_data, 0, relaxed);
+	if (dr->du_is_timer && (dr->du_timer_flags & DISPATCH_TIMER_AFTER)) {
+		_dispatch_trace_item_pop(cq, dc); // see _dispatch_after
 	}
-	if (dr->du_data_action == DISPATCH_UNOTE_ACTION_DATA_SET) {
-		ds->ds_data = ~prev;
-	} else {
-		ds->ds_data = prev;
+	switch (dux_type(dr)->dst_action) {
+	case DISPATCH_UNOTE_ACTION_SOURCE_TIMER:
+		if (prev & DISPATCH_TIMER_DISARMED_MARKER) {
+			dr->ds_data = _dispatch_source_timer_data(ds->ds_timer_refs, prev);
+		} else {
+			dr->ds_data = prev >> 1;
+		}
+		break;
+	case DISPATCH_UNOTE_ACTION_SOURCE_SET_DATA:
+		dr->ds_data = ~prev;
+		break;
+	default:
+		if (prev == 0 && dr->du_filter == DISPATCH_EVFILT_CUSTOM_REPLACE) {
+			return;
+		}
+		dr->ds_data = prev;
+		break;
 	}
-	if (!dispatch_assume(prev != 0) || !dc) {
+	if (unlikely(!dc)) {
+		return _dispatch_ktrace1(DISPATCH_PERF_handlerless_source_fire, ds);
+	}
+	if (!dispatch_assume(prev != 0)) {
 		return;
 	}
+	_dispatch_trace_source_callout_entry(ds, DS_EVENT_HANDLER, cq, dc);
+#ifdef DBG_BSD_MEMSTAT
+	if (unlikely(dr->du_filter == EVFILT_MEMORYSTATUS)) {
+		_dispatch_ktrace2(KDBG_CODE(DBG_BSD, DBG_BSD_MEMSTAT, 0x100) | DBG_FUNC_START,
+				prev, _dispatch_continuation_get_function_symbol(dc));
+	}
+#endif
 	_dispatch_continuation_pop(dc, NULL, flags, cq);
-	if (dr->du_is_timer && (dr->du_fflags & DISPATCH_TIMER_AFTER)) {
-		_dispatch_source_handler_free(ds, DS_EVENT_HANDLER);
-		dispatch_release(ds); // dispatch_after sources are one-shot
+#ifdef DBG_BSD_MEMSTAT
+	if (unlikely(dr->du_filter == EVFILT_MEMORYSTATUS)) {
+		_dispatch_ktrace0(KDBG_CODE(DBG_BSD, DBG_BSD_MEMSTAT, 0x100) | DBG_FUNC_END);
+	}
+#endif
+	if (dr->du_is_timer) {
+		if ((prev & DISPATCH_TIMER_DISARMED_MARKER) &&
+				_dispatch_source_refs_needs_configuration(dr)) {
+			_dispatch_timer_unote_configure(ds->ds_timer_refs);
+		}
+		if (dr->du_timer_flags & DISPATCH_TIMER_AFTER) {
+			_dispatch_trace_item_complete(dc); // see _dispatch_after
+			_dispatch_source_handler_free(dr, DS_EVENT_HANDLER);
+			dispatch_release(ds); // dispatch_after sources are one-shot
+		}
 	}
 }
 
@@ -526,192 +591,138 @@
 _dispatch_source_refs_finalize_unregistration(dispatch_source_t ds)
 {
 	dispatch_queue_flags_t dqf;
-	dispatch_source_refs_t dr = ds->ds_refs;
-
-	dqf = _dispatch_queue_atomic_flags_set_and_clear_orig(ds->_as_dq,
-			DSF_DELETED, DSF_ARMED | DSF_DEFERRED_DELETE | DSF_CANCEL_WAITER);
+	dqf = _dispatch_queue_atomic_flags_set_and_clear_orig(ds,
+			DSF_DELETED, DSF_NEEDS_EVENT | DSF_CANCEL_WAITER);
+	if (dqf & DSF_DELETED) {
+		DISPATCH_INTERNAL_CRASH(dqf, "Source finalized twice");
+	}
 	if (dqf & DSF_CANCEL_WAITER) {
 		_dispatch_wake_by_address(&ds->dq_atomic_flags);
 	}
-	_dispatch_debug("kevent-source[%p]: disarmed kevent[%p]", ds, dr);
-	_dispatch_release_tailcall(ds); // the retain is done at creation time
-}
-
-void
-_dispatch_source_refs_unregister(dispatch_source_t ds, uint32_t options)
-{
 	_dispatch_object_debug(ds, "%s", __func__);
-	dispatch_queue_flags_t dqf = _dispatch_queue_atomic_flags(ds->_as_dq);
-	dispatch_source_refs_t dr = ds->ds_refs;
-
-	if (dr->du_is_timer) {
-		// Because of the optimization to unregister fired oneshot timers
-		// from the target queue, we can't trust _dispatch_unote_registered()
-		// to tell the truth, it may not have happened yet
-		if (dqf & DSF_ARMED) {
-			_dispatch_timers_unregister(ds->ds_timer_refs);
-			_dispatch_release_2(ds);
-		}
-		dr->du_ident = DISPATCH_TIMER_IDENT_CANCELED;
-	} else {
-		if (_dispatch_unote_needs_rearm(dr) && !(dqf & DSF_ARMED)) {
-			options |= DU_UNREGISTER_IMMEDIATE_DELETE;
-		}
-		if (!_dispatch_unote_unregister(dr, options)) {
-			_dispatch_debug("kevent-source[%p]: deferred delete kevent[%p]",
-					ds, dr);
-			_dispatch_queue_atomic_flags_set(ds->_as_dq, DSF_DEFERRED_DELETE);
-			return; // deferred unregistration
-		}
-	}
-
-	ds->ds_is_installed = true;
-	_dispatch_source_refs_finalize_unregistration(ds);
-}
-
-DISPATCH_ALWAYS_INLINE
-static inline bool
-_dispatch_source_tryarm(dispatch_source_t ds)
-{
-	dispatch_queue_flags_t oqf, nqf;
-	return os_atomic_rmw_loop2o(ds, dq_atomic_flags, oqf, nqf, relaxed, {
-		if (oqf & (DSF_DEFERRED_DELETE | DSF_DELETED)) {
-			// the test is inside the loop because it's convenient but the
-			// result should not change for the duration of the rmw_loop
-			os_atomic_rmw_loop_give_up(break);
-		}
-		nqf = oqf | DSF_ARMED;
-	});
-}
-
-DISPATCH_ALWAYS_INLINE
-static inline bool
-_dispatch_source_refs_resume(dispatch_source_t ds)
-{
-	dispatch_source_refs_t dr = ds->ds_refs;
-	if (dr->du_is_timer) {
-		_dispatch_timers_update(dr, 0);
-		return true;
-	}
-	if (unlikely(!_dispatch_source_tryarm(ds))) {
-		return false;
-	}
-	_dispatch_unote_resume(dr);
-	_dispatch_debug("kevent-source[%p]: rearmed kevent[%p]", ds, dr);
-	return true;
-}
-
-void
-_dispatch_source_refs_register(dispatch_source_t ds, dispatch_wlh_t wlh,
-		dispatch_priority_t pri)
-{
-	dispatch_source_refs_t dr = ds->ds_refs;
-	dispatch_priority_t kbp;
-
-	dispatch_assert(!ds->ds_is_installed);
-
-	if (dr->du_is_timer) {
-		dispatch_queue_t dq = ds->_as_dq;
-		kbp = _dispatch_queue_compute_priority_and_wlh(dq, NULL);
-		// aggressively coalesce background/maintenance QoS timers
-		// <rdar://problem/12200216&27342536>
-		if (_dispatch_qos_is_background(_dispatch_priority_qos(kbp))) {
-			if (dr->du_fflags & DISPATCH_TIMER_STRICT) {
-				_dispatch_ktrace1(DISPATCH_PERF_strict_bg_timer, ds);
-			} else {
-				dr->du_fflags |= DISPATCH_TIMER_BACKGROUND;
-				dr->du_ident = _dispatch_source_timer_idx(dr);
-			}
-		}
-		_dispatch_timers_update(dr, 0);
-		return;
-	}
-
-	if (unlikely(!_dispatch_source_tryarm(ds) ||
-			!_dispatch_unote_register(dr, wlh, pri))) {
-		// Do the parts of dispatch_source_refs_unregister() that
-		// are required after this partial initialization.
-		_dispatch_source_refs_finalize_unregistration(ds);
-	} else {
-		_dispatch_debug("kevent-source[%p]: armed kevent[%p]", ds, dr);
-	}
-	_dispatch_object_debug(ds, "%s", __func__);
+	return _dispatch_release_tailcall(ds); // see _dispatch_queue_alloc()
 }
 
 static void
-_dispatch_source_set_event_handler_context(void *ctxt)
+_dispatch_source_refs_unregister(dispatch_source_t ds, uint32_t options)
 {
-	dispatch_source_t ds = ctxt;
-	dispatch_continuation_t dc = _dispatch_source_get_event_handler(ds->ds_refs);
+	_dispatch_object_debug(ds, "%s", __func__);
+	dispatch_source_refs_t dr = ds->ds_refs;
 
-	if (dc && (dc->dc_flags & DISPATCH_OBJ_CTXT_FETCH_BIT)) {
-		dc->dc_ctxt = ds->do_ctxt;
+	if (_dispatch_unote_unregister(dr, options)) {
+		return _dispatch_source_refs_finalize_unregistration(ds);
 	}
+
+	// deferred unregistration
+	dispatch_queue_flags_t oqf, nqf;
+	os_atomic_rmw_loop2o(ds, dq_atomic_flags, oqf, nqf, relaxed, {
+		if (oqf & (DSF_NEEDS_EVENT | DSF_DELETED)) {
+			os_atomic_rmw_loop_give_up(break);
+		}
+		nqf = oqf | DSF_NEEDS_EVENT;
+	});
 }
 
-DISPATCH_ALWAYS_INLINE
-static inline void
+static void
 _dispatch_source_install(dispatch_source_t ds, dispatch_wlh_t wlh,
 		dispatch_priority_t pri)
 {
-	_dispatch_source_refs_register(ds, wlh, pri);
+	dispatch_source_refs_t dr = ds->ds_refs;
+
+	dispatch_assert(!ds->ds_is_installed);
 	ds->ds_is_installed = true;
+
+	_dispatch_object_debug(ds, "%s", __func__);
+	if (unlikely(!_dispatch_unote_register(dr, wlh, pri))) {
+		return _dispatch_source_refs_finalize_unregistration(ds);
+	}
 }
 
 void
-_dispatch_source_finalize_activation(dispatch_source_t ds, bool *allow_resume)
+_dispatch_source_activate(dispatch_source_t ds, bool *allow_resume)
 {
 	dispatch_continuation_t dc;
 	dispatch_source_refs_t dr = ds->ds_refs;
 	dispatch_priority_t pri;
 	dispatch_wlh_t wlh;
 
-	if (unlikely(dr->du_is_direct &&
-			(_dispatch_queue_atomic_flags(ds->_as_dq) & DSF_CANCELED))) {
-		return _dispatch_source_refs_unregister(ds, 0);
+	if (unlikely(_dispatch_queue_atomic_flags(ds) & DSF_CANCELED)) {
+		ds->ds_is_installed = true;
+		return _dispatch_source_refs_finalize_unregistration(ds);
 	}
 
 	dc = _dispatch_source_get_event_handler(dr);
 	if (dc) {
 		if (_dispatch_object_is_barrier(dc)) {
-			_dispatch_queue_atomic_flags_set(ds->_as_dq, DQF_BARRIER_BIT);
+			_dispatch_queue_atomic_flags_set(ds, DQF_BARRIER_BIT);
 		}
-		ds->dq_priority = _dispatch_priority_from_pp_strip_flags(dc->dc_priority);
-		if (dc->dc_flags & DISPATCH_OBJ_CTXT_FETCH_BIT) {
-			_dispatch_barrier_async_detached_f(ds->_as_dq, ds,
-					_dispatch_source_set_event_handler_context);
+		if ((dc->dc_priority & _PTHREAD_PRIORITY_ENFORCE_FLAG) ||
+				!_dispatch_queue_priority_manually_selected(ds->dq_priority)) {
+			ds->dq_priority = _dispatch_priority_from_pp_strip_flags(dc->dc_priority);
 		}
+		if (dc->dc_flags & DC_FLAG_FETCH_CONTEXT) {
+			dc->dc_ctxt = ds->do_ctxt;
+		}
+	} else {
+		_dispatch_bug_deprecated("dispatch source activated "
+				"with no event handler set");
 	}
 
 	// call "super"
-	_dispatch_queue_finalize_activation(ds->_as_dq, allow_resume);
+	_dispatch_lane_activate(ds, allow_resume);
 
-	if (dr->du_is_direct && !ds->ds_is_installed) {
-		dispatch_queue_t dq = ds->_as_dq;
-		pri = _dispatch_queue_compute_priority_and_wlh(dq, &wlh);
-		if (pri) _dispatch_source_install(ds, wlh, pri);
+	if ((dr->du_is_direct || dr->du_is_timer) && !ds->ds_is_installed) {
+		pri = _dispatch_queue_compute_priority_and_wlh(ds, &wlh);
+		if (pri) {
+#if DISPATCH_USE_KEVENT_WORKLOOP
+			dispatch_workloop_t dwl = _dispatch_wlh_to_workloop(wlh);
+			if (dwl && dr->du_filter == DISPATCH_EVFILT_TIMER_WITH_CLOCK &&
+					dr->du_ident < DISPATCH_TIMER_WLH_COUNT) {
+				if (!dwl->dwl_timer_heap) {
+					uint32_t count = DISPATCH_TIMER_WLH_COUNT;
+					dwl->dwl_timer_heap = _dispatch_calloc(count,
+							sizeof(struct dispatch_timer_heap_s));
+				}
+				dr->du_is_direct = true;
+				_dispatch_wlh_retain(wlh);
+				_dispatch_unote_state_set(dr, wlh, 0);
+			}
+#endif
+			_dispatch_source_install(ds, wlh, pri);
+		}
+	}
+}
+
+DISPATCH_NOINLINE
+static void
+_dispatch_source_handle_wlh_change(dispatch_source_t ds)
+{
+	dispatch_queue_flags_t dqf;
+
+	dqf = _dispatch_queue_atomic_flags_set_orig(ds, DSF_WLH_CHANGED);
+	if (!(dqf & DQF_MUTABLE)) {
+		DISPATCH_CLIENT_CRASH(0, "Changing target queue "
+				"hierarchy after source was activated");
+	}
+	if (!(dqf & DSF_WLH_CHANGED)) {
+		_dispatch_bug_deprecated("Changing target queue "
+				"hierarchy after source was activated");
 	}
 }
 
 DISPATCH_ALWAYS_INLINE
 static inline dispatch_queue_wakeup_target_t
-_dispatch_source_invoke2(dispatch_object_t dou, dispatch_invoke_context_t dic,
+_dispatch_source_invoke2(dispatch_source_t ds, dispatch_invoke_context_t dic,
 		dispatch_invoke_flags_t flags, uint64_t *owned)
 {
-	dispatch_source_t ds = dou._ds;
 	dispatch_queue_wakeup_target_t retq = DISPATCH_QUEUE_WAKEUP_NONE;
 	dispatch_queue_t dq = _dispatch_queue_get_current();
 	dispatch_source_refs_t dr = ds->ds_refs;
 	dispatch_queue_flags_t dqf;
 
-	if (!(flags & DISPATCH_INVOKE_MANAGER_DRAIN) &&
-			_dispatch_unote_wlh_changed(dr, _dispatch_get_wlh())) {
-		dqf = _dispatch_queue_atomic_flags_set_orig(ds->_as_dq,
-				DSF_WLH_CHANGED);
-		if (!(dqf & DSF_WLH_CHANGED)) {
-			_dispatch_bug_deprecated("Changing target queue "
-					"hierarchy after source was activated");
-		}
+	if (unlikely(!(flags & DISPATCH_INVOKE_MANAGER_DRAIN) &&
+			_dispatch_unote_wlh_changed(dr, _dispatch_get_event_wlh()))) {
+		_dispatch_source_handle_wlh_change(ds);
 	}
 
 	if (_dispatch_queue_class_probe(ds)) {
@@ -719,7 +730,7 @@
 		// and not the source's regular target queue: we need to be able
 		// to drain timer setting and the like there.
 		dispatch_with_disabled_narrowing(dic, {
-			retq = _dispatch_queue_serial_drain(ds->_as_dq, dic, flags, owned);
+			retq = _dispatch_lane_serial_drain(ds, dic, flags, owned);
 		});
 	}
 
@@ -730,32 +741,23 @@
 
 	// The order of tests here in invoke and in wakeup should be consistent.
 
-	dispatch_queue_t dkq = &_dispatch_mgr_q;
-	bool prevent_starvation = false;
+	dispatch_queue_t dkq = _dispatch_mgr_q._as_dq;
+	bool avoid_starvation = false;
 
 	if (dr->du_is_direct) {
 		dkq = ds->do_targetq;
 	}
 
-	if (dr->du_is_timer &&
-			os_atomic_load2o(ds, ds_timer_refs->dt_pending_config, relaxed)) {
-		dqf = _dispatch_queue_atomic_flags(ds->_as_dq);
-		if (!(dqf & (DSF_CANCELED | DQF_RELEASED))) {
-			// timer has to be configured on the kevent queue
-			if (dq != dkq) {
-				return dkq;
-			}
-			_dispatch_source_timer_configure(ds);
-		}
-	}
-
 	if (!ds->ds_is_installed) {
 		// The source needs to be installed on the kevent queue.
 		if (dq != dkq) {
 			return dkq;
 		}
-		_dispatch_source_install(ds, _dispatch_get_wlh(),
-				_dispatch_get_basepri());
+		dispatch_priority_t pri = DISPATCH_PRIORITY_FLAG_MANAGER;
+		if (likely(flags & DISPATCH_INVOKE_WORKER_DRAIN)) {
+			pri = _dispatch_get_basepri();
+		}
+		_dispatch_source_install(ds, _dispatch_get_event_wlh(), pri);
 	}
 
 	if (unlikely(DISPATCH_QUEUE_IS_SUSPENDED(ds))) {
@@ -763,6 +765,16 @@
 		return ds->do_targetq;
 	}
 
+	if (_dispatch_source_refs_needs_configuration(dr)) {
+		dqf = _dispatch_queue_atomic_flags(ds);
+		if (!(dqf & (DSF_CANCELED | DQF_RELEASED))) {
+			if (dq != dkq) {
+				return dkq;
+			}
+			_dispatch_timer_unote_configure(ds->ds_timer_refs);
+		}
+	}
+
 	if (_dispatch_source_get_registration_handler(dr)) {
 		// The source has been registered and the registration handler needs
 		// to be delivered on the target queue.
@@ -773,26 +785,19 @@
 		_dispatch_source_registration_callout(ds, dq, flags);
 	}
 
-	dqf = _dispatch_queue_atomic_flags(ds->_as_dq);
-	if ((dqf & DSF_DEFERRED_DELETE) && !(dqf & DSF_ARMED)) {
-unregister_event:
-		// DSF_DELETE: Pending source kevent unregistration has been completed
-		// !DSF_ARMED: event was delivered and can safely be unregistered
-		if (dq != dkq) {
-			return dkq;
-		}
-		_dispatch_source_refs_unregister(ds, DU_UNREGISTER_IMMEDIATE_DELETE);
-		dqf = _dispatch_queue_atomic_flags(ds->_as_dq);
+	if (_dispatch_unote_needs_delete(dr)) {
+		_dispatch_source_refs_unregister(ds, DUU_DELETE_ACK | DUU_MUST_SUCCEED);
 	}
 
+	dqf = _dispatch_queue_atomic_flags(ds);
 	if (!(dqf & (DSF_CANCELED | DQF_RELEASED)) &&
-			os_atomic_load2o(ds, ds_pending_data, relaxed)) {
+			os_atomic_load2o(dr, ds_pending_data, relaxed)) {
 		// The source has pending data to deliver via the event handler callback
 		// on the target queue. Some sources need to be rearmed on the kevent
 		// queue after event delivery.
 		if (dq == ds->do_targetq) {
 			_dispatch_source_latch_and_call(ds, dq, flags);
-			dqf = _dispatch_queue_atomic_flags(ds->_as_dq);
+			dqf = _dispatch_queue_atomic_flags(ds);
 
 			// starvation avoidance: if the source triggers itself then force a
 			// re-queue to give other things already queued on the target queue
@@ -801,10 +806,12 @@
 			// however, if the source is directly targeting an overcommit root
 			// queue, this would requeue the source and ask for a new overcommit
 			// thread right away.
-			prevent_starvation = dq->do_targetq ||
-					!(dq->dq_priority & DISPATCH_PRIORITY_FLAG_OVERCOMMIT);
-			if (prevent_starvation &&
-					os_atomic_load2o(ds, ds_pending_data, relaxed)) {
+			if (!(dqf & (DSF_CANCELED | DSF_DELETED))) {
+				avoid_starvation = dq->do_targetq ||
+						!(dq->dq_priority & DISPATCH_PRIORITY_FLAG_OVERCOMMIT);
+			}
+			if (avoid_starvation &&
+					os_atomic_load2o(dr, ds_pending_data, relaxed)) {
 				retq = ds->do_targetq;
 			}
 		} else {
@@ -814,55 +821,51 @@
 		}
 	}
 
-	if ((dqf & (DSF_CANCELED | DQF_RELEASED)) && !(dqf & DSF_DEFERRED_DELETE)) {
+	if ((dqf & (DSF_CANCELED | DQF_RELEASED)) && !(dqf & DSF_DELETED)) {
 		// The source has been cancelled and needs to be uninstalled from the
 		// kevent queue. After uninstallation, the cancellation handler needs
 		// to be delivered to the target queue.
-		if (!(dqf & DSF_DELETED)) {
-			if (dr->du_is_timer && !(dqf & DSF_ARMED)) {
-				// timers can cheat if not armed because there's nothing left
-				// to do on the manager queue and unregistration can happen
-				// on the regular target queue
-			} else if (dq != dkq) {
-				return dkq;
-			}
-			_dispatch_source_refs_unregister(ds, 0);
-			dqf = _dispatch_queue_atomic_flags(ds->_as_dq);
-			if (unlikely(dqf & DSF_DEFERRED_DELETE)) {
-				if (!(dqf & DSF_ARMED)) {
-					goto unregister_event;
-				}
-				// we need to wait for the EV_DELETE
-				return retq ? retq : DISPATCH_QUEUE_WAKEUP_WAIT_FOR_EVENT;
-			}
+		if (dr->du_is_timer && !_dispatch_unote_armed(dr)) {
+			// timers can cheat if not armed because there's nothing left
+			// to do on the manager queue and unregistration can happen
+			// on the regular target queue
+		} else if (dq != dkq) {
+			return dkq;
 		}
+		uint32_t duu_options = DUU_DELETE_ACK;
+		if (!(dqf & DSF_NEEDS_EVENT)) duu_options |= DUU_PROBE;
+		_dispatch_source_refs_unregister(ds, duu_options);
+		dqf = _dispatch_queue_atomic_flags(ds);
+		if (unlikely(!(dqf & DSF_DELETED))) {
+			// we need to wait for the EV_DELETE
+			return retq ? retq : DISPATCH_QUEUE_WAKEUP_WAIT_FOR_EVENT;
+		}
+	}
+
+	if ((dqf & (DSF_CANCELED | DQF_RELEASED)) && (dqf & DSF_DELETED)) {
 		if (dq != ds->do_targetq && (_dispatch_source_get_event_handler(dr) ||
 				_dispatch_source_get_cancel_handler(dr) ||
 				_dispatch_source_get_registration_handler(dr))) {
 			retq = ds->do_targetq;
 		} else {
 			_dispatch_source_cancel_callout(ds, dq, flags);
-			dqf = _dispatch_queue_atomic_flags(ds->_as_dq);
+			dqf = _dispatch_queue_atomic_flags(ds);
 		}
-		prevent_starvation = false;
+		avoid_starvation = false;
 	}
 
-	if (_dispatch_unote_needs_rearm(dr) &&
-			!(dqf & (DSF_ARMED|DSF_DELETED|DSF_CANCELED|DQF_RELEASED))) {
+	if (!(dqf & (DSF_CANCELED | DQF_RELEASED)) &&
+			_dispatch_source_refs_needs_rearm(dr)) {
 		// The source needs to be rearmed on the kevent queue.
 		if (dq != dkq) {
 			return dkq;
 		}
-		if (unlikely(dqf & DSF_DEFERRED_DELETE)) {
-			// no need for resume when we can directly unregister the kevent
-			goto unregister_event;
-		}
 		if (unlikely(DISPATCH_QUEUE_IS_SUSPENDED(ds))) {
 			// do not try to rearm the kevent if the source is suspended
 			// from the source handler
 			return ds->do_targetq;
 		}
-		if (prevent_starvation && dr->du_wlh == DISPATCH_WLH_ANON) {
+		if (avoid_starvation && _dispatch_unote_wlh(dr) == DISPATCH_WLH_ANON) {
 			// keep the old behavior to force re-enqueue to our target queue
 			// for the rearm.
 			//
@@ -871,10 +874,8 @@
 			// not a concern and we can rearm right away.
 			return ds->do_targetq;
 		}
-		if (unlikely(!_dispatch_source_refs_resume(ds))) {
-			goto unregister_event;
-		}
-		if (!prevent_starvation && _dispatch_wlh_should_poll_unote(dr)) {
+		_dispatch_unote_resume(dr);
+		if (!avoid_starvation && _dispatch_wlh_should_poll_unote(dr)) {
 			// try to redrive the drain from under the lock for sources
 			// targeting an overcommit root queue to avoid parking
 			// when the next event has already fired
@@ -892,6 +893,17 @@
 {
 	_dispatch_queue_class_invoke(ds, dic, flags,
 			DISPATCH_INVOKE_DISALLOW_SYNC_WAITERS, _dispatch_source_invoke2);
+
+#if DISPATCH_EVENT_BACKEND_KEVENT
+	if (flags & DISPATCH_INVOKE_WORKLOOP_DRAIN) {
+		dispatch_workloop_t dwl = (dispatch_workloop_t)_dispatch_get_wlh();
+		dispatch_timer_heap_t dth = dwl->dwl_timer_heap;
+		if (dth && dth[0].dth_dirty_bits) {
+			_dispatch_event_loop_drain_timers(dwl->dwl_timer_heap,
+					DISPATCH_TIMER_WLH_COUNT);
+		}
+	}
+#endif // DISPATCH_EVENT_BACKEND_KEVENT
 }
 
 void
@@ -904,51 +916,53 @@
 	dispatch_source_refs_t dr = ds->ds_refs;
 	dispatch_queue_wakeup_target_t dkq = DISPATCH_QUEUE_WAKEUP_MGR;
 	dispatch_queue_wakeup_target_t tq = DISPATCH_QUEUE_WAKEUP_NONE;
-	dispatch_queue_flags_t dqf = _dispatch_queue_atomic_flags(ds->_as_dq);
-	bool deferred_delete = (dqf & DSF_DEFERRED_DELETE);
+	dispatch_queue_flags_t dqf = _dispatch_queue_atomic_flags(ds);
+	dispatch_unote_state_t du_state = _dispatch_unote_state(dr);
 
 	if (dr->du_is_direct) {
 		dkq = DISPATCH_QUEUE_WAKEUP_TARGET;
 	}
 
-	if (!(dqf & (DSF_CANCELED | DQF_RELEASED)) && dr->du_is_timer &&
-			os_atomic_load2o(ds, ds_timer_refs->dt_pending_config, relaxed)) {
-		// timer has to be configured on the kevent queue
-		tq = dkq;
-	} else if (!ds->ds_is_installed) {
+	if (!ds->ds_is_installed) {
 		// The source needs to be installed on the kevent queue.
 		tq = dkq;
+	} else if (!(dqf & (DSF_CANCELED | DQF_RELEASED)) &&
+			_dispatch_source_refs_needs_configuration(dr)) {
+		// timer has to be configured on the kevent queue
+		tq = dkq;
 	} else if (_dispatch_source_get_registration_handler(dr)) {
 		// The registration handler needs to be delivered to the target queue.
 		tq = DISPATCH_QUEUE_WAKEUP_TARGET;
-	} else if (deferred_delete && !(dqf & DSF_ARMED)) {
-		// Pending source kevent unregistration has been completed
-		// or EV_ONESHOT event can be acknowledged
-		tq = dkq;
+	} else if (_du_state_needs_delete(du_state)) {
+		// Deferred deletion can be acknowledged, which can always be done
+		// from the target queue
+		tq = DISPATCH_QUEUE_WAKEUP_TARGET;
 	} else if (!(dqf & (DSF_CANCELED | DQF_RELEASED)) &&
-			os_atomic_load2o(ds, ds_pending_data, relaxed)) {
+			os_atomic_load2o(dr, ds_pending_data, relaxed)) {
 		// The source has pending data to deliver to the target queue.
 		tq = DISPATCH_QUEUE_WAKEUP_TARGET;
-	} else if ((dqf & (DSF_CANCELED | DQF_RELEASED)) && !deferred_delete) {
+	} else if ((dqf & (DSF_CANCELED | DQF_RELEASED)) && !(dqf & DSF_DELETED)) {
 		// The source needs to be uninstalled from the kevent queue, or the
 		// cancellation handler needs to be delivered to the target queue.
 		// Note: cancellation assumes installation.
-		if (!(dqf & DSF_DELETED)) {
-			if (dr->du_is_timer && !(dqf & DSF_ARMED)) {
-				// timers can cheat if not armed because there's nothing left
-				// to do on the manager queue and unregistration can happen
-				// on the regular target queue
-				tq = DISPATCH_QUEUE_WAKEUP_TARGET;
-			} else {
-				tq = dkq;
-			}
-		} else if (_dispatch_source_get_event_handler(dr) ||
-				_dispatch_source_get_cancel_handler(dr) ||
-				_dispatch_source_get_registration_handler(dr)) {
+		if (dr->du_is_timer && !_dispatch_unote_armed(dr)) {
+			// timers can cheat if not armed because there's nothing left
+			// to do on the manager queue and unregistration can happen
+			// on the regular target queue
 			tq = DISPATCH_QUEUE_WAKEUP_TARGET;
+		} else if ((dqf & DSF_NEEDS_EVENT) && !(flags & DISPATCH_WAKEUP_EVENT)){
+			// we're waiting for an event
+		} else {
+			// we need to initialize the deletion sequence
+			tq = dkq;
 		}
-	} else if (_dispatch_unote_needs_rearm(dr) &&
-			!(dqf & (DSF_ARMED|DSF_DELETED|DSF_CANCELED|DQF_RELEASED))) {
+	} else if ((dqf & (DSF_CANCELED | DQF_RELEASED)) && (dqf & DSF_DELETED) &&
+			(_dispatch_source_get_event_handler(dr) ||
+			_dispatch_source_get_cancel_handler(dr) ||
+			_dispatch_source_get_registration_handler(dr))) {
+		tq = DISPATCH_QUEUE_WAKEUP_TARGET;
+	} else if (!(dqf & (DSF_CANCELED | DQF_RELEASED)) &&
+			_dispatch_source_refs_needs_rearm(dr)) {
 		// The source needs to be rearmed on the kevent queue.
 		tq = dkq;
 	}
@@ -957,11 +971,11 @@
 	}
 
 	if ((tq == DISPATCH_QUEUE_WAKEUP_TARGET) &&
-			ds->do_targetq == &_dispatch_mgr_q) {
+			ds->do_targetq == _dispatch_mgr_q._as_dq) {
 		tq = DISPATCH_QUEUE_WAKEUP_MGR;
 	}
 
-	return _dispatch_queue_class_wakeup(ds->_as_dq, qos, flags, tq);
+	return _dispatch_queue_wakeup(ds, qos, flags, tq);
 }
 
 void
@@ -974,8 +988,7 @@
 	// need to therefore retain/release before setting the bit
 	_dispatch_retain_2(ds);
 
-	dispatch_queue_t q = ds->_as_dq;
-	if (_dispatch_queue_atomic_flags_set_orig(q, DSF_CANCELED) & DSF_CANCELED) {
+	if (_dispatch_queue_atomic_flags_set_orig(ds, DSF_CANCELED) & DSF_CANCELED){
 		_dispatch_release_2_tailcall(ds);
 	} else {
 		dx_wakeup(ds, 0, DISPATCH_WAKEUP_MAKE_DIRTY | DISPATCH_WAKEUP_CONSUME_2);
@@ -985,7 +998,7 @@
 void
 dispatch_source_cancel_and_wait(dispatch_source_t ds)
 {
-	dispatch_queue_flags_t old_dqf, dqf, new_dqf;
+	dispatch_queue_flags_t old_dqf, new_dqf;
 	dispatch_source_refs_t dr = ds->ds_refs;
 
 	if (unlikely(_dispatch_source_get_cancel_handler(dr))) {
@@ -998,21 +1011,21 @@
 		if (old_dqf & DSF_CANCEL_WAITER) {
 			os_atomic_rmw_loop_give_up(break);
 		}
-		if ((old_dqf & DSF_STATE_MASK) == DSF_DELETED) {
+		if (old_dqf & DSF_DELETED) {
 			// just add DSF_CANCELED
-		} else if ((old_dqf & DSF_DEFERRED_DELETE) || !dr->du_is_direct) {
+		} else if ((old_dqf & DSF_NEEDS_EVENT) || dr->du_is_timer ||
+				!dr->du_is_direct) {
 			new_dqf |= DSF_CANCEL_WAITER;
 		}
 	});
-	dqf = new_dqf;
 
 	if (old_dqf & DQF_RELEASED) {
 		DISPATCH_CLIENT_CRASH(ds, "Dispatch source used after last release");
 	}
-	if ((old_dqf & DSF_STATE_MASK) == DSF_DELETED) {
+	if (old_dqf & DSF_DELETED) {
 		return;
 	}
-	if (dqf & DSF_CANCEL_WAITER) {
+	if (new_dqf & DSF_CANCEL_WAITER) {
 		goto wakeup;
 	}
 
@@ -1048,16 +1061,17 @@
 
 	if (likely(_dq_state_is_runnable(old_state) &&
 			!_dq_state_drain_locked(old_state))) {
-		// same thing _dispatch_source_invoke2() does when handling cancellation
-		dqf = _dispatch_queue_atomic_flags(ds->_as_dq);
-		if (!(dqf & (DSF_DEFERRED_DELETE | DSF_DELETED))) {
-			_dispatch_source_refs_unregister(ds, 0);
-			dqf = _dispatch_queue_atomic_flags(ds->_as_dq);
-			if (likely((dqf & DSF_STATE_MASK) == DSF_DELETED)) {
-				_dispatch_source_cancel_callout(ds, NULL, DISPATCH_INVOKE_NONE);
-			}
+		// deletion may have proceeded concurrently while we were
+		// taking the lock, so we need to check we're not doing it twice.
+		if (likely(!(_dispatch_queue_atomic_flags(ds) & DSF_DELETED))) {
+			// same thing _dispatch_source_invoke2() does for cancellation
+			_dispatch_source_refs_unregister(ds, DUU_DELETE_ACK | DUU_PROBE);
 		}
-		dx_wakeup(ds, 0, DISPATCH_WAKEUP_BARRIER_COMPLETE);
+		if (likely(_dispatch_queue_atomic_flags(ds) & DSF_DELETED)) {
+			_dispatch_source_cancel_callout(ds, NULL, DISPATCH_INVOKE_NONE);
+		}
+		dx_wakeup(ds, 0, DISPATCH_WAKEUP_EVENT |
+				DISPATCH_WAKEUP_BARRIER_COMPLETE);
 	} else if (unlikely(_dq_state_drain_locked_by_self(old_state))) {
 		DISPATCH_CLIENT_CRASH(ds, "dispatch_source_cancel_and_wait "
 				"called from a source handler");
@@ -1069,8 +1083,8 @@
 		dispatch_activate(ds);
 	}
 
-	dqf = _dispatch_queue_atomic_flags(ds->_as_dq);
-	while (unlikely((dqf & DSF_STATE_MASK) != DSF_DELETED)) {
+	dispatch_queue_flags_t dqf = _dispatch_queue_atomic_flags(ds);
+	while (unlikely(!(dqf & DSF_DELETED))) {
 		if (unlikely(!(dqf & DSF_CANCEL_WAITER))) {
 			if (!os_atomic_cmpxchgv2o(ds, dq_atomic_flags,
 					dqf, dqf | DSF_CANCEL_WAITER, &dqf, relaxed)) {
@@ -1078,128 +1092,54 @@
 			}
 			dqf |= DSF_CANCEL_WAITER;
 		}
-		_dispatch_wait_on_address(&ds->dq_atomic_flags, dqf, DLOCK_LOCK_NONE);
-		dqf = _dispatch_queue_atomic_flags(ds->_as_dq);
+		_dispatch_wait_on_address(&ds->dq_atomic_flags, dqf,
+				DISPATCH_TIME_FOREVER, DLOCK_LOCK_NONE);
+		dqf = _dispatch_queue_atomic_flags(ds);
 	}
 }
 
 void
-_dispatch_source_merge_evt(dispatch_unote_t du, uint32_t flags, uintptr_t data,
-		uintptr_t status, pthread_priority_t pp)
+_dispatch_source_merge_evt(dispatch_unote_t du, uint32_t flags,
+		OS_UNUSED uintptr_t data, pthread_priority_t pp)
 {
-	dispatch_source_refs_t dr = du._dr;
-	dispatch_source_t ds = _dispatch_source_from_refs(dr);
-	dispatch_wakeup_flags_t wflags = 0;
-	dispatch_queue_flags_t dqf;
+	dispatch_source_t ds = _dispatch_source_from_refs(du._dr);
 
-	if (_dispatch_unote_needs_rearm(dr) || (flags & (EV_DELETE | EV_ONESHOT))) {
-		// once we modify the queue atomic flags below, it will allow concurrent
-		// threads running _dispatch_source_invoke2 to dispose of the source,
-		// so we can't safely borrow the reference we get from the muxnote udata
-		// anymore, and need our own
-		wflags = DISPATCH_WAKEUP_CONSUME_2;
-		_dispatch_retain_2(ds); // rdar://20382435
-	}
-
-	if ((flags & EV_UDATA_SPECIFIC) && (flags & EV_ONESHOT) &&
-			!(flags & EV_DELETE)) {
-		dqf = _dispatch_queue_atomic_flags_set_and_clear(ds->_as_dq,
-				DSF_DEFERRED_DELETE, DSF_ARMED);
-		if (flags & EV_VANISHED) {
-			_dispatch_bug_kevent_client("kevent", dr->du_type->dst_kind,
-					"monitored resource vanished before the source "
-					"cancel handler was invoked", 0);
+	dispatch_unote_state_t du_state = _dispatch_unote_state(du);
+	if (!(flags & EV_UDATA_SPECIFIC) && !_du_state_registered(du_state)) {
+		if (!du._du->du_is_timer) {
+			// Timers must be unregistered from their target queue, else this
+			// unregistration can race with the optimization in
+			// _dispatch_source_invoke() to unregister fired oneshot timers.
+			//
+			// Because oneshot timers dominate the world, we prefer paying an
+			// extra wakeup for repeating timers, and avoid the wakeup for
+			// oneshot timers.
+			_dispatch_source_refs_finalize_unregistration(ds);
 		}
-		_dispatch_debug("kevent-source[%p]: %s kevent[%p]", ds,
-				(flags & EV_VANISHED) ? "vanished" :
-				"deferred delete oneshot", dr);
-	} else if (flags & (EV_DELETE | EV_ONESHOT)) {
-		_dispatch_source_refs_unregister(ds, DU_UNREGISTER_ALREADY_DELETED);
-		_dispatch_debug("kevent-source[%p]: deleted kevent[%p]", ds, dr);
-		if (flags & EV_DELETE) goto done;
-		dqf = _dispatch_queue_atomic_flags(ds->_as_dq);
-	} else if (_dispatch_unote_needs_rearm(dr)) {
-		dqf = _dispatch_queue_atomic_flags_clear(ds->_as_dq, DSF_ARMED);
-		_dispatch_debug("kevent-source[%p]: disarmed kevent[%p]", ds, dr);
-	} else {
-		dqf = _dispatch_queue_atomic_flags(ds->_as_dq);
 	}
 
-	if (dqf & (DSF_CANCELED | DQF_RELEASED)) {
-		goto done; // rdar://20204025
-	}
-
-	dispatch_unote_action_t action = dr->du_data_action;
-	if ((flags & EV_UDATA_SPECIFIC) && (flags & EV_ONESHOT) &&
-			(flags & EV_VANISHED)) {
+	dispatch_queue_flags_t dqf = _dispatch_queue_atomic_flags(ds);
+	if (unlikely(flags & EV_VANISHED)) {
+		if (dqf & DSF_STRICT) {
+			DISPATCH_CLIENT_CRASH(du._du->du_ident, "Unexpected EV_VANISHED "
+					"(do not destroy random mach ports or file descriptors)");
+		} else {
+			_dispatch_bug_kevent_vanished(du._du);
+		}
 		// if the resource behind the ident vanished, the event handler can't
 		// do anything useful anymore, so do not try to call it at all
-		//
-		// Note: if the kernel doesn't support EV_VANISHED we always get it
-		// back unchanged from the flags passed at EV_ADD (registration) time
-		// Since we never ask for both EV_ONESHOT and EV_VANISHED for sources,
-		// if we get both bits it was a real EV_VANISHED delivery
-		os_atomic_store2o(ds, ds_pending_data, 0, relaxed);
-#if HAVE_MACH
-	} else if (dr->du_filter == EVFILT_MACHPORT) {
-		os_atomic_store2o(ds, ds_pending_data, data, relaxed);
-#endif
-	} else if (action == DISPATCH_UNOTE_ACTION_DATA_SET) {
-		os_atomic_store2o(ds, ds_pending_data, data, relaxed);
-	} else if (action == DISPATCH_UNOTE_ACTION_DATA_ADD) {
-		os_atomic_add2o(ds, ds_pending_data, data, relaxed);
-	} else if (data && action == DISPATCH_UNOTE_ACTION_DATA_OR) {
-		os_atomic_or2o(ds, ds_pending_data, data, relaxed);
-	} else if (data && action == DISPATCH_UNOTE_ACTION_DATA_OR_STATUS_SET) {
-		// We combine the data and status into a single 64-bit value.
-		uint64_t odata, ndata;
-		uint64_t value = DISPATCH_SOURCE_COMBINE_DATA_AND_STATUS(data, status);
-		os_atomic_rmw_loop2o(ds, ds_pending_data, odata, ndata, relaxed, {
-            ndata = DISPATCH_SOURCE_GET_DATA(odata) | value;
-		});
-	} else if (data) {
-		DISPATCH_INTERNAL_CRASH(action, "Unexpected source action value");
+		os_atomic_store2o(du._dr, ds_pending_data, 0, relaxed);
 	}
-	_dispatch_debug("kevent-source[%p]: merged kevent[%p]", ds, dr);
 
-done:
+	_dispatch_debug("kevent-source[%p]: merged kevent[%p]", ds, du._dr);
 	_dispatch_object_debug(ds, "%s", __func__);
-	dx_wakeup(ds, _dispatch_qos_from_pp(pp), wflags | DISPATCH_WAKEUP_MAKE_DIRTY);
+	dx_wakeup(ds, _dispatch_qos_from_pp(pp), DISPATCH_WAKEUP_EVENT |
+			DISPATCH_WAKEUP_CONSUME_2 | DISPATCH_WAKEUP_MAKE_DIRTY);
 }
 
 #pragma mark -
 #pragma mark dispatch_source_timer
 
-#if DISPATCH_USE_DTRACE
-static dispatch_timer_source_refs_t
-		_dispatch_trace_next_timer[DISPATCH_TIMER_QOS_COUNT];
-#define _dispatch_trace_next_timer_set(x, q) \
-		_dispatch_trace_next_timer[(q)] = (x)
-#define _dispatch_trace_next_timer_program(d, q) \
-		_dispatch_trace_timer_program(_dispatch_trace_next_timer[(q)], (d))
-DISPATCH_ALWAYS_INLINE
-static inline void
-_dispatch_mgr_trace_timers_wakes(void)
-{
-	uint32_t qos;
-
-	if (_dispatch_timers_will_wake) {
-		if (slowpath(DISPATCH_TIMER_WAKE_ENABLED())) {
-			for (qos = 0; qos < DISPATCH_TIMER_QOS_COUNT; qos++) {
-				if (_dispatch_timers_will_wake & (1 << qos)) {
-					_dispatch_trace_timer_wake(_dispatch_trace_next_timer[qos]);
-				}
-			}
-		}
-		_dispatch_timers_will_wake = 0;
-	}
-}
-#else
-#define _dispatch_trace_next_timer_set(x, q)
-#define _dispatch_trace_next_timer_program(d, q)
-#define _dispatch_mgr_trace_timers_wakes()
-#endif
-
 #define _dispatch_source_timer_telemetry_enabled() false
 
 DISPATCH_NOINLINE
@@ -1224,32 +1164,9 @@
 	}
 }
 
-DISPATCH_NOINLINE
-static void
-_dispatch_source_timer_configure(dispatch_source_t ds)
-{
-	dispatch_timer_source_refs_t dt = ds->ds_timer_refs;
-	dispatch_timer_config_t dtc;
-
-	dtc = os_atomic_xchg2o(dt, dt_pending_config, NULL, dependency);
-	if (dtc->dtc_clock == DISPATCH_CLOCK_MACH) {
-		dt->du_fflags |= DISPATCH_TIMER_CLOCK_MACH;
-	} else {
-		dt->du_fflags &= ~(uint32_t)DISPATCH_TIMER_CLOCK_MACH;
-	}
-	dt->dt_timer = dtc->dtc_timer;
-	free(dtc);
-	if (ds->ds_is_installed) {
-		// Clear any pending data that might have accumulated on
-		// older timer params <rdar://problem/8574886>
-		os_atomic_store2o(ds, ds_pending_data, 0, relaxed);
-		_dispatch_timers_update(dt, 0);
-	}
-}
-
 static dispatch_timer_config_t
-_dispatch_source_timer_config_create(dispatch_time_t start,
-		uint64_t interval, uint64_t leeway)
+_dispatch_timer_config_create(dispatch_time_t start,
+		uint64_t interval, uint64_t leeway, dispatch_timer_source_refs_t dt)
 {
 	dispatch_timer_config_t dtc;
 	dtc = _dispatch_calloc(1ul, sizeof(struct dispatch_timer_config_s));
@@ -1266,18 +1183,28 @@
 	if ((int64_t)leeway < 0) {
 		leeway = INT64_MAX;
 	}
-	if (start == DISPATCH_TIME_NOW) {
-		start = _dispatch_absolute_time();
-	} else if (start == DISPATCH_TIME_FOREVER) {
-		start = INT64_MAX;
+
+	dispatch_clock_t clock;
+	uint64_t target;
+	if (start == DISPATCH_TIME_FOREVER) {
+		target = INT64_MAX;
+		// Do not change the clock when postponing the time forever in the
+		// future; this will default to UPTIME if no clock was set.
+		clock = _dispatch_timer_flags_to_clock(dt->du_timer_flags);
+	} else {
+		_dispatch_time_to_clock_and_value(start, &clock, &target);
+		if (target == DISPATCH_TIME_NOW) {
+			if (clock == DISPATCH_CLOCK_UPTIME) {
+				target = _dispatch_uptime();
+			} else {
+				dispatch_assert(clock == DISPATCH_CLOCK_MONOTONIC);
+				target = _dispatch_monotonic_time();
+			}
+		}
 	}
 
-	if ((int64_t)start < 0) {
-		// wall clock
-		start = (dispatch_time_t)-((int64_t)start);
-		dtc->dtc_clock = DISPATCH_CLOCK_WALL;
-	} else {
-		// absolute clock
+	if (clock != DISPATCH_CLOCK_WALL) {
+		// uptime or monotonic clock
 		interval = _dispatch_time_nano2mach(interval);
 		if (interval < 1) {
 			// rdar://problem/7287561 interval must be at least one in
@@ -1287,22 +1214,75 @@
 			interval = 1;
 		}
 		leeway = _dispatch_time_nano2mach(leeway);
-		dtc->dtc_clock = DISPATCH_CLOCK_MACH;
 	}
 	if (interval < INT64_MAX && leeway > interval / 2) {
 		leeway = interval / 2;
 	}
 
-	dtc->dtc_timer.target = start;
+	dtc->dtc_clock = clock;
+	dtc->dtc_timer.target = target;
 	dtc->dtc_timer.interval = interval;
-	if (start + leeway < INT64_MAX) {
-		dtc->dtc_timer.deadline = start + leeway;
+	if (target + leeway < INT64_MAX) {
+		dtc->dtc_timer.deadline = target + leeway;
 	} else {
 		dtc->dtc_timer.deadline = INT64_MAX;
 	}
 	return dtc;
 }
 
+static dispatch_timer_config_t
+_dispatch_interval_config_create(dispatch_time_t start,
+		uint64_t interval, uint64_t leeway, dispatch_timer_source_refs_t dt)
+{
+#define NSEC_PER_FRAME (NSEC_PER_SEC/60)
+// approx 1 year (60s * 60m * 24h * 365d)
+#define FOREVER_NSEC 31536000000000000ull
+
+	const bool animation = dt->du_timer_flags & DISPATCH_INTERVAL_UI_ANIMATION;
+	dispatch_timer_config_t dtc;
+	dtc = _dispatch_calloc(1ul, sizeof(struct dispatch_timer_config_s));
+	dtc->dtc_clock = DISPATCH_CLOCK_UPTIME;
+
+	if (start == DISPATCH_TIME_FOREVER) {
+		dtc->dtc_timer.target = INT64_MAX;
+		dtc->dtc_timer.interval = INT64_MAX;
+		dtc->dtc_timer.deadline = INT64_MAX;
+		return dtc;
+	}
+
+	if (start != DISPATCH_TIME_NOW) {
+		DISPATCH_CLIENT_CRASH(0, "Start value is not DISPATCH_TIME_NOW or "
+				"DISPATCH_TIME_FOREVER");
+	} else if (unlikely(interval == 0)) {
+		DISPATCH_CLIENT_CRASH(0, "Setting interval to 0");
+	}
+
+	if (likely(interval <= (animation ? FOREVER_NSEC/NSEC_PER_FRAME :
+			FOREVER_NSEC/NSEC_PER_MSEC))) {
+		interval *= animation ? NSEC_PER_FRAME : NSEC_PER_MSEC;
+	} else {
+		interval = FOREVER_NSEC;
+	}
+
+	interval = _dispatch_time_nano2mach(interval);
+	start = _dispatch_uptime() + interval;
+	start -= (start % interval);
+	if (leeway <= 1000) {
+		leeway = interval * leeway / 1000;
+	} else if (leeway != UINT64_MAX) {
+		DISPATCH_CLIENT_CRASH(0, "Passing an invalid leeway");
+	} else if (animation) {
+		leeway = _dispatch_time_nano2mach(NSEC_PER_FRAME);
+	} else {
+		leeway = interval / 2;
+	}
+	dtc->dtc_clock = DISPATCH_CLOCK_UPTIME;
+	dtc->dtc_timer.target = start;
+	dtc->dtc_timer.deadline = start + leeway;
+	dtc->dtc_timer.interval = interval;
+	return dtc;
+}
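Worked example for the helper above (illustrative values): a non-animation interval source configured with start == DISPATCH_TIME_NOW, interval == 100 and leeway == 50 gets an interval of 100 * NSEC_PER_MSEC, a first target of _dispatch_uptime() + interval rounded down to a multiple of the interval (so interval timers tend to fire on aligned boundaries), and a per-mille leeway of 100ms * 50 / 1000 = 5ms; any leeway above 1000 other than UINT64_MAX crashes the client.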
+
 DISPATCH_NOINLINE
 void
 dispatch_source_set_timer(dispatch_source_t ds, dispatch_time_t start,
@@ -1311,49 +1291,32 @@
 	dispatch_timer_source_refs_t dt = ds->ds_timer_refs;
 	dispatch_timer_config_t dtc;
 
-	if (unlikely(!dt->du_is_timer || (dt->du_fflags&DISPATCH_TIMER_INTERVAL))) {
+	if (unlikely(!dt->du_is_timer)) {
 		DISPATCH_CLIENT_CRASH(ds, "Attempt to set timer on a non-timer source");
 	}
 
-	dtc = _dispatch_source_timer_config_create(start, interval, leeway);
+	if (dt->du_timer_flags & DISPATCH_TIMER_INTERVAL) {
+		dtc = _dispatch_interval_config_create(start, interval, leeway, dt);
+	} else {
+		dtc = _dispatch_timer_config_create(start, interval, leeway, dt);
+	}
+	if (_dispatch_timer_flags_to_clock(dt->du_timer_flags) != dtc->dtc_clock &&
+			dt->du_filter == DISPATCH_EVFILT_TIMER_WITH_CLOCK) {
+		DISPATCH_CLIENT_CRASH(0, "Attempting to modify timer clock");
+	}
+
 	_dispatch_source_timer_telemetry(ds, dtc->dtc_clock, &dtc->dtc_timer);
 	dtc = os_atomic_xchg2o(dt, dt_pending_config, dtc, release);
 	if (dtc) free(dtc);
 	dx_wakeup(ds, 0, DISPATCH_WAKEUP_MAKE_DIRTY);
 }
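A minimal sketch of how a client drives this path, assuming a pre-existing queue `q` (hypothetical):

dispatch_source_t timer = dispatch_source_create(DISPATCH_SOURCE_TYPE_TIMER, 0, 0, q);
// Fire one second from now, then every second, allowing 10ms of leeway.
dispatch_source_set_timer(timer,
		dispatch_time(DISPATCH_TIME_NOW, (int64_t)NSEC_PER_SEC),
		NSEC_PER_SEC, 10 * NSEC_PER_MSEC);
dispatch_source_set_event_handler(timer, ^{ /* periodic work */ });
dispatch_resume(timer);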
 
-static void
-_dispatch_source_set_interval(dispatch_source_t ds, uint64_t interval)
-{
-#define NSEC_PER_FRAME (NSEC_PER_SEC/60)
-// approx 1 year (60s * 60m * 24h * 365d)
-#define FOREVER_NSEC 31536000000000000ull
-
-	dispatch_timer_source_refs_t dr = ds->ds_timer_refs;
-	const bool animation = dr->du_fflags & DISPATCH_INTERVAL_UI_ANIMATION;
-	if (fastpath(interval <= (animation ? FOREVER_NSEC/NSEC_PER_FRAME :
-			FOREVER_NSEC/NSEC_PER_MSEC))) {
-		interval *= animation ? NSEC_PER_FRAME : NSEC_PER_MSEC;
-	} else {
-		interval = FOREVER_NSEC;
-	}
-	interval = _dispatch_time_nano2mach(interval);
-	uint64_t target = _dispatch_absolute_time() + interval;
-	target -= (target % interval);
-	const uint64_t leeway = animation ?
-			_dispatch_time_nano2mach(NSEC_PER_FRAME) : interval / 2;
-	dr->dt_timer.target = target;
-	dr->dt_timer.deadline = target + leeway;
-	dr->dt_timer.interval = interval;
-	_dispatch_source_timer_telemetry(ds, DISPATCH_CLOCK_MACH, &dr->dt_timer);
-}
-
 #pragma mark -
 #pragma mark dispatch_after
 
 DISPATCH_ALWAYS_INLINE
 static inline void
-_dispatch_after(dispatch_time_t when, dispatch_queue_t queue,
+_dispatch_after(dispatch_time_t when, dispatch_queue_t dq,
 		void *ctxt, void *handler, bool block)
 {
 	dispatch_timer_source_refs_t dt;
@@ -1370,9 +1333,9 @@
 	delta = _dispatch_timeout(when);
 	if (delta == 0) {
 		if (block) {
-			return dispatch_async(queue, handler);
+			return dispatch_async(dq, handler);
 		}
-		return dispatch_async_f(queue, ctxt, handler);
+		return dispatch_async_f(dq, ctxt, handler);
 	}
 	leeway = delta / 10; // <rdar://problem/13447496>
 
@@ -1380,31 +1343,30 @@
 	if (leeway > 60 * NSEC_PER_SEC) leeway = 60 * NSEC_PER_SEC;
 
 	// this function can and should be optimized to not use a dispatch source
-	ds = dispatch_source_create(&_dispatch_source_type_after, 0, 0, queue);
+	ds = dispatch_source_create(&_dispatch_source_type_after, 0, 0, dq);
 	dt = ds->ds_timer_refs;
 
 	dispatch_continuation_t dc = _dispatch_continuation_alloc();
 	if (block) {
-		_dispatch_continuation_init(dc, ds, handler, 0, 0, 0);
+		_dispatch_continuation_init(dc, dq, handler, 0, 0);
 	} else {
-		_dispatch_continuation_init_f(dc, ds, ctxt, handler, 0, 0, 0);
+		_dispatch_continuation_init_f(dc, dq, ctxt, handler, 0, 0);
 	}
 	// reference `ds` so that it doesn't show up as a leak
 	dc->dc_data = ds;
-	_dispatch_trace_continuation_push(ds->_as_dq, dc);
+	_dispatch_trace_item_push(dq, dc);
 	os_atomic_store2o(dt, ds_handler[DS_EVENT_HANDLER], dc, relaxed);
 
-	if ((int64_t)when < 0) {
-		// wall clock
-		when = (dispatch_time_t)-((int64_t)when);
-	} else {
-		// absolute clock
-		dt->du_fflags |= DISPATCH_TIMER_CLOCK_MACH;
+	dispatch_clock_t clock;
+	uint64_t target;
+	_dispatch_time_to_clock_and_value(when, &clock, &target);
+	if (clock != DISPATCH_CLOCK_WALL) {
 		leeway = _dispatch_time_nano2mach(leeway);
 	}
-	dt->dt_timer.target = when;
+	dt->du_timer_flags |= _dispatch_timer_flags_from_clock(clock);
+	dt->dt_timer.target = target;
 	dt->dt_timer.interval = UINT64_MAX;
-	dt->dt_timer.deadline = when + leeway;
+	dt->dt_timer.deadline = target + leeway;
 	dispatch_activate(ds);
 }
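The public entry points funnel into _dispatch_after() above; a typical call, assuming a pre-existing queue `q` (hypothetical):

// Runs the block roughly two seconds from now; when the computed delta is 0,
// _dispatch_after() degrades to a plain dispatch_async() instead of creating
// a one-shot timer source.
dispatch_after(dispatch_time(DISPATCH_TIME_NOW, (int64_t)(2 * NSEC_PER_SEC)),
		q, ^{ /* delayed work */ });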
 
@@ -1426,1103 +1388,28 @@
 #endif
 
 #pragma mark -
-#pragma mark dispatch_timers
-
-/*
- * The dispatch_timer_heap_t structure is a double min-heap of timers,
- * interleaving the by-target min-heap in the even slots, and the by-deadline
- * in the odd ones.
- *
- * The min element of these is held inline in the dispatch_timer_heap_t
- * structure, and further entries are held in segments.
- *
- * dth_segments is the number of allocated segments.
- *
- * Segment 0 has a size of `DISPATCH_HEAP_INIT_SEGMENT_CAPACITY` pointers
- * Segment k has a size of (DISPATCH_HEAP_INIT_SEGMENT_CAPACITY << (k - 1))
- *
- * Segment n (dth_segments - 1) is the last segment and points its final n
- * entries to previous segments. Its address is held in the `dth_heap` field.
- *
- * segment n   [ regular timer pointers | n-1 | k | 0 ]
- *                                         |    |   |
- * segment n-1 <---------------------------'    |   |
- * segment k   <--------------------------------'   |
- * segment 0   <------------------------------------'
- */
-#define DISPATCH_HEAP_INIT_SEGMENT_CAPACITY 8u
-
-/*
- * There are two min-heaps stored interleaved in a single array,
- * even indices are for the by-target min-heap, and odd indices for
- * the by-deadline one.
- */
-#define DTH_HEAP_ID_MASK (DTH_ID_COUNT - 1)
-#define DTH_HEAP_ID(idx) ((idx) & DTH_HEAP_ID_MASK)
-#define DTH_IDX_FOR_HEAP_ID(idx, heap_id) \
-		(((idx) & ~DTH_HEAP_ID_MASK) | (heap_id))
-
-DISPATCH_ALWAYS_INLINE
-static inline uint32_t
-_dispatch_timer_heap_capacity(uint32_t segments)
-{
-	if (segments == 0) return 2;
-	uint32_t seg_no = segments - 1;
-	// for C = DISPATCH_HEAP_INIT_SEGMENT_CAPACITY,
-	// 2 + C + SUM(C << (i-1), i = 1..seg_no) - seg_no
-	return 2 + (DISPATCH_HEAP_INIT_SEGMENT_CAPACITY << seg_no) - seg_no;
-}
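
// Worked example of the capacity formula above, assuming
// DISPATCH_HEAP_INIT_SEGMENT_CAPACITY == 8 and two slots per timer (target +
// deadline). A sketch for illustration only; names are made up.
#include <assert.h>
#include <stdint.h>

static uint32_t
sketch_heap_capacity(uint32_t segments)
{
	if (segments == 0) return 2;            // inline dth_min pair only
	uint32_t seg_no = segments - 1;
	// the last segment spends seg_no slots on back-pointers to earlier ones
	return 2 + (8u << seg_no) - seg_no;
}

static void
sketch_heap_capacity_examples(void)
{
	assert(sketch_heap_capacity(0) == 2);
	assert(sketch_heap_capacity(1) == 10);
	assert(sketch_heap_capacity(2) == 17);
	assert(sketch_heap_capacity(3) == 32);
}
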
-
-DISPATCH_NOINLINE
-static void
-_dispatch_timer_heap_grow(dispatch_timer_heap_t dth)
-{
-	uint32_t seg_capacity = DISPATCH_HEAP_INIT_SEGMENT_CAPACITY;
-	uint32_t seg_no = dth->dth_segments++;
-	void **heap, **heap_prev = dth->dth_heap;
-
-	if (seg_no > 0) {
-		seg_capacity <<= (seg_no - 1);
-	}
-	heap = _dispatch_calloc(seg_capacity, sizeof(void *));
-	if (seg_no > 1) {
-		uint32_t prev_seg_no = seg_no - 1;
-		uint32_t prev_seg_capacity = seg_capacity >> 1;
-		memcpy(&heap[seg_capacity - prev_seg_no],
-				&heap_prev[prev_seg_capacity - prev_seg_no],
-				prev_seg_no * sizeof(void *));
-	}
-	if (seg_no > 0) {
-		heap[seg_capacity - seg_no] = heap_prev;
-	}
-	dth->dth_heap = heap;
-}
-
-DISPATCH_NOINLINE
-static void
-_dispatch_timer_heap_shrink(dispatch_timer_heap_t dth)
-{
-	uint32_t seg_capacity = DISPATCH_HEAP_INIT_SEGMENT_CAPACITY;
-	uint32_t seg_no = --dth->dth_segments;
-	void **heap = dth->dth_heap, **heap_prev = NULL;
-
-	if (seg_no > 0) {
-		seg_capacity <<= (seg_no - 1);
-		heap_prev = heap[seg_capacity - seg_no];
-	}
-	if (seg_no > 1) {
-		uint32_t prev_seg_no = seg_no - 1;
-		uint32_t prev_seg_capacity = seg_capacity >> 1;
-		memcpy(&heap_prev[prev_seg_capacity - prev_seg_no],
-				&heap[seg_capacity - prev_seg_no],
-				prev_seg_no * sizeof(void *));
-	}
-	dth->dth_heap = heap_prev;
-	free(heap);
-}
-
-DISPATCH_ALWAYS_INLINE
-static inline dispatch_timer_source_refs_t *
-_dispatch_timer_heap_get_slot(dispatch_timer_heap_t dth, uint32_t idx)
-{
-	uint32_t seg_no, segments = dth->dth_segments;
-	void **segment;
-
-	if (idx < DTH_ID_COUNT) {
-		return &dth->dth_min[idx];
-	}
-	idx -= DTH_ID_COUNT;
-
-	// Derive the segment number from the index. Naming
-	// DISPATCH_HEAP_INIT_SEGMENT_CAPACITY `C`, the segments index ranges are:
-	// 0: 0 .. (C - 1)
-	// 1: C .. 2 * C - 1
-	// k: 2^(k-1) * C .. 2^k * C - 1
-	// so `k` can be derived from the first bit set in `idx`
-	seg_no = (uint32_t)(__builtin_clz(DISPATCH_HEAP_INIT_SEGMENT_CAPACITY - 1) -
-			__builtin_clz(idx | (DISPATCH_HEAP_INIT_SEGMENT_CAPACITY - 1)));
-	if (seg_no + 1 == segments) {
-		segment = dth->dth_heap;
-	} else {
-		uint32_t seg_capacity = DISPATCH_HEAP_INIT_SEGMENT_CAPACITY;
-		seg_capacity <<= (segments - 2);
-		segment = dth->dth_heap[seg_capacity - seg_no - 1];
-	}
-	if (seg_no) {
-		idx -= DISPATCH_HEAP_INIT_SEGMENT_CAPACITY << (seg_no - 1);
-	}
-	return (dispatch_timer_source_refs_t *)(segment + idx);
-}
-
-DISPATCH_ALWAYS_INLINE
-static inline void
-_dispatch_timer_heap_set(dispatch_timer_source_refs_t *slot,
-		dispatch_timer_source_refs_t dt, uint32_t idx)
-{
-	*slot = dt;
-	dt->dt_heap_entry[DTH_HEAP_ID(idx)] = idx;
-}
-
-DISPATCH_ALWAYS_INLINE
-static inline uint32_t
-_dispatch_timer_heap_parent(uint32_t idx)
-{
-	uint32_t heap_id = DTH_HEAP_ID(idx);
-	idx = (idx - DTH_ID_COUNT) / 2; // go to the parent
-	return DTH_IDX_FOR_HEAP_ID(idx, heap_id);
-}
-
-DISPATCH_ALWAYS_INLINE
-static inline uint32_t
-_dispatch_timer_heap_left_child(uint32_t idx)
-{
-	uint32_t heap_id = DTH_HEAP_ID(idx);
-	// 2 * (idx - heap_id) + DTH_ID_COUNT + heap_id
-	return 2 * idx + DTH_ID_COUNT - heap_id;
-}
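
// Sketch of the interleaved dual-heap indexing used above, for DTH_ID_COUNT
// == 2: even indices belong to the by-target heap, odd indices to the
// by-deadline heap, and parent/child moves stay within one heap. Illustrative
// only.
#include <assert.h>
#include <stdint.h>

#define SKETCH_ID_COUNT 2u

static uint32_t
sketch_heap_parent(uint32_t idx)
{
	uint32_t heap_id = idx & (SKETCH_ID_COUNT - 1);
	idx = (idx - SKETCH_ID_COUNT) / 2;
	return (idx & ~(SKETCH_ID_COUNT - 1)) | heap_id;
}

static uint32_t
sketch_heap_left_child(uint32_t idx)
{
	uint32_t heap_id = idx & (SKETCH_ID_COUNT - 1);
	return 2 * idx + SKETCH_ID_COUNT - heap_id;
}

static void
sketch_heap_index_examples(void)
{
	// by-target heap (heap_id 0): root slot 0 has children 2 and 4
	assert(sketch_heap_left_child(0) == 2);
	assert(sketch_heap_parent(2) == 0 && sketch_heap_parent(4) == 0);
	// by-deadline heap (heap_id 1): root slot 1 has children 3 and 5
	assert(sketch_heap_left_child(1) == 3);
	assert(sketch_heap_parent(3) == 1 && sketch_heap_parent(5) == 1);
}
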
-
-#if DISPATCH_HAVE_TIMER_COALESCING
-DISPATCH_ALWAYS_INLINE
-static inline uint32_t
-_dispatch_timer_heap_walk_skip(uint32_t idx, uint32_t count)
-{
-	uint32_t heap_id = DTH_HEAP_ID(idx);
-
-	idx -= heap_id;
-	if (unlikely(idx + DTH_ID_COUNT == count)) {
-		// reaching `count` doesn't mean we're done, but there is a weird
-		// corner case if the last item of the heap is a left child:
-		//
-		//     /\
-		//    /  \
-		//   /  __\
-		//  /__/
-		//     ^
-		//
-		// The formula below would return the sibling of `idx` which is
-		// out of bounds. Fortunately, the correct answer is the same
-		// as for idx's parent
-		idx = _dispatch_timer_heap_parent(idx);
-	}
-
-	//
-	// When considering the index in a non interleaved, 1-based array
-	// representation of a heap, hence looking at (idx / DTH_ID_COUNT + 1)
-	// for a given idx in our dual-heaps, that index is in one of two forms:
-	//
-	//     (a) 1xxxx011111    or    (b) 111111111
-	//         d    i    0              d       0
-	//
-	// The first bit set is the row of the binary tree node (0-based).
-	// The following digits from most to least significant represent the path
-	// to that node, where `0` is a left turn and `1` a right turn.
-	//
-	// For example 0b0101 (5) is a node on row 2 accessed going left then right:
-	//
-	// row 0          1
-	//              /   .
-	// row 1      2       3
-	//           . \     . .
-	// row 2    4   5   6   7
-	//         : : : : : : : :
-	//
-	// Skipping a sub-tree in walk order means going to the sibling of the last
-	// node reached after we turned left. If the node was of the form (a),
-	// this node is 1xxxx1, which for the above example is 0b0011 (3).
-	// If the node was of the form (b) then we never took a left, meaning
-	// we reached the last element in traversal order.
-	//
-
-	//
-	// we want to find
-	// - the least significant bit set to 0 in (idx / DTH_ID_COUNT + 1)
-	// - which is offset by log_2(DTH_ID_COUNT) from the position of the least
-	//   significant 0 in (idx + DTH_ID_COUNT + DTH_ID_COUNT - 1)
-	//   since idx is a multiple of DTH_ID_COUNT and DTH_ID_COUNT a power of 2.
-	// - which in turn is the same as the position of the least significant 1 in
-	//   ~(idx + DTH_ID_COUNT + DTH_ID_COUNT - 1)
-	//
-	dispatch_static_assert(powerof2(DTH_ID_COUNT));
-	idx += DTH_ID_COUNT + DTH_ID_COUNT - 1;
-	idx >>= __builtin_ctz(~idx);
-
-	//
-	// `idx` is now either:
-	// - 0 if it was the (b) case above, in which case the walk is done
-	// - 1xxxx0 as the position in a 0 based array representation of a non
-	//   interleaved heap, so we just have to compute the interleaved index.
-	//
-	return likely(idx) ? DTH_ID_COUNT * idx + heap_id : UINT32_MAX;
-}
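
// Sketch of the subtree-skip bit trick above for DTH_ID_COUNT == 2: map the
// interleaved slot to its 1-based heap position, shift out the trailing run
// of 1-bits (the chain of right turns), and map back; 0 means the walk is
// over. The end-of-heap corner case handled in the real function is omitted.
// Illustrative only.
#include <assert.h>
#include <stdint.h>

static uint32_t
sketch_walk_skip(uint32_t idx)
{
	uint32_t heap_id = idx & 1u;
	idx -= heap_id;
	idx += 3;                         // DTH_ID_COUNT + DTH_ID_COUNT - 1
	idx >>= __builtin_ctz(~idx);      // drop the trailing 1-bits
	return idx ? 2 * idx + heap_id : UINT32_MAX;
}

static void
sketch_walk_skip_examples(void)
{
	// Slot 8 is 1-based node 5 (root -> left -> right); skipping its
	// subtree resumes at node 3, which is interleaved slot 4.
	assert(sketch_walk_skip(8) == 4);
	// Slot 12 is node 7, the right-most node of its row: the walk is over.
	assert(sketch_walk_skip(12) == UINT32_MAX);
}
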
-
-DISPATCH_ALWAYS_INLINE
-static inline uint32_t
-_dispatch_timer_heap_walk_next(uint32_t idx, uint32_t count)
-{
-	//
-	// Goes to the next element in heap walk order, which is the prefix ordered
-	// walk of the tree.
-	//
-	// From a given node, the next item to return is the left child if it
-	// exists, else the first right sibling we find by walking our parent chain,
-	// which is exactly what _dispatch_timer_heap_walk_skip() returns.
-	//
-	uint32_t lchild = _dispatch_timer_heap_left_child(idx);
-	if (lchild < count) {
-		return lchild;
-	}
-	return _dispatch_timer_heap_walk_skip(idx, count);
-}
-
-DISPATCH_NOINLINE
-static uint64_t
-_dispatch_timer_heap_max_target_before(dispatch_timer_heap_t dth, uint64_t limit)
-{
-	dispatch_timer_source_refs_t dri;
-	uint32_t idx = _dispatch_timer_heap_left_child(DTH_TARGET_ID);
-	uint32_t count = dth->dth_count;
-	uint64_t tmp, target = dth->dth_min[DTH_TARGET_ID]->dt_timer.target;
-
-	while (idx < count) {
-		dri = *_dispatch_timer_heap_get_slot(dth, idx);
-		tmp = dri->dt_timer.target;
-		if (tmp > limit) {
-			// skip subtree since none of the targets below can be before limit
-			idx = _dispatch_timer_heap_walk_skip(idx, count);
-		} else {
-			target = tmp;
-			idx = _dispatch_timer_heap_walk_next(idx, count);
-		}
-	}
-	return target;
-}
-#endif // DISPATCH_HAVE_TIMER_COALESCING
-
-DISPATCH_NOINLINE
-static void
-_dispatch_timer_heap_resift(dispatch_timer_heap_t dth,
-		dispatch_timer_source_refs_t dt, uint32_t idx)
-{
-	dispatch_static_assert(offsetof(struct dispatch_timer_source_s, target) ==
-			offsetof(struct dispatch_timer_source_s, heap_key[DTH_TARGET_ID]));
-	dispatch_static_assert(offsetof(struct dispatch_timer_source_s, deadline) ==
-			offsetof(struct dispatch_timer_source_s, heap_key[DTH_DEADLINE_ID]));
-#define dth_cmp(hid, dt1, op, dt2) \
-		(((dt1)->dt_timer.heap_key)[hid] op ((dt2)->dt_timer.heap_key)[hid])
-
-	dispatch_timer_source_refs_t *pslot, pdt;
-	dispatch_timer_source_refs_t *cslot, cdt;
-	dispatch_timer_source_refs_t *rslot, rdt;
-	uint32_t cidx, dth_count = dth->dth_count;
-	dispatch_timer_source_refs_t *slot;
-	int heap_id = DTH_HEAP_ID(idx);
-	bool sifted_up = false;
-
-	// try to sift up
-
-	slot = _dispatch_timer_heap_get_slot(dth, idx);
-	while (idx >= DTH_ID_COUNT) {
-		uint32_t pidx = _dispatch_timer_heap_parent(idx);
-		pslot = _dispatch_timer_heap_get_slot(dth, pidx);
-		pdt = *pslot;
-		if (dth_cmp(heap_id, pdt, <=, dt)) {
-			break;
-		}
-		_dispatch_timer_heap_set(slot, pdt, idx);
-		slot = pslot;
-		idx = pidx;
-		sifted_up = true;
-	}
-	if (sifted_up) {
-		goto done;
-	}
-
-	// try to sift down
-
-	while ((cidx = _dispatch_timer_heap_left_child(idx)) < dth_count) {
-		uint32_t ridx = cidx + DTH_ID_COUNT;
-		cslot = _dispatch_timer_heap_get_slot(dth, cidx);
-		cdt = *cslot;
-		if (ridx < dth_count) {
-			rslot = _dispatch_timer_heap_get_slot(dth, ridx);
-			rdt = *rslot;
-			if (dth_cmp(heap_id, cdt, >, rdt)) {
-				cidx = ridx;
-				cdt = rdt;
-				cslot = rslot;
-			}
-		}
-		if (dth_cmp(heap_id, dt, <=, cdt)) {
-			break;
-		}
-		_dispatch_timer_heap_set(slot, cdt, idx);
-		slot = cslot;
-		idx = cidx;
-	}
-
-done:
-	_dispatch_timer_heap_set(slot, dt, idx);
-#undef dth_cmp
-}
-
-DISPATCH_ALWAYS_INLINE
-static void
-_dispatch_timer_heap_insert(dispatch_timer_heap_t dth,
-		dispatch_timer_source_refs_t dt)
-{
-	uint32_t idx = (dth->dth_count += DTH_ID_COUNT) - DTH_ID_COUNT;
-
-	DISPATCH_TIMER_ASSERT(dt->dt_heap_entry[DTH_TARGET_ID], ==,
-			DTH_INVALID_ID, "target idx");
-	DISPATCH_TIMER_ASSERT(dt->dt_heap_entry[DTH_DEADLINE_ID], ==,
-			DTH_INVALID_ID, "deadline idx");
-
-	if (idx == 0) {
-		dt->dt_heap_entry[DTH_TARGET_ID] = DTH_TARGET_ID;
-		dt->dt_heap_entry[DTH_DEADLINE_ID] = DTH_DEADLINE_ID;
-		dth->dth_min[DTH_TARGET_ID] = dth->dth_min[DTH_DEADLINE_ID] = dt;
-		return;
-	}
-
-	if (unlikely(idx + DTH_ID_COUNT >
-			_dispatch_timer_heap_capacity(dth->dth_segments))) {
-		_dispatch_timer_heap_grow(dth);
-	}
-	_dispatch_timer_heap_resift(dth, dt, idx + DTH_TARGET_ID);
-	_dispatch_timer_heap_resift(dth, dt, idx + DTH_DEADLINE_ID);
-}
-
-DISPATCH_NOINLINE
-static void
-_dispatch_timer_heap_remove(dispatch_timer_heap_t dth,
-		dispatch_timer_source_refs_t dt)
-{
-	uint32_t idx = (dth->dth_count -= DTH_ID_COUNT);
-
-	DISPATCH_TIMER_ASSERT(dt->dt_heap_entry[DTH_TARGET_ID], !=,
-			DTH_INVALID_ID, "target idx");
-	DISPATCH_TIMER_ASSERT(dt->dt_heap_entry[DTH_DEADLINE_ID], !=,
-			DTH_INVALID_ID, "deadline idx");
-
-	if (idx == 0) {
-		DISPATCH_TIMER_ASSERT(dth->dth_min[DTH_TARGET_ID], ==, dt,
-				"target slot");
-		DISPATCH_TIMER_ASSERT(dth->dth_min[DTH_DEADLINE_ID], ==, dt,
-				"deadline slot");
-		dth->dth_min[DTH_TARGET_ID] = dth->dth_min[DTH_DEADLINE_ID] = NULL;
-		goto clear_heap_entry;
-	}
-
-	for (uint32_t heap_id = 0; heap_id < DTH_ID_COUNT; heap_id++) {
-		dispatch_timer_source_refs_t *slot, last_dt;
-		slot = _dispatch_timer_heap_get_slot(dth, idx + heap_id);
-		last_dt = *slot; *slot = NULL;
-		if (last_dt != dt) {
-			uint32_t removed_idx = dt->dt_heap_entry[heap_id];
-			_dispatch_timer_heap_resift(dth, last_dt, removed_idx);
-		}
-	}
-	if (unlikely(idx <= _dispatch_timer_heap_capacity(dth->dth_segments - 1))) {
-		_dispatch_timer_heap_shrink(dth);
-	}
-
-clear_heap_entry:
-	dt->dt_heap_entry[DTH_TARGET_ID] = DTH_INVALID_ID;
-	dt->dt_heap_entry[DTH_DEADLINE_ID] = DTH_INVALID_ID;
-}
-
-DISPATCH_ALWAYS_INLINE
-static inline void
-_dispatch_timer_heap_update(dispatch_timer_heap_t dth,
-		dispatch_timer_source_refs_t dt)
-{
-	DISPATCH_TIMER_ASSERT(dt->dt_heap_entry[DTH_TARGET_ID], !=,
-			DTH_INVALID_ID, "target idx");
-	DISPATCH_TIMER_ASSERT(dt->dt_heap_entry[DTH_DEADLINE_ID], !=,
-			DTH_INVALID_ID, "deadline idx");
-
-
-	_dispatch_timer_heap_resift(dth, dt, dt->dt_heap_entry[DTH_TARGET_ID]);
-	_dispatch_timer_heap_resift(dth, dt, dt->dt_heap_entry[DTH_DEADLINE_ID]);
-}
-
-DISPATCH_ALWAYS_INLINE
-static bool
-_dispatch_timer_heap_has_new_min(dispatch_timer_heap_t dth,
-		uint32_t count, uint32_t mask)
-{
-	dispatch_timer_source_refs_t dt;
-	bool changed = false;
-	uint64_t tmp;
-	uint32_t tidx;
-
-	for (tidx = 0; tidx < count; tidx++) {
-		if (!(mask & (1u << tidx))) {
-			continue;
-		}
-
-		dt = dth[tidx].dth_min[DTH_TARGET_ID];
-		tmp = dt ? dt->dt_timer.target : UINT64_MAX;
-		if (dth[tidx].dth_target != tmp) {
-			dth[tidx].dth_target = tmp;
-			changed = true;
-		}
-		dt = dth[tidx].dth_min[DTH_DEADLINE_ID];
-		tmp = dt ? dt->dt_timer.deadline : UINT64_MAX;
-		if (dth[tidx].dth_deadline != tmp) {
-			dth[tidx].dth_deadline = tmp;
-			changed = true;
-		}
-	}
-	return changed;
-}
-
-static inline void
-_dispatch_timers_unregister(dispatch_timer_source_refs_t dt)
-{
-	uint32_t tidx = dt->du_ident;
-	dispatch_timer_heap_t heap = &_dispatch_timers_heap[tidx];
-
-	_dispatch_timer_heap_remove(heap, dt);
-	_dispatch_timers_reconfigure = true;
-	_dispatch_timers_processing_mask |= 1 << tidx;
-	dispatch_assert(dt->du_wlh == NULL || dt->du_wlh == DISPATCH_WLH_ANON);
-	dt->du_wlh = NULL;
-}
-
-static inline void
-_dispatch_timers_register(dispatch_timer_source_refs_t dt, uint32_t tidx)
-{
-	dispatch_timer_heap_t heap = &_dispatch_timers_heap[tidx];
-	if (_dispatch_unote_registered(dt)) {
-		DISPATCH_TIMER_ASSERT(dt->du_ident, ==, tidx, "tidx");
-		_dispatch_timer_heap_update(heap, dt);
-	} else {
-		dt->du_ident = tidx;
-		_dispatch_timer_heap_insert(heap, dt);
-	}
-	_dispatch_timers_reconfigure = true;
-	_dispatch_timers_processing_mask |= 1 << tidx;
-	dispatch_assert(dt->du_wlh == NULL || dt->du_wlh == DISPATCH_WLH_ANON);
-	dt->du_wlh = DISPATCH_WLH_ANON;
-}
-
-DISPATCH_ALWAYS_INLINE
-static inline bool
-_dispatch_source_timer_tryarm(dispatch_source_t ds)
-{
-	dispatch_queue_flags_t oqf, nqf;
-	return os_atomic_rmw_loop2o(ds, dq_atomic_flags, oqf, nqf, relaxed, {
-		if (oqf & (DSF_CANCELED | DQF_RELEASED)) {
-			// do not install a cancelled timer
-			os_atomic_rmw_loop_give_up(break);
-		}
-		nqf = oqf | DSF_ARMED;
-	});
-}
-
-// Updates the ordered list of timers based on next fire date for changes to ds.
-// Should only be called from the context of _dispatch_mgr_q.
-static void
-_dispatch_timers_update(dispatch_unote_t du, uint32_t flags)
-{
-	dispatch_timer_source_refs_t dr = du._dt;
-	dispatch_source_t ds = _dispatch_source_from_refs(dr);
-	const char *verb = "updated";
-	bool will_register, disarm = false;
-
-	DISPATCH_ASSERT_ON_MANAGER_QUEUE();
-
-	if (unlikely(dr->du_ident == DISPATCH_TIMER_IDENT_CANCELED)) {
-		dispatch_assert((flags & DISPATCH_TIMERS_RETAIN_2) == 0);
-		return;
-	}
-
-	// Unregister timers that are unconfigured, disabled, suspended or have
-	// missed intervals. Rearm after dispatch_set_timer(), resume or source
-	// invoke will reenable them
-	will_register = !(flags & DISPATCH_TIMERS_UNREGISTER) &&
-			dr->dt_timer.target < INT64_MAX &&
-			!os_atomic_load2o(ds, ds_pending_data, relaxed) &&
-			!DISPATCH_QUEUE_IS_SUSPENDED(ds) &&
-			!os_atomic_load2o(dr, dt_pending_config, relaxed);
-	if (likely(!_dispatch_unote_registered(dr))) {
-		dispatch_assert((flags & DISPATCH_TIMERS_RETAIN_2) == 0);
-		if (unlikely(!will_register || !_dispatch_source_timer_tryarm(ds))) {
-			return;
-		}
-		verb = "armed";
-	} else if (unlikely(!will_register)) {
-		disarm = true;
-		verb = "disarmed";
-	}
-
-	// The heap owns a +2 on dispatch sources it references
-	//
-	// _dispatch_timers_run2() also sometimes passes DISPATCH_TIMERS_RETAIN_2
-	// when it wants to take over this +2 at the same time we are unregistering
-	// the timer from the heap.
-	//
-	// Compute our refcount balance according to these rules, if our balance
-	// would become negative we retain the source upfront, if it is positive, we
-	// get rid of the extraneous refcounts after we're done touching the source.
-	int refs = will_register ? -2 : 0;
-	if (_dispatch_unote_registered(dr) && !(flags & DISPATCH_TIMERS_RETAIN_2)) {
-		refs += 2;
-	}
-	if (refs < 0) {
-		dispatch_assert(refs == -2);
-		_dispatch_retain_2(ds);
-	}
-
-	uint32_t tidx = _dispatch_source_timer_idx(dr);
-	if (unlikely(_dispatch_unote_registered(dr) &&
-			(!will_register || dr->du_ident != tidx))) {
-		_dispatch_timers_unregister(dr);
-	}
-	if (likely(will_register)) {
-		_dispatch_timers_register(dr, tidx);
-	}
-
-	if (disarm) {
-		_dispatch_queue_atomic_flags_clear(ds->_as_dq, DSF_ARMED);
-	}
-	_dispatch_debug("kevent-source[%p]: %s timer[%p]", ds, verb, dr);
-	_dispatch_object_debug(ds, "%s", __func__);
-	if (refs > 0) {
-		dispatch_assert(refs == 2);
-		_dispatch_release_2_tailcall(ds);
-	}
-}
-
-#define DISPATCH_TIMER_MISSED_MARKER  1ul
-
-DISPATCH_ALWAYS_INLINE
-static inline unsigned long
-_dispatch_source_timer_compute_missed(dispatch_timer_source_refs_t dt,
-		uint64_t now, unsigned long prev)
-{
-	uint64_t missed = (now - dt->dt_timer.target) / dt->dt_timer.interval;
-	if (++missed + prev > LONG_MAX) {
-		missed = LONG_MAX - prev;
-	}
-	if (dt->dt_timer.interval < INT64_MAX) {
-		uint64_t push_by = missed * dt->dt_timer.interval;
-		dt->dt_timer.target += push_by;
-		dt->dt_timer.deadline += push_by;
-	} else {
-		dt->dt_timer.target = UINT64_MAX;
-		dt->dt_timer.deadline = UINT64_MAX;
-	}
-	prev += missed;
-	return prev;
-}
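
// Sketch of the catch-up math above: when `now` has passed the target of a
// repeating timer, count how many whole intervals were missed (plus the one
// that is firing) and push target and deadline forward by that amount. The
// LONG_MAX saturation and the oneshot (interval >= INT64_MAX) case are
// omitted here; names are illustrative only.
#include <stdint.h>

static uint64_t
sketch_compute_missed(uint64_t now, uint64_t interval,
		uint64_t *target, uint64_t *deadline)
{
	uint64_t missed = (now - *target) / interval + 1;
	*target += missed * interval;
	*deadline += missed * interval;
	return missed;
}
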
-
-DISPATCH_ALWAYS_INLINE
-static inline unsigned long
-_dispatch_source_timer_data(dispatch_source_t ds, dispatch_unote_t du)
-{
-	dispatch_timer_source_refs_t dr = du._dt;
-	uint64_t data, prev, clear_prev = 0;
-
-	os_atomic_rmw_loop2o(ds, ds_pending_data, prev, clear_prev, relaxed, {
-		data = prev >> 1;
-		if (unlikely(prev & DISPATCH_TIMER_MISSED_MARKER)) {
-			os_atomic_rmw_loop_give_up(goto handle_missed_intervals);
-		}
-	});
-	return (unsigned long)data;
-
-handle_missed_intervals:
-	// The timer may be in _dispatch_source_invoke2() already for other
-	// reasons such as running the registration handler when ds_pending_data
-	// is changed by _dispatch_timers_run2() without holding the drain lock.
-	//
-	// We hence need dependency ordering to pair with the release barrier
-	// done by _dispatch_timers_run2() when setting the MISSED_MARKER bit.
-	os_atomic_thread_fence(dependency);
-	dr = os_atomic_force_dependency_on(dr, data);
-
-	uint64_t now = _dispatch_time_now(DISPATCH_TIMER_CLOCK(dr->du_ident));
-	if (now >= dr->dt_timer.target) {
-		OS_COMPILER_CAN_ASSUME(dr->dt_timer.interval < INT64_MAX);
-		data = _dispatch_source_timer_compute_missed(dr, now, (unsigned long)data);
-	}
-
-	// When we see the MISSED_MARKER the manager has given up on this timer
-	// and expects the handler to call "resume".
-	//
-	// However, it may not have reflected this into the atomic flags yet
-	// so make sure _dispatch_source_invoke2() sees the timer is disarmed
-	//
-	// The subsequent _dispatch_source_refs_resume() will enqueue the source
-	// on the manager and make the changes to `ds_timer` above visible.
-	_dispatch_queue_atomic_flags_clear(ds->_as_dq, DSF_ARMED);
-	os_atomic_store2o(ds, ds_pending_data, 0, relaxed);
-	return (unsigned long)data;
-}
-
-static inline void
-_dispatch_timers_run2(dispatch_clock_now_cache_t nows, uint32_t tidx)
-{
-	dispatch_timer_source_refs_t dr;
-	dispatch_source_t ds;
-	uint64_t data, pending_data;
-	uint64_t now = _dispatch_time_now_cached(DISPATCH_TIMER_CLOCK(tidx), nows);
-
-	while ((dr = _dispatch_timers_heap[tidx].dth_min[DTH_TARGET_ID])) {
-		DISPATCH_TIMER_ASSERT(dr->du_filter, ==, DISPATCH_EVFILT_TIMER,
-				"invalid filter");
-		DISPATCH_TIMER_ASSERT(dr->du_ident, ==, tidx, "tidx");
-		DISPATCH_TIMER_ASSERT(dr->dt_timer.target, !=, 0, "missing target");
-		ds = _dispatch_source_from_refs(dr);
-		if (dr->dt_timer.target > now) {
-			// Done running timers for now.
-			break;
-		}
-		if (dr->du_fflags & DISPATCH_TIMER_AFTER) {
-			_dispatch_trace_timer_fire(dr, 1, 1);
-			_dispatch_source_merge_evt(dr, EV_ONESHOT, 1, 0, 0);
-			_dispatch_debug("kevent-source[%p]: fired after timer[%p]", ds, dr);
-			_dispatch_object_debug(ds, "%s", __func__);
-			continue;
-		}
-
-		data = os_atomic_load2o(ds, ds_pending_data, relaxed);
-		if (unlikely(data)) {
-			// the release barrier is required to make the changes
-			// to `ds_timer` visible to _dispatch_source_timer_data()
-			if (os_atomic_cmpxchg2o(ds, ds_pending_data, data,
-					data | DISPATCH_TIMER_MISSED_MARKER, release)) {
-				_dispatch_timers_update(dr, DISPATCH_TIMERS_UNREGISTER);
-				continue;
-			}
-		}
-
-		data = _dispatch_source_timer_compute_missed(dr, now, 0);
-		_dispatch_timers_update(dr, DISPATCH_TIMERS_RETAIN_2);
-		pending_data = data << 1;
-		if (!_dispatch_unote_registered(dr) && dr->dt_timer.target < INT64_MAX){
-			// if we unregistered because of suspension we have to fake we
-			// missed events.
-			pending_data |= DISPATCH_TIMER_MISSED_MARKER;
-			os_atomic_store2o(ds, ds_pending_data, pending_data, release);
-		} else {
-			os_atomic_store2o(ds, ds_pending_data, pending_data, relaxed);
-		}
-		_dispatch_trace_timer_fire(dr, data, data);
-		_dispatch_debug("kevent-source[%p]: fired timer[%p]", ds, dr);
-		_dispatch_object_debug(ds, "%s", __func__);
-		dx_wakeup(ds, 0, DISPATCH_WAKEUP_MAKE_DIRTY | DISPATCH_WAKEUP_CONSUME_2);
-	}
-}
-
-DISPATCH_NOINLINE
-static void
-_dispatch_timers_run(dispatch_clock_now_cache_t nows)
-{
-	uint32_t tidx;
-	for (tidx = 0; tidx < DISPATCH_TIMER_COUNT; tidx++) {
-		if (_dispatch_timers_heap[tidx].dth_count) {
-			_dispatch_timers_run2(nows, tidx);
-		}
-	}
-}
-
-#if DISPATCH_HAVE_TIMER_COALESCING
-#define DISPATCH_KEVENT_COALESCING_WINDOW_INIT(qos, ms) \
-		[DISPATCH_TIMER_QOS_##qos] = 2ull * (ms) * NSEC_PER_MSEC
-
-static const uint64_t _dispatch_kevent_coalescing_window[] = {
-	DISPATCH_KEVENT_COALESCING_WINDOW_INIT(NORMAL, 75),
-#if DISPATCH_HAVE_TIMER_QOS
-	DISPATCH_KEVENT_COALESCING_WINDOW_INIT(CRITICAL, 1),
-	DISPATCH_KEVENT_COALESCING_WINDOW_INIT(BACKGROUND, 100),
-#endif
-};
-#endif // DISPATCH_HAVE_TIMER_COALESCING
-
-static inline dispatch_timer_delay_s
-_dispatch_timers_get_delay(dispatch_timer_heap_t dth, dispatch_clock_t clock,
-		uint32_t qos, dispatch_clock_now_cache_t nows)
-{
-	uint64_t target = dth->dth_target, deadline = dth->dth_deadline;
-	uint64_t delta = INT64_MAX, dldelta = INT64_MAX;
-	dispatch_timer_delay_s rc;
-
-	dispatch_assert(target <= deadline);
-	if (delta == 0 || target >= INT64_MAX) {
-		goto done;
-	}
-
-	if (qos < DISPATCH_TIMER_QOS_COUNT && dth->dth_count > 2) {
-#if DISPATCH_HAVE_TIMER_COALESCING
-		// Timer pre-coalescing <rdar://problem/13222034>
-		// When we have several timers with this target/deadline bracket:
-		//
-		//      Target        window  Deadline
-		//        V           <-------V
-		// t1:    [...........|.................]
-		// t2:         [......|.......]
-		// t3:             [..|..........]
-		// t4:                | [.............]
-		//                 ^
-		//          Optimal Target
-		//
-		// Coalescing works better if the Target is delayed to "Optimal", by
-		// picking the latest target that isn't too close to the deadline.
-		uint64_t window = _dispatch_kevent_coalescing_window[qos];
-		if (target + window < deadline) {
-			uint64_t latest = deadline - window;
-			target = _dispatch_timer_heap_max_target_before(dth, latest);
-		}
-#endif
-	}
-
-	uint64_t now = _dispatch_time_now_cached(clock, nows);
-	if (target <= now) {
-		delta = 0;
-		dldelta = 0;
-		goto done;
-	}
-
-	uint64_t tmp = target - now;
-	if (clock != DISPATCH_CLOCK_WALL) {
-		tmp = _dispatch_time_mach2nano(tmp);
-	}
-	if (tmp < delta) {
-		delta = tmp;
-	}
-
-	tmp = deadline - now;
-	if (clock != DISPATCH_CLOCK_WALL) {
-		tmp = _dispatch_time_mach2nano(tmp);
-	}
-	if (tmp < dldelta) {
-		dldelta = tmp;
-	}
-
-done:
-	rc.delay = delta;
-	rc.leeway = delta < INT64_MAX ? dldelta - delta : INT64_MAX;
-	return rc;
-}
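
// Sketch of what the function above hands back to the event loop, ignoring
// pre-coalescing, clock conversion, and the empty-heap case: the delay is the
// time until the earliest target, and the leeway is the slack between that
// target and the earliest deadline. Illustrative only.
#include <stdint.h>

struct sketch_timer_delay { uint64_t delay, leeway; };

static struct sketch_timer_delay
sketch_timers_get_delay(uint64_t target_ns, uint64_t deadline_ns,
		uint64_t now_ns)
{
	struct sketch_timer_delay rc;
	if (target_ns <= now_ns) {
		rc.delay = 0;                        // fire immediately
		rc.leeway = 0;
	} else {
		rc.delay = target_ns - now_ns;
		rc.leeway = deadline_ns - target_ns; // dldelta - delta
	}
	return rc;
}
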
-
-static bool
-_dispatch_timers_program2(dispatch_clock_now_cache_t nows, uint32_t tidx)
-{
-	uint32_t qos = DISPATCH_TIMER_QOS(tidx);
-	dispatch_clock_t clock = DISPATCH_TIMER_CLOCK(tidx);
-	dispatch_timer_heap_t heap = &_dispatch_timers_heap[tidx];
-	dispatch_timer_delay_s range;
-
-	range = _dispatch_timers_get_delay(heap, clock, qos, nows);
-	if (range.delay == 0 || range.delay >= INT64_MAX) {
-		_dispatch_trace_next_timer_set(NULL, qos);
-		if (heap->dth_flags & DTH_ARMED) {
-			_dispatch_event_loop_timer_delete(tidx);
-		}
-		return range.delay == 0;
-	}
-
-	_dispatch_trace_next_timer_set(heap->dth_min[DTH_TARGET_ID], qos);
-	_dispatch_trace_next_timer_program(range.delay, qos);
-	_dispatch_event_loop_timer_arm(tidx, range, nows);
-	return false;
-}
-
-DISPATCH_NOINLINE
-static bool
-_dispatch_timers_program(dispatch_clock_now_cache_t nows)
-{
-	bool poll = false;
-	uint32_t tidx, timerm = _dispatch_timers_processing_mask;
-
-	for (tidx = 0; tidx < DISPATCH_TIMER_COUNT; tidx++) {
-		if (timerm & (1 << tidx)) {
-			poll |= _dispatch_timers_program2(nows, tidx);
-		}
-	}
-	return poll;
-}
-
-DISPATCH_NOINLINE
-static bool
-_dispatch_timers_configure(void)
-{
-	// Find out if there is a new target/deadline on the timer lists
-	return _dispatch_timer_heap_has_new_min(_dispatch_timers_heap,
-			countof(_dispatch_timers_heap), _dispatch_timers_processing_mask);
-}
-
-static inline bool
-_dispatch_mgr_timers(void)
-{
-	dispatch_clock_now_cache_s nows = { };
-	bool expired = _dispatch_timers_expired;
-	if (unlikely(expired)) {
-		_dispatch_timers_run(&nows);
-	}
-	_dispatch_mgr_trace_timers_wakes();
-	bool reconfigure = _dispatch_timers_reconfigure;
-	if (unlikely(reconfigure || expired)) {
-		if (reconfigure) {
-			reconfigure = _dispatch_timers_configure();
-			_dispatch_timers_reconfigure = false;
-		}
-		if (reconfigure || expired) {
-			expired = _dispatch_timers_expired = _dispatch_timers_program(&nows);
-		}
-		_dispatch_timers_processing_mask = 0;
-	}
-	return expired;
-}
-
-#pragma mark -
-#pragma mark dispatch_mgr
-
-void
-_dispatch_mgr_queue_push(dispatch_queue_t dq, dispatch_object_t dou,
-		DISPATCH_UNUSED dispatch_qos_t qos)
-{
-	uint64_t dq_state;
-	_dispatch_trace_continuation_push(dq, dou._do);
-	if (unlikely(_dispatch_queue_push_update_tail(dq, dou._do))) {
-		_dispatch_queue_push_update_head(dq, dou._do);
-		dq_state = os_atomic_or2o(dq, dq_state, DISPATCH_QUEUE_DIRTY, release);
-		if (!_dq_state_drain_locked_by_self(dq_state)) {
-			_dispatch_event_loop_poke(DISPATCH_WLH_MANAGER, 0, 0);
-		}
-	}
-}
-
-DISPATCH_NORETURN
-void
-_dispatch_mgr_queue_wakeup(DISPATCH_UNUSED dispatch_queue_t dq,
-		DISPATCH_UNUSED dispatch_qos_t qos,
-		DISPATCH_UNUSED dispatch_wakeup_flags_t flags)
-{
-	DISPATCH_INTERNAL_CRASH(0, "Don't try to wake up or override the manager");
-}
-
-#if DISPATCH_USE_MGR_THREAD
-DISPATCH_NOINLINE DISPATCH_NORETURN
-static void
-_dispatch_mgr_invoke(void)
-{
-#if DISPATCH_EVENT_BACKEND_KEVENT
-	dispatch_kevent_s evbuf[DISPATCH_DEFERRED_ITEMS_EVENT_COUNT];
-#endif
-	dispatch_deferred_items_s ddi = {
-#if DISPATCH_EVENT_BACKEND_KEVENT
-		.ddi_maxevents = DISPATCH_DEFERRED_ITEMS_EVENT_COUNT,
-		.ddi_eventlist = evbuf,
-#endif
-	};
-	bool poll;
-
-	_dispatch_deferred_items_set(&ddi);
-	for (;;) {
-		_dispatch_mgr_queue_drain();
-		poll = _dispatch_mgr_timers();
-		poll = poll || _dispatch_queue_class_probe(&_dispatch_mgr_q);
-		_dispatch_event_loop_drain(poll ? KEVENT_FLAG_IMMEDIATE : 0);
-	}
-}
-#endif // DISPATCH_USE_MGR_THREAD
-
-DISPATCH_NORETURN
-void
-_dispatch_mgr_thread(dispatch_queue_t dq DISPATCH_UNUSED,
-		dispatch_invoke_context_t dic DISPATCH_UNUSED,
-		dispatch_invoke_flags_t flags DISPATCH_UNUSED)
-{
-#if DISPATCH_USE_KEVENT_WORKQUEUE
-	if (_dispatch_kevent_workqueue_enabled) {
-		DISPATCH_INTERNAL_CRASH(0, "Manager queue invoked with "
-				"kevent workqueue enabled");
-	}
-#endif
-#if DISPATCH_USE_MGR_THREAD
-	_dispatch_queue_set_current(&_dispatch_mgr_q);
-	_dispatch_mgr_priority_init();
-	_dispatch_queue_mgr_lock(&_dispatch_mgr_q);
-	// never returns, so burn bridges behind us & clear stack 2k ahead
-	_dispatch_clear_stack(2048);
-	_dispatch_mgr_invoke();
-#endif
-}
-
-#if DISPATCH_USE_KEVENT_WORKQUEUE
-
-#define DISPATCH_KEVENT_WORKER_IS_NOT_MANAGER ((dispatch_priority_t)~0u)
-
-_Static_assert(WORKQ_KEVENT_EVENT_BUFFER_LEN >=
-		DISPATCH_DEFERRED_ITEMS_EVENT_COUNT,
-		"our list should not be longer than the kernel's");
-
-DISPATCH_ALWAYS_INLINE
-static inline dispatch_priority_t
-_dispatch_wlh_worker_thread_init(dispatch_wlh_t wlh,
-		dispatch_deferred_items_t ddi)
-{
-	dispatch_assert(wlh);
-	dispatch_priority_t old_dbp;
-
-	pthread_priority_t pp = _dispatch_get_priority();
-	if (!(pp & _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG)) {
-		// If this thread does not have the event manager flag set, don't setup
-		// as the dispatch manager and let the caller know to only process
-		// the delivered events.
-		//
-		// Also add the NEEDS_UNBIND flag so that
-		// _dispatch_priority_compute_update knows it has to unbind
-		pp &= _PTHREAD_PRIORITY_OVERCOMMIT_FLAG | ~_PTHREAD_PRIORITY_FLAGS_MASK;
-		if (wlh == DISPATCH_WLH_ANON) {
-			pp |= _PTHREAD_PRIORITY_NEEDS_UNBIND_FLAG;
-		} else {
-			// pthread sets the flag when it is an event delivery thread
-			// so we need to explicitly clear it
-			pp &= ~(pthread_priority_t)_PTHREAD_PRIORITY_NEEDS_UNBIND_FLAG;
-		}
-		_dispatch_thread_setspecific(dispatch_priority_key,
-				(void *)(uintptr_t)pp);
-		if (wlh != DISPATCH_WLH_ANON) {
-			_dispatch_debug("wlh[%p]: handling events", wlh);
-		} else {
-			ddi->ddi_can_stash = true;
-		}
-		return DISPATCH_KEVENT_WORKER_IS_NOT_MANAGER;
-	}
-
-	if ((pp & _PTHREAD_PRIORITY_SCHED_PRI_FLAG) ||
-			!(pp & ~_PTHREAD_PRIORITY_FLAGS_MASK)) {
-		// When the pthread kext is delivering kevents to us, and pthread
-		// root queues are in use, then the pthread priority TSD is set
-		// to a sched pri with the _PTHREAD_PRIORITY_SCHED_PRI_FLAG bit set.
-		//
-		// Given that this isn't a valid QoS we need to fixup the TSD,
-		// and the best option is to clear the qos/priority bits which tells
-		// us to not do any QoS related calls on this thread.
-		//
-		// However, in that case the manager thread is opted out of QoS,
-		// as far as pthread is concerned, and can't be turned into
-		// something else, so we can't stash.
-		pp &= (pthread_priority_t)_PTHREAD_PRIORITY_FLAGS_MASK;
-	}
-	// Managers always park without mutating to a regular worker thread, and
-	// hence never need to unbind from userland, and when draining a manager,
-	// the NEEDS_UNBIND flag would cause the mutation to happen.
-	// So we need to strip this flag
-	pp &= ~(pthread_priority_t)_PTHREAD_PRIORITY_NEEDS_UNBIND_FLAG;
-	_dispatch_thread_setspecific(dispatch_priority_key, (void *)(uintptr_t)pp);
-
-	// ensure kevents registered from this thread are registered at manager QoS
-	old_dbp = _dispatch_set_basepri(DISPATCH_PRIORITY_FLAG_MANAGER);
-	_dispatch_queue_set_current(&_dispatch_mgr_q);
-	_dispatch_queue_mgr_lock(&_dispatch_mgr_q);
-	return old_dbp;
-}
-
-DISPATCH_ALWAYS_INLINE DISPATCH_WARN_RESULT
-static inline bool
-_dispatch_wlh_worker_thread_reset(dispatch_priority_t old_dbp)
-{
-	bool needs_poll = _dispatch_queue_mgr_unlock(&_dispatch_mgr_q);
-	_dispatch_reset_basepri(old_dbp);
-	_dispatch_reset_basepri_override();
-	_dispatch_queue_set_current(NULL);
-	return needs_poll;
-}
-
-DISPATCH_ALWAYS_INLINE
-static void
-_dispatch_wlh_worker_thread(dispatch_wlh_t wlh, dispatch_kevent_t events,
-		int *nevents)
-{
-	_dispatch_introspection_thread_add();
-	DISPATCH_PERF_MON_VAR_INIT
-
-	dispatch_deferred_items_s ddi = {
-		.ddi_eventlist = events,
-	};
-	dispatch_priority_t old_dbp;
-
-	old_dbp = _dispatch_wlh_worker_thread_init(wlh, &ddi);
-	if (old_dbp == DISPATCH_KEVENT_WORKER_IS_NOT_MANAGER) {
-		_dispatch_perfmon_start_impl(true);
-	} else {
-		dispatch_assert(wlh == DISPATCH_WLH_ANON);
-		wlh = DISPATCH_WLH_ANON;
-	}
-	_dispatch_deferred_items_set(&ddi);
-	_dispatch_event_loop_merge(events, *nevents);
-
-	if (old_dbp != DISPATCH_KEVENT_WORKER_IS_NOT_MANAGER) {
-		_dispatch_mgr_queue_drain();
-		bool poll = _dispatch_mgr_timers();
-		if (_dispatch_wlh_worker_thread_reset(old_dbp)) {
-			poll = true;
-		}
-		if (poll) _dispatch_event_loop_poke(DISPATCH_WLH_MANAGER, 0, 0);
-	} else if (ddi.ddi_stashed_dou._do) {
-		_dispatch_debug("wlh[%p]: draining deferred item %p", wlh,
-				ddi.ddi_stashed_dou._do);
-		if (wlh == DISPATCH_WLH_ANON) {
-			dispatch_assert(ddi.ddi_nevents == 0);
-			_dispatch_deferred_items_set(NULL);
-			_dispatch_root_queue_drain_deferred_item(&ddi
-					DISPATCH_PERF_MON_ARGS);
-		} else {
-			_dispatch_root_queue_drain_deferred_wlh(&ddi
-					DISPATCH_PERF_MON_ARGS);
-		}
-	}
-
-	_dispatch_deferred_items_set(NULL);
-	if (old_dbp == DISPATCH_KEVENT_WORKER_IS_NOT_MANAGER &&
-			!ddi.ddi_stashed_dou._do) {
-		_dispatch_perfmon_end(perfmon_thread_event_no_steal);
-	}
-	_dispatch_debug("returning %d deferred kevents", ddi.ddi_nevents);
-	*nevents = ddi.ddi_nevents;
-}
-
-DISPATCH_NOINLINE
-void
-_dispatch_kevent_worker_thread(dispatch_kevent_t *events, int *nevents)
-{
-	if (!events && !nevents) {
-		// events for worker thread request have already been delivered earlier
-		return;
-	}
-	if (!dispatch_assume(*nevents && *events)) return;
-	_dispatch_adopt_wlh_anon();
-	_dispatch_wlh_worker_thread(DISPATCH_WLH_ANON, *events, nevents);
-	_dispatch_reset_wlh();
-}
-
-
-#endif // DISPATCH_USE_KEVENT_WORKQUEUE
-#pragma mark -
 #pragma mark dispatch_source_debug
 
+DISPATCH_COLD
 static size_t
 _dispatch_source_debug_attr(dispatch_source_t ds, char* buf, size_t bufsiz)
 {
 	dispatch_queue_t target = ds->do_targetq;
 	dispatch_source_refs_t dr = ds->ds_refs;
+	dispatch_queue_flags_t dqf = _dispatch_queue_atomic_flags(ds);
+	dispatch_unote_state_t du_state = _dispatch_unote_state(dr);
 	return dsnprintf(buf, bufsiz, "target = %s[%p], ident = 0x%x, "
 			"mask = 0x%x, pending_data = 0x%llx, registered = %d, "
-			"armed = %d, deleted = %d%s, canceled = %d, ",
+			"armed = %d, %s%s%s",
 			target && target->dq_label ? target->dq_label : "", target,
-			dr->du_ident, dr->du_fflags, (unsigned long long)ds->ds_pending_data,
-			ds->ds_is_installed, (bool)(ds->dq_atomic_flags & DSF_ARMED),
-			(bool)(ds->dq_atomic_flags & DSF_DELETED),
-			(ds->dq_atomic_flags & DSF_DEFERRED_DELETE) ? " (pending)" : "",
-			(bool)(ds->dq_atomic_flags & DSF_CANCELED));
+			dr->du_ident, dr->du_fflags, (unsigned long long)dr->ds_pending_data,
+			_du_state_registered(du_state), _du_state_armed(du_state),
+			(dqf & DSF_CANCELED) ? "cancelled, " : "",
+			(dqf & DSF_NEEDS_EVENT) ? "needs-event, " : "",
+			(dqf & DSF_DELETED) ? "deleted, " : "");
 }
 
+DISPATCH_COLD
 static size_t
 _dispatch_timer_debug_attr(dispatch_source_t ds, char* buf, size_t bufsiz)
 {
@@ -2531,7 +1418,7 @@
 			", interval = 0x%llx, flags = 0x%x }, ",
 			(unsigned long long)dr->dt_timer.target,
 			(unsigned long long)dr->dt_timer.deadline,
-			(unsigned long long)dr->dt_timer.interval, dr->du_fflags);
+			(unsigned long long)dr->dt_timer.interval, dr->du_timer_flags);
 }
 
 size_t
@@ -2540,7 +1427,7 @@
 	dispatch_source_refs_t dr = ds->ds_refs;
 	size_t offset = 0;
 	offset += dsnprintf(&buf[offset], bufsiz - offset, "%s[%p] = { ",
-			dx_kind(ds), ds);
+			_dispatch_object_class_name(ds), ds);
 	offset += _dispatch_object_debug_attr(ds, &buf[offset], bufsiz - offset);
 	offset += _dispatch_source_debug_attr(ds, &buf[offset], bufsiz - offset);
 	if (dr->du_is_timer) {
@@ -2548,6 +1435,6 @@
 	}
 	offset += dsnprintf(&buf[offset], bufsiz - offset, "kevent = %p%s, "
 			"filter = %s }", dr,  dr->du_is_direct ? " (direct)" : "",
-			dr->du_type->dst_kind);
+			dux_type(dr)->dst_kind);
 	return offset;
 }
diff --git a/src/source_internal.h b/src/source_internal.h
index 000c540..f38c2e9 100644
--- a/src/source_internal.h
+++ b/src/source_internal.h
@@ -32,100 +32,41 @@
 #include <dispatch/base.h> // for HeaderDoc
 #endif
 
-enum {
-	/* DISPATCH_TIMER_STRICT 0x1 */
-	/* DISPATCH_TIMER_BACKGROUND = 0x2, */
-	DISPATCH_TIMER_CLOCK_MACH = 0x4,
-	DISPATCH_TIMER_INTERVAL = 0x8,
-	DISPATCH_TIMER_AFTER = 0x10,
-	/* DISPATCH_INTERVAL_UI_ANIMATION = 0x20 */
-};
+_OS_OBJECT_CLASS_IMPLEMENTS_PROTOCOL(dispatch_source, dispatch_object)
+DISPATCH_CLASS_DECL_BARE(source, QUEUE);
 
-DISPATCH_ALWAYS_INLINE
-static inline unsigned int
-_dispatch_source_timer_idx(dispatch_unote_t du)
-{
-	uint32_t clock, qos = 0, fflags = du._dt->du_fflags;
-
-	dispatch_assert(DISPATCH_CLOCK_MACH == 1);
-	dispatch_assert(DISPATCH_CLOCK_WALL == 0);
-	clock = (fflags & DISPATCH_TIMER_CLOCK_MACH) / DISPATCH_TIMER_CLOCK_MACH;
-
-#if DISPATCH_HAVE_TIMER_QOS
-	dispatch_assert(DISPATCH_TIMER_STRICT == DISPATCH_TIMER_QOS_CRITICAL);
-	dispatch_assert(DISPATCH_TIMER_BACKGROUND == DISPATCH_TIMER_QOS_BACKGROUND);
-	qos = fflags & (DISPATCH_TIMER_STRICT | DISPATCH_TIMER_BACKGROUND);
-	// flags are normalized so this should never happen
-	dispatch_assert(qos < DISPATCH_TIMER_QOS_COUNT);
-#endif
-
-	return DISPATCH_TIMER_INDEX(clock, qos);
-}
-
-#define _DISPATCH_SOURCE_HEADER(refs) \
-	DISPATCH_QUEUE_HEADER(refs); \
-	unsigned int \
+#define DISPATCH_SOURCE_CLASS_HEADER(x) \
+	DISPATCH_LANE_CLASS_HEADER(x); \
+	uint16_t \
+		/* set under the drain lock */ \
 		ds_is_installed:1, \
-		dm_needs_mgr:1, \
 		dm_connect_handler_called:1, \
-		dm_uninstalled:1, \
 		dm_cancel_handler_called:1, \
-		dm_is_xpc:1
+		dm_is_xpc:1, \
+		__ds_flags_pad : 12; \
+	uint16_t __dq_flags_separation[0]; \
+	uint16_t \
+		/* set under the send queue lock */ \
+		dm_needs_mgr:1, \
+		dm_disconnected:1, \
+		__dm_flags_pad : 14
 
-#define DISPATCH_SOURCE_HEADER(refs) \
-	struct dispatch_source_s _as_ds[0]; \
-	_DISPATCH_SOURCE_HEADER(refs)
-
-DISPATCH_CLASS_DECL_BARE(source);
-_OS_OBJECT_CLASS_IMPLEMENTS_PROTOCOL(dispatch_source, dispatch_object);
-
-#ifndef __cplusplus
 struct dispatch_source_s {
-	_DISPATCH_SOURCE_HEADER(source);
-	uint64_t ds_data DISPATCH_ATOMIC64_ALIGN;
-	uint64_t ds_pending_data DISPATCH_ATOMIC64_ALIGN;
+	DISPATCH_SOURCE_CLASS_HEADER(source);
 } DISPATCH_ATOMIC64_ALIGN;
+dispatch_assert_valid_lane_type(dispatch_source_s);
+dispatch_static_assert(sizeof(struct dispatch_source_s) <= 128);
 
-// Extracts source data from the ds_data field
-#define DISPATCH_SOURCE_GET_DATA(d) ((d) & 0xFFFFFFFF)
-
-// Extracts status from the ds_data field
-#define DISPATCH_SOURCE_GET_STATUS(d) ((d) >> 32)
-
-// Combine data and status for the ds_data field
-#define DISPATCH_SOURCE_COMBINE_DATA_AND_STATUS(data, status) \
-		((((uint64_t)(status)) << 32) | (data))
-
-#endif // __cplusplus
-
-void _dispatch_source_refs_register(dispatch_source_t ds,
-		dispatch_wlh_t wlh, dispatch_priority_t bp);
-void _dispatch_source_refs_unregister(dispatch_source_t ds, uint32_t options);
 void _dispatch_source_xref_dispose(dispatch_source_t ds);
 void _dispatch_source_dispose(dispatch_source_t ds, bool *allow_free);
-void _dispatch_source_finalize_activation(dispatch_source_t ds,
-		bool *allow_resume);
+void _dispatch_source_activate(dispatch_source_t ds, bool *allow_resume);
 void _dispatch_source_invoke(dispatch_source_t ds,
 		dispatch_invoke_context_t dic, dispatch_invoke_flags_t flags);
 void _dispatch_source_wakeup(dispatch_source_t ds, dispatch_qos_t qos,
 		dispatch_wakeup_flags_t flags);
 void _dispatch_source_merge_evt(dispatch_unote_t du, uint32_t flags,
-		uintptr_t data, uintptr_t status, pthread_priority_t pp);
+		uintptr_t data, pthread_priority_t pp);
+DISPATCH_COLD
 size_t _dispatch_source_debug(dispatch_source_t ds, char* buf, size_t bufsiz);
 
-DISPATCH_EXPORT // for firehose server
-void _dispatch_source_merge_data(dispatch_source_t ds, pthread_priority_t pp,
-		uintptr_t val);
-
-void _dispatch_mgr_queue_push(dispatch_queue_t dq, dispatch_object_t dou,
-		dispatch_qos_t qos);
-void _dispatch_mgr_queue_wakeup(dispatch_queue_t dq, dispatch_qos_t qos,
-		dispatch_wakeup_flags_t flags);
-void _dispatch_mgr_thread(dispatch_queue_t dq, dispatch_invoke_context_t dic,
-		dispatch_invoke_flags_t flags);
-#if DISPATCH_USE_KEVENT_WORKQUEUE
-void _dispatch_kevent_worker_thread(dispatch_kevent_t *events,
-		int *nevents);
-#endif // DISPATCH_USE_KEVENT_WORKQUEUE
-
 #endif /* __DISPATCH_SOURCE_INTERNAL__ */
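
// The DISPATCH_SOURCE_GET_DATA / GET_STATUS / COMBINE_DATA_AND_STATUS macros
// removed above packed two 32-bit quantities into the single 64-bit ds_data
// word. A sketch of the same packing, for illustration only:
#include <assert.h>
#include <stdint.h>

static uint64_t
sketch_combine_data_and_status(uint32_t data, uint32_t status)
{
	return ((uint64_t)status << 32) | data;
}

static void
sketch_combine_example(void)
{
	uint64_t d = sketch_combine_data_and_status(0x1234u, 0xbeefu);
	assert((d & 0xFFFFFFFFu) == 0x1234u);    // DISPATCH_SOURCE_GET_DATA
	assert((d >> 32) == 0xbeefu);            // DISPATCH_SOURCE_GET_STATUS
}
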
diff --git a/src/time.c b/src/time.c
index 5b0bab0..b70f813 100644
--- a/src/time.c
+++ b/src/time.c
@@ -26,7 +26,6 @@
 	bool ratio_1_to_1;
 } _dispatch_host_time_data_s;
 
-DISPATCH_CACHELINE_ALIGN
 static _dispatch_host_time_data_s _dispatch_host_time_data;
 
 uint64_t (*_dispatch_host_time_mach2nano)(uint64_t machtime);
@@ -96,39 +95,53 @@
 	if (inval == DISPATCH_TIME_FOREVER) {
 		return DISPATCH_TIME_FOREVER;
 	}
-	if ((int64_t)inval < 0) {
+
+	dispatch_clock_t clock;
+	uint64_t value;
+	_dispatch_time_to_clock_and_value(inval, &clock, &value);
+	if (value == DISPATCH_TIME_FOREVER) {
+		// Out-of-range for this clock.
+		return value;
+	}
+	if (clock == DISPATCH_CLOCK_WALL) {
 		// wall clock
+		offset = (uint64_t)delta;
 		if (delta >= 0) {
-			offset = (uint64_t)delta;
-			if ((int64_t)(inval -= offset) >= 0) {
+			if ((int64_t)(value += offset) <= 0) {
 				return DISPATCH_TIME_FOREVER; // overflow
 			}
-			return inval;
 		} else {
-			offset = (uint64_t)-delta;
-			if ((int64_t)(inval += offset) >= -1) {
-				// -1 is special == DISPATCH_TIME_FOREVER == forever
-				return (dispatch_time_t)-2ll; // underflow
+			if ((int64_t)(value += offset) < 1) {
+				// -1 is special == DISPATCH_TIME_FOREVER == forever, so
+				// return -2 (after conversion to dispatch_time_t) instead.
+				value = 2; // underflow.
 			}
-			return inval;
 		}
+		return _dispatch_clock_and_value_to_time(DISPATCH_CLOCK_WALL, value);
 	}
-	// mach clock
-	if (inval == 0) {
-		inval = _dispatch_absolute_time();
+
+	// up time or monotonic time. "value" has the clock type removed,
+	// so the test against DISPATCH_TIME_NOW is correct for either clock.
+	if (value == DISPATCH_TIME_NOW) {
+		if (clock == DISPATCH_CLOCK_UPTIME) {
+			value = _dispatch_uptime();
+		} else {
+			dispatch_assert(clock == DISPATCH_CLOCK_MONOTONIC);
+			value = _dispatch_monotonic_time();
+		}
 	}
 	if (delta >= 0) {
 		offset = _dispatch_time_nano2mach((uint64_t)delta);
-		if ((int64_t)(inval += offset) <= 0) {
+		if ((int64_t)(value += offset) <= 0) {
 			return DISPATCH_TIME_FOREVER; // overflow
 		}
-		return inval;
+		return _dispatch_clock_and_value_to_time(clock, value);
 	} else {
 		offset = _dispatch_time_nano2mach((uint64_t)-delta);
-		if ((int64_t)(inval -= offset) < 1) {
-			return 1; // underflow
+		if ((int64_t)(value -= offset) < 1) {
+			return _dispatch_clock_and_value_to_time(clock, 1); // underflow
 		}
-		return inval;
+		return _dispatch_clock_and_value_to_time(clock, value);
 	}
 }
 
@@ -156,16 +169,25 @@
 	if (when == DISPATCH_TIME_FOREVER) {
 		return DISPATCH_TIME_FOREVER;
 	}
-	if (when == 0) {
+	if (when == DISPATCH_TIME_NOW) {
 		return 0;
 	}
-	if ((int64_t)when < 0) {
-		when = (dispatch_time_t)-(int64_t)when;
+
+	dispatch_clock_t clock;
+	uint64_t value;
+	_dispatch_time_to_clock_and_value(when, &clock, &value);
+	if (clock == DISPATCH_CLOCK_WALL) {
 		now = _dispatch_get_nanoseconds();
-		return now >= when ? 0 : when - now;
+		return now >= value ? 0 : value - now;
+	} else {
+		if (clock == DISPATCH_CLOCK_UPTIME) {
+			now = _dispatch_uptime();
+		} else {
+			dispatch_assert(clock == DISPATCH_CLOCK_MONOTONIC);
+			now = _dispatch_monotonic_time();
+		}
+		return now >= value ? 0 : _dispatch_time_mach2nano(value - now);
 	}
-	now = _dispatch_absolute_time();
-	return now >= when ? 0 : _dispatch_time_mach2nano(when - now);
 }
 
 uint64_t
@@ -178,5 +200,7 @@
 		// time in nanoseconds since the POSIX epoch already
 		return (uint64_t)-(int64_t)when;
 	}
+
+	// Up time or monotonic time.
 	return _dispatch_get_nanoseconds() + _dispatch_timeout(when);
 }
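
// Sketch of the overflow/underflow convention used by the dispatch_time()
// hunk above: clock values stay in the positive int64_t range, a forward
// delta that wraps past INT64_MAX saturates to "forever", and a backward
// delta that would reach zero is clamped to a small positive value (the real
// wall-clock path clamps to 2 so it never produces the reserved -1/forever
// encoding). Illustrative only.
#include <stdint.h>

#define SKETCH_TIME_FOREVER (~0ull)

static uint64_t
sketch_time_add(uint64_t value, int64_t delta_ns)
{
	if (delta_ns >= 0) {
		value += (uint64_t)delta_ns;
		if ((int64_t)value <= 0) return SKETCH_TIME_FOREVER; // overflow
	} else {
		value -= (uint64_t)-delta_ns;
		if ((int64_t)value < 1) return 1;                    // underflow
	}
	return value;
}
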
diff --git a/src/trace.h b/src/trace.h
index c670f60..ed69e1b 100644
--- a/src/trace.h
+++ b/src/trace.h
@@ -31,8 +31,8 @@
 
 #if DISPATCH_USE_DTRACE_INTROSPECTION
 #define _dispatch_trace_callout(_c, _f, _dcc) do { \
-		if (slowpath(DISPATCH_CALLOUT_ENTRY_ENABLED()) || \
-				slowpath(DISPATCH_CALLOUT_RETURN_ENABLED())) { \
+		if (unlikely(DISPATCH_CALLOUT_ENTRY_ENABLED() || \
+				DISPATCH_CALLOUT_RETURN_ENABLED())) { \
 			dispatch_queue_t _dq = _dispatch_queue_get_current(); \
 			const char *_label = _dq && _dq->dq_label ? _dq->dq_label : ""; \
 			dispatch_function_t _func = (dispatch_function_t)(_f); \
@@ -75,6 +75,24 @@
 #define _dispatch_client_callout2		_dispatch_trace_client_callout2
 #endif // DISPATCH_USE_DTRACE_INTROSPECTION || DISPATCH_INTROSPECTION
 
+#ifdef _COMM_PAGE_KDEBUG_ENABLE
+#define DISPATCH_KTRACE_ENABLED \
+		(*(volatile uint32_t *)_COMM_PAGE_KDEBUG_ENABLE != 0)
+
+#if DISPATCH_INTROSPECTION
+#define _dispatch_only_if_ktrace_enabled(...) \
+		if (unlikely(DISPATCH_KTRACE_ENABLED)) ({ __VA_ARGS__; })
+#else
+#define _dispatch_only_if_ktrace_enabled(...) (void)0
+#endif /* DISPATCH_INTROSPECTION */
+
+#else /* _COMM_PAGE_KDEBUG_ENABLE */
+
+#define DISPATCH_KTRACE_ENABLED 0
+#define _dispatch_only_if_ktrace_enabled(...) (void)0
+#endif /* _COMM_PAGE_KDEBUG_ENABLE */
+
+
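
// Sketch of the gating pattern introduced above: tracing call sites first
// test a single global "kdebug enabled" word, so the argument marshalling is
// only paid when tracing is actually on. The variable below is a stand-in for
// the real comm-page flag, purely for illustration.
#include <stdint.h>

static volatile uint32_t sketch_kdebug_enable;

static inline int
sketch_ktrace_enabled(void)
{
	return sketch_kdebug_enable != 0;
}

static void
sketch_trace_item_push(void *dq, void *item)
{
	if (sketch_ktrace_enabled()) {           // unlikely() in the real code
		// build trace arguments and emit the kdebug event here
		(void)dq;
		(void)item;
	}
}
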
 #if DISPATCH_USE_DTRACE_INTROSPECTION
 #define _dispatch_trace_continuation(_q, _o, _t) do { \
 		dispatch_queue_t _dq = (_q); \
@@ -85,25 +103,25 @@
 		dispatch_function_t _func; \
 		void *_ctxt; \
 		if (_dispatch_object_has_vtable(_do)) { \
-			_kind = (char*)dx_kind(_do); \
-			if ((dx_type(_do) & _DISPATCH_META_TYPE_MASK) == \
-					_DISPATCH_SOURCE_TYPE && (_dq) != &_dispatch_mgr_q) { \
+			_kind = (char*)_dispatch_object_class_name(_do); \
+			if ((dx_metatype(_do) == _DISPATCH_SOURCE_TYPE) && \
+					_dq != _dispatch_mgr_q._as_dq) { \
 				dispatch_source_t _ds = (dispatch_source_t)_do; \
 				_dc = os_atomic_load(&_ds->ds_refs->ds_handler[ \
 						DS_EVENT_HANDLER], relaxed); \
 				_func = _dc ? _dc->dc_func : NULL; \
 				_ctxt = _dc ? _dc->dc_ctxt : NULL; \
 			} else { \
-				_func = (dispatch_function_t)_dispatch_queue_invoke; \
+				_func = (dispatch_function_t)_dispatch_lane_invoke; \
 				_ctxt = _do->do_ctxt; \
 			} \
 		} else { \
 			_dc = (void*)_do; \
 			_ctxt = _dc->dc_ctxt; \
-			if (_dc->dc_flags & DISPATCH_OBJ_SYNC_WAITER_BIT) { \
+			if (_dc->dc_flags & DC_FLAG_SYNC_WAITER) { \
 				_kind = "semaphore"; \
 				_func = (dispatch_function_t)dispatch_semaphore_signal; \
-			} else if (_dc->dc_flags & DISPATCH_OBJ_BLOCK_BIT) { \
+			} else if (_dc->dc_flags & DC_FLAG_BLOCK) { \
 				_kind = "block"; \
 				_func = _dispatch_Block_invoke(_dc->dc_ctxt); \
 			} else { \
@@ -121,62 +139,246 @@
 #endif // DISPATCH_USE_DTRACE_INTROSPECTION || DISPATCH_INTROSPECTION
 
 #if DISPATCH_USE_DTRACE_INTROSPECTION || DISPATCH_INTROSPECTION
+
+DISPATCH_ALWAYS_INLINE
+static inline dispatch_queue_class_t
+_dispatch_trace_queue_create(dispatch_queue_class_t dqu)
+{
+	_dispatch_only_if_ktrace_enabled({
+		uint64_t dq_label[4] = {0}; // So that we get the right null termination
+		dispatch_queue_t dq = dqu._dq;
+		strncpy((char *)dq_label, (char *)dq->dq_label ?: "", sizeof(dq_label));
+
+		_dispatch_ktrace2(DISPATCH_QOS_TRACE_queue_creation_start,
+				dq->dq_serialnum,
+				_dispatch_priority_to_pp_prefer_fallback(dq->dq_priority));
+
+		_dispatch_ktrace4(DISPATCH_QOS_TRACE_queue_creation_end,
+						dq_label[0], dq_label[1], dq_label[2], dq_label[3]);
+	});
+
+	return _dispatch_introspection_queue_create(dqu);
+}
+
 DISPATCH_ALWAYS_INLINE
 static inline void
-_dispatch_trace_root_queue_push_list(dispatch_queue_t dq,
-		dispatch_object_t _head, dispatch_object_t _tail, int n)
+_dispatch_trace_queue_dispose(dispatch_queue_class_t dqu)
 {
-	if (slowpath(DISPATCH_QUEUE_PUSH_ENABLED())) {
+	_dispatch_ktrace1(DISPATCH_QOS_TRACE_queue_dispose, (dqu._dq)->dq_serialnum);
+	_dispatch_introspection_queue_dispose(dqu);
+}
+
+DISPATCH_ALWAYS_INLINE
+static inline void
+_dispatch_trace_source_dispose(dispatch_source_t ds)
+{
+	_dispatch_ktrace1(DISPATCH_QOS_TRACE_src_dispose, (uintptr_t)ds);
+}
+
+DISPATCH_ALWAYS_INLINE
+static inline void
+_dispatch_trace_block_create_with_voucher_and_priority(dispatch_block_t db,
+		void *func, dispatch_block_flags_t original_flags,
+		pthread_priority_t original_priority,
+		pthread_priority_t thread_prio, pthread_priority_t final_block_prio)
+{
+	_dispatch_ktrace4(DISPATCH_QOS_TRACE_private_block_creation,
+			(uintptr_t)db,
+			(uintptr_t)func,
+			BITPACK_UINT32_PAIR(original_flags, original_priority),
+			BITPACK_UINT32_PAIR(thread_prio, final_block_prio));
+}
+
+DISPATCH_ALWAYS_INLINE
+static inline void
+_dispatch_trace_firehose_reserver_gave_up(uint8_t stream, uint8_t ref,
+		bool waited, uint64_t old_state, uint64_t new_state)
+{
+	uint64_t first = ((uint64_t)ref << 8) | (uint64_t)stream;
+	uint64_t second = waited;
+	_dispatch_ktrace4(DISPATCH_FIREHOSE_TRACE_reserver_gave_up, first, second,
+			old_state, new_state);
+}
+
+DISPATCH_ALWAYS_INLINE
+static inline void
+_dispatch_trace_firehose_reserver_wait(uint8_t stream, uint8_t ref,
+		bool waited, uint64_t old_state, uint64_t new_state, bool reliable)
+{
+	uint64_t first = ((uint64_t)ref << 8) | (uint64_t)stream;
+	uint64_t second = ((uint64_t)reliable << 1) | waited;
+	_dispatch_ktrace4(DISPATCH_FIREHOSE_TRACE_reserver_wait, first, second,
+			old_state, new_state);
+}
+
+DISPATCH_ALWAYS_INLINE
+static inline void
+_dispatch_trace_firehose_allocator(uint64_t ask0, uint64_t ask1,
+		uint64_t old_state, uint64_t new_state)
+{
+	_dispatch_ktrace4(DISPATCH_FIREHOSE_TRACE_allocator, ask0, ask1, old_state,
+			new_state);
+}
+
+DISPATCH_ALWAYS_INLINE
+static inline void
+_dispatch_trace_firehose_wait_for_logd(uint8_t stream, uint64_t timestamp,
+		uint64_t old_state, uint64_t new_state)
+{
+	_dispatch_ktrace4(DISPATCH_FIREHOSE_TRACE_wait_for_logd, stream, timestamp,
+			old_state, new_state);
+}
+
+DISPATCH_ALWAYS_INLINE
+static inline void
+_dispatch_trace_firehose_chunk_install(uint64_t ask0, uint64_t ask1,
+		uint64_t old_state, uint64_t new_state)
+{
+	_dispatch_ktrace4(DISPATCH_FIREHOSE_TRACE_chunk_install, ask0, ask1,
+			old_state, new_state);
+}
+
+/* Implemented in introspection.c */
+void
+_dispatch_trace_item_push_internal(dispatch_queue_t dq, dispatch_object_t dou);
+
+#define _dispatch_trace_item_push_inline(...) \
+		_dispatch_only_if_ktrace_enabled({ \
+			_dispatch_trace_item_push_internal(__VA_ARGS__); \
+		})
+
+DISPATCH_ALWAYS_INLINE
+static inline void
+_dispatch_trace_item_push_list(dispatch_queue_global_t dq,
+		dispatch_object_t _head, dispatch_object_t _tail)
+{
+	if (unlikely(DISPATCH_QUEUE_PUSH_ENABLED() || DISPATCH_KTRACE_ENABLED)) {
 		struct dispatch_object_s *dou = _head._do;
 		do {
-			_dispatch_trace_continuation(dq, dou, DISPATCH_QUEUE_PUSH);
+			if (unlikely(DISPATCH_QUEUE_PUSH_ENABLED())) {
+				_dispatch_trace_continuation(dq->_as_dq, dou, DISPATCH_QUEUE_PUSH);
+			}
+
+			_dispatch_trace_item_push_inline(dq->_as_dq, dou);
 		} while (dou != _tail._do && (dou = dou->do_next));
 	}
 	_dispatch_introspection_queue_push_list(dq, _head, _tail);
-	_dispatch_root_queue_push_inline(dq, _head, _tail, n);
 }
 
 DISPATCH_ALWAYS_INLINE
 static inline void
-_dispatch_trace_queue_push_inline(dispatch_queue_t dq, dispatch_object_t _tail,
-		dispatch_qos_t qos)
+_dispatch_trace_item_push(dispatch_queue_class_t dqu, dispatch_object_t _tail)
 {
-	if (slowpath(DISPATCH_QUEUE_PUSH_ENABLED())) {
-		struct dispatch_object_s *dou = _tail._do;
-		_dispatch_trace_continuation(dq, dou, DISPATCH_QUEUE_PUSH);
+	if (unlikely(DISPATCH_QUEUE_PUSH_ENABLED())) {
+		_dispatch_trace_continuation(dqu._dq, _tail._do, DISPATCH_QUEUE_PUSH);
 	}
-	_dispatch_introspection_queue_push(dq, _tail);
-	_dispatch_queue_push_inline(dq, _tail, qos);
+
+	_dispatch_trace_item_push_inline(dqu._dq, _tail._do);
+	_dispatch_introspection_queue_push(dqu, _tail);
+}
+
+/* Implemented in introspection.c */
+void
+_dispatch_trace_item_pop_internal(dispatch_queue_t dq, dispatch_object_t dou);
+
+#define _dispatch_trace_item_pop_inline(...) \
+		_dispatch_only_if_ktrace_enabled({ \
+			_dispatch_trace_item_pop_internal(__VA_ARGS__); \
+		})
+
+DISPATCH_ALWAYS_INLINE
+static inline void
+_dispatch_trace_item_pop(dispatch_queue_class_t dqu, dispatch_object_t dou)
+{
+	if (unlikely(DISPATCH_QUEUE_POP_ENABLED())) {
+		_dispatch_trace_continuation(dqu._dq, dou._do, DISPATCH_QUEUE_POP);
+	}
+
+	_dispatch_trace_item_pop_inline(dqu._dq, dou);
+	_dispatch_introspection_queue_pop(dqu, dou);
 }
 
 DISPATCH_ALWAYS_INLINE
 static inline void
-_dispatch_trace_continuation_push(dispatch_queue_t dq, dispatch_object_t _tail)
+_dispatch_trace_item_complete_inline(dispatch_object_t dou)
 {
-	if (slowpath(DISPATCH_QUEUE_PUSH_ENABLED())) {
-		struct dispatch_object_s *dou = _tail._do;
-		_dispatch_trace_continuation(dq, dou, DISPATCH_QUEUE_PUSH);
-	}
-	_dispatch_introspection_queue_push(dq, _tail);
+	_dispatch_ktrace1(DISPATCH_QOS_TRACE_queue_item_complete, dou._do_value);
 }
 
-#define _dispatch_root_queue_push_inline _dispatch_trace_root_queue_push_list
-#define _dispatch_queue_push_inline _dispatch_trace_queue_push_inline
-
 DISPATCH_ALWAYS_INLINE
 static inline void
-_dispatch_trace_continuation_pop(dispatch_queue_t dq, dispatch_object_t dou)
+_dispatch_trace_item_complete(dispatch_object_t dou)
 {
-	if (slowpath(DISPATCH_QUEUE_POP_ENABLED())) {
-		_dispatch_trace_continuation(dq, dou._do, DISPATCH_QUEUE_POP);
-	}
-	_dispatch_introspection_queue_pop(dq, dou);
+	_dispatch_trace_item_complete_inline(dou);
+	_dispatch_introspection_queue_item_complete(dou);
 }
+
+DISPATCH_ALWAYS_INLINE
+static inline struct dispatch_object_s *
+_dispatch_trace_item_sync_push_pop(dispatch_queue_class_t dqu,
+		void *ctx, dispatch_function_t f, uintptr_t dc_flags)
+{
+	// No need to add tracing here since the introspection calls out to
+	// _trace_item_push and _trace_item_pop
+	return _dispatch_introspection_queue_fake_sync_push_pop(dqu._dq, ctx,
+			f, dc_flags);
+}
+
+/* Implemented in introspection.c */
+void
+_dispatch_trace_source_callout_entry_internal(dispatch_source_t ds, long kind,
+		dispatch_queue_t dq, dispatch_continuation_t dc);
+
+#define _dispatch_trace_source_callout_entry(...) \
+		_dispatch_only_if_ktrace_enabled({ \
+			_dispatch_trace_source_callout_entry_internal(__VA_ARGS__); \
+		})
+
+#define _dispatch_trace_runtime_event(evt, ptr, value) \
+		_dispatch_introspection_runtime_event(\
+				dispatch_introspection_runtime_event_##evt, ptr, value)
+
+#define DISPATCH_TRACE_ARG(arg) , arg
 #else
-#define _dispatch_trace_continuation_push(dq, dou) \
+#define _dispatch_trace_queue_create _dispatch_introspection_queue_create
+#define _dispatch_trace_queue_dispose _dispatch_introspection_queue_dispose
+#define _dispatch_trace_source_dispose(ds) ((void)0)
+#define _dispatch_trace_block_create_with_voucher_and_priority(_db, _func, \
+		_flags, _pri, _tpri, _bpri) \
+		do { (void)_db; (void)_func; (void) _flags; (void) _pri; (void) _tpri; \
+			(void) _bpri; } while (0)
+#define _dispatch_trace_firehose_reserver_gave_up(stream, ref, waited, \
+		old_state, new_state) \
+		do { (void)(stream); (void)(ref); (void)(waited); (void)(old_state); \
+			(void)(new_state); } while (0)
+#define _dispatch_trace_firehose_reserver_wait(stream, ref, waited, \
+		old_state, new_state, reliable) \
+		do { (void)(stream); (void)(ref); (void)(waited); (void)(old_state); \
+			(void)(new_state); (void)(reliable); } while (0)
+#define _dispatch_trace_firehose_allocator(ask0, ask1, old_state, new_state) \
+		do { (void)(ask0); (void)(ask1); (void)(old_state); \
+			(void)(new_state); } while (0)
+#define _dispatch_trace_firehose_wait_for_logd(stream, timestamp, old_state, \
+		new_state) \
+		do { (void)(stream); (void)(timestamp); (void)(old_state); \
+			(void)(new_state); } while (0)
+#define _dispatch_trace_firehose_chunk_install(ask0, ask1, old_state, \
+		new_state) \
+		do { (void)(ask0); (void)(ask1); (void)(old_state); \
+			(void)(new_state); } while (0)
+#define _dispatch_trace_item_push(dq, dou) \
 		do { (void)(dq); (void)(dou); } while(0)
-#define _dispatch_trace_continuation_pop(dq, dou) \
+#define _dispatch_trace_item_push_list(dq, head, tail) \
+		do { (void)(dq); (void)(head); (void)tail; } while(0)
+#define _dispatch_trace_item_pop(dq, dou) \
 		do { (void)(dq); (void)(dou); } while(0)
+#define _dispatch_trace_item_complete(dou) ((void)0)
+#define _dispatch_trace_item_sync_push_pop(dq, ctxt, func, flags) \
+		do { (void)(dq); (void)(ctxt); (void)(func); (void)(flags); } while(0)
+#define _dispatch_trace_source_callout_entry(ds, k, dq, dc) ((void)0)
+#define _dispatch_trace_runtime_event(evt, ptr, value) \
+		do { (void)(ptr); (void)(value); } while(0)
+#define DISPATCH_TRACE_ARG(arg)
 #endif // DISPATCH_USE_DTRACE_INTROSPECTION || DISPATCH_INTROSPECTION
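When introspection and DTrace are both compiled out, the tracing entry points above collapse to stand-in macros that still consume every argument, so call sites compile warning-free in either configuration. A minimal, self-contained sketch of that pattern (the names here are illustrative, not libdispatch symbols):

    #include <stdio.h>

    #ifndef ENABLE_TRACE
    #define ENABLE_TRACE 0
    #endif

    #if ENABLE_TRACE
    #define my_trace_push(q, item) printf("push q=%d item=%d\n", (q), (item))
    #else
    // No-op stand-in: evaluate and discard every argument so callers compile
    // identically (and without -Wunused warnings) when tracing is built out.
    #define my_trace_push(q, item) do { (void)(q); (void)(item); } while (0)
    #endif

    int main(void)
    {
        int queue = 1, item = 2;
        my_trace_push(queue, item);
        return 0;
    }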
 
 #if DISPATCH_USE_DTRACE
@@ -189,13 +391,31 @@
 }
 
 DISPATCH_ALWAYS_INLINE
+static inline uint64_t
+_dispatch_time_clock_to_nsecs(dispatch_clock_t clock, uint64_t t)
+{
+#if !DISPATCH_TIME_UNIT_USES_NANOSECONDS
+	switch (clock) {
+	case DISPATCH_CLOCK_MONOTONIC:
+	case DISPATCH_CLOCK_UPTIME:
+		return _dispatch_time_mach2nano(t);
+	case DISPATCH_CLOCK_WALL:
+		return t;
+	}
+#else
+	(void)clock;
+	return t;
+#endif
+}
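_dispatch_time_clock_to_nsecs() centralizes the rule that wall-clock values are already expressed in nanoseconds, while monotonic/uptime values are host (Mach) ticks that must go through the timebase. A rough, self-contained illustration of that conversion using the public mach_timebase_info() API; this mirrors the idea behind _dispatch_time_mach2nano as an assumption, it is not its actual implementation:

    #include <stdint.h>
    #include <stdio.h>
    #include <mach/mach_time.h>

    // Convert Mach ticks to nanoseconds: ns = ticks * numer / denom.
    // (A production version would guard the multiplication against overflow.)
    static uint64_t ticks_to_nanoseconds(uint64_t ticks)
    {
        mach_timebase_info_data_t tb;
        mach_timebase_info(&tb);
        return ticks * tb.numer / tb.denom;
    }

    int main(void)
    {
        uint64_t now = mach_absolute_time();
        printf("now ~= %llu ns since boot\n",
                (unsigned long long)ticks_to_nanoseconds(now));
        return 0;
    }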
+
+DISPATCH_ALWAYS_INLINE
 static inline dispatch_trace_timer_params_t
 _dispatch_trace_timer_params(dispatch_clock_t clock,
 		struct dispatch_timer_source_s *values, uint64_t deadline,
 		dispatch_trace_timer_params_t params)
 {
 	#define _dispatch_trace_time2nano3(t) \
-			(clock == DISPATCH_CLOCK_MACH ? _dispatch_time_mach2nano(t) : (t))
+			(_dispatch_time_clock_to_nsecs(clock, t))
 	#define _dispatch_trace_time2nano2(v, t) ({ uint64_t _t = (t); \
 			(v) >= INT64_MAX ? -1ll : (int64_t)_dispatch_trace_time2nano3(_t);})
 	#define _dispatch_trace_time2nano(v) ({ uint64_t _t; \
@@ -218,7 +438,7 @@
 static inline bool
 _dispatch_trace_timer_configure_enabled(void)
 {
-	return slowpath(DISPATCH_TIMER_CONFIGURE_ENABLED());
+	return DISPATCH_TIMER_CONFIGURE_ENABLED();
 }
 
 DISPATCH_ALWAYS_INLINE
@@ -236,7 +456,7 @@
 static inline void
 _dispatch_trace_timer_program(dispatch_timer_source_refs_t dr, uint64_t deadline)
 {
-	if (slowpath(DISPATCH_TIMER_PROGRAM_ENABLED())) {
+	if (unlikely(DISPATCH_TIMER_PROGRAM_ENABLED())) {
 		if (deadline && dr) {
 			dispatch_source_t ds = _dispatch_source_from_refs(dr);
 			dispatch_clock_t clock = DISPATCH_TIMER_CLOCK(dr->du_ident);
@@ -252,7 +472,7 @@
 static inline void
 _dispatch_trace_timer_wake(dispatch_timer_source_refs_t dr)
 {
-	if (slowpath(DISPATCH_TIMER_WAKE_ENABLED())) {
+	if (unlikely(DISPATCH_TIMER_WAKE_ENABLED())) {
 		if (dr) {
 			dispatch_source_t ds = _dispatch_source_from_refs(dr);
 			DISPATCH_TIMER_WAKE(ds, _dispatch_trace_timer_function(dr));
@@ -265,7 +485,7 @@
 _dispatch_trace_timer_fire(dispatch_timer_source_refs_t dr, uint64_t data,
 		uint64_t missed)
 {
-	if (slowpath(DISPATCH_TIMER_FIRE_ENABLED())) {
+	if (unlikely(DISPATCH_TIMER_FIRE_ENABLED())) {
 		if (!(data - missed) && dr) {
 			dispatch_source_t ds = _dispatch_source_from_refs(dr);
 			DISPATCH_TIMER_FIRE(ds, _dispatch_trace_timer_function(dr));
diff --git a/src/transform.c b/src/transform.c
index 45d5669..39147fa 100644
--- a/src/transform.c
+++ b/src/transform.c
@@ -359,7 +359,9 @@
 			if (os_mul_overflow(size - i, sizeof(uint16_t), &next)) {
 				return (bool)false;
 			}
-			if (wch >= 0xd800 && wch < 0xdfff) {
+			if (wch == 0xfeff && offset + i == 3) {
+				// skip the BOM if any, as we already inserted one ourselves
+			} else if (wch >= 0xd800 && wch < 0xdfff) {
 				// Illegal range (surrogate pair)
 				return (bool)false;
 			} else if (wch >= 0x10000) {
@@ -565,6 +567,26 @@
 	return _dispatch_transform_to_utf16(data, OSBigEndian);
 }
 
+static dispatch_data_t
+_dispatch_transform_to_utf8_without_bom(dispatch_data_t data)
+{
+	static uint8_t const utf8_bom[] = { 0xef, 0xbb, 0xbf };
+	const void *p;
+	dispatch_data_t subrange = _dispatch_data_subrange_map(data, &p, 0, 3);
+	bool has_bom = false;
+
+	if (subrange) {
+		has_bom = (memcmp(p, utf8_bom, sizeof(utf8_bom)) == 0);
+		dispatch_release(subrange);
+	}
+	if (has_bom) {
+		return dispatch_data_create_subrange(data, 3,
+				dispatch_data_get_size(data) - 3);
+	}
+	dispatch_retain(data);
+	return data;
+}
+
 #pragma mark -
 #pragma mark base32
 
@@ -1096,7 +1118,7 @@
 	.output_mask = (_DISPATCH_DATA_FORMAT_UTF8 | _DISPATCH_DATA_FORMAT_UTF16BE |
 			_DISPATCH_DATA_FORMAT_UTF16LE),
 	.decode = NULL,
-	.encode = NULL,
+	.encode = _dispatch_transform_to_utf8_without_bom,
 };
 
 const struct dispatch_data_format_type_s _dispatch_data_format_type_utf_any = {
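The new _dispatch_transform_to_utf8_without_bom() encoder simply drops a leading EF BB BF sequence before handing the data on, and the UTF-16 path above skips the U+FEFF it reads back right after the BOM it already emitted. The same check on a plain byte buffer, as a small stand-alone sketch (not the dispatch_data-based code above):

    #include <stdbool.h>
    #include <stddef.h>
    #include <stdio.h>
    #include <string.h>

    // Returns true when the buffer starts with the UTF-8 byte order mark.
    static bool has_utf8_bom(const unsigned char *p, size_t len)
    {
        static const unsigned char utf8_bom[] = { 0xef, 0xbb, 0xbf };
        return len >= sizeof(utf8_bom) &&
                memcmp(p, utf8_bom, sizeof(utf8_bom)) == 0;
    }

    int main(void)
    {
        const unsigned char sample[] = { 0xef, 0xbb, 0xbf, 'h', 'i' };
        size_t skip = has_utf8_bom(sample, sizeof(sample)) ? 3 : 0;
        printf("payload starts at offset %zu: %c%c\n", skip,
                sample[skip], sample[skip + 1]);
        return 0;
    }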
diff --git a/src/voucher.c b/src/voucher.c
index e32bd29..f8ce0c8 100644
--- a/src/voucher.c
+++ b/src/voucher.c
@@ -20,14 +20,6 @@
 
 #include "internal.h"
 
-#if !defined(VOUCHER_EXPORT_PERSONA_SPI)
-#if TARGET_OS_IPHONE
-#define VOUCHER_EXPORT_PERSONA_SPI 1
-#else
-#define VOUCHER_EXPORT_PERSONA_SPI 0
-#endif
-#endif
-
 #ifndef PERSONA_ID_NONE
 #define PERSONA_ID_NONE ((uid_t)-1)
 #endif
@@ -46,12 +38,12 @@
 #define FIREHOSE_ACTIVITY_ID_MAKE(aid, flags) \
 		FIREHOSE_ACTIVITY_ID_MERGE_FLAGS((aid) & MACH_ACTIVITY_ID_MASK, flags)
 
-static volatile uint64_t _voucher_aid_next;
+DISPATCH_STATIC_GLOBAL(volatile uint64_t _voucher_aid_next);
 
 #pragma mark -
 #pragma mark voucher_t
 
-OS_OBJECT_CLASS_DECL(voucher, object);
+OS_OBJECT_CLASS_DECL(voucher);
 #if !USE_OBJC
 OS_OBJECT_VTABLE_INSTANCE(voucher,
 		(void (*)(_os_object_t))_voucher_xref_dispose,
@@ -169,11 +161,13 @@
 #pragma mark -
 #pragma mark voucher_hash
 
-DISPATCH_CACHELINE_ALIGN
-static voucher_hash_head_s _voucher_hash[VL_HASH_SIZE];
+extern voucher_hash_head_s _voucher_hash[VL_HASH_SIZE];
+DISPATCH_GLOBAL_INIT(voucher_hash_head_s _voucher_hash[VL_HASH_SIZE], {
+	[0 ... VL_HASH_SIZE - 1] = { ~(uintptr_t)VOUCHER_NULL },
+});
+DISPATCH_STATIC_GLOBAL(dispatch_unfair_lock_s _voucher_hash_lock);
 
 #define _voucher_hash_head(kv)   (&_voucher_hash[VL_HASH((kv))])
-static dispatch_unfair_lock_s _voucher_hash_lock;
 #define _voucher_hash_lock_lock() \
 		_dispatch_unfair_lock_lock(&_voucher_hash_lock)
 #define _voucher_hash_lock_unlock() \
@@ -181,37 +175,29 @@
 
 DISPATCH_ALWAYS_INLINE
 static inline void
-_voucher_hash_head_init(voucher_hash_head_s *head)
-{
-	_voucher_hash_set_next(&head->vhh_first, VOUCHER_NULL);
-	_voucher_hash_set_prev_ptr(&head->vhh_last_ptr, &head->vhh_first);
-}
-
-DISPATCH_ALWAYS_INLINE
-static inline void
 _voucher_hash_enqueue(mach_voucher_t kv, voucher_t v)
 {
-	// same as TAILQ_INSERT_TAIL
+	// same as LIST_INSERT_HEAD
 	voucher_hash_head_s *head = _voucher_hash_head(kv);
-	uintptr_t prev_ptr = head->vhh_last_ptr;
-	_voucher_hash_set_next(&v->v_list.vhe_next, VOUCHER_NULL);
-	v->v_list.vhe_prev_ptr = prev_ptr;
-	_voucher_hash_store_to_prev_ptr(prev_ptr, v);
-	_voucher_hash_set_prev_ptr(&head->vhh_last_ptr, &v->v_list.vhe_next);
+	voucher_t next = _voucher_hash_get_next(head->vhh_first);
+	v->v_list.vhe_next = head->vhh_first;
+	if (next) {
+		_voucher_hash_set_prev_ptr(&next->v_list.vhe_prev_ptr,
+				&v->v_list.vhe_next);
+	}
+	_voucher_hash_set_next(&head->vhh_first, v);
+	_voucher_hash_set_prev_ptr(&v->v_list.vhe_prev_ptr, &head->vhh_first);
 }
 
 DISPATCH_ALWAYS_INLINE
 static inline void
-_voucher_hash_remove(mach_voucher_t kv, voucher_t v)
+_voucher_hash_remove(voucher_t v)
 {
-	// same as TAILQ_REMOVE
-	voucher_hash_head_s *head = _voucher_hash_head(kv);
+	// same as LIST_REMOVE
 	voucher_t next = _voucher_hash_get_next(v->v_list.vhe_next);
 	uintptr_t prev_ptr = v->v_list.vhe_prev_ptr;
 	if (next) {
 		next->v_list.vhe_prev_ptr = prev_ptr;
-	} else {
-		head->vhh_last_ptr = prev_ptr;
 	}
 	_voucher_hash_store_to_prev_ptr(prev_ptr, next);
 	_voucher_hash_mark_not_enqueued(v);
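The voucher hash buckets now behave like BSD LIST_INSERT_HEAD/LIST_REMOVE: each entry keeps a pointer to the previous link's "next" field, so removal never needs the bucket head (which is why _voucher_hash_remove() loses its kv parameter). A plain, self-contained version of that intrusive-list shape, with illustrative names and without libdispatch's pointer encoding of vhe_prev_ptr:

    #include <stdio.h>

    struct node {
        struct node  *next;
        struct node **prev_next; // points at whichever "next" field points at us
    };

    struct head { struct node *first; };

    static void list_insert_head(struct head *h, struct node *n)
    {
        n->next = h->first;
        if (n->next) n->next->prev_next = &n->next;
        h->first = n;
        n->prev_next = &h->first;
    }

    static void list_remove(struct node *n)
    {
        // No head needed: patch the link that points at us, fix our successor.
        if (n->next) n->next->prev_next = n->prev_next;
        *n->prev_next = n->next;
    }

    int main(void)
    {
        struct head h = { 0 };
        struct node a = { 0 }, b = { 0 };
        list_insert_head(&h, &a);
        list_insert_head(&h, &b); // list is now b -> a
        list_remove(&b);          // list is now a
        printf("first is %s\n", h.first == &a ? "a" : "b");
        return 0;
    }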
@@ -270,7 +256,7 @@
 	}
 	// check for resurrection race with _voucher_find_and_retain
 	if (os_atomic_load2o(v, os_obj_xref_cnt, ordered) < 0) {
-		if (_voucher_hash_is_enqueued(v)) _voucher_hash_remove(kv, v);
+		if (_voucher_hash_is_enqueued(v)) _voucher_hash_remove(v);
 	}
 	_voucher_hash_lock_unlock();
 }
@@ -321,7 +307,7 @@
 	};
 	kr = _voucher_create_mach_voucher(&task_create_recipe,
 			sizeof(task_create_recipe), &kv);
-	if (slowpath(kr)) {
+	if (unlikely(kr)) {
 		DISPATCH_CLIENT_CRASH(kr, "Could not create task mach voucher");
 	}
 	_voucher_default_task_mach_voucher = kv;
@@ -755,7 +741,7 @@
 	_dispatch_voucher_debug("kvoucher[0x%08x] decrement importance count to %u:"
 			" %s - 0x%x", v, kv, count, mach_error_string(kr), kr);
 #endif
-	if (slowpath(dispatch_assume_zero(kr) == KERN_FAILURE)) {
+	if (unlikely(dispatch_assume_zero(kr) == KERN_FAILURE)) {
 		DISPATCH_CLIENT_CRASH(kr, "Voucher importance count underflow");
 	}
 }
@@ -781,7 +767,7 @@
 {
 	_voucher_trace(DISPOSE, voucher);
 	_dispatch_voucher_debug("dispose", voucher);
-	if (slowpath(_voucher_hash_is_enqueued(voucher))) {
+	if (unlikely(_voucher_hash_is_enqueued(voucher))) {
 		_dispatch_voucher_debug("corruption", voucher);
 		DISPATCH_CLIENT_CRASH(0, "Voucher corruption");
 	}
@@ -813,11 +799,6 @@
 	return _os_object_dealloc((_os_object_t)voucher);
 }
 
-static void
-_voucher_activity_debug_channel_barrier_nop(void *ctxt DISPATCH_UNUSED)
-{
-}
-
 void
 _voucher_activity_debug_channel_init(void)
 {
@@ -842,9 +823,6 @@
 				DISPATCH_TARGET_QUEUE_DEFAULT, NULL, handler);
 		dm->dm_recv_refs->du_can_be_wlh = false; // 29906118
 		dispatch_mach_connect(dm, dbgp, MACH_PORT_NULL, NULL);
-		// will force the DISPATCH_MACH_CONNECTED event
-		dispatch_mach_send_barrier_f(dm, NULL,
-				_voucher_activity_debug_channel_barrier_nop);
 		_voucher_activity_debug_channel = dm;
 	}
 }
@@ -863,7 +841,145 @@
 	_firehose_task_buffer = NULL; // firehose buffer is VM_INHERIT_NONE
 }
 
-#if VOUCHER_EXPORT_PERSONA_SPI
+voucher_t
+voucher_copy_with_persona_mach_voucher(mach_voucher_t persona_mach_voucher)
+{
+#if !VOUCHER_USE_PERSONA
+	(void)persona_mach_voucher;
+	return voucher_copy();
+#else // !VOUCHER_USE_PERSONA
+	if (!persona_mach_voucher) return voucher_copy();
+	kern_return_t kr;
+	mach_voucher_t okv = MACH_VOUCHER_NULL, kv;
+	voucher_t ov = _voucher_get();
+	if (ov) {
+		okv = ov->v_ipc_kvoucher ? ov->v_ipc_kvoucher : ov->v_kvoucher;
+	}
+	const mach_voucher_attr_recipe_data_t bank_redeem_recipe[] = {
+		[0] = {
+			.key = MACH_VOUCHER_ATTR_KEY_ALL,
+			.command = MACH_VOUCHER_ATTR_COPY,
+			.previous_voucher = okv,
+		},
+		[1] = {
+			.key = MACH_VOUCHER_ATTR_KEY_BANK,
+			.command = MACH_VOUCHER_ATTR_REDEEM,
+			.previous_voucher = persona_mach_voucher,
+		},
+	};
+	kr = _voucher_create_mach_voucher(bank_redeem_recipe,
+			sizeof(bank_redeem_recipe), &kv);
+	if (dispatch_assume_zero(kr)) {
+		if (kr == KERN_INVALID_CAPABILITY) {
+			// bank attribute redeem failed
+			return VOUCHER_INVALID;
+		}
+		kv = MACH_VOUCHER_NULL;
+	}
+	if (kv == okv) {
+		if (kv) _voucher_dealloc_mach_voucher(kv);
+		return _voucher_retain(ov);
+	}
+	voucher_t v = _voucher_find_and_retain(kv);
+	if (v && (!ov || ov->v_ipc_kvoucher)) {
+		_dispatch_voucher_debug("kvoucher[0x%08x] find with persona "
+				"from voucher[%p]", v, kv, ov);
+		_voucher_dealloc_mach_voucher(kv);
+		return v;
+	}
+	voucher_t kvbase = v;
+	voucher_fields_t ignore_fields = VOUCHER_FIELD_KVOUCHER;
+	v = _voucher_clone(ov, ignore_fields);
+	v->v_kvoucher = kv;
+	if (!ov || ov->v_ipc_kvoucher) {
+		v->v_ipc_kvoucher = kv;
+		_voucher_insert(v);
+	} else if (kvbase) {
+		v->v_kvbase = kvbase;
+		_voucher_dealloc_mach_voucher(kv); // borrow base reference
+	}
+	if (!kvbase) {
+		_dispatch_voucher_debug("kvoucher[0x%08x] create with persona "
+				"from voucher[%p]", v, kv, ov);
+	}
+	_voucher_trace(CREATE, v, v->v_kvoucher, v->v_activity);
+	return v;
+#endif // VOUCHER_USE_PERSONA
+}
+
+kern_return_t
+mach_voucher_persona_self(mach_voucher_t *persona_mach_voucher)
+{
+	mach_voucher_t bkv = MACH_VOUCHER_NULL;
+	kern_return_t kr = KERN_NOT_SUPPORTED;
+#if VOUCHER_USE_PERSONA
+	mach_voucher_t kv = _voucher_get_task_mach_voucher();
+
+	const mach_voucher_attr_recipe_data_t bank_send_recipe[] = {
+		[0] = {
+			.key = MACH_VOUCHER_ATTR_KEY_BANK,
+			.command = MACH_VOUCHER_ATTR_COPY,
+			.previous_voucher = kv,
+		},
+		[1] = {
+			.key = MACH_VOUCHER_ATTR_KEY_BANK,
+			.command = MACH_VOUCHER_ATTR_SEND_PREPROCESS,
+		},
+	};
+	kr = _voucher_create_mach_voucher(bank_send_recipe,
+			sizeof(bank_send_recipe), &bkv);
+	if (dispatch_assume_zero(kr)) {
+		bkv = MACH_VOUCHER_NULL;
+	}
+#endif // VOUCHER_USE_PERSONA
+	*persona_mach_voucher = bkv;
+	return kr;
+}
+
+kern_return_t
+mach_voucher_persona_for_originator(uid_t persona_id,
+	mach_voucher_t originator_persona_mach_voucher,
+	uint64_t originator_unique_pid, mach_voucher_t *persona_mach_voucher)
+{
+	mach_voucher_t bkv = MACH_VOUCHER_NULL;
+	kern_return_t kr = KERN_NOT_SUPPORTED;
+#if VOUCHER_USE_PERSONA
+	struct persona_modify_info modify_info = {
+		.persona_id = persona_id,
+		.unique_pid = originator_unique_pid,
+	};
+	size_t bank_modify_recipe_size = _voucher_mach_recipe_size(0) +
+			_voucher_mach_recipe_size(sizeof(modify_info));
+	mach_voucher_attr_recipe_t bank_modify_recipe =
+			(mach_voucher_attr_recipe_t)alloca(bank_modify_recipe_size);
+
+	bzero((void *)bank_modify_recipe, bank_modify_recipe_size);
+
+	bank_modify_recipe[0] = (mach_voucher_attr_recipe_data_t){
+		.key = MACH_VOUCHER_ATTR_KEY_BANK,
+		.command = MACH_VOUCHER_ATTR_COPY,
+		.previous_voucher = originator_persona_mach_voucher,
+	};
+	bank_modify_recipe[1] = (mach_voucher_attr_recipe_data_t){
+		.key = MACH_VOUCHER_ATTR_KEY_BANK,
+		.command = MACH_VOUCHER_ATTR_BANK_MODIFY_PERSONA,
+		.content_size = sizeof(modify_info),
+	};
+	_dispatch_memappend(bank_modify_recipe[1].content, &modify_info);
+	kr = _voucher_create_mach_voucher(bank_modify_recipe,
+			bank_modify_recipe_size, &bkv);
+	if (dispatch_assume_zero(kr)) {
+		bkv = MACH_VOUCHER_NULL;
+	}
+#else // VOUCHER_USE_PERSONA
+	(void)persona_id;
+	(void)originator_persona_mach_voucher;
+	(void)originator_unique_pid;
+#endif // VOUCHER_USE_PERSONA
+	*persona_mach_voucher = bkv;
+	return kr;
+}
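The three new SPI entry points compose: mach_voucher_persona_self() hands back a bank voucher describing the caller's persona, mach_voucher_persona_for_originator() rewrites it for a different persona/originator, and voucher_copy_with_persona_mach_voucher() redeems the result into a regular voucher_t. A hedged usage sketch, a code fragment rather than a stand-alone program; it assumes the private voucher headers used by this file, and the cleanup responsibilities noted in comments are assumptions, not taken from this patch:

    // Sketch only: relies on the SPI declared by this patch.
    mach_voucher_t persona_kv = MACH_VOUCHER_NULL;
    kern_return_t kr = mach_voucher_persona_self(&persona_kv);
    if (kr == KERN_SUCCESS && persona_kv != MACH_VOUCHER_NULL) {
        voucher_t v = voucher_copy_with_persona_mach_voucher(persona_kv);
        if (v == VOUCHER_INVALID) {
            // the bank attribute refused the redeem
            // (cf. the KERN_INVALID_CAPABILITY path above)
        } else if (v) {
            // v carries the caller's attributes plus the persona;
            // adopt it or send it along
        }
        // cleanup of persona_kv and v omitted in this sketch
    }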
+
 #if VOUCHER_USE_PERSONA
 static kern_return_t
 _voucher_get_current_persona_token(struct persona_token *token)
@@ -892,7 +1008,35 @@
 	}
 	return kr;
 }
-#endif
+
+static kern_return_t
+_voucher_get_current_persona_id(uid_t *persona_id)
+{
+	kern_return_t kr = KERN_FAILURE;
+	voucher_t v = _voucher_get();
+
+	if (v && v->v_kvoucher) {
+		mach_voucher_t kv = v->v_ipc_kvoucher ?: v->v_kvoucher;
+		mach_voucher_attr_content_t kvc_in = NULL;
+		mach_voucher_attr_content_size_t kvc_in_size = 0;
+		mach_voucher_attr_content_t kvc_out =
+			(mach_voucher_attr_content_t)persona_id;
+		mach_voucher_attr_content_size_t kvc_out_size = sizeof(*persona_id);
+
+		kr = mach_voucher_attr_command(kv, MACH_VOUCHER_ATTR_KEY_BANK,
+				BANK_PERSONA_ID, kvc_in, kvc_in_size,
+				kvc_out, &kvc_out_size);
+		if (kr != KERN_NOT_SUPPORTED
+				// Voucher doesn't have a persona id
+				&& kr != KERN_INVALID_VALUE
+				// Kernel doesn't understand BANK_PERSONA_ID
+				&& kr != KERN_INVALID_ARGUMENT) {
+			(void)dispatch_assume_zero(kr);
+		}
+	}
+	return kr;
+}
+#endif // VOUCHER_USE_PERSONA
 
 uid_t
 voucher_get_current_persona(void)
@@ -900,11 +1044,10 @@
 	uid_t persona_id = PERSONA_ID_NONE;
 
 #if VOUCHER_USE_PERSONA
-	struct persona_token token;
 	int err;
 
-	if (_voucher_get_current_persona_token(&token) == KERN_SUCCESS) {
-		return token.originator.persona_id;
+	if (_voucher_get_current_persona_id(&persona_id) == KERN_SUCCESS) {
+		return persona_id;
 	}
 
 	// falling back to the process persona if there is no adopted voucher
@@ -914,7 +1057,7 @@
 			(void)dispatch_assume_zero(err);
 		}
 	}
-#endif
+#endif // VOUCHER_USE_PERSONA
 	return persona_id;
 }
 
@@ -927,9 +1070,9 @@
 		*persona_info = token.originator;
 		return 0;
 	}
-#else
+#else // VOUCHER_USE_PERSONA
 	(void)persona_info;
-#endif
+#endif // VOUCHER_USE_PERSONA
 	return -1;
 }
 
@@ -942,12 +1085,11 @@
 		*persona_info = token.proximate;
 		return 0;
 	}
-#else
+#else // VOUCHER_USE_PERSONA
 	(void)persona_info;
-#endif
+#endif // VOUCHER_USE_PERSONA
 	return -1;
 }
-#endif
 
 #pragma mark -
 #pragma mark _voucher_init
@@ -1020,10 +1162,6 @@
 _voucher_init(void)
 {
 	_voucher_libkernel_init();
-	unsigned int i;
-	for (i = 0; i < VL_HASH_SIZE; i++) {
-		_voucher_hash_head_init(&_voucher_hash[i]);
-	}
 }
 
 #pragma mark -
@@ -1077,9 +1215,10 @@
 	return _voucher_activity_id_allocate(flags);
 }
 
-#define _voucher_activity_tracepoint_reserve(stamp, stream, pub, priv, privbuf) \
+#define _voucher_activity_tracepoint_reserve(stamp, stream, pub, priv, \
+		privbuf, reliable) \
 		firehose_buffer_tracepoint_reserve(_firehose_task_buffer, stamp, \
-				stream, pub, priv, privbuf)
+				stream, pub, priv, privbuf, reliable)
 
 #define _voucher_activity_tracepoint_flush(ft, ftid) \
 		firehose_buffer_tracepoint_flush(_firehose_task_buffer, ft, ftid)
@@ -1096,7 +1235,7 @@
 
 	info_size = proc_pidinfo(getpid(), PROC_PIDUNIQIDENTIFIERINFO, 1,
 			&p_uniqinfo, PROC_PIDUNIQIDENTIFIERINFO_SIZE);
-	if (slowpath(info_size != PROC_PIDUNIQIDENTIFIERINFO_SIZE)) {
+	if (unlikely(info_size != PROC_PIDUNIQIDENTIFIERINFO_SIZE)) {
 		if (info_size == 0) {
 			DISPATCH_INTERNAL_CRASH(errno,
 				"Unable to get the unique pid (error)");
@@ -1108,11 +1247,7 @@
 	_voucher_unique_pid = p_uniqinfo.p_uniqueid;
 
 
-	if (!fastpath(_voucher_libtrace_hooks)) {
-		if (0) { // <rdar://problem/23393959>
-			DISPATCH_CLIENT_CRASH(0,
-					"Activity subsystem isn't initialized yet");
-		}
+	if (unlikely(!_voucher_libtrace_hooks)) {
 		return;
 	}
 	logd_port = _voucher_libtrace_hooks->vah_get_logd_port();
@@ -1144,13 +1279,31 @@
 			NULL, _firehose_task_buffer_init);
 
 	firehose_buffer_t fb = _firehose_task_buffer;
-	if (fastpath(fb)) {
-		return slowpath(fb->fb_header.fbh_sendp == MACH_PORT_DEAD);
-	}
-	return true;
+	return !fb || fb->fb_header.fbh_sendp[false] == MACH_PORT_DEAD;
 }
 
-void*
+void *
+voucher_activity_get_logging_preferences(size_t *length)
+{
+	if (unlikely(_voucher_activity_disabled())) {
+		*length = 0;
+		return NULL;
+	}
+
+	return firehose_buffer_get_logging_prefs(_firehose_task_buffer, length);
+}
+
+bool
+voucher_activity_should_send_strings(void)
+{
+	if (unlikely(_voucher_activity_disabled())) {
+		return false;
+	}
+
+	return firehose_buffer_should_send_strings(_firehose_task_buffer);
+}
+
+void *
 voucher_activity_get_metadata_buffer(size_t *length)
 {
 	if (_voucher_activity_disabled()) {
@@ -1224,8 +1377,8 @@
 
 	for (size_t i = 0; i < countof(streams); i++) {
 		ft = _voucher_activity_tracepoint_reserve(stamp, streams[i], pubsize,
-				0, NULL);
-		if (!fastpath(ft)) continue;
+				0, NULL, true);
+		if (unlikely(!ft)) continue;
 
 		uint8_t *pubptr = ft->ft_data;
 		if (current_id) {
@@ -1285,8 +1438,9 @@
 
 	_dispatch_voucher_ktrace_activity_adopt(new_id);
 
-	ft = _voucher_activity_tracepoint_reserve(stamp, stream, pubsize, 0, NULL);
-	if (!fastpath(ft)) return;
+	ft = _voucher_activity_tracepoint_reserve(stamp, stream, pubsize, 0, NULL,
+			true);
+	if (unlikely(!ft)) return;
 	uint8_t *pubptr = ft->ft_data;
 	if (old_id) pubptr = _dispatch_memappend(pubptr, &old_id);
 	if (new_id) pubptr = _dispatch_memappend(pubptr, &new_id);
@@ -1326,14 +1480,15 @@
 
 DISPATCH_NOINLINE
 firehose_tracepoint_id_t
-voucher_activity_trace_v(firehose_stream_t stream,
+voucher_activity_trace_v_2(firehose_stream_t stream,
 		firehose_tracepoint_id_t trace_id, uint64_t stamp,
-		const struct iovec *iov, size_t publen, size_t privlen)
+		const struct iovec *iov, size_t publen, size_t privlen, uint32_t flags)
 {
 	firehose_tracepoint_id_u ftid = { .ftid_value = trace_id };
 	const uint16_t ft_size = offsetof(struct firehose_tracepoint_s, ft_data);
 	const size_t _firehose_chunk_payload_size =
 			sizeof(((struct firehose_chunk_s *)0)->fc_data);
+	bool reliable = !(flags & VOUCHER_ACTIVITY_TRACE_FLAG_UNRELIABLE);
 
 	if (_voucher_activity_disabled()) return 0;
 
@@ -1364,13 +1519,13 @@
 		pubsize += sizeof(struct firehose_buffer_range_s);
 	}
 
-	if (slowpath(ft_size + pubsize + privlen > _firehose_chunk_payload_size)) {
+	if (unlikely(ft_size + pubsize + privlen > _firehose_chunk_payload_size)) {
 		DISPATCH_CLIENT_CRASH(ft_size + pubsize + privlen, "Log is too large");
 	}
 
 	ft = _voucher_activity_tracepoint_reserve(stamp, stream, (uint16_t)pubsize,
-				(uint16_t)privlen, &privptr);
-	if (!fastpath(ft)) return 0;
+				(uint16_t)privlen, &privptr, reliable);
+	if (unlikely(!ft)) return 0;
 	pubptr = ft->ft_data;
 	if (va_id) {
 		pubptr = _dispatch_memappend(pubptr, &va_id);
@@ -1404,6 +1559,16 @@
 	return ftid.ftid_value;
 }
 
+DISPATCH_NOINLINE
+firehose_tracepoint_id_t
+voucher_activity_trace_v(firehose_stream_t stream,
+		firehose_tracepoint_id_t trace_id, uint64_t stamp,
+		const struct iovec *iov, size_t publen, size_t privlen)
+{
+	return voucher_activity_trace_v_2(stream, trace_id, stamp, iov, publen,
+			privlen, 0);
+}
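voucher_activity_trace_v() is now a thin wrapper that forwards to voucher_activity_trace_v_2() with no flags, so existing callers keep the reliable-reservation behavior while new callers can pass VOUCHER_ACTIVITY_TRACE_FLAG_UNRELIABLE to request a non-reliable reservation (the reliable bit threaded through _voucher_activity_tracepoint_reserve above). A hedged calling sketch; this is a fragment, and stream, trace_id, stamp, pubdata, and publen are assumed to be in scope at the call site:

    // Fragment: assumes the voucher activity SPI headers used by this file
    // and <sys/uio.h> for struct iovec.
    struct iovec iov = { (void *)pubdata, publen };
    firehose_tracepoint_id_t ftid = voucher_activity_trace_v_2(stream,
            trace_id, stamp, &iov, publen, /*privlen*/ 0,
            VOUCHER_ACTIVITY_TRACE_FLAG_UNRELIABLE);
    if (ftid == 0) {
        // activity tracing is disabled or no tracepoint could be reserved
    }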
+
 firehose_tracepoint_id_t
 voucher_activity_trace(firehose_stream_t stream,
 		firehose_tracepoint_id_t trace_id, uint64_t stamp,
@@ -1413,29 +1578,22 @@
 	return voucher_activity_trace_v(stream, trace_id, stamp, &iov, publen, 0);
 }
 
-firehose_tracepoint_id_t
-voucher_activity_trace_with_private_strings(firehose_stream_t stream,
-		firehose_tracepoint_id_t trace_id, uint64_t stamp,
-		const void *pubdata, size_t publen,
-		const void *privdata, size_t privlen)
-{
-	struct iovec iov[2] = {
-		{ (void *)pubdata, publen },
-		{ (void *)privdata, privlen },
-	};
-	return voucher_activity_trace_v(stream, trace_id, stamp,
-			iov, publen, privlen);
-}
-
 #pragma mark -
 #pragma mark _voucher_debug
 
+#define bufprintf(...) \
+		offset += dsnprintf(&buf[offset], bufsiz > offset ? bufsiz - offset : 0, ##__VA_ARGS__)
+#define bufprintprefix() \
+		if (prefix) bufprintf("%s", prefix)
+#define VOUCHER_DETAIL_PREFIX "        "
+#define IKOT_VOUCHER	37U
+#define VOUCHER_CONTENTS_SIZE 8192
+#define MAX_HEX_DATA_SIZE 1024
+
 size_t
-_voucher_debug(voucher_t v, char* buf, size_t bufsiz)
+_voucher_debug(voucher_t v, char *buf, size_t bufsiz)
 {
 	size_t offset = 0;
-	#define bufprintf(...) \
-			offset += dsnprintf(&buf[offset], bufsiz - offset, ##__VA_ARGS__)
 	bufprintf("voucher[%p] = { xref = %d, ref = %d", v,
 			v->os_obj_xref_cnt + 1, v->os_obj_ref_cnt + 1);
 
@@ -1443,11 +1601,17 @@
 		bufprintf(", base voucher %p", v->v_kvbase);
 	}
 	if (v->v_kvoucher) {
-		bufprintf(", kvoucher%s 0x%x", v->v_kvoucher == v->v_ipc_kvoucher ?
+		bufprintf(", kvoucher%s 0x%x [\n", v->v_kvoucher == v->v_ipc_kvoucher ?
 				" & ipc kvoucher" : "", v->v_kvoucher);
+		offset = voucher_kvoucher_debug(mach_task_self(), v->v_kvoucher, buf,
+				bufsiz, offset, VOUCHER_DETAIL_PREFIX, MAX_HEX_DATA_SIZE);
+		bufprintf("]");
 	}
 	if (v->v_ipc_kvoucher && v->v_ipc_kvoucher != v->v_kvoucher) {
-		bufprintf(", ipc kvoucher 0x%x", v->v_ipc_kvoucher);
+		bufprintf(", ipc kvoucher 0x%x [\n", v->v_ipc_kvoucher);
+		offset = voucher_kvoucher_debug(mach_task_self(), v->v_ipc_kvoucher,
+				buf, bufsiz, offset, VOUCHER_DETAIL_PREFIX, MAX_HEX_DATA_SIZE);
+		bufprintf("]");
 	}
 	if (v->v_priority) {
 		bufprintf(", QOS 0x%x", v->v_priority);
@@ -1457,6 +1621,128 @@
 				v->v_activity, v->v_activity_creator, v->v_parent_activity);
 	}
 	bufprintf(" }");
+
+	return offset;
+}
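The hoisted bufprintf() macro now clamps the remaining size to zero once offset passes bufsiz. Without the clamp, bufsiz - offset underflows, because snprintf-style functions return the length they would have written, so offset can legitimately grow past the buffer. A small stand-alone demonstration of the same pattern with plain snprintf (the sketch additionally clamps the buffer pointer so the demo stays strictly in bounds):

    #include <stdio.h>

    #define bufprintf(...) \
        (offset += (size_t)snprintf(buf + (offset < bufsiz ? offset : bufsiz), \
                offset < bufsiz ? bufsiz - offset : 0, __VA_ARGS__))

    int main(void)
    {
        char buf[16];
        size_t bufsiz = sizeof(buf), offset = 0;

        bufprintf("voucher[%p]", (void *)0x1234);
        bufprintf(" = { xref = %d }", 3);   // overflows the 16-byte buffer
        bufprintf(" trailing");             // remaining size clamps to 0

        // offset records the length the full string would have had;
        // buf holds the truncated, still NUL-terminated prefix.
        printf("wanted %zu bytes, kept \"%s\"\n", offset, buf);
        return 0;
    }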
+
+static size_t
+format_hex_data(char *prefix, char *desc, uint8_t *data, size_t data_len,
+	   char *buf, size_t bufsiz, size_t offset)
+{
+	size_t i;
+	uint8_t chars[17];
+	uint8_t *pc = data;
+
+	if (desc) {
+		bufprintf("%s%s:\n", prefix, desc);
+	}
+
+	ssize_t offset_in_row = -1;
+	for (i = 0; i < data_len; i++) {
+		offset_in_row = i % 16;
+		if (offset_in_row == 0) {
+			if (i != 0) {
+				bufprintf("  %s\n", chars);
+			}
+			bufprintf("%s  %04lx ", prefix, i);
+		}
+		bufprintf(" %02x", pc[i]);
+		chars[offset_in_row] = (pc[i] < 0x20) || (pc[i] > 0x7e) ? '.' : pc[i];
+	}
+	chars[offset_in_row + 1] = '\0';
+
+	if ((i % 16) != 0) {
+		while ((i % 16) != 0) {
+			bufprintf("   ");
+			i++;
+		}
+		bufprintf("  %s\n", chars);
+	}
+	return offset;
+}
+
+static size_t
+format_recipe_detail(mach_voucher_attr_recipe_t recipe, char *buf,
+		size_t bufsiz, size_t offset, char *prefix, size_t max_hex_data)
+{
+	bufprintprefix();
+	bufprintf("Key: %u, ", recipe->key);
+	bufprintf("Command: %u, ", recipe->command);
+	bufprintf("Previous voucher: 0x%x, ", recipe->previous_voucher);
+	bufprintf("Content size: %u\n", recipe->content_size);
+
+	switch (recipe->key) {
+	case MACH_VOUCHER_ATTR_KEY_ATM:
+		bufprintprefix();
+		bufprintf("ATM ID: %llu", *(uint64_t *)(uintptr_t)recipe->content);
+		break;
+	case MACH_VOUCHER_ATTR_KEY_IMPORTANCE:
+		bufprintprefix();
+		bufprintf("IMPORTANCE INFO: %s", (char *)recipe->content);
+		break;
+	case MACH_VOUCHER_ATTR_KEY_BANK:
+		bufprintprefix();
+		bufprintf("RESOURCE ACCOUNTING INFO: %s", (char *)recipe->content);
+		break;
+	default:
+		offset = format_hex_data(prefix, "Recipe Contents", recipe->content,
+				MIN(recipe->content_size, max_hex_data), buf, bufsiz, offset);
+		break;
+	}
+	if (buf[offset - 1] != '\n') {
+		bufprintf("\n");
+	}
+	return offset;
+}
+
+size_t
+voucher_kvoucher_debug(mach_port_t task, mach_port_name_t voucher, char *buf,
+		size_t bufsiz, size_t offset, char *prefix, size_t max_hex_data)
+{
+	uint8_t voucher_contents[VOUCHER_CONTENTS_SIZE];
+	bzero(voucher_contents, VOUCHER_CONTENTS_SIZE);
+	size_t recipe_size = VOUCHER_CONTENTS_SIZE;
+	unsigned v_kobject = 0;
+	unsigned v_kotype = 0;
+
+	kern_return_t kr = mach_port_kernel_object(task, voucher, &v_kotype,
+			&v_kobject);
+	if (kr == KERN_SUCCESS && v_kotype == IKOT_VOUCHER) {
+		kr = mach_voucher_debug_info(task, voucher,
+				(mach_voucher_attr_raw_recipe_array_t)voucher_contents,
+				(mach_msg_type_number_t *)&recipe_size);
+		if (kr != KERN_SUCCESS && kr != KERN_NOT_SUPPORTED) {
+			bufprintprefix();
+			bufprintf("Voucher: 0x%x Failed to get contents %s\n", v_kobject,
+					mach_error_string(kr));
+			goto done;
+		}
+
+		if (recipe_size == 0) {
+			bufprintprefix();
+			bufprintf("Voucher: 0x%x has no contents\n", v_kobject);
+			goto done;
+		}
+
+		bufprintprefix();
+		bufprintf("Voucher: 0x%x\n", v_kobject);
+		unsigned int used_size = 0;
+		mach_voucher_attr_recipe_t recipe = NULL;
+		while (recipe_size > used_size) {
+			recipe = (mach_voucher_attr_recipe_t)&voucher_contents[used_size];
+			if (recipe->key) {
+				offset = format_recipe_detail(recipe, buf, bufsiz, offset,
+						prefix, max_hex_data);
+			}
+			used_size += sizeof(mach_voucher_attr_recipe_data_t)
+					+ recipe->content_size;
+		}
+	} else {
+		bufprintprefix();
+		bufprintf("Invalid voucher: 0x%x\n", voucher);
+	}
+
+done:
 	return offset;
 }
 
@@ -1587,7 +1873,31 @@
 	(void)voucher;
 }
 
-#if VOUCHER_EXPORT_PERSONA_SPI
+#if __has_include(<mach/mach.h>)
+voucher_t
+voucher_copy_with_persona_mach_voucher(mach_voucher_t persona_mach_voucher)
+{
+	(void)persona_mach_voucher;
+	return NULL;
+}
+
+kern_return_t
+mach_voucher_persona_self(mach_voucher_t *persona_mach_voucher)
+{
+	(void)persona_mach_voucher;
+	return KERN_NOT_SUPPORTED;
+}
+
+kern_return_t
+mach_voucher_persona_for_originator(uid_t persona_id,
+	mach_voucher_t originator_persona_mach_voucher,
+	uint64_t originator_unique_pid, mach_voucher_t *persona_mach_voucher)
+{
+	(void)persona_id; (void)originator_persona_mach_voucher;
+	(void)originator_unique_pid; (void)persona_mach_voucher;
+	return KERN_NOT_SUPPORTED;
+}
+
 uid_t
 voucher_get_current_persona(void)
 {
@@ -1607,7 +1917,7 @@
 	(void)persona_info;
 	return -1;
 }
-#endif // VOUCHER_EXPORT_PERSONA_SPI
+#endif // __has_include(<mach/mach.h>)
 
 void
 _voucher_activity_debug_channel_init(void)
diff --git a/src/voucher_internal.h b/src/voucher_internal.h
index 9f5d72b..ec88743 100644
--- a/src/voucher_internal.h
+++ b/src/voucher_internal.h
@@ -177,7 +177,6 @@
 
 typedef struct voucher_hash_head_s {
 	uintptr_t vhh_first;
-	uintptr_t vhh_last_ptr;
 } voucher_hash_head_s;
 
 DISPATCH_ALWAYS_INLINE
@@ -243,7 +242,7 @@
 } voucher_recipe_s;
 #endif
 
-#if TARGET_OS_EMBEDDED
+#if TARGET_OS_IPHONE && !TARGET_OS_SIMULATOR
 #define VL_HASH_SIZE  64u // must be a power of two
 #else
 #define VL_HASH_SIZE 256u // must be a power of two
@@ -262,7 +261,7 @@
 #define _dispatch_voucher_debug_machport(name) ((void)(name))
 #endif
 
-#if DISPATCH_USE_DTRACE
+#if DISPATCH_USE_DTRACE_INTROSPECTION && defined(__APPLE__) // rdar://33642820
 #define _voucher_trace(how, ...)  ({ \
 		if (unlikely(VOUCHER_##how##_ENABLED())) { \
 			VOUCHER_##how(__VA_ARGS__); \
@@ -576,11 +575,10 @@
 DISPATCH_ALWAYS_INLINE
 static inline void
 _dispatch_continuation_voucher_set(dispatch_continuation_t dc,
-		dispatch_queue_class_t dqu, dispatch_block_flags_t flags)
+		dispatch_block_flags_t flags)
 {
 	voucher_t v = NULL;
 
-	(void)dqu;
 	// _dispatch_continuation_voucher_set is never called for blocks with
 	// private data or with the DISPATCH_BLOCK_HAS_VOUCHER flag set.
 	// only _dispatch_continuation_init_slow handles this bit.
@@ -594,16 +592,14 @@
 	_dispatch_voucher_ktrace_dc_push(dc);
 }
 
-static inline dispatch_queue_t _dispatch_queue_get_current(void);
-
 DISPATCH_ALWAYS_INLINE
 static inline void
 _dispatch_continuation_voucher_adopt(dispatch_continuation_t dc,
-		voucher_t ov, uintptr_t dc_flags)
+		uintptr_t dc_flags)
 {
 	voucher_t v = dc->dc_voucher;
-	dispatch_thread_set_self_t consume = (dc_flags & DISPATCH_OBJ_CONSUME_BIT);
-	dispatch_assert(DISPATCH_OBJ_CONSUME_BIT == DISPATCH_VOUCHER_CONSUME);
+	dispatch_thread_set_self_t consume = (dc_flags & DC_FLAG_CONSUME);
+	dispatch_assert(DC_FLAG_CONSUME == DISPATCH_VOUCHER_CONSUME);
 
 	if (consume) {
 		dc->dc_voucher = VOUCHER_INVALID;
@@ -611,17 +607,6 @@
 	if (likely(v != DISPATCH_NO_VOUCHER)) {
 		_dispatch_voucher_ktrace_dc_pop(dc, v);
 		_dispatch_voucher_debug("continuation[%p] adopt", v, dc);
-
-		if (likely(!(dc_flags & DISPATCH_OBJ_ENFORCE_VOUCHER))) {
-			if (unlikely(ov != DISPATCH_NO_VOUCHER && v != ov)) {
-				if (consume && v) _voucher_release(v);
-				consume = 0;
-				v = ov;
-			}
-		}
-	} else {
-		consume = 0;
-		v = ov;
 	}
 	(void)_dispatch_adopt_priority_and_set_voucher(dc->dc_priority, v,
 			consume | DISPATCH_VOUCHER_REPLACE);
@@ -759,17 +744,17 @@
 DISPATCH_ALWAYS_INLINE
 static inline void
 _dispatch_continuation_voucher_set(dispatch_continuation_t dc,
-		dispatch_queue_class_t dqu, dispatch_block_flags_t flags)
+		dispatch_block_flags_t flags)
 {
-	(void)dc; (void)dqu; (void)flags;
+	(void)dc; (void)flags;
 }
 
 DISPATCH_ALWAYS_INLINE
 static inline void
-_dispatch_continuation_voucher_adopt(dispatch_continuation_t dc, voucher_t ov,
+_dispatch_continuation_voucher_adopt(dispatch_continuation_t dc,
 		uintptr_t dc_flags)
 {
-	(void)dc; (void)ov; (void)dc_flags;
+	(void)dc; (void)dc_flags;
 }
 
 #endif // VOUCHER_USE_MACH_VOUCHER
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 3a4684f..f530f04 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -192,8 +192,10 @@
 add_unit_test(dispatch_plusplus SOURCES dispatch_plusplus.cpp)
 
 # test-specific link options
-target_link_libraries(dispatch_group PRIVATE m)
-target_link_libraries(dispatch_timer_short PRIVATE m)
+if(NOT WIN32)
+  target_link_libraries(dispatch_group PRIVATE m)
+  target_link_libraries(dispatch_timer_short PRIVATE m)
+endif()
 
 # test-specific compile options
 set_target_properties(dispatch_c99 PROPERTIES C_STANDARD 99)
diff --git a/tests/dispatch_drift.c b/tests/dispatch_drift.c
index e483f36..0381cab 100644
--- a/tests/dispatch_drift.c
+++ b/tests/dispatch_drift.c
@@ -22,8 +22,8 @@
 #include <mach/mach_time.h>
 #endif
 #include <dispatch/dispatch.h>
-#include <sys/time.h>
 #if defined(__unix__) || (defined(__APPLE__) && defined(__MACH__))
+#include <sys/time.h>
 #include <unistd.h>
 #endif
 #include <stdio.h>
@@ -46,8 +46,13 @@
 	__block uint32_t count = 0;
 	__block double last_jitter = 0;
 	__block double drift_sum = 0;
+#if defined(_WIN32)
+	// 25 times a second (Windows timer resolution is poor)
+	uint64_t interval = 1000000000 / 25;
+#else
 	// 100 times a second
 	uint64_t interval = 1000000000 / 100;
+#endif
 	double interval_d = interval / 1000000000.0;
 	// for 25 seconds
 	unsigned int target = (unsigned int)(25.0 / interval_d);
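For reference, the reduced Windows rate works out to an interval of 1000000000 / 25 = 40,000,000 ns (0.04 s), so the 25-second run observes 25.0 / 0.04 = 625 ticks; the 100 Hz rate used elsewhere gives a 10 ms interval and 2,500 ticks.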
diff --git a/tests/dispatch_timer_bit31.c b/tests/dispatch_timer_bit31.c
index eed17ae..a70c4f6 100644
--- a/tests/dispatch_timer_bit31.c
+++ b/tests/dispatch_timer_bit31.c
@@ -21,7 +21,9 @@
 #include <assert.h>
 #include <stdio.h>
 #include <string.h>
+#if defined(__unix__) || (defined(__APPLE__) && defined(__MACH__))
 #include <sys/time.h>
+#endif
 
 #include <dispatch/dispatch.h>
 
diff --git a/tests/dispatch_timer_bit63.c b/tests/dispatch_timer_bit63.c
index 84868ca..f01ca51 100644
--- a/tests/dispatch_timer_bit63.c
+++ b/tests/dispatch_timer_bit63.c
@@ -21,7 +21,9 @@
 #include <assert.h>
 #include <stdio.h>
 #include <string.h>
+#if defined(__unix__) || (defined(__APPLE__) && defined(__MACH__))
 #include <sys/time.h>
+#endif
 
 #include <dispatch/dispatch.h>
 
diff --git a/tests/dispatch_timer_set_time.c b/tests/dispatch_timer_set_time.c
index 5ffd63e..6f30b0c 100644
--- a/tests/dispatch_timer_set_time.c
+++ b/tests/dispatch_timer_set_time.c
@@ -18,11 +18,12 @@
  * @APPLE_APACHE_LICENSE_HEADER_END@
  */
 
-#include <sys/time.h>
 #include <assert.h>
 #include <stdio.h>
 #include <string.h>
+#if defined(__unix__) || (defined(__APPLE__) && defined(__MACH__))
 #include <sys/time.h>
+#endif
 
 #include <dispatch/dispatch.h>
 
diff --git a/tests/dispatch_timer_timeout.c b/tests/dispatch_timer_timeout.c
index f43409e..109bbff 100644
--- a/tests/dispatch_timer_timeout.c
+++ b/tests/dispatch_timer_timeout.c
@@ -21,7 +21,9 @@
 #include <assert.h>
 #include <stdio.h>
 #include <string.h>
+#if defined(__unix__) || (defined(__APPLE__) && defined(__MACH__))
 #include <sys/time.h>
+#endif
 
 #include <dispatch/dispatch.h>
 
diff --git a/tests/generic_win_port.c b/tests/generic_win_port.c
index d9a52f4..f84f9f9 100644
--- a/tests/generic_win_port.c
+++ b/tests/generic_win_port.c
@@ -183,18 +183,50 @@
 	return 0;
 }
 
+typedef void (WINAPI *QueryUnbiasedInterruptTimePreciseT)(PULONGLONG);
+static QueryUnbiasedInterruptTimePreciseT QueryUnbiasedInterruptTimePrecisePtr;
+
+static BOOL
+mach_absolute_time_init(PINIT_ONCE InitOnce, PVOID Parameter, PVOID *lpContext)
+{
+	// QueryUnbiasedInterruptTimePrecise() is declared in the Windows headers
+	// but it isn't available in any import libraries. We must manually load it
+	// from KernelBase.dll.
+	HMODULE kernelbase = LoadLibraryW(L"KernelBase.dll");
+	if (!kernelbase) {
+		print_winapi_error("LoadLibraryW", GetLastError());
+		abort();
+	}
+	QueryUnbiasedInterruptTimePrecisePtr =
+			(QueryUnbiasedInterruptTimePreciseT)GetProcAddress(kernelbase,
+					"QueryUnbiasedInterruptTimePrecise");
+	if (!QueryUnbiasedInterruptTimePrecisePtr) {
+		fprintf(stderr, "QueryUnbiasedInterruptTimePrecise is not available\n");
+		abort();
+	}
+	return TRUE;
+}
+
+uint64_t
+mach_absolute_time(void)
+{
+	static INIT_ONCE init_once = INIT_ONCE_STATIC_INIT;
+	if (!InitOnceExecuteOnce(&init_once, mach_absolute_time_init, NULL, NULL)) {
+		print_winapi_error("InitOnceExecuteOnce", GetLastError());
+		abort();
+	}
+	ULONGLONG result = 0;
+	QueryUnbiasedInterruptTimePrecisePtr(&result);
+	return result * 100;  // Convert from 100ns units
+}
+
 void
 print_winapi_error(const char *function_name, DWORD error)
 {
 	char *message = NULL;
 	DWORD len = FormatMessageA(
-			FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM,
-			NULL,
-			error,
-			0,
-			(LPSTR)&message,
-			0,
-			NULL);
+			FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM, NULL,
+			error, 0, (LPSTR)&message, 0, NULL);
 	if (len > 0) {
 		// Note: FormatMessage includes a newline at the end of the message
 		fprintf(stderr, "%s: %s", function_name, message);
@@ -214,10 +246,6 @@
 int
 usleep(unsigned int usec)
 {
-	DWORD ms = usec / 1000;
-	if (ms == 0 && usec != 0) {
-		ms = 1;
-	}
-	Sleep(ms);
+	Sleep((usec + 999) / 1000);
 	return 0;
 }
diff --git a/tests/generic_win_port.h b/tests/generic_win_port.h
index cf96a21..41c076c 100644
--- a/tests/generic_win_port.h
+++ b/tests/generic_win_port.h
@@ -12,6 +12,14 @@
 typedef long ssize_t;
 #endif
 
+struct mach_timebase_info {
+	uint32_t numer;
+	uint32_t denom;
+};
+
+typedef struct mach_timebase_info *mach_timebase_info_t;
+typedef struct mach_timebase_info mach_timebase_info_data_t;
+
 static inline int32_t
 OSAtomicIncrement32(volatile int32_t *var)
 {
@@ -45,6 +53,18 @@
 int
 gettimeofday(struct timeval *tp, void *tzp);
 
+uint64_t
+mach_absolute_time(void);
+
+static inline
+int
+mach_timebase_info(mach_timebase_info_t tbi)
+{
+	tbi->numer = 1;
+	tbi->denom = 1;
+	return 0;
+}
+
 void
 print_winapi_error(const char *function_name, DWORD error);
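Together, the new mach_absolute_time() shim (QueryUnbiasedInterruptTimePrecise scaled from 100 ns units up to nanoseconds) and the 1/1 mach_timebase_info() stub let the timing tests run the same Mach-style code on Windows as on Darwin. A sketch of how a test might consume them; this is hypothetical test code, not part of this patch:

    #include <stdint.h>
    #include <stdio.h>
    #if defined(__APPLE__)
    #include <mach/mach_time.h>
    #else
    #include "generic_win_port.h"   // the shims added above (test-local header)
    #endif

    static double elapsed_ms(uint64_t start, uint64_t end)
    {
        mach_timebase_info_data_t tb;
        mach_timebase_info(&tb);    // 1/1 on the Windows port
        uint64_t ns = (end - start) * tb.numer / tb.denom;
        return (double)ns / 1e6;
    }

    // Usage inside a test:
    //   uint64_t t0 = mach_absolute_time();
    //   ... do the timed work ...
    //   printf("took %.3f ms\n", elapsed_ms(t0, mach_absolute_time()));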
 
diff --git a/xcodeconfig/libdispatch-dyld-stub.xcconfig b/xcodeconfig/libdispatch-dyld-stub.xcconfig
index dd1814d..763bafe 100644
--- a/xcodeconfig/libdispatch-dyld-stub.xcconfig
+++ b/xcodeconfig/libdispatch-dyld-stub.xcconfig
@@ -21,7 +21,7 @@
 PRODUCT_NAME = libdispatch_dyld_stub
 INSTALL_PATH = /usr/local/lib/dyld_stub
 BUILD_VARIANTS = normal
-GCC_PREPROCESSOR_DEFINITIONS = $(inherited) DISPATCH_VARIANT_DYLD_STUB=1 $(STATICLIB_PREPROCESSOR_DEFINITIONS)
+GCC_PREPROCESSOR_DEFINITIONS = $(GCC_PREPROCESSOR_DEFINITIONS) DISPATCH_VARIANT_DYLD_STUB=1 $(STATICLIB_PREPROCESSOR_DEFINITIONS)
 OTHER_LDFLAGS =
 VERSIONING_SYSTEM =
 EXCLUDED_SOURCE_FILE_NAMES = *
diff --git a/xcodeconfig/libdispatch-introspection.xcconfig b/xcodeconfig/libdispatch-introspection.xcconfig
index c7826d5..a2f98f9 100644
--- a/xcodeconfig/libdispatch-introspection.xcconfig
+++ b/xcodeconfig/libdispatch-introspection.xcconfig
@@ -21,6 +21,6 @@
 BUILD_VARIANTS = normal
 INSTALL_PATH = /usr/lib/system/introspection
 
-GCC_PREPROCESSOR_DEFINITIONS = $(inherited) DISPATCH_INTROSPECTION=1
+GCC_PREPROCESSOR_DEFINITIONS = $(GCC_PREPROCESSOR_DEFINITIONS) DISPATCH_INTROSPECTION=1
 CONFIGURATION_BUILD_DIR = $(BUILD_DIR)/introspection
 OTHER_LDFLAGS = $(OTHER_LDFLAGS) -Wl,-interposable_list,$(SRCROOT)/xcodeconfig/libdispatch.interposable
diff --git a/xcodeconfig/libdispatch-mp-static.xcconfig b/xcodeconfig/libdispatch-mp-static.xcconfig
index af3715f..22dc9c2 100644
--- a/xcodeconfig/libdispatch-mp-static.xcconfig
+++ b/xcodeconfig/libdispatch-mp-static.xcconfig
@@ -23,7 +23,7 @@
 PRODUCT_NAME = libdispatch
 INSTALL_PATH = /usr/local/lib/system
 BUILD_VARIANTS = normal debug
-GCC_PREPROCESSOR_DEFINITIONS = $(inherited) $(STATICLIB_PREPROCESSOR_DEFINITIONS)
+GCC_PREPROCESSOR_DEFINITIONS = $(GCC_PREPROCESSOR_DEFINITIONS) $(STATICLIB_PREPROCESSOR_DEFINITIONS)
 OTHER_LDFLAGS =
 SKIP_INSTALL[sdk=*simulator*] = YES
 EXCLUDED_SOURCE_FILE_NAMES[sdk=*simulator*] = *
diff --git a/xcodeconfig/libdispatch-resolved.xcconfig b/xcodeconfig/libdispatch-resolved.xcconfig
index 2f2e273..2d509c5 100644
--- a/xcodeconfig/libdispatch-resolved.xcconfig
+++ b/xcodeconfig/libdispatch-resolved.xcconfig
@@ -18,7 +18,7 @@
 // @APPLE_APACHE_LICENSE_HEADER_END@
 //
 
-SUPPORTED_PLATFORMS = iphoneos appletvos watchos
+SUPPORTED_PLATFORMS = iphoneos
 PRODUCT_NAME = libdispatch_$(DISPATCH_RESOLVED_VARIANT)
 OTHER_LDFLAGS =
 SKIP_INSTALL = YES
diff --git a/xcodeconfig/libdispatch-up-static.xcconfig b/xcodeconfig/libdispatch-up-static.xcconfig
deleted file mode 100644
index 170c5b3..0000000
--- a/xcodeconfig/libdispatch-up-static.xcconfig
+++ /dev/null
@@ -1,28 +0,0 @@
-//
-// Copyright (c) 2012-2013 Apple Inc. All rights reserved.
-//
-// @APPLE_APACHE_LICENSE_HEADER_START@
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-// @APPLE_APACHE_LICENSE_HEADER_END@
-//
-
-// skip simulator
-SUPPORTED_PLATFORMS = macosx iphoneos appletvos watchos
-PRODUCT_NAME = libdispatch_up
-BUILD_VARIANTS = normal
-GCC_PREPROCESSOR_DEFINITIONS = $(inherited) DISPATCH_HW_CONFIG_UP=1 $(STATICLIB_PREPROCESSOR_DEFINITIONS)
-OTHER_LDFLAGS =
-SKIP_INSTALL = YES
-EXCLUDED_SOURCE_FILE_NAMES[sdk=*simulator*] = *
diff --git a/xcodeconfig/libdispatch.clean b/xcodeconfig/libdispatch.clean
new file mode 100644
index 0000000..c6ba14c
--- /dev/null
+++ b/xcodeconfig/libdispatch.clean
@@ -0,0 +1,48 @@
+#
+# Copyright (c) 2018 Apple Inc. All rights reserved.
+#
+# @APPLE_APACHE_LICENSE_HEADER_START@
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# @APPLE_APACHE_LICENSE_HEADER_END@
+#
+
+__dispatch_bug.last_seen
+__dispatch_bug_deprecated.last_seen
+__dispatch_bug_kevent_client.last_seen
+__dispatch_bug_kevent_client.last_seen.37
+__dispatch_bug_kevent_client.last_seen.39
+__dispatch_bug_kevent_vanished.last_seen
+__dispatch_bug_mach_client.last_seen
+
+__dispatch_build_pred
+__dispatch_build
+
+__dispatch_child_of_unsafe_fork
+__dispatch_continuation_cache_limit
+__dispatch_data_empty
+__dispatch_host_time_data.0
+__dispatch_host_time_data.1
+__dispatch_host_time_mach2nano
+__dispatch_host_time_nano2mach
+__dispatch_source_timer_use_telemetry
+__dispatch_timers_force_max_leeway
+__os_object_debug_missing_pools
+_dispatch_benchmark_f.bdata
+_dispatch_benchmark_f.pred
+_dispatch_io_defaults
+_dispatch_log_disabled
+_dispatch_logfile
+
+__dyld_private
diff --git a/xcodeconfig/libdispatch.dirty b/xcodeconfig/libdispatch.dirty
new file mode 100644
index 0000000..d8d1a0d
--- /dev/null
+++ b/xcodeconfig/libdispatch.dirty
@@ -0,0 +1,153 @@
+#
+# Copyright (c) 2013 Apple Inc. All rights reserved.
+#
+# @APPLE_APACHE_LICENSE_HEADER_START@
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# @APPLE_APACHE_LICENSE_HEADER_END@
+#
+
+# Must be kept in sync with ObjC TFB checks in object_internal.h
+
+# dispatch_object_t classes
+_OBJC_CLASS_$_OS_dispatch_object
+_OBJC_CLASS_$_OS_dispatch_semaphore
+__OS_dispatch_semaphore_vtable
+_OBJC_CLASS_$_OS_dispatch_group
+__OS_dispatch_group_vtable
+_OBJC_CLASS_$_OS_dispatch_queue
+__OS_dispatch_queue_vtable
+_OBJC_CLASS_$_OS_dispatch_workloop
+__OS_dispatch_workloop_vtable
+_OBJC_CLASS_$_OS_dispatch_queue_serial
+__OS_dispatch_queue_serial_vtable
+_OBJC_CLASS_$_OS_dispatch_queue_concurrent
+__OS_dispatch_queue_concurrent_vtable
+_OBJC_CLASS_$_OS_dispatch_queue_global
+__OS_dispatch_queue_global_vtable
+_OBJC_CLASS_$_OS_dispatch_queue_pthread_root
+__OS_dispatch_queue_pthread_root_vtable
+_OBJC_CLASS_$_OS_dispatch_queue_main
+__OS_dispatch_queue_main_vtable
+_OBJC_CLASS_$_OS_dispatch_queue_runloop
+__OS_dispatch_queue_runloop_vtable
+_OBJC_CLASS_$_OS_dispatch_queue_mgr
+__OS_dispatch_queue_mgr_vtable
+_OBJC_CLASS_$_OS_dispatch_queue_attr
+__OS_dispatch_queue_attr_vtable
+_OBJC_CLASS_$_OS_dispatch_source
+__OS_dispatch_source_vtable
+_OBJC_CLASS_$_OS_dispatch_mach
+__OS_dispatch_mach_vtable
+_OBJC_CLASS_$_OS_dispatch_mach_msg
+__OS_dispatch_mach_msg_vtable
+_OBJC_CLASS_$_OS_dispatch_io
+__OS_dispatch_io_vtable
+_OBJC_CLASS_$_OS_dispatch_operation
+__OS_dispatch_operation_vtable
+_OBJC_CLASS_$_OS_dispatch_disk
+__OS_dispatch_disk_vtable
+# os_object_t classes
+_OBJC_CLASS_$_OS_object
+_OBJC_CLASS_$_OS_voucher
+#_OBJC_CLASS_$_OS_voucher_recipe
+# non-os_object_t classes
+_OBJC_CLASS_$_OS_dispatch_data
+_OBJC_CLASS_$_OS_dispatch_data_empty
+# metaclasses
+_OBJC_METACLASS_$_OS_dispatch_object
+_OBJC_METACLASS_$_OS_dispatch_semaphore
+_OBJC_METACLASS_$_OS_dispatch_group
+_OBJC_METACLASS_$_OS_dispatch_queue
+_OBJC_METACLASS_$_OS_dispatch_workloop
+_OBJC_METACLASS_$_OS_dispatch_queue_serial
+_OBJC_METACLASS_$_OS_dispatch_queue_concurrent
+_OBJC_METACLASS_$_OS_dispatch_queue_global
+_OBJC_METACLASS_$_OS_dispatch_queue_pthread_root
+_OBJC_METACLASS_$_OS_dispatch_queue_main
+_OBJC_METACLASS_$_OS_dispatch_queue_runloop
+_OBJC_METACLASS_$_OS_dispatch_queue_mgr
+_OBJC_METACLASS_$_OS_dispatch_queue_attr
+_OBJC_METACLASS_$_OS_dispatch_source
+_OBJC_METACLASS_$_OS_dispatch_mach
+_OBJC_METACLASS_$_OS_dispatch_mach_msg
+_OBJC_METACLASS_$_OS_dispatch_io
+_OBJC_METACLASS_$_OS_dispatch_operation
+_OBJC_METACLASS_$_OS_dispatch_disk
+_OBJC_METACLASS_$_OS_object
+_OBJC_METACLASS_$_OS_voucher
+#_OBJC_METACLASS_$_OS_voucher_recipe
+_OBJC_METACLASS_$_OS_dispatch_data
+_OBJC_METACLASS_$_OS_dispatch_data_empty
+
+# Other dirty symbols
+# large structs / hashes
+__dispatch_main_q
+__dispatch_mgr_q
+__dispatch_mgr_sched
+__dispatch_root_queues
+__dispatch_sources
+__dispatch_timers_heap
+__dispatch_trace_next_timer
+__voucher_hash
+
+# 64 bits
+__dispatch_narrow_check_interval_cache
+__dispatch_narrowing_deadlines
+__voucher_aid_next
+__voucher_unique_pid
+
+# pointer sized
+__dispatch_begin_NSAutoReleasePool
+__dispatch_continuation_alloc_init_pred
+__dispatch_end_NSAutoReleasePool
+__dispatch_is_daemon_pred
+__dispatch_kq_poll_pred
+__dispatch_logv_pred
+__dispatch_mach_calendar_pred
+__dispatch_mach_host_port_pred
+__dispatch_mach_notify_port_pred
+__dispatch_mach_xpc_hooks
+__dispatch_main_heap
+__dispatch_main_q_handle_pred
+__dispatch_mgr_sched_pred
+__dispatch_queue_serial_numbers
+__dispatch_root_queues_pred
+__dispatch_source_timer_telemetry_pred
+__firehose_task_buffer
+__firehose_task_buffer_pred
+__voucher_activity_debug_channel
+__voucher_libtrace_hooks
+__voucher_task_mach_voucher_pred
+
+# 32bits
+__dispatch_mach_host_port
+__dispatch_mach_notify_port
+__voucher_default_task_mach_voucher
+__voucher_hash_lock
+__voucher_task_mach_voucher
+
+# byte-sized
+__dispatch_is_daemon
+__dispatch_memory_warn
+__dispatch_mode
+__dispatch_program_is_probably_callback_driven
+__dispatch_unsafe_fork
+__dispatch_use_dispatch_alloc
+
+__dispatch_io_devs
+__dispatch_io_fds
+__dispatch_io_devs_lockq
+__dispatch_io_fds_lockq
+__dispatch_io_init_pred
diff --git a/xcodeconfig/libdispatch.order b/xcodeconfig/libdispatch.order
index 9642ca4..b586837 100644
--- a/xcodeconfig/libdispatch.order
+++ b/xcodeconfig/libdispatch.order
@@ -28,20 +28,22 @@
 __OS_dispatch_group_vtable
 _OBJC_CLASS_$_OS_dispatch_queue
 __OS_dispatch_queue_vtable
+_OBJC_CLASS_$_OS_dispatch_workloop
+__OS_dispatch_workloop_vtable
 _OBJC_CLASS_$_OS_dispatch_queue_serial
 __OS_dispatch_queue_serial_vtable
 _OBJC_CLASS_$_OS_dispatch_queue_concurrent
 __OS_dispatch_queue_concurrent_vtable
-_OBJC_CLASS_$_OS_dispatch_queue_root
-__OS_dispatch_queue_root_vtable
+_OBJC_CLASS_$_OS_dispatch_queue_global
+__OS_dispatch_queue_global_vtable
+_OBJC_CLASS_$_OS_dispatch_queue_pthread_root
+__OS_dispatch_queue_pthread_root_vtable
 _OBJC_CLASS_$_OS_dispatch_queue_main
 __OS_dispatch_queue_main_vtable
 _OBJC_CLASS_$_OS_dispatch_queue_runloop
 __OS_dispatch_queue_runloop_vtable
 _OBJC_CLASS_$_OS_dispatch_queue_mgr
 __OS_dispatch_queue_mgr_vtable
-_OBJC_CLASS_$_OS_dispatch_queue_specific_queue
-__OS_dispatch_queue_specific_queue_vtable
 _OBJC_CLASS_$_OS_dispatch_queue_attr
 __OS_dispatch_queue_attr_vtable
 _OBJC_CLASS_$_OS_dispatch_source
@@ -68,13 +70,14 @@
 _OBJC_METACLASS_$_OS_dispatch_semaphore
 _OBJC_METACLASS_$_OS_dispatch_group
 _OBJC_METACLASS_$_OS_dispatch_queue
+_OBJC_METACLASS_$_OS_dispatch_workloop
 _OBJC_METACLASS_$_OS_dispatch_queue_serial
 _OBJC_METACLASS_$_OS_dispatch_queue_concurrent
-_OBJC_METACLASS_$_OS_dispatch_queue_root
+_OBJC_METACLASS_$_OS_dispatch_queue_global
+_OBJC_METACLASS_$_OS_dispatch_queue_pthread_root
 _OBJC_METACLASS_$_OS_dispatch_queue_main
 _OBJC_METACLASS_$_OS_dispatch_queue_runloop
 _OBJC_METACLASS_$_OS_dispatch_queue_mgr
-_OBJC_METACLASS_$_OS_dispatch_queue_specific_queue
 _OBJC_METACLASS_$_OS_dispatch_queue_attr
 _OBJC_METACLASS_$_OS_dispatch_source
 _OBJC_METACLASS_$_OS_dispatch_mach
diff --git a/xcodeconfig/libdispatch.xcconfig b/xcodeconfig/libdispatch.xcconfig
index 643e1d3..f473b8f 100644
--- a/xcodeconfig/libdispatch.xcconfig
+++ b/xcodeconfig/libdispatch.xcconfig
@@ -73,13 +73,14 @@
 CLANG_WARN_SUSPICIOUS_IMPLICIT_CONVERSION = YES
 CLANG_WARN_SUSPICIOUS_MOVE = YES
 CLANG_WARN_UNREACHABLE_CODE = YES
+CLANG_WARN_UNGUARDED_AVAILABILITY = YES
 GCC_TREAT_WARNINGS_AS_ERRORS = YES
 GCC_OPTIMIZATION_LEVEL = s
 GCC_NO_COMMON_BLOCKS = YES
-GCC_PREPROCESSOR_DEFINITIONS = __DARWIN_NON_CANCELABLE=1 $(DISPATCH_PREPROCESSOR_DEFINITIONS)
+GCC_PREPROCESSOR_DEFINITIONS = __DARWIN_NON_CANCELABLE=1
 STATICLIB_PREPROCESSOR_DEFINITIONS = DISPATCH_VARIANT_STATIC=1 USE_OBJC=0 DISPATCH_USE_DTRACE=0
-WARNING_CFLAGS = -Wall -Wextra -Warray-bounds-pointer-arithmetic -Watomic-properties -Wcomma -Wconditional-uninitialized -Wcovered-switch-default -Wdate-time -Wdeprecated -Wdouble-promotion -Wduplicate-enum -Wexpansion-to-defined -Wfloat-equal -Widiomatic-parentheses -Wignored-qualifiers -Wimplicit-fallthrough -Wnullable-to-nonnull-conversion -Wobjc-interface-ivars -Wover-aligned -Wpacked -Wpointer-arith -Wselector -Wstatic-in-inline -Wsuper-class-method-mismatch -Wswitch-enum -Wtautological-compare -Wunguarded-availability -Wunused -Wno-unknown-warning-option $(NO_WARNING_CFLAGS)
-NO_WARNING_CFLAGS = -Wno-pedantic -Wno-bad-function-cast -Wno-c++-compat -Wno-c++98-compat -Wno-c++98-compat-pedantic -Wno-cast-align -Wno-cast-qual -Wno-disabled-macro-expansion -Wno-documentation-unknown-command -Wno-format-nonliteral -Wno-missing-variable-declarations -Wno-old-style-cast -Wno-padded -Wno-reserved-id-macro -Wno-shift-sign-overflow -Wno-undef -Wno-unreachable-code-aggressive -Wno-unused-macros -Wno-used-but-marked-unused -Wno-vla
+WARNING_CFLAGS = -Wall -Wextra -Warray-bounds-pointer-arithmetic -Watomic-properties -Wcomma -Wconditional-uninitialized -Wcovered-switch-default -Wdate-time -Wdeprecated -Wdouble-promotion -Wduplicate-enum -Wexpansion-to-defined -Wfloat-equal -Widiomatic-parentheses -Wignored-qualifiers -Wnullable-to-nonnull-conversion -Wobjc-interface-ivars -Wover-aligned -Wpacked -Wpointer-arith -Wselector -Wstatic-in-inline -Wsuper-class-method-mismatch -Wswitch-enum -Wtautological-compare -Wunused -Wno-unknown-warning-option $(NO_WARNING_CFLAGS)
+NO_WARNING_CFLAGS = -Wno-pedantic -Wno-bad-function-cast -Wno-c++-compat -Wno-c++98-compat -Wno-c++98-compat-pedantic -Wno-cast-align -Wno-cast-qual -Wno-disabled-macro-expansion -Wno-documentation-unknown-command -Wno-format-nonliteral -Wno-missing-variable-declarations -Wno-old-style-cast -Wno-padded -Wno-reserved-id-macro -Wno-shift-sign-overflow -Wno-undef -Wno-unreachable-code-aggressive -Wno-unused-macros -Wno-used-but-marked-unused -Wno-vla -Wno-unguarded-availability-new
 OTHER_CFLAGS = -fverbose-asm -isystem $(SDKROOT)/System/Library/Frameworks/System.framework/PrivateHeaders $(PLATFORM_CFLAGS)
 OTHER_CFLAGS[arch=i386][sdk=macosx*] = $(OTHER_CFLAGS) -fno-unwind-tables -fno-asynchronous-unwind-tables -fno-exceptions
 OTHER_CFLAGS_normal = -momit-leaf-frame-pointer
@@ -89,7 +90,14 @@
 DYLIB_CURRENT_VERSION = $(CURRENT_PROJECT_VERSION)
 SIM_SUFFIX[sdk=*simulator*] = _sim
 DYLIB_LDFLAGS = -umbrella System -nodefaultlibs -ldyld -lcompiler_rt -lsystem$(SIM_SUFFIX)_kernel -lsystem$(SIM_SUFFIX)_platform -lsystem$(SIM_SUFFIX)_pthread -lsystem_malloc -lsystem_c -lsystem_blocks -lunwind
-OBJC_LDFLAGS = -Wl,-upward-lobjc -Wl,-order_file,$(SRCROOT)/xcodeconfig/libdispatch.order
+OBJC_LDFLAGS = -Wl,-upward-lobjc
+LIBDARWIN_LDFLAGS = -Wl,-upward-lsystem_darwin
+LIBDARWIN_LDFLAGS[sdk=*simulator*] =
+ORDER_LDFLAGS = -Wl,-order_file,$(SRCROOT)/xcodeconfig/libdispatch.order -Wl,-dirty_data_list,$(SRCROOT)/xcodeconfig/libdispatch.dirty
+ORDER_LDFLAGS[sdk=macosx*] = -Wl,-order_file,$(SRCROOT)/xcodeconfig/libdispatch.order
 ALIASES_LDFLAGS = -Wl,-alias_list,$(SRCROOT)/xcodeconfig/libdispatch.aliases
-OTHER_LDFLAGS = $(OTHER_LDFLAGS) $(DYLIB_LDFLAGS) $(CR_LDFLAGS) $(OBJC_LDFLAGS) $(ALIASES_LDFLAGS) $(PLATFORM_LDFLAGS)
+OTHER_LDFLAGS = $(OTHER_LDFLAGS) $(LIBDARWIN_LDFLAGS) $(DYLIB_LDFLAGS) $(CR_LDFLAGS) $(OBJC_LDFLAGS) $(ALIASES_LDFLAGS) $(PLATFORM_LDFLAGS) $(ORDER_LDFLAGS)
 OTHER_MIGFLAGS = -novouchers
+
+COPY_HEADERS_RUN_UNIFDEF = YES
+COPY_HEADERS_UNIFDEF_FLAGS = -U__DISPATCH_BUILDING_DISPATCH__ -U__linux__ -DTARGET_OS_WIN32=0 -U__ANDROID__
diff --git a/xcodeconfig/libfirehose.xcconfig b/xcodeconfig/libfirehose.xcconfig
index 4c71199..547b13a 100644
--- a/xcodeconfig/libfirehose.xcconfig
+++ b/xcodeconfig/libfirehose.xcconfig
@@ -21,7 +21,7 @@
 SUPPORTED_PLATFORMS = macosx iphoneos iphonesimulator appletvos appletvsimulator watchos watchsimulator
 PRODUCT_NAME = $(TARGET_NAME)
 INSTALL_PATH = /usr/local/lib/
-GCC_PREPROCESSOR_DEFINITIONS = $(inherited) FIREHOSE_SERVER=1 DISPATCH_USE_DTRACE=0
+GCC_PREPROCESSOR_DEFINITIONS = $(GCC_PREPROCESSOR_DEFINITIONS) FIREHOSE_SERVER=1 DISPATCH_USE_DTRACE=0
 OTHER_MIGFLAGS = -novouchers
 OTHER_LDFLAGS =
 PUBLIC_HEADERS_FOLDER_PATH = /usr/include/os
diff --git a/xcodeconfig/libfirehose_kernel.xcconfig b/xcodeconfig/libfirehose_kernel.xcconfig
index c572f80..e6d83a3 100644
--- a/xcodeconfig/libfirehose_kernel.xcconfig
+++ b/xcodeconfig/libfirehose_kernel.xcconfig
@@ -18,16 +18,20 @@
 // @APPLE_APACHE_LICENSE_HEADER_END@
 //
 
-#include "libfirehose.xcconfig"
-
 SUPPORTED_PLATFORMS = macosx iphoneos appletvos watchos
 PRODUCT_NAME = $(TARGET_NAME)
 INSTALL_PATH = /usr/local/lib/kernel/
-GCC_PREPROCESSOR_DEFINITIONS = $(inherited) KERNEL=1 DISPATCH_USE_DTRACE=0
+GCC_PREPROCESSOR_DEFINITIONS = $(GCC_PREPROCESSOR_DEFINITIONS) KERNEL=1 DISPATCH_USE_DTRACE=0
+OTHER_MIGFLAGS = -novouchers
+OTHER_LDFLAGS =
 OTHER_CFLAGS = -mkernel -nostdinc -Wno-packed
-// LLVM_LTO = YES
+PUBLIC_HEADERS_FOLDER_PATH = /usr/include/os
 PRIVATE_HEADERS_FOLDER_PATH = /usr/local/include/kernel/os
 HEADER_SEARCH_PATHS = $(PROJECT_DIR) $(SDKROOT)/System/Library/Frameworks/Kernel.framework/PrivateHeaders $(SDKROOT)/System/Library/Frameworks/Kernel.framework/Headers $(SDKROOT)/usr/local/include/os $(SDKROOT)/usr/local/include/firehose
+STRIP_INSTALLED_PRODUCT = NO
+COPY_PHASE_STRIP = NO
+SEPARATE_STRIP = NO
+VALID_ARCHS[sdk=macosx*] = $(NATIVE_ARCH_ACTUAL)
 
 COPY_HEADERS_RUN_UNIFDEF = YES
 COPY_HEADERS_UNIFDEF_FLAGS = -DKERNEL=1 -DOS_FIREHOSE_SPI=1 -DOS_VOUCHER_ACTIVITY_SPI_TYPES=1 -UOS_VOUCHER_ACTIVITY_SPI
diff --git a/xcodescripts/check-order.sh b/xcodescripts/check-order.sh
new file mode 100644
index 0000000..60cb9eb
--- /dev/null
+++ b/xcodescripts/check-order.sh
@@ -0,0 +1,90 @@
+#!/bin/bash -e
+#
+# Copyright (c) 2018 Apple Inc. All rights reserved.
+#
+# @APPLE_APACHE_LICENSE_HEADER_START@
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# @APPLE_APACHE_LICENSE_HEADER_END@
+#
+
+test "$ACTION" = install || exit 0
+
+list_objc_syms ()
+{
+    nm -arch $1 -nU ${DSTROOT}/usr/lib/system/libdispatch.dylib | grep _OBJC | cut -d' ' -f3
+}
+
+list_mutable_data_syms ()
+{
+    nm -arch $1 -m ${DSTROOT}/usr/lib/system/libdispatch.dylib |grep __DATA|egrep -v '(__const|__crash_info)'|sed 's/^.* //'
+}
+
+list_objc_order ()
+{
+    grep '^_OBJC' "${SCRIPT_INPUT_FILE_0}"
+}
+
+list_dirty_order ()
+{
+    grep '^[^#]' "${SCRIPT_INPUT_FILE_1}"
+}
+
+list_clean_order ()
+{
+    grep '^[^#]' "${SCRIPT_INPUT_FILE_2}"
+}
+
+fail=
+
+case "$PLATFORM_NAME" in
+    *simulator) exit 0;;
+    *) ;;
+esac
+
+if comm -12 <(list_dirty_order | sort) <(list_clean_order | sort) | grep .; then
+    echo 1>&2 "error: *** SYMBOLS CAN'T BE BOTH CLEAN AND DIRTY ***"
+    comm 1>&2 -12 <(list_dirty_order | sort) <(list_clean_order | sort)
+    fail=t
+fi
+
+for arch in $ARCHS; do
+    if test "$PLATFORM_NAME" = macosx -a "$arch" = i386; then
+        continue
+    fi
+
+    if list_mutable_data_syms $arch | sort | uniq -c | grep -qvw 1; then
+        echo 1>&2 "error: *** DUPLICATED SYMBOL NAMES FOR SLICE $arch ***"
+        list_mutable_data_syms $arch | sort | uniq -c | grep -qw 1 1>&2
+        fail=t
+    fi
+
+    if comm -23 <(list_mutable_data_syms $arch | sort) <((list_dirty_order; list_clean_order) | sort) | grep -q .; then
+        echo 1>&2 "error: *** SYMBOLS NOT MARKED CLEAN OR DIRTY FOR SLICE $arch ***"
+        comm 1>&2 -23 <(list_mutable_data_syms $arch | sort) <((list_dirty_order; list_clean_order) | sort)
+        fail=t
+    fi
+
+    if comm -13 <(list_mutable_data_syms $arch | sort) <((list_dirty_order; list_clean_order) | sort) | grep -q .; then
+        echo 1>&2 "warning: *** Found unknown symbols in dirty/clean files for slice $arch ***"
+        comm 1>&2 -13 <(list_mutable_data_syms $arch | sort) <((list_dirty_order; list_clean_order) | sort)
+    fi
+
+    if ! cmp -s <(list_objc_syms $arch) <(list_objc_order); then
+        echo 1>&2 "error: *** SYMBOL ORDER IS NOT WHAT IS EXPECTED FOR SLICE $arch ***"
+        diff 1>&2 -U100 <(list_objc_syms $arch) <(list_objc_order) || fail=t
+    fi
+done
+
+test -z "$fail"
diff --git a/xcodescripts/mig-headers.sh b/xcodescripts/mig-headers.sh
index 003e9f2..bd477c0 100755
--- a/xcodescripts/mig-headers.sh
+++ b/xcodescripts/mig-headers.sh
@@ -22,6 +22,11 @@
 export MIGCC="$(xcrun -find cc)"
 export MIGCOM="$(xcrun -find migcom)"
 export PATH="${PLATFORM_DEVELOPER_BIN_DIR}:${DEVELOPER_BIN_DIR}:${PATH}"
+
+for p in ${HEADER_SEARCH_PATHS}; do
+	OTHER_MIGFLAGS="${OTHER_MIGFLAGS} -I${p}"
+done
+
 for a in ${ARCHS}; do
 	xcrun mig ${OTHER_MIGFLAGS} -arch $a -header "${SCRIPT_OUTPUT_FILE_0}" \
 			-sheader "${SCRIPT_OUTPUT_FILE_1}" -user /dev/null \