Merge pull request #289 from apple/das-darwin-libdispatch-913.1.4-merge-master

Merge darwin/libdispatch-913.1.4 to master
diff --git a/PATCHES b/PATCHES
index 963c8b1..c3d28b3 100644
--- a/PATCHES
+++ b/PATCHES
@@ -332,3 +332,24 @@
 [5e8789e] APPLIED rdar://32283666
 [3fba60a] APPLIED rdar://32283666
 [d6eb245] APPLIED rdar://32283666
+[0b6c22e] APPLIED rdar://33531111
+[5a3c02a] APPLIED rdar://33531111
+[22df1e7] APPLIED rdar://33531111
+[21273de] APPLIED rdar://33531111
+[dc1857c] APPLIED rdar://33531111
+[56f36b6] APPLIED rdar://33531111
+[c87c6bb] APPLIED rdar://33531111
+[b791d23] APPLIED rdar://33531111
+[c2d0c49] APPLIED rdar://33531111
+[1d25040] APPLIED rdar://33531111
+[ab89c6c] APPLIED rdar://33531111
+[e591e7e] APPLIED rdar://33531111
+[ded5bab] APPLIED rdar://33531111
+[ce90d0c] APPLIED rdar://33531111
+[69c8f3e] APPLIED rdar://33531111
+[23a3a84] APPLIED rdar://33531111
+[79b7529] APPLIED rdar://33531111
+[f8e71eb] APPLIED rdar://33531111
+[8947dcf] APPLIED rdar://33531111
+[5ad9208] APPLIED rdar://33531111
+[698d085] APPLIED rdar://33531111
diff --git a/dispatch/queue.h b/dispatch/queue.h
index 606bd30..8dab75f 100644
--- a/dispatch/queue.h
+++ b/dispatch/queue.h
@@ -223,7 +223,7 @@
  * @abstract
  * Constant to pass to dispatch_apply() or dispatch_apply_f() to request that
  * the system automatically use worker threads that match the configuration of
- * the current thread most closely.
+ * the current thread as closely as possible.
  *
  * @discussion
  * When submitting a block for parallel invocation, passing this constant as the
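
For illustration only (not part of this patch): the function-pointer variant takes the same constant. A minimal, self-contained sketch; the worker name and data below are made up.

    #include <dispatch/dispatch.h>
    #include <stdio.h>

    // Hypothetical worker: 'ctxt' is the array handed to dispatch_apply_f().
    static void print_item(void *ctxt, size_t idx)
    {
        int *values = ctxt;
        printf("%d\n", values[idx]);
    }

    int main(void)
    {
        int values[4] = {10, 20, 30, 40};
        // DISPATCH_APPLY_AUTO lets the system choose worker threads that
        // match the caller's configuration as closely as possible.
        dispatch_apply_f(4, DISPATCH_APPLY_AUTO, values, print_item);
        return 0;
    }
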
diff --git a/libdispatch.xcodeproj/project.pbxproj b/libdispatch.xcodeproj/project.pbxproj
index 361994f..e136647 100644
--- a/libdispatch.xcodeproj/project.pbxproj
+++ b/libdispatch.xcodeproj/project.pbxproj
@@ -741,10 +741,12 @@
 		96DF70BD0F38FE3C0074BD99 /* once.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; lineEnding = 0; path = once.c; sourceTree = "<group>"; xcLanguageSpecificationIdentifier = xcode.lang.c; };
 		B63B793F1E8F004F0060C1E1 /* dispatch_no_blocks.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; path = dispatch_no_blocks.c; sourceTree = "<group>"; };
 		B68330BC1EBCF6080003E71C /* dispatch_wl.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; path = dispatch_wl.c; sourceTree = "<group>"; };
+		B69878521F06F8790088F94F /* dispatch_signals.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; path = dispatch_signals.c; sourceTree = "<group>"; };
 		B6AC73FD1EB10973009FB2F2 /* perf_thread_request.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; path = perf_thread_request.c; sourceTree = "<group>"; };
 		B6AE9A4A1D7F53B300AC007F /* dispatch_queue_create.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = dispatch_queue_create.c; sourceTree = "<group>"; };
 		B6AE9A561D7F53C100AC007F /* perf_async_bench.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = perf_async_bench.m; sourceTree = "<group>"; };
 		B6AE9A581D7F53CB00AC007F /* perf_bench.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = perf_bench.m; sourceTree = "<group>"; };
+		B6FA01801F0AD522004479BF /* dispatch_pthread_root_queue.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; path = dispatch_pthread_root_queue.c; sourceTree = "<group>"; };
 		C00B0E0A1C5AEBBE000330B3 /* libdispatch_dyld_stub.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = libdispatch_dyld_stub.a; sourceTree = BUILT_PRODUCTS_DIR; };
 		C00B0E121C5AEBF7000330B3 /* libdispatch-dyld-stub.xcconfig */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.xcconfig; path = "libdispatch-dyld-stub.xcconfig"; sourceTree = "<group>"; };
 		C01866BD1C5973210040FC07 /* libdispatch.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = libdispatch.a; sourceTree = BUILT_PRODUCTS_DIR; };
@@ -1060,6 +1062,7 @@
 				6E67D9151C1768B300FC98AC /* dispatch_pingpong.c */,
 				6E326B441C239B61002A6505 /* dispatch_priority.c */,
 				6E326AB51C225477002A6505 /* dispatch_proc.c */,
+				B6FA01801F0AD522004479BF /* dispatch_pthread_root_queue.c */,
 				6E326AB31C224870002A6505 /* dispatch_qos.c */,
 				B6AE9A4A1D7F53B300AC007F /* dispatch_queue_create.c */,
 				6E67D9111C17669C00FC98AC /* dispatch_queue_finalizer.c */,
@@ -1069,6 +1072,7 @@
 				6E326ADC1C234396002A6505 /* dispatch_readsync.c */,
 				6E8E4E6D1C1A35EE0004F5CC /* dispatch_select.c */,
 				6E8E4E9B1C1A4EF10004F5CC /* dispatch_sema.c */,
+				B69878521F06F8790088F94F /* dispatch_signals.c */,
 				6EA2CB841C005DEF0076794A /* dispatch_source.c */,
 				6E326AE01C234780002A6505 /* dispatch_starfish.c */,
 				6EE89F3D1BFAF5B000EB140D /* dispatch_state_machine.c */,
diff --git a/man/dispatch_apply.3 b/man/dispatch_apply.3
index 5a43a0a..57c99a8 100644
--- a/man/dispatch_apply.3
+++ b/man/dispatch_apply.3
@@ -1,4 +1,4 @@
-.\" Copyright (c) 2008-2010 Apple Inc. All rights reserved.
+.\" Copyright (c) 2008-2017 Apple Inc. All rights reserved.
 .Dd May 1, 2009
 .Dt dispatch_apply 3
 .Os Darwin
@@ -20,21 +20,32 @@
 .Fn dispatch_apply
 function provides data-level concurrency through a "for (;;)" loop like primitive:
 .Bd -literal
-dispatch_queue_t the_queue = dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0);
 size_t iterations = 10;
 
 // 'idx' is zero indexed, just like:
 // for (idx = 0; idx < iterations; idx++)
 
-dispatch_apply(iterations, the_queue, ^(size_t idx) {
+dispatch_apply(iterations, DISPATCH_APPLY_AUTO, ^(size_t idx) {
 	printf("%zu\\n", idx);
 });
 .Ed
 .Pp
+Although any queue can be used, it is strongly recommended to use
+.Vt DISPATCH_APPLY_AUTO
+as the
+.Vt queue
+argument to both
+.Fn dispatch_apply
+and
+.Fn dispatch_apply_f ,
+as shown in the example above, since this allows the system to automatically use worker threads
+that match the configuration of the current thread as closely as possible.
+No assumptions should be made about which global concurrent queue will be used.
+.Pp
 Like a "for (;;)" loop, the
 .Fn dispatch_apply
 function is synchronous.
-If asynchronous behavior is desired, please wrap the call to
+If asynchronous behavior is desired, wrap the call to
 .Fn dispatch_apply
 with a call to
 .Fn dispatch_async
@@ -49,7 +60,7 @@
 .Bd -literal
 #define STRIDE 3
 
-dispatch_apply(count / STRIDE, queue, ^(size_t idx) {
+dispatch_apply(count / STRIDE, DISPATCH_APPLY_AUTO, ^(size_t idx) {
 	size_t j = idx * STRIDE;
 	size_t j_stop = j + STRIDE;
 	do {
@@ -74,12 +85,21 @@
 and target queue for the duration of the asynchronous operation (as the calling
 function may immediately release its interest in these objects).
 .Sh FUNDAMENTALS
-Conceptually,
 .Fn dispatch_apply
-is a convenient wrapper around
+and
+.Fn dispatch_apply_f
+attempt to quickly create enough worker threads to efficiently iterate work in parallel.
+By contrast, a loop that passes work items individually to
 .Fn dispatch_async
-and a semaphore to wait for completion.
-In practice, the dispatch library optimizes this function.
+or
+.Fn dispatch_async_f
+will incur more overhead and does not express the desired parallel execution semantics to
+the system, and so may not create an optimal number of worker threads for a parallel workload.
+For this reason, prefer to use
+.Fn dispatch_apply
+or
+.Fn dispatch_apply_f
+when parallel execution is important.
 .Pp
 The
 .Fn dispatch_apply
@@ -99,5 +119,4 @@
 .Sh SEE ALSO
 .Xr dispatch 3 ,
 .Xr dispatch_async 3 ,
-.Xr dispatch_queue_create 3 ,
-.Xr dispatch_semaphore_create 3
+.Xr dispatch_queue_create 3
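
To make the FUNDAMENTALS paragraph concrete, an illustrative sketch (the process() helper is hypothetical) contrasting a dispatch_async loop with dispatch_apply:

    // A per-item async loop: more overhead, and the system cannot tell
    // how wide the parallel workload is.
    dispatch_group_t group = dispatch_group_create();
    for (size_t i = 0; i < count; i++) {
        dispatch_group_async(group,
                dispatch_get_global_queue(QOS_CLASS_DEFAULT, 0), ^{
            process(i); // hypothetical per-item work
        });
    }
    dispatch_group_wait(group, DISPATCH_TIME_FOREVER);

    // Preferred: synchronous, and expresses the parallel width up front.
    dispatch_apply(count, DISPATCH_APPLY_AUTO, ^(size_t i) {
        process(i);
    });
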
diff --git a/os/voucher_private.h b/os/voucher_private.h
index 6675a0e..aecbbc9 100644
--- a/os/voucher_private.h
+++ b/os/voucher_private.h
@@ -202,8 +202,23 @@
  * This flag is ignored if a specific voucher object is assigned with the
  * dispatch_block_create_with_voucher* functions, and is equivalent to passing
  * the NULL voucher to these functions.
+ *
+ * @const DISPATCH_BLOCK_IF_LAST_RESET_QUEUE_QOS_OVERRIDE
+ * Flag indicating that this dispatch block object should try to reset the
+ * recorded maximum QoS of all currently enqueued items on a serial dispatch
+ * queue at the base of a queue hierarchy.
+ *
+ * This only works if the queue becomes empty by dequeuing the block in
+ * question, and then allows that block to enqueue more work on this hierarchy
+ * without perpetuating QoS overrides resulting from items previously executed
+ * on the hierarchy.
+ *
+ * A dispatch block object created with this flag set cannot be used with
+ * dispatch_block_wait() or dispatch_block_cancel().
  */
-#define DISPATCH_BLOCK_NO_VOUCHER (0x40)
+#define DISPATCH_BLOCK_NO_VOUCHER (0x40ul)
+
+#define DISPATCH_BLOCK_IF_LAST_RESET_QUEUE_QOS_OVERRIDE (0x80ul)
 
 /*!
  * @function dispatch_block_create_with_voucher
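
A hedged usage sketch for the new flag (serial_q is a hypothetical serial queue at the base of a hierarchy); note that the DISPATCH_BLOCK_API_MASK widening in queue_internal.h further down is what lets dispatch_block_create() accept the 0x80 bit:

    dispatch_block_t b = dispatch_block_create(
            DISPATCH_BLOCK_IF_LAST_RESET_QUEUE_QOS_OVERRIDE, ^{
        // If dequeuing this block left serial_q empty, the queue's recorded
        // max QoS override has been reset, so work enqueued from here does
        // not inherit stale overrides.
    });
    dispatch_async(serial_q, b);
    // Per the documentation above, dispatch_block_wait() and
    // dispatch_block_cancel() must not be used with this block.
    Block_release(b);
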
diff --git a/private/private.h b/private/private.h
index cc9d578..ed9f876 100644
--- a/private/private.h
+++ b/private/private.h
@@ -43,6 +43,9 @@
 #include <sys/cdefs.h>
 #endif
 #include <pthread.h>
+#if TARGET_OS_MAC
+#include <pthread/qos.h>
+#endif
 
 #ifndef __DISPATCH_BUILDING_DISPATCH__
 #include <dispatch/dispatch.h>
diff --git a/private/source_private.h b/private/source_private.h
index 019f648..ad22e6a 100644
--- a/private/source_private.h
+++ b/private/source_private.h
@@ -102,6 +102,13 @@
 API_AVAILABLE(macos(10.8), ios(6.0)) DISPATCH_LINUX_UNAVAILABLE()
 DISPATCH_SOURCE_TYPE_DECL(sock);
 
+/*!
+ * @const DISPATCH_SOURCE_TYPE_NW_CHANNEL
+ * @discussion A dispatch source that monitors events on a network channel.
+ */
+#define DISPATCH_SOURCE_TYPE_NW_CHANNEL (&_dispatch_source_type_nw_channel)
+API_AVAILABLE(macos(10.12), ios(10.0), tvos(10.0), watchos(3.0)) DISPATCH_LINUX_UNAVAILABLE()
+DISPATCH_SOURCE_TYPE_DECL(nw_channel);
 
 __END_DECLS
 
@@ -165,6 +172,15 @@
 	DISPATCH_SOCK_NOTIFY_ACK = 0x00004000,
 };
 
+/*!
+ * @enum dispatch_source_nw_channel_flags_t
+ *
+ * @constant DISPATCH_NW_CHANNEL_FLOW_ADV_UPDATE
+ * Received network channel flow advisory.
+ */
+enum {
+	DISPATCH_NW_CHANNEL_FLOW_ADV_UPDATE = 0x00000001,
+};
 
 /*!
  * @enum dispatch_source_vfs_flags_t
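
An illustrative registration sketch for the new source type (channel_fd and q are assumed to exist; the type is Apple-private and backed by EVFILT_NW_CHANNEL, see event_kevent.c below):

    dispatch_source_t ds = dispatch_source_create(
            DISPATCH_SOURCE_TYPE_NW_CHANNEL, (uintptr_t)channel_fd,
            DISPATCH_NW_CHANNEL_FLOW_ADV_UPDATE, q);
    dispatch_source_set_event_handler(ds, ^{
        if (dispatch_source_get_data(ds) & DISPATCH_NW_CHANNEL_FLOW_ADV_UPDATE) {
            // A flow advisory arrived; re-evaluate how much may be sent.
        }
    });
    dispatch_activate(ds);
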
diff --git a/src/apply.c b/src/apply.c
index 9d64522..6f44cf9 100644
--- a/src/apply.c
+++ b/src/apply.c
@@ -253,12 +253,23 @@
 	if (unlikely(iterations == 0)) {
 		return;
 	}
-	int32_t thr_cnt = (int32_t)dispatch_hw_config(active_cpus);
 	dispatch_thread_context_t dtctxt =
 			_dispatch_thread_context_find(_dispatch_apply_key);
 	size_t nested = dtctxt ? dtctxt->dtc_apply_nesting : 0;
 	dispatch_queue_t old_dq = _dispatch_queue_get_current();
 
+	if (likely(dq == DISPATCH_APPLY_AUTO)) {
+		dq = _dispatch_apply_root_queue(old_dq);
+	}
+	dispatch_qos_t qos = _dispatch_priority_qos(dq->dq_priority);
+	if (unlikely(dq->do_targetq)) {
+		// if the queue passed in is not a root queue, use the current QoS
+		// since the caller participates in the work anyway
+		qos = _dispatch_qos_from_pp(_dispatch_get_priority());
+	}
+	int32_t thr_cnt = (int32_t)_dispatch_qos_max_parallelism(qos,
+			DISPATCH_MAX_PARALLELISM_ACTIVE);
+
 	if (likely(!nested)) {
 		nested = iterations;
 	} else {
@@ -269,9 +280,6 @@
 	if (iterations < (size_t)thr_cnt) {
 		thr_cnt = (int32_t)iterations;
 	}
-	if (likely(dq == DISPATCH_APPLY_AUTO)) {
-		dq = _dispatch_apply_root_queue(old_dq);
-	}
 	struct dispatch_continuation_s dc = {
 		.dc_func = (void*)func,
 		.dc_ctxt = ctxt,
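
Net effect of the reordered hunk: thr_cnt is now derived from QoS-aware parallelism instead of the raw active-CPU count. A worked sketch under assumed hardware:

    // Assumed machine: 8 logical / 4 physical cores, all active.
    // Before: thr_cnt = dispatch_hw_config(active_cpus) = 8 for every QoS.
    // After: the target root queue's QoS is consulted first, e.g.
    int32_t thr_cnt = (int32_t)_dispatch_qos_max_parallelism(
            DISPATCH_QOS_BACKGROUND, DISPATCH_MAX_PARALLELISM_ACTIVE);
    // which is free to return fewer than 8 threads for low QoS tiers;
    // the result is still clamped to 'iterations' later in the function.
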
diff --git a/src/event/event_config.h b/src/event/event_config.h
index 2ac3c42..60f776f 100644
--- a/src/event/event_config.h
+++ b/src/event/event_config.h
@@ -50,6 +50,25 @@
 #define DISPATCH_MACHPORT_DEBUG 0
 #endif
 
+#ifndef DISPATCH_TIMER_ASSERTIONS
+#if DISPATCH_DEBUG
+#define DISPATCH_TIMER_ASSERTIONS 1
+#else
+#define DISPATCH_TIMER_ASSERTIONS 0
+#endif
+#endif
+
+#if DISPATCH_TIMER_ASSERTIONS
+#define DISPATCH_TIMER_ASSERT(a, op, b, text) ({ \
+		typeof(a) _a = (a); \
+		if (unlikely(!(_a op (b)))) { \
+			DISPATCH_CLIENT_CRASH(_a, "Timer: " text); \
+		} \
+	})
+#else
+#define DISPATCH_TIMER_ASSERT(a, op, b, text) ((void)0)
+#endif
+
 #ifndef EV_VANISHED
 #define EV_VANISHED 0x0200
 #endif
@@ -105,6 +124,11 @@
 #	ifndef VQ_DESIRED_DISK
 #	undef HAVE_DECL_VQ_DESIRED_DISK
 #	endif // VQ_DESIRED_DISK
+
+#	if !defined(EVFILT_NW_CHANNEL) && defined(__APPLE__)
+#	define EVFILT_NW_CHANNEL       (-16)
+#	define NOTE_FLOW_ADV_UPDATE    	0x1
+#	endif
 #else // DISPATCH_EVENT_BACKEND_KEVENT
 #	define EV_ADD					0x0001
 #	define EV_DELETE				0x0002
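
A hypothetical use of the new macro (dt and expected are made-up names): in DISPATCH_TIMER_ASSERTIONS builds a failed comparison crashes with the "Timer: " prefix, otherwise the macro compiles to ((void)0) without evaluating its arguments:

    DISPATCH_TIMER_ASSERT(dt->du_ident, ==, expected, "wrong timer bucket");
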
diff --git a/src/event/event_kevent.c b/src/event/event_kevent.c
index c15a397..8fe76d5 100644
--- a/src/event/event_kevent.c
+++ b/src/event/event_kevent.c
@@ -671,8 +671,9 @@
 		r = 0;
 	} else if (flags & KEVENT_FLAG_ERROR_EVENTS) {
 		for (i = 0, r = 0; i < n; i++) {
-			if ((ke_out[i].flags & EV_ERROR) && (r = (int)ke_out[i].data)) {
+			if ((ke_out[i].flags & EV_ERROR) && ke_out[i].data) {
 				_dispatch_kevent_drain(&ke_out[i]);
+				r = (int)ke_out[i].data;
 			}
 		}
 	} else {
@@ -1407,6 +1408,17 @@
 };
 #endif // EVFILT_SOCK
 
+#ifdef EVFILT_NW_CHANNEL
+const dispatch_source_type_s _dispatch_source_type_nw_channel = {
+	.dst_kind       = "nw_channel",
+	.dst_filter     = EVFILT_NW_CHANNEL,
+	.dst_flags      = DISPATCH_EV_DIRECT|EV_CLEAR|EV_VANISHED,
+	.dst_mask       = NOTE_FLOW_ADV_UPDATE,
+	.dst_size       = sizeof(struct dispatch_source_refs_s),
+	.dst_create     = _dispatch_unote_create_with_fd,
+	.dst_merge_evt  = _dispatch_source_merge_evt,
+};
+#endif // EVFILT_NW_CHANNEL
 
 #if DISPATCH_USE_MEMORYSTATUS
 
@@ -1609,9 +1621,9 @@
 	if (!tlr) {
 		DISPATCH_INTERNAL_CRASH(0, "message received without expected trailer");
 	}
-	if (tlr->msgh_audit.val[DISPATCH_MACH_AUDIT_TOKEN_PID] != 0) {
-		(void)dispatch_assume_zero(
-				tlr->msgh_audit.val[DISPATCH_MACH_AUDIT_TOKEN_PID]);
+	if (hdr->msgh_id <= MACH_NOTIFY_LAST
+			&& dispatch_assume_zero(tlr->msgh_audit.val[
+			DISPATCH_MACH_AUDIT_TOKEN_PID])) {
 		mach_msg_destroy(hdr);
 		return;
 	}
diff --git a/src/init.c b/src/init.c
index dea5e87..6672fac 100644
--- a/src/init.c
+++ b/src/init.c
@@ -897,6 +897,7 @@
 _dispatch_temporary_resource_shortage(void)
 {
 	sleep(1);
+	asm("");  // prevent tailcall
 }
 
 void *
diff --git a/src/inline_internal.h b/src/inline_internal.h
index 0ed9e51..4103c68 100644
--- a/src/inline_internal.h
+++ b/src/inline_internal.h
@@ -1740,7 +1740,7 @@
 _dispatch_root_queue_identity_assume(dispatch_queue_t assumed_rq)
 {
 	dispatch_priority_t old_dbp = _dispatch_get_basepri();
-	dispatch_assert(dx_type(assumed_rq) == DISPATCH_QUEUE_GLOBAL_ROOT_TYPE);
+	dispatch_assert(dx_hastypeflag(assumed_rq, QUEUE_ROOT));
 	_dispatch_reset_basepri(assumed_rq->dq_priority);
 	_dispatch_queue_set_current(assumed_rq);
 	return old_dbp;
@@ -2108,11 +2108,25 @@
 
 	if ((!_dispatch_priority_qos(pri) || (pri & inherited_flag)) &&
 			(tpri & rootqueue_flag)) {
-		if (tpri & defaultqueue_flag) {
-			dq->dq_priority = 0;
+		if (_dispatch_priority_override_qos(pri) == DISPATCH_QOS_SATURATED) {
+			pri &= DISPATCH_PRIORITY_OVERRIDE_MASK;
 		} else {
-			dq->dq_priority = (tpri & ~rootqueue_flag) | inherited_flag;
+			pri = 0;
 		}
+		if (tpri & defaultqueue_flag) {
+			// <rdar://problem/32921639> base queues need to know they target
+			// the default root queue so that _dispatch_queue_override_qos()
+			// in _dispatch_queue_class_wakeup() can fallback to QOS_DEFAULT
+			// if no other priority was provided.
+			pri |= defaultqueue_flag;
+		} else {
+			pri |= (tpri & ~rootqueue_flag) | inherited_flag;
+		}
+		dq->dq_priority = pri;
+	} else if (pri & defaultqueue_flag) {
+		// the DEFAULTQUEUE flag is only set on queues due to the code above,
+		// and must never be kept if we don't target a global root queue.
+		dq->dq_priority = (pri & ~defaultqueue_flag);
 	}
 #else
 	(void)dq; (void)tq;
@@ -2272,7 +2286,9 @@
 _dispatch_queue_override_qos(dispatch_queue_class_t dqu, dispatch_qos_t qos)
 {
 	if (dqu._oq->oq_priority & DISPATCH_PRIORITY_FLAG_DEFAULTQUEUE) {
-		return qos;
+		// queues targeting the default root queue use any asynchronous
+		// workitem priority available and fallback to QOS_DEFAULT otherwise.
+		return qos ? qos : DISPATCH_QOS_DEFAULT;
 	}
 	// for asynchronous workitems, queue priority is the floor for overrides
 	return MAX(qos, _dispatch_priority_qos(dqu._oq->oq_priority));
@@ -2338,14 +2354,20 @@
 }
 
 DISPATCH_ALWAYS_INLINE DISPATCH_WARN_RESULT
-static inline bool
-_dispatch_block_invoke_should_set_priority(dispatch_block_flags_t flags)
+static inline pthread_priority_t
+_dispatch_block_invoke_should_set_priority(dispatch_block_flags_t flags,
+        pthread_priority_t new_pri)
 {
-	if (flags & DISPATCH_BLOCK_HAS_PRIORITY) {
-		return (flags & DISPATCH_BLOCK_ENFORCE_QOS_CLASS) ||
-				!(flags & DISPATCH_BLOCK_INHERIT_QOS_CLASS);
+	pthread_priority_t old_pri, p = 0;  // 0 means do not change priority.
+	if ((flags & DISPATCH_BLOCK_HAS_PRIORITY)
+			&& ((flags & DISPATCH_BLOCK_ENFORCE_QOS_CLASS) ||
+			!(flags & DISPATCH_BLOCK_INHERIT_QOS_CLASS))) {
+		old_pri = _dispatch_get_priority();
+		new_pri &= ~_PTHREAD_PRIORITY_FLAGS_MASK;
+		p = old_pri & ~_PTHREAD_PRIORITY_FLAGS_MASK;
+		if (!p || p >= new_pri) p = 0;
 	}
-	return false;
+	return p;
 }
 
 DISPATCH_ALWAYS_INLINE
diff --git a/src/internal.h b/src/internal.h
index 3f481aa..84e33e3 100644
--- a/src/internal.h
+++ b/src/internal.h
@@ -459,9 +459,9 @@
 		if (__builtin_constant_p(e)) { \
 			dispatch_static_assert(e); \
 		} else { \
-			typeof(e) _e = fastpath(e); /* always eval 'e' */ \
-			if (!_e) { \
-				__assert_rtn(__func__, __FILE__, __LINE__, #e); \
+			typeof(e) _e = (e); /* always eval 'e' */ \
+			if (unlikely(DISPATCH_DEBUG && !_e)) { \
+				_dispatch_abort(__LINE__, (long)_e); \
 			} \
 		} \
 	} while (0)
@@ -483,9 +483,9 @@
 		if (__builtin_constant_p(e)) { \
 			dispatch_static_assert(e); \
 		} else { \
-			typeof(e) _e = slowpath(e); /* always eval 'e' */ \
-			if (_e) { \
-				__assert_rtn(__func__, __FILE__, __LINE__, #e); \
+			typeof(e) _e = (e); /* always eval 'e' */ \
+			if (unlikely(DISPATCH_DEBUG && _e)) { \
+				_dispatch_abort(__LINE__, (long)_e); \
 			} \
 		} \
 	} while (0)
@@ -506,8 +506,8 @@
  */
 #if __GNUC__
 #define dispatch_assume(e) ({ \
-		typeof(e) _e = fastpath(e); /* always eval 'e' */ \
-		if (!_e) { \
+		typeof(e) _e = (e); /* always eval 'e' */ \
+		if (unlikely(!_e)) { \
 			if (__builtin_constant_p(e)) { \
 				dispatch_static_assert(e); \
 			} \
@@ -531,8 +531,8 @@
  */
 #if __GNUC__
 #define dispatch_assume_zero(e) ({ \
-		typeof(e) _e = slowpath(e); /* always eval 'e' */ \
-		if (_e) { \
+		typeof(e) _e = (e); /* always eval 'e' */ \
+		if (unlikely(_e)) { \
 			if (__builtin_constant_p(e)) { \
 				dispatch_static_assert(e); \
 			} \
@@ -558,8 +558,8 @@
 		if (__builtin_constant_p(e)) { \
 			dispatch_static_assert(e); \
 		} else { \
-			typeof(e) _e = fastpath(e); /* always eval 'e' */ \
-			if (DISPATCH_DEBUG && !_e) { \
+			typeof(e) _e = (e); /* always eval 'e' */ \
+			if (unlikely(DISPATCH_DEBUG && !_e)) { \
 				_dispatch_log("%s() 0x%lx: " msg, __func__, (long)_e, ##args); \
 				abort(); \
 			} \
@@ -567,8 +567,8 @@
 	} while (0)
 #else
 #define dispatch_debug_assert(e, msg, args...) do { \
-	long _e = (long)fastpath(e); /* always eval 'e' */ \
-	if (DISPATCH_DEBUG && !_e) { \
+	typeof(e) _e = (e); /* always eval 'e' */ \
+	if (unlikely(DISPATCH_DEBUG && !_e)) { \
 		_dispatch_log("%s() 0x%lx: " msg, __FUNCTION__, _e, ##args); \
 		abort(); \
 	} \
@@ -626,7 +626,7 @@
 static inline void
 _dispatch_fork_becomes_unsafe(void)
 {
-	if (!fastpath(_dispatch_is_multithreaded_inline())) {
+	if (unlikely(!_dispatch_is_multithreaded_inline())) {
 		_dispatch_fork_becomes_unsafe_slow();
 		DISPATCH_COMPILER_CAN_ASSUME(_dispatch_is_multithreaded_inline());
 	}
@@ -732,6 +732,20 @@
 #endif // HAVE_SYS_GUARDED_H
 
 
+#if DISPATCH_USE_DTRACE || DISPATCH_USE_DTRACE_INTROSPECTION
+typedef struct dispatch_trace_timer_params_s {
+	int64_t deadline, interval, leeway;
+} *dispatch_trace_timer_params_t;
+
+#ifdef __cplusplus
+extern "C++" {
+#endif
+#include "provider.h"
+#ifdef __cplusplus
+}
+#endif
+#endif // DISPATCH_USE_DTRACE || DISPATCH_USE_DTRACE_INTROSPECTION
+
 #if __has_include(<sys/kdebug.h>)
 #include <sys/kdebug.h>
 #ifndef DBG_DISPATCH
@@ -754,7 +768,7 @@
 #define ARIADNE_ENTER_DISPATCH_MAIN_CODE 0
 #endif
 #if !defined(DISPATCH_USE_VOUCHER_KDEBUG_TRACE) && \
-		(DISPATCH_INTROSPECTION || DISPATCH_PROFILE)
+		(DISPATCH_INTROSPECTION || DISPATCH_PROFILE || DISPATCH_DEBUG)
 #define DISPATCH_USE_VOUCHER_KDEBUG_TRACE 1
 #endif
 
diff --git a/src/mach.c b/src/mach.c
index 0f9e9a8..699492d 100644
--- a/src/mach.c
+++ b/src/mach.c
@@ -59,7 +59,7 @@
 static void _dispatch_mach_send_push(dispatch_mach_t dm, dispatch_object_t dou,
 		dispatch_qos_t qos);
 static void _dispatch_mach_cancel(dispatch_mach_t dm);
-static void _dispatch_mach_send_barrier_drain_push(dispatch_mach_t dm,
+static void _dispatch_mach_push_send_barrier_drain(dispatch_mach_t dm,
 		dispatch_qos_t qos);
 static void _dispatch_mach_handle_or_push_received_msg(dispatch_mach_t dm,
 		dispatch_mach_msg_t dmsg);
@@ -73,6 +73,9 @@
 static void _dispatch_mach_notification_kevent_register(dispatch_mach_t dm,
 		mach_port_t send);
 
+// For tests only.
+DISPATCH_EXPORT void _dispatch_mach_hooks_install_default(void);
+
 dispatch_source_t
 _dispatch_source_create_mach_msg_direct_recv(mach_port_t recvp,
 		const struct dispatch_continuation_s *dc)
@@ -153,6 +156,13 @@
 	}
 }
 
+void
+_dispatch_mach_hooks_install_default(void)
+{
+	os_atomic_store(&_dispatch_mach_xpc_hooks,
+			&_dispatch_mach_xpc_hooks_default, relaxed);
+}
+
 #pragma mark -
 #pragma mark dispatch_mach_t
 
@@ -431,6 +441,9 @@
 	if (!drq) {
 		pri = dm->dq_priority;
 		wlh = dm->dm_recv_refs->du_wlh;
+	} else if (dx_type(drq) == DISPATCH_QUEUE_NETWORK_EVENT_TYPE) {
+		pri = DISPATCH_PRIORITY_FLAG_MANAGER;
+		wlh = (dispatch_wlh_t)drq;
 	} else if (dx_hastypeflag(drq, QUEUE_ROOT)) {
 		pri = drq->dq_priority;
 		wlh = DISPATCH_WLH_ANON;
@@ -1386,7 +1399,7 @@
 
 	if (new_state & DISPATCH_MACH_STATE_PENDING_BARRIER) {
 		qos = _dmsr_state_max_qos(new_state);
-		_dispatch_mach_send_barrier_drain_push(dm, qos);
+		_dispatch_mach_push_send_barrier_drain(dm, qos);
 	} else {
 		if (needs_mgr || dm->dm_needs_mgr) {
 			qos = _dmsr_state_max_qos(new_state);
@@ -1472,7 +1485,7 @@
 
 DISPATCH_NOINLINE
 static void
-_dispatch_mach_send_barrier_drain_push(dispatch_mach_t dm, dispatch_qos_t qos)
+_dispatch_mach_push_send_barrier_drain(dispatch_mach_t dm, dispatch_qos_t qos)
 {
 	dispatch_continuation_t dc = _dispatch_continuation_alloc();
 
@@ -1534,7 +1547,7 @@
 
 	dispatch_wakeup_flags_t wflags = 0;
 	if (state_flags & DISPATCH_MACH_STATE_PENDING_BARRIER) {
-		_dispatch_mach_send_barrier_drain_push(dm, qos);
+		_dispatch_mach_push_send_barrier_drain(dm, qos);
 	} else if (wakeup || dmsr->dmsr_disconnect_cnt ||
 			(dm->dq_atomic_flags & DSF_CANCELED)) {
 		wflags = DISPATCH_WAKEUP_MAKE_DIRTY | DISPATCH_WAKEUP_CONSUME_2;
@@ -1739,6 +1752,7 @@
 	}
 	*msg_pp = _dispatch_priority_compute_propagated(0, flags);
 	// TODO: remove QoS contribution of sync IPC messages to send queue
+	// rdar://31848737
 	return _dispatch_qos_from_pp(*msg_pp);
 }
 
@@ -2216,7 +2230,7 @@
 		dispatch_function_t func)
 {
 	dispatch_continuation_t dc = _dispatch_continuation_alloc();
-	uintptr_t dc_flags = DISPATCH_OBJ_CONSUME_BIT;
+	uintptr_t dc_flags = DISPATCH_OBJ_CONSUME_BIT | DISPATCH_OBJ_MACH_BARRIER;
 	dispatch_qos_t qos;
 
 	_dispatch_continuation_init_f(dc, dm, context, func, 0, 0, dc_flags);
@@ -2231,7 +2245,7 @@
 dispatch_mach_send_barrier(dispatch_mach_t dm, dispatch_block_t barrier)
 {
 	dispatch_continuation_t dc = _dispatch_continuation_alloc();
-	uintptr_t dc_flags = DISPATCH_OBJ_CONSUME_BIT;
+	uintptr_t dc_flags = DISPATCH_OBJ_CONSUME_BIT | DISPATCH_OBJ_MACH_BARRIER;
 	dispatch_qos_t qos;
 
 	_dispatch_continuation_init(dc, dm, barrier, 0, 0, dc_flags);
@@ -2247,7 +2261,7 @@
 		dispatch_function_t func)
 {
 	dispatch_continuation_t dc = _dispatch_continuation_alloc();
-	uintptr_t dc_flags = DISPATCH_OBJ_CONSUME_BIT;
+	uintptr_t dc_flags = DISPATCH_OBJ_CONSUME_BIT | DISPATCH_OBJ_MACH_BARRIER;
 
 	_dispatch_continuation_init_f(dc, dm, context, func, 0, 0, dc_flags);
 	_dispatch_mach_barrier_set_vtable(dc, dm, DC_VTABLE(MACH_RECV_BARRIER));
@@ -2259,7 +2273,7 @@
 dispatch_mach_receive_barrier(dispatch_mach_t dm, dispatch_block_t barrier)
 {
 	dispatch_continuation_t dc = _dispatch_continuation_alloc();
-	uintptr_t dc_flags = DISPATCH_OBJ_CONSUME_BIT;
+	uintptr_t dc_flags = DISPATCH_OBJ_CONSUME_BIT | DISPATCH_OBJ_MACH_BARRIER;
 
 	_dispatch_continuation_init(dc, dm, barrier, 0, 0, dc_flags);
 	_dispatch_mach_barrier_set_vtable(dc, dm, DC_VTABLE(MACH_RECV_BARRIER));
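
For context, a hedged sketch of a client call whose continuation now carries DISPATCH_OBJ_MACH_BARRIER (dm is assumed to be a connected dispatch mach channel); the flag steers _dispatch_continuation_init() in queue.c to the mach-barrier invoke variant, which passes DISPATCH_BLOCK_ASYNC_INVOKE_NO_OVERRIDE_RESET:

    dispatch_mach_send_barrier(dm, ^{
        // Runs after prior sends drain; because the continuation is tagged
        // DISPATCH_OBJ_MACH_BARRIER, the invoke path skips the IF_LAST
        // QoS-override reset for this block.
    });
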
diff --git a/src/object.c b/src/object.c
index 43f580b..86d1005 100644
--- a/src/object.c
+++ b/src/object.c
@@ -236,8 +236,9 @@
 dispatch_get_context(dispatch_object_t dou)
 {
 	DISPATCH_OBJECT_TFB(_dispatch_objc_get_context, dou);
-	if (slowpath(dou._do->do_ref_cnt == DISPATCH_OBJECT_GLOBAL_REFCNT) ||
-			slowpath(dx_hastypeflag(dou._do, QUEUE_ROOT))) {
+	if (unlikely(dou._do->do_ref_cnt == DISPATCH_OBJECT_GLOBAL_REFCNT ||
+			dx_hastypeflag(dou._do, QUEUE_ROOT) ||
+			dx_hastypeflag(dou._do, QUEUE_BASE))) {
 		return NULL;
 	}
 	return dou._do->do_ctxt;
@@ -247,8 +248,9 @@
 dispatch_set_context(dispatch_object_t dou, void *context)
 {
 	DISPATCH_OBJECT_TFB(_dispatch_objc_set_context, dou, context);
-	if (slowpath(dou._do->do_ref_cnt == DISPATCH_OBJECT_GLOBAL_REFCNT) ||
-			slowpath(dx_hastypeflag(dou._do, QUEUE_ROOT))) {
+	if (unlikely(dou._do->do_ref_cnt == DISPATCH_OBJECT_GLOBAL_REFCNT ||
+			dx_hastypeflag(dou._do, QUEUE_ROOT) ||
+			dx_hastypeflag(dou._do, QUEUE_BASE))) {
 		return;
 	}
 	dou._do->do_ctxt = context;
@@ -258,8 +260,9 @@
 dispatch_set_finalizer_f(dispatch_object_t dou, dispatch_function_t finalizer)
 {
 	DISPATCH_OBJECT_TFB(_dispatch_objc_set_finalizer_f, dou, finalizer);
-	if (slowpath(dou._do->do_ref_cnt == DISPATCH_OBJECT_GLOBAL_REFCNT) ||
-			slowpath(dx_hastypeflag(dou._do, QUEUE_ROOT))) {
+	if (unlikely(dou._do->do_ref_cnt == DISPATCH_OBJECT_GLOBAL_REFCNT ||
+			dx_hastypeflag(dou._do, QUEUE_ROOT) ||
+			dx_hastypeflag(dou._do, QUEUE_BASE))) {
 		return;
 	}
 	dou._do->do_finalizer = finalizer;
@@ -271,8 +274,9 @@
 	DISPATCH_OBJECT_TFB(_dispatch_objc_set_target_queue, dou, tq);
 	if (dx_vtable(dou._do)->do_set_targetq) {
 		dx_vtable(dou._do)->do_set_targetq(dou._do, tq);
-	} else if (dou._do->do_ref_cnt != DISPATCH_OBJECT_GLOBAL_REFCNT &&
-			!slowpath(dx_hastypeflag(dou._do, QUEUE_ROOT))) {
+	} else if (likely(dou._do->do_ref_cnt != DISPATCH_OBJECT_GLOBAL_REFCNT &&
+			!dx_hastypeflag(dou._do, QUEUE_ROOT) &&
+			!dx_hastypeflag(dou._do, QUEUE_BASE))) {
 		if (slowpath(!tq)) {
 			tq = _dispatch_get_root_queue(DISPATCH_QOS_DEFAULT, false);
 		}
diff --git a/src/object.m b/src/object.m
index cc97cc3..efee829 100644
--- a/src/object.m
+++ b/src/object.m
@@ -387,6 +387,14 @@
 DISPATCH_UNAVAILABLE_INIT()
 DISPATCH_OBJC_LOAD()
 
+-(id)retain {
+	return (id)_voucher_retain_inline((struct voucher_s *)self);
+}
+
+-(oneway void)release {
+	return _voucher_release_inline((struct voucher_s *)self);
+}
+
 - (void)_xref_dispose {
 	return _voucher_xref_dispose(self); // calls _os_object_release_internal()
 }
diff --git a/src/object_internal.h b/src/object_internal.h
index 0060f27..4504f65 100644
--- a/src/object_internal.h
+++ b/src/object_internal.h
@@ -355,6 +355,8 @@
 	_DISPATCH_DISK_TYPE				=    0x70000, // meta-type for io disks
 
 	_DISPATCH_QUEUE_ROOT_TYPEFLAG	=     0x0100, // bit set for any root queues
+	_DISPATCH_QUEUE_BASE_TYPEFLAG	=     0x0200, // base of a hierarchy
+	                                              // that targets a root queue
 
 #define DISPATCH_CONTINUATION_TYPE(name)  \
 		(_DISPATCH_CONTINUATION_TYPE | DC_##name##_TYPE)
@@ -372,10 +374,11 @@
 	DISPATCH_QUEUE_GLOBAL_ROOT_TYPE		= 4 | _DISPATCH_QUEUE_TYPE |
 			_DISPATCH_QUEUE_ROOT_TYPEFLAG,
 	DISPATCH_QUEUE_NETWORK_EVENT_TYPE	= 5 | _DISPATCH_QUEUE_TYPE |
-			_DISPATCH_QUEUE_ROOT_TYPEFLAG,
+			_DISPATCH_QUEUE_BASE_TYPEFLAG,
 	DISPATCH_QUEUE_RUNLOOP_TYPE			= 6 | _DISPATCH_QUEUE_TYPE |
-			_DISPATCH_QUEUE_ROOT_TYPEFLAG,
-	DISPATCH_QUEUE_MGR_TYPE				= 7 | _DISPATCH_QUEUE_TYPE,
+			_DISPATCH_QUEUE_BASE_TYPEFLAG,
+	DISPATCH_QUEUE_MGR_TYPE				= 7 | _DISPATCH_QUEUE_TYPE |
+			_DISPATCH_QUEUE_BASE_TYPEFLAG,
 	DISPATCH_QUEUE_SPECIFIC_TYPE		= 8 | _DISPATCH_QUEUE_TYPE,
 
 	DISPATCH_SEMAPHORE_TYPE				= 1 | _DISPATCH_SEMAPHORE_TYPE,
diff --git a/src/provider.d b/src/provider.d
index ede3c56..13bcf7a 100644
--- a/src/provider.d
+++ b/src/provider.d
@@ -101,3 +101,41 @@
 #pragma D attributes Private/Private/Common provider dispatch function
 #pragma D attributes Evolving/Evolving/Common provider dispatch name
 #pragma D attributes Evolving/Evolving/Common provider dispatch args
+
+typedef struct voucher_s *voucher_t;
+
+/*
+ * Probes for vouchers
+ */
+provider voucher {
+
+    /*
+     * Voucher lifetime:
+     *
+     * voucher$target:::create     A new voucher is being created
+     * voucher$target:::dispose    A voucher is being freed
+     * voucher$target:::retain     A voucher is being retained
+     * voucher$target:::release    A voucher is being released
+     */
+    probe create(voucher_t voucher, mach_port_t kv, uint64_t activity_id);
+    probe dispose(voucher_t voucher);
+    probe retain(voucher_t voucher, int resulting_refcnt);
+    probe release(voucher_t voucher, int resulting_refcnt);
+
+    /*
+     * Thread adoption
+     *
+     * voucher$target:::adopt      A voucher is being adopted by the current thread
+     * voucher$target:::orphan     A voucher is being orphaned by the current thread
+     */
+    probe adopt(voucher_t voucher);
+    probe orphan(voucher_t voucher);
+
+};
+
+#pragma D attributes Evolving/Evolving/Common provider voucher provider
+#pragma D attributes Private/Private/Common provider voucher module
+#pragma D attributes Private/Private/Common provider voucher function
+#pragma D attributes Evolving/Evolving/Common provider voucher name
+#pragma D attributes Evolving/Evolving/Common provider voucher args
+
diff --git a/src/queue.c b/src/queue.c
index fe26ab7..33e5009 100644
--- a/src/queue.c
+++ b/src/queue.c
@@ -26,20 +26,32 @@
 #if HAVE_PTHREAD_WORKQUEUES || DISPATCH_USE_INTERNAL_WORKQUEUE
 #define DISPATCH_USE_WORKQUEUES 1
 #endif
-#if (!HAVE_PTHREAD_WORKQUEUES || DISPATCH_DEBUG) && !defined(DISPATCH_ENABLE_THREAD_POOL)
+#if (!HAVE_PTHREAD_WORKQUEUES || DISPATCH_DEBUG) && \
+		!defined(DISPATCH_ENABLE_THREAD_POOL)
 #define DISPATCH_ENABLE_THREAD_POOL 1
 #endif
 #if DISPATCH_ENABLE_PTHREAD_ROOT_QUEUES || DISPATCH_ENABLE_THREAD_POOL
 #define DISPATCH_USE_PTHREAD_POOL 1
 #endif
-#if HAVE_PTHREAD_WORKQUEUES && (!HAVE_PTHREAD_WORKQUEUE_QOS || DISPATCH_DEBUG) && \
-		!HAVE_PTHREAD_WORKQUEUE_SETDISPATCH_NP && \
+#if HAVE_PTHREAD_WORKQUEUES && (!HAVE_PTHREAD_WORKQUEUE_QOS || \
+		DISPATCH_DEBUG) && !HAVE_PTHREAD_WORKQUEUE_SETDISPATCH_NP && \
 		!defined(DISPATCH_USE_LEGACY_WORKQUEUE_FALLBACK)
 #define DISPATCH_USE_LEGACY_WORKQUEUE_FALLBACK 1
 #endif
-#if HAVE_PTHREAD_WORKQUEUE_SETDISPATCH_NP || DISPATCH_USE_LEGACY_WORKQUEUE_FALLBACK
+#if HAVE_PTHREAD_WORKQUEUE_SETDISPATCH_NP && (DISPATCH_DEBUG || \
+		(!DISPATCH_USE_KEVENT_WORKQUEUE && !HAVE_PTHREAD_WORKQUEUE_QOS)) && \
+		!defined(DISPATCH_USE_PTHREAD_WORKQUEUE_SETDISPATCH_NP)
+#define DISPATCH_USE_PTHREAD_WORKQUEUE_SETDISPATCH_NP 1
+#endif
+#if DISPATCH_USE_PTHREAD_WORKQUEUE_SETDISPATCH_NP || \
+		DISPATCH_USE_LEGACY_WORKQUEUE_FALLBACK || \
+		DISPATCH_USE_INTERNAL_WORKQUEUE
+#if !DISPATCH_USE_INTERNAL_WORKQUEUE
 #define DISPATCH_USE_WORKQ_PRIORITY 1
 #endif
+#define DISPATCH_USE_WORKQ_OPTIONS 1
+#endif
+
 #if DISPATCH_USE_WORKQUEUES && DISPATCH_USE_PTHREAD_POOL && \
 		!DISPATCH_USE_LEGACY_WORKQUEUE_FALLBACK
 #define pthread_workqueue_t void*
@@ -69,7 +81,7 @@
 #if HAVE_PTHREAD_WORKQUEUE_QOS
 static void _dispatch_worker_thread3(pthread_priority_t priority);
 #endif
-#if HAVE_PTHREAD_WORKQUEUE_SETDISPATCH_NP
+#if DISPATCH_USE_PTHREAD_WORKQUEUE_SETDISPATCH_NP
 static void _dispatch_worker_thread2(int priority, int options, void *context);
 #endif
 #endif
@@ -164,7 +176,9 @@
 #if DISPATCH_USE_WORKQ_PRIORITY
 			int dgq_wq_priority;
 #endif
+#if DISPATCH_USE_WORKQ_OPTIONS
 			int dgq_wq_options;
+#endif
 #if DISPATCH_USE_LEGACY_WORKQUEUE_FALLBACK || DISPATCH_USE_PTHREAD_POOL
 			pthread_workqueue_t dgq_kworkqueue;
 #endif
@@ -195,8 +209,10 @@
 #if DISPATCH_USE_WORKQ_PRIORITY
 		.dgq_wq_priority = WORKQ_BG_PRIOQUEUE,
 #endif
+#if DISPATCH_USE_WORKQ_OPTIONS
 		.dgq_wq_options = 0,
 #endif
+#endif
 #if DISPATCH_ENABLE_THREAD_POOL
 		.dgq_ctxt = &_dispatch_pthread_root_queue_contexts[
 				DISPATCH_ROOT_QUEUE_IDX_MAINTENANCE_QOS],
@@ -208,8 +224,10 @@
 #if DISPATCH_USE_WORKQ_PRIORITY
 		.dgq_wq_priority = WORKQ_BG_PRIOQUEUE,
 #endif
+#if DISPATCH_USE_WORKQ_OPTIONS
 		.dgq_wq_options = WORKQ_ADDTHREADS_OPTION_OVERCOMMIT,
 #endif
+#endif
 #if DISPATCH_ENABLE_THREAD_POOL
 		.dgq_ctxt = &_dispatch_pthread_root_queue_contexts[
 				DISPATCH_ROOT_QUEUE_IDX_MAINTENANCE_QOS_OVERCOMMIT],
@@ -221,8 +239,10 @@
 #if DISPATCH_USE_WORKQ_PRIORITY
 		.dgq_wq_priority = WORKQ_BG_PRIOQUEUE_CONDITIONAL,
 #endif
+#if DISPATCH_USE_WORKQ_OPTIONS
 		.dgq_wq_options = 0,
 #endif
+#endif
 #if DISPATCH_ENABLE_THREAD_POOL
 		.dgq_ctxt = &_dispatch_pthread_root_queue_contexts[
 				DISPATCH_ROOT_QUEUE_IDX_BACKGROUND_QOS],
@@ -234,8 +254,10 @@
 #if DISPATCH_USE_WORKQ_PRIORITY
 		.dgq_wq_priority = WORKQ_BG_PRIOQUEUE_CONDITIONAL,
 #endif
+#if DISPATCH_USE_WORKQ_OPTIONS
 		.dgq_wq_options = WORKQ_ADDTHREADS_OPTION_OVERCOMMIT,
 #endif
+#endif
 #if DISPATCH_ENABLE_THREAD_POOL
 		.dgq_ctxt = &_dispatch_pthread_root_queue_contexts[
 				DISPATCH_ROOT_QUEUE_IDX_BACKGROUND_QOS_OVERCOMMIT],
@@ -247,8 +269,10 @@
 #if DISPATCH_USE_WORKQ_PRIORITY
 		.dgq_wq_priority = WORKQ_LOW_PRIOQUEUE,
 #endif
+#if DISPATCH_USE_WORKQ_OPTIONS
 		.dgq_wq_options = 0,
 #endif
+#endif
 #if DISPATCH_ENABLE_THREAD_POOL
 		.dgq_ctxt = &_dispatch_pthread_root_queue_contexts[
 				DISPATCH_ROOT_QUEUE_IDX_UTILITY_QOS],
@@ -260,8 +284,10 @@
 #if DISPATCH_USE_WORKQ_PRIORITY
 		.dgq_wq_priority = WORKQ_LOW_PRIOQUEUE,
 #endif
+#if DISPATCH_USE_WORKQ_OPTIONS
 		.dgq_wq_options = WORKQ_ADDTHREADS_OPTION_OVERCOMMIT,
 #endif
+#endif
 #if DISPATCH_ENABLE_THREAD_POOL
 		.dgq_ctxt = &_dispatch_pthread_root_queue_contexts[
 				DISPATCH_ROOT_QUEUE_IDX_UTILITY_QOS_OVERCOMMIT],
@@ -273,8 +299,10 @@
 #if DISPATCH_USE_WORKQ_PRIORITY
 		.dgq_wq_priority = WORKQ_DEFAULT_PRIOQUEUE,
 #endif
+#if DISPATCH_USE_WORKQ_OPTIONS
 		.dgq_wq_options = 0,
 #endif
+#endif
 #if DISPATCH_ENABLE_THREAD_POOL
 		.dgq_ctxt = &_dispatch_pthread_root_queue_contexts[
 				DISPATCH_ROOT_QUEUE_IDX_DEFAULT_QOS],
@@ -286,8 +314,10 @@
 #if DISPATCH_USE_WORKQ_PRIORITY
 		.dgq_wq_priority = WORKQ_DEFAULT_PRIOQUEUE,
 #endif
+#if DISPATCH_USE_WORKQ_OPTIONS
 		.dgq_wq_options = WORKQ_ADDTHREADS_OPTION_OVERCOMMIT,
 #endif
+#endif
 #if DISPATCH_ENABLE_THREAD_POOL
 		.dgq_ctxt = &_dispatch_pthread_root_queue_contexts[
 				DISPATCH_ROOT_QUEUE_IDX_DEFAULT_QOS_OVERCOMMIT],
@@ -299,8 +329,10 @@
 #if DISPATCH_USE_WORKQ_PRIORITY
 		.dgq_wq_priority = WORKQ_HIGH_PRIOQUEUE,
 #endif
+#if DISPATCH_USE_WORKQ_OPTIONS
 		.dgq_wq_options = 0,
 #endif
+#endif
 #if DISPATCH_ENABLE_THREAD_POOL
 		.dgq_ctxt = &_dispatch_pthread_root_queue_contexts[
 				DISPATCH_ROOT_QUEUE_IDX_USER_INITIATED_QOS],
@@ -312,8 +344,10 @@
 #if DISPATCH_USE_WORKQ_PRIORITY
 		.dgq_wq_priority = WORKQ_HIGH_PRIOQUEUE,
 #endif
+#if DISPATCH_USE_WORKQ_OPTIONS
 		.dgq_wq_options = WORKQ_ADDTHREADS_OPTION_OVERCOMMIT,
 #endif
+#endif
 #if DISPATCH_ENABLE_THREAD_POOL
 		.dgq_ctxt = &_dispatch_pthread_root_queue_contexts[
 				DISPATCH_ROOT_QUEUE_IDX_USER_INITIATED_QOS_OVERCOMMIT],
@@ -325,8 +359,10 @@
 #if DISPATCH_USE_WORKQ_PRIORITY
 		.dgq_wq_priority = WORKQ_HIGH_PRIOQUEUE_CONDITIONAL,
 #endif
+#if DISPATCH_USE_WORKQ_OPTIONS
 		.dgq_wq_options = 0,
 #endif
+#endif
 #if DISPATCH_ENABLE_THREAD_POOL
 		.dgq_ctxt = &_dispatch_pthread_root_queue_contexts[
 				DISPATCH_ROOT_QUEUE_IDX_USER_INTERACTIVE_QOS],
@@ -338,8 +374,10 @@
 #if DISPATCH_USE_WORKQ_PRIORITY
 		.dgq_wq_priority = WORKQ_HIGH_PRIOQUEUE_CONDITIONAL,
 #endif
+#if DISPATCH_USE_WORKQ_OPTIONS
 		.dgq_wq_options = WORKQ_ADDTHREADS_OPTION_OVERCOMMIT,
 #endif
+#endif
 #if DISPATCH_ENABLE_THREAD_POOL
 		.dgq_ctxt = &_dispatch_pthread_root_queue_contexts[
 				DISPATCH_ROOT_QUEUE_IDX_USER_INTERACTIVE_QOS_OVERCOMMIT],
@@ -419,7 +457,7 @@
 	),
 };
 
-#if HAVE_PTHREAD_WORKQUEUE_SETDISPATCH_NP
+#if DISPATCH_USE_PTHREAD_WORKQUEUE_SETDISPATCH_NP
 static const dispatch_queue_t _dispatch_wq2root_queues[][2] = {
 	[WORKQ_BG_PRIOQUEUE][0] = &_dispatch_root_queues[
 			DISPATCH_ROOT_QUEUE_IDX_BACKGROUND_QOS],
@@ -442,7 +480,7 @@
 			&_dispatch_root_queues[
 			DISPATCH_ROOT_QUEUE_IDX_USER_INITIATED_QOS_OVERCOMMIT],
 };
-#endif // HAVE_PTHREAD_WORKQUEUE_SETDISPATCH_NP
+#endif // DISPATCH_USE_PTHREAD_WORKQUEUE_SETDISPATCH_NP
 
 #if DISPATCH_USE_MGR_THREAD && DISPATCH_ENABLE_PTHREAD_ROOT_QUEUES
 static struct dispatch_queue_s _dispatch_mgr_root_queue;
@@ -650,7 +688,7 @@
 		}
 	}
 #endif // DISPATCH_USE_KEVENT_WORKQUEUE || HAVE_PTHREAD_WORKQUEUE_QOS
-#if HAVE_PTHREAD_WORKQUEUE_SETDISPATCH_NP
+#if DISPATCH_USE_PTHREAD_WORKQUEUE_SETDISPATCH_NP
 	if (!result && !disable_wq) {
 		pthread_workqueue_setdispatchoffset_np(
 				offsetof(struct dispatch_queue_s, dq_serialnum));
@@ -660,7 +698,7 @@
 #endif
 		result = !r;
 	}
-#endif // HAVE_PTHREAD_WORKQUEUE_SETDISPATCH_NP
+#endif // DISPATCH_USE_PTHREAD_WORKQUEUE_SETDISPATCH_NP
 #if DISPATCH_USE_LEGACY_WORKQUEUE_FALLBACK || DISPATCH_USE_PTHREAD_POOL
 	if (!result) {
 #if DISPATCH_USE_LEGACY_WORKQUEUE_FALLBACK
@@ -788,7 +826,7 @@
 			DISPATCH_ROOT_QUEUE_COUNT);
 	dispatch_assert(countof(_dispatch_root_queue_contexts) ==
 			DISPATCH_ROOT_QUEUE_COUNT);
-#if HAVE_PTHREAD_WORKQUEUE_SETDISPATCH_NP
+#if DISPATCH_USE_PTHREAD_WORKQUEUE_SETDISPATCH_NP
 	dispatch_assert(sizeof(_dispatch_wq2root_queues) /
 			sizeof(_dispatch_wq2root_queues[0][0]) ==
 			WORKQ_NUM_PRIOQUEUE * 2);
@@ -1420,7 +1458,7 @@
 	uint64_t dq_state = os_atomic_load2o(dq, dq_state, relaxed);
 	uint64_t initial_state = DISPATCH_QUEUE_STATE_INIT_VALUE(dq->dq_width);
 
-	if (dx_type(dq) == DISPATCH_QUEUE_GLOBAL_ROOT_TYPE) {
+	if (dx_hastypeflag(dq, QUEUE_ROOT)) {
 		initial_state = DISPATCH_ROOT_QUEUE_STATE_INIT_VALUE;
 	}
 	dq_state &= ~DISPATCH_QUEUE_MAX_QOS_MASK;
@@ -1799,23 +1837,24 @@
 	uint32_t tmp;
 	dispatch_queue_t dq = _dispatch_queue_get_current();
 
-	if (w > 0) {
-		tmp = (unsigned int)w;
-	} else switch (w) {
-	case 0:
-		tmp = 1;
-		break;
-	case DISPATCH_QUEUE_WIDTH_MAX_PHYSICAL_CPUS:
-		tmp = dispatch_hw_config(physical_cpus);
-		break;
-	case DISPATCH_QUEUE_WIDTH_ACTIVE_CPUS:
-		tmp = dispatch_hw_config(active_cpus);
-		break;
-	default:
-		// fall through
-	case DISPATCH_QUEUE_WIDTH_MAX_LOGICAL_CPUS:
-		tmp = dispatch_hw_config(logical_cpus);
-		break;
+	if (w >= 0) {
+		tmp = w ? (unsigned int)w : 1;
+	} else {
+		dispatch_qos_t qos = _dispatch_qos_from_pp(_dispatch_get_priority());
+		switch (w) {
+		case DISPATCH_QUEUE_WIDTH_MAX_PHYSICAL_CPUS:
+			tmp = _dispatch_qos_max_parallelism(qos,
+					DISPATCH_MAX_PARALLELISM_PHYSICAL);
+			break;
+		case DISPATCH_QUEUE_WIDTH_ACTIVE_CPUS:
+			tmp = _dispatch_qos_max_parallelism(qos,
+					DISPATCH_MAX_PARALLELISM_ACTIVE);
+			break;
+		case DISPATCH_QUEUE_WIDTH_MAX_LOGICAL_CPUS:
+		default:
+			tmp = _dispatch_qos_max_parallelism(qos, 0);
+			break;
+		}
 	}
 	if (tmp > DISPATCH_QUEUE_WIDTH_MAX) {
 		tmp = DISPATCH_QUEUE_WIDTH_MAX;
@@ -1832,8 +1871,9 @@
 void
 dispatch_queue_set_width(dispatch_queue_t dq, long width)
 {
-	if (slowpath(dq->do_ref_cnt == DISPATCH_OBJECT_GLOBAL_REFCNT) ||
-			slowpath(dx_hastypeflag(dq, QUEUE_ROOT))) {
+	if (unlikely(dq->do_ref_cnt == DISPATCH_OBJECT_GLOBAL_REFCNT ||
+			dx_hastypeflag(dq, QUEUE_ROOT) ||
+			dx_hastypeflag(dq, QUEUE_BASE))) {
 		return;
 	}
 
@@ -1848,8 +1888,15 @@
 		DISPATCH_CLIENT_CRASH(type, "Unexpected dispatch object type");
 	}
 
-	_dispatch_barrier_trysync_or_async_f(dq, (void*)(intptr_t)width,
-			_dispatch_queue_set_width2);
+	if (likely((int)width >= 0)) {
+		_dispatch_barrier_trysync_or_async_f(dq, (void*)(intptr_t)width,
+				_dispatch_queue_set_width2);
+	} else {
+		// The negative width constants need to execute on the queue to
+		// query the queue QoS
+		_dispatch_barrier_async_detached_f(dq, (void*)(intptr_t)width,
+				_dispatch_queue_set_width2);
+	}
 }
 
 static void
@@ -2265,7 +2312,7 @@
 {
 	dispatch_queue_t dq = _dispatch_queue_get_current();
 	if (!dq) return NULL;
-	while (slowpath(dq->do_targetq)) {
+	while (unlikely(dq->do_targetq)) {
 		dq = dq->do_targetq;
 	}
 	if (dx_type(dq) != DISPATCH_QUEUE_GLOBAL_ROOT_TYPE ||
@@ -2911,9 +2958,9 @@
 	if (atomic_flags & DBF_CANCELED) goto out;
 
 	pthread_priority_t op = 0, p = 0;
-	if (_dispatch_block_invoke_should_set_priority(flags)) {
-		op = _dispatch_get_priority();
-		p  = dbpd->dbpd_priority;
+	op = _dispatch_block_invoke_should_set_priority(flags, dbpd->dbpd_priority);
+	if (op) {
+		p = dbpd->dbpd_priority;
 	}
 	voucher_t ov, v = DISPATCH_NO_VOUCHER;
 	if (flags & DISPATCH_BLOCK_HAS_VOUCHER) {
@@ -2966,9 +3013,55 @@
 	}
 }
 
-DISPATCH_ALWAYS_INLINE
+#if DISPATCH_USE_KEVENT_WORKQUEUE
 static void
-_dispatch_block_async_invoke2(dispatch_block_t b, bool release)
+_dispatch_block_async_invoke_reset_max_qos(dispatch_queue_t dq,
+		dispatch_qos_t qos)
+{
+	uint64_t old_state, new_state, qos_bits = _dq_state_from_qos(qos);
+
+	// Only dispatch queues can reach this point (as opposed to sources or more
+	// complex objects), which allows us to handle the DIRTY bit protocol by only
+	// looking at the tail
+	dispatch_assert(dx_metatype(dq) == _DISPATCH_QUEUE_TYPE);
+
+again:
+	os_atomic_rmw_loop2o(dq, dq_state, old_state, new_state, relaxed, {
+		dispatch_assert(_dq_state_is_base_wlh(old_state));
+		if ((old_state & DISPATCH_QUEUE_MAX_QOS_MASK) <= qos_bits) {
+			// Nothing to do if the QoS isn't going down
+			os_atomic_rmw_loop_give_up(return);
+		}
+		if (_dq_state_is_dirty(old_state)) {
+			os_atomic_rmw_loop_give_up({
+				// just renew the drain lock with an acquire barrier, to see
+				// what the enqueuer that set DIRTY has done.
+				// The xor generates better assembly as DISPATCH_QUEUE_DIRTY
+				// is already in a register.
+				os_atomic_xor2o(dq, dq_state, DISPATCH_QUEUE_DIRTY, acquire);
+				if (!dq->dq_items_tail) {
+					goto again;
+				}
+				return;
+			});
+		}
+
+		new_state  = old_state;
+		new_state &= ~DISPATCH_QUEUE_MAX_QOS_MASK;
+		new_state |= qos_bits;
+	});
+
+	_dispatch_deferred_items_get()->ddi_wlh_needs_update = true;
+	_dispatch_event_loop_drain(KEVENT_FLAG_IMMEDIATE);
+}
+#endif // DISPATCH_USE_KEVENT_WORKQUEUE
+
+#define DISPATCH_BLOCK_ASYNC_INVOKE_RELEASE           0x1
+#define DISPATCH_BLOCK_ASYNC_INVOKE_NO_OVERRIDE_RESET 0x2
+
+DISPATCH_NOINLINE
+static void
+_dispatch_block_async_invoke2(dispatch_block_t b, unsigned long invoke_flags)
 {
 	dispatch_block_private_data_t dbpd = _dispatch_block_get_data(b);
 	unsigned int atomic_flags = dbpd->dbpd_atomic_flags;
@@ -2976,6 +3069,19 @@
 		DISPATCH_CLIENT_CRASH(atomic_flags, "A block object may not be both "
 				"run more than once and waited for");
 	}
+
+#if DISPATCH_USE_KEVENT_WORKQUEUE
+	if (unlikely((dbpd->dbpd_flags &
+			DISPATCH_BLOCK_IF_LAST_RESET_QUEUE_QOS_OVERRIDE) &&
+			!(invoke_flags & DISPATCH_BLOCK_ASYNC_INVOKE_NO_OVERRIDE_RESET))) {
+		dispatch_queue_t dq = _dispatch_get_current_queue();
+		dispatch_qos_t qos = _dispatch_qos_from_pp(_dispatch_get_priority());
+		if ((dispatch_wlh_t)dq == _dispatch_get_wlh() && !dq->dq_items_tail) {
+			_dispatch_block_async_invoke_reset_max_qos(dq, qos);
+		}
+	}
+#endif // DISPATCH_USE_KEVENT_WORKQUEUE
+
 	if (!slowpath(atomic_flags & DBF_CANCELED)) {
 		dbpd->dbpd_block();
 	}
@@ -2984,13 +3090,14 @@
 			dispatch_group_leave(_dbpd_group(dbpd));
 		}
 	}
-	os_mpsc_queue_t oq;
-	oq = os_atomic_xchg2o(dbpd, dbpd_queue, NULL, relaxed);
+
+	os_mpsc_queue_t oq = os_atomic_xchg2o(dbpd, dbpd_queue, NULL, relaxed);
 	if (oq) {
 		// balances dispatch_{,barrier_,group_}async
 		_os_object_release_internal_n_inline(oq->_as_os_obj, 2);
 	}
-	if (release) {
+
+	if (invoke_flags & DISPATCH_BLOCK_ASYNC_INVOKE_RELEASE) {
 		Block_release(b);
 	}
 }
@@ -2998,20 +3105,35 @@
 static void
 _dispatch_block_async_invoke(void *block)
 {
-	_dispatch_block_async_invoke2(block, false);
+	_dispatch_block_async_invoke2(block, 0);
 }
 
 static void
 _dispatch_block_async_invoke_and_release(void *block)
 {
-	_dispatch_block_async_invoke2(block, true);
+	_dispatch_block_async_invoke2(block, DISPATCH_BLOCK_ASYNC_INVOKE_RELEASE);
+}
+
+static void
+_dispatch_block_async_invoke_and_release_mach_barrier(void *block)
+{
+	_dispatch_block_async_invoke2(block, DISPATCH_BLOCK_ASYNC_INVOKE_RELEASE |
+			DISPATCH_BLOCK_ASYNC_INVOKE_NO_OVERRIDE_RESET);
+}
+
+DISPATCH_ALWAYS_INLINE
+static inline bool
+_dispatch_block_supports_wait_and_cancel(dispatch_block_private_data_t dbpd)
+{
+	return dbpd && !(dbpd->dbpd_flags &
+			DISPATCH_BLOCK_IF_LAST_RESET_QUEUE_QOS_OVERRIDE);
 }
 
 void
 dispatch_block_cancel(dispatch_block_t db)
 {
 	dispatch_block_private_data_t dbpd = _dispatch_block_get_data(db);
-	if (!dbpd) {
+	if (unlikely(!_dispatch_block_supports_wait_and_cancel(dbpd))) {
 		DISPATCH_CLIENT_CRASH(db, "Invalid block object passed to "
 				"dispatch_block_cancel()");
 	}
@@ -3022,7 +3144,7 @@
 dispatch_block_testcancel(dispatch_block_t db)
 {
 	dispatch_block_private_data_t dbpd = _dispatch_block_get_data(db);
-	if (!dbpd) {
+	if (unlikely(!_dispatch_block_supports_wait_and_cancel(dbpd))) {
 		DISPATCH_CLIENT_CRASH(db, "Invalid block object passed to "
 				"dispatch_block_testcancel()");
 	}
@@ -3033,7 +3155,7 @@
 dispatch_block_wait(dispatch_block_t db, dispatch_time_t timeout)
 {
 	dispatch_block_private_data_t dbpd = _dispatch_block_get_data(db);
-	if (!dbpd) {
+	if (unlikely(!_dispatch_block_supports_wait_and_cancel(dbpd))) {
 		DISPATCH_CLIENT_CRASH(db, "Invalid block object passed to "
 				"dispatch_block_wait()");
 	}
@@ -3128,7 +3250,10 @@
 		_os_object_retain_internal_n_inline(oq->_as_os_obj, 2);
 	}
 
-	if (dc_flags & DISPATCH_OBJ_CONSUME_BIT) {
+	if (dc_flags & DISPATCH_OBJ_MACH_BARRIER) {
+		dispatch_assert(dc_flags & DISPATCH_OBJ_CONSUME_BIT);
+		dc->dc_func = _dispatch_block_async_invoke_and_release_mach_barrier;
+	} else if (dc_flags & DISPATCH_OBJ_CONSUME_BIT) {
 		dc->dc_func = _dispatch_block_async_invoke_and_release;
 	} else {
 		dc->dc_func = _dispatch_block_async_invoke;
@@ -3156,28 +3281,7 @@
 	dc->dc_flags = dc_flags;
 }
 
-void
-_dispatch_continuation_update_bits(dispatch_continuation_t dc,
-		uintptr_t dc_flags)
-{
-	dc->dc_flags = dc_flags;
-	if (dc_flags & DISPATCH_OBJ_CONSUME_BIT) {
-		if (dc_flags & DISPATCH_OBJ_BLOCK_PRIVATE_DATA_BIT) {
-			dc->dc_func = _dispatch_block_async_invoke_and_release;
-		} else if (dc_flags & DISPATCH_OBJ_BLOCK_BIT) {
-			dc->dc_func = _dispatch_call_block_and_release;
-		}
-	} else {
-		if (dc_flags & DISPATCH_OBJ_BLOCK_PRIVATE_DATA_BIT) {
-			dc->dc_func = _dispatch_block_async_invoke;
-		} else if (dc_flags & DISPATCH_OBJ_BLOCK_BIT) {
-			dc->dc_func = _dispatch_Block_invoke(dc->dc_ctxt);
-		}
-	}
-}
-
 #endif // __BLOCKS__
-
 #pragma mark -
 #pragma mark dispatch_barrier_async
 
@@ -4092,15 +4196,19 @@
 		dispatch_block_t work, dispatch_block_flags_t flags)
 {
 	dispatch_block_private_data_t dbpd = _dispatch_block_get_data(work);
-	pthread_priority_t op = 0;
+	pthread_priority_t op = 0, p = 0;
 
 	flags |= dbpd->dbpd_flags;
-	if (_dispatch_block_invoke_should_set_priority(flags)) {
-		voucher_t v = DISPATCH_NO_VOUCHER;
-		op = _dispatch_get_priority();
-		v  = _dispatch_set_priority_and_voucher(dbpd->dbpd_priority, v, 0);
-		dispatch_assert(v == DISPATCH_NO_VOUCHER);
+	op = _dispatch_block_invoke_should_set_priority(flags, dbpd->dbpd_priority);
+	if (op) {
+		p = dbpd->dbpd_priority;
 	}
+	voucher_t ov, v = DISPATCH_NO_VOUCHER;
+	if (flags & DISPATCH_BLOCK_HAS_VOUCHER) {
+		v = dbpd->dbpd_voucher;
+	}
+	ov = _dispatch_set_priority_and_voucher(p, v, 0);
+
 	// balanced in d_block_sync_invoke or d_block_wait
 	if (os_atomic_cmpxchg2o(dbpd, dbpd_queue, NULL, dq->_as_oq, relaxed)) {
 		_dispatch_retain_2(dq);
@@ -4110,7 +4218,7 @@
 	} else {
 		dispatch_sync_f(dq, work, _dispatch_block_sync_invoke);
 	}
-	_dispatch_reset_priority_and_voucher(op, DISPATCH_NO_VOUCHER);
+	_dispatch_reset_priority_and_voucher(op, ov);
 }
 
 void
@@ -4448,7 +4556,7 @@
 #if HAVE_PTHREAD_WORKQUEUE_QOS
 		r = _pthread_workqueue_addthreads(remaining,
 				_dispatch_priority_to_pp(dq->dq_priority));
-#elif HAVE_PTHREAD_WORKQUEUE_SETDISPATCH_NP
+#elif DISPATCH_USE_PTHREAD_WORKQUEUE_SETDISPATCH_NP
 		r = pthread_workqueue_addthreads_np(qc->dgq_wq_priority,
 				qc->dgq_wq_options, remaining);
 #endif
@@ -5250,7 +5358,7 @@
 	}
 
 apply_again:
-	if (dx_type(tq) == DISPATCH_QUEUE_GLOBAL_ROOT_TYPE) {
+	if (dx_hastypeflag(tq, QUEUE_ROOT)) {
 		if (_dispatch_root_queue_push_queue_override_needed(tq, qos)) {
 			_dispatch_root_queue_push_override_stealer(tq, dq, qos);
 		}
@@ -5675,6 +5783,9 @@
 			goto park;
 		}
 		dq_state = os_atomic_load2o(dq, dq_state, relaxed);
+		if (unlikely(!_dq_state_is_base_wlh(dq_state))) { // rdar://32671286
+			goto park;
+		}
 		if (unlikely(_dq_state_is_enqueued_on_target(dq_state))) {
 			_dispatch_retain(dq);
 			_dispatch_trace_continuation_push(dq->do_targetq, dq);
@@ -5810,7 +5921,7 @@
 }
 #endif // HAVE_PTHREAD_WORKQUEUE_QOS
 
-#if HAVE_PTHREAD_WORKQUEUE_SETDISPATCH_NP
+#if DISPATCH_USE_PTHREAD_WORKQUEUE_SETDISPATCH_NP
 // 6618342 Contact the team that owns the Instrument DTrace probe before
 //         renaming this symbol
 static void
@@ -5823,7 +5934,7 @@
 
 	return _dispatch_worker_thread4(dq);
 }
-#endif // HAVE_PTHREAD_WORKQUEUE_SETDISPATCH_NP
+#endif // DISPATCH_USE_PTHREAD_WORKQUEUE_SETDISPATCH_NP
 #endif // HAVE_PTHREAD_WORKQUEUES
 
 #if DISPATCH_USE_PTHREAD_POOL
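
One observable consequence of the width changes above, sketched with the private API (q is hypothetical): negative width constants are now resolved on the queue itself, so the queue's own QoS feeds _dispatch_qos_max_parallelism():

    // Detached barrier per the hunk above; the width is computed on q.
    dispatch_queue_set_width(q, DISPATCH_QUEUE_WIDTH_ACTIVE_CPUS);
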
diff --git a/src/queue_internal.h b/src/queue_internal.h
index c1d0f6e..1a590e2 100644
--- a/src/queue_internal.h
+++ b/src/queue_internal.h
@@ -832,6 +832,8 @@
 #define DISPATCH_OBJ_CTXT_FETCH_BIT			0x040ul
 // use the voucher from the continuation even if the queue has voucher set
 #define DISPATCH_OBJ_ENFORCE_VOUCHER		0x080ul
+// never set on continuations, used by mach.c only
+#define DISPATCH_OBJ_MACH_BARRIER		0x1000000ul
 
 typedef struct dispatch_continuation_s {
 	struct dispatch_object_s _as_do[0];
@@ -975,7 +977,7 @@
 
 #ifdef __BLOCKS__
 
-#define DISPATCH_BLOCK_API_MASK (0x80u - 1)
+#define DISPATCH_BLOCK_API_MASK (0x100u - 1)
 #define DISPATCH_BLOCK_HAS_VOUCHER (1u << 31)
 #define DISPATCH_BLOCK_HAS_PRIORITY (1u << 30)
 
@@ -1022,8 +1024,6 @@
 
 void _dispatch_continuation_init_slow(dispatch_continuation_t dc,
 		dispatch_queue_class_t dqu, dispatch_block_flags_t flags);
-void _dispatch_continuation_update_bits(dispatch_continuation_t dc,
-		uintptr_t dc_flags);
 
 long _dispatch_barrier_trysync_f(dispatch_queue_t dq, void *ctxt,
 		dispatch_function_t func);
diff --git a/src/shims.h b/src/shims.h
index 8dd23ee..28e1c53 100644
--- a/src/shims.h
+++ b/src/shims.h
@@ -36,6 +36,7 @@
 #include "shims/android_stubs.h"
 #endif
 
+#include "shims/hw_config.h"
 #include "shims/priority.h"
 
 #if HAVE_PTHREAD_WORKQUEUES
@@ -147,6 +148,51 @@
 }
 #endif
 
+#if HAVE_PTHREAD_QOS_H && __has_include(<pthread/qos_private.h>) && \
+		defined(PTHREAD_MAX_PARALLELISM_PHYSICAL) && \
+		DISPATCH_HAVE_HW_CONFIG_COMMPAGE && \
+		DISPATCH_MIN_REQUIRED_OSX_AT_LEAST(109900)
+#define DISPATCH_USE_PTHREAD_QOS_MAX_PARALLELISM 1
+#define DISPATCH_MAX_PARALLELISM_PHYSICAL PTHREAD_MAX_PARALLELISM_PHYSICAL
+#else
+#define DISPATCH_MAX_PARALLELISM_PHYSICAL 0x1
+#endif
+#define DISPATCH_MAX_PARALLELISM_ACTIVE 0x2
+_Static_assert(!(DISPATCH_MAX_PARALLELISM_PHYSICAL &
+		DISPATCH_MAX_PARALLELISM_ACTIVE), "Overlapping parallelism flags");
+
+DISPATCH_ALWAYS_INLINE
+static inline uint32_t
+_dispatch_qos_max_parallelism(dispatch_qos_t qos, unsigned long flags)
+{
+	uint32_t p;
+	int r = 0;
+
+	if (qos) {
+#if DISPATCH_USE_PTHREAD_QOS_MAX_PARALLELISM
+		r = pthread_qos_max_parallelism(_dispatch_qos_to_qos_class(qos),
+				flags & PTHREAD_MAX_PARALLELISM_PHYSICAL);
+#endif
+	}
+	if (likely(r > 0)) {
+		p = (uint32_t)r;
+	} else {
+		p = (flags & DISPATCH_MAX_PARALLELISM_PHYSICAL) ?
+				dispatch_hw_config(physical_cpus) :
+				dispatch_hw_config(logical_cpus);
+	}
+	if (flags & DISPATCH_MAX_PARALLELISM_ACTIVE) {
+		uint32_t active_cpus = dispatch_hw_config(active_cpus);
+		if ((flags & DISPATCH_MAX_PARALLELISM_PHYSICAL) &&
+				active_cpus < dispatch_hw_config(logical_cpus)) {
+			active_cpus /= dispatch_hw_config(logical_cpus) /
+					dispatch_hw_config(physical_cpus);
+		}
+		if (active_cpus < p) p = active_cpus;
+	}
+	return p;
+}
+
 #if !HAVE_NORETURN_BUILTIN_TRAP
 /*
  * XXXRW: Work-around for possible clang bug in which __builtin_trap() is not
@@ -174,7 +220,6 @@
 #include "shims/yield.h"
 #include "shims/lock.h"
 
-#include "shims/hw_config.h"
 #include "shims/perfmon.h"
 
 #include "shims/getprogname.h"
@@ -228,7 +273,8 @@
 #define _dispatch_clear_stack(s) do { \
 		void *a[(s)/sizeof(void*) ? (s)/sizeof(void*) : 1]; \
 		a[0] = pthread_get_stackaddr_np(pthread_self()); \
-		bzero((void*)&a[1], (size_t)(a[0] - (void*)&a[1])); \
+		void* volatile const p = (void*)&a[1]; /* <rdar://32604885> */ \
+		bzero((void*)p, (size_t)(a[0] - (void*)&a[1])); \
 	} while (0)
 #else
 #define _dispatch_clear_stack(s)
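
A worked example of the fallback arithmetic in _dispatch_qos_max_parallelism() when pthread_qos_max_parallelism() is unavailable (r == 0), under assumed hardware:

    // Assumed: 8 logical, 4 physical, 6 active CPUs.
    uint32_t n = _dispatch_qos_max_parallelism(DISPATCH_QOS_DEFAULT,
            DISPATCH_MAX_PARALLELISM_PHYSICAL | DISPATCH_MAX_PARALLELISM_ACTIVE);
    // p = physical_cpus = 4; then active_cpus (6) < logical_cpus (8), so
    // active_cpus /= logical/physical (2) -> 3; result n = MIN(4, 3) = 3.
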
diff --git a/src/shims/priority.h b/src/shims/priority.h
index 948e4c7..3e85ff5 100644
--- a/src/shims/priority.h
+++ b/src/shims/priority.h
@@ -94,7 +94,7 @@
 #define DISPATCH_PRIORITY_OVERRIDE_SHIFT     16
 #define DISPATCH_PRIORITY_FLAGS_MASK         ((dispatch_priority_t)0xff000000)
 
-#define DISPATCH_PRIORITY_SATURATED_OVERRIDE DISPATCH_PRIORITY_OVERRIDE_MASK
+#define DISPATCH_PRIORITY_SATURATED_OVERRIDE ((dispatch_priority_t)0x000f0000)
 
 #define DISPATCH_PRIORITY_FLAG_OVERCOMMIT    ((dispatch_priority_t)0x80000000) // _PTHREAD_PRIORITY_OVERCOMMIT_FLAG
 #define DISPATCH_PRIORITY_FLAG_DEFAULTQUEUE  ((dispatch_priority_t)0x04000000) // _PTHREAD_PRIORITY_DEFAULTQUEUE_FLAG
diff --git a/src/source.c b/src/source.c
index fd337a9..6b97507 100644
--- a/src/source.c
+++ b/src/source.c
@@ -521,6 +521,22 @@
 	}
 }
 
+DISPATCH_NOINLINE
+static void
+_dispatch_source_refs_finalize_unregistration(dispatch_source_t ds)
+{
+	dispatch_queue_flags_t dqf;
+	dispatch_source_refs_t dr = ds->ds_refs;
+
+	dqf = _dispatch_queue_atomic_flags_set_and_clear_orig(ds->_as_dq,
+			DSF_DELETED, DSF_ARMED | DSF_DEFERRED_DELETE | DSF_CANCEL_WAITER);
+	if (dqf & DSF_CANCEL_WAITER) {
+		_dispatch_wake_by_address(&ds->dq_atomic_flags);
+	}
+	_dispatch_debug("kevent-source[%p]: disarmed kevent[%p]", ds, dr);
+	_dispatch_release_tailcall(ds); // the retain is done at creation time
+}
+
 void
 _dispatch_source_refs_unregister(dispatch_source_t ds, uint32_t options)
 {
@@ -549,14 +565,8 @@
 		}
 	}
 
-	dqf = _dispatch_queue_atomic_flags_set_and_clear_orig(ds->_as_dq,
-			DSF_DELETED, DSF_ARMED | DSF_DEFERRED_DELETE | DSF_CANCEL_WAITER);
-	if (dqf & DSF_CANCEL_WAITER) {
-		_dispatch_wake_by_address(&ds->dq_atomic_flags);
-	}
 	ds->ds_is_installed = true;
-	_dispatch_debug("kevent-source[%p]: disarmed kevent[%p]", ds, dr);
-	_dispatch_release_tailcall(ds); // the retain is done at creation time
+	_dispatch_source_refs_finalize_unregistration(ds);
 }
 
 DISPATCH_ALWAYS_INLINE
@@ -619,8 +629,9 @@
 
 	if (unlikely(!_dispatch_source_tryarm(ds) ||
 			!_dispatch_unote_register(dr, wlh, pri))) {
-		_dispatch_queue_atomic_flags_set_and_clear(ds->_as_dq, DSF_DELETED,
-				DSF_ARMED | DSF_DEFERRED_DELETE);
+		// Do the parts of dispatch_source_refs_unregister() that
+		// are required after this partial initialization.
+		_dispatch_source_refs_finalize_unregistration(ds);
 	} else {
 		_dispatch_debug("kevent-source[%p]: armed kevent[%p]", ds, dr);
 	}
@@ -1761,8 +1772,10 @@
 {
 	uint32_t idx = (dth->dth_count += DTH_ID_COUNT) - DTH_ID_COUNT;
 
-	dispatch_assert(dt->dt_heap_entry[DTH_TARGET_ID] == DTH_INVALID_ID);
-	dispatch_assert(dt->dt_heap_entry[DTH_DEADLINE_ID] == DTH_INVALID_ID);
+	DISPATCH_TIMER_ASSERT(dt->dt_heap_entry[DTH_TARGET_ID], ==,
+			DTH_INVALID_ID, "target idx");
+	DISPATCH_TIMER_ASSERT(dt->dt_heap_entry[DTH_DEADLINE_ID], ==,
+			DTH_INVALID_ID, "deadline idx");
 
 	if (idx == 0) {
 		dt->dt_heap_entry[DTH_TARGET_ID] = DTH_TARGET_ID;
@@ -1786,12 +1799,16 @@
 {
 	uint32_t idx = (dth->dth_count -= DTH_ID_COUNT);
 
-	dispatch_assert(dt->dt_heap_entry[DTH_TARGET_ID] != DTH_INVALID_ID);
-	dispatch_assert(dt->dt_heap_entry[DTH_DEADLINE_ID] != DTH_INVALID_ID);
+	DISPATCH_TIMER_ASSERT(dt->dt_heap_entry[DTH_TARGET_ID], !=,
+			DTH_INVALID_ID, "target idx");
+	DISPATCH_TIMER_ASSERT(dt->dt_heap_entry[DTH_DEADLINE_ID], !=,
+			DTH_INVALID_ID, "deadline idx");
 
 	if (idx == 0) {
-		dispatch_assert(dth->dth_min[DTH_TARGET_ID] == dt);
-		dispatch_assert(dth->dth_min[DTH_DEADLINE_ID] == dt);
+		DISPATCH_TIMER_ASSERT(dth->dth_min[DTH_TARGET_ID], ==, dt,
+				"target slot");
+		DISPATCH_TIMER_ASSERT(dth->dth_min[DTH_DEADLINE_ID], ==, dt,
+				"deadline slot");
 		dth->dth_min[DTH_TARGET_ID] = dth->dth_min[DTH_DEADLINE_ID] = NULL;
 		goto clear_heap_entry;
 	}
@@ -1819,8 +1836,11 @@
 _dispatch_timer_heap_update(dispatch_timer_heap_t dth,
 		dispatch_timer_source_refs_t dt)
 {
-	dispatch_assert(dt->dt_heap_entry[DTH_TARGET_ID] != DTH_INVALID_ID);
-	dispatch_assert(dt->dt_heap_entry[DTH_DEADLINE_ID] != DTH_INVALID_ID);
+	DISPATCH_TIMER_ASSERT(dt->dt_heap_entry[DTH_TARGET_ID], !=,
+			DTH_INVALID_ID, "target idx");
+	DISPATCH_TIMER_ASSERT(dt->dt_heap_entry[DTH_DEADLINE_ID], !=,
+			DTH_INVALID_ID, "deadline idx");
+
 
 	_dispatch_timer_heap_resift(dth, dt, dt->dt_heap_entry[DTH_TARGET_ID]);
 	_dispatch_timer_heap_resift(dth, dt, dt->dt_heap_entry[DTH_DEADLINE_ID]);
@@ -1875,7 +1895,7 @@
 {
 	dispatch_timer_heap_t heap = &_dispatch_timers_heap[tidx];
 	if (_dispatch_unote_registered(dt)) {
-		dispatch_assert(dt->du_ident == tidx);
+		DISPATCH_TIMER_ASSERT(dt->du_ident, ==, tidx, "tidx");
 		_dispatch_timer_heap_update(heap, dt);
 	} else {
 		dt->du_ident = tidx;
@@ -2051,7 +2071,10 @@
 	uint64_t now = _dispatch_time_now_cached(DISPATCH_TIMER_CLOCK(tidx), nows);
 
 	while ((dr = _dispatch_timers_heap[tidx].dth_min[DTH_TARGET_ID])) {
-		dispatch_assert(tidx == dr->du_ident && dr->dt_timer.target);
+		DISPATCH_TIMER_ASSERT(dr->du_filter, ==, DISPATCH_EVFILT_TIMER,
+				"invalid filter");
+		DISPATCH_TIMER_ASSERT(dr->du_ident, ==, tidx, "tidx");
+		DISPATCH_TIMER_ASSERT(dr->dt_timer.target, !=, 0, "missing target");
 		ds = _dispatch_source_from_refs(dr);
 		if (dr->dt_timer.target > now) {
 			// Done running timers for now.
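
The assertion changes in this file trade bare dispatch_assert() calls for DISPATCH_TIMER_ASSERT, which carries the offending value and a short tag into the failure report. A hypothetical stand-in sketch (the real macro lives in libdispatch's internal headers and crashes through the usual libdispatch reporting machinery):

#include <stdio.h>
#include <stdlib.h>

/* Illustrative value-carrying assert; TIMER_ASSERT is not the real name. */
#define TIMER_ASSERT(a, op, b, text) do { \
		unsigned long long _a = (unsigned long long)(a); \
		if (!(_a op (unsigned long long)(b))) { \
			fprintf(stderr, "timer assert failed (%s): %llu\n", \
					text, _a); \
			abort(); \
		} \
	} while (0)

int main(void)
{
	unsigned int idx = 3;
	TIMER_ASSERT(idx, !=, 0, "target idx"); /* passes silently */
	TIMER_ASSERT(idx, ==, 0, "target idx"); /* aborts, reporting the value 3 */
	return 0;
}
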
diff --git a/src/swift/Source.swift b/src/swift/Source.swift
index a3a7e79..421a6e9 100644
--- a/src/swift/Source.swift
+++ b/src/swift/Source.swift
@@ -279,28 +279,342 @@
 #endif
 
 public extension DispatchSourceTimer {
+	///
+	/// Sets the deadline and leeway for a timer event that fires once.
+	///
+	/// Once this function returns, any pending source data accumulated for the previous timer values
+	/// has been cleared and the next timer event will occur at `deadline`.
+	///
+	/// Delivery of the timer event may be delayed by the system in order to improve power consumption
+	/// and system performance. The upper limit to the allowable delay may be configured with the `leeway`
+	/// argument; the lower limit is under the control of the system.
+	///
+	/// The lower limit to the allowable delay may vary with process state such as visibility of the
+	/// application UI. If the timer source was created with flags `TimerFlags.strict`, the system
+	/// will make a best effort to strictly observe the provided `leeway` value, even if it is smaller
+	/// than the current lower limit. Note that a minimal amount of delay is to be expected even if
+	/// this flag is specified.
+	///
+	/// Calling this method has no effect if the timer source has already been canceled.
+	/// - note: Delivery of the timer event does not cancel the timer source.
+	///
+	/// - parameter deadline: the time at which the timer event will be delivered, subject to the
+	///     leeway and other considerations described above. The deadline is based on Mach absolute
+	///     time.
+	/// - parameter leeway: the leeway for the timer.
+	///
+	@available(swift, deprecated: 4, renamed: "schedule(deadline:repeating:leeway:)")
 	public func scheduleOneshot(deadline: DispatchTime, leeway: DispatchTimeInterval = .nanoseconds(0)) {
 		dispatch_source_set_timer((self as! DispatchSource).__wrapped, deadline.rawValue, ~0, UInt64(leeway.rawValue))
 	}
 
+	///
+	/// Sets the deadline and leeway for a timer event that fires once.
+	///
+	/// Once this function returns, any pending source data accumulated for the previous timer values
+	/// has been cleared and the next timer event will occur at `wallDeadline`.
+	///
+	/// Delivery of the timer event may be delayed by the system in order to improve power consumption
+	/// and system performance. The upper limit to the allowable delay may be configured with the `leeway`
+	/// argument; the lower limit is under the control of the system.
+	///
+	/// The lower limit to the allowable delay may vary with process state such as visibility of the
+	/// application UI. If the timer source was created with flags `TimerFlags.strict`, the system
+	/// will make a best effort to strictly observe the provided `leeway` value, even if it is smaller
+	/// than the current lower limit. Note that a minimal amount of delay is to be expected even if
+	/// this flag is specified.
+	///
+	/// Calling this method has no effect if the timer source has already been canceled.
+	/// - note: Delivery of the timer event does not cancel the timer source.
+	///
+	/// - parameter wallDeadline: the time at which the timer event will be delivered, subject to the
+	///     leeway and other considerations described above. The deadline is based on
+	///     `gettimeofday(3)`.
+	/// - parameter leeway: the leeway for the timer.
+	///
+	@available(swift, deprecated: 4, renamed: "schedule(wallDeadline:repeating:leeway:)")
 	public func scheduleOneshot(wallDeadline: DispatchWallTime, leeway: DispatchTimeInterval = .nanoseconds(0)) {
 		dispatch_source_set_timer((self as! DispatchSource).__wrapped, wallDeadline.rawValue, ~0, UInt64(leeway.rawValue))
 	}
 
+	///
+	/// Sets the deadline, interval and leeway for a timer event that fires at least once.
+	///
+	/// Once this function returns, any pending source data accumulated for the previous timer values
+	/// has been cleared. The next timer event will occur at `deadline` and every `interval` units of
+	/// time thereafter until the timer source is canceled.
+	///
+	/// Delivery of a timer event may be delayed by the system in order to improve power consumption
+	/// and system performance. The upper limit to the allowable delay may be configured with the `leeway`
+	/// argument; the lower limit is under the control of the system.
+	///
+	/// For the initial timer fire at `deadline`, the upper limit to the allowable delay is set to
+	/// `leeway`. For the subsequent timer fires at `deadline + N * interval`, the upper
+	/// limit is the smaller of `leeway` and `interval/2`.
+	///
+	/// The lower limit to the allowable delay may vary with process state such as visibility of the
+	/// application UI. If the timer source was created with flags `TimerFlags.strict`, the system
+	/// will make a best effort to strictly observe the provided `leeway` value, even if it is smaller
+	/// than the current lower limit. Note that a minimal amount of delay is to be expected even if
+	/// this flag is specified.
+	///
+	/// Calling this method has no effect if the timer source has already been canceled.
+	///
+	/// - parameter deadline: the time at which the timer event will be delivered, subject to the
+	///     leeway and other considerations described above. The deadline is based on Mach absolute
+	///     time.
+	/// - parameter interval: the interval for the timer.
+	/// - parameter leeway: the leeway for the timer.
+	///
+	@available(swift, deprecated: 4, renamed: "schedule(deadline:repeating:leeway:)")
 	public func scheduleRepeating(deadline: DispatchTime, interval: DispatchTimeInterval, leeway: DispatchTimeInterval = .nanoseconds(0)) {
-		dispatch_source_set_timer((self as! DispatchSource).__wrapped, deadline.rawValue, UInt64(interval.rawValue), UInt64(leeway.rawValue))
+		dispatch_source_set_timer((self as! DispatchSource).__wrapped, deadline.rawValue, interval == .never ? ~0 : UInt64(interval.rawValue), UInt64(leeway.rawValue))
 	}
 
+	///
+	/// Sets the deadline, interval and leeway for a timer event that fires at least once.
+	///
+	/// Once this function returns, any pending source data accumulated for the previous timer values
+	/// has been cleared. The next timer event will occur at `deadline` and every `interval` seconds
+	/// thereafter until the timer source is canceled.
+	///
+	/// Delivery of a timer event may be delayed by the system in order to improve power consumption and
+	/// system performance. The upper limit to the allowable delay may be configured with the `leeway`
+	/// argument; the lower limit is under the control of the system.
+	///
+	/// For the initial timer fire at `deadline`, the upper limit to the allowable delay is set to
+	/// `leeway`. For the subsequent timer fires at `deadline + N * interval`, the upper
+	/// limit is the smaller of `leeway` and `interval/2`.
+	///
+	/// The lower limit to the allowable delay may vary with process state such as visibility of the
+	/// application UI. If the timer source was created with flags `TimerFlags.strict`, the system
+	/// will make a best effort to strictly observe the provided `leeway` value, even if it is smaller
+	/// than the current lower limit. Note that a minimal amount of delay is to be expected even if
+	/// this flag is specified.
+	///
+	/// Calling this method has no effect if the timer source has already been canceled.
+	///
+	/// - parameter deadline: the time at which the timer event will be delivered, subject to the
+	///     leeway and other considerations described above. The deadline is based on Mach absolute
+	///     time.
+	/// - parameter interval: the interval for the timer in seconds.
+	/// - parameter leeway: the leeway for the timer.
+	///
+	@available(swift, deprecated: 4, renamed: "schedule(deadline:repeating:leeway:)")
 	public func scheduleRepeating(deadline: DispatchTime, interval: Double, leeway: DispatchTimeInterval = .nanoseconds(0)) {
-		dispatch_source_set_timer((self as! DispatchSource).__wrapped, deadline.rawValue, UInt64(interval * Double(NSEC_PER_SEC)), UInt64(leeway.rawValue))
+		dispatch_source_set_timer((self as! DispatchSource).__wrapped, deadline.rawValue, interval.isInfinite ? ~0 : UInt64(interval * Double(NSEC_PER_SEC)), UInt64(leeway.rawValue))
 	}
 
+	///
+	/// Sets the deadline, interval and leeway for a timer event that fires at least once.
+	///
+	/// Once this function returns, any pending source data accumulated for the previous timer values
+	/// has been cleared. The next timer event will occur at `wallDeadline` and every `interval` units of
+	/// time thereafter until the timer source is canceled.
+	///
+	/// Delivery of a timer event may be delayed by the system in order to improve power consumption and
+	/// system performance. The upper limit to the allowable delay may be configured with the `leeway`
+	/// argument; the lower limit is under the control of the system.
+	///
+	/// For the initial timer fire at `wallDeadline`, the upper limit to the allowable delay is set to
+	/// `leeway`. For the subsequent timer fires at `wallDeadline + N * interval`, the upper
+	/// limit is the smaller of `leeway` and `interval/2`.
+	///
+	/// The lower limit to the allowable delay may vary with process state such as visibility of the
+	/// application UI. If the timer source was created with flags `TimerFlags.strict`, the system
+	/// will make a best effort to strictly observe the provided `leeway` value, even if it is smaller
+	/// than the current lower limit. Note that a minimal amount of delay is to be expected even if
+	/// this flag is specified.
+	///
+	/// Calling this method has no effect if the timer source has already been canceled.
+	///
+	/// - parameter wallDeadline: the time at which the timer event will be delivered, subject to the
+	///     leeway and other considerations described above. The deadline is based on
+	///     `gettimeofday(3)`.
+	/// - parameter interval: the interval for the timer.
+	/// - parameter leeway: the leeway for the timer.
+	///
+	@available(swift, deprecated: 4, renamed: "schedule(wallDeadline:repeating:leeway:)")
 	public func scheduleRepeating(wallDeadline: DispatchWallTime, interval: DispatchTimeInterval, leeway: DispatchTimeInterval = .nanoseconds(0)) {
-		dispatch_source_set_timer((self as! DispatchSource).__wrapped, wallDeadline.rawValue, UInt64(interval.rawValue), UInt64(leeway.rawValue))
+		dispatch_source_set_timer((self as! DispatchSource).__wrapped, wallDeadline.rawValue, interval == .never ? ~0 : UInt64(interval.rawValue), UInt64(leeway.rawValue))
 	}
 
+	///
+	/// Sets the deadline, interval and leeway for a timer event that fires at least once.
+	///
+	/// Once this function returns, any pending source data accumulated for the previous timer values
+	/// has been cleared. The next timer event will occur at `wallDeadline` and every `interval` seconds
+	/// thereafter until the timer source is canceled.
+	///
+	/// Delivery of a timer event may be delayed by the system in order to improve power consumption and
+	/// system performance. The upper limit to the allowable delay may be configured with the `leeway`
+	/// argument; the lower limit is under the control of the system.
+	///
+	/// For the initial timer fire at `wallDeadline`, the upper limit to the allowable delay is set to
+	/// `leeway`. For the subsequent timer fires at `wallDeadline + N * interval`, the upper
+	/// limit is the smaller of `leeway` and `interval/2`.
+	///
+	/// The lower limit to the allowable delay may vary with process state such as visibility of the
+	/// application UI. If the timer source was created with flags `TimerFlags.strict`, the system
+	/// will make a best effort to strictly observe the provided `leeway` value, even if it is smaller
+	/// than the current lower limit. Note that a minimal amount of delay is to be expected even if
+	/// this flag is specified.
+	///
+	/// Calling this method has no effect if the timer source has already been canceled.
+	///
+	/// - parameter wallDeadline: the time at which the timer event will be delivered, subject to the
+	///     leeway and other considerations described above. The deadline is based on
+	///     `gettimeofday(3)`.
+	/// - parameter interval: the interval for the timer in seconds.
+	/// - parameter leeway: the leeway for the timer.
+	///
+	@available(swift, deprecated: 4, renamed: "schedule(wallDeadline:repeating:leeway:)")
 	public func scheduleRepeating(wallDeadline: DispatchWallTime, interval: Double, leeway: DispatchTimeInterval = .nanoseconds(0)) {
-		dispatch_source_set_timer((self as! DispatchSource).__wrapped, wallDeadline.rawValue, UInt64(interval * Double(NSEC_PER_SEC)), UInt64(leeway.rawValue))
+		dispatch_source_set_timer((self as! DispatchSource).__wrapped, wallDeadline.rawValue, interval.isInfinite ? ~0 : UInt64(interval * Double(NSEC_PER_SEC)), UInt64(leeway.rawValue))
+	}
+
+	///
+	/// Sets the deadline, repeat interval and leeway for a timer event.
+	///
+	/// Once this function returns, any pending source data accumulated for the previous timer values
+	/// has been cleared. The next timer event will occur at `deadline` and every `repeating` units of
+	/// time thereafter until the timer source is canceled. If the value of `repeating` is `.never`,
+	/// or is defaulted, the timer fires only once.
+	///
+	/// Delivery of a timer event may be delayed by the system in order to improve power consumption
+	/// and system performance. The upper limit to the allowable delay may be configured with the `leeway`
+	/// argument; the lower limit is under the control of the system.
+	///
+	/// For the initial timer fire at `deadline`, the upper limit to the allowable delay is set to
+	/// `leeway`. For the subsequent timer fires at `deadline + N * repeating`, the upper
+	/// limit is the smaller of `leeway` and `repeating/2`.
+	///
+	/// The lower limit to the allowable delay may vary with process state such as visibility of the
+	/// application UI. If the timer source was created with flags `TimerFlags.strict`, the system
+	/// will make a best effort to strictly observe the provided `leeway` value, even if it is smaller
+	/// than the current lower limit. Note that a minimal amount of delay is to be expected even if
+	/// this flag is specified.
+	///
+	/// Calling this method has no effect if the timer source has already been canceled.
+	///
+	/// - parameter deadline: the time at which the first timer event will be delivered, subject to the
+	///     leeway and other considerations described above. The deadline is based on Mach absolute
+	///     time.
+	/// - parameter repeating: the repeat interval for the timer, or `.never` if the timer should fire
+	///     only once.
+	/// - parameter leeway: the leeway for the timer.
+	///
+	@available(swift, introduced: 4)
+	public func schedule(deadline: DispatchTime, repeating interval: DispatchTimeInterval = .never, leeway: DispatchTimeInterval = .nanoseconds(0)) {
+		dispatch_source_set_timer((self as! DispatchSource).__wrapped, deadline.rawValue, interval == .never ? ~0 : UInt64(interval.rawValue), UInt64(leeway.rawValue))
+	}
+
+	///
+	/// Sets the deadline, repeat interval and leeway for a timer event.
+	///
+	/// Once this function returns, any pending source data accumulated for the previous timer values
+	/// has been cleared. The next timer event will occur at `deadline` and every `repeating` seconds
+	/// thereafter until the timer source is canceled. If the value of `repeating` is `.infinity`,
+	/// the timer fires only once.
+	///
+	/// Delivery of a timer event may be delayed by the system in order to improve power consumption
+	/// and system performance. The upper limit to the allowable delay may be configured with the `leeway`
+	/// argument; the lower limit is under the control of the system.
+	///
+	/// For the initial timer fire at `deadline`, the upper limit to the allowable delay is set to
+	/// `leeway`. For the subsequent timer fires at `deadline + N * repeating`, the upper
+	/// limit is the smaller of `leeway` and `repeating/2`.
+	///
+	/// The lower limit to the allowable delay may vary with process state such as visibility of the
+	/// application UI. If the timer source was created with flags `TimerFlags.strict`, the system
+	/// will make a best effort to strictly observe the provided `leeway` value, even if it is smaller
+	/// than the current lower limit. Note that a minimal amount of delay is to be expected even if
+	/// this flag is specified.
+	///
+	/// Calling this method has no effect if the timer source has already been canceled.
+	///
+	/// - parameter deadline: the time at which the timer event will be delivered, subject to the
+	///     leeway and other considerations described above. The deadline is based on Mach absolute
+	///     time.
+	/// - parameter repeating: the repeat interval for the timer in seconds, or `.infinity` if the timer
+	///     should fire only once.
+	/// - parameter leeway: the leeway for the timer.
+	///
+	@available(swift, introduced: 4)
+	public func schedule(deadline: DispatchTime, repeating interval: Double, leeway: DispatchTimeInterval = .nanoseconds(0)) {
+		dispatch_source_set_timer((self as! DispatchSource).__wrapped, deadline.rawValue, interval.isInfinite ? ~0 : UInt64(interval * Double(NSEC_PER_SEC)), UInt64(leeway.rawValue))
+	}
+
+	///
+	/// Sets the deadline, repeat interval and leeway for a timer event.
+	///
+	/// Once this function returns, any pending source data accumulated for the previous timer values
+	/// has been cleared. The next timer event will occur at `wallDeadline` and every `repeating` units of
+	/// time thereafter until the timer source is canceled. If the value of `repeating` is `.never`,
+	/// or is defaulted, the timer fires only once.
+	///
+	/// Delivery of a timer event may be delayed by the system in order to improve power consumption and
+	/// system performance. The upper limit to the allowable delay may be configured with the `leeway`
+	/// argument; the lower limit is under the control of the system.
+	///
+	/// For the initial timer fire at `wallDeadline`, the upper limit to the allowable delay is set to
+	/// `leeway`. For the subsequent timer fires at `wallDeadline + N * repeating`, the upper
+	/// limit is the smaller of `leeway` and `repeating/2`.
+	///
+	/// The lower limit to the allowable delay may vary with process state such as visibility of the
+	/// application UI. If the timer source was created with flags `TimerFlags.strict`, the system
+	/// will make a best effort to strictly observe the provided `leeway` value, even if it is smaller
+	/// than the current lower limit. Note that a minimal amount of delay is to be expected even if
+	/// this flag is specified.
+	///
+	/// Calling this method has no effect if the timer source has already been canceled.
+	///
+	/// - parameter wallDeadline: the time at which the timer event will be delivered, subject to the
+	///     leeway and other considerations described above. The deadline is based on
+	///     `gettimeofday(3)`.
+	/// - parameter repeating: the repeat interval for the timer, or `.never` if the timer should fire
+	///     only once.
+	/// - parameter leeway: the leeway for the timer.
+	///
+	@available(swift, introduced: 4)
+	public func schedule(wallDeadline: DispatchWallTime, repeating interval: DispatchTimeInterval = .never, leeway: DispatchTimeInterval = .nanoseconds(0)) {
+		dispatch_source_set_timer((self as! DispatchSource).__wrapped, wallDeadline.rawValue, interval == .never ? ~0 : UInt64(interval.rawValue), UInt64(leeway.rawValue))
+	}
+
+	///
+	/// Sets the deadline, repeat interval and leeway for a timer event.
+	///
+	/// Once this function returns, any pending source data accumulated for the previous timer values
+	/// has been cleared. The next timer event will occur at `wallDeadline` and every `repeating` seconds
+	/// thereafter until the timer source is canceled. If the value of `repeating` is `.infinity`,
+	/// the timer fires only once.
+	///
+	/// Delivery of a timer event may be delayed by the system in order to improve power consumption
+	/// and system performance. The upper limit to the allowable delay may be configured with the `leeway`
+	/// argument; the lower limit is under the control of the system.
+	///
+	/// For the initial timer fire at `wallDeadline`, the upper limit to the allowable delay is set to
+	/// `leeway`. For the subsequent timer fires at `wallDeadline + N * repeating`, the upper
+	/// limit is the smaller of `leeway` and `repeating/2`.
+	///
+	/// The lower limit to the allowable delay may vary with process state such as visibility of the
+	/// application UI. If the timer source was created with flags `TimerFlags.strict`, the system
+	/// will make a best effort to strictly observe the provided `leeway` value, even if it is smaller
+	/// than the current lower limit. Note that a minimal amount of delay is to be expected even if
+	/// this flag is specified.
+	///
+	/// Calling this method has no effect if the timer source has already been canceled.
+	///
+	/// - parameter wallDeadline: the time at which the timer event will be delivered, subject to the
+	///     leeway and other considerations described above. The deadline is based on
+	///     `gettimeofday(3)`.
+	/// - parameter repeating: the repeat interval for the timer in seconds, or `.infinity` if the timer
+	///     should fire only once.
+	/// - parameter leeway: the leeway for the timer.
+	///
+	@available(swift, introduced: 4)
+	public func schedule(wallDeadline: DispatchWallTime, repeating interval: Double, leeway: DispatchTimeInterval = .nanoseconds(0)) {
+		dispatch_source_set_timer((self as! DispatchSource).__wrapped, wallDeadline.rawValue, interval.isInfinite ? ~0 : UInt64(interval * Double(NSEC_PER_SEC)), UInt64(leeway.rawValue))
 	}
 }
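
Every Swift scheduling wrapper above bottoms out in the C entry point dispatch_source_set_timer(), where an interval of ~0 (DISPATCH_TIME_FOREVER) requests a one-shot timer; that is exactly what the `.never` and `.infinity` cases map to. A minimal one-shot example in C, assuming a blocks-capable toolchain:

#include <dispatch/dispatch.h>
#include <stdio.h>

int main(void)
{
	dispatch_source_t timer = dispatch_source_create(
			DISPATCH_SOURCE_TYPE_TIMER, 0, 0,
			dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0));
	dispatch_source_set_event_handler(timer, ^{
		printf("fired once\n");
	});
	/* deadline 1s out; DISPATCH_TIME_FOREVER as the interval makes the
	 * timer one-shot; 10ms of leeway lets the kernel coalesce the wakeup */
	dispatch_source_set_timer(timer,
			dispatch_time(DISPATCH_TIME_NOW, (int64_t)NSEC_PER_SEC),
			DISPATCH_TIME_FOREVER, 10 * NSEC_PER_MSEC);
	dispatch_resume(timer);
	dispatch_main(); /* park the main thread; handlers run on a global queue */
}
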
 
diff --git a/src/swift/Time.swift b/src/swift/Time.swift
index 8178ffd..d7d49c9 100644
--- a/src/swift/Time.swift
+++ b/src/swift/Time.swift
@@ -124,6 +124,8 @@
 	case milliseconds(Int)
 	case microseconds(Int)
 	case nanoseconds(Int)
+	@_downgrade_exhaustivity_check
+	case never
 
 	internal var rawValue: Int64 {
 		switch self {
@@ -131,6 +133,16 @@
 		case .milliseconds(let ms): return Int64(ms) * Int64(NSEC_PER_MSEC)
 		case .microseconds(let us): return Int64(us) * Int64(NSEC_PER_USEC)
 		case .nanoseconds(let ns): return Int64(ns)
+		case .never: return Int64.max
+		}
+	}
+
+	public static func ==(lhs: DispatchTimeInterval, rhs: DispatchTimeInterval) -> Bool {
+		switch (lhs, rhs) {
+		case (.never, .never): return true
+		case (.never, _): return false
+		case (_, .never): return false
+		default: return lhs.rawValue == rhs.rawValue
 		}
 	}
 }
diff --git a/src/trace.h b/src/trace.h
index 872cd6f..c670f60 100644
--- a/src/trace.h
+++ b/src/trace.h
@@ -29,14 +29,6 @@
 
 #if DISPATCH_PURE_C
 
-#if DISPATCH_USE_DTRACE || DISPATCH_USE_DTRACE_INTROSPECTION
-typedef struct dispatch_trace_timer_params_s {
-	int64_t deadline, interval, leeway;
-} *dispatch_trace_timer_params_t;
-
-#include "provider.h"
-#endif // DISPATCH_USE_DTRACE || DISPATCH_USE_DTRACE_INTROSPECTION
-
 #if DISPATCH_USE_DTRACE_INTROSPECTION
 #define _dispatch_trace_callout(_c, _f, _dcc) do { \
 		if (slowpath(DISPATCH_CALLOUT_ENTRY_ENABLED()) || \
diff --git a/src/voucher.c b/src/voucher.c
index e4128a2..458e2f0 100644
--- a/src/voucher.c
+++ b/src/voucher.c
@@ -85,6 +85,7 @@
 	if (extra) {
 		memcpy(_voucher_extra_recipes(voucher), recipe->vr_data, extra);
 	}
+	_voucher_trace(CREATE, voucher, MACH_PORT_NULL, 0);
 	return voucher;
 }
 #endif
@@ -585,6 +586,7 @@
 		}
 	}
 
+	_voucher_trace(CREATE, v, v->v_kvoucher, v->v_activity);
 	_voucher_insert(v);
 	_dispatch_voucher_debug("kvoucher[0x%08x] create", v, kv);
 	return v;
@@ -619,6 +621,7 @@
 				"voucher[%p]", v, kv, ov);
 		_dispatch_voucher_debug_machport(kv);
 	}
+	_voucher_trace(CREATE, v, v->v_kvoucher, v->v_activity);
 	return v;
 }
 
@@ -676,6 +679,7 @@
 		_dispatch_voucher_debug("kvoucher[0x%08x] create without importance "
 				"from voucher[%p]", v, kv, ov);
 	}
+	_voucher_trace(CREATE, v, v->v_kvoucher, v->v_activity);
 	return v;
 }
 
@@ -711,6 +715,7 @@
 		v->v_kvbase = _voucher_retain(ov);
 		_voucher_dealloc_mach_voucher(kv); // borrow base reference
 	}
+	_voucher_trace(CREATE, v, kv, v->v_activity);
 	_voucher_insert(v);
 	_dispatch_voucher_debug("kvoucher[0x%08x] create accounting voucher "
 			"from voucher[%p]", v, kv, ov);
@@ -774,6 +779,7 @@
 void
 _voucher_dispose(voucher_t voucher)
 {
+	_voucher_trace(DISPOSE, voucher);
 	_dispatch_voucher_debug("dispose", voucher);
 	if (slowpath(_voucher_hash_is_enqueued(voucher))) {
 		_dispatch_voucher_debug("corruption", voucher);
@@ -1237,6 +1243,7 @@
 	}
 done:
 	*trace_id = ftid.ftid_value;
+	_voucher_trace(CREATE, v, v->v_kvoucher, va_id);
 	return v;
 }
 
diff --git a/src/voucher_internal.h b/src/voucher_internal.h
index a0ddd4d..772c8c4 100644
--- a/src/voucher_internal.h
+++ b/src/voucher_internal.h
@@ -262,6 +262,16 @@
 #define _dispatch_voucher_debug_machport(name) ((void)(name))
 #endif
 
+#if DISPATCH_USE_DTRACE
+#define _voucher_trace(how, ...)  ({ \
+		if (unlikely(VOUCHER_##how##_ENABLED())) { \
+			VOUCHER_##how(__VA_ARGS__); \
+		} \
+	})
+#else
+#define _voucher_trace(how, ...) ((void)0)
+#endif
+
 #ifndef DISPATCH_VOUCHER_OBJC_DEBUG
 #if DISPATCH_INTROSPECTION || DISPATCH_DEBUG
 #define DISPATCH_VOUCHER_OBJC_DEBUG 1
@@ -270,36 +280,29 @@
 #endif
 #endif // DISPATCH_VOUCHER_OBJC_DEBUG
 
-#if DISPATCH_PURE_C
-
 DISPATCH_ALWAYS_INLINE
-static inline voucher_t
-_voucher_retain(voucher_t voucher)
+static inline struct voucher_s *
+_voucher_retain_inline(struct voucher_s *voucher)
 {
-#if !DISPATCH_VOUCHER_OBJC_DEBUG
 	// not using _os_object_refcnt* because we don't need barriers:
 	// vouchers are immutable and are in a hash table with a lock
 	int xref_cnt = os_atomic_inc2o(voucher, os_obj_xref_cnt, relaxed);
+	_voucher_trace(RETAIN, (voucher_t)voucher, xref_cnt + 1);
 	_dispatch_voucher_debug("retain  -> %d", voucher, xref_cnt + 1);
 	if (unlikely(xref_cnt <= 0)) {
 		_OS_OBJECT_CLIENT_CRASH("Voucher resurrection");
 	}
-#else
-	os_retain(voucher);
-	_dispatch_voucher_debug("retain  -> %d", voucher,
-			voucher->os_obj_xref_cnt + 1);
-#endif // DISPATCH_DEBUG
 	return voucher;
 }
 
 DISPATCH_ALWAYS_INLINE
 static inline void
-_voucher_release(voucher_t voucher)
+_voucher_release_inline(struct voucher_s *voucher)
 {
-#if !DISPATCH_VOUCHER_OBJC_DEBUG
 	// not using _os_object_refcnt* because we don't need barriers:
 	// vouchers are immutable and are in a hash table with a lock
 	int xref_cnt = os_atomic_dec2o(voucher, os_obj_xref_cnt, relaxed);
+	_voucher_trace(RELEASE, (voucher_t)voucher, xref_cnt + 1);
 	_dispatch_voucher_debug("release -> %d", voucher, xref_cnt + 1);
 	if (likely(xref_cnt >= 0)) {
 		return;
@@ -308,10 +311,31 @@
 		_OS_OBJECT_CLIENT_CRASH("Voucher over-release");
 	}
 	return _os_object_xref_dispose((_os_object_t)voucher);
+}
+
+#if DISPATCH_PURE_C
+
+DISPATCH_ALWAYS_INLINE
+static inline voucher_t
+_voucher_retain(voucher_t voucher)
+{
+#if DISPATCH_VOUCHER_OBJC_DEBUG
+	os_retain(voucher);
 #else
-	_dispatch_voucher_debug("release -> %d", voucher, voucher->os_obj_xref_cnt);
-	return os_release(voucher);
-#endif // DISPATCH_DEBUG
+	_voucher_retain_inline(voucher);
+#endif // DISPATCH_VOUCHER_OBJC_DEBUG
+	return voucher;
+}
+
+DISPATCH_ALWAYS_INLINE
+static inline void
+_voucher_release(voucher_t voucher)
+{
+#if DISPATCH_VOUCHER_OBJC_DEBUG
+	os_release(voucher);
+#else
+	_voucher_release_inline(voucher);
+#endif // DISPATCH_VOUCHER_OBJC_DEBUG
 }
 
 DISPATCH_ALWAYS_INLINE
@@ -322,13 +346,13 @@
 	// not using _os_object_refcnt* because we don't need barriers:
 	// vouchers are immutable and are in a hash table with a lock
 	int xref_cnt = os_atomic_dec2o(voucher, os_obj_xref_cnt, relaxed);
+	_voucher_trace(RELEASE, voucher, xref_cnt + 1);
 	_dispatch_voucher_debug("release -> %d", voucher, xref_cnt + 1);
 	if (likely(xref_cnt >= 0)) {
 		return;
 	}
 	_OS_OBJECT_CLIENT_CRASH("Voucher over-release");
 #else
-	_dispatch_voucher_debug("release -> %d", voucher, voucher->os_obj_xref_cnt);
 	return os_release(voucher);
 #endif // DISPATCH_DEBUG
 }
@@ -371,8 +395,10 @@
 _voucher_swap_and_get_mach_voucher(voucher_t ov, voucher_t voucher)
 {
 	if (ov == voucher) return VOUCHER_NO_MACH_VOUCHER;
-	_dispatch_voucher_debug("swap from voucher[%p]", voucher, ov);
+	if (ov) _voucher_trace(ORPHAN, ov);
 	_dispatch_thread_setspecific(dispatch_voucher_key, voucher);
+	if (voucher) _voucher_trace(ADOPT, voucher);
+	_dispatch_voucher_debug("swap from voucher[%p]", voucher, ov);
 	mach_voucher_t kv = voucher ? voucher->v_kvoucher : MACH_VOUCHER_NULL;
 	mach_voucher_t okv = ov ? ov->v_kvoucher : MACH_VOUCHER_NULL;
 #if OS_VOUCHER_ACTIVITY_GENERATE_SWAPS
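
The _voucher_trace() macro above wraps each USDT probe in its dtrace(1)-generated *_ENABLED() predicate, so a disabled probe costs one well-predicted branch and the probe arguments are never evaluated. A sketch of the same guard pattern in isolation; the MY_ macro and header name are illustrative, while VOUCHER_CREATE()/VOUCHER_CREATE_ENABLED() are the shape of names `dtrace -h` emits for a `voucher` provider's `create` probe:

/*
 * provider sketch (voucher_provider.d), run through `dtrace -h`:
 *
 *     provider voucher {
 *         probe create(void *voucher, int port, uint64_t aid);
 *     };
 */
#include "voucher_provider.h" /* hypothetical dtrace-generated header */

/* The *_ENABLED() predicate is a cheap, runtime-patchable
 * is-anyone-tracing check; guarding avoids evaluating the probe
 * arguments when nothing is attached. */
#define MY_VOUCHER_TRACE_CREATE(v, port, aid) do { \
		if (VOUCHER_CREATE_ENABLED()) { \
			VOUCHER_CREATE((v), (port), (aid)); \
		} \
	} while (0)
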
diff --git a/tools/voucher_trace.d b/tools/voucher_trace.d
new file mode 100755
index 0000000..890198e
--- /dev/null
+++ b/tools/voucher_trace.d
@@ -0,0 +1,78 @@
+#!/usr/sbin/dtrace -s
+
+/*
+ * Copyright (c) 2017 Apple Inc. All rights reserved.
+ *
+ * @APPLE_APACHE_LICENSE_HEADER_START@
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * @APPLE_APACHE_LICENSE_HEADER_END@
+ */
+
+/*
+ * Usage: voucher_trace.d -p [pid]
+ *        traced process must have been executed with
+ *        DYLD_LIBRARY_PATH=/usr/lib/system/introspection or with
+ *        DYLD_IMAGE_SUFFIX=_profile or DYLD_IMAGE_SUFFIX=_debug
+ */
+
+#pragma D option quiet
+#pragma D option zdefs
+#pragma D option bufsize=16m
+
+BEGIN {
+	printf("Starting to trace voucher operations...\n");
+}
+
+voucher$target:libdispatch*.dylib::create
+{
+	printf("ALLOC   voucher 0x%p, thread %#llx, ref 1, port %#x, aid %#llx", arg0, tid, arg1, arg2);
+	ustack(10);
+	printf("\n")
+}
+
+voucher$target:libdispatch*.dylib::dispose
+{
+	printf("FREE    voucher 0x%p, thread %#llx, ref 0", arg0, tid);
+	ustack(10);
+	printf("\n")
+}
+
+voucher$target:libdispatch*.dylib::retain
+{
+	printf("RETAIN  voucher 0x%p, thread %#llx, ref %d", arg0, tid, arg1);
+	ustack(10);
+	printf("\n")
+}
+
+voucher$target:libdispatch*.dylib::release
+{
+	printf("RELEASE voucher 0x%p, thread %#llx, ref %d", arg0, tid, arg1);
+	ustack(10);
+	printf("\n")
+}
+
+voucher$target:libdispatch*.dylib::adopt
+{
+	printf("ADOPT   voucher 0x%p, thread %#llx", arg0, tid);
+	ustack(10);
+	printf("\n")
+}
+
+voucher$target:libdispatch*.dylib::orphan
+{
+	printf("ORPHAN  voucher 0x%p, thread %#llx", arg0, tid);
+	ustack(10);
+	printf("\n")
+}
diff --git a/xcodeconfig/libdispatch.order b/xcodeconfig/libdispatch.order
index a25ecc9..9642ca4 100644
--- a/xcodeconfig/libdispatch.order
+++ b/xcodeconfig/libdispatch.order
@@ -71,6 +71,18 @@
 _OBJC_METACLASS_$_OS_dispatch_queue_serial
 _OBJC_METACLASS_$_OS_dispatch_queue_concurrent
 _OBJC_METACLASS_$_OS_dispatch_queue_root
+_OBJC_METACLASS_$_OS_dispatch_queue_main
+_OBJC_METACLASS_$_OS_dispatch_queue_runloop
+_OBJC_METACLASS_$_OS_dispatch_queue_mgr
+_OBJC_METACLASS_$_OS_dispatch_queue_specific_queue
+_OBJC_METACLASS_$_OS_dispatch_queue_attr
+_OBJC_METACLASS_$_OS_dispatch_source
+_OBJC_METACLASS_$_OS_dispatch_mach
+_OBJC_METACLASS_$_OS_dispatch_mach_msg
+_OBJC_METACLASS_$_OS_dispatch_io
+_OBJC_METACLASS_$_OS_dispatch_operation
+_OBJC_METACLASS_$_OS_dispatch_disk
+_OBJC_METACLASS_$_OS_object
 _OBJC_METACLASS_$_OS_voucher
 #_OBJC_METACLASS_$_OS_voucher_recipe
 _OBJC_METACLASS_$_OS_dispatch_data