Merge pull request #460 from triplef/fix-cmake-private-install

Added missing private headers to cmake install.
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 412c340..74b7849 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -77,6 +77,8 @@
   set(INSTALL_OS_HEADERS_DIR "include/os" CACHE PATH "Path where the headers will be installed")
 endif()
 
+option(DISPATCH_ENABLE_ASSERTS "enable debug assertions" FALSE)
+
 option(ENABLE_DTRACE "enable dtrace support" "")
 
 option(ENABLE_TESTING "build libdispatch tests" ON)
diff --git a/private/private.h b/private/private.h
index 594bd20..b87f5dc 100644
--- a/private/private.h
+++ b/private/private.h
@@ -177,13 +177,13 @@
 
 #if TARGET_OS_MAC
 #define DISPATCH_COCOA_COMPAT 1
-#elif defined(__linux__) || defined(__FreeBSD__)
+#elif defined(__linux__) || defined(__FreeBSD__) || defined(_WIN32)
 #define DISPATCH_COCOA_COMPAT 1
 #else
 #define DISPATCH_COCOA_COMPAT 0
 #endif
 
-#if DISPATCH_COCOA_COMPAT || defined(_WIN32)
+#if DISPATCH_COCOA_COMPAT
 
 #define DISPATCH_CF_SPI_VERSION 20160712
 
@@ -197,12 +197,10 @@
 #error "runloop support not implemented on this platform"
 #endif
 
-#if TARGET_OS_MAC
 API_AVAILABLE(macos(10.6), ios(4.0))
 DISPATCH_EXPORT DISPATCH_CONST DISPATCH_WARN_RESULT DISPATCH_NOTHROW
 dispatch_runloop_handle_t
 _dispatch_get_main_queue_port_4CF(void);
-#endif
 
 API_AVAILABLE(macos(10.12), ios(10.0), tvos(10.0), watchos(3.0))
 DISPATCH_EXPORT DISPATCH_NOTHROW
@@ -221,12 +219,12 @@
 _dispatch_runloop_root_queue_create_4CF(const char *_Nullable label,
 		unsigned long flags);
 
-#if TARGET_OS_MAC || defined(_WIN32)
 API_AVAILABLE(macos(10.9), ios(7.0))
 DISPATCH_EXPORT DISPATCH_WARN_RESULT DISPATCH_NOTHROW
 dispatch_runloop_handle_t
 _dispatch_runloop_root_queue_get_port_4CF(dispatch_queue_t queue);
 
+#if TARGET_OS_MAC
 API_AVAILABLE(macos(10.13.2), ios(11.2), tvos(11.2), watchos(4.2))
 DISPATCH_EXPORT DISPATCH_WARN_RESULT DISPATCH_NOTHROW
 bool
@@ -257,7 +255,7 @@
 DISPATCH_EXPORT
 void (*_Nullable _dispatch_end_NSAutoReleasePool)(void *);
 
-#endif /* DISPATCH_COCOA_COMPAT || defined(_WIN32) */
+#endif /* DISPATCH_COCOA_COMPAT */
 
 API_AVAILABLE(macos(10.13), ios(11.0), tvos(11.0), watchos(4.0))
 DISPATCH_EXPORT DISPATCH_NOTHROW
diff --git a/src/allocator_internal.h b/src/allocator_internal.h
index 5f8c2f0..9409048 100644
--- a/src/allocator_internal.h
+++ b/src/allocator_internal.h
@@ -97,7 +97,7 @@
 // Use the largest type your platform is comfortable doing atomic ops with.
 // TODO: rdar://11477843
 typedef unsigned long bitmap_t;
-#if defined(__LP64__)
+#if DISPATCH_SIZEOF_PTR == 8
 #define BYTES_PER_BITMAP 8
 #else
 #define BYTES_PER_BITMAP 4
@@ -147,7 +147,7 @@
 
 #define PADDING_TO_CONTINUATION_SIZE(x) (ROUND_UP_TO_CONTINUATION_SIZE(x) - (x))
 
-#if defined(__LP64__)
+#if DISPATCH_SIZEOF_PTR == 8
 #define SIZEOF_HEADER 16
 #else
 #define SIZEOF_HEADER 8
diff --git a/src/benchmark.c b/src/benchmark.c
index b475043..15e9f55 100644
--- a/src/benchmark.c
+++ b/src/benchmark.c
@@ -41,7 +41,7 @@
 	register size_t cnt = bdata->count;
 	size_t i = 0;
 	uint64_t start, delta;
-#if defined(__LP64__)
+#if DISPATCH_SIZEOF_PTR == 8 && !defined(_WIN32)
 	__uint128_t lcost;
 #else
 	long double lcost;
@@ -93,7 +93,7 @@
 	};
 	static dispatch_once_t pred;
 	uint64_t ns, start, delta;
-#if defined(__LP64__)
+#if DISPATCH_SIZEOF_PTR == 8 && !defined(_WIN32)
 	__uint128_t conversion, big_denom;
 #else
 	long double conversion, big_denom;
diff --git a/src/event/event_windows.c b/src/event/event_windows.c
index 2fe9680..1e3fae7 100644
--- a/src/event/event_windows.c
+++ b/src/event/event_windows.c
@@ -21,26 +21,31 @@
 #include "internal.h"
 #if DISPATCH_EVENT_BACKEND_WINDOWS
 
+static HANDLE hPort = NULL; // completion port; created once by _dispatch_windows_port_init()
+enum _dispatch_windows_port { // completion keys queued on hPort
+	DISPATCH_PORT_POKE = 0, // posted by _dispatch_event_loop_poke()
+	DISPATCH_PORT_TIMER_CLOCK_WALL, // timer keys: passed as the threadpool timer callback context
+	DISPATCH_PORT_TIMER_CLOCK_UPTIME,
+	DISPATCH_PORT_TIMER_CLOCK_MONOTONIC,
+};
+
 #pragma mark dispatch_unote_t
 
 bool
-_dispatch_unote_register(dispatch_unote_t du DISPATCH_UNUSED,
-		dispatch_wlh_t wlh DISPATCH_UNUSED,
-		dispatch_priority_t pri DISPATCH_UNUSED)
+_dispatch_unote_register_muxed(dispatch_unote_t du DISPATCH_UNUSED)
 {
 	WIN_PORT_ERROR();
 	return false;
 }
 
 void
-_dispatch_unote_resume(dispatch_unote_t du DISPATCH_UNUSED)
+_dispatch_unote_resume_muxed(dispatch_unote_t du DISPATCH_UNUSED)
 {
 	WIN_PORT_ERROR();
 }
 
 bool
-_dispatch_unote_unregister(dispatch_unote_t du DISPATCH_UNUSED,
-		uint32_t flags DISPATCH_UNUSED)
+_dispatch_unote_unregister_muxed(dispatch_unote_t du DISPATCH_UNUSED)
 {
 	WIN_PORT_ERROR();
 	return false;
@@ -48,32 +53,191 @@
 
 #pragma mark timers
 
-void
-_dispatch_event_loop_timer_arm(uint32_t tidx DISPATCH_UNUSED,
-		dispatch_timer_delay_s range DISPATCH_UNUSED,
-		dispatch_clock_now_cache_t nows DISPATCH_UNUSED)
+typedef struct _dispatch_windows_timeout_s {
+	PTP_TIMER pTimer;
+	enum _dispatch_windows_port ullIdent;
+	bool bArmed;
+} *dispatch_windows_timeout_t;
+
+#define DISPATCH_WINDOWS_TIMEOUT_INITIALIZER(clock)                             \
+	[DISPATCH_CLOCK_##clock] = {                                            \
+		.pTimer = NULL,                                                 \
+		.ullIdent = DISPATCH_PORT_TIMER_CLOCK_##clock,                  \
+		.bArmed = FALSE,                                                \
+	}
+
+static struct _dispatch_windows_timeout_s _dispatch_windows_timeout[] = {
+	DISPATCH_WINDOWS_TIMEOUT_INITIALIZER(WALL),
+	DISPATCH_WINDOWS_TIMEOUT_INITIALIZER(UPTIME),
+	DISPATCH_WINDOWS_TIMEOUT_INITIALIZER(MONOTONIC),
+};
+
+static void
+_dispatch_event_merge_timer(dispatch_clock_t clock)
 {
-	WIN_PORT_ERROR();
+	uint32_t tidx = DISPATCH_TIMER_INDEX(clock, 0);
+
+	_dispatch_windows_timeout[clock].bArmed = FALSE;
+
+	_dispatch_timers_heap_dirty(_dispatch_timers_heap, tidx);
+	_dispatch_timers_heap[tidx].dth_needs_program = true;
+	_dispatch_timers_heap[tidx].dth_armed = false;
+}
+
+static void CALLBACK
+_dispatch_timer_callback(PTP_CALLBACK_INSTANCE Instance, PVOID Context,
+	PTP_TIMER Timer)
+{
+	BOOL bSuccess;
+
+	bSuccess = PostQueuedCompletionStatus(hPort, 0, (ULONG_PTR)Context,
+		NULL);
+	if (bSuccess == FALSE) {
+		DISPATCH_INTERNAL_CRASH(GetLastError(),
+			"PostQueuedCompletionStatus");
+	}
 }
 
 void
-_dispatch_event_loop_timer_delete(uint32_t tidx DISPATCH_UNUSED)
+_dispatch_event_loop_timer_arm(dispatch_timer_heap_t dth DISPATCH_UNUSED,
+		uint32_t tidx, dispatch_timer_delay_s range,
+		dispatch_clock_now_cache_t nows)
 {
-	WIN_PORT_ERROR();
+	dispatch_windows_timeout_t timer;
+	FILETIME ftDueTime;
+	LARGE_INTEGER liTime;
+
+	switch (DISPATCH_TIMER_CLOCK(tidx)) {
+	case DISPATCH_CLOCK_WALL:
+		timer = &_dispatch_windows_timeout[DISPATCH_CLOCK_WALL];
+		liTime.QuadPart = range.delay +
+			_dispatch_time_now_cached(DISPATCH_TIMER_CLOCK(tidx), nows);
+		break;
+
+	case DISPATCH_CLOCK_UPTIME:
+	case DISPATCH_CLOCK_MONOTONIC:
+		timer = &_dispatch_windows_timeout[DISPATCH_TIMER_CLOCK(tidx)];
+		liTime.QuadPart = -((range.delay + 99) / 100);
+		break;
+	}
+
+	if (timer->pTimer == NULL) {
+		timer->pTimer = CreateThreadpoolTimer(_dispatch_timer_callback,
+			(LPVOID)timer->ullIdent, NULL);
+		if (timer->pTimer == NULL) {
+			DISPATCH_INTERNAL_CRASH(GetLastError(),
+				"CreateThreadpoolTimer");
+		}
+	}
+
+	ftDueTime.dwHighDateTime = liTime.HighPart;
+	ftDueTime.dwLowDateTime = liTime.LowPart;
+
+	SetThreadpoolTimer(timer->pTimer, &ftDueTime, /*msPeriod=*/0,
+		/*msWindowLength=*/0);
+	timer->bArmed = TRUE;
+}
+
+void
+_dispatch_event_loop_timer_delete(dispatch_timer_heap_t dth DISPATCH_UNUSED,
+		uint32_t tidx)
+{
+	dispatch_windows_timeout_t timer;
+
+	switch (DISPATCH_TIMER_CLOCK(tidx)) {
+	case DISPATCH_CLOCK_WALL:
+		timer = &_dispatch_windows_timeout[DISPATCH_CLOCK_WALL];
+		break;
+
+	case DISPATCH_CLOCK_UPTIME:
+	case DISPATCH_CLOCK_MONOTONIC:
+		timer = &_dispatch_windows_timeout[DISPATCH_TIMER_CLOCK(tidx)];
+		break;
+	}
+
+	SetThreadpoolTimer(timer->pTimer, NULL, /*msPeriod=*/0,
+		/*msWindowLength=*/0);
+	timer->bArmed = FALSE;
 }
 
 #pragma mark dispatch_loop
 
+static void
+_dispatch_windows_port_init(void *context DISPATCH_UNUSED)
+{
+	hPort = CreateIoCompletionPort(INVALID_HANDLE_VALUE, NULL, 0, 1);
+	if (hPort == NULL) {
+		DISPATCH_INTERNAL_CRASH(GetLastError(),
+			"CreateIoCompletionPort");
+	}
+
+#if DISPATCH_USE_MGR_THREAD
+	_dispatch_trace_item_push(_dispatch_mgr_q.do_targetq, &_dispatch_mgr_q);
+	dx_push(_dispatch_mgr_q.do_targetq, &_dispatch_mgr_q, 0);
+#endif
+}
+
 void
 _dispatch_event_loop_poke(dispatch_wlh_t wlh DISPATCH_UNUSED,
 		uint64_t dq_state DISPATCH_UNUSED, uint32_t flags DISPATCH_UNUSED)
 {
-	WIN_PORT_ERROR();
+	static dispatch_once_t _dispatch_windows_port_init_pred;
+	BOOL bSuccess;
+
+	dispatch_once_f(&_dispatch_windows_port_init_pred, NULL,
+		_dispatch_windows_port_init);
+	bSuccess = PostQueuedCompletionStatus(hPort, 0, DISPATCH_PORT_POKE,
+		NULL);
+	(void)dispatch_assume(bSuccess);
 }
 
 DISPATCH_NOINLINE
 void
-_dispatch_event_loop_drain(uint32_t flags DISPATCH_UNUSED)
+_dispatch_event_loop_drain(uint32_t flags) // dequeue and handle every packet pending on hPort
+{
+	DWORD dwNumberOfBytesTransferred;
+	ULONG_PTR ulCompletionKey;
+	LPOVERLAPPED pOV;
+	BOOL bSuccess;
+
+	pOV = (LPOVERLAPPED)&pOV; // non-NULL sentinel; tested after the loop
+	bSuccess = GetQueuedCompletionStatus(hPort, &dwNumberOfBytesTransferred,
+		&ulCompletionKey, &pOV,
+		(flags & KEVENT_FLAG_IMMEDIATE) ? 0 : INFINITE); // wait indefinitely unless KEVENT_FLAG_IMMEDIATE is set
+	while (bSuccess) {
+		switch (ulCompletionKey) {
+		case DISPATCH_PORT_POKE: // wake-up only, nothing to merge
+			break;
+
+		case DISPATCH_PORT_TIMER_CLOCK_WALL:
+			_dispatch_event_merge_timer(DISPATCH_CLOCK_WALL);
+			break;
+
+		case DISPATCH_PORT_TIMER_CLOCK_UPTIME:
+			_dispatch_event_merge_timer(DISPATCH_CLOCK_UPTIME);
+			break;
+
+		case DISPATCH_PORT_TIMER_CLOCK_MONOTONIC:
+			_dispatch_event_merge_timer(DISPATCH_CLOCK_MONOTONIC);
+			break;
+
+		default:
+			DISPATCH_INTERNAL_CRASH(ulCompletionKey,
+				"unsupported completion key");
+		}
+
+		bSuccess = GetQueuedCompletionStatus(hPort,
+			&dwNumberOfBytesTransferred, &ulCompletionKey, &pOV, 0); // zero timeout: only take what is already queued
+	}
+
+	if (bSuccess == FALSE && pOV != NULL) { // NOTE(review): presumably a NULL pOV means nothing was dequeued (e.g. timeout) — confirm against the Win32 docs
+		DISPATCH_INTERNAL_CRASH(GetLastError(),
+			"GetQueuedCompletionStatus");
+	}
+}
+
+void
+_dispatch_event_loop_cancel_waiter(dispatch_sync_context_t dsc DISPATCH_UNUSED)
 {
 	WIN_PORT_ERROR();
 }
@@ -109,9 +273,9 @@
 #endif
 
 void
-_dispatch_event_loop_leave_immediate(dispatch_wlh_t wlh, uint64_t dq_state)
+_dispatch_event_loop_leave_immediate(uint64_t dq_state)
 {
-	(void)wlh; (void)dq_state;
+	(void)dq_state;
 }
 
 #endif // DISPATCH_EVENT_BACKEND_WINDOWS
diff --git a/src/event/workqueue.c b/src/event/workqueue.c
index dc020f3..28f1675 100644
--- a/src/event/workqueue.c
+++ b/src/event/workqueue.c
@@ -97,7 +97,6 @@
 	_dispatch_unfair_lock_unlock(&mon->registered_tid_lock);
 #else
 	(void)root_q;
-	(void)cls;
 #endif // HAVE_DISPATCH_WORKQ_MONITORING
 }
 
@@ -124,7 +123,6 @@
 	_dispatch_unfair_lock_unlock(&mon->registered_tid_lock);
 #else
 	(void)root_q;
-	(void)cls;
 #endif // HAVE_DISPATCH_WORKQ_MONITORING
 }
 
diff --git a/src/inline_internal.h b/src/inline_internal.h
index d40a15b..67ecfc9 100644
--- a/src/inline_internal.h
+++ b/src/inline_internal.h
@@ -1294,7 +1294,7 @@
 	if (unlikely(!_dq_state_is_base_wlh(old_state) ||
 			!_dq_state_is_enqueued_on_target(old_state) ||
 			_dq_state_is_enqueued_on_manager(old_state))) {
-#if !__LP64__
+#if DISPATCH_SIZEOF_PTR == 4
 		old_state >>= 32;
 #endif
 		DISPATCH_INTERNAL_CRASH(old_state, "Invalid wlh state");
diff --git a/src/internal.h b/src/internal.h
index c5eff33..29e451e 100644
--- a/src/internal.h
+++ b/src/internal.h
@@ -306,9 +306,6 @@
 #include <stdbool.h>
 #include <stdint.h>
 #include <stdio.h>
-#if defined(_WIN32)
-#define _CRT_RAND_S
-#endif
 #include <stdlib.h>
 #include <string.h>
 #if defined(__unix__) || (defined(__APPLE__) && defined(__MACH__))
diff --git a/src/io.c b/src/io.c
index 874932e..42f1424 100644
--- a/src/io.c
+++ b/src/io.c
@@ -152,7 +152,7 @@
 #endif // DISPATCH_IO_DEBUG
 
 #define _dispatch_fd_debug(msg, fd, ...) \
-		_dispatch_io_log("fd[0x%x]: " msg, fd, ##__VA_ARGS__)
+		_dispatch_io_log("fd[0x%" PRIx64 "]: " msg, fd, ##__VA_ARGS__)
 #define _dispatch_op_debug(msg, op, ...) \
 		_dispatch_io_log("op[%p]: " msg, op, ##__VA_ARGS__)
 #define _dispatch_channel_debug(msg, channel, ...) \
@@ -1385,7 +1385,7 @@
 	// On fds lock queue
 	dispatch_fd_entry_t fd_entry = _dispatch_fd_entry_create(
 			_dispatch_io_fds_lockq);
-	_dispatch_fd_entry_debug("create: fd %d", fd_entry, fd);
+	_dispatch_fd_entry_debug("create: fd %" PRId64, fd_entry, fd);
 	fd_entry->fd = fd;
 	LIST_INSERT_HEAD(&_dispatch_io_fds[hash], fd_entry, fd_list);
 	fd_entry->barrier_queue = dispatch_queue_create(
@@ -1399,11 +1399,11 @@
 			int result = ioctlsocket((SOCKET)fd, (long)FIONBIO, &value);
 			(void)dispatch_assume_zero(result);
 			_dispatch_stream_init(fd_entry,
-				_dispatch_get_root_queue(DISPATCH_QOS_DEFAULT, false));
+				_dispatch_get_default_queue(false));
 		} else {
 			dispatch_suspend(fd_entry->barrier_queue);
-			dispatch_once_f(&_dispatch_io_devs_lockq_pred, NULL,
-					_dispatch_io_devs_lockq_init);
+			dispatch_once_f(&_dispatch_io_init_pred, NULL,
+					_dispatch_io_queues_init);
 			dispatch_async(_dispatch_io_devs_lockq, ^{
 				_dispatch_disk_init(fd_entry, 0);
 				dispatch_resume(fd_entry->barrier_queue);
diff --git a/src/queue.c b/src/queue.c
index 83aa8ec..6332c6b 100644
--- a/src/queue.c
+++ b/src/queue.c
@@ -571,7 +571,7 @@
 	return (bool)(dbpd->dbpd_atomic_flags & DBF_CANCELED);
 }
 
-long
+intptr_t
 dispatch_block_wait(dispatch_block_t db, dispatch_time_t timeout)
 {
 	dispatch_block_private_data_t dbpd = _dispatch_block_get_data(db);
@@ -2536,7 +2536,7 @@
 {
 	uint64_t dq_state = os_atomic_load2o(dq._dq, dq_state, relaxed);
 	if (likely(dq_state & DISPATCH_QUEUE_INACTIVE)) return;
-#ifndef __LP64__
+#if DISPATCH_SIZEOF_PTR == 4
 	dq_state >>= 32;
 #endif
 	DISPATCH_CLIENT_CRASH((uintptr_t)dq_state,
@@ -2851,7 +2851,7 @@
 			DISPATCH_CLIENT_CRASH((uintptr_t)dq_state,
 					"Release of a locked queue");
 		}
-#ifndef __LP64__
+#if DISPATCH_SIZEOF_PTR == 4
 		dq_state >>= 32;
 #endif
 		DISPATCH_CLIENT_CRASH((uintptr_t)dq_state,
@@ -4055,6 +4055,7 @@
 static void
 _dispatch_workloop_activate_attributes(dispatch_workloop_t dwl)
 {
+#if defined(_POSIX_THREADS)
 	dispatch_workloop_attr_t dwla = dwl->dwl_attr;
 	pthread_attr_t attr;
 
@@ -4082,7 +4083,7 @@
 				(unsigned long)dwla->dwla_cpupercent.refillms);
 	}
 #endif // HAVE_PTHREAD_ATTR_SETCPUPERCENT_NP
- #if TARGET_OS_MAC
+#if TARGET_OS_MAC
 	if (_dispatch_workloop_has_kernel_attributes(dwl)) {
 		int rv = _pthread_workloop_create((uint64_t)dwl, 0, &attr);
 		switch (rv) {
@@ -4099,6 +4100,7 @@
 	}
 #endif // TARGET_OS_MAC
 	pthread_attr_destroy(&attr);
+#endif // defined(_POSIX_THREADS)
 }
 
 void
@@ -4114,7 +4116,7 @@
 			DISPATCH_CLIENT_CRASH((uintptr_t)dq_state,
 					"Release of a locked workloop");
 		}
-#ifndef __LP64__
+#if DISPATCH_SIZEOF_PTR == 4
 		dq_state >>= 32;
 #endif
 		DISPATCH_CLIENT_CRASH((uintptr_t)dq_state,
@@ -4505,7 +4507,7 @@
 	});
 
 	if (unlikely(_dq_state_is_suspended(old_state))) {
-#ifndef __LP64__
+#if DISPATCH_SIZEOF_PTR == 4
 		old_state >>= 32;
 #endif
 		DISPATCH_CLIENT_CRASH(old_state, "Waking up an inactive workloop");
@@ -5679,7 +5681,9 @@
 _dispatch_root_queue_poke_slow(dispatch_queue_global_t dq, int n, int floor)
 {
 	int remaining = n;
+#if !defined(_WIN32)
 	int r = ENOSYS;
+#endif
 
 	_dispatch_root_queues_init();
 	_dispatch_debug_root_queue(dq, __func__);
@@ -5777,9 +5781,11 @@
 			}
 			_dispatch_temporary_resource_shortage();
 		}
+#if DISPATCH_USE_PTHREAD_ROOT_QUEUES
 		if (_dispatch_mgr_sched.prio > _dispatch_mgr_sched.default_prio) {
 			(void)dispatch_assume_zero(SetThreadPriority((HANDLE)hThread, _dispatch_mgr_sched.prio) == TRUE);
 		}
+#endif
 		CloseHandle((HANDLE)hThread);
 	} while (--remaining);
 #endif // defined(_WIN32)
@@ -6468,6 +6474,8 @@
 	return MACH_PORT_VALID(handle);
 #elif defined(__linux__)
 	return handle >= 0;
+#elif defined(_WIN32)
+	return handle != NULL;
 #else
 #error "runloop support not implemented on this platform"
 #endif
@@ -6482,6 +6490,8 @@
 #elif defined(__linux__)
 	// decode: 0 is a valid fd, so offset by 1 to distinguish from NULL
 	return ((dispatch_runloop_handle_t)(uintptr_t)dq->do_ctxt) - 1;
+#elif defined(_WIN32)
+	return ((dispatch_runloop_handle_t)(uintptr_t)dq->do_ctxt);
 #else
 #error "runloop support not implemented on this platform"
 #endif
@@ -6497,6 +6507,8 @@
 #elif defined(__linux__)
 	// encode: 0 is a valid fd, so offset by 1 to distinguish from NULL
 	dq->do_ctxt = (void *)(uintptr_t)(handle + 1);
+#elif defined(_WIN32)
+	dq->do_ctxt = (void *)(uintptr_t)handle;
 #else
 #error "runloop support not implemented on this platform"
 #endif
@@ -6551,6 +6563,14 @@
 		}
 	}
 	handle = fd;
+#elif defined(_WIN32)
+	HANDLE hEvent;
+	hEvent = CreateEventW(NULL, /*bManualReset=*/TRUE,
+		/*bInitialState=*/FALSE, NULL);
+	if (hEvent == NULL) {
+		DISPATCH_INTERNAL_CRASH(GetLastError(), "CreateEventW");
+	}
+	handle = hEvent;
 #else
 #error "runloop support not implemented on this platform"
 #endif
@@ -6577,6 +6597,10 @@
 #elif defined(__linux__)
 	int rc = close(handle);
 	(void)dispatch_assume_zero(rc);
+#elif defined(_WIN32)
+	BOOL bSuccess;
+	bSuccess = CloseHandle(handle);
+	(void)dispatch_assume(bSuccess);
 #else
 #error "runloop support not implemented on this platform"
 #endif
@@ -6609,6 +6633,10 @@
 		result = eventfd_write(handle, 1);
 	} while (result == -1 && errno == EINTR);
 	(void)dispatch_assume_zero(result);
+#elif defined(_WIN32)
+	BOOL bSuccess;
+	bSuccess = SetEvent(handle);
+	(void)dispatch_assume(bSuccess);
 #else
 #error "runloop support not implemented on this platform"
 #endif
@@ -6917,13 +6945,11 @@
 	return _dispatch_runloop_queue_get_handle(dq->_as_dl);
 }
 
-#if TARGET_OS_MAC
 dispatch_runloop_handle_t
 _dispatch_get_main_queue_port_4CF(void)
 {
 	return _dispatch_get_main_queue_handle_4CF();
 }
-#endif
 
 void
 _dispatch_main_queue_callback_4CF(
@@ -6990,7 +7016,9 @@
 {
 	// never returns, so burn bridges behind us
 	_dispatch_clear_stack(0);
-#if !defined(_WIN32)
+#if defined(_WIN32)
+	Sleep(INFINITE);
+#else
 	_dispatch_sigsuspend();
 #endif
 }
diff --git a/src/queue_internal.h b/src/queue_internal.h
index 9ec4fec..ce235f4 100644
--- a/src/queue_internal.h
+++ b/src/queue_internal.h
@@ -954,7 +954,7 @@
 // If dc_flags is less than 0x1000, then the object is a continuation.
 // Otherwise, the object has a private layout and memory management rules. The
 // layout until after 'do_next' must align with normal objects.
-#if __LP64__
+#if DISPATCH_SIZEOF_PTR == 8
 #define DISPATCH_CONTINUATION_HEADER(x) \
 	union { \
 		const void *do_vtable; \
diff --git a/src/semaphore.c b/src/semaphore.c
index 73acbe8..610f728 100644
--- a/src/semaphore.c
+++ b/src/semaphore.c
@@ -76,7 +76,7 @@
 			dsema->dsema_sema);
 #endif
 	offset += dsnprintf(&buf[offset], bufsiz - offset,
-			"value = %" PRId64 ", orig = %" PRId64 " }", dsema->dsema_value, dsema->dsema_orig);
+			"value = %" PRIdPTR ", orig = %" PRIdPTR " }", dsema->dsema_value, dsema->dsema_orig); // both fields are intptr_t
 	return offset;
 }
 
diff --git a/src/semaphore_internal.h b/src/semaphore_internal.h
index 0f209cd..b9b6c7b 100644
--- a/src/semaphore_internal.h
+++ b/src/semaphore_internal.h
@@ -32,7 +32,7 @@
 DISPATCH_CLASS_DECL(semaphore, OBJECT);
 struct dispatch_semaphore_s {
 	DISPATCH_OBJECT_HEADER(semaphore);
-	long volatile dsema_value;
+	intptr_t volatile dsema_value;
 	intptr_t dsema_orig;
 	_dispatch_sema4_t dsema_sema;
 };
diff --git a/src/shims/generic_sys_queue.h b/src/shims/generic_sys_queue.h
index 1d9a18d..c6c6587 100644
--- a/src/shims/generic_sys_queue.h
+++ b/src/shims/generic_sys_queue.h
@@ -89,4 +89,46 @@
 		} \
 	} while(0)
 
+#define TAILQ_HEAD_INITIALIZER(head) \
+	{ NULL, (head).tq_first }
+
+#define TAILQ_CONCAT(head1, head2, field) do { /* move all of head2 onto the tail of head1; head2 ends up empty */ \
+		if (!TAILQ_EMPTY(head2)) { \
+			*(TAILQ_EMPTY(head1) ? &(head1)->tq_first : &(head1)->tq_last->field.te_next) = (head2)->tq_first; \
+			(head2)->tq_first->field.te_prev = (head1)->tq_last; \
+			(head1)->tq_last = (head2)->tq_last; \
+			TAILQ_INIT((head2)); \
+		} \
+	} while (0)
+
+#define LIST_HEAD(name, type) struct name { \
+		struct type *lh_first; \
+	}
+
+#define LIST_ENTRY(type) struct { /* le_prev addresses whichever pointer links to this element */ \
+		struct type *le_next; \
+		struct type **le_prev; \
+	}
+
+#define LIST_FIRST(head) ((head)->lh_first)
+#define LIST_NEXT(elm, field) ((elm)->field.le_next)
+
+#define LIST_FOREACH(var, head, field) \
+	for ((var) = LIST_FIRST((head)); \
+		(var); \
+		(var) = LIST_NEXT((var), field))
+
+#define LIST_REMOVE(elm, field) do { /* unlink elm from its predecessor (or the head) and its successor */ \
+		if (LIST_NEXT((elm), field) != NULL) \
+			LIST_NEXT((elm), field)->field.le_prev = (elm)->field.le_prev; \
+		*(elm)->field.le_prev = LIST_NEXT((elm), field); \
+	} while (0)
+
+#define LIST_INSERT_HEAD(head, elm, field) do { /* push elm at the front of head */ \
+		if ((LIST_NEXT((elm), field) = LIST_FIRST((head))) != NULL) \
+			LIST_FIRST((head))->field.le_prev = &LIST_NEXT((elm), field); \
+		LIST_FIRST((head)) = (elm); \
+		(elm)->field.le_prev = &LIST_FIRST((head)); \
+	} while (0)
+
 #endif // __DISPATCH_SHIMS_SYS_QUEUE__
diff --git a/src/shims/generic_win_stubs.h b/src/shims/generic_win_stubs.h
index c983cdc..1ce41f7 100644
--- a/src/shims/generic_win_stubs.h
+++ b/src/shims/generic_win_stubs.h
@@ -34,4 +34,6 @@
 #define WIN_PORT_ERROR() \
 		_RPTF1(_CRT_ASSERT, "WIN_PORT_ERROR in %s", __FUNCTION__)
 
+#define strcasecmp _stricmp
+
 #endif
diff --git a/src/shims/lock.c b/src/shims/lock.c
index 31e9605..1f3a38b 100644
--- a/src/shims/lock.c
+++ b/src/shims/lock.c
@@ -260,8 +260,7 @@
 	if (ms) timeEndPeriod(ms);
 }
 
-void
-_dispatch_sema4_create_slow(_dispatch_sema4_t *s4, int policy DISPATCH_UNUSED)
+void _dispatch_sema4_init(_dispatch_sema4_t *sema, int policy DISPATCH_UNUSED)
 {
 	HANDLE tmp;
 
@@ -271,7 +270,7 @@
 		_dispatch_temporary_resource_shortage();
 	}
 
-	if (!os_atomic_cmpxchg(s4, 0, tmp, relaxed)) {
+	if (!os_atomic_cmpxchg(sema, 0, tmp, relaxed)) {
 		CloseHandle(tmp);
 	}
 }
@@ -308,7 +307,7 @@
 	nsec = _dispatch_timeout(timeout);
 	msec = (DWORD)(nsec / (uint64_t)1000000);
 	resolution = _push_timer_resolution(msec);
-	wait_result = WaitForSingleObject(sema, msec);
+	wait_result = WaitForSingleObject(*sema, msec);
 	_pop_timer_resolution(resolution);
 	return wait_result == WAIT_TIMEOUT;
 }
@@ -509,7 +508,7 @@
 	}
 	return _dispatch_futex_wait(address, value, NULL, FUTEX_PRIVATE_FLAG);
 #elif defined(_WIN32)
-	WaitOnAddress(address, (PVOID)(uintptr_t)value, sizeof(value), INFINITE);
+	return WaitOnAddress(address, &value, sizeof(value), INFINITE) == TRUE;
 #else
 #error _dispatch_wait_on_address unimplemented for this platform
 #endif
@@ -652,7 +651,9 @@
 {
 	dispatch_lock self = _dispatch_lock_value_for_self();
 	uintptr_t old_v, new_v;
+#if HAVE_UL_UNFAIR_LOCK || HAVE_FUTEX
 	dispatch_lock *lock = &dgo->dgo_gate.dgl_lock;
+#endif
 	uint32_t timeout = 1;
 
 	for (;;) {
@@ -681,7 +682,7 @@
 		_dispatch_futex_wait(lock, (dispatch_lock)new_v, NULL,
 				FUTEX_PRIVATE_FLAG);
 #else
-		_dispatch_thread_switch(new_v, flags, timeout++);
+		_dispatch_thread_switch(new_v, 0, timeout++);
 #endif
 		(void)timeout;
 	}
diff --git a/src/shims/lock.h b/src/shims/lock.h
index 0fd956f..ca450d5 100644
--- a/src/shims/lock.h
+++ b/src/shims/lock.h
@@ -213,9 +213,9 @@
 #define _DSEMA4_POLICY_LIFO 0
 #define _DSEMA4_TIMEOUT() ((errno) = ETIMEDOUT, -1)
 
-#define _dispatch_sema4_init(sema, policy) (void)(*(sema) = 0)
-#define _dispatch_sema4_is_created(sema)   (*(sema) != 0)
-void _dispatch_sema4_create_slow(_dispatch_sema4_t *sema, int policy);
+void _dispatch_sema4_init(_dispatch_sema4_t *sema, int policy);
+#define _dispatch_sema4_is_created(sema)   ((void)sema, 1)
+#define _dispatch_sema4_create_slow(sema, policy) ((void)sema, (void)policy)
 
 #else
 #error "port has to implement _dispatch_sema4_t"
diff --git a/src/shims/time.h b/src/shims/time.h
index 063d523..8fae5a2 100644
--- a/src/shims/time.h
+++ b/src/shims/time.h
@@ -108,13 +108,14 @@
 	dispatch_assume_zero(clock_gettime(CLOCK_REALTIME, &ts));
 	return _dispatch_timespec_to_nano(ts);
 #elif defined(_WIN32)
+	static const uint64_t kNTToUNIXBiasAdjustment = 11644473600 * NSEC_PER_SEC;
 	// FILETIME is 100-nanosecond intervals since January 1, 1601 (UTC).
 	FILETIME ft;
 	ULARGE_INTEGER li;
-	GetSystemTimeAsFileTime(&ft);
+	GetSystemTimePreciseAsFileTime(&ft);
 	li.LowPart = ft.dwLowDateTime;
 	li.HighPart = ft.dwHighDateTime;
-	return li.QuadPart * 100ull;
+	return li.QuadPart * 100ull - kNTToUNIXBiasAdjustment;
 #else
 	struct timeval tv;
 	dispatch_assert_zero(gettimeofday(&tv, NULL));
@@ -148,9 +149,10 @@
 	struct timespec ts;
 	dispatch_assume_zero(clock_gettime(CLOCK_UPTIME, &ts));
 	return _dispatch_timespec_to_nano(ts);
-#elif TARGET_OS_WIN32
-	LARGE_INTEGER now;
-	return QueryPerformanceCounter(&now) ? now.QuadPart : 0;
+#elif defined(_WIN32)
+	ULONGLONG ullUnbiasedTime;
+	QueryUnbiasedInterruptTime(&ullUnbiasedTime);
+	return ullUnbiasedTime * 100;
 #else
 #error platform needs to implement _dispatch_uptime()
 #endif
diff --git a/src/shims/tsd.h b/src/shims/tsd.h
index f44d7c8..446c4d7 100644
--- a/src/shims/tsd.h
+++ b/src/shims/tsd.h
@@ -112,7 +112,8 @@
 static inline void
 _dispatch_thread_key_create(DWORD *k, void (DISPATCH_TSD_DTOR_CC *d)(void *))
 {
-	dispatch_assert_zero((*k = FlsAlloc(d)));
+	*k = FlsAlloc(d);
+	dispatch_assert(*k != FLS_OUT_OF_INDEXES);
 }
 
 extern DWORD __dispatch_tsd_key;
diff --git a/src/shims/yield.h b/src/shims/yield.h
index fc1b9fc..7e599cb 100644
--- a/src/shims/yield.h
+++ b/src/shims/yield.h
@@ -99,10 +99,15 @@
 		((DISPATCH_CONTENTION_SPINS_MIN) + ((DISPATCH_CONTENTION_SPINS_MAX) - \
 		(DISPATCH_CONTENTION_SPINS_MIN)) / 2)
 #elif defined(_WIN32)
-#define _dispatch_contention_spins() ({                                        \
-		unsigned int _value;                                           \
-		rand_s(&_value);                                               \
-		(_value & DISPATCH_CONTENTION_SPINS_MAX) | DISPATCH_CONTENTION_SPINS_MIN; })
+// Use randomness to prevent threads from resonating at the same frequency and
+// permanently contending. Windows doesn't provide rand_r(), so use a simple
+// LCG. (msvcrt has rand_s(), but its security guarantees aren't optimal here.)
+#define _dispatch_contention_spins() ({ \
+		static os_atomic(unsigned int) _seed = 1; \
+		unsigned int _next = os_atomic_load(&_seed, relaxed); \
+		os_atomic_store(&_seed, _next * 1103515245 + 12345, relaxed); \
+		((_next >> 24) & (DISPATCH_CONTENTION_SPINS_MAX)) | \
+				(DISPATCH_CONTENTION_SPINS_MIN); })
 #else
 // Use randomness to prevent threads from resonating at the same
 // frequency and permanently contending.
@@ -149,8 +154,8 @@
 #define _dispatch_preemption_yield(n) { (void)n; pthread_yield_np(); }
 #define _dispatch_preemption_yield_to(th, n) { (void)n; pthread_yield_np(); }
 #elif defined(_WIN32)
-#define _dispatch_preemption_yield(n) { (void)n; sched_yield(); }
-#define _dispatch_preemption_yield_to(th, n) { (void)n; sched_yield(); }
+#define _dispatch_preemption_yield(n) { (void)n; Sleep(0); }
+#define _dispatch_preemption_yield_to(th, n) { (void)n; Sleep(0); }
 #else 
 #define _dispatch_preemption_yield(n) { (void)n; pthread_yield(); }
 #define _dispatch_preemption_yield_to(th, n) { (void)n; pthread_yield(); }
@@ -160,8 +165,12 @@
 #pragma mark _dispatch_contention_usleep
 
 #ifndef DISPATCH_CONTENTION_USLEEP_START
+#if defined(_WIN32)
+#define DISPATCH_CONTENTION_USLEEP_START 1000   // Must be >= 1ms for Sleep()
+#else
 #define DISPATCH_CONTENTION_USLEEP_START 500
 #endif
+#endif
 #ifndef DISPATCH_CONTENTION_USLEEP_MAX
 #define DISPATCH_CONTENTION_USLEEP_MAX 100000
 #endif
@@ -176,20 +185,7 @@
 #endif
 #else
 #if defined(_WIN32)
-DISPATCH_INLINE void
-_dispatch_contention_usleep(uint64_t useconds) {
-	static BOOL bQPFExecuted = FALSE;
-	static LARGE_INTEGER liFreq;
-	LARGE_INTEGER liStart, liNow;
-
-	if (!bQPFExecuted)
-		bQPFExecuted = QueryPerformanceFrequency(&liFreq);
-
-	QueryPerformanceCounter(&liStart);
-	do {
-		QueryPerformanceCounter(&liNow);
-	} while ((liNow.QuadPart - liStart.QuadPart) / (float)liFreq.QuadPart * 1000 * 1000 < useconds);
-}
+#define _dispatch_contention_usleep(u) Sleep((u) / 1000)
 #else
 #define _dispatch_contention_usleep(u) usleep((u))
 #endif
diff --git a/src/source.c b/src/source.c
index 9335484..1010da1 100644
--- a/src/source.c
+++ b/src/source.c
@@ -197,7 +197,7 @@
 }
 
 void
-dispatch_source_merge_data(dispatch_source_t ds, unsigned long val)
+dispatch_source_merge_data(dispatch_source_t ds, uintptr_t val)
 {
 	dispatch_queue_flags_t dqf = _dispatch_queue_atomic_flags(ds);
 	dispatch_source_refs_t dr = ds->ds_refs;
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 3a4684f..f530f04 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -192,8 +192,10 @@
 add_unit_test(dispatch_plusplus SOURCES dispatch_plusplus.cpp)
 
 # test-specific link options
-target_link_libraries(dispatch_group PRIVATE m)
-target_link_libraries(dispatch_timer_short PRIVATE m)
+if(NOT WIN32)
+  target_link_libraries(dispatch_group PRIVATE m)
+  target_link_libraries(dispatch_timer_short PRIVATE m)
+endif()
 
 # test-specific compile options
 set_target_properties(dispatch_c99 PROPERTIES C_STANDARD 99)
diff --git a/tests/dispatch_drift.c b/tests/dispatch_drift.c
index e483f36..0381cab 100644
--- a/tests/dispatch_drift.c
+++ b/tests/dispatch_drift.c
@@ -22,8 +22,8 @@
 #include <mach/mach_time.h>
 #endif
 #include <dispatch/dispatch.h>
-#include <sys/time.h>
 #if defined(__unix__) || (defined(__APPLE__) && defined(__MACH__))
+#include <sys/time.h>
 #include <unistd.h>
 #endif
 #include <stdio.h>
@@ -46,8 +46,13 @@
 	__block uint32_t count = 0;
 	__block double last_jitter = 0;
 	__block double drift_sum = 0;
+#if defined(_WIN32)
+	// 25 times a second (Windows timer resolution is poor)
+	uint64_t interval = 1000000000 / 25;
+#else
 	// 100 times a second
 	uint64_t interval = 1000000000 / 100;
+#endif
 	double interval_d = interval / 1000000000.0;
 	// for 25 seconds
 	unsigned int target = (unsigned int)(25.0 / interval_d);
diff --git a/tests/dispatch_timer_bit31.c b/tests/dispatch_timer_bit31.c
index eed17ae..a70c4f6 100644
--- a/tests/dispatch_timer_bit31.c
+++ b/tests/dispatch_timer_bit31.c
@@ -21,7 +21,9 @@
 #include <assert.h>
 #include <stdio.h>
 #include <string.h>
+#if defined(__unix__) || (defined(__APPLE__) && defined(__MACH__))
 #include <sys/time.h>
+#endif
 
 #include <dispatch/dispatch.h>
 
diff --git a/tests/dispatch_timer_bit63.c b/tests/dispatch_timer_bit63.c
index 84868ca..f01ca51 100644
--- a/tests/dispatch_timer_bit63.c
+++ b/tests/dispatch_timer_bit63.c
@@ -21,7 +21,9 @@
 #include <assert.h>
 #include <stdio.h>
 #include <string.h>
+#if defined(__unix__) || (defined(__APPLE__) && defined(__MACH__))
 #include <sys/time.h>
+#endif
 
 #include <dispatch/dispatch.h>
 
diff --git a/tests/dispatch_timer_set_time.c b/tests/dispatch_timer_set_time.c
index 5ffd63e..6f30b0c 100644
--- a/tests/dispatch_timer_set_time.c
+++ b/tests/dispatch_timer_set_time.c
@@ -18,11 +18,12 @@
  * @APPLE_APACHE_LICENSE_HEADER_END@
  */
 
-#include <sys/time.h>
 #include <assert.h>
 #include <stdio.h>
 #include <string.h>
+#if defined(__unix__) || (defined(__APPLE__) && defined(__MACH__))
 #include <sys/time.h>
+#endif
 
 #include <dispatch/dispatch.h>
 
diff --git a/tests/dispatch_timer_timeout.c b/tests/dispatch_timer_timeout.c
index f43409e..109bbff 100644
--- a/tests/dispatch_timer_timeout.c
+++ b/tests/dispatch_timer_timeout.c
@@ -21,7 +21,9 @@
 #include <assert.h>
 #include <stdio.h>
 #include <string.h>
+#if defined(__unix__) || (defined(__APPLE__) && defined(__MACH__))
 #include <sys/time.h>
+#endif
 
 #include <dispatch/dispatch.h>
 
diff --git a/tests/generic_win_port.c b/tests/generic_win_port.c
index d9a52f4..f84f9f9 100644
--- a/tests/generic_win_port.c
+++ b/tests/generic_win_port.c
@@ -183,18 +183,50 @@
 	return 0;
 }
 
+typedef void (WINAPI *QueryUnbiasedInterruptTimePreciseT)(PULONGLONG);
+static QueryUnbiasedInterruptTimePreciseT QueryUnbiasedInterruptTimePrecisePtr;
+
+static BOOL
+mach_absolute_time_init(PINIT_ONCE InitOnce, PVOID Parameter, PVOID *lpContext)
+{
+	// QueryUnbiasedInterruptTimePrecise() is declared in the Windows headers
+	// but it isn't available in any import libraries. We must manually load it
+	// from KernelBase.dll.
+	HMODULE kernelbase = LoadLibraryW(L"KernelBase.dll");
+	if (!kernelbase) {
+		print_winapi_error("LoadLibraryW", GetLastError());
+		abort();
+	}
+	QueryUnbiasedInterruptTimePrecisePtr =
+			(QueryUnbiasedInterruptTimePreciseT)GetProcAddress(kernelbase,
+					"QueryUnbiasedInterruptTimePrecise");
+	if (!QueryUnbiasedInterruptTimePrecisePtr) {
+		fprintf(stderr, "QueryUnbiasedInterruptTimePrecise is not available\n");
+		abort();
+	}
+	return TRUE;
+}
+
+uint64_t
+mach_absolute_time(void) // test shim: QueryUnbiasedInterruptTimePrecise() value scaled by 100
+{
+	static INIT_ONCE init_once = INIT_ONCE_STATIC_INIT;
+	if (!InitOnceExecuteOnce(&init_once, mach_absolute_time_init, NULL, NULL)) { // resolves the function pointer on first call
+		print_winapi_error("InitOnceExecuteOnce", GetLastError());
+		abort();
+	}
+	ULONGLONG result = 0;
+	QueryUnbiasedInterruptTimePrecisePtr(&result);
+	return result * 100;  // Convert from 100ns units
+}
+
 void
 print_winapi_error(const char *function_name, DWORD error)
 {
 	char *message = NULL;
 	DWORD len = FormatMessageA(
-			FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM,
-			NULL,
-			error,
-			0,
-			(LPSTR)&message,
-			0,
-			NULL);
+			FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM, NULL,
+			error, 0, (LPSTR)&message, 0, NULL);
 	if (len > 0) {
 		// Note: FormatMessage includes a newline at the end of the message
 		fprintf(stderr, "%s: %s", function_name, message);
@@ -214,10 +246,6 @@
 int
 usleep(unsigned int usec)
 {
-	DWORD ms = usec / 1000;
-	if (ms == 0 && usec != 0) {
-		ms = 1;
-	}
-	Sleep(ms);
+	Sleep(usec / 1000 + (usec % 1000 != 0)); // round up to whole ms; (usec + 999) would wrap near UINT_MAX
 	return 0;
 }
diff --git a/tests/generic_win_port.h b/tests/generic_win_port.h
index cf96a21..41c076c 100644
--- a/tests/generic_win_port.h
+++ b/tests/generic_win_port.h
@@ -12,6 +12,14 @@
 typedef long ssize_t;
 #endif
 
+struct mach_timebase_info {
+	uint32_t numer;
+	uint32_t denom;
+};
+
+typedef struct mach_timebase_info *mach_timebase_info_t;
+typedef struct mach_timebase_info mach_timebase_info_data_t;
+
 static inline int32_t
 OSAtomicIncrement32(volatile int32_t *var)
 {
@@ -45,6 +53,18 @@
 int
 gettimeofday(struct timeval *tp, void *tzp);
 
+uint64_t
+mach_absolute_time(void);
+
+static inline
+int
+mach_timebase_info(mach_timebase_info_t tbi)
+{
+	tbi->numer = 1;
+	tbi->denom = 1;
+	return 0;
+}
+
 void
 print_winapi_error(const char *function_name, DWORD error);