revive the Windows port

The `DISPATCH_EXPORT` macro added an extraneous `extern` on the
declaration.  This was pointed out by a newer clang.  Adjust the
declaration accordingly.

Add build rules to improve Windows builds.  These additions help
cross-compile to Windows with both clang-cl as well as the GNU clang
driver.

Use the newer Windows APIs to detect the Windows CPU state.  This allows
us to collect all of the CPU configuration information to properly
affinitise work to the preferred CPU.

Use the FLS API to create thread local data storage for libdispatch's
queues.
diff --git a/CMakeLists.txt b/CMakeLists.txt
index ead4696..b0adcb5 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -8,6 +8,10 @@
         LANGUAGES C CXX)
 enable_testing()
 
+if("${CMAKE_C_SIMULATE_ID}" STREQUAL "MSVC")
+  include(ClangClCompileRules)
+endif()
+
 set(CMAKE_C_STANDARD 11)
 set(CMAKE_C_STANDARD_REQUIRED YES)
 
@@ -262,10 +266,6 @@
 endif()
 check_symbol_exists(__printflike "bsd/sys/cdefs.h" HAVE_PRINTFLIKE)
 
-if(CMAKE_SYSTEM_NAME STREQUAL Windows)
-  add_definitions(-DTARGET_OS_WIN32)
-endif()
-
 if(CMAKE_SYSTEM_NAME STREQUAL Android)
   set(ENABLE_DTRACE_DEFAULT OFF)
 endif()
@@ -321,6 +321,13 @@
                "${CMAKE_BINARY_DIR}/config/config_ac.h")
 add_definitions(-DHAVE_CONFIG_H)
 
+if(CMAKE_SYSTEM_NAME STREQUAL Windows)
+  include(DispatchWindowsSupport)
+  dispatch_windows_arch_spelling(${CMAKE_SYSTEM_PROCESSOR} DISPATCH_MSVC_ARCH)
+  dispatch_windows_include_for_arch(${DISPATCH_MSVC_ARCH} DISPATCH_INCLUDES)
+  include_directories(BEFORE SYSTEM ${DISPATCH_INCLUDES})
+endif()
+
 add_subdirectory(dispatch)
 add_subdirectory(man)
 add_subdirectory(os)
diff --git a/cmake/modules/ClangClCompileRules.cmake b/cmake/modules/ClangClCompileRules.cmake
new file mode 100644
index 0000000..9c2a4cd
--- /dev/null
+++ b/cmake/modules/ClangClCompileRules.cmake
@@ -0,0 +1,8 @@
+# Rule overrides for compilers that simulate MSVC (clang-cl).
+# clang-cl interprets paths starting with /U as macro undefines, so we need to
+# put a -- before the input file path to force it to be treated as a path.
+string(REPLACE "-c <SOURCE>" "-c -- <SOURCE>" CMAKE_C_COMPILE_OBJECT "${CMAKE_C_COMPILE_OBJECT}")
+string(REPLACE "-c <SOURCE>" "-c -- <SOURCE>" CMAKE_CXX_COMPILE_OBJECT "${CMAKE_CXX_COMPILE_OBJECT}")
+# Link C executables by invoking the C compiler with a GNU-style `-o <TARGET>`.
+set(CMAKE_C_LINK_EXECUTABLE "<CMAKE_C_COMPILER> <FLAGS> <CMAKE_C_LINK_FLAGS> <LINK_FLAGS> <OBJECTS> -o <TARGET> <LINK_LIBRARIES>")
+
diff --git a/cmake/modules/DispatchWindowsSupport.cmake b/cmake/modules/DispatchWindowsSupport.cmake
new file mode 100644
index 0000000..eed28a2
--- /dev/null
+++ b/cmake/modules/DispatchWindowsSupport.cmake
@@ -0,0 +1,74 @@
+# Map a CMake processor name (i686, x86_64, armv7, aarch64) to its MSVC spelling and store it in `var`.
+function(dispatch_windows_arch_spelling arch var)
+  if("${arch}" STREQUAL "i686")
+    set(${var} x86 PARENT_SCOPE)
+  elseif("${arch}" STREQUAL "x86_64")
+    set(${var} x64 PARENT_SCOPE)
+  elseif("${arch}" STREQUAL "armv7")
+    set(${var} arm PARENT_SCOPE)
+  elseif("${arch}" STREQUAL "aarch64")
+    set(${var} arm64 PARENT_SCOPE)
+  else()
+    message(FATAL_ERROR "do not know MSVC spelling for ARCH: `${arch}`")
+  endif()
+endfunction()
+# Emit a SEND_ERROR for each of the environment variables VCToolsInstallDir,
+# UniversalCRTSdkDir and UCRTVersion that is unset or empty.  SEND_ERROR lets
+# processing continue, so every missing variable is reported in one run.
+function(dispatch_verify_windows_environment_variables)
+  if("$ENV{VCToolsInstallDir}" STREQUAL "")
+    message(SEND_ERROR "VCToolsInstallDir environment variable must be set")
+  endif()
+
+  if("$ENV{UniversalCRTSdkDir}" STREQUAL "")
+    message(SEND_ERROR "UniversalCRTSdkDir environment variable must be set")
+  endif()
+
+  if("$ENV{UCRTVersion}" STREQUAL "")
+    message(SEND_ERROR "UCRTVersion environment variable must be set")
+  endif()
+endfunction()
+# Store in `var` the MSVC and Windows SDK (ucrt, shared, um) header directories; `arch` is accepted but unused.
+function(dispatch_windows_include_for_arch arch var)
+  dispatch_verify_windows_environment_variables()
+  set(sdk_include "$ENV{UniversalCRTSdkDir}/Include/$ENV{UCRTVersion}")
+  set(${var}
+        "$ENV{VCToolsInstallDir}/include"
+        "${sdk_include}/ucrt"
+        "${sdk_include}/shared"
+        "${sdk_include}/um"
+      PARENT_SCOPE)
+endfunction()
+# Store in `var` the MSVC and Windows SDK (ucrt, um) library directories for `arch`, a CMake processor name.
+function(dispatch_windows_lib_for_arch arch var)
+  dispatch_verify_windows_environment_variables()
+  dispatch_windows_arch_spelling("${arch}" ARCH)
+
+  set(paths)
+  if("${ARCH}" STREQUAL "x86")
+    list(APPEND paths "$ENV{VCToolsInstallDir}/Lib")
+  else()
+    list(APPEND paths "$ENV{VCToolsInstallDir}/Lib/${ARCH}")
+  endif()
+  list(APPEND paths
+          "$ENV{UniversalCRTSdkDir}/Lib/$ENV{UCRTVersion}/ucrt/${ARCH}"
+          "$ENV{UniversalCRTSdkDir}/Lib/$ENV{UCRTVersion}/um/${ARCH}")
+  set(${var} ${paths} PARENT_SCOPE)
+endfunction()
+# Configure windows-sdk-vfs-overlay.yaml into the build tree from the utils/ template and return the -ivfsoverlay flag list in `flags`.
+function(dispatch_windows_generate_sdk_vfs_overlay flags)
+  dispatch_verify_windows_environment_variables()
+
+  get_filename_component(VCToolsInstallDir $ENV{VCToolsInstallDir} ABSOLUTE)
+  get_filename_component(UniversalCRTSdkDir $ENV{UniversalCRTSdkDir} ABSOLUTE)
+  set(UCRTVersion $ENV{UCRTVersion})
+
+  # TODO(compnerd) use a target to avoid re-creating this file all the time
+  configure_file("${CMAKE_SOURCE_DIR}/utils/WindowsSDKVFSOverlay.yaml.in"
+                 "${CMAKE_BINARY_DIR}/windows-sdk-vfs-overlay.yaml"
+                 @ONLY)
+
+  set(${flags}
+      -ivfsoverlay;"${CMAKE_BINARY_DIR}/windows-sdk-vfs-overlay.yaml"
+      PARENT_SCOPE)
+endfunction()
diff --git a/dispatch/base.h b/dispatch/base.h
index 0c5f8c9..77fe03f 100644
--- a/dispatch/base.h
+++ b/dispatch/base.h
@@ -128,15 +128,20 @@
 #endif
 #endif
 
-#if TARGET_OS_WIN32 && defined(__DISPATCH_BUILDING_DISPATCH__) && \
-		defined(__cplusplus)
-#define DISPATCH_EXPORT extern "C" extern __declspec(dllexport)
-#elif TARGET_OS_WIN32 && defined(__DISPATCH_BUILDING_DISPATCH__)
+#if defined(_WIN32)
+#if defined(__DISPATCH_BUILDING_DISPATCH__)
+#if defined(__cplusplus)
+#define DISPATCH_EXPORT extern "C" __declspec(dllexport)
+#else
 #define DISPATCH_EXPORT extern __declspec(dllexport)
-#elif TARGET_OS_WIN32 && defined(__cplusplus)
-#define DISPATCH_EXPORT extern "C" extern __declspec(dllimport)
-#elif TARGET_OS_WIN32
+#endif
+#else
+#if defined(__cplusplus)
+#define DISPATCH_EXPORT extern "C" __declspec(dllimport)
+#else
 #define DISPATCH_EXPORT extern __declspec(dllimport)
+#endif
+#endif
 #elif __GNUC__
 #define DISPATCH_EXPORT extern __attribute__((visibility("default")))
 #else
diff --git a/dispatch/data.h b/dispatch/data.h
index 33a0c9d..8250669 100644
--- a/dispatch/data.h
+++ b/dispatch/data.h
@@ -62,16 +62,11 @@
 #define DISPATCH_DATA_DESTRUCTOR_DEFAULT NULL
 
 #ifdef __BLOCKS__
-#if !TARGET_OS_WIN32
 /*! @parseOnly */
 #define DISPATCH_DATA_DESTRUCTOR_TYPE_DECL(name) \
 	DISPATCH_EXPORT const dispatch_block_t _dispatch_data_destructor_##name
 #else
 #define DISPATCH_DATA_DESTRUCTOR_TYPE_DECL(name) \
-	DISPATCH_EXPORT dispatch_block_t _dispatch_data_destructor_##name
-#endif
-#else
-#define DISPATCH_DATA_DESTRUCTOR_TYPE_DECL(name) \
 	DISPATCH_EXPORT const dispatch_function_t \
 	_dispatch_data_destructor_##name
 #endif /* __BLOCKS__ */
diff --git a/dispatch/dispatch.h b/dispatch/dispatch.h
index d36259c..1901226 100644
--- a/dispatch/dispatch.h
+++ b/dispatch/dispatch.h
@@ -26,7 +26,9 @@
 #include <os/availability.h>
 #include <TargetConditionals.h>
 #include <os/base.h>
-#elif defined(__linux__) || defined(__FreeBSD__)
+#elif defined(_WIN32)
+#include <os/generic_win_base.h>
+#elif defined(__unix__)
 #include <os/generic_unix_base.h>
 #endif
 
diff --git a/dispatch/io.h b/dispatch/io.h
index a9e6892..db9733d 100644
--- a/dispatch/io.h
+++ b/dispatch/io.h
@@ -50,7 +50,11 @@
  * @typedef dispatch_fd_t
  * Native file descriptor type for the platform.
  */
+#if defined(_WIN32)
+typedef intptr_t dispatch_fd_t;
+#else
 typedef int dispatch_fd_t;
+#endif
 
 /*!
  * @functiongroup Dispatch I/O Convenience API
diff --git a/dispatch/object.h b/dispatch/object.h
index 3ff36c2..3a31b10 100644
--- a/dispatch/object.h
+++ b/dispatch/object.h
@@ -121,7 +121,7 @@
 #ifndef DISPATCH_DATA_DECL
 #define DISPATCH_DATA_DECL(name) OS_OBJECT_DECL_SWIFT(name)
 #endif // DISPATCH_DATA_DECL
-#elif !TARGET_OS_WIN32
+#else
 /*! @parseOnly */
 #define DISPATCH_SOURCE_DECL(name) \
 		DISPATCH_DECL(name);
@@ -131,12 +131,6 @@
 #define DISPATCH_SOURCE_TYPE_DECL(name) \
 		DISPATCH_EXPORT const struct dispatch_source_type_s \
 		_dispatch_source_type_##name
-#else
-#define DISPATCH_SOURCE_DECL(name) \
-		DISPATCH_DECL(name);
-#define DISPATCH_SOURCE_TYPE_DECL(name) \
-		DISPATCH_EXPORT struct dispatch_source_type_s _dispatch_source_type_##name
-#define DISPATCH_DATA_DECL(name) DISPATCH_DECL(name)
 #endif
 
 #ifdef __BLOCKS__
diff --git a/dispatch/source.h b/dispatch/source.h
index 6992d42..35bb03f 100644
--- a/dispatch/source.h
+++ b/dispatch/source.h
@@ -31,7 +31,7 @@
 #include <mach/message.h>
 #endif
 
-#if !TARGET_OS_WIN32
+#if !defined(_WIN32)
 #include <sys/signal.h>
 #endif
 
diff --git a/os/generic_win_base.h b/os/generic_win_base.h
new file mode 100644
index 0000000..8b4c613
--- /dev/null
+++ b/os/generic_win_base.h
@@ -0,0 +1,132 @@
+/*
+ * This source file is part of the Swift.org open source project
+ *
+ * Copyright (c) 2015 Apple Inc. and the Swift project authors
+ *
+ * Licensed under Apache License v2.0 with Runtime Library Exception
+ *
+ * See http://swift.org/LICENSE.txt for license information
+ * See http://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
+ *
+ */
+
+#ifndef __OS_GENERIC_WIN_BASE__
+#define __OS_GENERIC_WIN_BASE__
+
+// Unices provide `roundup` via sys/param.h
+#define roundup(x, y) ((((x) + ((y) - 1)) / (y)) * (y))
+// Unices provide `MAX` via sys/param.h
+#define MAX(a,b) (((a)>(b))?(a):(b))
+// Unices provide `MIN` via sys/param.h
+#define MIN(a,b) (((a)<(b))?(a):(b))
+// Unices provide `howmany` via sys/param.h
+#define howmany(x, y)  (((x) + ((y) - 1)) / (y))
+
+typedef int mode_t;
+typedef void pthread_attr_t;
+
+#if defined(__cplusplus)
+#define __BEGIN_DECLS extern "C" {
+#define __END_DECLS }
+#else
+#define __BEGIN_DECLS
+#define __END_DECLS
+#endif
+
+#ifndef API_AVAILABLE
+#define API_AVAILABLE(...)
+#endif
+#ifndef API_DEPRECATED
+#define API_DEPRECATED(...)
+#endif
+#ifndef API_UNAVAILABLE
+#define API_UNAVAILABLE(...)
+#endif
+#ifndef API_DEPRECATED_WITH_REPLACEMENT
+#define API_DEPRECATED_WITH_REPLACEMENT(...)
+#endif
+
+#if !defined(__has_attribute) // fallbacks: treat every probe as unsupported
+#define __has_attribute(attribute) 0
+#endif
+#if !defined(__has_builtin)
+#define __has_builtin(builtin) 0
+#endif
+#if !defined(__has_feature)
+#define __has_feature(feature) 0
+#endif
+#if !defined(__has_extension) // queried by the OS_ENUM selection below
+#define __has_extension(extension) 0
+#endif
+#if __has_builtin(__builtin_expect)
+#define OS_EXPECT(expression, value) __builtin_expect((expression), (value))
+#else
+#define OS_EXPECT(expression, value) (expression)
+#endif
+
+#if __has_attribute(__unused__)
+#define OS_UNUSED __attribute__((__unused__))
+#else
+#define OS_UNUSED
+#endif
+
+#ifndef os_likely
+#define os_likely(expression) OS_EXPECT(!!(expression), 1)
+#endif
+#ifndef os_unlikely
+#define os_unlikely(expression) OS_EXPECT(!!(expression), 0)
+#endif
+
+#if __has_feature(assume_nonnull)
+#define OS_ASSUME_NONNULL_BEGIN _Pragma("clang assume_nonnull begin")
+#define OS_ASSUME_NONNULL_END   _Pragma("clang assume_nonnull end")
+#else
+#define OS_ASSUME_NONNULL_BEGIN
+#define OS_ASSUME_NONNULL_END
+#endif
+
+#if __has_builtin(__builtin_assume)
+#define OS_COMPILER_CAN_ASSUME(expr) __builtin_assume(expr)
+#else
+#define OS_COMPILER_CAN_ASSUME(expr) ((void)(expr))
+#endif
+
+#if __has_feature(attribute_availability_swift)
+// equivalent to __SWIFT_UNAVAILABLE from Availability.h
+#define OS_SWIFT_UNAVAILABLE(msg)                                              \
+  __attribute__((__availability__(swift, unavailable, message = msg)))
+#else
+#define OS_SWIFT_UNAVAILABLE(msg)
+#endif
+
+#define __OS_STRINGIFY(s) #s
+#define OS_STRINGIFY(s) __OS_STRINGIFY(s)
+
+#if __has_feature(objc_fixed_enum) || __has_extension(cxx_strong_enums)
+#define OS_ENUM(name, type, ...) typedef enum : type { __VA_ARGS__ } name##_t
+#else
+#define OS_ENUM(name, type, ...)                                               \
+  enum { __VA_ARGS__ };                                                        \
+  typedef type name##_t
+#endif
+
+#ifdef OS_EXPORT
+#undef OS_EXPORT
+#endif
+#define OS_EXPORT __declspec(dllexport)
+
+#ifdef OS_WARN_RESULT_NEEDS_RELEASE
+#undef OS_WARN_RESULT_NEEDS_RELEASE
+#endif
+
+#ifdef OS_WARN_RESULT
+#undef OS_WARN_RESULT
+#endif
+#define OS_WARN_RESULT
+
+#ifdef OS_NOTHROW
+#undef OS_NOTHROW
+#endif
+#define OS_NOTHROW
+
+#endif
diff --git a/os/object.h b/os/object.h
index 3666c33..1ad1158 100644
--- a/os/object.h
+++ b/os/object.h
@@ -26,7 +26,9 @@
 #include <os/availability.h>
 #include <TargetConditionals.h>
 #include <os/base.h>
-#elif defined(__linux__) || defined(__FreeBSD__)
+#elif defined(_WIN32)
+#include <os/generic_win_base.h>
+#elif defined(__unix__)
 #include <os/generic_unix_base.h>
 #endif
 
diff --git a/private/data_private.h b/private/data_private.h
index 364a8ff..a922157 100644
--- a/private/data_private.h
+++ b/private/data_private.h
@@ -182,15 +182,9 @@
  */
 typedef const struct dispatch_data_format_type_s *dispatch_data_format_type_t;
 
-#if !TARGET_OS_WIN32
 #define DISPATCH_DATA_FORMAT_TYPE_DECL(name) \
 	DISPATCH_EXPORT const struct dispatch_data_format_type_s \
 	_dispatch_data_format_type_##name
-#else
-#define DISPATCH_DATA_FORMAT_TYPE_DECL(name) \
-	DISPATCH_EXPORT struct dispatch_data_format_type_s \
-	_dispatch_data_format_type_##name
-#endif
 
 /*!
  * @const DISPATCH_DATA_FORMAT_TYPE_NONE
diff --git a/private/layout_private.h b/private/layout_private.h
index 0c0cd94..81bcabd 100644
--- a/private/layout_private.h
+++ b/private/layout_private.h
@@ -28,7 +28,6 @@
 
 __BEGIN_DECLS
 
-#if !TARGET_OS_WIN32
 API_AVAILABLE(macos(10.6), ios(4.0))
 DISPATCH_EXPORT const struct dispatch_queue_offsets_s {
 	// always add new fields at the end
@@ -51,7 +50,6 @@
 	const uint16_t dqo_priority;
 	const uint16_t dqo_priority_size;
 } dispatch_queue_offsets;
-#endif
 
 #if DISPATCH_LAYOUT_SPI
 
diff --git a/private/private.h b/private/private.h
index ee2b26e..7fba396 100644
--- a/private/private.h
+++ b/private/private.h
@@ -32,7 +32,9 @@
 #include <os/availability.h>
 #include <TargetConditionals.h>
 #include <os/base.h>
-#elif defined(__linux__) || defined(__FreeBSD__)
+#elif defined(_WIN32)
+#include <os/generic_win_base.h>
+#elif defined(__unix__)
 #include <os/generic_unix_base.h>
 #endif
 
@@ -44,7 +46,9 @@
 #if defined(__unix__) || (defined(__APPLE__) && defined(__MACH__))
 #include <unistd.h>
 #endif
+#if !defined(_WIN32)
 #include <pthread.h>
+#endif
 #if TARGET_OS_MAC
 #include <pthread/qos.h>
 #endif
diff --git a/src/BlocksRuntime/runtime.c b/src/BlocksRuntime/runtime.c
index da85f71..b8c4b33 100644
--- a/src/BlocksRuntime/runtime.c
+++ b/src/BlocksRuntime/runtime.c
@@ -172,7 +172,7 @@
 }
 
 static void _Block_assign_weak_default(const void *ptr, void *dest) {
-#if !TARGET_OS_WIN32
+#if !defined(_WIN32)
     *(long *)dest = (long)ptr;
 #else
     *(void **)dest = (void *)ptr;
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index c9c55b5..a7cd89b 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -47,14 +47,23 @@
               shims/atomic_sfb.h
               shims/getprogname.h
               shims/hw_config.h
-              shims/generic_unix_stubs.c
-              shims/generic_unix_stubs.h
               shims/lock.c
               shims/lock.h
               shims/perfmon.h
               shims/time.h
               shims/tsd.h
               shims/yield.h)
+if(UNIX)
+  target_sources(dispatch
+                 PRIVATE
+                   shims/generic_unix_stubs.c
+                   shims/generic_unix_stubs.h)
+elseif(WIN32)
+  target_sources(dispatch
+                 PRIVATE
+                   shims/generic_win_stubs.c
+                   shims/generic_win_stubs.h)
+endif()
 if(DISPATCH_USE_INTERNAL_WORKQUEUE)
   target_sources(dispatch
                  PRIVATE
@@ -130,6 +139,11 @@
                              SYSTEM BEFORE PRIVATE
                                "${WITH_BLOCKS_RUNTIME}")
 endif()
+if(WIN32)
+  target_compile_definitions(dispatch
+                             PRIVATE
+                               _CRT_NONSTDC_NO_WARNINGS)
+endif()
 if("${CMAKE_C_SIMULATE_ID}" STREQUAL "MSVC")
   target_compile_options(dispatch PRIVATE /EHsc-)
 else()
diff --git a/src/block.cpp b/src/block.cpp
index 8f8113a..a46b551 100644
--- a/src/block.cpp
+++ b/src/block.cpp
@@ -69,7 +69,9 @@
 	{
 		// copy constructor, create copy with retained references
 		if (dbpd_voucher) voucher_retain(dbpd_voucher);
-		if (o.dbpd_block) dbpd_block = _dispatch_Block_copy(o.dbpd_block);
+		if (o.dbpd_block) {
+			dbpd_block = reinterpret_cast<dispatch_block_t>(_dispatch_Block_copy(o.dbpd_block));
+		}
 		_dispatch_block_private_data_debug("copy from %p, block: %p from %p",
 				&o, dbpd_block, o.dbpd_block);
 		if (!o.dbpd_magic) return; // No group in initial copy of stack object
@@ -98,11 +100,11 @@
 		pthread_priority_t pri, dispatch_block_t block)
 {
 	struct dispatch_block_private_data_s dbpds(flags, voucher, pri, block);
-	return _dispatch_Block_copy(^{
+	return reinterpret_cast<dispatch_block_t>(_dispatch_Block_copy(^{
 		// Capture stack object: invokes copy constructor (17094902)
 		(void)dbpds;
 		_dispatch_block_invoke_direct(&dbpds);
-	});
+	}));
 }
 
 extern "C" {
diff --git a/src/event/event_config.h b/src/event/event_config.h
index 871a3e0..fda5d22 100644
--- a/src/event/event_config.h
+++ b/src/event/event_config.h
@@ -29,6 +29,9 @@
 #	include <sys/event.h>
 #	define DISPATCH_EVENT_BACKEND_EPOLL 0
 #	define DISPATCH_EVENT_BACKEND_KEVENT 1
+#elif defined(_WIN32)
+#	define DISPATCH_EVENT_BACKEND_EPOLL 0
+#	define DISPATCH_EVENT_BACKEND_KEVENT 0
 #else
 #	error unsupported event loop
 #endif
diff --git a/src/init.c b/src/init.c
index 4ef733d..00a04d3 100644
--- a/src/init.c
+++ b/src/init.c
@@ -37,7 +37,7 @@
 void
 _libdispatch_init(void);
 
-DISPATCH_EXPORT DISPATCH_NOTHROW
+DISPATCH_NOTHROW
 void
 _libdispatch_init(void)
 {
@@ -45,6 +45,7 @@
 }
 #endif
 
+#if !defined(_WIN32)
 DISPATCH_EXPORT DISPATCH_NOTHROW
 void
 dispatch_atfork_prepare(void)
@@ -96,6 +97,7 @@
 	r |= pthread_sigmask(SIG_BLOCK, &mask, NULL);
 	return dispatch_assume_zero(r);
 }
+#endif
 
 #pragma mark -
 #pragma mark dispatch_globals
@@ -111,7 +113,11 @@
 
 #if DISPATCH_USE_THREAD_LOCAL_STORAGE
 __thread struct dispatch_tsd __dispatch_tsd;
+#if defined(_WIN32)
+DWORD __dispatch_tsd_key;
+#else
 pthread_key_t __dispatch_tsd_key;
+#endif
 #elif !DISPATCH_USE_DIRECT_TSD
 pthread_key_t dispatch_queue_key;
 pthread_key_t dispatch_frame_key;
@@ -690,26 +696,57 @@
 			log_to_file = true;
 		} else if (strcmp(e, "stderr") == 0) {
 			log_to_file = true;
+#if defined(_WIN32)
+			dispatch_logfile = _fileno(stderr);
+#else
 			dispatch_logfile = STDERR_FILENO;
+#endif
 		}
 	}
 	if (!dispatch_log_disabled) {
 		if (log_to_file && dispatch_logfile == -1) {
+#if defined(_WIN32)
+			char path[MAX_PATH + 1] = {0};
+			DWORD dwLength = GetTempPathA(MAX_PATH, path);
+			// A result >= MAX_PATH means the temp path did not fit; use the cwd.
+			if (dwLength >= MAX_PATH) { dwLength = 0; path[0] = '\0'; }
+			snprintf(&path[dwLength], MAX_PATH - dwLength, "libdispatch.%d.log", GetCurrentProcessId());
+			dispatch_logfile = _open(path, O_WRONLY | O_APPEND | O_CREAT, 0666);
+#else
 			char path[PATH_MAX];
 			snprintf(path, sizeof(path), "/var/tmp/libdispatch.%d.log",
 					getpid());
 			dispatch_logfile = open(path, O_WRONLY | O_APPEND | O_CREAT |
 					O_NOFOLLOW | O_CLOEXEC, 0666);
+#endif
 		}
 		if (dispatch_logfile != -1) {
 			struct timeval tv;
+#if defined(_WIN32)
+			DWORD dwTime = GetTickCount();
+			tv.tv_sec = dwTime / 1000;
+			tv.tv_usec = 1000 * (dwTime % 1000);
+#else
 			gettimeofday(&tv, NULL);
+#endif
 #if DISPATCH_DEBUG
 			dispatch_log_basetime = _dispatch_absolute_time();
 #endif
+#if defined(_WIN32)
+			char szProgramName[MAX_PATH + 1] = {0};
+			GetModuleFileNameA(NULL, szProgramName, MAX_PATH);
+			// Format into a buffer and _write() it: fclose() on an _fdopen()ed
+			// stream would also close dispatch_logfile itself.
+			char szHeader[MAX_PATH + 128];
+			int nHeader = snprintf(szHeader, sizeof(szHeader), "=== log file opened "
+					"for %s[%lu] at %ld.%06u ===\n", szProgramName,
+					GetCurrentProcessId(), tv.tv_sec, (int)tv.tv_usec);
+			if (nHeader > 0 && nHeader < (int)sizeof(szHeader)) (void)_write(dispatch_logfile, szHeader, (unsigned int)nHeader);
+#else
 			dprintf(dispatch_logfile, "=== log file opened for %s[%u] at "
 					"%ld.%06u ===\n", getprogname() ?: "", getpid(),
 					tv.tv_sec, (int)tv.tv_usec);
+#endif
 		}
 	}
 }
@@ -721,7 +758,12 @@
 
 	buf[len++] = '\n';
 retry:
+#if defined(_WIN32)
+	dispatch_assert(len <= UINT_MAX);
+	r = _write(dispatch_logfile, buf, (unsigned int)len);
+#else
 	r = write(dispatch_logfile, buf, len);
+#endif
 	if (slowpath(r == -1) && errno == EINTR) {
 		goto retry;
 	}
@@ -765,6 +807,36 @@
 		free(str);
 	}
 }
+#elif defined(_WIN32)
+static inline void
+_dispatch_syslog(const char *msg)
+{
+  OutputDebugStringA(msg);
+}
+
+// Format `msg` with `ap` and hand the result to _dispatch_syslog().  The
+// message is silently dropped if it cannot be sized or allocated.
+static inline void
+_dispatch_vsyslog(const char *msg, va_list ap)
+{
+  va_list argp;
+
+  // `ap` is consumed by the sizing pass below, so keep a copy for the
+  // formatting pass.
+  va_copy(argp, ap);
+
+  int length = _vscprintf(msg, ap);
+  char *buffer = (length == -1) ? NULL : malloc((size_t)length + 1);
+  if (buffer != NULL) {
+    _vsnprintf(buffer, (size_t)length + 1, msg, argp);
+    _dispatch_syslog(buffer);
+    free(buffer);
+  }
+
+  // va_copy() must be paired with va_end() on every path, including the
+  // failure paths above.
+  va_end(argp);
+}
 #else // DISPATCH_USE_SIMPLE_ASL
 static inline void
 _dispatch_syslog(const char *msg)
diff --git a/src/internal.h b/src/internal.h
index 84505e5..126f4c6 100644
--- a/src/internal.h
+++ b/src/internal.h
@@ -148,9 +148,7 @@
 #include <dispatch/semaphore.h>
 #include <dispatch/once.h>
 #include <dispatch/data.h>
-#if !TARGET_OS_WIN32
 #include <dispatch/io.h>
-#endif
 
 #if defined(__OBJC__) || defined(__cplusplus)
 #define DISPATCH_PURE_C 0
@@ -159,7 +157,9 @@
 #endif
 
 /* private.h must be included last to avoid picking up installed headers. */
+#if !defined(_WIN32)
 #include <pthread.h>
+#endif
 #include "os/object_private.h"
 #include "queue_private.h"
 #include "source_private.h"
@@ -167,16 +167,14 @@
 #include "data_private.h"
 #include "os/voucher_private.h"
 #include "os/voucher_activity_private.h"
-#if !TARGET_OS_WIN32
 #include "io_private.h"
-#endif
 #include "layout_private.h"
 #include "benchmark.h"
 #include "private.h"
 
 /* SPI for Libsystem-internal use */
 DISPATCH_EXPORT DISPATCH_NOTHROW void libdispatch_init(void);
-#if !TARGET_OS_WIN32
+#if !defined(_WIN32)
 DISPATCH_EXPORT DISPATCH_NOTHROW void dispatch_atfork_prepare(void);
 DISPATCH_EXPORT DISPATCH_NOTHROW void dispatch_atfork_parent(void);
 DISPATCH_EXPORT DISPATCH_NOTHROW void dispatch_atfork_child(void);
@@ -244,10 +242,12 @@
 #endif
 
 #include <sys/stat.h>
-
-#if !TARGET_OS_WIN32
-#include <sys/mount.h>
 #include <sys/queue.h>
+
+#if defined(_WIN32)
+#include <time.h>
+#else
+#include <sys/mount.h>
 #ifdef __ANDROID__
 #include <linux/sysctl.h>
 #else
@@ -283,13 +283,19 @@
 #include <stdbool.h>
 #include <stdint.h>
 #include <stdio.h>
+#if defined(_WIN32)
+#define _CRT_RAND_S
+#endif
 #include <stdlib.h>
 #include <string.h>
 #if defined(__unix__) || (defined(__APPLE__) && defined(__MACH__))
 #include <unistd.h>
 #endif
+#if defined(_WIN32)
+#include <io.h>
+#endif
 
-#if __GNUC__
+#if defined(__GNUC__) || defined(__clang__)
 #define DISPATCH_NOINLINE __attribute__((__noinline__))
 #define DISPATCH_USED __attribute__((__used__))
 #define DISPATCH_UNUSED __attribute__((__unused__))
@@ -420,7 +426,7 @@
 #endif
 #endif // DISPATCH_USE_SIMPLE_ASL
 
-#if !DISPATCH_USE_SIMPLE_ASL && !DISPATCH_USE_OS_DEBUG_LOG && !TARGET_OS_WIN32
+#if !DISPATCH_USE_SIMPLE_ASL && !DISPATCH_USE_OS_DEBUG_LOG && !defined(_WIN32)
 #include <syslog.h>
 #endif
 
@@ -513,7 +519,7 @@
 	})
 #else
 static inline long
-_dispatch_assume(long e, long line)
+_dispatch_assume(long e, unsigned long line)
 {
 	if (!e) _dispatch_bug(line, e);
 	return e;
@@ -538,7 +544,7 @@
 	})
 #else
 static inline long
-_dispatch_assume_zero(long e, long line)
+_dispatch_assume_zero(long e, unsigned long line)
 {
 	if (e) _dispatch_bug(line, e);
 	return e;
@@ -604,7 +610,9 @@
 const char *_dispatch_strdup_if_mutable(const char *str);
 void _dispatch_vtable_init(void);
 char *_dispatch_get_build(void);
+#if !defined(_WIN32)
 int _dispatch_sigmask(void);
+#endif
 
 uint64_t _dispatch_timeout(dispatch_time_t when);
 uint64_t _dispatch_time_nanoseconds_since_epoch(dispatch_time_t when);
@@ -913,6 +921,18 @@
 		_dispatch_hardware_crash(); \
 	} while (0)
 
+#if defined(_WIN32)
+#define _dispatch_client_assert_fail(fmt, ...)  do { \
+		/* _snprintf() NUL-terminates only if the count exceeds the output. */ \
+		int _length = _scprintf("%s" fmt, DISPATCH_ASSERTION_FAILED_MESSAGE, ##__VA_ARGS__); \
+		dispatch_assert(_length != -1); \
+		char *_msg = (char *)malloc((unsigned)_length + 1); \
+		dispatch_assert(_msg); \
+		_snprintf(_msg, (unsigned)_length + 1, "%s" fmt, DISPATCH_ASSERTION_FAILED_MESSAGE, ##__VA_ARGS__); \
+		_dispatch_assert_crash(_msg); \
+		free(_msg); \
+	} while (0)
+#else
 #define _dispatch_client_assert_fail(fmt, ...)  do { \
 		char *_msg = NULL; \
 		asprintf(&_msg, "%s" fmt, DISPATCH_ASSERTION_FAILED_MESSAGE, \
@@ -920,6 +940,7 @@
 		_dispatch_assert_crash(_msg); \
 		free(_msg); \
 	} while (0)
+#endif
 
 #define DISPATCH_NO_VOUCHER ((voucher_t)(void*)~0ul)
 #define DISPATCH_NO_PRIORITY ((pthread_priority_t)~0ul)
@@ -967,9 +988,7 @@
 #include "mach_internal.h"
 #include "voucher_internal.h"
 #include "data_internal.h"
-#if !TARGET_OS_WIN32
 #include "io_internal.h"
-#endif
 #include "inline_internal.h"
 #include "firehose/firehose_internal.h"
 
diff --git a/src/introspection.c b/src/introspection.c
index 1332adf..1bb095d 100644
--- a/src/introspection.c
+++ b/src/introspection.c
@@ -137,7 +137,7 @@
 	_dispatch_unfair_lock_unlock(&_dispatch_introspection.threads_lock);
 }
 
-static void
+static DISPATCH_TSD_DTOR_CC void
 _dispatch_introspection_thread_remove(void *ctxt)
 {
 	dispatch_introspection_thread_t dit = ctxt;
diff --git a/src/io.c b/src/io.c
index 6ec45d8..fbb0373 100644
--- a/src/io.c
+++ b/src/io.c
@@ -30,6 +30,15 @@
 #endif
 
 #ifndef PAGE_SIZE
+#if defined(_WIN32)
+static DWORD
+getpagesize(void)
+{
+	SYSTEM_INFO siInfo;
+	GetSystemInfo(&siInfo);
+	return siInfo.dwPageSize;
+}
+#endif
 #define PAGE_SIZE ((size_t)getpagesize())
 #endif
 
@@ -366,12 +375,23 @@
 			err = _dispatch_io_validate_type(channel, fd_entry->stat.mode);
 		}
 		if (!err && type == DISPATCH_IO_RANDOM) {
+#if defined(_WIN32)
+			LARGE_INTEGER liPosition;
+			LARGE_INTEGER liDistance = {};
+			if (!SetFilePointerEx((HANDLE)fd_entry->fd, liDistance, &liPosition, FILE_CURRENT)) {
+				err = (int)GetLastError();
+			} else {
+				err = 0;
+				channel->f_ptr = liPosition.QuadPart;
+			}
+#else
 			off_t f_ptr;
 			_dispatch_io_syscall_switch_noerr(err,
 				f_ptr = lseek(fd_entry->fd, 0, SEEK_CUR),
 				case 0: channel->f_ptr = f_ptr; break;
 				default: (void)dispatch_assume_zero(err); break;
 			);
+#endif
 		}
 		channel->err = err;
 		_dispatch_fd_entry_retain(fd_entry);
@@ -423,11 +443,15 @@
 		int err = 0;
 		struct stat st;
 		_dispatch_io_syscall_switch_noerr(err,
+#if defined(_WIN32)
+			stat(path_data->path, &st),
+#else
 			(path_data->oflag & O_NOFOLLOW) == O_NOFOLLOW
 #if __APPLE__
 					|| (path_data->oflag & O_SYMLINK) == O_SYMLINK
 #endif
 					? lstat(path_data->path, &st) : stat(path_data->path, &st),
+#endif
 			case 0:
 				err = _dispatch_io_validate_type(channel, st.st_mode);
 				break;
@@ -528,12 +552,23 @@
 						in_channel->fd_entry->stat.mode);
 			}
 			if (!err && type == DISPATCH_IO_RANDOM && in_channel->fd != -1) {
+#if defined(_WIN32)
+				LARGE_INTEGER liPosition;
+				LARGE_INTEGER liDistance = {};
+				if (!SetFilePointerEx((HANDLE)in_channel->fd_entry->fd, liDistance, &liPosition, FILE_CURRENT)) {
+					err = (int)GetLastError();
+				} else {
+					err = 0;
+					channel->f_ptr = liPosition.QuadPart;
+				}
+#else
 				off_t f_ptr;
 				_dispatch_io_syscall_switch_noerr(err,
 					f_ptr = lseek(in_channel->fd_entry->fd, 0, SEEK_CUR),
 					case 0: channel->f_ptr = f_ptr; break;
 					default: (void)dispatch_assume_zero(err); break;
 				);
+#endif
 			}
 			channel->err = err;
 			if (err) {
@@ -1075,7 +1110,11 @@
 	}
 	// For write operations, op->buf is owned by op->buf_data
 	if (op->buf && op->direction == DOP_DIR_READ) {
+#if defined(_WIN32)
+		_aligned_free(op->buf);
+#else
 		free(op->buf);
+#endif
 	}
 	if (op->buf_data) {
 		_dispatch_io_data_release(op->buf_data);
@@ -1226,13 +1265,15 @@
 	);
 }
 #else
+#if !defined(_WIN32)
 static inline void
 _dispatch_fd_entry_guard(dispatch_fd_entry_t fd_entry) { (void)fd_entry; }
+#endif
 static inline void
 _dispatch_fd_entry_unguard(dispatch_fd_entry_t fd_entry) { (void)fd_entry; }
 #endif // DISPATCH_USE_GUARDED_FD
 
-static inline int
+static inline dispatch_fd_t
 _dispatch_fd_entry_guarded_open(dispatch_fd_entry_t fd_entry, const char *path,
 		int oflag, mode_t mode) {
 #if DISPATCH_USE_GUARDED_FD
@@ -1249,11 +1290,28 @@
 #else
 	(void)fd_entry;
 #endif
+#if defined(_WIN32)
+	(void)mode;
+	// _O_RDONLY is 0 and cannot be detected with a bitwise AND, so read access
+	// is the default when neither _O_RDWR nor _O_WRONLY is requested.
+	DWORD dwDesiredAccess = GENERIC_READ;
+	if (oflag & _O_RDWR)
+		dwDesiredAccess = GENERIC_READ | GENERIC_WRITE;
+	else if (oflag & _O_WRONLY)
+		dwDesiredAccess = GENERIC_WRITE;
+	DWORD dwCreationDisposition = OPEN_EXISTING;
+	if (oflag & _O_CREAT)
+		dwCreationDisposition = OPEN_ALWAYS;
+	if (oflag & _O_TRUNC)
+		dwCreationDisposition = CREATE_ALWAYS;
+	return (dispatch_fd_t)CreateFileA(path, dwDesiredAccess, 0, NULL, dwCreationDisposition, 0, NULL);
+#else
 	return open(path, oflag, mode);
+#endif
 }
 
 static inline int
-_dispatch_fd_entry_guarded_close(dispatch_fd_entry_t fd_entry, int fd) {
+_dispatch_fd_entry_guarded_close(dispatch_fd_entry_t fd_entry, dispatch_fd_t fd) {
 #if DISPATCH_USE_GUARDED_FD
 	if (fd_entry->guard_flags) {
 		guardid_t guard = (uintptr_t)fd_entry;
@@ -1263,7 +1321,11 @@
 	(void)fd_entry;
 #endif
 	{
+#if defined(_WIN32)
+		return CloseHandle((HANDLE)fd) ? 0 : (errno = EBADF, -1); // match close(): 0, or -1 with errno set
+#else
 		return close(fd);
+#endif
 	}
 }
 
@@ -1337,6 +1399,24 @@
 			"com.apple.libdispatch-io.barrierq", NULL);
 	fd_entry->barrier_group = dispatch_group_create();
 	dispatch_async(fd_entry->barrier_queue, ^{
+#if defined(_WIN32)
+		DWORD dwType = GetFileType((HANDLE)fd);
+		if (dwType == FILE_TYPE_PIPE) {
+			unsigned long value = 1;
+			int result = ioctlsocket((SOCKET)fd, (long)FIONBIO, &value);
+			(void)dispatch_assume_zero(result);
+			_dispatch_stream_init(fd_entry,
+				_dispatch_get_root_queue(DISPATCH_QOS_DEFAULT, false));
+		} else {
+			dispatch_suspend(fd_entry->barrier_queue);
+			dispatch_once_f(&_dispatch_io_devs_lockq_pred, NULL,
+					_dispatch_io_devs_lockq_init);
+			dispatch_async(_dispatch_io_devs_lockq, ^{
+				_dispatch_disk_init(fd_entry, 0);
+				dispatch_resume(fd_entry->barrier_queue);
+			});
+		}
+#else
 		_dispatch_fd_entry_debug("stat", fd_entry);
 		int err, orig_flags, orig_nosigpipe = -1;
 		struct stat st;
@@ -1404,6 +1484,7 @@
 		}
 		fd_entry->orig_flags = orig_flags;
 		fd_entry->orig_nosigpipe = orig_nosigpipe;
+#endif
 	});
 	// This is the first item run when the close queue is resumed, indicating
 	// that all channels associated with this entry have been closed and that
@@ -1434,6 +1515,7 @@
 		dispatch_release(fd_entry->barrier_queue);
 		_dispatch_fd_entry_debug("barrier group release", fd_entry);
 		dispatch_release(fd_entry->barrier_group);
+#if !defined(_WIN32)
 		if (fd_entry->orig_flags != -1) {
 			_dispatch_io_syscall(
 				fcntl(fd, F_SETFL, fd_entry->orig_flags)
@@ -1446,6 +1528,7 @@
 			);
 		}
 #endif
+#endif
 		_dispatch_fd_entry_unguard(fd_entry);
 		if (fd_entry->convenience_channel) {
 			fd_entry->convenience_channel->fd_entry = NULL;
@@ -1465,7 +1548,11 @@
 			path_data->channel->queue);
 	_dispatch_fd_entry_debug("create: path %s", fd_entry, path_data->path);
 	if (S_ISREG(mode)) {
+#if defined(_WIN32)
+		_dispatch_disk_init(fd_entry, 0);
+#else
 		_dispatch_disk_init(fd_entry, (dev_t)major(dev));
+#endif
 	} else {
 			_dispatch_stream_init(fd_entry,
 					_dispatch_get_root_queue(DISPATCH_QOS_DEFAULT, false));
@@ -1520,7 +1607,7 @@
 	if (fd_entry->err) {
 		return fd_entry->err;
 	}
-	int fd = -1;
+	dispatch_fd_t fd = -1;
 	int oflag = fd_entry->disk ? fd_entry->path_data->oflag & ~O_NONBLOCK :
 			fd_entry->path_data->oflag | O_NONBLOCK;
 open:
@@ -2143,6 +2230,10 @@
 _dispatch_operation_advise(dispatch_operation_t op, size_t chunk_size)
 {
 	_dispatch_op_debug("advise", op);
+#if defined(_WIN32)
+	(void)op;
+	(void)chunk_size;
+#else
 	if (_dispatch_io_get_error(op, NULL, true)) return;
 #if defined(__linux__) || defined(__FreeBSD__)
 	// linux does not support fcntl (F_RDAVISE)
@@ -2186,6 +2277,7 @@
 		default: (void)dispatch_assume_zero(err); break;
 	);
 #endif
+#endif
 }
 
 static int
@@ -2219,7 +2311,17 @@
 			} else {
 				op->buf_siz = max_buf_siz;
 			}
+#if defined(_WIN32)
+			static bool bQueried = false;
+			static SYSTEM_INFO siInfo;
+			if (!bQueried) {
+				GetNativeSystemInfo(&siInfo);
+				bQueried = true;
+			}
+			op->buf = _aligned_malloc(op->buf_siz, siInfo.dwPageSize);
+#else
 			op->buf = valloc(op->buf_siz);
+#endif
 			_dispatch_op_debug("buffer allocated", op);
 		} else if (op->direction == DOP_DIR_WRITE) {
 			// Always write the first data piece, if that is smaller than a
@@ -2257,20 +2359,51 @@
 	}
 	void *buf = op->buf + op->buf_len;
 	size_t len = op->buf_siz - op->buf_len;
+#if defined(_WIN32)
+	assert(len <= UINT_MAX && "overflow for read/write");
+	LONGLONG off = (LONGLONG)((size_t)op->offset + op->total);
+#else
 	off_t off = (off_t)((size_t)op->offset + op->total);
+#endif
+#if defined(_WIN32)
+	long processed = -1;
+#else
 	ssize_t processed = -1;
+#endif
 syscall:
 	if (op->direction == DOP_DIR_READ) {
 		if (op->params.type == DISPATCH_IO_STREAM) {
+#if defined(_WIN32)
+			ReadFile((HANDLE)op->fd_entry->fd, buf, (DWORD)len, (LPDWORD)&processed, NULL);
+#else
 			processed = read(op->fd_entry->fd, buf, len);
+#endif
 		} else if (op->params.type == DISPATCH_IO_RANDOM) {
+#if defined(_WIN32)
+			OVERLAPPED ovlOverlapped = {};
+			ovlOverlapped.Offset = off & 0xffffffff;
+			ovlOverlapped.OffsetHigh = (off >> 32) & 0xffffffff;
+			ReadFile((HANDLE)op->fd_entry->fd, buf, (DWORD)len, (LPDWORD)&processed, &ovlOverlapped);
+#else
 			processed = pread(op->fd_entry->fd, buf, len, off);
+#endif
 		}
 	} else if (op->direction == DOP_DIR_WRITE) {
 		if (op->params.type == DISPATCH_IO_STREAM) {
+#if defined(_WIN32)
+			WriteFile((HANDLE)op->fd_entry->fd, buf, (DWORD)len, (LPDWORD)&processed, NULL);
+#else
 			processed = write(op->fd_entry->fd, buf, len);
+#endif
 		} else if (op->params.type == DISPATCH_IO_RANDOM) {
+#if defined(_WIN32)
+			OVERLAPPED ovlOverlapped = {};
+			ovlOverlapped.Offset = off & 0xffffffff;
+			ovlOverlapped.OffsetHigh = (off >> 32) & 0xffffffff;
+			WriteFile((HANDLE)op->fd_entry->fd, buf, (DWORD)len, (LPDWORD)&processed, &ovlOverlapped);
+#else
 			processed = pwrite(op->fd_entry->fd, buf, len, off);
+#endif
 		}
 	}
 	// Encountered an error on the file descriptor
diff --git a/src/io_internal.h b/src/io_internal.h
index 672727f..d70e075 100644
--- a/src/io_internal.h
+++ b/src/io_internal.h
@@ -145,7 +145,11 @@
 	dispatch_queue_t op_q;
 	dispatch_op_direction_t direction; // READ OR WRITE
 	dispatch_io_param_s params;
+#if defined(_WIN32)
+	LONGLONG offset;
+#else
 	off_t offset;
+#endif
 	size_t length;
 	int err;
 	dispatch_io_handler_t handler;
@@ -172,7 +176,11 @@
 	dispatch_fd_entry_t fd_entry;
 	unsigned int atomic_flags;
 	dispatch_fd_t fd, fd_actual;
+#if defined(_WIN32)
+	LONGLONG f_ptr;
+#else
 	off_t f_ptr;
+#endif
 	int err; // contains creation errors only
 };
 
diff --git a/src/queue.c b/src/queue.c
index d747a59..896ed21 100644
--- a/src/queue.c
+++ b/src/queue.c
@@ -58,13 +58,13 @@
 #endif
 
 static void _dispatch_sig_thread(void *ctxt);
-static void _dispatch_cache_cleanup(void *value);
+static void DISPATCH_TSD_DTOR_CC _dispatch_cache_cleanup(void *value);
 static void _dispatch_async_f2(dispatch_queue_t dq, dispatch_continuation_t dc);
-static void _dispatch_queue_cleanup(void *ctxt);
-static void _dispatch_wlh_cleanup(void *ctxt);
-static void _dispatch_deferred_items_cleanup(void *ctxt);
-static void _dispatch_frame_cleanup(void *ctxt);
-static void _dispatch_context_cleanup(void *ctxt);
+static void DISPATCH_TSD_DTOR_CC _dispatch_queue_cleanup(void *ctxt);
+static void DISPATCH_TSD_DTOR_CC _dispatch_wlh_cleanup(void *ctxt);
+static void DISPATCH_TSD_DTOR_CC _dispatch_deferred_items_cleanup(void *ctxt);
+static void DISPATCH_TSD_DTOR_CC _dispatch_frame_cleanup(void *ctxt);
+static void DISPATCH_TSD_DTOR_CC _dispatch_context_cleanup(void *ctxt);
 static void _dispatch_queue_barrier_complete(dispatch_queue_t dq,
 		dispatch_qos_t qos, dispatch_wakeup_flags_t flags);
 static void _dispatch_queue_non_barrier_complete(dispatch_queue_t dq);
@@ -87,6 +87,10 @@
 #endif
 #if DISPATCH_USE_PTHREAD_POOL
 static void *_dispatch_worker_thread(void *context);
+#if defined(_WIN32)
+static unsigned WINAPI
+_dispatch_worker_thread_thunk(LPVOID lpParameter);
+#endif
 #endif
 
 #if DISPATCH_COCOA_COMPAT
@@ -101,7 +105,9 @@
 #pragma mark dispatch_root_queue
 
 struct dispatch_pthread_root_queue_context_s {
+#if !defined(_WIN32)
 	pthread_attr_t dpq_thread_attr;
+#endif
 	dispatch_block_t dpq_thread_configure;
 	struct dispatch_semaphore_s dpq_thread_mediator;
 	dispatch_pthread_root_queue_observer_hooks_s dpq_observer_hooks;
@@ -763,9 +769,11 @@
 	qc->dgq_thread_pool_size = thread_pool_size;
 #if DISPATCH_USE_WORKQUEUES
 	if (qc->dgq_qos) {
+#if !defined(_WIN32)
 		(void)dispatch_assume_zero(pthread_attr_init(&pqc->dpq_thread_attr));
 		(void)dispatch_assume_zero(pthread_attr_setdetachstate(
 				&pqc->dpq_thread_attr, PTHREAD_CREATE_DETACHED));
+#endif
 #if HAVE_PTHREAD_WORKQUEUE_QOS
 		(void)dispatch_assume_zero(pthread_attr_set_qos_class_np(
 				&pqc->dpq_thread_attr, qc->dgq_qos, 0));
@@ -906,7 +914,9 @@
 #if defined(__unix__) || (defined(__APPLE__) && defined(__MACH__))
 #include <unistd.h>
 #endif
+#if !defined(_WIN32)
 #include <sys/syscall.h>
+#endif
 
 #ifndef __ANDROID__
 #ifdef SYS_gettid
@@ -923,6 +933,13 @@
 {
 	return (pid_t)pthread_getthreadid_np();
 }
+#elif defined(_WIN32)
+DISPATCH_ALWAYS_INLINE
+static inline DWORD
+gettid(void)
+{
+	return GetCurrentThreadId();
+}
 #else
 #error "SYS_gettid unavailable on this system"
 #endif /* SYS_gettid */
@@ -944,7 +961,7 @@
 }
 #endif
 
-void
+void DISPATCH_TSD_DTOR_CC
 _libdispatch_tsd_cleanup(void *ctx)
 {
 	struct dispatch_tsd *tsd = (struct dispatch_tsd*) ctx;
@@ -980,7 +997,11 @@
 void
 libdispatch_tsd_init(void)
 {
+#if defined(_WIN32)
+	FlsSetValue(__dispatch_tsd_key, &__dispatch_tsd);
+#else
 	pthread_setspecific(__dispatch_tsd_key, &__dispatch_tsd);
+#endif /* defined(_WIN32) */
 	__dispatch_tsd.tid = gettid();
 }
 #endif
@@ -1164,8 +1185,6 @@
 	case DISPATCH_AUTORELEASE_FREQUENCY_WORK_ITEM:
 	case DISPATCH_AUTORELEASE_FREQUENCY_NEVER:
 		break;
-	default:
-		return DISPATCH_BAD_INPUT;
 	}
 	if (!slowpath(dqa)) {
 		dqa = _dispatch_get_default_queue_attr();
@@ -2035,7 +2054,11 @@
 	volatile qos_class_t qos;
 	int default_prio;
 	int policy;
+#if defined(_WIN32)
+	HANDLE hThread;
+#else
 	pthread_t tid;
+#endif
 } _dispatch_mgr_sched;
 
 static dispatch_once_t _dispatch_mgr_sched_pred;
@@ -2053,6 +2076,15 @@
 };
 #endif // HAVE_PTHREAD_WORKQUEUE_QOS
 
+#if defined(_WIN32)
+static void
+_dispatch_mgr_sched_init(void *ctx DISPATCH_UNUSED)
+{
+	_dispatch_mgr_sched.policy = 0;
+	_dispatch_mgr_sched.default_prio = THREAD_PRIORITY_NORMAL;
+	_dispatch_mgr_sched.prio = _dispatch_mgr_sched.default_prio;
+}
+#else
 static void
 _dispatch_mgr_sched_init(void *ctxt DISPATCH_UNUSED)
 {
@@ -2080,9 +2112,19 @@
 	_dispatch_mgr_sched.default_prio = param.sched_priority;
 	_dispatch_mgr_sched.prio = _dispatch_mgr_sched.default_prio;
 }
+#endif /* defined(_WIN32) */
 #endif // DISPATCH_ENABLE_PTHREAD_ROOT_QUEUES || DISPATCH_USE_KEVENT_WORKQUEUE
 
 #if DISPATCH_USE_MGR_THREAD && DISPATCH_ENABLE_PTHREAD_ROOT_QUEUES
+#if defined(_WIN32)
+DISPATCH_NOINLINE
+static PHANDLE
+_dispatch_mgr_root_queue_init(void)
+{
+	dispatch_once_f(&_dispatch_mgr_sched_pred, NULL, _dispatch_mgr_sched_init);
+	return &_dispatch_mgr_sched.hThread;
+}
+#else
 DISPATCH_NOINLINE
 static pthread_t *
 _dispatch_mgr_root_queue_init(void)
@@ -2111,10 +2153,21 @@
 	}
 	return &_dispatch_mgr_sched.tid;
 }
+#endif
 
 static inline void
 _dispatch_mgr_priority_apply(void)
 {
+#if defined(_WIN32)
+	int nPriority = _dispatch_mgr_sched.prio;
+	do {
+		if (nPriority > _dispatch_mgr_sched.default_prio) {
+			// TODO(compnerd) set thread scheduling policy
+			(void)dispatch_assume(SetThreadPriority(_dispatch_mgr_sched.hThread, nPriority)); // nonzero return means success
+			nPriority = GetThreadPriority(_dispatch_mgr_sched.hThread);
+		}
+	} while (_dispatch_mgr_sched.prio > nPriority);
+#else
 	struct sched_param param;
 	do {
 		param.sched_priority = _dispatch_mgr_sched.prio;
@@ -2124,12 +2177,19 @@
 					&param));
 		}
 	} while (_dispatch_mgr_sched.prio > param.sched_priority);
+#endif
 }
 
 DISPATCH_NOINLINE
 void
 _dispatch_mgr_priority_init(void)
 {
+#if defined(_WIN32)
+	int nPriority = GetThreadPriority(_dispatch_mgr_sched.hThread);
+	if (slowpath(_dispatch_mgr_sched.prio > nPriority)) {
+		return _dispatch_mgr_priority_apply();
+	}
+#else
 	struct sched_param param;
 	pthread_attr_t *attr;
 	attr = &_dispatch_mgr_root_queue_pthread_context.dpq_thread_attr;
@@ -2148,9 +2208,11 @@
 	if (slowpath(_dispatch_mgr_sched.prio > param.sched_priority)) {
 		return _dispatch_mgr_priority_apply();
 	}
+#endif
 }
 #endif // DISPATCH_USE_MGR_THREAD && DISPATCH_ENABLE_PTHREAD_ROOT_QUEUES
 
+#if !defined(_WIN32)
 #if DISPATCH_ENABLE_PTHREAD_ROOT_QUEUES
 DISPATCH_NOINLINE
 static void
@@ -2202,6 +2264,7 @@
 #endif
 }
 #endif // DISPATCH_ENABLE_PTHREAD_ROOT_QUEUES
+#endif
 
 #if DISPATCH_USE_KEVENT_WORKQUEUE
 void
@@ -2274,6 +2337,9 @@
 #endif
 	_dispatch_root_queue_init_pthread_pool(qc, pool_size, true);
 
+#if defined(_WIN32)
+	dispatch_assert(attr == NULL);
+#else
 	if (attr) {
 		memcpy(&pqc->dpq_thread_attr, attr, sizeof(pthread_attr_t));
 		_dispatch_mgr_priority_raise(&pqc->dpq_thread_attr);
@@ -2282,6 +2348,7 @@
 	}
 	(void)dispatch_assume_zero(pthread_attr_setdetachstate(
 			&pqc->dpq_thread_attr, PTHREAD_CREATE_DETACHED));
+#endif
 	if (configure) {
 		pqc->dpq_thread_configure = _dispatch_Block_copy(configure);
 	}
@@ -2296,6 +2363,9 @@
 dispatch_pthread_root_queue_create(const char *label, unsigned long flags,
 		const pthread_attr_t *attr, dispatch_block_t configure)
 {
+#if defined(_WIN32)
+	dispatch_assert(attr == NULL);
+#endif
 	return _dispatch_pthread_root_queue_create(label, flags, attr, configure,
 			NULL);
 }
@@ -2345,7 +2415,9 @@
 	dispatch_root_queue_context_t qc = dq->do_ctxt;
 	dispatch_pthread_root_queue_context_t pqc = qc->dgq_ctxt;
 
+#if !defined(_WIN32)
 	pthread_attr_destroy(&pqc->dpq_thread_attr);
+#endif
 	_dispatch_semaphore_dispose(&pqc->dpq_thread_mediator, NULL);
 	if (pqc->dpq_thread_configure) {
 		Block_release(pqc->dpq_thread_configure);
@@ -2773,7 +2845,7 @@
 }
 
 DISPATCH_NOINLINE
-static void
+static void DISPATCH_TSD_DTOR_CC
 _dispatch_cache_cleanup(void *value)
 {
 	dispatch_continuation_t dc, next_dc = value;
@@ -4399,7 +4471,6 @@
 #error "runloop support not implemented on this platform"
 #endif
 }
-#endif // DISPATCH_COCOA_COMPAT
 
 DISPATCH_ALWAYS_INLINE
 static inline dispatch_qos_t
@@ -4410,6 +4481,7 @@
 	old_state = os_atomic_and_orig2o(dqu._dq, dq_state, ~clear_bits, relaxed);
 	return _dq_state_max_qos(old_state);
 }
+#endif // DISPATCH_COCOA_COMPAT
 
 void
 _dispatch_runloop_queue_wakeup(dispatch_queue_t dq, dispatch_qos_t qos,
@@ -4615,6 +4687,32 @@
 	} while (!os_atomic_cmpxchgvw2o(qc, dgq_thread_pool_size, t_count,
 			t_count - remaining, &t_count, acquire));
 
+#if defined(_WIN32)
+#if DISPATCH_USE_MGR_THREAD && DISPATCH_ENABLE_PTHREAD_ROOT_QUEUES
+	if (slowpath(dq == &_dispatch_mgr_root_queue)) {
+		_dispatch_mgr_root_queue_init();
+	}
+#endif
+	do {
+		_dispatch_retain(dq); // released in _dispatch_worker_thread
+#if DISPATCH_DEBUG
+		unsigned dwStackSize = 0;
+#else
+		unsigned dwStackSize = 64 * 1024;
+#endif
+		uintptr_t hThread = 0;
+		while (!(hThread = _beginthreadex(NULL, dwStackSize, _dispatch_worker_thread_thunk, dq, STACK_SIZE_PARAM_IS_A_RESERVATION, NULL))) {
+			if (errno != EAGAIN) {
+				(void)dispatch_assume(hThread);
+			}
+			_dispatch_temporary_resource_shortage();
+		}
+		if (_dispatch_mgr_sched.prio > _dispatch_mgr_sched.default_prio) {
+			(void)dispatch_assume(SetThreadPriority((HANDLE)hThread, _dispatch_mgr_sched.prio)); // nonzero return means success
+		}
+		CloseHandle((HANDLE)hThread);
+	} while (--remaining);
+#else
 	pthread_attr_t *attr = &pqc->dpq_thread_attr;
 	pthread_t tid, *pthr = &tid;
 #if DISPATCH_USE_MGR_THREAD && DISPATCH_ENABLE_PTHREAD_ROOT_QUEUES
@@ -4631,6 +4729,7 @@
 			_dispatch_temporary_resource_shortage();
 		}
 	} while (--remaining);
+#endif
 #endif // DISPATCH_USE_PTHREAD_POOL
 }
 
@@ -5951,6 +6050,15 @@
 #if DISPATCH_USE_PTHREAD_POOL
 // 6618342 Contact the team that owns the Instrument DTrace probe before
 //         renaming this symbol
+#if defined(_WIN32)
+static unsigned WINAPI
+_dispatch_worker_thread_thunk(LPVOID lpParameter)
+{
+  _dispatch_worker_thread(lpParameter);
+  return 0;
+}
+#endif
+
 static void *
 _dispatch_worker_thread(void *context)
 {
@@ -5972,7 +6080,9 @@
 	}
 
 	// workaround tweaks the kernel workqueue does for us
+#if !defined(_WIN32)
 	_dispatch_sigmask();
+#endif
 	_dispatch_introspection_thread_add();
 
 #if DISPATCH_USE_INTERNAL_WORKQUEUE
@@ -6248,7 +6358,11 @@
 		pthread_setspecific(dispatch_main_key, &dispatch_main_key);
 		_dispatch_sigmask();
 #endif
+#if defined(_WIN32)
+		_endthreadex(0);
+#else
 		pthread_exit(NULL);
+#endif
 		DISPATCH_INTERNAL_CRASH(errno, "pthread_exit() returned");
 #if HAVE_PTHREAD_MAIN_NP
 	}
@@ -6256,6 +6370,7 @@
 #endif
 }
 
+#if !defined(_WIN32)
 DISPATCH_NOINLINE DISPATCH_NORETURN
 static void
 _dispatch_sigsuspend(void)
@@ -6266,6 +6381,7 @@
 		sigsuspend(&mask);
 	}
 }
+#endif
 
 DISPATCH_NORETURN
 static void
@@ -6273,7 +6389,9 @@
 {
 	// never returns, so burn bridges behind us
 	_dispatch_clear_stack(0);
+#if !defined(_WIN32)
 	_dispatch_sigsuspend();
+#endif
 }
 
 DISPATCH_NOINLINE
@@ -6320,7 +6438,7 @@
 #endif
 }
 
-static void
+static void DISPATCH_TSD_DTOR_CC
 _dispatch_queue_cleanup(void *ctxt)
 {
 	if (ctxt == &_dispatch_main_q) {
@@ -6331,7 +6449,7 @@
 			"Premature thread exit while a dispatch queue is running");
 }
 
-static void
+static void DISPATCH_TSD_DTOR_CC
 _dispatch_wlh_cleanup(void *ctxt)
 {
 	// POSIX defines that destructors are only called if 'ctxt' is non-null
@@ -6341,7 +6459,7 @@
 }
 
 DISPATCH_NORETURN
-static void
+static void DISPATCH_TSD_DTOR_CC
 _dispatch_deferred_items_cleanup(void *ctxt)
 {
 	// POSIX defines that destructors are only called if 'ctxt' is non-null
@@ -6350,7 +6468,7 @@
 }
 
 DISPATCH_NORETURN
-static void
+static void DISPATCH_TSD_DTOR_CC
 _dispatch_frame_cleanup(void *ctxt)
 {
 	// POSIX defines that destructors are only called if 'ctxt' is non-null
@@ -6359,7 +6477,7 @@
 }
 
 DISPATCH_NORETURN
-static void
+static void DISPATCH_TSD_DTOR_CC
 _dispatch_context_cleanup(void *ctxt)
 {
 	// POSIX defines that destructors are only called if 'ctxt' is non-null
diff --git a/src/shims.h b/src/shims.h
index 4a78bfc..278b6ce 100644
--- a/src/shims.h
+++ b/src/shims.h
@@ -27,8 +27,12 @@
 #ifndef __DISPATCH_OS_SHIMS__
 #define __DISPATCH_OS_SHIMS__
 
+#if !defined(_WIN32)
 #include <pthread.h>
-#if defined(__linux__) || defined(__FreeBSD__)
+#endif
+#if defined(_WIN32)
+#include "shims/generic_win_stubs.h"
+#elif defined(__unix__)
 #include "shims/generic_unix_stubs.h"
 #endif
 
@@ -74,12 +78,6 @@
 
 #endif // HAVE_STRLCPY
 
-
-#if TARGET_OS_WIN32
-#define bzero(ptr,len) memset((ptr), 0, (len))
-#define snprintf _snprintf
-#endif // TARGET_OS_WIN32
-
 #if PTHREAD_WORKQUEUE_SPI_VERSION < 20140716
 static inline int
 _pthread_workqueue_override_start_direct(mach_port_t thread,
diff --git a/src/shims/atomic.h b/src/shims/atomic.h
index 60f2891..1cb0940 100644
--- a/src/shims/atomic.h
+++ b/src/shims/atomic.h
@@ -32,7 +32,7 @@
 #endif
 
 // FreeBSD only defines _Bool in C mode. In C++ mode _Bool is not being defined.
-#if defined(__cplusplus) && defined(__FreeBSD__)
+#if defined(__cplusplus) && (defined(__FreeBSD__) || defined(_WIN32))
 #define _Bool bool
 #endif
 #include <stdatomic.h>
diff --git a/src/shims/generic_win_stubs.c b/src/shims/generic_win_stubs.c
new file mode 100644
index 0000000..f6984a2
--- /dev/null
+++ b/src/shims/generic_win_stubs.c
@@ -0,0 +1,4 @@
+
+/*
+ * Stubbed out static data
+ */
diff --git a/src/shims/generic_win_stubs.h b/src/shims/generic_win_stubs.h
new file mode 100644
index 0000000..d7a6f21
--- /dev/null
+++ b/src/shims/generic_win_stubs.h
@@ -0,0 +1,50 @@
+
+#ifndef __DISPATCH__STUBS__INTERNAL
+#define __DISPATCH__STUBS__INTERNAL
+
+#include <stdint.h>
+
+#include <Windows.h>
+
+#include <io.h>
+#include <process.h>
+
+/*
+ * Stub out defines for some mach types and related macros
+ */
+
+typedef uint32_t mach_port_t;
+
+#define MACH_PORT_NULL (0)
+
+typedef uint32_t mach_msg_bits_t;
+typedef void *mach_msg_header_t;
+
+/*
+ * Stub out defines for other missing types
+ */
+
+// SIZE_T_MAX should not be hardcoded like this here.
+#ifndef SIZE_T_MAX
+#define SIZE_T_MAX (~(size_t)0)
+#endif
+
+typedef __typeof__(_Generic((__SIZE_TYPE__)0,                                  \
+			    unsigned long long int : (long long int)0,         \
+			    unsigned long int : (long int)0,                   \
+			    unsigned int : (int)0,                             \
+			    unsigned short : (short)0,                         \
+			    unsigned char : (signed char)0)) ssize_t;
+
+#define S_ISDIR(mode)  (((mode) & S_IFMT) == S_IFDIR)
+#define S_ISFIFO(mode) ((mode) & _S_IFIFO)
+#define S_ISREG(mode)  ((mode) & _S_IFREG)
+#define S_ISSOCK(mode) 0
+
+#define O_NONBLOCK 04000
+
+#define bzero(ptr,len) memset((ptr), 0, (len))
+#define snprintf _snprintf
+
+#endif
+
diff --git a/src/shims/hw_config.h b/src/shims/hw_config.h
index 485dad6..e788727 100644
--- a/src/shims/hw_config.h
+++ b/src/shims/hw_config.h
@@ -43,8 +43,6 @@
 #error "could not determine pointer size as a constant int"
 #endif // __SIZEOF_POINTER__
 
-#if !TARGET_OS_WIN32
-
 typedef enum {
 	_dispatch_hw_config_logical_cpus,
 	_dispatch_hw_config_physical_cpus,
@@ -115,6 +113,64 @@
 			return (uint32_t)sysconf(_SC_NPROCESSORS_ONLN);
 		}
 	}
+#elif defined(_WIN32)
+	PSYSTEM_LOGICAL_PROCESSOR_INFORMATION slpiInfo = NULL;
+	PSYSTEM_LOGICAL_PROCESSOR_INFORMATION slpiCurrent = NULL;
+	DWORD dwProcessorLogicalCount = 0;
+	DWORD dwProcessorPackageCount = 0;
+	DWORD dwProcessorCoreCount = 0;
+	DWORD dwSize = 0;
+
+	while (true) {
+		DWORD dwResult;
+
+		if (GetLogicalProcessorInformation(slpiInfo, &dwSize))
+			break;
+
+		dwResult = GetLastError();
+
+		if (slpiInfo)
+			free(slpiInfo);
+
+		if (dwResult == ERROR_INSUFFICIENT_BUFFER) {
+			slpiInfo = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION)malloc(dwSize);
+			dispatch_assert(slpiInfo);
+		} else {
+			slpiInfo = NULL;
+			dwSize = 0;
+			break;
+		}
+	}
+
+	for (slpiCurrent = slpiInfo;
+	     dwSize >= sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION);
+	     slpiCurrent++, dwSize -= sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION)) {
+		switch (slpiCurrent->Relationship) {
+		case RelationProcessorCore:
+			++dwProcessorCoreCount;
+			dwProcessorLogicalCount += __popcnt64(slpiCurrent->ProcessorMask);
+			break;
+		case RelationProcessorPackage:
+			++dwProcessorPackageCount;
+			break;
+		case RelationNumaNode:
+		case RelationCache:
+		case RelationGroup:
+		case RelationAll:
+			break;
+		}
+	}
+
+	free(slpiInfo);
+
+	switch (c) {
+	case _dispatch_hw_config_logical_cpus:
+		return dwProcessorLogicalCount;
+	case _dispatch_hw_config_physical_cpus:
+		return dwProcessorPackageCount;
+	case _dispatch_hw_config_active_cpus:
+		return dwProcessorCoreCount;
+	}
 #else
 	const char *name = NULL;
 	int r;
@@ -160,31 +216,4 @@
 
 #endif // DISPATCH_HAVE_HW_CONFIG_COMMPAGE
 
-#else // TARGET_OS_WIN32
-
-static inline long
-_dispatch_count_bits(unsigned long value)
-{
-	long bits = 0;
-	while (value) {
-		bits += (value & 1);
-		value = value >> 1;
-	}
-	return bits;
-}
-
-static inline uint32_t
-_dispatch_get_ncpus(void)
-{
-	uint32_t val;
-	DWORD_PTR procmask, sysmask;
-	if (GetProcessAffinityMask(GetCurrentProcess(), &procmask, &sysmask)) {
-		val = _dispatch_count_bits(procmask);
-	} else {
-		val = 1;
-	}
-	return val;
-}
-#endif // TARGET_OS_WIN32
-
 #endif /* __DISPATCH_SHIMS_HW_CONFIG__ */
diff --git a/src/shims/lock.c b/src/shims/lock.c
index bc55450..27b0c10 100644
--- a/src/shims/lock.c
+++ b/src/shims/lock.c
@@ -51,6 +51,21 @@
 #endif // HAVE_UL_UNFAIR_LOCK
 #endif
 
+#if defined(_WIN32)
+#if !HAVE_UL_UNFAIR_LOCK
+DISPATCH_ALWAYS_INLINE
+static inline void
+_dispatch_thread_switch(dispatch_lock value, dispatch_lock_options_t flags,
+		uint32_t timeout)
+{
+	(void)value;
+	(void)flags;
+	(void)timeout;
+	SwitchToThread();
+}
+#endif
+#endif
+
 #pragma mark - semaphores
 
 #if USE_MACH_SEM
@@ -230,9 +245,7 @@
 	// aim for the best resolution we can accomplish
 	dispatch_once(&once, ^{
 		TIMECAPS tc;
-		MMRESULT res;
-		res = timeGetDevCaps(&tc, sizeof(tc));
-		if (res == MMSYSERR_NOERROR) {
+		if (timeGetDevCaps(&tc, sizeof(tc)) == MMSYSERR_NOERROR) {
 			best_resolution = min(max(tc.wPeriodMin, best_resolution),
 					tc.wPeriodMax);
 		}
@@ -302,7 +315,7 @@
 	nsec = _dispatch_timeout(timeout);
 	msec = (DWORD)(nsec / (uint64_t)1000000);
 	resolution = _push_timer_resolution(msec);
-	wait_result = WaitForSingleObject(dsema->dsema_handle, msec);
+	wait_result = WaitForSingleObject(sema, msec);
 	_pop_timer_resolution(resolution);
 	return wait_result == WAIT_TIMEOUT;
 }
@@ -440,6 +453,8 @@
 	_dispatch_ulock_wait((uint32_t *)address, value, 0, flags);
 #elif HAVE_FUTEX
 	_dispatch_futex_wait((uint32_t *)address, value, NULL, FUTEX_PRIVATE_FLAG);
+#elif defined(_WIN32)
+	WaitOnAddress(address, &value, sizeof(value), INFINITE); // CompareAddress must point at the expected value
 #else
 	mach_msg_timeout_t timeout = 1;
 	while (os_atomic_load(address, relaxed) == value) {
@@ -456,6 +471,8 @@
 	_dispatch_ulock_wake((uint32_t *)address, ULF_WAKE_ALL);
 #elif HAVE_FUTEX
 	_dispatch_futex_wake((uint32_t *)address, INT_MAX, FUTEX_PRIVATE_FLAG);
+#elif defined(_WIN32)
+	WakeByAddressAll((uint32_t *)address);
 #else
 	(void)address;
 #endif
@@ -545,11 +562,11 @@
 _dispatch_unfair_lock_lock_slow(dispatch_unfair_lock_t dul,
 		dispatch_lock_options_t flags)
 {
-	dispatch_lock cur, value_self = _dispatch_lock_value_for_self();
+	dispatch_lock cur, self = _dispatch_lock_value_for_self();
 	uint32_t timeout = 1;
 
 	while (unlikely(!os_atomic_cmpxchgv(&dul->dul_lock,
-			DLOCK_OWNER_NULL, value_self, &cur, acquire))) {
+			DLOCK_OWNER_NULL, self, &cur, acquire))) {
 		if (unlikely(_dispatch_lock_is_locked_by(cur, self))) {
 			DISPATCH_CLIENT_CRASH(0, "trying to lock recursively");
 		}
diff --git a/src/shims/lock.h b/src/shims/lock.h
index 37a3ecf..4a9bd78 100644
--- a/src/shims/lock.h
+++ b/src/shims/lock.h
@@ -79,6 +79,27 @@
 	return lock_value & DLOCK_OWNER_MASK;
 }
 
+#elif defined(_WIN32)
+
+#include <Windows.h>
+
+typedef DWORD dispatch_tid;
+typedef uint32_t dispatch_lock;
+
+#define DLOCK_OWNER_NULL			((dispatch_tid)0)
+#define DLOCK_OWNER_MASK			((dispatch_lock)0xfffffffc)
+#define DLOCK_WAITERS_BIT			((dispatch_lock)0x00000001)
+#define DLOCK_FAILED_TRYLOCK_BIT		((dispatch_lock)0x00000002)
+
+#define _dispatch_tid_self()		((dispatch_tid)(_dispatch_get_tsd_base()->tid << 2))
+
+DISPATCH_ALWAYS_INLINE
+static inline dispatch_tid
+_dispatch_lock_owner(dispatch_lock lock_value)
+{
+	return lock_value & DLOCK_OWNER_MASK;
+}
+
 #else
 #  error define _dispatch_lock encoding scheme for your platform here
 #endif
diff --git a/src/shims/time.h b/src/shims/time.h
index 6cc5a16..2e27605 100644
--- a/src/shims/time.h
+++ b/src/shims/time.h
@@ -31,7 +31,7 @@
 #error "Please #include <dispatch/dispatch.h> instead of this file directly."
 #endif
 
-#if TARGET_OS_WIN32
+#if defined(_WIN32)
 static inline unsigned int
 sleep(unsigned int seconds)
 {
@@ -106,7 +106,7 @@
 	struct timespec ts;
 	dispatch_assume_zero(clock_gettime(CLOCK_REALTIME, &ts));
 	return _dispatch_timespec_to_nano(ts);
-#elif TARGET_OS_WIN32
+#elif defined(_WIN32)
 	// FILETIME is 100-nanosecond intervals since January 1, 1601 (UTC).
 	FILETIME ft;
 	ULARGE_INTEGER li;
@@ -147,9 +147,12 @@
 	struct timespec ts;
 	dispatch_assume_zero(clock_gettime(CLOCK_MONOTONIC, &ts));
 	return _dispatch_timespec_to_nano(ts);
-#elif TARGET_OS_WIN32
-	LARGE_INTEGER now;
-	return QueryPerformanceCounter(&now) ? now.QuadPart : 0;
+#elif defined(_WIN32)
+	ULONGLONG ullTime;
+	if (!QueryUnbiasedInterruptTime(&ullTime))
+		return 0;
+
+	return ullTime * 100ull;
 #else
 #error platform needs to implement _dispatch_absolute_time()
 #endif
diff --git a/src/shims/tsd.h b/src/shims/tsd.h
index c119e4f..7271f45 100644
--- a/src/shims/tsd.h
+++ b/src/shims/tsd.h
@@ -69,6 +69,7 @@
 #define __TSD_MACH_SPECIAL_REPLY 8
 #endif
 
+
 static const unsigned long dispatch_priority_key	= __TSD_THREAD_QOS_CLASS;
 static const unsigned long dispatch_r2k_key			= __TSD_RETURN_TO_KERNEL;
 
@@ -99,15 +100,44 @@
 }
 #elif DISPATCH_USE_THREAD_LOCAL_STORAGE
 
+#if defined(_WIN32)
+#define DISPATCH_TSD_DTOR_CC __stdcall
+#else
+#define DISPATCH_TSD_DTOR_CC
+#endif
+
+#if defined(_WIN32)
+
 DISPATCH_TSD_INLINE
 static inline void
-_dispatch_thread_key_create(pthread_key_t *k, void (*d)(void *))
+_dispatch_thread_key_create(DWORD *k, void (DISPATCH_TSD_DTOR_CC *d)(void *))
+{
+	dispatch_assert((*k = FlsAlloc(d)) != FLS_OUT_OF_INDEXES); // failure is FLS_OUT_OF_INDEXES, not a nonzero index
+}
+
+extern __declspec(thread) struct dispatch_tsd __dispatch_tsd;
+extern DWORD __dispatch_tsd_key;
+
+#else
+
+DISPATCH_TSD_INLINE
+static inline void
+_dispatch_thread_key_create(pthread_key_t *k, void (DISPATCH_TSD_DTOR_CC *d)(void *))
 {
 	dispatch_assert_zero(pthread_key_create(k, d));
 }
 
+extern __thread struct dispatch_tsd __dispatch_tsd;
+extern pthread_key_t __dispatch_tsd_key;
+
+#endif
+
 struct dispatch_tsd {
+#if defined(_WIN32)
+	DWORD tid;
+#else
 	pid_t tid;
+#endif
 	void *dispatch_queue_key;
 	void *dispatch_frame_key;
 	void *dispatch_cache_key;
@@ -126,8 +156,6 @@
 	void *dispatch_deferred_items_key;
 };
 
-extern __thread struct dispatch_tsd __dispatch_tsd;
-extern pthread_key_t __dispatch_tsd_key;
 extern void libdispatch_tsd_init(void);
 extern void _libdispatch_tsd_cleanup(void *ctx);
 
@@ -285,7 +313,7 @@
 }
 #endif
 
-#if TARGET_OS_WIN32
+#if defined(_WIN32)
 #define _dispatch_thread_self() ((uintptr_t)GetCurrentThreadId())
 #else
 #if DISPATCH_USE_DIRECT_TSD
@@ -296,7 +324,7 @@
 #endif
 #endif
 
-#if TARGET_OS_WIN32
+#if defined(_WIN32)
 #define _dispatch_thread_port() ((mach_port_t)0)
 #elif !DISPATCH_USE_THREAD_LOCAL_STORAGE
 #if DISPATCH_USE_DIRECT_TSD
diff --git a/src/shims/yield.h b/src/shims/yield.h
index 99864af..2373e50 100644
--- a/src/shims/yield.h
+++ b/src/shims/yield.h
@@ -83,6 +83,11 @@
 #define _dispatch_contention_spins() \
 		((DISPATCH_CONTENTION_SPINS_MIN) + ((DISPATCH_CONTENTION_SPINS_MAX) - \
 		(DISPATCH_CONTENTION_SPINS_MIN)) / 2)
+#elif defined(_WIN32)
+#define _dispatch_contention_spins() ({                                        \
+		unsigned int _value;                                           \
+		rand_s(&_value);                                               \
+		(_value & DISPATCH_CONTENTION_SPINS_MAX) | DISPATCH_CONTENTION_SPINS_MIN; })
 #else
 // Use randomness to prevent threads from resonating at the same
 // frequency and permanently contending. All threads sharing the same
@@ -151,7 +156,24 @@
 		SWITCH_OPTION_WAIT, (((u)-1)/1000)+1)
 #endif
 #else
+#if defined(_WIN32)
+DISPATCH_INLINE void
+_dispatch_contention_usleep(uint64_t useconds) {
+	static BOOL bQPFExecuted = FALSE;
+	static LARGE_INTEGER liFreq;
+	LARGE_INTEGER liStart, liNow;
+
+	if (!bQPFExecuted)
+		bQPFExecuted = QueryPerformanceFrequency(&liFreq);
+
+	QueryPerformanceCounter(&liStart);
+	do {
+		QueryPerformanceCounter(&liNow);
+	} while ((liNow.QuadPart - liStart.QuadPart) / (float)liFreq.QuadPart * 1000 * 1000 < useconds);
+}
+#else
 #define _dispatch_contention_usleep(u) usleep((u))
+#endif
 #endif // HAVE_MACH
 
 #endif // __DISPATCH_SHIMS_YIELD__
diff --git a/src/transform.c b/src/transform.c
index 44a1271..7f2c556 100644
--- a/src/transform.c
+++ b/src/transform.c
@@ -30,6 +30,9 @@
 #include <sys/endian.h>
 #define OSLittleEndian _LITTLE_ENDIAN
 #define OSBigEndian _BIG_ENDIAN
+#elif defined(_WIN32)
+#define OSLittleEndian 1234
+#define OSBigEndian 4321
 #endif
 
 #if defined(__linux__) || defined(__FreeBSD__)
@@ -37,6 +40,11 @@
 #define OSSwapBigToHostInt16 be16toh
 #define OSSwapHostToLittleInt16 htole16
 #define OSSwapHostToBigInt16 htobe16
+#elif defined(_WIN32)
+#define OSSwapLittleToHostInt16
+#define OSSwapBigToHostInt16 ntohs
+#define OSSwapHostToLittleInt16
+#define OSSwapHostToBigInt16 htons
 #endif
 
 #if defined(__LITTLE_ENDIAN__)
diff --git a/src/voucher.c b/src/voucher.c
index 458e2f0..e32bd29 100644
--- a/src/voucher.c
+++ b/src/voucher.c
@@ -157,7 +157,7 @@
 	return _voucher_release(voucher);
 }
 
-void
+void DISPATCH_TSD_DTOR_CC
 _voucher_thread_cleanup(void *voucher)
 {
 	// when a thread exits and has a voucher left, the kernel
@@ -1515,7 +1515,7 @@
 	(void)v;
 }
 
-void
+void DISPATCH_TSD_DTOR_CC
 _voucher_thread_cleanup(void *voucher)
 {
 	(void)voucher;
diff --git a/src/voucher_internal.h b/src/voucher_internal.h
index 772c8c4..9f5d72b 100644
--- a/src/voucher_internal.h
+++ b/src/voucher_internal.h
@@ -97,7 +97,7 @@
 void _voucher_xref_dispose(voucher_t voucher);
 void _voucher_dispose(voucher_t voucher);
 size_t _voucher_debug(voucher_t v, char* buf, size_t bufsiz);
-void _voucher_thread_cleanup(void *voucher);
+void DISPATCH_TSD_DTOR_CC _voucher_thread_cleanup(void *voucher);
 mach_voucher_t _voucher_get_mach_voucher(voucher_t voucher);
 voucher_t _voucher_create_without_importance(voucher_t voucher);
 voucher_t _voucher_create_accounting_voucher(voucher_t voucher);