use reserved spelling for inline assembly

Use the reserved spelling for `asm` as it is a non-standard compiler
extension.

Rather than trying to enumerate the user label prefix on the various
targets, rely on the compiler.  GNU-compatible compilers predefine
`__USER_LABEL_PREFIX__` to the target's symbol prefix (`_` on targets
that decorate symbols, empty otherwise).  When building with a non-GNU
compiler, the macro can be defined explicitly for the target.  This
simplifies and unifies the code.
diff --git a/os/object_private.h b/os/object_private.h
index 215c3d1..3b46322 100644
--- a/os/object_private.h
+++ b/os/object_private.h
@@ -86,7 +86,7 @@
 #endif
 #define OS_OBJECT_OBJC_CLASS_DECL(name) \
 		extern void *OS_OBJECT_CLASS_SYMBOL(name) \
-				asm(OS_OBJC_CLASS_RAW_SYMBOL_NAME(OS_OBJECT_CLASS(name)))
+				__asm__(OS_OBJC_CLASS_RAW_SYMBOL_NAME(OS_OBJECT_CLASS(name)))
 #else
 #define OS_OBJECT_HAVE_OBJC1 0
 #define OS_OBJECT_HAVE_OBJC2 0
diff --git a/src/block.cpp b/src/block.cpp
index 6936ada..8f8113a 100644
--- a/src/block.cpp
+++ b/src/block.cpp
@@ -109,11 +109,7 @@
 // The compiler hides the name of the function it generates, and changes it if
 // we try to reference it directly, but the linker still sees it.
 extern void DISPATCH_BLOCK_SPECIAL_INVOKE(void *)
-#if defined(__linux__) || defined(__FreeBSD__)
-		asm("___dispatch_block_create_block_invoke");
-#else
-		asm("____dispatch_block_create_block_invoke");
-#endif
+		__asm__(OS_STRINGIFY(__USER_LABEL_PREFIX__) "___dispatch_block_create_block_invoke");
 void (*_dispatch_block_special_invoke)(void*) = DISPATCH_BLOCK_SPECIAL_INVOKE;
 }
 
diff --git a/src/init.c b/src/init.c
index 26612c0..4ef733d 100644
--- a/src/init.c
+++ b/src/init.c
@@ -897,7 +897,7 @@
 _dispatch_temporary_resource_shortage(void)
 {
 	sleep(1);
-	asm("");  // prevent tailcall
+	__asm__ __volatile__("");  // prevent tailcall
 }
 
 void *
diff --git a/src/introspection.c b/src/introspection.c
index 8692a8b..6e57979 100644
--- a/src/introspection.c
+++ b/src/introspection.c
@@ -447,7 +447,7 @@
 
 #define DISPATCH_INTROSPECTION_INTERPOSABLE_HOOK(h) \
 		DISPATCH_EXPORT void _dispatch_introspection_hook_##h(void) \
-		asm("_dispatch_introspection_hook_" #h); \
+		__asm__("_dispatch_introspection_hook_" #h); \
 		void _dispatch_introspection_hook_##h(void) {}
 
 #define DISPATCH_INTROSPECTION_INTERPOSABLE_HOOK_CALLOUT(h, ...)\
diff --git a/src/object_internal.h b/src/object_internal.h
index 4504f65..ca3ac0a 100644
--- a/src/object_internal.h
+++ b/src/object_internal.h
@@ -50,7 +50,7 @@
 #if USE_OBJC
 #define DISPATCH_OBJC_CLASS_DECL(name) \
 		extern void *DISPATCH_CLASS_SYMBOL(name) \
-				asm(DISPATCH_CLASS_RAW_SYMBOL_NAME(name))
+				__asm__(DISPATCH_CLASS_RAW_SYMBOL_NAME(name))
 #endif
 
 // define a new proper class
@@ -65,7 +65,7 @@
 		}; \
 		OS_OBJECT_EXTRA_VTABLE_DECL(name, name) \
 		extern const struct name##_vtable_s OS_OBJECT_CLASS_SYMBOL(name) \
-				asm(OS_OBJC_CLASS_RAW_SYMBOL_NAME(OS_OBJECT_CLASS(name)))
+				__asm__(OS_OBJC_CLASS_RAW_SYMBOL_NAME(OS_OBJECT_CLASS(name)))
 
 #if OS_OBJECT_SWIFT3
 #define OS_OBJECT_INTERNAL_CLASS_DECL(name, super, ...) \
@@ -101,7 +101,7 @@
 		struct name##_s; \
 		OS_OBJECT_EXTRA_VTABLE_DECL(name, super) \
 		extern const struct super##_vtable_s OS_OBJECT_CLASS_SYMBOL(name) \
-				asm(OS_OBJC_CLASS_RAW_SYMBOL_NAME(OS_OBJECT_CLASS(name)))
+				__asm__(OS_OBJC_CLASS_RAW_SYMBOL_NAME(OS_OBJECT_CLASS(name)))
 
 #define DISPATCH_SUBCLASS_DECL(name, super) \
 		OS_OBJECT_SUBCLASS_DECL(dispatch_##name, super)
diff --git a/src/source.c b/src/source.c
index 3f9caee..730e441 100644
--- a/src/source.c
+++ b/src/source.c
@@ -1220,7 +1220,7 @@
 	if (_dispatch_trace_timer_configure_enabled() ||
 			_dispatch_source_timer_telemetry_enabled()) {
 		_dispatch_source_timer_telemetry_slow(ds, clock, values);
-		asm(""); // prevent tailcall
+		__asm__ __volatile__ (""); // prevent tailcall
 	}
 }
 
diff --git a/tests/Foundation/bench.mm b/tests/Foundation/bench.mm
index c516366..0000748 100644
--- a/tests/Foundation/bench.mm
+++ b/tests/Foundation/bench.mm
@@ -150,7 +150,7 @@
 {
 	uint32_t lo, hi;
 
-	asm volatile("rdtsc" : "=a" (lo), "=d" (hi));
+	__asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi));
 
 	return (uint64_t)hi << 32 | lo;
 }
@@ -245,7 +245,7 @@
 
 	s = mach_absolute_time();
 	for (i = cnt; i; i--) {
-		asm volatile("");
+		__asm__ __volatile__ ("");
 	}
 	print_result(s, "Empty loop:");
 
@@ -374,46 +374,46 @@
 
 	s = mach_absolute_time();
 	for (i = cnt; i; i--) {
-		asm("nop");
+		__asm__ __volatile__ ("nop");
 	}
 	print_result(s, "raw 'nop':");
 
 #if defined(__i386__) || defined(__x86_64__)
 	s = mach_absolute_time();
 	for (i = cnt; i; i--) {
-		asm("pause");
+		__asm__ __volatile__ ("pause");
 	}
 	print_result(s, "raw 'pause':");
 
 	s = mach_absolute_time();
 	for (i = cnt; i; i--) {
-		asm("mfence");
+		__asm__ __volatile__ ("mfence");
 	}
 	print_result(s, "Atomic mfence:");
 
 	s = mach_absolute_time();
 	for (i = cnt; i; i--) {
-		asm("lfence");
+		__asm__ __volatile__ ("lfence");
 	}
 	print_result(s, "Atomic lfence:");
 
 	s = mach_absolute_time();
 	for (i = cnt; i; i--) {
-		asm("sfence");
+		__asm__ __volatile__ ("sfence");
 	}
 	print_result(s, "Atomic sfence:");
 
 	s = mach_absolute_time();
 	for (i = cnt; i; i--) {
 		uint64_t sidt_rval;
-		asm("sidt %0" : "=m" (sidt_rval));
+		__asm__ __volatile__ ("sidt %0" : "=m" (sidt_rval));
 	}
 	print_result(s, "'sidt' instruction:");
 
 	s = mach_absolute_time();
 	for (i = cnt; i; i--) {
 		long prev;
-		asm volatile("cmpxchg %1,%2"
+		__asm__ __volatile__ ("cmpxchg %1,%2"
 				: "=a" (prev) : "r" (0l), "m" (global), "0" (1l));
 	}
 	print_result(s, "'cmpxchg' without the 'lock' prefix:");
@@ -421,7 +421,7 @@
 	s = mach_absolute_time();
 	for (i = cnt; i; i--) {
 		global = 0;
-		asm volatile("mfence" ::: "memory");
+		__asm__ __volatile__ ("mfence" ::: "memory");
 	}
 	print_result(s, "Store + mfence:");
 
@@ -429,14 +429,14 @@
 	for (i = cnt; i; i--) {
 		unsigned long _clbr;
 #ifdef __LP64__
-		asm volatile("cpuid" : "=a" (_clbr)
+		__asm__ __volatile__ ("cpuid" : "=a" (_clbr)
 				: "0" (0) : "rbx", "rcx", "rdx", "cc", "memory");
 #else
 #ifdef __llvm__
-		asm volatile("cpuid" : "=a" (_clbr) : "0" (0)
+		__asm__ __volatile__ ("cpuid" : "=a" (_clbr) : "0" (0)
 				: "ebx", "ecx", "edx", "cc", "memory" );
 #else // gcc does not allow inline i386 asm to clobber ebx
-		asm volatile("pushl %%ebx\n\tcpuid\n\tpopl %%ebx"
+		__asm__ __volatile__ ("pushl %%ebx\n\tcpuid\n\tpopl %%ebx"
 				: "=a" (_clbr) : "0" (0) : "ecx", "edx", "cc", "memory" );
 #endif
 #endif
@@ -454,7 +454,7 @@
 #ifdef _ARM_ARCH_7
 	s = mach_absolute_time();
 	for (i = cnt; i; i--) {
-		asm("yield");
+		__asm__ __volatile__ ("yield");
 	}
 	print_result(s, "raw 'yield':");
 #endif
@@ -462,9 +462,9 @@
 	s = mach_absolute_time();
 	for (i = cnt; i; i--) {
 #ifdef _ARM_ARCH_7
-		asm volatile("dmb ish" : : : "memory");
+		__asm__ __volatile__ ("dmb ish" : : : "memory");
 #else
-		asm volatile("mcr	p15, 0, %0, c7, c10, 5" : : "r" (0) : "memory");
+		__asm__ __volatile__ ("mcr	p15, 0, %0, c7, c10, 5" : : "r" (0) : "memory");
 #endif
 	}
 	print_result(s, "'dmb ish' instruction:");
@@ -472,7 +472,7 @@
 #ifdef _ARM_ARCH_7
 	s = mach_absolute_time();
 	for (i = cnt; i; i--) {
-		asm volatile("dmb ishst" : : : "memory");
+		__asm__ __volatile__ ("dmb ishst" : : : "memory");
 	}
 	print_result(s, "'dmb ishst' instruction:");
 #endif
@@ -480,9 +480,9 @@
 #ifdef _ARM_ARCH_7
 	s = mach_absolute_time();
 	for (i = cnt; i; i--) {
-		asm volatile("str	%[_r], [%[_p], %[_o]]" :
+		__asm__ __volatile__ ("str	%[_r], [%[_p], %[_o]]" :
 				: [_p] "p" (&global), [_o] "M" (0), [_r] "r" (0) : "memory");
-		asm volatile("dmb ishst" : : : "memory");
+		__asm__ __volatile__ ("dmb ishst" : : : "memory");
 	}
 	print_result(s, "'str + dmb ishst' instructions:");
 #endif
@@ -493,10 +493,10 @@
 		uintptr_t prev;
 		uint32_t t;
 		do {
-		asm volatile("ldrex	%[_r], [%[_p], %[_o]]"
+		__asm__ __volatile__ ("ldrex	%[_r], [%[_p], %[_o]]"
 				: [_r] "=&r" (prev) \
 				: [_p] "p" (&global), [_o] "M" (0) : "memory");
-		asm volatile("strex	%[_t], %[_r], [%[_p], %[_o]]"
+		__asm__ __volatile__ ("strex	%[_t], %[_r], [%[_p], %[_o]]"
 				: [_t] "=&r" (t) \
 				: [_p] "p" (&global), [_o] "M" (0), [_r] "r" (0) : "memory");
 		} while (t);
@@ -507,9 +507,9 @@
 	s = mach_absolute_time();
 	for (i = cnt; i; i--) {
 #ifdef _ARM_ARCH_7
-		asm volatile("dsb ish" : : : "memory");
+		__asm__ __volatile__ ("dsb ish" : : : "memory");
 #else
-		asm volatile("mcr	p15, 0, %0, c7, c10, 4" : : "r" (0) : "memory");
+		__asm__ __volatile__ ("mcr	p15, 0, %0, c7, c10, 4" : : "r" (0) : "memory");
 #endif
 	}
 	print_result(s, "'dsb ish' instruction:");
@@ -517,16 +517,16 @@
 #if BENCH_SLOW
 	s = mach_absolute_time();
 	for (i = cnt; i; i--) {
-		register long _swtch_pri asm("ip") = -59;
-		asm volatile("svc	0x80" : : "r" (_swtch_pri) : "r0", "memory");
+		register long _swtch_pri __asm__("ip") = -59;
+		__asm__ __volatile__ ("svc	0x80" : : "r" (_swtch_pri) : "r0", "memory");
 	}
 	print_result(s, "swtch_pri syscall:");
 
 	s = mach_absolute_time();
 	for (i = cnt; i; i--) {
-		register long _r0 asm("r0") = 0, _r1 asm("r1") = 1, _r2 asm("r2") = 1;
-		register long _thread_switch asm("ip") = -61;
-		asm volatile("svc	0x80" : "+r" (_r0)
+		register long _r0 __asm__("r0") = 0, _r1 __asm__("r1") = 1, _r2 __asm__("r2") = 1;
+		register long _thread_switch __asm__("ip") = -61;
+		__asm__ __volatile__ ("svc	0x80" : "+r" (_r0)
 				: "r" (_r1), "r" (_r2), "r" (_thread_switch): "memory");
 	}
 	print_result(s, "thread_switch syscall:");
@@ -636,9 +636,9 @@
 		while (!__sync_bool_compare_and_swap(&global, 0, 1)) {
 			do {
 #if defined(__i386__) || defined(__x86_64__)
-				asm("pause");
+				__asm__ __volatile__ ("pause");
 #elif defined(__arm__) && defined _ARM_ARCH_7
-				asm("yield");
+				__asm__ __volatile__ ("yield");
 #endif
 			} while (global);
 		}