Merge pull request #279 from apple/mad/greedy-signalfd

Fix improper double-fire of signal sources on Linux
diff --git a/dispatch/once.h b/dispatch/once.h
index 68acfe8..37a4950 100644
--- a/dispatch/once.h
+++ b/dispatch/once.h
@@ -40,6 +40,14 @@
 DISPATCH_SWIFT3_UNAVAILABLE("Use lazily initialized globals instead")
 typedef long dispatch_once_t;
 
+#if defined(__x86_64__) || defined(__i386__) || defined(__s390x__)
+#define DISPATCH_ONCE_INLINE_FASTPATH 1
+#elif defined(__APPLE__)
+#define DISPATCH_ONCE_INLINE_FASTPATH 1
+#else
+#define DISPATCH_ONCE_INLINE_FASTPATH 0
+#endif
+
 /*!
  * @function dispatch_once
  *
@@ -65,6 +73,7 @@
 dispatch_once(dispatch_once_t *predicate,
 		DISPATCH_NOESCAPE dispatch_block_t block);
 
+#if DISPATCH_ONCE_INLINE_FASTPATH
 DISPATCH_INLINE DISPATCH_ALWAYS_INLINE DISPATCH_NONNULL_ALL DISPATCH_NOTHROW
 DISPATCH_SWIFT3_UNAVAILABLE("Use lazily initialized globals instead")
 void
@@ -81,6 +90,7 @@
 #undef dispatch_once
 #define dispatch_once _dispatch_once
 #endif
+#endif // DISPATCH_ONCE_INLINE_FASTPATH
 
 API_AVAILABLE(macos(10.6), ios(4.0))
 DISPATCH_EXPORT DISPATCH_NONNULL1 DISPATCH_NONNULL3 DISPATCH_NOTHROW
@@ -89,6 +99,7 @@
 dispatch_once_f(dispatch_once_t *predicate, void *_Nullable context,
 		dispatch_function_t function);
 
+#if DISPATCH_ONCE_INLINE_FASTPATH
 DISPATCH_INLINE DISPATCH_ALWAYS_INLINE DISPATCH_NONNULL1 DISPATCH_NONNULL3
 DISPATCH_NOTHROW
 DISPATCH_SWIFT3_UNAVAILABLE("Use lazily initialized globals instead")
@@ -105,6 +116,7 @@
 }
 #undef dispatch_once_f
 #define dispatch_once_f _dispatch_once_f
+#endif // DISPATCH_ONCE_INLINE_FASTPATH
 
 __END_DECLS
 
diff --git a/src/event/event_epoll.c b/src/event/event_epoll.c
index dfdd00c..c86421b 100644
--- a/src/event/event_epoll.c
+++ b/src/event/event_epoll.c
@@ -51,7 +51,8 @@
 	int     dmn_ident;
 	uint32_t dmn_events;
 	int16_t dmn_filter;
-	bool    dmn_socket_listener;
+	bool    dmn_skip_outq_ioctl;
+	bool    dmn_skip_inq_ioctl;
 } *dispatch_muxnote_t;
 
 typedef struct dispatch_epoll_timeout_s {
@@ -143,7 +144,7 @@
 	struct stat sb;
 	int fd = du._du->du_ident;
 	int16_t filter = du._du->du_filter;
-	bool socket_listener = false;
+	bool skip_outq_ioctl = false, skip_inq_ioctl = false;
 	sigset_t sigmask;
 
 	switch (filter) {
@@ -173,11 +174,15 @@
 			if (fd < 0) {
 				return NULL;
 			}
+			// Linux doesn't support output queue size ioctls for regular files
+			skip_outq_ioctl = true;
 		} else if (S_ISSOCK(sb.st_mode)) {
 			socklen_t vlen = sizeof(int);
 			int v;
+			// Linux cannot report how many pending connections are ready to be
+			// accept()ed on a listening socket, so skip the input queue ioctl
 			if (getsockopt(fd, SOL_SOCKET, SO_ACCEPTCONN, &v, &vlen) == 0) {
-				socket_listener = (bool)v;
+				skip_inq_ioctl = (bool)v;
 			}
 		}
 		break;
@@ -193,7 +198,8 @@
 	dmn->dmn_ident = du._du->du_ident;
 	dmn->dmn_filter = filter;
 	dmn->dmn_events = events;
-	dmn->dmn_socket_listener = socket_listener;
+	dmn->dmn_skip_outq_ioctl = skip_outq_ioctl;
+	dmn->dmn_skip_inq_ioctl = skip_inq_ioctl;
 	return dmn;
 }
 
@@ -316,6 +322,7 @@
 		if (events == dmn->dmn_events) {
 			// nothing to do
 		} else if (events & (EPOLLIN | EPOLLOUT)) {
+			dmn->dmn_events = events;
 			_dispatch_epoll_update(dmn, EPOLL_CTL_MOD);
 		} else {
 			epoll_ctl(_dispatch_epfd, EPOLL_CTL_DEL, dmn->dmn_fd, NULL);
@@ -491,16 +498,28 @@
 static uintptr_t
 _dispatch_get_buffer_size(dispatch_muxnote_t dmn, bool writer)
 {
-	unsigned long op = writer ? SIOCOUTQ : SIOCINQ;
 	int n;
 
-	if (!writer && dmn->dmn_socket_listener) {
-		// Linux doesn't support saying how many clients are ready to be
-		// accept()ed
+	if (writer ? dmn->dmn_skip_outq_ioctl : dmn->dmn_skip_inq_ioctl) {
 		return 1;
 	}
 
-	if (dispatch_assume_zero(ioctl(dmn->dmn_ident, op, &n))) {
+	if (ioctl(dmn->dmn_ident, writer ? SIOCOUTQ : SIOCINQ, &n) != 0) {
+		switch (errno) {
+		case EINVAL:
+		case ENOTTY:
+			// this file descriptor doesn't support the buffer size ioctl;
+			// the skip flag set below avoids repeating the syscall next time.
+			break;
+		default:
+			dispatch_assume_zero(errno);
+			break;
+		}
+		if (writer) {
+			dmn->dmn_skip_outq_ioctl = true;
+		} else {
+			dmn->dmn_skip_inq_ioctl = true;
+		}
 		return 1;
 	}
 	return (uintptr_t)n;
diff --git a/src/once.c b/src/once.c
index 75d7a39..c01538c 100644
--- a/src/once.c
+++ b/src/once.c
@@ -40,9 +40,15 @@
 }
 #endif
 
-DISPATCH_NOINLINE
-void
-dispatch_once_f(dispatch_once_t *val, void *ctxt, dispatch_function_t func)
+#if DISPATCH_ONCE_INLINE_FASTPATH
+#define DISPATCH_ONCE_SLOW_INLINE inline DISPATCH_ALWAYS_INLINE
+#else
+#define DISPATCH_ONCE_SLOW_INLINE DISPATCH_NOINLINE
+#endif // DISPATCH_ONCE_INLINE_FASTPATH
+
+DISPATCH_ONCE_SLOW_INLINE
+static void
+dispatch_once_f_slow(dispatch_once_t *val, void *ctxt, dispatch_function_t func)
 {
 #if DISPATCH_GATE_USE_FOR_DISPATCH_ONCE
 	dispatch_once_gate_t l = (dispatch_once_gate_t)val;
@@ -95,3 +101,15 @@
 	}
 #endif
 }
+
+DISPATCH_NOINLINE
+void
+dispatch_once_f(dispatch_once_t *val, void *ctxt, dispatch_function_t func)
+{
+#if !DISPATCH_ONCE_INLINE_FASTPATH
+	if (likely(os_atomic_load(val, acquire) == DLOCK_ONCE_DONE)) {
+		return;
+	}
+#endif // !DISPATCH_ONCE_INLINE_FASTPATH
+	return dispatch_once_f_slow(val, ctxt, func);
+}
diff --git a/src/shims/lock.h b/src/shims/lock.h
index 99c5563..0c089aa 100644
--- a/src/shims/lock.h
+++ b/src/shims/lock.h
@@ -59,9 +59,6 @@
 #elif defined(__linux__)
 
 #include <linux/futex.h>
-#if !defined(__x86_64__) && !defined(__i386__) && !defined(__s390x__)
-#include <linux/membarrier.h>
-#endif
 #include <unistd.h>
 #include <sys/syscall.h>   /* For SYS_xxx definitions */
 
@@ -473,28 +470,7 @@
 static inline dispatch_once_t
 _dispatch_once_xchg_done(dispatch_once_t *pred)
 {
-#if defined(__i386__) || defined(__x86_64__) || defined(__s390x__)
-	// On Intel, any load is a load-acquire, so we don't need to be fancy
-	// same for s390x
 	return os_atomic_xchg(pred, DLOCK_ONCE_DONE, release);
-#elif defined(__linux__)
-	if (unlikely(syscall(__NR_membarrier, MEMBARRIER_CMD_SHARED, 0) < 0)) {
-		/*
-		 * sys_membarrier not supported
-		 *
-		 * Ideally we would call DISPATCH_INTERNAL_CRASH() here, but
-		 * due to ordering constraints in internal.h required by Darwin
-		 * the macro is undefined when this header is included.
-		 * Instead, open-code what would be a call to
-		 * _dispatch_hardware_crash() inside DISPATCH_INTERNAL_CRASH().
-		 */
-		__asm__("");
-		__builtin_trap();
-	}
-	return os_atomic_xchg(pred, DLOCK_ONCE_DONE, relaxed);
-#else
-#  error dispatch_once algorithm not available for this port
-#endif
 }
 
 DISPATCH_ALWAYS_INLINE
diff --git a/tests/dispatch_timer.c b/tests/dispatch_timer.c
index bebbc0d..e480ec2 100644
--- a/tests/dispatch_timer.c
+++ b/tests/dispatch_timer.c
@@ -86,7 +86,8 @@
 		fprintf(stderr, "%d\n", ++i);
 		if (i >= stop_at) {
 			test_long("i", i, stop_at);
-			dispatch_source_set_timer(s, dispatch_time(DISPATCH_TIME_NOW, 0), 0, 0);
+			dispatch_source_set_timer(s, dispatch_time(DISPATCH_TIME_NOW, 0),
+					DISPATCH_TIME_FOREVER, 0);
 			dispatch_source_cancel(s);
 		}
 	});