[fdio] do not accept sockets with a full fd table

If the fd table is full, pulling sockets from the control channel results in
consuming the incoming connection, but being unable to actually service it -
we then immediately close (which then ends up in linger). Other processes
would then not be able to pull these sockets (in a load balance scenario),
and netstack also potentially loses the ability to apply policy from having a
full listen queue. Instead, try to allocate an fd up front, and only if found
accept the socket from the control channel. This is more conformant with
common accept behaviors.

Test: install a package with 5500 blobs with a package server routed via a remote port forward over ssh. Observe successful install rather than failure.
Bug: PKG-371 #comment fdio: do not accept sockets with a full fd table
Change-Id: Idb5513ada590b0d8da2f5cd68b4d734765e438d4
diff --git a/system/ulib/fdio/bsdsocket.c b/system/ulib/fdio/bsdsocket.c
index 29dc44a..ebb0200 100644
--- a/system/ulib/fdio/bsdsocket.c
+++ b/system/ulib/fdio/bsdsocket.c
@@ -188,19 +188,35 @@
         return ERROR(ZX_ERR_BAD_STATE);
     }
 
+    // An |fd| must be reserved before we accept the socket, otherwise the
+    // application may be pulling streams from the listen socket that it can not
+    // service, which is unusual behavior that breaks load balancing type cases
+    // and causes excessive resource pressure under high load. Instead, we take
+    // a lock on the fd table, find a free fd, and hold it until we get the
+    // stream or discover an error.
+    mtx_lock(&fdio_lock);
+    int fd2 = fdio_find_free_fd(0);
+    if (fd2 < 0) {
+        mtx_unlock(&fdio_lock);
+        return -1;
+    }
+
     size_t actual = 0u;
     zxs_socket_t accepted;
     memset(&accepted, 0, sizeof(accepted));
     zx_status_t status = zxs_accept(socket, addr, len ? *len : 0u, &actual, &accepted);
     fdio_release(io);
     if (status == ZX_ERR_SHOULD_WAIT) {
+        mtx_unlock(&fdio_lock);
         return ERRNO(EWOULDBLOCK);
     } else if (status != ZX_OK) {
+        mtx_unlock(&fdio_lock);
         return ERROR(status);
     }
 
     fdio_t* io2 = NULL;
     if ((io2 = fdio_socket_create_stream(accepted.socket, IOFLAG_SOCKET_CONNECTED)) == NULL) {
+        mtx_unlock(&fdio_lock);
         return ERROR(ZX_ERR_NO_RESOURCES);
     }
 
@@ -212,12 +228,8 @@
         *len = actual;
     }
 
-    int fd2;
-    if ((fd2 = fdio_bind_to_fd(io2, -1, 0)) < 0) {
-        io2->ops->close(io2);
-        fdio_release(io2);
-        return ERRNO(EMFILE);
-    }
+    fdio_allocate_fd(fd2, io2);
+    mtx_unlock(&fdio_lock);
     return fd2;
 }
 
diff --git a/system/ulib/fdio/private.h b/system/ulib/fdio/private.h
index 5fb7a48..2ff6782 100644
--- a/system/ulib/fdio/private.h
+++ b/system/ulib/fdio/private.h
@@ -7,6 +7,7 @@
 #include <fuchsia/io/c/fidl.h>
 #include <lib/fdio/limits.h>
 #include <lib/fdio/vfs.h>
+#include <errno.h>
 #include <stdarg.h>
 #include <stdatomic.h>
 #include <stdbool.h>
@@ -266,6 +267,33 @@
 
 void fdio_set_debug_level(unsigned level);
 
+// Return the lowest free fd >= |starting_fd| in the fd table, or -1 on error.
+// Must be called while |fdio_lock| is held. If starting_fd is less than 0,
+// errno is set to EINVAL. If no free fd is found, errno is set to EMFILE.
+static inline int fdio_find_free_fd(int starting_fd) {
+    // A negative |starting_fd| also yields -1, so callers must test for < 0.
+    if (starting_fd < 0) {
+        errno = EINVAL;
+        return -1;
+    }
+
+    // Scan upward from |starting_fd| and take the first unused table slot.
+    //TODO: bitmap, ffs, etc
+    for (int fd = starting_fd; fd < FDIO_MAX_FD; fd++) {
+        if (fdio_fdtab[fd] == NULL) {
+            return fd;
+        }
+    }
+    errno = EMFILE;
+    return -1;
+}
+
+// Assign |io| to |fd|. Must be called while |fdio_lock| is held.
+static inline void fdio_allocate_fd(int fd, fdio_t *io) {
+    LOG(1, "fdio: allocate_fd() OK fd=%d\n", fd);
+    io->dupcount++;
+    fdio_fdtab[fd] = io;  // no check here that the slot was free
+}
 
 // Enable intrusive allocation debugging
 //
diff --git a/system/ulib/fdio/unistd.c b/system/ulib/fdio/unistd.c
index 6fe6132..46f21b2 100644
--- a/system/ulib/fdio/unistd.c
+++ b/system/ulib/fdio/unistd.c
@@ -65,21 +65,10 @@
     mtx_lock(&fdio_lock);
     LOG(1, "fdio: bind_to_fd(%p, %d, %d)\n", io, fd, starting_fd);
     if (fd < 0) {
-        // If we are not given an |fd|, the |starting_fd| must be non-negative.
-        if (starting_fd < 0) {
-            errno = EINVAL;
-            mtx_unlock(&fdio_lock);
-            return -1;
+        fd = fdio_find_free_fd(starting_fd);
+        if (fd >= starting_fd) {
+            goto free_fd_found;
         }
-
-        // A negative fd implies that any free fd value can be used
-        //TODO: bitmap, ffs, etc
-        for (fd = starting_fd; fd < FDIO_MAX_FD; fd++) {
-            if (fdio_fdtab[fd] == NULL) {
-                goto free_fd_found;
-            }
-        }
-        errno = EMFILE;
         mtx_unlock(&fdio_lock);
         return -1;
     } else if (fd >= FDIO_MAX_FD) {
@@ -101,9 +90,7 @@
     }
 
 free_fd_found:
-    LOG(1, "fdio: bind_to_fd() OK fd=%d\n", fd);
-    io->dupcount++;
-    fdio_fdtab[fd] = io;
+    fdio_allocate_fd(fd, io);
     mtx_unlock(&fdio_lock);
 
     if (io_to_close) {