* Release 3.77.92.
* Complete implementation of new jobserver algorithm.
* A few minor fixups.
diff --git a/ChangeLog b/ChangeLog
index 59d403d..14f2c6d 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,6 +1,43 @@
+1999-08-01  Paul D. Smith  <psmith@gnu.org>
+
+	New jobserver algorithm to avoid a possible hole where we could
+	miss SIGCHLDs and get into a deadlock.  The original algorithm was
+	suggested by Roland McGrath with a nice refinement by Paul Eggert.
+	Many thanks as well to Tim Magill and Howard Chu, who also
+	provided many viable ideas and critiques.  We all had a fun week
+	dreaming up interesting ways to use and abuse UNIX syscalls :).
+
+	Previously we could miss a SIGCHLD if it happened after we reaped
+	the children but before we re-entered the blocking read.  If this
+	happened to all makes and/or all children, make would never wake
+	up.
+
+	We avoid this by having the SIGCHLD handler reset the blocking bit
+	on the jobserver pipe read FD (normally read does block in this
+	algorithm).  Now if the handler is called between the time we reap
+	and the time we read(), and there are no tokens available, the
+	read will merely return with EAGAIN instead of blocking.
+
+	* main.c (main): Set the blocking bit explicitly here.
+	* job.c (child_handler): If we have a jobserver pipe, set the
+	non-blocking bit for it.
+	(start_waiting_job): Move the token stuff back to new_job; if we
+	do it here then we're not controlling the number of remote jobs
+	started!
+	(new_job): Move the check for job slots to _after_ we've created a
+	child structure.  If the read returns without getting a token, set
+	the blocking bit then try to reap_children.
+
+	* make.h (EINTR_SET): Define to test errno if EINTR is available,
+	or 0 otherwise.  Just some code cleanup.
+	* arscan.c (ar_member_touch): Use it.
+	* function.c (func_shell): Use it.
+	* job.c (reap_children): Use it.
+	* remake.c (touch_file): Use it.
+
 1999-07-28  Paul D. Smith  <psmith@gnu.org>
 
-	* make.h: Define _() and N_() macros as a passthrough to initiate
+	* make.h: Define _() and N_() macros as passthrough to initiate
 	NLS support.
 	* <all>: Add _()/N_() around translatable strings.
 
diff --git a/arscan.c b/arscan.c
index cc9b9fd..25e739a 100644
--- a/arscan.c
+++ b/arscan.c
@@ -775,11 +775,8 @@
   if (AR_HDR_SIZE != write (fd, (char *) &ar_hdr, AR_HDR_SIZE))
     goto lose;
   /* The file's mtime is the time we we want.  */
-#ifdef EINTR
-  while (fstat (fd, &statbuf) < 0 && errno == EINTR);
-#else
-  fstat (fd, &statbuf);
-#endif
+  while (fstat (fd, &statbuf) < 0 && EINTR_SET)
+    ;
 #if defined(ARFMAG) || defined(ARFZMAG) || defined(AIAMAG) || defined(WINDOWS32)
   /* Advance member's time to that time */
   for (i = 0; i < sizeof ar_hdr.ar_date; i++)
diff --git a/configure.in b/configure.in
index 096df54..a864e17 100644
--- a/configure.in
+++ b/configure.in
@@ -3,7 +3,7 @@
 AC_PREREQ(2.13)dnl		dnl Minimum Autoconf version required.
 AC_INIT(vpath.c)dnl		dnl A distinctive file to look for in srcdir.
 
-AM_INIT_AUTOMAKE(make, 3.77.91)
+AM_INIT_AUTOMAKE(make, 3.77.92)
 AM_CONFIG_HEADER(config.h)
 
 dnl Regular configure stuff
diff --git a/function.c b/function.c
index 0771274..4047339 100644
--- a/function.c
+++ b/function.c
@@ -1381,11 +1381,7 @@
 	  if (cc > 0)
 	    i += cc;
 	}
-#ifdef EINTR
-      while (cc > 0 || errno == EINTR);
-#else
-      while (cc > 0);
-#endif
+      while (cc > 0 || EINTR_SET);
 
       /* Close the read side of the pipe.  */
 #ifdef  __MSDOS__
diff --git a/job.c b/job.c
index 17743a2..575a030 100644
--- a/job.c
+++ b/job.c
@@ -145,6 +145,17 @@
 
 #endif	/* Don't have `union wait'.  */
 
+/* How to set close-on-exec for a file descriptor.  */
+
+#if !defined F_SETFD
+# define CLOSE_ON_EXEC(_d)
+#else
+# ifndef FD_CLOEXEC
+#  define FD_CLOEXEC 1
+# endif
+# define CLOSE_ON_EXEC(_d) (void) fcntl ((_d), F_SETFD, FD_CLOEXEC)
+#endif
+
 #ifdef VMS
 static int vms_jobsefnmask = 0;
 #endif /* !VMS */
@@ -218,9 +229,6 @@
 #endif /* WINDOWS32 */
 
 
-#ifndef MAKE_JOBSERVER
-# define free_job_token(c)
-#else
 static void
 free_job_token (child)
      struct child *child;
@@ -248,7 +256,6 @@
 
   child->job_token = '-';
 }
-#endif
 
 
 /* Write an error message describing the exit status given in
@@ -302,14 +309,13 @@
 /* Handle a dead child.  This handler may or may not ever be installed.
 
    If we're using the jobserver blocking read, we need it.  First, installing
-   it ensures the read will interrupt on SIGCHLD.  Second, we close the dup'd
-   read side of the pipe to ensure we don't enter another blocking read
-   without reaping all the dead children.  In this case we don't need the
-   dead_children count.
+   it ensures the read will interrupt on SIGCHLD.  Second, we reset the
+   blocking bit on the read side of the pipe to ensure we don't enter another
+   blocking read without reaping all the dead children.  In this case we
+   don't need the dead_children count.
 
    If we don't have either waitpid or wait3, then make is unreliable, but we
-   use the dead_children count to reap children as best we can.  In this case
-   job_rfd will never be >= 0.  */
+   use the dead_children count to reap children as best we can.  */
 
 static unsigned int dead_children = 0;
 
@@ -319,9 +325,15 @@
 {
   ++dead_children;
 
-  if (job_rfd >= 0)
-    close (job_rfd);
-  job_rfd = -1;
+#ifdef HAVE_JOBSERVER
+  if (job_fds[0] >= 0)
+    {
+      int fl = fcntl(job_fds[0], F_GETFL, 0);
+
+      if (fl >= 0)
+        fcntl(job_fds[0], F_SETFL, fl | O_NONBLOCK);
+    }
+#endif
 
   if (debug_flag)
     printf (_("Got a SIGCHLD; %u unreaped children.\n"), dead_children);
@@ -330,11 +342,12 @@
 
 extern int shell_function_pid, shell_function_completed;
 
-/* Reap dead children, storing the returned status and the new command
+/* Reap all dead children, storing the returned status and the new command
    state (`cs_finished') in the `file' member of the `struct child' for the
-   dead child, and removing the child from the chain.  If BLOCK nonzero,
-   reap at least one child, waiting for it to die if necessary.  If ERR is
-   nonzero, print an error message first.  */
+   dead child, and removing the child from the chain.  In addition, if BLOCK
+   nonzero, we block in this function until we've reaped at least one
+   complete child, waiting for it to die if necessary.  If ERR is nonzero,
+   print an error message first.  */
 
 void
 reap_children (block, err)
@@ -415,19 +428,18 @@
 	{
           /* A remote status command failed miserably.  Punt.  */
 	remote_status_lose:
-#ifdef	EINTR
-	  if (errno == EINTR)
+	  if (EINTR_SET)
 	    continue;
-#endif
+
 	  pfatal_with_name ("remote_status");
 	}
       else
 	{
 	  /* No remote children.  Check for local children.  */
-
 #if !defined(__MSDOS__) && !defined(_AMIGA) && !defined(WINDOWS32)
 	  if (any_local)
 	    {
+            local_wait:
 #ifdef VMS
 	      vmsWaitForChildren (&status);
 	      pid = c->pid;
@@ -446,15 +458,14 @@
 	  if (pid < 0)
 	    {
               /* The wait*() failed miserably.  Punt.  */
-#ifdef EINTR
-	      if (errno == EINTR)
-		continue;
-#endif
+	      if (EINTR_SET)
+		goto local_wait;
+
 	      pfatal_with_name ("wait");
 	    }
 	  else if (pid > 0)
 	    {
-	      /* We got one; chop the status word up.  */
+	      /* We got a child exit; chop the status word up.  */
 	      exit_code = WEXITSTATUS (status);
 	      exit_sig = WIFSIGNALED (status) ? WTERMSIG (status) : 0;
 	      coredump = WCOREDUMP (status);
@@ -1171,20 +1182,16 @@
 
   c->remote = start_remote_job_p (1);
 
-  /* If not, start it locally.  */
-  if (!c->remote)
+  /* If we are running at least one job already and the load average
+     is too high, make this one wait.  */
+  if (!c->remote && job_slots_used > 0 && load_too_high ())
     {
-      /* If we are running at least one job already and the load average
-	 is too high, make this one wait.  */
-      if (job_slots_used > 0 && load_too_high ())
-	{
-	  /* Put this child on the chain of children waiting
-	     for the load average to go down.  */
-	  set_command_state (f, cs_running);
-	  c->next = waiting_jobs;
-	  waiting_jobs = c;
-	  return 0;
-	}
+      /* Put this child on the chain of children waiting for the load average
+         to go down.  */
+      set_command_state (f, cs_running);
+      c->next = waiting_jobs;
+      waiting_jobs = c;
+      return 0;
     }
 
   /* Start the first command; reap_children will run later command lines.  */
@@ -1379,19 +1386,25 @@
           }
         /* Read a token.  As long as there's no token available we'll block.
            If we get a SIGCHLD we'll return with EINTR.  If one happened
-           before we got here we'll return with EBADF.  */
-        else if (read (job_rfd, &c->job_token, 1) < 1)
+           before we got here we'll return immediately with EAGAIN because
+           the signal handler unsets the blocking bit.  */
+        else if (read (job_fds[0], &c->job_token, 1) < 1)
           {
-            if (errno == EINTR)
-              ;
+            int fl;
 
-            /* If EBADF, we got a SIGCHLD before.  Otherwise, who knows?  */
-            else if (errno != EBADF)
+#if !defined(EAGAIN)
+# define EAGAIN EWOULDBLOCK
+#endif
+            if (errno != EINTR && errno != EAGAIN)
               pfatal_with_name (_("read jobs pipe"));
 
-            /* Something's done; reap it.  We don't force a block here; if
-               something strange happened and nothing's ready, just come back
-               and wait some more.  */
+            /* Set the blocking bit on the read FD again, just in case.  */
+            fl = fcntl(job_fds[0], F_GETFL, 0);
+            if (fl >= 0)
+              fcntl(job_fds[0], F_SETFL, fl & ~O_NONBLOCK);
+
+            /* Something's done.  We don't want to block for a whole child,
+               just reap whatever's there.  */
             reap_children (0, 0);
           }
 
diff --git a/main.c b/main.c
index d40ae3f..c51fe7c 100644
--- a/main.c
+++ b/main.c
@@ -205,7 +205,6 @@
 /* File descriptors for the jobs pipe.  */
 
 int job_fds[2] = { -1, -1 };
-int job_rfd = -1;
 
 /* Maximum load average at which multiple jobs will be run.
    Negative values mean unlimited, while zero means limit to
@@ -1353,15 +1352,13 @@
       job_slots_str = xstrdup(buf);
     }
 
-    /* If we have a jobserver pipe, dup(2) the read end.  We'll use that in
-       the child handler to note a child has died.  See job.c.  */
-
+  /* Be sure the blocking bit on the read FD is set to start with.  */
   if (job_fds[0] >= 0)
     {
-      job_rfds = dup (job_fds[0]);
-      CLOSE_ON_EXEC (job_rfds);
+      int fl = fcntl(job_fds[0], F_GETFL, 0);
+      if (fl >= 0)
+        fcntl(job_fds[0], F_SETFL, fl & ~O_NONBLOCK);
     }
-
 #endif
 
   /* Set up MAKEFLAGS and MFLAGS again, so they will be right.  */
diff --git a/make.h b/make.h
index 0496ce7..26eaec9 100644
--- a/make.h
+++ b/make.h
@@ -77,6 +77,14 @@
 extern int errno;
 #endif
 
+/* A shortcut for EINTR checking.  Note you should never negate this!  That
+   very likely doesn't mean what you want if EINTR is not available.  */
+#ifdef EINTR
+# define EINTR_SET (errno == EINTR)
+#else
+# define EINTR_SET (0)
+#endif
+
 #ifndef isblank
 # define isblank(c)     ((c) == ' ' || (c) == '\t')
 #endif
diff --git a/remake.c b/remake.c
index 6464f34..74c4a9a 100644
--- a/remake.c
+++ b/remake.c
@@ -918,13 +918,10 @@
 	  char buf;
 	  int status;
 
-#ifdef EINTR
 	  do
-#endif
 	    status = fstat (fd, &statbuf);
-#ifdef EINTR
-	  while (status < 0 && errno == EINTR);
-#endif
+	  while (status < 0 && EINTR_SET);
+
 	  if (status < 0)
 	    TOUCH_ERROR ("touch: fstat: ");
 	  /* Rewrite character 0 same as it already is.  */