Change build log to always log the most recent input mtime

If an edge's output files' mtimes are compared to the most recent
input's mtime, edges might be calculated as clean even if they are
actually dirty. While an edge's command is running its rule to produce
its outputs and an input to the edge is updated before the outputs are
written to disk, then subsequent runs will think that the outputs are
newer than the inputs, even though the inputs have actually been updated
and may be different than what were used to produce those outputs.

Ninja will now restat all inputs just prior to running an edge's command
and remember the most recent input mtime. When the command completes,
it will stat any discovered dependencies from dep files (if necessary),
recalculate the most recent input mtime, and log it to the build log
file. On subsequent runs, ninja will use this value to compare to the
edge's most recent input's mtime to determine whether the outputs are
dirty.

This extends the methodology used by restat rules to work in all cases.
Restat rules are still unique in that they will clean the edge's output
nodes recursively if the edge's command did not change the output, but
in all cases, the mtime recorded in the log file is now the most recent
input mtime. See the new tests for more clarification.
diff --git a/src/build.cc b/src/build.cc
index fb5890a..2e29232 100644
--- a/src/build.cc
+++ b/src/build.cc
@@ -270,19 +270,11 @@
 #define MEM_FN mem_fn  // mem_fun was removed in C++17.
 #endif
     if (find_if(begin, end, MEM_FN(&Node::dirty)) == end) {
-      // Recompute most_recent_input.
-      Node* most_recent_input = NULL;
-      for (vector<Node*>::iterator i = begin; i != end; ++i) {
-        if (!most_recent_input || (*i)->mtime() > most_recent_input->mtime())
-          most_recent_input = *i;
-      }
-
       // Now, this edge is dirty if any of the outputs are dirty.
       // If the edge isn't dirty, clean the outputs and mark the edge as not
       // wanted.
       bool outputs_dirty = false;
-      if (!scan->RecomputeOutputsDirty(*oe, most_recent_input,
-                                       &outputs_dirty, err)) {
+      if (!scan->RecomputeOutputsDirty(*oe, &outputs_dirty, err)) {
         return false;
       }
       if (!outputs_dirty) {
@@ -696,6 +688,20 @@
       return false;
   }
 
+  // Find the most recent mtime of any (existing) non-order-only input
+  Node* most_recent_input = NULL;
+  for (vector<Node*>::iterator i = edge->inputs_.begin();
+       i != edge->inputs_.end() - edge->order_only_deps_; ++i) {
+    if (!(*i)->Stat(disk_interface_, err))
+      return false;
+    if (!most_recent_input || (*i)->mtime() > most_recent_input->mtime())
+      most_recent_input = *i;
+  }
+
+  edge->most_recent_input_ = most_recent_input;
+  if (most_recent_input)
+    edge->most_recent_input_mtime_ = most_recent_input->mtime();
+
   // start command computing and run it
   if (!command_runner_->StartCommand(edge)) {
     err->assign("command '" + edge->EvaluateCommand() + "' failed.");
@@ -744,20 +750,18 @@
     return plan_.EdgeFinished(edge, Plan::kEdgeFailed, err);
   }
 
-  // Restat the edge outputs
-  TimeStamp output_mtime = 0;
-  bool restat = edge->GetBindingBool("restat");
+  TimeStamp most_recent_input_mtime = 0;
   if (!config_.dry_run) {
+    // Restat the edge outputs
     bool node_cleaned = false;
-
     for (vector<Node*>::iterator o = edge->outputs_.begin();
          o != edge->outputs_.end(); ++o) {
-      TimeStamp new_mtime = disk_interface_->Stat((*o)->path(), err);
+      TimeStamp old_mtime = (*o)->mtime();
+      (*o)->Stat(disk_interface_, err);
+      TimeStamp new_mtime = (*o)->mtime();
       if (new_mtime == -1)
         return false;
-      if (new_mtime > output_mtime)
-        output_mtime = new_mtime;
-      if ((*o)->mtime() == new_mtime && restat) {
+      if (old_mtime == new_mtime && edge->GetBindingBool("restat")) {
         // The rule command did not change the output.  Propagate the clean
         // state through the build graph.
         // Note that this also applies to nonexistent outputs (mtime == 0).
@@ -767,33 +771,34 @@
       }
     }
 
-    if (node_cleaned) {
-      TimeStamp restat_mtime = 0;
-      // If any output was cleaned, find the most recent mtime of any
-      // (existing) non-order-only input or the depfile.
-      for (vector<Node*>::iterator i = edge->inputs_.begin();
-           i != edge->inputs_.end() - edge->order_only_deps_; ++i) {
-        TimeStamp input_mtime = disk_interface_->Stat((*i)->path(), err);
-        if (input_mtime == -1)
-          return false;
-        if (input_mtime > restat_mtime)
-          restat_mtime = input_mtime;
-      }
+    // Use the time from the most recent input that was computed when the edge was
+    // started, not the mtime of the node as it is now. There could have been other edges
+    // that restat'd the input node and detected a change, but for *this* edge, we want
+    // the mtime as it was when the command began.
+    most_recent_input_mtime = edge->most_recent_input_mtime_;
 
+    // If there were any added deps, compute the most recent input mtime
+    for (vector<Node*>::iterator i = deps_nodes.begin();
+         i != deps_nodes.end(); ++i) {
+      (*i)->StatIfNecessary(disk_interface_, err);
+      if ((*i)->mtime() > most_recent_input_mtime)
+        most_recent_input_mtime = (*i)->mtime();
+    }
+
+    if (node_cleaned) {
+      // If any output was cleaned, take into account the mtime of the depfile
       string depfile = edge->GetUnescapedDepfile();
-      if (restat_mtime != 0 && deps_type.empty() && !depfile.empty()) {
+      if (most_recent_input_mtime != 0 && deps_type.empty() && !depfile.empty()) {
         TimeStamp depfile_mtime = disk_interface_->Stat(depfile, err);
         if (depfile_mtime == -1)
           return false;
-        if (depfile_mtime > restat_mtime)
-          restat_mtime = depfile_mtime;
+        if (depfile_mtime > most_recent_input_mtime)
+          most_recent_input_mtime = depfile_mtime;
       }
 
       // The total number of edges in the plan may have changed as a result
       // of a restat.
       status_->PlanHasTotalEdges(plan_.command_edge_count());
-
-      output_mtime = restat_mtime;
     }
   }
 
@@ -807,7 +812,7 @@
 
   if (scan_.build_log()) {
     if (!scan_.build_log()->RecordCommand(edge, start_time_millis,
-                                          end_time_millis, output_mtime)) {
+                                          end_time_millis, most_recent_input_mtime)) {
       *err = string("Error writing to build log: ") + strerror(errno);
       return false;
     }
diff --git a/src/build_log.cc b/src/build_log.cc
index 4dcd6ce..b35279d 100644
--- a/src/build_log.cc
+++ b/src/build_log.cc
@@ -116,9 +116,9 @@
   : output(output) {}
 
 BuildLog::LogEntry::LogEntry(const string& output, uint64_t command_hash,
-  int start_time, int end_time, TimeStamp restat_mtime)
+  int start_time, int end_time, TimeStamp mtime)
   : output(output), command_hash(command_hash),
-    start_time(start_time), end_time(end_time), mtime(restat_mtime)
+    start_time(start_time), end_time(end_time), mtime(mtime)
 {}
 
 BuildLog::BuildLog()
@@ -303,7 +303,7 @@
     *end = 0;
 
     int start_time = 0, end_time = 0;
-    TimeStamp restat_mtime = 0;
+    TimeStamp mtime = 0;
 
     start_time = atoi(start);
     start = end + 1;
@@ -319,7 +319,7 @@
     if (!end)
       continue;
     *end = 0;
-    restat_mtime = strtoll(start, NULL, 10);
+    mtime = strtoll(start, NULL, 10);
     start = end + 1;
 
     end = (char*)memchr(start, kFieldSeparator, line_end - start);
@@ -343,7 +343,7 @@
 
     entry->start_time = start_time;
     entry->end_time = end_time;
-    entry->mtime = restat_mtime;
+    entry->mtime = mtime;
     if (log_version >= 5) {
       char c = *end; *end = '\0';
       entry->command_hash = (uint64_t)strtoull(start, NULL, 16);
diff --git a/src/build_log.h b/src/build_log.h
index 88551e3..dd72c4c 100644
--- a/src/build_log.h
+++ b/src/build_log.h
@@ -73,7 +73,7 @@
 
     explicit LogEntry(const std::string& output);
     LogEntry(const std::string& output, uint64_t command_hash,
-             int start_time, int end_time, TimeStamp restat_mtime);
+             int start_time, int end_time, TimeStamp mtime);
   };
 
   /// Lookup a previously-run command by its output path.
diff --git a/src/build_test.cc b/src/build_test.cc
index f58a7de..474dd19 100644
--- a/src/build_test.cc
+++ b/src/build_test.cc
@@ -609,6 +609,15 @@
     if (fs_->ReadFile(edge->inputs_[0]->path(), &content, &err) ==
         DiskInterface::Okay)
       fs_->WriteFile(edge->outputs_[0]->path(), content);
+  } else if (edge->rule().name() == "long-cc" ||
+             edge->rule().name() == "long-cc2") {
+    for (vector<Node*>::iterator out = edge->outputs_.begin();
+        out != edge->outputs_.end(); ++out) {
+      fs_->Tick();
+      fs_->Tick();
+      fs_->Tick();
+      fs_->Create((*out)->path(), "");
+    }
   } else {
     printf("unknown command\n");
     return false;
@@ -664,6 +673,15 @@
   else
     result->status = ExitSuccess;
 
+  // These rules simulate an external process modifying files while the build command
+  // runs. See TestInputMtimeRaceCondition and TestInputMtimeRaceConditionWithDepFile.
+  // Note: only the "first" time the rule is run per test is the file modified, so the
+  // test can verify that subsequent runs without the race have no work to do.
+  if (edge->rule().name() == "long-cc" && fs_->now_ == 4)
+    fs_->files_["in1"].mtime = 3;
+  if (edge->rule().name() == "long-cc2" && fs_->now_ == 4)
+    fs_->files_["header.h"].mtime = 3;
+
   // Provide a way for test cases to verify when an edge finishes that
   // some other edge is still active.  This is useful for test cases
   // covering behavior involving multiple active edges.
@@ -2266,6 +2284,148 @@
   builder.command_runner_.release();
 }
 
+TEST_F(BuildWithDepsLogTest, TestInputMtimeRaceCondition) {
+  string err;
+  const char* manifest =
+      "rule long-cc\n"
+      "  command = long-cc\n"
+      "build out: long-cc in1\n";
+
+  State state;
+  ASSERT_NO_FATAL_FAILURE(AddCatRule(&state));
+  ASSERT_NO_FATAL_FAILURE(AssertParse(&state, manifest));
+
+  BuildLog build_log;
+  ASSERT_TRUE(build_log.Load("build_log", &err));
+  ASSERT_TRUE(build_log.OpenForWrite("build_log", *this, &err));
+
+  DepsLog deps_log;
+  ASSERT_TRUE(deps_log.Load("ninja_deps", &state, &err));
+  ASSERT_TRUE(deps_log.OpenForWrite("ninja_deps", &err));
+
+  BuildLog::LogEntry* log_entry = NULL;
+  {
+    Builder builder(&state, config_, &build_log, &deps_log, &fs_, &status_, 0);
+    builder.command_runner_.reset(&command_runner_);
+    command_runner_.commands_ran_.clear();
+
+    // Run the build, out gets built, dep file is created
+    EXPECT_TRUE(builder.AddTarget("out", &err));
+    ASSERT_EQ("", err);
+    EXPECT_TRUE(builder.Build(&err));
+    ASSERT_EQ(1u, command_runner_.commands_ran_.size());
+
+    // See that an entry in the logfile is created. the input_mtime is 1 since that was
+    // the mtime of in1 when the command was started
+    log_entry = build_log.LookupByOutput("out");
+    ASSERT_TRUE(NULL != log_entry);
+    ASSERT_EQ(1u, log_entry->mtime);
+
+    builder.command_runner_.release();
+  }
+
+  {
+    Builder builder(&state, config_, &build_log, &deps_log, &fs_, &status_, 0);
+    builder.command_runner_.reset(&command_runner_);
+    command_runner_.commands_ran_.clear();
+
+    // Trigger the build again - "out" should rebuild despite having a newer mtime than
+    // "in1", since "in1" was touched during the build of out (simulated by changing its
+    // mtime in the the test builder's WaitForCommand() which runs before FinishCommand()
+    command_runner_.commands_ran_.clear();
+    state.Reset();
+    EXPECT_TRUE(builder.AddTarget("out", &err));
+    ASSERT_EQ("", err);
+    EXPECT_TRUE(builder.Build(&err));
+    ASSERT_EQ(1u, command_runner_.commands_ran_.size());
+
+    // Check that the logfile entry is still correct
+    log_entry = build_log.LookupByOutput("out");
+    ASSERT_TRUE(NULL != log_entry);
+    ASSERT_EQ(fs_.files_["in1"].mtime, log_entry->mtime);
+    builder.command_runner_.release();
+  }
+
+  {
+    Builder builder(&state, config_, &build_log, &deps_log, &fs_, &status_, 0);
+    builder.command_runner_.reset(&command_runner_);
+    command_runner_.commands_ran_.clear();
+
+    // And a subsequent run should not have any work to do
+    command_runner_.commands_ran_.clear();
+    state.Reset();
+    EXPECT_TRUE(builder.AddTarget("out", &err));
+    ASSERT_EQ("", err);
+    EXPECT_TRUE(builder.AlreadyUpToDate());
+
+    builder.command_runner_.release();
+  }
+}
+
+TEST_F(BuildWithDepsLogTest, TestInputMtimeRaceConditionWithDepFile) {
+  string err;
+  const char* manifest =
+      "rule long-cc2\n"
+      "  command = long-cc2\n"
+      "build out: long-cc2 in1\n"
+      "  deps = gcc\n"
+      "  depfile = in1.d\n";
+
+  fs_.Create("header.h", "");
+  fs_.Create("in1.d", "out: header.h");
+
+  State state;
+  ASSERT_NO_FATAL_FAILURE(AddCatRule(&state));
+  ASSERT_NO_FATAL_FAILURE(AssertParse(&state, manifest));
+
+  BuildLog build_log;
+  ASSERT_TRUE(build_log.Load("build_log", &err));
+  ASSERT_TRUE(build_log.OpenForWrite("build_log", *this, &err));
+
+  DepsLog deps_log;
+  ASSERT_TRUE(deps_log.Load("ninja_deps", &state, &err));
+  ASSERT_TRUE(deps_log.OpenForWrite("ninja_deps", &err));
+
+  {
+    Builder builder(&state, config_, &build_log, &deps_log, &fs_, &status_, 0);
+    builder.command_runner_.reset(&command_runner_);
+    command_runner_.commands_ran_.clear();
+
+    // Run the build, out gets built, dep file is created
+    EXPECT_TRUE(builder.AddTarget("out", &err));
+    ASSERT_EQ("", err);
+    EXPECT_TRUE(builder.Build(&err));
+    ASSERT_EQ(1u, command_runner_.commands_ran_.size());
+
+    // See that an entry in the logfile is created. the mtime is 3 due to header.h being
+    // updated during the build of out (simulated by changing its mtime in the the test
+    // builder's WaitForCommand() which runs before FinishCommand()
+    BuildLog::LogEntry* log_entry = build_log.LookupByOutput("out");
+    ASSERT_TRUE(NULL != log_entry);
+    ASSERT_EQ(3u, log_entry->mtime);
+
+    builder.command_runner_.release();
+  }
+
+  {
+    // Trigger the build again - "out" won't rebuild since its newest mtime (header.h)
+    // wasn't known when out was originally built and was only discovered via the deps file
+    // when the command completed. Subsequent runs will see out's recorded mtime equal to
+    // the actual most recent mtime.
+    Builder builder(&state, config_, &build_log, &deps_log, &fs_, &status_, 0);
+    builder.command_runner_.reset(&command_runner_);
+    command_runner_.commands_ran_.clear();
+
+    command_runner_.commands_ran_.clear();
+    state.Reset();
+    EXPECT_TRUE(builder.AddTarget("out", &err));
+    ASSERT_EQ("", err);
+    EXPECT_TRUE(builder.AlreadyUpToDate());
+
+    builder.command_runner_.release();
+  }
+}
+
 /// Check that a restat rule generating a header cancels compilations correctly.
 TEST_F(BuildTest, RestatDepfileDependency) {
   ASSERT_NO_FATAL_FAILURE(AssertParse(&state_,
diff --git a/src/graph.cc b/src/graph.cc
index 90e8d08..9e8eda6 100644
--- a/src/graph.cc
+++ b/src/graph.cc
@@ -143,10 +143,12 @@
     }
   }
 
+  edge->most_recent_input_ = most_recent_input;
+
   // We may also be dirty due to output state: missing outputs, out of
   // date outputs, etc.  Visit all outputs and determine whether they're dirty.
   if (!dirty)
-    if (!RecomputeOutputsDirty(edge, most_recent_input, &dirty, err))
+    if (!RecomputeOutputsDirty(edge, &dirty, err))
       return false;
 
   // Finally, visit each output and update their dirty state if necessary.
@@ -212,12 +214,11 @@
   return false;
 }
 
-bool DependencyScan::RecomputeOutputsDirty(Edge* edge, Node* most_recent_input,
-                                           bool* outputs_dirty, string* err) {
+bool DependencyScan::RecomputeOutputsDirty(Edge* edge, bool* outputs_dirty, string* err) {
   string command = edge->EvaluateCommand(/*incl_rsp_file=*/true);
   for (vector<Node*>::iterator o = edge->outputs_.begin();
        o != edge->outputs_.end(); ++o) {
-    if (RecomputeOutputDirty(edge, most_recent_input, command, *o)) {
+    if (RecomputeOutputDirty(edge, command, *o)) {
       *outputs_dirty = true;
       return true;
     }
@@ -226,7 +227,6 @@
 }
 
 bool DependencyScan::RecomputeOutputDirty(const Edge* edge,
-                                          const Node* most_recent_input,
                                           const string& command,
                                           Node* output) {
   if (edge->is_phony()) {
@@ -248,30 +248,7 @@
     return true;
   }
 
-  // Dirty if the output is older than the input.
-  if (most_recent_input && output->mtime() < most_recent_input->mtime()) {
-    TimeStamp output_mtime = output->mtime();
-
-    // If this is a restat rule, we may have cleaned the output with a restat
-    // rule in a previous run and stored the most recent input mtime in the
-    // build log.  Use that mtime instead, so that the file will only be
-    // considered dirty if an input was modified since the previous run.
-    bool used_restat = false;
-    if (edge->GetBindingBool("restat") && build_log() &&
-        (entry = build_log()->LookupByOutput(output->path()))) {
-      output_mtime = entry->mtime;
-      used_restat = true;
-    }
-
-    if (output_mtime < most_recent_input->mtime()) {
-      EXPLAIN("%soutput %s older than most recent input %s "
-              "(%" PRId64 " vs %" PRId64 ")",
-              used_restat ? "restat of " : "", output->path().c_str(),
-              most_recent_input->path().c_str(),
-              output_mtime, most_recent_input->mtime());
-      return true;
-    }
-  }
+  const Node* most_recent_input = edge->most_recent_input_;
 
   if (build_log()) {
     bool generator = edge->GetBindingBool("generator");
@@ -299,6 +276,14 @@
       EXPLAIN("command line not found in log for %s", output->path().c_str());
       return true;
     }
+  } else if (most_recent_input &&
+             output->mtime() < most_recent_input->mtime()) {
+      EXPLAIN(
+          "output %s older than most recent input %s "
+          "(%" PRId64 " vs %" PRId64 ")",
+          output->path().c_str(), most_recent_input->path().c_str(),
+          output->mtime(), most_recent_input->mtime());
+    return true;
   }
 
   return false;
diff --git a/src/graph.h b/src/graph.h
index 6756378..b7187ad 100644
--- a/src/graph.h
+++ b/src/graph.h
@@ -146,6 +146,7 @@
 
   Edge()
       : rule_(NULL), pool_(NULL), dyndep_(NULL), env_(NULL), mark_(VisitNone),
+        most_recent_input_(NULL), most_recent_input_mtime_(0),
         id_(0), outputs_ready_(false), deps_loaded_(false),
         deps_missing_(false), implicit_deps_(0), order_only_deps_(0),
         implicit_outs_(0) {}
@@ -178,6 +179,8 @@
   Node* dyndep_;
   BindingEnv* env_;
   VisitMark mark_;
+  Node* most_recent_input_;
+  TimeStamp most_recent_input_mtime_;
   size_t id_;
   bool outputs_ready_;
   bool deps_loaded_;
@@ -298,8 +301,7 @@
 
   /// Recompute whether any output of the edge is dirty, if so sets |*dirty|.
   /// Returns false on failure.
-  bool RecomputeOutputsDirty(Edge* edge, Node* most_recent_input,
-                             bool* dirty, std::string* err);
+  bool RecomputeOutputsDirty(Edge* edge, bool* dirty, std::string* err);
 
   BuildLog* build_log() const {
     return build_log_;
@@ -325,8 +327,8 @@
 
   /// Recompute whether a given single output should be marked dirty.
   /// Returns true if so.
-  bool RecomputeOutputDirty(const Edge* edge, const Node* most_recent_input,
-                            const std::string& command, Node* output);
+  bool RecomputeOutputDirty(const Edge* edge, const std::string& command,
+                            Node* output);
 
   BuildLog* build_log_;
   DiskInterface* disk_interface_;