Merge pull request #1753 from jdrouhard/log_input_mtime

Change build log to always log the most recent input mtime
diff --git a/src/build.cc b/src/build.cc
index fb5890a..2e29232 100644
--- a/src/build.cc
+++ b/src/build.cc
@@ -270,19 +270,11 @@
 #define MEM_FN mem_fn  // mem_fun was removed in C++17.
 #endif
     if (find_if(begin, end, MEM_FN(&Node::dirty)) == end) {
-      // Recompute most_recent_input.
-      Node* most_recent_input = NULL;
-      for (vector<Node*>::iterator i = begin; i != end; ++i) {
-        if (!most_recent_input || (*i)->mtime() > most_recent_input->mtime())
-          most_recent_input = *i;
-      }
-
       // Now, this edge is dirty if any of the outputs are dirty.
       // If the edge isn't dirty, clean the outputs and mark the edge as not
       // wanted.
       bool outputs_dirty = false;
-      if (!scan->RecomputeOutputsDirty(*oe, most_recent_input,
-                                       &outputs_dirty, err)) {
+      if (!scan->RecomputeOutputsDirty(*oe, &outputs_dirty, err)) {
         return false;
       }
       if (!outputs_dirty) {
@@ -696,6 +688,20 @@
       return false;
   }
 
+  // Find the most recent mtime of any (existing) non-order-only input
+  Node* most_recent_input = NULL;
+  for (vector<Node*>::iterator i = edge->inputs_.begin();
+       i != edge->inputs_.end() - edge->order_only_deps_; ++i) {
+    if (!(*i)->Stat(disk_interface_, err))
+      return false;
+    if (!most_recent_input || (*i)->mtime() > most_recent_input->mtime())
+      most_recent_input = *i;
+  }
+
+  edge->most_recent_input_ = most_recent_input;
+  if (most_recent_input)
+    edge->most_recent_input_mtime_ = most_recent_input->mtime();
+
   // start command computing and run it
   if (!command_runner_->StartCommand(edge)) {
     err->assign("command '" + edge->EvaluateCommand() + "' failed.");
@@ -744,20 +750,18 @@
     return plan_.EdgeFinished(edge, Plan::kEdgeFailed, err);
   }
 
-  // Restat the edge outputs
-  TimeStamp output_mtime = 0;
-  bool restat = edge->GetBindingBool("restat");
+  TimeStamp most_recent_input_mtime = 0;
   if (!config_.dry_run) {
+    // Restat the edge outputs
     bool node_cleaned = false;
-
     for (vector<Node*>::iterator o = edge->outputs_.begin();
          o != edge->outputs_.end(); ++o) {
-      TimeStamp new_mtime = disk_interface_->Stat((*o)->path(), err);
+      TimeStamp old_mtime = (*o)->mtime();
+      (*o)->Stat(disk_interface_, err);
+      TimeStamp new_mtime = (*o)->mtime();
       if (new_mtime == -1)
         return false;
-      if (new_mtime > output_mtime)
-        output_mtime = new_mtime;
-      if ((*o)->mtime() == new_mtime && restat) {
+      if (old_mtime == new_mtime && edge->GetBindingBool("restat")) {
         // The rule command did not change the output.  Propagate the clean
         // state through the build graph.
         // Note that this also applies to nonexistent outputs (mtime == 0).
@@ -767,33 +771,34 @@
       }
     }
 
-    if (node_cleaned) {
-      TimeStamp restat_mtime = 0;
-      // If any output was cleaned, find the most recent mtime of any
-      // (existing) non-order-only input or the depfile.
-      for (vector<Node*>::iterator i = edge->inputs_.begin();
-           i != edge->inputs_.end() - edge->order_only_deps_; ++i) {
-        TimeStamp input_mtime = disk_interface_->Stat((*i)->path(), err);
-        if (input_mtime == -1)
-          return false;
-        if (input_mtime > restat_mtime)
-          restat_mtime = input_mtime;
-      }
+    // Use the time from the most recent input that was computed when the edge was
+    // started, not the mtime of the node as it is now. There could have been other edges
+    // that restat'd the input node and detected a change, but for *this* edge, we want
+    // the mtime as it was when the command began.
+    most_recent_input_mtime = edge->most_recent_input_mtime_;
 
+    // If there were any added deps, compute the most recent input mtime
+    for (vector<Node*>::iterator i = deps_nodes.begin();
+         i != deps_nodes.end(); ++i) {
+      (*i)->StatIfNecessary(disk_interface_, err);
+      if ((*i)->mtime() > most_recent_input_mtime)
+        most_recent_input_mtime = (*i)->mtime();
+    }
+
+    if (node_cleaned) {
+      // If any output was cleaned, take into account the mtime of the depfile
       string depfile = edge->GetUnescapedDepfile();
-      if (restat_mtime != 0 && deps_type.empty() && !depfile.empty()) {
+      if (most_recent_input_mtime != 0 && deps_type.empty() && !depfile.empty()) {
         TimeStamp depfile_mtime = disk_interface_->Stat(depfile, err);
         if (depfile_mtime == -1)
           return false;
-        if (depfile_mtime > restat_mtime)
-          restat_mtime = depfile_mtime;
+        if (depfile_mtime > most_recent_input_mtime)
+          most_recent_input_mtime = depfile_mtime;
       }
 
       // The total number of edges in the plan may have changed as a result
       // of a restat.
       status_->PlanHasTotalEdges(plan_.command_edge_count());
-
-      output_mtime = restat_mtime;
     }
   }
 
@@ -807,7 +812,7 @@
 
   if (scan_.build_log()) {
     if (!scan_.build_log()->RecordCommand(edge, start_time_millis,
-                                          end_time_millis, output_mtime)) {
+                                          end_time_millis, most_recent_input_mtime)) {
       *err = string("Error writing to build log: ") + strerror(errno);
       return false;
     }
diff --git a/src/build_log.cc b/src/build_log.cc
index 4dcd6ce..b35279d 100644
--- a/src/build_log.cc
+++ b/src/build_log.cc
@@ -116,9 +116,9 @@
   : output(output) {}
 
 BuildLog::LogEntry::LogEntry(const string& output, uint64_t command_hash,
-  int start_time, int end_time, TimeStamp restat_mtime)
+  int start_time, int end_time, TimeStamp mtime)
   : output(output), command_hash(command_hash),
-    start_time(start_time), end_time(end_time), mtime(restat_mtime)
+    start_time(start_time), end_time(end_time), mtime(mtime)
 {}
 
 BuildLog::BuildLog()
@@ -303,7 +303,7 @@
     *end = 0;
 
     int start_time = 0, end_time = 0;
-    TimeStamp restat_mtime = 0;
+    TimeStamp mtime = 0;
 
     start_time = atoi(start);
     start = end + 1;
@@ -319,7 +319,7 @@
     if (!end)
       continue;
     *end = 0;
-    restat_mtime = strtoll(start, NULL, 10);
+    mtime = strtoll(start, NULL, 10);
     start = end + 1;
 
     end = (char*)memchr(start, kFieldSeparator, line_end - start);
@@ -343,7 +343,7 @@
 
     entry->start_time = start_time;
     entry->end_time = end_time;
-    entry->mtime = restat_mtime;
+    entry->mtime = mtime;
     if (log_version >= 5) {
       char c = *end; *end = '\0';
       entry->command_hash = (uint64_t)strtoull(start, NULL, 16);
diff --git a/src/build_log.h b/src/build_log.h
index 88551e3..dd72c4c 100644
--- a/src/build_log.h
+++ b/src/build_log.h
@@ -73,7 +73,7 @@
 
     explicit LogEntry(const std::string& output);
     LogEntry(const std::string& output, uint64_t command_hash,
-             int start_time, int end_time, TimeStamp restat_mtime);
+             int start_time, int end_time, TimeStamp mtime);
   };
 
   /// Lookup a previously-run command by its output path.
diff --git a/src/build_test.cc b/src/build_test.cc
index f58a7de..474dd19 100644
--- a/src/build_test.cc
+++ b/src/build_test.cc
@@ -609,6 +609,15 @@
     if (fs_->ReadFile(edge->inputs_[0]->path(), &content, &err) ==
         DiskInterface::Okay)
       fs_->WriteFile(edge->outputs_[0]->path(), content);
+  } else if (edge->rule().name() == "long-cc" ||
+             edge->rule().name() == "long-cc2") {
+    for (vector<Node*>::iterator out = edge->outputs_.begin();
+        out != edge->outputs_.end(); ++out) {
+      fs_->Tick();
+      fs_->Tick();
+      fs_->Tick();
+      fs_->Create((*out)->path(), "");
+    }
   } else {
     printf("unknown command\n");
     return false;
@@ -664,6 +673,15 @@
   else
     result->status = ExitSuccess;
 
+  // These rules simulate an external process modifying files while the build command
+  // runs. See TestInputMtimeRaceCondition and TestInputMtimeRaceConditionWithDepFile.
+  // Note: only the "first" time the rule is run per test is the file modified, so the
+  // test can verify that subsequent runs without the race have no work to do.
+  if (edge->rule().name() == "long-cc" && fs_->now_ == 4)
+    fs_->files_["in1"].mtime = 3;
+  if (edge->rule().name() == "long-cc2" && fs_->now_ == 4)
+    fs_->files_["header.h"].mtime = 3;
+
   // Provide a way for test cases to verify when an edge finishes that
   // some other edge is still active.  This is useful for test cases
   // covering behavior involving multiple active edges.
@@ -2266,6 +2284,148 @@
   builder.command_runner_.release();
 }
 
+TEST_F(BuildWithDepsLogTest, TestInputMtimeRaceCondition) {
+  string err;
+  const char* manifest =
+      "rule long-cc\n"
+      "  command = long-cc\n"
+      "build out: long-cc in1\n";
+
+  State state;
+  ASSERT_NO_FATAL_FAILURE(AddCatRule(&state));
+  ASSERT_NO_FATAL_FAILURE(AssertParse(&state, manifest));
+
+  BuildLog build_log;
+  ASSERT_TRUE(build_log.Load("build_log", &err));
+  ASSERT_TRUE(build_log.OpenForWrite("build_log", *this, &err));
+
+  DepsLog deps_log;
+  ASSERT_TRUE(deps_log.Load("ninja_deps", &state, &err));
+  ASSERT_TRUE(deps_log.OpenForWrite("ninja_deps", &err));
+
+  BuildLog::LogEntry* log_entry = NULL;
+  {
+    Builder builder(&state, config_, &build_log, &deps_log, &fs_, &status_, 0);
+    builder.command_runner_.reset(&command_runner_);
+    command_runner_.commands_ran_.clear();
+
+    // Run the build, out gets built, dep file is created
+    EXPECT_TRUE(builder.AddTarget("out", &err));
+    ASSERT_EQ("", err);
+    EXPECT_TRUE(builder.Build(&err));
+    ASSERT_EQ(1u, command_runner_.commands_ran_.size());
+
+    // See that an entry in the logfile is created. the input_mtime is 1 since that was
+    // the mtime of in1 when the command was started
+    log_entry = build_log.LookupByOutput("out");
+    ASSERT_TRUE(NULL != log_entry);
+    ASSERT_EQ(1u, log_entry->mtime);
+
+    builder.command_runner_.release();
+  }
+
+  {
+    Builder builder(&state, config_, &build_log, &deps_log, &fs_, &status_, 0);
+    builder.command_runner_.reset(&command_runner_);
+    command_runner_.commands_ran_.clear();
+
+    // Trigger the build again - "out" should rebuild despite having a newer mtime than
+    // "in1", since "in1" was touched during the build of out (simulated by changing its
+    // mtime in the the test builder's WaitForCommand() which runs before FinishCommand()
+    command_runner_.commands_ran_.clear();
+    state.Reset();
+    EXPECT_TRUE(builder.AddTarget("out", &err));
+    ASSERT_EQ("", err);
+    EXPECT_TRUE(builder.Build(&err));
+    ASSERT_EQ(1u, command_runner_.commands_ran_.size());
+
+    // Check that the logfile entry is still correct
+    log_entry = build_log.LookupByOutput("out");
+    ASSERT_TRUE(NULL != log_entry);
+    ASSERT_EQ(fs_.files_["in1"].mtime, log_entry->mtime);
+    builder.command_runner_.release();
+  }
+
+  {
+    Builder builder(&state, config_, &build_log, &deps_log, &fs_, &status_, 0);
+    builder.command_runner_.reset(&command_runner_);
+    command_runner_.commands_ran_.clear();
+
+    // And a subsequent run should not have any work to do
+    command_runner_.commands_ran_.clear();
+    state.Reset();
+    EXPECT_TRUE(builder.AddTarget("out", &err));
+    ASSERT_EQ("", err);
+    EXPECT_TRUE(builder.AlreadyUpToDate());
+
+    builder.command_runner_.release();
+  }
+}
+
+TEST_F(BuildWithDepsLogTest, TestInputMtimeRaceConditionWithDepFile) {
+  string err;
+  const char* manifest =
+      "rule long-cc2\n"
+      "  command = long-cc2\n"
+      "build out: long-cc2 in1\n"
+      "  deps = gcc\n"
+      "  depfile = in1.d\n";
+
+  fs_.Create("header.h", "");
+  fs_.Create("in1.d", "out: header.h");
+
+  State state;
+  ASSERT_NO_FATAL_FAILURE(AddCatRule(&state));
+  ASSERT_NO_FATAL_FAILURE(AssertParse(&state, manifest));
+
+  BuildLog build_log;
+  ASSERT_TRUE(build_log.Load("build_log", &err));
+  ASSERT_TRUE(build_log.OpenForWrite("build_log", *this, &err));
+
+  DepsLog deps_log;
+  ASSERT_TRUE(deps_log.Load("ninja_deps", &state, &err));
+  ASSERT_TRUE(deps_log.OpenForWrite("ninja_deps", &err));
+
+  {
+    Builder builder(&state, config_, &build_log, &deps_log, &fs_, &status_, 0);
+    builder.command_runner_.reset(&command_runner_);
+    command_runner_.commands_ran_.clear();
+
+    // Run the build, out gets built, dep file is created
+    EXPECT_TRUE(builder.AddTarget("out", &err));
+    ASSERT_EQ("", err);
+    EXPECT_TRUE(builder.Build(&err));
+    ASSERT_EQ(1u, command_runner_.commands_ran_.size());
+
+    // See that an entry in the logfile is created. the mtime is 3 due to header.h being
+    // updated during the build of out (simulated by changing its mtime in the the test
+    // builder's WaitForCommand() which runs before FinishCommand()
+    BuildLog::LogEntry* log_entry = build_log.LookupByOutput("out");
+    ASSERT_TRUE(NULL != log_entry);
+    ASSERT_EQ(3u, log_entry->mtime);
+
+    builder.command_runner_.release();
+  }
+
+  {
+    // Trigger the build again - "out" won't rebuild since its newest mtime (header.h)
+    // wasn't known when out was originally built and was only discovered via the deps file
+    // when the command completed. Subsequent runs will see out's recorded mtime equal to
+    // the actual most recent mtime.
+    Builder builder(&state, config_, &build_log, &deps_log, &fs_, &status_, 0);
+    builder.command_runner_.reset(&command_runner_);
+    command_runner_.commands_ran_.clear();
+
+    command_runner_.commands_ran_.clear();
+    state.Reset();
+    EXPECT_TRUE(builder.AddTarget("out", &err));
+    ASSERT_EQ("", err);
+    EXPECT_TRUE(builder.AlreadyUpToDate());
+
+    builder.command_runner_.release();
+  }
+}
+
 /// Check that a restat rule generating a header cancels compilations correctly.
 TEST_F(BuildTest, RestatDepfileDependency) {
   ASSERT_NO_FATAL_FAILURE(AssertParse(&state_,
diff --git a/src/graph.cc b/src/graph.cc
index 90e8d08..9e8eda6 100644
--- a/src/graph.cc
+++ b/src/graph.cc
@@ -143,10 +143,12 @@
     }
   }
 
+  edge->most_recent_input_ = most_recent_input;
+
   // We may also be dirty due to output state: missing outputs, out of
   // date outputs, etc.  Visit all outputs and determine whether they're dirty.
   if (!dirty)
-    if (!RecomputeOutputsDirty(edge, most_recent_input, &dirty, err))
+    if (!RecomputeOutputsDirty(edge, &dirty, err))
       return false;
 
   // Finally, visit each output and update their dirty state if necessary.
@@ -212,12 +214,11 @@
   return false;
 }
 
-bool DependencyScan::RecomputeOutputsDirty(Edge* edge, Node* most_recent_input,
-                                           bool* outputs_dirty, string* err) {
+bool DependencyScan::RecomputeOutputsDirty(Edge* edge, bool* outputs_dirty, string* err) {
   string command = edge->EvaluateCommand(/*incl_rsp_file=*/true);
   for (vector<Node*>::iterator o = edge->outputs_.begin();
        o != edge->outputs_.end(); ++o) {
-    if (RecomputeOutputDirty(edge, most_recent_input, command, *o)) {
+    if (RecomputeOutputDirty(edge, command, *o)) {
       *outputs_dirty = true;
       return true;
     }
@@ -226,7 +227,6 @@
 }
 
 bool DependencyScan::RecomputeOutputDirty(const Edge* edge,
-                                          const Node* most_recent_input,
                                           const string& command,
                                           Node* output) {
   if (edge->is_phony()) {
@@ -248,30 +248,7 @@
     return true;
   }
 
-  // Dirty if the output is older than the input.
-  if (most_recent_input && output->mtime() < most_recent_input->mtime()) {
-    TimeStamp output_mtime = output->mtime();
-
-    // If this is a restat rule, we may have cleaned the output with a restat
-    // rule in a previous run and stored the most recent input mtime in the
-    // build log.  Use that mtime instead, so that the file will only be
-    // considered dirty if an input was modified since the previous run.
-    bool used_restat = false;
-    if (edge->GetBindingBool("restat") && build_log() &&
-        (entry = build_log()->LookupByOutput(output->path()))) {
-      output_mtime = entry->mtime;
-      used_restat = true;
-    }
-
-    if (output_mtime < most_recent_input->mtime()) {
-      EXPLAIN("%soutput %s older than most recent input %s "
-              "(%" PRId64 " vs %" PRId64 ")",
-              used_restat ? "restat of " : "", output->path().c_str(),
-              most_recent_input->path().c_str(),
-              output_mtime, most_recent_input->mtime());
-      return true;
-    }
-  }
+  const Node* most_recent_input = edge->most_recent_input_;
 
   if (build_log()) {
     bool generator = edge->GetBindingBool("generator");
@@ -299,6 +276,14 @@
       EXPLAIN("command line not found in log for %s", output->path().c_str());
       return true;
     }
+  } else if (most_recent_input &&
+             output->mtime() < most_recent_input->mtime()) {
+      EXPLAIN(
+          "output %s older than most recent input %s "
+          "(%" PRId64 " vs %" PRId64 ")",
+          output->path().c_str(), most_recent_input->path().c_str(),
+          output->mtime(), most_recent_input->mtime());
+    return true;
   }
 
   return false;
diff --git a/src/graph.h b/src/graph.h
index 6756378..b7187ad 100644
--- a/src/graph.h
+++ b/src/graph.h
@@ -146,6 +146,7 @@
 
   Edge()
       : rule_(NULL), pool_(NULL), dyndep_(NULL), env_(NULL), mark_(VisitNone),
+        most_recent_input_(NULL), most_recent_input_mtime_(0),
         id_(0), outputs_ready_(false), deps_loaded_(false),
         deps_missing_(false), implicit_deps_(0), order_only_deps_(0),
         implicit_outs_(0) {}
@@ -178,6 +179,8 @@
   Node* dyndep_;
   BindingEnv* env_;
   VisitMark mark_;
+  Node* most_recent_input_;
+  TimeStamp most_recent_input_mtime_;
   size_t id_;
   bool outputs_ready_;
   bool deps_loaded_;
@@ -298,8 +301,7 @@
 
   /// Recompute whether any output of the edge is dirty, if so sets |*dirty|.
   /// Returns false on failure.
-  bool RecomputeOutputsDirty(Edge* edge, Node* most_recent_input,
-                             bool* dirty, std::string* err);
+  bool RecomputeOutputsDirty(Edge* edge, bool* dirty, std::string* err);
 
   BuildLog* build_log() const {
     return build_log_;
@@ -325,8 +327,8 @@
 
   /// Recompute whether a given single output should be marked dirty.
   /// Returns true if so.
-  bool RecomputeOutputDirty(const Edge* edge, const Node* most_recent_input,
-                            const std::string& command, Node* output);
+  bool RecomputeOutputDirty(const Edge* edge, const std::string& command,
+                            Node* output);
 
   BuildLog* build_log_;
   DiskInterface* disk_interface_;