Add a -t missingdeps tool to detect some classes of build flakes

The tool looks for targets that depend on a generated file, but do not
properly specify a dependency on the generator. It needs to be run after
a successful build, and will list all potential flakes that may have
broken the build, but didn't due to accidental build step ordering.

The search relies on the correctness of depfile and generator output
information, but these are usually easier to get right than dependencies.

The errors found can usually be verified as actual build flakes by trying
to build the listed problematic files alone in a clean build directory.
Such builds usually fail with a compile job lacking a generated file.

There is some overlap between this tool and 'gn check', but not everyone
uses gn, not everyone using gn uses gn check, and most importantly, gn
check is more about modularity, and less about actual build-time deps
without flakes.

The tool needs to be run after a build completes and depfile data is
collected. It may take several seconds to process, up to a dozen or
two on a large, chromium-sized build.
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 39348c9..e6377e0 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -93,6 +93,7 @@
 	src/line_printer.cc
 	src/manifest_parser.cc
 	src/metrics.cc
+	src/missing_deps.cc
 	src/parser.cc
 	src/state.cc
 	src/string_piece_util.cc
@@ -179,6 +180,7 @@
     src/graph_test.cc
     src/lexer_test.cc
     src/manifest_parser_test.cc
+    src/missing_deps_test.cc
     src/ninja_test.cc
     src/state_test.cc
     src/string_piece_util_test.cc
diff --git a/configure.py b/configure.py
index cded265..647b5b0 100755
--- a/configure.py
+++ b/configure.py
@@ -511,6 +511,7 @@
              'line_printer',
              'manifest_parser',
              'metrics',
+             'missing_deps',
              'parser',
              'state',
              'string_piece_util',
@@ -577,6 +578,7 @@
              'graph_test',
              'lexer_test',
              'manifest_parser_test',
+             'missing_deps_test',
              'ninja_test',
              'state_test',
              'string_piece_util_test',
diff --git a/src/missing_deps.cc b/src/missing_deps.cc
new file mode 100644
index 0000000..a0fd048
--- /dev/null
+++ b/src/missing_deps.cc
@@ -0,0 +1,181 @@
+// Copyright 2019 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "missing_deps.h"
+
+#include <string.h>
+
+#include <iostream>
+
+#include "depfile_parser.h"
+#include "deps_log.h"
+#include "disk_interface.h"
+#include "graph.h"
+#include "state.h"
+#include "util.h"
+
+namespace {
+
+/// ImplicitDepLoader variant that stores dep nodes into the given output
+/// without updating graph deps like the base loader does.
+struct NodeStoringImplicitDepLoader : public ImplicitDepLoader {
+  NodeStoringImplicitDepLoader(
+      State* state, DepsLog* deps_log, DiskInterface* disk_interface,
+      DepfileParserOptions const* depfile_parser_options,
+      std::vector<Node*>* dep_nodes_output)
+      : ImplicitDepLoader(state, deps_log, disk_interface,
+                          depfile_parser_options),
+        dep_nodes_output_(dep_nodes_output) {}
+
+ protected:
+  virtual bool ProcessDepfileDeps(Edge* edge,
+                                  std::vector<StringPiece>* depfile_ins,
+                                  std::string* err);
+
+ private:
+  std::vector<Node*>* dep_nodes_output_;
+};
+
+bool NodeStoringImplicitDepLoader::ProcessDepfileDeps(
+    Edge* edge, std::vector<StringPiece>* depfile_ins, std::string* err) {
+  for (std::vector<StringPiece>::iterator i = depfile_ins->begin();
+       i != depfile_ins->end(); ++i) {
+    uint64_t slash_bits;
+    if (!CanonicalizePath(const_cast<char*>(i->str_), &i->len_, &slash_bits,
+                          err))
+      return false;
+    Node* node = state_->GetNode(*i, slash_bits);
+    dep_nodes_output_->push_back(node);
+  }
+  return true;
+}
+
+}  // namespace
+
+MissingDependencyScannerDelegate::~MissingDependencyScannerDelegate() {}
+
+void MissingDependencyPrinter::OnMissingDep(Node* node, const std::string& path,
+                                            const Rule& generator) {
+  std::cout << "Missing dep: " << node->path() << " uses " << path
+            << " (generated by " << generator.name() << ")\n";
+}
+
+MissingDependencyScanner::MissingDependencyScanner(
+    MissingDependencyScannerDelegate* delegate, DepsLog* deps_log, State* state,
+    DiskInterface* disk_interface)
+    : delegate_(delegate), deps_log_(deps_log), state_(state),
+      disk_interface_(disk_interface), missing_dep_path_count_(0) {}
+
+void MissingDependencyScanner::ProcessNode(Node* node) {
+  if (!node)
+    return;
+  Edge* edge = node->in_edge();
+  if (!edge)
+    return;
+  if (!seen_.insert(node).second)
+    return;
+
+  for (std::vector<Node*>::iterator in = edge->inputs_.begin();
+       in != edge->inputs_.end(); ++in) {
+    ProcessNode(*in);
+  }
+
+  std::string deps_type = edge->GetBinding("deps");
+  if (!deps_type.empty()) {
+    DepsLog::Deps* deps = deps_log_->GetDeps(node);
+    if (deps)
+      ProcessNodeDeps(node, deps->nodes, deps->node_count);
+  } else {
+    DepfileParserOptions parser_opts;
+    std::vector<Node*> depfile_deps;
+    NodeStoringImplicitDepLoader dep_loader(state_, deps_log_, disk_interface_,
+                                            &parser_opts, &depfile_deps);
+    std::string err;
+    dep_loader.LoadDeps(edge, &err);
+    if (!depfile_deps.empty())
+      ProcessNodeDeps(node, &depfile_deps[0], depfile_deps.size());
+  }
+}
+
+void MissingDependencyScanner::ProcessNodeDeps(Node* node, Node** dep_nodes,
+                                               int dep_nodes_count) {
+  Edge* edge = node->in_edge();
+  std::set<Edge*> deplog_edges;
+  for (int i = 0; i < dep_nodes_count; ++i) {
+    Node* deplog_node = dep_nodes[i];
+    Edge* deplog_edge = deplog_node->in_edge();
+    if (deplog_edge) {
+      deplog_edges.insert(deplog_edge);
+    }
+  }
+  std::vector<Edge*> missing_deps;
+  for (std::set<Edge*>::iterator de = deplog_edges.begin();
+       de != deplog_edges.end(); ++de) {
+    if (!PathExistsBetween(*de, edge)) {
+      missing_deps.push_back(*de);
+    }
+  }
+
+  if (!missing_deps.empty()) {
+    std::set<std::string> missing_deps_rule_names;
+    for (std::vector<Edge*>::iterator ne = missing_deps.begin();
+         ne != missing_deps.end(); ++ne) {
+      for (int i = 0; i < dep_nodes_count; ++i) {
+        if (dep_nodes[i]->in_edge() == *ne) {
+          generated_nodes_.insert(dep_nodes[i]);
+          generator_rules_.insert(&(*ne)->rule());
+          missing_deps_rule_names.insert((*ne)->rule().name());
+          delegate_->OnMissingDep(node, dep_nodes[i]->path(), (*ne)->rule());
+        }
+      }
+    }
+    missing_dep_path_count_ += missing_deps_rule_names.size();
+    nodes_missing_deps_.insert(node);
+  }
+}
+
+void MissingDependencyScanner::PrintStats() {
+  std::cout << "Processed " << seen_.size() << " nodes.\n";
+  if (HadMissingDeps()) {
+    std::cout << "Error: There are " << missing_dep_path_count_
+              << " missing dependency paths.\n";
+    std::cout << nodes_missing_deps_.size()
+              << " targets had depfile dependencies on "
+              << generated_nodes_.size() << " distinct generated inputs "
+              << "(from " << generator_rules_.size() << " rules) "
+              << " without a non-depfile dep path to the generator.\n";
+    std::cout << "There might be build flakiness if any of the targets listed "
+                 "above are built alone, or not late enough, in a clean output "
+                 "directory.\n";
+  } else {
+    std::cout << "No missing dependencies on generated files found.\n";
+  }
+}
+
+bool MissingDependencyScanner::PathExistsBetween(Edge* from, Edge* to) {
+  EdgePair key(from, to);
+  EdgeAdjacencyMap::iterator it = edge_adjacency_map_.find(key);
+  if (it != edge_adjacency_map_.end())
+    return it->second;
+  bool found = false;
+  for (size_t i = 0; i < to->inputs_.size(); ++i) {
+    Edge* e = to->inputs_[i]->in_edge();
+    if (e && (e == from || PathExistsBetween(from, e))) {
+      found = true;
+      break;
+    }
+  }
+  edge_adjacency_map_.insert(std::make_pair(key, found));
+  return found;
+}
diff --git a/src/missing_deps.h b/src/missing_deps.h
new file mode 100644
index 0000000..211a865
--- /dev/null
+++ b/src/missing_deps.h
@@ -0,0 +1,105 @@
+// Copyright 2019 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef NINJA_MISSING_DEPS_H_
+#define NINJA_MISSING_DEPS_H_
+
+#include <map>
+#include <set>
+#include <string>
+
+#if __cplusplus >= 201103L
+#include <unordered_map>
+#endif
+
+struct DepsLog;
+struct DiskInterface;
+struct Edge;
+struct Node;
+struct Rule;
+struct State;
+
+class MissingDependencyScannerDelegate {
+ public:
+  virtual ~MissingDependencyScannerDelegate();
+  virtual void OnMissingDep(Node* node, const std::string& path,
+                            const Rule& generator) = 0;
+};
+
+class MissingDependencyPrinter : public MissingDependencyScannerDelegate {
+  void OnMissingDep(Node* node, const std::string& path, const Rule& generator);
+  void OnStats(int nodes_processed, int nodes_missing_deps,
+               int missing_dep_path_count, int generated_nodes,
+               int generator_rules);
+};
+
+struct EdgePair {
+  EdgePair(Edge* from, Edge* to) : from_(from), to_(to) {}
+  bool operator==(const EdgePair& other) const {
+    return from_ == other.from_ && to_ == other.to_;
+  }
+  bool operator<(const EdgePair& other) const {
+    return (from_ < other.from_) ||
+           ((from_ == other.from_) && (to_ < other.to_));
+  }
+  Edge* from_;
+  Edge* to_;
+};
+
+#if __cplusplus >= 201103L
+namespace std {
+template <>
+struct hash<EdgePair> {
+  size_t operator()(const EdgePair& k) const {
+    uintptr_t uint_from = uintptr_t(k.from_);
+    uintptr_t uint_to = uintptr_t(k.to_);
+    return hash<uintptr_t>()(uint_from ^ (uint_to >> 3));
+  }
+};
+}  // namespace std
+#endif  // __cplusplus >= 201103L
+
+struct MissingDependencyScanner {
+ public:
+  MissingDependencyScanner(MissingDependencyScannerDelegate* delegate,
+                           DepsLog* deps_log, State* state,
+                           DiskInterface* disk_interface);
+  void ProcessNode(Node* node);
+  void PrintStats();
+  bool HadMissingDeps() { return !nodes_missing_deps_.empty(); }
+
+  void ProcessNodeDeps(Node* node, Node** dep_nodes, int dep_nodes_count);
+
+  bool PathExistsBetween(Edge* from, Edge* to);
+
+  MissingDependencyScannerDelegate* delegate_;
+  DepsLog* deps_log_;
+  State* state_;
+  DiskInterface* disk_interface_;
+  std::set<Node*> seen_;
+  std::set<Node*> nodes_missing_deps_;
+  std::set<Node*> generated_nodes_;
+  std::set<const Rule*> generator_rules_;
+  int missing_dep_path_count_;
+
+ private:
+#if __cplusplus >= 201103L
+  using EdgeAdjacencyMap = std::unordered_map<EdgePair, bool>;
+#else
+  typedef std::map<EdgePair, bool> EdgeAdjacencyMap;
+#endif
+  EdgeAdjacencyMap edge_adjacency_map_;
+};
+
+#endif  // NINJA_MISSING_DEPS_H_
diff --git a/src/missing_deps_test.cc b/src/missing_deps_test.cc
new file mode 100644
index 0000000..7b62e6c
--- /dev/null
+++ b/src/missing_deps_test.cc
@@ -0,0 +1,162 @@
+// Copyright 2019 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <memory>
+
+#include "deps_log.h"
+#include "graph.h"
+#include "missing_deps.h"
+#include "state.h"
+#include "test.h"
+
+const char kTestDepsLogFilename[] = "MissingDepTest-tempdepslog";
+
+class MissingDependencyTestDelegate : public MissingDependencyScannerDelegate {
+  void OnMissingDep(Node* node, const std::string& path,
+                    const Rule& generator) {}
+};
+
+struct MissingDependencyScannerTest : public testing::Test {
+  MissingDependencyScannerTest()
+      : generator_rule_("generator_rule"), compile_rule_("compile_rule"),
+        scanner_(&delegate_, &deps_log_, &state_, &filesystem_) {
+    std::string err;
+    deps_log_.OpenForWrite(kTestDepsLogFilename, &err);
+    ASSERT_EQ("", err);
+  }
+
+  MissingDependencyScanner& scanner() { return scanner_; }
+
+  void RecordDepsLogDep(const std::string& from, const std::string& to) {
+    Node* node_deps[] = { state_.LookupNode(to) };
+    deps_log_.RecordDeps(state_.LookupNode(from), 0, 1, node_deps);
+  }
+
+  void ProcessAllNodes() {
+    std::string err;
+    std::vector<Node*> nodes = state_.RootNodes(&err);
+    EXPECT_EQ("", err);
+    for (std::vector<Node*>::iterator it = nodes.begin(); it != nodes.end();
+         ++it) {
+      scanner().ProcessNode(*it);
+    }
+  }
+
+  void CreateInitialState() {
+    EvalString deps_type;
+    deps_type.AddText("gcc");
+    compile_rule_.AddBinding("deps", deps_type);
+    generator_rule_.AddBinding("deps", deps_type);
+    Edge* header_edge = state_.AddEdge(&generator_rule_);
+    state_.AddOut(header_edge, "generated_header", 0);
+    Edge* compile_edge = state_.AddEdge(&compile_rule_);
+    state_.AddOut(compile_edge, "compiled_object", 0);
+  }
+
+  void CreateGraphDependencyBetween(const char* from, const char* to) {
+    Node* from_node = state_.LookupNode(from);
+    Edge* from_edge = from_node->in_edge();
+    state_.AddIn(from_edge, to, 0);
+  }
+
+  void AssertMissingDependencyBetween(const char* flaky, const char* generated,
+                                      Rule* rule) {
+    Node* flaky_node = state_.LookupNode(flaky);
+    ASSERT_EQ(1u, scanner().nodes_missing_deps_.count(flaky_node));
+    Node* generated_node = state_.LookupNode(generated);
+    ASSERT_EQ(1u, scanner().generated_nodes_.count(generated_node));
+    ASSERT_EQ(1u, scanner().generator_rules_.count(rule));
+  }
+
+  MissingDependencyTestDelegate delegate_;
+  Rule generator_rule_;
+  Rule compile_rule_;
+  DepsLog deps_log_;
+  State state_;
+  VirtualFileSystem filesystem_;
+  MissingDependencyScanner scanner_;
+};
+
+TEST_F(MissingDependencyScannerTest, EmptyGraph) {
+  ProcessAllNodes();
+  ASSERT_FALSE(scanner().HadMissingDeps());
+}
+
+TEST_F(MissingDependencyScannerTest, NoMissingDep) {
+  CreateInitialState();
+  ProcessAllNodes();
+  ASSERT_FALSE(scanner().HadMissingDeps());
+}
+
+TEST_F(MissingDependencyScannerTest, MissingDepPresent) {
+  CreateInitialState();
+  // compiled_object uses generated_header, without a proper dependency
+  RecordDepsLogDep("compiled_object", "generated_header");
+  ProcessAllNodes();
+  ASSERT_TRUE(scanner().HadMissingDeps());
+  ASSERT_EQ(1u, scanner().nodes_missing_deps_.size());
+  ASSERT_EQ(1u, scanner().missing_dep_path_count_);
+  AssertMissingDependencyBetween("compiled_object", "generated_header",
+                                 &generator_rule_);
+}
+
+TEST_F(MissingDependencyScannerTest, MissingDepFixedDirect) {
+  CreateInitialState();
+  // Adding the direct dependency fixes the missing dep
+  CreateGraphDependencyBetween("compiled_object", "generated_header");
+  RecordDepsLogDep("compiled_object", "generated_header");
+  ProcessAllNodes();
+  ASSERT_FALSE(scanner().HadMissingDeps());
+}
+
+TEST_F(MissingDependencyScannerTest, MissingDepFixedIndirect) {
+  CreateInitialState();
+  // Adding an indirect dependency also fixes the issue
+  Edge* intermediate_edge = state_.AddEdge(&generator_rule_);
+  state_.AddOut(intermediate_edge, "intermediate", 0);
+  CreateGraphDependencyBetween("compiled_object", "intermediate");
+  CreateGraphDependencyBetween("intermediate", "generated_header");
+  RecordDepsLogDep("compiled_object", "generated_header");
+  ProcessAllNodes();
+  ASSERT_FALSE(scanner().HadMissingDeps());
+}
+
+TEST_F(MissingDependencyScannerTest, CyclicMissingDep) {
+  CreateInitialState();
+  RecordDepsLogDep("generated_header", "compiled_object");
+  RecordDepsLogDep("compiled_object", "generated_header");
+  // In case of a cycle, both paths are reported (and there is
+  // no way to fix the issue by adding deps).
+  ProcessAllNodes();
+  ASSERT_TRUE(scanner().HadMissingDeps());
+  ASSERT_EQ(2u, scanner().nodes_missing_deps_.size());
+  ASSERT_EQ(2u, scanner().missing_dep_path_count_);
+  AssertMissingDependencyBetween("compiled_object", "generated_header",
+                                 &generator_rule_);
+  AssertMissingDependencyBetween("generated_header", "compiled_object",
+                                 &compile_rule_);
+}
+
+TEST_F(MissingDependencyScannerTest, CycleInGraph) {
+  CreateInitialState();
+  CreateGraphDependencyBetween("compiled_object", "generated_header");
+  CreateGraphDependencyBetween("generated_header", "compiled_object");
+  // The missing-deps tool doesn't deal with cycles in the graph, beacuse
+  // there will be an error loading the graph before we get to the tool.
+  // This test is to illustrate that.
+  std::string err;
+  std::vector<Node*> nodes = state_.RootNodes(&err);
+  ASSERT_NE("", err);
+}
+
diff --git a/src/ninja.cc b/src/ninja.cc
index eb97320..7af0941 100644
--- a/src/ninja.cc
+++ b/src/ninja.cc
@@ -37,11 +37,13 @@
 #include "deps_log.h"
 #include "clean.h"
 #include "debug_flags.h"
+#include "depfile_parser.h"
 #include "disk_interface.h"
 #include "graph.h"
 #include "graphviz.h"
 #include "manifest_parser.h"
 #include "metrics.h"
+#include "missing_deps.h"
 #include "state.h"
 #include "util.h"
 #include "version.h"
@@ -117,6 +119,7 @@
   int ToolGraph(const Options* options, int argc, char* argv[]);
   int ToolQuery(const Options* options, int argc, char* argv[]);
   int ToolDeps(const Options* options, int argc, char* argv[]);
+  int ToolMissingDeps(const Options* options, int argc, char* argv[]);
   int ToolBrowse(const Options* options, int argc, char* argv[]);
   int ToolMSVC(const Options* options, int argc, char* argv[]);
   int ToolTargets(const Options* options, int argc, char* argv[]);
@@ -523,6 +526,26 @@
   return 0;
 }
 
+int NinjaMain::ToolMissingDeps(const Options* options, int argc, char** argv) {
+  vector<Node*> nodes;
+  string err;
+  if (!CollectTargetsFromArgs(argc, argv, &nodes, &err)) {
+    Error("%s", err.c_str());
+    return 1;
+  }
+  RealDiskInterface disk_interface;
+  MissingDependencyPrinter printer;
+  MissingDependencyScanner scanner(&printer, &deps_log_, &state_,
+                                   &disk_interface);
+  for (vector<Node*>::iterator it = nodes.begin(); it != nodes.end(); ++it) {
+    scanner.ProcessNode(*it);
+  }
+  scanner.PrintStats();
+  if (scanner.HadMissingDeps())
+    return 3;
+  return 0;
+}
+
 int NinjaMain::ToolTargets(const Options* options, int argc, char* argv[]) {
   int depth = 1;
   if (argc >= 1) {
@@ -960,6 +983,8 @@
       Tool::RUN_AFTER_LOAD, &NinjaMain::ToolCommands },
     { "deps", "show dependencies stored in the deps log",
       Tool::RUN_AFTER_LOGS, &NinjaMain::ToolDeps },
+    { "missingdeps", "check deps log dependencies on generated files",
+      Tool::RUN_AFTER_LOGS, &NinjaMain::ToolMissingDeps },
     { "graph", "output graphviz dot file for targets",
       Tool::RUN_AFTER_LOAD, &NinjaMain::ToolGraph },
     { "query", "show inputs/outputs for a path",
@@ -985,7 +1010,7 @@
     printf("ninja subtools:\n");
     for (const Tool* tool = &kTools[0]; tool->name; ++tool) {
       if (tool->desc)
-        printf("%10s  %s\n", tool->name, tool->desc);
+        printf("%11s  %s\n", tool->name, tool->desc);
     }
     return NULL;
   }