blob: e2813b89853e817593dda47438d2a4c4c155c987 [file] [log] [blame]
// Copyright 2020 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// ninjagraph provides utilities to parse the DOT output from Ninja's `-t graph`
// tool to a Go native format.
package ninjagraph
import (
"bufio"
"fmt"
"io"
"regexp"
"runtime"
"strconv"
"strings"
"sync"
"time"
"go.fuchsia.dev/fuchsia/tools/build/ninjago/ninjalog"
"golang.org/x/sync/errgroup"
)
var (
// Example: "0x1234567" [label="../../path/to/myfile.cc"]
nodePattern = regexp.MustCompile(`^"0x([0-9a-f"]+)" \[label="([^"]+)"`)
// Example:
// "0x1234567" -> "0x7654321"
// or
// "0x1234567" -> "0x7654321" [label=" host_x64_cxx"]
//
// Labels for edges currently have a leading space. Optionally match it just
// in case it's removed in the future.
// https://github.com/ninja-build/ninja/blob/6c5e886aacd98766fe43539c2c8ae7f3ca2af2aa/src/graphviz.cc#L53
edgePattern = regexp.MustCompile(`^"0x([0-9a-f]+)" -> "0x([0-9a-f]+)"(?: \[label=" ?([^"]+)")?`)
)
// graphvizNode is a node in Ninja's Graphviz output.
//
// It is possible for a graphviz node to represent an edge with multiple
// inputs/outputs in Ninja's build graph.
type graphvizNode struct {
id int64
label string
// isNinjaEdge is true if this Graphviz node represents a Ninja build edge.
isNinjaEdge bool
}
// graphvizEdge is an edge in Ninja's Graphviz output.
type graphvizEdge struct {
from, to int64
// If label is non-empty, this edge represents a build edge in Ninja's build
// graph, and label is the rule for this build edge.
label string
}
func graphvizNodeFrom(line string) (graphvizNode, bool, error) {
match := nodePattern.FindStringSubmatch(line)
if match == nil {
return graphvizNode{}, false, nil
}
id, err := strconv.ParseInt(match[1], 16, 64)
if err != nil {
return graphvizNode{}, false, err
}
return graphvizNode{
id: id,
label: match[2],
// A Graphviz node can represent a Ninja build edge with multiple inputs and
// outputs, in this case Ninja draws a ellipse instead of a rectangle.
isNinjaEdge: strings.HasSuffix(line, "shape=ellipse]"),
}, true, nil
}
func graphvizEdgeFrom(line string) (graphvizEdge, bool, error) {
match := edgePattern.FindStringSubmatch(line)
if match == nil {
return graphvizEdge{}, false, nil
}
from, err := strconv.ParseInt(match[1], 16, 64)
if err != nil {
return graphvizEdge{}, false, err
}
to, err := strconv.ParseInt(match[2], 16, 64)
if err != nil {
return graphvizEdge{}, false, err
}
var label string
if len(match) > 3 {
label = match[3]
}
return graphvizEdge{
from: from,
to: to,
label: label,
}, true, nil
}
// Node is a node in Ninja's build graph. It represents an input/ouput file in
// the build.
type Node struct {
// ID is an unique ID of this node.
ID int64
// Path is the path to the input/output file this node represents.
Path string
// In is the edge that produced this node. Nil if this node is not produced by
// any rule.
In *Edge
// Outs contains all edges that use this node as input.
Outs []*Edge
// Fields below are used to memoize search results while looking up the
// critical path.
// criticalInput points to the one of inputs used to produce this node, which
// took the longest time to build.
criticalInput *Node
// criticalBuildDuration is the sum of build durations of all edges along the
// critical path that produced this output.
criticalBuildDuration *time.Duration
}
// Edge is an edge in Ninja's build graph. It links nodes with a build rule.
type Edge struct {
Inputs []int64
Outputs []int64
Rule string
// Fields below are populated after `PopulateEdges`.
// Step is a build step associated with this edge. It can be derived from
// joining with ninja_log. After the join, all steps on non-phony edges are
// populated.
Step *ninjalog.Step
// Fields to memoize earliest start and latest finish for critical path
// calculation based on float.
//
// https://en.wikipedia.org/wiki/Critical_path_method
earliestStart, latestFinish *time.Duration
}
// Graph is a Ninja build graph.
type Graph struct {
// Nodes maps from node IDs to nodes.
Nodes map[int64]*Node
// Edges contains all edges in the graph.
Edges []*Edge
// sink is an edge that marks the completion of the build.
//
// This edge will only be populated after `addSink`.
sink *Edge
}
// addEdge adds an edge to the graph and updates the related nodes accordingly.
func (g *Graph) addEdge(e *Edge) error {
for _, input := range e.Inputs {
n, ok := g.Nodes[input]
if !ok {
return fmt.Errorf("node %x not found, while an edge claims to use it as input", input)
}
n.Outs = append(n.Outs, e)
}
var outputs []int64
for _, output := range e.Outputs {
n, ok := g.Nodes[output]
if !ok {
// Skip this output because it is not included in the build graph.
//
// This is possible when a rule produces multiple outputs, and some of
// those outputs are not explicitly included in the build graph. For
// example, some rule produces both stripped and unstripped versions of a
// binary, and the unstripped ones are not included in the build graph.
continue
}
outputs = append(outputs, output)
if n.In != nil {
return fmt.Errorf("multiple edges claim to produce %x as output", output)
}
n.In = e
}
e.Outputs = outputs
g.Edges = append(g.Edges, e)
return nil
}
// FromDOT reads all lines from the input reader and parses it into a `Graph`.
//
// This function expects the content to be parsed to follow the Ninja log format,
// otherwise the results is undefined.
func FromDOT(r io.Reader) (Graph, error) {
var gNodes []graphvizNode
var gEdges []graphvizEdge
gNodesCh := make(chan []graphvizNode)
gEdgesCh := make(chan []graphvizEdge)
// If a Ninja build edge contains zero or multiple inputs and multiple outputs,
// it is represented as a Graphviz node + edges connected to input and output
// nodes. These two indexes are useful later when we convert the Graphviz
// representation back into a Ninja build edge.
edgesBySrc := make(map[int64][]graphvizEdge)
edgesByDst := make(map[int64][]graphvizEdge)
wg := sync.WaitGroup{}
go func() {
wg.Add(1)
defer wg.Done()
for ns := range gNodesCh {
gNodes = append(gNodes, ns...)
}
}()
go func() {
wg.Add(1)
defer wg.Done()
for es := range gEdgesCh {
gEdges = append(gEdges, es...)
for _, e := range es {
edgesBySrc[e.from] = append(edgesBySrc[e.from], e)
edgesByDst[e.to] = append(edgesByDst[e.to], e)
}
}
}()
eg := errgroup.Group{}
sem := make(chan struct{}, runtime.GOMAXPROCS(0)-2)
s := bufio.NewScanner(r)
for s.Scan() {
sem <- struct{}{}
// Chunk the lines to reduce channel IO, this significantly reduces runtime.
lines := []string{s.Text()}
for i := 0; i < 10_000 && s.Scan(); i++ {
lines = append(lines, s.Text())
}
eg.Go(func() error {
defer func() { <-sem }()
var ns []graphvizNode
var es []graphvizEdge
for _, line := range lines {
n, ok, err := graphvizNodeFrom(line)
if err != nil {
return err
}
if ok {
ns = append(ns, n)
continue
}
e, ok, err := graphvizEdgeFrom(line)
if err != nil {
return err
}
if ok {
es = append(es, e)
continue
}
}
gNodesCh <- ns
gEdgesCh <- es
return nil
})
}
if err := eg.Wait(); err != nil {
return Graph{}, err
}
close(gNodesCh)
close(gEdgesCh)
wg.Wait()
if err := s.Err(); err != nil && err != io.EOF {
return Graph{}, err
}
// We've done parsing of the Graphviz representation, now map it back to the
// actual Ninja build graph.
g := Graph{
Nodes: make(map[int64]*Node),
}
// Collect all Graphviz nodes that represent Ninja build edges and assemble
// them later.
var edgeNodes []graphvizNode
// Later steps require all nodes to be present in the graph, so this step
// cannot be done in parallel. This is fine because the number of nodes is
// usually much smaller than the number of edges.
for _, n := range gNodes {
if n.isNinjaEdge {
edgeNodes = append(edgeNodes, n)
continue
}
g.Nodes[n.id] = &Node{ID: n.id, Path: n.label}
}
wg.Add(2)
edges := make(chan []*Edge)
go func() {
defer wg.Done()
var es []*Edge
for _, edge := range gEdges {
if edge.label == "" {
continue
}
es = append(es, &Edge{
Inputs: []int64{edge.from},
Outputs: []int64{edge.to},
Rule: edge.label,
})
// Chunk the edges to reduce channel IO, this significantly reduces runtime.
if len(es) > 10_000 {
edges <- es
es = nil
}
}
edges <- es
}()
go func() {
defer wg.Done()
var es []*Edge
for _, n := range edgeNodes {
// If a Ninja build edge is represented as a Graphviz node, all the
// Graphviz edges pointing to it are connected to inputs, and all the
// Graphviz edges going out of it are connected to outputs.
e := &Edge{Rule: n.label}
for _, from := range edgesByDst[n.id] {
e.Inputs = append(e.Inputs, from.from)
}
for _, to := range edgesBySrc[n.id] {
e.Outputs = append(e.Outputs, to.to)
}
es = append(es, e)
// Chunk the edges to reduce channel IO, this significantly reduces runtime.
if len(es) > 10_000 {
edges <- es
es = nil
}
}
edges <- es
}()
go func() {
wg.Wait()
close(edges)
}()
for es := range edges {
for _, e := range es {
if err := g.addEdge(e); err != nil {
return Graph{}, err
}
}
}
return g, nil
}
// PopulateEdges joins the build steps from ninjalog with the graph and
// populates build time on edges.
//
// `steps` should be deduplicated, so they have a 1-to-1 mapping to edges.
// Although the mapping can be partial, which means not all edges from the graph
// have a corresponding step from the input.
//
// If any non-phony edges are missing steps, subsequent attempt to calculate
// critical path will fail. To avoid this, use `WithStepsOnly` to extract a
// partial graph.
func (g *Graph) PopulateEdges(steps []ninjalog.Step) error {
stepByOut := make(map[string]ninjalog.Step)
for _, step := range steps {
// Index all outputs, otherwise the edges can miss steps due to missing
// nodes. If steps are only indexed on their main output, and that output
// node is missing from the graph, we can't draw a link between the step and
// its corresponding edge.
//
// For example, if a step has its main output set to `foo`, and also
// produces `bar` and `baz`, we want to index this step on all three of the
// outputs. This way, if an edge missed `foo` in its output (possible when
// the output node is not included in the build graph), it can still be
// associated with the step since it can match on both other outputs.
for _, out := range append(step.Outs, step.Out) {
if conflict, ok := stepByOut[out]; ok {
return fmt.Errorf("multiple steps claim to produce the same output %s\nverbose debugging info:\n:step 1: %#v\ncommand 1: %#v\nstep 2: %#v\ncommand 2: %#v\n", out, conflict, conflict.Command, step, step.Command)
}
stepByOut[out] = step
}
}
for _, edge := range g.Edges {
// Skip "phony" builds because they don't actually run any build commands.
//
// For example "build default: phony obj/default.stamp" can be included in
// the graph.
if edge.Rule == "phony" {
continue
}
var nodes []*Node
for _, output := range edge.Outputs {
node := g.Nodes[output]
if node == nil {
return fmt.Errorf("node %x not found, yet an edge claims to produce it, invalid graph", output)
}
nodes = append(nodes, node)
}
// Look for the corresponding ninjalog step for this edge. We do this by
// matching outputs: for each output we look for the build step that
// produced it, and associate that with the edge. Along the way we also
// check all the outputs claimed by this edge is produced by the same step,
// so there is a 1-to-1 mapping between them.
var step *ninjalog.Step
for _, node := range nodes {
s, ok := stepByOut[node.Path]
if !ok {
break
}
if step != nil && step.CmdHash != s.CmdHash {
return fmt.Errorf("multiple steps match the same edge on outputs %v, previous step: %#v, this step: %#v", pathsOf(nodes), step, s)
}
step = &s
}
// Steps can be missing if they are from a partial build, for example
// incremental builds and failed builds. In this case, some edges in the
// graphs have not been reached.
if step != nil {
edge.Step = step
}
}
return nil
}
func pathsOf(nodes []*Node) []string {
var paths []string
for _, n := range nodes {
paths = append(paths, n.Path)
}
return paths
}
// WithStepsOnly returns an extracted subgraph that contains edges that either
// have `Step`s associated with them, or are phonies. Only nodes with edges
// connected to them are kept in the returned graph.
//
// This function is useful, after `Step`s are populated, for extracting a
// partial build graph from, for example, incremental builds and failed builds.
//
// If this function returns successfully, the returned Graph is guaranteed to
// have steps associated to all non-phony edges. All phony edges are kept to
// preserve dependencies.
func WithStepsOnly(g Graph) (Graph, error) {
subGraph := Graph{
Nodes: make(map[int64]*Node),
}
for _, edge := range g.Edges {
// Steps can be missing when analyzing partial builds, for example
// incremental and failed builds. Missing a step means this edge is not
// reached in this partial build (build command for this edge is not
// executed), so we simply omit them.
if edge.Rule != "phony" && edge.Step == nil {
continue
}
subGraph.Edges = append(subGraph.Edges, &Edge{
// Avoid reusing the existing edge or copying over memoized pointer
// fields, so when memoized fields are set on the new graph, it won't
// affect the old one.
Inputs: edge.Inputs,
Outputs: edge.Outputs,
Rule: edge.Rule,
Step: edge.Step,
})
}
for _, edge := range subGraph.Edges {
for _, id := range edge.Inputs {
node, ok := subGraph.Nodes[id]
if !ok {
n, ok := g.Nodes[id]
if !ok {
return Graph{}, fmt.Errorf("node %x not found, yet an edge claims it as input, invalid graph", id)
}
node = &Node{
// Avoid reusing the existing node or copying over memoized pointer
// fields, so when memoized fields are set on the new graph, it won't
// affect the old one.
ID: n.ID,
Path: n.Path,
}
subGraph.Nodes[id] = node
}
node.Outs = append(node.Outs, edge)
}
for _, id := range edge.Outputs {
node, ok := subGraph.Nodes[id]
if !ok {
n, ok := g.Nodes[id]
if !ok {
return Graph{}, fmt.Errorf("node %x not found, yet an edge claims to produce it, invalid graph", id)
}
node = &Node{
// Avoid reusing the existing node or copying over memoized pointer
// fields, so when memoized fields are set on the new graph, it won't
// affect the old one.
ID: n.ID,
Path: n.Path,
}
subGraph.Nodes[id] = node
}
if node.In != nil {
return Graph{}, fmt.Errorf("multiple edges claim to produce %x as output, invalid graph", id)
}
node.In = edge
}
}
return subGraph, nil
}