| // Copyright 2014 The Fuchsia Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| package ninjalog |
| |
| import ( |
| "bufio" |
| "container/heap" |
| "encoding/binary" |
| "encoding/json" |
| "fmt" |
| "io" |
| "path" |
| "regexp" |
| "sort" |
| "strconv" |
| "strings" |
| "time" |
| |
| "github.com/google/shlex" |
| |
| "go.fuchsia.dev/fuchsia/tools/build/ninjago/compdb" |
| ) |
| |
// Step is one step in ninja_log file.
// time is measured from ninja start time.
type Step struct {
	Start time.Duration
	End   time.Duration
	// Restat is the output's modification time, but it is not convertible to
	// an absolute real time: on POSIX time_t is used, but on Windows a
	// different type is used.
	// https://github.com/martine/ninja/blob/HEAD/src/timestamp.h
	Restat  int
	Out     string
	CmdHash uint64

	// Outs holds other outputs for the same CmdHash if dedup'ed.
	Outs []string
	// Command is the matching compilation database command, if populated.
	Command *compdb.Command

	// OnCriticalPath reports whether this step is on the critical path.
	OnCriticalPath bool
	// TotalFloat is the amount of time this step can be delayed without affecting
	// the completion time of the build.
	//
	// https://en.wikipedia.org/wiki/Float_(project_management)
	TotalFloat time.Duration
	// Drag is the amount of time this step is adding to the total build time. All
	// non-critical steps have zero drag.
	//
	// https://en.wikipedia.org/wiki/Critical_path_drag
	Drag time.Duration
}
| |
| // Duration reports step's duration. |
| func (s Step) Duration() time.Duration { |
| return s.End - s.Start |
| } |
| |
| // AllOutputs return all outputs of this step. |
| func (s Step) AllOutputs() []string { |
| return append(s.Outs, s.Out) |
| } |
| |
| var pythonRE = regexp.MustCompile(`^python(\d(\.\d+)?)?$`) |
| |
| // extractPythonScript returns the first python script in input tokens. |
| func extractPythonScript(tokens []string) string { |
| for _, t := range tokens { |
| if c := baseCmd(t); strings.HasSuffix(c, ".py") { |
| return c |
| } |
| } |
| return "" |
| } |
| |
// isWrapperPythonScript reports whether script is one of the known wrapper
// scripts used in Fuchsia's build, which wrap the actual command.
func isWrapperPythonScript(script string) bool {
	wrappers := []string{"action_tracer.py", "output_cacher.py"}
	for _, wrapper := range wrappers {
		if strings.HasSuffix(script, wrapper) {
			return true
		}
	}
	return false
}
| |
| // extractCommand extracts the actual script/binary being run from a full |
| // command. This function includes logic to handle special wrapper scripts used |
| // in Fuchsia's build. |
| // |
| // This function is best-effort, if nothing can be successfully extracted, an |
| // empty string is returned. |
| func extractCommand(cmd []string) string { |
| for i, token := range cmd { |
| c := baseCmd(token) |
| if c == "env" { |
| continue |
| } |
| |
| // gn_run_binary.sh is a script to run an arbitrary binary produced by the |
| // current build. |
| // |
| // First argument to gn_run_binary.sh is the bin directory of the toolchain, |
| // skip it to take the second argument, which is the binary to run. |
| if c == "gn_run_binary.sh" { |
| if i+2 < len(cmd) { |
| return baseCmd(cmd[i+2]) |
| } |
| break |
| } |
| |
| if pythonRE.MatchString(c) { |
| if script := extractPythonScript(cmd); script != "" { |
| if !isWrapperPythonScript(script) { |
| return script |
| } |
| // The top level command we extracted is one of the wrapper python |
| // scripts, the actual command is after --, so abandon everything before |
| // that. |
| for j := i + 1; j < len(cmd); j++ { |
| if cmd[j] == "--" && j+1 < len(cmd) { |
| return extractCommand(cmd[j+1:]) |
| } |
| } |
| } |
| break |
| } |
| |
| return c |
| } |
| return "" |
| } |
| |
// baseCmd returns the last path element of cmd with any surrounding
// parentheses stripped, e.g. "/a/b/(tool)" -> "tool".
func baseCmd(cmd string) string {
	base := path.Base(cmd)
	return strings.Trim(base, "()")
}
| |
| // category returns a comma separated list of executed commands. |
| func (s Step) Category() string { |
| if s.Command == nil { |
| return "unknown" |
| } |
| tokens, err := shlex.Split(s.Command.Command) |
| if err != nil { |
| return "invalid command" |
| } |
| |
| var commands [][]string |
| start := 0 |
| for i, t := range tokens { |
| if t == "||" || t == "&&" { |
| commands = append(commands, tokens[start:i]) |
| start = i + 1 |
| } |
| } |
| if start < len(tokens) { |
| commands = append(commands, tokens[start:]) |
| } |
| |
| var categories []string |
| for _, command := range commands { |
| if c := extractCommand(command); c != "" { |
| categories = append(categories, c) |
| } |
| } |
| return strings.Join(categories, ",") |
| } |
| |
| // Steps is a list of Step. |
| // It could be used to sort by start time. |
| type Steps []Step |
| |
| func (s Steps) Len() int { return len(s) } |
| func (s Steps) Swap(i, j int) { s[i], s[j] = s[j], s[i] } |
| func (s Steps) Less(i, j int) bool { |
| if s[i].Start != s[j].Start { |
| return s[i].Start < s[j].Start |
| } |
| if s[i].End != s[j].End { |
| return s[i].End < s[j].End |
| } |
| return s[i].Out < s[j].Out |
| } |
| |
| // Reverse reverses steps. |
| // It would be more efficient if steps is already sorted than using sort.Reverse. |
| func (s Steps) Reverse() { |
| for i, j := 0, len(s)-1; i < j; i, j = i+1, j-1 { |
| s[i], s[j] = s[j], s[i] |
| } |
| } |
| |
| // ByEnd is used to sort by end time. |
| type ByEnd struct{ Steps } |
| |
| func (s ByEnd) Less(i, j int) bool { return s.Steps[i].End < s.Steps[j].End } |
| |
| // ByDuration is used to sort by duration. |
| type ByDuration struct{ Steps } |
| |
| func (s ByDuration) Less(i, j int) bool { return s.Steps[i].Duration() < s.Steps[j].Duration() } |
| |
| // ByWeightedTime is used to sort by weighted time. |
| type ByWeightedTime struct { |
| Weighted map[string]time.Duration |
| Steps |
| } |
| |
| func (s ByWeightedTime) Less(i, j int) bool { |
| return s.Weighted[s.Steps[i].Out] < s.Weighted[s.Steps[j].Out] |
| } |
| |
// Metadata is data added by compile.py.
type Metadata struct {
	// Platform is the platform of buildbot.
	Platform string `json:"platform"`

	// Argv is argv of compile.py.
	Argv []string `json:"argv"`

	// Cwd is the current working directory of compile.py.
	Cwd string `json:"cwd"`

	// Compiler is the compiler used.
	Compiler string `json:"compiler"`

	// Cmdline is the command line of ninja.
	Cmdline []string `json:"cmdline"`

	// Exit is the exit status of ninja.
	Exit int `json:"exit"`

	// Env is the environment variables.
	Env map[string]string `json:"env"`

	// CompilerProxyInfo is a path name of the associated compiler_proxy.INFO log.
	CompilerProxyInfo string `json:"compiler_proxy_info"`

	// Raw is the raw JSON string for metadata (not part of the JSON payload).
	Raw string
	// Error is the error message from parsing metadata, empty on success.
	Error string
}
| |
// NinjaLog is parsed data of ninja_log file.
type NinjaLog struct {
	// Filename is a filename of ninja_log.
	Filename string

	// Start is the line number where the last build in the ninja_log file
	// begins (ninja appends across builds; only the last build is kept).
	Start int

	// Steps contains steps in the last build in ninja_log file.
	Steps []Step

	// Metadata is additional data found in ninja_log file.
	Metadata Metadata
}
| |
// Parse parses .ninja_log file, with chromium's compile.py metadata.
//
// Ninja appends to its log across builds, so only the steps of the last build
// in the file are kept (detected by end times going backwards between lines).
// An optional line following "# end of ninja log" is treated as JSON metadata
// written by compile.py; a malformed metadata line is not fatal and is
// recorded in Metadata.Error instead.
func Parse(fname string, r io.Reader) (*NinjaLog, error) {
	b := bufio.NewReader(r)
	scanner := bufio.NewScanner(b)
	nlog := &NinjaLog{Filename: fname}
	lineno := 0
	if !scanner.Scan() {
		if err := scanner.Err(); err != nil {
			return nil, err
		}
		return nil, fmt.Errorf("empty file?")
	}
	lineno++
	line := scanner.Text()
	// Only the v5 log format is supported; reject anything else up front.
	if line != "# ninja log v5" {
		return nil, fmt.Errorf("unexpected format: %s", line)
	}
	nlog.Start = lineno
	var lastStep Step
	for scanner.Scan() {
		line := scanner.Text()
		if line == "# end of ninja log" {
			break
		}
		if line == "" {
			continue
		}
		step, err := lineToStep(line)
		if err != nil {
			return nil, fmt.Errorf("error at %d: %w", lineno, err)
		}
		// An end time earlier than the previous step's marks the start of a
		// newer build appended to the log; discard everything accumulated so
		// far and restart from here.
		if step.End < lastStep.End {
			nlog.Start = lineno
			nlog.Steps = nil
		}
		nlog.Steps = append(nlog.Steps, step)
		lastStep = step
		// NOTE(review): lineno is incremented only after the line has been
		// processed, so the "error at %d" messages above and nlog.Start refer
		// to the previous physical line — this looks off by one; confirm the
		// intent before changing it.
		lineno++
	}
	if err := scanner.Err(); err != nil {
		return nil, fmt.Errorf("error at %d: %w", lineno, err)
	}
	// At most one more line after "# end of ninja log": compile.py metadata.
	if !scanner.Scan() {
		if err := scanner.Err(); err != nil {
			return nil, fmt.Errorf("error at %d: %w", lineno, err)
		}
		// missing metadata?
		return nlog, nil
	}
	lineno++
	nlog.Metadata.Raw = scanner.Text()
	if err := parseMetadata([]byte(nlog.Metadata.Raw), &nlog.Metadata); err != nil {
		nlog.Metadata.Error = fmt.Sprintf("error at %d: %v", lineno, err)
	}
	return nlog, nil
}
| |
| func lineToStep(line string) (Step, error) { |
| var step Step |
| |
| // Due to slowness of strings.Split in App Engine Go, |
| // we use more faster implementation. |
| fields := make([]string, 0, 5) |
| for i := 0; i < 5; i += 1 { |
| m := strings.IndexByte(line, '\t') |
| if m < 0 { |
| m = len(line) |
| } |
| fields = append(fields, line[:m]) |
| if m < len(line) { |
| line = line[m+1:] |
| } |
| } |
| |
| if len(fields) < 5 { |
| return step, fmt.Errorf("few fields:%d", len(fields)) |
| } |
| s, err := strconv.ParseUint(fields[0], 10, 0) |
| if err != nil { |
| return step, fmt.Errorf("bad start %s:%w", fields[0], err) |
| } |
| e, err := strconv.ParseUint(fields[1], 10, 0) |
| if err != nil { |
| return step, fmt.Errorf("bad end %s:%w", fields[1], err) |
| } |
| rs, err := strconv.ParseUint(fields[2], 10, 0) |
| if err != nil { |
| return step, fmt.Errorf("bad restat %s:%w", fields[2], err) |
| } |
| step.Start = time.Duration(s) * time.Millisecond |
| step.End = time.Duration(e) * time.Millisecond |
| step.Restat = int(rs) |
| step.Out = fields[3] |
| ch, err := strconv.ParseUint(fields[4], 16, 64) |
| if err != nil { |
| return step, fmt.Errorf("bad hash %s:%w", fields[4], err) |
| } |
| step.CmdHash = ch |
| return step, nil |
| } |
| |
| func stepToLine(s Step) string { |
| return fmt.Sprintf("%d\t%d\t%d\t%s\t%x", |
| s.Start.Nanoseconds()/int64(time.Millisecond), |
| s.End.Nanoseconds()/int64(time.Millisecond), |
| s.Restat, |
| s.Out, |
| s.CmdHash) |
| } |
| |
| func parseMetadata(buf []byte, metadata *Metadata) error { |
| return json.Unmarshal(buf, metadata) |
| } |
| |
| // Dump dumps steps as ninja log v5 format in w. |
| func Dump(w io.Writer, steps []Step) error { |
| _, err := fmt.Fprintf(w, "# ninja log v5\n") |
| if err != nil { |
| return err |
| } |
| for _, s := range steps { |
| _, err = fmt.Fprintln(w, stepToLine(s)) |
| if err != nil { |
| return err |
| } |
| } |
| return nil |
| } |
| |
| // Dedup dedupes steps. step may have the same cmd hash. |
| // Dedup only returns the first step for these steps. |
| // steps will be sorted by start time. |
| func Dedup(steps []Step) []Step { |
| m := make(map[uint64]*Step) |
| sort.Sort(Steps(steps)) |
| var dedup []Step |
| for _, s := range steps { |
| if os := m[s.CmdHash]; os != nil { |
| os.Outs = append(os.Outs, s.Out) |
| continue |
| } |
| dedup = append(dedup, s) |
| m[s.CmdHash] = &dedup[len(dedup)-1] |
| } |
| return dedup |
| } |
| |
// MurmurHash64A computes Murmur hash using the same exact
// algorithm and seed that is used by Ninja in .ninja_log.
// See https://github.com/ninja-build/ninja/blob/cc79afb/src/build_log.cc#L64
func MurmurHash64A(data []byte) uint64 {
	const (
		seed = uint64(0xDECAFBADDECAFBAD)
		m    = uint64(0xc6a4a7935bd1e995)
		r    = 47
	)
	h := seed ^ (uint64(len(data)) * m)
	// Mix full 8-byte little-endian words.
	i := 0
	for ; i+8 <= len(data); i += 8 {
		k := binary.LittleEndian.Uint64(data[i:])
		k *= m
		k ^= k >> r
		k *= m
		h ^= k
		h *= m
	}
	// Fold the remaining 0-7 tail bytes in from the highest byte down,
	// matching the fallthrough switch of the reference implementation.
	tail := data[i:]
	for j := len(tail) - 1; j >= 0; j-- {
		h ^= uint64(tail[j]) << (uint(j) * 8)
	}
	if len(tail) > 0 {
		h *= m
	}
	h ^= h >> r
	h *= m
	h ^= h >> r
	return h
}
| |
| // Populate the steps with information from the compilation database. |
| func Populate(steps []Step, commands []compdb.Command) []Step { |
| m := make(map[uint64]compdb.Command) |
| // TODO(phosek): consider computing hashes in parallel |
| for _, c := range commands { |
| m[MurmurHash64A([]byte(c.Command))] = c |
| } |
| var populated []Step |
| for _, s := range steps { |
| if c, ok := m[s.CmdHash]; ok { |
| s.Command = &c |
| } |
| populated = append(populated, s) |
| } |
| return populated |
| } |
| |
| // TotalTime returns startup time and end time of ninja, and accumulated time |
| // of all tasks. |
| func TotalTime(steps []Step) (startupTime, endTime, cpuTime time.Duration) { |
| if len(steps) == 0 { |
| return 0, 0, 0 |
| } |
| steps = Dedup(steps) |
| startup := steps[0].Start |
| var end time.Duration |
| for _, s := range steps { |
| if s.Start < startup { |
| startup = s.Start |
| } |
| if s.End > end { |
| end = s.End |
| } |
| cpuTime += s.Duration() |
| } |
| return startup, end, cpuTime |
| } |
| |
| // Flow returns concurrent steps by time. |
| // steps in every []Step will not have time overlap. |
| // steps will be sorted by start time. |
| func Flow(steps []Step) [][]Step { |
| sort.Sort(Steps(steps)) |
| var threads [][]Step |
| |
| for _, s := range steps { |
| tid := -1 |
| for i, th := range threads { |
| if len(th) == 0 { |
| panic(fmt.Errorf("thread %d has no entry", i)) |
| } |
| if th[len(th)-1].End <= s.Start { |
| tid = i |
| break |
| } |
| } |
| if tid == -1 { |
| threads = append(threads, nil) |
| tid = len(threads) - 1 |
| } |
| threads[tid] = append(threads[tid], s) |
| } |
| return threads |
| } |
| |
// action represents an event's action: "start" or "stop".
type action string

const (
	// unknownAction is the zero value; no generated event carries it.
	unknownAction action = ""
	startAction   action = "start"
	stopAction    action = "stop"
)
| |
// event is an event of steps: the start or the stop of a single step.
type event struct {
	// time is when the event happened, measured from ninja start time.
	time time.Duration
	// action says whether this is the start or the stop of the step.
	action action
	// target is the step's output path (Step.Out), identifying the step.
	target string
}
| |
| // toEvent converts steps into events. |
| // events are sorted by its time. |
| func toEvent(steps []Step) []event { |
| var events []event |
| for _, s := range steps { |
| events = append(events, |
| event{ |
| time: s.Start, |
| action: startAction, |
| target: s.Out, |
| }, |
| event{ |
| time: s.End, |
| action: stopAction, |
| target: s.Out, |
| }, |
| ) |
| } |
| sort.Slice(events, func(i, j int) bool { |
| if events[i].time == events[j].time { |
| // If a task starts and stops on the same time stamp |
| // then the start will come first. |
| return events[i].action < events[j].action |
| } |
| return events[i].time < events[j].time |
| }) |
| return events |
| } |
| |
// WeightedTime calculates weighted time, which is elapsed time with
// each segment divided by the number of tasks that were running in parallel.
// This makes it a much better approximation of how "important" a slow step was.
// For example, a link that is entirely or mostly serialized will have a
// weighted time that is the same or similar to its elapsed time.
// A compile that runs in parallel with 999 other compiles will have a weighted
// time that is tiny.
//
// The returned map is keyed by each step's output path (Step.Out).
func WeightedTime(steps []Step) map[string]time.Duration {
	if len(steps) == 0 {
		return nil
	}
	steps = Dedup(steps)
	events := toEvent(steps)
	weightedDuration := make(map[string]time.Duration)

	// Track the tasks which are currently running, mapping each running
	// task's output to the accumulated weighted time at its start.
	runningTasks := make(map[string]time.Duration)

	// Record the time we have processed up to so we know how to calculate
	// time deltas.
	lastTime := events[0].time

	// Track the accumulated weighted time so that it can efficiently be
	// added to individual tasks.
	var lastWeightedTime time.Duration

	for _, event := range events {
		numRunning := len(runningTasks)
		if numRunning > 0 {
			// Update the total weighted time up to this moment: the elapsed
			// segment is shared equally among all tasks running during it.
			lastWeightedTime += (event.time - lastTime) / time.Duration(numRunning)
		}
		switch event.action {
		case startAction:
			// Record the total weighted task time when this task starts.
			runningTasks[event.target] = lastWeightedTime
		case stopAction:
			// Record the change in the total weighted task time while this task ran.
			weightedDuration[event.target] = lastWeightedTime - runningTasks[event.target]
			delete(runningTasks, event.target)
		}
		lastTime = event.time
	}
	return weightedDuration
}
| |
// Stat represents statistics for build step.
type Stat struct {
	// Type used to group this stat, this is determined by the grouping function
	// provided by the caller when this stat is calculated.
	Type string
	// Count of builds for this type.
	Count int32
	// Accumulative build time for this type.
	Time time.Duration
	// Accumulative weighted build time for this type.
	Weighted time.Duration
	// Build times of all actions grouped under this stat.
	Times []time.Duration
}
| |
| // StatsByType summarizes build step statistics with weighted and typeOf. |
| // Order of the returned slice is undefined. |
| func StatsByType(steps []Step, weighted map[string]time.Duration, typeOf func(Step) string) []Stat { |
| if len(steps) == 0 { |
| return nil |
| } |
| steps = Dedup(steps) |
| m := make(map[string]int) // type to index of stats. |
| var stats []Stat |
| for _, step := range steps { |
| t := typeOf(step) |
| if i, ok := m[t]; ok { |
| stats[i].Count++ |
| stats[i].Time += step.Duration() |
| stats[i].Weighted += weighted[step.Out] |
| stats[i].Times = append(stats[i].Times, step.Duration()) |
| continue |
| } |
| stats = append(stats, Stat{ |
| Type: t, |
| Count: 1, |
| Time: step.Duration(), |
| Times: []time.Duration{step.Duration()}, |
| Weighted: weighted[step.Out], |
| }) |
| m[t] = len(stats) - 1 |
| } |
| return stats |
| } |
| |
| // minHeap is a heap of `Step`s. Its root is always the shortest Step in terms |
| // of duration. |
| type minHeap []Step |
| |
| func (h minHeap) Len() int { return len(h) } |
| func (h minHeap) Swap(i, j int) { h[i], h[j] = h[j], h[i] } |
| func (h minHeap) Less(i, j int) bool { |
| return h[i].Duration() < h[j].Duration() |
| } |
| func (h *minHeap) Push(x interface{}) { *h = append(*h, x.(Step)) } |
| func (h *minHeap) Pop() interface{} { |
| l := (*h).Len() |
| if l == 0 { |
| return Step{} |
| } |
| step := (*h)[l-1] |
| *h = (*h)[:l-1] |
| return step |
| } |
| |
| // SlowestSteps returns the `n` steps that took the longest time to finish. |
| // |
| // Returned steps are sorted on build time in descending order (step takes the |
| // longest time to build is the first element). |
| func SlowestSteps(steps []Step, n int) []Step { |
| mh := new(minHeap) |
| for _, step := range steps { |
| heap.Push(mh, step) |
| if mh.Len() > n { |
| heap.Pop(mh) |
| } |
| } |
| var res []Step |
| for mh.Len() > 0 { |
| res = append(res, heap.Pop(mh).(Step)) |
| } |
| for left, right := 0, len(res)-1; left < right; left, right = left+1, right-1 { |
| res[left], res[right] = res[right], res[left] |
| } |
| return res |
| } |