[dump_breakpad_symbols] Switch input from IDs files to .build-id dirs

Changes:
* Revert to writing symbol files to temporary files on disk. Otherwise
  the tool consumes upwards of 5GB of memory while executing.
* Add a package "breakpad/generator" for generating symbols.

TEST=locally on a fuchsia checkout

IN-1068 #comment

Change-Id: I61208ff6e1e863cc7aac6a47e8deb328e67a416e
diff --git a/breakpad/generator/generator.go b/breakpad/generator/generator.go
new file mode 100644
index 0000000..a28dcd5
--- /dev/null
+++ b/breakpad/generator/generator.go
@@ -0,0 +1,193 @@
+// Copyright 2019 The Fuchsia Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Package generator produces Breakpad symbol files by running dump_syms on ELF binaries.
+package generator
+
+import (
+	"bytes"
+	"fmt"
+	"io/ioutil"
+	"log"
+	"os"
+	"os/exec"
+	"path/filepath"
+	"sync"
+
+	"fuchsia.googlesource.com/tools/breakpad"
+	"fuchsia.googlesource.com/tools/elflib"
+)
+
+// The default module name for modules that don't have a soname, e.g., executables and
+// loadable modules. This allows us to use the same module name at runtime as sonames are
+// the only names that are guaranteed to be available at build and run times. This value
+// must be kept in sync with what Crashpad uses at run time for symbol resolution to work
+// properly.
+const defaultModuleName = "<_>"
+
+// The module OS written into every generated symbol file, overwriting whatever OS value
+// dump_syms emitted.
+const replacementModuleOS = "Fuchsia"
+
+// Generate generates breakpad symbol data for each of the input elflib.BinaryFileRefs.
+//
+// On success it returns the path to a temp directory containing the generated files; the
+// directory is left on disk for the caller. On failure it returns the empty string and
+// the first error encountered.
+func Generate(bfrs []elflib.BinaryFileRef, dumpSymsPath string) (path string, err error) {
+	outc := make(chan string)
+	errc := make(chan error)
+
+	g := &generator{
+		dumpSymsPath: dumpSymsPath,
+		visited:      make(map[string]bool),
+		visitedMutex: &sync.Mutex{},
+	}
+
+	// Buffer every job up front so that queueing never blocks. The worker stops reading
+	// jobs as soon as it fails; with an unbuffered channel the sends below would then
+	// block forever and the worker's error would never be received.
+	jobs := make(chan elflib.BinaryFileRef, len(bfrs))
+	go g.run(jobs, outc, errc)
+	for _, bfr := range bfrs {
+		jobs <- bfr
+	}
+	close(jobs)
+
+	select {
+	case err = <-errc:
+		return "", err
+	case path = <-outc:
+		return path, nil
+	}
+}
+
+// generator is a helper type for executing Breakpad's dump_syms tool.
+//
+// The run method is meant to be executed as a goroutine. It manages its own working
+// directory, and publishes the path to that directory only on success.
+//
+// Files that have already been processed are skipped; the set of visited files is
+// guarded by visitedMutex.
+type generator struct {
+	// The path to the Breakpad dump_syms executable.
+	dumpSymsPath string
+
+	// Filepaths that have already been processed by this generator.
+	visited      map[string]bool
+	visitedMutex *sync.Mutex
+}
+
+// run executes this generator on the given channel of elflib.BinaryFileRefs.
+//
+// A temp directory is created to store generated files. On success, the directory is
+// emitted on out. On the first encountered error, the generator deletes the output
+// directory, emits the error on errs, and exits.
+func (g *generator) run(in <-chan elflib.BinaryFileRef, out chan<- string, errs chan<- error) {
+	outdir, err := ioutil.TempDir("", "breakpad")
+	if err != nil {
+		errs <- err
+		return
+	}
+	if err := g.generate(in, outdir); err != nil {
+		// Clean up before reporting: the receiver may exit the process as soon as it
+		// sees the error, which would leave the directory behind.
+		os.RemoveAll(outdir)
+		errs <- err
+		return
+	}
+	out <- outdir
+}
+
+// generate writes one symbol file, named after its build ID, into outdir for each input
+// that has not been visited yet. It returns the first error encountered.
+func (g *generator) generate(in <-chan elflib.BinaryFileRef, outdir string) error {
+	for bfr := range in {
+		if !g.markVisited(bfr.Filepath) {
+			continue
+		}
+		sf, err := g.genFromBinaryFileRef(bfr)
+		if err != nil {
+			return err
+		}
+		outpath := filepath.Join(outdir, sf.ModuleSection.BuildID)
+		if err := writeSymbolFile(sf, outpath); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// writeSymbolFile writes sf to a new file at path. The file is closed before returning,
+// so a long run never holds more than one output file open at a time.
+func writeSymbolFile(sf *breakpad.SymbolFile, path string) error {
+	fd, err := os.Create(path)
+	if err != nil {
+		return err
+	}
+	if _, err := sf.WriteTo(fd); err != nil {
+		fd.Close()
+		return err
+	}
+	// Report Close errors too: a failed Close can mean the data never reached the disk.
+	return fd.Close()
+}
+
+// genFromBinaryFileRef generates the symbol file for bfr and rewrites its module section:
+// the OS is replaced with replacementModuleOS, and the module name with the binary's
+// soname, or with defaultModuleName if the soname is empty or cannot be read.
+func (g *generator) genFromBinaryFileRef(bfr elflib.BinaryFileRef) (*breakpad.SymbolFile, error) {
+	log.Printf("generating symbols for %q", bfr.Filepath)
+	sf, err := g.genSymbolFile(bfr)
+	if err != nil {
+		return nil, err
+	}
+	sf.ModuleSection.OS = replacementModuleOS
+	sf.ModuleSection.ModuleName = defaultModuleName
+	soname, err := g.readSoName(bfr.Filepath)
+	if err != nil {
+		// Best effort: keep the default module name, but don't hide the failure.
+		log.Printf("failed to read soname from %q: %v", bfr.Filepath, err)
+	} else if soname != "" {
+		sf.ModuleSection.ModuleName = soname
+	}
+	return sf, nil
+}
+
+// genSymbolFile runs dump_syms on bfr's file and parses the symbol data that it prints
+// on stdout.
+func (g *generator) genSymbolFile(bfr elflib.BinaryFileRef) (*breakpad.SymbolFile, error) {
+	var stdout bytes.Buffer
+	cmd := exec.Cmd{
+		Path:   g.dumpSymsPath,
+		Args:   []string{g.dumpSymsPath, bfr.Filepath},
+		Stdout: &stdout,
+	}
+	if err := cmd.Run(); err != nil {
+		return nil, fmt.Errorf("command failed %v: %v", cmd.Args, err)
+	}
+	return breakpad.ParseSymbolFile(&stdout)
+}
+
+// readSoName returns the soname that elflib reads from the file at path.
+func (g *generator) readSoName(path string) (string, error) {
+	fd, err := os.Open(path)
+	if err != nil {
+		return "", fmt.Errorf("open failed %q: %v", path, err)
+	}
+	defer fd.Close()
+	return elflib.GetSoName(path, fd)
+}
+
+// markVisited records that path has been visited. It returns true iff this generator had
+// not already visited path, and false otherwise.
+func (g *generator) markVisited(path string) (succeeded bool) {
+	g.visitedMutex.Lock()
+	defer g.visitedMutex.Unlock()
+	if g.visited[path] {
+		return false
+	}
+	g.visited[path] = true
+	return true
+}
diff --git a/cmd/dump_breakpad_symbols/main.go b/cmd/dump_breakpad_symbols/main.go
index 4943501..fe71304 100755
--- a/cmd/dump_breakpad_symbols/main.go
+++ b/cmd/dump_breakpad_symbols/main.go
@@ -7,17 +7,14 @@
 import (
 	// TODO(kjharland): change crypto/sha1 to a safer hash algorithm. sha256 or sha2, etc.
 	"archive/tar"
-	"bytes"
 	"compress/gzip"
 	"context"
 	"flag"
 	"fmt"
-	"io"
 	"log"
 	"os"
-	"os/exec"
 
-	"fuchsia.googlesource.com/tools/breakpad"
+	"fuchsia.googlesource.com/tools/breakpad/generator"
 	"fuchsia.googlesource.com/tools/elflib"
 	"fuchsia.googlesource.com/tools/tarutil"
 )
@@ -33,13 +30,6 @@
 $ dump_breakpad_symbols -dump-syms-path=dump_syms -tar-file=out.tar.gz -depfile=dep.out ./.build-ids
 `
 
-// The default module name for modules that don't have a soname, e.g., executables and
-// loadable modules. This allows us to use the same module name at runtime as sonames are
-// the only names that are guaranteed to be available at build and run times. This value
-// must be kept in sync with what Crashpad uses at run time for symbol resolution to work
-// properly.
-const defaultModuleName = "<_>"
-
 // Command line flag values
 var (
 	depFilepath  string
@@ -63,43 +53,29 @@
 
 func main() {
 	flag.Parse()
-	if err := execute(context.Background()); err != nil {
+	if err := execute(context.Background(), flag.Args()...); err != nil {
 		log.Fatal(err)
 	}
 }
 
-func execute(ctx context.Context) error {
-	// Open the input files for reading.  In practice there are very few files,
-	// so it's fine to open them all at once.
-	var inputReaders []io.Reader
-	inputPaths := flag.Args()
-	for _, path := range inputPaths {
-		file, err := os.Open(path)
+func execute(ctx context.Context, dirs ...string) error {
+	// Collect all binary file refs from each directory
+	var bfrs []elflib.BinaryFileRef
+	for _, dir := range dirs {
+		newbfrs, err := elflib.WalkBuildIDDir(dir)
 		if err != nil {
-			return fmt.Errorf("failed to open %s: %v\n", path, err)
+			return err
 		}
-		defer file.Close()
-		inputReaders = append(inputReaders, file)
+		bfrs = append(bfrs, newbfrs...)
 	}
 
-	// Process the IDsFiles.
-	symbolFiles := processIdsFiles(inputReaders)
-
-	// Write the Ninja dep file.
-	depfile := depfile{outputPath: tarFilepath, inputPaths: inputPaths}
-	depfd, err := os.Create(depFilepath)
+	// Generate all symbol files.
+	path, err := generator.Generate(bfrs, dumpSymsPath)
 	if err != nil {
-		return fmt.Errorf("failed to create file %q: %v", depFilepath, err)
-	}
-	n, err := depfile.WriteTo(depfd)
-	if err != nil {
-		return fmt.Errorf("failed to write Ninja dep file %q: %v", depFilepath, err)
-	}
-	if n == 0 {
-		return fmt.Errorf("wrote 0 bytes to %q", depFilepath)
+		log.Fatalf("failed to generate symbols: %v", err)
 	}
 
-	// Write the tar archive containing all symbol files.
+	// Write all files to the specified tar archive.
 	tarfd, err := os.Create(tarFilepath)
 	if err != nil {
 		return fmt.Errorf("failed to create %q: %v", tarFilepath, err)
@@ -108,92 +84,24 @@
 	defer gzw.Close()
 	tw := tar.NewWriter(gzw)
 	defer tw.Close()
-	for sf := range symbolFiles {
-		if err := tarutil.TarReader(tw, sf.Reader(), sf.ModuleSection.BuildID); err != nil {
-			return fmt.Errorf("failed to archive %q: %v", sf.ModuleSection.BuildID, err)
-		}
+
+	log.Printf("archiving %q to %q", path, tarFilepath)
+	if err := tarutil.TarDirectory(tw, path); err != nil {
+		return fmt.Errorf("failed to write %q: %v", tarFilepath, err)
+	}
+
+	// Write the Ninja dep file.
+	depfile := depfile{outputPath: tarFilepath, inputPaths: dirs}
+	depfd, err := os.Create(depFilepath)
+	if err != nil {
+		return fmt.Errorf("failed to create %q: %v", depFilepath, err)
+	}
+	n, err := depfile.WriteTo(depfd)
+	if err != nil {
+		return fmt.Errorf("failed to write %q: %v", depFilepath, err)
+	}
+	if n == 0 {
+		return fmt.Errorf("wrote 0 bytes to %q", depFilepath)
 	}
 	return nil
 }
-
-// processIdsFiles dumps symbol data for each executable in a set of ids files.
-func processIdsFiles(idsFiles []io.Reader) <-chan breakpad.SymbolFile {
-	output := make(chan breakpad.SymbolFile, 10000) // Arbitrary capacity.
-
-	go func() {
-		// Binary paths we've already seen.  Duplicates are skipped.
-		visited := make(map[string]bool)
-
-		// Iterate through the given set of filepaths.
-		for _, idsFile := range idsFiles {
-			// Extract the paths to each binary from the IDs file.
-			binaries, err := elflib.ReadIDsFile(idsFile)
-			if err != nil {
-				fmt.Fprintln(os.Stderr, err)
-				continue
-			}
-
-			// Generate the symbol file for each binary.
-			for _, bin := range binaries {
-				binaryPath := bin.Filepath
-
-				// Check whether we've seen this path already. Skip if so.
-				if _, ok := visited[binaryPath]; ok {
-					continue
-				}
-				// Record that we've seen this binary path.
-				visited[binaryPath] = true
-
-				sf, err := generateSymbolFile(binaryPath)
-				if err != nil {
-					log.Println(err)
-					continue
-				}
-
-				output <- *sf
-			}
-		}
-		close(output)
-	}()
-
-	return output
-}
-
-func generateSymbolFile(path string) (*breakpad.SymbolFile, error) {
-	var stdout bytes.Buffer
-	cmd := exec.Cmd{
-		Path:   dumpSymsPath,
-		Args:   []string{dumpSymsPath, path},
-		Stdout: &stdout,
-	}
-	if err := cmd.Run(); err != nil {
-		return nil, fmt.Errorf("cmd failed %v: %v", cmd.Args, err)
-	}
-
-	symbolFile, err := breakpad.ParseSymbolFile(&stdout)
-	if err != nil {
-		return nil, fmt.Errorf("failed to parse dump_syms output: %v", err)
-	}
-
-	// Ensure the module name is either the soname (for shared libraries) or the default
-	// value (for executables and loadable modules).
-	fd, err := os.Open(path)
-	if err != nil {
-		return nil, fmt.Errorf("failed to open %q: %v", path, err)
-	}
-	defer fd.Close()
-	soname, err := elflib.GetSoName(path, fd)
-	if err != nil {
-		return nil, fmt.Errorf("failed to read soname from %q: %v", path, err)
-	}
-	if soname == "" {
-		symbolFile.ModuleSection.ModuleName = defaultModuleName
-	} else {
-		symbolFile.ModuleSection.ModuleName = soname
-	}
-
-	// Ensure the module section specifies this is a Fuchsia binary instead of Linux
-	// binary, which is the default for the dump_syms tool.
-	symbolFile.ModuleSection.OS = "Fuchsia"
-	return symbolFile, nil
-}