[bloatalyzer] Refactor output data structure
Group the bloaty output by segment, file, and symbol so that it is easier to manipulate.
Change-Id: Iebc2bcb13e429f60de4122c8eb9cb3d23c3538ba
diff --git a/bloaty/bloaty.go b/bloaty/bloaty.go
index 77ca2cc..6177af8 100644
--- a/bloaty/bloaty.go
+++ b/bloaty/bloaty.go
@@ -10,27 +10,17 @@
"fmt"
"os"
"os/exec"
+ "sort"
"strings"
"sync"
)
type bloatyOutput struct {
- key string
- data Symbol
+ data row // one parsed bloaty row, sent on the channel as-is
file string
err error
}
-// Symbol represents all data about one symbol in the produced Bloaty output.
-type Symbol struct {
- Name string `json:"Name"`
- File string `json:"File"`
- Segs map[string]int `json:"Segs"`
- TotalVmsz uint64 `json:"TotalVmsz"`
- TotalFilesz uint64 `json:"TotalFilesz"`
- Binaries []string `json:"Binaries"`
-}
-
// TODO(jakehehrlich): Add reading ids.txt to elflib, since there are now three
// different tools that each need to read it for mostly unrelated reasons
func getFiles(idsPath string) ([]string, error) {
@@ -83,20 +73,10 @@
}()
for r := range rows {
- data := bloatyOutput{
- key: r.Symbol + ":" + r.File,
- data: Symbol{
- Name: r.Symbol,
- File: r.File,
- Segs: make(map[string]int),
- TotalVmsz: r.Vmsz,
- TotalFilesz: r.Filesz,
- Binaries: append([]string{}, file),
- },
+ out <- bloatyOutput{
+ data: r,
file: file,
}
- data.data.Segs[r.Seg] += 1
- out <- data
}
if err := cmd.Wait(); err != nil {
@@ -106,28 +86,108 @@
}
-func updateSymbol(sym, newSym Symbol, file string) Symbol {
- sym.Name = newSym.Name
- sym.File = newSym.File
- // TODO: Filtering by section would allow some of these symbols to be ignored
- // or considered on a more useful global level.
- for seg, count := range newSym.Segs {
- sym.Segs[seg] += count
- }
- sym.TotalVmsz += newSym.TotalVmsz
- sym.TotalFilesz += newSym.TotalFilesz
+// updateSymbol folds the row newSym into sym in place: the symbol name is
+// copied over, the row's Vmsz and Filesz are added to the running totals, and
+// file is appended to the list of binaries the symbol was seen in.
+func updateSymbol(newSym *row, file string, sym *Symbol) {
+	sym.Name = newSym.Symbol
+	sym.Filesz += newSym.Filesz
+	sym.Vmsz += newSym.Vmsz
sym.Binaries = append(sym.Binaries, file)
- return sym
}
-func RunBloaty(bloatyPath, idsPath string) (map[string]Symbol, error) {
+// addRowToOutput merges one parsed bloaty row into output, which is keyed by
+// segment name, then by file name, then by symbol name. Missing Segment, File
+// and Symbol entries are created on first use; the row's sizes are then folded
+// into the matching Symbol by updateSymbol and into the File's TotalFilesz.
+//
+// file is the binary the row was read from; it is recorded on the Symbol.
+func addRowToOutput(r *row, file string, output map[string]*Segment) {
+	seg, ok := output[r.Seg]
+	if !ok {
+		seg = &Segment{Files: make(map[string]*File)}
+		output[r.Seg] = seg
+	}
+
+	f, ok := seg.Files[r.File]
+	if !ok {
+		f = &File{Symbols: make(map[string]*Symbol)}
+		seg.Files[r.File] = f
+	}
+	// Keep the per-file total in step with the symbols stored under it;
+	// otherwise TotalFilesz stays zero for every file built from rows.
+	f.TotalFilesz += r.Filesz
+
+	sym, ok := f.Symbols[r.Symbol]
+	if !ok {
+		sym = &Symbol{}
+		f.Symbols[r.Symbol] = sym
+	}
+	// seg, f and sym are pointers that already live in their maps, so nothing
+	// has to be written back after the update.
+	updateSymbol(r, file, sym)
+}
+
+// getTopN trims output in place so that only the largest files and symbols
+// keep their own entries.
+//
+// fileSizes maps a file name to its size. When topFiles is non-zero and
+// smaller than len(fileSizes), every file outside the topFiles largest is
+// removed from each segment and its size is added to that segment's
+// "all small files" entry. When topSyms is non-zero, every file that is kept
+// retains only its topSyms largest symbols (ranked by Filesz); the rest are
+// summed into an "all small syms" entry. A limit of 0 disables the
+// corresponding trimming, and the summary entry for a limit is added whenever
+// that limit is non-zero, even if nothing was folded into it.
+//
+// Equal sizes are ordered by name, so the result does not depend on map
+// iteration order.
+func getTopN(fileSizes map[string]uint64, topFiles, topSyms uint64, output *map[string]*Segment) {
+	// If both topFiles and topSyms are 0, bail early because we're returning everything.
+	if topFiles == 0 && topSyms == 0 {
+		return
+	}
+
+	smallFiles := smallestFiles(fileSizes, topFiles)
+	for _, segData := range *output {
+		var smallFilesSize uint64
+		for file, fileData := range segData.Files {
+			// Deleting the entry currently being visited is safe during a map range.
+			if size, exists := smallFiles[file]; exists {
+				// NOTE(review): size comes from fileSizes, i.e. it is the file's
+				// whole size rather than its share of this segment. Confirm
+				// that this is the intended value for files that span several
+				// segments.
+				smallFilesSize += size
+				delete(segData.Files, file)
+				continue
+			}
+			if topSyms > 0 {
+				foldSmallSymbols(fileData, topSyms)
+			}
+		}
+
+		if topFiles > 0 {
+			if segData.Files == nil {
+				segData.Files = make(map[string]*File)
+			}
+			segData.Files["all small files"] = &File{TotalFilesz: smallFilesSize}
+		}
+	}
+}
+
+// smallestFiles returns the entries of fileSizes that fall outside the
+// topFiles largest ones, keyed by name. The result is empty when topFiles is 0
+// or when there are no more than topFiles files.
+func smallestFiles(fileSizes map[string]uint64, topFiles uint64) map[string]uint64 {
+	small := make(map[string]uint64)
+	if topFiles == 0 || topFiles >= uint64(len(fileSizes)) {
+		return small
+	}
+
+	type sortedFile struct {
+		name string
+		size uint64
+	}
+	all := make([]sortedFile, 0, len(fileSizes))
+	for name, size := range fileSizes {
+		all = append(all, sortedFile{name, size})
+	}
+	// Ascending by size; equal sizes are ordered by descending name so that the
+	// alphabetically earlier file is the one that is kept.
+	sort.Slice(all, func(i, j int) bool {
+		if all[i].size != all[j].size {
+			return all[i].size < all[j].size
+		}
+		return all[i].name > all[j].name
+	})
+
+	for _, d := range all[:uint64(len(all))-topFiles] {
+		small[d.name] = d.size
+	}
+	return small
+}
+
+// foldSmallSymbols keeps the topSyms largest symbols of f (ranked by Filesz)
+// and replaces the rest with a single "all small syms" entry that carries
+// their summed Vmsz and Filesz. The entry is added even when nothing was
+// folded into it.
+func foldSmallSymbols(f *File, topSyms uint64) {
+	if f == nil {
+		return
+	}
+	if f.Symbols == nil {
+		// A File without a symbol map (for example an "all small files" entry)
+		// would otherwise panic on the map write below.
+		f.Symbols = make(map[string]*Symbol)
+	}
+
+	smallSyms := &Symbol{Name: "all small syms"}
+	if topSyms < uint64(len(f.Symbols)) {
+		// Rank by map key rather than Symbol.Name so that the entry that gets
+		// deleted is always the one that was counted.
+		keys := make([]string, 0, len(f.Symbols))
+		for key := range f.Symbols {
+			keys = append(keys, key)
+		}
+		sort.Slice(keys, func(i, j int) bool {
+			a, b := f.Symbols[keys[i]], f.Symbols[keys[j]]
+			if a.Filesz != b.Filesz {
+				return a.Filesz < b.Filesz
+			}
+			return keys[i] > keys[j]
+		})
+
+		for _, key := range keys[:uint64(len(keys))-topSyms] {
+			sym := f.Symbols[key]
+			smallSyms.Vmsz += sym.Vmsz
+			smallSyms.Filesz += sym.Filesz
+			delete(f.Symbols, key)
+		}
+	}
+	f.Symbols["all small syms"] = smallSyms
+}
+
+// RunBloaty runs bloaty on all files in ids.txt, and returns a mapping of the
+// symbols and files by segment.
+func RunBloaty(bloatyPath, idsPath string, topFiles, topSyms uint64) (map[string]*Segment, error) {
files, err := getFiles(idsPath)
if err != nil {
return nil, err
}
var wg sync.WaitGroup
- output := make(map[string]Symbol)
+ output := make(map[string]*Segment)
+ fileSizes := make(map[string]uint64)
data := make(chan bloatyOutput)
for _, file := range files {
@@ -145,15 +205,13 @@
for d := range data {
if d.err != nil {
- fmt.Printf("error: %v", d.err)
+ fmt.Printf("%v", d.err)
continue
}
- if sym, ok := output[d.key]; !ok {
- output[d.key] = d.data
- } else {
- output[d.key] = updateSymbol(sym, d.data, d.file)
- }
+ addRowToOutput(&d.data, d.file, output)
+ fileSizes[d.data.File] += d.data.Filesz
}
+ getTopN(fileSizes, topFiles, topSyms, &output)
return output, nil
}
diff --git a/bloaty/bloaty_test.go b/bloaty/bloaty_test.go
index 0506324..2acf8cd 100644
--- a/bloaty/bloaty_test.go
+++ b/bloaty/bloaty_test.go
@@ -49,3 +49,237 @@
}
}
}
+
+// TestGetTopNFiles checks that getTopN with topFiles=1 and topSyms=0 keeps only
+// the largest file (file.c) and replaces every other file with a per-segment
+// "all small files" entry holding their combined size.
+func TestGetTopNFiles(t *testing.T) {
+	input := map[string]*Segment{
+		"LOAD [R]": {
+			Files: map[string]*File{
+				"file.c": {
+					TotalFilesz: 14,
+					Symbols: map[string]*Symbol{
+						"ecm_bind": {
+							Name:     "ecm_bind",
+							Vmsz:     14,
+							Filesz:   14,
+							Binaries: []string{"lib.so"},
+						},
+					},
+				},
+				"different.c": {
+					TotalFilesz: 5,
+					Symbols: map[string]*Symbol{
+						"ecm_bind": {
+							Name:     "ecm_bind",
+							Vmsz:     23,
+							Filesz:   5,
+							Binaries: []string{"lib.so"},
+						},
+					},
+				},
+			},
+		},
+		"LOAD [RX]": {
+			Files: map[string]*File{
+				"other.c": {
+					TotalFilesz: 1,
+					Symbols: map[string]*Symbol{
+						"ecm_bind": {
+							Name:     "ecm_bind",
+							Vmsz:     1,
+							Filesz:   1,
+							Binaries: []string{"lib.so"},
+						},
+					},
+				},
+				"test.c": {
+					TotalFilesz: 4,
+					Symbols: map[string]*Symbol{
+						"test": {
+							Name:     "test",
+							Vmsz:     4,
+							Filesz:   4,
+							Binaries: []string{"lib.so"},
+						},
+					},
+				},
+			},
+		},
+	}
+
+	fileSizes := map[string]uint64{
+		"file.c":      14,
+		"different.c": 5,
+		"other.c":     1,
+		"test.c":      4,
+	}
+
+	getTopN(fileSizes, 1, 0, &input)
+	if len(input) != 2 {
+		t.Fatalf("In TestGetTopNFiles, len is wrong: \n%+v", input)
+	}
+
+	// LOAD [R] keeps file.c and gains the summary entry for different.c.
+	if _, ok := input["LOAD [R]"]; !ok {
+		t.Fatalf("In TestGetTopNFiles, missing LOAD [R]: \n%+v", input)
+	}
+
+	if len(input["LOAD [R]"].Files) != 2 {
+		t.Fatalf("In TestGetTopNFiles, len is wrong: \n%+v", input["LOAD [R]"].Files)
+	}
+
+	if val, ok := input["LOAD [R]"].Files["file.c"]; !ok {
+		t.Fatalf("In TestGetTopNFiles, missing file.c: \n%+v", input["LOAD [R]"].Files)
+	} else if val.TotalFilesz != 14 {
+		t.Fatalf("In TestGetTopNFiles, filesz is wrong: \n%+v", val)
+	}
+
+	if val, ok := input["LOAD [R]"].Files["all small files"]; !ok {
+		t.Fatalf("In TestGetTopNFiles, missing all small files: \n%+v", input["LOAD [R]"].Files)
+	} else if val.TotalFilesz != 5 {
+		t.Fatalf("In TestGetTopNFiles, filesz is wrong: \n%+v", val)
+	}
+
+	// LOAD [RX] loses both of its files and is left with the summary entry only.
+	if _, ok := input["LOAD [RX]"]; !ok {
+		t.Fatalf("In TestGetTopNFiles, missing LOAD [RX]: \n%+v", input)
+	}
+
+	if len(input["LOAD [RX]"].Files) != 1 {
+		t.Fatalf("In TestGetTopNFiles, len is wrong: \n%+v", input["LOAD [RX]"].Files)
+	}
+
+	if val, ok := input["LOAD [RX]"].Files["all small files"]; !ok {
+		t.Fatalf("In TestGetTopNFiles, missing all small files: \n%+v", input["LOAD [RX]"].Files)
+	} else if val.TotalFilesz != 5 {
+		t.Fatalf("In TestGetTopNFiles, filesz is wrong: \n%+v", val)
+	}
+}
+
+// TestGetTopNSymbols checks that getTopN with topFiles=0 and topSyms=1 leaves
+// the file list alone, keeps the largest symbol of file.c (ecm_bind) and sums
+// the other two symbols into an "all small syms" entry.
+func TestGetTopNSymbols(t *testing.T) {
+	input := map[string]*Segment{
+		"LOAD [R]": {
+			Files: map[string]*File{
+				"file.c": {
+					TotalFilesz: 14,
+					Symbols: map[string]*Symbol{
+						"ecm_bind": {
+							Name:     "ecm_bind",
+							Vmsz:     14,
+							Filesz:   14,
+							Binaries: []string{"lib.so"},
+						},
+						"test": {
+							Name:     "test",
+							Vmsz:     23,
+							Filesz:   5,
+							Binaries: []string{"lib.so"},
+						},
+						"other": {
+							Name:     "other",
+							Vmsz:     5,
+							Filesz:   5,
+							Binaries: []string{"lib.so"},
+						},
+					},
+				},
+			},
+		},
+	}
+
+	fileSizes := map[string]uint64{
+		"file.c":      14,
+		"different.c": 5,
+		"other.c":     1,
+		"test.c":      4,
+	}
+
+	getTopN(fileSizes, 0, 1, &input)
+	if len(input) != 1 {
+		t.Fatalf("In TestGetTopNSymbols, len is wrong: \n%+v", input)
+	}
+
+	if _, ok := input["LOAD [R]"]; !ok {
+		t.Fatalf("In TestGetTopNSymbols, missing LOAD [R]: \n%+v", input)
+	}
+
+	if len(input["LOAD [R]"].Files) != 1 {
+		t.Fatalf("In TestGetTopNSymbols, len is wrong: \n%+v", input["LOAD [R]"].Files)
+	}
+
+	if val, ok := input["LOAD [R]"].Files["file.c"]; !ok {
+		t.Fatalf("In TestGetTopNSymbols, missing file.c: \n%+v", input["LOAD [R]"].Files)
+	} else if val.TotalFilesz != 14 {
+		t.Fatalf("In TestGetTopNSymbols, filesz is wrong: \n%+v", val)
+	}
+
+	if len(input["LOAD [R]"].Files["file.c"].Symbols) != 2 {
+		t.Fatalf("In TestGetTopNSymbols, len is wrong: \n%+v", input["LOAD [R]"].Files["file.c"].Symbols)
+	}
+
+	if val, ok := input["LOAD [R]"].Files["file.c"].Symbols["ecm_bind"]; !ok {
+		t.Fatalf("In TestGetTopNSymbols, missing ecm_bind: \n%+v", input["LOAD [R]"].Files)
+	} else if val.Filesz != 14 {
+		t.Fatalf("In TestGetTopNSymbols, filesz is wrong: \n%+v", val)
+	}
+
+	// The two 5-byte symbols are folded together, so the summary holds 10.
+	if val, ok := input["LOAD [R]"].Files["file.c"].Symbols["all small syms"]; !ok {
+		t.Fatalf("In TestGetTopNSymbols, missing all small syms: \n%+v", input["LOAD [R]"].Files["file.c"].Symbols)
+	} else if val.Filesz != 10 {
+		t.Fatalf("In TestGetTopNSymbols, filesz is wrong: \n%+v", val)
+	}
+}
+
+// TestAddRowToOutput feeds four rows through addRowToOutput and verifies that
+// each one ends up under its own segment, file and symbol with the name and
+// sizes it carried.
+func TestAddRowToOutput(t *testing.T) {
+	rows := []row{
+		{"ecm_bind", "other.c", "LOAD [RX]", 7, 7},
+		{"test", "other.c", "LOAD [RX]", 12, 2},
+		{"ecm_bind", "other.c", "LOAD [R]", 23, 5},
+		{"ecm_bind", "file.c", "LOAD [R]", 3, 3},
+	}
+
+	actual := make(map[string]*Segment)
+	for i := range rows {
+		addRowToOutput(&rows[i], rows[i].File, actual)
+	}
+
+	// Row 0: ecm_bind in other.c, LOAD [RX], sizes 7/7.
+	rx, ok := actual["LOAD [RX]"]
+	if !ok {
+		t.Fatalf("In TestAddRowToOutput, got \n%+v", actual)
+	}
+	rxOther, ok := rx.Files["other.c"]
+	if !ok {
+		t.Fatalf("In TestAddRowToOutput, missing LOAD[RX] other.c")
+	}
+	sym, ok := rxOther.Symbols["ecm_bind"]
+	if !ok {
+		t.Fatalf("In TestAddRowToOutput, missing LOAD[RX] other.c ecm_bind")
+	}
+	if sym.Name != "ecm_bind" || sym.Vmsz != 7 || sym.Filesz != 7 {
+		t.Fatalf("In TestAddRowToOutput, got \n%+v", sym)
+	}
+
+	// Row 1: test in other.c, LOAD [RX], sizes 12/2.
+	sym, ok = rxOther.Symbols["test"]
+	if !ok {
+		t.Fatalf("In TestAddRowToOutput, missing LOAD[RX] other.c test")
+	}
+	if sym.Name != "test" || sym.Vmsz != 12 || sym.Filesz != 2 {
+		t.Fatalf("In TestAddRowToOutput, got \n%+v", sym)
+	}
+
+	// Row 2: ecm_bind in other.c, LOAD [R], sizes 23/5.
+	ro, ok := actual["LOAD [R]"]
+	if !ok {
+		t.Fatalf("In TestAddRowToOutput, missing LOAD[R]")
+	}
+	roOther, ok := ro.Files["other.c"]
+	if !ok {
+		t.Fatalf("In TestAddRowToOutput, missing LOAD[R]:other.c")
+	}
+	sym, ok = roOther.Symbols["ecm_bind"]
+	if !ok {
+		t.Fatalf("In TestAddRowToOutput, missing LOAD[R] other.c ecm_bind")
+	}
+	if sym.Name != "ecm_bind" || sym.Vmsz != 23 || sym.Filesz != 5 {
+		t.Fatalf("In TestAddRowToOutput, got \n%+v", sym)
+	}
+
+	// Row 3: ecm_bind in file.c, LOAD [R], sizes 3/3.
+	roFile, ok := ro.Files["file.c"]
+	if !ok {
+		t.Fatalf("In TestAddRowToOutput, missing LOAD[R] file.c")
+	}
+	sym, ok = roFile.Symbols["ecm_bind"]
+	if !ok {
+		t.Fatalf("In TestAddRowToOutput, missing LOAD[R] file.c ecm_bind")
+	}
+	if sym.Name != "ecm_bind" || sym.Vmsz != 3 || sym.Filesz != 3 {
+		t.Fatalf("In TestAddRowToOutput, got \n%+v", sym)
+	}
+}
diff --git a/bloaty/symbols.go b/bloaty/symbols.go
index 566a9b9..afb33b7 100644
--- a/bloaty/symbols.go
+++ b/bloaty/symbols.go
@@ -19,6 +19,25 @@
Filesz uint64
}
+// Symbol represents the data accumulated for one symbol in the produced Bloaty output.
+type Symbol struct {
+ Name string `json:"Name"` // symbol name, copied from the row's Symbol field
+ Vmsz uint64 `json:"Vmsz"` // sum of Vmsz over every row merged into this symbol
+ Filesz uint64 `json:"Filesz"` // sum of Filesz over every row merged into this symbol
+ Binaries []string `json:"Binaries"` // one entry appended per merged row: the binary it was read from
+}
+
+// File represents all data about one file in the produced Bloaty output.
+type File struct {
+ Symbols map[string]*Symbol `json:"Symbols"` // keyed by symbol name
+ TotalFilesz uint64 `json:"TotalFilesz"` // total Filesz attributed to this file; getTopN sets it on the "all small files" entry
+}
+
+// Segment represents all data about one segment in the produced Bloaty output.
+type Segment struct {
+	// Files maps a file name to the data collected for that file within this
+	// segment. The explicit tag matches the sibling structs; the encoded key is
+	// the same as the untagged default.
+	Files map[string]*File `json:"Files"`
+}
+
func parseRow(rawRow []string) (row, bool, error) {
var out row
if len(rawRow) != 5 {
diff --git a/cmd/bloatalyzer/main.go b/cmd/bloatalyzer/main.go
index b7d97dd..f17d0f1 100644
--- a/cmd/bloatalyzer/main.go
+++ b/cmd/bloatalyzer/main.go
@@ -19,12 +19,16 @@
bloatyPath string
idsPath string
output string
+ topFiles uint64
+ topSyms uint64
)
func init() {
flag.StringVar(&bloatyPath, "b", "", "path to bloaty executable")
flag.StringVar(&idsPath, "i", "", "path to ids.txt")
flag.StringVar(&output, "o", "", "output path")
+ flag.Uint64Var(&topFiles, "top-files", 0, "max number of files to keep") // 0 (the default) keeps every file
+ flag.Uint64Var(&topSyms, "top-syms", 0, "max number of symbols to keep per file") // 0 (the default) keeps every symbol
}
func main() {
@@ -43,7 +47,7 @@
logger.Fatalf(ctx, "%s", "must provide path to ids.txt file.")
}
- data, err := bloaty.RunBloaty(bloatyPath, idsPath)
+ data, err := bloaty.RunBloaty(bloatyPath, idsPath, topFiles, topSyms)
if err != nil {
logger.Fatalf(ctx, "%v", err)
}