Revamp how source listing is produced so it works for inlined functions. (#599)

* Revamp how source listing is produced so it works for inlined functions.

Some other things improved due to these changes:
1. Produced output does not contain long runs of uninteresting source.
2. Time to produce the weblist page for a large binary drops from
   ~57s to ~5.5s.

* use keyed literals to satisfy extra checks

* Fix up file names for Windows (to use backslash instead of slash
as separator).

* Fix nil dereference when we attempt to close after encountering a missing object file

* Limit number of address ranges we process to avoid unbounded hangs

Stop printing address ranges after processing 25 of them. These ranges
are sorted by the number of samples that fell within them.

Change back to printing inner-most file:line next to an instruction
to reduce caller/callee confusion.

* Fix comment typo

Co-authored-by: Alexey Alexandrov <aalexand@users.noreply.github.com>
diff --git a/internal/driver/driver.go b/internal/driver/driver.go
index 878f2e1..3967a12 100644
--- a/internal/driver/driver.go
+++ b/internal/driver/driver.go
@@ -163,7 +163,7 @@
 	trim := cfg.Trim
 
 	switch cmd {
-	case "disasm", "weblist":
+	case "disasm":
 		trim = false
 		cfg.Granularity = "addresses"
 		// Force the 'noinlines' mode so that source locations for a given address
@@ -172,6 +172,10 @@
 		// This is because the merge is done by address and in case of an inlined
 		// stack each of the inlined entries is a separate callgraph node.
 		cfg.NoInlines = true
+	case "weblist":
+		trim = false
+		cfg.Granularity = "addresses"
+		cfg.NoInlines = false // Need inline info to support call expansion
 	case "peek":
 		trim = false
 	case "list":
diff --git a/internal/driver/driver_test.go b/internal/driver/driver_test.go
index 68c3c03..e00bed5 100644
--- a/internal/driver/driver_test.go
+++ b/internal/driver/driver_test.go
@@ -95,7 +95,7 @@
 		{"dot,inuse_space,flat,tagfocus=30kb:,tagignore=1mb:2mb", "heap"},
 		{"disasm=line[13],addresses,flat", "cpu"},
 		{"peek=line.*01", "cpu"},
-		{"weblist=line[13],addresses,flat", "cpu"},
+		{"weblist=line(1000|3000)$,addresses,flat", "cpu"},
 		{"tags,tagfocus=400kb:", "heap_request"},
 		{"tags,tagfocus=+400kb:", "heap_request"},
 		{"dot", "long_name_funcs"},
@@ -1585,24 +1585,28 @@
 }
 
 func (m *mockObjTool) Disasm(file string, start, end uint64, intelSyntax bool) ([]plugin.Inst, error) {
-	switch start {
-	case 0x1000:
-		return []plugin.Inst{
-			{Addr: 0x1000, Text: "instruction one", File: "file1000.src", Line: 1},
-			{Addr: 0x1001, Text: "instruction two", File: "file1000.src", Line: 1},
-			{Addr: 0x1002, Text: "instruction three", File: "file1000.src", Line: 2},
-			{Addr: 0x1003, Text: "instruction four", File: "file1000.src", Line: 1},
-		}, nil
-	case 0x3000:
-		return []plugin.Inst{
-			{Addr: 0x3000, Text: "instruction one"},
-			{Addr: 0x3001, Text: "instruction two"},
-			{Addr: 0x3002, Text: "instruction three"},
-			{Addr: 0x3003, Text: "instruction four"},
-			{Addr: 0x3004, Text: "instruction five"},
-		}, nil
+	const fn1 = "line1000"
+	const fn3 = "line3000"
+	const file1 = "testdata/file1000.src"
+	const file3 = "testdata/file3000.src"
+	data := []plugin.Inst{
+		{Addr: 0x1000, Text: "instruction one", Function: fn1, File: file1, Line: 1},
+		{Addr: 0x1001, Text: "instruction two", Function: fn1, File: file1, Line: 1},
+		{Addr: 0x1002, Text: "instruction three", Function: fn1, File: file1, Line: 2},
+		{Addr: 0x1003, Text: "instruction four", Function: fn1, File: file1, Line: 1},
+		{Addr: 0x3000, Text: "instruction one", Function: fn3, File: file3},
+		{Addr: 0x3001, Text: "instruction two", Function: fn3, File: file3},
+		{Addr: 0x3002, Text: "instruction three", Function: fn3, File: file3},
+		{Addr: 0x3003, Text: "instruction four", Function: fn3, File: file3},
+		{Addr: 0x3004, Text: "instruction five", Function: fn3, File: file3},
 	}
-	return nil, fmt.Errorf("unimplemented")
+	var result []plugin.Inst
+	for _, inst := range data {
+		if inst.Addr >= start && inst.Addr <= end {
+			result = append(result, inst)
+		}
+	}
+	return result, nil
 }
 
 type mockFile struct {
@@ -1630,7 +1634,52 @@
 // is in general a list of positions representing a call stack,
 // with the leaf function first.
 func (*mockFile) SourceLine(addr uint64) ([]plugin.Frame, error) {
-	return nil, fmt.Errorf("unimplemented")
+	// Return enough data to support the SourceLine() calls needed for
+	// weblist on cpuProfile() contents.
+	frame := func(fn, file string, line int) plugin.Frame {
+		return plugin.Frame{Func: fn, File: file, Line: line}
+	}
+	switch addr {
+	case 0x1000:
+		return []plugin.Frame{
+			frame("mangled1000", "testdata/file1000.src", 1),
+		}, nil
+	case 0x1001:
+		return []plugin.Frame{
+			frame("mangled1000", "testdata/file1000.src", 1),
+		}, nil
+	case 0x1002:
+		return []plugin.Frame{
+			frame("mangled1000", "testdata/file1000.src", 2),
+		}, nil
+	case 0x1003:
+		return []plugin.Frame{
+			frame("mangled1000", "testdata/file1000.src", 1),
+		}, nil
+	case 0x2000:
+		return []plugin.Frame{
+			frame("mangled2001", "testdata/file2000.src", 9),
+			frame("mangled2000", "testdata/file2000.src", 4),
+		}, nil
+	case 0x3000:
+		return []plugin.Frame{
+			frame("mangled3002", "testdata/file3000.src", 2),
+			frame("mangled3001", "testdata/file3000.src", 5),
+			frame("mangled3000", "testdata/file3000.src", 6),
+		}, nil
+	case 0x3001:
+		return []plugin.Frame{
+			frame("mangled3001", "testdata/file3000.src", 8),
+			frame("mangled3000", "testdata/file3000.src", 9),
+		}, nil
+	case 0x3002:
+		return []plugin.Frame{
+			frame("mangled3002", "testdata/file3000.src", 5),
+			frame("mangled3000", "testdata/file3000.src", 9),
+		}, nil
+	}
+
+	return nil, nil
 }
 
 // Symbols returns a list of symbols in the object file.
diff --git a/internal/driver/interactive_test.go b/internal/driver/interactive_test.go
index df09c9a..db9d7e3 100644
--- a/internal/driver/interactive_test.go
+++ b/internal/driver/interactive_test.go
@@ -226,7 +226,7 @@
 			"weblist  find -test",
 			map[string]string{
 				"granularity": "addresses",
-				"noinlines":   "true",
+				"noinlines":   "false",
 				"nodecount":   "0",
 				"sort":        "flat",
 				"ignore":      "test",
diff --git a/internal/driver/testdata/pprof.cpu.flat.addresses.disasm b/internal/driver/testdata/pprof.cpu.flat.addresses.disasm
index e1df7b1..57987e8 100644
--- a/internal/driver/testdata/pprof.cpu.flat.addresses.disasm
+++ b/internal/driver/testdata/pprof.cpu.flat.addresses.disasm
@@ -2,13 +2,13 @@
 ROUTINE ======================== line1000
      1.10s      1.10s (flat, cum) 98.21% of Total
      1.10s      1.10s       1000: instruction one                         ;line1000 file1000.src:1
-         .          .       1001: instruction two                         ;file1000.src:1
-         .          .       1002: instruction three                       ;file1000.src:2
-         .          .       1003: instruction four                        ;file1000.src:1
+         .          .       1001: instruction two
+         .          .       1002: instruction three                       ;line1000 file1000.src:2
+         .          .       1003: instruction four                        ;line1000 file1000.src:1
 ROUTINE ======================== line3000
       10ms      1.12s (flat, cum)   100% of Total
       10ms      1.01s       3000: instruction one                         ;line3000 file3000.src:6
          .      100ms       3001: instruction two                         ;line3000 file3000.src:9
          .       10ms       3002: instruction three
-         .          .       3003: instruction four
+         .          .       3003: instruction four                        ;line3000 file3000.src
          .          .       3004: instruction five
diff --git a/internal/driver/testdata/pprof.cpu.flat.addresses.weblist b/internal/driver/testdata/pprof.cpu.flat.addresses.weblist
index 743b68e..f765648 100644
--- a/internal/driver/testdata/pprof.cpu.flat.addresses.weblist
+++ b/internal/driver/testdata/pprof.cpu.flat.addresses.weblist
@@ -84,14 +84,18 @@
 <span class=line>      3</span> <span class=nop>           .          .           line3 </span>
 <span class=line>      4</span> <span class=nop>           .          .           line4 </span>
 <span class=line>      5</span> <span class=nop>           .          .           line5 </span>
-<span class=line>      6</span> <span class=deadsrc>        10ms      1.01s           line6 </span><span class=asm>                10ms      1.01s     3000:     instruction one                                                              <span class=unimportant>file3000.src:6</span>
+<span class=line>      6</span> <span class=deadsrc>        10ms      1.01s           line6 </span><span class=asm>                                          <span class=inlinesrc>    line5                                                                       </span> <span class=unimportant>file3000.src:5</span>
+                                          <span class=inlinesrc>        line2                                                                   </span> <span class=unimportant>file3000.src:2</span>
+                10ms      1.01s     3000:             instruction one                                                      <span class=unimportant>file3000.src:2</span>
 </span>
 <span class=line>      7</span> <span class=nop>           .          .           line7 </span>
 <span class=line>      8</span> <span class=nop>           .          .           line8 </span>
-<span class=line>      9</span> <span class=deadsrc>           .      110ms           line9 </span><span class=asm>                   .      100ms     3001:     instruction two                                                              <span class=unimportant>file3000.src:9</span>
-                   .       10ms     3002:     instruction three                                                            <span class=unimportant>file3000.src:9</span>
-                   .          .     3003:     instruction four                                                             <span class=unimportant></span>
-                   .          .     3004:     instruction five                                                             <span class=unimportant></span>
+<span class=line>      9</span> <span class=deadsrc>           .      110ms           line9 </span><span class=asm>                                          <span class=inlinesrc>    line8                                                                       </span> <span class=unimportant>file3000.src:8</span>
+                   .      100ms     3001:         instruction two                                                          <span class=unimportant>file3000.src:8</span>
+                                          <span class=inlinesrc>    line5                                                                       </span> <span class=unimportant>file3000.src:5</span>
+                   .       10ms     3002:         instruction three                                                        <span class=unimportant>file3000.src:5</span>
+                   .          .     3003:         instruction four                                                         <span class=unimportant></span>
+                   .          .     3004:         instruction five                                                         <span class=unimportant></span>
 </span>
 <span class=line>     10</span> <span class=nop>           .          .           line0 </span>
 <span class=line>     11</span> <span class=nop>           .          .           line1 </span>
diff --git a/internal/driver/webui_test.go b/internal/driver/webui_test.go
index f998ea4..3986cd3 100644
--- a/internal/driver/webui_test.go
+++ b/internal/driver/webui_test.go
@@ -75,8 +75,12 @@
 	testcases := []testCase{
 		{"/", []string{"F1", "F2", "F3", "testbin", "cpu"}, true},
 		{"/top", []string{`"Name":"F2","InlineLabel":"","Flat":200,"Cum":300,"FlatFormat":"200ms","CumFormat":"300ms"}`}, false},
-		{"/source?f=" + url.QueryEscape("F[12]"),
-			[]string{"F1", "F2", "300ms +line1"}, false},
+		{"/source?f=" + url.QueryEscape("F[12]"), []string{
+			"F1",
+			"F2",
+			`\. +300ms .*f1:asm`,    // Cumulative count for F1
+			"200ms +300ms .*f2:asm", // Flat + cumulative count for F2
+		}, false},
 		{"/peek?f=" + url.QueryEscape("F[12]"),
 			[]string{"300ms.*F1", "200ms.*300ms.*F2"}, false},
 		{"/disasm?f=" + url.QueryEscape("F[12]"),
@@ -174,9 +178,9 @@
 
 func (obj fakeObjTool) Disasm(file string, start, end uint64, intelSyntax bool) ([]plugin.Inst, error) {
 	return []plugin.Inst{
-		{Addr: addrBase + 0, Text: "f1:asm", Function: "F1"},
-		{Addr: addrBase + 10, Text: "f2:asm", Function: "F2"},
-		{Addr: addrBase + 20, Text: "d3:asm", Function: "F3"},
+		{Addr: addrBase + 10, Text: "f1:asm", Function: "F1", Line: 3},
+		{Addr: addrBase + 20, Text: "f2:asm", Function: "F2", Line: 11},
+		{Addr: addrBase + 30, Text: "d3:asm", Function: "F3", Line: 22},
 	}, nil
 }
 
@@ -196,7 +200,7 @@
 		{
 			ID:             1,
 			Start:          addrBase,
-			Limit:          addrBase + 10,
+			Limit:          addrBase + 100,
 			Offset:         0,
 			File:           "testbin",
 			HasFunctions:   true,
diff --git a/internal/report/source.go b/internal/report/source.go
index b480535..4f841ef 100644
--- a/internal/report/source.go
+++ b/internal/report/source.go
@@ -24,12 +24,15 @@
 	"io"
 	"os"
 	"path/filepath"
+	"regexp"
+	"sort"
 	"strconv"
 	"strings"
 
 	"github.com/google/pprof/internal/graph"
 	"github.com/google/pprof/internal/measurement"
 	"github.com/google/pprof/internal/plugin"
+	"github.com/google/pprof/profile"
 )
 
 // printSource prints an annotated source listing, include all
@@ -126,19 +129,68 @@
 	return nil
 }
 
+// sourcePrinter holds state needed for generating source+asm HTML listing.
+type sourcePrinter struct {
+	reader     *sourceReader
+	objectTool plugin.ObjTool
+	objects    map[string]plugin.ObjFile  // Opened object files
+	sym        *regexp.Regexp             // May be nil
+	files      map[string]*sourceFile     // Set of files to print.
+	insts      map[uint64]instructionInfo // Instructions of interest (keyed by address).
+
+	// Set of function names that we are interested in (because they had
+	// a sample and match sym).
+	interest map[string]bool
+
+	// Mapping from system function names to printable names.
+	prettyNames map[string]string
+}
+
+// instructionInfo holds collected information for an instruction.
+type instructionInfo struct {
+	objAddr   uint64 // Address in object file (with base subtracted out)
+	length    int    // Instruction length in bytes
+	disasm    string // Disassembly of instruction
+	file      string // For top-level function in which instruction occurs
+	line      int    // For top-level function in which instruction occurs
+	flat, cum int64  // Samples to report (divisor already applied)
+}
+
+// sourceFile contains collected information for files we will print.
+type sourceFile struct {
+	fname    string
+	cum      int64
+	flat     int64
+	lines    map[int][]sourceInst // Instructions to show per line
+	funcName map[int]string       // Function name per line
+}
+
+// sourceInst holds information for an instruction to be displayed.
+type sourceInst struct {
+	addr  uint64
+	stack []callID // Inlined call-stack
+}
+
+// sourceFunction contains information for a contiguous range of lines per function we
+// will print.
+type sourceFunction struct {
+	name       string
+	begin, end int // Line numbers (end is not included in the range)
+	flat, cum  int64
+}
+
+// addressRange is a range of addresses plus the object file that contains it.
+type addressRange struct {
+	begin, end uint64
+	obj        plugin.ObjFile
+	mapping    *profile.Mapping
+	score      int64 // Used to order ranges for processing
+}
+
 // PrintWebList prints annotated source listing of rpt to w.
+// rpt.prof should contain inlined call info.
 func PrintWebList(w io.Writer, rpt *Report, obj plugin.ObjTool, maxFiles int) error {
-	o := rpt.options
-	g := rpt.newGraph(nil)
-
-	// If the regexp source can be parsed as an address, also match
-	// functions that land on that address.
-	var address *uint64
-	if hex, err := strconv.ParseUint(o.Symbol.String(), 0, 64); err == nil {
-		address = &hex
-	}
-
-	sourcePath := o.SourcePath
+	sourcePath := rpt.options.SourcePath
 	if sourcePath == "" {
 		wd, err := os.Getwd()
 		if err != nil {
@@ -146,171 +198,485 @@
 		}
 		sourcePath = wd
 	}
-	reader := newSourceReader(sourcePath, o.TrimPath)
+	sp := newSourcePrinter(rpt, obj, sourcePath)
+	sp.print(w, maxFiles, rpt)
+	sp.close()
+	return nil
+}
 
-	type fileFunction struct {
-		fileName, functionName string
+func newSourcePrinter(rpt *Report, obj plugin.ObjTool, sourcePath string) *sourcePrinter {
+	sp := &sourcePrinter{
+		reader:      newSourceReader(sourcePath, rpt.options.TrimPath),
+		objectTool:  obj,
+		objects:     map[string]plugin.ObjFile{},
+		sym:         rpt.options.Symbol,
+		files:       map[string]*sourceFile{},
+		insts:       map[uint64]instructionInfo{},
+		prettyNames: map[string]string{},
+		interest:    map[string]bool{},
 	}
 
-	// Extract interesting symbols from binary files in the profile and
-	// classify samples per symbol.
-	symbols := symbolsFromBinaries(rpt.prof, g, o.Symbol, address, obj)
-	symNodes := nodesPerSymbol(g.Nodes, symbols)
+	// If the regexp source can be parsed as an address, also match
+	// functions that land on that address.
+	var address *uint64
+	if sp.sym != nil {
+		if hex, err := strconv.ParseUint(sp.sym.String(), 0, 64); err == nil {
+			address = &hex
+		}
+	}
 
-	// Identify sources associated to a symbol by examining
-	// symbol samples. Classify samples per source file.
-	fileNodes := make(map[fileFunction]graph.Nodes)
-	if len(symNodes) == 0 {
-		for _, n := range g.Nodes {
-			if n.Info.File == "" || !o.Symbol.MatchString(n.Info.Name) {
+	addrs := map[uint64]bool{}
+	flat := map[uint64]int64{}
+	cum := map[uint64]int64{}
+
+	// Record an interest in the function corresponding to lines[index].
+	markInterest := func(addr uint64, lines []profile.Line, index int) {
+		fn := lines[index]
+		if fn.Function == nil {
+			return
+		}
+		sp.interest[fn.Function.Name] = true
+		sp.interest[fn.Function.SystemName] = true
+		addrs[addr] = true
+	}
+
+	// See if sp.sym matches line.
+	matches := func(line profile.Line) bool {
+		if line.Function == nil {
+			return false
+		}
+		return sp.sym.MatchString(line.Function.Name) ||
+			sp.sym.MatchString(line.Function.SystemName) ||
+			sp.sym.MatchString(line.Function.Filename)
+	}
+
+	// Extract sample counts and compute set of interesting functions.
+	for _, sample := range rpt.prof.Sample {
+		value := rpt.options.SampleValue(sample.Value)
+		if rpt.options.SampleMeanDivisor != nil {
+			div := rpt.options.SampleMeanDivisor(sample.Value)
+			if div != 0 {
+				value /= div
+			}
+		}
+
+		// Find call-sites matching sym.
+		for i := len(sample.Location) - 1; i >= 0; i-- {
+			loc := sample.Location[i]
+			for _, line := range loc.Line {
+				if line.Function == nil {
+					continue
+				}
+				sp.prettyNames[line.Function.SystemName] = line.Function.Name
+			}
+
+			cum[loc.Address] += value
+			if i == 0 {
+				flat[loc.Address] += value
+			}
+
+			if sp.sym == nil || (address != nil && loc.Address == *address) {
+				// Interested in top-level entry of stack.
+				if len(loc.Line) > 0 {
+					markInterest(loc.Address, loc.Line, len(loc.Line)-1)
+				}
 				continue
 			}
-			ff := fileFunction{n.Info.File, n.Info.Name}
-			fileNodes[ff] = append(fileNodes[ff], n)
-		}
-	} else {
-		for _, nodes := range symNodes {
-			for _, n := range nodes {
-				if n.Info.File != "" {
-					ff := fileFunction{n.Info.File, n.Info.Name}
-					fileNodes[ff] = append(fileNodes[ff], n)
+
+			// Search in inlined stack for a match.
+			matchFile := (loc.Mapping != nil && sp.sym.MatchString(loc.Mapping.File))
+			for j, line := range loc.Line {
+				if (j == 0 && matchFile) || matches(line) {
+					markInterest(loc.Address, loc.Line, j)
 				}
 			}
 		}
 	}
 
-	if len(fileNodes) == 0 {
-		return fmt.Errorf("no source information for %s", o.Symbol.String())
-	}
+	sp.expandAddresses(rpt, addrs, flat)
+	sp.initSamples(flat, cum)
+	return sp
+}
 
-	sourceFiles := make(graph.Nodes, 0, len(fileNodes))
-	for _, nodes := range fileNodes {
-		sNode := *nodes[0]
-		sNode.Flat, sNode.Cum = nodes.Sum()
-		sourceFiles = append(sourceFiles, &sNode)
-	}
-
-	// Limit number of files printed?
-	if maxFiles < 0 {
-		sourceFiles.Sort(graph.FileOrder)
-	} else {
-		sourceFiles.Sort(graph.FlatNameOrder)
-		if maxFiles < len(sourceFiles) {
-			sourceFiles = sourceFiles[:maxFiles]
+func (sp *sourcePrinter) close() {
+	for _, objFile := range sp.objects {
+		if objFile != nil {
+			objFile.Close()
 		}
 	}
+}
 
-	// Print each file associated with this function.
-	for _, n := range sourceFiles {
-		ff := fileFunction{n.Info.File, n.Info.Name}
-		fns := fileNodes[ff]
+func (sp *sourcePrinter) expandAddresses(rpt *Report, addrs map[uint64]bool, flat map[uint64]int64) {
+	// We found interesting addresses (ones with non-zero samples) above.
+	// Get covering address ranges and disassemble the ranges.
+	ranges := sp.splitIntoRanges(rpt.prof, addrs, flat)
 
-		asm := assemblyPerSourceLine(symbols, fns, ff.fileName, obj, o.IntelSyntax)
-		start, end := sourceCoordinates(asm)
+	// Trim ranges if there are too many.
+	const maxRanges = 25
+	sort.Slice(ranges, func(i, j int) bool {
+		return ranges[i].score > ranges[j].score
+	})
+	if len(ranges) > maxRanges {
+		ranges = ranges[:maxRanges]
+	}
 
-		fnodes, path, err := getSourceFromFile(ff.fileName, reader, fns, start, end)
+	for _, r := range ranges {
+		base := r.obj.Base()
+		insts, err := sp.objectTool.Disasm(r.mapping.File, r.begin-base, r.end-base,
+			rpt.options.IntelSyntax)
 		if err != nil {
-			fnodes, path = getMissingFunctionSource(ff.fileName, asm, start, end)
+			// TODO(sanjay): Report that the covered addresses are missing.
+			continue
 		}
 
-		printFunctionHeader(w, ff.functionName, path, n.Flat, n.Cum, rpt)
-		for _, fn := range fnodes {
-			printFunctionSourceLine(w, fn, asm[fn.Info.Lineno], reader, rpt)
+		var lastFrames []plugin.Frame
+		var lastAddr, maxAddr uint64
+		for i, inst := range insts {
+			addr := inst.Addr + base
+
+			// Guard against duplicate output from Disasm.
+			if addr <= maxAddr {
+				continue
+			}
+			maxAddr = addr
+
+			length := 1
+			if i+1 < len(insts) && insts[i+1].Addr > inst.Addr {
+				// Extend to next instruction.
+				length = int(insts[i+1].Addr - inst.Addr)
+			}
+
+			// Get inlined-call-stack for address.
+			frames, err := r.obj.SourceLine(addr)
+			if err != nil {
+				// Construct a frame from disassembler output.
+				frames = []plugin.Frame{{Func: inst.Function, File: inst.File, Line: inst.Line}}
+			}
+
+			x := instructionInfo{objAddr: inst.Addr, length: length, disasm: inst.Text}
+			if len(frames) > 0 {
+				// We could consider using the outer-most caller's source
+				// location so we give some hint as to where the
+				// inlining happened that led to this instruction. So for
+				// example, suppose we have the following (inlined) call
+				// chains for this instruction:
+				//   F1->G->H
+				//   F2->G->H
+				// We could tag the instructions from the first call with
+				// F1 and instructions from the second call with F2. But
+				// that leads to a somewhat confusing display. So for now,
+				// we stick with just the inner-most location (i.e., H).
+				// In the future we will consider changing the display to
+				// make caller info more visible.
+				index := 0 // Inner-most frame
+				x.file = frames[index].File
+				x.line = frames[index].Line
+			}
+			sp.insts[addr] = x
+
+			// We sometimes get instructions with a zero reported line number.
+			// Make such instructions have the same line info as the preceding
+			// instruction, if an earlier instruction is found close enough.
+			const neighborhood = 32
+			if len(frames) > 0 && frames[0].Line != 0 {
+				lastFrames = frames
+				lastAddr = addr
+			} else if (addr-lastAddr <= neighborhood) && lastFrames != nil {
+				frames = lastFrames
+			}
+
+			// See if the stack contains a function we are interested in.
+			for i, f := range frames {
+				if !sp.interest[f.Func] {
+					continue
+				}
+
+				// Record sub-stack under frame's file/line.
+				fname := canonicalizeFileName(f.File)
+				file := sp.files[fname]
+				if file == nil {
+					file = &sourceFile{
+						fname:    fname,
+						lines:    map[int][]sourceInst{},
+						funcName: map[int]string{},
+					}
+					sp.files[fname] = file
+				}
+				callees := frames[:i]
+				stack := make([]callID, 0, len(callees))
+				for j := len(callees) - 1; j >= 0; j-- { // Reverse so caller is first
+					stack = append(stack, callID{
+						file: callees[j].File,
+						line: callees[j].Line,
+					})
+				}
+				file.lines[f.Line] = append(file.lines[f.Line], sourceInst{addr, stack})
+
+				// Remember the first function name encountered per source line
+				// and assume that that line belongs to that function.
+				if _, ok := file.funcName[f.Line]; !ok {
+					file.funcName[f.Line] = f.Func
+				}
+			}
+		}
+	}
+}
+
+// splitIntoRanges converts the set of addresses we are interested in into a set of address
+// ranges to disassemble.
+func (sp *sourcePrinter) splitIntoRanges(prof *profile.Profile, set map[uint64]bool, flat map[uint64]int64) []addressRange {
+	// List of mappings so we can stop expanding address ranges at mapping boundaries.
+	mappings := append([]*profile.Mapping{}, prof.Mapping...)
+	sort.Slice(mappings, func(i, j int) bool { return mappings[i].Start < mappings[j].Start })
+
+	var result []addressRange
+	addrs := make([]uint64, 0, len(set))
+	for addr := range set {
+		addrs = append(addrs, addr)
+	}
+	sort.Slice(addrs, func(i, j int) bool { return addrs[i] < addrs[j] })
+
+	mappingIndex := 0
+	const expand = 500 // How much to expand range to pick up nearby addresses.
+	for i, n := 0, len(addrs); i < n; {
+		begin, end := addrs[i], addrs[i]
+		sum := flat[begin]
+		i++
+
+		// Advance to mapping containing addrs[i]
+		for mappingIndex < len(mappings) && mappings[mappingIndex].Limit <= begin {
+			mappingIndex++
+		}
+		if mappingIndex >= len(mappings) {
+			// TODO(sanjay): Report missed address and its samples.
+			break
+		}
+		m := mappings[mappingIndex]
+		obj := sp.objectFile(m)
+		if obj == nil {
+			// TODO(sanjay): Report missed address and its samples.
+			continue
+		}
+
+		// Find following addresses that are close enough to addrs[i].
+		for i < n && addrs[i] <= end+2*expand && addrs[i] < m.Limit {
+			// When we expand ranges by "expand" on either side, the ranges
+			// for addrs[i] and addrs[i-1] will merge.
+			end = addrs[i]
+			sum += flat[end]
+			i++
+		}
+		if m.Start-begin >= expand {
+			begin -= expand
+		} else {
+			begin = m.Start
+		}
+		if m.Limit-end >= expand {
+			end += expand
+		} else {
+			end = m.Limit
+		}
+
+		result = append(result, addressRange{begin, end, obj, m, sum})
+	}
+	return result
+}
+
+func (sp *sourcePrinter) initSamples(flat, cum map[uint64]int64) {
+	for addr, inst := range sp.insts {
+		// Move all samples that were assigned to the middle of an instruction to the
+		// beginning of that instruction. This takes care of samples that were recorded
+		// against pc+1.
+		instEnd := addr + uint64(inst.length)
+		for p := addr; p < instEnd; p++ {
+			inst.flat += flat[p]
+			inst.cum += cum[p]
+		}
+		sp.insts[addr] = inst
+	}
+}
+
+func (sp *sourcePrinter) print(w io.Writer, maxFiles int, rpt *Report) {
+	// Finalize per-file counts.
+	for _, file := range sp.files {
+		seen := map[uint64]bool{}
+		for _, line := range file.lines {
+			for _, x := range line {
+				if seen[x.addr] {
+					// Same address can be displayed multiple times in a file
+					// (e.g., if we show multiple inlined functions).
+					// Avoid double-counting samples in this case.
+					continue
+				}
+				seen[x.addr] = true
+				inst := sp.insts[x.addr]
+				file.cum += inst.cum
+				file.flat += inst.flat
+			}
+		}
+	}
+
+	// Get sorted list of files to print.
+	var files []*sourceFile
+	for _, f := range sp.files {
+		files = append(files, f)
+	}
+	order := func(i, j int) bool { return files[i].flat > files[j].flat }
+	if maxFiles < 0 {
+		// Order by name for compatibility with old code.
+		order = func(i, j int) bool { return files[i].fname < files[j].fname }
+		maxFiles = len(files)
+	}
+	sort.Slice(files, order)
+	for i, f := range files {
+		if i < maxFiles {
+			sp.printFile(w, f, rpt)
+		}
+	}
+}
+
+func (sp *sourcePrinter) printFile(w io.Writer, f *sourceFile, rpt *Report) {
+	for _, fn := range sp.functions(f) {
+		if fn.cum == 0 {
+			continue
+		}
+		printFunctionHeader(w, fn.name, f.fname, fn.flat, fn.cum, rpt)
+		var asm []assemblyInstruction
+		for l := fn.begin; l < fn.end; l++ {
+			lineContents, ok := sp.reader.line(f.fname, l)
+			if !ok {
+				if len(f.lines[l]) == 0 {
+					// Outside of range of valid lines and nothing to print.
+					continue
+				}
+				if l == 0 {
+					// Line number 0 shows up if line number is not known.
+					lineContents = "<instructions with unknown line numbers>"
+				} else {
+					// Past end of file, but have data to print.
+					lineContents = "???"
+				}
+			}
+
+			// Make list of assembly instructions.
+			asm = asm[:0]
+			var flatSum, cumSum int64
+			var lastAddr uint64
+			for _, inst := range f.lines[l] {
+				addr := inst.addr
+				x := sp.insts[addr]
+				flatSum += x.flat
+				cumSum += x.cum
+				startsBlock := (addr != lastAddr+uint64(sp.insts[lastAddr].length))
+				lastAddr = addr
+
+				// divisors already applied, so leave flatDiv,cumDiv as 0
+				asm = append(asm, assemblyInstruction{
+					address:     x.objAddr,
+					instruction: x.disasm,
+					function:    fn.name,
+					file:        x.file,
+					line:        x.line,
+					flat:        x.flat,
+					cum:         x.cum,
+					startsBlock: startsBlock,
+					inlineCalls: inst.stack,
+				})
+			}
+
+			printFunctionSourceLine(w, l, flatSum, cumSum, lineContents, asm, sp.reader, rpt)
 		}
 		printFunctionClosing(w)
 	}
-	return nil
 }
 
-// sourceCoordinates returns the lowest and highest line numbers from
-// a set of assembly statements.
-func sourceCoordinates(asm map[int][]assemblyInstruction) (start, end int) {
-	for l := range asm {
-		if start == 0 || l < start {
-			start = l
-		}
-		if end == 0 || l > end {
-			end = l
-		}
+// functions splits apart the lines to show in a file into a list of per-function ranges.
+func (sp *sourcePrinter) functions(f *sourceFile) []sourceFunction {
+	var funcs []sourceFunction
+
+	// Get interesting lines in sorted order.
+	lines := make([]int, 0, len(f.lines))
+	for l := range f.lines {
+		lines = append(lines, l)
 	}
-	return start, end
+	sort.Ints(lines)
+
+	// Merge adjacent lines that are in same function and not too far apart.
+	const mergeLimit = 20
+	for _, l := range lines {
+		name := f.funcName[l]
+		if pretty, ok := sp.prettyNames[name]; ok {
+			// Use demangled name if available.
+			name = pretty
+		}
+
+		fn := sourceFunction{name: name, begin: l, end: l + 1}
+		for _, x := range f.lines[l] {
+			inst := sp.insts[x.addr]
+			fn.flat += inst.flat
+			fn.cum += inst.cum
+		}
+
+		// See if we should merge into preceding function.
+		if len(funcs) > 0 {
+			last := funcs[len(funcs)-1]
+			if l-last.end < mergeLimit && last.name == name {
+				last.end = l + 1
+				last.flat += fn.flat
+				last.cum += fn.cum
+				funcs[len(funcs)-1] = last
+				continue
+			}
+		}
+
+		// Add new function.
+		funcs = append(funcs, fn)
+	}
+
+	// Expand function boundaries to show neighborhood.
+	const expand = 5
+	for i, f := range funcs {
+		if i == 0 {
+			// Extend backwards, stopping at line number 1, but do not disturb 0
+			// since that is a special line number that can show up when addr2line
+			// cannot determine the real line number.
+			if f.begin > expand {
+				f.begin -= expand
+			} else if f.begin > 1 {
+				f.begin = 1
+			}
+		} else {
+			// Find gap from predecessor and divide between predecessor and f.
+			halfGap := (f.begin - funcs[i-1].end) / 2
+			if halfGap > expand {
+				halfGap = expand
+			}
+			funcs[i-1].end += halfGap
+			f.begin -= halfGap
+		}
+		funcs[i] = f
+	}
+
+	// Also extend the ending point of the last function.
+	if len(funcs) > 0 {
+		funcs[len(funcs)-1].end += expand
+	}
+
+	return funcs
 }
 
-// assemblyPerSourceLine disassembles the binary containing a symbol
-// and classifies the assembly instructions according to its
-// corresponding source line, annotating them with a set of samples.
-func assemblyPerSourceLine(objSyms []*objSymbol, rs graph.Nodes, src string, obj plugin.ObjTool, intelSyntax bool) map[int][]assemblyInstruction {
-	assembly := make(map[int][]assemblyInstruction)
-	// Identify symbol to use for this collection of samples.
-	o := findMatchingSymbol(objSyms, rs)
-	if o == nil {
-		return assembly
+// objectFile returns the object for the named file, opening it if necessary.
+// It returns nil on error.
+func (sp *sourcePrinter) objectFile(m *profile.Mapping) plugin.ObjFile {
+	if object, ok := sp.objects[m.File]; ok {
+		return object // May be nil if we detected an error earlier.
 	}
-
-	// Extract assembly for matched symbol
-	insts, err := obj.Disasm(o.sym.File, o.sym.Start, o.sym.End, intelSyntax)
+	object, err := sp.objectTool.Open(m.File, m.Start, m.Limit, m.Offset)
 	if err != nil {
-		return assembly
+		object = nil
 	}
-
-	srcBase := filepath.Base(src)
-	anodes := annotateAssembly(insts, rs, o.base)
-	var lineno = 0
-	var prevline = 0
-	for _, an := range anodes {
-		// Do not rely solely on the line number produced by Disasm
-		// since it is not what we want in the presence of inlining.
-		//
-		// E.g., suppose we are printing source code for F and this
-		// instruction is from H where F called G called H and both
-		// of those calls were inlined. We want to use the line
-		// number from F, not from H (which is what Disasm gives us).
-		//
-		// So find the outer-most linenumber in the source file.
-		found := false
-		if frames, err := o.file.SourceLine(an.address + o.base); err == nil {
-			for i := len(frames) - 1; i >= 0; i-- {
-				if filepath.Base(frames[i].File) == srcBase {
-					for j := i - 1; j >= 0; j-- {
-						an.inlineCalls = append(an.inlineCalls, callID{frames[j].File, frames[j].Line})
-					}
-					lineno = frames[i].Line
-					found = true
-					break
-				}
-			}
-		}
-		if !found && filepath.Base(an.file) == srcBase {
-			lineno = an.line
-		}
-
-		if lineno != 0 {
-			if lineno != prevline {
-				// This instruction starts a new block
-				// of contiguous instructions on this line.
-				an.startsBlock = true
-			}
-			prevline = lineno
-			assembly[lineno] = append(assembly[lineno], an)
-		}
-	}
-
-	return assembly
-}
-
-// findMatchingSymbol looks for the symbol that corresponds to a set
-// of samples, by comparing their addresses.
-func findMatchingSymbol(objSyms []*objSymbol, ns graph.Nodes) *objSymbol {
-	for _, n := range ns {
-		for _, o := range objSyms {
-			if filepath.Base(o.sym.File) == filepath.Base(n.Info.Objfile) &&
-				o.sym.Start <= n.Info.Address-o.base &&
-				n.Info.Address-o.base <= o.sym.End {
-				return o
-			}
-		}
-	}
-	return nil
+	sp.objects[m.File] = object // Cache even on error.
+	return object
 }
 
 // printHeader prints the page header for a weblist report.
@@ -348,22 +714,23 @@
 }
 
 // printFunctionSourceLine prints a source line and the corresponding assembly.
-func printFunctionSourceLine(w io.Writer, fn *graph.Node, assembly []assemblyInstruction, reader *sourceReader, rpt *Report) {
+func printFunctionSourceLine(w io.Writer, lineNo int, flat, cum int64, lineContents string,
+	assembly []assemblyInstruction, reader *sourceReader, rpt *Report) {
 	if len(assembly) == 0 {
 		fmt.Fprintf(w,
 			"<span class=line> %6d</span> <span class=nop>  %10s %10s %8s  %s </span>\n",
-			fn.Info.Lineno,
-			valueOrDot(fn.Flat, rpt), valueOrDot(fn.Cum, rpt),
-			"", template.HTMLEscapeString(fn.Info.Name))
+			lineNo,
+			valueOrDot(flat, rpt), valueOrDot(cum, rpt),
+			"", template.HTMLEscapeString(lineContents))
 		return
 	}
 
 	fmt.Fprintf(w,
 		"<span class=line> %6d</span> <span class=deadsrc>  %10s %10s %8s  %s </span>",
-		fn.Info.Lineno,
-		valueOrDot(fn.Flat, rpt), valueOrDot(fn.Cum, rpt),
-		"", template.HTMLEscapeString(fn.Info.Name))
-	srcIndent := indentation(fn.Info.Name)
+		lineNo,
+		valueOrDot(flat, rpt), valueOrDot(cum, rpt),
+		"", template.HTMLEscapeString(lineContents))
+	srcIndent := indentation(lineContents)
 	fmt.Fprint(w, "<span class=asm>")
 	var curCalls []callID
 	for i, an := range assembly {
@@ -374,15 +741,9 @@
 
 		var fileline string
 		if an.file != "" {
-			fileline = fmt.Sprintf("%s:%d", template.HTMLEscapeString(an.file), an.line)
+			fileline = fmt.Sprintf("%s:%d", template.HTMLEscapeString(filepath.Base(an.file)), an.line)
 		}
 		flat, cum := an.flat, an.cum
-		if an.flatDiv != 0 {
-			flat = flat / an.flatDiv
-		}
-		if an.cumDiv != 0 {
-			cum = cum / an.cumDiv
-		}
 
 		// Print inlined call context.
 		for j, c := range an.inlineCalls {
@@ -398,15 +759,18 @@
 			text := strings.Repeat(" ", srcIndent+4+4*j) + strings.TrimSpace(fline)
 			fmt.Fprintf(w, " %8s %10s %10s %8s  <span class=inlinesrc>%s</span> <span class=unimportant>%s:%d</span>\n",
 				"", "", "", "",
-				template.HTMLEscapeString(fmt.Sprintf("%-80s", text)),
+				template.HTMLEscapeString(rightPad(text, 80)),
 				template.HTMLEscapeString(filepath.Base(c.file)), c.line)
 		}
 		curCalls = an.inlineCalls
 		text := strings.Repeat(" ", srcIndent+4+4*len(curCalls)) + an.instruction
 		fmt.Fprintf(w, " %8s %10s %10s %8x: %s <span class=unimportant>%s</span>\n",
 			"", valueOrDot(flat, rpt), valueOrDot(cum, rpt), an.address,
-			template.HTMLEscapeString(fmt.Sprintf("%-80s", text)),
-			template.HTMLEscapeString(fileline))
+			template.HTMLEscapeString(rightPad(text, 80)),
+			// fileline should not be escaped since it was formed by appending
+			// line number (just digits) to an escaped file name. Escaping here
+			// would cause double-escaping of file name.
+			fileline)
 	}
 	fmt.Fprintln(w, "</span>")
 }
@@ -482,36 +846,6 @@
 	return src, file, nil
 }
 
-// getMissingFunctionSource creates a dummy function body to point to
-// the source file and annotates it with the samples in asm.
-func getMissingFunctionSource(filename string, asm map[int][]assemblyInstruction, start, end int) (graph.Nodes, string) {
-	var fnodes graph.Nodes
-	for i := start; i <= end; i++ {
-		insts := asm[i]
-		if len(insts) == 0 {
-			continue
-		}
-		var group assemblyInstruction
-		for _, insn := range insts {
-			group.flat += insn.flat
-			group.cum += insn.cum
-			group.flatDiv += insn.flatDiv
-			group.cumDiv += insn.cumDiv
-		}
-		flat := group.flatValue()
-		cum := group.cumValue()
-		fnodes = append(fnodes, &graph.Node{
-			Info: graph.NodeInfo{
-				Name:   "???",
-				Lineno: i,
-			},
-			Flat: flat,
-			Cum:  cum,
-		})
-	}
-	return fnodes, filename
-}
-
 // sourceReader provides access to source code with caching of file contents.
 type sourceReader struct {
 	// searchPath is a filepath.ListSeparator-separated list of directories where
@@ -543,6 +877,7 @@
 	return reader.errors[path]
 }
 
+// line returns the line numbered "lineno" in path, or _,false if lineno is out of range.
 func (reader *sourceReader) line(path string, lineno int) (string, bool) {
 	lines, ok := reader.files[path]
 	if !ok {
@@ -651,3 +986,37 @@
 	}
 	return column
 }
+
+// rightPad returns s with every tab replaced by spaces up to the next
+// multiple-of-8 column, followed by enough trailing spaces to reach n columns.
+// Inputs that are already n or more columns wide get no trailing padding.
+func rightPad(s string, n int) string {
+	var out strings.Builder
+
+	// Tab stops are measured from the start of s and tabs are replaced eagerly,
+	// so the result has a fixed width whatever prefix the caller prints first.
+	width := 0
+	for _, r := range s {
+		if r != '\t' {
+			out.WriteRune(r)
+			width++
+			continue
+		}
+		// A tab always yields at least one space, then fills to the tab stop.
+		out.WriteByte(' ')
+		for width++; width%8 != 0; width++ {
+			out.WriteByte(' ')
+		}
+	}
+	// Append whatever is still missing to reach the requested width.
+	for ; width < n; width++ {
+		out.WriteByte(' ')
+	}
+	return out.String()
+}
+
+// canonicalizeFileName strips a leading "/proc/self/cwd/" and then a leading
+// "./" from fname, and returns the filepath.Clean form of what remains.
+func canonicalizeFileName(fname string) string {
+	return filepath.Clean(strings.TrimPrefix(strings.TrimPrefix(fname, "/proc/self/cwd/"), "./"))
+}
diff --git a/internal/report/source_test.go b/internal/report/source_test.go
index c6e179c..78c9d47 100644
--- a/internal/report/source_test.go
+++ b/internal/report/source_test.go
@@ -46,8 +46,8 @@
 	}
 	output := buf.String()
 
-	for _, expect := range []string{"func busyLoop", "callq", "math.Abs"} {
-		if !strings.Contains(output, expect) {
+	for _, expect := range []string{"func busyLoop", "callq.*mapassign"} {
+		if match, _ := regexp.MatchString(expect, output); !match {
 			t.Errorf("weblist output does not contain '%s':\n%s", expect, output)
 		}
 	}
@@ -168,6 +168,28 @@
 	}
 }
 
+// TestRightPad checks tab expansion and padding, including over-wide inputs.
+func TestRightPad(t *testing.T) {
+	cases := []struct {
+		width       int
+		input, want string
+	}{
+		{0, "", ""},
+		{4, "", "    "},
+		{4, "x", "x   "},
+		{4, "abcd", "abcd"},   // No padding because of overflow
+		{4, "abcde", "abcde"}, // No padding because of overflow
+		{10, "\tx", "        x "},
+		{10, "w\txy\tz", "w       xy      z"},
+		{20, "w\txy\tz", "w       xy      z   "},
+	}
+	for _, tc := range cases {
+		if got := rightPad(tc.input, tc.width); got != tc.want {
+			t.Errorf("rightPad(%q, %d): got %q, want %q", tc.input, tc.width, got, tc.want)
+		}
+	}
+}
+
 func readProfile(fname string, t *testing.T) *profile.Profile {
 	file, err := os.Open(fname)
 	if err != nil {