blob: ecf7aca1c87c64296469225ccdd69beecbaaab54 [file] [log] [blame]
#!/usr/bin/env fuchsia-vendored-python
#
# Copyright 2017 The Fuchsia Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Visualizes the output of Zircon's "memgraph" tool.
For usage, see
https://fuchsia.dev/fuchsia-src/development/memory/memory#visualize_memory_usage
"""
import collections
import html
import json
import sys
import os.path
import textwrap
FUCHSIA_DIR = os.path.abspath(
os.path.join(__file__, os.pardir, os.pardir, os.pardir)
)
# Magic value for nodes with empty names.
UNNAMED_NAME = "<unnamed>"
class Node(object):
"""A generic node in the kernel/job/process/memory tree."""
def __init__(self):
self.type = ""
self.koid = 0
self.name = ""
self.area = 0
self.children = []
def html_label(self):
"""Returns a safe HTML string that identifies this Node."""
tag = ""
if self.type:
tag = (
'<span class="treemap-node-type '
'treemap-node-type-{}">{}</span> '
).format(
html.escape(self.type[0]), html.escape(self.type[0].upper())
)
if self.name in ("", UNNAMED_NAME):
name = "<i>UNNAMED</i> [koid {}]".format(self.koid)
else:
name = html.escape(self.name)
return tag + name
# Mapping of unique ID strings to Node objects.
ids_to_nodes = {}
def lookup(node_id):
"""Returns or creates the Node associated with an ID string.
Args:
node_id: ID string to look up
Returns:
A Node object
"""
node = ids_to_nodes.get(node_id)
if node is None:
node = Node()
ids_to_nodes[node_id] = node
return node
def sum_area(node):
"""Recursively calculates the Node.area values of a subtree.
Args:
node: The Node at the root of the tree to walk
Returns:
The final area of |node|
"""
# Area should either be set explicitly or calculated from the children.
if node.children and (node.area != 0):
raise AssertionError(
"Node {} has {} children and non-zero area {}".format(
node.name, len(node.children), node.area
)
)
node.area += sum(map(sum_area, node.children))
return node.area
def format_size(nbytes):
"""Formats a size as a human-readable string like "123.4k".
Units are in powers of 1024, so "k" is technically "kiB", etc.
Values smaller than "k" have the suffix "B".
Exact multiples of a unit are displayed without a decimal;
e.g., "17k" means the value is exactly 17 * 1024.
Otherwise, a decimal is present; e.g., "17.0k" means the value
is (17 * 1024) +/- epsilon.
Args:
nbytes: The value to format
Returns:
The formatted string
"""
units = "BkMGTPE"
ui = 0
r = 0
whole = True
while nbytes >= 10000 or (nbytes != 0 and (nbytes & 1023) == 0):
ui += 1
if nbytes & 1023:
whole = False
r = nbytes % 1024
nbytes //= 1024
if whole:
return "{}{}".format(nbytes, units[ui])
round_up = (r % 100) >= 50
r = (r // 100) + round_up
if r == 10:
nbytes += 1
r = 0
return "{}.{}{}".format(nbytes, r, units[ui])
# Enum for tracking VMO reference types.
VIA_HANDLE = 1
VIA_MAPPING = 2
def populate_process(process_node, process_record, hide_aggregated=True):
"""Adds the process's child nodes.
Args:
process_node: A process's Node
process_record: The same process's input record
hide_aggregated: If true, do not create Nodes for individual VMOs
that have been aggregated by name into a single Node
"""
# If there aren't any VMOs, use the sizes in the record.
if not process_record.get("vmo_refs", []):
# Get the breakdown.
priv = process_record.get("private_bytes", 0)
pss = process_record.get("pss_bytes", 0)
shared = max(0, pss - priv) # Kernel calls this "scaled shared"
pid = process_record["id"]
if priv:
node = lookup(pid + "/priv")
node.name = "Private"
node.area = priv
process_node.children.append(node)
if shared:
node = lookup(pid + "/shared")
node.name = "Proportional shared"
node.area = shared
process_node.children.append(node)
# The process's area will be set to the sum of the children.
return
# Otherwise, this entry has VMOs.
# Build the set of reference types from this process to its VMOs.
koid_to_ref_types = collections.defaultdict(set)
for vmo_ref in process_record.get("vmo_refs", []):
ref_types = koid_to_ref_types[vmo_ref["vmo_koid"]]
if "HANDLE" in vmo_ref["via"]:
ref_types.update([VIA_HANDLE])
if "MAPPING" in vmo_ref["via"]:
ref_types.update([VIA_MAPPING])
# De-dup the set of VMOs known to the process, and group them by name. Each
# of these entries are equivalent, though some values may be different (like
# committed_bytes) because they were snapshotted at different times.
name_to_vmo = collections.defaultdict(list)
koid_to_vmo = dict()
id_prefix = "{}/vmo".format(process_record["id"])
for vmo in process_record.get("vmos", []):
# Although multiple processes may point to the same VMO, we're building
# a tree and thus need to create unique IDs for VMOs under this process.
vmo_koid = vmo["koid"]
vmo_id = "{}/{}".format(id_prefix, vmo_koid)
vmo_node = lookup(vmo_id)
if vmo_node.name:
# This is a duplicate of a VMO we've already seen.
continue
vmo_node.type = "vmo"
vmo_node.koid = vmo_koid
vmo_node.name = vmo["name"] if vmo["name"] else UNNAMED_NAME
name_to_vmo[vmo_node.name].append(vmo_node)
koid_to_vmo[vmo_koid] = vmo_node
# Figure out a size for the VMO.
ref_types = koid_to_ref_types[vmo_koid]
if VIA_MAPPING in ref_types:
# The VMO is already accounted for in the process's pss_bytes value.
# TODO(dbort): To make the VMO areas exactly line up with pss_bytes,
# we'd need sub-VMO mapping information like what 'vmaps' provides:
# this process may only map a subset of the VMO's committed pages,
# but we're counting all of them. This isn't necessarily wrong,
# just different.
vmo_node.area = int(
float(vmo["committed_bytes"]) / vmo["share_count"]
)
# NB: This counts as private memory if share_count is 1.
else:
# The process only has a handle to this VMO but does not map it: the
# process's pss_bytes value does not account for this VMO.
assert ref_types == set([VIA_HANDLE])
# Treat our handle reference as an increment to the VMO's
# share_count. This may over-estimate this process's share, because
# other processes could also have handle-only references that we
# don't know about.
vmo_node.area = int(
float(vmo["committed_bytes"]) / (float(vmo["share_count"]) + 1)
)
# Create the aggregated VMO nodes.
children = []
for name, vmos in name_to_vmo.items():
if len(vmos) == 1 or name == UNNAMED_NAME:
# Only one VMO with this name, or multiple VMOs with an empty name.
# Add them as direct children.
children.extend(vmos)
else:
# Create a parent VMO for all of these VMOs with the same name.
parent_id = "{}/{}".format(id_prefix, name)
pnode = lookup(parent_id)
pnode.name = "{}[{}]".format(name, len(vmos))
pnode.type = "vmo"
if hide_aggregated:
pnode.area = sum(map(sum_area, vmos))
# And then drop the vmo nodes on the ground (by not adding
# them as children).
else:
# The area will be calculated from the children.
pnode.children.extend(vmos)
children.append(pnode)
# TODO(dbort): Call out VMOs/aggregates that are only reachable via handle?
process_node.children.extend(children)
def build_webtreemap(node):
"""Returns a JSON-able dict tree representing a Node tree.
See
https://github.com/evmar/webtreemap/blob/gh-pages/README.markdown#input-format
For a description of this data format.
Args:
node: The Node at the root of the tree to walk
Returns:
A webtreemap-compatible dict representing the tree
"""
return {
"name": "{} ({})".format(node.html_label(), format_size(node.area)),
"data": {
"$area": node.area,
# TODO(dbort): Turn this on and style different node types
# if https://github.com/evmar/webtreemap/pull/15 is
# accepted. Would define a class like
# 'webtreemap-symbol-<type>' but there's a bug in
# webtreemap.js.
# '$symbol': node.type,
},
"children": list(map(build_webtreemap, node.children)),
}
def dump_html_table(node, depth=0, parent_area=None, total_area=None):
"""Returns an HTML representation of the tree.
Args:
node: The root of the tree to dump
depth: The depth of the node. Use 0 for the root node.
parent_area: The size of the parent node; used to show fractions.
Use None for the root node.
total_area: The total size of the tree; used to show fractions.
Use None for the root node.
Returns:
A sequence of HTML lines, joinable by whitespace
"""
lines = []
if not depth:
# We're the root node. Dump the headers.
lines.extend(
[
"<style>",
"table#tree {",
" border-collapse: collapse;",
" border-spacing: 0;",
"}",
"table#tree tr:nth-child(even) {",
" background-color: #eee;",
"}",
"table#tree tr:nth-child(odd) {",
" background-color: #fff;",
"}",
"table#tree tr:hover {",
" background-color: #ff8;",
"}",
"table#tree td {",
" text-align: right;",
" padding-left: 1em;",
" padding-right: 1em;",
" font-family:Consolas,Monaco,Lucida Console,",
" Liberation Mono,DejaVu Sans Mono,",
" Bitstream Vera Sans Mono,Courier New,monospace;",
"}",
"table#tree td.name {",
" text-align: left;",
"}",
"</style>",
'<table id="tree">',
"<tr>",
"<th>Name</th>",
"<th>Size<br/>(bytes/1024^n)</th>",
"<th>Size (bytes)</th>",
"<th>Fraction of parent</th>",
"<th>Fraction of total</th>",
"</tr>",
]
)
lines.extend(
[
"<tr>",
# Indent the names based on depth.
'<td class="name"><span style="color:#bbb">{indent}</span>'
"{label}</td>".format(
indent=("|" + "&nbsp;" * 2) * depth, label=node.html_label()
),
"<td>{fsize}</td>".format(fsize=format_size(node.area)),
"<td>{size}</td>".format(size=node.area),
]
)
if depth:
# We're not the root node.
pfrac = node.area / float(parent_area) if parent_area else 0
tfrac = node.area / float(total_area) if total_area else 0
for frac in (pfrac, tfrac):
lines.extend(
[
(
"<td>{pct:.3f}%&nbsp;"
'<progress value="{frac}"></progress></td>'
).format(pct=frac * 100, frac=frac)
]
)
else:
lines.append("<td></td>" * 2)
lines.append("</tr>")
if total_area is None:
total_area = node.area
# Append children by size, largest to smallest.
def dump_child(child):
return dump_html_table(
child, depth=depth + 1, parent_area=node.area, total_area=total_area
)
children = sorted(node.children, reverse=True, key=lambda n: n.area)
for line in [dump_child(c) for c in children]:
lines.extend(line)
if not depth:
lines.append("</table>")
return lines
def build_tree(dataset):
"""Builds a Node tree from a set of memgraph records.
See
https://fuchsia.dev/fuchsia-src/development/memory/memory#visualize_memory_usage
for an example of generating memgraph JSON data.
Args:
dataset: A sequence of memgraph records, typically parsed from JSON
Returns:
The root of the new Node tree
"""
ids_to_nodes.clear() # Clear out the global registry.
root_node = None
root_job = None
for record in dataset:
record_type = record["type"]
# Only read certain types.
if record_type not in ("kernel", "j", "p"):
continue
node = lookup(record["id"])
node.type = record_type
node.koid = record.get("koid", 0)
node.name = record["name"]
if record_type == "kernel":
node.area = record.get("size_bytes", 0)
elif record_type == "j":
if record["parent"].startswith("kernel/"):
assert not root_job, "Found multiple root jobs"
root_job = node
elif record_type == "p":
# Add the process's children, which will determine its area.
populate_process(node, record)
if not record["parent"]:
# The root node has an empty parent.
assert not root_node, "Found multiple root objects"
root_node = node
else:
parent_node = lookup(record["parent"])
parent_node.children.append(node)
assert root_node, "Did not find root object"
assert root_job, "Did not find root job"
# A better name for physmem.
lookup("kernel/physmem").name = "All physical memory"
# Sum up the job tree. Don't touch kernel entries, which already have
# the correct sizes.
sum_area(root_job)
# The root job is usually named "root";
# make it more clear that it's a job.
root_job.name = "root job"
# Give users a hint that processes live in the VMO entry.
kvmo_node = lookup("kernel/vmo")
kvmo_node.name = "VMOs/processes"
# Create a fake entry to cover the portion of kernel/vmo that isn't
# covered by the job tree.
node = lookup("kernel/vmo/unknown")
node.name = "unknown (kernel & unmapped)"
node.area = kvmo_node.area - root_job.area
kvmo_node.children.append(node)
return root_node
def print_html_document(root_node):
"""Prints to stdout an HTML document that visualizes a Node tree.
Args:
root_node: The Node at the root of the tree to walk
"""
html = """\
<!DOCTYPE html>
<title>Memory usage</title>
<script>
var kTree = %(json)s
</script>
<link rel='stylesheet' href='%(css)s'>
<style>
body {
font-family: sans-serif;
font-size: 0.8em;
margin: 2ex 4ex;
}
h1 {
font-weight: normal;
}
#map {
width: 800px;
height: 600px;
position: relative;
cursor: pointer;
-webkit-user-select: none;
}
.treemap-node-type { font-weight: bold; }
/* Colorblind-safe colors from http://mkweb.bcgsc.ca/colorblind/ */
.treemap-node-type-k { color: black; }
.treemap-node-type-j { color: RGB(213, 94, 0); } /* Vermillion */
.treemap-node-type-p { color: RGB(0, 114, 178); } /* Blue */
.treemap-node-type-v { color: RGB(0, 158, 115); } /* Bluish green */
</style>
<h1>Memory usage</h1>
<p>Click on a box to zoom in. Click on the outermost box to zoom out.</p>
<div id='map'></div>
<script src='%(js)s'></script>
<script>
var map = document.getElementById('map');
appendTreemap(map, kTree);
</script>
<ul style="list-style: none">
<li><span class="treemap-node-type treemap-node-type-k">K</span>: Kernel memory
<li><span class="treemap-node-type treemap-node-type-j">J</span>: Job
<li><span class="treemap-node-type treemap-node-type-p">P</span>: Process
<li><span class="treemap-node-type treemap-node-type-v">V</span>: VMO
<ul style="list-style: none">
<li> VMO names with <b>[<i>n</i>]</b> suffixes are aggregates of <i>n</i>
VMOs that have the same name.
</ul>
</ul>
<hr>
%(table)s
""" % {
"json": json.dumps(build_webtreemap(root_node)),
"table": " ".join(dump_html_table(root_node)),
"css": os.path.join(
FUCHSIA_DIR,
"scripts",
"third_party",
"webtreemap",
"webtreemap.css",
),
"js": os.path.join(
FUCHSIA_DIR, "scripts", "third_party", "webtreemap", "webtreemap.js"
),
}
print(textwrap.dedent(html))
def main():
root_node = build_tree(json.load(sys.stdin))
print_html_document(root_node)
if __name__ == "__main__":
main()