#!/usr/bin/env fuchsia-vendored-python
# Copyright 2020 The Fuchsia Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
#### CATEGORY=Run, inspect and debug
### generate a report of CPU stats from a snapshot
# usage: fx cpu-stats [-f json|html] [-o OUTPUT_FILE] [INPUT_FILES...]
#
# Example: fx cpu-stats -f html -o output.html
# (Loads the latest data from an attached Fuchsia device and creates HTML output)
#
# Example: fx cpu-stats -f html -o output.html /tmp/snapshot.zip
# (Loads the given snapshot and creates HTML output)
#
# Example: fx cpu-stats -f json /tmp/snapshot.zip
# (Prints the stats for the given snapshot as JSON to stdout)
import argparse
from collections import defaultdict
from datetime import timedelta
import json
from math import sqrt
import os
import subprocess
import sys
from tempfile import TemporaryDirectory
import zipfile
try:
    import numpy as np
except ModuleNotFoundError:
    print('You need numpy installed.\nRun: pip3 install numpy', file=sys.stderr)
    sys.exit(1)
MIN_MEASUREMENTS_FOR_CROSS_CORRELATION = 5
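# Pairs of series that overlap in fewer than this many measurements are
# skipped, since correlations estimated from very few samples are unreliable.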
# Try to process the input file as a snapshot.zip file.
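# Raises zipfile.BadZipFile if the input is not a zip archive, and KeyError
# if the archive has no inspect.json entry; both cases are handled by the
# caller.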
def try_unzip(zip_file):
    with zipfile.ZipFile(zip_file) as f:
        with f.open('inspect.json') as json_file:
            return json_file.read()
def main():
    parser = argparse.ArgumentParser('Process CPU stats from snapshots')
    parser.add_argument(
        'input',
        nargs='*',
        type=argparse.FileType('rb'),
        help='One or more snapshot.zip or inspect.json files. If not set, '
        'attempt to use `ffx target snapshot` to get a fresh report.')
    parser.add_argument(
        '--format',
        '-f',
        default='json',
        choices=['json', 'html'],
        help='The output format. Use JSON for raw data, and HTML for a pretty page.')
    parser.add_argument(
        '--out',
        '-o',
        type=argparse.FileType('w'),
        default=sys.stdout,
        help='The output file')
    args = parser.parse_args()

    # Keep a reference to the temporary directory so it is not cleaned up
    # before we finish reading the snapshot.
    tempdir = None
    if not args.input:
        print('No input specified, reading a new snapshot', file=sys.stderr)
        tempdir = TemporaryDirectory()
        subprocess.check_call(
            ['fx', 'ffx', 'target', 'snapshot', '-d', tempdir.name],
            stdout=sys.stderr,
            stderr=sys.stderr)
        args.input = [open(os.path.join(tempdir.name, 'snapshot.zip'), 'rb')]

    json_contents = []
    for input_file in args.input:
        try:
            json_string = try_unzip(input_file)
        except zipfile.BadZipFile:
            # The file wasn't a zip; fall back to reading it as inspect.json.
            input_file.seek(0, 0)
            json_string = input_file.read()
        except KeyError as e:
            print(f'File {input_file.name} is not valid: {e}', file=sys.stderr)
            return 1
        try:
            json_contents.append((input_file.name, json.loads(json_string)))
        except json.JSONDecodeError as e:
            print(
                f'Failed to parse JSON from {input_file.name}: {e}',
                file=sys.stderr)
            return 1

    results = dict()
    return_code = 0
    for (name, content) in json_contents:
        try:
            results[name] = process_content(content)
        except ProcessError as e:
            print(f'Failed to process {name}: {e}', file=sys.stderr)
            return_code = 1

    if args.format == 'json':
        json.dump(results, args.out, indent=2)
        args.out.write('\n')
    elif args.format == 'html':
        arrays = {}
        for name, result in results.items():
            arrays[f'CPU percentages for {name}'] = {
                'line': make_line_chart_array(result['percentages'], 'cpu_percent')
            }
            arrays[f'Queue percentages for {name}'] = {
                'line': make_line_chart_array(result['percentages'], 'queue_percent')
            }
            arrays[f'Correlations for {name}'] = {
                'table': make_table_chart_array(
                    result['correlations'],
                    keys=[
                        'name1', 'name2', 'correlation', 'weighted_correlation'
                    ],
                    titles=[
                        'Name', 'Name (other)', 'Correlation',
                        'Weighted Correlation'
                    ],
                    where=lambda x: x['correlation'] >= 0.5)
            }
        args.out.write(HTML_TEMPLATE.replace('<<SAMPLE_DATA>>', json.dumps(arrays)))
        print(f'Wrote to {args.out.name}', file=sys.stderr)

    return return_code
class ProcessError(Exception):
    pass
# Obtain a field from the percentages array and format it as the input to a line chart.
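# The result is shaped for google.visualization.arrayToDataTable: a header
# row of series names followed by one row per timestamp, where a missing
# measurement is left as None (bridged by interpolateNulls in the template).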
def make_line_chart_array(percentages, field):
    names = set()
    times = set()
    name_to_time_to_measure = defaultdict(lambda: dict())
    for val in percentages:
        names.add(val['name'])
        times.add(val['timestamp'])
        name_to_time_to_measure[val['name']][val['timestamp']] = val
    names = sorted(names)
    times = sorted(times)
    output = [['Time (since boot)'] + names]
    for time in times:
        # Timestamps are nanoseconds since boot; convert to a timedelta.
        delta = timedelta(microseconds=time / 1000)
        row = [{'v': delta.total_seconds(), 'f': str(delta)}]
        for name in names:
            row.append(
                name_to_time_to_measure.get(name, dict()).get(time, dict()).get(field))
        output.append(row)
    return output
# Take a list of data dicts with the given keys and format it as the input
# for a table. "titles" gives the display name for the corresponding key.
# If "where" is set, only rows matching that predicate are included.
def make_table_chart_array(data, keys=None, titles=None, where=lambda _: True):
    if not keys:
        keys = []
    if not titles:
        titles = []
    assert len(titles) == len(keys)
    out = [titles]
    for val in data:
        if not where(val):
            continue
        out.append([val[k] for k in keys])
    return out
def process_content(content):
    samples = get_samples(content)

    # Create a table of samples including value differences between
    # successive timestamps.
    # Output:
    # - name: The name of the component.
    # - timestamp: The timestamp for the measurement.
    # - cpu_percent: The percentage of CPU time since the previous
    #                measurement used by this process, out of the time
    #                available on a single processor. For example, a
    #                component using 2 CPUs at 100% would have a value
    #                of 200.
    # - queue_percent: The percentage of queue time since the previous measurement.
    # - cpu_diff: The total time spent running since the previous measurement.
    # - queue_diff: The total time spent queued to run since the previous measurement.
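    # Worked example (hypothetical numbers): a cpu_diff of 5e8 ns over a
    # time_diff of 1e9 ns yields cpu_percent = 100.0 * 5e8 / 1e9 = 50.0;
    # a component saturating two CPUs for the whole interval yields 200.0.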
    percentages = []
    for name, s in samples.items():
        prev = None
        for timestamp, sample in s.items():
            if not prev:
                # No previous measurement: compute percentages over the
                # entire time since boot.
                percentages.append({
                    'name': name,
                    'timestamp': timestamp,
                    'cpu_percent': 100.0 * sample['cpu_time'] / timestamp,
                    'queue_percent': 100.0 * sample['queue_time'] / timestamp,
                    'cpu_diff': sample['cpu_time'],
                    'queue_diff': sample['queue_time']
                })
            else:
                cpu_diff = sample['cpu_time'] - prev[1]['cpu_time']
                queue_diff = sample['queue_time'] - prev[1]['queue_time']
                time_diff = timestamp - prev[0]
                percentages.append({
                    'name': name,
                    'timestamp': timestamp,
                    'cpu_percent': 100.0 * cpu_diff / time_diff,
                    'queue_percent': 100.0 * queue_diff / time_diff,
                    'cpu_diff': cpu_diff,
                    'queue_diff': queue_diff
                })
            prev = (timestamp, sample)

    # Extract sets of all timestamps and names, and create a lookup table
    # identifying a measurement for a particular component at a time.
    timestamps = sorted({p['timestamp'] for p in percentages})
    names = sorted({p['name'] for p in percentages})
    by_name_time = {(p['name'], p['timestamp']): p for p in percentages}

    # Extract a time series for each component.
    # Every component has an entry for every timestamp in the input. If no
    # measurement existed at that time, the value is False.
    series = dict()
    for name in names:
        series[name + ' (cpu)'] = [
            by_name_time.get((name, time), dict()).get('cpu_percent') or False
            for time in timestamps
        ]
        series[name + ' (queue)'] = [
            by_name_time.get((name, time), dict()).get('queue_percent') or False
            for time in timestamps
        ]
    # Compute O(n^2) cross correlations for each pair of series.
    # This is used to identify components that frequently run together.
    keys = list(series.keys())
    correlations = []
    for i, key1 in enumerate(keys):
        for j in range(i + 1, len(keys)):
            key2 = keys[j]
            # Omit timestamps for which either of the components is missing a
            # measurement, or for which the measurements are both zero.
            # Otherwise we would find a strong correlation between components
            # that are both not running at the same time.
            entries = [
                v for v in zip(series[key1], series[key2])
                if v[0] is not False and v[1] is not False and
                not (v[0] == 0 and v[1] == 0)
            ]
            # Omit pairs with too few samples to produce a good estimate.
            if len(entries) < MIN_MEASUREMENTS_FOR_CROSS_CORRELATION:
                continue
            # Calculate the covariance between the two data series.
            # The output of np.cov is a 2x2 symmetric matrix with variances
            # along the diagonal and the covariance at (0,1) and (1,0).
            #
            # We normalize the covariance by the product of the individual
            # standard deviations to produce a correlation coefficient in
            # the range [-1, 1].
            #
            # We additionally report a correlation coefficient weighted by
            # the total amount of either CPU or queue time for the
            # measurements. This helps to identify the components that not
            # only run together frequently, but in doing so have the largest
            # impact on overall CPU usage.
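            # As a restatement of the code below:
            #   correlation = cov(x, y) / (std(x) * std(y))   (Pearson's r)
            #   weighted_correlation = correlation * mean_i(sqrt(x_i * y_i))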
            vals1 = np.array([e[0] for e in entries])
            vals2 = np.array([e[1] for e in entries])
            cov = np.cov(vals1, vals2, bias=True)
            # Require strictly positive variances so the normalization below
            # cannot divide by zero; we also keep only positive covariances.
            if cov[0][0] > 0 and cov[1][1] > 0 and cov[0][1] >= 0:
                correlation = cov[0][1] / (sqrt(cov[0][0]) * sqrt(cov[1][1]))
                # Weight by the arithmetic mean of geometric means of
                # matched measurements.
                try:
                    weight = np.mean([sqrt(e[0] * e[1]) for e in entries])
                except ValueError:
                    # It has been observed that measurements in this
                    # calculation can be negative, which causes the sqrt to
                    # fail. Catch this case while we investigate more closely.
                    # TODO(https://fxbug.dev/42066759): Investigate negative values.
                    weight = 0
                correlations.append({
                    'name1': key1,
                    'name2': key2,
                    'correlation': correlation,
                    'weighted_correlation': correlation * weight
                })

    return {
        'samples': samples,
        'percentages': percentages,
        'correlations': correlations
    }
def get_component_payload(content, moniker):
    result = None
    for component in content:
        if component['moniker'] == moniker:
            result = component['payload']
    if not result:
        raise ProcessError(f'Could not find payload for {moniker}')
    return result
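# Extract per-component samples from component_manager's inspect data.
# The expected layout (as read below) is:
#   root/stats/measurements/components/<moniker>/<task_koid>
# where each task carries parallel 'timestamps', 'cpu_times', and
# 'queue_times' arrays.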
def get_samples(content):
    try:
        component_manager = get_component_payload(content, '<component_manager>')
        components = component_manager['root']['stats']['measurements']['components']
    except KeyError as e:
        raise ProcessError(f'Missing key: {e}')
    except ProcessError:
        # Component manager inspect is missing; print a warning and continue.
        print('Missing component manager inspect, skipping', file=sys.stderr)
        return defaultdict(lambda: defaultdict())

    # Map from name to timestamp to sample.
    samples = defaultdict(lambda: defaultdict())
    for (moniker, component_stats) in components.items():
        for (_task_koid, task_stats) in component_stats.items():
            for (i, timestamp) in enumerate(task_stats['timestamps']):
                samples[moniker][timestamp] = {
                    'cpu_time': task_stats['cpu_times'][i],
                    'queue_time': task_stats['queue_times'][i]
                }
    return samples
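# Self-contained HTML page rendered with the Google Charts loader. The
# literal token <<SAMPLE_DATA>> is replaced in main() with the JSON-encoded
# chart arrays.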
HTML_TEMPLATE = """
<!DOCTYPE html>
<html>
  <head>
    <meta charset="utf-8">
    <title>CPU Stats</title>
    <style type="text/css">
      #charts > div {
        height: 700px;
        width: 900px;
      }
    </style>
    <script type="text/javascript" src="https://www.gstatic.com/charts/loader.js"></script>
    <script type="text/javascript">
      var sampleData = <<SAMPLE_DATA>>;
    </script>
    <script type="text/javascript">
      google.charts.load('current', {'packages':['corechart', 'table']});
      google.charts.setOnLoadCallback(drawChart);

      function drawChart() {
        var id = 0;
        for (var name in sampleData) {
          if (sampleData[name].line) {
            var data = google.visualization.arrayToDataTable(sampleData[name].line);
            // The first column is formatted to provide ticks on the
            // horizontal axis. Many measurements at locations < 60s
            // confuse the spacing algorithm for grid lines, so we omit
            // them from the ticks.
            var ticks = sampleData[name].line.slice(1)
                .map(function(e) { return e[0]; })
                .filter(function(v) { return v['v'] >= 60; });
            var options = {
              title: name,
              theme: 'material',
              interpolateNulls: true,
              hAxis: {
                ticks: ticks,
                gridlines: {
                  interval: [15, 30, 60]
                }
              }
            };
            var idname = 'chart-' + (id++);
            var div = document.createElement('div');
            div.id = idname;
            document.getElementById('charts').appendChild(div);
            var chart = new google.visualization.LineChart(document.getElementById(idname));
            chart.draw(data, options);
          } else if (sampleData[name].table) {
            var data = google.visualization.arrayToDataTable(sampleData[name].table);
            var options = {
              title: name
            };
            var idname = 'chart-' + (id++);
            var div = document.createElement('div');
            div.id = idname;
            document.getElementById('charts').appendChild(div);
            var chart = new google.visualization.Table(document.getElementById(idname));
            chart.draw(data, options);
          }
        }
      }
    </script>
  </head>
  <body>
    <div id="charts">
    </div>
  </body>
</html>
"""
if __name__ == '__main__':
    sys.exit(main())