blob: df058b63703186ad0a4ea28e90d80af316b9fc69 [file] [log] [blame]
#!/usr/bin/env fuchsia-vendored-python
# Copyright 2025 The Fuchsia Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Utility to print out the sizes of all bootfs files and identify them
This script takes a pair of assembly_builder_forensics.json and zbi.json files
from a given assembled system and uses them to generate CSV output that lists
all the bootfs files by size (descending), identifying each blob in the bootfs
packages by their path in the package. When a blob is present in multiple
packages, its package name is listed as 'multiple'.
Some blobs are generated by the assembly process, and as such, aren't listed
in the assembly_builder_forensics.json file by merkle. These files are given a
path of 'assembly_generated' and a package of 'unknown'.
"""
import argparse
import csv
import json
import sys
from dataclasses import dataclass
from pathlib import Path
@dataclass
class BootfsFile:
bootfs_path: str
size: int
package_path: str | None
packages: list[str]
def get_zbi_bootfs(path: Path) -> list[dict[str, str | int]]:
"""Get all the bootfs files from zbi.json
Returns a list of the files in the bootfs, as the 'contents' list from the zbi.json format.
This is a list of dicts with the following fields:
- name: the path of the file in bootfs
- offset: (bytes) the offset within the uncompressed bootfs image where the file starts
- length: (bytes) the length of the file's data
- size: (bytes) the space within bootfs that the file takes up. This will be in multiples of
the alignment used by bootfs (4KiB).
"""
with open(path) as zbi_json_file:
zbi_json = json.load(zbi_json_file)
for entry in zbi_json:
if entry["type"] == "BOOTFS":
return entry["contents"]
raise ValueError("Unable to find bootfs contents in zbi.json")
@dataclass
class PackagePath:
    """Where a file lives inside one particular package."""

    # Name of the package that contains the file.
    package: str
    # Path of the file within that package.
    path: str
@dataclass
class BootfsBlob:
    """Every PackagePath at which a given file (identified by its content) was found."""

    # One entry per (package, path-in-package) location of the blob.
    paths: list[PackagePath]
def get_bootfs_package_blobs(path: Path) -> dict[str, list[PackagePath]]:
    """Get blob info for all packages in bootfs

    Reads an assembly_builder_forensics.json file and indexes every blob listed in each
    package's manifest by its merkle.

    Args:
        path: path to the assembly_builder_forensics.json file to read.

    Returns:
        A dict keyed by blob merkle. Each value is the list of PackagePaths (package and path
        within that package) at which that blob was listed, in the order they were read.

    Raises:
        KeyError: if the JSON lacks the packages.inner / manifest.blobs / merkle / path
            fields that are read here.
    """
    # JSON is UTF-8 by specification, so don't depend on the platform's locale encoding.
    with open(path, encoding="utf-8") as forensics_file:
        forensics_json = json.load(forensics_file)

    blobs: dict[str, list[PackagePath]] = {}
    packages = forensics_json["packages"]["inner"]
    for package, info in packages.items():
        for blob in info["manifest"]["blobs"]:
            # The same merkle can be listed by several packages (or at several paths), so
            # accumulate every location rather than overwriting.
            blobs.setdefault(blob["merkle"], []).append(
                PackagePath(package, blob["path"])
            )
    return blobs
def main(argv: list[str] | None = None) -> int:
    """Print a CSV listing of every bootfs file, largest first.

    Each bootfs file from zbi.json is emitted as one CSV row on stdout with the columns
    path, size, paths, packages. Files under "blob/" are looked up by merkle in the forensics
    file to find their path within a package; blobs not found there are reported with a path
    of 'assembly_generated' and a package of 'unknown'. Bare bootfs files leave the last two
    columns empty.

    Args:
        argv: command-line arguments to parse. When None (the default), argparse reads
            them from sys.argv.

    Returns:
        0, for use as the process exit code.
    """
    parser = argparse.ArgumentParser(
        description="List all bootfs files by size as CSV, identifying package blobs"
    )
    # Both inputs are mandatory: without 'required', omitting one only surfaced later as an
    # opaque TypeError from open(None).
    parser.add_argument(
        "--forensics",
        type=Path,
        required=True,
        help="path to an assembly_builder_forensics.json file",
    )
    parser.add_argument(
        "--zbi-json",
        type=Path,
        required=True,
        help="path to the zbi.json file",
    )
    args = parser.parse_args(argv)

    zbi_bootfs_entries = get_zbi_bootfs(args.zbi_json)
    bootfs_package_blobs = get_bootfs_package_blobs(args.forensics)

    bootfs_files: list[BootfsFile] = []
    for entry in zbi_bootfs_entries:
        name = str(entry["name"])
        size = int(entry["size"])

        if not name.startswith("blob/"):
            # A bare file in bootfs: just list it by path and size.
            bootfs_files.append(BootfsFile(name, size, None, []))
            continue

        # The bootfs file is a package blob, so look it up by merkle.
        merkle = name.removeprefix("blob/")
        blob_paths = bootfs_package_blobs.get(merkle)
        if not blob_paths:
            # The assembly_builder_forensics.json file doesn't have information about blobs
            # that are created during product assembly, so mark these as
            # "assembly_generated".
            bootfs_files.append(
                BootfsFile(name, size, "assembly_generated", ["unknown"])
            )
            continue

        # If the blob is at multiple paths in different packages, report just one (this is
        # fairly rare, so the loss of information is not a big worry). Use min() rather than
        # popping from a set so that the chosen path is the same on every run.
        # Packages are de-duplicated so that a blob listed twice by the *same* package is
        # still attributed to that package instead of being reported as 'multiple'.
        bootfs_files.append(
            BootfsFile(
                name,
                size,
                min(location.path for location in blob_paths),
                sorted({location.package for location in blob_paths}),
            )
        )

    # Output CSV data about each file, with its package(s) and (one) path for identification
    # if it is a package blob. csv.writer quotes any field that contains a comma or quote, so
    # unusual paths cannot corrupt the column layout; '\n' keeps the line endings that
    # print() produced.
    writer = csv.writer(sys.stdout, lineterminator="\n")
    writer.writerow(["path", "size", "paths", "packages"])
    for file in sorted(bootfs_files, key=lambda f: f.size, reverse=True):
        if file.package_path:
            # If the blob is in multiple packages, then just use 'multiple' instead of
            # listing them all. Most blobs that are in multiple packages are libs that are
            # more readily identified by their path in the package than by their package
            # name, anyway.
            package = (
                file.packages[0] if len(file.packages) == 1 else "multiple"
            )
            writer.writerow(
                [file.bootfs_path, file.size, file.package_path, package]
            )
        else:
            writer.writerow([file.bootfs_path, file.size, "", ""])
    return 0
# Script entry point: run main() and use its return value as the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())