| #!/usr/bin/env fuchsia-vendored-python |
| # Copyright 2025 The Fuchsia Authors. All rights reserved. |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| |
| """Utility to print out the sizes of all bootfs files and identify them |
| |
| This script takes a pair of assembly_builder_forensics.json and zbi.json files |
| from a given assembled system and uses them to generate CSV output that lists |
| all the bootfs files by size (descending), identifying each blob in the bootfs |
| packages by their path in the package. When a blob is present in multiple |
| packages, its package name is listed as 'multiple'. |
| |
| Some blobs are generated by the assembly process, and as such, aren't listed |
| by merkle in the assembly_builder_forensics.json file. These files are given a |
| path of 'assembly_generated' and a package of 'unknown'. |
| |
| """ |
| |
| import argparse |
| import json |
| import sys |
| from dataclasses import dataclass |
| from pathlib import Path |
| |
| |
| @dataclass |
| class BootfsFile: |
| bootfs_path: str |
| size: int |
| package_path: str | None |
| packages: list[str] |
| |
| |
| def get_zbi_bootfs(path: Path) -> list[dict[str, str | int]]: |
| """Get all the bootfs files from zbi.json |
| |
| Returns a list of the files in the bootfs, as the 'contents' list from the zbi.json format. |
| This is a list of dicts with the following fields: |
| - name: the path of the file in bootfs |
| - offset: (bytes) the offset within the uncompressed bootfs image where the file starts |
| - length: (bytes) the length of the file's data |
| - size: (bytes) the space within bootfs that the file takes up. This will be in multiples of |
| the alignment used by bootfs (4KiB). |
| """ |
| with open(path) as zbi_json_file: |
| zbi_json = json.load(zbi_json_file) |
| for entry in zbi_json: |
| if entry["type"] == "BOOTFS": |
| return entry["contents"] |
| raise ValueError("Unable to find bootfs contents in zbi.json") |
| |
| |
@dataclass
class PackagePath:
    """Where a file lives: a package name plus the file's path inside it."""

    # Name of the package that contains the file.
    package: str
    # Path of the file within that package.
    path: str
| |
| |
@dataclass
class BootfsBlob:
    """Every PackagePath at which one file (by content identity) was found."""

    # One entry per (package, path) location of the blob.
    paths: list[PackagePath]
| |
| |
def get_bootfs_package_blobs(path: Path) -> dict[str, list[PackagePath]]:
    """Index every blob of every bootfs package by its merkle.

    Reads the forensics JSON at `path` and walks
    packages -> inner -> <package> -> manifest -> blobs.

    Returns a dict keyed by blob merkle. Each value is a list holding one
    PackagePath for every (package, path) at which that blob appears.
    """
    with open(path) as stream:
        forensics = json.load(stream)

    by_merkle: dict[str, list[PackagePath]] = {}
    for package_name, package_info in forensics["packages"]["inner"].items():
        for blob_entry in package_info["manifest"]["blobs"]:
            blob_merkle = blob_entry["merkle"]
            locations = by_merkle.setdefault(blob_merkle, [])
            locations.append(PackagePath(package_name, blob_entry["path"]))
    return by_merkle
| |
| |
def main() -> int:
    """Print a CSV report of every bootfs file, largest first.

    Parses --forensics and --zbi-json from the command line, matches each
    'blob/<merkle>' bootfs entry against the package blobs listed in the
    forensics file, and writes 'path,size,paths,packages' rows to stdout.

    Row contents:
      - package blob: its bootfs path, size, one path within a package, and
        the package name (or 'multiple' if it is in more than one package)
      - blob missing from the forensics file: path 'assembly_generated' and
        package 'unknown'
      - bare bootfs file: only its bootfs path and size

    Returns:
        0 on success, for use as the process exit status.
    """
    # Local import: keeps this function self-contained without touching the
    # file-level import block.
    import csv

    parser = argparse.ArgumentParser(
        description=(
            "Print the sizes of all bootfs files as CSV, identifying "
            "package blobs by their path within the package"
        )
    )
    # Both inputs are mandatory; without `required=True` a missing flag
    # would surface later as a TypeError from open(None).
    parser.add_argument(
        "--forensics",
        type=Path,
        required=True,
        help="path to an assembly_builder_forensics.json file",
    )
    parser.add_argument(
        "--zbi-json",
        type=Path,
        required=True,
        help="path to the zbi.json file",
    )
    args = parser.parse_args()

    zbi_bootfs_entries = get_zbi_bootfs(args.zbi_json)
    bootfs_package_blobs = get_bootfs_package_blobs(args.forensics)

    blob_prefix = "blob/"
    bootfs_files: list[BootfsFile] = []

    for entry in zbi_bootfs_entries:
        name = str(entry["name"])
        size = int(entry["size"])

        if not name.startswith(blob_prefix):
            # A bare file in bootfs: just list it by path and size.
            bootfs_files.append(BootfsFile(name, size, None, []))
            continue

        # A package blob: look it up by merkle in bootfs_package_blobs.
        merkle = name.removeprefix(blob_prefix)
        locations = bootfs_package_blobs.get(merkle)
        if not locations:
            # The forensics file has no information about blobs created
            # during product assembly, so mark these as generated.
            bootfs_files.append(
                BootfsFile(name, size, "assembly_generated", ["unknown"])
            )
            continue

        # If the blob is at several paths, report just one (this is fairly
        # rare). min() makes the choice deterministic across runs, unlike
        # popping from a set.
        package_path = min(location.path for location in locations)
        # De-duplicate so a blob at two paths inside the same package is not
        # reported as being in 'multiple' packages.
        packages = sorted({location.package for location in locations})
        bootfs_files.append(BootfsFile(name, size, package_path, packages))

    # Emit CSV through csv.writer so that commas or quotes inside a path are
    # escaped; lineterminator="\n" matches what print() would have written.
    writer = csv.writer(sys.stdout, lineterminator="\n")
    writer.writerow(["path", "size", "paths", "packages"])
    for bootfs_file in sorted(
        bootfs_files, key=lambda item: item.size, reverse=True
    ):
        if bootfs_file.package_path is None:
            writer.writerow([bootfs_file.bootfs_path, bootfs_file.size, "", ""])
            continue

        # Blobs in several packages are mostly libs that are more readily
        # identified by their path than by a package name, so collapse the
        # package column to 'multiple' instead of listing them all.
        if len(bootfs_file.packages) == 1:
            package = bootfs_file.packages[0]
        else:
            package = "multiple"
        writer.writerow(
            [
                bootfs_file.bootfs_path,
                bootfs_file.size,
                bootfs_file.package_path,
                package,
            ]
        )

    return 0
| |
| |
if __name__ == "__main__":
    # Run only when executed as a script; main()'s return value becomes the
    # process exit status.
    exit_code = main()
    sys.exit(exit_code)