blob: 754df960c8f2fbe8d9a89bea7abecc07853d8d25 [file] [log] [blame]
#!/usr/bin/env python3.8
# Copyright 2017 The Fuchsia Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""
This tool takes in multiple manifest files:
* system image and archive manifest files from each package
* Zircon's bootfs.manifest, optionally using a subset selected by the
"group" syntax (e.g. could specify just "core", or "core,misc" or
"core,misc,test").
* "auxiliary" manifests
** one from the toolchain for the target libraries (libc++ et al)
** one from the Zircon/*-ulib build, which has the Zircon ASan libraries
** the unselected parts of the "main" manifests (i.e. Zircon)
It emits final /boot and /system manifests used to make the actual images,
final archive manifests used to make each package, and the build ID map.
The "auxiliary" manifests just supply a pool of files that might be used to
satisfy dependencies; their files are not included in the output a priori.
The tool examines each file in its main input manifests. If it's not an
ELF file, it just goes into the appropriate output manifest. If it's an
ELF file, then the tool figures out what "variant" it is (if any), such as
"asan" and what other ELF files it requires via PT_INTERP and DT_NEEDED.
It then finds those dependencies and includes them in the output manifest,
and iterates on their dependencies. Each dependency is found either in the
*-shared/ toolchain $root_out_dir for the same variant toolchain that built
the root file, or among the files in auxiliary manifests (i.e. toolchain
and Zircon libraries). For things built in the asan variant, it finds the
asan versions of the toolchain/Zircon libraries.
"""
from collections import namedtuple
import argparse
import elfinfo
import fnmatch
import itertools
import manifest
import os
import sys
import variant
# Module-level shorthand for variant.binary_info.  The functions below call
# it with a file path and treat a falsy result as "this is not a binary".
binary_info = variant.binary_info
# A binary is tracked as a pair: the manifest.manifest_entry that names it
# and the elfinfo.elf_info read from the file.
binary_entry = namedtuple('binary_entry', 'entry info')

# Context threaded through the recursive add_binary calls made inside
# collect_binaries.
binary_context = namedtuple(
    'binary_context', 'variant soname_map root_dependent')

# One output_manifest tuple is built for every --output argument.
output_manifest = namedtuple('output_manifest', 'file manifest')
def collect_auxiliaries(manifest, examined):
    """Index the ELF binaries found in the auxiliary manifests by target.

    Args:
        manifest: Iterable of manifest.manifest_entry items to scan.
        examined: Set that receives the source path of every entry scanned,
            whether or not it turns out to be a binary.

    Returns:
        A dict mapping each binary's target path to its binary_entry.
        Entries for which binary_info() reports nothing are left out.

    Raises:
        Exception: if two entries share a target path but name different
            source files.
    """
    by_target = {}
    for entry in manifest:
        examined.add(entry.source)
        info = binary_info(entry.source)
        if not info:
            # Not a binary; auxiliary manifests only contribute binaries.
            continue
        first_seen = by_target.get(entry.target)
        if first_seen is None:
            by_target[entry.target] = binary_entry(entry, info)
        elif first_seen.entry.source != entry.source:
            raise Exception(
                "'%s' in both %r and %r" %
                (entry.target, first_seen.entry, entry))
    return by_target
# Return an iterable of binary_entry for all the binaries in `manifest`
# and their dependencies from `aux_binaries`, and an iterable of
# manifest_entry for all the other files in `manifest`.
def collect_binaries(manifest, aux_binaries, examined):
    """Split `manifest` into binaries (with their dependencies) and the rest.

    Args:
        manifest: Iterable of manifest entries; this function reads their
            `source`, `target` and `group` fields.
        aux_binaries: Dict mapping target path to binary_entry, used as a
            pool from which dependencies may be pulled in.
        examined: Set that is updated with the path of every file whose
            contents influenced the result.

    Returns:
        A pair `(binaries, nonbinaries)`: an iterator over the collected
        binary_entry values, and a list of the manifest entries that were
        not treated as binaries.

    Raises:
        Exception: if one target (or one SONAME within a toolchain) maps to
            two different source files, or if reading an input file raises
            IOError.
        AssertionError: if a required dependency cannot be found or fails
            its sanity checks.
    """
    # As we go, we'll collect the actual binaries for the output
    # in this dictionary mapping entry.target to binary_entry.
    unexamined_binaries = {}
    binaries = {}

    # We'll collect entries in the manifest that aren't binaries here.
    nonbinaries = []

    # This maps GN toolchain (from variant.shared_toolchain) to a
    # dictionary mapping DT_SONAME string to binary_entry.
    soname_map_by_toolchain = {}

    def rewrite_binary_group(old_binary, group_override):
        """Return a copy of `old_binary` whose entry has the given group."""
        return binary_entry(
            old_binary.entry._replace(group=group_override), old_binary.info)

    def add_binary(binary, context=None, auxiliary=False):
        """Record `binary` in `binaries` and recursively add what it needs.

        `context` is a binary_context shared along one dependency chain;
        when it is None it is computed here from the binary itself.  The
        `auxiliary` flag is accepted but not read anywhere in this body.
        """

        # Add a binary by target name.
        def add_auxiliary(target, required, group_override=None):
            """Add `target` from `aux_binaries`; return True if it was found.

            This closure reads `binary` and `context` from the enclosing
            add_binary call at the time it runs, so it sees any rebinding
            done there before the call.
            """
            if group_override is None:
                # Inherit both the group and the context of the dependent.
                group_override = binary.entry.group
                aux_context = context
            else:
                # An explicit group means add_binary gets no context and
                # works one out for the auxiliary binary itself.
                aux_context = None
            # Look for the target in auxiliary manifests.
            aux_binary = aux_binaries.get(target)
            if required:
                assert aux_binary, (
                    "'%s' not in auxiliary manifests, needed by %r via %r" %
                    (target, binary.entry, context.root_dependent))
            if aux_binary:
                add_binary(
                    rewrite_binary_group(aux_binary, group_override),
                    aux_context, True)
                return True
            return False

        existing_binary = binaries.get(binary.entry.target)
        if existing_binary is not None:
            if existing_binary.entry.source != binary.entry.source:
                raise Exception(
                    "%r in both %r and %r" %
                    (binary.entry.target, existing_binary, binary))
            # If the old record was in a later group, we still need to
            # process all the dependencies again to promote them to
            # the new group too.
            if existing_binary.entry.group <= binary.entry.group:
                return

        examined.add(binary.entry.source)

        # If we're not part of a recursion, discover the binary's context.
        if context is None:
            binary_variant, variant_file = variant.find_variant(
                binary.info, binary.entry.target)
            if variant_file is not None:
                # This is a variant that was actually built in a different
                # place than its original name says. Rewrite everything to
                # refer to the "real" name.
                binary = binary_entry(
                    binary.entry._replace(source=variant_file),
                    binary.info.rename(variant_file))
                examined.add(variant_file)
            # The SONAME map is shared by every binary of the same shared
            # toolchain; this binary becomes the chain's root_dependent.
            context = binary_context(
                binary_variant,
                soname_map_by_toolchain.setdefault(
                    binary_variant.shared_toolchain, {}), binary)

        binaries[binary.entry.target] = binary
        assert binary.entry.group is not None, binary

        if binary.info.soname:
            # This binary has a SONAME, so record it in the map.
            soname_binary = context.soname_map.setdefault(
                binary.info.soname, binary)
            if soname_binary.entry.source != binary.entry.source:
                raise Exception(
                    "SONAME '%s' in both %r and %r" %
                    (binary.info.soname, soname_binary, binary))
            if binary.entry.group < soname_binary.entry.group:
                # Update the record to the earliest group.
                context.soname_map[binary.info.soname] = binary

        # The PT_INTERP is implicitly required from an auxiliary manifest.
        if binary.info.interp:
            add_auxiliary('lib/' + binary.info.interp, True)

        # The variant might require other auxiliary binaries too.
        for variant_aux, variant_aux_group in context.variant.aux:
            add_auxiliary(variant_aux, True, variant_aux_group)

        # Handle the DT_NEEDED list.
        for soname in binary.info.needed:
            # The vDSO is not actually a file.
            if soname == 'libzircon.so':
                continue

            lib = context.soname_map.get(soname)
            if lib and lib.entry.group <= binary.entry.group:
                # Already handled this one in the same or earlier group.
                continue

            # The DT_SONAME is libc.so, but the file is ld.so.1 on disk.
            if soname == 'libc.so':
                soname = 'ld.so.1'

            # Translate the SONAME to a target file name.
            target = context.variant.soname_target(soname)
            if add_auxiliary(target, False):
                # We found it in an existing manifest.
                continue

            # Check if it's elsewhere in the input set.
            lib = unexamined_binaries.get(target)
            if lib is None:
                # It must be in the shared_toolchain output directory.
                shared_toolchain = context.variant.shared_toolchain
                # TODO(fxbug.dev/38226): See //sdk/lib/fdio/BUILD.gn.
                # libFuzzer depends on libfdio, so fuzzers need to use a
                # version of fdio without SanitizerCoverage instrumentation to
                # avoid polluting coverage data for the code under test.
                if soname == 'libfdio.so' and shared_toolchain.endswith(
                        '-fuzzer-shared'):
                    shared_toolchain = shared_toolchain[:-len('-fuzzer-shared')]
                    shared_toolchain += '-shared'
                # Context like group is inherited from the dependent.
                lib_entry = binary.entry._replace(
                    source=os.path.join(shared_toolchain, soname),
                    target=target)

                assert os.path.exists(lib_entry.source), (
                    "missing %r needed by %r via %r" %
                    (lib_entry, binary, context.root_dependent))

                # Read its ELF info and sanity-check.
                lib = binary_entry(lib_entry, binary_info(lib_entry.source))
                assert lib.info and lib.info.soname == soname, (
                    "SONAME '%s' expected in %r, needed by %r via %r" %
                    (soname, lib, binary, context.root_dependent))

            # Recurse.
            add_binary(lib, context)

    # First pass: classify every manifest entry as binary or non-binary.
    for entry in manifest:
        info = None
        # Don't inspect data or firmware resources in the manifest. Regardless
        # of the bits in these files, we treat them as opaque data.
        try:
            if not entry.target.startswith(
                    'data/') and not entry.target.startswith('lib/firmware/'):
                info = binary_info(entry.source)
        except IOError as e:
            raise Exception('%s from %s' % (e, entry))
        if info:
            # When a target is listed more than once, keep the entry with
            # the lowest group value.
            if (entry.target not in unexamined_binaries or entry.group <
                    unexamined_binaries[entry.target].entry.group):
                unexamined_binaries[entry.target] = binary_entry(entry, info)
        else:
            nonbinaries.append(entry)

    # Second pass: add each binary, pulling in its dependencies recursively.
    for binary in unexamined_binaries.values():
        add_binary(binary)

    # Every input binary must have ended up in the output set.
    for target in unexamined_binaries.keys():
        assert target in binaries, (
            "Target %s missing from %s" % (target, binaries.keys()))

    return iter(binaries.values()), nonbinaries
def strip_binary_manifest(
        manifest, stripped_dir, build_id_dir, toolchain_lib_dirs, examined):
    """Make every binary in `manifest` stripped and find its debug file.

    Args:
        manifest: Iterable of (entry, info) pairs (binary_entry tuples).
        stripped_dir: Directory under which stripped copies are written, at
            `<stripped_dir>/<entry.target>`, for binaries that arrive
            unstripped.
        build_id_dir: Accepted for interface compatibility; this function
            does not read it.
        toolchain_lib_dirs: Iterable of toolchain library directories.  A
            stripped binary living under one of these has its debug file
            looked up in `<dir>/debug/.build-id/`.
        examined: Set that is updated with the debug and stripped files
            this function looked at.

    Returns:
        A tuple `(stripped_manifest, debug_list, new_output)`: the list of
        binary_entry for the stripped files, the list of debug-file infos
        that carry a build ID, and a boolean that is True when at least one
        `info.strip()` call reported true while making a stripped copy.

    Raises:
        AssertionError: if a debug or stripped file is invalid, does not
            match its counterpart, or lacks a build ID outside of a file
            with 'test' in its name.
    """
    new_output = False

    def find_debug_file(filename):
        """Return the info of the unstripped twin of `filename`, or None."""
        # In the Zircon makefile build, the file to be installed is called
        # foo.strip and the unstripped file is called foo. In the new Zircon
        # GN build, the file to be installed is called foo and the unstripped
        # file is called foo.debug. In the Fuchsia GN build, the file to be
        # installed is called foo and the unstripped file has the same name in
        # the exe.unstripped or lib.unstripped subdirectory.
        if filename.endswith('.strip'):
            debugfile = filename[:-6]
        elif os.path.exists(filename + '.debug'):
            debugfile = filename + '.debug'
        else:
            # Check for toolchain runtime libraries, which are stored under
            # {toolchain}/lib/.../libfoo.so, and whose unstripped file will
            # be under {toolchain}/lib/debug/.build-id/xx/xxxxxx.debug.
            real_filename = os.path.realpath(filename)
            lib_dir = None
            for candidate_dir in toolchain_lib_dirs:
                if real_filename.startswith(
                        os.path.realpath(candidate_dir) + os.sep):
                    lib_dir = candidate_dir
                    break
            if lib_dir:
                # Named differently from the outer `build_id_dir` parameter
                # so that the two are not confused.
                toolchain_build_id_dir = os.path.join(
                    lib_dir, 'debug', '.build-id')
                if not os.path.exists(toolchain_build_id_dir):
                    return None
                info = elfinfo.get_elf_info(filename)
                debugfile = os.path.join(
                    toolchain_build_id_dir, info.build_id[:2],
                    info.build_id[2:] + '.debug')
                if not os.path.exists(debugfile):
                    return None
                # Pass filename as fallback so we don't fallback to the
                # build-id entry name.  This path returns directly, without
                # the validity assert or the examined.add() done below.
                return binary_info(
                    debugfile, fallback_soname=os.path.basename(filename))
            else:
                dirname, relname = os.path.split(filename)
                if relname.endswith('.so') or '.so.' in relname:
                    subdir = 'lib.unstripped'
                else:
                    subdir = 'exe.unstripped'
                debugfile = os.path.join(dirname, subdir, relname)
                while not os.path.exists(debugfile):
                    # For dir/foo/bar, if dir/foo/exe.unstripped/bar
                    # didn't exist, try dir/exe.unstripped/foo/bar.
                    parent, leaf = os.path.split(dirname)
                    if not parent or not leaf:
                        return None
                    dirname, relname = parent, os.path.join(leaf, relname)
                    debugfile = os.path.join(dirname, subdir, relname)
                # NOTE(review): the loop above only falls through once
                # debugfile exists, so this fallback looks unreachable; it
                # is kept as-is pending confirmation.
                if not os.path.exists(debugfile):
                    debugfile = os.path.join(subdir, filename)
                    if not os.path.exists(debugfile):
                        return None
        debug = binary_info(debugfile)
        assert debug, (
            "Debug file '%s' for '%s' is invalid" % (debugfile, filename))
        examined.add(debugfile)
        return debug

    # The toolchain-supplied shared libraries are delivered unstripped. For
    # these, strip the binary right here and update the manifest entry to point
    # to the stripped file.
    def make_debug_file(entry, info):
        """Strip `info` into stripped_dir; return (entry, info, debug)."""
        # Without this declaration the assignment below would only create a
        # local, and the enclosing function would always report False.
        nonlocal new_output
        debug = info
        stripped = os.path.join(stripped_dir, entry.target)
        os.makedirs(os.path.dirname(stripped), exist_ok=True)
        if info.strip(stripped):
            new_output = True
        # Re-read the stripped file regardless of what strip() returned.
        info = binary_info(stripped)
        assert info, (
            "Stripped file '%s' for '%s' is invalid" %
            (stripped, debug.filename))
        examined.add(debug.filename)
        examined.add(stripped)
        return entry._replace(source=stripped), info, debug

    stripped_manifest = []
    debug_list = []
    for entry, info in manifest:
        assert entry.source == info.filename
        if info.stripped:
            debug = find_debug_file(info.filename)
        else:
            entry, info, debug = make_debug_file(entry, info)
        stripped_manifest.append(binary_entry(entry, info))
        if debug is None:
            print('WARNING: no debug file found for %s' % info.filename)
            continue
        assert not debug.stripped, "'%s' is stripped" % debug.filename
        # Apart from filename, sizes and the stripped flag, the two files
        # must describe the same binary.
        assert info == debug._replace(
            filename=info.filename, sizes=info.sizes,
            stripped=True), ("Debug file mismatch: %r vs %r" % (info, debug))
        if debug.build_id:
            debug_list.append(debug)
        else:
            # Every binary should have a build ID, except for test cases
            # specifically testing missing-build-ID or missing-PT_NOTE cases.
            # Those will have 'test' in the name.
            assert 'test' in os.path.basename(
                debug.filename), ("'%s' has no build ID" % debug.filename)
    return stripped_manifest, debug_list, new_output
def emit_manifests(args, selected, unselected):
    """Collect, strip and write the output manifests, plus the depfile.

    Args:
        args: Parsed command-line arguments.  This function reads
            `manifest`, `output`, `stripped_dir`, `build_id_dir`,
            `toolchain_lib_dir` and `depfile` from it.
        selected: Iterable of manifest entries handed to collect_binaries;
            each entry's `group` is used as an index into the outputs.
        unselected: Iterable of manifest entries handed to
            collect_auxiliaries as the pool of auxiliary files.

    Raises:
        Exception: if a depfile was requested but there is no output file
            to name as its target.
    """

    def update_file(file, contents, force=False):
        """Write `contents` to `file` unless it already holds exactly that."""
        # The size comparison is only a cheap pre-check: it compares a byte
        # count with a character count, so contents containing non-ASCII
        # text simply fall through to the rewrite.
        if (not force and os.path.exists(file) and
                os.path.getsize(file) == len(contents)):
            with open(file, 'r') as f:
                if f.read() == contents:
                    return
        with open(file, 'w') as f:
            f.write(contents)

    # The name of every file we examine to make decisions goes into this set.
    examined = set(args.manifest)

    # Collect all the inputs and reify.
    aux_binaries = collect_auxiliaries(unselected, examined)
    binaries, nonbinaries = collect_binaries(selected, aux_binaries, examined)

    # Prepare to collate groups.
    outputs = [output_manifest(file, []) for file in args.output]

    # Finalize the output binaries. If stripping wrote any new/changed files,
    # then force an update of the manifest file even if it's identical. The
    # manifest file's timestamp is what GN/Ninja sees as running this script
    # having touched any of its outputs, and GN/Ninja doesn't know that the
    # stripped files are implicit outputs (there's no such thing as a depfile
    # for outputs, only for inputs).
    binaries, _debug_files, force_update = strip_binary_manifest(
        binaries, args.stripped_dir, args.build_id_dir, args.toolchain_lib_dir,
        examined)

    # Collate groups.
    for entry in itertools.chain((binary.entry for binary in binaries),
                                 nonbinaries):
        outputs[entry.group].manifest.append(entry._replace(group=None))

    # Emit each primary manifest.  The depfile names the last one written
    # as its target.
    depfile_output = None
    for output in outputs:
        depfile_output = output.file
        # Sort so that functionally identical output is textually
        # identical.
        output.manifest.sort(key=lambda entry: entry.target)
        update_file(
            output.file, manifest.format_manifest_file(output.manifest),
            force_update)

    # Emit the depfile.
    if args.depfile:
        if depfile_output is None:
            # Fail with an explicit message instead of a NameError.
            raise Exception(
                'cannot write depfile %r: no output manifest to use as its '
                'target' % (args.depfile,))
        with open(args.depfile, 'w') as f:
            f.write(depfile_output + ':')
            f.writelines(' ' + name for name in sorted(examined))
            f.write('\n')
def parse_args():
    """Declare this script's own options and parse the command line.

    Only the options specific to this tool are registered here; the parser
    is then passed to manifest.common_parse_args, whose result is returned.
    """
    cli = argparse.ArgumentParser(
        description='''
Massage manifest files from the build to produce images.
''',
        epilog='''
The --cwd and --group options apply to subsequent --manifest arguments.
Each input --manifest is assigned to the preceding --output argument file.
Any input --manifest that precedes all --output arguments
just supplies auxiliary files implicitly required by other (later) input
manifests, but does not add all its files to any --output manifest. This
is used for shared libraries and the like.
''',
        fromfile_prefix_chars='@')

    # (flag, keyword arguments) pairs, registered in this order.
    option_table = (
        (
            '--build-id-dir',
            dict(
                required=False,
                metavar='DIR',
                help='.build-id directory to populate when stripping'),
        ),
        (
            '--toolchain-lib-dir',
            dict(
                default=[],
                action='append',
                metavar='DIR',
                help='Path to a toolchain library directory (multiple allowed)'
            ),
        ),
        (
            '--depfile',
            dict(metavar='DEPFILE', help='Ninja depfile to write'),
        ),
        (
            '--stripped-dir',
            dict(
                required=True,
                metavar='STRIPPED_DIR',
                help='Directory to hold stripped copies when needed'),
        ),
    )
    for flag, settings in option_table:
        cli.add_argument(flag, **settings)

    return manifest.common_parse_args(cli)
def main():
    """Script entry point: parse the command line, then emit the manifests."""
    parsed = parse_args()
    emit_manifests(parsed, parsed.selected, parsed.unselected)
# Run main() only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()