#!/usr/bin/env python3.8
# Source blob: bb40d835bc40b700393dd7e9e6b84fb8d0d88131
# Copyright 2021 The Fuchsia Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Wraps a command so that its outputs are timestamp-fresh only if their contents change.

Every declared output is renamed with a temporary suffix in the command.
If the command succeeds, the temporary file is moved over the original
declared output if the output did not already exist or the contents are
different. This conditional move is done for every declared output that
appears in the arguments list.

Assumptions:
  * Output files are their own token in the command's arguments.
    Limitation: This won't attempt to find/rename outputs in
    "--flag=out1,out2" (yet), nor outputs whose names appear in some file.
  * If x is a writeable path (output), then x.any_suffix is also writeable.
  * The command being wrapped does not change behavior with the name of its
    output arguments.

If any of the above assumptions do not hold, then bypass wrapping.
"""
import argparse
import dataclasses
import filecmp
import os
import shutil
import subprocess
import sys
from typing import Any, Callable, Collection, Dict, FrozenSet, Iterable, Sequence, Tuple
def _partition(
iterable: Iterable[Any],
predicate: Callable[[Any],
bool]) -> Tuple[Sequence[Any], Sequence[Any]]:
"""Splits sequence into two sequences based on predicate function."""
trues = []
falses = []
for item in iterable:
if predicate(item):
trues.append(item)
else:
falses.append(item)
return trues, falses
def files_match(file1: str, file2: str) -> bool:
    """Compares the contents of two files.

    The comparison runs in-process, so it neither depends on an external
    `diff` binary nor spawns a subprocess for every pair of files.

    Args:
      file1: path to the first file.
      file2: path to the second file.

    Returns:
      True if both files exist and their contents are identical.
      False if they differ, or if either one is missing or cannot be read.
    """
    try:
        # shallow=False compares actual contents instead of trusting equal
        # os.stat() signatures (type, size, mtime).
        return filecmp.cmp(file1, file2, shallow=False)
    except OSError:
        # Missing or unreadable file: report "no match" rather than raising,
        # so callers can treat it like any other difference.
        return False
def move_if_different(src: str, dest: str, verbose: bool = False):
    """Moves src over dest unless dest already has the same contents.

    Args:
      src: path of the freshly written (temporary) file.
      dest: path of the final location.
      verbose: if True, print whether dest was updated or left as-is.
    """
    if not os.path.exists(src):
        # The original command did not produce this file; nothing to move.
        return

    unchanged = os.path.exists(dest) and files_match(dest, src)
    if unchanged:
        # Leave dest (and its timestamp) alone; just drop the temporary.
        if verbose:
            print(f" === Cached: {dest}")
        os.remove(src)
        return

    if verbose:
        print(f" === Updated: {dest}")
    shutil.move(src, dest)
@dataclasses.dataclass
class TempFileTransform(object):
    """Represents a file name transform.

    temp_dir: Write temporary files in here.
      If blank, paths are relative to working directory.
    suffix: Add this suffix to temporary files, e.g. ".tmp".

    At least temp_dir or suffix must be non-blank.
    """
    temp_dir: str = ""
    suffix: str = ""

    @property
    def valid(self) -> bool:
        """True if at least one of temp_dir and suffix is non-blank."""
        return bool(self.temp_dir or self.suffix)

    def transform(self, path: str) -> str:
        """Maps a path to its temporary counterpart.

        Args:
          path: the original file path.

        Returns:
          path with suffix appended, joined onto temp_dir.
          Note: os.path.join() discards temp_dir when path is absolute.
        """
        return os.path.join(self.temp_dir, path + self.suffix)
def replace_tokens(
        command: Iterable[str],
        transform: Callable[[str], str],
) -> Tuple[Sequence[str], Dict[str, str]]:
    """Applies a transformation to every token of a command.

    Args:
      command: sequence of command tokens, some of which may be substituted.
      transform: function applied to each token; returning the token
        unchanged means "no substitution".

    Returns:
      modified command (some tokens replaced),
      dictionary of text substitutions made (original, new)
    """
    substitutions = {}
    rewritten_command = []
    for token in command:
        # TODO(fangism): lex a single arg into tokens, substitute, re-assemble
        # This would support outputs like "--flag=out1,out2..."
        replacement = transform(token)
        if replacement != token:
            # Remember only the tokens that actually changed.
            substitutions[token] = replacement
        rewritten_command.append(replacement)
    return rewritten_command, substitutions
@dataclasses.dataclass
class Action(object):
    """Represents a set of parameters of a single build action.

    Attributes:
      command: tokens of the command to execute.
      outputs: declared outputs of the action; command tokens equal to one
        of these are the ones that get renamed.
      label: identifier of the action, used only in diagnostic messages.
    """
    command: Sequence[str] = dataclasses.field(default_factory=list)
    # The default is an immutable frozenset, in agreement with the annotation.
    outputs: FrozenSet[str] = dataclasses.field(default_factory=frozenset)
    label: str = ""

    def _substitute_outputs(
            self,
            tempfile_transform: "TempFileTransform",
            verbose: bool = False,
    ) -> Tuple[Sequence[str], Dict[str, str]]:
        """Rewrites the command so declared outputs point to temporary paths.

        Also creates the parent directories of the temporary paths when the
        transform uses a temporary directory.

        Args:
          tempfile_transform: describes transformation to temporary file name.
          verbose: If True, print substituted command.

        Returns:
          substituted command,
          renamed outputs: keys: original file names,
            values: transformed temporary file names.
        """

        def replace_arg(arg: str) -> str:
            if arg in self.outputs:
                return tempfile_transform.transform(arg)
            return arg

        substituted_command, renamed_outputs = replace_tokens(
            self.command, replace_arg)

        if verbose:
            cmd_str = " ".join(substituted_command)
            print(f"=== substituted command: {cmd_str}")

        # mkdir when needed.
        if tempfile_transform.temp_dir:
            for new_arg in renamed_outputs.values():
                os.makedirs(os.path.dirname(new_arg), exist_ok=True)

        return substituted_command, renamed_outputs

    def run_cached(
            self,
            tempfile_transform: "TempFileTransform",
            verbose: bool = False) -> int:
        """Runs a modified command and conditionally moves outputs in-place.

        Args:
          tempfile_transform: describes transformation to temporary file name.
          verbose: If True, print substituted command.

        Returns:
          exit code of the modified command.
        """
        substituted_command, renamed_outputs = self._substitute_outputs(
            tempfile_transform, verbose=verbose)

        # Run the modified command.
        retval = subprocess.call(substituted_command)

        if retval != 0:
            # Temporary outputs are deliberately left behind for inspection.
            return retval

        # Otherwise command succeeded, so conditionally move outputs in-place.
        # TODO(fangism): This loop could be parallelized.
        for orig_out, temp_out in renamed_outputs.items():
            move_if_different(src=temp_out, dest=orig_out, verbose=verbose)

        if verbose:
            unrenamed_outputs = self.outputs - set(renamed_outputs)
            if unrenamed_outputs:
                # Having un-renamed outputs is not an error, but rather an
                # indicator of a potentially missed opportunity to cache
                # unchanged outputs.
                # Sorted so that the diagnostic is stable across runs.
                unrenamed_formatted = " ".join(sorted(unrenamed_outputs))
                print(f" === Un-renamed outputs: {unrenamed_formatted}")

        return 0

    def run_twice_and_compare_outputs(
            self,
            tempfile_transform: "TempFileTransform",
            verbose: bool = False) -> int:
        """Runs a command twice, copying declared outputs in between.

        Compare both sets of outputs, and error out if any differ.
        The advantage of this variant over others is that it eliminates
        output-path sensitivities by running the *same* command twice.
        One possible disadvantage is that this may expose behavioral
        differences due to the non/pre-existence of outputs ahead of
        running the command.

        Args:
          tempfile_transform: used to rename backup copies of outputs.
          verbose: if True, print more diagnostics (currently unused here).

        Returns:
          exit code 0 on command success and outputs match, else nonzero.
        """
        # Run the command the first time.
        retval = subprocess.call(self.command)

        # If the command failed, skip re-running.
        if retval != 0:
            return retval

        # Backup a copy of all declared outputs.
        renamed_outputs = {}
        for out in self.outputs:
            # TODO(fangism): what do we do about symlinks?
            # TODO(fangism): An output *directory* is unexpected, coming from
            # GN, but has been observed. For now skip it.
            if os.path.isfile(out):
                renamed_outputs[out] = tempfile_transform.transform(out)
            # A nonexistent output would be caught by action_tracer.py.

        for out, backup in renamed_outputs.items():
            if tempfile_transform.temp_dir:
                os.makedirs(os.path.dirname(backup), exist_ok=True)
            # preserve metadata such as timestamp
            shutil.copy2(out, backup, follow_symlinks=False)

        rerun_retval = subprocess.call(self.command)
        if rerun_retval != 0:
            print(
                f"Re-run of command {self.command} failed, while first time succeeded!?"
            )
            return rerun_retval

        return verify_files_match(fileset=renamed_outputs, label=self.label)

    def run_twice_with_substitution_and_compare_outputs(
            self,
            tempfile_transform: "TempFileTransform",
            verbose: bool = False) -> int:
        """Runs a command twice, the second time with renamed outputs, and compares.

        Caveat: If the contents of the outputs are sensitive to the names of
        the outputs, this will find too many differences.

        Args:
          tempfile_transform: used to rename backup copies of outputs.
          verbose: if True, print more diagnostics.

        Returns:
          exit code 0 on command success and outputs match, else nonzero.
        """
        # Prepare the renamed command (and its directories) up front.
        substituted_command, renamed_outputs = self._substitute_outputs(
            tempfile_transform, verbose=verbose)

        # Run the original command.
        retval = subprocess.call(self.command)

        # If the command failed, skip re-running.
        if retval != 0:
            return retval

        # Otherwise command succeeded, re-run with different output locations.
        rerun_retval = subprocess.call(substituted_command)
        if rerun_retval != 0:
            print(
                f"Re-run failed with substituted outputs of target [{self.label}]: {substituted_command}"
            )
            return rerun_retval

        return verify_files_match(fileset=renamed_outputs, label=self.label)
def verify_files_match(fileset: Dict[str, str], label: str) -> int:
    """Checks each file against its backup and reports any mismatches.

    Args:
      fileset: {file: backup} key-value pairs of files to compare.
        Backups that match are deleted to save space; the files named by
        the keys are always kept.
      label: An identifier for the action that was run, for diagnostics.

    Returns:
      exit code 0 if all files matched, else 1.
    """
    identical_backups = []
    mismatched_pairs = []
    # Classify every pair before deleting anything.
    for original, backup in fileset.items():
        # If either file is missing, this will fail, which indicates that
        # something is not working as expected.
        if files_match(original, backup):
            identical_backups.append(backup)
        else:
            mismatched_pairs.append((original, backup))

    # Remove any files that matched to save space.
    for backup in identical_backups:
        os.remove(backup)

    if not mismatched_pairs:
        return 0

    print(
        f"Repeating command for target [{label}] with renamed outputs produces different results:"
    )
    for original, backup in mismatched_pairs:
        print(f" {original} vs. {backup}")

    # Keep around different outputs for analysis.
    # Note: Even though the original command succeeded, forcing this to
    # fail may influence tools that examine the freshness of outputs
    # relative to the last succeeded command.
    return 1
def main_arg_parser() -> argparse.ArgumentParser:
    """Builds the command-line parser used by main().

    Options are registered in a fixed order, because that order is also the
    one shown in the --help text.
    """
    arg_parser = argparse.ArgumentParser(
        argument_default=[],
        description="Wraps a GN action to preserve unchanged outputs",
    )
    add = arg_parser.add_argument

    # label is only used for diagnostics
    add("--label", default="", type=str, help="The wrapped target's label")
    add("--outputs", help="An action's declared outputs", nargs="*")
    add("--depfile", help="A depfile that is written by the command")

    # String options that control where temporary outputs are written.
    for flag, default_value, description in (
        ("--temp-suffix", ".tmp", "Suffix to use for temporary outputs"),
        (
            "--temp-dir",
            "",
            "Temporary directory for writing, can be relative to working directory or absolute.",
        ),
    ):
        add(flag, type=str, default=default_value, help=description)

    add(
        "--verbose",
        default=False,
        action="store_true",
        help="Print information about which outputs were renamed/cached.",
    )
    add(
        "--disable",
        dest="enable",
        default=True,
        action="store_false",
        help="If disabled, run the original command as-is.",
    )

    # Switches that select the repeatability-checking modes.
    for flag, description in (
        (
            "--check-repeatability",
            "Check for repeatability: run the command twice, with different outputs, and compare.",
        ),
        (
            "--rename-outputs",
            "When checking for repeatability: rename command-line outputs on the second run.",
        ),
    ):
        add(flag, action="store_true", default=False, help=description)

    # Positional args are the command and arguments to run.
    add("command", help="The command to run", nargs="*")
    return arg_parser
def main() -> int:
    """Parses the command line and runs the wrapped command.

    Modes:
      disabled (or trivial copy/link commands): run the command as-is.
      check_repeatability: run the command twice, and compare the outputs.
      [default]: redirect outputs to temporary locations, and move them
        in-place to their original locations only if contents changed.

    Returns:
      exit code of the wrapped command, or of the selected check.

    Raises:
      ValueError: if both --temp-dir and --temp-suffix are blank.
    """
    parser = main_arg_parser()
    args = parser.parse_args()

    tempfile_transform = TempFileTransform(
        temp_dir=args.temp_dir,
        suffix=args.temp_suffix,
    )
    if not tempfile_transform.valid:
        raise ValueError(
            "Need either --temp-dir or --temp-suffix, but both are missing.")

    if not args.command:
        # The positional 'command' accepts zero tokens (nargs="*"), so an
        # empty command must be rejected here; error() prints the usage
        # message and exits instead of failing later with an IndexError.
        parser.error("Missing command to run.")

    wrap = args.enable

    # Decide whether or not to wrap the action script.
    ignored_scripts = {
        # If the action is only copying or linking, don't bother wrapping.
        "ln",
        "cp",  # TODO: Could conditionally copy if different.
        "rsync",
    }
    script = args.command[0]
    if os.path.basename(script) in ignored_scripts:
        wrap = False

    # If disabled, run the original command as-is.
    if not wrap:
        return subprocess.call(args.command)

    # Otherwise, rewrite the command using temporary outputs.
    outputs = set(args.outputs)
    if args.depfile:
        # The depfile is written by the command too, so treat it as an output.
        outputs.add(args.depfile)

    # Run a modified command that can leave unchanged outputs untouched.
    action = Action(
        command=args.command,
        outputs=outputs,
        label=args.label,
    )

    if args.check_repeatability:
        if args.rename_outputs:
            # This check variant will find path-sensitive outputs,
            # and nondeterministic outputs.
            return action.run_twice_with_substitution_and_compare_outputs(
                tempfile_transform=tempfile_transform, verbose=args.verbose)

        # This check will only find nondeterministic outputs.
        # For example, those affected by the current time.
        return action.run_twice_and_compare_outputs(
            tempfile_transform=tempfile_transform, verbose=args.verbose)

    return action.run_cached(
        tempfile_transform=tempfile_transform, verbose=args.verbose)
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    exit_status = main()
    sys.exit(exit_status)