blob: 2df080c19d981f112ce98850eed687c8c199c059 [file] [log] [blame]
#!/bin/bash
# Copyright 2021 The Fuchsia Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
function usage() {
cat <<EOF
usage: $0 DIR1 DIR2 [RELPATH]
Compares two build directories for artifact differences.
Comparison logic tries to account for the file type in
choosing a diff strategy.
If RELPATH is provided, then compare DIR1/RELPATH vs. DIR2/RELPATH.
The joined path may reference a directory or file.
Run this from the Fuchsia source checkout root dir ($FUCHSIA_DIR),
because it references some tools from the source tree.
Example: Compare two clean builds with same output dir:
fx set ...
fx clean-build
cp -p -r out/default out/default.bkp
fx clean-build
cp -p -r out/default out/default.bkp2
$0 out/default.bkp out/default.bkp2
EOF
}
readonly script="$0"
# assume script is always with path prefix, e.g. "./$script"
readonly script_dir="${script%/*}" # dirname
readonly script_basename="${script##*/}" # basename
source "$script_dir"/../../build/rbe/common-setup.sh
project_root="$default_project_root"
# json formatting (as pipe): jq . < stdin > stdout
readonly jq="$project_root"/prebuilt/third_party/jq/"$HOST_PLATFORM"/bin/jq
function json_format() {
"$jq" . < "$1" > "$1".formatted
}
# GLOBAL MUTABLES
# Accumulate paths to unexpected differences here.
# bash: array variables are not POSIX
unexpected_diffs=()
unexpected_matches=()
# These files match or differ, but we didn't know what to expect.
unclassified_diffs=()
unclassified_matches=()
# These files were skipped.
# It might be useful to compare these after ALL
# other differences have been eliminated.
skipped_files=()
# Ignored files are unimportant will never be compared.
ignored_files=()
unspecified_files=()
function diff_json() {
json_format "$1" || { echo "Failed to format $1" ; return 1;}
json_format "$2" || { echo "Failed to format $2" ; return 1;}
diff -u "$1".formatted "$2".formatted
# return with the exit code of diff
}
function diff_text() {
diff -u "$1" "$2"
}
function diff_binary() {
diff -q "$1" "$2"
}
function exe_unstripped_expect() {
# Print "diff/match/unknown" depending on whether this file is known to be
# reproducible across identical builds.
local path="$1"
# e.g. exe.unstripped/tool_name
base="$(basename "$path")"
case "$base" in
archivist* | \
base_resolver | \
cobalt | \
cobalt_app_unittests | \
codec_runner_sw_aac | \
component_manager | \
component_manager_boot_env | \
component_manager_test)
# TODO(fangism): add many more cases of known diffs
echo "diff" ;;
*) echo "match" ;;
esac
}
function diff_file_relpath() {
# $1 is left dir
# $2 is right dir
# $3 is a relative path under both dirs, and is itself not a directory.
# This file name is reported in diagnostics.
# one could also use an all-inclusive diff tool like https://diffoscope.org/
local left="$1/$3"
local right="$2/$3"
local common_path="$3"
filebase="$(basename "$common_path")"
# TODO(fangism): Some files are stored as blobs so content differences
# appear as filename entry differences. Skip these. Perhaps silently?
# Testing -r also allows for symlinks.
test -r "$left" || {
printf "%s does not exist\n" "$left"
return
}
test -r "$right" || {
echo "%s does not exist\n" "$right"
return
}
expect=""
# Classify each category of files with expectations in each case below:
# expect={diff,match,unknown,ignore,skip}; diff...
# Leave blank for ignored files (not compared).
# "unknown" means unclassified and could contain a mix of matches/diffs.
# Goal:
# * Identify and classify known differences (eliminate unknowns).
# * Gradually reduce sources of differences.
#
# TODO(fangism): different expectations in clean-vs-clean /
# clean-vs-incremental modes.
case "$filebase" in
# The exit status of this case statement will be used to determine
# whether or not the given file is an erroneous diff.
#
# Generally:
# diff_text for text files that are expected to match
# diff_binary for binaries or known large textual differences
# depfiles
*.d) expect=match; diff_text "$left" "$right" ;;
# C++ object files (binary)
# Nondeterminism due to __TIME__-stamping has been eliminated
# and is continuously verified by another builder.
*.o) expect=match; diff_binary "$left" "$right"
# TODO(fangism): compare objdumps for details
# See build/rbe/detail-diff.sh.
;;
*.so) expect=match; diff_binary "$left" "$right" ;;
*.dylib) expect=match; diff_binary "$left" "$right" ;;
*.a) expect=match; diff_binary "$left" "$right" ;;
# Rust libraries (binary)
# There may be unexpected rmeta mismatches.
# See http//https://fxbug.dev/42079524, https://github.com/rust-lang/rust/issues/113584
*.rlib) expect=match; diff_binary "$left" "$right" ;;
*.rmeta) expect=match; diff_binary "$left" "$right" ;;
# Generated code
*.rs)
case "$common_path" in
gen/src/*/qmi-protocol.rs)
expect=diff ;; # ordering diff
*) expect=match ;;
esac
diff_text "$left" "$right"
;;
*.vbmeta) expect=unknown; diff_binary "$left" "$right" ;;
memory_metrics_registry.cb.h)
expect=diff; diff_text "$left" "$right" ;; # ordering diff
# The following groups of files have known huge diffs,
# so omit details from the general report, and diff_binary.
meta.far) expect=unknown; diff_binary "$left" "$right" ;;
contents) expect=unknown; diff_binary "$left" "$right" ;;
all_blobs.json) expect=skip ;; # too big right now
blobs.json) expect=diff; diff_binary "$left" "$right" ;;
# Many of blobs.json do actually match
blob.manifest) expect=diff; diff_binary "$left" "$right" ;; # many hashes
blobs.manifest) expect=unknown; diff_binary "$left" "$right" ;;
package_manifest.json) expect=unknown; diff_binary "$left" "$right" ;;
targets.json)
case "$common_path" in
gen/gopaths/*) expect=match; diff_json "$left" "$right" ;;
amber-files/repository/targets.json) expect=skip ;; # too big right now
*) expect=diff ;; # diffs: many hashes
esac
;;
*.targets.json) expect=skip ;; # diffs: sig, expires
outputs.json) expect=skip ;; # ordering differences from zircon/tools/zither
snapshot.json) expect=skip ;; # diffs: sig, expires, version (use diff_json)
*.root.json) expect=skip ;; # diffs: sig, expires
*.snapshot.json) expect=skip ;; # diffs: sig, expires, version
timestamp.json) expect=ignore ;; # diffs: sig, expires, version
elf_sizes.json) expect=diff; diff_json "$left" "$right" ;; # diffs: build_id
recovery-eng_blobs.json) expect=diff; diff_json "$left" "$right" ;; # diffs: bytes, merkle, size (ordering)
*.zbi.json) expect=unknown; diff_json "$left" "$right" ;; # diffs: crc32, size
update_packages.manifest.json) expect=diff; diff_json "$left" "$right" ;; # hashes
images.json) expect=skip ;; # too many diffs
transfer.json) expect=skip ;; # look like list of blob hashes
compile_commands.json) expect=skip ;; # too many
# printed types contain memory addresses
test.stringarrays.api_summary.json ) expect=ignore ;;
test.experimentalzxctypes.api_summary.json) expect=ignore ;;
docs.goldens.json)
# in fuchsia-idk-build-*/gen/zircon/vdso
# looks like ordering differences
expect=ignore ;;
# Diff formatted JSON for readability.
*.json)
case "$common_path" in
host-tools/goroot/src/cmd/interna/test2json/testdata/*.json)
expect=unknown ;;
*) expect=match ;;
esac
diff_json "$left" "$right"
;;
*.json.formatted) expect=ignore ;; # This is remant from an earlier diff.
# recovery things
recovery-eng_additional_boot_args.txt) expect=diff; diff_text "$left" "$right" ;; # hashes
recovery-eng_pkgsvr_index) expect=diff; diff_text "$left" "$right" ;; # hashes
update_packages.manifest) expect=diff; diff_text "$left" "$right" ;; # hashes
*.image_assembly_inputs) expect=skip ;; # too many blob/hash differences
*.image_assembler_all_inputs) expect=skip ;; # too many blob/hash differences
manifest.fini) expect=skip ;; # looks like ordering differences
# Bazel-related outputs
java.log)
case "$common_path" in
gen/build/bazel/*/java.log) expect=ignore ;; # log with timestamps
*) expect=unknown ;;
esac
;;
lock)
case "$common_path" in
gen/build/bazel/*/lock) expect=ignore ;; # contains PID
*) expect=unknown ;;
esac
;;
command_port)
case "$common_path" in
gen/build/bazel/*/command_port) expect=ignore ;; # randomly chosen port
*) expect=unknown ;;
esac
;;
request_cookie)
case "$common_path" in
gen/build/bazel/*/request_cookie) expect=ignore ;; # looks like hash
*) expect=unknown ;;
esac
;;
response_cookie)
case "$common_path" in
gen/build/bazel/*/response_cookie) expect=ignore ;; # looks like hash
*) expect=unknown ;;
esac
;;
server_info.rawproto)
case "$common_path" in
gen/build/bazel/*/server_info.rawproto) expect=ignore ;; # probably PID
*) expect=unknown ;;
esac
;;
server.starttime) expect=ignore ;; # timestamp
server.pid.txt) expect=ignore ;; # contains PID
command.log)
case "$common_path" in
gen/build/bazel/*/command.log) expect=ignore ;; # subject to build parallelism and completion ordering
*) expect=unknown ;;
esac
;;
ffx.log)
case "$common_path" in
gen/build/bazel/*/cache/logs/ffx.log) expect=ignore ;; # log with timestamps
*) expect=unknown ;;
esac
;;
volatile-status.txt) expect=ignore ;; # bears timestamp
test.cache_status)
case "$common_path" in
bazel-out/*/test.cache_status) expect=diff; diff_binary "$left" "$right" ;;
*) expect=unknown ;;
esac
;;
test.log)
case "$common_path" in
bazel-out/*/test.log) expect=ignore ;; # reports test time duration
*) expect=unknown ;;
esac
;;
test.xml)
case "$common_path" in
bazel-out/*/test.xml) expect=ignore ;; # reports test time duration
*) expect=unknown ;;
esac
;;
action_cache_v15.blaze) expect=ignore ;;
filename_index_v15.blaze) expect=ignore ;;
command.profile.gz) expect=ignore ;;
workspace-events.log*) expect=ignore ;;
# numbered bazel action logs (unordered)
stdout-*) expect=ignore ;;
stderr-*) expect=ignore ;;
# Various binaries.
*.blk) expect=unknown; diff_binary "$left" "$right" ;;
*.vboot) expect=unknown; diff_binary "$left" "$right" ;;
*.zbi) expect=unknown; diff_binary "$left" "$right" ;;
# Most archives carry timestamp information of their contents.
# One way to make this reproducible is to force a magic date/time
# while archiving, which effectively removes time variance.
*.tar | *.tar.gz | *.tgz | *.pyz | *.zip) expect=diff; diff_binary "$left" "$right" ;;
# Ignore ninja logs, as they bear timestamps,
# and are non-essential build artifacts.
.ninja.log) expect=ignore ;;
# Ignore filesystem access trace files (fsatrace logs).
# They may contain nondeterministic paths to /proc/PID
*_trace.txt) expect=ignore ;;
# like exe.unstripped/*.map files
# .map files are side-effect outputs of linking binaries, and not consumed
# anywhere else important.
# Many of these (but not all) reference mktemp paths (Rust toolchain).
*.map) expect=ignore ;;
# Ignore stamp files.
*.stamp) expect=ignore ;;
# Ignore temporary and backup files.
*.tmp) expect=ignore ;;
*.bak) expect=ignore ;;
*.bkp) expect=ignore ;;
# All others.
# Binary files diffs will still only be reported tersely.
*)
file_type="$(file "$left" | head -n 1 | cut -d: -f2-)"
case "$file_type" in
# Binary examples:
# ELF 64-bit LSB pie executable, x86-64, version 1 (SYSV),... dynamically linked, interpreter ld.so.1, no section header
# Mach-O universal binary with 2 architectures: [x86_64:Mach-O 64-bit executable x86_64] [arm64e:Mach-O 64-bit executable arm64e]
*ELF*executable* | *Mach-O*binary*)
case "$common_path" in
exe.unstripped/*) expect="$(exe_unstripped_expect "$common_path")" ;;
*) expect=unknown ;;
esac
diff_binary "$left" "$right"
;;
# Assume non-binaries are text.
*) expect=match; diff_text "$left" "$right" ;;
esac
esac
# Record unexpected and unclassified differences/matches.
diff_status="$?"
case "$expect" in
match)
test "$diff_status" = 0 || unexpected_diffs+=("$common_path") ;;
diff)
test "$diff_status" != 0 || unexpected_matches+=("$common_path") ;;
unknown)
if test "$diff_status" = 0
then unclassified_matches+=("$common_path")
else unclassified_diffs+=("$common_path")
fi
;;
ignore) ignored_files+=("$common_path") ;;
skip) skipped_files+=("$common_path") ;;
*) unspecified_files+=("$common_path") ;;
esac
}
function diff_select() {
# $1 and $2 are two directories, e.g. "out/default"
# $3 is the relative path down from $1 and $2, e.g. "subdir/" or "".
local relpath="$3"
local fullpath="$1/$3"
if test -d "$fullpath"
then diff_dir_recursive "$1" "$2" "$relpath/"
# TODO(fangism): what about test -L for symlinks?
else diff_file_relpath "$1" "$2" "$relpath"
fi
}
function diff_dir_recursive() {
# $1 and $2 are two directories, e.g. "out/default"
# $3 is the relative path down from $1 and $2, e.g. "subdir/" or "".
# For dual-traversal, arbitrarily use $2's subdirs.
# echo "Comparing: $sub"
local sub="$3" # sub-dir or file
# Ignore some dirs.
case "$sub" in
# Ignore files whose names are content-hash like.
amber-files/repository/blobs/) return ;;
amber-files/repository/targets/) return ;;
# bazel runfiles dirs use absolute symlinks, so ignore those
*.runfiles) return ;;
*) ;; # continue
esac
# Silence empty dirs.
if ! ls "$2/$sub"*
then return # empty dir
fi > /dev/null 2>&1
for f in "$2/$sub"*
do
filebase="$(basename "$f")"
relpath="$sub$filebase"
diff_select "$1" "$2" "$relpath"
done
}
test "$#" -ge 2 || { usage; exit 2; }
if test "$#" = 3
then diff_select "$1" "$2" "$3"
else diff_dir_recursive "$1" "$2" ""
fi
# Summarize findings:
echo "======== COMPARISON SUMMARY ========"
exit_status=0
if test "${#unexpected_diffs[@]}" != 0
then
echo "UNEXPECTED DIFFS: (action: fix source of difference)"
for path in "${unexpected_diffs[@]}"
do echo " $path"
done
echo "end of UNEXPECTED DIFFS"
echo
exit_status=1
fi
# Good news: these files matched
if test "${#unexpected_matches[@]}" != 0
then
echo "UNEXPECTED MATCHES: (action: make these expect=match now?)"
for path in "${unexpected_matches[@]}"
do echo " $path"
done
echo "end of UNEXPECTED MATCHES"
echo
exit_status=1
fi
# This group mismatched, but we didn't know what to expect.
if test "${#unclassified_diffs[@]}" != 0
then
echo "UNCLASSIFIED DIFFS: (action: classify them as expect=diff)"
for path in "${unclassified_diffs[@]}"
do echo " $path"
done
echo "end of UNCLASSIFIED DIFFS"
echo
# Leave exit status as it were.
fi
# This group matched, but we didn't know what to expect.
if test "${#unclassified_matches[@]}" != 0
then
echo "UNCLASSIFIED MATCHES: (action: classify them as expect=match)"
for path in "${unclassified_matches[@]}"
do echo " $path"
done
echo "end of UNCLASSIFIED MATCHES"
echo
# Leave exit status as it were.
fi
if test "${#skipped_files[@]}" != 0
then
echo "SKIPPED FILES: (action: compare these after all other differences have been resolved)"
for path in "${skipped_files[@]}"
do echo " $path"
done
echo "end of SKIPPED FILES"
echo
# Leave exit status as it were.
fi
# Don't bother reporting "${ignored_files[@]}", the list is long and not useful.
# Make sure all cases are covered.
if test "${#unspecified_files[@]}" != 0
then
echo "UNSPECIFIED FILES: (action: classify into one of the above categories)"
for path in "${unspecified_files[@]}"
do echo " $path"
done
echo "end of UNSPECIFIED FILES"
echo
exit_status=1
fi
echo "Exiting with status $exit_status"
exit "$exit_status"