blob: 441ff1a61f9f2613b5e3f97106432e85fa8700d3 [file] [log] [blame]
# Copyright 2026 The Fuchsia Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Provides a way to map Bazel labels to file bazel_paths"""
import os
import sys
from typing import Iterable
_SCRIPT_DIR = os.path.dirname(__file__)
sys.path.insert(0, _SCRIPT_DIR)
import build_utils
import workspace_utils
# Set this to True to debug operations locally in this script.
_DEBUG = False
def debug(msg: str) -> None:
# Print debug message to stderr if _DEBUG is True.
if _DEBUG:
print("BAZEL_ACTION_DEBUG: " + msg, file=sys.stderr)
# A list of built-in Bazel workspaces like @bazel_tools// which are actually
# stored in the prebuilt Bazel install_base directory with a timestamp *far* in
# the future, e.g. 2033-01-01. This is a hack that Bazel uses to determine when
# its install base has changed unexpectedly.
#
# If any file from these directories are listed in a depfile, they will force
# rebuilds on *every* Ninja invocations, because the tool will consider that
# the outputs are always older (as 2022 < 2033).
#
# This list is thus used to remove these from depfile inputs. Given that the
# files are part of Bazel's installation, their content would hardly change
# between build invocations anyway, so this is safe.
#
_BAZEL_BUILTIN_REPOSITORIES = (
"bazel_tools",
"bazel_features_globals",
"bazel_features_version",
"local_config_cc",
"local_config_platform",
# The two repositories below were added by Bazel 7.2.
"host_platform",
"internal_platforms_do_not_use",
# Created and used internally by @rules_python
"rules_python_internal",
# Introduced by bzlmod
"bazel_skylib",
"bazel_skylib+",
"platforms",
"rules_cc",
"rules_license",
"rules_license+",
"rules_python",
"rules_python+",
"pythons_hub", # A sub-repo created by rule_python+
"rules_rust",
"io_bazel_rules_go",
# Created from in-tree top-level module
"fuchsia_sdk_common",
)
# A list of file extensions for files that should be ignored from depfiles.
_IGNORED_FILE_SUFFIXES = (
# .pyc files contain their own timestamp which does not necessarily match
# their file timestamp, triggering the python interpreter to rewrite them
# more or less randomly. Apart from that, their content always matches the
# corresponding '.py' file which will always be listed as a real input,
# so ignoring them is always safe.
#
# See https://stackoverflow.com/questions/23775760/how-does-the-python-interpreter-know-when-to-compile-and-update-a-pyc-file
#
".pyc",
)
# A set of labels that should be ignored from depfiles.
_IGNORED_LABELS = {
# These files do not exist. They are returned by the cquery due to an
# unfortunate sequence of events.
#
# See details in https://fxbug.dev/434864899.
"//third_party/rust_crates/vendor/ansi_term-0.12.1:LICENSE",
"//third_party/rust_crates/vendor/nu-ansi-term-0.46.0:LICENSE",
"//third_party/rust_crates/vendor/remove_dir_all-0.5.3:LICENSE",
# Internal targets for building Go SDK from rules_go.
"@@rules_go++go_sdk+io_bazel_rules_nogo//:BUILD.bazel",
"@@rules_go++go_sdk+io_bazel_rules_nogo//:scope.bzl",
"@@rules_go++go_sdk+main___wrap_0//:ROOT",
}
# A list of external repository names which do not require a hash content file
# I.e. their implementation should already record the right dependencies to
# their input files.
_BAZEL_NO_CONTENT_HASH_REPOSITORIES = (
"fuchsia_build_config",
"fuchsia_build_info",
"fuchsia_prebuilt_rust",
"gn_targets",
)
# Maps from apparent repo names to canonical repo names.
#
# This dictionary is created for determining the paths for external
# repositories, which use canonical repo names as directory names.
_APPARENT_REPO_NAME_TO_CANONICAL = {
"fuchsia_prebuilt_rust": "+_repo_rules+fuchsia_prebuilt_rust",
}
class BazelLabelMapper(object):
"""Provides a way to map Bazel labels to file bazel_paths.
Usage is:
1) Create instance, passing the path to the Bazel workspace.
2) Call source_label_to_path(<label>) where the label comes from
a query.
"""
def __init__(self, bazel_workspace: str, output_dir: str):
# Get the $OUTPUT_BASE/external directory from the $WORKSPACE_DIR,
# the following only works in the context of the Fuchsia platform build
# because the workspace/ and output_base/ directories are always
# parallel entries in the $BAZEL_TOPDIR.
#
# Another way to get it is to call `bazel info output_base` and append
# /external to the result, but this would slow down every call to this
# script, and is not worth it for now.
#
self._root_workspace = os.path.abspath(bazel_workspace)
self._output_dir = os.path.abspath(output_dir)
output_base = os.path.normpath(
os.path.join(bazel_workspace, "..", "output_base")
)
self._output_base = output_base
assert os.path.isdir(output_base), f"Missing directory: {output_base}"
self._external_dir_prefix = (
os.path.realpath(os.path.join(output_base, "external")) + "/"
)
# Some repositories have generated files that are associated with
# a content hash file generated by //build/regenerator.py. This map is
# used to return the path to such file if it exists, or an empty
# string otherwise.
self._repository_hash_map: dict[str, str] = {}
def _get_repository_content_hash(self, repository_name: str) -> str:
"""Check whether a repository name has an associated content hash file.
Args:
repository_name: Bazel repository name, must start with an @,
e.g. '@foo' or '@@foo.1.0'
Returns:
If the corresponding repository has a content hash file, return
its path. Otherwise, return an empty string.
"""
# TODO(jayzhuang): Refine the logic for bazel_gn_target_action.py
# incremental builds.
#
# Use the innermost repository name for finding content hash file.
#
# The call here is unfortunately a bit awkward given how these helper
# functions are defined. We are planning on overhauling the logic for
# incremental builds so leaving it as-is for now.
repository_name = "@" + workspace_utils.innermost_repository_name(
f"{repository_name}//:root"
)
hash_file = self._repository_hash_map.get(repository_name, None)
if hash_file is None:
# Canonical names like @@foo.<version> need to be converted to just `foo` here.
file_prefix = repository_name[1:]
if file_prefix.startswith("@"):
name, dot, version = file_prefix[1:].partition(".")
if dot == ".":
file_prefix = name
else:
# No version, get rid of initial @@
file_prefix = file_prefix[1:]
# First look into $BUILD_DIR/regenerator_outputs/bazel_content_hashes/
# then into $WORKSPACE/fuchsia_build_generated/ which should contain
# symlinks to Ninja-generated content hashes.
hash_file = os.path.join(
self._output_dir,
"regenerator_outputs",
"bazel_content_hashes",
file_prefix + ".hash",
)
if not os.path.exists(hash_file):
# LINT.IfChange(fuchsia_build_generated_hashes)
hash_file = os.path.join(
self._root_workspace,
"fuchsia_build_generated",
file_prefix + ".hash",
)
if not os.path.exists(hash_file):
hash_file = ""
# LINT.ThenChange(//build/bazel/scripts/workspace_utils.py)
self._repository_hash_map[repository_name] = hash_file
return hash_file
def source_label_to_path(
self, label: str, relative_to: str | None = None
) -> str:
"""Convert a Bazel label to a source file into the corresponding file path.
Args:
label: A fully formed Bazel label, as return by a query. If BzlMod is
enabled, this expects canonical repository names to be present
(e.g. '@foo.12//src/lib:foo.cc' and no '@foo//src/lib:foo.cc').
relative_to: Optional directory path string.
Returns:
If relative_to is None, the absolute path to the corresponding source
file, otherwise, the same path relative to `relative_to`.
This returns an empty string if the file should be ignored, i.e.
not added to the depfile.
"""
#
# NOTE: Only the following input label formats are supported
#
# //<package>:<target>
# @//<package>:<target>
# @<name>//<package>:<target>
# @@<name>//<package>:<target>
# @@<name>.<version>//<package>:<target>
#
repository, sep, package_label = label.partition("//")
assert sep == "//", f"Missing // in source label: {label}"
if repository == "" or repository == "@":
# @// references the root project workspace, it should normally
# not appear in queries, but handle it here just in case.
#
# // references a path relative to the current workspace, but the
# queries are always performed from the root project workspace, so
# is equivalent to @// for this function.
repository_dir = self._root_workspace
from_external_repository = False
else:
# A note on canonical repository directory names.
#
# An external repository named 'foo' in the project's WORKSPACE.bazel
# file will be stored under `$OUTPUT_BASE/external/foo` when BzlMod
# is not enabled.
#
# However, it will be stored under `$OUTPUT_BASE/external/@foo.<version>`
# instead when BzlMod is enabled, where <version> is determined statically
# by Bazel at startup after resolving the dependencies expressed from
# the project's MODULE.bazel file.
#
# It is not possible to guess <version> here but queries will always
# return labels for items in the repository that look like:
#
# @@foo.<version>//...
#
# This is called a "canonical label", this allows the project to use
# @foo to reference the repository in its own BUILD.bazel files, while
# a dependency module would call it @com_acme_foo instead. All three
# labels will point to the same location.
#
# Queries always return canonical labels, so removing the initial @
# and the trailing // allows us to get the correct repository directory
# in all cases.
assert repository.startswith(
"@"
), f"Invalid repository name in source label {label}"
# @@ is used with canonical repo names, so remove both @@ and @.
repository_name = repository.removeprefix("@@").removeprefix("@")
repository_dir = (
self._external_dir_prefix
+ _APPARENT_REPO_NAME_TO_CANONICAL.get(
repository_name, repository_name
)
)
from_external_repository = True
package, colon, target = package_label.partition(":")
assert colon == ":", f"Missing colon in source label {label}"
path = os.path.join(repository_dir, package, target)
# Check whether this path is a symlink to something else.
# Use os.path.realpath() which always return an absolute path
# after resolving all symlinks to their final destination, then
# compare it with os.path.abspath(path):
real_path = os.path.realpath(path)
if real_path != os.path.abspath(path):
# This is symlink, so first resolve it to its destination.
path = real_path
# If the symlink points to another external repository, try
# to find a content hash file for it, or return an empty path.
if path.startswith(self._external_dir_prefix):
# This path points to generated files in another Bazel external
# repository. Check if the latter has a content hash file, or
# return an empty path.
repo_path = path[len(self._external_dir_prefix) :]
repo_name, sep, _ = repo_path.partition("/")
assert (
sep == "/"
), f"Unexpected external repository path: external/{repo_path} (from {label})"
path = self._get_repository_content_hash("@" + repo_name)
elif from_external_repository:
# This is generated file inside an external repository. Find a content
# hash file for it, or return an empty path.
path = self._get_repository_content_hash(repository)
if path:
assert os.path.isabs(
path
), f"Unexpected non-absolute path: {path} (from {label})"
# Check that the translated path does not point into the output_base
# as timestamps in this directory are not guaranteed to be consistent.
assert not path.startswith(
self._output_base
), f"Path should not be in Bazel output_base: {path} (from {label})"
if relative_to:
path = os.path.relpath(path, relative_to)
return path
def get_sources_for_labels(self, labels: list[str]) -> Iterable[str]:
sources: set[str] = set()
labels = [
label for label in labels if not is_ignored_input_label(label)
]
ignored_labels = []
for label in labels:
path = self.source_label_to_path(
label, relative_to=self._output_dir
)
if path:
if build_utils.is_likely_content_hash_path(path):
debug(f"{path} ::: IGNORED CONTENT HASH NAME")
else:
debug(f"{path} <-- {label}")
sources.add(path)
elif label_requires_content_hash(label):
debug(f"IGNORED: {label}")
ignored_labels.append(label)
if ignored_labels:
print(
"ERROR: Found ignored external repository files:",
file=sys.stderr,
)
for label in ignored_labels:
print(f" {label}", file=sys.stderr)
print(
"""
These files are likely generated by a Bazel repository rule which has
no associated content hash file. Due to this, Bazel may regenerate them
semi-randomly in ways that confuse Ninja dependency computations.
To solve this issue, change the build/bazel/scripts/bazel_label_mapper.py script to
add corresponding entries to the _BAZEL_NO_CONTENT_HASH_REPOSITORIES list, to
track all input files that the repository rule may access when it is run.
""",
file=sys.stderr,
)
raise ValueError("Found ignored labels")
return sources
def is_ignored_input_label(label: str) -> bool:
"""Return True if the label of a build or source file should be ignored."""
is_builtin = (
workspace_utils.innermost_repository_name(label)
in _BAZEL_BUILTIN_REPOSITORIES
)
is_ignored = (
label.endswith(_IGNORED_FILE_SUFFIXES) or label in _IGNORED_LABELS
)
return is_builtin or is_ignored
def label_requires_content_hash(label: str) -> bool:
"""Return True if the label or source file belongs to a repository
that requires a content hash file."""
return not (
workspace_utils.innermost_repository_name(label)
in _BAZEL_NO_CONTENT_HASH_REPOSITORIES
)