blob: 67d7d0bf637b4af16404fc1f4fdb201647ebd928 [file] [log] [blame]
# Copyright 2018 The Fuchsia Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
import functools
from urlparse import urlparse
from recipe_engine import recipe_api
from . import patch
# Name of an output property that is consumed by other recipe code and by the
# results uploader code in google3.
# Its value is a monotonic integer that corresponds to integration revisions,
# so we can stuff our results into systems that expect Piper changelist
# numbers.
REVISION_COUNT_PROPERTY = "integration-revision-count"
# Name of an output property that is consumed by the
# go/cq-incremental-builder-monitor_dev dashboard. It is set to True on the
# "checkout" step when a checkout method raises.
CHECKOUT_FAILED_PROPERTY = "checkout_failed"
# By default, skip patching GerritChanges which map to these projects. They are
# not valid projects to patch into standard Fuchsia checkouts.
SKIP_PATCH_PROJECTS = ("infra/recipes",)
class _CheckoutResults(object):
    """Represents a Fuchsia source checkout.

    Attributes:
        source_info (list(dict)): Per-project metadata for the checkout. Each
            entry is read through its "name" and "revision" keys.
    """

    def __init__(
        self,
        api,
        root_dir,
        snapshot_file,
        release_branch,
        release_version,
        source_info,
    ):
        """Stores the details of a finished checkout.

        Args:
            api: The recipe module API container (`self.m` of CheckoutApi).
            root_dir (Path): Root directory of the jiri checkout.
            snapshot_file (Path): Path to the jiri snapshot file.
            release_branch (str or None): Release branch, if applicable.
            release_version (str or None): Release version, if applicable.
            source_info (list(dict)): Per-project metadata for the checkout.
        """
        self._api = api
        self._root_dir = root_dir
        self._snapshot_file = snapshot_file
        self._release_branch = release_branch
        self._release_version = release_version
        self.source_info = source_info
        # Single-entry cache for changed_files(): either None or a
        # (cache_key, list_of_files) tuple.
        self._changed_files_cache = None

    @property
    def root_dir(self):
        """The path to the root directory of the jiri checkout."""
        return self._root_dir

    @property
    def snapshot_file(self):
        """The path to the jiri snapshot file."""
        return self._snapshot_file

    @property
    def release_branch(self):
        """Release branch corresponding to checkout if applicable, otherwise None."""
        return self._release_branch

    @property
    def release_version(self):
        """Release version of checkout if applicable, otherwise None."""
        return self._release_version

    def project(self, project_name, **kwargs):
        """Returns metadata for a project, looked up from this checkout's root.

        Args:
            project_name (str): Name of the jiri project.
            **kwargs (dict): Passed through to `api.checkout.project()`.
        """
        return self._api.checkout.project(
            project_name, checkout_root=self.root_dir, **kwargs
        )

    @property
    def integration_revision(self):
        """The integration revision that this checkout corresponds to.

        Raises:
            IndexError: If no integration change is being tested and
                `source_info` has no project named "integration".
        """
        # If triggered by integration CQ, then build_input_resolver will ensure
        # the input gitiles_commit is what we want.
        if self.contains_integration_patch:
            return self._api.buildbucket.build.input.gitiles_commit.id
        # Otherwise just use the revision that we actually checked out.
        for repo in self.source_info:
            if repo["name"] == "integration":
                return repo["revision"]
        raise IndexError("no 'integration' project found in source_info")

    @property
    def contains_integration_patch(self):
        """Returns whether we're testing an integration change (always a bool)."""
        changes = self._api.buildbucket.build.input.gerrit_changes
        # bool() so that an empty `gerrit_changes` container is never leaked
        # to callers in place of False.
        return bool(changes) and changes[0].project == "integration"

    def _upload_source_manifest(self, gcs_bucket, namespace=None):
        """Upload the jiri source manifest to GCS."""
        assert gcs_bucket
        with self._api.context(cwd=self._root_dir):
            source_manifest = self._api.jiri.source_manifest()
        with self._api.step.nest("upload source manifest"):
            self._api.upload.file_to_gcs(
                source=self._api.json.input(source_manifest),
                bucket=gcs_bucket,
                subpath="source_manifest.json",
                namespace=namespace,
            )

    def upload_results(self, gcs_bucket, namespace=None):
        """Upload snapshot to a given GCS bucket.

        Also sets the REVISION_COUNT_PROPERTY output property to the number of
        commits reachable from HEAD of the integration repository and, on
        non-Mac platforms, uploads the jiri source manifest.

        Args:
            gcs_bucket (str): Destination bucket; must be non-empty.
            namespace (str or None): Forwarded to `api.upload.file_to_gcs()`.
        """
        assert gcs_bucket
        with self._api.step.nest("upload checkout results") as presentation:
            self._api.upload.file_to_gcs(
                source=self.snapshot_file,
                bucket=gcs_bucket,
                subpath=self._api.path.basename(self.snapshot_file),
                namespace=namespace,
            )
            presentation.properties[REVISION_COUNT_PROPERTY] = int(
                self._api.git(
                    "set %s property" % REVISION_COUNT_PROPERTY,
                    "-C",
                    self._root_dir.join("integration"),
                    "rev-list",
                    "--count",
                    "HEAD",
                    step_test_data=lambda: self._api.raw_io.test_api.stream_output("1"),
                    stdout=self._api.raw_io.output(),
                ).stdout.strip()
            )
            # NOTE(review): assumed to run inside the "upload checkout
            # results" nest; this only affects step hierarchy - confirm.
            if not self._api.platform.is_mac:
                self._upload_source_manifest(gcs_bucket, namespace=namespace)

    def changed_files(self, test_data=("foo.cc", "bar.cc"), **kwargs):
        """Returns a list of absolute paths that were changed.

        Checks the git repo specified in buildbucket_input.gerrit_changes[0].
        The result is cached per distinct set of `kwargs`; every call returns
        a fresh list so callers may mutate it freely.

        Args:
            test_data (seq(str)): Mock list of changed files.
            **kwargs (dict): Passed through to `api.git.get_changed_files()`.

        Returns:
            Empty list if input gerrit_changes is empty or if the build is
            triggered by a change to a repo that's not included in the
            checkout. List of Paths otherwise.
        """
        bb_input = self._api.buildbucket.build.input
        if not bb_input.gerrit_changes:
            return []

        # Sort so that the key does not depend on keyword-argument order.
        # Keys are unique strings, so the values are never compared.
        cache_key = tuple(sorted(kwargs.items()))
        cached = self._changed_files_cache
        if cached is None or cached[0] != cache_key:
            with self._api.step.nest("get changed files"):
                project = bb_input.gerrit_changes[0].project
                with self._api.context(cwd=self._root_dir):
                    project_test_data = [
                        {
                            "name": project,
                            "path": self._api.path.abspath(self._root_dir)
                            if project == "project"
                            else self._api.path.abspath(self._root_dir.join(project)),
                        }
                    ]
                    try:
                        repo_path = self.project(project, test_data=project_test_data)[
                            "path"
                        ]
                    except self._api.jiri.NoSuchProjectError:
                        # The triggering repo is not part of this checkout.
                        return []
                with self._api.context(cwd=self._api.path.abs_to_path(repo_path)):
                    relative_files = self._api.git.get_changed_files(
                        test_data=test_data, **kwargs
                    )
                absolute_files = [
                    self._api.path.join(repo_path, changed) for changed in relative_files
                ]
                # We only expect this function to be called with one key per
                # build, so keeping a cache of one element should be
                # sufficient, while still being correct in case it is called
                # with different keys.
                cached = (cache_key, absolute_files)
                self._changed_files_cache = cached
        # Hand out a copy so that callers cannot corrupt the cached list.
        return list(cached[1])
def _nest(func):
    """Nest function call within "checkout" step.

    Check whether already inside a "checkout" step since some public
    methods in CheckoutApi call other public methods. Only the outermost
    decorated call opens the step; nested calls run `func` directly.

    If `func` raises, CHECKOUT_FAILED_PROPERTY is set to True on the
    "checkout" step before the exception propagates. The recorded
    presentation is always cleared when the outermost call finishes, so a
    failed checkout does not leave a stale presentation behind that would
    stop later calls from opening their own "checkout" step.

    Args:
        func (callable): A method of an object exposing `self.m.step.nest`
            and a `self._presentation` attribute.

    Returns:
        The wrapped method.
    """

    @functools.wraps(func)
    def wrapper(self, *args, **kwargs):
        # Already inside a "checkout" step opened by an outer call.
        if self._presentation:
            return func(self, *args, **kwargs)

        with self.m.step.nest("checkout") as pres:
            self._presentation = pres
            try:
                return func(self, *args, **kwargs)
            except BaseException:
                # Deliberately broad: the failure is only recorded here and
                # then re-raised unchanged.
                pres.properties[CHECKOUT_FAILED_PROPERTY] = True
                raise
            finally:
                self._presentation = None

    return wrapper
class CheckoutApi(recipe_api.RecipeApi):
    """An abstraction over how Jiri checkouts are created during Fuchsia CI/CQ
    builds."""

    # Re-exported on the class so the property name is reachable through the
    # API object as well as through the module-level constant.
    REVISION_COUNT_PROPERTY = REVISION_COUNT_PROPERTY
    # Name of the output property under which the manifest remote, manifest,
    # base manifest revision and patches of a checkout are recorded.
    CHECKOUT_INFO_PROPERTY = "checkout_info"

    # An invalid patch file represents a user error not an infra
    # failure. A user should correct their change.
    class PatchFileValidationError(recipe_api.StepFailure):
        """Raised when the patches.json of a change fails validation."""

        pass
def __init__(self, props, *args, **kwargs):
    """Initializes the module state.

    Args:
        props: Module properties. `props.gitiles_commit` is remembered only
            when its `host` field is set; otherwise no override commit is
            recorded.
        *args, **kwargs: Forwarded to the RecipeApi constructor.
    """
    super(CheckoutApi, self).__init__(*args, **kwargs)
    # Presentation of the enclosing "checkout" step while one is open.
    self._presentation = None
    commit_override = props.gitiles_commit
    self._gitiles_commit = commit_override if commit_override.host else None
def CheckoutResults(self, *args, **kwargs):
    """Build a _CheckoutResults bound to this module's dependencies.

    Outside this module, should only be used in testing example recipes.

    Args:
        *args, **kwargs: Forwarded to the _CheckoutResults constructor after
            the module API container.
    """
    module_api = self.m
    return _CheckoutResults(module_api, *args, **kwargs)
@_nest
def from_spec(self, checkout_spec, **kwargs):
    """Create a Fuchsia checkout described by a checkout spec.

    The spec's manifest, remote, attributes and is_release_version fields
    are forwarded to fuchsia_with_options().

    Args:
        checkout_spec (infra.fuchsia.Fuchsia.Checkout): Checkout spec
            protobuf.
        **kwargs (dict): Passed through to fuchsia_with_options().

    Returns:
        The value returned by fuchsia_with_options().
    """
    spec = checkout_spec
    results = self.fuchsia_with_options(
        manifest=spec.manifest,
        remote=spec.remote,
        attributes=spec.attributes,
        is_release_version=spec.is_release_version,
        **kwargs
    )
    return results
@_nest
def fuchsia_with_options(
    self,
    manifest,
    remote,
    path=None,
    project="integration",
    is_release_version=False,
    timeout_secs=45 * 60,
    incremental_build=False,
    **kwargs
):
    """Uses Jiri to check out a Fuchsia project.

    The root of the checkout is returned via _CheckoutResults.root_dir.
    After the checkout, a jiri snapshot is written to the cleanup directory
    and its contents are logged.

    Args:
        manifest (str): A path to the manifest in the remote (e.g.
            manifest/minimal).
        remote (str): A URL to the remote repository which Jiri will be
            pointed at.
        path (Path or None): Path to a directory in which to check out
            Fuchsia.
        project (str): The name that jiri should assign to the project.
        is_release_version (bool): Whether the checkout is a release
            version.
        timeout_secs (int): A timeout to assign to each Jiri operation.
        incremental_build (bool): Whether the checkout is being used for an
            incremental build.
        **kwargs (dict): Passed through to with_options().

    Returns:
        A _CheckoutResults containing details of the checkout.
    """
    # NOTE(review): indentation was lost in the copy of this file that was
    # reviewed. The nesting below (incremental path used only as a default
    # when no explicit `path` is given; cache guard wrapping only
    # with_options(); snapshot logging and release resolution outside the
    # jiri context) is a reconstruction - confirm against upstream.
    if not path:
        path = self.m.path["start_dir"].join("fuchsia")
        if incremental_build:
            path = self.m.path["cache"].join("incremental")
    # Incremental builds are specific to the Fuchsia project and are not
    # relevant in the general case of checking out a Jiri project, which is
    # why this logic is in this function and not in with_options().
    if incremental_build:
        cache_ctx = lambda: self.m.cache.guard("incremental")
    else:
        cache_ctx = self.m.nullcontext
    with cache_ctx():
        self.with_options(
            path,
            manifest,
            remote,
            project=project,
            timeout_secs=timeout_secs,
            incremental_build=incremental_build,
            **kwargs
        )
    with self.m.context(infra_steps=True, cwd=path):
        source_info = self.m.jiri.project(name="source-info").json.output
        snapshot_file = self.m.path["cleanup"].join("jiri_snapshot.xml")
        self.m.jiri.snapshot(snapshot_file)
    # Always log snapshot contents (even if uploading to GCS) to help debug
    # things like tryjob failures during roller-commits.
    self.m.file.read_text("read snapshot", snapshot_file)
    # If using a release version, resolve release version and release
    # branches.
    release_version = release_branch = None
    if is_release_version:
        release_version = self._get_release_version(path)
        release_branch = self._get_release_branch()
    return self.CheckoutResults(
        root_dir=path,
        snapshot_file=snapshot_file,
        release_branch=release_branch,
        release_version=release_version,
        source_info=source_info,
    )
@_nest
def with_options(self, path, *args, **kwargs):
    """Run _with_options() inside the checkout directory.

    Ensures that `path` exists, then delegates to _with_options() with the
    working directory set to `path` and steps marked as infra steps. Only
    context manager-related logic should go in this function; all other
    logic should go in _with_options().

    Args: see _with_options().
    """
    self.m.file.ensure_directory("ensure checkout dir", path)
    checkout_context = self.m.context(infra_steps=True, cwd=path)
    with checkout_context:
        return self._with_options(path, *args, **kwargs)
def _with_options(
    self,
    path,
    manifest,
    remote,
    project=None,
    attributes=(),
    build_input=None,
    fetch_packages=True,
    skip_patch_projects=SKIP_PATCH_PROJECTS,
    timeout_secs=None,
    incremental_build=False,
):
    """Initializes and populates a jiri checkout from a remote manifest.

    If a gitiles_commit was provided through the "gitiles_commit" property,
    this will set the buildbucket.build.input's gitiles_commit to the
    gitiles_commit from the property.

    The base manifest revision is chosen from the build input:
      * Gerrit change: head of the change's target branch; the change is
        then patched in (unless its project is in `skip_patch_projects`).
      * Gitiles commit in the manifest project: that commit.
      * Gitiles commit in another project: head of "main", with a jiri
        override pinning that project to the commit.
      * Neither: head of "main".

    On completion, the CHECKOUT_INFO_PROPERTY output property is set on the
    enclosing "checkout" step, so this must be called from within a method
    decorated with @_nest.

    Args:
        path (Path): The Fuchsia checkout root.
        manifest (str): Relative path to the manifest in the remote repository.
        remote (str): URL to the remote repository.
        project (str): The name that jiri should assign to the project.
        attributes (seq(str)): A list of jiri manifest attributes; projects or
            packages with matching attributes - otherwise regarded as optional -
            will be downloaded.
        build_input (buildbucket.build_pb2.Build.Input): The input to a buildbucket
            build. Defaults to the current build's input.
        fetch_packages (bool): Whether or not to fetch CIPD packages (and
            run jiri hooks). Running hooks could theoretically be a separate
            parameter but in practice there are no use cases for fetching
            packages without running hooks. And when we want to disable
            both, we generally care more about disabling fetching packages
            since it normally takes the longest time.
        skip_patch_projects (seq(str)): Do not attempt to patch these
            projects.
        timeout_secs (int): A timeout to assign to each Jiri operation.
        incremental_build (bool): Whether or not this checkout will be
            used for an incremental build.
    """
    assert manifest, "'manifest' must be set"
    assert remote, "'remote' must be set"
    build_input = build_input or self.m.buildbucket.build.input
    if self._gitiles_commit:
        # Override build input gitiles_commit with gitiles_commit from
        # properties.
        build_input.gitiles_commit.CopyFrom(self._gitiles_commit)
    # The revision of the manifest repository to import. We'll do any
    # patches and overrides on top of the checkout determined by this
    # version of the manifest repository.
    base_manifest_revision = None
    commit = None
    gerrit_change = None
    skip_partial = []
    patches = []  # Details of projects to patch (used in CQ).
    overrides = []  # Details of projects to override (used in local CI).
    if build_input.gerrit_changes:
        assert (
            len(build_input.gerrit_changes) == 1
        ), "build input contains more than one gerrit_change"
        gerrit_change = build_input.gerrit_changes[0]
        # The repository URL is derived from the Gerrit host by dropping the
        # "-review" part of the host name.
        skip_partial.append(
            "https://%s/%s"
            % (gerrit_change.host.replace("-review", ""), gerrit_change.project)
        )
        change_details = self._get_change_details(gerrit_change)
        # Always re-resolve HEAD rather than using the base commit resolved
        # by build_input_resolver, even for integration patches, because we
        # want to wait as late as possible to choose a base commit for the
        # checkout. This will make it more likely that we'll catch merge
        # conflicts and other types of collisions between changes.
        #
        # The most notable risk of doing this is that the spec might come
        # from a different version of the integration repo than the version
        # included in this checkout, which could cause issues if the spec is
        # not forwards-compatible with newer versions of the integration
        # repo (e.g. it references a jiri project that is deleted from the
        # manifests). However, this is extremely rare in practice because
        # we rarely make changes that simultaneously affect infra and
        # non-infra parts of integration like jiri manifests.
        base_manifest_revision = self._resolve_branch_head(
            remote, branch=change_details["branch"]
        )
        current_revision = self._get_current_revision(gerrit_change, change_details)
        if gerrit_change.project not in skip_patch_projects:
            patches.append(
                {
                    "host": "https://%s" % gerrit_change.host,
                    "project": gerrit_change.project,
                    "ref": current_revision["ref"],
                }
            )
        # TODO(olivernewman): Also load patches.json into the "patches" list
        # (which must happen after patching in the gerrit change so we have
        # patches.json locally) so we can expose all patches in the checkout
        # info output property.
    elif build_input.gitiles_commit.id:
        commit = build_input.gitiles_commit
        is_manifest_commit = commit.project == project
        if is_manifest_commit:
            base_manifest_revision = commit.id
        else:
            base_manifest_revision = self._resolve_branch_head(remote, "main")
            # In order to identify a project to override, jiri keys on both
            # the project name and the remote source repository (not to be
            # confused with `remote`, the manifest repository). Doing this
            # correctly would require finding the commit's remote in the
            # transitive imports of the jiri manifest. But those transitive
            # imports aren't available until we run "jiri update", and doing
            # that twice is slow, so we rely on:
            # 1. The convention that the name of the jiri project
            #    is the same as commit.project.
            # 2. The hope that the URL scheme of the commit remote is the
            #    same as that of the manifest remote.
            manifest_remote_url = urlparse(remote)
            host = commit.host
            # When using sso we only specify the lowest subdomain, by convention.
            if manifest_remote_url.scheme == "sso":
                host = host.split(".")[0]
            commit_remote = "{}://{}/{}".format(
                manifest_remote_url.scheme, host, commit.project
            )
            overrides.append(
                {
                    "project": commit.project,  # See 1. above
                    "remote": commit_remote,
                    "new_revision": commit.id,
                }
            )
    else:
        # If we have neither a triggering commit nor a triggering Gerrit
        # change, just checkout the manifest repository at HEAD.
        base_manifest_revision = self._resolve_branch_head(remote, "main")
    self.m.jiri.init(
        directory=path, attributes=attributes, skip_partial=skip_partial
    )
    self.m.jiri.import_manifest(
        manifest,
        remote,
        name=project,
        revision=base_manifest_revision,
        overwrite=incremental_build,
    )
    for override in overrides:
        self.m.jiri.override(**override)
    # Resets the checkout to make sure that nothing remains in the cache
    # from a previous build.
    if incremental_build:
        with self.m.context(cwd=path):
            self.m.jiri.reset()
    # We must clone all projects prior to applying any patches. But we need
    # not run hooks or fetch packages until after all patches are applied,
    # since applying a patch to the manifest repository could update the
    # versions of the packages we need to fetch.
    self.m.jiri.update(
        run_hooks=False,
        fetch_packages=False,
        timeout=timeout_secs,
        gc=incremental_build,
    )
    # `patches` is only ever populated in the gerrit_changes branch above,
    # so `gerrit_change` and `change_details` are always set when this loop
    # body runs.
    for p in patches:
        is_manifest_patch = project == p["project"]
        # Failures in pulling down patches and rebasing are likely not
        # infra-related. If we got here, we're already able to talk to Gerrit
        # successfully, so any errors are likely merge conflicts.
        with self.m.context(infra_steps=False):
            self._apply_patch(
                path,
                p["ref"],
                p["project"],
                gerrit_change,
                change_details,
                is_manifest_patch,
                timeout_secs,
            )
    # Run hooks and fetch CIPD packages separately from `jiri update` to get
    # timing information. We want to fetch packages only *after* any gerrit
    # changes have been patched in. If we fetched packages *before* patching
    # in a change to the manifest repository, then we'd need to run hooks
    # again afterward to honor the contents of the patch, and might end up
    # overwriting old versions of CIPD packages that were downloaded prior
    # to patching. So it's more efficient to only fetch packages once at the
    # very end.
    if fetch_packages:
        self.m.jiri.run_hooks(
            # We must use the local manifest from integration.git rather
            # than the contents of .jiri_manifest in cases where we might
            # have patched in a change to the manifest repository. The only
            # time we *cannot* use the local manifest is when there are
            # overrides, in which case .jiri_manifest will be the only
            # correct source of truth.
            local_manifest=bool(gerrit_change),
            fetch_packages=True,
            # Jiri sets the fetch-packages timeout to 5x the hook timeout.
            hook_timeout_secs=timeout_secs / 5 if timeout_secs else None,
        )
    # This information is consumed by `fx sync-to` to reproduce infra
    # checkouts locally.
    # TODO(olivernewman): Also expose overrides, and patches specified by
    # patches.json. Neither overrides (used only in local CI) nor
    # patches.json (used only in occasional manual experimental builds) is
    # used very frequently, but they're necessary to include here for
    # complete correctness.
    self._presentation.properties[self.CHECKOUT_INFO_PROPERTY] = {
        "manifest_remote": remote,
        "manifest": manifest,
        "base_manifest_revision": base_manifest_revision,
        "patches": patches,
    }
def project(self, project_name, checkout_root=None, **kwargs):
    """Look up the metadata of a single project in the checkout.

    Args:
        project_name (str): Name of the jiri project to look up.
        checkout_root (Path or None): Directory to run jiri from; defaults
            to the current working directory of the recipe context.
        **kwargs (dict): Passed through to `api.jiri.project()`.

    Returns:
        The first entry of the JSON output of `jiri project`.

    Raises:
        NoSuchProjectError: If the project is not among the repos in the
            checkout.
    """
    jiri_cwd = checkout_root or self.m.context.cwd
    with self.m.context(cwd=jiri_cwd):
        matches = self.m.jiri.project(projects=[project_name], **kwargs).json.output
    if matches:
        return matches[0]
    raise self.m.jiri.NoSuchProjectError(
        "project %r is not present in the checkout" % project_name
    )
def _resolve_branch_head(self, remote, branch):
    """Look up the commit hash currently at the tip of a remote branch.

    Args:
        remote (str): The URL of the repository containing the branch. It is
            converted with `sso.sso_to_https()` before being queried.
        branch (str): Name of the branch to resolve (e.g. "main").

    Returns:
        The value returned by `git.get_remote_branch_head()`.
    """
    https_url = self.m.sso.sso_to_https(remote)
    step_name = "resolve head of %r branch" % branch
    return self.m.git.get_remote_branch_head(
        url=https_url, branch=branch, step_name=step_name
    )
def _apply_patch(
    self,
    path,
    patch_ref,
    patch_project,
    gerrit_change,
    change_details,
    is_manifest_patch,
    timeout_secs,
):
    """Patch a Gerrit change into the checkout and rebase it.

    After patching, any patches.json shipped with the change is applied,
    and if the change targets the manifest project the checkout is updated
    again from the local manifest. Finally the recent history of the
    patched project is logged.

    Args:
        path (Path): The checkout root.
        patch_ref (str): Git ref of the patchset to apply.
        patch_project (str): Name of the jiri project being patched.
        gerrit_change: The triggering Gerrit change; its `host` is used.
        change_details (dict): Gerrit change details; its "branch" entry is
            the branch to rebase onto.
        is_manifest_patch (bool): Whether the patched project is the
            manifest project.
        timeout_secs (int): Timeout for the follow-up `jiri update`.
    """
    target_branch = change_details["branch"]
    gerrit_url = "https://%s" % gerrit_change.host
    self.m.jiri.patch(
        patch_ref,
        host=gerrit_url,
        project=patch_project,
        rebase=True,
        rebase_branch=target_branch,
    )

    # Handle patches.json, if present.
    self._apply_patchfile(path, gerrit_change, target_branch)

    if is_manifest_patch:
        update_options = dict(
            gc=True,
            rebase_tracked=True,
            local_manifest=True,
            run_hooks=False,
            fetch_packages=False,
            timeout=timeout_secs,
        )
        self.m.jiri.update(**update_options)

    # It's difficult to figure out what commit the tryjob rebased a CL on
    # top of. So we simply log the last few commits here. (It's not
    # sufficient to log just the parent commit, because checking out a CL
    # at the top of a stack of open CLs will also check out and rebase all
    # the parent CLs on top of main).
    patched_repo = self.project(patch_project)["path"]
    self.m.git.log(directory=patched_repo, depth=10)
def _apply_patchfile(self, path, gerrit_change, rebase_branch):
    """Parses and applies the PatchFile for the given gerrit change.

    Looks for `patches.json` in the directory named after the change's
    project under `path`. If the file is absent or empty nothing is
    patched. Otherwise every listed patch whose project is present in the
    checkout is patched in and rebased onto `rebase_branch`; patches for
    projects missing from the checkout are skipped with a warning step.

    Args:
        path (Path): The checkout root.
        gerrit_change: The triggering Gerrit change; its `project` locates
            the patch file and it is passed to the patch file's validation.
        rebase_branch (str): Branch that each patch is rebased onto.

    Raises:
        PatchFileValidationError: If the patch file fails validation.
    """
    # TODO: This is a fragile assumption that relies on integration.git
    # being checked out at //integration. Find a better way to derive path
    # to patches.json.
    patchfile_path = path.join(gerrit_change.project, "patches.json")
    try:
        # Note that in recipe unit testing mode, `read_json` returns None
        # string if the file has not been mocked, rather than raising an
        # exception.
        contents = self.m.file.read_json("read patches.json", patchfile_path)
    except self.m.file.Error as e:
        # Only a missing file is expected; anything else is a real failure.
        if e.errno_name != "ENOENT":  # pragma: no cover
            raise
        # A missing patch file is normal, so don't show the read step as
        # failed.
        self.m.step.active_result.presentation.status = self.m.step.SUCCESS
        contents = None
    # The change doesn't include a patch file, so no need to do any
    # patching.
    if not contents:
        self.m.step.active_result.presentation.step_text = "no patch file found"
        return
    patch_file = patch.PatchFile.from_json(contents)
    # Ensure patchfile is valid.
    validation_err = patch_file.validate(gerrit_change)
    if validation_err is not None:
        raise self.PatchFileValidationError(str(validation_err))
    for patch_input in patch_file.inputs:
        # If the patch pulls in a project that's not in the workspace already, the patch
        # would not affect this build / test run. Skip this patch.
        try:
            self.project(patch_input.project)
        except self.m.jiri.NoSuchProjectError:
            warning = (
                "warning: skipping patch for %s which is not in the checkout"
                % patch_input.project
            )
            # An empty step whose only purpose is to surface the warning in
            # the build's step list.
            self.m.step(warning, [])
            continue
        # Strip protocol if present.
        host = patch_input.host
        host_url = urlparse(host)
        if host_url.scheme:
            host = host_url.hostname
        # Patch in the change
        self.m.jiri.patch(
            ref=patch_input.ref,
            host="https://%s" % host,
            project=patch_input.project,
            rebase=True,
            rebase_branch=rebase_branch,
        )
def _get_change_details(self, gerrit_change):
    """Query Gerrit for the details of a change, including all revisions.

    Args:
        gerrit_change: The Gerrit change to look up; its `project`,
            `change` and `host` fields are used.

    Returns:
        The JSON output of the Gerrit query.
    """
    change_id = "%s~%s" % (gerrit_change.project, gerrit_change.change)
    # Canned response used when the recipe runs under test.
    mock_details = {
        "branch": "main",
        "revisions": {
            "d4e5f6": {"_number": 3, "ref": "refs/changes/00/100/3"},
            "a1b2c3": {"_number": 7, "ref": "refs/changes/00/100/7"},
            "g7h8i9": {"_number": 9, "ref": "refs/changes/00/100/9"},
        },
    }
    step_result = self.m.gerrit.change_details(
        name="get change details",
        change_id=change_id,
        host=gerrit_change.host,
        query_params=["ALL_REVISIONS"],
        test_data=self.m.json.test_api.output(mock_details),
    )
    return step_result.json.output
def _get_current_revision(self, gerrit_change, change_details):
    """Returns the revision entry for the patchset that triggered the build.

    Args:
        gerrit_change: The triggering Gerrit change; its `patchset` number
            selects the revision.
        change_details (dict): Gerrit change details whose "revisions" entry
            maps revision IDs to dicts carrying a "_number" key.

    Returns:
        The revision dict whose "_number" equals `gerrit_change.patchset`.

    Raises:
        AssertionError: If the number of matching revisions is not exactly
            one.
    """
    # `.values()` rather than `.itervalues()`: the latter exists only on
    # Python 2 dicts, while `.values()` works on both Python 2 and 3.
    current_patchsets = [
        rev
        for rev in change_details["revisions"].values()
        if rev["_number"] == gerrit_change.patchset
    ]
    assert len(current_patchsets) == 1, (
        "expected exactly one revision for patchset %r, found %d"
        % (gerrit_change.patchset, len(current_patchsets))
    )
    return current_patchsets[0]
def _get_release_version(self, path):
    """Resolve the release version that HEAD of integration corresponds to.

    The resolved version, with any "releases/" text removed, is also set as
    the "release_version" output property.

    Args:
        path (Path): The checkout root; the integration repository is
            expected in its "integration" subdirectory.

    Returns:
        str: The release version string.
    """
    with self.m.step.nest("resolve release version") as presentation:
        integration_dir = path.join("integration")
        resolved = self.m.release.ref_to_release_version(
            ref="HEAD", repo_path=integration_dir
        )
        version_str = str(resolved).replace("releases/", "")
        # Fuchsia's buildmon service depends on this property being set.
        presentation.properties["release_version"] = version_str
    return version_str
def _get_release_branch(self):
    """Resolve the release branch from the build's input gitiles commit.

    The ref of the input gitiles commit, with "refs/heads/" removed, is
    checked with `release.validate_branch()`. When it is valid it is also
    set as the "release_branch" output property.

    Returns:
        str or None: The branch name, or None if it is not a valid release
        branch.
    """
    with self.m.step.nest("resolve release branch") as presentation:
        full_ref = self.m.buildbucket.build.input.gitiles_commit.ref
        branch = full_ref.replace("refs/heads/", "")
        if self.m.release.validate_branch(branch):
            presentation.properties["release_branch"] = branch
            return branch
        return None