# blob: e96c4c2fbc9c7abe76a6d47580a8d909572a0915
# Copyright 2022 The Fuchsia Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
from urllib.parse import urlparse
import contextlib
import re
from recipe_engine import recipe_api
class GitCheckoutApi(recipe_api.RecipeApi):
    """GitCheckoutApi provides support for cloning repositories using pure git."""

    # Full 40-character hex SHA-1 commit hash. NOTE(review): used with
    # .match(), which anchors only the start of the string, so a longer
    # hex-prefixed ref would also be treated as a hash.
    _GIT_HASH_RE = re.compile("[0-9a-fA-F]{40}")

    def __call__(
        self,
        repo,
        path=None,
        rebase_merges=False,
        fallback_ref=None,
        revision=None,
        ignore_build_input=False,
        step_name="checkout",
        **kwargs,
    ):
        """Clone a repository using git, respecting the Buildbucket input.

        Args:
            repo (str): The remote URL to check out.
            path (Path or None): The directory in which the repo will be checked out.
                If not specified, an appropriate directory will be selected and
                created automatically.
            rebase_merges (bool): When checkout out a Gerrit change, try to
                rebase merges instead of skipping them.
            fallback_ref (str or None): The ref to fetch and checkout if not specified
                by the Buildbucket input. Defaults to the main branch.
            revision (str or None): Always checkout the repository at this revision
                (commit hash, branch name, tag, etc.), ignoring the Buildbucket
                input.
            ignore_build_input (bool): Always checkout the repository at the
                "main" branch. This is equivalent to setting revision="main".
            step_name (str or None): Name of the top-level checkout step. If
                None or empty, no step nesting will be used.

        Returns:
            A (Path, str) tuple: the checkout directory and the resolved HEAD
            commit hash.
        """
        if ignore_build_input:
            assert (
                not revision
            ), "it's redundant to set both ignore_build_input and revision"
            revision = "main"
        nest = self.m.step.nest(step_name) if step_name else contextlib.nullcontext()
        with nest, self.m.context(infra_steps=True):
            # An explicit revision always wins over the Buildbucket input.
            if revision:
                return self._checkout(repo, path=path, ref=revision, **kwargs)
            build_input = self.m.buildbucket.build.input
            if build_input.gerrit_changes:
                change = build_input.gerrit_changes[0]
                # Only honor the Gerrit change if it targets this repo
                # (strip the "-review" infix to get the gitiles host).
                if (
                    f"https://{change.host.replace('-review', '')}/{change.project}"
                    == repo
                ):
                    return self._checkout_gerrit_change(
                        change, path=path, rebase_merges=rebase_merges, **kwargs
                    )
            elif build_input.gitiles_commit.project:
                gitiles_commit = build_input.gitiles_commit
                # Only honor the gitiles commit if it targets this repo.
                if f"https://{gitiles_commit.host}/{gitiles_commit.project}" == repo:
                    return self._checkout_gitiles_commit(
                        build_input.gitiles_commit, path=path, **kwargs
                    )
            # Build input doesn't apply to this repo; fall back to the
            # requested ref (or the remote's default branch if None).
            return self._checkout(repo, path=path, ref=fallback_ref, **kwargs)

    def _checkout_gitiles_commit(self, gitiles_commit, path, **kwargs):
        """Checkout a commit specified by a GitilesCommit."""
        url = f"https://{gitiles_commit.host}/{gitiles_commit.project}"
        return self._checkout(url, path=path, ref=gitiles_commit.id, **kwargs)

    def _checkout_gerrit_change(
        self, gerrit_change, path, rebase_merges=False, **kwargs
    ):
        """Checkout a CL specified by a GerritChange and rebase it onto ref.

        Args:
            gerrit_change (GerritChange): The pending change to check out.
            path (Path or None): Directory to clone into.
            rebase_merges (bool): Pass --rebase-merges when rebasing the CL
                onto its target branch.

        Returns:
            The (Path, str) result of checking out the change ref.
        """
        # Resolve the CL's target branch so we can rebase onto it.
        change_details = self.m.gerrit.change_details(
            name="get change details",
            change_id=f"{gerrit_change.project}~{gerrit_change.change}",
            host=gerrit_change.host,
            test_data=self.m.json.test_api.output({"branch": "main"}),
        ).json.output
        base_ref = f"refs/heads/{change_details['branch']}"
        remote, change_ref = self.m.gerrit.resolve_change(gerrit_change)
        # First check out the target branch to establish the rebase base.
        with self.m.step.nest(f"fetch {base_ref}"):
            path, base_revision = self._checkout(
                remote, path=path, ref=base_ref, **kwargs
            )
        # TODO(olivernewman): This probably doesn't need to be a full separate
        # `_checkout` call since we don't care about repeating most of the
        # operations.
        checkout_result = self._checkout(remote, path=path, ref=change_ref, **kwargs)
        with self.m.context(cwd=path):
            self.m.git.rebase(base_revision, rebase_merges=rebase_merges)
        return checkout_result

    def _checkout(
        self,
        url,
        path=None,
        ref=None,
        recursive=False,
        submodules=True,
        submodule_force=False,
        submodule_paths=None,
        remote="origin",
        cache=True,
        depth=None,
        filters=None,
        use_packfiles=True,
        tags=True,
        **kwargs,
    ):
        """Clone a repo and return the checked out directory and revision.

        Args:
            url (str): URL of remote repo to use as upstream.
            path (Path): Directory to clone into.
            ref (str): Ref to fetch and checkout.
            recursive (bool): Whether to recursively fetch submodules.
            submodules (bool): Whether to sync and update submodules.
            submodule_force (bool): Whether to update submodules with --force.
            submodule_paths (list): List of path(s) to submodule(s).
            remote (str): Name of the remote to use.
            cache (bool): Whether to use the reference cache.
            depth (int or None): Depth in history of checkout. If None, do a
                full checkout.
            filters (bool or str or seq[str]): Use git's --filter option. If
                True, uses "blob:none".
            use_packfiles (bool): Enable downloading precomputed packfiles from
                a CDN.
            tags (bool): Whether to also fetch tags.

        Returns:
            A (Path, str) tuple: the checkout directory and the HEAD commit
            hash after checkout.

        Raises:
            ValueError: If `path` is the builder's start_dir.
        """
        if path is None:
            path = self.m.path["start_dir"].join(self.m.git.remote_alias(url))
        elif path == self.m.path["start_dir"]:
            raise ValueError("checkout must not be rooted at start_dir")
        self.m.file.ensure_directory("makedirs", path)
        with self.m.context(cwd=path):
            if self.m.path.exists(path.join(".git")):  # pragma: no cover
                # Repo already initialized (warm builder); drop the stale
                # remote config so it can be re-added below.
                self.m.git.config_remove_section(f"remote.{remote}", **kwargs)
            else:
                self.m.git.init(**kwargs)
            self.m.git.remote_add(remote or "origin", url)
            if use_packfiles:
                self._enable_packfiles(**kwargs)
            if cache:
                with self.m.step.nest("cache"), self.m.cache.guard("git"):
                    # Derive a filesystem-safe cache dir name from the URL.
                    # "-" is escaped to "--" so "/"->"-" stays reversible.
                    o = urlparse(url)
                    dirname = o.hostname + o.path.replace("-", "--").replace("/", "-")
                    cache_path = self.m.path["cache"].join("git", dirname)
                    self.m.file.ensure_directory("makedirs", cache_path)
                    with self.m.context(cwd=cache_path):
                        objects_path = cache_path.join("objects")
                        if self.m.path.exists(objects_path):  # pragma: no cover
                            self.m.git.config_remove_section(
                                f"remote.{remote}", **kwargs
                            )
                        else:
                            self.m.git.init(bare=True, **kwargs)
                        # Configure the remote from which refs and objects will
                        # be downloaded. If the cache is warm the remote may
                        # already exist so we can't use `git remote add`.
                        # BUGFIX: was `f"remote.{remote}.url" or "origin"`; the
                        # f-string is always truthy so the `or` was dead code
                        # and a falsy `remote` would have produced the invalid
                        # key "remote.None.url". Apply the `remote or "origin"`
                        # fallback inside the key, matching its use elsewhere.
                        self.m.git.config(
                            f"remote.{remote or 'origin'}.url",
                            url,
                            step_name="remote set-url",
                            **kwargs,
                        )
                        if use_packfiles:
                            self._enable_packfiles(**kwargs)
                        # Make sure to fetch all refs into the cache, not just
                        # the current HEAD.
                        # NOTE(review): key hardcodes "origin" while the rest
                        # of this method honors `remote` — confirm whether
                        # non-default remotes are ever used with the cache.
                        self.m.git.config_replace_all(
                            "remote.origin.fetch",
                            "+refs/heads/*:refs/heads/*",
                            value_regex=r"\+refs/heads/\*:.*",
                            step_name="replace fetch configs",
                            **kwargs,
                        )
                        # Download objects and refs to the cache.
                        self.m.git.fetch(
                            repository=remote or "origin",
                            prune=True,
                            tags=tags,
                            depth=depth,
                            filters=filters,
                            **kwargs,
                        )
                    # Configure the cache directory as an alternative objects
                    # directory for the checkout. Objects will be fetched from
                    # the local cache when possible, and added to the cache when
                    # fetched from the remote. See
                    # https://docs.gitlab.com/ee/development/git_object_deduplication.html#understanding-git-alternates
                    info = path.join(".git", "objects", "info")
                    self.m.file.ensure_directory("makedirs object/info", info)
                    self.m.file.write_text(
                        "alternates", info.join("alternates"), f"{objects_path}\n"
                    )
            # Translate `ref` into what to fetch and what to check out:
            # - no ref: fetch "main", check out whatever was fetched;
            # - commit hash: nothing specific to fetch, check out the hash;
            # - refs/heads/<branch>: fetch the short branch name;
            # - anything else (tag, short branch): fetch it verbatim.
            if not ref:
                fetch_ref = "main"
                checkout_ref = "FETCH_HEAD"
            elif self._GIT_HASH_RE.match(ref):
                fetch_ref = ""
                checkout_ref = ref
            elif ref.startswith("refs/heads/"):
                fetch_ref = ref[len("refs/heads/") :]
                checkout_ref = "FETCH_HEAD"
            else:
                fetch_ref = ref
                checkout_ref = "FETCH_HEAD"
            # Download objects and refs to the checkout, implicitly using the
            # cache directory if enabled.
            self.m.git.fetch(
                repository=remote or "origin",
                refspec=fetch_ref,
                tags=tags,
                recurse_submodules=recursive or None,
                depth=depth,
                filters=filters,
                **kwargs,
            )
            self.m.git.raw_checkout(ref=checkout_ref, force=True, **kwargs)
            step_test_data = kwargs.pop(
                "step_test_data",
                lambda: self.m.raw_io.test_api.stream_output_text("deadbeef"),
            )
            sha = self.m.git.rev_parse("HEAD", step_test_data=step_test_data)
            # Drop any leftover untracked/ignored files from prior builds.
            self.m.git.clean(force=True, recursive=True, ignore_rules=True, **kwargs)
            if submodules:
                with self.m.step.nest("submodule"):
                    self.m.git.sync_submodule(**kwargs)
                    self.m.git.update_submodule(
                        recursive=recursive,
                        force=submodule_force,
                        paths=submodule_paths,
                        filters=filters,
                        **kwargs,
                    )
        return path, sha

    def _enable_packfiles(self, **kwargs):
        """Enable downloading precomputed packfiles from a CDN.

        This is more efficient than the default, which is for the Git server to
        serve packfiles directly.
        """
        self.m.git.config(
            "fetch.uriprotocols", "https", step_name="set fetch.uriprotocols", **kwargs
        )