# Copyright 2022 The Fuchsia Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import contextlib
import re
from urllib.parse import urlparse

from recipe_engine import recipe_api


class GitCheckoutApi(recipe_api.RecipeApi):
    """GitCheckoutApi provides support for cloning repositories using pure git."""

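    # Matches a full 40-character hexadecimal commit hash.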
    _GIT_HASH_RE = re.compile("[0-9a-fA-F]{40}")

    def __call__(
        self,
        repo,
        path=None,
        rebase_merges=False,
        fallback_ref=None,
        revision=None,
        ignore_build_input=False,
        step_name="checkout",
        **kwargs,
    ):
| """Clone a repository using git, respecting the Buildbucket input. |
| |
| Args: |
| repo (str): The remote URL to check out. |
| path (Path or None): The directory in which the repo will be checked out. |
| If not specified, an appropriate directory will be selected and |
| created automatically. |
| rebase_merges (bool): When checkout out a Gerrit change, try to |
| rebase merges instead of skipping them. |
| fallback_ref (str or None): The ref to fetch and checkout if not specified |
| by the Buildbucket input. Defaults to the main branch. |
| revision (str or None): Always checkout the repository at this revision |
| (commit hash, branch name, tag, etc.), ignoring the Buildbucket |
| input. |
| ignore_build_input (bool): Always checkout the repository at the |
| "main" branch. This is equivalent to setting revision="main". |
| step_name (str or None): Name of the top-level checkout step. If |
| None or empty, no step nesting will be used. |
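
        Example (illustrative; assumes the module is loaded in a recipe's
        DEPS as `git_checkout`, and the repo URL is a placeholder):

            path, revision = api.git_checkout(
                "https://fuchsia.googlesource.com/example",
                fallback_ref="refs/heads/main",
            )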
| """ |
        if ignore_build_input:
            assert (
                not revision
            ), "it's redundant to set both ignore_build_input and revision"
            revision = "main"

        nest = self.m.step.nest(step_name) if step_name else contextlib.nullcontext()
        with nest, self.m.context(infra_steps=True):
            if revision:
                return self._checkout(repo, path=path, ref=revision, **kwargs)

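            # Otherwise derive the revision from the Buildbucket input:
            # prefer a Gerrit change that targets this repo, then a gitiles
            # commit for this repo, and finally `fallback_ref`.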
            build_input = self.m.buildbucket.build.input
            if build_input.gerrit_changes:
                change = build_input.gerrit_changes[0]
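                # A Gerrit review host like "fuchsia-review.googlesource.com"
                # serves the same project as the Gitiles host
                # "fuchsia.googlesource.com", so strip the "-review" suffix
                # before comparing against `repo`.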
                if (
                    f"https://{change.host.replace('-review', '')}/{change.project}"
                    == repo
                ):
                    return self._checkout_gerrit_change(
                        change, path=path, rebase_merges=rebase_merges, **kwargs
                    )
            elif build_input.gitiles_commit.project:
                gitiles_commit = build_input.gitiles_commit
                if f"https://{gitiles_commit.host}/{gitiles_commit.project}" == repo:
                    return self._checkout_gitiles_commit(
                        build_input.gitiles_commit, path=path, **kwargs
                    )
            return self._checkout(repo, path=path, ref=fallback_ref, **kwargs)

    def _checkout_gitiles_commit(self, gitiles_commit, path, **kwargs):
        """Check out a commit specified by a GitilesCommit."""
        url = f"https://{gitiles_commit.host}/{gitiles_commit.project}"
        return self._checkout(url, path=path, ref=gitiles_commit.id, **kwargs)

    def _checkout_gerrit_change(
        self, gerrit_change, path, rebase_merges=False, **kwargs
    ):
        """Check out a CL specified by a GerritChange and rebase it onto its
        target branch.
        """
        change_details = self.m.gerrit.change_details(
            name="get change details",
            change_id=f"{gerrit_change.project}~{gerrit_change.change}",
            host=gerrit_change.host,
            test_data=self.m.json.test_api.output({"branch": "main"}),
        ).json.output
        base_ref = f"refs/heads/{change_details['branch']}"
        remote, change_ref = self.m.gerrit.resolve_change(gerrit_change)
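        # First check out the change's target branch so there is a base
        # revision to rebase the CL onto.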
| with self.m.step.nest(f"fetch {base_ref}"): |
| path, base_revision = self._checkout( |
| remote, path=path, ref=base_ref, **kwargs |
| ) |
| # TODO(olivernewman): This probably doesn't need to be a full separate |
| # `_checkout` call since we don't care about repeating most of the |
| # operations. |
| checkout_result = self._checkout(remote, path=path, ref=change_ref, **kwargs) |
| with self.m.context(cwd=path): |
| self.m.git.rebase(base_revision, rebase_merges=rebase_merges) |
| return checkout_result |
| |
    def _checkout(
        self,
        url,
        path=None,
        ref=None,
        recursive=False,
        submodules=True,
        submodule_force=False,
        submodule_paths=None,
        remote="origin",
        cache=True,
        depth=None,
        filters=None,
        use_packfiles=True,
        tags=True,
        **kwargs,
    ):
| """Clone a repo and return the checked out directory and revision. |
| |
| Args: |
| url (str): URL of remote repo to use as upstream. |
| path (Path): Directory to clone into. |
| ref (str): Ref to fetch and checkout. |
| recursive (bool): Whether to recursively fetch submodules. |
| submodules (bool): Whether to sync and update submodules. |
| submodule_force (bool): Whether to update submodules with --force. |
| submodule_paths (list): List of path(s) to submodule(s). |
| remote (str): Name of the remote to use. |
| cache (bool): Whether to use the reference cache. |
| depth (int or None): Depth in history of checkout. If None, do a |
| full checkout. |
| filters (bool or str or seq[str]): Use git's --filter option. If |
| True, uses "blob:none". |
| use_packfiles (bool): Enable downloading precomputed packfiles from |
| a CDN. |
| """ |
        if path is None:
            path = self.m.path["start_dir"].join(self.m.git.remote_alias(url))
        elif path == self.m.path["start_dir"]:
            raise ValueError("checkout must not be rooted at start_dir")

        self.m.file.ensure_directory("makedirs", path)

        with self.m.context(cwd=path):
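            # Reuse the directory if it already contains a repository;
            # otherwise initialize a fresh one. The remote's config section
            # is removed first so that `remote add` below doesn't fail when
            # the remote already exists.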
            if self.m.path.exists(path.join(".git")):  # pragma: no cover
                self.m.git.config_remove_section(f"remote.{remote}", **kwargs)
            else:
                self.m.git.init(**kwargs)
            self.m.git.remote_add(remote or "origin", url)

            if use_packfiles:
                self._enable_packfiles(**kwargs)

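            # Maintain a persistent per-remote object cache (a bare repo in
            # the builder's named cache directory) so that later builds on
            # the same machine only fetch new objects over the network.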
            if cache:
                with self.m.step.nest("cache"), self.m.cache.guard("git"):
                    o = urlparse(url)
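                    # Encode the URL as a filesystem-safe directory name,
                    # e.g. https://example.com/foo-bar/baz becomes
                    # "example.com-foo--bar-baz" (doubling literal dashes
                    # keeps the encoding unambiguous).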
                    dirname = o.hostname + o.path.replace("-", "--").replace("/", "-")
                    cache_path = self.m.path["cache"].join("git", dirname)

                    self.m.file.ensure_directory("makedirs", cache_path)

                    with self.m.context(cwd=cache_path):
                        objects_path = cache_path.join("objects")
                        if self.m.path.exists(objects_path):  # pragma: no cover
                            self.m.git.config_remove_section(
                                f"remote.{remote}", **kwargs
                            )
                        else:
                            self.m.git.init(bare=True, **kwargs)
                        # Configure the remote from which refs and objects will
                        # be downloaded. If the cache is warm the remote may
                        # already exist so we can't use `git remote add`.
                        self.m.git.config(
                            f"remote.{remote}.url",
                            url,
                            step_name="remote set-url",
                            **kwargs,
                        )

                        if use_packfiles:
                            self._enable_packfiles(**kwargs)

                        # Make sure to fetch all refs into the cache, not just
                        # the current HEAD.
                        self.m.git.config_replace_all(
                            f"remote.{remote}.fetch",
                            "+refs/heads/*:refs/heads/*",
                            value_regex=r"\+refs/heads/\*:.*",
                            step_name="replace fetch configs",
                            **kwargs,
                        )
                        # Download objects and refs to the cache.
                        self.m.git.fetch(
                            repository=remote or "origin",
                            prune=True,
                            tags=tags,
                            depth=depth,
                            filters=filters,
                            **kwargs,
                        )

                    # Configure the cache directory as an alternate object
                    # directory for the checkout. Objects will be fetched from
                    # the local cache when possible, and added to the cache
                    # when fetched from the remote. See
                    # https://docs.gitlab.com/ee/development/git_object_deduplication.html#understanding-git-alternates
                    info = path.join(".git", "objects", "info")
                    self.m.file.ensure_directory("makedirs object/info", info)
                    self.m.file.write_text(
                        "alternates", info.join("alternates"), f"{objects_path}\n"
                    )

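            # Decide what to fetch and what to check out based on the form
            # of `ref`: branches and other short refs are fetched and then
            # checked out via FETCH_HEAD, while a full 40-character hash is
            # checked out directly, relying on its objects being reachable
            # from the fetched heads.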
            if not ref:
                fetch_ref = "main"
                checkout_ref = "FETCH_HEAD"
            elif self._GIT_HASH_RE.match(ref):
                fetch_ref = ""
                checkout_ref = ref
            elif ref.startswith("refs/heads/"):
                fetch_ref = ref[len("refs/heads/") :]
                checkout_ref = "FETCH_HEAD"
            else:
                fetch_ref = ref
                checkout_ref = "FETCH_HEAD"
            # Download objects and refs to the checkout, implicitly using the
            # cache directory if enabled.
            self.m.git.fetch(
                repository=remote or "origin",
                refspec=fetch_ref,
                tags=tags,
                recurse_submodules=recursive or None,
                depth=depth,
                filters=filters,
                **kwargs,
            )
            self.m.git.raw_checkout(ref=checkout_ref, force=True, **kwargs)
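            # Resolve the commit that was actually checked out; tests see a
            # placeholder hash unless the caller overrides `step_test_data`.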
            step_test_data = kwargs.pop(
                "step_test_data",
                lambda: self.m.raw_io.test_api.stream_output_text("deadbeef"),
            )
            sha = self.m.git.rev_parse("HEAD", step_test_data=step_test_data)
            self.m.git.clean(force=True, recursive=True, ignore_rules=True, **kwargs)
            if submodules:
                with self.m.step.nest("submodule"):
                    self.m.git.sync_submodule(**kwargs)
                    self.m.git.update_submodule(
                        recursive=recursive,
                        force=submodule_force,
                        paths=submodule_paths,
                        filters=filters,
                        **kwargs,
                    )
        return path, sha

    def _enable_packfiles(self, **kwargs):
        """Enable downloading precomputed packfiles from a CDN.

        This is more efficient than the default, which is for the Git server
        to serve packfiles directly.
        """
        self.m.git.config(
            "fetch.uriprotocols", "https", step_name="set fetch.uriprotocols", **kwargs
        )