| # Copyright 2018 The Fuchsia Authors. All rights reserved. |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| |
| import datetime |
| import enum |
| import re |
| import textwrap |
| |
| import attr |
| |
| from recipe_engine import recipe_api |
| |
| from PB.recipe_engine import result |
| from PB.go.chromium.org.luci.buildbucket.proto import common |
| |
| from PB.recipe_modules.fuchsia.auto_roller.options import Options |
| |
# Template for the first line of roll commit messages. `prepend` is either
# empty or COMMIT_MESSAGE_DO_NOT_SUBMIT, `type`/`type_descr` distinguish dry
# runs ("dryrun"/"Dry run") from real rolls ("roll"/"Roll"), and `roller`
# identifies the builder performing the roll.
COMMIT_MESSAGE_TITLE = (
    """{prepend}[{type}] {type_descr} {roller} packages to {version}"""
)

# Prefix for dry-run commit titles so the CL cannot be submitted by accident.
COMMIT_MESSAGE_DO_NOT_SUBMIT = "DO NOT SUBMIT "

# Template for the commit message body, appended to COMMIT_MESSAGE_TITLE.
COMMIT_MESSAGE = """

{packages}

From: {old_version}
To: {version}
{multiply}
{divider}
"""

# Template for a link to a specific version of a CIPD package in the CIPD UI.
CIPD_URL = "https://chrome-infra-packages.appspot.com/p/{package}/+/{version}"

# Default ref to push rolls onto when the options don't specify one.
UPSTREAM_REF = "main"

# The name of the link to the Gerrit change created for a roll. This is
# displayed in the CQ failure error message to help others understand where to
# look when debugging failed rolls.
GERRIT_LINK_NAME = "gerrit link"

# Substrings of the messages that CQ posts to CLs that pass the dry run.
PASSED_DRY_RUN_MESSAGES = (
    "Dry run: This CL passed the CQ dry run.",
    "This CL has passed the run",
)
# Substrings of the messages that CQ posts to CLs that fail the dry run.
FAILED_DRY_RUN_MESSAGES = (
    "Dry run: Failed builds",
    "This CL has failed the run",
)

# CQ's dry run update comment will be posted by a service account with this
# domain.
CQ_SERVICE_ACCOUNT_DOMAIN = "luci-project-accounts.iam.gserviceaccount.com"

# Status of Gerrit labels. See _is_cq_complete() for details.
LABEL_STATUSES = ("rejected", "approved", "disliked", "recommended")
| |
| |
class CQResult(enum.Enum):
    """Represents the result of waiting for CQ to complete."""

    # CQ completed successfully.
    SUCCESS = 1

    # CQ tryjobs failed.
    FAILURE = 2

    # Timed out waiting for CQ to finish.
    TIMEOUT = 3

    # The CL was manually abandoned.
    ABANDONED = 4

    # The CL bypassed CQ.
    SKIPPED = 5

    # The CQ result cannot be determined because the CQ label was removed but
    # the CQ Bot result comment was not posted. This applies to dry run mode
    # only.
    MISSING_COMMENT = 6


# Human-readable explanations for unsuccessful CQ results, used when
# constructing the failure message for a failed roll. Every CQResult for
# which `GerritChange.success` is False should appear here so that
# `raise_if_failed()` can always report a reason.
FAILURE_REASONS = {
    CQResult.FAILURE: "CQ failed",
    CQResult.TIMEOUT: "auto-roller timeout",
    CQResult.ABANDONED: "CL manually abandoned",
    # Previously unmapped, which produced an opaque KeyError (instead of an
    # informative failure message) when a dry run ended in this state.
    CQResult.MISSING_COMMENT: "CQ result comment not found",
}
| |
| |
@attr.s
class _ChangeId:
    """A full Gerrit change ID: project, destination branch, and I-hash."""

    project = attr.ib(type=str)
    branch = attr.ib(type=str)
    id = attr.ib(type=str)

    def __str__(self):
        # Gerrit's full change ID format is "<project>~<branch>~<Ihash>".
        return f"{self.project}~{self.branch}~{self.id}"
| |
| |
@attr.s
class GerritChange:
    """Tracks a single Gerrit CL created for a roll attempt."""

    _api = attr.ib()
    # Full ID of the change, e.g. "fuchsia~main~Iabcd..."
    id = attr.ib(type=str)
    upstream_ref = attr.ib(type=str)
    dry_run = attr.ib(type=bool)
    remote = attr.ib(type=str)
    # project and host are computed from remote in __attrs_post_init__.
    project = attr.ib(type=str, init=False)
    host = attr.ib(type=str, init=False)
    cq_result = attr.ib(type=CQResult, default=None)
    _last_seen_status = attr.ib(type=str, default="")
    _revision = attr.ib(type=str, default="", init=False)

    def __attrs_post_init__(self):
        # Derive the Gerrit host and project from the git remote URL so
        # callers only need to supply the remote.
        self.host = self._api.gerrit.host_from_remote_url(self.remote)
        self.project = self._api.gerrit.project_from_remote_url(self.remote)

    @property
    def url(self):
        """A link to the CL in the Gerrit UI.

        The URL is actually a link to a search query for the change ID, but if
        there's only one change returned by the query then Gerrit redirects to
        that change, and this query will always return one or zero changes
        because it uses a full unique change ID.
        """
        return "https://%s/q/%s" % (self.host, self.id)

    @property
    def last_seen_status(self):
        """The status most recently returned by self.get_details().

        May be out of date with the Gerrit backend.
        """
        return self._last_seen_status

    @property
    def revision(self):
        """The commit hash most recently returned by self.get_details().

        May be out of date with the Gerrit backend.
        """
        return self._revision

    @property
    def success(self):
        """Whether the roll was successful.

        A roll is considered successful if CQ passed, or if CQ was
        bypassed and (for a non-dry run) the change was force-submitted.
        """
        return self.cq_result in (CQResult.SUCCESS, CQResult.SKIPPED)

    @property
    def retrigger_link(self):
        """Link to retrigger the roll via the LUCI scheduler."""
        build = self._api.buildbucket.build
        # Prefer the scheduler job ID tag if the build has one; fall back to
        # "<project>/<builder>", which is usually the scheduler job name.
        trigger = next(
            (tag.value for tag in build.tags if tag.key == "scheduler_job_id"),
            "{}/{}".format(build.builder.project, build.builder.builder),
        )
        return "https://luci-scheduler.appspot.com/jobs/{}".format(trigger)

    def raise_if_failed(self):
        """Raise an informative StepFailure if CQ failed."""
        if self.success:
            return
        # Use a fallback reason rather than raising an opaque KeyError if we
        # end up with an unsuccessful CQResult that has no mapped reason
        # (e.g. CQResult.MISSING_COMMENT).
        reason = FAILURE_REASONS.get(
            self.cq_result, "unknown CQ result (%s)" % self.cq_result
        )
        msg = (
            "Failed to roll changes: {reason}.\n\n"
            "[{link_name}]({url})\n\n"
            "[Retrigger]({retrigger})"
        ).format(
            reason=reason,
            link_name=GERRIT_LINK_NAME,
            url=self.url,
            retrigger=self.retrigger_link,
        )
        raise recipe_api.StepFailure(msg)

    def get_details(self, step_name):
        """Queries the change-details endpoint for this change.

        Also updates `last_seen_status` and `revision` as a side effect.

        Args:
            step_name (str): Name to give the change-details step.

        Returns:
            A dict corresponding to the Gerrit API response.
        """
        step = self._api.gerrit.change_details(
            step_name,
            self.id,
            host=self.host,
            query_params=["CURRENT_REVISION"],
            max_attempts=3,
        )
        details = step.json.output
        self._last_seen_status = details["status"]
        self._revision = details["current_revision"]
        return details
| |
| |
| class AutoRollerApi(recipe_api.RecipeApi): |
| """API for writing auto-roller recipes.""" |
| |
| Options = Options |
| |
| CQResult = CQResult |
| |
    def __init__(self, poll_interval_secs, poll_timeout_secs, *args, **kwargs):
        """Initializes the module from its input properties.

        Args:
            poll_interval_secs (int): Seconds to sleep between CQ status
                polls.
            poll_timeout_secs (int): Overall timeout for waiting on CQ; if
                falsy, a default is computed later in initialize().
        """
        # poll_interval_secs and poll_timeout_secs are input properties which come
        # from __init__.PROPERTIES in this directory.
        super().__init__(*args, **kwargs)
        self._poll_interval_secs = poll_interval_secs
        self._poll_timeout_secs = poll_timeout_secs
| |
| def initialize(self): |
| if not self._poll_timeout_secs: |
| # If execution_timeout is set, set the poll timeout to 10 minutes |
| # before the build times out. If there's no execution_timeout or |
| # it's less than 10 minutes, set the poll timeout to the prior |
| # default, 50 minutes. |
| delta = 10 * 60 |
| if self.m.buildbucket.build.execution_timeout.seconds > delta: |
| self._poll_timeout_secs = ( |
| self.m.buildbucket.build.execution_timeout.seconds - delta |
| ) |
| else: |
| self._poll_timeout_secs = 50 * 60 |
| assert self._poll_timeout_secs > 0 |
| |
    def attempt_roll(
        self,
        opts,
        repo_dir,
        commit_message,
        author_override=None,
        raise_on_failure=True,
    ):
        """Commit and attempt to submit local edits to a git repo.

        The high-level steps it performs are as follows (some of these steps
        will differ slightly in practice depending on argument values, but
        this is what most rollers do):

        1. Calculate a Gerrit change ID using the local change diff, along
           with several other fields, as the input to a hash function.
        2. Query Gerrit to see if there's an existing change with the same
           change ID (and thus the same diff).
           - If there is no duplicate change, commit the local changes and
             push them to Gerrit to create a new change, setting the
             appropriate labels (CQ+1 in dry-run mode, or CQ+2 and CR+2 in
             production mode) using Gerrit push options to avoid making
             separate API request to set the labels.
           - If there is a change with the same diff:
             - If we're running in production mode, the change probably
               failed a previous CQ attempt and got abandoned. Restore
               the change if it's abandoned and then set the
               Commit-Queue+2 label using the Gerrit CLI.
             - If we're running in dry-run mode, assume that we've
               already done a dry run on the CL, and exit the build
               early. If the previous dry run succeeded, the build will
               turn green, otherwise it will turn red. This is to avoid
               needlessly running CQ many times on a broken dry run CL.
        3. Wait for CQ to complete.
           - If CQ passes:
             - If we're running in production mode, then CQ will have
               submitted the CL after CQ passed. Exit and turn the build
               green.
             - If we're running in dry-run mode, abandon the CL now that
               we're done with it. Then exit and turn the build green.
           - If CQ fails, abandon the CL and exit, turning the build red.
           - If the change is manually abandoned while CQ is running, exit
             and turn the build red.

        Args:
            opts (Options): Configuration proto; see options.proto for
                schema.
            repo_dir (Path): The path to the directory containing a local
                copy of the git repo with unstaged changes that will be
                committed and rolled.
            commit_message (str): The commit message for the roll. Note that
                this method will automatically append a Gerrit Change ID and
                several other footers to the change. Also, it may be a
                multiline string (embedded newlines are allowed).
            author_override (dict): Dict representation of a git.Commit_User
                containing name and email keys. If specified, override the
                author of the commit.
            raise_on_failure (bool): Whether to raise an exception if the
                roll fails.

        Returns:
            A GerritChange corresponding to the roll CL created, or None if
            there were no changes to roll.
        """
        # Make a copy of `opts` so we can modify it without affecting the
        # caller.
        new_opts = Options()
        new_opts.CopyFrom(opts)
        opts = new_opts

        opts.upstream_ref = opts.upstream_ref or UPSTREAM_REF

        # Check to see if there are actually any changes in repo_dir before
        # continuing.
        if not self._repo_has_uncommitted_files(repo_dir, opts.commit_untracked):
            self.m.step.empty("no changes to roll")
            return None

        assert not (
            opts.force_submit and opts.no_tryjobs
        ), "`force_submit` and `no_tryjobs` are mutually exclusive"

        # Compute the Gerrit labels to apply at push time.
        labels = {}
        if not opts.dry_run:
            if opts.bot_commit:
                labels["Bot-Commit"] = 1
            if not opts.force_submit:
                labels["Commit-Queue"] = 1 if opts.dry_run else 2

        if opts.labels_to_set:
            labels.update(opts.labels_to_set)
        # Zero-valued labels are no-ops, so drop them.
        labels = {k: v for k, v in labels.items() if v}

        # Create the change both locally and remotely and push, applying the
        # specified labels (which will trigger CQ if the Commit-Queue label is
        # set).
        change = self._create_gerrit_change(
            opts,
            repo_dir=repo_dir,
            commit_message=commit_message,
            author_override=author_override,
            labels=labels,
        )

        # If not setting Commit-Queue on upload we shouldn't try to clear it
        # in _ensure_abandoned(). By default we set Commit-Queue, so if it's
        # not explicitly mentioned assume we're setting it.
        clear_commit_queue = opts.labels_to_set.get("Commit-Queue", True)

        if opts.force_submit:
            change.cq_result = CQResult.SKIPPED
            if opts.dry_run:
                # A dry run must never actually submit, so abandon instead.
                self._ensure_abandoned(
                    change,
                    reason="would force-submit",
                    notify_option=opts.cl_notify_option,
                    clear_commit_queue=clear_commit_queue,
                )
            else:
                self._force_submit(change)
            return change

        # If we already have a CQ result for this change (e.g. because we found
        # a previous dry run attempt) then don't bother rerunning CQ.
        if not change.cq_result or change.cq_result == CQResult.MISSING_COMMENT:
            change.cq_result = self._wait_for_cq(
                change, opts.dry_run, opts.labels_to_set or {}, opts.labels_to_wait_on
            )

        try:
            # CC the configured addresses on failure so they get notified.
            if not change.success and opts.cc_on_failure_emails:
                self.m.gerrit.set_review(
                    "cc", change.id, ccs=opts.cc_on_failure_emails, host=change.host
                )

        finally:
            # Interpret the result and finish. This runs even if the CC step
            # above fails, so the CL is always cleaned up.
            if opts.dry_run:
                # Only abandon the roll on success if it was a dry-run.
                self._ensure_abandoned(
                    change,
                    reason="dry run %s" % ("passed" if change.success else "failed"),
                    notify_option=opts.cl_notify_option,
                    clear_commit_queue=clear_commit_queue,
                )
            elif not change.success:
                self._ensure_abandoned(
                    change,
                    notify_option=opts.cl_notify_option,
                    clear_commit_queue=clear_commit_queue,
                    retrigger_link=True,
                )
            elif change.last_seen_status and change.last_seen_status != "MERGED":
                # For Gerrit hosts that do not use CQ, the roller itself must
                # handle submission.
                self._force_submit(change)

        if raise_on_failure:
            change.raise_if_failed()

        return change
| |
| def raw_result(self, change=None, success_text=None): |
| """Return a RawResult object containing a Gerrit link. |
| |
| If the change failed, will raise a user-friendly exception with a link |
| to the roll CL. |
| |
| This is meant to be returned from RunSteps(). If done so, it results in |
| the Gerrit link being included in the run summary at the top of the |
| build page in MILO and in the "Summary" column of the builder page. |
| |
| If `change` is None, we assume that no roll was attempted because |
| everything was already up to date. |
| |
| Args: |
| change (GerritChange or None): The change created for the roll |
| attempt, or None if no change was necessary. |
| success_text (str): Custom text to include in the summary along |
| with the Gerrit link in case of a successful roll, rather |
| than the default text. |
| """ |
| if not change: |
| return self.nothing_to_roll() |
| |
| # Possibly redundant, but doesn't hurt anything. |
| change.raise_if_failed() |
| |
| if not success_text: |
| success_text = "%s succeeded" % ("Dry run" if change.dry_run else "Roll") |
| |
| return result.RawResult( |
| summary_markdown="{}. [{}]({})".format( |
| success_text, GERRIT_LINK_NAME, change.url |
| ), |
| status=common.SUCCESS, |
| ) |
| |
| def nothing_to_roll(self): |
| """Returns a RawResult suitable for returning from RunSteps. |
| |
| Roller recipes should return this value when exiting early due to all |
| dependencies already being up to date. |
| """ |
| return result.RawResult( |
| summary_markdown="Nothing to roll.", |
| status=common.SUCCESS, |
| ) |
| |
| def generate_package_roll_message( |
| self, |
| packages, |
| version, |
| old_version=None, |
| multiply="", |
| divider="", |
| dry_run=False, |
| ): |
| """Generate a commit message for a set of rolling CIPD packages.""" |
| roller_string = self.m.buildbucket.builder_name.replace("-roller", "").replace( |
| "-dryrun", "" |
| ) |
| |
| package_line = " ".join( |
| ["{package}", "old:{old}" if old_version else "", "new:{new}"] |
| ) |
| packages_with_urls = [] |
| for package in sorted(packages): |
| format_kwargs = { |
| "new": CIPD_URL.format(package=package, version=version), |
| "package": package, |
| } |
| if old_version: |
| format_kwargs["old"] = CIPD_URL.format( |
| package=package, version=old_version |
| ) |
| packages_with_urls.append(package_line.format(**format_kwargs)) |
| |
| if dry_run: |
| message_title = COMMIT_MESSAGE_TITLE.format( |
| prepend=COMMIT_MESSAGE_DO_NOT_SUBMIT, |
| type="dryrun", |
| type_descr="Dry run", |
| roller=roller_string, |
| version=version, |
| ) |
| else: |
| message_title = COMMIT_MESSAGE_TITLE.format( |
| prepend="", |
| type="roll", |
| type_descr="Roll", |
| roller=roller_string, |
| version=version, |
| ) |
| |
| message_body = COMMIT_MESSAGE.format( |
| roller=roller_string, |
| packages="\n".join(packages_with_urls), |
| old_version=old_version, |
| version=version, |
| builder=self.m.buildbucket.builder_name, |
| build_id=self.m.buildbucket_util.id, |
| multiply=multiply, |
| divider=divider, |
| ) |
| |
| return "".join([message_title, message_body]) |
| |
| def _repo_has_uncommitted_files(self, repo_dir, check_untracked): |
| """Checks whether the git repository at repo_dir has any changes. |
| |
| Args: |
| repo_dir (Path): Path to the git repository. |
| check_untracked (bool): Whether to include untracked files in the check. |
| |
| Returns: |
| True if there are, and False if not. |
| """ |
| with self.m.context(cwd=repo_dir): |
| step_result = self.m.git.ls_files( |
| step_name="check for no-op commit", |
| modified=True, |
| deleted=True, |
| exclude_standard=True, |
| others=check_untracked, |
| ) |
| step_result.presentation.logs["stdout"] = step_result.stdout.split("\n") |
| return bool(step_result.stdout.strip()) |
| |
    def _create_gerrit_change(
        self, opts, repo_dir, commit_message, author_override, labels
    ):
        """Creates a Gerrit change containing modified files under repo_dir.

        Args:
            opts (Options): Roller configuration.
            repo_dir (Path): Repo containing the staged/unstaged changes.
            commit_message (str): Commit message (footers are appended here).
            author_override (dict or None): Optional commit author override.
            labels (dict): Gerrit labels to apply at push time.

        Returns:
            A GerritChange corresponding to the created CL.
        """
        with self.m.context(cwd=repo_dir, infra_steps=True):
            # Stage the files to commit; untracked files are only included
            # when the options request it.
            if opts.commit_untracked:
                add_kwargs = dict(add_all=True)
            else:
                add_kwargs = dict(only_tracked=True)
            self.m.git.add(**add_kwargs)

            change_id, dupe_details = self._find_usable_change_id(opts)

            if dupe_details:
                if opts.dry_run:
                    # If we've already done a CQ dry run with these changes,
                    # it's likely a waste of resources to do additional dry
                    # runs.
                    self.m.step.empty("no new changes to dry-run")
                    cq_result = None
                    if not opts.force_submit:
                        cq_result = self._cq_dry_run_result(dupe_details)
                    # Reuse the existing change rather than creating a new one.
                    return GerritChange(
                        api=self.m,
                        remote=opts.remote,
                        upstream_ref=opts.upstream_ref,
                        id=change_id,
                        dry_run=opts.dry_run,
                        cq_result=cq_result,
                        last_seen_status=dupe_details["status"],
                    )

                # TODO(olivernewman): There's a bug in Gerrit where a new
                # patchset isn't considered trivial if it includes a rebase and
                # a commit message modification, so CQ won't reuse tryjobs from
                # older patchsets. We can get around this by splitting up the
                # operations into separate patchsets, which will both be
                # considered trivial: first, commit and push the changes with
                # the same commit message as the previous patchset. Then, amend
                # the commit to use the new commit message, and re-push.
                #
                # TODO(olivernewman): Remove this logic and just do a single
                # commit+push once http://b/171317704 is resolved.
                current_revision = dupe_details["current_revision"]
                old_commit_message = dupe_details["revisions"][current_revision][
                    "commit"
                ]["message"]
                self.m.git.commit(
                    message=old_commit_message,
                    author_override=author_override,
                )
                self._push(opts)

            # Footers attached to every roll CL.
            extra_footers = [
                "Roller-URL: %s" % self.m.buildbucket_util.build_url,
            ]
            if opts.roller_owners:
                # Purely for informational purposes; it's useful for build
                # gardeners to have an easily identifiable point of contact for
                # rollers when debugging a breakage caused by a roll.
                extra_footers.append(
                    "Roller-Owners: %s" % ", ".join(opts.roller_owners)
                )
            if opts.add_gitwatcher_ignore:
                extra_footers.append("GitWatcher: ignore")
            extra_footers.extend(
                [
                    "CQ-Do-Not-Cancel-Tryjobs: true",
                    # Convert from a full change ID back to a partial change ID.
                    "Change-Id: %s" % change_id.id,
                ]
            )
            if opts.no_tryjobs:
                extra_footers.append("No-Try: true")
            if opts.include_tryjobs:
                extra_footers.append(
                    "Cq-Include-Trybots: "
                    + ";".join(
                        [
                            "%s:%s" % (bucket, ",".join(builders))
                            for bucket, builders in opts.include_tryjobs.items()
                        ]
                    )
                )
            if len(commit_message.splitlines()) == 1:
                # Make sure we have a blank line after the commit message title.
                commit_message += "\n"
            commit_message += "".join("\n" + footer for footer in extra_footers)

            self.m.git.commit(
                message=commit_message,
                # If we're updating an existing CL, use this commit step to
                # amend the old commit message.
                # TODO(olivernewman): Stop amending once http://b/171317704 is
                # resolved.
                amend=bool(dupe_details),
                author_override=author_override,
                all_tracked=True,
            )

            push_step, upstream_ref = self._push(opts, labels)
            # Update the branch in case the push got redirected.
            change_id.branch = upstream_ref

        change = GerritChange(
            api=self.m,
            remote=opts.remote,
            upstream_ref=upstream_ref,
            id=change_id,
            dry_run=opts.dry_run,
        )
        push_step.presentation.links[GERRIT_LINK_NAME] = change.url
        return change
| |
    def _find_usable_change_id(self, opts):
        """Finds a Change-Id for the CL that Gerrit will allow.

        The Change-Id must *NOT* be the same as the Change-Id of an existing
        already-merged CL.

        Given that restriction, we try to make it as likely as possible that
        we'll find and re-CQ+2 an unmerged change with the same diff, if one
        exists.

        Returns:
            The calculated Change-Id, along with the change details of an
            identical change (or None if there is no identical change).
        """
        gerrit_host = self.m.gerrit.host_from_remote_url(opts.remote)
        gerrit_project = self.m.gerrit.project_from_remote_url(opts.remote)

        possible_extra_hash_inputs = [
            # If there's an already-merged CL with the same diff, we'll fall
            # back to including the base revision in the Change-Id hash inputs.
            # Barring a race condition with an identical roller, it should be
            # impossible for this CL to have the same diff *and* the same base
            # revision as an already-merged CL.
            self.m.git.rev_parse("HEAD"),
            # If all else fails, we'll give up on trying to find and restore an
            # identical change and just try to get a unique Change Id.
            str(self.m.random.random()),
        ]
        # Each iteration mixes one more fallback input into the hash, making
        # the Change-Id progressively more specific.
        for i in range(len(possible_extra_hash_inputs) + 1):
            with self.m.step.nest("calculate Change-Id") as presentation:
                partial_change_id = self._calculate_change_id(
                    opts.create_unique_change_id,
                    opts.dry_run,
                    extra_hash_inputs=possible_extra_hash_inputs[:i],
                )
                presentation.step_text = partial_change_id
                change_id = self._full_change_id(
                    gerrit_project, opts.upstream_ref, partial_change_id
                )

            # Check to see if there's an existing identical roll CL that the
            # roller abandoned (probably because it failed in CQ).
            change_query_step = self.m.gerrit.change_query(
                "check for identical roll",
                "change:%s" % change_id,
                query_params=["CURRENT_COMMIT", "CURRENT_REVISION", "MESSAGES"],
                host=gerrit_host,
                # The Gerrit API returns null if no changes are found.
                test_data=self.m.json.test_api.output(None),
            )
            identical_changes = change_query_step.json.output
            if not identical_changes:
                # This Change-Id is unused; create a brand-new CL with it.
                return change_id, None

            def _change_url(details):
                # Link to the change by its numeric ID.
                return "https://%s/%s" % (gerrit_host, details["_number"])

            for c in identical_changes:
                link_name = "CL %d (%s)" % (c["_number"], c["status"].lower())
                change_query_step.presentation.links[link_name] = _change_url(c)

            # There's something weird going on if there's more than one
            # identical change (Gerrit is supposed to make this
            # impossible) so we should just exit.
            assert len(identical_changes) == 1, "%d != 1" % len(identical_changes)
            change_details = identical_changes[0]

            if change_details["status"] == "MERGED":
                # If there's a merged change with the same change ID, we'll
                # recalculate the change ID using more specific hash inputs.
                continue
            elif change_details["status"] == "ABANDONED" and not opts.dry_run:
                # Don't bother restoring abandoned changes in dry-run mode,
                # since we'll just reuse the result of the previous dry run
                # instead of retrying CQ.
                restore_step = self.m.gerrit.restore_change(
                    "restore CL from previous roll attempt",
                    change_id,
                    host=gerrit_host,
                    message="Retrying CQ.",
                )
                restore_step.presentation.step_text = (
                    "\nfound an identical roll CL that was abandoned."
                )
            else:
                # Either we're in dry-run mode, or the previous roll attempt's
                # CL is open, rather than abandoned. If the change is open,
                # it's likely that either:
                #   - Someone restored a previously abandoned auto-roll CL
                #     expecting the roller to pick it back up.
                #   - OR the roller failed to clean up properly on its last
                #     attempt.
                # In either case we should re-CQ the CL.
                self.m.step.empty("reusing CL from previous roll attempt")

            return change_id, change_details

        raise self.m.step.InfraFailure(
            "failed to calculate a unique Change-Id for this CL"
        )  # pragma: no cover
| |
| def _calculate_change_id(self, create_unique_id, dry_run, extra_hash_inputs=()): |
| """Compute a Gerrit change ID for the roll commit. |
| |
| This function assumes that it's run in the directory corresponding to |
| the git repo that will contain the roll commit. |
| |
| We generate our own change ID because: |
| - We need to know the change ID post-push in order to perform operations |
| on the change via the Gerrit API. Since we generate the change ID |
| ourselves, there's no need to scan Gerrit's `git push` output for the |
| change URL, or to query the Gerrit API for the commit hash (such |
| queries often fail due to Gerrit backend replication delays). |
| - Deterministically generating a change ID using the git diff lets us |
| detect when an identical roll has already been attempted, because |
| we'll calculate the same change ID as an existing CL. |
| """ |
| mock_diff = textwrap.dedent( |
| """\ |
| diff --git a/foo.txt b/foo.txt |
| index 63bcead3a0..b0dff288cd 100644 |
| --- a/foo.txt |
| +++ b/foo.txt |
| @@ -16 +16 @@ |
| - foo = 5 |
| + foo = 6 |
| diff --git a/bar.txt b/bar.txt |
| index 9410318a54..5a8c587f75 100644 |
| --- a/bar.txt |
| +++ b/bar.txt |
| @@ -5 +5 @@ |
| - bar = 0 |
| + bar = 1 |
| """ |
| ) |
| |
| diff_step = self.m.git.diff( |
| ref_base=None, |
| # Include no context around the changed lines to make it less likely |
| # that two identical patches will have different diff output |
| # depending on whether a recent change touching nearby lines is |
| # checked out locally. |
| unified=0, |
| # Assumes we've already staged the changes we're going to commit. |
| cached=True, |
| step_test_data=lambda: self.m.raw_io.test_api.stream_output_text(mock_diff), |
| ) |
| |
| # git diff output includes a header line for each modified file of the |
| # form: "index <oldhash>..<newhash> <mode>". These hashes might vary |
| # depending on how much history we have checked out locally, making it |
| # less likely that otherwise identical patches will have the same diff |
| # output. So strip out these "index" lines in the diff that we use to |
| # calculate a Change-Id. |
| diff_lines = [ |
| l for l in diff_step.stdout.split("\n") if not l.startswith("index ") |
| ] |
| diff_step.presentation.logs["diff (without hashes)"] = diff_lines |
| |
| # Use the hash of the diff so that this CL will collide with any older |
| # identical CLs. Also include: |
| # - The builder name, to ensure that two builders don't try to operate |
| # on the same roll CLs. |
| # - Whether we're running in dry-run mode, because commit messages |
| # intentionally differ between dry-run and non-dry-run rolls. |
| # - Whether this is a led build. If it is a led build it probably means |
| # a developer is trying to test changes to a recipe, so we don't want |
| # CLs created by the led job to collide with CLs created by a |
| # production roller build. |
| hash_inputs = [ |
| "\n".join(diff_lines), |
| str(self.m.buildbucket.builder_name), |
| ("dry-run" if dry_run else ""), |
| # TODO(crbug.com/1129577): Use the name of the user who launched the |
| # led job instead of a boolean led/no-led so that multiple people |
| # can test the same recipe with led without stepping on each other's |
| # toes. |
| ("led" if self.m.led.launched_by_led else ""), |
| ] |
| if create_unique_id: |
| hash_inputs.append(self.m.buildbucket_util.id) |
| hash_inputs.extend(extra_hash_inputs) |
| hash_step = self.m.git.hash_object( |
| # Separate hash inputs with a special string just to be extra |
| # unambiguous about which part of the string corresponds to a |
| # separate parameter. |
| "####".join(hash_inputs), |
| ) |
| |
| return "I%s" % hash_step.stdout.strip() |
| |
| def _full_change_id(self, project, upstream_ref, change_id): |
| """Returns the full (unique) change ID for a change. |
| |
| Each Gerrit host only enforces change ID uniqueness per (repo, upstream_ref) |
| combination, so a full change ID is required to uniquely identify a change. |
| """ |
| return _ChangeId(project, upstream_ref, change_id) |
| |
    def _push(self, opts, labels=None):
        """Pushes HEAD to Gerrit, applying `labels` via push options.

        Args:
            opts (Options): Roller configuration (upstream ref, notify/cc
                options).
            labels (dict or None): Gerrit labels to set at push time.

        Returns:
            A tuple of (push step, actual upstream ref); the ref may differ
            from opts.upstream_ref if Gerrit redirected the push.
        """
        labels = labels or {}
        # Apply labels at push time rather than using `api.gerrit.set_review()`
        # to apply them post-push in order to avoid extra Gerrit API requests as
        # well as a possible race condition when applying labels to a very
        # recently pushed change. See
        # https://gerrit-review.googlesource.com/Documentation/user-upload.html#review_labels.
        gerrit_options = []
        for label, score in sorted(labels.items()):
            if score != 0:
                # Label push options look like "l=Commit-Queue+2".
                gerrit_options.append("l=%s%+d" % (label, score))
        if opts.cl_notify_option:
            gerrit_options.append("notify=%s" % opts.cl_notify_option)
        if opts.cc_emails:
            gerrit_options.extend("cc=%s" % c for c in opts.cc_emails)

        push_ref = "HEAD:refs/for/%s" % opts.upstream_ref
        if gerrit_options:
            # Push options are appended after a "%", comma-separated.
            push_ref += "%" + ",".join(gerrit_options)

        def do_push():
            # Gerrit sometimes rejects pushes with spurious "found private key"
            # errors, so we disable key checking.
            step = self.m.git.push(
                push_ref,
                options=["nokeycheck"],
                timeout=datetime.timedelta(minutes=3),
                stdout=self.m.raw_io.output_text(),
                stderr=self.m.raw_io.output_text(),
                ok_ret="any",
            )
            if step.retcode:
                step.presentation.logs["stderr"] = step.stderr
                # If push fails because the remote change has already been
                # created ("no new changes"), that likely means that a previous
                # failed push attempt actually succeeded on the server side but
                # the success wasn't propagated to the client. So we should
                # consider that to be a success.
                if "(no new changes)" not in step.stderr:
                    step.presentation.status = self.m.step.INFRA_FAILURE
                    self.m.step.raise_on_failure(step)

            # Detect whether Gerrit redirected the push to a different branch
            # so that callers can track the actual upstream ref.
            match = re.search(
                r"Update redirected to refs/for/refs/heads/([-\w_]+)(%|\.)",
                step.stdout,
            )
            ref = opts.upstream_ref
            if match:
                ref = match.group(1)
                step.presentation.logs["redirect"] = ref
            step.presentation.logs["stdout"] = step.stdout
            return step, ref

        # Retry in case Gerrit backend replication delays cause the backend to
        # think that a recently restored CL is still closed.
        return self.m.utils.retry(do_push, max_attempts=5, sleep=5, backoff_factor=2)
| |
    def _is_cq_complete(
        self, iteration, change, dry_run, labels_to_set, labels_to_wait_on
    ):
        """Checks Gerrit once and decides whether presubmit/CQ has finished.

        Args:
            iteration: Poll counter; used only to uniquify step names.
            change: Gerrit change wrapper exposing get_details().
            dry_run: Whether CQ was triggered in dry-run (CQ+1) mode.
            labels_to_set: Dict of labels that were set when the CL was
                pushed; used to detect the mixed CQ/non-CQ presubmit setup
                where Commit-Queue is pinned to +1.
            labels_to_wait_on: Optional iterable of extra non-CQ labels that
                must be approved before the CQ result is consulted.

        Returns:
            A CQResult if a terminal state was reached, or None if CQ (or a
            non-CQ presubmit) is still running and polling should continue.
        """
        with self.m.context(infra_steps=True):
            details = change.get_details("check if done (%d)" % iteration)

        # If the CQ label is un-set, then that means either:
        # * CQ failed (production mode), or
        # * CQ finished (dry-run mode).
        #
        # 'recommended' and 'approved' are objects that appear for a label if
        # somebody gave the label a positive vote (maximum vote (+2) for approved,
        # non-maximum (+1) for 'recommended') and contains the information of one
        # reviewer who gave this vote. There are 4 different states for a label in
        # this sense: 'rejected', 'approved', 'disliked', and 'recommended'. For a
        # given label, only one of these will be shown if the label has any votes
        # in priority order 'rejected' > 'approved' > 'disliked' > 'recommended'.
        # Unfortunately, this is the absolute simplest way to check this. Gerrit
        # provides an 'all' field that contains every vote, but iterating over
        # every vote, or operating under the assumption that there's at least one
        # causes more error cases.
        #
        # Read more at:
        # https://gerrit-review.googlesource.com/Documentation/rest-api-changes.html#get-change-detail

        # An abandoned CL is terminal regardless of any label state.
        if details["status"] == "ABANDONED":
            return CQResult.ABANDONED

        # If it merged, we're done! (Even if this is a dry run.)
        if details["status"] == "MERGED":
            return CQResult.SUCCESS

        # Wait on all extra non-CQ labels before checking CQ.
        for label in labels_to_wait_on or ():
            with self.m.step.nest("check %s label (%d)" % (label, iteration)) as pres:
                pres.step_summary_text = "not set"
                if label not in details["labels"]:
                    # A missing label can never become approved, so fail fast.
                    pres.step_summary_text = "no such label"
                    return CQResult.FAILURE
                # Surface whichever status object Gerrit attached to the label
                # (per the priority-ordering note above, at most one is shown).
                for status in LABEL_STATUSES:
                    if status in details["labels"][label]:
                        pres.step_summary_text = status
            if "rejected" in details["labels"][label]:
                return CQResult.FAILURE
            if "approved" not in details["labels"][label]:
                # Not rejected but not yet approved: keep polling.
                return None

        # If there's no "Commit-Queue" label don't attempt to wait on CQ to
        # complete. (Presumably there's a non-CQ presubmit system that was
        # handled through labels_to_wait_on above, and if we got here that
        # non-CQ presubmit system has passed.)
        if "Commit-Queue" not in details["labels"]:
            return CQResult.SUCCESS

        # If the properties say to set Commit-Queue to 1 regardless of whether
        # this is a dry run then we're likely mixing CQ with other presubmits,
        # and we want to submit after all presubmits run. In this case use the
        # dry run processing even if not in a dry run.
        if dry_run or labels_to_set.get("Commit-Queue") == 1:
            dry_run_result = self._cq_dry_run_result(details)
            if dry_run_result:
                return dry_run_result
        else:
            # If CQ drops the CQ+2 label at any point (i.e. 'approved' state), then
            # that always means CQ has failed. CQ will always remove the CQ+2 label
            # when it fails, and it will never remove it on success.
            #
            # Note: Because CQ won't unset the the CQ+2 label when it merges, there's
            # no chance that we might see that the CL hasn't merged with the CQ+2
            # label unset on a successful CQ.
            if "approved" not in details["labels"]["Commit-Queue"]:
                return CQResult.FAILURE

        # CQ is still running.
        return None
| |
| def _cq_dry_run_result(self, change_details): |
| # If CQ drops the CQ+1 label (i.e. 'recommended' state), then that means |
| # CQ finished trying. CQ will always remove the CQ+1 label when it's |
| # finished, regardless of success or failure. |
| labels = change_details["labels"] or {} |
| if "recommended" in labels.get("Commit-Queue", {}): |
| return None |
| |
| # Unfortunately the only way to determine whether the dry run has passed |
| # or failed is to check whether CQ's most recent dry run comment has a |
| # specific string. |
| messages = change_details["messages"] or [] |
| for message in reversed(messages): |
| real_author = message.get("real_author", {}) |
| if not real_author.get("email", "").endswith(CQ_SERVICE_ACCOUNT_DOMAIN): |
| continue |
| for msg in PASSED_DRY_RUN_MESSAGES: |
| if msg in message["message"]: |
| return CQResult.SUCCESS |
| for msg in FAILED_DRY_RUN_MESSAGES: |
| if msg in message["message"]: |
| return CQResult.FAILURE |
| # If we get here then this message is likely a "CQ is trying the |
| # patch" comment, which indicates that CQ has not yet posted the |
| # dry run result. |
| break |
| |
| # If we're here, the CQ label was removed but CQ Bot didn't comment the |
| # status yet. |
| return CQResult.MISSING_COMMENT |
| |
| def _wait_for_cq(self, change_id, dry_run, labels_to_set, labels_to_wait_on): |
| """Polls gerrit to see if CQ was successful. |
| |
| Returns a CQResult representing the status of CQ. |
| """ |
| # Wait 30 seconds before polling for the first time to guard against Gerrit |
| # minor replication delays. |
| wait_time = 30 |
| |
| # If waiting on additional labels it's possible they've been set to +1 |
| # or -1 based on a previous run and reinvoking won't immediately clear |
| # them. In that case we need to give the non-CQ presubmits time to clear |
| # existing votes when starting jobs. |
| if labels_to_wait_on: |
| wait_time = 5 * 60 |
| |
| self.m.time.sleep(wait_time) |
| |
| with self.m.step.nest("check for completion"): |
| num_iterations = int(self._poll_timeout_secs / self._poll_interval_secs) |
| # Include one-time tolerance for the indeterminate CQ result where |
| # we are waiting for the CQ result comment to be posted. |
| wait_for_comment = True |
| for i in range(num_iterations + 1): |
| # Check the status of the CL. |
| status = self._is_cq_complete( |
| i, change_id, dry_run, labels_to_set, labels_to_wait_on |
| ) |
| if status == CQResult.MISSING_COMMENT: |
| # Unset the flag so that we no longer tolerate a missing CQ |
| # result comment. |
| if wait_for_comment: |
| wait_for_comment = False |
| else: |
| raise self.m.step.InfraFailure( |
| "CQ removed the CQ+1 vote but didn't post a message with the dry run result" |
| ) |
| elif status: |
| return status |
| |
| # If none of the terminal conditions above were reached (that is, there were |
| # no label changes from what we initially set, and the change has not |
| # merged or abandoned), then we should wait for |poll_interval_secs| before |
| # trying again. Don't sleep after the final check. |
| |
| if i < num_iterations: |
| self.m.time.sleep(self._poll_interval_secs) |
| |
| return CQResult.TIMEOUT |
| |
| def _ensure_abandoned( |
| self, |
| change, |
| reason="", |
| notify_option=None, |
| clear_commit_queue=True, |
| retrigger_link=False, |
| ): |
| if not reason: |
| reason = FAILURE_REASONS[change.cq_result] |
| |
| if change.cq_result == CQResult.TIMEOUT and clear_commit_queue: |
| # CQ automatically removes the CQ label in case of a tryjob |
| # failure, but if CQ has not yet completed then we'll need to |
| # remove the label ourselves so that CQ doesn't submit the CL right |
| # away if and when it's restored by a subsequent roll attempt. |
| self.m.gerrit.set_review( |
| "remove Commit-Queue label", |
| change.id, |
| labels={"Commit-Queue": 0}, |
| host=change.host, |
| ) |
| |
| if change.last_seen_status == "ABANDONED": |
| step = self.m.step.empty(reason) |
| else: |
| # Capitalize the first letter and add a period for stylistic |
| # consistency with comments added by CQ and by Gerrit itself. |
| message = reason[0].upper() + reason[1:].rstrip(".") + "." |
| if retrigger_link: |
| message += " To retry this roll go to {}.".format(change.retrigger_link) |
| |
| step = self.m.gerrit.abandon( |
| "abandon roll", |
| change.id, |
| message=message, |
| host=change.host, |
| notify=notify_option, |
| ) |
| step.presentation.step_text = reason |
| |
| step.presentation.links[GERRIT_LINK_NAME] = change.url |
| if not change.success: |
| step.presentation.status = self.m.step.FAILURE |
| |
| def _force_submit(self, change): |
| def submit_func(): |
| self.m.gerrit.submit("force-submit change", change.id, host=change.host) |
| |
| # Retry submission in case Gerrit backend replication delays cause 404s. |
| self.m.utils.retry(submit_func, max_attempts=5) |