| #!/usr/bin/env python3 |
| # |
| # Copyright 2021 The Fuchsia Authors. All rights reserved. |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| |
| import argparse |
| import hashlib |
| import json |
| import os |
| import re |
| import subprocess |
| import sys |
| import tempfile |
| import time |
| import traceback |
| |
| from pathlib import Path |
| from typing import Any, Dict, List, Optional, Set |
| |
# Gerrit host/repo used when --repo is not given on the command line.
DEFAULT_REPO = "https://fuchsia-review.googlesource.com/fuchsia"

# Text shown by --help. The CL number in the prose and in the example command
# are kept identical so the example can be read literally.
USAGE_DESCRIPTION = r"""
Split a single CL into multiple CLs and upload each of those CLs to Gerrit.
This command should be run from the directory containing your Git checkout.

For example, if you've created a single large CL with Gerrit CL number 12345
and the most recent patchset number is 4, you can split that into multiple
CLs with the following command:

split_cl.py --cl=12345 --patch=4

This script will:

1. Download the specified patchset to your local Git checkout.

2. Determine the set of files changed by that patchset, then open your
$EDITOR with a configuration file that allows you to specify how the
patchset should be split into multiple CLs. Comments at the top of
the configuration file explain how this is done. When you are satisfied,
save the file and close your $EDITOR.

3. Create a local Git branch for each CL specified in the prior step.

4. Upload each Git branch to gerrit. The uploaded CLs will use the same
commit message as the original CL, except the first line will be
prefixed by one or more tags (such as "[fidl]") which are automatically
determined based on the set of changed files. All CLs will be tagged
with the same gerrit topic.

5. Finally, after all CLs are uploaded, the script will checkout JIRI_HEAD.
"""
| |
| |
def parse_args(argv: Optional[List[str]] = None) -> argparse.Namespace:
    """
    Parse the command-line options understood by this script.

    Args:
        argv: Argument strings to parse, without the program name. When None
            (the default) argparse reads them from sys.argv[1:], which is
            what the script entry point relies on.

    Returns:
        A namespace with the attributes `dry_run` (bool), `cl` (int),
        `patch` (int) and `repo` (str).

    argparse itself prints a usage message and exits the process when a
    required option (--cl, --patch) is missing or is not an integer.
    """
    parser = argparse.ArgumentParser(
        description=USAGE_DESCRIPTION,
        # Keep the line breaks of USAGE_DESCRIPTION instead of re-wrapping it.
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        "--dry_run",
        dest="dry_run",
        action="store_true",
        help="If true, print commands that would run but don't execute any of them.",
        required=False,
    )
    parser.add_argument(
        "--cl",
        dest="cl",
        metavar="CL",
        type=int,
        help="Gerrit CL to split. Must be a Gerrit CL number.",
        required=True,
    )
    parser.add_argument(
        "--patch",
        dest="patch",
        metavar="N",
        type=int,
        help="Patch number of the CL to split.",
        required=True,
    )
    parser.add_argument(
        "--repo",
        dest="repo",
        help=f"Gerrit repo to connect to. Defaults to {DEFAULT_REPO}",
        default=DEFAULT_REPO,
        required=False,
    )
    return parser.parse_args(argv)
| |
| |
def run_and_get_stdout(cmd: List[str]) -> str:
    """
    Echo `cmd`, execute it, and hand back whatever it wrote to stdout.

    The captured bytes are decoded as UTF-8. A non-zero exit status raises
    subprocess.CalledProcessError.
    """
    print(f"Running {cmd}")
    raw_output = subprocess.check_output(cmd)
    return raw_output.decode("utf-8")
| |
| |
class Splitter:
    """
    Splits one Gerrit CL into several smaller CLs that share a Gerrit topic.

    Constructing a Splitter immediately fetches the requested patchset and
    checks it out -- even when `dry_run` is set -- because the list of changed
    files and the commit message are read from that checkout. Only commands
    issued through maybe_run() are skipped in dry-run mode.
    """

    def __init__(self, dry_run: bool, repo: str, cl: int, patch: int):
        """
        Fetch the patchset and record its changed paths and commit message.

        Args:
            dry_run: If true, maybe_run() only prints commands.
            repo: Gerrit repository URL to fetch from.
            cl: Gerrit CL number.
            patch: Patchset number of that CL.

        Raises:
            subprocess.CalledProcessError: if one of the git commands fails.
        """
        self.dry_run = dry_run
        self.repo = repo
        self.cl = cl
        self.patch = patch
        self.branch = self.gerrit_branch()
        self.topic = f"auto-batch-{cl}"

        # Checkout the CL so we can learn the files changed and the CL description.
        # Commands are spelled as argv lists (not split strings) so that a repo
        # value containing whitespace is still passed to git as one argument.
        print("Loading CL info...")
        run_and_get_stdout(["git", "fetch", self.repo, self.branch])
        run_and_get_stdout(["git", "checkout", "FETCH_HEAD"])
        # Compare the two commits explicitly; `git diff HEAD~` alone compares
        # against the working tree and would also list uncommitted local edits.
        diff_stdout = run_and_get_stdout(
            ["git", "diff", "--name-only", "HEAD~", "HEAD"]
        )
        self.paths_changed = [
            Path(line.strip())
            for line in diff_stdout.split("\n")
            if line.strip() != ""
        ]
        self.subject = run_and_get_stdout(
            ["git", "log", "-n", "1", "--format=format:%s"]
        )
        body = run_and_get_stdout(["git", "log", "-n", "1", "--format=format:%b"])
        # Remove the original Change-Id footer from the reused description
        # (presumably so every split commit gets its own Change-Id -- confirm).
        self.description = re.sub("Change-Id:.*", "", body)
        print()

    def gerrit_branch(self) -> str:
        """
        Return the Gerrit branch name for the given CL and patch. See:
        https://gerrit-review.googlesource.com/Documentation/concept-refs-for-namespace.html

        The ref has the form refs/changes/<last two digits of CL>/<CL>/<patch>,
        with the two-digit component zero-padded.
        """
        cl_last_digits = self.cl % 100
        return f"refs/changes/{cl_last_digits:02d}/{self.cl}/{self.patch}"

    def maybe_run(
        self,
        cmd: List[str],
        input: Any = None,
        must_succeed: bool = True,
    ) -> Optional[subprocess.CompletedProcess]:
        """
        If self.dry_run, just print the command, otherwise run it.

        Args:
            cmd: Command and arguments to execute.
            input: Optional bytes fed to the command's stdin.
            must_succeed: If true, a non-zero exit status raises
                subprocess.CalledProcessError.

        Returns:
            None in dry-run mode, otherwise the CompletedProcess with stdout
            captured as bytes.
        """
        print(f"Running {cmd}")
        if self.dry_run:
            return None
        return subprocess.run(
            cmd, stdout=subprocess.PIPE, input=input, check=must_succeed
        )

    def create_commit(self, tags: List[str], files: List[Path]) -> None:
        """
        Create a branch holding only the changes to `files` and upload it.

        The commit subject is the original subject prefixed with "[tag]" for
        every entry of `tags`; the branch name is derived from the topic, the
        tags and a stable hash of the file list. All git commands go through
        maybe_run(), so nothing is executed in dry-run mode.

        Args:
            tags: Tags to prefix the commit subject with.
            files: Paths (relative to the checkout) that belong in this CL.
        """
        file_args = [str(f) for f in files]
        tags_str = "".join([f"[{tag}]" for tag in tags])
        commit_subject = f"{tags_str} {self.subject}"
        branch = f"{self.topic}-{'-'.join(tags)}-{get_stable_hash(file_args)}"

        print()
        print(f"Creating commit: {commit_subject}")
        print(f"{len(files)} files:")
        for file in files:
            print(f" {file}")

        self.maybe_run(["git", "--no-pager", "checkout", "JIRI_HEAD"])
        # The branch may be left over from an earlier run; it is fine if it
        # does not exist, hence must_succeed=False.
        self.maybe_run(
            ["git", "--no-pager", "branch", "-D", branch], must_succeed=False
        )
        self.maybe_run(["git", "--no-pager", "checkout", "-b", branch])
        self.maybe_run(["git", "--no-pager", "fetch", self.repo, self.branch])

        # Extract the part of the original CL that touches `files` and apply
        # it on top of the fresh branch.
        diff = self.maybe_run(
            ["git", "--no-pager", "show", "FETCH_HEAD", "--"] + file_args
        )
        self.maybe_run(
            ["git", "--no-pager", "apply", "--whitespace=fix", "--3way", "-"],
            input=(diff.stdout if diff is not None else None),
        )

        self.maybe_run(
            ["git", "--no-pager", "commit", "-m", commit_subject, "-m", self.description]
        )
        self.maybe_run(
            [
                "git",
                "--no-pager",
                "push",
                "origin",
                "HEAD:refs/for/main",
                "-o",
                f"topic={self.topic}",
            ]
        )
        self.maybe_run(["git", "--no-pager", "checkout", "JIRI_HEAD"])
| |
| |
def get_cl_tags(files: List[Path]) -> List[str]:
    """
    Come up with descriptive tags given a list of files changed.

    For each path, use the first rule that applies in the following order:

    1) If the path begins with "examples" or "tools", then pick the next path
       component, e.g. examples/fidl/llcpp/async_completer/client/main.cc -> fidl
    2) Pick the path component right after the last "lib", "bin", "drivers",
       or "devices", ignoring a following component that is "tests" or ends
       in ".cc".
    3) If the path begins with "build", the tag is "build".
    4) If the path begins with "src" and has at least three components, then
       - if the third path component is "tests" and a fourth component exists
         that does not end in ".cc", pick the fourth component
       - otherwise pick the third path component,
         e.g. src/developer/shell -> shell
    5) If the path begins with "zircon", then pick the path component after
       the first "ulib" or "utest", e.g.
       zircon/system/ulib/fs-pty/test/service-test.cc -> fs-pty

    Entries of `files` may be Path objects or plain strings; each one is
    converted with Path() before inspection.

    Example:

        get_cl_tags([
            "src/lib/loader_service/loader_service_test.cc",
            "src/lib/loader_service/loader_service_test_fixture.cc",
        ]) == ["loader_service"]

    Returns:
        The distinct tags, sorted alphabetically.

    Raises:
        RuntimeError: if no rule applies to one of the paths.
    """
    marker_dirs = ("lib", "bin", "drivers", "devices")

    def get_tag(p: Path) -> str:
        parts = p.parts
        # An empty path has no components at all; treat it like any other
        # path that matches no rule instead of failing on parts[0].
        root = parts[0] if parts else ""

        if root in ("examples", "tools") and len(parts) >= 2:
            return parts[1]

        # The *last* qualifying marker wins, so keep scanning to the end.
        tag = ""
        for part, next_part in zip(parts, parts[1:]):
            if (
                part in marker_dirs
                and next_part != "tests"
                and not next_part.endswith(".cc")
            ):
                tag = next_part
        if tag:
            return tag

        if root == "build":
            return "build"

        if root == "src" and len(parts) >= 3:
            # Only look at the fourth component when it actually exists.
            if (
                len(parts) >= 4
                and parts[2] == "tests"
                and not parts[3].endswith(".cc")
            ):
                return parts[3]
            return parts[2]

        if root == "zircon":
            for part, next_part in zip(parts, parts[1:]):
                if part in ("ulib", "utest"):
                    return next_part

        raise RuntimeError(f"Could not infer tags from path {p}")

    tags: Set[str] = {get_tag(Path(file)) for file in files}
    return sorted(tags)
| |
| |
def get_stable_hash(thing):
    """
    Return the SHA-1 hex digest of the JSON serialization of `thing`.

    The result depends only on the JSON text, so equal inputs give the same
    40-character string on every run.
    """
    serialized = json.dumps(thing).encode("utf-8")
    return hashlib.sha1(serialized).hexdigest()
| |
| |
def _parse_change_lists(lines: List[str]) -> List[List[Path]]:
    """
    Turn stripped, comment-free lines into groups of paths, one group per CL.

    Blank lines separate groups; runs of blank lines produce no empty groups.
    """
    change_lists: List[List[Path]] = []
    current_change: List[Path] = []
    for line in lines:
        if line:
            current_change.append(Path(line))
        elif current_change:
            change_lists.append(current_change)
            current_change = []
    # The file does not have to end with a blank line (editors often strip
    # trailing blank lines), so flush the group that is still open.
    if current_change:
        change_lists.append(current_change)
    return change_lists


def main() -> int:
    """
    Split the CL named on the command line and upload the resulting CLs.

    Steps: parse arguments, load the CL through Splitter, write a proposed
    grouping (one group per parent folder) to a temporary file, let the user
    edit it in $EDITOR (default: vim), then create and upload one commit per
    group, pausing between uploads unless this is a dry run.

    Returns:
        0 on success.

    Raises:
        RuntimeError: if the editor exits with a non-zero status.
    """
    # Function-scope import: shlex is only needed to quote the temp-file path.
    import shlex

    # Parse and validate arguments.
    args = parse_args()
    splitter = Splitter(args.dry_run, args.repo, args.cl, args.patch)

    # Default proposal: files that share an immediate parent folder go together.
    file_groups: Dict[Path, List[Path]] = {}
    for file in splitter.paths_changed:
        file_groups.setdefault(file.parent, []).append(file)

    print(f"Found {len(file_groups)} folders...")
    with tempfile.NamedTemporaryFile("w") as tmp:
        tmp.write(
            """
# Consecutive lines will be combined into the same CL.
# By default, files are grouped by their immediate parent folder.
# When you are satisfied, save this file and close your editor.
""".strip()
        )
        tmp.write("\n\n")
        for files in file_groups.values():
            for file in files:
                tmp.write(f"{file}\n")
            tmp.write("\n")
        tmp.flush()

        editor = os.getenv('EDITOR', 'vim')
        # $EDITOR may carry its own arguments, so it is still run through the
        # shell; only the file name is quoted so a temp directory containing
        # spaces or shell metacharacters cannot break the command.
        status = subprocess.run(
            f"{editor} {shlex.quote(tmp.name)}", shell=True
        ).returncode
        if status != 0:
            raise RuntimeError(
                f"Editor '{editor}' exited with status {status}; nothing was uploaded."
            )

        with open(tmp.name) as tmp_read:
            stripped_lines = [line.strip() for line in tmp_read]

    # Strip before testing for '#', so indented comment lines are ignored too.
    lines = [line for line in stripped_lines if not line.startswith("#")]
    change_lists = _parse_change_lists(lines)

    for change in change_lists:
        splitter.create_commit(get_cl_tags(change), change)
        if not splitter.dry_run:
            # This sleep is intended to reduce the chance that gerrit DoS-blocks our
            # requests. The gerrit server's DoS config is unknown, so this may be
            # insufficient in some cases.
            print("Sleeping for 5s to throttle gerrit requests...")
            time.sleep(5)

    return 0
| |
| |
if __name__ == "__main__":
    try:
        sys.exit(main())
    except Exception as e:
        # print_exc() reports the exception currently being handled together
        # with its traceback. Calling print_exception() with value=None and
        # tb=None instead prints only "NoneType: None".
        traceback.print_exc()
        print(f"Error: {e}")
        sys.exit(1)