blob: b460e1b29ca18885ec66d0b325ad24c347e1a0da [file] [log] [blame]
# utils/update_checkout.py - Utility to update local checkouts --*- python -*-
#
# This source file is part of the Swift.org open source project
#
# Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors
# Licensed under Apache License v2.0 with Runtime Library Exception
#
# See https://swift.org/LICENSE.txt for license information
# See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
from __future__ import print_function
import argparse
import json
import os
import re
import sys
import traceback
from functools import reduce
from multiprocessing import freeze_support
from swift_build_support.swift_build_support import shell
from swift_build_support.swift_build_support.SwiftBuildSupport import \
SWIFT_SOURCE_ROOT
SCRIPT_FILE = os.path.abspath(__file__)
SCRIPT_DIR = os.path.dirname(SCRIPT_FILE)
def confirm_tag_in_repo(tag, repo_name):
tag_exists = shell.capture(['git', 'ls-remote', '--tags',
'origin', tag], echo=False)
if not tag_exists:
print("Tag '" + tag + "' does not exist for '" +
repo_name + "', just updating regularly")
tag = None
return tag
def find_rev_by_timestamp(timestamp, repo_name, refspec):
base_args = ["git", "log", "-1", "--format=%H",
'--before=' + timestamp]
# On repos with regular batch-automerges from swift-ci -- namely clang,
# llvm and lldb -- prefer the most-recent change _made by swift-ci_
# before the timestamp, falling back to most-recent in general if there
# is none by swift-ci.
if repo_name in ["llvm", "clang", "lldb"]:
rev = shell.capture(base_args +
['--author', 'swift-ci', refspec]).strip()
if rev:
return rev
rev = shell.capture(base_args + [refspec]).strip()
if rev:
return rev
else:
raise RuntimeError('No rev in %s before timestamp %s' %
(repo_name, timestamp))
def get_branch_for_repo(config, repo_name, scheme_name, scheme_map,
cross_repos_pr):
cross_repo = False
repo_branch = scheme_name
if scheme_map:
scheme_branch = scheme_map[repo_name]
repo_branch = scheme_branch
remote_repo_id = config['repos'][repo_name]['remote']['id']
if remote_repo_id in cross_repos_pr:
cross_repo = True
pr_id = cross_repos_pr[remote_repo_id]
repo_branch = "ci_pr_{0}".format(pr_id)
shell.run(["git", "checkout", scheme_branch],
echo=True)
shell.capture(["git", "branch", "-D", repo_branch],
echo=True, allow_non_zero_exit=True)
shell.run(["git", "fetch", "origin",
"pull/{0}/merge:{1}"
.format(pr_id, repo_branch), "--tags"], echo=True)
return repo_branch, cross_repo
def update_single_repository(args):
config, repo_name, scheme_name, scheme_map, tag, timestamp, \
reset_to_remote, should_clean, cross_repos_pr = args
repo_path = os.path.join(SWIFT_SOURCE_ROOT, repo_name)
if not os.path.isdir(repo_path):
return
try:
print("Updating '" + repo_path + "'")
with shell.pushd(repo_path, dry_run=False, echo=False):
cross_repo = False
checkout_target = None
if tag:
checkout_target = confirm_tag_in_repo(tag, repo_name)
elif scheme_name:
checkout_target, cross_repo = get_branch_for_repo(
config, repo_name, scheme_name, scheme_map, cross_repos_pr)
if timestamp:
checkout_target = find_rev_by_timestamp(timestamp,
repo_name,
checkout_target)
elif timestamp:
checkout_target = find_rev_by_timestamp(timestamp, repo_name,
"HEAD")
# The clean option restores a repository to pristine condition.
if should_clean:
shell.run(['git', 'clean', '-fdx'], echo=True)
shell.run(['git', 'submodule', 'foreach', '--recursive', 'git',
'clean', '-fdx'], echo=True)
shell.run(['git', 'submodule', 'foreach', '--recursive', 'git',
'reset', '--hard', 'HEAD'], echo=True)
shell.run(['git', 'reset', '--hard', 'HEAD'], echo=True)
# It is possible to reset --hard and still be mid-rebase.
try:
shell.run(['git', 'rebase', '--abort'], echo=True)
except Exception:
pass
if checkout_target:
shell.run(['git', 'status', '--porcelain', '-uno'],
echo=False)
shell.run(['git', 'checkout', checkout_target], echo=True)
# It's important that we checkout, fetch, and rebase, in order.
# .git/FETCH_HEAD updates the not-for-merge attributes based on
# which branch was checked out during the fetch.
shell.run(["git", "fetch", "--recurse-submodules=yes", "--tags"],
echo=True)
# If we were asked to reset to the specified branch, do the hard
# reset and return.
if checkout_target and reset_to_remote and not cross_repo:
shell.run(['git', 'reset', '--hard',
"origin/%s" % checkout_target], echo=True)
return
# Query whether we have a "detached HEAD", which will mean that
# we previously checked out a tag rather than a branch.
detached_head = False
try:
# This git command returns error code 1 if HEAD is detached.
# Otherwise there was some other error, and we need to handle
# it like other command errors.
shell.run(["git", "symbolic-ref", "-q", "HEAD"], echo=False)
except Exception as e:
if e.ret == 1:
detached_head = True
else:
raise # Pass this error up the chain.
# If we have a detached HEAD in this repository, we don't want
# to rebase. With a detached HEAD, the fetch will have marked
# all the branches in FETCH_HEAD as not-for-merge, and the
# "git rebase FETCH_HEAD" will try to rebase the tree from the
# default branch's current head, making a mess.
# Prior to Git 2.6, this is the way to do a "git pull
# --rebase" that respects rebase.autostash. See
# http://stackoverflow.com/a/30209750/125349
if not cross_repo and not detached_head:
shell.run(["git", "rebase", "FETCH_HEAD"], echo=True)
elif detached_head:
print(repo_path,
"\nDetached HEAD; probably checked out a tag. No need "
"to rebase.\n")
shell.run(["git", "submodule", "update", "--recursive"], echo=True)
except Exception:
(type, value, tb) = sys.exc_info()
print('Error on repo "%s": %s' % (repo_path, traceback.format_exc()))
return value
def get_timestamp_to_match(args):
if not args.match_timestamp:
return None
with shell.pushd(os.path.join(SWIFT_SOURCE_ROOT, "swift"),
dry_run=False, echo=False):
return shell.capture(["git", "log", "-1", "--format=%cI"],
echo=False).strip()
def update_all_repositories(args, config, scheme_name, cross_repos_pr):
scheme_map = None
if scheme_name:
# This loop is only correct, since we know that each alias set has
# unique contents. This is checked by validate_config. Thus the first
# branch scheme data that has scheme_name as one of its aliases is
# the only possible correct answer.
for v in config['branch-schemes'].values():
if scheme_name in v['aliases']:
scheme_map = v['repos']
break
pool_args = []
timestamp = get_timestamp_to_match(args)
for repo_name in config['repos'].keys():
if repo_name in args.skip_repository_list:
print("Skipping update of '" + repo_name + "', requested by user")
continue
my_args = [config,
repo_name,
scheme_name,
scheme_map,
args.tag,
timestamp,
args.reset_to_remote,
args.clean,
cross_repos_pr]
pool_args.append(my_args)
return shell.run_parallel(update_single_repository, pool_args,
args.n_processes)
def obtain_additional_swift_sources(pool_args):
(args, repo_name, repo_info, repo_branch, remote, with_ssh, scheme_name,
skip_history, skip_repository_list) = pool_args
with shell.pushd(SWIFT_SOURCE_ROOT, dry_run=False, echo=False):
print("Cloning '" + repo_name + "'")
if skip_history:
shell.run(['git', 'clone', '--recursive', '--depth', '1',
'--branch', repo_branch, remote, repo_name],
echo=True)
else:
shell.run(['git', 'clone', '--recursive', remote,
repo_name], echo=True)
if scheme_name:
src_path = os.path.join(SWIFT_SOURCE_ROOT, repo_name, ".git")
shell.run(['git', '--git-dir', src_path, '--work-tree',
os.path.join(SWIFT_SOURCE_ROOT, repo_name),
'checkout', repo_branch], echo=False)
with shell.pushd(os.path.join(SWIFT_SOURCE_ROOT, repo_name),
dry_run=False, echo=False):
shell.run(["git", "submodule", "update", "--recursive"],
echo=False)
def obtain_all_additional_swift_sources(args, config, with_ssh, scheme_name,
skip_history, skip_repository_list):
pool_args = []
with shell.pushd(SWIFT_SOURCE_ROOT, dry_run=False, echo=False):
for repo_name, repo_info in config['repos'].items():
if repo_name in skip_repository_list:
print("Skipping clone of '" + repo_name + "', requested by "
"user")
continue
if os.path.isdir(os.path.join(repo_name, ".git")):
print("Skipping clone of '" + repo_name + "', directory "
"already exists")
continue
# If we have a url override, use that url instead of
# interpolating.
remote_repo_info = repo_info['remote']
if 'url' in remote_repo_info:
remote = remote_repo_info['url']
else:
remote_repo_id = remote_repo_info['id']
if with_ssh is True or 'https-clone-pattern' not in config:
remote = config['ssh-clone-pattern'] % remote_repo_id
else:
remote = config['https-clone-pattern'] % remote_repo_id
repo_branch = None
if scheme_name:
for v in config['branch-schemes'].values():
if scheme_name not in v['aliases']:
continue
repo_branch = v['repos'][repo_name]
break
else:
repo_branch = scheme_name
pool_args.append([args, repo_name, repo_info, repo_branch, remote,
with_ssh, scheme_name, skip_history,
skip_repository_list])
if not pool_args:
print("Not cloning any repositories.")
return
return shell.run_parallel(obtain_additional_swift_sources, pool_args,
args.n_processes)
def dump_repo_hashes(config):
max_len = reduce(lambda acc, x: max(acc, len(x)),
config['repos'].keys(), 0)
fmt = "{:<%r}{}" % (max_len + 5)
for repo_name, repo_info in sorted(config['repos'].items(),
key=lambda x: x[0]):
repo_path = os.path.join(SWIFT_SOURCE_ROOT, repo_name)
if os.path.isdir(repo_path):
with shell.pushd(repo_path, dry_run=False, echo=False):
h = shell.capture(["git", "log", "--oneline", "-n", "1"],
echo=False).strip()
print(fmt.format(repo_name, h))
else:
print(fmt.format(repo_name, "(not checked out)"))
def dump_hashes_config(args, config):
branch_scheme_name = args.dump_hashes_config
new_config = {}
config_copy_keys = ['ssh-clone-pattern', 'https-clone-pattern', 'repos']
for config_copy_key in config_copy_keys:
new_config[config_copy_key] = config[config_copy_key]
repos = {}
branch_scheme = {'aliases': [branch_scheme_name], 'repos': repos}
new_config['branch-schemes'] = {args.dump_hashes_config: branch_scheme}
for repo_name, repo_info in sorted(config['repos'].items(),
key=lambda x: x[0]):
with shell.pushd(os.path.join(SWIFT_SOURCE_ROOT, repo_name),
dry_run=False,
echo=False):
h = shell.capture(["git", "rev-parse", "HEAD"],
echo=False).strip()
repos[repo_name] = str(h)
print(json.dumps(new_config, indent=4))
def validate_config(config):
# Make sure that our branch-names are unique.
scheme_names = config['branch-schemes'].keys()
if len(scheme_names) != len(set(scheme_names)):
raise RuntimeError('Configuration file has duplicate schemes?!')
# Ensure the branch-scheme name is also an alias
# This guarantees sensible behavior of update_repository_to_scheme when
# the branch-scheme is passed as the scheme name
for scheme_name in config['branch-schemes'].keys():
if scheme_name not in config['branch-schemes'][scheme_name]['aliases']:
raise RuntimeError('branch-scheme name: "{0}" must be an alias '
'too.'.format(scheme_name))
# Then make sure the alias names used by our branches are unique.
#
# We do this by constructing a list consisting of len(names),
# set(names). Then we reduce over that list summing the counts and taking
# the union of the sets. We have uniqueness if the length of the union
# equals the length of the sum of the counts.
data = [(len(v['aliases']), set(v['aliases']))
for v in config['branch-schemes'].values()]
result = reduce(lambda acc, x: (acc[0] + x[0], acc[1] | x[1]), data,
(0, set([])))
if result[0] == len(result[1]):
return
raise RuntimeError('Configuration file has schemes with duplicate '
'aliases?!')
def main():
freeze_support()
parser = argparse.ArgumentParser(
formatter_class=argparse.RawDescriptionHelpFormatter,
description="""
repositories.
By default, updates your checkouts of Swift, SourceKit, LLDB, and SwiftPM.""")
parser.add_argument(
"--clone",
help="Obtain Sources for Swift and Related Projects",
action="store_true")
parser.add_argument(
"--clone-with-ssh",
help="Obtain Sources for Swift and Related Projects via SSH",
action="store_true")
parser.add_argument(
"--skip-history",
help="Skip histories when obtaining sources",
action="store_true")
parser.add_argument(
"--skip-repository",
metavar="DIRECTORY",
default=[],
help="Skip the specified repository",
dest='skip_repository_list',
action="append")
parser.add_argument(
"--scheme",
help='Use branches from the specified branch-scheme. A "branch-scheme"'
' is a list of (repo, branch) pairs.',
metavar='BRANCH-SCHEME',
dest='scheme')
parser.add_argument(
'--reset-to-remote',
help='Reset each branch to the remote state.',
action='store_true')
parser.add_argument(
'--clean',
help='Clean unrelated files from each repository.',
action='store_true')
parser.add_argument(
"--config",
default=os.path.join(SCRIPT_DIR, "update-checkout-config.json"),
help="Configuration file to use")
parser.add_argument(
"--github-comment",
help="""Check out related pull requests referenced in the given
free-form GitHub-style comment.""",
metavar='GITHUB-COMMENT',
dest='github_comment')
parser.add_argument(
'--dump-hashes',
action='store_true',
help='Dump the git hashes of all repositories being tracked')
parser.add_argument(
'--dump-hashes-config',
help='Dump the git hashes of all repositories packaged into '
'update-checkout-config.json',
metavar='BRANCH-SCHEME-NAME')
parser.add_argument(
"--tag",
help="""Check out each repository to the specified tag.""",
metavar='TAG-NAME')
parser.add_argument(
"--match-timestamp",
help='Check out adjacent repositories to match timestamp of '
' current swift checkout.',
action='store_true')
parser.add_argument(
"-j", "--jobs",
type=int,
help="Number of threads to run at once",
default=0,
dest="n_processes")
args = parser.parse_args()
if args.reset_to_remote and not args.scheme:
print("update-checkout usage error: --reset-to-remote must specify "
"--scheme=foo")
sys.exit(1)
clone = args.clone
clone_with_ssh = args.clone_with_ssh
skip_history = args.skip_history
scheme = args.scheme
github_comment = args.github_comment
with open(args.config) as f:
config = json.load(f)
validate_config(config)
if args.dump_hashes:
dump_repo_hashes(config)
return (None, None)
if args.dump_hashes_config:
dump_hashes_config(args, config)
return (None, None)
cross_repos_pr = {}
if github_comment:
regex_pr = r'(apple/[-a-zA-Z0-9_]+/pull/\d+|apple/[-a-zA-Z0-9_]+#\d+)'
repos_with_pr = re.findall(regex_pr, github_comment)
print("Found related pull requests:", str(repos_with_pr))
repos_with_pr = [pr.replace('/pull/', '#') for pr in repos_with_pr]
cross_repos_pr = dict(pr.split('#') for pr in repos_with_pr)
clone_results = None
if clone or clone_with_ssh:
# If branch is None, default to using the default branch alias
# specified by our configuration file.
if scheme is None:
scheme = config['default-branch-scheme']
skip_repo_list = args.skip_repository_list
clone_results = obtain_all_additional_swift_sources(args, config,
clone_with_ssh,
scheme,
skip_history,
skip_repo_list)
# Quick check whether somebody is calling update in an empty directory
directory_contents = os.listdir(SWIFT_SOURCE_ROOT)
if not ('cmark' in directory_contents or
'llvm' in directory_contents or
'clang' in directory_contents):
print("You don't have all swift sources. "
"Call this script with --clone to get them.")
update_results = update_all_repositories(args, config, scheme,
cross_repos_pr)
fail_count = 0
fail_count += shell.check_parallel_results(clone_results, "CLONE")
fail_count += shell.check_parallel_results(update_results, "UPDATE")
if fail_count > 0:
print("update-checkout failed, fix errors and try again")
else:
print("update-checkout succeeded")
sys.exit(fail_count)
if __name__ == "__main__":
main()