blob: e1c2b0bac47fbbde3ab23e6f4a4e7b5c3b196436 [file] [log] [blame]
#!/usr/bin/env python3
# Copyright 2018 The Fuchsia Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Downloads bazel release binaries and creates CIPD configs to be pushed.
TODO(IN-651): This should be done by a recipe and builder.
- Finds the latest bazel release on Github
- Finds the binary assets we care about
- Downloads and validates the binary assets (skips if already downloaded)
- Downloads the LICENSE file from the time of the release
- Creates a cipd.yaml file for each binary
- Prints the 'cipd create' commands to push the binaries to CIPD
Does not actually talk to CIPD.
"""
import collections
import hashlib
import json
import logging
import os
import re
import shutil
import sys
import urllib.request
# Base URL of the Github REST API.
GITHUB_API = 'https://api.github.com'
# Request v3 of the API with 'full' (raw + rendered) object bodies.
GITHUB_API_HEADERS = {'Accept': 'application/vnd.github.v3.full+json'}
_BaseReleaseBinary = collections.namedtuple(
    '_BaseReleaseBinary',
    (
        'name',  # Name of the binary. E.g., 'bazel-0.16.1-darwin-x86_64'
        'release',  # E.g., '0.16.1'
        'os',  # E.g., 'linux', 'darwin', 'windows'
        'arch',  # E.g., 'x86_64'
        'bin_url',  # URL of the binary
        'sha256_url',  # URL of the SHA-256 hash
        'sig_url',  # URL of the signature
    ))


class ReleaseBinary(_BaseReleaseBinary):
    """A remote binary asset associated with a release.

    The remote URL fields live in the namedtuple superclass; the local
    state (checksum, download directory, file list) is populated by
    download_and_verify() and is unavailable before that call succeeds.
    """

    def __init__(self, *args, **kwargs):  # Args are handled by the superclass.
        # Local state; set by a successful download_and_verify().
        self._sha256 = None
        self._package_dir = None
        self._files = None

    @property
    def sha256(self):
        """The SHA-256 checksum of the binary as a hex string.

        Raises:
            AssertionError: if the binary has not been downloaded and verified.
        """
        assert self._sha256
        return self._sha256

    @property
    def package_dir(self):
        """Path to the directory containing the downloaded binary.

        Raises:
            AssertionError: if the binary has not been downloaded and verified.
        """
        assert self._package_dir
        return self._package_dir

    @property
    def files(self):
        """Sequence of paths to the binary and associated files.

        All paths will be under package_dir.

        Raises:
            AssertionError: if the binary has not been downloaded and verified.
        """
        assert self._files
        return self._files

    @property
    def cipd_yaml(self):
        """Canonical path to the cipd.yaml file that describes this binary.

        File may not exist yet; when created it should live in this location.

        Raises:
            AssertionError: if the binary has not been downloaded and verified.
        """
        return os.path.join(self.package_dir, 'cipd.yaml')

    def download_and_verify(self, target_dir, bin_name):
        """Downloads the binary asset and verifies its checksum.

        NOTE: Does not currently check that the signature is valid.

        Args:
            target_dir: The directory to download to.
            bin_name: The name to call the binary file. Used as the stem
                for the .sha256 and .sig files.

        Raises:
            Exception if the binary cannot be downloaded or the validation
            fails.
        """

        def download_file(url, filename):
            """Downloads a URL to a local file.

            Skips the download if the file already exists.

            Args:
                url: The URL to download.
                filename: The file to download to.
            """
            if os.path.exists(filename):
                logging.info('Skipping download: %s already exists', filename)
                return
            logging.info('Downloading %s from %s...', filename, url)
            # Download to a temporary file, in case the download is
            # interrupted.
            tmp_file = filename + '~'
            if os.path.exists(tmp_file):
                os.remove(tmp_file)
            # TODO(dbort): Back off and retry on 5xx errors.
            urllib.request.urlretrieve(url=url, filename=tmp_file)
            # Download succeeded; move to the final path.
            shutil.move(tmp_file, filename)
            logging.info('Downloaded %s', filename)

        # Download the binary and its verification assets.
        bin_path = os.path.join(target_dir, bin_name)
        download_file(url=self.bin_url, filename=bin_path)
        # Make it executable.
        os.chmod(bin_path, 0o755)
        sha256_path = bin_path + '.sha256'
        download_file(url=self.sha256_url, filename=sha256_path)
        sig_path = bin_path + '.sig'
        download_file(url=self.sig_url, filename=sig_path)

        # Verify.
        logging.info('Calculating SHA-256 of %s...', bin_path)
        hasher = hashlib.sha256()
        with open(bin_path, 'rb') as f:
            for b in iter(lambda: f.read(4096), b''):
                hasher.update(b)
        actual_sha256 = hasher.hexdigest()
        logging.info('SHA-256 of %s: %s', bin_path, actual_sha256)
        with open(sha256_path, 'r') as f:
            # The .sha256 file should contain a line like
            #   ^<hex-digest> <filename>$
            # We only want the hex digest: delete the first whitespace
            # character and everything after it.
            expected_sha256 = re.sub(r'\s.*', '', f.read().strip())
        if actual_sha256 == expected_sha256:
            logging.info('Checksum of %s verified', bin_path)
        else:
            # BUGFIX: the expected digest comes from the .sha256 file, not
            # the .sig file; the original message blamed sig_path/sig_url.
            raise ValueError(
                'Actual SHA-256 {actual_sha256} of {bin_path} ({bin_url}) != '
                'expected SHA-256 {expected_sha256} '
                'from {sha256_path} ({sha256_url})'.format(
                    actual_sha256=actual_sha256,
                    bin_path=bin_path,
                    bin_url=self.bin_url,
                    expected_sha256=expected_sha256,
                    sha256_path=sha256_path,
                    sha256_url=self.sha256_url,
                ))
        # Success. Expose the local information about the binary and
        # associated files.
        self._package_dir = target_dir
        self._sha256 = actual_sha256
        self._files = (bin_path, sha256_path, sig_path)
        # TODO(dbort): Validate the signature using the key at
        # https://bazel.build/bazel-release.pub.gpg. This requires a
        # non-standard library like https://pythonhosted.org/python-gnupg/,
        # which isn't installed by default on many Debian systems.
def extract_release_binaries(stem, release_json):
    """Returns a sequence of ReleaseBinary objects based on the JSON.

    Expects that binary assets match the pattern
        <stem>-<release>-<os>-<architecture>[.exe]
    and have associated checksum and signature assets that match the same
    pattern with '.sha256' and '.sig' extensions.

    Args:
        stem: The bare name of the binary; e.g., 'bazel'. Assets should have
            names that begin with this stem.
        release_json: The parsed Github API JSON describing the release
            and its assets.

    Returns:
        A sequence of ReleaseBinary objects.
    """
    # Index asset entries by name for direct lookup.
    assets_by_name = {a['name']: a for a in release_json['assets']}
    release = release_json['name']  # E.g., '0.16.1'

    def build_entry(os_type, arch):
        """Builds the ReleaseBinary for one (os, arch) combination."""
        base_name = '{stem}-{release}-{os}-{arch}'.format(
            stem=stem,
            release=release,
            os=os_type,
            arch=arch,
        )

        def asset_url(asset_name):
            # Intentionally raise KeyError if the asset is missing.
            return assets_by_name[asset_name]['browser_download_url']

        return ReleaseBinary(
            name=base_name,
            release=release,
            os=os_type,
            arch=arch,
            bin_url=asset_url(base_name),
            sha256_url=asset_url(base_name + '.sha256'),
            sig_url=asset_url(base_name + '.sig'),
        )

    # Find the binaries and their related checksums/signatures.
    binaries = [build_entry(os_type, 'x86_64')
                for os_type in ('linux', 'darwin')]
    return tuple(sorted(binaries, key=lambda b: b.name))
def get_latest_release_json(repo):
    """Returns JSON text describing the latest release of a Github repo.

    See https://developer.github.com/v3/repos/releases/#get-the-latest-release
    for example JSON data.

    Args:
        repo: The target Github repo, as an 'owner/repo' string.

    Returns:
        String containing JSON describing the latest release.

    Raises:
        urllib.error.HTTPError: Error reading from the Github API
    """
    url = '{api}/repos/{repo}/releases/latest'.format(
        api=GITHUB_API, repo=repo)
    req = urllib.request.Request(url=url, headers=GITHUB_API_HEADERS)
    logging.info('Fetching latest release JSON from %s...', url)
    # TODO(dbort): Back off and retry on 5xx errors.
    # Use a context manager so the connection is closed even if read()
    # raises (the original leaked the response object).
    with urllib.request.urlopen(req) as response:  # Throws urllib.error.HTTPError
        body = response.read()
    logging.info('Release JSON fetched')
    return body.decode('utf-8')
def download_binaries(work_dir, bin_name, binaries):
    """Downloads binaries to local directories.

    Args:
        work_dir: Path to a directory under which the dirs should live.
        bin_name: The name to call the binary files.
        binaries: Sequence of ReleaseBinary objects to download.

    Returns:
        None. See ReleaseBinary.package_dir for the download location of each
        binary.

    Raises:
        Exceptions if there was a problem downloading or verifying a binary.
    """
    for binary in binaries:
        # One directory per (release, os, arch) so binaries never collide.
        d = os.path.join(work_dir, bin_name, binary.release, binary.os,
                         binary.arch)
        # exist_ok avoids the check-then-create race in the original
        # exists()/makedirs() pair.
        os.makedirs(d, exist_ok=True)
        binary.download_and_verify(target_dir=d, bin_name=bin_name)
def download_license(repo, ref, out_file):
    """Downloads the LICENSE file at the specified ref.

    Args:
        repo: The target Github repo, as an 'owner/repo' string.
        ref: The git ref of the file to download; typically a version string
            like '0.16.1'.
        out_file: The file object to write the LICENSE text to.

    Raises:
        urllib.error.HTTPError: if there was a problem downloading the file.
    """
    url = 'https://raw.githubusercontent.com/{repo}/{ref}/LICENSE'.format(
        repo=repo,
        ref=ref,
    )
    req = urllib.request.Request(url=url)
    logging.info('Downloading %s...', url)
    # TODO(dbort): Back off and retry on 5xx errors.
    # Use a context manager so the connection is closed even if read()
    # raises (the original leaked the response object).
    with urllib.request.urlopen(req) as response:  # Throws urllib.error.HTTPError
        body = response.read()
    logging.info('Downloaded %s', url)
    out_file.write(body.decode('utf-8'))
def create_local_cipd_configs(binaries, package_prefix, common_files):
"""Creates a local CIPD config for each binary.
TODO(dbort): Add a mapping of filenames and strings to include
in the package. README, license, relese_json.
Args:
binaries: Sequence of ReleaseBinary objects to create configs for.
package_prefix: Prefix of the CIPD package to use for each binary.
The final component will be based on the OS and architecture.
common_files: Sequence of paths to files that every CIPD config should
include.
Returns:
None. See ReleaseBinary.cipd_yaml for the location of the cipd.yaml
file for each binary.
"""
for binary in binaries:
# Copy the common files into the package directory.
extra_files = []
for src in common_files:
dst = os.path.join(binary.package_dir, os.path.basename(src))
shutil.copyfile(src, dst)
extra_files.append(dst)
# Convert to values conventionally used in CIPD package paths.
cipd_os = {'darwin': 'mac', 'linux': 'linux'}[binary.os]
cipd_arch = {'x86_64': 'amd64'}[binary.arch]
# The YAML file will refer to files relative to its location,
# which should be in package_dir.
assert os.path.dirname(binary.cipd_yaml) == binary.package_dir
yaml_lines = [
'# This file and package automatically created by',
'# fuchsia/infra/infra/tools/update-bazel-cipd.py',
'package: {}/{}-{}'.format(package_prefix, cipd_os, cipd_arch),
'description: ' + binary.name,
'data:',
' - file: ' + os.path.basename(binary.cipd_yaml),
]
for f in sorted(list(binary.files) + extra_files):
yaml_lines.append(' - file: ' + os.path.relpath(
f, binary.package_dir))
# Write the YAML file.
with open(binary.cipd_yaml, 'w') as f:
f.write('\n'.join(yaml_lines))
f.write('\n')
logging.info('Created CIPD config %s', binary.cipd_yaml)
def main(unused_argv):
    """Fetches the latest bazel release and prepares CIPD package configs."""
    repo = 'bazelbuild/bazel'
    tool_name = 'bazel'
    package_prefix = 'fuchsia/third_party/' + tool_name

    # Create a root directory to work under.
    # It's helpful to use the same temp directory on multiple invocations,
    # in case a download fails. Include a string based on the username so
    # that multiple users on the same machine don't collide. Don't use the
    # username directly to avoid accidental leaks.
    user_digest = hashlib.sha256(
        os.environ['USER'].encode('ascii')).hexdigest()[:8]
    work_dir = '/tmp/update-bazel-cipd-' + user_digest

    # Ask github about the latest release.
    raw_release_json = get_latest_release_json(repo)
    release_json = json.loads(raw_release_json)
    release_name = release_json['name']

    # Extract info about the binaries we care about.
    binaries = extract_release_binaries(
        stem=tool_name, release_json=release_json)
    # TODO(dbort): Remove any entries from |binaries| whose assets already
    # live in CIPD, and exit early if there's no work to do.

    # Save the release JSON to a file so the CIPD packages can include a
    # snapshot.
    release_root = os.path.join(work_dir, tool_name, release_name)
    if not os.path.exists(release_root):
        os.makedirs(release_root)
    json_path = os.path.join(release_root, 'release.json')
    with open(json_path, 'w') as f:
        f.write(raw_release_json)

    # Download the LICENSE file at the release tag ref.
    license_path = os.path.join(release_root, 'LICENSE')
    with open(license_path, 'w') as f:
        download_license(
            repo=repo, ref=release_json['tag_name'], out_file=f)

    # Download the binaries.
    download_binaries(
        work_dir=work_dir, bin_name=tool_name, binaries=binaries)

    # Create CIPD configs.
    logging.info('Creating CIPD configs...')
    create_local_cipd_configs(
        binaries=binaries,
        package_prefix=package_prefix,
        common_files=(json_path, license_path))
    logging.info('Created CIPD configs')

    # TODO(dbort): Run these commands instead of just printing them.
    print('Create the packages by running the commands:')
    print(' cipd auth-login # One-time auth.')
    for binary in binaries:
        print(' cipd create --pkg-def "{pkg_def}" '
              '-tag "version:{version}" -ref latest'.format(
                  pkg_def=binary.cipd_yaml,
                  version=release_name))
if __name__ == '__main__':
    # Show INFO-level progress messages when run as a script.
    logging.basicConfig(level=logging.INFO)
    main(sys.argv)