| #!/usr/bin/env python3 |
| # Copyright 2018 The Fuchsia Authors. All rights reserved. |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| """Downloads bazel release binaries and creates CIPD configs to be pushed. |
| |
| TODO(IN-651): This should be done by a recipe and builder. |
| |
| - Finds the latest bazel release on Github |
| - Finds the binary assets we care about |
| - Downloads and validates the binary assets (skips if already downloaded) |
| - Downloads the LICENSE file from the time of the release |
| - Creates a cipd.yaml file for each binary |
| - Prints the 'cipd create' commands to push the binaries to CIPD |
| |
| Does not actually talk to CIPD. |
| """ |
| |
| import collections |
| import hashlib |
| import json |
| import logging |
| import os |
| import re |
| import shutil |
| import sys |
| import urllib.request |
| |
# Root URL of the GitHub REST API.
GITHUB_API = 'https://api.github.com'

# Headers sent with every GitHub API request. The Accept value selects the
# v3 API with the "full" media type.
GITHUB_API_HEADERS = {
    'Accept': 'application/vnd.github.v3.full+json',
}
| |
_BaseReleaseBinary = collections.namedtuple(
    '_BaseReleaseBinary',
    (
        'name',  # Name of the binary. E.g., 'bazel-0.16.1-darwin-x86_64'
        'release',  # E.g., '0.16.1'
        'os',  # E.g., 'linux', 'darwin', 'windows'
        'arch',  # E.g., 'x86_64'
        'bin_url',  # URL of the binary
        'sha256_url',  # URL of the SHA-256 hash
        'sig_url',  # URL of the signature
    ))


class ReleaseBinary(_BaseReleaseBinary):
    """A remote binary asset associated with a release.

    The namedtuple fields describe the remote asset. The local state
    (checksum, package directory, file list) starts out unset and is filled
    in by download_and_verify(); until then the corresponding properties
    raise AssertionError.
    """

    def __init__(self, **kwargs):  # Args are handled by the superclass.
        # The namedtuple base consumes the field values in __new__; this
        # only initializes the mutable, local-download state.
        self._sha256 = None
        self._package_dir = None
        self._files = None

    @staticmethod
    def _require_verified(value, what):
        """Returns value, or raises AssertionError if it is still unset.

        Raised explicitly instead of using an `assert` statement so that the
        documented AssertionError still fires when Python runs with -O.
        """
        if not value:
            raise AssertionError(
                '{} is not available until download_and_verify() '
                'succeeds'.format(what))
        return value

    @property
    def sha256(self):
        """The SHA-256 checksum of the binary as a hex string.

        Raises:
          AssertionError: if the binary has not been downloaded and verified.
        """
        return self._require_verified(self._sha256, 'sha256')

    @property
    def package_dir(self):
        """Path to the directory containing the downloaded binary.

        Raises:
          AssertionError: if the binary has not been downloaded and verified.
        """
        return self._require_verified(self._package_dir, 'package_dir')

    @property
    def files(self):
        """Sequence of paths to the binary and associated files.

        All paths will be under package_dir.

        Raises:
          AssertionError: if the binary has not been downloaded and verified.
        """
        return self._require_verified(self._files, 'files')

    @property
    def cipd_yaml(self):
        """Canonical path to the cipd.yaml file that describes this binary.

        File may not exist yet; when created it should live in this location.

        Raises:
          AssertionError: if the binary has not been downloaded and verified.
        """
        return os.path.join(self.package_dir, 'cipd.yaml')

    def download_and_verify(self, target_dir, bin_name):
        """Downloads the binary asset and verifies its checksum.

        NOTE: Does not currently check that the signature is valid.

        Args:
          target_dir: The directory to download to.
          bin_name: The name to call the binary file. Used as the stem
              for the .sha256 and .sig files.
        Raises:
          ValueError: if the SHA-256 of the downloaded binary does not match
              the digest in the downloaded .sha256 file.
          Exception: if any of the files cannot be downloaded.
        """

        def download_file(url, filename):
            """Downloads a URL to a local file.

            Skips the download if the file already exists.

            Args:
              url: The URL to download.
              filename: The file to download to.
            """
            if os.path.exists(filename):
                logging.info('Skipping download: %s already exists', filename)
                return

            logging.info('Downloading %s from %s...', filename, url)
            # Download to a temporary file, in case the download is
            # interrupted.
            tmp_file = filename + '~'
            if os.path.exists(tmp_file):
                os.remove(tmp_file)

            # TODO(dbort): Back off and retry on 5xx errors.
            urllib.request.urlretrieve(url=url, filename=tmp_file)

            # Download succeeded; move to the final path.
            shutil.move(tmp_file, filename)
            logging.info('Downloaded %s', filename)

        # Download the binary and its verification assets.
        bin_path = os.path.join(target_dir, bin_name)
        download_file(url=self.bin_url, filename=bin_path)
        # Make it executable.
        os.chmod(bin_path, 0o755)

        sha256_path = bin_path + '.sha256'
        download_file(url=self.sha256_url, filename=sha256_path)

        sig_path = bin_path + '.sig'
        download_file(url=self.sig_url, filename=sig_path)

        # Verify. Hash the binary in fixed-size chunks so the whole file
        # never has to be held in memory.
        logging.info('Calculating SHA-256 of %s...', bin_path)
        hasher = hashlib.sha256()
        with open(bin_path, 'rb') as f:
            for chunk in iter(lambda: f.read(4096), b''):
                hasher.update(chunk)
        actual_sha256 = hasher.hexdigest()
        logging.info('SHA-256 of %s: %s', bin_path, actual_sha256)

        with open(sha256_path, 'r') as f:
            # The .sha256 file should contain a line like
            #   ^<hex-digest> <filename>$
            # We only want the hex digest: delete the first whitespace
            # character and everything after it.
            expected_sha256 = re.sub(r'\s.*', '', f.read().strip())

        if actual_sha256 != expected_sha256:
            # NOTE: the mismatching files are left on disk, and
            # download_file() skips files that already exist, so they must
            # be removed by hand before a retry can fetch fresh copies.
            # The expected digest comes from the .sha256 asset, so that is
            # the file/URL reported here (not the .sig asset).
            raise ValueError(
                'Actual SHA-256 {actual_sha256} of {bin_path} ({bin_url}) != '
                'expected SHA-256 {expected_sha256} '
                'from {sha256_path} ({sha256_url})'.format(
                    actual_sha256=actual_sha256,
                    bin_path=bin_path,
                    bin_url=self.bin_url,
                    expected_sha256=expected_sha256,
                    sha256_path=sha256_path,
                    sha256_url=self.sha256_url,
                ))
        logging.info('Checksum of %s verified', bin_path)

        # Success. Expose the local information about the binary and
        # associated files.
        self._package_dir = target_dir
        self._sha256 = actual_sha256
        self._files = (bin_path, sha256_path, sig_path)

        # TODO(dbort): Validate the signature using the key at
        # https://bazel.build/bazel-release.pub.gpg. This requires a
        # non-standard library like https://pythonhosted.org/python-gnupg/,
        # which isn't installed by default on many Debian systems.
| |
| |
def extract_release_binaries(stem, release_json):
    """Builds ReleaseBinary objects for the assets of one release.

    Binary assets are expected to be named

      <stem>-<release>-<os>-<architecture>

    with sibling checksum and signature assets that carry the same name plus
    a '.sha256' or '.sig' extension. Only the x86_64 builds for linux and
    darwin are looked up.

    Args:
      stem: The bare name of the binary; e.g., 'bazel'. Asset names begin
          with this stem.
      release_json: The parsed Github API JSON describing the release
          and its assets.
    Returns:
      A tuple of ReleaseBinary objects, sorted by name.
    Raises:
      KeyError: if any expected asset is missing from the release.
    """
    assets_by_name = {item['name']: item for item in release_json['assets']}
    version = release_json['name']  # E.g., '0.16.1'
    architecture = 'x86_64'

    def url_of(asset_name):
        # A missing asset deliberately surfaces as a KeyError.
        return assets_by_name[asset_name]['browser_download_url']

    found = []
    for platform in ('linux', 'darwin'):
        asset = '{}-{}-{}-{}'.format(stem, version, platform, architecture)
        found.append(
            ReleaseBinary(
                name=asset,
                release=version,
                os=platform,
                arch=architecture,
                bin_url=url_of(asset),
                sha256_url=url_of(asset + '.sha256'),
                sig_url=url_of(asset + '.sig'),
            ))

    found.sort(key=lambda item: item.name)
    return tuple(found)
| |
| |
def get_latest_release_json(repo):
    """Returns JSON text describing the latest release of a Github repo.

    See https://developer.github.com/v3/repos/releases/#get-the-latest-release
    for example JSON data.

    Args:
      repo: The target Github repo, as an 'owner/repo' string.
    Returns:
      String containing JSON describing the latest release.
    Raises:
      urllib.error.HTTPError: Error reading from the Github API
    """
    url = '{api}/repos/{repo}/releases/latest'.format(
        api=GITHUB_API, repo=repo)
    req = urllib.request.Request(url=url, headers=GITHUB_API_HEADERS)
    logging.info('Fetching latest release JSON from %s...', url)
    # TODO(dbort): Back off and retry on 5xx errors.
    # The context manager closes the HTTP response (and its socket) even if
    # reading the body fails.
    with urllib.request.urlopen(req) as response:
        body = response.read()
    logging.info('Release JSON fetched')
    return body.decode('utf-8')
| |
| |
def download_binaries(work_dir, bin_name, binaries):
    """Downloads binaries to local directories.

    Each binary is downloaded into
    <work_dir>/<bin_name>/<release>/<os>/<arch>, which is created if needed.

    Args:
      work_dir: Path to a directory under which the dirs should live.
      bin_name: The name to call the binary files.
      binaries: Sequence of ReleaseBinary objects to download.
    Returns:
      None. See ReleaseBinary.package_dir for the download location of each
      binary.
    Raises:
      Exceptions if there was a problem downloading or verifying a binary.
    """
    for binary in binaries:
        target_dir = os.path.join(work_dir, bin_name, binary.release,
                                  binary.os, binary.arch)
        # exist_ok avoids the check-then-create race of testing
        # os.path.exists() first, and keeps reruns into the same work_dir
        # working.
        os.makedirs(target_dir, exist_ok=True)
        binary.download_and_verify(target_dir=target_dir, bin_name=bin_name)
| |
| |
def download_license(repo, ref, out_file):
    """Downloads the LICENSE file at the specified ref.

    Args:
      repo: The target Github repo, as an 'owner/repo' string.
      ref: The git ref of the file to download; typically a version string like
          '0.16.1'.
      out_file: The text-mode file object to write the LICENSE text to. The
          downloaded bytes are decoded as UTF-8 before being written.
    Raises:
      urllib.error.HTTPError: if there was a problem downloading the file.
    """
    url = 'https://raw.githubusercontent.com/{repo}/{ref}/LICENSE'.format(
        repo=repo,
        ref=ref,
    )
    req = urllib.request.Request(url=url)
    logging.info('Downloading %s...', url)
    # TODO(dbort): Back off and retry on 5xx errors.
    # The context manager closes the HTTP response (and its socket) even if
    # reading the body fails.
    with urllib.request.urlopen(req) as response:
        license_text = response.read().decode('utf-8')
    logging.info('Downloaded %s', url)
    out_file.write(license_text)
| |
| |
def create_local_cipd_configs(binaries, package_prefix, common_files):
    """Writes a cipd.yaml next to each downloaded binary.

    For every binary, the common files are first copied into the binary's
    package directory. The generated YAML then lists the cipd.yaml itself,
    followed by the binary's own files and the copied common files in sorted
    order, all as paths relative to the package directory.

    TODO(dbort): Add a mapping of filenames and strings to include
    in the package. README, license, release_json.

    Args:
      binaries: Sequence of ReleaseBinary objects to create configs for.
      package_prefix: Prefix of the CIPD package to use for each binary.
          The final component will be based on the OS and architecture.
      common_files: Sequence of paths to files that every CIPD config should
          include.
    Returns:
      None. See ReleaseBinary.cipd_yaml for the location of the cipd.yaml
      file for each binary.
    Raises:
      KeyError: if a binary's os or arch has no CIPD equivalent.
    """
    # Translations to the values conventionally used in CIPD package paths.
    cipd_os_names = {'darwin': 'mac', 'linux': 'linux'}
    cipd_arch_names = {'x86_64': 'amd64'}

    for binary in binaries:
        # Stage a private copy of each common file inside the package dir.
        staged = []
        for source in common_files:
            target = os.path.join(binary.package_dir,
                                  os.path.basename(source))
            shutil.copyfile(source, target)
            staged.append(target)

        platform = '{}-{}'.format(cipd_os_names[binary.os],
                                  cipd_arch_names[binary.arch])

        # Entries in the YAML are relative to the YAML's own location, so
        # it has to sit directly in package_dir.
        yaml_path = binary.cipd_yaml
        assert os.path.dirname(yaml_path) == binary.package_dir

        payload = list(binary.files)
        payload.extend(staged)
        payload.sort()

        lines = [
            '# This file and package automatically created by',
            '# fuchsia/infra/infra/tools/update-bazel-cipd.py',
            'package: {}/{}'.format(package_prefix, platform),
            'description: ' + binary.name,
            'data:',
            ' - file: ' + os.path.basename(yaml_path),
        ]
        lines.extend(
            ' - file: ' + os.path.relpath(path, binary.package_dir)
            for path in payload)

        with open(yaml_path, 'w') as out:
            out.write('\n'.join(lines) + '\n')
        logging.info('Created CIPD config %s', yaml_path)
| |
| |
def main(unused_argv):
    """Fetches the latest bazel release and prepares local CIPD packages.

    Downloads the release metadata, LICENSE, and binaries into a per-user
    work directory under /tmp, writes a cipd.yaml for each binary, and prints
    the 'cipd create' commands that would upload them.

    Args:
      unused_argv: Command-line arguments; currently ignored.
    """
    # Local import: only needed for the username fallback below.
    import getpass

    github_repo = 'bazelbuild/bazel'
    bin_name = 'bazel'
    package_prefix = 'fuchsia/third_party/' + bin_name

    # Create a root directory to work under.
    # It's helpful to use the same temp directory on multiple invocations, in
    # case a download fails. Include a string based on the username so that
    # multiple users on the same machine don't collide. Don't use the username
    # directly to avoid accidental leaks.
    #
    # $USER is not guaranteed to be set, so fall back to getpass.getuser()
    # rather than raising KeyError. UTF-8 yields the same bytes as ASCII for
    # ASCII usernames (so existing work dirs are still found) without
    # crashing on non-ASCII ones.
    user = os.environ.get('USER') or getpass.getuser()
    work_dir = ('/tmp/update-bazel-cipd-' + hashlib.sha256(
        user.encode('utf-8')).hexdigest()[:8])

    # Ask github about the latest release.
    raw_release_json = get_latest_release_json(github_repo)
    release_json = json.loads(raw_release_json)
    release_name = release_json['name']

    # Extract info about the binaries we care about.
    binaries = extract_release_binaries(
        stem=bin_name, release_json=release_json)

    # TODO(dbort): Remove any entries from |binaries| whose assets already live
    # in CIPD, and exit early if there's no work to do.

    # Save the release JSON to a file so the CIPD packages can include a
    # snapshot.
    release_root = os.path.join(work_dir, bin_name, release_name)
    os.makedirs(release_root, exist_ok=True)
    json_path = os.path.join(release_root, 'release.json')
    # Both payloads were decoded from UTF-8, so write them back as UTF-8
    # explicitly; the locale's default encoding may not be able to represent
    # them.
    with open(json_path, 'w', encoding='utf-8') as f:
        f.write(raw_release_json)

    # Download the LICENSE file at the release tag ref.
    license_path = os.path.join(release_root, 'LICENSE')
    with open(license_path, 'w', encoding='utf-8') as f:
        download_license(
            repo=github_repo, ref=release_json['tag_name'], out_file=f)

    # Download the binaries.
    download_binaries(work_dir=work_dir, bin_name=bin_name, binaries=binaries)

    # Create CIPD configs.
    logging.info('Creating CIPD configs...')
    create_local_cipd_configs(
        binaries=binaries,
        package_prefix=package_prefix,
        common_files=(json_path, license_path))
    logging.info('Created CIPD configs')

    # TODO(dbort): Run these commands instead of just printing them.
    print('Create the packages by running the commands:')
    print(' cipd auth-login # One-time auth.')
    for binary in binaries:
        print(' cipd create --pkg-def "{pkg_def}" '
              '-tag "version:{version}" -ref latest'.format(
                  pkg_def=binary.cipd_yaml,
                  version=release_name))


if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)
    main(sys.argv)