| #!/usr/bin/env python |
| # -*- coding: utf-8 -*- |
| |
| # ===--- Benchmark_Driver ------------------------------------------------===// |
| # |
| # This source file is part of the Swift.org open source project |
| # |
| # Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors |
| # Licensed under Apache License v2.0 with Runtime Library Exception |
| # |
| # See https://swift.org/LICENSE.txt for license information |
| # See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors |
| # |
| # ===---------------------------------------------------------------------===// |
| """ |
Benchmark_Driver is a tool for running and analyzing the Swift Benchmark Suite.
| |
| Example: |
| $ Benchmark_Driver run |
| |
| Use `Benchmark_Driver -h` for help on available commands and options. |
| |
class `BenchmarkDriver` runs performance tests and implements the `run` COMMAND.
class `BenchmarkDoctor` analyzes performance tests and implements the `check` COMMAND.
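
For example, to check a subset of benchmarks and format the report as a
Markdown table (the filter pattern below is illustrative; see `parse_args`
for all supported options):

    $ Benchmark_Driver check --markdown -f Prefix.*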
| |
| """ |
| |
| import argparse |
| import glob |
| import logging |
| import math |
| import os |
| import re |
| import subprocess |
| import sys |
| import time |
| |
| from compare_perf_tests import LogParser |
| |
| DRIVER_DIR = os.path.dirname(os.path.realpath(__file__)) |
| |
| |
| class BenchmarkDriver(object): |
| """Executes tests from Swift Benchmark Suite. |
| |
    It's a higher-level wrapper for the Benchmark_X family of binaries
    (X = [O, Onone, Osize]).
| """ |
| |
| def __init__(self, args, tests=None, _subprocess=None, parser=None): |
| """Initialize with command line arguments. |
| |
| Optional parameters are for injecting dependencies -- used for testing. |
| """ |
| self.args = args |
| self._subprocess = _subprocess or subprocess |
| self.all_tests = [] |
| self.test_number = {} |
| self.tests = tests or self._get_tests() |
| self.parser = parser or LogParser() |
| self.results = {} |
| # Set a constant hash seed. Some tests are currently sensitive to |
| # fluctuations in the number of hash collisions. |
| os.environ['SWIFT_DETERMINISTIC_HASHING'] = '1' |
| |
| def _invoke(self, cmd): |
| return self._subprocess.check_output( |
| cmd, stderr=self._subprocess.STDOUT) |
| |
| @property |
| def test_harness(self): |
| """Full path to test harness binary.""" |
| suffix = (self.args.optimization if hasattr(self.args, 'optimization') |
| else 'O') |
| return os.path.join(self.args.tests, "Benchmark_" + suffix) |
| |
| def _git(self, cmd): |
| """Execute the Git command in the `swift-repo`.""" |
| return self._invoke( |
| ('git -C {0} '.format(self.args.swift_repo) + cmd).split()).strip() |
| |
| @property |
| def log_file(self): |
| """Full path to log file. |
| |
| If `swift-repo` is set, log file is tied to Git branch and revision. |
| """ |
| if not self.args.output_dir: |
| return None |
| log_dir = self.args.output_dir |
| harness_name = os.path.basename(self.test_harness) |
| suffix = '-' + time.strftime('%Y%m%d%H%M%S', time.localtime()) |
| if self.args.swift_repo: |
| log_dir = os.path.join( |
| log_dir, self._git('rev-parse --abbrev-ref HEAD')) # branch |
| suffix += '-' + self._git('rev-parse --short HEAD') # revision |
| return os.path.join(log_dir, harness_name + suffix + '.log') |
| |
| @property |
| def _cmd_list_benchmarks(self): |
        # Use a tab delimiter, overriding the default comma, for easier
        # parsing. (The third 'column' is always a comma-separated list of
        # tags in square brackets -- currently unused here.)
| return [self.test_harness, '--list', '--delim=\t'] + ( |
| ['--skip-tags='] if (self.args.benchmarks or |
| self.args.filters) else []) |
| |
| def _get_tests(self): |
| """Return a list of performance tests to run.""" |
| number_name_pairs = [ |
| line.split('\t')[:2] for line in |
| self._invoke(self._cmd_list_benchmarks).split('\n')[1:-1] |
| ] |
| # unzip list of pairs into 2 lists |
| test_numbers, self.all_tests = map(list, zip(*number_name_pairs)) |
| self.test_number = dict(zip(self.all_tests, test_numbers)) |
| if self.args.filters: |
| return self._tests_matching_patterns() |
| if self.args.benchmarks: |
| return self._tests_by_name_or_number(test_numbers) |
| return self.all_tests |
| |
| def _tests_matching_patterns(self): |
| regexes = [re.compile(pattern) for pattern in self.args.filters] |
| return sorted(list(set([name for pattern in regexes |
| for name in self.all_tests |
| if pattern.match(name)]))) |
| |
| def _tests_by_name_or_number(self, test_numbers): |
| benchmarks = set(self.args.benchmarks) |
| number_to_name = dict(zip(test_numbers, self.all_tests)) |
| tests_by_number = [number_to_name[i] |
| for i in benchmarks.intersection(set(test_numbers))] |
| return sorted(list(benchmarks |
| .intersection(set(self.all_tests)) |
| .union(tests_by_number))) |
| |
| def run(self, test=None, num_samples=None, num_iters=None, |
| sample_time=None, verbose=None, measure_memory=False, |
| quantile=None): |
| """Execute benchmark and gather results.""" |
| num_samples = num_samples or 0 |
| num_iters = num_iters or 0 # automatically determine N to run for 1s |
| sample_time = sample_time or 0 # default is 1s |
| |
| cmd = self._cmd_run( |
| test, num_samples, num_iters, sample_time, |
| verbose, measure_memory, quantile) |
| output = self._invoke(cmd) |
| results = self.parser.results_from_string(output) |
| return results.items()[0][1] if test else results |
| |
| def _cmd_run(self, test, num_samples, num_iters, sample_time, |
| verbose, measure_memory, quantile): |
| cmd = [self.test_harness] |
| if test: |
| cmd.append(test) |
| else: |
| cmd.extend([self.test_number.get(name, name) |
| for name in self.tests]) |
| if num_samples > 0: |
| cmd.append('--num-samples={0}'.format(num_samples)) |
| if num_iters > 0: |
| cmd.append('--num-iters={0}'.format(num_iters)) |
| if sample_time > 0: |
| cmd.append('--sample-time={0}'.format(sample_time)) |
| if verbose: |
| cmd.append('--verbose') |
| if measure_memory: |
| cmd.append('--memory') |
| if quantile: |
| cmd.append('--quantile={0}'.format(quantile)) |
| cmd.append('--delta') |
| return cmd |
| |
| def run_independent_samples(self, test): |
| """Run benchmark multiple times, gathering independent samples. |
| |
| Returns the aggregated result of independent benchmark invocations. |
| """ |
| def merge_results(a, b): |
| a.merge(b) |
| return a |
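
        # `PerformanceTestResult.merge` (from compare_perf_tests) is assumed
        # to fold the samples of `b` into `a`, so the reduce below yields one
        # aggregate result per test.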
| |
| return reduce(merge_results, |
| [self.run(test, measure_memory=True, |
| num_iters=1, quantile=20) |
| for _ in range(self.args.independent_samples)]) |
| |
| def log_results(self, output, log_file=None): |
| """Log output to `log_file`. |
| |
| Creates `args.output_dir` if it doesn't exist yet. |
| """ |
| log_file = log_file or self.log_file |
        log_dir = os.path.dirname(log_file)
        if not os.path.exists(log_dir):
            os.makedirs(log_dir)
| print('Logging results to: %s' % log_file) |
| with open(log_file, 'w') as f: |
| f.write(output) |
| |
| RESULT = '{:>3} {:<40} {:>7} {:>7} {:>6} {:>10} {:>6} {:>7} {:>10}' |
| |
| def run_and_log(self, csv_console=True): |
| """Run benchmarks and continuously log results to the console. |
| |
| There are two console log formats: CSV and justified columns. Both are |
| compatible with `LogParser`. Depending on the `csv_console` parameter, |
| the CSV log format is either printed to console or returned as a string |
| from this method. When `csv_console` is False, the console output |
| format is justified columns. |
| """ |
| format = ( |
| (lambda values: ','.join(values)) if csv_console else |
| (lambda values: self.RESULT.format(*values))) # justified columns |
| |
| def console_log(values): |
| print(format(values)) |
| |
| def result_values(r): |
| return map(str, [r.test_num, r.name, r.num_samples, r.min, |
| r.samples.q1, r.median, r.samples.q3, r.max, |
| r.max_rss]) |
| |
| header = ['#', 'TEST', 'SAMPLES', 'MIN(μs)', 'Q1(μs)', 'MEDIAN(μs)', |
| 'Q3(μs)', 'MAX(μs)', 'MAX_RSS(B)'] |
| console_log(header) |
| results = [header] |
| for test in self.tests: |
| result = result_values(self.run_independent_samples(test)) |
| console_log(result) |
| results.append(result) |
| |
| print( |
| '\nTotal performance tests executed: {0}'.format(len(self.tests))) |
| return (None if csv_console else |
| ('\n'.join([','.join(r) for r in results]) + '\n')) # csv_log |
| |
| @staticmethod |
| def run_benchmarks(args): |
| """Run benchmarks and log results.""" |
| driver = BenchmarkDriver(args) |
| csv_log = driver.run_and_log(csv_console=(args.output_dir is None)) |
| if csv_log: |
| driver.log_results(csv_log) |
| return 0 |
| |
| |
| class LoggingReportFormatter(logging.Formatter): |
| """Format logs as plain text or with colors on the terminal. |
| |
    Plain text outputs level, category and message: 'DEBUG category: Hi!'
| Colored output uses color coding based on the level. |
| """ |
| |
    colors = {logging.DEBUG: '9', logging.INFO: '2', logging.WARNING: '3',
              logging.ERROR: '1', logging.CRITICAL: '5'}
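    # The values above are ANSI color codes composed into the '\033[1;3{0}m'
    # escape sequence in `format` below (e.g. ERROR -> '1' -> red).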
| |
| def __init__(self, use_color=False): |
| """Specify if report should use colors; defaults to False.""" |
| super(LoggingReportFormatter, self).__init__('%(message)s') |
| self.use_color = use_color |
| |
| def format(self, record): |
| """Format the log record with level and category.""" |
| msg = super(LoggingReportFormatter, self).format(record) |
| category = ((record.name.split('.')[-1] + ': ') if '.' in record.name |
| else '') |
| return ('\033[1;3{0}m{1}{2}\033[1;0m'.format( |
| self.colors[record.levelno], category, msg) if self.use_color else |
| '{0} {1}{2}'.format(record.levelname, category, msg)) |
| |
| |
| class MarkdownReportHandler(logging.StreamHandler): |
| r"""Write custom formatted messages from BenchmarkDoctor to the stream. |
| |
| It works around StreamHandler's hardcoded '\n' and handles the custom |
| level and category formatting for BenchmarkDoctor's check report. |
| """ |
| |
| def __init__(self, stream): |
| """Initialize the handler and write a Markdown table header.""" |
| super(MarkdownReportHandler, self).__init__(stream) |
| self.setLevel(logging.INFO) |
| self.stream.write('\n✅ | Benchmark Check Report\n---|---') |
| self.stream.flush() |
| |
| levels = {logging.WARNING: '\n⚠️', logging.ERROR: '\n⛔️', |
| logging.INFO: ' <br><sub> '} |
| categories = {'naming': '🔤', 'runtime': '⏱', 'memory': 'Ⓜ️'} |
| quotes_re = re.compile("'") |
| |
| def format(self, record): |
| msg = super(MarkdownReportHandler, self).format(record) |
| return (self.levels.get(record.levelno, '') + |
| ('' if record.levelno == logging.INFO else |
| self.categories.get(record.name.split('.')[-1], '') + ' | ') + |
| self.quotes_re.sub('`', msg)) |
| |
| def emit(self, record): |
| msg = self.format(record) |
| stream = self.stream |
| try: |
| if (isinstance(msg, unicode) and |
| getattr(stream, 'encoding', None)): |
| stream.write(msg.encode(stream.encoding)) |
| else: |
| stream.write(msg) |
| except UnicodeError: |
| stream.write(msg.encode("UTF-8")) |
| self.flush() |
| |
| def close(self): |
| self.stream.write('\n\n') |
| self.stream.flush() |
| super(MarkdownReportHandler, self).close() |
| |
| |
| class BenchmarkDoctor(object): |
| """Checks that the benchmark conforms to the standard set of requirements. |
| |
    Benchmarks that are part of the Swift Benchmark Suite are required to
    follow a set of rules that ensure quality measurements. These include the
    naming convention and robustness when varying execution parameters like
    `num-iters` and `num-samples` (no setup overhead, constant memory
    consumption).
| """ |
| |
| log = logging.getLogger('BenchmarkDoctor') |
| log_naming = log.getChild('naming') |
| log_runtime = log.getChild('runtime') |
| log_memory = log.getChild('memory') |
| log.setLevel(logging.DEBUG) |
| |
| def __init__(self, args, driver=None): |
| """Initialize with command line parameters. |
| |
| Optional `driver` parameter for injecting dependency; used for testing. |
| """ |
| super(BenchmarkDoctor, self).__init__() |
| self.driver = driver or BenchmarkDriver(args) |
| self.results = {} |
| |
| if hasattr(args, 'markdown') and args.markdown: |
| self.console_handler = MarkdownReportHandler(sys.stdout) |
| else: |
| self.console_handler = logging.StreamHandler(sys.stdout) |
| self.console_handler.setFormatter( |
| LoggingReportFormatter(use_color=sys.stdout.isatty())) |
| self.console_handler.setLevel(logging.DEBUG if args.verbose else |
| logging.INFO) |
| self.log.addHandler(self.console_handler) |
| self.log.debug('Checking tests: %s', ', '.join(self.driver.tests)) |
| self.requirements = [ |
| self._name_matches_benchmark_naming_convention, |
| self._name_is_at_most_40_chars_long, |
| self._no_setup_overhead, |
| self._reasonable_setup_time, |
| self._optimized_runtime_in_range, |
| self._constant_memory_use |
| ] |
| |
| def __del__(self): |
| """Close log handlers on exit.""" |
| for handler in list(self.log.handlers): |
| handler.close() |
| self.log.removeHandler(self.console_handler) |
| |
| benchmark_naming_convention_re = re.compile(r'[A-Z][a-zA-Z0-9\-.!?]+') |
| camel_humps_re = re.compile(r'[a-z][A-Z]') |
| |
| @staticmethod |
| def _name_matches_benchmark_naming_convention(measurements): |
| name = measurements['name'] |
| match = BenchmarkDoctor.benchmark_naming_convention_re.match(name) |
| matched = match.group(0) if match else '' |
| composite_words = len(BenchmarkDoctor.camel_humps_re.findall(name)) + 1 |
| |
| if name != matched: |
| BenchmarkDoctor.log_naming.error( |
| "'%s' name doesn't conform to benchmark naming convention.", |
| name) |
| BenchmarkDoctor.log_naming.info( |
| 'See http://bit.ly/BenchmarkNaming') |
| |
| if composite_words > 4: |
| BenchmarkDoctor.log_naming.warning( |
| "'%s' name is composed of %d words.", name, composite_words) |
| BenchmarkDoctor.log_naming.info( |
| "Split '%s' name into dot-separated groups and variants. " |
| "See http://bit.ly/BenchmarkNaming", name) |
| |
| @staticmethod |
| def _name_is_at_most_40_chars_long(measurements): |
| name = measurements['name'] |
| |
| if len(name) > 40: |
| BenchmarkDoctor.log_naming.error( |
| "'%s' name is %d characters long.", name, len(name)) |
| BenchmarkDoctor.log_naming.info( |
| 'Benchmark name should not be longer than 40 characters.') |
| |
| @staticmethod |
| def _select(measurements, num_iters=None, opt_level='O'): |
| prefix = measurements['name'] + ' ' + opt_level |
| prefix += '' if num_iters is None else (' i' + str(num_iters)) |
| return [series for name, series in measurements.items() |
| if name.startswith(prefix)] |
| |
| @staticmethod |
| def _optimized_runtime_in_range(measurements): |
| name = measurements['name'] |
| setup, ratio = BenchmarkDoctor._setup_overhead(measurements) |
| setup = 0 if ratio < 0.05 else setup |
        # Subtract the per-iteration share of setup overhead from the fastest
        # sample in each num-iters series.
        runtime = min(
            [result.samples.min - (setup / i)
             for i in [1, 2]
             for result in
             BenchmarkDoctor._select(measurements, num_iters=i)])
| |
| threshold = 1000 |
| if threshold < runtime: |
| log = (BenchmarkDoctor.log_runtime.warning if runtime < 10000 else |
| BenchmarkDoctor.log_runtime.error) |
| caveat = '' if setup == 0 else ' (excluding the setup overhead)' |
| log("'%s' execution took at least %d μs%s.", name, runtime, caveat) |
| |
        def factor(base):  # suitable divisor that's an integer power of base
| return int(pow(base, math.ceil( |
| math.log(runtime / float(threshold), base)))) |
| |
| BenchmarkDoctor.log_runtime.info( |
| "Decrease the workload of '%s' by a factor of %d (%d), to be " |
| "less than %d μs.", name, factor(2), factor(10), threshold) |
| |
| threshold = 20 |
| if runtime < threshold: |
| log = (BenchmarkDoctor.log_runtime.error if runtime == 0 else |
| BenchmarkDoctor.log_runtime.warning) |
| log("'%s' execution took %d μs.", name, runtime) |
| |
| BenchmarkDoctor.log_runtime.info( |
| "Ensure the workload of '%s' has a properly measurable size" |
| " (runtime > %d μs) and is not eliminated by the compiler (use" |
| " `blackHole` function if necessary)." if runtime == 0 else |
| "Increase the workload of '%s' to be more than %d μs.", |
| name, threshold) |
| |
| @staticmethod |
| def _setup_overhead(measurements): |
| select = BenchmarkDoctor._select |
| ti1, ti2 = [float(min(mins)) for mins in |
| [[result.samples.min for result in i_series] |
| for i_series in |
| [select(measurements, num_iters=i) for i in [1, 2]]]] |
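        # With num-iters=1 a sample measures setup + work; with num-iters=2 it
        # measures setup/2 + work per iteration, so setup ~= 2 * (ti1 - ti2).
        # Worked example (made-up numbers): ti1=1100 μs, ti2=600 μs gives
        # setup=1000 μs and ratio ~= 0.91.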
| setup = (int(round(2.0 * (ti1 - ti2))) if ti2 > 20 # limit of accuracy |
| else 0) |
| ratio = (setup / ti1) if ti1 > 0 else 0 |
| return (setup, ratio) |
| |
| @staticmethod |
| def _no_setup_overhead(measurements): |
| setup, ratio = BenchmarkDoctor._setup_overhead(measurements) |
| if ratio > 0.05: |
| BenchmarkDoctor.log_runtime.error( |
| "'%s' has setup overhead of %d μs (%.1f%%).", |
| measurements['name'], setup, round((100 * ratio), 1)) |
| BenchmarkDoctor.log_runtime.info( |
| 'Move initialization of benchmark data to the `setUpFunction` ' |
| 'registered in `BenchmarkInfo`.') |
| |
| @staticmethod |
| def _reasonable_setup_time(measurements): |
| setup = min([result.setup |
| for result in BenchmarkDoctor._select(measurements)]) |
| if 200000 < setup: # 200 ms |
| BenchmarkDoctor.log_runtime.error( |
| "'%s' setup took at least %d μs.", |
| measurements['name'], setup) |
| BenchmarkDoctor.log_runtime.info( |
| 'The `setUpFunction` should take no more than 200 ms.') |
| |
| @staticmethod |
| def _constant_memory_use(measurements): |
| select = BenchmarkDoctor._select |
| (min_i1, max_i1), (min_i2, max_i2) = [ |
| (min(memory_use), max(memory_use)) for memory_use in |
| [[r.mem_pages for r in i_series] for i_series in |
| [select(measurements, num_iters=i) for i in |
| [1, 2]]]] |
| range_i1, range_i2 = max_i1 - min_i1, max_i2 - min_i2 |
| normal_range = 15 # pages |
| name = measurements['name'] |
| more_info = False |
| |
| if abs(min_i1 - min_i2) > max(range_i1, range_i2, normal_range): |
| more_info = True |
| BenchmarkDoctor.log_memory.error( |
| "'%s' varies the memory footprint of the base " |
| "workload depending on the `num-iters`.", name) |
| |
| if max(range_i1, range_i2) > normal_range: |
| more_info = True |
| BenchmarkDoctor.log_memory.warning( |
| "'%s' has very wide range of memory used between " |
| "independent, repeated measurements.", name) |
| |
| if more_info: |
| BenchmarkDoctor.log_memory.info( |
| "'%s' mem_pages [i1, i2]: min=[%d, %d] 𝚫=%d R=[%d, %d]", |
| name, |
| *[min_i1, min_i2, abs(min_i1 - min_i2), range_i1, range_i2]) |
| |
| @staticmethod |
| def _adjusted_1s_samples(runtime): |
| u"""Return sample count that can be taken in approximately 1 second. |
| |
| Based on the runtime (μs) of one sample taken with num-iters=1. |
| """ |
| if runtime == 0: |
| return 2 |
| s = 1000000 / float(runtime) # samples for 1s run |
| s = int(pow(2, round(math.log(s, 2)))) # rounding to power of 2 |
| return s if s > 2 else 2 # always take at least 2 samples |
| |
| def measure(self, benchmark): |
| """Measure benchmark with varying iterations and optimization levels. |
| |
| Returns a dictionary with benchmark name and `PerformanceTestResult`s. |
| """ |
| self.log.debug('Calibrating num-samples for {0}:'.format(benchmark)) |
| r = self.driver.run(benchmark, num_samples=3, num_iters=1, |
| verbose=True) # calibrate |
| num_samples = self._adjusted_1s_samples(r.samples.min) |
| |
| def capped(s): |
| return min(s, 200) |
| run_args = [(capped(num_samples), 1), (capped(num_samples / 2), 2)] |
| opts = self.driver.args.optimization |
| opts = opts if isinstance(opts, list) else [opts] |
| self.log.debug( |
| 'Runtime {0} μs yields {1} adjusted samples per second.'.format( |
| r.samples.min, num_samples)) |
| self.log.debug( |
| 'Measuring {0}, 5 x i1 ({1} samples), 5 x i2 ({2} samples)'.format( |
| benchmark, run_args[0][0], run_args[1][0])) |
| |
| measurements = dict( |
| [('{0} {1} i{2}{3}'.format(benchmark, o, i, suffix), |
| self.driver.run(benchmark, num_samples=s, num_iters=i, |
| verbose=True, measure_memory=True)) |
| for o in opts |
| for s, i in run_args |
| for suffix in list('abcde') |
| ] |
| ) |
| measurements['name'] = benchmark |
| return measurements |
| |
| def analyze(self, benchmark_measurements): |
| """Analyze whether benchmark fullfills all requirtements.""" |
| self.log.debug('Analyzing %s', benchmark_measurements['name']) |
| for rule in self.requirements: |
| rule(benchmark_measurements) |
| |
| def check(self): |
| """Measure and analyse all enabled tests.""" |
| for test in self.driver.tests: |
| self.analyze(self.measure(test)) |
| |
| @staticmethod |
| def run_check(args): |
| """Validate benchmarks conform to health rules, report violations.""" |
| doctor = BenchmarkDoctor(args) |
| doctor.check() |
| # TODO non-zero error code when errors are logged |
| # See https://stackoverflow.com/a/31142078/41307 |
| return 0 |
| |
| |
| def format_name(log_path): |
| """Return the filename and directory for a log file.""" |
| return '/'.join(log_path.split('/')[-2:]) |
| |
| |
| def compare_logs(compare_script, new_log, old_log, log_dir, opt): |
| """Return diff of log files at paths `new_log` and `old_log`.""" |
| print('Comparing %s %s ...' % (format_name(old_log), format_name(new_log))) |
| subprocess.call([compare_script, '--old-file', old_log, |
| '--new-file', new_log, '--format', 'markdown', |
| '--output', os.path.join(log_dir, 'latest_compare_{0}.md' |
| .format(opt))]) |
| |
| |
def compare(args):
    """Compare the latest benchmark logs of current and baseline branches."""
    log_dir = args.log_dir
| compare_script = args.compare_script |
| baseline_branch = args.baseline_branch |
| current_branch = \ |
| BenchmarkDriver(args, tests=[''])._git('rev-parse --abbrev-ref HEAD') |
| current_branch_dir = os.path.join(log_dir, current_branch) |
| baseline_branch_dir = os.path.join(log_dir, baseline_branch) |
| |
| if current_branch != baseline_branch and \ |
| not os.path.isdir(baseline_branch_dir): |
| print(('Unable to find benchmark logs for {baseline_branch} branch. ' + |
| 'Set a baseline benchmark log by passing --benchmark to ' + |
| 'build-script while on {baseline_branch} branch.') |
| .format(baseline_branch=baseline_branch)) |
| return 1 |
| |
| recent_logs = {} |
| for branch_dir in [current_branch_dir, baseline_branch_dir]: |
| for opt in ['O', 'Onone']: |
| recent_logs[os.path.basename(branch_dir) + '_' + opt] = sorted( |
| glob.glob(os.path.join( |
| branch_dir, 'Benchmark_' + opt + '-*.log')), |
| key=os.path.getctime, reverse=True) |
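    # recent_logs now maps keys like '<branch>_O' and '<branch>_Onone' to
    # newest-first lists of matching log paths (actual keys depend on the
    # branch names involved).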
| |
| if current_branch == baseline_branch: |
| if len(recent_logs[baseline_branch + '_O']) > 1 and \ |
| len(recent_logs[baseline_branch + '_Onone']) > 1: |
| compare_logs(compare_script, |
| recent_logs[baseline_branch + '_O'][0], |
| recent_logs[baseline_branch + '_O'][1], |
| log_dir, 'O') |
| compare_logs(compare_script, |
| recent_logs[baseline_branch + '_Onone'][0], |
| recent_logs[baseline_branch + '_Onone'][1], |
| log_dir, 'Onone') |
| else: |
| print(('{baseline_branch}/{baseline_branch} comparison ' + |
| 'skipped: no previous {baseline_branch} logs') |
| .format(baseline_branch=baseline_branch)) |
| else: |
| # TODO: Check for outdated baseline branch log |
| if len(recent_logs[current_branch + '_O']) == 0 or \ |
| len(recent_logs[current_branch + '_Onone']) == 0: |
| print('branch sanity failure: missing branch logs') |
| return 1 |
| |
| if len(recent_logs[current_branch + '_O']) == 1 or \ |
| len(recent_logs[current_branch + '_Onone']) == 1: |
| print('branch/branch comparison skipped: no previous branch logs') |
| else: |
| compare_logs(compare_script, |
| recent_logs[current_branch + '_O'][0], |
| recent_logs[current_branch + '_O'][1], |
| log_dir, 'O') |
| compare_logs(compare_script, |
| recent_logs[current_branch + '_Onone'][0], |
| recent_logs[current_branch + '_Onone'][1], |
| log_dir, 'Onone') |
| |
| if len(recent_logs[baseline_branch + '_O']) == 0 or \ |
| len(recent_logs[baseline_branch + '_Onone']) == 0: |
| print(('branch/{baseline_branch} failure: no {baseline_branch} ' + |
| 'logs') |
| .format(baseline_branch=baseline_branch)) |
| return 1 |
| else: |
| compare_logs(compare_script, |
| recent_logs[current_branch + '_O'][0], |
| recent_logs[baseline_branch + '_O'][0], |
| log_dir, 'O') |
| compare_logs(compare_script, |
| recent_logs[current_branch + '_Onone'][0], |
| recent_logs[baseline_branch + '_Onone'][0], |
| log_dir, 'Onone') |
| |
| # TODO: Fail on large regressions |
| |
| return 0 |
| |
| |
| def positive_int(value): |
| """Verify the value is a positive integer.""" |
| ivalue = int(value) |
| if not (ivalue > 0): |
| raise ValueError |
| return ivalue |
| |
| |
| def parse_args(args): |
| """Parse command line arguments and set default values.""" |
| parser = argparse.ArgumentParser( |
| epilog='Example: ./Benchmark_Driver run -i 5 -f Prefix -f .*Suffix.*' |
| ) |
| subparsers = parser.add_subparsers( |
| title='Swift benchmark driver commands', |
| help='See COMMAND -h for additional arguments', metavar='COMMAND') |
| |
| shared_benchmarks_parser = argparse.ArgumentParser(add_help=False) |
| benchmarks_group = shared_benchmarks_parser.add_mutually_exclusive_group() |
| benchmarks_group.add_argument( |
| 'benchmarks', |
| default=[], |
| help='benchmark to run (default: all)', nargs='*', metavar="BENCHMARK") |
| benchmarks_group.add_argument( |
| '-f', '--filter', dest='filters', action='append', |
| help='run all tests whose name match regular expression PATTERN, ' + |
| 'multiple filters are supported', metavar="PATTERN") |
| shared_benchmarks_parser.add_argument( |
| '-t', '--tests', |
| help='directory containing Benchmark_O{,none,size} ' + |
| '(default: DRIVER_DIR)', |
| default=DRIVER_DIR) |
| shared_benchmarks_parser.add_argument( |
| '-o', '--optimization', |
| metavar='OPT', |
| choices=['O', 'Onone', 'Osize'], |
        help='optimization level to use: {O,Onone,Osize} (default: O)',
| default='O') |
| |
| run_parser = subparsers.add_parser( |
| 'run', |
| help='Run benchmarks and output results to stdout', |
| parents=[shared_benchmarks_parser]) |
| run_parser.add_argument( |
| '-i', '--independent-samples', |
| help='number of times to run each test (default: 1)', |
| type=positive_int, default=1) |
| run_parser.add_argument( |
| '--output-dir', |
| help='log results to directory (default: no logging)') |
| run_parser.add_argument( |
| '--swift-repo', |
| help='absolute path to the Swift source repository') |
| run_parser.set_defaults(func=BenchmarkDriver.run_benchmarks) |
| |
| check_parser = subparsers.add_parser( |
| 'check', |
        help='Check benchmarks for conformance to the quality requirements',
| parents=[shared_benchmarks_parser]) |
| check_group = check_parser.add_mutually_exclusive_group() |
| check_group.add_argument( |
| '-v', '--verbose', action='store_true', |
| help='show more details during benchmark analysis') |
| check_group.add_argument( |
| '-md', '--markdown', action='store_true', |
| help='format report as Markdown table') |
| check_parser.set_defaults(func=BenchmarkDoctor.run_check) |
| |
| compare_parser = subparsers.add_parser( |
| 'compare', |
| help='Compare benchmark results') |
| compare_parser.add_argument( |
| '--log-dir', required=True, |
| help='directory containing benchmark logs') |
| compare_parser.add_argument( |
| '--swift-repo', required=True, |
| help='absolute path to the Swift source repository') |
| compare_parser.add_argument( |
| '--compare-script', required=True, |
| help='absolute path to compare script') |
| compare_parser.add_argument( |
| '--baseline-branch', default='master', |
| help='attempt to compare results to baseline results for specified ' |
| 'branch (default: master)') |
| compare_parser.set_defaults(func=compare) |
| |
| return parser.parse_args(args) |
| |
| |
| def main(): |
| """Parse command line arguments and execute the specified COMMAND.""" |
| args = parse_args(sys.argv[1:]) |
| return args.func(args) |
| |
| |
| if __name__ == '__main__': |
    sys.exit(main())