| #!/usr/bin/env python |
| # -*- coding: utf-8 -*- |
| |
| # ===--- run_smoke_bench -------------------------------------------------===// |
| # |
| # This source file is part of the Swift.org open source project |
| # |
| # Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors |
| # Licensed under Apache License v2.0 with Runtime Library Exception |
| # |
| # See https://swift.org/LICENSE.txt for license information |
| # See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors |
| # |
| # ===---------------------------------------------------------------------===// |
| # |
# Performs a very fast check of which benchmarks regressed and improved.
#
# It initially runs the benchmarks with a low sample count and re-runs only
# those benchmarks whose results differ between the two builds.
| # Also reports code size differences. |
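#
# Example invocation (hypothetical paths):
#
#   run_smoke_bench -O -threshold 5 -o report.md \
#       /path/to/old-build-dir /path/to/new-build-dir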
| # |
| # ===---------------------------------------------------------------------===// |
| |
| from __future__ import print_function |
| |
| import argparse |
| import glob |
| import os |
| import subprocess |
| import sys |
| |
| from compare_perf_tests import LogParser, TestComparator, create_report |
| |
| from imp import load_source |
# A plain 'import Benchmark_Driver' doesn't work because the file lacks
# a '.py' extension.
| Benchmark_Driver = load_source( |
| 'Benchmark_Driver', os.path.join(os.path.dirname( |
| os.path.abspath(__file__)), 'Benchmark_Driver')) |
| # from Benchmark_Driver import BenchmarkDriver, BenchmarkDoctor, ... |
| BenchmarkDriver = Benchmark_Driver.BenchmarkDriver |
| BenchmarkDoctor = Benchmark_Driver.BenchmarkDoctor |
| MarkdownReportHandler = Benchmark_Driver.MarkdownReportHandler |
| |
| VERBOSE = False |
| |
| |
| class DriverArgs(object): |
| """Arguments for BenchmarkDriver.""" |
| def __init__(self, tests, optimization='O'): |
| """Initialize with path to the build-dir and optimization level.""" |
| self.benchmarks = None |
| self.filters = None |
| self.tests = os.path.join(tests, 'bin') |
| self.optimization = optimization |
| |
| |
| def log(msg): |
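    # Flush after every message so progress output interleaves correctly
    # with the output of child benchmark processes.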
| print(msg) |
| sys.stdout.flush() |
| |
| |
| def main(): |
| global VERBOSE |
| argparser = argparse.ArgumentParser() |
| argparser.add_argument( |
| '-verbose', action='store_true', |
| help='print verbose messages') |
| argparser.add_argument( |
| '-O', action='append_const', const='O', dest='opt_levels', |
| help='test -O benchmarks') |
| argparser.add_argument( |
| '-Osize', action='append_const', const='Osize', dest='opt_levels', |
| help='test -Osize benchmarks') |
| argparser.add_argument( |
| '-Onone', action='append_const', const='Onone', dest='opt_levels', |
| help='test -Onone benchmarks (except code size)') |
| argparser.add_argument( |
| '-skip-code-size', action='store_true', |
| help="Don't report code size differences") |
| argparser.add_argument( |
| '-skip-performance', action='store_true', |
| help="Don't report performance differences") |
| argparser.add_argument( |
| '-skip-check-added', action='store_true', |
| help="Don't validate newly added benchmarks") |
| argparser.add_argument( |
| '-o', type=str, |
| help='In addition to stdout, write the results into a markdown file') |
| argparser.add_argument( |
| '-threshold', type=float, |
        help='The performance threshold in %% which triggers a re-run',
| default=5) |
| argparser.add_argument( |
| '-num-samples', type=int, |
| help='The (minimum) number of samples to run', default=3) |
| argparser.add_argument( |
| '-platform', type=str, |
| help='The benchmark build platform', default='macosx') |
| argparser.add_argument( |
| 'oldbuilddir', nargs=1, type=str, |
| help='old benchmark build directory') |
| argparser.add_argument( |
| 'newbuilddir', nargs=1, type=str, |
| help='new benchmark build directory') |
| args = argparser.parse_args() |
| VERBOSE = args.verbose |
| |
| return test_opt_levels(args) |
| |
| |
| def test_opt_levels(args): |
| output_file = None |
| if args.o: |
| output_file = open(args.o, 'w') |
| |
| changes = False |
| for opt_level in args.opt_levels or ['O', 'Osize', 'Onone']: |
| log('####### Testing optimization level -' + opt_level + ' #######') |
| if not args.skip_performance: |
| if test_performance(opt_level, args.oldbuilddir[0], |
| args.newbuilddir[0], |
| float(args.threshold) / 100, args.num_samples, |
| output_file): |
| changes = True |
| |
| # There is no point in reporting code size for Onone. |
| if not args.skip_code_size and opt_level != 'Onone': |
| if report_code_size(opt_level, args.oldbuilddir[0], |
| args.newbuilddir[0], |
| args.platform, output_file): |
| changes = True |
| |
| if not args.skip_code_size: |
| if report_code_size('swiftlibs', args.oldbuilddir[0], |
| args.newbuilddir[0], |
| args.platform, output_file): |
| changes = True |
| |
| if not args.skip_check_added: |
| check_added(args, output_file) |
| |
| if output_file: |
| if changes: |
| output_file.write(get_info_text()) |
| else: |
| output_file.write("### No performance and code size changes") |
| output_file.close() |
| return 0 |
| |
| |
| def measure(driver, tests, i): |
| """Log and measure samples of the tests with the given driver. |
| |
    Collect an increasing number of samples, depending on the iteration
    (starting at 3 samples and capped at 10).
| """ |
| num_samples = min(i + 3, 10) |
| msg = ' Iteration {0} for {1}: num samples = {2}, '.format( |
| i, driver.args.tests, num_samples) |
| msg += ('running all tests' if driver.all_tests == tests else |
| 're-testing {0} tests'.format(len(tests))) |
| log(msg) |
| driver.tests = tests |
| return driver.run(num_samples=num_samples, sample_time=0.0025) |
| |
| |
| def merge(results, other_results): |
| """"Merge the other PreformanceTestResults into the first dictionary.""" |
| for test, result in other_results.items(): |
| results[test].merge(result) |
| return results |
| |
| |
| def test_performance(opt_level, old_dir, new_dir, threshold, num_samples, |
| output_file): |
| """Detect performance changes in benchmarks. |
| |
| Start fast with few samples per benchmark and gradually spend more time |
| gathering more precise measurements of the change candidates. |
| """ |
| |
| i, unchanged_length_count = 0, 0 |
    old, new = [BenchmarkDriver(DriverArgs(path, optimization=opt_level))
                for path in [old_dir, new_dir]]
| results = [measure(driver, driver.tests, i) for driver in [old, new]] |
| tests = TestComparator(results[0], results[1], threshold) |
| changed = tests.decreased + tests.increased |
| |
| while len(changed) > 0 and unchanged_length_count < 5: |
| i += 1 |
        if VERBOSE:
            log('    test again: ' + str([test.name for test in changed]))
        prev_changed_len = len(changed)
        results = [merge(the_results,
                         measure(driver, [test.name for test in changed], i))
                   for the_results, driver in zip(results, [old, new])]
        tests = TestComparator(results[0], results[1], threshold)
        changed = tests.decreased + tests.increased

        # Stop re-running once the set of change candidates has stopped
        # shrinking for 5 consecutive iterations; what remains is treated
        # as a real change rather than noise.
        if len(changed) == prev_changed_len:
            unchanged_length_count += 1
        else:
            unchanged_length_count = 0
| |
| log('') |
| return report_results("Performance: -" + opt_level, None, None, |
| threshold * 1.4, output_file, *results) |
| |
| |
| def get_results(bench_dir, opt_level, num_samples, to_test): |
| try: |
| exe = os.path.join(bench_dir, 'bin', 'Benchmark_' + opt_level) |
| args = [exe, '--num-samples=' + str(num_samples), |
| '--sample-time=0.0025'] |
| if to_test: |
| args += to_test |
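        # Minimal environment: point DYLD_LIBRARY_PATH at the just-built
        # Swift libraries and enable deterministic hashing for stable,
        # comparable results.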
| env = {'DYLD_LIBRARY_PATH': os.path.join(bench_dir, 'lib', 'swift', |
| 'macos'), |
| 'SWIFT_DETERMINISTIC_HASHING': '1'} |
| output = subprocess.check_output(args, env=env) |
    except subprocess.CalledProcessError as e:
        sys.stderr.write(e.output)
        sys.stderr.flush()
        sys.exit(e.returncode)
    else:
        return output
| |
| |
| def report_code_size(opt_level, old_dir, new_dir, platform, output_file): |
| if opt_level == 'swiftlibs': |
| files = glob.glob(os.path.join(old_dir, 'lib', 'swift', platform, |
| '*.dylib')) |
| else: |
| files = glob.glob(os.path.join(old_dir, |
| opt_level + '-*' + platform + '*', |
| '*.o')) |
| |
| idx = 1 |
| old_lines = "" |
| new_lines = "" |
| for oldfile in files: |
| newfile = oldfile.replace(old_dir, new_dir, 1) |
| if os.path.isfile(newfile): |
| oldsize = get_codesize(oldfile) |
| newsize = get_codesize(newfile) |
| bname = os.path.basename(oldfile) |
| |
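            # Synthesize a log line in the format produced by the benchmark
            # harness and parsed by LogParser:
            #   idx,name,num_samples,min,max,mean,sd,median
            # A code size measurement is a single exact value, so min, max,
            # mean and median are identical and the standard deviation is 0.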
| def result_line(value): |
| v = ',' + str(value) |
| return (str(idx) + ',' + bname + ',1' + (v * 3) + |
| ',0' + v + '\n') |
| |
| old_lines += result_line(oldsize) |
| new_lines += result_line(newsize) |
| idx += 1 |
| |
| return report_results("Code size: -" + opt_level, |
| old_lines, new_lines, 0.01, output_file) |
| |
| |
def get_codesize(filename):
    # The macOS `size` tool prints a tab-separated header line followed by
    # one data line, with the size of the __TEXT segment in the first column.
    output = subprocess.check_output(['size', filename]).splitlines()
    header_line = output[0]
    data_line = output[1]
    if header_line.find('__TEXT') != 0:
        sys.exit('unexpected output from size command:\n' +
                 '\n'.join(output))
    return int(data_line.split('\t')[0])
| |
| |
def report_results(title, old_lines, new_lines, threshold, output_file,
                   old_results=None, new_results=None):
    """Print a comparison report to stdout and, if given, a markdown file.

    Return True if a non-empty markdown report was written to output_file.
    """
| old_results = old_results or LogParser.results_from_string(old_lines) |
| new_results = new_results or LogParser.results_from_string(new_lines) |
| |
| print("------- " + title + " -------") |
| print(create_report(old_results, new_results, threshold, 'git')) |
| |
| if output_file: |
        report = create_report(old_results, new_results, threshold,
                               'markdown')
| if report != "": |
| output_file.write("### " + title + "\n") |
| output_file.write(report) |
| output_file.write("\n") |
| return True |
| return False |
| |
| |
| def get_info_text(): |
| text = """ |
| <details> |
| <summary><strong>How to read the data</strong></summary> |
The tables contain differences in performance which are larger than the
reporting threshold (7% with the default settings) and differences in code
size which are larger than 1%.
| |
| If you see any unexpected regressions, you should consider fixing the |
| regressions before you merge the PR. |
| |
| **Noise**: Sometimes the performance results (not code size!) contain false |
| alarms. Unexpected regressions which are marked with '(?)' are probably noise. |
If you see regressions which you cannot explain, you can try to run the
benchmarks again. If the regressions still show up, please consult with the
performance team (@eeckstein).
| </details> |
| |
| <details> |
| <summary><strong>Hardware Overview</strong></summary> |
| |
| """ |
    po = subprocess.check_output(['system_profiler', 'SPHardwareDataType'])
    selection = ['Model Name',
                 'Model Identifier',
                 'Processor Name',
                 'Processor Speed',
                 'Number of Processors',
                 'Total Number of Cores',
                 'L2 Cache',
                 'L3 Cache',
                 'Memory:']
    for line in po.splitlines():
        if any(s in line for s in selection):
            text += line + "\n"
| |
| text += """ |
| </details>""" |
| return text |
| |
| |
def check_added(args, output_file=None):
    """Run BenchmarkDoctor checks on benchmarks added in the new build."""
| old = BenchmarkDriver(DriverArgs(args.oldbuilddir[0])) |
| new = BenchmarkDriver(DriverArgs(args.newbuilddir[0])) |
| added = set(new.tests).difference(set(old.tests)) |
| new.tests = list(added) |
| doctor = BenchmarkDoctor(args, driver=new) |
| if added and output_file: |
| doctor.log.addHandler(MarkdownReportHandler(output_file)) |
| doctor.check() |
| |
| |
| if __name__ == '__main__': |
| sys.exit(main()) |