#!/usr/bin/env python
# -*- coding: utf-8 -*-

# ===--- run_smoke_bench -------------------------------------------------===//
#
# This source file is part of the Swift.org open source project
#
# Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors
# Licensed under Apache License v2.0 with Runtime Library Exception
#
# See https://swift.org/LICENSE.txt for license information
# See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
#
# ===---------------------------------------------------------------------===//
#
# Performs a very fast check of which benchmarks regressed and improved.
#
# Initially runs the benchmarks with a low sample count and then re-runs only
# those benchmarks which differ.
# Also reports code size differences.
#
# ===---------------------------------------------------------------------===//
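
# A minimal usage sketch (the build paths below are placeholders; the flags
# are the ones defined by this script's argument parser):
#
#   ./run_smoke_bench -O -Osize -o report.md \
#       /path/to/old/benchmark/build /path/to/new/benchmark/build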

from __future__ import print_function

import argparse
import glob
import os
import subprocess
import sys
from imp import load_source

from compare_perf_tests import LogParser, TestComparator, create_report

# import Benchmark_Driver # doesn't work because it misses '.py' extension
Benchmark_Driver = load_source(
    "Benchmark_Driver",
    os.path.join(os.path.dirname(os.path.abspath(__file__)), "Benchmark_Driver"),
)
# from Benchmark_Driver import BenchmarkDriver, BenchmarkDoctor, ...
BenchmarkDriver = Benchmark_Driver.BenchmarkDriver
BenchmarkDoctor = Benchmark_Driver.BenchmarkDoctor
MarkdownReportHandler = Benchmark_Driver.MarkdownReportHandler

VERBOSE = False


class DriverArgs(object):
    """Arguments for BenchmarkDriver."""

    def __init__(self, tests, optimization="O", architecture=None):
        """Initialize with path to the build-dir and optimization level."""
        self.benchmarks = None
        self.filters = None
        self.tests = os.path.join(tests, "bin")
        self.optimization = optimization
        self.architecture = architecture

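# A hedged construction sketch mirroring the calls in test_performance() and
# check_added() below (the build path is a placeholder):
#
#   driver = BenchmarkDriver(
#       DriverArgs("/path/to/benchmark/build", optimization="Osize",
#                  architecture="x86_64"))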

def log(msg):
    print(msg)
    sys.stdout.flush()


def main():
    global VERBOSE
    argparser = argparse.ArgumentParser()
    argparser.add_argument(
        "-verbose", action="store_true", help="print verbose messages"
    )
    argparser.add_argument(
        "-O",
        action="append_const",
        const="O",
        dest="opt_levels",
        help="test -O benchmarks",
    )
    argparser.add_argument(
        "-Osize",
        action="append_const",
        const="Osize",
        dest="opt_levels",
        help="test -Osize benchmarks",
    )
    argparser.add_argument(
        "-Onone",
        action="append_const",
        const="Onone",
        dest="opt_levels",
        help="test -Onone benchmarks (except code size)",
    )
    argparser.add_argument(
        "-skip-code-size",
        action="store_true",
        help="Don't report code size differences",
    )
    argparser.add_argument(
        "-skip-performance",
        action="store_true",
        help="Don't report performance differences",
    )
    argparser.add_argument(
        "-skip-check-added",
        action="store_true",
        help="Don't validate newly added benchmarks",
    )
    argparser.add_argument(
        "-o",
        type=str,
        help="In addition to stdout, write the results into a markdown file",
    )
    argparser.add_argument(
        "-threshold",
        type=float,
        help="The performance threshold in %% which triggers a re-run",
        default=5,
    )
    argparser.add_argument(
        "-num-samples",
        type=int,
        help="The (minimum) number of samples to run",
        default=3,
    )
    argparser.add_argument(
        "-num-reruns",
        type=int,
        help="The number of re-runs until it's assumed to be a real change",
        default=8,
    )
    argparser.add_argument(
        "-arch",
        type=str,
        help="The architecture. The default is x86_64",
        default="x86_64",
    )
    argparser.add_argument(
        "-platform", type=str, help="The benchmark build platform", default="macosx"
    )
    argparser.add_argument(
        "oldbuilddir", nargs=1, type=str, help="old benchmark build directory"
    )
    argparser.add_argument(
        "newbuilddir", nargs=1, type=str, help="new benchmark build directory"
    )
    args = argparser.parse_args()
    VERBOSE = args.verbose

    return test_opt_levels(args)


def test_opt_levels(args):
    output_file = None
    if args.o:
        output_file = open(args.o, "w")

    changes = False
    for opt_level in args.opt_levels or ["O", "Osize", "Onone"]:
        log("####### Testing optimization level -" + opt_level + " #######")
        if not args.skip_performance:
            if test_performance(
                opt_level,
                args.oldbuilddir[0],
                args.newbuilddir[0],
                float(args.threshold) / 100,
                args.num_samples,
                args.num_reruns,
                output_file,
                args.arch
            ):
                changes = True

        # There is no point in reporting code size for Onone.
        if not args.skip_code_size and opt_level != "Onone":
            if report_code_size(
                opt_level,
                args.oldbuilddir[0],
                args.newbuilddir[0],
                args.arch,
                args.platform,
                output_file,
            ):
                changes = True

    if not args.skip_code_size:
        if report_code_size(
            "swiftlibs",
            args.oldbuilddir[0],
            args.newbuilddir[0],
            args.arch,
            args.platform,
            output_file,
        ):
            changes = True

    if not args.skip_check_added:
        check_added(args, output_file)

    if output_file:
        if changes:
            output_file.write(get_info_text())
        else:
            output_file.write("### No performance or code size changes")
        output_file.close()
    return 0


def measure(driver, tests, i):
    """Log and measure samples of the tests with the given driver.

    Collect an increasing number of samples, depending on the iteration.
    """
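    # Sampling schedule: iteration 0 takes 3 samples per benchmark; every
    # re-run iteration adds one more sample, capped at 10.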
    num_samples = min(i + 3, 10)
    msg = " Iteration {0} for {1}: num samples = {2}, ".format(
        i, driver.args.tests, num_samples
    )
    msg += (
        "running all tests"
        if driver.all_tests == tests
        else "re-testing {0} tests".format(len(tests))
    )
    log(msg)
    driver.tests = tests
    return driver.run(num_samples=num_samples, sample_time=0.0025)


def merge(results, other_results):
    """Merge the other PerformanceTestResults into the first dictionary."""
    for test, result in other_results.items():
        results[test].merge(result)
    return results


def test_performance(
    opt_level, old_dir, new_dir, threshold, num_samples, num_reruns,
    output_file, arch
):
    """Detect performance changes in benchmarks.

    Start fast with few samples per benchmark and gradually spend more time
    gathering more precise measurements of the change candidates.
    """

    i, unchanged_length_count = 0, 0
    old, new = [
        BenchmarkDriver(DriverArgs(dir, optimization=opt_level, architecture=arch))
        for dir in [old_dir, new_dir]
    ]
    results = [measure(driver, driver.tests, i) for driver in [old, new]]
    tests = TestComparator(results[0], results[1], threshold)
    changed = tests.decreased + tests.increased

    while len(changed) > 0 and unchanged_length_count < num_reruns:
        i += 1
        if VERBOSE:
            log(" test again: " + str([test.name for test in changed]))
        results = [
            merge(the_results, measure(driver, [test.name for test in changed], i))
            for the_results, driver in zip(results, [old, new])
        ]
        tests = TestComparator(results[0], results[1], threshold)
        changed = tests.decreased + tests.increased

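        # measure() stored the candidate list of the previous iteration in
        # driver.tests, so this checks whether the number of change candidates
        # stayed the same since the last re-run.  After num_reruns consecutive
        # iterations without a reduction, the remaining changes are treated as
        # real.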
        if len(old.tests) == len(changed):
            unchanged_length_count += 1
        else:
            unchanged_length_count = 0

    log("")
    return report_results(
        "Performance: -" + opt_level, None, None, threshold * 1.4, output_file, *results
    )


def report_code_size(opt_level, old_dir, new_dir, architecture, platform, output_file):
    if opt_level == "swiftlibs":
        files = glob.glob(os.path.join(old_dir, "lib", "swift", platform, "*.dylib"))
    else:
        files = glob.glob(
            os.path.join(old_dir, opt_level + "-" + architecture + "*" +
                         platform + "*", "*.o")
        )

    idx = 1
    old_lines = ""
    new_lines = ""
    for oldfile in files:
        new_dir = os.path.join(new_dir, '')
        newfile = oldfile.replace(old_dir, new_dir, 1)
        if os.path.isfile(newfile):
            oldsize = get_codesize(oldfile)
            newsize = get_codesize(newfile)
            bname = os.path.basename(oldfile)

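            # Synthesize one line per file in the comma-separated log format
            # that LogParser.results_from_string() parses: test number, name,
            # one sample, then the size repeated as min/max/mean, a standard
            # deviation of 0, and the size again as the median.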
            def result_line(value):
                v = "," + str(value)
                return str(idx) + "," + bname + ",1" + (v * 3) + ",0" + v + "\n"

            old_lines += result_line(oldsize)
            new_lines += result_line(newsize)
            idx += 1

    return report_results(
        "Code size: -" + opt_level, old_lines, new_lines, 0.01, output_file
    )


def get_codesize(filename):
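    # The parsing below assumes the default output of the Darwin `size` tool:
    # a header line starting with "__TEXT" followed by a tab-separated data
    # line whose first field is the size of the __TEXT segment.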
    output = subprocess.check_output(
        ["size", filename], universal_newlines=True
    ).splitlines()
    header_line = output[0]
    data_line = output[1]
    if header_line.find("__TEXT") != 0:
        sys.exit("unexpected output from size command:\n" + "\n".join(output))
    return int(data_line.split("\t")[0])


def report_results(
    title,
    old_lines,
    new_lines,
    threshold,
    output_file,
    old_results=None,
    new_results=None,
):
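    # Results can be supplied either pre-parsed (old_results/new_results) or
    # as raw benchmark log text (old_lines/new_lines), which is parsed here.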
    old_results = old_results or LogParser.results_from_string(old_lines)
    new_results = new_results or LogParser.results_from_string(new_lines)

    print("------- " + title + " -------")
    print(create_report(old_results, new_results, threshold, "git"))

    if output_file:
        report = create_report(old_results, new_results, threshold, "markdown")
        if report != "":
            output_file.write("### " + title + "\n")
            output_file.write(report)
            output_file.write("\n")
            return True
    return False


def get_info_text():
    text = """
<details>
<summary><strong>How to read the data</strong></summary>
The tables contain differences in performance which are larger than 8% and
differences in code size which are larger than 1%.

If you see any unexpected regressions, you should consider fixing the
regressions before you merge the PR.

**Noise**: Sometimes the performance results (not code size!) contain false
alarms. Unexpected regressions which are marked with '(?)' are probably noise.
If you see regressions which you cannot explain, you can try to run the
benchmarks again. If regressions still show up, please consult with the
performance team (@eeckstein).
</details>

<details>
<summary><strong>Hardware Overview</strong></summary>

"""
    po = subprocess.check_output(
        ["system_profiler", "SPHardwareDataType"], universal_newlines=True
    )
    selection = [
        "Model Name",
        "Model Identifier",
        "Processor Name",
        "Processor Speed",
        "Number of Processors",
        "Total Number of Cores",
        "L2 Cache",
        "L3 Cache",
        "Memory:",
    ]
    for line in po.splitlines():
        if any(s in line for s in selection):
            text += line + "\n"

    text += """
</details>"""
    return text


def check_added(args, output_file=None):
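    """Run the BenchmarkDoctor checks on benchmarks added in the new build."""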
    old = BenchmarkDriver(DriverArgs(args.oldbuilddir[0], architecture=args.arch))
    new = BenchmarkDriver(DriverArgs(args.newbuilddir[0], architecture=args.arch))
    added = set(new.tests).difference(set(old.tests))
    new.tests = list(added)
    doctor = BenchmarkDoctor(args, driver=new)
    if added and output_file:
        doctor.log.addHandler(MarkdownReportHandler(output_file))
    doctor.check()


if __name__ == "__main__":
    sys.exit(main())