#!/usr/bin/env python3

# type: ignore

"""
compare.py - versatile benchmark output compare tool
"""

import argparse
import json
import os
import sys
import unittest
from argparse import ArgumentParser

import gbench
from gbench import report, util


def check_inputs(in1, in2, flags):
    """
    Perform checks on the user-provided inputs and diagnose any abnormalities.
    """
    in1_kind, in1_err = util.classify_input_file(in1)
    in2_kind, in2_err = util.classify_input_file(in2)
    output_file = util.find_benchmark_flag("--benchmark_out=", flags)
    output_type = util.find_benchmark_flag("--benchmark_out_format=", flags)
    if (
        in1_kind == util.IT_Executable
        and in2_kind == util.IT_Executable
        and output_file
    ):
        print(
            (
                "WARNING: '--benchmark_out=%s' will be passed to both "
                "benchmarks, causing it to be overwritten"
            )
            % output_file
        )
    if in1_kind == util.IT_JSON and in2_kind == util.IT_JSON:
        # When both sides are JSON, the only supported flag is
        # --benchmark_filter=
        for flag in util.remove_benchmark_flags("--benchmark_filter=", flags):
            print(
                "WARNING: passing %s has no effect since both "
                "inputs are JSON" % flag
            )
    if output_type is not None and output_type != "json":
        print(
            (
                "ERROR: passing '--benchmark_out_format=%s' to 'compare.py'"
                " is not supported."
            )
            % output_type
        )
        sys.exit(1)


def create_parser():
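    """Build the ArgumentParser used by both main() and the tests below."""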
    parser = ArgumentParser(
        description="versatile benchmark output compare tool"
    )

    parser.add_argument(
        "-a",
        "--display_aggregates_only",
        dest="display_aggregates_only",
        action="store_true",
        help="If there are repetitions, by default, we display everything -"
        " the actual runs, and the aggregates computed. Sometimes, it is"
        " desirable to only view the aggregates, e.g. when there are a lot"
        " of repetitions. Do note that only the display is affected;"
        " internally, all the actual runs are still used, e.g. for the"
        " U test.",
    )

    parser.add_argument(
        "--no-color",
        dest="color",
        default=True,
        action="store_false",
        help="Do not use colors in the terminal output",
    )

    parser.add_argument(
        "-d",
        "--dump_to_json",
        dest="dump_to_json",
        help="Additionally, dump benchmark comparison output to this file "
        "in JSON format.",
    )

    utest = parser.add_argument_group()
    utest.add_argument(
        "--no-utest",
        dest="utest",
        default=True,
        action="store_false",
        help="The tool can run a two-tailed Mann-Whitney U test with the "
        "null hypothesis that it is equally likely that a randomly "
        "selected value from one sample will be less than or greater "
        "than a randomly selected value from a second sample.\n"
        "WARNING: requires a **LARGE** (no less than {}) number of "
        "repetitions to be meaningful!\n"
        "The test is performed by default if at least {} repetitions "
        "were done.\n"
        "This option disables the U test.".format(
            report.UTEST_OPTIMAL_REPETITIONS, report.UTEST_MIN_REPETITIONS
        ),
    )
    alpha_default = 0.05
    utest.add_argument(
        "--alpha",
        dest="utest_alpha",
        default=alpha_default,
        type=float,
        help=(
            "Significance level alpha. If the calculated p-value is below "
            "this value, the result is said to be statistically "
            "significant and the null hypothesis is rejected.\n"
            "(default: %0.4f)"
        )
        % alpha_default,
    )

    subparsers = parser.add_subparsers(
        help="This tool has multiple modes of operation:", dest="mode"
    )

    parser_a = subparsers.add_parser(
        "benchmarks",
        help="The simplest use-case: compare all the output of these two "
        "benchmarks",
    )
    baseline = parser_a.add_argument_group("baseline", "The benchmark baseline")
    baseline.add_argument(
        "test_baseline",
        metavar="test_baseline",
        type=argparse.FileType("r"),
        nargs=1,
        help="A benchmark executable or JSON output file",
    )
    contender = parser_a.add_argument_group(
        "contender", "The benchmark that will be compared against the baseline"
    )
    contender.add_argument(
        "test_contender",
        metavar="test_contender",
        type=argparse.FileType("r"),
        nargs=1,
        help="A benchmark executable or JSON output file",
    )
    parser_a.add_argument(
        "benchmark_options",
        metavar="benchmark_options",
        nargs=argparse.REMAINDER,
        help="Arguments to pass when running benchmark executables",
    )

    parser_b = subparsers.add_parser(
        "filters",
        help="Compare filter one with filter two of a single benchmark",
    )
    baseline = parser_b.add_argument_group("baseline", "The benchmark baseline")
    baseline.add_argument(
        "test",
        metavar="test",
        type=argparse.FileType("r"),
        nargs=1,
        help="A benchmark executable or JSON output file",
    )
    baseline.add_argument(
        "filter_baseline",
        metavar="filter_baseline",
        type=str,
        nargs=1,
        help="The first filter, which will be used as the baseline",
    )
    contender = parser_b.add_argument_group(
        "contender", "The benchmark that will be compared against the baseline"
    )
    contender.add_argument(
        "filter_contender",
        metavar="filter_contender",
        type=str,
        nargs=1,
        help="The second filter, which will be compared against the baseline",
    )
    parser_b.add_argument(
        "benchmark_options",
        metavar="benchmark_options",
        nargs=argparse.REMAINDER,
        help="Arguments to pass when running benchmark executables",
    )

    parser_c = subparsers.add_parser(
        "benchmarksfiltered",
        help="Compare filter one of the first benchmark with filter two of "
        "the second benchmark",
    )
    baseline = parser_c.add_argument_group("baseline", "The benchmark baseline")
    baseline.add_argument(
        "test_baseline",
        metavar="test_baseline",
        type=argparse.FileType("r"),
        nargs=1,
        help="A benchmark executable or JSON output file",
    )
    baseline.add_argument(
        "filter_baseline",
        metavar="filter_baseline",
        type=str,
        nargs=1,
        help="The first filter, which will be used as the baseline",
    )
    contender = parser_c.add_argument_group(
        "contender", "The benchmark that will be compared against the baseline"
    )
    contender.add_argument(
        "test_contender",
        metavar="test_contender",
        type=argparse.FileType("r"),
        nargs=1,
        help="The second benchmark executable or JSON output file, which "
        "will be compared against the baseline",
    )
    contender.add_argument(
        "filter_contender",
        metavar="filter_contender",
        type=str,
        nargs=1,
        help="The second filter, which will be compared against the baseline",
    )
    parser_c.add_argument(
        "benchmark_options",
        metavar="benchmark_options",
        nargs=argparse.REMAINDER,
        help="Arguments to pass when running benchmark executables",
    )

    return parser


def main():
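    """Parse the command line, run or load both benchmarks, and print the
    difference report (optionally dumping it to JSON).
    """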
    # Parse the command line flags
    parser = create_parser()
    args, unknown_args = parser.parse_known_args()
    if args.mode is None:
        parser.print_help()
        sys.exit(1)
    assert not unknown_args
    benchmark_options = args.benchmark_options

    if args.mode == "benchmarks":
        test_baseline = args.test_baseline[0].name
        test_contender = args.test_contender[0].name
        filter_baseline = ""
        filter_contender = ""

        # NOTE: if test_baseline == test_contender, you are analyzing the
        # stdev

        description = "Comparing %s to %s" % (test_baseline, test_contender)
    elif args.mode == "filters":
        test_baseline = args.test[0].name
        test_contender = args.test[0].name
        filter_baseline = args.filter_baseline[0]
        filter_contender = args.filter_contender[0]

        # NOTE: if filter_baseline == filter_contender, you are analyzing
        # the stdev

        description = "Comparing %s to %s (from %s)" % (
            filter_baseline,
            filter_contender,
            args.test[0].name,
        )
    elif args.mode == "benchmarksfiltered":
        test_baseline = args.test_baseline[0].name
        test_contender = args.test_contender[0].name
        filter_baseline = args.filter_baseline[0]
        filter_contender = args.filter_contender[0]

        # NOTE: if test_baseline == test_contender and
        # filter_baseline == filter_contender, you are analyzing the stdev

        description = "Comparing %s (from %s) to %s (from %s)" % (
            filter_baseline,
            test_baseline,
            filter_contender,
            test_contender,
        )
    else:
        # should never happen
        print("Unrecognized mode of operation: '%s'" % args.mode)
        parser.print_help()
        sys.exit(1)

    check_inputs(test_baseline, test_contender, benchmark_options)

    if args.display_aggregates_only:
        benchmark_options += ["--benchmark_display_aggregates_only=true"]

    options_baseline = []
    options_contender = []

    if filter_baseline and filter_contender:
        options_baseline = ["--benchmark_filter=%s" % filter_baseline]
        options_contender = ["--benchmark_filter=%s" % filter_contender]

    # Run the benchmarks and report the results
    json1 = json1_orig = gbench.util.sort_benchmark_results(
        gbench.util.run_or_load_benchmark(
            test_baseline, benchmark_options + options_baseline
        )
    )
    json2 = json2_orig = gbench.util.sort_benchmark_results(
        gbench.util.run_or_load_benchmark(
            test_contender, benchmark_options + options_contender
        )
    )

    # Now, filter the benchmarks so that the difference report can work
    if filter_baseline and filter_contender:
        replacement = "[%s vs. %s]" % (filter_baseline, filter_contender)
        json1 = gbench.report.filter_benchmark(
            json1_orig, filter_baseline, replacement
        )
        json2 = gbench.report.filter_benchmark(
            json2_orig, filter_contender, replacement
        )

    diff_report = gbench.report.get_difference_report(json1, json2, args.utest)
    output_lines = gbench.report.print_difference_report(
        diff_report,
        args.display_aggregates_only,
        args.utest,
        args.utest_alpha,
        args.color,
    )
    print(description)
    for ln in output_lines:
        print(ln)

    # Optionally, diff and output to JSON
    if args.dump_to_json is not None:
        with open(args.dump_to_json, "w") as f_json:
            json.dump(diff_report, f_json, indent=1)


class TestParser(unittest.TestCase):
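    """Sanity-check that create_parser() parses each mode's arguments as
    expected.
    """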
    def setUp(self):
        self.parser = create_parser()
        testInputs = os.path.join(
            os.path.dirname(os.path.realpath(__file__)), "gbench", "Inputs"
        )
        self.testInput0 = os.path.join(testInputs, "test1_run1.json")
        self.testInput1 = os.path.join(testInputs, "test1_run2.json")

    def test_benchmarks_basic(self):
        parsed = self.parser.parse_args(
            ["benchmarks", self.testInput0, self.testInput1]
        )
        self.assertFalse(parsed.display_aggregates_only)
        self.assertTrue(parsed.utest)
        self.assertEqual(parsed.mode, "benchmarks")
        self.assertEqual(parsed.test_baseline[0].name, self.testInput0)
        self.assertEqual(parsed.test_contender[0].name, self.testInput1)
        self.assertFalse(parsed.benchmark_options)

    def test_benchmarks_basic_without_utest(self):
        parsed = self.parser.parse_args(
            ["--no-utest", "benchmarks", self.testInput0, self.testInput1]
        )
        self.assertFalse(parsed.display_aggregates_only)
        self.assertFalse(parsed.utest)
        self.assertEqual(parsed.utest_alpha, 0.05)
        self.assertEqual(parsed.mode, "benchmarks")
        self.assertEqual(parsed.test_baseline[0].name, self.testInput0)
        self.assertEqual(parsed.test_contender[0].name, self.testInput1)
        self.assertFalse(parsed.benchmark_options)

    def test_benchmarks_basic_display_aggregates_only(self):
        parsed = self.parser.parse_args(
            ["-a", "benchmarks", self.testInput0, self.testInput1]
        )
        self.assertTrue(parsed.display_aggregates_only)
        self.assertTrue(parsed.utest)
        self.assertEqual(parsed.mode, "benchmarks")
        self.assertEqual(parsed.test_baseline[0].name, self.testInput0)
        self.assertEqual(parsed.test_contender[0].name, self.testInput1)
        self.assertFalse(parsed.benchmark_options)

    def test_benchmarks_basic_with_utest_alpha(self):
        parsed = self.parser.parse_args(
            ["--alpha=0.314", "benchmarks", self.testInput0, self.testInput1]
        )
        self.assertFalse(parsed.display_aggregates_only)
        self.assertTrue(parsed.utest)
        self.assertEqual(parsed.utest_alpha, 0.314)
        self.assertEqual(parsed.mode, "benchmarks")
        self.assertEqual(parsed.test_baseline[0].name, self.testInput0)
        self.assertEqual(parsed.test_contender[0].name, self.testInput1)
        self.assertFalse(parsed.benchmark_options)

    def test_benchmarks_basic_without_utest_with_utest_alpha(self):
        parsed = self.parser.parse_args(
            [
                "--no-utest",
                "--alpha=0.314",
                "benchmarks",
                self.testInput0,
                self.testInput1,
            ]
        )
        self.assertFalse(parsed.display_aggregates_only)
        self.assertFalse(parsed.utest)
        self.assertEqual(parsed.utest_alpha, 0.314)
        self.assertEqual(parsed.mode, "benchmarks")
        self.assertEqual(parsed.test_baseline[0].name, self.testInput0)
        self.assertEqual(parsed.test_contender[0].name, self.testInput1)
        self.assertFalse(parsed.benchmark_options)

    def test_benchmarks_with_remainder(self):
        parsed = self.parser.parse_args(
            ["benchmarks", self.testInput0, self.testInput1, "d"]
        )
        self.assertFalse(parsed.display_aggregates_only)
        self.assertTrue(parsed.utest)
        self.assertEqual(parsed.mode, "benchmarks")
        self.assertEqual(parsed.test_baseline[0].name, self.testInput0)
        self.assertEqual(parsed.test_contender[0].name, self.testInput1)
        self.assertEqual(parsed.benchmark_options, ["d"])

    def test_benchmarks_with_remainder_after_doubleminus(self):
        parsed = self.parser.parse_args(
            ["benchmarks", self.testInput0, self.testInput1, "--", "e"]
        )
        self.assertFalse(parsed.display_aggregates_only)
        self.assertTrue(parsed.utest)
        self.assertEqual(parsed.mode, "benchmarks")
        self.assertEqual(parsed.test_baseline[0].name, self.testInput0)
        self.assertEqual(parsed.test_contender[0].name, self.testInput1)
        self.assertEqual(parsed.benchmark_options, ["e"])

    def test_filters_basic(self):
        parsed = self.parser.parse_args(["filters", self.testInput0, "c", "d"])
        self.assertFalse(parsed.display_aggregates_only)
        self.assertTrue(parsed.utest)
        self.assertEqual(parsed.mode, "filters")
        self.assertEqual(parsed.test[0].name, self.testInput0)
        self.assertEqual(parsed.filter_baseline[0], "c")
        self.assertEqual(parsed.filter_contender[0], "d")
        self.assertFalse(parsed.benchmark_options)

    def test_filters_with_remainder(self):
        parsed = self.parser.parse_args(
            ["filters", self.testInput0, "c", "d", "e"]
        )
        self.assertFalse(parsed.display_aggregates_only)
        self.assertTrue(parsed.utest)
        self.assertEqual(parsed.mode, "filters")
        self.assertEqual(parsed.test[0].name, self.testInput0)
        self.assertEqual(parsed.filter_baseline[0], "c")
        self.assertEqual(parsed.filter_contender[0], "d")
        self.assertEqual(parsed.benchmark_options, ["e"])

    def test_filters_with_remainder_after_doubleminus(self):
        parsed = self.parser.parse_args(
            ["filters", self.testInput0, "c", "d", "--", "f"]
        )
        self.assertFalse(parsed.display_aggregates_only)
        self.assertTrue(parsed.utest)
        self.assertEqual(parsed.mode, "filters")
        self.assertEqual(parsed.test[0].name, self.testInput0)
        self.assertEqual(parsed.filter_baseline[0], "c")
        self.assertEqual(parsed.filter_contender[0], "d")
        self.assertEqual(parsed.benchmark_options, ["f"])

    def test_benchmarksfiltered_basic(self):
        parsed = self.parser.parse_args(
            ["benchmarksfiltered", self.testInput0, "c", self.testInput1, "e"]
        )
        self.assertFalse(parsed.display_aggregates_only)
        self.assertTrue(parsed.utest)
        self.assertEqual(parsed.mode, "benchmarksfiltered")
        self.assertEqual(parsed.test_baseline[0].name, self.testInput0)
        self.assertEqual(parsed.filter_baseline[0], "c")
        self.assertEqual(parsed.test_contender[0].name, self.testInput1)
        self.assertEqual(parsed.filter_contender[0], "e")
        self.assertFalse(parsed.benchmark_options)

    def test_benchmarksfiltered_with_remainder(self):
        parsed = self.parser.parse_args(
            [
                "benchmarksfiltered",
                self.testInput0,
                "c",
                self.testInput1,
                "e",
                "f",
            ]
        )
        self.assertFalse(parsed.display_aggregates_only)
        self.assertTrue(parsed.utest)
        self.assertEqual(parsed.mode, "benchmarksfiltered")
        self.assertEqual(parsed.test_baseline[0].name, self.testInput0)
        self.assertEqual(parsed.filter_baseline[0], "c")
        self.assertEqual(parsed.test_contender[0].name, self.testInput1)
        self.assertEqual(parsed.filter_contender[0], "e")
        self.assertEqual(parsed.benchmark_options[0], "f")

    def test_benchmarksfiltered_with_remainder_after_doubleminus(self):
        parsed = self.parser.parse_args(
            [
                "benchmarksfiltered",
                self.testInput0,
                "c",
                self.testInput1,
                "e",
                "--",
                "g",
            ]
        )
        self.assertFalse(parsed.display_aggregates_only)
        self.assertTrue(parsed.utest)
        self.assertEqual(parsed.mode, "benchmarksfiltered")
        self.assertEqual(parsed.test_baseline[0].name, self.testInput0)
        self.assertEqual(parsed.filter_baseline[0], "c")
        self.assertEqual(parsed.test_contender[0].name, self.testInput1)
        self.assertEqual(parsed.filter_contender[0], "e")
        self.assertEqual(parsed.benchmark_options[0], "g")


if __name__ == "__main__":
    # unittest.main()
    main()

# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
# kate: tab-width: 4; replace-tabs on; indent-width 4; tab-indents: off;
# kate: indent-mode python; remove-trailing-spaces modified;