| #!/usr/bin/python |
| # |
| # ==-- process-stats-dir - summarize one or more Swift -stats-output-dirs --==# |
| # |
| # This source file is part of the Swift.org open source project |
| # |
| # Copyright (c) 2014-2017 Apple Inc. and the Swift project authors |
| # Licensed under Apache License v2.0 with Runtime Library Exception |
| # |
| # See https://swift.org/LICENSE.txt for license information |
| # See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors |
| # |
| # ==------------------------------------------------------------------------==# |
| # |
| # This file processes the contents of one or more directories generated by |
| # `swiftc -stats-output-dir` and emits summary data, traces etc. for analysis. |
| |
| import argparse |
| import csv |
| import json |
| import os |
| import platform |
| import re |
| import sys |
| import time |
| import urllib |
| import urllib2 |
| from collections import namedtuple |
| from operator import attrgetter |
| from jobstats import load_stats_dir, merge_all_jobstats |
| |
| |
| # Passed args with 2-element remainder ["old", "new"], return a list of tuples |
| # of the form [(name, (oldstats, newstats))] where each name is a common subdir |
| # of each of "old" and "new", and the stats are those found in the respective |
| # dirs. |
def load_paired_stats_dirs(args):
    """Load the stats of every subdirectory shared by an "old" and "new" dir.

    `args.remainder` must hold exactly two paths, `[old, new]`. Each entry
    of `old` (visited in sorted order) that is a directory in both `old`
    and `new` is loaded with `load_stats_dir`, which receives every parsed
    option as a keyword argument.

    Returns a list of `(name, (old_stats, new_stats))` tuples; pairs where
    either side loaded no stats are left out.

    Raises ValueError if `args.remainder` does not hold exactly two paths.
    """
    # Raise rather than assert: asserts vanish under `python -O`, and
    # compare_stats_dirs reports the same mistake with a ValueError.
    if len(args.remainder) != 2:
        raise ValueError("Expected exactly 2 dirs-of-stats-dirs")
    (old, new) = args.remainder
    paired_stats = []
    for p in sorted(os.listdir(old)):
        full_old = os.path.join(old, p)
        full_new = os.path.join(new, p)
        # isdir() is already False for paths that do not exist, so no
        # separate exists() test is needed.
        if not (os.path.isdir(full_old) and os.path.isdir(full_new)):
            continue
        old_stats = load_stats_dir(full_old, **vars(args))
        new_stats = load_stats_dir(full_new, **vars(args))
        if len(old_stats) == 0 or len(new_stats) == 0:
            continue
        paired_stats.append((p, (old_stats, new_stats)))
    return paired_stats
| |
| |
def write_catapult_trace(args):
    """Write one JSON trace covering every stats-dir in `args.remainder`.

    All jobs are loaded first (each `load_stats_dir` call receives every
    parsed option as a keyword argument); then the list of their
    `to_catapult_trace_obj()` results is dumped as JSON to `args.output`.
    """
    jobs = [job
            for stats_dir in args.remainder
            for job in load_stats_dir(stats_dir, **vars(args))]
    trace_events = [job.to_catapult_trace_obj() for job in jobs]
    json.dump(trace_events, args.output)
| |
| |
def write_lnt_values(args):
    """Emit, or submit, an LNT test object for each stats-dir.

    For every path in `args.remainder` the stats are loaded, merged, and
    converted with `to_lnt_test_obj(args)`. When `args.lnt_submit` is None
    the object is dumped as indented JSON to `args.output`; otherwise it is
    url-encoded and sent to that URL, and the JSON reply is printed.

    Exits the process with status 1 if a stats-dir yields no stats or if
    the server reply does not contain a 'success' key.
    """
    for d in args.remainder:
        stats = load_stats_dir(d, **vars(args))
        merged = merge_all_jobstats(stats, **vars(args))
        if merged is None:
            # Other callers in this file treat None as "nothing to merge";
            # report it instead of failing on the attribute access below.
            sys.stderr.write("no stats found in %s\n" % d)
            sys.exit(1)
        j = merged.to_lnt_test_obj(args)
        if args.lnt_submit is None:
            json.dump(j, args.output, indent=4)
            continue

        url = args.lnt_submit
        print("\nsubmitting to LNT server: " + url)
        json_report = {'input_data': json.dumps(j), 'commit': '1'}
        data = urllib.urlencode(json_report)
        http_response = urllib2.urlopen(urllib2.Request(url, data))
        try:
            response = json.loads(http_response.read())
        finally:
            # Always release the connection, even if the body is not JSON.
            http_response.close()
        print("### response:")
        print(response)
        if 'success' in response:
            print("server response:\tSuccess")
        else:
            print("server response:\tError")
            # Fall back to the whole reply when there is no 'error' entry,
            # so a KeyError never hides what the server actually said.
            detail = None
            if isinstance(response, dict):
                detail = response.get('error')
            if detail is None:
                detail = response
            print("error:\t%s" % (detail,))
            sys.exit(1)
| |
| |
def show_paired_incrementality(args):
    """Write a tab-separated old/new incrementality table to `args.output`.

    One row is written per pair returned by `load_paired_stats_dirs`. The
    driver jobs of each side are merged, and the row carries both sides'
    `incrementality_percentage()` and `driver_jobs_skipped()` values plus
    their new-minus-old deltas. Pairs where either merge yields None are
    skipped.
    """
    columns = ["old_pct", "old_skip",
               "new_pct", "new_skip",
               "delta_pct", "delta_skip",
               "name"]
    writer = csv.DictWriter(args.output, columns, dialect='excel-tab')
    writer.writeheader()

    def merged_driver(jobstats):
        # Merge only the driver jobs of one side of the pair.
        drivers = (job for job in jobstats if job.is_driver_job())
        return merge_all_jobstats(drivers, **vars(args))

    for (name, (oldstats, newstats)) in load_paired_stats_dirs(args):
        olddriver = merged_driver(oldstats)
        newdriver = merged_driver(newstats)
        if olddriver is None or newdriver is None:
            continue
        (old_pct, new_pct) = (olddriver.incrementality_percentage(),
                              newdriver.incrementality_percentage())
        (old_skip, new_skip) = (olddriver.driver_jobs_skipped(),
                                newdriver.driver_jobs_skipped())
        writer.writerow({
            "name": name,
            "old_pct": old_pct,
            "old_skip": old_skip,
            "new_pct": new_pct,
            "new_skip": new_skip,
            "delta_pct": new_pct - old_pct,
            "delta_skip": new_skip - old_skip,
        })
| |
| |
def show_incrementality(args):
    """Write a tab-separated incrementality row for each driver job.

    Every path in `args.remainder` is loaded with `load_stats_dir`; each
    job reporting `is_driver_job()` produces one row holding its
    `incrementality_percentage()` and the basename of the path it came
    from. Output goes to `args.output`, preceded by a header row.
    """
    writer = csv.DictWriter(args.output, ["incrementality", "name"],
                            dialect='excel-tab')
    writer.writeheader()

    for stats_dir in args.remainder:
        for job in load_stats_dir(stats_dir, **vars(args)):
            if not job.is_driver_job():
                continue
            writer.writerow({
                "incrementality": job.incrementality_percentage(),
                "name": os.path.basename(stats_dir),
            })
| |
| |
def diff_and_pct(old, new):
    """Return `(delta, delta_pct)` describing the change from old to new.

    `delta` is `new - old`; `delta_pct` is that change as a percentage of
    `old`, rounded to two decimal places. A zero `old` has no meaningful
    ratio, so the result is `(0, 0.0)` when `new` is also zero and
    `(new, 100.0)` otherwise.
    """
    if old != 0:
        delta = new - old
        return (delta, round(float(delta) / float(old) * 100.0, 2))
    return (0, 0.0) if new == 0 else (new, 100.0)
| |
| |
def update_epoch_value(d, name, epoch, value):
    """Record `(epoch, value)` for `name` in `d`, reconciling with any entry.

    Returns `(epoch, value, changed)`: the pair actually stored in `d`,
    and 1 if an existing value was replaced by a different one, else 0.

    - With no existing entry, the given pair is stored as-is.
    - If the existing entry has a strictly greater epoch, it is kept.
    - If the value is unchanged, the existing epoch is kept.
    - Otherwise the new pair replaces the old one and a note is printed.
    """
    if name not in d:
        d[name] = (epoch, value)
        return (epoch, value, 0)

    (prev_epoch, prev_value) = d[name]
    changed = 0
    if prev_epoch > epoch:
        print("note: keeping newer value %d from epoch %d for %s"
              % (prev_value, prev_epoch, name))
        (epoch, value) = (prev_epoch, prev_value)
    elif prev_value == value:
        epoch = prev_epoch
    else:
        delta_pct = diff_and_pct(prev_value, value)[1]
        print("note: changing value %d -> %d (%.2f%%) for %s" %
              (prev_value, value, delta_pct, name))
        changed = 1
    d[name] = (epoch, value)
    return (epoch, value, changed)
| |
| |
def read_stats_dict_from_csv(f, select_stat=''):
    """Read a tab-separated baseline file into `{name: (epoch, value)}`.

    `f` is an open file with one headerless `epoch, name, value` row per
    line. `select_stat` is either a single regular expression or a
    sequence of them; only names matched (via `search`) by at least one
    are kept. An empty string, empty sequence or None selects everything.

    Duplicate names are reconciled through `update_epoch_value`.
    """
    # A lone string is one pattern. Joining it directly would iterate its
    # characters and turn "abc" into the pattern "a|b|c".
    if select_stat is None:
        select_stat = []
    elif isinstance(select_stat, (str, type(u''))):
        select_stat = [select_stat] if select_stat else []

    infieldnames = ["epoch", "name", "value"]
    c = csv.DictReader(f, infieldnames,
                       dialect='excel-tab',
                       quoting=csv.QUOTE_NONNUMERIC)
    d = {}
    sre = re.compile('.*' if len(select_stat) == 0 else
                     '|'.join(select_stat))
    for row in c:
        epoch = int(row["epoch"])
        name = row["name"]
        if sre.search(name) is None:
            continue
        value = int(row["value"])
        update_epoch_value(d, name, epoch, value)
    return d
| |
| |
| # The idea here is that a "baseline" is a (tab-separated) CSV file full of |
| # the counters you want to track, each prefixed by an epoch timestamp of |
| # the last time the value was reset. |
| # |
| # When you set a fresh baseline, all stats in the provided stats dir are |
| # written to the baseline. When you set against an _existing_ baseline, |
| # only the counters mentioned in the existing baseline are updated, and |
| # only if their values differ. |
| # |
| # Finally, since it's a line-oriented CSV file, you can put: |
| # |
| # mybaseline.csv merge=union |
| # |
| # in your .gitattributes file, and forget about merge conflicts. The reader |
| # function above will take the later epoch anytime it detects duplicates, |
| # so union-merging is harmless. Duplicates will be eliminated whenever the |
| # next baseline-set is done. |
def set_csv_baseline(args):
    """Create or refresh the CSV baseline named by `args.set_csv_baseline`.

    Stats from every dir in `args.remainder` are merged. With no existing
    baseline file, every merged stat is written with the current time as
    its epoch. With an existing file, only names already present in it
    are written, each reconciled through `update_epoch_value`.

    Returns 0 on success, or 1 if no stats were found. In the latter case
    the baseline file is left untouched.
    """
    existing = None
    if os.path.exists(args.set_csv_baseline):
        with open(args.set_csv_baseline, "r") as f:
            existing = read_stats_dict_from_csv(f,
                                                select_stat=args.select_stat)
        print("updating %d baseline entries in %s" %
              (len(existing), args.set_csv_baseline))
    else:
        print("making new baseline " + args.set_csv_baseline)

    # Merge before opening the baseline for writing: opening with "wb"
    # truncates the file, so bailing out afterwards would wipe an existing
    # baseline whenever no stats are found.
    m = merge_all_jobstats((s for d in args.remainder
                            for s in load_stats_dir(d, **vars(args))),
                           **vars(args))
    if m is None:
        print("no stats found")
        return 1

    fieldnames = ["epoch", "name", "value"]
    changed = 0
    newepoch = int(time.time())
    with open(args.set_csv_baseline, "wb") as f:
        out = csv.DictWriter(f, fieldnames, dialect='excel-tab',
                             quoting=csv.QUOTE_NONNUMERIC)
        for name in sorted(m.stats.keys()):
            epoch = newepoch
            value = m.stats[name]
            if existing is not None:
                # Against an existing baseline, only counters it already
                # tracks are carried forward.
                if name not in existing:
                    continue
                (epoch, value, chg) = update_epoch_value(existing, name,
                                                         epoch, value)
                changed += chg
            out.writerow(dict(epoch=int(epoch),
                              name=name,
                              value=int(value)))
    if existing is not None:
        print("changed %d entries in baseline" % changed)
    return 0
| |
| |
# One line of a stats comparison: the stat's name, its old and new values,
# and the absolute and percentage change between them.
OutputRow = namedtuple("OutputRow", "name old new delta delta_pct")
| |
| |
def compare_stats(args, old_stats, new_stats):
    """Yield an `OutputRow` for each stat in `old_stats` that changed enough.

    Names are visited in sorted order; a name absent from `new_stats`
    counts as 0. A stat is dropped when its percentage change is below
    `args.delta_pct_thresh`, or, for names that start with "time." or
    contain ".time.", when its absolute change is below
    `args.delta_usec_thresh`.
    """
    for name in sorted(old_stats.keys()):
        before = old_stats[name]
        after = new_stats.get(name, 0)
        (delta, delta_pct) = diff_and_pct(before, after)
        is_timer = name.startswith("time.") or '.time.' in name
        below_threshold = (
            (is_timer and abs(delta) < args.delta_usec_thresh) or
            abs(delta_pct) < args.delta_pct_thresh)
        if not below_threshold:
            yield OutputRow(name=name,
                            old=int(before), new=int(after),
                            delta=int(delta),
                            delta_pct=delta_pct)
| |
| |
def write_comparison(args, old_stats, new_stats):
    """Write the old-vs-new comparison to `args.output`; count regressions.

    Rows come from `compare_stats` and are ordered by `delta_pct` when
    `args.sort_by_delta_pct` is set, otherwise by name, reversed when
    `args.sort_descending` is set. They are written as a markdown table
    when `args.markdown` is set and as tab-separated values otherwise.

    Returns the number of rows whose delta is positive.
    """
    rows = list(compare_stats(args, old_stats, new_stats))
    if args.sort_by_delta_pct:
        sort_key = attrgetter('delta_pct')
    else:
        sort_key = attrgetter('name')
    rows = sorted(rows, key=sort_key, reverse=args.sort_descending)
    regressions = len([row for row in rows if row.delta > 0])

    if not args.markdown:
        writer = csv.DictWriter(args.output, OutputRow._fields,
                                dialect='excel-tab')
        writer.writeheader()
        writer.writerows(row._asdict() for row in rows)
        return regressions

    # Markdown: a header line, a right-aligning separator line, then one
    # line per row.
    out = args.output
    lines = [' | '.join(OutputRow._fields),
             ' | '.join('---:' for _ in OutputRow._fields)]
    lines.extend(' | '.join(str(v) for v in row) for row in rows)
    for line in lines:
        out.write(line)
        out.write('\n')
    return regressions
| |
| |
def compare_to_csv_baseline(args):
    """Compare the merged stats of `args.remainder` against a CSV baseline.

    The baseline is read from the open file `args.compare_to_csv_baseline`,
    filtered by `args.select_stat`, and its epochs are dropped so that only
    `{name: value}` is compared.

    Returns the regression count from `write_comparison`, or 1 (after a
    note on stderr) when the stats-dirs yield no stats at all.
    """
    baseline = read_stats_dict_from_csv(args.compare_to_csv_baseline,
                                        select_stat=args.select_stat)
    m = merge_all_jobstats((s for d in args.remainder
                            for s in load_stats_dir(d, **vars(args))),
                           **vars(args))
    if m is None:
        # Other callers in this file treat None as "nothing to merge";
        # report it instead of failing on `m.stats`. The note goes to
        # stderr so it cannot end up inside the comparison output.
        sys.stderr.write("no stats found\n")
        return 1
    old_stats = dict((k, v) for (k, (_, v)) in baseline.items())
    new_stats = m.stats

    return write_comparison(args, old_stats, new_stats)
| |
| |
def compare_stats_dirs(args):
    """Summarize the immediate difference between two stats-dirs.

    `args.remainder` must hold exactly two paths, `[old, new]`. Each is
    loaded and merged, and the two merged stat sets are passed to
    `write_comparison`.

    Returns the regression count from `write_comparison`, or 1 (after a
    note on stderr) when either dir yields no stats.

    Raises ValueError if `args.remainder` does not hold exactly two paths.
    """
    if len(args.remainder) != 2:
        raise ValueError("Expected exactly 2 stats-dirs")

    (old, new) = args.remainder
    old_stats = merge_all_jobstats(load_stats_dir(old, **vars(args)),
                                   **vars(args))
    new_stats = merge_all_jobstats(load_stats_dir(new, **vars(args)),
                                   **vars(args))

    # Other callers in this file treat None as "nothing to merge"; report
    # which side was empty instead of failing on `.stats`.
    for (path, merged) in ((old, old_stats), (new, new_stats)):
        if merged is None:
            sys.stderr.write("no stats found in %s\n" % path)
            return 1

    return write_comparison(args, old_stats.stats, new_stats.stats)
| |
| |
def main():
    """Parse the command line and run the selected processing mode.

    Exactly one mode flag is required (--catapult, --incrementality,
    --set-csv-baseline, --compare-to-csv-baseline, --compare-stats-dirs or
    --lnt); the remaining positional arguments are the stats-dirs.

    Returns the value to pass to sys.exit(): 1 when no stats-dirs are
    given, the selected mode's own return value for the baseline and
    comparison modes, and None otherwise.
    """
    def key_value(kv):
        # Split on the first "=" only, so that a value may itself contain
        # "=" and each option still parses to a [key, value] pair.
        return kv.split("=", 1)

    parser = argparse.ArgumentParser()
    parser.add_argument("--verbose", action="store_true",
                        help="Report activity verbosely")
    parser.add_argument("--output", default="-",
                        type=argparse.FileType('wb', 0),
                        help="Write output to file")
    parser.add_argument("--paired", action="store_true",
                        help="Process two dirs-of-stats-dirs, pairwise")
    parser.add_argument("--delta-pct-thresh", type=float, default=0.01,
                        help="Percentage change required to report")
    parser.add_argument("--delta-usec-thresh", type=int, default=100000,
                        help="Absolute delta on times required to report")
    parser.add_argument("--lnt-machine", type=str, default=platform.node(),
                        help="Machine name for LNT submission")
    parser.add_argument("--lnt-run-info", action='append', default=[],
                        type=key_value,
                        help="Extra key=value pairs for LNT run-info")
    parser.add_argument("--lnt-machine-info", action='append', default=[],
                        type=key_value,
                        help="Extra key=value pairs for LNT machine-info")
    parser.add_argument("--lnt-order", type=str,
                        default=str(int(time.time())),
                        help="Order for LNT submission")
    parser.add_argument("--lnt-tag", type=str, default="swift-compile",
                        help="Tag for LNT submission")
    parser.add_argument("--lnt-submit", type=str, default=None,
                        help="URL to submit LNT data to (rather than print)")
    parser.add_argument("--select-module",
                        default=[],
                        action="append",
                        help="Select specific modules")
    parser.add_argument("--group-by-module",
                        default=False,
                        action="store_true",
                        help="Group stats by module")
    parser.add_argument("--select-stat",
                        default=[],
                        action="append",
                        help="Select specific statistics")
    parser.add_argument("--exclude-timers",
                        default=False,
                        action="store_true",
                        help="only select counters, exclude timers")
    parser.add_argument("--sort-by-delta-pct",
                        default=False,
                        action="store_true",
                        help="Sort comparison results by delta-%%, not stat")
    parser.add_argument("--sort-descending",
                        default=False,
                        action="store_true",
                        help="Sort comparison results in descending order")
    parser.add_argument("--merge-by",
                        default="sum",
                        type=str,
                        help="Merge identical metrics by (sum|min|max)")
    parser.add_argument("--markdown",
                        default=False,
                        action="store_true",
                        help="Write output in markdown table format")
    modes = parser.add_mutually_exclusive_group(required=True)
    modes.add_argument("--catapult", action="store_true",
                       help="emit a 'catapult'-compatible trace of events")
    modes.add_argument("--incrementality", action="store_true",
                       help="summarize the 'incrementality' of a build")
    modes.add_argument("--set-csv-baseline", type=str, default=None,
                       help="Merge stats from a stats-dir into a CSV baseline")
    modes.add_argument("--compare-to-csv-baseline",
                       type=argparse.FileType('rb', 0), default=None,
                       metavar="BASELINE.csv",
                       help="Compare stats dir to named CSV baseline")
    modes.add_argument("--compare-stats-dirs",
                       action="store_true",
                       help="Compare two stats dirs directly")
    modes.add_argument("--lnt", action="store_true",
                       help="Emit an LNT-compatible test summary")
    parser.add_argument('remainder', nargs=argparse.REMAINDER,
                        help="stats-dirs to process")

    args = parser.parse_args()
    if len(args.remainder) == 0:
        parser.print_help()
        return 1
    if args.catapult:
        write_catapult_trace(args)
    elif args.compare_stats_dirs:
        return compare_stats_dirs(args)
    elif args.set_csv_baseline is not None:
        return set_csv_baseline(args)
    elif args.compare_to_csv_baseline is not None:
        return compare_to_csv_baseline(args)
    elif args.incrementality:
        if args.paired:
            show_paired_incrementality(args)
        else:
            show_incrementality(args)
    elif args.lnt:
        write_lnt_values(args)
    return None
| |
| |
# Run only when executed as a script, so that importing this file (e.g.
# from a test or another tool) does not parse sys.argv and exit.
if __name__ == "__main__":
    sys.exit(main())