| #!/usr/bin/python |
| # |
| # ==-- jobstats - support for reading the contents of stats dirs --==# |
| # |
| # This source file is part of the Swift.org open source project |
| # |
| # Copyright (c) 2014-2017 Apple Inc. and the Swift project authors |
| # Licensed under Apache License v2.0 with Runtime Library Exception |
| # |
| # See https://swift.org/LICENSE.txt for license information |
| # See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors |
| # |
| # ==------------------------------------------------------------------------==# |
| # |
| # This file contains subroutines for loading object-representations of one or |
| # more directories generated by `swiftc -stats-output-dir`. |
| |
| import datetime |
| import itertools |
| import json |
| import os |
| import random |
| import re |
| |
| |
class JobStats(object):
    """Object holding the stats of a single job run during a compilation,
    corresponding to a single JSON file produced by a single job process
    passed -stats-output-dir."""

    def __init__(self, jobkind, jobid, module, start_usec, dur_usec,
                 jobargs, stats):
        """Store the identifying fields, timing and counters of one job.

        jobkind    -- kind of job; 'driver' and 'frontend' are the values
                      tested by is_driver_job() / is_frontend_job()
        jobid      -- identifier, emitted as "pid" in catapult traces
        module     -- module name, emitted as "name" in catapult traces
        start_usec -- start time in microseconds
        dur_usec   -- duration in microseconds
        jobargs    -- list of job arguments (concatenated with `+` on merge)
        stats      -- dict mapping stat name to numeric value
        """
        self.jobkind = jobkind
        self.jobid = jobid
        self.module = module
        self.start_usec = start_usec
        self.dur_usec = dur_usec
        self.jobargs = jobargs
        self.stats = stats

    def is_driver_job(self):
        """Return true iff self measures a driver job"""
        return self.jobkind == 'driver'

    def is_frontend_job(self):
        """Return true iff self measures a frontend job"""
        return self.jobkind == 'frontend'

    def driver_jobs_ran(self):
        """Return the count of a driver job's ran sub-jobs
        (0 when the counter is absent)"""
        assert self.is_driver_job()
        return self.stats.get("Driver.NumDriverJobsRun", 0)

    def driver_jobs_skipped(self):
        """Return the count of a driver job's skipped sub-jobs
        (0 when the counter is absent)"""
        assert self.is_driver_job()
        return self.stats.get("Driver.NumDriverJobsSkipped", 0)

    def driver_jobs_total(self):
        """Return the total count of a driver job's ran + skipped sub-jobs"""
        assert self.is_driver_job()
        return self.driver_jobs_ran() + self.driver_jobs_skipped()

    def merged_with(self, other, merge_by="sum"):
        """Return a new JobStats, holding the merger of self and other.

        Stats present in both jobs are combined with the operation named
        by merge_by ("sum", "min" or "max"); stats present in only one job
        are copied over unchanged. Any other merge_by raises KeyError.

        The merged job spans from the earlier start to the later end of
        the two jobs, gets a fresh random job id, and has its kind and
        module replaced by "<merged>" wherever the two inputs disagree.
        """
        ops = {"sum": lambda a, b: a + b,
               # Because 0 is also a sentinel on counters we do a modified
               # "nonzero-min" here. Not ideal but best we can do.
               "min": lambda a, b: (min(a, b)
                                    if a != 0 and b != 0
                                    else max(a, b)),
               "max": lambda a, b: max(a, b)}
        op = ops[merge_by]
        merged_stats = {}
        # chain() instead of `items() + items()`: on Python 3 items()
        # returns a view, and views cannot be concatenated with `+`.
        for k, v in itertools.chain(self.stats.items(),
                                    other.stats.items()):
            if k in merged_stats:
                merged_stats[k] = op(v, merged_stats[k])
            else:
                merged_stats[k] = v
        merged_kind = self.jobkind
        if other.jobkind != merged_kind:
            merged_kind = "<merged>"
        merged_module = self.module
        if other.module != merged_module:
            merged_module = "<merged>"
        merged_start = min(self.start_usec, other.start_usec)
        merged_end = max(self.start_usec + self.dur_usec,
                         other.start_usec + other.dur_usec)
        merged_dur = merged_end - merged_start
        return JobStats(merged_kind, random.randint(0, 1000000000),
                        merged_module, merged_start, merged_dur,
                        self.jobargs + other.jobargs, merged_stats)

    def prefixed_by(self, prefix):
        """Return a copy of self, with a fresh random job id, in which
        every stat name has been rewritten to `prefix + "." + name`"""
        prefixed_stats = {(prefix + "." + k): v
                          for (k, v) in self.stats.items()}
        return JobStats(self.jobkind, random.randint(0, 1000000000),
                        self.module, self.start_usec, self.dur_usec,
                        self.jobargs, prefixed_stats)

    def incrementality_percentage(self):
        """Assuming the job is a driver job, return the amount of
        jobs that actually ran, as a percentage of the total number.

        The result is rounded to 2 decimal places. Raises
        ZeroDivisionError when the job recorded no ran or skipped
        sub-jobs at all."""
        assert self.is_driver_job()
        ran = self.driver_jobs_ran()
        total = self.driver_jobs_total()
        return round((float(ran) / float(total)) * 100.0, 2)

    def to_catapult_trace_obj(self):
        """Return a JSON-formattable object fitting chrome's
        'catapult' trace format"""
        return {"name": self.module,
                "cat": self.jobkind,
                "ph": "X",              # "X" == "complete event"
                "pid": self.jobid,
                "tid": 1,
                "ts": self.start_usec,
                "dur": self.dur_usec,
                "args": self.jobargs}

    def start_timestr(self):
        """Return a formatted timestamp of the job's start-time"""
        t = datetime.datetime.fromtimestamp(self.start_usec / 1000000.0)
        return t.strftime("%Y-%m-%d %H:%M:%S")

    def end_timestr(self):
        """Return a formatted timestamp of the job's end-time"""
        t = datetime.datetime.fromtimestamp((self.start_usec +
                                             self.dur_usec) / 1000000.0)
        return t.strftime("%Y-%m-%d %H:%M:%S")

    def pick_lnt_metric_suffix(self, metric_name):
        """Guess an appropriate LNT metric type for a given metric name"""
        if "BytesOutput" in metric_name:
            return "code_size"
        if "RSS" in metric_name or "BytesAllocated" in metric_name:
            return "mem"
        return "compile"

    def to_lnt_test_obj(self, args):
        """Return a JSON-formattable object fitting LNT's 'submit' format.

        args must provide the attributes lnt_order, lnt_tag, lnt_machine,
        lnt_run_info and lnt_machine_info; the last two are passed to
        dict(). One test entry is emitted per stat, named
        "<tag>.<module>.<stat>.<metric-suffix>".
        """
        run_info = {
            "run_order": str(args.lnt_order),
            "tag": str(args.lnt_tag),
        }
        run_info.update(dict(args.lnt_run_info))
        stats = self.stats
        return {
            "Machine":
            {
                "Name": args.lnt_machine,
                "Info": dict(args.lnt_machine_info)
            },
            "Run":
            {
                "Start Time": self.start_timestr(),
                "End Time": self.end_timestr(),
                "Info": run_info
            },
            "Tests":
            [
                {
                    "Data": [v],
                    "Info": {},
                    "Name": "%s.%s.%s.%s" % (args.lnt_tag, self.module,
                                             k, self.pick_lnt_metric_suffix(k))
                }
                for (k, v) in stats.items()
            ]
        }
| |
| |
def load_stats_dir(path, select_module=[], select_stat=[],
                   exclude_timers=False, **kwargs):
    """Loads all stats-files found in path into a list of JobStats objects.

    The directory tree under path is walked recursively. Only files named

        stats-<start>-swift-<kind>-<module>-<input>-<triple>-<out>-<opt>-<pid>[-...].json

    are loaded; everything else is silently skipped.

    path           -- root directory to search
    select_module  -- if non-empty, only load jobs whose module is listed
    select_stat    -- if non-empty, a list of regexes; only stats whose
                      name is matched (re.search) by at least one of them
                      are kept
    exclude_timers -- if true, drop every stat whose name starts "time."
    kwargs         -- accepted and ignored

    Values of "time." stats are scaled by 1000000 and truncated to int
    (presumably seconds -> microseconds). A job's dur_usec is taken from
    its own "time.swift-<kind>.<...>.wall" stat; it stays at 1 when that
    stat is missing, deselected by select_stat, or dropped by
    exclude_timers.
    """
    auxpat = (r"(?P<module>[^-]+)-(?P<input>[^-]+)-(?P<triple>[^-]+)" +
              r"-(?P<out>[^-]*)-(?P<opt>[^-]+)")
    # The dot before "json" is escaped: left bare it matches any character
    # and lets names such as "...-123Xjson" through as stats files.
    fpat = (r"^stats-(?P<start>\d+)-swift-(?P<kind>\w+)-" +
            auxpat +
            r"-(?P<pid>\d+)(-.*)?\.json$")
    fre = re.compile(fpat)
    sre = re.compile('|'.join(select_stat) if select_stat else '.*')

    jobstats = []
    for root, _dirs, files in os.walk(path):
        for f in files:
            m = fre.match(f)
            if not m:
                continue
            # NB: "pid" in fpat is a random number, not unix pid.
            mg = m.groupdict()
            jobkind = mg['kind']
            jobid = int(mg['pid'])
            start_usec = int(mg['start'])
            module = mg["module"]
            if select_module and module not in select_module:
                continue
            jobargs = [mg["input"], mg["triple"], mg["out"], mg["opt"]]

            with open(os.path.join(root, f)) as fp:
                j = json.load(fp)

            # Fallback duration, used when no wall-clock timer is recorded.
            dur_usec = 1
            # Wall-clock timer of this job's own kind; jobkind matched \w+
            # above, so it is safe to splice into the pattern unescaped.
            wall_re = re.compile(r"time\.swift-" + jobkind + r"\." +
                                 auxpat + r"\.wall$")
            stats = dict()
            for (k, v) in j.items():
                if sre.search(k) is None:
                    continue
                if k.startswith("time."):
                    v = int(1000000.0 * float(v))
                    if exclude_timers:
                        continue
                stats[k] = v
                if wall_re.match(k):
                    dur_usec = v

            jobstats.append(JobStats(jobkind=jobkind, jobid=jobid,
                                     module=module, start_usec=start_usec,
                                     dur_usec=dur_usec, jobargs=jobargs,
                                     stats=stats))
    return jobstats
| |
| |
def merge_all_jobstats(jobstats, select_module=[], group_by_module=False,
                       merge_by="sum", **kwargs):
    """Fold a sequence of jobs into one via repeated merged_with() calls.

    jobstats        -- iterable of job objects to merge
    select_module   -- if non-empty, only jobs whose module is listed
                       take part in the merge
    group_by_module -- if true, jobs are first merged per module and each
                       per-module result is passed through
                       prefixed_by(<module>) before the final merge
    merge_by        -- forwarded to every merged_with() call
    kwargs          -- accepted and ignored

    Returns the merged job, or None when there is nothing to merge.
    """
    if len(select_module) != 0:
        jobstats = [job for job in jobstats if job.module in select_module]

    if group_by_module:
        def module_of(job):
            return job.module

        # groupby() only groups adjacent items, so order by module first.
        by_module = sorted(jobstats, key=module_of)
        jobstats = [
            merge_all_jobstats(members, merge_by=merge_by).prefixed_by(name)
            for name, members in itertools.groupby(by_module, module_of)
        ]

    merged = None
    for job in jobstats:
        merged = (job if merged is None
                  else merged.merged_with(job, merge_by=merge_by))
    return merged