utils/jobstats/jobstats.py - third_party/swift - Git at Google

 #!/usr/bin/python
 #
 # ==-- jobstats - support for reading the contents of stats dirs --==#
 #
 # This source file is part of the Swift.org open source project
 #
 # Copyright (c) 2014-2017 Apple Inc. and the Swift project authors
 # Licensed under Apache License v2.0 with Runtime Library Exception
 #
 # See https://swift.org/LICENSE.txt for license information
 # See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
 #
 # ==------------------------------------------------------------------------==#
 #
 # This file contains subroutines for loading object-representations of one or
 # more directories generated by `swiftc -stats-output-dir`.

 import datetime
 import itertools
 import json
 import os
 import random
 import re


 class JobStats(object):
     """Object holding the stats of a single job run during a compilation,
     corresponding to a single JSON file produced by a single job process
     passed -stats-output-dir.

     The constructor arguments are stored unchanged as attributes:
     jobkind, jobid, module, start_usec, dur_usec, jobargs (a list) and
     stats (a dict mapping stat names to values).
     """

     def __init__(self, jobkind, jobid, module, start_usec, dur_usec,
                  jobargs, stats):
         self.jobkind = jobkind
         self.jobid = jobid
         self.module = module
         self.start_usec = start_usec
         self.dur_usec = dur_usec
         self.jobargs = jobargs
         self.stats = stats

     def is_driver_job(self):
         """Return true iff self measures a driver job"""
         return self.jobkind == 'driver'

     def is_frontend_job(self):
         """Return true iff self measures a frontend job"""
         return self.jobkind == 'frontend'

     def driver_jobs_ran(self):
         """Return the count of a driver job's ran sub-jobs (0 if the
         "Driver.NumDriverJobsRun" stat is absent)"""
         assert self.is_driver_job()
         return self.stats.get("Driver.NumDriverJobsRun", 0)

     def driver_jobs_skipped(self):
         """Return the count of a driver job's skipped sub-jobs (0 if the
         "Driver.NumDriverJobsSkipped" stat is absent)"""
         assert self.is_driver_job()
         return self.stats.get("Driver.NumDriverJobsSkipped", 0)

     def driver_jobs_total(self):
         """Return the total count of a driver job's ran + skipped sub-jobs"""
         assert self.is_driver_job()
         return self.driver_jobs_ran() + self.driver_jobs_skipped()

     def merged_with(self, other, merge_by="sum"):
         """Return a new JobStats, holding the merger of self and other.

         Stats present in both jobs are combined with the operation named
         by merge_by ("sum", "min" or "max"; anything else raises
         KeyError); stats present in only one job are copied unchanged.
         The result gets a fresh random jobid, the concatenation of both
         jobargs lists and a start/duration spanning both jobs. Its
         jobkind and module are "<merged>" wherever the two jobs disagree.
         """
         ops = {"sum": lambda a, b: a + b,
                # Because 0 is also a sentinel on counters we do a modified
                # "nonzero-min" here. Not ideal but best we can do.
                "min": lambda a, b: (min(a, b)
                                     if a != 0 and b != 0
                                     else max(a, b)),
                "max": lambda a, b: max(a, b)}
         op = ops[merge_by]
         # Start from a copy of our own stats and fold the other job's
         # stats in. Adding the two .items() results together only works
         # where items() returns a list; dict views cannot be added.
         merged_stats = dict(self.stats)
         for k, v in other.stats.items():
             if k in merged_stats:
                 merged_stats[k] = op(v, merged_stats[k])
             else:
                 merged_stats[k] = v
         merged_kind = self.jobkind
         if other.jobkind != merged_kind:
             merged_kind = "<merged>"
         merged_module = self.module
         if other.module != merged_module:
             merged_module = "<merged>"
         # The merged job spans from the earliest start to the latest end.
         merged_start = min(self.start_usec, other.start_usec)
         merged_end = max(self.start_usec + self.dur_usec,
                          other.start_usec + other.dur_usec)
         merged_dur = merged_end - merged_start
         return JobStats(merged_kind, random.randint(0, 1000000000),
                         merged_module, merged_start, merged_dur,
                         self.jobargs + other.jobargs, merged_stats)

     def prefixed_by(self, prefix):
         """Return a new JobStats that copies self, with a fresh random
         jobid and every stat name rewritten to "<prefix>.<name>"."""
         prefixed_stats = {prefix + "." + k: v
                           for (k, v) in self.stats.items()}
         return JobStats(self.jobkind, random.randint(0, 1000000000),
                         self.module, self.start_usec, self.dur_usec,
                         self.jobargs, prefixed_stats)

     def incrementality_percentage(self):
         """Assuming the job is a driver job, return the amount of
         jobs that actually ran, as a percentage of the total number,
         rounded to two decimal places.

         Raises ZeroDivisionError when the job has neither ran nor skipped
         sub-jobs recorded."""
         assert self.is_driver_job()
         ran = self.driver_jobs_ran()
         total = self.driver_jobs_total()
         return round((float(ran) / float(total)) * 100.0, 2)

     def to_catapult_trace_obj(self):
         """Return a JSON-formattable object fitting chrome's
         'catapult' trace format"""
         return {"name": self.module,
                 "cat": self.jobkind,
                 "ph": "X",              # "X" == "complete event"
                 "pid": self.jobid,
                 "tid": 1,
                 "ts": self.start_usec,
                 "dur": self.dur_usec,
                 "args": self.jobargs}

     def start_timestr(self):
         """Return a formatted timestamp of the job's start-time"""
         t = datetime.datetime.fromtimestamp(self.start_usec / 1000000.0)
         return t.strftime("%Y-%m-%d %H:%M:%S")

     def end_timestr(self):
         """Return a formatted timestamp of the job's end-time"""
         t = datetime.datetime.fromtimestamp((self.start_usec +
                                              self.dur_usec) / 1000000.0)
         return t.strftime("%Y-%m-%d %H:%M:%S")

     def pick_lnt_metric_suffix(self, metric_name):
         """Guess an appropriate LNT metric type for a given metric name"""
         if "BytesOutput" in metric_name:
             return "code_size"
         if "RSS" in metric_name or "BytesAllocated" in metric_name:
             return "mem"
         return "compile"

     def to_lnt_test_obj(self, args):
         """Return a JSON-formattable object fitting LNT's 'submit' format.

         args must provide lnt_order, lnt_tag, lnt_machine, and the
         dict-convertible lnt_run_info and lnt_machine_info. One test
         entry is emitted per stat, named
         "<lnt_tag>.<module>.<stat name>.<metric suffix>".
         """
         run_info = {
             "run_order": str(args.lnt_order),
             "tag": str(args.lnt_tag),
         }
         # Caller-supplied run info may override the two defaults above.
         run_info.update(dict(args.lnt_run_info))
         stats = self.stats
         return {
             "Machine":
             {
                 "Name": args.lnt_machine,
                 "Info": dict(args.lnt_machine_info)
             },
             "Run":
             {
                 "Start Time": self.start_timestr(),
                 "End Time": self.end_timestr(),
                 "Info": run_info
             },
             "Tests":
             [
                 {
                     "Data": [v],
                     "Info": {},
                     "Name": "%s.%s.%s.%s" % (args.lnt_tag, self.module,
                                              k, self.pick_lnt_metric_suffix(k))
                 }
                 for (k, v) in stats.items()
             ]
         }


 def load_stats_dir(path, select_module=[], select_stat=[],
                    exclude_timers=False, **kwargs):
     """Loads all stats-files found in path into a list of JobStats objects.

     The tree rooted at path is walked recursively. Only files named
     stats-<start>-swift-<kind>-<module>-<input>-<triple>-<out>-<opt>-<pid>
     (optionally followed by "-<anything>") with a ".json" extension are
     read; everything else is ignored.

     path -- directory to walk.
     select_module -- if non-empty, only files whose module name is in
         this collection are loaded.
     select_stat -- if non-empty, a list of regular expressions; a stat is
         kept only when at least one of them is found in its name.
     exclude_timers -- if true, stats whose name starts with "time." are
         dropped.
     kwargs -- accepted and ignored.

     Returns a list with one JobStats per loaded file. Values of "time."
     stats are multiplied by 1000000 and truncated to int (presumably
     seconds to microseconds).
     """
     jobstats = []
     auxpat = (r"(?P<module>[^-]+)-(?P<input>[^-]+)-(?P<triple>[^-]+)" +
               r"-(?P<out>[^-]*)-(?P<opt>[^-]+)")
     # The dot before "json" is escaped so that only a real ".json"
     # extension matches, not an arbitrary character followed by "json".
     fpat = (r"^stats-(?P<start>\d+)-swift-(?P<kind>\w+)-" +
             auxpat +
             r"-(?P<pid>\d+)(-.*)?\.json$")
     fre = re.compile(fpat)
     sre = re.compile('|'.join(select_stat) if select_stat else '.*')
     for root, _, files in os.walk(path):
         for f in files:
             m = fre.match(f)
             if not m:
                 continue
             # NB: "pid" in fpat is a random number, not unix pid.
             mg = m.groupdict()
             module = mg["module"]
             if select_module and module not in select_module:
                 continue
             jobkind = mg['kind']
             jobid = int(mg['pid'])
             start_usec = int(mg['start'])
             jobargs = [mg["input"], mg["triple"], mg["out"], mg["opt"]]

             with open(os.path.join(root, f)) as fp:
                 j = json.load(fp)
             # Fallback duration, used when no wall-clock timer is kept.
             # NOTE: the wall timer is only seen if it survives the
             # select_stat and exclude_timers filters below.
             dur_usec = 1
             wall_re = re.compile(r"time\.swift-" + jobkind + r"\." +
                                  auxpat + r"\.wall$")
             stats = dict()
             for (k, v) in j.items():
                 if sre.search(k) is None:
                     continue
                 if k.startswith("time."):
                     if exclude_timers:
                         continue
                     v = int(1000000.0 * float(v))
                     # The job's own wall-clock timer gives its duration.
                     if wall_re.match(k):
                         dur_usec = v
                 stats[k] = v

             e = JobStats(jobkind=jobkind, jobid=jobid,
                          module=module, start_usec=start_usec,
                          dur_usec=dur_usec, jobargs=jobargs,
                          stats=stats)
             jobstats.append(e)
     return jobstats


 def merge_all_jobstats(jobstats, select_module=[], group_by_module=False,
                        merge_by="sum", **kwargs):
     """Fold a sequence of jobs into one by repeated pairwise merging.

     jobstats -- iterable of job objects.
     select_module -- if non-empty, only jobs whose module is in this
         collection take part.
     group_by_module -- if true, jobs are first merged per module and each
         per-module result is prefixed with its module name before the
         final merge.
     merge_by -- forwarded to each job's merged_with().
     kwargs -- accepted and ignored.

     Returns the merged job, or None when there is nothing to merge.
     """
     if len(select_module) > 0:
         jobstats = [job for job in jobstats if job.module in select_module]
     if group_by_module:
         def module_of(job):
             return job.module
         # groupby only groups adjacent items, so order by module first.
         ordered = sorted(jobstats, key=module_of)
         jobstats = [
             merge_all_jobstats(members, merge_by=merge_by).prefixed_by(name)
             for name, members in itertools.groupby(ordered, module_of)
         ]
     merged = None
     for job in jobstats:
         if merged is None:
             merged = job
             continue
         merged = merged.merged_with(job, merge_by=merge_by)
     return merged
	#!/usr/bin/python
	#
	# ==-- jobstats - support for reading the contents of stats dirs --==#
	#
	# This source file is part of the Swift.org open source project
	#
	# Copyright (c) 2014-2017 Apple Inc. and the Swift project authors
	# Licensed under Apache License v2.0 with Runtime Library Exception
	#
	# See https://swift.org/LICENSE.txt for license information
	# See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
	#
	# ==------------------------------------------------------------------------==#
	#
	# This file contains subroutines for loading object-representations of one or
	# more directories generated by `swiftc -stats-output-dir`.

	import datetime
	import itertools
	import json
	import os
	import random
	import re


	class JobStats(object):
	    """Object holding the stats of a single job run during a compilation,
	    corresponding to a single JSON file produced by a single job process
	    passed -stats-output-dir.

	    The constructor arguments are stored unchanged as attributes:
	    jobkind, jobid, module, start_usec, dur_usec, jobargs (a list) and
	    stats (a dict mapping stat names to values).
	    """

	    def __init__(self, jobkind, jobid, module, start_usec, dur_usec,
	                 jobargs, stats):
	        self.jobkind = jobkind
	        self.jobid = jobid
	        self.module = module
	        self.start_usec = start_usec
	        self.dur_usec = dur_usec
	        self.jobargs = jobargs
	        self.stats = stats

	    def is_driver_job(self):
	        """Return true iff self measures a driver job"""
	        return self.jobkind == 'driver'

	    def is_frontend_job(self):
	        """Return true iff self measures a frontend job"""
	        return self.jobkind == 'frontend'

	    def driver_jobs_ran(self):
	        """Return the count of a driver job's ran sub-jobs (0 if the
	        "Driver.NumDriverJobsRun" stat is absent)"""
	        assert self.is_driver_job()
	        return self.stats.get("Driver.NumDriverJobsRun", 0)

	    def driver_jobs_skipped(self):
	        """Return the count of a driver job's skipped sub-jobs (0 if the
	        "Driver.NumDriverJobsSkipped" stat is absent)"""
	        assert self.is_driver_job()
	        return self.stats.get("Driver.NumDriverJobsSkipped", 0)

	    def driver_jobs_total(self):
	        """Return the total count of a driver job's ran + skipped sub-jobs"""
	        assert self.is_driver_job()
	        return self.driver_jobs_ran() + self.driver_jobs_skipped()

	    def merged_with(self, other, merge_by="sum"):
	        """Return a new JobStats, holding the merger of self and other.

	        Stats present in both jobs are combined with the operation named
	        by merge_by ("sum", "min" or "max"; anything else raises
	        KeyError); stats present in only one job are copied unchanged.
	        The result gets a fresh random jobid, the concatenation of both
	        jobargs lists and a start/duration spanning both jobs. Its
	        jobkind and module are "<merged>" wherever the two jobs disagree.
	        """
	        ops = {"sum": lambda a, b: a + b,
	               # Because 0 is also a sentinel on counters we do a modified
	               # "nonzero-min" here. Not ideal but best we can do.
	               "min": lambda a, b: (min(a, b)
	                                    if a != 0 and b != 0
	                                    else max(a, b)),
	               "max": lambda a, b: max(a, b)}
	        op = ops[merge_by]
	        # Start from a copy of our own stats and fold the other job's
	        # stats in. Adding the two .items() results together only works
	        # where items() returns a list; dict views cannot be added.
	        merged_stats = dict(self.stats)
	        for k, v in other.stats.items():
	            if k in merged_stats:
	                merged_stats[k] = op(v, merged_stats[k])
	            else:
	                merged_stats[k] = v
	        merged_kind = self.jobkind
	        if other.jobkind != merged_kind:
	            merged_kind = "<merged>"
	        merged_module = self.module
	        if other.module != merged_module:
	            merged_module = "<merged>"
	        # The merged job spans from the earliest start to the latest end.
	        merged_start = min(self.start_usec, other.start_usec)
	        merged_end = max(self.start_usec + self.dur_usec,
	                         other.start_usec + other.dur_usec)
	        merged_dur = merged_end - merged_start
	        return JobStats(merged_kind, random.randint(0, 1000000000),
	                        merged_module, merged_start, merged_dur,
	                        self.jobargs + other.jobargs, merged_stats)

	    def prefixed_by(self, prefix):
	        """Return a new JobStats that copies self, with a fresh random
	        jobid and every stat name rewritten to "<prefix>.<name>"."""
	        prefixed_stats = {prefix + "." + k: v
	                          for (k, v) in self.stats.items()}
	        return JobStats(self.jobkind, random.randint(0, 1000000000),
	                        self.module, self.start_usec, self.dur_usec,
	                        self.jobargs, prefixed_stats)

	    def incrementality_percentage(self):
	        """Assuming the job is a driver job, return the amount of
	        jobs that actually ran, as a percentage of the total number,
	        rounded to two decimal places.

	        Raises ZeroDivisionError when the job has neither ran nor skipped
	        sub-jobs recorded."""
	        assert self.is_driver_job()
	        ran = self.driver_jobs_ran()
	        total = self.driver_jobs_total()
	        return round((float(ran) / float(total)) * 100.0, 2)

	    def to_catapult_trace_obj(self):
	        """Return a JSON-formattable object fitting chrome's
	        'catapult' trace format"""
	        return {"name": self.module,
	                "cat": self.jobkind,
	                "ph": "X",              # "X" == "complete event"
	                "pid": self.jobid,
	                "tid": 1,
	                "ts": self.start_usec,
	                "dur": self.dur_usec,
	                "args": self.jobargs}

	    def start_timestr(self):
	        """Return a formatted timestamp of the job's start-time"""
	        t = datetime.datetime.fromtimestamp(self.start_usec / 1000000.0)
	        return t.strftime("%Y-%m-%d %H:%M:%S")

	    def end_timestr(self):
	        """Return a formatted timestamp of the job's end-time"""
	        t = datetime.datetime.fromtimestamp((self.start_usec +
	                                             self.dur_usec) / 1000000.0)
	        return t.strftime("%Y-%m-%d %H:%M:%S")

	    def pick_lnt_metric_suffix(self, metric_name):
	        """Guess an appropriate LNT metric type for a given metric name"""
	        if "BytesOutput" in metric_name:
	            return "code_size"
	        if "RSS" in metric_name or "BytesAllocated" in metric_name:
	            return "mem"
	        return "compile"

	    def to_lnt_test_obj(self, args):
	        """Return a JSON-formattable object fitting LNT's 'submit' format.

	        args must provide lnt_order, lnt_tag, lnt_machine, and the
	        dict-convertible lnt_run_info and lnt_machine_info. One test
	        entry is emitted per stat, named
	        "<lnt_tag>.<module>.<stat name>.<metric suffix>".
	        """
	        run_info = {
	            "run_order": str(args.lnt_order),
	            "tag": str(args.lnt_tag),
	        }
	        # Caller-supplied run info may override the two defaults above.
	        run_info.update(dict(args.lnt_run_info))
	        stats = self.stats
	        return {
	            "Machine":
	            {
	                "Name": args.lnt_machine,
	                "Info": dict(args.lnt_machine_info)
	            },
	            "Run":
	            {
	                "Start Time": self.start_timestr(),
	                "End Time": self.end_timestr(),
	                "Info": run_info
	            },
	            "Tests":
	            [
	                {
	                    "Data": [v],
	                    "Info": {},
	                    "Name": "%s.%s.%s.%s" % (args.lnt_tag, self.module,
	                                             k, self.pick_lnt_metric_suffix(k))
	                }
	                for (k, v) in stats.items()
	            ]
	        }


	def load_stats_dir(path, select_module=[], select_stat=[],
	                   exclude_timers=False, **kwargs):
	    """Loads all stats-files found in path into a list of JobStats objects.

	    The tree rooted at path is walked recursively. Only files named
	    stats-<start>-swift-<kind>-<module>-<input>-<triple>-<out>-<opt>-<pid>
	    (optionally followed by "-<anything>") with a ".json" extension are
	    read; everything else is ignored.

	    path -- directory to walk.
	    select_module -- if non-empty, only files whose module name is in
	        this collection are loaded.
	    select_stat -- if non-empty, a list of regular expressions; a stat is
	        kept only when at least one of them is found in its name.
	    exclude_timers -- if true, stats whose name starts with "time." are
	        dropped.
	    kwargs -- accepted and ignored.

	    Returns a list with one JobStats per loaded file. Values of "time."
	    stats are multiplied by 1000000 and truncated to int (presumably
	    seconds to microseconds).
	    """
	    jobstats = []
	    auxpat = (r"(?P<module>[^-]+)-(?P<input>[^-]+)-(?P<triple>[^-]+)" +
	              r"-(?P<out>[^-]*)-(?P<opt>[^-]+)")
	    # The dot before "json" is escaped so that only a real ".json"
	    # extension matches, not an arbitrary character followed by "json".
	    fpat = (r"^stats-(?P<start>\d+)-swift-(?P<kind>\w+)-" +
	            auxpat +
	            r"-(?P<pid>\d+)(-.*)?\.json$")
	    fre = re.compile(fpat)
	    # Join with a bare "|" so the selections are regex alternatives; an
	    # escaped "\|" would instead demand a literal pipe in the stat name.
	    sre = re.compile('|'.join(select_stat) if select_stat else '.*')
	    for root, _, files in os.walk(path):
	        for f in files:
	            m = fre.match(f)
	            if not m:
	                continue
	            # NB: "pid" in fpat is a random number, not unix pid.
	            mg = m.groupdict()
	            module = mg["module"]
	            if select_module and module not in select_module:
	                continue
	            jobkind = mg['kind']
	            jobid = int(mg['pid'])
	            start_usec = int(mg['start'])
	            jobargs = [mg["input"], mg["triple"], mg["out"], mg["opt"]]

	            with open(os.path.join(root, f)) as fp:
	                j = json.load(fp)
	            # Fallback duration, used when no wall-clock timer is kept.
	            # NOTE: the wall timer is only seen if it survives the
	            # select_stat and exclude_timers filters below.
	            dur_usec = 1
	            wall_re = re.compile(r"time\.swift-" + jobkind + r"\." +
	                                 auxpat + r"\.wall$")
	            stats = dict()
	            for (k, v) in j.items():
	                if sre.search(k) is None:
	                    continue
	                if k.startswith("time."):
	                    if exclude_timers:
	                        continue
	                    v = int(1000000.0 * float(v))
	                    # The job's own wall-clock timer gives its duration.
	                    if wall_re.match(k):
	                        dur_usec = v
	                stats[k] = v

	            e = JobStats(jobkind=jobkind, jobid=jobid,
	                         module=module, start_usec=start_usec,
	                         dur_usec=dur_usec, jobargs=jobargs,
	                         stats=stats)
	            jobstats.append(e)
	    return jobstats


	def merge_all_jobstats(jobstats, select_module=[], group_by_module=False,
	                       merge_by="sum", **kwargs):
	    """Does a pairwise merge of the elements of list of jobs.

	    jobstats -- iterable of job objects.
	    select_module -- if non-empty, only jobs whose module is in this
	        collection take part.
	    group_by_module -- if true, jobs are first merged per module and each
	        per-module result is prefixed with its module name before the
	        final merge.
	    merge_by -- forwarded to each job's merged_with().
	    kwargs -- accepted and ignored.

	    Returns the merged job, or None when there is nothing to merge.
	    """
	    m = None
	    if len(select_module) > 0:
	        jobstats = [j for j in jobstats if j.module in select_module]
	    if group_by_module:
	        def keyfunc(j):
	            return j.module
	        # groupby only groups adjacent items, so sort by module first.
	        jobstats = sorted(jobstats, key=keyfunc)
	        prefixed = []
	        for mod, group in itertools.groupby(jobstats, keyfunc):
	            groupmerge = merge_all_jobstats(group, merge_by=merge_by)
	            prefixed.append(groupmerge.prefixed_by(mod))
	        jobstats = prefixed
	    for j in jobstats:
	        if m is None:
	            m = j
	        else:
	            m = m.merged_with(j, merge_by=merge_by)
	    return m