| # Copyright 2018 The Fuchsia Authors. All rights reserved. |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| """Recipe for comparing performance between two revisions of Fuchsia.""" |
| |
| from recipe_engine.config import Enum, List, Single |
| from recipe_engine.recipe_api import Property |
| |
| TARGETS = ['arm64', 'x64'] |
| |
| BUILD_TYPES = ['debug', 'release', 'thinlto', 'lto'] |
| |
| DEVICES = [ |
| 'QEMU', |
| 'Intel NUC Kit NUC6i3SYK', |
| 'Intel NUC Kit NUC7i5DNHE', |
| 'Khadas Vim2 Max', |
| ] |
| |
| DEPS = [ |
| 'fuchsia/artifacts', |
| 'fuchsia/build', |
| 'fuchsia/buildbucket_util', |
| 'fuchsia/checkout', |
| 'fuchsia/fuchsia', |
| 'fuchsia/testing', |
| 'fuchsia/testing_requests', |
| 'recipe_engine/buildbucket', |
| 'recipe_engine/context', |
| 'recipe_engine/file', |
| 'recipe_engine/json', |
| 'recipe_engine/path', |
| 'recipe_engine/properties', |
| 'recipe_engine/python', |
| 'recipe_engine/raw_io', |
| 'recipe_engine/step', |
| 'recipe_engine/time', |
| ] |
| |
| PROPERTIES = { |
| 'manifest': |
| Property(kind=str, help='Jiri manifest to use'), |
| 'remote': |
| Property(kind=str, help='Remote manifest repository'), |
| 'target': |
| Property(kind=Enum(*TARGETS), help='Target to build'), |
| 'build_type': |
| Property( |
| kind=Enum(*BUILD_TYPES), help='The build type', default='debug'), |
| 'packages': |
| Property(kind=List(basestring), help='Packages to build', default=[]), |
| 'variants': |
| Property( |
| kind=List(basestring), |
| help='--variant arguments to GN in `select_variant`', |
| default=[]), |
| 'gn_args': |
| Property( |
| kind=List(basestring), help='Extra args to pass to GN', default=[]), |
| 'ninja_targets': |
| Property( |
| kind=List(basestring), |
| help='Extra target args to pass to ninja', |
| default=[]), |
| 'board': |
| Property(kind=str, help='Board to build', default=None), |
| 'product': |
| Property(kind=str, help='Product to build', default=None), |
| 'test_pool': |
| Property( |
| kind=str, |
| help='Swarming pool from which a test task will be drawn', |
| default='fuchsia.tests'), |
| 'device_type': |
| Property( |
| kind=str, |
            help='The type of device to execute tests on; if the value is'
            ' not QEMU it will be passed to Swarming as the device_type'
            ' dimension',
| default='QEMU'), |
| 'pave': |
| Property( |
| kind=bool, |
            help='Whether to pave images onto the device for testing.'
            ' (Ignored if device_type == QEMU)',
| default=True), |
| 'test_timeout_secs': |
| Property( |
| kind=Single((int, float)), |
            help='How long to wait before timing out on tests',
| default=40 * 60), |
| # This property is not intended to be set by a config. It is just here |
| # so that a test case can pass a smaller number in order to reduce the |
| # size of the generated test expectations. |
| 'boots_per_revision': |
| Property( |
| kind=int, |
            help='Number of Fuchsia boots to run the performance tests on,'
            ' in order to account for cross-boot variation in performance',
| default=5), |
| 'debug_symbol_gcs_bucket': |
| Property( |
| kind=str, |
            help='GCS bucket to upload debug symbols to and read them from'),
| 'artifact_gcs_bucket': |
| Property( |
| kind=str, |
            help='GCS bucket to upload build artifacts to and read them from'),
| 'test_task_service_account': |
| Property( |
| kind=str, |
| help='The service account to run test tasks with', |
| default=''), |
| } |
| |
| |
| def git_rev_parse(api, desc, git_dir, rev): |
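  """Resolves |rev| (e.g. 'HEAD' or 'HEAD^') to a commit hash in |git_dir|."""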
| with api.context(cwd=git_dir): |
| result = api.step( |
| desc, ['git', 'rev-parse', rev], stdout=api.raw_io.output()) |
| return result.stdout.strip() |
| |
| |
| def git_checkout(api, git_dir, rev): |
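  """Checks out revision |rev| in the Git repository at |git_dir|."""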
| with api.context(cwd=git_dir): |
| api.step('git checkout', ['git', 'checkout', rev]) |
| |
| |
| # Run "git log" on the repo of the change being tested, to list the most |
| # recent commits there. This is to help developers debug the bot run and |
| # understand what it is testing. The change will often be rebased onto |
| # tip-of-tree; the step records what the change was rebased onto. |
| def git_log(api, git_dir): |
| with api.context(cwd=git_dir): |
| api.step('git log for debugging', ['git', 'log', '--max-count=10']) |
| |
| |
| def RunSteps(api, manifest, remote, target, build_type, packages, variants, |
| gn_args, ninja_targets, test_pool, device_type, pave, board, |
| product, test_timeout_secs, boots_per_revision, |
| debug_symbol_gcs_bucket, artifact_gcs_bucket, |
| test_task_service_account): |
| checkout_root = api.path['start_dir'].join('fuchsia') |
| checkout = api.checkout.fuchsia_with_options( |
| path=checkout_root, |
| build=api.buildbucket.build, |
| manifest=manifest, |
| remote=remote, |
| ) |
| |
  # Also build the system snapshot, which records binary sizes; the
  # snapshots from the two revisions are compared at the end of the recipe.
  snapshot_target = 'obj/build/images/system.snapshot'
  ninja_targets = list(ninja_targets)
  ninja_targets.append(snapshot_target)
| |
  # Command run on the target device: a wrapper script that runs the
  # perfcompare benchmarks and writes their output into the results
  # directory collected by the test infrastructure.
  test_cmds = [
      ' '.join([
          '/pkgfs/packages/fuchsia_benchmarks/0/bin/benchmarks_perfcompare.sh',
          api.testing_requests.results_dir_on_target
      ])
  ]
| |
| # This builds Fuchsia at the given revision and kicks off a swarming task |
| # to run the tests. It returns a function that, when called, will wait |
| # for the swarming job to complete and return the test results. This |
| # division into two parts allows swarming tasks to run concurrently. |
| def test_version(before_or_after, git_rev): |
| with api.step.nest('build and launch tests for "%s" revision' % |
| before_or_after): |
| git_checkout(api, checkout.root_dir, git_rev) |
| build_dir = checkout.root_dir.join('out') |
| build = api.build.with_options( |
| build_dir=build_dir, |
| checkout=checkout, |
| target=target, |
| build_type=build_type, |
| packages=packages, |
| variants=variants, |
| gn_args=gn_args, |
| ninja_targets=ninja_targets, |
| board=board, |
| product=product, |
| pave=pave, |
| ) |
| if debug_symbol_gcs_bucket: |
| build.upload_debug_symbols( |
| debug_symbol_gcs_bucket=debug_symbol_gcs_bucket) |
| # Make a copy of the binary size data. |
| snapshot_file = build.fuchsia_build_dir.join(snapshot_target) |
| copied_file = checkout.root_dir.join('snapshot_%s' % before_or_after) |
| api.step('copy binary size data', ['cp', snapshot_file, copied_file]) |
      # Dump the binary size data into the step's log.
| api.step('binary size data', ['cat', snapshot_file]) |
| |
    # Must be set before testing_requests.deprecated_shard_requests() is
    # called.
| api.artifacts.gcs_bucket = artifact_gcs_bucket |
    # The uuid is used as the namespace, so append before_or_after to it to
    # give the "before" and "after" artifacts separate namespaces.
| api.artifacts.uuid = '%s/%s' % (api.buildbucket_util.id, before_or_after) |
| shard_requests = api.testing_requests.deprecated_shard_requests( |
| build, |
| test_cmds, |
| device_type, |
| test_pool, |
| test_timeout_secs, |
| pave, |
| default_service_account=test_task_service_account, |
| ) |
    # Must be done after testing_requests.deprecated_shard_requests() is
    # called, because that modifies the filesystem images.
    # TODO(garymm,joshuaseaton): once legacy_qemu code paths are removed,
    # remove this comment as it will become false.
| api.artifacts.upload('upload artifacts', build) |
| |
| orchestration_inputs = api.build.TestOrchestrationInputs.from_build_results( |
| build, shard_requests) |
| collect_funcs = [] |
| for boot_idx in xrange(boots_per_revision): |
| with api.step.nest('boot %d of %d' % |
| (boot_idx + 1, boots_per_revision)): |
| collect_funcs.append( |
| api.testing.deprecated_test_async( |
| debug_symbol_gcs_bucket, |
| device_type, |
| orchestration_inputs, |
| overwrite_summary=False)) |
| |
| def finish_func(): |
| with api.step.nest('collect results for "%s" revision' % before_or_after): |
| results_dir = checkout.root_dir.join('perf_results_%s' % |
| before_or_after) |
| by_boot_dir = results_dir.join('by_boot') |
| api.step('make results directory', ['mkdir', '-p', by_boot_dir]) |
| for boot_idx, collect_func in enumerate(collect_funcs): |
| with api.step.nest('boot %d of %d' % |
| (boot_idx + 1, boots_per_revision)): |
| test_results = collect_func() |
| # Make a copy of the perf test results directory. |
| api.step('copy perf test results', [ |
| 'cp', '-r', test_results.results_dir, |
| by_boot_dir.join('boot%06d' % boot_idx) |
| ]) |
| |
| return { |
| 'binary_size_data_file': copied_file, |
| 'perf_test_results_dir': results_dir |
| } |
| |
| return finish_func |
| |
  # The "before" revision is the parent (HEAD^) of the commit under test
  # (HEAD).
  rev_before = git_rev_parse(api, 'get "before" revision', checkout.root_dir,
                             'HEAD^')
  rev_after = git_rev_parse(api, 'get "after" revision', checkout.root_dir,
                            'HEAD')
| git_log(api, checkout.root_dir) |
| # Build the "before" and "after" revisions and launch swarming tasks to |
| # test them. Building will happen sequentially, but the swarming tasks |
| # will run concurrently. |
| finish_before = test_version('before', rev_before) |
| finish_after = test_version('after', rev_after) |
| results_before = finish_before() |
| results_after = finish_after() |
  # The following steps run with the checkout in the "after" state, so
  # perfcompare.py is taken from the "after" revision.
| perfcompare_tool = checkout.root_dir.join( |
| 'garnet/bin/perfcompare/perfcompare.py') |
| api.python('compare binary sizes before and after', perfcompare_tool, [ |
| 'compare_sizes', results_before['binary_size_data_file'], |
| results_after['binary_size_data_file'] |
| ]) |
| # Output the perf results dataset as a single file so that it can be |
| # easily downloaded. |
| api.python('generate raw_perf_dataset.json', perfcompare_tool, [ |
| 'make_combined_perf_dataset_file', |
| results_before['perf_test_results_dir'], |
| results_after['perf_test_results_dir'] |
| ]) |
| api.python('compare perf test results before and after', perfcompare_tool, [ |
| 'compare_perf', results_before['perf_test_results_dir'], |
| results_after['perf_test_results_dir'] |
| ]) |
| |
| |
| def GenTests(api): |
  # Since the recipe above builds and tests Fuchsia twice (for the "before"
  # and "after" revisions), adjust the mock output to contain two sets of
  # results.
| def testing_steps(): |
| steps = [] |
| task_result_step = api.testing.task_step_data( |
| [ |
| api.fuchsia.m.swarming.task_result( |
| # Include serial.txt to cover the processing of the serial log, |
| # since that codepath is used in production and has broken in |
| # the past. |
| id='1', |
| name='test', |
| outputs=['output.fs', 'serial.txt']), |
| ], |
| enable_retries=False, |
| ) |
| steps.append(task_result_step) |
| for rev in ('before', 'after'): |
| for boot in (1, 2): |
        base_name = ('collect results for "%s" revision.boot %d of 2' %
                     (rev, boot))
| task_result_step.step_data[ |
| base_name + '.collect'] = task_result_step.step_data['collect'] |
| # Include summary.json in the mocked contents of the output archive to |
| # get better coverage of codepaths in the testing recipe module used by |
| # this recipe. |
| summary_data = { |
| 'tests': [{ |
| 'name': 'perfcompare_benchmark.catapult_json', |
| 'result': 'SUCCESS', |
| }] |
| } |
| steps.extend([ |
| api.step_data( |
| base_name + '.get extracted files', |
| api.file.listdir(['summary.json']), |
| ), |
| api.step_data( |
| base_name + '.all test results.read summary.json', |
| api.file.read_text(api.json.dumps(summary_data),), |
| ) |
| ]) |
| del task_result_step.step_data['collect'] |
| return steps |
| |
| yield api.fuchsia.test( |
| 'successful_run', |
| # Pass a smaller value than the default for boots_per_revision to |
| # reduce the size of the test expectations output, but use a number |
| # >1 in order to test multiple boots. |
| properties=dict( |
| run_tests=True, |
| boots_per_revision=2, |
| debug_symbol_gcs_bucket='debug-symbols', |
| artifact_gcs_bucket='fuchsia-infra-artifacts', |
| test_task_service_account='service_account', |
| ), |
| clear_default_steps=True, |
| steps=testing_steps(), |
| ) |