# Copyright 2019 The Fuchsia Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Recipe for running zbi tests."""

import copy
import os

from recipe_engine.config import Enum, List
from recipe_engine.post_process import StatusSuccess, StatusFailure
from recipe_engine.recipe_api import Property

# Recipe modules this recipe uses. Each entry is reached through the `api`
# object under its final path component, e.g. 'fuchsia/build' is used below as
# `api.build` and 'recipe_engine/step' as `api.step`.
DEPS = [
    'fuchsia/build',
    'fuchsia/checkout',
    'fuchsia/emu',
    'fuchsia/status_check',
    'fuchsia/swarming_retry',
    'fuchsia/symbolize',
    'recipe_engine/buildbucket',
    'recipe_engine/cipd',
    'recipe_engine/context',
    'recipe_engine/file',
    'recipe_engine/isolated',
    'recipe_engine/json',
    'recipe_engine/path',
    'recipe_engine/platform',
    'recipe_engine/properties',
    'recipe_engine/step',
    'recipe_engine/swarming',
]

# CPU architectures accepted by the 'target_cpu' property.
TARGETS = ['x64', 'arm64']

# Seconds of silence tolerated from a test swarming task before it is killed
# for producing no output (3 minutes).
TEST_IO_TIMEOUT_SECS = 3 * 60

# Seconds a pending test swarming task may wait to be scheduled on a bot.
# A test task should never expire; the 5 hours used here stand in for
# "infinite".
TEST_EXPIRATION_TIMEOUT_SECS = 5 * 60 * 60

# Seconds a test may run before swarming kills it (10 minutes).
TEST_EXECUTION_TIMEOUT_SECS = 10 * 60

# Name of the image-manifest entry holding the kernel the emulator boots.
QEMU_KERNEL_NAME = 'qemu-kernel'

# Verbosity passed to botanist in test tasks. From least to most verbose the
# accepted values are "fatal", "error", "warning", "info", "debug" and
# "trace".
BOTANIST_LOG_LEVEL = 'debug'

# Input properties of this recipe; each key is passed to RunSteps as the
# argument of the same name. Only 'project' and 'device_type' declare defaults.
PROPERTIES = {
    # Checkout configuration.
    'project':
        Property(kind=str, default=None, help='Jiri remote manifest project'),
    'manifest':
        Property(help='Jiri manifest to use', kind=str),
    'remote':
        Property(help='Remote manifest repository', kind=str),
    # Build configuration.
    'target_cpu':
        Property(help='Target to build', kind=Enum(*TARGETS)),
    'variants':
        Property(help='Variants to use', kind=List(basestring)),
    # Test configuration.
    'device_type':
        Property(
            kind=str,
            default='QEMU',
            help='Device type to use, currently only emulators (QEMU or AEMU)')
}


def prebuilt_path(api, checkout_root, *path):
  """Returns the host-platform prebuilt directory below the given subdirs.

  The result is <checkout_root>/prebuilt/<path...>/<os>-<arch>, where <os> is
  api.platform.name and <arch> is the name mapped from api.platform.arch.

  Args:
    api: The recipe API object; only api.platform is used.
    checkout_root (Path): Root of the checkout.
    *path (str): Subdirectories between 'prebuilt' and the host-platform
      directory.

  Raises:
    KeyError: If api.platform.arch is anything other than 'intel'.
  """
  host_os = api.platform.name
  host_arch = {'intel': 'x64'}[api.platform.arch]
  components = list(path) + ['%s-%s' % (host_os, host_arch)]
  return checkout_root.join('prebuilt', *components)


def create_qemu_task(api, **kwargs):
  """Creates a swarming_retry task that runs one ZBI test in an emulator.

  Task subclasses api.swarming_retry.TriggeredTask, which is only reachable
  through the api object, so the class can't be top-level or otherwise
  defined at module load time. It is defined inside this function instead.

  Args:
    api: The recipe API object.
    **kwargs: Forwarded to Task.__init__; see its docstring for the full
      argument list.

  Returns:
    A Task whose swarming request has already been built.
  """

  class Task(api.swarming_retry.TriggeredTask):
    """Swarming task that boots a single ZBI test under botanist."""

    def __init__(self, api, name, zbi_test, checkout_root, gn_results,
                 target_cpu, device_type, **kwargs):
      """Stores the inputs and builds the swarming request.

      Args:
        api: The recipe API object.
        name (str): Name of the swarming task.
        zbi_test (dict): ZBI test entry; its 'path', 'cpu' and
          'success_string' keys are read. It is deep-copied, so the caller's
          object is never modified.
        checkout_root (Path): Root of the Fuchsia checkout.
        gn_results: Build results; fuchsia_build_dir, zircon_build_dir,
          image_manifest and tool() are used.
        target_cpu (str): CPU passed to gn_results.tool() to locate botanist.
        device_type (str): 'QEMU' or 'AEMU'. For any other value no emulator
          package is added to the task.
        **kwargs: Forwarded to the TriggeredTask constructor.
      """
      super(Task, self).__init__(api=api, name=name, request=None, **kwargs)
      self._checkout_root = checkout_root
      self._gn_results = gn_results
      self._target_cpu = target_cpu
      self._zbi_test = copy.deepcopy(zbi_test)
      self._device_type = device_type

      self._create_request()

    def _create_request(self):
      """Isolates the test inputs and stores the request in self._request.

      The isolated tree holds the ZBI under test, the QEMU kernel image,
      botanist, and an images.json manifest listing the two images.
      """
      isolate_tree = self._api.file.symlink_tree(
          root=self._api.path.mkdtemp('isolate'))
      build_dir = self._gn_results.fuchsia_build_dir

      zbi_basename = os.path.basename(self._zbi_test['path'])
      isolate_tree.register_link(
          target=build_dir.join(self._zbi_test['path']),
          linkname=isolate_tree.root.join(zbi_basename))

      # Copy the manifest entry because its path is rewritten below.
      qemu_kernel = copy.deepcopy([
          image for image in self._gn_results.image_manifest
          if image['name'] == QEMU_KERNEL_NAME
      ][0])

      qemu_kernel_basename = self._api.path.basename(qemu_kernel['path'])
      isolate_tree.register_link(
          target=build_dir.join(qemu_kernel['path']),
          linkname=isolate_tree.root.join(qemu_kernel_basename))

      isolate_tree.register_link(
          target=self._gn_results.tool('botanist', self._target_cpu),
          linkname=isolate_tree.root.join('botanist'),
      )
      isolate_tree.create_links('create tree of images')

      # Every image is linked at the root of the isolate, so the manifest
      # written below refers to each one by basename only.
      qemu_kernel['path'] = qemu_kernel_basename
      # TODO(IN-1420) botanist expects test to be named 'zircon-a'
      self._zbi_test['name'] = 'zircon-a'
      self._zbi_test['path'] = zbi_basename

      image_manifest_name = 'images.json'
      image_manifest_path = isolate_tree.root.join(image_manifest_name)
      image_manifest = [self._zbi_test, qemu_kernel]
      self._api.file.write_json(
          'write image manifest', image_manifest_path, image_manifest, indent=2)

      isolated = self._api.isolated.isolated(isolate_tree.root)
      isolated.add_dir(isolate_tree.root)
      isolated_hash = isolated.archive('isolate images')

      ensure_file = self._api.cipd.EnsureFile()

      if self._device_type == 'AEMU':
        self._api.emu.add_aemu_to_ensure_file(ensure_file, subdir='aemu/bin')
      elif self._device_type == 'QEMU':
        self._api.emu.add_qemu_to_ensure_file(ensure_file, subdir='qemu')

      cpu = self._zbi_test['cpu']
      emulator = self._device_type.lower()

      dimensions = {
          'pool': 'fuchsia.tests',
          'os': 'Debian',
          'cpu': cpu,
          'kvm': '1',
      }

      cmd = [
          './botanist',
          '-level', BOTANIST_LOG_LEVEL,
          'qemu',
          '-type', emulator,
          '-qemu-dir', './%s/bin' % emulator,
          '-images', image_manifest_name,
          '-arch', cpu,
          '-use-kvm',
      ]  # yapf: disable

      env_name = '%s-%s' % (self._device_type, cpu)
      tags = {'test_environment_name': [env_name]}
      request = self._api.swarming.task_request().with_name(
          self.name).with_tags(tags)
      self._request = request.with_slice(0, request[0]
          .with_command(cmd)
          .with_isolated(isolated_hash)
          .with_dimensions(**dimensions)
          .with_execution_timeout_secs(TEST_EXECUTION_TIMEOUT_SECS)
          .with_expiration_secs(TEST_EXPIRATION_TIMEOUT_SECS)
          .with_io_timeout_secs(TEST_IO_TIMEOUT_SECS)
          .with_cipd_ensure_file(ensure_file)
      )  # yapf: disable

    def process_result(self):
      """Symbolizes the latest attempt's output and records any failure.

      Sets failure_reason on the most recent attempt to 'kernel panic' when
      the output contains 'KERNEL PANIC', otherwise to 'failed to find
      success string' when the test's success string is absent. The attempt
      is left untouched when neither condition holds.
      """
      attempt = self.attempts[-1]
      assert attempt.result
      result = attempt.result

      symbolize_tool = self._gn_results.tool('symbolize')
      clang_dir = prebuilt_path(self._api, self._checkout_root, 'third_party',
                                'clang')
      llvm_symbolizer = self._gn_results.tool('llvm-symbolizer')
      build_id_dirs = (
          self._gn_results.zircon_build_dir.join('.build-id'),
          clang_dir.join('lib', 'debug', '.build-id'),
      )

      with self._api.step.nest(result.name) as presentation:
        attempt.logs['symbolized log'] = self._api.symbolize(
            symbolize_tool=symbolize_tool,
            build_id_dirs=build_id_dirs,
            llvm_symbolizer=llvm_symbolizer,
            data=result.output,
            presentation=presentation)

        # A kernel panic may be present in the logs even if the task timed
        # out, so check for that first. It is the more specific diagnosis, so
        # it must not be replaced by the generic missing-success-string reason
        # below.
        if 'KERNEL PANIC' in result.output:
          attempt.failure_reason = 'kernel panic'

        # Because of the way these tests run (they are the only user-mode
        # process in the system, and then the system shuts down) we can't
        # collect an exit code or nicely structured output, so we have to
        # search the output for a hard-coded string to detect success.

        # TODO(mohrr) abstract this out because qemu and devices will
        # differ.
        elif self._zbi_test['success_string'] not in result.output:
          attempt.failure_reason = 'failed to find success string'

    def present_attempt(self, task_step, attempt, **kwargs):
      """Emits a step summarising one attempt.

      The step is named after the attempt and its pass/fail state, shows the
      failure reason as summary text, links to the task UI and attaches every
      log recorded on the attempt.

      Args:
        task_step: Unused.
        attempt: The attempt to present.
        **kwargs: Unused.
      """
      del task_step, kwargs  # Unused.
      name = '%s (%s)' % (attempt.name, 'pass' if attempt.success else 'fail')
      step = self._api.step(name, None)
      step.presentation.step_summary_text = attempt.failure_reason
      step.presentation.links['task UI'] = attempt.task_ui_link

      for log, data in attempt.logs.iteritems():
        step.presentation.logs[log] = data

  return Task(api=api, **kwargs)


def RunSteps(api, project, manifest, remote, target_cpu, variants, device_type):
  """Checks out Fuchsia, builds the ZBI tests and runs each one via swarming.

  Args:
    api: The recipe API object.
    project (str or None): Jiri remote manifest project.
    manifest (str): Jiri manifest to use; must be non-empty.
    remote (str): Remote manifest repository; must be non-empty.
    target_cpu (str): Target CPU to build for.
    variants (list of str): Build variants.
    device_type (str): Emulator to run the tests on.
  """
  # The checkout is infrastructure, not part of the code under test.
  with api.context(infra_steps=True):
    assert manifest
    assert remote
    fuchsia_dir = api.path['start_dir'].join('fuchsia')
    checkout = api.checkout.fuchsia_with_options(
        path=fuchsia_dir,
        build=api.buildbucket.build,
        manifest=manifest,
        remote=remote,
        project=project,
    )

  root_dir = checkout.root_dir

  with api.step.nest('build'):
    # //garnet/tests/zircon:zbi_tests forces GN to enter
    # //garnet/tests/zircon/BUILD.gn, which is necessary in order to generate
    # the zbi test build API. //bundles:infratools is necessary to build
    # botanist.
    gn_packages = ['//garnet/tests/zircon:zbi_tests', '//bundles:infratools']
    gn_results = api.build.gen(
        checkout_root=root_dir,
        fuchsia_build_dir=root_dir.join('out', 'default'),
        target=target_cpu,
        build_type='debug',
        product='products/bringup.gni',
        variants=variants,
        packages=gn_packages,
    )

    # Of all the images, only the QEMU kernel is needed by the test tasks.
    api.build.ninja(
        checkout_root=root_dir,
        gn_results=gn_results,
        targets=['bundles:infratools'],
        build_zbi_tests=True,
        build_images=True,
        image_filter=lambda image: image['name'] == QEMU_KERNEL_NAME,
    )

  # Arguments shared by every task; only the name and the test entry vary.
  shared_task_args = dict(
      checkout_root=root_dir,
      gn_results=gn_results,
      target_cpu=target_cpu,
      device_type=device_type,
  )

  tasks = []
  for name, zbi_test in gn_results.zbi_tests.iteritems():
    task_name = '%s - %s' % (name, device_type)
    with api.step.nest('prepare test: %s' % task_name):
      task = create_qemu_task(
          api, name=task_name, zbi_test=zbi_test, **shared_task_args)
      tasks.append(task)

  with api.swarming_retry.retry(tasks) as retry:
    retry.run_tasks()
    retry.present_tasks()
    retry.raise_failures()


def GenTests(api):
  """Yields the simulation test cases for this recipe.

  For each device type four cases are produced: a passing run, a run that
  fails on both attempts, a flake that passes on the second attempt, and a
  run whose output contains a kernel panic.
  """

  def base_case(name, zbi_test, status, device_type):
    """Builds the test data shared by all cases.

    Also stores the expected swarming task name in zbi_test['task_name'].
    """
    case_name = '%s-%s-%s' % (name, zbi_test['cpu'], device_type)

    # RunSteps never reads this entry; it is stored on the dict only so the
    # callers below can look the task name up.
    zbi_test['task_name'] = '%s - %s' % (zbi_test['name'], device_type)

    case = api.status_check.test(case_name, status=status)
    case += api.buildbucket.ci_build(
        git_repo='https://fuchsia.googlesource.com/fuchsia')
    case += api.properties(
        project='zircon',
        manifest='manifest',
        remote='https://fuchsia.googlesource.com/fuchsia',
        target_cpu=zbi_test['cpu'],
        variants=['clang'],
        device_type=device_type)
    case += api.step_data('build.read zbi test manifest',
                          api.json.output([zbi_test]))
    return case

  def add_attempts(case, task_name, attempts):
    """Appends trigger and collect data for consecutive retry iterations.

    attempts is a list of (task_id, output) pairs; the position of a pair in
    the list is used as its iteration number.
    """
    for iteration, (task_id, output) in enumerate(attempts):
      case = case + api.swarming_retry.trigger_data(
          name=task_name, task_id=task_id, iteration=iteration)
      case = case + api.swarming_retry.collect_data([
          api.swarming_retry.passed_task(task_name,
                                          task_id=task_id,
                                          output=output),
      ], iteration=iteration)  # yapf: disable
    return case

  # NOTE(review): zbi_test is assigned once before the loop, so only the first
  # device type runs 'pass' against the arm64 mock; later iterations reuse the
  # x64 mock left behind by the preceding kernel_panic case. Confirm whether
  # that is intended before changing it, since it determines the generated
  # test names.
  zbi_test = api.build.mock_zbi_test('arm64')
  for device_type in ['AEMU', 'QEMU']:
    # base_case() must run before zbi_test['task_name'] is read.
    case = base_case('pass', zbi_test, 'success', device_type)
    case = add_attempts(case, zbi_test['task_name'], [
        (123, zbi_test['success_string']),
    ])
    yield case + api.post_process(StatusSuccess)

    zbi_test = api.build.mock_zbi_test('x64')
    case = base_case('fail', zbi_test, 'failure', device_type)
    case = add_attempts(case, zbi_test['task_name'], [
        (123, 'not success'),
        (456, 'not success'),
    ])
    yield case + api.post_process(StatusFailure)

    zbi_test = api.build.mock_zbi_test('x64')
    case = base_case('flake', zbi_test, 'success', device_type)
    case = add_attempts(case, zbi_test['task_name'], [
        (123, 'not success'),
        (456, zbi_test['success_string']),
    ])
    yield case + api.post_process(StatusSuccess)

    # The output holds both the success string and "KERNEL PANIC" to make it
    # clear that this case fails because of the kernel panic and not because
    # the success string is missing.
    output = zbi_test['success_string'] + 'KERNEL PANIC'
    zbi_test = api.build.mock_zbi_test('x64')
    case = base_case('kernel_panic', zbi_test, 'failure', device_type)
    case = add_attempts(case, zbi_test['task_name'], [
        (123, output),
        (456, output),
    ])
    yield case + api.post_process(StatusFailure)
