# Copyright 2021 The Fuchsia Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Entrypoint to Sapling bug filing automation.

Query for patchsets that failed exclusively on tryjobs from a specified LUCI
project, and attach the data to a Monorail bug for further human review.
"""

from datetime import datetime, timedelta

from PB.recipe_engine import result
from PB.go.chromium.org.luci.buildbucket.proto import common as common_pb2
from PB.go.chromium.org.luci.buildbucket.proto import build as build_pb2

from PB.recipes.fuchsia.sapling import InputProperties

DEPS = [
    "fuchsia/bigquery",
    "fuchsia/buildbucket_util",
    "fuchsia/lkg",
    "fuchsia/monorail",
    "fuchsia/status_check",
    "recipe_engine/buildbucket",
    "recipe_engine/file",
    "recipe_engine/path",
    "recipe_engine/step",
    "recipe_engine/time",
]

PROPERTIES = InputProperties

# The datetime string format to pass around queries and properties.
DATETIME_FMT = "%Y-%m-%d %H:%M:%S"

SAPLING_QUERY = """
WITH
  eq_patchset_attempts AS (
    SELECT
      -- equivalent_cl_group_key is a UUID for a group of patchsets which are
      -- "equivalent", e.g. PS1 and PS2 will share an equivalent_cl_group_key if
      -- PS2 is a trivial rebase. It is a consistent value across multiple CQ
      -- attempts.
      -- We use this to uniquely identify a group of "equivalent" patchsets
      -- rather than treating every patchset as unique.
      equivalent_cl_group_key,
      cl.change,
      cl.patchset,
      cl.host,
      bb.id,
      start_time,
      status,
      COUNTIF(status = 'SUCCESS')
        OVER (PARTITION BY equivalent_cl_group_key) AS cq_success,
    FROM
      `commit-queue.fuchsia.attempts`,
      UNNEST(gerrit_changes) AS cl,
      UNNEST(builds) AS bb
    WHERE
      cl.project = "fuchsia"
      -- Get all attempts that were either SUCCESS or FAILURE. We'll use this
      -- later to filter out flakes.
      AND status IN ("SUCCESS", "FAILURE")
  ),
  patchset_info AS (
    SELECT
      cl.change,
      MAX(cl.patchset) AS max_patchset,
      MIN(cl.patchset) AS min_patchset,
    FROM `commit-queue.fuchsia.attempts`, UNNEST(gerrit_changes) AS cl
    WHERE cl.project = "fuchsia"
    GROUP BY(cl.change)
  ),
  all_details AS (
    SELECT
      equivalent_cl_group_key,
      MAX(change) AS change,
      MAX(patchset) AS patchset,
      MAX(host) AS host,
      -- This will be "fuchsia" if there's even one "fuchsia" builder.
      MIN(build_info.builder.project)
        AS project,
      EXTRACT(DATETIME FROM MIN(try.start_time)) AS start_time,
      ARRAY_AGG(build_info.builder.builder) AS failed_builders,
      ARRAY_AGG(build_info.id) AS bbids,
    FROM `cr-buildbucket.fuchsia.builds` AS build_info
    JOIN eq_patchset_attempts AS try
      ON try.id = build_info.id
    WHERE build_info.status = "FAILURE"
      -- Exclude any experimental tryjobs.
      AND JSON_EXTRACT(build_info.input.properties, "$['$recipe_engine/cq'].experimental") IS NULL
      -- Only use tryjobs with no autocorrelator findings. Findings indicate
      -- false rejection.
      AND (
          JSON_EXTRACT(build_info.output.properties, "$.num_autocorrelator_findings") IS NULL
          OR JSON_EXTRACT(build_info.output.properties, "$.num_autocorrelator_findings") = "0.0"
      )
      AND cq_success = 0
    GROUP BY equivalent_cl_group_key
  )

-- If all equivalent patchsets failed with different builders i.e. a series of
-- different flakes caused a failure, this query may output some flakes.
-- Empirically however, this does not seem to be very frequent.
SELECT
  ad.change AS change,
  'https://fxrev.dev/c/' || ad.change || '/' || ad.patchset
    AS patchset,
  CASE
    WHEN ad.patchset > 1
      THEN
        'https://fxrev.dev/c/' || ad.change || '/'
        || CAST(ad.patchset - 1 AS string) || '..' || ad.patchset
    ELSE NULL
    END
    AS prev_patchset,
  CASE
    WHEN ad.patchset < pi.max_patchset
      THEN
        'https://fxrev.dev/c/' || ad.change || '/' || ad.patchset || '..'
        || CAST(ad.patchset + 1 AS string)
    ELSE NULL
    END
    AS next_patchset,
  ARRAY(SELECT DISTINCT * FROM UNNEST(failed_builders)) AS failed_builders,
  ARRAY(
    SELECT DISTINCT 'https://ci.chromium.org/b/' || bbid || ' '
    FROM UNNEST(bbids) AS bbid
  ) AS build_links,
FROM all_details AS ad
JOIN patchset_info AS pi
  ON ad.change = pi.change
WHERE
  start_time >= DATETIME '{start_time}'
  AND start_time <= DATETIME '{end_time}'
  AND project != "fuchsia";
"""

BUG_DESCRIPTION = """
This bug was filed by http://go/tq-sapling-bug-pipeline.

We've identified that {patchset} ran through CQ and failed exclusively on
internal tryjobs. This indicates a gap in platform test coverage that is
currently covered only by out-of-platform tests.

Failed builds:
{build_info}

{patchset_info}

More context: http://go/tq-sapling
"""


def RunSteps(api, props):
    # Determine start and end times to pass into Sapling query.
    query_start_time = get_query_start_time(api)
    query_end_time = api.time.utcnow().strftime(DATETIME_FMT)

    query_output = api.path["start_dir"].join("sapling.json")
    api.bigquery.project = "fuchsia-infra"
    api.bigquery.query(
        "run sapling query",
        SAPLING_QUERY.format(
            start_time=query_start_time,
            end_time=query_end_time,
        ),
        query_output,
    )

    # Set query start and end time properties so a future build can consume
    # them.
    presentation = api.step.active_result.presentation
    presentation.properties["query_start_time"] = query_start_time
    presentation.properties["query_end_time"] = query_end_time

    rows = api.file.read_json(
        "read rows",
        query_output,
        test_data=[
            {
                "change": 123456,
                "patchset": "https://fxrev.dev/c/123456/2",
                "prev_patchset": "https://fxrev.dev/c/123456/1..2",
                "next_patchset": "https://fxrev.dev/c/123456/2..3",
                "failed_builders": ["foo.x64", "foo.arm64"],
                "build_links": [
                    "https://ci.chromium.org/b/123",
                    "https://ci.chromium.org/b/456",
                ],
            },
        ],
    )
    if not rows:
        return result.RawResult(
            summary_markdown="No new Sapling bugs to file",
            status=common_pb2.SUCCESS,
        )
    with api.step.nest("file bugs") as presentation:
        for row in rows:
            file_monorail_bug(api, row)


def get_query_start_time(api):
    """Determine the Sapling query start time, which is the query end time of
    the last successful build. Essentially, this lets us pick up exactly where
    we last left off.

    If there is no successful build in history, or its `query_end_time` property
    is not set, return the current time minus one day.

    Returns:
        str: string representation of the query start time in UTC.
    """
    start_time = None
    try:
        build_id = api.lkg.build(
            "get lkg build",
            builder=api.buildbucket_util.full_builder_name(),
            test_data="889900",
        )
        build = api.buildbucket.get(
            int(build_id),
            fields=["output"],
        )
        start_time = dict(build.output.properties).get("query_end_time")
    # We will hit this case if this is the first time this build has ever run,
    # or if the builder has never succeeded before. Fallback on the default
    # query start time, which is calculated below.
    except api.step.StepFailure:
        pass

    start_time = start_time or (api.time.utcnow() - timedelta(days=1)).strftime(
        DATETIME_FMT
    )
    # Ensure that the start time matches the expected format.
    assert start_time == datetime.strptime(start_time, DATETIME_FMT).strftime(
        DATETIME_FMT
    )
    return start_time


def file_monorail_bug(api, row):
    """File a new Monorail bug for a Sapling query row.

    Args:
        row (dict): Dict representation of a row which conforms to the Sapling
          query schema.
    """
    api.monorail.file_bug(
        "file bug for %s" % row["patchset"],
        "[sapling] Internal-only failures found in %s" % row["patchset"],
        BUG_DESCRIPTION.format(
            patchset=row["patchset"],
            build_info=build_info(row),
            patchset_info=patchset_info(row),
        ),
        components=["EngProd>Sapling"],
        labels=[
            # These bugs contain internal links that are only viewable by
            # Googlers.
            "Restrict-View-Google",
            "sapling-untriaged",
            # Attach a label with the change number to aid bug deduplication
            # across patchsets.
            "sapling-%d" % row["change"],
        ],
    )


def build_info(row):
    """Return build info for a Sapling query row."""
    build_info = [
        "%s: %s" % (builder, link)
        for builder, link in zip(row["failed_builders"], row["build_links"])
    ]
    return "\n".join(build_info)


def patchset_info(row):
    """Return available patchset link(s) for a Sapling query row."""
    patchset_info = ["Patchset: %s" % row["patchset"]]
    if row["prev_patchset"]:
        patchset_info.append("Previous patchset: %s" % row["prev_patchset"])
    if row["next_patchset"]:
        patchset_info.append("Next patchset: %s" % row["next_patchset"])
    return "\n".join(patchset_info)


def GenTests(api):
    yield api.status_check.test("lkg")
    yield api.status_check.test("no_lkg") + api.step_data("get lkg build", retcode=1)
    yield api.status_check.test("no_results") + api.step_data(
        "read rows", api.file.read_json(None)
    )
