# Copyright 2019 The Fuchsia Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
# Pylint is not smart enough to infer the return type of methods with a custom
# property decorator like @cached_property, so we have to disable some spurious
# warnings from cached property accesses. See
# https://github.com/PyCQA/pylint/issues/3484
#
# pylint: disable=no-member
import collections
from contextlib import contextmanager
import itertools
import re
from recipe_engine import recipe_api
from RECIPE_MODULES.fuchsia.utils import memoize
Range = collections.namedtuple("Range", "start end")
FORMATTING_MESSAGE = """File not formatted properly.
Run the following to format:
"""
MISSING_COMMIT_TAG_MESSAGE = 'The change description should start with a commit tag like "[tag] Change Description".'
# Markers that skip the inclusivity check, either for a whole section
# (disable/enable) or for a single line (ignore). Please do not change the
# order of these lines.
INCLUSIVE_DISABLE_RE = re.compile(r"inclusive-language:\s*disable")
INCLUSIVE_ENABLE_RE = re.compile(r"inclusive-language:\s*enable")
INCLUSIVE_IGNORE_RE = re.compile(r"inclusive-language:\s*ignore")
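# For example, a file can exempt a whole section from the check:
#   inclusive-language: disable
#   ...lines here are skipped...
#   inclusive-language: enable
# while an "inclusive-language: ignore" marker exempts only its own line.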
# This list is the backup copy of the canonical source file:
# //tools/mdlint/rules/respectful_code_words.json which implements
# https://fuchsia.dev/fuchsia-src/contribute/respectful_code
# This version of the file was copied from
# https://fuchsia.googlesource.com/fuchsia/+/e9939747816166d1b72a255bd9648e99b1da9f71/tools/mdlint/rules/respectful_code_words.json.
# inclusive-language: disable
INCLUSIVE_WORD_REPLACEMENTS = {
"blackhat": ["illegal", "unethical"],
"blacklist": ["denylist", "blocklist"],
"blacklisted": ["denied", "blocked"],
"blacklisting": ["denying", "blocking"],
"blacklists": ["denylists", "blocklists"],
"citizen": ["priority"],
"citizens": ["priorities"],
"cop": ["build gardener", "build monitor", "supervisor", "primary"],
"cops": ["build gardeners", "build monitors", "supervisors", "primaries"],
"crazier": ["unexpected", "catastrophic", "incoherent"],
"crazies": ["unexpected", "catastrophes", "incoherences"],
"craziest": ["unexpected", "catastrophic", "incoherent"],
"crazy": ["unexpected", "catastrophic", "incoherent"],
"cripple": ["slow down"],
"crippled": ["slowed down"],
"cripples": ["slows down"],
"crippling": ["slowing down"],
"dummies": ["placeholders", "samples", "copies", "prototypes", "mock-up"],
"dummy": ["placeholder", "sample", "copy", "prototype", "mock-up"],
"ghetto": ["no suggestion"],
"grandfather": [
"legacy clause",
"exempt",
"existing",
"holdover",
"carryover",
"baseline",
],
"grandfathered": [
"legacy",
"exempt",
"existing",
"holdover",
"carryover",
"baseline",
],
"grandfathering": [
"legacy clause",
"exempt",
"existing",
"holdover",
"carryover",
"baseline",
],
"grandfathers": [
"legacy clauses",
"exempt",
"existing",
"holdovers",
"carryovers",
"baselines",
],
"guru": ["expert", "teacher"],
"insane": ["unexpected", "catastrophic", "incoherent"],
"man-hour": ["work hour", "person hour"],
"man-in-the-middle": ["person-in-the-middle"],
"manned": ["staffed", "attended to", "crewed"],
"manning": ["staffing", "attending to"],
"manpower": ["workforce", "staff"],
"master": ["main", "primary"],
"masters": ["mains", "primaries"],
"native": ["core", "built-in", "machine code", "platform-specific"],
"pow-wow": ["meeting", "huddle", "talk", "summit"],
"powwow": ["meeting", "huddle", "talk", "summit"],
"primitive": ["alpha", "nascent"],
"redline": ["priority line", "memory limit", "maximum"],
"redlined": ["hit the maximum", "hit the memory limit"],
"redlining": ["hitting the maximum", "hitting the memory limit"],
"sane": ["valid", "sound", "rational", "sensible"],
"sanity": [
"check",
"quick check",
"confidence check",
"coherence check",
"calibration check",
],
"slave": ["secondary", "replica", "subsidiary"],
"slaves": ["secondaries", "replicas", "subsidiaries"],
"whitehat": ["ethical"],
"whitelist": ["allowlist", "safelist", "approvelist"],
"whitelisted": ["allowlisted", "safelisted", "approvelisted"],
"whitelisting": ["allowlisting", "safelisting", "approvelisting"],
"whitelists": ["allowlists", "safelists", "approvelists"],
}
# inclusive-language: enable
def _analyzer_name(analyzer_func):
"""Return a normalized name for the analyzer function."""
return analyzer_func.__name__.lstrip("_").lower()
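# For example, _analyzer_name(TriciumAnalyzeApi._ClangFormat) returns
# "clangformat", matching the lowercase names passed in `enabled_analyzers`.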
class TriciumAnalyzeApi(recipe_api.RecipeApi):
"""API for running analyses on Tricium."""
_FILENAME_RE = re.compile(r"^\+\+\+\ [^/]+/(.*)")
_CHUNK_RE = re.compile(
r"^@@ \-(?P<before_line>\d+)(,(?P<before_count>\d+))? \+(?P<after_line>\d+)(,(?P<after_count>\d+))?",
)
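    # For example, the header "@@ -7 +7,3 @@" yields before_line=7 with no
    # before_count (treated as a single line), and after_line=7, after_count=3.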
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self._ext_to_analyzers = {
".c": [self._ClangFormat, self._ClangTidy],
".cc": [self._ClangFormat, self._ClangTidy],
".cml": [self._CmlFormat],
".cpp": [self._ClangFormat, self._ClangTidy],
".dart": [self._DartFmt],
".h": [self._ClangFormat, self._ClangTidy],
".hh": [self._ClangFormat, self._ClangTidy],
".hpp": [self._ClangFormat, self._ClangTidy],
".fidl": [self._FidlFormat, self._FidlLint],
".gn": [self._GNFormat],
".gni": [self._GNFormat],
".go": [self._GoFmt, self._GoVet],
".md": [self._MdLint],
".py": [self._Black, self._Yapf],
".rs": [self._RustFmt],
".star": [self._Yapf],
".ts": [self._ClangFormat],
".triage": [self._Json5Format],
}
        # Analyzer tools that are always produced as part of a Fuchsia build,
        # rather than distributed as prebuilts.
self._built_tools = [
"cmc",
"fidl-format",
"fidl-lint",
"formatjson5",
"mdlint",
]
# The paths to these tools may be set directly by the recipe.
self.black = None
self.go = None
self.gofmt = None
self.yapf = None
self.checkout = None
self.build_results = None
# Whether to suggest the use of the fx tool.
# The tool only works properly when run in fuchsia.git or one of its sub-directories.
self.suggest_fx = True
def __call__(
self,
filenames,
enabled_analyzers,
enabled_luci_analyzers=(),
):
"""Check for errors in the given files.
Runs the relevant language's analyzers over each file and posts
Tricium comments if errors are found.
Args:
            filenames (seq(str)): Relative paths to files in the checkout.
                Must be strings rather than Path objects.
enabled_analyzers (seq(str)): Names of enabled analyzers.
enabled_luci_analyzers (seq(str)): Names of enabled analyzers
supported by the recipe engine's tricium recipe module.
"""
enabled_analyzers = [a.lower() for a in enabled_analyzers]
if self.build_results: # pragma: no cover
assert self.checkout, "`build_results` must be set with `checkout`"
with self.m.step.defer_results():
with self.m.step.nest("check for inclusivity"):
self._check_for_inclusivity("", contents=self._commit_message())
for filename in filenames:
self._check_for_inclusivity(filename)
if enabled_luci_analyzers:
with self.m.step.nest("run luci analyzers"):
self._run_luci_analyzers(
enabled_luci_analyzers,
# cwd is None if we're still in start_dir.
checkout_dir=self.m.context.cwd or self.m.path["start_dir"],
filenames=filenames,
)
# TODO(fxbug.dev/82386): Move most analyzer logic into staticlints.
if "staticlints" in enabled_analyzers:
# staticlints runs on all files at once.
self._run_staticlints(filenames)
for filename in filenames:
analyzers = self._analyzers_for_file(filename, enabled_analyzers)
if not analyzers:
continue
with self.m.step.nest("analyze %s" % filename):
for analyzer_func in analyzers:
analyzer_func(filename)
self.m.tricium.write_comments()
def _check_for_inclusivity(self, filename, contents=""):
if not contents:
contents = self.m.file.read_text(
"read %s" % filename,
self.m.context.cwd.join(filename),
include_log=False,
)
change_diff = self.m.git(
"get change diff for %s" % filename,
"diff-tree",
"--no-commit-id",
"--diff-filter=d",
"-U0",
"HEAD",
"--",
filename,
stdout=self.m.raw_io.output_text(),
).stdout
change_line_ranges = self._get_ranges_from_diff(
change_diff, include_before=False, include_after=True
)
else:
change_line_ranges = [Range(1, len(contents.splitlines()) + 1)]
content_lines = contents.splitlines()
# Ideally, we would read the canonical source for inclusive words:
# https://source.corp.google.com/fuchsia/tools/mdlint/rules/respectful_code_words.json
# However, that file resides in the Fuchsia repository, and not all
# recipes check out that repo. Therefore, we default to the hard-coded
# version and use the canonical one if present.
inclusive_word_replacements = INCLUSIVE_WORD_REPLACEMENTS
inclusive_file = self.checkout.root_dir.join(
"tools", "mdlint", "rules", "respectful_code_words.json"
)
if self.m.path.exists(inclusive_file):
inclusive_word_replacements = self.m.file.read_json(
name="reading inclusive words file",
source=inclusive_file,
test_data={"foo": ["bar", "baz"], "master": ["main", "primary"]},
include_log=False,
)
enabled = True
        line_indices_per_word = {}
        # Exclude URLs from the check.
        url_regex = re.compile(r"\w+://[^\s]*")
for i, line in enumerate(content_lines):
if not enabled:
if not INCLUSIVE_ENABLE_RE.search(line):
continue
enabled = True
if INCLUSIVE_DISABLE_RE.search(line):
enabled = False
continue
if INCLUSIVE_IGNORE_RE.search(line):
continue
            # Line numbers used by Tricium and in change_diff are 1-based,
            # while enumerate() is 0-based, so add 1.
            line_index = i + 1
            line_without_urls = url_regex.sub("", line)
for word in inclusive_word_replacements:
word_regex = re.compile(r"\b%s\b" % word)
line_indices = line_indices_per_word.get(word, [])
if word_regex.search(line_without_urls):
for r in change_line_ranges:
                        if r.start <= line_index < r.end:
line_indices.append(line_index)
break
line_indices_per_word[word] = line_indices
for word, replacements in inclusive_word_replacements.items():
line_indices = line_indices_per_word.get(word, [])
# If a non-inclusive word appears too many times in a file, combine
# all occurrences into one comment to avoid overwhelming Tricium
# with too many comments.
if len(line_indices) > 3:
self.m.tricium.add_comment(
"Inclusivity",
"Please avoid '%s' found on lines %s. Suggested replacements: %s.\n\nSee https://fuchsia.dev/fuchsia-src/contribute/respectful_code"
% (word, str(line_indices), str(replacements)),
filename,
)
else:
for i in line_indices:
self.m.tricium.add_comment(
"Inclusivity",
"Please avoid '%s'. Suggested replacements: %s.\n\nSee https://fuchsia.dev/fuchsia-src/contribute/respectful_code"
% (word, str(replacements)),
filename,
start_line=i,
)
def _run_luci_analyzers(self, enabled_luci_analyzers, checkout_dir, filenames):
all_analyzers = self.m.tricium.analyzers.by_name()
self.m.tricium.run_legacy(
[all_analyzers[name] for name in enabled_luci_analyzers],
input_base=checkout_dir,
affected_files=filenames,
commit_message=self._commit_message(),
# Don't emit comments yet. We'll handle that ourselves after running
# non-LUCI analyzers.
emit=False,
)
def _analyzers_for_file(self, filename, enabled_analyzers):
assert isinstance(
filename, str
), "filenames must be string paths relative to the checkout"
_, ext = self.m.path.splitext(filename)
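        # For example, "foo/bar.cc" maps to [self._ClangFormat,
        # self._ClangTidy], which is then filtered down to the analyzers
        # whose normalized names appear in enabled_analyzers.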
return [
analyzer_func
for analyzer_func in self._ext_to_analyzers.get(ext, [])
if _analyzer_name(analyzer_func) in enabled_analyzers
]
def _run_staticlints(self, filenames):
assert self.checkout
step = self.m.step(
"run staticlints",
[
self.build_results.tool("staticlints"),
"-checkout-dir",
self.checkout.root_dir,
"-build-dir",
self.build_results.build_dir,
"-files-json",
self.m.json.input([{"path": f} for f in filenames]),
"-output-json",
# If the command fails then the output might be empty, so don't
# assume it's valid JSON.
self.m.raw_io.output(add_output_log=True),
],
# We don't want to raise an exception until after emitting any
# comments. As of 2022-02 the Tricium service ignores comments from
# failed builds, but that is likely to change in the future so we do
# want to make a best effort at emitting comments even if some
# analysis steps fail.
ok_ret="any",
)
findings = []
if step.raw_io.output.strip():
findings = self.m.json.loads(step.raw_io.output)
for finding in findings:
# staticlints emits JSON objects whose fields correspond exactly to
# those of the Tricium comment schema.
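            # An illustrative finding (values hypothetical):
            #   {"category": "Lint/Staticlints", "message": "unused variable",
            #    "path": "src/foo.cc", "start_line": 12}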
self.m.tricium.add_comment(**finding)
if step.retcode:
step.presentation.status = self.m.step.FAILURE
self.m.step.raise_on_failure(step)
@contextmanager
def _diff_format(self, category, filename, cmd_format="fx format-code --files=%s"):
"""Checks for diffs after running an auto-formatter.
If there's a diff in the lines that were touched by the CL under
test, adds a comment on the CL.
"""
        # This step gets the changed ranges between the parent commit and the
        # current CL. The diff will have the following format:
        #
        # diff --git a/filename b/filename
        # --- a/filename
        # +++ b/filename
        # @@ -PARENT_START_LINE[,LINE_COUNT] +CL_START_LINE[,LINE_COUNT] @@ CONTEXT
        # CHANGE_DETAILS
        #
change_diff = self.m.git(
"get change diff",
"diff-tree",
"--no-commit-id",
"--diff-filter=d",
"-U0",
"HEAD",
"--",
filename,
stdout=self.m.raw_io.output_text(),
).stdout
change_line_ranges = self._get_ranges_from_diff(
change_diff, include_before=False, include_after=True
)
self.m.step.active_result.presentation.logs["change_line_ranges"] = str(
change_line_ranges
)
# The caller should run the formatter within the `with` block that
# called this function.
yield
        # This step gets the changed ranges between the current CL and the
        # formatted CL. The diff will have the following format:
        #
        # diff --git a/filename b/filename
        # --- a/filename
        # +++ b/filename
        # @@ -CL_START_LINE[,LINE_COUNT] +FORMATTED_CL_START_LINE[,LINE_COUNT] @@ CONTEXT
        # CHANGE_DETAILS
        #
formatted_diff = self.m.git(
"get formatted diff",
"diff-index",
"--no-commit-id",
"--diff-filter=d",
"-U0",
"HEAD",
"--",
filename,
stdout=self.m.raw_io.output_text(),
).stdout
        # change_line_ranges contains the ranges of lines changed in the
        # current CL relative to its parent commit; formatted_line_ranges
        # contains the ranges of lines in the current CL that were changed by
        # the formatter. The intersection of the two reveals the ranges in the
        # CL affected by the formatter.
formatted_line_ranges = self._get_ranges_from_diff(
formatted_diff, include_before=True, include_after=False
)
self.m.step.active_result.presentation.logs["formatted_line_ranges"] = str(
formatted_line_ranges
)
# Ideally we'd have a generic way to support self.suggest_fx == False in this
# function. However today there's only one analyzer that actually needs this,
# and restructuring the code around this use case would add complexity on net.
# If we start supporting this for many analyzers we should reconsider, perhaps
# by having a class per analyzer rather than just a function.
intersection = self._intersect_ranges(change_line_ranges, formatted_line_ranges)
if intersection:
self.m.tricium.add_comment(
"Format/%s" % category,
"%s%s" % (FORMATTING_MESSAGE, cmd_format % filename),
filename,
)
self.m.git("reset", "reset", "--hard", "HEAD")
def _Black(self, filename):
with self._diff_format(
"Black",
filename,
cmd_format="black %s\n"
"If black isn't in your PATH, see http://go/fxi-cookbook#getting-the-infra-source-code",
):
self.m.step("black", [self.black, filename])
def _FidlFormat(self, filename):
assert self.checkout
        # FIDL test files are often purposefully formatted in unrecommended
        # ways, so they should be skipped.
if str(filename).endswith(".test.fidl"):
return
with self._diff_format("FidlFormat", filename):
with self.m.step.nest("fidl-format"):
fidl_format_path = self.build_results.tool("fidl-format")
self.m.step("run", [fidl_format_path, "-i", filename])
def _CmlFormat(self, filename):
assert self.checkout
with self._diff_format("CmlFormat", filename):
with self.m.step.nest("cmc"):
cmc_path = self.build_results.tool("cmc")
self.m.step(
"run",
[cmc_path, "format", "--cml", "--in-place", filename],
)
def _GoFmt(self, filename):
with self._diff_format("GoFmt", filename):
with self.m.step.nest("gofmt"):
if not self.gofmt:
self.gofmt = self.build_results.tool("gofmt")
self.m.step("run", [self.gofmt, "-w", "-s", filename])
def _GNFormat(self, filename):
with self._diff_format("GNFormat", filename):
with self.m.step.nest("gn format"):
self.m.step("run", [self.build_results.tool("gn"), "format", filename])
def _RustFmt(self, filename):
assert self.checkout
with self._diff_format("RustFmt", filename):
with self.m.step.nest("rustfmt"):
self.m.step(
"run",
[
self.build_results.tool("rustfmt"),
"--config-path={}".format(
self.checkout.root_dir.join("rustfmt.toml")
),
"--unstable-features",
"--skip-children",
filename,
],
)
def _Yapf(self, filename):
cmd_format = "fx format-code --files=%s"
if not self.suggest_fx:
cmd_format = "yapf --in-place %s"
with self._diff_format("YAPF", filename, cmd_format):
with self.m.step.nest("yapf"):
if not self.yapf:
self.yapf = self.build_results.tool("yapf")
self.m.step("run", [self.yapf, "--in-place", filename])
def _DartFmt(self, filename):
with self._diff_format("DartFmt", filename):
with self.m.step.nest("dart format"):
self.m.step(
"run", [self.build_results.tool("dart"), "format", filename]
)
def _ClangFormat(self, filename):
with self._diff_format(
"ClangFormat",
filename,
cmd_format="fx format-code --changed-lines --files=%s",
), self.m.step.nest("clang-format"):
            diff = self.m.git(
"get file diff",
"diff",
"-U0",
"--no-color",
"HEAD^",
"--",
filename,
stdout=self.m.raw_io.output_text(),
)
self.m.python3(
"clang-format-diff.py",
[
self.build_results.tool("clang-format-diff"),
"-p1",
"-i",
"-style=file",
"-fallback-style=Google",
"-sort-includes",
"-binary",
self.build_results.tool("clang-format"),
],
                stdin=self.m.raw_io.input_text(data=diff.stdout),
)
def _capitalize_msg(self, message):
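        """Capitalize the first letter of `message` if it isn't already.
        For example, "expected 1 argument" becomes "Expected 1 argument".
        """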
if not message or message[0].isupper():
return message
return message[0].upper() + message[1:]
def _FidlLint(self, filename):
assert self.checkout
        # FIDL test files often purposefully use syntax that does not follow
        # the linting rules, so they should be skipped.
if str(filename).endswith(".test.fidl"):
return
with self.m.step.nest("fidl-lint"):
fidl_lint_path = self.build_results.tool("fidl-lint")
results = self.m.step(
"run",
[fidl_lint_path, "--format=json", filename],
ok_ret=(0, 1),
stdout=self.m.json.output(),
).stdout
for result in results:
capitalized_msg = self._capitalize_msg(result["message"]) + "."
capitalized_desc = ""
for suggestion in result.get("suggestions", ()):
if "description" in suggestion:
capitalized_desc += (
self._capitalize_msg(suggestion["description"]) + ". "
)
if capitalized_desc:
capitalized_msg = capitalized_msg + " " + capitalized_desc[:-1]
result["message"] = capitalized_msg
                # fidl-lint's JSON output already conforms to the Tricium
                # comment schema, so its fields map directly onto the
                # arguments of add_comment.
self.m.tricium.add_comment(**result)
def _GoVet(self, filename):
with self.m.step.nest("go vet") as presentation:
cwd = self.m.context.cwd
package_dir = cwd.join(self.m.path.dirname(filename))
package_warnings = self._go_vet_package(package_dir)
if not package_warnings:
return
presentation.logs["warnings"] = self.m.json.dumps(
package_warnings, indent=2
).splitlines()
for warning in package_warnings:
warning_file = self.m.path.relpath(warning.path, cwd)
if warning_file != filename:
continue
self.m.tricium.add_comment(
"Lint/GoVet",
warning.message,
# All file paths reported to tricium should be relative
# to the root of the git repo. The caller ensures that
# cwd is the root of the git repo.
filename,
start_line=warning.line,
end_line=warning.line,
start_char=warning.char,
end_char=warning.char + 1,
)
_GoVetWarning = collections.namedtuple("GoVetWarning", "path message line char")
@memoize
def _go_vet_package(self, package_dir):
with self.m.context(cwd=package_dir):
if not self.go:
self.go = self.build_results.tool("go")
step = self.m.step(
"run",
[self.go, "vet", "-json"],
stderr=self.m.raw_io.output_text(),
ok_ret="any",
)
if step.retcode:
# With the -json flag set, `go vet` will only return a
# non-zero retcode if the Go code is not compilable. If the
# code is actually not compilable by the Fuchsia build
# system then that will be caught in CQ; otherwise it's
# likely just not compilable by the native Go toolchain
# because it relies on generated Go files produced by ninja.
# So we can skip vetting this code, since Tricium warnings
# are best-effort anyway.
step.presentation.step_text = "failed to compile, skipping"
return None
stderr_lines = step.stderr.splitlines()
step.presentation.logs["stderr"] = stderr_lines
# Unfortunately `go vet -json` does not output only valid JSON, so
# we have to parse the output manually.
# Look at the test cases in examples/ for the expected output format.
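        # An illustrative stderr shape (package path and values hypothetical):
        #   # example.com/pkg
        #   {
        #       "example.com/pkg": {
        #           "unreachable": [
        #               {"posn": "/abs/path/foo.go:12:3",
        #                "message": "unreachable code"}
        #           ]
        #       }
        #   }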
parsed_output = None
current_entry_lines = []
for line in stderr_lines:
if current_entry_lines:
current_entry_lines.append(line)
# Ends the JSON object
if line == "}":
parsed_output = self.m.json.loads("\n".join(current_entry_lines))
break
# Empty JSON object
elif line == "{}":
parsed_output = {}
break
# Start new non-empty JSON object
elif line == "{":
assert not current_entry_lines
current_entry_lines.append(line)
assert parsed_output is not None, "invalid go vet output"
go_vet_warnings = []
for package_warnings in parsed_output.values():
# Each package's warnings are grouped by the warning type (e.g.
# "unreachable"), but we don't care about the warning type because
# the full warning message is available for each warning.
for warning in itertools.chain(*package_warnings.values()):
abspath, line, column = warning["posn"].split(":")
go_vet_warnings.append(
self._GoVetWarning(
path=abspath,
line=int(line),
# go vet emits 1-based column indices, but tricium
# expects 0-based.
char=int(column) - 1,
message=warning["message"],
)
)
return go_vet_warnings
def _ClangTidy(self, filename):
assert self.checkout
with self.m.step.nest("clang-tidy"):
clang_tidy = self.build_results.tool("clang-tidy")
clang_tidy_diff = self.build_results.tool("clang-tidy-diff")
warnings_file = self.m.path["cleanup"].join("clang_tidy_fixes.yaml")
diff = self.m.git(
"get file diff",
"diff",
"-U0",
"--no-color",
"HEAD^",
"--",
filename,
stdout=self.m.raw_io.output_text(),
)
with self.m.context(cwd=self.checkout.root_dir):
clang_tidy_args = [
"-p1",
"-path",
self.build_results.compdb_path,
"-export-fixes",
warnings_file,
"-clang-tidy-binary",
clang_tidy,
]
step_result = self.m.step(
name="clang-tidy-diff.py",
cmd=[
"vpython3",
"-vpython-spec",
self.resource("clang-tidy-diff.vpython"),
clang_tidy_diff,
]
+ clang_tidy_args,
stdin=self.m.raw_io.input_text(data=diff.stdout),
# This script may return 1 if there are compile
# errors -- that's okay, since this is a linter
# check. We'll log them below.
ok_ret=(0, 1),
)
if step_result.retcode:
self.m.step.active_result.presentation.status = "WARNING"
errors = self._parse_warnings(warnings_file)
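            # Register the path used in the recipe's test data so that the
            # `path.exists` check below passes in simulation tests; this is
            # a no-op in production.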
self.m.path.mock_add_paths(
self.checkout.root_dir.join("path", "to", "file.cpp")
)
# We iterate through all produced error sets...
for check in errors:
# ...and for each check, iterate through all the errors it produced...
for err in errors[check]:
# ...and extract the information from that error for a comment.
error_filepath = self.m.path.abspath(
self.build_results.build_dir.join(
err["DiagnosticMessage"]["FilePath"]
)
)
if (
not self.m.path.exists(error_filepath)
or err["DiagnosticMessage"]["FilePath"] == ""
):
continue # pragma: no cover
# Extract the line and character for this warning.
sline, schar = self._get_line_from_offset(
error_filepath, err["DiagnosticMessage"]["FileOffset"]
)
end_line, end_char = sline, schar + 1
if (sline, schar) == (0, 0):
end_line, end_char = 0, 0 # file level comment.
# Add the comment to Tricium.
self.m.tricium.add_comment(
"Lint/ClangTidy",
"%s: %s"
% (
err["DiagnosticName"],
err["DiagnosticMessage"]["Message"],
),
# All file paths reported to tricium should be relative to the root of the git repo.
# The caller ensures that cwd is the root of the git repo.
self.m.path.relpath(
str(err["DiagnosticMessage"]["FilePath"]),
self.m.path.abspath(self.m.context.cwd),
),
start_line=sline,
start_char=schar,
end_line=end_line,
end_char=end_char,
)
def _Json5Format(self, filename):
assert self.checkout
with self._diff_format("Json5Format", filename):
with self.m.step.nest("json5"):
formatjson5_path = self.build_results.tool("formatjson5")
self.m.step(
"run",
[formatjson5_path, "--replace", filename],
)
def _MdLint(self, filename):
output = self._run_mdlint_once()
for finding in output.get(filename, []):
# mdlint's output is already of the format that tricium expects.
self.m.tricium.add_comment(**finding)
@memoize
def _run_mdlint_once(self):
assert self.checkout
mdlint = self.build_results.tool("mdlint")
with self.m.step.nest("mdlint"):
step = self.m.step(
"run",
[
mdlint,
"--root-dir",
"docs",
"--filter-filenames",
"governance/rfcs",
"--enable",
"all",
"--json",
],
stderr=self.m.json.output(),
step_test_data=lambda: self.m.json.test_api.output_stream([], "stderr"),
ok_ret=(0, 1),
)
findings = collections.defaultdict(list)
for finding in step.stderr:
findings[finding["path"]].append(finding)
return findings
def _parse_warnings(self, warnings_file):
"""Parse all warnings output by clang-tidy.
Clang-Tidy issues warnings as follows:
- DiagnosticName: 'check name'
Message: 'error message'
FileOffset: <offset (int)>
FilePath: 'file path'
Replacements:
- FilePath: 'replacement file path'
Offset: <replacement start offset (int)>
Length: <replacement length (int)>
ReplacementText: 'replacement text'
        Args:
            warnings_file (Path): Path to the YAML file of warnings emitted
                by the clang-tidy binary.
        Returns:
            A dict of raw diagnostic dicts (as parsed from the YAML),
            grouped by check name.
            Schema:
            {
                '<check name>': [
                    {
                        'DiagnosticName': 'check name',
                        'DiagnosticMessage': {
                            'Message': 'error message',
                            'FileOffset': <error offset (int)>,
                            'FilePath': 'file path',
                            'Replacements': [ ... ],
                        },
                    },
                    ...
                ],
                '<other check name>': [ ... ]
            }
        """
self.m.path.mock_add_paths(warnings_file)
if not self.m.path.exists(warnings_file):
return {} # pragma: no cover
parsed_results = self.m.yaml.read_file(
"load %s" % self.m.path.basename(warnings_file), warnings_file
)
if not parsed_results:
return {}
all_warnings = {}
for warning in parsed_results["Diagnostics"]:
if warning["DiagnosticName"] not in all_warnings:
all_warnings[warning["DiagnosticName"]] = []
all_warnings[warning["DiagnosticName"]].append(warning)
return all_warnings
def _get_line_from_offset(self, path, offset):
"""Get the file line and char number from a file offset.
Clang-Tidy emits warnings that mark the location of the error by the char
offset from the beginning of the file. This converts that number into a line
and char position.
        Args:
            path (str): Path to file.
            offset (int): Offset to convert.
        Returns:
            A (line, char) tuple, or (0, 0) if the offset is 0 or lies
            beyond the end of the file.
        """
file_data = self._read_file(path)
line = 1
char = 0
for i, c in enumerate(file_data):
if c == "\n":
line += 1
char = 0
else:
char += 1
if i + 1 == offset:
return line, char
return 0, 0
@memoize # Only read a file once even if it has multiple analysis errors.
def _read_file(self, path):
return self.m.file.read_text(
"read %s" % path,
path,
test_data="""test
d
newlineoutput""",
)
def _get_ranges_from_diff(self, diff, include_before=False, include_after=False):
"""Compute sequence of ranges of changed lines from diff.
The diff *must* contain only one file.
Args:
diff (str): Unified diff.
            include_before (bool): Whether to include line ranges from the
                base of the diff (i.e., before the changes in the diff were
                made).
            include_after (bool): Whether to include line ranges from the
                target of the diff (i.e., after the changes in the diff were
                made).
        """
ranges = []
found_filename = False
for line in diff.splitlines():
if self._FILENAME_RE.search(line):
assert not found_filename, "diff contains multiple files"
found_filename = True
match = self._CHUNK_RE.search(line)
if not match:
continue
if include_before:
start_line = int(match.group("before_line"))
line_count = 1
if match.group("before_count"):
line_count = int(match.group("before_count"))
ranges.append(Range(start_line, start_line + line_count))
if include_after:
start_line = int(match.group("after_line"))
line_count = 1
if match.group("after_count"):
line_count = int(match.group("after_count"))
ranges.append(Range(start_line, start_line + line_count))
return ranges
def _intersect_ranges(self, ranges1, ranges2):
"""Given two lists of line ranges, find their intersection.
Each range *includes* its start and end lines.
Assumes that within each list, the ranges are non-overlapping and
sorted in increasing order.
Example:
ranges1: [(1, 5), (7, 12), (100, 101)]
ranges2: [(2, 8), (8, 9)]
output: [(2, 5), (7, 9)]
"""
ranges = []
i1 = i2 = 0
while i1 < len(ranges1) and i2 < len(ranges2):
r1, r2 = ranges1[i1], ranges2[i2]
# We found a pair of overlapping ranges, so record a new range
# corresponding to the overlap between the two.
if r1.end >= r2.start and r2.end >= r1.start:
points = sorted([r1.start, r1.end, r2.start, r2.end])
ranges.append(Range(points[1], points[2]))
if r1.end < r2.end:
i1 += 1
else:
i2 += 1
# If one range ends at the same line that the next range starts, merge
# them into a single range.
merged_ranges = []
i = 0
while i < len(ranges):
if i + 1 < len(ranges) and ranges[i].end == ranges[i + 1].start:
merged = Range(ranges[i].start, ranges[i + 1].end)
merged_ranges.append(merged)
i += 2
else:
merged_ranges.append(ranges[i])
i += 1
return merged_ranges
def check_commit_message(self):
"""Checks if the "Commit-Message-has-tags" Gerrit label is unset."""
with self.m.step.nest("check commit tags"):
# If commit message tags are required for the repo, the label value
# will always be a non-null dict. The dict will be empty if the
# label is unset.
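            # For example, labels == {"Commit-Message-has-tags": {}} means
            # the label is required but unset, whereas a repo that doesn't
            # require tags omits the key entirely.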
if (
self._gerrit_change()["labels"].get("Commit-Message-has-tags", None)
== {}
):
self.m.tricium.add_comment(
"Format/CommitTag",
MISSING_COMMIT_TAG_MESSAGE,
"",
)
@memoize
def _gerrit_change(self):
change = self.m.buildbucket.build.input.gerrit_changes[0]
details = self.m.gerrit.change_details(
name="get change details",
change_id=str(change.change),
            # Retrieve the full commit message for all revisions: the
            # patchset that triggered this build may not be the current
            # (latest) patchset, in which case we'll need the commit message
            # from an older patchset.
query_params=["ALL_COMMITS", "ALL_REVISIONS"],
host=change.host,
test_data=self.m.json.test_api.output(
{
"labels": {},
"current_revision": "123abc",
"revisions": {
"123abc": {
"_number": change.patchset,
"commit": {"message": "[foo] Add tests"},
}
},
}
),
).json.output
# Gerrit's "change details" endpoint doesn't support requesting a
# specific patchset, so the "current_revision" field will always point
# to the latest patchset available, even if it's newer than the
# patchset that triggered the current build. So make sure that we only
# look at the patchset that triggered this build.
for sha, revision in details["revisions"].items():
if revision["_number"] == change.patchset:
details["current_revision"] = sha
return details
def _commit_message(self):
change = self._gerrit_change()
current_revision = change["current_revision"]
return change["revisions"][current_revision]["commit"]["message"]