blob: edd46ab28f44a9a449df53dc24385ad84cd31f5c [file] [log] [blame]
#!/usr/bin/env python3
# Copyright 2020 The Fuchsia Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""
A hacky script to clean up recipe DEPS:
- Delete unused recipe and recipe module DEPS. It uses a simple heuristic for
this, which may produce some false positives and false negatives.
OPTIONAL_TODO: Using the ast module would be a more reliable way of
detecting dep uses, since this heuristic might find usages in docstrings
and trailing comments.
- Ensure all deps are prefixed with a repo name (e.g. "fuchsia").
- Sort all DEPS lists alphabetically.
"""
import argparse
import difflib
import functools
import json
import os
import re
import sys
def main():
    """Clean up (or, with --check, just report) recipe DEPS under the cwd.

    Reads the repo name from `infra/config/recipes.cfg`, analyzes every
    recipe file under `recipes/` and `recipe_modules/` plus every recipe
    module's `__init__.py`, and then either rewrites the files whose DEPS
    need cleanup or, with `--check`, prints the diffs and exits with
    status 1 if any cleanup is needed.
    """
    parser = argparse.ArgumentParser(description="Sort and filter recipe DEPS")
    parser.add_argument(
        "--check",
        action="store_true",
        help=(
            "Instead of writing changes, print diff to stdout and exit with retcode 1 "
            "if cleanup is needed."
        ),
    )
    parser.add_argument(
        "--json-output",
        type=str,
        help=(
            "Write a JSON list of relative paths of badly formatted files to this file."
        ),
    )
    args = parser.parse_args()

    cwd = os.getcwd()

    # Get the name of this recipes repo to prepend to any DEPS that don't
    # specify a repo.
    with open(os.path.join(cwd, "infra", "config", "recipes.cfg")) as f:
        repo_name = json.load(f)["repo_name"]

    files_with_deps = []
    recipes_dir = os.path.join(cwd, "recipes")
    modules_dir = os.path.join(cwd, "recipe_modules")

    # os.walk() silently yields nothing for a directory that doesn't exist.
    for directory in (recipes_dir, modules_dir):
        for subdir, _, files in os.walk(directory):
            for filename in files:
                if os.path.splitext(filename)[1] != ".py":
                    continue
                # __init__.py files are handled separately, since they contain
                # DEPS entries but usages of those deps may be in other files
                # in the directory.
                if filename == "__init__.py":
                    continue
                files_with_deps.append(
                    analyze_recipe(os.path.join(subdir, filename), repo_name)
                )

    # Unlike os.walk(), os.listdir() raises on a missing directory, so guard
    # it to stay consistent with the walk above.
    if os.path.isdir(modules_dir):
        for entry in os.listdir(modules_dir):
            module_path = os.path.join(modules_dir, entry)
            # Only directories containing an api.py are treated as modules.
            if os.path.exists(os.path.join(module_path, "api.py")):
                files_with_deps.append(analyze_module(module_path, repo_name))

    files_to_fix = [f for f in files_with_deps if f.diff]

    if args.check:
        if files_to_fix:
            # Print the banner once rather than once per offending file.
            script_relpath = os.path.relpath(__file__, cwd)
            print(
                "Some recipe files' DEPS are malformatted.\n"
                "Run ./%s to fix:\n" % script_relpath
            )
        for f in files_to_fix:
            print(f.diff)
    else:
        for f in files_to_fix:
            f.rewrite()

    if args.json_output:
        with open(args.json_output, "w") as outfile:
            json.dump([os.path.relpath(f.path, cwd) for f in files_to_fix], outfile)

    if args.check and files_to_fix:
        sys.exit(1)
class FileWithDEPS:
    """A Python file together with the parsed contents of its `DEPS` list.

    The constructor reads the file and parses the block between a line that
    is exactly `DEPS = [` and the next line that is exactly `]`. Callers
    then populate `used_deps` and `recipe_module_imports` via static
    analysis before reading `new_lines` / `diff` or calling `rewrite()`.
    """

    # Matches a quoted dep at the end of a line, optionally followed by a
    # comma and a trailing comment, e.g. `"fuchsia/foo",  # why`.
    _DEP_LINE_RE = re.compile(
        r'(?P<quote>[\'"])(?P<dep>\S+)(?P=quote),?\s*(?P<comment>\#.*)?$'
    )

    # Indentation used for newly generated entries when the file has no
    # existing dep line to copy the indentation from.
    _DEFAULT_INDENT = "    "

    def __init__(self, path, repo_name):
        """Read the file and its DEPS.

        Args:
            path (str): Path of the Python file to read.
            repo_name (str): Repo name to prepend to deps that don't
                specify one (i.e. that contain no "/").
        """
        self.path = path
        self._repo_name = repo_name

        # To be updated by the caller after static analysis.
        self.used_deps = set()

        # Modules imported using `from RECIPE_MODULES.repo.module import ...`.
        # Even though the recipe engine doesn't enforce it, we require that
        # every directly-imported module be declared in the importer's DEPS so
        # that DEPS are a complete representation of the recipe's or module's
        # dependencies.
        #
        # To be updated by the caller after static analysis.
        self.recipe_module_imports = set()

        # A mapping from full dep name (e.g. "fuchsia/foo") to a single string
        # holding the lines that correspond to that dep - the actual line that
        # names the dep, preceded by any comment (or otherwise unparsable)
        # lines that came before it. Trailing newlines are preserved.
        self.original_deps = {}

        # The original lines of the file, including trailing newlines.
        self.original_lines = []

        # The line number of the first dep, or -1 if no `DEPS = [` was found.
        self._deps_start_line = -1
        # The line number of the line after the last dep, or -1 if the closing
        # `]` was not found.
        self._deps_end_line = -1

        # Leading whitespace of the first parsed dep line, so that generated
        # entries match the file's existing indentation. None if no dep line
        # was parsed.
        self._indent = None

        with open(self.path) as f:
            self.original_lines = f.readlines()

        single_quote_count = double_quote_count = 0
        # The lines associated with the dep currently being parsed, including
        # any comment lines preceding the dep. NOTE: lines accumulated after
        # the last dep are never attached to a dep and so are dropped when the
        # DEPS list is regenerated.
        current_dep_lines = []

        for i, original_line in enumerate(self.original_lines):
            line = original_line.strip()
            if line == "DEPS = [":
                self._deps_start_line = i + 1
                continue
            elif self._deps_start_line == -1:
                continue
            elif line == "]":
                self._deps_end_line = i
                break

            if line.startswith("#"):
                current_dep_lines.append(original_line)
                continue

            match = self._DEP_LINE_RE.search(line)
            if not match:
                # Not a recognizable dep; keep it attached to the next dep.
                current_dep_lines.append(original_line)
                continue

            if match.group("quote") == "'":
                single_quote_count += 1
            else:
                double_quote_count += 1

            if self._indent is None:
                stripped_len = len(original_line.lstrip())
                self._indent = original_line[: len(original_line) - stripped_len]

            dep = match.group("dep")
            dep_line = original_line
            # Prepend the recipe repo name (assumed to be repo_name) if it's
            # missing.
            if "/" not in dep:
                old_dep = dep
                dep = "%s/%s" % (self._repo_name, old_dep)
                dep_line = dep_line.replace(old_dep, dep, 1)

            current_dep_lines.append(dep_line)
            self.original_deps[dep] = "".join(current_dep_lines)
            current_dep_lines = []

        # Generated entries use whichever quote style is more common in the
        # existing DEPS, defaulting to double quotes on a tie.
        self._quote = '"'
        if single_quote_count > double_quote_count:
            self._quote = "'"

    @functools.cached_property
    def new_lines(self):
        """Returns a list of the lines of the file with formatting applied.

        The result is computed on first access and cached on the instance, so
        `used_deps` and `recipe_module_imports` must be final by then.
        """
        # Without a complete `DEPS = [` ... `]` block there is no safe place
        # to splice entries into, so leave the file as-is.
        if self._deps_start_line == -1 or self._deps_end_line == -1:
            return self.original_lines[:]

        indent = self._DEFAULT_INDENT if self._indent is None else self._indent
        # Every directly imported module must be declared...
        new_deps = {
            dep: f"{indent}{self._quote}{dep}{self._quote},\n"
            for dep in self.recipe_module_imports
        }
        # ...and every used original dep is kept with its original text (and
        # preceding comments), overriding any generated entry for that dep.
        for dep, lines in self.original_deps.items():
            dep_basename = dep.split("/")[-1]
            if dep in self.used_deps or dep_basename in self.used_deps:
                new_deps[dep] = lines

        sorted_dep_lines = [text for _, text in sorted(new_deps.items())]
        new_lines = self.original_lines[:]
        new_lines[self._deps_start_line : self._deps_end_line] = sorted_dep_lines
        return new_lines

    @functools.cached_property
    def diff(self):
        """Returns a git-style rendering of the diff that would be produced.

        Empty string if the file needs no changes.
        """
        return "".join(
            difflib.unified_diff(
                self.original_lines, self.new_lines, "a" + self.path, "b" + self.path
            )
        ).rstrip()

    def rewrite(self):
        """Overwrite the file on disk with `new_lines`."""
        print("rewriting %s" % os.path.relpath(self.path, os.getcwd()))
        with open(self.path, "w") as f:
            f.writelines(self.new_lines)
def analyze_recipe(path, repo_name):
    """Check for unused DEPS in a recipe file.

    Populates `used_deps` and `recipe_module_imports` on the returned object
    by scanning the file's non-comment lines once.

    Args:
        path (str): The absolute path to the recipe Python file.
        repo_name (str): The name of the current recipes repo.

    Returns:
        FileWithDEPS: The parsed file with usage information filled in.
    """
    recipe_file = FileWithDEPS(path, repo_name)

    # Files that declare no DEPS entries are left untouched: there is nothing
    # to prune, and the file may have no DEPS block at all into which
    # imported modules could be inserted.
    if not recipe_file.original_deps:
        return recipe_file

    # This is a simple heuristic: for a dep "foo", consider it used if
    # "api.foo" is found anywhere in the file. The name is escaped so that
    # regex metacharacters in a dep name are matched literally.
    dep_patterns = {
        dep: re.compile(r"api\.{}\b".format(re.escape(dep.split("/")[-1])))
        for dep in recipe_file.original_deps
    }

    for line in recipe_file.original_lines:
        if line.strip().startswith("#"):
            continue

        # Collect imports from every line, independently of where dep usages
        # happen to be found.
        imported_module = parse_recipe_module_import(line)
        if imported_module:
            recipe_file.recipe_module_imports.add(imported_module)

        for dep, pattern in dep_patterns.items():
            if dep not in recipe_file.used_deps and pattern.search(line):
                recipe_file.used_deps.add(dep)

    return recipe_file
def analyze_module(module_dir, repo_name):
    """Gather dep usage for a recipe module's __init__.py DEPS list.

    Every Python file under the module (other than the top-level "examples"
    and "resources" directories) is scanned for attribute accesses that look
    like dep usages and for direct `RECIPE_MODULES` imports.

    Args:
        module_dir (str): The absolute path to the root of the recipe module.
        repo_name (str): The name of the current recipes repo.

    Returns:
        FileWithDEPS: The module's __init__.py with `used_deps` and
        `recipe_module_imports` filled in.

    Raises:
        Exception: If the module directory has no __init__.py file.
    """
    module_name = os.path.basename(module_dir)
    init_path = os.path.join(module_dir, "__init__.py")
    if not os.path.exists(init_path):
        raise Exception("recipe module %s has no __init__.py file" % module_name)

    # Heuristic for which dependencies the module uses: any match of this
    # regex in one of the module's files counts as a usage of <dep>.
    usage_regex = re.compile(r"\b((self\.)?\b_?api|self\.m)\.(?P<dep>\w+)\b")

    # A module importing from itself is not a dependency.
    own_name = f"{repo_name}/{module_name}"

    # "examples" holds standalone recipes that don't relate to the module's
    # DEPS, and "resources" holds standalone scripts that don't use recipe
    # DEPS at all, so neither should influence the module's unused DEPS.
    skipped_top_level_dirs = ["examples", "resources"]

    init_file = FileWithDEPS(init_path, repo_name)

    for current_dir, child_dirs, filenames in os.walk(module_dir, topdown=True):
        if current_dir == module_dir:
            # Pruning the list in place tells os.walk() not to descend into
            # the removed subdirectories.
            child_dirs[:] = [
                name for name in child_dirs if name not in skipped_top_level_dirs
            ]

        python_files = [
            name for name in filenames if os.path.splitext(name)[1] == ".py"
        ]
        for filename in python_files:
            with open(os.path.join(current_dir, filename)) as source:
                source_lines = source.readlines()

            for source_line in source_lines:
                if source_line.strip().startswith("#"):
                    continue

                imported = parse_recipe_module_import(source_line)
                if imported and imported != own_name:
                    init_file.recipe_module_imports.add(imported)

                init_file.used_deps.update(
                    found.group("dep") for found in usage_regex.finditer(source_line)
                )

    return init_file
# Matches `from RECIPE_MODULES.<repo>.<module> import` at the start of a line.
# The dots are escaped so that they only match literal dots.
_RECIPE_MODULE_IMPORT_RE = re.compile(
    r"from RECIPE_MODULES\.(?P<repo>\w+)\.(?P<module>\w+) import"
)


def parse_recipe_module_import(line):
    """Return the "repo/module" named by a direct recipe module import.

    Args:
        line (str): A single line of Python source.

    Returns:
        str or None: "<repo>/<module>" if the line starts with
        `from RECIPE_MODULES.<repo>.<module> import`, otherwise None.
    """
    match = _RECIPE_MODULE_IMPORT_RE.match(line)
    if not match:
        return None
    return match.group("repo") + "/" + match.group("module")
# Run the cleanup only when executed as a script, not when imported.
if __name__ == "__main__":
    main()