blob: fdefa96d5f4d30668559fb2c995032d6bd5d8147 [file] [log] [blame]
#!/usr/bin/env python3
# Copyright 2020 The Fuchsia Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""
A hacky script to clean up recipe DEPS:
- Delete unused recipe and recipe module DEPS. It uses a simple heuristic for
this, which may produce some false positives and false negatives.
OPTIONAL_TODO: Using the ast module would be a more reliable way of
detecting dep uses, since this heuristic might find usages in docstrings
and trailing comments.
- Ensure all deps are prefixed with a repo name (e.g. "fuchsia").
- Sort all DEPS lists alphabetically.
"""
import argparse
import difflib
import functools
import json
import os
import re
import sys
def main():
    """Clean up (or, with --check, only report) the DEPS of all recipe files.

    Must be run from the root of a recipes repo: the repo name is read from
    infra/config/recipes.cfg under the current working directory, and files
    are discovered under the "recipes" and "recipe_modules" directories.

    With --check nothing is written; the diffs are printed and the process
    exits with retcode 1 if any file needs cleanup. With --json-output the
    relative paths of all files needing cleanup are written as a JSON list.
    """
    parser = argparse.ArgumentParser(description="Sort and filter recipe DEPS")
    parser.add_argument(
        "--check",
        action="store_true",
        help=(
            "Instead of writing changes, print diff to stdout and exit with retcode 1 "
            "if cleanup is needed."
        ),
    )
    parser.add_argument(
        "--json-output",
        type=str,
        help=(
            "Write a JSON list of relative paths of badly formatted files to this file."
        ),
    )
    args = parser.parse_args()

    cwd = os.getcwd()

    # Get the name of this recipes repo to prepend to any DEPS that don't
    # specify a repo.
    with open(os.path.join(cwd, "infra", "config", "recipes.cfg")) as f:
        cfg = json.load(f)
    repo_name = cfg["repo_name"]

    files_with_deps = []
    recipes_dir = os.path.join(cwd, "recipes")
    modules_dir = os.path.join(cwd, "recipe_modules")

    for directory in [recipes_dir, modules_dir]:
        for subdir, subdirs, files in os.walk(directory):
            # Sort in place so os.walk() descends in a deterministic order;
            # this keeps the printed diffs and the JSON output reproducible.
            subdirs.sort()
            for relpath in sorted(files):
                _, ext = os.path.splitext(relpath)
                if ext != ".py":
                    continue
                # __init__.py files are handled separately, since they contain
                # DEPS entries but usages of those deps may be in other files
                # in the directory.
                if relpath == "__init__.py":
                    continue
                path = os.path.join(subdir, relpath)
                files_with_deps.append(analyze_recipe(path, repo_name))

    # os.walk() silently skips a missing directory but os.listdir() raises, so
    # guard explicitly to support repos that have no recipe modules.
    if os.path.isdir(modules_dir):
        for relpath in sorted(os.listdir(modules_dir)):
            path = os.path.join(modules_dir, relpath)
            # Only directories containing an api.py are treated as modules.
            if os.path.exists(os.path.join(path, "api.py")):
                files_with_deps.append(analyze_module(path, repo_name))

    files_to_fix = [f for f in files_with_deps if f.diff]

    if args.check and files_to_fix:
        # Print the explanation once rather than once per offending file.
        script_relpath = os.path.relpath(__file__, cwd)
        print(
            "Some recipe files' DEPS are malformatted.\n"
            "Run ./%s to fix:\n" % script_relpath
        )

    for f in files_to_fix:
        if args.check:
            print(f.diff)
        else:
            f.rewrite()

    if args.json_output:
        with open(args.json_output, "w") as outfile:
            json.dump([os.path.relpath(f.path, cwd) for f in files_to_fix], outfile)

    if args.check and files_to_fix:
        sys.exit(1)
class FileWithDEPS(object):
    """A Python file together with the parsed contents of its DEPS list.

    Only a list whose opening line is exactly `DEPS = [` and whose closing
    line is exactly `]` (ignoring surrounding whitespace) is recognized. A
    file without such a list is left untouched by `new_lines`/`rewrite`.

    Callers are expected to fill in `used_deps` before reading `new_lines` or
    `diff`; both are computed once and then cached on the instance.
    """

    # A quoted dep name, optionally followed by a comma and a trailing comment.
    _DEP_REGEX = re.compile(r'([\'"])(?P<dep>\S+)\1,?\s*(?P<comment>\#.*)?$')

    def __init__(self, path, repo_name):
        """Read the file and its DEPS.

        Args:
          path (str): Path to the Python file to read.
          repo_name (str): Repo name to prepend to any dep that doesn't
            already contain a "/".
        """
        self.path = path
        self._repo_name = repo_name

        # To be updated by the caller after static analysis.
        self.used_deps = set()

        # A mapping from full dep name (e.g. "fuchsia/foo") to the text that
        # corresponds to that dep - the actual import line that names the
        # dep, preceded by any comment lines above it, joined into a single
        # string. Every line includes its trailing newline.
        self.original_deps = {}
        # The original lines of the file, including trailing newlines.
        self.original_lines = []
        # The line number of the first dep.
        self._deps_start_line = -1
        # The line number of the line after the last dep.
        self._deps_end_line = -1
        # Non-blank lines found after the last dep but before the closing
        # bracket (e.g. a trailing comment). They belong to no dep, so they
        # are kept at the end of the list instead of being dropped.
        self._trailing_lines = []

        with open(self.path) as f:
            self.original_lines = f.readlines()

        # The lines associated with the dep currently being parsed, including
        # any comment lines preceding the dep.
        current_dep_lines = []
        for i, original_line in enumerate(self.original_lines):
            line = original_line.strip()
            if line == "DEPS = [":
                self._deps_start_line = i + 1
                continue
            elif self._deps_start_line == -1:
                continue
            elif line == "]":
                self._deps_end_line = i
                self._trailing_lines = [l for l in current_dep_lines if l.strip()]
                break

            if line.startswith("#"):
                current_dep_lines.append(original_line)
                continue

            match = self._DEP_REGEX.search(line)
            if not match:
                # Not a recognizable dep; keep it attached to the next dep.
                current_dep_lines.append(original_line)
                continue

            dep = match.group("dep")
            dep_line = original_line
            # Prepend the recipe repo name (assumed to be repo_name) if it's
            # missing.
            if "/" not in dep:
                old_dep = dep
                dep = "%s/%s" % (self._repo_name, old_dep)
                dep_line = dep_line.replace(old_dep, dep, 1)
            current_dep_lines.append(dep_line)
            self.original_deps[dep] = "".join(current_dep_lines)
            current_dep_lines = []

        if self._deps_start_line != -1 and self._deps_end_line == -1:
            # The list was opened but no closing "]" line was found. Splicing
            # with an end index of -1 would overwrite most of the rest of the
            # file, so treat the file as having no DEPS list at all.
            self._deps_start_line = -1
            self.original_deps = {}

    @functools.cached_property
    def new_lines(self):
        """Returns a list of the lines of the file with formatting applied.

        Unused deps are dropped (a dep counts as used if either its full name
        or its basename is in `used_deps`) and the rest are sorted by full
        name.
        """
        if self._deps_start_line == -1:
            return self.original_lines[:]

        kept_deps = {
            dep: text
            for dep, text in self.original_deps.items()
            if dep in self.used_deps or dep.split("/")[-1] in self.used_deps
        }

        sorted_dep_lines = []
        for dep in sorted(kept_deps):
            # Each value may hold several lines (comments plus the dep line).
            # Split it back up so every element is exactly one line; otherwise
            # difflib reports a change for files that are already clean.
            sorted_dep_lines.extend(kept_deps[dep].splitlines(keepends=True))
        sorted_dep_lines.extend(self._trailing_lines)

        new_lines = self.original_lines[:]
        new_lines[self._deps_start_line : self._deps_end_line] = sorted_dep_lines
        return new_lines

    @functools.cached_property
    def diff(self):
        """Returns a git-style rendering of the diff that would be produced.

        The result is an empty string when the file needs no changes.
        """
        return "".join(
            difflib.unified_diff(
                self.original_lines, self.new_lines, "a" + self.path, "b" + self.path
            )
        ).rstrip()

    def rewrite(self):
        """Overwrite the file on disk with `new_lines`."""
        print("rewriting %s" % os.path.relpath(self.path, os.getcwd()))
        with open(self.path, "w") as f:
            f.writelines(self.new_lines)
def analyze_recipe(path, repo_name):
    """Check for unused DEPS in a recipe file.

    Args:
      path (str): The absolute path to the recipe Python file.
      repo_name (str): The name of the current recipes repo.

    Returns:
      A FileWithDEPS for `path` whose `used_deps` contains every dep from the
      file's DEPS list that appears to be used in the file.
    """
    recipe_file = FileWithDEPS(path, repo_name)

    # Lines that are entirely a comment never count as usages. Filter them
    # out once rather than once per dep.
    code_lines = [
        line
        for line in recipe_file.original_lines
        if not line.strip().startswith("#")
    ]

    for dep in recipe_file.original_deps:
        # This is a simple heuristic: for a dep "foo", consider it used if
        # "api.foo" is found anywhere in the file. The name is escaped so
        # that any regex metacharacters in it are matched literally.
        dep_basename = re.escape(dep.split("/")[-1])
        dep_regex = re.compile(r"api\.{}\b".format(dep_basename))
        if any(dep_regex.search(line) for line in code_lines):
            recipe_file.used_deps.add(dep)

    return recipe_file
def analyze_module(module_dir, repo_name):
    """Check for unused DEPS in a recipe module's __init__.py file.

    Args:
      module_dir (str): The absolute path to the root of the recipe module.
      repo_name (str): The name of the current recipes repo.

    Returns:
      A FileWithDEPS for the module's __init__.py whose `used_deps` contains
      the basename of every dep that appears to be used by any Python file in
      the module (outside of "examples" and "resources").

    Raises:
      Exception: If the module has no __init__.py file.
    """
    # We'll search for matches of this regex in each of this module's files as
    # a heuristic for determining which dependencies the module uses.
    usage_regex = re.compile(r"\b((self\.)?\b_?api|self\.m)\.(?P<dep>\w+)\b")

    init_path = os.path.join(module_dir, "__init__.py")
    if not os.path.exists(init_path):
        # Name the module itself (not its parent directory) in the message;
        # normpath() makes basename() work even with a trailing separator.
        raise Exception(
            "recipe module %s has no __init__.py file"
            % os.path.basename(os.path.normpath(module_dir))
        )
    init_file = FileWithDEPS(init_path, repo_name)

    for subdir, subdirs, files in os.walk(module_dir, topdown=True):
        # The "examples" directory contains standalone recipes that don't
        # relate to the recipe module's DEPS and shouldn't be taken into
        # account when computing the recipe module's unused DEPS. Likewise, the
        # "resources" directory contains standalone scripts that don't use
        # recipe DEPS at all.
        if subdir == module_dir:
            for special_subdir in ["examples", "resources"]:
                if special_subdir in subdirs:
                    # Tell os.walk() not to enter this subdirectory.
                    subdirs.remove(special_subdir)

        for relpath in files:
            _, ext = os.path.splitext(relpath)
            if ext != ".py":
                continue
            path = os.path.join(subdir, relpath)
            with open(path) as f:
                for line in f:
                    # Lines that are entirely a comment don't count as usages.
                    if line.strip().startswith("#"):
                        continue
                    for match in usage_regex.finditer(line):
                        init_file.used_deps.add(match.group("dep"))

    return init_file
# Run the cleanup only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()