| #!/usr/bin/env python3 |
| # Copyright 2020 The Fuchsia Authors. All rights reserved. |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| """ |
| A hacky script to clean up recipe DEPS: |
| - Delete unused recipe and recipe module DEPS. It uses a simple heuristic for |
| this, which may produce some false positives and false negatives. |
| OPTIONAL_TODO: Using the ast module would be a more reliable way of |
| detecting dep uses, since this heuristic might find usages in docstrings |
| and trailing comments. |
| - Ensure all deps are prefixed with a repo name (e.g. "fuchsia"). |
| - Sort all DEPS lists alphabetically. |
| """ |
| |
| import argparse |
| import difflib |
| import functools |
| import json |
| import os |
| import re |
| import sys |
| |
| |
def main():
    """Command-line entry point: analyze all recipe files and fix or report DEPS.

    Must be run from the root of a recipes repo: the repo name is read from
    `infra/config/recipes.cfg` under the current working directory, and the
    `recipes` and `recipe_modules` directories there are scanned for `.py`
    files.

    Without flags, every file whose DEPS need cleanup is rewritten in place.
    With `--check`, nothing is written; the diffs are printed and the process
    exits with status 1 if any file needs cleanup. With `--json-output PATH`,
    the cwd-relative paths of all files needing cleanup are written to PATH as
    a JSON list (in either mode).
    """
    parser = argparse.ArgumentParser(description="Sort and filter recipe DEPS")
    parser.add_argument(
        "--check",
        action="store_true",
        help=(
            "Instead of writing changes, print diff to stdout and exit with retcode 1 "
            "if cleanup is needed."
        ),
    )
    parser.add_argument(
        "--json-output",
        type=str,
        help=(
            "Write a JSON list of relative paths of badly formatted files to this file."
        ),
    )
    args = parser.parse_args()

    cwd = os.getcwd()

    # Get the name of this recipes repo to append to any DEPS that don't
    # specify a repo.
    with open(os.path.join(cwd, "infra", "config", "recipes.cfg")) as cfg_file:
        cfg = json.load(cfg_file)
    repo_name = cfg["repo_name"]

    files_with_deps = []

    recipes_dir = os.path.join(cwd, "recipes")
    modules_dir = os.path.join(cwd, "recipe_modules")
    for directory in [recipes_dir, modules_dir]:
        for subdir, subdirs, files in os.walk(directory):
            # Sort in place so the traversal (and therefore the order of the
            # printed diffs and of the JSON output) does not depend on
            # filesystem enumeration order.
            subdirs.sort()
            for relpath in sorted(files):
                _, ext = os.path.splitext(relpath)
                if ext != ".py":
                    continue
                # __init__.py files are handled separately, since they contain
                # DEPS entries but usages of those deps may be in other files
                # in the directory.
                if relpath == "__init__.py":
                    continue
                path = os.path.join(subdir, relpath)
                files_with_deps.append(analyze_recipe(path, repo_name))

    # A directory directly under recipe_modules/ is treated as a recipe module
    # only if it contains an api.py.
    for relpath in sorted(os.listdir(modules_dir)):
        path = os.path.join(modules_dir, relpath)
        if os.path.exists(os.path.join(path, "api.py")):
            files_with_deps.append(analyze_module(path, repo_name))

    files_to_fix = [f for f in files_with_deps if f.diff]
    if args.check:
        if files_to_fix:
            # Print the instructions once, followed by every diff, rather than
            # repeating the header in front of each file's diff.
            script_relpath = os.path.relpath(__file__, cwd)
            print(
                "Some recipe files' DEPS are malformatted.\n"
                "Run ./%s to fix:\n" % script_relpath
            )
        for f in files_to_fix:
            print(f.diff)
    else:
        for f in files_to_fix:
            f.rewrite()

    if args.json_output:
        with open(args.json_output, "w") as outfile:
            json.dump([os.path.relpath(f.path, cwd) for f in files_to_fix], outfile)

    if args.check and files_to_fix:
        sys.exit(1)
| |
| |
class FileWithDEPS:
    """A Python file plus the parsed entries of its `DEPS = [ ... ]` list.

    The constructor reads the file and parses the DEPS list. The caller then
    fills in `used_deps` and `recipe_module_imports` (the results of its own
    static analysis) *before* reading `new_lines` or `diff`, because both of
    those are computed once and cached on the instance.
    """

    # Matches a quoted dep name at the end of a stripped line, optionally
    # followed by a comma and a trailing comment. Only real quote characters
    # are accepted as delimiters.
    _DEP_LINE_RE = re.compile(
        r'(?P<quote>[\'"])(?P<dep>\S+)(?P=quote),?\s*(?P<comment>\#.*)?$'
    )

    def __init__(self, path, repo_name):
        """Read the file and its DEPS.

        Args:
            path (str): Path of the Python file to read.
            repo_name (str): Repo name to prefix onto any dep that doesn't
                already contain a "/".
        """
        self.path = path
        self._repo_name = repo_name

        # To be updated by the caller after static analysis.
        self.used_deps = set()

        # Modules imported using `from RECIPE_MODULES.repo.module import ...`.
        # Even though the recipe engine doesn't enforce it, we require that
        # every directly-imported module be declared in the importer's DEPS so
        # that DEPS are a complete representation of the recipe's or module's
        # dependencies.
        #
        # To be updated by the caller after static analysis.
        self.recipe_module_imports = set()

        # A mapping from full dep name (e.g. "fuchsia/foo") to the text of the
        # lines that correspond to that dep - the actual line that names the
        # dep, preceded by any comment (or otherwise unrecognized) lines that
        # came before it, joined into one string. Trailing newlines are kept.
        self.original_deps = {}
        # The original lines of the file, including trailing newlines.
        self.original_lines = []

        # The line number of the first dep (-1 if no `DEPS = [` line exists).
        self._deps_start_line = -1
        # The line number of the line after the last dep (-1 if no closing
        # `]` line was found).
        self._deps_end_line = -1
        # Leading whitespace of the first dep entry found, reused for entries
        # that get inserted later. None if the list has no entries.
        self._dep_indent = None

        with open(self.path) as f:
            self.original_lines = f.readlines()

        single_quote_count = double_quote_count = 0

        # The lines associated with the dep currently being parsed, including any
        # comment lines preceding the dep.
        current_dep_lines = []

        for i, original_line in enumerate(self.original_lines):
            line = original_line.strip()
            if line == "DEPS = [":
                self._deps_start_line = i + 1
                continue
            elif self._deps_start_line == -1:
                # Still before the DEPS list.
                continue
            elif line == "]":
                self._deps_end_line = i
                break

            if line.startswith("#"):
                current_dep_lines.append(original_line)
                continue

            match = self._DEP_LINE_RE.search(line)
            if not match:
                # Not recognizable as a dep; carry it along with the next dep.
                current_dep_lines.append(original_line)
                continue

            if match.group("quote") == "'":
                single_quote_count += 1
            else:
                double_quote_count += 1

            if self._dep_indent is None:
                stripped_len = len(original_line.lstrip())
                self._dep_indent = original_line[: len(original_line) - stripped_len]

            dep = match.group("dep")

            dep_line = original_line
            # Prepend the recipe repo name (assumed to be repo_name) if it's
            # missing.
            if "/" not in dep:
                old_dep = dep
                dep = "%s/%s" % (self._repo_name, old_dep)
                dep_line = dep_line.replace(old_dep, dep, 1)

            current_dep_lines.append(dep_line)
            self.original_deps[dep] = "".join(current_dep_lines)
            current_dep_lines = []

        # Inserted entries use whichever quote style is more common in the
        # existing list, defaulting to double quotes.
        self._quote = '"'
        if single_quote_count > double_quote_count:
            self._quote = "'"

    @functools.cached_property
    def new_lines(self):
        """Returns a list of the lines of the file with formatting applied.

        The DEPS list is rebuilt from (a) one freshly generated entry for each
        name in `recipe_module_imports` and (b) every original entry whose
        full name or basename is in `used_deps`, keeping the original text of
        (b) when a name appears in both. Entries are sorted by full dep name.
        A dep's text may span several physical lines, so elements of the
        returned list are not guaranteed to be single lines.

        If the file has no `DEPS = [` line, or no closing `]` line was found,
        an unmodified copy of the original lines is returned: the start/end
        markers are not usable as slice bounds in that case.
        """
        if self._deps_start_line == -1 or self._deps_end_line == -1:
            return self.original_lines[:]

        # Match the indentation of existing entries; fall back to a single
        # space when the list has no entries to copy it from.
        indent = self._dep_indent if self._dep_indent is not None else " "
        new_deps = {
            dep: f"{indent}{self._quote}{dep}{self._quote},\n"
            for dep in self.recipe_module_imports
        }
        for dep, lines in self.original_deps.items():
            dep_basename = dep.split("/")[-1]
            if dep in self.used_deps or dep_basename in self.used_deps:
                new_deps[dep] = lines

        sorted_dep_lines = [text for _, text in sorted(new_deps.items())]
        new_lines = self.original_lines[:]
        new_lines[self._deps_start_line : self._deps_end_line] = sorted_dep_lines
        return new_lines

    @functools.cached_property
    def diff(self):
        """Returns a git-style rendering of the diff that would be produced.

        The result is an empty string when the file needs no changes.
        """
        return "".join(
            difflib.unified_diff(
                self.original_lines, self.new_lines, "a" + self.path, "b" + self.path
            )
        ).rstrip()

    def rewrite(self):
        """Overwrite the file on disk with `new_lines`, announcing it on stdout."""
        print("rewriting %s" % os.path.relpath(self.path, os.getcwd()))
        with open(self.path, "w") as f:
            f.writelines(self.new_lines)
| |
| |
def analyze_recipe(path, repo_name):
    """Check for unused DEPS in a recipe file.

    A dep "repo/foo" is considered used if "api.foo" appears on any
    non-comment line of the file. This is a simple textual heuristic.
    Top-level `from RECIPE_MODULES.repo.module import ...` lines are recorded
    as module imports.

    Args:
        path (str): The absolute path to the recipe Python file.
        repo_name (str): The name of the current recipes repo.

    Returns:
        FileWithDEPS: The parsed file with `used_deps` and
        `recipe_module_imports` populated.
    """
    recipe_file = FileWithDEPS(path, repo_name)
    # Files that list no deps are left completely untouched: neither usages
    # nor imports are recorded for them.
    if not recipe_file.original_deps:
        return recipe_file

    # Lines that are entirely a comment never count as usages or imports.
    code_lines = [
        line
        for line in recipe_file.original_lines
        if not line.strip().startswith("#")
    ]

    # Collect imports in a single pass over the file so the result doesn't
    # depend on where in the file the deps happen to be used.
    for line in code_lines:
        imported_module = parse_recipe_module_import(line)
        if imported_module:
            recipe_file.recipe_module_imports.add(imported_module)

    for dep in recipe_file.original_deps:
        # This is a simple heuristic: for a dep "foo", consider it used if
        # "api.foo" is found anywhere in the file. The name is escaped so
        # that it is always matched literally.
        dep_regex = re.compile(r"api\.{}\b".format(re.escape(dep.split("/")[-1])))
        if any(dep_regex.search(line) for line in code_lines):
            recipe_file.used_deps.add(dep)

    return recipe_file
| |
| |
def analyze_module(module_dir, repo_name):
    """Check for unused DEPS in a recipe module's __init__.py file.

    Every `.py` file under the module directory (apart from the top-level
    "examples" and "resources" subdirectories) is scanned line by line for
    dep usages and for imports of other recipe modules; the findings are
    recorded on the FileWithDEPS object for the module's __init__.py.

    Args:
        module_dir (str): The absolute path to the root of the recipe module.
        repo_name (str): The name of the current recipes repo.

    Returns:
        FileWithDEPS: The module's __init__.py with `used_deps` and
        `recipe_module_imports` populated.

    Raises:
        Exception: If the module directory has no __init__.py file.
    """
    module_name = os.path.basename(module_dir)
    init_path = os.path.join(module_dir, "__init__.py")
    if not os.path.exists(init_path):
        raise Exception("recipe module %s has no __init__.py file" % module_name)

    # Heuristic for spotting which dependencies the module uses: attribute
    # access on `api`, `_api`, `self.api`, `self._api` or `self.m`.
    usage_regex = re.compile(r"\b((self\.)?\b_?api|self\.m)\.(?P<dep>\w+)\b")
    # A module importing from itself is not a dependency.
    own_name = f"{repo_name}/{module_name}"
    # "examples" holds standalone recipes that are unrelated to the module's
    # own DEPS, and "resources" holds standalone scripts that don't use recipe
    # DEPS at all, so neither may influence the module's unused-DEPS result.
    skipped_top_level_dirs = ("examples", "resources")

    init_file = FileWithDEPS(init_path, repo_name)
    for current_dir, child_dirs, filenames in os.walk(module_dir, topdown=True):
        if current_dir == module_dir:
            # Prune in place so os.walk() never descends into these.
            child_dirs[:] = [
                name for name in child_dirs if name not in skipped_top_level_dirs
            ]

        python_files = [
            name for name in filenames if os.path.splitext(name)[1] == ".py"
        ]
        for filename in python_files:
            with open(os.path.join(current_dir, filename)) as source:
                source_lines = source.readlines()

            for text in source_lines:
                if text.strip().startswith("#"):
                    continue
                imported = parse_recipe_module_import(text)
                if imported and imported != own_name:
                    init_file.recipe_module_imports.add(imported)
                init_file.used_deps.update(
                    found.group("dep") for found in usage_regex.finditer(text)
                )

    return init_file
| |
| |
# Matches `from RECIPE_MODULES.<repo>.<module> import` at the very start of a
# line. The dots are escaped so that only a literal "." separates the parts.
_RECIPE_MODULE_IMPORT_RE = re.compile(
    r"from RECIPE_MODULES\.(?P<repo>\w+)\.(?P<module>\w+) import"
)


def parse_recipe_module_import(line):
    """Extract the recipe module named by a direct RECIPE_MODULES import line.

    Only lines that begin (with no leading whitespace) with
    `from RECIPE_MODULES.<repo>.<module> import` are recognized; imports from
    deeper submodules of a recipe module do not match.

    Args:
        line (str): One line of Python source.

    Returns:
        str or None: "<repo>/<module>" if the line is such an import,
        otherwise None.
    """
    match = _RECIPE_MODULE_IMPORT_RE.match(line)
    if match is None:
        return None
    return f"{match.group('repo')}/{match.group('module')}"
| |
| |
# Run the cleanup only when this file is executed as a script, not when it is
# imported. main() returns nothing, so this exits with status 0 unless main()
# itself exits with another status.
if __name__ == "__main__":
    sys.exit(main())