scripts/cleanup_deps.py - infra/recipes.git - Git at Google

 #!/usr/bin/env python3
 # Copyright 2020 The Fuchsia Authors. All rights reserved.
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.
 """
 A hacky script to clean up recipe DEPS:
 - Delete unused recipe and recipe module DEPS. It uses a simple heuristic for
   this, which may produce some false positives and false negatives.
   OPTIONAL_TODO: Using the ast module would be a more reliable way of
   detecting dep uses, since this heuristic might find usages in docstrings
   and trailing comments.
 - Ensure all deps are prefixed with a repo name (e.g. "fuchsia").
 - Sort all DEPS lists alphabetically.
 """

 import argparse
 import difflib
 import functools
 import json
 import os
 import re
 import sys


 def main():
     """Clean up, or with --check only report on, the DEPS of all recipe files.

     Paths are resolved against the current working directory: the repo name
     is read from infra/config/recipes.cfg, every `.py` file other than
     `__init__.py` under `recipes/` and `recipe_modules/` is analyzed as a
     recipe, and every directory directly under `recipe_modules/` that
     contains an `api.py` is analyzed as a recipe module.

     Without --check, each file whose DEPS need cleanup is rewritten in place.
     With --check, the diffs are printed to stdout instead and the process
     exits with status 1 if any file needs cleanup. With --json-output, the
     paths (relative to the working directory) of the files needing cleanup
     are written to the given file as a JSON list.
     """
     parser = argparse.ArgumentParser(description="Sort and filter recipe DEPS")
     parser.add_argument(
         "--check",
         action="store_true",
         help=(
             "Instead of writing changes, print diff to stdout and exit with retcode 1 "
             "if cleanup is needed."
         ),
     )
     parser.add_argument(
         "--json-output",
         type=str,
         help=(
             "Write a JSON list of relative paths of badly formatted files to this file."
         ),
     )
     args = parser.parse_args()

     cwd = os.getcwd()

     # Get the name of this recipes repo to prepend to any DEPS that don't
     # specify a repo.
     with open(os.path.join(cwd, "infra", "config", "recipes.cfg")) as f:
         cfg = json.load(f)
     repo_name = cfg["repo_name"]

     files_with_deps = []

     recipes_dir = os.path.join(cwd, "recipes")
     modules_dir = os.path.join(cwd, "recipe_modules")
     for directory in [recipes_dir, modules_dir]:
         for subdir, _, files in os.walk(directory):
             for relpath in files:
                 _, ext = os.path.splitext(relpath)
                 if ext != ".py":
                     continue
                 # __init__.py files are handled separately, since they contain
                 # DEPS entries but usages of those deps may be in other files
                 # in the directory.
                 if relpath == "__init__.py":
                     continue
                 path = os.path.join(subdir, relpath)
                 files_with_deps.append(analyze_recipe(path, repo_name))

     # A directory counts as a recipe module only if it contains an api.py.
     for relpath in os.listdir(modules_dir):
         path = os.path.join(modules_dir, relpath)
         if os.path.exists(os.path.join(path, "api.py")):
             files_with_deps.append(analyze_module(path, repo_name))

     files_to_fix = [
         file_with_deps for file_with_deps in files_with_deps if file_with_deps.diff
     ]
     if args.check:
         if files_to_fix:
             # Print the banner once, rather than repeating it before every
             # file's diff.
             script_relpath = os.path.relpath(__file__, cwd)
             print(
                 "Some recipe files' DEPS are malformatted.\n"
                 "Run ./%s to fix:\n" % script_relpath
             )
         for file_with_deps in files_to_fix:
             print(file_with_deps.diff)
     else:
         for file_with_deps in files_to_fix:
             file_with_deps.rewrite()

     if args.json_output:
         with open(args.json_output, "w") as outfile:
             json.dump(
                 [
                     os.path.relpath(file_with_deps.path, cwd)
                     for file_with_deps in files_to_fix
                 ],
                 outfile,
             )

     if args.check and files_to_fix:
         sys.exit(1)


 class FileWithDEPS:
     """A Python file together with the parsed entries of its `DEPS = [` list.

     The file is read and parsed on construction. The caller is then expected
     to fill in `used_deps` and `recipe_module_imports` before reading
     `new_lines` or `diff`, since both are computed once and then cached.

     Attributes:
         path (str): Path of the file.
         used_deps (set of str): Deps considered used, either as full names
             ("fuchsia/foo") or as bare module names ("foo").
         recipe_module_imports (set of str): Full names of the modules
             imported using `from RECIPE_MODULES.repo.module import ...`.
         original_deps (dict): Maps each full dep name to the text of its
             entry in the DEPS list.
         original_lines (list of str): The lines of the file as read.
     """

     # Matches a quoted dep name, optionally followed by a comma and a
     # trailing comment, at the end of a stripped line. Only real quote
     # characters are accepted as delimiters.
     _DEP_LINE_REGEX = re.compile(
         r'(?P<quote>[\'"])(?P<dep>\S+)(?P=quote),?\s*(?P<comment>\#.*)?$'
     )

     def __init__(self, path, repo_name):
         """Read the file and its DEPS.

         Args:
             path (str): Path of the Python file to read.
             repo_name (str): Repo name to prepend to any dep that doesn't
                 specify one.
         """
         self.path = path
         self._repo_name = repo_name

         # To be updated by the caller after static analysis.
         self.used_deps = set()

         # Modules imported using `from RECIPE_MODULES.repo.module import ...`.
         # Even though the recipe engine doesn't enforce it, we require that
         # every directly-imported module be declared in the importer's DEPS so
         # that DEPS are a complete representation of the recipe's or module's
         # dependencies.
         #
         # To be updated by the caller after static analysis.
         self.recipe_module_imports = set()

         # A mapping from full dep name (e.g. "fuchsia/foo") to the text of the
         # lines that correspond to that dep, joined into a single string - the
         # line that names the dep, preceded by any lines (such as comments)
         # between it and the previous dep. The text keeps its newlines.
         self.original_deps = {}
         # The original lines of the file, including trailing newlines.
         self.original_lines = []

         # The line number of the first dep, or -1 if no DEPS list was found.
         self._deps_start_line = -1
         # The line number of the line after the last dep, or -1 if the end of
         # the DEPS list was not found.
         self._deps_end_line = -1

         with open(self.path) as f:
             self.original_lines = f.readlines()

         single_quote_count = double_quote_count = 0

         # The lines associated with the dep currently being parsed, including any
         # comment lines preceding the dep.
         # NOTE: lines collected after the last dep are never attached to a dep,
         # so they are not part of the rewritten list.
         current_dep_lines = []

         for i, original_line in enumerate(self.original_lines):
             line = original_line.strip()
             if line == "DEPS = [":
                 self._deps_start_line = i + 1
                 continue
             elif self._deps_start_line == -1:
                 # Still before the DEPS list.
                 continue
             elif line == "]":
                 self._deps_end_line = i
                 break

             if line.startswith("#"):
                 current_dep_lines.append(original_line)
                 continue

             match = self._DEP_LINE_REGEX.search(line)
             if not match:
                 current_dep_lines.append(original_line)
                 continue

             if match.group("quote") == "'":
                 single_quote_count += 1
             else:
                 double_quote_count += 1

             dep = match.group("dep")

             dep_line = original_line
             # Prepend the recipe repo name (assumed to be repo_name) if it's
             # missing.
             if "/" not in dep:
                 old_dep = dep
                 dep = "%s/%s" % (self._repo_name, old_dep)
                 dep_line = dep_line.replace(old_dep, dep, 1)

             current_dep_lines.append(dep_line)
             self.original_deps[dep] = "".join(current_dep_lines)
             current_dep_lines = []

         # Newly added deps use whichever quote style the file uses most,
         # defaulting to double quotes.
         self._quote = '"'
         if single_quote_count > double_quote_count:
             self._quote = "'"

     @functools.cached_property
     def new_lines(self):
         """Returns a list of the lines of the file with formatting applied.

         The result is computed on first access and cached, so `used_deps` and
         `recipe_module_imports` must be final by then. A file in which no
         complete DEPS list was found is returned unchanged.
         """
         if self._deps_start_line < 0 or self._deps_end_line < 0:
             # There is no DEPS list to rewrite. Splicing with -1 indices would
             # insert lines before the last line of the file.
             return self.original_lines[:]

         # Start from the directly imported modules, then let the file's own
         # entry text (with its comments) win for deps already listed and used.
         new_deps = {
             dep: f"    {self._quote}{dep}{self._quote},\n"
             for dep in self.recipe_module_imports
         }
         for dep, lines in self.original_deps.items():
             dep_basename = dep.split("/")[-1]
             if dep in self.used_deps or dep_basename in self.used_deps:
                 new_deps[dep] = lines

         sorted_dep_lines = [text for _, text in sorted(new_deps.items())]
         new_lines = self.original_lines[:]
         new_lines[self._deps_start_line : self._deps_end_line] = sorted_dep_lines
         return new_lines

     @functools.cached_property
     def diff(self):
         """Returns a git-style rendering of the diff that would be produced.

         The result is an empty string if the file needs no changes.
         """
         return "".join(
             difflib.unified_diff(
                 self.original_lines, self.new_lines, "a" + self.path, "b" + self.path
             )
         ).rstrip()

     def rewrite(self):
         """Overwrite the file with its cleaned-up contents."""
         print("rewriting %s" % os.path.relpath(self.path, os.getcwd()))
         with open(self.path, "w") as f:
             f.writelines(self.new_lines)


 def analyze_recipe(path, repo_name):
     """Check for unused DEPS in a recipe file.

     Args:
         path (str): The absolute path to the recipe Python file.
         repo_name (str): The name of the current recipes repo.

     Returns:
         FileWithDEPS: The parsed file, with `used_deps` and
         `recipe_module_imports` filled in.
     """
     recipe_file = FileWithDEPS(path, repo_name)
     if recipe_file._deps_start_line < 0:
         # The file has no DEPS list, so there is nothing to prune or add to.
         return recipe_file

     # Comment-only lines never count as imports or usages.
     code_lines = [
         line
         for line in recipe_file.original_lines
         if not line.strip().startswith("#")
     ]

     # Collect direct module imports in their own pass over the whole file, so
     # the result doesn't depend on which deps are declared or where they are
     # first used.
     for line in code_lines:
         imported_module = parse_recipe_module_import(line)
         if imported_module:
             recipe_file.recipe_module_imports.add(imported_module)

     for dep in recipe_file.original_deps:
         # This is a simple heuristic: for a dep "foo", consider it used if
         # "api.foo" is found anywhere in the file.
         dep_regex = re.compile(r"api\.{}\b".format(re.escape(dep.split("/")[-1])))
         if any(dep_regex.search(line) for line in code_lines):
             recipe_file.used_deps.add(dep)

     return recipe_file


 def analyze_module(module_dir, repo_name):
     """Check for unused DEPS in a recipe module's __init__.py file.

     Every `.py` file of the module, other than those in its top-level
     "examples" and "resources" directories, is scanned for dep usages and
     for direct imports of other recipe modules.

     Args:
         module_dir (str): The absolute path to the root of the recipe module.
         repo_name (str): The name of the current recipes repo.

     Returns:
         FileWithDEPS: The module's parsed __init__.py, with `used_deps` and
         `recipe_module_imports` filled in.

     Raises:
         Exception: If the module has no __init__.py file.
     """
     module_name = os.path.basename(module_dir)
     init_path = os.path.join(module_dir, "__init__.py")
     if not os.path.exists(init_path):
         raise Exception("recipe module %s has no __init__.py file" % module_name)

     # Heuristic for dep usage: any attribute access on something that looks
     # like the recipe API object (`api.foo`, `_api.foo`, `self.api.foo`,
     # `self.m.foo`, ...).
     usage_regex = re.compile(r"\b((self\.)?\b_?api|self\.m)\.(?P<dep>\w+)\b")
     # A module importing from itself doesn't need itself in its DEPS.
     own_dep_name = f"{repo_name}/{module_name}"
     # "examples" holds standalone recipes with their own DEPS, and
     # "resources" holds standalone scripts that don't use recipe DEPS at
     # all, so neither says anything about the module's own DEPS.
     skipped_toplevel_dirs = ("examples", "resources")

     init_file = FileWithDEPS(init_path, repo_name)
     for current_dir, child_dirs, filenames in os.walk(module_dir, topdown=True):
         if current_dir == module_dir:
             # Prune in place so that os.walk() doesn't enter these directories.
             child_dirs[:] = [
                 name for name in child_dirs if name not in skipped_toplevel_dirs
             ]

         for filename in filenames:
             if os.path.splitext(filename)[1] != ".py":
                 continue

             with open(os.path.join(current_dir, filename)) as source:
                 source_lines = source.readlines()

             for source_line in source_lines:
                 if source_line.strip().startswith("#"):
                     continue
                 imported_module = parse_recipe_module_import(source_line)
                 if imported_module and imported_module != own_dep_name:
                     init_file.recipe_module_imports.add(imported_module)
                 init_file.used_deps.update(
                     usage.group("dep") for usage in usage_regex.finditer(source_line)
                 )

     return init_file


 def parse_recipe_module_import(line):
     """Extract the dep named by a `from RECIPE_MODULES...` import line.

     Args:
         line (str): A line of Python source.

     Returns:
         str or None: "<repo>/<module>" if the line starts with
         `from RECIPE_MODULES.<repo>.<module> import`, otherwise None.
     """
     # The dots are escaped so that they only match literal dots. Unescaped,
     # `from RECIPE_MODULES.fuchsia import foo` would be misparsed into the
     # bogus dep "fuchs/a".
     match = re.match(
         r"from RECIPE_MODULES\.(?P<repo>\w+)\.(?P<module>\w+) import", line
     )
     if not match:
         return None
     return match.group("repo") + "/" + match.group("module")


 # Only run the cleanup when this file is executed as a script, not when it
 # is imported.
 if __name__ == "__main__":
     main()
	#!/usr/bin/env python3
	# Copyright 2020 The Fuchsia Authors. All rights reserved.
	# Use of this source code is governed by a BSD-style license that can be
	# found in the LICENSE file.
	"""
	A hacky script to clean up recipe DEPS:
	- Delete unused recipe and recipe module DEPS. It uses a simple heuristic for
	this, which may produce some false positives and false negatives.
	OPTIONAL_TODO: Using the ast module would be a more reliable way of
	detecting dep uses, since this heuristic might find usages in docstrings
	and trailing comments.
	- Ensure all deps are prefixed with a repo name (e.g. "fuchsia").
	- Sort all DEPS lists alphabetically.
	"""

	import argparse
	import difflib
	import functools
	import json
	import os
	import re
	import sys


	def main():
	    """Clean up, or with --check only report on, the DEPS of all recipe files.

	    Paths are resolved against the current working directory: the repo name
	    is read from infra/config/recipes.cfg, every `.py` file other than
	    `__init__.py` under `recipes/` and `recipe_modules/` is analyzed as a
	    recipe, and every directory directly under `recipe_modules/` that
	    contains an `api.py` is analyzed as a recipe module.

	    Without --check, each file whose DEPS need cleanup is rewritten in place.
	    With --check, the diffs are printed to stdout instead and the process
	    exits with status 1 if any file needs cleanup. With --json-output, the
	    paths (relative to the working directory) of the files needing cleanup
	    are written to the given file as a JSON list.
	    """
	    parser = argparse.ArgumentParser(description="Sort and filter recipe DEPS")
	    parser.add_argument(
	        "--check",
	        action="store_true",
	        help=(
	            "Instead of writing changes, print diff to stdout and exit with retcode 1 "
	            "if cleanup is needed."
	        ),
	    )
	    parser.add_argument(
	        "--json-output",
	        type=str,
	        help=(
	            "Write a JSON list of relative paths of badly formatted files to this file."
	        ),
	    )
	    args = parser.parse_args()

	    cwd = os.getcwd()

	    # Get the name of this recipes repo to prepend to any DEPS that don't
	    # specify a repo.
	    with open(os.path.join(cwd, "infra", "config", "recipes.cfg")) as f:
	        cfg = json.load(f)
	    repo_name = cfg["repo_name"]

	    files_with_deps = []

	    recipes_dir = os.path.join(cwd, "recipes")
	    modules_dir = os.path.join(cwd, "recipe_modules")
	    for directory in [recipes_dir, modules_dir]:
	        for subdir, _, files in os.walk(directory):
	            for relpath in files:
	                _, ext = os.path.splitext(relpath)
	                if ext != ".py":
	                    continue
	                # __init__.py files are handled separately, since they contain
	                # DEPS entries but usages of those deps may be in other files
	                # in the directory.
	                if relpath == "__init__.py":
	                    continue
	                path = os.path.join(subdir, relpath)
	                files_with_deps.append(analyze_recipe(path, repo_name))

	    # A directory counts as a recipe module only if it contains an api.py.
	    for relpath in os.listdir(modules_dir):
	        path = os.path.join(modules_dir, relpath)
	        if os.path.exists(os.path.join(path, "api.py")):
	            files_with_deps.append(analyze_module(path, repo_name))

	    files_to_fix = [
	        file_with_deps for file_with_deps in files_with_deps if file_with_deps.diff
	    ]
	    if args.check:
	        if files_to_fix:
	            # Print the banner once, rather than repeating it before every
	            # file's diff.
	            script_relpath = os.path.relpath(__file__, cwd)
	            print(
	                "Some recipe files' DEPS are malformatted.\n"
	                "Run ./%s to fix:\n" % script_relpath
	            )
	        for file_with_deps in files_to_fix:
	            print(file_with_deps.diff)
	    else:
	        for file_with_deps in files_to_fix:
	            file_with_deps.rewrite()

	    if args.json_output:
	        with open(args.json_output, "w") as outfile:
	            json.dump(
	                [
	                    os.path.relpath(file_with_deps.path, cwd)
	                    for file_with_deps in files_to_fix
	                ],
	                outfile,
	            )

	    if args.check and files_to_fix:
	        sys.exit(1)


	class FileWithDEPS:
	    """A Python file together with the parsed entries of its `DEPS = [` list.

	    The file is read and parsed on construction. The caller is then expected
	    to fill in `used_deps` and `recipe_module_imports` before reading
	    `new_lines` or `diff`, since both are computed once and then cached.

	    Attributes:
	        path (str): Path of the file.
	        used_deps (set of str): Deps considered used, either as full names
	            ("fuchsia/foo") or as bare module names ("foo").
	        recipe_module_imports (set of str): Full names of the modules
	            imported using `from RECIPE_MODULES.repo.module import ...`.
	        original_deps (dict): Maps each full dep name to the text of its
	            entry in the DEPS list.
	        original_lines (list of str): The lines of the file as read.
	    """

	    # Matches a quoted dep name, optionally followed by a comma and a
	    # trailing comment, at the end of a stripped line. Only real quote
	    # characters are accepted as delimiters.
	    _DEP_LINE_REGEX = re.compile(
	        r'(?P<quote>[\'"])(?P<dep>\S+)(?P=quote),?\s*(?P<comment>\#.*)?$'
	    )

	    def __init__(self, path, repo_name):
	        """Read the file and its DEPS.

	        Args:
	            path (str): Path of the Python file to read.
	            repo_name (str): Repo name to prepend to any dep that doesn't
	                specify one.
	        """
	        self.path = path
	        self._repo_name = repo_name

	        # To be updated by the caller after static analysis.
	        self.used_deps = set()

	        # Modules imported using `from RECIPE_MODULES.repo.module import ...`.
	        # Even though the recipe engine doesn't enforce it, we require that
	        # every directly-imported module be declared in the importer's DEPS so
	        # that DEPS are a complete representation of the recipe's or module's
	        # dependencies.
	        #
	        # To be updated by the caller after static analysis.
	        self.recipe_module_imports = set()

	        # A mapping from full dep name (e.g. "fuchsia/foo") to the text of the
	        # lines that correspond to that dep, joined into a single string - the
	        # line that names the dep, preceded by any lines (such as comments)
	        # between it and the previous dep. The text keeps its newlines.
	        self.original_deps = {}
	        # The original lines of the file, including trailing newlines.
	        self.original_lines = []

	        # The line number of the first dep, or -1 if no DEPS list was found.
	        self._deps_start_line = -1
	        # The line number of the line after the last dep, or -1 if the end of
	        # the DEPS list was not found.
	        self._deps_end_line = -1

	        with open(self.path) as f:
	            self.original_lines = f.readlines()

	        single_quote_count = double_quote_count = 0

	        # The lines associated with the dep currently being parsed, including any
	        # comment lines preceding the dep.
	        # NOTE: lines collected after the last dep are never attached to a dep,
	        # so they are not part of the rewritten list.
	        current_dep_lines = []

	        for i, original_line in enumerate(self.original_lines):
	            line = original_line.strip()
	            if line == "DEPS = [":
	                self._deps_start_line = i + 1
	                continue
	            elif self._deps_start_line == -1:
	                # Still before the DEPS list.
	                continue
	            elif line == "]":
	                self._deps_end_line = i
	                break

	            if line.startswith("#"):
	                current_dep_lines.append(original_line)
	                continue

	            match = self._DEP_LINE_REGEX.search(line)
	            if not match:
	                current_dep_lines.append(original_line)
	                continue

	            if match.group("quote") == "'":
	                single_quote_count += 1
	            else:
	                double_quote_count += 1

	            dep = match.group("dep")

	            dep_line = original_line
	            # Prepend the recipe repo name (assumed to be repo_name) if it's
	            # missing.
	            if "/" not in dep:
	                old_dep = dep
	                dep = "%s/%s" % (self._repo_name, old_dep)
	                dep_line = dep_line.replace(old_dep, dep, 1)

	            current_dep_lines.append(dep_line)
	            self.original_deps[dep] = "".join(current_dep_lines)
	            current_dep_lines = []

	        # Newly added deps use whichever quote style the file uses most,
	        # defaulting to double quotes.
	        self._quote = '"'
	        if single_quote_count > double_quote_count:
	            self._quote = "'"

	    @functools.cached_property
	    def new_lines(self):
	        """Returns a list of the lines of the file with formatting applied.

	        The result is computed on first access and cached, so `used_deps` and
	        `recipe_module_imports` must be final by then. A file in which no
	        complete DEPS list was found is returned unchanged.
	        """
	        if self._deps_start_line < 0 or self._deps_end_line < 0:
	            # There is no DEPS list to rewrite. Splicing with -1 indices would
	            # insert lines before the last line of the file.
	            return self.original_lines[:]

	        # Start from the directly imported modules, then let the file's own
	        # entry text (with its comments) win for deps already listed and used.
	        new_deps = {
	            dep: f"    {self._quote}{dep}{self._quote},\n"
	            for dep in self.recipe_module_imports
	        }
	        for dep, lines in self.original_deps.items():
	            dep_basename = dep.split("/")[-1]
	            if dep in self.used_deps or dep_basename in self.used_deps:
	                new_deps[dep] = lines

	        sorted_dep_lines = [text for _, text in sorted(new_deps.items())]
	        new_lines = self.original_lines[:]
	        new_lines[self._deps_start_line : self._deps_end_line] = sorted_dep_lines
	        return new_lines

	    @functools.cached_property
	    def diff(self):
	        """Returns a git-style rendering of the diff that would be produced.

	        The result is an empty string if the file needs no changes.
	        """
	        return "".join(
	            difflib.unified_diff(
	                self.original_lines, self.new_lines, "a" + self.path, "b" + self.path
	            )
	        ).rstrip()

	    def rewrite(self):
	        """Overwrite the file with its cleaned-up contents."""
	        print("rewriting %s" % os.path.relpath(self.path, os.getcwd()))
	        with open(self.path, "w") as f:
	            f.writelines(self.new_lines)


	def analyze_recipe(path, repo_name):
	    """Check for unused DEPS in a recipe file.

	    Args:
	        path (str): The absolute path to the recipe Python file.
	        repo_name (str): The name of the current recipes repo.

	    Returns:
	        FileWithDEPS: The parsed file, with `used_deps` and
	        `recipe_module_imports` filled in.
	    """
	    recipe_file = FileWithDEPS(path, repo_name)
	    if recipe_file._deps_start_line < 0:
	        # The file has no DEPS list, so there is nothing to prune or add to.
	        return recipe_file

	    # Comment-only lines never count as imports or usages.
	    code_lines = [
	        line
	        for line in recipe_file.original_lines
	        if not line.strip().startswith("#")
	    ]

	    # Collect direct module imports in their own pass over the whole file, so
	    # the result doesn't depend on which deps are declared or where they are
	    # first used.
	    for line in code_lines:
	        imported_module = parse_recipe_module_import(line)
	        if imported_module:
	            recipe_file.recipe_module_imports.add(imported_module)

	    for dep in recipe_file.original_deps:
	        # This is a simple heuristic: for a dep "foo", consider it used if
	        # "api.foo" is found anywhere in the file.
	        dep_regex = re.compile(r"api\.{}\b".format(re.escape(dep.split("/")[-1])))
	        if any(dep_regex.search(line) for line in code_lines):
	            recipe_file.used_deps.add(dep)

	    return recipe_file


	def analyze_module(module_dir, repo_name):
	    """Check for unused DEPS in a recipe module's __init__.py file.

	    Every `.py` file of the module, other than those in its top-level
	    "examples" and "resources" directories, is scanned for dep usages and
	    for direct imports of other recipe modules.

	    Args:
	        module_dir (str): The absolute path to the root of the recipe module.
	        repo_name (str): The name of the current recipes repo.

	    Returns:
	        FileWithDEPS: The module's parsed __init__.py, with `used_deps` and
	        `recipe_module_imports` filled in.

	    Raises:
	        Exception: If the module has no __init__.py file.
	    """
	    module_name = os.path.basename(module_dir)
	    init_path = os.path.join(module_dir, "__init__.py")
	    if not os.path.exists(init_path):
	        raise Exception("recipe module %s has no __init__.py file" % module_name)

	    # Heuristic for dep usage: any attribute access on something that looks
	    # like the recipe API object (`api.foo`, `_api.foo`, `self.api.foo`,
	    # `self.m.foo`, ...). The `|` must be a real alternation, not an escaped
	    # literal pipe.
	    usage_regex = re.compile(r"\b((self\.)?\b_?api|self\.m)\.(?P<dep>\w+)\b")
	    # A module importing from itself doesn't need itself in its DEPS.
	    own_dep_name = f"{repo_name}/{module_name}"
	    # "examples" holds standalone recipes with their own DEPS, and
	    # "resources" holds standalone scripts that don't use recipe DEPS at
	    # all, so neither says anything about the module's own DEPS.
	    skipped_toplevel_dirs = ("examples", "resources")

	    init_file = FileWithDEPS(init_path, repo_name)
	    for current_dir, child_dirs, filenames in os.walk(module_dir, topdown=True):
	        if current_dir == module_dir:
	            # Prune in place so that os.walk() doesn't enter these directories.
	            child_dirs[:] = [
	                name for name in child_dirs if name not in skipped_toplevel_dirs
	            ]

	        for filename in filenames:
	            if os.path.splitext(filename)[1] != ".py":
	                continue

	            with open(os.path.join(current_dir, filename)) as source:
	                source_lines = source.readlines()

	            for source_line in source_lines:
	                if source_line.strip().startswith("#"):
	                    continue
	                imported_module = parse_recipe_module_import(source_line)
	                if imported_module and imported_module != own_dep_name:
	                    init_file.recipe_module_imports.add(imported_module)
	                init_file.used_deps.update(
	                    usage.group("dep") for usage in usage_regex.finditer(source_line)
	                )

	    return init_file


	def parse_recipe_module_import(line):
	    """Extract the dep named by a `from RECIPE_MODULES...` import line.

	    Args:
	        line (str): A line of Python source.

	    Returns:
	        str or None: "<repo>/<module>" if the line starts with
	        `from RECIPE_MODULES.<repo>.<module> import`, otherwise None.
	    """
	    # The dots are escaped so that they only match literal dots. Unescaped,
	    # `from RECIPE_MODULES.fuchsia import foo` would be misparsed into the
	    # bogus dep "fuchs/a".
	    match = re.match(
	        r"from RECIPE_MODULES\.(?P<repo>\w+)\.(?P<module>\w+) import", line
	    )
	    if not match:
	        return None
	    return match.group("repo") + "/" + match.group("module")


	# Only run the cleanup when this file is executed as a script, not when it
	# is imported.
	if __name__ == "__main__":
	    main()