scripts/cleanup_deps.py - infra/recipes - Git at Google

 #!/usr/bin/env python3
 # Copyright 2020 The Fuchsia Authors. All rights reserved.
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.
 """
 A hacky script to clean up recipe DEPS:
 - Delete unused recipe and recipe module DEPS. It uses a simple heuristic for
   this, which may produce some false positives and false negatives.
   OPTIONAL_TODO: Using the ast module would be a more reliable way of
   detecting dep uses, since this heuristic might find usages in docstrings
   and trailing comments.
 - Ensure all deps are prefixed with a repo name (e.g. "fuchsia").
 - Sort all DEPS lists alphabetically.
 """

 import json
 import os
 import re


 def main():
     """Clean up the DEPS of every recipe and recipe module under the cwd.

     Reads the repo name from infra/config/recipes.cfg (relative to the
     current working directory), then cleans up each non-__init__ Python file
     under recipes/ and recipe_modules/ as a recipe, and finally each
     directory directly under recipe_modules/ as a recipe module.
     """
     root = os.getcwd()

     # The repo name gets prepended to any DEPS entry that doesn't already
     # specify a repo.
     with open(os.path.join(root, "infra", "config", "recipes.cfg")) as cfg_file:
         repo_name = json.load(cfg_file)["repo_name"]

     recipes_dir = os.path.join(root, "recipes")
     modules_dir = os.path.join(root, "recipe_modules")

     for top in (recipes_dir, modules_dir):
         for parent, _, filenames in os.walk(top):
             for filename in filenames:
                 if os.path.splitext(filename)[1] != ".py":
                     continue
                 # __init__.py files hold a module's DEPS, but the usages of
                 # those deps may live in sibling files, so they are handled
                 # by cleanup_module() below instead.
                 if filename == "__init__.py":
                     continue
                 cleanup_recipe(os.path.join(parent, filename), repo_name)

     for entry in os.listdir(modules_dir):
         module_dir = os.path.join(modules_dir, entry)
         if os.path.isdir(module_dir):
             cleanup_module(module_dir, repo_name)


 class FileWithDEPS(object):
     """A Python file whose `DEPS = [...]` list can be parsed and rewritten.

     The file is parsed once, in the constructor. `update_deps()` then
     rewrites the file on disk with unused deps removed and the remaining
     deps sorted.

     Attributes:
         original_deps (dict): Maps each full dep name (e.g. "fuchsia/foo")
             to the text of that dep's entry: any lines that precede the dep
             inside the list (typically comments) followed by the dep line
             itself, joined into a single string with newlines kept. Deps
             that had no repo prefix are stored with the repo name already
             prepended, both in the key and in the text.
         original_lines (list of str): The lines of the file as read from
             disk, including trailing newlines.
     """

     # Matches, at the end of a stripped line: a quoted dep name, an optional
     # comma and an optional trailing comment.
     _DEP_REGEX = re.compile(r'([\'"])(?P<dep>\S+)\1,?\s*(?P<comment>\#.*)?$')

     def __init__(self, path, repo_name):
         """Read the file and its DEPS.

         Args:
             path (str): Path to the Python file to parse.
             repo_name (str): Repo name to prepend to any dep that does not
                 already name a repo (i.e. contains no "/").
         """
         self._path = path
         self._repo_name = repo_name

         self.original_deps = {}
         self.original_lines = []

         # Index of the first line inside the DEPS list, or -1 if no
         # "DEPS = [" line was found.
         self._deps_start_line = -1
         # Index of the closing "]" line, or -1 if it was not found.
         self._deps_end_line = -1
         # Lines found between the last dep and the closing "]". They belong
         # to no dep, so they are kept verbatim at the end of the list.
         self._trailing_lines = []

         with open(self._path) as f:
             self.original_lines = f.readlines()

         # The lines associated with the dep currently being parsed, including
         # any comment lines preceding the dep.
         current_dep_lines = []

         for i, original_line in enumerate(self.original_lines):
             line = original_line.strip()
             if line == "DEPS = [":
                 self._deps_start_line = i + 1
                 continue
             elif self._deps_start_line == -1:
                 continue
             elif line == "]":
                 self._deps_end_line = i
                 self._trailing_lines = current_dep_lines
                 break

             # Comment lines and anything else that isn't a dep entry are
             # attached to the next dep that follows them.
             match = None if line.startswith("#") else self._DEP_REGEX.search(line)
             if not match:
                 current_dep_lines.append(original_line)
                 continue

             dep = match.group("dep")
             dep_line = original_line
             # Prepend the recipe repo name (assumed to be repo_name) if it's
             # missing. The match was made against the stripped line, so shift
             # its offsets by the leading whitespace to splice the prefix in
             # at exactly the matched position.
             if "/" not in dep:
                 indent = len(original_line) - len(original_line.lstrip())
                 start = indent + match.start("dep")
                 end = indent + match.end("dep")
                 dep = "%s/%s" % (self._repo_name, dep)
                 dep_line = original_line[:start] + dep + original_line[end:]

             current_dep_lines.append(dep_line)
             self.original_deps[dep] = "".join(current_dep_lines)
             current_dep_lines = []

     def update_deps(self, used_deps):
         """Remove unused DEPS and sort remaining DEPS.

         Also prepend the repo name to any deps that didn't have it; e.g.
         "foo" -> "fuchsia/foo".

         The file is only written when its contents would actually change. A
         file without a complete DEPS list (a "DEPS = [" line followed by a
         line containing only "]") is left untouched.

         Args:
             used_deps (seq of str): The deps from this recipe that *are*
                 used. Can include repo name ("fuchsia/foo") or not ("foo").
         """
         if self._deps_start_line == -1 or self._deps_end_line == -1:
             # Without both boundaries the slice below would be meaningless;
             # with a missing end it would delete most of the file.
             return

         kept_deps = {
             dep: text
             for dep, text in self.original_deps.items()
             if dep in used_deps or dep.split("/")[-1] in used_deps
         }

         sorted_dep_lines = [text for _, text in sorted(kept_deps.items())]
         new_lines = self.original_lines[:]
         new_lines[self._deps_start_line : self._deps_end_line] = (
             sorted_dep_lines + self._trailing_lines
         )

         # Compare the joined text rather than the lists: one dep entry may
         # span several original lines, so the lists can differ even when the
         # resulting file contents are identical.
         if "".join(new_lines) == "".join(self.original_lines):
             # Skip writing to disk since the DEPS don't need to be changed.
             return

         print("rewriting %s" % os.path.relpath(self._path, os.getcwd()))
         with open(self._path, "w") as f:
             f.writelines(new_lines)


 def cleanup_recipe(path, repo_name):
     """Remove unused DEPS from a recipe file.

     A dep "foo" (or "repo/foo") is considered used if "api.foo" appears on
     any line of the file that is not a pure comment line. This is a simple
     textual heuristic, so usages inside docstrings or trailing comments
     also count.

     Args:
         path (str): The absolute path to the recipe Python file.
         repo_name (str): The name of the current recipes repo.
     """
     recipe_file = FileWithDEPS(path, repo_name)

     # Comment-only lines never count as usages; filter them out once rather
     # than once per dep.
     code_lines = [
         line
         for line in recipe_file.original_lines
         if not line.strip().startswith("#")
     ]

     used_deps = set()
     for dep in recipe_file.original_deps:
         # Escape the dep name so that any regex metacharacters in it are
         # matched literally instead of corrupting the pattern.
         dep_basename = re.escape(dep.split("/")[-1])
         dep_regex = re.compile(r"api\.{}\b".format(dep_basename))
         if any(dep_regex.search(line) for line in code_lines):
             used_deps.add(dep)

     recipe_file.update_deps(used_deps)


 def cleanup_module(module_dir, repo_name):
     """Remove unused DEPS from a recipe module's __init__.py file.

     Every Python file of the module (except those under the top-level
     "examples" and "resources" directories) is scanned for attribute
     accesses that look like dep usages; the names found are then passed to
     the __init__.py file's update_deps().

     Args:
         module_dir (str): The absolute path to the root of the recipe module.
         repo_name (str): The name of the current recipes repo.
     """
     # Heuristic: anything accessed as an attribute of "api", "_api",
     # "self.api", "self._api" or "self.m" is treated as a used dependency.
     usage_regex = re.compile(r"\b((self\.)?\b_?api|self\.m)\.(?P<dep>\w+)\b")
     init_file = FileWithDEPS(os.path.join(module_dir, "__init__.py"), repo_name)

     # "examples" holds standalone recipes with their own DEPS, and
     # "resources" holds standalone scripts that use no recipe DEPS at all,
     # so neither says anything about the module's own DEPS.
     skipped_at_root = ("examples", "resources")

     used_deps = set()
     for subdir, subdirs, files in os.walk(module_dir, topdown=True):
         if subdir == module_dir:
             # Prune in place so that os.walk() doesn't descend into them.
             subdirs[:] = [d for d in subdirs if d not in skipped_at_root]

         for filename in files:
             if os.path.splitext(filename)[1] != ".py":
                 continue

             with open(os.path.join(subdir, filename)) as source:
                 for line in source:
                     if line.strip().startswith("#"):
                         continue
                     used_deps.update(
                         found.group("dep")
                         for found in usage_regex.finditer(line)
                     )

     init_file.update_deps(used_deps)


 # Run the cleanup only when this file is executed as a script, not when it
 # is imported.
 if __name__ == "__main__":
     main()
	#!/usr/bin/env python3
	# Copyright 2020 The Fuchsia Authors. All rights reserved.
	# Use of this source code is governed by a BSD-style license that can be
	# found in the LICENSE file.
	"""
	A hacky script to clean up recipe DEPS:
	- Delete unused recipe and recipe module DEPS. It uses a simple heuristic for
	  this, which may produce some false positives and false negatives.
	  OPTIONAL_TODO: Using the ast module would be a more reliable way of
	  detecting dep uses, since this heuristic might find usages in docstrings
	  and trailing comments.
	- Ensure all deps are prefixed with a repo name (e.g. "fuchsia").
	- Sort all DEPS lists alphabetically.
	"""

	import json
	import os
	import re


	def main():
	    """Clean up the DEPS of every recipe and recipe module under the cwd.

	    Reads the repo name from infra/config/recipes.cfg (relative to the
	    current working directory), then cleans up each non-__init__ Python
	    file under recipes/ and recipe_modules/ as a recipe, and finally each
	    directory directly under recipe_modules/ as a recipe module.
	    """
	    root = os.getcwd()

	    # The repo name gets prepended to any DEPS entry that doesn't already
	    # specify a repo.
	    with open(os.path.join(root, "infra", "config", "recipes.cfg")) as cfg_file:
	        repo_name = json.load(cfg_file)["repo_name"]

	    recipes_dir = os.path.join(root, "recipes")
	    modules_dir = os.path.join(root, "recipe_modules")

	    for top in (recipes_dir, modules_dir):
	        for parent, _, filenames in os.walk(top):
	            for filename in filenames:
	                if os.path.splitext(filename)[1] != ".py":
	                    continue
	                # __init__.py files hold a module's DEPS, but the usages
	                # of those deps may live in sibling files, so they are
	                # handled by cleanup_module() below instead.
	                if filename == "__init__.py":
	                    continue
	                cleanup_recipe(os.path.join(parent, filename), repo_name)

	    for entry in os.listdir(modules_dir):
	        module_dir = os.path.join(modules_dir, entry)
	        if os.path.isdir(module_dir):
	            cleanup_module(module_dir, repo_name)


	class FileWithDEPS(object):
	    """A Python file whose `DEPS = [...]` list can be parsed and rewritten.

	    The file is parsed once, in the constructor. `update_deps()` then
	    rewrites the file on disk with unused deps removed and the remaining
	    deps sorted.

	    Attributes:
	        original_deps (dict): Maps each full dep name (e.g. "fuchsia/foo")
	            to the text of that dep's entry: any lines that precede the
	            dep inside the list (typically comments) followed by the dep
	            line itself, joined into a single string with newlines kept.
	            Deps that had no repo prefix are stored with the repo name
	            already prepended, both in the key and in the text.
	        original_lines (list of str): The lines of the file as read from
	            disk, including trailing newlines.
	    """

	    # Matches, at the end of a stripped line: a quoted dep name, an
	    # optional comma and an optional trailing comment.
	    _DEP_REGEX = re.compile(r'([\'"])(?P<dep>\S+)\1,?\s*(?P<comment>\#.*)?$')

	    def __init__(self, path, repo_name):
	        """Read the file and its DEPS.

	        Args:
	            path (str): Path to the Python file to parse.
	            repo_name (str): Repo name to prepend to any dep that does
	                not already name a repo (i.e. contains no "/").
	        """
	        self._path = path
	        self._repo_name = repo_name

	        self.original_deps = {}
	        self.original_lines = []

	        # Index of the first line inside the DEPS list, or -1 if no
	        # "DEPS = [" line was found.
	        self._deps_start_line = -1
	        # Index of the closing "]" line, or -1 if it was not found.
	        self._deps_end_line = -1
	        # Lines found between the last dep and the closing "]". They
	        # belong to no dep, so they are kept verbatim at the end of the
	        # list.
	        self._trailing_lines = []

	        with open(self._path) as f:
	            self.original_lines = f.readlines()

	        # The lines associated with the dep currently being parsed,
	        # including any comment lines preceding the dep.
	        current_dep_lines = []

	        for i, original_line in enumerate(self.original_lines):
	            line = original_line.strip()
	            if line == "DEPS = [":
	                self._deps_start_line = i + 1
	                continue
	            elif self._deps_start_line == -1:
	                continue
	            elif line == "]":
	                self._deps_end_line = i
	                self._trailing_lines = current_dep_lines
	                break

	            # Comment lines and anything else that isn't a dep entry are
	            # attached to the next dep that follows them.
	            match = None if line.startswith("#") else self._DEP_REGEX.search(line)
	            if not match:
	                current_dep_lines.append(original_line)
	                continue

	            dep = match.group("dep")
	            dep_line = original_line
	            # Prepend the recipe repo name (assumed to be repo_name) if
	            # it's missing. The match was made against the stripped line,
	            # so shift its offsets by the leading whitespace to splice the
	            # prefix in at exactly the matched position.
	            if "/" not in dep:
	                indent = len(original_line) - len(original_line.lstrip())
	                start = indent + match.start("dep")
	                end = indent + match.end("dep")
	                dep = "%s/%s" % (self._repo_name, dep)
	                dep_line = original_line[:start] + dep + original_line[end:]

	            current_dep_lines.append(dep_line)
	            self.original_deps[dep] = "".join(current_dep_lines)
	            current_dep_lines = []

	    def update_deps(self, used_deps):
	        """Remove unused DEPS and sort remaining DEPS.

	        Also prepend the repo name to any deps that didn't have it; e.g.
	        "foo" -> "fuchsia/foo".

	        The file is only written when its contents would actually change.
	        A file without a complete DEPS list (a "DEPS = [" line followed
	        by a line containing only "]") is left untouched.

	        Args:
	            used_deps (seq of str): The deps from this recipe that *are*
	                used. Can include repo name ("fuchsia/foo") or not
	                ("foo").
	        """
	        if self._deps_start_line == -1 or self._deps_end_line == -1:
	            # Without both boundaries the slice below would be
	            # meaningless; with a missing end it would delete most of the
	            # file.
	            return

	        kept_deps = {
	            dep: text
	            for dep, text in self.original_deps.items()
	            if dep in used_deps or dep.split("/")[-1] in used_deps
	        }

	        sorted_dep_lines = [text for _, text in sorted(kept_deps.items())]
	        new_lines = self.original_lines[:]
	        new_lines[self._deps_start_line : self._deps_end_line] = (
	            sorted_dep_lines + self._trailing_lines
	        )

	        # Compare the joined text rather than the lists: one dep entry
	        # may span several original lines, so the lists can differ even
	        # when the resulting file contents are identical.
	        if "".join(new_lines) == "".join(self.original_lines):
	            # Skip writing to disk since the DEPS don't need to be changed.
	            return

	        print("rewriting %s" % os.path.relpath(self._path, os.getcwd()))
	        with open(self._path, "w") as f:
	            f.writelines(new_lines)


	def cleanup_recipe(path, repo_name):
	    """Remove unused DEPS from a recipe file.

	    A dep "foo" (or "repo/foo") is considered used if "api.foo" appears
	    on any line of the file that is not a pure comment line. This is a
	    simple textual heuristic, so usages inside docstrings or trailing
	    comments also count.

	    Args:
	        path (str): The absolute path to the recipe Python file.
	        repo_name (str): The name of the current recipes repo.
	    """
	    recipe_file = FileWithDEPS(path, repo_name)

	    # Comment-only lines never count as usages; filter them out once
	    # rather than once per dep.
	    code_lines = [
	        line
	        for line in recipe_file.original_lines
	        if not line.strip().startswith("#")
	    ]

	    used_deps = set()
	    for dep in recipe_file.original_deps:
	        # Escape the dep name so that any regex metacharacters in it are
	        # matched literally instead of corrupting the pattern.
	        dep_basename = re.escape(dep.split("/")[-1])
	        dep_regex = re.compile(r"api\.{}\b".format(dep_basename))
	        if any(dep_regex.search(line) for line in code_lines):
	            used_deps.add(dep)

	    recipe_file.update_deps(used_deps)


	def cleanup_module(module_dir, repo_name):
	    """Remove unused DEPS from a recipe module's __init__.py file.

	    Every Python file of the module (except those under the top-level
	    "examples" and "resources" directories) is scanned for attribute
	    accesses that look like dep usages; the names found are then passed
	    to the __init__.py file's update_deps().

	    Args:
	        module_dir (str): The absolute path to the root of the recipe
	            module.
	        repo_name (str): The name of the current recipes repo.
	    """
	    # Heuristic: anything accessed as an attribute of "api", "_api",
	    # "self.api", "self._api" or "self.m" is treated as a used dependency.
	    # The "|" must be an unescaped alternation; an escaped pipe would
	    # demand a literal "|" in the source and never match.
	    usage_regex = re.compile(r"\b((self\.)?\b_?api|self\.m)\.(?P<dep>\w+)\b")
	    init_file = FileWithDEPS(os.path.join(module_dir, "__init__.py"), repo_name)

	    # "examples" holds standalone recipes with their own DEPS, and
	    # "resources" holds standalone scripts that use no recipe DEPS at
	    # all, so neither says anything about the module's own DEPS.
	    skipped_at_root = ("examples", "resources")

	    used_deps = set()
	    for subdir, subdirs, files in os.walk(module_dir, topdown=True):
	        if subdir == module_dir:
	            # Prune in place so that os.walk() doesn't descend into them.
	            subdirs[:] = [d for d in subdirs if d not in skipped_at_root]

	        for filename in files:
	            if os.path.splitext(filename)[1] != ".py":
	                continue

	            with open(os.path.join(subdir, filename)) as source:
	                for line in source:
	                    if line.strip().startswith("#"):
	                        continue
	                    used_deps.update(
	                        found.group("dep")
	                        for found in usage_regex.finditer(line)
	                    )

	    init_file.update_deps(used_deps)


	# Run the cleanup only when this file is executed as a script, not when
	# it is imported.
	if __name__ == "__main__":
	    main()