#!/usr/bin/env python3
"""Produce a diff between mypy caches.

With some supporting infrastructure, this makes it possible to distribute
small cache diffs to users in many cases instead of full cache artifacts.
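
Example invocation (the script name and cache paths are illustrative):

    python diff_cache.py [--verbose] [--sqlite] old/.mypy_cache/3.8 new/.mypy_cache/3.8 diff.json

The output maps cache entry names to their new serialized contents, with
None marking entries to delete; a separate diff applier is expected to
apply it on top of the first cache.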
"""
import argparse
import json
import os
import sys
from collections import defaultdict
from typing import Any, Dict, Optional, Set
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from mypy.metastore import FilesystemMetadataStore, MetadataStore, SqliteMetadataStore


def make_cache(input_dir: str, sqlite: bool) -> MetadataStore:
    """Open a metadata store (sqlite- or filesystem-backed) for input_dir."""
    if sqlite:
        return SqliteMetadataStore(input_dir)
    else:
        return FilesystemMetadataStore(input_dir)


def merge_deps(all_deps: Dict[str, Set[str]], new: Dict[str, Set[str]]) -> None:
    """Merge the dependency map new into all_deps, mutating it in place."""
    for k, v in new.items():
        all_deps.setdefault(k, set()).update(v)


def load(cache: MetadataStore, s: str) -> Any:
    """Read a cache entry and parse it as JSON, normalized to avoid spurious diffs."""
    data = cache.read(s)
    obj = json.loads(data)
if s.endswith(".meta.json"):
# For meta files, zero out the mtimes and sort the
# dependencies to avoid spurious conflicts
obj["mtime"] = 0
obj["data_mtime"] = 0
if "dependencies" in obj:
all_deps = obj["dependencies"] + obj["suppressed"]
num_deps = len(obj["dependencies"])
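            # Pair each dependency name with its priority and line so that
            # sorting by name keeps the three parallel lists aligned, e.g.
            # (illustrative) [("b", 5, 2), ("a", 10, 1)] sorts to
            # [("a", 10, 1), ("b", 5, 2)].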
thing = list(zip(all_deps, obj["dep_prios"], obj["dep_lines"]))
def unzip(x: Any) -> Any:
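                # Transpose a list of triples back into three sequences
                # (an empty input yields three empty tuples).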
return zip(*x) if x else ((), (), ())
obj["dependencies"], prios1, lines1 = unzip(sorted(thing[:num_deps]))
obj["suppressed"], prios2, lines2 = unzip(sorted(thing[num_deps:]))
obj["dep_prios"] = prios1 + prios2
obj["dep_lines"] = lines1 + lines2
if s.endswith(".deps.json"):
# For deps files, sort the deps to avoid spurious mismatches
for v in obj.values():
v.sort()
return obj


def main() -> None:
parser = argparse.ArgumentParser()
parser.add_argument(
"--verbose", action="store_true", default=False, help="Increase verbosity"
)
parser.add_argument(
"--sqlite", action="store_true", default=False, help="Use a sqlite cache"
)
parser.add_argument("input_dir1", help="Input directory for the cache")
parser.add_argument("input_dir2", help="Input directory for the cache")
parser.add_argument("output", help="Output file")
args = parser.parse_args()
cache1 = make_cache(args.input_dir1, args.sqlite)
cache2 = make_cache(args.input_dir2, args.sqlite)
type_misses: Dict[str, int] = defaultdict(int)
type_hits: Dict[str, int] = defaultdict(int)
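    # Maps cache entry name to its new contents; None marks an entry that
    # exists in the first cache but not the second.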
updates: Dict[str, Optional[str]] = {}
deps1: Dict[str, Set[str]] = {}
deps2: Dict[str, Set[str]] = {}
misses = hits = 0
cache1_all = list(cache1.list_all())
for s in cache1_all:
obj1 = load(cache1, s)
try:
obj2 = load(cache2, s)
except FileNotFoundError:
obj2 = None
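        # The kind of cache entry, e.g. "meta", "data", or "deps".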
typ = s.split(".")[-2]
if obj1 != obj2:
misses += 1
type_misses[typ] += 1
# Collect the dependencies instead of including them directly in the diff
# so we can produce a much smaller direct diff of them.
if ".deps." not in s:
if obj2 is not None:
updates[s] = json.dumps(obj2)
else:
updates[s] = None
elif obj2:
merge_deps(deps1, obj1)
merge_deps(deps2, obj2)
else:
hits += 1
type_hits[typ] += 1
cache1_all_set = set(cache1_all)
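    # Entries that exist only in the second cache are new; include them wholesale.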
for s in cache2.list_all():
if s not in cache1_all_set:
updates[s] = cache2.read(s)
# Compute what deps have been added and merge them all into the
# @root deps file.
    new_deps = {k: deps2.get(k, set()) - deps1.get(k, set()) for k in deps2}
new_deps = {k: v for k, v in new_deps.items() if v}
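    # The update below replaces @root.deps.json wholesale, so fold the base
    # cache's existing root deps into it as well.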
try:
root_deps = load(cache1, "@root.deps.json")
except FileNotFoundError:
root_deps = {}
merge_deps(new_deps, root_deps)
new_deps_json = {k: list(v) for k, v in new_deps.items() if v}
updates["@root.deps.json"] = json.dumps(new_deps_json)
# Drop updates to deps.meta.json for size reasons. The diff
# applier will manually fix it up.
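    # (The entry name may or may not carry a "./" prefix depending on the
    # metadata store, so drop both spellings.)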
updates.pop("./@deps.meta.json", None)
updates.pop("@deps.meta.json", None)
print("Generated incremental cache:", hits, "hits,", misses, "misses")
if args.verbose:
print("hits", type_hits)
print("misses", type_misses)
with open(args.output, "w") as f:
json.dump(updates, f)


if __name__ == "__main__":
main()