#!/usr/bin/env python3
"""Produce a diff between mypy caches.
With some infrastructure, this can allow for distributing small cache diffs to users in
many cases instead of full cache artifacts.
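
Example usage (the script name and cache paths here are illustrative):

    python diff_cache.py .mypy_cache/3.12 /tmp/new_cache/3.12 cache_diff.json

Pass --sqlite when the caches are sqlite-backed.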
"""
from __future__ import annotations
import argparse
import json
import os
import sys
from collections import defaultdict
from typing import Any
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from mypy.metastore import FilesystemMetadataStore, MetadataStore, SqliteMetadataStore


def make_cache(input_dir: str, sqlite: bool) -> MetadataStore:
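    """Open a metadata store of the requested kind rooted at input_dir."""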
if sqlite:
return SqliteMetadataStore(input_dir)
else:
return FilesystemMetadataStore(input_dir)


def merge_deps(all: dict[str, set[str]], new: dict[str, set[str]]) -> None:
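    """Merge the dependency sets from new into all, modifying all in place."""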
for k, v in new.items():
all.setdefault(k, set()).update(v)


def load(cache: MetadataStore, s: str) -> Any:
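    """Read a cache file and normalize it so that equivalent files compare equal.

    For meta files the mtimes are zeroed and the dependency lists are sorted;
    for deps files the dependency targets are sorted.
    """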
data = cache.read(s)
obj = json.loads(data)
if s.endswith(".meta.json"):
# For meta files, zero out the mtimes and sort the
# dependencies to avoid spurious conflicts
obj["mtime"] = 0
obj["data_mtime"] = 0
if "dependencies" in obj:
all_deps = obj["dependencies"] + obj["suppressed"]
num_deps = len(obj["dependencies"])
thing = list(zip(all_deps, obj["dep_prios"], obj["dep_lines"]))
def unzip(x: Any) -> Any:
return zip(*x) if x else ((), (), ())
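            # unzip turns a list of (dep, prio, line) triples back into three
            # parallel tuples, e.g. [("a", 5, 1), ("b", 10, 2)] -> ("a", "b"), (5, 10), (1, 2)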
obj["dependencies"], prios1, lines1 = unzip(sorted(thing[:num_deps]))
obj["suppressed"], prios2, lines2 = unzip(sorted(thing[num_deps:]))
obj["dep_prios"] = prios1 + prios2
obj["dep_lines"] = lines1 + lines2
if s.endswith(".deps.json"):
# For deps files, sort the deps to avoid spurious mismatches
for v in obj.values():
v.sort()
return obj


def main() -> None:
parser = argparse.ArgumentParser()
parser.add_argument("--verbose", action="store_true", default=False, help="Increase verbosity")
parser.add_argument("--sqlite", action="store_true", default=False, help="Use a sqlite cache")
parser.add_argument("input_dir1", help="Input directory for the cache")
parser.add_argument("input_dir2", help="Input directory for the cache")
parser.add_argument("output", help="Output file")
args = parser.parse_args()
cache1 = make_cache(args.input_dir1, args.sqlite)
cache2 = make_cache(args.input_dir2, args.sqlite)
type_misses: dict[str, int] = defaultdict(int)
type_hits: dict[str, int] = defaultdict(int)
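    # Maps cache file name -> new JSON contents, or None for files that exist in
    # the first cache but not the second (presumably removed when the diff is applied).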
updates: dict[str, str | None] = {}
deps1: dict[str, set[str]] = {}
deps2: dict[str, set[str]] = {}
misses = hits = 0
cache1_all = list(cache1.list_all())
for s in cache1_all:
obj1 = load(cache1, s)
try:
obj2 = load(cache2, s)
except FileNotFoundError:
obj2 = None
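        # The file kind ("meta", "data", or "deps") is the second-to-last dotted
        # component of the cache file name, e.g. "foo.meta.json" -> "meta".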
typ = s.split(".")[-2]
if obj1 != obj2:
misses += 1
type_misses[typ] += 1
            # For deps files, collect the dependencies instead of putting the whole
            # file in the diff; only the added deps are emitted later, which keeps
            # the diff much smaller.
if ".deps." not in s:
if obj2 is not None:
updates[s] = json.dumps(obj2)
else:
updates[s] = None
elif obj2:
merge_deps(deps1, obj1)
merge_deps(deps2, obj2)
else:
hits += 1
type_hits[typ] += 1
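    # Anything present only in the second cache is new; include it verbatim.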
cache1_all_set = set(cache1_all)
for s in cache2.list_all():
if s not in cache1_all_set:
updates[s] = cache2.read(s)
# Compute what deps have been added and merge them all into the
# @root deps file.
    new_deps = {k: deps2.get(k, set()) - deps1.get(k, set()) for k in deps2}
new_deps = {k: v for k, v in new_deps.items() if v}
try:
root_deps = load(cache1, "@root.deps.json")
except FileNotFoundError:
root_deps = {}
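    # Fold the existing @root deps back in so they are not lost when
    # @root.deps.json is rewritten with the merged result.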
merge_deps(new_deps, root_deps)
new_deps_json = {k: list(v) for k, v in new_deps.items() if v}
updates["@root.deps.json"] = json.dumps(new_deps_json)
# Drop updates to deps.meta.json for size reasons. The diff
# applier will manually fix it up.
updates.pop("./@deps.meta.json", None)
updates.pop("@deps.meta.json", None)
    # Summarize the results and write out the diff.
print("Generated incremental cache:", hits, "hits,", misses, "misses")
if args.verbose:
print("hits", type_hits)
print("misses", type_misses)
with open(args.output, "w") as f:
json.dump(updates, f)


if __name__ == "__main__":
main()