#!/usr/bin/env python3
"""Produce a diff between mypy caches.

With some supporting infrastructure, this makes it possible to distribute
small cache diffs to users in many cases instead of full cache artifacts.
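
Example invocation (the script name and cache paths are illustrative):

    python diff_cache.py [--verbose] [--sqlite] old/.mypy_cache/3.8 new/.mypy_cache/3.8 diff.json

The output maps cache entry names to their new serialized contents, with
None marking entries to delete; a separate diff applier is expected to
apply it on top of the first cache.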
"""
import argparse
import json
import os
import sys
from collections import defaultdict
from typing import Any, Dict, Optional, Set
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from mypy.metastore import FilesystemMetadataStore, MetadataStore, SqliteMetadataStore


def make_cache(input_dir: str, sqlite: bool) -> MetadataStore:
    """Open a metadata store (sqlite- or filesystem-backed) for input_dir."""
    if sqlite:
        return SqliteMetadataStore(input_dir)
    else:
        return FilesystemMetadataStore(input_dir)


def merge_deps(all_deps: Dict[str, Set[str]], new: Dict[str, Set[str]]) -> None:
    """Merge the dependency map new into all_deps, mutating it in place."""
    for k, v in new.items():
        all_deps.setdefault(k, set()).update(v)


def load(cache: MetadataStore, s: str) -> Any:
    """Read a cache entry and parse it as JSON, normalized to avoid spurious diffs."""
    data = cache.read(s)
    obj = json.loads(data)
if s.endswith(".meta.json"):
# For meta files, zero out the mtimes and sort the
# dependencies to avoid spurious conflicts
obj["mtime"] = 0
obj["data_mtime"] = 0
if "dependencies" in obj:
all_deps = obj["dependencies"] + obj["suppressed"]
num_deps = len(obj["dependencies"])
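            # Pair each dependency name with its priority and line so that
            # sorting by name keeps the three parallel lists aligned, e.g.
            # (illustrative) [("b", 5, 2), ("a", 10, 1)] sorts to
            # [("a", 10, 1), ("b", 5, 2)].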
thing = list(zip(all_deps, obj["dep_prios"], obj["dep_lines"]))
def unzip(x: Any) -> Any:
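                # Transpose a list of triples back into three sequences
                # (an empty input yields three empty tuples).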
return zip(*x) if x else ((), (), ())
obj["dependencies"], prios1, lines1 = unzip(sorted(thing[:num_deps]))
obj["suppressed"], prios2, lines2 = unzip(sorted(thing[num_deps:]))
obj["dep_prios"] = prios1 + prios2
obj["dep_lines"] = lines1 + lines2
if s.endswith(".deps.json"):
# For deps files, sort the deps to avoid spurious mismatches
for v in obj.values():
v.sort()
return obj


def main() -> None:
parser = argparse.ArgumentParser()
parser.add_argument(
"--verbose", action="store_true", default=False, help="Increase verbosity"
)
parser.add_argument(
"--sqlite", action="store_true", default=False, help="Use a sqlite cache"
)
parser.add_argument("input_dir1", help="Input directory for the cache")
parser.add_argument("input_dir2", help="Input directory for the cache")
parser.add_argument("output", help="Output file")
args = parser.parse_args()
cache1 = make_cache(args.input_dir1, args.sqlite)
cache2 = make_cache(args.input_dir2, args.sqlite)
type_misses: Dict[str, int] = defaultdict(int)
type_hits: Dict[str, int] = defaultdict(int)
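    # Maps cache entry name to its new contents; None marks an entry that
    # exists in the first cache but not the second.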
updates: Dict[str, Optional[str]] = {}
deps1: Dict[str, Set[str]] = {}
deps2: Dict[str, Set[str]] = {}
misses = hits = 0
cache1_all = list(cache1.list_all())
for s in cache1_all:
obj1 = load(cache1, s)
try:
obj2 = load(cache2, s)
except FileNotFoundError:
obj2 = None
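        # The kind of cache entry, e.g. "meta", "data", or "deps".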
typ = s.split(".")[-2]
if obj1 != obj2:
misses += 1
type_misses[typ] += 1
# Collect the dependencies instead of including them directly in the diff
# so we can produce a much smaller direct diff of them.
if ".deps." not in s:
if obj2 is not None:
updates[s] = json.dumps(obj2)
else:
updates[s] = None
elif obj2:
merge_deps(deps1, obj1)
merge_deps(deps2, obj2)
else:
hits += 1
type_hits[typ] += 1
cache1_all_set = set(cache1_all)
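    # Entries that exist only in the second cache are new; include them wholesale.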
for s in cache2.list_all():
if s not in cache1_all_set:
updates[s] = cache2.read(s)
# Compute what deps have been added and merge them all into the
# @root deps file.
    new_deps = {k: deps2.get(k, set()) - deps1.get(k, set()) for k in deps2}
new_deps = {k: v for k, v in new_deps.items() if v}
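    # The update below replaces @root.deps.json wholesale, so fold the base
    # cache's existing root deps into it as well.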
try:
root_deps = load(cache1, "@root.deps.json")
except FileNotFoundError:
root_deps = {}
merge_deps(new_deps, root_deps)
new_deps_json = {k: list(v) for k, v in new_deps.items() if v}
updates["@root.deps.json"] = json.dumps(new_deps_json)
# Drop updates to deps.meta.json for size reasons. The diff
# applier will manually fix it up.
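    # (The entry name may or may not carry a "./" prefix depending on the
    # metadata store, so drop both spellings.)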
updates.pop("./@deps.meta.json", None)
updates.pop("@deps.meta.json", None)
print("Generated incremental cache:", hits, "hits,", misses, "misses")
if args.verbose:
print("hits", type_hits)
print("misses", type_misses)
with open(args.output, "w") as f:
json.dump(updates, f)


if __name__ == "__main__":
main()