#!/usr/bin/env python3
"""Produce a diff between mypy caches.
With some infrastructure, this can allow for distributing small cache diffs to users in
many cases instead of full cache artifacts.
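
Example usage (the script name and cache paths here are illustrative):

    python diff_cache.py .mypy_cache/3.12 /tmp/new_cache/3.12 cache_diff.json

Pass --sqlite when the caches are sqlite-backed.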
"""
from __future__ import annotations
import argparse
import json
import os
import sys
from collections import defaultdict
from typing import Any
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from mypy.metastore import FilesystemMetadataStore, MetadataStore, SqliteMetadataStore


def make_cache(input_dir: str, sqlite: bool) -> MetadataStore:
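    """Open a metadata store of the requested kind rooted at input_dir."""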
if sqlite:
return SqliteMetadataStore(input_dir)
else:
return FilesystemMetadataStore(input_dir)


def merge_deps(all: dict[str, set[str]], new: dict[str, set[str]]) -> None:
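    """Merge the dependency sets from new into all, modifying all in place."""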
for k, v in new.items():
all.setdefault(k, set()).update(v)


def load(cache: MetadataStore, s: str) -> Any:
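    """Read a cache file and normalize it so that equivalent files compare equal.

    For meta files the mtimes are zeroed and the dependency lists are sorted;
    for deps files the dependency targets are sorted.
    """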
data = cache.read(s)
obj = json.loads(data)
if s.endswith(".meta.json"):
# For meta files, zero out the mtimes and sort the
# dependencies to avoid spurious conflicts
obj["mtime"] = 0
obj["data_mtime"] = 0
if "dependencies" in obj:
all_deps = obj["dependencies"] + obj["suppressed"]
num_deps = len(obj["dependencies"])
thing = list(zip(all_deps, obj["dep_prios"], obj["dep_lines"]))
def unzip(x: Any) -> Any:
return zip(*x) if x else ((), (), ())
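            # unzip turns a list of (dep, prio, line) triples back into three
            # parallel tuples, e.g. [("a", 5, 1), ("b", 10, 2)] -> ("a", "b"), (5, 10), (1, 2)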
obj["dependencies"], prios1, lines1 = unzip(sorted(thing[:num_deps]))
obj["suppressed"], prios2, lines2 = unzip(sorted(thing[num_deps:]))
obj["dep_prios"] = prios1 + prios2
obj["dep_lines"] = lines1 + lines2
if s.endswith(".deps.json"):
# For deps files, sort the deps to avoid spurious mismatches
for v in obj.values():
v.sort()
return obj


def main() -> None:
parser = argparse.ArgumentParser()
parser.add_argument("--verbose", action="store_true", default=False, help="Increase verbosity")
parser.add_argument("--sqlite", action="store_true", default=False, help="Use a sqlite cache")
parser.add_argument("input_dir1", help="Input directory for the cache")
parser.add_argument("input_dir2", help="Input directory for the cache")
parser.add_argument("output", help="Output file")
args = parser.parse_args()
cache1 = make_cache(args.input_dir1, args.sqlite)
cache2 = make_cache(args.input_dir2, args.sqlite)
type_misses: dict[str, int] = defaultdict(int)
type_hits: dict[str, int] = defaultdict(int)
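    # Maps cache file name -> new JSON contents, or None for files that exist in
    # the first cache but not the second (presumably removed when the diff is applied).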
updates: dict[str, str | None] = {}
deps1: dict[str, set[str]] = {}
deps2: dict[str, set[str]] = {}
misses = hits = 0
cache1_all = list(cache1.list_all())
for s in cache1_all:
obj1 = load(cache1, s)
try:
obj2 = load(cache2, s)
except FileNotFoundError:
obj2 = None
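        # The file kind ("meta", "data", or "deps") is the second-to-last dotted
        # component of the cache file name, e.g. "foo.meta.json" -> "meta".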
typ = s.split(".")[-2]
if obj1 != obj2:
misses += 1
type_misses[typ] += 1
            # For deps files, collect the dependencies instead of putting the whole
            # file in the diff; only the added deps are emitted later, which keeps
            # the diff much smaller.
if ".deps." not in s:
if obj2 is not None:
updates[s] = json.dumps(obj2)
else:
updates[s] = None
elif obj2:
merge_deps(deps1, obj1)
merge_deps(deps2, obj2)
else:
hits += 1
type_hits[typ] += 1
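    # Anything present only in the second cache is new; include it verbatim.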
cache1_all_set = set(cache1_all)
for s in cache2.list_all():
if s not in cache1_all_set:
updates[s] = cache2.read(s)
# Compute what deps have been added and merge them all into the
# @root deps file.
    new_deps = {k: deps2.get(k, set()) - deps1.get(k, set()) for k in deps2}
new_deps = {k: v for k, v in new_deps.items() if v}
try:
root_deps = load(cache1, "@root.deps.json")
except FileNotFoundError:
root_deps = {}
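    # Fold the existing @root deps back in so they are not lost when
    # @root.deps.json is rewritten with the merged result.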
merge_deps(new_deps, root_deps)
new_deps_json = {k: list(v) for k, v in new_deps.items() if v}
updates["@root.deps.json"] = json.dumps(new_deps_json)
# Drop updates to deps.meta.json for size reasons. The diff
# applier will manually fix it up.
updates.pop("./@deps.meta.json", None)
updates.pop("@deps.meta.json", None)
    # Summarize the results and write out the diff.
print("Generated incremental cache:", hits, "hits,", misses, "misses")
if args.verbose:
print("hits", type_hits)
print("misses", type_misses)
with open(args.output, "w") as f:
json.dump(updates, f)


if __name__ == "__main__":
main()