Write cache for modules with errors (#19820)
This is required for https://github.com/python/mypy/issues/933
Here I use a very simple-minded approach, errors are serialized simply
as a list of strings. In near future I may switch to serializing
`ErrorInfo`s (as this has some other benefits). Note that many tests
have `[stale ...]` checks updated because previously modules with errors
were not included in the list. I double-checked each test that the new
values are correct.
Note we still don't write cache if there were blockers in an SCC (like a
syntax error).
diff --git a/mypy/build.py b/mypy/build.py
index 5ccf1c8..2d3296a 100644
--- a/mypy/build.py
+++ b/mypy/build.py
@@ -15,7 +15,6 @@
import collections
import contextlib
-import errno
import gc
import json
import os
@@ -337,6 +336,7 @@
dep_lines: list[int]
dep_hashes: dict[str, str]
interface_hash: str # hash representing the public interface
+ error_lines: list[str]
version_id: str # mypy version for cache invalidation
ignore_all: bool # if errors were ignored
plugin_data: Any # config data from plugins
@@ -376,6 +376,7 @@
meta.get("dep_lines", []),
meta.get("dep_hashes", {}),
meta.get("interface_hash", ""),
+ meta.get("error_lines", []),
meta.get("version_id", sentinel),
meta.get("ignore_all", True),
meta.get("plugin_data", None),
@@ -1502,6 +1503,7 @@
"dep_lines": meta.dep_lines,
"dep_hashes": meta.dep_hashes,
"interface_hash": meta.interface_hash,
+ "error_lines": meta.error_lines,
"version_id": manager.version_id,
"ignore_all": meta.ignore_all,
"plugin_data": meta.plugin_data,
@@ -1678,28 +1680,6 @@
return cache_meta_from_dict(meta, data_json)
-def delete_cache(id: str, path: str, manager: BuildManager) -> None:
- """Delete cache files for a module.
-
- The cache files for a module are deleted when mypy finds errors there.
- This avoids inconsistent states with cache files from different mypy runs,
- see #4043 for an example.
- """
- # We don't delete .deps files on errors, since the dependencies
- # are mostly generated from other files and the metadata is
- # tracked separately.
- meta_path, data_path, _ = get_cache_names(id, path, manager.options)
- cache_paths = [meta_path, data_path]
- manager.log(f"Deleting {id} {path} {' '.join(x for x in cache_paths if x)}")
-
- for filename in cache_paths:
- try:
- manager.metastore.remove(filename)
- except OSError as e:
- if e.errno != errno.ENOENT:
- manager.log(f"Error deleting cache file {filename}: {e.strerror}")
-
-
"""Dependency manager.
Design
@@ -1875,6 +1855,9 @@
# Map from dependency id to its last observed interface hash
dep_hashes: dict[str, str] = {}
+ # List of errors reported for this file last time.
+ error_lines: list[str] = []
+
# Parent package, its parent, etc.
ancestors: list[str] | None = None
@@ -1896,9 +1879,6 @@
# Whether to ignore all errors
ignore_all = False
- # Whether the module has an error or any of its dependencies have one.
- transitive_error = False
-
# Errors reported before semantic analysis, to allow fine-grained
# mode to keep reporting them.
early_errors: list[ErrorInfo]
@@ -2000,6 +1980,7 @@
assert len(all_deps) == len(self.meta.dep_lines)
self.dep_line_map = {id: line for id, line in zip(all_deps, self.meta.dep_lines)}
self.dep_hashes = self.meta.dep_hashes
+ self.error_lines = self.meta.error_lines
if temporary:
self.load_tree(temporary=True)
if not manager.use_fine_grained_cache():
@@ -2517,11 +2498,6 @@
print(f"Error serializing {self.id}", file=self.manager.stdout)
raise # Propagate to display traceback
return None
- is_errors = self.transitive_error
- if is_errors:
- delete_cache(self.id, self.path, self.manager)
- self.meta = None
- return None
dep_prios = self.dependency_priorities()
dep_lines = self.dependency_lines()
assert self.source_hash is not None
@@ -3315,15 +3291,14 @@
else:
fresh_msg = f"stale due to deps ({' '.join(sorted(stale_deps))})"
- # Initialize transitive_error for all SCC members from union
- # of transitive_error of dependencies.
- if any(graph[dep].transitive_error for dep in deps if dep in graph):
- for id in scc:
- graph[id].transitive_error = True
-
scc_str = " ".join(scc)
if fresh:
manager.trace(f"Queuing {fresh_msg} SCC ({scc_str})")
+ for id in scc:
+ if graph[id].error_lines:
+ manager.flush_errors(
+ manager.errors.simplify_path(graph[id].xpath), graph[id].error_lines, False
+ )
fresh_scc_queue.append(scc)
else:
if fresh_scc_queue:
@@ -3335,11 +3310,6 @@
# single fresh SCC. This is intentional -- we don't need those modules
# loaded if there are no more stale SCCs to be rechecked.
#
- # Also note we shouldn't have to worry about transitive_error here,
- # since modules with transitive errors aren't written to the cache,
- # and if any dependencies were changed, this SCC would be stale.
- # (Also, in quick_and_dirty mode we don't care about transitive errors.)
- #
# TODO: see if it's possible to determine if we need to process only a
# _subset_ of the past SCCs instead of having to process them all.
if (
@@ -3491,16 +3461,17 @@
for id in stale:
graph[id].generate_unused_ignore_notes()
graph[id].generate_ignore_without_code_notes()
- if any(manager.errors.is_errors_for_file(graph[id].xpath) for id in stale):
- for id in stale:
- graph[id].transitive_error = True
+
+ # Flush errors, and write cache in two phases: first data files, then meta files.
meta_tuples = {}
+ errors_by_id = {}
for id in stale:
if graph[id].xpath not in manager.errors.ignored_files:
errors = manager.errors.file_messages(
graph[id].xpath, formatter=manager.error_formatter
)
manager.flush_errors(manager.errors.simplify_path(graph[id].xpath), errors, False)
+ errors_by_id[id] = errors
meta_tuples[id] = graph[id].write_cache()
graph[id].mark_as_rechecked()
for id in stale:
@@ -3512,6 +3483,7 @@
meta["dep_hashes"] = {
dep: graph[dep].interface_hash for dep in graph[id].dependencies if dep in graph
}
+ meta["error_lines"] = errors_by_id.get(id, [])
graph[id].meta = write_cache_meta(meta, manager, meta_json, data_json)
diff --git a/mypy/test/testcheck.py b/mypy/test/testcheck.py
index 04ef537..73f33c0 100644
--- a/mypy/test/testcheck.py
+++ b/mypy/test/testcheck.py
@@ -9,7 +9,6 @@
from pathlib import Path
from mypy import build
-from mypy.build import Graph
from mypy.errors import CompileError
from mypy.modulefinder import BuildSource, FindModuleCache, SearchPaths
from mypy.test.config import test_data_prefix, test_temp_dir
@@ -164,11 +163,13 @@
sys.path.insert(0, plugin_dir)
res = None
+ blocker = False
try:
res = build.build(sources=sources, options=options, alt_lib_path=test_temp_dir)
a = res.errors
except CompileError as e:
a = e.messages
+ blocker = True
finally:
assert sys.path[0] == plugin_dir
del sys.path[0]
@@ -199,7 +200,7 @@
if res:
if options.cache_dir != os.devnull:
- self.verify_cache(module_data, res.errors, res.manager, res.graph)
+ self.verify_cache(module_data, res.manager, blocker)
name = "targets"
if incremental_step:
@@ -229,42 +230,23 @@
check_test_output_files(testcase, incremental_step, strip_prefix="tmp/")
def verify_cache(
- self,
- module_data: list[tuple[str, str, str]],
- a: list[str],
- manager: build.BuildManager,
- graph: Graph,
+ self, module_data: list[tuple[str, str, str]], manager: build.BuildManager, blocker: bool
) -> None:
- # There should be valid cache metadata for each module except
- # for those that had an error in themselves or one of their
- # dependencies.
- error_paths = self.find_error_message_paths(a)
- busted_paths = {m.path for id, m in manager.modules.items() if graph[id].transitive_error}
- modules = self.find_module_files(manager)
- modules.update({module_name: path for module_name, path, text in module_data})
- missing_paths = self.find_missing_cache_files(modules, manager)
- # We would like to assert error_paths.issubset(busted_paths)
- # but this runs into trouble because while some 'notes' are
- # really errors that cause an error to be marked, many are
- # just notes attached to other errors.
- assert error_paths or not busted_paths, "Some modules reported error despite no errors"
- if not missing_paths == busted_paths:
- raise AssertionError(f"cache data discrepancy {missing_paths} != {busted_paths}")
+ if not blocker:
+ # There should be valid cache metadata for each module except
+ # in case of a blocking error in themselves or one of their
+ # dependencies.
+ modules = self.find_module_files(manager)
+ modules.update({module_name: path for module_name, path, text in module_data})
+ missing_paths = self.find_missing_cache_files(modules, manager)
+ if missing_paths:
+ raise AssertionError(f"cache data missing for {missing_paths}")
assert os.path.isfile(os.path.join(manager.options.cache_dir, ".gitignore"))
cachedir_tag = os.path.join(manager.options.cache_dir, "CACHEDIR.TAG")
assert os.path.isfile(cachedir_tag)
with open(cachedir_tag) as f:
assert f.read().startswith("Signature: 8a477f597d28d172789f06886806bc55")
- def find_error_message_paths(self, a: list[str]) -> set[str]:
- hits = set()
- for line in a:
- m = re.match(r"([^\s:]+):(\d+:)?(\d+:)? (error|warning|note):", line)
- if m:
- p = m.group(1)
- hits.add(p)
- return hits
-
def find_module_files(self, manager: build.BuildManager) -> dict[str, str]:
return {id: module.path for id, module in manager.modules.items()}
diff --git a/test-data/unit/check-incremental.test b/test-data/unit/check-incremental.test
index 06f2287..8e05f92 100644
--- a/test-data/unit/check-incremental.test
+++ b/test-data/unit/check-incremental.test
@@ -124,7 +124,7 @@
def func1() -> A: pass
[rechecked mod1]
-[stale]
+[stale mod1]
[out2]
tmp/mod1.py:1: error: Name "A" is not defined
@@ -428,7 +428,7 @@
def foo(self) -> str: return "a"
[rechecked mod1, mod2, mod2.mod3]
-[stale mod2, mod2.mod3]
+[stale mod1, mod2, mod2.mod3]
[builtins fixtures/module.pyi]
[out1]
[out2]
@@ -466,7 +466,7 @@
def foo(self) -> str: return "a"
[rechecked mod1, mod2, mod2.mod3]
-[stale mod2.mod3]
+[stale mod1, mod2.mod3]
[builtins fixtures/module.pyi]
[out1]
[out2]
@@ -541,7 +541,7 @@
return "foo"
[rechecked mod0, mod1, mod2]
-[stale mod2]
+[stale mod0, mod2]
[out2]
tmp/mod1.py:4: error: Incompatible return value type (got "str", expected "int")
@@ -952,7 +952,7 @@
[file parent/b.py.2]
x = 10
-[stale parent.b]
+[stale parent.a, parent.b]
[rechecked parent.a, parent.b]
[out2]
tmp/parent/a.py:2: note: Revealed type is "builtins.int"
@@ -1080,7 +1080,7 @@
[builtins fixtures/args.pyi]
[rechecked collections, main, package.subpackage.mod1]
-[stale collections, package.subpackage.mod1]
+[stale collections, main, package.subpackage.mod1]
[out2]
tmp/main.py:4: error: "Class" has no attribute "some_attribute"
@@ -1120,7 +1120,7 @@
[builtins fixtures/module_all.pyi]
[rechecked main, c, c.submodule]
-[stale c]
+[stale main, c, c.submodule]
[out2]
tmp/c/submodule.py:3: error: Incompatible types in assignment (expression has type "str", variable has type "int")
tmp/main.py:7: error: "C" has no attribute "foo"
@@ -1174,7 +1174,7 @@
foo = 3.14
reveal_type(foo)
[rechecked m, n]
-[stale]
+[stale n]
[out1]
tmp/n.py:2: note: Revealed type is "builtins.str"
tmp/m.py:3: error: Argument 1 to "accept_int" has incompatible type "str"; expected "int"
@@ -1204,7 +1204,7 @@
foo(3)
[rechecked client]
-[stale]
+[stale client]
[out2]
tmp/client.py:4: error: Argument 1 to "foo" has incompatible type "int"; expected "str"
@@ -1316,7 +1316,7 @@
bar = "str"
[rechecked main]
-[stale]
+[stale main]
[out1]
tmp/main.py:3: error: Argument 1 to "accept_int" has incompatible type "str"; expected "int"
tmp/main.py:4: note: Revealed type is "builtins.str"
@@ -1354,8 +1354,8 @@
class C:
def foo(self) -> int: return 1
-[rechecked mod3, mod2, mod1]
-[stale mod3, mod2]
+[rechecked mod3]
+[stale]
[out1]
tmp/mod3.py:6: error: Incompatible types in assignment (expression has type "str", variable has type "int")
tmp/mod1.py:3: note: Revealed type is "builtins.int"
@@ -1393,8 +1393,8 @@
class C:
def foo(self) -> str: return 'a'
-[rechecked mod4, mod3, mod2, mod1]
-[stale mod4]
+[rechecked mod4, mod3, mod1]
+[stale mod1, mod4]
[out1]
tmp/mod3.py:6: error: Incompatible types in assignment (expression has type "str", variable has type "int")
tmp/mod1.py:3: note: Revealed type is "builtins.int"
@@ -1438,8 +1438,8 @@
class C:
def foo(self) -> str: return 'a'
-[rechecked mod4, mod3, mod2, mod1]
-[stale mod4, mod3, mod2]
+[rechecked mod4, mod3, mod1]
+[stale mod1, mod4]
[out1]
tmp/mod3.py:6: error: Incompatible types in assignment (expression has type "str", variable has type "int")
tmp/mod1.py:3: note: Revealed type is "builtins.int"
@@ -2173,7 +2173,7 @@
x = 1
[delete m.py.2]
[rechecked n]
-[stale]
+[stale n]
[out2]
tmp/n.py:1: error: Cannot find implementation or library stub for module named "m"
tmp/n.py:1: note: See https://mypy.readthedocs.io/en/stable/running_mypy.html#missing-imports
@@ -2224,7 +2224,7 @@
[rechecked m]
[stale m]
[rechecked2 m]
-[stale2]
+[stale2 m]
[out3]
tmp/m.py:2: error: Incompatible return value type (got "str", expected "int")
@@ -2243,7 +2243,7 @@
def foo(x) -> int:
pass
[rechecked m, n]
-[stale m]
+[stale m, n]
[rechecked2 m, n]
[stale2 m, n]
[out2]
@@ -2894,7 +2894,7 @@
import m.a # Depends on module with error
[file m/c.py]
import m # No error here
-[rechecked m.a, m.b]
+[rechecked]
[out1]
tmp/m/a.py:1: error: Unsupported operand types for + ("int" and "str")
[out2]
@@ -3091,10 +3091,10 @@
[stale]
[out2]
main:2: note: Revealed type is "def (a: builtins.int) -> a.A"
-main:3: note: Revealed type is "def [_AT] (self: _AT`1, other: _AT`1) -> builtins.bool"
-main:4: note: Revealed type is "def [_AT] (self: _AT`2, other: _AT`2) -> builtins.bool"
-main:5: note: Revealed type is "def [_AT] (self: _AT`3, other: _AT`3) -> builtins.bool"
-main:6: note: Revealed type is "def [_AT] (self: _AT`4, other: _AT`4) -> builtins.bool"
+main:3: note: Revealed type is "def [_AT] (self: _AT`3, other: _AT`3) -> builtins.bool"
+main:4: note: Revealed type is "def [_AT] (self: _AT`4, other: _AT`4) -> builtins.bool"
+main:5: note: Revealed type is "def [_AT] (self: _AT`5, other: _AT`5) -> builtins.bool"
+main:6: note: Revealed type is "def [_AT] (self: _AT`6, other: _AT`6) -> builtins.bool"
main:15: error: Unsupported operand types for < ("A" and "int")
main:16: error: Unsupported operand types for <= ("A" and "int")
main:17: error: Unsupported operand types for > ("A" and "int")
@@ -7237,3 +7237,49 @@
[out2]
[out3]
tmp/bar.py:2: error: Incompatible types in assignment (expression has type "None", variable has type "int")
+
+[case testIncrementalBlockingErrorRepeatAndUndo]
+import m
+[file m.py]
+import f
+reveal_type(f.x)
+[file m.py.3]
+import f
+reveal_type(f.x)
+# touch
+[file f.py]
+x = 1
+[file f.py.2]
+no way
+[file f.py.4]
+x = 1
+[out]
+tmp/m.py:2: note: Revealed type is "builtins.int"
+[out2]
+tmp/f.py:1: error: Invalid syntax
+[out3]
+tmp/f.py:1: error: Invalid syntax
+[out4]
+tmp/m.py:2: note: Revealed type is "builtins.int"
+
+[case testIncrementalSameErrorOrder]
+import m
+[file m.py]
+import n
+def accept_int(x: int) -> None: pass
+accept_int(n.foo)
+[file n.py]
+import other
+foo = "hello"
+reveal_type(foo)
+[file other.py]
+[file other.py.2]
+# touch
+[rechecked other]
+[stale]
+[out]
+tmp/n.py:3: note: Revealed type is "builtins.str"
+tmp/m.py:3: error: Argument 1 to "accept_int" has incompatible type "str"; expected "int"
+[out2]
+tmp/n.py:3: note: Revealed type is "builtins.str"
+tmp/m.py:3: error: Argument 1 to "accept_int" has incompatible type "str"; expected "int"
diff --git a/test-data/unit/check-serialize.test b/test-data/unit/check-serialize.test
index 03c185a..1498c8d 100644
--- a/test-data/unit/check-serialize.test
+++ b/test-data/unit/check-serialize.test
@@ -31,7 +31,7 @@
-- We only do the following two sections once here to avoid repetition.
-- Most other test cases are similar.
[rechecked a]
-[stale]
+[stale a]
[out2]
tmp/a.py:2: error: Incompatible types in assignment (expression has type "str", variable has type "int")