| """Utilities for calculating and reporting statistics about types.""" |
| |
| import os |
| from collections import Counter |
| from contextlib import contextmanager |
| |
| import typing |
| from typing import Dict, List, cast, Optional, Union, Iterator |
| from typing_extensions import Final |
| |
| from mypy.traverser import TraverserVisitor |
| from mypy.typeanal import collect_all_inner_types |
| from mypy.types import ( |
| Type, AnyType, Instance, FunctionLike, TupleType, TypeVarType, TypeQuery, CallableType, |
| TypeOfAny, get_proper_type, get_proper_types |
| ) |
| from mypy import nodes |
| from mypy.nodes import ( |
| Expression, FuncDef, TypeApplication, AssignmentStmt, NameExpr, CallExpr, MypyFile, |
| MemberExpr, OpExpr, ComparisonExpr, IndexExpr, UnaryExpr, YieldFromExpr, RefExpr, ClassDef, |
| AssignmentExpr, ImportFrom, Import, ImportAll, PassStmt, BreakStmt, ContinueStmt, StrExpr, |
| BytesExpr, UnicodeExpr, IntExpr, FloatExpr, ComplexExpr, EllipsisExpr, ExpressionStmt, Node |
| ) |
| from mypy.util import correct_relative_import |
| from mypy.argmap import map_formals_to_actuals |
| |
# Precision levels recorded per source line. The numeric order matters:
# StatisticsVisitor.record_line() keeps the maximum value seen for a line.
TYPE_EMPTY: Final = 0
TYPE_UNANALYZED: Final = 1  # type of non-typechecked code
TYPE_PRECISE: Final = 2
TYPE_IMPRECISE: Final = 3
TYPE_ANY: Final = 4

# Names of the precision levels, listed in the same order as the TYPE_*
# values above (the name at index i describes level i).
precision_names: Final = [
    'empty',
    'unanalyzed',
    'precise',
    'imprecise',
    'any',
]
| |
| |
class StatisticsVisitor(TraverserVisitor):
    """Traverse a module's AST and collect type precision statistics.

    Results are accumulated on the instance: per-expression counters
    (num_*_exprs), per-type-kind counters (num_*_types), the per-line
    precision map (line_map), details about Any types
    (type_of_any_counter, any_line_map) and log messages (output).
    """

    def __init__(self,
                 inferred: bool,
                 filename: str,
                 modules: Dict[str, MypyFile],
                 typemap: Optional[Dict[Expression, Type]] = None,
                 all_nodes: bool = False,
                 visit_untyped_defs: bool = True) -> None:
        """Initialize the visitor.

        Args:
            inferred: If true, record the types of inferred variable
                definitions in assignments (unless all_nodes is set) and
                keep a leading 'self' argument when recording function
                signatures.
            filename: Name used as the prefix of diagnostics printed by
                this visitor.
            modules: Known modules by id; an import is recorded as precise
                only if its target is in this mapping.
            typemap: Maps expressions to their types; None if unavailable.
            all_nodes: If true, record the type of every expression passed
                to process_node(), and record unannotated functions as Any.
            visit_untyped_defs: If false, don't traverse the bodies of
                functions for which is_dynamic() is true.
        """
        self.inferred = inferred
        self.filename = filename
        self.modules = modules
        self.typemap = typemap
        self.all_nodes = all_nodes
        self.visit_untyped_defs = visit_untyped_defs

        # Expression counters, by precision of the expression's type.
        self.num_precise_exprs = 0
        self.num_imprecise_exprs = 0
        self.num_any_exprs = 0

        # Type counters, by kind of type (includes component types).
        self.num_simple_types = 0
        self.num_generic_types = 0
        self.num_tuple_types = 0
        self.num_function_types = 0
        self.num_typevar_types = 0
        self.num_complex_types = 0
        self.num_any_types = 0

        # Line that types recorded via type() are attributed to.
        self.line = -1

        # Maps line number to the highest TYPE_* precision value seen on it.
        self.line_map: Dict[int, int] = {}

        self.type_of_any_counter: typing.Counter[int] = Counter()
        self.any_line_map: Dict[int, List[AnyType]] = {}

        # For each scope (top level/function), whether the scope was type checked
        # (annotated function).
        #
        # TODO: Handle --check-untyped-defs
        self.checked_scopes = [True]

        self.output: List[str] = []

        TraverserVisitor.__init__(self)

    def visit_mypy_file(self, o: MypyFile) -> None:
        """Remember the current module (needed for relative imports) and visit it."""
        self.cur_mod_node = o
        self.cur_mod_id = o.fullname
        super().visit_mypy_file(o)

    def visit_import_from(self, imp: ImportFrom) -> None:
        self.process_import(imp)

    def visit_import_all(self, imp: ImportAll) -> None:
        self.process_import(imp)

    def process_import(self, imp: Union[ImportFrom, ImportAll]) -> None:
        """Record an import line as precise if its module is known, else as Any."""
        import_id, ok = correct_relative_import(self.cur_mod_id,
                                                imp.relative,
                                                imp.id,
                                                self.cur_mod_node.is_package_init_file())
        if ok and import_id in self.modules:
            kind = TYPE_PRECISE
        else:
            kind = TYPE_ANY
        self.record_line(imp.line, kind)

    def visit_import(self, imp: Import) -> None:
        """Record an 'import x, y' line as precise only if all modules are known."""
        if all(mod_id in self.modules for mod_id, _ in imp.ids):
            kind = TYPE_PRECISE
        else:
            kind = TYPE_ANY
        self.record_line(imp.line, kind)

    def visit_func_def(self, o: FuncDef) -> None:
        """Record the signature types of a function and visit its body."""
        with self.enter_scope(o):
            self.line = o.line
            if len(o.expanded) > 1 and o.expanded != [o] * len(o.expanded):
                if o in o.expanded:
                    print('{}:{}: ERROR: cycle in function expansion; skipping'.format(
                        self.filename,
                        o.get_line()))
                    return
                for defn in o.expanded:
                    self.visit_func_def(cast(FuncDef, defn))
            else:
                if o.type:
                    sig = cast(CallableType, o.type)
                    arg_types = sig.arg_types
                    if (sig.arg_names and sig.arg_names[0] == 'self' and
                            not self.inferred):
                        # Skip the type of 'self'.
                        arg_types = arg_types[1:]
                    for arg in arg_types:
                        self.type(arg)
                    self.type(sig.ret_type)
                elif self.all_nodes:
                    self.record_line(self.line, TYPE_ANY)
                if not o.is_dynamic() or self.visit_untyped_defs:
                    super().visit_func_def(o)

    @contextmanager
    def enter_scope(self, o: FuncDef) -> Iterator[None]:
        """Push a scope for function 'o' for the duration of the with block.

        The scope is checked only if the function has a type and the
        enclosing scope is checked.
        """
        self.checked_scopes.append(o.type is not None and self.checked_scopes[-1])
        try:
            yield None
        finally:
            # Always restore the stack, even if visiting the body raised.
            self.checked_scopes.pop()

    def is_checked_scope(self) -> bool:
        """Return whether the innermost scope is type checked."""
        return self.checked_scopes[-1]

    def visit_class_def(self, o: ClassDef) -> None:
        self.record_line(o.line, TYPE_PRECISE)  # TODO: Look at base classes
        # Override this method because we don't want to analyze base_type_exprs (base_type_exprs
        # are base classes in a class declaration).
        # While base_type_exprs are technically expressions, type analyzer does not visit them and
        # they are not in the typemap.
        for d in o.decorators:
            d.accept(self)
        o.defs.accept(self)

    def visit_type_application(self, o: TypeApplication) -> None:
        self.line = o.line
        for t in o.types:
            self.type(t)
        super().visit_type_application(o)

    def visit_assignment_stmt(self, o: AssignmentStmt) -> None:
        """Record the declared (or, optionally, inferred) types of an assignment."""
        self.line = o.line
        if (isinstance(o.rvalue, nodes.CallExpr) and
                isinstance(o.rvalue.analyzed, nodes.TypeVarExpr)):
            # Type variable definition -- not a real assignment.
            return
        if o.type:
            self.type(o.type)
        elif self.inferred and not self.all_nodes:
            # if self.all_nodes is set, lvalues will be visited later
            for lvalue in o.lvalues:
                if isinstance(lvalue, nodes.TupleExpr):
                    items = lvalue.items
                else:
                    items = [lvalue]
                for item in items:
                    if isinstance(item, RefExpr) and item.is_inferred_def:
                        if self.typemap is not None:
                            self.type(self.typemap.get(item))
        super().visit_assignment_stmt(o)

    def visit_expression_stmt(self, o: ExpressionStmt) -> None:
        if isinstance(o.expr, (StrExpr, UnicodeExpr, BytesExpr)):
            # Docstring
            self.record_line(o.line, TYPE_EMPTY)
        else:
            super().visit_expression_stmt(o)

    def visit_pass_stmt(self, o: PassStmt) -> None:
        self.record_precise_if_checked_scope(o)

    def visit_break_stmt(self, o: BreakStmt) -> None:
        self.record_precise_if_checked_scope(o)

    def visit_continue_stmt(self, o: ContinueStmt) -> None:
        self.record_precise_if_checked_scope(o)

    def visit_name_expr(self, o: NameExpr) -> None:
        if o.fullname in ('builtins.None',
                          'builtins.True',
                          'builtins.False',
                          'builtins.Ellipsis'):
            self.record_precise_if_checked_scope(o)
        else:
            self.process_node(o)
            super().visit_name_expr(o)

    def visit_yield_from_expr(self, o: YieldFromExpr) -> None:
        if o.expr:
            o.expr.accept(self)

    def visit_call_expr(self, o: CallExpr) -> None:
        self.process_node(o)
        if o.analyzed:
            o.analyzed.accept(self)
        else:
            o.callee.accept(self)
            for a in o.args:
                a.accept(self)
            self.record_call_target_precision(o)

    def record_call_target_precision(self, o: CallExpr) -> None:
        """Record precision of formal argument types used in a call."""
        if not self.typemap or o.callee not in self.typemap:
            # Type not available.
            return
        callee_type = get_proper_type(self.typemap[o.callee])
        if isinstance(callee_type, CallableType):
            self.record_callable_target_precision(o, callee_type)
        else:
            pass  # TODO: Handle overloaded functions, etc.

    def record_callable_target_precision(self, o: CallExpr, callee: CallableType) -> None:
        """Record imprecision caused by callee argument types.

        This only considers arguments passed in a call expression. Arguments
        with default values that aren't provided in a call arguably don't
        contribute to typing imprecision at the *call site* (but they
        contribute at the function definition).
        """
        assert self.typemap
        typemap = self.typemap
        # The mapping is indexed by formal (callee) argument; each entry lists
        # the indexes of the actual (call site) arguments mapped to that formal.
        # NOTE(review): relies on map_formals_to_actuals returning a
        # formal-indexed list -- confirm against mypy.argmap.
        formal_to_actual = map_formals_to_actuals(
            o.arg_kinds,
            o.arg_names,
            callee.arg_kinds,
            callee.arg_names,
            lambda n: typemap[o.args[n]])
        for formal_index, actuals in enumerate(formal_to_actual):
            if not actuals:
                # No argument was passed for this formal at the call site.
                continue
            formal = get_proper_type(callee.arg_types[formal_index])
            if isinstance(formal, AnyType):
                self.record_line(o.line, TYPE_ANY)
            elif is_imprecise(formal):
                self.record_line(o.line, TYPE_IMPRECISE)

    def visit_member_expr(self, o: MemberExpr) -> None:
        self.process_node(o)
        super().visit_member_expr(o)

    def visit_op_expr(self, o: OpExpr) -> None:
        self.process_node(o)
        super().visit_op_expr(o)

    def visit_comparison_expr(self, o: ComparisonExpr) -> None:
        self.process_node(o)
        super().visit_comparison_expr(o)

    def visit_index_expr(self, o: IndexExpr) -> None:
        self.process_node(o)
        super().visit_index_expr(o)

    def visit_assignment_expr(self, o: AssignmentExpr) -> None:
        self.process_node(o)
        super().visit_assignment_expr(o)

    def visit_unary_expr(self, o: UnaryExpr) -> None:
        self.process_node(o)
        super().visit_unary_expr(o)

    def visit_str_expr(self, o: StrExpr) -> None:
        self.record_precise_if_checked_scope(o)

    def visit_unicode_expr(self, o: UnicodeExpr) -> None:
        self.record_precise_if_checked_scope(o)

    def visit_bytes_expr(self, o: BytesExpr) -> None:
        self.record_precise_if_checked_scope(o)

    def visit_int_expr(self, o: IntExpr) -> None:
        self.record_precise_if_checked_scope(o)

    def visit_float_expr(self, o: FloatExpr) -> None:
        self.record_precise_if_checked_scope(o)

    def visit_complex_expr(self, o: ComplexExpr) -> None:
        self.record_precise_if_checked_scope(o)

    def visit_ellipsis(self, o: EllipsisExpr) -> None:
        self.record_precise_if_checked_scope(o)

    # Helpers

    def process_node(self, node: Expression) -> None:
        """Record the type of an expression if all_nodes is set and a typemap exists."""
        if self.all_nodes:
            if self.typemap is not None:
                self.line = node.line
                self.type(self.typemap.get(node))

    def record_precise_if_checked_scope(self, node: Node) -> None:
        """Record the precision of a node that has no interesting type of its own.

        The line is unanalyzed if the node is an expression missing from a
        non-empty typemap, precise if the current scope is checked, and Any
        otherwise.
        """
        if isinstance(node, Expression) and self.typemap and node not in self.typemap:
            kind = TYPE_UNANALYZED
        elif self.is_checked_scope():
            kind = TYPE_PRECISE
        else:
            kind = TYPE_ANY
        self.record_line(node.line, kind)

    def type(self, t: Optional[Type]) -> None:
        """Record statistics for type 't', attributed to the current line.

        This updates the per-line precision, the expression counters and,
        for 't' and each of its component types, the type kind counters.
        """
        t = get_proper_type(t)

        if not t:
            # If an expression does not have a type, it is often due to dead code.
            # Don't count these in the counters because there can be an unanalyzed
            # value on a line with other analyzed expressions, which overwrite the
            # TYPE_UNANALYZED.
            self.record_line(self.line, TYPE_UNANALYZED)
            return

        if isinstance(t, AnyType) and is_special_form_any(t):
            # TODO: What if there is an error in special form definition?
            self.record_line(self.line, TYPE_PRECISE)
            return

        if isinstance(t, AnyType):
            self.log(' !! Any type around line %d' % self.line)
            self.num_any_exprs += 1
            self.record_line(self.line, TYPE_ANY)
        elif ((not self.all_nodes and is_imprecise(t)) or
              (self.all_nodes and is_imprecise2(t))):
            self.log(' !! Imprecise type around line %d' % self.line)
            self.num_imprecise_exprs += 1
            self.record_line(self.line, TYPE_IMPRECISE)
        else:
            self.num_precise_exprs += 1
            self.record_line(self.line, TYPE_PRECISE)

        for typ in get_proper_types(collect_all_inner_types(t)) + [t]:
            if isinstance(typ, AnyType):
                typ = get_original_any(typ)
                if is_special_form_any(typ):
                    continue
                self.type_of_any_counter[typ.type_of_any] += 1
                self.num_any_types += 1
                self.any_line_map.setdefault(self.line, []).append(typ)
            elif isinstance(typ, Instance):
                if typ.args:
                    if any(is_complex(arg) for arg in typ.args):
                        self.num_complex_types += 1
                    else:
                        self.num_generic_types += 1
                else:
                    self.num_simple_types += 1
            elif isinstance(typ, FunctionLike):
                self.num_function_types += 1
            elif isinstance(typ, TupleType):
                if any(is_complex(item) for item in typ.items):
                    self.num_complex_types += 1
                else:
                    self.num_tuple_types += 1
            elif isinstance(typ, TypeVarType):
                self.num_typevar_types += 1

    def log(self, string: str) -> None:
        """Append a message to the output buffer."""
        self.output.append(string)

    def record_line(self, line: int, precision: int) -> None:
        """Record 'precision' for a line, keeping the highest value seen so far."""
        self.line_map[line] = max(precision,
                                  self.line_map.get(line, TYPE_EMPTY))
| |
| |
def dump_type_stats(tree: MypyFile,
                    path: str,
                    modules: Dict[str, MypyFile],
                    inferred: bool = False,
                    typemap: Optional[Dict[Expression, Type]] = None) -> None:
    """Print type statistics for a module to stdout.

    Nothing is printed for the special modules recognized by
    is_special_module(). Otherwise the output is the path, the messages
    logged by the statistics visitor, and the precision and type kind
    counters.
    """
    if is_special_module(path):
        return

    print(path)
    stats = StatisticsVisitor(inferred,
                              filename=tree.fullname,
                              modules=modules,
                              typemap=typemap)
    tree.accept(stats)
    for message in stats.output:
        print(message)

    # Each section is a heading followed by (label, count) rows.
    sections = [
        (' ** precision **', [
            (' precise ', stats.num_precise_exprs),
            (' imprecise', stats.num_imprecise_exprs),
            (' any ', stats.num_any_exprs),
        ]),
        (' ** kinds **', [
            (' simple ', stats.num_simple_types),
            (' generic ', stats.num_generic_types),
            (' function ', stats.num_function_types),
            (' tuple ', stats.num_tuple_types),
            (' TypeVar ', stats.num_typevar_types),
            (' complex ', stats.num_complex_types),
            (' any ', stats.num_any_types),
        ]),
    ]
    for heading, rows in sections:
        print(heading)
        for label, count in rows:
            print(label, count)
| |
| |
def is_special_module(path: str) -> bool:
    """Return whether the file name of 'path' is one of the special stub files."""
    special_names = ('abc.pyi', 'typing.pyi', 'builtins.pyi')
    _, file_name = os.path.split(path)
    return file_name in special_names
| |
| |
def is_imprecise(t: Type) -> bool:
    """Return the result of running HasAnyQuery over 't'."""
    query = HasAnyQuery()
    return t.accept(query)
| |
| |
class HasAnyQuery(TypeQuery[bool]):
    """Type query that reports Any types other than special form Anys.

    Results for component types are combined with the builtin any().
    """

    def __init__(self) -> None:
        super().__init__(any)

    def visit_any(self, t: AnyType) -> bool:
        """Return True unless 't' originates from a special form."""
        if is_special_form_any(t):
            return False
        return True
| |
| |
def is_imprecise2(t: Type) -> bool:
    """Return the result of running HasAnyQuery2 over 't'."""
    query = HasAnyQuery2()
    return t.accept(query)
| |
| |
class HasAnyQuery2(HasAnyQuery):
    """Variant of HasAnyQuery that never reports callable types."""

    def visit_callable_type(self, t: CallableType) -> bool:
        """Return False for every callable type.

        References to functions with some Any argument types (etc.) are
        not flagged since they generally don't mean trouble.
        """
        return False
| |
| |
def is_generic(t: Type) -> bool:
    """Return whether 't' is an Instance with at least one type argument."""
    proper = get_proper_type(t)
    if not isinstance(proper, Instance):
        return False
    return bool(proper.args)
| |
| |
def is_complex(t: Type) -> bool:
    """Return whether 't' is generic, function-like, a tuple or a type variable."""
    proper = get_proper_type(t)
    if is_generic(proper):
        return True
    return isinstance(proper, (FunctionLike, TupleType, TypeVarType))
| |
| |
def ensure_dir_exists(dir: str) -> None:
    """Create directory 'dir', including missing parents, if the path doesn't exist.

    Nothing happens if the path already exists.
    """
    if not os.path.exists(dir):
        # exist_ok avoids a spurious FileExistsError if the directory gets
        # created (e.g. by another process) between the check and this call.
        os.makedirs(dir, exist_ok=True)
| |
| |
def is_special_form_any(t: AnyType) -> bool:
    """Return whether the original Any behind 't' is of the special form kind."""
    original = get_original_any(t)
    return original.type_of_any == TypeOfAny.special_form
| |
| |
def get_original_any(t: AnyType) -> AnyType:
    """Return the source Any if 't' was derived from another Any, else 't' itself."""
    if t.type_of_any != TypeOfAny.from_another_any:
        return t
    source = t.source_any
    assert source
    # The source must itself be an original Any, not another derived one.
    assert source.type_of_any != TypeOfAny.from_another_any
    return source