| """Intermediate representation of classes.""" |
| |
| from __future__ import annotations |
| |
| from typing import List, NamedTuple |
| |
| from mypyc.common import PROPSET_PREFIX, JsonDict |
| from mypyc.ir.func_ir import FuncDecl, FuncIR, FuncSignature |
| from mypyc.ir.ops import DeserMaps, Value |
| from mypyc.ir.rtypes import RInstance, RType, deserialize_type |
| from mypyc.namegen import NameGenerator, exported_name |
| |
| # Some notes on the vtable layout: Each concrete class has a vtable |
| # that contains function pointers for its methods. So that subclasses |
| # may be efficiently used when their parent class is expected, the |
| # layout of child vtables must be an extension of their base class's |
| # vtable. |
| # |
| # This makes multiple inheritance tricky, since obviously we cannot be |
| # an extension of multiple parent classes. We solve this by requiring |
| # all but one parent to be "traits", which we can operate on in a |
| # somewhat less efficient way. For each trait implemented by a class, |
| # we generate a separate vtable for the methods in that trait. |
| # We then store an array of (trait type, trait vtable) pointers alongside |
| # a class's main vtable. When we want to call a trait method, we |
| # (at runtime!) search the array of trait vtables to find the correct one, |
| # then call through it. |
| # Trait vtables additionally need entries for attribute getters and setters, |
| # since they can't always be in the same location. |
| # |
| # To keep down the number of indirections necessary, we store the |
| # array of trait vtables in the memory *before* the class vtable, and |
| # search it backwards. (This is a trick we can only do once---there |
| # are only two directions to store data in---but I don't think we'll |
| # need it again.) |
| # There are some tricks we could try in the future to store the trait |
| # vtables inline in the trait table (which would cut down one indirection), |
| # but this seems good enough for now. |
| # |
# As an example:
# Imagine that we have a class B that inherits from a concrete class A
# and traits T1 and T2, that A has methods foo() and bar(), and that
# B overrides bar() with a more specific type and adds a new method baz().
# Then B's vtable will look something like:
#
#        T1 type object
#        ptr to B's T1 trait vtable
#        T2 type object
#        ptr to B's T2 trait vtable
#     -> | A.foo
#        | Glue function that converts between A.bar's type and B.bar
#        B.bar
#        B.baz
#
# The arrow points to the "start" of the vtable (what vtable pointers
# point to) and the bars indicate which parts correspond to the parent
# class A's vtable layout.
| # |
| # Classes that allow interpreted code to subclass them also have a |
| # "shadow vtable" that contains implementations that delegate to |
| # making a pycall, so that overridden methods in interpreted children |
| # will be called. (A better strategy could dynamically generate these |
| # vtables based on which methods are overridden in the children.) |
| |
| # Descriptions of method and attribute entries in class vtables. |
| # The 'cls' field is the class that the method/attr was defined in, |
| # which might be a parent class. |
| # The 'shadow_method', if present, contains the method that should be |
| # placed in the class's shadow vtable (if it has one). |
| |
| |
class VTableMethod(NamedTuple):
    """Description of a single method entry in a class vtable.

    Field order is part of the tuple's interface: entries are built
    positionally when a vtable is deserialized.
    """

    # Class that the method was defined in, which might be a parent class.
    cls: "ClassIR"
    # Name of the method.
    name: str
    # IR of the method stored in this slot.
    method: FuncIR
    # If present, the method that should be placed in the class's shadow
    # vtable (if it has one).
    shadow_method: FuncIR | None


# A vtable is an ordered list of method entries.
VTableEntries = List[VTableMethod]
| |
| |
class ClassIR:
    """Intermediate representation of a class.

    This also describes the runtime structure of native instances.
    """

    def __init__(
        self,
        name: str,
        module_name: str,
        is_trait: bool = False,
        is_generated: bool = False,
        is_abstract: bool = False,
        is_ext_class: bool = True,
    ) -> None:
        self.name = name
        self.module_name = module_name
        self.is_trait = is_trait
        self.is_generated = is_generated
        self.is_abstract = is_abstract
        self.is_ext_class = is_ext_class
        # An augmented class has additional methods separate from what mypyc generates.
        # Right now the only one is dataclasses.
        self.is_augmented = False
        # Does this inherit from a Python class?
        self.inherits_python = False
        # Do instances of this class have __dict__?
        self.has_dict = False
        # Do we allow interpreted subclasses? Derived from a mypyc_attr.
        self.allow_interpreted_subclasses = False
        # Does this class need getseters to be generated for its attributes? (getseters are also
        # added if is_generated is False)
        self.needs_getseters = False
        # Is this class declared as serializable (supports copy.copy
        # and pickle) using @mypyc_attr(serializable=True)?
        #
        # Additionally, any class with this attribute False but with
        # an __init__ that can be called without any arguments is
        # *implicitly serializable*. In this case __init__ will be
        # called during deserialization without arguments. If this is
        # True, we match Python semantics and __init__ won't be called
        # during deserialization.
        #
        # This impacts also all subclasses. Use is_serializable() to
        # also consider base classes.
        self._serializable = False
        # If this is a subclass of some built-in python class, the name
        # of the object for that class. We currently only support this
        # in a few ad-hoc cases.
        self.builtin_base: str | None = None
        # Default empty constructor
        self.ctor = FuncDecl(name, None, module_name, FuncSignature([], RInstance(self)))
        # Attributes defined in the class (not inherited)
        self.attributes: dict[str, RType] = {}
        # Deletable attributes
        self.deletable: list[str] = []
        # We populate method_decls with the signatures of every method before
        # we generate methods, and we rely on this information being present.
        self.method_decls: dict[str, FuncDecl] = {}
        # Map of methods that are actually present in an extension class
        self.methods: dict[str, FuncIR] = {}
        # Glue methods for boxing/unboxing when a class changes the type
        # while overriding a method. Maps from (parent class overridden, method)
        # to IR of glue method.
        self.glue_methods: dict[tuple[ClassIR, str], FuncIR] = {}

        # Properties are accessed like attributes, but have behavior like method calls.
        # They don't belong in the methods dictionary, since we don't want to expose them to
        # Python's method API. But we want to put them into our own vtable as methods, so that
        # they are properly handled and overridden. The property dictionary values are a tuple
        # containing a property getter and an optional property setter.
        self.properties: dict[str, tuple[FuncIR, FuncIR | None]] = {}
        # We generate these in prepare_class_def so that we have access to them when generating
        # other methods and properties that rely on these types.
        self.property_types: dict[str, RType] = {}

        self.vtable: dict[str, int] | None = None
        self.vtable_entries: VTableEntries = []
        self.trait_vtables: dict[ClassIR, VTableEntries] = {}
        # N.B: base might not actually quite be the direct base.
        # It is the nearest concrete base, but we allow a trait in between.
        self.base: ClassIR | None = None
        self.traits: list[ClassIR] = []
        # Supply a working mro for most generated classes. Real classes will need to
        # fix it up.
        self.mro: list[ClassIR] = [self]
        # base_mro is the chain of concrete (non-trait) ancestors
        self.base_mro: list[ClassIR] = [self]

        # Direct subclasses of this class (use subclasses() to also include non-direct ones)
        # None if separate compilation prevents this from working.
        #
        # Often it's better to use has_no_subclasses() or subclasses() instead.
        self.children: list[ClassIR] | None = []

        # Instance attributes that are initialized in the class body.
        self.attrs_with_defaults: set[str] = set()

        # Attributes that are always initialized in __init__ or class body
        # (inferred in mypyc.analysis.attrdefined using interprocedural analysis)
        self._always_initialized_attrs: set[str] = set()

        # Attributes that are sometimes initialized in __init__
        self._sometimes_initialized_attrs: set[str] = set()

        # If True, __init__ can make 'self' visible to unanalyzed/arbitrary code
        self.init_self_leak = False

        # Definedness of these attributes is backed by a bitmap. Index in the list
        # indicates the bit number. Includes inherited attributes. We need the
        # bitmap for types such as native ints that can't have a dedicated error
        # value that doesn't overlap a valid value. The bitmap is used if the
        # value of an attribute is the same as the error value.
        self.bitmap_attrs: list[str] = []

    def __repr__(self) -> str:
        """Return a debugging representation showing the constructor arguments."""
        return (
            "ClassIR("
            f"name={self.name}, module_name={self.module_name}, "
            f"is_trait={self.is_trait}, is_generated={self.is_generated}, "
            f"is_abstract={self.is_abstract}, is_ext_class={self.is_ext_class}"
            ")"
        )

    @property
    def fullname(self) -> str:
        """Return the module-qualified name, '<module_name>.<name>'."""
        return f"{self.module_name}.{self.name}"

    def real_base(self) -> ClassIR | None:
        """Return the actual concrete base class, if there is one."""
        if len(self.mro) > 1 and not self.mro[1].is_trait:
            return self.mro[1]
        return None

    def vtable_entry(self, name: str) -> int:
        """Return the vtable index recorded for 'name'.

        The vtable must already have been computed and must contain 'name'.
        """
        assert self.vtable is not None, "vtable not computed yet"
        assert name in self.vtable, f"{self.name!r} has no attribute {name!r}"
        return self.vtable[name]

    def attr_details(self, name: str) -> tuple[RType, ClassIR]:
        """Return the type of attribute/property 'name' and the class defining it.

        Classes are searched in MRO order; within a class, plain attributes
        take precedence over property types. Raise KeyError if not found.
        """
        for ir in self.mro:
            if name in ir.attributes:
                return ir.attributes[name], ir
            if name in ir.property_types:
                return ir.property_types[name], ir
        raise KeyError(f"{self.name!r} has no attribute {name!r}")

    def attr_type(self, name: str) -> RType:
        """Return the type of attribute/property 'name' (KeyError if missing)."""
        return self.attr_details(name)[0]

    def method_decl(self, name: str) -> FuncDecl:
        """Return the first declaration of method 'name' found along the MRO.

        Raise KeyError if no class in the MRO declares it.
        """
        for ir in self.mro:
            if name in ir.method_decls:
                return ir.method_decls[name]
        raise KeyError(f"{self.name!r} has no attribute {name!r}")

    def method_sig(self, name: str) -> FuncSignature:
        """Return the signature of method 'name' (KeyError if missing)."""
        return self.method_decl(name).sig

    def has_method(self, name: str) -> bool:
        """Return True if some class in the MRO declares method 'name'."""
        try:
            self.method_decl(name)
        except KeyError:
            return False
        return True

    def is_method_final(self, name: str) -> bool:
        """Return True if no known subclass overrides (or introduces) 'name'.

        Return False whenever the full set of subclasses can't be determined.
        """
        subs = self.subclasses()
        if subs is None:
            # TODO: Look at the final attribute!
            return False

        if self.has_method(name):
            # Final only if every subclass resolves the name to our declaration.
            method_decl = self.method_decl(name)
            return not any(subc.method_decl(name) != method_decl for subc in subs)
        # We don't have the method: it's "final" only if no subclass adds it.
        return not any(subc.has_method(name) for subc in subs)

    def has_attr(self, name: str) -> bool:
        """Return True if 'name' is an attribute or property along the MRO."""
        try:
            self.attr_type(name)
        except KeyError:
            return False
        return True

    def is_deletable(self, name: str) -> bool:
        """Return True if any class in the MRO lists 'name' as deletable."""
        return any(name in ir.deletable for ir in self.mro)

    def is_always_defined(self, name: str) -> bool:
        """Return True if 'name' is always initialized and can't be deleted."""
        if self.is_deletable(name):
            return False
        return name in self._always_initialized_attrs

    def name_prefix(self, names: NameGenerator) -> str:
        """Return the private name generated for this class by 'names'."""
        return names.private_name(self.module_name, self.name)

    def struct_name(self, names: NameGenerator) -> str:
        """Return the name of the instance struct for this class.

        'names' is accepted for interface compatibility but is not used here.
        """
        return f"{exported_name(self.fullname)}Object"

    def get_method_and_class(
        self, name: str, *, prefer_method: bool = False
    ) -> tuple[FuncIR, ClassIR] | None:
        """Return the IR of method 'name' and the class that provides it.

        Only the first class in the MRO that has the method is considered.
        Return None if there is no such method, or if the method found is an
        implicit accessor and 'prefer_method' is false.
        """
        for ir in self.mro:
            if name in ir.methods:
                func_ir = ir.methods[name]
                if not prefer_method and func_ir.decl.implicit:
                    # This is an implicit accessor, so there is also an attribute definition
                    # which the caller prefers. This happens if an attribute overrides a
                    # property.
                    return None
                return func_ir, ir

        return None

    def get_method(self, name: str, *, prefer_method: bool = False) -> FuncIR | None:
        """Like get_method_and_class(), but return only the method IR."""
        res = self.get_method_and_class(name, prefer_method=prefer_method)
        return res[0] if res else None

    def has_method_decl(self, name: str) -> bool:
        """Return True if some class in the MRO has a declaration for 'name'."""
        return any(name in ir.method_decls for ir in self.mro)

    def has_no_subclasses(self) -> bool:
        """Return True if the class is known to have no subclasses at all."""
        return self.children == [] and not self.allow_interpreted_subclasses

    def subclasses(self) -> set[ClassIR] | None:
        """Return all subclasses of this class, both direct and indirect.

        Return None if it is impossible to identify all subclasses, for example
        because we are performing separate compilation.
        """
        if self.children is None or self.allow_interpreted_subclasses:
            return None
        result = set(self.children)
        for child in self.children:
            # Always recurse, even into children without known children of
            # their own: a child whose subclasses can't be enumerated
            # (children is None, or interpreted subclasses are allowed) makes
            # our own set of subclasses unknowable as well.
            child_subs = child.subclasses()
            if child_subs is None:
                return None
            result.update(child_subs)
        return result

    def concrete_subclasses(self) -> list[ClassIR] | None:
        """Return all concrete (i.e. non-trait and non-abstract) subclasses.

        Include both direct and indirect subclasses. Place classes with no children first.
        Return None if the set of subclasses can't be determined.
        """
        subs = self.subclasses()
        if subs is None:
            return None
        concrete = {c for c in subs if not (c.is_trait or c.is_abstract)}
        # We place classes with no children first because they are more likely
        # to appear in various isinstance() checks. We then sort leaves by name
        # to get stable order.
        return sorted(concrete, key=lambda c: (len(c.children or []), c.name))

    def is_serializable(self) -> bool:
        """Return True if this class or any class in its MRO is marked serializable."""
        return any(ci._serializable for ci in self.mro)

    def serialize(self) -> JsonDict:
        """Return a JSON-compatible description of this class.

        Functions and other classes are referenced by id/fullname rather than
        being embedded; deserialize() resolves them through a DeserMaps.
        """
        # NOTE(review): bitmap_attrs is not part of the serialized data;
        # presumably it is recomputed after loading -- confirm.
        return {
            "name": self.name,
            "module_name": self.module_name,
            "is_trait": self.is_trait,
            "is_ext_class": self.is_ext_class,
            "is_abstract": self.is_abstract,
            "is_generated": self.is_generated,
            "is_augmented": self.is_augmented,
            "inherits_python": self.inherits_python,
            "has_dict": self.has_dict,
            "allow_interpreted_subclasses": self.allow_interpreted_subclasses,
            "needs_getseters": self.needs_getseters,
            "_serializable": self._serializable,
            "builtin_base": self.builtin_base,
            "ctor": self.ctor.serialize(),
            # We serialize dicts as lists to ensure order is preserved
            "attributes": [(k, t.serialize()) for k, t in self.attributes.items()],
            # We try to serialize a name reference, but if the decl isn't in methods
            # then we can't be sure that will work so we serialize the whole decl.
            "method_decls": [
                (k, d.id if k in self.methods else d.serialize())
                for k, d in self.method_decls.items()
            ],
            # We serialize method fullnames out and put methods in a separate dict
            "methods": [(k, m.id) for k, m in self.methods.items()],
            "glue_methods": [
                ((cir.fullname, k), m.id) for (cir, k), m in self.glue_methods.items()
            ],
            # We serialize properties and property_types separately out of an
            # abundance of caution about preserving dict ordering...
            "property_types": [(k, t.serialize()) for k, t in self.property_types.items()],
            "properties": list(self.properties),
            "vtable": self.vtable,
            "vtable_entries": serialize_vtable(self.vtable_entries),
            "trait_vtables": [
                (cir.fullname, serialize_vtable(v)) for cir, v in self.trait_vtables.items()
            ],
            # References to class IRs are all just names
            "base": self.base.fullname if self.base else None,
            "traits": [cir.fullname for cir in self.traits],
            "mro": [cir.fullname for cir in self.mro],
            "base_mro": [cir.fullname for cir in self.base_mro],
            "children": (
                [cir.fullname for cir in self.children] if self.children is not None else None
            ),
            "deletable": self.deletable,
            "attrs_with_defaults": sorted(self.attrs_with_defaults),
            "_always_initialized_attrs": sorted(self._always_initialized_attrs),
            "_sometimes_initialized_attrs": sorted(self._sometimes_initialized_attrs),
            "init_self_leak": self.init_self_leak,
        }

    @classmethod
    def deserialize(cls, data: JsonDict, ctx: DeserMaps) -> ClassIR:
        """Populate and return the ClassIR described by 'data'.

        The ClassIR object itself must already be registered in ctx.classes
        under its full name; it is filled in place. References to functions
        and other classes are looked up in 'ctx'.
        """
        fullname = data["module_name"] + "." + data["name"]
        assert fullname in ctx.classes, f"Class {fullname} not in deser class map"
        ir = ctx.classes[fullname]

        ir.is_trait = data["is_trait"]
        ir.is_generated = data["is_generated"]
        ir.is_abstract = data["is_abstract"]
        ir.is_ext_class = data["is_ext_class"]
        ir.is_augmented = data["is_augmented"]
        ir.inherits_python = data["inherits_python"]
        ir.has_dict = data["has_dict"]
        ir.allow_interpreted_subclasses = data["allow_interpreted_subclasses"]
        ir.needs_getseters = data["needs_getseters"]
        ir._serializable = data["_serializable"]
        ir.builtin_base = data["builtin_base"]
        ir.ctor = FuncDecl.deserialize(data["ctor"], ctx)
        ir.attributes = {k: deserialize_type(t, ctx) for k, t in data["attributes"]}
        # A string value is a function id to look up; anything else is a
        # fully serialized declaration (see serialize()).
        ir.method_decls = {
            k: ctx.functions[v].decl if isinstance(v, str) else FuncDecl.deserialize(v, ctx)
            for k, v in data["method_decls"]
        }
        ir.methods = {k: ctx.functions[v] for k, v in data["methods"]}
        ir.glue_methods = {
            (ctx.classes[c], k): ctx.functions[v] for (c, k), v in data["glue_methods"]
        }
        ir.property_types = {k: deserialize_type(t, ctx) for k, t in data["property_types"]}
        # Property getters/setters are found among the methods; the setter
        # (stored under PROPSET_PREFIX + name) is optional.
        ir.properties = {
            k: (ir.methods[k], ir.methods.get(PROPSET_PREFIX + k)) for k in data["properties"]
        }

        ir.vtable = data["vtable"]
        ir.vtable_entries = deserialize_vtable(data["vtable_entries"], ctx)
        ir.trait_vtables = {
            ctx.classes[k]: deserialize_vtable(v, ctx) for k, v in data["trait_vtables"]
        }

        base = data["base"]
        ir.base = ctx.classes[base] if base else None
        ir.traits = [ctx.classes[s] for s in data["traits"]]
        ir.mro = [ctx.classes[s] for s in data["mro"]]
        ir.base_mro = [ctx.classes[s] for s in data["base_mro"]]
        # None means that the children are unknown; keep it distinct from [].
        children = data["children"]
        ir.children = None if children is None else [ctx.classes[s] for s in children]
        ir.deletable = data["deletable"]
        ir.attrs_with_defaults = set(data["attrs_with_defaults"])
        ir._always_initialized_attrs = set(data["_always_initialized_attrs"])
        ir._sometimes_initialized_attrs = set(data["_sometimes_initialized_attrs"])
        ir.init_self_leak = data["init_self_leak"]

        return ir
| |
| |
class NonExtClassInfo:
    """Values needed to build a non-extension class (a regular Python class).

    Bundles the class dictionary, the tuple of base classes, the class
    annotations dictionary, and the metaclass.
    """

    def __init__(self, dict: Value, bases: Value, anns: Value, metaclass: Value) -> None:
        # Plain record: store each value under the attribute of the same name.
        self.dict, self.bases = dict, bases
        self.anns, self.metaclass = anns, metaclass
| |
| |
def serialize_vtable_entry(entry: VTableMethod) -> JsonDict:
    """Convert one vtable entry into a JSON-compatible dict.

    The class is stored by full name and the methods by declaration id;
    "shadow_method" is None when the entry has no shadow method.
    """
    record: JsonDict = {
        ".class": "VTableMethod",
        "cls": entry.cls.fullname,
        "name": entry.name,
        "method": entry.method.decl.id,
    }
    shadow = entry.shadow_method
    record["shadow_method"] = shadow.decl.id if shadow else None
    return record
| |
| |
def serialize_vtable(vtable: VTableEntries) -> list[JsonDict]:
    """Serialize every entry of a vtable, preserving the entry order."""
    serialized: list[JsonDict] = []
    for entry in vtable:
        serialized.append(serialize_vtable_entry(entry))
    return serialized
| |
| |
def deserialize_vtable_entry(data: JsonDict, ctx: DeserMaps) -> VTableMethod:
    """Rebuild a vtable entry from its serialized form.

    The class and the methods are looked up in 'ctx' by the names/ids stored
    in 'data'. Raise AssertionError if 'data' doesn't describe a VTableMethod.
    """
    if data[".class"] == "VTableMethod":
        return VTableMethod(
            ctx.classes[data["cls"]],
            data["name"],
            ctx.functions[data["method"]],
            ctx.functions[data["shadow_method"]] if data["shadow_method"] else None,
        )
    # Raise explicitly instead of using 'assert False': assert statements are
    # removed under -O, which would make this function silently return None.
    raise AssertionError(f"Bogus vtable .class: {data['.class']}")
| |
| |
def deserialize_vtable(data: list[JsonDict], ctx: DeserMaps) -> VTableEntries:
    """Rebuild a vtable from its serialized entries, preserving their order."""
    entries: VTableEntries = []
    for item in data:
        entries.append(deserialize_vtable_entry(item, ctx))
    return entries
| |
| |
def all_concrete_classes(class_ir: ClassIR) -> list[ClassIR] | None:
    """Return the concrete classes among 'class_ir' and all of its subclasses.

    The concrete subclasses come first; 'class_ir' itself is appended last
    when it is neither abstract nor a trait. Return None if the concrete
    subclasses can't be determined.
    """
    result = class_ir.concrete_subclasses()
    if result is not None and not class_ir.is_abstract and not class_ir.is_trait:
        result.append(class_ir)
    return result