| """Generate C code for a Python C extension module from Python source code.""" |
| |
| # FIXME: Basically nothing in this file operates on the level of a |
| # single module and it should be renamed. |
| |
| import os |
| import json |
| from mypy.ordered_dict import OrderedDict |
| from typing import List, Tuple, Dict, Iterable, Set, TypeVar, Optional |
| |
| from mypy.nodes import MypyFile |
| from mypy.build import ( |
| BuildSource, BuildResult, State, build, sorted_components, get_cache_names, |
| create_metastore, compute_hash, |
| ) |
| from mypy.errors import CompileError |
| from mypy.options import Options |
| from mypy.plugin import Plugin, ReportConfigContext |
| from mypy.fscache import FileSystemCache |
| from mypy.util import hash_digest |
| |
| from mypyc.irbuild.main import build_ir |
| from mypyc.irbuild.prepare import load_type_map |
| from mypyc.irbuild.mapper import Mapper |
| from mypyc.common import ( |
| PREFIX, TOP_LEVEL_NAME, INT_PREFIX, MODULE_PREFIX, RUNTIME_C_FILES, USE_FASTCALL, |
| USE_VECTORCALL, shared_lib_name, |
| ) |
| from mypyc.codegen.cstring import encode_as_c_string, encode_bytes_as_c_string |
| from mypyc.codegen.emit import EmitterContext, Emitter, HeaderDeclaration |
| from mypyc.codegen.emitfunc import generate_native_function, native_function_header |
| from mypyc.codegen.emitclass import generate_class_type_decl, generate_class |
| from mypyc.codegen.emitwrapper import ( |
| generate_wrapper_function, wrapper_function_header, |
| generate_legacy_wrapper_function, legacy_wrapper_function_header, |
| ) |
| from mypyc.ir.ops import LiteralsMap, DeserMaps |
| from mypyc.ir.rtypes import RType, RTuple |
| from mypyc.ir.func_ir import FuncIR |
| from mypyc.ir.class_ir import ClassIR |
| from mypyc.ir.module_ir import ModuleIR, ModuleIRs, deserialize_modules |
| from mypyc.options import CompilerOptions |
| from mypyc.transform.uninit import insert_uninit_checks |
| from mypyc.transform.refcount import insert_ref_count_opcodes |
| from mypyc.transform.exceptions import insert_exception_handling |
| from mypyc.namegen import NameGenerator, exported_name |
| from mypyc.errors import Errors |
| |
| |
# All of the modules being compiled are divided into "groups". A group
# is a set of modules that are placed into the same shared library.
# Two common configurations are that every module is placed in a group
# by itself (fully separate compilation) and that every module is
# placed in the same group (fully whole-program compilation), but we
# support finer-grained control of the group as well.
#
# In fully whole-program compilation, we will generate N+1 extension
# modules: one shim per module and one shared library containing all
# the actual code.
# In fully separate compilation, we (unfortunately) will generate 2*N
# extension modules: one shim per module and also one library containing
# each module's actual code. (This might be fixable in the future,
# but allows a clean separation between setup of the export tables
# (see generate_export_table) and running module top levels.)
#
# A group is represented as a pair: a list of BuildSources containing
# all of its modules, followed by the name of the group. (The name can
# be None only if we are compiling a single group with a single file
# in it and not using shared libraries.)
Group = Tuple[List[BuildSource], Optional[str]]
# A list of groups, in the (sources, name) form described above.
Groups = List[Group]

# A list of (file name, file contents) pairs.
FileContents = List[Tuple[str, str]]
| |
| |
class MarkedDeclaration:
    """Pair a header declaration with a mark, useful for topological sort.

    Arguments:
        declaration: The header declaration being tracked
        mark: The initial value of the mark
    """

    def __init__(self, declaration: HeaderDeclaration, mark: bool) -> None:
        # The wrapped declaration.
        self.declaration = declaration
        # Store the mark the caller asked for. It used to be hard-coded
        # to False, which silently discarded the argument.
        self.mark = mark
| |
| |
class MypycPlugin(Plugin):
    """mypy plugin that keeps incremental mode in step with mypyc.

    The plugin makes mypy recheck modules whenever mypyc needs them
    rebuilt, which happens in two situations:
      * Modules in the same group have to be compiled together, so
        every module is reported as depending on all its groupmates.
      * If a module's IR metadata, or any generated C source file
        recorded for it, is missing or stale, the module has to be
        recompiled, so it is reported as stale.
    """

    def __init__(
            self, options: Options, compiler_options: CompilerOptions, groups: Groups) -> None:
        super().__init__(options)
        # Maps a module id to (group name, sorted ids of all group members).
        self.group_map = {}  # type: Dict[str, Tuple[Optional[str], List[str]]]
        for group_sources, group_name in groups:
            member_ids = sorted(src.module for src in group_sources)
            self.group_map.update(
                (member, (group_name, member_ids)) for member in member_ids
            )

        self.compiler_options = compiler_options
        self.metastore = create_metastore(options)

    def report_config_data(
            self, ctx: ReportConfigContext) -> Optional[Tuple[Optional[str], List[str]]]:
        """Return the group map entry for the module, or None.

        None is returned for modules outside every group. When the
        data is used for a validity check (ctx.is_check), None is also
        returned if the IR cache is missing, does not match the mypy
        metadata cache, or if any recorded output source file is
        missing or has a different hash.
        """
        module_id = ctx.id
        module_path = ctx.path

        entry = self.group_map.get(module_id)
        if entry is None:
            return None

        # Outside of validity checks the entry itself is all we report.
        if not ctx.is_check:
            return entry

        # Read both the mypy metadata cache and our IR cache.
        meta_path, _, _ = get_cache_names(module_id, module_path, self.options)
        ir_path = get_ir_cache_name(module_id, module_path, self.options)
        try:
            meta_json = self.metastore.read(meta_path)
            ir_json = self.metastore.read(ir_path)
        except FileNotFoundError:
            # Possible if mypyc failed after mypy succeeded in the
            # previous run, or if cache files were deleted. Treat the
            # cache as unusable.
            return None

        ir_data = json.loads(ir_json)

        # The IR cache must have been written for this exact metadata.
        if compute_hash(meta_json) != ir_data['meta_hash']:
            return None

        # Every recorded output file must still exist with the recorded
        # hash. This mainly covers the build directory being deleted
        # while .mypy_cache is kept.
        target_dir = self.compiler_options.target_dir
        for rel_path, expected_hash in ir_data['src_hashes'].items():
            try:
                with open(os.path.join(target_dir, rel_path), 'rb') as src_file:
                    contents = src_file.read()
            except FileNotFoundError:
                return None
            if hash_digest(contents) != expected_hash:
                return None

        return entry

    def get_additional_deps(self, file: MypyFile) -> List[Tuple[int, str, int]]:
        """Report a dependency on each module in the file's group."""
        _, groupmates = self.group_map.get(file.fullname, (None, []))
        # NOTE(review): 10 and -1 are presumably mypy's dependency
        # priority and a "no line" marker -- confirm against mypy.
        return [(10, mate, -1) for mate in groupmates]
| |
| |
def parse_and_typecheck(
    sources: List[BuildSource],
    options: Options,
    compiler_options: CompilerOptions,
    groups: Groups,
    fscache: Optional[FileSystemCache] = None,
    alt_lib_path: Optional[str] = None
) -> BuildResult:
    """Run the mypy front-end over sources with the mypyc plugin installed.

    Requires options.strict_optional to be enabled. Raises CompileError
    carrying the reported errors if the build produced any; otherwise
    returns the BuildResult.
    """
    assert options.strict_optional, 'strict_optional must be turned on'

    mypyc_plugin = MypycPlugin(options, compiler_options, groups)
    build_result = build(
        sources=sources,
        options=options,
        alt_lib_path=alt_lib_path,
        fscache=fscache,
        extra_plugins=[mypyc_plugin],
    )
    if not build_result.errors:
        return build_result
    raise CompileError(build_result.errors)
| |
| |
def compile_scc_to_ir(
    scc: List[MypyFile],
    result: BuildResult,
    mapper: Mapper,
    compiler_options: CompilerOptions,
    errors: Errors,
) -> ModuleIRs:
    """Build the IR for one SCC of modules and run the IR transforms on it.

    Every module this SCC depends on must already have been compiled,
    or loaded from a cache, into mapper.

    Arguments:
        scc: The list of MypyFiles to compile
        result: The BuildResult from the mypy front-end
        mapper: The Mapper object mapping mypy ASTs to class and func IRs
        compiler_options: The compilation options
        errors: Where to report any errors encountered

    Returns the IR of the modules. If IR building reported errors, the
    IR is returned without the transforms applied.
    """
    if compiler_options.verbose:
        scc_names = ", ".join(tree.name for tree in scc)
        print("Compiling {}".format(scc_names))

    # Basic IR first; it still lacks exception and refcount handling.
    modules = build_ir(scc, result.graph, result.types, mapper, compiler_options, errors)
    if errors.num_errors > 0:
        return modules

    # Each pass is run over every function of every module before the
    # next pass starts: uninit checks, exception handling, refcounts.
    ir_passes = (
        insert_uninit_checks,
        insert_exception_handling,
        insert_ref_count_opcodes,
    )
    for run_pass in ir_passes:
        for module in modules.values():
            for fn in module.functions:
                run_pass(fn)

    return modules
| |
| |
def compile_modules_to_ir(
    result: BuildResult,
    mapper: Mapper,
    compiler_options: CompilerOptions,
    errors: Errors,
) -> ModuleIRs:
    """Compile a collection of modules into ModuleIRs.

    Only modules listed in mapper's group_map are considered. SCCs in
    which no module was rechecked by mypy are loaded from the cache
    into mapper instead of being compiled, and are not part of the
    returned IR.

    Returns the IR of the modules that were compiled.
    """
    deser_ctx = DeserMaps({}, {})
    modules = {}

    # Walk the graph one SCC at a time in topological order, like mypy.build.
    for scc in sorted_components(result.graph):
        trees = []
        for module_id in scc:
            state = result.graph[module_id]
            if state.id in mapper.group_map and state.tree:
                trees.append(state.tree)

        if not trees:
            continue

        needs_compile = any(
            module_id in result.manager.rechecked_modules for module_id in scc
        )
        if needs_compile:
            modules.update(
                compile_scc_to_ir(trees, result, mapper, compiler_options, errors)
            )
        else:
            load_scc_from_cache(trees, result, mapper, deser_ctx)

    return modules
| |
| |
def compile_ir_to_c(
    groups: Groups,
    modules: ModuleIRs,
    result: BuildResult,
    mapper: Mapper,
    compiler_options: CompilerOptions,
) -> Dict[Optional[str], List[Tuple[str, str]]]:
    """Compile a collection of ModuleIRs to C source text.

    Returns a dictionary mapping each group name to a list of
    (file name, file text) pairs. A group none of whose modules appear
    in `modules` maps to an empty list.
    """
    source_paths = {}
    for group_sources, _ in groups:
        for source in group_sources:
            source_paths[source.module] = result.graph[source.module].xpath

    grouped_module_names = [
        [source.module for source in group_sources] for group_sources, _ in groups
    ]
    names = NameGenerator(grouped_module_names)

    # Each compilation group is turned into the C source of one
    # separate extension module.
    ctext = {}  # type: Dict[Optional[str], List[Tuple[str, str]]]
    for group_sources, group_name in groups:
        group_modules = []
        for source in group_sources:
            if source.module in modules:
                group_modules.append((source.module, modules[source.module]))

        if group_modules:
            generator = GroupGenerator(
                mapper.literals[group_name],
                group_modules,
                source_paths,
                group_name,
                mapper.group_map,
                names,
                compiler_options,
            )
            ctext[group_name] = generator.generate_c_for_modules()
        else:
            ctext[group_name] = []

    return ctext
| |
| |
def get_ir_cache_name(id: str, path: str, options: Options) -> str:
    """Return the IR cache file name for a module.

    The name is the module's mypy metadata cache name with
    '.meta.json' replaced by '.ir.json'.
    """
    meta_path, _, _ = get_cache_names(id, path, options)
    ir_path = meta_path.replace('.meta.json', '.ir.json')
    return ir_path
| |
| |
def get_state_ir_cache_name(state: State) -> str:
    """Return the IR cache file name for the module described by state."""
    module_id, module_path = state.id, state.xpath
    return get_ir_cache_name(module_id, module_path, state.options)
| |
| |
def write_cache(
    modules: ModuleIRs,
    result: BuildResult,
    group_map: Dict[str, Optional[str]],
    ctext: Dict[Optional[str], List[Tuple[str, str]]],
) -> None:
    """Write out the mypyc cache information for modules.

    This is stored in addition to the cache data mypy writes itself.
    For each module the following is written:
      * Its serialized mypyc IR, without function bodies. Code that
        depends on the module can be compiled against this instead of
        recompiling the module. (Compiling against a module needs both
        its mypy and its mypyc data structures.)
      * The hash of the module's mypy metadata cache file. It ties the
        mypyc cache to one version of the mypy cache, which matters if
        mypyc crashes, errors out or is stopped after mypy wrote its
        cache but before mypyc wrote this one.
      * The hashes of all generated source files of the module's
        group, so that the module is recompiled if those outputs are
        missing.

    A module whose mypy metadata cannot be read is skipped.
    """
    # Per group: output file name -> hash of its contents.
    hashes = {
        group_name: {file_name: compute_hash(text) for file_name, text in files}
        for group_name, files in ctext.items()
    }

    for module_id, module in modules.items():
        state = result.graph[module_id]

        meta_path, _, _ = get_cache_names(module_id, state.xpath, result.manager.options)
        try:
            meta_data = result.manager.metastore.read(meta_path)
        except IOError:
            # No metadata for this module, so there is nothing to pair
            # an IR cache with.
            continue

        ir_cache_path = get_state_ir_cache_name(state)
        ir_data = {
            'ir': module.serialize(),
            'meta_hash': compute_hash(meta_data),
            'src_hashes': hashes[group_map[module_id]],
        }
        serialized = json.dumps(ir_data)

        result.manager.metastore.write(ir_cache_path, serialized)

    result.manager.metastore.commit()
| |
| |
def load_scc_from_cache(
    scc: List[MypyFile],
    result: BuildResult,
    mapper: Mapper,
    ctx: DeserMaps,
) -> ModuleIRs:
    """Load the IR for an SCC of modules from the cache.

    The 'ir' entry of each module's IR cache file is deserialized, and
    the type map for the SCC is loaded into mapper.

    Arguments and return are as compile_scc_to_ir.
    """
    cache_data = {}
    for tree in scc:
        module_name = tree.fullname
        ir_cache_path = get_state_ir_cache_name(result.graph[tree.fullname])
        raw_json = result.manager.metastore.read(ir_cache_path)
        cache_data[module_name] = json.loads(raw_json)['ir']

    modules = deserialize_modules(cache_data, ctx)
    load_type_map(mapper, scc, ctx)
    return modules
| |
| |
def compile_modules_to_c(
    result: BuildResult,
    compiler_options: CompilerOptions,
    errors: Errors,
    groups: Groups,
) -> Tuple[ModuleIRs, List[FileContents]]:
    """Compile Python module(s) to the source of Python C extension modules.

    This produces the source code of the "shared library" module of
    each group. The shim modules are generated in mypyc.build.
    Each shared library module provides, for each module in its group,
    a PyCapsule containing an initialization function. It also
    provides a capsule containing an export table of pointers to all
    of the group's functions and static variables.

    The mypyc cache is written only if no errors were reported.

    Arguments:
        result: The BuildResult from the mypy front-end
        compiler_options: The compilation options
        errors: Where to report any errors encountered
        groups: The groups that we are compiling. See documentation of Groups type above.

    Returns the IR of the modules and a list containing the generated files for each group.
    """
    # Map every module to the name of the group it belongs to.
    group_map = {}
    for group_sources, lib_name in groups:
        for source in group_sources:
            group_map[source.module] = lib_name
    mapper = Mapper(group_map)

    modules = compile_modules_to_ir(result, mapper, compiler_options, errors)
    ctext = compile_ir_to_c(groups, modules, result, mapper, compiler_options)

    if errors.num_errors == 0:
        write_cache(modules, result, group_map, ctext)

    files_per_group = [ctext[group_name] for _, group_name in groups]
    return modules, files_per_group
| |
| |
def generate_function_declaration(fn: FuncIR, emitter: Emitter) -> None:
    """Record the header declarations for a function in the emitter context.

    The native function is always declared and marked for export. All
    functions except the module top level also get a declaration for
    their Python-callable wrapper, in fastcall or legacy form.
    """
    native_decl = HeaderDeclaration(
        '{};'.format(native_function_header(fn.decl, emitter)),
        needs_export=True)
    emitter.context.declarations[emitter.native_function_name(fn.decl)] = native_decl

    if fn.name == TOP_LEVEL_NAME:
        return

    if is_fastcall_supported(fn):
        make_header = wrapper_function_header
    else:
        make_header = legacy_wrapper_function_header
    wrapper_decl = HeaderDeclaration('{};'.format(make_header(fn, emitter.names)))
    emitter.context.declarations[PREFIX + fn.cname(emitter.names)] = wrapper_decl
| |
| |
def pointerize(decl: str, name: str) -> str:
    """Given a C decl and its name, modify it to be a declaration to a pointer.

    Arguments:
        decl: The text of a C declaration of `name`
        name: The identifier that decl declares

    Returns decl with `name` turned into a pointer: `(*name)` if decl
    contains a parenthesis (treated as a function declaration), and
    `*name` otherwise.
    """
    import re

    # This doesn't work in general but does work for all our types...
    if '(' in decl:
        # Function pointer. Stick an * in front of the name and wrap it in parens.
        replacement = '(*{})'.format(name)
    else:
        # Non-function pointer. Just stick an * in front of the name.
        replacement = '*{}'.format(name)

    # Only rewrite `name` where it stands alone as an identifier. A
    # plain str.replace would also rewrite it inside longer identifiers
    # (e.g. the type in "foo_t foo;").
    whole_name = r'(?<![A-Za-z0-9_]){}(?![A-Za-z0-9_])'.format(re.escape(name))
    # A callable replacement keeps backslashes in the text literal.
    rewritten, count = re.subn(whole_name, lambda _match: replacement, decl)
    if count:
        return rewritten
    # No stand-alone occurrence: fall back to the plain substring
    # replacement so such inputs give the same result as before.
    return decl.replace(name, replacement)
| |
| |
def group_dir(group_name: str) -> str:
    """Return the relative directory path for a group name.

    This is every dotted component except the last one, joined with
    the OS path separator. A name without dots gives ''.
    """
    parent, _, _ = group_name.rpartition('.')
    return parent.replace('.', os.sep)
| |
| |
| class GroupGenerator: |
def __init__(self,
             literals: LiteralsMap,
             modules: List[Tuple[str, ModuleIR]],
             source_paths: Dict[str, str],
             group_name: Optional[str],
             group_map: Dict[str, Optional[str]],
             names: NameGenerator,
             compiler_options: CompilerOptions) -> None:
    """Generator for C source for a compilation group.

    The code for a compilation group consists of an internal and an
    external .h file, plus one .c file if not in multi_file mode or
    one .c file per module if in multi_file mode.

    Arguments:
        literals: The literals declared in this group
        modules: (name, ir) pairs for each module in the group
        source_paths: Map from module names to source file paths
        group_name: The name of the group (or None if this is single-module compilation)
        group_map: A map of modules to their group names
        names: The name generator for the compilation
        compiler_options: The compilation options. Its multi_file flag
            says whether to put each module in its own source file
            regardless of group structure.
    """
    # What is being compiled.
    self.literals = literals
    self.modules = modules
    self.source_paths = source_paths

    # Group identity. A shared library is used whenever the group is named.
    self.group_name = group_name
    self.use_shared_lib = group_name is not None

    # Naming and shared emitter state.
    self.names = names
    self.context = EmitterContext(names, group_name, group_map)

    # Options.
    self.compiler_options = compiler_options
    self.multi_file = compiler_options.multi_file

    # Initializations of globals to simple values that we can't
    # do statically because the windows loader is bad.
    self.simple_inits = []  # type: List[Tuple[str, str]]
| |
@property
def group_suffix(self) -> str:
    """'_' plus the exported form of the group name, or '' without a group name."""
    if not self.group_name:
        return ''
    return '_' + exported_name(self.group_name)
| |
@property
def short_group_suffix(self) -> str:
    """Like group_suffix, but using only the last dotted component of the group name."""
    if not self.group_name:
        return ''
    last_component = self.group_name.split('.')[-1]
    return '_' + exported_name(last_component)
| |
def generate_c_for_modules(self) -> List[Tuple[str, str]]:
    """Generate all C source and header text for this group.

    Returns a list of (file name, file text) pairs: in multi-file mode
    one '__native_<module>.c' entry per module, followed in every mode
    by the group's '__native<suffix>.c', '__native_internal<suffix>.h'
    and '__native<suffix>.h' files.
    """
    file_contents = []
    # Per-module .c files are only produced when a shared library is used.
    multi_file = self.use_shared_lib and self.multi_file

    # base_emitter collects the group-level .c file.
    base_emitter = Emitter(self.context)
    # Optionally just include the runtime library c files to
    # reduce the number of compiler invocations needed
    if self.compiler_options.include_runtime_files:
        for name in RUNTIME_C_FILES:
            base_emitter.emit_line('#include "{}"'.format(name))
    base_emitter.emit_line('#include "__native{}.h"'.format(self.short_group_suffix))
    base_emitter.emit_line('#include "__native_internal{}.h"'.format(self.short_group_suffix))
    emitter = base_emitter

    # Declare a global for every literal: ints as CPyTagged, everything
    # else as a static PyObject.
    for (_, literal), identifier in self.literals.items():
        if isinstance(literal, int):
            symbol = emitter.static_name(identifier, None)
            self.declare_global('CPyTagged ', symbol)
        else:
            self.declare_static_pyobject(identifier, emitter)

    for module_name, module in self.modules:
        if multi_file:
            # Each module gets a fresh emitter, i.e. its own .c file.
            emitter = Emitter(self.context)
            emitter.emit_line('#include "__native{}.h"'.format(self.short_group_suffix))
            emitter.emit_line(
                '#include "__native_internal{}.h"'.format(self.short_group_suffix))

        self.declare_module(module_name, emitter)
        self.declare_internal_globals(module_name, emitter)
        self.declare_imports(module.imports, emitter)

        for cl in module.classes:
            if cl.is_ext_class:
                generate_class(cl, module_name, emitter)

        # Generate Python extension module definitions and module initialization functions.
        self.generate_module_def(emitter, module_name, module)

        for fn in module.functions:
            emitter.emit_line()
            generate_native_function(fn, emitter, self.source_paths[module_name], module_name)
            # The module top level gets no Python-callable wrapper.
            if fn.name != TOP_LEVEL_NAME:
                emitter.emit_line()
                if is_fastcall_supported(fn):
                    generate_wrapper_function(
                        fn, emitter, self.source_paths[module_name], module_name)
                else:
                    generate_legacy_wrapper_function(
                        fn, emitter, self.source_paths[module_name], module_name)
        if multi_file:
            name = ('__native_{}.c'.format(emitter.names.private_name(module_name)))
            file_contents.append((name, ''.join(emitter.fragments)))

    # The external header file contains type declarations while
    # the internal contains declarations of functions and objects
    # (which are shared between shared libraries via dynamic
    # exports tables and not accessed directly.)
    ext_declarations = Emitter(self.context)
    ext_declarations.emit_line('#ifndef MYPYC_NATIVE{}_H'.format(self.group_suffix))
    ext_declarations.emit_line('#define MYPYC_NATIVE{}_H'.format(self.group_suffix))
    ext_declarations.emit_line('#include <Python.h>')
    ext_declarations.emit_line('#include <CPy.h>')

    declarations = Emitter(self.context)
    declarations.emit_line('#ifndef MYPYC_NATIVE_INTERNAL{}_H'.format(self.group_suffix))
    declarations.emit_line('#define MYPYC_NATIVE_INTERNAL{}_H'.format(self.group_suffix))
    declarations.emit_line('#include <Python.h>')
    declarations.emit_line('#include <CPy.h>')
    declarations.emit_line('#include "__native{}.h"'.format(self.short_group_suffix))
    declarations.emit_line()
    declarations.emit_line('int CPyGlobalsInit(void);')
    declarations.emit_line()

    for module_name, module in self.modules:
        self.declare_finals(module_name, module.final_names, declarations)
        for cl in module.classes:
            # NOTE: `emitter` here is whichever emitter the module loop
            # above left behind (the last module's in multi-file mode).
            generate_class_type_decl(cl, emitter, ext_declarations, declarations)
        for fn in module.functions:
            generate_function_declaration(fn, declarations)

    # For each group this one depends on: include its header and
    # declare the local copy of its export table.
    for lib in sorted(self.context.group_deps):
        elib = exported_name(lib)
        short_lib = exported_name(lib.split('.')[-1])
        declarations.emit_lines(
            '#include <{}>'.format(
                os.path.join(group_dir(lib), "__native_{}.h".format(short_lib))
            ),
            'struct export_table_{} exports_{};'.format(elib, elib)
        )

    sorted_decls = self.toposort_declarations()

    # From here on, definitions go into the group-level .c file.
    emitter = base_emitter
    self.generate_globals_init(emitter)

    emitter.emit_line()

    for declaration in sorted_decls:
        # Type declarations go to the external header, the rest to the
        # internal one.
        decls = ext_declarations if declaration.is_type else declarations
        if not declaration.is_type:
            decls.emit_lines(
                'extern {}'.format(declaration.decl[0]), *declaration.decl[1:])
            # If there is a definition, emit it. Otherwise repeat the declaration
            # (without an extern).
            if declaration.defn:
                emitter.emit_lines(*declaration.defn)
            else:
                emitter.emit_lines(*declaration.decl)
        else:
            decls.emit_lines(*declaration.decl)

    # The export table and shared library init function only exist for
    # named groups (generate_shared_lib_init asserts a group name).
    if self.group_name:
        self.generate_export_table(ext_declarations, emitter)

        self.generate_shared_lib_init(emitter)

    # Close the include guards of both headers.
    ext_declarations.emit_line('#endif')
    declarations.emit_line('#endif')

    output_dir = group_dir(self.group_name) if self.group_name else ''
    return file_contents + [
        (os.path.join(output_dir, '__native{}.c'.format(self.short_group_suffix)),
         ''.join(emitter.fragments)),
        (os.path.join(output_dir, '__native_internal{}.h'.format(self.short_group_suffix)),
         ''.join(declarations.fragments)),
        (os.path.join(output_dir, '__native{}.h'.format(self.short_group_suffix)),
         ''.join(ext_declarations.fragments)),
    ]
| |
def generate_export_table(self, decl_emitter: Emitter, code_emitter: Emitter) -> None:
    """Generate the declaration and definition of the group's export struct.

    Compiled code reaches functions and data of other compilation
    groups through an explicit "export struct". This avoids deeply
    platform specific issues with dynamic library linking (and some
    possibly insurmountable issues involving cyclic dependencies).

    Each group declares a struct type holding a pointer to every
    function and static variable it exports. It populates one such
    struct and stores a pointer to it in a capsule, which is kept as
    an attribute named 'exports' on the python module of the group's
    shared library.

    On load, a group's init function imports the export tables of all
    its dependencies through the capsule mechanism and copies their
    contents into local copies (so that accessing them needs no
    pointer indirection).

    All calls to functions of another group, and all accesses to its
    statics, then go indirectly through the export table.

    For example, a group containing a module b, where b contains a class B
    and a function bar, would declare an export table like:
        struct export_table_b {
            PyTypeObject **CPyType_B;
            PyObject *(*CPyDef_B)(CPyTagged cpy_r_x);
            CPyTagged (*CPyDef_B___foo)(PyObject *cpy_r_self, CPyTagged cpy_r_y);
            tuple_T2OI (*CPyDef_bar)(PyObject *cpy_r_x);
            char (*CPyDef___top_level__)(void);
        };
    that would be initialized with:
        static struct export_table_b exports = {
            &CPyType_B,
            &CPyDef_B,
            &CPyDef_B___foo,
            &CPyDef_bar,
            &CPyDef___top_level__,
        };
    To call `b.bar`, then, a function in another group would do
    `exports_b.CPyDef_bar(...)`.

    Arguments:
        decl_emitter: Receives the struct type declaration
        code_emitter: Receives the definition of the static `exports` table
    """
    table_type = 'struct export_table{}'.format(self.group_suffix)
    # Both the struct fields and the initializer entries follow the
    # iteration order of the declarations, so they line up.
    exported = [
        (name, decl)
        for name, decl in decl_emitter.context.declarations.items()
        if decl.needs_export
    ]

    decl_emitter.emit_lines('', '{} {{'.format(table_type))
    for name, decl in exported:
        field = pointerize('\n'.join(decl.decl), name)
        decl_emitter.emit_line(field)
    decl_emitter.emit_line('};')

    code_emitter.emit_lines('', 'static {} exports = {{'.format(table_type))
    for name, _ in exported:
        code_emitter.emit_line('&{},'.format(name))
    code_emitter.emit_line('};')
| |
def generate_shared_lib_init(self, emitter: Emitter) -> None:
    """Generate the init function for a shared library.

    A shared library contains all of the actual code for a
    compilation group.

    The init function is responsible for creating Capsules that
    wrap pointers to the initialization function of all the real
    init functions for modules in this shared library as well as
    the export table containing all of the exported functions and
    values from all the modules.

    These capsules are stored in attributes of the shared library.

    Arguments:
        emitter: Receives the C text of the init function

    Requires self.group_name to be set (asserted below).
    """
    assert self.group_name is not None

    # Function header and module creation. The emitted C keeps the
    # module in a static and returns it again on repeated calls.
    emitter.emit_line()
    emitter.emit_lines(
        'PyMODINIT_FUNC PyInit_{}(void)'.format(
            shared_lib_name(self.group_name).split('.')[-1]),
        '{',
        ('static PyModuleDef def = {{ PyModuleDef_HEAD_INIT, "{}", NULL, -1, NULL, NULL }};'
         .format(shared_lib_name(self.group_name))),
        'int res;',
        'PyObject *capsule;',
        'PyObject *tmp;',
        'static PyObject *module;',
        'if (module) {',
        'Py_INCREF(module);',
        'return module;',
        '}',
        'module = PyModule_Create(&def);',
        'if (!module) {',
        'goto fail;',
        '}',
        '',
    )

    # Capsule around this group's export table, stored as the
    # 'exports' attribute of the module.
    emitter.emit_lines(
        'capsule = PyCapsule_New(&exports, "{}.exports", NULL);'.format(
            shared_lib_name(self.group_name)),
        'if (!capsule) {',
        'goto fail;',
        '}',
        'res = PyObject_SetAttrString(module, "exports", capsule);',
        'Py_DECREF(capsule);',
        'if (res < 0) {',
        'goto fail;',
        '}',
        '',
    )

    # One capsule per module in the group, wrapping that module's
    # CPyInit_<name> function and stored as attribute 'init_<name>'.
    for mod, _ in self.modules:
        name = exported_name(mod)
        emitter.emit_lines(
            'extern PyObject *CPyInit_{}(void);'.format(name),
            'capsule = PyCapsule_New((void *)CPyInit_{}, "{}.init_{}", NULL);'.format(
                name, shared_lib_name(self.group_name), name),
            'if (!capsule) {',
            'goto fail;',
            '}',
            'res = PyObject_SetAttrString(module, "init_{}", capsule);'.format(name),
            'Py_DECREF(capsule);',
            'if (res < 0) {',
            'goto fail;',
            '}',
            '',
        )

    # For every group this one depends on: import its shared library,
    # fetch its export table capsule and copy the table into the
    # local exports_<group> struct.
    for group in sorted(self.context.group_deps):
        egroup = exported_name(group)
        emitter.emit_lines(
            'tmp = PyImport_ImportModule("{}"); if (!tmp) goto fail; Py_DECREF(tmp);'.format(
                shared_lib_name(group)),
            'struct export_table_{} *pexports_{} = PyCapsule_Import("{}.exports", 0);'.format(
                egroup, egroup, shared_lib_name(group)),
            'if (!pexports_{}) {{'.format(egroup),
            'goto fail;',
            '}',
            'memcpy(&exports_{group}, pexports_{group}, sizeof(exports_{group}));'.format(
                group=egroup),
            '',
        )

    # Success return, then the shared failure path.
    emitter.emit_lines(
        'return module;',
        'fail:',
        'Py_XDECREF(module);',
        'return NULL;',
        '}',
    )
| |
def generate_globals_init(self, emitter: Emitter) -> None:
    """Emit the C function CPyGlobalsInit, which initializes the group's globals.

    The emitted function returns 0 at once if it already ran. Otherwise
    it calls CPy_Init(), performs the assignments recorded in
    self.simple_inits, creates an object for every literal in
    self.literals (returning -1 if a creation yields NULL), and then
    records that it ran.

    Arguments:
        emitter: Receives the C text of the function

    Raises AssertionError for a literal that is not an int, float,
    complex, str or bytes.
    """
    emitter.emit_lines(
        '',
        'int CPyGlobalsInit(void)',
        '{',
        'static int is_initialized = 0;',
        'if (is_initialized) return 0;',
        ''
    )

    emitter.emit_line('CPy_Init();')
    for symbol, fixup in self.simple_inits:
        emitter.emit_line('{} = {};'.format(symbol, fixup))

    for (_, literal), identifier in self.literals.items():
        symbol = emitter.static_name(identifier, None)
        if isinstance(literal, int):
            # Ints are first built as a boxed temporary under a
            # prefixed name; the tagged global is filled in below.
            actual_symbol = symbol
            symbol = INT_PREFIX + symbol
            emitter.emit_line(
                'PyObject * {} = PyLong_FromString(\"{}\", NULL, 10);'.format(
                    symbol, str(literal))
            )
        elif isinstance(literal, float):
            emitter.emit_line(
                '{} = PyFloat_FromDouble({});'.format(symbol, str(literal))
            )
        elif isinstance(literal, complex):
            emitter.emit_line(
                '{} = PyComplex_FromDoubles({}, {});'.format(
                    symbol, str(literal.real), str(literal.imag))
            )
        elif isinstance(literal, str):
            emitter.emit_line(
                '{} = PyUnicode_FromStringAndSize({}, {});'.format(
                    symbol, *encode_as_c_string(literal))
            )
        elif isinstance(literal, bytes):
            emitter.emit_line(
                '{} = PyBytes_FromStringAndSize({}, {});'.format(
                    symbol, *encode_bytes_as_c_string(literal))
            )
        else:
            # The message is one string (it used to be a tuple of two
            # strings) and names every literal type handled above.
            assert False, (
                'Literals must be integers, floating point numbers, complex numbers, '
                'strings, or bytes, but the provided literal is of type {}'.format(
                    type(literal)))
        emitter.emit_lines('if (unlikely({} == NULL))'.format(symbol),
                           ' return -1;')
        # Ints have an unboxed representation.
        if isinstance(literal, int):
            emitter.emit_line(
                '{} = CPyTagged_FromObject({});'.format(actual_symbol, symbol)
            )
        elif isinstance(literal, str):
            emitter.emit_line('PyUnicode_InternInPlace(&{});'.format(symbol))

    emitter.emit_lines(
        'is_initialized = 1;',
        'return 0;',
        '}',
    )
| |
def generate_module_def(self, emitter: Emitter, module_name: str, module: ModuleIR) -> None:
    """Emit the PyModuleDef struct for a module and the module init function.

    Three pieces of C are emitted, in order: the PyMethodDef table (one entry
    per module-level function other than the top level), the PyModuleDef
    struct, and the module init function. The init function creates the
    module, instantiates generated classes, calls CPyGlobalsInit() and then
    runs the module top level.

    The emitted init function returns NULL if fetching the module's
    "__name__" fails, and it releases that name on every error return emitted
    by this method.
    """
    # Emit module methods
    module_prefix = emitter.names.private_name(module_name)
    emitter.emit_line('static PyMethodDef {}module_methods[] = {{'.format(module_prefix))
    for fn in module.functions:
        # Methods and the module top level are not exposed as module functions.
        if fn.class_name is not None or fn.name == TOP_LEVEL_NAME:
            continue
        if is_fastcall_supported(fn):
            flag = 'METH_FASTCALL'
        else:
            flag = 'METH_VARARGS'
        emitter.emit_line(
            ('{{"{name}", (PyCFunction){prefix}{cname}, {flag} | METH_KEYWORDS, '
             'NULL /* docstring */}},').format(
                name=fn.name,
                cname=fn.cname(emitter.names),
                prefix=PREFIX,
                flag=flag))
    emitter.emit_line('{NULL, NULL, 0, NULL}')
    emitter.emit_line('};')
    emitter.emit_line()

    # Emit module definition struct
    emitter.emit_lines('static struct PyModuleDef {}module = {{'.format(module_prefix),
                       'PyModuleDef_HEAD_INIT,',
                       '"{}",'.format(module_name),
                       'NULL, /* docstring */',
                       '-1, /* size of per-interpreter state of the module,',
                       ' or -1 if the module keeps state in global variables. */',
                       '{}module_methods'.format(module_prefix),
                       '};')
    emitter.emit_line()
    # Emit module init function. If we are compiling just one module, this
    # will be the C API init function. If we are compiling 2+ modules, we
    # generate a shared library for the modules and shims that call into
    # the shared library, and in this case we use an internal module
    # initialization function that will be called by the shim.
    if not self.use_shared_lib:
        declaration = 'PyMODINIT_FUNC PyInit_{}(void)'.format(module_name)
    else:
        declaration = 'PyObject *CPyInit_{}(void)'.format(exported_name(module_name))
    emitter.emit_lines(declaration,
                       '{')
    # Store the module reference in a static and return it when necessary.
    # This is separate from the *global* reference to the module that will
    # be populated when it is imported by a compiled module. We want that
    # reference to only be populated when the module has been successfully
    # imported, whereas this we want to have to stop a circular import.
    module_static = self.module_internal_static_name(module_name, emitter)

    def emit_failure_return(condition: str) -> None:
        # Error exit for code emitted after modname has been fetched: drop
        # the owned reference to modname before returning NULL.
        emitter.emit_lines('if ({}) {{'.format(condition),
                           'Py_DECREF(modname);',
                           'return NULL;',
                           '}')

    emitter.emit_lines('if ({}) {{'.format(module_static),
                       'Py_INCREF({});'.format(module_static),
                       'return {};'.format(module_static),
                       '}')

    emitter.emit_lines('{} = PyModule_Create(&{}module);'.format(module_static, module_prefix),
                       'if (unlikely({} == NULL))'.format(module_static),
                       ' return NULL;')
    emitter.emit_line(
        'PyObject *modname = PyObject_GetAttrString((PyObject *){}, "__name__");'.format(
            module_static))
    # PyObject_GetAttrString returns NULL on failure; modname is later passed
    # to CPyType_FromTemplate and Py_DECREF, so bail out before using it.
    emitter.emit_lines('if (unlikely(modname == NULL))',
                       ' return NULL;')

    module_globals = emitter.static_name('globals', module_name)
    emitter.emit_line('{} = PyModule_GetDict({});'.format(module_globals, module_static))
    emit_failure_return('unlikely({} == NULL)'.format(module_globals))

    # HACK: Manually instantiate generated classes here
    for cl in module.classes:
        if cl.is_generated:
            type_struct = emitter.type_struct_name(cl)
            emitter.emit_lines(
                '{t} = (PyTypeObject *)CPyType_FromTemplate('
                '(PyObject *){t}_template, NULL, modname);'
                .format(t=type_struct))
            emit_failure_return('unlikely(!{})'.format(type_struct))

    emit_failure_return('CPyGlobalsInit() < 0')

    # NOTE: the failure return emitted by generate_top_level_call is a plain
    # 'return NULL;' and does not release modname.
    self.generate_top_level_call(module, emitter)

    emitter.emit_lines('Py_DECREF(modname);')

    emitter.emit_line('return {};'.format(module_static))
    emitter.emit_line('}')
| |
def generate_top_level_call(self, module: ModuleIR, emitter: Emitter) -> None:
    """Emit the C call that runs the function representing the module top level.

    Nothing is emitted if the module has no function named TOP_LEVEL_NAME.
    The emitted code returns NULL when the call's result is 2.
    """
    # Optimization: the top level tends to come last, so search from the end.
    top_level = next(
        (func for func in reversed(module.functions) if func.name == TOP_LEVEL_NAME),
        None,
    )
    if top_level is None:
        return

    call = 'char result = {}();'.format(emitter.native_function_name(top_level.decl))
    emitter.emit_lines(
        call,
        'if (result == 2)',
        ' return NULL;',
    )
| |
def toposort_declarations(self) -> List[HeaderDeclaration]:
    """Return the context's declarations ordered so dependencies come first.

    A C declaration may need other declarations (such as struct
    declarations) to appear before it. A depth-first walk appends each
    declaration only after everything it depends on has been appended, so
    the resulting order is safe to emit.

    This runs in O(V + E).
    """
    ordered = []  # type: List[HeaderDeclaration]
    entries = OrderedDict()  # type: Dict[str, MarkedDeclaration]
    for key, header in self.context.declarations.items():
        entries[key] = MarkedDeclaration(header, False)

    def emit_after_dependencies(key: str) -> None:
        entry = entries[key]
        if not entry.mark:
            for dependency in entry.declaration.dependencies:
                emit_after_dependencies(dependency)

            ordered.append(entry.declaration)
            entry.mark = True

    for key in entries:
        emit_after_dependencies(key)

    return ordered
| |
def declare_global(self, type_spaced: str, name: str,
                   *,
                   initializer: Optional[str] = None) -> None:
    """Register a header declaration for the C global `name`.

    Does nothing if `name` already has a declaration in the context. The
    declaration text is `type_spaced` followed by `name`; a definition with
    an initializer is attached only when `initializer` is non-empty.
    """
    if name in self.context.declarations:
        return

    definition = (
        ['{}{} = {};'.format(type_spaced, name, initializer)] if initializer else None
    )
    self.context.declarations[name] = HeaderDeclaration(
        '{}{};'.format(type_spaced, name),
        defn=definition,
    )
| |
def declare_internal_globals(self, module_name: str, emitter: Emitter) -> None:
    """Declare the `PyObject *` global holding the 'globals' static of a module."""
    self.declare_global('PyObject *', emitter.static_name('globals', module_name))
| |
def module_internal_static_name(self, module_name: str, emitter: Emitter) -> str:
    """Return the static name of the module's '<module_name>_internal' reference."""
    internal_name = module_name + '_internal'
    return emitter.static_name(internal_name, None, prefix=MODULE_PREFIX)
| |
def declare_module(self, module_name: str, emitter: Emitter) -> None:
    """Declare the two `CPyModule *` globals kept for a module.

    The internal one is used by the implementation of module init to cache
    results and prevent infinite recursion in import cycles. The other one
    is used by other modules to refer to this module, and is queued in
    `simple_inits` to be initialized to Py_None.
    """
    # Internal reference: starts out as NULL.
    self.declare_global(
        'CPyModule *',
        self.module_internal_static_name(module_name, emitter),
        initializer='NULL',
    )

    # Reference used by other modules.
    shared_name = emitter.static_name(module_name, None, prefix=MODULE_PREFIX)
    self.declare_global('CPyModule *', shared_name)
    self.simple_inits.append((shared_name, 'Py_None'))
| |
def declare_imports(self, imps: Iterable[str], emitter: Emitter) -> None:
    """Declare the module globals for each module name in `imps`."""
    for imported_module in imps:
        self.declare_module(imported_module, emitter)
| |
def declare_finals(
        self, module: str, final_names: Iterable[Tuple[str, RType]], emitter: Emitter) -> None:
    """Register an exported header declaration for each (name, type) final of `module`.

    An existing declaration under the same static name is overwritten.
    """
    for final_name, rtype in final_names:
        symbol = emitter.static_name(final_name, module)
        decl_text = '{}{};'.format(emitter.ctype_spaced(rtype), symbol)
        definition = self.final_definition(module, final_name, rtype, emitter)
        emitter.context.declarations[symbol] = HeaderDeclaration(
            decl_text,
            [definition],
            needs_export=True)
| |
def final_definition(
        self, module: str, name: str, typ: RType, emitter: Emitter) -> str:
    """Return the C definition of a final, initialized to its undefined value."""
    symbol = emitter.static_name(name, module)
    # This relies on the undefined value and the error value always being the same.
    if not isinstance(typ, RTuple):
        initial_value = emitter.c_undefined_value(typ)
    else:
        # The tuple value is inlined because the initializer must be static.
        fields = ''.join(emitter.tuple_undefined_value_helper(typ))
        initial_value = '{{ {} }}'.format(fields)
    return '{}{} = {};'.format(emitter.ctype_spaced(typ), symbol, initial_value)
| |
def declare_static_pyobject(self, identifier: str, emitter: Emitter) -> None:
    """Declare a `PyObject *` global under the static name of `identifier`."""
    self.declare_global('PyObject *', emitter.static_name(identifier, None))
| |
| |
def sort_classes(classes: List[Tuple[str, ClassIR]]) -> List[Tuple[str, ClassIR]]:
    """Topologically sort (module name, class) pairs by base class and traits.

    Each class depends on its base (if any) and on its traits; the pairs
    are returned in the order produced by toposort() over those dependencies.
    """
    module_of = {}  # type: Dict[ClassIR, str]
    deps = OrderedDict()  # type: Dict[ClassIR, Set[ClassIR]]
    for module_name, class_ir in classes:
        module_of[class_ir] = module_name
        class_deps = deps.setdefault(class_ir, set())
        if class_ir.base:
            class_deps.add(class_ir.base)
        class_deps.update(class_ir.traits)
    return [(module_of[class_ir], class_ir) for class_ir in toposort(deps)]
| |
| |
T = TypeVar('T')


def toposort(deps: Dict[T, Set[T]]) -> List[T]:
    """Topologically sort a dict from item to dependencies.

    Every item appears in the result after all of its dependencies. Items
    are visited in the iteration order of `deps`, and the dependencies of an
    item in the iteration order of its set.

    The traversal is an iterative depth-first search, so arbitrarily long
    dependency chains do not hit the interpreter's recursion limit.

    This runs in O(V + E).

    Raises:
        KeyError: if a dependency is not itself a key of `deps`.
        ValueError: if the dependencies contain a cycle.
    """
    result = []  # type: List[T]
    visited = set()  # type: Set[T]
    # Items currently on the DFS stack; meeting one of them again means a cycle.
    in_progress = set()  # type: Set[T]

    for root in deps:
        if root in visited:
            continue

        in_progress.add(root)
        # Each stack entry pairs an item with an iterator over the
        # dependencies that still have to be visited for it.
        stack = [(root, iter(deps[root]))]
        while stack:
            item, children = stack[-1]
            for child in children:
                if child in visited:
                    continue
                if child in in_progress:
                    raise ValueError(
                        'Dependency cycle detected at {!r}'.format(child))
                in_progress.add(child)
                stack.append((child, iter(deps[child])))
                # Descend into the child first; this item's remaining
                # dependencies are resumed once the child is finished.
                break
            else:
                # All dependencies have been emitted, so emit the item itself.
                stack.pop()
                in_progress.discard(item)
                visited.add(item)
                result.append(item)

    return result
| |
| |
def is_fastcall_supported(fn: FuncIR) -> bool:
    """Return whether `fn` can use the fast calling convention.

    Module-level functions follow USE_FASTCALL. Among methods, `__call__`
    follows USE_VECTORCALL, `__init__` is never supported, and every other
    method follows USE_FASTCALL.
    """
    if fn.class_name is None:
        return USE_FASTCALL
    if fn.name == '__call__':
        # We can use vectorcalls (PEP 590) when supported
        return USE_VECTORCALL
    # TODO: Support fastcall for __init__.
    return USE_FASTCALL and fn.name != '__init__'