1152 lines
46 KiB
Python
1152 lines
46 KiB
Python
"""Generate C code for a Python C extension module from Python source code."""
|
|
|
|
# FIXME: Basically nothing in this file operates on the level of a
|
|
# single module and it should be renamed.
|
|
|
|
import os
|
|
import json
|
|
from mypy.backports import OrderedDict
|
|
from typing import List, Tuple, Dict, Iterable, Set, TypeVar, Optional
|
|
|
|
from mypy.nodes import MypyFile
|
|
from mypy.build import (
|
|
BuildSource, BuildResult, State, build, sorted_components, get_cache_names,
|
|
create_metastore, compute_hash,
|
|
)
|
|
from mypy.errors import CompileError
|
|
from mypy.options import Options
|
|
from mypy.plugin import Plugin, ReportConfigContext
|
|
from mypy.fscache import FileSystemCache
|
|
from mypy.util import hash_digest
|
|
|
|
from mypyc.irbuild.main import build_ir
|
|
from mypyc.irbuild.prepare import load_type_map
|
|
from mypyc.irbuild.mapper import Mapper
|
|
from mypyc.common import (
|
|
PREFIX, TOP_LEVEL_NAME, MODULE_PREFIX, RUNTIME_C_FILES, short_id_from_name, use_fastcall,
|
|
use_vectorcall, shared_lib_name,
|
|
)
|
|
from mypyc.codegen.cstring import c_string_initializer
|
|
from mypyc.codegen.literals import Literals
|
|
from mypyc.codegen.emit import EmitterContext, Emitter, HeaderDeclaration
|
|
from mypyc.codegen.emitfunc import generate_native_function, native_function_header
|
|
from mypyc.codegen.emitclass import generate_class_type_decl, generate_class
|
|
from mypyc.codegen.emitwrapper import (
|
|
generate_wrapper_function, wrapper_function_header,
|
|
generate_legacy_wrapper_function, legacy_wrapper_function_header,
|
|
)
|
|
from mypyc.ir.ops import DeserMaps, LoadLiteral
|
|
from mypyc.ir.rtypes import RType, RTuple
|
|
from mypyc.ir.func_ir import FuncIR
|
|
from mypyc.ir.class_ir import ClassIR
|
|
from mypyc.ir.module_ir import ModuleIR, ModuleIRs, deserialize_modules
|
|
from mypyc.options import CompilerOptions
|
|
from mypyc.transform.uninit import insert_uninit_checks
|
|
from mypyc.transform.refcount import insert_ref_count_opcodes
|
|
from mypyc.transform.exceptions import insert_exception_handling
|
|
from mypyc.namegen import NameGenerator, exported_name
|
|
from mypyc.errors import Errors
|
|
|
|
|
|
# All of the modules being compiled are divided into "groups". A group
|
|
# is a set of modules that are placed into the same shared library.
|
|
# Two common configurations are that every module is placed in a group
|
|
# by itself (fully separate compilation) and that every module is
|
|
# placed in the same group (fully whole-program compilation), but we
|
|
# support finer-grained control of the group as well.
|
|
#
|
|
# In fully whole-program compilation, we will generate N+1 extension
|
|
# modules: one shim per module and one shared library containing all
|
|
# the actual code.
|
|
# In fully separate compilation, we (unfortunately) will generate 2*N
|
|
# extension modules: one shim per module and also one library containing
|
|
# each module's actual code. (This might be fixable in the future,
|
|
# but allows a clean separation between setup of the export tables
|
|
# (see generate_export_table) and running module top levels.)
|
|
#
|
|
# A group is represented as a list of BuildSources containing all of
|
|
# its modules along with the name of the group. (Which can be None
|
|
# only if we are compiling only a single group with a single file in it
|
|
# and not using shared libraries).
|
|
Group = Tuple[List[BuildSource], Optional[str]]
|
|
Groups = List[Group]
|
|
|
|
# A list of (file name, file contents) pairs.
|
|
FileContents = List[Tuple[str, str]]
|
|
|
|
|
|
class MarkedDeclaration:
    """Pair a header declaration with a boolean mark, useful for topological sort.

    Arguments:
        declaration: The header declaration being wrapped
        mark: The initial value of the mark
    """

    def __init__(self, declaration: HeaderDeclaration, mark: bool) -> None:
        # The wrapped declaration.
        self.declaration = declaration
        # Honor the caller-supplied initial mark. Previously the argument was
        # ignored and the mark always started out as False.
        self.mark = mark
|
|
|
|
|
|
class MypycPlugin(Plugin):
    """Mypy plugin that keeps mypy's incremental mode in step with mypyc.

    The plugin forces mypy to recheck things when mypyc needs them
    recompiled, which comes up in a couple of situations:
      * All modules in a group must be compiled together, so each
        module is reported as depending on all of its groupmates.
      * If the IR metadata is missing or stale, or any of the
        generated C source files recorded for the module are missing
        or stale, the module has to be recompiled, so it is marked
        as stale.
    """

    def __init__(
            self, options: Options, compiler_options: CompilerOptions, groups: Groups) -> None:
        super().__init__(options)
        # Maps a module id to (group name, sorted ids of every module in that group).
        self.group_map: Dict[str, Tuple[Optional[str], List[str]]] = {}
        for group_sources, group_name in groups:
            members = sorted(src.module for src in group_sources)
            self.group_map.update({member: (group_name, members) for member in members})

        self.compiler_options = compiler_options
        self.metastore = create_metastore(options)

    def report_config_data(
            self, ctx: ReportConfigContext) -> Optional[Tuple[Optional[str], List[str]]]:
        """Return the group map entry for the module described by ctx, or None.

        None is returned for modules that are not in any group. When
        ctx.is_check is set, None is also returned unless the IR cache
        exists, its recorded hash matches the metadata cache, and every
        recorded output source file exists with the recorded hash.
        """
        module_id, module_path, validating = ctx.id, ctx.path, ctx.is_check

        # Values in group_map are always tuples, so None means "not one of ours".
        entry = self.group_map.get(module_id)
        if entry is None:
            return None

        # Without a validity check the cache data is just the group map entry.
        if not validating:
            return entry

        # Read both the mypy metadata cache and the mypyc IR cache.
        meta_path, _, _ = get_cache_names(module_id, module_path, self.options)
        ir_path = get_ir_cache_name(module_id, module_path, self.options)
        try:
            meta_json = self.metastore.read(meta_path)
            ir_json = self.metastore.read(ir_path)
        except FileNotFoundError:
            # Can happen if mypyc failed after mypy succeeded in the
            # previous run, or if cache files were deleted. Simply
            # fail to load the cache.
            return None

        ir_data = json.loads(ir_json)

        # The IR cache must have been written against this exact metadata cache.
        if compute_hash(meta_json) != ir_data['meta_hash']:
            return None

        # Every generated source file must still be present and unchanged.
        # This mainly matters when the user deleted the build directory
        # but kept .mypy_cache, which should be handled gracefully.
        for rel_path, recorded_hash in ir_data['src_hashes'].items():
            full_path = os.path.join(self.compiler_options.target_dir, rel_path)
            try:
                with open(full_path, 'rb') as src_file:
                    on_disk = src_file.read()
            except FileNotFoundError:
                return None
            actual_hash = hash_digest(on_disk)
            if recorded_hash != actual_hash:
                return None

        return entry

    def get_additional_deps(self, file: MypyFile) -> List[Tuple[int, str, int]]:
        """Report a dependency on every module in the same group as file.

        Modules outside of any group get no additional dependencies.
        """
        # NOTE(review): the tuples are presumably (priority, module id, line)
        # as expected by mypy's plugin API -- confirm against mypy.plugin.
        _, groupmates = self.group_map.get(file.fullname, (None, []))
        return [(10, mate, -1) for mate in groupmates]
|
|
|
|
|
|
def parse_and_typecheck(
    sources: List[BuildSource],
    options: Options,
    compiler_options: CompilerOptions,
    groups: Groups,
    fscache: Optional[FileSystemCache] = None,
    alt_lib_path: Optional[str] = None
) -> BuildResult:
    """Run the mypy front-end over sources with the mypyc plugin installed.

    Returns the BuildResult on success. Raises CompileError carrying the
    reported errors if the build result contains any.
    """
    assert options.strict_optional, 'strict_optional must be turned on'
    mypyc_plugin = MypycPlugin(options, compiler_options, groups)
    result = build(
        sources=sources,
        options=options,
        alt_lib_path=alt_lib_path,
        fscache=fscache,
        extra_plugins=[mypyc_plugin],
    )
    if not result.errors:
        return result
    raise CompileError(result.errors)
|
|
|
|
|
|
def compile_scc_to_ir(
    scc: List[MypyFile],
    result: BuildResult,
    mapper: Mapper,
    compiler_options: CompilerOptions,
    errors: Errors,
) -> ModuleIRs:
    """Compile one SCC of modules into ModuleIRs.

    Every module this SCC depends on must already have been compiled,
    or loaded from a cache, into mapper.

    Arguments:
        scc: The list of MypyFiles to compile
        result: The BuildResult from the mypy front-end
        mapper: The Mapper object mapping mypy ASTs to class and func IRs
        compiler_options: The compilation options
        errors: Where to report any errors encountered

    Returns the IR of the modules. If IR building reported errors, the
    IR is returned as built, without the transform passes applied.
    """
    if compiler_options.verbose:
        module_names = ", ".join(tree.name for tree in scc)
        print("Compiling {}".format(module_names))

    # Basic IR first; exception and refcount handling are still missing here.
    modules = build_ir(scc, result.graph, result.types, mapper, compiler_options, errors)
    if errors.num_errors > 0:
        return modules

    # Each pass is applied to every function of every module before the
    # next pass starts: uninit checks, then exception handling, then
    # refcount handling.
    passes = (insert_uninit_checks, insert_exception_handling, insert_ref_count_opcodes)
    for run_pass in passes:
        for module in modules.values():
            for fn in module.functions:
                run_pass(fn)

    return modules
|
|
|
|
|
|
def compile_modules_to_ir(
    result: BuildResult,
    mapper: Mapper,
    compiler_options: CompilerOptions,
    errors: Errors,
) -> ModuleIRs:
    """Compile a collection of modules into ModuleIRs.

    Which modules get compiled is determined by mapper's group_map.

    Returns the IR of the modules that were compiled in this run. SCCs
    in which no module was rechecked are loaded from the cache into
    mapper instead and are not part of the returned IR.
    """
    deser_ctx = DeserMaps({}, {})
    modules = {}

    # Walk the graph SCC by SCC in topological order, as mypy.build does.
    for scc in sorted_components(result.graph):
        states = [result.graph[module_id] for module_id in scc]
        trees = [state.tree for state in states
                 if state.id in mapper.group_map and state.tree]
        if not trees:
            continue

        if any(module_id in result.manager.rechecked_modules for module_id in scc):
            # Something in the SCC was rechecked: compile it from scratch.
            modules.update(compile_scc_to_ir(trees, result, mapper, compiler_options, errors))
        else:
            # Nothing was rechecked: the cached IR can be used.
            load_scc_from_cache(trees, result, mapper, deser_ctx)

    return modules
|
|
|
|
|
|
def compile_ir_to_c(
    groups: Groups,
    modules: ModuleIRs,
    result: BuildResult,
    mapper: Mapper,
    compiler_options: CompilerOptions,
) -> Dict[Optional[str], List[Tuple[str, str]]]:
    """Compile a collection of ModuleIRs to C source text.

    Returns a dictionary mapping each group name to a list of
    (file name, file text) pairs. A group none of whose modules appear
    in modules maps to an empty list.
    """
    # Source path of every module in every group, keyed by module name.
    source_paths = {}
    for sources, _ in groups:
        for source in sources:
            source_paths[source.module] = result.graph[source.module].xpath

    names = NameGenerator([[source.module for source in sources] for sources, _ in groups])

    # One pass per compilation group; each group is compiled into a
    # separate extension module.
    ctext: Dict[Optional[str], List[Tuple[str, str]]] = {}
    for group_sources, group_name in groups:
        group_modules = []
        for source in group_sources:
            if source.module in modules:
                group_modules.append((source.module, modules[source.module]))

        if group_modules:
            generator = GroupGenerator(
                group_modules, source_paths, group_name, mapper.group_map, names,
                compiler_options,
            )
            ctext[group_name] = generator.generate_c_for_modules()
        else:
            ctext[group_name] = []

    return ctext
|
|
|
|
|
|
def get_ir_cache_name(id: str, path: str, options: Options) -> str:
    """Return the name of the mypyc IR cache file for a module.

    The name is the mypy metadata cache name for the module with
    '.meta.json' replaced by '.ir.json'.
    """
    meta_json_path, _, _ = get_cache_names(id, path, options)
    ir_json_path = meta_json_path.replace('.meta.json', '.ir.json')
    return ir_json_path
|
|
|
|
|
|
def get_state_ir_cache_name(state: State) -> str:
    """Return the IR cache file name for the module described by state."""
    module_id, source_path, options = state.id, state.xpath, state.options
    return get_ir_cache_name(module_id, source_path, options)
|
|
|
|
|
|
def write_cache(
    modules: ModuleIRs,
    result: BuildResult,
    group_map: Dict[str, Optional[str]],
    ctext: Dict[Optional[str], List[Tuple[str, str]]],
) -> None:
    """Write out the mypyc cache information for modules.

    On top of what mypy itself caches, the following is stored for
    each module:
      * 'ir': the serialized mypyc IR of the module, minus the bodies
        of functions. Code that depends on the module can compile
        against this data instead of recompiling the module.
        (Compiling against a module needs both its mypy and its mypyc
        data structures.)
      * 'meta_hash': the hash of the module's mypy metadata cache
        file. It ties the mypyc cache to one version of the mypy
        cache, which matters if mypyc crashes, errors out or is
        stopped after mypy wrote its cache but before mypyc did.
      * 'src_hashes': the hashes of all generated source files of the
        group the module belongs to, so that the module gets
        recompiled when those outputs are missing.

    Modules whose mypy metadata cannot be read are skipped.
    """
    # Hash every generated file, grouped by the name of its group.
    group_hashes = {
        group_name: {file_name: compute_hash(text) for file_name, text in files}
        for group_name, files in ctext.items()
    }

    for module_id, module_ir in modules.items():
        state = result.graph[module_id]

        meta_path, _, _ = get_cache_names(module_id, state.xpath, result.manager.options)
        # No metadata means there is nothing to keep in sync with: skip.
        try:
            meta_data = result.manager.metastore.read(meta_path)
        except OSError:
            continue

        ir_cache_path = get_state_ir_cache_name(state)
        payload = {
            'ir': module_ir.serialize(),
            'meta_hash': compute_hash(meta_data),
            'src_hashes': group_hashes[group_map[module_id]],
        }
        result.manager.metastore.write(ir_cache_path, json.dumps(payload))

    result.manager.metastore.commit()
|
|
|
|
|
|
def load_scc_from_cache(
    scc: List[MypyFile],
    result: BuildResult,
    mapper: Mapper,
    ctx: DeserMaps,
) -> ModuleIRs:
    """Load the IR for an SCC of modules from the cache.

    For each module the 'ir' entry of its IR cache file is read and
    deserialized, and the type map for the SCC is loaded into mapper.

    Arguments and return are as compile_scc_to_ir, except that ctx
    holds the deserialization state.
    """
    cache_data = {}
    for tree in scc:
        ir_cache_path = get_state_ir_cache_name(result.graph[tree.fullname])
        raw_json = result.manager.metastore.read(ir_cache_path)
        cache_data[tree.fullname] = json.loads(raw_json)['ir']

    modules = deserialize_modules(cache_data, ctx)
    load_type_map(mapper, scc, ctx)
    return modules
|
|
|
|
|
|
def compile_modules_to_c(
    result: BuildResult,
    compiler_options: CompilerOptions,
    errors: Errors,
    groups: Groups,
) -> Tuple[ModuleIRs, List[FileContents]]:
    """Compile Python module(s) to the source of Python C extension modules.

    This produces the source code of the "shared library" module for
    each group; the shim modules are generated in mypyc.build. For
    every module in its group, a shared library module provides a
    PyCapsule containing an initialization function. It also provides
    a capsule containing an export table of pointers to all of the
    group's functions and static variables.

    Arguments:
        result: The BuildResult from the mypy front-end
        compiler_options: The compilation options
        errors: Where to report any errors encountered
        groups: The groups that we are compiling. See documentation of Groups type above.

    Returns the IR of the modules and a list containing the generated
    files for each group, in the same order as groups.
    """
    # Map every module to the name of the group it belongs to.
    group_map = {}
    for group_sources, lib_name in groups:
        for source in group_sources:
            group_map[source.module] = lib_name
    mapper = Mapper(group_map)

    # Calling back into mypy can sometimes produce errors; set up the
    # error context so that this does not crash.
    result.manager.errors.set_file('<mypyc>', module=None, scope=None)

    modules = compile_modules_to_ir(result, mapper, compiler_options, errors)
    ctext = compile_ir_to_c(groups, modules, result, mapper, compiler_options)

    # Only a clean compile is worth caching.
    if errors.num_errors == 0:
        write_cache(modules, result, group_map, ctext)

    files_per_group = []
    for _, group_name in groups:
        files_per_group.append(ctext[group_name])
    return modules, files_per_group
|
|
|
|
|
|
def generate_function_declaration(fn: FuncIR, emitter: Emitter) -> None:
    """Register header declarations for fn in the emitter's context.

    The native function is always declared and flagged as needing
    export. Every function other than the module top level also gets a
    declaration for its wrapper, using the fastcall wrapper header when
    that is supported for fn and the legacy wrapper header otherwise.
    """
    native_decl = HeaderDeclaration(
        f'{native_function_header(fn.decl, emitter)};',
        needs_export=True)
    emitter.context.declarations[emitter.native_function_name(fn.decl)] = native_decl

    # The module top level has no wrapper.
    if fn.name == TOP_LEVEL_NAME:
        return

    if is_fastcall_supported(fn, emitter.capi_version):
        make_header = wrapper_function_header
    else:
        make_header = legacy_wrapper_function_header
    wrapper_decl = HeaderDeclaration(f'{make_header(fn, emitter.names)};')
    emitter.context.declarations[PREFIX + fn.cname(emitter.names)] = wrapper_decl
|
|
|
|
|
|
def pointerize(decl: str, name: str) -> str:
    """Rewrite the C declaration of name into a declaration of a pointer to it.

    This is a plain textual substitution of name within decl, so it
    does not work for C declarations in general, but it does work for
    all the types used here.
    """
    # A '(' anywhere in the declaration is taken to mean a function, whose
    # pointer form needs the starred name wrapped in parentheses. Anything
    # else just gets a '*' in front of the name.
    is_function = '(' in decl
    pointer_form = f'(*{name})' if is_function else f'*{name}'
    return decl.replace(name, pointer_form)
|
|
|
|
|
|
def group_dir(group_name: str) -> str:
    """Return the relative directory path for a dotted group name.

    Everything before the last dot is kept and its dots are turned into
    os.sep; a name without any dot yields the empty string.
    """
    parent_package, _, _ = group_name.rpartition('.')
    return parent_package.replace('.', os.sep)
|
|
|
|
|
|
class GroupGenerator:
|
|
def __init__(self,
             modules: List[Tuple[str, ModuleIR]],
             source_paths: Dict[str, str],
             group_name: Optional[str],
             group_map: Dict[str, Optional[str]],
             names: NameGenerator,
             compiler_options: CompilerOptions) -> None:
    """Generator for C source for a compilation group.

    The code for a compilation group consists of an internal and an
    external .h file, plus either a single .c file (when not in
    multi_file mode) or one .c file per module (in multi_file mode).

    Arguments:
        modules: (name, ir) pairs for each module in the group
        source_paths: Map from module names to source file paths
        group_name: The name of the group (or None if this is single-module compilation)
        group_map: A map of modules to their group names
        names: The name generator for the compilation
        compiler_options: The compilation options; its multi_file
            setting controls whether each module goes into its own
            source file regardless of group structure
    """
    # What to compile and where it came from.
    self.modules = modules
    self.source_paths = source_paths
    self.names = names
    self.context = EmitterContext(names, group_name, group_map)

    # Group identity: a shared library is used exactly when the group is named.
    self.group_name = group_name
    self.use_shared_lib = group_name is not None

    # Compilation settings.
    self.compiler_options = compiler_options
    self.multi_file = compiler_options.multi_file

    # Initializations of globals to simple values that we can't
    # do statically because the windows loader is bad.
    self.simple_inits: List[Tuple[str, str]] = []
|
|
|
|
@property
|
|
def group_suffix(self) -> str:
    """Return '_' plus the exported form of the group name, or '' without a group name."""
    if not self.group_name:
        return ''
    return '_' + exported_name(self.group_name)
|
|
|
|
@property
|
|
def short_group_suffix(self) -> str:
    """Return '_' plus the exported form of the last dotted component of the group name.

    Without a group name the suffix is the empty string.
    """
    if not self.group_name:
        return ''
    last_component = self.group_name.split('.')[-1]
    return '_' + exported_name(last_component)
|
|
|
|
def generate_c_for_modules(self) -> List[Tuple[str, str]]:
    """Generate the C source and header files for this compilation group.

    Returns a list of (file name, file contents) pairs: first one .c
    file per module if multi-file output is active, then the group's
    main .c file, its internal header and its external header.
    """
    file_contents = []
    # Per-module .c files are only produced when a shared library is used.
    multi_file = self.use_shared_lib and self.multi_file

    # Collect all literal refs in IR.
    for _, module in self.modules:
        for fn in module.functions:
            collect_literals(fn, self.context.literals)

    # The base emitter accumulates the group's main .c file.
    base_emitter = Emitter(self.context)
    # Optionally just include the runtime library c files to
    # reduce the number of compiler invocations needed
    if self.compiler_options.include_runtime_files:
        for name in RUNTIME_C_FILES:
            base_emitter.emit_line(f'#include "{name}"')
    base_emitter.emit_line(f'#include "__native{self.short_group_suffix}.h"')
    base_emitter.emit_line(f'#include "__native_internal{self.short_group_suffix}.h"')
    emitter = base_emitter

    self.generate_literal_tables()

    for module_name, module in self.modules:
        if multi_file:
            # Each module gets a fresh emitter, i.e. its own .c file
            # that includes both group headers.
            emitter = Emitter(self.context)
            emitter.emit_line(f'#include "__native{self.short_group_suffix}.h"')
            emitter.emit_line(
                f'#include "__native_internal{self.short_group_suffix}.h"')

        self.declare_module(module_name, emitter)
        self.declare_internal_globals(module_name, emitter)
        self.declare_imports(module.imports, emitter)

        # Only extension classes get generated class code here.
        for cl in module.classes:
            if cl.is_ext_class:
                generate_class(cl, module_name, emitter)

        # Generate Python extension module definitions and module initialization functions.
        self.generate_module_def(emitter, module_name, module)

        for fn in module.functions:
            emitter.emit_line()
            generate_native_function(fn, emitter, self.source_paths[module_name], module_name)
            # Everything except the module top level also gets a wrapper:
            # the fastcall one when supported, the legacy one otherwise.
            if fn.name != TOP_LEVEL_NAME:
                emitter.emit_line()
                if is_fastcall_supported(fn, emitter.capi_version):
                    generate_wrapper_function(
                        fn, emitter, self.source_paths[module_name], module_name)
                else:
                    generate_legacy_wrapper_function(
                        fn, emitter, self.source_paths[module_name], module_name)
        if multi_file:
            # Save this module's output as its own file.
            name = (f'__native_{emitter.names.private_name(module_name)}.c')
            file_contents.append((name, ''.join(emitter.fragments)))

    # The external header file contains type declarations while
    # the internal contains declarations of functions and objects
    # (which are shared between shared libraries via dynamic
    # exports tables and not accessed directly.)
    ext_declarations = Emitter(self.context)
    ext_declarations.emit_line(f'#ifndef MYPYC_NATIVE{self.group_suffix}_H')
    ext_declarations.emit_line(f'#define MYPYC_NATIVE{self.group_suffix}_H')
    ext_declarations.emit_line('#include <Python.h>')
    ext_declarations.emit_line('#include <CPy.h>')

    declarations = Emitter(self.context)
    declarations.emit_line(f'#ifndef MYPYC_NATIVE_INTERNAL{self.group_suffix}_H')
    declarations.emit_line(f'#define MYPYC_NATIVE_INTERNAL{self.group_suffix}_H')
    declarations.emit_line('#include <Python.h>')
    declarations.emit_line('#include <CPy.h>')
    declarations.emit_line(f'#include "__native{self.short_group_suffix}.h"')
    declarations.emit_line()
    declarations.emit_line('int CPyGlobalsInit(void);')
    declarations.emit_line()

    for module_name, module in self.modules:
        self.declare_finals(module_name, module.final_names, declarations)
        for cl in module.classes:
            # NOTE(review): `emitter` here is whatever the module loop above
            # left behind (base_emitter, or the last module's emitter in
            # multi-file mode) -- confirm this is intended.
            generate_class_type_decl(cl, emitter, ext_declarations, declarations)
        for fn in module.functions:
            generate_function_declaration(fn, declarations)

    # For every group this one depends on, include its header and declare
    # a local copy of its export table.
    for lib in sorted(self.context.group_deps):
        elib = exported_name(lib)
        short_lib = exported_name(lib.split('.')[-1])
        declarations.emit_lines(
            '#include <{}>'.format(
                os.path.join(group_dir(lib), f"__native_{short_lib}.h")
            ),
            f'struct export_table_{elib} exports_{elib};'
        )

    sorted_decls = self.toposort_declarations()

    # From here on, output goes to the group's main .c file again.
    emitter = base_emitter
    self.generate_globals_init(emitter)

    emitter.emit_line()

    for declaration in sorted_decls:
        # Type declarations go to the external header, everything else
        # to the internal one.
        decls = ext_declarations if declaration.is_type else declarations
        if not declaration.is_type:
            decls.emit_lines(
                f'extern {declaration.decl[0]}', *declaration.decl[1:])
            # If there is a definition, emit it. Otherwise repeat the declaration
            # (without an extern).
            if declaration.defn:
                emitter.emit_lines(*declaration.defn)
            else:
                emitter.emit_lines(*declaration.decl)
        else:
            decls.emit_lines(*declaration.decl)

    # The export table and shared library init only exist for named groups.
    if self.group_name:
        self.generate_export_table(ext_declarations, emitter)

        self.generate_shared_lib_init(emitter)

    # Close the include guards of both headers.
    ext_declarations.emit_line('#endif')
    declarations.emit_line('#endif')

    output_dir = group_dir(self.group_name) if self.group_name else ''
    return file_contents + [
        (os.path.join(output_dir, f'__native{self.short_group_suffix}.c'),
         ''.join(emitter.fragments)),
        (os.path.join(output_dir, f'__native_internal{self.short_group_suffix}.h'),
         ''.join(declarations.fragments)),
        (os.path.join(output_dir, f'__native{self.short_group_suffix}.h'),
         ''.join(ext_declarations.fragments)),
    ]
|
|
|
|
def generate_literal_tables(self) -> None:
    """Generate tables containing descriptions of Python literals to construct.

    The constructed literals of all types are stored in one array, so
    that an arbitrary literal can be referred to by its index.
    """
    literals = self.context.literals
    # During module initialization we store all the constructed objects here
    self.declare_global('PyObject *[%d]' % literals.num_literals(), 'CPyStatics')

    # One description table per kind of literal, as
    # (C type, symbol, initializer builder, encoded values getter).
    tables = (
        # str literals
        ('const char * const []', 'CPyLit_Str',
         c_string_array_initializer, literals.encoded_str_values),
        # bytes literals
        ('const char * const []', 'CPyLit_Bytes',
         c_string_array_initializer, literals.encoded_bytes_values),
        # int literals
        ('const char * const []', 'CPyLit_Int',
         c_string_array_initializer, literals.encoded_int_values),
        # float literals
        ('const double []', 'CPyLit_Float',
         c_array_initializer, literals.encoded_float_values),
        # complex literals
        ('const double []', 'CPyLit_Complex',
         c_array_initializer, literals.encoded_complex_values),
        # tuple literals
        ('const int []', 'CPyLit_Tuple',
         c_array_initializer, literals.encoded_tuple_values),
    )
    for c_type, symbol, build_initializer, encoded_values in tables:
        table_init = build_initializer(encoded_values())
        self.declare_global(c_type, symbol, initializer=table_init)
|
|
|
|
def generate_export_table(self, decl_emitter: Emitter, code_emitter: Emitter) -> None:
    """Generate the declaration and definition of the group's export struct.

    Compiled code reaches functions and data in other compilation
    groups through an explicit "export struct". This avoids deeply
    platform specific issues around dynamic library linking (and some
    possibly insurmountable issues involving cyclic dependencies).

    Each group declares a struct type with one pointer per function
    and static variable it exports. The group fills in the struct and
    stores a pointer to it in a capsule, which becomes the 'exports'
    attribute of the group's shared library python module.

    On load, a group's init function imports the export tables of all
    of its dependencies via the capsule mechanism and copies each one
    into a local copy of the table (so that accessing it needs no
    pointer indirection).

    All calls to functions in another group, and all accesses to
    statics from another group, then go through the export table.

    For example, a group containing a module b, where b contains a class B
    and a function bar, would declare an export table like:
        struct export_table_b {
            PyTypeObject **CPyType_B;
            PyObject *(*CPyDef_B)(CPyTagged cpy_r_x);
            CPyTagged (*CPyDef_B___foo)(PyObject *cpy_r_self, CPyTagged cpy_r_y);
            tuple_T2OI (*CPyDef_bar)(PyObject *cpy_r_x);
            char (*CPyDef___top_level__)(void);
        };
    that would be initialized with:
        static struct export_table_b exports = {
            &CPyType_B,
            &CPyDef_B,
            &CPyDef_B___foo,
            &CPyDef_bar,
            &CPyDef___top_level__,
        };
    To call `b.bar`, then, a function in another group would do
    `exports_b.CPyDef_bar(...)`.

    Arguments:
        decl_emitter: Receives the struct type declaration
        code_emitter: Receives the definition of the populated struct
    """
    declarations = decl_emitter.context.declarations

    # Struct type: one pointer member per exported declaration.
    decl_emitter.emit_lines(
        '',
        f'struct export_table{self.group_suffix} {{',
    )
    for symbol, header in declarations.items():
        if not header.needs_export:
            continue
        c_declaration = '\n'.join(header.decl)
        decl_emitter.emit_line(pointerize(c_declaration, symbol))
    decl_emitter.emit_line('};')

    # Struct instance: the address of every exported symbol, in the same order.
    code_emitter.emit_lines(
        '',
        f'static struct export_table{self.group_suffix} exports = {{',
    )
    for symbol, header in declarations.items():
        if not header.needs_export:
            continue
        code_emitter.emit_line(f'&{symbol},')
    code_emitter.emit_line('};')
|
|
|
|
def generate_shared_lib_init(self, emitter: Emitter) -> None:
    """Generate the init function for a shared library.

    A shared library contains all of the actual code for a
    compilation group.

    The init function is responsible for creating Capsules that
    wrap pointers to the initialization function of all the real
    init functions for modules in this shared library as well as
    the export table containing all of the exported functions and
    values from all the modules.

    These capsules are stored in attributes of the shared library.

    The generated function also imports the shared library of every
    group this one depends on and copies that group's export table
    into the local exports_<group> variable.

    Arguments:
        emitter: Receives the generated C code

    Must only be called when the group has a name.
    """
    assert self.group_name is not None

    # Function header and creation of the module object. The module is
    # cached in a static so that repeated calls return the same object.
    emitter.emit_line()
    emitter.emit_lines(
        'PyMODINIT_FUNC PyInit_{}(void)'.format(
            shared_lib_name(self.group_name).split('.')[-1]),
        '{',
        ('static PyModuleDef def = {{ PyModuleDef_HEAD_INIT, "{}", NULL, -1, NULL, NULL }};'
         .format(shared_lib_name(self.group_name))),
        'int res;',
        'PyObject *capsule;',
        'PyObject *tmp;',
        'static PyObject *module;',
        'if (module) {',
        'Py_INCREF(module);',
        'return module;',
        '}',
        'module = PyModule_Create(&def);',
        'if (!module) {',
        'goto fail;',
        '}',
        '',
    )

    # Capsule holding this group's export table, stored as 'exports'.
    emitter.emit_lines(
        'capsule = PyCapsule_New(&exports, "{}.exports", NULL);'.format(
            shared_lib_name(self.group_name)),
        'if (!capsule) {',
        'goto fail;',
        '}',
        'res = PyObject_SetAttrString(module, "exports", capsule);',
        'Py_DECREF(capsule);',
        'if (res < 0) {',
        'goto fail;',
        '}',
        '',
    )

    # One capsule per module, wrapping that module's init function and
    # stored as 'init_<exported module name>'.
    for mod, _ in self.modules:
        name = exported_name(mod)
        emitter.emit_lines(
            f'extern PyObject *CPyInit_{name}(void);',
            'capsule = PyCapsule_New((void *)CPyInit_{}, "{}.init_{}", NULL);'.format(
                name, shared_lib_name(self.group_name), name),
            'if (!capsule) {',
            'goto fail;',
            '}',
            f'res = PyObject_SetAttrString(module, "init_{name}", capsule);',
            'Py_DECREF(capsule);',
            'if (res < 0) {',
            'goto fail;',
            '}',
            '',
        )

    # Import every group we depend on and copy its export table locally.
    for group in sorted(self.context.group_deps):
        egroup = exported_name(group)
        emitter.emit_lines(
            'tmp = PyImport_ImportModule("{}"); if (!tmp) goto fail; Py_DECREF(tmp);'.format(
                shared_lib_name(group)),
            'struct export_table_{} *pexports_{} = PyCapsule_Import("{}.exports", 0);'.format(
                egroup, egroup, shared_lib_name(group)),
            f'if (!pexports_{egroup}) {{',
            'goto fail;',
            '}',
            'memcpy(&exports_{group}, pexports_{group}, sizeof(exports_{group}));'.format(
                group=egroup),
            '',
        )

    emitter.emit_lines(
        'return module;',
        'fail:',
        # `module` is a static that the "if (module)" fast path above trusts.
        # Py_CLEAR releases it AND resets it to NULL, so a later retry of
        # the init function does not INCREF and return a dangling pointer
        # (a plain Py_XDECREF would leave the stale pointer behind).
        'Py_CLEAR(module);',
        'return NULL;',
        '}',
    )
|
|
|
|
def generate_globals_init(self, emitter: Emitter) -> None:
    """Emit the C function CPyGlobalsInit.

    The emitted function returns 0 right away once it has completed
    successfully before. Otherwise it calls CPy_Init, assigns the
    simple global initializers collected in self.simple_inits, and
    initializes CPyStatics from the literal tables, returning -1 if
    that fails.
    """
    prologue = (
        '',
        'int CPyGlobalsInit(void)',
        '{',
        'static int is_initialized = 0;',
        'if (is_initialized) return 0;',
        '',
    )
    emitter.emit_lines(*prologue)

    emitter.emit_line('CPy_Init();')
    for target, initial_value in self.simple_inits:
        emitter.emit_line(f'{target} = {initial_value};')

    # The literal description tables, in the order CPyStatics_Initialize takes them.
    values = 'CPyLit_Str, CPyLit_Bytes, CPyLit_Int, CPyLit_Float, CPyLit_Complex, CPyLit_Tuple'
    statics_init = (
        f'if (CPyStatics_Initialize(CPyStatics, {values}) < 0) {{',
        'return -1;',
        '}',
    )
    emitter.emit_lines(*statics_init)

    epilogue = (
        'is_initialized = 1;',
        'return 0;',
        '}',
    )
    emitter.emit_lines(*epilogue)
|
|
|
|
def generate_module_def(self, emitter: Emitter, module_name: str, module: ModuleIR) -> None:
    """Emit the PyModuleDef struct for a module and the module init function.

    The output consists of, in order: the PyMethodDef table for the module's
    functions that are neither methods nor the module top level, the
    PyModuleDef struct, and the init function. The init function is named
    PyInit_<module_name> when no shared library is used and
    CPyInit_<exported name> otherwise.
    """
    # Emit module methods
    module_prefix = emitter.names.private_name(module_name)
    emitter.emit_line(f'static PyMethodDef {module_prefix}module_methods[] = {{')
    for fn in module.functions:
        # Methods and the module top level get no entry in the table.
        if fn.class_name is not None or fn.name == TOP_LEVEL_NAME:
            continue
        name = short_id_from_name(fn.name, fn.decl.shortname, fn.line)
        if is_fastcall_supported(fn, emitter.capi_version):
            flag = 'METH_FASTCALL'
        else:
            flag = 'METH_VARARGS'
        emitter.emit_line(
            ('{{"{name}", (PyCFunction){prefix}{cname}, {flag} | METH_KEYWORDS, '
             'NULL /* docstring */}},').format(
                 name=name,
                 cname=fn.cname(emitter.names),
                 prefix=PREFIX,
                 flag=flag))
    emitter.emit_line('{NULL, NULL, 0, NULL}')
    emitter.emit_line('};')
    emitter.emit_line()

    # Emit module definition struct
    emitter.emit_lines(f'static struct PyModuleDef {module_prefix}module = {{',
                       'PyModuleDef_HEAD_INIT,',
                       f'"{module_name}",',
                       'NULL, /* docstring */',
                       '-1, /* size of per-interpreter state of the module,',
                       ' or -1 if the module keeps state in global variables. */',
                       f'{module_prefix}module_methods',
                       '};')
    emitter.emit_line()
    # Emit module init function. If we are compiling just one module, this
    # will be the C API init function. If we are compiling 2+ modules, we
    # generate a shared library for the modules and shims that call into
    # the shared library, and in this case we use an internal module
    # initialization function that will be called by the shim.
    if not self.use_shared_lib:
        declaration = f'PyMODINIT_FUNC PyInit_{module_name}(void)'
    else:
        declaration = f'PyObject *CPyInit_{exported_name(module_name)}(void)'
    emitter.emit_lines(declaration,
                       '{')
    emitter.emit_line('PyObject* modname = NULL;')
    # Store the module reference in a static and return it when necessary.
    # This is separate from the *global* reference to the module that will
    # be populated when it is imported by a compiled module. We want that
    # reference to only be populated when the module has been successfully
    # imported, whereas this we want to have to stop a circular import.
    module_static = self.module_internal_static_name(module_name, emitter)

    emitter.emit_lines(f'if ({module_static}) {{',
                       f'Py_INCREF({module_static});',
                       f'return {module_static};',
                       '}')

    emitter.emit_lines(f'{module_static} = PyModule_Create(&{module_prefix}module);',
                       f'if (unlikely({module_static} == NULL))',
                       ' goto fail;')
    # PyObject_GetAttrString() returns NULL on failure. Without this check a
    # NULL modname would reach CPyType_FromTemplate() and the unconditional
    # Py_DECREF(modname) on the success path below.
    emitter.emit_lines(
        f'modname = PyObject_GetAttrString((PyObject *){module_static}, "__name__");',
        'if (unlikely(modname == NULL))',
        ' goto fail;')

    module_globals = emitter.static_name('globals', module_name)
    emitter.emit_lines(f'{module_globals} = PyModule_GetDict({module_static});',
                       f'if (unlikely({module_globals} == NULL))',
                       ' goto fail;')

    # HACK: Manually instantiate generated classes here
    type_structs: List[str] = []
    for cl in module.classes:
        type_struct = emitter.type_struct_name(cl)
        # Every class's type struct is recorded so the fail path can clear it.
        type_structs.append(type_struct)
        if cl.is_generated:
            emitter.emit_lines(
                '{t} = (PyTypeObject *)CPyType_FromTemplate('
                '(PyObject *){t}_template, NULL, modname);'
                .format(t=type_struct))
            emitter.emit_lines(f'if (unlikely(!{type_struct}))',
                               ' goto fail;')

    emitter.emit_lines('if (CPyGlobalsInit() < 0)',
                       ' goto fail;')

    self.generate_top_level_call(module, emitter)

    emitter.emit_lines('Py_DECREF(modname);')

    emitter.emit_line(f'return {module_static};')
    # Failure path: Py_CLEAR accepts NULL, so it is safe no matter how far
    # initialization got before the jump.
    emitter.emit_lines('fail:',
                       f'Py_CLEAR({module_static});',
                       'Py_CLEAR(modname);')
    # Drop the module's final values and reset them to the undefined value.
    for name, typ in module.final_names:
        static_name = emitter.static_name(name, module_name)
        emitter.emit_dec_ref(static_name, typ, is_xdec=True)
        undef = emitter.c_undefined_value(typ)
        emitter.emit_line(f'{static_name} = {undef};')
    # the type objects returned from CPyType_FromTemplate are all new references
    # so we have to decref them
    for t in type_structs:
        emitter.emit_line(f'Py_CLEAR({t});')
    emitter.emit_line('return NULL;')
    emitter.emit_line('}')
|
|
|
|
def generate_top_level_call(self, module: ModuleIR, emitter: Emitter) -> None:
    """Generate call to function representing module top level.

    The emitted C stores the call's result in a 'char' and jumps to the
    'fail' label when that result equals 2. Nothing is emitted if the
    module has no function named TOP_LEVEL_NAME.
    """
    # Optimization: we tend to put the top level last, so search from the end
    top_level = next(
        (func for func in reversed(module.functions) if func.name == TOP_LEVEL_NAME),
        None,
    )
    if top_level is None:
        return

    call_line = f'char result = {emitter.native_function_name(top_level.decl)}();'
    emitter.emit_lines(
        call_line,
        'if (result == 2)',
        ' goto fail;',
    )
|
|
|
|
def toposort_declarations(self) -> List[HeaderDeclaration]:
    """Return the declarations ordered so that dependencies come first.

    A C declaration may require other declarations to appear before it
    (for example struct declarations). To make sure the generated C
    compiles, every declaration is placed after all the declarations it
    lists as dependencies, using a depth-first traversal.

    This runs in O(V + E).
    """
    ordered: List[HeaderDeclaration] = []
    entries: Dict[str, MarkedDeclaration] = OrderedDict(
        (key, MarkedDeclaration(header_decl, False))
        for key, header_decl in self.context.declarations.items()
    )

    def emit_with_dependencies(key: str) -> None:
        entry = entries[key]
        if not entry.mark:
            for dependency in entry.declaration.dependencies:
                emit_with_dependencies(dependency)
            # Only marked after its dependencies have been emitted.
            ordered.append(entry.declaration)
            entry.mark = True

    for key in entries:
        emit_with_dependencies(key)

    return ordered
|
|
|
|
def declare_global(self, type_spaced: str, name: str,
                   *,
                   initializer: Optional[str] = None) -> None:
    """Register a header declaration for a C global unless one exists already.

    type_spaced is the C type text that goes directly in front of the name.
    If it contains '[', the name is inserted before the first '[' instead of
    at the end. When a non-empty initializer is given, a definition of the
    form '<declarator> = <initializer>;' is attached to the declaration.
    A declaration already registered under the same name is left untouched.
    """
    if name in self.context.declarations:
        return

    before, bracket, after = type_spaced.partition('[')
    if bracket:
        declarator = f'{before}{name}[{after}'
    else:
        declarator = f'{type_spaced}{name}'

    definition = [f'{declarator} = {initializer};'] if initializer else None
    self.context.declarations[name] = HeaderDeclaration(
        f'{declarator};',
        defn=definition,
    )
|
|
|
|
def declare_internal_globals(self, module_name: str, emitter: Emitter) -> None:
    """Declare the 'PyObject *' global named by static_name('globals', module_name)."""
    self.declare_global('PyObject *', emitter.static_name('globals', module_name))
|
|
|
|
def module_internal_static_name(self, module_name: str, emitter: Emitter) -> str:
    """Return the static name built from '<module_name>_internal' with MODULE_PREFIX."""
    internal_id = f'{module_name}_internal'
    return emitter.static_name(internal_id, None, prefix=MODULE_PREFIX)
|
|
|
|
def declare_module(self, module_name: str, emitter: Emitter) -> None:
    """Declare the two 'CPyModule *' globals kept for a module.

    Each module gets two globals: an internal one (initialized to NULL)
    that module init uses to cache its result and prevent infinite
    recursion in import cycles, and one that other modules use to refer
    to the module. The latter is also queued in self.simple_inits to be
    assigned Py_None.
    """
    module_ctype = 'CPyModule *'

    cache_symbol = self.module_internal_static_name(module_name, emitter)
    self.declare_global(module_ctype, cache_symbol, initializer='NULL')

    public_symbol = emitter.static_name(module_name, None, prefix=MODULE_PREFIX)
    self.declare_global(module_ctype, public_symbol)
    self.simple_inits.append((public_symbol, 'Py_None'))
|
|
|
|
def declare_imports(self, imps: Iterable[str], emitter: Emitter) -> None:
    """Call declare_module for every module name in imps."""
    for imported_module in imps:
        self.declare_module(imported_module, emitter)
|
|
|
|
def declare_finals(
        self, module: str, final_names: Iterable[Tuple[str, RType]], emitter: Emitter) -> None:
    """Register an exported header declaration for each final name of a module.

    Every (name, type) pair is stored in emitter.context.declarations under
    its static name, replacing any entry already stored there. The entry's
    definition is the one produced by final_definition().
    """
    for final_name, final_type in final_names:
        symbol = emitter.static_name(final_name, module)
        decl_text = f'{emitter.ctype_spaced(final_type)}{symbol};'
        defn_text = self.final_definition(module, final_name, final_type, emitter)
        emitter.context.declarations[symbol] = HeaderDeclaration(
            decl_text,
            [defn_text],
            needs_export=True)
|
|
|
|
def final_definition(
        self, module: str, name: str, typ: RType, emitter: Emitter) -> str:
    """Return the C definition that sets a final's static to its undefined value."""
    symbol = emitter.static_name(name, module)
    # Here we rely on the fact that undefined value and error value are always the same
    if not isinstance(typ, RTuple):
        initial_value = emitter.c_undefined_value(typ)
    else:
        # We need to inline because initializer must be static
        fields = ''.join(emitter.tuple_undefined_value_helper(typ))
        initial_value = f'{{ {fields} }}'
    return f'{emitter.ctype_spaced(typ)}{symbol} = {initial_value};'
|
|
|
|
def declare_static_pyobject(self, identifier: str, emitter: Emitter) -> None:
    """Declare a 'PyObject *' global under the static name of identifier."""
    self.declare_global('PyObject *', emitter.static_name(identifier, None))
|
|
|
|
|
|
def sort_classes(classes: List[Tuple[str, ClassIR]]) -> List[Tuple[str, ClassIR]]:
    """Order (module name, class) pairs so each class follows its base and traits.

    The dependency sets are collected per class and handed to toposort();
    the result is paired back up with the module name each class came with.
    """
    module_of: Dict[ClassIR, str] = {}
    dependencies: Dict[ClassIR, Set[ClassIR]] = OrderedDict()
    for module_name, class_ir in classes:
        module_of[class_ir] = module_name
        required = dependencies.setdefault(class_ir, set())
        if class_ir.base:
            required.add(class_ir.base)
        required.update(class_ir.traits)
    return [(module_of[class_ir], class_ir) for class_ir in toposort(dependencies)]
|
|
|
|
|
|
T = TypeVar('T')


def toposort(deps: Dict[T, Set[T]]) -> List[T]:
    """Topologically sort a dict from item to dependencies.

    Every item appears in the result after all of its dependencies. Each
    dependency must itself be a key of deps.

    This runs in O(V + E).
    """
    ordered: List[T] = []
    seen: Set[T] = set()

    def place_after_dependencies(node: T) -> None:
        if node not in seen:
            for dependency in deps[node]:
                place_after_dependencies(dependency)
            # Recorded only once everything it depends on has been placed.
            ordered.append(node)
            seen.add(node)

    for node in deps:
        place_after_dependencies(node)

    return ordered
|
|
|
|
|
|
def is_fastcall_supported(fn: FuncIR, capi_version: Tuple[int, int]) -> bool:
    """Tell whether fn can use the fastcall (or vectorcall) calling convention.

    Functions outside a class follow use_fastcall(). A '__call__' method
    follows use_vectorcall(); any other method follows use_fastcall(),
    except '__init__', which is never supported.
    """
    if fn.class_name is None:
        return use_fastcall(capi_version)
    if fn.name == '__call__':
        # We can use vectorcalls (PEP 590) when supported
        return use_vectorcall(capi_version)
    # TODO: Support fastcall for __init__.
    return use_fastcall(capi_version) and fn.name != '__init__'
|
|
|
|
|
|
def collect_literals(fn: FuncIR, literals: Literals) -> None:
    """Record the value of every LoadLiteral op of fn in literals.

    Collecting literals must happen only after we have the final IR.
    This way we won't include literals that have been optimized away.
    """
    every_op = (op for block in fn.blocks for op in block.ops)
    for op in every_op:
        if isinstance(op, LoadLiteral):
            literals.record_literal(op.value)
|
|
|
|
|
|
def c_array_initializer(components: List[str]) -> str:
    """Build the brace-enclosed initializer text for a C array variable.

    Each component must be a C expression that is valid in an initializer.

    For instance, the components ["1", "2"] produce "{1, 2}", which can
    be used like this:

        int a[] = {1, 2};

    A long result is broken up over several lines.
    """
    finished_rows: List[str] = []
    row: List[str] = []
    row_width = 0
    for item in components:
        fits = row_width + 2 + len(item) < 70
        if row and not fits:
            # Close the current row and start a new one with this item.
            finished_rows.append(', '.join(row))
            row = [item]
            row_width = len(item)
        else:
            row.append(item)
            row_width += len(item) + 2

    last_row = ', '.join(row)
    if finished_rows:
        # Multi-line result
        finished_rows.append(last_row)
        return '{\n ' + ',\n '.join(finished_rows) + '\n}'
    # Result fits on a single line
    return '{%s}' % last_row
|
|
|
|
|
|
def c_string_array_initializer(components: List[bytes]) -> str:
    """Build a brace-enclosed initializer with one C string per line.

    Each component is rendered with c_string_initializer() and followed
    by a comma and a newline.
    """
    entries = [' ' + c_string_initializer(item) + ',\n' for item in components]
    return '{\n' + ''.join(entries) + '}'
|