177 lines
6.2 KiB
Python
177 lines
6.2 KiB
Python
"""Basic introspection of modules."""
|
|
|
|
from typing import List, Optional, Union
|
|
from types import ModuleType
|
|
from multiprocessing import Process, Queue
|
|
import importlib
|
|
import inspect
|
|
import os
|
|
import pkgutil
|
|
import queue
|
|
import sys
|
|
|
|
|
|
class ModuleProperties:
    """Plain record of the properties collected for one module or package.

    Attributes:
        name: value of the module's ``__name__`` attribute.
        file: value of the module's ``__file__`` attribute, if any.
        path: value of the module's ``__path__`` attribute, if any.
        all: value of the module's ``__all__`` attribute, if any.
        is_c_module: flag telling whether the module is treated as a C module.
        subpackages: names of sub-packages; never None (defaults to an
            empty list).
    """

    # Every constructor argument is required to carry a default value.
    def __init__(self,
                 name: str = "",
                 file: Optional[str] = None,
                 path: Optional[List[str]] = None,
                 all: Optional[List[str]] = None,
                 is_c_module: bool = False,
                 subpackages: Optional[List[str]] = None) -> None:
        self.name = name                # __name__ attribute
        self.file = file                # __file__ attribute
        self.path = path                # __path__ attribute
        self.all = all                  # __all__ attribute
        self.is_c_module = is_c_module
        # A missing (or empty) value is normalised to a fresh empty list.
        self.subpackages = subpackages if subpackages else []
|
|
|
|
|
|
def is_c_module(module: ModuleType) -> bool:
    """Tell whether *module* should be treated as a C extension module.

    A module whose ``__dict__`` has no ``__file__`` entry (or has it set to
    None) is reported as a C module; otherwise the decision is based on the
    file extension being ``.so`` or ``.pyd``.
    """
    source_file = module.__dict__.get('__file__')
    if source_file is None:
        # Possibly a namespace package. With no source file available,
        # such modules have to be handled through introspection.
        return True
    _, extension = os.path.splitext(source_file)
    return extension in ('.so', '.pyd')
|
|
|
|
|
|
class InspectError(Exception):
    """Error signalling that runtime introspection of a module failed."""
|
|
|
|
|
|
def get_package_properties(package_id: str) -> ModuleProperties:
    """Import a module/package and describe it using runtime introspection.

    Args:
        package_id: fully qualified name passed to ``importlib.import_module``.

    Returns:
        A ModuleProperties filled in from the imported object's ``__name__``,
        ``__file__``, ``__path__`` and ``__all__`` attributes, the C-module
        flag and the list of discovered sub-packages.

    Raises:
        InspectError: if the import raises anything (any BaseException); the
            original exception is chained as the cause.
    """
    try:
        package = importlib.import_module(package_id)
    except BaseException as err:
        raise InspectError(str(err)) from err

    name = getattr(package, "__name__", package_id)
    file = getattr(package, "__file__", None)

    # Only a genuine list is accepted as the package path.
    raw_path = getattr(package, "__path__", None)
    path: Optional[List[str]] = raw_path if isinstance(raw_path, list) else None

    pkg_all = getattr(package, '__all__', None)
    if pkg_all is not None:
        try:
            pkg_all = list(pkg_all)
        except Exception:
            # __all__ that cannot be turned into a list is treated as absent.
            pkg_all = None

    is_c = is_c_module(package)

    subpackages: List[str] = []
    if path is not None:
        # A regular package: enumerate everything reachable below its path,
        # silently ignoring errors reported by the walker.
        walker = pkgutil.walk_packages(path,
                                       prefix=package.__name__ + ".",
                                       onerror=lambda r: None)
        for _finder, qualified_name, _is_pkg in walker:
            subpackages.append(qualified_name)
    elif is_c:
        # No path, but a C extension module: find sub-packages with the
        # inspect module, keeping only module members whose own __name__
        # is the expected "<package>.<member>" qualified name.
        for member_name, member in inspect.getmembers(package):
            if not inspect.ismodule(member):
                continue
            qualified = package.__name__ + "." + member_name
            if member.__name__ == qualified:
                subpackages.append(qualified)
    # Otherwise it is a module inside a package: nothing else to walk.

    return ModuleProperties(name=name,
                            file=file,
                            path=path,
                            all=pkg_all,
                            is_c_module=is_c,
                            subpackages=subpackages)
|
|
|
|
|
|
def worker(tasks: 'Queue[str]',
           results: 'Queue[Union[str, ModuleProperties]]',
           sys_path: List[str]) -> None:
    """Run the request/response loop of an introspection worker process.

    Args:
        tasks: queue from which module names to inspect are read.
        results: queue receiving, for every task, either the resulting
            ModuleProperties or the error message (as str) of an InspectError.
        sys_path: value installed as ``sys.path`` before any task is handled.

    The loop never returns on its own.
    """
    sys.path = sys_path
    while True:
        module_id = tasks.get()
        try:
            outcome = get_package_properties(module_id)
        except InspectError as err:
            # Failures are reported to the parent as a plain string.
            outcome = str(err)
        results.put(outcome)
|
|
|
|
|
|
class ModuleInspect:
    """Introspect modules at runtime from within a separate process.

    The helper process is reused across modules for efficiency. If an import
    reports an error after earlier successful requests, the request is retried
    once in a fresh process, so that state left behind by previous imports
    cannot be the cause.

    Running the imports in another process shields us from many side effects;
    for instance, importing a module may kill the process doing the import,
    and we want to be able to recover from that.

    Always use in a with statement for proper clean-up:

      with ModuleInspect() as m:
          p = m.get_package_properties('urllib.parse')
    """

    def __init__(self) -> None:
        self._start()

    def __enter__(self) -> 'ModuleInspect':
        return self

    def __exit__(self, *args: object) -> None:
        self.close()

    def _start(self) -> None:
        """Create fresh queues and launch a new worker process."""
        self.tasks: Queue[str] = Queue()
        self.results: Queue[Union[ModuleProperties, str]] = Queue()
        self.proc = Process(
            target=worker,
            args=(self.tasks, self.results, sys.path),
        )
        self.proc.start()
        # Counts the successful roundtrips made with the current process.
        self.counter = 0

    def close(self) -> None:
        """Free any resources used."""
        self.proc.terminate()

    def get_package_properties(self, package_id: str) -> ModuleProperties:
        """Return some properties of a module/package using runtime introspection.

        Raise InspectError if the target couldn't be imported.
        """
        self.tasks.put(package_id)
        outcome = self._get_from_queue()

        if outcome is None:
            # The worker died: bring up a replacement, then report the error.
            self._start()
            raise InspectError(f'Process died when importing {package_id!r}')

        if not isinstance(outcome, str):
            self.counter += 1
            return outcome

        # A string result is the message of an import error.
        if self.counter <= 0:
            # The process was fresh, so the failure is genuine.
            raise InspectError(outcome)

        # Earlier imports may have corrupted some global state in the worker;
        # try again with a brand new process.
        self.close()
        self._start()
        return self.get_package_properties(package_id)

    def _get_from_queue(self) -> Union[ModuleProperties, str, None]:
        """Get value from the queue.

        Return the value read from the queue, or None if the process
        unexpectedly died. Raise RuntimeError if nothing arrives within
        the allowed number of polling attempts.
        """
        max_attempts = 100
        for _ in range(max_attempts):
            try:
                return self.results.get(timeout=0.05)
            except queue.Empty:
                if not self.proc.is_alive():
                    return None
        raise RuntimeError('Timeout waiting for subprocess')
|