usse/scrape/venv/lib/python3.10/site-packages/sphinx/util/parallel.py

"""Parallel building utilities."""
from __future__ import annotations
import os
import time
import traceback
from math import sqrt
from typing import TYPE_CHECKING, Any, Callable

try:
    import multiprocessing

    HAS_MULTIPROCESSING = True
except ImportError:
    HAS_MULTIPROCESSING = False

from sphinx.errors import SphinxParallelError
from sphinx.util import logging

if TYPE_CHECKING:
    from collections.abc import Sequence

logger = logging.getLogger(__name__)

# our parallel functionality only works for the forking Process;
# test the HAS_MULTIPROCESSING flag rather than the bare `multiprocessing`
# name, which is unbound when the import above failed
parallel_available = HAS_MULTIPROCESSING and os.name == 'posix'


class SerialTasks:
    """Has the same interface as ParallelTasks, but executes tasks directly."""

    def __init__(self, nproc: int = 1) -> None:
        pass

    def add_task(
        self, task_func: Callable, arg: Any = None, result_func: Callable | None = None,
    ) -> None:
        if arg is not None:
            res = task_func(arg)
        else:
            res = task_func()
        if result_func:
            result_func(res)

    def join(self) -> None:
        pass


class ParallelTasks:
    """Executes *nproc* tasks in parallel after forking."""

    def __init__(self, nproc: int) -> None:
        self.nproc = nproc
        # (optional) function performed by each task on the result of main task
        self._result_funcs: dict[int, Callable] = {}
        # task arguments
        self._args: dict[int, list[Any] | None] = {}
        # list of subprocesses (both started and waiting)
        self._procs: dict[int, Any] = {}
        # list of receiving pipe connections of running subprocesses
        self._precvs: dict[int, Any] = {}
        # list of receiving pipe connections of waiting subprocesses
        self._precvsWaiting: dict[int, Any] = {}
        # number of working subprocesses
        self._pworking = 0
        # task number of each subprocess
        self._taskid = 0

    def _process(self, pipe: Any, func: Callable, arg: Any) -> None:
        # runs in the forked child: execute the task, collect the log records
        # it emits, and ship (failed, logs, result) back through the pipe
        try:
            collector = logging.LogCollector()
            with collector.collect():
                if arg is None:
                    ret = func()
                else:
                    ret = func(arg)
            failed = False
        except BaseException as err:
            failed = True
            errmsg = traceback.format_exception_only(err.__class__, err)[0].strip()
            ret = (errmsg, traceback.format_exc())
        logging.convert_serializable(collector.logs)
        pipe.send((failed, collector.logs, ret))

    def add_task(
        self, task_func: Callable, arg: Any = None, result_func: Callable | None = None,
    ) -> None:
        tid = self._taskid
        self._taskid += 1
        self._result_funcs[tid] = result_func or (lambda arg, result: None)
        self._args[tid] = arg
        precv, psend = multiprocessing.Pipe(False)
        context: Any = multiprocessing.get_context('fork')
        proc = context.Process(target=self._process, args=(psend, task_func, arg))
        self._procs[tid] = proc
        self._precvsWaiting[tid] = precv
        self._join_one()

    def join(self) -> None:
        try:
            while self._pworking:
                if not self._join_one():
                    time.sleep(0.02)
        finally:
            # shutdown other child processes on failure
            self.terminate()

    def terminate(self) -> None:
        for tid in list(self._precvs):
            self._procs[tid].terminate()
            self._result_funcs.pop(tid)
            self._procs.pop(tid)
            self._precvs.pop(tid)
            self._pworking -= 1

    def _join_one(self) -> bool:
        joined_any = False
        for tid, pipe in self._precvs.items():
            if pipe.poll():
                exc, logs, result = pipe.recv()
                if exc:
                    raise SphinxParallelError(*result)
                for log in logs:
                    logger.handle(log)
                self._result_funcs.pop(tid)(self._args.pop(tid), result)
                self._procs[tid].join()
                self._precvs.pop(tid)
                self._pworking -= 1
                joined_any = True
                break

        while self._precvsWaiting and self._pworking < self.nproc:
            newtid, newprecv = self._precvsWaiting.popitem()
            self._precvs[newtid] = newprecv
            self._procs[newtid].start()
            self._pworking += 1

        return joined_any
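
# Illustrative usage sketch (a comment added here, not part of the original
# module): callers queue tasks and then block in join().  Note the interface
# quirk: ParallelTasks invokes result_func with *two* arguments, (arg, result),
# while SerialTasks above invokes it with the result alone.  The names
# `docnames` and `read_doc` are hypothetical placeholders.
#
#     def on_result(arg: Any, result: Any) -> None:
#         print(arg, '->', result)
#
#     tasks = ParallelTasks(nproc=4)
#     for chunk in make_chunks(docnames, nproc=4):  # see make_chunks below
#         tasks.add_task(read_doc, chunk, on_result)
#     tasks.join()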


def make_chunks(arguments: Sequence[str], nproc: int, maxbatch: int = 10) -> list[Any]:
    # determine how many documents to read in one go
    nargs = len(arguments)
    chunksize = nargs // nproc
    if chunksize >= maxbatch:
        # try to improve batch size vs. number of batches
        chunksize = int(sqrt(nargs / nproc * maxbatch))
    if chunksize == 0:
        chunksize = 1
    nchunks, rest = divmod(nargs, chunksize)
    if rest:
        nchunks += 1
    # partition documents in "chunks" that will be written by one Process
    return [arguments[i * chunksize:(i + 1) * chunksize] for i in range(nchunks)]
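

# A minimal runnable demo (added for illustration, not upstream code): chunk a
# hypothetical document list, then count each chunk's size in a subprocess
# pool, falling back to SerialTasks where forking is unavailable.
if __name__ == '__main__':
    docs = [f'doc{i}' for i in range(25)]
    chunks = make_chunks(docs, nproc=4)  # 25 docs -> chunksize 6 -> 5 chunks
    print('chunk sizes:', [len(c) for c in chunks])

    results: list[int] = []
    if parallel_available:
        tasks: Any = ParallelTasks(nproc=4)
        # ParallelTasks calls result_func with (arg, result) in the parent
        for chunk in chunks:
            tasks.add_task(len, chunk, lambda arg, res: results.append(res))
    else:
        tasks = SerialTasks()
        # SerialTasks calls result_func with the result only
        for chunk in chunks:
            tasks.add_task(len, chunk, results.append)
    tasks.join()
    print('per-chunk counts:', sorted(results))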